Publishing 2019 R1 content

author: Alexey Suhov <alexey.suhov@intel.com> 2019-04-12 18:25:53 +0300
committer: Alexey Suhov <alexey.suhov@intel.com> 2019-04-12 18:25:53 +0300
commit: 72660e9a4d683dc6a0c50e9fad96e59b7edd1f71 (patch)
tree: 335135f31e39d2bb330d05e1775b6e5bf2c8caad
parent: 669bee86e580cbbc8ef40b440ab195ba2cbf5142 (diff)
download: dldt-72660e9a4d683dc6a0c50e9fad96e59b7edd1f71.tar.gz
dldt-72660e9a4d683dc6a0c50e9fad96e59b7edd1f71.tar.bz2
dldt-72660e9a4d683dc6a0c50e9fad96e59b7edd1f71.zip
3639 files changed, 266091 insertions, 63647 deletions
diff --git a/README.md b/README.md
index f4fca3a0b..a0820235a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # [OpenVINO™ Toolkit](https://01.org/openvinotoolkit) - Deep Learning Deployment Toolkit repository
-[![Stable release](https://img.shields.io/badge/version-2018.R5-green.svg)](https://github.com/opencv/dldt/releases/tag/2018_R5)
+[![Stable release](https://img.shields.io/badge/version-2019.R1-green.svg)](https://github.com/opencv/dldt/releases/tag/2019_R1)
 [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE)
 
 This toolkit allows developers to deploy pre-trained deep learning models through a high-level C++ Inference Engine API integrated with application logic. 
diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt
index 46f821d46..9e639ff7c 100644
--- a/inference-engine/CMakeLists.txt
+++ b/inference-engine/CMakeLists.txt
@@ -1,13 +1,15 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 3.3)
+cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
 
 project(InferenceEngine)
 
 set(DEV_BUILD TRUE)
 
+include(CTest)
+
 ## WA for problem with gtest submodule. It cannot detect uint32 type.
 ## remove Gtest submodule and this two lines together
 include (CheckTypeSize)
@@ -133,25 +135,28 @@ set (CMAKE_POSITION_INDEPENDENT_CODE ON)
 include (sanitizer)
 
 include(CheckCXXCompilerFlag)
-if(UNIX)
-    CHECK_CXX_COMPILER_FLAG("-fvisibility=hidden" COMPILER_SUPPORTS_VISIBILITY)
-    if (COMPILER_SUPPORTS_VISIBILITY)
-        #add_definitions(-fvisibility=hidden) todo: should be hidden? if so define default visibiliti explicite for each funtion
-        add_definitions(-fvisibility=default)
-    endif(COMPILER_SUPPORTS_VISIBILITY)
-endif(UNIX)
+
+include(cpplint)
 
 add_subdirectory(src)
 add_subdirectory(tests)
 add_subdirectory(thirdparty)
-if (ENABLE_SAMPLES_CORE)
-    set(InferenceEngine_DIR "${CMAKE_BINARY_DIR}")
+set(InferenceEngine_DIR "${CMAKE_BINARY_DIR}")
 
-    #to be able to link
-    set (LIB_FOLDER ${IE_MAIN_SOURCE_DIR}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
-    add_subdirectory(samples)
-endif()
+#to be able to link
+set (LIB_FOLDER ${IE_MAIN_SOURCE_DIR}/${BIN_FOLDER}/${IE_BUILD_CONFIGURATION}/lib)
+
+# gflags and format_reader targets are kept inside of samples directory and
+# they must be built even if samples build is disabled (required for tests and tools).
+add_subdirectory(samples)
+
+file(GLOB_RECURSE SAMPLES_SOURCES samples/*.cpp samples/*.hpp samples/*.h)
+add_cpplint_target(sample_cpplint
+    FOR_SOURCES ${SAMPLES_SOURCES}
+    EXCLUDE_PATTERNS "thirdparty/*" "pugixml/*")
 
 if (ENABLE_PYTHON)
     add_subdirectory(ie_bridges/python)
-endif()
-\ No newline at end of file
+endif()
+
+add_cpplint_report_target()
diff --git a/inference-engine/README.md b/inference-engine/README.md
index d28782e60..36053cd25 100644
--- a/inference-engine/README.md
+++ b/inference-engine/README.md
@@ -16,8 +16,8 @@ Inference Engine plugins for Intel® FPGA and Intel® Movidius™ Neural Compute
 ## Build on Linux\* Systems
 
 The software was validated on:
-- Ubuntu\* 16.04 with default GCC\* 5.4.0
-- CentOS\* 7.4 with default GCC\* 4.8.5
+- Ubuntu\* 16.04 (64-bit) with default GCC\* 5.4.0
+- CentOS\* 7.4 (64-bit) with default GCC\* 4.8.5
 - [Intel® Graphics Compute Runtime for OpenCL™ Driver package 18.28.11080](https://github.com/intel/compute-runtime/releases/tag/18.28.11080).
 
 ### Software Requirements
@@ -45,11 +45,19 @@ The software was validated on:
 You can use the following additional build options:
 - Internal JIT GEMM implementation is used by default.
 - To switch to OpenBLAS\* implementation, use `GEMM=OPENBLAS` option and `BLAS_INCLUDE_DIRS` and `BLAS_LIBRARIES` cmake options to specify path to OpenBLAS headers and library, for example use the following options on CentOS\*: `-DGEMM=OPENBLAS -DBLAS_INCLUDE_DIRS=/usr/include/openblas -DBLAS_LIBRARIES=/usr/lib64/libopenblas.so.0`
-- To switch to optimized MKL-ML\* GEMM implementation, use `GEMM=MKL` and `MKLROOT` cmake options to specify path to unpacked MKL-ML with `include` and `lib` folders, for example use the following options: `-DGEMM=MKL -DMKLROOT=<path_to_MKL>`. MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.17/mklml_lnx_2019.0.1.20180928.tgz)
 
-- OpenMP threading is used by default. To build Inference Engine with TBB threading, set `-DTHREADING=TBB` option.
+- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=<path_to_MKL>` cmake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.17/mklml_lnx_2019.0.1.20180928.tgz)
 
-- To build Python API wrapper, use -DENABLE_PYTHON=ON option. To specify exact Python version, use the following options: `-DPYTHON_EXECUTABLE=`which python3.6` -DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.6m.so -DPYTHON_INCLUDE_DIR=/usr/include/python3.6`
+- Threading Building Blocks (TBB) is used by default. To build the Inference Engine with OpenMP* threading, set the `-DTHREADING=OMP` option.
+
+- Required versions of TBB and OpenCV packages are downloaded automatically by the CMake-based script. If you already have installed TBB or OpenCV packages configured in your environment, you may need to clean the `TBBROOT` and `OpenCV_DIR` environment variables before running the `cmake` command, otherwise they won't be downloaded and the build may fail if incompatible versions were installed.
+
+- To build the Python API wrapper, use the `-DENABLE_PYTHON=ON` option. To specify an exact Python version, use the following options:
+```sh
+  -DPYTHON_EXECUTABLE=`which python3.7` \
+  -DPYTHON_LIBRARY=/usr/lib/x86_64-linux-gnu/libpython3.7m.so \
+  -DPYTHON_INCLUDE_DIR=/usr/include/python3.7
+```
 
 - To switch on/off the CPU and GPU plugins, use `cmake` options `-DENABLE_MKL_DNN=ON/OFF` and `-DENABLE_CLDNN=ON/OFF`.
 
@@ -74,7 +82,7 @@ You can use the following additional build options:
 ## Build on Windows\* Systems:
 
 The software was validated on:
-- Microsoft\* Windows\* 10 with Visual Studio 2017 and Intel® C++ Compiler 2018 Update 3
+- Microsoft\* Windows\* 10 (64-bit) with Visual Studio 2017 and Intel® C++ Compiler 2018 Update 3
 - [Intel® Graphics Driver for Windows* [24.20] driver package](https://downloadcenter.intel.com/download/27803/Graphics-Intel-Graphics-Driver-for-Windows-10?v=t).
 
 ### Software Requirements
@@ -107,25 +115,75 @@ cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" ^
 
 - Internal JIT GEMM implementation is used by default.
 - To switch to OpenBLAS GEMM implementation, use -DGEMM=OPENBLAS cmake option and specify path to OpenBLAS using `-DBLAS_INCLUDE_DIRS=<OPENBLAS_DIR>\include` and `-DBLAS_LIBRARIES=<OPENBLAS_DIR>\lib\libopenblas.dll.a` options. Prebuilt OpenBLAS\* package can be downloaded [here](https://sourceforge.net/projects/openblas/files/v0.2.14/OpenBLAS-v0.2.14-Win64-int64.zip/download), mingw64* runtime dependencies [here](https://sourceforge.net/projects/openblas/files/v0.2.14/mingw64_dll.zip/download)
-- To switch to optimized MKL-ML GEMM implementation, use `GEMM=MKL` and `MKLROOT` cmake options to specify path to unpacked MKL-ML with `include` and `lib` folders, for example use the following options: `-DGEMM=MKL -DMKLROOT=<path_to_MKL>`. MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.17/mklml_win_2019.0.1.20180928.zip)
+- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=<path_to_MKL>` cmake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.17/mklml_win_2019.0.1.20180928.zip)
+
+- Threading Building Blocks (TBB) is used by default. To build the Inference Engine with OpenMP* threading, set the `-DTHREADING=OMP` option.
 
-- OpenMP threading is used by default. To build Inference Engine with TBB threading, set `-DTHREADING=TBB` option.
+- Required versions of TBB and OpenCV packages are downloaded automatically by the CMake-based script. If you already have installed TBB or OpenCV packages configured in your environment, you may need to clean the `TBBROOT` and `OpenCV_DIR` environment variables before running the `cmake` command, otherwise they won't be downloaded and the build may fail if incompatible versions were installed.
 
-- To build Python API wrapper, use -DENABLE_PYTHON=ON option. To specify exact Python version, use the following options: `-DPYTHON_EXECUTABLE="C:\Program Files\Python36\python.exe" -DPYTHON_INCLUDE_DIR="C:\Program Files\Python36\include" -DPYTHON_LIBRARY="C:\Program Files\Python36\libs\python36.lib"`.
+- To build the Python API wrapper, use the `-DENABLE_PYTHON=ON` option. To specify an exact Python version, use the following options:
+```sh
+  -DPYTHON_EXECUTABLE="C:\Program Files\Python37\python.exe" ^
+  -DPYTHON_LIBRARY="C:\Program Files\Python37\libs\python37.lib" ^
+  -DPYTHON_INCLUDE_DIR="C:\Program Files\Python37\include"
+```
 
 6. Build generated solution in Visual Studio 2017 or run `cmake --build . --config Release` to build from the command line.
 
+7. Before running the samples, add paths to TBB and OpenCV binaries used for the build to the %PATH% environment variable. By default, TBB binaries are downloaded by the CMake-based script to the `<dldt_repo>/inference-engine/temp/tbb/lib` folder, OpenCV binaries - to the `<dldt_repo>/inference-engine/temp/opencv_4.1.0/bin` folder.
+
 ### Building Inference Engine with Ninja
 
 ```sh
 call "C:\Program Files (x86)\IntelSWTools\compilers_and_libraries_2018\windows\bin\ipsxe-comp-vars.bat" intel64 vs2017
 set CXX=icl
 set CC=icl
+:: clean TBBROOT value set by ipsxe-comp-vars.bat, required TBB package will be downloaded by dldt cmake script
+set TBBROOT=
 cmake -G Ninja -Wno-dev -DCMAKE_BUILD_TYPE=Release ..
 cmake --build . --config Release
 ```
 
-Before running the samples on Microsoft\* Windows\*, please add path to OpenMP library (<dldt_repo>/inference-engine/temp/omp/lib) and OpenCV libraries (<dldt_repo>/inference-engine/temp/opencv_4.0.0/bin) to the %PATH% environment variable.
+## Build on macOS\* Systems
+
+The software was validated on:
+- macOS\* 10.14, 64-bit
+
+### Software Requirements
+- [CMake\*](https://cmake.org/download/) 3.9 or higher
+- Clang\* compiler from Xcode\* 10.1
+- Python\* 3.4 or higher for the Inference Engine Python API wrapper
+
+### Build Steps
+1. Clone submodules:
+    ```sh
+    cd dldt/inference-engine
+    git submodule init
+    git submodule update --recursive
+    ```
+2. Install build dependencies using the `install_dependencies.sh` script in the project root folder.
+3. Create a build folder:
+```sh
+  mkdir build
+```
+4. Inference Engine uses a CMake-based build system. In the created `build` directory, run `cmake` to fetch project dependencies and create Unix makefiles, then run `make` to build the project:
+```sh
+  cmake -DCMAKE_BUILD_TYPE=Release ..
+  make -j16
+```
+You can use the following additional build options:
+- Internal JIT GEMM implementation is used by default.
+- To switch to the optimized MKL-ML\* GEMM implementation, use `-DGEMM=MKL` and `-DMKLROOT=<path_to_MKL>` cmake options to specify a path to unpacked MKL-ML with the `include` and `lib` folders. MKL-ML\* package can be downloaded [here](https://github.com/intel/mkl-dnn/releases/download/v0.17.1/mklml_mac_2019.0.1.20180928.tgz)
+
+- Threading Building Blocks (TBB) is used by default. To build the Inference Engine with OpenMP* threading, set the `-DTHREADING=OMP` option.
+
+- To build the Python API wrapper, use the `-DENABLE_PYTHON=ON` option. To specify an exact Python version, use the following options:
+```sh
+  -DPYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 \
+  -DPYTHON_LIBRARY=/Library/Frameworks/Python.framework/Versions/3.7/lib/libpython3.7m.dylib \
+  -DPYTHON_INCLUDE_DIR=/Library/Frameworks/Python.framework/Versions/3.7/include/python3.7m
+```
+
 
 ---
 \* Other names and brands may be claimed as the property of others.
diff --git a/inference-engine/cmake/FindlibGNA.cmake b/inference-engine/cmake/FindlibGNA.cmake
index eeb84800e..eccf75917 100644
--- a/inference-engine/cmake/FindlibGNA.cmake
+++ b/inference-engine/cmake/FindlibGNA.cmake
@@ -1,12 +1,10 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
 #module to locate GNA libraries
 
-cmake_minimum_required(VERSION 2.8)
-
 if (WIN32)
     set(GNA_PLATFORM_DIR win64)
     set(GNA_LIB_DIR x64)
diff --git a/inference-engine/cmake/check_features.cmake b/inference-engine/cmake/check_features.cmake
index 88ff23f59..00861fae1 100644
--- a/inference-engine/cmake/check_features.cmake
+++ b/inference-engine/cmake/check_features.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -65,10 +65,6 @@ if (ENABLE_PROFILING_RAW)
     add_definitions(-DENABLE_PROFILING_RAW=1)
 endif()
 
-if (ENABLE_GTEST_PATCHES)
-    add_definitions(-DENABLE_GTEST_PATCHES=1)
-endif()
-
 if (ENABLE_CLDNN)
     add_definitions(-DENABLE_CLDNN=1)
 endif()
@@ -77,22 +73,14 @@ if (ENABLE_MKL_DNN)
     add_definitions(-DENABLE_MKL_DNN=1)
 endif()
 
-if (ENABLE_STRESS_UNIT_TESTS)
-    add_definitions(-DENABLE_STRESS_UNIT_TESTS=1)
-endif()
-
-if (ENABLE_SEGMENTATION_TESTS)
-    add_definitions(-DENABLE_SEGMENTATION_TESTS=1)
-endif()
-
-if (ENABLE_OBJECT_DETECTION_TESTS)
-    add_definitions(-DENABLE_OBJECT_DETECTION_TESTS=1)
-endif()
-
 if (ENABLE_GNA)
     add_definitions(-DENABLE_GNA)
 endif()
 
+if (ENABLE_SAMPLES)
+    set (ENABLE_SAMPLES_CORE ON)
+endif()
+
 if (DEVELOPMENT_PLUGIN_MODE)
     message (STATUS "Enabled development plugin mode")
 
@@ -112,5 +100,4 @@ if (VERBOSE_BUILD)
     set(CMAKE_VERBOSE_MAKEFILE  ON)
 endif()
 
-
 print_enabled_features()
diff --git a/inference-engine/cmake/config.cmake.in b/inference-engine/cmake/config.cmake.in
index ed3c88033..7c3459f5a 100644
--- a/inference-engine/cmake/config.cmake.in
+++ b/inference-engine/cmake/config.cmake.in
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
diff --git a/inference-engine/cmake/cpplint.cmake b/inference-engine/cmake/cpplint.cmake
new file mode 100644
index 000000000..f4eca4c34
--- /dev/null
+++ b/inference-engine/cmake/cpplint.cmake
@@ -0,0 +1,162 @@
+# Copyright (C) 2018-2019 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+if(ENABLE_CPPLINT)
+    find_package(PythonInterp 2.7 EXACT)
+
+    if(NOT PYTHONINTERP_FOUND)
+        message(WARNING "Python was not found (required for cpplint check)")
+        set(ENABLE_CPPLINT OFF)
+    endif()
+endif()
+
+if(ENABLE_CPPLINT)
+    add_custom_target(cpplint_all ALL)
+    set(CPPLINT_ALL_OUTPUT_FILES "" CACHE INTERNAL "All cpplint output files")
+endif()
+
+function(add_cpplint_target TARGET_NAME)
+    if(NOT ENABLE_CPPLINT)
+        return()
+    endif()
+
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs "FOR_TARGETS" "FOR_SOURCES" "EXCLUDE_PATTERNS")
+    cmake_parse_arguments(CPPLINT "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+    foreach(target IN LISTS CPPLINT_FOR_TARGETS)
+        get_target_property(target_sources "${target}" SOURCES)
+        list(APPEND CPPLINT_FOR_SOURCES ${target_sources})
+    endforeach()
+    list(REMOVE_DUPLICATES CPPLINT_FOR_SOURCES)
+
+    set(all_output_files "")
+    foreach(source_file IN LISTS CPPLINT_FOR_SOURCES)
+        set(exclude FALSE)
+        foreach(pattern IN LISTS CPPLINT_EXCLUDE_PATTERNS)
+            if(source_file MATCHES "${pattern}")
+                set(exclude TRUE)
+                break()
+            endif()
+        endforeach()
+
+        if(exclude)
+            continue()
+        endif()
+
+        file(RELATIVE_PATH source_file_relative "${CMAKE_CURRENT_SOURCE_DIR}" "${source_file}")
+        set(output_file "${CMAKE_CURRENT_BINARY_DIR}/cpplint/${source_file_relative}.cpplint")
+        string(REPLACE ".." "__" output_file "${output_file}")
+        get_filename_component(output_dir "${output_file}" DIRECTORY)
+        file(MAKE_DIRECTORY "${output_dir}")
+
+        add_custom_command(
+            OUTPUT
+                "${output_file}"
+            COMMAND
+                "${CMAKE_COMMAND}"
+                -D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
+                -D "CPPLINT_SCRIPT=${IE_MAIN_SOURCE_DIR}/scripts/cpplint.py"
+                -D "INPUT_FILE=${source_file}"
+                -D "OUTPUT_FILE=${output_file}"
+                -D "WORKING_DIRECTORY=${CMAKE_CURRENT_SOURCE_DIR}"
+                -D "SKIP_RETURN_CODE=${ENABLE_CPPLINT_REPORT}"
+                -P "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_run.cmake"
+            DEPENDS
+                "${source_file}"
+                "${IE_MAIN_SOURCE_DIR}/scripts/cpplint.py"
+                "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_run.cmake"
+            COMMENT
+                "[cpplint] ${source_file}"
+            VERBATIM)
+
+        list(APPEND all_output_files "${output_file}")
+    endforeach()
+
+    set(CPPLINT_ALL_OUTPUT_FILES
+        ${CPPLINT_ALL_OUTPUT_FILES} ${all_output_files}
+        CACHE INTERNAL
+        "All cpplint output files")
+
+    add_custom_target(${TARGET_NAME} ALL
+        DEPENDS ${all_output_files}
+        COMMENT "[cpplint] ${TARGET_NAME}")
+
+    if(CPPLINT_FOR_TARGETS)
+        foreach(target IN LISTS CPPLINT_FOR_TARGETS)
+            add_dependencies(${target} ${TARGET_NAME})
+        endforeach()
+    endif()
+
+    add_dependencies(cpplint_all ${TARGET_NAME})
+endfunction()
+
+function(add_cpplint_report_target)
+    if(NOT ENABLE_CPPLINT OR NOT ENABLE_CPPLINT_REPORT)
+        return()
+    endif()
+
+    set(cpplint_output_file "${CMAKE_BINARY_DIR}/cpplint/final_output.cpplint")
+    add_custom_command(
+        OUTPUT
+            "${cpplint_output_file}"
+        COMMAND
+            "${CMAKE_COMMAND}"
+            -D "FINAL_OUTPUT_FILE=${cpplint_output_file}"
+            -D "OUTPUT_FILES=${CPPLINT_ALL_OUTPUT_FILES}"
+            -P "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_merge.cmake"
+        DEPENDS
+            ${CPPLINT_ALL_OUTPUT_FILES}
+            "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_merge.cmake"
+        COMMENT
+            "[cpplint] Merge all output files"
+        VERBATIM)
+
+    set(cppcheck_output_file "${CMAKE_BINARY_DIR}/cpplint/cpplint-cppcheck-result.xml")
+    add_custom_command(
+        OUTPUT
+            "${cppcheck_output_file}"
+        COMMAND
+            "${CMAKE_COMMAND}"
+            -D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
+            -D "CONVERT_SCRIPT=${IE_MAIN_SOURCE_DIR}/scripts/cpplint_to_cppcheckxml.py"
+            -D "INPUT_FILE=${cpplint_output_file}"
+            -D "OUTPUT_FILE=${cppcheck_output_file}"
+            -P "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_to_cppcheck_xml.cmake"
+        DEPENDS
+            ${cpplint_output_file}
+            "${IE_MAIN_SOURCE_DIR}/scripts/cpplint_to_cppcheckxml.py"
+            "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_to_cppcheck_xml.cmake"
+        COMMENT
+            "[cpplint] Convert to cppcheck XML format"
+        VERBATIM)
+
+    set(report_dir "${IE_MAIN_SOURCE_DIR}/report/cpplint")
+    set(html_output_file "${report_dir}/index.html")
+    add_custom_command(
+        OUTPUT
+            "${html_output_file}"
+        COMMAND
+            "${CMAKE_COMMAND}"
+            -D "PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}"
+            -D "CONVERT_SCRIPT=${IE_MAIN_SOURCE_DIR}/scripts/cppcheck-htmlreport.py"
+            -D "INPUT_FILE=${cppcheck_output_file}"
+            -D "REPORT_DIR=${report_dir}"
+            -D "SOURCE_DIR=${IE_MAIN_SOURCE_DIR}"
+            -D "TITLE=${CMAKE_PROJECT_NAME}"
+            -P "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_html.cmake"
+        DEPENDS
+            "${cppcheck_output_file}"
+            "${IE_MAIN_SOURCE_DIR}/scripts/cppcheck-htmlreport.py"
+            "${IE_MAIN_SOURCE_DIR}/cmake/cpplint_html.cmake"
+        COMMENT
+            "[cpplint] Generate HTML report"
+        VERBATIM)
+
+    add_custom_target(cpplint_report
+        DEPENDS "${html_output_file}"
+        COMMENT "[cpplint] Generate report")
+endfunction()
diff --git a/inference-engine/cmake/cpplint_html.cmake b/inference-engine/cmake/cpplint_html.cmake
new file mode 100644
index 000000000..55992d8b2
--- /dev/null
+++ b/inference-engine/cmake/cpplint_html.cmake
@@ -0,0 +1,30 @@
+# Copyright (C) 2018-2019 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+if(EXISTS "${REPORT_DIR}")
+    file(REMOVE_RECURSE "${REPORT_DIR}")
+endif()
+
+file(MAKE_DIRECTORY "${REPORT_DIR}")
+
+execute_process(
+    COMMAND
+        "${PYTHON_EXECUTABLE}"
+        "${CONVERT_SCRIPT}"
+        "--file=${INPUT_FILE}"
+        "--report-dir=${REPORT_DIR}"
+        "--source-dir=${SOURCE_DIR}"
+        "--title=${TITLE}")
+
+# Change cppcheck things to cpplint
+
+file(READ "${REPORT_DIR}/index.html" cur_file_content)
+
+string(REPLACE "Cppcheck" "cpplint" cur_file_content ${cur_file_content})
+string(REPLACE "a tool for static C/C++ code analysis" "an open source lint-like tool from Google" cur_file_content ${cur_file_content})
+string(REPLACE "http://cppcheck.sourceforge.net" "http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py" cur_file_content ${cur_file_content})
+string(REPLACE "IRC: <a href=\"irc://irc.freenode.net/cppcheck\">irc://irc.freenode.net/cppcheck</a>" " " cur_file_content ${cur_file_content})
+
+file(WRITE "${REPORT_DIR}/index.html" "${cur_file_content}")
diff --git a/inference-engine/cmake/cpplint_merge.cmake b/inference-engine/cmake/cpplint_merge.cmake
new file mode 100644
index 000000000..da8715736
--- /dev/null
+++ b/inference-engine/cmake/cpplint_merge.cmake
@@ -0,0 +1,11 @@
+# Copyright (C) 2018-2019 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+file(WRITE "${FINAL_OUTPUT_FILE}" "")
+
+foreach(output_file IN LISTS OUTPUT_FILES)
+    file(READ "${output_file}" cur_file_content)
+    file(APPEND "${FINAL_OUTPUT_FILE}" "${cur_file_content}\n")
+endforeach()
diff --git a/inference-engine/cmake/cpplint_run.cmake b/inference-engine/cmake/cpplint_run.cmake
new file mode 100644
index 000000000..f9c9ec58f
--- /dev/null
+++ b/inference-engine/cmake/cpplint_run.cmake
@@ -0,0 +1,37 @@
+# Copyright (C) 2018-2019 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+file(REMOVE "${OUTPUT_FILE}")
+
+execute_process(
+    COMMAND
+        "${PYTHON_EXECUTABLE}"
+        "${CPPLINT_SCRIPT}"
+        "--linelength=160"
+        "--counting=detailed"
+        "--filter=-readability/fn_size"
+        "${INPUT_FILE}"
+    WORKING_DIRECTORY "${WORKING_DIRECTORY}"
+    RESULT_VARIABLE result
+    OUTPUT_VARIABLE output
+    ERROR_VARIABLE output)
+
+# Display the cpplint output to console (to parse it form IDE)
+message("${output}")
+
+# Store cpplint output to file (replace problematic symbols)
+string(REPLACE "\"" "&quot\;" output ${output})
+string(REPLACE "<" "&lt\;" output ${output})
+string(REPLACE ">" "&gt\;" output ${output})
+string(REPLACE "'" "&apos\;" output ${output})
+string(REPLACE "&" "&amp\;" output ${output})
+file(WRITE "${OUTPUT_FILE}" ${output})
+
+if(NOT SKIP_RETURN_CODE)
+    # Pass through the cpplint return code
+    if(NOT result EQUAL 0)
+        message(FATAL_ERROR "[cpplint] Code style check failed for : ${INPUT_FILE}")
+    endif()
+endif()
diff --git a/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake b/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake
new file mode 100644
index 000000000..6651b93a6
--- /dev/null
+++ b/inference-engine/cmake/cpplint_to_cppcheck_xml.cmake
@@ -0,0 +1,12 @@
+# Copyright (C) 2018-2019 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+execute_process(
+    COMMAND
+        "${PYTHON_EXECUTABLE}"
+        "${CONVERT_SCRIPT}"
+    INPUT_FILE "${INPUT_FILE}"
+    OUTPUT_FILE "${OUTPUT_FILE}"
+    ERROR_FILE "${OUTPUT_FILE}")
diff --git a/inference-engine/cmake/debug.cmake b/inference-engine/cmake/debug.cmake
index 8d5ad8450..9aeb2a581 100644
--- a/inference-engine/cmake/debug.cmake
+++ b/inference-engine/cmake/debug.cmake
@@ -1,10 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
-
 function (debug_message)
     if (VERBOSE_BUILD)
         message(${ARGV})
diff --git a/inference-engine/cmake/dependencies.cmake b/inference-engine/cmake/dependencies.cmake
index cc027bf94..a541357e1 100644
--- a/inference-engine/cmake/dependencies.cmake
+++ b/inference-engine/cmake/dependencies.cmake
@@ -1,9 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
 cmake_policy(SET CMP0054 NEW)
 
 #features trigger supported by build system
@@ -14,7 +13,7 @@ include(debug)
 include(dependency_solver)
 
 #prepare temporary folder
-if (DEFINED ENV{${DL_SDK_TEMP}})
+if (DEFINED ENV{${DL_SDK_TEMP}} AND NOT $ENV{${DL_SDK_TEMP}} STREQUAL "")
     if (WIN32)
         string(REPLACE "\\" "\\\\" TEMP $ENV{${DL_SDK_TEMP}})
     else(WIN32)
@@ -38,9 +37,6 @@ else()
     set(MODELS_BRANCH "master")
 endif()
 
-set(MODELS_PATH "${TEMP}/models")
-debug_message(STATUS "MODELS_PATH=" ${MODELS_PATH})
-
 ## enable cblas_gemm from OpenBLAS package
 if (GEMM STREQUAL "OPENBLAS")
 if(NOT BLAS_LIBRARIES OR NOT BLAS_INCLUDE_DIRS)
@@ -77,6 +73,12 @@ elseif(LINUX)
             TARGET_PATH "${TEMP}/omp"
             ENVIRONMENT "OMP"
             VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
+else(APPLE)
+    RESOLVE_DEPENDENCY(OMP
+            ARCHIVE_MAC "iomp_20190130_mac.tgz"
+            TARGET_PATH "${TEMP}/omp"
+            ENVIRONMENT "OMP"
+            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
 endif()
 log_rpath_from_dir(OMP "${OMP}/lib")
 debug_message(STATUS "intel_omp=" ${OMP})
@@ -96,6 +98,12 @@ elseif(LINUX)
             ARCHIVE_LIN "tbb2019_20181010_lin.tgz"
             TARGET_PATH "${TEMP}/tbb"
             ENVIRONMENT "TBBROOT")
+else(APPLE)
+    RESOLVE_DEPENDENCY(TBB
+            ARCHIVE_MAC "tbb2019_20190130_mac.tgz"
+            TARGET_PATH "${TEMP}/tbb"
+            ENVIRONMENT "TBBROOT"
+            VERSION_REGEX ".*_([a-z]*_([a-z0-9]+\\.)*[0-9]+).*")
 endif()
 log_rpath_from_dir(TBB "${TBB}/lib")
 debug_message(STATUS "tbb=" ${TBB})
@@ -104,34 +112,51 @@ endif ()
 if (ENABLE_OPENCV)
 if (WIN32)
     RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_WIN "opencv_4.0.1-0353.zip"
-            TARGET_PATH "${TEMP}/opencv_4.0.0"
+            ARCHIVE_WIN "opencv_4.1.0-0437.zip"
+            TARGET_PATH "${TEMP}/opencv_4.1.0"
+            ENVIRONMENT "OpenCV_DIR"
+            VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
+    log_rpath_from_dir(OPENCV "\\opencv_4.1.0\\bin")
+    set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
+elseif(APPLE)
+    RESOLVE_DEPENDENCY(OPENCV
+            ARCHIVE_MAC "opencv_4.1.0-0437_osx.tar.xz"
+            TARGET_PATH "${TEMP}/opencv_4.1.0_osx"
             ENVIRONMENT "OpenCV_DIR"
             VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "\\opencv_4.0.0\\bin")
+    log_rpath_from_dir(OPENCV "opencv_4.1.0_osx/lib")
     set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
 elseif(LINUX)
 if (${LINUX_OS_NAME} STREQUAL "Ubuntu 16.04")
     RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_LIN "opencv_4.0.0-0305_ubuntu16.tgz"
-            TARGET_PATH "${TEMP}/opencv_4.0.0_ubuntu"
+            ARCHIVE_LIN "opencv_4.1.0-0437_ubuntu16.tar.xz"
+            TARGET_PATH "${TEMP}/opencv_4.1.0_ubuntu16"
             ENVIRONMENT "OpenCV_DIR"
             VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "opencv_4.0.0_ubuntu/lib")
+    log_rpath_from_dir(OPENCV "opencv_4.1.0_ubuntu16/lib")
 elseif (${LINUX_OS_NAME} STREQUAL "Ubuntu 18.04")
     RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_LIN "opencv_4.0.0-0305_ubuntu18.tgz"
-            TARGET_PATH "${TEMP}/opencv_4.0.0_ubuntu18"
+            ARCHIVE_LIN "opencv_4.1.0-0437_ubuntu18.tar.xz"
+            TARGET_PATH "${TEMP}/opencv_4.1.0_ubuntu18"
             ENVIRONMENT "OpenCV_DIR"
             VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "opencv_4.0.0_ubuntu/lib")
+    log_rpath_from_dir(OPENCV "opencv_4.1.0_ubuntu18/lib")
 elseif (${LINUX_OS_NAME} STREQUAL "CentOS 7")
     RESOLVE_DEPENDENCY(OPENCV
-            ARCHIVE_LIN "opencv_4.0.0-0305_centos.tgz"
-            TARGET_PATH "${TEMP}/opencv_4.0.0_centos"
+            ARCHIVE_LIN "opencv_4.1.0-0437_centos7.tar.xz"
+            TARGET_PATH "${TEMP}/opencv_4.1.0_centos"
+            ENVIRONMENT "OpenCV_DIR"
+            VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
+    log_rpath_from_dir(OPENCV "opencv_4.1.0_centos/lib")
+elseif (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l" AND
+        (${LINUX_OS_NAME} STREQUAL "Debian 9" OR
+         ${LINUX_OS_NAME} STREQUAL "Raspbian 9"))
+    RESOLVE_DEPENDENCY(OPENCV
+            ARCHIVE_LIN "opencv_4.1.0-0437_debian9arm.tar.xz"
+            TARGET_PATH "${TEMP}/opencv_4.1.0_debian9arm"
             ENVIRONMENT "OpenCV_DIR"
             VERSION_REGEX ".*_([0-9]+.[0-9]+.[0-9]+).*")
-    log_rpath_from_dir(OPENCV "opencv_4.0.0_centos/lib")
+    log_rpath_from_dir(OPENCV "opencv_4.1.0_debian9arm/lib")
 endif()
     set( ENV{OpenCV_DIR} ${OPENCV}/cmake )
 endif()
diff --git a/inference-engine/cmake/dependency_solver.cmake b/inference-engine/cmake/dependency_solver.cmake
index 92d299458..178b379ff 100644
--- a/inference-engine/cmake/dependency_solver.cmake
+++ b/inference-engine/cmake/dependency_solver.cmake
@@ -1,10 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
-
 include ("download")
 
 function (resolve_archive_dependency VAR COMPONENT ARCHIVE ARCHIVE_UNIFIED ARCHIVE_WIN ARCHIVE_LIN ARCHIVE_MAC TARGET_PATH FOLDER ENVIRONMENT)
@@ -15,7 +13,7 @@ function (resolve_archive_dependency VAR COMPONENT ARCHIVE ARCHIVE_UNIFIED ARCHI
 
   if (NOT DEFINED HAS_ENV)
     if (ARCHIVE)
-      #TODO: check wether this is platform specific binary with same name per or it is in common folder
+      #TODO: check whether this is platform specific binary with same name per or it is in common folder
       DownloadAndExtract(${COMPONENT} ${ARCHIVE} ${TARGET_PATH} result_path ${FOLDER})
     else()
       DownloadAndExtractPlatformSpecific(${COMPONENT} ${ARCHIVE_UNIFIED} ${ARCHIVE_WIN} ${ARCHIVE_LIN} ${ARCHIVE_MAC} ${TARGET_PATH} result_path  ${FOLDER})
@@ -130,11 +128,3 @@ function (RESOLVE_DEPENDENCY NAME_OF_CMAKE_VAR)
   endif()
 
 endfunction(RESOLVE_DEPENDENCY)
-
-function (resolve_model_dependency network archive network_model_path)
-  RESOLVE_DEPENDENCY(${network_model_path}
-        ARCHIVE "models_archives/${archive}"
-        TARGET_PATH "${MODELS_PATH}/${network}")
-  string (REPLACE ${MODELS_PATH} "" relative_path ${${network_model_path}})
-  set(${network_model_path} ".${relative_path}" PARENT_SCOPE)
-endfunction()
diff --git a/inference-engine/cmake/download.cmake b/inference-engine/cmake/download.cmake
index 6c5ad3f21..b5f6bc74c 100644
--- a/inference-engine/cmake/download.cmake
+++ b/inference-engine/cmake/download.cmake
@@ -1,10 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
-
 function (Download from to fatal result output)
 
   if((NOT EXISTS "${to}"))
diff --git a/inference-engine/cmake/download_and_apply.cmake b/inference-engine/cmake/download_and_apply.cmake
index 4c75c6df7..d4869e4d8 100644
--- a/inference-engine/cmake/download_and_apply.cmake
+++ b/inference-engine/cmake/download_and_apply.cmake
@@ -1,10 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
-
 function (DownloadAndApply URL apply_to)
 
   if (EXISTS ${apply_to})
diff --git a/inference-engine/cmake/download_and_check.cmake b/inference-engine/cmake/download_and_check.cmake
index 6872fe2f0..5f4e49c1f 100644
--- a/inference-engine/cmake/download_and_check.cmake
+++ b/inference-engine/cmake/download_and_check.cmake
@@ -1,23 +1,22 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
 include (FindWget)
 
 function (DownloadAndCheck from to fatal result)
-    set(status_res "ON")
-    set(output 1)
+  set(status_res "ON")
+  set(output 1)
 
-    get_filename_component(download_dir ${to} DIRECTORY)
-    if (NOT EXISTS ${download_dir})
-      file(MAKE_DIRECTORY ${download_dir})
-    endif()
+  get_filename_component(download_dir ${to} DIRECTORY)
+  if (NOT EXISTS ${download_dir})
+    file(MAKE_DIRECTORY ${download_dir})
+  endif()
 
-    if(NOT EXISTS "${to}")
+  if(NOT EXISTS "${to}")
+    if (${from} MATCHES "(http:)|(https:)|(ftp:)")
       message(STATUS "Downloading from ${from} to ${to} ...")
-
       find_program(aria2c "aria2c")
       if (${aria2c} STREQUAL "aria2c-NOTFOUND")
         if (NOT ${WGET_FOUND})
@@ -48,9 +47,13 @@ function (DownloadAndCheck from to fatal result)
             status_code: ${status_code}")
         endif()
       endif()
+    else()
+      message(STATUS "Copying from local folder ${from} to ${to} ... ")
+      file(COPY ${from} DESTINATION ${download_dir})
     endif()
+  endif()
 
   file(REMOVE ${to}.md5)
   set(${result} "${status_res}" PARENT_SCOPE)
 
-endfunction(DownloadAndCheck)
-\ No newline at end of file
+endfunction(DownloadAndCheck)
diff --git a/inference-engine/cmake/download_and_extract.cmake b/inference-engine/cmake/download_and_extract.cmake
index 513de811b..27af8f8dc 100644
--- a/inference-engine/cmake/download_and_extract.cmake
+++ b/inference-engine/cmake/download_and_extract.cmake
@@ -1,9 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
 include ("extract")
 include ("download_and_check")
 
@@ -120,12 +119,12 @@ function (DownloadOrExtractInternal URL archive_path unpacked_path folder fattal
     if (ENABLE_UNSAFE_LOCATIONS)
       ExtractWithVersion(${URL} ${archive_path} ${unpacked_path} ${folder} result)
       if(NOT ${result})
-        DownloadAndExtractInternal(${URL} ${archive_path} ${unpacked_path} ${folder} ${fattal} result)      
+        DownloadAndExtractInternal(${URL} ${archive_path} ${unpacked_path} ${folder} ${fattal} result)
       endif()
     else()
       debug_message("archive found on FS : ${archive_path}, however we cannot check it's checksum and think that it is invalid")
       file(REMOVE_RECURSE "${archive_path}")
-      DownloadAndExtractInternal(${URL} ${archive_path} ${unpacked_path} ${folder} ${fattal} result)      
+      DownloadAndExtractInternal(${URL} ${archive_path} ${unpacked_path} ${folder} ${fattal} result)
     endif()  
 
   
@@ -144,7 +143,11 @@ function (CheckOrDownloadAndExtract component RELATIVE_URL archive_name unpacked
   set (status "ON")
   set (on_master FALSE)
 
-  set (URL  "https://download.01.org/openvinotoolkit/2018_R5/dldt/inference_engine/${RELATIVE_URL}")
+  if(DEFINED ENV{IE_PATH_TO_DEPS})
+    set(URL "$ENV{IE_PATH_TO_DEPS}/${RELATIVE_URL}")
+  else()
+    set(URL "https://download.01.org/opencv/2019/openvinotoolkit/R1/inference_engine/${RELATIVE_URL}")
+  endif()
 
   #no message on recursive calls
   if (${use_alternatives})
diff --git a/inference-engine/cmake/extract.cmake b/inference-engine/cmake/extract.cmake
index 9b8d5a059..2aa6fd455 100644
--- a/inference-engine/cmake/extract.cmake
+++ b/inference-engine/cmake/extract.cmake
@@ -1,17 +1,15 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
-
 function (extract archive_path unpacked_path folder result)
   # Slurped from a generated extract-TARGET.cmake file.
   if (NOT EXISTS ${unpacked_path})
     get_filename_component(unpacked_dir ${unpacked_path} DIRECTORY)
-    
+
     file(MAKE_DIRECTORY ${unpacked_path})
-    
+
     message(STATUS "extracting...
          src='${archive_path}'
          dst='${unpacked_path}'")
diff --git a/inference-engine/cmake/features.cmake b/inference-engine/cmake/features.cmake
index d9ff98b04..b6d22666c 100644
--- a/inference-engine/cmake/features.cmake
+++ b/inference-engine/cmake/features.cmake
@@ -1,11 +1,9 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required (VERSION 2.8)
-
-include ("options")
+include (options)
 
 #this options are aimed to optimize build time on development system
 
@@ -21,8 +19,6 @@ ie_option (ENABLE_PROFILING_ITT "ITT tracing of IE and plugins internals" ON)
 
 ie_option (ENABLE_PROFILING_RAW "Raw counters profiling (just values, no start/stop time or timeline)" OFF)
 
-#
-
 # "MKL-DNN library might use MKL-ML or OpenBLAS for gemm tasks: MKL|OPENBLAS|JIT"
 if (NOT GEMM STREQUAL "MKL"
         AND NOT GEMM STREQUAL "OPENBLAS"
@@ -30,15 +26,17 @@ if (NOT GEMM STREQUAL "MKL"
     set (GEMM "JIT")
     message(STATUS "GEMM should be set to MKL, OPENBLAS or JIT. Default option is " ${GEMM})
 endif()
+set(GEMM "${GEMM}" CACHE STRING "Gemm implementation" FORCE)
 list (APPEND IE_OPTIONS GEMM)
 
 # "MKL-DNN library based on OMP or TBB or Sequential implementation: TBB|OMP|SEQ"
 if (NOT THREADING STREQUAL "TBB"
         AND NOT THREADING STREQUAL "OMP"
         AND NOT THREADING STREQUAL "SEQ")
-    set (THREADING "OMP")
+    set (THREADING "TBB")
     message(STATUS "THREADING should be set to TBB, OMP or SEQ. Default option is " ${THREADING})
 endif()
+set(THREADING "${THREADING}" CACHE STRING "Threading" FORCE)
 list (APPEND IE_OPTIONS THREADING)
 
 # Enable postfixes for Debug/Release builds
@@ -53,7 +51,9 @@ else()
     set (IE_DEBUG_POSTFIX ${IE_DEBUG_POSTFIX_LIN})
     set (IE_RELEASE_POSTFIX ${IE_RELEASE_POSTFIX_LIN})
 endif()
+set(IE_DEBUG_POSTFIX "${IE_DEBUG_POSTFIX}" CACHE STRING "Debug postfix" FORCE)
 list (APPEND IE_OPTIONS IE_DEBUG_POSTFIX)
+set(IE_RELEASE_POSTFIX "${IE_RELEASE_POSTFIX}" CACHE STRING "Release postfix" FORCE)
 list (APPEND IE_OPTIONS IE_RELEASE_POSTFIX)
 
 ie_option (ENABLE_TESTS "unit and functional tests" OFF)
@@ -62,6 +62,7 @@ ie_option (ENABLE_GAPI_TESTS "unit tests for GAPI kernels" OFF)
 
 ie_option (GAPI_TEST_PERF "if GAPI unit tests should examine performance" OFF)
 
+ie_option (ENABLE_SAMPLES "console samples are part of inference engine package" ON)
 
 ie_option (ENABLE_SAMPLES_CORE "console samples core library" ON)
 
@@ -93,6 +94,9 @@ ie_option (ENABLE_DEBUG_SYMBOLS "generates symbols for debugging" OFF)
 
 ie_option (ENABLE_PYTHON "enables ie python bridge build" OFF)
 
+ie_option(ENABLE_CPPLINT "Enable cpplint checks during the build" OFF)
+ie_option(ENABLE_CPPLINT_REPORT "Build cpplint report instead of failing the build" OFF)
+
 #environment variables used
 
 #name of environment variable stored path to temp directory"
diff --git a/inference-engine/cmake/ie_parallel.cmake b/inference-engine/cmake/ie_parallel.cmake
index 7c183b58b..18ccdf086 100644
--- a/inference-engine/cmake/ie_parallel.cmake
+++ b/inference-engine/cmake/ie_parallel.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -94,7 +94,13 @@ function(set_ie_threading_interface_for TARGET_NAME)
                 endif ()
             endif ()
         endif ()
+
     endif ()
 
     target_compile_definitions(${TARGET_NAME} PUBLIC -DIE_THREAD=${IE_THREAD_DEFINE})
+
+    if (NOT THREADING STREQUAL "SEQ")
+        find_package(Threads REQUIRED)
+        target_link_libraries(${TARGET_NAME} PUBLIC ${CMAKE_THREAD_LIBS_INIT})
+    endif()
 endfunction(set_ie_threading_interface_for)
diff --git a/inference-engine/cmake/itt.cmake b/inference-engine/cmake/itt.cmake
index add28119f..3ed2394c4 100644
--- a/inference-engine/cmake/itt.cmake
+++ b/inference-engine/cmake/itt.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
diff --git a/inference-engine/cmake/linux_name.cmake b/inference-engine/cmake/linux_name.cmake
index 0dd8dd520..8b07919bc 100644
--- a/inference-engine/cmake/linux_name.cmake
+++ b/inference-engine/cmake/linux_name.cmake
@@ -1,10 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 if (UNIX)
     function(get_linux_name res_var)
         if (NOT EXISTS "/etc/lsb-release")
diff --git a/inference-engine/cmake/mode.cmake b/inference-engine/cmake/mode.cmake
index 6ecdfaa6d..3e55471d8 100644
--- a/inference-engine/cmake/mode.cmake
+++ b/inference-engine/cmake/mode.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
diff --git a/inference-engine/cmake/options.cmake b/inference-engine/cmake/options.cmake
index 1f44f8758..3cc68d654 100644
--- a/inference-engine/cmake/options.cmake
+++ b/inference-engine/cmake/options.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
diff --git a/inference-engine/cmake/os_flags.cmake b/inference-engine/cmake/os_flags.cmake
index cb7c6b1ab..29608ea83 100644
--- a/inference-engine/cmake/os_flags.cmake
+++ b/inference-engine/cmake/os_flags.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -8,10 +8,13 @@ if (WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
-    
+
+    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /Z7")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Z7")
+
     if(ENABLE_DEBUG_SYMBOLS)
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi")
-        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Zi")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Z7")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Z7")
 
         set(DEBUG_SYMBOLS_LINKER_FLAGS "/DEBUG")
         if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
diff --git a/inference-engine/cmake/sanitizer.cmake b/inference-engine/cmake/sanitizer.cmake
index cdbe108b7..23814e7ad 100644
--- a/inference-engine/cmake/sanitizer.cmake
+++ b/inference-engine/cmake/sanitizer.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -11,7 +11,11 @@ if (ENABLE_SANITIZER)
     if (SANITIZE_RECOVER_SUPPORTED)
         set(SANITIZER_COMPILER_FLAGS "${SANITIZER_COMPILER_FLAGS} -fsanitize-recover=address")
     endif()
-    set(SANITIZER_LINKER_FLAGS "-fsanitize=address -fuse-ld=gold")
+
+    set(SANITIZER_LINKER_FLAGS "-fsanitize=address")
+    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+        set(SANITIZER_LINKER_FLAGS "${SANITIZER_LINKER_FLAGS} -fuse-ld=gold")
+    endif()
 
     set(CMAKE_CC_FLAGS "${CMAKE_CC_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SANITIZER_COMPILER_FLAGS}")
diff --git a/inference-engine/cmake/sdl.cmake b/inference-engine/cmake/sdl.cmake
index 26618c6e5..e6229a7b2 100644
--- a/inference-engine/cmake/sdl.cmake
+++ b/inference-engine/cmake/sdl.cmake
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
diff --git a/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in b/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in
index 506fc5403..bc4c3a997 100644
--- a/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in
+++ b/inference-engine/cmake/share/InferenceEngineConfig-version.cmake.in
@@ -1,9 +1,9 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-set(InferenceEngine_VERSION 1.5.0)
+set(InferenceEngine_VERSION 1.6.0)
 set(PACKAGE_VERSION ${InferenceEngine_VERSION})
 
 set(PACKAGE_VERSION_EXACT False)
diff --git a/inference-engine/cmake/share/InferenceEngineConfig.cmake.in b/inference-engine/cmake/share/InferenceEngineConfig.cmake.in
index 8f806e9c8..860870b8c 100644
--- a/inference-engine/cmake/share/InferenceEngineConfig.cmake.in
+++ b/inference-engine/cmake/share/InferenceEngineConfig.cmake.in
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -54,72 +54,27 @@ else()
     set(THREADING "@THREADING@")
 
     # check whether setvars.sh is sourced
-    if(NOT IE_ROOT_DIR AND (DEFINED ENV{InferenceEngine_DIR} OR InferenceEngine_DIR OR DEFINED ENV{INTEL_CVSDK_DIR}))
+    if(NOT IE_ROOT_DIR AND (DEFINED ENV{InferenceEngine_DIR} OR InferenceEngine_DIR OR DEFINED ENV{INTEL_OPENVINO_DIR}))
         if (EXISTS "${InferenceEngine_DIR}")
             # InferenceEngine_DIR manually set via command line params
             set(IE_ROOT_DIR "${InferenceEngine_DIR}/..")
         elseif (EXISTS "$ENV{InferenceEngine_DIR}")
             # InferenceEngine_DIR manually set via env
             set(IE_ROOT_DIR "$ENV{InferenceEngine_DIR}/..")
-        elseif (EXISTS "$ENV{INTEL_CVSDK_DIR}/inference_engine")
+        elseif (EXISTS "$ENV{INTEL_OPENVINO_DIR}/inference_engine")
             # if we installed DL SDK
-            set(IE_ROOT_DIR "$ENV{INTEL_CVSDK_DIR}/inference_engine")
-        elseif (EXISTS "$ENV{INTEL_CVSDK_DIR}/deployment_tools/inference_engine")
+            set(IE_ROOT_DIR "$ENV{INTEL_OPENVINO_DIR}/inference_engine")
+        elseif (EXISTS "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine")
             # CV SDK is installed
-            set(IE_ROOT_DIR "$ENV{INTEL_CVSDK_DIR}/deployment_tools/inference_engine")
+            set(IE_ROOT_DIR "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine")
         endif()
     endif()
 
-    if(IE_ROOT_DIR)
-        if (WIN32)
-            set(_OS_PATH "")
-        else()
-           if (NOT EXISTS "/etc/lsb-release")
-                execute_process(COMMAND find -L /etc/ -maxdepth 1 -type f -name *-release -exec cat {} \;
-                            OUTPUT_VARIABLE release_data RESULT_VARIABLE result)
-                set(name_regex "NAME=\"([^ \"\n]*).*\"\n")
-                set(version_regex "VERSION=\"([0-9]+(\\.[0-9]+)?)[^\n]*\"")
-            else()
-                #linux version detection using cat /etc/lsb-release
-                file(READ "/etc/lsb-release" release_data)
-                set(name_regex "DISTRIB_ID=([^ \n]*)\n")
-                set(version_regex "DISTRIB_RELEASE=([0-9]+(\\.[0-9]+)?)")
-            endif()
-
-            string(REGEX MATCH ${name_regex} name ${release_data})
-            set(os_name ${CMAKE_MATCH_1})
-
-            string(REGEX MATCH ${version_regex} version ${release_data})
-            set(os_name "${os_name} ${CMAKE_MATCH_1}")
-
-            if (NOT os_name)
-                ext_message(FATAL_ERROR "Cannot detect OS via reading /etc/*-release:\n ${release_data}")
-            endif()
-
-            if (NOT InferenceEngine_FIND_QUIETLY)
-                message (STATUS "/etc/*-release distrib: ${os_name}")
-            endif()
-
-            if (${os_name} STREQUAL "Ubuntu 14.04")
-                set(_OS_PATH "ubuntu_14.04/")
-            elseif (${os_name} STREQUAL "Ubuntu 16.04")
-                set(_OS_PATH "ubuntu_16.04/")
-            elseif (${os_name} STREQUAL "Ubuntu 18.04")
-                set(_OS_PATH "ubuntu_18.04/")
-            elseif (${os_name} STREQUAL "CentOS 7")
-                set(_OS_PATH "centos_7.4/")
-            elseif (${os_name} STREQUAL "poky 2.0")
-                set(_OS_PATH "ubuntu_16.04/")
-            elseif (${os_name} STREQUAL "poky 2.5")
-                set(_OS_PATH "ubuntu_18.04/")
-            elseif (${os_name} STREQUAL "Raspbian 9")
-                set(_OS_PATH "raspbian_9/")
-            else()
-                ext_message(FATAL_ERROR "${os_name} is not supported. List of supported OS: Ubuntu 14.04, Ubuntu 16.04, Ubuntu 18.04, CentOS 7, poky 2.0, poky 2.5, Raspbian 9")
-            endif()
-        endif()
+    if(NOT IE_ROOT_DIR)  
+        ext_message(FATAL_ERROR "inference_engine directory is not found")
     endif()
 
+
     if(IE_INCLUDE_DIR AND NOT "${IE_ROOT_DIR}/include" EQUAL "${IE_INCLUDE_DIR}")
         unset(IE_INCLUDE_DIR CACHE)
     endif()
@@ -128,13 +83,13 @@ else()
         unset(IE_SRC_DIR CACHE)
     endif()
 
-    if(IE_LIBRARY AND NOT "${IE_ROOT_DIR}/lib/${_OS_PATH}/${_ARCH}" EQUAL "${IE_LIBRARY}")
+    if(IE_LIBRARY AND NOT "${IE_ROOT_DIR}/lib/${_ARCH}" EQUAL "${IE_LIBRARY}")
         unset(IE_LIBRARY CACHE)
     endif()
 
     set(_IE_ROOT_INCLUDE_DIR "${IE_ROOT_DIR}/include")
     set(_IE_ROOT_SRC_DIR "${IE_ROOT_DIR}/src")
-    set(_IE_ROOT_LIBRARY "${IE_ROOT_DIR}/lib/${_OS_PATH}/${_ARCH}")
+    set(_IE_ROOT_LIBRARY "${IE_ROOT_DIR}/lib/${_ARCH}")
 
     find_path(IE_INCLUDE_DIR inference_engine.hpp "${_IE_ROOT_INCLUDE_DIR}")
     find_path(IE_SRC_DIR extension "${_IE_ROOT_SRC_DIR}")
diff --git a/inference-engine/cmake/version.cmake b/inference-engine/cmake/version.cmake
index 645c25758..daf21cd5c 100644
--- a/inference-engine/cmake/version.cmake
+++ b/inference-engine/cmake/version.cmake
@@ -1,10 +1,8 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 function (branchName VAR)
     execute_process(
             COMMAND git rev-parse --abbrev-ref HEAD
diff --git a/inference-engine/ie_bridges/python/CMakeLists.txt b/inference-engine/ie_bridges/python/CMakeLists.txt
index 2ce462bd6..6176cccd0 100644
--- a/inference-engine/ie_bridges/python/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/CMakeLists.txt
@@ -26,6 +26,11 @@ if (NOT(IE_MAIN_SOURCE_DIR))
     if(NOT(WIN32))
         set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE})
     endif()
+else()
+    if (UNIX OR APPLE)
+        # cython generated files requires public visibility. Force visibility required.
+        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fvisibility=default")
+    endif()
 endif()
 
 include (UseCython)
@@ -45,5 +50,4 @@ endif()
 find_package (InferenceEngine REQUIRED)
 
 set (PYTHON_BRIDGE_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
-add_subdirectory (src/openvino/inference_engine)
-add_subdirectory (src/openvino/inference_engine/dnn_builder)
-\ No newline at end of file
+add_subdirectory (src/openvino/inference_engine)
+\ No newline at end of file
diff --git a/inference-engine/ie_bridges/python/cmake/FindCython.cmake b/inference-engine/ie_bridges/python/cmake/FindCython.cmake
index 3070950fd..baadc4d20 100644
--- a/inference-engine/ie_bridges/python/cmake/FindCython.cmake
+++ b/inference-engine/ie_bridges/python/cmake/FindCython.cmake
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/inference-engine/ie_bridges/python/cmake/UseCython.cmake b/inference-engine/ie_bridges/python/cmake/UseCython.cmake
index 1b9a0a2b6..373621b64 100644
--- a/inference-engine/ie_bridges/python/cmake/UseCython.cmake
+++ b/inference-engine/ie_bridges/python/cmake/UseCython.cmake
@@ -46,7 +46,7 @@
 #
 # See also FindCython.cmake
 
-# Copyright (c) 2016 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/inference-engine/ie_bridges/python/docs/api_overview.md b/inference-engine/ie_bridges/python/docs/api_overview.md
index 3a182ecf1..8365cc84c 100644
--- a/inference-engine/ie_bridges/python/docs/api_overview.md
+++ b/inference-engine/ie_bridges/python/docs/api_overview.md
@@ -1,7 +1,7 @@
 # Overview of Inference Engine Python* API
 
-**NOTE:** It is a preview version of the Inference Engine Python\* API for evaluation purpose only. 
-Module structure and API itself may be changed in future releases.  
+> **NOTE:** It is a preview version of the Inference Engine Python\* API for evaluation purpose only.
+> Module structure and API itself may be changed in future releases.  
 
 This API provides a simplified interface for Inference Engine functionality that allows to:
 
@@ -21,24 +21,24 @@ Supported Python* versions:
 ## Setting Up the Environment
 
 To configure the environment for the Inference Engine Python\* API, run:
- * On Ubuntu 16.04: `source <INSTALL_DIR>/bin/setupvars.sh .` 
+ * On Ubuntu 16.04: `source <INSTALL_DIR>/bin/setupvars.sh .`
  * On Windows 10: `call <INSTALL_DIR>\deployment_tools\inference_engine\python_api\setenv.bat`
- 
+
 The script automatically detects latest installed Python\* version and configures required environment if the version is supported.  
 If you want to use certain version of Python\*, set the environment variable `PYTHONPATH=<INSTALL_DIR>/deployment_tools/inference_engine/python_api/<desired_python_version>`
 after running the environment configuration script.
-   
+
 ## <a name="ienetlayer-class"></a>IENetLayer
-This class stores main information about the layer and allow to modify some layer parameters 
+This class stores main information about the layer and allow to modify some layer parameters
 ### Class attributes:
- 
-* `name` - Name of the layer 
+
+* `name` - Name of the layer
 * `type`- Layer type
 * `precision` - Layer base operating precision. Provides getter and setter interfaces.
 * `layout` - Returns the layout of shape of the layer.
 * `shape` -  Return the list of the shape of the layer.
 * `parents` - Returns a list, which contains names of layers preceding this layer.
-* `children` - Returns a list, which contains names of layers following this layer. 
+* `children` - Returns a list, which contains names of layers following this layer.
 * `affinity` - Layer affinity set by user or a default affinity set by the `IEPlugin.set_initial_affinity()` method.             
                The affinity attribute provides getter and setter interfaces, so the layer affinity can be modified directly.
                For example:                          
@@ -46,39 +46,39 @@ This class stores main information about the layer and allow to modify some laye
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> plugin = IEPlugin(device="HETERO:FPGA,CPU")
 >>> plugin.set_config({"TARGET_FALLBACK": "HETERO:FPGA,CPU"})
->>> plugin.set_initial_affinity(net) 
+>>> plugin.set_initial_affinity(net)
 >>> for l in net.layers.values():
 ...     if l.type == "Convolution":
 ...         l.affinity = "CPU"
 
 ```
-		
-To correctly set affinity for the network, you must first initialize and properly configure the HETERO plugin. 
-`set_config({"TARGET_FALLBACK": "HETERO:FPGA,GPU"})` function configures the plugin fallback devices and their order. 
-`plugin.set_initial_affinity(net)` function sets affinity parameter of model layers according to its support 
-on specified devices. 
 
-After default affinity is set by the plugin, override the default values by setting affinity manually how it's 
+To correctly set affinity for the network, you must first initialize and properly configure the HETERO plugin.
+`set_config({"TARGET_FALLBACK": "HETERO:FPGA,GPU"})` function configures the plugin fallback devices and their order.
+`plugin.set_initial_affinity(net)` function sets affinity parameter of model layers according to its support
+on specified devices.
+
+After default affinity is set by the plugin, override the default values by setting affinity manually how it's
 described in example above  
 
-To understand how default and non-default affinities are set: 
+To understand how default and non-default affinities are set:
 
 1. Call `net.layers` function right after model loading and check that layer affinity parameter is empty.
 2. Call `plugin.set_default_affinity(net)`.
 3. Call `net.layers` and check layer affinity parameters to see how plugin set a default affinity
 4. Set layer affinity how it's described above
-5. Call `net.layers` again and check layer affinity parameters to see how it was changed after manual affinity 
+5. Call `net.layers` again and check layer affinity parameters to see how it was changed after manual affinity
    setting
-          
+
 Please refer to `affinity_setting_demo.py` to see the full usage pipeline.
-    
+
 * `weights`- Dictionary with layer weights, biases or custom blobs if any
 * `params` - Layer specific parameters. Provides getter and setter interfaces to get and modify layer parameters.
-             Please note that some modifications can be ignored and\or overwriten by target plugin (e.g. modification of 
+             Please note that some modifications can be ignored and\or overwriten by target plugin (e.g. modification of
              convolution kernel size will be reflected in layer parameters but finally the plugin will ignore it and will
-             use initial kernel size) 
+             use initial kernel size)
 
-## <a name="ienetwork-class"></a>IENetwork 
+## <a name="ienetwork-class"></a>IENetwork
 
 This class contains the information about the network model read from IR and allows you to manipulate with some model parameters such as
 layers affinity and output layers.
@@ -86,18 +86,15 @@ layers affinity and output layers.
 ### Class Constructor
 
 * `__init__(model: str, weights: str)`
-
     * Parameters:
-        
         * model - Path to `.xml` file  of the IR
         * weights - Path to `.bin` file  of the IR
 
 ### Class attributes:
 
 * `name` - Name of the loaded network
-* `inputs` - A dictionary that maps input layer names to <a name="inputinfo-class"></a>InputInfo objects. 
+* `inputs` - A dictionary that maps input layer names to <a name="inputinfo-class"></a>InputInfo objects.
              For example, to get a shape of the input layer:
-
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> net.inputs
@@ -105,10 +102,8 @@ layers affinity and output layers.
 >>> net.inputs['data'].shape
 [1, 3, 224, 224]
 ```
-
 * `outputs` - A dictionary that maps output layer names to <a name="inputinfo-class"></a>OutputInfo objects
               For example, to get a shape of the output layer:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> net.inputs
@@ -116,10 +111,9 @@ layers affinity and output layers.
 >>> net.outputs['prob'].shape
 [1, 1000]
 ```
-    
-* `batch_size` - Batch size of the network. Provides getter and setter interfaces to get and modify the 
+
+* `batch_size` - Batch size of the network. Provides getter and setter interfaces to get and modify the
                  network batch size. For example:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> net.batch_size
@@ -130,10 +124,8 @@ layers affinity and output layers.
 >>> net.inputs['data'].shape
     [4, 3, 224, 224]
 ```
-    
-* `layers` - Return dictionary that maps network layer names to <a name="ienetlayer-class"></a>`IENetLayer` 
+* `layers` - Return dictionary that maps network layer names to <a name="ienetlayer-class"></a>`IENetLayer`
              objects containing layer properties in topological order. For example, to list all network layers:
-             
 ```py
  >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
  >>> net.layers
@@ -141,11 +133,10 @@ layers affinity and output layers.
  ...
  }
  ```
- 
- * `stats` - Returns `LayersStatsMap` object containing dictionary that maps network layer names to calibration statistics 
+ * `stats` - Returns `LayersStatsMap` object containing dictionary that maps network layer names to calibration statistics
             represented by <a name="layerstats-class"></a> `LayerStats` objects.
             `LayersStatsMap` class inherited from built-in python `dict` and overrides default `update()`method to allow
-            to set or modify layers calibration statistics. 
+            to set or modify layers calibration statistics.
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> net.stats.update({
@@ -153,151 +144,104 @@ layers affinity and output layers.
         "conv2_2d" : LayserStats(min=(-5, -1, 0, 1, -7, 2), max=(63, 124, 70, 174, 99, 106)),
     })
 ```
-For more details about low precision inference please refer to "Low-Precision 8-bit Integer Inference" 
-section in Inference Engine Developers Guide documentation. 
+For more details about low precision inference please refer to "Low-Precision 8-bit Integer Inference"
+section in Inference Engine Developers Guide documentation.
 
-             
 ### Class Methods
 
-* `from_ir(model: str, weights: str)` 
-
-**Note:** The function is deprecated. Please use `IENetwork()` class constructor to create valid instance of `IENetwork`
-
-    * Description:
-            
+* `from_ir(model: str, weights: str)`
+> **NOTE:** The function is deprecated. Please use `IENetwork()` class constructor to create valid instance of `IENetwork`
+    * Description:   
         The class method serves to read the model from the `.xml` and `.bin` files of the IR.
-		
     * Parameters:
-        
         * model - Path to `.xml` file  of the IR
         * weights - Path to `.bin` file  of the IR
-    
     * Return value:
-            
         An instance of the `IENetwork` class
-            
     * Usage example:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> net
 <inference_engine.ie_api.IENetwork object at 0x7fd7dbce54b0>
 ```
-            
+
 ### Instance Methods
-	 
-* `add_outputs(outputs)`:
 
-    * Description:
-                
-        The method serves to mark any intermediate layer as output layer to retrieve the inference results 
+* `add_outputs(outputs)`:
+    * Description:        
+        The method serves to mark any intermediate layer as output layer to retrieve the inference results
         from the specified layers.
-		
     * Parameters:
-            
         * `outputs` - List of layer names to be set as model outputs. In case of setting one layer as output, string with one layer can be provided.
-            
     * Return value:
-    
         None
-            
     * Usage example:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> net.add_outputs(["conv5_1/dwise', conv2_1/expand'])]
 >>> net.outputs
 ['prob', 'conv5_1/dwise', 'conv2_1/expand']
 ```  
-    
-**Note**
-
-The last layers (nodes without successors in graph representation of the model) are set as output 
-by default. In the case above, `prob` layer is a default output and `conv5_1/dwise`, `conv2_1/expand` are user-defined
-outputs.
+> **NOTE**: The last layers (nodes without successors in graph representation of the model) are set as output
+> by default. In the case above, `prob` layer is a default output and `conv5_1/dwise`, `conv2_1/expand` are user-defined
+> outputs.
 
 * `reshape(input_shapes: dict)`:
-    
-    * Description: 
-        
+    * Description:
         The method reshapes the network to change spatial dimensions, batch size, or any dimension.
-        
-        **Note:**
-        
-        Before using this method, make sure that the target shape is applicable for the network
-        Changing the network shape to an arbitrary value may lead to unpredictable behaviour. 
-        
+> **Note:** Before using this method, make sure that the target shape is applicable for the network. Changing the network shape to an arbitrary value may lead to unpredictable behaviour.
     * Parameters:
-    
         * `input_shapes` - The dictionary that maps input layer names to tuples with the target shape
-
-    * Return value:
-    
-        None
-            
+    * Return value:  
+        None           
     * Usage example:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> input_layer = next(iter(net.inputs))
 >>> n, c, h, w = net.inputs[input_layer]
 >>> net.reshape({input_layer: (n, c, h*2, w*2)}]
-``` 
-
-* `serialize(path_to_xml, path_to_bin)`:
-    
-    * Description: 
-        
-        The method serializes the network and stores it in files. 
-        
-    * Parameters:
-    
-        * `path_to_xml` - path to a file, where a serialized model will be stored. 
+```
+* `serialize(path_to_xml, path_to_bin)`:    
+    * Description:         
+        The method serializes the network and stores it in files.        
+    * Parameters:  
+        * `path_to_xml` - path to a file, where a serialized model will be stored.
         * `path_to_bin` - path to a file, where serialized weights will be stored.
-
     * Return value:
-    
-        None
-            
+        None     
     * Usage example:
-    
 ```py
 >>> net = IENetwork(model=path_to_model, weights=path_to_weights)
 >>> net.serialize(path_to_xml, path_to_bin)
-``` 
+```
+
 ## <a name="layerstats-class"></a>LayerStats
-Layer calibration statistic container
+
+Layer calibration statistic container.
+
 ### Class Constructor
 
 * `__init__(min: tuple = (), max: tuple = ())`
-
     * Parameters:
-        
-        * min - Tuple with per-channel minimum layer activation values 
+        * min - Tuple with per-channel minimum layer activation values
         * max - Tuple with per-channel maximum layer activation values
 
-## <a name="inputinfo-class"></a>InputInfo 
+## <a name="inputinfo-class"></a>InputInfo
 
 This class contains the information about the network input layers
 
 ### Class attributes:
 
-* `precision` - Precision of the input data provided by user. Provides setter and getter interfaces 
+* `precision` - Precision of the input data provided by user. Provides setter and getter interfaces
                 to get and modify input layer precision.
-                
     List of applicable precisions: FP32 FP16, I32, I16, I8, U32, U16
-    
-    **Note**:  Support of any calculation precision depends on the target plugin                 
-
+> **NOTE**:  Support of any calculation precision depends on the target plugin.            
 * `layout` - Layout of the input data provided by user. Provides setter and getter interfaces  
-             to get and modify input layer layout. 
-             
+             to get and modify input layer layout.
     List of applicable layouts: NCHW, NHWC, OIHW, C, CHW, HW, NC, CN, BLOCKED
-
 * `shape` - input layer data shape
 
-
-## <a name="outputinfo-class"></a>OutputInfo 
+## <a name="outputinfo-class"></a>OutputInfo
 
 This class contains the information about the network input layers
 
@@ -305,52 +249,40 @@ This class contains the information about the network input layers
 
 * `precision` - Precision of the output data. Provides setter and getter interfaces  
                 to get and modify output layer precision.          
-
 * `layout` - Layout of the output data provided by user
-
 * `shape` - Input layer data shape
- 
+
 ## <a name="ieplugin-class"></a>IEPlugin Class
 
 This class is the main plugin interface and serves to initialize and configure the plugin.
- 
+
 ### Class Constructor
 
 * `__init__(device: str, plugin_dirs=None)`
-
     * Parameters:
-    
         * `device` - Target device name. Supported devices: CPU, GPU, FPGA, MYRIAD, HETERO
-        * `plugin_dirs` - List of paths to plugin directories 
-        
+        * `plugin_dirs` - List of paths to plugin directories
+
 ### Properties
 
 * `device` - a name of the device that was specified to initialize IEPlugin
-* `version` -  a version of the plugin 
+* `version` -  a version of the plugin
 
 ### Instance Methods
 
 *  ```load(network: IENetwork, num_requests: int=1, config=None)```
-
-    * Description:
-        
-        Loads a network that was read from the IR to the plugin and creates an executable network from a network object. 
-        You can create as many networks as you need and use them simultaneously (up to the limitation of the hardware 
+    * Description:   
+        Loads a network that was read from the IR to the plugin and creates an executable network from a network object.
+        You can create as many networks as you need and use them simultaneously (up to the limitation of the hardware
         resources).
-    
     * Parameters:
-	
         * `network` - A valid `IENetwork` instance
-        * `num_requests` - A positive integer value of infer requests to be created. Number of infer requests may be limited 
+        * `num_requests` - A positive integer value of infer requests to be created. Number of infer requests may be limited
         by device capabilities.        
         * `config` - A dictionary of plugin configuration keys and their values
-        
-    * Return value:
-        
+    * Return value:  
         None
-    
     * Usage example:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> plugin = IEPlugin(device="CPU")
@@ -358,89 +290,52 @@ This class is the main plugin interface and serves to initialize and configure t
 >>> exec_net
 <inference_engine.ie_api.ExecutableNetwork object at 0x7f5140bbcd38>
 ```
-		
 * `set_initial_affinity(net: IENetwork)`
-    
     * Description:
-        
-        Sets initial affinity for model layers according to the HETERO plugin logic. Applicable only if 
+        Sets initial affinity for model layers according to the HETERO plugin logic. Applicable only if
         IEPlugin was initialized for HETERO device.
-        
     * Parameters:
-	
-        * `net` - A valid instance of IENetwork 
-    
-    * Return value:
-        
-        None
-        
-    * Usage example: 
-	
-		See `affinity` attribute of the `IENetLayer` class.
-    
+        * `net` - A valid instance of IENetwork
+    * Return value:  
+        None   
+    * Usage example:
+	See `affinity` attribute of the `IENetLayer` class.
 * `add_cpu_extension(extension_path: str)`
-
     * Description:
-        
-        Loads extensions library to the plugin. Applicable only for CPU device and HETERO device with CPU
-        
+        Loads extensions library to the plugin. Applicable only for CPU device and HETERO device with CPU  
     * Parameters:
-    
-        * `extension_path` - A full path to CPU extensions library   
-        
+        * `extension_path` - A full path to CPU extensions library    
      * Return value:
-        
         None
-        
     * Usage example:
-    
 ```py
 >>> plugin = IEPlugin(device="CPU")
 >>> plugin.add_cpu_extenstions(ext_lib_path)
-```
-    
-    
+```    
 * `set_config(config: dict)`
-
-    * Description: 
-     
-        Sets a configuration for the plugin. Refer to `SetConfig()` in Inference Engine C++ documentation for acceptable 
+    * Description:
+        Sets a configuration for the plugin. Refer to `SetConfig()` in Inference Engine C++ documentation for acceptable
         keys and values list.
-        
-    * Parameters: 
-        
+    * Parameters:
         * `config` - A dictionary of keys and values of acceptable configuration parameters
-        
     * Return value:
-        
         None
-    
-    * Usage examples: 
-		
-		See `set_affinity` method of the `IENetwork` class.
-
+    * Usage examples:
+	See `set_affinity` method of the `IENetwork` class.
 * `get_supported_layers(net: IENetwork)`
-    
     * Description:
-        
-        Returns the set of layers supported by the plugin. Please note that in case of CPU plugin support of 
-        a layer may depends on extension loaded by `add_cpu_extenstion()` method 
-        
+        Returns the set of layers supported by the plugin. Please note that in case of CPU plugin support of
+        a layer may depends on extension loaded by `add_cpu_extenstion()` method
     * Parameters:
-	
-        * `net` - A valid instance of IENetwork 
-    
+        * `net` - A valid instance of IENetwork
     * Return value:
-        
         Set of layers supported by the plugin
-        
-    * Usage example: 
-	
-		See `affinity` attribute of the `IENetLayer` class.
-   
+    * Usage example:
+	See `affinity` attribute of the `IENetLayer` class.
+
 ## <a name="executablenetwork"></a>ExecutableNetwork Class
 
-This class represents a network instance loaded to plugin and ready for inference. 
+This class represents a network instance loaded to plugin and ready for inference.
 
 ### Class Constructor
 
@@ -449,37 +344,28 @@ There is no explicit class constructor. To make a valid instance of `ExecutableN
 ### Class attributes
 
 * `requests` - A tuple of InferRequest instances
-
-    * Usage example:
-        
+    * Usage example:    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> plugin = IEPlugin(device="CPU")
 >>> exec_net = plugin.load(network=net, num_requsts=3)
 >>> exec_net.requests
-(<inference_engine.ie_api.InferRequest object at 0x7f66f56c57e0>, 
-<inference_engine.ie_api.InferRequest object at 0x7f66f56c58b8>, 
+(<inference_engine.ie_api.InferRequest object at 0x7f66f56c57e0>,
+<inference_engine.ie_api.InferRequest object at 0x7f66f56c58b8>,
 <inference_engine.ie_api.InferRequest object at 0x7f66f56c5900>)
 ```
-		
+
 ### Instance Methods
 
 * `infer(inputs=None)`
-
     * Description:
-        
         Starts synchronous inference for the first infer request of the executable network and returns output data.
         Wraps `infer()` method of the `InferRequest` class
-    
     * Parameters:
         * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer
-        
     * Return value:
-        
         A dictionary that maps output layer names to `numpy.ndarray` objects with output data of the layer
-        
     * Usage example:
-    
 ```py
 >>> net = IENetwork(model=path_to_xml_file, weights=path_to_bin_file)
 >>> plugin = IEPlugin(device="CPU")
@@ -493,35 +379,26 @@ There is no explicit class constructor. To make a valid instance of `ExecutableN
                  ......
               ]])}
 ```
-For illustration of input data preparation, please see samples (for example, `classification_sample.py`).
-      
+	For illustration of input data preparation, please see samples (for example, `classification_sample.py`).
 * `start_async(request_id, inputs=None)`
-
     * Description:
-        
         Starts asynchronous inference for specified infer request.
         Wraps `async_infer()` method of the `InferRequest` class
-        
     * Parameters:
-	
         * `request_id` - Index of infer request to start inference
         * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer
-        
     * Return value:
-        
         A handler of specified infer request, which is an instance of the `InferRequest` class.
-        
     * Usage example:
-		
 ```py
 >>> infer_request_handle = exec_net.start_async(request_id=0, inputs={input_blob: image})
 >>> infer_status = infer_request_handle.wait()
 >>> res = infer_request_handle.outputs[out_blob]
 ```
-		
-For more details about infer requests processing, see `classification_sample_async.py` (simplified case) and 
+
+For more details about infer requests processing, see `classification_sample_async.py` (simplified case) and
 `object_detection_demo_ssd_async.py` (real asynchronous use case) samples.
-        
+
 ## <a name="inferrequest"></a>InferRequest Class
 
 This class provides an interface to infer requests of `ExecutableNetwork` and serves to handle infer requests execution
@@ -529,153 +406,107 @@ and to set and get output data.
 
 ### Class Constructor
 
-There is no explicit class constructor. To make a valid `InferRequest` instance, use `load()` method of the `IEPlugin` 
-class with specified number of requests to get `ExecutableNetwork` instance which stores infer requests. 
+There is no explicit class constructor. To make a valid `InferRequest` instance, use `load()` method of the `IEPlugin`
+class with specified number of requests to get `ExecutableNetwork` instance which stores infer requests.
 
 ### Class attributes
 
 * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer
 * `outputs` - A dictionary that maps output layer names to `numpy.ndarray` objects with output data of the layer
-
     * Usage example:
-
 ```py    
 >>> exec_net.requests[0].inputs['data'][:] = image
 >>> exec_net.requests[0].infer()
 >>> res = exec_net.requests[0].outputs['prob']
->>> np.flip(np.sort(np.squeeze(res)),0) 
+>>> np.flip(np.sort(np.squeeze(res)),0)
 array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01,
        5.45198545e-02, 2.44456064e-02, 5.41366823e-03, 3.42589128e-03,
        2.26027006e-03, 2.12283316e-03 ...])
-``` 
-	
+```
+
 ### Instance Methods
 
-It is not recommended to run inference directly on `InferRequest` instance. 
-To run inference, please use simplified methods `infer()` and `start_async()` of `ExecutableNetwork`. 
+It is not recommended to run inference directly on `InferRequest` instance.
+To run inference, please use simplified methods `infer()` and `start_async()` of `ExecutableNetwork`.
 
 * `infer(inputs=None)`
-
-    * Description: 
-    
-         Starts synchronous inference of the infer request and fill outputs array
-		 
-     * Parameters:
-	 
-        * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer
-        
-    * Return value:
-        
-        None
-        
-    * Usage example:
-    
+    * Description:
+         Starts synchronous inference of the infer request and fill outputs array		 
+     * Parameters:	 
+        * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer        
+    * Return value:        
+        None        
+    * Usage example:    
 ```py
 >>> exec_net = plugin.load(network=net, num_requests=2)
 >>> exec_net.requests[0].infer({input_blob: image})
 >>> res = exec_net.requests[0].outputs['prob']
->>> np.flip(np.sort(np.squeeze(res)),0) 
+>>> np.flip(np.sort(np.squeeze(res)),0)
 array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01,
        5.45198545e-02, 2.44456064e-02, 5.41366823e-03, 3.42589128e-03,
-       2.26027006e-03, 2.12283316e-03 ...]) 
-```       
-                   
+       2.26027006e-03, 2.12283316e-03 ...])
+```                          
 * `async_infer(inputs=None)`
-
-    * Description: 
-    
-        Starts asynchronous inference of the infer request and fill outputs array
-		
-     * Parameters:
-	 
-        * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer
-        
-    * Return value:
-        
-        None
-        
-    * Usage example:
-    
+    * Description:     
+        Starts asynchronous inference of the infer request and fill outputs array		
+     * Parameters:	 
+        * `inputs` - A dictionary that maps input layer names to `numpy.ndarray` objects of proper shape with input data for the layer        
+    * Return value:        
+        None        
+    * Usage example:    
 ```py
 >>> exec_net = plugin.load(network=net, num_requests=2)
 >>> exec_net.requests[0].async_infer({input_blob: image})
 >>> exec_net.requests[0].wait()
 >>> res = exec_net.requests[0].outputs['prob']
->>> np.flip(np.sort(np.squeeze(res)),0) 
+>>> np.flip(np.sort(np.squeeze(res)),0)
 array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01,
        5.45198545e-02, 2.44456064e-02, 5.41366823e-03, 3.42589128e-03,
-       2.26027006e-03, 2.12283316e-03 ...]) 
-```
-			
+       2.26027006e-03, 2.12283316e-03 ...])
+```			
 * `wait(timeout=-1)`
-
-    * Description:
-        
-        Waits for the result to become available. Blocks until specified timeout elapses or the result 
-        becomes available, whichever comes first. 
-        
-        **Note:**
-        
-        There are special values of the timeout parameter:
-        
-        * 0 - Immediately returns the inference status. It does not block or interrupt execution. 
+    * Description:        
+        Waits for the result to become available. Blocks until specified timeout elapses or the result
+        becomes available, whichever comes first.      
+> **NOTE:** There are special values of the timeout parameter:
+        * 0 - Immediately returns the inference status. It does not block or interrupt execution.
         To find statuses meaning, please refer to InferenceEngine::StatusCode in Inference Engine C++ documentation
-        
         * -1 - Waits until inference result becomes available (default value)
-        
     * Parameters:
-	
-        * `timeout` - Time to wait in milliseconds or special (0, -1) cases described above. 
+        * `timeout` - Time to wait in milliseconds or special (0, -1) cases described above.
           If not specified, `timeout` value is set to -1 by default.
-      
-    * Usage example: 
-	
-		See `async_infer()` method of the the `InferRequest` class.
-		
-
+    * Usage example:
+	See `async_infer()` method of the the `InferRequest` class.
 * `get_perf_counts()`
-
     * Description:
-        
-        Queries performance measures per layer to get feedback of what is the most time consuming layer. . 
-        
-        **Note**:
-            
-        Performance counters data and format depends on the plugin
-        
+        Queries performance measures per layer to get feedback of what is the most time consuming layer.
+> **NOTE**: Performance counters data and format depends on the plugin
     * Parameters:
-	
         None
-      
-    * Usage example: 
-	
+    * Usage example:
 ```py
 >>> exec_net = plugin.load(network=net, num_requests=2)
 >>> exec_net.requests[0].infer({input_blob: image})
 >>> exec_net.requests[0].get_perf_counts()
-{'Conv2D': {'exec_type': 'jit_avx2_1x1', 
-            'real_time': 154, 
-            'cpu_time': 154, 
-            'status': 'EXECUTED', 
+{'Conv2D': {'exec_type': 'jit_avx2_1x1',
+            'real_time': 154,
+            'cpu_time': 154,
+            'status': 'EXECUTED',
             'layer_type': 'Convolution'},
- 'Relu6':  {'exec_type': 'undef', 
-            'real_time': 0, 
-            'cpu_time': 0, 
-            'status': 'NOT_RUN', 
+ 'Relu6':  {'exec_type': 'undef',
+            'real_time': 0,
+            'cpu_time': 0,
+            'status': 'NOT_RUN',
             'layer_type': 'Clamp'}
 ...
 }
 ```
-
 * `set_batch(size)`
     * Description:   
        Sets new batch size for certain infer request when dynamic batching is enabled in executable network that created this request.
-       
-       **Note:** Support of dynamic batch size depends on the target plugin.        
-        
+> **NOTE:** Support of dynamic batch size depends on the target plugin.          
     * Parameters:
         * `batch` - new batch size to be used by all the following inference calls for this request.
-        
     * Usage example:
 ```py
 >>> plugin.set_config({"DYN_BATCH_ENABLED": "YES"})
@@ -683,5 +514,3 @@ array([4.85416055e-01, 1.70385033e-01, 1.21873841e-01, 1.18894853e-01,
 >>> exec_net.requests[0].set_batch(inputs_count)
 ```
 Please refer to `dynamic_batch_demo.py` to see the full usage example.
-
-
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/README.md b/inference-engine/ie_bridges/python/sample/benchmark_app/README.md
index 7a9a52604..f4a1f5540 100644
--- a/inference-engine/ie_bridges/python/sample/benchmark_app/README.md
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/README.md
@@ -1,4 +1,4 @@
-# Benchmark Application Demo
+# Benchmark Application Python* Demo
 
 This topic demonstrates how to run the Benchmark Application demo, which performs inference using convolutional networks.
 
@@ -8,6 +8,7 @@ This topic demonstrates how to run the Benchmark Application demo, which perform
 
 Upon the start-up, the application reads command-line parameters and loads a network and images to the Inference Engine plugin. The number of infer requests and execution approach depend on a mode defined with the `-api` command-line parameter.
 
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 ### Synchronous API
 For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. A number of executions is defined by one of the two values:
@@ -30,37 +31,69 @@ The infer requests are executed asynchronously. `Wait` method is used to wait fo
 ## Running
 
 Running the application with the `-h` or `--help`' option yields the following usage message:
-```python3 benchmark_app.py -h
+```python3 benchmark_app.py -h```
 
-benchmark_app [OPTION]
-Options:
+The command yields the following usage message:
+```
+   usage: benchmark_app.py [-h] -i PATH_TO_IMAGES -m PATH_TO_MODEL
+                        [-c PATH_TO_CLDNN_CONFIG] [-l PATH_TO_EXTENSION]
+                        [-api {sync,async}] [-d TARGET_DEVICE]
+                        [-niter NUMBER_ITERATIONS]
+                        [-nireq NUMBER_INFER_REQUESTS]
+                        [-nthreads NUMBER_THREADS] [-b BATCH_SIZE]
+                        [-pin {YES,NO}]
 
-    -h, --help                                       Print a usage message
-    -i, --path_to_images "<path>"                    Required. Path to a folder with images or to image files.
-    -m, --path_to_model "<path>"                     Required. Path to an .xml file with a trained model.
-    -pp "<path>"                                     Path to a plugin folder.
-    -api, --api_type "<sync/async>"                  Required. Enable using sync/async API.
-    -d, --target_device "<device>"                   Specify a target device to infer on: CPU, GPU, FPGA or MYRIAD. Use "-d HETERO:<comma separated devices list>" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device.
-    -niter, --number_iterations "<integer>"          Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
-    -nireq, --number_infer_requests "<integer>"      Optional. Number of infer requests (default value is 2).
-    -l, --path_to_extension "<absolute_path>"        Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
-          Or
-    -c, --path_to_cldnn_config "<absolute_path>"     Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
-    -b, --batch_size "<integer>"                     Optional. Batch size value. If not specified, the batch size value is determined from IR.
-    -nthreads, --number_threads "<integer>"          Number of threads to use for inference on the CPU (including Hetero cases).
-    -pin {YES,NO}, --infer_threads_pinning {YES,NO}  Optional. Enable ("YES" is default value) or disable ("NO")CPU threads pinning for CPU-involved inference.
+Options:
+  -h, --help            Show this help message and exit.
+  -i PATH_TO_IMAGES, --path_to_images PATH_TO_IMAGES
+                        Required. Path to a folder with images or to image
+                        files.
+  -m PATH_TO_MODEL, --path_to_model PATH_TO_MODEL
+                        Required. Path to an .xml file with a trained model.
+  -c PATH_TO_CLDNN_CONFIG, --path_to_cldnn_config PATH_TO_CLDNN_CONFIG
+                        Optional. Required for GPU custom kernels. Absolute
+                        path to an .xml file with the kernels description.
+  -l PATH_TO_EXTENSION, --path_to_extension PATH_TO_EXTENSION
+                        Optional. Required for GPU custom kernels. Absolute
+                        path to an .xml file with the kernels description.
+  -api {sync,async}, --api_type {sync,async}
+                        Optional. Enable using sync/async API. Default value
+                        is sync
+  -d TARGET_DEVICE, --target_device TARGET_DEVICE
+                        Optional. Specify a target device to infer on: CPU,
+                        GPU, FPGA, HDDL or MYRIAD. Use "-d HETERO:<comma
+                        separated devices list>" format to specify HETERO
+                        plugin. The application looks for a suitable plugin
+                        for the specified device.
+  -niter NUMBER_ITERATIONS, --number_iterations NUMBER_ITERATIONS
+                        Optional. Number of iterations. If not specified, the
+                        number of iterations is calculated depending on a
+                        device.
+  -nireq NUMBER_INFER_REQUESTS, --number_infer_requests NUMBER_INFER_REQUESTS
+                        Optional. Number of infer requests (default value is
+                        2).
+  -nthreads NUMBER_THREADS, --number_threads NUMBER_THREADS
+                        Number of threads to use for inference on the CPU
+                        (including Hetero cases).
+  -b BATCH_SIZE, --batch_size BATCH_SIZE
+                        Optional. Batch size value. If not specified, the
+                        batch size value is determined from IR
+  -pin {YES,NO}, --infer_threads_pinning {YES,NO}
+                        Optional. Enable ("YES" is default value) or disable
+                        ("NO")CPU threads pinning for CPU-involved inference.
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
 
-To run the demo, you can use one-layer public models or one-layer pre-trained and optimized models delivered with the package that support images as input.
+To run the demo, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
 For example, to do inference on an image using a trained network with multiple outputs on CPU, run the following command:
 
-```python3 benchmark_app.py -i <path_to_image>/inputImage.bmp -m <path_to_model>/multiple-output.xml -d CPU
 ```
-
-> **NOTE**: Public models should be first converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+python3 benchmark_app.py -i <path_to_image>/inputImage.bmp -m <path_to_model>/multiple-output.xml -d CPU
+```
 
 ## Demo Output
 
@@ -79,3 +112,5 @@ For asynchronous API, the application outputs only throughput:
 
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/model-optimizer/mo/front/tf/extractors/sum.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py
index e7b06f730..86feb3005 100644
--- a/model-optimizer/mo/front/tf/extractors/sum.py
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/__init__.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,11 +13,6 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-from mo.front.common.partial_infer.reduce import tf_reduce_infer
 
-
-def tf_sum_ext(pb):
-    return {
-        'keep_dims': pb.attr["keep_dims"].b,
-        'infer': lambda node: tf_reduce_infer(node)
-    }
+from .benchmark import main
+from .utils.constants import HELP_MESSAGES
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py
index 761b63e63..462e03092 100644
--- a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark.py
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/benchmark.py
@@ -1,6 +1,5 @@
-#!/usr/bin/env python
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,7 +17,7 @@
 from statistics import median
 from openvino.inference_engine import IENetwork, IEPlugin
 
-from utils.benchmark_utils import *
+from .utils.benchmark_utils import *
 
 def main(args=None):
     try:
@@ -198,7 +197,3 @@ def main(args=None):
 
     except Exception as e:
         logging.exception(e)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/__init__.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/__init__.py
new file mode 100644
index 000000000..30917612e
--- /dev/null
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/__init__.py
@@ -0,0 +1,15 @@
+"""
+ Copyright (C) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/utils/benchmark_utils.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/benchmark_utils.py
index 42676141f..2f6f38be5 100644
--- a/inference-engine/ie_bridges/python/sample/benchmark_app/utils/benchmark_utils.py
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/benchmark_utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -26,7 +26,7 @@ from random import choice
 from datetime import datetime
 from fnmatch import fnmatch
 
-from . constants import *
+from .constants import *
 
 logging.basicConfig(format="[ %(levelname)s ] %(message)s", level=logging.INFO, stream=sys.stdout)
 logger = logging.getLogger('BenchmarkApp')
@@ -42,27 +42,29 @@ def validate_args(args):
 
 
 def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--path_to_images', type=str, required=True, help=HELP_MESSAGES['IMAGE_MESSAGE'])
-    parser.add_argument('-m', '--path_to_model', type=str, required=True, help=HELP_MESSAGES['MODEL_MESSAGE'])
-    parser.add_argument('-c', '--path_to_cldnn_config', type=str, required=False,
-                        help=HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE'])
-    parser.add_argument('-l', '--path_to_extension', type=str, required=False, default=None,
-                        help=HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE'])
-    parser.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
-                        help=HELP_MESSAGES['API_MESSAGE'])
-    parser.add_argument('-d', '--target_device', type=str, required=False, default="CPU",
-                        help=HELP_MESSAGES['TARGET_DEVICE_MESSAGE'])
-    parser.add_argument('-niter', '--number_iterations', type=int, required=False, default=None,
-                        help=HELP_MESSAGES['ITERATIONS_COUNT_MESSAGE'])
-    parser.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=2,
-                        help=HELP_MESSAGES['INFER_REQUESTS_COUNT_MESSAGE'])
-    parser.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
-                        help=HELP_MESSAGES['INFER_NUM_THREADS_MESSAGE'])
-    parser.add_argument('-b', '--batch_size', type=int, required=False, default=None,
-                        help=HELP_MESSAGES['BATCH_SIZE_MESSAGE'])
-    parser.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES',
-                        choices=['YES', 'NO'], help=HELP_MESSAGES['INFER_THREADS_PINNING_MESSAGE'])
+    parser = argparse.ArgumentParser(add_help=False)
+    args = parser.add_argument_group('Options')
+    args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help=HELP_MESSAGES["HELP"])
+    args.add_argument('-i', '--path_to_images', type=str, required=True, help=HELP_MESSAGES['IMAGE_MESSAGE'])
+    args.add_argument('-m', '--path_to_model', type=str, required=True, help=HELP_MESSAGES['MODEL_MESSAGE'])
+    args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False,
+                      help=HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE'])
+    args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None,
+                      help=HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE'])
+    args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
+                      help=HELP_MESSAGES['API_MESSAGE'])
+    args.add_argument('-d', '--target_device', type=str, required=False, default="CPU",
+                      help=HELP_MESSAGES['TARGET_DEVICE_MESSAGE'])
+    args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None,
+                      help=HELP_MESSAGES['ITERATIONS_COUNT_MESSAGE'])
+    args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=2,
+                      help=HELP_MESSAGES['INFER_REQUESTS_COUNT_MESSAGE'])
+    args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
+                      help=HELP_MESSAGES['INFER_NUM_THREADS_MESSAGE'])
+    args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
+                      help=HELP_MESSAGES['BATCH_SIZE_MESSAGE'])
+    args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES',
+                      choices=['YES', 'NO'], help=HELP_MESSAGES['INFER_THREADS_PINNING_MESSAGE'])
     return parser.parse_args()
 
 
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/utils/constants.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/constants.py
index f68919e51..b9770a19c 100644
--- a/inference-engine/ie_bridges/python/sample/benchmark_app/utils/constants.py
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark/utils/constants.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,22 +15,24 @@
 """
 
 HELP_MESSAGES = {
-    'IMAGE_MESSAGE': "Path to a folder with images or to image files.",
-    'MULTI_INPUT_MESSAGE': "Path to multi input file containing.",
-    'MODEL_MESSAGE': "Path to an .xml file with a trained model.",
-    'PLUGIN_PATH_MESSAGE': "Path to a plugin folder.",
-    'API_MESSAGE': "Enable using sync/async API. Default value is sync",
-    'TARGET_DEVICE_MESSAGE': "Specify a target device to infer on: CPU, GPU, FPGA or MYRIAD. "
+    'HELP': "Show this help message and exit.",
+    'IMAGE_MESSAGE': "Required. Path to a folder with images or to image files.",
+    'MULTI_INPUT_MESSAGE': "Optional. Path to multi input file containing.",
+    'MODEL_MESSAGE': "Required. Path to an .xml file with a trained model.",
+    'PLUGIN_PATH_MESSAGE': "Optional. Path to a plugin folder.",
+    'API_MESSAGE': "Optional. Enable using sync/async API. Default value is sync",
+    'TARGET_DEVICE_MESSAGE': "Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. "
                            "Use \"-d HETERO:<comma separated devices list>\" format to specify HETERO plugin. "
     "The application looks for a suitable plugin for the specified device.",
-    'ITERATIONS_COUNT_MESSAGE': "Number of iterations. "
+    'ITERATIONS_COUNT_MESSAGE': "Optional. Number of iterations. "
     "If not specified, the number of iterations is calculated depending on a device.",
-    'INFER_REQUESTS_COUNT_MESSAGE': "Number of infer requests (default value is 2).",
+    'INFER_REQUESTS_COUNT_MESSAGE': "Optional. Number of infer requests (default value is 2).",
     'INFER_NUM_THREADS_MESSAGE': "Number of threads to use for inference on the CPU "
                                  "(including Hetero cases).",
-    'CUSTOM_CPU_LIBRARY_MESSAGE': "Required for CPU custom layers. "
+    'CUSTOM_CPU_LIBRARY_MESSAGE': "Optional. Required for CPU custom layers. "
                                   "Absolute path to a shared library with the kernels implementations.",
-    'CUSTOM_GPU_LIBRARY_MESSAGE': "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.",
+    'CUSTOM_GPU_LIBRARY_MESSAGE': "Optional. Required for GPU custom kernels. Absolute path to an .xml file with the "
+                                  "kernels description.",
     'BATCH_SIZE_MESSAGE': "Optional. Batch size value. If not specified, the batch size value is determined from IR",
     'INFER_THREADS_PINNING_MESSAGE': "Optional. Enable (\"YES\" is default value) or disable (\"NO\")"
                                      "CPU threads pinning for CPU-involved inference."
diff --git a/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py
new file mode 100644
index 000000000..4f587a84d
--- /dev/null
+++ b/inference-engine/ie_bridges/python/sample/benchmark_app/benchmark_app.py
@@ -0,0 +1,37 @@
+import benchmark
+
+from argparse import ArgumentParser, SUPPRESS
+
+
+def parse_args():
+    parser = ArgumentParser(add_help=False)
+    args = parser.add_argument_group('Options')
+    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help=benchmark.HELP_MESSAGES["HELP"])
+    args.add_argument('-i', '--path_to_images', type=str, required=True,
+                      help=benchmark.HELP_MESSAGES['IMAGE_MESSAGE'])
+    args.add_argument('-m', '--path_to_model', type=str, required=True,
+                      help=benchmark.HELP_MESSAGES['MODEL_MESSAGE'])
+    args.add_argument('-c', '--path_to_cldnn_config', type=str, required=False,
+                      help=benchmark.HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE'])
+    args.add_argument('-l', '--path_to_extension', type=str, required=False, default=None,
+                      help=benchmark.HELP_MESSAGES['CUSTOM_GPU_LIBRARY_MESSAGE'])
+    args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
+                      help=benchmark.HELP_MESSAGES['API_MESSAGE'])
+    args.add_argument('-d', '--target_device', type=str, required=False, default="CPU",
+                      help=benchmark.HELP_MESSAGES['TARGET_DEVICE_MESSAGE'])
+    args.add_argument('-niter', '--number_iterations', type=int, required=False, default=None,
+                      help=benchmark.HELP_MESSAGES['ITERATIONS_COUNT_MESSAGE'])
+    args.add_argument('-nireq', '--number_infer_requests', type=int, required=False, default=2,
+                      help=benchmark.HELP_MESSAGES['INFER_REQUESTS_COUNT_MESSAGE'])
+    args.add_argument('-nthreads', '--number_threads', type=int, required=False, default=None,
+                      help=benchmark.HELP_MESSAGES['INFER_NUM_THREADS_MESSAGE'])
+    args.add_argument('-b', '--batch_size', type=int, required=False, default=None,
+                      help=benchmark.HELP_MESSAGES['BATCH_SIZE_MESSAGE'])
+    args.add_argument('-pin', '--infer_threads_pinning', type=str, required=False, default='YES',
+                      choices=['YES', 'NO'], help=benchmark.HELP_MESSAGES['INFER_THREADS_PINNING_MESSAGE'])
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_args()
+    benchmark.main(args)
diff --git a/inference-engine/ie_bridges/python/sample/classification_sample/README.md b/inference-engine/ie_bridges/python/sample/classification_sample/README.md
new file mode 100644
index 000000000..a4eec406d
--- /dev/null
+++ b/inference-engine/ie_bridges/python/sample/classification_sample/README.md
@@ -0,0 +1,79 @@
+# Image Classification Python* Sample
+
+This topic demonstrates how to run the Image Classification sample application, which performs
+inference using image classification networks such as AlexNet and GoogLeNet.
+
+### How It Works
+
+Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference
+Engine plugin. When inference is done, the application creates an
+output image and outputs data to the standard output stream.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+## Running
+
+Run the application with the `-h` option yields the usage message:
+```
+python3 classification_sample.py -h
+```
+The command yields the following usage message:
+```
+usage: classification_sample.py [-h] -m MODEL -i INPUT [INPUT ...]
+                                [-l CPU_EXTENSION] [-pp PLUGIN_DIR]
+                                [-d DEVICE] [--labels LABELS] [-nt NUMBER_TOP]
+                                [-ni NUMBER_ITER] [-pc]
+
+Options:
+  -h, --help            Show this help message and exit.
+  -m MODEL, --model MODEL
+                        Required. Path to an .xml file with a trained model.
+  -i INPUT [INPUT ...], --input INPUT [INPUT ...]
+                        Required. Path to a folder with images or path to an
+                        image files
+  -l CPU_EXTENSION, --cpu_extension CPU_EXTENSION
+                        Optional. Required for CPU custom layers. MKLDNN (CPU)-targeted custom layers.
+                        Absolute path to a shared library with the kernels
+                        implementations.
+  -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
+                        Optional. Path to a plugin folder
+  -d DEVICE, --device DEVICE
+                        Optional. Specify the target device to infer on; CPU,
+                        GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
+                        will look for a suitable plugin for device specified.
+                        Default value is CPU
+  --labels LABELS       Optional. Path to a labels mapping file
+  -nt NUMBER_TOP, --number_top NUMBER_TOP
+                        Optional. Number of top results
+  -ni NUMBER_ITER, --number_iter NUMBER_ITER
+                        Optional. Number of inference iterations
+  -pc, --perf_counts    Optional. Report performance counters
+```
+
+Running the application with the empty list of options yields the usage message given above.
+
+To run the sample, you can use AlexNet and GoogLeNet or other image classification models. You can download the pre-trained models with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or from [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+For example, to perform inference of an AlexNet model (previously converted to the Inference Engine format) on CPU, use the following command:
+
+```
+    python3 classification_sample.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml
+```
+
+### Sample Output
+
+By default the application outputs top-10 inference results.
+Add the `-nt` option to the previous command to modify the number of top output results.
+For example, to get the top-5 results on GPU, run the following command:
+```
+    python3 classification_sample.py<path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d GPU
+```
+
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
+
+
diff --git a/inference-engine/ie_bridges/python/sample/classification_sample.py b/inference-engine/ie_bridges/python/sample/classification_sample/classification_sample.py
index f02459f2e..ea8742957 100644
--- a/inference-engine/ie_bridges/python/sample/classification_sample.py
+++ b/inference-engine/ie_bridges/python/sample/classification_sample/classification_sample.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from __future__ import print_function
 import sys
 import os
-from argparse import ArgumentParser
+from argparse import ArgumentParser, SUPPRESS
 import cv2
 import numpy as np
 import logging as log
@@ -26,22 +26,29 @@ from openvino.inference_engine import IENetwork, IEPlugin
 
 
 def build_argparser():
-    parser = ArgumentParser()
-    parser.add_argument("-m", "--model", help="Path to an .xml file with a trained model.", required=True, type=str)
-    parser.add_argument("-i", "--input", help="Path to a folder with images or path to an image files", required=True,
-                        type=str, nargs="+")
-    parser.add_argument("-l", "--cpu_extension",
-                        help="MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels "
-                             "impl.", type=str, default=None)
-    parser.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None)
-    parser.add_argument("-d", "--device",
-                        help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample "
-                             "will look for a suitable plugin for device specified (CPU by default)", default="CPU",
-                        type=str)
-    parser.add_argument("--labels", help="Labels mapping file", default=None, type=str)
-    parser.add_argument("-nt", "--number_top", help="Number of top results", default=10, type=int)
-    parser.add_argument("-ni", "--number_iter", help="Number of inference iterations", default=1, type=int)
-    parser.add_argument("-pc", "--perf_counts", help="Report performance counters", default=False, action="store_true")
+    parser = ArgumentParser(add_help=False)
+    args = parser.add_argument_group('Options')
+    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
+    args.add_argument("-m", "--model", help="Required. Path to an .xml file with a trained model.", required=True,
+                      type=str)
+    args.add_argument("-i", "--input", help="Required. Path to a folder with images or path to an image files",
+                      required=True,
+                      type=str, nargs="+")
+    args.add_argument("-l", "--cpu_extension",
+                      help="Optional. Required for CPU custom layers. "
+                           "MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the"
+                           " kernels implementations.", type=str, default=None)
+    args.add_argument("-pp", "--plugin_dir", help="Optional. Path to a plugin folder", type=str, default=None)
+    args.add_argument("-d", "--device",
+                      help="Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL, MYRIAD or HETERO: is "
+                           "acceptable. The sample will look for a suitable plugin for device specified. Default "
+                           "value is CPU",
+                      default="CPU", type=str)
+    args.add_argument("--labels", help="Optional. Path to a labels mapping file", default=None, type=str)
+    args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int)
+    args.add_argument("-ni", "--number_iter", help="Optional. Number of inference iterations", default=1, type=int)
+    args.add_argument("-pc", "--perf_counts", help="Optional. Report performance counters", default=False,
+                      action="store_true")
 
     return parser
 
@@ -93,7 +100,6 @@ def main():
     # Loading model to the plugin
     log.info("Loading model to the plugin")
     exec_net = plugin.load(network=net)
-    del net
 
     # Start sync inference
     log.info("Starting inference ({} iterations)".format(args.number_iter))
@@ -101,7 +107,7 @@ def main():
     for i in range(args.number_iter):
         t0 = time()
         res = exec_net.infer(inputs={input_blob: images})
-        infer_time.append((time()-t0)*1000)
+        infer_time.append((time() - t0) * 1000)
     log.info("Average running time of one iteration: {} ms".format(np.average(np.asarray(infer_time))))
     if args.perf_counts:
         perf_counts = exec_net.requests[0].get_perf_counts()
@@ -120,18 +126,25 @@ def main():
             labels_map = [x.split(sep=' ', maxsplit=1)[-1].strip() for x in f]
     else:
         labels_map = None
+    classid_str = "classid"
+    probability_str = "probability"
     for i, probs in enumerate(res):
         probs = np.squeeze(probs)
         top_ind = np.argsort(probs)[-args.number_top:][::-1]
         print("Image {}\n".format(args.input[i]))
+        print(classid_str, probability_str)
+        print("{} {}".format('-' * len(classid_str), '-' * len(probability_str)))
         for id in top_ind:
-            det_label = labels_map[id] if labels_map else "#{}".format(id)
-            print("{:.7f} label {}".format(probs[id], det_label))
+            det_label = labels_map[id] if labels_map else "{}".format(id)
+            label_length = len(det_label)
+            space_num_before = (len(classid_str) - label_length) // 2
+            space_num_after = len(classid_str) - (space_num_before + label_length) + 2
+            space_num_before_prob = (len(probability_str) - len(str(probs[id]))) // 2
+            print("{}{}{}{}{:.7f}".format(' ' * space_num_before, det_label,
+                                          ' ' * space_num_after, ' ' * space_num_before_prob,
+                                          probs[id]))
         print("\n")
 
-    del exec_net
-    del plugin
-
 
 if __name__ == '__main__':
     sys.exit(main() or 0)
diff --git a/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md
new file mode 100644
index 000000000..e121f4a1f
--- /dev/null
+++ b/inference-engine/ie_bridges/python/sample/classification_sample_async/README.md
@@ -0,0 +1,89 @@
+# Image Classification Python* Sample Async
+
+This sample demonstrates how to build and execute inference in pipelined mode on example of classifications networks.
+
+The pipelined mode might increase the throughput of the pictures. The latency of one inference will be the same as for synchronous execution.
+<br>
+The throughput increases due to follow reasons:
+* Some plugins have heterogeneity inside themselves: data transferring, execution on remote device, pre-processing and post-processing on the host.
+* Using of explicit heterogeneous plugin with execution of different parts of network on different devices, for example HETERO:CPU,GPU.
+
+When two or more devices process one image, creating several infer requests and starting asynchronous inference allow for using devices in the most efficient way.
+If two devices are involved in execution, the most optimal value for `-nireq` option is 2.
+To process infer requests more efficiently, Classification Sample Async uses round-robin algorithm. It starts execution of the current infer request and switches to waiting for results of the previous one. After finishing of waiting, it switches infer requests and repeat the procedure.
+
+Another required aspect of good throughput is a number of iterations. Only with big number of iterations you can emulate the real application work and get good performance.
+
+The batch mode is an independent attribute on the pipelined mode. Pipelined mode works efficiently with any batch size.
+
+### How It Works
+
+Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference
+Engine plugin.
+Then application creates several infer requests pointed in `-nireq` parameter and loads images for inference.
+
+Then in a loop it starts inference for the current infer request and switches to waiting for the previous one. When results are ready, it swaps infer requests.
+
+When inference is done, the application outputs data to the standard output stream.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+## Running
+
+Running the application with the <code>-h</code> option yields the following usage message:
+```
+python3 classification_sample_async.py -h 
+```
+The command yields the following usage message:
+```
+usage: classification_sample_async.py [-h] -m MODEL -i INPUT [INPUT ...]
+                                      [-l CPU_EXTENSION] [-pp PLUGIN_DIR]
+                                      [-d DEVICE] [--labels LABELS]
+                                      [-nt NUMBER_TOP] [-ni NUMBER_ITER] [-pc]
+
+Options:
+  -h, --help            Show this help message and exit.
+  -m MODEL, --model MODEL
+                        Required. Path to an .xml file with a trained model.
+  -i INPUT [INPUT ...], --input INPUT [INPUT ...]
+                        Required. Path to a folder with images or path to an
+                        image files
+  -l CPU_EXTENSION, --cpu_extension CPU_EXTENSION
+                        Optional. Required for CPU custom layers. Absolute
+                        path to a shared library with the kernels
+                        implementations.
+  -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
+                        Optional. Path to a plugin folder
+  -d DEVICE, --device DEVICE
+                        Optional. Specify the target device to infer on; CPU,
+                        GPU, FPGA, HDDL or MYRIAD is acceptable. The sample
+                        will look for a suitable plugin for device specified.
+                        Default value is CPU
+  --labels LABELS       Optional. Labels mapping file
+  -nt NUMBER_TOP, --number_top NUMBER_TOP
+                        Optional. Number of top results
+  -ni NUMBER_ITER, --number_iter NUMBER_ITER
+                        Optional. Number of inference iterations
+  -pc, --perf_counts    Optional. Report performance counters
+
+```
+
+Running the application with the empty list of options yields the usage message given above and an error message.
+
+To run the sample, you can use AlexNet and GoogLeNet or other image classification models. You can download the pre-trained models with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or from [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+
+You can do inference on an image using a trained AlexNet network on FPGA with fallback to CPU using the following command:
+```
+    python3 classification_sample_async.py -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU -nireq 2 -ni 200
+```
+
+### Sample Output
+
+By default, the application outputs top-10 inference results for each infer request.
+It also provides throughput value measured in frames per seconds.
+
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
diff --git a/inference-engine/ie_bridges/python/sample/classification_sample_async.py b/inference-engine/ie_bridges/python/sample/classification_sample_async/classification_sample_async.py
index ae8655570..601be2da3 100644
--- a/inference-engine/ie_bridges/python/sample/classification_sample_async.py
+++ b/inference-engine/ie_bridges/python/sample/classification_sample_async/classification_sample_async.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from __future__ import print_function
 import sys
 import os
-from argparse import ArgumentParser
+from argparse import ArgumentParser, SUPPRESS
 import cv2
 import numpy as np
 import logging as log
@@ -26,22 +26,26 @@ from openvino.inference_engine import IENetwork, IEPlugin
 
 
 def build_argparser():
-    parser = ArgumentParser()
-    parser.add_argument("-m", "--model", help="Path to an .xml file with a trained model.", required=True, type=str)
-    parser.add_argument("-i", "--input", help="Path to a folder with images or path to an image files", required=True,
-                        type=str, nargs="+")
-    parser.add_argument("-l", "--cpu_extension",
-                        help="MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels "
-                             "impl.", type=str, default=None)
-    parser.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None)
-    parser.add_argument("-d", "--device",
-                        help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample "
-                             "will look for a suitable plugin for device specified (CPU by default)", default="CPU",
-                        type=str)
-    parser.add_argument("--labels", help="Labels mapping file", default=None, type=str)
-    parser.add_argument("-nt", "--number_top", help="Number of top results", default=10, type=int)
-    parser.add_argument("-ni", "--number_iter", help="Number of inference iterations", default=1, type=int)
-    parser.add_argument("-pc", "--perf_counts", help="Report performance counters", default=False, action="store_true")
+    parser = ArgumentParser(add_help=False)
+    args = parser.add_argument_group('Options')
+    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
+    args.add_argument("-m", "--model", help="Required. Path to an .xml file with a trained model.",
+                      required=True, type=str)
+    args.add_argument("-i", "--input", help="Required. Path to a folder with images or path to an image files",
+                      required=True, type=str, nargs="+")
+    args.add_argument("-l", "--cpu_extension",
+                      help="Optional. Required for CPU custom layers. Absolute path to a shared library with the"
+                           " kernels implementations.", type=str, default=None)
+    args.add_argument("-pp", "--plugin_dir", help="Optional. Path to a plugin folder", type=str, default=None)
+    args.add_argument("-d", "--device",
+                      help="Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is "
+                           "acceptable. The sample will look for a suitable plugin for device specified. Default value is CPU",
+                      default="CPU", type=str)
+    args.add_argument("--labels", help="Optional. Labels mapping file", default=None, type=str)
+    args.add_argument("-nt", "--number_top", help="Optional. Number of top results", default=10, type=int)
+    args.add_argument("-ni", "--number_iter", help="Optional. Number of inference iterations", default=1, type=int)
+    args.add_argument("-pc", "--perf_counts", help="Optional. Report performance counters",
+                      default=False, action="store_true")
 
     return parser
 
@@ -92,7 +96,6 @@ def main():
     # Loading model to the plugin
     log.info("Loading model to the plugin")
     exec_net = plugin.load(network=net)
-    del net
 
     # Start sync inference
     log.info("Starting inference ({} iterations)".format(args.number_iter))
@@ -119,18 +122,25 @@ def main():
             labels_map = [x.split(sep=' ', maxsplit=1)[-1].strip() for x in f]
     else:
         labels_map = None
+    classid_str = "classid"
+    probability_str = "probability"
     for i, probs in enumerate(res):
         probs = np.squeeze(probs)
         top_ind = np.argsort(probs)[-args.number_top:][::-1]
         print("Image {}\n".format(args.input[i]))
+        print(classid_str, probability_str)
+        print("{} {}".format('-' * len(classid_str), '-' * len(probability_str)))
         for id in top_ind:
-            det_label = labels_map[id] if labels_map else "#{}".format(id)
-            print("{:.7f} {}".format(probs[id], det_label))
+            det_label = labels_map[id] if labels_map else "{}".format(id)
+            label_length = len(det_label)
+            space_num_before = (7 - label_length) // 2
+            space_num_after = 7 - (space_num_before + label_length) + 2
+            space_num_before_prob = (11 - len(str(probs[id]))) // 2
+            print("{}{}{}{}{:.7f}".format(' ' * space_num_before, det_label,
+                                          ' ' * space_num_after, ' ' * space_num_before_prob,
+                                          probs[id]))
         print("\n")
 
-    del exec_net
-    del plugin
-
 
 if __name__ == '__main__':
     sys.exit(main() or 0)
diff --git a/inference-engine/ie_bridges/python/sample/greengrass_samples/Greengrass-FaaS-User-Guide.docx b/inference-engine/ie_bridges/python/sample/greengrass_samples/Greengrass-FaaS-User-Guide.docx
deleted file mode 100644
index 6fedb499e..000000000
--- a/inference-engine/ie_bridges/python/sample/greengrass_samples/Greengrass-FaaS-User-Guide.docx
+++ /dev/null
diff --git a/inference-engine/ie_bridges/python/sample/greengrass_samples/README.md b/inference-engine/ie_bridges/python/sample/greengrass_samples/README.md
deleted file mode 100644
index fc9add38c..000000000
--- a/inference-engine/ie_bridges/python/sample/greengrass_samples/README.md
+++ /dev/null
@@ -1,49 +0,0 @@
-# This README demonstrates use of all GreenGrass samples
- 
-# GreenGrass Classification Sample
-
-This topic demonstrates how to build and run the GreenGrass Image Classification sample application, which does inference using image classification networks like AlexNet and GoogLeNet on on Intel® Processors, Intel® HD Graphics and Intel® FPGA.
-
-## Running
-
-1. Modify the "accelerator" parameter inside the sample to deploy the sample on any accelerator option of your choice(CPU/GPU/FPGA)  
-   For CPU, please specify "CPU"  
-   For GPU, please specify "GPU"  
-   For FPGA, please specify "HETERO:FPGA,CPU"  
-2. Enable the option(s) on how output is displayed/consumed 
-3. Now follow the instructions listed in the Greengrass-FaaS-User-Guide.pdf to create the lambda and deploy on edge device using Greengrass
- 
-### Outputs
-
-The application publishes top-10 results on AWS IoT Cloud every second by default. For other output consumption options, please refer to Greengrass-FaaS-User-Guide.pdf
-
-### How it works
-
-Upon deployment,the sample application loads a network and an image to the Inference Engine plugin. When inference is done, the application publishes results to AWS IoT Cloud 
-
-=====================================================================================================
-
-# GreenGrass Object Detection Sample SSD
-
-This topic demonstrates how to run the GreenGrass Object Detection SSD sample application, which does inference using object detection networks like Squeezenet-SSD on Intel® Processors, Intel® HD Graphics and Intel® FPGA.
-
-## Running
-
-1. Modify the "accelerator" parameter inside the sample to deploy the sample on any accelerator option of your choice(CPU/GPU/FPGA)  
-   For CPU, please specify "CPU"  
-   For GPU, please specify "GPU"  
-   For FPGA, please specify "HETERO:FPGA,CPU"  
-2. Enable the option(s) on how output is displayed/consumed 
-3. Set the variable is_async_mode to 'True' for Asynchronous execution and 'False' for Synchronous execution
-3. Now follow the instructions listed in the Greengrass-FaaS-User-Guide.pdf to create the lambda and deploy on edge device using Greengrass
- 
-### Outputs
-
-The application publishes detection outputs such as class label, class confidence, and bounding box coordinates on AWS IoT Cloud every second. For other output consumption options, please refer to Greengrass-FaaS-User-Guide.pdf  
-
-### How it works
-
-Upon deployment,the sample application loads a network and an image to the Inference Engine plugin. When inference is done, the application publishes results to AWS IoT Cloud 
- 
-
-
diff --git a/inference-engine/ie_bridges/python/sample/greengrass_samples/greengrass_classification_sample.py b/inference-engine/ie_bridges/python/sample/greengrass_samples/greengrass_classification_sample.py
deleted file mode 100644
index 193c5a5bb..000000000
--- a/inference-engine/ie_bridges/python/sample/greengrass_samples/greengrass_classification_sample.py
+++ /dev/null
@@ -1,180 +0,0 @@
-"""
-BSD 3-clause "New" or "Revised" license
-
-Copyright (C) 2018 Intel Corporation.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""
-
-import sys
-import os
-import cv2
-import numpy as np
-import greengrasssdk
-import boto3
-import timeit
-import datetime
-import json
-from collections import OrderedDict
-
-from openvino.inference_engine import IENetwork, IEPlugin
-
-# Specify the delta in seconds between each report
-reporting_interval = 1.0
-
-# Parameters for IoT Cloud
-enable_iot_cloud_output = True
-
-# Parameters for Kinesis
-enable_kinesis_output = False
-kinesis_stream_name = ""
-kinesis_partition_key = ""
-kinesis_region = ""
-
-# Parameters for S3
-enable_s3_jpeg_output = False
-s3_bucket_name = ""
-
-# Parameters for jpeg output on local disk
-enable_local_jpeg_output = False
-
-# Create a Greengrass Core SDK client for publishing messages to AWS Cloud
-client = greengrasssdk.client("iot-data")
-
-# Create an S3 client for uploading files to S3
-if enable_s3_jpeg_output:
-    s3_client = boto3.client("s3")
-
-# Create a Kinesis client for putting records to streams
-if enable_kinesis_output:
-    kinesis_client = boto3.client("kinesis", "us-west-2")
-
-# Read environment variables set by Lambda function configuration
-PARAM_MODEL_XML = os.environ.get("PARAM_MODEL_XML")
-PARAM_INPUT_SOURCE = os.environ.get("PARAM_INPUT_SOURCE")
-PARAM_DEVICE = os.environ.get("PARAM_DEVICE")
-PARAM_OUTPUT_DIRECTORY = os.environ.get("PARAM_OUTPUT_DIRECTORY")
-PARAM_CPU_EXTENSION_PATH = os.environ.get("PARAM_CPU_EXTENSION_PATH")
-PARAM_LABELMAP_FILE = os.environ.get("PARAM_LABELMAP_FILE")
-PARAM_TOPIC_NAME = os.environ.get("PARAM_TOPIC_NAME", "intel/faas/classification")
-PARAM_NUM_TOP_RESULTS = int(os.environ.get("PARAM_NUM_TOP_RESULTS", "10"))
-
-
-def report(res_json, frame):
-    now = datetime.datetime.now()
-    date_prefix = str(now).replace(" ", "_")
-    if enable_iot_cloud_output:
-        data = json.dumps(res_json)
-        client.publish(topic=PARAM_TOPIC_NAME, payload=data)
-    if enable_kinesis_output:
-        kinesis_client.put_record(StreamName=kinesis_stream_name, Data=json.dumps(res_json),
-                                  PartitionKey=kinesis_partition_key)
-    if enable_s3_jpeg_output:
-        temp_image = os.path.join(PARAM_OUTPUT_DIRECTORY, "inference_result.jpeg")
-        cv2.imwrite(temp_image, frame)
-        with open(temp_image) as file:
-            image_contents = file.read()
-            s3_client.put_object(Body=image_contents, Bucket=s3_bucket_name, Key=date_prefix + ".jpeg")
-    if enable_local_jpeg_output:
-        cv2.imwrite(os.path.join(PARAM_OUTPUT_DIRECTORY, date_prefix + ".jpeg"), frame)
-
-
-def greengrass_classification_sample_run():
-    client.publish(topic=PARAM_TOPIC_NAME, payload="OpenVINO: Initializing...")
-    model_bin = os.path.splitext(PARAM_MODEL_XML)[0] + ".bin"
-
-    # Plugin initialization for specified device and load extensions library if specified
-    plugin = IEPlugin(device=PARAM_DEVICE, plugin_dirs="")
-    if "CPU" in PARAM_DEVICE:
-        plugin.add_cpu_extension(PARAM_CPU_EXTENSION_PATH)
-    # Read IR
-    net = IENetwork(model=PARAM_MODEL_XML, weights=model_bin)
-    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
-    assert len(net.outputs) == 1, "Sample supports only single output topologies"
-    input_blob = next(iter(net.inputs))
-    out_blob = next(iter(net.outputs))
-    # Read and pre-process input image
-    n, c, h, w = net.inputs[input_blob]
-    cap = cv2.VideoCapture(PARAM_INPUT_SOURCE)
-    exec_net = plugin.load(network=net)
-    del net
-    client.publish(topic=PARAM_TOPIC_NAME, payload="Starting inference on %s" % PARAM_INPUT_SOURCE)
-    start_time = timeit.default_timer()
-    inf_seconds = 0.0
-    frame_count = 0
-    res_json = []
-    labeldata = None
-    if PARAM_LABELMAP_FILE is not None:
-        with open(PARAM_LABELMAP_FILE) as labelmap_file:
-            labeldata = json.load(labelmap_file)
-
-    while (cap.isOpened()):
-        ret, frame = cap.read()
-        if not ret:
-            break
-        frameid = cap.get(cv2.CAP_PROP_POS_FRAMES)
-        initial_w = cap.get(3)
-        initial_h = cap.get(4)
-        in_frame = cv2.resize(frame, (w, h))
-        in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
-        in_frame = in_frame.reshape((n, c, h, w))
-        # Start synchronous inference
-        inf_start_time = timeit.default_timer()
-        res = exec_net.infer(inputs={input_blob: in_frame})
-        inf_seconds += timeit.default_timer() - inf_start_time
-        top_ind = np.argsort(res[out_blob], axis=1)[0, -PARAM_NUM_TOP_RESULTS:][::-1]
-        # Parse detection results of the current request
-        res_json = OrderedDict()
-        res_json["Candidates"] = OrderedDict()
-        frame_timestamp = datetime.datetime.now()
-
-        for i in top_ind:
-            classlabel = labeldata[str(i)] if labeldata else str(i)
-            res_json["Candidates"][classlabel] = round(res[out_blob][0, i], 2)
-
-        frame_count += 1
-        # Measure elapsed seconds since the last report
-        seconds_elapsed = timeit.default_timer() - start_time
-        if seconds_elapsed >= reporting_interval:
-            res_json["timestamp"] = frame_timestamp.isoformat()
-            res_json["frame_id"] = int(frameid)
-            res_json["inference_fps"] = frame_count / inf_seconds
-            start_time = timeit.default_timer()
-            report(res_json, frame)
-            frame_count = 0
-            inf_seconds = 0.0
-
-    client.publish(topic=PARAM_TOPIC_NAME, payload="End of the input, exiting...")
-    del exec_net
-    del plugin
-
-
-greengrass_classification_sample_run()
-
-
-def function_handler(event, context):
-    client.publish(topic=PARAM_TOPIC_NAME, payload='HANDLER_CALLED!')
-    return
diff --git a/inference-engine/ie_bridges/python/sample/greengrass_samples/greengrass_object_detection_sample_ssd.py b/inference-engine/ie_bridges/python/sample/greengrass_samples/greengrass_object_detection_sample_ssd.py
deleted file mode 100644
index e6898bee3..000000000
--- a/inference-engine/ie_bridges/python/sample/greengrass_samples/greengrass_object_detection_sample_ssd.py
+++ /dev/null
@@ -1,184 +0,0 @@
-"""
-BSD 3-clause "New" or "Revised" license
-
-Copyright (C) 2018 Intel Corporation.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""
-
-import sys
-import os
-import cv2
-import numpy as np
-import greengrasssdk
-import boto3
-import timeit
-import datetime
-import json
-from collections import OrderedDict
-
-from openvino.inference_engine import IENetwork, IEPlugin
-
-# Specify the delta in seconds between each report
-reporting_interval = 1.0
-
-# Parameters for IoT Cloud
-enable_iot_cloud_output = True
-
-# Parameters for Kinesis
-enable_kinesis_output = False
-kinesis_stream_name = ""
-kinesis_partition_key = ""
-kinesis_region = ""
-
-# Parameters for S3
-enable_s3_jpeg_output = False
-s3_bucket_name = "ssd_test"
-
-# Parameters for jpeg output on local disk
-enable_local_jpeg_output = False
-
-# Create a Greengrass Core SDK client for publishing messages to AWS Cloud
-client = greengrasssdk.client("iot-data")
-
-# Create an S3 client for uploading files to S3
-if enable_s3_jpeg_output:
-    s3_client = boto3.client("s3")
-
-# Create a Kinesis client for putting records to streams
-if enable_kinesis_output:
-    kinesis_client = boto3.client("kinesis", "us-west-2")
-
-# Read environment variables set by Lambda function configuration
-PARAM_MODEL_XML = os.environ.get("PARAM_MODEL_XML")
-PARAM_INPUT_SOURCE = os.environ.get("PARAM_INPUT_SOURCE")
-PARAM_DEVICE = os.environ.get("PARAM_DEVICE")
-PARAM_OUTPUT_DIRECTORY = os.environ.get("PARAM_OUTPUT_DIRECTORY")
-PARAM_CPU_EXTENSION_PATH = os.environ.get("PARAM_CPU_EXTENSION_PATH")
-PARAM_LABELMAP_FILE = os.environ.get("PARAM_LABELMAP_FILE")
-PARAM_TOPIC_NAME = os.environ.get("PARAM_TOPIC_NAME", "intel/faas/ssd")
-
-
-def report(res_json, frame):
-    now = datetime.datetime.now()
-    date_prefix = str(now).replace(" ", "_")
-    if enable_iot_cloud_output:
-        data = json.dumps(res_json)
-        client.publish(topic=PARAM_TOPIC_NAME, payload=data)
-    if enable_kinesis_output:
-        kinesis_client.put_record(StreamName=kinesis_stream_name, Data=json.dumps(res_json),
-                                  PartitionKey=kinesis_partition_key)
-    if enable_s3_jpeg_output:
-        temp_image = os.path.join(PARAM_OUTPUT_DIRECTORY, "inference_result.jpeg")
-        cv2.imwrite(temp_image, frame)
-        with open(temp_image) as file:
-            image_contents = file.read()
-            s3_client.put_object(Body=image_contents, Bucket=s3_bucket_name, Key=date_prefix + ".jpeg")
-    if enable_local_jpeg_output:
-        cv2.imwrite(os.path.join(PARAM_OUTPUT_DIRECTORY, date_prefix + ".jpeg"), frame)
-
-
-def greengrass_object_detection_sample_ssd_run():
-    client.publish(topic=PARAM_TOPIC_NAME, payload="OpenVINO: Initializing...")
-    model_bin = os.path.splitext(PARAM_MODEL_XML)[0] + ".bin"
-
-    # Plugin initialization for specified device and load extensions library if specified
-    plugin = IEPlugin(device=PARAM_DEVICE, plugin_dirs="")
-    if "CPU" in PARAM_DEVICE:
-        plugin.add_cpu_extension(PARAM_CPU_EXTENSION_PATH)
-    # Read IR
-    net = IENetwork(model=PARAM_MODEL_XML, weights=model_bin)
-    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
-    assert len(net.outputs) == 1, "Sample supports only single output topologies"
-    input_blob = next(iter(net.inputs))
-    out_blob = next(iter(net.outputs))
-    # Read and pre-process input image
-    n, c, h, w = net.inputs[input_blob]
-    cap = cv2.VideoCapture(PARAM_INPUT_SOURCE)
-    exec_net = plugin.load(network=net)
-    del net
-    client.publish(topic=PARAM_TOPIC_NAME, payload="Starting inference on %s" % PARAM_INPUT_SOURCE)
-    start_time = timeit.default_timer()
-    inf_seconds = 0.0
-    frame_count = 0
-    labeldata = None
-    if PARAM_LABELMAP_FILE is not None:
-        with open(PARAM_LABELMAP_FILE) as labelmap_file:
-            labeldata = json.load(labelmap_file)
-
-    while (cap.isOpened()):
-        ret, frame = cap.read()
-        if not ret:
-            break
-        frameid = cap.get(cv2.CAP_PROP_POS_FRAMES)
-        initial_w = cap.get(3)
-        initial_h = cap.get(4)
-        in_frame = cv2.resize(frame, (w, h))
-        in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
-        in_frame = in_frame.reshape((n, c, h, w))
-        # Start synchronous inference
-        inf_start_time = timeit.default_timer()
-        res = exec_net.infer(inputs={input_blob: in_frame})
-        inf_seconds += timeit.default_timer() - inf_start_time
-        # Parse detection results of the current request
-        res_json = OrderedDict()
-        frame_timestamp = datetime.datetime.now()
-        object_id = 0
-        for obj in res[out_blob][0][0]:
-            if obj[2] > 0.5:
-                xmin = int(obj[3] * initial_w)
-                ymin = int(obj[4] * initial_h)
-                xmax = int(obj[5] * initial_w)
-                ymax = int(obj[6] * initial_h)
-                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 165, 20), 4)
-                obj_id = "Object" + str(object_id)
-                classlabel = labeldata[str(int(obj[1]))] if labeldata else ""
-                res_json[obj_id] = {"label": int(obj[1]), "class": classlabel, "confidence": round(obj[2], 2), "xmin": round(
-                    obj[3], 2), "ymin": round(obj[4], 2), "xmax": round(obj[5], 2), "ymax": round(obj[6], 2)}
-                object_id += 1
-        frame_count += 1
-        # Measure elapsed seconds since the last report
-        seconds_elapsed = timeit.default_timer() - start_time
-        if seconds_elapsed >= reporting_interval:
-            res_json["timestamp"] = frame_timestamp.isoformat()
-            res_json["frame_id"] = int(frameid)
-            res_json["inference_fps"] = frame_count / inf_seconds
-            start_time = timeit.default_timer()
-            report(res_json, frame)
-            frame_count = 0
-            inf_seconds = 0.0
-
-    client.publish(topic=PARAM_TOPIC_NAME, payload="End of the input, exiting...")
-    del exec_net
-    del plugin
-
-
-greengrass_object_detection_sample_ssd_run()
-
-
-def function_handler(event, context):
-    client.publish(topic=PARAM_TOPIC_NAME, payload='HANDLER_CALLED!')
-    return
diff --git a/inference-engine/ie_bridges/python/sample/jupyter_notebooks/classification_demo/classification_demo.ipynb b/inference-engine/ie_bridges/python/sample/jupyter_notebooks/classification_demo/classification_demo.ipynb
deleted file mode 100644
index 632672f96..000000000
--- a/inference-engine/ie_bridges/python/sample/jupyter_notebooks/classification_demo/classification_demo.ipynb
+++ /dev/null
@@ -1,463 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This notebook demonstrates the worklflow of a simple image classification task.\n",
-    "We will go through all the pipeline steps: downloading the model, generating the Intermediate Representation (IR) using the Model Optimizer, running inference in Python, and parsing and interpretating the output results.\n",
-    "\n",
-    "To demonstrate the scenario, we will use the pre-trained SquezeNet V1.1 Caffe\\* model. SqueezeNet is a pretty accurate and at the same time lightweight network. For more information about the model, please visit <a href=\"https://github.com/DeepScale/SqueezeNet/\">GitHub</a> page and refer to original <a href=\"https://arxiv.org/abs/1602.07360\">SqueezeNet paper</a>.\n",
-    "\n",
-    "Follow the steps to perform image classification with the SquezeNet V1.1 model:"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**1. Download the model files:** "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%bash\n",
-    "echo \"Downloading deploy.protxt ...\"\n",
-    "if [ -f deploy.prototxt ]; then \n",
-    "    echo \"deploy.protxt file already exists. Downloading skipped\"\n",
-    "else\n",
-    "    wget https://raw.githubusercontent.com/DeepScale/SqueezeNet/a47b6f13d30985279789d08053d37013d67d131b/SqueezeNet_v1.1/deploy.prototxt -q\n",
-    "    echo \"Finished!\"\n",
-    "fi"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%bash\n",
-    "! echo \"Downloading squeezenet_v1.1.caffemodel ...\"\n",
-    "if [ -f squeezenet_v1.1.caffemodel ]; then\n",
-    "    echo \"squeezenet_v1.1.caffemodel file already exists. Download skipped\"\n",
-    "else\n",
-    "    wget https://github.com/DeepScale/SqueezeNet/raw/a47b6f13d30985279789d08053d37013d67d131b/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel -q\n",
-    "    echo \"Finished!\"\n",
-    "fi"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Run the following command to see the model files:**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!ls -la"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "* `deploy.prototxt` contains the network toplogy description in text format. \n",
-    "* `squeezenet_v1.1.caffemodel` contains weights for all network layers"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**2. Optimize and convert the model from intial Caffe representation to the IR representation, which is required for scoring the model using Inference Engine. To convert and optimize the model, use the Model Optimizer command line tool.**\n",
-    "\n",
-    "To locate Model Optimizer scripts, specify the path to the Model Optimizer root directory in the `MO_ROOT` variable in the cell bellow and then run it (If you use the installed OpenVINO&trade; package, you can find the Model Optimizer in `<INSTALLATION_ROOT_DIR>/deployment_tools/model_optimizer`)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%bash\n",
-    "MO_ROOT=/localdisk/repos/model-optimizer-tensorflow/\n",
-    "echo $MO_ROOT\n",
-    "python3 $MO_ROOT/mo.py --input_model squeezenet_v1.1.caffemodel --input_proto deploy.prototxt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**3. Now, you have the SqueezeNet model converted to the IR, and you can infer it.**\n",
-    "\n",
-    "a. First, import required modules:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from openvino.inference_engine import IENetwork, IEPlugin\n",
-    "import numpy as np\n",
-    "import cv2\n",
-    "import logging as log\n",
-    "from time import time\n",
-    "import sys\n",
-    "import glob\n",
-    "import os\n",
-    "from matplotlib import pyplot as plt\n",
-    "%matplotlib inline"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "b. Initialize required constants:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Configure logging format\n",
-    "log.basicConfig(format=\"[ %(levelname)s ] %(message)s\", level=log.INFO, stream=sys.stdout)\n",
-    "\n",
-    "# Path to IR model files\n",
-    "MODEL_XML = \"./squeezenet_v1.1.xml\"\n",
-    "MODEL_BIN = \"./squeezenet_v1.1.bin\"\n",
-    "\n",
-    "# Target device to run inference\n",
-    "TARGET_DEVICE = \"CPU\"\n",
-    "\n",
-    "# Folder with input images for the model\n",
-    "IMAGES_FOLDER = \"./images\"\n",
-    "\n",
-    "# File containing information about classes names \n",
-    "LABELS_FILE = \"./image_net_synset.txt\"\n",
-    "\n",
-    "# Number of top prediction results to parse\n",
-    "NTOP = 5\n",
-    "\n",
-    "# Required batch size - number of images which will be processed in parallel\n",
-    "BATCH = 4"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "c. Create a plugin instance for the specified target device  \n",
-    "d. Read the IR files and create an `IENEtwork` instance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plugin = IEPlugin(TARGET_DEVICE)\n",
-    "net = IENetwork(model=MODEL_XML, weights=MODEL_BIN)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "e. Set the network batch size to the constatns specified above. \n",
-    "\n",
-    "Batch size is an \"amount\" of input data that will be infered in parallel. In this cases it is a number of images, which will be classified in parallel. \n",
-    "\n",
-    "You can set the network batch size using one of the following options:\n",
-    "1. On the IR generation stage, run the Model Optimizer with `-b` command line option. For example, to generate the IR with batch size equal to 4, add `-b 4` to Model Optimizer command line options. By default, it takes the batch size from the original network in framework representation (usually, it is equal to 1, but in this case, the original Caffe model is provided with the batch size equal to 10). \n",
-    "2. Use Inference Engine after reading IR. We will use this option.\n",
-    "\n",
-    "To set the batch size with the Inference Engine:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "log.info(\"Current network batch size is {}, will be changed to {}\".format(net.batch_size, BATCH))\n",
-    "net.batch_size = BATCH"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "f. After setting batch size, you can get required information about network input layers.\n",
-    "To preprocess input images, you need to know input layer shape.\n",
-    "\n",
-    "`inputs` property of `IENetwork` returns the dicitonary with input layer names and `InputInfo` objects, which contain information about an input layer including its shape.\n",
-    "\n",
-    "SqueezeNet is a single-input toplogy, so to get the input layer name and its shape, you can get the first item from the `inputs` dictionary:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_layer = next(iter(net.inputs))\n",
-    "n,c,h,w = net.inputs[input_layer].shape\n",
-    "layout = net.inputs[input_layer].layout\n",
-    "log.info(\"Network input layer {} has shape {} and layout {}\".format(input_layer, (n,c,h,w), layout))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "So what do the shape and layout mean?  \n",
-    "Layout will helps to interprete the shape dimsesnions meaning. \n",
-    "\n",
-    "`NCHW` input layer layout means:\n",
-    "* the fisrt dimension of an input data is a batch of **N** images processed in parallel \n",
-    "* the second dimension is a numnber of **C**hannels expected in the input images\n",
-    "* the third and the forth are a spatial dimensions - **H**eight and **W**idth of an input image\n",
-    "\n",
-    "Our shapes means that the network expects four 3-channel images running in parallel with size 227x227."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "g. Read and preprocess input images.\n",
-    "\n",
-    "For it, go to `IMAGES_FOLDER`, find all `.bmp` files, and take four images for inference:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "search_pattern = os.path.join(IMAGES_FOLDER, \"*.bmp\")\n",
-    "images = glob.glob(search_pattern)[:BATCH]\n",
-    "log.info(\"Input images:\\n {}\".format(\"\\n\".join(images)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now you can read and preprocess the image files and create an array with input blob data.\n",
-    "\n",
-    "For preprocessing, you must do the following:\n",
-    "1. Resize the images to fit the HxW input dimenstions.\n",
-    "2. Transpose the HWC layout.\n",
-    "\n",
-    "Transposing is tricky and not really obvious.\n",
-    "As you alredy saw above, the network has the `NCHW` layout, so each input image should be in `CHW` format. But by deafult, OpenCV\\* reads images in the `HWC` format. That is why you have to swap the axes using the `numpy.transpose()` function:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "input_data = np.ndarray(shape=(n, c, h, w))\n",
-    "orig_images = [] # Will be used to show image in notebook\n",
-    "for i, img in enumerate(images):\n",
-    "    image = cv2.imread(img)\n",
-    "    orig_images.append(image)\n",
-    "    if image.shape[:-1] != (h, w):\n",
-    "        log.warning(\"Image {} is resized from {} to {}\".format(img, image.shape[:-1], (h, w)))\n",
-    "        image = cv2.resize(image, (w, h))\n",
-    "    image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW\n",
-    "    input_data[i] = image"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "i. Infer the model model to classify input images:\n",
-    "\n",
-    "1. Load the `IENetwork` object to the plugin to create `ExectuableNEtwork` object.    \n",
-    "2. Start inference using the `infer()` function specifying dictionary with input layer name and prepared data as an argument for the function.     \n",
-    "3. Measure inference time in miliseconds and calculate throughput metric in frames-per-second (FPS)."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "exec_net = plugin.load(net)\n",
-    "t0 = time()\n",
-    "res_map = exec_net.infer({input_layer: input_data})\n",
-    "inf_time = (time() - t0) * 1000 \n",
-    "fps = BATCH * inf_time \n",
-    "log.info(\"Inference time: {} ms.\".format(inf_time))\n",
-    "log.info(\"Throughput: {} fps.\".format(fps))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**4. After the inference, you need to parse and interpretate the inference results.**\n",
-    "\n",
-    "First, you need to see the shape of the network output layer. It can be done in similar way as for the inputs, but here you need to call `outputs` property of `IENetwork` object:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "output_layer = next(iter(net.outputs))\n",
-    "n,c,h,w = net.outputs[output_layer].shape\n",
-    "layout = net.outputs[output_layer].layout\n",
-    "log.info(\"Network output layer {} has shape {} and layout {}\".format(output_layer, (n,c,h,w), layout))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "It is not a common case for classification netowrks to have output layer with *NCHW* layout. Usually, it is just *NC*. However, in this case, the last two dimensions are just a feature of the network and do not have much sense. Ignore them as you will remove  them on the final parsing stage. \n",
-    "\n",
-    "What are the first and second dimensions of the output layer?    \n",
-    "* The first dimension is a batch. We precoessed four images, and the prediction result for a particular image is stored in the first dimension of the output array. For example, prediction results for the third image is `res[2]` (since numeration starts from 0).\n",
-    "* The second dimension is an array with normalized probabilities (from 0 to 1) for each class. This network is trained using the <a href=\"http://image-net.org/index\">ImageNet</a> dataset with 1000 classes. Each `n`-th value in the output data for a certain image represent the probability of the image belonging to the `n`-th class. "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "To parse the output results:\n",
-    "\n",
-    "a. Read the `LABELS_FILE`, which maps the class ID to human-readable class names:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "with open(LABELS_FILE, 'r') as f:\n",
-    "    labels_map = [x.split(sep=' ', maxsplit=1)[-1].strip() for x in f]\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "b. Parse the output array with prediction results. The parsing algorith is the following:\n",
-    "0. Squeeze the last two \"extra\" dimensions of the output data.\n",
-    "1. Iterate over all batches.\n",
-    "2. Sort the probabilities vector descendingly to get `NTOP` classes with the highest probabilities (by default, the `numpy.argsort` sorts the data in the ascending order, but using the array slicing `[::-1]`, you can reverse the data order).\n",
-    "3. Map the `NTOP` probabilities to the corresponding labeles in `labeles_map`.\n",
-    "\n",
-    "For the vizualization, you also need to store top-1 class and probability."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "top1_res = [] # will be used for the visualization\n",
-    "res = np.squeeze(res_map[output_layer])\n",
-    "log.info(\"Top {} results: \".format(NTOP))\n",
-    "for i, probs in enumerate(res):\n",
-    "    top_ind = np.argsort(probs)[-NTOP:][::-1]\n",
-    "    print(\"Image {}\".format(images[i]))\n",
-    "    top1_ind = top_ind[0]\n",
-    "    top1_res.append((labels_map[top1_ind], probs[top1_ind]))\n",
-    "    for id in top_ind:\n",
-    "        print(\"label: {}   probability: {:.2f}% \".format(labels_map[id], probs[id] * 100))\n",
-    "    print(\"\\n\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The code above prints the results as plain text.   \n",
-    "You can also use OpenCV\\* to visualize the results using the `orig_images` and `top1_res` variables, which you created during images reading and results parsing:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.clf()\n",
-    "for i, img in enumerate(orig_images):\n",
-    "    label_str = \"{}\".format(top1_res[i][0].split(',')[0])\n",
-    "    prob_str = \"{:.2f}%\".format(top1_res[i][1])\n",
-    "    cv2.putText(img, label_str, (5, 15), cv2.FONT_HERSHEY_COMPLEX, 0.6, (220,100,10), 1)\n",
-    "    cv2.putText(img, prob_str, (5, 35), cv2.FONT_HERSHEY_COMPLEX, 0.6, (220,100,10), 1)\n",
-    "    plt.figure()\n",
-    "    plt.axis(\"off\")\n",
-    "    \n",
-    "    # We have to convert colors, because matplotlib expects an image in RGB color format \n",
-    "    # but by default, the OpenCV read images in BRG format\n",
-    "    im_to_show = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
-    "    plt.imshow(im_to_show)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/inference-engine/ie_bridges/python/sample/jupyter_notebooks/classification_demo/image_net_synset.txt b/inference-engine/ie_bridges/python/sample/jupyter_notebooks/classification_demo/image_net_synset.txt
deleted file mode 100644
index a9e8c7f50..000000000
--- a/inference-engine/ie_bridges/python/sample/jupyter_notebooks/classification_demo/image_net_synset.txt
+++ /dev/null
@@ -1,1000 +0,0 @@
-n01440764 tench, Tinca tinca
-n01443537 goldfish, Carassius auratus
-n01484850 great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias
-n01491361 tiger shark, Galeocerdo cuvieri
-n01494475 hammerhead, hammerhead shark
-n01496331 electric ray, crampfish, numbfish, torpedo
-n01498041 stingray
-n01514668 cock
-n01514859 hen
-n01518878 ostrich, Struthio camelus
-n01530575 brambling, Fringilla montifringilla
-n01531178 goldfinch, Carduelis carduelis
-n01532829 house finch, linnet, Carpodacus mexicanus
-n01534433 junco, snowbird
-n01537544 indigo bunting, indigo finch, indigo bird, Passerina cyanea
-n01558993 robin, American robin, Turdus migratorius
-n01560419 bulbul
-n01580077 jay
-n01582220 magpie
-n01592084 chickadee
-n01601694 water ouzel, dipper
-n01608432 kite
-n01614925 bald eagle, American eagle, Haliaeetus leucocephalus
-n01616318 vulture
-n01622779 great grey owl, great gray owl, Strix nebulosa
-n01629819 European fire salamander, Salamandra salamandra
-n01630670 common newt, Triturus vulgaris
-n01631663 eft
-n01632458 spotted salamander, Ambystoma maculatum
-n01632777 axolotl, mud puppy, Ambystoma mexicanum
-n01641577 bullfrog, Rana catesbeiana
-n01644373 tree frog, tree-frog
-n01644900 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui
-n01664065 loggerhead, loggerhead turtle, Caretta caretta
-n01665541 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea
-n01667114 mud turtle
-n01667778 terrapin
-n01669191 box turtle, box tortoise
-n01675722 banded gecko
-n01677366 common iguana, iguana, Iguana iguana
-n01682714 American chameleon, anole, Anolis carolinensis
-n01685808 whiptail, whiptail lizard
-n01687978 agama
-n01688243 frilled lizard, Chlamydosaurus kingi
-n01689811 alligator lizard
-n01692333 Gila monster, Heloderma suspectum
-n01693334 green lizard, Lacerta viridis
-n01694178 African chameleon, Chamaeleo chamaeleon
-n01695060 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis
-n01697457 African crocodile, Nile crocodile, Crocodylus niloticus
-n01698640 American alligator, Alligator mississipiensis
-n01704323 triceratops
-n01728572 thunder snake, worm snake, Carphophis amoenus
-n01728920 ringneck snake, ring-necked snake, ring snake
-n01729322 hognose snake, puff adder, sand viper
-n01729977 green snake, grass snake
-n01734418 king snake, kingsnake
-n01735189 garter snake, grass snake
-n01737021 water snake
-n01739381 vine snake
-n01740131 night snake, Hypsiglena torquata
-n01742172 boa constrictor, Constrictor constrictor
-n01744401 rock python, rock snake, Python sebae
-n01748264 Indian cobra, Naja naja
-n01749939 green mamba
-n01751748 sea snake
-n01753488 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus
-n01755581 diamondback, diamondback rattlesnake, Crotalus adamanteus
-n01756291 sidewinder, horned rattlesnake, Crotalus cerastes
-n01768244 trilobite
-n01770081 harvestman, daddy longlegs, Phalangium opilio
-n01770393 scorpion
-n01773157 black and gold garden spider, Argiope aurantia
-n01773549 barn spider, Araneus cavaticus
-n01773797 garden spider, Aranea diademata
-n01774384 black widow, Latrodectus mactans
-n01774750 tarantula
-n01775062 wolf spider, hunting spider
-n01776313 tick
-n01784675 centipede
-n01795545 black grouse
-n01796340 ptarmigan
-n01797886 ruffed grouse, partridge, Bonasa umbellus
-n01798484 prairie chicken, prairie grouse, prairie fowl
-n01806143 peacock
-n01806567 quail
-n01807496 partridge
-n01817953 African grey, African gray, Psittacus erithacus
-n01818515 macaw
-n01819313 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita
-n01820546 lorikeet
-n01824575 coucal
-n01828970 bee eater
-n01829413 hornbill
-n01833805 hummingbird
-n01843065 jacamar
-n01843383 toucan
-n01847000 drake
-n01855032 red-breasted merganser, Mergus serrator
-n01855672 goose
-n01860187 black swan, Cygnus atratus
-n01871265 tusker
-n01872401 echidna, spiny anteater, anteater
-n01873310 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus
-n01877812 wallaby, brush kangaroo
-n01882714 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus
-n01883070 wombat
-n01910747 jellyfish
-n01914609 sea anemone, anemone
-n01917289 brain coral
-n01924916 flatworm, platyhelminth
-n01930112 nematode, nematode worm, roundworm
-n01943899 conch
-n01944390 snail
-n01945685 slug
-n01950731 sea slug, nudibranch
-n01955084 chiton, coat-of-mail shell, sea cradle, polyplacophore
-n01968897 chambered nautilus, pearly nautilus, nautilus
-n01978287 Dungeness crab, Cancer magister
-n01978455 rock crab, Cancer irroratus
-n01980166 fiddler crab
-n01981276 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica
-n01983481 American lobster, Northern lobster, Maine lobster, Homarus americanus
-n01984695 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish
-n01985128 crayfish, crawfish, crawdad, crawdaddy
-n01986214 hermit crab
-n01990800 isopod
-n02002556 white stork, Ciconia ciconia
-n02002724 black stork, Ciconia nigra
-n02006656 spoonbill
-n02007558 flamingo
-n02009229 little blue heron, Egretta caerulea
-n02009912 American egret, great white heron, Egretta albus
-n02011460 bittern
-n02012849 crane
-n02013706 limpkin, Aramus pictus
-n02017213 European gallinule, Porphyrio porphyrio
-n02018207 American coot, marsh hen, mud hen, water hen, Fulica americana
-n02018795 bustard
-n02025239 ruddy turnstone, Arenaria interpres
-n02027492 red-backed sandpiper, dunlin, Erolia alpina
-n02028035 redshank, Tringa totanus
-n02033041 dowitcher
-n02037110 oystercatcher, oyster catcher
-n02051845 pelican
-n02056570 king penguin, Aptenodytes patagonica
-n02058221 albatross, mollymawk
-n02066245 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus
-n02071294 killer whale, killer, orca, grampus, sea wolf, Orcinus orca
-n02074367 dugong, Dugong dugon
-n02077923 sea lion
-n02085620 Chihuahua
-n02085782 Japanese spaniel
-n02085936 Maltese dog, Maltese terrier, Maltese
-n02086079 Pekinese, Pekingese, Peke
-n02086240 Shih-Tzu
-n02086646 Blenheim spaniel
-n02086910 papillon
-n02087046 toy terrier
-n02087394 Rhodesian ridgeback
-n02088094 Afghan hound, Afghan
-n02088238 basset, basset hound
-n02088364 beagle
-n02088466 bloodhound, sleuthhound
-n02088632 bluetick
-n02089078 black-and-tan coonhound
-n02089867 Walker hound, Walker foxhound
-n02089973 English foxhound
-n02090379 redbone
-n02090622 borzoi, Russian wolfhound
-n02090721 Irish wolfhound
-n02091032 Italian greyhound
-n02091134 whippet
-n02091244 Ibizan hound, Ibizan Podenco
-n02091467 Norwegian elkhound, elkhound
-n02091635 otterhound, otter hound
-n02091831 Saluki, gazelle hound
-n02092002 Scottish deerhound, deerhound
-n02092339 Weimaraner
-n02093256 Staffordshire bullterrier, Staffordshire bull terrier
-n02093428 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier
-n02093647 Bedlington terrier
-n02093754 Border terrier
-n02093859 Kerry blue terrier
-n02093991 Irish terrier
-n02094114 Norfolk terrier
-n02094258 Norwich terrier
-n02094433 Yorkshire terrier
-n02095314 wire-haired fox terrier
-n02095570 Lakeland terrier
-n02095889 Sealyham terrier, Sealyham
-n02096051 Airedale, Airedale terrier
-n02096177 cairn, cairn terrier
-n02096294 Australian terrier
-n02096437 Dandie Dinmont, Dandie Dinmont terrier
-n02096585 Boston bull, Boston terrier
-n02097047 miniature schnauzer
-n02097130 giant schnauzer
-n02097209 standard schnauzer
-n02097298 Scotch terrier, Scottish terrier, Scottie
-n02097474 Tibetan terrier, chrysanthemum dog
-n02097658 silky terrier, Sydney silky
-n02098105 soft-coated wheaten terrier
-n02098286 West Highland white terrier
-n02098413 Lhasa, Lhasa apso
-n02099267 flat-coated retriever
-n02099429 curly-coated retriever
-n02099601 golden retriever
-n02099712 Labrador retriever
-n02099849 Chesapeake Bay retriever
-n02100236 German short-haired pointer
-n02100583 vizsla, Hungarian pointer
-n02100735 English setter
-n02100877 Irish setter, red setter
-n02101006 Gordon setter
-n02101388 Brittany spaniel
-n02101556 clumber, clumber spaniel
-n02102040 English springer, English springer spaniel
-n02102177 Welsh springer spaniel
-n02102318 cocker spaniel, English cocker spaniel, cocker
-n02102480 Sussex spaniel
-n02102973 Irish water spaniel
-n02104029 kuvasz
-n02104365 schipperke
-n02105056 groenendael
-n02105162 malinois
-n02105251 briard
-n02105412 kelpie
-n02105505 komondor
-n02105641 Old English sheepdog, bobtail
-n02105855 Shetland sheepdog, Shetland sheep dog, Shetland
-n02106030 collie
-n02106166 Border collie
-n02106382 Bouvier des Flandres, Bouviers des Flandres
-n02106550 Rottweiler
-n02106662 German shepherd, German shepherd dog, German police dog, alsatian
-n02107142 Doberman, Doberman pinscher
-n02107312 miniature pinscher
-n02107574 Greater Swiss Mountain dog
-n02107683 Bernese mountain dog
-n02107908 Appenzeller
-n02108000 EntleBucher
-n02108089 boxer
-n02108422 bull mastiff
-n02108551 Tibetan mastiff
-n02108915 French bulldog
-n02109047 Great Dane
-n02109525 Saint Bernard, St Bernard
-n02109961 Eskimo dog, husky
-n02110063 malamute, malemute, Alaskan malamute
-n02110185 Siberian husky
-n02110341 dalmatian, coach dog, carriage dog
-n02110627 affenpinscher, monkey pinscher, monkey dog
-n02110806 basenji
-n02110958 pug, pug-dog
-n02111129 Leonberg
-n02111277 Newfoundland, Newfoundland dog
-n02111500 Great Pyrenees
-n02111889 Samoyed, Samoyede
-n02112018 Pomeranian
-n02112137 chow, chow chow
-n02112350 keeshond
-n02112706 Brabancon griffon
-n02113023 Pembroke, Pembroke Welsh corgi
-n02113186 Cardigan, Cardigan Welsh corgi
-n02113624 toy poodle
-n02113712 miniature poodle
-n02113799 standard poodle
-n02113978 Mexican hairless
-n02114367 timber wolf, grey wolf, gray wolf, Canis lupus
-n02114548 white wolf, Arctic wolf, Canis lupus tundrarum
-n02114712 red wolf, maned wolf, Canis rufus, Canis niger
-n02114855 coyote, prairie wolf, brush wolf, Canis latrans
-n02115641 dingo, warrigal, warragal, Canis dingo
-n02115913 dhole, Cuon alpinus
-n02116738 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus
-n02117135 hyena, hyaena
-n02119022 red fox, Vulpes vulpes
-n02119789 kit fox, Vulpes macrotis
-n02120079 Arctic fox, white fox, Alopex lagopus
-n02120505 grey fox, gray fox, Urocyon cinereoargenteus
-n02123045 tabby, tabby cat
-n02123159 tiger cat
-n02123394 Persian cat
-n02123597 Siamese cat, Siamese
-n02124075 Egyptian cat
-n02125311 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor
-n02127052 lynx, catamount
-n02128385 leopard, Panthera pardus
-n02128757 snow leopard, ounce, Panthera uncia
-n02128925 jaguar, panther, Panthera onca, Felis onca
-n02129165 lion, king of beasts, Panthera leo
-n02129604 tiger, Panthera tigris
-n02130308 cheetah, chetah, Acinonyx jubatus
-n02132136 brown bear, bruin, Ursus arctos
-n02133161 American black bear, black bear, Ursus americanus, Euarctos americanus
-n02134084 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus
-n02134418 sloth bear, Melursus ursinus, Ursus ursinus
-n02137549 mongoose
-n02138441 meerkat, mierkat
-n02165105 tiger beetle
-n02165456 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle
-n02167151 ground beetle, carabid beetle
-n02168699 long-horned beetle, longicorn, longicorn beetle
-n02169497 leaf beetle, chrysomelid
-n02172182 dung beetle
-n02174001 rhinoceros beetle
-n02177972 weevil
-n02190166 fly
-n02206856 bee
-n02219486 ant, emmet, pismire
-n02226429 grasshopper, hopper
-n02229544 cricket
-n02231487 walking stick, walkingstick, stick insect
-n02233338 cockroach, roach
-n02236044 mantis, mantid
-n02256656 cicada, cicala
-n02259212 leafhopper
-n02264363 lacewing, lacewing fly
-n02268443 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk
-n02268853 damselfly
-n02276258 admiral
-n02277742 ringlet, ringlet butterfly
-n02279972 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus
-n02280649 cabbage butterfly
-n02281406 sulphur butterfly, sulfur butterfly
-n02281787 lycaenid, lycaenid butterfly
-n02317335 starfish, sea star
-n02319095 sea urchin
-n02321529 sea cucumber, holothurian
-n02325366 wood rabbit, cottontail, cottontail rabbit
-n02326432 hare
-n02328150 Angora, Angora rabbit
-n02342885 hamster
-n02346627 porcupine, hedgehog
-n02356798 fox squirrel, eastern fox squirrel, Sciurus niger
-n02361337 marmot
-n02363005 beaver
-n02364673 guinea pig, Cavia cobaya
-n02389026 sorrel
-n02391049 zebra
-n02395406 hog, pig, grunter, squealer, Sus scrofa
-n02396427 wild boar, boar, Sus scrofa
-n02397096 warthog
-n02398521 hippopotamus, hippo, river horse, Hippopotamus amphibius
-n02403003 ox
-n02408429 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis
-n02410509 bison
-n02412080 ram, tup
-n02415577 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis
-n02417914 ibex, Capra ibex
-n02422106 hartebeest
-n02422699 impala, Aepyceros melampus
-n02423022 gazelle
-n02437312 Arabian camel, dromedary, Camelus dromedarius
-n02437616 llama
-n02441942 weasel
-n02442845 mink
-n02443114 polecat, fitch, foulmart, foumart, Mustela putorius
-n02443484 black-footed ferret, ferret, Mustela nigripes
-n02444819 otter
-n02445715 skunk, polecat, wood pussy
-n02447366 badger
-n02454379 armadillo
-n02457408 three-toed sloth, ai, Bradypus tridactylus
-n02480495 orangutan, orang, orangutang, Pongo pygmaeus
-n02480855 gorilla, Gorilla gorilla
-n02481823 chimpanzee, chimp, Pan troglodytes
-n02483362 gibbon, Hylobates lar
-n02483708 siamang, Hylobates syndactylus, Symphalangus syndactylus
-n02484975 guenon, guenon monkey
-n02486261 patas, hussar monkey, Erythrocebus patas
-n02486410 baboon
-n02487347 macaque
-n02488291 langur
-n02488702 colobus, colobus monkey
-n02489166 proboscis monkey, Nasalis larvatus
-n02490219 marmoset
-n02492035 capuchin, ringtail, Cebus capucinus
-n02492660 howler monkey, howler
-n02493509 titi, titi monkey
-n02493793 spider monkey, Ateles geoffroyi
-n02494079 squirrel monkey, Saimiri sciureus
-n02497673 Madagascar cat, ring-tailed lemur, Lemur catta
-n02500267 indri, indris, Indri indri, Indri brevicaudatus
-n02504013 Indian elephant, Elephas maximus
-n02504458 African elephant, Loxodonta africana
-n02509815 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens
-n02510455 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca
-n02514041 barracouta, snoek
-n02526121 eel
-n02536864 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch
-n02606052 rock beauty, Holocanthus tricolor
-n02607072 anemone fish
-n02640242 sturgeon
-n02641379 gar, garfish, garpike, billfish, Lepisosteus osseus
-n02643566 lionfish
-n02655020 puffer, pufferfish, blowfish, globefish
-n02666196 abacus
-n02667093 abaya
-n02669723 academic gown, academic robe, judge's robe
-n02672831 accordion, piano accordion, squeeze box
-n02676566 acoustic guitar
-n02687172 aircraft carrier, carrier, flattop, attack aircraft carrier
-n02690373 airliner
-n02692877 airship, dirigible
-n02699494 altar
-n02701002 ambulance
-n02704792 amphibian, amphibious vehicle
-n02708093 analog clock
-n02727426 apiary, bee house
-n02730930 apron
-n02747177 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin
-n02749479 assault rifle, assault gun
-n02769748 backpack, back pack, knapsack, packsack, rucksack, haversack
-n02776631 bakery, bakeshop, bakehouse
-n02777292 balance beam, beam
-n02782093 balloon
-n02783161 ballpoint, ballpoint pen, ballpen, Biro
-n02786058 Band Aid
-n02787622 banjo
-n02788148 bannister, banister, balustrade, balusters, handrail
-n02790996 barbell
-n02791124 barber chair
-n02791270 barbershop
-n02793495 barn
-n02794156 barometer
-n02795169 barrel, cask
-n02797295 barrow, garden cart, lawn cart, wheelbarrow
-n02799071 baseball
-n02802426 basketball
-n02804414 bassinet
-n02804610 bassoon
-n02807133 bathing cap, swimming cap
-n02808304 bath towel
-n02808440 bathtub, bathing tub, bath, tub
-n02814533 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon
-n02814860 beacon, lighthouse, beacon light, pharos
-n02815834 beaker
-n02817516 bearskin, busby, shako
-n02823428 beer bottle
-n02823750 beer glass
-n02825657 bell cote, bell cot
-n02834397 bib
-n02835271 bicycle-built-for-two, tandem bicycle, tandem
-n02837789 bikini, two-piece
-n02840245 binder, ring-binder
-n02841315 binoculars, field glasses, opera glasses
-n02843684 birdhouse
-n02859443 boathouse
-n02860847 bobsled, bobsleigh, bob
-n02865351 bolo tie, bolo, bola tie, bola
-n02869837 bonnet, poke bonnet
-n02870880 bookcase
-n02871525 bookshop, bookstore, bookstall
-n02877765 bottlecap
-n02879718 bow
-n02883205 bow tie, bow-tie, bowtie
-n02892201 brass, memorial tablet, plaque
-n02892767 brassiere, bra, bandeau
-n02894605 breakwater, groin, groyne, mole, bulwark, seawall, jetty
-n02895154 breastplate, aegis, egis
-n02906734 broom
-n02909870 bucket, pail
-n02910353 buckle
-n02916936 bulletproof vest
-n02917067 bullet train, bullet
-n02927161 butcher shop, meat market
-n02930766 cab, hack, taxi, taxicab
-n02939185 caldron, cauldron
-n02948072 candle, taper, wax light
-n02950826 cannon
-n02951358 canoe
-n02951585 can opener, tin opener
-n02963159 cardigan
-n02965783 car mirror
-n02966193 carousel, carrousel, merry-go-round, roundabout, whirligig
-n02966687 carpenter's kit, tool kit
-n02971356 carton
-n02974003 car wheel
-n02977058 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM
-n02978881 cassette
-n02979186 cassette player
-n02980441 castle
-n02981792 catamaran
-n02988304 CD player
-n02992211 cello, violoncello
-n02992529 cellular telephone, cellular phone, cellphone, cell, mobile phone
-n02999410 chain
-n03000134 chainlink fence
-n03000247 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour
-n03000684 chain saw, chainsaw
-n03014705 chest
-n03016953 chiffonier, commode
-n03017168 chime, bell, gong
-n03018349 china cabinet, china closet
-n03026506 Christmas stocking
-n03028079 church, church building
-n03032252 cinema, movie theater, movie theatre, movie house, picture palace
-n03041632 cleaver, meat cleaver, chopper
-n03042490 cliff dwelling
-n03045698 cloak
-n03047690 clog, geta, patten, sabot
-n03062245 cocktail shaker
-n03063599 coffee mug
-n03063689 coffeepot
-n03065424 coil, spiral, volute, whorl, helix
-n03075370 combination lock
-n03085013 computer keyboard, keypad
-n03089624 confectionery, confectionary, candy store
-n03095699 container ship, containership, container vessel
-n03100240 convertible
-n03109150 corkscrew, bottle screw
-n03110669 cornet, horn, trumpet, trump
-n03124043 cowboy boot
-n03124170 cowboy hat, ten-gallon hat
-n03125729 cradle
-n03126707 crane
-n03127747 crash helmet
-n03127925 crate
-n03131574 crib, cot
-n03133878 Crock Pot
-n03134739 croquet ball
-n03141823 crutch
-n03146219 cuirass
-n03160309 dam, dike, dyke
-n03179701 desk
-n03180011 desktop computer
-n03187595 dial telephone, dial phone
-n03188531 diaper, nappy, napkin
-n03196217 digital clock
-n03197337 digital watch
-n03201208 dining table, board
-n03207743 dishrag, dishcloth
-n03207941 dishwasher, dish washer, dishwashing machine
-n03208938 disk brake, disc brake
-n03216828 dock, dockage, docking facility
-n03218198 dogsled, dog sled, dog sleigh
-n03220513 dome
-n03223299 doormat, welcome mat
-n03240683 drilling platform, offshore rig
-n03249569 drum, membranophone, tympan
-n03250847 drumstick
-n03255030 dumbbell
-n03259280 Dutch oven
-n03271574 electric fan, blower
-n03272010 electric guitar
-n03272562 electric locomotive
-n03290653 entertainment center
-n03291819 envelope
-n03297495 espresso maker
-n03314780 face powder
-n03325584 feather boa, boa
-n03337140 file, file cabinet, filing cabinet
-n03344393 fireboat
-n03345487 fire engine, fire truck
-n03347037 fire screen, fireguard
-n03355925 flagpole, flagstaff
-n03372029 flute, transverse flute
-n03376595 folding chair
-n03379051 football helmet
-n03384352 forklift
-n03388043 fountain
-n03388183 fountain pen
-n03388549 four-poster
-n03393912 freight car
-n03394916 French horn, horn
-n03400231 frying pan, frypan, skillet
-n03404251 fur coat
-n03417042 garbage truck, dustcart
-n03424325 gasmask, respirator, gas helmet
-n03425413 gas pump, gasoline pump, petrol pump, island dispenser
-n03443371 goblet
-n03444034 go-kart
-n03445777 golf ball
-n03445924 golfcart, golf cart
-n03447447 gondola
-n03447721 gong, tam-tam
-n03450230 gown
-n03452741 grand piano, grand
-n03457902 greenhouse, nursery, glasshouse
-n03459775 grille, radiator grille
-n03461385 grocery store, grocery, food market, market
-n03467068 guillotine
-n03476684 hair slide
-n03476991 hair spray
-n03478589 half track
-n03481172 hammer
-n03482405 hamper
-n03483316 hand blower, blow dryer, blow drier, hair dryer, hair drier
-n03485407 hand-held computer, hand-held microcomputer
-n03485794 handkerchief, hankie, hanky, hankey
-n03492542 hard disc, hard disk, fixed disk
-n03494278 harmonica, mouth organ, harp, mouth harp
-n03495258 harp
-n03496892 harvester, reaper
-n03498962 hatchet
-n03527444 holster
-n03529860 home theater, home theatre
-n03530642 honeycomb
-n03532672 hook, claw
-n03534580 hoopskirt, crinoline
-n03535780 horizontal bar, high bar
-n03538406 horse cart, horse-cart
-n03544143 hourglass
-n03584254 iPod
-n03584829 iron, smoothing iron
-n03590841 jack-o'-lantern
-n03594734 jean, blue jean, denim
-n03594945 jeep, landrover
-n03595614 jersey, T-shirt, tee shirt
-n03598930 jigsaw puzzle
-n03599486 jinrikisha, ricksha, rickshaw
-n03602883 joystick
-n03617480 kimono
-n03623198 knee pad
-n03627232 knot
-n03630383 lab coat, laboratory coat
-n03633091 ladle
-n03637318 lampshade, lamp shade
-n03642806 laptop, laptop computer
-n03649909 lawn mower, mower
-n03657121 lens cap, lens cover
-n03658185 letter opener, paper knife, paperknife
-n03661043 library
-n03662601 lifeboat
-n03666591 lighter, light, igniter, ignitor
-n03670208 limousine, limo
-n03673027 liner, ocean liner
-n03676483 lipstick, lip rouge
-n03680355 Loafer
-n03690938 lotion
-n03691459 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system
-n03692522 loupe, jeweler's loupe
-n03697007 lumbermill, sawmill
-n03706229 magnetic compass
-n03709823 mailbag, postbag
-n03710193 mailbox, letter box
-n03710637 maillot
-n03710721 maillot, tank suit
-n03717622 manhole cover
-n03720891 maraca
-n03721384 marimba, xylophone
-n03724870 mask
-n03729826 matchstick
-n03733131 maypole
-n03733281 maze, labyrinth
-n03733805 measuring cup
-n03742115 medicine chest, medicine cabinet
-n03743016 megalith, megalithic structure
-n03759954 microphone, mike
-n03761084 microwave, microwave oven
-n03763968 military uniform
-n03764736 milk can
-n03769881 minibus
-n03770439 miniskirt, mini
-n03770679 minivan
-n03773504 missile
-n03775071 mitten
-n03775546 mixing bowl
-n03776460 mobile home, manufactured home
-n03777568 Model T
-n03777754 modem
-n03781244 monastery
-n03782006 monitor
-n03785016 moped
-n03786901 mortar
-n03787032 mortarboard
-n03788195 mosque
-n03788365 mosquito net
-n03791053 motor scooter, scooter
-n03792782 mountain bike, all-terrain bike, off-roader
-n03792972 mountain tent
-n03793489 mouse, computer mouse
-n03794056 mousetrap
-n03796401 moving van
-n03803284 muzzle
-n03804744 nail
-n03814639 neck brace
-n03814906 necklace
-n03825788 nipple
-n03832673 notebook, notebook computer
-n03837869 obelisk
-n03838899 oboe, hautboy, hautbois
-n03840681 ocarina, sweet potato
-n03841143 odometer, hodometer, mileometer, milometer
-n03843555 oil filter
-n03854065 organ, pipe organ
-n03857828 oscilloscope, scope, cathode-ray oscilloscope, CRO
-n03866082 overskirt
-n03868242 oxcart
-n03868863 oxygen mask
-n03871628 packet
-n03873416 paddle, boat paddle
-n03874293 paddlewheel, paddle wheel
-n03874599 padlock
-n03876231 paintbrush
-n03877472 pajama, pyjama, pj's, jammies
-n03877845 palace
-n03884397 panpipe, pandean pipe, syrinx
-n03887697 paper towel
-n03888257 parachute, chute
-n03888605 parallel bars, bars
-n03891251 park bench
-n03891332 parking meter
-n03895866 passenger car, coach, carriage
-n03899768 patio, terrace
-n03902125 pay-phone, pay-station
-n03903868 pedestal, plinth, footstall
-n03908618 pencil box, pencil case
-n03908714 pencil sharpener
-n03916031 perfume, essence
-n03920288 Petri dish
-n03924679 photocopier
-n03929660 pick, plectrum, plectron
-n03929855 pickelhaube
-n03930313 picket fence, paling
-n03930630 pickup, pickup truck
-n03933933 pier
-n03935335 piggy bank, penny bank
-n03937543 pill bottle
-n03938244 pillow
-n03942813 ping-pong ball
-n03944341 pinwheel
-n03947888 pirate, pirate ship
-n03950228 pitcher, ewer
-n03954731 plane, carpenter's plane, woodworking plane
-n03956157 planetarium
-n03958227 plastic bag
-n03961711 plate rack
-n03967562 plow, plough
-n03970156 plunger, plumber's helper
-n03976467 Polaroid camera, Polaroid Land camera
-n03976657 pole
-n03977966 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria
-n03980874 poncho
-n03982430 pool table, billiard table, snooker table
-n03983396 pop bottle, soda bottle
-n03991062 pot, flowerpot
-n03992509 potter's wheel
-n03995372 power drill
-n03998194 prayer rug, prayer mat
-n04004767 printer
-n04005630 prison, prison house
-n04008634 projectile, missile
-n04009552 projector
-n04019541 puck, hockey puck
-n04023962 punching bag, punch bag, punching ball, punchball
-n04026417 purse
-n04033901 quill, quill pen
-n04033995 quilt, comforter, comfort, puff
-n04037443 racer, race car, racing car
-n04039381 racket, racquet
-n04040759 radiator
-n04041544 radio, wireless
-n04044716 radio telescope, radio reflector
-n04049303 rain barrel
-n04065272 recreational vehicle, RV, R.V.
-n04067472 reel
-n04069434 reflex camera
-n04070727 refrigerator, icebox
-n04074963 remote control, remote
-n04081281 restaurant, eating house, eating place, eatery
-n04086273 revolver, six-gun, six-shooter
-n04090263 rifle
-n04099969 rocking chair, rocker
-n04111531 rotisserie
-n04116512 rubber eraser, rubber, pencil eraser
-n04118538 rugby ball
-n04118776 rule, ruler
-n04120489 running shoe
-n04125021 safe
-n04127249 safety pin
-n04131690 saltshaker, salt shaker
-n04133789 sandal
-n04136333 sarong
-n04141076 sax, saxophone
-n04141327 scabbard
-n04141975 scale, weighing machine
-n04146614 school bus
-n04147183 schooner
-n04149813 scoreboard
-n04152593 screen, CRT screen
-n04153751 screw
-n04154565 screwdriver
-n04162706 seat belt, seatbelt
-n04179913 sewing machine
-n04192698 shield, buckler
-n04200800 shoe shop, shoe-shop, shoe store
-n04201297 shoji
-n04204238 shopping basket
-n04204347 shopping cart
-n04208210 shovel
-n04209133 shower cap
-n04209239 shower curtain
-n04228054 ski
-n04229816 ski mask
-n04235860 sleeping bag
-n04238763 slide rule, slipstick
-n04239074 sliding door
-n04243546 slot, one-armed bandit
-n04251144 snorkel
-n04252077 snowmobile
-n04252225 snowplow, snowplough
-n04254120 soap dispenser
-n04254680 soccer ball
-n04254777 sock
-n04258138 solar dish, solar collector, solar furnace
-n04259630 sombrero
-n04263257 soup bowl
-n04264628 space bar
-n04265275 space heater
-n04266014 space shuttle
-n04270147 spatula
-n04273569 speedboat
-n04275548 spider web, spider's web
-n04277352 spindle
-n04285008 sports car, sport car
-n04286575 spotlight, spot
-n04296562 stage
-n04310018 steam locomotive
-n04311004 steel arch bridge
-n04311174 steel drum
-n04317175 stethoscope
-n04325704 stole
-n04326547 stone wall
-n04328186 stopwatch, stop watch
-n04330267 stove
-n04332243 strainer
-n04335435 streetcar, tram, tramcar, trolley, trolley car
-n04336792 stretcher
-n04344873 studio couch, day bed
-n04346328 stupa, tope
-n04347754 submarine, pigboat, sub, U-boat
-n04350905 suit, suit of clothes
-n04355338 sundial
-n04355933 sunglass
-n04356056 sunglasses, dark glasses, shades
-n04357314 sunscreen, sunblock, sun blocker
-n04366367 suspension bridge
-n04367480 swab, swob, mop
-n04370456 sweatshirt
-n04371430 swimming trunks, bathing trunks
-n04371774 swing
-n04372370 switch, electric switch, electrical switch
-n04376876 syringe
-n04380533 table lamp
-n04389033 tank, army tank, armored combat vehicle, armoured combat vehicle
-n04392985 tape player
-n04398044 teapot
-n04399382 teddy, teddy bear
-n04404412 television, television system
-n04409515 tennis ball
-n04417672 thatch, thatched roof
-n04418357 theater curtain, theatre curtain
-n04423845 thimble
-n04428191 thresher, thrasher, threshing machine
-n04429376 throne
-n04435653 tile roof
-n04442312 toaster
-n04443257 tobacco shop, tobacconist shop, tobacconist
-n04447861 toilet seat
-n04456115 torch
-n04458633 totem pole
-n04461696 tow truck, tow car, wrecker
-n04462240 toyshop
-n04465501 tractor
-n04467665 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi
-n04476259 tray
-n04479046 trench coat
-n04482393 tricycle, trike, velocipede
-n04483307 trimaran
-n04485082 tripod
-n04486054 triumphal arch
-n04487081 trolleybus, trolley coach, trackless trolley
-n04487394 trombone
-n04493381 tub, vat
-n04501370 turnstile
-n04505470 typewriter keyboard
-n04507155 umbrella
-n04509417 unicycle, monocycle
-n04515003 upright, upright piano
-n04517823 vacuum, vacuum cleaner
-n04522168 vase
-n04523525 vault
-n04525038 velvet
-n04525305 vending machine
-n04532106 vestment
-n04532670 viaduct
-n04536866 violin, fiddle
-n04540053 volleyball
-n04542943 waffle iron
-n04548280 wall clock
-n04548362 wallet, billfold, notecase, pocketbook
-n04550184 wardrobe, closet, press
-n04552348 warplane, military plane
-n04553703 washbasin, handbasin, washbowl, lavabo, wash-hand basin
-n04554684 washer, automatic washer, washing machine
-n04557648 water bottle
-n04560804 water jug
-n04562935 water tower
-n04579145 whiskey jug
-n04579432 whistle
-n04584207 wig
-n04589890 window screen
-n04590129 window shade
-n04591157 Windsor tie
-n04591713 wine bottle
-n04592741 wing
-n04596742 wok
-n04597913 wooden spoon
-n04599235 wool, woolen, woollen
-n04604644 worm fence, snake fence, snake-rail fence, Virginia fence
-n04606251 wreck
-n04612504 yawl
-n04613696 yurt
-n06359193 web site, website, internet site, site
-n06596364 comic book
-n06785654 crossword puzzle, crossword
-n06794110 street sign
-n06874185 traffic light, traffic signal, stoplight
-n07248320 book jacket, dust cover, dust jacket, dust wrapper
-n07565083 menu
-n07579787 plate
-n07583066 guacamole
-n07584110 consomme
-n07590611 hot pot, hotpot
-n07613480 trifle
-n07614500 ice cream, icecream
-n07615774 ice lolly, lolly, lollipop, popsicle
-n07684084 French loaf
-n07693725 bagel, beigel
-n07695742 pretzel
-n07697313 cheeseburger
-n07697537 hotdog, hot dog, red hot
-n07711569 mashed potato
-n07714571 head cabbage
-n07714990 broccoli
-n07715103 cauliflower
-n07716358 zucchini, courgette
-n07716906 spaghetti squash
-n07717410 acorn squash
-n07717556 butternut squash
-n07718472 cucumber, cuke
-n07718747 artichoke, globe artichoke
-n07720875 bell pepper
-n07730033 cardoon
-n07734744 mushroom
-n07742313 Granny Smith
-n07745940 strawberry
-n07747607 orange
-n07749582 lemon
-n07753113 fig
-n07753275 pineapple, ananas
-n07753592 banana
-n07754684 jackfruit, jak, jack
-n07760859 custard apple
-n07768694 pomegranate
-n07802026 hay
-n07831146 carbonara
-n07836838 chocolate sauce, chocolate syrup
-n07860988 dough
-n07871810 meat loaf, meatloaf
-n07873807 pizza, pizza pie
-n07875152 potpie
-n07880968 burrito
-n07892512 red wine
-n07920052 espresso
-n07930864 cup
-n07932039 eggnog
-n09193705 alp
-n09229709 bubble
-n09246464 cliff, drop, drop-off
-n09256479 coral reef
-n09288635 geyser
-n09332890 lakeside, lakeshore
-n09399592 promontory, headland, head, foreland
-n09421951 sandbar, sand bar
-n09428293 seashore, coast, seacoast, sea-coast
-n09468604 valley, vale
-n09472597 volcano
-n09835506 ballplayer, baseball player
-n10148035 groom, bridegroom
-n10565667 scuba diver
-n11879895 rapeseed
-n11939491 daisy
-n12057211 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum
-n12144580 corn
-n12267677 acorn
-n12620546 hip, rose hip, rosehip
-n12768682 buckeye, horse chestnut, conker
-n12985857 coral fungus
-n12998815 agaric
-n13037406 gyromitra
-n13040303 stinkhorn, carrion fungus
-n13044778 earthstar
-n13052670 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa
-n13054560 bolete
-n13133613 ear, spike, capitulum
-n15075141 toilet tissue, toilet paper, bathroom tissue
diff --git a/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md b/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md
new file mode 100644
index 000000000..2c5fa61a1
--- /dev/null
+++ b/inference-engine/ie_bridges/python/sample/style_transfer_sample/README.md
@@ -0,0 +1,74 @@
+# Neural Style Transfer Python* Sample
+
+This topic demonstrates how to run the Neural Style Transfer sample application, which performs 
+inference of style transfer models.
+
+> **NOTE**: The OpenVINO™ toolkit does not include a pre-trained model to run the Neural Style Transfer sample. A public model from the [Zhaw's Neural Style Transfer repository](https://github.com/zhaw/neural_style) can be used. Read the [Converting a Style Transfer Model from MXNet*](./docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md) topic from the [Model Optimizer Developer Guide](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to learn about how to get the trained model and how to convert it to the Inference Engine format (\*.xml + \*.bin).
+
+## How It Works
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
+## Running
+
+Running the application with the <code>-h</code> option yields the following usage message:
+```
+python3 style_transfer_sample.py --help
+```
+The command yields the following usage message:
+```
+usage: style_transfer_sample.py [-h] -m MODEL -i INPUT [INPUT ...]
+                                [-l CPU_EXTENSION] [-pp PLUGIN_DIR]
+                                [-d DEVICE] [-nt NUMBER_TOP] [-ni NUMBER_ITER]
+                                [--mean_val_r MEAN_VAL_R]
+                                [--mean_val_g MEAN_VAL_G]
+                                [--mean_val_b MEAN_VAL_B] [-pc]
+
+Options:
+  -h, --help            Show this help message and exit.
+  -m MODEL, --model MODEL
+                        Path to an .xml file with a trained model.
+  -i INPUT [INPUT ...], --input INPUT [INPUT ...]
+                        Path to a folder with images or path to an image files
+  -l CPU_EXTENSION, --cpu_extension CPU_EXTENSION
+                        Optional. Required for CPU custom layers. Absolute
+                        MKLDNN (CPU)-targeted custom layers. Absolute path to
+                        a shared library with the kernels implementations
+  -pp PLUGIN_DIR, --plugin_dir PLUGIN_DIR
+                        Path to a plugin folder
+  -d DEVICE, --device DEVICE
+                        Specify the target device to infer on; CPU, GPU, FPGA,
+                        HDDL or MYRIAD is acceptable. Sample will look for a
+                        suitable plugin for device specified. Default value is CPU
+  -nt NUMBER_TOP, --number_top NUMBER_TOP
+                        Number of top results
+  -ni NUMBER_ITER, --number_iter NUMBER_ITER
+                        Number of inference iterations
+  --mean_val_r MEAN_VAL_R, -mean_val_r MEAN_VAL_R
+                        Mean value of red chanel for mean value subtraction in
+                        postprocessing
+  --mean_val_g MEAN_VAL_G, -mean_val_g MEAN_VAL_G
+                        Mean value of green chanel for mean value subtraction
+                        in postprocessing
+  --mean_val_b MEAN_VAL_B, -mean_val_b MEAN_VAL_B
+                        Mean value of blue chanel for mean value subtraction
+                        in postprocessing
+  -pc, --perf_counts    Report performance counters
+
+```
+
+Running the application with the empty list of options yields the usage message given above and an error message.
+
+To perform inference on an image using a trained model of NST network on Intel® CPUs, use the following command:
+```
+    python3 style_transfer_sample.py -i <path_to_image>/cat.bmp -m <path_to_model>/1_decoder_FP32.xml
+```
+
+### Demo Output
+
+The application outputs an image (`out1.bmp`) or a sequence of images (`out1.bmp`, ..., `out<N>.bmp`) which are redrawn in style of the style transfer model used for sample. 
+
+## See Also 
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+
+
diff --git a/inference-engine/ie_bridges/python/sample/style_transfer_sample.py b/inference-engine/ie_bridges/python/sample/style_transfer_sample/style_transfer_sample.py
index 76fcadaff..fc08b1779 100644
--- a/inference-engine/ie_bridges/python/sample/style_transfer_sample.py
+++ b/inference-engine/ie_bridges/python/sample/style_transfer_sample/style_transfer_sample.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (C) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from __future__ import print_function
 import sys
 import os
-from argparse import ArgumentParser
+from argparse import ArgumentParser, SUPPRESS
 import cv2
 import numpy as np
 import logging as log
@@ -26,30 +26,33 @@ from openvino.inference_engine import IENetwork, IEPlugin
 
 
 def build_argparser():
-    parser = ArgumentParser()
-    parser.add_argument("-m", "--model", help="Path to an .xml file with a trained model.", required=True, type=str)
-    parser.add_argument("-i", "--input", help="Path to a folder with images or path to an image files", required=True,
-                        type=str, nargs="+")
-    parser.add_argument("-l", "--cpu_extension",
-                        help="MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels "
-                             "impl.", type=str, default=None)
-    parser.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None)
-    parser.add_argument("-d", "--device",
-                        help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample "
-                             "will look for a suitable plugin for device specified (CPU by default)", default="CPU",
-                        type=str)
-    parser.add_argument("-nt", "--number_top", help="Number of top results", default=10, type=int)
-    parser.add_argument("-ni", "--number_iter", help="Number of inference iterations", default=1, type=int)
-    parser.add_argument("--mean_val_r", "-mean_val_r",
-                        help="Mean value of red chanel for mean value subtraction in postprocessing ", default=0,
-                        type=float)
-    parser.add_argument("--mean_val_g", "-mean_val_g",
-                        help="Mean value of green chanel for mean value subtraction in postprocessing ", default=0,
-                        type=float)
-    parser.add_argument("--mean_val_b", "-mean_val_b",
-                        help="Mean value of blue chanel for mean value subtraction in postprocessing ", default=0,
-                        type=float)
-    parser.add_argument("-pc", "--perf_counts", help="Report performance counters", default=False, action="store_true")
+    parser = ArgumentParser(add_help=False)
+    args = parser.add_argument_group('Options')
+    args.add_argument('-h', '--help', action='help', default=SUPPRESS, help='Show this help message and exit.')
+    args.add_argument("-m", "--model", help="Path to an .xml file with a trained model.", required=True, type=str)
+    args.add_argument("-i", "--input", help="Path to a folder with images or path to an image files", required=True,
+                      type=str, nargs="+")
+    args.add_argument("-l", "--cpu_extension",
+                      help="Optional. Required for CPU custom layers. "
+                           "Absolute MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the "
+                           "kernels implementations", type=str, default=None)
+    args.add_argument("-pp", "--plugin_dir", help="Path to a plugin folder", type=str, default=None)
+    args.add_argument("-d", "--device",
+                      help="Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample "
+                           "will look for a suitable plugin for device specified. Default value is CPU", default="CPU",
+                      type=str)
+    args.add_argument("-nt", "--number_top", help="Number of top results", default=10, type=int)
+    args.add_argument("-ni", "--number_iter", help="Number of inference iterations", default=1, type=int)
+    args.add_argument("--mean_val_r", "-mean_val_r",
+                      help="Mean value of red chanel for mean value subtraction in postprocessing ", default=0,
+                      type=float)
+    args.add_argument("--mean_val_g", "-mean_val_g",
+                      help="Mean value of green chanel for mean value subtraction in postprocessing ", default=0,
+                      type=float)
+    args.add_argument("--mean_val_b", "-mean_val_b",
+                      help="Mean value of blue chanel for mean value subtraction in postprocessing ", default=0,
+                      type=float)
+    args.add_argument("-pc", "--perf_counts", help="Report performance counters", default=False, action="store_true")
 
     return parser
 
@@ -101,7 +104,6 @@ def main():
     # Loading model to the plugin
     log.info("Loading model to the plugin")
     exec_net = plugin.load(network=net)
-    del net
 
     # Start sync inference
     log.info("Starting inference ({} iterations)".format(args.number_iter))
@@ -133,8 +135,6 @@ def main():
         out_img = os.path.join(os.path.dirname(__file__), "out_{}.bmp".format(batch))
         cv2.imwrite(out_img, data)
         log.info("Result image was saved to {}".format(out_img))
-    del exec_net
-    del plugin
 
 
 if __name__ == '__main__':
diff --git a/inference-engine/ie_bridges/python/sample/voc_labels.txt b/inference-engine/ie_bridges/python/sample/voc_labels.txt
deleted file mode 100644
index 008dd5fba..000000000
--- a/inference-engine/ie_bridges/python/sample/voc_labels.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-background
-aeroplane
-bicycle
-bird
-boat
-bottle
-bus
-car
-cat
-chair
-cow
-diningtable
-dog
-horse
-motorbike
-person
-pottedplant
-sheep
-sofa
-train
-tvmonitor
-\ No newline at end of file
diff --git a/inference-engine/ie_bridges/python/setup.py b/inference-engine/ie_bridges/python/setup.py
index bb9df0ecd..82ed12544 100644
--- a/inference-engine/ie_bridges/python/setup.py
+++ b/inference-engine/ie_bridges/python/setup.py
@@ -167,12 +167,12 @@ except ImportError:
 
 
 c_sources = [
-    PACKAGE / 'ie_driver.cpp',
-    PACKAGE / 'ie_driver.hpp',
+    PACKAGE / 'ie_api_impl.cpp',
+    PACKAGE / 'ie_api_impl.hpp',
 
-    PACKAGE / 'c_ie_driver.pxd',
-    PACKAGE / 'ie_driver.pyx',
-    PACKAGE / 'ie_driver.pxd',
+    PACKAGE / 'ie_api_impl_defs.pxd',
+    PACKAGE / 'ie_api.pyx',
+    PACKAGE / 'ie_api.pxd',
 ]
 
 extensions = [
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
index aa8ac74b1..8e0a91aeb 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/CMakeLists.txt
@@ -5,24 +5,20 @@ set (TARGET_NAME "ie_api")
 set (CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/inference_engine)
 set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
 
-set_source_files_properties(
-    ie_api_impl_defs.pxd
-    ie_api_impl.hpp
-    ie_api_impl.cpp
-    ie_api.pyx
-    ie_api.pxd
+file(GLOB SOURCE
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.pyx
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+        )
 
-  PROPERTIES CYTHON_IS_CXX TRUE
+set_source_files_properties(${SOURCE} PROPERTIES CYTHON_IS_CXX TRUE
 )
 
-cython_add_module (
-    ${TARGET_NAME}
+## Compatibility with python 2.7 which has depricated "register" specifier
+if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+    add_definitions("-Wno-register")
+endif()
 
-    ie_api_impl_defs.pxd
-    ie_api_impl.hpp
-    ie_api_impl.cpp
-    ie_api.pyx
-)
+cython_add_module (${TARGET_NAME} ${SOURCE})
 
 set_target_properties (${TARGET_NAME} PROPERTIES CXX_STANDARD 11 LINKER_LANGUAGE CXX)
 target_link_libraries (${TARGET_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/CMakeLists.txt b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/CMakeLists.txt
deleted file mode 100644
index 1b25c3ebe..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/CMakeLists.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-# If the pyx file is a C++ file, we should specify that here.
-set(CMAKE_INCLUDE_CURRENT_DIR ON)
-
-set(TARGET_NAME "dnn_builder")
-
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PYTHON_BRIDGE_OUTPUT_DIRECTORY}/inference_engine/${TARGET_NAME})
-set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
-
-set_source_files_properties(
-    dnn_builder_defs.pxd
-    dnn_builder_impl.hpp
-    dnn_builder_impl.cpp
-    dnn_builder.pyx
-    dnn_builder.pxd
-
-  PROPERTIES CYTHON_IS_CXX TRUE
-)
-
-cython_add_module(
-    ${TARGET_NAME}
-
-    dnn_builder_impl_defs.pxd
-    dnn_builder_impl.hpp
-    dnn_builder_impl.cpp
-    dnn_builder.pyx
-)
-
-set_target_properties (${TARGET_NAME} PROPERTIES CXX_STANDARD 11 LINKER_LANGUAGE CXX)
-add_dependencies (${TARGET_NAME} ie_api)
-target_include_directories (${TARGET_NAME} PRIVATE ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/inference_engine )
-target_link_libraries (${TARGET_NAME} PRIVATE ${InferenceEngine_LIBRARIES})
-
-# perform copy
-ADD_CUSTOM_COMMAND (TARGET ${TARGET_NAME}
-    POST_BUILD
-    COMMAND ${CMAKE_COMMAND} -E copy ${PYTHON_BRIDGE_SRC_ROOT}/src/openvino/inference_engine/${TARGET_NAME}/__init__.py ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}
-)
-\ No newline at end of file
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/__init__.py b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/__init__.py
deleted file mode 100644
index 79744ab14..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .dnn_builder import *
-__all__ = ["NetworkBuilder", "LayerBuilder"]
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder.pxd
deleted file mode 100644
index 9a5621508..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder.pxd
+++ /dev/null
@@ -1,26 +0,0 @@
-from .cimport dnn_builder_impl_defs as C
-from libcpp.memory cimport shared_ptr
-
-cdef class NetworkBuilder:
-    cdef C.NetworkBuilder impl
-
-cdef class INetwork:
-    cdef C.INetwork impl
-
-cdef class ILayer:
-    cdef C.ILayer impl
-
-cdef class Port:
-    cdef C.Port impl
-
-cdef class PortInfo:
-    cdef C.PortInfo impl
-
-cdef class Connection:
-    cdef C.Connection impl
-
-cdef class LayerBuilder:
-    cdef C.LayerBuilder impl
-
-cdef class LayerConstantData(dict):
-    cdef shared_ptr[C.LayerBuilder] impl
-\ No newline at end of file
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder.pyx
deleted file mode 100644
index b0754cb5f..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder.pyx
+++ /dev/null
@@ -1,423 +0,0 @@
-# #distutils: language=c++
-#from cython.operator cimport dereference as deref
-from libcpp.vector cimport vector
-from libcpp.map cimport map
-from libcpp.string cimport string
-from ..ie_api cimport IENetwork, BlobBuffer
-from .cimport dnn_builder_impl_defs as C
-from .dnn_builder_impl_defs cimport Blob
-import numpy as np
-
-
-np_precision_map = {
-            "float32": "FP32",
-            "float16": "FP16",
-            "int32": "I32",
-            "int16": "I16",
-            "uint16": "U16",
-            "int8": "I8",
-            "uint8": "U8",
-        }
-cdef class NetworkBuilder:
-    def __cinit__(self, name=None, IENetwork ie_net=None):
-        if name is not None and ie_net is not None:
-            raise AttributeError("Both name and ie_net arguments are defined")
-        elif name is not None:
-            self.impl = C.NetworkBuilder(name.encode())
-        elif ie_net is not None:
-            self.impl = C.NetworkBuilder().from_ie_network(ie_net.impl)
-
-    def build(self):
-        cdef INetwork i_net = INetwork()
-        i_net.impl = self.impl.build()
-        return i_net
-
-    def get_layer(self, id: int):
-        cdef LayerBuilder py_layer = LayerBuilder()
-        py_layer.impl = self.impl.getLayer(id)
-        return py_layer
-
-    @property
-    def layers(self):
-        cdef vector[C.LayerBuilder] c_layers = self.impl.getLayers()
-        cdef LayerBuilder py_layer
-        py_layers = {}
-        for l in c_layers:
-            py_layer = LayerBuilder()
-            py_layer.impl = l
-            py_layers[l.getName().decode()] = py_layer
-        return py_layers
-
-    def remove_layer(self, LayerBuilder layer):
-        self.impl.removeLayer(layer.impl)
-
-    def get_layer_connection(self, LayerBuilder layer):
-        cdef vector[C.Connection] c_connections = self.impl.getLayerConnections(layer.impl)
-        cdef Connection connection
-        connections = []
-        for con in c_connections:
-            connection = Connection()
-            connection.impl = con
-            connections.append(connection)
-        return connections
-
-    def disconnect(self, Connection connection):
-        self.impl.disconnect(connection.impl)
-
-    def connect(self, PortInfo input, PortInfo output):
-        self.impl.connect(input.impl, output.impl)
-
-    def add_layer(self, LayerBuilder layer, input_ports: list = None):
-        cdef vector[C.PortInfo] c_ports
-        cdef PortInfo c_port
-        if not input_ports:
-            return self.impl.addLayer(layer.impl)
-        else:
-            for p in input_ports:
-                c_port = PortInfo(p.layer_id, p.port_id)
-                c_ports.push_back(c_port.impl)
-            return self.impl.addAndConnectLayer(c_ports, layer.impl)
-
-cdef class INetwork:
-    def __iter__(self):
-        cdef ILayer layer
-        layers = []
-        cdef vector[C.ILayer] c_layers = self.impl.layers
-        for l in c_layers:
-            layer = ILayer()
-            layer.impl = l
-            layers.append(layer)
-        return iter(layers)
-
-    @property
-    def layers(self):
-        cdef ILayer layer
-        layers = {}
-        cdef vector[C.ILayer] c_layers = self.impl.layers
-        for l in c_layers:
-            layer = ILayer()
-            layer.impl = l
-            layers[l.name.decode()] = layer
-        return layers
-
-    @property
-    def inputs(self):
-        cdef ILayer layer
-        layers = {}
-        cdef vector[C.ILayer] c_layers = self.impl.inputs
-        for l in c_layers:
-            layer = ILayer()
-            layer.impl = l
-            layers[l.name.decode()] = layer
-        return layers
-
-    @property
-    def outputs(self):
-        cdef ILayer layer
-        layers = {}
-        cdef vector[C.ILayer] c_layers = self.impl.outputs
-        for l in c_layers:
-            layer = ILayer()
-            layer.impl = l
-            layers[l.name.decode()] = layer
-        return layers
-
-    @property
-    def name(self):
-        return self.impl.name.decode()
-
-
-    @property
-    def size(self):
-        return self.impl.size
-
-    def get_layer_connection(self, layer: ILayer):
-        cdef Connection connection
-        connections = []
-        cdef vector[C.Connection] c_connections = self.impl.getLayerConnections(layer.id)
-        for con in c_connections:
-            connection = Connection()
-            connection.impl = con
-            connections.append(connection)
-        return connections
-
-    def to_ie_network(self):
-        cdef IENetwork net = IENetwork()
-        net.impl = self.impl.to_ie_network()
-        return net
-
-cdef class ILayer:
-    @property
-    def name(self):
-        return self.impl.name.decode()
-
-    @property
-    def id(self):
-        return self.impl.id
-
-    @property
-    def type(self):
-        return self.impl.type.decode()
-
-    @property
-    def params(self):
-        return {k.decode(): v.decode() for k, v in self.impl.parameters}
-
-    @property
-    def input_ports(self):
-        cdef Port port
-        cdef vector[C.Port] c_ports = self.impl.in_ports
-        ports = []
-        for p in c_ports:
-            port = Port()
-            port.impl = p
-            ports.append(port)
-        return ports
-
-    @property
-    def output_ports(self):
-        cdef Port port
-        cdef vector[C.Port] c_ports = self.impl.out_ports
-        ports = []
-        for p in c_ports:
-            port = Port()
-            port.impl = p
-            ports.append(port)
-        return ports
-
-    @property
-    def constant_data(self):
-        cdef map[string, Blob.Ptr] c_constant_data
-        c_constant_data = self.impl.constant_data
-        constant_data = {}
-        cdef BlobBuffer weights_buffer
-        for weights in c_constant_data:
-            weights_buffer = BlobBuffer()
-            weights_buffer.reset(weights.second)
-            constant_data[weights.first.decode()] = weights_buffer.to_numpy()
-        return constant_data
-
-
-cdef class Port:
-    def __cinit__(self, shape: list=[]):
-        cdef vector[size_t] c_shape
-        for d in shape:
-            c_shape.push_back(d)
-        self.impl = C.Port(c_shape)
-    @property
-    def shape(self):
-        return self.impl.shape
-
-cdef class PortInfo:
-    def __cinit__(self, layer_id: int = -1, port_id: int = -1):
-        if layer_id != -1 and port_id != -1:
-            self.impl = C.PortInfo(layer_id, port_id)
-        else:
-            self.impl = C.PortInfo()
-    @property
-    def layer_id(self):
-        return self.impl.layer_id
-
-    @property
-    def port_id(self):
-        return self.impl.port_id
-
-    def __eq__(self, other):
-        return self.layer_id == other.layer_id and self.port_id == other.port_id
-
-    def __ne__(self, other):
-        return self.layer_id != other.layer_id and self.port_id != other.port_id
-
-cdef class Connection:
-    def __cinit__(self, PortInfo input = None, PortInfo output = None):
-        if input and output:
-            self.impl = C.Connection(input.impl, output.impl)
-        else:
-            self.impl = C.Connection()
-    @property
-    def _from(self):
-        cdef PortInfo port_info = PortInfo()
-        port_info.impl = self.impl._from
-        return port_info
-
-    @property
-    def to(self):
-        cdef PortInfo port_info = PortInfo()
-        port_info.impl = self.impl.to
-        return port_info
-
-    def __eq__(self, other):
-        return self._from == other._from and self.to == other.to
-
-    def __ne__(self, other):
-        return self._from != other._from and self.to != other.to
-
-
-def check_constant_data(data):
-    for k, v in data.items():
-        if not all([isinstance(x, type(v[0])) for x in v]):
-            raise TypeError("Elements of list for key {} have different data types! "
-                            "Please specify list of 'int' or 'float' values.".format(k))
-        if isinstance(v, list):
-            if isinstance(v[0], float):
-                dtype = np.float32
-            elif isinstance(v[0], int):
-                dtype = np.int32
-            else:
-                raise TypeError("Unsupported precision of the data for key {}! Given {} but 'float  or 'int' precision expected".
-                              format(k, str(v.dtype)))
-            data[k] = np.asanyarray(v, dtype=dtype)
-        elif isinstance(v, np.ndarray):
-            pass
-        else:
-            raise TypeError("Unsupported data type for key '{}'. {} given but 'list' or 'numpy.ndarray' expected".
-                            format(k, type(v)))
-    return data
-
-
-# TODO: Fix LAyerBuilder object copying - pass by reference
-# cdef class LayerConstantData(dict):
-#     def update(self, other=None, **kwargs):
-#         if other:
-#             other = check_constant_data(other)
-#         cdef vector[size_t] dims
-#         cdef Blob.Ptr blob_ptr
-#         cdef BlobBuffer buffer
-#         for k, v in other.items():
-#             if k in self.keys() and (v.shape == self[k].shape and v.dtype == self[k].dtype):
-#                 print("Reuse blob for {}\n".format(k))
-#                 self[k][:] = v
-#             else:
-#                 for dim in v.shape:
-#                     dims.push_back(dim)
-#                 ie_precision = np_precision_map.get(str(v.dtype), None)
-#                 if not ie_precision:
-#                     raise BufferError("Unsupported precision of the data for key {}! Given {} but one of the {} precisions expected".
-#                                       format(k, str(v.dtype), ", ".join(np_precision_map.keys())))
-#                 blob_ptr = deref(self.impl).allocateBlob(dims, ie_precision.encode())
-#                 buffer = BlobBuffer()
-#                 buffer.reset(blob_ptr)
-#                 np_buffer = buffer.to_numpy()
-#                 np_buffer[:] = v
-#                 deref(self.impl).addConstantData(k.encode(), blob_ptr)
-
-cdef class LayerBuilder:
-
-    def __cinit__(self, type: str=None, name: str=None):
-        if name and type:
-            self.impl = C.LayerBuilder(name.encode(), type.encode())
-        else:
-            self.impl = C.LayerBuilder()
-
-    @property
-    def id(self):
-        return self.impl.id
-    @property
-    def name(self):
-        return self.impl.getName().decode()
-    @name.setter
-    def name(self, name: str):
-        self.impl.setName(name.encode())
-
-    @property
-    def type(self):
-        return self.impl.getType().decode()
-    @type.setter
-    def type(self, type: str):
-        self.impl.setType(type.encode())
-
-    @property
-    def input_ports(self):
-        cdef Port port
-        cdef vector[C.Port] c_ports = self.impl.getInputPorts()
-        py_ports = []
-        for p in c_ports:
-            port = Port()
-            port.impl = p
-            py_ports.append(port)
-        return py_ports
-
-    @input_ports.setter
-    def input_ports(self, ports: list):
-        cdef vector[C.Port] c_ports
-        cdef Port c_port
-        for p in ports:
-            c_port = Port(p.shape)
-            c_ports.push_back(c_port.impl)
-        self.impl.setInputPorts(c_ports)
-
-    @property
-    def output_ports(self):
-        cdef Port port
-        cdef vector[C.Port] c_ports = self.impl.getOutputPorts()
-        py_ports = []
-        for p in c_ports:
-            port = Port()
-            port.impl = p
-            py_ports.append(port)
-        return py_ports
-
-    @output_ports.setter
-    def output_ports(self, ports: list):
-        cdef vector[C.Port] c_ports
-        cdef Port c_port
-        for p in ports:
-            c_port = Port(p.shape)
-            c_ports.push_back(c_port.impl)
-        self.impl.setOutputPorts(c_ports)
-
-    @property
-    def params(self):
-        return {k.decode(): v.decode() for k, v in self.impl.getParameters()}
-
-    @params.setter
-    def params(self, params_map: dict):
-        cdef map[string, string] c_params_map
-        for k, v in params_map.items():
-            c_params_map[k.encode()] = str(v).encode()
-        self.impl.setParameters(c_params_map)
-
-    def build(self):
-        cdef ILayer layer = ILayer()
-        layer.impl = self.impl.build()
-        return layer
-
-    @property
-    def constant_data(self):
-        cdef map[string, Blob.Ptr] c_constant_data
-        c_constant_data = self.impl.getConstantData()
-        constant_data = {}
-        # TODO: Fix LAyerBuilder object copying - pass by reference
-        # constant_data = LayerConstantData()
-        # constant_data.impl = make_shared[C.LayerBuilder](self.impl)
-        cdef BlobBuffer weights_buffer
-        for weights in c_constant_data:
-            weights_buffer = BlobBuffer()
-            weights_buffer.reset(weights.second)
-            constant_data[weights.first.decode()] = weights_buffer.to_numpy()
-        return constant_data
-
-    @constant_data.setter
-    def constant_data(self, data: dict):
-        cdef vector[size_t] dims
-        cdef map[string, Blob.Ptr] c_constant_data
-        cdef Blob.Ptr blob_ptr
-        cdef BlobBuffer buffer
-        data = check_constant_data(data)
-        for k, v in data.items():
-            for dim in v.shape:
-                dims.push_back(dim)
-            ie_precision = np_precision_map.get(str(v.dtype), None)
-            if not ie_precision:
-                raise BufferError("Unsupported precision of the data for key {}! Given {} but one of the {} precisions expected".
-                                  format(k, str(v.dtype), ", ".join(np_precision_map.keys())))
-            blob_ptr = self.impl.allocateBlob(dims, ie_precision.encode())
-            buffer = BlobBuffer()
-            buffer.reset(blob_ptr)
-            np_buffer = buffer.to_numpy()
-            np_buffer[:] = v
-            c_constant_data[k.encode()] = blob_ptr
-
-        self.impl.setConstantData(c_constant_data)
-
-    # TODO: Implement get\setGraph when will be supported
-\ No newline at end of file
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl.cpp
deleted file mode 100644
index fc9ab4edf..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl.cpp
+++ /dev/null
@@ -1,330 +0,0 @@
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//        http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "dnn_builder_impl.hpp"
-
-// using namespace InferenceEnginePython;
-// using namespace std;
-
-std::map<std::string, InferenceEngine::Precision> precision_map = {{"FP32", InferenceEngine::Precision::FP32},
-                                                                   {"FP16", InferenceEngine::Precision::FP16},
-                                                                   {"Q78",  InferenceEngine::Precision::Q78},
-                                                                   {"I32",  InferenceEngine::Precision::I32},
-                                                                   {"I16",  InferenceEngine::Precision::I16},
-                                                                   {"I8",   InferenceEngine::Precision::I8},
-                                                                   {"U16",  InferenceEngine::Precision::U16},
-                                                                   {"U8",   InferenceEngine::Precision::U8}};
-
-InferenceEnginePython::ILayer buildILayer(InferenceEngine::ILayer::CPtr it) {
-    std::vector<InferenceEnginePython::Port> in_ports;
-    std::vector<InferenceEnginePython::Port> out_ports;
-    for (const auto &port : it->getInputPorts()) {
-        in_ports.push_back(InferenceEnginePython::Port(port.shape()));
-    }
-    for (const auto &port : it->getOutputPorts()) {
-        out_ports.push_back(InferenceEnginePython::Port(port.shape()));
-    }
-
-    std::map<std::string, std::string> params_map;
-    for (const auto &params : it->getParameters()->getParameters()) {
-        params_map.emplace(params.first, params.second);
-    }
-    std::map<std::string, InferenceEngine::Blob::Ptr> data_map;
-    for (const auto &data : it->getParameters()->getConstantData()) {
-        data_map.emplace(data.first, std::const_pointer_cast<InferenceEngine::Blob>(data.second));
-    }
-    return {it,
-            it->getName(),
-            it->getId(),
-            it->getType(),
-            params_map,
-            data_map,
-            in_ports,
-            out_ports,
-    };
-}
-
-// NetworkBuilder
-InferenceEnginePython::NetworkBuilder::NetworkBuilder(const std::string &name) {
-    // TODO(  ): std::move or instance in heap? Please check in other places.
-    InferenceEngine::Builder::Network network(name);
-    network_ptr = std::make_shared<InferenceEngine::Builder::Network>(network);
-}
-
-InferenceEnginePython::NetworkBuilder InferenceEnginePython::NetworkBuilder::from_ie_network(
-        const InferenceEnginePython::IENetwork &icnn_net) {
-    InferenceEngine::Builder::Network network((InferenceEngine::ICNNNetwork &) icnn_net.actual);
-    NetworkBuilder net_builder = NetworkBuilder();
-    net_builder.network_ptr = std::make_shared<InferenceEngine::Builder::Network>(network);
-    return net_builder;
-}
-
-InferenceEnginePython::INetwork InferenceEnginePython::NetworkBuilder::build() {
-    InferenceEngine::INetwork::Ptr i_net = network_ptr->build();
-    std::vector<ILayer> layers;
-    for (const auto &it : *i_net) {
-        layers.push_back(buildILayer(it));
-    }
-    std::vector<ILayer> inputs;
-    for (const auto &it : i_net->getInputs()) {
-        inputs.push_back(buildILayer(it));
-    }
-    std::vector<ILayer> outputs;
-    for (const auto &it : i_net->getInputs()) {
-        outputs.push_back(buildILayer(it));
-    }
-    return {i_net,             // INetwork ptr
-            i_net->getName(),  // name
-            i_net->size(),     // Number of layers
-            layers,
-            inputs,
-            outputs
-    };
-}
-
-std::vector<InferenceEnginePython::LayerBuilder> InferenceEnginePython::NetworkBuilder::getLayers() {
-    std::vector<LayerBuilder> layers;
-    for (const auto &it : network_ptr->getLayers()) {
-        LayerBuilder layer;
-        layer.actual = it;
-        layer.id = it.getId();
-        layers.push_back(layer);
-    }
-    return layers;
-}
-
-InferenceEnginePython::LayerBuilder InferenceEnginePython::NetworkBuilder::getLayer(size_t layer_id) {
-    LayerBuilder layer;
-    InferenceEngine::Builder::Layer ie_layer = network_ptr->getLayer(layer_id);
-    layer.actual = ie_layer;
-    layer.id = ie_layer.getId();
-    return layer;
-}
-
-void InferenceEnginePython::NetworkBuilder::removeLayer(const LayerBuilder &layer) {
-    network_ptr->removeLayer(layer.id);
-}
-
-const std::vector<InferenceEnginePython::Connection> InferenceEnginePython::NetworkBuilder::getLayerConnections(
-        const LayerBuilder &layer) {
-    std::vector<InferenceEngine::Connection> ie_connections = network_ptr->getLayerConnections(layer.id);
-    std::vector<Connection> connections;
-    for (auto const &it : ie_connections) {
-        PortInfo input(it.from().layerId(), it.from().portId());
-        PortInfo output(it.to().layerId(), it.to().portId());
-        connections.push_back(Connection(input, output));
-    }
-    return connections;
-}
-
-void InferenceEnginePython::NetworkBuilder::disconnect(const Connection &connection) {
-    network_ptr->disconnect(connection.actual);
-}
-
-void InferenceEnginePython::NetworkBuilder::connect(const PortInfo &input, const PortInfo &output) {
-    network_ptr->connect(input.actual, output.actual);
-}
-
-size_t InferenceEnginePython::NetworkBuilder::addLayer(const LayerBuilder &layer) {
-    return network_ptr->addLayer(layer.actual);
-}
-
-size_t InferenceEnginePython::NetworkBuilder::addAndConnectLayer(const std::vector<PortInfo> &input,
-                                                                 const LayerBuilder &layer) {
-    std::vector<InferenceEngine::PortInfo> ie_ports;
-    for (const auto &it : input) {
-        ie_ports.push_back(it.actual);
-    }
-    return network_ptr->addLayer(ie_ports, layer.actual);
-}
-// NetworkBuilder end
-// NetworkBuilder end
-
-// Port
-InferenceEnginePython::Port::Port(const std::vector<size_t> &shapes) {
-    actual = InferenceEngine::Port(shapes);
-    shape = actual.shape();
-}
-
-InferenceEnginePython::PortInfo::PortInfo(size_t layer_id, size_t port_id) : PortInfo() {
-    this->actual = InferenceEngine::PortInfo(layer_id, port_id);
-    this->layer_id = layer_id;
-    this->port_id = port_id;
-}
-// Port end
-
-// INetwork
-std::vector<InferenceEnginePython::Connection> InferenceEnginePython::INetwork::getLayerConnections(size_t layer_id) {
-    std::vector<Connection> connections;
-    for (const auto &it : actual->getLayerConnections(layer_id)) {
-        PortInfo input = PortInfo(it.from().layerId(), it.from().portId());
-        PortInfo output = PortInfo(it.to().layerId(), it.to().portId());
-        connections.push_back(Connection(input, output));
-    }
-    return connections;
-}
-
-InferenceEnginePython::IENetwork InferenceEnginePython::INetwork::to_ie_network() {
-    std::shared_ptr<InferenceEngine::ICNNNetwork> icnn_net = InferenceEngine::Builder::convertToICNNNetwork(actual);
-    InferenceEngine::CNNNetwork cnn_net(icnn_net);
-    IENetwork ie_net = IENetwork();
-    ie_net.actual = cnn_net;
-    ie_net.name = name;
-    ie_net.batch_size = cnn_net.getBatchSize();
-    return ie_net;
-}
-// INetwork end
-
-// Connection
-InferenceEnginePython::Connection::Connection(PortInfo input, PortInfo output) : Connection() {
-    this->actual = InferenceEngine::Connection(InferenceEngine::PortInfo(input.layer_id, input.port_id),
-                                               InferenceEngine::PortInfo(output.layer_id, output.port_id));
-    this->_from = PortInfo(actual.from().layerId(), actual.from().portId());
-    this->to = PortInfo(actual.to().layerId(), actual.to().portId());
-}
-// Connection end
-
-// LayerBuilder
-InferenceEnginePython::LayerBuilder::LayerBuilder(const std::string &type, const std::string &name) : LayerBuilder() {
-    InferenceEngine::Builder::Layer layer(type, name);
-    this->actual = layer;
-    this->id = layer.getId();
-}
-
-const std::string &InferenceEnginePython::LayerBuilder::getName() {
-    return actual.getName();
-}
-
-const std::string &InferenceEnginePython::LayerBuilder::getType() {
-    return actual.getType();
-}
-
-std::vector<InferenceEnginePython::Port> InferenceEnginePython::LayerBuilder::getInputPorts() {
-    std::vector<Port> ports;
-    for (const auto &it : actual.getInputPorts()) {
-        ports.push_back(Port(it.shape()));
-    }
-    return ports;
-}
-
-std::vector<InferenceEnginePython::Port> InferenceEnginePython::LayerBuilder::getOutputPorts() {
-    std::vector<Port> ports;
-    for (const auto &it : actual.getOutputPorts()) {
-        ports.push_back(Port(it.shape()));
-    }
-    return ports;
-}
-
-std::map<std::string, std::string> InferenceEnginePython::LayerBuilder::getParameters() {
-    std::map<std::string, std::string> params_map;
-    for (const auto &it : actual.getParameters()) {
-        params_map.emplace(it.first, it.second);
-    }
-    return params_map;
-}
-
-void InferenceEnginePython::LayerBuilder::setParameters(std::map<std::string, std::string> params_map) {
-    std::map<std::string, InferenceEngine::Parameter> ie_params_map;
-    for (const auto &it : params_map) {
-        InferenceEngine::Parameter ie_param((it.second));
-        ie_params_map.emplace(it.first, ie_param);
-    }
-    actual = actual.setParameters(ie_params_map);
-}
-
-void InferenceEnginePython::LayerBuilder::setName(const std::string &name) {
-    actual = actual.setName(name);
-}
-
-void InferenceEnginePython::LayerBuilder::setType(const std::string &type) {
-    actual = actual.setType(type);
-}
-
-void InferenceEnginePython::LayerBuilder::setInputPorts(const std::vector<Port> ports) {
-    std::vector<InferenceEngine::Port> ie_ports;
-    for (const auto &it : ports) {
-        ie_ports.push_back(it.actual);
-    }
-    actual = actual.setInputPorts(ie_ports);
-}
-
-void InferenceEnginePython::LayerBuilder::setOutputPorts(const std::vector<Port> ports) {
-    std::vector<InferenceEngine::Port> ie_ports;
-    for (const auto &it : ports) {
-        ie_ports.push_back(it.actual);
-    }
-    actual = actual.setOutputPorts(ie_ports);
-}
-
-InferenceEnginePython::ILayer InferenceEnginePython::LayerBuilder::build() {
-    return buildILayer(actual.build());
-}
-
-std::map<std::string, InferenceEngine::Blob::Ptr> InferenceEnginePython::LayerBuilder::getConstantData() {
-    std::map<std::string, InferenceEngine::Blob::Ptr> data_map;
-    for (const auto &it : actual.getConstantData()) {
-        data_map.emplace(it.first, std::const_pointer_cast<InferenceEngine::Blob>(it.second));
-    }
-    return data_map;
-}
-
-InferenceEngine::Blob::Ptr InferenceEnginePython::LayerBuilder::allocateBlob(std::vector<size_t> dims,
-                                                                             const std::string &precision) {
-    InferenceEngine::Layout ie_layout;
-    ie_layout = InferenceEngine::TensorDesc::getLayoutByDims(dims);
-    InferenceEngine::Precision ie_precision = precision_map.at(precision);
-    const InferenceEngine::TensorDesc &tdesc = InferenceEngine::TensorDesc(ie_precision, dims, ie_layout);
-    InferenceEngine::Blob::Ptr blob;
-    switch (ie_precision) {
-        case InferenceEngine::Precision::FP32:
-            blob = InferenceEngine::make_shared_blob<float>(tdesc);
-            break;
-        case InferenceEngine::Precision::FP16:
-            blob = InferenceEngine::make_shared_blob<int>(tdesc);
-            break;
-        case InferenceEngine::Precision::I16:
-            blob = InferenceEngine::make_shared_blob<int>(tdesc);
-            break;
-        case InferenceEngine::Precision::U16:
-            blob = InferenceEngine::make_shared_blob<int>(tdesc);
-            break;
-        case InferenceEngine::Precision::U8:
-            blob = InferenceEngine::make_shared_blob<unsigned char>(tdesc);
-            break;
-        case InferenceEngine::Precision::I8:
-            blob = InferenceEngine::make_shared_blob<signed char>(tdesc);
-            break;
-        case InferenceEngine::Precision::I32:
-            blob = InferenceEngine::make_shared_blob<signed int>(tdesc);
-            break;
-        default:
-            blob = InferenceEngine::make_shared_blob<float>(tdesc);
-            break;
-    }
-
-    blob->allocate();
-    return blob;
-}
-
-void InferenceEnginePython::LayerBuilder::setConstantData(const std::map<std::string,
-                                                          InferenceEngine::Blob::Ptr> &const_data) {
-    actual.setConstantData(const_data);
-}
-// TODO(  ): Fix LAyerBuilder object copying - pass by reference
-// void LayerBuilder::addConstantData(const std::string & name, InferenceEngine::Blob::Ptr data){
-//     InferenceEngine::Blob::CPtr c_data = const_pointer_cast<const InferenceEngine::Blob>(data);
-//     actual.addConstantData(name, c_data);
-// }
-
-// LayerBuilder end
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl.hpp
deleted file mode 100644
index b58994abc..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl.hpp
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//        http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include <ie_blob.h>
-
-#include <iterator>
-
-#include <string>
-#include <iostream>
-#include <algorithm>
-#include <vector>
-#include <map>
-
-#include <sstream>
-#include <ie_builders.hpp>
-#include <inference_engine.hpp>
-
-#include <ie_api_impl.hpp>
-
-
-// namespace IE Python
-namespace InferenceEnginePython {
-struct LayerBuilder;
-
-struct Port {
-    Port() = default;
-
-    explicit Port(const std::vector<size_t> &shapes);
-
-    InferenceEngine::Port actual;
-    std::vector<size_t> shape;
-};
-
-struct ILayer {
-    InferenceEngine::ILayer::CPtr layer_ptr;
-    std::string name;
-    size_t id;
-    std::string type;
-    std::map<std::string, std::string> parameters;
-    std::map<std::string, InferenceEngine::Blob::Ptr> constant_data;
-    std::vector<Port> in_ports;
-    std::vector<Port> out_ports;
-};
-
-struct PortInfo {
-    PortInfo(size_t layer_id, size_t port_id);
-
-    PortInfo() : actual(0, 0) {}
-
-    InferenceEngine::PortInfo actual;
-    size_t layer_id;
-    size_t port_id;
-};
-
-struct Connection {
-    Connection() : actual(InferenceEngine::PortInfo(0), InferenceEngine::PortInfo(0)) {}
-
-    Connection(PortInfo input, PortInfo output);
-
-    InferenceEngine::Connection actual;
-    PortInfo _from;
-    PortInfo to;
-};
-
-struct INetwork {
-    InferenceEngine::INetwork::Ptr actual;
-    std::string name;
-    size_t size;
-    std::vector<ILayer> layers;
-    std::vector<ILayer> inputs;
-    std::vector<ILayer> outputs;
-
-    std::vector<Connection> getLayerConnections(size_t layer_id);
-
-    IENetwork to_ie_network();
-};
-
-struct NetworkBuilder {
-    InferenceEngine::Builder::Network::Ptr network_ptr;
-
-    explicit NetworkBuilder(const std::string &name);
-
-    NetworkBuilder() = default;
-
-    NetworkBuilder from_ie_network(const InferenceEnginePython::IENetwork &icnn_net);
-
-    INetwork build();
-
-    std::vector<LayerBuilder> getLayers();
-
-    LayerBuilder getLayer(size_t layer_id);
-
-    void removeLayer(const LayerBuilder &layer);
-
-    size_t addLayer(const LayerBuilder &layer);
-
-    size_t addAndConnectLayer(const std::vector<PortInfo> &input, const LayerBuilder &layer);
-
-    const std::vector<Connection> getLayerConnections(const LayerBuilder &layer);
-
-    void disconnect(const Connection &connection);
-
-    void connect(const PortInfo &input, const PortInfo &output);
-};
-
-struct LayerBuilder {
-    InferenceEngine::Builder::Layer actual;
-    size_t id;
-
-    LayerBuilder(const std::string &type, const std::string &name);
-
-    LayerBuilder() : actual("", "") {}
-
-    LayerBuilder from_ilayer(const ILayer &ilayer);
-
-    const std::string &getName();
-
-    void setName(const std::string &name);
-
-    const std::string &getType();
-
-    void setType(const std::string &type);
-
-    std::vector<Port> getInputPorts();
-
-    void setInputPorts(const std::vector<Port> ports);
-
-    std::vector<Port> getOutputPorts();
-
-    void setOutputPorts(const std::vector<Port> ports);
-
-
-    std::map<std::string, std::string> getParameters();
-
-    void setParameters(std::map<std::string, std::string> params_map);
-
-    ILayer build();
-
-    std::map<std::string, InferenceEngine::Blob::Ptr> getConstantData();
-
-    InferenceEngine::Blob::Ptr allocateBlob(std::vector<size_t> dims, const std::string &precision);
-
-    void setConstantData(const std::map<std::string, InferenceEngine::Blob::Ptr> &const_data);
-
-// TODO(  ): Fix LAyerBuilder object copying - pass by reference
-//    void addConstantData(const std::string & name, InferenceEngine::Blob::Ptr data);
-};
-}  // namespace InferenceEnginePython
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl_defs.pxd
deleted file mode 100644
index 29795f26a..000000000
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/dnn_builder/dnn_builder_impl_defs.pxd
+++ /dev/null
@@ -1,97 +0,0 @@
-from libcpp.string cimport string
-from libcpp.vector cimport vector
-from libc.stddef cimport size_t
-from libcpp.memory cimport shared_ptr
-from libcpp.map cimport map
-from ..ie_api_impl_defs cimport IENetwork
-
-cdef extern from "<inference_engine.hpp>" namespace "InferenceEngine":
-    ctypedef vector[size_t] SizeVector
-
-    cdef cppclass TensorDesc:
-        SizeVector& getDims()
-        const Precision& getPrecision() const
-
-    cdef cppclass Blob:
-        ctypedef shared_ptr[Blob] Ptr
-        const TensorDesc& getTensorDesc() const
-        size_t element_size()  const
-
-    cdef cppclass Precision:
-        const char*name() const
-
-cdef extern from "dnn_builder_impl.hpp" namespace "InferenceEnginePython":
-    cdef cppclass ILayer:
-        const string name
-        size_t id
-        string type
-        map[string, string] parameters
-        vector[Port] in_ports
-        vector[Port] out_ports
-        map[string, Blob.Ptr] constant_data;
-
-
-    cdef cppclass INetwork:
-        string name
-        size_t size
-        vector[ILayer] layers
-        vector[ILayer] inputs
-        vector[ILayer] outputs
-        vector[Port] in_ports;
-        vector[Port] out_ports;
-        vector[Connection] getLayerConnections(size_t layer_id);
-        IENetwork to_ie_network();
-
-    cdef cppclass NetworkBuilder:
-        NetworkBuilder() except +
-        NetworkBuilder(string name) except +
-        NetworkBuilder from_ie_network(IENetwork &icnn_net) except +
-        INetwork build() except +
-        vector[LayerBuilder] getLayers() except +
-        LayerBuilder getLayer(size_t layer_id) except +
-        void removeLayer(const LayerBuilder& layer) except +
-        const vector[Connection] getLayerConnections(const LayerBuilder& layer) except +
-        void disconnect(const Connection& connection) except +
-        void connect(const PortInfo& input, const PortInfo& output) except +
-        size_t addLayer(const LayerBuilder& layer) except +
-        size_t addAndConnectLayer(const vector[PortInfo]& input, const LayerBuilder& layer);
-
-    cdef cppclass Port:
-        Port() except +
-        Port(const vector[size_t] & shapes) except +
-        const vector[size_t] shape
-
-
-    cdef cppclass PortInfo:
-        PortInfo(size_t layer_id, size_t port_id) except +
-        PortInfo() except +
-        size_t layer_id
-        size_t port_id
-
-    cdef cppclass Connection:
-        Connection(PortInfo input, PortInfo output) except +
-        Connection() except +
-        PortInfo _from
-        PortInfo to
-
-    cdef cppclass LayerBuilder:
-        LayerBuilder()
-        LayerBuilder(const string& type, const string& name ) except +
-        size_t id
-        LayerBuilder from_ilayer(const ILayer& ilayer) except +
-        string getName() except +
-        string getType() except +
-        vector[Port] getInputPorts() except +
-        vector[Port] getOutputPorts() except +
-        map[string, string] getParameters() except +
-        void setParameters(map[string, string] params_map) except +
-        void setName(const string & name) except +
-        void setType(const string & type) except +
-        void setInputPorts(const vector[Port] ports) except +
-        void setOutputPorts(const vector[Port] ports) except +
-        ILayer build() except +
-        map[string, Blob.Ptr] getConstantData()
-        void setConstantData(map[string, Blob.Ptr] &const_data)
-        # TODO: Fix LAyerBuilder object copying - pass by reference
-        # void addConstantData(const string & name, Blob.Ptr data)
-        Blob.Ptr allocateBlob(vector[size_t] dims, const string & precision)
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd
index 52bb27e1a..8ee5656fb 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pxd
@@ -33,6 +33,7 @@ cdef class IENetwork:
 
 cdef class ExecutableNetwork:
     cdef unique_ptr[C.IEExecNetwork] impl
+    cdef C.IEPlugin plugin_impl
     cdef public:
         _requests, inputs, outputs
 
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx
index 518125ebe..834f72c5b 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api.pyx
@@ -32,7 +32,7 @@ cdef dict_to_c_map(py_dict):
     return c_map
 
 supported_precisions = ["FP32", "FP16", "Q78", "I32", "I16", "I8", "U32", "U16"]
-supported_layouts = ["NCHW", "NHWC", "OIHW", "C", "CHW", "HW", "NC", "CN", "BLOCKED"]
+supported_layouts = ["NCHW", "NHWC", "OIHW", "C", "CHW", "HW", "NC", "CN", "BLOCKED", "NCDHW"]
 known_plugins = ['CPU', 'GPU', 'FPGA', 'MYRIAD', 'HETERO', 'HDDL']
 
 def get_version():
@@ -218,6 +218,10 @@ cdef class InferRequest:
             outputs[output] = self._get_blob_buffer(output.encode()).to_numpy()
         return deepcopy(outputs)
 
+    @property
+    def latency(self):
+        return self.impl.exec_time
+
     def set_batch(self, size):
         if size <= 0:
             raise ValueError("Batch size should be positive integer number but {} specified".format(size))
@@ -225,6 +229,7 @@ cdef class InferRequest:
 
     def _fill_inputs(self, inputs):
         for k, v in inputs.items():
+            assert k in self._inputs_list, "No input with name {} found in network".format(k)
             self.inputs[k][:] = v
 
 
@@ -357,6 +362,7 @@ cdef class IENetwork:
         cdef vector[size_t] c_shape
         net_inputs = self.inputs
         for input, shape in input_shapes.items():
+            c_shape = []
             if input not in net_inputs:
                 raise AttributeError("Specified {} layer not in network inputs {}! ".format(input, net_inputs))
             for v in shape:
@@ -396,7 +402,7 @@ cdef class IEPlugin:
         if config:
             for k, v in config.items():
                 c_config[to_std_string(k)] = to_std_string(v)
-
+        exec_net.plugin_impl = self.impl
         exec_net.impl = move(self.impl.load(network.impl, num_requests, c_config))
         exec_net.inputs = network.inputs.keys()
         exec_net.outputs = list(network.outputs.keys())
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp
index 296b1bfe4..1bb3e909e 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -35,6 +35,7 @@ std::map<std::string, InferenceEngine::Layout> layout_map = {{"ANY",     Inferen
                                                              {"HW",      InferenceEngine::Layout::HW},
                                                              {"NC",      InferenceEngine::Layout::NC},
                                                              {"CN",      InferenceEngine::Layout::CN},
+                                                             {"NCDHW",   InferenceEngine::Layout::NCDHW},
                                                              {"BLOCKED", InferenceEngine::Layout::BLOCKED}};
 #define stringify(name) # name
 #define IE_CHECK_CALL(expr) {                       \
@@ -301,7 +302,6 @@ InferenceEnginePython::IEPlugin::load(const InferenceEnginePython::IENetwork &ne
     InferenceEngine::ResponseDesc response;
     auto exec_network = InferenceEnginePython::make_unique<InferenceEnginePython::IEExecNetwork>(net.name,
                                                                                                  num_requests);
-
     IE_CHECK_CALL(actual->LoadNetwork(exec_network->actual, net.actual, config, &response))
 
     for (size_t i = 0; i < num_requests; ++i) {
@@ -322,9 +322,8 @@ InferenceEnginePython::IEExecNetwork::IEExecNetwork(const std::string &name, siz
 }
 
 void InferenceEnginePython::IEExecNetwork::infer() {
-    InferenceEngine::ResponseDesc response;
     InferRequestWrap &request = infer_requests[0];
-    request.request_ptr->Infer(&response);
+    request.infer();
 }
 
 
@@ -340,13 +339,33 @@ void InferenceEnginePython::InferRequestWrap::setBatch(int size) {
     IE_CHECK_CALL(request_ptr->SetBatch(size, &response));
 }
 
+void latency_callback(InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode code){
+    if (code != InferenceEngine::StatusCode::OK) {
+        THROW_IE_EXCEPTION << "Async Infer Request failed with status code " << code;
+    }
+    InferenceEnginePython::InferRequestWrap *requestWrap;
+    InferenceEngine::ResponseDesc dsc;
+    request->GetUserData(reinterpret_cast<void**>(&requestWrap), &dsc);
+    auto end_time = Time::now();
+    auto execTime = std::chrono::duration_cast<ns>(end_time - requestWrap->start_time);
+    requestWrap->exec_time = static_cast<double>(execTime.count()) * 0.000001;
+}
+
 void InferenceEnginePython::InferRequestWrap::infer() {
     InferenceEngine::ResponseDesc response;
+    start_time = Time::now();
     IE_CHECK_CALL(request_ptr->Infer(&response));
+    auto end_time = Time::now();
+    auto execTime = std::chrono::duration_cast<ns>(end_time - start_time);
+    exec_time = static_cast<double>(execTime.count()) * 0.000001;
 }
 
+
 void InferenceEnginePython::InferRequestWrap::infer_async() {
     InferenceEngine::ResponseDesc response;
+    start_time = Time::now();
+    IE_CHECK_CALL(request_ptr->SetUserData(this, &response));
+    request_ptr->SetCompletionCallback(latency_callback);
     IE_CHECK_CALL(request_ptr->StartAsync(&response));
 }
 
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
index 7bb2dd37a..9297de689 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,11 +23,16 @@
 #include <vector>
 #include <set>
 
+
 #include <iostream>
 #include <algorithm>
 
 #include <sstream>
-#include <inference_engine.hpp>
+#include <chrono>
+#include "inference_engine.hpp"
+
+typedef std::chrono::high_resolution_clock Time;
+typedef std::chrono::nanoseconds ns;
 
 namespace InferenceEnginePython {
 struct IENetLayer {
@@ -111,7 +116,8 @@ struct IENetwork {
 
 struct InferRequestWrap {
     InferenceEngine::IInferRequest::Ptr request_ptr;
-
+    Time::time_point start_time;
+    double exec_time;
     void infer();
 
     void infer_async();
diff --git a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd
index 78f2a62a0..f5729b684 100644
--- a/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd
+++ b/inference-engine/ie_bridges/python/src/openvino/inference_engine/ie_api_impl_defs.pxd
@@ -45,14 +45,14 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
         vector[size_t] dims
         string precision
         string layout
-        void setPrecision(string precision)
-        void setLayout(string layout)
+        void setPrecision(string precision) except +
+        void setLayout(string layout) except +
 
     cdef cppclass OutputInfo:
         vector[size_t] dims
         string precision
         string layout
-        void setPrecision(string precision)
+        void setPrecision(string precision) except +
 
     cdef cppclass ProfileInfo:
         string status
@@ -100,7 +100,8 @@ cdef extern from "ie_api_impl.hpp" namespace "InferenceEnginePython":
         string version
 
     cdef cppclass InferRequestWrap:
-        void getBlobPtr(const string &blob_name, Blob.Ptr &blob_ptr)
+        double exec_time;
+        void getBlobPtr(const string &blob_name, Blob.Ptr &blob_ptr) except +
         map[string, ProfileInfo] getPerformanceCounts() except +
         void infer() except +
         void infer_async() except +
diff --git a/inference-engine/include/builders/ie_argmax_layer.hpp b/inference-engine/include/builders/ie_argmax_layer.hpp
index 9ac1b5d52..f5a042a30 100644
--- a/inference-engine/include/builders/ie_argmax_layer.hpp
+++ b/inference-engine/include/builders/ie_argmax_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ArgMax layer
  */
-class INFERENCE_ENGINE_API_CLASS(ArgMaxLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ArgMaxLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ArgMaxLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ArgMaxLayer(Layer& genLayer);
+    explicit ArgMaxLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ArgMaxLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_batch_normalization_layer.hpp b/inference-engine/include/builders/ie_batch_normalization_layer.hpp
index dbdf538c8..14d0fe216 100644
--- a/inference-engine/include/builders/ie_batch_normalization_layer.hpp
+++ b/inference-engine/include/builders/ie_batch_normalization_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for BatchNormalization layer
  */
-class INFERENCE_ENGINE_API_CLASS(BatchNormalizationLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(BatchNormalizationLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit BatchNormalizationLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit BatchNormalizationLayer(Layer& genLayer);
+    explicit BatchNormalizationLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit BatchNormalizationLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -46,19 +51,6 @@ public:
     BatchNormalizationLayer& setPort(const Port &port);
 
     /**
-     * @brief Sets weights for layer
-     * @param weights Constant blob with weights
-     * @return reference to layer builder
-     */
-    BatchNormalizationLayer& setWeights(const Blob::CPtr& weights);
-    /**
-     * @brief Sets biases for layer
-     * @param biases Constant blob with biases
-     * @return reference to layer builder
-     */
-    BatchNormalizationLayer& setBiases(const Blob::CPtr& biases);
-
-    /**
      * @brief Returns epsilon
      * @return Epsilon
      */
@@ -69,12 +61,6 @@ public:
      * @return reference to layer builder
      */
     BatchNormalizationLayer& setEpsilon(float eps);
-
-    /**
-     * @brief Validates layer before creation
-     * @param layer generic layer builder
-     */
-    static void validate(const Layer& layer);
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_clamp_layer.hpp b/inference-engine/include/builders/ie_clamp_layer.hpp
index a57596230..642ff7ac5 100644
--- a/inference-engine/include/builders/ie_clamp_layer.hpp
+++ b/inference-engine/include/builders/ie_clamp_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Clamp layer
  */
-class INFERENCE_ENGINE_API_CLASS(ClampLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ClampLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ClampLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ClampLayer(Layer& genLayer);
+    explicit ClampLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ClampLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_concat_layer.hpp b/inference-engine/include/builders/ie_concat_layer.hpp
index 96cd23b97..b138d3a82 100644
--- a/inference-engine/include/builders/ie_concat_layer.hpp
+++ b/inference-engine/include/builders/ie_concat_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Concat layer
  */
-class INFERENCE_ENGINE_API_CLASS(ConcatLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ConcatLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit ConcatLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ConcatLayer(Layer& genLayer);
+    explicit ConcatLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ConcatLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_const_layer.hpp b/inference-engine/include/builders/ie_const_layer.hpp
index db0b31a20..54e7069e7 100644
--- a/inference-engine/include/builders/ie_const_layer.hpp
+++ b/inference-engine/include/builders/ie_const_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Const layer
  */
-class INFERENCE_ENGINE_API_CLASS(ConstLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ConstLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ConstLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ConstLayer(Layer& genLayer);
+    explicit ConstLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ConstLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -51,6 +56,12 @@ public:
      * @return reference to layer builder
      */
     ConstLayer& setData(const Blob::CPtr& data);
+
+    /**
+     * @brief Returns constant data
+     * @return constant blob with data
+     */
+    const Blob::CPtr& getData() const;
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_convolution_layer.hpp b/inference-engine/include/builders/ie_convolution_layer.hpp
index a577d5e87..68caf9909 100644
--- a/inference-engine/include/builders/ie_convolution_layer.hpp
+++ b/inference-engine/include/builders/ie_convolution_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <vector>
 #include <string>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ArgMax layer
  */
-class INFERENCE_ENGINE_API_CLASS(ConvolutionLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ConvolutionLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,14 +24,14 @@ public:
     explicit ConvolutionLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ConvolutionLayer(Layer& genLayer);
+    explicit ConvolutionLayer(const Layer::Ptr& layer);
     /**
-     * @brief Operator creates generic layer builder
-     * @return Generic layer builder
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
      */
-    operator Layer() const override;
+    explicit ConvolutionLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -40,19 +40,6 @@ public:
     ConvolutionLayer& setName(const std::string& name);
 
     /**
-     * @brief Sets weights for layer
-     * @param weights Constant blob with weights
-     * @return reference to layer builder
-     */
-    ConvolutionLayer& setWeights(const Blob::CPtr& weights);
-    /**
-     * @brief Sets biases for layer
-     * @param biases Constant blob with biases
-     * @return reference to layer builder
-     */
-    ConvolutionLayer& setBiases(const Blob::CPtr& biases);
-
-    /**
      * @brief Returns input port
      * @return Input port
      */
@@ -151,12 +138,6 @@ public:
      * @return reference to layer builder
      */
     ConvolutionLayer& setOutDepth(size_t outDepth);
-
-    /**
-     * @brief Validates layer before creation
-     * @param layer generic layer builder
-     */
-    static void validate(const Layer& layer);
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_crop_layer.hpp b/inference-engine/include/builders/ie_crop_layer.hpp
index 7bfbe94d4..275c1d2dd 100644
--- a/inference-engine/include/builders/ie_crop_layer.hpp
+++ b/inference-engine/include/builders/ie_crop_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Crop layer
  */
-class INFERENCE_ENGINE_API_CLASS(CropLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(CropLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit CropLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit CropLayer(Layer& genLayer);
+    explicit CropLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit CropLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -78,12 +83,6 @@ public:
      * @return reference to layer builder
      */
     CropLayer& setOffset(const std::vector<size_t>& offsets);
-
-    /**
-     * @brief Validates layer before creation
-     * @param layer generic layer builder
-     */
-    static void validate(const Layer& layer);
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_ctc_greedy_decoder_layer.hpp b/inference-engine/include/builders/ie_ctc_greedy_decoder_layer.hpp
index 78cdbd357..388bd0534 100644
--- a/inference-engine/include/builders/ie_ctc_greedy_decoder_layer.hpp
+++ b/inference-engine/include/builders/ie_ctc_greedy_decoder_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for CTCGreedyDecoder layer
  */
-class INFERENCE_ENGINE_API_CLASS(CTCGreedyDecoderLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(CTCGreedyDecoderLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit CTCGreedyDecoderLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit CTCGreedyDecoderLayer(Layer& genLayer);
+    explicit CTCGreedyDecoderLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit CTCGreedyDecoderLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_deconvolution_layer.hpp b/inference-engine/include/builders/ie_deconvolution_layer.hpp
index c8d39250b..a1cdfde18 100644
--- a/inference-engine/include/builders/ie_deconvolution_layer.hpp
+++ b/inference-engine/include/builders/ie_deconvolution_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
 #include <builders/ie_convolution_layer.hpp>
-#include <ie_inetwork.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -23,9 +23,14 @@ public:
     explicit DeconvolutionLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit DeconvolutionLayer(Layer& genLayer);
+    explicit DeconvolutionLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit DeconvolutionLayer(const Layer::CPtr& layer);
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_detection_output_layer.hpp b/inference-engine/include/builders/ie_detection_output_layer.hpp
index e4ee54223..c15c4f026 100644
--- a/inference-engine/include/builders/ie_detection_output_layer.hpp
+++ b/inference-engine/include/builders/ie_detection_output_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ArgMax layer
  */
-class INFERENCE_ENGINE_API_CLASS(DetectionOutputLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(DetectionOutputLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit DetectionOutputLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit DetectionOutputLayer(Layer& genLayer);
+    explicit DetectionOutputLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit DetectionOutputLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_eltwise_layer.hpp b/inference-engine/include/builders/ie_eltwise_layer.hpp
index ffdacba15..370cd6881 100644
--- a/inference-engine/include/builders/ie_eltwise_layer.hpp
+++ b/inference-engine/include/builders/ie_eltwise_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Eltwise layer
  */
-class INFERENCE_ENGINE_API_CLASS(EltwiseLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(EltwiseLayer): public LayerDecorator {
 public:
     /**
      * @brief The enum defines all Eltwise types
@@ -23,7 +23,11 @@ public:
     enum EltwiseType {
         SUM = 1,
         MAX,
-        MUL
+        MUL,
+        SUB,
+        DIV,
+        MIN,
+        SQUARED_DIFF
     };
 
     /**
@@ -33,9 +37,14 @@ public:
     explicit EltwiseLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit EltwiseLayer(Layer& genLayer);
+    explicit EltwiseLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit EltwiseLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_elu_layer.hpp b/inference-engine/include/builders/ie_elu_layer.hpp
index ad5b3b48b..eb62a9e40 100644
--- a/inference-engine/include/builders/ie_elu_layer.hpp
+++ b/inference-engine/include/builders/ie_elu_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ELU layer
  */
-class INFERENCE_ENGINE_API_CLASS(ELULayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ELULayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ELULayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ELULayer(Layer& genLayer);
+    explicit ELULayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ELULayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_fully_connected_layer.hpp b/inference-engine/include/builders/ie_fully_connected_layer.hpp
index 9b03f7ddb..f0a448ae0 100644
--- a/inference-engine/include/builders/ie_fully_connected_layer.hpp
+++ b/inference-engine/include/builders/ie_fully_connected_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for FullyConnected layer
  */
-class INFERENCE_ENGINE_API_CLASS(FullyConnectedLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(FullyConnectedLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit FullyConnectedLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit FullyConnectedLayer(Layer& genLayer);
+    explicit FullyConnectedLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit FullyConnectedLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -34,19 +39,6 @@ public:
     FullyConnectedLayer& setName(const std::string& name);
 
     /**
-     * @brief Sets weights for layer
-     * @param weights Constant blob with weights
-     * @return reference to layer builder
-     */
-    FullyConnectedLayer& setWeights(const Blob::CPtr& weights);
-    /**
-     * @brief Sets biases for layer
-     * @param biases Constant blob with biases
-     * @return reference to layer builder
-     */
-    FullyConnectedLayer& setBiases(const Blob::CPtr& biases);
-
-    /**
      * @brief Returns input port
      * @return Input port
      */
diff --git a/inference-engine/include/builders/ie_grn_layer.hpp b/inference-engine/include/builders/ie_grn_layer.hpp
index f06f9034b..e544ab6e6 100644
--- a/inference-engine/include/builders/ie_grn_layer.hpp
+++ b/inference-engine/include/builders/ie_grn_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ArgMax layer
  */
-class INFERENCE_ENGINE_API_CLASS(GRNLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(GRNLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit GRNLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit GRNLayer(Layer& genLayer);
+    explicit GRNLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit GRNLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_gru_sequence_layer.hpp b/inference-engine/include/builders/ie_gru_sequence_layer.hpp
new file mode 100644
index 000000000..5cb620a93
--- /dev/null
+++ b/inference-engine/include/builders/ie_gru_sequence_layer.hpp
@@ -0,0 +1,87 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
+#include <vector>
+#include <string>
+
+namespace InferenceEngine {
+namespace Builder {
+
+/**
+ * @brief The class represents a builder for GRUSequence layer
+ */
+class INFERENCE_ENGINE_API_CLASS(GRUSequenceLayer): public LayerDecorator {
+public:
+    /**
+     * @brief The constructor creates a builder with the name
+     * @param name Layer name
+     */
+    explicit GRUSequenceLayer(const std::string& name = "");
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer pointer to generic builder
+     */
+    explicit GRUSequenceLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit GRUSequenceLayer(const Layer::CPtr& layer);
+    /**
+     * @brief Sets the name for the layer
+     * @param name Layer name
+     * @return reference to layer builder
+     */
+    GRUSequenceLayer& setName(const std::string& name);
+
+    /**
+     * @brief Returns input ports with shapes for the layer
+     * @return Vector of ports
+     */
+    const std::vector<Port>& getInputPorts() const;
+    /**
+     * @brief Sets input ports for the layer
+     * @param ports vector of input ports
+     * @return reference to layer builder
+     */
+    GRUSequenceLayer& setInputPorts(const std::vector<Port>& ports);
+
+    /**
+     * @brief Returns output ports with shapes for the layer
+     * @return Vector of ports
+     */
+    const std::vector<Port>& getOutputPorts() const;
+    /**
+     * @brief Sets output ports for the layer
+     * @param ports vector of output ports
+     * @return reference to layer builder
+     */
+    GRUSequenceLayer& setOutputPorts(const std::vector<Port>& ports);
+
+    int getHiddenSize() const;
+    GRUSequenceLayer& setHiddenSize(int size);
+    bool getSequenceDim() const;
+    GRUSequenceLayer& setSqquenceDim(bool flag);
+    const std::vector<std::string>& getActivations() const;
+    GRUSequenceLayer& setActivations(const std::vector<std::string>& activations);
+    const std::vector<float>& getActivationsAlpha() const;
+    GRUSequenceLayer& setActivationsAlpha(const std::vector<float>& activations);
+    const std::vector<float>& getActivationsBeta() const;
+    GRUSequenceLayer& setActivationsBeta(const std::vector<float>& activations);
+    float getClip() const;
+    GRUSequenceLayer& setClip(float clip);
+    bool getLinearBeforeReset() const;
+    GRUSequenceLayer& setLinearBeforeReset(bool flag);
+    const std::string& getDirection() const;
+    GRUSequenceLayer& setDirection(const std::string& direction);
+};
+
+}  // namespace Builder
+}  // namespace InferenceEngine
+
+
diff --git a/inference-engine/include/builders/ie_input_layer.hpp b/inference-engine/include/builders/ie_input_layer.hpp
index 5312fcd59..f9a436f37 100644
--- a/inference-engine/include/builders/ie_input_layer.hpp
+++ b/inference-engine/include/builders/ie_input_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Input layer
  */
-class INFERENCE_ENGINE_API_CLASS(InputLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(InputLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit InputLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit InputLayer(Layer& genLayer);
+    explicit InputLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit InputLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -44,12 +49,6 @@ public:
      * @return reference to layer builder
      */
     InputLayer& setPort(const Port &port);
-
-    /**
-     * @brief Validates layer before creation
-     * @param layer generic layer builder
-     */
-    static void validate(const Layer& layer);
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_layer_builder.hpp b/inference-engine/include/builders/ie_layer_builder.hpp
index 47620fa6c..9e4038dc3 100644
--- a/inference-engine/include/builders/ie_layer_builder.hpp
+++ b/inference-engine/include/builders/ie_layer_builder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,7 +6,7 @@
 
 #include <details/caseless.hpp>
 #include <ie_parameter.hpp>
-#include <ie_inetwork.hpp>
+#include <ie_network.hpp>
 #include <ie_blob.h>
 #include <string>
 #include <vector>
@@ -25,26 +25,31 @@ struct ValidatorsHolder {
     /**
      * @brief Caseless map connects type with validator
      */
-    details::caseless_map<std::string, std::function<void(const Layer&)>> validators;
+    details::caseless_map<std::string, std::function<void(const std::shared_ptr<const Layer>&, bool)>> validators;
 };
 
 /**
  * @brief This class implements a builder for IE Layer
  */
-class INFERENCE_ENGINE_API_CLASS(Layer) {
+class INFERENCE_ENGINE_API_CLASS(Layer): public ILayer,
+        public std::enable_shared_from_this<Layer> {
 public:
     /**
+     * @brief A shared pointer to the Layer builder
+     */
+    using Ptr = std::shared_ptr<Layer>;
+    /**
+     * @brief A shared pointer to the constant Layer builder
+     */
+    using CPtr = std::shared_ptr<const Layer>;
+
+    /**
      * @brief The constructor creates a Layer builder with layer type and layer name
      * @param type Layer type
      * @param name Layer name
      */
     explicit Layer(const std::string& type, const std::string& name = "");
     /**
-     * @brief The constructor creates a Layer builder from shared pointer to ILayer
-     * @param layer shared pointer to ILayer
-     */
-    explicit Layer(const ILayer::Ptr& layer);
-    /**
      * @brief The constructor creates a Layer builder from shared pointer to constant ILayer
      * @param layer shared pointer to constant ILayer
      */
@@ -57,38 +62,25 @@ public:
     Layer(idx_t id, const Layer& layer);
 
     /**
-     * @brief Returns layer builder ID
-     * @return ID
+     * @brief Compares the given Layer builder with the current one
+     * @param rhs Layer builder to compare with
+     * @return true if the given Layer builder is equal to the current one, false - otherwise
      */
-    idx_t getId() const;
+    bool operator==(const Layer& rhs) const {
+        return params == rhs.params;
+    }
 
     /**
-     * @brief Returns a reference to layer type
-     * @return Layer type
-     */
-    std::string& getType();
-    /**
-     * @brief Returns a reference to constant layer type
-     * @return constant layer type
+     * @brief Returns layer ID
+     * @return Layer ID
      */
-    const std::string& getType() const;
-    /**
-     * @brief Sets layer type
-     * @param type Layer type
-     * @return Reference to Layer builder
-     */
-    Layer& setType(const std::string& type);
+    idx_t getId() const noexcept override;
 
     /**
-     * @brief Returns a reference to layer name
+     * @brief Returns a constant reference to layer name
      * @return Layer name
      */
-    std::string& getName();
-    /**
-     * @brief Returns a reference to constant layer name
-     * @return constant layer name
-     */
-    const std::string& getName() const;
+    const std::string& getName() const noexcept override;
     /**
      * @brief Sets layer name
      * @param name Layer name
@@ -97,32 +89,27 @@ public:
     Layer& setName(const std::string& name);
 
     /**
-     * @brief Returns layer subgraph
-     * @return shared pointer to INetwork
-     */
-    INetwork::Ptr& getGraph();
-    /**
-     * @brief Returns constant layer subgraph
-     * @return constant shared pointer to INetwork
+     * @brief Returns a constant reference to layer type
+     * @return Layer type
      */
-    const INetwork::Ptr& getGraph() const;
+    const std::string& getType() const noexcept override;
     /**
-     * @brief Sets layer subgraph
-     * @param graph constant shared pointer to INetwork
+     * @brief Sets layer type
+     * @param type Layer type
      * @return Reference to Layer builder
      */
-    Layer& setGraph(const INetwork::Ptr& graph);
+    Layer& setType(const std::string& type);
 
     /**
      * @brief Returns map of parameters
      * @return map of parameters
      */
-    std::map<std::string, Parameter>& getParameters();
+    const std::map<std::string, Parameter>& getParameters() const noexcept override;
     /**
-     * @brief Returns constant map of parameters
-     * @return constant map of parameters
+     * @brief Returns map of parameters
+     * @return map of parameters
      */
-    const std::map<std::string, Parameter>& getParameters() const;
+    std::map<std::string, Parameter>& getParameters();
     /**
      * @brief Sets parameters for layer
      * @param params constant map of parameters
@@ -131,46 +118,16 @@ public:
     Layer& setParameters(const std::map<std::string, Parameter>& params);
 
     /**
-     * @brief Returns map of internal blobs
-     * @return map of internal blobs
-     */
-    std::map<std::string, Blob::CPtr>& getConstantData();
-    /**
-     * @brief Returns constant map of internal blobs
-     * @return constant map of internal blobs
-     */
-    const std::map<std::string, Blob::CPtr>& getConstantData() const;
-    /**
-     * @brief Sets constant data for layer
-     * @param constData constant map of shared pointers to blobs
-     * @return Reference to Layer builder
-     */
-    Layer& setConstantData(const std::map<std::string, Blob::Ptr>& constData);
-    /**
-     * @brief Sets constant data for layer
-     * @param constData constant map of shared pointers to constant blobs
-     * @return Reference to Layer builder
-     */
-    Layer& setConstantData(const std::map<std::string, Blob::CPtr>& constData);
-    /**
-     * @brief Adds constant data for layer by name
-     * @param name Name of constant data
-     * @param data shared pointer to constant blob
-     * @return Reference to Layer builder
+     * @brief Returns vector of input ports
+     * @return Vector of input ports
      */
-    Layer& addConstantData(const std::string& name, const Blob::CPtr& data);
-
+    const std::vector<Port>& getInputPorts() const noexcept override;
     /**
      * @brief Returns vector of input ports
      * @return Vector of input ports
      */
     std::vector<Port>& getInputPorts();
     /**
-     * @brief Returns constant vector of input ports
-     * @return constant vector of input ports
-     */
-    const std::vector<Port>& getInputPorts() const;
-    /**
      * @brief Sets input ports
      * @param ports vector of ports
      * @return Reference to Layer builder
@@ -181,12 +138,12 @@ public:
      * @brief Returns vector of output ports
      * @return Vector of output ports
      */
-    std::vector<Port>& getOutputPorts();
+    const std::vector<Port>& getOutputPorts() const noexcept override;
     /**
-     * @brief Returns constant vector of output ports
-     * @return constant vector of output ports
+     * @brief Returns vector of output ports
+     * @return Vector of output ports
      */
-    const std::vector<Port>& getOutputPorts() const;
+    std::vector<Port>& getOutputPorts();
     /**
      * @brief Sets output ports
      * @param ports vector of ports
@@ -198,30 +155,27 @@ public:
      * @brief Validates the current builder and generates ILayer object
      * @return constant shared pointer to ILayer
      */
-    const ILayer::Ptr build() const;
+    const ILayer::CPtr build() const;
 
     /**
      * @brief Validates layer builder
      */
-    void validate() const;
+    void validate(bool partial = false) const;
 
     /**
      * @brief Registers a new validator for type
      * @param type Layer type
      * @param validator Layer validator
      */
-    static void addValidator(const std::string& type, const std::function<void(const Layer&)>& validator);
+    static void addValidator(const std::string& type, const std::function<void(const Layer::CPtr&, bool)>& validator);
 
 private:
     idx_t id;
     std::string type;
     std::string name;
-    INetwork::Ptr graph;
     std::vector<Port> inPorts;
     std::vector<Port> outPorts;
     std::map<std::string, Parameter> params;
-    std::map<std::string, Blob::CPtr> constData;
-
     static std::shared_ptr<ValidatorsHolder> getValidatorsHolder();
 };
 
@@ -235,7 +189,7 @@ public:
      * @param type Layer type
      * @param validator Layer validator
      */
-    explicit ValidatorRegisterBase(const std::string& type, const std::function<void(const Layer&)>& validator) {
+    explicit ValidatorRegisterBase(const std::string& type, const std::function<void(const Layer::CPtr&, bool)>& validator) {
         InferenceEngine::Builder::Layer::addValidator(type, validator);
     }
 };
diff --git a/inference-engine/include/builders/ie_layer_fragment.hpp b/inference-engine/include/builders/ie_layer_decorator.hpp
index a9723b3c1..c3b9c3488 100644
--- a/inference-engine/include/builders/ie_layer_fragment.hpp
+++ b/inference-engine/include/builders/ie_layer_decorator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,36 +14,41 @@ namespace Builder {
 /**
  * @brief This class defines the basic functional for layer builders
  */
-class INFERENCE_ENGINE_API_CLASS(LayerFragment) {
+class INFERENCE_ENGINE_API_CLASS(LayerDecorator) {
 public:
     /**
      * @brief The constructor creates layer builders with layer type and layer name
      * @param type Layer type
      * @param name Layer name
      */
-    LayerFragment(const std::string& type, const std::string& name);
+    LayerDecorator(const std::string& type, const std::string& name);
     /**
      * @brief The constructor creates layer builders from reference to generic layer builder
-     * @param genLayer Generic layer builder
+     * @param layer pointer to generic layer builder
      */
-    explicit LayerFragment(Layer& genLayer);
+    explicit LayerDecorator(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates layer builders from reference to generic layer builder
+     * @param layer constant pointer to generic layer builder
+     */
+    explicit LayerDecorator(const Layer::CPtr& layer);
     /**
      * @brief The copy constructor
      * @param rval Source builder
      */
-    explicit LayerFragment(const LayerFragment& rval);
+    LayerDecorator(const LayerDecorator& rval);
 
     /**
-     * @brief Copy operator for LayerFragment
+     * @brief Copy operator for LayerDecorator
      * @param rval
      * @return Layer builder
      */
-    LayerFragment& operator=(const LayerFragment& rval);
+    LayerDecorator& operator=(const LayerDecorator& rval);
 
     /**
      * @brief Virtual destructor
      */
-    virtual ~LayerFragment() = default;
+    virtual ~LayerDecorator() = default;
 
     /**
      * @brief The operator creates generic builder
@@ -52,6 +57,18 @@ public:
     virtual operator Layer() const;
 
     /**
+     * @brief The operator creates generic builder
+     * @return Pointer to generic builder
+     */
+    virtual operator Layer::Ptr();
+
+    /**
+     * @brief The operator creates generic builder
+     * @return Constant pointer to generic builder
+     */
+    virtual operator Layer::CPtr() const;
+
+    /**
      * @brief Returns layer type
      * @return Layer type
      */
@@ -63,12 +80,14 @@ public:
     const std::string& getName() const;
 
 protected:
-    const std::vector<size_t> uInts2size_t(const std::vector<unsigned int>& vector) const;
-    Layer& getLayer() const;
+    Layer::Ptr& getLayer();
+    const Layer::CPtr getLayer() const;
+    void checkType(const std::string& type) const;
+
+    Layer::CPtr cLayer;
 
 private:
-    Layer layer;
-    Layer& refLayer;
+    Layer::Ptr layer;
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_lrn_layer.hpp b/inference-engine/include/builders/ie_lrn_layer.hpp
new file mode 100644
index 000000000..625de1279
--- /dev/null
+++ b/inference-engine/include/builders/ie_lrn_layer.hpp
@@ -0,0 +1,99 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
+#include <string>
+
+namespace InferenceEngine {
+namespace Builder {
+
+/**
+ * @brief The class represents a builder for LRN layer
+ */
+class INFERENCE_ENGINE_API_CLASS(LRNLayer): public LayerDecorator {
+public:
+    /**
+     * @brief The constructor creates a builder with the name
+     * @param name Layer name
+     */
+    explicit LRNLayer(const std::string& name = "");
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer pointer to generic builder
+     */
+    explicit LRNLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit LRNLayer(const Layer::CPtr& layer);
+    /**
+     * @brief Sets the name for the layer
+     * @param name Layer name
+     * @return reference to layer builder
+     */
+    LRNLayer& setName(const std::string& name);
+
+    /**
+     * @brief Returns port with shapes for the layer
+     * @return Port with shapes
+     */
+    const Port& getPort() const;
+    /**
+     * @brief Sets port shapes for the layer
+     * @param port Port with shapes
+     * @return reference to layer builder
+     */
+    LRNLayer& setPort(const Port& port);
+    /**
+     * @brief Returns side length of the region
+     * @return Size
+     */
+    size_t getSize() const;
+    /**
+     * @brief Sets side length of the region
+     * @param size Size
+     * @return reference to layer builder
+     */
+    LRNLayer& setSize(size_t size);
+    /**
+     * @brief Returns scaling parameter for the normalizing sum
+     * @return Scaling parameter
+     */
+    float getAlpha() const;
+    /**
+     * @brief Sets scaling parameter for the normalizing sum
+     * @param alpha Scaling parameter
+     * @return reference to layer builder
+     */
+    LRNLayer& setAlpha(float alpha);
+    /**
+     * @brief Returns exponent for the normalizing sum
+     * @return Exponent
+     */
+    float getBeta() const;
+    /**
+     * @brief Sets exponent for the normalizing sum
+     * @param beta Exponent
+     * @return reference to layer builder
+     */
+    LRNLayer& setBeta(float beta);
+    /**
+     * @brief Returns region type
+     * @return true if normalizing sum is performed over adjacent channels
+     */
+    float getBias() const;
+    /**
+     * @brief Sets bias for the normalizing sum
+     * @param bias Bias
+     * @return reference to layer builder
+     */
+    LRNLayer& setBias(float bias);
+};
+
+}  // namespace Builder
+}  // namespace InferenceEngine
diff --git a/inference-engine/include/builders/ie_lstm_sequence_layer.hpp b/inference-engine/include/builders/ie_lstm_sequence_layer.hpp
new file mode 100644
index 000000000..1d01f58cc
--- /dev/null
+++ b/inference-engine/include/builders/ie_lstm_sequence_layer.hpp
@@ -0,0 +1,87 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
+#include <vector>
+#include <string>
+
+namespace InferenceEngine {
+namespace Builder {
+
+/**
+ * @brief The class represents a builder for LSTMSequence layer
+ */
+class INFERENCE_ENGINE_API_CLASS(LSTMSequenceLayer): public LayerDecorator {
+public:
+    /**
+     * @brief The constructor creates a builder with the name
+     * @param name Layer name
+     */
+    explicit LSTMSequenceLayer(const std::string& name = "");
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer pointer to generic builder
+     */
+    explicit LSTMSequenceLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit LSTMSequenceLayer(const Layer::CPtr& layer);
+    /**
+     * @brief Sets the name for the layer
+     * @param name Layer name
+     * @return reference to layer builder
+     */
+    LSTMSequenceLayer& setName(const std::string& name);
+
+    /**
+     * @brief Returns input ports with shapes for the layer
+     * @return Vector of ports
+     */
+    const std::vector<Port>& getInputPorts() const;
+    /**
+     * @brief Sets input ports for the layer
+     * @param ports vector of input ports
+     * @return reference to layer builder
+     */
+    LSTMSequenceLayer& setInputPorts(const std::vector<Port>& ports);
+
+    /**
+     * @brief Returns output ports with shapes for the layer
+     * @return Vector of ports
+     */
+    const std::vector<Port>& getOutputPorts() const;
+    /**
+     * @brief Sets output ports for the layer
+     * @param ports vector of output ports
+     * @return reference to layer builder
+     */
+    LSTMSequenceLayer& setOutputPorts(const std::vector<Port>& ports);
+
+    int getHiddenSize() const;
+    LSTMSequenceLayer& setHiddenSize(int size);
+    bool getSequenceDim() const;
+    LSTMSequenceLayer& setSqquenceDim(bool flag);
+    const std::vector<std::string>& getActivations() const;
+    LSTMSequenceLayer& setActivations(const std::vector<std::string>& activations);
+    const std::vector<float>& getActivationsAlpha() const;
+    LSTMSequenceLayer& setActivationsAlpha(const std::vector<float>& activations);
+    const std::vector<float>& getActivationsBeta() const;
+    LSTMSequenceLayer& setActivationsBeta(const std::vector<float>& activations);
+    float getClip() const;
+    LSTMSequenceLayer& setClip(float clip);
+    bool getInputForget() const;
+    LSTMSequenceLayer& setInputForget(bool flag);
+    const std::string& getDirection() const;
+    LSTMSequenceLayer& setDirection(const std::string& direction);
+};
+
+}  // namespace Builder
+}  // namespace InferenceEngine
+
+
diff --git a/inference-engine/include/builders/ie_memory_layer.hpp b/inference-engine/include/builders/ie_memory_layer.hpp
index b399e9501..474220b09 100644
--- a/inference-engine/include/builders/ie_memory_layer.hpp
+++ b/inference-engine/include/builders/ie_memory_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Memory layer
  */
-class INFERENCE_ENGINE_API_CLASS(MemoryLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(MemoryLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit MemoryLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit MemoryLayer(Layer& genLayer);
+    explicit MemoryLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit MemoryLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_mvn_layer.hpp b/inference-engine/include/builders/ie_mvn_layer.hpp
index ef9235104..4e6f327fa 100644
--- a/inference-engine/include/builders/ie_mvn_layer.hpp
+++ b/inference-engine/include/builders/ie_mvn_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for MVN layer
  */
-class INFERENCE_ENGINE_API_CLASS(MVNLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(MVNLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit MVNLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit MVNLayer(Layer& genLayer);
+    explicit MVNLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit MVNLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_network_builder.hpp b/inference-engine/include/builders/ie_network_builder.hpp
index 586a267c5..9b5000c4a 100644
--- a/inference-engine/include/builders/ie_network_builder.hpp
+++ b/inference-engine/include/builders/ie_network_builder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,7 +7,7 @@
 #include <builders/ie_layer_builder.hpp>
 #include <ie_icnn_network.hpp>
 #include <cpp/ie_cnn_network.h>
-#include <ie_inetwork.hpp>
+#include <ie_network.hpp>
 #include <ie_context.hpp>
 #include <ie_common.h>
 #include <ie_blob.h>
@@ -23,12 +23,43 @@ namespace Builder {
 /**
  * @brief This class implements a builder for IE Network
  */
-class INFERENCE_ENGINE_API_CLASS(Network) {
+class INFERENCE_ENGINE_API_CLASS(Network): public INetwork {
 public:
     /**
      * @brief A shared pointer to the Network builder
      */
     using Ptr = std::shared_ptr<Network>;
+    /**
+     * @brief An iterator for Network builder definition
+     */
+    using iterator = details::INetworkIterator<Network, Layer>;
+    /**
+     * @brief Begin network iterator
+     * @return Network iterator
+     */
+    iterator begin();
+    /**
+     * @brief Begin network iterator
+     * @return const INetwork iterator
+     */
+    const_iterator begin() const noexcept override;
+
+    /**
+     * @brief End network iterator
+     * @return Network iterator
+     */
+    iterator end();
+    /**
+     * @brief End network iterator
+     * @return const INetwork iterator
+     */
+    const_iterator end() const noexcept override;
+
+    /**
+     * @brief Returns a number of layers in the network.
+     * @return Layers count
+     */
+    size_t size() const noexcept override;
 
     /**
      * @brief The constructor creates a builder based on ICNNNetwork
@@ -69,11 +100,6 @@ public:
     Network(const Context& ieContext, const INetwork& network);
 
     /**
-     * @brief Virtual destructor
-     */
-    virtual ~Network() = default;
-
-    /**
      * @brief Adds new layer and connects it with previous layers
      *
      * @param inputs Vector with PortInfo objects from previous layers
@@ -112,64 +138,102 @@ public:
     void disconnect(const Connection& connection);
 
     /**
-     * @brief Returns layer builder by ID
-     *
-     * @param layerId Layer ID
+     * @brief Returns vector of layer builders
      *
-     * @return Layer buider
+     * @return Vector of layer builders
      */
-    Layer& getLayer(idx_t layerId);
+    std::vector<Layer::Ptr>& getLayers();
     /**
-     * @brief Returns constant layer builder by ID
-     *
-     * @param layerId Layer ID
+     * @brief Returns constant vector of layer builders
      *
-     * @return constant layer builder
+     * @return constant vector of layer builders
      */
-    const Layer& getLayer(idx_t layerId) const;
+    const std::vector<Layer::Ptr>& getLayers() const;
 
     /**
-     * @brief Returns vector of layer builders
-     *
-     * @return Vector of layer builders
+     * @brief Returns a constant smart pointer to a Layer interface.
+     * If the layer is missing, returns nullptr.
+     * @param id Id of the Layer
+     * @return Layer interface smart pointer
      */
-    std::vector<Layer>& getLayers();
+    const ILayer::CPtr getLayer(idx_t id) const noexcept override;
+    Layer::Ptr getLayer(idx_t layerId);
+
     /**
-     * @brief Returns constant vector of layer builders
-     *
-     * @return constant vector of layer builders
+     * @brief Returns a constant vector of input layers.
+     * @return Vector of input layers
      */
-    const std::vector<Layer>& getLayers() const;
+    const std::vector<ILayer::CPtr> getInputs() const noexcept override;
+    /**
+     * @brief Returns a vector of input layers.
+     * @return Vector of input layers
+     */
+    std::vector<Layer::Ptr> getInputs();
 
     /**
-     * @brief Returns all connections for layer
-     *
-     * @param layerId Layer ID
-     *
-     * @return Vector of connections for the current layer
+     * @brief Returns a constant vector of output layers.
+     * @return Vector of output layers
+     */
+    const std::vector<ILayer::CPtr> getOutputs() const noexcept override;
+    /**
+     * @brief Returns a vector of input layers.
+     * @return Vector of input layers
+     */
+    std::vector<Layer::Ptr> getOutputs();
+
+    /**
+     * @brief Returns a constant vector of connections for specific layer.
+     * If the layer is missing, returns empty vector.
+     * @param layerId layer index
+     * @return Vector of connections
      */
-    const std::vector<Connection> getLayerConnections(idx_t layerId) const noexcept;
+    const std::vector<Connection> getLayerConnections(idx_t layerId) const noexcept override;
 
     /**
-     * @brief Builds and validate networks
+     * @brief Returns a constant vector of all connections.
+     * @return Vector of connections
+     */
+    const std::vector<Connection>& getConnections() const;
+
+    /**
+     * @brief Returns a network name.
+     * @return Network name
+     */
+    const std::string& getName() const noexcept override;
+
+    /**
+     * @brief Returns a network context
+     * @return const reference to Context
+     */
+    const Context& getContext() const noexcept override;
+    /**
+     * @brief Returns a network context
+     * @return reference to Context
+     */
+    Context& getContext() noexcept;
+
+    /**
+     * @brief Builds and validate network
      *
      * @return const shared pointer to INetwork
      */
-    const INetwork::Ptr build() const;
+    const INetwork::CPtr build();
+
+    /**
+     * @brief Validates network
+     *
+    */
+    void validate();
 
     /**
      * @brief The operator builds network
      *
      * @return const shared pointer to INetwork
      */
-    explicit operator const INetwork::Ptr() const;
+    explicit operator const INetwork::CPtr();
 
 private:
-    const Context ctx;
-    const size_t version;
-    std::string name;
-    std::vector<Layer> layers;
-    std::vector<Connection> connections;
+    std::map<std::string, Parameter> parameters;
 };
 
 /**
@@ -178,7 +242,7 @@ private:
  * @param network constant shared pointer to INetwork object
  * @return constant shared pointer to ICNNNetwork
  */
-INFERENCE_ENGINE_API_CPP(const std::shared_ptr<ICNNNetwork>) convertToICNNNetwork(const INetwork::Ptr& network);
+INFERENCE_ENGINE_API_CPP(const std::shared_ptr<ICNNNetwork>) convertToICNNNetwork(const INetwork::CPtr& network);
 
 }  // namespace Builder
 
diff --git a/inference-engine/include/builders/ie_norm_layer.hpp b/inference-engine/include/builders/ie_norm_layer.hpp
index 58d972bd1..62090570b 100644
--- a/inference-engine/include/builders/ie_norm_layer.hpp
+++ b/inference-engine/include/builders/ie_norm_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Norm layer
  */
-class INFERENCE_ENGINE_API_CLASS(NormLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(NormLayer): public LayerDecorator {
 public:
     /**
      * @brief The enum defines all Norm types
@@ -30,9 +30,14 @@ public:
     explicit NormLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit NormLayer(Layer& genLayer);
+    explicit NormLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit NormLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_normalize_layer.hpp b/inference-engine/include/builders/ie_normalize_layer.hpp
index bc05381df..b2f2b8ecb 100644
--- a/inference-engine/include/builders/ie_normalize_layer.hpp
+++ b/inference-engine/include/builders/ie_normalize_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Normalize layer
  */
-class INFERENCE_ENGINE_API_CLASS(NormalizeLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(NormalizeLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit NormalizeLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit NormalizeLayer(Layer& genLayer);
+    explicit NormalizeLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit NormalizeLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_output_layer.hpp b/inference-engine/include/builders/ie_output_layer.hpp
index 71abd38fc..d113e6024 100644
--- a/inference-engine/include/builders/ie_output_layer.hpp
+++ b/inference-engine/include/builders/ie_output_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Output layer
  */
-class INFERENCE_ENGINE_API_CLASS(OutputLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(OutputLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit OutputLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit OutputLayer(Layer& genLayer);
+    explicit OutputLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit OutputLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_permute_layer.hpp b/inference-engine/include/builders/ie_permute_layer.hpp
index 54cfcf318..f6cad5b2a 100644
--- a/inference-engine/include/builders/ie_permute_layer.hpp
+++ b/inference-engine/include/builders/ie_permute_layer.hpp
@@ -1,10 +1,10 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
+#include <builders/ie_layer_decorator.hpp>
 #include <vector>
 #include <string>
 
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Permute layer
  */
-class INFERENCE_ENGINE_API_CLASS(PermuteLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PermuteLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit PermuteLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PermuteLayer(Layer& genLayer);
+    explicit PermuteLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PermuteLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -34,19 +39,6 @@ public:
     PermuteLayer& setName(const std::string& name);
 
     /**
-     * @brief Sets weights for layer
-     * @param weights Constant blob with weights
-     * @return reference to layer builder
-     */
-    PermuteLayer& setWeights(const Blob::CPtr& weights);
-    /**
-     * @brief Sets biases for layer
-     * @param biases Constant blob with biases
-     * @return reference to layer builder
-     */
-    PermuteLayer& setBiases(const Blob::CPtr& biases);
-
-    /**
      * @brief Returns input port
      * @return Input port
      */
diff --git a/inference-engine/include/builders/ie_pooling_layer.hpp b/inference-engine/include/builders/ie_pooling_layer.hpp
index 80150ae51..b732a495b 100644
--- a/inference-engine/include/builders/ie_pooling_layer.hpp
+++ b/inference-engine/include/builders/ie_pooling_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <vector>
 #include <string>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Pooling layer
  */
-class INFERENCE_ENGINE_API_CLASS(PoolingLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PoolingLayer): public LayerDecorator {
 public:
     /**
      * @brief The enum defines available pooling types
@@ -40,9 +40,14 @@ public:
     explicit PoolingLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PoolingLayer(Layer& genLayer);
+    explicit PoolingLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PoolingLayer(const Layer::CPtr& layer);
     /**
      * @brief Operator creates generic layer builder
      * @return Generic layer builder
@@ -155,12 +160,6 @@ public:
      */
     PoolingLayer& setExcludePad(bool exclude);
 
-    /**
-     * @brief Validates layer before creation
-     * @param layer generic layer builder
-     */
-    static void validate(const Layer& layer);
-
 private:
     PoolingType type;
     RoundingType roundingType;
diff --git a/inference-engine/include/builders/ie_power_layer.hpp b/inference-engine/include/builders/ie_power_layer.hpp
index 94ef1cc9f..4db69c082 100644
--- a/inference-engine/include/builders/ie_power_layer.hpp
+++ b/inference-engine/include/builders/ie_power_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Power layer
  */
-class INFERENCE_ENGINE_API_CLASS(PowerLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PowerLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit PowerLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PowerLayer(Layer& genLayer);
+    explicit PowerLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PowerLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_prelu_layer.hpp b/inference-engine/include/builders/ie_prelu_layer.hpp
index 5e7dedda1..d3f7f011e 100644
--- a/inference-engine/include/builders/ie_prelu_layer.hpp
+++ b/inference-engine/include/builders/ie_prelu_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for PReLU layer
  */
-class INFERENCE_ENGINE_API_CLASS(PReLULayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PReLULayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit PReLULayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PReLULayer(Layer& genLayer);
+    explicit PReLULayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PReLULayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -34,12 +39,6 @@ public:
     PReLULayer& setName(const std::string& name);
 
     /**
-     * @brief Sets weights for layer
-     * @param weights Constant blob with weights
-     * @return reference to layer builder
-     */
-    PReLULayer& setWeights(const Blob::CPtr& weights);
-    /**
      * @brief Returns port with shapes for the layer
      * @return Port with shapes
      */
diff --git a/inference-engine/include/builders/ie_prior_box_clustered_layer.hpp b/inference-engine/include/builders/ie_prior_box_clustered_layer.hpp
index 61d7f1653..ff891dcb9 100644
--- a/inference-engine/include/builders/ie_prior_box_clustered_layer.hpp
+++ b/inference-engine/include/builders/ie_prior_box_clustered_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for PriorBoxClustered layer
  */
-class INFERENCE_ENGINE_API_CLASS(PriorBoxClusteredLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PriorBoxClusteredLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit PriorBoxClusteredLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PriorBoxClusteredLayer(Layer& genLayer);
+    explicit PriorBoxClusteredLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PriorBoxClusteredLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_prior_box_layer.hpp b/inference-engine/include/builders/ie_prior_box_layer.hpp
index 8051d6c6a..3e36f0d89 100644
--- a/inference-engine/include/builders/ie_prior_box_layer.hpp
+++ b/inference-engine/include/builders/ie_prior_box_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for PriorBox layer
  */
-class INFERENCE_ENGINE_API_CLASS(PriorBoxLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PriorBoxLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit PriorBoxLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PriorBoxLayer(Layer& genLayer);
+    explicit PriorBoxLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PriorBoxLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_proposal_layer.hpp b/inference-engine/include/builders/ie_proposal_layer.hpp
index e7fcac461..aa145040b 100644
--- a/inference-engine/include/builders/ie_proposal_layer.hpp
+++ b/inference-engine/include/builders/ie_proposal_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Proposal layer
  */
-class INFERENCE_ENGINE_API_CLASS(ProposalLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ProposalLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit ProposalLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ProposalLayer(Layer& genLayer);
+    explicit ProposalLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ProposalLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_psroi_pooling_layer.hpp b/inference-engine/include/builders/ie_psroi_pooling_layer.hpp
index 82c9f47c8..34b510878 100644
--- a/inference-engine/include/builders/ie_psroi_pooling_layer.hpp
+++ b/inference-engine/include/builders/ie_psroi_pooling_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for PSROIPooling layer
  */
-class INFERENCE_ENGINE_API_CLASS(PSROIPoolingLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(PSROIPoolingLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit PSROIPoolingLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit PSROIPoolingLayer(Layer& genLayer);
+    explicit PSROIPoolingLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit PSROIPoolingLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_region_yolo_layer.hpp b/inference-engine/include/builders/ie_region_yolo_layer.hpp
index 1a2d645ef..1f2e37ccc 100644
--- a/inference-engine/include/builders/ie_region_yolo_layer.hpp
+++ b/inference-engine/include/builders/ie_region_yolo_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for RegionYolo layer
  */
-class INFERENCE_ENGINE_API_CLASS(RegionYoloLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(RegionYoloLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit RegionYoloLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit RegionYoloLayer(Layer& genLayer);
+    explicit RegionYoloLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit RegionYoloLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_relu6_layer.hpp b/inference-engine/include/builders/ie_relu6_layer.hpp
index 3bc3360fb..1cc384a7d 100644
--- a/inference-engine/include/builders/ie_relu6_layer.hpp
+++ b/inference-engine/include/builders/ie_relu6_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ReLU6 layer
  */
-class INFERENCE_ENGINE_API_CLASS(ReLU6Layer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ReLU6Layer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ReLU6Layer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ReLU6Layer(Layer& genLayer);
+    explicit ReLU6Layer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ReLU6Layer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_relu_layer.hpp b/inference-engine/include/builders/ie_relu_layer.hpp
index 9422e1924..2853858cc 100644
--- a/inference-engine/include/builders/ie_relu_layer.hpp
+++ b/inference-engine/include/builders/ie_relu_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ReLU layer
  */
-class INFERENCE_ENGINE_API_CLASS(ReLULayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ReLULayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ReLULayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ReLULayer(Layer& genLayer);
+    explicit ReLULayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ReLULayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_reorg_yolo_layer.hpp b/inference-engine/include/builders/ie_reorg_yolo_layer.hpp
index 4719873a0..0529ee5bc 100644
--- a/inference-engine/include/builders/ie_reorg_yolo_layer.hpp
+++ b/inference-engine/include/builders/ie_reorg_yolo_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ReorgYolo layer
  */
-class INFERENCE_ENGINE_API_CLASS(ReorgYoloLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ReorgYoloLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit ReorgYoloLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ReorgYoloLayer(Layer& genLayer);
+    explicit ReorgYoloLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer const pointer to generic builder
+     */
+    explicit ReorgYoloLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_resample_layer.hpp b/inference-engine/include/builders/ie_resample_layer.hpp
new file mode 100644
index 000000000..4e343bd14
--- /dev/null
+++ b/inference-engine/include/builders/ie_resample_layer.hpp
@@ -0,0 +1,126 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace Builder {
+
+/**
+ * @brief The class represents a builder for Resample layer
+ */
+class INFERENCE_ENGINE_API_CLASS(ResampleLayer): public LayerDecorator {
+public:
+    /**
+     * @brief The constructor creates a builder with the name
+     * @param name Layer name
+     */
+    explicit ResampleLayer(const std::string& name = "");
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer pointer to generic builder
+     */
+    explicit ResampleLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer const pointer to generic builder
+     */
+    explicit ResampleLayer(const Layer::CPtr& layer);
+    /**
+     * @brief Sets the name for the layer
+     * @param name Layer name
+     * @return reference to layer builder
+     */
+    ResampleLayer& setName(const std::string& name);
+
+    /**
+     * @brief Returns input port
+     * @return Input port
+     */
+    const Port& getInputPort() const;
+    /**
+     * @brief Sets input port
+     * @param ports Input port
+     * @return reference to layer builder
+     */
+    ResampleLayer& setInputPort(const Port& ports);
+    /**
+     * @brief Returns output port
+     * @return Output port
+     */
+    const Port& getOutputPort() const;
+    /**
+     * @brief Sets output port
+     * @param port Output port
+     * @return reference to layer builder
+     */
+    ResampleLayer& setOutputPort(const Port& port);
+    /**
+     * @brief Returns resample type
+     * @return Type
+     */
+    const std::string& getResampleType() const;
+    /**
+     * @brief Sets resample type
+     * @param type Type
+     * @return reference to layer builder
+     */
+    ResampleLayer& setResampleType(const std::string& type);
+    /**
+     * @brief Returns flag that denotes whether to perform anti-aliasing
+     * @return true if anti-aliasing is performed
+     */
+    bool getAntialias() const;
+    /**
+     * @brief Sets flag that denotes whether to perform anti-aliasing
+     * @param flag antialias
+     * @return reference to layer builder
+     */
+    ResampleLayer& setAntialias(bool antialias);
+    /**
+     * @brief Returns resample factor
+     * @return Factor
+     */
+    float getFactor() const;
+    /**
+     * @brief Sets resample factor
+     * @param factor Factor
+     * @return reference to layer builder
+     */
+    ResampleLayer& setFactor(float factor);
+    /**
+     * @brief Returns width
+     * @return Width
+     */
+    size_t getWidth() const;
+    /**
+     * @brief Sets width
+     * @param width Width
+     * @return reference to layer builder
+     */
+    ResampleLayer& setWidth(size_t width);
+    /**
+     * @brief Returns height
+     * @return Height
+     */
+    size_t getHeight() const;
+    /**
+     * @brief Sets height
+     * @param height Height
+     * @return reference to layer builder
+     */
+    ResampleLayer& setHeight(size_t height);
+};
+
+}  // namespace Builder
+}  // namespace InferenceEngine
+
+
+
+
diff --git a/inference-engine/include/builders/ie_reshape_layer.hpp b/inference-engine/include/builders/ie_reshape_layer.hpp
index 42eacea50..578e9b7f0 100644
--- a/inference-engine/include/builders/ie_reshape_layer.hpp
+++ b/inference-engine/include/builders/ie_reshape_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Reshape layer
  */
-class INFERENCE_ENGINE_API_CLASS(ReshapeLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ReshapeLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit ReshapeLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ReshapeLayer(Layer& genLayer);
+    explicit ReshapeLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ReshapeLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_rnn_sequence_layer.hpp b/inference-engine/include/builders/ie_rnn_sequence_layer.hpp
new file mode 100644
index 000000000..885191665
--- /dev/null
+++ b/inference-engine/include/builders/ie_rnn_sequence_layer.hpp
@@ -0,0 +1,83 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
+#include <vector>
+#include <string>
+
+namespace InferenceEngine {
+namespace Builder {
+
+/**
+ * @brief The class represents a builder for RNNSequence layer
+ */
+class INFERENCE_ENGINE_API_CLASS(RNNSequenceLayer): public LayerDecorator {
+public:
+    /**
+     * @brief The constructor creates a builder with the name
+     * @param name Layer name
+     */
+    explicit RNNSequenceLayer(const std::string& name = "");
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer pointer to generic builder
+     */
+    explicit RNNSequenceLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit RNNSequenceLayer(const Layer::CPtr& layer);
+    /**
+     * @brief Sets the name for the layer
+     * @param name Layer name
+     * @return reference to layer builder
+     */
+    RNNSequenceLayer& setName(const std::string& name);
+
+    /**
+     * @brief Returns input ports with shapes for the layer
+     * @return Vector of ports
+     */
+    const std::vector<Port>& getInputPorts() const;
+    /**
+     * @brief Sets input ports for the layer
+     * @param ports vector of input ports
+     * @return reference to layer builder
+     */
+    RNNSequenceLayer& setInputPorts(const std::vector<Port>& ports);
+
+    /**
+     * @brief Returns output ports with shapes for the layer
+     * @return Vector of ports
+     */
+    const std::vector<Port>& getOutputPorts() const;
+    /**
+     * @brief Sets output ports for the layer
+     * @param ports vector of output ports
+     * @return reference to layer builder
+     */
+    RNNSequenceLayer& setOutputPorts(const std::vector<Port>& ports);
+
+    int getHiddenSize() const;
+    RNNSequenceLayer& setHiddenSize(int size);
+    bool getSequenceDim() const;
+    RNNSequenceLayer& setSqquenceDim(bool flag);
+    const std::vector<std::string>& getActivations() const;
+    RNNSequenceLayer& setActivations(const std::vector<std::string>& activations);
+    const std::vector<float>& getActivationsAlpha() const;
+    RNNSequenceLayer& setActivationsAlpha(const std::vector<float>& activations);
+    const std::vector<float>& getActivationsBeta() const;
+    RNNSequenceLayer& setActivationsBeta(const std::vector<float>& activations);
+    float getClip() const;
+    RNNSequenceLayer& setClip(float clip);
+};
+
+}  // namespace Builder
+}  // namespace InferenceEngine
+
+
diff --git a/inference-engine/include/builders/ie_roi_pooling_layer.hpp b/inference-engine/include/builders/ie_roi_pooling_layer.hpp
index d6bb57860..7105d09a8 100644
--- a/inference-engine/include/builders/ie_roi_pooling_layer.hpp
+++ b/inference-engine/include/builders/ie_roi_pooling_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ROIPooling layer
  */
-class INFERENCE_ENGINE_API_CLASS(ROIPoolingLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ROIPoolingLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit ROIPoolingLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ROIPoolingLayer(Layer& genLayer);
+    explicit ROIPoolingLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ROIPoolingLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_scale_shift_layer.hpp b/inference-engine/include/builders/ie_scale_shift_layer.hpp
index 361664e60..9e40572d4 100644
--- a/inference-engine/include/builders/ie_scale_shift_layer.hpp
+++ b/inference-engine/include/builders/ie_scale_shift_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for ScaleShift layer
  */
-class INFERENCE_ENGINE_API_CLASS(ScaleShiftLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(ScaleShiftLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit ScaleShiftLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit ScaleShiftLayer(Layer& genLayer);
+    explicit ScaleShiftLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit ScaleShiftLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
@@ -44,19 +49,6 @@ public:
      * @return reference to layer builder
      */
     ScaleShiftLayer& setPort(const Port &port);
-
-    /**
-     * @brief Sets weights for layer
-     * @param weights Constant blob with weights
-     * @return reference to layer builder
-     */
-    ScaleShiftLayer& setWeights(const Blob::CPtr& weights);
-    /**
-     * @brief Sets biases for layer
-     * @param biases Constant blob with biases
-     * @return reference to layer builder
-     */
-    ScaleShiftLayer& setBiases(const Blob::CPtr& biases);
 };
 
 }  // namespace Builder
diff --git a/inference-engine/include/builders/ie_sigmoid_layer.hpp b/inference-engine/include/builders/ie_sigmoid_layer.hpp
index 6c483588a..d6f20a6cd 100644
--- a/inference-engine/include/builders/ie_sigmoid_layer.hpp
+++ b/inference-engine/include/builders/ie_sigmoid_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Sigmoid layer
  */
-class INFERENCE_ENGINE_API_CLASS(SigmoidLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(SigmoidLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit SigmoidLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit SigmoidLayer(Layer& genLayer);
+    explicit SigmoidLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit SigmoidLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_simpler_nms_layer.hpp b/inference-engine/include/builders/ie_simpler_nms_layer.hpp
index 28cf6ee7e..c97e84bae 100644
--- a/inference-engine/include/builders/ie_simpler_nms_layer.hpp
+++ b/inference-engine/include/builders/ie_simpler_nms_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for SimplerNMS layer
  */
-class INFERENCE_ENGINE_API_CLASS(SimplerNMSLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(SimplerNMSLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit SimplerNMSLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit SimplerNMSLayer(Layer& genLayer);
+    explicit SimplerNMSLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit SimplerNMSLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_softmax_layer.hpp b/inference-engine/include/builders/ie_softmax_layer.hpp
index 1ce13b87c..2031a6253 100644
--- a/inference-engine/include/builders/ie_softmax_layer.hpp
+++ b/inference-engine/include/builders/ie_softmax_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for SoftMax layer
  */
-class INFERENCE_ENGINE_API_CLASS(SoftMaxLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(SoftMaxLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit SoftMaxLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit SoftMaxLayer(Layer& genLayer);
+    explicit SoftMaxLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit SoftMaxLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_split_layer.hpp b/inference-engine/include/builders/ie_split_layer.hpp
index 526ed79e7..f982da0a1 100644
--- a/inference-engine/include/builders/ie_split_layer.hpp
+++ b/inference-engine/include/builders/ie_split_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Split layer
  */
-class INFERENCE_ENGINE_API_CLASS(SplitLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(SplitLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit SplitLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit SplitLayer(Layer& genLayer);
+    explicit SplitLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit SplitLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_tanh_layer.hpp b/inference-engine/include/builders/ie_tanh_layer.hpp
index acb00027a..0caf3d014 100644
--- a/inference-engine/include/builders/ie_tanh_layer.hpp
+++ b/inference-engine/include/builders/ie_tanh_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 
 namespace InferenceEngine {
@@ -14,7 +14,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for TanH layer
  */
-class INFERENCE_ENGINE_API_CLASS(TanHLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(TanHLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -23,9 +23,14 @@ public:
     explicit TanHLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit TanHLayer(Layer& genLayer);
+    explicit TanHLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit TanHLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/builders/ie_tile_layer.hpp b/inference-engine/include/builders/ie_tile_layer.hpp
index de03ba280..004d9a238 100644
--- a/inference-engine/include/builders/ie_tile_layer.hpp
+++ b/inference-engine/include/builders/ie_tile_layer.hpp
@@ -1,11 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include <builders/ie_layer_fragment.hpp>
-#include <ie_inetwork.hpp>
+#include <builders/ie_layer_decorator.hpp>
+#include <ie_network.hpp>
 #include <string>
 #include <vector>
 
@@ -15,7 +15,7 @@ namespace Builder {
 /**
  * @brief The class represents a builder for Tile layer
  */
-class INFERENCE_ENGINE_API_CLASS(TileLayer): public LayerFragment {
+class INFERENCE_ENGINE_API_CLASS(TileLayer): public LayerDecorator {
 public:
     /**
      * @brief The constructor creates a builder with the name
@@ -24,9 +24,14 @@ public:
     explicit TileLayer(const std::string& name = "");
     /**
      * @brief The constructor creates a builder from generic builder
-     * @param genLayer generic builder
+     * @param layer pointer to generic builder
      */
-    explicit TileLayer(Layer& genLayer);
+    explicit TileLayer(const Layer::Ptr& layer);
+    /**
+     * @brief The constructor creates a builder from generic builder
+     * @param layer constant pointer to generic builder
+     */
+    explicit TileLayer(const Layer::CPtr& layer);
     /**
      * @brief Sets the name for the layer
      * @param name Layer name
diff --git a/inference-engine/include/cldnn/cldnn_config.hpp b/inference-engine/include/cldnn/cldnn_config.hpp
index dc440ba47..571ff5103 100644
--- a/inference-engine/include/cldnn/cldnn_config.hpp
+++ b/inference-engine/include/cldnn/cldnn_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,7 +11,7 @@
 #pragma once
 
 #include <string>
-#include "../ie_plugin_config.hpp"
+#include "ie_plugin_config.hpp"
 
 namespace InferenceEngine {
 
diff --git a/inference-engine/include/cpp/ie_cnn_net_reader.h b/inference-engine/include/cpp/ie_cnn_net_reader.h
index 7bc0b254e..149f86a66 100644
--- a/inference-engine/include/cpp/ie_cnn_net_reader.h
+++ b/inference-engine/include/cpp/ie_cnn_net_reader.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/cpp/ie_cnn_network.h b/inference-engine/include/cpp/ie_cnn_network.h
index 82d13cf0b..4ccccd8ce 100644
--- a/inference-engine/include/cpp/ie_cnn_network.h
+++ b/inference-engine/include/cpp/ie_cnn_network.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -111,6 +111,14 @@ public:
 
     /**
      * @brief Wraps original method
+     * ICNNNetwork::getName
+     */
+    const std::string& getName() const noexcept {
+        return actual->getName();
+    }
+
+    /**
+     * @brief Wraps original method
      * ICNNNetwork::setBatchSize
      */
     virtual void setBatchSize(const size_t size) {
@@ -222,9 +230,10 @@ public:
     /**
      * @brief Serialize network to IR and weights files.
      * @param xmlPath Path to output IR file.
-     * @param binPath Path to output weights file.
+     * @param binPath Path to output weights file. The parameter is skipped in case
+     * of executable graph info serialization.
      */
-    void serialize(const std::string &xmlPath, const std::string &binPath) const {
+    void serialize(const std::string &xmlPath, const std::string &binPath = "") const {
         CALL_STATUS_FNC(serialize, xmlPath, binPath);
     }
 
diff --git a/inference-engine/include/cpp/ie_executable_network.hpp b/inference-engine/include/cpp/ie_executable_network.hpp
index dd8e942ac..c9225a148 100644
--- a/inference-engine/include/cpp/ie_executable_network.hpp
+++ b/inference-engine/include/cpp/ie_executable_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -16,6 +16,7 @@
 #include "ie_iexecutable_network.hpp"
 #include "cpp/ie_infer_request.hpp"
 #include "cpp/ie_memory_state.hpp"
+#include "cpp/ie_cnn_network.h"
 #include "details/ie_exception_conversion.hpp"
 
 namespace InferenceEngine {
@@ -107,6 +108,15 @@ public:
         return actual;
     }
 
+    /**
+    * @brief Get executable graph information from a plugin represented as CNNNetwork
+    * @return CNNetwork containing Executable Graph Info
+    */
+    CNNNetwork GetExecGraphInfo() {
+        ICNNNetwork::Ptr ptr = nullptr;
+        CALL_STATUS_FNC(GetExecGraphInfo, ptr);
+        return CNNNetwork(ptr);
+    }
 
     /**
      *@brief see original function InferenceEngine::IExecutableNetwork::QueryState
diff --git a/inference-engine/include/cpp/ie_infer_request.hpp b/inference-engine/include/cpp/ie_infer_request.hpp
index 10317af8e..1205d3e12 100644
--- a/inference-engine/include/cpp/ie_infer_request.hpp
+++ b/inference-engine/include/cpp/ie_infer_request.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/cpp/ie_memory_state.hpp b/inference-engine/include/cpp/ie_memory_state.hpp
index f9bd90aaf..d20fcae91 100644
--- a/inference-engine/include/cpp/ie_memory_state.hpp
+++ b/inference-engine/include/cpp/ie_memory_state.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/cpp/ie_plugin_cpp.hpp b/inference-engine/include/cpp/ie_plugin_cpp.hpp
index 5605209d9..0cef8cf0f 100644
--- a/inference-engine/include/cpp/ie_plugin_cpp.hpp
+++ b/inference-engine/include/cpp/ie_plugin_cpp.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -77,6 +77,7 @@ public:
     }
 
     /**
+     * @deprecated Loads IExecutableNetwork to create IInferRequest.
      * @brief Wraps original method
      * IInferencePlugin::Infer(const BlobMap&, BlobMap&, ResponseDesc *resp)
      */
diff --git a/inference-engine/include/details/caseless.hpp b/inference-engine/include/details/caseless.hpp
index f3e0d7a0a..8f9d3ce2e 100644
--- a/inference-engine/include/details/caseless.hpp
+++ b/inference-engine/include/details/caseless.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_blob_iterator.hpp b/inference-engine/include/details/ie_blob_iterator.hpp
index 6b083e1ad..61e7acf78 100644
--- a/inference-engine/include/details/ie_blob_iterator.hpp
+++ b/inference-engine/include/details/ie_blob_iterator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_cnn_network_iterator.hpp b/inference-engine/include/details/ie_cnn_network_iterator.hpp
index 9cc65c977..ff29b5d97 100644
--- a/inference-engine/include/details/ie_cnn_network_iterator.hpp
+++ b/inference-engine/include/details/ie_cnn_network_iterator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_cnn_network_tools.h b/inference-engine/include/details/ie_cnn_network_tools.h
index b80978bd9..a872fdbf7 100644
--- a/inference-engine/include/details/ie_cnn_network_tools.h
+++ b/inference-engine/include/details/ie_cnn_network_tools.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_exception.hpp b/inference-engine/include/details/ie_exception.hpp
index 514a639f1..5285f0568 100644
--- a/inference-engine/include/details/ie_exception.hpp
+++ b/inference-engine/include/details/ie_exception.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -31,8 +31,21 @@
     if (!(EXPRESSION)) throw InferenceEngine::details::InferenceEngineException(__FILE__, __LINE__) << "AssertionFailed: " << #EXPRESSION  // NOLINT
 #else
 #include <cassert>
+
+class NullStream {
+ public :
+    template <class T>
+    NullStream & operator << (const T &obj) noexcept {
+        return *this;
+    }
+
+    NullStream &  operator<< (std::ostream & (*manip)(std::ostream &)) noexcept {
+        return *this;
+    }
+};
+
 #define IE_ASSERT(EXPRESSION)\
-    assert((EXPRESSION)); std::stringstream()
+    assert((EXPRESSION)); NullStream()
 #endif  // NDEBUG
 
 namespace InferenceEngine {
diff --git a/inference-engine/include/details/ie_exception_conversion.hpp b/inference-engine/include/details/ie_exception_conversion.hpp
index 3c2b9471d..1c45d824f 100644
--- a/inference-engine/include/details/ie_exception_conversion.hpp
+++ b/inference-engine/include/details/ie_exception_conversion.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_inetwork_iterator.hpp b/inference-engine/include/details/ie_inetwork_iterator.hpp
index 84f8deecc..7d77bc839 100644
--- a/inference-engine/include/details/ie_inetwork_iterator.hpp
+++ b/inference-engine/include/details/ie_inetwork_iterator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,7 +15,7 @@
 #include <memory>
 #include <vector>
 
-#include <ie_inetwork.hpp>
+#include <ie_network.hpp>
 
 namespace InferenceEngine {
 namespace details {
@@ -33,23 +33,22 @@ public:
             allInputs.push_back(std::dynamic_pointer_cast<LT>(input));
         }
 
-        bool res = forestDFS(allInputs, [&](std::shared_ptr<LT> current) {
+        forestDFS(allInputs, [&](std::shared_ptr<LT> current) {
             sortedLayers.push_back(current);
         }, false);
 
-        if (!res) {
-            THROW_IE_EXCEPTION << "Sorting not possible, due to existed loop.";
-        }
-
         std::reverse(std::begin(sortedLayers), std::end(sortedLayers));
         currentLayer = getNextLayer();
     }
+
     bool operator!=(const INetworkIterator& that) const {
         return !operator==(that);
     }
+
     bool operator==(const INetworkIterator& that) const {
         return network == that.network && currentLayer == that.currentLayer;
     }
+
     typename INetworkIterator::reference operator*() {
         if (nullptr == currentLayer) {
             THROW_IE_EXCEPTION << "iterator out of bound";
@@ -79,27 +78,24 @@ private:
     }
 
     template<class T>
-    inline bool forestDFS(const std::vector<std::shared_ptr<LT>>& heads, const T &visit, bool bVisitBefore) {
+    inline void forestDFS(const std::vector<std::shared_ptr<LT>>& heads, const T &visit, bool bVisitBefore) {
         if (heads.empty()) {
-            return true;
+            return;
         }
 
         std::unordered_map<idx_t, bool> visited;
         for (auto & layer : heads) {
-            if (!DFS(visited, layer, visit, bVisitBefore)) {
-                return false;
-            }
+            DFS(visited, layer, visit, bVisitBefore);
         }
-        return true;
     }
 
     template<class T>
-    inline bool DFS(std::unordered_map<idx_t, bool> &visited,
+    inline void DFS(std::unordered_map<idx_t, bool> &visited,
                     const std::shared_ptr<LT> &layer,
                     const T &visit,
                     bool visitBefore) {
         if (layer == nullptr) {
-            return true;
+            return;
         }
 
         if (visitBefore)
@@ -111,25 +107,24 @@ private:
                 continue;
             }
             const auto outLayer = network->getLayer(connection.to().layerId());
+            if (!outLayer)
+                THROW_IE_EXCEPTION << "Couldn't get layer with id: " << connection.to().layerId();
             auto i = visited.find(outLayer->getId());
             if (i != visited.end()) {
                 /**
                  * cycle detected we entered still not completed node
                  */
                 if (!i->second) {
-                    return false;
+                    THROW_IE_EXCEPTION << "Sorting not possible, due to existed loop.";
                 }
                 continue;
             }
 
-            if (!DFS(visited, outLayer, visit, visitBefore)) {
-                return false;
-            }
+            DFS(visited, outLayer, visit, visitBefore);
         }
         if (!visitBefore)
             visit(layer);
         visited[layer->getId()] = true;
-        return true;
     }
 };
 
diff --git a/inference-engine/include/details/ie_irelease.hpp b/inference-engine/include/details/ie_irelease.hpp
index a1b55ddde..8bbf396ba 100644
--- a/inference-engine/include/details/ie_irelease.hpp
+++ b/inference-engine/include/details/ie_irelease.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_no_copy.hpp b/inference-engine/include/details/ie_no_copy.hpp
index 8d823add5..565835aee 100644
--- a/inference-engine/include/details/ie_no_copy.hpp
+++ b/inference-engine/include/details/ie_no_copy.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_no_release.hpp b/inference-engine/include/details/ie_no_release.hpp
index 3afe7c52d..70334849c 100644
--- a/inference-engine/include/details/ie_no_release.hpp
+++ b/inference-engine/include/details/ie_no_release.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_pre_allocator.hpp b/inference-engine/include/details/ie_pre_allocator.hpp
index b280cc138..d4801ba13 100644
--- a/inference-engine/include/details/ie_pre_allocator.hpp
+++ b/inference-engine/include/details/ie_pre_allocator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_so_loader.h b/inference-engine/include/details/ie_so_loader.h
index 6b93d26bd..4a5d39f90 100644
--- a/inference-engine/include/details/ie_so_loader.h
+++ b/inference-engine/include/details/ie_so_loader.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/ie_so_pointer.hpp b/inference-engine/include/details/ie_so_pointer.hpp
index a4973ff69..a6d7372ef 100644
--- a/inference-engine/include/details/ie_so_pointer.hpp
+++ b/inference-engine/include/details/ie_so_pointer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/os/lin_shared_object_loader.h b/inference-engine/include/details/os/lin_shared_object_loader.h
index 9e883f3d7..1126e0d13 100644
--- a/inference-engine/include/details/os/lin_shared_object_loader.h
+++ b/inference-engine/include/details/os/lin_shared_object_loader.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/details/os/win_shared_object_loader.h b/inference-engine/include/details/os/win_shared_object_loader.h
index 27be89824..269cba246 100644
--- a/inference-engine/include/details/os/win_shared_object_loader.h
+++ b/inference-engine/include/details/os/win_shared_object_loader.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/gna/gna_config.hpp b/inference-engine/include/gna/gna_config.hpp
index 29b4342d4..6b9cbe813 100644
--- a/inference-engine/include/gna/gna_config.hpp
+++ b/inference-engine/include/gna/gna_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,7 +12,7 @@
 #pragma once
 
 #include <string>
-#include "../ie_plugin_config.hpp"
+#include "ie_plugin_config.hpp"
 
 namespace InferenceEngine {
 
@@ -27,6 +27,8 @@ namespace GNAConfigParams {
 /**
 * @brief Scale factor that is calculated by user, in order to use static quantisation feature
 * This option should be used with floating point value serialized to string with decimal separator equals to . (dot)
+* @details For multiple input case, individual scale factors can be passed, using KEY_GNA_SCALE_FACTOR[_input_layer_name]
+* where input_layer can be obtained from from CNNNetwork::GetInputsInfo
 */
 DECLARE_GNA_CONFIG_KEY(SCALE_FACTOR);
 
diff --git a/inference-engine/include/hetero/hetero_plugin_config.hpp b/inference-engine/include/hetero/hetero_plugin_config.hpp
index 4330e1ef7..4f2e1669e 100644
--- a/inference-engine/include/hetero/hetero_plugin_config.hpp
+++ b/inference-engine/include/hetero/hetero_plugin_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,7 +12,7 @@
 #pragma once
 
 #include <string>
-#include "../ie_plugin_config.hpp"
+#include "ie_plugin_config.hpp"
 
 namespace InferenceEngine {
 
diff --git a/inference-engine/include/ie_allocator.hpp b/inference-engine/include/ie_allocator.hpp
index b9f5f5cc9..08b68382f 100644
--- a/inference-engine/include/ie_allocator.hpp
+++ b/inference-engine/include/ie_allocator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_api.h b/inference-engine/include/ie_api.h
index 3a71e753d..76bc7e2e7 100644
--- a/inference-engine/include/ie_api.h
+++ b/inference-engine/include/ie_api.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_blob.h b/inference-engine/include/ie_blob.h
index 21267a361..c96a01be5 100644
--- a/inference-engine/include/ie_blob.h
+++ b/inference-engine/include/ie_blob.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -304,6 +304,17 @@ public:
     }
 
     /**
+     * @brief Creates a TBlob object with the specified dimensions, layout and custom memory allocator but does not allocate the memory.
+     * @param p Precision
+     * @param l Layout
+     * @param dims Tensor dimensions
+     * @param alloc Allocator to be used
+     */
+    TBlob(const TensorDesc& tensorDesc, const std::shared_ptr<IAllocator>& alloc)
+            : Blob(tensorDesc), _allocator(alloc) {
+    }
+
+    /**
      * @deprecated Please use TensorDesc for Blob initialization.
      */
     explicit TBlob(Precision p, Layout l) : Blob(p, l) {}
@@ -588,7 +599,9 @@ protected:
  */
 template<class Type>
 inline typename TBlob<Type>::Ptr make_shared_blob(Precision p, Layout l, const SizeVector &dims) {
-    IE_ASSERT(p.hasStorageType<Type>());
+    if (!p.hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return std::make_shared<TBlob<Type>>(p, l, dims);
 }
 
@@ -602,7 +615,9 @@ inline typename TBlob<Type>::Ptr make_shared_blob(Precision p, Layout l, const S
  */
 template<class Type>
 inline typename TBlob<Type>::Ptr make_shared_blob(Precision p, const SizeVector &dims) {
-    IE_ASSERT(p.hasStorageType<Type>());
+    if (!p.hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return make_shared_blob<Type>(p, TensorDesc::getLayoutByDims(dims), dims);
 }
 
@@ -616,7 +631,9 @@ inline typename TBlob<Type>::Ptr make_shared_blob(Precision p, const SizeVector
  */
 template<typename Type, class TArg>
 inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(Precision p, Layout l, const TArg &arg) {
-    IE_ASSERT(p.hasStorageType<Type>());
+    if (!p.hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return std::make_shared<InferenceEngine::TBlob<Type>>(p, l, arg);
 }
 
@@ -630,7 +647,9 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(Precision p,
  */
 template<typename Type, class TArg>
 inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(Precision p, const TArg &arg) {
-    IE_ASSERT(p.hasStorageType<Type>());
+    if (!p.hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return make_shared_blob<Type, TArg>(p, TensorDesc::getLayoutByDims(arg), arg);
 }
 
@@ -642,7 +661,9 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(Precision p,
  */
 template<typename Type>
 inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc) {
-    IE_ASSERT(tensorDesc.getPrecision().hasStorageType<Type>());
+    if (!tensorDesc.getPrecision().hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc);
 }
 
@@ -656,11 +677,28 @@ inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorD
  */
 template<typename Type>
 inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc, Type * ptr, size_t size = 0) {
-    IE_ASSERT(tensorDesc.getPrecision().hasStorageType<Type>());
+    if (!tensorDesc.getPrecision().hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc, ptr, size);
 }
 
 /**
+ * @brief Creates a blob with the given tensor descriptor and allocator.
+ * @tparam Type Type of the shared pointer to be created
+ * @param tensorDesc Tensor descriptor for Blob creation
+ * @param alloc Shared pointer to IAllocator to use in the blob
+ * @return A shared pointer to the newly created blob of the given type
+ */
+template<typename Type>
+inline typename InferenceEngine::TBlob<Type>::Ptr make_shared_blob(const TensorDesc& tensorDesc, const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
+    if (!tensorDesc.getPrecision().hasStorageType<Type>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
+    return std::make_shared<InferenceEngine::TBlob<Type>>(tensorDesc, alloc);
+}
+
+/**
  * @deprecated Use TensorDesc in order to create Blob::Ptr.
  * @brief Gets a shared pointer for the new TBlob instance.
  * The created instance is based on move semantics from the given TBlob instance.
@@ -693,7 +731,9 @@ inline typename InferenceEngine::TBlob<TypeTo>::Ptr make_shared_blob(const TBlob
  */
 template<typename TypeTo>
 inline typename InferenceEngine::TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l = NCHW) {
-    IE_ASSERT(p.hasStorageType<TypeTo>());
+    if (!p.hasStorageType<TypeTo>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return std::make_shared<TBlob<TypeTo>>(p, l);
 }
 
@@ -709,7 +749,9 @@ inline typename InferenceEngine::TBlob<TypeTo>::Ptr make_shared_blob(Precision p
  */
 template<typename TypeTo>
 inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l, SizeVector dims, const std::vector<TypeTo> &arg) {
-    IE_ASSERT(p.hasStorageType<TypeTo>());
+    if (!p.hasStorageType<TypeTo>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     auto blob = std::make_shared<TBlob<TypeTo>>(p, l, dims);
     blob->set(arg);
     return blob;
@@ -726,7 +768,9 @@ inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l, SizeV
  */
 template<typename TypeTo>
 inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l, const std::vector<TypeTo> &arg) {
-    IE_ASSERT(p.hasStorageType<TypeTo>());
+    if (!p.hasStorageType<TypeTo>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     auto blob = std::make_shared<TBlob<TypeTo>>(p, l);
     blob->set(arg);
     return blob;
@@ -742,7 +786,9 @@ inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l, const
  */
 template<typename TypeTo>
 inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, const std::vector<TypeTo> &arg) {
-    IE_ASSERT(p.hasStorageType<TypeTo>());
+    if (!p.hasStorageType<TypeTo>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return make_shared_blob<TypeTo>(p, TensorDesc::getLayoutByDims(arg), arg);
 }
 
@@ -758,7 +804,9 @@ inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, const std::vect
  */
 template <typename TypeTo>
 inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l, const SizeVector &dims, TypeTo * ptr, size_t size = 0) {
-    IE_ASSERT(p.hasStorageType<TypeTo>());
+    if (!p.hasStorageType<TypeTo>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     auto blob = std::make_shared<TBlob<TypeTo>>(p, l, dims, ptr, size);
     return blob;
 }
@@ -774,7 +822,9 @@ inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, Layout l, const
  */
 template <typename TypeTo>
 inline typename TBlob<TypeTo>::Ptr make_shared_blob(Precision p, const SizeVector &dims, TypeTo * ptr, size_t size = 0) {
-    IE_ASSERT(p.hasStorageType<TypeTo>());
+    if (!p.hasStorageType<TypeTo>())
+        THROW_IE_EXCEPTION << "Cannot make shared blob! "
+                           << "The blob type cannot be used to store objects of current precision";
     return make_shared_blob<TypeTo>(p, TensorDesc::getLayoutByDims(dims), dims, ptr, size);
 }
 
diff --git a/inference-engine/include/ie_builders.hpp b/inference-engine/include/ie_builders.hpp
index ad2543fa8..6ab780298 100644
--- a/inference-engine/include/ie_builders.hpp
+++ b/inference-engine/include/ie_builders.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,10 @@
 #include <builders/ie_elu_layer.hpp>
 #include <builders/ie_fully_connected_layer.hpp>
 #include <builders/ie_grn_layer.hpp>
+#include <builders/ie_gru_sequence_layer.hpp>
 #include <builders/ie_input_layer.hpp>
+#include <builders/ie_lrn_layer.hpp>
+#include <builders/ie_lstm_sequence_layer.hpp>
 #include <builders/ie_memory_layer.hpp>
 #include <builders/ie_mvn_layer.hpp>
 #include <builders/ie_norm_layer.hpp>
@@ -38,7 +41,9 @@
 #include <builders/ie_relu6_layer.hpp>
 #include <builders/ie_relu_layer.hpp>
 #include <builders/ie_reorg_yolo_layer.hpp>
+#include <builders/ie_resample_layer.hpp>
 #include <builders/ie_reshape_layer.hpp>
+#include <builders/ie_rnn_sequence_layer.hpp>
 #include <builders/ie_roi_pooling_layer.hpp>
 #include <builders/ie_scale_shift_layer.hpp>
 #include <builders/ie_sigmoid_layer.hpp>
diff --git a/inference-engine/include/ie_common.h b/inference-engine/include/ie_common.h
index e08c2652f..7d75eee23 100644
--- a/inference-engine/include/ie_common.h
+++ b/inference-engine/include/ie_common.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -85,6 +85,9 @@ enum Layout : uint8_t {
     // weight layouts
     OIHW = 64,
 
+    // Scalar
+    SCALAR = 95,
+
     // bias layouts
     C = 96,
 
diff --git a/inference-engine/include/ie_context.hpp b/inference-engine/include/ie_context.hpp
index d7aca9061..22f6f9329 100644
--- a/inference-engine/include/ie_context.hpp
+++ b/inference-engine/include/ie_context.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_data.h b/inference-engine/include/ie_data.h
index 2088919d7..0ae207306 100644
--- a/inference-engine/include/ie_data.h
+++ b/inference-engine/include/ie_data.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -112,6 +112,13 @@ public:
     void setLayout(Layout layout);
 
     /**
+     * @brief changes dims and layout at same time
+     * @param dims new dimensions
+     * @param layout new layout
+     */
+    void reshape(const SizeVector &dims, Layout layout);
+
+    /**
     * @brief Gets the layout value for this Data instance
     */
     Layout getLayout() const;
diff --git a/inference-engine/include/ie_device.hpp b/inference-engine/include/ie_device.hpp
index 2cc67cc82..6dc7c4e6c 100644
--- a/inference-engine/include/ie_device.hpp
+++ b/inference-engine/include/ie_device.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -29,7 +29,8 @@ enum class TargetDevice : uint8_t {
     eMYRIAD = 5,
     eHDDL = 6,
     eGNA = 7,
-    eHETERO = 8
+    eHETERO = 8,
+    eKMB = 9,
 };
 
 /**
@@ -53,7 +54,8 @@ class TargetDeviceInfo {
             DECL_DEVICE(MYRIAD),
             DECL_DEVICE(HDDL),
             DECL_DEVICE(GNA),
-            DECL_DEVICE(HETERO)
+            DECL_DEVICE(HETERO),
+            DECL_DEVICE(KMB)
         };
 #undef DECLARE
         return g_allDeviceInfos;
@@ -69,7 +71,8 @@ class TargetDeviceInfo {
             { "HDDL", InferenceEngine::TargetDevice::eHDDL },
             { "GNA", InferenceEngine::TargetDevice::eGNA },
             { "BALANCED", InferenceEngine::TargetDevice::eBalanced },
-            { "HETERO", InferenceEngine::TargetDevice::eHETERO }
+            { "HETERO", InferenceEngine::TargetDevice::eHETERO },
+            { "KMB", InferenceEngine::TargetDevice::eKMB }
         };
         auto val = deviceFromNameMap.find(deviceName);
         return val != deviceFromNameMap.end() ? val->second : InferenceEngine::TargetDevice::eDefault;
diff --git a/inference-engine/include/ie_error.hpp b/inference-engine/include/ie_error.hpp
index a934a7856..5016a7399 100644
--- a/inference-engine/include/ie_error.hpp
+++ b/inference-engine/include/ie_error.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_extension.h b/inference-engine/include/ie_extension.h
index 926dbd6c8..534f01823 100644
--- a/inference-engine/include/ie_extension.h
+++ b/inference-engine/include/ie_extension.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_icnn_net_reader.h b/inference-engine/include/ie_icnn_net_reader.h
index 820c2b442..ce791ed88 100644
--- a/inference-engine/include/ie_icnn_net_reader.h
+++ b/inference-engine/include/ie_icnn_net_reader.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_icnn_network.hpp b/inference-engine/include/ie_icnn_network.hpp
index 07b244412..cf6869b6a 100644
--- a/inference-engine/include/ie_icnn_network.hpp
+++ b/inference-engine/include/ie_icnn_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -34,6 +34,8 @@ using OutputsDataMap = std::map<std::string, DataPtr>;
  */
 class ICNNNetwork : public details::IRelease {
 public:
+    using Ptr = std::shared_ptr<ICNNNetwork>;
+
     /**
      * @brief Returns the main network operating precision.
      * This may be MIXED if not homogeneous.
diff --git a/inference-engine/include/ie_icnn_network_stats.hpp b/inference-engine/include/ie_icnn_network_stats.hpp
index 440c20267..2547fb6ba 100644
--- a/inference-engine/include/ie_icnn_network_stats.hpp
+++ b/inference-engine/include/ie_icnn_network_stats.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_iexecutable_network.hpp b/inference-engine/include/ie_iexecutable_network.hpp
index 0b0a915d4..f3f422191 100644
--- a/inference-engine/include/ie_iexecutable_network.hpp
+++ b/inference-engine/include/ie_iexecutable_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,6 +11,7 @@
 #include "ie_common.h"
 #include "ie_primitive_info.hpp"
 #include "ie_iinfer_request.hpp"
+#include "ie_icnn_network.hpp"
 #include "ie_imemory_state.hpp"
 #include "ie_input_info.hpp"
 #include <string>
@@ -73,7 +74,7 @@ public:
     virtual StatusCode Export(const std::string& modelFileName, ResponseDesc *resp) noexcept = 0;
 
     /**
-    * @brief Gets the mapping of IR layer names to implemented kernels
+    * @brief Get the mapping of IR layer names to implemented kernels
     * @param deployedTopology Map of PrimitiveInfo objects that represent the deployed topology
     * @param resp Optional: pointer to an already allocated object to contain information in case of failure
     * @return Status code of the operation: OK (0) for success
@@ -81,6 +82,14 @@ public:
     virtual StatusCode GetMappedTopology(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &deployedTopology, ResponseDesc *resp) noexcept = 0;
 
     /**
+    * @brief Get executable graph information from a device
+    * @param graphPtr network ptr to store executable graph information
+    * @param resp Optional: pointer to an already allocated object to contain information in case of failure
+    * @return Status code of the operation: OK (0) for success
+    */
+    virtual StatusCode GetExecGraphInfo(ICNNNetwork::Ptr &graphPtr, ResponseDesc *resp) noexcept = 0;
+
+    /**
      * @brief Gets state control interface for given executable network, State control essential for recurrent networks
      * @param pState reference to a pointer that receives internal states
      * @param idx requested index for receiving memory state
diff --git a/inference-engine/include/ie_iextension.h b/inference-engine/include/ie_iextension.h
index c0ea3f82d..7d529b4b5 100644
--- a/inference-engine/include/ie_iextension.h
+++ b/inference-engine/include/ie_iextension.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -161,11 +161,21 @@ public:
     /**
      * @brief check that reshape can be applied, that parameters and shapes are valid
      */
-    virtual StatusCode inferShapes(const std::vector<SizeVector>& inShapes,
-                                   const std::map<std::string, std::string>& params,
-                                   const std::map<std::string, Blob::Ptr>& blobs,
-                                   std::vector<SizeVector>& outShapes,
-                                   ResponseDesc* resp) noexcept = 0;
+    virtual StatusCode inferShapes(const std::vector<Blob::CPtr>& /*inBlobs*/,
+                                   const std::map<std::string, std::string>& /*params*/,
+                                   const std::map<std::string, Blob::Ptr>& /*blobs*/,
+                                   std::vector<SizeVector>& /*outShapes*/,
+                                   ResponseDesc* /*resp*/) noexcept { return NOT_IMPLEMENTED; }  // For backward-compatibility
+
+    /**
+     * @deprecated
+     * @brief check that reshape can be applied, that parameters and shapes are valid
+     */
+    virtual StatusCode inferShapes(const std::vector<SizeVector>& /*inShapes*/,
+                                   const std::map<std::string, std::string>& /*params*/,
+                                   const std::map<std::string, Blob::Ptr>& /*blobs*/,
+                                   std::vector<SizeVector>& /*outShapes*/,
+                                   ResponseDesc* /*resp*/) noexcept { return NOT_IMPLEMENTED; }  // For backward-compatibility
 };
 
 /**
diff --git a/inference-engine/include/ie_ihetero_plugin.hpp b/inference-engine/include/ie_ihetero_plugin.hpp
index 326c35003..f9f1f23a8 100644
--- a/inference-engine/include/ie_ihetero_plugin.hpp
+++ b/inference-engine/include/ie_ihetero_plugin.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_iinfer_request.hpp b/inference-engine/include/ie_iinfer_request.hpp
index fe09be70c..d922f5b49 100644
--- a/inference-engine/include/ie_iinfer_request.hpp
+++ b/inference-engine/include/ie_iinfer_request.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_imemory_state.hpp b/inference-engine/include/ie_imemory_state.hpp
index 2c007dfb0..4240025c7 100644
--- a/inference-engine/include/ie_imemory_state.hpp
+++ b/inference-engine/include/ie_imemory_state.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_input_info.hpp b/inference-engine/include/ie_input_info.hpp
index 17f6a67f8..590b4918d 100644
--- a/inference-engine/include/ie_input_info.hpp
+++ b/inference-engine/include/ie_input_info.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_layers.h b/inference-engine/include/ie_layers.h
index 4582842b2..3e1b9bb80 100644
--- a/inference-engine/include/ie_layers.h
+++ b/inference-engine/include/ie_layers.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -373,7 +373,7 @@ public:
      * @param def Default value of the parameter if not found
      * @return An bool value for the specified parameter
      */
-    bool GetParamsAsBool(const char *param, bool def) const {
+    bool GetParamAsBool(const char *param, bool def) const {
         std::string val = GetParamAsString(param, std::to_string(def).c_str());
         std::string loweredCaseValue;
         std::transform(val.begin(), val.end(), std::back_inserter(loweredCaseValue), [](char value) {
@@ -384,11 +384,17 @@ public:
 
         if (!(std::istringstream(loweredCaseValue) >> std::boolalpha >> result)) {
             // attempting parse using non alpha bool
-            return static_cast<bool>(GetParamAsInt(param, def));
+            return (GetParamAsInt(param, def) != 0);
         }
 
         return result;
     }
+    /**
+     * @deprecated Use GetParamAsBool function for that functionality
+     */
+    bool GetParamsAsBool(const char *param, bool def) const {
+        return GetParamAsBool(param, def);
+    }
 
     /**
      * @brief Returns a string value for the given parameter or returns the default one
@@ -398,13 +404,26 @@ public:
      */
     std::string GetParamAsString(const char *param, const char *def) const {
         auto it = params.find(param);
-        if (it == params.end()) {
+        if (it == params.end() || it->second.empty()) {
             return def;
         }
         return (*it).second;
     }
 
     /**
+    * @brief Checks the param presence in the layer
+    * @param param Name of the layer parameter
+    * @return a bool depending param presence
+    */
+    bool CheckParamPresence(const char *param) const {
+        auto it = params.find(param);
+        if (it == params.end()) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
      * @brief Returns a string value for the given parameter.
      * Throws exception if parameter was not found.
      * @param param Name of the layer parameter
@@ -418,10 +437,28 @@ public:
         return (*it).second;
     }
 
+    std::vector<std::string> GetParamAsStrings(const char *param, std::vector<std::string> def) const {
+        std::string vals = GetParamAsString(param, "");
+        std::vector<std::string> result;
+        std::istringstream stream(vals);
+        std::string str;
+        if (vals.empty())
+            return def;
+        while (getline(stream, str, ',')) {
+            try {
+                result.push_back(str);
+            } catch (...) {
+                THROW_IE_EXCEPTION << "Cannot parse parameter " << param << " from IR for layer " << name << ".";
+            }
+        }
+        return result;
+    }
+
     /**
      * @brief Map of pairs: (parameter name, parameter value)
      */
     std::map<std::string, std::string> params;
+
     /**
      * @brief Map of pairs: (name, weights/biases blob)
      */
@@ -638,6 +675,107 @@ public:
     PoolingLayer(PoolingLayer &&) = default;
 };
 
+/**
+ * @brief This class represents a standard binary convolution layer
+ */
+class BinaryConvolutionLayer : public WeightableLayer {
+public:
+    /**
+    * @enum eBinaryConvolutionMode
+    * @brief Defines possible modes of binary convolution operation
+    */
+    enum eBinaryConvolutionMode {
+        xnor_popcount = 0
+    };
+
+    /**
+    * @brief Mode of binary convolution operation
+    */
+    eBinaryConvolutionMode _mode = xnor_popcount;
+
+    /**
+     * @brief A number of input feature maps (size) generating the 3'rd input dimension
+     */
+    unsigned int _in_depth = 0u;
+
+    /**
+    * @brief A pad value which is used to fill pad area
+    */
+    float _pad_value = -1.0f;
+
+    /**
+     * @brief A convolution kernel array [X, Y, Z, ...]
+     */
+    DEFINE_PROP(_kernel);
+    /**
+     * @brief A convolution paddings begin array [X, Y, Z, ...]
+     */
+    DEFINE_PROP(_padding);
+    /**
+     * @brief A convolution paddings end array [X, Y, Z, ...]
+     */
+    PropertyVector<unsigned int> _pads_end;
+    /**
+     * @brief A convolution strides array [X, Y, Z, ...]
+     */
+    DEFINE_PROP(_stride);
+    /**
+     * @brief A convolution dilations array [X, Y, Z, ...]
+     */
+    DEFINE_PROP(_dilation);
+    /**
+     * @brief A number of output feature maps (size) generating the 3'rd output dimension
+     */
+    unsigned int _out_depth = 0u;
+    /**
+     * @brief Number of groups
+     */
+    unsigned int _group = 1u;
+    /**
+     * @brief Auto padding type
+     */
+    std::string _auto_pad;
+
+    /**
+     * @brief Creates a new BinaryConvolutionLayer instance.
+     */
+    explicit BinaryConvolutionLayer(const LayerParams &p) : WeightableLayer(p),
+            _kernel(2, 0u), _padding(2, 0u), _stride(2, 1u), _dilation(2, 1u) {}
+    /**
+     * @brief assignment operator
+     */
+    BinaryConvolutionLayer & operator = (const BinaryConvolutionLayer & that) {
+        if (&that != this) {
+            WeightableLayer::operator=(that);
+            _kernel = that._kernel;
+            _padding = that._padding;
+            _pads_end = that._pads_end;
+            _stride = that._stride;
+            _dilation = that._dilation;
+            _out_depth = that._out_depth;
+            _group = that._group;
+            _mode = that._mode;
+            _in_depth = that._in_depth;
+            _pad_value = that._pad_value;
+        }
+        return *this;
+    }
+    /**
+     * @brief move assignment operator
+     */
+    BinaryConvolutionLayer& operator = (BinaryConvolutionLayer &&) = default;
+    /**
+     * @brief copy constructor
+     */
+    BinaryConvolutionLayer(const BinaryConvolutionLayer & that) : WeightableLayer(that) {
+        operator = (that);
+    }
+    /**
+     * @brief move constructor
+     */
+    BinaryConvolutionLayer(BinaryConvolutionLayer &&) = default;
+};
+
 #undef DEFINE_PROP
 
 /**
@@ -816,6 +954,21 @@ public:
     using CNNLayer::CNNLayer;
 };
 
+
+/**
+ * @brief This class represents a ReLU6 activation layer
+ * Clamps all tensor elements into the range [0, 6.0]
+ */
+class ReLU6Layer : public ClampLayer {
+public:
+    explicit ReLU6Layer(const LayerParams &prms) : ClampLayer(prms) {
+        max_value = 6.0f;
+    }
+
+    using ClampLayer::ClampLayer;
+};
+
+
 /**
  * @brief This class represents an element wise operation layer
  */
@@ -826,7 +979,9 @@ public:
      * @brief Defines possible operations that can be used
      */
     enum eOperation {
-        Sum = 0, Prod, Max
+        Sum = 0, Prod, Max, Sub, Min, Div, Squared_diff, Floor_mod, Pow,
+        Equal, Not_equal, Less, Less_equal, Greater, Greater_equal,
+        Logical_AND, Logical_OR, Logical_XOR
     };
 
     /**
@@ -963,9 +1118,219 @@ public:
 };
 
 /**
-* @class PReLULayer
-* @brief This class represents a Layer which performs Scale and Shift
-*/
+ * @brief Base class for recurrent cell layers
+ */
+class RNNCellBase : public WeightableLayer {
+public:
+    using WeightableLayer::WeightableLayer;
+
+    /**
+     * @brief Direct type of recurrent cell (including subtypes)
+     * Description of particular cell semantics is in LSTMCell, GRUCell, RNNCell.
+     */
+    enum CellType {
+        LSTM,      /**< Original LSTM cell */
+        GRU,       /**< Original GRU cell */
+        RNN,       /**< Original RNN cell */
+        GRU_LBR,   /**< GRU cell modification. "Linear before reset" */
+    };
+
+    /** @copybrief CellType */
+    CellType cellType = LSTM;
+
+    /**
+     * @brief Size of hidden state data
+     *
+     * In case of batch output state tensor will have shape [N, hidden_size]
+     */
+    int hidden_size = 0;
+
+    /**
+     * @brief Clip data into range [-clip, clip] on input of activations
+     *
+     * clip==0.0f means no clipping
+     */
+    float clip = 0.0f;
+    /**
+     * @brief Activations used inside recurrent cell
+     *
+     * Valid values: sigmoid, tanh, relu
+     */
+    std::vector<std::string> activations;
+
+    /**
+     * @brief Alpha parameters of activations
+     *
+     * Respective to activation list.
+     */
+    std::vector<float> activation_alpha;
+
+    /**
+     * @brief Beta parameters of activations
+     *
+     * Respective to activation list.
+     */
+    std::vector<float> activation_beta;
+};
+
+/**
+ * @brief LSTM Cell layer
+ *
+ * G - number of gates (=4)
+ * N - batch size
+ * S - state size (=hidden_size)
+ *
+ * Inputs:
+ *   [N,D] Xt - input data
+ *   [N,S] Ht-1 - initial hidden state
+ *   [N,S] Ct-1 - initial cell state
+ *
+ * Outputs:
+ *   [N,S] Ht - out hidden state
+ *   [N,S] Ct - out cell state
+ *
+ * Weights:
+ *   - weights [G,S,D+S]
+ *   - biases [G,S]
+ * NB!  gates order is FICO {forget, input, candidate, output}
+ *
+ * activations is {_f, _g, _h}
+ * default: {_f=sigm, _g=tanh, _h=tanh}
+ *
+ * Equations:
+ *
+ *   *  - matrix mult
+ *  (.) - eltwise mult
+ *  [,] - concatenation
+ *
+ * - ft = _f(Wf*[Ht-1, Xt] + Bf)
+ * - it = _f(Wi*[Ht-1, Xt] + Bi)
+ * - ct = _g(Wc*[Ht-1, Xt] + Bc)
+ * - ot = _f(Wo*[Ht-1, Xt] + Bo)
+ * - Ct = ft (.) Ct-1 + it (.) ct
+ * - Ht = ot (.) _h(Ct)
+ */
+using LSTMCell = RNNCellBase;
+
+/**
+ * @brief GRU Cell layer
+ *
+ * G - number of gates (=3)
+ * N - batch size
+ * S - state size (=hidden_size)
+ *
+ * Inputs:
+ *   [N,D] Xt - input data
+ *   [N,S] Ht-1 - initial hidden state
+ *
+ * Outputs:
+ *   [N,S] Ht - out hidden state
+ *
+ * Weights:
+ *   - weights [G,S,D+S]
+ *   - biases [G,S]
+ * NB!  gates order is ZRH {update, reset, output}
+ *
+ * activations is {_f, _g}
+ * default: {_f=sigm, _g=tanh}
+ *
+ * Equations:
+ *
+ *   *  - matrix mult
+ *  (.) - eltwise mult
+ *  [,] - concatenation
+ *
+ * - zt = _f(Wz*[Ht-1, Xt] + Bz)
+ * - rt = _f(Wr*[Ht-1, Xt] + Br)
+ * - ht = _g(Wh*[rt (.) Ht-1, Xt] + Bh)
+ * - Ht = (1 - zt) (.) ht + zt (.) Ht-1
+ */
+using GRUCell  = RNNCellBase;
+
+/**
+ * @brief RNN Cell layer
+ *
+ * G - number of gates (=1)
+ * N - batch size
+ * S - state size (=hidden_size)
+ *
+ * Inputs:
+ *   [N,D] Xt - input data
+ *   [N,S] Ht-1 - initial hidden state
+ *
+ * Outputs:
+ *   [N,S] Ht - out hidden state
+ *
+ * Weights:
+ *   - weights [G,S,D+S]
+ *   - biases [G,S]
+ *
+ * activations is {_f}
+ * default: {_f=tanh}
+ *
+ * Equations:
+ *
+ *   *  - matrix mult
+ *  [,] - concatenation
+ *
+ * - Ht = _f(Wi*[Ht-1, Xt] + Bi)
+ */
+using RNNCell  = RNNCellBase;
+
+/**
+ * @brief Sequence of recurrent cells
+ *
+ * N  - batch size
+ * T  - sequence size
+ * S  - state size (=hidden_size)
+ * NS - num of state tensors (LSTM=2, GRU/RNN=1)
+ * ND - num of direction (BDR=2, WFD/BWD=1)
+ *
+ * Inputs:
+ *   [N,T,D]  Xt - input data
+ *   [ND,N,S] Ht-1 - initial hidden state
+ *   [ND,N,S] Ct-1 - initial cell state  // if NS==2
+ *
+ * Outputs:
+ *   [ND,N,T,S] Xt - input data
+ *   [ND,N,S] Ht-1 - initial hidden state
+ *   [ND,N,S] Ct-1 - initial cell state  // if NS==2
+ *
+ * NB! if axis==0 batch and sequense dimensions are swapped (N <-> T) for input and output tensors
+ *
+ * Weights:
+ *   - weights [ND,G,S,D+S]
+ *   - biases [ND,G,S]
+ * NB! if ND==2 weights are concatenated cell weights [forward_cell_weights, backward_cell_weights]
+ *
+ */
+class RNNSequenceLayer : public RNNCellBase {
+public:
+    using RNNCellBase::RNNCellBase;
+
+    /**
+     * @brief An axis by which iteration is performed
+     * axis=0 means first input/output data blob dimension is sequence
+     * axis=1 means first input/output data blob dimension is batch
+     */
+    unsigned int axis = 1;
+
+    /**
+     * @brief Direction of iteration through sequence dimension
+     */
+    enum Direction {
+        FWD,  /**< Forward mode. Iterate starts from index 0 with step 1.         */
+        BWD,  /**< Backward mode. Iterate starts from last index with step -1.    */
+        BDR   /**< Bidirectional mode. First is forward pass, second is backward. */
+    };
+
+    /** @copybrief Direction */
+    Direction direction = FWD;
+};
+
+/**
+ * @brief This class represents a Layer which performs Scale and Shift
+ */
 class PReLULayer : public WeightableLayer {
 public:
     /**
@@ -975,9 +1340,9 @@ public:
 
 public:
     /**
-    * @brief A default constructor. Creates a new PReLULayer instance and initializes layer parameters with the given values.
-    * @param prms Initial layer parameters
-    */
+     * @brief A default constructor. Creates a new PReLULayer instance and initializes layer parameters with the given values.
+     * @param prms Initial layer parameters
+     */
     explicit PReLULayer(const LayerParams &prms) : WeightableLayer(prms), _channel_shared(false) {}
 };
 
@@ -1101,4 +1466,205 @@ public:
     */
     using CNNLayer::CNNLayer;
 };
+
+/**
+ * @brief This class represents a standard Strided Slice layer
+ * Strided Slice picks from input tensor according parameters
+ */
+class StridedSliceLayer : public CNNLayer {
+public:
+    /**
+    * @brief The begin_mask is a bitmask where bit i being 0 means
+    * to ignore the begin value and instead use the default value
+    */
+    std::string begin_mask;
+    /**
+    * @brief Analogous to begin_mask
+    */
+    std::string end_mask;
+    /**
+    * @brief The ellipsis_mask is a bitmask where bit i being 1 means
+    * the i-th is actually an ellipsis
+    */
+    std::string ellipsis_mask;
+    /**
+    * @brief The new_axis_mask_ is a bitmask where bit i being 1 means
+    * the i-th position creates a new 1 dimension shape
+    */
+    std::string new_axis_mask;
+    /**
+    * @brief The shrink_axis_mask is a bitmask where bit i being 1 means
+    * the i-th position shrinks the dimensionality
+    */
+    std::string shrink_axis_mask;
+
+    /**
+    * @brief Creates a new StridedSliceLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+/**
+* @brief This class represents a standard Shuffle Channels layer
+* Shuffle Channels picks from input tensor according parameters
+*/
+class ShuffleChannelsLayer : public CNNLayer {
+public:
+    /**
+    * @brief The axis in tensor to shuffle channels
+    */
+    int axis = 1;
+
+    /**
+    * @brief The group of output shuffled channels
+    */
+    unsigned int group = 1;
+
+    /**
+    * @brief Creates a new ShuffleChannelsLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Depth To Space layer
+* Depth To Space picks from input tensor according parameters
+*/
+class DepthToSpaceLayer : public CNNLayer {
+public:
+    /**
+    * @brief The group of output shuffled channels
+    */
+    unsigned int block_size = 1;
+
+    /**
+    * @brief Creates a new DepthToSpaceLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Space To Depth layer
+* Depth To Space picks from input tensor according parameters
+*/
+class SpaceToDepthLayer : public CNNLayer {
+public:
+    /**
+    * @brief The group of output Space To Depth
+    */
+    unsigned int block_size = 1;
+
+    /**
+    * @brief Creates a new SpaceToDepthLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Reverse Sequence layer
+* Reverse Sequence modifies input tensor according parameters
+*/
+class ReverseSequenceLayer : public CNNLayer {
+public:
+    /**
+    * @brief The seq_axis dimension in tensor which is partially reversed
+    */
+    int seq_axis = 1;
+
+    /**
+    * @brief The batch_axis dimension in tensor along which reversal is performed
+    */
+    int batch_axis = 0;
+
+    /**
+    * @brief Creates a new ReverseSequence instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Squeeze layer
+* Squeeze modifies input tensor dimensions according parameters
+*/
+class SqueezeLayer : public CNNLayer {
+public:
+    /**
+    * @brief Creates a new Squeeze instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Unsqueeze layer
+* Unsqueeze modifies input tensor dimensions according parameters
+*/
+class UnsqueezeLayer : public CNNLayer {
+public:
+    /**
+    * @brief Creates a new Unsqueeze instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard RangeLayer layer
+* RangeLayer modifies input tensor dimensions according parameters
+*/
+class RangeLayer : public CNNLayer {
+public:
+    /**
+    * @brief Creates a new RangeLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Fill layer
+* RFill modifies input tensor according parameters
+*/
+class FillLayer : public CNNLayer {
+public:
+    /**
+    * @brief Creates a new Fill instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+
+/**
+* @brief This class represents a standard Expand layer
+* Expand modifies input tensor dimensions according parameters
+*/
+class ExpandLayer : public CNNLayer {
+public:
+    /**
+    * @brief Creates a new Expand instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
+/**
+ * @brief This class represents a quantization operation layer
+ * Element-wise linear quantization of floating point input values into a descrete set of floating point values
+ */
+class QuantizeLayer : public CNNLayer {
+public:
+    /**
+    * @brief The number of quantization levels
+    */
+    int levels = 1;
+
+    /**
+    * @brief Creates a new QuantizeLayer instance.
+    */
+    using CNNLayer::CNNLayer;
+};
+
 }  // namespace InferenceEngine
diff --git a/inference-engine/include/ie_layers_property.hpp b/inference-engine/include/ie_layers_property.hpp
index 52d434c47..eeac6b6ec 100644
--- a/inference-engine/include/ie_layers_property.hpp
+++ b/inference-engine/include/ie_layers_property.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -47,6 +47,13 @@ public:
         }
     }
 
+    PropertyVector(std::initializer_list<int> init_list) {
+        size_t i = 0;
+        for (const auto val : init_list) {
+            insert(i++, val);
+        }
+    }
+
     /**
      * @brief allows access up-to capacity size
      * @param index
diff --git a/inference-engine/include/ie_layouts.h b/inference-engine/include/ie_layouts.h
index f4c0e4dd6..740da27b2 100644
--- a/inference-engine/include/ie_layouts.h
+++ b/inference-engine/include/ie_layouts.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -219,6 +219,9 @@ public:
     void setLayout(Layout l) {
         bool inconsistentLayout = true;
         switch (l) {
+            case Layout::SCALAR:
+                inconsistentLayout = !dims.empty();
+                break;
             case Layout::C:
                 inconsistentLayout = dims.size() != 1;
                 break;
@@ -246,7 +249,7 @@ public:
                 break;
         }
         if (inconsistentLayout)
-            THROW_IE_EXCEPTION << "Dims(" << std::to_string(dims.size()) << ") and format(" << std::to_string(l) << ") are inconsistent.";
+            THROW_IE_EXCEPTION << "Size of dims(" << std::to_string(dims.size()) << ") and format(" << l << ") are inconsistent.";
         layout = l;
     }
 
diff --git a/inference-engine/include/ie_locked_memory.hpp b/inference-engine/include/ie_locked_memory.hpp
index 59e81f079..d0ddb9bbe 100644
--- a/inference-engine/include/ie_locked_memory.hpp
+++ b/inference-engine/include/ie_locked_memory.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_inetwork.hpp b/inference-engine/include/ie_network.hpp
index 41c02f00c..b33e7793e 100644
--- a/inference-engine/include/ie_inetwork.hpp
+++ b/inference-engine/include/ie_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -134,77 +134,152 @@ private:
 };
 
 /**
+ * This class describes port data
+ */
+class INFERENCE_ENGINE_API_CLASS(PortData) {
+public:
+    /**
+     * @brief A shared pointer to the PortData object.
+     */
+    using Ptr = std::shared_ptr<PortData>;
+
+    /**
+     * @brief Default constructor
+     */
+    PortData();
+
+    /**
+     * Creates port data with precision and shape
+     * @param shape Dimensions
+     * @param precision Precision
+     */
+    PortData(const SizeVector& shape, const Precision& precision);
+
+    /**
+     * @brief virtual destructor
+     */
+    virtual ~PortData() = default;
+
+    /**
+     * @brief Returns data
+     * @return Blob with data
+     */
+    const Blob::Ptr& getData() const;
+
+    /**
+     * @brief Sets data
+     * @param data Blob with data
+     */
+    void setData(const Blob::Ptr& data);
+
+    /**
+     * @brief Returns data parameters
+     * @return Map of parameters
+     */
+    const std::map<std::string, Parameter>& getParameters() const noexcept;
+
+    /**
+     * @brief Sets new shapes for data
+     * @param shape New shapes
+     */
+    void setShape(const SizeVector& shape);
+
+private:
+    Blob::Ptr data;
+    std::map<std::string, Parameter> parameters;
+
+    void createData(const TensorDesc& desc);
+};
+
+/**
  * @brief This class is the main object to describe the Inference Engine port.
  */
-class Port {
+class INFERENCE_ENGINE_API_CLASS(Port) {
 public:
     /**
      * @brief Default constructor of a port object.
      */
-    Port() = default;
+    Port();
     /**
      * @brief Constructor of a port object with shapes.
      * @param shapes port shapes
+     * @param precision Port precision
      */
-    explicit Port(const SizeVector& shapes): pShapes(shapes) {}
+    explicit Port(const SizeVector& shapes,
+                  const Precision& precision = Precision::UNSPECIFIED);
 
     /**
      * @brief Copy constructor.
      * @param port object to copy
      */
-    Port(const Port& port) {
-        this->pShapes = port.pShapes;
-    }
+    Port(const Port& port);
+
+    /**
+     * @brief Virtual destructor
+     */
+    virtual ~Port() = default;
+
+    /**
+     * @brief Compares the given Port with the current one
+     * @param rhs Port to compare with
+     * @return true if the given Port is equal to the current one, false - otherwise
+     */
+    bool operator== (const Port& rhs) const;
+
+    /**
+     * @brief Compares the given Port with the current one
+     * @param rhs Port to compare with
+     * @return true if the given Port is NOT equal to the current one, false - otherwise
+     */
+    bool operator!= (const Port& rhs) const;
 
     /**
      * @brief Returns a constant reference to a vector with shapes.
      * Shapes should be initialized if shape is empty.
      * @return constant reference to shapes
      */
-    const SizeVector& shape() const noexcept {
-        return pShapes;
-    }
+    const SizeVector& shape() const noexcept;
 
     /**
-     * @brief Returns a reference to a vector with shapes.
-     * Shapes should be initialized if shape is empty.
-     * @return reference to shapes
+     * @brief Sets new shapes for current port
+     * @param shape New shapes
      */
-    SizeVector& shape() noexcept {
-        return pShapes;
-    }
+    void setShape(const SizeVector& shape);
 
-private:
-    SizeVector pShapes;
-};
+    /**
+     * @brief Returns a constant reference to parameters
+     * @return Map with parameters
+     */
+    const std::map<std::string, Parameter>& getParameters() const noexcept;
 
-/**
- * @brief This class is the main interface to describe the Inference Engine layer parameters.
- * All methods here are constant and do not throw exceptions.
- */
-class IParameters {
-public:
     /**
-     * @brief A shared pointer to the IParameters object.
+     * @brief Sets new parameters for current port
+     * @param params New parameters
      */
-    using Ptr = std::shared_ptr<IParameters>;
+    void setParameters(const std::map<std::string, Parameter>& params) noexcept;
 
     /**
-     * @brief Virtual destructor for the parameters interface
+     * @brief Sets the new parameter for current port
+     * @param name Name of parameter
+     * @param param New value
      */
-    virtual ~IParameters() = default;
+    void setParameter(const std::string& name, const Parameter& param);
 
     /**
-     * @brief Returns a constant reference to a map with parameters.
-     * @return Map of parameters
+     * @brief Returns port data
+     * @return Port data
      */
-    virtual const std::map<std::string, Parameter>& getParameters() const noexcept = 0;
+    const PortData::Ptr& getData() const noexcept;
 
     /**
-     * @brief Returns a constant reference to a constant pointers to constant data.
-     * @return Map of constant pointers to constant data
+     * @brief Sets new port data for current port
+     * @param data Port data
      */
-    virtual const std::map<std::string, Blob::CPtr>& getConstantData() const noexcept = 0;
+    void setData(const PortData::Ptr& data);
+
+private:
+    std::map<std::string, Parameter> parameters;
+    PortData::Ptr data;
 };
 
 class INetwork;
@@ -218,10 +293,6 @@ class INetwotkIterator;
 class ILayer {
 public:
     /**
-     * @brief A shared pointer to the ILayer object
-     */
-    using Ptr = std::shared_ptr<ILayer>;
-    /**
      * @brief A shared pointer to the const ILayer object
      */
     using CPtr = std::shared_ptr<const ILayer>;
@@ -250,16 +321,10 @@ public:
     virtual const std::string& getType() const noexcept = 0;
 
     /**
-     * @brief Returns a constant smart pointer reference to a Network interface.
-     * @return Network interface smart pointer
-     */
-     virtual const std::shared_ptr<INetwork>& getGraph() const noexcept = 0;
-
-    /**
      * @brief Returns a constant smart pointer reference to a Parameters interface.
      * @return Parameters interface smart pointer
      */
-    virtual const IParameters::Ptr& getParameters() const noexcept = 0;
+    virtual const std::map<std::string, Parameter>& getParameters() const noexcept = 0;
 
     /**
      * @brief Returns a constant reference to a vector with input ports.
@@ -289,11 +354,11 @@ class INetworkIterator;
 class INetwork {
 public:
     /**
-     * @brief A shared pointer to the INetwork object.
+     * @brief A shared pointer to the constant INetwork object.
      */
-    using Ptr = std::shared_ptr<INetwork>;
+    using CPtr = std::shared_ptr<const INetwork>;
     /**
-     * @brief A constant iterator for INetwork objects definition
+     * @brief A constant iterator for INetwork definition
      */
     using const_iterator = details::INetworkIterator<const INetwork, const ILayer>;
 
@@ -326,19 +391,19 @@ public:
      * @param id Id of the Layer
      * @return Layer interface smart pointer
      */
-    virtual const ILayer::Ptr getLayer(idx_t id) const noexcept = 0;
+    virtual const ILayer::CPtr getLayer(idx_t id) const noexcept = 0;
 
     /**
      * @brief Returns a constant vector of input layers.
      * @return Vector of input layers
      */
-    virtual const std::vector<ILayer::Ptr> getInputs() const noexcept = 0;
+    virtual const std::vector<ILayer::CPtr> getInputs() const noexcept = 0;
 
     /**
      * @brief Returns a constant vector of output layers.
      * @return Vector of output layers
      */
-    virtual const std::vector<ILayer::Ptr> getOutputs() const noexcept = 0;
+    virtual const std::vector<ILayer::CPtr> getOutputs() const noexcept = 0;
 
     /**
      * @brief Returns a constant vector of connections for specific layer.
diff --git a/inference-engine/include/ie_parallel.hpp b/inference-engine/include/ie_parallel.hpp
index 4dbd3f4af..af72214ca 100644
--- a/inference-engine/include/ie_parallel.hpp
+++ b/inference-engine/include/ie_parallel.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -149,8 +149,8 @@ R parallel_sum(const T0 D0, R &input, F func) {
 #if IE_THREAD == IE_THREAD_OMP
     #pragma omp parallel for reduction(+ : sum) schedule(static)
 #endif
-    for (T0_IT dim1 = 0; dim1 < D0; dim1++) {
-        sum += func(dim1);
+    for (T0_IT dim1 = 0; dim1 < static_cast<T0_IT>(D0); dim1++) {
+        sum += static_cast<R>(func(dim1));
     }
     return sum;
 #endif
@@ -230,9 +230,9 @@ R parallel_sum3d(const T0 D0, const T1 D1, const T2 D2, R input, F func) {
 #if IE_THREAD == IE_THREAD_OMP
     #pragma omp parallel for collapse(3) reduction(+ : sum) schedule(static)
 #endif
-    for (T0_IT dim1 = 0; dim1 < D0; dim1++) {
-        for (T1_IT dim2 = 0; dim2 < D1; dim2++) {
-            for (T2_IT dim3 = 0; dim3 < D2; dim3++) {
+    for (T0_IT dim1 = 0; dim1 < static_cast<T0_IT>(D0); dim1++) {
+        for (T1_IT dim2 = 0; dim2 < static_cast<T1_IT>(D1); dim2++) {
+            for (T2_IT dim3 = 0; dim3 < static_cast<T2_IT>(D2); dim3++) {
                 sum += func(dim1, dim2, dim3);
             }
         }
diff --git a/inference-engine/include/ie_parameter.hpp b/inference-engine/include/ie_parameter.hpp
index 59526ad6a..e30d83dca 100644
--- a/inference-engine/include/ie_parameter.hpp
+++ b/inference-engine/include/ie_parameter.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,10 +10,13 @@
 
 #include <details/ie_exception.hpp>
 #include <algorithm>
+#include <typeinfo>
 #include <iterator>
+#include <utility>
 #include <vector>
 #include <cctype>
 #include <string>
+#include <tuple>
 #include <map>
 
 namespace InferenceEngine {
@@ -29,337 +32,245 @@ public:
     Parameter() = default;
 
     /**
-     * @brief The constructor creates a Parameter object with string value
-     * @param value string value
+     * @brief Move constructor
+     * @param parameter Parameter object
      */
-    Parameter(const std::string& value): initialized(true), value(value) {}         // NOLINT
+    Parameter(Parameter &&parameter) noexcept: ptr(std::move(parameter.ptr)) {}
 
     /**
-     * @brief The constructor creates a Parameter object with template value
-     * @param value template value
+     * @brief Copy constructor
+     * @param parameter Parameter object
      */
-    template <class T>
-    Parameter(const T& value): initialized(true), value(std::to_string(value)) {}   // NOLINT
+    Parameter(const Parameter &parameter) {
+        *this = parameter;
+    }
 
     /**
-     * @brief The constructor creates a Parameter object with a vector of template values
-     * @param values vector of template values
+     * @brief Constructor creates parameter with object
+     * @tparam T Parameter type
+     * @tparam U Identity type-transformation
+     * @param parameter object
      */
-    template <class T>
-    Parameter(const std::vector<T>& values): initialized(true) {                    // NOLINT
-        for (const auto& val : values) {
-            if (!value.empty())
-                value += ",";
-            value += std::to_string(val);
-        }
+    template<class T>
+    Parameter(T&& parameter) {                                      // NOLINT
+        ptr = new RealData<typename std::decay<T>::type>(std::forward<T>(parameter));
     }
 
     /**
-     * @brief The cast to string object
-     * Throws exception if parameter was not found.
-     * @return string value
+     * @brief Constructor creates string parameter from char *
+     * @param str char array
      */
-    operator std::string() const {                                                  // NOLINT
-        return asString();
-    }
+    Parameter(const char *str): Parameter(std::string(str)) {}      // NOLINT
 
     /**
-     * @brief Returns a string value for the given parameter or returns the default one
-     * @param def Default value of the parameter if not found
-     * @return A string value
+     * @brief Destructor
      */
-    std::string asString(std::string def) const {
-        if (!initialized) {
-            return def;
-        }
-        return value;
+    virtual ~Parameter() {
+        clear();
     }
 
     /**
-     * @brief Returns a string value for the given parameter.
-     * Throws exception if parameter was not found.
-     * @return A string value
+     * Copy operator for Parameter
+     * @param parameter Parameter object
+     * @return Parameter
      */
-    std::string asString() const {
-        if (!initialized) {
-            THROW_IE_EXCEPTION << "Parameter was not initialized!";
+    Parameter& operator=(const Parameter& parameter) {
+        if (this == &parameter) {
+            return *this;
         }
-        return value;
+        clear();
+        if (!parameter.empty())
+            ptr = parameter.ptr->copy();
+        return *this;
     }
 
     /**
-     * @brief Gets float value for the given parameter
-     * @param def - default value of the parameter if not found
-     * @return float value
+     * Remove a value from parameter
      */
-    float asFloat(float def) const {
-        std::string val = asString(std::to_string(def));
-        try {
-            return std::stof(val);
-        } catch (...) {
-            THROW_IE_EXCEPTION << "Value " << val << " cannot be casted to float.";
-        }
+    void clear() {
+        delete ptr;
+        ptr = nullptr;
     }
 
     /**
-     * @brief Returns a float value for the given layer parameter
-     * @return A float value for the specified parameter
+     * Checks that parameter contains a value
+     * @return false if parameter contains a value else false
      */
-    float asFloat() const {
-        std::string val = asString();
-        try {
-            return std::stof(val);
-        } catch (...) {
-            THROW_IE_EXCEPTION << "Value " << val << " cannot be casted to float.";
-        }
+    bool empty() const noexcept {
+        return nullptr == ptr;
     }
 
     /**
-     * @brief Returns a vector of float values for the given parameter or returns the default value
-     * @param def Default value of the parameter if not found
-     * @return vector of float values
+     * Checks the type of value
+     * @tparam T Type of value
+     * @return true if type of value is correct
      */
-    std::vector<float> asFloats(std::vector<float> def) const {
-        std::string vals = asString("");
-        std::vector<float> result;
-        std::istringstream stream(vals);
-        std::string str;
-        if (vals.empty())
-            return def;
-        while (getline(stream, str, ',')) {
-            try {
-                result.push_back(std::stof(str));
-            } catch (...) {
-                THROW_IE_EXCEPTION << "Value " << vals << " cannot be casted to floats.";
-            }
-        }
-        return result;
+    template<class T>
+    bool is() const {
+        return empty() ? false : ptr->is(typeid(T));
     }
 
     /**
-     * @brief Returns a vector of float values for the given parameter
-     * @return vector of float values
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    std::vector<float> asFloats() const {
-        std::string vals = asString();
-        std::vector<float> result;
-        std::istringstream stream(vals);
-        std::string str;
-        while (getline(stream, str, ',')) {
-            try {
-                result.push_back(std::stof(str));
-            } catch (...) {
-                THROW_IE_EXCEPTION << "Value " << vals << " cannot be casted to floats.";
-            }
-        }
-        return result;
+    template<typename T>
+    T &&as() && {
+        return std::move(dyn_cast<T>(ptr));
     }
 
     /**
-     * @brief Returns an integer value for the given parameter or returns the default value
-     * @param def Default value of the parameter if not found
-     * @return An int value for the specified parameter
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    int asInt(int def) const {
-        std::string val = asString(std::to_string(def));
-        try {
-            return std::stoi(val);
-        } catch (...) {
-            THROW_IE_EXCEPTION << "Value " << val << " cannot be casted to int.";
-        }
+    template<class T>
+    T& as() & {
+        return dyn_cast<T>(ptr);
     }
-
     /**
-     * @brief Returns an integer value for the given parameter
-     * @return An int value for the specified parameter
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    int asInt() const {
-        std::string val = asString();
-        try {
-            return std::stoi(val);
-        } catch (...) {
-            THROW_IE_EXCEPTION << "Value " << val << " cannot be casted to int.";
-        }
+    template<class T>
+    const T& as() const & {
+        return dyn_cast<T>(ptr);
     }
 
-
     /**
-     * @brief Returns a vector of int values for the given parameter or returns the default value
-     * @param def Default value of the parameter if not found
-     * @return vector of int values
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    std::vector<int> asInts(std::vector<int> def) const {
-        std::string vals = asString("");
-        std::vector<int> result;
-        std::istringstream stream(vals);
-        std::string str;
-        if (vals.empty())
-            return def;
-        while (getline(stream, str, ',')) {
-            try {
-                result.push_back(std::stoi(str));
-            } catch (...) {
-                THROW_IE_EXCEPTION << "Value " << vals << " cannot be casted to ints.";
-            }
-        }
-        return result;
+    template<class T>
+    operator T&&() && {
+        return std::move(dyn_cast<typename std::remove_cv<T>::type>(ptr));
     }
 
     /**
-     * @brief Returns a vector of int values for the given parameter
-     * @return vector of int values
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    std::vector<int> asInts() const {
-        std::string vals = asString();
-        std::vector<int> result;
-        std::istringstream stream(vals);
-        std::string str;
-        while (getline(stream, str, ',')) {
-            try {
-                result.push_back(std::stoi(str));
-            } catch (...) {
-                THROW_IE_EXCEPTION << "Value " << vals <<  " cannot be casted to ints.";
-            }
-        }
-        return result;
+    template<class T>
+    operator T&() & {
+        return dyn_cast<typename std::remove_cv<T>::type>(ptr);
     }
+
     /**
-     * @brief Returns an unsigned integer value for the given parameter or returns the default value
-     * @param def Default value of the parameter if not found
-     * @return An unsigned integer value for the specified parameter
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    unsigned int asUInt(unsigned int def) const {
-        std::string val = asString(std::to_string(def));
-        std::string message = "Value " + val + " cannot be casted to unsigned int.";
-        try {
-            int value = std::stoi(val);
-            if (value < 0) {
-                THROW_IE_EXCEPTION << message;
-            }
-            return static_cast<unsigned int>(value);
-        } catch (...) {
-            THROW_IE_EXCEPTION << message;
-        }
+    template<class T> operator const T&() const & {
+        return dyn_cast<typename std::remove_cv<T>::type>(ptr);
     }
 
     /**
-     * @brief Returns an unsigned integer value for the given parameter
-     * @return An unsigned integer value for the specified parameter
+     * Dynamic cast to specified type
+     * @tparam T type
+     * @return casted object
      */
-    unsigned int asUInt() const {
-        std::string val = asString();
-        std::string message = "Value " + val + " cannot be casted to unsigned int.";
-        try {
-            int value = std::stoi(val);
-            if (value < 0) {
-                THROW_IE_EXCEPTION << message;
-            }
-            return static_cast<unsigned int>(value);
-        } catch (...) {
-            THROW_IE_EXCEPTION << message;
-        }
+    template<class T> operator T&() const & {
+        return dyn_cast<typename std::remove_cv<T>::type>(ptr);
     }
 
-
     /**
-     * @brief Returns a vector of unsigned int values for the given parameter or returns the default value
-     * @param def Default value of the parameter if not found
-     * @return vector of unsigned int values
+     * @brief The comparison operator for the Parameter
+     * @param rhs object to compare
+     * @return true if objects are equal
      */
-    std::vector<unsigned int> asUInts(std::vector<unsigned int> def) const {
-        std::string vals = asString("");
-        std::vector<unsigned int> result;
-        std::istringstream stream(vals);
-        std::string str;
-        std::string message = "Value " + vals +  " cannot be casted to unsigned ints.";
-        if (vals.empty())
-            return def;
-        while (getline(stream, str, ',')) {
-            try {
-                int value = std::stoi(str);
-                if (value < 0) {
-                    THROW_IE_EXCEPTION << message;
-                }
-                result.push_back(static_cast<unsigned int>(value));
-            } catch (...) {
-                THROW_IE_EXCEPTION << message;
-            }
-        }
-        return result;
+    bool operator == (const Parameter& rhs) const {
+        return *ptr == *(rhs.ptr);
     }
-
     /**
-     * @brief Returns a vector of unsigned int values for the given parameter
-     * @return vector of unsigned int values
+     * @brief The comparison operator for the Parameter
+     * @param rhs object to compare
+     * @return true if objects aren't equal
      */
-    std::vector<unsigned int> asUInts() const {
-        std::string vals = asString();
-        std::vector<unsigned int> result;
-        std::istringstream stream(vals);
-        std::string str;
-        std::string message = "Value " + vals +  " cannot be casted to unsigned ints.";
-        while (getline(stream, str, ',')) {
-            try {
-                int value = std::stoi(str);
-                if (value < 0) {
-                    THROW_IE_EXCEPTION << message;
-                }
-                result.push_back(static_cast<unsigned int>(value));
-            } catch (...) {
-                THROW_IE_EXCEPTION << message;
-            }
-        }
-        return result;
+    bool operator != (const Parameter& rhs) const {
+        return !(*this == rhs);
     }
 
-    /**
-     * @brief Returns an boolean value for the given parameter.
-     * The valid values are (true, false, 1, 0).
-     * @param def Default value of the parameter if not found
-     * @return An bool value for the specified parameter
-     */
-    bool asBool(bool def) const {
-        std::string val = asString(std::to_string(def));
-        std::string loweredCaseValue;
-        std::transform(val.begin(), val.end(), std::back_inserter(loweredCaseValue), [](char value) {
-            return std::tolower(value);
-        });
-
-        bool result = false;
-
-        if (!(std::istringstream(loweredCaseValue) >> std::boolalpha >> result)) {
-            // attempting parse using non alpha bool
-            return static_cast<bool>(asInt(def));
+private:
+    template<class T, class EqualTo>
+    struct CheckOperatorEqual {
+        template<class U, class V>
+        static auto test(U*) -> decltype(std::declval<U>() == std::declval<V>()) {
+            return false;
         }
 
-        return result;
-    }
+        template<typename, typename>
+        static auto test(...) -> std::false_type {
+            return {};
+        }
 
-    /**
-     * @brief Returns an boolean value for the given parameter.
-     * The valid values are (true, false, 1, 0).
-     * @return An bool value for the specified parameter
-     */
-    bool asBool() const {
-        std::string val = asString();
-        std::string loweredCaseValue;
-        std::transform(val.begin(), val.end(), std::back_inserter(loweredCaseValue), [](char value) {
-            return std::tolower(value);
-        });
-
-        bool result = false;
-
-        if (!(std::istringstream(loweredCaseValue) >> std::boolalpha >> result)) {
-            // attempting parse using non alpha bool
-            return static_cast<bool>(asInt());
+        using type = typename std::is_same<bool, decltype(test<T, EqualTo>(nullptr))>::type;
+    };
+
+    template<class T, class EqualTo = T>
+    struct HasOperatorEqual : CheckOperatorEqual<T, EqualTo>::type {};
+
+    struct Any {
+        virtual ~Any() = default;
+        virtual bool is(const std::type_info&) const = 0;
+        virtual Any *copy() const = 0;
+        virtual bool operator==(const Any& rhs) const = 0;
+    };
+
+    template<class T>
+    struct RealData: Any, std::tuple<T> {
+        using std::tuple<T>::tuple;
+
+        bool is(const std::type_info& id) const override {
+            return id == typeid(T);
+        }
+        Any *copy() const override {
+            return new RealData{get()};
+        }
+
+        T& get() & {
+            return std::get<0>(*this);
         }
 
-        return result;
+        const T& get() const & {
+            return std::get<0>(*this);
+        }
+
+        template <class U>
+        typename std::enable_if<!HasOperatorEqual<U>::value, bool>::type
+        equal(const Any& left, const Any& rhs) const {
+            THROW_IE_EXCEPTION << "Parameter doesn't contain equal operator";
+        }
+
+        template <class U>
+        typename std::enable_if<HasOperatorEqual<U>::value, bool>::type
+        equal(const Any& left, const Any& rhs) const {
+            return dyn_cast<U>(&left) == dyn_cast<U>(&rhs);
+        }
+
+        bool operator==(const Any& rhs) const override {
+            return rhs.is(typeid(T)) && equal<T>(*this, rhs);
+        }
+    };
+
+    template<typename T>
+    static T &dyn_cast(Any* obj) {
+        if (obj == nullptr)
+            THROW_IE_EXCEPTION << "Parameter is empty!";
+        return dynamic_cast<RealData<T>&>(*obj).get();
     }
 
-private:
-    bool initialized;
-    std::string value;
+    template<typename T>
+    static const T &dyn_cast(const Any* obj) {
+        if (obj == nullptr)
+            THROW_IE_EXCEPTION << "Parameter is empty!";
+        return dynamic_cast<const RealData<T> &>(*obj).get();
+    }
+
+    Any *ptr = nullptr;
 };
 
 }  // namespace InferenceEngine
diff --git a/inference-engine/include/ie_plugin.hpp b/inference-engine/include/ie_plugin.hpp
index 5623dd6f0..2712f1fba 100644
--- a/inference-engine/include/ie_plugin.hpp
+++ b/inference-engine/include/ie_plugin.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_plugin_config.hpp b/inference-engine/include/ie_plugin_config.hpp
index 0e3397d95..028b40491 100644
--- a/inference-engine/include/ie_plugin_config.hpp
+++ b/inference-engine/include/ie_plugin_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_plugin_dispatcher.hpp b/inference-engine/include/ie_plugin_dispatcher.hpp
index 60d729dbf..b041d0777 100644
--- a/inference-engine/include/ie_plugin_dispatcher.hpp
+++ b/inference-engine/include/ie_plugin_dispatcher.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,7 +23,7 @@ public:
      * @brief A constructor
      * @param pp Vector of paths to plugin directories
      */
-    explicit PluginDispatcher(const std::vector<file_name_t> &pp) : pluginDirs(pp) {}
+    explicit PluginDispatcher(const std::vector<file_name_t> &pp = {file_name_t()}) : pluginDirs(pp) {}
 
     /**
     * @brief Loads a plugin from plugin directories
diff --git a/inference-engine/include/ie_plugin_ptr.hpp b/inference-engine/include/ie_plugin_ptr.hpp
index 6c10cf597..84f2a20ae 100644
--- a/inference-engine/include/ie_plugin_ptr.hpp
+++ b/inference-engine/include/ie_plugin_ptr.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_precision.hpp b/inference-engine/include/ie_precision.hpp
index d50fe5cd6..8726ae62c 100644
--- a/inference-engine/include/ie_precision.hpp
+++ b/inference-engine/include/ie_precision.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -30,6 +30,7 @@ public:
         I8 = 50,    /**< 8bit signed integer value */
         U16 = 60,   /**< 16bit unsigned integer value */
         I32 = 70,   /**< 32bit signed integer value */
+        BIN = 71,   /**< 1bit integer value */
         CUSTOM = 80 /**< custom precision has it's own name and size of elements */
     };
 
@@ -79,11 +80,13 @@ public:
         return Precision(8 * sizeof(T), typeName == nullptr ? typeid(T).name() : typeName);
     }
 
-    /** @brief checks whether given storage class T can be used for store objects of current precision */
+    /** @brief checks whether given storage class T can be used to store objects of current precision */
     template <class T>
     bool hasStorageType(const char * typeName = nullptr) const noexcept {
-        if (sizeof(T) != size()) {
-            return false;
+        if (precisionInfo.value != BIN) {
+            if (sizeof(T) != size()) {
+                return false;
+            }
         }
 #define CASE(x, y) case x: return std::is_same<T, y>()
 #define CASE2(x, y1, y2) case x: return std::is_same<T, y1>() || std::is_same<T, y2>()
@@ -97,6 +100,7 @@ public:
             CASE(U8, uint8_t);
             CASE(I8, int8_t);
             CASE2(Q78, int16_t, uint16_t);
+            CASE2(BIN, int8_t, uint8_t);
             default : return areSameStrings(name(), typeName == nullptr ? typeid(T).name() : typeName);
 #undef CASE
 #undef CASE2
@@ -159,6 +163,7 @@ public:
             PRECISION_NAME(FP32),
             PRECISION_NAME(FP16),
             PRECISION_NAME(MIXED),
+            PRECISION_NAME(BIN),
 #undef      PRECISION_NAME
         };
         auto i = names.find(str);
@@ -210,6 +215,7 @@ public:
             CASE(I8);
             CASE(Q78);
             CASE(MIXED);
+            CASE(BIN);
             default : return makePrecisionInfo<UNSPECIFIED>("UNSPECIFIED");
 #undef CASE
         }
@@ -257,6 +263,10 @@ template<>
 struct PrecisionTrait<Precision::I32> {
     using value_type = int32_t;
 };
+template<>
+struct PrecisionTrait<Precision::BIN> {
+    using value_type = int8_t;
+};
 
 template<class T>
 inline uint8_t type_size_or_zero() {
@@ -295,7 +305,9 @@ template<Precision::ePrecision precision>
 inline Precision::PrecisionInfo Precision::makePrecisionInfo(const char *name) {
     Precision::PrecisionInfo info;
     info.name = name;
-    info.bitsSize = 8 * type_size_or_zero<typename PrecisionTrait<precision>::value_type>();
+
+    int nBits = precision == BIN ? 1 : 8;
+    info.bitsSize = nBits * type_size_or_zero<typename PrecisionTrait<precision>::value_type>();
     info.isFloat = is_floating<precision>();
     info.value = precision;
     return info;
diff --git a/inference-engine/include/ie_preprocess.hpp b/inference-engine/include/ie_preprocess.hpp
index 1b984ff26..0a969eebd 100644
--- a/inference-engine/include/ie_preprocess.hpp
+++ b/inference-engine/include/ie_preprocess.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_primitive_info.hpp b/inference-engine/include/ie_primitive_info.hpp
index d4e4fbc68..31afb2007 100644
--- a/inference-engine/include/ie_primitive_info.hpp
+++ b/inference-engine/include/ie_primitive_info.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_tensor_info.hpp b/inference-engine/include/ie_tensor_info.hpp
index 5f71dc9ca..ccbf3e852 100644
--- a/inference-engine/include/ie_tensor_info.hpp
+++ b/inference-engine/include/ie_tensor_info.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_unicode.hpp b/inference-engine/include/ie_unicode.hpp
index f8231fa86..41e26031a 100644
--- a/inference-engine/include/ie_unicode.hpp
+++ b/inference-engine/include/ie_unicode.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_utils.hpp b/inference-engine/include/ie_utils.hpp
index 2ba9f02ec..545af5748 100644
--- a/inference-engine/include/ie_utils.hpp
+++ b/inference-engine/include/ie_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/ie_version.hpp b/inference-engine/include/ie_version.hpp
index d7431150b..922893957 100644
--- a/inference-engine/include/ie_version.hpp
+++ b/inference-engine/include/ie_version.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/inference_engine.hpp b/inference-engine/include/inference_engine.hpp
index 352d94327..2df7fda60 100644
--- a/inference-engine/include/inference_engine.hpp
+++ b/inference-engine/include/inference_engine.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/include/vpu/vpu_plugin_config.hpp b/inference-engine/include/vpu/vpu_plugin_config.hpp
new file mode 100644
index 000000000..c6cd1e990
--- /dev/null
+++ b/inference-engine/include/vpu/vpu_plugin_config.hpp
@@ -0,0 +1,213 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief A header that defines advanced related properties for VPU plugins.
+ * These properties should be used in SetConfig() and LoadNetwork() methods of plugins
+ *
+ * @file vpu_plugin_config.hpp
+ */
+
+#pragma once
+
+#include <string>
+#include "ie_plugin_config.hpp"
+
+#define VPU_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_##name)
+#define VPU_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_##name
+
+#define DECLARE_VPU_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_##name)
+#define DECLARE_VPU_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_##name)
+
+#define VPU_HDDL_CONFIG_KEY(name) InferenceEngine::VPUConfigParams::_CONFIG_KEY(VPU_HDDL_##name)
+#define VPU_HDDL_CONFIG_VALUE(name) InferenceEngine::VPUConfigParams::VPU_HDDL_##name
+
+#define DECLARE_VPU_HDDL_CONFIG_KEY(name) DECLARE_CONFIG_KEY(VPU_HDDL_##name)
+#define DECLARE_VPU_HDDL_CONFIG_VALUE(name) DECLARE_CONFIG_VALUE(VPU_HDDL_##name)
+
+namespace InferenceEngine {
+namespace VPUConfigParams {
+
+/**
+ * @brief Turn on HW stages usage (applicable for MyriadX devices only).
+ * This option should be used with values: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
+ */
+DECLARE_VPU_CONFIG_KEY(HW_STAGES_OPTIMIZATION);
+
+/**
+ * @brief The key to specify desirable log level for devices.
+ * This option should be used with values: CONFIG_VALUE(LOG_NONE) (default),
+ * CONFIG_VALUE(LOG_WARNING), CONFIG_VALUE(LOG_INFO), CONFIG_VALUE(LOG_DEBUG)
+ */
+DECLARE_VPU_CONFIG_KEY(LOG_LEVEL);
+
+/**
+ * @deprecated
+ * @brief The key to define normalization coefficient for the network input.
+ * This option should used with be a real number. Example "255.f"
+ */
+DECLARE_VPU_CONFIG_KEY(INPUT_NORM);
+
+/**
+ * @deprecated
+ * @brief The flag to specify Bias value that is added to each element of the network input.
+ * This option should used with be a real number. Example "0.1f"
+ */
+DECLARE_VPU_CONFIG_KEY(INPUT_BIAS);
+
+/**
+ * @brief The flag for adding to the profiling information the time of obtaining a tensor.
+ * This option should be used with values: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
+ */
+DECLARE_VPU_CONFIG_KEY(PRINT_RECEIVE_TENSOR_TIME);
+
+/**
+ * @brief The flag to reset stalled devices: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
+ * This is a plugin scope option and must be used with the plugin's SetConfig method
+ */
+DECLARE_VPU_CONFIG_KEY(FORCE_RESET);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: Arbitrary non-empty string. If empty (""), equals no set, default: "";
+ * This option allows to specify the number of MYX devices used for inference a specific Executable network.
+ * Note: Only one network would be allocated to one device.
+ * The number of devices for the tag is specified in the hddl_service.config file.
+ * Example:
+ * "service_settings":
+ * {
+ *     "graph_tag_map":
+ *     {
+ *         "tagA":3
+ *     }
+ * }
+ * It means that an executable network marked with tagA will be executed on 3 devices
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(GRAPH_TAG);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: Arbitrary non-empty string. If empty (""), equals no set, default: "";
+ * This config makes the executable networks to be allocated on one certain device (instead of multiple devices).
+ * And all inference through this executable network, will be done on this device.
+ * Note: Only one network would be allocated to one device.
+ * The number of devices which will be used for stream-affinity must be specified in hddl_service.config file.
+ * Example:
+ * "service_settings":
+ * {
+ *     "stream_device_number":5
+ * }
+ * It means that 5 device will be used for stream-affinity
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(STREAM_ID);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: Arbitrary non-empty string. If empty (""), equals no set, default: "";
+ * This config allows user to control device flexibly. This config gives a "tag" for a certain device while
+ * allocating a network to it. Afterward, user can allocating/deallocating networks to this device with this "tag".
+ * Devices used for such use case is controlled by a so-called "Bypass Scheduler" in HDDL backend, and the number
+ * of such device need to be specified in hddl_service.config file.
+ * Example:
+ * "service_settings":
+ * {
+ *     "bypass_device_number": 5
+ * }
+ * It means that 5 device will be used for Bypass scheduler.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(DEVICE_TAG);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: "YES/NO", default is "NO".
+ * This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set. After a user load a
+ * network, the user got a handle for the network.
+ * If "YES", the network allocated is bind to the device (with the specified "DEVICE_TAG"), which means all afterwards
+ * inference through this network handle will be executed on this device only.
+ * If "NO", the network allocated is not bind to the device (with the specified "DEVICE_TAG"). If the same network
+ * is allocated on multiple other devices (also set BIND_DEVICE to "False"), then inference through any handle of these
+ * networks may be executed on any of these devices those have the network loaded.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(BIND_DEVICE);
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: A signed int wrapped in a string, default is "0".
+ * This config is a sub-config of DEVICE_TAG, and only available when "DEVICE_TAG" is set and "BIND_DEVICE" is "False".
+ * When there are multiple devices running a certain network (a same network running on multiple devices in Bypass Scheduler),
+ * the device with a larger number has a higher priority, and more inference tasks will be fed to it with priority.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(RUNTIME_PRIORITY);
+
+
+/**
+ * @brief [Only for HDDLPlugin]
+ * Type: "YES/NO", default is "NO". **Note: ONLY available when "DEVICE_TAG" is set.
+ * This config should be used only when the network has been loaded already with the same network content, the same
+ * "DEVICE_TAG" as used this time and "BIND_DEVICE" of the loaded network had been set to "NO".
+ * This config is only used to update the "RUNTIME_PRIORITY" of previous loaded network, and the application should keep using
+ * the network handle that previous allocated to do inference.
+ * - If "Yes": the "RUNTIME_PRIORITY" must be specified with a integer, and it will be set as the new runtime priority for that network on that device.
+ * - If "No": load this network to deivce.
+ * **Note: If "BIND_DEVICE" of the previously loaded network was "Yes", the behavior of "update runtime priority" is undefined.
+ */
+DECLARE_VPU_HDDL_CONFIG_KEY(UPDATE_RUNTIME_PRIORITY);
+
+/**
+ * @brief This option allows to pass extra configuration for executable network.
+ * By default, it is empty string, which means - no configuration.
+ * String format:
+ * <key>=<value>,<key>=<value>,...
+ * Supported parameters and options:
+ *   * file : path to XML file with configuration
+ *   * data : options related to data objects (input, output, intermediate), next parameter describes the option
+ *     * scale : SCALE factor for data range (applicable for input and intermediate data)
+ */
+DECLARE_VPU_CONFIG_KEY(NETWORK_CONFIG);
+
+/**
+ * @brief This option allows to to specify input output layouts for network layers.
+ * By default, this value set to VPU_CONFIG_VALUE(AUTO) value.
+ * Supported values:
+ *   VPU_CONFIG_VALUE(AUTO) executable network configured to use optimal layer layout depending on available HW
+ *   VPU_CONFIG_VALUE(NCHW) executable network forced to use NCHW input/output layouts
+ *   VPU_CONFIG_VALUE(NHWC) executable network forced to use NHWC input/output layouts
+ */
+DECLARE_VPU_CONFIG_KEY(COMPUTE_LAYOUT);
+
+/**
+ * @brief This option allows to pass custom layers binding xml.
+ * If layer is present in such an xml, it would be used during inference even if the layer is natively supported
+ */
+DECLARE_VPU_CONFIG_KEY(CUSTOM_LAYERS);
+
+/**
+ * @brief Supported keys definition for VPU_CONFIG_KEY(COMPUTE_LAYOUT) option.
+ */
+DECLARE_VPU_CONFIG_VALUE(AUTO);
+DECLARE_VPU_CONFIG_VALUE(NCHW);
+DECLARE_VPU_CONFIG_VALUE(NHWC);
+
+/**
+ * @brief This option allows to specify device.
+ * If specified device is not available then creating infer request will throw an exception.
+ */
+DECLARE_VPU_CONFIG_KEY(PLATFORM);
+
+/**
+ * @brief Supported keys definition for VPU_CONFIG_KEY(PLATFORM) option.
+ */
+DECLARE_VPU_CONFIG_VALUE(2450);
+DECLARE_VPU_CONFIG_VALUE(2480);
+
+/**
+ * @brief Ignore statistic in IR by plugin.
+ * Plugin could use statistic present in IR in order to try to improve calculations precision.
+ * If you don't want statistic to be used enable this option.
+ * This option should be used with values: CONFIG_VALUE(YES) or CONFIG_VALUE(NO) (default)
+ */
+DECLARE_VPU_CONFIG_KEY(IGNORE_IR_STATISTIC);
+
+}  // namespace VPUConfigParams
+}  // namespace InferenceEngine
diff --git a/inference-engine/install_dependencies.sh b/inference-engine/install_dependencies.sh
index fdb70e266..12dfaca94 100755
--- a/inference-engine/install_dependencies.sh
+++ b/inference-engine/install_dependencies.sh
@@ -22,6 +22,7 @@ function yes_or_no {
 # install dependencies
 if [[ -f /etc/lsb-release ]]; then
     # Ubuntu
+    system_ver=`cat /etc/lsb-release | grep -i "DISTRIB_RELEASE" | cut -d "=" -f2`
     sudo -E apt update
     sudo -E apt-get install -y \
             build-essential \
@@ -40,7 +41,6 @@ if [[ -f /etc/lsb-release ]]; then
             automake \
             libtool \
             autoconf \
-            libpng12-dev \
             libcairo2-dev \
             libpango1.0-dev \
             libglib2.0-dev \
@@ -52,6 +52,11 @@ if [[ -f /etc/lsb-release ]]; then
             gstreamer1.0-plugins-base \
             libusb-1.0-0-dev \
             libopenblas-dev
+    if [ $system_ver = "18.04" ]; then
+	    sudo -E apt-get install -y libpng-dev
+    else
+	    sudo -E apt-get install -y libpng12-dev
+    fi
 else
     # CentOS 7.x
     sudo -E yum install -y centos-release-scl epel-release
diff --git a/inference-engine/samples/CMakeLists.txt b/inference-engine/samples/CMakeLists.txt
index 1f7bb9f8b..da00b43b5 100644
--- a/inference-engine/samples/CMakeLists.txt
+++ b/inference-engine/samples/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -50,16 +50,6 @@ else ()
     set (LIBRARY_OUTPUT_PATH ${LIBRARY_OUTPUT_DIRECTORY}/lib)
 endif()
 
-# use this flag if you need to throw custom message in case if the IE package is not found.
-if (IE_NOT_FOUND_MESSAGE)
-    find_package(InferenceEngine 1.5 QUIET)
-    if (NOT(InferenceEngine_FOUND))
-        message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
-    endif()
-else()
-    find_package(InferenceEngine 1.5 REQUIRED)
-endif()
-
 if (WIN32)
     if (NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
         message(FATAL_ERROR "Only 64-bit supported on Windows")
@@ -69,13 +59,16 @@ if (WIN32)
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX")
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") #no asynchronous structured exception handling
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE")
+    if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
+        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267") #disable some warnings
+    endif()
 else()
-    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Werror=return-type ")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") #treating warnings as errors
     if (APPLE)
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=unused-command-line-argument")
     elseif(UNIX)
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wuninitialized -Winit-self")
-        if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+        if(NOT ${CMAKE_CXX_COMPILER_ID} STREQUAL Clang)
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wmaybe-uninitialized")
         endif()
     endif()
@@ -86,54 +79,70 @@ endif()
 ## to use C++11
 set (CMAKE_CXX_STANDARD 11)
 set (CMAKE_CXX_STANDARD_REQUIRED ON)
-set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
+if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU)
+    set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
+endif()
 ####################################
 
 set (GFLAGS_IS_SUBPROJECT TRUE)
 set (HAVE_SYS_STAT_H 1)
 set (HAVE_INTTYPES_H 1)
 
-if (WIN32)
-    # add_compile_options("/WX")
-else()
-    add_compile_options("-Werror")
+add_subdirectory(thirdparty/gflags)
+
+if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
 endif()
 
-# Properties->C/C++->General->Additional Include Directories
 include_directories (
-    ${CMAKE_CURRENT_SOURCE_DIR}/common/format_reader
-    ${InferenceEngine_INCLUDE_DIRS}
-    ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/gflags/include
-    ${CMAKE_CURRENT_SOURCE_DIR}/common
+        ${CMAKE_CURRENT_SOURCE_DIR}/common
+        ${CMAKE_CURRENT_SOURCE_DIR}/common/format_reader
 )
+add_subdirectory(common/format_reader)
 
-if (UNIX)
-    set (LIB_DL dl)
+# samples build can be switched off during whole IE build
+if (IE_MAIN_SOURCE_DIR AND NOT ENABLE_SAMPLES)
+    return()
 endif()
 
-add_subdirectory(thirdparty/gflags)
-add_subdirectory(common/format_reader)
-
-# collect all samples subdirectories
-file(GLOB subdirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
-# skip building of unnecessary subdirs
-list(REMOVE_ITEM subdirs archived common thirdparty)
-
-foreach (dir ${subdirs})
-    if (IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${dir})
-        # check if a subdirectory contains CMakeLists.txt. In this case we can build it.
-        file(GLOB is_sample_dir "${CMAKE_CURRENT_SOURCE_DIR}/${dir}/CMakeLists.txt")
-        if(is_sample_dir)
-            # check if specified sample/demo is found.
-            if (BUILD_SAMPLE_NAME)
-                list(FIND BUILD_SAMPLE_NAME ${dir} index)
-            endif()
-            if (index EQUAL -1)
-                message(STATUS "${dir} SKIPPED")
-            else()
-                # Include subdirectory to the project.
-                add_subdirectory(${dir})
+function(add_samples_to_build)
+    # check each passed sample subdirectory
+    foreach (dir ${ARGN})
+        if (IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/${dir})
+            # check if a subdirectory contains CMakeLists.txt. In this case we can build it.
+            file(GLOB is_sample_dir "${CMAKE_CURRENT_SOURCE_DIR}/${dir}/CMakeLists.txt")
+            if(is_sample_dir)
+                # check if specified sample/demo is found.
+                if (BUILD_SAMPLE_NAME)
+                    list(FIND BUILD_SAMPLE_NAME ${dir} index)
+                endif()
+                if (index EQUAL -1)
+                    message(STATUS "${dir} SKIPPED")
+                else()
+                    # Include subdirectory to the project.
+                    add_subdirectory(${dir})
+                endif()
             endif()
         endif()
+    endforeach()
+endfunction(add_samples_to_build)
+
+# use this flag if you need to throw custom message in case if the IE package is not found.
+if (IE_NOT_FOUND_MESSAGE)
+    find_package(InferenceEngine 1.6 QUIET)
+    if (NOT(InferenceEngine_FOUND))
+        message(FATAL_ERROR ${IE_NOT_FOUND_MESSAGE})
     endif()
-endforeach()
+else()
+    find_package(InferenceEngine 1.6 REQUIRED)
+endif()
+
+if (UNIX)
+    set (LIB_DL dl)
+endif()
+
+# collect all samples subdirectories
+file(GLOB samples_dirs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *)
+# skip building of unnecessary subdirectories
+list(REMOVE_ITEM samples_dirs archived common thirdparty)
+add_samples_to_build(${samples_dirs})
diff --git a/inference-engine/samples/benchmark_app/CMakeLists.txt b/inference-engine/samples/benchmark_app/CMakeLists.txt
index 87db73046..c142ea607 100644
--- a/inference-engine/samples/benchmark_app/CMakeLists.txt
+++ b/inference-engine/samples/benchmark_app/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "benchmark_app")
 
 file (GLOB SRC
diff --git a/inference-engine/samples/benchmark_app/README.md b/inference-engine/samples/benchmark_app/README.md
index ab0bbd73c..23c17e4b8 100644
--- a/inference-engine/samples/benchmark_app/README.md
+++ b/inference-engine/samples/benchmark_app/README.md
@@ -1,34 +1,51 @@
-# Benchmark Application Demo
+# Benchmark Application C++ Demo
 
-This topic demonstrates how to use the Benchmark Application to estimate deep learning inference performance on supported devices. Performance can be measured for two inference modes: synchronous and asynchronous. 
+This topic demonstrates how to use the Benchmark Application to estimate deep learning inference performance on
+supported devices. Performance can be measured for two inference modes: synchronous and asynchronous.
 
-> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Application. For the Python* implementation, refer to [Benchmark Application (Python*)](./samples/python_samples/benchmark_app/README.md)
+> **NOTE:** This topic describes usage of C++ implementation of the Benchmark Application. For the Python* implementation, refer to [Benchmark Application (Python*)](./inference-engine/ie_bridges/python/sample/benchmark_app/README.md).
 
 
 ## How It Works
 
-> **NOTE:** To achieve benchmark results similar to the official published results, set CPU frequency to 2.9GHz and GPU frequency to 1GHz.
+> **NOTE:** To achieve benchmark results similar to the official published results, set CPU frequency to 2.9 GHz and GPU frequency to 1 GHz.
 
-Upon the start-up, the application reads command-line parameters and loads a network and images to the Inference Engine plugin. The number of infer requests and execution approach depend on a mode defined with the `-api` command-line parameter.
+Upon start-up, the application reads command-line parameters and loads a network and images to the Inference Engine
+plugin, which is chosen depending on a specified device. The number of infer requests and execution approach depend
+on the mode defined with the `-api` command-line parameter.
 
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
-### Synchronous API
-For synchronous mode, the primary metric is latency. The application creates one infer request and executes the `Infer` method. A number of executions is defined by one of the two values:
-* Number of iterations defined with the `-niter` command-line argument
-* Predefined duration if `-niter` is skipped. Predefined duration value depends on device.
+If you run the application in the synchronous mode, it creates one infer request and executes the `Infer` method.
+If you run the application in the asynchronous mode, it creates as many infer requests as specified in the `-nireq`
+command-line parameter and executes the `StartAsync` method for each of them.
 
-During the execution, the application collects two types of metrics:
-* Latency for each infer request executed with `Infer` method
-* Duration of all executions
+The `Wait` method is used to wait for a previous execution of an infer request to complete. A number of execution steps
+is defined by one of the two values:
+* Number of iterations specified with the `-niter` command-line argument
+* Predefined duration if `-niter` is not specified. Predefined duration value depends on device.
 
-Reported latency value is calculated as mean value of all collected latencies. Reported throughput value is a derivative from reported latency and additionally depends on batch size.
+During the execution, the application collects latency for each executed infer request.
 
-### Asynchronous API
-For asynchronous mode, the primary metric is throughput in frames per second (FPS). The application creates a certain number of infer requests and executes the `StartAsync` method. A number of infer is specified with the `-nireq` command-line parameter. A number of executions is defined by one of the two values:
-* Number of iterations defined with the `-niter` command-line argument
-* Predefined duration if `-niter` is skipped. Predefined duration value depends on device.
+Reported latency value is calculated as a median value of all collected latencies. Reported throughput value is reported
+in frames per second (FPS) and calculated as a derivative from:
+* Reported latency in the Sync mode
+* The total execution time in the Async mode
+
+Throughput value also depends on batch size.
+
+The application also collects per-layer Performance Measurement (PM) counters for each executed infer request if you
+enable statistics dumping by setting the `-report_type` parameter to one of the possible values:
+* `no_counters` report includes configuration options specified, resulting FPS and latency.
+* `median_counters` report extends the `no_counters` report and additionally includes median PM counters values for each layer from the network.
+* `detailed_counters` report extends the `median_counters` report and additionally includes per-layer PM counters and latency for each executed infer request.
+
+Depending on the type, the report is stored to `benchmark_no_counters_report.csv`, `benchmark_median_counters_report.csv`,
+or `benchmark_detailed_counters_report.csv` file located in the path specified in `-report_folder`.
+
+The application also saves executable graph information serialized to a XML file if you specify a path to it with the
+`-exec_graph_path` parameter.
 
-The infer requests are executed asynchronously. `Wait` method is used to wait for previous execution to complete. The application measures all infer requests executions and reports the throughput metric based on batch size and total execution duration.
 
 ## Running
 
@@ -43,30 +60,39 @@ InferenceEngine:
 benchmark_app [OPTION]
 Options:
 
-    -h                      Print a usage message
-    -i "<path>"             Required. Path to a folder with images or to image files.
-    -m "<path>"             Required. Path to an .xml file with a trained model.
-    -pp "<path>"            Path to a plugin folder.
-    -api "<sync/async>"     Required. Enable using sync/async API.
-    -d "<device>"           Specify a target device to infer on: CPU, GPU, FPGA or MYRIAD. Use "-d HETERO:<comma separated devices list>" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device.
-    -niter "<integer>"      Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
-    -nireq "<integer>"      Optional. Number of infer requests (default value is 2).
-    -l "<absolute_path>"    Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
+    -h                        Print a usage message
+    -i "<path>"               Required. Path to a folder with images or to image files.
+    -m "<path>"               Required. Path to an .xml file with a trained model.
+    -pp "<path>"              Optional. Path to a plugin folder.
+    -d "<device>"             Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. Default value is CPU. Use "-d HETERO:<comma-separated_devices_list>" format to specify HETERO plugin. The application looks for a suitable plugin for the specified device.
+    -l "<absolute_path>"      Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
           Or
-    -c "<absolute_path>"    Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
-    -b "<integer>"          Optional. Batch size value. If not specified, the batch size value is determined from IR.
+    -c "<absolute_path>"      Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
+    -api "<sync/async>"       Optional. Enable Sync/Async API. Default value is "async".
+    -niter "<integer>"        Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
+    -nireq "<integer>"        Optional. Number of infer requests. Default value is 2.
+    -b "<integer>"            Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.
+    -stream_output            Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a multiline output.
+
+  CPU-specific performance options:
+    -nthreads "<integer>"     Optional. Number of threads to use for inference on the CPU (including HETERO cases).
+    -pin "YES"/"NO"           Optional. Enable ("YES" is default value) or disable ("NO") CPU threads pinning for CPU-involved inference.
+
+  Statistics dumping options:
+    -report_type "<type>"     Optional. Enable collecting statistics report. "no_counters" report contains configuration options specified, resulting FPS and latency. "median_counters" report extends "no_counters" report and additionally includes median PM counters values for each layer from the network. "detailed_counters" report extends "median_counters" report and additionally includes per-layer PM counters and latency for each executed infer request.
+    -report_folder            Optional. Path to a folder where statistics report is stored.
+    -exec_graph_path          Optional. Path to a file where to store executable graph information serialized.
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
 
-You can run the application for one input layer four-dimensional models that support images as input, for example, public 
-AlexNet and GoogLeNet models that can be downloaded 
-with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader).
+You can run the application for one input layer four-dimensional models that support images as input, for example, public
+AlexNet and GoogLeNet models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
 
-> **NOTE**: To run the application, the model should be first converted to the Inference Engine format (\*.xml + \*.bin) 
-using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+> **NOTE**: Before running the demo with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
-For example, to perform inference on CPU in the synchronous mode and get estimated performance metrics for AlexNet model, run the following command:
+For example, to perform inference on CPU in the synchronous mode and get estimated performance metrics for AlexNet model,
+run the following command:
 
 ```sh
 ./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/alexnet_fp32.xml -d CPU -api sync
@@ -80,21 +106,25 @@ For the asynchronous mode:
 
 ## Demo Output
 
-Application output depends on a used API. For synchronous API, the application outputs latency and throughput:
-```
-[ INFO ] Start inference synchronously (60000 ms duration)
+The application outputs latency and throughput. Additionally, if you set the `-report_type` parameter, the application
+outputs statistics report. If you set `-exec_graph_path`, the application reports executable graph information serialized.
+Progress bar shows the progress of each execution step:
 
-[ INFO ] Latency: 37.91 ms
-[ INFO ] Throughput: 52.7566 FPS
 ```
+[Step 7/8] Start inference asynchronously (100 async inference executions, 4 inference requests in parallel)
+Progress: [....................] 100.00% done
 
-For asynchronous API, the application outputs only throughput:
-```
-[ INFO ] Start inference asynchronously (60000 ms duration, 2 inference requests in parallel)
+[Step 8/8] Dump statistics report
+[ INFO ] statistics report is stored to benchmark_detailed_counters_report.csv
+Progress: [....................] 100.00% done
 
-[ INFO ] Throughput: 48.2031 FPS
+Latency: 73.33 ms
+Throughput: 53.28 FPS
 ```
 
+All measurements including per-layer PM counters are reported in milliseconds.
+
+
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
 * [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
diff --git a/inference-engine/samples/benchmark_app/benchmark_app.h b/inference-engine/samples/benchmark_app/benchmark_app.h
deleted file mode 100644
index 6ae2ffa6a..000000000
--- a/inference-engine/samples/benchmark_app/benchmark_app.h
+++ /dev/null
@@ -1,133 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <string>
-#include <vector>
-#include <gflags/gflags.h>
-#include <iostream>
-
-#ifdef _WIN32
-#include <os/windows/w_dirent.h>
-#else
-#include <sys/stat.h>
-#include <dirent.h>
-#endif
-
-/// @brief message for help argument
-static const char help_message[] = "Print a usage message";
-
-/// @brief message for images argument
-static const char image_message[] = "Required. Path to a folder with images or to image files.";
-
-/// @brief message for images argument
-static const char multi_input_message[] = "Path to multi input file containing.";
-
-/// @brief message for model argument
-static const char model_message[] = "Required. Path to an .xml file with a trained model.";
-
-/// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Path to a plugin folder.";
-
-/// @brief message for plugin argument
-static const char api_message[] = "Required. Enable using sync/async API.";
-
-/// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify a target device to infer on: CPU, GPU, FPGA or MYRIAD. " \
-"Use \"-d HETERO:<comma separated devices list>\" format to specify HETERO plugin. " \
-"The application looks for a suitable plugin for the specified device.";
-
-/// @brief message for iterations count
-static const char iterations_count_message[] = "Optional. Number of iterations. " \
-"If not specified, the number of iterations is calculated depending on a device.";
-
-/// @brief message for requests count
-static const char infer_requests_count_message[] = "Optional. Number of infer requests (default value is 2).";
-
-/// @brief message for #threads for CPU inference
-static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
-                                                "(including Hetero cases).";
-
-/// @brief message for user library argument
-static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.";
-
-/// @brief message for clDNN custom kernels desc
-static const char custom_cldnn_message[] = "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
-
-static const char batch_size_message[] = "Optional. Batch size value. If not specified, the batch size value is determined from IR";
-
-// @brief message for CPU threads pinning option
-static const char infer_threads_pinning_message[] = "Optional. Enable (\"YES\" is default value) or disable (\"NO\")" \
-                                                  "CPU threads pinning for CPU-involved inference.";
-
-/// @brief Define flag for showing help message <br>
-DEFINE_bool(h, false, help_message);
-
-/// @brief Define parameter for set image file <br>
-/// i or mif is a required parameter
-DEFINE_string(i, "", image_message);
-
-/// @brief Define parameter for set model file <br>
-/// It is a required parameter
-DEFINE_string(m, "", model_message);
-
-/// @brief Define parameter for set path to plugins <br>
-DEFINE_string(pp, "", plugin_path_message);
-
-/// @brief Enable per-layer performance report
-DEFINE_string(api, "async", api_message);
-
-/// @brief device the target device to infer on <br>
-DEFINE_string(d, "", target_device_message);
-
-/// @brief Absolute path to CPU library with user layers <br>
-/// It is a required parameter
-DEFINE_string(l, "", custom_cpu_library_message);
-
-/// @brief Define parameter for clDNN custom kernels path <br>
-/// Default is ./lib
-DEFINE_string(c, "", custom_cldnn_message);
-
-/// @brief Iterations count (default 0)
-/// Sync mode: iterations count
-/// Async mode: StartAsync counts
-DEFINE_int32(niter, 0, iterations_count_message);
-
-/// @brief Number of infer requests in parallel
-DEFINE_int32(nireq, 2, infer_requests_count_message);
-
-/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
-DEFINE_int32(nthreads, 0, infer_num_threads_message);
-
-/// @brief Define parameter for batch size <br>
-/// Default is 0 (that means don't specify)
-DEFINE_int32(b, 0, batch_size_message);
-
-// @brief Enable plugin messages
-DEFINE_string(pin, "YES", infer_threads_pinning_message);
-/**
-* @brief This function show a help message
-*/
-static void showUsage() {
-    std::cout << std::endl;
-    std::cout << "universal_app [OPTION]" << std::endl;
-    std::cout << "Options:" << std::endl;
-    std::cout << std::endl;
-    std::cout << "    -h                        " << help_message << std::endl;
-    std::cout << "    -i \"<path>\"             " << image_message << std::endl;
-    std::cout << "    -m \"<path>\"             " << model_message << std::endl;
-    std::cout << "    -pp \"<path>\"            " << plugin_path_message << std::endl;
-    std::cout << "    -api \"<sync/async>\"     " << api_message << std::endl;
-    std::cout << "    -d \"<device>\"           " << target_device_message << std::endl;
-    std::cout << "    -niter \"<integer>\"      " << iterations_count_message << std::endl;
-    std::cout << "    -l \"<absolute_path>\"    " << custom_cpu_library_message << std::endl;
-    std::cout << "          Or" << std::endl;
-    std::cout << "    -c \"<absolute_path>\"    " << custom_cldnn_message << std::endl;
-    std::cout << "    -nireq \"<integer>\"      " << infer_requests_count_message << std::endl;
-    std::cout << "    -b \"<integer>\"          " << batch_size_message << std::endl;
-    std::cout << "    Some CPU-specific performance options" << std::endl;
-    std::cout << "    -nthreads \"<integer>\"   " << infer_num_threads_message << std::endl;
-    std::cout << "    -pin \"YES\"/\"NO\"       " << infer_threads_pinning_message << std::endl;
-}
diff --git a/inference-engine/samples/benchmark_app/benchmark_app.hpp b/inference-engine/samples/benchmark_app/benchmark_app.hpp
new file mode 100644
index 000000000..8320fb766
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/benchmark_app.hpp
@@ -0,0 +1,169 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <gflags/gflags.h>
+#include <iostream>
+
+#ifdef _WIN32
+#include <os/windows/w_dirent.h>
+#else
+#include <sys/stat.h>
+#include <dirent.h>
+#endif
+
+/// @brief message for help argument
+static const char help_message[] = "Print a usage message";
+
+/// @brief message for images argument
+static const char image_message[] = "Required. Path to a folder with images or to image files.";
+
+/// @brief message for images argument
+static const char multi_input_message[] = "Path to multi input file containing.";
+
+/// @brief message for model argument
+static const char model_message[] = "Required. Path to an .xml file with a trained model.";
+
+/// @brief message for plugin_path argument
+static const char plugin_path_message[] = "Optional. Path to a plugin folder.";
+
+/// @brief message for execution mode
+static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
+
+/// @brief message for assigning cnn calculation to device
+static const char target_device_message[] = "Optional. Specify a target device to infer on: CPU, GPU, FPGA, HDDL or MYRIAD. Default value is CPU. " \
+"Use \"-d HETERO:<comma-separated_devices_list>\" format to specify HETERO plugin. " \
+"The application looks for a suitable plugin for the specified device.";
+
+/// @brief message for iterations count
+static const char iterations_count_message[] = "Optional. Number of iterations. " \
+"If not specified, the number of iterations is calculated depending on a device.";
+
+/// @brief message for requests count
+static const char infer_requests_count_message[] = "Optional. Number of infer requests. Default value is 2.";
+
+/// @brief message for #threads for CPU inference
+static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
+                                                "(including HETERO cases).";
+
+/// @brief message for user library argument
+static const char custom_cpu_library_message[] = "Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.";
+
+/// @brief message for clDNN custom kernels desc
+static const char custom_cldnn_message[] = "Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.";
+
+static const char batch_size_message[] = "Optional. Batch size value. If not specified, the batch size value is determined from Intermediate Representation.";
+
+// @brief message for CPU threads pinning option
+static const char infer_threads_pinning_message[] = "Optional. Enable (\"YES\" is default value) or disable (\"NO\") " \
+                                                    "CPU threads pinning for CPU-involved inference.";
+
+// @brief message for stream_output option
+static const char stream_output_message[] = "Optional. Print progress as a plain text. When specified, an interactive progress bar is replaced with a "
+                                            "multiline output.";
+
+// @brief message for report_type option
+static const char report_type_message[] = "Optional. Enable collecting statistics report. \"no_counters\" report contains "
+                                          "configuration options specified, resulting FPS and latency. \"median_counters\" "
+                                          "report extends \"no_counters\" report and additionally includes median PM "
+                                          "counters values for each layer from the network. \"detailed_counters\" report "
+                                          "extends \"median_counters\" report and additionally includes per-layer PM "
+                                          "counters and latency for each executed infer request.";
+
+// @brief message for report_folder option
+static const char report_folder_message[] = "Optional. Path to a folder where statistics report is stored.";
+
+// @brief message for exec_graph_path option
+static const char exec_graph_path_message[] = "Optional. Path to a file where to store executable graph information serialized.";
+
+/// @brief Define flag for showing help message <br>
+DEFINE_bool(h, false, help_message);
+
+/// @brief Define parameter for set image file <br>
+/// i or mif is a required parameter
+DEFINE_string(i, "", image_message);
+
+/// @brief Define parameter for set model file <br>
+/// It is a required parameter
+DEFINE_string(m, "", model_message);
+
+/// @brief Define parameter for set path to plugins <br>
+DEFINE_string(pp, "", plugin_path_message);
+
+/// @brief Define execution mode
+DEFINE_string(api, "async", api_message);
+
+/// @brief device the target device to infer on <br>
+DEFINE_string(d, "CPU", target_device_message);
+
+/// @brief Absolute path to CPU library with user layers <br>
+/// It is a required parameter
+DEFINE_string(l, "", custom_cpu_library_message);
+
+/// @brief Define parameter for clDNN custom kernels path <br>
+/// Default is ./lib
+DEFINE_string(c, "", custom_cldnn_message);
+
+/// @brief Iterations count (default 0)
+/// Sync mode: iterations count
+/// Async mode: StartAsync counts
+DEFINE_uint32(niter, 0, iterations_count_message);
+
+/// @brief Number of infer requests in parallel
+DEFINE_uint32(nireq, 2, infer_requests_count_message);
+
+/// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
+DEFINE_uint32(nthreads, 0, infer_num_threads_message);
+
+/// @brief Define parameter for batch size <br>
+/// Default is 0 (that means don't specify)
+DEFINE_uint32(b, 0, batch_size_message);
+
+// @brief Enable plugin messages
+DEFINE_string(pin, "YES", infer_threads_pinning_message);
+
+/// @brief Enables multiline text output instead of progress bar
+DEFINE_bool(stream_output, false, stream_output_message);
+
+/// @brief Enables statistics report collecting
+DEFINE_string(report_type, "", report_type_message);
+
+/// @brief Path to a folder where statistics report is stored
+DEFINE_string(report_folder, "", report_folder_message);
+
+/// @brief Path to a file where to store executable graph information serialized
+DEFINE_string(exec_graph_path, "", exec_graph_path_message);
+
+/**
+* @brief This function show a help message
+*/
+static void showUsage() {
+    std::cout << std::endl;
+    std::cout << "benchmark_app [OPTION]" << std::endl;
+    std::cout << "Options:" << std::endl;
+    std::cout << std::endl;
+    std::cout << "    -h                        " << help_message << std::endl;
+    std::cout << "    -i \"<path>\"               " << image_message << std::endl;
+    std::cout << "    -m \"<path>\"               " << model_message << std::endl;
+    std::cout << "    -pp \"<path>\"              " << plugin_path_message << std::endl;
+    std::cout << "    -d \"<device>\"             " << target_device_message << std::endl;
+    std::cout << "    -l \"<absolute_path>\"      " << custom_cpu_library_message << std::endl;
+    std::cout << "          Or" << std::endl;
+    std::cout << "    -c \"<absolute_path>\"      " << custom_cldnn_message << std::endl;
+    std::cout << "    -api \"<sync/async>\"       " << api_message << std::endl;
+    std::cout << "    -niter \"<integer>\"        " << iterations_count_message << std::endl;
+    std::cout << "    -nireq \"<integer>\"        " << infer_requests_count_message << std::endl;
+    std::cout << "    -b \"<integer>\"            " << batch_size_message << std::endl;
+    std::cout << "    -stream_output            " << stream_output_message << std::endl;
+    std::cout << std::endl << "  CPU-specific performance options:" << std::endl;
+    std::cout << "    -nthreads \"<integer>\"     " << infer_num_threads_message << std::endl;
+    std::cout << "    -pin \"YES\"/\"NO\"           " << infer_threads_pinning_message << std::endl;
+    std::cout << std::endl << "  Statistics dumping options:" << std::endl;
+    std::cout << "    -report_type \"<type>\"     " << report_type_message << std::endl;
+    std::cout << "    -report_folder            " << report_folder_message << std::endl;
+    std::cout << "    -exec_graph_path          " << exec_graph_path_message << std::endl;
+}
diff --git a/inference-engine/samples/benchmark_app/infer_request_wrap.hpp b/inference-engine/samples/benchmark_app/infer_request_wrap.hpp
new file mode 100644
index 000000000..741ee1961
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/infer_request_wrap.hpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+#include <map>
+#include <string>
+#include <chrono>
+
+#include "inference_engine.hpp"
+
+typedef std::chrono::high_resolution_clock Time;
+typedef std::chrono::nanoseconds ns;
+
+/// @brief Wrapper class for InferenceEngine::InferRequest. Handles asynchronous callbacks and calculates execution time.
+class InferReqWrap {
+public:
+    using Ptr = std::shared_ptr<InferReqWrap>;
+
+    explicit InferReqWrap(InferenceEngine::ExecutableNetwork& net) : _request(net.CreateInferRequest()) {
+        _request.SetCompletionCallback(
+                [&]() {
+                    _endTime = Time::now();
+                });
+    }
+
+    void startAsync() {
+        _startTime = Time::now();
+        _request.StartAsync();
+    }
+
+    void infer() {
+        _startTime = Time::now();
+        _request.Infer();
+        _endTime = Time::now();
+    }
+
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> getPerformanceCounts() {
+        return _request.GetPerformanceCounts();
+    }
+
+    void wait() {
+        InferenceEngine::StatusCode code = _request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
+        if (code != InferenceEngine::StatusCode::OK) {
+            throw std::logic_error("Wait");
+        }
+    }
+
+    InferenceEngine::Blob::Ptr getBlob(const std::string &name) {
+        return _request.GetBlob(name);
+    }
+
+    double getExecTime() const {
+        auto execTime = std::chrono::duration_cast<ns>(_endTime - _startTime);
+        return static_cast<double>(execTime.count()) * 0.000001;
+    }
+
+private:
+    InferenceEngine::InferRequest _request;
+    Time::time_point _startTime;
+    Time::time_point _endTime;
+};
+\ No newline at end of file
diff --git a/inference-engine/samples/benchmark_app/main.cpp b/inference-engine/samples/benchmark_app/main.cpp
index 134287b68..3174582db 100644
--- a/inference-engine/samples/benchmark_app/main.cpp
+++ b/inference-engine/samples/benchmark_app/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -13,84 +13,104 @@
 #include <inference_engine.hpp>
 #include <format_reader_ptr.h>
 
+#include <vpu/vpu_plugin_config.hpp>
 #include <samples/common.hpp>
 #include <samples/slog.hpp>
 #include <samples/args_helper.hpp>
 
-#include "benchmark_app.h"
+#include "benchmark_app.hpp"
+#include "infer_request_wrap.hpp"
+#include "progress_bar.hpp"
+#include "statistics_report.hpp"
 
 using namespace InferenceEngine;
 
 long long getDurationInNanoseconds(const std::string& device);
 
-double getMedianValue(const std::vector<float>& sortedTimes);
-
 void fillBlobWithImage(
     Blob::Ptr& inputBlob,
     const std::vector<std::string>& filePaths,
-    const size_t batchSize,
+    const size_t& batchSize,
     const InferenceEngine::InputInfo& info);
 
-static const std::vector<std::pair<std::string, long long>> deviceDurationsInSeconds{
-    { "CPU", 60LL },
-    { "GPU", 60LL },
-    { "VPU", 60LL },
-    { "MYRIAD", 60LL },
-    { "FPGA", 120LL },
-    { "UNKNOWN", 120LL }
-};
+static const size_t progressBarDefaultTotalCount = 1000;
 
-/**
-* @brief The entry point the benchmark application
-*/
-int main(int argc, char *argv[]) {
-    try {
-        slog::info << "InferenceEngine: " << InferenceEngine::GetInferenceEngineVersion() << slog::endl;
+bool ParseAndCheckCommandLine(int argc, char *argv[]) {
+    // ---------------------------Parsing and validation of input args--------------------------------------
+    slog::info << "Parsing input parameters" << slog::endl;
+    gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
+    if (FLAGS_h) {
+        showUsage();
+        return false;
+    }
 
-        slog::info << "Parsing input parameters" << slog::endl;
-        gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true);
-        if (FLAGS_h) {
-            showUsage();
-            return 0;
-        }
+    if (FLAGS_m.empty()) {
+        throw std::logic_error("Model required is not set. Please use -h.");
+    }
 
-        if (FLAGS_m.empty()) {
-            throw std::logic_error("Model required is not set. Please use -h.");
-        }
+    if (FLAGS_api.empty()) {
+        throw std::logic_error("API not selected. Please use -h.");
+    }
 
-        if (FLAGS_api.empty()) {
-            throw std::logic_error("API not selected. Please use -h.");
-        }
+    if (FLAGS_api != "async" && FLAGS_api != "sync") {
+        throw std::logic_error("Incorrect API. Please use -h.");
+    }
 
-        if (FLAGS_api != "async" && FLAGS_api != "sync") {
-            throw std::logic_error("Incorrect API. Please use -h.");
-        }
+    if (FLAGS_i.empty()) {
+        throw std::logic_error("Input is not set. Please use -h.");
+    }
 
-        if (FLAGS_i.empty()) {
-            throw std::logic_error("Input is not set. Please use -h.");
-        }
+    if (FLAGS_niter < 0) {
+        throw std::logic_error("Number of iterations should be positive (invalid -niter option value)");
+    }
 
-        if (FLAGS_niter < 0) {
-            throw std::logic_error("Number of iterations should be positive (invalid -niter option value)");
-        }
+    if (FLAGS_nireq < 0) {
+        throw std::logic_error("Number of inference requests should be positive (invalid -nireq option value)");
+    }
 
-        if (FLAGS_nireq < 0) {
-            throw std::logic_error("Number of inference requests should be positive (invalid -nireq option value)");
-        }
+    if (FLAGS_b < 0) {
+        throw std::logic_error("Batch size should be positive (invalid -b option value)");
+    }
+
+    if (!FLAGS_report_type.empty() &&
+         FLAGS_report_type != noCntReport && FLAGS_report_type != medianCntReport && FLAGS_report_type != detailedCntReport) {
+        std::string err = "only " + std::string(noCntReport) + "/" + std::string(medianCntReport) + "/" + std::string(detailedCntReport) +
+                " report types are supported (invalid -report_type option value)";
+        throw std::logic_error(err);
+    }
+
+    return true;
+}
 
-        if (FLAGS_b < 0) {
-            throw std::logic_error("Batch size should be positive (invalid -b option value)");
+/**
+* @brief The entry point the benchmark application
+*/
+int main(int argc, char *argv[]) {
+    try {
+        slog::info << "InferenceEngine: " << InferenceEngine::GetInferenceEngineVersion() << slog::endl;
+
+        // ------------------------------ Parsing and validation of input args ---------------------------------
+        std::cout << std::endl << "[Step 1/8] Parsing and validation of input args" << std::endl;
+        ProgressBar progressBar(1, FLAGS_stream_output);
+
+        if (!ParseAndCheckCommandLine(argc, argv)) {
+            return 0;
         }
 
-        std::vector<std::string> inputs;
-        parseInputFilesArguments(inputs);
-        if (inputs.size() == 0ULL) {
+        /** This vector stores paths to the processed images **/
+        std::vector<std::string> inputImages;
+        parseInputFilesArguments(inputImages);
+        if (inputImages.size() == 0ULL) {
             throw std::logic_error("no images found");
         }
+        progressBar.addProgress(1);
+        progressBar.finish();
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
 
-        slog::info << "Loading plugin" << slog::endl;
+        std::cout << "[Step 2/8] Loading plugin" << std::endl;
+        progressBar.newBar(1);
+
         InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
 
         if (!FLAGS_l.empty()) {
@@ -105,12 +125,21 @@ int main(int argc, char *argv[]) {
         }
 
         InferenceEngine::ResponseDesc resp;
+        if (FLAGS_d == "MYRIAD") {
+            plugin.SetConfig({ {CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_INFO)}, {VPU_CONFIG_KEY(LOG_LEVEL), CONFIG_VALUE(LOG_INFO)} });
+        }
 
         const Version *pluginVersion = plugin.GetVersion();
-        slog::info << pluginVersion << slog::endl << slog::endl;
+        slog::info << pluginVersion << slog::endl;
+
+        progressBar.addProgress(1);
+        progressBar.finish();
 
         // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
 
+        std::cout << "[Step 3/8] Read IR network" << std::endl;
+        progressBar.newBar(1);
+
         slog::info << "Loading network files" << slog::endl;
 
         InferenceEngine::CNNNetReader netBuilder;
@@ -125,10 +154,11 @@ int main(int argc, char *argv[]) {
         }
 
         if (inputInfo.size() != 1) {
-            throw std::logic_error("only one input layer network is supported");
+            throw std::logic_error("only networks with one input are supported");
         }
 
         // --------------------------- 3. Resize network to match image sizes and given batch----------------------
+
         if (FLAGS_b != 0) {
             // We support models having only one input layers
             ICNNNetwork::InputShapes shapes = cnnNetwork.getInputShapes();
@@ -146,15 +176,21 @@ int main(int argc, char *argv[]) {
         slog::info << (FLAGS_b != 0 ? "Network batch size was changed to: " : "Network batch size: ") << batchSize <<
             ", precision: " << precision << slog::endl;
 
+        progressBar.addProgress(1);
+        progressBar.finish();
+
         // --------------------------- 4. Configure input & output ---------------------------------------------
 
+        std::cout << "[Step 4/8] Configure input & output of the model" << std::endl;
+        progressBar.newBar(1);
+
         const InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::U8;
         for (auto& item : inputInfo) {
             /** Set the precision of input data provided by the user, should be called before load of the network to the plugin **/
             item.second->setInputPrecision(inputPrecision);
         }
 
-        const size_t imagesCount = inputs.size();
+        const size_t imagesCount = inputImages.size();
         if (batchSize > imagesCount) {
             slog::warn << "Network batch size " << batchSize << " is greater than images count " << imagesCount <<
                 ", some input files will be duplicated" << slog::endl;
@@ -182,9 +218,14 @@ int main(int argc, char *argv[]) {
             outputBlobs[item.first] = output;
         }
 
+        progressBar.addProgress(1);
+        progressBar.finish();
+
         // --------------------------- 5. Loading model to the plugin ------------------------------------------
 
-        slog::info << "Loading model to the plugin" << slog::endl;
+        std::cout << "[Step 5/8] Loading model to the plugin " << std::endl;
+        progressBar.newBar(1);
+
         std::map<std::string, std::string> networkConfig;
         if (FLAGS_d.find("CPU") != std::string::npos) {  // CPU supports few special performance-oriented keys
             // limit threading for CPU portion of inference
@@ -196,111 +237,154 @@ int main(int argc, char *argv[]) {
             if (FLAGS_api == "async" && FLAGS_d == "CPU")
                 networkConfig[PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS] = std::to_string(FLAGS_nireq);
         }
+
+        if (FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) {
+            networkConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
+        }
+
         InferenceEngine::ExecutableNetwork exeNetwork = plugin.LoadNetwork(cnnNetwork, networkConfig);
 
-        // --------------------------- 6. Performance measurements stuff ------------------------------------------
+        progressBar.addProgress(1);
+        progressBar.finish();
 
-        typedef std::chrono::high_resolution_clock Time;
-        typedef std::chrono::nanoseconds ns;
+        // --------------------------- 6. Create infer requests and fill input blobs ---------------------------
+
+        std::cout << "[Step 6/8] Create infer requests and fill input blobs with images" << std::endl;
+        progressBar.newBar(1);
+
+        std::vector<InferReqWrap::Ptr> inferRequests;
+        auto numOfReq = (FLAGS_api == "async") ? FLAGS_nireq : 1;
+        inferRequests.reserve(numOfReq);
+
+        for (size_t i = 0; i < numOfReq; i++) {
+            inferRequests.push_back(std::make_shared<InferReqWrap>(exeNetwork));
+            slog::info << "Infer Request " << i << " created" << slog::endl;
+
+            for (const InputsDataMap::value_type& item : inputInfo) {
+                Blob::Ptr inputBlob = inferRequests[i]->getBlob(item.first);
+                fillBlobWithImage(inputBlob, inputImages, batchSize, *item.second);
+            }
+        }
+
+        progressBar.addProgress(1);
+        progressBar.finish();
+
+        // --------------------------- 7. Performance measurements stuff ------------------------------------------
 
-        std::vector<float> times;
         long long durationInNanoseconds;
         if (FLAGS_niter != 0) {
             durationInNanoseconds = 0LL;
-            times.reserve(FLAGS_niter);
         } else {
             durationInNanoseconds = getDurationInNanoseconds(FLAGS_d);
         }
 
-        if (FLAGS_api == "sync") {
-            InferRequest inferRequest = exeNetwork.CreateInferRequest();
-            slog::info << "Sync request created" << slog::endl;
+        std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> emptyStat = {};
+        StatisticsReport::Config config = {
+            FLAGS_d,
+            FLAGS_api,
+            batchSize,
+            FLAGS_nireq,
+            FLAGS_niter,
+            FLAGS_nthreads,
+            FLAGS_pin,
+            FLAGS_report_type,
+            FLAGS_report_folder
+        };
+        StatisticsReport statistics(config);
+        double fps;
+        double totalDuration;
+
+        size_t progressCnt = 0;
+        size_t progressBarTotalCount;
+        size_t iteration = 0;
 
-            for (const InputsDataMap::value_type& item : inputInfo) {
-                Blob::Ptr inputBlob = inferRequest.GetBlob(item.first);
-                fillBlobWithImage(inputBlob, inputs, batchSize, *item.second);
-            }
+        if (FLAGS_api == "sync") {
+            InferReqWrap::Ptr inferRequest = inferRequests[0];
 
+            std::cout << "[Step 7/8] ";
             if (FLAGS_niter != 0) {
-                slog::info << "Start inference synchronously (" << FLAGS_niter << " sync inference executions)" << slog::endl << slog::endl;
+                std::cout << "Start inference synchronously (" << FLAGS_niter << " sync inference executions)" << std::endl;
+                progressBarTotalCount = FLAGS_niter;
             } else {
-                slog::info << "Start inference synchronously (" << durationInNanoseconds * 0.000001 << " ms duration)" << slog::endl << slog::endl;
+                std::cout << "Start inference synchronously (" << durationInNanoseconds * 0.000001 << " ms duration)" << std::endl;
+                progressBarTotalCount = progressBarDefaultTotalCount;
             }
 
             // warming up - out of scope
-            inferRequest.Infer();
-            inferRequest.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
+            inferRequest->infer();
 
             const auto startTime = Time::now();
-            auto currentTime = Time::now();
+            auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
 
-            size_t iteration = 0ULL;
-            while ((iteration < FLAGS_niter) || ((FLAGS_niter == 0LL) && ((currentTime - startTime).count() < durationInNanoseconds))) {
-                const auto iterationStartTime = Time::now();
-                inferRequest.Infer();
-                currentTime = Time::now();
-
-                const auto iterationDurationNs = std::chrono::duration_cast<ns>(currentTime - iterationStartTime);
-                times.push_back(static_cast<double>(iterationDurationNs.count()) * 0.000001);
+            /** Start inference & calculate performance **/
+            progressBar.newBar(progressBarTotalCount);
+            while ((iteration < FLAGS_niter) ||
+                   ((FLAGS_niter == 0) && (execTime < durationInNanoseconds))) {
+                inferRequest->infer();
+                statistics.add((FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) ?
+                               inferRequest->getPerformanceCounts() : emptyStat,
+                               inferRequest->getExecTime());
 
                 iteration++;
-            }
-
-            std::sort(times.begin(), times.end());
-            const double latency = getMedianValue(times);
-            slog::info << "Latency: " << latency << " ms" << slog::endl;
-
-            slog::info << "Throughput: " << batchSize * 1000.0 / latency << " FPS" << slog::endl;
-        } else if (FLAGS_api == "async") {
-            std::vector<InferRequest> inferRequests;
-            inferRequests.reserve(FLAGS_nireq);
 
-            for (size_t i = 0; i < FLAGS_nireq; i++) {
-                InferRequest inferRequest = exeNetwork.CreateInferRequest();
-                inferRequests.push_back(inferRequest);
-
-                for (const InputsDataMap::value_type& item : inputInfo) {
-                    Blob::Ptr inputBlob = inferRequest.GetBlob(item.first);
-                    fillBlobWithImage(inputBlob, inputs, batchSize, *item.second);
+                if (FLAGS_niter > 0) {
+                    progressBar.addProgress(1);
+                } else {
+                    execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
+                    // calculate how many progress intervals are covered by current iteration.
+                    // depends on the current iteration time and time of each progress interval.
+                    // Previously covered progress intervals must be skipped.
+                    auto progressIntervalTime = durationInNanoseconds / progressBarTotalCount;
+                    size_t newProgress = execTime / progressIntervalTime - progressCnt;
+                    progressBar.addProgress(newProgress);
+                    progressCnt += newProgress;
                 }
             }
-
+            fps = batchSize * 1000.0 / statistics.getMedianLatency();
+            totalDuration = std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
+            progressBar.finish();
+        } else {
+            std::cout << "[Step 7/8] ";
             if (FLAGS_niter != 0) {
-                slog::info << "Start inference asynchronously (" << FLAGS_niter <<
+                std::cout << "Start inference asynchronously (" << FLAGS_niter <<
                     " async inference executions, " << FLAGS_nireq <<
-                    " inference requests in parallel)" << slog::endl << slog::endl;
+                    " inference requests in parallel)" << std::endl;
+                progressBarTotalCount = FLAGS_niter + FLAGS_nireq - 1;
             } else {
-                slog::info << "Start inference asynchronously (" << durationInNanoseconds * 0.000001 <<
+                std::cout << std::endl << "Start inference asynchronously (" << durationInNanoseconds * 0.000001 <<
                     " ms duration, " << FLAGS_nireq <<
-                    " inference requests in parallel)" << slog::endl << slog::endl;
+                    " inference requests in parallel)" << std::endl;
+                progressBarTotalCount = 1000;
             }
 
+
             size_t currentInference = 0ULL;
             bool requiredInferenceRequestsWereExecuted = false;
             long long previousInference = 1LL - FLAGS_nireq;
 
             // warming up - out of scope
-            inferRequests[0].StartAsync();
-            inferRequests[0].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
-
-            const size_t stepsCount = FLAGS_niter + FLAGS_nireq - 1;
+            inferRequests[0]->startAsync();
+            inferRequests[0]->wait();
 
-            /** Start inference & calculate performance **/
             const auto startTime = Time::now();
+            auto execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
 
-            size_t step = 0ULL;
+            /** Start inference & calculate performance **/
+            /** to use FLAGS_niter + FLAGS_nireq - 1 to guarantee that last infer requests are executed in the same conditions **/
+            progressBar.newBar(progressBarTotalCount);
             while ((!requiredInferenceRequestsWereExecuted) ||
-                (step < stepsCount) ||
-                ((FLAGS_niter == 0LL) && ((Time::now() - startTime).count() < durationInNanoseconds))) {
+                (iteration < FLAGS_niter + FLAGS_nireq - 1) ||
+                ((FLAGS_niter == 0LL) && (execTime < durationInNanoseconds))) {
                 // start new inference
-                inferRequests[currentInference].StartAsync();
+                inferRequests[currentInference]->startAsync();
 
                 // wait the latest inference execution if exists
                 if (previousInference >= 0) {
-                    const StatusCode code = inferRequests[previousInference].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
-                    if (code != StatusCode::OK) {
-                        throw std::logic_error("Wait");
-                    }
+                    inferRequests[previousInference]->wait();
+                    // update statistics with PM counters only in case of detailed or median reports
+                    statistics.add((FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) ?
+                                   inferRequests[previousInference]->getPerformanceCounts() : emptyStat,
+                                   inferRequests[previousInference]->getExecTime());
                 }
 
                 currentInference++;
@@ -314,16 +398,30 @@ int main(int argc, char *argv[]) {
                     previousInference = 0;
                 }
 
-                step++;
+                iteration++;
+
+                if (FLAGS_niter > 0) {
+                    progressBar.addProgress(1);
+                } else {
+                    execTime = std::chrono::duration_cast<ns>(Time::now() - startTime).count();
+                    // calculate how many progress intervals are covered by current iteration.
+                    // depends on the current iteration time and time of each progress interval.
+                    // Previously covered progress intervals must be skipped.
+                    auto progressIntervalTime = durationInNanoseconds / progressBarTotalCount;
+                    size_t newProgress = execTime / progressIntervalTime - progressCnt;
+                    progressBar.addProgress(newProgress);
+                    progressCnt += newProgress;
+                }
             }
 
             // wait the latest inference executions
             for (size_t notCompletedIndex = 0ULL; notCompletedIndex < (FLAGS_nireq - 1); ++notCompletedIndex) {
                 if (previousInference >= 0) {
-                    const StatusCode code = inferRequests[previousInference].Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
-                    if (code != StatusCode::OK) {
-                        throw std::logic_error("Wait");
-                    }
+                    inferRequests[previousInference]->wait();
+                    // update statistics with PM counters only in case of detailed or median reports
+                    statistics.add((FLAGS_report_type == detailedCntReport || FLAGS_report_type == medianCntReport) ?
+                                   inferRequests[previousInference]->getPerformanceCounts() : emptyStat,
+                                   inferRequests[previousInference]->getExecTime());
                 }
 
                 previousInference++;
@@ -331,13 +429,25 @@ int main(int argc, char *argv[]) {
                     previousInference = 0LL;
                 }
             }
+            totalDuration = std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
+            fps = batchSize * 1000.0 * iteration / totalDuration;
+            progressBar.finish();
+        }
 
-            const double totalDuration = std::chrono::duration_cast<ns>(Time::now() - startTime).count() * 0.000001;
-            const double fps = batchSize * 1000.0 * step / totalDuration;
-            slog::info << "Throughput: " << fps << " FPS" << slog::endl;
-        } else {
-            throw std::logic_error("unknown api command line argument value");
+        std::cout << "[Step 8/8] Dump statistics report" << std::endl;
+        progressBar.newBar(1);
+        statistics.dump(fps, iteration, totalDuration);
+
+        if (!FLAGS_exec_graph_path.empty()) {
+            CNNNetwork execGraphInfo = exeNetwork.GetExecGraphInfo();
+            execGraphInfo.serialize(FLAGS_exec_graph_path);
+            slog::info << "executable graph is stored to " << FLAGS_exec_graph_path << slog::endl;
         }
+        progressBar.addProgress(1);
+        progressBar.finish();
+
+        std::cout << "Latency: " << statistics.getMedianLatency() << " ms" << std::endl;
+        std::cout << "Throughput: " << fps << " FPS" << std::endl;
     } catch (const std::exception& ex) {
         slog::err << ex.what() << slog::endl;
         return 3;
@@ -347,6 +457,16 @@ int main(int argc, char *argv[]) {
 }
 
 long long getDurationInNanoseconds(const std::string& device) {
+    static const std::vector<std::pair<std::string, long long>> deviceDurationsInSeconds{
+            { "CPU", 60LL },
+            { "GPU", 60LL },
+            { "VPU", 60LL },
+            { "MYRIAD", 60LL },
+            { "HDDL", 60LL },
+            { "FPGA", 120LL },
+            { "UNKNOWN", 120LL }
+    };
+
     auto duration = 0LL;
     for (const auto& deviceDurationInSeconds : deviceDurationsInSeconds) {
         if (device.find(deviceDurationInSeconds.first) != std::string::npos) {
@@ -370,22 +490,16 @@ long long getDurationInNanoseconds(const std::string& device) {
     return duration * 1000000000LL;
 }
 
-double getMedianValue(const std::vector<float>& sortedTimes) {
-    return (sortedTimes.size() % 2 != 0) ?
-        sortedTimes[sortedTimes.size() / 2ULL] :
-        (sortedTimes[sortedTimes.size() / 2ULL] + sortedTimes[sortedTimes.size() / 2ULL - 1ULL]) / 2.0;
-}
-
 void fillBlobWithImage(
     Blob::Ptr& inputBlob,
     const std::vector<std::string>& filePaths,
-    const size_t batchSize,
+    const size_t& batchSize,
     const InferenceEngine::InputInfo& info) {
 
-    uint8_t* inputBlobData = inputBlob->buffer().as<uint8_t*>();
+    auto inputBlobData = inputBlob->buffer().as<uint8_t*>();
     const SizeVector& inputBlobDims = inputBlob->dims();
 
-    slog::info << "Input dimensions (" << info.getTensorDesc().getLayout() << "): ";
+    slog::info << "Network Input dimensions (" << info.getTensorDesc().getLayout() << "): ";
     for (const auto& i : info.getTensorDesc().getDims()) {
         slog::info << i << " ";
     }
@@ -400,6 +514,7 @@ void fillBlobWithImage(
             inputIndex = 0ULL;
         }
 
+        slog::info << "Prepare image " << filePaths[inputIndex] << slog::endl;
         FormatReader::ReaderPtr reader(filePaths[inputIndex].c_str());
         if (reader.get() == nullptr) {
             slog::warn << "Image " << filePaths[inputIndex] << " cannot be read!" << slog::endl << slog::endl;
diff --git a/inference-engine/samples/benchmark_app/progress_bar.hpp b/inference-engine/samples/benchmark_app/progress_bar.hpp
new file mode 100644
index 000000000..bc7e48527
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/progress_bar.hpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <memory>
+
+#include <samples/console_progress.hpp>
+
+/// @brief Responsible for progress bar handling within the benchmark_app
+class ProgressBar {
+public:
+    ProgressBar(size_t totalNum, bool stream_output) {
+        _bar.reset(new ConsoleProgress(totalNum, stream_output));
+        _isFinished = true;
+    }
+
+    void addProgress(size_t num) {
+        _isFinished = false;
+        _bar->addProgress(num);
+    }
+
+    void finish() {
+        _isFinished = true;
+        _bar->finish();
+        std::cout << std::endl;
+    }
+
+    void newBar(size_t totalNum) {
+        if (_isFinished) {
+            _bar.reset(new ConsoleProgress(totalNum));
+        } else {
+            throw std::logic_error("Can't create new bar. Current progress bar is still in progress");
+        }
+    }
+
+private:
+    std::unique_ptr<ConsoleProgress> _bar;
+    bool _isFinished;
+};
+\ No newline at end of file
diff --git a/inference-engine/samples/benchmark_app/statistics_report.cpp b/inference-engine/samples/benchmark_app/statistics_report.cpp
new file mode 100644
index 000000000..3bb0df4e2
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/statistics_report.cpp
@@ -0,0 +1,222 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string>
+#include <vector>
+#include <utility>
+#include <map>
+#include <algorithm>
+
+#include "statistics_report.hpp"
+
+void StatisticsReport::add(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat, const double &latency) {
+    if (_config.niter > 0 && _config.niter == _performanceCounters.size()) {
+        // do not add elements for the adittionaly  executed requests.
+        return;
+    }
+
+    _latencies.push_back(latency);
+    if (_config.report_type == medianCntReport || _config.report_type == detailedCntReport) {
+        // collect per-iteration statistics only in case of enabled median/detailed statistic collecting
+        _performanceCounters.push_back(pmStat);
+    }
+}
+
+void StatisticsReport::dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime) {
+    if (_config.report_type.empty()) {
+        slog::info << "Statistics collecting was not requested. No reports are dumped." << slog::endl;
+        return;
+    }
+
+    size_t numMeasuredReq = numProcessedReq;
+    if (_config.api == "async" && _config.niter > 0) {
+        // in this case number of processed requests is higher than the value of -niter option.
+        // but we need to handle statistics for -niter number of requests only
+        numMeasuredReq = _config.niter;
+    }
+
+    std::string separator =
+#if defined _WIN32 || defined __CYGWIN__
+    #   if defined UNICODE
+        L"\\";
+    #   else
+        "\\";
+    #   endif
+#else
+        "/";
+#endif
+    if (_config.report_folder.empty())
+        separator = "";
+
+    CsvDumper dumper(true, _config.report_folder + separator + "benchmark_" + _config.report_type + "_report.csv");
+
+    // resulting number of columns in csv file depends on the report_type. If it's noCntReport, then
+    // no PM data is collected and there are only 3 columns in the file (in configuration section). If it's
+    // medianCntReport then median PM values are collected per each layer and the number of columns is 6.
+    // Example from GPU:
+    //
+    // layer name;exec status;layer type;exec type;real time;cpu time;
+    // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;615;3;
+    // Here, all the data are taken from InferenceEngine::InferenceEngineProfileInfo.
+    //
+    // In case of detailedCntReport the number of columns is 4 + numMeasuredReq * 2, because first 4 parameters
+    // are the same but realTime and cpuTime can be different on each iteration (example from 5 GPU requests):
+    // conv1;EXECUTED;Convolution;convolution_gpu_bfyx_gemm_like;630,3;617,3;616,3;615,3;617,3;
+    size_t numOfColumns = 0;
+    if (_config.report_type == noCntReport) {
+        numOfColumns = 3;
+    } else if (_config.report_type == medianCntReport) {
+        numOfColumns = 6;
+    } else {
+        // for detailedCntReport
+        numOfColumns = 4 + numMeasuredReq * 2;
+    }
+
+    auto completeCsvRow = [](CsvDumper &dumper, size_t numOfColumns, size_t filled) {
+        for (size_t i = 0; i < numOfColumns - filled; i++)
+            dumper << "";
+        dumper.endLine();
+    };
+
+    // dump execution configuration
+    dumper << "Configuration setup";
+    completeCsvRow(dumper, numOfColumns, 1);
+    dumper << "config option" << "CLI parameter" << "value";
+    completeCsvRow(dumper, numOfColumns, 3);
+
+    dumper << "target device" << " -d" << _config.device;
+    completeCsvRow(dumper, numOfColumns, 3);
+    dumper << "execution mode" << " -api" << _config.api;
+    completeCsvRow(dumper, numOfColumns, 3);
+    dumper << "batch size" << " -b" << _config.batch;
+    completeCsvRow(dumper, numOfColumns, 3);
+    dumper << "number of iterations" << " -niter" << _config.niter;
+    completeCsvRow(dumper, numOfColumns, 3);
+    dumper << "number of parallel infer requests" << " -nireq" << _config.nireq;
+    completeCsvRow(dumper, numOfColumns, 3);
+    dumper << "number of CPU threads" << " -nthreads" << _config.cpu_nthreads;
+    completeCsvRow(dumper, numOfColumns, 3);
+    dumper << "CPU pinning enabled" << " -pin" << _config.cpu_pin;
+    completeCsvRow(dumper, numOfColumns, 3);
+
+    dumper.endLine();
+
+    // write PM data from each iteration
+    if (!_performanceCounters.empty()) {
+        if (_config.report_type != medianCntReport && _config.report_type != detailedCntReport) {
+            throw std::logic_error("PM data should only be collected for median or detailed report types");
+        }
+
+        // this vector is sorted according to network layers execution order.
+        auto performanceMapSorted = preparePmStatistics();
+
+        dumper << "Performance counters";
+        completeCsvRow(dumper, numOfColumns, 1);
+        dumper << "layer name" << "exec status" << "layer type" << "exec type";
+
+        if (_config.report_type == medianCntReport) {
+            dumper << "median real time" << "median cpu time";
+            completeCsvRow(dumper, numOfColumns, 6);
+        } else {
+            // detailedCntReport case
+            for (size_t i = 0; i< _performanceCounters.size(); i++) {
+                dumper << "realTime_iter" + std::to_string(i) << "cpuTime_iter" + std::to_string(i);
+            }
+            completeCsvRow(dumper, numOfColumns, 4 + _performanceCounters.size() * 2);
+        }
+
+        for (const auto &layer : performanceMapSorted) {
+            dumper << layer.first;  // layer name
+            switch (layer.second.status) {
+                case InferenceEngine::InferenceEngineProfileInfo::EXECUTED:
+                    dumper << "EXECUTED";
+                    break;
+                case InferenceEngine::InferenceEngineProfileInfo::NOT_RUN:
+                    dumper << "NOT_RUN";
+                    break;
+                case InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT:
+                    dumper << "OPTIMIZED_OUT";
+                    break;
+            }
+            dumper << layer.second.layer_type << layer.second.exec_type;
+
+            if (_config.report_type == medianCntReport) {
+                // write median realTime and cpuTime from each processed request for current layer
+                dumper <<
+                std::to_string(getMedianValue<long long>(_perLayerRealTime[layer.first]) / 1000.0) <<
+                std::to_string(getMedianValue<long long>(_perLayerCpuTime[layer.first]) / 1000.0);
+            } else {
+                // write all realTime and cpuTime from each processed request for current layer
+                for (size_t i = 0; i < numMeasuredReq; i++) {
+                    dumper << std::to_string(_perLayerRealTime[layer.first][i] / 1000.0) << std::to_string(_perLayerCpuTime[layer.first][i] / 1000.0);
+                }
+            }
+            dumper.endLine();
+        }
+        dumper.endLine();
+    }
+
+    if (_config.report_type == detailedCntReport) {
+        dumper << "Statistics";
+        completeCsvRow(dumper, numOfColumns, 1);
+
+        dumper << "metric";
+        for (size_t i = 0; i < _latencies.size(); i++) {
+            // detailedCntReport case
+            dumper << "iter" + std::to_string(i);
+        }
+        completeCsvRow(dumper, numOfColumns, 4 + _latencies.size());
+        dumper << "latencies";
+        for (const auto &lat : _latencies) {
+            dumper << lat;
+        }
+        completeCsvRow(dumper, numOfColumns, _latencies.size());
+        dumper.endLine();
+    }
+
+    dumper << "Execution results";
+    completeCsvRow(dumper, numOfColumns, 1);
+    dumper << "number of measured infer requests" << numMeasuredReq;
+    completeCsvRow(dumper, numOfColumns, 2);
+    dumper << "latency" << getMedianValue<double>(_latencies);
+    completeCsvRow(dumper, numOfColumns, 2);
+    dumper << "throughput" << fps;
+    completeCsvRow(dumper, numOfColumns, 2);
+    dumper << "total execution time" << totalExecTime;
+    completeCsvRow(dumper, numOfColumns, 2);
+
+    slog::info << "statistics report is stored to " << dumper.getFilename() << slog::endl;
+}
+
+double StatisticsReport::getMedianLatency() {
+    return getMedianValue<double>(_latencies);
+}
+
+std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> StatisticsReport::preparePmStatistics() {
+    if (_performanceCounters.empty()) {
+        throw std::logic_error("preparePmStatistics() was called when no PM data was collected");
+    }
+
+    // sort PM data of first processed request according to layers execution order
+    auto performanceMapSorted = perfCountersSorted(_performanceCounters[0]);
+
+    // iterate over each processed infer request and handle its PM data
+    for (auto &pm : _performanceCounters) {
+        // iterate over each layer from sorted vector and add required PM data to the per-layer maps
+        for (const auto & it : performanceMapSorted) {
+            _perLayerRealTime[it.first].push_back(pm[it.first].realTime_uSec);
+            _perLayerCpuTime[it.first].push_back(pm[it.first].cpu_uSec);
+        }
+    }
+    return performanceMapSorted;
+}
+
+template <typename T>
+T StatisticsReport::getMedianValue(const std::vector<T> &vec) {
+    std::vector<T> sortedVec(vec);
+    std::sort(sortedVec.begin(), sortedVec.end());
+    return (sortedVec.size() % 2 != 0) ?
+           sortedVec[sortedVec.size() / 2ULL] :
+           (sortedVec[sortedVec.size() / 2ULL] + sortedVec[sortedVec.size() / 2ULL - 1ULL]) / static_cast<T>(2.0);
+}
diff --git a/inference-engine/samples/benchmark_app/statistics_report.hpp b/inference-engine/samples/benchmark_app/statistics_report.hpp
new file mode 100644
index 000000000..248d7cd8c
--- /dev/null
+++ b/inference-engine/samples/benchmark_app/statistics_report.hpp
@@ -0,0 +1,67 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <utility>
+#include <map>
+
+#include <inference_engine.hpp>
+#include <samples/common.hpp>
+#include <samples/slog.hpp>
+#include <samples/csv_dumper.hpp>
+
+// @brief statistics reports types
+static constexpr char noCntReport[] = "no_counters";
+static constexpr char medianCntReport[] = "median_counters";
+static constexpr char detailedCntReport[] = "detailed_counters";
+
+/// @brief Responsible for collecting of statistics and dumping to .csv file
+class StatisticsReport {
+public:
+    struct Config {
+        std::string device;
+        std::string api;
+        size_t batch;
+        size_t nireq;
+        size_t niter;
+        size_t cpu_nthreads;
+        std::string cpu_pin;
+        std::string report_type;
+        std::string report_folder;
+    };
+
+    explicit StatisticsReport(Config config) : _config(std::move(config)) {
+        if (_config.niter > 0) {
+            _performanceCounters.reserve(_config.niter);
+        }
+    }
+
+    void add(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &pmStat, const double &latency);
+
+    void dump(const double &fps, const size_t &numProcessedReq, const double &totalExecTime);
+
+    double getMedianLatency();
+
+private:
+    std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>> preparePmStatistics();
+
+    template <typename T>
+    T getMedianValue(const std::vector<T> &vec);
+
+    // Contains PM data for each processed infer request
+    std::vector<std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>> _performanceCounters;
+    // Contains latency of each processed infer request
+    std::vector<double> _latencies;
+
+    // configuration of current benchmark execution
+    const Config _config;
+
+    // mapping from network layer to a vector of calculated RealTime values from each processed infer request.
+    std::map<std::string, std::vector<long long>> _perLayerRealTime;
+    // mapping from network layer to a vector of calculated CPU Time values from each processed infer request.
+    std::map<std::string, std::vector<long long>> _perLayerCpuTime;
+};
diff --git a/inference-engine/samples/build_samples.sh b/inference-engine/samples/build_samples.sh
deleted file mode 100644
index f531f916b..000000000
--- a/inference-engine/samples/build_samples.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/bin/bash
-
-# Copyright (c) 2018 Intel Corporation
-# 
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-# 
-#       http://www.apache.org/licenses/LICENSE-2.0
-# 
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-error() {
-    local code="${3:-1}"
-    if [[ -n "$2" ]];then
-        echo "Error on or near line $1: $2; exiting with status ${code}"
-    else
-        echo "Error on or near line $1; exiting with status ${code}"
-    fi
-    exit "${code}" 
-}
-trap 'error ${LINENO}' ERR
-
-SAMPLES_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-if [[ -z "${InferenceEngine_DIR}" ]]; then
-    printf "\nInferenceEngine_DIR environment variable is not set. Trying to find setupvars.sh to set it. \n"
-    
-    setvars_path=$SAMPLES_PATH/../..
-    if [ -e "$setvars_path/inference_engine/bin/setvars.sh" ]; then # for Intel Deep Learning Deployment Toolkit package
-        setvars_path="$setvars_path/inference_engine/bin/setvars.sh"
-    elif [ -e "$setvars_path/../bin/setupvars.sh" ]; then # for OpenVINO package
-        setvars_path="$setvars_path/../bin/setupvars.sh"
-    elif [ -e "$setvars_path/../setupvars.sh" ]; then
-        setvars_path="$setvars_path/../setupvars.sh"
-    else
-        printf "Error: setupvars.sh is not found in hardcoded paths. \n\n"
-        exit 1
-    fi 
-    if ! source $setvars_path ; then
-        printf "Unable to run ./setupvars.sh. Please check its presence. \n\n"
-        exit 1
-    fi
-fi
-
-if ! command -v cmake &>/dev/null; then
-    printf "\n\nCMAKE is not installed. It is required to build Inference Engine samples. Please install it. \n\n"
-    exit 1
-fi
-
-build_dir=$HOME/inference_engine_samples_build
-mkdir -p $build_dir
-cd $build_dir
-cmake -DCMAKE_BUILD_TYPE=Release $SAMPLES_PATH
-make -j8
-
-printf "\nBuild completed, you can find binaries for all samples in the $HOME/inference_engine_samples_build/intel64/Release subfolder.\n\n"
diff --git a/inference-engine/samples/calibration_tool/CMakeLists.txt b/inference-engine/samples/calibration_tool/CMakeLists.txt
index f69a6e7aa..c65433698 100644
--- a/inference-engine/samples/calibration_tool/CMakeLists.txt
+++ b/inference-engine/samples/calibration_tool/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "calibration_tool")
 
 file (GLOB MAIN_SRC
diff --git a/inference-engine/samples/calibration_tool/README.md b/inference-engine/samples/calibration_tool/README.md
index f40c671f3..6e075597b 100644
--- a/inference-engine/samples/calibration_tool/README.md
+++ b/inference-engine/samples/calibration_tool/README.md
@@ -3,12 +3,14 @@
 Inference Engine Calibration Tool calibrates a given FP32 model so that is can be run in low-precision 8-bit integer
 mode while keeping the input data of this model in the original precision.
 
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
 ## Calibration Tool Options
 
 The core command-line options for the Calibration Tool are the same as for
-[Validation Application](./samples/validation_app/README.md). However, the Calibration Tool has the following specific options: `-t`, `-subset`, `-output`, and `-threshold`.
+[Validation Application](./inference-engine/samples/validation_app/README.md). However, the Calibration Tool has the following specific options: `-t`, `-subset`, `-output`, and `-threshold`.
 
-Running the Calibration Tool with the `-h` option yields the following usage message with all CLI options listed:
+Running the Calibration Tool with the `-h` option yields the following usage message:
 ```sh  
 Usage: calibration_tool [OPTION]
 
@@ -25,7 +27,7 @@ Available options:
     -lbl <path>               Labels file path. The labels file contains names of the dataset classes
     -l <absolute_path>        Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
     -c <absolute_path>        Required for GPU custom kernels. Absolute path to an .xml file with the kernel descriptions.
-    -d <device>               Target device to infer on: CPU (default), GPU, FPGA, or MYRIAD. The application looks for a suitable plugin for the specified device.
+    -d <device>               Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD. The application looks for a suitable plugin for the specified device.
     -b N                      Batch size value. If not specified, the batch size value is taken from IR
     -ppType <type>            Preprocessing type. Options: "None", "Resize", "ResizeCrop"
     -ppSize N                 Preprocessing size (used with ppType="ResizeCrop")
@@ -35,7 +37,7 @@ Available options:
     -subset                   Number of pictures from the whole validation set tocreate the calibration dataset. Default value is 0, which stands forthe whole provided dataset
     -output <output_IR>       Output name for calibrated model. Default is <original_model_name>_i8.xml|bin
     -threshold                Threshold for a maximum accuracy drop of quantized model. Must be an integer number (percents) without a percent sign. Default value is 1, which stands for accepted accuracy drop in 1%
-    - stream_output           Flag for printing progress as a plain text.When used, interactive progress bar is replaced with multiline output
+    -stream_output            Flag for printing progress as a plain text.When used, interactive progress bar is replaced with multiline output
 
     Classification-specific options:
       -Czb true               "Zero is a background" flag. Some networks are trained with a modified dataset where the class IDs  are enumerated from 1, but 0 is an undefined "background" class (which is never detected)
@@ -53,6 +55,9 @@ The tool options are divided into two categories:
 2. **Network type-specific options** named as an acronym of the network type (<code>C</code> or <code>OD</code>)
    followed by a letter or a word.
 
+You can run the tool with public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the tool on a trained model, make sure the model is converted to the Inference Engine format (`*.xml` + `*.bin`) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
 ## Calibrate a Classification Model
 
@@ -68,7 +73,7 @@ named as labels that contain all images of this class and ImageNet*-like format,
 `.txt` file containing list of images and IDs of classes.
 
 For more information on the structure of the datasets, refer to the **Prepare a Dataset** section of the
-[Validation Application document](./samples/validation_app/README.md).
+[Validation Application document](./inference-engine/samples/validation_app/README.md).
 
 If you decide to use the subset of the given dataset, use the ImageNet-like format
 instead of "folder as classes" format. This brings a more accurate calibration as you are likely to get images
@@ -79,11 +84,9 @@ To run the sample you can use classification models that can be downloaded with
 For example, to calibrate the trained Caffe\* `resnet-50` classification model, run the following command:
 
 ```bash
-./calibration_tool -t C -m resnet-50.xml -i ILSVRC2012_val.txt -Czb false -ppType "ResizeCrop" -ppSize 342 -b 1 -d CPU -subset 2000
+./calibration_tool -t C -m <path_to_model>/resnet-50.xml -i ILSVRC2012_val.txt -Czb false -ppType "ResizeCrop" -ppSize 342 -b 1 -d CPU -subset 2000
 ```
 
-> **NOTE**: To run the tool for a model, the model should be first converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
 ## Calibrate Object Detection Model
 
 This topic demonstrates how to run the Calibration Tool on the Object Detection CNN on a set of images. Please
@@ -96,7 +99,7 @@ format as the SSD CNN should be supported as well.
 
 Before you start calibrating the model, make sure your dataset is in the correct format. For more information,
 refer to the **Prepare a Dataset** section of the
-[Validation Application document](./samples/validation_app/README.md).
+[Validation Application document](./inference-engine/samples/validation_app/README.md).
 
 Once you have prepared the dataset, you can calibrate the model on it by running the following command:
 ```bash
@@ -106,3 +109,5 @@ Once you have prepared the dataset, you can calibrate the model on it by running
 ## See Also
 
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/calibration_tool/calibrator_processors.cpp b/inference-engine/samples/calibration_tool/calibrator_processors.cpp
index d4cf7fe37..e6a00b806 100644
--- a/inference-engine/samples/calibration_tool/calibrator_processors.cpp
+++ b/inference-engine/samples/calibration_tool/calibrator_processors.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,6 +12,7 @@
 #include <memory>
 #include <utility>
 #include <list>
+#include <limits>
 #include "details/ie_cnn_network_tools.h"
 #include "details/caseless.hpp"
 
@@ -37,7 +38,7 @@ CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer:
 
     if (scale.size() == 1) {
         scale.resize(wdims[0]);
-        for (int i = 1; i < wdims[0]; i++) {
+        for (size_t i = 1; i < wdims[0]; i++) {
             scale[i] = scale[0];
         }
     }
@@ -53,7 +54,7 @@ CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer:
     if (buffer == nullptr) {
         THROW_IE_EXCEPTION << "Could not allocate weights buffer";
     }
-    for (size_t i = 0, idx = 0; i < pData->dims[2]; i++) {
+    for (size_t i = 0; i < pData->dims[2]; i++) {
         buffer[i] = scale[i];
     }
     pScaleShift->_weights = weights;
@@ -64,7 +65,7 @@ CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer:
     biases = make_shared_blob<float>(Precision::FP32, Layout::C, bdims);
     biases->allocate();
     buffer = biases->buffer().as<float *>();
-    for (size_t i = 0, idx = 0; i < pData->dims[2]; i++) {
+    for (size_t i = 0; i < pData->dims[2]; i++) {
         buffer[i] = 0.f;
     }
     pScaleShift->_biases = biases;
@@ -94,7 +95,6 @@ CNNLayerPtr Int8Calibrator::addScaleShiftBeforeLayer(std::string name, CNNLayer:
 
 float Int8Calibrator::compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEngine::Blob::Ptr ref) {
     float *res_ptr = res->buffer().as<float *>();
-    size_t res_size = res->size();
 
     float *ref_ptr = ref->buffer().as<float *>();
     size_t ref_size = ref->size();
@@ -111,9 +111,12 @@ float Int8Calibrator::compare_NRMSD(InferenceEngine::Blob::Ptr res, InferenceEng
         mmin = std::min(mmin, ref_ptr[i]);
         mmax = std::max(mmax, ref_ptr[i]);
     }
+    if (std::fabs(ref_size) < std::numeric_limits<double>::epsilon()) {
+        throw std::logic_error("ref_size can't be equal to zero");
+    }
     sum /= ref_size;
 
-    sum = pow(sum, 0.5);
+    sum = pow(sum, 0.5f);
 
     sum /= mmax - mmin;
 
@@ -149,6 +152,9 @@ void Int8Calibrator::collectFP32Statistic() {
     networkReaderC = InferenceEngine::CNNNetReader();
     networkReaderC.ReadNetwork(_modelFileNameI8C);
     if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model";
+    /** Extract model name and load weights **/
+    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
+    networkReaderC.ReadWeights(binFileName.c_str());
     if (_cBatch == 0) {
         // Zero means "take batch value from the IR"
         _cBatch = networkReaderC.getNetwork().getBatchSize();
@@ -163,10 +169,6 @@ void Int8Calibrator::collectFP32Statistic() {
         networkReaderC.getNetwork().reshape(input_shapes);
     }
 
-    /** Extract model name and load weights **/
-    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
-    networkReaderC.ReadWeights(binFileName.c_str());
-
     auto network = networkReaderC.getNetwork();
 
 
@@ -196,10 +198,12 @@ void Int8Calibrator::collectFP32Statistic() {
     // 1. add all layers as output one
     for (auto &&layer : network) {
         std::string layerType = network.getLayerByName(layer->name.c_str())->type;
-        if (/*layerType != "Split" &&*/layerType != "Input") {
-            network.addOutput(layer->name);
+        if (layerType != "Const") {
+            if (/*layerType != "Split" &&*/layerType != "Input") {
+                network.addOutput(layer->name);
+            }
+            _statData.registerLayer(layer->name);
         }
-        _statData.registerLayer(layer->name);
     }
 
     ExecutableNetwork executable_network = _pluginI8C.LoadNetwork(network, { { CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), CONFIG_VALUE(YES) } });
@@ -207,12 +211,16 @@ void Int8Calibrator::collectFP32Statistic() {
 }
 
 void Int8Calibrator::validateInt8Config(const InferenceEngine::NetworkStatsMap &stat,
-                                        const std::map<std::string, bool> &layersToInt8) {
+                                        const std::map<std::string, bool> &layersToInt8,
+                                        bool convertFullyConnected) {
     _collectByLayer = false;
     _collectStatistic = false;
     networkReaderC = InferenceEngine::CNNNetReader();
     networkReaderC.ReadNetwork(_modelFileNameI8C);
     if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model";
+    /** Extract model name and load weights **/
+    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
+    networkReaderC.ReadWeights(binFileName.c_str());
     if (_cBatch == 0) {
         // Zero means "take batch value from the IR"
         _cBatch = networkReaderC.getNetwork().getBatchSize();
@@ -227,10 +235,6 @@ void Int8Calibrator::validateInt8Config(const InferenceEngine::NetworkStatsMap &
         networkReaderC.getNetwork().reshape(input_shapes);
     }
 
-    /** Extract model name and load weights **/
-    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
-    networkReaderC.ReadWeights(binFileName.c_str());
-
     // Initialize statistic
     ICNNNetworkStats *pstats = nullptr;
     StatusCode s = ((ICNNNetwork&)networkReaderC.getNetwork()).getStats(&pstats, nullptr);
@@ -239,6 +243,13 @@ void Int8Calibrator::validateInt8Config(const InferenceEngine::NetworkStatsMap &
     }
 
     auto network = networkReaderC.getNetwork();
+
+    for (auto l : network) {
+        if (l->type == "FullyConnected") {
+            l->params["quantization_level"] = (convertFullyConnected == false) ? "FP32" : "I8";
+        }
+    }
+
     for (auto l : layersToInt8) {
         network.getLayerByName(l.first.c_str())->
             params["quantization_level"] = (l.second == false) ? "FP32" : "I8";
@@ -363,6 +374,9 @@ void Int8Calibrator::collectByLayerStatistic(const InferenceEngine::NetworkStats
     networkReaderC = InferenceEngine::CNNNetReader();
     networkReaderC.ReadNetwork(_modelFileNameI8C);
     if (!networkReaderC.isParseSuccess()) THROW_IE_EXCEPTION << "cannot load a failed Model";
+    /** Extract model name and load weights **/
+    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
+    networkReaderC.ReadWeights(binFileName.c_str());
     if (_cBatch != 0) {
         auto input_shapes = networkReaderC.getNetwork().getInputShapes();
         std::string input_name;
@@ -373,15 +387,11 @@ void Int8Calibrator::collectByLayerStatistic(const InferenceEngine::NetworkStats
         networkReaderC.getNetwork().reshape(input_shapes);
     }
 
-    /** Extract model name and load weights **/
-    std::string binFileName = fileNameNoExt(_modelFileNameI8C) + ".bin";
-    networkReaderC.ReadWeights(binFileName.c_str());
-
     auto network = networkReaderC.getNetwork();
     // 1. add all layers as output one
     for (auto &&layer : network) {
         std::string layerType = network.getLayerByName(layer->name.c_str())->type;
-        if (/*layerType != "Split" &&*/layerType != "Input") {
+        if (/*layerType != "Split" &&*/layerType != "Input" && layerType != "Const") {
             network.addOutput(layer->name);
         }
 
@@ -401,7 +411,6 @@ void Int8Calibrator::collectByLayerStatistic(const InferenceEngine::NetworkStats
         // currently it is only supported
 
         // if only one output from conv and if it is an output to relu
-        bool quattization = false;
         if (layerToClone->outData.size() == 1
             && layerToClone->outData[0]->inputTo.size() == 1
             && CaselessEq<std::string>()(layerToClone->outData[0]->inputTo.begin()->second->name, "relu")) {
@@ -461,16 +470,14 @@ void Int8Calibrator::collectCalibrationStatistic(size_t pics) {
                 outName = _inputsFromLayers[l];
             }
 
-            size_t N, C, statCount;
+            size_t N, C;
             if (outBlob->dims().size() == 4 && outBlob->layout() == Layout::NCHW) {
                 // TODO(amalyshe) cahnge to using of tensor desc
                 N = pics;
                 C = outBlob->dims()[2];
-                statCount = C;
             } else if (outBlob->dims().size() == 2 && outBlob->layout() == Layout::NC) {
                 N = pics;
                 C = outBlob->dims()[0];
-                statCount = 1;
             } else {
                 continue;
             }
@@ -568,10 +575,11 @@ shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool s
         generator.readLabels(labelFileName);
     } catch (InferenceEngine::details::InferenceEngineException& ex) {
         slog::warn << "Can't read labels file " << labelFileName << slog::endl;
+        slog::warn << "Error: " << ex.what() << slog::endl;
     }
     auto validationMap = generator.getValidationMap(imagesPath);
 
-    if (validationMap.size() == 0) {
+    if (validationMap.empty()) {
         THROW_IE_EXCEPTION << "The validation dataset in " << imagesPath << "is empty. Check the dataset file or folder and the labels file";
     }
 
@@ -580,7 +588,6 @@ shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool s
     // ----------------------------Do inference-------------------------------------------------------------
     std::vector<int> expected(batch);
     std::vector<std::string> files(batch);
-    int captured = 0;
 
     if (!_nPictures) {
         _nPictures = validationMap.size();
@@ -599,7 +606,7 @@ shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool s
     size_t ipics = 0;
     auto iter = validationMap.begin();
     while (iter != validationMap.end() && ipics < _nPictures) {
-        int b = 0;
+        size_t b = 0;
         int filesWatched = 0;
         for (; b < batch && iter != validationMap.end() && ipics + b < _nPictures ; b++, iter++, filesWatched++) {
             expected[b] = iter->first;
@@ -608,6 +615,7 @@ shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool s
                 files[b] = iter->second;
             } catch (const InferenceEngineException &iex) {
                 slog::warn << "Can't read file " << iter->second << slog::endl;
+                slog::warn << "Error: " << iex.what() << slog::endl;
                 // Could be some non-image file in directory
                 b--;
                 continue;
@@ -619,12 +627,11 @@ shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool s
         collectCalibrationStatistic(b);
 
         std::vector<unsigned> results;
-        auto firstOutputData = firstOutputBlob->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>();
         InferenceEngine::TopResults(1, *firstOutputBlob, results);
-        for (int i = 0; i < b; i++) {
+        for (size_t i = 0; i < b; i++) {
             int expc = expected[i];
             if (zeroBackground) expc++;
-            bool top1Scored = (results[i] == expc);
+            bool top1Scored = (static_cast<int>(results[i]) == expc);
             if (top1Scored) top1Result++;
             total++;
         }
@@ -633,6 +640,10 @@ shared_ptr<Processor::InferenceMetrics> ClassificationCalibrator::Process(bool s
 
     calculateLayersAccuracyDrop();
 
+    if (total == 0) {
+        throw std::logic_error("total can't be equal to zero");
+    }
+
     im.AccuracyResult = static_cast<float>(top1Result) / static_cast<float>(total);
 
     return std::shared_ptr<Processor::InferenceMetrics>(new CalibrationMetrics(im));
@@ -675,19 +686,14 @@ shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bo
     for (auto &ann : annCollector.annotations()) {
         std::list<DetectedObject> dobList;
         for (auto &obj : ann.objects) {
-            DetectedObject dob(classes[obj.name], obj.bndbox.xmin, obj.bndbox.ymin, obj.bndbox.xmax, obj.bndbox.ymax, 1.0, obj.difficult != 0);
+            DetectedObject dob(classes[obj.name], static_cast<float>(obj.bndbox.xmin), static_cast<float>(obj.bndbox.ymin),
+                               static_cast<float>(obj.bndbox.xmax), static_cast<float>(obj.bndbox.ymax), 1.0f, obj.difficult != 0);
             dobList.push_back(dob);
         }
         ImageDescription id(dobList);
         desiredForFiles.insert(std::pair<std::string, ImageDescription>(ann.folder + "/" + (!subdir.empty() ? subdir + "/" : "") + ann.filename, id));
     }
 
-
-    ImageDecoder decoder;
-
-    const int maxProposalCount = outputDims[1];
-    const int objectSize = outputDims[0];
-
     for (auto &item : outInfo) {
         DataPtr outputData = item.second;
         if (!outputData) {
@@ -718,18 +724,17 @@ shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bo
 
     while (iter != annCollector.annotations().end() && ipics < _nPictures) {
         std::vector<std::string> files;
-        int b = 0;
+        size_t b = 0;
 
         int filesWatched = 0;
         for (; b < batch && iter != annCollector.annotations().end(); b++, iter++, filesWatched++) {
             expected[b] = *iter;
             string filename = iter->folder + "/" + (!subdir.empty() ? subdir + "/" : "") + iter->filename;
             try {
-                Size orig_size = decoder.insertIntoBlob(std::string(imagesPath) + "/" + filename, b, *firstInputBlob, preprocessingOptions);
                 float scale_x, scale_y;
 
-                scale_x = 1.0 / iter->size.width;  // orig_size.width;
-                scale_y = 1.0 / iter->size.height;  // orig_size.height;
+                scale_x = 1.0f / iter->size.width;  // orig_size.width;
+                scale_y = 1.0f / iter->size.height;  // orig_size.height;
 
                 if (scaleProposalToInputSize) {
                     scale_x *= firstInputBlob->dims()[0];
@@ -742,6 +747,7 @@ shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bo
                 files.push_back(filename);
             } catch (const InferenceEngineException &iex) {
                 slog::warn << "Can't read file " << this->imagesPath + "/" + filename << slog::endl;
+                slog::warn << "Error: " << iex.what() << slog::endl;
                 // Could be some non-image file in directory
                 b--;
                 continue;
@@ -749,9 +755,6 @@ shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bo
             ipics++;
         }
 
-        InferenceEngine::StatusCode sts;
-        InferenceEngine::ResponseDesc dsc;
-
         // Infer model
         Infer(progress, filesWatched, im);
         collectCalibrationStatistic(b);
@@ -761,9 +764,9 @@ shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bo
 
         // Calculating similarity
         //
-        for (int b = 0; b < files.size(); b++) {
-            ImageDescription result(detectedObjects[files[b]]);
-            im.apc.consumeImage(result, scaledDesiredForFiles.at(files[b]));
+        for (size_t j = 0; j < files.size(); j++) {
+            ImageDescription result(detectedObjects[files[j]]);
+            im.apc.consumeImage(result, scaledDesiredForFiles.at(files[j]));
         }
     }
     progress.finish();
@@ -779,7 +782,7 @@ shared_ptr<Processor::InferenceMetrics> SSDObjectDetectionCalibrator::Process(bo
         for (auto i : appc) {
             mAP += i.second;
         }
-        imCalibration.AccuracyResult = mAP / appc.size();
+        imCalibration.AccuracyResult = static_cast<float>(mAP / appc.size());
     }
     return std::shared_ptr<Processor::InferenceMetrics>(new CalibrationMetrics(imCalibration));
 }
diff --git a/inference-engine/samples/calibration_tool/calibrator_processors.h b/inference-engine/samples/calibration_tool/calibrator_processors.h
index 05e7c1ec4..fdcfc126d 100644
--- a/inference-engine/samples/calibration_tool/calibrator_processors.h
+++ b/inference-engine/samples/calibration_tool/calibrator_processors.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -66,9 +66,11 @@ public:
      * @param stat - The statistic for normalization
      * @param layersToInt8 - list of layers planned to be executed in int8. if layer is absent in this
      *                     map, it is assumed that it will be executed in int8
+     * @param convertFullyConnected - should the FullyConnected layers be converted into Int8 or not
      */
     void validateInt8Config(const InferenceEngine::NetworkStatsMap &stat,
-                                    const std::map<std::string, bool>& layersToInt8);
+                                    const std::map<std::string, bool>& layersToInt8,
+                                    bool convertFullyConnected);
 
     /**
      * Statistic collected in the collectFP32Statistic is processed with threshold passed as a parameter
@@ -105,7 +107,7 @@ protected:
     InferenceEngine::InferRequest _inferRequestI8C;
     int _cBatch = 0;
 
-    int _nPictures;
+    size_t _nPictures;
 
 private:
     /**
diff --git a/inference-engine/samples/calibration_tool/data_stats.cpp b/inference-engine/samples/calibration_tool/data_stats.cpp
index ba17e55ec..ecee50b15 100644
--- a/inference-engine/samples/calibration_tool/data_stats.cpp
+++ b/inference-engine/samples/calibration_tool/data_stats.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -90,7 +90,7 @@ void AggregatedDataStats::getDataMinMax(const std::string& name, size_t channel,
             minValues.push_back(tsS.getMinValue());
         }
         // define number of elements to throw out
-        size_t elementToTake = maxValues.size() * threshold / 100;
+        size_t elementToTake = static_cast<size_t>(maxValues.size() * (threshold / 100));
         int elementsToThrow = maxValues.size() - elementToTake;
         std::sort(maxValues.begin(), maxValues.end());
         std::sort(minValues.begin(), minValues.end());
diff --git a/inference-engine/samples/calibration_tool/data_stats.h b/inference-engine/samples/calibration_tool/data_stats.h
index 0d8b4de5f..9f2c375e1 100644
--- a/inference-engine/samples/calibration_tool/data_stats.h
+++ b/inference-engine/samples/calibration_tool/data_stats.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/calibration_tool/main.cpp b/inference-engine/samples/calibration_tool/main.cpp
index cd0101452..90ee2b0b9 100644
--- a/inference-engine/samples/calibration_tool/main.cpp
+++ b/inference-engine/samples/calibration_tool/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -38,8 +38,6 @@ using namespace InferenceEngine::details;
 
 using InferenceEngine::details::InferenceEngineException;
 
-#define DEFAULT_PATH_P "./lib"
-
 /// @brief Message for help argument
 static const char help_message[] = "Print a help message";
 /// @brief Message for images argument
@@ -56,7 +54,7 @@ static const char model_message[] = "Required. Path to an .xml file with a train
 static const char plugin_message[] = "Plugin name. For example, CPU. If this parameter is passed, "
                                      "the sample looks for a specified plugin only.";
 /// @brief Message for assigning cnn calculation to device
-static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, or MYRIAD."
+static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD."
                                             " The application looks for a suitable plugin for the specified device.";
 /// @brief Message for label argument
 static const char label_message[] = "Path to a file with labels for a model";
@@ -99,9 +97,12 @@ static const char zero_background_message[] = "\"Zero is a background\" flag. So
                                               " are enumerated from 1, but 0 is an undefined \"background\" class"
                                               " (which is never detected)";
 
-static const char stream_output_message[] = "Flag for printing progress as a plain text.When used, interactive progress"
+static const char stream_output_message[] = "Flag for printing progress as a plain text. When used, interactive progress"
                                             " bar is replaced with multiline output";
 
+static const char convert_fc_message[] = "Convert FullyConnected layers to Int8 or not (false by default)";
+
+
 /// @brief Network type options and their descriptions
 static const char* types_descriptions[][2] = {
     { "C", "calibrate Classification network and write the calibrated network to IR" },
@@ -139,7 +140,7 @@ DEFINE_string(p, "", plugin_message);
 DEFINE_string(OCl, "", label_message);
 /// @brief Define parameter for a path to plugins <br>
 /// Default is ./lib
-DEFINE_string(pp, DEFAULT_PATH_P, plugin_path_message);
+DEFINE_string(pp, "", plugin_path_message);
 /// @brief Define paraneter for a target device to infer on <br>
 DEFINE_string(d, "CPU", target_device_message);
 /// @brief Define parameter for batch size <br>
@@ -189,6 +190,8 @@ DEFINE_string(output, "", output_model_name);
 
 DEFINE_string(lbl, "", labels_file_message);
 
+DEFINE_bool(convert_fc, false, convert_fc_message);
+
 /**
  * @brief This function shows a help message
  */
@@ -250,7 +253,8 @@ std::string strtolower(const std::string& s) {
 void SaveCalibratedIR(const std::string &originalName,
                       const std::string &outModelName,
                       const std::map<std::string, bool>& layersToInt8,
-                      const InferenceEngine::NetworkStatsMap& statMap) {
+                      const InferenceEngine::NetworkStatsMap& statMap,
+                      bool convertFullyConnected) {
     slog::info << "Layers profile for Int8 quantization\n";
     CNNNetReader networkReader;
     networkReader.ReadNetwork(originalName);
@@ -271,6 +275,14 @@ void SaveCalibratedIR(const std::string &originalName,
                 layer->params["quantization_level"] = "I8";
                 std::cout << layer->name << ": " << "I8" << std::endl;
             }
+        } else if (CaselessEq<std::string>()(layer->type, "fullyconnected")) {
+            if (!convertFullyConnected) {
+                layer->params["quantization_level"] = "FP32";
+                std::cout << layer->name << ": " << "FP32" << std::endl;
+            } else {
+                layer->params["quantization_level"] = "I8";
+                std::cout << layer->name << ": " << "I8" << std::endl;
+            }
         }
     }
 
@@ -340,7 +352,7 @@ int main(int argc, char *argv[]) {
         // ---------------------Loading plugin for Inference Engine------------------------------------------------
         slog::info << "Loading plugin" << slog::endl;
         /** Loading the library with extensions if provided**/
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp, "../../../lib/intel64", "" }).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
 
         /** Loading default extensions **/
         if (FLAGS_d.find("CPU") != std::string::npos) {
@@ -436,7 +448,7 @@ int main(int argc, char *argv[]) {
             for (float threshold = 100.0f; threshold > 95.0f; threshold -= 0.5) {
                 std::cout << "Validate int8 accuracy, threshold for activation statistics = " << threshold << std::endl;
                 InferenceEngine::NetworkStatsMap tmpStatMap = calibrator->getStatistic(threshold);
-                calibrator->validateInt8Config(tmpStatMap, {});
+                calibrator->validateInt8Config(tmpStatMap, {}, FLAGS_convert_fc);
                 shared_ptr<Processor::InferenceMetrics> pIM_I8 = processor->Process(FLAGS_stream_output);
                 const CalibrationMetrics *mI8 = dynamic_cast<const CalibrationMetrics *>(pIM_I8.get());
                 if (maximalAccuracy < mI8->AccuracyResult) {
@@ -472,7 +484,7 @@ int main(int argc, char *argv[]) {
                 while (it != orderedLayersAccuracyDrop.crend() && bAccuracy == false) {
                     slog::info << "Returning of '" << it->second << "' to FP32 precision, start validation\n";
                     layersToInt8[it->second] = false;
-                    calibrator->validateInt8Config(statMap, layersToInt8);
+                    calibrator->validateInt8Config(statMap, layersToInt8, FLAGS_convert_fc);
                     pIM_I8 = processor->Process(FLAGS_stream_output);
                     mI8 = dynamic_cast<const CalibrationMetrics *>(pIM_I8.get());
                     maximalAccuracy = mI8->AccuracyResult;
@@ -494,7 +506,7 @@ int main(int argc, char *argv[]) {
                     "current Int8 configuration accuracy: " << OUTPUT_FLOATING(100.0 * maximalAccuracy) << "% " <<
                     "with threshold for activation statistic: " << bestThreshold << "%" << std::endl;
                 std::string outModelName = FLAGS_output.empty() ? fileNameNoExt(FLAGS_m) + "_i8" : fileNameNoExt(FLAGS_output);
-                SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap);
+                SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap, FLAGS_convert_fc);
             } else {
                 slog::info << "Required threshold of accuracy drop cannot be achieved with any int8 quantization\n";
             }
@@ -502,7 +514,7 @@ int main(int argc, char *argv[]) {
             std::cout << "Collected activation statistics, writing maximum values to IR" << std::endl;
             statMap = calibrator->getStatistic(100.0f);
             std::string outModelName = FLAGS_output.empty() ? fileNameNoExt(FLAGS_m) + "_i8" : fileNameNoExt(FLAGS_output);
-            SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap);
+            SaveCalibratedIR(FLAGS_m, outModelName, layersToInt8, statMap, FLAGS_convert_fc);
         }
 
         if (dumper.dumpEnabled()) {
@@ -521,7 +533,6 @@ int main(int argc, char *argv[]) {
             showUsage();
             return ex.list().begin()->exitCode();
         } else {
-            const char* s = ex.what();
             slog::err << "Input problems: \n" << ex.what() << slog::endl;
             showUsage();
             return ex.list().begin()->exitCode();
diff --git a/inference-engine/samples/classification_sample/CMakeLists.txt b/inference-engine/samples/classification_sample/CMakeLists.txt
index 4c80190a4..1dab0c94f 100644
--- a/inference-engine/samples/classification_sample/CMakeLists.txt
+++ b/inference-engine/samples/classification_sample/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "classification_sample")
 
 file (GLOB SRC
diff --git a/inference-engine/samples/classification_sample/README.md b/inference-engine/samples/classification_sample/README.md
index 26e943b56..348e90f42 100644
--- a/inference-engine/samples/classification_sample/README.md
+++ b/inference-engine/samples/classification_sample/README.md
@@ -1,14 +1,23 @@
-# Image Classification Sample
+# Image Classification C++ Sample
 
-This topic demonstrates how to run the Image Classification sample application, which performs 
+This topic demonstrates how to run the Image Classification sample application, which performs
 inference using image classification networks such as AlexNet and GoogLeNet.
 
-## Running
+> **NOTE:** This topic describes usage of C++ implementation of the Image Classification Sample. For the Python* implementation, refer to [Image Classification Python* Sample](./inference-engine/ie_bridges/python/sample/classification_sample/README.md).
+
+## How It Works
+
+Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference
+Engine plugin. When inference is done, the application creates an
+output image and outputs data to the standard output stream.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
-Running the application with the <code>-h</code> option yields the following usage message:
+## Running
+Running the application with the `-h` option yields the following usage message:
 ```sh
 ./classification_sample -h
-InferenceEngine: 
+InferenceEngine:
     API version ............ <version>
     Build .................. <number>
 
@@ -19,13 +28,13 @@ Options:
     -i "<path1>" "<path2>"    Required. Path to a folder with images or path to an image files: a .ubyte file for LeNet
                               and a .bmp file for the other networks.
     -m "<path>"               Required. Path to an .xml file with a trained model.
-        -l "<absolute_path>"  Optional. Absolute path to library with MKL-DNN (CPU) custom layers (*.so).
+        -l "<absolute_path>"  Required for CPU custom layers. Absolute path to a shared library with the kernels implementations.
         Or
-        -c "<absolute_path>"  Optional. Absolute path to clDNN (GPU) custom layers config (*.xml).
+        -c "<absolute_path>"  Required for GPU custom kernels. Absolute path to the .xml file with the kernels descriptions.
     -pp "<path>"              Path to a plugin folder.
-    -d "<device>"             Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
-    -nt "<integer>"           Number of top results (default 10)
-    -ni "<integer>"           Number of iterations (default 1)
+    -d "<device>"             Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
+    -nt "<integer>"           Number of top results. Default value is 10
+    -ni "<integer>"           Number of iterations. Default value is 1
     -pc                       Enables per-layer performance report
     -p_msg                    Enables messages from a plugin
 
@@ -33,32 +42,27 @@ Options:
 
 Running the application with the empty list of options yields the usage message given above.
 
-To run the sample you can use AlexNet and GoogLeNet models that can be downloaded with the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or other image classification models. 
+To run the sample, you can use AlexNet and GoogLeNet or other public or pre-trained image classification models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
 
-> **IMPORTANT**: To run the sample, the model should be first converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
-For example, to perform inference of an AlexNet model (previously converted to the Inference Engine format) on CPU, use the following command:
+For example, to perform inference of an AlexNet model on CPU, use the following command:
 
 ```sh
 ./classification_sample -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml
 ```
 
-### Outputs
+## Demo Output
+
+By default the application outputs top-10 inference results.
+Add the `-nt` option to the previous command to modify the number of top output results.
 
-By default the application outputs top-10 inference results. 
-Add the <code>-nt</code> option to the previous command to modify the number of top output results.
-<br>For example, to get the top-5 results on Intel&reg; HD Graphics, use the following commands:
+For example, to get the top-5 results on GPU, use the following commands:
 ```sh
 ./classification_sample -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d GPU
 ```
 
-### How it works
-
-Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference 
-Engine plugin. When inference is done, the application creates an 
-output image and outputs data to the standard output stream.
-
-## See Also 
+## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
-* [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
-* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
-\ No newline at end of file
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/classification_sample/classification_sample.h b/inference-engine/samples/classification_sample/classification_sample.h
index 9bf4a61a9..7b84e6ac1 100644
--- a/inference-engine/samples/classification_sample/classification_sample.h
+++ b/inference-engine/samples/classification_sample/classification_sample.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -29,25 +29,25 @@ static const char plugin_path_message[] = "Path to a plugin folder.";
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. " \
+static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
                                             "Sample will look for a suitable plugin for device specified (CPU by default)";
 
 /// @brief message for performance counters
 static const char performance_counter_message[] = "Enables per-layer performance report";
 
 /// @brief message for top results number
-static const char ntop_message[] = "Number of top results (default 10)";
+static const char ntop_message[] = "Number of top results. Default value is 10";
 
 /// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations (default 1)";
+static const char iterations_count_message[] = "Number of iterations. Default value is 1";
 
 /// @brief message for clDNN custom kernels desc
-static const char custom_cldnn_message[] = "Required for clDNN (GPU)-targeted custom kernels."\
-                                            "Absolute path to the xml file with the kernels desc.";
+static const char custom_cldnn_message[] = "Required for GPU custom kernels. "\
+                                            "Absolute path to the .xml file with the kernels descriptions.";
 
 /// @brief message for user library argument
-static const char custom_cpu_library_message[] = "Required for MKLDNN (CPU)-targeted custom layers." \
-                                                 "Absolute path to a shared library with the kernels impl.";
+static const char custom_cpu_library_message[] = "Required for CPU custom layers. " \
+                                                 "Absolute path to a shared library with the kernels implementations.";
 
 /// @brief message for plugin messages
 static const char plugin_message[] = "Enables messages from a plugin";
@@ -70,7 +70,7 @@ DEFINE_string(pp, "", plugin_path_message);
 DEFINE_string(d, "CPU", target_device_message);
 
 /// @brief Top results number (default 10) <br>
-DEFINE_int32(nt, 10, ntop_message);
+DEFINE_uint32(nt, 10, ntop_message);
 
 /// @brief Enable per-layer performance report
 DEFINE_bool(pc, false, performance_counter_message);
@@ -84,7 +84,7 @@ DEFINE_string(c, "", custom_cldnn_message);
 DEFINE_string(l, "", custom_cpu_library_message);
 
 /// @brief Iterations count (default 1)
-DEFINE_int32(ni, 1, iterations_count_message);
+DEFINE_uint32(ni, 1, iterations_count_message);
 
 /// @brief Enable plugin messages
 DEFINE_bool(p_msg, false, plugin_message);
diff --git a/inference-engine/samples/classification_sample/main.cpp b/inference-engine/samples/classification_sample/main.cpp
index bf2941548..422e737a5 100644
--- a/inference-engine/samples/classification_sample/main.cpp
+++ b/inference-engine/samples/classification_sample/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,6 +7,7 @@
 #include <chrono>
 #include <memory>
 #include <string>
+#include <limits>
 
 #include <inference_engine.hpp>
 #include <ext_list.hpp>
@@ -15,6 +16,7 @@
 #include <samples/common.hpp>
 #include <samples/slog.hpp>
 #include <samples/args_helper.hpp>
+#include <samples/classification_results.h>
 
 #include "classification_sample.h"
 
@@ -68,7 +70,7 @@ int main(int argc, char *argv[]) {
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
         slog::info << "Loading plugin" << slog::endl;
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp, "../../../lib/intel64" , "" }).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
         if (FLAGS_p_msg) {
             static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
         }
@@ -242,7 +244,7 @@ int main(int argc, char *argv[]) {
 
         double total = 0.0;
         /** Start inference & calc performance **/
-        for (int iter = 0; iter < FLAGS_ni; ++iter) {
+        for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
             auto t0 = Time::now();
             infer_request.Infer();
             auto t1 = Time::now();
@@ -256,24 +258,16 @@ int main(int argc, char *argv[]) {
         slog::info << "Processing output blobs" << slog::endl;
 
         const Blob::Ptr output_blob = infer_request.GetBlob(firstOutputName);
-        auto output_data = output_blob->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
 
         /** Validating -nt value **/
-        const int resultsCnt = output_blob->size() / batchSize;
+        const size_t resultsCnt = output_blob->size() / batchSize;
         if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
             slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
                       << resultsCnt+1 << " and more than 0)\n            will be used maximal value : " << resultsCnt;
             FLAGS_nt = resultsCnt;
         }
 
-        /** This vector stores id's of top N results **/
-        std::vector<unsigned> results;
-        TopResults(FLAGS_nt, *output_blob, results);
-
-        std::cout << std::endl << "Top " << FLAGS_nt << " results:" << std::endl << std::endl;
-
         /** Read labels from file (e.x. AlexNet.labels) **/
-        bool labelsEnabled = false;
         std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels";
         std::vector<std::string> labels;
 
@@ -285,26 +279,17 @@ int main(int argc, char *argv[]) {
                 trim(strLine);
                 labels.push_back(strLine);
             }
-            labelsEnabled = true;
         }
 
-        /** Print the result iterating over each batch **/
-        for (int image_id = 0; image_id < batchSize; ++image_id) {
-            std::cout << "Image " << imageNames[image_id] << std::endl << std::endl;
-            for (size_t id = image_id * FLAGS_nt, cnt = 0; cnt < FLAGS_nt; ++cnt, ++id) {
-                std::cout.precision(7);
-                /** Getting probability for resulting class **/
-                const auto result = output_data[results[id] + image_id*(output_blob->size() / batchSize)];
-                std::cout << std::left << std::fixed << results[id] << " " << result;
-                if (labelsEnabled) {
-                    std::cout << " label " << labels[results[id]] << std::endl;
-                } else {
-                    std::cout << " label #" << results[id] << std::endl;
-                }
-            }
-            std::cout << std::endl;
-        }
+        ClassificationResult classificationResult(output_blob, imageNames,
+                                                  batchSize, FLAGS_nt,
+                                                  labels);
+        classificationResult.print();
+
         // -----------------------------------------------------------------------------------------------------
+        if (std::fabs(total) < std::numeric_limits<double>::epsilon()) {
+            throw std::logic_error("total can't be equal to zero");
+        }
         std::cout << std::endl << "total inference time: " << total << std::endl;
         std::cout << "Average running time of one iteration: " << total / static_cast<double>(FLAGS_ni) << " ms" << std::endl;
         std::cout << std::endl << "Throughput: " << 1000 * static_cast<double>(FLAGS_ni) * batchSize / total << " FPS" << std::endl;
diff --git a/inference-engine/samples/classification_sample_async/CMakeLists.txt b/inference-engine/samples/classification_sample_async/CMakeLists.txt
index 96e6e4187..9e37440ba 100644
--- a/inference-engine/samples/classification_sample_async/CMakeLists.txt
+++ b/inference-engine/samples/classification_sample_async/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "classification_sample_async")
 
 file (GLOB SRC
diff --git a/inference-engine/samples/classification_sample_async/README.md b/inference-engine/samples/classification_sample_async/README.md
index 995a5d61e..e5feedf56 100644
--- a/inference-engine/samples/classification_sample_async/README.md
+++ b/inference-engine/samples/classification_sample_async/README.md
@@ -1,24 +1,39 @@
-# Image Classification Sample Async
+# Image Classification C++ Sample Async
 
 This sample demonstrates how to build and execute inference in pipelined mode on example of classifications networks.
 
-The pipelined mode might increase the throghput of the pictures. The latency of one inference will be the same as for syncronious execution.
-<br>
-The throughput is increased due to follow reasons:
-* Some plugins have heterogenity inside themselves. Transferring of data, execution on remote device, doigin pre-processing and post-processing on the host
-* Using of explicit heterogenious plugin with execution of different parts of network on differnt devices
+> **NOTE:** This topic describes usage of C++ implementation of the Image Classification Sample Async. For the Python* implementation, refer to [Image Classification Python* Sample Async](./inference-engine/ie_bridges/python/sample/classification_sample_async/README.md).
 
-When two and more devices are involved in inference process of one picture, creation of several infer requests and starting of asynchronious inference allows to utilize devices the most efficient way.
-If two devices are involved in execution, the most optimal value for -nireq option is 2
-To do this efficiently, Classification Sample Async uses round-robin algorithm for infer requests. It starts execution for the current infer request and swith for the waiting of results for previous one. After finishing of wait, it switches infer requsts and repeat the procedure.
+The pipelined mode might increase the throughput of the pictures. The latency of one inference will be the same as for synchronous execution.
 
-Another required aspect of seeing good throughput is number of iterations. Only having big number of iterations you can emulate the real application work and see performance
+The throughput increases due to follow reasons:
+* Some plugins have heterogeneity inside themselves. Data transferring, execution on remote device, pre-processing and post-processing on the host
+* Using of explicit heterogeneous plugin with execution of different parts of network on different devices
+
+When two or more devices process one image, creating several infer requests and starting asynchronous inference allow for using devices in the most efficient way.
+If two devices are involved in execution, the most optimal value for `-nireq` option is 2.
+
+To process infer requests more efficiently, Classification Sample Async uses round-robin algorithm. It starts execution of the current infer request and switches to waiting for results of the previous one. After finishing of waiting, it switches infer requests and repeat the procedure.
+
+Another required aspect of good throughput is a number of iterations. Only with big number of iterations you can emulate the real application work and get good performance.
 
 The batch mode is an independent attribute on the pipelined mode. Pipelined mode works efficiently with any batch size.
 
+## How It Works
+
+Upon the start-up, the sample application reads command line parameters and loads a network and an image to the Inference
+Engine plugin.
+Then application creates several infer requests pointed in `-nireq` parameter and loads images for inference.
+
+Then in a loop it starts inference for the current infer request and switches to waiting for the previous one. When results are ready, it swaps infer requests.
+
+When inference is done, the application outputs data to the standard output stream.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
 ## Running
 
-Running the application with the <code>-h</code> option yields the following usage message:
+Running the application with the `-h` option yields the following usage message:
 ```sh
 ./classification_sample_async -h
 InferenceEngine:
@@ -36,50 +51,47 @@ Options:
     -m "<path>"             
                             Required. Path to an .xml file with a trained model.
         -l "<absolute_path>"
-                            Optional. Absolute path to library with MKL-DNN (CPU) custom layers (*.so).
+                            Required for CPU. Absolute path to a shared library with the kernel implementations
         Or
         -c "<absolute_path>"
-                            Optional. Absolute path to clDNN (GPU) custom layers config (*.xml).
+                            Required for GPU custom kernels. Absolute path to the .xml file with kernel descriptions
     -pp "<path>"            
-                            Path to a plugin folder.
+                            Optional. Path to a plugin folder.
     -d "<device>"           
-                            Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
+                            Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified. Default value is "CPU".
     -nt "<integer>"         
-                            Number of top results (default 10)
+                            Optional. Number of top results. Default value is 10.
     -ni "<integer>"         
-                            Number of iterations (default 1)
+                            Optional. Number of iterations. Default value is 1.
     -pc                     
-                            Enables per-layer performance report
+                            Optional. Enables per-layer performance report
     -nireq "<integer>"
-                            Number of infer request for pipelined mode (default 1)
+                            Optional. Number of infer request for pipelined mode. Default value is 1.
     -p_msg                  
-                            Enables messages from a plugin
-
+                            Optional. Enables messages from a plugin
+    -nthreads "<integer>"
+                            Optional. Number of threads to use for inference on the CPU (including HETERO cases)
+    -pin "YES"/"NO"
+                            Optional. Enable ("YES", default) or disable ("NO") CPU threads pinning for CPU-involved inference
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
 
-You can do inference on an image using a trained AlexNet network on FPGA with fallback to Intel&reg; Processors using the following command:
+To run the sample, use AlexNet and GoogLeNet or other public or pre-trained image classification models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+You can do inference on an image using a trained AlexNet network on FPGA with fallback to CPU using the following command:
 ```sh
 ./classification_sample_async -i <path_to_image>/cat.bmp -m <path_to_model>/alexnet_fp32.xml -nt 5 -d HETERO:FPGA,CPU -nireq 2 -ni 200
 ```
 
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-### Outputs
+## Sample Output
 
 By default the application outputs top-10 inference results for each infer request.
 In addition to this information it will provide throughput value measured in frames per seconds.
 
-### How it works
-
-Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference 
-Engine plugin.
-Then application creates several infer requests pointed in -nireq parameter and loads pictures for inference.
-
-Then in the loop it starts inference for the current infer request and switch for waiting of another one. When results are ready, infer requests will be swapped.
-
-When inference is done, the application outputs data to the standard output stream.
-
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
diff --git a/inference-engine/samples/classification_sample_async/classification_sample_async.h b/inference-engine/samples/classification_sample_async/classification_sample_async.h
index c0a202cfc..2a44ac39b 100644
--- a/inference-engine/samples/classification_sample_async/classification_sample_async.h
+++ b/inference-engine/samples/classification_sample_async/classification_sample_async.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,45 +23,45 @@ static const char image_message[] = "Required. Path to a folder with images or p
                                     "and a .bmp file for the other networks.";
 
 /// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Path to a plugin folder.";
+static const char plugin_path_message[] = "Optional. Path to a plugin folder.";
 
 /// @brief message for model argument
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. " \
-                                            "Sample will look for a suitable plugin for device specified (CPU by default)";
+static const char target_device_message[] = "Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
+                                            "Sample will look for a suitable plugin for device specified. Default value is CPU";
 
 /// @brief message for performance counters
-static const char performance_counter_message[] = "Enables per-layer performance report";
+static const char performance_counter_message[] = "Optional. Enables per-layer performance report";
 
 /// @brief message for top results number
-static const char ntop_message[] = "Number of top results (default 10)";
+static const char ntop_message[] = "Optional. Number of top results. Default value is 10.";
 
 /// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations (default 1)";
+static const char iterations_count_message[] = "Optional. Number of iterations. Default value is 1.";
 
 /// @brief message for iterations count
-static const char ninfer_request_message[] = "Number of infer request for pipelined mode (default 1)";
+static const char ninfer_request_message[] = "Optional. Number of infer request for pipelined mode. Default value is 1.";
 
 /// @brief message for #threads for CPU inference
 static const char infer_num_threads_message[] = "Optional. Number of threads to use for inference on the CPU "
-                                                "(including Hetero cases).";
+                                                "(including HETERO cases).";
 
 /// @brief message for clDNN custom kernels desc
-static const char custom_cldnn_message[] = "Required for clDNN (GPU)-targeted custom kernels."\
-                                            "Absolute path to the xml file with the kernels desc.";
+static const char custom_cldnn_message[] = "Required for GPU custom kernels."\
+                                            "Absolute path to the .xml file with kernels description";
 
 /// @brief message for user library argument
-static const char custom_cpu_library_message[] = "Required for MKLDNN (CPU)-targeted custom layers." \
-                                                 "Absolute path to a shared library with the kernels impl.";
+static const char custom_cpu_library_message[] = "Required for CPU custom layers." \
+                                                 "Absolute path to a shared library with the kernels implementation";
 
 // @brief message for CPU threads pinning option
-static const char cpu_threads_pinning_message[] = "Optional. Enable (\"YES\"default) or disable (\"NO\")" \
+static const char cpu_threads_pinning_message[] = "Optional. Enable (\"YES\", default) or disable (\"NO\")" \
                                                   "CPU threads pinning for CPU-involved inference.";
 
 /// @brief message for plugin messages
-static const char plugin_message[] = "Enables messages from a plugin";
+static const char plugin_message[] = "Optional. Enables messages from a plugin";
 
 
 /// @brief Define flag for showing help message <br>
@@ -82,7 +82,7 @@ DEFINE_string(pp, "", plugin_path_message);
 DEFINE_string(d, "CPU", target_device_message);
 
 /// @brief Top results number (default 10) <br>
-DEFINE_int32(nt, 10, ntop_message);
+DEFINE_uint32(nt, 10, ntop_message);
 
 /// @brief Enable per-layer performance report
 DEFINE_bool(pc, false, performance_counter_message);
@@ -96,10 +96,10 @@ DEFINE_string(c, "", custom_cldnn_message);
 DEFINE_string(l, "", custom_cpu_library_message);
 
 /// @brief Iterations count (default 1)
-DEFINE_int32(ni, 1, iterations_count_message);
+DEFINE_uint32(ni, 1, iterations_count_message);
 
 /// @brief Number of infer requests
-DEFINE_int32(nireq, 1, ninfer_request_message);
+DEFINE_uint32(nireq, 1, ninfer_request_message);
 
 /// @brief Enable plugin messages
 DEFINE_bool(p_msg, false, plugin_message);
diff --git a/inference-engine/samples/classification_sample_async/main.cpp b/inference-engine/samples/classification_sample_async/main.cpp
index e8428ef21..f73f12628 100644
--- a/inference-engine/samples/classification_sample_async/main.cpp
+++ b/inference-engine/samples/classification_sample_async/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,6 +23,7 @@
 #include <samples/common.hpp>
 #include <samples/slog.hpp>
 #include <samples/args_helper.hpp>
+#include <samples/classification_results.h>
 
 #include <sys/stat.h>
 #include <ext_list.hpp>
@@ -84,7 +85,7 @@ int main(int argc, char *argv[]) {
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
         slog::info << "Loading plugin" << slog::endl;
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp, "../../../lib/intel64" , "" }).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
         if (FLAGS_p_msg) {
             static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
         }
@@ -254,7 +255,7 @@ int main(int argc, char *argv[]) {
         size_t currentInfer = 0;
         size_t prevInfer = (FLAGS_nireq > 1) ? 1 : 0;
 
-        for (int iter = 0; iter < FLAGS_ni + FLAGS_nireq; ++iter) {
+        for (size_t iter = 0; iter < FLAGS_ni + FLAGS_nireq; ++iter) {
             if (iter < FLAGS_ni) {
                 inferRequests[currentInfer].StartAsync();
             }
@@ -280,20 +281,14 @@ int main(int argc, char *argv[]) {
 
         for (size_t i = 0; i < FLAGS_nireq; i++) {
             /** Validating -nt value **/
-            const int resultsCnt = outputBlobs[i]->size() / batchSize;
+            const size_t resultsCnt = outputBlobs[i]->size() / batchSize;
             if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
                 slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
                           << resultsCnt+1 << " and more than 0)\n            will be used maximal value : " << resultsCnt << slog::endl;
                 FLAGS_nt = resultsCnt;
             }
-            /** This vector stores id's of top N results **/
-            std::vector<unsigned> results;
-            TopResults(FLAGS_nt, *outputBlobs[i], results);
-
-            std::cout << std::endl << "Top " << FLAGS_nt << " results:" << std::endl << std::endl;
 
             /** Read labels from file (e.x. AlexNet.labels) **/
-            bool labelsEnabled = false;
             std::string labelFileName = fileNameNoExt(FLAGS_m) + ".labels";
             std::vector<std::string> labels;
 
@@ -305,26 +300,12 @@ int main(int argc, char *argv[]) {
                     trim(strLine);
                     labels.push_back(strLine);
                 }
-                labelsEnabled = true;
             }
 
-            /** Print the result iterating over each batch **/
-            for (int image_id = 0; image_id < batchSize; ++image_id) {
-                std::cout << "Image " << imageNames[image_id] << std::endl << std::endl;
-                for (size_t id = image_id * FLAGS_nt, cnt = 0; cnt < FLAGS_nt; ++cnt, ++id) {
-                    std::cout.precision(7);
-                    /** Getting probability for resulting class **/
-                    auto result = outputBlobs[i]->buffer().
-                            as<PrecisionTrait<Precision::FP32>::value_type*>()[results[id] + image_id*(outputBlobs[i]->size() / batchSize)];
-                    std::cout << std::left << std::fixed << results[id] << " " << result;
-                    if (labelsEnabled) {
-                        std::cout << " label " << labels[results[id]] << std::endl;
-                    } else {
-                        std::cout << " label #" << results[id] << std::endl;
-                    }
-                }
-                std::cout << std::endl;
-            }
+            ClassificationResult classificationResult(outputBlobs[i], imageNames,
+                                                      batchSize, FLAGS_nt,
+                                                      labels);
+            classificationResult.print();
         }
         // -----------------------------------------------------------------------------------------------------
         std::cout << std::endl << "total inference time: " << total << std::endl;
@@ -335,8 +316,7 @@ int main(int argc, char *argv[]) {
         std::map<std::string, InferenceEngineProfileInfo> performanceMap;
         if (FLAGS_pc) {
             for (size_t nireq = 0; nireq < FLAGS_nireq; nireq++) {
-                performanceMap = inferRequests[nireq].GetPerformanceCounts();
-                printPerformanceCounts(performanceMap, std::cout);
+                printPerformanceCounts(inferRequests[nireq], std::cout);
             }
         }
     }
diff --git a/inference-engine/samples/common/format_reader/CMakeLists.txt b/inference-engine/samples/common/format_reader/CMakeLists.txt
index 0498e0a52..e3ecd5850 100644
--- a/inference-engine/samples/common/format_reader/CMakeLists.txt
+++ b/inference-engine/samples/common/format_reader/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "format_reader")
 
 file (GLOB MAIN_SRC
@@ -15,7 +13,7 @@ file (GLOB LIBRARY_HEADERS
         )
 
 # Find OpenCV components if exist
-find_package(OpenCV COMPONENTS imgcodecs QUIET)
+find_package(OpenCV COMPONENTS imgcodecs videoio imgproc  QUIET)
 if(NOT(OpenCV_FOUND))
     message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " is built without OPENCV support")
 else()
@@ -34,13 +32,15 @@ add_definitions(-DIMPLEMENT_INFERENCE_ENGINE_API)
 source_group("src" FILES ${LIBRARY_SRC})
 source_group("include" FILES ${LIBRARY_HEADERS})
 
-# Properties->C/C++->General->Additional Include Directories
-include_directories (
-        ${CMAKE_CURRENT_SOURCE_DIR})
 
 # Create library file from sources.
 add_library(${TARGET_NAME} SHARED ${MAIN_SRC} ${LIBRARY_HEADERS})
 target_link_libraries(${TARGET_NAME} ${OpenCV_LIBRARIES})
 
-set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
-COMPILE_PDB_NAME ${TARGET_NAME})
+if(CMAKE_VERSION VERSION_LESS "2.8.11")
+	include_directories (${CMAKE_CURRENT_SOURCE_DIR})
+else()
+	target_include_directories(${TARGET_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+endif()
+
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
diff --git a/inference-engine/samples/common/format_reader/MnistUbyte.cpp b/inference-engine/samples/common/format_reader/MnistUbyte.cpp
index c1b04c0ee..6e46f0ec7 100644
--- a/inference-engine/samples/common/format_reader/MnistUbyte.cpp
+++ b/inference-engine/samples/common/format_reader/MnistUbyte.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/common/format_reader/MnistUbyte.h b/inference-engine/samples/common/format_reader/MnistUbyte.h
index d9d51c406..fd6ae0f75 100644
--- a/inference-engine/samples/common/format_reader/MnistUbyte.h
+++ b/inference-engine/samples/common/format_reader/MnistUbyte.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -47,7 +47,7 @@ public:
         delete this;
     }
 
-    std::shared_ptr<unsigned char> getData(int width, int height) override {
+    std::shared_ptr<unsigned char> getData(size_t width, size_t height) override {
         if ((width * height != 0) && (_width * _height != width * height)) {
             std::cout << "[ WARNING ] Image won't be resized! Please use OpenCV.\n";
             return nullptr;
diff --git a/inference-engine/samples/common/format_reader/bmp.cpp b/inference-engine/samples/common/format_reader/bmp.cpp
index 56822ffd2..b52f839ab 100644
--- a/inference-engine/samples/common/format_reader/bmp.cpp
+++ b/inference-engine/samples/common/format_reader/bmp.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/common/format_reader/bmp.h b/inference-engine/samples/common/format_reader/bmp.h
index 53ca37366..b1b05dfc2 100644
--- a/inference-engine/samples/common/format_reader/bmp.h
+++ b/inference-engine/samples/common/format_reader/bmp.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -64,7 +64,7 @@ public:
         delete this;
     }
 
-    std::shared_ptr<unsigned char> getData(int width, int height) override {
+    std::shared_ptr<unsigned char> getData(size_t width, size_t height) override {
         if ((width * height != 0) && (_width * _height != width * height)) {
             std::cout << "[ WARNING ] Image won't be resized! Please use OpenCV.\n";
             return nullptr;
diff --git a/inference-engine/samples/common/format_reader/format_reader.cpp b/inference-engine/samples/common/format_reader/format_reader.cpp
index a69843179..30f334532 100644
--- a/inference-engine/samples/common/format_reader/format_reader.cpp
+++ b/inference-engine/samples/common/format_reader/format_reader.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/common/format_reader/format_reader.h b/inference-engine/samples/common/format_reader/format_reader.h
index 8a4cfcdaf..d0c746275 100644
--- a/inference-engine/samples/common/format_reader/format_reader.h
+++ b/inference-engine/samples/common/format_reader/format_reader.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,7 +11,7 @@
 #include <memory>
 #include <string>
 #include <vector>
-#include<iostream>
+#include <iostream>
 
 #if defined(_WIN32)
 # ifdef IMPLEMENT_FORMAT_READER
@@ -62,7 +62,7 @@ public:
      * @return shared pointer with input data
      * @In case of using OpenCV, parameters width and height will be used for image resizing
      */
-    virtual std::shared_ptr<unsigned char> getData(int width = 0, int height = 0) = 0;
+    virtual std::shared_ptr<unsigned char> getData(size_t width = 0, size_t height = 0) = 0;
 
     /**
      * \brief Get size
diff --git a/inference-engine/samples/common/format_reader/format_reader_ptr.h b/inference-engine/samples/common/format_reader/format_reader_ptr.h
index faba46378..0b82d4674 100644
--- a/inference-engine/samples/common/format_reader/format_reader_ptr.h
+++ b/inference-engine/samples/common/format_reader/format_reader_ptr.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/common/format_reader/opencv_wraper.cpp b/inference-engine/samples/common/format_reader/opencv_wraper.cpp
index b29b39ba1..835402ab6 100644
--- a/inference-engine/samples/common/format_reader/opencv_wraper.cpp
+++ b/inference-engine/samples/common/format_reader/opencv_wraper.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -27,11 +27,11 @@ OCVReader::OCVReader(const string &filename) {
     _height = img.size().height;
 }
 
-std::shared_ptr<unsigned char> OCVReader::getData(int width = 0, int height = 0) {
+std::shared_ptr<unsigned char> OCVReader::getData(size_t width = 0, size_t height = 0) {
     cv::Mat resized(img);
     if (width != 0 && height != 0) {
-        int iw = img.size().width;
-        int ih = img.size().height;
+        size_t iw = img.size().width;
+        size_t ih = img.size().height;
         if (width != iw || height != ih) {
             slog::warn << "Image is resized from (" << iw << ", " << ih << ") to (" << width << ", " << height << ")" << slog::endl;
         }
diff --git a/inference-engine/samples/common/format_reader/opencv_wraper.h b/inference-engine/samples/common/format_reader/opencv_wraper.h
index e4b40b893..5dc0b12f4 100644
--- a/inference-engine/samples/common/format_reader/opencv_wraper.h
+++ b/inference-engine/samples/common/format_reader/opencv_wraper.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -50,7 +50,7 @@ public:
         delete this;
     }
 
-    std::shared_ptr<unsigned char> getData(int width, int height) override;
+    std::shared_ptr<unsigned char> getData(size_t width, size_t height) override;
 };
 }  // namespace FormatReader
 #endif
 \ No newline at end of file
diff --git a/inference-engine/samples/common/format_reader/register.h b/inference-engine/samples/common/format_reader/register.h
index 764b5b480..34cf1f77f 100644
--- a/inference-engine/samples/common/format_reader/register.h
+++ b/inference-engine/samples/common/format_reader/register.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 /**
diff --git a/inference-engine/samples/common/os/windows/w_dirent.h b/inference-engine/samples/common/os/windows/w_dirent.h
index 40bcf9ee7..e9111d9a4 100644
--- a/inference-engine/samples/common/os/windows/w_dirent.h
+++ b/inference-engine/samples/common/os/windows/w_dirent.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,6 +6,10 @@
 
 #if defined(_WIN32)
 
+#ifndef NOMINMAX
+# define NOMINMAX
+#endif
+
 #include <winsock2.h>
 #include <windows.h>
 #include <stdlib.h>
diff --git a/inference-engine/samples/common/samples/args_helper.hpp b/inference-engine/samples/common/samples/args_helper.hpp
index 9edfb97b9..a38570b90 100644
--- a/inference-engine/samples/common/samples/args_helper.hpp
+++ b/inference-engine/samples/common/samples/args_helper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/common/samples/classification_results.h b/inference-engine/samples/common/samples/classification_results.h
new file mode 100644
index 000000000..3cf0a2b06
--- /dev/null
+++ b/inference-engine/samples/common/samples/classification_results.h
@@ -0,0 +1,92 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+/**
+ * @brief a header file with ouput classification results
+ * @file classification_results.hpp
+ */
+#include <string>
+#include <vector>
+#include <iostream>
+#include <utility>
+
+#include <ie_blob.h>
+
+/**
+ * @class ClassificationResult
+ * @brief A ClassificationResult creates an output table with results
+ */
+class ClassificationResult {
+private:
+    const std::string _classidStr = "classid";
+    const std::string _probabilityStr = "probability";
+    const std::string _labelStr = "label";
+    size_t _nTop;
+    InferenceEngine::Blob::Ptr _outBlob;
+    const std::vector<std::string> _labels;
+    const std::vector<std::string> _imageNames;
+    const size_t _batchSize;
+
+    void printHeader() {
+        std::cout << _classidStr << " " << _probabilityStr;
+        if (!_labels.empty())
+            std::cout << " " << _labelStr;
+        std::string classidColumn(_classidStr.length(), '-');
+        std::string probabilityColumn(_probabilityStr.length(), '-');
+        std::string labelColumn(_labelStr.length(), '-');
+        std::cout << std::endl << classidColumn << " " << probabilityColumn;
+        if (!_labels.empty())
+            std::cout << " " << labelColumn;
+        std::cout << std::endl;
+    }
+
+public:
+    explicit ClassificationResult(InferenceEngine::Blob::Ptr output_blob,
+                                  std::vector<std::string> image_names = {},
+                                  size_t batch_size = 1,
+                                  size_t num_of_top = 10,
+                                  std::vector<std::string> labels = {}) :
+            _nTop(num_of_top),
+            _outBlob(std::move(output_blob)),
+            _labels(std::move(labels)),
+            _imageNames(std::move(image_names)),
+            _batchSize(batch_size) {
+        if (_imageNames.size() != _batchSize) {
+            throw std::logic_error("Batch size should be equal to the number of images.");
+        }
+    }
+
+    /**
+    * @brief prints formatted classification results
+    */
+    void print() {
+        /** This vector stores id's of top N results **/
+        std::vector<unsigned> results;
+        TopResults(_nTop, *_outBlob, results);
+
+        /** Print the result iterating over each batch **/
+        std::cout << std::endl << "Top " << _nTop << " results:" << std::endl << std::endl;
+        for (unsigned int image_id = 0; image_id < _batchSize; ++image_id) {
+            std::cout << "Image " << _imageNames[image_id] << std::endl << std::endl;
+            printHeader();
+
+            for (size_t id = image_id * _nTop, cnt = 0; id < (image_id + 1) * _nTop; ++cnt, ++id) {
+                std::cout.precision(7);
+                /** Getting probability for resulting class **/
+                const auto result = _outBlob->buffer().
+                        as<InferenceEngine::PrecisionTrait<InferenceEngine::Precision::FP32>::value_type*>()
+                [results[id] + image_id * (_outBlob->size() / _batchSize)];
+
+                std::cout << std::setw(static_cast<int>(_classidStr.length())) << std::left << results[id] << " ";
+                std::cout << std::left << std::setw(static_cast<int>(_probabilityStr.length())) << std::fixed << result;
+
+                if (!_labels.empty()) {
+                    std::cout << " " + _labels[results[id]];
+                }
+                std::cout << std::endl;
+            }
+            std::cout << std::endl;
+        }
+    }
+};
diff --git a/inference-engine/samples/common/samples/common.hpp b/inference-engine/samples/common/samples/common.hpp
index 88c87e3ff..44bcca3f3 100644
--- a/inference-engine/samples/common/samples/common.hpp
+++ b/inference-engine/samples/common/samples/common.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -113,7 +113,7 @@ static UNUSED InferenceEngine::InferenceEnginePluginPtr selectPlugin(const std::
  * @param filepath - full file name
  * @return filename without extension
  */
-static std::string fileNameNoExt(const std::string &filepath) {
+static UNUSED std::string fileNameNoExt(const std::string &filepath) {
     auto pos = filepath.rfind('.');
     if (pos == std::string::npos) return filepath;
     return filepath.substr(0, pos);
@@ -640,6 +640,19 @@ inline double getDurationOf(std::function<void()> func) {
     return std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1000>>>(fs).count();
 }
 
+static std::vector<std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>>
+perfCountersSorted(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap) {
+    using perfItem = std::pair<std::string, InferenceEngine::InferenceEngineProfileInfo>;
+    std::vector<perfItem> sorted;
+    for (auto &kvp : perfMap) sorted.push_back(kvp);
+
+    std::stable_sort(sorted.begin(), sorted.end(),
+                     [](const perfItem& l, const perfItem& r) {
+                         return l.second.execution_index < r.second.execution_index;
+                     });
+
+    return sorted;
+}
 
 static UNUSED void printPerformanceCounts(const std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>& performanceMap,
                                           std::ostream &stream,
@@ -649,7 +662,10 @@ static UNUSED void printPerformanceCounts(const std::map<std::string, InferenceE
     if (bshowHeader) {
         stream << std::endl << "performance counts:" << std::endl << std::endl;
     }
-    for (const auto & it : performanceMap) {
+
+    auto performanceMapSorted = perfCountersSorted(performanceMap);
+
+    for (const auto & it : performanceMapSorted) {
         std::string toPrint(it.first);
         const int maxLayerName = 30;
 
@@ -683,17 +699,17 @@ static UNUSED void printPerformanceCounts(const std::map<std::string, InferenceE
 }
 
 static UNUSED void printPerformanceCounts(InferenceEngine::InferRequest request, std::ostream &stream) {
-    auto perfomanceMap = request.GetPerformanceCounts();
-    printPerformanceCounts(perfomanceMap, stream);
+    auto performanceMap = request.GetPerformanceCounts();
+    printPerformanceCounts(performanceMap, stream);
 }
 
 /**
  * @deprecated
  */
 static UNUSED void printPerformanceCountsPlugin(InferenceEngine::InferenceEnginePluginPtr plugin, std::ostream &stream) {
-    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfomanceMap;
-    plugin->GetPerformanceCounts(perfomanceMap, nullptr);
-    printPerformanceCounts(perfomanceMap, stream);
+    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> performanceMap;
+    plugin->GetPerformanceCounts(performanceMap, nullptr);
+    printPerformanceCounts(performanceMap, stream);
 }
 
 /**
@@ -883,7 +899,7 @@ public:
             for (auto desObj = desiredObjects.alist.begin(); desObj != desiredObjects.alist.end(); desObj++, j++) {
                 double iou = DetectedObject::ioU(detObj, *desObj);
                 if (iou > overlap_max) {
-                    overlap_max = iou;
+                    overlap_max = static_cast<float>(iou);
                     jmax = j;
                     desmax = desObj;
                 }
@@ -964,7 +980,7 @@ public:
                         break;
                     } else {
                         if (max_precs[j] < prec[i]) {
-                            max_precs[j] = prec[i];
+                            max_precs[j] = static_cast<float>(prec[i]);
                         }
                     }
                 }
@@ -1014,10 +1030,10 @@ static UNUSED void addRectangles(unsigned char *data, size_t height, size_t widt
     for (size_t i = 0; i < detectedObjects.size(); i++) {
         int cls = detectedObjects[i].objectType % colors.size();
 
-        int xmin = detectedObjects[i].xmin * width;
-        int xmax = detectedObjects[i].xmax * width;
-        int ymin = detectedObjects[i].ymin * height;
-        int ymax = detectedObjects[i].ymax * height;
+        int xmin = static_cast<int>(detectedObjects[i].xmin * width);
+        int xmax = static_cast<int>(detectedObjects[i].xmax * width);
+        int ymin = static_cast<int>(detectedObjects[i].ymin * height);
+        int ymax = static_cast<int>(detectedObjects[i].ymax * height);
 
         size_t shift_first = ymin*width * 3;
         size_t shift_second = ymax*width * 3;
diff --git a/inference-engine/samples/validation_app/console_progress.hpp b/inference-engine/samples/common/samples/console_progress.hpp
index 35047a4f1..89b0d74f4 100644
--- a/inference-engine/samples/validation_app/console_progress.hpp
+++ b/inference-engine/samples/common/samples/console_progress.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -69,7 +69,7 @@ public:
      * @param add - value to add
      */
     void addProgress(int add) {
-        if (add < 0 && -add > current) {
+        if (add < 0 && -add > static_cast<int>(current)) {
             add = -static_cast<int>(current);
         }
         updateProgress(current + add);
diff --git a/inference-engine/samples/validation_app/csv_dumper.hpp b/inference-engine/samples/common/samples/csv_dumper.hpp
index 2e0b22f7a..4dbcfa19f 100644
--- a/inference-engine/samples/validation_app/csv_dumper.hpp
+++ b/inference-engine/samples/common/samples/csv_dumper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/common/samples/ocv_common.hpp b/inference-engine/samples/common/samples/ocv_common.hpp
index c979cd309..93725036f 100644
--- a/inference-engine/samples/common/samples/ocv_common.hpp
+++ b/inference-engine/samples/common/samples/ocv_common.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -27,7 +27,8 @@ void matU8ToBlob(const cv::Mat& orig_image, InferenceEngine::Blob::Ptr& blob, in
     T* blob_data = blob->buffer().as<T*>();
 
     cv::Mat resized_image(orig_image);
-    if (width != orig_image.size().width || height!= orig_image.size().height) {
+    if (static_cast<int>(width) != orig_image.size().width ||
+            static_cast<int>(height) != orig_image.size().height) {
         cv::resize(orig_image, resized_image, cv::Size(width, height));
     }
 
@@ -50,7 +51,7 @@ void matU8ToBlob(const cv::Mat& orig_image, InferenceEngine::Blob::Ptr& blob, in
  * @param mat - given cv::Mat object with an image data.
  * @return resulting Blob pointer.
  */
-static InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat &mat) {
+static UNUSED InferenceEngine::Blob::Ptr wrapMat2Blob(const cv::Mat &mat) {
     size_t channels = mat.channels();
     size_t height = mat.size().height;
     size_t width = mat.size().width;
diff --git a/inference-engine/samples/common/samples/slog.hpp b/inference-engine/samples/common/samples/slog.hpp
index 23eb8d34a..c50b4c94a 100644
--- a/inference-engine/samples/common/samples/slog.hpp
+++ b/inference-engine/samples/common/samples/slog.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/create_msvc2015_solution.bat b/inference-engine/samples/create_msvc2015_solution.bat
deleted file mode 100644
index b0f67c8a0..000000000
--- a/inference-engine/samples/create_msvc2015_solution.bat
+++ /dev/null
@@ -1,31 +0,0 @@
-@echo off
-
-:: Copyright (c) 2018 Intel Corporation
-::
-:: Licensed under the Apache License, Version 2.0 (the "License");
-:: you may not use this file except in compliance with the License.
-:: You may obtain a copy of the License at
-::
-::      http://www.apache.org/licenses/LICENSE-2.0
-::
-:: Unless required by applicable law or agreed to in writing, software
-:: distributed under the License is distributed on an "AS IS" BASIS,
-:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-:: See the License for the specific language governing permissions and
-:: limitations under the License.
-
-
-@setlocal
-set "ROOT_DIR=%~dp0"
-
-set "SOLUTION_DIR64=%USERPROFILE%\Documents\Intel\OpenVINO\inference_engine_samples_2015"
-if exist "%SOLUTION_DIR64%" rd /s /q "%SOLUTION_DIR64%"
-if "%InferenceEngine_DIR%"=="" set "InferenceEngine_DIR=%ROOT_DIR%\..\share"
-if exist "%ROOT_DIR%\..\..\bin\setupvars.bat" call "%ROOT_DIR%\..\..\bin\setupvars.bat"
-if exist "%ROOT_DIR%\..\..\..\bin\setupvars.bat" call "%ROOT_DIR%\..\..\..\bin\setupvars.bat"
-
-echo Creating Visual Studio 2015 (x64) files in %SOLUTION_DIR64%... && ^
-cd "%ROOT_DIR%" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio 14 2015 Win64" "%ROOT_DIR%"
-
-echo Done.
-pause
-\ No newline at end of file
diff --git a/inference-engine/samples/create_msvc2017_solution.bat b/inference-engine/samples/create_msvc2017_solution.bat
deleted file mode 100644
index 6bc35216e..000000000
--- a/inference-engine/samples/create_msvc2017_solution.bat
+++ /dev/null
@@ -1,31 +0,0 @@
-@echo off
-
-:: Copyright (c) 2018 Intel Corporation
-::
-:: Licensed under the Apache License, Version 2.0 (the "License");
-:: you may not use this file except in compliance with the License.
-:: You may obtain a copy of the License at
-::
-::      http://www.apache.org/licenses/LICENSE-2.0
-::
-:: Unless required by applicable law or agreed to in writing, software
-:: distributed under the License is distributed on an "AS IS" BASIS,
-:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-:: See the License for the specific language governing permissions and
-:: limitations under the License.
-
-
-@setlocal
-set "ROOT_DIR=%~dp0"
-
-set "SOLUTION_DIR64=%USERPROFILE%\Documents\Intel\OpenVINO\inference_engine_samples_2017"
-if exist "%SOLUTION_DIR64%" rd /s /q "%SOLUTION_DIR64%"
-if "%InferenceEngine_DIR%"=="" set "InferenceEngine_DIR=%ROOT_DIR%\..\share"
-if exist "%ROOT_DIR%\..\..\bin\setupvars.bat" call "%ROOT_DIR%\..\..\bin\setupvars.bat"
-if exist "%ROOT_DIR%\..\..\..\bin\setupvars.bat" call "%ROOT_DIR%\..\..\..\bin\setupvars.bat"
-
-echo Creating Visual Studio 2017 (x64) files in %SOLUTION_DIR64%... && ^
-cd "%ROOT_DIR%" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio 15 2017 Win64" "%ROOT_DIR%"
-
-echo Done.
-pause
-\ No newline at end of file
diff --git a/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt b/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt
index d70a974c7..01deda649 100644
--- a/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt
+++ b/inference-engine/samples/hello_autoresize_classification/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "hello_autoresize_classification")
 
 file (GLOB SRC
diff --git a/inference-engine/samples/hello_autoresize_classification/README.md b/inference-engine/samples/hello_autoresize_classification/README.md
index 524ec227b..bb479b763 100644
--- a/inference-engine/samples/hello_autoresize_classification/README.md
+++ b/inference-engine/samples/hello_autoresize_classification/README.md
@@ -1,28 +1,33 @@
-# Hello Autoresize Classification Sample
+# Hello Autoresize Classification C++ Sample
 
 This topic describes how to run the Hello Autoresize Classification sample application.
-The sample is simplified version of [Image Classification Sample](./samples/classification_sample/README.md).
-It's intended to demonstrate using of new input autoresize API of Inference Engine in applications. Refer to
-[Integrate with customer application New Request API](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
+The sample is simplified version of [Image Classification Sample](./inference-engine/samples/classification_sample/README.md).
+It demonstrates how to use the new input autoresize API of Inference Engine in applications. Refer to
+[Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
 
 There is also new API introduced to crop a ROI object and set it as input without additional memory re-allocation.
-To properly demonstrate this new API it's required to run several networks in pipeline which is out of scope of this sample.
-Please refer to [Object Detection for SSD Demo app](./samples/object_detection_demo_ssd_async/README.md) or
-[Security Barrier Camera Demo](./samples/security_barrier_camera_demo/README.md) or
-[Crossroad Camera Demo](./samples/crossroad_camera_demo/README.md) with an example of using of new crop ROI API.
+To properly demonstrate this new API, it is required to run several networks in pipeline which is out of scope of this sample.
+Please refer to [Object Detection for SSD Demo](./inference-engine/samples/object_detection_demo_ssd_async/README.md),
+[Security Barrier Camera Demo](./inference-engine/samples/security_barrier_camera_demo/README.md), or
+[Crossroad Camera Demo](./inference-engine/samples/crossroad_camera_demo/README.md) with an example of using of new crop ROI API.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 ## Running
 
-You can do inference on an image using a trained AlexNet network on Intel&reg; Processors using the following command:
+To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+You can do inference on an image using a trained AlexNet network on CPU using the following command:
 ```sh
 ./hello_autoresize_classification <path_to_model>/alexnet_fp32.xml <path_to_image>/cat.bmp CPU
 ```
 
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-### Outputs
+## Sample Output
 
-The application outputs top-10 inference results. 
+The application outputs top-10 inference results.
 
-## See Also 
+## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/hello_autoresize_classification/main.cpp b/inference-engine/samples/hello_autoresize_classification/main.cpp
index 2ac933783..9700416cd 100644
--- a/inference-engine/samples/hello_autoresize_classification/main.cpp
+++ b/inference-engine/samples/hello_autoresize_classification/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,6 +10,7 @@
 
 #include <inference_engine.hpp>
 #include <samples/ocv_common.hpp>
+#include <samples/classification_results.h>
 
 using namespace InferenceEngine;
 
@@ -28,11 +29,11 @@ int main(int argc, char *argv[]) {
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
-        InferencePlugin plugin = PluginDispatcher({"../../../lib/intel64", ""}).getPluginByDevice(device_name);
+        InferencePlugin plugin = PluginDispatcher().getPluginByDevice(device_name);
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
-        int batchSize = 1;
+        size_t batchSize = 1;
         CNNNetReader network_reader;
         network_reader.ReadNetwork(input_model);
         network_reader.ReadWeights(input_model.substr(0, input_model.size() - 4) + ".bin");
@@ -90,18 +91,9 @@ int main(int argc, char *argv[]) {
 
         // --------------------------- 8. Process output ------------------------------------------------------
         Blob::Ptr output = infer_request.GetBlob(output_name);
-        auto output_data = output->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
-
-        std::vector<unsigned> results;
-        /*  This is to sort output probabilities and put it to results vector */
-        TopResults(10, *output, results);
-
-        std::cout << std::endl << "Top 10 results:" << std::endl << std::endl;
-        for (size_t id = 0; id < 10; ++id) {
-            std::cout.precision(7);
-            auto result = output_data[results[id]];
-            std::cout << std::left << std::fixed << result << " label #" << results[id] << std::endl;
-        }
+        // Print classification results
+        ClassificationResult classificationResult(output, {input_image_path});
+        classificationResult.print();
         // -----------------------------------------------------------------------------------------------------
 
         std::cout << std::endl << "total inference time: " << total << std::endl;
diff --git a/inference-engine/samples/hello_classification/CMakeLists.txt b/inference-engine/samples/hello_classification/CMakeLists.txt
index 9531a2148..845f7e9d7 100644
--- a/inference-engine/samples/hello_classification/CMakeLists.txt
+++ b/inference-engine/samples/hello_classification/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "hello_classification")
 
 file (GLOB SRC
diff --git a/inference-engine/samples/hello_classification/main.cpp b/inference-engine/samples/hello_classification/main.cpp
index d9482e19b..b3b51584d 100644
--- a/inference-engine/samples/hello_classification/main.cpp
+++ b/inference-engine/samples/hello_classification/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,6 +14,7 @@
 
 #include <opencv2/opencv.hpp>
 #include <inference_engine.hpp>
+#include <samples/classification_results.h>
 
 using namespace InferenceEngine;
 
@@ -41,8 +42,7 @@ int wmain(int argc, wchar_t *argv[]) {
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
-        PluginDispatcher dispatcher({_T("../../../lib/intel64"), _T("")});
-        InferencePlugin plugin(dispatcher.getSuitablePlugin(TargetDevice::eCPU));
+        InferencePlugin plugin(PluginDispatcher().getSuitablePlugin(TargetDevice::eCPU));
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
@@ -103,18 +103,10 @@ int wmain(int argc, wchar_t *argv[]) {
 
         // --------------------------- 8. Process output ------------------------------------------------------
         Blob::Ptr output = infer_request.GetBlob(output_name);
-        auto output_data = output->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
+        // Print classification results
+        ClassificationResult classificationResult(output, {fileNameToString(input_image_path)});
+        classificationResult.print();
 
-        std::vector<unsigned> results;
-        /*  This is to sort output probabilities and put it to results vector */
-        TopResults(10, *output, results);
-
-        std::cout << std::endl << "Top 10 results:" << std::endl << std::endl;
-        for (size_t id = 0; id < 10; ++id) {
-            std::cout.precision(7);
-            auto result = output_data[results[id]];
-            std::cout << std::left << std::fixed << result << " label #" << results[id] << std::endl;
-        }
         // -----------------------------------------------------------------------------------------------------
     } catch (const std::exception & ex) {
         std::cerr << ex.what() << std::endl;
diff --git a/inference-engine/samples/hello_request_classification/CMakeLists.txt b/inference-engine/samples/hello_request_classification/CMakeLists.txt
index 881845395..c7dbb1e5c 100644
--- a/inference-engine/samples/hello_request_classification/CMakeLists.txt
+++ b/inference-engine/samples/hello_request_classification/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "hello_request_classification")
 
 file (GLOB SRC
diff --git a/inference-engine/samples/hello_request_classification/README.md b/inference-engine/samples/hello_request_classification/README.md
index 708fa81f5..fd8d35bbc 100644
--- a/inference-engine/samples/hello_request_classification/README.md
+++ b/inference-engine/samples/hello_request_classification/README.md
@@ -1,23 +1,26 @@
-# Hello Infer Request Classification Sample
+# Hello Infer Request Classification C++ Sample
 
 This topic describes how to run the Hello Infer Classification sample application.
-The sample is simplified version of [Image Classification Sample](./samples/classification_sample/README.md).
-It's intended to demonstrate using of new Infer Request API of Inference Engine in applications. Refer to 
-[Integrate with customer application New Request API](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
+The sample is simplified version of [Image Classification Sample](./inference-engine/samples/classification_sample/README.md).
+It demonstrates how to use the new Infer Request API of Inference Engine in applications. Refer to
+[Integrate the Inference Engine New Request API with Your Application](./docs/IE_DG/Integrate_with_customer_application_new_API.md) for details.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 ## Running
 
-You can do inference on an image using a trained AlexNet network on Intel&reg; Processors using the following command:
-```sh
-./hello_autoresize_classification <path_to_model>/alexnet_fp32.xml <path_to_image>/cat.bmp CPU
-```
+To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
 
 > **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
-### Outputs
+You can do inference on an image using a trained AlexNet network on CPU using the following command:
+```sh
+./hello_autoresize_classification <path_to_model>/alexnet_fp32.xml <path_to_image>/cat.bmp CPU
+```
 
-The application outputs top-10 inference results. 
+## Sample Output
 
+The application outputs top-10 inference results.
 
-## See Also 
+## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
diff --git a/inference-engine/samples/hello_request_classification/main.cpp b/inference-engine/samples/hello_request_classification/main.cpp
index d5fabb27a..e03142b8f 100644
--- a/inference-engine/samples/hello_request_classification/main.cpp
+++ b/inference-engine/samples/hello_request_classification/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,6 +10,7 @@
 
 #include <opencv2/opencv.hpp>
 #include <inference_engine.hpp>
+#include <samples/classification_results.h>
 
 using namespace InferenceEngine;
 
@@ -28,7 +29,7 @@ int main(int argc, char *argv[]) {
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
-        InferencePlugin plugin = PluginDispatcher({"../../../lib/intel64", ""}).getPluginByDevice(device_name);
+        InferencePlugin plugin = PluginDispatcher().getPluginByDevice(device_name);
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
@@ -123,18 +124,10 @@ int main(int argc, char *argv[]) {
         // --------------------------- 8. Process output -------------------------------------------------------
         for (auto &item : output_info) {
             auto output_name = item.first;
-            Blob::Ptr output = async_infer_request.GetBlob(output_name);
-            auto output_buffer = output->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>();
-            std::vector<unsigned> results;
-            /**  This is to sort output probabilities and put it to results vector **/
-            TopResults(10, *output, results);
-
-            std::cout << std::endl << "Top 10 results:" << std::endl << std::endl;
-            for (size_t id = 0; id < 10; ++id) {
-                std::cout.precision(7);
-                auto result = output_buffer[results[id]];
-                std::cout << std::left << std::fixed << result << " label #" << results[id] << std::endl;
-            }
+            Blob::Ptr output = async_infer_request.GetBlob(output_name);;
+            // Print classification results
+            ClassificationResult classificationResult(output, {input_image_path});
+            classificationResult.print();
         }
         // -----------------------------------------------------------------------------------------------------
     } catch (const std::exception & ex) {
diff --git a/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt b/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt
index ffc9856a2..b0ef62b8b 100644
--- a/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt
+++ b/inference-engine/samples/hello_shape_infer_ssd/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-cmake_minimum_required(VERSION 2.8)
 
 set(TARGET_NAME "hello_shape_infer_ssd")
 
diff --git a/inference-engine/samples/hello_shape_infer_ssd/README.md b/inference-engine/samples/hello_shape_infer_ssd/README.md
index f275abc36..0f3846e64 100644
--- a/inference-engine/samples/hello_shape_infer_ssd/README.md
+++ b/inference-engine/samples/hello_shape_infer_ssd/README.md
@@ -1,18 +1,22 @@
-# Hello Shape Infer Sample
+# Hello Shape Infer C++ Sample
 
 This topic demonstrates how to run the Hello Shape Infer SSD application, which does inference using object detection
 networks like SSD-VGG. The sample shows how to use [Shape Inference feature](./docs/IE_DG/ShapeInference.md).
 
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
 ## Running
 
-You can use the following command to do inference on Intel&reg; Processors on an image using a trained SSD network:
+To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+You can use the following command to do inference on CPU on an image using a trained SSD network:
 ```sh
 ./hello_shape_infer_ssd <path_to_model>/ssd_300.xml <path_to_image>/500x500.bmp CPU 3
 ```
 
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
-### Outputs
+## Sample Output
 
 The application renders an image with detected objects enclosed in rectangles. It outputs the list of classes
 of the detected objects along with the respective confidence values and the coordinates of the
@@ -20,3 +24,5 @@ rectangles to the standard output stream.
 
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
diff --git a/inference-engine/samples/hello_shape_infer_ssd/main.cpp b/inference-engine/samples/hello_shape_infer_ssd/main.cpp
index 020b941ac..ee691e50e 100644
--- a/inference-engine/samples/hello_shape_infer_ssd/main.cpp
+++ b/inference-engine/samples/hello_shape_infer_ssd/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -29,7 +29,7 @@ int main(int argc, char* argv[]) {
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
-        InferencePlugin plugin = PluginDispatcher({"../../../lib/intel64", ""}).getPluginByDevice(device_name);
+        InferencePlugin plugin = PluginDispatcher().getPluginByDevice(device_name);
         IExtensionPtr cpuExtension, inPlaceExtension;
         if (device_name == "CPU") {
             cpuExtension = std::make_shared<Extensions::Cpu::CpuExtensions>();
@@ -53,7 +53,6 @@ int main(int argc, char* argv[]) {
 
         // --------------------------- Resize network to match image sizes and given batch----------------------
         if (device_name == "CPU") {
-            // register shape inference functions (SpatialTransformer) from CPU Extension
             network.AddExtension(cpuExtension);
             // register sample's custom shape inference (CustomReLU)
             network.AddExtension(inPlaceExtension);
@@ -121,7 +120,7 @@ int main(int argc, char* argv[]) {
 
         // --------------------------- 6. Prepare input --------------------------------------------------------
         Blob::Ptr input = infer_request.GetBlob(input_name);
-        for (int b = 0; b < batch_size; b++) {
+        for (size_t b = 0; b < batch_size; b++) {
             matU8ToBlob<uint8_t>(image, input, b);
         }
         // -----------------------------------------------------------------------------------------------------
diff --git a/inference-engine/samples/hello_shape_infer_ssd/shape_infer_extension.hpp b/inference-engine/samples/hello_shape_infer_ssd/shape_infer_extension.hpp
index 110fa65f0..e70afd005 100644
--- a/inference-engine/samples/hello_shape_infer_ssd/shape_infer_extension.hpp
+++ b/inference-engine/samples/hello_shape_infer_ssd/shape_infer_extension.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -78,7 +78,7 @@ private:
 
 class CustomReLUResizeImpl : public InferenceEngine::IShapeInferImpl {
 public:
-    InferenceEngine::StatusCode inferShapes(const std::vector<InferenceEngine::SizeVector>& inShapes,
+    InferenceEngine::StatusCode inferShapes(const std::vector<InferenceEngine::Blob::CPtr>& inBlobs,
                                             const std::map<std::string, std::string>& params,
                                             const std::map<std::string, InferenceEngine::Blob::Ptr>& blobs,
                                             std::vector<InferenceEngine::SizeVector>& outShapes,
@@ -89,7 +89,9 @@ public:
                          " shape inference for the first time (next messages won't be printed)" << std::endl;
             wasCalled = true;
         }
-        outShapes = inShapes;
+        for (const auto& blob : inBlobs) {
+            outShapes.push_back(blob->getTensorDesc().getDims());
+        }
         return InferenceEngine::StatusCode::OK;
     }
 };
diff --git a/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt b/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt
index aab478866..f8960bd2a 100644
--- a/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt
+++ b/inference-engine/samples/lenet_network_graph_builder/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "lenet_network_graph_builder")
 
 file (GLOB MAIN_SRC
@@ -34,4 +32,4 @@ target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} gflags format_
 
 if(UNIX)
     target_link_libraries( ${TARGET_NAME} ${LIB_DL} pthread)
-endif()
-\ No newline at end of file
+endif()
diff --git a/inference-engine/samples/lenet_network_graph_builder/README.md b/inference-engine/samples/lenet_network_graph_builder/README.md
index d7fdfb792..6ba3d1bba 100644
--- a/inference-engine/samples/lenet_network_graph_builder/README.md
+++ b/inference-engine/samples/lenet_network_graph_builder/README.md
@@ -1,12 +1,23 @@
-# Lenet Number Classifications Network using Graph Builder API
+# LeNet Number Classifications Network Using Graph Builder API
 
 This sample demonstrates how to execute inference using Inference Engine Graph Builder API to build a network on example of the LeNet classifications network.
-XML file is not required for network building now. Inference Engine Graph Builder API allows building of a network "on the fly" from source code. The sample uses 1-channel ubyte pictures as input.
-<br>
+
+XML file is not required for network building now. Inference Engine Graph Builder API allows building of a network "on the fly" from source code. The sample uses one-channel `ubyte` pictures as input.
+
+## How It Works
+
+Upon the start-up the sample reads command line parameters and builds a network using Graph Builder API and passed weights file.
+Then, the application loads built network and an image to the Inference Engine plugin.
+
+When inference is done, the application outputs inference results to the standard output stream.
+
+> **NOTE**: This sample is implemented to support models with FP32 weights only.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 ## Running
 
-Running the application with the <code>-h</code> option yields the following usage message:
+Running the application with the `-h` option yields the following usage message:
 ```sh
 ./lenet_network_graph_builder -h
 InferenceEngine:
@@ -19,11 +30,11 @@ Options:
     -h                      Print a usage message.
     -m "<path>"             Path to a .bin file with weights for trained model
     -i "<path>"             Required. Path to image or folder with images
-    -d "<device>"           Specify the target device to infer on this. Sample will look for a suitable plugin for device specified(default value is CPU)
+    -d "<device>"           Specify the target device to infer on this. Sample will look for a suitable plugin for device specified. Default value is CPU
     -pp "<path>"            Path to a plugin folder
     -pc                     Enables per-layer performance report
-    -nt "<integer>"         Number of top results (default 10)
-    -ni "<integer>"         Number of iterations (default 1)
+    -nt "<integer>"         Number of top results. Default value is 10
+    -ni "<integer>"         Number of iterations. Default value is 1
 
 ```
 
@@ -34,21 +45,10 @@ For example, to do inference of an ubyte image on a GPU run the following comman
 ./lenet_network_graph_builder -i <path_to_image> -m <path_to_weights_file> -d GPU
 ```
 
-### Outputs
+## Sample Output
 
 By default the application outputs top-10 inference results for each infer request.
 In addition to this information it will provide throughput value measured in frames per seconds.
 
-### How it works
-
-Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference 
-Engine plugin. When inference is done, the application creates an 
-output image and outputs data to the standard output stream.
-
-Upon the start-up the sample reads command line parameters and builds a network using Graph Builder API and passed weights file.
-Then, the application loads built network and an image to the Inference Engine plugin.
-
-When inference is done, the application outputs inference results to the standard output stream.
-
 ## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
diff --git a/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp b/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp
index 7cb59e2bd..47c627725 100644
--- a/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp
+++ b/inference-engine/samples/lenet_network_graph_builder/lenet_network_graph_builder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,8 +15,6 @@
 #include <dirent.h>
 #endif
 
-#define DEFAULT_PATH_P "./lib"
-
 /// @brief message for help argument
 static const char help_message[] = "Print a usage message";
 
@@ -28,8 +26,8 @@ static const char model_message[] = "Path to an .bin file with weights for train
 
 /// @brief message for assigning cnn calculation to device
 static const char target_device_message[] = "Specify the target device to infer on this. " \
-                                            "Sample will look for a suitable plugin for device specified" \
-                                            "(default value is CPU)";
+                                            "Sample will look for a suitable plugin for device specified. " \
+                                            "Default value is CPU";
 
 /// @brief message for plugin_path argument
 static const char plugin_path_message[] = "Path to a plugin folder";
@@ -38,10 +36,10 @@ static const char plugin_path_message[] = "Path to a plugin folder";
 static const char performance_counter_message[] = "Enables per-layer performance report";
 
 /// @brief message for top results number
-static const char ntop_message[] = "Number of top results (default 10)";
+static const char ntop_message[] = "Number of top results. Default 10";
 
 /// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations (default 1)";
+static const char iterations_count_message[] = "Number of iterations. Default value is 1";
 
 /// \brief Define flag for showing help message <br>
 DEFINE_bool(h, false, help_message);
@@ -65,10 +63,10 @@ DEFINE_string(pp, "", plugin_path_message);
 DEFINE_bool(pc, false, performance_counter_message);
 
 /// @brief Top results number (default 10) <br>
-DEFINE_int32(nt, 10, ntop_message);
+DEFINE_uint32(nt, 10, ntop_message);
 
 /// @brief Iterations count (default 1)
-DEFINE_int32(ni, 1, iterations_count_message);
+DEFINE_uint32(ni, 1, iterations_count_message);
 
 /**
  * \brief This function show a help message
@@ -87,4 +85,3 @@ static void showUsage() {
     std::cout << "    -nt \"<integer>\"         " << ntop_message << std::endl;
     std::cout << "    -ni \"<integer>\"         " << iterations_count_message << std::endl;
 }
-
diff --git a/inference-engine/samples/lenet_network_graph_builder/main.cpp b/inference-engine/samples/lenet_network_graph_builder/main.cpp
index cd9031aa0..ab63bab4c 100644
--- a/inference-engine/samples/lenet_network_graph_builder/main.cpp
+++ b/inference-engine/samples/lenet_network_graph_builder/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,6 +6,7 @@
 #include <vector>
 #include <string>
 #include <memory>
+#include <limits>
 
 #include <inference_engine.hpp>
 #include <ie_builders.hpp>
@@ -95,7 +96,7 @@ int main(int argc, char *argv[]) {
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
         slog::info << "Loading plugin" << slog::endl;
-        InferencePlugin plugin = PluginDispatcher({FLAGS_pp, "../../../lib/intel64", ""}).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
         printPluginVersion(plugin, std::cout);
 
         /** Per layer metrics **/
@@ -108,14 +109,16 @@ int main(int argc, char *argv[]) {
         TBlob<uint8_t>::CPtr weightsPtr = ReadWeights(FLAGS_m);
 
         Builder::Network builder("LeNet");
-        size_t layerId = builder.addLayer(Builder::InputLayer("data").setPort(Port({1, 1, 28, 28})));
+        idx_t layerId = builder.addLayer(Builder::InputLayer("data").setPort(Port({1, 1, 28, 28})));
         auto ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {500}, Layout::C),
                 weightsPtr->cbuffer().as<float *>());
         auto ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {20}, Layout::C),
                 weightsPtr->cbuffer().as<float *>() + 500);
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer("conv1").setKernel({5, 5}).setDilation({1, 1})
-                  .setGroup(1).setStrides({1, 1}).setOutDepth(20).setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0})
-                  .setWeights(ptrWeights).setBiases(ptrBiases));
+        idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
+        idx_t biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer("conv1")
+                  .setKernel({5, 5}).setDilation({1, 1}).setGroup(1).setStrides({1, 1}).setOutDepth(20)
+                  .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}));
         layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer("pool1").setExcludePad(true).setKernel({2, 2})
                   .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0})
                   .setPoolingType(Builder::PoolingLayer::PoolingType::MAX)
@@ -124,9 +127,11 @@ int main(int argc, char *argv[]) {
                 weightsPtr->cbuffer().as<float *>() + 520);
         ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {50}, Layout::C),
                 weightsPtr->cbuffer().as<float *>() + 25520);
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer("conv2").setDilation({1, 1}).setGroup(1)
-                  .setKernel({5, 5}).setOutDepth(50).setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0})
-                  .setStrides({1, 1}).setWeights(ptrWeights).setBiases(ptrBiases));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer("conv2")
+                  .setDilation({1, 1}).setGroup(1).setKernel({5, 5}).setOutDepth(50).setPaddingsBegin({0, 0})
+                  .setPaddingsEnd({0, 0}).setStrides({1, 1}));
         layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer("pool2").setExcludePad(true).setKernel({2, 2})
                   .setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setPoolingType(Builder::PoolingLayer::PoolingType::MAX)
                   .setRoundingType(Builder::PoolingLayer::RoundingType::CEIL).setStrides({2, 2}));
@@ -134,17 +139,21 @@ int main(int argc, char *argv[]) {
                 weightsPtr->cbuffer().as<float *>() + 102280 / 4);
         ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {500}, Layout::C),
                 weightsPtr->cbuffer().as<float *>() + 1702280 / 4);
-        layerId = builder.addLayer({{layerId}}, Builder::FullyConnectedLayer("ip1").setOutputNum(500)
-                  .setWeights(ptrWeights).setBiases(ptrBiases));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer("ip1")
+                .setOutputNum(500));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer("relu1").setNegativeSlope(0.0f));
         ptrWeights = make_shared_blob(TensorDesc(Precision::FP32, {5000}, Layout::C),
                 weightsPtr->cbuffer().as<float *>() + 1704280 / 4);
         ptrBiases = make_shared_blob(TensorDesc(Precision::FP32, {10}, Layout::C),
                 weightsPtr->cbuffer().as<float *>() + 1724280 / 4);
-        layerId = builder.addLayer({{layerId}}, Builder::FullyConnectedLayer("ip2").setOutputNum(10)
-                  .setWeights(ptrWeights).setBiases(ptrBiases));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(ptrWeights));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(ptrBiases));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer("ip2")
+                  .setOutputNum(10));
         layerId = builder.addLayer({{layerId}}, Builder::SoftMaxLayer("prob").setAxis(1));
-        size_t outputId = builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer("sf_out"));
+        builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer("sf_out"));
 
         CNNNetwork network{Builder::convertToICNNNetwork(builder.build())};
         // -----------------------------------------------------------------------------------------------------
@@ -272,7 +281,7 @@ int main(int argc, char *argv[]) {
 
         double total = 0.0;
         /** Start inference & calc performance **/
-        for (int iter = 0; iter < FLAGS_ni; ++iter) {
+        for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
             auto t0 = Time::now();
             infer_request.Infer();
             auto t1 = Time::now();
@@ -289,7 +298,7 @@ int main(int argc, char *argv[]) {
         auto outputData = outputBlob->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
 
         /** Validating -nt value **/
-        const int resultsCnt = outputBlob->size() / batchSize;
+        const size_t resultsCnt = outputBlob->size() / batchSize;
         if (FLAGS_nt > resultsCnt || FLAGS_nt < 1) {
             slog::warn << "-nt " << FLAGS_nt << " is not available for this network (-nt should be less than " \
                       << resultsCnt+1 << " and more than 0)\n            will be used maximal value : " << resultsCnt;
@@ -303,7 +312,7 @@ int main(int argc, char *argv[]) {
         std::cout << std::endl << "Top " << FLAGS_nt << " results:" << std::endl << std::endl;
 
         /** Print the result iterating over each batch **/
-        for (int image_id = 0; image_id < batchSize; ++image_id) {
+        for (size_t image_id = 0; image_id < batchSize; ++image_id) {
             std::cout << "Image " << images[image_id] << std::endl << std::endl;
             for (size_t id = image_id * FLAGS_nt, cnt = 0; cnt < FLAGS_nt; ++cnt, ++id) {
                 std::cout.precision(7);
@@ -313,6 +322,9 @@ int main(int argc, char *argv[]) {
             }
             std::cout << std::endl;
         }
+        if (std::fabs(total) < std::numeric_limits<double>::epsilon()) {
+            throw std::logic_error("total can't be equal to zero");
+        }
         // -----------------------------------------------------------------------------------------------------
         std::cout << std::endl << "total inference time: " << total << std::endl;
         std::cout << "Average running time of one iteration: " << total / static_cast<double>(FLAGS_ni) << " ms" << std::endl;
diff --git a/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt b/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt
index 60cd38ee9..436edc277 100644
--- a/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt
+++ b/inference-engine/samples/object_detection_sample_ssd/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "object_detection_sample_ssd")
 
 file (GLOB MAIN_SRC
diff --git a/inference-engine/samples/object_detection_sample_ssd/README.md b/inference-engine/samples/object_detection_sample_ssd/README.md
index dc6f477e3..a8db1a81e 100644
--- a/inference-engine/samples/object_detection_sample_ssd/README.md
+++ b/inference-engine/samples/object_detection_sample_ssd/README.md
@@ -1,14 +1,22 @@
-# Object Detection Sample SSD
+# Object Detection C++ Sample SSD
 
-This topic demonstrates how to run the Object Detection sample application, which does inference using object detection 
+This topic demonstrates how to run the Object Detection sample application, which does inference using object detection
 networks like SSD-VGG on Intel® Processors and Intel® HD Graphics.
 
+## How It Works
+
+Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference
+Engine plugin. When inference is done, the application creates an
+output image and outputs data to the standard output stream.
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
 ## Running
 
 Running the application with the <code>-h</code> option yields the following usage message:
 ```sh
 ./object_detection_sample_ssd -h
-InferenceEngine: 
+InferenceEngine:
     API version ............ <version>
     Build .................. <number>
 
@@ -18,46 +26,41 @@ Options:
     -h                      Print a usage message.
     -i "<path>"             Required. Path to an .bmp image.
     -m "<path>"             Required. Path to an .xml file with a trained model.
-      -l "<absolute_path>"    Required for MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the kernels impl.
+      -l "<absolute_path>"    Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
           Or
-      -c "<absolute_path>"    Required for clDNN (GPU)-targeted custom kernels. Absolute path to the xml file with the kernels desc.
-    -pp "<path>"            Path to a plugin folder.
-    -d "<device>"           Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
-    -pc                     Enables per-layer performance report
-    -ni "<integer>"         Number of iterations (default 1)
-    -p_msg                  Enables messages from a plugin
+      -c "<absolute_path>"    Required for GPU custom kernels. Absolute path to the .xml file with the kernel descriptions.
+    -pp "<path>"            Optional. Path to a plugin folder.
+    -d "<device>"           Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
+    -pc                     Optional. Enables per-layer performance report
+    -ni "<integer>"         Optional. Number of iterations. Default value is 1
+    -p_msg                  Optional. Enables messages from a plugin
 
 ```
 
 Running the application with the empty list of options yields the usage message given above and an error message.
 
-To run the sample, you can use a set of pre-trained and optimized models delivered with the package or a Caffe* public model.
+To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
 
-> **NOTE**: A public model should be converted to the Inference Engine format (`.xml` + `.bin`) using the Model Optimizer tool. For Model Optimizer documentation, see https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer.
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
 
 For example, to do inference on a CPU with the OpenVINO&trade; toolkit person detection SSD models, run one of the following commands:
 
 ```sh
-./object_detection_sample_ssd -i <path_to_image>/inputImage.bmp -m <INSTAL_DIR>/deployment_tools/intel_models/person-detection-retail-0013/FP32/person-detection-retail-0013.xml -d CPU
+./object_detection_sample_ssd -i <path_to_image>/inputImage.bmp -m <path_to_model>person-detection-retail-0013.xml -d CPU
 ```
 or
 ```sh
-./object_detection_sample_ssd -i <path_to_image>/inputImage.jpg -m <INSTALL_DIR>/deployment_tools/intel_models/person-detection-retail-0002/FP32/person-detection-retail-0002.xml -d CPU
+./object_detection_sample_ssd -i <path_to_image>/inputImage.jpg -m <path_to_model>person-detection-retail-0002.xml -d CPU
 ```
 
-> **NOTE**: Before running the sample with another trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+## Sample Output
 
-### Outputs
-
-The application outputs an image (<code>out_0.bmp</code>) with detected objects enclosed in rectangles. It outputs the list of classes 
-of the detected objects along with the respective confidence values and the coordinates of the 
+The application outputs an image (`out_0.bmp`) with detected objects enclosed in rectangles. It outputs the list of classes
+of the detected objects along with the respective confidence values and the coordinates of the
 rectangles to the standard output stream.
 
-### How it works
-
-Upon the start-up the sample application reads command line parameters and loads a network and an image to the Inference 
-Engine plugin. When inference is done, the application creates an 
-output image and outputs data to the standard output stream.
 
-## See Also 
+## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/object_detection_sample_ssd/main.cpp b/inference-engine/samples/object_detection_sample_ssd/main.cpp
index 066e9ffbe..32e41e78c 100644
--- a/inference-engine/samples/object_detection_sample_ssd/main.cpp
+++ b/inference-engine/samples/object_detection_sample_ssd/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -78,7 +78,7 @@ int main(int argc, char *argv[]) {
 
         // --------------------------- 3. Load Plugin for inference engine -------------------------------------
         slog::info << "Loading plugin" << slog::endl;
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp, "../../../lib/intel64" , "" }).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
         if (FLAGS_p_msg) {
             static_cast<InferenceEngine::InferenceEnginePluginPtr>(plugin)->SetLogCallback(error_listener);
         }
@@ -149,7 +149,7 @@ int main(int argc, char *argv[]) {
          */
         std::string imageInputName, imInfoInputName;
 
-        InputInfo::Ptr inputInfo = inputsInfo.begin()->second;
+        InputInfo::Ptr inputInfo = nullptr;
 
         SizeVector inputImageDims;
         /** Stores input image **/
@@ -160,6 +160,8 @@ int main(int argc, char *argv[]) {
             if (item.second->getInputData()->getTensorDesc().getDims().size() == 4) {
                 imageInputName = item.first;
 
+                inputInfo = item.second;
+
                 slog::info << "Batch size is " << std::to_string(networkReader.getNetwork().getBatchSize()) << slog::endl;
 
                 /** Creating first input blob **/
@@ -170,12 +172,15 @@ int main(int argc, char *argv[]) {
 
                 Precision inputPrecision = Precision::FP32;
                 item.second->setPrecision(inputPrecision);
-                if ((item.second->getTensorDesc().getDims()[1] != 3 && item.second->getTensorDesc().getDims()[1] != 6) ||
-                     item.second->getTensorDesc().getDims()[0] != 1) {
+                if ((item.second->getTensorDesc().getDims()[1] != 3 && item.second->getTensorDesc().getDims()[1] != 6)) {
                     throw std::logic_error("Invalid input info. Should be 3 or 6 values length");
                 }
             }
         }
+
+        if (inputInfo == nullptr) {
+            inputInfo = inputsInfo.begin()->second;
+        }
         // -----------------------------------------------------------------------------------------------------
 
         // --------------------------- 6. Prepare output blobs -------------------------------------------------
@@ -226,7 +231,7 @@ int main(int argc, char *argv[]) {
         // --------------------------- 9. Prepare input --------------------------------------------------------
         /** Collect images data ptrs **/
         std::vector<std::shared_ptr<unsigned char>> imagesData, originalImagesData;
-        std::vector<int> imageWidths, imageHeights;
+        std::vector<size_t> imageWidths, imageHeights;
         for (auto & i : images) {
             FormatReader::ReaderPtr reader(i.c_str());
             if (reader.get() == nullptr) {
@@ -285,7 +290,7 @@ int main(int argc, char *argv[]) {
             for (size_t image_id = 0; image_id < std::min(imagesData.size(), batchSize); ++image_id) {
                 p[image_id * imInfoDim + 0] = static_cast<float>(inputsInfo[imageInputName]->getTensorDesc().getDims()[2]);
                 p[image_id * imInfoDim + 1] = static_cast<float>(inputsInfo[imageInputName]->getTensorDesc().getDims()[3]);
-                for (int k = 2; k < imInfoDim; k++) {
+                for (size_t k = 2; k < imInfoDim; k++) {
                     p[image_id * imInfoDim + k] = 1.0f;  // all scale factors are set to 1.0
                 }
             }
@@ -301,7 +306,7 @@ int main(int argc, char *argv[]) {
 
         double total = 0.0;
         /** Start inference & calc performance **/
-        for (int iter = 0; iter < FLAGS_ni; ++iter) {
+        for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
             auto t0 = Time::now();
             infer_request.Infer();
             auto t1 = Time::now();
@@ -322,28 +327,28 @@ int main(int argc, char *argv[]) {
 
         /* Each detection has image_id that denotes processed image */
         for (int curProposal = 0; curProposal < maxProposalCount; curProposal++) {
-            float image_id = detection[curProposal * objectSize + 0];
+            auto image_id = static_cast<int>(detection[curProposal * objectSize + 0]);
             if (image_id < 0) {
                 break;
             }
 
-            float label = detection[curProposal * objectSize + 1];
             float confidence = detection[curProposal * objectSize + 2];
-            float xmin = detection[curProposal * objectSize + 3] * imageWidths[image_id];
-            float ymin = detection[curProposal * objectSize + 4] * imageHeights[image_id];
-            float xmax = detection[curProposal * objectSize + 5] * imageWidths[image_id];
-            float ymax = detection[curProposal * objectSize + 6] * imageHeights[image_id];
+            auto label = static_cast<int>(detection[curProposal * objectSize + 1]);
+            auto xmin = static_cast<int>(detection[curProposal * objectSize + 3] * imageWidths[image_id]);
+            auto ymin = static_cast<int>(detection[curProposal * objectSize + 4] * imageHeights[image_id]);
+            auto xmax = static_cast<int>(detection[curProposal * objectSize + 5] * imageWidths[image_id]);
+            auto ymax = static_cast<int>(detection[curProposal * objectSize + 6] * imageHeights[image_id]);
 
             std::cout << "[" << curProposal << "," << label << "] element, prob = " << confidence <<
                 "    (" << xmin << "," << ymin << ")-(" << xmax << "," << ymax << ")" << " batch id : " << image_id;
 
             if (confidence > 0.5) {
                 /** Drawing only objects with >50% probability **/
-                classes[image_id].push_back(static_cast<int>(label));
-                boxes[image_id].push_back(static_cast<int>(xmin));
-                boxes[image_id].push_back(static_cast<int>(ymin));
-                boxes[image_id].push_back(static_cast<int>(xmax - xmin));
-                boxes[image_id].push_back(static_cast<int>(ymax - ymin));
+                classes[image_id].push_back(label);
+                boxes[image_id].push_back(xmin);
+                boxes[image_id].push_back(ymin);
+                boxes[image_id].push_back(xmax - xmin);
+                boxes[image_id].push_back(ymax - ymin);
                 std::cout << " WILL BE PRINTED!";
             }
             std::cout << std::endl;
diff --git a/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h b/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
index 1e9f28791..540ed59ce 100644
--- a/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
+++ b/inference-engine/samples/object_detection_sample_ssd/object_detection_sample_ssd.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -25,7 +25,7 @@ static const char help_message[] = "Print a usage message.";
 static const char image_message[] = "Required. Path to an .bmp image.";
 
 /// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Path to a plugin folder.";
+static const char plugin_path_message[] = "Optional. Path to a plugin folder.";
 
 /// @brief message for model argument
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";
@@ -35,25 +35,25 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If
 "the sample will look for this plugin only";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. " \
+static const char target_device_message[] = "Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
 "Sample will look for a suitable plugin for device specified";
 
 /// @brief message for performance counters
-static const char performance_counter_message[] = "Enables per-layer performance report";
+static const char performance_counter_message[] = "Optional. Enables per-layer performance report";
 
 /// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations (default 1)";
+static const char iterations_count_message[] = "Optional. Number of iterations. Default value is 1";
 
 /// @brief message for clDNN custom kernels desc
-static const char custom_cldnn_message[] = "Required for clDNN (GPU)-targeted custom kernels. "\
-"Absolute path to the xml file with the kernels desc.";
+static const char custom_cldnn_message[] = "Required for GPU custom kernels. "\
+"Absolute path to the .xml file with the kernels descriptions.";
 
 /// @brief message for user library argument
-static const char custom_cpu_library_message[] = "Required for MKLDNN (CPU)-targeted custom layers. " \
-"Absolute path to a shared library with the kernels impl.";
+static const char custom_cpu_library_message[] = "Required for CPU custom layers. " \
+"Absolute path to a shared library with the kernels implementations.";
 
 /// @brief message for plugin messages
-static const char plugin_err_message[] = "Enables messages from a plugin";
+static const char plugin_err_message[] = "Optional. Enables messages from a plugin";
 
 /// \brief Define flag for showing help message <br>
 DEFINE_bool(h, false, help_message);
@@ -85,7 +85,7 @@ DEFINE_string(c, "", custom_cldnn_message);
 DEFINE_string(l, "", custom_cpu_library_message);
 
 /// @brief Iterations count (default 1)
-DEFINE_int32(ni, 1, iterations_count_message);
+DEFINE_uint32(ni, 1, iterations_count_message);
 
 /// @brief Enable plugin messages
 DEFINE_bool(p_msg, false, plugin_err_message);
diff --git a/inference-engine/samples/perfcheck/CMakeLists.txt b/inference-engine/samples/perfcheck/CMakeLists.txt
index bc08b7ddf..4a68a8bfa 100644
--- a/inference-engine/samples/perfcheck/CMakeLists.txt
+++ b/inference-engine/samples/perfcheck/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -25,7 +25,6 @@ endif()
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     target_compile_options(${TARGET_NAME}
-        PRIVATE "-Weverything"
         PRIVATE "-Wno-c++98-compat"
         PRIVATE "-Wno-global-constructors"
         PRIVATE "-Wno-missing-variable-declarations"
diff --git a/inference-engine/samples/perfcheck/README.md b/inference-engine/samples/perfcheck/README.md
index daf04485b..e38bd29e6 100644
--- a/inference-engine/samples/perfcheck/README.md
+++ b/inference-engine/samples/perfcheck/README.md
@@ -10,7 +10,7 @@ After inference stage, Perfcheck sample computes total time of execution, divide
 
 ## Running
 
-Running the application with the <code>-h</code> option yields the following usage message:
+Running the application with the `-h` option yields the following usage message:
 
 ```sh
 ./perfcheck -h
@@ -37,14 +37,16 @@ perfcheck [OPTIONS]
 
 Running the application with the empty list of options yields an error message.
 
-You can use the following command to do inference on Intel® Processors on images from a folder using a trained Faster R-CNN network:
+To run the sample, you can use public or pre-trained models. To download the pre-trained models, use the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) or go to [https://download.01.org/opencv/](https://download.01.org/opencv/).
+
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+You can use the following command to do inference on CPU on images from a folder using a trained Faster R-CNN network:
 
 ```sh
 ./perfcheck -m <path_to_model>/faster_rcnn.xml -inputs_dir <path_to_inputs> -d CPU
 ```
 
-> **NOTE**: Public models should be first converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer).
-
 ## Sample Output
 
 The application outputs a performance statistics that shows: total execution time (in milliseconds), number of iterations, batch size, minimum, average and maximum FPS.
@@ -63,11 +65,13 @@ Example of sample output:
 Total time:     8954.61 ms
 Num iterations: 1000
 Batch:          1
-Min fps:        110.558
-Avg fps:        111.674
-Max fps:        112.791
+Min FPS:        110.558
+Avg FPS:        111.674
+Max FPS:        112.791
 ```
 
 ## See Also
 
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/perfcheck/main.cpp b/inference-engine/samples/perfcheck/main.cpp
index 88d5de9f8..0c062c61c 100644
--- a/inference-engine/samples/perfcheck/main.cpp
+++ b/inference-engine/samples/perfcheck/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -30,7 +30,7 @@
 #include "inference_engine.hpp"
 #include "ext_list.hpp"
 
-//#include "vpu/vpu_plugin_config.hpp"
+#include "vpu/vpu_plugin_config.hpp"
 #include "samples/common.hpp"
 #include "samples/slog.hpp"
 
@@ -116,7 +116,7 @@ static std::size_t getNumberRequests(const std::string &plugin) {
     return num_requests == supported_plugins.end() ? 1 : num_requests->second;
 }
 
-#if defined(WIN32)
+#if defined(WIN32) || defined(__APPLE__)
 typedef std::chrono::time_point<std::chrono::steady_clock> time_point;
 #else
 typedef std::chrono::time_point<std::chrono::system_clock> time_point;
@@ -168,9 +168,9 @@ static void printFPS(std::size_t num_requests, std::size_t num_intervals, const
     std::cout << "Num iterations: " << num_iterations << std::endl;
     std::cout << "Batch:          " << FLAGS_batch << std::endl;
 
-    std::cout << "Min fps:        " << min_fps << std::endl;
-    std::cout << "Avg fps:        " << avg_fps << std::endl;
-    std::cout << "Max fps:        " << max_fps << std::endl;
+    std::cout << "Min FPS:        " << min_fps << std::endl;
+    std::cout << "Avg FPS:        " << avg_fps << std::endl;
+    std::cout << "Max FPS:        " << max_fps << std::endl;
 }
 
 template<typename T>
@@ -417,7 +417,7 @@ int main(int argc, char *argv[]) {
             }
         }
 
-        auto plugin = InferenceEngine::PluginDispatcher({FLAGS_pp, "../../../lib/intel64", ""}).getPluginByDevice(FLAGS_d);
+        auto plugin = InferenceEngine::PluginDispatcher({FLAGS_pp}).getPluginByDevice(FLAGS_d);
 
         /* If CPU device, load default library with extensions that comes with the product */
         if (FLAGS_d.find("CPU") != std::string::npos) {
diff --git a/inference-engine/samples/perfcheck/perfcheck.h b/inference-engine/samples/perfcheck/perfcheck.h
index 01419f1a7..facc5f692 100644
--- a/inference-engine/samples/perfcheck/perfcheck.h
+++ b/inference-engine/samples/perfcheck/perfcheck.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/speech_sample/CMakeLists.txt b/inference-engine/samples/speech_sample/CMakeLists.txt
index 33e7e72d4..e789f7af8 100644
--- a/inference-engine/samples/speech_sample/CMakeLists.txt
+++ b/inference-engine/samples/speech_sample/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "speech_sample")
 
 file (GLOB MAIN_SRC
@@ -30,7 +28,7 @@ add_dependencies(${TARGET_NAME} gflags)
 set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
 COMPILE_PDB_NAME ${TARGET_NAME})
 
-target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} gflags)
+target_link_libraries(${TARGET_NAME} ${InferenceEngine_LIBRARIES} IE::ie_cpu_extension gflags)
 
 if(UNIX)
     target_link_libraries( ${TARGET_NAME} ${LIB_DL} pthread)
diff --git a/inference-engine/samples/speech_sample/README.md b/inference-engine/samples/speech_sample/README.md
index 31f2b8df4..a9ca93858 100644
--- a/inference-engine/samples/speech_sample/README.md
+++ b/inference-engine/samples/speech_sample/README.md
@@ -1,19 +1,87 @@
-# Automatic Speech Recognition Sample
+# Automatic Speech Recognition C++ Sample
 
 This topic shows how to run the speech sample application, which
 demonstrates acoustic model inference based on Kaldi\* neural networks
 and speech feature vectors.
 
-## Running
+## How It Works
+
+Upon the start-up, the application reads command line parameters
+and loads a Kaldi-trained neural network along with Kaldi ARK speech
+feature vector file to the Inference Engine plugin. It then performs
+inference on all speech utterances stored in the input ARK
+file. Context-windowed speech frames are processed in batches of 1-8
+frames according to the `-bs` parameter.  Batching across utterances is
+not supported by this sample.  When inference is done, the application
+creates an output ARK file.  If the `-r` option is given, error
+statistics are provided for each speech utterance as shown above.
+
+### GNA-specific details
+
+#### Quantization
 
-### Usage
+If the GNA device is selected (for example, using the `-d` GNA flag),
+the GNA Inference Engine plugin quantizes the model and input feature
+vector sequence to integer representation before performing inference.
+Several parameters control neural network quantization.  The `-q` flag
+determines the quantization mode.  Three modes are supported: static,
+dynamic, and user-defined.  In static quantization mode, the first
+utterance in the input ARK file is scanned for dynamic range.  The
+scale factor (floating point scalar multiplier) required to scale the
+maximum input value of the first utterance to 16384 (15 bits) is used
+for all subsequent inputs.  The neural network is quantized to
+accomodate the scaled input dynamic range.  In user-defined
+quantization mode, the user may specify a scale factor via the `-sf`
+flag that will be used for static quantization.  In dynamic
+quantization mode, the scale factor for each input batch is computed
+just before inference on that batch.  The input and network are
+(re)quantized on-the-fly using an efficient procedure.
+
+The `-qb` flag provides a hint to the GNA plugin regarding the preferred
+target weight resolution for all layers.  For example, when `-qb 8` is
+specified, the plugin will use 8-bit weights wherever possible in the
+network.  Note that it is not always possible to use 8-bit weights due
+to GNA hardware limitations.  For example, convolutional layers always
+use 16-bit weights (GNA harware verison 1 and 2).  This limitation
+will be removed in GNA hardware version 3 and higher.
+
+#### Execution Modes
+
+Several execution modes are supported via the `-d` flag.  If the device
+is set to `CPU` and the GNA plugin is selected, the GNA device is
+emulated in fast-but-not-bit-exact mode.  If the device is set to
+`GNA_AUTO`, then the GNA hardware is used if available and the driver is
+installed.  Otherwise, the GNA device is emulated in
+fast-but-not-bit-exact mode.  If the device is set to `GNA_HW`, then the
+GNA hardware is used if available and the driver is installed.
+Otherwise, an error will occur.  If the device is set to `GNA_SW`, the
+GNA device is emulated in fast-but-not-bit-exact mode.  Finally, if
+the device is set to `GNA_SW_EXACT`, the GNA device is emulated in
+bit-exact mode.
+
+#### Loading and Saving Models
+
+The GNA plugin supports loading and saving of the GNA-optimized model
+(non-IR) via the `-rg` and `-wg` flags.  Thereby, it is possible to avoid
+the cost of full model quantization at run time. The GNA plugin also
+supports export of firmware-compatible embedded model images for the
+Intel® Speech Enabling Developer Kit and Amazon Alexa* Premium
+Far-Field Voice Development Kit via the `-we` flag (save only).
+
+In addition to performing inference directly from a GNA model file, these options make it possible to:
+- Convert from IR format to GNA format model file (`-m`, `-wg`)
+- Convert from IR format to embedded format model file (`-m`, `-we`)
+- Convert from GNA format to embedded format model file (`-rg`, `-we`)
+
+
+## Running
 
 Running the application with the `-h` option yields the following
 usage message:
 
 ```sh
 $ ./speech_sample -h
-InferenceEngine: 
+InferenceEngine:
     API version ............ <version>
     Build .................. <number>
 
@@ -23,21 +91,22 @@ Options:
     -h                      Print a usage message.
     -i "<path>"             Required. Path to an .ark file.
     -m "<path>"             Required. Path to an .xml file with a trained model (required if -rg is missing).
-    -o "<path>"             Output file name (default name is scores.ark).
-    -l "<absolute_path>"    Required for MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels impl.
-    -d "<device>"           Specify the target device to infer on; CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT is acceptable. Sample will look for a suitable plugin for device specified
-    -p                      Plugin name. For example MKLDNNPlugin. If this parameter is pointed, the sample will look for this plugin only
-    -pp                     Path to a plugin folder.
-    -pc                     Enables performance report
-    -q "<mode>"             Input quantization mode:  static (default), dynamic, or user (use with -sf).
-    -qb "<integer>"         Weight bits for quantization:  8 or 16 (default)
-    -sf "<double>"          Optional user-specified input scale factor for quantization (use with -q user).
-    -bs "<integer>"         Batch size 1-8 (default 1)
-    -r "<path>"             Read reference score .ark file and compare scores.
-    -rg "<path>"            Read GNA model from file using path/filename provided (required if -m is missing).
-    -wg "<path>"            Write GNA model to file using path/filename provided.
-    -we "<path>"            Write GNA embedded model to file using path/filename provided.
+    -o "<path>"             Optional. Output file name (default name is "scores.ark").
+    -l "<absolute_path>"    Required for CPU custom layers. Absolute path to a shared library with the kernel implementations.
+    -d "<device>"           Optional. Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look for a suitable plugin for device specified.
+    -p                      Optional. Plugin name. For example, GPU. If this parameter is set, the sample will look for this plugin only
+    -pp                     Optional. Path to a plugin folder.
+    -pc                     Optional. Enables performance report
+    -q "<mode>"             Optional. Input quantization mode:  "static" (default), "dynamic", or "user" (use with -sf).
+    -qb "<integer>"         Optional. Weight bits for quantization:  8 or 16 (default)
+    -sf "<double>"          Optional. Input scale factor for quantization (use with -q user).
+    -bs "<integer>"         Optional. Batch size 1-8 (default 1)
+    -r "<path>"             Optional. Read reference score .ark file and compare scores.
+    -rg "<path>"            Optional. Read GNA model from file using path/filename provided (required if -m is missing).
+    -wg "<path>"            Optional. Write GNA model to file using path/filename provided.
+    -we "<path>"            Optional. Write GNA embedded model to file using path/filename provided.
     -nthreads "<integer>"   Optional. Number of threads to use for concurrent async inference requests on the GNA.
+    -cw "<integer>"         Optional. Number of frames for context windows (default is 0). Works only with context window networks. If you use the cw flag, the batch size and nthreads arguments are ignored.
 
 ```
 
@@ -46,8 +115,6 @@ usage message given above and an error message.
 
 ### Model Preparation
 
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
-
 You can use the following model optimizer command to convert a Kaldi
 nnet1 or nnet2 neural network to Intel IR format:
 
@@ -61,13 +128,13 @@ network, `wsj_dnn5b_smbr.nnet`, and Kaldi class counts file,
 the Intel IR network consisting of `wsj_dnn5b_smbr.xml` and
 `wsj_dnn5b_smbr.bin`.
 
-The following pretrained models are available:
+The following pre-trained models are available:
 
 * wsj\_dnn5b\_smbr
 * rm\_lstm4f
 * rm\_cnn4a\_smbr
 
-All of them can be downloaded from [https://download.01.org/openvinotoolkit/2018_R3/models_contrib/GNA/](https://download.01.org/openvinotoolkit/2018_R3/models_contrib/GNA/).
+All of them can be downloaded from [https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi](https://download.01.org/openvinotoolkit/models_contrib/speech/kaldi) or using the OpenVINO [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader) .
 
 
 ### Speech Inference
@@ -85,7 +152,9 @@ scores (`wsj_dnn5b_smbr_dev93_scores_10.ark`) corresponding to the input
 feature file (`wsj_dnn5b_smbr_dev93_10.ark`) are assumed to be available
 for comparison.
 
-### Sample Output
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
+## Sample Output
 
 The acoustic log likelihood sequences for all utterances are stored in
 the Kaldi ARK file, `scores.ark`.  If the `-r` option is used, a report on
@@ -101,81 +170,12 @@ Utterance 0: 4k0c0301
        stdev error: 0.00393488
 ```
 
-## How it works
-
-Upon the start-up the speech_sample application reads command line parameters
-and loads a Kaldi-trained neural network along with Kaldi ARK speech
-feature vector file to the Inference Engine plugin. It then performs
-inference on all speech utterances stored in the input ARK
-file. Context-windowed speech frames are processed in batches of 1-8
-frames according to the `-bs` parameter.  Batching across utterances is
-not supported by this sample.  When inference is done, the application
-creates an output ARK file.  If the `-r` option is given, error
-statistics are provided for each speech utterance as shown above.
-
-### GNA-specific details
-
-#### Quantization
-
-If the GNA device is selected (for example, using the `-d` GNA flag),
-the GNA Inference Engine plugin quantizes the model and input feature
-vector sequence to integer representation before performing inference.
-Several parameters control neural network quantization.  The `-q` flag
-determines the quantization mode.  Three modes are supported: static,
-dynamic, and user-defined.  In static quantization mode, the first
-utterance in the input ARK file is scanned for dynamic range.  The
-scale factor (floating point scalar multiplier) required to scale the
-maximum input value of the first utterance to 16384 (15 bits) is used
-for all subsequent inputs.  The neural network is quantized to
-accomodate the scaled input dynamic range.  In user-defined
-quantization mode, the user may specify a scale factor via the `-sf`
-flag that will be used for static quantization.  In dynamic
-quantization mode, the scale factor for each input batch is computed
-just before inference on that batch.  The input and network are
-(re)quantized on-the-fly using an efficient procedure.
-
-The `-qb` flag provides a hint to the GNA plugin regarding the preferred
-target weight resolution for all layers.  For example, when `-qb 8` is
-specified, the plugin will use 8-bit weights wherever possible in the
-network.  Note that it is not always possible to use 8-bit weights due
-to GNA hardware limitations.  For example, convolutional layers always
-use 16-bit weights (GNA harware verison 1 and 2).  This limitation
-will be removed in GNA hardware version 3 and higher.
-
-#### Execution Modes
-
-Several execution modes are supported via the `-d` flag.  If the device
-is set to `CPU` and the GNA plugin is selected, the GNA device is
-emulated in fast-but-not-bit-exact mode.  If the device is set to
-`GNA_AUTO`, then the GNA hardware is used if available and the driver is
-installed.  Otherwise, the GNA device is emulated in
-fast-but-not-bit-exact mode.  If the device is set to `GNA_HW`, then the
-GNA hardware is used if available and the driver is installed.
-Otherwise, an error will occur.  If the device is set to `GNA_SW`, the
-GNA device is emulated in fast-but-not-bit-exact mode.  Finally, if
-the device is set to `GNA_SW_EXACT`, the GNA device is emulated in
-bit-exact mode.
-
-#### Loading and Saving Models
-
-The GNA plugin supports loading and saving of the GNA-optimized model
-(non-IR) via the `-rg` and `-wg` flags.  Thereby, it is possible to avoid
-the cost of full model quantization at run time. The GNA plugin also
-supports export of firmware-compatible embedded model images for the
-Intel® Speech Enabling Developer Kit and Amazon Alexa* Premium
-Far-Field Voice Development Kit via the `-we` flag (save only).
-
-In addition to performing inference directly from a GNA model file, these options make it possible to:
-- Convert from IR format to GNA format model file (`-m`, `-wg`)
-- Convert from IR format to embedded format model file (`-m`, `-we`)
-- Convert from GNA format to embedded format model file (`-rg`, `-we`)
-
 ## Use of Sample in Kaldi* Speech Recognition Pipeline
 
 The Wall Street Journal DNN model used in this example was prepared
 using the Kaldi s5 recipe and the Kaldi Nnet (nnet1) framework.  It is
 possible to recognize speech by substituting the `speech_sample` for
-Kaldi's nnet-forward command.  Since the speech_sample does not yet 
+Kaldi's nnet-forward command.  Since the speech_sample does not yet
 use pipes, it is necessary to use temporary files for speaker-
 transformed feature vectors and scores when running the Kaldi speech
 recognition pipeline.  The following operations assume that feature
@@ -199,10 +199,7 @@ latgen-faster-mapped --max-active=7000 --max-mem=50000000 --beam=13.0 --lattice-
 cat out.txt | utils/int2sym.pl -f 2- words.txt | sed s:\<UNK\>::g | compute-wer --text --mode=present ark:test_filt.txt ark,p:-
 ```
 
-## Links 
-
-- [Main Page](index.html)
-- [Use of the Inference Engine](./docs/IE_DG/Integrate_with_customer_application.md)
-- [Intel's Deep Learning Model Optimizer Developer Guide](https://software.intel.com/en-us/model-optimizer-devguide)
-- [Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
-- [Deep Learning Deployment Toolkit Web Page](https://software.intel.com/en-us/computer-vision-sdk)
+## See Also
+* [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/speech_sample/main.cpp b/inference-engine/samples/speech_sample/main.cpp
index e0dc005c6..4b7115a77 100644
--- a/inference-engine/samples/speech_sample/main.cpp
+++ b/inference-engine/samples/speech_sample/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -25,6 +25,7 @@
 #include <samples/common.hpp>
 #include <samples/slog.hpp>
 #include <samples/args_helper.hpp>
+#include <ext_list.hpp>
 
 #ifndef ALIGN
 #define ALIGN(memSize, pad)   ((static_cast<int>((memSize) + pad - 1) / pad) * pad)
@@ -51,6 +52,12 @@ typedef struct {
     float sumSquaredRelError;
 } score_error_t;
 
+struct InferRequestStruct {
+    InferRequest inferRequest;
+    int frameIndex;
+    uint32_t numFramesThisBatch;
+};
+
 void GetKaldiArkInfo(const char *fileName,
                      uint32_t numArrayToFindSize,
                      uint32_t *ptrNumArrays,
@@ -119,7 +126,6 @@ void LoadKaldiArkArray(const char *fileName, uint32_t arrayIndex, std::string &p
             in_file.read(reinterpret_cast<char *>(ptrNumRows), sizeof(uint32_t));        // read number of rows
             std::getline(in_file, line, '\4');                                            // read control-D
             in_file.read(reinterpret_cast<char *>(ptrNumColumns), sizeof(uint32_t));    // read number of columns
-            size_t willWrite = *ptrNumRows * *ptrNumColumns * sizeof(float);
             in_file.read(reinterpret_cast<char *>(&memory.front()),
                          *ptrNumRows * *ptrNumColumns * sizeof(float));  // read array data
         }
@@ -286,7 +292,6 @@ inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
 
 // return GNA module frequency in MHz
 float getGnaFrequencyMHz() {
-    uint32_t level = 0;
     uint32_t eax = 1;
     uint32_t ebx = 0;
     uint32_t ecx = 0;
@@ -353,12 +358,11 @@ void printPerformanceCounters(std::map<std::string,
 
     for (const auto &it : utterancePerfMap) {
         std::string const &counter_name = it.first;
-        float current_units = it.second.realTime_uSec;
+        float current_units = static_cast<float>(it.second.realTime_uSec);
         float call_units = current_units / callsNum;
-        float freq = 1.0;
         // if GNA HW counters
         // get frequency of GNA module
-        freq = getGnaFrequencyMHz();
+        float freq = getGnaFrequencyMHz();
         current_units /= freq * 1000;
         call_units /= freq;
         stream << std::setw(30) << std::left << counter_name.substr(4, counter_name.size() - 1);
@@ -414,9 +418,20 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
         throw std::logic_error("Only one of -m and -rg is allowed.");
     }
 
-    if ((FLAGS_d.compare("GPU") != 0) && (FLAGS_d.compare("CPU") != 0) && (FLAGS_d.compare("GNA_AUTO") != 0) &&
-        (FLAGS_d.compare("GNA_HW") != 0)
-        && (FLAGS_d.compare("GNA_SW") != 0) && (FLAGS_d.compare("GNA_SW_EXACT") != 0)) {
+    std::vector<std::string> possibleDeviceTypes = {
+            "CPU",
+            "GPU",
+            "GNA_AUTO",
+            "GNA_HW",
+            "GNA_SW_EXACT",
+            "GNA_SW",
+            "HETERO:GNA,CPU",
+            "HETERO:GNA_HW,CPU",
+            "HETERO:GNA_SW_EXACT,CPU",
+            "HETERO:GNA_SW,CPU",
+    };
+
+    if (std::find(possibleDeviceTypes.begin(), possibleDeviceTypes.end(), FLAGS_d) == possibleDeviceTypes.end()) {
         throw std::logic_error("Specified device is not supported.");
     }
 
@@ -447,6 +462,10 @@ bool ParseAndCheckCommandLine(int argc, char *argv[]) {
         throw std::logic_error("Not valid value for 'nthreads' argument. It should be > 0 ");
     }
 
+    if (FLAGS_cw < 0) {
+        throw std::logic_error("Not valid value for 'cw' argument. It should be > 0 ");
+    }
+
     return true;
 }
 
@@ -468,10 +487,14 @@ int main(int argc, char *argv[]) {
             slog::info << "No extensions provided" << slog::endl;
         }
 
-        bool useGna = (FLAGS_d.find("GNA") != std::string::npos);
-        auto deviceStr = FLAGS_d.substr(0, (FLAGS_d.find("_")));
+        auto isFeature = [&](const std::string xFeature) { return FLAGS_d.find(xFeature) != std::string::npos; };
+
+        bool useGna = isFeature("GNA");
+        bool useHetero = isFeature("HETERO");
+        std::string deviceStr =
+                useHetero && useGna ? "HETERO:GNA,CPU" : FLAGS_d.substr(0, (FLAGS_d.find("_")));
         float scaleFactorInput = static_cast<float>(FLAGS_sf);
-        uint32_t batchSize = (uint32_t) FLAGS_bs;
+        uint32_t batchSize = FLAGS_cw > 0 ? 1 : (uint32_t) FLAGS_bs;
         /** Extract input ark file name **/
         std::string inputArkName = fileNameNoExt(FLAGS_i) + ".ark";
 
@@ -484,7 +507,7 @@ int main(int argc, char *argv[]) {
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
         slog::info << "Loading plugin" << slog::endl;
         /** Loading plugin for device **/
-        InferencePlugin plugin = PluginDispatcher({FLAGS_pp, "../../../lib/intel64", ""}).getPluginByDevice(deviceStr);
+        InferencePlugin plugin = PluginDispatcher({FLAGS_pp}).getPluginByDevice(deviceStr);
 
         /** Printing plugin version **/
         std::cout << plugin.GetVersion() << std::endl << std::endl;
@@ -514,9 +537,20 @@ int main(int argc, char *argv[]) {
         /** Setting plugin parameter for per layer metrics **/
         std::map<std::string, std::string> gnaPluginConfig;
         std::map<std::string, std::string> genericPluginConfig;
-        if (FLAGS_d.compare("CPU") != 0) {
-            gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] = FLAGS_d;
+        if (useGna) {
+            std::string gnaDevice =
+                    useHetero ? FLAGS_d.substr(FLAGS_d.find("GNA"), FLAGS_d.find(",") - FLAGS_d.find("GNA")) : FLAGS_d;
+            gnaPluginConfig[GNAConfigParams::KEY_GNA_DEVICE_MODE] =
+                    gnaDevice.find("_") == std::string::npos ? "GNA_AUTO" : gnaDevice;
+        } else if (plugin.GetVersion()->description == std::string("MKLDNNPlugin")) {
+            /**
+             * cpu_extensions library is compiled from "extension" folder containing
+             * custom MKLDNNPlugin layer implementations. These layers are not supported
+             * by mkldnn, but they can be useful for inferring custom topologies.
+            **/
+            plugin.AddExtension(std::make_shared<Extensions::Cpu::CpuExtensions>());
         }
+
         if (FLAGS_pc) {
             genericPluginConfig[PluginConfigParams::KEY_PERF_COUNT] = PluginConfigParams::YES;
         }
@@ -550,7 +584,7 @@ int main(int argc, char *argv[]) {
             gnaPluginConfig[GNAConfigParams::KEY_GNA_PRECISION] = "I16";
         }
 
-        gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string(FLAGS_nthreads);
+        gnaPluginConfig[GNAConfigParams::KEY_GNA_LIB_N_THREADS] = std::to_string(FLAGS_cw > 0 ? 1 : FLAGS_nthreads);
         gnaPluginConfig[GNA_CONFIG_KEY(COMPACT_MODE)] = CONFIG_VALUE(NO);
         // -----------------------------------------------------------------------------------------------------
 
@@ -568,6 +602,7 @@ int main(int argc, char *argv[]) {
         }
         auto t0 = Time::now();
         ExecutableNetwork executableNet;
+
         if (!FLAGS_m.empty()) {
             slog::info << "Loading model to the plugin" << slog::endl;
             executableNet = plugin.LoadNetwork(netBuilder.getNetwork(), genericPluginConfig);
@@ -576,7 +611,6 @@ int main(int argc, char *argv[]) {
             executableNet = plugin.ImportNetwork(FLAGS_rg.c_str(), genericPluginConfig);
         }
 
-
         ms loadTime = std::chrono::duration_cast<ms>(Time::now() - t0);
         slog::info << "Model loading time " << loadTime.count() << " ms" << slog::endl;
 
@@ -595,9 +629,9 @@ int main(int argc, char *argv[]) {
             return 0;
         }
 
-        std::vector<std::pair<InferRequest, size_t>> inferRequests(FLAGS_nthreads);
+        std::vector<InferRequestStruct> inferRequests(FLAGS_cw > 0 ? 1 : FLAGS_nthreads);
         for (auto& inferRequest : inferRequests) {
-            inferRequest = {executableNet.CreateInferRequest(), -1};
+            inferRequest = {executableNet.CreateInferRequest(), -1, batchSize};
         }
         // -----------------------------------------------------------------------------------------------------
 
@@ -614,7 +648,7 @@ int main(int argc, char *argv[]) {
             throw std::logic_error("Sample supports only topologies with  1 input");
         }
 
-        Blob::Ptr ptrInputBlob = inferRequests[0].first.GetBlob(cInputInfo.begin()->first);
+        Blob::Ptr ptrInputBlob = inferRequests[0].inferRequest.GetBlob(cInputInfo.begin()->first);
 
         /** configure input precision if model loaded from IR **/
         for (auto &item : inputInfo) {
@@ -632,7 +666,7 @@ int main(int argc, char *argv[]) {
             outputInfo = netBuilder.getNetwork().getOutputsInfo();
         }
 
-        Blob::Ptr ptrOutputBlob = inferRequests[0].first.GetBlob(cOutputInfo.begin()->first);
+        Blob::Ptr ptrOutputBlob = inferRequests[0].inferRequest.GetBlob(cOutputInfo.begin()->first);
 
         for (auto &item : outputInfo) {
             DataPtr outData = item.second;
@@ -699,22 +733,20 @@ int main(int argc, char *argv[]) {
             auto inputFrame = &ptrUtterance.front();
             auto outputFrame = &ptrScores.front();
 
-            size_t frameIndex{0};
+            std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> callPerfMap;
+
+            size_t frameIndex = 0;
+            numFrames += 2 * FLAGS_cw;
             uint32_t numFramesThisBatch{batchSize};
 
             auto t0 = Time::now();
             auto t1 = t0;
 
-            // Doing inference
             while (frameIndex <= numFrames) {
                 if (frameIndex == numFrames) {
-                    bool hasRequests = false;
-                    for (auto &inferRequest : inferRequests) {
-                        if (inferRequest.second != -1) {
-                            hasRequests = true;
-                        }
-                    }
-                    if (!hasRequests) {
+                    if (std::find_if(inferRequests.begin(),
+                            inferRequests.end(),
+                            [&](InferRequestStruct x) { return (x.frameIndex != -1); } ) == inferRequests.end()) {
                         break;
                     }
                 }
@@ -724,54 +756,79 @@ int main(int argc, char *argv[]) {
                     if (frameIndex == numFrames) {
                         numFramesThisBatch = 1;
                     } else {
-                        numFramesThisBatch = (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex) : batchSize;
+                        numFramesThisBatch = (numFrames - frameIndex < batchSize) ? (numFrames - frameIndex)
+                                                                                  : batchSize;
                     }
 
-                    if (inferRequest.second != -1) {
-                        StatusCode code = inferRequest.first.Wait(
+                    if (inferRequest.frameIndex != -1) {
+                        StatusCode code = inferRequest.inferRequest.Wait(
                                 InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
-
                         if (code != StatusCode::OK) {
-                            continue;
+                            if (!useHetero) continue;
+                            if (code != StatusCode::INFER_NOT_STARTED) continue;
                         }
 
-                        if (!FLAGS_o.empty()) {
-                            Blob::Ptr outputBlob = inferRequest.first.GetBlob(cOutputInfo.begin()->first);
-                            std::memcpy(outputFrame,
-                                        outputBlob->buffer(),
-                                        outputBlob->byteSize());
-                            outputFrame += numScoresPerFrame * sizeof(float);
-                        }
-
-                        if (!FLAGS_r.empty()) {
-                            Blob::Ptr outputBlob = inferRequest.first.GetBlob(cOutputInfo.begin()->first);
-                            CompareScores(outputBlob->buffer().as<float *>(),
-                                          &ptrReferenceScores[inferRequest.second *
-                                                              numFrameElementsReference *
-                                                              numBytesPerElementReference],
-                                          &frameError,
-                                          numFramesThisBatch,
-                                          numFrameElementsReference);
-                            UpdateScoreError(&frameError, &totalError);
+                        if (inferRequest.frameIndex >= 0) {
+                            if (!FLAGS_o.empty()) {
+                                outputFrame =
+                                        &ptrScores.front() + numScoresPerFrame * sizeof(float) * (inferRequest.frameIndex);
+                                Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
+                                auto byteSize = inferRequest.numFramesThisBatch * numScoresPerFrame * sizeof(float);
+                                std::memcpy(outputFrame,
+                                            outputBlob->buffer(),
+                                            byteSize);
+                            }
+
+                            if (!FLAGS_r.empty()) {
+                                Blob::Ptr outputBlob = inferRequest.inferRequest.GetBlob(cOutputInfo.begin()->first);
+                                CompareScores(outputBlob->buffer().as<float *>(),
+                                              &ptrReferenceScores[inferRequest.frameIndex *
+                                                                  numFrameElementsReference *
+                                                                  numBytesPerElementReference],
+                                              &frameError,
+                                              inferRequest.numFramesThisBatch,
+                                              numFrameElementsReference);
+                                UpdateScoreError(&frameError, &totalError);
+                            }
+                            if (FLAGS_pc) {
+                                // retrive new counters
+                                getPerformanceCounters(inferRequest.inferRequest, callPerfMap);
+                                // summarize retrived counters with all previous
+                                sumPerformanceCounters(callPerfMap, utterancePerfMap);
+                            }
                         }
                     }
 
-                    inferRequest.second = -1;
-
                     if (frameIndex == numFrames) {
+                        inferRequest.frameIndex = -1;
                         continue;
                     }
 
-                    Blob::Ptr inputBlob = inferRequest.first.GetBlob(cInputInfo.begin()->first);
+                    Blob::Ptr inputBlob = inferRequest.inferRequest.GetBlob(cInputInfo.begin()->first);
+
                     std::memcpy(inputBlob->buffer(),
                                 inputFrame,
                                 inputBlob->byteSize());
 
-                    inferRequest.first.StartAsync();
+                    auto index = frameIndex - 2 * FLAGS_cw;
+                    inferRequest.inferRequest.StartAsync();
+                    inferRequest.frameIndex = index < 0 ? -2 : index;
+                    inferRequest.numFramesThisBatch = numFramesThisBatch;
 
-                    inferRequest.second = frameIndex;
                     frameIndex += numFramesThisBatch;
-                    inputFrame += sizeof(float) * numFrameElementsInput * numFramesThisBatch;
+
+                    if (FLAGS_cw > 0) {
+                        int i = frameIndex - FLAGS_cw;
+                        if (i > 0 && i < static_cast<int>(numFrames)) {
+                            inputFrame += sizeof(float) * numFrameElementsInput * numFramesThisBatch;
+                        } else if (i >= static_cast<int>(numFrames)) {
+                            inputFrame = &ptrUtterance.front() +
+                                         (numFrames - 1) * sizeof(float) * numFrameElementsInput *
+                                         numFramesThisBatch;
+                        }
+                    } else {
+                        inputFrame += sizeof(float) * numFrameElementsInput * numFramesThisBatch;
+                    }
                     inferRequestFetched |= true;
                 }
 
@@ -779,16 +836,6 @@ int main(int argc, char *argv[]) {
                     std::this_thread::sleep_for(std::chrono::milliseconds(1));
                     continue;
                 }
-
-                if (FLAGS_pc) {
-                    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> callPerfMap;
-                    // retrive new counters
-                    for (auto inferRequest : inferRequests) {
-                        getPerformanceCounters(inferRequest.first, callPerfMap);
-                        // summarize retrived counters with all previous
-                        sumPerformanceCounters(callPerfMap, utterancePerfMap);
-                    }
-                }
             }
             t1 = Time::now();
 
diff --git a/inference-engine/samples/speech_sample/speech_sample.hpp b/inference-engine/samples/speech_sample/speech_sample.hpp
index 37cb88f76..7a033f847 100644
--- a/inference-engine/samples/speech_sample/speech_sample.hpp
+++ b/inference-engine/samples/speech_sample/speech_sample.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,8 +15,6 @@
 #include <dirent.h>
 #endif
 
-#define DEFAULT_PATH_P "./lib"
-
 /// @brief message for help argument
 static const char help_message[] = "Print a usage message.";
 
@@ -34,8 +32,11 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If
                                      "the sample will look for this plugin only";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, GNA_SW_EXACT is acceptable. " \
-                                            "Sample will look for a suitable plugin for device specified";
+static const char target_device_message[] = "Specify a target device to infer on. CPU, GPU, GNA_AUTO, GNA_HW, GNA_SW, "
+                                            "GNA_SW_EXACT and HETERO with combination of GNA as the primary device and CPU"
+                                            " as a secondary (e.g. HETERO:GNA,CPU) are supported. The sample will look "
+                                            "for a suitable plugin for device specified.";
+
 /// @brief message for performance counters
 static const char performance_counter_message[] = "Enables per-layer performance report";
 
@@ -74,6 +75,11 @@ static const char batch_size_message[] = "Batch size 1-8 (default 1)";
 static const char infer_num_threads_message[] = "Optional. Number of threads to use for concurrent async" \
 " inference requests on the GNA.";
 
+/// @brief message for context window argument
+static const char context_window_message[] = "Optional. Number of frames for context windows (default is 0). " \
+                                             "Works only with context window networks."
+                                             " If you use the cw flag, then batch size and nthreads arguments are ignored.";
+
 /// \brief Define flag for showing help message <br>
 DEFINE_bool(h, false, help_message);
 
@@ -91,7 +97,7 @@ DEFINE_string(p, "", plugin_message);
 
 /// \brief Define parameter for set path to plugins <br>
 /// Default is ./lib
-DEFINE_string(pp, DEFAULT_PATH_P, plugin_path_message);
+DEFINE_string(pp, "", plugin_path_message);
 
 /// \brief device the target device to infer on <br>
 DEFINE_string(d, "GNA_AUTO", target_device_message);
@@ -133,6 +139,9 @@ DEFINE_int32(bs, 1, batch_size_message);
 /// @brief Number of threads to use for inference on the CPU (also affects Hetero cases)
 DEFINE_int32(nthreads, 1, infer_num_threads_message);
 
+/// @brief Batch size (default 0)
+DEFINE_int32(cw, 0, context_window_message);
+
 /**
  * \brief This function show a help message
  */
@@ -159,5 +168,6 @@ static void showUsage() {
     std::cout << "    -wg \"<path>\"            " << write_gna_model_message << std::endl;
     std::cout << "    -we \"<path>\"            " << write_embedded_model_message << std::endl;
     std::cout << "    -nthreads \"<integer>\"   " << infer_num_threads_message << std::endl;
+    std::cout << "    -cw \"<integer>\"         " << context_window_message << std::endl;
 }
 
diff --git a/inference-engine/samples/style_transfer_sample/CMakeLists.txt b/inference-engine/samples/style_transfer_sample/CMakeLists.txt
index bbc971ece..ac2a1707c 100644
--- a/inference-engine/samples/style_transfer_sample/CMakeLists.txt
+++ b/inference-engine/samples/style_transfer_sample/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "style_transfer_sample")
 
 file (GLOB MAIN_SRC
diff --git a/inference-engine/samples/style_transfer_sample/README.md b/inference-engine/samples/style_transfer_sample/README.md
index 89bd837eb..a192a3ccf 100644
--- a/inference-engine/samples/style_transfer_sample/README.md
+++ b/inference-engine/samples/style_transfer_sample/README.md
@@ -1,7 +1,11 @@
-# Neural Style Transfer Sample
+# Neural Style Transfer C++ Sample
 
-This topic demonstrates how to build and run the Neural Style Transfer sample (NST sample) application, which does
-inference using models of style transfer topology.
+This topic demonstrates how to run the Neural Style Transfer sample application, which performs
+inference of style transfer models.
+
+> **NOTE**: The OpenVINO™ toolkit does not include a pre-trained model to run the Neural Style Transfer sample. A public model from the [Zhaw's Neural Style Transfer repository](https://github.com/zhaw/neural_style) can be used. Read the [Converting a Style Transfer Model from MXNet*](./docs/MO_DG/prepare_model/convert_model/mxnet_specific/Convert_Style_Transfer_From_MXNet.md) topic from the [Model Optimizer Developer Guide](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md) to learn about how to get the trained model and how to convert it to the Inference Engine format (\*.xml + \*.bin).
+
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
 
 ## Running
 
@@ -15,12 +19,12 @@ InferenceEngine:
 style_transfer_sample [OPTION]
 Options:
 
-    -h                      Print a usage message.
-    -i "<path>"             Required. Path to an .bmp image.
+    -h                      Print a usage message
+    -i "<path>"             Required. Path to a .bmp image file or a sequence of paths separated by spaces.
     -m "<path>"             Required. Path to an .xml file with a trained model.
-    -pp "<path>"            Path to a plugin folder.
-    -d "<device>"           Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. Sample will look for a suitable plugin for device specified
-    -ni "<integer>"         Number of iterations (default 1)
+    -pp "<path>"            Path to a plugin folder
+    -d "<device>"           The target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. The sample looks for a suitable plugin for the device specified.
+    -ni "<integer>"         Number of iterations. Default value is 1
     -pc                     Enables per-layer performance report
     -mean_val_r,
     -mean_val_g,
@@ -30,18 +34,16 @@ Options:
 
 Running the application with the empty list of options yields the usage message given above and an error message.
 
-You can do inference on an image using a trained model of NST network on Intel&reg; Processors using the following command:
+To perform inference on an image using a trained model of NST network on Intel® CPUs, use the following command:
 ```sh
 ./style_transfer_sample -i <path_to_image>/cat.bmp -m <path_to_model>/1_decoder_FP32.xml
 ```
 
-> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+## Sample Output
 
-### Outputs
+The application outputs an image (`out1.bmp`) or a sequence of images (`out1.bmp`, ..., `out<N>.bmp`) which are redrawn in style of the style transfer model used for sample.
 
-The application outputs an styled image(s) (<code>out(1).bmp</code>) which were redrawn in style of model which used for infer.
-Style of output images depend on models which use for sample.
-
-## See Also 
+## See Also
 * [Using Inference Engine Samples](./docs/IE_DG/Samples_Overview.md)
-
+* [Model Optimizer](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md)
+* [Model Downloader](https://github.com/opencv/open_model_zoo/tree/2018/model_downloader)
diff --git a/inference-engine/samples/style_transfer_sample/main.cpp b/inference-engine/samples/style_transfer_sample/main.cpp
index 4096335ca..9e943e376 100644
--- a/inference-engine/samples/style_transfer_sample/main.cpp
+++ b/inference-engine/samples/style_transfer_sample/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -67,7 +67,7 @@ int main(int argc, char *argv[]) {
 
         // --------------------------- 1. Load Plugin for inference engine -------------------------------------
         slog::info << "Loading plugin" << slog::endl;
-        InferencePlugin plugin = PluginDispatcher({FLAGS_pp, "../../../lib/intel64", ""}).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({FLAGS_pp}).getPluginByDevice(FLAGS_d);
 
         /** Printing plugin version **/
         printPluginVersion(plugin, std::cout);
@@ -213,7 +213,7 @@ int main(int argc, char *argv[]) {
 
         double total = 0.0;
         /** Start inference & calc performance **/
-        for (int iter = 0; iter < FLAGS_ni; ++iter) {
+        for (size_t iter = 0; iter < FLAGS_ni; ++iter) {
             auto t0 = Time::now();
             infer_request.Infer();
             auto t1 = Time::now();
@@ -274,7 +274,10 @@ int main(int argc, char *argv[]) {
                 if (!outFile.is_open()) {
                     throw new std::runtime_error("Cannot create " + out_img_name);
                 }
-                std::vector<unsigned char> data_img2(data_img.begin(), data_img.end());
+                std::vector<unsigned char> data_img2;
+                for (float i : data_img) {
+                    data_img2.push_back(static_cast<unsigned char>(i));
+                }
                 writeOutputBmp(data_img2.data(), H, W, outFile);
                 outFile.close();
                 slog::info << "Image " << out_img_name << " created!" << slog::endl;
diff --git a/inference-engine/samples/style_transfer_sample/style_transfer_sample.h b/inference-engine/samples/style_transfer_sample/style_transfer_sample.h
index 4377f39b7..9af35b196 100644
--- a/inference-engine/samples/style_transfer_sample/style_transfer_sample.h
+++ b/inference-engine/samples/style_transfer_sample/style_transfer_sample.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ static const char help_message[] = "Print a usage message.";
 static const char image_message[] = "Required. Path to an .bmp image.";
 
 /// @brief message for plugin_path argument
-static const char plugin_path_message[] = "Path to a plugin folder.";
+static const char plugin_path_message[] = "Optional. Path to a plugin folder.";
 
 /// @brief message for model argument
 static const char model_message[] = "Required. Path to an .xml file with a trained model.";\
@@ -32,22 +32,22 @@ static const char plugin_message[] = "Plugin name. For example MKLDNNPlugin. If
 "the sample will look for this plugin only";
 
 /// @brief message for assigning cnn calculation to device
-static const char target_device_message[] = "Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. " \
+static const char target_device_message[] = "Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. " \
 "Sample will look for a suitable plugin for device specified";
 
 /// @brief message for performance counters
-static const char performance_counter_message[] = "Enables per-layer performance report";
+static const char performance_counter_message[] = "Optional. Enables per-layer performance report";
 
 /// @brief message for iterations count
-static const char iterations_count_message[] = "Number of iterations (default 1)";
+static const char iterations_count_message[] = "Optional. Number of iterations. Default value is 1";
 
 /// @brief message for user library argument
-static const char custom_cpu_library_message[] = "Required for MKLDNN (CPU)-targeted custom layers." \
-                                                 "Absolute path to a shared library with the kernels impl.";
+static const char custom_cpu_library_message[] = "Optional. Required for CPU custom layers." \
+                                                 "Absolute path to a shared library with the kernels implementations.";
 
 /// @brief message for clDNN custom kernels desc
-static const char custom_cldnn_message[] = "Required for clDNN (GPU)-targeted custom kernels."\
-                                            "Absolute path to the xml file with the kernels desc.";
+static const char custom_cldnn_message[] = "Optional. Required for GPU custom kernels."\
+                                            "Absolute path to the xml file with the kernels descriptions.";
 
 /// @brief message for mean values arguments
 static const char preprocess_data_message[] = "Mean values. Required if the model needs mean values for preprocessing and postprocessing";
@@ -76,7 +76,7 @@ DEFINE_string(d, "CPU", target_device_message);
 DEFINE_bool(pc, false, performance_counter_message);
 
 /// @brief Iterations count (default 1)
-DEFINE_int32(ni, 1, iterations_count_message);
+DEFINE_uint32(ni, 1, iterations_count_message);
 
 /// @brief Absolute path to CPU library with user layers <br>
 /// It is a required parameter
diff --git a/inference-engine/samples/validation_app/CMakeLists.txt b/inference-engine/samples/validation_app/CMakeLists.txt
index 898256ebb..87b337c0d 100644
--- a/inference-engine/samples/validation_app/CMakeLists.txt
+++ b/inference-engine/samples/validation_app/CMakeLists.txt
@@ -1,9 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
-
 set (TARGET_NAME "validation_app")
 
 file (GLOB MAIN_SRC
@@ -22,7 +20,7 @@ source_group("src" FILES ${MAIN_SRC})
 source_group("include" FILES ${MAIN_HEADERS})
 
 # Find OpenCV components if exist
-find_package(OpenCV COMPONENTS imgcodecs QUIET)
+find_package(OpenCV COMPONENTS imgcodecs imgproc QUIET)
 if(NOT(OpenCV_FOUND))
     message(WARNING "OPENCV is disabled or not found, " ${TARGET_NAME} " skipped")
     return()
diff --git a/inference-engine/samples/validation_app/ClassificationProcessor.cpp b/inference-engine/samples/validation_app/ClassificationProcessor.cpp
index 9c52c1e0b..7db4b3219 100644
--- a/inference-engine/samples/validation_app/ClassificationProcessor.cpp
+++ b/inference-engine/samples/validation_app/ClassificationProcessor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -33,11 +33,12 @@ ClassificationProcessor::ClassificationProcessor(const std::string& flags_m, con
 std::shared_ptr<Processor::InferenceMetrics> ClassificationProcessor::Process(bool stream_output) {
      slog::info << "Collecting labels" << slog::endl;
      ClassificationSetGenerator generator;
-     // try {
-     //     generator.readLabels(labelFileName);
-     // } catch (InferenceEngine::details::InferenceEngineException& ex) {
-     //     slog::warn << "Can't read labels file " << labelFileName << slog::endl;
-     // }
+     try {
+         generator.readLabels(labelFileName);
+     } catch (InferenceEngine::details::InferenceEngineException& ex) {
+         slog::warn << "Can't read labels file " << labelFileName << slog::endl;
+         slog::warn << "Error: " << ex.what() << slog::endl;
+     }
 
      auto validationMap = generator.getValidationMap(imagesPath);
      ImageDecoder decoder;
@@ -59,7 +60,7 @@ std::shared_ptr<Processor::InferenceMetrics> ClassificationProcessor::Process(bo
 
      auto iter = validationMap.begin();
      while (iter != validationMap.end()) {
-         int b = 0;
+         size_t b = 0;
          int filesWatched = 0;
          for (; b < batch && iter != validationMap.end(); b++, iter++, filesWatched++) {
              expected[b] = iter->first;
@@ -68,6 +69,7 @@ std::shared_ptr<Processor::InferenceMetrics> ClassificationProcessor::Process(bo
                  files[b] = iter->second;
              } catch (const InferenceEngineException& iex) {
                  slog::warn << "Can't read file " << iter->second << slog::endl;
+                 slog::warn << "Error: " << iex.what() << slog::endl;
                  // Could be some non-image file in directory
                  b--;
                  continue;
@@ -80,16 +82,16 @@ std::shared_ptr<Processor::InferenceMetrics> ClassificationProcessor::Process(bo
          auto firstOutputData = firstOutputBlob->buffer().as<PrecisionTrait<Precision::FP32>::value_type*>();
          InferenceEngine::TopResults(TOP_COUNT, *firstOutputBlob, results);
 
-         for (int i = 0; i < b; i++) {
+         for (size_t i = 0; i < b; i++) {
              int expc = expected[i];
              if (zeroBackground) expc++;
 
-             bool top1Scored = (results[0 + TOP_COUNT * i] == expc);
+             bool top1Scored = (static_cast<int>(results[0 + TOP_COUNT * i]) == expc);
              dumper << "\"" + files[i] + "\"" << top1Scored;
              if (top1Scored) im.top1Result++;
              for (int j = 0; j < TOP_COUNT; j++) {
                  unsigned classId = results[j + TOP_COUNT * i];
-                 if (classId == expc) {
+                 if (static_cast<int>(classId) == expc) {
                      im.topCountResult++;
                  }
                  dumper << classId << firstOutputData[classId + i * (firstOutputBlob->size() / batch)];
diff --git a/inference-engine/samples/validation_app/ClassificationProcessor.hpp b/inference-engine/samples/validation_app/ClassificationProcessor.hpp
index 1813ac3c4..e7a6c9486 100644
--- a/inference-engine/samples/validation_app/ClassificationProcessor.hpp
+++ b/inference-engine/samples/validation_app/ClassificationProcessor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp b/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp
index 6109a9626..8e3a23e4f 100644
--- a/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp
+++ b/inference-engine/samples/validation_app/ObjectDetectionProcessor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -24,7 +24,7 @@ ObjectDetectionProcessor::ObjectDetectionProcessor(const std::string& flags_m, c
         double threshold, InferenceEngine::InferencePlugin plugin, CsvDumper& dumper,
         const std::string& flags_a, const std::string& classes_list_file, PreprocessingOptions preprocessingOptions, bool scaleProposalToInputSize)
             : Processor(flags_m, flags_d, flags_i, flags_b, plugin, dumper, "Object detection network", preprocessingOptions),
-              threshold(threshold), annotationsPath(flags_a), subdir(subdir), scaleProposalToInputSize(scaleProposalToInputSize) {
+              annotationsPath(flags_a), subdir(subdir), threshold(threshold), scaleProposalToInputSize(scaleProposalToInputSize) {
     std::ifstream clf(classes_list_file);
     if (!clf) {
         throw UserException(1) <<  "Classes list file \"" << classes_list_file << "\" not found or inaccessible";
@@ -65,19 +65,15 @@ shared_ptr<Processor::InferenceMetrics> ObjectDetectionProcessor::Process(bool s
     for (auto& ann : annCollector.annotations()) {
         std::list<DetectedObject> dobList;
         for (auto& obj : ann.objects) {
-            DetectedObject dob(classes[obj.name], obj.bndbox.xmin, obj.bndbox.ymin, obj.bndbox.xmax, obj.bndbox.ymax, 1.0, obj.difficult != 0);
+            DetectedObject dob(classes[obj.name], static_cast<float>(obj.bndbox.xmin),
+                static_cast<float>(obj.bndbox.ymin), static_cast<float>(obj.bndbox.xmax),
+                static_cast<float>(obj.bndbox.ymax), 1.0f, obj.difficult != 0);
             dobList.push_back(dob);
         }
         ImageDescription id(dobList);
         desiredForFiles.insert(std::pair<std::string, ImageDescription>(ann.folder + "/" + (!subdir.empty() ? subdir + "/" : "") + ann.filename, id));
     }
 
-
-    ImageDecoder decoder;
-
-    const int maxProposalCount = outputDims[1];
-    const int objectSize = outputDims[0];
-
     for (auto & item : outInfo) {
         DataPtr outputData = item.second;
         if (!outputData) {
@@ -104,18 +100,17 @@ shared_ptr<Processor::InferenceMetrics> ObjectDetectionProcessor::Process(bool s
 
     while (iter != annCollector.annotations().end()) {
         std::vector<std::string> files;
-        int b = 0;
+        size_t b = 0;
 
         int filesWatched = 0;
         for (; b < batch && iter != annCollector.annotations().end(); b++, iter++, filesWatched++) {
             expected[b] = *iter;
             string filename = iter->folder + "/" + (!subdir.empty() ? subdir + "/" : "") + iter->filename;
             try {
-                Size orig_size = decoder.insertIntoBlob(std::string(imagesPath) + "/" + filename, b, *firstInputBlob, preprocessingOptions);
                 float scale_x, scale_y;
 
-                scale_x = 1.0 / iter->size.width;  // orig_size.width;
-                scale_y = 1.0 / iter->size.height;  // orig_size.height;
+                scale_x = 1.0f / iter->size.width;  // orig_size.width;
+                scale_y = 1.0f / iter->size.height;  // orig_size.height;
 
                 if (scaleProposalToInputSize) {
                     scale_x *= firstInputBlob->dims()[0];
@@ -128,6 +123,7 @@ shared_ptr<Processor::InferenceMetrics> ObjectDetectionProcessor::Process(bool s
                 files.push_back(filename);
             } catch (const InferenceEngineException& iex) {
                 slog::warn << "Can't read file " << this->imagesPath + "/" + filename << slog::endl;
+                slog::warn << "Error: " << iex.what() << slog::endl;
                 // Could be some non-image file in directory
                 b--;
                 continue;
@@ -135,9 +131,6 @@ shared_ptr<Processor::InferenceMetrics> ObjectDetectionProcessor::Process(bool s
         }
 
         if (files.size() == batch) {
-            InferenceEngine::StatusCode sts;
-            InferenceEngine::ResponseDesc dsc;
-
             // Infer model
             Infer(progress, filesWatched, im);
 
@@ -146,7 +139,7 @@ shared_ptr<Processor::InferenceMetrics> ObjectDetectionProcessor::Process(bool s
 
             // Calculating similarity
             //
-            for (int b = 0; b < files.size(); b++) {
+            for (size_t b = 0; b < files.size(); b++) {
                 ImageDescription result(detectedObjects[files[b]]);
                 im.apc.consumeImage(result, scaledDesiredForFiles.at(files[b]));
             }
diff --git a/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp b/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp
index 0bb223124..7a277105f 100644
--- a/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp
+++ b/inference-engine/samples/validation_app/ObjectDetectionProcessor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/PreprocessingOptions.hpp b/inference-engine/samples/validation_app/PreprocessingOptions.hpp
index 00893088c..3e5da5e05 100644
--- a/inference-engine/samples/validation_app/PreprocessingOptions.hpp
+++ b/inference-engine/samples/validation_app/PreprocessingOptions.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/Processor.cpp b/inference-engine/samples/validation_app/Processor.cpp
index d352331c0..cf8e73b9e 100644
--- a/inference-engine/samples/validation_app/Processor.cpp
+++ b/inference-engine/samples/validation_app/Processor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,8 +14,8 @@ using namespace InferenceEngine;
 Processor::Processor(const std::string& flags_m, const std::string& flags_d, const std::string& flags_i, int flags_b,
         InferencePlugin plugin, CsvDumper& dumper, const std::string& approach, PreprocessingOptions preprocessingOptions)
 
-    : targetDevice(flags_d), modelFileName(flags_m), imagesPath(flags_i), batch(flags_b),
-      plugin(plugin), dumper(dumper), approach(approach), preprocessingOptions(preprocessingOptions) {
+    : modelFileName(flags_m), targetDevice(flags_d), imagesPath(flags_i), batch(flags_b),
+      preprocessingOptions(preprocessingOptions), dumper(dumper), plugin(plugin), approach(approach) {
 
     // --------------------Load network (Generated xml/bin files)-------------------------------------------
     slog::info << "Loading network files" << slog::endl;
diff --git a/inference-engine/samples/validation_app/Processor.hpp b/inference-engine/samples/validation_app/Processor.hpp
index 49d52630a..22ce3b613 100644
--- a/inference-engine/samples/validation_app/Processor.hpp
+++ b/inference-engine/samples/validation_app/Processor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -13,9 +13,9 @@
 
 #include "inference_engine.hpp"
 
-#include "csv_dumper.hpp"
+#include "samples/csv_dumper.hpp"
 #include "image_decoder.hpp"
-#include "console_progress.hpp"
+#include "samples/console_progress.hpp"
 
 using namespace std;
 
@@ -36,7 +36,7 @@ protected:
     std::string modelFileName;
     std::string targetDevice;
     std::string imagesPath;
-    int batch;
+    size_t batch;
     InferenceEngine::InferRequest inferRequest;
     InferenceEngine::InputsDataMap inputInfo;
     InferenceEngine::OutputsDataMap outInfo;
diff --git a/inference-engine/samples/validation_app/README.md b/inference-engine/samples/validation_app/README.md
index 4c8af4701..11c9ac7a7 100644
--- a/inference-engine/samples/validation_app/README.md
+++ b/inference-engine/samples/validation_app/README.md
@@ -15,6 +15,8 @@ Possible use cases of the tool:
 * Use Validation Application as another sample: although the code is much more complex than in classification and object
   detection samples, the source code is open and can be re-used.
 
+> **NOTE**: By default, Inference Engine samples and demos expect input with BGR channels order. If you trained your model to work with RGB order, you need to manually rearrange the default channels order in the sample or demo application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to **When to Specify Input Shapes** section of [Converting a Model Using General Conversion Parameters](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md).
+
 ## Validation Application Options
 
 The Validation Application provides the following command-line interface (CLI):
@@ -31,8 +33,8 @@ Available options:
     -m <path>                 Required. Path to an .xml file with a trained model
     -lbl <path>               Labels file path. The labels file contains names of the dataset classes
     -l <absolute_path>        Required for CPU custom layers. Absolute path to a shared library with the kernel implementations
-    -c <absolute_path>        Required for GPU custom kernels.Absolute path to an .xml file with the kernel descriptions.
-    -d <device>               Target device to infer on: CPU (default), GPU, FPGA, or MYRIAD. The application looks for a suitable plugin for the specified device.
+    -c <absolute_path>        Required for GPU custom kernels. Absolute path to an .xml file with the kernel descriptions.
+    -d <device>               Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD. The application looks for a suitable plugin for the specified device.
     -b N                      Batch size value. If not specified, the batch size value is taken from IR
     -ppType <type>            Preprocessing type. Options: "None", "Resize", "ResizeCrop"
     -ppSize N                 Preprocessing size (used with ppType="ResizeCrop")
@@ -57,6 +59,8 @@ The tool options are divided into two categories:
 
 ## General Workflow
 
+> **NOTE**: By default, Inference Engine samples expect input images to have BGR channels order. If you trained you model to work with images in RGB order, you need to manually rearrange the default channels order in the sample application or reconvert your model using the Model Optimizer tool with `--reverse_input_channels` argument specified. For more information about the argument, refer to [When to Specify Input Shapes](./docs/MO_DG/prepare_model/convert_model/Converting_Model_General.md#when_to_reverse_input_channels).
+
 When executed, the Validation Application perform the following steps:
 
 1. Loads a model to an Inference Engine plugin
@@ -64,7 +68,6 @@ When executed, the Validation Application perform the following steps:
     - if you specified a directory, the application tries to load labels first. To do this, it searches for the file
       with the same name as a model, but with `.labels` extension (instead of `.xml`).
       Then it searches for the specified folder, detects its sub-folders named as known labels, and adds all images from these sub-folders to the validation set. When there are no such sub-folders, validation set is considered empty.
-
     - if you specified a `.txt` file, the application reads this file expecting every line to be in the correct format.
       For more information about the format, refer to the <a href="#preparing">Preparing the Dataset</a> section below.
 
@@ -195,6 +198,8 @@ Save this file as `VOC_SSD_Classes.txt`.
 
 ## Validate Classification Models
 
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
 Once you have prepared the dataset (refer to the <a href="#preparing">Preparing the Dataset</a> section above),
 run the following command to infer a classification model on the selected dataset:
 ```bash
@@ -206,6 +211,8 @@ run the following command to infer a classification model on the selected datase
 > **NOTE**: Validation Application was validated with SSD CNN. Any network that can be inferred by the Inference Engine
 > and has the same input and output format as one of these should be supported as well.
 
+> **NOTE**: Before running the sample with a trained model, make sure the model is converted to the Inference Engine format (\*.xml + \*.bin) using the [Model Optimizer tool](./docs/MO_DG/Deep_Learning_Model_Optimizer_DevGuide.md).
+
 Once you have prepared the dataset (refer to the <a href="#preparing">Preparing the Dataset</a> section above),
 run the following command to infer an Object Detection model on the selected dataset:
 ```bash
diff --git a/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp b/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp
index 52f3f6b1e..a8dc30e0a 100644
--- a/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp
+++ b/inference-engine/samples/validation_app/SSDObjectDetectionProcessor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -26,16 +26,16 @@ protected:
         const auto detectionOutArray = inferRequest.GetBlob(firstOutputName);
         const float *box = detectionOutArray->buffer().as<float*>();
 
-        const int maxProposalCount = outputDims[1];
-        const int objectSize = outputDims[0];
+        const size_t maxProposalCount = outputDims[1];
+        const size_t objectSize = outputDims[0];
 
-        for (int b = 0; b < batch; b++) {
+        for (size_t b = 0; b < batch; b++) {
             string fn = files[b];
             std::list<DetectedObject> dr = std::list<DetectedObject>();
             detectedObjects.insert(std::pair<std::string, std::list<DetectedObject>>(fn, dr));
         }
 
-        for (int i = 0; i < maxProposalCount; i++) {
+        for (size_t i = 0; i < maxProposalCount; i++) {
             float image_id = box[i * objectSize + 0];
             float label = box[i * objectSize + 1];
             float confidence = box[i * objectSize + 2];
@@ -48,7 +48,8 @@ protected:
                 break;  // Finish
             }
 
-            detectedObjects[files[image_id]].push_back(DetectedObject(label, xmin, ymin, xmax, ymax, confidence));
+            detectedObjects[files[static_cast<size_t>(image_id)]].push_back(
+                DetectedObject(static_cast<int>(label), xmin, ymin, xmax, ymax, confidence));
         }
 
         return detectedObjects;
diff --git a/inference-engine/samples/validation_app/VOCAnnotationParser.cpp b/inference-engine/samples/validation_app/VOCAnnotationParser.cpp
index 94693db4f..68e265601 100644
--- a/inference-engine/samples/validation_app/VOCAnnotationParser.cpp
+++ b/inference-engine/samples/validation_app/VOCAnnotationParser.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/VOCAnnotationParser.hpp b/inference-engine/samples/validation_app/VOCAnnotationParser.hpp
index b23363a1c..a9d2d89ac 100644
--- a/inference-engine/samples/validation_app/VOCAnnotationParser.hpp
+++ b/inference-engine/samples/validation_app/VOCAnnotationParser.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp b/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp
index fe9dad91b..816f96998 100644
--- a/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp
+++ b/inference-engine/samples/validation_app/YOLOObjectDetectionProcessor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -46,10 +46,6 @@ private:
             int row = grid / S;
             int col = grid % S;
             for (int b = 0; b < B; b++) {
-                int index = grid * B + b;
-                int p_index = SS * C + grid * B + b;
-                float scale = net_out[p_index];
-                int box_index = SS * (C + B) + (grid * B + b) * 4;
                 int objectType = class_num;
 
                 float conf = confs[(grid * B + b)];
@@ -57,7 +53,6 @@ private:
                 float yc = (cords[(grid * B + b) * 4 + 1] + row) / S;
                 float w = pow(cords[(grid * B + b) * 4 + 2], 2);
                 float h = pow(cords[(grid * B + b) * 4 + 3], 2);
-                int class_index = grid * C;
                 float prob = probs[grid * C + class_num] * conf;
 
                 DetectedObject bx(objectType, xc - w / 2, yc - h / 2, xc + w / 2,
@@ -77,12 +72,12 @@ private:
 
         // Filtering out overlapping boxes
         std::vector<bool> overlapped(boxes.size(), false);
-        for (int i = 0; i < boxes.size(); i++) {
+        for (size_t i = 0; i < boxes.size(); i++) {
             if (overlapped[i])
                 continue;
 
             DetectedObject box_i = boxes[i];
-            for (int j = i + 1; j < boxes.size(); j++) {
+            for (size_t j = i + 1; j < boxes.size(); j++) {
                 DetectedObject box_j = boxes[j];
                 if (DetectedObject::ioU(box_i, box_j) >= 0.4) {
                     overlapped[j] = true;
@@ -90,7 +85,7 @@ private:
             }
         }
 
-        for (int i = 0; i < boxes.size(); i++) {
+        for (size_t i = 0; i < boxes.size(); i++) {
             if (boxes[i].prob > 0.0f) {
                 boxes_result.push_back(boxes[i]);
             }
diff --git a/inference-engine/samples/validation_app/classification_set_generator.cpp b/inference-engine/samples/validation_app/classification_set_generator.cpp
index 2ff731d06..051474e3d 100644
--- a/inference-engine/samples/validation_app/classification_set_generator.cpp
+++ b/inference-engine/samples/validation_app/classification_set_generator.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -112,7 +112,8 @@ std::vector<std::pair<int, std::string>> ClassificationSetGenerator::validationM
         try {
             classId = std::stoi(line.substr(pos + 1));
         } catch (const std::invalid_argument& e) {
-            THROW_USER_EXCEPTION(1) << "Invalid class id specified at line " << lineNumber << ":\n> " << line;
+            THROW_USER_EXCEPTION(1) << "Invalid class id specified at line " << lineNumber << ":\n> " << line
+                                    << " Error: " << e.what();
         }
         imgPath = line.substr(0, pos);
         validationMap.push_back({ classId, dir + imgPath });
diff --git a/inference-engine/samples/validation_app/classification_set_generator.hpp b/inference-engine/samples/validation_app/classification_set_generator.hpp
index 252717ebd..764364aa3 100644
--- a/inference-engine/samples/validation_app/classification_set_generator.hpp
+++ b/inference-engine/samples/validation_app/classification_set_generator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/image_decoder.cpp b/inference-engine/samples/validation_app/image_decoder.cpp
index 7ca0894c4..b977b63d3 100644
--- a/inference-engine/samples/validation_app/image_decoder.cpp
+++ b/inference-engine/samples/validation_app/image_decoder.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -40,7 +40,7 @@ cv::Size addToBlob(std::string name, int batch_pos, Blob& blob, PreprocessingOpt
 
     // TODO This is a dirty hack to support VOC2007 (where no file extension is put into annotation).
     //      Rewrite.
-    if (name.find('.') == -1) tryName = name + ".JPEG";
+    if (name.find('.') == std::string::npos) tryName = name + ".JPEG";
 
     orig_image = imread(tryName, loadMode);
 
@@ -70,7 +70,7 @@ cv::Size addToBlob(std::string name, int batch_pos, Blob& blob, PreprocessingOpt
         THROW_IE_EXCEPTION << "Unsupported ResizeCropPolicy value";
     }
 
-    float scaleFactor = preprocessingOptions.scaleValuesTo01 ? 255.0 : 1.0;
+    float scaleFactor = preprocessingOptions.scaleValuesTo01 ? 255.0f : 1.0f;
 
     for (int c = 0; c < channels; c++) {
         for (int h = 0; h < height; h++) {
@@ -106,7 +106,7 @@ std::map<std::string, cv::Size> convertToBlob(std::vector<std::string> names, in
     }
 
     std::map<std::string, Size> res;
-    for (int b = 0; b < names.size(); b++) {
+    for (size_t b = 0; b < names.size(); b++) {
         std::string name = names[b];
         Size orig_size = add_func(name, batch_pos + b, blob, preprocessingOptions);
         res.insert(std::pair<std::string, Size>(name, orig_size));
diff --git a/inference-engine/samples/validation_app/image_decoder.hpp b/inference-engine/samples/validation_app/image_decoder.hpp
index 35cca5aae..922956e8b 100644
--- a/inference-engine/samples/validation_app/image_decoder.hpp
+++ b/inference-engine/samples/validation_app/image_decoder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/main.cpp b/inference-engine/samples/validation_app/main.cpp
index a2c9446a0..23137dea6 100644
--- a/inference-engine/samples/validation_app/main.cpp
+++ b/inference-engine/samples/validation_app/main.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -35,8 +35,6 @@ using namespace InferenceEngine;
 
 using InferenceEngine::details::InferenceEngineException;
 
-#define DEFAULT_PATH_P "./lib"
-
 /// @brief Message for help argument
 static const char help_message[] = "Print a help message";
 /// @brief Message for images argument
@@ -53,7 +51,7 @@ static const char model_message[] = "Required. Path to an .xml file with a train
 static const char plugin_message[] = "Plugin name. For example, CPU. If this parameter is passed, "
                                      "the sample looks for a specified plugin only.";
 /// @brief Message for assigning cnn calculation to device
-static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, or MYRIAD."
+static const char target_device_message[] = "Target device to infer on: CPU (default), GPU, FPGA, HDDL or MYRIAD."
                                             " The application looks for a suitable plugin for the specified device.";
 /// @brief Message for label argument
 static const char label_message[] = "Path to a file with labels for a model";
@@ -123,7 +121,7 @@ DEFINE_string(p, "", plugin_message);
 DEFINE_string(OCl, "", label_message);
 /// @brief Define parameter for a path to plugins <br>
 /// Default is ./lib
-DEFINE_string(pp, DEFAULT_PATH_P, plugin_path_message);
+DEFINE_string(pp, "", plugin_path_message);
 /// @brief Define parameter for a target device to infer on <br>
 DEFINE_string(d, "CPU", target_device_message);
 /// @brief Define parameter for batch size <br>
@@ -267,7 +265,7 @@ int main(int argc, char *argv[]) {
         // ---------------------Loading plugin for Inference Engine------------------------------------------------
         slog::info << "Loading plugin" << slog::endl;
         /** Loading the library with extensions if provided**/
-        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp, "../../../lib/intel64", "" }).getPluginByDevice(FLAGS_d);
+        InferencePlugin plugin = PluginDispatcher({ FLAGS_pp }).getPluginByDevice(FLAGS_d);
 
         /** Loading default extensions **/
         if (FLAGS_d.find("CPU") != std::string::npos) {
@@ -358,7 +356,6 @@ int main(int argc, char *argv[]) {
             showUsage();
             return ex.list().begin()->exitCode();
         } else {
-            const char* s = ex.what();
             slog::err << "Input problems: \n" << ex.what() << slog::endl;
             showUsage();
             return ex.list().begin()->exitCode();
diff --git a/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp b/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp
index 0e976cfab..085d6c67b 100644
--- a/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp
+++ b/inference-engine/samples/validation_app/pugixml/pugiconfig.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/pugixml/pugixml.cpp b/inference-engine/samples/validation_app/pugixml/pugixml.cpp
index d4db9c48d..aa18656d7 100644
--- a/inference-engine/samples/validation_app/pugixml/pugixml.cpp
+++ b/inference-engine/samples/validation_app/pugixml/pugixml.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/pugixml/pugixml.hpp b/inference-engine/samples/validation_app/pugixml/pugixml.hpp
index 9f609d122..fd3067fc8 100644
--- a/inference-engine/samples/validation_app/pugixml/pugixml.hpp
+++ b/inference-engine/samples/validation_app/pugixml/pugixml.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/samples/validation_app/user_exception.hpp b/inference-engine/samples/validation_app/user_exception.hpp
index bdeda3cb9..dd3f43d11 100644
--- a/inference-engine/samples/validation_app/user_exception.hpp
+++ b/inference-engine/samples/validation_app/user_exception.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -85,7 +85,7 @@ public:
             ss << _list.back().what();
         } else {
             auto iter = _list.begin();
-            for (int i = 0; i < _list.size() - 1; i++) {
+            for (size_t i = 0; i < _list.size() - 1; i++) {
                 ss << "\t* " << (*iter++).what() << std::endl;
             }
             ss << "\t* " << _list.back().what();
diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt
index cabd78b10..aad2b5b1c 100644
--- a/inference-engine/src/CMakeLists.txt
+++ b/inference-engine/src/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -35,3 +35,6 @@ endfunction()
 
 add_subdirectory(extension EXCLUDE_FROM_ALL)
 add_library(IE::ie_cpu_extension ALIAS ie_cpu_extension)
+
+file(GLOB_RECURSE EXTENSION_SOURCES extension/*.cpp extension/*.hpp extension/*.h)
+add_cpplint_target(ie_cpu_extension_cpplint FOR_SOURCES ${EXTENSION_SOURCES})
diff --git a/inference-engine/src/cldnn_engine/CMakeLists.txt b/inference-engine/src/cldnn_engine/CMakeLists.txt
index 372bae86c..a2d81c326 100644
--- a/inference-engine/src/cldnn_engine/CMakeLists.txt
+++ b/inference-engine/src/cldnn_engine/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -67,10 +67,12 @@ set(CLDNN_LIBRARY clDNN_shlib)
 add_library(${TARGET_NAME} SHARED
     ${MAIN_SRC}
     ${LIBRARY_HEADERS})
-target_link_libraries(${TARGET_NAME} pugixml ${INTEL_ITT_LIBS} inference_engine ${CLDNN_LIBRARY})
+target_link_libraries(${TARGET_NAME} ${INTEL_ITT_LIBS} inference_engine ${CLDNN_LIBRARY})
 
 set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
 
 #copy default global xml file describing the custom kernels and the *.cl files
 add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
     COMMAND "${CMAKE_COMMAND}" -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/cldnn_global_custom_kernels $<TARGET_FILE_DIR:${TARGET_NAME}>/cldnn_global_custom_kernels)
+
+add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
diff --git a/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp b/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp
index a247d64aa..32fb41486 100644
--- a/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_custom_layer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,6 +9,11 @@
 #include <fstream>
 #include <streambuf>
 #include <climits>
+
+#ifdef _WIN32
+# include <windows.h>
+#endif
+
 #include "simple_math.h"
 
 using namespace InferenceEngine;
diff --git a/inference-engine/src/cldnn_engine/cldnn_custom_layer.h b/inference-engine/src/cldnn_engine/cldnn_custom_layer.h
index 89a802ffe..e948f2984 100644
--- a/inference-engine/src/cldnn_engine/cldnn_custom_layer.h
+++ b/inference-engine/src/cldnn_engine/cldnn_custom_layer.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -57,8 +57,8 @@ public:
     const int InputDimSourceIndex() { return m_wgDimInputIdx; }
 
 protected:
-    CLDNNCustomLayer() {}
-    explicit CLDNNCustomLayer(const std::string dirname) : m_configDir(dirname) {}
+    CLDNNCustomLayer() : m_wgDimInputIdx(0) {}
+    explicit CLDNNCustomLayer(const std::string dirname) : m_configDir(dirname), m_wgDimInputIdx(0) {}
 
     bool Error() const { return m_ErrorMessage.length() > 0; }
     void LoadSingleLayer(const pugi::xml_node& node);
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
index 4b79fe6ec..fab02d34c 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -116,12 +116,8 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(InferenceEngine::
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
     try {
         plugin = make_ie_compatible_plugin(
-                {1, 5,
-#ifdef CLDNN_VERSION
-                 CLDNN_VERSION,
-#else
+                {1, 6,
                  CI_BUILD_NUMBER,
-#endif
                  "clDNNPlugin"}, std::make_shared<clDNNEngine>());
         return OK;
     }
diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.h b/inference-engine/src/cldnn_engine/cldnn_engine.h
index 6de94cfb9..6241a9463 100644
--- a/inference-engine/src/cldnn_engine/cldnn_engine.h
+++ b/inference-engine/src/cldnn_engine/cldnn_engine.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/ctc_greedy_decoder.cl b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/ctc_greedy_decoder.cl
index 40a71078e..0467adc35 100644
--- a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/ctc_greedy_decoder.cl
+++ b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/ctc_greedy_decoder.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/grn.cl b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/grn.cl
index 554b8b608..1f370435a 100644
--- a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/grn.cl
+++ b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/grn.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/interp.cl b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/interp.cl
index ef41d13c7..649667d56 100644
--- a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/interp.cl
+++ b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/interp.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/prior_box_clustered.cl b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/prior_box_clustered.cl
index a61f02132..f1fe258ea 100644
--- a/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/prior_box_clustered.cl
+++ b/inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/prior_box_clustered.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.cpp b/inference-engine/src/cldnn_engine/cldnn_graph.cpp
index fe61da151..9f8f58bac 100644
--- a/inference-engine/src/cldnn_engine/cldnn_graph.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_graph.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -42,6 +42,11 @@
 #include <CPP/tile.hpp>
 #include <CPP/border.hpp>
 #include <CPP/lstm.hpp>
+#include <CPP/gather.hpp>
+#include <CPP/depth_to_space.hpp>
+#include <CPP/shuffle_channels.hpp>
+#include <CPP/strided_slice.hpp>
+#include <CPP/reverse_sequence.hpp>
 #include <chrono>
 #include <cmath>
 #include <algorithm>
@@ -52,7 +57,6 @@
 #include <graph_tools.hpp>
 #include <ie_layers_internal.hpp>
 #include <net_pass.h>
-#include <ie_layers_prv.h>
 #include "cldnn_infer_request.h"
 #include <cpp_interfaces/ie_executor_manager.hpp>
 #include "details/caseless.hpp"
@@ -99,9 +103,6 @@ static void ValidateLayer(const InferenceEngine::CNNLayerPtr& layer, unsigned in
 }
 
 static void ValidateEltwiseLayer(const InferenceEngine::CNNLayerPtr& layer) {
-    if (layer->insData.size() < 2) {
-        THROW_CLDNN_EXCEPTION("Invalid number of inputs for layer: " << layer->name << ". Eltwise layer should take at least 2 inputs");
-    }
     if (layer->_fusedWith) {
         THROW_CLDNN_EXCEPTION("Unsupported fuse in layer: " << layer->name << " with: " << layer->_fusedWith->name);
     }
@@ -287,7 +288,6 @@ bool CLDNNGraph::CanProcessDynBatch(InferenceEngine::ICNNNetwork &network) const
 
 CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& config, int max_batch) : m_config(config),
     m_defaultFormat(cldnn::format::bfyx),
-    m_networkPrecision(cldnn::data_types::f32),
     m_curBatch(-1) {
     m_env.engine = std::make_shared<cldnn::engine>(cldnn::engine_configuration(
         (config.useProfiling || (config.tuningConfig.mode != cldnn::tuning_mode::tuning_disabled)),
@@ -309,7 +309,21 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& conf
         _taskExecutor = executorManager->getExecutor(TargetDeviceInfo::name(TargetDevice::eGPU));
     }
 
-    bool res = !NetPass::CombineLSTMSeq(network) ? NetPass::UnrollTI(network) : true;
+    bool res = !NetPass::CombineRNNSeq(network) ? NetPass::UnrollTI(network) : true;
+    res &= NetPass::UnrollRNN_if(network, [] (RNNCellBase rnn) -> bool {
+        if (rnn.clip != 0.0f)
+            return true;
+        if (rnn.type == "GRUCell" ||
+            rnn.type == "GRUSequence" ||
+            rnn.type == "RNNCell" ||
+            rnn.type == "RNNSequence")
+            return true;
+        if (!(rnn.type == "LSTMCell" || rnn.type == "LSTMSequence") ||
+            rnn.activations == std::vector<std::string>{"sigmoid", "tanh", "tanh"})
+            return false;
+        return true;
+    });
+
     if (!res)
         THROW_CLDNN_EXCEPTION("Plugin doesn't support Tensor Iterator in pure form. "
                               "No one TI optimization pattern was not applied successfully");
@@ -372,6 +386,14 @@ CLDNNGraph::CLDNNGraph(InferenceEngine::ICNNNetwork& network, const Config& conf
     m_env.debugOptions.ClearTimedEvents();
 }
 
+inline std::string layer_type_name_ID(InferenceEngine::CNNLayer* layer) {
+    return layer->type + ":" + layer->name;
+}
+
+inline std::string layer_type_name_ID(InferenceEngine::CNNLayerPtr layer) {
+    return layer_type_name_ID(layer.get());
+}
+
 std::vector<InferenceEngine::CNNLayerPtr> CLDNNGraph::GetNextLayers(const InferenceEngine::DataPtr data) {
     std::vector<InferenceEngine::CNNLayerPtr> nextLayers;
     if (data == nullptr) {
@@ -417,7 +439,6 @@ InferenceEngine::CNNLayerPtr CLDNNGraph::GetNextSingleLayer(const InferenceEngin
 
 void CLDNNGraph::InitFormat(InferenceEngine::ICNNNetwork &network) {
     m_defaultFormat    = FormatFromLayout(InferenceEngine::Layout::NCHW);
-    m_networkPrecision = DataTypeFromPrecision(network.getPrecision());
 }
 
 void CLDNNGraph::CompileNetwork() {
@@ -451,29 +472,30 @@ void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
         THROW_CLDNN_EXCEPTION("No inputs detected.");
     }
 
+    using LayerVect = std::vector<InferenceEngine::CNNLayerPtr>;
     std::list<InferenceEngine::CNNLayerPtr> layersToHandle;
-    for (auto input : networkInputs) {
-        IE_ASSERT(input.first.compare(input.second->name()) == 0);
-        AddInputPrimitive(input.second);
-
-        auto consumers = input.second->getInputData()->getInputTo();
 
-        // collect next layers to process
-        for (auto l : consumers) {
-            layersToHandle.push_back(l.second);
+    auto push_if = [&](const LayerVect& clist) {
+        for (auto& l : clist) {
+            if ( (std::find_if( layersToHandle.begin(),
+                            layersToHandle.end(),
+                            [&](const CNNLayerPtr& x) { return layer_type_name_ID(x) == layer_type_name_ID(l); } )) == layersToHandle.end() )
+                layersToHandle.push_back(l);
         }
-    }
+    };
 
     auto allInputs = CNNNetGetAllInputLayers(network);
     for (auto input : allInputs) {
         if (LayerTypeFromStr(input->type) == ConstantBlob) {
             AddConstantBlobInput(input);
-
-            // collect next layers to process
-            for (auto nl : GetNextLayers(input)) {
-                layersToHandle.push_back(nl);
+        } else {
+            auto iter = networkInputs.find(input->name);    // regular input
+            if (iter != networkInputs.end()) {
+                AddInputPrimitive(iter->second, input->precision);
             }
         }
+        // collect next layers to process
+        push_if(GetNextLayers(input));
     }
 
     // 2. traverse layers
@@ -485,7 +507,7 @@ void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
         }
         InferenceEngine::CNNLayerPtr currLayer = layersToHandle.front();
         layersToHandle.pop_front();
-        auto layerName = currLayer->name;
+        auto layerName = layer_type_name_ID(currLayer);
 
         if (m_env.primitiveIDs.find(layerName) != m_env.primitiveIDs.end()) {
             infLoopProtection = 0;
@@ -496,7 +518,7 @@ void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
         try {
             GetPrevLayersPrimitives(currLayer);
         } catch (std::exception) {
-                missingInput = true;
+            missingInput = true;
         }
 
         if (missingInput) {  // some inputs aren't created yet
@@ -505,13 +527,10 @@ void CLDNNGraph::Load(InferenceEngine::ICNNNetwork &network) {
         }
 
         infLoopProtection = 0;  // found a layer with all inputs already existing
-        IE_ASSERT(_networkPrecision == currLayer->precision);
         CreateSingleLayerPrimitive(currLayer);  // currLayer will be advanced if layer was skipped or merged
-        m_env.prevPrimitiveIDs[currLayer->name] = GetPrevLayersPrimitives(currLayer);
+        m_env.prevPrimitiveIDs[layerName] = GetPrevLayersPrimitives(currLayer);
 
-        for (auto nl : GetNextLayers(currLayer)) {
-            layersToHandle.push_back(nl);
-        }
+        push_if(GetNextLayers(currLayer));
     }
 
     // 3. Handle output reordering
@@ -536,6 +555,8 @@ CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) {
         { "TanH" , TanH },
         { "ELU" , ELU },
         { "Activation" , Activation },
+        { "Exp" , Exp },
+        { "Not" , Not },
         { "Norm" , LRN },
         { "Pooling" , Pooling },
         { "FullyConnected" , FullyConnected },
@@ -573,7 +594,13 @@ CLDNNGraph::LayerType CLDNNGraph::LayerTypeFromStr(const std::string &str) {
         { "Tile" , Tile },
         { "Pad" , Pad },
         { "LSTMCell" , LSTMCell },
-        { "RNN" , RNN },
+        { "LSTMSequence" , RNN },
+        { "RNNSequence" , RNN },
+        { "Gather" , Gather },
+        { "DepthToSpace" , DepthToSpace },
+        { "ShuffleChannels" , ShuffleChannels },
+        { "StridedSlice" , StridedSlice },
+        { "ReverseSequence" , ReverseSequence }
     };
     auto it = LayerNameToType.find(str);
     if (it != LayerNameToType.end())
@@ -604,6 +631,32 @@ cldnn::eltwise_mode CLDNNGraph::EltwiseModeFromIEEltwise(InferenceEngine::Eltwis
             return cldnn::eltwise_mode::prod;
         case InferenceEngine::EltwiseLayer::Max:
             return cldnn::eltwise_mode::max;
+        case InferenceEngine::EltwiseLayer::Sub:
+            return cldnn::eltwise_mode::sub;
+        case InferenceEngine::EltwiseLayer::Min:
+            return cldnn::eltwise_mode::min;
+        case InferenceEngine::EltwiseLayer::Div:
+            return cldnn::eltwise_mode::div;
+        case InferenceEngine::EltwiseLayer::Squared_diff:
+            return cldnn::eltwise_mode::squared_diff;
+        case InferenceEngine::EltwiseLayer::Equal:
+            return cldnn::eltwise_mode::eq;
+        case InferenceEngine::EltwiseLayer::Not_equal:
+            return cldnn::eltwise_mode::ne;
+        case InferenceEngine::EltwiseLayer::Less:
+            return cldnn::eltwise_mode::lt;
+        case InferenceEngine::EltwiseLayer::Less_equal:
+            return cldnn::eltwise_mode::le;
+        case InferenceEngine::EltwiseLayer::Greater:
+            return cldnn::eltwise_mode::gt;
+        case InferenceEngine::EltwiseLayer::Greater_equal:
+            return cldnn::eltwise_mode::ge;
+        case InferenceEngine::EltwiseLayer::Logical_AND:
+            return cldnn::eltwise_mode::logic_and;
+        case InferenceEngine::EltwiseLayer::Logical_OR:
+            return cldnn::eltwise_mode::logic_or;
+        case InferenceEngine::EltwiseLayer::Logical_XOR:
+            return cldnn::eltwise_mode::logic_xor;
         default: THROW_CLDNN_EXCEPTION("Unsupported eltwise operation: " << op);
             break;
     }
@@ -647,6 +700,7 @@ void CLDNNGraph::CreatePrimitiveFromBlob(cldnn::primitive_id primID,
     } else if ((pBlob->layout() != InferenceEngine::OIHW) &&
                (pBlob->layout() != InferenceEngine::NCHW) &&
                (pBlob->layout() != InferenceEngine::CHW) &&
+               (pBlob->layout() != InferenceEngine::NC) &&
                (pBlob->layout() != InferenceEngine::C)) {
         // TODO: support more layouts
         THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(pBlob->layout()) << ") in blob: " << primID);
@@ -712,13 +766,15 @@ void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPt
     switch (LayerTypeFromStr(layer->type)) {
     case Convolution: {
         auto convLayer = dynamic_cast<InferenceEngine::ConvolutionLayer *> (layer.get());
-        groupSize = convLayer->_group;
         if ((inFeatures % groupSize) || (convLayer->_out_depth % groupSize)) {
             THROW_CLDNN_EXCEPTION("Invalid group size in layer " << convLayer->name);
         }
+        groupSize = convLayer->_group;
+        if (groupSize >= 16)  // cldnn optimization for 16 and more groups
+            groupSize = 1;
         weightDimsVec = {
             TensorValue(convLayer->_out_depth / groupSize),
-            TensorValue(inFeatures / groupSize),
+            TensorValue(inFeatures / convLayer->_group),
             TensorValue(convLayer->_kernel[X_AXIS]),
             TensorValue(convLayer->_kernel[Y_AXIS])
         };
@@ -729,13 +785,15 @@ void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPt
         break;
     case Deconvolution: {
         auto deconvLayer = dynamic_cast<InferenceEngine::DeconvolutionLayer *> (layer.get());
-        groupSize = deconvLayer->_group;
         if ((inFeatures % groupSize) || (deconvLayer->_out_depth % groupSize)) {
             THROW_CLDNN_EXCEPTION("Invalid group size in layer " << deconvLayer->name);
         }
+        groupSize = deconvLayer->_group;
+        if (groupSize >= 16)  // cldnn optimization for 16 and more groups
+            groupSize = 1;
         weightDimsVec = {
             TensorValue(deconvLayer->_out_depth / groupSize),
-            TensorValue(inFeatures / groupSize),
+            TensorValue(inFeatures / deconvLayer->_group),
             TensorValue(deconvLayer->_kernel[X_AXIS]),
             TensorValue(deconvLayer->_kernel[Y_AXIS])
         };
@@ -754,13 +812,13 @@ void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPt
 
     // create weights primitive
     cldnn::layout weightsLayout = cldnn::layout(
-        m_networkPrecision,
+        DataTypeFromPrecision(layer->precision),
         m_defaultFormat,
         cldnn::tensor(weightDimsVec));
     size_t bytesPerGroup = weightsLayout.bytes_count();
 
     for (unsigned g = 0; g < groupSize; g++) {
-        cldnn::primitive_id weightID = layer->name + m_weightsTag + std::to_string(g);
+        cldnn::primitive_id weightID = layer_type_name_ID(layer) + m_weightsTag + std::to_string(g);
         CreatePrimitiveFromBlob(
             weightID,
             pWeightsBlob,
@@ -773,12 +831,12 @@ void CLDNNGraph::CreateWeightAndBiasPrimitives(const InferenceEngine::CNNLayerPt
     // create bias primitive
     if (pBiasBlob != nullptr) {
         cldnn::layout biasesLayout = cldnn::layout(
-            m_networkPrecision,
+            DataTypeFromPrecision(layer->precision),
             m_defaultFormat,
             cldnn::spatial(TensorValue(outFeatures / groupSize)));
         size_t bytesPerGroup = biasesLayout.bytes_count();
         for (unsigned g = 0; g < groupSize; g++) {
-            cldnn::primitive_id biasID = layer->name + m_biasesTag + std::to_string(g);
+            cldnn::primitive_id biasID = layer_type_name_ID(layer) + m_biasesTag + std::to_string(g);
             CreatePrimitiveFromBlob(
                 biasID,
                 pBiasBlob,
@@ -813,7 +871,7 @@ void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN(
         THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
     }
     cldnn::layout blobLayout(
-        m_networkPrecision,
+        DataTypeFromPrecision(bnLayer->precision),
         m_defaultFormat,
         blobTensor);
 
@@ -875,7 +933,7 @@ void CLDNNGraph::CreateScaleWeightsAndBiasesFromBN(
 
 void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     // Initialize a profiling entry
-    InitProfileInfo(layer->name, layer->type, "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+    InitProfileInfo(layer->name, layer->type);
 
     // First check for custom layer
     auto customLayer = m_config.customLayers.find(layer->type);
@@ -895,6 +953,8 @@ void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer)
         case ELU:
         case Clamp:
         case Activation:
+        case Exp:
+        case Not:
             CreateActivationPrimitive(layer, LayerTypeFromStr(layer->type));
             break;
         case LRN: CreateLRNPrimitive(layer);
@@ -967,6 +1027,16 @@ void CLDNNGraph::CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr &layer)
             break;
         case Pad: CreatePadPrimitive(layer);
             break;
+        case Gather: CreateGatherPrimitive(layer);
+            break;
+        case DepthToSpace: CreateDepthToSpacePrimitive(layer);
+            break;
+        case ShuffleChannels: CreateShuffleChannelsPrimitive(layer);
+            break;
+        case StridedSlice: CreateStridedSlicePrimitive(layer);
+            break;
+        case ReverseSequence: CreateReverseSequencePrimitive(layer);
+            break;
         default: THROW_CLDNN_EXCEPTION("Unknown Layer Type: " << layer->type);
     }
 }
@@ -990,8 +1060,7 @@ void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer)
     default: THROW_CLDNN_EXCEPTION("Invalid weights dimensions in layer " << layer->name);
         break;
     }
-
-    cldnn::layout blobLayout(m_networkPrecision, m_defaultFormat, weightTensor);
+    cldnn::layout blobLayout(DataTypeFromPrecision(layer->precision), m_defaultFormat, weightTensor);
     CreatePrimitiveFromBlob(scalePrimID, scaleShiftLayer->_weights, blobLayout);
     if (scaleShiftLayer->_biases != nullptr) {
         if (scaleShiftLayer->_biases->dims() != dims) {
@@ -1002,21 +1071,20 @@ void CLDNNGraph::CreateScaleShiftPrimitive(InferenceEngine::CNNLayerPtr &layer)
         biasPrimID = "";  // 0-bias
     }
 
+    std::string scaleShiftLayerName = layer_type_name_ID(layer);
     auto scaleShiftPrim = cldnn::scale(
-        scaleShiftLayer->name,
+        scaleShiftLayerName,
         inputPrimitives[0],
         scalePrimID,
         biasPrimID);
 
-    m_env.primitiveIDs[scaleShiftLayer->name] = scaleShiftLayer->name;
+    m_env.primitiveIDs[scaleShiftLayerName] = scaleShiftLayerName;
     m_topology->add(scaleShiftPrim);
-    m_env.profilingIDs.insert(scaleShiftLayer->name);
+    m_env.profilingIDs.push_back(scaleShiftLayerName);
 }
 
 void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
     ValidateLayer(layer, 3);
-    IE_ASSERT(layer->insData[0].lock()->dims[3] == 1);  // only handling input batch size 1
-    IE_ASSERT(layer->insData[1].lock()->dims[3] == 1);  // only handling input batch size 1
     auto proposalLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
 
     float nms_thresh = proposalLayer->GetParamAsFloat("nms_thresh", 0.7f);
@@ -1031,6 +1099,9 @@ void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
     int base_size = proposalLayer->GetParamAsInt("base_size", 16);
     std::string framework = proposalLayer->GetParamAsString("framework", "");
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    bool normalize = layer->GetParamsAsBool("normalize", false);
+    bool clip_before_nms = layer->GetParamsAsBool("clip_before_nms", true);
+    bool clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false);
 
     float coordinates_offset;
     bool swap_xy;
@@ -1052,8 +1123,9 @@ void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
         swap_xy = false;
     }
 
+    std::string proposalLayerName = layer_type_name_ID(layer);
     auto proposalPrim = cldnn::proposal(
-        proposalLayer->name,
+        proposalLayerName,
         inputPrimitives[0],  // cls_score
         inputPrimitives[1],  // bbox_pred
         inputPrimitives[2],  // im_info
@@ -1071,12 +1143,15 @@ void CLDNNGraph::CreateProposalPrimitive(InferenceEngine::CNNLayerPtr & layer) {
         box_size_scale,
         swap_xy,
         initial_clip,
+        clip_before_nms,
+        clip_after_nms,
         round_ratios,
-        shift_anchors);
+        shift_anchors,
+        normalize);
 
-    m_env.primitiveIDs[proposalLayer->name] = proposalLayer->name;
+    m_env.primitiveIDs[proposalLayerName] = proposalLayerName;
     m_topology->add(proposalPrim);
-    m_env.profilingIDs.insert(proposalLayer->name);
+    m_env.profilingIDs.push_back(proposalLayerName);
 }
 
 void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1084,6 +1159,7 @@ void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto preluLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
 
+    std::string preluLayerName = layer_type_name_ID(layer);
     auto inDataPtr = preluLayer->insData[0].lock();
     if (!inDataPtr) {
         THROW_CLDNN_EXCEPTION("Data inserted into PreLu " << preluLayer->name << " is nullptr");
@@ -1115,35 +1191,36 @@ void CLDNNGraph::CreatePReLUPrimitive(InferenceEngine::CNNLayerPtr &layer) {
             break;
         default: THROW_CLDNN_EXCEPTION("Invalid PReLU slope blob precision in " << preluLayer->name);
         }
-        m_topology->add(cldnn::activation(preluLayer->name, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
+        m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], activation_relu_negative_slope, { slope, 0.f }));
     } else {
         CreateGenericLayerBlobPrimitives(preluLayer);
-        cldnn::primitive_id slopePrimID(preluLayer->name + "_" + blobName + m_weightsTag);
-        m_topology->add(cldnn::activation(preluLayer->name, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
+        cldnn::primitive_id slopePrimID(preluLayerName + "_" + blobName + m_weightsTag);
+        m_topology->add(cldnn::activation(preluLayerName, inputPrimitives[0], slopePrimID, activation_relu_negative_slope));
     }
 
-    m_env.primitiveIDs[preluLayer->name] = preluLayer->name;
-    m_env.profilingIDs.insert(preluLayer->name);
+    m_env.primitiveIDs[preluLayerName] = preluLayerName;
+    m_env.profilingIDs.push_back(preluLayerName);
 }
 
 void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr & layer) {
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    std::string bnLayerName = layer_type_name_ID(layer);
 
     auto bnLayer = dynamic_cast<InferenceEngine::BatchNormalizationLayer *> (layer.get());
-    cldnn::primitive_id weightID = bnLayer->name + "_" + m_scalesTag;
-    cldnn::primitive_id biasID = bnLayer->name + "_" + m_biasesTag;
+    cldnn::primitive_id weightID = bnLayerName + "_" + m_scalesTag;
+    cldnn::primitive_id biasID = bnLayerName + "_" + m_biasesTag;
 
 #define _SCALE_BN_OPT
 #ifdef _SCALE_BN_OPT
     // Using scale as an optimization (1 mad instead of mad+rsq)
     // create new blobs for scale shift
     CreateScaleWeightsAndBiasesFromBN(bnLayer, weightID, biasID);
-    auto scalePrim = cldnn::scale(bnLayer->name, inputPrimitives[0], weightID, biasID);
+    auto scalePrim = cldnn::scale(bnLayerName, inputPrimitives[0], weightID, biasID);
 
-    m_env.primitiveIDs[bnLayer->name] = bnLayer->name;
+    m_env.primitiveIDs[bnLayerName] = bnLayerName;
     m_topology->add(scalePrim);
-    m_env.profilingIDs.insert(bnLayer->name);
+    m_env.profilingIDs.push_back(bnLayerName);
     return;
 #endif  // _SCALE_BN_OPT
 
@@ -1159,67 +1236,85 @@ void CLDNNGraph::CreateBatchNormalizationPrimitive(InferenceEngine::CNNLayerPtr
         THROW_CLDNN_EXCEPTION("Batch normalization input doesn't have 2 or 4 dimensions in " << bnLayer->name);
     }
     cldnn::layout blobLayout(
-        m_networkPrecision,
+        DataTypeFromPrecision(layer->precision),
         m_defaultFormat,
         blobTensor);
 
     // Create variance primitive
-    cldnn::primitive_id varianceID = bnLayer->name + "_" + m_weightsTag;
+    cldnn::primitive_id varianceID = bnLayerName + "_" + m_weightsTag;
     CreatePrimitiveFromBlob(varianceID, bnLayer->_weights, blobLayout);
 
     // Create mean primitive
-    cldnn::primitive_id meanID = bnLayer->name + "_" + m_biasesTag;
+    cldnn::primitive_id meanID = bnLayerName + "_" + m_biasesTag;
     CreatePrimitiveFromBlob(meanID, bnLayer->_biases, blobLayout);
 
     auto bnPrim = cldnn::batch_norm(
-        bnLayer->name,
+        bnLayerName,
         inputPrimitives[0],
         meanID,
         varianceID,
         bnLayer->epsilon);
 
-    m_env.primitiveIDs[bnLayer->name] = bnLayer->name;
+    m_env.primitiveIDs[bnLayerName] = bnLayerName;
     m_topology->add(bnPrim);
-    m_env.profilingIDs.insert(bnLayer->name);
+    m_env.profilingIDs.push_back(bnLayerName);
 }
 
 void CLDNNGraph::CreateFlattenPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto flattenLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
+    std::string flattenLayerName = layer_type_name_ID(layer);
 
     auto flattenPrim = cldnn::reshape(
-        flattenLayer->name,
+        flattenLayerName,
         inputPrimitives[0],
         CldnnTensorFromIEDims(flattenLayer->outData[0]->dims));
 
-    m_env.primitiveIDs[flattenLayer->name] = flattenLayer->name;
+    m_env.primitiveIDs[flattenLayerName] = flattenLayerName;
     m_topology->add(flattenPrim);
-    m_env.profilingIDs.insert(flattenLayer->name);
+    m_env.profilingIDs.push_back(flattenLayerName);
 }
 
 void CLDNNGraph::CreatePermutePrimitive(InferenceEngine::CNNLayerPtr &layer) {
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto permuteLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
-    std::vector<uint16_t> order;
+    std::vector<uint16_t> ie_order;
     for (auto& a : permuteLayer->GetParamAsInts("order"))
-        order.push_back(static_cast<uint16_t>(a));
+        ie_order.push_back(static_cast<uint16_t>(a));
 
     // if order size is less than 4 - fill the rest with just copy
-    for (auto o = order.size(); o < 4; o++)
-        order.push_back((uint16_t)o);
+    for (auto o = ie_order.size(); o < 4; o++)
+        ie_order.push_back((uint16_t)o);
 
-    auto outputDims = permuteLayer->outData[0]->dims;
+    /*
+        Because ofthe cldnn ordering: bfxy, and IE ordering: bfyx
+        wee need to adjust the permute order.
+    */
+    std::vector<uint16_t> cldnn_permute_order;
+    // 1. Switch permute order values (x and y)
+    for (auto const& o : ie_order) {
+        if (o == 2)
+            cldnn_permute_order.push_back(3);
+        else if (o == 3)
+            cldnn_permute_order.push_back(2);
+        else
+            cldnn_permute_order.push_back(o);
+    }
+    // 2. Swap x and y positions
+    std::swap(cldnn_permute_order[2], cldnn_permute_order[3]);
+
+    std::string permuteLayerName = layer_type_name_ID(layer);
 
     auto permutePrim = cldnn::permute(
-        permuteLayer->name,
+        permuteLayerName,
         inputPrimitives[0],
-        order);
+        cldnn_permute_order);
 
-    m_env.primitiveIDs[permuteLayer->name] = permuteLayer->name;
+    m_env.primitiveIDs[permuteLayerName] = permuteLayerName;
     m_topology->add(permutePrim);
-    m_env.profilingIDs.insert(permuteLayer->name);
+    m_env.profilingIDs.push_back(permuteLayerName);
 }
 
 void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1227,15 +1322,16 @@ void CLDNNGraph::CreateReshapePrimitive(InferenceEngine::CNNLayerPtr &layer) {
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto reshapeLayer = dynamic_cast<InferenceEngine::ReshapeLayer*> (layer.get());
     IE_ASSERT(reshapeLayer->outData.size());
+    std::string reshapeLayerName = layer_type_name_ID(layer);
 
     auto reshapePrim = cldnn::reshape(
-        reshapeLayer->name,
+        reshapeLayerName,
         inputPrimitives[0],
         CldnnTensorFromIEDims(reshapeLayer->outData[0]->dims));
 
-    m_env.primitiveIDs[reshapeLayer->name] = reshapeLayer->name;
+    m_env.primitiveIDs[reshapeLayerName] = reshapeLayerName;
     m_topology->add(reshapePrim);
-    m_env.profilingIDs.insert(reshapeLayer->name);
+    m_env.profilingIDs.push_back(reshapeLayerName);
 }
 
 void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1254,69 +1350,73 @@ void CLDNNGraph::CreateNormalizePrimitive(InferenceEngine::CNNLayerPtr &layer) {
         eps = 1e-10f;
     }
 
+    std::string normLayerName = layer_type_name_ID(layer);
     auto normPrim = cldnn::normalize(
-        normLayer->name,
+        normLayerName,
         inputPrimitives[0],
-        normLayer->name + "_weights" + m_weightsTag,
+        normLayerName + "_weights" + m_weightsTag,
         across_spatial,
         eps);
 
-    m_env.primitiveIDs[normLayer->name] = normLayer->name;
+    m_env.primitiveIDs[normLayerName] = normLayerName;
     m_topology->add(normPrim);
-    m_env.profilingIDs.insert(normLayer->name);
+    m_env.profilingIDs.push_back(normLayerName);
 }
 
 void CLDNNGraph::CreateDetectionOutputPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     ValidateLayer(layer, 3);
     auto detectionLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
 
-    uint32_t num_classes = detectionLayer->GetParamAsUInt("num_classes", 1);
-    bool share_location = detectionLayer->GetParamsAsBool("share_location", true);
-    int background_label_id = detectionLayer->GetParamAsInt("background_label_id", 0);
-    float nms_threshold = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f);
-    int top_k = detectionLayer->GetParamAsInt("top_k", -1);
-    float confidence_threshold = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
-    float eta = detectionLayer->GetParamAsFloat("eta", 1.0f);
-    int keep_top_k = detectionLayer->GetParamAsInt("keep_top_k", -1);
+    uint32_t num_classes            = detectionLayer->GetParamAsUInt("num_classes", 1);
+    bool share_location             = detectionLayer->GetParamsAsBool("share_location", true);
+    int background_label_id         = detectionLayer->GetParamAsInt("background_label_id", 0);
+    float nms_threshold             = detectionLayer->GetParamAsFloat("nms_threshold", 0.3f);
+    int top_k                       = detectionLayer->GetParamAsInt("top_k", -1);
+    float confidence_threshold      = detectionLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
+    float eta                       = detectionLayer->GetParamAsFloat("eta", 1.0f);
+    int keep_top_k                  = detectionLayer->GetParamAsInt("keep_top_k", -1);
     bool variance_encoded_in_target = detectionLayer->GetParamsAsBool("variance_encoded_in_target", false);
-    int input_width = detectionLayer->GetParamAsInt("input_width", -1);
-    int input_height = detectionLayer->GetParamAsInt("input_height", -1);
-    bool normalized = detectionLayer->GetParamsAsBool("normalized", true);
-    std::string code_type = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
-    bool clip = detectionLayer->GetParamsAsBool("clip", false);
-    bool decrease_label_id = detectionLayer->GetParamsAsBool("decrease_label_id", false);
-    cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);
+    int input_width                 = detectionLayer->GetParamAsInt("input_width", -1);
+    int input_height                = detectionLayer->GetParamAsInt("input_height", -1);
+    bool normalized                 = detectionLayer->GetParamsAsBool("normalized", true);
+    std::string code_type           = detectionLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
+    bool clip_before_nms            = detectionLayer->GetParamsAsBool("clip_before_nms", false) ||
+                                      detectionLayer->GetParamsAsBool("clip", false);  // For backward compatibility
+    bool clip_after_nms             = detectionLayer->GetParamsAsBool("clip_after_nms", false);
+    bool decrease_label_id          = detectionLayer->GetParamsAsBool("decrease_label_id", false);
 
+    cldnn::prior_box_code_type cldnnCodeType = PriorBoxCodeFromString(code_type);
     int32_t prior_info_size = normalized != 0 ? 4 : 5;
     int32_t prior_coordinates_offset = normalized != 0 ? 0 : 1;
 
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
-    auto detectionPrim = cldnn::detection_output(
-        detectionLayer->name,
-        inputPrimitives[0],
-        inputPrimitives[1],
-        inputPrimitives[2],
-        num_classes,
-        keep_top_k,
-        share_location,
-        background_label_id,
-        nms_threshold,
-        top_k,
-        eta,
-        cldnnCodeType,
-        variance_encoded_in_target,
-        confidence_threshold,
-        prior_info_size,
-        prior_coordinates_offset,
-        normalized,
-        input_width,
-        input_height,
-        decrease_label_id,
-        clip);
-
-    m_env.primitiveIDs[detectionLayer->name] = detectionLayer->name;
+    std::string detectionLayerName = layer_type_name_ID(layer);
+    auto detectionPrim = cldnn::detection_output(detectionLayerName,
+                                                 inputPrimitives[0],
+                                                 inputPrimitives[1],
+                                                 inputPrimitives[2],
+                                                 num_classes,
+                                                 keep_top_k,
+                                                 share_location,
+                                                 background_label_id,
+                                                 nms_threshold,
+                                                 top_k,
+                                                 eta,
+                                                 cldnnCodeType,
+                                                 variance_encoded_in_target,
+                                                 confidence_threshold,
+                                                 prior_info_size,
+                                                 prior_coordinates_offset,
+                                                 normalized,
+                                                 input_width,
+                                                 input_height,
+                                                 decrease_label_id,
+                                                 clip_before_nms,
+                                                 clip_after_nms);
+
+    m_env.primitiveIDs[detectionLayerName] = detectionLayerName;
     m_topology->add(detectionPrim);
-    m_env.profilingIDs.insert(detectionLayer->name);
+    m_env.profilingIDs.push_back(detectionLayerName);
 }
 
 void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1367,8 +1467,9 @@ void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         _step_h = static_cast<float>(img_h) / static_cast<float>(img_dims[1]);
     }
 
+    std::string priorBoxLayerName = layer_type_name_ID(layer);
     auto priorBoxPrim = cldnn::prior_box(
-        priorBoxLayer->name,
+        priorBoxLayerName,
         inputPrimitives[0],
         img_size,
         min_size,
@@ -1382,9 +1483,9 @@ void CLDNNGraph::CreatePriorBoxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         offset,
         scale_all_sizes);
 
-    m_env.primitiveIDs[priorBoxLayer->name] = priorBoxLayer->name;
+    m_env.primitiveIDs[priorBoxLayerName] = priorBoxLayerName;
     m_topology->add(priorBoxPrim);
-    m_env.profilingIDs.insert(priorBoxLayer->name);
+    m_env.profilingIDs.push_back(priorBoxLayerName);
 }
 
 void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1401,22 +1502,38 @@ void CLDNNGraph::CreateDeconvolutionPrimitive(InferenceEngine::CNNLayerPtr &laye
     CreateWeightAndBiasPrimitives(layer, weightPrimID, biasPrimID);
     auto allPads = getPaddings(*deconvLayer);
     cldnn::tensor stride = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
-                                         cldnn::spatial(deconvLayer->_stride[X_AXIS], deconvLayer->_stride[Y_AXIS]));
+        cldnn::spatial(deconvLayer->_stride[X_AXIS], deconvLayer->_stride[Y_AXIS]));
     cldnn::tensor padding = cldnn::tensor(cldnn::batch(0), cldnn::feature(0),
-                                         cldnn::spatial(-allPads.begin[X_AXIS], -allPads.begin[Y_AXIS]));
+        cldnn::spatial(-allPads.begin[X_AXIS], -allPads.begin[Y_AXIS]));
 
-    auto deconvPrim = cldnn::deconvolution(deconvLayer->name,
-        inputPrimitives[0],
-        weightPrimID,
-        biasPrimID,
-        stride,
-        padding,
-        false,
-        0.0f,
-        CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
-    m_env.primitiveIDs[deconvLayer->name] = deconvLayer->name;
-    m_topology->add(deconvPrim);
-    m_env.profilingIDs.insert(deconvLayer->name);
+    std::string deconvLayerName = layer_type_name_ID(layer);
+
+    if (deconvLayer->_group >= 16) {
+        auto deconvPrim = cldnn::deconvolution(deconvLayerName,
+            inputPrimitives[0],
+            weightPrimID,
+            biasPrimID,
+            deconvLayer->_group,
+            stride,
+            padding,
+            false,
+            0.0f,
+            CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
+        m_topology->add(deconvPrim);
+    } else {
+        auto deconvPrim = cldnn::deconvolution(deconvLayerName,
+            inputPrimitives[0],
+            weightPrimID,
+            biasPrimID,
+            stride,
+            padding,
+            false,
+            0.0f,
+            CldnnTensorFromIEDims(deconvLayer->outData[0]->dims));
+        m_topology->add(deconvPrim);
+    }
+    m_env.primitiveIDs[deconvLayerName] = deconvLayerName;
+    m_env.profilingIDs.push_back(deconvLayerName);
 }
 
 void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1452,14 +1569,15 @@ void CLDNNGraph::CreateCropPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         TensorValue(offset[3]),
         TensorValue(offset[2]));
 
+    std::string cropLayerName = layer_type_name_ID(layer);
     auto cropPrim = cldnn::crop(
-        cropLayer->name,
+        cropLayerName,
         inputPrimitives[0],
         refSize,
         offSize);
-    m_env.primitiveIDs[cropLayer->name] = cropLayer->name;
+    m_env.primitiveIDs[cropLayerName] = cropLayerName;
     m_topology->add(cropPrim);
-    m_env.profilingIDs.insert(cropLayer->name);
+    m_env.profilingIDs.push_back(cropLayerName);
 }
 
 void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1471,6 +1589,7 @@ void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer)
     int pooled_height = roiPoolingLayer->GetParamAsInt("pooled_h", 0);
     float spatial_scale = roiPoolingLayer->GetParamAsFloat("spatial_scale", 1.0f);
     std::string method = roiPoolingLayer->GetParamAsString("method", "max");
+    bool position_sensitive = false;
 
     cldnn::pooling_mode mode = cldnn::pooling_mode::max;
     if (method == "bilinear") {
@@ -1478,17 +1597,18 @@ void CLDNNGraph::CreateROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer)
     }
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
 
-    auto roiPoolingPrim = cldnn::roi_pooling(
-        roiPoolingLayer->name,
-        inputPrimitives[0],  // input data
-        inputPrimitives[1],  // input rois
-        mode,
-        pooled_width,
-        pooled_height,
-        spatial_scale);
-    m_env.primitiveIDs[roiPoolingLayer->name] = roiPoolingLayer->name;
+    std::string roiPoolingLayerName = layer_type_name_ID(layer);
+    auto roiPoolingPrim = cldnn::roi_pooling(roiPoolingLayerName,
+                                             inputPrimitives[0],  // input data
+                                             inputPrimitives[1],  // input rois
+                                             mode,
+                                             position_sensitive,
+                                             pooled_width,
+                                             pooled_height,
+                                             spatial_scale);
+    m_env.primitiveIDs[roiPoolingLayerName] = roiPoolingLayerName;
     m_topology->add(roiPoolingPrim);
-    m_env.profilingIDs.insert(roiPoolingLayer->name);
+    m_env.profilingIDs.push_back(roiPoolingLayerName);
 }
 
 void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1497,22 +1617,34 @@ void CLDNNGraph::CreatePSROIPoolingPrimitive(InferenceEngine::CNNLayerPtr &layer
 
     // params
     int group_size = psROIPoolingLayer->GetParamAsInt("group_size");
-    // todo: assert outputdim*group_size*group_size == input features
+    int output_dim = psROIPoolingLayer->GetParamAsInt("output_dim");
     float spatial_scale = psROIPoolingLayer->GetParamAsFloat("spatial_scale");
+    size_t spatial_bins_x = static_cast<size_t>(psROIPoolingLayer->GetParamAsInt("spatial_bins_x", 1));
+    size_t spatial_bins_y = static_cast<size_t>(psROIPoolingLayer->GetParamAsInt("spatial_bins_y", 1));
+    std::string mode_str = psROIPoolingLayer->GetParamAsString("mode", "average");
+    bool position_sensitive = true;
+
+    cldnn::pooling_mode mode = mode_str == "average" ? cldnn::pooling_mode::average
+                                                     : cldnn::pooling_mode::bilinear;
+
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
 
-    auto psROIPoolingPrim = cldnn::roi_pooling(
-        psROIPoolingLayer->name,
-        inputPrimitives[0],  // input data
-        inputPrimitives[1],  // input rois
-        cldnn::pooling_mode::average,
-        group_size,
-        group_size,
-        spatial_scale,
-        group_size);
-    m_env.primitiveIDs[psROIPoolingLayer->name] = psROIPoolingLayer->name;
+    std::string psROIPoolingLayerName = layer_type_name_ID(layer);
+    auto psROIPoolingPrim = cldnn::roi_pooling(psROIPoolingLayerName,
+                                               inputPrimitives[0],  // input data
+                                               inputPrimitives[1],  // input rois
+                                               mode,
+                                               position_sensitive,
+                                               group_size,
+                                               group_size,
+                                               spatial_scale,
+                                               output_dim,
+                                               spatial_bins_x,
+                                               spatial_bins_y);
+
+    m_env.primitiveIDs[psROIPoolingLayerName] = psROIPoolingLayerName;
     m_topology->add(psROIPoolingPrim);
-    m_env.profilingIDs.insert(psROIPoolingLayer->name);
+    m_env.profilingIDs.push_back(psROIPoolingLayerName);
 }
 
 void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer, CLDNNCustomLayerPtr customLayer) {
@@ -1547,7 +1679,7 @@ void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer
             THROW_CLDNN_EXCEPTION("Invalid dimensions for blob " << blob.first << " in layer " << genericLayer->name);
         }
         CreatePrimitiveFromBlob(blobId, blob.second, cldnn::layout(
-            m_networkPrecision,
+            DataTypeFromPrecision(blob.second->precision()),
             m_defaultFormat,
             cldnn::tensor(1, 1, TensorValue(blob.second->dims()[0]), 1)));
         // save index in blobIndex
@@ -1577,8 +1709,8 @@ void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer
                         param.format,
                         DataTypeFromPrecision(layer->precision));
                     m_topology->add(preprocessPrim);
-                    m_env.profilingIDs.insert(reorderPrimName);
-                    InitProfileInfo(reorderPrimName, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+                    m_env.profilingIDs.push_back(reorderPrimName);
+                    InitProfileInfo(reorderPrimName, "Reorder");
                     reorderedInputs[param.portIndex] = (reorderPrimName);
                 } else {
                     reorderedInputs[param.portIndex] = inputPrimitives[param.portIndex];
@@ -1629,6 +1761,7 @@ void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer
     int xDim = outputTensor.spatial[0];
     int iidx = customLayer->InputDimSourceIndex();
 
+    std::string genericLayerName = layer_type_name_ID(layer);
     // if input index is greater than -1, take dimension from input
     if (iidx >= 0) {
         if (iidx >= genericLayer->insData.size())
@@ -1670,7 +1803,7 @@ void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer
     }
 
     auto customPrim = cldnn::custom_gpu_primitive(
-        genericLayer->name,
+        genericLayerName,
         reorderedInputs,
         { layerTitle, defineTitle, layerDefines, customLayer->KernelSource() },
         customLayer->KernelEntry(),
@@ -1681,23 +1814,24 @@ void CLDNNGraph::CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr & layer
         lws);
 
     if (outputLayout.format != cldnn::format::any &&
-        p_currentOutputs->find(genericLayer->name) == p_currentOutputs->end()) {
+        p_currentOutputs->find(genericLayerName) == p_currentOutputs->end()) {
         // Handle output reorder
-        auto reorderPrimName = genericLayer->name + m_postCustomLayerTag;
+        auto reorderPrimName = genericLayerName + m_postCustomLayerTag;
         m_topology->add(
             cldnn::reorder(
                 reorderPrimName,
-                genericLayer->name,
+                genericLayerName,
                 m_defaultFormat,
-                m_networkPrecision));
-        m_env.primitiveIDs[genericLayer->name] = reorderPrimName;
-        m_env.profilingIDs.insert(reorderPrimName);
-        InitProfileInfo(reorderPrimName, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+                customPrim.output_layout.data_type));
+        m_env.primitiveIDs[genericLayerName] = reorderPrimName;
+        m_env.primitiveIDs[reorderPrimName] = reorderPrimName;
+        m_env.profilingIDs.push_back(reorderPrimName);
+        InitProfileInfo(reorderPrimName, "Reorder");
     } else {
-        m_env.primitiveIDs[genericLayer->name] = genericLayer->name;
+        m_env.primitiveIDs[genericLayerName] = genericLayerName;
     }
     m_topology->add(customPrim);
-    m_env.profilingIDs.insert(genericLayer->name);
+    m_env.profilingIDs.push_back(genericLayerName);
 }
 
 void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1715,8 +1849,9 @@ void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer)
     std::vector<float> scale = simpleNMSLayer->GetParamAsFloats("scale");
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
 
+    std::string simpleNMSLayerName = layer_type_name_ID(layer);
     auto simpleNMSPrim = cldnn::proposal(
-        simpleNMSLayer->name,
+        simpleNMSLayerName,
         inputPrimitives[0],  // cls_score
         inputPrimitives[1],  // bbox_pred
         inputPrimitives[2],  // im_info
@@ -1729,9 +1864,9 @@ void CLDNNGraph::CreateSimplerNMSPrimitive(InferenceEngine::CNNLayerPtr &layer)
         { 0.5f, 1.0f, 2.0f },  // ratios for the SimplerNMS variant
         scale);
 
-    m_env.primitiveIDs[simpleNMSLayer->name] = simpleNMSLayer->name;
+    m_env.primitiveIDs[simpleNMSLayerName] = simpleNMSLayerName;
     m_topology->add(simpleNMSPrim);
-    m_env.profilingIDs.insert(simpleNMSLayer->name);
+    m_env.profilingIDs.push_back(simpleNMSLayerName);
 }
 
 void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1749,27 +1884,29 @@ void CLDNNGraph::CreateEltwisePrimitive(InferenceEngine::CNNLayerPtr &layer) {
         THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands";
     }
 
+    std::string eltwiseLayerName = layer_type_name_ID(layer);
     auto eltwisePrim = cldnn::eltwise(
-        eltwiseLayer->name,
+        eltwiseLayerName,
         inputPrimitives,
         EltwiseModeFromIEEltwise(eltwiseLayer->_operation),
         coefficients);
-    m_env.primitiveIDs[eltwiseLayer->name] = eltwiseLayer->name;
+    m_env.primitiveIDs[eltwiseLayerName] = eltwiseLayerName;
     m_topology->add(eltwisePrim);
-    m_env.profilingIDs.insert(eltwiseLayer->name);
+    m_env.profilingIDs.push_back(eltwiseLayerName);
 }
 
 void CLDNNGraph::CreateConcatenatePrimitive(InferenceEngine::CNNLayerPtr &layer) {
     ValidateLayer(layer, 0);
     auto concatLayer = dynamic_cast<InferenceEngine::ConcatLayer *> (layer.get());
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    std::string concatLayerName = layer_type_name_ID(layer);
     auto concatPrim = cldnn::concatenation(
-        concatLayer->name,
+        concatLayerName,
         inputPrimitives,
         ConcatAxisFromIEAxis(concatLayer->_axis));
-    m_env.primitiveIDs[concatLayer->name] = concatLayer->name;
+    m_env.primitiveIDs[concatLayerName] = concatLayerName;
     m_topology->add(concatPrim);
-    m_env.profilingIDs.insert(concatLayer->name);
+    m_env.profilingIDs.push_back(concatLayerName);
 }
 
 void CLDNNGraph::CreateSplitPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1798,7 +1935,7 @@ std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFro
             auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, CldnnTensorFromIEDims(startOffset));
             m_topology->add(cropPrim);
             m_env.primitiveIDs[outLayer->name] = outLayer->name;
-            m_env.profilingIDs.insert(outLayer->name);
+            m_env.profilingIDs.push_back(outLayer->name);
             outputOffsets.push_back({ outLayer->name, CldnnTensorFromIEDims(startOffset) });
             for (size_t i = 0; i < inputDims.size(); i++) {
                 if (outLayer->dims[i] != inputDims[i]) {
@@ -1838,6 +1975,7 @@ std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFro
         };
 
         for (auto& outLayer : splitLayer->outData) {
+            std::string outLayerName = splitLayer->type + ":" + outLayer->name;
             if (outLayer->dims.size() != startOffset.size()) {
                 THROW_CLDNN_EXCEPTION("Invalid dimesions in split layer: " << splitLayer->name << " output: " << outLayer->name);
             }
@@ -1854,11 +1992,11 @@ std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFro
             std::reverse(reverseOffset.begin(), reverseOffset.end());
             auto offsetTensor = TensorFromIEDims(reverseOffset, 0);
 
-            auto cropPrim = cldnn::crop(outLayer->name, inputPrimitives[0], outTensor, offsetTensor);
-            m_env.primitiveIDs[outLayer->name] = outLayer->name;
+            auto cropPrim = cldnn::crop(outLayerName, inputPrimitives[0], outTensor, offsetTensor);
+            m_env.primitiveIDs[outLayerName] = outLayerName;
             m_topology->add(cropPrim);
-            m_env.profilingIDs.insert(outLayer->name);
-            InitProfileInfo(outLayer->name, "Crop", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+            m_env.profilingIDs.push_back(outLayerName);
+            InitProfileInfo(outLayerName, "Crop");
 
             for (size_t i = 0; i < inputDims.size(); i++) {
                 if (outLayer->dims[i] != inputDims[i]) {
@@ -1868,7 +2006,7 @@ std::cout << "Splitting layer: " << layer->name << "\n\tSize:" << CldnnTensorFro
         }
 
         // set split as not_run
-        InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
+        InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
 #endif  // _USE_SPLIT_PRIMITIVE
     }
 }
@@ -1893,9 +2031,9 @@ void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr
         THROW_CLDNN_EXCEPTION("Expected single layer does not exist");
     }
     // Mark these layers as optimized out
-    InitProfileInfo(convLayer1->name, convLayer1->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
-    InitProfileInfo(convLayer2->name, convLayer2->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
-    InitProfileInfo(concatLayer->name, concatLayer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+    InitProfileInfo(convLayer1->name, convLayer1->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+    InitProfileInfo(convLayer2->name, convLayer2->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
+    InitProfileInfo(concatLayer->name, concatLayer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);
 
     // build the split conv primitive
     std::vector<cldnn::primitive_id> weightPrimID;
@@ -1913,7 +2051,8 @@ void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr
     cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
                                            cldnn::spatial(convLayer1->_dilation[X_AXIS], convLayer1->_dilation[Y_AXIS]));
 
-    auto splitPrim = cldnn::convolution(splitLayer->name,
+    std::string splitLayerName = layer_type_name_ID(layer);
+    auto splitPrim = cldnn::convolution(splitLayerName,
                                         inputPrimitives[0],
                                         weightPrimID,
                                         biasPrimID,
@@ -1926,14 +2065,14 @@ void CLDNNGraph::CreateFusedSplitConvMergePrimitive(InferenceEngine::CNNLayerPtr
 
     layer = concatLayerPtr;
 
-    m_env.primitiveIDs[splitLayer->name]  = splitLayer->name;
-    m_env.primitiveIDs[convLayer1->name]  = splitLayer->name;
-    m_env.primitiveIDs[convLayer2->name]  = splitLayer->name;
-    m_env.primitiveIDs[concatLayer->name] = splitLayer->name;  // pair the last merged layer (concat or relu) with
+    m_env.primitiveIDs[splitLayerName]  = splitLayerName;
+    m_env.primitiveIDs[layer_type_name_ID(convLayer1)]  = splitLayerName;
+    m_env.primitiveIDs[layer_type_name_ID(convLayer2)]  = splitLayerName;
+    m_env.primitiveIDs[layer_type_name_ID(concatLayer)] = splitLayerName;  // pair the last merged layer (concat or relu) with
                                                                // this primitive name to be used as
                                                               // input prim for subsequent layers
     m_topology->add(splitPrim);
-    m_env.profilingIDs.insert(splitLayer->name);
+    m_env.profilingIDs.push_back(splitLayerName);
 }
 
 void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -1944,45 +2083,46 @@ void CLDNNGraph::CreatePowerPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         THROW_CLDNN_EXCEPTION("Power Layer " << layer->name << "uses unsupported power value");
     }
 
+    std::string powerLayerName = layer_type_name_ID(layer);
     if ((powerLayer->scale == 1.0f) && (powerLayer->offset == 0.0f)) {
         if (powerLayer->power == 0.5f) {
-            auto activationPrim = cldnn::activation(powerLayer->name, inputPrimitives[0], activation_sqrt);
+            auto activationPrim = cldnn::activation(powerLayerName, inputPrimitives[0], activation_sqrt);
             m_topology->add(activationPrim);
-            m_env.profilingIDs.insert(powerLayer->name);
-            m_env.primitiveIDs[powerLayer->name] = powerLayer->name;
+            m_env.profilingIDs.push_back(powerLayerName);
+            m_env.primitiveIDs[powerLayerName] = powerLayerName;
         } else {
             // skip this layer
-            m_env.primitiveIDs[powerLayer->name] = inputPrimitives[0];  // register the previous primID for this layer too
-            InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::NOT_RUN);  // Mark this layer as not run
+            m_env.primitiveIDs[powerLayerName] = inputPrimitives[0];  // register the previous primID for this layer too
+            InitProfileInfo(layer->name, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::NOT_RUN);  // Mark this layer as not run
         }
     } else {
         // create scale primitive
-        auto scaleValuePrimName = powerLayer->name + m_scalesTag;
+        auto scaleValuePrimName = powerLayerName + m_scalesTag;
         AddSingleValuePrimitive(scaleValuePrimName,
             DataTypeFromPrecision(powerLayer->precision),
             powerLayer->scale);
 
         cldnn::primitive_id biasValuePrimName = "";
         if (powerLayer->offset != 0.0f) {
-            biasValuePrimName = powerLayer->name + m_biasesTag;
+            biasValuePrimName = powerLayerName + m_biasesTag;
             AddSingleValuePrimitive(biasValuePrimName,
                 DataTypeFromPrecision(powerLayer->precision),
                 powerLayer->offset);
         }
         auto scalePrim = cldnn::scale(
-            powerLayer->name,
+            powerLayerName,
             inputPrimitives[0],
             scaleValuePrimName,
             biasValuePrimName);
 
-        m_env.primitiveIDs[powerLayer->name] = powerLayer->name;
+        m_env.primitiveIDs[powerLayerName] = powerLayerName;
         m_topology->add(scalePrim);
-        m_env.profilingIDs.insert(powerLayer->name);
+        m_env.profilingIDs.push_back(powerLayerName);
 
         if (powerLayer->power == 0.5f) {
-            auto activationPrim = cldnn::activation(powerLayer->name+"_sqrt", powerLayer->name, activation_sqrt);
+            auto activationPrim = cldnn::activation(powerLayerName+"_sqrt", powerLayerName, activation_sqrt);
             m_topology->add(activationPrim);
-            m_env.profilingIDs.insert(powerLayer->name+"_sqrt");
+            m_env.profilingIDs.push_back(powerLayerName+"_sqrt");
         }
     }
 }
@@ -2007,10 +2147,11 @@ void CLDNNGraph::CreateSoftMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         isPrevFC = true;
     // end of WA
 
-    auto softmaxPrim = cldnn::softmax(softmaxLayer->name, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC));
-    m_env.primitiveIDs[softmaxLayer->name] = softmaxLayer->name;
+    std::string softmaxLayerName = layer_type_name_ID(layer);
+    auto softmaxPrim = cldnn::softmax(softmaxLayerName, inputPrimitives[0], SoftmaxDimensionFromIEAxis(softmaxLayer, isPrevFC));
+    m_env.primitiveIDs[softmaxLayerName] = softmaxLayerName;
     m_topology->add(softmaxPrim);
-    m_env.profilingIDs.insert(softmaxLayer->name);
+    m_env.profilingIDs.push_back(softmaxLayerName);
 }
 
 void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2018,13 +2159,14 @@ void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &lay
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto fcLayer = dynamic_cast<InferenceEngine::FullyConnectedLayer *> (layer.get());
 
+    std::string fcLayerName = layer_type_name_ID(layer);
     // create bias primitive
     cldnn::primitive_id biasesPrimID = "";
     if (fcLayer->_biases != nullptr) {
-        biasesPrimID = fcLayer->name + m_biasesTag;
+        biasesPrimID = fcLayerName + m_biasesTag;
         CreatePrimitiveFromBlob(biasesPrimID,
             fcLayer->_biases,
-            cldnn::layout(m_networkPrecision, m_defaultFormat,
+            cldnn::layout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat,
                 cldnn::spatial(TensorValue(fcLayer->_out_num))));
     }
 
@@ -2032,7 +2174,7 @@ void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &lay
     // gcc bug to resolve auto, at least for 5.4 version
     std::shared_ptr<Data> insData0 = fcLayer->insData[0].lock();
     IE_ASSERT(insData0 != nullptr);
-    cldnn::primitive_id weightsPrimID = fcLayer->name + m_weightsTag;
+    cldnn::primitive_id weightsPrimID = fcLayerName + m_weightsTag;
     cldnn::tensor weightsDims;
     switch (insData0->dims.size()) {
     case 4:
@@ -2048,18 +2190,18 @@ void CLDNNGraph::CreateFullyConnectedPrimitive(InferenceEngine::CNNLayerPtr &lay
     }
     CreatePrimitiveFromBlob(weightsPrimID,
                             fcLayer->_weights,
-                            cldnn::layout(m_networkPrecision, m_defaultFormat, weightsDims));
+                            cldnn::layout(DataTypeFromPrecision(fcLayer->precision), m_defaultFormat, weightsDims));
 
-    auto fcPrim = cldnn::fully_connected(fcLayer->name,
+    auto fcPrim = cldnn::fully_connected(fcLayerName,
                                          inputPrimitives[0],
                                          weightsPrimID,
                                          biasesPrimID,
                                          false,
                                          0.0f);
 
-    m_env.primitiveIDs[fcLayer->name] = fcLayer->name;
+    m_env.primitiveIDs[fcLayerName] = fcLayerName;
     m_topology->add(fcPrim);
-    m_env.profilingIDs.insert(fcLayer->name);
+    m_env.profilingIDs.push_back(fcLayerName);
 }
 
 void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2067,6 +2209,7 @@ void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto poolLayer = dynamic_cast<InferenceEngine::PoolingLayer *> (layer.get());
 
+    std::string poolLayerName = layer_type_name_ID(layer);
     auto allPads = getPaddings(*poolLayer);
     if (poolLayer->outData.size() > 1) {
         // max pooling with argmax
@@ -2119,7 +2262,7 @@ void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         m_env.primitiveIDs[argmaxOutputID] = argmaxPrimID;
 
         // create pooling primitive itself
-        auto poolPrim = cldnn::pooling(poolLayer->name,
+        auto poolPrim = cldnn::pooling(poolLayerName,
             inputPrimitives[0],
             argmaxPrimID,
             cldnn::pooling_mode::max_with_argmax,
@@ -2129,10 +2272,10 @@ void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
             { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) },
             CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
         m_topology->add(poolPrim);
-        m_env.primitiveIDs[realOutputID] = poolLayer->name;
+        m_env.primitiveIDs[realOutputID] = poolLayerName;
     } else {
         // regular pooling
-        auto poolPrim = cldnn::pooling(poolLayer->name,
+        auto poolPrim = cldnn::pooling(poolLayerName,
             inputPrimitives[0],
             PoolingModeFromIEPooling(poolLayer->_type, poolLayer->_exclude_pad),
             cldnn::spatial(TensorValue(poolLayer->_kernel[X_AXIS]), TensorValue(poolLayer->_kernel[Y_AXIS])),  // size
@@ -2141,18 +2284,19 @@ void CLDNNGraph::CreatePoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
             { 0, 0, -TensorValue(allPads.begin[X_AXIS]), -TensorValue(allPads.begin[Y_AXIS]) },
             CldnnTensorFromIEDims(poolLayer->outData[0]->dims));
     m_topology->add(poolPrim);
-        m_env.primitiveIDs[poolLayer->name] = poolLayer->name;
+        m_env.primitiveIDs[poolLayerName] = poolLayerName;
     }
 
-    m_env.profilingIDs.insert(poolLayer->name);
+    m_env.profilingIDs.push_back(poolLayerName);
 }
 
 void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     ValidateLayer(layer, 1);
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
     auto lrnLayer = dynamic_cast<InferenceEngine::NormLayer *> (layer.get());
+    std::string lrnLayerName = layer_type_name_ID(layer);
     auto lrnPrim = cldnn::lrn(
-        lrnLayer->name,
+        lrnLayerName,
         inputPrimitives[0],
         lrnLayer->_size,
         static_cast<float>(lrnLayer->_k),
@@ -2160,9 +2304,9 @@ void CLDNNGraph::CreateLRNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         lrnLayer->_beta,
         lrnLayer->_isAcrossMaps ? cldnn_lrn_norm_region_across_channel : cldnn_lrn_norm_region_within_channel);
 
-    m_env.primitiveIDs[lrnLayer->name] = lrnLayer->name;
+    m_env.primitiveIDs[lrnLayerName] = lrnLayerName;
     m_topology->add(lrnPrim);
-    m_env.profilingIDs.insert(lrnLayer->name);
+    m_env.profilingIDs.push_back(lrnLayerName);
 }
 
 void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer, const LayerType type) {
@@ -2186,6 +2330,10 @@ void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer,
             activationType = ReLU6;
         } else if (activation_type == "clamp")  {
             activationType = Clamp;
+        } else if (activation_type == "exp")  {
+            activationType = Exp;
+        } else if (activation_type == "not")  {
+            activationType = Not;
         } else {
             THROW_CLDNN_EXCEPTION("Unsupported activation type (" + activation_type +
                                   ") in layer " + layer->name);
@@ -2230,15 +2378,26 @@ void CLDNNGraph::CreateActivationPrimitive(InferenceEngine::CNNLayerPtr &layer,
         params.b = layer->GetParamAsFloat("max");
         break;
     }
+    case Exp:
+    {
+        func = cldnn_activation_func_t::activation_exp;
+        break;
+    }
+    case Not:
+    {
+        func = cldnn_activation_func_t::activation_not;
+        break;
+    }
     default:
         THROW_CLDNN_EXCEPTION("Unsupported activation type (" + layer->type +
                               ") in layer " + layer->name);
     }
 
-    auto activationPrimitive = cldnn::activation(layer->name, inputPrimitives[0], func, params);
-    m_env.primitiveIDs[layer->name] = layer->name;
+    std::string layerName = layer_type_name_ID(layer);
+    auto activationPrimitive = cldnn::activation(layerName, inputPrimitives[0], func, params);
+    m_env.primitiveIDs[layerName] = layerName;
     m_topology->add(activationPrimitive);
-    m_env.profilingIDs.insert(layer->name);
+    m_env.profilingIDs.push_back(layerName);
 }
 
 void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2247,8 +2406,9 @@ void CLDNNGraph::CreateCopyPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     auto copyLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
 
     // Optimize out and just update references
-    m_env.primitiveIDs[copyLayer->name] = inputPrimitives[0];
-    InitProfileInfo(layer->name, layer->type, "None", InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
+    std::string layerName = layer_type_name_ID(layer);
+    m_env.primitiveIDs[layerName] = inputPrimitives[0];
+    InitProfileInfo(layerName, layer->type, false, InferenceEngine::InferenceEngineProfileInfo::OPTIMIZED_OUT);  // Mark this layer as optimized out
 }
 
 void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2260,16 +2420,17 @@ void CLDNNGraph::CreateUpsamplingPrimitive(InferenceEngine::CNNLayerPtr &layer)
     uint32_t numFilter = upsamplingLayer->GetParamAsUInt("num_filter");
     std::string sampleType = upsamplingLayer->GetParamAsString("sample_type");
 
+    std::string upsamplingLayerName = layer_type_name_ID(layer);
     auto upsamplingPrim = cldnn::upsampling(
-        upsamplingLayer->name,
+        upsamplingLayerName,
         inputPrimitives[0],
         scale,
         numFilter,
         UpsamplingTypeFromString(sampleType));
 
-    m_env.primitiveIDs[upsamplingLayer->name] = upsamplingLayer->name;
+    m_env.primitiveIDs[upsamplingLayerName] = upsamplingLayerName;
     m_topology->add(upsamplingPrim);
-    m_env.profilingIDs.insert(upsamplingLayer->name);
+    m_env.profilingIDs.push_back(upsamplingLayerName);
 }
 
 void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2295,16 +2456,17 @@ void CLDNNGraph::CreateResamplePrimitive(InferenceEngine::CNNLayerPtr &layer) {
         THROW_CLDNN_EXCEPTION("Unsupported resampling type (" + sampleType + ") in layer " + layer->name);
     }
 
+    std::string resampleLayerName = layer_type_name_ID(layer);
     auto upsamplingPrim = cldnn::upsampling(
-        resampleLayer->name,
+        resampleLayerName,
         inputPrimitives[0],
         scale,
         inFeatures,
         cldnn::upsampling_sample_type::nearest);
 
-    m_env.primitiveIDs[resampleLayer->name] = resampleLayer->name;
+    m_env.primitiveIDs[resampleLayerName] = resampleLayerName;
     m_topology->add(upsamplingPrim);
-    m_env.profilingIDs.insert(resampleLayer->name);
+    m_env.profilingIDs.push_back(resampleLayerName);
 }
 
 void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2323,8 +2485,9 @@ void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer)
         mask_size = static_cast<uint32_t>(mask.size());
     }
 
+    std::string YOLOregionLayerName = layer_type_name_ID(layer);
     auto regionPrim = cldnn::region_yolo(
-        YOLOregionLayer->name,
+        YOLOregionLayerName,
         inputPrimitives[0],
         coords,
         classes,
@@ -2332,9 +2495,9 @@ void CLDNNGraph::CreateYOLO2RegionPrimitive(InferenceEngine::CNNLayerPtr &layer)
         mask_size,
         do_softmax);
 
-    m_env.primitiveIDs[YOLOregionLayer->name] = YOLOregionLayer->name;
+    m_env.primitiveIDs[YOLOregionLayerName] = YOLOregionLayerName;
     m_topology->add(regionPrim);
-    m_env.profilingIDs.insert(YOLOregionLayer->name);
+    m_env.profilingIDs.push_back(YOLOregionLayerName);
 }
 
 void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2343,14 +2506,15 @@ void CLDNNGraph::CreateYOLO2ReorgPrimitive(InferenceEngine::CNNLayerPtr &layer)
     auto YOLOreorgLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
     uint32_t stride = YOLOreorgLayer->GetParamAsUInt("stride");
 
+    std::string YOLOreorgLayerName = layer_type_name_ID(layer);
     auto reorgPrim = cldnn::reorg_yolo(
-        YOLOreorgLayer->name,
+        YOLOreorgLayerName,
         inputPrimitives[0],
         stride);
 
-    m_env.primitiveIDs[YOLOreorgLayer->name] = YOLOreorgLayer->name;
+    m_env.primitiveIDs[YOLOreorgLayerName] = YOLOreorgLayerName;
     m_topology->add(reorgPrim);
-    m_env.profilingIDs.insert(YOLOreorgLayer->name);
+    m_env.profilingIDs.push_back(YOLOreorgLayerName);
 }
 
 void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2385,16 +2549,17 @@ void CLDNNGraph::CreateArgMaxPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         }
     }
 
+    std::string ArgMaxLayerName = layer_type_name_ID(layer);
     auto argmaxPrim = cldnn::arg_max_min(
-        ArgMaxLayer->name,
+        ArgMaxLayerName,
         inputPrimitives[0],
         otype,
         top_k,
         chosen_axis);
 
-    m_env.primitiveIDs[ArgMaxLayer->name] = ArgMaxLayer->name;
+    m_env.primitiveIDs[ArgMaxLayerName] = ArgMaxLayerName;
     m_topology->add(argmaxPrim);
-    m_env.profilingIDs.insert(ArgMaxLayer->name);
+    m_env.profilingIDs.push_back(ArgMaxLayerName);
 }
 
 void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2429,16 +2594,17 @@ void CLDNNGraph::CreateMaxUnpoolingPrimitive(InferenceEngine::CNNLayerPtr &layer
     uint32_t stride = UnpoolingLayer->GetParamAsUInt("stride");
     uint32_t kernel_size = UnpoolingLayer->GetParamAsUInt("kernel_size");
 
+    std::string UnpoolingLayerName = layer_type_name_ID(layer);
     auto unpoolingPrim = cldnn::max_unpooling(
-        UnpoolingLayer->name,
+        UnpoolingLayerName,
         real_input,
         argmax_mutable,
         cldnn::spatial(kernel_size, kernel_size),  // size
         cldnn::spatial(stride, stride) );          // stride
 
-    m_env.primitiveIDs[UnpoolingLayer->name] = UnpoolingLayer->name;
+    m_env.primitiveIDs[UnpoolingLayerName] = UnpoolingLayerName;
     m_topology->add(unpoolingPrim);
-    m_env.profilingIDs.insert(UnpoolingLayer->name);
+    m_env.profilingIDs.push_back(UnpoolingLayerName);
 }
 
 void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2450,16 +2616,17 @@ void CLDNNGraph::CreateMVNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     bool normalize_variance = MvnLayer->GetParamsAsBool("normalize_variance", true);
     float eps = MvnLayer->GetParamAsFloat("eps", 1e-10f);
 
+    std::string MvnLayerName = layer_type_name_ID(layer);
     auto mvnPrim = cldnn::mvn(
-        MvnLayer->name,
+        MvnLayerName,
         inputPrimitives[0],
         across_channels,
         normalize_variance,
         eps);
 
-    m_env.primitiveIDs[MvnLayer->name] = MvnLayer->name;
+    m_env.primitiveIDs[MvnLayerName] = MvnLayerName;
     m_topology->add(mvnPrim);
-    m_env.profilingIDs.insert(MvnLayer->name);
+    m_env.profilingIDs.push_back(MvnLayerName);
 }
 
 void CLDNNGraph::CreateTilePrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2479,15 +2646,16 @@ void CLDNNGraph::CreateTilePrimitive(InferenceEngine::CNNLayerPtr &layer) {
             default: THROW_CLDNN_EXCEPTION("Unsupported tile axis: " << axis);
         }
     };
+    std::string tileLayerName = layer_type_name_ID(layer);
     auto tilePrim = cldnn::tile(
-        tileLayer->name,
+        tileLayerName,
         inputPrimitives[0],
         cldnnAxisFromIE(axis),
         tiles);
 
-    m_env.primitiveIDs[tileLayer->name] = tileLayer->name;
+    m_env.primitiveIDs[tileLayerName] = tileLayerName;
     m_topology->add(tilePrim);
-    m_env.profilingIDs.insert(tileLayer->name);
+    m_env.profilingIDs.push_back(tileLayerName);
 }
 
 void CLDNNGraph::CreatePadPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2532,17 +2700,18 @@ void CLDNNGraph::CreatePadPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     else
         THROW_CLDNN_EXCEPTION("Invalid border mode " << mode << " in layer " << padLayer->name);
 
+    std::string padLayerName = layer_type_name_ID(layer);
     auto tilePrim = cldnn::border(
-            padLayer->name,
+            padLayerName,
             inputPrimitives[0],
             pads_begin,
             pads_end,
             border_mode,
             pad_value);
 
-    m_env.primitiveIDs[padLayer->name] = padLayer->name;
+    m_env.primitiveIDs[padLayerName] = padLayerName;
     m_topology->add(tilePrim);
-    m_env.profilingIDs.insert(padLayer->name);
+    m_env.profilingIDs.push_back(padLayerName);
 }
 
 std::string get_string_id(size_t i) {
@@ -2557,10 +2726,11 @@ void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     bool hasBias = false;
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
 
-    auto elementSize = cldnn::data_type_traits::size_of(m_networkPrecision);
-    cldnn::primitive_id weightID = layer->name + m_weightsTag;
-    cldnn::primitive_id recurrentID = layer->name + "_recurrent" + m_weightsTag;
-    cldnn::primitive_id biasID = layer->name + m_biasesTag;
+    auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision));
+    std::string layerName = layer_type_name_ID(layer);
+    cldnn::primitive_id weightID = layerName + m_weightsTag;
+    cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag;
+    cldnn::primitive_id biasID = layerName + m_biasesTag;
     auto cellLayer = dynamic_cast<InferenceEngine::LSTMCell*> (layer.get());
 
     /* check incoming CNN layer and setup required variables */
@@ -2596,16 +2766,12 @@ void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) {
             THROW_IE_EXCEPTION << "Wrong input shapes for LSTMCell Layer " << layer->name;
     }
 
-    /*
-     * Prepare weight/bias memory primitives:
-     *   - split weight blob into W and R
-     *   - rearrange gate order from FICO layout in IR to IOFC expected by clDNN
-     */
+    /* Prepare weight/bias memory primitives - split weight blob into W and R */
     {
         cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
         cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
-        cldnn::layout WLayout = cldnn::layout(m_networkPrecision, m_defaultFormat, wTensor);
-        cldnn::layout RLayout = cldnn::layout(m_networkPrecision, m_defaultFormat, rTensor);
+        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor);
+        cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
 
         auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout);
         auto wtmpPointer = wmem.pointer<char>();  // implicitly maps buffer - unmap in destructor
@@ -2613,33 +2779,23 @@ void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout);
         auto rtmpPointer = rmem.pointer<char>();
 
-        // FICO -> IOFC
-        const std::vector<size_t> gate_offs{2, 0, 3, 1};
-
         auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
         auto pWeightsBlob = wLayer->_weights;
         auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
         const size_t WchunkSz = lstm_input_size * elementSize;
         const size_t RchunkSz = lstm_hidden_size * elementSize;
 
-        for (int g = 0; g < 4; g++) {
-            auto wBytes = wtmpPointer.data() + gate_offs[g] * lstm_hidden_size * WchunkSz;
-            auto rBytes = rtmpPointer.data() + gate_offs[g] * lstm_hidden_size * RchunkSz;
-            for (int h = 0; h < lstm_hidden_size; h++) {
-                // copy "input size" elements to W
-                for (size_t b = 0; b < WchunkSz; b++) {
-                    wBytes[b] = blobBytes[b];
-                }
-                blobBytes += WchunkSz;
-                wBytes += WchunkSz;
+        auto wBytes = wtmpPointer.data();
+        auto rBytes = rtmpPointer.data();
 
-                // copy "lstm_hidden_size" elements to R
-                for (size_t b = 0; b < RchunkSz; b++) {
-                    rBytes[b] = blobBytes[b];
-                }
-                blobBytes += RchunkSz;
-                rBytes += RchunkSz;
-            }
+        for (int h = 0; h < 4 * lstm_hidden_size; h++) {
+            // copy "input size" elements to W
+            for (size_t b = 0; b < WchunkSz; b++)
+                *wBytes++ = *blobBytes++;
+
+            // copy "lstm_hidden_size" elements to R
+            for (size_t b = 0; b < RchunkSz; b++)
+                *rBytes++ = *blobBytes++;
         }
 
         m_topology->add(cldnn::data(weightID, wmem));
@@ -2649,71 +2805,63 @@ void CLDNNGraph::CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         auto pBiasBlob = wLayer->_biases;
         if (pBiasBlob != nullptr) {
             cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
-            cldnn::layout BLayout = cldnn::layout(m_networkPrecision, m_defaultFormat, rTensor);
+            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
 
             auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout);
             auto btmpPointer = bmem.pointer<char>();
 
             auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
             const size_t BchunkSz = lstm_hidden_size * elementSize;
+            auto bBytes = btmpPointer.data();
 
-            for (int g = 0; g < 4; g++) {
-                auto bBytes = btmpPointer.data() + gate_offs[g] * BchunkSz;
-                // copy "lstm_hidden_size" elements to B
-                for (size_t b = 0; b < BchunkSz; b++) {
-                    bBytes[b] = blobBytes[b];
-                }
-                blobBytes += BchunkSz;
-            }
+            for (size_t b = 0; b < 4 * BchunkSz; b++)
+                *bBytes++ = *blobBytes++;
 
             m_topology->add(cldnn::data(biasID, bmem));
             hasBias = true;
         }
     }
 
-    cldnn::primitive_id inReshapeID = layer->name + "_inReshape";
-    cldnn::primitive_id permuteID = layer->name + "_inputReorder";
-    cldnn::primitive_id inHiddenReshapeID = layer->name + "_inHiddenReshape";
+    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
+    cldnn::primitive_id permuteID = layerName + "_inputReorder";
+    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
 
     cldnn::tensor inputShape = { lstm_batch_size, 1, lstm_input_size, 1 };
     cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
-    cldnn::layout inputLayout = cldnn::layout(m_networkPrecision, cldnn::format::bfyx, inputShape);
+    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape);
     m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
     m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
 
-    m_topology->add(cldnn::reshape(inHiddenReshapeID+"_1", inputPrimitives[1], hiddenStateShape));
-    m_topology->add(cldnn::reshape(inHiddenReshapeID+"_2", inputPrimitives[2], hiddenStateShape));
+    std::string hiddenInStr = inHiddenReshapeID + "_1";
+    std::string cellInStr = inHiddenReshapeID + "_2";
+    m_topology->add(cldnn::reshape(hiddenInStr, inputPrimitives[1], hiddenStateShape));
+    m_topology->add(cldnn::reshape(cellInStr, inputPrimitives[2], hiddenStateShape));
 
-    cldnn::tensor hiddenSz = cldnn::tensor{ 1, lstm_batch_size, lstm_hidden_size, 1 };
+    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
     cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
-    std::string hiddenInStr = inHiddenReshapeID+"_1";
-    std::string cellInStr = inHiddenReshapeID+"_2";
 
-
-    std::string lstm_gemm_id = layer->name + "_lstm_gemm";
-    std::string lstm_elt_id = layer->name + "_lstm_elt";
-    std::string crop_id = layer->name + "_crop";
+    std::string lstm_gemm_id = layerName + "_lstm_gemm";
+    std::string lstm_elt_id = layerName + "_lstm_elt";
+    std::string crop_id = layerName + "_crop";
 
     m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, permuteID,
                                      weightID, recurrentID,
                                      hasBias ? biasID : "",
                                      hiddenInStr));
-    m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id,
-                                    cellInStr));
-
-
+    m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id, cellInStr,
+                                    0, 0, {}, {}, cldnn_lstm_offset_order_fizo));
 
-
-    cldnn::primitive_id outputHiddenID = layer->name;
+    cldnn::primitive_id outputHiddenID = layerName;
     m_topology->add(cldnn::crop(outputHiddenID, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
-    m_env.primitiveIDs[outputHiddenID] = outputHiddenID;
-    m_env.primitiveIDs[layer->outData[0]->name] = outputHiddenID;
-
-    cldnn::primitive_id outputCellID = layer->outData[1]->name;
+    cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[1]->name;
     m_topology->add(cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz));
-    m_env.primitiveIDs[outputCellID] = outputCellID;
 
-    m_env.profilingIDs.insert(layer->name);
+    // output primitive IDs
+    m_env.primitiveIDs[outputHiddenID] = outputHiddenID;                                // LSTMCell:LSTMCell - "concat hidden"
+    m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = outputHiddenID;   // LSTMCell:LSTMCell:0 - hidden state
+    m_env.primitiveIDs[outputCellID] = outputCellID;                                    // LSTMCell:LSTMCell:1 - cell state
+
+    m_env.profilingIDs.push_back(layerName);
 }
 
 void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2722,15 +2870,17 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     bool hasInitialHidden = false, hasInitialCell = false, hasBias = false, isForward = true;
     auto inputPrimitives = GetPrevLayersPrimitives(layer);
 
-    auto elementSize = cldnn::data_type_traits::size_of(m_networkPrecision);
-    cldnn::primitive_id weightID = layer->name + m_weightsTag;
-    cldnn::primitive_id recurrentID = layer->name + "_recurrent" + m_weightsTag;
-    cldnn::primitive_id biasID = layer->name + m_biasesTag;
-    auto rnnLayer = dynamic_cast<InferenceEngine::RNNLayer*> (layer.get());
+    auto elementSize = cldnn::data_type_traits::size_of(DataTypeFromPrecision(layer->precision));
+    std::string layerName = layer_type_name_ID(layer);
+    cldnn::primitive_id weightID = layerName + m_weightsTag;
+    cldnn::primitive_id recurrentID = layerName + "_recurrent" + m_weightsTag;
+    cldnn::primitive_id biasID = layerName + m_biasesTag;
+    auto rnnLayer = dynamic_cast<InferenceEngine::RNNSequenceLayer*> (layer.get());
+    bool permute_input = (1 != rnnLayer->axis);
 
     /* check incoming CNN layer and setup required variables */
     {
-        if (rnnLayer->cellType != "LSTM")
+        if (rnnLayer->cellType != RNNSequenceLayer::LSTM)
          THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";
 
         auto in_data0 = layer->insData[0].lock();
@@ -2740,7 +2890,7 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         auto in_dims0 = in_data0->dims;
         auto out_dims0 = layer->outData[0]->dims;
 
-        if (1 == rnnLayer->axis) {
+        if (!permute_input) {
             lstm_batch_size = in_dims0[2];
             lstm_sequence_len = in_dims0[1];
         } else {
@@ -2767,24 +2917,20 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
             hasInitialCell = true;
         }
 
-        if (rnnLayer->direction != RNNLayer::RNN_FWD && rnnLayer->direction != RNNLayer::RNN_BWD)
+        if (rnnLayer->direction != RNNSequenceLayer::FWD && rnnLayer->direction != RNNSequenceLayer::BWD)
             THROW_IE_EXCEPTION << "Support only forward and backward direction for RNN Layer " << layer->name;
-        isForward = rnnLayer->direction == RNNLayer::RNN_FWD;
+        isForward = rnnLayer->direction == RNNSequenceLayer::FWD;
 
         if (in_dims0.size() != 3 || in_dims1.size() != 2 || in_dims2.size() != 2)
             THROW_IE_EXCEPTION << "Wrong input shapes for RNN Layer " << layer->name;
     }
 
-    /*
-     * Prepare weight/bias memory primitives:
-     *   - split weight blob into W and R
-     *   - rearrange gate order from FICO layout in IR to IOFC expected by clDNN
-     */
+    /* Prepare weight/bias memory primitives - split weight blob into W and R */
     {
         cldnn::tensor wTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_input_size, 4 * lstm_hidden_size));
         cldnn::tensor rTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(lstm_hidden_size, 4 * lstm_hidden_size));
-        cldnn::layout WLayout = cldnn::layout(m_networkPrecision, m_defaultFormat, wTensor);
-        cldnn::layout RLayout = cldnn::layout(m_networkPrecision, m_defaultFormat, rTensor);
+        cldnn::layout WLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, wTensor);
+        cldnn::layout RLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
 
         auto wmem = cldnn::memory::allocate(*(m_env.engine), WLayout);
         auto wtmpPointer = wmem.pointer<char>();  // implicitly maps buffer - unmap in destructor
@@ -2792,33 +2938,23 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         auto rmem = cldnn::memory::allocate(*(m_env.engine), RLayout);
         auto rtmpPointer = rmem.pointer<char>();
 
-        // FICO -> IOFC
-        const std::vector<size_t> gate_offs{2, 0, 3, 1};
-
         auto wLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
         auto pWeightsBlob = wLayer->_weights;
         auto blobBytes = static_cast<const char *>(pWeightsBlob->buffer());
         const size_t WchunkSz = lstm_input_size * elementSize;
         const size_t RchunkSz = lstm_hidden_size * elementSize;
 
-        for (int g = 0; g < 4; g++) {
-            auto wBytes = wtmpPointer.data() + gate_offs[g] * lstm_hidden_size * WchunkSz;
-            auto rBytes = rtmpPointer.data() + gate_offs[g] * lstm_hidden_size * RchunkSz;
-            for (int h = 0; h < lstm_hidden_size; h++) {
-                // copy "input size" elements to W
-                for (size_t b = 0; b < WchunkSz; b++) {
-                    wBytes[b] = blobBytes[b];
-                }
-                blobBytes += WchunkSz;
-                wBytes += WchunkSz;
+        auto wBytes = wtmpPointer.data();
+        auto rBytes = rtmpPointer.data();
 
-                // copy "lstm_hidden_size" elements to R
-                for (size_t b = 0; b < RchunkSz; b++) {
-                    rBytes[b] = blobBytes[b];
-                }
-                blobBytes += RchunkSz;
-                rBytes += RchunkSz;
-            }
+        for (int h = 0; h < 4 * lstm_hidden_size; h++) {
+            // copy "input size" elements to W
+            for (size_t b = 0; b < WchunkSz; b++)
+                *wBytes++ = *blobBytes++;
+
+            // copy "lstm_hidden_size" elements to R
+            for (size_t b = 0; b < RchunkSz; b++)
+                *rBytes++ = *blobBytes++;
         }
 
         m_topology->add(cldnn::data(weightID, wmem));
@@ -2828,22 +2964,17 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
         auto pBiasBlob = wLayer->_biases;
         if (pBiasBlob != nullptr) {
             cldnn::tensor bTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(1), cldnn::spatial(4 * lstm_hidden_size, 1));
-            cldnn::layout BLayout = cldnn::layout(m_networkPrecision, m_defaultFormat, rTensor);
+            cldnn::layout BLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), m_defaultFormat, rTensor);
 
             auto bmem = cldnn::memory::allocate(*(m_env.engine), BLayout);
             auto btmpPointer = bmem.pointer<char>();
 
             auto blobBytes = static_cast<const char *>(pBiasBlob->buffer());
             const size_t BchunkSz = lstm_hidden_size * elementSize;
+            auto bBytes = btmpPointer.data();
 
-            for (int g = 0; g < 4; g++) {
-                auto bBytes = btmpPointer.data() + gate_offs[g] * BchunkSz;
-                // copy "lstm_hidden_size" elements to B
-                for (size_t b = 0; b < BchunkSz; b++) {
-                    bBytes[b] = blobBytes[b];
-                }
-                blobBytes += BchunkSz;
-            }
+            for (size_t b = 0; b < 4 * BchunkSz; b++)
+                *bBytes++ = *blobBytes++;
 
             m_topology->add(cldnn::data(biasID, bmem));
             hasBias = true;
@@ -2853,13 +2984,19 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     std::vector<std::pair<cldnn::primitive_id, cldnn::tensor>> input_ids_offsets;
     std::vector<cldnn::primitive_id> output_ids_offsets;
 
-    cldnn::primitive_id inReshapeID = layer->name + "_inReshape";
-    cldnn::primitive_id permuteID = layer->name + "_inputReorder";
-    cldnn::primitive_id inHiddenReshapeID = layer->name + "_inHiddenReshape";
+    cldnn::primitive_id inReshapeID = layerName + "_inReshape";
+    cldnn::primitive_id permuteID = layerName + "_inputReorder";
+    cldnn::primitive_id inHiddenReshapeID = layerName + "_inHiddenReshape";
+
+    cldnn::tensor inputShape;
 
-    cldnn::tensor inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
+    if (permute_input) {
+        inputShape = { lstm_sequence_len, lstm_batch_size, lstm_input_size, 1 };
+    } else {
+        inputShape = { lstm_batch_size, lstm_sequence_len, lstm_input_size, 1 };
+    }
     cldnn::tensor hiddenStateShape = { lstm_batch_size, 1, lstm_hidden_size, 1 };
-    cldnn::layout inputLayout = cldnn::layout(m_networkPrecision, cldnn::format::bfyx, inputShape);
+    cldnn::layout inputLayout = cldnn::layout(DataTypeFromPrecision(layer->precision), cldnn::format::bfyx, inputShape);
     m_topology->add(cldnn::reshape(inReshapeID, inputPrimitives[0], inputShape));
     m_topology->add(cldnn::reorder(permuteID, inReshapeID, inputLayout));
 
@@ -2869,18 +3006,24 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
     for (int i = 0; i < lstm_sequence_len; ++i)
         input_ids_offsets.push_back({ get_string_id(i), {0, i, 0, 0} });
 
-    cldnn::primitive_id inputSplitID = layer->name + "_inputSplit";
-    m_topology->add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
+    cldnn::primitive_id inputSplitID = layerName + "_inputSplit";
 
-    cldnn::tensor hiddenSz = cldnn::tensor{ 1, lstm_batch_size, lstm_hidden_size, 1 };
+    if (permute_input) {
+        m_topology->add(cldnn::permute(layerName + "_inputSwap", permuteID, { 1, 0, 2, 3 }));
+        m_topology->add(cldnn::split(inputSplitID, layerName + "_inputSwap", input_ids_offsets));
+    } else {
+        m_topology->add(cldnn::split(inputSplitID, permuteID, input_ids_offsets));
+    }
+
+    cldnn::tensor hiddenSz = cldnn::tensor{ lstm_batch_size, 1, lstm_hidden_size, 1 };
     cldnn::tensor cellCropSz = cldnn::tensor{0, 1, 0, 0};
     std::string hiddenStr = hasInitialHidden ? inHiddenReshapeID+"_1" : "";
     std::string cellStr = hasInitialCell ? inHiddenReshapeID+"_2" : "";
 
     for (int i = 0; i < lstm_sequence_len; ++i) {
-        std::string lstm_gemm_id = layer->name + "_lstm_gemm" + get_string_id(i);
-        std::string lstm_elt_id = layer->name + "_lstm_elt" + get_string_id(i);
-        std::string crop_id = layer->name + "_crop" + get_string_id(i);
+        std::string lstm_gemm_id = layerName + "_lstm_gemm" + get_string_id(i);
+        std::string lstm_elt_id = layerName + "_lstm_elt" + get_string_id(i);
+        std::string crop_id = layerName + "_crop" + get_string_id(i);
 
         int seqIdx = isForward ? i : lstm_sequence_len - 1 - i;
         m_topology->add(cldnn::lstm_gemm(lstm_gemm_id, inputSplitID + ":" + get_string_id(seqIdx),
@@ -2888,54 +3031,46 @@ void CLDNNGraph::CreateRNNPrimitive(InferenceEngine::CNNLayerPtr &layer) {
                                             hasBias ? biasID : "",
                                             hiddenStr));
         m_topology->add(cldnn::lstm_elt(lstm_elt_id, lstm_gemm_id,
-                                            cellStr));
+                                            cellStr, 0, 0, {}, {},
+                                            cldnn_lstm_offset_order_fizo));
 
         hiddenStr = crop_id + ":hidden";
-        m_topology->add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{0, 0, 0, 0}));
+        cellStr = crop_id + ":cell";
+        m_topology->add(cldnn::crop(hiddenStr, lstm_elt_id, hiddenSz, cldnn::tensor{ 0, 0, 0, 0 }));
         output_ids_offsets.push_back(hiddenStr);
 
         if (i < lstm_sequence_len - 1) {
-            cellStr = crop_id + ":cell";
             m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
         } else {
             // last hidden state crop (output 2)
             if (layer->outData.size() > 1) {
-                cldnn::primitive_id outputHiddenID = layer->outData[1]->name;
+                cldnn::primitive_id outputHiddenID = layer->type + ":" + layer->outData[1]->name;
                 m_env.primitiveIDs[hiddenStr] = hiddenStr;
                 m_env.primitiveIDs[outputHiddenID] = hiddenStr;
             }
 
             // last cell state crop (output 3)
             if (layer->outData.size() > 2) {
-                cldnn::primitive_id outputCellID = layer->outData[2]->name;
-                auto cropPrim = cldnn::crop(outputCellID, lstm_elt_id, hiddenSz, cellCropSz);
-                m_topology->add(cropPrim);
-                m_env.primitiveIDs[outputCellID] = outputCellID;
+                m_topology->add(cldnn::crop(cellStr, lstm_elt_id, hiddenSz, cellCropSz));
+                cldnn::primitive_id outputCellID = layer->type + ":" + layer->outData[2]->name;
+                m_env.primitiveIDs[cellStr] = cellStr;
+                m_env.primitiveIDs[outputCellID] = cellStr;
             }
         }
     }
 
     if (!isForward) std::reverse(output_ids_offsets.begin(), output_ids_offsets.end());
 
-    // main output (concatenated hidden)
-    cldnn::primitive_id concatID = layer->name + "_outputConcat";
-    m_topology->add(cldnn::concatenation(concatID, output_ids_offsets, cldnn::concatenation::along_f));
-
-    // permute output to [1, batch, sequence, hidden_size]
-    cldnn::tensor outputTensor;
-    if (1 == rnnLayer->axis) {
-        outputTensor = cldnn::tensor(cldnn::batch(1),   cldnn::feature(lstm_batch_size), cldnn::spatial(lstm_hidden_size, lstm_sequence_len));
+    if (permute_input) {
+        m_topology->add(cldnn::concatenation(layerName + "_outputConcat", output_ids_offsets, cldnn::concatenation::along_f));
+        m_topology->add(cldnn::permute(layerName, layerName + "_outputConcat", { 1, 0, 2, 3 }));
     } else {
-        outputTensor = cldnn::tensor(cldnn::batch(1), cldnn::feature(lstm_sequence_len), cldnn::spatial(lstm_hidden_size, lstm_batch_size));
+        m_topology->add(cldnn::concatenation(layerName, output_ids_offsets, cldnn::concatenation::along_f));
     }
-    cldnn::layout outputLayout = cldnn::layout(m_networkPrecision, cldnn::format::bfyx, outputTensor);
-    cldnn::primitive_id outReshapeID = layer->name + "_outReshape";
-    m_topology->add(cldnn::reshape(outReshapeID, concatID, outputTensor));
-    m_topology->add(cldnn::reorder(layer->name, outReshapeID, outputLayout));
 
-    m_env.primitiveIDs[layer->name] = layer->name;
-    m_env.primitiveIDs[layer->outData[0]->name] = layer->name;
-    m_env.profilingIDs.insert(layer->name);
+    m_env.primitiveIDs[layerName] = layerName;
+    m_env.primitiveIDs[layer->type + ":" + layer->outData[0]->name] = layerName;
+    m_env.profilingIDs.push_back(layerName);
 }
 
 void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
@@ -2952,7 +3087,8 @@ void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
             break;
     case 2: constTensor = cldnn::tensor(TensorValue(constDims[1]), TensorValue(constDims[0]), 1, 1);
             break;
-        case 1:  // not implemented yet.
+    case 1: constTensor = cldnn::tensor(TensorValue(constDims[0]), 1, 1, 1);
+            break;
         default: THROW_CLDNN_EXCEPTION("Invalid constant blob dimensions");
     }
 
@@ -2962,23 +3098,10 @@ void CLDNNGraph::AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer) {
         constTensor);
 
     size_t bytes = constLayout.bytes_count();
-    cldnn::primitive_id constPrimID = layer->name;
-
-    /* clDNN Constant Propagator bug WA - use MutableData primitive instead of Data
-       to prevent FP16 -> FP32 conversion loss and crash */
-    // CreatePrimitiveFromBlob(constPrimID, constBlob, constLayout);
-    auto mem = cldnn::memory::allocate(*(m_env.engine), constLayout);
-    auto tmpPointer = mem.pointer<char>();  // implicitly maps buffer - unmap in destructor
-    auto buf = tmpPointer.data();
-    auto bufSize = constLayout.bytes_count();
+    cldnn::primitive_id constPrimID = layer_type_name_ID(layer);
 
-    auto data = static_cast<const char *>(constBlob->buffer());
-    for (size_t i = 0; i < bufSize; i++) {
-        buf[i] = data[i];
-    }
-    m_topology->add(cldnn::mutable_data(constPrimID, mem));
-
-    m_env.primitiveIDs[layer->name] = constPrimID;
+    CreatePrimitiveFromBlob(constPrimID, constBlob, constLayout);
+    m_env.primitiveIDs[constPrimID] = constPrimID;
 }
 
 void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer) {
@@ -2998,20 +3121,202 @@ void CLDNNGraph::CreateConvolutionPrimitive(InferenceEngine::CNNLayerPtr &layer)
     cldnn::tensor dilation = cldnn::tensor(cldnn::batch(1), cldnn::feature(1),
                                            cldnn::spatial(convLayer->_dilation[X_AXIS], convLayer->_dilation[Y_AXIS]));
 
-    auto convPrim = cldnn::convolution(convLayer->name,
-                                       inputPrimitives[0],
-                                       weightPrimID,
-                                       biasPrimID,
-                                       stride,
-                                       padding,
-                                       dilation,
-                                       false,
-                                       0.0f,
-                                       CldnnTensorFromIEDims(convLayer->outData[0]->dims));
+    std::string convLayerName = layer_type_name_ID(layer);
+    if (convLayer->_group >= 16) {
+        auto convPrim = cldnn::convolution(convLayerName,
+                                           inputPrimitives[0],
+                                           weightPrimID,
+                                           biasPrimID,
+                                           convLayer->_group,
+                                           stride,
+                                           padding,
+                                           dilation,
+                                           false,
+                                           0.0,
+                                           CldnnTensorFromIEDims(convLayer->outData[0]->dims));
+        m_topology->add(convPrim);
+    } else {
+        auto convPrim = cldnn::convolution(convLayerName,
+                                           inputPrimitives[0],
+                                           weightPrimID,
+                                           biasPrimID,
+                                           stride,
+                                           padding,
+                                           dilation,
+                                           false,
+                                           0.0f,
+                                           CldnnTensorFromIEDims(convLayer->outData[0]->dims));
+        m_topology->add(convPrim);
+    }
+    m_env.primitiveIDs[convLayerName] = convLayerName;
+    m_env.profilingIDs.push_back(convLayerName);
+}
+
+void CLDNNGraph::CreateGatherPrimitive(InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 2);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto gatherLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
+
+    int axis = gatherLayer->GetParamAsInt("axis", 0);
+
+    // Be careful, TensorFlow consist negative axis interpretation bug. Here: -3 = b, -2 = f, -1 = y, but must be -3 = f, -2 = y, -1 = x
+    auto cldnnAxisFromIE = [](int axis) {
+        switch (axis) {
+            case 0: return cldnn::gather::gather_axis::along_b;
+            case 1: return cldnn::gather::gather_axis::along_f;
+            case 2: return cldnn::gather::gather_axis::along_y;
+            case 3: return cldnn::gather::gather_axis::along_x;
+            case -1: return cldnn::gather::gather_axis::along_y;
+            case -2: return cldnn::gather::gather_axis::along_f;
+            case -3: return cldnn::gather::gather_axis::along_b;
+            default: THROW_CLDNN_EXCEPTION("Unsupported gather axis: " << axis);
+        }
+    };
 
-    m_env.primitiveIDs[convLayer->name] = convLayer->name;
-    m_topology->add(convPrim);
-    m_env.profilingIDs.insert(convLayer->name);
+    std::string gatherLayerName = layer_type_name_ID(layer);
+    auto gatherPrim = cldnn::gather(
+            gatherLayerName,
+            inputPrimitives[0],
+            inputPrimitives[1],
+            cldnnAxisFromIE(axis),
+            CldnnTensorFromIEDims(gatherLayer->outData[0]->dims));
+
+    m_env.primitiveIDs[gatherLayerName] = gatherLayerName;
+    m_topology->add(gatherPrim);
+    m_env.profilingIDs.push_back(gatherLayerName);
+}
+
+void CLDNNGraph::CreateDepthToSpacePrimitive(InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto depthToSpace = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
+
+    size_t blockSize = depthToSpace->GetParamAsInt("block_size", 2);
+
+    if (depthToSpace->input().get()->dims.size() != 4)
+        THROW_CLDNN_EXCEPTION("Unsupported size of tensor " << depthToSpace->input().get()->dims.size());
+
+    size_t blockSizeSquare = blockSize * blockSize;
+
+    if (depthToSpace->input().get()->dims[2] % blockSizeSquare != 0)
+        THROW_CLDNN_EXCEPTION("The depth of the input tensor must be divisible by squared block size = " << blockSizeSquare);
+
+    std::string depthToSpaceName = layer_type_name_ID(layer);
+    auto depthToSpacePrim = cldnn::depth_to_space(
+            depthToSpaceName,
+            inputPrimitives[0],
+            blockSize);
+
+    m_env.primitiveIDs[depthToSpaceName] = depthToSpaceName;
+    m_topology->add(depthToSpacePrim);
+    m_env.profilingIDs.push_back(depthToSpaceName);
+}
+
+void CLDNNGraph::CreateShuffleChannelsPrimitive(InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 1);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto shuffleChannels = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
+    const int32_t numberOfDims = shuffleChannels->input()->getDims().size();
+
+    int32_t group = shuffleChannels->GetParamAsInt("group", 1);
+    int32_t axis = shuffleChannels->GetParamAsInt("axis", 1);
+
+    if (axis < 0)
+        axis += numberOfDims;
+
+    if (axis < 0 || axis >= numberOfDims)
+        THROW_CLDNN_EXCEPTION("Incorrect axis value! Actual axis is" + std::to_string(group));
+
+    if (group < 1)
+        THROW_CLDNN_EXCEPTION("Invalid group size value (should equal at least one). Actual block size is" +
+                                       std::to_string(group));
+
+    if (shuffleChannels->input().get()->getDims()[axis] % group != 0)
+        THROW_CLDNN_EXCEPTION("Group parameter must evenly divide the channel dimension. Actual group size is " +
+                                       std::to_string(axis));
+
+    std::string shuffleChannelsName = layer_type_name_ID(layer);
+    auto shuffleChannelsPrim = cldnn::shuffle_channels(
+            shuffleChannelsName,
+            inputPrimitives[0],
+            group,
+            axis);
+
+    m_env.primitiveIDs[shuffleChannelsName] = shuffleChannelsName;
+    m_topology->add(shuffleChannelsPrim);
+    m_env.profilingIDs.push_back(shuffleChannelsName);
+}
+
+void CLDNNGraph::CreateStridedSlicePrimitive(InferenceEngine::CNNLayerPtr &layer) {
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto stridedSliceLayer = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
+
+    auto tmp = stridedSliceLayer->GetParamAsUInts("end_mask");
+    std::vector<uint8_t> end_mask(tmp.begin(), tmp.end());
+    tmp = stridedSliceLayer->GetParamAsUInts("begin_mask");
+    std::vector<uint8_t> begin_mask(tmp.begin(), tmp.end());
+    tmp = stridedSliceLayer->GetParamAsUInts("new_axis_mask");
+    std::vector<uint8_t> new_axis_mask(tmp.begin(), tmp.end());
+    tmp = stridedSliceLayer->GetParamAsUInts("shrink_axis_mask");
+    std::vector<uint8_t> shrink_axis_mask(tmp.begin(), tmp.end());
+
+    std::string stridedSliceLayerName = layer_type_name_ID(layer);
+    auto stridedSlicePrim = cldnn::strided_slice(
+            stridedSliceLayerName,
+            inputPrimitives[0], inputPrimitives[1], inputPrimitives[2], inputPrimitives[3],
+            begin_mask, end_mask, new_axis_mask, shrink_axis_mask);
+
+    m_env.primitiveIDs[stridedSliceLayerName] = stridedSliceLayerName;
+    m_topology->add(stridedSlicePrim);
+    m_env.profilingIDs.push_back(stridedSliceLayerName);
+}
+
+void CLDNNGraph::CreateReverseSequencePrimitive(InferenceEngine::CNNLayerPtr &layer) {
+    ValidateLayer(layer, 2);
+
+    auto inputPrimitives = GetPrevLayersPrimitives(layer);
+    auto reverseSequence = dynamic_cast<InferenceEngine::GenericLayer*> (layer.get());
+    const int32_t numberOfDims = reverseSequence->input()->getDims().size();
+
+    const auto input = reverseSequence->insData[0].lock()->getDims();
+    const auto sequence_lengths = reverseSequence->insData[1].lock()->getDims();
+
+    int32_t batch_axis = reverseSequence->GetParamAsInt("batch_axis", 0);
+    int32_t seq_axis = reverseSequence->GetParamAsInt("seq_axis", 1);
+
+    if (batch_axis < 0)
+        batch_axis += input.size();
+
+    if (seq_axis < 0)
+        seq_axis += input.size();
+
+    if (batch_axis == seq_axis)
+        THROW_CLDNN_EXCEPTION("Batch axis and sequence axis should not be equal\n");
+
+    if (seq_axis < 0 || seq_axis >= input.size())
+        THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(seq_axis));
+
+    if (batch_axis < 0 || batch_axis >= input.size())
+        THROW_CLDNN_EXCEPTION("Incorrect Sequence axis value! Actual axis is " + std::to_string(batch_axis));
+
+    if (sequence_lengths[0] != input[batch_axis])
+        THROW_CLDNN_EXCEPTION("Sequence lengths must be a vector of length " + std::to_string(input[batch_axis])
+                            + "! Actual axis is " + std::to_string(sequence_lengths[0]));
+
+    std::string reverseSequenceLayerName = layer_type_name_ID(layer);
+    auto reverseSequencePrim = cldnn::reverse_sequence(
+            reverseSequenceLayerName,
+            inputPrimitives[0],
+            inputPrimitives[1],
+            seq_axis,
+            batch_axis);
+
+    m_env.primitiveIDs[reverseSequenceLayerName] = reverseSequenceLayerName;
+    m_topology->add(reverseSequencePrim);
+    m_env.profilingIDs.push_back(reverseSequence->name);
 }
 
 bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitLayer) const {
@@ -3063,7 +3368,7 @@ bool CLDNNGraph::IsValidSplitConvMerge(const InferenceEngine::SplitLayer *splitL
     return true;
 }
 
-void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
+void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo, Precision inputPrecision) {
     // first create and add the input layout
     auto inputDims = inputInfo->getDims();
     InferenceEngine::Layout l = inputInfo->getTensorDesc().getLayout();
@@ -3091,7 +3396,7 @@ void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
             break;
         case 3:
             if (InferenceEngine::Layout::CHW == l) {
-                dataTensor = cldnn::tensor(TensorValue(inputDims[2]), TensorValue(inputDims[1]), TensorValue(inputDims[0]), 1);
+                dataTensor = cldnn::tensor(TensorValue(inputDims[2]), TensorValue(inputDims[1]), 1, TensorValue(inputDims[0]));
             } else {
                 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 3D input " + inputInfo->name());
             }
@@ -3105,18 +3410,21 @@ void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
                 THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(l) << ") in 2D input " + inputInfo->name());
             }
             break;
-        case 1:  // not implemented yet.
+        case 1:
+            dataTensor = cldnn::tensor(TensorValue(inputDims[0]), 1, 1, 1);
+            break;
         default: THROW_CLDNN_EXCEPTION("Invalid data dimensions");
     }
 
     cldnn::layout inputLayout(DataTypeFromPrecision(inputInfo->getInputPrecision()),
         FormatFromLayout(l),
         dataTensor);
-    auto inputName = inputInfo->name();
-    m_topology->add(cldnn::input_layout(inputName, inputLayout));
 
     // save the input dims
-    m_env.inputLayouts.insert({ inputName, inputLayout });
+    m_env.inputLayouts.insert({ inputInfo->name(), inputLayout });
+
+    auto inputName = "Input:" + inputInfo->name();
+    m_topology->add(cldnn::input_layout(inputName, inputLayout));
 
     // create preprocess primitive for this input
     auto preProcess = inputInfo->getPreProcess();
@@ -3124,7 +3432,7 @@ void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
     size_t meanChannels = preProcess.getNumberOfChannels();
     inputLayout.format = m_defaultFormat;
     inputLayout.size = inputLayout.size.transform(m_defaultFormat, 1);
-    inputLayout.data_type = m_networkPrecision;
+    inputLayout.data_type = DataTypeFromPrecision(inputPrecision);
     auto preprocessPrimID = inputName + m_preProcessTag;
 
     if ((meanChannels > 0) &&
@@ -3144,8 +3452,8 @@ void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
             }
         }
         m_topology->add(cldnn::reorder(preprocessPrimID, inputName, inputLayout, meanValues));
-        m_env.profilingIDs.insert(preprocessPrimID);
-        InitProfileInfo(preprocessPrimID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+        m_env.profilingIDs.push_back(preprocessPrimID);
+        InitProfileInfo(preprocessPrimID, "Reorder");
     }
     break;
 
@@ -3189,8 +3497,8 @@ void CLDNNGraph::AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo) {
             inputName,
             inputLayout,
             inputName + m_meanValuesTag));
-        m_env.profilingIDs.insert(preprocessPrimID);
-        InitProfileInfo(preprocessPrimID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+        m_env.profilingIDs.push_back(preprocessPrimID);
+        InitProfileInfo(preprocessPrimID, "Reorder");
     }
     break;
 
@@ -3212,12 +3520,18 @@ std::vector<cldnn::primitive_id> CLDNNGraph::GetPrevLayersPrimitives(const Infer
             THROW_CLDNN_EXCEPTION("Nonexistent input for layer: " << layer->name);
         }
         auto prevCreator = prevData->creatorLayer.lock();
-        auto prevName = prevCreator ? prevCreator->name : prevData->name;
-        if (prevCreator && prevCreator->outData.size() > 1) {
-            inputPrimitives.push_back(m_env.primitiveIDs.at(prevData->name));
+        std::string prevName;
+
+        if (prevCreator) {
+            prevName = prevCreator->type + ":";
+            if (prevCreator->outData.size() > 1)
+                prevName += prevData->name;
+            else
+                prevName += prevCreator->name;
         } else {
-            inputPrimitives.push_back(m_env.primitiveIDs.at(prevName));
+            prevName = prevData->name;
         }
+        inputPrimitives.push_back(m_env.primitiveIDs.at(prevName));
     }
     return inputPrimitives;
 }
@@ -3230,12 +3544,21 @@ void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngin
         outputData->layout != InferenceEngine::NC) {
         THROW_CLDNN_EXCEPTION("Unsupported layout (" << DebugOptions::IELayoutToString(outputData->layout) << ") in output: " << outputName);
     }
+
+    auto outputCreator = outputData->getCreatorLayer().lock();
+    std::string outLayerName = outputCreator->type + ":";
+
+    if (outputCreator->outData.size() > 1)
+        outLayerName += outputName;
+    else
+        outLayerName += outputCreator->name;
+
     auto outputReorderID = outputName + m_postProcessTag;
     Precision precision = outputPrecision == Precision::UNSPECIFIED ? outputData->getPrecision() : outputPrecision;
 
     // Find correct output ID. Start with name stored in IR.
-    std::string outputID = outputName;
-    std::string finalID = m_env.primitiveIDs.at(outputName);
+    std::string outputID = outLayerName;
+    std::string finalID = m_env.primitiveIDs.at(outLayerName);
 
     while (outputID != finalID) {
         auto prim = m_env.primitiveIDs.find(finalID);
@@ -3251,8 +3574,8 @@ void CLDNNGraph::AddOutputPrimitive(std::string outputName, const InferenceEngin
         FormatFromLayout(outputData->getLayout()),
         DataTypeFromPrecision(precision)));
     m_env.primitiveIDs[outputName] = outputReorderID;
-    m_env.profilingIDs.insert(outputReorderID);
-    InitProfileInfo(outputReorderID, "Reorder", "GPU", InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
+    m_env.profilingIDs.push_back(outputReorderID);
+    InitProfileInfo(outputReorderID, "Reorder");
     m_env.outputDims[outputName] = outputData->dims;
     m_env.prevPrimitiveIDs[outputReorderID] = {outputName};
 }
@@ -3293,6 +3616,8 @@ cldnn::data_types CLDNNGraph::DataTypeFromPrecision(InferenceEngine::Precision p
         return cldnn::data_types::f16;
     case Precision::U8:
         return cldnn::data_types::u8;
+    case Precision::I32:
+        return cldnn::data_types::i32;
     default:
         THROW_IE_EXCEPTION << PARAMETER_MISMATCH_str << "The plugin does not support " << p.name() << " precision";
         break;
@@ -3304,6 +3629,7 @@ cldnn::format CLDNNGraph::FormatFromLayout(InferenceEngine::Layout l) {
     case InferenceEngine::Layout::NCHW:
     case InferenceEngine::Layout::NC:
     case InferenceEngine::Layout::CHW:
+    case InferenceEngine::Layout::C:
         return cldnn::format::bfyx;
     case InferenceEngine::Layout::NHWC:
         return cldnn::format::byxf;
@@ -3371,7 +3697,7 @@ void CLDNNGraph::CreateGenericLayerBlobPrimitives(const InferenceEngine::Generic
             THROW_CLDNN_EXCEPTION("Unhandled blob dim in layer " + layer->name);
         }
         CreatePrimitiveFromBlob(
-            layer->name + "_" + blob.first + m_weightsTag,
+            layer->type + ":" + layer->name + "_" + blob.first + m_weightsTag,
             blob.second,
             cldnn::layout(
                 DataTypeFromPrecision(blob.second->precision()),
@@ -3412,12 +3738,15 @@ CLDNNGraph::CreateInferRequestImpl(InputsDataMap networkInputs, OutputsDataMap n
 
 void CLDNNGraph::InitProfileInfo(const std::string& layerName,
                                  const std::string& layerType,
-                                 const std::string& execType,
+                                 bool isCPU,
                                  InferenceEngine::InferenceEngineProfileInfo::LayerStatus status) {
-    m_env.perfMap[layerName].status = status;
-    m_env.perfMap[layerName].cpu_uSec = m_env.perfMap[layerName].realTime_uSec = 0;
-    layerType.copy(m_env.perfMap[layerName].layer_type, layerType.length());
-    execType.copy(m_env.perfMap[layerName].exec_type, execType.length());
+    m_env.perfMap[layerType + ":" + layerName].first = layerName;
+    auto& perfEntry = m_env.perfMap[layerType + ":" + layerName].second;
+    perfEntry.layerType = layerType;
+    perfEntry.status = status;
+    perfEntry.cpu_uSec = perfEntry.realTime_uSec = 0;
+    perfEntry.isCPU = isCPU;
+    perfEntry.status = status;
 }
 
 };  // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_graph.h b/inference-engine/src/cldnn_engine/cldnn_graph.h
index c26b60ad2..0ea064975 100644
--- a/inference-engine/src/cldnn_engine/cldnn_graph.h
+++ b/inference-engine/src/cldnn_engine/cldnn_graph.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,6 +9,7 @@
 #include <set>
 #include <memory>
 #include <string>
+#include <utility>
 #include "ie_blob.h"
 #include "ie_plugin.hpp"
 #include "cpp/ie_cnn_network.h"
@@ -29,13 +30,30 @@
 
 namespace CLDNNPlugin {
 
+struct PerfCounter {
+    InferenceEngine::InferenceEngineProfileInfo::LayerStatus status;
+    bool isCPU;
+    uint64_t realTime_uSec;
+    uint64_t cpu_uSec;
+    uint32_t num;
+    std::string layerType;
+
+public:
+    PerfCounter() : realTime_uSec(0), cpu_uSec(0), num(0),
+        status(InferenceEngine::InferenceEngineProfileInfo::NOT_RUN), isCPU(false) {}
+
+    long long realTime_avg() const { return (num == 0) ? 0 : realTime_uSec / num; }
+    long long cpu_avg() const { return (num == 0) ? 0 : cpu_uSec / num; }
+};
+
 struct InferenceEnv {
     std::shared_ptr<const cldnn::engine> engine;
     std::shared_ptr<cldnn::network> network;
     std::map<std::string, cldnn::primitive_id> primitiveIDs;
     std::map<std::string, std::vector<cldnn::primitive_id>> prevPrimitiveIDs;
-    std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
-    std::set<cldnn::primitive_id> profilingIDs;
+
+    std::map<cldnn::primitive_id, std::pair<std::string, PerfCounter>> perfMap;
+    std::vector<cldnn::primitive_id> profilingIDs;
 
     DebugOptions debugOptions;
 
@@ -108,6 +126,8 @@ protected:
         TanH,
         ELU,
         Activation,
+        Exp,
+        Not,
         LRN,
         Pooling,
         FullyConnected,
@@ -145,6 +165,11 @@ protected:
         Pad,
         LSTMCell,
         RNN,
+        Gather,
+        DepthToSpace,
+        ShuffleChannels,
+        StridedSlice,
+        ReverseSequence,
         NO_TYPE
     };
 
@@ -155,7 +180,6 @@ protected:
     };
 
     cldnn::format m_defaultFormat;
-    cldnn::data_types m_networkPrecision;
     void InitFormat(InferenceEngine::ICNNNetwork &network);
 
     static cldnn::data_types DataTypeFromPrecision(InferenceEngine::Precision p);
@@ -181,7 +205,7 @@ protected:
                                            cldnn::primitive_id weightsPrimID,
                                            cldnn::primitive_id biasesPrimID);
     void AddPreProcessPrimitive(InferenceEngine::InputInfo::Ptr inputInfo);
-    void AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo);
+    void AddInputPrimitive(InferenceEngine::InputInfo::Ptr inputInfo, InferenceEngine::Precision inputPrecision);
     void AddOutputPrimitive(std::string outputName, const InferenceEngine::DataPtr outputData,
                             InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::UNSPECIFIED);
     void CreateSingleLayerPrimitive(InferenceEngine::CNNLayerPtr& layer);
@@ -204,8 +228,9 @@ protected:
 
     void InitProfileInfo(const std::string& layerName,
                          const std::string& layerType,
-                         const std::string& execType,
-                         InferenceEngine::InferenceEngineProfileInfo::LayerStatus status);
+                         bool isCPU = false,
+                         InferenceEngine::InferenceEngineProfileInfo::LayerStatus status
+                                    = InferenceEngine::InferenceEngineProfileInfo::EXECUTED);
     void changeInputBatch(size_t batch);
     void CompileNetwork();
 
@@ -250,6 +275,11 @@ protected:
     void CreateLSTMCellPrimitive(InferenceEngine::CNNLayerPtr &layer);
     void AddConstantBlobInput(InferenceEngine::CNNLayerPtr &layer);
     void CreateCustomLayerPrimitive(InferenceEngine::CNNLayerPtr &layer, CLDNNCustomLayerPtr customLayer);
+    void CreateGatherPrimitive(InferenceEngine::CNNLayerPtr &layer);
+    void CreateDepthToSpacePrimitive(InferenceEngine::CNNLayerPtr &layer);
+    void CreateShuffleChannelsPrimitive(InferenceEngine::CNNLayerPtr &layer);
+    void CreateStridedSlicePrimitive(InferenceEngine::CNNLayerPtr &layer);
+    void CreateReverseSequencePrimitive(InferenceEngine::CNNLayerPtr &layer);
 };
 
 };  // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
index e36578cf3..c903a4fc4 100644
--- a/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
+++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,7 +14,7 @@ using namespace InferenceEngine;
 
 namespace CLDNNPlugin {
 
-const std::string CLDNNInferRequest::fp32_suffix = "_fp32";
+const char CLDNNInferRequest::fp32_suffix[] = "_fp32";
 
 Blob::Ptr CLDNNInferRequest::createInputBlob(const TensorDesc& desc, uint8_t* mem_ptr) {
     const Layout l = desc.getLayout();
@@ -156,20 +156,21 @@ void CLDNNInferRequest::copyInputData(std::shared_ptr<cldnn::network> network,
     size_t n = (bi == nullptr) ? inputBlob.size() : bi->buf_size;
     size_t offset = (bi == nullptr) ? 0 : bi->buf_offset;
 
+    cldnn::primitive_id internalName = "Input:" + inputName;
     switch (inputBlob.precision()) {
     case Precision::FP32: {
         float* blob_ptr = const_cast<float*>(inputBlob.cbuffer().as<const float*>()) + offset;
-        network->set_input_data(inputName, cldnn::memory::attach(inputLayout, blob_ptr, n));
+        network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
         break;
     }
     case Precision::FP16: {
         uint16_t* blob_ptr = const_cast<uint16_t*>(inputBlob.cbuffer().as<const uint16_t*>()) + offset;
-        network->set_input_data(inputName, cldnn::memory::attach(inputLayout, blob_ptr, n));
+        network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
         break;
     }
     case Precision::U8: {
         uint8_t* blob_ptr = const_cast<uint8_t*>(inputBlob.cbuffer().as<const uint8_t*>()) + offset;
-        network->set_input_data(inputName, cldnn::memory::attach(inputLayout, blob_ptr, n));
+        network->set_input_data(internalName, cldnn::memory::attach(inputLayout, blob_ptr, n));
         break;
     }
     default:
@@ -361,10 +362,10 @@ void CLDNNInferRequest::SetBatch(int new_batch) {
 CLDNNInferRequest::CLDNNInferRequest(InferenceEnv env, bool useProfiling,
                                      InputsDataMap networkInputs, OutputsDataMap networkOutputs)
         : InferRequestInternal(networkInputs, networkOutputs),
-          m_curBatch(-1),
           m_env(env),
           m_useProfiling(useProfiling) {
     if (m_env.m_max_batch > 1) {
+        SetBatch(m_env.m_max_batch);
         AllocateInputsDyn();
         AllocateOutputsDyn();
     } else {
@@ -440,20 +441,18 @@ void CLDNNInferRequest::execAndParse() {
 
         // Get profiling info for all layers
         for (auto &profiledID : m_env.profilingIDs) {
-            std::string impl = implementationsMap.at(profiledID);
-            impl.copy(m_env.perfMap[profiledID].exec_type, impl.length());
-
+            auto& perfCount = m_env.perfMap[profiledID].second;
             // Change status if layer wasn't executed by cldnn engine
-            if (executedPrimitives.find(profiledID) == executedPrimitives.end()) {
+            if (perfCount.num == 0 &&
+                executedPrimitives.find(profiledID) == executedPrimitives.end()) {
                 if (allPrimitives.find(profiledID) != allPrimitives.end() &&
                     allPrimitives.at(profiledID) == "_optimized_") {
                     // Layer was marked as optimized by cldnn
-                    m_env.perfMap[profiledID].status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
+                    perfCount.status = InferenceEngineProfileInfo::OPTIMIZED_OUT;
                 } else {
                     // Layer wasn't run for some reason
-                    m_env.perfMap[profiledID].status = InferenceEngineProfileInfo::NOT_RUN;
+                    perfCount.status = InferenceEngineProfileInfo::NOT_RUN;
                 }
-                m_env.perfMap[profiledID].cpu_uSec = m_env.perfMap[profiledID].realTime_uSec = 0;
                 continue;
             }
 
@@ -468,17 +467,17 @@ void CLDNNInferRequest::execAndParse() {
                 auto count = std::chrono::duration_cast<duration_t>(interval.value->value()).count();
 
                 if (interval.name == "submission") {
-                    m_env.perfMap[profiledID].cpu_uSec = count;
+                    perfCount.cpu_uSec += count;
                 } else if (interval.name == "executing") {
-                    m_env.perfMap[profiledID].realTime_uSec = count;
+                    perfCount.realTime_uSec += count;
                 } else if (interval.name == "duration") {  // "duration" is used for CPU layers
-                    m_env.perfMap[profiledID].cpu_uSec = count;
-                    static const std::string cpuExecType("CPU");
-                    memset(m_env.perfMap[profiledID].exec_type, 0, sizeof(m_env.perfMap[profiledID].exec_type));
-                    cpuExecType.copy(m_env.perfMap[profiledID].exec_type,
-                        cpuExecType.length());  // Override execType as CPU
+                    perfCount.cpu_uSec += count;
+
+                    if (perfCount.num == 0)
+                        perfCount.isCPU = true;
                 }
             }
+            perfCount.num++;
         }
     }
 }
@@ -543,7 +542,32 @@ void CLDNNInferRequest::GetPerformanceCounts(
     if (!m_useProfiling) {
         THROW_IE_EXCEPTION << "Performance counters were not enabled";
     } else {
-        perfMap = m_env.perfMap;
+        unsigned i = 0;
+        for (auto& profiledID : m_env.profilingIDs) {
+            const auto& layerName = m_env.perfMap.at(profiledID).first;
+            if (layerName.length() == 0)    // no layer directly associated
+                continue;
+
+            const auto& perfCounter = m_env.perfMap.at(profiledID).second;
+            auto& extPerfEntry = perfMap[layerName];
+
+            // copy layer implementation
+            if (perfCounter.isCPU) {
+                static const std::string cpuExecType("CPU");
+                memset(extPerfEntry.exec_type, 0, sizeof(extPerfEntry.exec_type));
+                cpuExecType.copy(extPerfEntry.exec_type, cpuExecType.length());  // Override execType as CPU
+            } else {
+                std::string impl = implementationsMap.at(profiledID);
+                impl.copy(extPerfEntry.exec_type, impl.length());
+            }
+
+            extPerfEntry.execution_index = i++;
+            extPerfEntry.status = perfCounter.status;
+            extPerfEntry.cpu_uSec = perfCounter.cpu_avg();
+            extPerfEntry.realTime_uSec = perfCounter.realTime_avg();
+
+            perfCounter.layerType.copy(extPerfEntry.layer_type, perfCounter.layerType.length());
+        }
     }
 }
 
@@ -564,20 +588,21 @@ void CLDNNInferRequest::PrepareInput(const cldnn::primitive_id &inputName, const
         return (blob_ptr == mem_ptr) && (blob.byteSize() == memory.size());
     };
 
+    cldnn::primitive_id internalName = "Input:" + inputName;
     const cldnn::memory& memory = inputsMemory.at(inputName);
     if (inputBlob.precision() == Precision::I16) {
         // clDNN doesn't support I16 input precision, so we always have to convert input data to fp32 precision
         const cldnn::memory& fp32_mem = inputsMemory.at(inputName+fp32_suffix);
         cldnn::pointer<float> ptr = fp32_mem.pointer<float>();
         InferenceEngine::copyToFloat<int16_t>(ptr.data(), &inputBlob);
-        m_env.network->set_input_data(inputName, fp32_mem);
+        m_env.network->set_input_data(internalName, fp32_mem);
     } else if (is_same_buffer(inputBlob, memory)) {
         // If input memory was allocated by cldnn engine and wasn't overwritten by user set_input_data method won't copy input data.
         switch (inputBlob.precision()) {
             case Precision::FP32:
             case Precision::FP16:
             case Precision::U8: {
-                m_env.network->set_input_data(inputName, memory);
+                m_env.network->set_input_data(internalName, memory);
                 break;
             }
             default:
diff --git a/inference-engine/src/cldnn_engine/cldnn_infer_request.h b/inference-engine/src/cldnn_engine/cldnn_infer_request.h
index f4b9d3366..375d707ee 100644
--- a/inference-engine/src/cldnn_engine/cldnn_infer_request.h
+++ b/inference-engine/src/cldnn_engine/cldnn_infer_request.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -44,7 +44,6 @@ protected:
     InferenceEnv m_env;
 
     // dynamic batch stuff
-    int m_curBatch;
     std::map<std::string, std::vector<buf_info>> batchInputs;
     std::map<std::string, std::vector<buf_info>> batchOutputs;
 
@@ -66,7 +65,7 @@ protected:
     void PrepareInputDyn(const cldnn::primitive_id &inputName, const InferenceEngine::Blob &inputBlob);
 
 private:
-    static const std::string fp32_suffix;
+    static const char fp32_suffix[];
 };
 
 };  // namespace CLDNNPlugin
diff --git a/inference-engine/src/cldnn_engine/debug_options.cpp b/inference-engine/src/cldnn_engine/debug_options.cpp
index 5a6de1536..3c964dcae 100644
--- a/inference-engine/src/cldnn_engine/debug_options.cpp
+++ b/inference-engine/src/cldnn_engine/debug_options.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/cldnn_engine/debug_options.h b/inference-engine/src/cldnn_engine/debug_options.h
index 3001b29ca..1dad92eda 100644
--- a/inference-engine/src/cldnn_engine/debug_options.h
+++ b/inference-engine/src/cldnn_engine/debug_options.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/cldnn_engine/dllmain.cpp b/inference-engine/src/cldnn_engine/dllmain.cpp
index 31257da47..c862ee190 100644
--- a/inference-engine/src/cldnn_engine/dllmain.cpp
+++ b/inference-engine/src/cldnn_engine/dllmain.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/cldnn_engine/simple_math.cpp b/inference-engine/src/cldnn_engine/simple_math.cpp
index 20b09fb46..9ee02b496 100644
--- a/inference-engine/src/cldnn_engine/simple_math.cpp
+++ b/inference-engine/src/cldnn_engine/simple_math.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/cldnn_engine/simple_math.h b/inference-engine/src/cldnn_engine/simple_math.h
index 445b62a3f..bf2031696 100644
--- a/inference-engine/src/cldnn_engine/simple_math.h
+++ b/inference-engine/src/cldnn_engine/simple_math.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/CMakeLists.txt b/inference-engine/src/extension/CMakeLists.txt
index ca9cc2750..b0078e2cb 100644
--- a/inference-engine/src/extension/CMakeLists.txt
+++ b/inference-engine/src/extension/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -11,7 +11,22 @@ if (NOT(IE_MAIN_SOURCE_DIR))
     # to use C++11 if samples are built outside of IE repo
     set (CMAKE_CXX_STANDARD 11)
     set (CMAKE_CXX_STANDARD_REQUIRED ON)
-    set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
+    if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU)
+        set (CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
+    endif()
+endif()
+
+# treating warnings as errors
+if (WIN32)
+    if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
+        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4275 /wd4267") #disable some warnings
+    endif()
+else()
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+endif()
+
+if (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
 endif()
 
 file(GLOB_RECURSE SRC *.cpp)
diff --git a/inference-engine/src/extension/README.md b/inference-engine/src/extension/README.md
index 94aece3c7..5b766ea83 100644
--- a/inference-engine/src/extension/README.md
+++ b/inference-engine/src/extension/README.md
@@ -17,7 +17,11 @@ when cross-compiling this library for another platform.
 
  * ArgMax
  * CTCGreedyDecoder
+ * DepthToSpace
  * DetectionOutput
+ * Expand
+ * Fill
+ * Gather
  * GRN
  * Interp
  * MVN
@@ -28,11 +32,17 @@ when cross-compiling this library for another platform.
  * PriorBoxClustered
  * Proposal
  * PSROIPooling
- * Region Yolo
- * Reorg Yolo
+ * Range
+ * RegionYolo
+ * ReorgYolo
  * Resample
+ * ReverseSequence
+ * ShuffleChannels
  * SimplerNMS
- * SpatialTransformer
+ * SpaceToDepth
+ * Squeeze
+ * StridedSlice
+ * Unsqueeze
 
 In order to add a new layer, you can use [the extensibility mechanism](./docs/IE_DG/Integrate_your_kernels_into_IE.md).
 
diff --git a/inference-engine/src/extension/cmake/CPUID.cmake b/inference-engine/src/extension/cmake/CPUID.cmake
index 7b6c26bca..4bf752816 100644
--- a/inference-engine/src/extension/cmake/CPUID.cmake
+++ b/inference-engine/src/extension/cmake/CPUID.cmake
@@ -7,7 +7,7 @@
 
 include (CheckCXXSourceRuns)
 
-if(NOT WIN32)
+if(NOT WIN32 AND NOT APPLE)
     set(CMAKE_REQUIRED_FLAGS "-std=c++11")
 endif()
 
@@ -204,14 +204,14 @@ private:
             }
 
             // load bitset with flags for function 0x80000001
-            if (nExIds_ >= 0x80000001)
+            if ((unsigned)nExIds_ >= 0x80000001)
             {
                 f_81_ECX_ = extdata_[1][2];
                 f_81_EDX_ = extdata_[1][3];
             }
 
             // Interpret CPU brand string if reported
-            if (nExIds_ >= 0x80000004)
+            if ((unsigned)nExIds_ >= 0x80000004)
             {
                 memcpy(brand +  0, extdata_[2].data(), sizeof(cpui));
                 memcpy(brand + 16, extdata_[3].data(), sizeof(cpui));
@@ -248,7 +248,7 @@ const InstructionSet::InstructionSet_Internal InstructionSet::CPU_Rep;
 // Print out supported instruction set extensions
 int main()
 {
-    std::ofstream fo(\"cpuid.txt\");
+    std::ofstream fo(\"${CMAKE_BINARY_DIR}/cpuid.txt\");
     auto& outstream = fo;//std::cout;
 
     auto support_message = [&outstream](std::string isa_feature, bool is_supported) {
diff --git a/inference-engine/src/extension/cmake/feature_defs.cmake b/inference-engine/src/extension/cmake/feature_defs.cmake
index 4c07c2da5..d40f1d365 100644
--- a/inference-engine/src/extension/cmake/feature_defs.cmake
+++ b/inference-engine/src/extension/cmake/feature_defs.cmake
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-cmake_minimum_required (VERSION 2.8)
-
 include(CPUID)
 include(OptimizationFlags)
diff --git a/inference-engine/src/extension/common/defs.h b/inference-engine/src/extension/common/defs.h
index 9bf04007c..a5dc5e89e 100644
--- a/inference-engine/src/extension/common/defs.h
+++ b/inference-engine/src/extension/common/defs.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/common/fast_exp.h b/inference-engine/src/extension/common/fast_exp.h
index 4fcd25c3a..062198d12 100644
--- a/inference-engine/src/extension/common/fast_exp.h
+++ b/inference-engine/src/extension/common/fast_exp.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/common/matrixmult.h b/inference-engine/src/extension/common/matrixmult.h
deleted file mode 100644
index 9070ddafe..000000000
--- a/inference-engine/src/extension/common/matrixmult.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-static inline void matrixMult(float *A, float *B, float *C, int m, int n, int k, bool transposeB = false) {
-    if (transposeB) {
-        for (int rowA = 0; rowA < m; rowA++) {
-            for (int rowB = 0; rowB < n; rowB++) {
-                float sum = 0;
-                for (int colA = 0; colA < k; colA++) {
-                    sum += A[rowA * k + colA] * B[rowB * k + colA];
-                }
-
-                C[rowA * n + rowB] = sum;
-            }
-        }
-    } else {
-        for (int rowA = 0; rowA < m; rowA++) {
-            for (int colB = 0; colB < n; colB++) {
-                float sum = 0;
-                for (int colA = 0; colA < k; colA++) {
-                    sum += A[rowA * k + colA] * B[colA * n + colB];
-                }
-
-                C[rowA * n + colB] = sum;
-            }
-        }
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/src/extension/common/opt_exp.h b/inference-engine/src/extension/common/opt_exp.h
index 7fb57a916..04a0a3ea1 100644
--- a/inference-engine/src/extension/common/opt_exp.h
+++ b/inference-engine/src/extension/common/opt_exp.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/common/softmax.h b/inference-engine/src/extension/common/softmax.h
index 6aaf63480..498bff8a8 100644
--- a/inference-engine/src/extension/common/softmax.h
+++ b/inference-engine/src/extension/common/softmax.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/ext_argmax.cpp b/inference-engine/src/extension/ext_argmax.cpp
index c6efa6cce..3a8dab38e 100644
--- a/inference-engine/src/extension/ext_argmax.cpp
+++ b/inference-engine/src/extension/ext_argmax.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,7 +23,7 @@ public:
             if (layer->insData.size() != 1 || layer->outData.empty())
                 THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
 
-            out_max_val_ = static_cast<bool>(layer->GetParamAsInt("out_max_val"));
+            out_max_val_ = layer->GetParamAsBool("out_max_val", false);
             top_k_       = layer->GetParamAsInt("top_k");
 
             has_axis_ = (layer->params.find("axis") != layer->params.end());
@@ -73,12 +73,12 @@ public:
                         dst_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = src_vector[j].first;
                     } else {
                         // Produces max_ind and max_val
-                        dst_data[2 * i * top_k_ + j] = src_vector[j].second;
+                        dst_data[2 * i * top_k_ + j] = static_cast<float>(src_vector[j].second);
                         dst_data[2 * i * top_k_ + top_k_ + j] = src_vector[j].first;
                     }
                 } else {
                     // Produces max_ind per axis
-                    dst_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = src_vector[j].second;
+                    dst_data[(i / axis_dist * top_k_ + j) * axis_dist + i % axis_dist] = static_cast<float>(src_vector[j].second);
                 }
             }
         }
diff --git a/inference-engine/src/extension/ext_base.cpp b/inference-engine/src/extension/ext_base.cpp
index cb00fda34..dc1339a85 100644
--- a/inference-engine/src/extension/ext_base.cpp
+++ b/inference-engine/src/extension/ext_base.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -80,6 +80,8 @@ void ExtLayerBase::addConfig(const CNNLayer* layer, std::vector<DataConfigurator
         std::vector<size_t> order(blocks.size());
         for (size_t i = 0; i < order.size(); i++) order[i] = i;
 
+        const bool isInt8 = (data->getPrecision() == Precision::I8 || data->getPrecision() == Precision::U8);
+
         if (conf.layout == ConfLayout::BLK8 || conf.layout == ConfLayout::BLK16) {
             if (data_dims.size() < 4 && data_dims.size() > 5)
                 THROW_IE_EXCEPTION << "Inapplicable blocking layout."
@@ -91,10 +93,17 @@ void ExtLayerBase::addConfig(const CNNLayer* layer, std::vector<DataConfigurator
             order.push_back(1);
             blocks[1] = div_up(blocks[1], blk_size);
             blocks.push_back(blk_size);
+        } else if (isInt8) {
+            order = {0, 2, 3, 1};
+            size_t tmp = blocks[1];
+            blocks[1] = blocks[3];
+            blocks[3] = tmp;
+
+            conf.layout = ConfLayout::PLN;
         }
 
         // All extension layers support only FP32 precision!
-        InferenceEngine::Precision precision = conf.constant ? data_desc.getPrecision() : InferenceEngine::Precision(InferenceEngine::Precision::FP32);
+        InferenceEngine::Precision precision = data_desc.getPrecision();
         if (conf.layout == ConfLayout::ANY) {
             dataConfig.desc = TensorDesc(precision, data_dims, InferenceEngine::Layout::ANY);
         } else {
@@ -103,10 +112,10 @@ void ExtLayerBase::addConfig(const CNNLayer* layer, std::vector<DataConfigurator
         port.push_back(dataConfig);
     };
 
-    for (int i = 0; i < in_l.size(); i++)
+    for (size_t i = 0; i < in_l.size(); i++)
         fill_port(config.inConfs, in_l[i], layer->insData[i].lock());
 
-    for (int i = 0; i < out_l.size(); i++)
+    for (size_t i = 0; i < out_l.size(); i++)
         fill_port(config.outConfs, out_l[i], layer->outData[i]);
 
     config.dynBatchSupport = dynBatchSupport;
diff --git a/inference-engine/src/extension/ext_base.hpp b/inference-engine/src/extension/ext_base.hpp
index 3fa756a44..79148421e 100644
--- a/inference-engine/src/extension/ext_base.hpp
+++ b/inference-engine/src/extension/ext_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/ext_ctc_greedy.cpp b/inference-engine/src/extension/ext_ctc_greedy.cpp
index 71c9d71b8..ae9a09906 100644
--- a/inference-engine/src/extension/ext_ctc_greedy.cpp
+++ b/inference-engine/src/extension/ext_ctc_greedy.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,8 +21,7 @@ public:
                 THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
 
             std::vector<DataConfigurator> inps;
-            for (const auto &in : layer->insData)
-                inps.emplace_back(ConfLayout::PLN);
+            inps.resize(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
             addConfig(layer, inps, {DataConfigurator(ConfLayout::PLN)});
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
             errorMsg = ex.what();
@@ -51,7 +50,7 @@ public:
             output_sequences[ii] = -1;
         }
 
-        for (int n = 0; n < N_; ++n) {
+        for (size_t n = 0; n < N_; ++n) {
             int prev_class_idx = -1;
             size_t output_index = n*T_;
 
@@ -63,21 +62,22 @@ public:
                 float max_prob = probs[0];
                 ++probs;
 
-                for (int c = 1; c < C_; ++c, ++probs) {
+                for (size_t c = 1; c < C_; ++c, ++probs) {
                     if (*probs > max_prob) {
-                        max_class_idx = c;
+                        max_class_idx = static_cast<int>(c);
                         max_prob = *probs;
                     }
                 }
 
-                if (max_class_idx < C_-1 && max_class_idx != prev_class_idx) {
-                    output_sequences[output_index] =  max_class_idx;
+                if (max_class_idx < static_cast<int>(C_) - 1 &&
+                        max_class_idx != prev_class_idx) {
+                    output_sequences[output_index] = static_cast<float>(max_class_idx);
                     output_index++;
                 }
 
                 prev_class_idx = max_class_idx;
 
-                if (t + 1 == T_ || sequence_indicators[(t + 1)*N_ + n] == 0) {
+                if (t + 1 == static_cast<int>(T_) || sequence_indicators[(t + 1)*N_ + n] == 0) {
                     break;
                 }
             }
diff --git a/inference-engine/src/extension/ext_depth_to_space.cpp b/inference-engine/src/extension/ext_depth_to_space.cpp
new file mode 100644
index 000000000..0e20681fd
--- /dev/null
+++ b/inference-engine/src/extension/ext_depth_to_space.cpp
@@ -0,0 +1,125 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class DepthToSpaceImpl: public ExtLayerBase {
+#define CNTR_SIZE 5
+
+public:
+    explicit DepthToSpaceImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
+            if (src_dims.size() < 3)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
+            if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";
+
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (dst_dims.size() < 2)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of output dimensions!";
+            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";
+
+            size_t block_size = layer->GetParamAsUInt("block_size", 1);
+            if (block_size == 0)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
+
+            if (src_dims[src_dims.size() - 3] % (block_size * block_size))
+                THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";
+
+            if (dst_dims.size() > 2 && src_dims[src_dims.size() - 3] != (dst_dims[dst_dims.size() - 3] * block_size * block_size))
+                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Color dimension is incompatible with block_size!";
+
+            if (dst_dims[dst_dims.size() - 2] != (src_dims[src_dims.size() - 2] * block_size))
+                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Height dimension is incompatible with block_size!";
+
+            if (dst_dims[dst_dims.size() - 1] != (src_dims[src_dims.size() - 1] * block_size))
+                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Width dimension is incompatible with block_size!";
+
+            own_dims[0] = 1;
+            for (size_t i = 0; i < (src_dims.size() - 3); i++)
+                own_dims[0] *= src_dims[i];
+            own_dims[1] = src_dims[src_dims.size() - 2];
+            own_dims[2] = src_dims[src_dims.size() - 3] / block_size;
+            own_dims[3] = src_dims[src_dims.size() - 1];
+            own_dims[4] = block_size;
+
+            size_t C = src_dims[src_dims.size() - 2] * src_dims[src_dims.size() - 1];
+            ownStrides[0] = src_dims[src_dims.size() - 3] * C;
+            ownStrides[1] = src_dims[src_dims.size() - 1];
+            ownStrides[2] = block_size * C;
+            ownStrides[3] = 1;
+            ownStrides[4] = C;
+            work_amount_dst = ownStrides[0] * own_dims[0];
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
+            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float* dst_data = outputs[0]->cbuffer().as<float *>() +
+            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        //  Parallel
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t start = 0, end = 0, src_idx = 0;
+            size_t counters[CNTR_SIZE] = { 0 };
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            for (int j = CNTR_SIZE - 1, i = start; j >= 0; j--) {
+                counters[j] = i % own_dims[j];
+                src_idx += counters[j] * ownStrides[j];
+                i /= own_dims[j];
+            }
+
+            for (size_t iwork = start, i = 1; iwork < end; ++iwork) {
+                dst_data[iwork] = src_data[src_idx];
+                for (int j = CNTR_SIZE - 1; j >= 0; j--) {
+                    counters[j]++;
+                    if (counters[j] < own_dims[j]) {
+                        src_idx += ownStrides[j];
+                        break;
+                    } else {
+                        counters[j] = i = 0;
+                    }
+                }
+                if (!i) {
+                    for (src_idx = 0; i < CNTR_SIZE; ++i)
+                        src_idx += counters[i] * ownStrides[i];
+                }
+            }
+        });
+
+        return OK;
+    }
+
+private:
+    size_t work_amount_dst;
+    size_t own_dims[CNTR_SIZE];
+    size_t ownStrides[CNTR_SIZE];
+};
+
+REG_FACTORY_FOR(ImplFactory<DepthToSpaceImpl>, DepthToSpace);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_detectionoutput.cpp b/inference-engine/src/extension/ext_detectionoutput.cpp
index acf58fba3..1ec523fa7 100644
--- a/inference-engine/src/extension/ext_detectionoutput.cpp
+++ b/inference-engine/src/extension/ext_detectionoutput.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -40,7 +40,9 @@ public:
             _nms_threshold = layer->GetParamAsFloat("nms_threshold");
             _confidence_threshold = layer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
             _share_location = layer->GetParamsAsBool("share_location", true);
-            _clip = layer->GetParamsAsBool("clip", false);
+            _clip_before_nms = layer->GetParamsAsBool("clip_before_nms", false) ||
+                               layer->GetParamsAsBool("clip", false);  // for backward compatibility
+            _clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false);
             _decrease_label_id = layer->GetParamsAsBool("decrease_label_id", false);
             _normalized = layer->GetParamsAsBool("normalized", true);
             _image_height = layer->GetParamAsInt("input_height", 1);
@@ -53,12 +55,15 @@ public:
             _code_type = (code_type_str == "caffe.PriorBoxParameter.CENTER_SIZE" ? CodeType::CENTER_SIZE
                                                                                  : CodeType::CORNER);
 
-            _num_priors = static_cast<int>(layer->insData[idx_priors].lock()->dims[0] / _prior_size);
+            _num_priors = static_cast<int>(layer->insData[idx_priors].lock()->getDims().back() / _prior_size);
+            _priors_batches = layer->insData[idx_priors].lock()->getDims().front() != 1;
 
-            if (_num_priors * _num_loc_classes * 4 != layer->insData[idx_location].lock()->dims[0])
-                THROW_IE_EXCEPTION << "Number of priors must match number of location predictions.";
+            if (_num_priors * _num_loc_classes * 4 != static_cast<int>(layer->insData[idx_location].lock()->getDims()[1]))
+                THROW_IE_EXCEPTION << "Number of priors must match number of location predictions ("
+                                   << _num_priors * _num_loc_classes * 4 << " vs "
+                                   << layer->insData[idx_location].lock()->getDims()[1] << ")";
 
-            if (_num_priors * _num_classes != layer->insData[idx_confidence].lock()->dims[0])
+            if (_num_priors * _num_classes != static_cast<int>(layer->insData[idx_confidence].lock()->dims[0]))
                 THROW_IE_EXCEPTION << "Number of priors must match number of confidence predictions.";
 
             if (_decrease_label_id && _background_label_id != 0)
@@ -131,10 +136,14 @@ public:
         int *indices_data          = _indices->buffer();
         int *num_priors_actual     = _num_priors_actual->buffer();
 
-        const float *prior_variances = prior_data + _num_priors*_prior_size;
-        const float *ppriors = prior_data;
-
         for (int n = 0; n < N; ++n) {
+            const float *ppriors = prior_data;
+            const float *prior_variances = prior_data + _num_priors*_prior_size;
+            if (_priors_batches) {
+                ppriors += _variance_encoded_in_target ? n*_num_priors*_prior_size : 2*n*_num_priors*_prior_size;
+                prior_variances += _variance_encoded_in_target ? 0 : n*_num_priors*_prior_size;
+            }
+
             if (_share_location) {
                 const float *ploc = loc_data + n*4*_num_priors;
                 float *pboxes = decoded_bboxes_data + n*4*_num_priors;
@@ -227,7 +236,7 @@ public:
                 // Store the new indices.
                 memset(detections_data + n*_num_classes, 0, _num_classes * sizeof(int));
 
-                for (int j = 0; j < conf_index_class_map.size(); ++j) {
+                for (size_t j = 0; j < conf_index_class_map.size(); ++j) {
                     int label = conf_index_class_map[j].second.first;
                     int idx = conf_index_class_map[j].second.second;
                     int *pindices = indices_data + n * _num_classes * _num_priors + label * _num_priors;
@@ -260,8 +269,8 @@ public:
                 for (int i = 0; i < detections_data[n*_num_classes + c]; ++i) {
                     int idx = pindices[c*_num_priors + i];
 
-                    dst_data[count * DETECTION_SIZE + 0] = n;
-                    dst_data[count * DETECTION_SIZE + 1] = _decrease_label_id ? c-1 : c;
+                    dst_data[count * DETECTION_SIZE + 0] = static_cast<float>(n);
+                    dst_data[count * DETECTION_SIZE + 1] = static_cast<float>(_decrease_label_id ? c-1 : c);
                     dst_data[count * DETECTION_SIZE + 2] = pconf[c*_num_priors + idx];
 
                     float xmin = _share_location ? pboxes[idx*4 + 0] :
@@ -273,6 +282,13 @@ public:
                     float ymax = _share_location ? pboxes[idx*4 + 3] :
                                  pboxes[c*4*_num_priors + idx*4 + 3];
 
+                    if (_clip_after_nms) {
+                        xmin = std::max(0.0f, std::min(1.0f, xmin));
+                        ymin = std::max(0.0f, std::min(1.0f, ymin));
+                        xmax = std::max(0.0f, std::min(1.0f, xmax));
+                        ymax = std::max(0.0f, std::min(1.0f, ymax));
+                    }
+
                     dst_data[count * DETECTION_SIZE + 3] = xmin;
                     dst_data[count * DETECTION_SIZE + 4] = ymin;
                     dst_data[count * DETECTION_SIZE + 5] = xmax;
@@ -304,8 +320,9 @@ private:
     int _keep_top_k = 0;
     int _code_type = 0;
 
-    bool _share_location = false;
-    bool _clip = false;
+    bool _share_location    = false;
+    bool _clip_before_nms   = false;  // clip bounding boxes before nms step
+    bool _clip_after_nms    = false;  // clip bounding boxes after nms step
     bool _decrease_label_id = false;
 
     int _image_width = 0;
@@ -320,6 +337,7 @@ private:
     int _num = 0;
     int _num_loc_classes = 0;
     int _num_priors = 0;
+    bool _priors_batches = false;
 
     enum CodeType {
         CORNER = 1,
@@ -477,7 +495,7 @@ void DetectionOutputImpl::decodeBBoxes(const float *prior_data,
             new_ymax = decode_bbox_center_y + decode_bbox_height / 2.0f;
         }
 
-        if (_clip) {
+        if (_clip_before_nms) {
             new_xmin = std::max(0.0f, std::min(1.0f, new_xmin));
             new_ymin = std::max(0.0f, std::min(1.0f, new_ymin));
             new_xmax = std::max(0.0f, std::min(1.0f, new_xmax));
diff --git a/inference-engine/src/extension/ext_detectionoutput_onnx.cpp b/inference-engine/src/extension/ext_detectionoutput_onnx.cpp
new file mode 100644
index 000000000..39412b32b
--- /dev/null
+++ b/inference-engine/src/extension/ext_detectionoutput_onnx.cpp
@@ -0,0 +1,375 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cassert>
+#include <cfloat>
+#include <vector>
+#include <cmath>
+#include <string>
+#include <utility>
+#include <algorithm>
+#include "ie_parallel.hpp"
+
+
+namespace {
+struct Indexer {
+  const std::vector<int> dims_;
+  int total_{1};
+
+  explicit Indexer(const std::vector<int>& dims) : dims_(dims) {
+      total_ = 1;
+      for (size_t i = 0; i < dims_.size(); ++i) {
+          total_ *= dims_[i];
+      }
+  }
+
+  const int operator()(const std::vector<int>& idx) const {
+      int flat_idx = 0;
+      assert(idx.size() == dims_.size());
+      for (size_t i = 0; i < dims_.size(); ++i) {
+          assert(0 <= idx[i] && idx[i] < dims_[i]);
+          flat_idx = flat_idx * dims_[i] + idx[i];
+      }
+      assert(flat_idx < total_);
+      return flat_idx;
+  }
+};
+}  // namespace
+
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+static
+void refine_boxes(const float* boxes, const float* deltas, const float* weights, const float* scores,
+                  float* refined_boxes, float* refined_boxes_areas, float* refined_scores,
+                  const int rois_num, const int classes_num,
+                  const float img_H, const float img_W,
+                  const float max_delta_log_wh,
+                  float coordinates_offset) {
+    Indexer box_idx({rois_num, 4});
+    Indexer delta_idx({rois_num, classes_num, 4});
+    Indexer score_idx({rois_num, classes_num});
+
+    Indexer refined_box_idx({classes_num, rois_num, 4});
+    Indexer refined_score_idx({classes_num, rois_num});
+
+    for (int roi_idx = 0; roi_idx < rois_num; ++roi_idx) {
+        float x0 = boxes[box_idx({roi_idx, 0})];
+        float y0 = boxes[box_idx({roi_idx, 1})];
+        float x1 = boxes[box_idx({roi_idx, 2})];
+        float y1 = boxes[box_idx({roi_idx, 3})];
+
+        if (x1 - x0 <= 0 || y1 - y0 <= 0) {
+            continue;
+        }
+
+        // width & height of box
+        const float ww = x1 - x0 + coordinates_offset;
+        const float hh = y1 - y0 + coordinates_offset;
+        // center location of box
+        const float ctr_x = x0 + 0.5f * ww;
+        const float ctr_y = y0 + 0.5f * hh;
+
+        for (int class_idx = 1; class_idx < classes_num; ++class_idx) {
+            const float dx = deltas[delta_idx({roi_idx, class_idx, 0})] / weights[0];
+            const float dy = deltas[delta_idx({roi_idx, class_idx, 1})] / weights[1];
+            const float d_log_w = deltas[delta_idx({roi_idx, class_idx, 2})] / weights[2];
+            const float d_log_h = deltas[delta_idx({roi_idx, class_idx, 3})] / weights[3];
+
+            // new center location according to deltas (dx, dy)
+            const float pred_ctr_x = dx * ww + ctr_x;
+            const float pred_ctr_y = dy * hh + ctr_y;
+            // new width & height according to deltas d(log w), d(log h)
+            const float pred_w = std::exp(std::min(d_log_w, max_delta_log_wh)) * ww;
+            const float pred_h = std::exp(std::min(d_log_h, max_delta_log_wh)) * hh;
+
+            // update upper-left corner location
+            float x0_new = pred_ctr_x - 0.5f * pred_w;
+            float y0_new = pred_ctr_y - 0.5f * pred_h;
+            // update lower-right corner location
+            float x1_new = pred_ctr_x + 0.5f * pred_w - coordinates_offset;
+            float y1_new = pred_ctr_y + 0.5f * pred_h - coordinates_offset;
+
+            // adjust new corner locations to be within the image region,
+            x0_new = std::max<float>(0.0f, std::min<float>(x0_new, img_W - coordinates_offset));
+            y0_new = std::max<float>(0.0f, std::min<float>(y0_new, img_H - coordinates_offset));
+            x1_new = std::max<float>(0.0f, std::min<float>(x1_new, img_W - coordinates_offset));
+            y1_new = std::max<float>(0.0f, std::min<float>(y1_new, img_H - coordinates_offset));
+
+            // recompute new width & height
+            const float box_w = x1_new - x0_new + coordinates_offset;
+            const float box_h = y1_new - y0_new + coordinates_offset;
+
+            refined_boxes[refined_box_idx({class_idx, roi_idx, 0})] = x0_new;
+            refined_boxes[refined_box_idx({class_idx, roi_idx, 1})] = y0_new;
+            refined_boxes[refined_box_idx({class_idx, roi_idx, 2})] = x1_new;
+            refined_boxes[refined_box_idx({class_idx, roi_idx, 3})] = y1_new;
+
+            refined_boxes_areas[refined_score_idx({class_idx, roi_idx})] = box_w * box_h;
+
+            refined_scores[refined_score_idx({class_idx, roi_idx})] = scores[score_idx({roi_idx, class_idx})];
+        }
+    }
+}
+
+template <typename T>
+static bool SortScorePairDescend(const std::pair<float, T>& pair1,
+                                 const std::pair<float, T>& pair2) {
+    return pair1.first > pair2.first;
+}
+
+
+struct ConfidenceComparator {
+    explicit ConfidenceComparator(const float* conf_data) : _conf_data(conf_data) {}
+
+    bool operator()(int idx1, int idx2) {
+        if (_conf_data[idx1] > _conf_data[idx2]) return true;
+        if (_conf_data[idx1] < _conf_data[idx2]) return false;
+        return idx1 < idx2;
+    }
+
+    const float* _conf_data;
+};
+
+static inline float JaccardOverlap(const float *decoded_bbox,
+                                   const float *bbox_sizes,
+                                   const int idx1,
+                                   const int idx2,
+                                   const float coordinates_offset = 1) {
+    float xmin1 = decoded_bbox[idx1 * 4 + 0];
+    float ymin1 = decoded_bbox[idx1 * 4 + 1];
+    float xmax1 = decoded_bbox[idx1 * 4 + 2];
+    float ymax1 = decoded_bbox[idx1 * 4 + 3];
+
+    float xmin2 = decoded_bbox[idx2 * 4 + 0];
+    float ymin2 = decoded_bbox[idx2 * 4 + 1];
+    float ymax2 = decoded_bbox[idx2 * 4 + 3];
+    float xmax2 = decoded_bbox[idx2 * 4 + 2];
+
+    if (xmin2 > xmax1 || xmax2 < xmin1 || ymin2 > ymax1 || ymax2 < ymin1) {
+        return 0.0f;
+    }
+
+    float intersect_xmin = std::max(xmin1, xmin2);
+    float intersect_ymin = std::max(ymin1, ymin2);
+    float intersect_xmax = std::min(xmax1, xmax2);
+    float intersect_ymax = std::min(ymax1, ymax2);
+
+    float intersect_width  = intersect_xmax - intersect_xmin + coordinates_offset;
+    float intersect_height = intersect_ymax - intersect_ymin + coordinates_offset;
+
+    if (intersect_width <= 0 || intersect_height <= 0) {
+        return 0.0f;
+    }
+
+    float intersect_size = intersect_width * intersect_height;
+    float bbox1_size = bbox_sizes[idx1];
+    float bbox2_size = bbox_sizes[idx2];
+
+    return intersect_size / (bbox1_size + bbox2_size - intersect_size);
+}
+
+
+static void nms_cf(const float* conf_data,
+                          const float* bboxes,
+                          const float* sizes,
+                          int* buffer,
+                          int* indices,
+                          int& detections,
+                          const int boxes_num,
+                          const int pre_nms_topn,
+                          const int post_nms_topn,
+                          const float confidence_threshold,
+                          const float nms_threshold) {
+    int count = 0;
+    for (int i = 0; i < boxes_num; ++i) {
+        if (conf_data[i] > confidence_threshold) {
+            indices[count] = i;
+            count++;
+        }
+    }
+
+    int num_output_scores = (pre_nms_topn == -1 ? count : std::min<int>(pre_nms_topn, count));
+
+    std::partial_sort_copy(indices, indices + count,
+                           buffer, buffer + num_output_scores,
+                           ConfidenceComparator(conf_data));
+
+    detections = 0;
+    for (int i = 0; i < num_output_scores; ++i) {
+        const int idx = buffer[i];
+
+        bool keep = true;
+        for (int k = 0; k < detections; ++k) {
+            const int kept_idx = indices[k];
+            float overlap = JaccardOverlap(bboxes, sizes, idx, kept_idx);
+            if (overlap > nms_threshold) {
+                keep = false;
+                break;
+            }
+        }
+        if (keep) {
+            indices[detections] = idx;
+            detections++;
+        }
+    }
+
+    detections = (post_nms_topn == -1 ? detections : std::min<int>(post_nms_topn, detections));
+}
+
+
+class ExperimentalDetectronDetectionOutputImpl: public ExtLayerBase {
+private:
+    const int INPUT_ROIS {0};
+    const int INPUT_DELTAS {1};
+    const int INPUT_SCORES {2};
+    const int INPUT_IM_INFO {3};
+
+    const int OUTPUT_BOXES {0};
+    const int OUTPUT_CLASSES {1};
+    const int OUTPUT_SCORES {2};
+
+public:
+    explicit ExperimentalDetectronDetectionOutputImpl(const CNNLayer* layer) {
+        try {
+            score_threshold_ = layer->GetParamAsFloat("score_threshold");
+            nms_threshold_ = layer->GetParamAsFloat("nms_threshold");
+            max_delta_log_wh_ = layer->GetParamAsFloat("max_delta_log_wh");
+            classes_num_ = layer->GetParamAsInt("num_classes");
+            max_detections_per_class_ = layer->GetParamAsInt("post_nms_count");
+            max_detections_per_image_ = layer->GetParamAsInt("max_detections_per_image");
+            class_agnostic_box_regression_ = layer->GetParamAsBool("class_agnostic_box_regression", false);
+            deltas_weights_ = layer->GetParamAsFloats("deltas_weights");
+
+            std::vector<DataConfigurator> inputs_layouts(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
+            std::vector<DataConfigurator> outputs_layouts(layer->outData.size(), DataConfigurator(ConfLayout::PLN));
+            addConfig(layer, inputs_layouts, outputs_layouts);
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
+                       ResponseDesc *resp) noexcept override {
+        const int rois_num = inputs[INPUT_ROIS]->getTensorDesc().getDims()[0];
+        assert(classes_num_ == static_cast<int>(inputs[INPUT_SCORES]->getTensorDesc().getDims()[1]));
+        assert(4 * classes_num_ == static_cast<int>(inputs[INPUT_DELTAS]->getTensorDesc().getDims()[1]));
+
+        const auto* boxes = inputs[INPUT_ROIS]->buffer().as<const float *>();
+        const auto* deltas = inputs[INPUT_DELTAS]->buffer().as<const float *>();
+        const auto* scores = inputs[INPUT_SCORES]->buffer().as<const float *>();
+        const auto* im_info = inputs[INPUT_IM_INFO]->buffer().as<const float *>();
+
+        auto* output_boxes = outputs[OUTPUT_BOXES]->buffer().as<float *>();
+        auto* output_scores = outputs[OUTPUT_SCORES]->buffer().as<float *>();
+        auto* output_classes = outputs[OUTPUT_CLASSES]->buffer().as<float *>();
+
+        const float img_H = im_info[0];
+        const float img_W = im_info[1];
+
+        // Apply deltas.
+        std::vector<float> refined_boxes(classes_num_ * rois_num * 4, 0);
+        std::vector<float> refined_scores(classes_num_ * rois_num, 0);
+        std::vector<float> refined_boxes_areas(classes_num_ * rois_num, 0);
+        Indexer refined_box_idx({classes_num_, rois_num, 4});
+        Indexer refined_score_idx({classes_num_, rois_num});
+
+        refine_boxes(boxes, deltas, &deltas_weights_[0], scores,
+                     &refined_boxes[0], &refined_boxes_areas[0], &refined_scores[0],
+                     rois_num, classes_num_,
+                     img_H, img_W,
+                     max_delta_log_wh_,
+                     1.0f);
+
+        // Apply NMS class-wise.
+        std::vector<int> buffer(rois_num, 0);
+        std::vector<int> indices(classes_num_ * rois_num, 0);
+        std::vector<int> detections_per_class(classes_num_, 0);
+        int total_detections_num = 0;
+
+        for (int class_idx = 1; class_idx < classes_num_; ++class_idx) {
+            nms_cf(&refined_scores[refined_score_idx({class_idx, 0})],
+                   &refined_boxes[refined_box_idx({class_idx, 0, 0})],
+                   &refined_boxes_areas[refined_score_idx({class_idx, 0})],
+                   &buffer[0],
+                   &indices[total_detections_num],
+                   detections_per_class[class_idx],
+                   rois_num,
+                   -1,
+                   max_detections_per_class_,
+                   score_threshold_,
+                   nms_threshold_);
+            total_detections_num += detections_per_class[class_idx];
+        }
+
+        // Leave only max_detections_per_image_ detections.
+        // confidence, <class, index>
+        std::vector<std::pair<float, std::pair<int, int>>> conf_index_class_map;
+
+        int indices_offset = 0;
+        for (int c = 0; c < classes_num_; ++c) {
+            int n = detections_per_class[c];
+            for (int i = 0; i < n; ++i) {
+                int idx = indices[indices_offset + i];
+                float score = refined_scores[refined_score_idx({c, idx})];
+                conf_index_class_map.push_back(std::make_pair(score, std::make_pair(c, idx)));
+            }
+            indices_offset += n;
+        }
+
+        assert(max_detections_per_image_ > 0);
+        if (total_detections_num > max_detections_per_image_) {
+            std::partial_sort(conf_index_class_map.begin(),
+                              conf_index_class_map.begin() + max_detections_per_image_,
+                              conf_index_class_map.end(),
+                              SortScorePairDescend<std::pair<int, int>>);
+            conf_index_class_map.resize(max_detections_per_image_);
+            total_detections_num = max_detections_per_image_;
+        }
+
+        // Fill outputs.
+        memset(output_boxes, 0, max_detections_per_image_ * 4 * sizeof(float));
+        memset(output_scores, 0, max_detections_per_image_ * sizeof(float));
+        memset(output_classes, 0, max_detections_per_image_ * sizeof(float));
+
+        int i = 0;
+        for (const auto & detection : conf_index_class_map) {
+            float score = detection.first;
+            int cls = detection.second.first;
+            int idx = detection.second.second;
+            output_boxes[4 * i + 0] = refined_boxes[refined_box_idx({cls, idx, 0})];
+            output_boxes[4 * i + 1] = refined_boxes[refined_box_idx({cls, idx, 1})];
+            output_boxes[4 * i + 2] = refined_boxes[refined_box_idx({cls, idx, 2})];
+            output_boxes[4 * i + 3] = refined_boxes[refined_box_idx({cls, idx, 3})];
+            output_scores[i] = score;
+            output_classes[i] = static_cast<float>(cls);
+            ++i;
+        }
+
+        return OK;
+    }
+
+private:
+    float score_threshold_;
+    float nms_threshold_;
+    float max_delta_log_wh_;
+    int classes_num_;
+    int max_detections_per_class_;
+    int max_detections_per_image_;
+    bool class_agnostic_box_regression_;
+    std::vector<float> deltas_weights_;
+};
+
+
+
+REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronDetectionOutputImpl>, ExperimentalDetectronDetectionOutput);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_expand.cpp b/inference-engine/src/extension/ext_expand.cpp
new file mode 100644
index 000000000..297f586ca
--- /dev/null
+++ b/inference-engine/src/extension/ext_expand.cpp
@@ -0,0 +1,192 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class ExpandImpl: public ExtLayerBase {
+public:
+    explicit ExpandImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            if (layer->insData.size() != 2)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
+
+            SizeVector shape_dims = layer->insData[EXPAND_SHAPE].lock()->getTensorDesc().getDims();
+            if (shape_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Shape vector should be 1 dimension";
+
+            if (layer->insData[EXPAND_SHAPE].lock()->getTensorDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << layer->name << " Shape vector should be I32!";
+
+            if (!(layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
+                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
+                !(layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
+                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
+                THROW_IE_EXCEPTION << layer->name <<
+                    " Input and output tensors should have same precision and only FP32 and I32 are supported!";
+            }
+
+            src_dims = layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getDims();
+            srcStrides = layer->insData[EXPAND_INPUT].lock()->getTensorDesc().getBlockingDesc().getStrides();
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                             { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        int32_t* shape_dims = inputs[EXPAND_SHAPE]->cbuffer().as<int32_t *>() +
+                              inputs[EXPAND_SHAPE]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        size_t shape_size = (inputs[EXPAND_SHAPE]->getTensorDesc().getDims())[0];
+        SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
+
+        if (dst_dims.size() != shape_size) {
+            if (resp) {
+                std::string errorMsg = "Output tensor dimension mismatch";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return PARAMETER_MISMATCH;
+        }
+
+        if (src_dims.size() > dst_dims.size()) {
+            if (resp) {
+                std::string errorMsg = "Output tensor dimension is smaller then input tensor dimension";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return PARAMETER_MISMATCH;
+        }
+
+        size_t i;
+        for (i = 0; i < dst_dims.size(); i++) {
+            if (static_cast<int>(dst_dims[i]) != shape_dims[i]) {
+                if (resp) {
+                    std::string errorMsg = "Output tensor dimension size mismatch";
+                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                }
+                return PARAMETER_MISMATCH;
+            }
+        }
+
+        size_t prefix_size = dst_dims.size() - src_dims.size();
+        for (i = 0; i < src_dims.size(); i++) {
+            if (src_dims[i] != 1 &&
+                    static_cast<int>(src_dims[i]) != shape_dims[i + prefix_size]) {
+                if (resp) {
+                    std::string errorMsg = "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1";
+                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                }
+                return PARAMETER_MISMATCH;
+            }
+        }
+
+        InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides();
+        InferenceEngine::SizeVector src_aligned(dst_dims.size());
+        InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
+        for (i = 0; i < dst_dims.size(); i++) {
+            if (i < prefix_size) {
+                src_aligned[i] = 1;
+                srcStrides_aligned[i] = srcStrides[0];
+            } else {
+                src_aligned[i] = src_dims[i - prefix_size];
+                srcStrides_aligned[i] = srcStrides[i - prefix_size];
+            }
+        }
+
+        size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+
+        switch (outputs[0]->precision()) {
+        case Precision::FP32: {
+            const float *src_data = inputs[EXPAND_INPUT]->cbuffer().as<const float *>() +
+                                    inputs[EXPAND_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            float* dst_data = outputs[0]->cbuffer().as<float *>() +
+                              outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t i, src_idx, start = 0, end = 0;
+                SizeVector counters(dst_dims.size(), 0);
+                splitter(work_amount_dst, nthr, ithr, start, end);
+                for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
+                    counters[j] = i % dst_dims[j];
+                    i /= dst_dims[j];
+                }
+                for (size_t iwork = start; iwork < end; ++iwork) {
+                    for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
+                        src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
+
+                    dst_data[iwork] = src_data[src_idx];
+
+                    for (int j = dst_dims.size() - 1; j >= 0; j--) {
+                        counters[j] = (counters[j] + 1) % dst_dims[j];
+                        if (counters[j] != 0) break;
+                    }
+                }
+            });
+        }
+        break;
+        case Precision::I32: {
+            const int32_t *src_data = inputs[EXPAND_INPUT]->cbuffer().as<const int32_t *>() +
+                                      inputs[EXPAND_INPUT]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            int32_t* dst_data = outputs[0]->cbuffer().as<int32_t *>() +
+                                outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t i, src_idx, start = 0, end = 0;
+                SizeVector counters(dst_dims.size(), 0);
+                splitter(work_amount_dst, nthr, ithr, start, end);
+                for (int j = dst_dims.size() - 1, i = start; j >= 0; j--) {
+                    counters[j] = i % dst_dims[j];
+                    i /= dst_dims[j];
+                }
+                for (size_t iwork = start; iwork < end; ++iwork) {
+                    for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
+                        src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
+
+                    dst_data[iwork] = src_data[src_idx];
+
+                    for (int j = dst_dims.size() - 1; j >= 0; j--) {
+                        counters[j] = (counters[j] + 1) % dst_dims[j];
+                        if (counters[j] != 0) break;
+                    }
+                }
+            });
+        }
+                             break;
+        default:
+            if (resp) {
+                std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return GENERAL_ERROR;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t EXPAND_INPUT = 0;
+    const size_t EXPAND_SHAPE = 1;
+
+    SizeVector src_dims;
+    SizeVector srcStrides;
+};
+
+REG_FACTORY_FOR(ImplFactory<ExpandImpl>, Expand);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_fill.cpp b/inference-engine/src/extension/ext_fill.cpp
new file mode 100644
index 000000000..aea45e957
--- /dev/null
+++ b/inference-engine/src/extension/ext_fill.cpp
@@ -0,0 +1,128 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class FillImpl: public ExtLayerBase {
+public:
+    explicit FillImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            if (layer->insData.size() != 2)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
+
+            SizeVector fill_dims = layer->insData[FILL_DIMS].lock()->getTensorDesc().getDims();
+            if (fill_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Fill dimensions vector should be 1 dimension";
+
+            if (layer->insData[FILL_DIMS].lock()->getTensorDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << layer->name << " Fill dimensions vector should be I32!";
+
+            SizeVector value_dims = layer->insData[FILL_VALUE].lock()->getTensorDesc().getDims();
+            if (value_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Value scalar should have 1 dimension";
+
+            if (!(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
+                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
+                !(layer->insData[FILL_VALUE].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
+                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
+                THROW_IE_EXCEPTION << layer->name <<
+                    " 'Value' input scalars and output tensor should have same precision and only FP32 and I32 are supported!";
+            }
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                             { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        int32_t* fill_dims = inputs[FILL_DIMS]->cbuffer().as<int32_t *>() +
+                             inputs[FILL_DIMS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        size_t fill_size = inputs[FILL_DIMS]->getTensorDesc().getDims()[0];
+        SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
+
+        if (dst_dims.size() != fill_size) {
+            if (resp) {
+                std::string errorMsg = "Output tensor dimension mismatch";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return PARAMETER_MISMATCH;
+        }
+
+        size_t work_amount_dst = 1;
+        for (size_t i = 0; i < dst_dims.size(); i++) {
+            work_amount_dst *= fill_dims[i];
+            if (static_cast<int>(dst_dims[i]) != fill_dims[i]) {
+                if (resp) {
+                    std::string errorMsg = "Output tensor dimension size mismatch";
+                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                }
+                return PARAMETER_MISMATCH;
+            }
+        }
+
+        switch (outputs[0]->precision()) {
+        case Precision::FP32: {
+            float* dst_data = outputs[0]->cbuffer().as<float *>() +
+                              outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            float value = (inputs[FILL_VALUE]->cbuffer().as<float *>() +
+                           inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t start = 0, end = 0;
+                splitter(work_amount_dst, nthr, ithr, start, end);
+                std::fill_n(dst_data + start, end - start, value);
+            });
+        }
+        break;
+        case Precision::I32: {
+            int32_t* dst_data = outputs[0]->cbuffer().as<int32_t *>() +
+                                outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            int32_t value = (inputs[FILL_VALUE]->cbuffer().as<int32_t *>() +
+                             inputs[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t start = 0, end = 0;
+                splitter(work_amount_dst, nthr, ithr, start, end);
+                std::fill_n(dst_data + start, end - start, value);
+            });
+            return OK;
+        }
+        break;
+        default:
+            if (resp) {
+                std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return GENERAL_ERROR;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t FILL_DIMS = 0;
+    const size_t FILL_VALUE = 1;
+};
+
+REG_FACTORY_FOR(ImplFactory<FillImpl>, Fill);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_gather.cpp b/inference-engine/src/extension/ext_gather.cpp
index 27ae07705..03527cec6 100644
--- a/inference-engine/src/extension/ext_gather.cpp
+++ b/inference-engine/src/extension/ext_gather.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -18,88 +18,19 @@ namespace InferenceEngine {
 namespace Extensions {
 namespace Cpu {
 
-inline void clipping(int *idx, const int min, const int max) {
-    (*idx) = ((*idx) > min) ? (*idx) : min;
-    (*idx) = ((*idx) < max) ? (*idx) : (max - 1);
-    return;
-}
-
-class GatherImpl: public ILayerExecImpl {
+class GatherImpl: public ExtLayerBase {
 public:
-    StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override {
-        for (auto& input : config.inConfs) {
-            for (auto& offset : input.desc.getBlockingDesc().getOffsetPaddingToData()) {
-                if (offset) {
-                    return GENERAL_ERROR;
-                }
-            }
-        }
-        for (auto& output : config.outConfs) {
-            for (auto& offset : output.desc.getBlockingDesc().getOffsetPaddingToData()) {
-                if (offset) {
-                    return GENERAL_ERROR;
-                }
-            }
-        }
-
-        //  Check for holes in tensors
-        SizeVector dictionary_dims = config.inConfs[GATHER_DICTIONARY].desc.getDims();
-        SizeVector indexes_dims = config.inConfs[GATHER_INDEXES].desc.getDims();
-        SizeVector out_dims = config.outConfs[0].desc.getDims();
-        size_t idx_size = 1;
-        for (auto dims : indexes_dims)
-            idx_size *= dims;
-
-        size_t dct_size = 1;
-        for (auto dims : dictionary_dims)
-            dct_size *= dims;
-
-        size_t out_size = 1;
-        for (auto dims : out_dims)
-            out_size *= dims;
-
-        size_t dctSV = config.inConfs[GATHER_DICTIONARY].desc.getBlockingDesc().getStrides()[0];
-        size_t dctDV = config.inConfs[GATHER_DICTIONARY].desc.getBlockingDesc().getBlockDims()[0];
-        size_t idxSV = config.inConfs[GATHER_INDEXES].desc.getBlockingDesc().getStrides()[0];
-        size_t idxDV = config.inConfs[GATHER_INDEXES].desc.getBlockingDesc().getBlockDims()[0];
-        size_t outSV = config.outConfs[0].desc.getBlockingDesc().getStrides()[0];
-        size_t outDV = config.outConfs[0].desc.getBlockingDesc().getBlockDims()[0];
-        if (outSV * outDV == out_size && idxSV * idxDV == idx_size && dctSV * dctDV == dct_size)
-            withHoles = NONE;
-        else if (outSV * outDV != out_size && idxSV * idxDV == idx_size && dctSV * dctDV == dct_size)
-            withHoles = OUTPUT;
-
-        return OK;
-    };
-
-    StatusCode getSupportedConfigurations(std::vector<LayerConfig>& conf, ResponseDesc *resp) noexcept override {
-        if (!errorMsg.empty()) {
-            if (resp) {
-                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
-            }
-            return GENERAL_ERROR;
-        }
-        conf = confs;
-        return OK;
-    };
-
     explicit GatherImpl(const CNNLayer* layer) {
         try {
             if (layer->insData.size() != 2 || layer->outData.empty())
-                THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
 
             Precision inIdxPrecision = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getPrecision();
-            if (inIdxPrecision != Precision::FP32 &&
-                inIdxPrecision != Precision::I32 &&
-                inIdxPrecision != Precision::U16 &&
-                inIdxPrecision != Precision::I16 &&
-                inIdxPrecision != Precision::U8 &&
-                inIdxPrecision != Precision::I8)
-                THROW_IE_EXCEPTION << "Incorrect input precision. Only FP32|I32|U16|I16|U8|I8 are supported!";
+            if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only FP32 or I32 are supported!";
 
             //  Remove redundant dimensions
             const SizeVector& dictionary_dims = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getDims();
-            size_t actualAxis = 0;
             SizeVector dims_actual;
             for (size_t i = 0; i < dictionary_dims.size(); i++) {
                 if (dictionary_dims[i] > 1) {
@@ -110,83 +41,42 @@ public:
             }
 
             if (dims_actual.size() == 0)
-                THROW_IE_EXCEPTION << "Incorrect input parameters dimension!";
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
 
             axis = static_cast<int>(layer->GetParamAsInt("axis"));
             // Dictionary must be at least rank axis + 1
-            if (axis > 0 && (dims_actual.size() - axis) < 1)
-                THROW_IE_EXCEPTION << "Incorrect input parameters dimensions and axis number!";
+            if (axis > 0 && static_cast<int>(dims_actual.size()) < (1 + axis))
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
             else if (axis < 0 && (static_cast<int>(dims_actual.size()) + axis) < 0)
-                THROW_IE_EXCEPTION << "Incorrect input parameters dimensions and axis number!";
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
 
             if (axis < 0)
                 axis += dims_actual.size();
 
             //  Find number of dictionaries, index range and data length
-            for (size_t i = 0; i < axis; i++)
+            for (int i = 0; i < axis; i++)
                 numDictionaries *= dims_actual[i];
             indexRange = dims_actual[axis];
             for (size_t i = axis + 1; i < dims_actual.size(); i++)
                 dataLength *= dims_actual[i];
 
             if (dataLength == 0)
-                THROW_IE_EXCEPTION << "Incorrect input parameters dimension!";
-
-            LayerConfig config;
-            DataConfig dataConfigIdx, dataConfigDct;
-            const SizeVector& indexes_dims = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getDims();
-            dataConfigDct.desc = TensorDesc(InferenceEngine::Precision(InferenceEngine::Precision::FP32), dictionary_dims, InferenceEngine::Layout::ANY);
-            dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims, InferenceEngine::Layout::ANY);
-            if (GATHER_DICTIONARY == 0) {
-                config.inConfs.push_back(dataConfigDct);
-                config.inConfs.push_back(dataConfigIdx);
-            } else {
-                config.inConfs.push_back(dataConfigIdx);
-                config.inConfs.push_back(dataConfigDct);
-            }
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
 
-            DataConfig dataConfigOut;
-            const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims();
-            SizeVector blocks = out_dims;
-            SizeVector order(blocks.size());
-            SizeVector dimOffsets(blocks.size());
-            SizeVector strides(blocks.size());
-            size_t offset(std::numeric_limits<size_t>::max());
-            for (size_t i = 0; i < order.size(); i++) {
-                strides[i] = std::numeric_limits<size_t>::max();
-                dimOffsets[i] = 0;
-                order[i] = i;
-            }
-            dataConfigOut.desc = TensorDesc(InferenceEngine::Precision(InferenceEngine::Precision::FP32), out_dims,
-                                                                      { blocks, order, offset, dimOffsets, strides });
-            config.outConfs.push_back(dataConfigOut);
-            config.dynBatchSupport = false;
-            confs.push_back(config);
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                      { DataConfigurator(ConfLayout::PLN) });
         } catch (InferenceEngine::details::InferenceEngineException &ex) {
             errorMsg = ex.what();
         }
     }
 
-    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
-                       ResponseDesc *resp) noexcept override {
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
         switch (inputs[GATHER_INDEXES]->precision()) {
             case Precision::FP32:
-                gather(inputs[GATHER_INDEXES]->cbuffer().as<const float *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0], withHoles);
+                gather(inputs[GATHER_INDEXES]->cbuffer().as<const float *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
                 break;
             case Precision::I32:
-                gather(inputs[GATHER_INDEXES]->cbuffer().as<const int32_t *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0], withHoles);
-                break;
-            case Precision::U16:
-                gather(inputs[GATHER_INDEXES]->cbuffer().as<const uint16_t *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0], withHoles);
-                break;
-            case Precision::I16:
-                gather(inputs[GATHER_INDEXES]->cbuffer().as<const int16_t *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0], withHoles);
-                break;
-            case Precision::U8:
-                gather(inputs[GATHER_INDEXES]->cbuffer().as<const uint8_t *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0], withHoles);
-                break;
-            case Precision::I8:
-                gather(inputs[GATHER_INDEXES]->cbuffer().as<const int8_t *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0], withHoles);
+                gather(inputs[GATHER_INDEXES]->cbuffer().as<const int32_t *>(), inputs[GATHER_INDEXES], inputs[GATHER_DICTIONARY], outputs[0]);
                 break;
             default:
                 return GENERAL_ERROR;
@@ -195,20 +85,9 @@ public:
         return OK;
     }
 
-protected:
-    enum class ConfLayout { ANY, PLN, BLK8, BLK16 };
-    std::string errorMsg;
-    std::vector<LayerConfig> confs;
-
 private:
-    enum HolesMode {
-        NONE = 0,
-        OUTPUT = 1,
-        ALL = 2
-    };
-
     template <typename data_t>
-    void gather(data_t *src_dataIdx, Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output, bool withHoles);
+    void gather(data_t *src_dataIdx, Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output);
 
     int axis = 0;
     size_t numDictionaries = 1;
@@ -216,82 +95,46 @@ private:
     size_t dataLength = 1;
     const size_t GATHER_DICTIONARY = 0;
     const size_t GATHER_INDEXES = 1;
-    HolesMode withHoles = ALL;
 };
 
 template <typename data_t>
-void GatherImpl::gather(data_t *src_dataIdx, Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output, bool withHoles) {
+void GatherImpl::gather(data_t *src_dataIdx, Blob::Ptr indexes, Blob::Ptr dictionary, Blob::Ptr output) {
     size_t src_dataIdxSize = indexes->size();
-    size_t dataSize = sizeof(float) * dataLength;
-
-    if (withHoles == GatherImpl::NONE) {  //  No holes in tensors
-        const float *src_dataDict = dictionary->cbuffer().as<const float *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        float* dst_data = output->cbuffer().as<float *>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        src_dataIdx += indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    const float *src_dataDict = dictionary->cbuffer().as<const float *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    float* dst_data = output->cbuffer().as<float *>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    src_dataIdx += indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
 
-        if (axis == 0) {
-            parallel_for(src_dataIdxSize, [&](size_t i) {
-                int idx = static_cast<int>(src_dataIdx[i]);
-
-                //  Index clipping
-                clipping(&idx, 0, indexRange);
+    if (axis == 0) {
+        parallel_for(src_dataIdxSize, [&](size_t i) {
+            unsigned int idx = static_cast<unsigned int>(src_dataIdx[i]);
 
+            //  Index clipping
+            if (idx < indexRange) {
                 //  Copying data to destination from Dictionary
-                simple_copy(&dst_data[dataLength * i],
+                simple_copy(&dst_data[i * dataLength],
                             output->byteSize() - (dataLength * i),
                             &src_dataDict[dataLength * idx],
-                            dataSize);
-            });
-        } else {
-            parallel_for(src_dataIdxSize, [&](size_t i) {
-                int idx = static_cast<int>(src_dataIdx[i]);
-
-                //  Index clipping
-                clipping(&idx, 0, indexRange);
+                            sizeof(float) * dataLength);
+            } else {
+                std::fill_n(&dst_data[i * dataLength], dataLength, 0.f);
+            }
+        });
+    } else {
+        parallel_for(src_dataIdxSize, [&](size_t i) {
+            unsigned int idx = static_cast<unsigned int>(src_dataIdx[i]);
 
+            //  Index clipping
+            if (idx < indexRange) {
                 //  Copying data to destination from Dictionary
                 for (size_t j = 0; j < numDictionaries; j++) {
                     simple_copy(&dst_data[dataLength * (i + j * src_dataIdxSize)],
                                 output->byteSize() - (dataLength * (i + j * src_dataIdxSize)),
                                 &src_dataDict[dataLength * (idx + j * indexRange)],
-                                dataSize);
+                                sizeof(float) * dataLength);
                 }
-            });
-        }
-    } else if (withHoles == GatherImpl::OUTPUT) {  //  If only output tensor have holes
-        const float *src_dataDict = dictionary->cbuffer().as<const float *>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
-        float* dst_data = output->cbuffer().as<float *>();
-        src_dataIdx += indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
-
-        parallel_for(src_dataIdxSize, [&](size_t i) {
-            int idx = static_cast<int>(src_dataIdx[i]);
-
-            //  Index clipping
-            clipping(&idx, 0, indexRange);
-
-            //  Copying data to destination from Dictionary
-            for (size_t j = 0; j < numDictionaries; j++) {
-                for (size_t k = 0; k < dataLength; k++) {
-                    dst_data[output->getTensorDesc().offset(k + dataLength * (i + j * src_dataIdxSize))] =
-                        src_dataDict[k + dataLength * (idx + j * indexRange)];
-                }
-            }
-        });
-    } else {  //  If input and oupput tensors have holes
-        const float *src_dataDict = dictionary->cbuffer().as<const float *>();
-        float* dst_data = output->cbuffer().as<float *>();
-
-        parallel_for(src_dataIdxSize, [&](size_t i) {
-            int idx = static_cast<int>(src_dataIdx[indexes->getTensorDesc().offset(i)]);
-
-            //  Index clipping
-            clipping(&idx, 0, indexRange);
-
-            //  Copying data to destination from Dictionary
-            for (size_t j = 0; j < numDictionaries; j++) {
-                for (size_t k = 0; k < dataLength; k++) {
-                    dst_data[output->getTensorDesc().offset(k + dataLength * (i + j * src_dataIdxSize))] =
-                        src_dataDict[dictionary->getTensorDesc().offset(k + dataLength * (idx + j * indexRange))];
+            } else {
+                for (size_t j = 0; j < numDictionaries; j++) {
+                    std::fill_n(&dst_data[dataLength * (i + j * src_dataIdxSize)], dataLength, 0.f);
                 }
             }
         });
diff --git a/inference-engine/src/extension/ext_grn.cpp b/inference-engine/src/extension/ext_grn.cpp
index 4810d9d08..87869f73a 100644
--- a/inference-engine/src/extension/ext_grn.cpp
+++ b/inference-engine/src/extension/ext_grn.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -48,7 +48,7 @@ public:
             }
             variance = std::pow(variance + bias, 0.5f);
             for (int c = 0; c < C; c++) {
-                dst_data[b*C*H*W + c*H*W + h*W + w] = src_data[b*C*H*W + c*H*W + h*W + w] / variance;
+                dst_data[b*C*H*W + c*H*W + h*W + w] = src_data[b*C*H*W + c*H*W + h*W + w] / static_cast<float>(variance);
             }
         });
         return OK;
diff --git a/inference-engine/src/extension/ext_interp.cpp b/inference-engine/src/extension/ext_interp.cpp
index 64ff20d88..3b3b684c6 100644
--- a/inference-engine/src/extension/ext_interp.cpp
+++ b/inference-engine/src/extension/ext_interp.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/ext_list.cpp b/inference-engine/src/extension/ext_list.cpp
index 6aa139dc2..89058be65 100644
--- a/inference-engine/src/extension/ext_list.cpp
+++ b/inference-engine/src/extension/ext_list.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -31,8 +31,8 @@ void CpuExtensions::AddShapeInferImpl(std::string name, const IShapeInferImpl::P
 
 void CpuExtensions::GetVersion(const Version*& versionInfo) const noexcept {
     static Version ExtensionDescription = {
-            { 1, 0 },    // extension API version
-            "1.0",
+            { 1, 6 },    // extension API version
+            "1.6",
             "ie-cpu-ext"  // extension description message
     };
 
diff --git a/inference-engine/src/extension/ext_list.hpp b/inference-engine/src/extension/ext_list.hpp
index 6e83e7ea3..08f6235b4 100644
--- a/inference-engine/src/extension/ext_list.hpp
+++ b/inference-engine/src/extension/ext_list.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/ext_mvn.cpp b/inference-engine/src/extension/ext_mvn.cpp
index 27f8b9ff9..7c09e5342 100644
--- a/inference-engine/src/extension/ext_mvn.cpp
+++ b/inference-engine/src/extension/ext_mvn.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -31,8 +31,8 @@ public:
             if (layer->insData.size() != 1 || layer->outData.empty())
                 THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
 
-            across_channels = static_cast<bool>(layer->GetParamAsInt("across_channels"));
-            normalize_variance = static_cast<bool>(layer->GetParamAsInt("normalize_variance"));
+            across_channels = layer->GetParamAsBool("across_channels", false);
+            normalize_variance = layer->GetParamAsBool("normalize_variance", false);
             eps = layer->GetParamAsFloat("eps");
 
 #if defined(HAVE_AVX512F)
@@ -87,7 +87,7 @@ void MVNImpl::mvn_pln(const float* src_data, float* dst_data, const SizeVector&
         size_t cb = b * C3;
         if (across_channels) {
             double mean = 0.0;
-            mean = parallel_sum(C, mean, [&](int c)->double {
+            mean = parallel_sum(C, mean, [&](size_t c)->double {
                 double mean_internal = 0.0;
                 size_t cc = cb + c * C2;
                 for (size_t d = 0lu; d < D; d++) {
@@ -111,7 +111,7 @@ void MVNImpl::mvn_pln(const float* src_data, float* dst_data, const SizeVector&
                         size_t ch = cd + h * W;
                         for (size_t w = 0lu; w < W; w++) {
                             size_t cw = ch + w;
-                            dst_data[cw] = src_data[cw] - mean;
+                            dst_data[cw] = src_data[cw] - static_cast<float>(mean);
                         }
                     }
                 }
@@ -138,7 +138,7 @@ void MVNImpl::mvn_pln(const float* src_data, float* dst_data, const SizeVector&
                         size_t ch = cd + h * W;
                         for (size_t w = 0lu; w < W; w++) {
                             size_t cw = ch + w;
-                            dst_data[cw] = src_data[cw] - mean;
+                            dst_data[cw] = src_data[cw] - static_cast<float>(mean);
                         }
                     }
                 }
@@ -152,7 +152,7 @@ void MVNImpl::mvn_pln(const float* src_data, float* dst_data, const SizeVector&
             size_t cb = b * C3;
             if (across_channels) {
                 double variance = 0.0;
-                variance = parallel_sum(C, variance, [&](int c)->double {
+                variance = parallel_sum(C, variance, [&](size_t c)->double {
                     double variance_internal = 0.0;
                     size_t cc = cb + c * C2;
                     for (size_t d = 0lu; d < D; d++) {
@@ -177,7 +177,7 @@ void MVNImpl::mvn_pln(const float* src_data, float* dst_data, const SizeVector&
                         for (size_t h = 0lu; h < H; h++) {
                             size_t ch = cd + h * W;
                             for (size_t w = 0lu; w < W; w++) {
-                                dst_data[ch + w] /= variance;
+                                dst_data[ch + w] /= static_cast<float>(variance);
                             }
                         }
                     }
@@ -204,7 +204,7 @@ void MVNImpl::mvn_pln(const float* src_data, float* dst_data, const SizeVector&
                         for (size_t h = 0lu; h < H; h++) {
                             size_t ch = cd + h * W;
                             for (size_t w = 0lu; w < W; w++) {
-                                dst_data[ch + w] /= variance;
+                                dst_data[ch + w] /= static_cast<float>(variance);
                             }
                         }
                     }
@@ -233,13 +233,12 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
     size_t H = (dims_size > 3) ? dims[dims_size - 2] : 1lu;
     size_t W = (dims_size > 2) ? dims[dims_size - 1] : 1lu;
 
-    int CB = div_up(C, static_cast<int>(blk_size));
+    int CB = div_up(static_cast<int>(C), static_cast<int>(blk_size));
 
     size_t C0 = W * blk_size;
     size_t C1 = C0 * H;
     size_t C2 = C1 * D;
     size_t C3 = C2 * CB;
-    size_t C4 = D * H * W;
     size_t C5 = C * D * H * W;
 
     if (normalize_variance) {
@@ -265,9 +264,8 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                 double variance = 0.0;
                 variance = parallel_sum3d(CB, D, H, variance, [&](size_t cb, size_t d, size_t h)->double {
                     size_t ccbd = ccb + cb * C2 + d * C1 + h * C0;
-                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
                     double variance_internal = 0.0;
-                    for (size_t w = 0lu; w < W; w++) {
+                    for (size_t w = 0lu, min_cb = std::min(blk_size, C - cb * blk_size); w < W; w++) {
                         size_t cw = ccbd + w * blk_size;
                         for (size_t c = 0lu; c < min_cb; c++) {
                             variance_internal += std::pow(static_cast<double>(src_data[cw + c]) - mean, 2);
@@ -282,19 +280,17 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
 
                 parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) {
                     size_t ccbd = ccb + cb * C2 + d * C1 + h * C0;
-                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
-                    for (size_t w = 0lu; w < W; w++) {
+                    for (size_t w = 0lu, min_cb = std::min(blk_size, C - cb * blk_size); w < W; w++) {
                         size_t cw = ccbd + w * blk_size;
                         for (size_t c = 0lu; c < min_cb; c++) {
                             size_t src_offset = cw + c;
 
-                            dst_data[src_offset] = (static_cast<double>(src_data[src_offset]) - mean) / variance;
+                            dst_data[src_offset] = static_cast<float>((static_cast<double>(src_data[src_offset]) - mean) / variance);
                         }
                     }
                 });
             } else {
                 parallel_for(CB, [&](size_t cb) {
-                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
                     size_t src_off = ccb + cb * C2;
 #if defined(HAVE_AVX2) || defined(HAVE_AVX512F)
                     vec_type vmean = _mm_uni_setzero_ps();
@@ -344,6 +340,7 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                         }
                     }
 #else
+                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
                     for (size_t c = 0; c < min_cb; c++) {
                         size_t cc = src_off + c;
 
@@ -358,6 +355,7 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                             }
                         }
 
+                        size_t C4 = D * H * W;
                         mean /= static_cast<double>(C4);
 
                         double variance = 0.0;
@@ -382,7 +380,7 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                                 size_t ch = cd + h * C0;
                                 for (size_t w = 0lu; w < W; w++) {
                                     size_t index = ch + w * blk_size;
-                                    dst_data[index] = (src_data[index] - mean) / variance;
+                                    dst_data[index] = (src_data[index] - static_cast<float>(mean)) / static_cast<float>(variance);
                                 }
                             }
                         }
@@ -398,9 +396,8 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                 double mean = 0.0;
                 mean = parallel_sum3d(CB, D, H, mean, [&](size_t cb, size_t d, size_t h)->double {
                     size_t ccbd = ccb + cb * C2 + d * C1 + h * C0;
-                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
                     double mean_internal = 0.f;
-                    for (size_t w = 0lu; w < W; w++) {
+                    for (size_t w = 0lu, min_cb = std::min(blk_size, C - cb * blk_size); w < W; w++) {
                         size_t cw = ccbd + w * blk_size;
                         for (size_t c = 0lu; c < min_cb; c++) {
                             mean_internal += src_data[cw + c];
@@ -413,19 +410,17 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
 
                 parallel_for3d(CB, D, H, [&](size_t cb, size_t d, size_t h) {
                     size_t ccbd = ccb + cb * C2 + d * C1 + h * C0;
-                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
-                    for (size_t w = 0lu; w < W; w++) {
+                    for (size_t w = 0lu, min_cb = std::min(blk_size, C - cb * blk_size); w < W; w++) {
                         size_t cw = ccbd + w * blk_size;
                         for (size_t c = 0lu; c < min_cb; c++) {
                             size_t src_offset = cw + c;
 
-                            dst_data[src_offset] = src_data[src_offset] - mean;
+                            dst_data[src_offset] = src_data[src_offset] - static_cast<float>(mean);
                         }
                     }
                 });
             } else {
                 parallel_for(CB, [&](size_t cb) {
-                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
                     size_t src_off = ccb + cb * C2;
 #if defined(HAVE_AVX2) || defined(HAVE_AVX512F)
                     vec_type vmean = _mm_uni_setzero_ps();
@@ -455,6 +450,7 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                         }
                     }
 #else
+                    size_t min_cb = std::min(blk_size, C - cb * blk_size);
                     for (size_t c = 0lu; c < min_cb; c++) {
                         size_t cc = src_off + c;
                         double mean = 0.0;
@@ -468,6 +464,7 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                             }
                         }
 
+                        size_t C4 = D * H * W;
                         mean /= static_cast<double>(C4);
 
                         for (size_t d = 0lu; d < D; d++) {
@@ -476,7 +473,7 @@ void MVNImpl::mvn_blk(const float* src_data, float* dst_data, const SizeVector&
                                 size_t ch = cd + h * C0;
                                 for (size_t w = 0lu; w < W; w++) {
                                     size_t index = ch + w * blk_size;
-                                    dst_data[index] = src_data[index] - mean;
+                                    dst_data[index] = src_data[index] - static_cast<float>(mean);
                                 }
                             }
                         }
diff --git a/inference-engine/src/extension/ext_normalize.cpp b/inference-engine/src/extension/ext_normalize.cpp
index 0c77e3eed..448d0cb1f 100644
--- a/inference-engine/src/extension/ext_normalize.cpp
+++ b/inference-engine/src/extension/ext_normalize.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -31,8 +31,8 @@ public:
             weights = std::dynamic_pointer_cast<TBlob<float>>(layer->blobs.at("weights"));
             if (!weights)
                 THROW_IE_EXCEPTION << layer->name << " weights is empty!";
-            across_spatial = static_cast<bool>(layer->GetParamAsInt("across_spatial"));
-            channel_shared = static_cast<bool>(layer->GetParamAsInt("channel_shared"));
+            across_spatial = layer->GetParamAsBool("across_spatial", false);
+            channel_shared = layer->GetParamAsBool("channel_shared", false);
             eps = layer->GetParamAsFloat("eps");
 
             addConfig(layer, {{ConfLayout::PLN, false, 0}}, {{ConfLayout::PLN, false, 0}}, true);
@@ -83,9 +83,6 @@ public:
         const int H = static_cast<int>(dims.size() > 2 ? dims[2] : 1);
         const int W = static_cast<int>(dims.size() > 3 ? dims[3] : 1);
 
-        const int HW = H*W;
-        const int CHW = C*HW;
-
         for (int n = 0; n < N; n++) {
             const float* psrc = src + n*C*H*W;
             float* pdst = dst + n*C*H*W;
@@ -220,7 +217,7 @@ private:
 
     bool across_spatial = true;
     bool channel_shared = true;
-    float eps = 1e-10;
+    float eps = 1e-10f;
 };
 
 REG_FACTORY_FOR(ImplFactory<NormalizeImpl>, Normalize);
diff --git a/inference-engine/src/extension/ext_pad.cpp b/inference-engine/src/extension/ext_pad.cpp
index 102db1303..255e1ada3 100644
--- a/inference-engine/src/extension/ext_pad.cpp
+++ b/inference-engine/src/extension/ext_pad.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ public:
     explicit PadImpl(const CNNLayer* layer) {
         try {
             if (layer->insData.empty() || layer->outData.empty())
-                THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
 
             pads_begin = layer->GetParamAsUInts("pads_begin");
             std::vector<unsigned int> pads_end = layer->GetParamAsUInts("pads_end");
@@ -28,7 +28,7 @@ public:
             src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
             dst_dims = layer->outData[0]->getTensorDesc().getDims();
             if (src_dims.size() != dst_dims.size() || pads_begin.size() != src_dims.size())
-                THROW_IE_EXCEPTION << "Incorrect number of input/output dimensions!";
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";
 
             std::string pad_mode = layer->GetParamAsString("pad_mode");
             if (pad_mode == "constant") {
diff --git a/inference-engine/src/extension/ext_powerfile.cpp b/inference-engine/src/extension/ext_powerfile.cpp
index f3666b227..ff3fe0f2f 100644
--- a/inference-engine/src/extension/ext_powerfile.cpp
+++ b/inference-engine/src/extension/ext_powerfile.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/ext_priorbox.cpp b/inference-engine/src/extension/ext_priorbox.cpp
index 8b948efb7..d1cb1955c 100644
--- a/inference-engine/src/extension/ext_priorbox.cpp
+++ b/inference-engine/src/extension/ext_priorbox.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -8,6 +8,7 @@
 #include <vector>
 #include <string>
 #include <cmath>
+#include <limits>
 
 namespace InferenceEngine {
 namespace Extensions {
@@ -28,9 +29,9 @@ public:
             _step = layer->GetParamAsFloat("step", 0);
             _min_sizes = layer->GetParamAsFloats("min_size", {});
             _max_sizes = layer->GetParamAsFloats("max_size", {});
-            _flip = static_cast<bool>(layer->GetParamAsInt("flip"));
-            _clip = static_cast<bool>(layer->GetParamAsInt("clip"));
-            _scale_all_sizes = static_cast<bool>(layer->GetParamAsInt("scale_all_sizes", 1));
+            _flip = layer->GetParamAsBool("flip", false);
+            _clip = layer->GetParamAsBool("clip", false);
+            _scale_all_sizes = layer->GetParamAsBool("scale_all_sizes", true);
 
             bool exist;
 
@@ -41,6 +42,10 @@ public:
             for (float aspect_ratio : aspect_ratios) {
                 exist = false;
 
+                if (std::fabs(aspect_ratio) < std::numeric_limits<float>::epsilon()) {
+                    THROW_IE_EXCEPTION << "aspect_ratio param can't be equal to zero";
+                }
+
                 for (float _aspect_ratio : _aspect_ratios) {
                     if (fabs(aspect_ratio - _aspect_ratio) < 1e-6) {
                         exist = true;
@@ -91,6 +96,10 @@ public:
         }
     }
 
+    StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override {
+        return OK;
+    }
+
     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
                        ResponseDesc *resp) noexcept override {
         if (inputs.size() != 2 || outputs.empty()) {
diff --git a/inference-engine/src/extension/ext_priorbox_clustered.cpp b/inference-engine/src/extension/ext_priorbox_clustered.cpp
index 69807a936..40fd27384 100644
--- a/inference-engine/src/extension/ext_priorbox_clustered.cpp
+++ b/inference-engine/src/extension/ext_priorbox_clustered.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -39,12 +39,16 @@ public:
         }
     }
 
+    StatusCode init(LayerConfig& config, ResponseDesc *resp) noexcept override {
+        return OK;
+    }
+
     StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
                        ResponseDesc *resp) noexcept override {
         int num_priors_ = widths_.size();
 
         if (variance_.empty())
-            variance_.push_back(0.1);
+            variance_.push_back(0.1f);
 
         // Execute
         const int layer_width = inputs[0]->getTensorDesc().getDims()[3];
@@ -73,10 +77,10 @@ public:
                     float box_width = widths_[s];
                     float box_height = heights_[s];
 
-                    float xmin = (center_x - box_width / 2.) / img_width;
-                    float ymin = (center_y - box_height / 2.) / img_height;
-                    float xmax = (center_x + box_width / 2.) / img_width;
-                    float ymax = (center_y + box_height / 2.) / img_height;
+                    float xmin = (center_x - box_width / 2.0f) / img_width;
+                    float ymin = (center_y - box_height / 2.0f) / img_height;
+                    float xmax = (center_x + box_width / 2.0f) / img_width;
+                    float ymax = (center_y + box_height / 2.0f) / img_height;
 
                     if (clip_) {
                         xmin = std::min(std::max(xmin, 0.0f), 1.0f);
diff --git a/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp b/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp
new file mode 100644
index 000000000..a8e668b56
--- /dev/null
+++ b/inference-engine/src/extension/ext_priorgridgenerator_onnx.cpp
@@ -0,0 +1,97 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+const int INPUT_PRIORS {0};
+const int INPUT_FEATUREMAP {1};
+const int INPUT_IMAGE {2};
+
+const int OUTPUT_ROIS {0};
+
+class ExperimentalDetectronPriorGridGeneratorImpl: public ExtLayerBase {
+private:
+    // Inputs:
+    //      priors, shape [n, 4]
+    //      [feature_map], shape [b, c, h, w]
+    //      [im_data], shape [b, 3, im_h, im_w]
+    // Outputs:
+    //      priors_grid, shape [m, 4]
+
+public:
+    explicit ExperimentalDetectronPriorGridGeneratorImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() > 3 || layer->outData.empty())
+                THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
+
+            if (layer->insData[INPUT_PRIORS].lock()->dims.size() != 2 ||
+                    (layer->insData.size() > INPUT_FEATUREMAP &&
+                     layer->insData[INPUT_FEATUREMAP].lock()->dims.size() != 4) ||
+                    (layer->insData.size() > INPUT_IMAGE &&
+                     layer->insData[INPUT_IMAGE].lock()->dims.size() != 4))
+                THROW_IE_EXCEPTION << "Unsupported shape of input blobs!";
+
+            grid_w_ = layer->GetParamAsInt("w", 0);
+            grid_h_ = layer->GetParamAsInt("h", 0);
+            stride_h_ = layer->GetParamAsFloat("stride_y", 0);
+            stride_w_ = layer->GetParamAsFloat("stride_x", 0);
+
+            addConfig(layer,
+                      {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)},
+                      {DataConfigurator(ConfLayout::PLN)});
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
+                       ResponseDesc *resp) noexcept override {
+        const int num_priors_ = inputs[INPUT_PRIORS]->getTensorDesc().getDims()[0];
+        assert(inputs[INPUT_PRIORS]->getTensorDesc().getDims()[1] == 4);
+
+        // Execute
+        const int layer_width = grid_w_ ? grid_w_ : inputs[INPUT_FEATUREMAP]->getTensorDesc().getDims()[3];
+        const int layer_height = grid_h_ ? grid_h_ : inputs[INPUT_FEATUREMAP]->getTensorDesc().getDims()[2];
+        const float step_w = stride_w_ ? stride_w_ : static_cast<float>(inputs[INPUT_IMAGE]->getTensorDesc().getDims()[3]) / layer_width;
+        const float step_h = stride_h_ ? stride_h_ : static_cast<float>(inputs[INPUT_IMAGE]->getTensorDesc().getDims()[2]) / layer_height;
+
+        const auto *bottom_data_0 = inputs[0]->buffer().as<const float *>();
+        auto *top_data_0 = outputs[OUTPUT_ROIS]->buffer().as<float *>();
+
+        for (int h = 0; h < layer_height; ++h) {
+            for (int w = 0; w < layer_width; ++w) {
+                for (int s = 0; s < num_priors_; ++s) {
+                    top_data_0[0] = bottom_data_0[4 * s + 0] + step_w * (w + 0.5f);
+                    top_data_0[1] = bottom_data_0[4 * s + 1] + step_h * (h + 0.5f);
+                    top_data_0[2] = bottom_data_0[4 * s + 2] + step_w * (w + 0.5f);
+                    top_data_0[3] = bottom_data_0[4 * s + 3] + step_h * (h + 0.5f);
+                    top_data_0 += 4;
+                }
+            }
+        }
+
+        return OK;
+    }
+
+private:
+    int grid_w_;
+    int grid_h_;
+    float stride_w_;
+    float stride_h_;
+};
+
+
+REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronPriorGridGeneratorImpl>, ExperimentalDetectronPriorGridGenerator);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_proposal.cpp b/inference-engine/src/extension/ext_proposal.cpp
index 2f93b05c9..e431d49c4 100644
--- a/inference-engine/src/extension/ext_proposal.cpp
+++ b/inference-engine/src/extension/ext_proposal.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -74,7 +74,7 @@ void enumerate_proposals_cpu(const float* bottom4d, const float* d_anchor4d, con
                              const int bottom_W, const float img_H, const float img_W,
                              const float min_box_H, const float min_box_W, const int feat_stride,
                              const float box_coordinate_scale, const float box_size_scale,
-                             float coordinates_offset, bool initial_clip, bool swap_xy) {
+                             float coordinates_offset, bool initial_clip, bool swap_xy, bool clip_before_nms) {
     const int bottom_area = bottom_H * bottom_W;
 
     const float* p_anchors_wm = anchors + 0 * num_anchors;
@@ -83,8 +83,8 @@ void enumerate_proposals_cpu(const float* bottom4d, const float* d_anchor4d, con
     const float* p_anchors_hp = anchors + 3 * num_anchors;
 
     parallel_for2d(bottom_H, bottom_W, [&](size_t h, size_t w) {
-            const float x = (swap_xy ? h : w) * feat_stride;
-            const float y = (swap_xy ? w : h) * feat_stride;
+            const float x = static_cast<float>((swap_xy ? h : w) * feat_stride);
+            const float y = static_cast<float>((swap_xy ? w : h) * feat_stride);
 
             const float* p_box   = d_anchor4d + h * bottom_W + w;
             const float* p_score = bottom4d   + h * bottom_W + w;
@@ -135,10 +135,12 @@ void enumerate_proposals_cpu(const float* bottom4d, const float* d_anchor4d, con
                 y1 = pred_ctr_y + 0.5f * pred_h;
 
                 // adjust new corner locations to be within the image region,
-                x0 = std::max<float>(0.0f, std::min<float>(x0, img_W - coordinates_offset));
-                y0 = std::max<float>(0.0f, std::min<float>(y0, img_H - coordinates_offset));
-                x1 = std::max<float>(0.0f, std::min<float>(x1, img_W - coordinates_offset));
-                y1 = std::max<float>(0.0f, std::min<float>(y1, img_H - coordinates_offset));
+                if (clip_before_nms) {
+                    x0 = std::max<float>(0.0f, std::min<float>(x0, img_W - coordinates_offset));
+                    y0 = std::max<float>(0.0f, std::min<float>(y0, img_H - coordinates_offset));
+                    x1 = std::max<float>(0.0f, std::min<float>(x1, img_W - coordinates_offset));
+                    y1 = std::max<float>(0.0f, std::min<float>(y1, img_H - coordinates_offset));
+                }
 
                 // recompute new width & height
                 const float box_w = x1 - x0 + coordinates_offset;
@@ -290,7 +292,8 @@ static
 void retrieve_rois_cpu(const int num_rois, const int item_index,
                               const int num_proposals,
                               const float* proposals, const int roi_indices[],
-                              float* rois, int post_nms_topn_) {
+                              float* rois, int post_nms_topn_,
+                              bool normalize, float img_h, float img_w, bool clip_after_nms) {
     const float *src_x0 = proposals + 0 * num_proposals;
     const float *src_y0 = proposals + 1 * num_proposals;
     const float *src_x1 = proposals + 2 * num_proposals;
@@ -299,12 +302,26 @@ void retrieve_rois_cpu(const int num_rois, const int item_index,
     parallel_for(num_rois, [&](size_t roi) {
         int index = roi_indices[roi];
 
-        const float x0 = src_x0[index];
-        const float y0 = src_y0[index];
-        const float x1 = src_x1[index];
-        const float y1 = src_y1[index];
+        float x0 = src_x0[index];
+        float y0 = src_y0[index];
+        float x1 = src_x1[index];
+        float y1 = src_y1[index];
+
+        if (clip_after_nms) {
+            x0 = std::max<float>(0.0f, std::min<float>(x0, img_w));
+            y0 = std::max<float>(0.0f, std::min<float>(y0, img_h));
+            x1 = std::max<float>(0.0f, std::min<float>(x1, img_w));
+            y1 = std::max<float>(0.0f, std::min<float>(y1, img_h));
+        }
+
+        if (normalize) {
+            x0 /= img_w;
+            y0 /= img_h;
+            x1 /= img_w;
+            y1 /= img_h;
+        }
 
-        rois[roi * 5 + 0] = item_index;
+        rois[roi * 5 + 0] = static_cast<float>(item_index);
         rois[roi * 5 + 1] = x0;
         rois[roi * 5 + 2] = y0;
         rois[roi * 5 + 3] = x1;
@@ -341,6 +358,9 @@ public:
             box_size_scale_ = layer->GetParamAsFloat("box_size_scale", 1.0);
             scales = layer->GetParamAsFloats("scale", {});
             ratios = layer->GetParamAsFloats("ratio", {});
+            normalize_ = layer->GetParamsAsBool("normalize", false);
+            clip_before_nms = layer->GetParamsAsBool("clip_before_nms", true);
+            clip_after_nms = layer->GetParamsAsBool("clip_after_nms", false);
 
             anchors_shape_0 = ratios.size() * scales.size();
             anchors_.resize(anchors_shape_0 * 4);
@@ -386,10 +406,7 @@ public:
         const float* p_img_info_cpu = inputs[2]->buffer();
         float* p_roi_item = outputs[0]->buffer();
 
-        size_t img_info_size = 1;
-        for (size_t i = 0; i < inputs[2]->getTensorDesc().getDims().size(); i++) {
-            img_info_size *= inputs[2]->getTensorDesc().getDims()[i];
-        }
+        size_t img_info_size = inputs[2]->getTensorDesc().getDims()[1];
 
         // No second output so ignoring this
         // Dtype* p_score_item = (top.size() > 1) ? top[1]->mutable_cpu_data() : NULL;
@@ -437,12 +454,12 @@ public:
         // Execute
         int nn = inputs[0]->getTensorDesc().getDims()[0];
         for (int n = 0; n < nn; ++n) {
-            enumerate_proposals_cpu(p_bottom_item + num_proposals, p_d_anchor_item,
+            enumerate_proposals_cpu(p_bottom_item + num_proposals + n*num_proposals*2, p_d_anchor_item + n*num_proposals*4,
                                     &anchors_[0], reinterpret_cast<float *>(&proposals_[0]),
                                     anchors_shape_0, bottom_H, bottom_W, img_H, img_W,
                                     min_box_H, min_box_W, feat_stride_,
                                     box_coordinate_scale_, box_size_scale_,
-                                    coordinates_offset, initial_clip, swap_xy);
+                                    coordinates_offset, initial_clip, swap_xy, clip_before_nms);
             std::partial_sort(proposals_.begin(), proposals_.begin() + pre_nms_topn, proposals_.end(),
                               [](const ProposalBox& struct1, const ProposalBox& struct2) {
                                   return (struct1.score > struct2.score);
@@ -450,7 +467,8 @@ public:
 
             unpack_boxes(reinterpret_cast<float *>(&proposals_[0]), &unpacked_boxes[0], pre_nms_topn);
             nms_cpu(pre_nms_topn, &is_dead[0], &unpacked_boxes[0], &roi_indices_[0], &num_rois, 0, nms_thresh_, post_nms_topn_, coordinates_offset);
-            retrieve_rois_cpu(num_rois, n, pre_nms_topn, &unpacked_boxes[0], &roi_indices_[0], p_roi_item, post_nms_topn_);
+            retrieve_rois_cpu(num_rois, n, pre_nms_topn, &unpacked_boxes[0], &roi_indices_[0], p_roi_item + n*post_nms_topn_*5,
+                              post_nms_topn_, normalize_, img_H, img_W, clip_after_nms);
         }
 
         return OK;
@@ -467,6 +485,7 @@ private:
     float box_size_scale_;
     std::vector<float> scales;
     std::vector<float> ratios;
+    bool normalize_;
 
     size_t anchors_shape_0;
     std::vector<float> anchors_;
@@ -475,9 +494,11 @@ private:
     // Framework specific parameters
     float coordinates_offset;
     bool swap_xy;
-    bool initial_clip;   // clip initial bounding boxes
-    bool round_ratios;   // round ratios during anchors generation stage
-    bool shift_anchors;  // shift anchors by half size of the box
+    bool initial_clip;     // clip initial bounding boxes
+    bool clip_before_nms;  // clip bounding boxes before nms step
+    bool clip_after_nms;   // clip bounding boxes after nms step
+    bool round_ratios;     // round ratios during anchors generation stage
+    bool shift_anchors;    // shift anchors by half size of the box
 };
 
 class ProposalFactory : public ImplFactory<ProposalImpl> {
diff --git a/inference-engine/src/extension/ext_proposal_onnx.cpp b/inference-engine/src/extension/ext_proposal_onnx.cpp
new file mode 100644
index 000000000..43ce9a090
--- /dev/null
+++ b/inference-engine/src/extension/ext_proposal_onnx.cpp
@@ -0,0 +1,442 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cassert>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <utility>
+#include <algorithm>
+#if defined(HAVE_AVX2)
+#include <immintrin.h>
+#endif
+#include "ie_parallel.hpp"
+
+
+namespace {
+struct Indexer {
+  const std::vector<int> dims_;
+  int total_{1};
+
+  explicit Indexer(const std::vector<int>& dims) : dims_(dims) {
+      total_ = 1;
+      for (size_t i = 0; i < dims_.size(); ++i) {
+          total_ *= dims_[i];
+      }
+  }
+
+  const int operator()(const std::vector<int>& idx) const {
+      int flat_idx = 0;
+      assert(idx.size() == dims_.size());
+      for (size_t i = 0; i < dims_.size(); ++i) {
+          assert(0 <= idx[i] && idx[i] < dims_[i]);
+          flat_idx = flat_idx * dims_[i] + idx[i];
+      }
+      assert(flat_idx < total_);
+      return flat_idx;
+  }
+};
+}  // namespace
+
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+static
+void refine_anchors(const float* deltas, const float* scores, const float* anchors,
+                    float* proposals, const int anchors_num, const int bottom_H,
+                    const int bottom_W, const float img_H, const float img_W,
+                    const float min_box_H, const float min_box_W,
+                    const float max_delta_log_wh,
+                    float coordinates_offset) {
+    Indexer delta_idx({anchors_num, 4, bottom_H, bottom_W});
+    Indexer score_idx({anchors_num, 1, bottom_H, bottom_W});
+    Indexer proposal_idx({bottom_H, bottom_W, anchors_num, 5});
+    Indexer anchor_idx({bottom_H, bottom_W, anchors_num, 4});
+
+    parallel_for2d(bottom_H, bottom_W, [&](int h, int w) {
+            for (int anchor = 0; anchor < anchors_num; ++anchor) {
+                float x0 = anchors[anchor_idx({h, w, anchor, 0})];
+                float y0 = anchors[anchor_idx({h, w, anchor, 1})];
+                float x1 = anchors[anchor_idx({h, w, anchor, 2})];
+                float y1 = anchors[anchor_idx({h, w, anchor, 3})];
+
+                const float dx = deltas[delta_idx({anchor, 0, h, w})];
+                const float dy = deltas[delta_idx({anchor, 1, h, w})];
+                const float d_log_w = deltas[delta_idx({anchor, 2, h, w})];
+                const float d_log_h = deltas[delta_idx({anchor, 3, h, w})];
+
+                const float score = scores[score_idx({anchor, 0, h, w})];
+
+                // width & height of box
+                const float ww = x1 - x0 + coordinates_offset;
+                const float hh = y1 - y0 + coordinates_offset;
+                // center location of box
+                const float ctr_x = x0 + 0.5f * ww;
+                const float ctr_y = y0 + 0.5f * hh;
+
+                // new center location according to deltas (dx, dy)
+                const float pred_ctr_x = dx * ww + ctr_x;
+                const float pred_ctr_y = dy * hh + ctr_y;
+                // new width & height according to deltas d(log w), d(log h)
+                const float pred_w = std::exp(std::min(d_log_w, max_delta_log_wh)) * ww;
+                const float pred_h = std::exp(std::min(d_log_h, max_delta_log_wh)) * hh;
+
+                // update upper-left corner location
+                x0 = pred_ctr_x - 0.5f * pred_w;
+                y0 = pred_ctr_y - 0.5f * pred_h;
+                // update lower-right corner location
+                x1 = pred_ctr_x + 0.5f * pred_w - coordinates_offset;
+                y1 = pred_ctr_y + 0.5f * pred_h - coordinates_offset;
+
+                // adjust new corner locations to be within the image region,
+                x0 = std::max<float>(0.0f, std::min<float>(x0, img_W - coordinates_offset));
+                y0 = std::max<float>(0.0f, std::min<float>(y0, img_H - coordinates_offset));
+                x1 = std::max<float>(0.0f, std::min<float>(x1, img_W - coordinates_offset));
+                y1 = std::max<float>(0.0f, std::min<float>(y1, img_H - coordinates_offset));
+
+                // recompute new width & height
+                const float box_w = x1 - x0 + coordinates_offset;
+                const float box_h = y1 - y0 + coordinates_offset;
+
+                proposals[proposal_idx({h, w, anchor, 0})] = x0;
+                proposals[proposal_idx({h, w, anchor, 1})] = y0;
+                proposals[proposal_idx({h, w, anchor, 2})] = x1;
+                proposals[proposal_idx({h, w, anchor, 3})] = y1;
+                proposals[proposal_idx({h, w, anchor, 4})] = (min_box_W <= box_w) * (min_box_H <= box_h) * score;
+            }
+    });
+}
+
+static void unpack_boxes(const float* p_proposals, float* unpacked_boxes, int pre_nms_topn) {
+    parallel_for(pre_nms_topn, [&](size_t i) {
+        unpacked_boxes[0*pre_nms_topn + i] = p_proposals[5*i + 0];
+        unpacked_boxes[1*pre_nms_topn + i] = p_proposals[5*i + 1];
+        unpacked_boxes[2*pre_nms_topn + i] = p_proposals[5*i + 2];
+        unpacked_boxes[3*pre_nms_topn + i] = p_proposals[5*i + 3];
+        unpacked_boxes[4*pre_nms_topn + i] = p_proposals[5*i + 4];
+    });
+}
+
+static
+void nms_cpu(const int num_boxes, int is_dead[],
+             const float* boxes, int index_out[], int* const num_out,
+             const int base_index, const float nms_thresh, const int max_num_out,
+             float coordinates_offset) {
+    const int num_proposals = num_boxes;
+    int count = 0;
+
+    const float* x0 = boxes + 0 * num_proposals;
+    const float* y0 = boxes + 1 * num_proposals;
+    const float* x1 = boxes + 2 * num_proposals;
+    const float* y1 = boxes + 3 * num_proposals;
+
+    memset(is_dead, 0, num_boxes * sizeof(int));
+
+#if defined(HAVE_AVX2)
+    __m256  vc_fone = _mm256_set1_ps(coordinates_offset);
+    __m256i vc_ione = _mm256_set1_epi32(1);
+    __m256  vc_zero = _mm256_set1_ps(0.0f);
+
+    __m256 vc_nms_thresh = _mm256_set1_ps(nms_thresh);
+#endif
+
+    for (int box = 0; box < num_boxes; ++box) {
+        if (is_dead[box])
+            continue;
+
+        index_out[count++] = base_index + box;
+        if (count == max_num_out)
+            break;
+
+        int tail = box + 1;
+
+#if defined(HAVE_AVX2)
+        __m256 vx0i = _mm256_set1_ps(x0[box]);
+        __m256 vy0i = _mm256_set1_ps(y0[box]);
+        __m256 vx1i = _mm256_set1_ps(x1[box]);
+        __m256 vy1i = _mm256_set1_ps(y1[box]);
+
+        __m256 vA_width  = _mm256_sub_ps(vx1i, vx0i);
+        __m256 vA_height = _mm256_sub_ps(vy1i, vy0i);
+        __m256 vA_area   = _mm256_mul_ps(_mm256_add_ps(vA_width, vc_fone), _mm256_add_ps(vA_height, vc_fone));
+
+        for (; tail <= num_boxes - 8; tail += 8) {
+            __m256i *pdst = reinterpret_cast<__m256i*>(is_dead + tail);
+            __m256i  vdst = _mm256_loadu_si256(pdst);
+
+            __m256 vx0j = _mm256_loadu_ps(x0 + tail);
+            __m256 vy0j = _mm256_loadu_ps(y0 + tail);
+            __m256 vx1j = _mm256_loadu_ps(x1 + tail);
+            __m256 vy1j = _mm256_loadu_ps(y1 + tail);
+
+            __m256 vx0 = _mm256_max_ps(vx0i, vx0j);
+            __m256 vy0 = _mm256_max_ps(vy0i, vy0j);
+            __m256 vx1 = _mm256_min_ps(vx1i, vx1j);
+            __m256 vy1 = _mm256_min_ps(vy1i, vy1j);
+
+            __m256 vwidth  = _mm256_add_ps(_mm256_sub_ps(vx1, vx0), vc_fone);
+            __m256 vheight = _mm256_add_ps(_mm256_sub_ps(vy1, vy0), vc_fone);
+            __m256 varea = _mm256_mul_ps(_mm256_max_ps(vc_zero, vwidth), _mm256_max_ps(vc_zero, vheight));
+
+            __m256 vB_width  = _mm256_sub_ps(vx1j, vx0j);
+            __m256 vB_height = _mm256_sub_ps(vy1j, vy0j);
+            __m256 vB_area   = _mm256_mul_ps(_mm256_add_ps(vB_width, vc_fone), _mm256_add_ps(vB_height, vc_fone));
+
+            __m256 vdivisor = _mm256_sub_ps(_mm256_add_ps(vA_area, vB_area), varea);
+            __m256 vintersection_area = _mm256_div_ps(varea, vdivisor);
+
+            __m256 vcmp_0 = _mm256_cmp_ps(vx0i, vx1j, _CMP_LE_OS);
+            __m256 vcmp_1 = _mm256_cmp_ps(vy0i, vy1j, _CMP_LE_OS);
+            __m256 vcmp_2 = _mm256_cmp_ps(vx0j, vx1i, _CMP_LE_OS);
+            __m256 vcmp_3 = _mm256_cmp_ps(vy0j, vy1i, _CMP_LE_OS);
+            __m256 vcmp_4 = _mm256_cmp_ps(vc_nms_thresh, vintersection_area, _CMP_LT_OS);
+
+            vcmp_0 = _mm256_and_ps(vcmp_0, vcmp_1);
+            vcmp_2 = _mm256_and_ps(vcmp_2, vcmp_3);
+            vcmp_4 = _mm256_and_ps(vcmp_4, vcmp_0);
+            vcmp_4 = _mm256_and_ps(vcmp_4, vcmp_2);
+
+            _mm256_storeu_si256(pdst, _mm256_blendv_epi8(vdst, vc_ione, _mm256_castps_si256(vcmp_4)));
+        }
+#endif
+
+        for (; tail < num_boxes; ++tail) {
+            float res = 0.0f;
+
+            const float x0i = x0[box];
+            const float y0i = y0[box];
+            const float x1i = x1[box];
+            const float y1i = y1[box];
+
+            const float x0j = x0[tail];
+            const float y0j = y0[tail];
+            const float x1j = x1[tail];
+            const float y1j = y1[tail];
+
+            if (x0i <= x1j && y0i <= y1j && x0j <= x1i && y0j <= y1i) {
+                // overlapped region (= box)
+                const float x0 = std::max<float>(x0i, x0j);
+                const float y0 = std::max<float>(y0i, y0j);
+                const float x1 = std::min<float>(x1i, x1j);
+                const float y1 = std::min<float>(y1i, y1j);
+
+                // intersection area
+                const float width  = std::max<float>(0.0f,  x1 - x0 + coordinates_offset);
+                const float height = std::max<float>(0.0f,  y1 - y0 + coordinates_offset);
+                const float area   = width * height;
+
+                // area of A, B
+                const float A_area = (x1i - x0i + coordinates_offset) * (y1i - y0i + coordinates_offset);
+                const float B_area = (x1j - x0j + coordinates_offset) * (y1j - y0j + coordinates_offset);
+
+                // IoU
+                res = area / (A_area + B_area - area);
+            }
+
+            if (nms_thresh < res)
+                is_dead[tail] = 1;
+        }
+    }
+
+    *num_out = count;
+}
+
+
+static
+void fill_output_blobs(const float* proposals, const int* roi_indices,
+                       float* rois, float* scores,
+                       const int num_proposals, const int num_rois, const int post_nms_topn) {
+    const float *src_x0 = proposals + 0 * num_proposals;
+    const float *src_y0 = proposals + 1 * num_proposals;
+    const float *src_x1 = proposals + 2 * num_proposals;
+    const float *src_y1 = proposals + 3 * num_proposals;
+    const float *src_score = proposals + 4 * num_proposals;
+
+    parallel_for(num_rois, [&](size_t i) {
+        int index = roi_indices[i];
+        rois[i * 4 + 0] = src_x0[index];
+        rois[i * 4 + 1] = src_y0[index];
+        rois[i * 4 + 2] = src_x1[index];
+        rois[i * 4 + 3] = src_y1[index];
+        scores[i] = src_score[index];
+    });
+
+    if (num_rois < post_nms_topn) {
+        for (int i = 4 * num_rois; i < 4 * post_nms_topn; i++) {
+            rois[i] = 0.f;
+        }
+        for (int i = num_rois; i < post_nms_topn; i++) {
+            scores[i] = 0.f;
+        }
+    }
+}
+
+
+class ONNXCustomProposalImpl : public ExtLayerBase {
+private:
+    const int INPUT_IM_INFO {0};
+    const int INPUT_ANCHORS {1};
+    const int INPUT_DELTAS {2};
+    const int INPUT_SCORES {3};
+    const int OUTPUT_ROIS {0};
+    const int OUTPUT_SCORES {1};
+
+public:
+    explicit ONNXCustomProposalImpl(const CNNLayer *layer) {
+        try {
+            if (layer->insData.size() != 4 || layer->outData.size() != 2)
+                THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
+
+            min_size_ = layer->GetParamAsFloat("min_size");
+            nms_thresh_ = layer->GetParamAsFloat("nms_threshold");
+            pre_nms_topn_ = layer->GetParamAsInt("pre_nms_count");
+            post_nms_topn_ = layer->GetParamAsInt("post_nms_count");
+
+            coordinates_offset = 0.0f;
+
+            roi_indices_.resize(post_nms_topn_);
+            addConfig(layer,
+                      {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
+                       DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)},
+                      {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)});
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    void print_shape(const Blob::Ptr& b) {
+        for (size_t i = 0; i < b->getTensorDesc().getDims().size(); ++i) {
+            std::cout << b->getTensorDesc().getDims()[i] << ", ";
+        }
+        std::cout << std::endl;
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr> &inputs, std::vector<Blob::Ptr> &outputs,
+                       ResponseDesc *resp) noexcept override {
+        if (inputs.size() != 4 || outputs.size() != 2) {
+            if (resp) {
+                std::string errorMsg = "Incorrect number of input or output edges!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return GENERAL_ERROR;
+        }
+
+        // Prepare memory
+        const float* p_deltas_item = inputs[INPUT_DELTAS]->buffer();
+        const float* p_scores_item = inputs[INPUT_SCORES]->buffer();
+        const float* p_anchors_item = inputs[INPUT_ANCHORS]->buffer();
+        const float* p_img_info_cpu = inputs[INPUT_IM_INFO]->buffer();
+
+        float* p_roi_item = outputs[OUTPUT_ROIS]->buffer();
+        float* p_roi_score_item = outputs[OUTPUT_SCORES]->buffer();
+
+
+        size_t img_info_size = 1;
+        for (size_t i = 0; i < inputs[INPUT_IM_INFO]->getTensorDesc().getDims().size(); i++) {
+            img_info_size *= inputs[INPUT_IM_INFO]->getTensorDesc().getDims()[i];
+        }
+
+        const int anchors_num = inputs[INPUT_SCORES]->getTensorDesc().getDims()[0];
+
+        // bottom shape: (num_anchors) x H x W
+        const int bottom_H = inputs[INPUT_DELTAS]->getTensorDesc().getDims()[1];
+        const int bottom_W = inputs[INPUT_DELTAS]->getTensorDesc().getDims()[2];
+
+        // input image height & width
+        const float img_H = p_img_info_cpu[0];
+        const float img_W = p_img_info_cpu[1];
+
+        // scale factor for height & width
+
+        // minimum box width & height
+        const float min_box_H = min_size_;
+        const float min_box_W = min_size_;
+
+        // number of all proposals = num_anchors * H * W
+        const int num_proposals = anchors_num * bottom_H * bottom_W;
+
+        // number of top-n proposals before NMS
+        const int pre_nms_topn = std::min<int>(num_proposals, pre_nms_topn_);
+
+        // number of final RoIs
+        int num_rois = 0;
+
+        // enumerate all proposals
+        //   num_proposals = num_anchors * H * W
+        //   (x1, y1, x2, y2, score) for each proposal
+        // NOTE: for bottom, only foreground scores are passed
+        struct ProposalBox {
+            float x0;
+            float y0;
+            float x1;
+            float y1;
+            float score;
+        };
+        std::vector<ProposalBox> proposals_(num_proposals);
+        std::vector<float> unpacked_boxes(5 * pre_nms_topn);
+        std::vector<int> is_dead(pre_nms_topn);
+
+        // Execute
+        int batch_size = 1;  // inputs[INPUT_DELTAS]->getTensorDesc().getDims()[0];
+        for (int n = 0; n < batch_size; ++n) {
+            refine_anchors(p_deltas_item, p_scores_item, p_anchors_item,
+                           reinterpret_cast<float *>(&proposals_[0]), anchors_num, bottom_H,
+                           bottom_W, img_H, img_W,
+                           min_box_H, min_box_W,
+                           static_cast<const float>(log(1000. / 16.)),
+                           1.0f);
+            std::partial_sort(proposals_.begin(), proposals_.begin() + pre_nms_topn, proposals_.end(),
+                              [](const ProposalBox& struct1, const ProposalBox& struct2) {
+                                  return (struct1.score > struct2.score);
+                              });
+
+            unpack_boxes(reinterpret_cast<float *>(&proposals_[0]), &unpacked_boxes[0], pre_nms_topn);
+            nms_cpu(pre_nms_topn, &is_dead[0], &unpacked_boxes[0], &roi_indices_[0], &num_rois, 0,
+                    nms_thresh_, post_nms_topn_, coordinates_offset);
+            fill_output_blobs(&unpacked_boxes[0], &roi_indices_[0], p_roi_item, p_roi_score_item,
+                              pre_nms_topn, num_rois, post_nms_topn_);
+        }
+
+        return OK;
+    }
+
+private:
+    float min_size_;
+    int pre_nms_topn_;
+    int post_nms_topn_;
+    float nms_thresh_;
+    float coordinates_offset;
+
+    std::vector<int> roi_indices_;
+};
+
+class ONNXCustomProposalFactory : public ImplFactory<ONNXCustomProposalImpl> {
+public:
+    explicit ONNXCustomProposalFactory(const CNNLayer *layer): ImplFactory(layer) {}
+    // set output shapes by input shapes.
+    StatusCode getShapes(const std::vector<TensorDesc>& inShapes, std::vector<TensorDesc>& outShapes,
+                         ResponseDesc *resp) noexcept override {
+        if (inShapes.size() != 1) {
+            if (resp) {
+                std::string errorMsg = "Incorrect input shapes!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return GENERAL_ERROR;
+        }
+        outShapes.clear();
+        outShapes.emplace_back(cnnLayer.precision, inShapes[0].getDims(), inShapes[0].getLayout());
+        return OK;
+    }
+};
+
+REG_FACTORY_FOR(ONNXCustomProposalFactory, ExperimentalDetectronGenerateProposalsSingleImage);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_psroi.cpp b/inference-engine/src/extension/ext_psroi.cpp
index 355a3e60c..71bd3f654 100644
--- a/inference-engine/src/extension/ext_psroi.cpp
+++ b/inference-engine/src/extension/ext_psroi.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -26,6 +26,9 @@ public:
             spatial_scale_ = layer->GetParamAsFloat("spatial_scale");
             pooled_height_ = group_size_;
             pooled_width_ = group_size_;
+            spatial_bins_x_ = static_cast<size_t>(layer->GetParamAsInt("spatial_bins_x", 1));
+            spatial_bins_y_ = static_cast<size_t>(layer->GetParamAsInt("spatial_bins_y", 1));
+            mode_ = layer->GetParamAsString("mode", "average");
 
             SizeVector inDims = layer->insData[0].lock()->getTensorDesc().getDims();
             channels = static_cast<int>(inDims[1]);
@@ -59,51 +62,116 @@ public:
             }
         }
 
+        size_t num_bins = spatial_bins_x_*spatial_bins_y_;
+
         parallel_for(real_rois, [&](int n) {
             const float* bottom_rois = bottom_rois_beginning + n * 5;
             int roi_batch_ind = static_cast<int>(bottom_rois[0]);
-            float roi_start_w = static_cast<float>(round(bottom_rois[1])) * spatial_scale_;
-            float roi_start_h = static_cast<float>(round(bottom_rois[2])) * spatial_scale_;
-            float roi_end_w   = static_cast<float>(round(bottom_rois[3]) + 1.0f) * spatial_scale_;
-            float roi_end_h   = static_cast<float>(round(bottom_rois[4]) + 1.0f) * spatial_scale_;
-
-            // Force too small ROIs to be 1x1
-            float roi_width  = std::max<float>(roi_end_w - roi_start_w, 0.1f);  // avoid 0
-            float roi_height = std::max<float>(roi_end_h - roi_start_h, 0.1f);
-
-            float bin_size_h = roi_height / static_cast<float>(pooled_height_);
-            float bin_size_w = roi_width  / static_cast<float>(pooled_width_);
+            float roi_start_w = 0.0f;
+            float roi_start_h = 0.0f;
+            float roi_end_w   = 0.0f;
+            float roi_end_h   = 0.0f;
+            float roi_width   = 0.0f;
+            float roi_height  = 0.0f;
+
+            if (mode_ == "bilinear") {
+                roi_start_w = bottom_rois[1] * spatial_scale_;
+                roi_start_h = bottom_rois[2] * spatial_scale_;
+                roi_end_w = bottom_rois[3] * spatial_scale_;
+                roi_end_h = bottom_rois[4] * spatial_scale_;
+                roi_width  = roi_end_w - roi_start_w;
+                roi_height = roi_end_h - roi_start_h;
+            } else if (mode_ == "average") {
+                roi_start_w = static_cast<float>(round(bottom_rois[1])) * spatial_scale_;
+                roi_start_h = static_cast<float>(round(bottom_rois[2])) * spatial_scale_;
+                roi_end_w   = static_cast<float>(round(bottom_rois[3]) + 1.0f) * spatial_scale_;
+                roi_end_h   = static_cast<float>(round(bottom_rois[4]) + 1.0f) * spatial_scale_;
+                // Force too small ROIs to be 1x1
+                roi_width  = std::max<float>(roi_end_w - roi_start_w, 0.1f);  // avoid 0
+                roi_height = std::max<float>(roi_end_h - roi_start_h, 0.1f);
+            }
 
             for (int c = 0; c < nc; c++) {
                 for (int h = 0; h < nh; h++) {
-                    int hstart = floor(static_cast<float>(h + 0) * bin_size_h + roi_start_h);
-                    int hend = ceil(static_cast<float>(h + 1) * bin_size_h + roi_start_h);
-
-                    hstart = std::min<int>(std::max<int>(hstart, 0), height);
-                    hend = std::min<int>(std::max<int>(hend, 0), height);
-
                     for (int w = 0; w < nw; w++) {
-                        int index = n * nc * nh * nw + c * nh * nw + h * nw + w;
+                        size_t index = n*nc*nh*nw + c*nh*nw + h*nw + w;
                         dst_data[index] = 0.0f;
 
-                        int wstart = floor(static_cast<float>(w + 0) * bin_size_w + roi_start_w);
-                        int wend = ceil(static_cast<float>(w + 1) * bin_size_w + roi_start_w);
-
-                        wstart = std::min<int>(std::max<int>(wstart, 0), width);
-                        wend = std::min<int>(std::max<int>(wend, 0), width);
-
-                        float bin_area = (hend - hstart) * (wend - wstart);
-                        if (bin_area) {
-                            int gc = (c * group_size_ + h) * group_size_ + w;
-                            const float *bottom_data =
-                                    bottom_data_beginning + ((roi_batch_ind * channels + gc) * height * width);
-
-                            float out_sum = 0.0f;
-                            for (int hh = hstart; hh < hend; ++hh)
-                                for (int ww = wstart; ww < wend; ++ww)
-                                    out_sum += bottom_data[hh * width + ww];
-
-                            dst_data[index] = out_sum / bin_area;
+                        if (mode_ == "average") {
+                            float bin_size_h = roi_height / static_cast<float>(pooled_height_);
+                            float bin_size_w = roi_width  / static_cast<float>(pooled_width_);
+
+                            int hstart = static_cast<int>(floor(static_cast<float>(h + 0) * bin_size_h + roi_start_h));
+                            int hend = static_cast<int>(ceil(static_cast<float>(h + 1) * bin_size_h + roi_start_h));
+
+                            hstart = std::min<int>(std::max<int>(hstart, 0), height);
+                            hend = std::min<int>(std::max<int>(hend, 0), height);
+                            int wstart = static_cast<int>(floor(static_cast<float>(w + 0) * bin_size_w + roi_start_w));
+                            int wend = static_cast<int>(ceil(static_cast<float>(w + 1) * bin_size_w + roi_start_w));
+
+                            wstart = std::min<int>(std::max<int>(wstart, 0), width);
+                            wend = std::min<int>(std::max<int>(wend, 0), width);
+
+                            float bin_area = static_cast<float>((hend - hstart) * (wend - wstart));
+                            if (bin_area) {
+                                int gc = (c * group_size_ + h) * group_size_ + w;
+                                const float *bottom_data =
+                                        bottom_data_beginning + ((roi_batch_ind * channels + gc) * height * width);
+
+                                float out_sum = 0.0f;
+                                for (int hh = hstart; hh < hend; ++hh)
+                                    for (int ww = wstart; ww < wend; ++ww)
+                                        out_sum += bottom_data[hh * width + ww];
+
+                                dst_data[index] = out_sum / bin_area;
+                            }
+                        } else if (mode_ == "bilinear") {
+                            for (size_t bin_y = 0; bin_y < spatial_bins_y_; bin_y++) {
+                                for (size_t bin_x = 0; bin_x < spatial_bins_x_; bin_x++) {
+                                    float box_xmin = roi_start_w + (bin_x + 0) * (roi_width / spatial_bins_x_);
+                                    float box_xmax = roi_start_w + (bin_x + 1) * (roi_width / spatial_bins_x_);
+                                    float box_ymin = roi_start_h + (bin_y + 0) * (roi_height / spatial_bins_y_);
+                                    float box_ymax = roi_start_h + (bin_y + 1) * (roi_height / spatial_bins_y_);
+
+                                    size_t gc = c + (bin_y*spatial_bins_x_ + bin_x)*nc;
+                                    size_t src_idx = (roi_batch_ind * channels + gc) * height * width;
+                                    const float *bottom_data = bottom_data_beginning + src_idx;
+
+                                    float height_scale = nh > 1 ? (box_ymax - box_ymin) * (height - 1) / (pooled_height_ - 1)
+                                                                : 0.0f;
+                                    float width_scale = nw > 1 ? (box_xmax - box_xmin) * (width - 1) / (pooled_width_ - 1)
+                                                               : 0.0f;
+
+                                    float in_y = nh > 1 ? (h * height_scale + box_ymin * (height - 1))
+                                                        : 0.5f * (box_ymin + box_ymax) * (height - 1);
+                                    float in_x = nw > 1 ? (w * width_scale + box_xmin * (width - 1))
+                                                        : 0.5f * (box_xmin + box_xmax) * (width - 1);
+
+                                    if (!(in_y < 0 || in_y > height - 1 || in_x < 0 || in_x > width - 1)) {
+                                        int top_y_index = static_cast<int>(floorf(in_y));
+                                        int bottom_y_index = static_cast<int>(ceilf(in_y));
+                                        int left_x_index = static_cast<int>(floorf(in_x));
+                                        int right_x_index = static_cast<int>(ceilf(in_x));
+
+                                        if (right_x_index > width - 1)
+                                            right_x_index = width - 1;
+
+                                        if (bottom_y_index > height - 1)
+                                            bottom_y_index = height - 1;
+
+                                        const float top_left = bottom_data[top_y_index * width + left_x_index];
+                                        const float top_right = bottom_data[top_y_index * width + right_x_index];
+                                        const float bottom_left = bottom_data[bottom_y_index * width + left_x_index];
+                                        const float bottom_right = bottom_data[bottom_y_index * width + right_x_index];
+
+                                        const float top = top_left + (top_right - top_left) * (in_x - left_x_index);
+                                        const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index);
+
+                                        dst_data[index] += top + (bottom - top) * (in_y - top_y_index);
+                                    }
+                                }
+                            }
+                            dst_data[index] /= num_bins;
                         }
                     }
                 }
@@ -126,6 +194,9 @@ private:
     float spatial_scale_ = 0;
     size_t pooled_height_ = 0;
     size_t pooled_width_ = 0;
+    size_t spatial_bins_x_ = 0;
+    size_t spatial_bins_y_ = 0;
+    std::string mode_ = "";
 
     int channels = 0;
     int height = 0;
diff --git a/inference-engine/src/extension/ext_range.cpp b/inference-engine/src/extension/ext_range.cpp
new file mode 100644
index 000000000..d438df8b5
--- /dev/null
+++ b/inference-engine/src/extension/ext_range.cpp
@@ -0,0 +1,132 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class RangeImpl: public ExtLayerBase {
+public:
+    explicit RangeImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            if (layer->insData.size() != 3)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
+
+            SizeVector start_dims = layer->insData[RANGE_START].lock()->getTensorDesc().getDims();
+            if (start_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Start scalar should have 1 dimension";
+
+            SizeVector limit_dims = layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getDims();
+            if (limit_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Limit scalar should have 1 dimension";
+
+            SizeVector delta_dims = layer->insData[RANGE_DELTA].lock()->getTensorDesc().getDims();
+            if (delta_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Delta scalar should have 1 dimension";
+
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (dst_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Output vector should have 1 dimension";
+
+            if (!(layer->insData[RANGE_START].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
+                  layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
+                  layer->insData[RANGE_DELTA].lock()->getTensorDesc().getPrecision() == Precision::I32 &&
+                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
+                !(layer->insData[RANGE_START].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
+                  layer->insData[RANGE_LIMIT].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
+                  layer->insData[RANGE_DELTA].lock()->getTensorDesc().getPrecision() == Precision::FP32 &&
+                  layer->outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
+                THROW_IE_EXCEPTION << layer->name <<
+                    " 'Start', 'Limit', 'Delta' input scalars and output tensor should have same precision" <<
+                    "and only FP32 and I32 are supported!";
+            }
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                             { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        StatusCode retcode = OK;
+        switch (outputs[0]->precision()) {
+        case Precision::FP32: {
+            retcode = range((inputs[RANGE_START]->cbuffer().as<float *>() +
+                             inputs[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                            (inputs[RANGE_LIMIT]->cbuffer().as<float *>() +
+                             inputs[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                            (inputs[RANGE_DELTA]->cbuffer().as<float *>() +
+                             inputs[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], outputs[0]);
+        }
+        break;
+        case Precision::I32: {
+            retcode = range((inputs[RANGE_START]->cbuffer().as<int32_t *>() +
+                             inputs[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                            (inputs[RANGE_LIMIT]->cbuffer().as<int32_t *>() +
+                             inputs[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                            (inputs[RANGE_DELTA]->cbuffer().as<int32_t *>() +
+                             inputs[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], outputs[0]);
+        }
+        break;
+        default:
+            if (resp) {
+                std::string errorMsg = "Incorrect output precision. Only FP32 and I32 are supported!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            retcode = GENERAL_ERROR;
+        }
+        if (resp && retcode == PARAMETER_MISMATCH) {
+            std::string errorMsg = "Range indexes exceeds data tensor dimension";
+            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+        }
+        return retcode;
+    }
+
+private:
+    const size_t RANGE_START = 0;
+    const size_t RANGE_LIMIT = 1;
+    const size_t RANGE_DELTA = 2;
+
+    template <typename data_t>
+    StatusCode range(data_t start, data_t limit, data_t delta, Blob::Ptr output);
+};
+
+template <typename data_t>
+StatusCode RangeImpl::range(data_t start, data_t limit, data_t delta, Blob::Ptr output) {
+    size_t dst_size = (output->getTensorDesc().getDims())[0];
+    data_t* dst_data = output->cbuffer().as<data_t *>() +
+                       output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    size_t work_amount_dst = static_cast<size_t>(std::floor(std::abs((limit - start) / delta)));
+    if (work_amount_dst != dst_size)
+        return PARAMETER_MISMATCH;
+
+    parallel_nt(0, [&](const int ithr, const int nthr) {
+        size_t iwork = 0, end = 0;
+        splitter(work_amount_dst, nthr, ithr, iwork, end);
+        data_t dst_value = start + iwork * delta;
+
+        for (; iwork < end; ++iwork, dst_value += delta) {
+            dst_data[iwork] = dst_value;
+        }
+    });
+    return OK;
+}
+REG_FACTORY_FOR(ImplFactory<RangeImpl>, Range);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_region_yolo.cpp b/inference-engine/src/extension/ext_region_yolo.cpp
index 1cda662e8..a53869aa9 100644
--- a/inference-engine/src/extension/ext_region_yolo.cpp
+++ b/inference-engine/src/extension/ext_region_yolo.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,7 +23,7 @@ public:
             classes = layer->GetParamAsInt("classes");
             coords = layer->GetParamAsInt("coords");
             num = layer->GetParamAsInt("num");
-            do_softmax = static_cast<bool>(layer->GetParamAsInt("do_softmax", 1));
+            do_softmax = layer->GetParamAsBool("do_softmax", true);
             mask = layer->GetParamAsInts("mask", {});
 
             addConfig(layer, {DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
diff --git a/inference-engine/src/extension/ext_reorg_yolo.cpp b/inference-engine/src/extension/ext_reorg_yolo.cpp
index ebeecb7ce..8f0e559d8 100644
--- a/inference-engine/src/extension/ext_reorg_yolo.cpp
+++ b/inference-engine/src/extension/ext_reorg_yolo.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/ext_resample.cpp b/inference-engine/src/extension/ext_resample.cpp
index 531158f0a..5c3492c91 100644
--- a/inference-engine/src/extension/ext_resample.cpp
+++ b/inference-engine/src/extension/ext_resample.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -35,7 +35,7 @@ public:
                 THROW_IE_EXCEPTION << "Resample supports only 4D blobs!";
 
             type = layer->GetParamAsString("type");
-            antialias = static_cast<bool>(layer->GetParamAsInt("antialias"));
+            antialias = layer->GetParamAsBool("antialias", false);
 
 #if defined(HAVE_AVX512F)
             auto blk_layout = ConfLayout::BLK16;
@@ -58,6 +58,7 @@ public:
 #undef IN
 #endif
         Layout layout = inputs[0]->layout();
+        Precision precision = inputs[0]->precision();
 
         size_t IN = inputs[0]->getTensorDesc().getDims()[0];
         size_t IC = inputs[0]->getTensorDesc().getDims()[1];
@@ -68,7 +69,11 @@ public:
         size_t OW = outputs[0]->getTensorDesc().getDims()[3];
 
         if (IW == OW && IH == OH && type == "caffe.ResampleParameter.LINEAR") {
-            simple_copy(dst_data, outputs[0]->byteSize(), src_data, IN * IC * IH * IW * sizeof(float));
+            size_t size = IN * IC * IH * IW;
+            if (inputs[0]->getTensorDesc().getPrecision() == Precision::FP32) {
+                size *= sizeof(float);
+            }
+            simple_copy(dst_data, outputs[0]->byteSize(), src_data, size);
             return OK;
         }
 
@@ -79,14 +84,24 @@ public:
 
         if (type == "caffe.ResampleParameter.NEAREST") {
             if (!isDownsample && fx == 0.25f && fy == 0.25f) {
-                if (layout == NCHW) {
-                    Upsample_Nearest_PLN<4>(src_data, dst_data, IN, IC, IH, IW);
+                if (layout == NCHW || layout == NHWC) {
+                    if (precision == Precision::FP32) {
+                        Upsample_Nearest_PLN<float, 4>(src_data, dst_data, IN, IC, IH, IW, layout);
+                    } else {
+                        Upsample_Nearest_PLN<uint8_t, 4>(reinterpret_cast<const uint8_t*>(src_data),
+                                                         reinterpret_cast<uint8_t*>(dst_data), IN, IC, IH, IW, layout);
+                    }
                 } else {
                     Upsample_Nearest_BLK<4>(src_data, dst_data, IN, IC, IH, IW);
                 }
             } else if (!isDownsample && fx == 0.5f && fy == 0.5f) {
-                if (layout == NCHW) {
-                    Upsample_Nearest_PLN<2>(src_data, dst_data, IN, IC, IH, IW);
+                if (layout == NCHW || layout == NHWC) {
+                    if (precision == Precision::FP32) {
+                        Upsample_Nearest_PLN<float, 2>(src_data, dst_data, IN, IC, IH, IW, layout);
+                    } else {
+                        Upsample_Nearest_PLN<uint8_t, 2>(reinterpret_cast<const uint8_t*>(src_data),
+                                                         reinterpret_cast<uint8_t*>(dst_data), IN, IC, IH, IW, layout);
+                    }
                 } else {
                     Upsample_Nearest_BLK<2>(src_data, dst_data, IN, IC, IH, IW);
                 }
@@ -143,8 +158,8 @@ private:
                         float ax = 1.0f / (antialias ? fx : 1.0f);
                         float ay = 1.0f / (antialias ? fy : 1.0f);
 
-                        int rx = (fx < 1.0f) ? 2 : ceil(static_cast<float>(kernel_width) / ax);
-                        int ry = (fy < 1.0f) ? 2 : ceil(static_cast<float>(kernel_width) / ay);
+                        int rx = (fx < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ax));
+                        int ry = (fy < 1.0f) ? 2 : static_cast<int>(ceil(static_cast<float>(kernel_width) / ay));
 
                         for (int y = iy_r - ry; y <= iy_r + ry; y++) {
                             for (int x = ix_r - rx; x <= ix_r + rx; x++) {
@@ -169,13 +184,13 @@ private:
     }
 
     static void NearestNeighborKernel_PLN(const float *in_ptr_, float *out_ptr_, int B, int C, int IH, int IW, float fx, float fy, int OH, int OW) {
-        for (size_t b = 0; b < B; b++) {
-            for (size_t c = 0; c < C; c++) {
+        for (int b = 0; b < B; b++) {
+            for (int c = 0; c < C; c++) {
                 const float *in_ptr = in_ptr_ + IW * IH * C * b + IW * IH * c;
                 float *out_ptr = out_ptr_ + OW * OH * C * b + OW * OH * c;
 
-                for (size_t oy = 0; oy < OH; oy++) {
-                    for (size_t ox = 0; ox < OW; ox++) {
+                for (int oy = 0; oy < OH; oy++) {
+                    for (int ox = 0; ox < OW; ox++) {
                         float ix = ox * fx + fy / 2.0f - 0.5f;
                         float iy = oy * fy + fx / 2.0f - 0.5f;
 
@@ -191,15 +206,15 @@ private:
 
     static void NearestNeighborKernel_BLK(const float *in_ptr_, float *out_ptr_, int B, int C, int IH, int IW, float fx, float fy, int OH, int OW) {
         int blk_size = 8;
-        size_t CB = (size_t)div_up(C, blk_size);
+        int CB = div_up(C, blk_size);
 
-        for (size_t b = 0; b < B; b++) {
-            for (size_t cb = 0; cb < CB; cb++) {
+        for (int b = 0; b < B; b++) {
+            for (int cb = 0; cb < CB; cb++) {
                 const float *in_ptr = in_ptr_ + IW * IH * CB * blk_size * b + IW * IH * cb * blk_size;
                 float *out_ptr = out_ptr_ + OW * OH * CB * blk_size * b + OW * OH * cb * blk_size;
 
-                for (size_t oy = 0; oy < OH; oy++) {
-                    for (size_t ox = 0; ox < OW; ox++) {
+                for (int oy = 0; oy < OH; oy++) {
+                    for (int ox = 0; ox < OW; ox++) {
                         float ix = ox * fx + fy / 2.0f - 0.5f;
                         float iy = oy * fy + fx / 2.0f - 0.5f;
 
@@ -217,30 +232,67 @@ private:
         }
     }
 
-    template <int factor>
-    static void Upsample_Nearest_PLN(const float *in_ptr_, float *out_ptr_, int B, int C, int IH, int IW) {
+    template <typename T, int factor>
+    static void Upsample_Nearest_PLN(const T *in_ptr_, T *out_ptr_, int B, int C, int IH, int IW, Layout layout) {
         int OH = factor * IH;
         int OW = factor * IW;
 
-        for (size_t b = 0; b < B; b++) {
-            for (size_t c = 0; c < C; c++) {
-                const float *in_ptr = in_ptr_ + IW * IH * C * b + IW * IH * c;
-                float *out_ptr = out_ptr_ + OW * OH * C * b + OW * OH * c;
+        if (layout == NCHW) {
+            for (int b = 0; b < B; b++) {
+                for (int c = 0; c < C; c++) {
+                    const T *in_ptr = in_ptr_ + IW * IH * C * b + IW * IH * c;
+                    T *out_ptr = out_ptr_ + OW * OH * C * b + OW * OH * c;
 
-                for (size_t iy = 0; iy < IH; iy++) {
-                    for (size_t ix = 0; ix < IW; ix++) {
-                        size_t oy = factor * iy;
-                        size_t ox = factor * ix;
-                        float value = in_ptr[iy * IW + ix];
+                    for (int iy = 0; iy < IH; iy++) {
+                        for (int ix = 0; ix < IW; ix++) {
+                            int oy = factor * iy;
+                            int ox = factor * ix;
+                            float value = in_ptr[iy * IW + ix];
 
-                        for (int fh = 0; fh < factor; fh++) {
-                            for (int fw = 0; fw < factor; fw++) {
-                                out_ptr[(oy + fh) * OW + ox + fw] = value;
+                            for (int fh = 0; fh < factor; fh++) {
+                                for (int fw = 0; fw < factor; fw++) {
+                                    out_ptr[(oy + fh) * OW + ox + fw] = static_cast<T>(value);
+                                }
                             }
                         }
                     }
                 }
             }
+        } else {
+            int block_size = C;
+            int block_size_bytes = block_size * sizeof(T);
+
+            int ICIWIH = C * IW * IH;
+            int OWOH = OW * OH;
+            int OCOWOH = C * OWOH;
+
+            int stepX = factor;
+            int stepY = factor;
+
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2)
+#endif
+            for (int mb = 0; mb < B; mb++) {
+                for (int oh = 0; oh < OH; oh += stepY) {
+                    size_t dst_off = mb * OCOWOH + (oh * OW) * block_size;
+                    size_t src_off = mb * ICIWIH + (oh / stepY * IW) * block_size;
+
+                    for (int ow = 0; ow < OW; ow += stepX) {
+                        size_t dst_off_curr = dst_off + ow * block_size;
+                        size_t src_off_curr = src_off + ow / stepX * block_size;
+
+                        memcpy(&out_ptr_[dst_off_curr], &in_ptr_[src_off_curr], block_size_bytes);
+
+                        for (int owx = 1; owx < stepX; owx++) {
+                            memcpy(&out_ptr_[dst_off_curr + block_size * owx], &in_ptr_[src_off_curr], block_size_bytes);
+                        }
+                    }
+
+                    for (int ohy = 1; ohy < stepY; ohy++) {
+                        memcpy(&out_ptr_[dst_off + OW * block_size * ohy], &out_ptr_[dst_off], block_size_bytes * OW);
+                    }
+                }
+            }
         }
     }
 
@@ -268,10 +320,10 @@ private:
                 const float *in_ptr = in_ptr_ + IW * IH * CB * blk_size * b + IW * IH * cb * blk_size;
                 float *out_ptr = out_ptr_ + OW * OH * CB * blk_size * b + OW * OH * cb * blk_size;
 
-                for (size_t iy = 0; iy < IH; iy++) {
-                    for (size_t ix = 0; ix < IW; ix++) {
-                        size_t oy = factor * iy;
-                        size_t ox = factor * ix;
+                for (int iy = 0; iy < IH; iy++) {
+                    for (int ix = 0; ix < IW; ix++) {
+                        int oy = factor * iy;
+                        int ox = factor * ix;
 
                         vec_type vsrc = _mm_uni_loadu_ps(in_ptr + iy * IW * blk_size + ix * blk_size);
 
diff --git a/inference-engine/src/extension/ext_reverse_sequence.cpp b/inference-engine/src/extension/ext_reverse_sequence.cpp
new file mode 100644
index 000000000..5780ef26a
--- /dev/null
+++ b/inference-engine/src/extension/ext_reverse_sequence.cpp
@@ -0,0 +1,179 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include <algorithm>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class ReverseSequenceImpl: public ExtLayerBase {
+public:
+    explicit ReverseSequenceImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() != 2 || layer->outData.size() != 1)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            src_dims = layer->insData[REVERSESEQUENCE_DATA].lock()->getTensorDesc().getDims();
+            SizeVector seq_lengths_dims = layer->insData[REVERSESEQUENCE_LENGTHS].lock()->getTensorDesc().getDims();
+            if (layer->insData[REVERSESEQUENCE_LENGTHS].lock()->getTensorDesc().getPrecision() != Precision::I32 &&
+                layer->insData[REVERSESEQUENCE_LENGTHS].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'seq_lengths' input precision. Only FP32 and I32 are supported!";
+            if (seq_lengths_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Seq_lengths vector should be 1 dimension";
+
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (src_dims.size() != dst_dims.size())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output sizes!";
+
+            for (size_t i = 0; i < dst_dims.size(); i++) {
+                if (src_dims[i] != dst_dims[i])
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimension!";
+            }
+
+            seq_axis = layer->GetParamAsInt("seq_axis", 1);
+            if (seq_axis < 0)
+                seq_axis += src_dims.size();
+
+            if (seq_axis < 0 || seq_axis >= static_cast<int>(src_dims.size()))
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'seq_axis' parameters dimensions and axis number!";
+
+            batch_axis = layer->GetParamAsInt("batch_axis", 0);
+            if (batch_axis < 0)
+                batch_axis += src_dims.size();
+
+            if (batch_axis < 0 || batch_axis >= static_cast<int>(src_dims.size()))
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'batch_axis' parameters dimensions and axis number!";
+
+            if (seq_lengths_dims[0] != dst_dims[batch_axis])
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'seq_lengths_dims' parameters dimension!";
+
+            srcStrides = layer->insData[REVERSESEQUENCE_DATA].lock()->getTensorDesc().getBlockingDesc().getStrides();
+            work_amount_dst = srcStrides[0] * src_dims[0];
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        size_t i;
+        const float *src_data = inputs[REVERSESEQUENCE_DATA]->cbuffer().as<const float *>() +
+                                inputs[REVERSESEQUENCE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float* dst_data = outputs[0]->cbuffer().as<float *>() +
+                          outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        switch (inputs[REVERSESEQUENCE_LENGTHS]->precision()) {
+            case Precision::FP32: {
+                float *seq_lengths_data = inputs[REVERSESEQUENCE_LENGTHS]->cbuffer().as<float *>() +
+                                          inputs[REVERSESEQUENCE_LENGTHS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                for (i = 0; i < src_dims[batch_axis]; i++) {
+                    if (static_cast<int32_t>(seq_lengths_data[i]) > static_cast<int>(src_dims[seq_axis])) {
+                        if (resp) {
+                            std::string errorMsg = "Incorrect input 'seq_lengths' values!";
+                            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                        }
+                        return PARAMETER_MISMATCH;
+                    }
+                }
+
+                parallel_nt(0, [&](const int ithr, const int nthr) {
+                    size_t i, start = 0, end = 0, src_idx = 0;
+                    SizeVector counters(src_dims.size(), 0);
+                    splitter(work_amount_dst, nthr, ithr, start, end);
+                    for (int j = src_dims.size() - 1, i = start; j >= 0; j--) {
+                        counters[j] = i % src_dims[j];
+                        i /= src_dims[j];
+                    }
+
+                    for (size_t iwork = start; iwork < end; ++iwork) {
+                        for (i = 0, src_idx = 0; i < src_dims.size(); ++i) {
+                            size_t idx = counters[i];
+                            if (static_cast<int>(i) == seq_axis &&
+                                    static_cast<int>(idx) < static_cast<int32_t>(seq_lengths_data[counters[batch_axis]])) {
+                                idx = static_cast<int32_t>(seq_lengths_data[counters[batch_axis]]) - idx - 1;
+                            }
+                            src_idx += idx * srcStrides[i];
+                        }
+                        dst_data[iwork] = src_data[src_idx];
+                        for (int j = src_dims.size() - 1; j >= 0; j--) {
+                            counters[j] = (counters[j] + 1) % src_dims[j];
+                            if (counters[j] != 0) break;
+                        }
+                    }
+                });
+            }
+            break;
+            case Precision::I32: {
+                int32_t *seq_lengths_data = inputs[REVERSESEQUENCE_LENGTHS]->cbuffer().as<int32_t *>() +
+                                            inputs[REVERSESEQUENCE_LENGTHS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                for (i = 0; i < src_dims[batch_axis]; i++) {
+                    if (seq_lengths_data[i] > static_cast<int>(src_dims[seq_axis])) {
+                        if (resp) {
+                            std::string errorMsg = "Incorrect input 'seq_lengths' values!";
+                            errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                        }
+                        return PARAMETER_MISMATCH;
+                    }
+                }
+
+                parallel_nt(0, [&](const int ithr, const int nthr) {
+                    size_t i, start = 0, end = 0, src_idx = 0;
+                    SizeVector counters(src_dims.size(), 0);
+                    splitter(work_amount_dst, nthr, ithr, start, end);
+                    for (int j = src_dims.size() - 1, i = start; j >= 0; j--) {
+                        counters[j] = i % src_dims[j];
+                        i /= src_dims[j];
+                    }
+
+                    for (size_t iwork = start; iwork < end; ++iwork) {
+                        for (i = 0, src_idx = 0; i < src_dims.size(); ++i) {
+                            size_t idx = counters[i];
+                            if (static_cast<int>(i) == seq_axis &&
+                                    static_cast<int>(idx) < seq_lengths_data[counters[batch_axis]]) {
+                                idx = seq_lengths_data[counters[batch_axis]] - idx - 1;
+                            }
+                            src_idx += idx * srcStrides[i];
+                        }
+                        dst_data[iwork] = src_data[src_idx];
+                        for (int j = src_dims.size() - 1; j >= 0; j--) {
+                            counters[j] = (counters[j] + 1) % src_dims[j];
+                            if (counters[j] != 0) break;
+                        }
+                    }
+                });
+            }
+            break;
+            default:
+                return GENERAL_ERROR;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t REVERSESEQUENCE_DATA = 0;
+    const size_t REVERSESEQUENCE_LENGTHS = 1;
+
+    int seq_axis;
+    int batch_axis;
+    SizeVector src_dims;
+    SizeVector srcStrides;
+    size_t work_amount_dst;
+};
+
+REG_FACTORY_FOR(ImplFactory<ReverseSequenceImpl>, ReverseSequence);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_roifeatureextractor_onnx.cpp b/inference-engine/src/extension/ext_roifeatureextractor_onnx.cpp
new file mode 100644
index 000000000..8c7a0965a
--- /dev/null
+++ b/inference-engine/src/extension/ext_roifeatureextractor_onnx.cpp
@@ -0,0 +1,413 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+// There are some code snippets in this file.
+// Original source file is avaialble here (Copyright (c) 2018 Facebook, MIT License):
+// https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/cpu/ROIAlign_cpu.cpp
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+#include <cassert>
+#include <cmath>
+#include <vector>
+#include <string>
+#include <algorithm>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+// implementation taken from Caffe2
+template <typename T>
+struct PreCalc {
+  int pos1;
+  int pos2;
+  int pos3;
+  int pos4;
+  T w1;
+  T w2;
+  T w3;
+  T w4;
+};
+
+template <typename T>
+void pre_calc_for_bilinear_interpolate(
+    const int height,
+    const int width,
+    const int pooled_height,
+    const int pooled_width,
+    const int iy_upper,
+    const int ix_upper,
+    T roi_start_h,
+    T roi_start_w,
+    T bin_size_h,
+    T bin_size_w,
+    int roi_bin_grid_h,
+    int roi_bin_grid_w,
+    std::vector<PreCalc<T>>& pre_calc) {
+  int pre_calc_index = 0;
+  for (int ph = 0; ph < pooled_height; ph++) {
+    for (int pw = 0; pw < pooled_width; pw++) {
+      for (int iy = 0; iy < iy_upper; iy++) {
+        const T yy = roi_start_h + ph * bin_size_h +
+            static_cast<T>(iy + .5f) * bin_size_h /
+                static_cast<T>(roi_bin_grid_h);  // e.g., 0.5, 1.5
+        for (int ix = 0; ix < ix_upper; ix++) {
+          const T xx = roi_start_w + pw * bin_size_w +
+              static_cast<T>(ix + .5f) * bin_size_w /
+                  static_cast<T>(roi_bin_grid_w);
+
+          T x = xx;
+          T y = yy;
+          // deal with: inverse elements are out of feature map boundary
+          if (y < -1.0 || y > height || x < -1.0 || x > width) {
+            // empty
+            PreCalc<T> pc;
+            pc.pos1 = 0;
+            pc.pos2 = 0;
+            pc.pos3 = 0;
+            pc.pos4 = 0;
+            pc.w1 = 0;
+            pc.w2 = 0;
+            pc.w3 = 0;
+            pc.w4 = 0;
+            pre_calc.at(pre_calc_index) = pc;
+            pre_calc_index += 1;
+            continue;
+          }
+
+          if (y <= 0) {
+            y = 0;
+          }
+          if (x <= 0) {
+            x = 0;
+          }
+
+          int y_low = static_cast<int>(y);
+          int x_low = static_cast<int>(x);
+          int y_high = 0;
+          int x_high = 0;
+
+          if (y_low >= height - 1) {
+            y_high = y_low = height - 1;
+            y = (T)y_low;
+          } else {
+            y_high = y_low + 1;
+          }
+
+          if (x_low >= width - 1) {
+            x_high = x_low = width - 1;
+            x = (T)x_low;
+          } else {
+            x_high = x_low + 1;
+          }
+
+          T ly = y - y_low;
+          T lx = x - x_low;
+          T hy = static_cast<T>(1) - ly, hx = static_cast<T>(1) - lx;
+          T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
+
+          // save weights and indeces
+          PreCalc<T> pc;
+          pc.pos1 = y_low * width + x_low;
+          pc.pos2 = y_low * width + x_high;
+          pc.pos3 = y_high * width + x_low;
+          pc.pos4 = y_high * width + x_high;
+          pc.w1 = w1;
+          pc.w2 = w2;
+          pc.w3 = w3;
+          pc.w4 = w4;
+          pre_calc[pre_calc_index] = pc;
+
+          pre_calc_index += 1;
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+void ROIAlignForward_cpu_kernel(
+    const int nthreads,
+    const T* bottom_data,
+    const T& spatial_scale,
+    const int channels,
+    const int height,
+    const int width,
+    const int pooled_height,
+    const int pooled_width,
+    const int sampling_ratio,
+    const T* bottom_rois,
+    T* top_data) {
+  int roi_cols = 4;
+
+  int n_rois = nthreads / channels / pooled_width / pooled_height;
+  // (n, c, ph, pw) is an element in the pooled output
+  parallel_for(n_rois, [&](size_t n) {
+    int index_n = n * channels * pooled_width * pooled_height;
+
+    // roi could have 4 or 5 columns
+    const T* offset_bottom_rois = bottom_rois + n * roi_cols;
+    int roi_batch_ind = 0;
+    if (roi_cols == 5) {
+      roi_batch_ind = static_cast<int>(offset_bottom_rois[0]);
+      offset_bottom_rois++;
+    }
+
+    // Do not using rounding; this implementation detail is critical
+    T roi_start_w = offset_bottom_rois[0] * spatial_scale;
+    T roi_start_h = offset_bottom_rois[1] * spatial_scale;
+    T roi_end_w = offset_bottom_rois[2] * spatial_scale;
+    T roi_end_h = offset_bottom_rois[3] * spatial_scale;
+
+    // Force malformed ROIs to be 1x1
+    T roi_width = std::max(roi_end_w - roi_start_w, (T)1.);
+    T roi_height = std::max(roi_end_h - roi_start_h, (T)1.);
+    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
+    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
+
+    // We use roi_bin_grid to sample the grid and mimic integral
+    int roi_bin_grid_h = (sampling_ratio > 0)
+        ? sampling_ratio
+        : static_cast<int>(ceil(roi_height / pooled_height));  // e.g., = 2
+    int roi_bin_grid_w =
+        (sampling_ratio > 0) ? sampling_ratio : static_cast<int>(ceil(roi_width / pooled_width));
+
+    // We do average (integral) pooling inside a bin
+    const T count = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w);  // e.g. = 4
+
+    // we want to precalculate indeces and weights shared by all chanels,
+    // this is the key point of optimiation
+    std::vector<PreCalc<T>> pre_calc(
+        roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height);
+    pre_calc_for_bilinear_interpolate(
+        height,
+        width,
+        pooled_height,
+        pooled_width,
+        roi_bin_grid_h,
+        roi_bin_grid_w,
+        roi_start_h,
+        roi_start_w,
+        bin_size_h,
+        bin_size_w,
+        roi_bin_grid_h,
+        roi_bin_grid_w,
+        pre_calc);
+
+    for (int c = 0; c < channels; c++) {
+      int index_n_c = index_n + c * pooled_width * pooled_height;
+      const T* offset_bottom_data =
+          bottom_data + (roi_batch_ind * channels + c) * height * width;
+      int pre_calc_index = 0;
+
+      for (int ph = 0; ph < pooled_height; ph++) {
+        for (int pw = 0; pw < pooled_width; pw++) {
+          int index = index_n_c + ph * pooled_width + pw;
+
+          T output_val = 0.;
+          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
+            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
+              PreCalc<T> pc = pre_calc[pre_calc_index];
+              output_val += pc.w1 * offset_bottom_data[pc.pos1] +
+                  pc.w2 * offset_bottom_data[pc.pos2] +
+                  pc.w3 * offset_bottom_data[pc.pos3] +
+                  pc.w4 * offset_bottom_data[pc.pos4];
+
+              pre_calc_index += 1;
+            }
+          }
+          output_val /= count;
+
+          top_data[index] = output_val;
+        }  // for pw
+      }  // for ph
+    }  // for c
+  });
+}
+
+
+void redistribute_rois(const float* rois, int* level_ids,
+                       const int num_rois, const int levels_num) {
+    const float canonical_scale = 224.0f;
+    const int canonical_level = 2;
+
+    for (int i = 0; i < num_rois; ++i) {
+        const float x0 = rois[4 * i + 0];
+        const float y0 = rois[4 * i + 1];
+        const float x1 = rois[4 * i + 2];
+        const float y1 = rois[4 * i + 3];
+
+        int target_level = levels_num;
+        float area = (x1 - x0) * (y1 - y0);
+        if (area > 0) {
+            area = std::sqrt(area) / canonical_scale;
+            area = std::log2(area + 1e-6f);
+            target_level = static_cast<int>(std::floor(area + canonical_level));
+            target_level = std::max<int>(0, std::min<int>(levels_num - 1, target_level));
+        }
+
+        level_ids[i] = target_level;
+    }
+}
+
+
+void reorder(const float* src_data, const int* ranks, const int n, const int step, float* dst_data,
+             int* dst_mapping) {
+    std::iota(dst_mapping, dst_mapping + n, 0);
+    std::sort(dst_mapping, dst_mapping + n, [&ranks](size_t i1, size_t i2) {return ranks[i1] < ranks[i2];});
+    for (int i = 0; i < n; ++i) {
+        const int j = dst_mapping[i];
+        assert(0 <= j && j < n);
+        std::memcpy(dst_data + i * step, src_data + j * step, sizeof(float) * step);
+    }
+}
+
+void split_points(const std::vector<int>& ids, std::vector<int>& rois_per_level, const int levels_num) {
+    rois_per_level.clear();
+    rois_per_level.resize(levels_num, 0);
+    for (size_t i = 0; i < ids.size(); ++i) {
+        assert(0 <= ids[i] && ids[i] < levels_num);
+        rois_per_level[ids[i]]++;
+    }
+    for (int i = 1; i < levels_num; ++i) {
+        rois_per_level[i] += rois_per_level[i - 1];
+    }
+    rois_per_level.insert(rois_per_level.begin(), 0);
+}
+
+
+void reorder_rois(const float *rois, const int* ids, int* mapping, const int rois_num,
+                  float * reordered_rois, std::vector<int>& rois_per_level, const int levels_num) {
+    rois_per_level.clear();
+    rois_per_level.resize(levels_num, 0);
+    for (int i = 0; i < rois_num; ++i) {
+        assert(0 <= ids[i] && ids[i] < levels_num);
+        rois_per_level[ids[i]]++;
+    }
+    for (int i = 1; i < levels_num; ++i) {
+        rois_per_level[i] += rois_per_level[i - 1];
+    }
+    rois_per_level.insert(rois_per_level.begin(), 0);
+
+    std::vector<int> level_counter = rois_per_level;
+
+    for (int i = 0; i < rois_num; ++i) {
+        const int level = ids[i];
+        assert(level < levels_num);
+        const int j = level_counter[level];
+        assert(0 <= j && j < rois_num);
+        reordered_rois[j * 4 + 0] = rois[i * 4 + 0];
+        reordered_rois[j * 4 + 1] = rois[i * 4 + 1];
+        reordered_rois[j * 4 + 2] = rois[i * 4 + 2];
+        reordered_rois[j * 4 + 3] = rois[i * 4 + 3];
+        level_counter[level]++;
+    }
+}
+
+class ExperimentalDetectronROIFeatureExtractorImpl: public ExtLayerBase {
+private:
+    const int INPUT_ROIS {0};
+    const int INPUT_FEATURES_START {1};
+
+    const int OUTPUT_ROI_FEATURES {0};
+    const int OUTPUT_ROIS {1};
+
+public:
+    explicit ExperimentalDetectronROIFeatureExtractorImpl(const CNNLayer* layer) {
+        try {
+            output_dim_ = layer->GetParamAsInt("output_size");
+            pyramid_scales_ = layer->GetParamAsInts("pyramid_scales");
+            sampling_ratio_ = layer->GetParamAsInt("sampling_ratio");
+            pooled_height_ = output_dim_;
+            pooled_width_ = output_dim_;
+
+            std::vector<DataConfigurator> inputs_layouts(layer->insData.size(), DataConfigurator(ConfLayout::PLN));
+            std::vector<DataConfigurator> outputs_layouts(layer->outData.size(), DataConfigurator(ConfLayout::PLN));
+            addConfig(layer, inputs_layouts, outputs_layouts);
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
+                       ResponseDesc *resp) noexcept override {
+        const int levels_num = inputs.size() - INPUT_FEATURES_START;
+        const int num_rois = inputs[INPUT_ROIS]->getTensorDesc().getDims()[0];
+        const int channels_num = inputs[INPUT_FEATURES_START]->getTensorDesc().getDims()[1];
+        const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num;
+
+        auto *input_rois = inputs[INPUT_ROIS]->buffer().as<const float *>();
+        auto *output_rois_features = outputs[OUTPUT_ROI_FEATURES]->buffer().as<float *>();
+        float *output_rois = nullptr;
+        if (OUTPUT_ROIS < static_cast<int>(outputs.size())) {
+            output_rois = outputs[OUTPUT_ROIS]->buffer().as<float *>();
+        }
+
+        std::vector<int> level_ids(num_rois, 0);
+        redistribute_rois(input_rois, reinterpret_cast<int *>(&level_ids[0]), num_rois, levels_num);
+
+        std::vector<float> reordered_rois(4 * num_rois, 0);
+        std::vector<int> original_rois_mapping(num_rois, 0);
+        reorder(input_rois, &level_ids[0], num_rois, 4, &reordered_rois[0], &original_rois_mapping[0]);
+
+        std::vector<int> rois_per_level;
+        split_points(level_ids, rois_per_level, levels_num + 1);
+
+        std::vector<float> output_rois_features_temp(feaxels_per_roi * num_rois, 0);
+        for (int i = 0; i < levels_num; ++i) {
+            const int level_rois_offset = rois_per_level[i];
+            const int level_rois_num = rois_per_level[i + 1] - level_rois_offset;
+            if (level_rois_num > 0) {
+                auto *featuremap = inputs[INPUT_FEATURES_START + i]->buffer().as<const float *>();
+                const int featuremap_height = inputs[INPUT_FEATURES_START + i]->getTensorDesc().getDims()[2];
+                const int featuremap_width = inputs[INPUT_FEATURES_START + i]->getTensorDesc().getDims()[3];
+                ROIAlignForward_cpu_kernel<float>(feaxels_per_roi * level_rois_num,
+                    featuremap,
+                    1.0f / pyramid_scales_[i],
+                    channels_num,
+                    featuremap_height,
+                    featuremap_width,
+                    pooled_height_,
+                    pooled_width_,
+                    sampling_ratio_,
+                    &reordered_rois[4 * level_rois_offset],
+                    &output_rois_features_temp[feaxels_per_roi * level_rois_offset]);
+            }
+        }
+
+        std::vector<int> dummy_mapping(num_rois, 0);
+        reorder(&output_rois_features_temp[0], &original_rois_mapping[0], num_rois, feaxels_per_roi,
+                output_rois_features, &dummy_mapping[0]);
+        if (output_rois != nullptr) {
+            std::memcpy(output_rois, input_rois, 4 * num_rois * sizeof(float));
+        }
+
+        return OK;
+    }
+
+private:
+    int output_dim_ = 0;
+    int pooled_height_ = 0;
+    int pooled_width_ = 0;
+    std::vector<int> pyramid_scales_;
+    int sampling_ratio_ = 0;
+
+    int channels = 0;
+    int height = 0;
+    int width = 0;
+
+    int nn = 0;
+    int nc = 0;
+    int nh = 0;
+    int nw = 0;
+};
+
+REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronROIFeatureExtractorImpl>, ExperimentalDetectronROIFeatureExtractor);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_shuffle_channels.cpp b/inference-engine/src/extension/ext_shuffle_channels.cpp
new file mode 100644
index 000000000..79b23dadb
--- /dev/null
+++ b/inference-engine/src/extension/ext_shuffle_channels.cpp
@@ -0,0 +1,149 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class ShuffleChannelsImpl: public ExtLayerBase {
+#define CNTR_SIZE 3
+
+__inline size_t initter(size_t start, size_t size, size_t* counters, size_t* own_dims, size_t* ownStrides) {
+    size_t i = start;
+    size_t idx = 0;
+    for (int j = size - 1; j >= 0; j--) {
+        counters[j] = i % own_dims[j];
+        idx += counters[j] * ownStrides[j];
+        i /= own_dims[j];
+    }
+    return idx;
+}
+
+__inline size_t updater(size_t idx, size_t size, size_t* counters, size_t* own_dims, size_t* ownStrides) {
+    size_t i = 1;
+    for (int j = size - 1; j >= 0; j--) {
+        counters[j]++;
+        if (counters[j] < own_dims[j]) {
+            idx += ownStrides[j];
+            break;
+        } else {
+            counters[j] = 0;
+            i = 0;
+        }
+    }
+    if (!i) {
+        for (idx = 0; i < CNTR_SIZE; ++i)
+            idx += counters[i] * ownStrides[i];
+    }
+    return idx;
+}
+
+public:
+    explicit ShuffleChannelsImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (src_dims.size() != dst_dims.size())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";
+
+            if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";
+
+            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";
+
+            int axis = layer->GetParamAsInt("axis", 1);
+            if (axis < 0)
+                axis += dst_dims.size();
+
+            if (axis < 0 || axis >= static_cast<int>(dst_dims.size()))
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimensions and axis number!";
+
+            size_t group = layer->GetParamAsUInt("group", 1);
+            if (group == 0 || dst_dims[axis] % group)
+                THROW_IE_EXCEPTION << layer->name << " Group parameter must evenly divide the channel dimension!";
+
+            //  Find number of dictionaries, index range and data length
+            own_dims[0] = 1;
+            for (int i = 0; i < axis; i++)
+                own_dims[0] *= dst_dims[i];
+
+            for (size_t i = axis + 1; i < dst_dims.size(); i++)
+                dataLength *= dst_dims[i];
+
+            if (dataLength == 0)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
+
+            own_dims[1] = dst_dims[axis] / group;
+            own_dims[2] = group;
+            ownStrides[0] = dst_dims[axis];
+            ownStrides[1] = 1;
+            ownStrides[2] = own_dims[1];
+            work_amount_dst = ownStrides[0] * own_dims[0];
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
+            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float* dst_data = outputs[0]->cbuffer().as<float *>() +
+            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        if (dataLength > 1) {
+            //  Vectorized & Parallel
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t start = 0, end = 0, src_idx = 0;
+                size_t counters[CNTR_SIZE] = { 0 };
+                splitter(work_amount_dst, nthr, ithr, start, end);
+                src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
+                for (size_t iwork = start, dst_idx = start * dataLength; iwork < end; ++iwork, dst_idx += dataLength) {
+                    memcpy(&dst_data[dst_idx], &src_data[dataLength * src_idx], sizeof(float) * dataLength);
+                    src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
+                }
+            });
+        } else {
+            //  Parallel
+            parallel_nt(0, [&](const int ithr, const int nthr) {
+                size_t start = 0, end = 0, src_idx = 0;
+                size_t counters[CNTR_SIZE] = { 0 };
+                splitter(work_amount_dst, nthr, ithr, start, end);
+                src_idx = initter(start, CNTR_SIZE, counters, own_dims, ownStrides);
+                for (size_t iwork = start; iwork < end; ++iwork) {
+                    dst_data[iwork] = src_data[src_idx];
+                    src_idx = updater(src_idx, CNTR_SIZE, counters, own_dims, ownStrides);
+                }
+            });
+        }
+
+        return OK;
+    }
+
+private:
+    size_t dataLength = 1;
+    size_t work_amount_dst;
+    size_t own_dims[CNTR_SIZE];
+    size_t ownStrides[CNTR_SIZE];
+};
+
+REG_FACTORY_FOR(ImplFactory<ShuffleChannelsImpl>, ShuffleChannels);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_simplernms.cpp b/inference-engine/src/extension/ext_simplernms.cpp
index 72b004abc..cb0e717e4 100644
--- a/inference-engine/src/extension/ext_simplernms.cpp
+++ b/inference-engine/src/extension/ext_simplernms.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -263,9 +263,9 @@ public:
         const float* delta_pred = src_delta->buffer().as<const float*>();
         const float* im_info = inputs[2]->buffer().as<const float*>();
 
-        int IW = im_info[1];
-        int IH = im_info[0];
-        int IS = im_info[2];
+        int IW = static_cast<int>(im_info[1]);
+        int IH = static_cast<int>(im_info[0]);
+        int IS = static_cast<int>(im_info[2]);
 
         int scaled_min_bbox_size = min_box_size_ * IS;
 
@@ -293,8 +293,8 @@ public:
                     simpler_nms_roi_t tmp_roi = simpler_nms_gen_bbox(anchors[anchor_index], bbox_delta, anchor_shift_x, anchor_shift_y);
                     simpler_nms_roi_t roi = tmp_roi.clamp({ 0, 0, static_cast<float>(IW - 1), static_cast<float>(IH - 1)});
 
-                    int bbox_w = roi.x1 - roi.x0 + 1;
-                    int bbox_h = roi.y1 - roi.y0 + 1;
+                    int bbox_w = static_cast<int>(roi.x1 - roi.x0) + 1;
+                    int bbox_h = static_cast<int>(roi.y1 - roi.y0) + 1;
 
                     if (bbox_w >= scaled_min_bbox_size && bbox_h >= scaled_min_bbox_size) {
                         simpler_nms_proposal_t proposal { roi, proposal_confidence, sorted_proposals_confidence.size() };
diff --git a/inference-engine/src/extension/ext_space_to_depth.cpp b/inference-engine/src/extension/ext_space_to_depth.cpp
new file mode 100644
index 000000000..e00bc0a14
--- /dev/null
+++ b/inference-engine/src/extension/ext_space_to_depth.cpp
@@ -0,0 +1,126 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class SpaceToDepthImpl: public ExtLayerBase {
+#define CNTR_SIZE 5
+
+public:
+    explicit SpaceToDepthImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            SizeVector src_dims = layer->insData[0].lock()->getTensorDesc().getDims();
+            if (src_dims.size() < 2)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
+            if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect input precision. Only F32 is supported!";
+
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (dst_dims.size() < 3)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of output dimensions!";
+            if (layer->outData[0]->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect output precision. Only F32 is supported!";
+
+            size_t block_size = layer->GetParamAsUInt("block_size", 1);
+            if (block_size == 0)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
+
+            if (dst_dims[dst_dims.size() - 3] % (block_size * block_size))
+                THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";
+
+            if (src_dims.size() > 2 && dst_dims[dst_dims.size() - 3] != (src_dims[src_dims.size() - 3] * block_size * block_size))
+                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Color dimension is incompatible with block_size!";
+
+            if (src_dims[src_dims.size() - 2] != (dst_dims[dst_dims.size() - 2] * block_size))
+                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Height dimension is incompatible with block_size!";
+
+            if (src_dims[src_dims.size() - 1] != (dst_dims[dst_dims.size() - 1] * block_size))
+                THROW_IE_EXCEPTION << layer->name << " Input/Output tensor Width dimension is incompatible with block_size!";
+
+            own_dims[0] = 1;
+            for (size_t i = 0; i < (dst_dims.size() - 3); i++)
+                own_dims[0] *= dst_dims[i];
+            own_dims[1] = dst_dims[dst_dims.size() - 2];
+            own_dims[2] = dst_dims[dst_dims.size() - 3] / block_size;
+            own_dims[3] = dst_dims[dst_dims.size() - 1];
+            own_dims[4] = block_size;
+
+            size_t C = dst_dims[dst_dims.size() - 2] * dst_dims[dst_dims.size() - 1];
+            ownStrides[0] = dst_dims[dst_dims.size() - 3] * C;
+            ownStrides[1] = dst_dims[dst_dims.size() - 1];
+            ownStrides[2] = block_size * C;
+            ownStrides[3] = 1;
+            ownStrides[4] = C;
+            work_amount_dst = ownStrides[0] * own_dims[0];
+
+            addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        const float *src_data = inputs[0]->cbuffer().as<const float *>() +
+            inputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float* dst_data = outputs[0]->cbuffer().as<float *>() +
+            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        //  Parallel
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t i, start = 0, end = 0, dst_idx = 0;
+            size_t counters[CNTR_SIZE] = { 0 };
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            i = start;
+            for (int j = CNTR_SIZE - 1; j >= 0; j--) {
+                counters[j] = i % own_dims[j];
+                dst_idx += counters[j] * ownStrides[j];
+                i /= own_dims[j];
+            }
+
+            for (size_t iwork = start, i = 1; iwork < end; ++iwork) {
+                dst_data[dst_idx] = src_data[iwork];
+                for (int j = CNTR_SIZE - 1; j >= 0; j--) {
+                    counters[j]++;
+                    if (counters[j] < own_dims[j]) {
+                        dst_idx += ownStrides[j];
+                        break;
+                    } else {
+                        counters[j] = i = 0;
+                    }
+                }
+                if (!i) {
+                    for (dst_idx = 0; i < CNTR_SIZE; ++i)
+                        dst_idx += counters[i] * ownStrides[i];
+                }
+            }
+        });
+
+        return OK;
+    }
+
+private:
+    size_t work_amount_dst;
+    size_t own_dims[CNTR_SIZE];
+    size_t ownStrides[CNTR_SIZE];
+};
+
+REG_FACTORY_FOR(ImplFactory<SpaceToDepthImpl>, SpaceToDepth);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_spatial_transformer.cpp b/inference-engine/src/extension/ext_spatial_transformer.cpp
deleted file mode 100644
index a63fb69e6..000000000
--- a/inference-engine/src/extension/ext_spatial_transformer.cpp
+++ /dev/null
@@ -1,155 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include "ext_list.hpp"
-#include "ext_base.hpp"
-
-#include "matrixmult.h"
-
-#include <algorithm>
-#include <vector>
-#include <cmath>
-#include <map>
-#include <string>
-
-namespace InferenceEngine {
-namespace Extensions {
-namespace Cpu {
-
-class SpatialTransformerImpl: public ExtLayerBase {
-public:
-    explicit SpatialTransformerImpl(const CNNLayer* layer) {
-        try {
-            if (layer->insData.size() != 2 || layer->outData.empty())
-                THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
-
-            if (layer->insData[0].lock()->dims.size() != 4)
-                THROW_IE_EXCEPTION << "SpatialTransformer supports only 4D blobs!";
-
-            addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
-        } catch (InferenceEngine::details::InferenceEngineException &ex) {
-            errorMsg = ex.what();
-        }
-    }
-
-    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
-                       ResponseDesc *resp) noexcept override {
-        std::vector<size_t> real_dims = inputs[0]->getTensorDesc().getDims();
-        size_t data_size = inputs[0]->size();
-
-        const auto *src_data = inputs[0]->cbuffer().as<const float *>();
-        auto *theta = inputs[1]->buffer().as<float *>();
-        auto *dst_data = outputs[0]->buffer().as<float *>();
-
-        auto N = real_dims[0];
-        auto C = real_dims[1];
-        auto output_H_ = real_dims[2];
-        auto output_W_ = real_dims[3];
-
-        // Prepare input and output grid
-        std::vector<float> input_grid_data(N * output_H_ * output_W_ * 2);
-        std::vector<float> output_grid_data(3 * output_H_ * output_W_);
-        for (int i = 0; i < output_H_ * output_W_; ++i) {
-            output_grid_data[3 * i] = (i / output_W_) * 1.0 / output_H_ * 2 - 1;
-            output_grid_data[3 * i + 1] = (i % output_W_) * 1.0 / output_W_ * 2 - 1;
-            output_grid_data[3 * i + 2] = 1;
-        }
-
-        // Actually execute
-        for (int i = 0; i < N; ++i) {
-            auto coordinates = input_grid_data.begin() + (output_H_ * output_W_ * 2) * i;
-
-            auto M_size = output_H_ * output_W_;
-            auto N_size = 2;
-            auto K_size = 3;
-
-            matrixMult(&output_grid_data[0], theta + 6 * i, &(*coordinates), M_size, N_size, K_size, true);
-
-            int row_idx;
-            float px, py;
-
-            for (int j = 0; j < C; ++j) {
-                for (int s = 0; s < output_H_; ++s) {
-                    for (int t = 0; t < output_W_; ++t) {
-                        row_idx = output_W_ * s + t;
-
-                        px = coordinates[row_idx * 2];
-                        py = coordinates[row_idx * 2 + 1];
-
-                        size_t dst_offset = ((i * C + j) * output_H_ + s) * output_W_ + t;
-                        size_t src_offset = ((i * C + j) * output_H_ + 0) * output_W_ + 0;
-                        dst_data[dst_offset] = transform_forward_cpu(src_data + src_offset, px, py);
-                    }
-                }
-            }
-        }
-        return OK;
-    }
-
-private:
-    float transform_forward_cpu(const float *pic, float px, float py) {
-        int H = 24;
-        int W = 94;
-
-        float res = 0.0f;
-        float x = (px + 1) / 2 * H;
-        float y = (py + 1) / 2 * W;
-
-        int m, n;
-        float w;
-
-        m = std::floor(x);
-        n = std::floor(y);
-        w = 0;
-        if (m >= 0 && m < H && n >= 0 && n < W) {
-            w = std::max<float>(0.0f, 1 - std::abs(x - m)) * std::max<float>(0.0f, 1 - std::abs(y - n));
-            res += w * pic[m * W + n];
-        }
-
-        m = std::floor(x) + 1;
-        n = std::floor(y);
-        w = 0;
-        if (m >= 0 && m < H && n >= 0 && n < W) {
-            w = std::max<float>(0.0f, 1 - std::abs(x - m)) * std::max<float>(0.0f, 1 - std::abs(y - n));
-            res += w * pic[m * W + n];
-        }
-
-        m = std::floor(x);
-        n = std::floor(y) + 1;
-        w = 0;
-        if (m >= 0 && m < H && n >= 0 && n < W) {
-            w = std::max<float>(0.0f, 1 - std::abs(x - m)) * std::max<float>(0.0f, 1 - std::abs(y - n));
-            res += w * pic[m * W + n];
-        }
-
-        m = std::floor(x) + 1;
-        n = std::floor(y) + 1;
-        w = 0;
-        if (m >= 0 && m < H && n >= 0 && n < W) {
-            w = std::max<float>(0.0f, 1 - std::abs(x - m)) * std::max<float>(0.0f, 1 - std::abs(y - n));
-            res += w * pic[m * W + n];
-        }
-
-        return res;
-    }
-};
-
-class SpatialTransformerShapeInfer : public IShapeInferImpl {
-public:
-    StatusCode inferShapes(const std::vector<SizeVector>& inShapes,
-                           const std::map<std::string, std::string>& params,
-                           const std::map<std::string, Blob::Ptr>& blobs,
-                           std::vector<SizeVector>& outShapes,
-                           ResponseDesc* resp) noexcept override {
-        outShapes.push_back(inShapes[0]);
-        return InferenceEngine::OK;
-    }
-};
-
-REG_FACTORY_FOR(ImplFactory<SpatialTransformerImpl>, SpatialTransformer);
-REG_SHAPE_INFER_FOR_TYPE(SpatialTransformerShapeInfer, SpatialTransformer);
-
-}  // namespace Cpu
-}  // namespace Extensions
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_squeeze.cpp b/inference-engine/src/extension/ext_squeeze.cpp
new file mode 100644
index 000000000..a745031d5
--- /dev/null
+++ b/inference-engine/src/extension/ext_squeeze.cpp
@@ -0,0 +1,123 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class SqueezeImpl: public ExtLayerBase {
+public:
+    explicit SqueezeImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            if (layer->insData.size() != 2)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
+
+            idx_dims = layer->insData[SQUEEZE_INDEXES].lock()->getTensorDesc().getDims();
+            if (idx_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Index vector should be 1 dimension";
+
+            if (layer->insData[SQUEEZE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::I32 &&
+                layer->insData[SQUEEZE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'indices_to_squeeze' input precision. Only FP32 and I32 are supported!";
+
+            data_dims = layer->insData[SQUEEZE_DATA].lock()->getTensorDesc().getDims();
+            SizeVector dst_dims = layer->outData[0]->getTensorDesc().getDims();
+            if (data_dims.size() < dst_dims.size())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output dimensions!";
+
+            if (data_dims.size() <= idx_dims[0] && !(data_dims.size() == 1 && idx_dims[0] == 1))
+                THROW_IE_EXCEPTION << layer->name << " Incompatible number of data dimensions and indexes vector length!";
+
+            addConfig(layer, { { ConfLayout::PLN, false, 0 }, { ConfLayout::ANY, true } }, { { ConfLayout::PLN, false, 0 } });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        switch (inputs[SQUEEZE_INDEXES]->precision()) {
+        case Precision::FP32: {
+            float *idx_data = inputs[SQUEEZE_INDEXES]->cbuffer().as<float *>() +
+                              inputs[SQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            for (size_t i = 0; i < idx_dims[0]; i++) {
+                float axis = idx_data[i];
+                if (axis < 0)
+                    axis += data_dims.size();
+
+                if (axis > static_cast<int>(data_dims.size())) {
+                    if (resp) {
+                        std::string errorMsg = "Index to squeeze exceeds data tensor dimension";
+                        errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                    }
+                    return PARAMETER_MISMATCH;
+                } else if (data_dims[static_cast<int>(axis)] != 1) {
+                    if (resp) {
+                        std::string errorMsg = "Index to squeeze of data tensor dimension is not 1";
+                        errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                    }
+                    return PARAMETER_MISMATCH;
+                }
+            }
+        }
+        break;
+        case Precision::I32: {
+            int32_t *idx_data = inputs[SQUEEZE_INDEXES]->cbuffer().as<int32_t *>() +
+                                inputs[SQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            for (size_t i = 0; i < idx_dims[0]; i++) {
+                int32_t axis = idx_data[i];
+                if (axis < 0)
+                    axis += data_dims.size();
+
+                if (axis > static_cast<int>(data_dims.size())) {
+                    if (resp) {
+                        std::string errorMsg = "Index to squeeze exceeds data tensor dimension";
+                        errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                    }
+                    return PARAMETER_MISMATCH;
+                } else if (data_dims[axis] != 1) {
+                    if (resp) {
+                        std::string errorMsg = "Index to squeeze of data tensor dimension is not 1";
+                        errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                    }
+                    return PARAMETER_MISMATCH;
+                }
+            }
+        }
+        break;
+        default:
+            if (resp) {
+                std::string errorMsg = "Incorrect 'indices_to_squeeze' input precision. Only FP32 and I32 are supported!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return GENERAL_ERROR;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t SQUEEZE_DATA = 0;
+    const size_t SQUEEZE_INDEXES = 1;
+
+    SizeVector data_dims;
+    SizeVector idx_dims;
+};
+
+REG_FACTORY_FOR(ImplFactory<SqueezeImpl>, Squeeze);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_strided_slice.cpp b/inference-engine/src/extension/ext_strided_slice.cpp
new file mode 100644
index 000000000..4a94059d1
--- /dev/null
+++ b/inference-engine/src/extension/ext_strided_slice.cpp
@@ -0,0 +1,380 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include <algorithm>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+inline void clipping(int *idx, const int min, const int max) {
+    (*idx) = ((*idx) > min) ? (*idx) : min;
+    (*idx) = ((*idx) < max) ? (*idx) : (max - 1);
+    return;
+}
+
+class StridedSliceImpl: public ExtLayerBase {
+public:
+    explicit StridedSliceImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() > 4 || layer->outData.size() != 1)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            src_dims = layer->insData[STRIDEDSLICE_DATA].lock()->getTensorDesc().getDims();
+
+            bounds_size = 0;
+            begin_dims = {};
+            if (layer->insData.size() > 1) {
+                begin_dims = layer->insData[STRIDEDSLICE_BEGIN].lock()->getTensorDesc().getDims();
+                if (layer->insData[STRIDEDSLICE_BEGIN].lock()->getTensorDesc().getPrecision() != Precision::I32)
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect 'begin' input precision. Only I32 is supported!";
+                if (begin_dims.size() > 1)
+                    THROW_IE_EXCEPTION << layer->name << " Begin vector should be 1 dimension";
+                bounds_size = begin_dims[0];
+            }
+
+            if (layer->insData.size() > 2) {
+                end_dims = layer->insData[STRIDEDSLICE_END].lock()->getTensorDesc().getDims();
+                if (layer->insData[STRIDEDSLICE_END].lock()->getTensorDesc().getPrecision() != Precision::I32)
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect 'end' input precision. Only I32 is supported!";
+                if (end_dims.size() > 1)
+                    THROW_IE_EXCEPTION << layer->name << " End vector should be 1 dimension";
+                if (begin_dims[0] != end_dims[0])
+                    THROW_IE_EXCEPTION << layer->name << " Begin vector size should be equal end vectror size";
+            }
+
+            if (layer->insData.size() > 3) {
+                stride_dims = layer->insData[STRIDEDSLICE_STRIDE].lock()->getTensorDesc().getDims();
+                if (layer->insData[STRIDEDSLICE_STRIDE].lock()->getTensorDesc().getPrecision() != Precision::I32)
+                    THROW_IE_EXCEPTION << layer->name << " Incorrect 'strides' input precision. Only I32 is supported!";
+                if (stride_dims.size() > 1)
+                    THROW_IE_EXCEPTION << layer->name << " End vector should be 1 dimension";
+                if (begin_dims[0] != stride_dims[0])
+                    THROW_IE_EXCEPTION << layer->name << " Stride vector size should be equal begin vectror size";
+            }
+            dst_dims = layer->outData[0]->getTensorDesc().getDims();
+
+            std::string::size_type i;
+            std::string begin_mask_str = layer->GetParamAsString("begin_mask", "");
+            for (i = 0; i < begin_mask_str.size(); ++i) {
+                if (begin_mask_str[i] == '1') begin_mask.push_back(1);
+                else if (begin_mask_str[i] == '0') begin_mask.push_back(0);
+            }
+            for (; i < src_dims.size(); ++i) begin_mask.push_back(1);
+
+            std::string end_mask_str = layer->GetParamAsString("end_mask", "");
+            for (i = 0; i < end_mask_str.size(); ++i) {
+                if (end_mask_str[i] == '1') end_mask.push_back(1);
+                else if (end_mask_str[i] == '0') end_mask.push_back(0);
+            }
+            for (; i < src_dims.size(); ++i) end_mask.push_back(1);
+
+            std::string ellipsis_mask_str = layer->GetParamAsString("ellipsis_mask", "");
+            size_t ellipsis_mask_counter = 0;
+            for (i = 0; i < ellipsis_mask_str.size(); ++i) {
+                if (ellipsis_mask_str[i] == '1') {
+                    ellipsis_mask_counter++;
+                    ellipsis_mask.push_back(1);
+                } else if (ellipsis_mask_str[i] == '0') {
+                    ellipsis_mask.push_back(0);
+                }
+            }
+            if (ellipsis_mask_counter > 1)
+                THROW_IE_EXCEPTION << layer->name << " 'Ellipsis_mask' must be a power of two (only one ellipsis)!";
+            for (; i < src_dims.size(); ++i) ellipsis_mask.push_back(0);
+
+            std::string new_axis_mask_str = layer->GetParamAsString("new_axis_mask", "");
+            for (i = 0; i < new_axis_mask_str.size(); ++i) {
+                if (new_axis_mask_str[i] == '1') new_axis_mask.push_back(1);
+                else if (new_axis_mask_str[i] == '0') new_axis_mask.push_back(0);
+            }
+            for (; i < src_dims.size(); ++i) new_axis_mask.push_back(0);
+
+            std::string shrink_axis_mask_str = layer->GetParamAsString("shrink_axis_mask", "");
+            for (i = 0; i < shrink_axis_mask_str.size(); ++i) {
+                if (shrink_axis_mask_str[i] == '1') shrink_axis_mask.push_back(1);
+                else if (shrink_axis_mask_str[i] == '0') shrink_axis_mask.push_back(0);
+            }
+            for (; i < src_dims.size(); ++i) shrink_axis_mask.push_back(0);
+
+
+            int new_axis = 0;
+            for (auto& na : new_axis_mask)
+                new_axis += na;
+
+            shrink_axis = 0;
+            for (auto& sa : shrink_axis_mask)
+                shrink_axis += sa;
+            max_dims = src_dims.size() + new_axis;
+
+            //  ellipsis_mask must be a power of two (only one ellipsis), so to take a first position
+            ellipsis_pos1 = ellipsis_pos2 = max_dims;
+            for (i = 0; i < ellipsis_mask.size(); i++) {
+                if (ellipsis_mask[i] > 0) {
+                    ellipsis_pos1 = i;
+                    break;
+                }
+            }
+            bounds_size -= ellipsis_pos1;
+            if (bounds_size > 0 && (max_dims - bounds_size) > ellipsis_pos1)
+                ellipsis_pos2 = max_dims - bounds_size;
+
+            begin_dms.assign(max_dims, 0);
+            end_dms.assign(max_dims, -1);
+            stride_dms.assign(max_dims, 1);
+
+            srcStrides = layer->insData[STRIDEDSLICE_DATA].lock()->getTensorDesc().getBlockingDesc().getStrides();
+            dstStrides = layer->outData[0]->getTensorDesc().getBlockingDesc().getStrides();
+            if (layer->insData.size() == 1) {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+            } else if (layer->insData.size() == 2) {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+            } else if (layer->insData.size() == 3) {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN) },
+                          { DataConfigurator(ConfLayout::PLN) });
+            } else {
+                addConfig(layer, { DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
+                                   DataConfigurator(ConfLayout::PLN) }, { DataConfigurator(ConfLayout::PLN) });
+            }
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        const float *src_data = inputs[STRIDEDSLICE_DATA]->cbuffer().as<const float *>() +
+            inputs[STRIDEDSLICE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        int *begin = nullptr, *end = nullptr, *stride = nullptr;
+        if (begin_dims.size())
+            begin = inputs[STRIDEDSLICE_BEGIN]->cbuffer().as<int *>() + inputs[STRIDEDSLICE_BEGIN]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        if (end_dims.size())
+            end = inputs[STRIDEDSLICE_END]->cbuffer().as<int *>() + inputs[STRIDEDSLICE_END]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        if (stride_dims.size())
+            stride = inputs[STRIDEDSLICE_STRIDE]->cbuffer().as<int *>() + inputs[STRIDEDSLICE_STRIDE]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        float* dst_data = outputs[0]->cbuffer().as<float *>() +
+            outputs[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        InferenceEngine::SizeVector src_dims = inputs[STRIDEDSLICE_DATA]->getTensorDesc().getDims();
+        InferenceEngine::SizeVector srcStrides = inputs[STRIDEDSLICE_DATA]->getTensorDesc().getBlockingDesc().getStrides();
+        InferenceEngine::SizeVector dst_dims = outputs[0]->getTensorDesc().getDims();
+        InferenceEngine::SizeVector dstStrides = outputs[0]->getTensorDesc().getBlockingDesc().getStrides();
+
+        size_t i, j, k, bj, ej, sj;
+        InferenceEngine::SizeVector our_dims;
+        InferenceEngine::SizeVector out_dims;
+        for (i = 0, j = 0, k = 0, bj = 0, ej = 0, sj = 0; static_cast<int>(i) < max_dims; i++) {
+            if (static_cast<int>(i) >= ellipsis_pos1 &&
+                    static_cast<int>(i) < ellipsis_pos2) {
+                if (new_axis_mask.size() > i && new_axis_mask[i] == 1)
+                    end_dms[i] = 0;
+                else
+                    end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j++] + end_dms[i];
+
+                out_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast<float>(abs(stride_dms[i])))));
+                our_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast<float>(abs(stride_dms[i])))));
+                k = ellipsis_pos1;
+            } else {
+                stride_dms[i] = (stride != nullptr && stride_dims[0] > sj && stride[sj] != 0) ? stride[sj++] : 1;
+
+                if (begin_mask.size() > j && begin_mask[j] == 0)
+                    begin_dms[i] = stride_dms[i] > 0 ? 0 : -1;
+                else
+                    begin_dms[i] = (begin != nullptr && begin_dims[0] > bj) ? begin[bj] : (stride_dms[i] > 0 ? 0 : -1);
+                bj++;
+                begin_dms[i] = begin_dms[i] >= 0 ? begin_dms[i] : src_dims[j] + begin_dms[i];
+                //  Clipping 'begin'
+                clipping(&begin_dms[i], 0, src_dims[j]);
+
+                if (end_mask.size() > j && end_mask[j] == 0) {
+                    end_dms[i] = stride_dms[i] > 0 ? -1 : 0;
+                } else {
+                    int end_dms_tmp = (end != nullptr && end_dims[0] > ej) ? (stride_dms[i] > 0 ? end[ej] - 1 : end[ej] + 1)
+                                                                     : end_dms[i];
+                    end_dms[i] = (end != nullptr && end_dims[0] > ej) ? end_dms_tmp : (stride_dms[i] > 0 ? -1 : 0);
+                }
+                ej++;
+                end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j] + end_dms[i];
+                //  Clipping 'end'
+                clipping(&end_dms[i], 0, src_dims[j]);
+
+                if (new_axis_mask.size() > i && new_axis_mask[i] == 1)
+                    end_dms[i] = 0;
+                else
+                    j++;
+
+                if (shrink_axis_mask.size() > k && shrink_axis_mask[k] == 1)
+                    end_dms[i] = begin_dms[i];
+                else
+                    out_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) /
+                                                             static_cast<float>(abs(stride_dms[i])))));
+
+                our_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) /
+                                                         static_cast<float>(abs(stride_dms[i])))));
+                k++;
+            }
+        }
+
+        for (i = 0; i < std::min(out_dims.size(), dst_dims.size()); i++) {
+            if (out_dims[i] != dst_dims[i])
+                return PARAMETER_MISMATCH;
+        }
+
+        if (static_cast<int>(src_dims.size()) == max_dims && shrink_axis == 0 &&
+                stride_dms[stride_dms.size()-1] == 1 && stride_dms.size() > 1)
+            strided_slice_vp(src_data, dst_data);
+        else if (static_cast<int>(src_dims.size()) == max_dims && shrink_axis == 0)
+            strided_slice_p(src_data, dst_data);
+        else
+            strided_slice(src_data, dst_data, our_dims);
+
+        return OK;
+    }
+
+private:
+    const size_t STRIDEDSLICE_DATA = 0;
+    const size_t STRIDEDSLICE_BEGIN = 1;
+    const size_t STRIDEDSLICE_END = 2;
+    const size_t STRIDEDSLICE_STRIDE = 3;
+
+    void strided_slice(const float *src_data, float* dst_data, std::vector<size_t> &dims);
+    void strided_slice_vp(const float *src_data, float* dst_data);
+    void strided_slice_p(const float *src_data, float* dst_data);
+
+    SizeVector begin_dims;
+    SizeVector end_dims;
+    SizeVector stride_dims;
+
+    SizeVector begin_mask;
+    SizeVector end_mask;
+    SizeVector ellipsis_mask;
+    SizeVector new_axis_mask;
+    SizeVector shrink_axis_mask;
+    int shrink_axis;
+
+    SizeVector src_dims;
+    SizeVector dst_dims;
+    std::vector<int> begin_dms;
+    std::vector<int> end_dms;
+    std::vector<int> stride_dms;
+    SizeVector srcStrides;
+    SizeVector dstStrides;
+    int bounds_size;
+    int max_dims;
+    int ellipsis_pos1, ellipsis_pos2;
+};
+
+void StridedSliceImpl::strided_slice(const float *src_data, float* dst_data, std::vector<size_t> &dims) {
+    size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+    parallel_nt(0, [&](const int ithr, const int nthr) {
+        int j;
+        size_t i, start = 0, end = 0;
+        SizeVector counters(max_dims, 0);
+        splitter(work_amount_dst, nthr, ithr, start, end);
+        for (j = max_dims - 1, i = start; j >= 0; j--) {
+            counters[j] = i % dims[j];
+            i /= dims[j];
+        }
+        for (size_t iwork = start; iwork < end; ++iwork) {
+            int src_idx = 0;
+            for (i = 0, j = 0; static_cast<int>(i) < max_dims; ++i) {
+                if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1))
+                    src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[j++];
+            }
+
+            dst_data[iwork] = src_data[src_idx];
+
+            for (j = max_dims - 1; j >= 0; j--) {
+                counters[j]++;
+                if (counters[j] < dims[j])
+                    break;
+                else
+                    counters[j] = 0;
+            }
+        }
+    });
+}
+
+void StridedSliceImpl::strided_slice_vp(const float *src_data, float* dst_data) {
+    //  Vectorized copy
+    size_t dims_size_1 = dst_dims.size() - 1;
+    size_t dataLength = dst_dims[dims_size_1];
+    size_t work_amount_dst = dstStrides[0] * dst_dims[0] / dst_dims[dims_size_1];
+
+    parallel_nt(0, [&](const int ithr, const int nthr) {
+        size_t start = 0, end = 0;
+        SizeVector counters(dims_size_1, 0);
+        splitter(work_amount_dst, nthr, ithr, start, end);
+        size_t src_idx = begin_dms[dims_size_1];
+        for (int j = dims_size_1 - 1, i = start; j >= 0; j--) {
+            counters[j] = i % dst_dims[j];
+            src_idx += (begin_dms[j] + counters[j] * stride_dms[j]) * srcStrides[j];
+            i /= dst_dims[j];
+        }
+
+        for (size_t iwork = start, dst_idx = start * dataLength, i = 1; iwork < end; ++iwork, dst_idx += dataLength) {
+            memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
+            for (int j = dims_size_1 - 1; j >= 0; j--) {
+                counters[j]++;
+                if (counters[j] < dst_dims[j]) {
+                    src_idx += stride_dms[j] * srcStrides[j];
+                    break;
+                } else {
+                    counters[j] = i = 0;
+                }
+            }
+            if (!i) {
+                for (src_idx = begin_dms[dims_size_1]; i < dims_size_1; ++i)
+                    src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[i];
+            }
+        }
+    });
+}
+
+void StridedSliceImpl::strided_slice_p(const float *src_data, float* dst_data) {
+    size_t dims_size = dst_dims.size();
+    size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+
+    parallel_nt(0, [&](const int ithr, const int nthr) {
+        size_t start = 0, end = 0;
+        SizeVector counters(dims_size, 0);
+        splitter(work_amount_dst, nthr, ithr, start, end);
+        int src_idx = 0;
+        for (int j = dims_size - 1, i = start; j >= 0; j--) {
+            counters[j] = i % dst_dims[j];
+            src_idx += (begin_dms[j] + counters[j] * stride_dms[j]) * srcStrides[j];
+            i /= dst_dims[j];
+        }
+
+        for (size_t iwork = start, dst_idx = start, i = 1; iwork < end; ++iwork, dst_idx++) {
+            dst_data[dst_idx] = src_data[src_idx];
+            for (int j = dims_size - 1; j >= 0; j--) {
+                counters[j]++;
+                if (counters[j] < dst_dims[j]) {
+                    src_idx += stride_dms[j] * srcStrides[j];
+                    break;
+                } else {
+                    counters[j] = i = 0;
+                }
+            }
+            if (!i) {
+                for (src_idx = 0; i < dims_size; ++i)
+                    src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[i];
+            }
+        }
+    });
+}
+
+REG_FACTORY_FOR(ImplFactory<StridedSliceImpl>, StridedSlice);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_topkrois_onnx.cpp b/inference-engine/src/extension/ext_topkrois_onnx.cpp
new file mode 100644
index 000000000..0584bd5f6
--- /dev/null
+++ b/inference-engine/src/extension/ext_topkrois_onnx.cpp
@@ -0,0 +1,78 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class ExperimentalDetectronTopKROIsImpl: public ExtLayerBase {
+private:
+    // Inputs:
+    //      rois, shape [n, 4]
+    //      rois_probs, shape [n]
+    // Outputs:
+    //      top_rois, shape [max_rois, 4]
+
+    const int INPUT_ROIS {0};
+    const int INPUT_PROBS {1};
+
+    const int OUTPUT_ROIS {0};
+
+public:
+    explicit ExperimentalDetectronTopKROIsImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.size() != 2 || layer->outData.empty())
+                THROW_IE_EXCEPTION << "Incorrect number of input/output edges!";
+
+            if (layer->insData[INPUT_ROIS].lock()->dims.size() != 2 ||
+                layer->insData[INPUT_PROBS].lock()->dims.size() != 1)
+                THROW_IE_EXCEPTION << "Unsupported shape of input blobs!";
+
+            max_rois_num_ = layer->GetParamAsInt("max_rois", 0);
+
+            addConfig(layer,
+                      {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)},
+                      {DataConfigurator(ConfLayout::PLN)});
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
+                       ResponseDesc *resp) noexcept override {
+        const int input_rois_num = inputs[INPUT_ROIS]->getTensorDesc().getDims()[0];
+        const int top_rois_num = std::min(max_rois_num_, input_rois_num);
+
+        auto *input_rois = inputs[INPUT_ROIS]->buffer().as<const float *>();
+        auto *input_probs = inputs[INPUT_PROBS]->buffer().as<const float *>();
+        auto *output_rois = outputs[OUTPUT_ROIS]->buffer().as<float *>();
+
+        std::vector<size_t> idx(input_rois_num);
+        iota(idx.begin(), idx.end(), 0);
+        // FIXME. partial_sort is enough here.
+        sort(idx.begin(), idx.end(), [&input_probs](size_t i1, size_t i2) {return input_probs[i1] > input_probs[i2];});
+
+        for (int i = 0; i < top_rois_num; ++i) {
+            std::memcpy(output_rois + 4 * i, input_rois + 4 * idx[i], 4 * sizeof(float));
+        }
+
+        return OK;
+    }
+
+private:
+    int max_rois_num_;
+};
+
+REG_FACTORY_FOR(ImplFactory<ExperimentalDetectronTopKROIsImpl>, ExperimentalDetectronTopKROIs);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/ext_unsqueeze.cpp b/inference-engine/src/extension/ext_unsqueeze.cpp
new file mode 100644
index 000000000..0fda31c5d
--- /dev/null
+++ b/inference-engine/src/extension/ext_unsqueeze.cpp
@@ -0,0 +1,110 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ext_list.hpp"
+#include "ext_base.hpp"
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <cassert>
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace Extensions {
+namespace Cpu {
+
+class UnsqueezeImpl: public ExtLayerBase {
+public:
+    explicit UnsqueezeImpl(const CNNLayer* layer) {
+        try {
+            if (layer->insData.empty() || layer->outData.empty())
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input/output edges!";
+
+            if (layer->insData.size() != 2)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";
+
+            idx_dims = layer->insData[UNSQUEEZE_INDEXES].lock()->getTensorDesc().getDims();
+            data_dims = layer->insData[UNSQUEEZE_DATA].lock()->getTensorDesc().getDims();
+            if (idx_dims.size() > 1)
+                THROW_IE_EXCEPTION << layer->name << " Index vector should be 1 dimension";
+
+            if (layer->insData[UNSQUEEZE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::I32 &&
+                layer->insData[UNSQUEEZE_INDEXES].lock()->getTensorDesc().getPrecision() != Precision::FP32)
+                THROW_IE_EXCEPTION << layer->name << " Incorrect 'indices_to_squeeze' input precision. Only FP32 and I32 are supported!";
+
+            addConfig(layer, { { ConfLayout::PLN, false, 0 }, { ConfLayout::ANY, true } }, { { ConfLayout::PLN, false, 0 } });
+        } catch (InferenceEngine::details::InferenceEngineException &ex) {
+            errorMsg = ex.what();
+        }
+    }
+
+    StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs, ResponseDesc *resp) noexcept override {
+        InferenceEngine::SizeVector data_dims = inputs[UNSQUEEZE_DATA]->getTensorDesc().getDims();
+        InferenceEngine::SizeVector idx_dims = inputs[UNSQUEEZE_INDEXES]->getTensorDesc().getDims();
+
+        switch (inputs[UNSQUEEZE_INDEXES]->precision()) {
+        case Precision::FP32: {
+            float *idx_data = inputs[UNSQUEEZE_INDEXES]->cbuffer().as<float *>() +
+                              inputs[UNSQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+            size_t max = data_dims.size();
+            for (size_t i = 0; i < idx_dims[0]; i++) {
+                size_t axis = static_cast<size_t>(idx_data[i]);
+                if (axis > max) max = axis;
+            }
+            max++;
+
+            if ((idx_dims[0] + data_dims.size()) < max) {
+                if (resp) {
+                    std::string errorMsg = "Indices_to_set for unsqueeze layer is out of tensor dimension";
+                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                }
+                return PARAMETER_MISMATCH;
+            }
+        }
+        break;
+        case Precision::I32: {
+            int32_t *idx_data = inputs[UNSQUEEZE_INDEXES]->cbuffer().as<int32_t *>() +
+                                inputs[UNSQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+            size_t max = data_dims.size();
+            for (size_t i = 0; i < idx_dims[0]; i++) {
+                size_t axis = static_cast<size_t>(idx_data[i]);
+                if (axis > max) max = axis;
+            }
+            max++;
+
+            if ((idx_dims[0] + data_dims.size()) < max) {
+                if (resp) {
+                    std::string errorMsg = "Indices_to_set for unsqueeze layer is out of tensor dimension";
+                    errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+                }
+                return PARAMETER_MISMATCH;
+            }
+        }
+        break;
+        default:
+            if (resp) {
+                std::string errorMsg = "Incorrect 'indices_to_set' input precision. Only FP32 and I32 are supported!";
+                errorMsg.copy(resp->msg, sizeof(resp->msg) - 1);
+            }
+            return GENERAL_ERROR;
+        }
+
+        return OK;
+    }
+
+private:
+    const size_t UNSQUEEZE_DATA = 0;
+    const size_t UNSQUEEZE_INDEXES = 1;
+
+    SizeVector data_dims;
+    SizeVector idx_dims;
+};
+
+REG_FACTORY_FOR(ImplFactory<UnsqueezeImpl>, Unsqueeze);
+
+}  // namespace Cpu
+}  // namespace Extensions
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/extension/simple_copy.cpp b/inference-engine/src/extension/simple_copy.cpp
index 22d6be03b..d42732829 100644
--- a/inference-engine/src/extension/simple_copy.cpp
+++ b/inference-engine/src/extension/simple_copy.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/extension/simple_copy.h b/inference-engine/src/extension/simple_copy.h
index aaf7521ae..42ea6c90b 100644
--- a/inference-engine/src/extension/simple_copy.h
+++ b/inference-engine/src/extension/simple_copy.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/CMakeLists.txt b/inference-engine/src/gna_plugin/CMakeLists.txt
index f6a25b618..4c6b3d6c6 100644
--- a/inference-engine/src/gna_plugin/CMakeLists.txt
+++ b/inference-engine/src/gna_plugin/CMakeLists.txt
@@ -1,6 +1,5 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
-#
 
 set(TARGET_NAME "GNAPlugin")
 
diff --git a/inference-engine/src/gna_plugin/dnn.cpp b/inference-engine/src/gna_plugin/dnn.cpp
index 8c94f720a..76f94cbfe 100644
--- a/inference-engine/src/gna_plugin/dnn.cpp
+++ b/inference-engine/src/gna_plugin/dnn.cpp
@@ -1,7 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
+// dnn.cpp : component based neural network class for ease of use
+//
 extern bool global_debug;
 
 #include <cstdlib>
@@ -1932,6 +1933,8 @@ void AmIntelDnn::InitGNAStruct(intel_nnet_type_t *ptr_nnet) {
 
     if (ptr_nnet == nullptr)
         THROW_GNA_EXCEPTION << "Invalid input parameter";
+    if (ptr_nnet->pLayers != nullptr)
+        THROW_GNA_EXCEPTION << "InitGNAStruct can't work on prellocated layers array";
     if (component.empty())
         THROW_GNA_EXCEPTION << "empty model in AmIntelDnn::FillGNAStruct()";
 
@@ -2180,10 +2183,10 @@ void AmIntelDnn::InitGNAStruct(intel_nnet_type_t *ptr_nnet) {
                 pLayer++;
                 break;
             case kDnnCopyOp:
-                pLayer->nInputRows = component[i].num_rows_in;
-                pLayer->nInputColumns = component[i].num_columns_in;
-                pLayer->nOutputRows = component[i].num_rows_out;
-                pLayer->nOutputColumns = component[i].num_columns_out;
+                pLayer->nInputRows = component[i].num_columns_in;
+                pLayer->nInputColumns = component[i].num_rows_in;
+                pLayer->nOutputRows = component[i].num_columns_out;
+                pLayer->nOutputColumns = component[i].num_rows_out;
                 pLayer->nBytesPerInput = component[i].num_bytes_per_input;
                 pLayer->nBytesPerOutput = component[i].num_bytes_per_output;
                 pLayer->nBytesPerIntermediateOutput = sizeof(int32_t);
@@ -2198,8 +2201,8 @@ void AmIntelDnn::InitGNAStruct(intel_nnet_type_t *ptr_nnet) {
                         THROW_GNA_EXCEPTION << pLayer->nLayerKind << " could not allocate memory for INTEL_COPY layer structure.";
                     }
                     auto *pCopyLayer = reinterpret_cast<intel_copy_layer_t *>(pLayer->pLayerStruct);
-                    pCopyLayer->nCopyRows = component[i].op.copy.num_copy_rows;
-                    pCopyLayer->nCopyCols = component[i].op.copy.num_copy_columns;
+                    pCopyLayer->nCopyRows = component[i].op.copy.num_copy_columns;
+                    pCopyLayer->nCopyCols = component[i].op.copy.num_copy_rows;
                 }
                 pLayer++;
                 break;
@@ -2398,20 +2401,18 @@ void AmIntelDnn::WriteInputAndOutputText() {
                 float floatValue = 0.f;
                 if (component[i].num_bytes_per_output == 4) {
                     if (number_type_ == kDnnInt) {
-                        auto value = (reinterpret_cast<int32_t *>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j]);
-                    //    out_file << std::setw(8) << value << "\n";
-                        floatValue = (static_cast<float>(value) / component[i].output_scale_factor);
+                        auto value = reinterpret_cast<int32_t *>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
+                        floatValue = static_cast<float>(value);
 
                     } else {
-                        floatValue = (reinterpret_cast<float*>(component[i].ptr_outputs)[
-                            k * component[i].num_columns_out+ j]) / component[i].output_scale_factor;
+                        floatValue = reinterpret_cast<float*>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
                     }
                 } else {
                     auto value = reinterpret_cast<int16_t *>(component[i].ptr_outputs)[k * component[i].num_columns_out+ j];
-                 //   out_file << std::setw(8) << value << "\n";
-                    floatValue = (static_cast<float>(value) / component[i].output_scale_factor);
+                    floatValue = static_cast<float>(value);
                 }
-                out_file << std::setw(8) << floatValue << "\n";
+                out_file << std::setw(8) << floatValue / component[i].output_scale_factor << "\n";
+
                 if (ref_out_file) {
                     float ref_value = 0.f;
                     ref_out_file >> ref_value;
@@ -2433,25 +2434,31 @@ void AmIntelDnn::WriteInputAndOutputText() {
                         << " maxD="<< std::fixed << std::setprecision(5) << std::right << std::setw(8) << maxD << std::endl;
         }
 
+        float input_scale_factor = component[i].output_scale_factor;
+        if (component[i].operation == kDnnAffineOp ||
+            component[i].operation == kDnnDiagonalOp) {
+            input_scale_factor /= component[i].op.affine.weight_scale_factor;
+        } else if (component[i].operation == kDnnConvolutional1dOp) {
+            input_scale_factor /= component[i].op.conv1D.weight_scale_factor;
+        } else if (component[i].operation == kDnnPiecewiselinearOp) {
+            input_scale_factor = 1.f;
+        }
 
         for (int k = 0; k < component[i].num_rows_in; k++) {
             for (int j = 0; j < component[i].num_columns_in; j++) {
+                float floatValue = 0.f;
                 if (component[i].num_bytes_per_input == 4) {
                     if (number_type_ == kDnnInt) {
-                        in_file << std::setw(8)
-                                << (reinterpret_cast<int32_t *>(component[i].ptr_inputs)[k * component[i].num_columns_in
-                                    + j]);
+                        auto value = reinterpret_cast<int32_t *>(component[i].ptr_inputs)[k * component[i].num_columns_in + j];
+                        floatValue = static_cast<float>(value);
                     } else {
-                        in_file << std::setw(8)
-                                << (reinterpret_cast<float *>(component[i].ptr_inputs)[k * component[i].num_columns_in
-                                    + j]);
+                        floatValue = reinterpret_cast<float *>(component[i].ptr_inputs)[k * component[i].num_columns_in + j];
                     }
                 } else {
-                    in_file << std::setw(8)
-                            << (reinterpret_cast<int16_t *>(component[i].ptr_inputs)[k * component[i].num_columns_in
-                                + j]);
+                    auto value = reinterpret_cast<int16_t *>(component[i].ptr_inputs)[k * component[i].num_columns_in+ j];
+                    floatValue = static_cast<float>(value);
                 }
-                in_file << "\n";
+                in_file << std::setw(8) << floatValue / input_scale_factor << "\n";
             }
         }
 #endif
diff --git a/inference-engine/src/gna_plugin/dnn.h b/inference-engine/src/gna_plugin/dnn.h
index 8a1506dbe..0d89a2daf 100644
--- a/inference-engine/src/gna_plugin/dnn.h
+++ b/inference-engine/src/gna_plugin/dnn.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -251,7 +251,8 @@ class AmIntelDnn {
           softmax_type(kSoftmaxNone),
           ptr_sumgroup_sizes(NULL),
           num_sumgroup_sizes(0),
-          ptr_priors(NULL) {
+          ptr_priors(NULL),
+          ptr_dnn_memory_(NULL) {
     }
 
     ~AmIntelDnn() {
diff --git a/inference-engine/src/gna_plugin/dnn_memory.cpp b/inference-engine/src/gna_plugin/dnn_memory.cpp
index 16496b5bf..dec790703 100644
--- a/inference-engine/src/gna_plugin/dnn_memory.cpp
+++ b/inference-engine/src/gna_plugin/dnn_memory.cpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+// dnn_memory.cpp : memory manipulation routines
+//
 
 #include <cstdio>
 #include <cstdlib>
diff --git a/inference-engine/src/gna_plugin/dnn_memory.hpp b/inference-engine/src/gna_plugin/dnn_memory.hpp
index 5ab2c961f..43720f709 100644
--- a/inference-engine/src/gna_plugin/dnn_memory.hpp
+++ b/inference-engine/src/gna_plugin/dnn_memory.hpp
@@ -1,6 +1,7 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+// dnn_memory.hpp : memory manipulation routines
 
 #pragma once
 
diff --git a/inference-engine/src/gna_plugin/dnn_traits.hpp b/inference-engine/src/gna_plugin/dnn_traits.hpp
index 0a92bb342..98238dfe0 100644
--- a/inference-engine/src/gna_plugin/dnn_traits.hpp
+++ b/inference-engine/src/gna_plugin/dnn_traits.hpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+// dnn_traits.hpp : c++ trait approach to  define dnn objects
+//
 
 #pragma once
 
diff --git a/inference-engine/src/gna_plugin/floatmath.cpp b/inference-engine/src/gna_plugin/floatmath.cpp
index 3ea411279..72f3b3eeb 100644
--- a/inference-engine/src/gna_plugin/floatmath.cpp
+++ b/inference-engine/src/gna_plugin/floatmath.cpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+// floatmath.cpp : unoptimized floating point math routines (for reference)
+//
 
 #include "floatmath.h"
 #include "pwl.h"
diff --git a/inference-engine/src/gna_plugin/floatmath.h b/inference-engine/src/gna_plugin/floatmath.h
index ff9bf9938..5ce0db9ba 100644
--- a/inference-engine/src/gna_plugin/floatmath.h
+++ b/inference-engine/src/gna_plugin/floatmath.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_allocator.hpp b/inference-engine/src/gna_plugin/gna_allocator.hpp
index ae62b1f76..e862efc4a 100644
--- a/inference-engine/src/gna_plugin/gna_allocator.hpp
+++ b/inference-engine/src/gna_plugin/gna_allocator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_api_wrapper.hpp b/inference-engine/src/gna_plugin/gna_api_wrapper.hpp
index fb9d2cc2e..1328ef5ae 100644
--- a/inference-engine/src/gna_plugin/gna_api_wrapper.hpp
+++ b/inference-engine/src/gna_plugin/gna_api_wrapper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -32,6 +32,9 @@ class CPPWrapper<intel_nnet_type_t> {
      * @param n - number  of layers
      */
     explicit CPPWrapper(size_t n) {
+        if (n == 0) {
+            THROW_GNA_EXCEPTION << "Can't allocate array of intel_nnet_layer_t objects of zero length";
+        }
         obj.pLayers = reinterpret_cast<intel_nnet_layer_t *>(_mm_malloc(n * sizeof(intel_nnet_layer_t), 64));
         if (obj.pLayers == nullptr) {
             THROW_GNA_EXCEPTION << "out of memory in while allocating "<< n << " GNA layers";
diff --git a/inference-engine/src/gna_plugin/gna_device.cpp b/inference-engine/src/gna_plugin/gna_device.cpp
index 3936bc89b..344d44e21 100644
--- a/inference-engine/src/gna_plugin/gna_device.cpp
+++ b/inference-engine/src/gna_plugin/gna_device.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_device.hpp b/inference-engine/src/gna_plugin/gna_device.hpp
index 782821137..563f3a53c 100644
--- a/inference-engine/src/gna_plugin/gna_device.hpp
+++ b/inference-engine/src/gna_plugin/gna_device.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_executable_network.hpp b/inference-engine/src/gna_plugin/gna_executable_network.hpp
index 1230624fb..88960ce33 100644
--- a/inference-engine/src/gna_plugin/gna_executable_network.hpp
+++ b/inference-engine/src/gna_plugin/gna_executable_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_helper.cpp b/inference-engine/src/gna_plugin/gna_helper.cpp
index 604828c33..7d26aaf27 100644
--- a/inference-engine/src/gna_plugin/gna_helper.cpp
+++ b/inference-engine/src/gna_plugin/gna_helper.cpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+//  gna_helper.cpp : various GNA-related utility functions
+//
 
 #include "lstm.hpp"
 
diff --git a/inference-engine/src/gna_plugin/gna_infer_request.hpp b/inference-engine/src/gna_plugin/gna_infer_request.hpp
index ba8e99f79..00a03a840 100644
--- a/inference-engine/src/gna_plugin/gna_infer_request.hpp
+++ b/inference-engine/src/gna_plugin/gna_infer_request.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -32,7 +32,10 @@ class GNAInferRequest : public InferenceEngine::AsyncInferRequestInternal {
 
         // copy inputs blobs since we need to have them in separate address space to allow simultaneous infer requests
         _outputs[_networkOutputs.begin()->first] = plg->GetOutputBlob(networkOutputs.begin()->second->getPrecision());
-        _inputs[_networkInputs.begin()->first] = plg->GetInputBlob(networkInputs.begin()->second->getInputPrecision());
+        for (auto input : _networkInputs) {
+            _inputs[input.first] =
+                plg->GetInputBlob(input.first, networkInputs.begin()->second->getInputPrecision());
+        }
     }
     /**
      * @brief Infers specified input(s) in synchronous mode
diff --git a/inference-engine/src/gna_plugin/gna_layer_info.hpp b/inference-engine/src/gna_plugin/gna_layer_info.hpp
index 7e6da438e..5851a8630 100644
--- a/inference-engine/src/gna_plugin/gna_layer_info.hpp
+++ b/inference-engine/src/gna_plugin/gna_layer_info.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -55,7 +55,7 @@ class LayerInfo {
     bool has32BOutput() const noexcept {
         IS_VALID();
         static  InferenceEngine::details::caseless_set<std::string> layersWith32BOutputs =
-                {"FullyConnected", "InnerProduct", "Eltwise", "ScaleShift", "Convolution", "Pooling"};
+                {"FullyConnected", "InnerProduct", "AffineFilter", "Eltwise", "ScaleShift", "Convolution", "Pooling"};
         return (layersWith32BOutputs.find(layer->type) != layersWith32BOutputs.end()) ||
                                                             (isCrop() && isCropAffined());
     }
@@ -88,6 +88,11 @@ class LayerInfo {
         IS_VALID();
         return InferenceEngine::details::CaselessEq<std::string>()(layer->type, "input");
     }
+    bool isScaleShift() const noexcept {
+        IS_VALID();
+        return nullptr != as<const InferenceEngine::ScaleShiftLayer*>();
+    }
+
     bool isEltwise() const noexcept {
         IS_VALID();
         return nullptr != as<const InferenceEngine::EltwiseLayer*>();
@@ -112,9 +117,6 @@ class LayerInfo {
         return InferenceEngine::details::CaselessEq<std::string>()(layer->type, "FullyConnected") ||
                 InferenceEngine::details::CaselessEq<std::string>()(layer->type, "InnerProduct");
     }
-    bool isConvolutional() const noexcept {
-        return InferenceEngine::details::CaselessEq<std::string>()(layer->type, "Convolution");
-    }
     bool isSplit() const noexcept {
         IS_VALID();
         return InferenceEngine::details::CaselessEq<std::string>()(layer->type, "split");
@@ -155,7 +157,7 @@ class LayerInfo {
     bool isCropAffined() const noexcept {
         auto cropLayer = dynamic_cast<InferenceEngine::CropLayer *> (layer);
         size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
-        return (ALIGN(cropOffset, 8) != cropOffset);
+        return (ALIGN64(cropOffset) != cropOffset);
     }
     bool isCopy() const noexcept {
         IS_VALID();
diff --git a/inference-engine/src/gna_plugin/gna_mem_requests.hpp b/inference-engine/src/gna_plugin/gna_mem_requests.hpp
index 24163dc4f..99d073149 100644
--- a/inference-engine/src/gna_plugin/gna_mem_requests.hpp
+++ b/inference-engine/src/gna_plugin/gna_mem_requests.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_memory.hpp b/inference-engine/src/gna_plugin/gna_memory.hpp
index d1c96506b..30da318cf 100644
--- a/inference-engine/src/gna_plugin/gna_memory.hpp
+++ b/inference-engine/src/gna_plugin/gna_memory.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_memory_state.hpp b/inference-engine/src/gna_plugin/gna_memory_state.hpp
index 7edcb02e5..90e1f43db 100644
--- a/inference-engine/src/gna_plugin/gna_memory_state.hpp
+++ b/inference-engine/src/gna_plugin/gna_memory_state.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_model_serial.cpp b/inference-engine/src/gna_plugin/gna_model_serial.cpp
index 3b14b8c81..84c7d3c38 100644
--- a/inference-engine/src/gna_plugin/gna_model_serial.cpp
+++ b/inference-engine/src/gna_plugin/gna_model_serial.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_model_serial.hpp b/inference-engine/src/gna_plugin/gna_model_serial.hpp
index 0ba5be5ab..30be460f2 100644
--- a/inference-engine/src/gna_plugin/gna_model_serial.hpp
+++ b/inference-engine/src/gna_plugin/gna_model_serial.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_plugin.cpp b/inference-engine/src/gna_plugin/gna_plugin.cpp
index 620aa489c..fc57d5248 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -131,7 +131,7 @@ void GNAPlugin::copyInputData(T *dst,
         for (uint32_t i = 0; i < num_frames; i++) {
             for (uint32_t j = 0; j < num_vector_elements; j++) {
                 if (!std::is_same<T, U>::value) {
-                    dst[j * num_group + i] = GNAPluginNS::ConvertFloatToInt16(src[i * num_vector_elements + j] * input_scale_factor);
+                    dst[j * num_group + i] = GNAPluginNS::ConvertFloatToInt16(src[i * num_vector_elements + j] * get_input_scale_factor());
                 } else {
                     dst[j * num_group + i] = src[i * num_vector_elements + j];
                 }
@@ -154,7 +154,7 @@ void GNAPlugin::copyInputData(T *dst,
                 U *ptr_src_vec = const_cast<U *>(reinterpret_cast<const U *>(src) + i * num_vector_elements);
                 std::memset(ptr_dst_vec, 0, num_vector_stride * sizeof(T));
                 for (int j=0; j < num_vector_elements; j++) {
-                    ptr_dst_vec[j] = GNAPluginNS::ConvertFloatToInt16(ptr_src_vec[j] * input_scale_factor);
+                    ptr_dst_vec[j] = GNAPluginNS::ConvertFloatToInt16(ptr_src_vec[j] * get_input_scale_factor());
                 }
             }
 
@@ -189,9 +189,13 @@ void GNAPlugin::copyInputDataWithSplit(T *const dst,
     for (auto&& outputLayer : splitInfo.splitOutputLayers) {
         uint32_t begin = outputLayer.offset/precision_size;
         uint32_t end = (outputLayer.offset + outputLayer.pure_size)/precision_size;
+        if (dst_ptr - dst >= end) {
+            // output layer with bind pointer as previous one. Skip
+            continue;
+        }
         for (uint32_t i = begin; i < end; ++i) {
             if (!std::is_same<T, U>::value) {
-                *(dst_ptr++) = GNAPluginNS::ConvertFloatToInt16(*(src_ptr++) * input_scale_factor);
+                *(dst_ptr++) = GNAPluginNS::ConvertFloatToInt16(*(src_ptr++) * get_input_scale_factor());
             } else {
                 *(dst_ptr++) = *(src_ptr++);
             }
@@ -285,46 +289,39 @@ void GNAPlugin::ImportFrames(
                   uint32_t num_group,
                   uint32_t num_vector_elements,
                   uint32_t num_vector_stride) {
-    // special case if split/slice layers connected
-    // with Input detected
-    auto it = split_connection.end();
-    if (split_connection.size() != 0) {
-        it = std::find_if(split_connection.begin(), split_connection.end(), []
-                    (const std::pair<std::string, GNASplitLayer> &item) -> bool {
-                        return CaselessEq<std::string>()(item.second.splitInputLayer.name, "Input");
-                    });
-    }
     if (orientation == kDnnInterleavedOrientation) {
         // TODO : fix that as well
-        if (input_precision.size() == 2) {
+        if (input_precision == Precision::U8) {
+            int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
+            uint8_t *src = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(ptr_src));
+            copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
+        } else if (input_precision.size() == 2) {
             int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
             int16_t *src = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_src));
-            if (it != split_connection.end()) {
-                copyInputDataWithSplit(dst, src, it->second, input_precision.size());
-            } else {
-                copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
-            }
+            copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
         } else if (input_precision.size() == 4) {
             if (!gnadevice) {
                 float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
                 float *src = const_cast<float *>(reinterpret_cast<const float *>(ptr_src));
-                if (it != split_connection.end()) {
-                    copyInputDataWithSplit(dst, src, it->second, input_precision.size());
-                } else {
-                    copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
-                }
+                copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
             } else {
                 int16_t *dst = reinterpret_cast<int16_t *>(ptr_dst);
                 const float *src = reinterpret_cast<const float *>(ptr_src);
-                if (it != split_connection.end()) {
-                    copyInputDataWithSplit(dst, src, it->second, input_precision.size());
-                } else {
-                    copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
-                }
+                copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
             }
         }
     } else {
-        if (input_precision.size()== 2) {
+        if (input_precision == Precision::U8) {
+            uint8_t *src = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(ptr_src));
+            if (!gnadevice) {
+                float *dst = const_cast<float *>(reinterpret_cast<const float *>(ptr_dst));
+                copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
+            } else {
+                int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
+                copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
+            }
+
+        } else if (input_precision.size()== 2) {
             int16_t *dst = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_dst));
             int16_t *src = const_cast<int16_t *>(reinterpret_cast<const int16_t *>(ptr_src));
             copyInputData(dst, src, num_frames, num_group, num_vector_elements, num_vector_stride, orientation);
@@ -342,9 +339,8 @@ void GNAPlugin::ImportFrames(
     }
 }
 
-void GNAPlugin::fillMemoryConnections(std::map<std::string,
-                                            std::vector<InferenceEngine::CNNLayerPtr>>&
-                                                                            memoryPairs) {
+void GNAPlugin::fillMemoryConnections(std::unordered_map<std::string,
+                                            std::vector<InferenceEngine::CNNLayerPtr>>& memoryPairs) {
     for (auto &memory : memoryPairs) {
         auto inputLayer = memory.second[1];
         auto outputLayer = memory.second[0];
@@ -401,7 +397,7 @@ void GNAPlugin::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
     LayerInfo ptrSplitLayerInputLayerInfo(ptrSplitLayerInput);
     for (size_t i = 0; i < layer->outData.size(); ++i) {
         size_t padding = 0;
-        size_t layer_size = 0;
+        size_t output_layer_size = 0;
         auto& dataOutput = layer->outData[i];
 
         if (!dataOutput || !dataInput) {
@@ -416,16 +412,19 @@ void GNAPlugin::fillSplitConnections(InferenceEngine::CNNLayerPtr layer) {
 
             padding = std::max(padding, LayerInfo(ptrSplitLayerOutput).paddingSize())
                                                         * dataOutput->precision.size();
-            layer_size =
+            output_layer_size =
                     InferenceEngine::details::product(begin(dataOutput->dims),
                                                      end(dataOutput->dims)) * dataOutput->precision.size();
 
-            layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, split_size, layer_size);
+            if (ptrSplitLayerOutput->type == "AffineFilter") {
+                size_t aligned64_offset = ptrSplitLayerOutput->GetParamAsInt("offset");
+                layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, aligned64_offset, output_layer_size);
+            } else {
+                layerInfoItem.splitOutputLayers.emplace_back(ptrSplitLayerOutput->name, split_size, output_layer_size);
+            }
         }
 
-        split_size += ptrSplitLayerInputLayerInfo.isInput() ?
-                                ALIGN64(padding + layer_size):
-                                        padding + layer_size;
+        split_size += padding + output_layer_size;
     }
     layerInfoItem.reserved_size = split_size;
     layerInfoItem.splitInputLayer =
@@ -717,9 +716,9 @@ void GNAPlugin::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto &currentComponent = dnnComponentsForLayer.back().second;
     dnn.InitCopyComponent(currentComponent,
                           orientation,
-                          num_rows_in + num_padding_in,
+                          ALIGN(num_rows_in, 8),
                           num_columns_in,
-                          num_rows_out + num_padding_out,
+                          ALIGN(num_rows_out, 8),
                           num_columns_out,
                           inputs->precision.size(),
                           outputs->precision.size(),
@@ -732,7 +731,7 @@ void GNAPlugin::CopyPrimitive(InferenceEngine::CNNLayerPtr layer) {
     size_t num_data_bytes_out = ALIGN(InferenceEngine::details::product(
                                                             begin(outputs->dims), end(outputs->dims)), 8)
                                                                                 * outputs->precision.size();
-    size_t num_data_bytes_in = num_columns_in * (num_rows_in + num_padding_in) * inputs->precision.size();
+    size_t num_data_bytes_in = num_columns_in * ALIGN(num_rows_in, 8) * inputs->precision.size();
 
     connectInput(layer, ptr_inputs, num_data_bytes_in);
     connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
@@ -757,13 +756,23 @@ void GNAPlugin::ConcatPrimitive(InferenceEngine::CNNLayerPtr layer) {
         THROW_GNA_EXCEPTION << "Different precision for Concat input layers are not supported";
     }
 
+    auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
     for (auto &&outLayer : concatLayer->outData.front()->getInputTo()) {
         if ( LayerInfo(outLayer.second).isConcat() ) {
-            auto& concatLayerInfo = concat_connection.find(concatLayer->name)->second;
             connectOutput(layer, &concatLayerInfo.gna_ptr,
                           &concatLayerInfo.gna_ptr, concatLayerInfo.reserved_size);
         }
     }
+
+    size_t idx = 0;
+    for (auto && inputLayer : concatLayerInfo.concatInputLayers) {
+        if ( InferenceEngine::details::CaselessEq<std::string>()
+                                            (inputLayer.name, "input") ) {
+            connectInput(layer, &concatLayerInfo.gna_ptr,
+                                concatLayerInfo.reserved_size-inputLayer.offset, static_cast<int32_t>(-inputLayer.offset), idx);
+        }
+        ++idx;
+    }
 }
 
 void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
@@ -780,9 +789,9 @@ void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
 
     auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
     size_t cropOffset = cropLayer->offset.back() * cropLayer->precision.size();
-    size_t cropSize = cropLayer->dim.back() * cropLayer->precision.size();
+    size_t cropOutputSize = cropLayer->dim.back() * cropLayer->precision.size();
 
-    if (ALIGN(cropOffset, 8) == cropOffset) {
+    if (ALIGN64(cropOffset) == cropOffset) {
         // leave crop as it is
         GNAPlugin::GNACropLayer cropLayerInfoItem(layer);
         std::string& id = layer->name;
@@ -795,13 +804,13 @@ void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
         }
 
         // calculate index idx for connectInput last parameter
-        connectInput(layer, &cropLayerInfo->second.gna_ptr, cropSize + cropOffset, cropOffset, 0);
+        connectInput(layer, &cropLayerInfo->second.gna_ptr, cropOutputSize + cropOffset, cropOffset, 0);
 
         // cases for certain output layers
         for (auto &&outLayer : layer->outData.front()->getInputTo()) {
             auto& nextLayer = outLayer.second;
             if ( LayerInfo(nextLayer).isConcat() ) {
-                connectOutput(layer, &cropLayerInfo->second.gna_ptr, &cropLayerInfo->second.gna_ptr, cropSize);
+                connectOutput(layer, &cropLayerInfo->second.gna_ptr, &cropLayerInfo->second.gna_ptr, cropOutputSize);
             }
         }
     } else {
@@ -842,30 +851,16 @@ void GNAPlugin::CropPrimitive(InferenceEngine::CNNLayerPtr layer) {
                                           begin(outputs->dims), end(outputs->dims)) * 4;
 
         size_t num_data_bytes_in = num_columns_in *
-        (num_rows_in + num_padding) * inputs->precision.size();
+                ALIGN(num_rows_in, 8) * inputs->precision.size();
 
         connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
         connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
 
-        gnamem->readonly().push_initializer(ptr_weights, num_rows_out * (num_rows_in + num_padding)*layer->precision.size(), [=](void * data, size_t size) {
-            int out = 0;
-            for (int input = cropLayer->offset.back(); input < num_rows_out + cropLayer->offset.back(); ++input) {
-                auto mem_ptr = reinterpret_cast<uint8_t *>(data) + input * layer->precision.size() + out * (num_rows_in+num_padding) * layer->precision.size();
-                if (quantized == nullptr) {
-                    auto float_ptr = reinterpret_cast<float *>(mem_ptr);
-                    *float_ptr = 1.0f;
-                } else {
-                    auto int_ptr = reinterpret_cast<uint16_t *>(mem_ptr);
-                    *int_ptr = 1;
-                }
-                ++out;
-            }
-        }, 64);
-        if (quantized == nullptr) {
-            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
-        } else {
+        FillWeightOfAligningFilter(layer, ptr_weights, cropLayer->offset.back(), (quantized == nullptr) ? false : true);
+
+        (quantized == nullptr) ?
+            gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64):
             gnamem->readonly().push_value<int32_t>(ptr_biases, 0, num_rows_out, 64);
-        }
     }
 }
 
@@ -907,6 +902,7 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     uint32_t num_rows_in = FROM_IR_DIM(inputs4Bytes, 1);
     uint32_t num_columns_in = FROM_IR_DIM(inputs4Bytes, 2);
     uint32_t num_rows_out = num_rows_in;
+    uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
 
     void *ptr_inputs;
     void *ptr_outputs;
@@ -916,9 +912,9 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
     auto &currentComponent = dnnComponentsForLayer.back().second;
     dnn.InitAffineComponent(currentComponent,
-                            num_rows_in,
+                            num_rows_in + num_padding,
                             num_columns_in,
-                            num_rows_out,
+                            num_rows_out + num_padding,
                             inputs2Bytes->precision.size(),
                             outputs->precision.size(),
                             // TODO: only fp32 and Int16 tested
@@ -936,11 +932,11 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
     cout << "IR layer : " << std::left << std::setw(20) << layer->name << "diagonal_"<< dnnComponentsForLayer.size() - 1 << "\n";
 #endif
 
-    size_t num_data_bytes_out = InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims))
-        * outputs->precision.size();
+    size_t num_data_bytes_out =
+        InferenceEngine::details::product(begin(outputs->dims), end(outputs->dims)) * outputs->precision.size();
 
-    size_t num_data_bytes_in = InferenceEngine::details::product(begin(inputs2Bytes->dims), end(inputs2Bytes->dims))
-        * inputs2Bytes->precision.size();
+    size_t num_data_bytes_in =
+        num_columns_in * (num_rows_in + num_padding) * inputs2Bytes->precision.size();
 
     connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
     connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 1 - biasesLayerIdx);
@@ -955,6 +951,7 @@ void GNAPlugin::EltwisePrimitive(InferenceEngine::CNNLayerPtr layer) {
                 #define FLOAT_TO_INT16(a) static_cast<int16_t>(((a) < 0)?((a) - 0.5):((a) + 0.5))
 
                 auto quantizedIdentity = FLOAT_TO_INT16(std::min(scaledIdentity, static_cast<float>(INT16_MAX)));
+
                 gnamem->readonly().push_value<int16_t>(ptr_weights, quantizedIdentity, num_rows_out, 64);
             }
             connectInput(layer, ptr_biases, num_data_bytes_in, 0, biasesLayerIdx);
@@ -1028,19 +1025,25 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
     auto transpose = false;
     auto transposedRows = 0;
     auto transposedCols = 0;
-    /**
-     * TODO: enable transpose correction between Conv/affine layers implement dedicated pass
-     * TF topologies have inplace permutes so we dont care
-     * kaldi topologies did this internally
-     */
+
     if (0 && connectionInfo.needTransposeWeights) {
-        gnalog() << "Transposing weights for layer: " << layer->name << "\n";
         // direct order is 0, 1, 2, 3, supported order is only 0,3,2,1 where dim 2 is usually equals to 1
         auto permuteOrder = connectionInfo.permute->GetParamAsInts("order");
         if (permuteOrder != vector<int>({0, 3, 2, 1})) {
             THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute order: was " << layer->GetParamAsString("order") <<
                                ", but only support 0, 3, 2, 1";
         }
+
+        /**
+         * TODO: weights transpose happened after quantisation might result in poor quality for in 8 - move this to passes
+         */
+        if (weightable._weights->precision() == Precision::I8) {
+            THROW_IE_EXCEPTION << "[GNA plugin] Unsupported permute operation for 8 bit weights for layer: " << layer->name;
+        }
+
+        // this affine connected to convolution via pool or activation
+        gnalog() << "Transposing weights for layer: " << layer->name << "\n";
+
         transpose = !isDiag;
         transposedRows = connectionInfo.permute->input()->getDims()[3];
         transposedCols = connectionInfo.permute->input()->getDims()[1];
@@ -1053,7 +1056,6 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
                                         weightable._weights->byteSize(),
                                         64);
         } else {
-            // ToDO: write unit tests for transpose
             gnamem->readonly().push_initializer(ptr_weights, weightable._weights->byteSize(), [=](void * data, size_t size) {
                 for (int k = 0; k < (isDiag ? 1 : num_rows_out); k++) {
                     auto rowOffset = k * transposedRows * transposedCols * weightable.precision.size();
@@ -1063,13 +1065,16 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
                         for (int i = 0; i < transposedRows; i++) {
                             auto offsetWrite = (transposedRows * j + i) * weightable.precision.size();
                             auto offsetRead = (i * transposedCols + j) * weightable.precision.size();
-                            memcpy(u8Data + offsetWrite, cbuffer + offsetRead, weightable.precision.size());
+                            std::memcpy(u8Data + offsetWrite, cbuffer + offsetRead, weightable.precision.size());
                         }
                     }
                 }
             }, 64);
         }
     } else {
+        if (transpose) {
+            THROW_GNA_EXCEPTION << "transpozed weights with non zero padding not yet supported";
+        }
         auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
         auto paddedWeights = isDiag ? elementsIn : elementsIn * num_rows_out;
         auto paddedWeightsSize = paddedWeights * weightable.precision.size();
@@ -1094,6 +1099,123 @@ void GNAPlugin::AffinePrimitive(InferenceEngine::CNNLayerPtr layer, bool isDiag)
     }
 }
 
+void GNAPlugin::FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr layer, void* ptrWeights, size_t offset, bool isQuantized) {
+    auto outputs = *layer->outData.begin();
+    auto inputs = layer->insData.begin()->lock();
+
+    uint32_t num_rows_in = FROM_IR_DIM(inputs, 1);
+    uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
+
+    if (!ptrWeights) {
+        THROW_GNA_EXCEPTION << "Weights memory is not allocated!!!";
+    }
+
+    gnamem->readonly().push_initializer(ptrWeights, num_rows_out * ALIGN(num_rows_in, 8) * layer->precision.size(), [=](void * data, size_t size) {
+        int out = 0;
+        for (int input = offset; input < num_rows_out + offset; ++input) {
+            auto mem_ptr = reinterpret_cast<uint8_t *>(data) + input * layer->precision.size() + out * ALIGN(num_rows_in, 8) * layer->precision.size();
+            if (!isQuantized) {
+                auto float_ptr = reinterpret_cast<float *>(mem_ptr);
+                *float_ptr = 1.0f;
+           } else {
+               auto int_ptr = reinterpret_cast<uint16_t *>(mem_ptr);
+               *int_ptr = 1;
+           }
+            ++out;
+        }
+    }, 64);
+}
+
+void GNAPlugin::AffineFilterPrimitive(InferenceEngine::CNNLayerPtr layer) {
+    auto filterLayer = dynamic_cast<InferenceEngine::WeightableLayer *> (layer.get());
+
+    if (filterLayer == nullptr) {
+        return;
+    }
+
+    std::string& name = filterLayer->name;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layer);
+
+    // we look for this concat layer pointer in extra concat map
+    auto prevLayer = CNNNetPrevLayer(layer.get(), 0);
+    if (!LayerInfo(prevLayer).isSplit() && !LayerInfo(prevLayer).isSlice()) {
+        THROW_GNA_EXCEPTION << "Case  with Affine Aligning Filter for not Split/Slice layers is not implemented yet!";
+    }
+
+    void *ptr_inputs;
+    void *ptr_outputs;
+    void *ptr_weights;
+    void *ptr_biases;
+
+    auto outputs = *layer->outData.begin();
+    auto inputs = layer->insData.begin()->lock();
+
+    uint32_t num_columns_in = FROM_IR_DIM(inputs, 2);
+    uint32_t num_rows_out = FROM_IR_DIM(outputs, 1);
+    uint32_t num_rows_in = filterLayer->_weights->size() / num_rows_out;
+
+    uint32_t num_padding = ALIGN(num_rows_in, 8) - num_rows_in;
+
+    gnalog() << "Filter " << layer->name << " is being inserted...\n";
+    auto biasPrecision = filterLayer->_biases ? filterLayer->_biases->precision() : outputs->precision;
+    dnnComponentsForLayer.emplace_back(layer->name, intel_dnn_component_t());
+    auto &currentComponent = dnnComponentsForLayer.back().second;
+    dnn.InitAffineComponent(currentComponent,
+                            num_rows_in + num_padding,
+                            num_columns_in,
+                            num_rows_out,
+                            inputs->precision.size(),
+                            outputs->precision.size(),
+                            filterLayer->_weights->precision().size(),
+                            biasPrecision.size(),
+                            quantized == nullptr ? 1 : quantized->_weights_quant.scale,
+                            quantized == nullptr ? 1 : quantized->_dst_quant.scale,
+                            ptr_inputs,
+                            ptr_outputs,
+                            ptr_weights,
+                            ptr_biases,
+                            false);
+
+    size_t num_data_bytes_out =
+                InferenceEngine::details::product(
+                                        begin(outputs->dims), end(outputs->dims)) * 4;
+
+    size_t num_data_bytes_in = num_columns_in *
+                            ALIGN(num_rows_in, 8) * inputs->precision.size();
+
+    connectInput(layer, ptr_inputs, num_data_bytes_in, 0, 0);
+    connectOutput(layer, ptr_outputs, ptr_inputs, num_data_bytes_out);
+
+    if (num_padding == 0) {
+        gnamem->readonly().push_ptr(ptr_weights,
+                                filterLayer->_weights->cbuffer().as<const void *>(),
+                                filterLayer->_weights->byteSize(),
+                                                            64);
+    } else {
+        auto elementsIn = (num_rows_in + num_padding) * num_columns_in;
+        auto paddedWeights = elementsIn * num_rows_out;
+        auto paddedWeightsSize = paddedWeights * filterLayer->precision.size();
+
+        gnamem->readonly().push_initializer(ptr_weights, paddedWeightsSize, [=](void * data, size_t size) {
+            for (int i = 0; i < num_rows_out; i++) {
+                std::memcpy(data,
+                       filterLayer->_weights->cbuffer().as<const uint8_t *>() + num_rows_in * i * filterLayer->precision.size(),
+                       num_rows_in * filterLayer->precision.size());
+                data = reinterpret_cast<uint8_t *>(data) + (num_rows_in + num_padding) * filterLayer->precision.size();
+            }
+        }, 64);
+    }
+
+    if (filterLayer->_biases) {
+        gnamem->readonly().push_ptr(ptr_biases,
+                         filterLayer->_biases->cbuffer().as<const void *>(),
+                         filterLayer->_biases->byteSize(),
+                         64);
+    } else {
+        gnamem->readonly().push_value(ptr_biases, 0.0f, num_rows_out, 64);
+    }
+}
+
 void GNAPlugin::PWLPrimitive(InferenceEngine::CNNLayerPtr layer) {
     auto *generic = dynamic_cast<GenericLayer *>(layer.get());
     std::string type;
@@ -1269,6 +1391,7 @@ void GNAPlugin::CreateLayerPrimitive(CNNLayerPtr layer) {
         {{"Input"}, [](GNAPlugin*, CNNLayerPtr l) {}},  // skip input layers they are not used in GNA lib, only as a memory blobs
         {{"FullyConnected", "InnerProduct"}, CREATE(AffinePrimitive)},
         {{"ScaleShift"}, CREATE(DiagonalPrimitive)},
+        {{"AffineFilter"}, CREATE(AffineFilterPrimitive)},
         {{"Eltwise"},
          CREATE(EltwisePrimitive)},  // same as diagonal while weights are not taken from network, rather than from another output
         {{"Split"}, SKIP},  // skip information about which part of prev layer need to consume handle during layer creation
@@ -1293,109 +1416,10 @@ void GNAPlugin::CreateLayerPrimitive(CNNLayerPtr layer) {
 
 
 GNAPlugin::GNAPlugin(const std::map<std::string, std::string>& configMap) {
-    // holds actual value of a found key
-    std::string value;
-    auto if_set = [&](std::string key, const std::function<void()> & handler) {
-        auto keyInMap = configMap.find(key);
-        if (keyInMap != configMap.end()) {
-            value = keyInMap->second;
-            handler();
-        }
-    };
-
-    if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
-        input_scale_factor = std::stod(value);
-    });
-
-    if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), [&] {
-        dumpXNNPath = value;
-    });
-
-    if_set(GNA_CONFIG_KEY(DEVICE_MODE), [&] {
-        static caseless_unordered_map <std::string, uint32_t> supported_values = {
-            {GNAConfigParams::GNA_AUTO, GNA_AUTO},
-            {GNAConfigParams::GNA_HW, GNA_HARDWARE},
-            {GNAConfigParams::GNA_SW, GNA_SOFTWARE},
-            {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
-        };
-        auto procType = supported_values.find(value);
-        if (procType == supported_values.end()) {
-            THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
-        }
-        gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
-    });
-
-    if_set(GNA_CONFIG_KEY(COMPACT_MODE), [&] {
-        if (value == PluginConfigParams::YES) {
-            compact_mode = true;
-        } else if (value == PluginConfigParams::NO) {
-            compact_mode = false;
-        } else {
-            THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not" << value;
-        }
-    });
-
-    if_set(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), [&] {
-        if (value == PluginConfigParams::YES) {
-            exclusive_async_requests  = true;
-        } else if (value == PluginConfigParams::NO) {
-            exclusive_async_requests  = false;
-        } else {
-            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(PRECISION), [&] {
-        auto precision = Precision::FromStr(value);
-        if (precision != Precision::I8 && precision != Precision::I16) {
-            THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
-        }
-        gnaPrecision = precision;
-    });
-
-    if_set(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), [&] {
-        if (value == PluginConfigParams::YES) {
-            uniformPwlDesign = true;
-        } else if (value == PluginConfigParams::NO) {
-            uniformPwlDesign = false;
-        } else {
-            THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
-                                                            << "should be equal to YES/NO, but not" << value;
-        }
-    });
-
-    if_set(CONFIG_KEY(PERF_COUNT), [&] {
-        if (value == PluginConfigParams::YES) {
-            performance_counting = true;
-        } else if (value == PluginConfigParams::NO) {
-            performance_counting = false;
-        } else {
-            THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
-                                                            << "should be equal to YES/NO, but not" << value;
-        }
-    });
-
-    if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
-        uint64_t lib_threads = std::stoul(value, NULL, 10);
-        if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
-            THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
-                                                            << ", should be greateer than 0 and less than 127";
-        }
-        gna_lib_async_threads_num = lib_threads;
-    });
-
-    if_set(CONFIG_KEY(SINGLE_THREAD), [&] {
-        if (value == PluginConfigParams::YES) {
-            gna_openmp_multithreading  = false;
-        } else if (value == PluginConfigParams::NO) {
-            gna_openmp_multithreading  = true;
-        } else {
-            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
-        }
-    });
+    SetConfig(configMap);
 }
 
-GNAPluginNS::GNAPlugin::LayerType GNAPlugin::LayerTypeFromStr(const std::string &str) {
+GNAPluginNS::GNAPlugin::LayerType GNAPlugin::LayerTypeFromStr(const std::string &str) const {
     static const caseless_map<std::string, GNAPlugin::LayerType> LayerNameToType = {
         { "Input" , Input },
         { "Convolution" , Convolution },
@@ -1433,13 +1457,14 @@ bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage
     auto network_precision = network.getPrecision();
     network.getInputsInfo(inputs);
     auto network_input_precision = inputs.begin()->second->getInputPrecision();
-    auto batch_sise = network.getBatchSize();
+    auto batch_size = network.getBatchSize();
     if (network_precision != Precision::FP32) {
         errMessage = "The plugin does not support networks with " + std::string(network_precision.name()) + " format.\n";
         return false;
     }
     if (network_input_precision != Precision::FP32 &&
-        network_input_precision != Precision::I16) {
+        network_input_precision != Precision::I16 &&
+        network_input_precision != Precision::U8) {
         errMessage = "The plugin does not support input precision with " + std::string(network_input_precision.name()) + " format.\n";
         return false;
     }
@@ -1469,7 +1494,9 @@ bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage
                                                     errMessage = "Layer is unsupported by GNA: " + layer->name + ":" + layer->type + "\n";
                                                     check_result =  false;
                                                 }
-                                                if (batch_sise != 1 && LayerInfo::isBatchSizeConstrained(layer->type)) {
+                                                if (batch_size != 1 && LayerInfo::isBatchSizeConstrained(layer->type)) {
+                                                    errMessage = "topology with layer: " + layer->name + ", type: " + layer->type +
+                                                                 ", and batch size(" + to_string(batch_size) + ") != 1 not supported";
                                                     check_result =  false;
                                                 }
                                             }, false);
@@ -1477,6 +1504,10 @@ bool GNAPlugin::AreLayersSupported(ICNNNetwork& network, std::string& errMessage
     return check_result;
 }
 
+float GNAPlugin::get_input_scale_factor() const {
+    return input_scale_factor.empty() ? 1.0 : input_scale_factor.begin()->second;
+}
+
 void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     //  Check the input network
     std::string error;
@@ -1490,21 +1521,34 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
         substitutePRelu(layers);
         layers = CNNNetSortTopologically(*network.get());
         reorderMaxPool(layers);
-        applyOrientations(layers);
+        //  ToDo sort if bool flag "changed"
+        //  returned from insertion function
+        insertAligningFilterLayer(layers);
+
+#if ENABLE_AUTO_PERMUTE
+        layers = CNNNetSortTopologically(*network.get());
+        reversePermutations(layers);
+#endif
+        layers = CNNNetSortTopologically(*network.get());
         insertIdentityLayer(layers);
+        layers = CNNNetSortTopologically(*network.get());
+        insertCopyLayer(layers);
+        layers = CNNNetSortTopologically(*network.get());
         insertDiagonalLayer(layers);
+        layers = CNNNetSortTopologically(*network.get());
+        substituteScaleShiftBroadCast(layers);
     };
 
     Config supported = Config({
         {TargetDevice::eGNA, Precision::FP32, [&](InferenceEngine::ICNNNetwork &network) -> CNNNetworkPtr {
             if (gnaPrecision == Precision::I16) {
                 ModelQuantizer<QuantI16> q;
-                return q.quantize(network, run_passes, input_scale_factor);
+                return q.quantize(network, run_passes, get_input_scale_factor());
             }
 
             if (gnaPrecision == Precision::I8) {
                 ModelQuantizer<QuantI8> q;
-                return q.quantize(network, run_passes, input_scale_factor);
+                return q.quantize(network, run_passes, get_input_scale_factor());
             }
             THROW_GNA_EXCEPTION << "no mans land for GNA precision";
         }},
@@ -1529,24 +1573,13 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
 
     supported.setDefaultDevice(TargetDevice::eGNA);
     auto newNet = supported.find_configuration(network).convert(network);
-    auto networkPrecision = newNet->getPrecision();
 
-    if (!networkPrecision.is_float()) {
-        gnadevice.reset(new GNADeviceHelper(gna_proc_type,
-                                            gna_lib_async_threads_num,
-                                            gna_openmp_multithreading,
-                                            performance_counting));
-        gnamem.reset(new gna_memory_type(
-                    make_polymorph<GNAAllocator>(*gnadevice.get()), PAGE_SIZE_BYTES));
-    } else {
-        gnamem.reset(new gna_memory_type(make_polymorph<std::allocator<uint8_t>>()));
-    }
+
 
     // creating intel dnn_t structures from network
     auto sortedNet = CNNNetSortTopologically(*newNet);
     std::vector<CNNLayerPtr> sortedNoMem;
-    std::map<std::string,
-                    std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
+    std::unordered_map<std::string, std::vector<InferenceEngine::CNNLayerPtr>> memoryPairs;
     // find all memory layers pairs and mark which one used as outputs
     for (auto &layer : sortedNet) {
         auto generic = dynamic_cast<GenericLayer *>(layer.get());
@@ -1572,16 +1605,28 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     // fill in extra storage with memory layers
     fillMemoryConnections(memoryPairs);
 
+    if (memory_connection.size() != 0) {
+        gna_lib_async_threads_num = 1;
+    }
+
+    auto networkPrecision = newNet->getPrecision();
+
+    if (!networkPrecision.is_float()) {
+        gnadevice.reset(new GNADeviceHelper(gna_proc_type,
+                                            gna_lib_async_threads_num,
+                                            gna_openmp_multithreading,
+                                            performance_counting));
+        gnamem.reset(new gna_memory_type(
+                make_polymorph<GNAAllocator>(*gnadevice.get()), PAGE_SIZE_BYTES));
+    } else {
+        gnamem.reset(new gna_memory_type(make_polymorph<std::allocator<uint8_t>>()));
+    }
+
     // keep inputs information and create input primitives
     newNet->getInputsInfo(inputsDataMap);
     if (inputsDataMap.empty()) {
         THROW_GNA_EXCEPTION << " No inputs for the topology";
     }
-    if (inputsDataMap.size() != 1) {
-        THROW_GNA_EXCEPTION << " cannot infer topologies with more than one inputs";
-    }
-
-    inputDims = inputsDataMap.begin()->second->getDims();
 
     // keep output dims
     newNet->getOutputsInfo(outputsDataMap);
@@ -1593,7 +1638,10 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     }
     outputDims = outputsDataMap.begin()->second->dims;
 
-    ptr_inputs_global.resize(gna_lib_async_threads_num);
+    for (auto && input : inputsDataMap) {
+        get_ptr_inputs_global(input.first).resize(gna_lib_async_threads_num);
+    }
+
     ptr_outputs_global.resize(gna_lib_async_threads_num);
     // CreatingLayer primitives
     // TODO: solely gna_example convolution hack
@@ -1601,11 +1649,25 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     for (auto layer = sortedNoMem.begin(); layer != sortedNoMem.end(); ++layer) {
         CreateLayerPrimitive(*layer);
     }
-    gnamem->bind_ptr(&ptr_outputs_global.front(), &dnnComponentsForLayer.back().second.ptr_outputs);
+    DnnComponentsForLayer::iterator output_component = std::find_if(dnnComponentsForLayer.begin(),
+                                                        dnnComponentsForLayer.end(),
+                                                        [&](const std::pair<std::string, intel_dnn_component_t>& v)
+                                                        { return outputsDataMap.begin()->first == v.first; });
+
+    if (output_component == dnnComponentsForLayer.end()) {
+        if (dnnComponentsForLayer.empty()) {
+            THROW_GNA_EXCEPTION << "No outputs found in internal structures";
+        }
+        // likely layer is fused. Take last one
+        output_component = std::prev(dnnComponentsForLayer.end());
+        gnalog() << "Output layer "<< outputsDataMap.begin()->first
+                    << " has not been found in component list. Took  "
+                    << output_component->first << " instead \n" << std::flush;
+    }
+    gnamem->bind_ptr(&ptr_outputs_global.front(), &output_component->second.ptr_outputs);
 
     // make room for active list
-    auto &last_component = dnnComponentsForLayer.back().second;
-    gnamem->reserve_ptr(nullptr, ALIGN64(last_component.num_bytes_per_output * last_component.num_rows_out));
+    gnamem->reserve_ptr(nullptr, ALIGN64(output_component->second.num_bytes_per_output * output_component->second.num_rows_out));
 
     void *pParallelExecutionData  = nullptr;
 
@@ -1630,16 +1692,16 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
     // in fp32 mode last PWL cannot be computed without that
     dnn.InitActiveList(NULL);
 
-    nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(0), -1, InferenceEngine::BlobMap()));
+    nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap()));
 
     if (!networkPrecision.is_float()) {
         // number of layer gets calculated inside that InitGNAStruct function
         dnn.InitGNAStruct(&std::get<0>(nnets.front())->obj);
     }
 
-    // creating same gna RW segment for paralle infer requests
+    // creating same gna RW segment for parallel infer requests
     for (int i = 1; i != gna_lib_async_threads_num; i++) {
-        nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(0), -1, InferenceEngine::BlobMap()));
+        nnets.push_back(std::make_tuple(make_shared<CPPWrapper<intel_nnet_type_t>>(), -1, InferenceEngine::BlobMap()));
 
         // this can be improved by just copy all structures, but we are too lazy
         dnn.InitGNAStruct(&std::get<0>(nnets.back())->obj);
@@ -1656,7 +1718,10 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
             }
         };
 
-        relocate(ptr_inputs_global[i], ptr_inputs_global[0]);
+        for (auto &&input : ptr_inputs_global_storage) {
+            relocate(input[i], input[0]);
+        }
+
         relocate(ptr_outputs_global[i], ptr_outputs_global[0]);
         for (int j = 0; j != std::get<0>(nnets.front())->obj.nLayers; j++) {
             auto & layer = std::get<0>(nnets[i])->obj.pLayers[j];
@@ -1666,11 +1731,60 @@ void GNAPlugin::LoadNetwork(ICNNNetwork &network) {
             relocate(layer.pOutputsIntermediate, layer.pOutputsIntermediate);
         }
     }
-    orientation_in = dnn.component[0].orientation_in;
-    orientation_out = dnn.component[dnn.num_components()-1].orientation_out;
-    num_bytes_per_output = dnn.component[dnn.num_components()-1].num_bytes_per_output;
 
-    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(sortedNoMem.back());
+    // calculating input orientation without memory layers, since their orientation not changed during infer right now
+    std::unordered_map<string, string> skippedLayers;
+    for (auto &layer : sortedNet) {
+        for (int i = 0; CNNNetHasPrevLayer(layer.get(), i); i++) {
+            auto prevLayer = CNNNetPrevLayer(layer.get(), i);
+            if (!skippedLayers.count(prevLayer->name)) {
+                if (CNNNetHasPrevLayer(prevLayer.get())) {
+                    continue;
+                }
+
+                // we are in the one of input layers
+                if (LayerInfo(prevLayer).isMemory()) {
+                    continue;
+                }
+            }
+
+            auto dnnLayer = findDnnLayer(layer);
+            string inputName = prevLayer->name;
+            if (skippedLayers.count(prevLayer->name)) {
+                inputName = skippedLayers[prevLayer->name];
+            }
+
+            // non functional layer - skipped by gna
+            if (nullptr == dnnLayer) {
+                // storing input name for skipped layer
+                skippedLayers[layer->name] = inputName;
+                continue;
+            }
+
+            // input orientation might be already initialized, thus verify that it matches
+            if (!orientation_in.count(inputName)) {
+                orientation_in[inputName] = dnnLayer->orientation_in;
+            } else {
+                if (orientation_in[inputName] != dnnLayer->orientation_in) {
+                    THROW_GNA_EXCEPTION << "orientation for input layer: " << inputName << "cannot be calculated";
+                }
+            }
+        }
+    }
+
+    orientation_out = output_component->second.orientation_out;
+    num_bytes_per_output = output_component->second.num_bytes_per_output;
+
+    // find output layer
+    auto output = std::find_if(sortedNet.begin(),
+                                sortedNet.end(),
+                                [&](const CNNLayerPtr& v)
+                                { return outputsDataMap.begin()->first == v.get()->name; });
+    if (output == sortedNet.end()) {
+        // likely layer is fused. Take last one
+        output = std::prev(sortedNet.end());
+    }
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(*output);
     output_scale_factor = quantized != nullptr ? quantized->_dst_quant.scale : 1.0f;
 
     num_rotate_rows = dnn.num_rotate_rows;
@@ -1692,7 +1806,7 @@ void GNAPlugin::DumpXNNToFile() const {
         }
         auto dump = gnadevice->dumpXnn(&std::get<0>(nnets.front())->obj, ptr_active_indices, num_active_indices);
         dump.header.rw_region_size = gnamem->getRWBytes();
-        dump.header.input_scaling_factor = input_scale_factor;
+        dump.header.input_scaling_factor = get_input_scale_factor();
         dump.header.output_scaling_factor = output_scale_factor;
         std::ofstream dumpStream(dumpXNNPath, std::ios::out | std::ios::binary);
         dumpStream.write(reinterpret_cast<char*>(&dump.header), sizeof(intel_gna_model_header));
@@ -1726,69 +1840,81 @@ void RotateFeatures(uint8_t *ptr_feat,
     }
 }
 
-uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
-    return QueueInference(*input.begin()->second.get(), result);
-
-    /*if (!syncPoints.empty()) {
-        syncPoints.back().second = result;
-    }*/
-}
-
-uint32_t GNAPlugin::QueueInference(const InferenceEngine::Blob &input, InferenceEngine::BlobMap &result) {
-    auto inputLayout = input.layout();
-    if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
-        THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: " << input.layout();
-    }
-    if (inputLayout == NCHW) {
-        inputLayout = NC;
-    }
-    auto is2D = input.layout() ==  Layout::NC || input.layout() == Layout ::CN;
-
+uint32_t GNAPlugin::QueueInference(const InferenceEngine::BlobMap &inputs, InferenceEngine::BlobMap &result) {
     auto freeNnet = std::find_if(std::begin(nnets), std::end(nnets), [](decltype(nnets.front()) & item) {
         return std::get<1>(item) == -1;
     });
 
     if (freeNnet == nnets.end()) {
-        THROW_IE_EXCEPTION << as_status << REQUEST_BUSY
-                           << "GNA executable network has max of " << static_cast<uint32_t >(gna_lib_async_threads_num)
-                           << " parallel infer requests, please sync one of already running";
+        if (memory_connection.size() != 0) {
+            Wait(0);
+            freeNnet = nnets.begin();
+        } else {
+            THROW_IE_EXCEPTION << as_status << REQUEST_BUSY
+                               << "GNA executable network has max of "
+                               << static_cast<uint32_t >(gna_lib_async_threads_num)
+                               << " parallel infer requests, please sync one of already running";
+        }
     }
 
+
     auto nnet = std::get<0>(*freeNnet).get();
     auto idx = static_cast<uint32_t>(std::distance(std::begin(nnets), freeNnet));
 
-    if (ptr_inputs_global[idx] == nullptr) {
-        // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
-        THROW_GNA_EXCEPTION << "network not loaded : global input pointer not set";
-    }
+    for (auto &input : inputs) {
+        auto inputLayout = input.second->layout();
+        if (inputLayout != Layout::NC && inputLayout != Layout::CN && inputLayout != NCHW) {
+            THROW_GNA_EXCEPTION << "Expected input blob to have Layout::NC or Layout::CN, but was: "
+                                << input.second->layout();
+        }
+        if (inputLayout == NCHW) {
+            inputLayout = NC;
+        }
+        auto is2D = input.second->layout() == Layout::NC || input.second->layout() == Layout::CN;
 
-    if (orientation_in == kDnnUnknownOrientation) {
-        // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
-        THROW_GNA_EXCEPTION << "network not loaded : input orientation not set";
-    }
+        if (!ptr_inputs_global_id.count(input.first)) {
+            // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
+            THROW_GNA_EXCEPTION << "network not loaded : input pointer for " << input.first << " not set";
+        }
 
-    if (orientation_out == kDnnUnknownOrientation) {
-        // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
-        THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
-    }
+        if (get_ptr_inputs_global(input.first)[idx] == nullptr) {
+            // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
+            THROW_GNA_EXCEPTION << "network not loaded : input pointer for (" << input.first << " at inferRequest #"
+                                << idx << " not set";
+        }
 
-    ImportFrames(ptr_inputs_global[idx],
-                 input.cbuffer().as<float *>(),
-                 input.precision(),
-                 orientation_in,
-                 input.dims()[input.dims().size() - 1],
-                 is2D ? input.dims()[1] : input.dims()[input.dims().size() - 1],
-                 is2D ? input.dims()[0] :input.dims()[0]*input.dims()[2],
-                 is2D ? input.dims()[0] :input.dims()[0]*input.dims()[2]);
+        if (orientation_in[input.first] == kDnnUnknownOrientation) {
+            // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
+            THROW_GNA_EXCEPTION << "network not loaded : input orientation for " << input.first << " not set";
+        }
+
+        if (orientation_out == kDnnUnknownOrientation) {
+            // should not happen in user code however might happen if there any non executable network based integration of GNAPlugin instance
+            THROW_GNA_EXCEPTION << "network not loaded : output orientation not set";
+        }
 
-    if ((inputLayout == Layout::NC || inputLayout == Layout::NCHW) != (orientation_in == kDnnInterleavedOrientation)) {
-        RotateFeatures(reinterpret_cast<uint8_t*>(ptr_inputs_global[idx]),
-                       gnadevice ? 2 : 4,
-                       // TODO: only works for cnn4a and google command so far
-                       input.dims()[input.dims().size() - 1],
-                       is2D ? input.dims()[0] :input.dims()[0]*input.dims()[2],  // num_feature_vectors looks batch should be there
-                       num_rotate_rows,
-                       num_rotate_columns);
+        auto dims = input.second->dims();
+
+        ImportFrames(get_ptr_inputs_global(input.first)[idx],
+                     input.second->cbuffer().as<float *>(),
+                     input.second->precision(),
+                     orientation_in[input.first],
+                     dims[dims.size() - 1],
+                     is2D ? dims[1] : dims[dims.size() - 1],
+                     is2D ? dims[0] : dims[0] * dims[1] * dims[2],
+                     is2D ? dims[0] : dims[0] * dims[1] * dims[2]);
+        bool isOneChannel = input.second->getTensorDesc().getDims()[1] == 1;
+        if (((inputLayout == Layout::NC || inputLayout == Layout::NCHW)
+            != (orientation_in[input.first] == kDnnInterleavedOrientation))
+            && !isOneChannel) {
+            RotateFeatures(reinterpret_cast<uint8_t *>(get_ptr_inputs_global(input.first)[idx]),
+                           gnadevice ? 2 : 4,
+                           // TODO: only works for cnn4a and google command so far
+                           dims[dims.size() - 1],
+                           is2D ? dims[0] : dims[0] * dims[2],  // num_feature_vectors looks batch should be there
+                           num_rotate_rows,
+                           num_rotate_columns);
+        }
     }
 
     if (!gnadevice) {
@@ -1810,7 +1936,7 @@ void GNAPlugin::Wait(uint32_t idx) {
     }
 
     std::get<1>(nnets[idx]) = -1;
-    auto & output = *std::get<2>(nnets[idx]).begin()->second;
+    auto & result = std::get<2>(nnets[idx]);
 #ifdef PLOT
     dnn.BeginNewWrite();
     if (dnn.num_components() != 0) {
@@ -1819,18 +1945,38 @@ void GNAPlugin::Wait(uint32_t idx) {
     }
     dnn.WriteInputAndOutputTextGNA(&std::get<0>(nnets.front())->obj);
 #endif
+    if (result.size() != 1) {
+        THROW_GNA_EXCEPTION << "Invalid number of outputs for infer request: " << result.size() << ",  only 1 supported";
+    }
+    auto & output = *result.begin()->second;
 
     if (output.layout() == Layout::NC) {
         // TODO: rotate can be incorporated with exporting - used only in unit tests so far
         // TODO: restore:
 //        if (orientation_out != kDnnInterleavedOrientation) {
+//            if (inputs.size() != 1) {
+//                THROW_GNA_EXCEPTION << "Invalid number of inputs for  for deinterleave " << inputs.size()
+//                                    << ", only 1 supported";
+//            }
+//            auto dims = inputs.begin()->second->dims();
 //            RotateFeatures(reinterpret_cast<uint8_t*>(ptr_outputs_global),
 //                           gnadevice ? 2 : 4,
-//                           input.dims()[input.dims().size() - 1],
-//                           input.dims()[0],  // num_feature_vectors looks batch should be there
-//                           input.dims()[0],
-//                           input.dims()[input.dims().size() - 1]);
+//                           dims[dims.size() - 1],
+//                           dims[0],  // num_feature_vectors looks batch should be there
+//                           dims[0],
+//                           dims[dims.size() - 1]);
 //        }
+        // we concider the last layer as output ...
+        size_t output_layer_index = std::max(0, static_cast<int>(std::get<0>(nnets[idx])->obj.nLayers - 1));
+        if (gnadevice && std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].pOutputs != ptr_outputs_global[idx]) {
+            // ...as this is not true, we should look for output layer index
+            for (int j = 0; j != std::get<0>(nnets[idx])->obj.nLayers; j++) {
+                if (std::get<0>(nnets[idx])->obj.pLayers[j].pOutputs == ptr_outputs_global[idx]) {
+                    output_layer_index = j;
+                    break;
+                }
+            }
+        }
 
         ExportScores(output.buffer(),
                      ptr_outputs_global[idx],
@@ -1841,7 +1987,7 @@ void GNAPlugin::Wait(uint32_t idx) {
                      output.dims()[0],
                      output.dims()[0],
                      // TODO: create better getter consider multiple outputs case
-                     gnadevice ? std::get<0>(nnets[idx])->obj.pLayers[std::get<0>(nnets[idx])->obj.nLayers - 1].nBytesPerOutput : sizeof(float),
+                     gnadevice ? std::get<0>(nnets[idx])->obj.pLayers[output_layer_index].nBytesPerOutput : sizeof(float),
                      sizeof(float));
     } else if (output.layout() != Layout::CN) {
         THROW_GNA_EXCEPTION << "Expected output blob to have Layout::NC or Layout::CN. But was " << output.layout();
@@ -1884,13 +2030,6 @@ void GNAPlugin::Wait(uint32_t idx) {
     }
 }
 
-
-void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
-    BlobMap result;
-    result["output"] = std::shared_ptr<Blob>(&output, [](Blob*){});
-    Wait(QueueInference(input, result));
-}
-
 void GNAPlugin::Reset() {
     for (auto && memLayer : memory_connection) {
         std::memset(memLayer.second.gna_ptr, 0, memLayer.second.reserved_size);
@@ -1900,10 +2039,23 @@ void GNAPlugin::Reset() {
     }
 }
 
-void GNAPlugin::Infer(const BlobMap &inputs, BlobMap &result) {
-    auto &input = *inputs.begin()->second.get();
-    auto &output = *result.begin()->second.get();
-    Infer(input, output);
+void GNAPlugin::Infer(const InferenceEngine::Blob &input, InferenceEngine::Blob &output) {
+    BlobMap bmInput;
+    BlobMap bmOutput;
+    if (inputsDataMap.size() != 1) {
+        THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&)"<< "model accepts " << inputsDataMap.size() << "inputs";
+    }
+    if (outputsDataMap.size() != 1) {
+        THROW_GNA_EXCEPTION << "cannot infer using Infer(Blob&, Blob&)"<< "model accepts " << outputsDataMap.size() << "outputs";
+    }
+
+    bmInput[inputsDataMap.begin()->first] = std::shared_ptr<Blob>(const_cast<Blob*>(&input), [](Blob*){});
+    bmOutput[outputsDataMap.begin()->first] = std::shared_ptr<Blob>(&output, [](Blob*){});
+    Infer(bmInput, bmOutput);
+}
+
+void GNAPlugin::Infer(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result) {
+    Wait(QueueInference(input, result));
 }
 
 Blob::Ptr GNAPlugin::GetOutputBlob(InferenceEngine::Precision precision) {
@@ -1914,10 +2066,11 @@ Blob::Ptr GNAPlugin::GetOutputBlob(InferenceEngine::Precision precision) {
     return outputBlob;
 }
 
-Blob::Ptr GNAPlugin::GetInputBlob(InferenceEngine::Precision precision) {
+Blob::Ptr GNAPlugin::GetInputBlob(std::string name, InferenceEngine::Precision precision) {
     InferenceEngine::Blob::Ptr inputBlob;
     // need to have intermediate blob for interleave conversion
     // TODO: NCHW format support is experimental = c++ MO did insert reshape, while TF mo - not
+    auto inputDims = inputsDataMap[name]->getDims();
     inputBlob = make_blob_with_precision(precision, inputDims.size() == 2 ? NC : NCHW, inputDims);
     inputBlob->allocate();
     return inputBlob;
@@ -1955,7 +2108,8 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
     auto serial = GNAModelSerial(&std::get<0>(nnets.back())->obj, mt);
     serial.Import(basePtr, header.gnaMemSize, inputStream);
 
-    ptr_inputs_global.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.input.descriptor_offset));
+
+    get_ptr_inputs_global("input").push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.input.descriptor_offset));
     ptr_outputs_global.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + header.output.descriptor_offset));
 
     auto getOrientation = [](intel_nnet_layer_t & layer) {
@@ -1963,14 +2117,14 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
            kDnnNonInterleavedOrientation : kDnnInterleavedOrientation;
     };
 
-    orientation_in = getOrientation(std::get<0>(nnets.back())->obj.pLayers[0]);
+    orientation_in["input"] = getOrientation(std::get<0>(nnets.back())->obj.pLayers[0]);
     orientation_out = getOrientation(std::get<0>(nnets.back())->obj.pLayers[std::get<0>(nnets.back())->obj.nLayers-1]);
 
     num_bytes_per_output = header.output.element_size;
 
 
     outputDims = SizeVector({header.output.elements_count / header.nGroup, header.nGroup});
-    inputDims = SizeVector({header.input.elements_count / header.nGroup, header.nGroup});
+    auto inputDims = SizeVector({header.input.elements_count / header.nGroup, header.nGroup});
 
     inputsDataMap["input"] = std::make_shared<InputInfo>();
     inputsDataMap["input"]->setInputData(make_shared<Data>("input",
@@ -1983,7 +2137,7 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
                                                  Layout::NC);
 
     output_scale_factor = header.output.scaleFactor;
-    input_scale_factor = header.input.scaleFactor;
+    input_scale_factor["input"] = header.input.scaleFactor;
 
     num_rotate_rows = header.nRotateRows;
     num_rotate_columns = header.nRotateColumns;
@@ -2007,20 +2161,25 @@ InferenceEngine::IExecutableNetwork::Ptr GNAPlugin::ImportNetwork(const std::str
 }
 
 void GNAPlugin::Export(const std::string &fileName) {
-    if (ptr_inputs_global.empty() || ptr_outputs_global.empty()) {
+    if (ptr_inputs_global_id.empty() || ptr_outputs_global.empty()) {
         THROW_GNA_EXCEPTION << " network not loaded";
     }
 
+    if (ptr_inputs_global_id.size() != 1) {
+        THROW_GNA_EXCEPTION << " exporting network with multiple inputs not supported";
+    }
+
     std::fstream outStream(fileName, ios_base::out | ios_base::binary);
 
     // TODO: nnet group parameter looks only used in application - so can we move this line into load network.
+    auto inputDims = inputsDataMap.begin()->second->getDims();
     if (inputDims.size() == 2) {
         std::get<0>(nnets.front())->obj.nGroup = inputDims[1];
     }
 
     auto serial = GNAModelSerial(&std::get<0>(nnets.front())->obj,
-                   {input_scale_factor,
-                    ptr_inputs_global[0],
+                   {get_input_scale_factor(),
+                    ptr_inputs_global_storage.front()[0],
                     2,
                     static_cast<uint32_t>(InferenceEngine::details::product(inputsDataMap.begin()->second->getDims()))},
                    {output_scale_factor,
@@ -2043,7 +2202,209 @@ void GNAPlugin::GetPerformanceCounts(std::map<std::string, InferenceEngine::Infe
 }
 
 void GNAPlugin::AddExtension(InferenceEngine::IExtensionPtr extension) {}
-void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {}
+
+void GNAPlugin::SetConfig(const std::map<std::string, std::string> &config) {
+    std::vector<std::string> supportedConfigOptions = {
+        GNA_CONFIG_KEY(SCALE_FACTOR),
+        GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE),
+        GNA_CONFIG_KEY(DEVICE_MODE),
+        GNA_CONFIG_KEY(COMPACT_MODE),
+        CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS),
+        GNA_CONFIG_KEY(PRECISION),
+        GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN),
+        CONFIG_KEY(PERF_COUNT),
+        GNA_CONFIG_KEY(LIB_N_THREADS),
+        CONFIG_KEY(SINGLE_THREAD)
+    };
+
+    for (auto& item : config) {
+        auto keys = std::find_if(supportedConfigOptions.begin(), supportedConfigOptions.end(), [&item](std::string supportedConfigOption) {
+            return item.first.find(supportedConfigOption) != std::string::npos;
+        });
+        if (keys == supportedConfigOptions.end()) {
+            THROW_GNA_EXCEPTION << as_status << NOT_FOUND << "Incorrect GNA Plugin config. Key " << item.first << " not supported";
+        }
+    }
+
+    // holds actual value of a found key
+    std::string key;
+    std::string value;
+    auto if_set = [&](std::string keyInput, const std::function<void()> & handler) {
+        auto keyInMap = config.find(keyInput);
+        if (keyInMap != config.end()) {
+            value = keyInMap->second;
+            handler();
+        }
+    };
+
+    auto if_start = [&](std::string keyInput, const std::function<void()> & handler) {
+        for (auto && c : config) {
+            if (c.first.find(keyInput) == 0) {
+                if (c.first.size() > keyInput.size() + 1) {
+                    key = c.first.substr(keyInput.size() + 1);
+                    value = c.second;
+                    handler();
+                }
+            }
+        }
+    };
+
+    auto fp32eq = [](float p1, float p2) -> bool {
+        return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
+    };
+
+    auto & log = gnalog();
+
+    if_start(GNA_CONFIG_KEY(SCALE_FACTOR), [&, this] {
+        // only identical scale factors supported so far
+        auto ref = input_scale_factor.size() ? input_scale_factor.begin()->second : 1.0;
+        input_scale_factor[key] = std::stod(value);
+        if (ref != 1.0 && !fp32eq(input_scale_factor[key], ref)) {
+            std::string message = "only identical input scale factors supported, but provided: "
+                    + std::to_string(ref) + " and " + std::to_string(input_scale_factor[key]);
+            log << "only identical input scale factors supported, but provided: " << ref <<" and " << input_scale_factor[key];
+            THROW_GNA_EXCEPTION << "only identical input scale factors supported, but provided: " << ref <<" and " << input_scale_factor[key];
+        }
+    });
+
+    if (input_scale_factor.empty()) {
+        if_set(GNA_CONFIG_KEY(SCALE_FACTOR), [&] {
+            input_scale_factor["placeHolder"] = std::stod(value);
+        });
+    }
+
+    if_set(GNA_CONFIG_KEY(FIRMWARE_MODEL_IMAGE), [&] {
+        dumpXNNPath = value;
+    });
+
+    if_set(GNA_CONFIG_KEY(DEVICE_MODE), [&] {
+        static caseless_unordered_map <std::string, uint32_t> supported_values = {
+                {GNAConfigParams::GNA_AUTO, GNA_AUTO},
+                {GNAConfigParams::GNA_HW, GNA_HARDWARE},
+                {GNAConfigParams::GNA_SW, GNA_SOFTWARE},
+                {GNAConfigParams::GNA_SW_EXACT, GNA_SOFTWARE & GNA_HARDWARE}
+        };
+        auto procType = supported_values.find(value);
+        if (procType == supported_values.end()) {
+            log << "GNA device mode unsupported: " << value;
+            THROW_GNA_EXCEPTION << "GNA device mode unsupported: " << value;
+        }
+        gna_proc_type = static_cast<intel_gna_proc_t>(procType->second);
+    });
+
+    if_set(GNA_CONFIG_KEY(COMPACT_MODE), [&] {
+        if (value == PluginConfigParams::YES) {
+            compact_mode = true;
+        } else if (value == PluginConfigParams::NO) {
+            compact_mode = false;
+        } else {
+            log << "GNA compact mode should be YES/NO, but not" << value;
+            THROW_GNA_EXCEPTION << "GNA compact mode should be YES/NO, but not" << value;
+        }
+    });
+
+    if_set(CONFIG_KEY(EXCLUSIVE_ASYNC_REQUESTS), [&] {
+        if (value == PluginConfigParams::YES) {
+            exclusive_async_requests  = true;
+        } else if (value == PluginConfigParams::NO) {
+            exclusive_async_requests  = false;
+        } else {
+            log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+        }
+    });
+
+    if_set(GNA_CONFIG_KEY(PRECISION), [&] {
+        auto precision = Precision::FromStr(value);
+        if (precision != Precision::I8 && precision != Precision::I16) {
+            log << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
+            THROW_GNA_EXCEPTION << "Unsupported precision of GNA hardware, should be Int16 or Int8, but was: " << value;
+        }
+        gnaPrecision = precision;
+    });
+
+    if_set(GNA_CONFIG_KEY(PWL_UNIFORM_DESIGN), [&] {
+        if (value == PluginConfigParams::YES) {
+            uniformPwlDesign = true;
+        } else if (value == PluginConfigParams::NO) {
+            uniformPwlDesign = false;
+        } else {
+            log << "GNA pwl uniform algorithm parameter "
+                << "should be equal to YES/NO, but not" << value;
+            THROW_GNA_EXCEPTION << "GNA pwl uniform algorithm parameter "
+                                << "should be equal to YES/NO, but not" << value;
+        }
+    });
+
+    if_set(CONFIG_KEY(PERF_COUNT), [&] {
+        if (value == PluginConfigParams::YES) {
+            performance_counting = true;
+        } else if (value == PluginConfigParams::NO) {
+            performance_counting = false;
+        } else {
+            log << "GNA performance counter enabling parameter "
+                << "should be equal to YES/NO, but not" << value;
+            THROW_GNA_EXCEPTION << "GNA performance counter enabling parameter "
+                                << "should be equal to YES/NO, but not" << value;
+        }
+    });
+
+    if_set(GNA_CONFIG_KEY(LIB_N_THREADS), [&] {
+        uint64_t lib_threads = std::stoul(value, NULL, 10);
+        if (lib_threads == 0 || lib_threads > std::numeric_limits<uint8_t>::max()/2-1) {
+            log << "Unsupported accelerator lib number of threads: " << value << ", should be greateer than 0 and less than 127";
+            THROW_GNA_EXCEPTION << "Unsupported accelerator lib number of threads: " << value
+                                << ", should be greateer than 0 and less than 127";
+        }
+        gna_lib_async_threads_num = lib_threads;
+    });
+
+    if_set(CONFIG_KEY(SINGLE_THREAD), [&] {
+        if (value == PluginConfigParams::YES) {
+            gna_openmp_multithreading  = false;
+        } else if (value == PluginConfigParams::NO) {
+            gna_openmp_multithreading  = true;
+        } else {
+            log << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+            THROW_GNA_EXCEPTION << "EXCLUSIVE_ASYNC_REQUESTS should be YES/NO, but not" << value;
+        }
+    });
+}
+
+/**
+ * @depricated Use the version with config parameter
+ */
+void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+                             InferenceEngine::QueryNetworkResult& res) const {
+    QueryNetwork(network, {}, res);
+}
+
+void GNAPlugin::QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+                             const std::map<std::string, std::string>& config,
+                             InferenceEngine::QueryNetworkResult& res) const {
+    std::unordered_set<CNNLayer *> allLayers;
+    InferenceEngine::InputsDataMap inputs;
+
+    network.getInputsInfo(inputs);
+    std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
+
+    if (inputs.empty()) {
+        THROW_GNA_EXCEPTION << "Network is empty (GNA)\n";
+    }
+
+    auto const & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
+    if (secondLayers.empty()) {
+        THROW_GNA_EXCEPTION << "Network consists of input layer only (GNA)\n";
+    }
+
+    InferenceEngine::details::UnorderedDFS(allLayers,
+                                           secondLayers.begin()->second,
+                                           [&](CNNLayerPtr const layer) {
+                                                if (GNAPluginNS::GNAPlugin::LayerTypeFromStr(layer->type) != NO_TYPE) {
+                                                    res.supportedLayers.insert(layer->name);
+                                                }
+                                            }, false);
+    }
 
 intel_dnn_component_t * GNAPlugin::find_first_unused_input(InferenceEngine::CNNLayerPtr current) {
     if (current->insData.empty()) return nullptr;
@@ -2076,7 +2437,7 @@ void GNAPlugin::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, voi
                 } else {
                     IE_ASSERT(nextMemoryLayer.reserved_size == ALIGN64(num_data_bytes_out));
                     // same offsets
-                    gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, nextMemoryLayer.reserved_offset);
+                    gnamem->bind_ptr(ptr, &nextMemoryLayer.gna_ptr, 0);
                 }
                 return;
             }
@@ -2119,6 +2480,13 @@ void GNAPlugin::connectOutput(InferenceEngine::CNNLayerPtr layer, void *ptr, voi
                                     });
                         if (included == concat_connection.end()) {
                             gnamem->reserve_ptr(&concatLayerInfoItem.gna_ptr, ALIGN64(concatLayerInfoItem.reserved_size));
+
+                            for (auto && inputLayer : concatLayerInfoItem.concatInputLayers) {
+                                if ( InferenceEngine::details::CaselessEq<std::string>()
+                                                                    (inputLayer.name, "input") ) {
+                                    bytes_alllocated_for_input[inputLayer.name] = ALIGN64(concatLayerInfoItem.reserved_size) - inputLayer.offset;
+                                }
+                            }
                         }
                         concatLayerInfo->second.output_allocation_flag = true;
                     }
@@ -2158,7 +2526,15 @@ intel_dnn_component_t * GNAPlugin::findDnnLayer(CNNLayerPtr __layer) {
     return nullptr;
 }
 
-GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, size_t offset, int idx) {
+std::vector<void *>& GNAPlugin::get_ptr_inputs_global(std::string name) {
+    if (!ptr_inputs_global_id.count(name)) {
+        ptr_inputs_global_storage.push_front({});
+        ptr_inputs_global_id[name] = ptr_inputs_global_storage.begin();
+    }
+    return *ptr_inputs_global_id[name];
+}
+
+GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *ptr, size_t num_data_bytes_in, int32_t offset, int idx) {
     // selecting particular input layers
     auto prevLayer = CNNNetPrevLayer(layer, idx);
 
@@ -2166,15 +2542,24 @@ GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *pt
 
     // real input not a memory input
     if (LayerInfo(prevLayer).isInput()) {
-        if (0 == bytes_alllocated_for_input) {
-            gnamem->push_value(&ptr_inputs_global.front(), static_cast<uint8_t>(0), num_data_bytes_in, 64);
-            bytes_alllocated_for_input = num_data_bytes_in;
+        if (0 == bytes_alllocated_for_input[prevLayer->name]) {
+            gnamem->push_value(&get_ptr_inputs_global(prevLayer->name).front(), static_cast<uint8_t>(0), num_data_bytes_in, 64);
+            bytes_alllocated_for_input[prevLayer->name] = num_data_bytes_in;
         }
-        if (ALIGN(num_data_bytes_in, 64) > ALIGN(bytes_alllocated_for_input, 64)) {
-            THROW_IE_EXCEPTION << "Layer: " << layer->name << " Cannot bind pointer to already allocated input, due to size_allocated="
-                                  << bytes_alllocated_for_input << ", and size_requested=" << num_data_bytes_in;
+        if (ALIGN(num_data_bytes_in, 64) > ALIGN(bytes_alllocated_for_input[prevLayer->name], 64)) {
+            THROW_GNA_EXCEPTION
+                << "Layer: " << layer->name
+                << " Cannot bind pointer to already allocated input(" << prevLayer->name
+                << "), due to size_allocated=" << bytes_alllocated_for_input[prevLayer->name]
+                << ", and size_requested=" << num_data_bytes_in;
         }
-        gnamem->bind_ptr(ptr, &ptr_inputs_global.front(), offset);
+
+        if (offset >= 0) {
+            gnamem->bind_ptr(ptr, &get_ptr_inputs_global(prevLayer->name).front(), offset);
+        } else {
+            gnamem->bind_ptr(&get_ptr_inputs_global(prevLayer->name).front(), ptr, -offset);
+        }
+
         return prevLayer;
     }
 
@@ -2213,7 +2598,7 @@ GNAPlugin::ConnectionDetails GNAPlugin::connectInput(CNNLayerPtr layer, void *pt
                                                     prevLayer->name);
         if (concatLayerInfo != concat_connection.end()) {
             auto & concatLayerInfoItem = concatLayerInfo->second;
-            // dnnLayer that is input for concat output layer
+            // dnnLayer that is input for concat layer
             gnamem->bind_ptr(ptr, &concatLayerInfoItem.gna_ptr, offset);
             // return layer over concat
             return CNNNetPrevLayer(prevLayer);
diff --git a/inference-engine/src/gna_plugin/gna_plugin.hpp b/inference-engine/src/gna_plugin/gna_plugin.hpp
index 53365d7a6..34bc86672 100644
--- a/inference-engine/src/gna_plugin/gna_plugin.hpp
+++ b/inference-engine/src/gna_plugin/gna_plugin.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,6 +9,7 @@
 #include "gna_memory.hpp"
 #include "gna_device.hpp"
 #include <map>
+#include <unordered_map>
 #include <list>
 #include <string>
 #include <utility>
@@ -23,6 +24,7 @@
 #include <graph_tools.hpp>
 #include "gna_allocator.hpp"
 #include "gna_api_wrapper.hpp"
+#include "gna_plugin_policy.hpp"
 
 namespace GNAPluginNS {
 
@@ -49,9 +51,16 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
      */
     std::vector<std::tuple<dnn_ptr, int32_t, InferenceEngine::BlobMap>> nnets;
 
-    intel_dnn_orientation_t orientation_in = kDnnUnknownOrientation;
+    std::unordered_map<std::string, intel_dnn_orientation_t> orientation_in;
     intel_dnn_orientation_t orientation_out = kDnnUnknownOrientation;
-    double input_scale_factor = 1.0;
+
+    /**
+     * temporary solution to support multiple scale factors
+     * @return
+     */
+    float get_input_scale_factor() const;
+    std::unordered_map<std::string, double> input_scale_factor;
+
     double output_scale_factor = 1.0;
     uint32_t num_rotate_rows = 0;
     uint32_t num_rotate_columns = 0;
@@ -60,11 +69,13 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     uint32_t num_feature_maps = 1;
     uint32_t num_memory_bytes;
 
-    std::vector<void *> ptr_inputs_global;
+    std::unordered_map<std::string, std::list<std::vector<void *>>::iterator> ptr_inputs_global_id;
+    std::list<std::vector<void *>> ptr_inputs_global_storage;
+
+    std::vector<void *>& get_ptr_inputs_global(std::string name);
+
     std::vector<void *> ptr_outputs_global;
 
-    int16_t *ptr_int_inputs = NULL;
-    int32_t *ptr_int_outputs = NULL;
     uint32_t *ptr_active_indices = NULL;
     uint32_t num_active_indices = 0;
     uint32_t num_group_in = 0;
@@ -81,7 +92,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     InferenceEngine::Precision gnaPrecision = InferenceEngine::Precision::I16;
 
     bool performance_counting = false;
-    int  bytes_alllocated_for_input = 0;
+
     intel_dnn_number_type_t output_type = kDnnInt;
     std::string utterance_name;
 
@@ -136,14 +147,13 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
      * @deprecated Use the version with config parameter
      */
     void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
-                      InferenceEngine::QueryNetworkResult &res) const override { }
+                      InferenceEngine::QueryNetworkResult &res) const override;
     void QueryNetwork(const InferenceEngine::ICNNNetwork &network,
                       const std::map<std::string, std::string>& config,
-                      InferenceEngine::QueryNetworkResult &res) const override { }
+                      InferenceEngine::QueryNetworkResult &res) const override;
     uint32_t QueueInference(const InferenceEngine::BlobMap &input, InferenceEngine::BlobMap &result);
     void Wait(uint32_t idx = 0);
 
-    uint32_t QueueInference(const InferenceEngine::Blob &input, InferenceEngine::BlobMap &result);
     /**
      *
      * @param sync - points to gna sync point
@@ -163,7 +173,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     /**
      * utility to provide input and output blobs externally to be used by InferenceEngine request API clients
      */
-    InferenceEngine::Blob::Ptr GetInputBlob(InferenceEngine::Precision precision);
+    InferenceEngine::Blob::Ptr GetInputBlob(std::string name, InferenceEngine::Precision precision);
     InferenceEngine::Blob::Ptr GetOutputBlob(InferenceEngine::Precision precision);
     /**
      * helpers to provide inputs info on AOT network
@@ -176,7 +186,13 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
      */
      std::vector<InferenceEngine::IMemoryStateInternal::Ptr>  QueryState();
 
+     /**
+      * test-wise API
+      */
+     void SetPolicy(Policy p) {policy = p;}
+
  protected:
+    Policy policy;
     uint32_t num_cnn_rows_out = 0;
     bool done = false;
     std::string dumpXNNPath;
@@ -185,6 +201,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     void DumpXNNToFile() const;
     void CreateLayerPrimitive(InferenceEngine::CNNLayerPtr);
     void AffinePrimitive(InferenceEngine::CNNLayerPtr, bool isDiag = false);
+    void AffineFilterPrimitive(InferenceEngine::CNNLayerPtr);
     void DiagonalPrimitive(InferenceEngine::CNNLayerPtr);
     void ConvolutionPrimitive(InferenceEngine::CNNLayerPtr);
     void PermutePrimitive(InferenceEngine::CNNLayerPtr);
@@ -198,7 +215,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     void PWLPrimitive(InferenceEngine::CNNLayerPtr);
     void CopyPrimitive(InferenceEngine::CNNLayerPtr);
     bool AreLayersSupported(InferenceEngine::ICNNNetwork& network, std::string& errMessage);
-    LayerType LayerTypeFromStr(std::string const &str);
+    LayerType LayerTypeFromStr(std::string const &str) const;
     /**
      * maps tpe of connection to input and output layers also stores gna_pointer for alloc request
      */
@@ -272,7 +289,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
 
         InferenceEngine::CNNLayerPtr getSplit() { return splitLayer; }
         /**
-         * gna memory of this size is reserved for concat
+         * gna memory of this size is reserved for split
          */
         size_t reserved_size = 0;
         bool output_allocation_flag = false;
@@ -318,16 +335,16 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
         void *gna_ptr = nullptr;
     };
     using MemoryConnection = std::list<std::pair<std::string, GNAMemoryLayer>>;
-    using ConcatConnection = std::map<std::string, GNAConcatLayer>;
-    using SplitConnection  = std::map<std::string, GNASplitLayer>;
-    using CropConnection  = std::map<std::string, GNACropLayer>;
+    using ConcatConnection = std::unordered_map<std::string, GNAConcatLayer>;
+    using SplitConnection  = std::unordered_map<std::string, GNASplitLayer>;
+    using CropConnection  = std::unordered_map<std::string, GNACropLayer>;
     // layers with extra storage for connections and additional
     // non trivial processing
     MemoryConnection memory_connection;
     ConcatConnection concat_connection;
     SplitConnection  split_connection;
     CropConnection   crop_connection;
-    void fillMemoryConnections(std::map<std::string,
+    void fillMemoryConnections(std::unordered_map<std::string,
                                  std::vector<InferenceEngine::CNNLayerPtr>> &memoryPairs);
 
     void fillConcatConnections(InferenceEngine::CNNLayerPtr layer);
@@ -336,7 +353,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
      * maps layer name to dnn.component, in topological sort prev nodes will be initialized
      */
     using DnnComponentsForLayer = std::list<std::pair<std::string, intel_dnn_component_t>>;
-    std::list<std::pair<std::string, intel_dnn_component_t>> dnnComponentsForLayer;
+    DnnComponentsForLayer dnnComponentsForLayer;
 
     /**
      * @brief returns corresponding dnn layer for topology layer
@@ -356,6 +373,15 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     std::unique_ptr<gna_memory_type> gnamem;
 
     /**
+     * Fill in the Affine layer weights
+     * @param layer - affine layer pointer
+     * @param ptrWeights - pointer to weights memory
+     * @param offset - memory before offset value will be zeroed
+     * @param isQuantized - information about layer quantization
+     */
+    void FillWeightOfAligningFilter(InferenceEngine::CNNLayerPtr layer, void* ptrWeights, size_t offset, bool isQuantized = false);
+
+    /**
      * Connects either memory output, or generic output to a layer
      * @param layer - layer pointer
      * @param ptr - pointer to pointer where to store  output layer information
@@ -387,7 +413,7 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
     ConnectionDetails connectInput(InferenceEngine::CNNLayerPtr layer,
                       void *pVoid,
                       size_t num_data_bytes_in,
-                      size_t offset = 0,
+                      int32_t offset = 0,
                       int idx = 0);
 
     void ImportFrames(void *ptr_dst,
@@ -438,18 +464,26 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
                     const GNASplitLayer& splitInfo,
                     size_t precision_size);
     /**
-     * @brief GNA affine layers are always have activation atatched, while IR not
-     * @param net - copied net ready for quantisation
+     * @brief GNA affine layers are always have activation atached, while IR not
      */
     void insertIdentityLayer(std::vector<InferenceEngine::CNNLayerPtr> &layers);
 
     /**
-     * @brief GNA convolution layers have deinterleaved oriantations, while affine one doesn't
+     * @brief GNA cannot support broadcast - so we will tile weights and biases for scaleshift layer
+     */
+    void substituteScaleShiftBroadCast(std::vector<InferenceEngine::CNNLayerPtr> &layers);
+
+
+    /**
+     * @brief GNA convolution layers have deinterleaved layout, while affine one doesn't
      * so between convolution and affine layers permute layers need to be inserted,
-     * or removed if they are present in topology
+     * current MO approach is to insert such permutations
+     * since GNA-HW already support conv->affine in permuted for, this pass inverses MO behavior
+     * so its remove permutations of certain form conv->conv, and between conv->affine
+     * and insert permutation between conv->affine if they are missed in IR
      * @param layers
      */
-    void applyOrientations(std::vector<InferenceEngine::CNNLayerPtr> &layers);
+    void reversePermutations(std::vector<InferenceEngine::CNNLayerPtr> &layers);
 
 
     /**
@@ -477,9 +511,13 @@ class GNAPlugin : public InferenceEngine::IInferencePluginInternal, public std::
      */
     void insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers);
 
-    intel_dnn_component_t * find_first_unused_input(InferenceEngine::CNNLayerPtr current);
+    /**
+     * aligned filter layer insertion required in cases when split/slice have output connections on not aligned addresses
+     */
+    void insertAligningFilterLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers);
 
-    InferenceEngine::SizeVector inputDims;
+    intel_dnn_component_t * find_first_unused_input(InferenceEngine::CNNLayerPtr current);
+    std::map<std::string, int> bytes_alllocated_for_input;
     InferenceEngine::InputsDataMap inputsDataMap;
 
     InferenceEngine::SizeVector outputDims;
diff --git a/inference-engine/src/gna_plugin/gna_plugin_config.hpp b/inference-engine/src/gna_plugin/gna_plugin_config.hpp
index f82e4434e..15a343667 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_config.hpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_config.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_plugin_entry_points.cpp b/inference-engine/src/gna_plugin/gna_plugin_entry_points.cpp
index d2312741f..96d47637e 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_entry_points.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_entry_points.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -13,7 +13,7 @@ using namespace GNAPluginNS;
 
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin *&plugin, ResponseDesc *resp) noexcept {
     try {
-        plugin = make_ie_compatible_plugin({1, 5, "GNAPlugin", "GNAPlugin"}, make_shared<GNAPluginInternal>());
+        plugin = make_ie_compatible_plugin({1, 6, "GNAPlugin", "GNAPlugin"}, make_shared<GNAPluginInternal>());
         return OK;
     }
     catch (std::exception &ex) {
diff --git a/inference-engine/src/gna_plugin/gna_plugin_internal.hpp b/inference-engine/src/gna_plugin/gna_plugin_internal.hpp
index 3c2dcf02a..f23b938da 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_internal.hpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,15 +15,38 @@ namespace GNAPluginNS {
 
 class GNAPluginInternal  : public InferenceEngine::InferencePluginInternal {
  public:
-    InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(InferenceEngine::ICNNNetwork &network,
-                                                                       const std::map<std::string, std::string> &config) override {
+    InferenceEngine::ExecutableNetworkInternal::Ptr LoadExeNetworkImpl(
+                                                InferenceEngine::ICNNNetwork &network,
+                                                const std::map<std::string, std::string> &config) override {
         return std::make_shared<GNAExecutableNetwork>(network, config);
     }
-    void SetConfig(const std::map<std::string, std::string> &config) override {}
-    InferenceEngine::IExecutableNetwork::Ptr  ImportNetwork(const std::string &modelFileName,
-                                                            const std::map<std::string, std::string> &config) override {
+    void SetConfig(const std::map<std::string, std::string> &config) override {
+        auto plg = std::make_shared<GNAPlugin>();
+        plg->SetConfig(config);
+    }
+    InferenceEngine::IExecutableNetwork::Ptr  ImportNetwork(
+                                                const std::string &modelFileName,
+                                                const std::map<std::string, std::string> &config) override {
         return make_executable_network(std::make_shared<GNAExecutableNetwork>(modelFileName, config));
     }
+
+    /**
+     * @depricated Use the version with config parameter
+     */
+    void QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+                      InferenceEngine::QueryNetworkResult& res) const override {
+        auto plg = std::make_shared<GNAPlugin>();
+        plg->QueryNetwork(network, {}, res);
+    }
+    void QueryNetwork(const InferenceEngine::ICNNNetwork& network,
+                      const std::map<std::string, std::string>& config,
+                      InferenceEngine::QueryNetworkResult& res) const override {
+        auto plg = std::make_shared<GNAPlugin>();
+        try {
+            plg->SetConfig(config);
+        } catch (InferenceEngine::details::InferenceEngineException& e) {}
+        plg->QueryNetwork(network, config, res);
+    }
 };
 
 }  // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/gna_plugin_log.hpp b/inference-engine/src/gna_plugin/gna_plugin_log.hpp
index 08f45ad78..6905f667c 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_log.hpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_log.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/gna_plugin_passes.cpp b/inference-engine/src/gna_plugin/gna_plugin_passes.cpp
index 79d42d240..22cf3c02c 100644
--- a/inference-engine/src/gna_plugin/gna_plugin_passes.cpp
+++ b/inference-engine/src/gna_plugin/gna_plugin_passes.cpp
@@ -1,11 +1,15 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include "gna_plugin_policy.hpp"
 #include <vector>
 #include <string>
 #include <memory>
 #include <utility>
+#include <algorithm>
+#include <list>
+#include <unordered_set>
 
 #include <quantization/quantized_layer_params.hpp>
 #include "gna_plugin.hpp"
@@ -13,11 +17,12 @@
 
 
 using namespace InferenceEngine;
-using namespace std;
+using namespace InferenceEngine::details;
 using namespace GNAPluginNS;
 
 void GNAPlugin::insertDiagonalLayer(std::vector<CNNLayerPtr> & layers) {
     int numOfDiagLayers = 0;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
     for (auto & l : layers) {
         if (l->insData.empty()) continue;
         auto prevLayer = CNNNetPrevLayer(l);
@@ -51,18 +56,20 @@ void GNAPlugin::insertDiagonalLayer(std::vector<CNNLayerPtr> & layers) {
 #endif
         // actual insertion
         auto diagName = std::string("SyntheticScaleShift_") + std::to_string(numOfDiagLayers++);
-        auto diagLayer = make_shared<ScaleShiftLayer>(LayerParams({diagName, "ScaleShift", Precision::FP32}));
+        auto diagLayer = std::make_shared<ScaleShiftLayer>(LayerParams({diagName, "ScaleShift", Precision::FP32}));
 
         // TODO: diagonal size
         std::vector<float> arrayOf1(l->outData[0]->dims[0], 1.f);
-        diagLayer->_weights = make_shared_blob<float>(l->outData[0]->precision, Layout::C, arrayOf1);;
+        diagLayer->_weights = make_shared_blob<float>(l->outData[0]->precision, Layout::C, arrayOf1);
         auto newDims = l->outData[0]->dims;
         auto dataPtr = std::make_shared<Data>(diagName,
                                               newDims,
                                               l->outData[0]->precision,
                                               l->outData[0]->layout);
 
-        auto diagonalWithQuant = InferenceEngine::injectData<QuantizedLayerParams>(diagLayer);
+        auto diagonalWithQuant = quantized ?
+                            InferenceEngine::injectData<QuantizedLayerParams>(diagLayer) :
+                                                                                    diagLayer;
 
         dataPtr->creatorLayer = diagonalWithQuant;
         diagonalWithQuant->outData.push_back(dataPtr);
@@ -93,7 +100,7 @@ void GNAPlugin::reorderMaxPool(std::vector<InferenceEngine::CNNLayerPtr> & layer
 }
 
 std::vector<CNNLayerPtr> GNAPlugin::getCandidatesForIdentityInsertion(const CNNLayerPtr l) {
-    vector<CNNLayerPtr> prevLayers;
+    std::vector<CNNLayerPtr> prevLayers;
 
     // skipping memory inputs and true inputs layers
     if (l->insData.empty()) return {};
@@ -199,8 +206,8 @@ void GNAPlugin::substitutePRelu(std::vector<InferenceEngine::CNNLayerPtr> &layer
         auto relu1 = outputLayers.begin()->second;
         auto neg1 = (++outputLayers.begin())->second;
         if (second.isRelu()) {
-            swap(first, second);
-            swap(relu1, neg1);
+            std::swap(first, second);
+            std::swap(relu1, neg1);
         }
         if (!first.isRelu()) continue;
         // now we have relu as first layer, lets check second
@@ -254,11 +261,108 @@ void GNAPlugin::substitutePRelu(std::vector<InferenceEngine::CNNLayerPtr> &layer
     }
 }
 
-void GNAPlugin::applyOrientations(std::vector<CNNLayerPtr> & layers) {
+void GNAPlugin::reversePermutations(std::vector<CNNLayerPtr> &layers) {
+    std::function<CNNLayerPtr(CNNLayerPtr, std::function<bool(CNNLayerPtr)>)> prevLayerSkipCertain
+        = [&prevLayerSkipCertain](CNNLayerPtr layer, std::function<bool(CNNLayerPtr)> shouldSkip) -> CNNLayerPtr {
+        if (CNNNetHasPrevLayer(layer.get())) {
+            return nullptr;
+        }
+        auto prev = CNNNetPrevLayer(layer);
+
+        if (!shouldSkip(prev)) return prevLayerSkipCertain(prev, shouldSkip);
+
+        return prev;
+    };
+
+    auto prevLayerSkipReshape = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
+        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
+            return LayerInfo(l2).isReshape();
+        });
+    };
+
+
+    std::function<CNNLayerPtr(CNNLayerPtr)> nextLayerSkipReshape = [&nextLayerSkipReshape](CNNLayerPtr layer) -> CNNLayerPtr {
+        if (layer->outData.empty()) {
+            return nullptr;
+        }
+        if (layer->outData.front()->inputTo.size() != 1) {
+            return nullptr;
+        }
+        auto next = layer->outData.front()->inputTo.begin()->second;
+
+        if (LayerInfo(next).isReshape()) return nextLayerSkipReshape(next);
+
+        return next;
+    };
+
+    auto prevConv = [&prevLayerSkipCertain](CNNLayerPtr layer) -> CNNLayerPtr {
+        return prevLayerSkipCertain(layer, [] (CNNLayerPtr l2) {
+            return
+                LayerInfo(l2).isReshape() ||
+                LayerInfo(l2).isPooling() ||
+                LayerInfo(l2).isActivation();
+        });
+    };
+
+    std::unordered_set<std::string> affineWithPermutedWeights;
+    std::list<CNNLayerPtr> permutationstoRemove;
+
+    for (auto & l : layers) {
+        if (!LayerInfo(l).isPermute()) {
+            continue;
+        }
+
+        auto layerOrder = l->GetParamAsInts("order");
+
+        if (layerOrder != std::vector<int>({0, 3, 2, 1})) {
+            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << ", order: was " << l->GetParamAsString("order") <<
+                               ", but support order is 0,3,2,1";
+        }
+
+        // search for it's input convolution
+        auto prev = prevConv(l);
+
+        // pooling no used in speech models without convolution
+        if (!prev) {
+            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid input to that layer";
+        }
+
+        // we can remove that permutation if it is input to ScaleShift or FC layer
+        auto next = nextLayerSkipReshape(l);
+        if (!next || !LayerInfo(next).isFullyConnected()) {
+            THROW_GNA_EXCEPTION << "Unsupported permute layer: " << l->name << " no valid output of that layer";
+        }
+
+        permutationstoRemove.push_back(l);
+
+        // removing that permutation layer and saving information about affine
+        affineWithPermutedWeights.insert(next->name);
+    }
+
+    for (auto && toRemove : permutationstoRemove) {
+        CNNNetworkRemoveLayer(toRemove);
+    }
+
+    // search for conv->affine sequences
+    for (auto & l : layers) {
+        if (!LayerInfo(l).isFullyConnected() || 0 != affineWithPermutedWeights.count(l->name)) {
+            continue;
+        }
+        // found an affine layer that not involved in permutations removing
+        // searching whether it has direct input from convolution
+        auto prevConvLayer = prevConv(l);
+        if (!prevConvLayer) continue;
+
+        auto directPrev = CNNNetPrevLayer(l);
+
+        // TODO : make new permute
+        CNNNetworkInsertLayer(l, directPrev, CNNLayerPtr(nullptr));
+    }
 }
 
 void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
     int numOfIdentityLayers = 0;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
     for (auto & l : layers) {
         for (auto && prev : getCandidatesForIdentityInsertion(l)) {
             // actual insertion
@@ -267,7 +371,7 @@ void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
             gnalog() << "Inserted "<< activationName << " between: " << prev->name << " and " << l->name << "\n" << std::flush;
 
             CNNLayerPtr activationLayer =
-                make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
+                std::make_shared<GenericLayer>(LayerParams({activationName, "identity", Precision::FP32}));
             auto inputData = l->insData[0].lock();
             auto newDims = inputData->dims;
             std::reverse(begin(newDims), end(newDims));
@@ -276,8 +380,9 @@ void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
                                                   TensorDesc(inputData->precision,
                                                              newDims,
                                                              inputData->layout));
-
-            auto activationLayerWithQuant = InferenceEngine::injectData<QuantizedLayerParams>(activationLayer);
+            auto activationLayerWithQuant = quantized ?
+                                    InferenceEngine::injectData<QuantizedLayerParams>(activationLayer) :
+                                                                                            activationLayer;
             dataPtr->creatorLayer = activationLayerWithQuant;
             activationLayerWithQuant->outData.push_back(dataPtr);
             // wether 1 identity or all outputs TODO possible grouping here, need to implement special groupped inserter
@@ -299,6 +404,7 @@ void GNAPlugin::insertIdentityLayer(std::vector<CNNLayerPtr> &layers) {
 
 void GNAPlugin::insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
     int numCopyLayers = 0;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
     for (auto & l : layers) {
         if (l->insData.empty()) continue;
         auto prevLayer = CNNNetPrevLayer(l);
@@ -317,7 +423,7 @@ void GNAPlugin::insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & laye
             gnalog() << "Inserted "<< copyName << " between: " << l->name << " and " << prevLayer->name << "\n" << std::flush;
 
             CNNLayerPtr copyLayer =
-            make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));
+            std::make_shared<GenericLayer>(LayerParams({copyName, "Copy", Precision::FP32}));
 
             auto inputData = l->insData[0].lock();
             auto newDims = inputData->dims;
@@ -329,10 +435,174 @@ void GNAPlugin::insertCopyLayer(std::vector<InferenceEngine::CNNLayerPtr> & laye
                                                              newDims,
                                                              inputData->layout));
 
-            auto copyWithQuant = InferenceEngine::injectData<QuantizedLayerParams>(copyLayer);
+            auto copyWithQuant = quantized ?
+                                    InferenceEngine::injectData<QuantizedLayerParams>(copyLayer) :
+                                                                                            copyLayer;
             dataPtr->creatorLayer = copyWithQuant;
             copyWithQuant->outData.push_back(dataPtr);
             CNNNetworkInsertLayer(prevLayer, l, copyWithQuant);
         }
     }
 }
+
+void GNAPlugin::insertAligningFilterLayer(std::vector<InferenceEngine::CNNLayerPtr> & layers) {
+    // currently split layer only supports 2 bytes in int16 and int8 mode. In fp32 mode this no necessary but usefull for testing
+    const int bytesPerSplitElement = 2;
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
+
+    int numOfFilterLayers = 0;
+    for (auto &l : layers) {
+        auto info = LayerInfo(l);
+        if (!info.isSplit() && !info.isSlice()) {
+            continue;
+        }
+
+        size_t currentOffset = 0;
+        int splitOutIndex = 0;
+        for (auto &&splitOutput  : l->outData) {
+            auto outputSize = product(++begin(splitOutput->getDims()), end(splitOutput->getDims()));
+
+            if (currentOffset != ALIGN64(currentOffset)) {
+                // this split output not beginning from 64 bytes aligned boundary - need to correct by alligning filter layer
+#ifdef PLOT
+                // getting list of layers attached to current split output
+                gnalog() << "Inserted Affine Filter Layer between: " << l->name << " and : \n";
+                for (auto &&followingLayers : splitOutput->getInputTo()) {
+                    gnalog() << "    " << followingLayers.second->name << "\n";
+                }
+                gnalog() << std::flush;
+#endif
+                // insert the filter
+                auto filterName = std::string("AlignFilter_") + std::to_string(numOfFilterLayers++);
+                auto filterLayer =
+                    std::make_shared<WeightableLayer>(LayerParams({filterName, "AffineFilter", Precision::FP32}));
+
+
+                auto inputData = splitOutput;
+                auto newDims = splitOutput->dims;
+
+                size_t aligned64_offset = std::max(0, static_cast<int>(ALIGN64(currentOffset) - 64));
+                size_t newOutputSize = (currentOffset + ALIGN(outputSize, 8) * bytesPerSplitElement - aligned64_offset)
+                    / bytesPerSplitElement;
+
+                // encodes offset to beginning of split layer input
+                filterLayer->params["offset"] = std::to_string(aligned64_offset);
+
+                auto &num_rows_out = splitOutput->dims[0];
+
+                std::vector<float> filterWeights(newOutputSize * num_rows_out, 0.f);
+
+                auto offset = (currentOffset - aligned64_offset) / bytesPerSplitElement;
+
+                for (int i = 0; i != outputSize; i++) {
+                    filterWeights[offset] = 1.0f;
+                    offset += newOutputSize + 1;
+                }
+
+                filterLayer->_weights = make_shared_blob<float>(inputData->precision, Layout::C, filterWeights);
+
+                std::reverse(begin(newDims), end(newDims));
+
+                auto outData = std::make_shared<Data>(filterName,
+                                                      TensorDesc(splitOutput->precision,
+                                                                 newDims,
+                                                                 inputData->layout));
+
+                auto filterWithQuant = quantized ?
+                                       InferenceEngine::injectData<QuantizedLayerParams>(filterLayer) :
+                                       filterLayer;
+                outData->creatorLayer = filterWithQuant;
+                filterWithQuant->outData.push_back(outData);
+                CNNNetworkInsertLayer(l, nullptr, filterWithQuant, splitOutIndex);
+            }
+
+
+            // search data that starts from unaligned location
+            currentOffset += outputSize * bytesPerSplitElement;
+            splitOutIndex++;
+        }
+    }
+}
+
+void GNAPlugin::substituteScaleShiftBroadCast(std::vector<InferenceEngine::CNNLayerPtr> &layers) {
+    auto quantized = InferenceEngine::getInjectedData<QuantizedLayerParams>(layers.front());
+    for (auto & l : layers) {
+        LayerInfo layerInfo(l);
+
+        if (!layerInfo.isScaleShift()) {
+            continue;
+        }
+
+        auto scaleShift = layerInfo.as<ScaleShiftLayer*>();
+
+        auto insData = scaleShift->insData.front().lock();
+        if (!insData) {
+            THROW_GNA_EXCEPTION << "Cannot get inputs data for layer: " << l->name;
+        }
+
+        if (insData->getDims().size() <= 2) {
+            // NC or C cannot do broadcast
+            continue;
+        }
+        auto batchSize = insData->getDims()[0];
+        auto nElements = details::product(insData->getDims()) / batchSize;
+        auto weightsElements = scaleShift->_weights->size();
+        auto weightsBytes = scaleShift->_weights->byteSize();
+
+        if (nElements == weightsElements) {
+            continue;
+        }
+
+        // only 3d scaleshift supported where number of c is arbitrary
+        auto lastD = insData->getDims()[insData->getDims().size() - 1];
+        if (lastD != weightsElements) {
+            THROW_GNA_EXCEPTION << "Unsupported layer: " << l->name
+                                << " should have last dim(" << lastD << ") equal to weights(" << weightsElements << ") length";
+        }
+        if (insData->getDims().size() == 2) {
+            THROW_GNA_EXCEPTION << "For layer: " << l->name
+                                << " weights size(" << weightsElements<< ") invalid: should match input size of(" << lastD << ")";
+        }
+
+        gnalog() << "Substitution ScaleShift broadcast for layer: " << l->name << "\n";
+        // approach 1 - weights tiling
+        if (policy.ScaleShiftPolicy == Policy::WEIGHTS_TILING) {
+            auto tileBlob = [](Blob::Ptr &blob, size_t TileTo){
+                auto weightsElements = blob->size();
+                auto weightsBytes = blob->byteSize();
+                if (weightsElements == 0) {
+                    THROW_IE_EXCEPTION << "Blob size is 0";
+                }
+                if (TileTo % weightsElements) {
+                    return false;
+                }
+
+                auto tiledBlob = make_plain_blob(blob->getTensorDesc().getPrecision(), {TileTo});
+                tiledBlob->allocate();
+
+
+                for (int i=0; i != TileTo / weightsElements; i++) {
+                    ie_memcpy(tiledBlob->buffer().as<uint8_t*>() + i * weightsBytes, weightsBytes, blob->cbuffer(), weightsBytes);
+                }
+                blob = tiledBlob;
+                return true;
+            };
+
+            if (!tileBlob(scaleShift->_weights, nElements)) {
+                THROW_GNA_EXCEPTION << "Cannot tile weights for layer: " << l->name << ", due to weights size not GCD of dims product";
+            }
+            if (scaleShift->_biases) {
+                if (!tileBlob(scaleShift->_biases, nElements)) {
+                    THROW_GNA_EXCEPTION << "Cannot tile biases for layer: " << l->name << ", due to biases size not GCD of dims product";
+                }
+            }
+
+            // currently data type no providing reshape method of tensor desc
+            scaleShift->outData.front()->reshape({batchSize, nElements}, Layout::NC);
+            insData->reshape({batchSize, nElements}, Layout::NC);
+        } else {
+            THROW_GNA_EXCEPTION << "Not implemented substitution of scaleshift broadcast policy of "
+                                << policy.ScaleShiftPolicy <<  "using layers tiling, layer: " << l->name;
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/src/gna_plugin/gna_plugin_policy.hpp b/inference-engine/src/gna_plugin/gna_plugin_policy.hpp
new file mode 100644
index 000000000..1d499c452
--- /dev/null
+++ b/inference-engine/src/gna_plugin/gna_plugin_policy.hpp
@@ -0,0 +1,38 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+
+namespace GNAPluginNS {
+/**
+ * @brief policy agregates various settings that cannot be tweak using configuration options right now,
+ * and essential to keep test coverage for options both in on and off cases
+ */
+class Policy {
+ public:
+    /**
+    * @brief for scaleshift substitution, weight tiling simplify final graph but have extra weights overhead
+    * if not defined scaleshift broadcast will result in creating multiple diagonal layers instead of weight tiling
+    */
+    enum {
+        WEIGHTS_TILING,
+        /**
+         * GNA has limited amount of batch so even existed topologies cannot be substituted with only batching,
+         * this option combines batch and weights tiling
+         */
+        BATCH_AND_WEIGHTS_TILING,
+        DIAGLAYER_TILING
+    } ScaleShiftPolicy = WEIGHTS_TILING;
+
+    /**
+     * Policy on whether to substitute permute layers or not
+     */
+    enum {
+        DISABLED,
+        AUTO_PERMUTE
+    } PermutePolicy = DISABLED;
+};
+
+}  // namespace GNAPluginNS
diff --git a/inference-engine/src/gna_plugin/lstm.cpp b/inference-engine/src/gna_plugin/lstm.cpp
index 53906e643..e1c0f7ec3 100644
--- a/inference-engine/src/gna_plugin/lstm.cpp
+++ b/inference-engine/src/gna_plugin/lstm.cpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+// lstm.cpp : GNA LSTM macro layer definition
+//
 
 #include "lstm.hpp"
 
diff --git a/inference-engine/src/gna_plugin/lstm.hpp b/inference-engine/src/gna_plugin/lstm.hpp
index 6ce8f1094..87f96bc49 100644
--- a/inference-engine/src/gna_plugin/lstm.hpp
+++ b/inference-engine/src/gna_plugin/lstm.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/polymorh_allocator.hpp b/inference-engine/src/gna_plugin/polymorh_allocator.hpp
index d50d8a3a7..6742ba3cb 100644
--- a/inference-engine/src/gna_plugin/polymorh_allocator.hpp
+++ b/inference-engine/src/gna_plugin/polymorh_allocator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/pwl.h b/inference-engine/src/gna_plugin/pwl.h
index fd45903fc..061dd5602 100644
--- a/inference-engine/src/gna_plugin/pwl.h
+++ b/inference-engine/src/gna_plugin/pwl.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/pwl_design.cpp b/inference-engine/src/gna_plugin/pwl_design.cpp
index 1f325bac7..2d150dff5 100644
--- a/inference-engine/src/gna_plugin/pwl_design.cpp
+++ b/inference-engine/src/gna_plugin/pwl_design.cpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+//  pwl_design.cpp : simple activation function designer
+//
 
 #include "pwl.h"
 #include "gna_plugin_log.hpp"
diff --git a/inference-engine/src/gna_plugin/quantization/layer_quantizer.hpp b/inference-engine/src/gna_plugin/quantization/layer_quantizer.hpp
index 6c42d9255..442be423d 100644
--- a/inference-engine/src/gna_plugin/quantization/layer_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/quantization/layer_quantizer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -199,6 +199,11 @@ inline void quantizeWeightsBiases(const QuantDesc & quantDesc,
     uint32_t num_rows = isDiagonal ? 1 : wl->outData[0]->getDims()[1];
     uint32_t num_columns = wl->insData[0].lock().get()->getDims()[1];
 
+    if (wl->type == "AffineFilter") {
+        // for affine filter layer insdata size not equal to actual coded in input layer
+        num_columns = wl->_weights->size() / num_rows;
+    }
+
     if (isDiagonal) {
         std::swap(num_rows, num_columns);
     }
diff --git a/inference-engine/src/gna_plugin/quantization/model_quantizer.hpp b/inference-engine/src/gna_plugin/quantization/model_quantizer.hpp
index 797c87c9c..c0f185255 100644
--- a/inference-engine/src/gna_plugin/quantization/model_quantizer.hpp
+++ b/inference-engine/src/gna_plugin/quantization/model_quantizer.hpp
@@ -1,10 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
-
-#pragma once
 #include <vector>
 #include "gna_plugin_config.hpp"
 #include "layer_transform.hpp"
@@ -49,7 +47,7 @@ class ModelQuantizer {
             gnalog() << layer->name << std::endl;
         }
 
-        // weights scale is a hint, not all weightable layer preserve it in all possible precisions
+        // weights scale is a hint, not all weightable layers preserve it in all possible precisions
         propagateScaleFactor(sortedNewNet, T::mandatory().getWeightsPrecision().size(), scaleFactor);
 
         // sorted order gives possibility for propagate quantisation along depended layers
diff --git a/inference-engine/src/gna_plugin/quantization/precision_ex.hpp b/inference-engine/src/gna_plugin/quantization/precision_ex.hpp
index 798345e98..c3782fb95 100644
--- a/inference-engine/src/gna_plugin/quantization/precision_ex.hpp
+++ b/inference-engine/src/gna_plugin/quantization/precision_ex.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/quantization/quantization.cpp b/inference-engine/src/gna_plugin/quantization/quantization.cpp
index 457bff9af..1609d5dbb 100644
--- a/inference-engine/src/gna_plugin/quantization/quantization.cpp
+++ b/inference-engine/src/gna_plugin/quantization/quantization.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/quantization/quantization.h b/inference-engine/src/gna_plugin/quantization/quantization.h
index bd1ff7b07..8e704fdec 100644
--- a/inference-engine/src/gna_plugin/quantization/quantization.h
+++ b/inference-engine/src/gna_plugin/quantization/quantization.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/quantization/quantized_layer_params.hpp b/inference-engine/src/gna_plugin/quantization/quantized_layer_params.hpp
index 347102bbb..aaa53c99a 100644
--- a/inference-engine/src/gna_plugin/quantization/quantized_layer_params.hpp
+++ b/inference-engine/src/gna_plugin/quantization/quantized_layer_params.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/gna_plugin/quantization/scale_factor_calc.hpp b/inference-engine/src/gna_plugin/quantization/scale_factor_calc.hpp
index a3ba22c1b..1585463c4 100644
--- a/inference-engine/src/gna_plugin/quantization/scale_factor_calc.hpp
+++ b/inference-engine/src/gna_plugin/quantization/scale_factor_calc.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -8,6 +8,7 @@
 #include <utility>
 #include <limits>
 #include <string>
+#include <map>
 #include "gna_layer_info.hpp"
 #include "ie_layers.h"
 #include "gna_plugin_log.hpp"
@@ -53,6 +54,25 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
     const float identity_scale_factor = 2049.0f;
     const float k = 5;
     const float k_identity = 6;
+
+ protected :
+    static bool fp32eq(float p1, float p2) {
+        return (std::abs(p1 - p2) <= 0.00001f * std::min(std::abs(p1), std::abs(p2)));
+    }
+    float getActivationScale(GNAPluginNS::LayerInfo const&  layer, QuantizedLayerParams const* qunatizedParams) {
+            // todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights
+            // set the initial value
+            float result = 1.0f;
+            result = (layer.isIdentity()) ? identity_scale_factor : activation_scale_factor;
+            // if activation is one from relu family, we need to apply heuruistic to avoid activation output overflow
+            if (layer.isRelu() &&
+                    static_cast<uint64_t>(result * qunatizedParams->_src_quant.scale)
+                                                                > std::numeric_limits<int32_t>::max()-1) {
+                result = (result * 0.5);
+            }
+            return result;
+    }
+
  public :
     bool operator()(InferenceEngine::CNNLayer *cnnLayer, int weightsSize, float inputScaleFactor, ScaleFactorUpdateResult &result) {
         if ( !cnnLayer ) {
@@ -62,21 +82,43 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
         // TODO: current approach set input scale factor for true input layer(s) equals to provided factor,
         auto quant = getInjectedData<QuantizedLayerParams>(*cnnLayer);
         if (InferenceEngine::details::CaselessEq<std::string>()(cnnLayer->type, "Memory")) {
-            // for memory output layer need to verify it's input scale factor
-            if (CNNNetHasPrevLayer(cnnLayer)) {
+             if (CNNNetHasPrevLayer(cnnLayer)) {
                 auto prevLayer = CNNNetPrevLayer(cnnLayer);
+                auto prevInfo = LayerInfo(prevLayer);
                 auto inputQuant = getInjectedData<QuantizedLayerParams>(prevLayer);
-                if (inputQuant->_dst_quant.scale != activation_scale_factor) {
-                    gnawarn() << "[WARNING] quantization error : input scale factor ( " << inputQuant->_dst_quant.scale <<") "
-                                       << " for " << cnnLayer->name << ", that is child of " << prevLayer->name <<" doesnt match : "
-                                       << activation_scale_factor << std::endl;
-                    inputQuant->_dst_quant.scale = activation_scale_factor;
-                    // restarting from that activation;
-                    result = ScaleFactorUpdateResult(prevLayer.get());
+               // locating corresponding memory layers ith same ID
+                for (auto && input : CNNNetGetAllInputLayers(cnnLayer)) {
+                    LayerInfo ll(input);
+                    if (!ll.isMemory() ||
+                        !InferenceEngine::details::CaselessEq<std::string>()(input->params["id"], cnnLayer->params["id"])) {
+                        continue;
+                    }
+
+                    auto quantSibling = getInjectedData<QuantizedLayerParams>(input);
+
+                    // after restarting from memory input - quant is fine
+                    if (fp32eq(quantSibling->_dst_quant.scale, inputQuant->_dst_quant.scale)) {
+                        quant->_src_quant.scale = quant->_dst_quant.scale = inputQuant->_dst_quant.scale;
+                        return true;
+                    }
+
+                    if (!fp32eq(quantSibling->_dst_quant.scale, 1)) {
+                        // means we already restarted propagation from that memory layer - we cannot do mach here
+                        THROW_GNA_EXCEPTION << "quantization error : input scale factor ( " << inputQuant->_dst_quant.scale <<") "
+                                  << " for " << cnnLayer->name << ", that is child of " << prevLayer->name <<" doesnt match : "
+                                  << activation_scale_factor;
+                    }
+
+                    gnawarn() << "[INFO] quantization : input scale factor (" << inputQuant->_dst_quant.scale <<")"
+                              << " for " << cnnLayer->name << ", that is child of " << prevLayer->name <<" doesnt match : "
+                              << activation_scale_factor << ", restarting from corresponding memory: "<< input->name << std::endl;
+
+                    // try updating memory input layer scale factor and restart from it
+                    quantSibling->_src_quant.scale = quantSibling->_dst_quant.scale = inputQuant->_dst_quant.scale;
+                    result = ScaleFactorUpdateResult(input.get());
                     return true;
                 }
             }
-            quant->_src_quant.scale = quant->_dst_quant.scale = activation_scale_factor;
             return true;
         }
 
@@ -93,13 +135,7 @@ class ScaleFactorPerLayer<InferenceEngine::CNNLayer *> {
         if (layerInfo.isActivation()) {
             // todo: calculate proper scale factor where we need to expand it a bit to be safe to stay in int16 weights
             // set the initial value
-            quant->_dst_quant.scale = layerInfo.isIdentity() ? identity_scale_factor:activation_scale_factor;
-            // if activation is one from relu family, we need to apply heuruistic to avoid activation output overflow
-            if (layerInfo.isRelu() &&
-                    static_cast<uint64_t>(quant->_dst_quant.scale * quant->_src_quant.scale)
-                                                                > std::numeric_limits<int32_t>::max()-1) {
-                quant->_dst_quant.scale = (quant->_dst_quant.scale * 0.5);
-            }
+            quant->_dst_quant.scale = getActivationScale(layerInfo, quant);
         }
         return true;
     }
@@ -170,7 +206,7 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
                             }
 
                             // if we are here it means that we are in the port 1
-                            if (info.isFullyConnected() || info.isConvolutional()) {
+                            if (info.isFullyConnected() || info.isConvolution()) {
                                 auto quantDataForInputLayer = InferenceEngine::getInjectedData<QuantizedLayerParams>(*in);
                                 auto newOutputScale = quantParams->_dst_quant.scale * maxValue;
                                 auto newWeightScale = newOutputScale / quantDataForInputLayer->_src_quant.scale;
@@ -193,6 +229,53 @@ class ScaleFactorPerLayer<InferenceEngine::EltwiseLayer*> {
 };
 
 template<>
+class ScaleFactorPerLayer<InferenceEngine::ConcatLayer*> {
+ public:
+    bool operator()(InferenceEngine::ConcatLayer* concatLayer, int weightsSize, float inputScaleFactor, ScaleFactorUpdateResult &result) {
+        if ( !concatLayer ) {
+            THROW_GNA_EXCEPTION << "Incorrect Concat Layer pointer \n";
+        }
+        auto in0 = InferenceEngine::CNNNetPrevLayer(concatLayer, 0);
+        auto in1 = InferenceEngine::CNNNetPrevLayer(concatLayer, 1);
+        auto infoIn0 = LayerInfo(in0);
+        auto infoIn1 = LayerInfo(in1);
+        auto quantParams0 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in0);
+        auto quantParams1 = InferenceEngine::getInjectedData<QuantizedLayerParams>(in1);
+        GNAPluginNS::QuantizedLayerParams* sourceQuantParams = NULL;
+        auto quantData = InferenceEngine::getInjectedData<QuantizedLayerParams>(*concatLayer);
+
+        if (quantParams0->_dst_quant.scale == quantParams1->_dst_quant.scale) {
+            return true;
+        } else if (infoIn0.isInput() && infoIn1.isInput()) {
+            THROW_GNA_EXCEPTION << "Two Input layers has different scales in concat!!! \n";
+        }
+
+        int i = 0;
+        if (infoIn0.isInput()) {
+            sourceQuantParams = quantParams0;
+        } else if (infoIn1.isInput()) {
+            ++i;
+            sourceQuantParams = quantParams1;
+        }
+
+        if (!sourceQuantParams) {
+            THROW_GNA_EXCEPTION << "Concat quantization for this case need to be implemented!!! \n";
+        }
+        auto destinationQuantParams =
+                InferenceEngine::getInjectedData<QuantizedLayerParams>(InferenceEngine::CNNNetPrevLayer(concatLayer, !i));
+        InferenceEngine::CNNLayerPtr in = InferenceEngine::CNNNetPrevLayer(concatLayer, !i);
+
+        quantData->_dst_quant.scale = sourceQuantParams->_dst_quant.scale;
+        quantData->_src_quant.scale = sourceQuantParams->_dst_quant.scale;
+
+        destinationQuantParams->_dst_quant.scale = sourceQuantParams->_dst_quant.scale;
+        result = ScaleFactorUpdateResult(in.get());
+
+        return true;
+    }
+};
+
+template<>
 class ScaleFactorPerLayer<InferenceEngine::WeightableLayer*> {
  private:
     float const _scale_reduction_50 = 0.50;
diff --git a/inference-engine/src/gna_plugin/util.cpp b/inference-engine/src/gna_plugin/util.cpp
index c10e3175f..e6f577611 100644
--- a/inference-engine/src/gna_plugin/util.cpp
+++ b/inference-engine/src/gna_plugin/util.cpp
@@ -1,6 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
+// util.cpp : various utility functions for debugging, file i/o, etc.
+//
 
 #include <cinttypes>
 #ifndef _WIN32
diff --git a/inference-engine/src/gna_plugin/util.h b/inference-engine/src/gna_plugin/util.h
index 0838bd2a6..523d35e2a 100644
--- a/inference-engine/src/gna_plugin/util.h
+++ b/inference-engine/src/gna_plugin/util.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/hetero_plugin/CMakeLists.txt b/inference-engine/src/hetero_plugin/CMakeLists.txt
index 745683404..a073998f3 100644
--- a/inference-engine/src/hetero_plugin/CMakeLists.txt
+++ b/inference-engine/src/hetero_plugin/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -25,3 +25,5 @@ add_definitions(-DIMPLEMENT_INFERENCE_ENGINE_PLUGIN)
 add_library(${TARGET_NAME} SHARED ${SOURCES} ${HEADERS})
 target_link_libraries(${TARGET_NAME} inference_engine ${INTEL_ITT_LIBS})
 set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
+
+add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
diff --git a/inference-engine/src/hetero_plugin/fallback_policy.cpp b/inference-engine/src/hetero_plugin/fallback_policy.cpp
index bc278f19b..9288db71d 100644
--- a/inference-engine/src/hetero_plugin/fallback_policy.cpp
+++ b/inference-engine/src/hetero_plugin/fallback_policy.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "fallback_policy.h"
diff --git a/inference-engine/src/hetero_plugin/fallback_policy.h b/inference-engine/src/hetero_plugin/fallback_policy.h
index 59f112af4..5547ee827 100644
--- a/inference-engine/src/hetero_plugin/fallback_policy.h
+++ b/inference-engine/src/hetero_plugin/fallback_policy.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/src/hetero_plugin/hetero_async_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_async_infer_request.cpp
index 3fa1e8ed8..5aa360b1a 100644
--- a/inference-engine/src/hetero_plugin/hetero_async_infer_request.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_async_infer_request.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "hetero_async_infer_request.h"
diff --git a/inference-engine/src/hetero_plugin/hetero_async_infer_request.h b/inference-engine/src/hetero_plugin/hetero_async_infer_request.h
index 353276507..d09ada9fe 100644
--- a/inference-engine/src/hetero_plugin/hetero_async_infer_request.h
+++ b/inference-engine/src/hetero_plugin/hetero_async_infer_request.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 /**
diff --git a/inference-engine/src/hetero_plugin/hetero_device_loader.cpp b/inference-engine/src/hetero_plugin/hetero_device_loader.cpp
index 79728a905..589388ec3 100644
--- a/inference-engine/src/hetero_plugin/hetero_device_loader.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_device_loader.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "hetero_device_loader.h"
diff --git a/inference-engine/src/hetero_plugin/hetero_device_loader.h b/inference-engine/src/hetero_plugin/hetero_device_loader.h
index e8fbab4eb..f9b9e4c74 100644
--- a/inference-engine/src/hetero_plugin/hetero_device_loader.h
+++ b/inference-engine/src/hetero_plugin/hetero_device_loader.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp
index 1192abb02..b6f428617 100644
--- a/inference-engine/src/hetero_plugin/hetero_executable_network.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_executable_network.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "hetero_executable_network.h"
@@ -208,10 +220,17 @@ void HeteroExecutableNetwork::load(InferenceEngine::ICNNNetwork &network_,
             _deviceLoaders[affinity]->SetLogCallback(*listener);
     }
 
+    InferenceEngine::ICNNNetworkStats* networkStats = nullptr;
+    if (StatusCode::OK != network.getStats(&networkStats, nullptr)) {
+        networkStats = nullptr;
+    }
+
+
     for (auto &&subgraph : subgraphs) {
         auto affinity = (*subgraph.begin())->affinity;
         tempLayers.assign(subgraph.begin(), subgraph.end());
-        auto tempNetwork = cloneNet(tempLayers);
+        auto tempNetwork = cloneNet(tempLayers, networkStats);
+        tempNetwork->setName(network.getName() + "_" + std::to_string(std::distance(subgraphs.data(), &subgraph)));
         // restoring some outputs from original net if they are not marked as output automatically
         // this might happen if output was set manually for origin network and
         // it doesn't go to next subgraph
diff --git a/inference-engine/src/hetero_plugin/hetero_executable_network.h b/inference-engine/src/hetero_plugin/hetero_executable_network.h
index 24b59b01a..08e4bd76a 100644
--- a/inference-engine/src/hetero_plugin/hetero_executable_network.h
+++ b/inference-engine/src/hetero_plugin/hetero_executable_network.h
@@ -1,7 +1,23 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
+/**
+ * @brief a header file for ExecutableNetwork
+ * @file dlia_executable_network.hpp
+ */
 #pragma once
 
 #include <memory>
diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
index fdf865c00..81349f941 100644
--- a/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_infer_request.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "hetero_infer_request.h"
diff --git a/inference-engine/src/hetero_plugin/hetero_infer_request.h b/inference-engine/src/hetero_plugin/hetero_infer_request.h
index 77a6cb264..76330224d 100644
--- a/inference-engine/src/hetero_plugin/hetero_infer_request.h
+++ b/inference-engine/src/hetero_plugin/hetero_infer_request.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 /**
diff --git a/inference-engine/src/hetero_plugin/hetero_plugin.cpp b/inference-engine/src/hetero_plugin/hetero_plugin.cpp
index fff3d16af..987e703c0 100644
--- a/inference-engine/src/hetero_plugin/hetero_plugin.cpp
+++ b/inference-engine/src/hetero_plugin/hetero_plugin.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "hetero_plugin.h"
@@ -20,7 +32,7 @@ using namespace HeteroPlugin;
 using namespace std;
 
 static Version heteroPluginDescription = {
-        {1, 4},  // plugin API version
+        {1, 6},  // plugin API version
         CI_BUILD_NUMBER,
         "dliaPlugin"  // plugin description message -
 };
@@ -37,6 +49,7 @@ Engine::Engine() {
 
 InferenceEngine::ExecutableNetworkInternal::Ptr Engine::LoadExeNetworkImpl(InferenceEngine::ICNNNetwork &network,
                                                                            const std::map<std::string, std::string> &config) {
+    // TODO(amalyshe) do we need here verification of input precisions?
     std::map<std::string, std::string> tconfig;
     tconfig = config;
 
@@ -83,7 +96,7 @@ INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(
         ResponseDesc *resp) noexcept {
     try {
         plugin = new HeteroPluginBase<Engine>(
-                {{1, 5}, "heteroPlugin", "heteroPlugin"},
+                {{1, 6}, "heteroPlugin", "heteroPlugin"},
                 std::make_shared<Engine>());
         return OK;
     }
diff --git a/inference-engine/src/hetero_plugin/hetero_plugin.h b/inference-engine/src/hetero_plugin/hetero_plugin.h
index 93fa7b3d8..671463da8 100644
--- a/inference-engine/src/hetero_plugin/hetero_plugin.h
+++ b/inference-engine/src/hetero_plugin/hetero_plugin.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/src/hetero_plugin/hetero_plugin_base.hpp b/inference-engine/src/hetero_plugin/hetero_plugin_base.hpp
index d38275d6b..e2e166b43 100644
--- a/inference-engine/src/hetero_plugin/hetero_plugin_base.hpp
+++ b/inference-engine/src/hetero_plugin/hetero_plugin_base.hpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright (C) 2018-2019 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 /**
diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt
index 41f0e98de..b3dc75f56 100644
--- a/inference-engine/src/inference_engine/CMakeLists.txt
+++ b/inference-engine/src/inference_engine/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -6,10 +6,13 @@ set (TARGET_NAME "inference_engine")
 
 file (GLOB LIBRARY_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/transform/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/transform/transformations/*.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/builders/*.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/*.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/*.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/built-in/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/const_infer/*.cpp
       )
 
 file (GLOB LIBRARY_HEADERS
@@ -18,6 +21,7 @@ file (GLOB LIBRARY_HEADERS
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/built-in/*.hpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/shape_infer/const_infer/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/base/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/impl/*.hpp
        ${CMAKE_CURRENT_SOURCE_DIR}/cpp_interfaces/interface/*.hpp
@@ -33,9 +37,15 @@ if( (NOT DEFINED ENABLE_SSE42) OR ENABLE_SSE42)
            ${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/*.hpp
           )
     include_directories(${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42)
-    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/blob_transform_sse42.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
-    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_data_sse42.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
-    set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp PROPERTIES COMPILE_FLAGS -msse4.2)
+    if (WIN32)
+        set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/blob_transform_sse42.cpp"
+                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_data_sse42.cpp"
+                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp" PROPERTIES COMPILE_FLAGS /arch:SSE2)
+    else()
+        set_source_files_properties("${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/blob_transform_sse42.cpp"
+                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_data_sse42.cpp"
+                "${CMAKE_CURRENT_SOURCE_DIR}/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp" PROPERTIES COMPILE_FLAGS -msse4.2)
+    endif()
     add_definitions(-DHAVE_SSE=1)
 endif()
 
@@ -64,7 +74,7 @@ add_library(${TARGET_NAME} SHARED
             ${PUBLIC_HEADERS})
 set_ie_threading_interface_for(${TARGET_NAME})
 
-target_link_libraries(${TARGET_NAME} PRIVATE pugixml fluid ade ${CMAKE_DL_LIBS} ${INTEL_ITT_LIBS})
+target_link_libraries(${TARGET_NAME} PRIVATE fluid ade ${INTEL_ITT_LIBS} PUBLIC pugixml ${CMAKE_DL_LIBS})
 
 # Properties->C/C++->General->Additional Include Directories
 target_include_directories(${TARGET_NAME} PUBLIC ${PUBLIC_HEADERS_DIR}
@@ -105,7 +115,8 @@ target_compile_definitions(${TARGET_NAME}_s PUBLIC -DUSE_STATIC_IE)
 set_target_properties(${TARGET_NAME}_s PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME}_s)
 
 target_link_libraries(${TARGET_NAME}_s PRIVATE fluid
-                                       PRIVATE ade)
+                                       PRIVATE ade
+                                       PRIVATE ${INTEL_ITT_LIBS})
 
 # export targets
 export(TARGETS ${TARGET_NAME} NAMESPACE IE:: FILE "${CMAKE_BINARY_DIR}/targets.cmake")
@@ -118,4 +129,6 @@ configure_file(
 configure_file(
     "${CMAKE_SOURCE_DIR}/cmake/share/InferenceEngineConfig-version.cmake.in"
     "${CMAKE_BINARY_DIR}/InferenceEngineConfig-version.cmake"
-    COPYONLY)
-\ No newline at end of file
+    COPYONLY)
+
+add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
diff --git a/inference-engine/src/inference_engine/ade_util.cpp b/inference-engine/src/inference_engine/ade_util.cpp
index 041c5655e..437d02a79 100644
--- a/inference-engine/src/inference_engine/ade_util.cpp
+++ b/inference-engine/src/inference_engine/ade_util.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ade_util.hpp b/inference-engine/src/inference_engine/ade_util.hpp
index 734835463..f4b26dd16 100644
--- a/inference-engine/src/inference_engine/ade_util.hpp
+++ b/inference-engine/src/inference_engine/ade_util.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/blob_factory.cpp b/inference-engine/src/inference_engine/blob_factory.cpp
index 8be9ab935..dbd9eecaf 100644
--- a/inference-engine/src/inference_engine/blob_factory.cpp
+++ b/inference-engine/src/inference_engine/blob_factory.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,6 +15,11 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(const InferenceEngine::Tenso
     return make_blob_with_precision(desc.getPrecision(), desc, ptr);
 }
 
+
+InferenceEngine::Blob::Ptr make_blob_with_precision(const InferenceEngine::TensorDesc& desc, const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
+    return make_blob_with_precision(desc.getPrecision(), desc, alloc);
+}
+
 InferenceEngine::Layout plain_layout(InferenceEngine::SizeVector dims) {
     int n = dims.size();
     return n == 1 ? InferenceEngine::C    :
diff --git a/inference-engine/src/inference_engine/blob_factory.hpp b/inference-engine/src/inference_engine/blob_factory.hpp
index a4a5d201e..b65f35b3d 100644
--- a/inference-engine/src/inference_engine/blob_factory.hpp
+++ b/inference-engine/src/inference_engine/blob_factory.hpp
@@ -1,10 +1,11 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
 #include <utility>
+#include <memory>
 #include "inference_engine.hpp"
 
 template <InferenceEngine::Precision::ePrecision precision>
@@ -23,6 +24,9 @@ class BlobFactory {
     static InferenceEngine::Blob::Ptr make(const InferenceEngine::TensorDesc& desc, void* ptr) {
         return InferenceEngine::make_shared_blob<BlobType>(desc, reinterpret_cast<BlobType*>(ptr));
     }
+    static InferenceEngine::Blob::Ptr make(const InferenceEngine::TensorDesc& desc, const std::shared_ptr<InferenceEngine::IAllocator>& alloc) {
+        return InferenceEngine::make_shared_blob<BlobType>(desc, alloc);
+    }
 };
 
 template <InferenceEngine::Precision::ePrecision precision, class ... Args> InferenceEngine::Blob::Ptr make_shared_blob2(Args && ... args) {
@@ -35,6 +39,8 @@ template <InferenceEngine::Precision::ePrecision precision, class ... Args> Infe
 
 INFERENCE_ENGINE_API_CPP(InferenceEngine::Blob::Ptr) make_blob_with_precision(const InferenceEngine::TensorDesc& desc);
 INFERENCE_ENGINE_API_CPP(InferenceEngine::Blob::Ptr) make_blob_with_precision(const InferenceEngine::TensorDesc& desc, void* ptr);
+INFERENCE_ENGINE_API_CPP(InferenceEngine::Blob::Ptr) make_blob_with_precision(const InferenceEngine::TensorDesc& desc,
+                                                                              const std::shared_ptr<InferenceEngine::IAllocator>& alloc);
 INFERENCE_ENGINE_API_CPP(InferenceEngine::Blob::Ptr) make_plain_blob(InferenceEngine::Precision prec, const InferenceEngine::SizeVector dims);
 
 INFERENCE_ENGINE_API_CPP(InferenceEngine::Layout) plain_layout(InferenceEngine::SizeVector dims);
@@ -50,6 +56,7 @@ InferenceEngine::Blob::Ptr make_blob_with_precision(InferenceEngine::Precision p
         USE_FACTORY(I8);
         USE_FACTORY(U16);
         USE_FACTORY(I32);
+        USE_FACTORY(BIN);
         default:
             THROW_IE_EXCEPTION << "cannot locate blob for precision: " << precision;
     }
diff --git a/inference-engine/src/inference_engine/blob_transform.cpp b/inference-engine/src/inference_engine/blob_transform.cpp
index bde62a696..f3fc7eaae 100644
--- a/inference-engine/src/inference_engine/blob_transform.cpp
+++ b/inference-engine/src/inference_engine/blob_transform.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/blob_transform.hpp b/inference-engine/src/inference_engine/blob_transform.hpp
index 4d83015fc..0c6bfe2fd 100644
--- a/inference-engine/src/inference_engine/blob_transform.hpp
+++ b/inference-engine/src/inference_engine/blob_transform.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/builders/ie_argmax_layer.cpp b/inference-engine/src/inference_engine/builders/ie_argmax_layer.cpp
index 265913fc1..b666bc952 100644
--- a/inference-engine/src/inference_engine/builders/ie_argmax_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_argmax_layer.cpp
@@ -1,61 +1,86 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_argmax_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ArgMaxLayer::ArgMaxLayer(const std::string& name): LayerFragment("ArgMax", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ArgMaxLayer::ArgMaxLayer(const std::string& name): LayerDecorator("ArgMax", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::ArgMaxLayer::ArgMaxLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ArgMax"))
-        THROW_IE_EXCEPTION << "Cannot create ArgMaxLayer decorator for layer " << getLayer().getType();
+Builder::ArgMaxLayer::ArgMaxLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ArgMax");
+}
+
+Builder::ArgMaxLayer::ArgMaxLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ArgMax");
 }
 
 Builder::ArgMaxLayer& Builder::ArgMaxLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ArgMaxLayer::getPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::ArgMaxLayer& Builder::ArgMaxLayer::setPort(const Port &port) {
-    getLayer().getInputPorts()[0] = port;
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 int Builder::ArgMaxLayer::getAxis() const {
-    return getLayer().getParameters()["axis"].asInt();
+    return getLayer()->getParameters().at("axis");
 }
 Builder::ArgMaxLayer& Builder::ArgMaxLayer::setAxis(int axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
 }
 size_t Builder::ArgMaxLayer::getTopK() const {
-    return getLayer().getParameters()["top_k"].asUInt();
+    return getLayer()->getParameters().at("top_k");
 }
 Builder::ArgMaxLayer& Builder::ArgMaxLayer::setTopK(size_t topK) {
-    getLayer().getParameters()["top_k"] = topK;
+    getLayer()->getParameters()["top_k"] = topK;
     return *this;
 }
 size_t Builder::ArgMaxLayer::getOutMaxVal() const {
-    return getLayer().getParameters()["out_max_val"].asUInt();
+    return getLayer()->getParameters().at("out_max_val");
 }
 Builder::ArgMaxLayer& Builder::ArgMaxLayer::setOutMaxVal(size_t outMaxVal) {
-    if (outMaxVal > 1)
-        THROW_IE_EXCEPTION << "OutMaxVal supports only 0 and 1 values.";
-    getLayer().getParameters()["out_max_val"] = outMaxVal;
+    getLayer()->getParameters()["out_max_val"] = outMaxVal;
     return *this;
 }
 
+REG_VALIDATOR_FOR(ArgMax, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+    Builder::ArgMaxLayer layer(input_layer);
+    if (layer.getAxis() > 1) {
+        THROW_IE_EXCEPTION << "axis supports only 0 and 1 values.";
+    }
+    if (layer.getOutMaxVal() > 1) {
+        THROW_IE_EXCEPTION << "OutMaxVal supports only 0 and 1 values.";
+    }
+});
+
+REG_CONVERTER_FOR(ArgMax, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = cnnLayer->GetParamAsInt("axis");
+    layer.getParameters()["top_k"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("top_k"));
+    layer.getParameters()["out_max_val"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("out_max_val"));
+});
+
+
diff --git a/inference-engine/src/inference_engine/builders/ie_batch_normalization_layer.cpp b/inference-engine/src/inference_engine/builders/ie_batch_normalization_layer.cpp
index 1c3d27573..329d3f5d9 100644
--- a/inference-engine/src/inference_engine/builders/ie_batch_normalization_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_batch_normalization_layer.cpp
@@ -1,68 +1,63 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_batch_normalization_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::BatchNormalizationLayer::BatchNormalizationLayer(const std::string& name): LayerFragment("BatchNormalization", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::BatchNormalizationLayer::BatchNormalizationLayer(const std::string& name): LayerDecorator("BatchNormalization", name) {
+    getLayer()->getInputPorts().resize(3);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getOutputPorts().resize(1);
     setEpsilon(0.00000001f);
 }
 
-Builder::BatchNormalizationLayer::BatchNormalizationLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "BatchNormalization"))
-        THROW_IE_EXCEPTION << "Cannot create BatchNormalizationLayer decorator for layer " << getLayer().getType();
+Builder::BatchNormalizationLayer::BatchNormalizationLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("BatchNormalization");
+}
+
+Builder::BatchNormalizationLayer::BatchNormalizationLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("BatchNormalization");
 }
 
 Builder::BatchNormalizationLayer& Builder::BatchNormalizationLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::BatchNormalizationLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::BatchNormalizationLayer& Builder::BatchNormalizationLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
-    return *this;
-}
-
-Builder::BatchNormalizationLayer& Builder::BatchNormalizationLayer::setWeights(const Blob::CPtr& weights) {
-    getLayer().addConstantData("weights", weights);
-    return *this;
-}
-Builder::BatchNormalizationLayer& Builder::BatchNormalizationLayer::setBiases(const Blob::CPtr& biases) {
-    getLayer().addConstantData("biases", biases);
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::BatchNormalizationLayer::getEpsilon() const {
-    return getLayer().getParameters()["epsilon"].asFloat();
+    return getLayer()->getParameters().at("epsilon");
 }
 Builder::BatchNormalizationLayer& Builder::BatchNormalizationLayer::setEpsilon(float eps) {
-    getLayer().getParameters()["epsilon"] = eps;
+    getLayer()->getParameters()["epsilon"] = eps;
     return *this;
 }
 
-void Builder::BatchNormalizationLayer::validate(const Layer& layer)  {
-    auto weightsIt = layer.getConstantData().find("weights");
-    auto biasesIt = layer.getConstantData().find("biases");
-    bool valid = weightsIt != layer.getConstantData().end() &&
-            biasesIt != layer.getConstantData().end() &&
-            weightsIt->second != nullptr &&
-            weightsIt->second->cbuffer() != nullptr &&
-            biasesIt->second != nullptr &&
-            biasesIt->second->cbuffer() != nullptr;
-    if (!valid)
-        THROW_IE_EXCEPTION << "Cannot create BatchNotmalization layer! Weights and biases are required!";
-}
+REG_VALIDATOR_FOR(BatchNormalization, [](const Builder::Layer::CPtr& layer, bool partial)  {
+    Builder::BatchNormalizationLayer batchNormBuilder(layer);
+    if (partial)
+        return;
+    auto weights = layer->getInputPorts()[1].getData()->getData();
+    auto biases = layer->getInputPorts()[2].getData()->getData();
+    if (!weights || weights->cbuffer() == nullptr || !biases || biases->cbuffer() == nullptr)
+        THROW_IE_EXCEPTION << "Cannot create BatchNormalization layer! Weights and biases are required!";
+});
 
-REG_VALIDATOR_FOR(BatchNormalization,  Builder::BatchNormalizationLayer::validate);
-\ No newline at end of file
+REG_CONVERTER_FOR(BatchNormalization, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["epsilon"] = cnnLayer->GetParamAsFloat("epsilon");
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_clamp_layer.cpp b/inference-engine/src/inference_engine/builders/ie_clamp_layer.cpp
index 0bc1fb9ae..587b44281 100644
--- a/inference-engine/src/inference_engine/builders/ie_clamp_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_clamp_layer.cpp
@@ -1,56 +1,77 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_clamp_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ClampLayer::ClampLayer(const std::string& name): LayerFragment("Clamp", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ClampLayer::ClampLayer(const std::string& name): LayerDecorator("Clamp", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setMinValue(0.0f);
     setMaxValue(1.0f);
 }
 
-Builder::ClampLayer::ClampLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Clamp"))
-        THROW_IE_EXCEPTION << "Cannot create ClampLayer decorator for layer " << getLayer().getType();
+Builder::ClampLayer::ClampLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Clamp");
+}
+
+Builder::ClampLayer::ClampLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Clamp");
 }
 
 Builder::ClampLayer& Builder::ClampLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ClampLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ClampLayer& Builder::ClampLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::ClampLayer::getMaxValue() const {
-    return getLayer().getParameters()["max"].asFloat();
+    return getLayer()->getParameters().at("max");
 }
 
 Builder::ClampLayer& Builder::ClampLayer::setMaxValue(float maxValue) {
-    getLayer().getParameters()["max"] = maxValue;
+    getLayer()->getParameters()["max"] = maxValue;
     return *this;
 }
 
 float Builder::ClampLayer::getMinValue() const {
-    return getLayer().getParameters()["min"].asFloat();
+    return getLayer()->getParameters().at("min");
 }
 
 Builder::ClampLayer& Builder::ClampLayer::setMinValue(float minValue) {
-    getLayer().getParameters()["min"] = minValue;
+    getLayer()->getParameters()["min"] = minValue;
     return *this;
 }
 
+REG_VALIDATOR_FOR(Clamp, [](const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::ClampLayer layer(input_layer);
+    if (layer.getMinValue() > layer.getMaxValue()) {
+        THROW_IE_EXCEPTION << "MinValue should be less or equal MaxValue";
+    }
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(Clamp, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["max"] = cnnLayer->GetParamAsFloat("max", 0);
+    layer.getParameters()["min"] = cnnLayer->GetParamAsFloat("min", 0);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_concat_layer.cpp b/inference-engine/src/inference_engine/builders/ie_concat_layer.cpp
index 8ba326f2c..a5e8d3fd1 100644
--- a/inference-engine/src/inference_engine/builders/ie_concat_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_concat_layer.cpp
@@ -1,53 +1,105 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_concat_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ConcatLayer::ConcatLayer(const std::string& name): LayerFragment("Concat", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::ConcatLayer::ConcatLayer(const std::string& name): LayerDecorator("Concat", name) {
+    getLayer()->getOutputPorts().resize(1);
     setAxis(1);
 }
 
-Builder::ConcatLayer::ConcatLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Concat"))
-        THROW_IE_EXCEPTION << "Cannot create ConcatLayer decorator for layer " << getLayer().getType();
+Builder::ConcatLayer::ConcatLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Concat");
+}
+
+Builder::ConcatLayer::ConcatLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Concat");
 }
 
 Builder::ConcatLayer& Builder::ConcatLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ConcatLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ConcatLayer& Builder::ConcatLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<Port>& Builder::ConcatLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::ConcatLayer& Builder::ConcatLayer::setInputPorts(const std::vector<Port>& ports) {
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 size_t Builder::ConcatLayer::getAxis() const {
-    return getLayer().getParameters()["axis"].asUInt();
+    return getLayer()->getParameters().at("axis");
 }
 
 Builder::ConcatLayer& Builder::ConcatLayer::setAxis(size_t axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
 }
+
+REG_VALIDATOR_FOR(Concat, [] (const InferenceEngine::Builder::Layer::CPtr &input_layer, bool partial) {
+    if (partial) {
+        return;
+    }
+    Builder::ConcatLayer layer(input_layer);
+    if (layer.getInputPorts().size() < 1) {
+        THROW_IE_EXCEPTION << "Layer " << layer.getName() << " contains incorrect input ports. "
+                           << "It takes at least two Blobs";
+    }
+    for (size_t i = 1; i < layer.getInputPorts().size(); ++i) {
+        if (layer.getInputPorts()[i - 1].shape().size() != layer.getInputPorts()[i].shape().size()) {
+            THROW_IE_EXCEPTION << "Layer " << layer.getName() << " contains incorrect input ports. "
+                               << "It should have equal number of dimensions";
+        }
+    }
+    if (layer.getInputPorts()[0].shape().size() != layer.getOutputPort().shape().size()) {
+        THROW_IE_EXCEPTION << "Layer " << layer.getName() << " contains incorrect input and output ports "
+                           << "It should have equal number of dimensions";
+    }
+    if (layer.getAxis() >= layer.getOutputPort().shape().size()) {
+        THROW_IE_EXCEPTION << "Layer " << layer.getName() << "contains incorrect axis. "
+                           << "It should be >= 0 and < number of port's dimensions.";
+    }
+    for (size_t i = 0; i < layer.getOutputPort().shape().size(); ++i) {
+        if (i == layer.getAxis()) {
+            size_t sumInputDimensions = 0;
+            for (const Port& port : layer.getInputPorts()) {
+                sumInputDimensions += port.shape()[i];
+            }
+            if (sumInputDimensions != layer.getOutputPort().shape()[i]) {
+                THROW_IE_EXCEPTION << "Layer " << layer.getName() << " contains incorrect input and output ports "
+                                   << "Sum of input port's dimensions in the given axis should be equal to output ports dimension in the same axis.";
+            }
+        } else {
+            for (const Port& port : layer.getInputPorts()) {
+                if (port.shape()[i] != layer.getOutputPort().shape()[i]) {
+                    THROW_IE_EXCEPTION << "Layer " << layer.getName() << " contains incorrect input and output ports. "
+                                       << "It should have equal dimensions in axis different from given";
+                }
+            }
+        }
+    }
+});
+
+REG_CONVERTER_FOR(Concat, [] (const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis", 1));
+});
+
diff --git a/inference-engine/src/inference_engine/builders/ie_const_layer.cpp b/inference-engine/src/inference_engine/builders/ie_const_layer.cpp
index da5d43daf..0b0f2430a 100644
--- a/inference-engine/src/inference_engine/builders/ie_const_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_const_layer.cpp
@@ -1,39 +1,58 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_const_layer.hpp>
-#include <details/caseless.hpp>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ConstLayer::ConstLayer(const std::string& name): LayerFragment("Const", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::ConstLayer::ConstLayer(const std::string& name): LayerDecorator("Const", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getParameters()["custom"] = Blob::CPtr();
 }
 
-Builder::ConstLayer::ConstLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Const"))
-        THROW_IE_EXCEPTION << "Cannot create ConstLayer decorator for layer " << getLayer().getType();
+Builder::ConstLayer::ConstLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Const");
+}
+
+Builder::ConstLayer::ConstLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Const");
 }
 
 Builder::ConstLayer& Builder::ConstLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ConstLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ConstLayer& Builder::ConstLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    const auto & data = getLayer()->getOutputPorts()[0].getData();
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0].setData(data);
     return *this;
 }
 
 Builder::ConstLayer& Builder::ConstLayer::setData(const Blob::CPtr& data) {
-    getLayer().addConstantData("custom", data);
+    getLayer()->getParameters()["custom"] = data;
+    getLayer()->getOutputPorts()[0].getData()->setData(std::const_pointer_cast<Blob>(data));
     return *this;
 }
 
+const Blob::CPtr& Builder::ConstLayer::getData() const {
+    if (getLayer()->getParameters().at("custom").as<Blob::CPtr>().get() !=
+            getLayer()->getOutputPorts()[0].getData()->getData().get())
+        THROW_IE_EXCEPTION << "Constant data output port has incorrect data!";
+    return getLayer()->getParameters().at("custom").as<Blob::CPtr>();
+}
+
+REG_VALIDATOR_FOR(Const, [] (const InferenceEngine::Builder::Layer::CPtr& layer, bool partial)  {
+    Builder::ConstLayer constBuilder(layer);
+    const auto& data = constBuilder.getData();
+    if (!data || data->cbuffer() == nullptr)
+        THROW_IE_EXCEPTION << "Cannot create Const layer! Data is required!";
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_convolution_layer.cpp b/inference-engine/src/inference_engine/builders/ie_convolution_layer.cpp
index a66e1550a..3c81b3f0a 100644
--- a/inference-engine/src/inference_engine/builders/ie_convolution_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_convolution_layer.cpp
@@ -1,153 +1,126 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_convolution_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
+#include <limits>
 
 using namespace InferenceEngine;
 
-Builder::ConvolutionLayer::ConvolutionLayer(const std::string& name): LayerFragment("Convolution", name) {
-    getLayer().getInputPorts().resize(1);
-    getLayer().getOutputPorts().resize(1);
+Builder::ConvolutionLayer::ConvolutionLayer(const std::string& name): LayerDecorator("Convolution", name) {
+    getLayer()->getInputPorts().resize(3);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getOutputPorts().resize(1);
+    setGroup(1);
+    setKernel({});
+    setOutDepth(0);
+    setStrides({});
+    setDilation({});
+    setPaddingsEnd({});
+    setPaddingsBegin({});
 }
 
-Builder::ConvolutionLayer::ConvolutionLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Convolution"))
-        THROW_IE_EXCEPTION << "Cannot create ConvolutionLayer decorator for layer " << getLayer().getType();
+Builder::ConvolutionLayer::ConvolutionLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Convolution");
 }
 
-Builder::ConvolutionLayer::operator Builder::Layer() const {
-    Layer genLayer(getLayer());
-
-    std::vector<size_t> l_kernel = getKernel();
-    std::vector<size_t> l_dilation = getDilation();
-    std::vector<size_t> l_paddingBegin = getPaddingsBegin();
-    std::vector<size_t> l_paddingEnd = getPaddingsEnd();
-    std::vector<size_t> l_strides = getStrides();
-
-    if (l_paddingBegin.empty() && !l_kernel.empty())
-        l_paddingBegin.resize(l_kernel.size(), 0);
-    if (l_paddingEnd.empty() && !l_kernel.empty())
-        l_paddingEnd.resize(l_kernel.size(), 0);
-    if (l_dilation.empty() && !l_kernel.empty())
-        l_dilation.resize(l_kernel.size(), 1);
-    if (l_strides.empty() && !l_kernel.empty())
-        l_strides.resize(l_kernel.size(), 1);
-
-    if (!getOutDepth() || l_kernel.empty() || l_kernel.size() != l_paddingBegin.size() || l_kernel.size() != l_paddingEnd.size() ||
-            l_kernel.size() != l_dilation.size() || l_kernel.size() != l_strides.size())
-        THROW_IE_EXCEPTION << genLayer.getType() << " node " << genLayer.getName() << " contains incorrect parameters!";
-
-    genLayer.getParameters()["kernel"] = l_kernel;
-    genLayer.getParameters()["strides"] = l_strides;
-    genLayer.getParameters()["pads_begin"] = l_paddingBegin;
-    genLayer.getParameters()["pads_end"] = l_paddingEnd;
-    genLayer.getParameters()["dilations"] = l_dilation;
-    return genLayer;
+Builder::ConvolutionLayer::ConvolutionLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Convolution");
 }
 
 Builder::ConvolutionLayer &Builder::ConvolutionLayer::setName(const std::string &name) {
-    getLayer().getName() = name;
-    return *this;
-}
-
-Builder::ConvolutionLayer& Builder::ConvolutionLayer::setWeights(const Blob::CPtr& weights) {
-    getLayer().addConstantData("weights", weights);
-    return *this;
-}
-Builder::ConvolutionLayer& Builder::ConvolutionLayer::setBiases(const Blob::CPtr& biases) {
-    getLayer().addConstantData("biases", biases);
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ConvolutionLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setInputPort(const Port& port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::ConvolutionLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<size_t> Builder::ConvolutionLayer::getKernel() const {
-    return uInts2size_t(getLayer().getParameters()["kernel"].asUInts({}));
+    return getLayer()->getParameters().at("kernel");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setKernel(const std::vector<size_t>& kernel) {
-    getLayer().getParameters()["kernel"] = kernel;
+    getLayer()->getParameters()["kernel"] = kernel;
     return *this;
 }
 
 const std::vector<size_t> Builder::ConvolutionLayer::getStrides() const {
-    return uInts2size_t(getLayer().getParameters()["strides"].asUInts({}));
+    return getLayer()->getParameters().at("strides");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setStrides(const std::vector<size_t>& strides) {
-    getLayer().getParameters()["strides"] = strides;
+    getLayer()->getParameters()["strides"] = strides;
     return *this;
 }
 
 const std::vector<size_t> Builder::ConvolutionLayer::getDilation() const {
-    return uInts2size_t(getLayer().getParameters()["dilations"].asUInts({}));
+    return getLayer()->getParameters().at("dilations");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setDilation(const std::vector<size_t>& dilation) {
-    getLayer().getParameters()["dilations"] = dilation;
+    getLayer()->getParameters()["dilations"] = dilation;
     return *this;
 }
 
 const std::vector<size_t> Builder::ConvolutionLayer::getPaddingsBegin() const {
-    return uInts2size_t(getLayer().getParameters()["pads_begin"].asUInts({}));
+    return getLayer()->getParameters().at("pads_begin");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setPaddingsBegin(const std::vector<size_t>& paddings) {
-    getLayer().getParameters()["pads_begin"] = paddings;
+    getLayer()->getParameters()["pads_begin"] = paddings;
     return *this;
 }
 
 const std::vector<size_t> Builder::ConvolutionLayer::getPaddingsEnd() const {
-    return uInts2size_t(getLayer().getParameters()["pads_end"].asUInts({}));
+    return getLayer()->getParameters().at("pads_end");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setPaddingsEnd(const std::vector<size_t>& paddings) {
-    getLayer().getParameters()["pads_end"] = paddings;
+    getLayer()->getParameters()["pads_end"] = paddings;
     return *this;
 }
 
 size_t Builder::ConvolutionLayer::getGroup() const {
-    return getLayer().getParameters()["group"].asUInt(1);
+    return getLayer()->getParameters().at("group");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setGroup(size_t group) {
-    getLayer().getParameters()["group"] = group;
+    getLayer()->getParameters()["group"] = group;
     return *this;
 }
 
 size_t Builder::ConvolutionLayer::getOutDepth() const {
-    return getLayer().getParameters()["output"].asUInt(0);
+    return getLayer()->getParameters().at("output");
 }
 Builder::ConvolutionLayer& Builder::ConvolutionLayer::setOutDepth(size_t outDepth) {
-    getLayer().getParameters()["output"] = outDepth;
+    getLayer()->getParameters()["output"] = outDepth;
     return *this;
 }
 
-void Builder::ConvolutionLayer::validate(const Layer& layer) {
-    Layer convLayer = layer;
-    Builder::ConvolutionLayer convBuilder(convLayer);
-    std::vector<size_t> l_kernel = convBuilder.getKernel();
-
+REG_VALIDATOR_FOR(Convolution, [] (const InferenceEngine::Builder::Layer::CPtr& layer, bool partial) {
     // WA for old IRs
-    if (l_kernel.empty() && layer.getParameters().find("kernel-x") != layer.getParameters().end() &&
-            layer.getParameters().find("kernel-y") != layer.getParameters().end())
+    if (layer->getParameters().find("kernel") == layer->getParameters().end() &&
+        layer->getParameters().find("kernel-x") != layer->getParameters().end() &&
+        layer->getParameters().find("kernel-y") != layer->getParameters().end())
         return;
 
+    Builder::ConvolutionLayer convBuilder(layer);
+    std::vector<size_t> l_kernel = convBuilder.getKernel();
     std::vector<size_t> l_dilation = convBuilder.getDilation();
     std::vector<size_t> l_paddingBegin = convBuilder.getPaddingsBegin();
     std::vector<size_t> l_paddingEnd = convBuilder.getPaddingsEnd();
@@ -162,9 +135,121 @@ void Builder::ConvolutionLayer::validate(const Layer& layer) {
     if (l_strides.empty() && !l_kernel.empty())
         l_strides.resize(l_kernel.size(), 1);
 
-    if (!convBuilder.getOutDepth() || l_kernel.empty() || l_kernel.size() != l_paddingBegin.size() || l_kernel.size() != l_paddingEnd.size() ||
-            l_kernel.size() != l_dilation.size() || l_kernel.size() != l_strides.size())
-        THROW_IE_EXCEPTION << layer.getType() << " node " << layer.getName() << " contains incorrect parameters!";
-}
+    if (l_kernel.empty()) {
+        THROW_IE_EXCEPTION << "Kernel is empty!";
+    }
+
+    if (l_paddingBegin.size() != l_paddingEnd.size()) {
+        THROW_IE_EXCEPTION << "Padding_begin dimension is not equal to padding_end dimension";
+    }
+
+    if (!l_paddingBegin.empty() && l_kernel.size() != l_paddingBegin.size()) {
+        THROW_IE_EXCEPTION << "Padding dimension is not equal to kernel dimension";
+    }
+
+    if (l_kernel.size() != l_strides.size()) {
+        THROW_IE_EXCEPTION << "Stride dimension is not equal to kernel dimension";
+    }
+
+    if (!l_dilation.empty() && l_kernel.size() != l_dilation.size()) {
+        THROW_IE_EXCEPTION << "Dilation dimension is not equal to kernel dimension";
+    }
+
+    if (convBuilder.getOutDepth() == 0) {
+        THROW_IE_EXCEPTION << "OutDepth parameter should be more than 0";
+    }
+
+    for (size_t kernel_dim : l_kernel) {
+        if (kernel_dim == 0) {
+            THROW_IE_EXCEPTION << "Kernel dimensions should be more than 0";
+        }
+    }
+
+    for (size_t i_stride : l_strides) {
+        if (i_stride == 0) {
+            THROW_IE_EXCEPTION << "Strides should be more than 0";
+        }
+    }
+
+    for (size_t dil : l_dilation) {
+        if (dil == 0)
+            THROW_IE_EXCEPTION << "Dilation should be more than 0";
+    }
+
+    if (!convBuilder.getGroup())
+        THROW_IE_EXCEPTION << "Group should be more than 0";
+
+    if (convBuilder.getInputPort().shape().empty())
+        return;
+
+    const size_t IC = convBuilder.getInputPort().shape()[1];
+    if (IC % convBuilder.getGroup())
+        THROW_IE_EXCEPTION << "Number of input channels (" << IC <<
+                           ") is not divided by group number (" << convBuilder.getGroup() << ")";
+
+    size_t weight_size = convBuilder.getOutDepth() * IC / convBuilder.getGroup();
+    for (size_t kernel_dim : l_kernel) {
+        if (static_cast<double>(weight_size) * kernel_dim > std::numeric_limits<size_t>::max()) {
+            THROW_IE_EXCEPTION << "Weight size exceeds the size_t max";
+        }
+        weight_size *= kernel_dim;
+    }
+
+    if (partial)
+        return;
+
+    const auto weights = layer->getInputPorts()[1].getData()->getData();
+    if (weights->size() != weight_size) {
+        THROW_IE_EXCEPTION << "Weight size is not correct!";
+    }
+
+    const auto biases = layer->getInputPorts()[2].getData()->getData();
+    if (biases && biases->cbuffer() && biases->size() != convBuilder.getOutDepth())
+        THROW_IE_EXCEPTION << "Biases size is incorrect!";
+});
+
+REG_CONVERTER_FOR(Convolution, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    // WA for old IRs
+    if (cnnLayer->params.find("kernel") == cnnLayer->params.end() &&
+        cnnLayer->params.find("kernel-x") != cnnLayer->params.end() &&
+        cnnLayer->params.find("kernel-y") != cnnLayer->params.end())
+        return;
 
-REG_VALIDATOR_FOR(Convolution, Builder::ConvolutionLayer::validate);
+    std::vector<unsigned int> tmp = cnnLayer->GetParamAsUInts("kernel");
+    std::vector<size_t> cur(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["kernel"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("strides");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["strides"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("dilations");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["dilations"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("pads_begin");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["pads_begin"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("pads_end");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["pads_end"] = cur;
+
+    layer.getParameters()["group"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("group"));
+    layer.getParameters()["output"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("output"));
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_crop_layer.cpp b/inference-engine/src/inference_engine/builders/ie_crop_layer.cpp
index 7fe259120..239a6f467 100644
--- a/inference-engine/src/inference_engine/builders/ie_crop_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_crop_layer.cpp
@@ -1,69 +1,110 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_crop_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::CropLayer::CropLayer(const std::string& name): LayerFragment("Crop", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(2);
+Builder::CropLayer::CropLayer(const std::string& name): LayerDecorator("Crop", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(2);
 }
 
-Builder::CropLayer::CropLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Crop"))
-        THROW_IE_EXCEPTION << "Cannot create CropLayer decorator for layer " << getLayer().getType();
+Builder::CropLayer::CropLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Crop");
+}
+
+Builder::CropLayer::CropLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Crop");
 }
 
 Builder::CropLayer& Builder::CropLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const std::vector<Port>& Builder::CropLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::CropLayer& Builder::CropLayer::setInputPorts(const std::vector<Port>& ports) {
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 const Port& Builder::CropLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::CropLayer& Builder::CropLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<size_t> Builder::CropLayer::getAxis() const {
-    return uInts2size_t(getLayer().getParameters()["axis"].asUInts());
+    return getLayer()->getParameters().at("axis");
 }
 
 Builder::CropLayer& Builder::CropLayer::setAxis(const std::vector<size_t>& axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
 }
 
 const std::vector<size_t> Builder::CropLayer::getOffset() const {
-    return uInts2size_t(getLayer().getParameters()["offset"].asUInts());
+    return getLayer()->getParameters().at("offset");
 }
 
 Builder::CropLayer& Builder::CropLayer::setOffset(const std::vector<size_t>& offsets) {
-    getLayer().getParameters()["offset"] = offsets;
+    getLayer()->getParameters()["offset"] = offsets;
     return *this;
 }
 
-void Builder::CropLayer::validate(const Layer& layer) {
-    if (layer.getInputPorts().size() != 2)
-        THROW_IE_EXCEPTION << "Incorrect parameters for layer " << layer.getName() << " should have 2 inputs!";
-}
-
-REG_VALIDATOR_FOR(Crop, Builder::CropLayer::validate);
-\ No newline at end of file
+REG_VALIDATOR_FOR(Crop, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    if (input_layer->getInputPorts().size() != 2) {
+        THROW_IE_EXCEPTION << "Incorrect parameters for getLayer() " << input_layer->getName()
+                           << " should have 2 input ports.";
+    }
+    if (input_layer->getOutputPorts().size() != 1) {
+        THROW_IE_EXCEPTION << "Incorrect parameters for getLayer() " << input_layer->getName()
+                           << " should have 1 output port";
+    }
+    Builder::CropLayer layer(input_layer);
+    if (layer.getAxis().size() != layer.getOffset().size()) {
+        THROW_IE_EXCEPTION <<  "Incorrect parameters for getLayer() " << input_layer->getName()
+                           << ". Axis size must be equal to the size of Offset";
+    }
+    for (size_t i = 0; i < layer.getAxis().size(); ++i) {
+        const size_t index = layer.getAxis()[i];
+        if (index >= layer.getInputPorts()[0].shape().size()) {
+            THROW_IE_EXCEPTION << "Incorrect parameters for getLayer() " << input_layer->getName()
+                               << ". Each element of Axis should be less than input shape length";
+        }
+        if (layer.getOutputPort().shape()[index] != layer.getInputPorts()[1].shape()[index]) {
+            THROW_IE_EXCEPTION <<  "Incorrect parameters for getLayer() " << input_layer->getName()
+                               << ". The second input shapes should have the same value as the output shapes in the indexes contained in Axis";
+        }
+        if (layer.getInputPorts()[0].shape()[index] < layer.getOutputPort().shape()[index] + layer.getOffset()[i]) {
+            THROW_IE_EXCEPTION <<  "Incorrect parameters for getLayer() " << input_layer->getName()
+                               << ". The sum of offset and output shape in the " << i + 1 << " dimension is bigger then input shape size";
+        }
+    }
+});
+
+REG_CONVERTER_FOR(Crop, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    std::vector<unsigned int> tmp = cnnLayer->GetParamAsUInts("axis");
+    layer.getParameters()["axis"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["axis"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
+
+    tmp = cnnLayer->GetParamAsUInts("offset");
+    layer.getParameters()["offset"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["offset"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_ctc_greedy_decoder_layer.cpp b/inference-engine/src/inference_engine/builders/ie_ctc_greedy_decoder_layer.cpp
index c3e017a7f..c5b80651a 100644
--- a/inference-engine/src/inference_engine/builders/ie_ctc_greedy_decoder_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_ctc_greedy_decoder_layer.cpp
@@ -1,46 +1,61 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_ctc_greedy_decoder_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::CTCGreedyDecoderLayer::CTCGreedyDecoderLayer(const std::string& name): LayerFragment("CTCGreedyDecoder", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::CTCGreedyDecoderLayer::CTCGreedyDecoderLayer(const std::string& name): LayerDecorator("CTCGreedyDecoder", name) {
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::CTCGreedyDecoderLayer::CTCGreedyDecoderLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "CTCGreedyDecoder"))
-        THROW_IE_EXCEPTION << "Cannot create CTCGreedyDecoderLayer decorator for layer " << getLayer().getType();
+Builder::CTCGreedyDecoderLayer::CTCGreedyDecoderLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("CTCGreedyDecoder");
+}
+
+Builder::CTCGreedyDecoderLayer::CTCGreedyDecoderLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("CTCGreedyDecoder");
 }
 
 Builder::CTCGreedyDecoderLayer& Builder::CTCGreedyDecoderLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 const std::vector<Port>& Builder::CTCGreedyDecoderLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 Builder::CTCGreedyDecoderLayer& Builder::CTCGreedyDecoderLayer::setInputPorts(const std::vector<Port>& ports) {
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 const Port& Builder::CTCGreedyDecoderLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 Builder::CTCGreedyDecoderLayer& Builder::CTCGreedyDecoderLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 bool Builder::CTCGreedyDecoderLayer::getCTCMergeRepeated() const {
-    return getLayer().getParameters()["ctc_merge_repeated"].asBool();
+    return getLayer()->getParameters().at("ctc_merge_repeated");
 }
 Builder::CTCGreedyDecoderLayer& Builder::CTCGreedyDecoderLayer::setCTCMergeRepeated(bool flag) {
-    getLayer().getParameters()["ctc_merge_repeated"] = flag;
+    getLayer()->getParameters()["ctc_merge_repeated"] = flag;
     return *this;
 }
 
+REG_VALIDATOR_FOR(CTCGreedyDecoder, [](const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::CTCGreedyDecoderLayer layer(input_layer);
+
+    if (layer.getInputPorts().empty() || layer.getInputPorts().size() > 2) {
+        THROW_IE_EXCEPTION << "Input ports are wrong in layer " << layer.getName() <<
+                           ". There are should be 1 or 2 input ports";
+    }
+});
+
+REG_CONVERTER_FOR(CTCGreedyDecoder, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["ctc_merge_repeated"] = cnnLayer->GetParamsAsBool("ctc_merge_repeated", false);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_deconvolution_layer.cpp b/inference-engine/src/inference_engine/builders/ie_deconvolution_layer.cpp
index dfb607a3f..648cdb5a9 100644
--- a/inference-engine/src/inference_engine/builders/ie_deconvolution_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_deconvolution_layer.cpp
@@ -1,20 +1,164 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_deconvolution_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
+#include <limits>
+#include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
 Builder::DeconvolutionLayer::DeconvolutionLayer(const std::string& name): ConvolutionLayer(name) {
-    getLayer().setType("Deconvolution");
+    getLayer()->setType("Deconvolution");
 }
-Builder::DeconvolutionLayer::DeconvolutionLayer(Layer& genLayer): ConvolutionLayer(genLayer.getName()) {
-    getLayer().setName("");
-    getLayer().setType("");
-    getLayer() = genLayer;
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Deconvolution"))
-        THROW_IE_EXCEPTION << "Cannot create DeconvolutionLayer decorator for layer " << getLayer().getType();
+Builder::DeconvolutionLayer::DeconvolutionLayer(const Layer::Ptr& layer): ConvolutionLayer(layer->getName()) {
+    this->getLayer() = layer;
+    checkType("Deconvolution");
 }
+Builder::DeconvolutionLayer::DeconvolutionLayer(const Layer::CPtr& layer): ConvolutionLayer(layer->getName()) {
+    this->getLayer().reset();
+    cLayer = layer;
+    checkType("Deconvolution");
+}
+
+REG_VALIDATOR_FOR(Deconvolution, [] (const InferenceEngine::Builder::Layer::CPtr& layer, bool partial) {
+    // WA for old IRs
+    if (layer->getParameters().find("kernel") == layer->getParameters().end() &&
+        layer->getParameters().find("kernel-x") != layer->getParameters().end() &&
+        layer->getParameters().find("kernel-y") != layer->getParameters().end())
+        return;
+    Builder::DeconvolutionLayer deconvBuilder(layer);
+    std::vector<size_t> l_kernel = deconvBuilder.getKernel();
+    std::vector<size_t> l_dilation = deconvBuilder.getDilation();
+    std::vector<size_t> l_paddingBegin = deconvBuilder.getPaddingsBegin();
+    std::vector<size_t> l_paddingEnd = deconvBuilder.getPaddingsEnd();
+    std::vector<size_t> l_strides = deconvBuilder.getStrides();
+
+    if (l_paddingBegin.empty() && !l_kernel.empty())
+        l_paddingBegin.resize(l_kernel.size(), 0);
+    if (l_paddingEnd.empty() && !l_kernel.empty())
+        l_paddingEnd.resize(l_kernel.size(), 0);
+    if (l_dilation.empty() && !l_kernel.empty())
+        l_dilation.resize(l_kernel.size(), 1);
+    if (l_strides.empty() && !l_kernel.empty())
+        l_strides.resize(l_kernel.size(), 1);
+
+    if (l_kernel.empty()) {
+        THROW_IE_EXCEPTION << "Kernel is empty!";
+    }
+
+    if (l_paddingBegin.size() != l_paddingEnd.size()) {
+        THROW_IE_EXCEPTION << "Padding_begin dimension is not equal to padding_end dimension";
+    }
+
+    if (!l_paddingBegin.empty() && l_kernel.size() != l_paddingBegin.size()) {
+        THROW_IE_EXCEPTION << "Padding dimension is not equal to kernel dimension";
+    }
+
+    if (l_kernel.size() != l_strides.size()) {
+        THROW_IE_EXCEPTION << "Stride dimension is not equal to kernel dimension";
+    }
+
+    if (!l_dilation.empty() && l_kernel.size() != l_dilation.size()) {
+        THROW_IE_EXCEPTION << "Dilation dimension is not equal to kernel dimension";
+    }
+
+    if (deconvBuilder.getOutDepth() == 0) {
+        THROW_IE_EXCEPTION << "OutDepth parameter should be more than 0";
+    }
+
+    for (size_t kernel_dim : l_kernel) {
+        if (kernel_dim == 0) {
+            THROW_IE_EXCEPTION << "Kernel dimensions should be more than 0";
+        }
+    }
+
+    for (size_t i_stride : l_strides) {
+        if (i_stride == 0) {
+            THROW_IE_EXCEPTION << "Strides should be more than 0";
+        }
+    }
+
+    for (size_t dil : l_dilation) {
+        if (dil == 0)
+            THROW_IE_EXCEPTION << "Dilation should be more than 0";
+    }
+
+    if (!deconvBuilder.getGroup())
+        THROW_IE_EXCEPTION << "Group should be more than 0";
+
+    if (deconvBuilder.getInputPort().shape().empty())
+        return;
+
+    const size_t IC = deconvBuilder.getInputPort().shape()[1];
+    if (IC % deconvBuilder.getGroup())
+        THROW_IE_EXCEPTION << "Number of input channels (" << IC <<
+                           ") is not divided by group number (" << deconvBuilder.getGroup() << ")";
+
+    size_t weight_size = deconvBuilder.getOutDepth() * IC / deconvBuilder.getGroup();
+    for (size_t kernel_dim : l_kernel) {
+        if (static_cast<double>(weight_size) * kernel_dim > std::numeric_limits<size_t>::max()) {
+            THROW_IE_EXCEPTION << "Weight size exceeds the size_t max";
+        }
+        weight_size *= kernel_dim;
+    }
+
+    if (partial)
+        return;
+
+    const auto weights = layer->getInputPorts()[1].getData()->getData();
+    if (weights->size() != weight_size) {
+        THROW_IE_EXCEPTION << "Weight size is not correct!";
+    }
+
+    const auto biases = layer->getInputPorts()[2].getData()->getData();
+    if (biases && biases->cbuffer() && biases->size() != deconvBuilder.getOutDepth())
+        THROW_IE_EXCEPTION << "Biases size is incorrect!";
+});
+
+REG_CONVERTER_FOR(Deconvolution, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    // WA for old IRs
+    if (cnnLayer->params.find("kernel") == cnnLayer->params.end() &&
+        cnnLayer->params.find("kernel-x") != cnnLayer->params.end() &&
+        cnnLayer->params.find("kernel-y") != cnnLayer->params.end())
+        return;
+    std::vector<unsigned int> tmp = cnnLayer->GetParamAsUInts("kernel");
+    std::vector<size_t> cur(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["kernel"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("strides");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["strides"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("dilations");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["dilations"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("pads_begin");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["pads_begin"] = cur;
+
+    tmp = cnnLayer->GetParamAsUInts("pads_end");
+    cur.resize(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        cur[i] = static_cast<size_t>(tmp[i]);
+    }
+    layer.getParameters()["pads_end"] = cur;
+
+    layer.getParameters()["group"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("group"));
+    layer.getParameters()["output"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("output"));
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_detection_output_layer.cpp b/inference-engine/src/inference_engine/builders/ie_detection_output_layer.cpp
index f836445f9..42e1a1466 100644
--- a/inference-engine/src/inference_engine/builders/ie_detection_output_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_detection_output_layer.cpp
@@ -1,124 +1,168 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_detection_output_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
+#include <cfloat>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::DetectionOutputLayer::DetectionOutputLayer(const std::string& name): LayerFragment("DetectionOutput", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(2);
+Builder::DetectionOutputLayer::DetectionOutputLayer(const std::string& name): LayerDecorator("DetectionOutput", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(2);
+    setBackgroudLabelId(-1);
 }
 
-Builder::DetectionOutputLayer::DetectionOutputLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "DetectionOutput"))
-        THROW_IE_EXCEPTION << "Cannot create DetectionOutputLayer decorator for layer " << getLayer().getType();
+Builder::DetectionOutputLayer::DetectionOutputLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("DetectionOutput");
+}
+
+Builder::DetectionOutputLayer::DetectionOutputLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("DetectionOutput");
 }
 
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const std::vector<Port>& Builder::DetectionOutputLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setInputPorts(const std::vector<Port> &ports) {
     if (ports.size() != 3)
-        THROW_IE_EXCEPTION << "Incorrect number of inputs for DetectionOutput layer.";
-    getLayer().getInputPorts() = ports;
+        THROW_IE_EXCEPTION << "Incorrect number of inputs for DetectionOutput getLayer().";
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 const Port& Builder::DetectionOutputLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::DetectionOutputLayer::getNumClasses() const {
-    return getLayer().getParameters()["num_classes"].asUInt();
+    return getLayer()->getParameters().at("num_classes");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setNumClasses(size_t num) {
-    getLayer().getParameters()["num_classes"] = num;
+    getLayer()->getParameters()["num_classes"] = num;
     return *this;
 }
 int Builder::DetectionOutputLayer::getBackgroudLabelId() const {
-    return getLayer().getParameters()["background_label_id"].asInt(-1);
+    return getLayer()->getParameters().at("background_label_id");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setBackgroudLabelId(int labelId) {
-    getLayer().getParameters()["background_label_id"] = labelId;
+    getLayer()->getParameters()["background_label_id"] = labelId;
     return *this;
 }
 int Builder::DetectionOutputLayer::getTopK() const {
-    return getLayer().getParameters()["top_k"].asInt();
+    return getLayer()->getParameters().at("top_k");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setTopK(int topK) {
-    getLayer().getParameters()["top_k"] = topK;
+    getLayer()->getParameters()["top_k"] = topK;
     return *this;
 }
 int Builder::DetectionOutputLayer::getKeepTopK() const {
-    return getLayer().getParameters()["keep_top_k"].asInt();
+    return getLayer()->getParameters().at("keep_top_k");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setKeepTopK(int topK) {
-    getLayer().getParameters()["keep_top_k"] = topK;
+    getLayer()->getParameters()["keep_top_k"] = topK;
     return *this;
 }
 int Builder::DetectionOutputLayer::getNumOrientClasses() const {
-    return getLayer().getParameters()["num_orient_classes"].asInt();
+    return getLayer()->getParameters().at("num_orient_classes");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setNumOrientClasses(int numClasses) {
-    getLayer().getParameters()["num_orient_classes"] = numClasses;
+    getLayer()->getParameters()["num_orient_classes"] = numClasses;
     return *this;
 }
 std::string Builder::DetectionOutputLayer::getCodeType() const {
-    return getLayer().getParameters()["code_type"];
+    return getLayer()->getParameters().at("code_type");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setCodeType(std::string type) {
-    getLayer().getParameters()["code_type"] = type;
+    getLayer()->getParameters()["code_type"] = type;
     return *this;
 }
 int Builder::DetectionOutputLayer::getInterpolateOrientation() const {
-    return getLayer().getParameters()["interpolate_orientation"].asInt();
+    return getLayer()->getParameters().at("interpolate_orientation");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setInterpolateOrientation(int orient) {
-    getLayer().getParameters()["interpolate_orientation"] = orient;
+    getLayer()->getParameters()["interpolate_orientation"] = orient;
     return *this;
 }
 float Builder::DetectionOutputLayer::getNMSThreshold() const {
-    return getLayer().getParameters()["nms_threshold"].asFloat();
+    return getLayer()->getParameters().at("nms_threshold");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setNMSThreshold(float threshold) {
-    getLayer().getParameters()["nms_threshold"] = threshold;
+    getLayer()->getParameters()["nms_threshold"] = threshold;
     return *this;
 }
 float Builder::DetectionOutputLayer::getConfidenceThreshold() const {
-    return getLayer().getParameters()["confidence_threshold"].asFloat();
+    return getLayer()->getParameters().at("confidence_threshold");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setConfidenceThreshold(float threshold) {
-    getLayer().getParameters()["confidence_threshold"] = threshold;
+    getLayer()->getParameters()["confidence_threshold"] = threshold;
     return *this;
 }
 bool Builder::DetectionOutputLayer::getShareLocation() const {
-    return getLayer().getParameters()["share_location"].asBool();
+    return getLayer()->getParameters().at("share_location");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setShareLocation(bool flag) {
-    getLayer().getParameters()["share_location"] = flag;
+    getLayer()->getParameters()["share_location"] = flag;
     return *this;
 }
 bool Builder::DetectionOutputLayer::getVariantEncodedInTarget() const {
-    return getLayer().getParameters()["variance_encoded_in_target"].asBool();
+    return getLayer()->getParameters().at("variance_encoded_in_target");
 }
 Builder::DetectionOutputLayer& Builder::DetectionOutputLayer::setVariantEncodedInTarget(bool flag) {
-    getLayer().getParameters()["variance_encoded_in_target"] = flag;
+    getLayer()->getParameters()["variance_encoded_in_target"] = flag;
     return *this;
 }
+
+REG_VALIDATOR_FOR(DetectionOutput, [](const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::DetectionOutputLayer layer(input_layer);
+    if (layer.getNumClasses() == 0) {
+        THROW_IE_EXCEPTION << "NumClasses parameter is wrong in layer " << layer.getName() <<
+                           ". It should be > 0.";
+    }
+    if (layer.getCodeType() != "caffe.PriorBoxParameter.CENTER_SIZE" &&
+        layer.getCodeType() != "caffe.PriorBoxParameter.CORNER") {
+        THROW_IE_EXCEPTION << "CodeType parameter is wrong in layer " << layer.getName() <<
+                           ". It should be equal to 'caffe.PriorBoxParameter.CORNER' or 'caffe.PriorBoxParameter.CENTER_SIZE'";
+    }
+    if (layer.getBackgroudLabelId() < -1) {
+        THROW_IE_EXCEPTION << "BackgroundLabelId parameter is wrong in layer " << layer.getName() <<
+                           ". It should be >= 0 if this one is an Id of existing label else it should be equal to -1";
+    }
+    if (layer.getNMSThreshold() <= 0) {
+        THROW_IE_EXCEPTION << "NMSThreshold parameter is wrong in layer " << layer.getName() <<
+                           ". It should be > 0.";
+    }
+    if (layer.getConfidenceThreshold() <= 0) {
+        THROW_IE_EXCEPTION << "ConfidenceThreshold parameter is wrong in layer " << layer.getName() <<
+                           ". It should be > 0.";
+    }
+});
+
+REG_CONVERTER_FOR(DetectionOutput, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["num_classes"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("num_classes"));
+    layer.getParameters()["background_label_id"] = cnnLayer->GetParamAsInt("background_label_id", 0);
+    layer.getParameters()["top_k"] = cnnLayer->GetParamAsInt("top_k", -1);
+    layer.getParameters()["keep_top_k"] = cnnLayer->GetParamAsInt("keep_top_k", -1);
+    layer.getParameters()["num_orient_classes"] = cnnLayer->GetParamAsInt("num_orient_classes", 0);
+    layer.getParameters()["code_type"] = cnnLayer->GetParamAsString("code_type", "caffe.PriorBoxParameter.CORNER");
+    layer.getParameters()["interpolate_orientation"] = cnnLayer->GetParamAsInt("interpolate_orientation", 1);
+    layer.getParameters()["nms_threshold"] = cnnLayer->GetParamAsFloat("nms_threshold");
+    layer.getParameters()["confidence_threshold"] = cnnLayer->GetParamAsFloat("confidence_threshold", -FLT_MAX);
+    layer.getParameters()["share_location"] = cnnLayer->GetParamsAsBool("share_location", true);
+    layer.getParameters()["variance_encoded_in_target"] = cnnLayer->GetParamsAsBool("variance_encoded_in_target", false);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_eltwise_layer.cpp b/inference-engine/src/inference_engine/builders/ie_eltwise_layer.cpp
index cffecaa48..df51f5ea9 100644
--- a/inference-engine/src/inference_engine/builders/ie_eltwise_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_eltwise_layer.cpp
@@ -1,64 +1,95 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_eltwise_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::EltwiseLayer::EltwiseLayer(const std::string& name): LayerFragment("Eltwise", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::EltwiseLayer::EltwiseLayer(const std::string& name): LayerDecorator("Eltwise", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(2);
     setEltwiseType(EltwiseType::SUM);
 }
 
-Builder::EltwiseLayer::EltwiseLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Eltwise"))
-        THROW_IE_EXCEPTION << "Cannot create EltwiseLayer decorator for layer " << getLayer().getType();
+Builder::EltwiseLayer::EltwiseLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Eltwise");
 
-    std::string operatorStr = getLayer().getParameters()["operation"];
+    std::string operatorStr = getLayer()->getParameters()["operation"];
     if (operatorStr == "max") {
         type = MAX;
     } else if (operatorStr == "sum") {
         type = SUM;
     } else if (operatorStr == "mul") {
         type = MUL;
+    } else if (operatorStr == "sub") {
+        type = SUB;
+    } else if (operatorStr == "div") {
+        type = DIV;
+    } else if (operatorStr == "min") {
+        type = MIN;
+    } else if (operatorStr == "squared_diff") {
+        type = SQUARED_DIFF;
+    }
+}
+
+Builder::EltwiseLayer::EltwiseLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Eltwise");
+
+    const auto cLayer = static_cast<const EltwiseLayer*>(this)->getLayer();
+
+    std::string operatorStr = cLayer->getParameters().at("operation");
+    if (operatorStr == "max") {
+        type = MAX;
+    } else if (operatorStr == "sum") {
+        type = SUM;
+    } else if (operatorStr == "mul") {
+        type = MUL;
+    } else if (operatorStr == "sub") {
+        type = SUB;
+    } else if (operatorStr == "div") {
+        type = DIV;
+    } else if (operatorStr == "min") {
+        type = MIN;
+    } else if (operatorStr == "squared_diff") {
+        type = SQUARED_DIFF;
     }
 }
 
 Builder::EltwiseLayer& Builder::EltwiseLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const std::vector<Port>& Builder::EltwiseLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::EltwiseLayer& Builder::EltwiseLayer::setInputPorts(const std::vector<Port>& ports) {
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 const Port& Builder::EltwiseLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::EltwiseLayer& Builder::EltwiseLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<float> Builder::EltwiseLayer::getScales() const {
-    return getLayer().getParameters()["scales"].asFloats({});
+    return getLayer()->getParameters().at("scales");
 }
 
 // TODO: IR doesn't contain Scales!!!
 Builder::EltwiseLayer& Builder::EltwiseLayer::setScales(const std::vector<float>& scales) {
-    getLayer().getParameters()["scales"] = scales;
+    getLayer()->getParameters()["scales"] = scales;
     return *this;
 }
 
@@ -70,17 +101,57 @@ Builder::EltwiseLayer& Builder::EltwiseLayer::setEltwiseType(Builder::EltwiseLay
     this->type = type;
     std::string operatorStr;
     switch (type) {
-    case MAX:
-        operatorStr = "max";
-        break;
-    case SUM:
-        operatorStr = "sum";
-        break;
-    case MUL:
-        operatorStr = "mul";
+        case MAX:
+            operatorStr = "max";
+            break;
+        case SUM:
+            operatorStr = "sum";
+            break;
+        case MUL:
+            operatorStr = "mul";
+            break;
+        case SUB:
+            operatorStr = "sub";
+            break;
+        case DIV:
+            operatorStr = "div";
+            break;
+        case MIN:
+            operatorStr = "min";
+            break;
+        case SQUARED_DIFF:
+            operatorStr = "squared_diff";
+            break;
     }
-    getLayer().getParameters()["operation"] = operatorStr;
+    getLayer()->getParameters()["operation"] = operatorStr;
     return *this;
 }
 
+REG_VALIDATOR_FOR(Eltwise, [](const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::EltwiseLayer layer(input_layer);
+
+    if (layer.getInputPorts().size() != 2) {
+        THROW_IE_EXCEPTION << "Input ports are incorrect in the layer " << layer.getName()
+                           << ". Number of input ports should be equal to 2.";
+    }
+    if (partial && (layer.getInputPorts()[0].shape().empty() || layer.getInputPorts()[1].shape().empty() ||
+            layer.getOutputPort().shape().empty()))
+        return;
+
+    if (layer.getInputPorts()[0].shape() != layer.getInputPorts()[1].shape()) {
+        THROW_IE_EXCEPTION << "Input ports are incorrect in the layer " << layer.getName()
+                           << ". They should have equal dimensions";
+    }
+
+    if (layer.getInputPorts()[0].shape() != layer.getOutputPort().shape()) {
+        THROW_IE_EXCEPTION << "Layer " << layer.getName() << " have different input and output ports. "
+                           << "They should have equal dimensions.";
+    }
+});
+
+REG_CONVERTER_FOR(Eltwise, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["scales"] = cnnLayer->GetParamAsFloats("scales", {});
+    layer.getParameters()["operation"] = cnnLayer->GetParamAsString("operation");
+});
+
 
diff --git a/inference-engine/src/inference_engine/builders/ie_elu_layer.cpp b/inference-engine/src/inference_engine/builders/ie_elu_layer.cpp
index 5be00443f..eb280a771 100644
--- a/inference-engine/src/inference_engine/builders/ie_elu_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_elu_layer.cpp
@@ -1,46 +1,67 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_elu_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ELULayer::ELULayer(const std::string& name): LayerFragment("ELU", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ELULayer::ELULayer(const std::string& name): LayerDecorator("ELU", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setAlpha(1);
 }
 
-Builder::ELULayer::ELULayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ELU"))
-        THROW_IE_EXCEPTION << "Cannot create ELULayer decorator for layer " << getLayer().getType();
+Builder::ELULayer::ELULayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ELU");
+}
+
+Builder::ELULayer::ELULayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ELU");
 }
 
 Builder::ELULayer& Builder::ELULayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ELULayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ELULayer& Builder::ELULayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::ELULayer::getAlpha() const {
-    return getLayer().getParameters()["alpha"].asFloat();
+    return getLayer()->getParameters().at("alpha");
 }
 
 Builder::ELULayer& Builder::ELULayer::setAlpha(float alpha) {
-    getLayer().getParameters()["alpha"] = alpha;
+    getLayer()->getParameters()["alpha"] = alpha;
     return *this;
 }
 
+REG_VALIDATOR_FOR(ELU, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+    Builder::ELULayer layer(input_layer);
+    if (layer.getAlpha() < 0) {
+        THROW_IE_EXCEPTION << "Alpha should be >= 0";
+    }
+});
+
+REG_CONVERTER_FOR(ELU, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["alpha"] = cnnLayer->GetParamAsFloat("alpha", 0);
+});
+
diff --git a/inference-engine/src/inference_engine/builders/ie_fully_connected_layer.cpp b/inference-engine/src/inference_engine/builders/ie_fully_connected_layer.cpp
index 1abe7b806..cb7879974 100644
--- a/inference-engine/src/inference_engine/builders/ie_fully_connected_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_fully_connected_layer.cpp
@@ -1,62 +1,66 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_fully_connected_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::FullyConnectedLayer::FullyConnectedLayer(const std::string& name): LayerFragment("FullyConnected", name) {
-    getLayer().getInputPorts().resize(1);
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getParameters()["out-size"] = 0;
+Builder::FullyConnectedLayer::FullyConnectedLayer(const std::string& name): LayerDecorator("FullyConnected", name) {
+    getLayer()->getInputPorts().resize(3);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getParameters()["out-size"] = 0;
 }
 
-Builder::FullyConnectedLayer::FullyConnectedLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "FullyConnected"))
-        THROW_IE_EXCEPTION << "Cannot create FullyConnectedLayer decorator for layer " << getLayer().getType();
+Builder::FullyConnectedLayer::FullyConnectedLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("FullyConnected");
 }
 
-Builder::FullyConnectedLayer &Builder::FullyConnectedLayer::setName(const std::string &name) {
-    getLayer().getName() = name;
-    return *this;
+Builder::FullyConnectedLayer::FullyConnectedLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("FullyConnected");
 }
 
-Builder::FullyConnectedLayer& Builder::FullyConnectedLayer::setWeights(const Blob::CPtr& weights) {
-    getLayer().addConstantData("weights", weights);
-    return *this;
-}
-Builder::FullyConnectedLayer& Builder::FullyConnectedLayer::setBiases(const Blob::CPtr& biases) {
-    getLayer().addConstantData("biases", biases);
+Builder::FullyConnectedLayer &Builder::FullyConnectedLayer::setName(const std::string &name) {
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::FullyConnectedLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::FullyConnectedLayer& Builder::FullyConnectedLayer::setInputPort(const Port& port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::FullyConnectedLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::FullyConnectedLayer& Builder::FullyConnectedLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::FullyConnectedLayer::getOutputNum() const {
-    return getLayer().getParameters()["out-size"].asUInt();
+    return getLayer()->getParameters().at("out-size");
 }
+
 Builder::FullyConnectedLayer& Builder::FullyConnectedLayer::setOutputNum(size_t outNum) {
-    getLayer().getParameters()["out-size"] = outNum;
+    getLayer()->getParameters()["out-size"] = outNum;
     return *this;
 }
+
+REG_VALIDATOR_FOR(FullyConnected, [](const InferenceEngine::Builder::Layer::CPtr& layer, bool partial) {
+});
+
+REG_CONVERTER_FOR(FullyConnected, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["out-size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("out-size", 0));
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_grn_layer.cpp b/inference-engine/src/inference_engine/builders/ie_grn_layer.cpp
index 1cc1a7a04..afa362c25 100644
--- a/inference-engine/src/inference_engine/builders/ie_grn_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_grn_layer.cpp
@@ -1,45 +1,52 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_grn_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::GRNLayer::GRNLayer(const std::string& name): LayerFragment("GRN", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::GRNLayer::GRNLayer(const std::string& name): LayerDecorator("GRN", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setBeta(0);
 }
 
-Builder::GRNLayer::GRNLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "GRN"))
-        THROW_IE_EXCEPTION << "Cannot create GRNLayer decorator for layer " << getLayer().getType();
+Builder::GRNLayer::GRNLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("GRN");
+}
+
+Builder::GRNLayer::GRNLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("GRN");
 }
 
 Builder::GRNLayer& Builder::GRNLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::GRNLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::GRNLayer& Builder::GRNLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::GRNLayer::getBeta() const {
-    return getLayer().getParameters()["beta"].asFloat();
+    return getLayer()->getParameters().at("beta");
 }
 
 Builder::GRNLayer& Builder::GRNLayer::setBeta(float beta) {
-    getLayer().getParameters()["beta"] = beta;
+    getLayer()->getParameters()["beta"] = beta;
     return *this;
 }
+
+REG_CONVERTER_FOR(GRN, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["beta"] = static_cast<size_t>(cnnLayer->GetParamAsFloat("beta"));
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_gru_sequence_layer.cpp b/inference-engine/src/inference_engine/builders/ie_gru_sequence_layer.cpp
new file mode 100644
index 000000000..3197686d9
--- /dev/null
+++ b/inference-engine/src/inference_engine/builders/ie_gru_sequence_layer.cpp
@@ -0,0 +1,126 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <builders/ie_gru_sequence_layer.hpp>
+#include <ie_cnn_layer_builder.h>
+
+#include <vector>
+#include <string>
+
+using namespace InferenceEngine;
+
+Builder::GRUSequenceLayer::GRUSequenceLayer(const std::string& name): LayerDecorator("GRUSequence", name) {
+    getLayer()->getOutputPorts().resize(2);
+    getLayer()->getInputPorts().resize(5);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getInputPorts()[3].setParameter("type", "optional");
+}
+
+Builder::GRUSequenceLayer::GRUSequenceLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("GRUSequence");
+}
+
+Builder::GRUSequenceLayer::GRUSequenceLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("GRUSequence");
+}
+
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setName(const std::string& name) {
+    getLayer()->setName(name);
+    return *this;
+}
+
+const std::vector<Port>& Builder::GRUSequenceLayer::getInputPorts() const {
+    return getLayer()->getInputPorts();
+}
+
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setInputPorts(const std::vector<Port>& ports) {
+    getLayer()->getInputPorts() = ports;
+    return *this;
+}
+
+const std::vector<Port>& Builder::GRUSequenceLayer::getOutputPorts() const {
+    return getLayer()->getOutputPorts();
+}
+
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setOutputPorts(const std::vector<Port>& ports) {
+    getLayer()->getOutputPorts() = ports;
+    return *this;
+}
+int Builder::GRUSequenceLayer::getHiddenSize() const {
+    return getLayer()->getParameters().at("hidden_size");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setHiddenSize(int size) {
+    getLayer()->getParameters()["hidden_size"] = size;
+    return *this;
+}
+bool Builder::GRUSequenceLayer::getSequenceDim() const {
+    return getLayer()->getParameters().at("sequence_dim");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setSqquenceDim(bool flag) {
+    getLayer()->getParameters()["sequence_dim"] = flag;
+    return *this;
+}
+const std::vector<std::string>& Builder::GRUSequenceLayer::getActivations() const {
+    return getLayer()->getParameters().at("activations");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setActivations(const std::vector<std::string>& activations) {
+    getLayer()->getParameters()["activations"] = activations;
+    return *this;
+}
+const std::vector<float>& Builder::GRUSequenceLayer::getActivationsAlpha() const {
+    return getLayer()->getParameters().at("activations_alpha");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setActivationsAlpha(const std::vector<float>& activations) {
+    getLayer()->getParameters()["activations_alpha"] = activations;
+    return *this;
+}
+const std::vector<float>& Builder::GRUSequenceLayer::getActivationsBeta() const {
+    return getLayer()->getParameters().at("activations_beta");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setActivationsBeta(const std::vector<float>& activations) {
+    getLayer()->getParameters()["activations_beta"] = activations;
+    return *this;
+}
+float Builder::GRUSequenceLayer::getClip() const {
+    return getLayer()->getParameters().at("clip");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setClip(float clip) {
+    getLayer()->getParameters()["clip"] = clip;
+    return *this;
+}
+
+bool Builder::GRUSequenceLayer::getLinearBeforeReset() const {
+    return getLayer()->getParameters().at("linear_before_reset");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setLinearBeforeReset(bool flag) {
+    getLayer()->getParameters()["linear_before_reset"] = flag;
+    return *this;
+}
+const std::string& Builder::GRUSequenceLayer::getDirection() const {
+    return getLayer()->getParameters().at("direction");
+}
+Builder::GRUSequenceLayer& Builder::GRUSequenceLayer::setDirection(const std::string& direction) {
+    getLayer()->getParameters()["direction"] = direction;
+    return *this;
+}
+
+REG_CONVERTER_FOR(GRUSequence, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["hidden_size"] = cnnLayer->GetParamAsInt("hidden_size");
+    layer.getParameters()["sequence_dim"] = cnnLayer->GetParamsAsBool("sequence_dim", true);
+    std::vector<std::string> activations;
+    std::istringstream stream(cnnLayer->GetParamAsString("activations"));
+    std::string str;
+    while (getline(stream, str, ',')) {
+         activations.push_back(str);
+    }
+    layer.getParameters()["activations"] = activations;
+    layer.getParameters()["activations_alpha"] = cnnLayer->GetParamAsFloats("activations_alpha");
+    layer.getParameters()["activations_beta"] = cnnLayer->GetParamAsFloats("activations_beta");
+    layer.getParameters()["clip"] = cnnLayer->GetParamAsFloat("clip");
+    layer.getParameters()["linear_before_reset"] = cnnLayer->GetParamsAsBool("linear_before_reset", true);
+    layer.getParameters()["direction"] = cnnLayer->GetParamAsString("direction", "");
+});
+
+
diff --git a/inference-engine/src/inference_engine/builders/ie_input_layer_layer.cpp b/inference-engine/src/inference_engine/builders/ie_input_layer_layer.cpp
index e7e099f7c..3b062931a 100644
--- a/inference-engine/src/inference_engine/builders/ie_input_layer_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_input_layer_layer.cpp
@@ -1,40 +1,40 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_input_layer.hpp>
-#include <details/caseless.hpp>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::InputLayer::InputLayer(const std::string& name): LayerFragment("Input", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::InputLayer::InputLayer(const std::string& name): LayerDecorator("Input", name) {
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::InputLayer::InputLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Input"))
-        THROW_IE_EXCEPTION << "Cannot create InputLayer decorator for layer " << getLayer().getType();
+Builder::InputLayer::InputLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Input");
+}
+
+Builder::InputLayer::InputLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Input");
 }
 
 Builder::InputLayer& Builder::InputLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::InputLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::InputLayer& Builder::InputLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
-void Builder::InputLayer::validate(const Layer& layer) {
-    if (layer.getOutputPorts()[0].shape().empty())
-        THROW_IE_EXCEPTION << layer.getType() << " node " << layer.getName() << " should have shape!";
-}
-
-REG_VALIDATOR_FOR(Input,  Builder::InputLayer::validate);
-\ No newline at end of file
+REG_VALIDATOR_FOR(Input, [] (const InferenceEngine::Builder::Layer::CPtr& layer, bool partial) {
+    if (layer->getOutputPorts()[0].shape().empty())
+        THROW_IE_EXCEPTION << layer->getType() << " node " << layer->getName() << " should have shape!";
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_layer_builder.cpp b/inference-engine/src/inference_engine/builders/ie_layer_builder.cpp
index a65dd7c2c..99af91ca2 100644
--- a/inference-engine/src/inference_engine/builders/ie_layer_builder.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_layer_builder.cpp
@@ -1,10 +1,9 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_layer_builder.hpp>
 #include <details/caseless.hpp>
-#include <ie_network.hpp>
 
 #include <limits>
 #include <memory>
@@ -14,71 +13,43 @@
 
 using namespace InferenceEngine;
 
-Builder::Layer::Layer(const std::string& type, const std::string& name): id((std::numeric_limits<idx_t>::max)()), type(type), name(name) {}
+Builder::Layer::Layer(const std::string& type, const std::string& name):
+        name(name), type(type), id((std::numeric_limits<idx_t>::max)()) {}
 
-Builder::Layer::Layer(const ILayer::Ptr& layer) {
-    id = layer->getId();
-    getType() = layer->getType();
-    getName() = layer->getName();
-    getGraph() = layer->getGraph();
-    getParameters() = layer->getParameters()->getParameters();
-    getInputPorts() = layer->getInputPorts();
-    getOutputPorts() = layer->getOutputPorts();
-    getConstantData() = layer->getParameters()->getConstantData();
-}
 Builder::Layer::Layer(const ILayer::CPtr& layer) {
     id = layer->getId();
-    getType() = layer->getType();
-    getName() = layer->getName();
-    getGraph() = layer->getGraph();
-    getParameters() = layer->getParameters()->getParameters();
-    getInputPorts() = layer->getInputPorts();
-    getOutputPorts() = layer->getOutputPorts();
-    getConstantData() = layer->getParameters()->getConstantData();
+    name = layer->getName();
+    type = layer->getType();
+    inPorts = layer->getInputPorts();
+    outPorts = layer->getOutputPorts();
+    params = layer->getParameters();
 }
 
 Builder::Layer::Layer(idx_t id, const Builder::Layer& layer): Layer(layer) {
     this->id = id;
 }
 
-idx_t Builder::Layer::getId() const {
+idx_t Builder::Layer::getId() const noexcept {
     return id;
 }
 
-std::string& Builder::Layer::getType() {
-    return type;
-}
-const std::string& Builder::Layer::getType() const {
+const std::string& Builder::Layer::getType() const noexcept {
     return type;
 }
 Builder::Layer& Builder::Layer::setType(const std::string& type) {
-    getType() = type;
+    this->type = type;
     return *this;
 }
 
-std::string& Builder::Layer::getName() {
-    return name;
-}
-const std::string& Builder::Layer::getName() const {
+const std::string& Builder::Layer::getName() const noexcept {
     return name;
 }
 Builder::Layer& Builder::Layer::setName(const std::string& name) {
-    getName() = name;
-    return *this;
-}
-
-INetwork::Ptr& Builder::Layer::getGraph() {
-    return graph;
-}
-const INetwork::Ptr& Builder::Layer::getGraph() const {
-    return graph;
-}
-Builder::Layer& Builder::Layer::setGraph(const INetwork::Ptr& graph) {
-    getGraph() = graph;
+    this->name = name;
     return *this;
 }
 
-const std::map<std::string, Parameter>& Builder::Layer::getParameters() const {
+const std::map<std::string, Parameter>& Builder::Layer::getParameters() const noexcept {
     return params;
 }
 std::map<std::string, Parameter>& Builder::Layer::getParameters() {
@@ -89,30 +60,10 @@ Builder::Layer& Builder::Layer::setParameters(const std::map<std::string, Parame
     return *this;
 }
 
-const std::map<std::string, Blob::CPtr>& Builder::Layer::getConstantData() const {
-    return constData;
-}
-std::map<std::string, Blob::CPtr>& Builder::Layer::getConstantData() {
-    return constData;
-}
-Builder::Layer& Builder::Layer::setConstantData(const std::map<std::string, Blob::Ptr>& constData) {
-    for (const auto& it : constData)
-        addConstantData(it.first, it.second);
-    return *this;
-}
-Builder::Layer& Builder::Layer::setConstantData(const std::map<std::string, Blob::CPtr>& constData) {
-    getConstantData() = constData;
-    return *this;
-}
-Builder::Layer& Builder::Layer::addConstantData(const std::string& name, const Blob::CPtr& data) {
-    getConstantData()[name] = data;
-    return *this;
-}
-
 std::vector<Port>& Builder::Layer::getInputPorts() {
     return inPorts;
 }
-const std::vector<Port>& Builder::Layer::getInputPorts() const {
+const std::vector<Port>& Builder::Layer::getInputPorts() const noexcept {
     return inPorts;
 }
 Builder::Layer& Builder::Layer::setInputPorts(const std::vector<Port> &ports) {
@@ -123,7 +74,7 @@ Builder::Layer& Builder::Layer::setInputPorts(const std::vector<Port> &ports) {
 std::vector<Port>& Builder::Layer::getOutputPorts() {
     return outPorts;
 }
-const std::vector<Port>& Builder::Layer::getOutputPorts() const {
+const std::vector<Port>& Builder::Layer::getOutputPorts() const noexcept {
     return outPorts;
 }
 Builder::Layer& Builder::Layer::setOutputPorts(const std::vector<Port> &ports) {
@@ -131,29 +82,20 @@ Builder::Layer& Builder::Layer::setOutputPorts(const std::vector<Port> &ports) {
     return *this;
 }
 
-const ILayer::Ptr Builder::Layer::build() const {
-    validate();
-    details::Layer::Ptr layer = std::make_shared<details::Layer>(id);
-
-    layer->getName() = name;
-    layer->getType() = type;
-    layer->setGraph(graph);
-    layer->getInputPorts() = inPorts;
-    layer->getOutputPorts() = outPorts;
-    layer->getParameters()->getParameters() = params;
-    layer->getParameters()->getConstantData() = constData;
-    return std::static_pointer_cast<ILayer>(layer);
+const ILayer::CPtr Builder::Layer::build() const {
+    validate(true);
+    return std::static_pointer_cast<const ILayer>(shared_from_this());
 }
 
-void Builder::Layer::addValidator(const std::string &type, const std::function<void(const Layer&)>& validator) {
+void Builder::Layer::addValidator(const std::string &type, const std::function<void(const Layer::CPtr&, bool)>& validator) {
     auto holder = getValidatorsHolder();
     if (holder->validators.find(type) == holder->validators.end())
         holder->validators[type] = validator;
 }
 
-void Builder::Layer::validate() const {
+void Builder::Layer::validate(bool partial) const {
     if (getValidatorsHolder()->validators.find(type) != getValidatorsHolder()->validators.end())
-        getValidatorsHolder()->validators[type](*this);
+        getValidatorsHolder()->validators[type](shared_from_this(), partial);
 }
 
 std::shared_ptr<Builder::ValidatorsHolder> Builder::Layer::getValidatorsHolder() {
diff --git a/inference-engine/src/inference_engine/builders/ie_layer_decorator.cpp b/inference-engine/src/inference_engine/builders/ie_layer_decorator.cpp
new file mode 100644
index 000000000..d01bc97c9
--- /dev/null
+++ b/inference-engine/src/inference_engine/builders/ie_layer_decorator.cpp
@@ -0,0 +1,71 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <builders/ie_layer_decorator.hpp>
+#include <details/caseless.hpp>
+#include <memory>
+#include <vector>
+#include <string>
+
+using namespace InferenceEngine;
+using namespace details;
+
+Builder::LayerDecorator::LayerDecorator(const std::string& type, const std::string& name) {
+    layer = std::make_shared<Layer>(type, name);
+}
+
+Builder::LayerDecorator::LayerDecorator(const Layer::Ptr& layer): layer(layer) {}
+Builder::LayerDecorator::LayerDecorator(const Layer::CPtr& layer): cLayer(layer) {}
+
+Builder::LayerDecorator::LayerDecorator(const Builder::LayerDecorator & rval) {
+    *this = rval;
+}
+
+Builder::LayerDecorator &Builder::LayerDecorator::operator=(const Builder::LayerDecorator &rval) {
+    layer = rval.layer;
+    cLayer = rval.cLayer;
+    return *this;
+}
+
+Builder::LayerDecorator::operator Builder::Layer() const {
+    getLayer()->validate(true);
+    return *getLayer();
+}
+
+Builder::LayerDecorator::operator Builder::Layer::Ptr() {
+    getLayer()->validate(true);
+    return getLayer();
+}
+
+Builder::LayerDecorator::operator Builder::Layer::CPtr() const {
+    getLayer()->validate(true);
+    return getLayer();
+}
+
+const std::string& Builder::LayerDecorator::getType() const {
+    return getLayer()->getType();
+}
+const std::string& Builder::LayerDecorator::getName() const {
+    return getLayer()->getName();
+}
+
+Builder::Layer::Ptr& Builder::LayerDecorator::getLayer() {
+    if (!layer)
+        THROW_IE_EXCEPTION << "Cannot get Layer::Ptr!";
+    return layer;
+}
+
+const Builder::Layer::CPtr Builder::LayerDecorator::getLayer() const {
+    if (!cLayer) {
+        if (!layer)
+            THROW_IE_EXCEPTION << "Cannot get Layer::CPtr!";
+        return std::static_pointer_cast<const Layer>(layer);
+    }
+    return cLayer;
+}
+
+void Builder::LayerDecorator::checkType(const std::string& type) const {
+    if (!details::CaselessEq<std::string>()(getLayer()->getType(), type))
+        THROW_IE_EXCEPTION << "Cannot create " << type << " decorator for layer " << getLayer()->getType();
+}
diff --git a/inference-engine/src/inference_engine/builders/ie_layer_fragment.cpp b/inference-engine/src/inference_engine/builders/ie_layer_fragment.cpp
deleted file mode 100644
index 8cefe7857..000000000
--- a/inference-engine/src/inference_engine/builders/ie_layer_fragment.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <builders/ie_layer_fragment.hpp>
-
-#include <vector>
-#include <string>
-
-using namespace InferenceEngine;
-using namespace details;
-
-Builder::LayerFragment::LayerFragment(const std::string& type, const std::string& name): layer(type, name), refLayer(layer) {}
-
-Builder::LayerFragment::LayerFragment(Layer& genLayer): layer("", ""), refLayer(genLayer) {}
-
-Builder::LayerFragment &Builder::LayerFragment::operator=(const Builder::LayerFragment &rval) {
-    layer = rval.layer;
-    refLayer = rval.refLayer;
-    if (!layer.getType().empty() && !layer.getName().empty())
-        refLayer = layer;
-    return *this;
-}
-
-Builder::LayerFragment::LayerFragment(const Builder::LayerFragment & rval): LayerFragment("", "") {
-    *this = rval;
-}
-
-Builder::LayerFragment::operator Builder::Layer() const {
-    getLayer().validate();
-    return getLayer();
-}
-
-const std::string& Builder::LayerFragment::getType() const {
-    return getLayer().getType();
-}
-const std::string& Builder::LayerFragment::getName() const {
-    return getLayer().getName();
-}
-
-Builder::Layer& Builder::LayerFragment::getLayer() const {
-    return refLayer;
-}
-
-const std::vector<size_t> Builder::LayerFragment::uInts2size_t(const std::vector<unsigned int>& vector) const {
-    std::vector<size_t> newVector;
-    newVector.reserve(vector.size());
-    for (const auto& it : vector) {
-        newVector.push_back(it);
-    }
-    return newVector;
-}
diff --git a/inference-engine/src/inference_engine/builders/ie_lrn_layer.cpp b/inference-engine/src/inference_engine/builders/ie_lrn_layer.cpp
new file mode 100644
index 000000000..8bd20a79f
--- /dev/null
+++ b/inference-engine/src/inference_engine/builders/ie_lrn_layer.cpp
@@ -0,0 +1,105 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <builders/ie_lrn_layer.hpp>
+#include <ie_cnn_layer_builder.h>
+
+#include <string>
+
+using namespace InferenceEngine;
+
+Builder::LRNLayer::LRNLayer(const std::string& name): LayerDecorator("LRN", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
+    setSize(1);
+    setAlpha(1e-4);
+    setBeta(0.75f);
+    setBias(1.0f);
+}
+
+Builder::LRNLayer::LRNLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("LRN");
+}
+
+Builder::LRNLayer::LRNLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("LRN");
+}
+
+Builder::LRNLayer& Builder::LRNLayer::setName(const std::string& name) {
+    getLayer()->setName(name);
+    return *this;
+}
+
+const Port& Builder::LRNLayer::getPort() const {
+    return getLayer()->getOutputPorts()[0];
+}
+
+Builder::LRNLayer& Builder::LRNLayer::setPort(const Port &port) {
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
+    return *this;
+}
+
+size_t Builder::LRNLayer::getSize() const {
+    return getLayer()->getParameters().at("size");
+}
+
+Builder::LRNLayer& Builder::LRNLayer::setSize(size_t size) {
+    getLayer()->getParameters()["size"] = size;
+    return *this;
+}
+
+float Builder::LRNLayer::getAlpha() const {
+    return getLayer()->getParameters().at("alpha");
+}
+
+Builder::LRNLayer& Builder::LRNLayer::setAlpha(float alpha) {
+    getLayer()->getParameters()["alpha"] = alpha;
+    return *this;
+}
+
+float Builder::LRNLayer::getBeta() const {
+    return getLayer()->getParameters().at("beta");
+}
+
+Builder::LRNLayer& Builder::LRNLayer::setBeta(float beta) {
+    getLayer()->getParameters()["beta"] = beta;
+    return *this;
+}
+
+float Builder::LRNLayer::getBias() const {
+    return getLayer()->getParameters().at("bias");
+}
+
+Builder::LRNLayer& Builder::LRNLayer::setBias(float bias) {
+    getLayer()->getParameters()["bias"] = bias;
+    return *this;
+}
+
+REG_VALIDATOR_FOR(LRN, [](const Builder::Layer::CPtr &input_layer, bool partial) {
+    Builder::LRNLayer layer(input_layer);
+    if (layer.getAlpha() <= 0) {
+        THROW_IE_EXCEPTION << "Alpha should be > 0";
+    }
+    if (layer.getBeta() <= 0) {
+        THROW_IE_EXCEPTION << "Beta should be > 0";
+    }
+    if (layer.getSize() == 0) {
+        THROW_IE_EXCEPTION << "Size should be > 0";
+    }
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(LRN, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["bias"] = cnnLayer->GetParamAsFloat("bias", 1.0f);
+    layer.getParameters()["beta"] = cnnLayer->GetParamAsFloat("beta", 0.75f);
+    layer.getParameters()["alpha"] = cnnLayer->GetParamAsFloat("alpha", 1e-4f);
+    layer.getParameters()["size"] = cnnLayer->GetParamAsUInt("size", 1);
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_lstm_sequence_layer.cpp b/inference-engine/src/inference_engine/builders/ie_lstm_sequence_layer.cpp
new file mode 100644
index 000000000..c856368ac
--- /dev/null
+++ b/inference-engine/src/inference_engine/builders/ie_lstm_sequence_layer.cpp
@@ -0,0 +1,127 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <builders/ie_lstm_sequence_layer.hpp>
+#include <ie_cnn_layer_builder.h>
+
+#include <vector>
+#include <string>
+
+using namespace InferenceEngine;
+
+Builder::LSTMSequenceLayer::LSTMSequenceLayer(const std::string& name): LayerDecorator("LSTMSequence", name) {
+    getLayer()->getOutputPorts().resize(3);
+    getLayer()->getInputPorts().resize(7);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getInputPorts()[3].setParameter("type", "optional");
+    getLayer()->getInputPorts()[6].setParameter("type", "weights");
+}
+
+Builder::LSTMSequenceLayer::LSTMSequenceLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("LSTMSequence");
+}
+
+Builder::LSTMSequenceLayer::LSTMSequenceLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("LSTMSequence");
+}
+
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setName(const std::string& name) {
+    getLayer()->setName(name);
+    return *this;
+}
+
+const std::vector<Port>& Builder::LSTMSequenceLayer::getInputPorts() const {
+    return getLayer()->getInputPorts();
+}
+
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setInputPorts(const std::vector<Port>& ports) {
+    getLayer()->getInputPorts() = ports;
+    return *this;
+}
+
+const std::vector<Port>& Builder::LSTMSequenceLayer::getOutputPorts() const {
+    return getLayer()->getOutputPorts();
+}
+
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setOutputPorts(const std::vector<Port>& ports) {
+    getLayer()->getOutputPorts() = ports;
+    return *this;
+}
+int Builder::LSTMSequenceLayer::getHiddenSize() const {
+    return getLayer()->getParameters().at("hidden_size");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setHiddenSize(int size) {
+    getLayer()->getParameters()["hidden_size"] = size;
+    return *this;
+}
+bool Builder::LSTMSequenceLayer::getSequenceDim() const {
+    return getLayer()->getParameters().at("sequence_dim");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setSqquenceDim(bool flag) {
+    getLayer()->getParameters()["sequence_dim"] = flag;
+    return *this;
+}
+const std::vector<std::string>& Builder::LSTMSequenceLayer::getActivations() const {
+    return getLayer()->getParameters().at("activations");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setActivations(const std::vector<std::string>& activations) {
+    getLayer()->getParameters()["activations"] = activations;
+    return *this;
+}
+const std::vector<float>& Builder::LSTMSequenceLayer::getActivationsAlpha() const {
+    return getLayer()->getParameters().at("activations_alpha");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setActivationsAlpha(const std::vector<float>& activations) {
+    getLayer()->getParameters()["activations_alpha"] = activations;
+    return *this;
+}
+const std::vector<float>& Builder::LSTMSequenceLayer::getActivationsBeta() const {
+    return getLayer()->getParameters().at("activations_beta");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setActivationsBeta(const std::vector<float>& activations) {
+    getLayer()->getParameters()["activations_beta"] = activations;
+    return *this;
+}
+float Builder::LSTMSequenceLayer::getClip() const {
+    return getLayer()->getParameters().at("clip");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setClip(float clip) {
+    getLayer()->getParameters()["clip"] = clip;
+    return *this;
+}
+
+bool Builder::LSTMSequenceLayer::getInputForget() const {
+    return getLayer()->getParameters().at("input_forget");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setInputForget(bool flag) {
+    getLayer()->getParameters()["input_forget"] = flag;
+    return *this;
+}
+const std::string& Builder::LSTMSequenceLayer::getDirection() const {
+    return getLayer()->getParameters().at("direction");
+}
+Builder::LSTMSequenceLayer& Builder::LSTMSequenceLayer::setDirection(const std::string& direction) {
+    getLayer()->getParameters()["direction"] = direction;
+    return *this;
+}
+
+REG_CONVERTER_FOR(LSTMSequence, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["hidden_size"] = cnnLayer->GetParamAsInt("hidden_size");
+    layer.getParameters()["sequence_dim"] = cnnLayer->GetParamsAsBool("sequence_dim", true);
+    std::vector<std::string> activations;
+    std::istringstream stream(cnnLayer->GetParamAsString("activations"));
+    std::string str;
+    while (getline(stream, str, ',')) {
+         activations.push_back(str);
+    }
+    layer.getParameters()["activations"] = activations;
+    layer.getParameters()["activations_alpha"] = cnnLayer->GetParamAsFloats("activations_alpha");
+    layer.getParameters()["activations_beta"] = cnnLayer->GetParamAsFloats("activations_beta");
+    layer.getParameters()["clip"] = cnnLayer->GetParamAsFloat("clip");
+    layer.getParameters()["input_forget"] = cnnLayer->GetParamsAsBool("input_forget", true);
+    layer.getParameters()["direction"] = cnnLayer->GetParamAsString("direction", "");
+});
+
+
diff --git a/inference-engine/src/inference_engine/builders/ie_memory_layer.cpp b/inference-engine/src/inference_engine/builders/ie_memory_layer.cpp
index f987b07be..39c0dbffa 100644
--- a/inference-engine/src/inference_engine/builders/ie_memory_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_memory_layer.cpp
@@ -1,70 +1,83 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_memory_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::MemoryLayer::MemoryLayer(const std::string& name): LayerFragment("Memory", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::MemoryLayer::MemoryLayer(const std::string& name): LayerDecorator("Memory", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
+    setSize(2);
 }
 
-Builder::MemoryLayer::MemoryLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Memory"))
-        THROW_IE_EXCEPTION << "Cannot create MemoryLayer decorator for layer " << getLayer().getType();
+Builder::MemoryLayer::MemoryLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Memory");
+}
+
+Builder::MemoryLayer::MemoryLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Memory");
 }
 
 Builder::MemoryLayer& Builder::MemoryLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::MemoryLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::MemoryLayer& Builder::MemoryLayer::setInputPort(const Port &port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::MemoryLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::MemoryLayer& Builder::MemoryLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::string Builder::MemoryLayer::getId() const {
-    return getLayer().getParameters()["id"];
+    return getLayer()->getParameters().at("id");
 }
 Builder::MemoryLayer& Builder::MemoryLayer::setId(const std::string& id) {
-    getLayer().getParameters()["id"] = id;
+    getLayer()->getParameters()["id"] = id;
     return *this;
 }
 size_t Builder::MemoryLayer::getIndex() const {
-    return getLayer().getParameters()["index"].asUInt();
+    return getLayer()->getParameters().at("index");
 }
 Builder::MemoryLayer& Builder::MemoryLayer::setIndex(size_t index) {
     if (index > 1)
         THROW_IE_EXCEPTION << "Index supports only 0 and 1 values.";
-    getLayer().getParameters()["index"] = index;
+    getLayer()->getParameters()["index"] = index;
     return *this;
 }
 size_t Builder::MemoryLayer::getSize() const {
-    return getLayer().getParameters()["size"].asUInt(2);
+    return getLayer()->getParameters().at("size");
 }
 Builder::MemoryLayer& Builder::MemoryLayer::setSize(size_t size) {
     if (size != 2)
         THROW_IE_EXCEPTION << "Only size equal 2 is supported.";
-    getLayer().getParameters()["size"] = size;
+    getLayer()->getParameters()["size"] = size;
     return *this;
 }
+REG_VALIDATOR_FOR(Memory, [](const InferenceEngine::Builder::Layer::CPtr& layer, bool partial) {
+});
+
+REG_CONVERTER_FOR(Memory, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["id"] = cnnLayer->GetParamAsString("id", 0);
+    layer.getParameters()["index"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("index", 0));
+    layer.getParameters()["size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("size", 0));
+});
+
diff --git a/inference-engine/src/inference_engine/builders/ie_mvn_layer.cpp b/inference-engine/src/inference_engine/builders/ie_mvn_layer.cpp
index 0211e9fd2..c81772dd5 100644
--- a/inference-engine/src/inference_engine/builders/ie_mvn_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_mvn_layer.cpp
@@ -1,60 +1,83 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_mvn_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::MVNLayer::MVNLayer(const std::string& name): LayerFragment("MVN", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::MVNLayer::MVNLayer(const std::string& name): LayerDecorator("MVN", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setEpsilon(9.999999717180685e-10f);
     setNormalize(true);
     setAcrossChannels(true);
 }
 
-Builder::MVNLayer::MVNLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "MVN"))
-        THROW_IE_EXCEPTION << "Cannot create MVNLayer decorator for layer " << getLayer().getType();
+Builder::MVNLayer::MVNLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("MVN");
+}
+
+Builder::MVNLayer::MVNLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("MVN");
 }
 
 Builder::MVNLayer& Builder::MVNLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::MVNLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::MVNLayer& Builder::MVNLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 bool Builder::MVNLayer::getAcrossChannels() const {
-    return getLayer().getParameters()["across_channels"].asBool(true);
+    return getLayer()->getParameters().at("across_channels");
 }
 Builder::MVNLayer& Builder::MVNLayer::setAcrossChannels(bool flag) {
-    getLayer().getParameters()["across_channels"] = flag ? 1 : 0;
+    getLayer()->getParameters()["across_channels"] = flag ? 1 : 0;
     return *this;
 }
 bool Builder::MVNLayer::getNormalize() const {
-    return getLayer().getParameters()["normalize_variance"].asBool(true);
+    return getLayer()->getParameters().at("normalize_variance");
 }
 Builder::MVNLayer& Builder::MVNLayer::setNormalize(bool flag) {
-    getLayer().getParameters()["normalize_variance"] = flag ? 1 : 0;
+    getLayer()->getParameters()["normalize_variance"] = flag ? 1 : 0;
     return *this;
 }
 float Builder::MVNLayer::getEpsilon() const {
-    return getLayer().getParameters()["eps"].asFloat();
+    return getLayer()->getParameters().at("eps");
 }
 Builder::MVNLayer& Builder::MVNLayer::setEpsilon(float eps) {
-    getLayer().getParameters()["eps"] = eps;
+    getLayer()->getParameters()["eps"] = eps;
     return *this;
 }
+
+REG_VALIDATOR_FOR(MVN, [](const Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::MVNLayer layer(input_layer);
+    if (layer.getEpsilon() <= 0) {
+        THROW_IE_EXCEPTION << "Epsilon should be > 0";
+    }
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(MVN, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["across_channels"] = cnnLayer->GetParamsAsBool("across_channels", 0);
+    layer.getParameters()["normalize_variance"] = cnnLayer->GetParamsAsBool("normalize_variance", 0);
+    layer.getParameters()["eps"] = cnnLayer->GetParamAsFloat("eps", 0);
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_network_builder.cpp b/inference-engine/src/inference_engine/builders/ie_network_builder.cpp
index 70d3cded7..2899cfd95 100644
--- a/inference-engine/src/inference_engine/builders/ie_network_builder.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_network_builder.cpp
@@ -1,9 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <ie_builders.hpp>
-#include <ie_network.hpp>
 #include "graph_tools.hpp"
 
 #include <unordered_map>
@@ -33,28 +32,35 @@ Builder::Network::Network(const std::string &name): Builder::Network(Context(),
 Builder::Network::Network(const INetwork &network): Builder::Network(Context(), network) {}
 Builder::Network::Network(const ICNNNetwork &network): Builder::Network(Context(), network) {}
 
-Builder::Network::Network(const Context& ieContext, const std::string &name): ctx(ieContext), name(name), version(3) {}
+Builder::Network::Network(const Context& ieContext, const std::string &name) {
+    parameters["name"] = name;
+    parameters["context"] = ieContext;
+    parameters["version"] = 3;
+    parameters["layers"] = std::vector<Layer::Ptr>();
+    parameters["connections"] = std::vector<Connection>();
+}
 
-Builder::Network::Network(const Context& ieContext, const INetwork &network): ctx(ieContext), name(network.getName()), version(3) {
+Builder::Network::Network(const Context& ieContext, const INetwork &network): Network(ieContext, network.getName()) {
     for (const auto& layer : network) {
-        layers.push_back(Layer(layer));
+        parameters["layers"].as<std::vector<Layer::Ptr>>().push_back(std::make_shared<Layer>(layer));
         const auto layerConnections = network.getLayerConnections(layer->getId());
         for (const auto& connection : layerConnections) {
             bool found = false;
-            for (const auto& con : connections) {
+            for (const auto& con : parameters["connections"].as<std::vector<Connection>>()) {
                 if (con == connection) {
                     found = true;
                     break;
                 }
             }
             if (!found) {
-                connections.push_back(connection);
+                parameters["connections"].as<std::vector<Connection>>().push_back(connection);
             }
         }
     }
 }
 
-Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network): ctx(ieContext), name(network.getName()), version(0) {
+Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network): Network(ieContext, network.getName()) {
+    parameters["version"] = 0;
     auto allInputs = CNNNetGetAllInputLayers(network);
     InputsDataMap inputs;
     network.getInputsInfo(inputs);
@@ -66,7 +72,6 @@ Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network):
     std::vector<CNNLayerPtr> queueLayers;
 
     auto createGenericFromCNNLayer = [&](const CNNLayerPtr& cnnLayer) {
-        std::vector<Port> inputPorts;
         for (const auto& data : cnnLayer->insData) {
             auto lockedData = data.lock();
             if (!lockedData)
@@ -74,155 +79,49 @@ Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network):
             if (dataPtrs.find(lockedData.get()) == dataPtrs.end()) {
                 dataPtrs.insert(lockedData.get());
             }
-            inputPorts.emplace_back(lockedData->getTensorDesc().getDims());
         }
-        std::vector<Port> outputPorts;
         for (const auto& data : cnnLayer->outData) {
             if (dataPtrs.find(data.get()) == dataPtrs.end()) {
                 dataPtrs.insert(data.get());
             }
-            outputPorts.push_back(Port(data->getTensorDesc().getDims()));
         }
-
-        std::map<std::string, Parameter> params;
-        for (const auto& it : cnnLayer->params) {
-            params[it.first] = it.second;
+        std::map<std::string, Blob::Ptr> blobs = cnnLayer->blobs;
+        size_t inputsCount(0);
+        for (const auto& data : cnnLayer->insData) {
+            auto lockedData = data.lock();
+            if (!lockedData)
+                continue;
+            inputsCount++;
         }
-        const auto layer = Layer(cnnLayer->type, cnnLayer->name)
-                .setInputPorts(inputPorts).setOutputPorts(outputPorts)
-                .setParameters(params).setConstantData(cnnLayer->blobs);
+        const auto layer = builderFromCNNLayer(cnnLayer);
         idx_t layerId = addLayer(layer);
+
+        if (blobs.find("weights") != blobs.end()) {
+            idx_t constLayerId = addLayer(ConstLayer("weights").setData(blobs["weights"]));
+            connect({constLayerId}, {layerId, inputsCount++});
+        }
+        if (blobs.find("biases") != blobs.end()) {
+            if (blobs.find("weights") == blobs.end()) ++inputsCount;
+
+            idx_t constLayerId = addLayer(ConstLayer("biases").setData(blobs["biases"]));
+            connect({constLayerId}, {layerId, inputsCount++});
+        }
+        for (const auto& it : blobs) {
+            if (it.first == "weights" || it.first == "biases")
+                continue;
+            idx_t constLayerId = addLayer(ConstLayer(it.first).setData(it.second));
+            connect({constLayerId}, {layerId, inputsCount++});
+        }
         name2id[layer.getName()] = layerId;
         return layerId;
     };
 
     auto addPreProcessFor = [&](const InputInfo::Ptr& inputInfo) {
         auto inputLayer = getLayer(name2id[inputInfo->name()]);
-        if (inputLayer.getType().empty() && inputLayer.getName().empty())
+        if (inputLayer->getType().empty() && inputLayer->getName().empty())
             return;
 
-        ResizeAlgorithm alg = inputInfo->getPreProcess().getResizeAlgorithm();
-        std::string algStr;
-        switch (alg) {
-            case RESIZE_BILINEAR:
-                algStr = "RESIZE_BILINEAR";
-                break;
-            case RESIZE_AREA:
-                algStr = "RESIZE_AREA";
-                break;
-            default:
-                break;
-        }
-
-        if (!algStr.empty())
-            inputLayer.getParameters()["resize_alg"] = algStr;
-
-        switch (inputInfo->getPreProcess().getMeanVariant()) {
-            case MEAN_IMAGE: {
-                auto meanWidth = inputInfo->getPreProcess()[0]->meanData->dims()[0];
-                auto meanHeight = inputInfo->getPreProcess()[0]->meanData->dims()[1];
-
-                TensorDesc desc(Precision::FP32, inputLayer.getOutputPorts()[0].shape(), Layout::NCHW);
-                Blob::Ptr meanBuffer = make_blob_with_precision(desc);
-                meanBuffer->allocate();
-                auto *meanData = meanBuffer->buffer().as<float *>();
-                for (unsigned channel = 0; channel < inputInfo->getPreProcess().getNumberOfChannels(); channel++) {
-                    Blob::Ptr meanBlob = inputInfo->getPreProcess()[channel]->meanData;
-                    if (!meanBlob || meanBlob->precision() != Precision::FP32)
-                        THROW_IE_EXCEPTION << "mean image not provided or not in Float 32";
-                    if (meanBlob->size() != meanHeight*meanWidth) {
-                        THROW_IE_EXCEPTION << "mean image size does not match expected network input, expecting " << meanWidth << " x " << meanHeight;
-                    }
-                    ie_memcpy(meanData + channel*meanBlob->size(),
-                            meanBuffer->byteSize() - channel*meanBlob->size() * sizeof(float),
-                            meanBlob->buffer(),
-                            meanBlob->byteSize());
-                }
-
-                // WA for batch != 1
-                // Reshape for new batch is not supported for models with mean image
-                size_t noBatchSize = desc.getBlockingDesc().getStrides()[0];
-                for (size_t b = 1; b < inputLayer.getOutputPorts()[0].shape()[0]; b++) {
-                    ie_memcpy(meanData + noBatchSize*b,
-                              meanBuffer->byteSize() - noBatchSize * b * sizeof(float),
-                              meanData,
-                              noBatchSize * sizeof(float));
-                }
-
-                std::vector<PortInfo> outPorts;
-                std::vector<Connection> inputConnections = getLayerConnections(inputLayer.getId());
-                for (const auto& connection : inputConnections) {
-                    outPorts.push_back(connection.to());
-                    disconnect(connection);
-                }
-
-                idx_t constId = addLayer(Builder::ConstLayer(inputLayer.getName() + "_mean_image")
-                                                 .setPort(inputLayer.getOutputPorts()[0]).setData(meanBuffer));
-                idx_t constNegId = addLayer({{constId}}, Builder::PowerLayer(inputLayer.getName() + "_mean_image_neg")
-                                                 .setPort(inputLayer.getOutputPorts()[0]).setScale(-1));
-
-                idx_t eltwiseId = addLayer({{inputLayer.getId()}, {constNegId}},
-                        Builder::EltwiseLayer(inputLayer.getName() + "_mean_image_elt")
-                             .setInputPorts({inputLayer.getOutputPorts()[0], inputLayer.getOutputPorts()[0]})
-                             .setOutputPort(inputLayer.getOutputPorts()[0])
-                             .setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM));
-
-                for (const auto& port : outPorts) {
-                    connect({eltwiseId}, port);
-                }
-            }
-                break;
-            case MEAN_VALUE: {
-                TensorDesc desc(Precision::FP32, {inputInfo->getPreProcess().getNumberOfChannels()}, Layout::C);
-                Blob::Ptr mean = make_blob_with_precision(desc);
-                mean->allocate();
-                Blob::Ptr scale = make_blob_with_precision(desc);
-                scale->allocate();
-                Blob::Ptr emptyScale = make_blob_with_precision(desc);
-                emptyScale->allocate();
-                auto *meanData = mean->buffer().as<float *>();
-                auto *scaleData = scale->buffer().as<float *>();
-                auto *emptyScaleData = emptyScale->buffer().as<float *>();
-                bool noMean = true;
-                bool noScale = true;
-                for (size_t i = 0; i < inputInfo->getPreProcess().getNumberOfChannels(); i++) {
-                    meanData[i] = -inputInfo->getPreProcess()[i]->meanValue;
-                    noMean = noMean && (meanData[i] == 0);
-                    scaleData[i] = inputInfo->getPreProcess()[i]->stdScale;
-                    emptyScaleData[i] = 1;
-                    noScale = noScale && (scaleData[i] == 1);
-                }
-                std::vector<PortInfo> outPorts;
-                std::vector<Connection> inputConnections = getLayerConnections(inputLayer.getId());
-                for (const auto& connection : inputConnections) {
-                    outPorts.push_back(connection.to());
-                    disconnect(connection);
-                }
-
-                idx_t meanId = inputLayer.getId();
-                if (!noMean) {
-                    meanId = addLayer({{inputLayer.getId()}},
-                                            Builder::ScaleShiftLayer(inputLayer.getName() + "_mean_value")
-                                                    .setPort(inputLayer.getOutputPorts()[0])
-                                                    .setBiases(mean).setWeights(emptyScale));
-                }
-
-                idx_t scaleId = meanId;
-                if (!noScale) {
-                    scaleId = addLayer({{meanId}},
-                                             Builder::ScaleShiftLayer(inputLayer.getName() + "_scale_value")
-                                                     .setPort(inputLayer.getOutputPorts()[0])
-                                                     .setWeights(scale));
-                }
-
-                for (const auto& port : outPorts) {
-                    connect({scaleId}, port);
-                }
-            }
-                break;
-            default:
-                break;
-        }
+        inputLayer->getParameters()["preProcess"] = inputInfo->getPreProcess();
     };
 
     for (auto input : inputs) {
@@ -300,10 +199,10 @@ Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network):
             THROW_IE_EXCEPTION << "Cannot find output layer " << creator->name;
 
         auto lastLayer = getLayer(name2id[creator->name]);
-        if (lastLayer.getName() == "" && lastLayer.getType().empty())
+        if (lastLayer->getName() == "" && lastLayer->getType().empty())
             THROW_IE_EXCEPTION << "Cannot find output layer " << creator->name;
 
-        std::string name = "out_" + lastLayer.getName();
+        std::string name = "out_" + lastLayer->getName();
 
         CNNLayerPtr cnnOutLayer(new CNNLayer({name, "Output", creator->outData[0]->getPrecision()}));
         cnnOutLayer->insData.push_back((*it).second);
@@ -318,7 +217,7 @@ Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network):
             }
         }
 
-        connections.push_back(Connection({lastLayer.getId(), inIdx}, {outLayerId}));
+        parameters["connections"].as<std::vector<Connection>>().push_back(Connection({lastLayer->getId(), inIdx}, {outLayerId}));
     }
 
     for (const auto dataPtr : dataPtrs) {
@@ -349,21 +248,21 @@ Builder::Network::Network(const Context& ieContext, const ICNNNetwork &network):
                     break;
                 }
             }
-            connections.push_back(Connection({name2id[cnnInputLayer->name], inIdx}, {name2id[it.second->name], outIdx}));
+            parameters["connections"].as<std::vector<Connection>>()
+                .push_back(Connection({name2id[cnnInputLayer->name], inIdx}, {name2id[it.second->name], outIdx}));
         }
     }
 
-    for (auto input : inputs) {
+    for (const auto &input : inputs) {
         addPreProcessFor(input.second);
     }
 }
 
-std::vector<Builder::Layer>& Builder::Network::getLayers() {
-    return layers;
+const std::vector<Builder::Layer::Ptr>& Builder::Network::getLayers() const {
+    return parameters.at("layers").as<std::vector<Layer::Ptr>>();
 }
-
-const std::vector<Builder::Layer>& Builder::Network::getLayers() const {
-    return layers;
+std::vector<Builder::Layer::Ptr>& Builder::Network::getLayers() {
+    return parameters["layers"].as<std::vector<Layer::Ptr>>();
 }
 
 idx_t Builder::Network::addLayer(const std::vector<PortInfo> &inputs,
@@ -380,10 +279,11 @@ idx_t Builder::Network::addLayer(const Layer& layer) {
         if (defaultId == (std::numeric_limits<idx_t>::max)())
             defaultId = 0;
 
-        auto it = layers.begin();
-        while (it != layers.end()) {
-            for (it = layers.begin(); it != layers.end(); it++) {
-                if (it->getId() == defaultId) {
+        auto it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
+        while (it != parameters["layers"].as<std::vector<Layer::Ptr>>().end()) {
+            for (it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
+                    it != parameters["layers"].as<std::vector<Layer::Ptr>>().end(); it++) {
+                if ((*it)->getId() == defaultId) {
                     defaultId++;
                     break;
                 }
@@ -399,8 +299,8 @@ idx_t Builder::Network::addLayer(const Layer& layer) {
         bool nameIsUnique(false);
         while (!nameIsUnique) {
             nameIsUnique = true;
-            for (const auto& layer : layers) {
-                if (generatedName == layer.getName()) {
+            for (const auto& layer : parameters["layers"].as<std::vector<Layer::Ptr>>()) {
+                if (generatedName == layer->getName()) {
                     nameIsUnique = false;
                     generatedName += "_" + idName;
                 }
@@ -410,83 +310,131 @@ idx_t Builder::Network::addLayer(const Layer& layer) {
     };
     idx_t generatedId = getAvailableId(layer.getId());
     const auto name = generateAvailableName(layer.getName(), generatedId);
-    layers.emplace_back(generatedId, layer);
-    layers[layers.size() - 1].getName() = name;
+    parameters["layers"].as<std::vector<Layer::Ptr>>().emplace_back(std::make_shared<Layer>(generatedId, layer));
+    parameters["layers"].as<std::vector<Layer::Ptr>>()[parameters["layers"].as<std::vector<Layer::Ptr>>().size() - 1]->setName(name);
     return generatedId;
 }
 
 void Builder::Network::connect(const PortInfo& input, const PortInfo& output) {
-    connections.emplace_back(input, output);
+    const auto mergePortData = [&]() -> bool {
+        const auto blobEqualOrEmpty = [](const Blob::Ptr& ref, const Blob::Ptr& test) -> bool {
+            return (ref->size() == test->size() || test->size() == 0) &&
+                   (!memcmp(ref->cbuffer(), test->cbuffer(), test->byteSize())) &&
+                   (ref->getTensorDesc().getPrecision() == test->getTensorDesc().getPrecision() ||
+                    test->getTensorDesc().getPrecision() == Precision::UNSPECIFIED) &&
+                   (ref->getTensorDesc().getLayout() == test->getTensorDesc().getLayout() ||
+                    test->getTensorDesc().getLayout() == Layout::ANY) &&
+                   (ref->getTensorDesc().getDims() == test->getTensorDesc().getDims() ||
+                    test->getTensorDesc().getDims().empty()) &&
+                   (ref->cbuffer().as<char *>() == test->cbuffer().as<char *>() ||
+                    test->cbuffer() == nullptr);
+        };
+
+        const auto srcPortData = getLayer(input.layerId())->getOutputPorts()[input.portId()].getData();
+        const auto dstPortData = getLayer(output.layerId())->getInputPorts()[output.portId()].getData();
+        if (srcPortData == dstPortData)
+            return true;
+
+        if (srcPortData->getParameters() != dstPortData->getParameters() &&
+                !srcPortData->getParameters().empty() &&
+                !dstPortData->getParameters().empty())
+            return false;
+
+        size_t srcDataCount(0), dstDataCount(0);
+        if (!srcPortData->getParameters().empty()) srcDataCount++;
+        if (!dstPortData->getParameters().empty()) dstDataCount++;
+
+        const auto srcBlb = srcPortData->getData();
+        const auto dstBlb = dstPortData->getData();
+        if (srcBlb == dstBlb || (srcBlb->size() == dstBlb->size() &&
+                srcBlb->getTensorDesc() == dstBlb->getTensorDesc() &&
+                ((srcBlb->cbuffer().as<char *>() == dstBlb->cbuffer().as<char *>()) ||
+                    (srcBlb->cbuffer() != nullptr && dstBlb->cbuffer() != nullptr &&
+                    !memcmp(srcBlb->cbuffer(), dstBlb->cbuffer(), dstBlb->byteSize()))))) {
+            srcDataCount++;
+            dstDataCount++;
+        } else if (blobEqualOrEmpty(srcBlb, dstBlb)) {
+            srcDataCount++;
+        } else if (blobEqualOrEmpty(dstBlb, srcBlb)) {
+            dstDataCount++;
+        } else {
+            return false;
+        }
+
+        if (dstDataCount > srcDataCount) {
+            // Change source and all src destination data
+            for (const auto& connection : getLayerConnections(input.layerId())) {
+                if (connection.from() != input)
+                    continue;
+                getLayer(connection.to().layerId())->getInputPorts()[connection.to().portId()].setData(dstPortData);
+            }
+            getLayer(input.layerId())->getOutputPorts()[input.portId()].setData(dstPortData);
+        } else {
+            // Change destination data
+            getLayer(output.layerId())->getInputPorts()[output.portId()].setData(srcPortData);
+        }
+
+        return true;
+    };
+
+    if (!mergePortData())
+        THROW_IE_EXCEPTION << "Cannot connect two ports with different data!";
+
+    parameters["connections"].as<std::vector<Connection>>().emplace_back(input, output);
 }
 
 void Builder::Network::removeLayer(idx_t layerId) {
-    auto it = layers.begin();
-    for (; it != layers.end(); it++) {
-        if (it->getId() == layerId) {
+    auto it = parameters["layers"].as<std::vector<Layer::Ptr>>().begin();
+    for (; it != parameters["layers"].as<std::vector<Layer::Ptr>>().end(); it++) {
+        if ((*it)->getId() == layerId) {
             break;
         }
     }
-    if (it != layers.end())
-        layers.erase(it);
+    if (it != parameters["layers"].as<std::vector<Layer::Ptr>>().end())
+        parameters["layers"].as<std::vector<Layer::Ptr>>().erase(it);
 }
 
 void Builder::Network::disconnect(const Connection& connection) {
-    auto it = connections.begin();
-    for (; it != connections.end(); it++) {
+    auto it = parameters["connections"].as<std::vector<Connection>>().begin();
+    for (; it != parameters["connections"].as<std::vector<Connection>>().end(); it++) {
         if (connection == *it)
             break;
     }
-    if (it != connections.end())
-        connections.erase(it);
-}
+    if (it != parameters["connections"].as<std::vector<Connection>>().end())
+        parameters["connections"].as<std::vector<Connection>>().erase(it);
 
-const INetwork::Ptr Builder::Network::build() const {
-    // Check that all ports are connected
-    for (const auto& layer : layers) {
-        std::vector<bool> existInCon(layer.getInputPorts().size());
-        std::vector<bool> existOutCon(layer.getOutputPorts().size());
-
-        const auto layerConnections = getLayerConnections(layer.getId());
-        for (const auto& connection : layerConnections) {
-            if (connection.from().layerId() == layer.getId()) {
-                existOutCon[connection.from().portId()] = true;
-                getLayer(connection.to().layerId());
-            }
-            if (connection.to().layerId() == layer.getId()) {
-                existInCon[connection.to().portId()] = true;
-                getLayer(connection.from().layerId());
-            }
-        }
-        bool allPortsConnected = true;
-        for (const auto& cons : {existInCon, existOutCon}) {
-            for (const auto &existCon : cons) {
-                allPortsConnected = allPortsConnected && existCon;
-            }
-        }
-        if (!allPortsConnected)
-            THROW_IE_EXCEPTION << "Not all ports of layer " << layer.getName() << " were connected!";
-    }
+    try {
+        auto layer = getLayer(connection.to().layerId());
+        layer->getInputPorts()[connection.to().portId()].setData(std::make_shared<PortData>());
+    } catch (InferenceEngine::details::InferenceEngineException& ex) {}
+}
 
-    InferenceEngine::details::Network::Ptr network = std::make_shared<InferenceEngine::details::Network>(ctx, name);
-    for (const auto& layer : layers) {
-        network->addLayer(layer.build());
-    }
-    for (const auto& connection : connections) {
-        network->addConnection(connection);
-    }
+const INetwork::CPtr Builder::Network::build() {
+    validate();
+    InferenceEngine::Builder::Network::Ptr network =
+            std::make_shared<InferenceEngine::Builder::Network>(static_cast<const INetwork&>(*this));
+    return network;
+}
 
+void Builder::Network::validate() {
     // Check that all ports are connected
-    for (const auto& layer : *network) {
+    for (const auto& layer : getLayers()) {
         std::vector<bool> existInCon(layer->getInputPorts().size());
+        for (size_t i = 0; i < layer->getInputPorts().size(); i++) {
+            if (layer->getInputPorts()[i].getParameters().find("type") != layer->getInputPorts()[i].getParameters().end())
+                existInCon[i] = true;
+        }
         std::vector<bool> existOutCon(layer->getOutputPorts().size());
 
-        const auto layerConnections = network->getLayerConnections(layer->getId());
+        const auto layerConnections = getLayerConnections(layer->getId());
         for (const auto& connection : layerConnections) {
             if (connection.from().layerId() == layer->getId()) {
                 existOutCon[connection.from().portId()] = true;
+                getLayer(connection.to().layerId());
             }
             if (connection.to().layerId() == layer->getId()) {
                 existInCon[connection.to().portId()] = true;
+                getLayer(connection.from().layerId());
             }
         }
         bool allPortsConnected = true;
@@ -499,25 +447,32 @@ const INetwork::Ptr Builder::Network::build() const {
             THROW_IE_EXCEPTION << "Not all ports of layer " << layer->getName() << " were connected!";
     }
 
+    // Check all layers
+    for (const auto& connection : getConnections()) {
+        if (!getLayer(connection.to().layerId()))
+            THROW_IE_EXCEPTION << "Cannot find layer with id: " << connection.to().layerId();
+        if (!getLayer(connection.from().layerId()))
+            THROW_IE_EXCEPTION << "Cannot find layer with id: " << connection.from().layerId();
+    }
+
     std::map<std::string, SizeVector> inputShapes;
-    for (const auto& input : network->getInputs())
+    for (const auto& input : getInputs())
         inputShapes[input->getName()] = input->getOutputPorts()[0].shape();
 
-    if (version) {
-        details::BaseCreator::version_ = version;
+    if (parameters.at("version").as<int>()) {
+        details::BaseCreator::version_ = parameters.at("version");
     }
 
-    ShapeInfer::Reshaper reshaper(ctx, network);
+    ShapeInfer::Reshaper reshaper(this);
     ResponseDesc resp;
     StatusCode sts = reshaper.run(inputShapes, &resp);
     // Not all implementations may be registered if all shapes were read from IR.
     if (sts == NOT_FOUND) {
         bool allShapesLooksGood = true;
-        for (const auto& connection : network->getConnections()) {
-            if (network->getLayer(connection.from().layerId())->
-                    getOutputPorts()[connection.from().portId()].shape() !=
-                network->getLayer(connection.to().layerId())->
-                        getInputPorts()[connection.to().portId()].shape()) {
+        for (const auto& connection : getConnections()) {
+            if (getLayer(connection.from().layerId())->getOutputPorts()[connection.from().portId()].shape() !=
+                getLayer(connection.to().layerId())->getInputPorts()[connection.to().portId()].shape() ||
+                getLayer(connection.to().layerId())->getInputPorts()[connection.to().portId()].shape().empty()) {
                 allShapesLooksGood = false;
                 break;
             }
@@ -529,30 +484,19 @@ const INetwork::Ptr Builder::Network::build() const {
     if (sts != OK)
         THROW_IE_EXCEPTION << resp.msg;
 
-    return std::static_pointer_cast<INetwork>(network);
-}
-
-const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork::Ptr& network) {
-    std::unique_ptr<details::CNNNetworkImpl> cnnNetworkImpl(new details::CNNNetworkImpl());
-
-    Precision detectedPrecision = Precision::FP32;
-    for (const auto& layer : *network) {
-        const auto& params = layer->getParameters();
-        if (!params)
-            continue;
-        Precision prc = Precision::UNSPECIFIED;
-        for (const auto& blobIterator : params->getConstantData()) {
-            if (blobIterator.second) {
-                prc = blobIterator.second->precision();
-                break;
-            }
-        }
-        if (prc != Precision::UNSPECIFIED) {
-            detectedPrecision = prc;
-            break;
+    // Check all parameters
+    for (const auto& layer : getLayers()) {
+        try {
+            layer->build();
+        } catch(InferenceEngine::details::InferenceEngineException& ex) {
+            THROW_IE_EXCEPTION << "Cannot build layer " << layer->getName() << ": " << ex.what();
+        } catch(std::bad_cast& ex) {
+            THROW_IE_EXCEPTION << "Cannot build layer " << layer->getName() << ": " << ex.what();
         }
     }
+}
 
+const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork::CPtr& network) {
     auto createCNNLayer = [](const std::shared_ptr<const ILayer>& layer, Precision precision) {
         static std::vector<std::shared_ptr<BaseConverter>> convertors = {
                 std::make_shared<LayerConverter<InferenceEngine::PowerLayer>>("Power"),
@@ -578,7 +522,9 @@ const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork:
                 std::make_shared<LayerConverter<InferenceEngine::ReshapeLayer>>("Reshape"),
                 std::make_shared<LayerConverter<InferenceEngine::ReshapeLayer>>("Flatten"),
                 std::make_shared<LayerConverter<InferenceEngine::TileLayer>>("Tile"),
+                std::make_shared<LayerConverter<InferenceEngine::PadLayer>>("Pad"),
                 std::make_shared<ActivationConverter>(),
+                std::make_shared<RNNSequenceConverter>(),
                 std::make_shared<LayerConverter<InferenceEngine::BatchNormalizationLayer>>("BatchNormalization"),
         };
         for (auto &convertor : convertors) {
@@ -590,11 +536,65 @@ const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork:
         return genericCreator.createLayer(layer, precision);
     };
 
+    auto keep_input_info = [](std::unique_ptr<details::CNNNetworkImpl>& network, DataPtr &in_data,
+            PreProcessInfo preProc) {
+        InputInfo::Ptr info(new InputInfo());
+        info->getPreProcess() = preProc;
+        info->setInputData(in_data);
+        Precision prc = info->getInputPrecision();
+
+        // Convert precision into native format (keep element size)
+        prc = prc == Precision::Q78 ? Precision::I16 :
+              prc == Precision::FP16 ? Precision::FP32 :
+              static_cast<Precision::ePrecision>(prc);
+
+        info->setInputPrecision(prc);
+        network->setInputInfo(info);
+    };
+
+    std::unique_ptr<details::CNNNetworkImpl> cnnNetworkImpl(new details::CNNNetworkImpl());
+
+    Precision detectedPrecision = Precision::UNSPECIFIED;
+    for (const auto& layer : *network) {
+        for (const auto& port : layer->getInputPorts()) {
+            Precision prc = port.getData()->getData()->getTensorDesc().getPrecision();
+            if (prc != Precision::UNSPECIFIED) {
+                detectedPrecision = prc;
+                break;
+            }
+        }
+        for (const auto& port : layer->getOutputPorts()) {
+            Precision prc = port.getData()->getData()->getTensorDesc().getPrecision();
+            if (prc != Precision::UNSPECIFIED) {
+                detectedPrecision = prc;
+                break;
+            }
+        }
+        if (detectedPrecision != Precision::UNSPECIFIED)
+            break;
+    }
+    if (detectedPrecision == Precision::UNSPECIFIED)
+        detectedPrecision = Precision::FP32;
+
+    details::CaselessEq<std::string> eq;
     cnnNetworkImpl->setName(network->getName());
     cnnNetworkImpl->setPrecision(Precision::UNSPECIFIED);
     for (const auto& layer : *network) {
-        if (details::CaselessEq<std::string>()(layer->getType(), "Output"))
+        bool isInternalLayer = eq(layer->getType(), "Const");
+        for (const auto& connection : network->getLayerConnections(layer->getId())) {
+            if (!isInternalLayer)
+                break;
+            if (connection.from().layerId() != layer->getId())
+                continue;
+            const auto& port = network->getLayer(connection.to().layerId())->getInputPorts()[connection.to().portId()];
+            isInternalLayer = isInternalLayer &&
+                    port.getParameters().find("type") != port.getParameters().end();
+        }
+        isInternalLayer = isInternalLayer || eq(layer->getType(), "Output");
+
+        if (isInternalLayer)
             continue;
+
         CNNLayerPtr cnnLayer = createCNNLayer(layer, detectedPrecision);
         if (cnnNetworkImpl->getPrecision() == Precision::UNSPECIFIED) {
             cnnNetworkImpl->setPrecision(cnnLayer->precision);
@@ -606,10 +606,13 @@ const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork:
         auto connections = network->getLayerConnections(layer->getId());
         std::unordered_set<idx_t> inputNum, outputNum;
         for (const auto& connection : connections) {
-            if (connection.from().layerId() != layer->getId())
-                inputNum.insert(connection.to().portId());
-            else
+            if (connection.from().layerId() != layer->getId()) {
+                const auto& port = layer->getInputPorts()[connection.to().portId()];
+                if (port.getParameters().find("type") == port.getParameters().end())
+                    inputNum.insert(connection.to().portId());
+            } else {
                 outputNum.insert(connection.from().portId());
+            }
         }
         cnnLayer->insData.resize(inputNum.size());
         cnnLayer->outData.resize(outputNum.size());
@@ -620,8 +623,8 @@ const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork:
         auto connections = network->getLayerConnections(layer->getId());
         CNNLayerPtr cnnLayer;
         StatusCode sts = cnnNetworkImpl->getLayerByName(layer->getName().c_str(), cnnLayer, nullptr);
-        details::CaselessEq<std::string> eq;
-        if (sts != OK && eq(layer->getType(), "Output"))
+
+        if (sts != OK && (eq(layer->getType(), "Output") || eq(layer->getType(), "Const")))
             continue;
         else if (sts != OK)
             THROW_IE_EXCEPTION << "Cannot find CNNLayer by name " << layer->getName();
@@ -634,24 +637,31 @@ const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork:
 
             CNNLayerPtr cnnOutLayer;
             sts = cnnNetworkImpl->getLayerByName(outLayer->getName().c_str(), cnnOutLayer, nullptr);
-            if (sts != OK && !eq(outLayer->getType(), "Output"))
+            if (sts != OK && !eq(outLayer->getType(), "Output") && !eq(layer->getType(), "Const"))
                 THROW_IE_EXCEPTION << "Cannot find CNNLayer by name " << outLayer->getName();
 
             std::string dataName = layer->getName();
             if (cnnLayer->outData.size() > 1) {
-                dataName += "_" + std::to_string(connection.from().portId());
+                dataName += "." + std::to_string(connection.from().portId());
             }
             DataPtr& data = cnnNetworkImpl->getData(dataName);
             if (!data) {
                 TensorDesc dataDesc(detectedPrecision, layer->getOutputPorts()[connection.from().portId()].shape(),
                                     TensorDesc::getLayoutByDims(layer->getOutputPorts()[connection.from().portId()].shape()));
-                data = std::make_shared<Data>(layer->getName(), dataDesc);
+                data = std::make_shared<Data>(dataName, dataDesc);
                 data->creatorLayer = cnnLayer;
             }
             cnnLayer->outData[connection.from().portId()] = data;
+
+            idx_t realPortId(0);
+            const auto inputPorts = outLayer->getInputPorts();
+            for (size_t i = 0; i < connection.to().portId() && i < inputPorts.size(); i++) {
+                if (inputPorts[i].getParameters().find("type") == inputPorts[i].getParameters().end())
+                    realPortId++;
+            }
             if (cnnOutLayer) {
                 data->inputTo[outLayer->getName()] = cnnOutLayer;
-                cnnOutLayer->insData[connection.to().portId()] = data;
+                cnnOutLayer->insData[realPortId] = data;
             } else {
                 cnnNetworkImpl->addOutput(data->getName());
             }
@@ -659,38 +669,161 @@ const std::shared_ptr<ICNNNetwork> Builder::convertToICNNNetwork(const INetwork:
 
         cnnLayer->validateLayer();
         if (eq(cnnLayer->type, "Input")) {
-            InputInfo::Ptr inputInfo(new InputInfo());
-            inputInfo->setInputData(*cnnLayer->outData.begin());
-            cnnNetworkImpl->setInputInfo(inputInfo);
+            PreProcessInfo preProc;
+            if (layer->getParameters().find("preProcess") != layer->getParameters().end())
+                preProc = layer->getParameters().at("preProcess");
+            keep_input_info(cnnNetworkImpl, *cnnLayer->outData.begin(), preProc);
+        }
+    }
+
+    // Set default output precision to FP32 (for back-compatibility)
+    OutputsDataMap outputsInfo;
+    cnnNetworkImpl->getOutputsInfo(outputsInfo);
+    for (auto outputInfo : outputsInfo) {
+        if (outputInfo.second->getPrecision() != Precision::FP32 &&
+            outputInfo.second->getPrecision() != Precision::I32) {
+            outputInfo.second->setPrecision(Precision::FP32);
         }
     }
 
     return std::shared_ptr<ICNNNetwork>(cnnNetworkImpl.release());
 }
 
-Builder::Network::operator const INetwork::Ptr() const {
+Builder::Network::operator const INetwork::CPtr() {
     return build();
 }
 
-const Builder::Layer &Builder::Network::getLayer(idx_t layerId) const {
+const ILayer::CPtr Builder::Network::getLayer(idx_t layerId) const noexcept {
+    try {
+        for (auto& layer : getLayers()) {
+            if (layer->getId() == layerId)
+                return layer->build();
+        }
+    } catch(...) {}
+
+    return nullptr;
+}
+
+Builder::Layer::Ptr Builder::Network::getLayer(idx_t layerId) {
     for (auto& layer : getLayers()) {
-        if (layer.getId() == layerId)
+        if (layer->getId() == layerId)
             return layer;
     }
     THROW_IE_EXCEPTION << "Cannot find layer with id: " << layerId;
 }
 
-Builder::Layer &Builder::Network::getLayer(idx_t layerId) {
-    for (auto& layer : getLayers()) {
-        if (layer.getId() == layerId)
-            return layer;
+const std::string& Builder::Network::getName() const noexcept {
+    return parameters.at("name");
+}
+
+const Context& Builder::Network::getContext() const noexcept {
+    return parameters.at("context");
+}
+
+Context& Builder::Network::getContext() noexcept {
+    return parameters.at("context");
+}
+
+Builder::Network::const_iterator Builder::Network::begin() const noexcept {
+    try {
+        return Network::const_iterator(this);
+    } catch (...) {
+        return Network::const_iterator(this, true);
     }
-    THROW_IE_EXCEPTION << "Cannot find layer with id: " << layerId;
+}
+
+
+Builder::Network::const_iterator Builder::Network::end() const noexcept {
+    return Network::const_iterator(this, true);
+}
+
+size_t Builder::Network::size() const noexcept {
+    return static_cast<size_t>(std::distance(std::begin(*this), std::end(*this)));
+}
+
+Builder::Network::iterator Builder::Network::begin() {
+    return Network::iterator(this);
+}
+
+Builder::Network::iterator Builder::Network::end() {
+    return Network::iterator(this, true);
+}
+
+const std::vector<ILayer::CPtr> Builder::Network::getInputs() const noexcept {
+    std::vector<ILayer::CPtr> inputs;
+    for (const auto& layer : parameters.at("layers").as<std::vector<Layer::Ptr>>()) {
+        bool isInputLayer = true;
+        for (const auto& connection : getLayerConnections(layer->getId())) {
+            if (connection.to().layerId() == layer->getId()) {
+                isInputLayer = false;
+                break;
+            }
+        }
+        if (isInputLayer) {
+            inputs.push_back(layer->build());
+        }
+    }
+    return inputs;
+}
+
+std::vector<Builder::Layer::Ptr> Builder::Network::getInputs() {
+    std::vector<Builder::Layer::Ptr> inputs;
+    for (auto& layer : parameters.at("layers").as<std::vector<Layer::Ptr>>()) {
+        bool isInputLayer = true;
+        for (const auto& connection : getLayerConnections(layer->getId())) {
+            if (connection.to().layerId() == layer->getId()) {
+                isInputLayer = false;
+                break;
+            }
+        }
+        if (isInputLayer) {
+            inputs.push_back(layer);
+        }
+    }
+    return inputs;
+}
+
+const std::vector<ILayer::CPtr> Builder::Network::getOutputs() const noexcept {
+    std::vector<ILayer::CPtr> outputs;
+    for (const auto& layer : parameters.at("layers").as<std::vector<Layer::Ptr>>()) {
+        bool isOutputLayer = true;
+        for (const auto& connection : getLayerConnections(layer->getId())) {
+            if (connection.from().layerId() == layer->getId()) {
+                isOutputLayer = false;
+                break;
+            }
+        }
+        if (isOutputLayer) {
+            outputs.push_back(layer->build());
+        }
+    }
+    return outputs;
+}
+
+std::vector<Builder::Layer::Ptr> Builder::Network::getOutputs() {
+    std::vector<Builder::Layer::Ptr> outputs;
+    for (auto& layer : parameters.at("layers").as<std::vector<Layer::Ptr>>()) {
+        bool isOutputLayer = true;
+        for (const auto& connection : getLayerConnections(layer->getId())) {
+            if (connection.from().layerId() == layer->getId()) {
+                isOutputLayer = false;
+                break;
+            }
+        }
+        if (isOutputLayer) {
+            outputs.push_back(layer);
+        }
+    }
+    return outputs;
+}
+
+const std::vector<Connection>& Builder::Network::getConnections() const {
+    return parameters.at("connections").as<std::vector<Connection>>();
 }
 
 const std::vector<Connection> Builder::Network::getLayerConnections(idx_t layerId) const noexcept {
     std::vector<Connection> layerConnections;
-    for (const auto connection : connections) {
+    for (const auto connection : parameters.at("connections").as<std::vector<Connection>>()) {
         if (connection.from().layerId() == layerId || connection.to().layerId() == layerId)
             layerConnections.push_back(connection);
     }
diff --git a/inference-engine/src/inference_engine/builders/ie_norm_layer.cpp b/inference-engine/src/inference_engine/builders/ie_norm_layer.cpp
index cb6d47b1c..16a2b2d96 100644
--- a/inference-engine/src/inference_engine/builders/ie_norm_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_norm_layer.cpp
@@ -1,77 +1,80 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_norm_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::NormLayer::NormLayer(const std::string& name): LayerFragment("Norm", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::NormLayer::NormLayer(const std::string& name): LayerDecorator("Norm", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setAcrossMaps(false);
     setSize(0);
     setAlpha(0);
     setBeta(0);
 }
 
-Builder::NormLayer::NormLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Norm"))
-        THROW_IE_EXCEPTION << "Cannot create NormLayer decorator for layer " << getLayer().getType();
+Builder::NormLayer::NormLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Norm");
+}
+
+Builder::NormLayer::NormLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Norm");
 }
 
 Builder::NormLayer& Builder::NormLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::NormLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::NormLayer& Builder::NormLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::NormLayer::getSize() const {
-    return getLayer().getParameters()["local-size"].asUInt();
+    return getLayer()->getParameters().at("local-size");
 }
 
 Builder::NormLayer& Builder::NormLayer::setSize(size_t size) {
-    getLayer().getParameters()["local-size"] = size;
+    getLayer()->getParameters()["local-size"] = size;
     return *this;
 }
 
 float Builder::NormLayer::getAlpha() const {
-    return getLayer().getParameters()["alpha"].asFloat();
+    return getLayer()->getParameters().at("alpha");
 }
 
 Builder::NormLayer& Builder::NormLayer::setAlpha(float alpha) {
-    getLayer().getParameters()["alpha"] = alpha;
+    getLayer()->getParameters()["alpha"] = alpha;
     return *this;
 }
 
 float Builder::NormLayer::getBeta() const {
-    return getLayer().getParameters()["beta"].asFloat();
+    return getLayer()->getParameters().at("beta");
 }
 
 Builder::NormLayer& Builder::NormLayer::setBeta(float beta) {
-    getLayer().getParameters()["beta"] = beta;
+    getLayer()->getParameters()["beta"] = beta;
     return *this;
 }
 
 bool Builder::NormLayer::getAcrossMaps() const {
-    return getLayer().getParameters()["region"].asString() == "across";
+    return getLayer()->getParameters().at("region").as<std::string>() == "across";
 }
 
 Builder::NormLayer& Builder::NormLayer::setAcrossMaps(bool acrossMap)  {
     std::string value = acrossMap ? "across" : "same";
-    getLayer().getParameters()["region"] = value;
+    getLayer()->getParameters()["region"] = value;
     return *this;
 }
 
@@ -83,3 +86,29 @@ Builder::NormLayer& Builder::NormLayer::setRegion(Builder::NormLayer::NormType t
     setAcrossMaps(type);
     return *this;
 }
+
+REG_VALIDATOR_FOR(Norm, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::NormLayer layer(input_layer);
+    if (layer.getAlpha() <= 0) {
+        THROW_IE_EXCEPTION << "Alpha should be > 0";
+    }
+    if (layer.getBeta() <= 0) {
+        THROW_IE_EXCEPTION << "Beta should be > 0";
+    }
+    if (layer.getSize() == 0) {
+        THROW_IE_EXCEPTION << "Size should be > 0";
+    }
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(Norm, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["local-size"] = (size_t)cnnLayer->GetParamAsUInt("local-size", 0);
+    layer.getParameters()["alpha"] = cnnLayer->GetParamAsFloat("alpha", 0);
+    layer.getParameters()["beta"] = cnnLayer->GetParamAsFloat("beta", 0);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_normalize_layer.cpp b/inference-engine/src/inference_engine/builders/ie_normalize_layer.cpp
index 699993f07..faa54dc82 100644
--- a/inference-engine/src/inference_engine/builders/ie_normalize_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_normalize_layer.cpp
@@ -1,65 +1,89 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_normalize_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::NormalizeLayer::NormalizeLayer(const std::string& name): LayerFragment("Normalize", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::NormalizeLayer::NormalizeLayer(const std::string& name): LayerDecorator("Normalize", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setAcrossMaps(false);
     setChannelShared(false);
     setEpsilon(0.0000001f);
 }
 
-Builder::NormalizeLayer::NormalizeLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Normalize"))
-        THROW_IE_EXCEPTION << "Cannot create NormalizeLayer decorator for layer " << getLayer().getType();
+Builder::NormalizeLayer::NormalizeLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Normalize");
+}
+
+Builder::NormalizeLayer::NormalizeLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Normalize");
 }
 
 Builder::NormalizeLayer& Builder::NormalizeLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::NormalizeLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::NormalizeLayer& Builder::NormalizeLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 bool Builder::NormalizeLayer::getAcrossMaps() const {
-    return getLayer().getParameters()["region"].asBool();
+    return getLayer()->getParameters().at("region");
 }
 
 Builder::NormalizeLayer& Builder::NormalizeLayer::setAcrossMaps(bool acrossMap)  {
-    getLayer().getParameters()["region"] = acrossMap ? 1 : 0;
+    getLayer()->getParameters()["region"] = acrossMap ? 1 : 0;
     return *this;
 }
 
 bool Builder::NormalizeLayer::getChannelShared() const {
-    return getLayer().getParameters()["channel_shared"].asBool();
+    return getLayer()->getParameters().at("channel_shared");
 }
 
 Builder::NormalizeLayer& Builder::NormalizeLayer::setChannelShared(bool channelShared)  {
-    getLayer().getParameters()["channel_shared"] = channelShared ? 1 : 0;
+    getLayer()->getParameters()["channel_shared"] = channelShared ? 1 : 0;
     return *this;
 }
 
 float Builder::NormalizeLayer::getEpsilon() const {
-    return getLayer().getParameters()["eps"].asFloat();
+    return getLayer()->getParameters().at("eps");
 }
 
 Builder::NormalizeLayer& Builder::NormalizeLayer::setEpsilon(float eps) {
-    getLayer().getParameters()["eps"] = eps;
+    getLayer()->getParameters()["eps"] = eps;
     return *this;
 }
+
+REG_VALIDATOR_FOR(Normalize, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::NormalizeLayer layer(input_layer);
+    if (layer.getEpsilon() <= 0) {
+        THROW_IE_EXCEPTION << "Epsilon should be > 0";
+    }
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(Normalize, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["region"] = cnnLayer->GetParamsAsBool("region", 0);
+    layer.getParameters()["channel_shared"] = cnnLayer->GetParamsAsBool("channel_shared", 0);
+    layer.getParameters()["eps"] = cnnLayer->GetParamAsFloat("eps", 0);
+});
+
diff --git a/inference-engine/src/inference_engine/builders/ie_output_layer_layer.cpp b/inference-engine/src/inference_engine/builders/ie_output_layer_layer.cpp
index 88dfcf151..9bca83a6c 100644
--- a/inference-engine/src/inference_engine/builders/ie_output_layer_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_output_layer_layer.cpp
@@ -1,33 +1,37 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_output_layer.hpp>
-#include <details/caseless.hpp>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::OutputLayer::OutputLayer(const std::string& name): LayerFragment("Output", name) {
-    getLayer().getInputPorts().resize(1);
+Builder::OutputLayer::OutputLayer(const std::string& name): LayerDecorator("Output", name) {
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::OutputLayer::OutputLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Output"))
-        THROW_IE_EXCEPTION << "Cannot create OutputLayer decorator for layer " << getLayer().getType();
+Builder::OutputLayer::OutputLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Output");
+}
+
+Builder::OutputLayer::OutputLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Output");
 }
 
 Builder::OutputLayer& Builder::OutputLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::OutputLayer::getPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::OutputLayer& Builder::OutputLayer::setPort(const Port &port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
+
+REG_VALIDATOR_FOR(Output, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {});
diff --git a/inference-engine/src/inference_engine/builders/ie_permute_layer.cpp b/inference-engine/src/inference_engine/builders/ie_permute_layer.cpp
index 2cfa879f9..65df2c5ae 100644
--- a/inference-engine/src/inference_engine/builders/ie_permute_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_permute_layer.cpp
@@ -1,52 +1,63 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_permute_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 #include <vector>
 
 using namespace InferenceEngine;
 
-Builder::PermuteLayer::PermuteLayer(const std::string& name): LayerFragment("Permute", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::PermuteLayer::PermuteLayer(const std::string& name): LayerDecorator("Permute", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::PermuteLayer::PermuteLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Permute"))
-        THROW_IE_EXCEPTION << "Cannot create PermuteLayer decorator for layer " << getLayer().getType();
+Builder::PermuteLayer::PermuteLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Permute");
+}
+
+Builder::PermuteLayer::PermuteLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Permute");
 }
 
 Builder::PermuteLayer& Builder::PermuteLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::PermuteLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PermuteLayer& Builder::PermuteLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::PermuteLayer::getInputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PermuteLayer& Builder::PermuteLayer::setInputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<size_t> Builder::PermuteLayer::getOrder() const {
-    return uInts2size_t(getLayer().getParameters()["order"].asUInts());
+    return getLayer()->getParameters().at("order");
 }
 Builder::PermuteLayer& Builder::PermuteLayer::setOrder(const std::vector<size_t>& ratios) {
-    getLayer().getParameters()["order"] = ratios;
+    getLayer()->getParameters()["order"] = ratios;
     return *this;
 }
+
+REG_CONVERTER_FOR(Permute, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    std::vector<unsigned int> tmp = cnnLayer->GetParamAsUInts("order");
+    layer.getParameters()["order"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["order"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_pooling_layer.cpp b/inference-engine/src/inference_engine/builders/ie_pooling_layer.cpp
index 41db6c8f9..67bbcc552 100644
--- a/inference-engine/src/inference_engine/builders/ie_pooling_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_pooling_layer.cpp
@@ -1,42 +1,63 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_pooling_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::PoolingLayer::PoolingLayer(const std::string& name): LayerFragment("Pooling", name) {
-    getLayer().getInputPorts().resize(1);
-    getLayer().getOutputPorts().resize(1);
+Builder::PoolingLayer::PoolingLayer(const std::string& name): LayerDecorator("Pooling", name) {
+    getLayer()->getInputPorts().resize(1);
+    getLayer()->getOutputPorts().resize(1);
+    setKernel({});
+    setStrides({});
+    setPaddingsEnd({});
+    setPaddingsBegin({});
     setExcludePad(false);
     setPoolingType(PoolingType::MAX);
     setRoundingType(RoundingType::CEIL);
 }
 
-Builder::PoolingLayer::PoolingLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Pooling"))
-        THROW_IE_EXCEPTION << "Cannot create PoolingLayer decorator for layer " << getLayer().getType();
+Builder::PoolingLayer::PoolingLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Pooling");
 
-    std::string typeStr = getLayer().getParameters()["pool-method"].asString("max");
+    std::string typeStr = getLayer()->getParameters()["pool-method"];
     if (typeStr == "max")
         type = MAX;
     else if (typeStr == "avg")
         type = AVG;
 
-    typeStr = getLayer().getParameters()["rounding_type"].asString("ceil");
-    if (typeStr == "ceil")
+    std::string roundTypeStr = getLayer()->getParameters()["rounding_type"];
+    if (roundTypeStr == "ceil")
         roundingType = CEIL;
+    else if (roundTypeStr == "avg")
+        roundingType = FLOOR;
+}
+
+Builder::PoolingLayer::PoolingLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Pooling");
+
+    const auto cLayer = static_cast<const PoolingLayer*>(this)->getLayer();
+
+    std::string typeStr = cLayer->getParameters().at("pool-method");
+    if (typeStr == "max")
+        type = MAX;
     else if (typeStr == "avg")
+        type = AVG;
+
+    std::string roundTypeStr = cLayer->getParameters().at("rounding_type");
+    if (roundTypeStr == "ceil")
+        roundingType = CEIL;
+    else if (roundTypeStr == "avg")
         roundingType = FLOOR;
 }
 
 Builder::PoolingLayer::operator Builder::Layer() const {
-    Layer genLayer(getLayer());
+    Layer genLayer(*getLayer());
 
     std::vector<size_t> l_kernel = getKernel();
     std::vector<size_t> l_paddingBegin = getPaddingsBegin();
@@ -61,57 +82,57 @@ Builder::PoolingLayer::operator Builder::Layer() const {
 }
 
 Builder::PoolingLayer &Builder::PoolingLayer::setName(const std::string &name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::PoolingLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::PoolingLayer& Builder::PoolingLayer::setInputPort(const Port& port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::PoolingLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PoolingLayer& Builder::PoolingLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<size_t> Builder::PoolingLayer::getKernel() const {
-    return uInts2size_t(getLayer().getParameters()["kernel"].asUInts({}));
+    return getLayer()->getParameters().at("kernel");
 }
 Builder::PoolingLayer& Builder::PoolingLayer::setKernel(const std::vector<size_t>& kernel) {
-    getLayer().getParameters()["kernel"] = kernel;
+    getLayer()->getParameters()["kernel"] = kernel;
     return *this;
 }
 
 const std::vector<size_t> Builder::PoolingLayer::getStrides() const {
-    return uInts2size_t(getLayer().getParameters()["strides"].asUInts({}));
+    return getLayer()->getParameters().at("strides");
 }
 Builder::PoolingLayer& Builder::PoolingLayer::setStrides(const std::vector<size_t>& strides) {
-    getLayer().getParameters()["strides"] = strides;
+    getLayer()->getParameters()["strides"] = strides;
     return *this;
 }
 
 const std::vector<size_t> Builder::PoolingLayer::getPaddingsBegin() const {
-    return uInts2size_t(getLayer().getParameters()["pads_begin"].asUInts({}));
+    return getLayer()->getParameters().at("pads_begin");
 }
 Builder::PoolingLayer& Builder::PoolingLayer::setPaddingsBegin(const std::vector<size_t>& paddings) {
-    getLayer().getParameters()["pads_begin"] = paddings;
+    getLayer()->getParameters()["pads_begin"] = paddings;
     return *this;
 }
 
 const std::vector<size_t> Builder::PoolingLayer::getPaddingsEnd() const {
-    return uInts2size_t(getLayer().getParameters()["pads_end"].asUInts({}));
+    return getLayer()->getParameters().at("pads_end");
 }
 Builder::PoolingLayer& Builder::PoolingLayer::setPaddingsEnd(const std::vector<size_t>& paddings) {
-    getLayer().getParameters()["pads_end"] = paddings;
+    getLayer()->getParameters()["pads_end"] = paddings;
     return *this;
 }
 
@@ -119,7 +140,6 @@ Builder::PoolingLayer::PoolingType Builder::PoolingLayer::getPoolingType() const
     return type;
 }
 Builder::PoolingLayer& Builder::PoolingLayer::setPoolingType(Builder::PoolingLayer::PoolingType type) {
-    this->type = type;
     std::string typeStr;
     switch (type) {
         case MAX:
@@ -129,7 +149,8 @@ Builder::PoolingLayer& Builder::PoolingLayer::setPoolingType(Builder::PoolingLay
             typeStr = "avg";
             break;
     }
-    getLayer().getParameters()["pool-method"] = typeStr;
+    getLayer()->getParameters()["pool-method"] = typeStr;
+    this->type = type;
     return *this;
 }
 
@@ -147,28 +168,27 @@ Builder::PoolingLayer& Builder::PoolingLayer::setRoundingType(Builder::PoolingLa
             typeStr = "floor";
             break;
     }
-    getLayer().getParameters()["rounding_type"] = typeStr;
+    getLayer()->getParameters()["rounding_type"] = typeStr;
     return *this;
 }
 
 bool Builder::PoolingLayer::getExcludePad() const {
-    return getLayer().getParameters()["exclude-pad"].asBool();
+    return getLayer()->getParameters().at("exclude-pad");
 }
 
 Builder::PoolingLayer& Builder::PoolingLayer::setExcludePad(bool exclude) {
-    getLayer().getParameters()["exclude-pad"] = exclude;
+    getLayer()->getParameters()["exclude-pad"] = exclude;
     return *this;
 }
 
-
-void Builder::PoolingLayer::validate(const Layer& layer) {
-    Layer poolLayer = layer;
-    Builder::PoolingLayer poolBuilder(poolLayer);
-    std::vector<size_t> l_kernel = poolBuilder.getKernel();
+REG_VALIDATOR_FOR(Pooling, [](const Builder::Layer::CPtr& layer, bool partial) {
     // WA for old IRs
-    if (l_kernel.empty() && layer.getParameters().find("kernel-x") != layer.getParameters().end() &&
-        layer.getParameters().find("kernel-y") != layer.getParameters().end())
+    if (layer->getParameters().find("kernel") == layer->getParameters().end() && layer->getParameters().find("kernel-x") != layer->getParameters().end() &&
+        layer->getParameters().find("kernel-y") != layer->getParameters().end())
         return;
+
+    Builder::PoolingLayer poolBuilder(layer);
+    std::vector<size_t> l_kernel = poolBuilder.getKernel();
     std::vector<size_t> l_paddingBegin = poolBuilder.getPaddingsBegin();
     std::vector<size_t> l_paddingEnd = poolBuilder.getPaddingsEnd();
     std::vector<size_t> l_strides = poolBuilder.getStrides();
@@ -181,7 +201,39 @@ void Builder::PoolingLayer::validate(const Layer& layer) {
         l_strides.resize(l_kernel.size(), 1);
 
     if (l_kernel.empty() || l_kernel.size() != l_paddingBegin.size() || l_kernel.size() != l_paddingEnd.size() || l_kernel.size() != l_strides.size())
-        THROW_IE_EXCEPTION << layer.getType() << " node " << layer.getName() << " contains incorrect parameters!";
-}
+        THROW_IE_EXCEPTION << layer->getType() << " node " << layer->getName() << " contains incorrect parameters!";
+});
+
+REG_CONVERTER_FOR(Pooling, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    if (cnnLayer->params.find("kernel") == cnnLayer->params.end() &&
+        cnnLayer->params.find("kernel-x") != cnnLayer->params.end() &&
+        cnnLayer->params.find("kernel-y") != cnnLayer->params.end())
+        return;
+    std::vector<unsigned int> tmp = cnnLayer->GetParamAsUInts("kernel");
+    layer.getParameters()["kernel"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["kernel"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
+
+    tmp = cnnLayer->GetParamAsUInts("strides");
+    layer.getParameters()["strides"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["strides"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
+
+    tmp = cnnLayer->GetParamAsUInts("pads_begin");
+    layer.getParameters()["pads_begin"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["pads_begin"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
+
+    tmp = cnnLayer->GetParamAsUInts("pads_end");
+    layer.getParameters()["pads_end"] = std::vector<size_t>(tmp.size());
+    for (size_t i = 0; i < tmp.size(); ++i) {
+        layer.getParameters()["pads_end"].as<std::vector<size_t>>()[i] = static_cast<size_t>(tmp[i]);
+    }
 
-REG_VALIDATOR_FOR(Pooling, Builder::PoolingLayer::validate);
+    layer.getParameters()["exclude-pad"] = cnnLayer->GetParamAsBool("exclude-pad", false);
+    layer.getParameters()["rounding_type"] = cnnLayer->GetParamAsString("rounding_type", "ceil");
+    layer.getParameters()["pool-method"] = cnnLayer->GetParamAsString("pool-method", "max");
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_power_layer.cpp b/inference-engine/src/inference_engine/builders/ie_power_layer.cpp
index c3142fa13..db04e2bff 100644
--- a/inference-engine/src/inference_engine/builders/ie_power_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_power_layer.cpp
@@ -1,66 +1,74 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_power_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::PowerLayer::PowerLayer(const std::string& name): LayerFragment("Power", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::PowerLayer::PowerLayer(const std::string& name): LayerDecorator("Power", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setPower(1);
     setScale(1);
     setShift(0);
 }
 
-Builder::PowerLayer::PowerLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Power"))
-        THROW_IE_EXCEPTION << "Cannot create PowerLayer decorator for layer " << getLayer().getType();
+Builder::PowerLayer::PowerLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Power");
+}
+
+Builder::PowerLayer::PowerLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Power");
 }
 
 Builder::PowerLayer& Builder::PowerLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::PowerLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PowerLayer& Builder::PowerLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::PowerLayer::getPower() const {
-    return getLayer().getParameters()["power"].asFloat();
+    return getLayer()->getParameters().at("power");
 }
 
 Builder::PowerLayer& Builder::PowerLayer::setPower(float power) {
-    getLayer().getParameters()["power"] = power;
+    getLayer()->getParameters()["power"] = power;
     return *this;
 }
 
 float Builder::PowerLayer::getScale() const {
-    return getLayer().getParameters()["scale"].asFloat();
+    return getLayer()->getParameters().at("scale");
 }
 
 Builder::PowerLayer& Builder::PowerLayer::setScale(float scale) {
-    getLayer().getParameters()["scale"] = scale;
+    getLayer()->getParameters()["scale"] = scale;
     return *this;
 }
 
 float Builder::PowerLayer::getShift() const {
-    return getLayer().getParameters()["shift"].asFloat();
+    return getLayer()->getParameters().at("shift");
 }
 
 Builder::PowerLayer& Builder::PowerLayer::setShift(float shift) {
-    getLayer().getParameters()["shift"] = shift;
+    getLayer()->getParameters()["shift"] = shift;
     return *this;
 }
 
+REG_CONVERTER_FOR(Power, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["shift"] = cnnLayer->GetParamAsFloat("shift", 0);
+    layer.getParameters()["scale"] = cnnLayer->GetParamAsFloat("scale", 1);
+    layer.getParameters()["power"] = cnnLayer->GetParamAsFloat("power", 1);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_prelu_layer.cpp b/inference-engine/src/inference_engine/builders/ie_prelu_layer.cpp
index 6263f963c..dec276e46 100644
--- a/inference-engine/src/inference_engine/builders/ie_prelu_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_prelu_layer.cpp
@@ -1,49 +1,52 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_prelu_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::PReLULayer::PReLULayer(const std::string& name): LayerFragment("PReLU", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::PReLULayer::PReLULayer(const std::string& name): LayerDecorator("PReLU", name) {
+    getLayer()->getInputPorts().resize(2);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getOutputPorts().resize(1);
     setChannelShared(false);
 }
 
-Builder::PReLULayer::PReLULayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "PReLU"))
-        THROW_IE_EXCEPTION << "Cannot create PReLULayer decorator for layer " << getLayer().getType();
+Builder::PReLULayer::PReLULayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("PReLU");
+}
+
+Builder::PReLULayer::PReLULayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("PReLU");
 }
 
 Builder::PReLULayer& Builder::PReLULayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::PReLULayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PReLULayer& Builder::PReLULayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 bool Builder::PReLULayer::getChannelShared() const {
-    return getLayer().getParameters()["channel_shared"].asBool();
+    return getLayer()->getParameters().at("channel_shared");
 }
 Builder::PReLULayer& Builder::PReLULayer::setChannelShared(bool flag) {
-    getLayer().getParameters()["channel_shared"] = flag ? 1 : 0;
+    getLayer()->getParameters()["channel_shared"] = flag ? 1 : 0;
     return *this;
 }
 
-Builder::PReLULayer& Builder::PReLULayer::setWeights(const Blob::CPtr& weights) {
-    getLayer().addConstantData("weights", weights);
-    return *this;
-}
+REG_CONVERTER_FOR(PReLU, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["channel_shared"] = cnnLayer->GetParamAsBool("channel_shared", false);
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_prior_box_clustered_layer.cpp b/inference-engine/src/inference_engine/builders/ie_prior_box_clustered_layer.cpp
index c52b2f4b2..e4505b66b 100644
--- a/inference-engine/src/inference_engine/builders/ie_prior_box_clustered_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_prior_box_clustered_layer.cpp
@@ -1,124 +1,141 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_prior_box_clustered_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::PriorBoxClusteredLayer::PriorBoxClusteredLayer(const std::string& name): LayerFragment("PriorBoxClustered", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(2);
+Builder::PriorBoxClusteredLayer::PriorBoxClusteredLayer(const std::string& name): LayerDecorator("PriorBoxClustered", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(2);
 }
 
-Builder::PriorBoxClusteredLayer::PriorBoxClusteredLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "PriorBoxClustered"))
-        THROW_IE_EXCEPTION << "Cannot create PriorBoxClusteredLayer decorator for layer " << getLayer().getType();
+Builder::PriorBoxClusteredLayer::PriorBoxClusteredLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("PriorBoxClustered");
+}
+
+Builder::PriorBoxClusteredLayer::PriorBoxClusteredLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("PriorBoxClustered");
 }
 
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const std::vector<Port>& Builder::PriorBoxClusteredLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setInputPorts(const std::vector<Port> &ports) {
     if (ports.size() != 2)
-        THROW_IE_EXCEPTION << "Incorrect number of inputs for PriorBoxClustered layer.";
-    getLayer().getInputPorts() = ports;
+        THROW_IE_EXCEPTION << "Incorrect number of inputs for PriorBoxClustered getLayer().";
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 const Port& Builder::PriorBoxClusteredLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 float Builder::PriorBoxClusteredLayer::getVariance() const {
-    return getLayer().getParameters()["variance"].asFloat();
+    return getLayer()->getParameters().at("variance");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setVariance(float variance) {
-    getLayer().getParameters()["variance"] = variance;
+    getLayer()->getParameters()["variance"] = variance;
     return *this;
 }
 
 float Builder::PriorBoxClusteredLayer::getOffset() const {
-    return getLayer().getParameters()["offset"].asFloat();
+    return getLayer()->getParameters().at("offset");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setOffset(float offset) {
-    getLayer().getParameters()["offset"] = offset;
+    getLayer()->getParameters()["offset"] = offset;
     return *this;
 }
 
 float Builder::PriorBoxClusteredLayer::getWidth() const {
-    return getLayer().getParameters()["width"].asFloat();
+    return getLayer()->getParameters().at("width");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setWidth(float width) {
-    getLayer().getParameters()["width"] = width;
+    getLayer()->getParameters()["width"] = width;
     return *this;
 }
 
 float Builder::PriorBoxClusteredLayer::getHeight() const {
-    return getLayer().getParameters()["height"].asFloat();
+    return getLayer()->getParameters().at("height");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setHeight(float height) {
-    getLayer().getParameters()["height"] = height;
+    getLayer()->getParameters()["height"] = height;
     return *this;
 }
 
 const std::vector<float> Builder::PriorBoxClusteredLayer::getSteps() const {
-    return {getLayer().getParameters()["step_h"].asFloat(), getLayer().getParameters()["step_w"].asFloat()};
+    return {getLayer()->getParameters().at("step_h"), getLayer()->getParameters().at("step_w")};
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setSteps(const std::vector<float> steps) {
     if (steps.size() != 2)
         THROW_IE_EXCEPTION << "PriorBoxClusteredLayer supports sizes only for height and width dimensions!";
-    getLayer().getParameters()["step_h"] = steps[0];
-    getLayer().getParameters()["step_w"] = steps[1];
+    getLayer()->getParameters()["step_h"] = steps[0];
+    getLayer()->getParameters()["step_w"] = steps[1];
     return *this;
 }
 
 const std::vector<float> Builder::PriorBoxClusteredLayer::getImgSizes() const {
-    return {getLayer().getParameters()["img_h"].asFloat(), getLayer().getParameters()["img_w"].asFloat()};
+    return {getLayer()->getParameters().at("img_h"), getLayer()->getParameters().at("img_w")};
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setImgSizes(const std::vector<float> sizes) {
     if (sizes.size() != 2)
         THROW_IE_EXCEPTION << "PriorBoxClusteredLayer allows to specify only height and width dimensions of an input image!";
-    getLayer().getParameters()["img_h"] = sizes[0];
-    getLayer().getParameters()["img_w"] = sizes[1];
+    getLayer()->getParameters()["img_h"] = sizes[0];
+    getLayer()->getParameters()["img_w"] = sizes[1];
     return *this;
 }
 
 float Builder::PriorBoxClusteredLayer::getStep() const {
-    return getLayer().getParameters()["step"].asFloat();
+    return getLayer()->getParameters().at("step");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setStep(float step) {
-    getLayer().getParameters()["step"] = step;
+    getLayer()->getParameters()["step"] = step;
     return *this;
 }
 
 bool Builder::PriorBoxClusteredLayer::getClip() const {
-    return getLayer().getParameters()["clip"].asBool();
+    return getLayer()->getParameters().at("clip");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setClip(bool flag) {
-    getLayer().getParameters()["clip"] = flag;
+    getLayer()->getParameters()["clip"] = flag;
     return *this;
 }
 
 bool Builder::PriorBoxClusteredLayer::getFlip() const {
-    return getLayer().getParameters()["flip"].asBool();
+    return getLayer()->getParameters().at("flip");
 }
 Builder::PriorBoxClusteredLayer& Builder::PriorBoxClusteredLayer::setFlip(bool flag) {
-    getLayer().getParameters()["flip"] = flag;
+    getLayer()->getParameters()["flip"] = flag;
     return *this;
 }
+
+REG_CONVERTER_FOR(PriorBoxClustered, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["flip"] = cnnLayer->GetParamAsBool("flip", false);
+    layer.getParameters()["clip"] = cnnLayer->GetParamAsBool("clip", false);
+    layer.getParameters()["step"] = cnnLayer->GetParamAsFloat("step");
+    layer.getParameters()["img_h"] = cnnLayer->GetParamAsFloat("img_h", 0);
+    layer.getParameters()["img_w"] = cnnLayer->GetParamAsFloat("img_w", 0);
+    layer.getParameters()["step_h"] = cnnLayer->GetParamAsFloat("step_h", 0);
+    layer.getParameters()["step_w"] = cnnLayer->GetParamAsFloat("step_w", 0);
+    layer.getParameters()["height"] = cnnLayer->GetParamAsFloat("height", 0);
+    layer.getParameters()["width"] = cnnLayer->GetParamAsFloat("width", 0);
+    layer.getParameters()["offset"] = cnnLayer->GetParamAsFloat("offset", 0);
+    layer.getParameters()["variance"] = cnnLayer->GetParamAsFloat("variance", 0);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_prior_box_layer.cpp b/inference-engine/src/inference_engine/builders/ie_prior_box_layer.cpp
index dab36e07c..febe397db 100644
--- a/inference-engine/src/inference_engine/builders/ie_prior_box_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_prior_box_layer.cpp
@@ -1,118 +1,133 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_prior_box_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::PriorBoxLayer::PriorBoxLayer(const std::string& name): LayerFragment("PriorBox", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(2);
+Builder::PriorBoxLayer::PriorBoxLayer(const std::string& name): LayerDecorator("PriorBox", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(2);
     setScaleAllSizes(true);
 }
 
-Builder::PriorBoxLayer::PriorBoxLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "PriorBox"))
-        THROW_IE_EXCEPTION << "Cannot create PriorBoxLayer decorator for layer " << getLayer().getType();
+Builder::PriorBoxLayer::PriorBoxLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("PriorBox");
+}
+
+Builder::PriorBoxLayer::PriorBoxLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("PriorBox");
 }
 
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const std::vector<Port>& Builder::PriorBoxLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setInputPorts(const std::vector<Port> &ports) {
     if (ports.size() != 2)
-        THROW_IE_EXCEPTION << "Incorrect number of inputs for PriorBox layer.";
-    getLayer().getInputPorts() = ports;
+        THROW_IE_EXCEPTION << "Incorrect number of inputs for PriorBox getLayer().";
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 const Port& Builder::PriorBoxLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 float Builder::PriorBoxLayer::getVariance() const {
-    return getLayer().getParameters()["variance"].asFloat();
+    return getLayer()->getParameters().at("variance");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setVariance(float variance) {
-    getLayer().getParameters()["variance"] = variance;
+    getLayer()->getParameters()["variance"] = variance;
     return *this;
 }
 
 float Builder::PriorBoxLayer::getOffset() const {
-    return getLayer().getParameters()["offset"].asFloat();
+    return getLayer()->getParameters().at("offset");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setOffset(float offset) {
-    getLayer().getParameters()["offset"] = offset;
+    getLayer()->getParameters()["offset"] = offset;
     return *this;
 }
 
 float Builder::PriorBoxLayer::getStep() const {
-    return getLayer().getParameters()["step"].asFloat();
+    return getLayer()->getParameters().at("step");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setStep(float step) {
-    getLayer().getParameters()["step"] = step;
+    getLayer()->getParameters()["step"] = step;
     return *this;
 }
 
 size_t Builder::PriorBoxLayer::getMinSize() const {
-    return getLayer().getParameters()["min_size"].asUInt();
+    return getLayer()->getParameters().at("min_size");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setMinSize(size_t minSize) {
-    getLayer().getParameters()["min_size"] = minSize;
+    getLayer()->getParameters()["min_size"] = minSize;
     return *this;
 }
 size_t Builder::PriorBoxLayer::getMaxSize() const {
-    return getLayer().getParameters()["max_size"].asUInt();
+    return getLayer()->getParameters().at("max_size");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setMaxSize(size_t maxSize) {
-    getLayer().getParameters()["max_size"] = maxSize;
+    getLayer()->getParameters()["max_size"] = maxSize;
     return *this;
 }
 
 bool Builder::PriorBoxLayer::getScaleAllSizes() const {
-    return getLayer().getParameters()["scale_all_sizes"].asBool(true);
+    return getLayer()->getParameters().at("scale_all_sizes");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setScaleAllSizes(bool flag) {
-    getLayer().getParameters()["scale_all_sizes"] = flag;
+    getLayer()->getParameters()["scale_all_sizes"] = flag;
     return *this;
 }
 
 bool Builder::PriorBoxLayer::getClip() const {
-    return getLayer().getParameters()["clip"].asBool();
+    return getLayer()->getParameters().at("clip");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setClip(bool flag) {
-    getLayer().getParameters()["clip"] = flag;
+    getLayer()->getParameters()["clip"] = flag;
     return *this;
 }
 
 bool Builder::PriorBoxLayer::getFlip() const {
-    return getLayer().getParameters()["flip"].asBool();
+    return getLayer()->getParameters().at("flip");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setFlip(bool flag) {
-    getLayer().getParameters()["flip"] = flag;
+    getLayer()->getParameters()["flip"] = flag;
     return *this;
 }
 
 const std::vector<size_t> Builder::PriorBoxLayer::getAspectRatio() const {
-    return uInts2size_t(getLayer().getParameters()["aspect_ratio"].asUInts());
+    return getLayer()->getParameters().at("aspect_ratio");
 }
 Builder::PriorBoxLayer& Builder::PriorBoxLayer::setAspectRatio(const std::vector<size_t>& aspectRatio) {
-    getLayer().getParameters()["aspect_ratio"] = aspectRatio;
+    getLayer()->getParameters()["aspect_ratio"] = aspectRatio;
     return *this;
 }
+
+REG_CONVERTER_FOR(PriorBox, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["flip"] = cnnLayer->GetParamAsBool("flip", false);
+    layer.getParameters()["clip"] = cnnLayer->GetParamAsBool("clip", false);
+    layer.getParameters()["scale_all_sizes"] = cnnLayer->GetParamAsBool("scale_all_sizes", true);
+    layer.getParameters()["step"] = cnnLayer->GetParamAsFloat("step", 0);
+    layer.getParameters()["offset"] = cnnLayer->GetParamAsFloat("offset");
+    layer.getParameters()["variance"] = cnnLayer->GetParamAsFloat("variance", 0);
+    layer.getParameters()["aspect_ratio"] = cnnLayer->GetParamAsFloats("aspect_ratio", {});
+    layer.getParameters()["min_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("min_size", 0));
+    layer.getParameters()["max_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("max_size", 0));
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_proposal_layer.cpp b/inference-engine/src/inference_engine/builders/ie_proposal_layer.cpp
index 2437b7c6a..945f59ed7 100644
--- a/inference-engine/src/inference_engine/builders/ie_proposal_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_proposal_layer.cpp
@@ -1,103 +1,117 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_proposal_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ProposalLayer::ProposalLayer(const std::string& name): LayerFragment("Proposal", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(3);
+Builder::ProposalLayer::ProposalLayer(const std::string& name): LayerDecorator("Proposal", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(3);
 }
 
-Builder::ProposalLayer::ProposalLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Proposal"))
-        THROW_IE_EXCEPTION << "Cannot create ProposalLayer decorator for layer " << getLayer().getType();
+Builder::ProposalLayer::ProposalLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Proposal");
+}
+
+Builder::ProposalLayer::ProposalLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Proposal");
 }
 
 Builder::ProposalLayer& Builder::ProposalLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const std::vector<Port>& Builder::ProposalLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 
 Builder::ProposalLayer& Builder::ProposalLayer::setInputPorts(const std::vector<Port> &ports) {
     if (ports.size() != 3)
-        THROW_IE_EXCEPTION << "Incorrect number of inputs for Proposal layer.";
-    getLayer().getInputPorts() = ports;
+        THROW_IE_EXCEPTION << "Incorrect number of inputs for Proposal getLayer().";
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 
 const Port& Builder::ProposalLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ProposalLayer& Builder::ProposalLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::ProposalLayer::getPostNMSTopN() const {
-    return getLayer().getParameters()["post_nms_topn"].asUInt();
+    return getLayer()->getParameters().at("post_nms_topn");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setPostNMSTopN(size_t topN) {
-    getLayer().getParameters()["post_nms_topn"] = topN;
+    getLayer()->getParameters()["post_nms_topn"] = topN;
     return *this;
 }
 size_t Builder::ProposalLayer::getPreNMSTopN() const {
-    return getLayer().getParameters()["pre_nms_topn"].asUInt();
+    return getLayer()->getParameters().at("pre_nms_topn");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setPreNMSTopN(size_t topN) {
-    getLayer().getParameters()["pre_nms_topn"] = topN;
+    getLayer()->getParameters()["pre_nms_topn"] = topN;
     return *this;
 }
 float Builder::ProposalLayer::getNMSThresh() const {
-    return getLayer().getParameters()["nms_thresh"].asFloat();
+    return getLayer()->getParameters().at("nms_thresh");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setNMSThresh(float thresh) {
-    getLayer().getParameters()["nms_thresh"] = thresh;
+    getLayer()->getParameters()["nms_thresh"] = thresh;
     return *this;
 }
 size_t Builder::ProposalLayer::getBaseSize() const {
-    return getLayer().getParameters()["base_size"].asUInt();
+    return getLayer()->getParameters().at("base_size");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setBaseSize(size_t baseSize) {
-    getLayer().getParameters()["base_size"] = baseSize;
+    getLayer()->getParameters()["base_size"] = baseSize;
     return *this;
 }
 size_t Builder::ProposalLayer::getMinSize() const {
-    return getLayer().getParameters()["min_size"].asUInt();
+    return getLayer()->getParameters().at("min_size");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setMinSize(size_t minSize) {
-    getLayer().getParameters()["min_size"] = minSize;
+    getLayer()->getParameters()["min_size"] = minSize;
     return *this;
 }
 size_t Builder::ProposalLayer::getFeatStride() const {
-    return getLayer().getParameters()["feat_stride"].asUInt();
+    return getLayer()->getParameters().at("feat_stride");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setFeatStride(size_t featStride) {
-    getLayer().getParameters()["feat_stride"] = featStride;
+    getLayer()->getParameters()["feat_stride"] = featStride;
     return *this;
 }
 const std::vector<float> Builder::ProposalLayer::getScale() const {
-    return getLayer().getParameters()["scale"].asFloats();
+    return getLayer()->getParameters().at("scale");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setScale(const std::vector<float>& scales) {
-    getLayer().getParameters()["scale"] = scales;
+    getLayer()->getParameters()["scale"] = scales;
     return *this;
 }
 const std::vector<float> Builder::ProposalLayer::getRatio() const {
-    return getLayer().getParameters()["ratio"].asFloats();
+    return getLayer()->getParameters().at("ratio");
 }
 Builder::ProposalLayer& Builder::ProposalLayer::setRatio(const std::vector<float>& ratios) {
-    getLayer().getParameters()["ratio"] = ratios;
+    getLayer()->getParameters()["ratio"] = ratios;
     return *this;
 }
+
+REG_CONVERTER_FOR(Proposal, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["post_nms_topn"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("post_nms_topn", 0));
+    layer.getParameters()["pre_nms_topn"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("pre_nms_topn", 0));
+    layer.getParameters()["nms_thresh"] = cnnLayer->GetParamAsFloat("nms_thresh", 0);
+    layer.getParameters()["min_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("base_size", 0));
+    layer.getParameters()["max_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("max_size", 0));
+    layer.getParameters()["max_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("feat_stride", 0));
+    layer.getParameters()["scale"] = cnnLayer->GetParamAsFloats("scale");
+    layer.getParameters()["ratio"] = cnnLayer->GetParamAsFloats("ratio");
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_psroi_pooling_layer.cpp b/inference-engine/src/inference_engine/builders/ie_psroi_pooling_layer.cpp
index 8a023d3a3..ac768e28f 100644
--- a/inference-engine/src/inference_engine/builders/ie_psroi_pooling_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_psroi_pooling_layer.cpp
@@ -1,61 +1,70 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_psroi_pooling_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::PSROIPoolingLayer::PSROIPoolingLayer(const std::string& name): LayerFragment("PSROIPooling", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::PSROIPoolingLayer::PSROIPoolingLayer(const std::string& name): LayerDecorator("PSROIPooling", name) {
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::PSROIPoolingLayer::PSROIPoolingLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "PSROIPooling"))
-        THROW_IE_EXCEPTION << "Cannot create PSROIPoolingLayer decorator for layer " << getLayer().getType();
+Builder::PSROIPoolingLayer::PSROIPoolingLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("PSROIPooling");
+}
+
+Builder::PSROIPoolingLayer::PSROIPoolingLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("PSROIPooling");
 }
 
 Builder::PSROIPoolingLayer& Builder::PSROIPoolingLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 const std::vector<Port>& Builder::PSROIPoolingLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 Builder::PSROIPoolingLayer& Builder::PSROIPoolingLayer::setInputPorts(const std::vector<Port>& ports) {
     if (ports.size() != 2)
         THROW_IE_EXCEPTION << "PSROIPoolingLayer should have 2 inputs!";
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 const Port& Builder::PSROIPoolingLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 Builder::PSROIPoolingLayer& Builder::PSROIPoolingLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 float Builder::PSROIPoolingLayer::getSpatialScale() const {
-    return getLayer().getParameters()["spatial_scale"].asFloat();
+    return getLayer()->getParameters().at("spatial_scale");
 }
 Builder::PSROIPoolingLayer& Builder::PSROIPoolingLayer::setSpatialScale(float spatialScale) {
-    getLayer().getParameters()["spatial_scale"] = spatialScale;
+    getLayer()->getParameters()["spatial_scale"] = spatialScale;
     return *this;
 }
 size_t Builder::PSROIPoolingLayer::getOutputDim() const {
-    return getLayer().getParameters()["output_dim"].asUInt();
+    return getLayer()->getParameters().at("output_dim");
 }
 Builder::PSROIPoolingLayer& Builder::PSROIPoolingLayer::setOutputDim(size_t outDim) {
-    getLayer().getParameters()["output_dim"] = outDim;
+    getLayer()->getParameters()["output_dim"] = outDim;
     return *this;
 }
 size_t Builder::PSROIPoolingLayer::getGroupSize() const {
-    return getLayer().getParameters()["group_size"].asUInt();
+    return getLayer()->getParameters().at("group_size");
 }
 Builder::PSROIPoolingLayer& Builder::PSROIPoolingLayer::setGroupSize(size_t size) {
-    getLayer().getParameters()["group_size"] = size;
+    getLayer()->getParameters()["group_size"] = size;
     return *this;
 }
+
+REG_CONVERTER_FOR(PSROIPooling, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["group_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("group_size", 0));
+    layer.getParameters()["output_dim"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("output_dim", 0));
+    layer.getParameters()["spatial_scale"] = cnnLayer->GetParamAsFloat("spatial_scale", 0);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_region_yolo_layer.cpp b/inference-engine/src/inference_engine/builders/ie_region_yolo_layer.cpp
index bcefcbb0a..3e4c42e70 100644
--- a/inference-engine/src/inference_engine/builders/ie_region_yolo_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_region_yolo_layer.cpp
@@ -1,96 +1,110 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_region_yolo_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::RegionYoloLayer::RegionYoloLayer(const std::string& name): LayerFragment("RegionYolo", name) {
-    getLayer().getInputPorts().resize(1);
-    getLayer().getOutputPorts().resize(1);
+Builder::RegionYoloLayer::RegionYoloLayer(const std::string& name): LayerDecorator("RegionYolo", name) {
+    getLayer()->getInputPorts().resize(1);
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::RegionYoloLayer::RegionYoloLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "RegionYolo"))
-        THROW_IE_EXCEPTION << "Cannot create RegionYoloLayer decorator for layer " << getLayer().getType();
+Builder::RegionYoloLayer::RegionYoloLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("RegionYolo");
+}
+
+Builder::RegionYoloLayer::RegionYoloLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("RegionYolo");
 }
 
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 const Port& Builder::RegionYoloLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setInputPort(const Port& port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 const Port& Builder::RegionYoloLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 int Builder::RegionYoloLayer::getCoords() const {
-    return getLayer().getParameters()["coords"].asInt();
+    return getLayer()->getParameters().at("coords");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setCoords(int coords) {
-    getLayer().getParameters()["coords"] = coords;
+    getLayer()->getParameters()["coords"] = coords;
     return *this;
 }
 int Builder::RegionYoloLayer::getClasses() const {
-    return getLayer().getParameters()["classes"].asInt();
+    return getLayer()->getParameters().at("classes");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setClasses(int classes) {
-    getLayer().getParameters()["classes"] = classes;
+    getLayer()->getParameters()["classes"] = classes;
     return *this;
 }
 int Builder::RegionYoloLayer::getNum() const {
-    return getLayer().getParameters()["num"].asInt();
+    return getLayer()->getParameters().at("num");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setNum(int num) {
-    getLayer().getParameters()["num"] = num;
+    getLayer()->getParameters()["num"] = num;
     return *this;
 }
 bool Builder::RegionYoloLayer::getDoSoftMax() const {
-    return getLayer().getParameters()["do_softmax"].asBool();
+    return getLayer()->getParameters().at("do_softmax");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setDoSoftMax(bool flag) {
-    getLayer().getParameters()["do_softmax"] = flag ? 1 : 0;
+    getLayer()->getParameters()["do_softmax"] = flag ? 1 : 0;
     return *this;
 }
 float Builder::RegionYoloLayer::getAnchors() const {
-    return getLayer().getParameters()["anchors"].asFloat();
+    return getLayer()->getParameters().at("anchors");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setAnchors(float anchors) {
-    getLayer().getParameters()["anchors"] = anchors;
+    getLayer()->getParameters()["anchors"] = anchors;
     return *this;
 }
 int Builder::RegionYoloLayer::getMask() const {
-    return getLayer().getParameters()["mask"].asInt();
+    return getLayer()->getParameters().at("mask");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setMask(int mask) {
-    getLayer().getParameters()["mask"] = mask;
+    getLayer()->getParameters()["mask"] = mask;
     return *this;
 }
 size_t Builder::RegionYoloLayer::getAxis() const {
-    return getLayer().getParameters()["axis"].asUInt();
+    return getLayer()->getParameters().at("axis");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setAxis(size_t axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
 }
 size_t Builder::RegionYoloLayer::getEndAxis() const {
-    return getLayer().getParameters()["end_axis"].asUInt();
+    return getLayer()->getParameters().at("end_axis");
 }
 Builder::RegionYoloLayer& Builder::RegionYoloLayer::setEndAxis(size_t axis) {
-    getLayer().getParameters()["end_axis"] = axis;
+    getLayer()->getParameters()["end_axis"] = axis;
     return *this;
 }
+
+REG_CONVERTER_FOR(RegionYoloLayer, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["end_axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("end_axis", 0));
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis", 0));
+    layer.getParameters()["num"] = cnnLayer->GetParamAsInt("num", 0);
+    layer.getParameters()["mask"] = cnnLayer->GetParamAsInt("mask", 0);
+    layer.getParameters()["coords"] = cnnLayer->GetParamAsInt("coords", 0);
+    layer.getParameters()["classes"] = cnnLayer->GetParamAsInt("classes", 0);
+    layer.getParameters()["anchors"] = cnnLayer->GetParamAsFloat("anchors", 0);
+    layer.getParameters()["do_softmax"] = cnnLayer->GetParamAsBool("do_softmax", false);
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_relu6_layer.cpp b/inference-engine/src/inference_engine/builders/ie_relu6_layer.cpp
index d39b2d06f..966dcb540 100644
--- a/inference-engine/src/inference_engine/builders/ie_relu6_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_relu6_layer.cpp
@@ -1,47 +1,62 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_relu6_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ReLU6Layer::ReLU6Layer(const std::string& name): LayerFragment("ReLU6", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ReLU6Layer::ReLU6Layer(const std::string& name): LayerDecorator("ReLU6", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setN(6);
 }
 
-Builder::ReLU6Layer::ReLU6Layer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ReLU6"))
-        THROW_IE_EXCEPTION << "Cannot create ReLU6Layer decorator for layer " << getLayer().getType();
+Builder::ReLU6Layer::ReLU6Layer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ReLU6");
+}
+
+Builder::ReLU6Layer::ReLU6Layer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ReLU6");
 }
 
 Builder::ReLU6Layer& Builder::ReLU6Layer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ReLU6Layer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ReLU6Layer& Builder::ReLU6Layer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::ReLU6Layer::getN() const {
-    return getLayer().getParameters()["n"].asFloat();
+    return getLayer()->getParameters().at("n");
 }
 
 Builder::ReLU6Layer& Builder::ReLU6Layer::setN(float n) {
-    getLayer().getParameters()["n"] = n;
+    getLayer()->getParameters()["n"] = n;
     return *this;
 }
 
-
+REG_VALIDATOR_FOR(ReLU6, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(ReLU6, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["n"] = cnnLayer->GetParamAsFloat("n", 0);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_relu_layer.cpp b/inference-engine/src/inference_engine/builders/ie_relu_layer.cpp
index 29793c43b..63c221b4a 100644
--- a/inference-engine/src/inference_engine/builders/ie_relu_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_relu_layer.cpp
@@ -1,45 +1,63 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_relu_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ReLULayer::ReLULayer(const std::string& name): LayerFragment("ReLU", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ReLULayer::ReLULayer(const std::string& name): LayerDecorator("ReLU", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setNegativeSlope(0);
 }
 
-Builder::ReLULayer::ReLULayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ReLU"))
-        THROW_IE_EXCEPTION << "Cannot create ReLULayer decorator for layer " << getLayer().getType();
+Builder::ReLULayer::ReLULayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ReLU");
+}
+
+Builder::ReLULayer::ReLULayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ReLU");
 }
 
 Builder::ReLULayer& Builder::ReLULayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ReLULayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ReLULayer& Builder::ReLULayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 float Builder::ReLULayer::getNegativeSlope() const {
-    return getLayer().getParameters()["negative_slope"].asFloat();
+    return getLayer()->getParameters().at("negative_slope");
 }
 
 Builder::ReLULayer& Builder::ReLULayer::setNegativeSlope(float negativeSlope) {
-    getLayer().getParameters()["negative_slope"] = negativeSlope;
+    getLayer()->getParameters()["negative_slope"] = negativeSlope;
     return *this;
 }
+
+REG_VALIDATOR_FOR(ReLU, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    Builder::ReLULayer layer(input_layer);
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
+
+REG_CONVERTER_FOR(ReLU, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["negative_slope"] = cnnLayer->GetParamAsFloat("negative_slope", 0);
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_reorg_yolo_layer.cpp b/inference-engine/src/inference_engine/builders/ie_reorg_yolo_layer.cpp
index 83c831fbb..3d2e7f416 100644
--- a/inference-engine/src/inference_engine/builders/ie_reorg_yolo_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_reorg_yolo_layer.cpp
@@ -1,47 +1,53 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_reorg_yolo_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ReorgYoloLayer::ReorgYoloLayer(const std::string& name): LayerFragment("ReorgYolo", name) {
-    getLayer().getInputPorts().resize(1);
-    getLayer().getOutputPorts().resize(1);
+Builder::ReorgYoloLayer::ReorgYoloLayer(const std::string& name): LayerDecorator("ReorgYolo", name) {
+    getLayer()->getInputPorts().resize(1);
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::ReorgYoloLayer::ReorgYoloLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ReorgYolo"))
-        THROW_IE_EXCEPTION << "Cannot create ReorgYoloLayer decorator for layer " << getLayer().getType();
+Builder::ReorgYoloLayer::ReorgYoloLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ReorgYolo");
+}
+
+Builder::ReorgYoloLayer::ReorgYoloLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ReorgYolo");
 }
 
 Builder::ReorgYoloLayer& Builder::ReorgYoloLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 const Port& Builder::ReorgYoloLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 Builder::ReorgYoloLayer& Builder::ReorgYoloLayer::setInputPort(const Port& port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 const Port& Builder::ReorgYoloLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 Builder::ReorgYoloLayer& Builder::ReorgYoloLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 int Builder::ReorgYoloLayer::getStride() const {
-    return getLayer().getParameters()["stride"].asInt();
+    return getLayer()->getParameters().at("stride");
 }
 Builder::ReorgYoloLayer& Builder::ReorgYoloLayer::setStride(int stride) {
-    getLayer().getParameters()["stride"] = stride;
+    getLayer()->getParameters()["stride"] = stride;
     return *this;
 }
 
+REG_CONVERTER_FOR(ReorgYolo, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["stride"] = cnnLayer->GetParamAsInt("stride", 0);
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_resample_layer.cpp b/inference-engine/src/inference_engine/builders/ie_resample_layer.cpp
new file mode 100644
index 000000000..ca2ddda28
--- /dev/null
+++ b/inference-engine/src/inference_engine/builders/ie_resample_layer.cpp
@@ -0,0 +1,95 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <builders/ie_resample_layer.hpp>
+#include <ie_cnn_layer_builder.h>
+#include <vector>
+#include <string>
+
+using namespace InferenceEngine;
+
+Builder::ResampleLayer::ResampleLayer(const std::string& name): LayerDecorator("Resample", name) {
+    getLayer()->getInputPorts().resize(1);
+    getLayer()->getOutputPorts().resize(1);
+}
+
+Builder::ResampleLayer::ResampleLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Resample");
+}
+
+Builder::ResampleLayer::ResampleLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Resample");
+}
+
+Builder::ResampleLayer& Builder::ResampleLayer::setName(const std::string& name) {
+    getLayer()->setName(name);
+    return *this;
+}
+const Port& Builder::ResampleLayer::getInputPort() const {
+    return getLayer()->getInputPorts()[0];
+}
+Builder::ResampleLayer& Builder::ResampleLayer::setInputPort(const Port& port) {
+    getLayer()->getInputPorts()[0] = port;
+    return *this;
+}
+const Port& Builder::ResampleLayer::getOutputPort() const {
+    return getLayer()->getOutputPorts()[0];
+}
+Builder::ResampleLayer& Builder::ResampleLayer::setOutputPort(const Port& port) {
+    getLayer()->getOutputPorts()[0] = port;
+    return *this;
+}
+
+const std::string &Builder::ResampleLayer::getResampleType() const {
+    return getLayer()->getParameters().at("type");
+}
+
+Builder::ResampleLayer &Builder::ResampleLayer::setResampleType(const std::string &type) {
+    getLayer()->getParameters()["type"] = type;
+    return *this;
+}
+
+bool Builder::ResampleLayer::getAntialias() const {
+    return getLayer()->getParameters().at("antialias");
+}
+
+Builder::ResampleLayer &Builder::ResampleLayer::setAntialias(bool antialias) {
+    getLayer()->getParameters()["antialias"] = antialias;
+    return *this;
+}
+
+float Builder::ResampleLayer::getFactor() const {
+    return getLayer()->getParameters().at("factor");
+}
+
+Builder::ResampleLayer &Builder::ResampleLayer::setFactor(float factor) {
+    getLayer()->getParameters()["factor"] = factor;
+    return *this;
+}
+
+size_t Builder::ResampleLayer::getWidth() const {
+    return getLayer()->getParameters().at("width");
+}
+
+Builder::ResampleLayer &Builder::ResampleLayer::setWidth(size_t width) {
+    getLayer()->getParameters()["width"] = width;
+    return *this;
+}
+
+size_t Builder::ResampleLayer::getHeight() const {
+    return getLayer()->getParameters().at("height");
+}
+
+Builder::ResampleLayer &Builder::ResampleLayer::setHeight(size_t height) {
+    getLayer()->getParameters()["height"] = height;
+    return *this;
+}
+
+REG_CONVERTER_FOR(Resample, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["height"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("height", 0));
+    layer.getParameters()["width"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("width", 0));
+    layer.getParameters()["factor"] = cnnLayer->GetParamAsFloat("factor", 0);
+    layer.getParameters()["antialias"] = cnnLayer->GetParamAsBool("antialias", false);
+    layer.getParameters()["type"] = cnnLayer->GetParamAsString("type");
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_reshape_layer.cpp b/inference-engine/src/inference_engine/builders/ie_reshape_layer.cpp
index 9f6c1f961..e72f2feee 100644
--- a/inference-engine/src/inference_engine/builders/ie_reshape_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_reshape_layer.cpp
@@ -1,54 +1,65 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_reshape_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ReshapeLayer::ReshapeLayer(const std::string& name): LayerFragment("Reshape", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ReshapeLayer::ReshapeLayer(const std::string& name): LayerDecorator("Reshape", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::ReshapeLayer::ReshapeLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Reshape"))
-        THROW_IE_EXCEPTION << "Cannot create ReshapeLayer decorator for layer " << getLayer().getType();
+Builder::ReshapeLayer::ReshapeLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Reshape");
+}
+
+Builder::ReshapeLayer::ReshapeLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Reshape");
 }
 
 Builder::ReshapeLayer& Builder::ReshapeLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ReshapeLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::ReshapeLayer& Builder::ReshapeLayer::setInputPort(const Port &port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::ReshapeLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ReshapeLayer& Builder::ReshapeLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<int> Builder::ReshapeLayer::getDims() const {
-    return getLayer().getParameters()["dim"].asInts();
+    return getLayer()->getParameters().at("dim");
 }
 
 Builder::ReshapeLayer& Builder::ReshapeLayer::setDims(const std::vector<int>& dims) {
-    getLayer().getParameters()["dim"] = dims;
+    getLayer()->getParameters()["dim"] = dims;
     return *this;
 }
 
+REG_CONVERTER_FOR(Flatten, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis", 0));
+    layer.getParameters()["dim"] = cnnLayer->GetParamAsInts("dim", {});
+});
+REG_CONVERTER_FOR(Reshape, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis", 0));
+    layer.getParameters()["dim"] = cnnLayer->GetParamAsInts("dim", {});
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_rnn_sequence_layer.cpp b/inference-engine/src/inference_engine/builders/ie_rnn_sequence_layer.cpp
new file mode 100644
index 000000000..9382a94aa
--- /dev/null
+++ b/inference-engine/src/inference_engine/builders/ie_rnn_sequence_layer.cpp
@@ -0,0 +1,100 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <builders/ie_rnn_sequence_layer.hpp>
+#include <ie_cnn_layer_builder.h>
+
+#include <vector>
+#include <string>
+
+using namespace InferenceEngine;
+
+Builder::RNNSequenceLayer::RNNSequenceLayer(const std::string& name): LayerDecorator("RNNSequence", name) {
+    getLayer()->getOutputPorts().resize(2);
+    getLayer()->getInputPorts().resize(5);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getInputPorts()[3].setParameter("type", "optional");
+}
+
+Builder::RNNSequenceLayer::RNNSequenceLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("RNNSequence");
+}
+
+Builder::RNNSequenceLayer::RNNSequenceLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("RNNSequence");
+}
+
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setName(const std::string& name) {
+    getLayer()->setName(name);
+    return *this;
+}
+
+const std::vector<Port>& Builder::RNNSequenceLayer::getInputPorts() const {
+    return getLayer()->getInputPorts();
+}
+
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setInputPorts(const std::vector<Port>& ports) {
+    getLayer()->getInputPorts() = ports;
+    return *this;
+}
+
+const std::vector<Port>& Builder::RNNSequenceLayer::getOutputPorts() const {
+    return getLayer()->getOutputPorts();
+}
+
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setOutputPorts(const std::vector<Port>& ports) {
+    getLayer()->getOutputPorts() = ports;
+    return *this;
+}
+int Builder::RNNSequenceLayer::getHiddenSize() const {
+    return getLayer()->getParameters().at("hidden_size");
+}
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setHiddenSize(int size) {
+    getLayer()->getParameters()["hidden_size"] = size;
+    return *this;
+}
+bool Builder::RNNSequenceLayer::getSequenceDim() const {
+    return getLayer()->getParameters().at("sequence_dim");
+}
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setSqquenceDim(bool flag) {
+    getLayer()->getParameters()["sequence_dim"] = flag;
+    return *this;
+}
+const std::vector<std::string>& Builder::RNNSequenceLayer::getActivations() const {
+    return getLayer()->getParameters().at("activations");
+}
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setActivations(const std::vector<std::string>& activations) {
+    getLayer()->getParameters()["activations"] = activations;
+    return *this;
+}
+const std::vector<float>& Builder::RNNSequenceLayer::getActivationsAlpha() const {
+    return getLayer()->getParameters().at("activations_alpha");
+}
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setActivationsAlpha(const std::vector<float>& activations) {
+    getLayer()->getParameters()["activations_alpha"] = activations;
+    return *this;
+}
+const std::vector<float>& Builder::RNNSequenceLayer::getActivationsBeta() const {
+    return getLayer()->getParameters().at("activations_beta");
+}
+Builder::RNNSequenceLayer& Builder::RNNSequenceLayer::setActivationsBeta(const std::vector<float>& activations) {
+    getLayer()->getParameters()["activations_beta"] = activations;
+    return *this;
+}
+REG_CONVERTER_FOR(RNNSequence, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["hidden_size"] = cnnLayer->GetParamAsInt("hidden_size");
+    layer.getParameters()["sequence_dim"] = cnnLayer->GetParamsAsBool("sequence_dim", true);
+    std::vector<std::string> activations;
+    std::istringstream stream(cnnLayer->GetParamAsString("activations"));
+    std::string str;
+    while (getline(stream, str, ',')) {
+         activations.push_back(str);
+    }
+    layer.getParameters()["activations"] = activations;
+    layer.getParameters()["activations_alpha"] = cnnLayer->GetParamAsFloats("activations_alpha");
+    layer.getParameters()["activations_beta"] = cnnLayer->GetParamAsFloats("activations_beta");
+});
+
+
diff --git a/inference-engine/src/inference_engine/builders/ie_roi_pooling_layer.cpp b/inference-engine/src/inference_engine/builders/ie_roi_pooling_layer.cpp
index bd1cf4f63..ad0963cb8 100644
--- a/inference-engine/src/inference_engine/builders/ie_roi_pooling_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_roi_pooling_layer.cpp
@@ -1,58 +1,68 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_roi_pooling_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ROIPoolingLayer::ROIPoolingLayer(const std::string& name): LayerFragment("ROIPooling", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::ROIPoolingLayer::ROIPoolingLayer(const std::string& name): LayerDecorator("ROIPooling", name) {
+    getLayer()->getOutputPorts().resize(1);
     setPooled({0, 0});
 }
 
-Builder::ROIPoolingLayer::ROIPoolingLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ROIPooling"))
-        THROW_IE_EXCEPTION << "Cannot create ROIPoolingLayer decorator for layer " << getLayer().getType();
+Builder::ROIPoolingLayer::ROIPoolingLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ROIPooling");
+}
+
+Builder::ROIPoolingLayer::ROIPoolingLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ROIPooling");
 }
 
 Builder::ROIPoolingLayer& Builder::ROIPoolingLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 const std::vector<Port>& Builder::ROIPoolingLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 Builder::ROIPoolingLayer& Builder::ROIPoolingLayer::setInputPorts(const std::vector<Port>& ports) {
     if (ports.size() != 2)
         THROW_IE_EXCEPTION << "ROIPoolingLayer should have 2 inputs!";
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 const Port& Builder::ROIPoolingLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 Builder::ROIPoolingLayer& Builder::ROIPoolingLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 float Builder::ROIPoolingLayer::getSpatialScale() const {
-    return getLayer().getParameters()["spatial_scale"].asFloat();
+    return getLayer()->getParameters().at("spatial_scale");
 }
 Builder::ROIPoolingLayer& Builder::ROIPoolingLayer::setSpatialScale(float spatialScale) {
-    getLayer().getParameters()["spatial_scale"] = spatialScale;
+    getLayer()->getParameters()["spatial_scale"] = spatialScale;
     return *this;
 }
 const std::vector<int> Builder::ROIPoolingLayer::getPooled() const {
-    return {getLayer().getParameters()["pooled_h"].asInt(0), getLayer().getParameters()["pooled_w"].asInt(0)};
+    return {getLayer()->getParameters().at("pooled_h"),
+            getLayer()->getParameters().at("pooled_w")};
 }
 Builder::ROIPoolingLayer& Builder::ROIPoolingLayer::setPooled(const std::vector<int>& pooled) {
     if (pooled.size() != 2)
         THROW_IE_EXCEPTION << "ROIPoolingLayer supports only pooled for height and width dimensions";
-    getLayer().getParameters()["pooled_h"] = pooled[0];
-    getLayer().getParameters()["pooled_w"] = pooled[1];
+    getLayer()->getParameters()["pooled_h"] = pooled[0];
+    getLayer()->getParameters()["pooled_w"] = pooled[1];
     return *this;
 }
+
+REG_CONVERTER_FOR(ROIPooling, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["pooled_h"] = cnnLayer->GetParamAsInt("pooled_h", 0);
+    layer.getParameters()["pooled_w"] = cnnLayer->GetParamAsInt("pooled_w", 0);
+    layer.getParameters()["spatial_scale"] = cnnLayer->GetParamAsFloat("spatial_scale");
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_scale_shift_layer.cpp b/inference-engine/src/inference_engine/builders/ie_scale_shift_layer.cpp
index 534959be7..95ec73785 100644
--- a/inference-engine/src/inference_engine/builders/ie_scale_shift_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_scale_shift_layer.cpp
@@ -1,44 +1,39 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_scale_shift_layer.hpp>
-#include <details/caseless.hpp>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::ScaleShiftLayer::ScaleShiftLayer(const std::string& name): LayerFragment("ScaleShift", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::ScaleShiftLayer::ScaleShiftLayer(const std::string& name): LayerDecorator("ScaleShift", name) {
+    getLayer()->getInputPorts().resize(3);
+    getLayer()->getInputPorts()[1].setParameter("type", "weights");
+    getLayer()->getInputPorts()[2].setParameter("type", "biases");
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::ScaleShiftLayer::ScaleShiftLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "ScaleShift"))
-        THROW_IE_EXCEPTION << "Cannot create ScaleShiftLayer decorator for layer " << getLayer().getType();
+Builder::ScaleShiftLayer::ScaleShiftLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("ScaleShift");
+}
+
+Builder::ScaleShiftLayer::ScaleShiftLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("ScaleShift");
 }
 
 Builder::ScaleShiftLayer& Builder::ScaleShiftLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::ScaleShiftLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::ScaleShiftLayer& Builder::ScaleShiftLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
-    return *this;
-}
-
-Builder::ScaleShiftLayer& Builder::ScaleShiftLayer::setWeights(const Blob::CPtr& weights) {
-    getLayer().addConstantData("weights", weights);
-    return *this;
-}
-Builder::ScaleShiftLayer& Builder::ScaleShiftLayer::setBiases(const Blob::CPtr& biases) {
-    getLayer().addConstantData("biases", biases);
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_sigmoid_layer.cpp b/inference-engine/src/inference_engine/builders/ie_sigmoid_layer.cpp
index 72ccc808c..265ad374c 100644
--- a/inference-engine/src/inference_engine/builders/ie_sigmoid_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_sigmoid_layer.cpp
@@ -1,35 +1,37 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_sigmoid_layer.hpp>
-#include <details/caseless.hpp>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::SigmoidLayer::SigmoidLayer(const std::string& name): LayerFragment("Sigmoid", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::SigmoidLayer::SigmoidLayer(const std::string& name): LayerDecorator("Sigmoid", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::SigmoidLayer::SigmoidLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Sigmoid"))
-        THROW_IE_EXCEPTION << "Cannot create SigmoidLayer decorator for layer " << getLayer().getType();
+Builder::SigmoidLayer::SigmoidLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Sigmoid");
+}
+
+Builder::SigmoidLayer::SigmoidLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Sigmoid");
 }
 
 Builder::SigmoidLayer& Builder::SigmoidLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::SigmoidLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::SigmoidLayer& Builder::SigmoidLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
diff --git a/inference-engine/src/inference_engine/builders/ie_simpler_nms_layer.cpp b/inference-engine/src/inference_engine/builders/ie_simpler_nms_layer.cpp
index 1fc3e07a7..5e333136c 100644
--- a/inference-engine/src/inference_engine/builders/ie_simpler_nms_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_simpler_nms_layer.cpp
@@ -1,89 +1,102 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_simpler_nms_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::SimplerNMSLayer::SimplerNMSLayer(const std::string& name): LayerFragment("SimplerNMS", name) {
-    getLayer().getOutputPorts().resize(1);
+Builder::SimplerNMSLayer::SimplerNMSLayer(const std::string& name): LayerDecorator("SimplerNMS", name) {
+    getLayer()->getOutputPorts().resize(1);
 }
 
-Builder::SimplerNMSLayer::SimplerNMSLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "SimplerNMS"))
-        THROW_IE_EXCEPTION << "Cannot create SimplerNMSLayer decorator for layer " << getLayer().getType();
+Builder::SimplerNMSLayer::SimplerNMSLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("SimplerNMS");
+}
+
+Builder::SimplerNMSLayer::SimplerNMSLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("SimplerNMS");
 }
 
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 const std::vector<Port>& Builder::SimplerNMSLayer::getInputPorts() const {
-    return getLayer().getInputPorts();
+    return getLayer()->getInputPorts();
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setInputPorts(const std::vector<Port>& ports) {
-    getLayer().getInputPorts() = ports;
+    getLayer()->getInputPorts() = ports;
     return *this;
 }
 const Port& Builder::SimplerNMSLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setOutputPort(const Port& port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::SimplerNMSLayer::getPreNMSTopN() const {
-    return getLayer().getParameters()["pre_nms_topn"].asUInt();
+    return getLayer()->getParameters().at("pre_nms_topn");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setPreNMSTopN(size_t topN) {
-    getLayer().getParameters()["pre_nms_topn"] = topN;
+    getLayer()->getParameters()["pre_nms_topn"] = topN;
     return *this;
 }
 size_t Builder::SimplerNMSLayer::getPostNMSTopN() const {
-    return getLayer().getParameters()["post_nms_topn"].asUInt();
+    return getLayer()->getParameters().at("post_nms_topn");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setPostNMSTopN(size_t topN) {
-    getLayer().getParameters()["post_nms_topn"] = topN;
+    getLayer()->getParameters()["post_nms_topn"] = topN;
     return *this;
 }
 size_t Builder::SimplerNMSLayer::getFeatStride() const {
-    return getLayer().getParameters()["feat_stride"].asUInt();
+    return getLayer()->getParameters().at("feat_stride");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setFeatStride(size_t featStride) {
-    getLayer().getParameters()["feat_stride"] = featStride;
+    getLayer()->getParameters()["feat_stride"] = featStride;
     return *this;
 }
 size_t Builder::SimplerNMSLayer::getMinBoxSize() const {
-    return getLayer().getParameters()["min_bbox_size"].asUInt();
+    return getLayer()->getParameters().at("min_bbox_size");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setMinBoxSize(size_t minSize) {
-    getLayer().getParameters()["min_bbox_size"] = minSize;
+    getLayer()->getParameters()["min_bbox_size"] = minSize;
     return *this;
 }
 size_t Builder::SimplerNMSLayer::getScale() const {
-    return getLayer().getParameters()["scale"].asUInt();
+    return getLayer()->getParameters().at("scale");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setScale(size_t scale) {
-    getLayer().getParameters()["scale"] = scale;
+    getLayer()->getParameters()["scale"] = scale;
     return *this;
 }
 
 float Builder::SimplerNMSLayer::getCLSThreshold() const {
-    return getLayer().getParameters()["cls_threshold"].asFloat();
+    return getLayer()->getParameters().at("cls_threshold");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setCLSThreshold(float threshold) {
-    getLayer().getParameters()["cls_threshold"] = threshold;
+    getLayer()->getParameters()["cls_threshold"] = threshold;
     return *this;
 }
 float Builder::SimplerNMSLayer::getIOUThreshold() const {
-    return getLayer().getParameters()["iou_threshold"].asFloat();
+    return getLayer()->getParameters().at("iou_threshold");
 }
 Builder::SimplerNMSLayer& Builder::SimplerNMSLayer::setIOUThreshold(float threshold) {
-    getLayer().getParameters()["iou_threshold"] = threshold;
+    getLayer()->getParameters()["iou_threshold"] = threshold;
     return *this;
 }
+
+REG_CONVERTER_FOR(SimplerNMS, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["iou_threshold"] = cnnLayer->GetParamAsFloat("iou_threshold");
+    layer.getParameters()["cls_threshold"] = cnnLayer->GetParamAsFloat("cls_threshold");
+    layer.getParameters()["scale"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("scale"));
+    layer.getParameters()["min_bbox_size"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("min_bbox_size"));
+    layer.getParameters()["feat_stride"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("feat_stride"));
+    layer.getParameters()["pre_nms_topn"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("pre_nms_topn"));
+    layer.getParameters()["post_nms_topn"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("post_nms_topn"));
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_softmax_layer.cpp b/inference-engine/src/inference_engine/builders/ie_softmax_layer.cpp
index d4ccfa9f8..32cde388f 100644
--- a/inference-engine/src/inference_engine/builders/ie_softmax_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_softmax_layer.cpp
@@ -1,45 +1,52 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_softmax_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::SoftMaxLayer::SoftMaxLayer(const std::string& name): LayerFragment("SoftMax", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::SoftMaxLayer::SoftMaxLayer(const std::string& name): LayerDecorator("SoftMax", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
     setAxis(1);
 }
 
-Builder::SoftMaxLayer::SoftMaxLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "SoftMax"))
-        THROW_IE_EXCEPTION << "Cannot create SoftMaxLayer decorator for layer " << getLayer().getType();
+Builder::SoftMaxLayer::SoftMaxLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("SoftMax");
+}
+
+Builder::SoftMaxLayer::SoftMaxLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("SoftMax");
 }
 
 Builder::SoftMaxLayer& Builder::SoftMaxLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::SoftMaxLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::SoftMaxLayer& Builder::SoftMaxLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::SoftMaxLayer::getAxis() const {
-    return getLayer().getParameters()["axis"].asUInt();
+    return getLayer()->getParameters().at("axis");
 }
 
 Builder::SoftMaxLayer& Builder::SoftMaxLayer::setAxis(size_t axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
 }
+
+REG_CONVERTER_FOR(SoftMax, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis", 1));
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_split_layer.cpp b/inference-engine/src/inference_engine/builders/ie_split_layer.cpp
index 50d04ddff..7c8185c06 100644
--- a/inference-engine/src/inference_engine/builders/ie_split_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_split_layer.cpp
@@ -1,53 +1,60 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_split_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::SplitLayer::SplitLayer(const std::string& name): LayerFragment("Concat", name) {
-    getLayer().getInputPorts().resize(1);
+Builder::SplitLayer::SplitLayer(const std::string& name): LayerDecorator("Split", name) {
+    getLayer()->getInputPorts().resize(1);
     setAxis(1);
 }
 
-Builder::SplitLayer::SplitLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Concat"))
-        THROW_IE_EXCEPTION << "Cannot create SplitLayer decorator for layer " << getLayer().getType();
+Builder::SplitLayer::SplitLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Split");
+}
+
+Builder::SplitLayer::SplitLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Split");
 }
 
 Builder::SplitLayer& Builder::SplitLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::SplitLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::SplitLayer& Builder::SplitLayer::setInputPort(const Port &port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const std::vector<Port>& Builder::SplitLayer::getOutputPorts() const {
-    return getLayer().getOutputPorts();
+    return getLayer()->getOutputPorts();
 }
 
 Builder::SplitLayer& Builder::SplitLayer::setOutputPorts(const std::vector<Port>& ports) {
-    getLayer().getOutputPorts() = ports;
+    getLayer()->getOutputPorts() = ports;
     return *this;
 }
 
 size_t Builder::SplitLayer::getAxis() const {
-    return getLayer().getParameters()["axis"].asUInt();
+    return getLayer()->getParameters().at("axis");
 }
 
 Builder::SplitLayer& Builder::SplitLayer::setAxis(size_t axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
 }
+
+REG_CONVERTER_FOR(Split, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis", 1));
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/builders/ie_tanh_layer.cpp b/inference-engine/src/inference_engine/builders/ie_tanh_layer.cpp
index 37eb7eb90..eeb050388 100644
--- a/inference-engine/src/inference_engine/builders/ie_tanh_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_tanh_layer.cpp
@@ -1,35 +1,47 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_tanh_layer.hpp>
-#include <details/caseless.hpp>
 
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::TanHLayer::TanHLayer(const std::string& name): LayerFragment("TanH", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::TanHLayer::TanHLayer(const std::string& name): LayerDecorator("TanH", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::TanHLayer::TanHLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "TanH"))
-        THROW_IE_EXCEPTION << "Cannot create TanHLayer decorator for layer " << getLayer().getType();
+Builder::TanHLayer::TanHLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("TanH");
+}
+
+Builder::TanHLayer::TanHLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("TanH");
 }
 
 Builder::TanHLayer& Builder::TanHLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::TanHLayer::getPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::TanHLayer& Builder::TanHLayer::setPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
-}
-\ No newline at end of file
+}
+
+REG_VALIDATOR_FOR(TanH, [] (const InferenceEngine::Builder::Layer::CPtr& input_layer, bool partial) {
+    if (!input_layer->getInputPorts().empty() &&
+        !input_layer->getOutputPorts().empty() &&
+        !input_layer->getInputPorts()[0].shape().empty() &&
+        !input_layer->getOutputPorts()[0].shape().empty() &&
+        input_layer->getInputPorts()[0].shape() != input_layer->getOutputPorts()[0].shape()) {
+        THROW_IE_EXCEPTION << "Input and output ports should be equal";
+    }
+});
diff --git a/inference-engine/src/inference_engine/builders/ie_tile_layer.cpp b/inference-engine/src/inference_engine/builders/ie_tile_layer.cpp
index fade9f305..125c530a7 100644
--- a/inference-engine/src/inference_engine/builders/ie_tile_layer.cpp
+++ b/inference-engine/src/inference_engine/builders/ie_tile_layer.cpp
@@ -1,62 +1,70 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include <builders/ie_tile_layer.hpp>
-#include <details/caseless.hpp>
+#include <ie_cnn_layer_builder.h>
 
 #include <vector>
 #include <string>
 
 using namespace InferenceEngine;
 
-Builder::TileLayer::TileLayer(const std::string& name): LayerFragment("Tile", name) {
-    getLayer().getOutputPorts().resize(1);
-    getLayer().getInputPorts().resize(1);
+Builder::TileLayer::TileLayer(const std::string& name): LayerDecorator("Tile", name) {
+    getLayer()->getOutputPorts().resize(1);
+    getLayer()->getInputPorts().resize(1);
 }
 
-Builder::TileLayer::TileLayer(Layer& genLayer): LayerFragment(genLayer) {
-    if (!details::CaselessEq<std::string>()(getLayer().getType(), "Tile"))
-        THROW_IE_EXCEPTION << "Cannot create TileLayer decorator for layer " << getLayer().getType();
+Builder::TileLayer::TileLayer(const Layer::Ptr& layer): LayerDecorator(layer) {
+    checkType("Tile");
+}
+
+Builder::TileLayer::TileLayer(const Layer::CPtr& layer): LayerDecorator(layer) {
+    checkType("Tile");
 }
 
 Builder::TileLayer& Builder::TileLayer::setName(const std::string& name) {
-    getLayer().getName() = name;
+    getLayer()->setName(name);
     return *this;
 }
 
 const Port& Builder::TileLayer::getInputPort() const {
-    return getLayer().getInputPorts()[0];
+    return getLayer()->getInputPorts()[0];
 }
 
 Builder::TileLayer& Builder::TileLayer::setInputPort(const Port &port) {
-    getLayer().getInputPorts()[0] = port;
+    getLayer()->getInputPorts()[0] = port;
     return *this;
 }
 
 const Port& Builder::TileLayer::getOutputPort() const {
-    return getLayer().getOutputPorts()[0];
+    return getLayer()->getOutputPorts()[0];
 }
 
 Builder::TileLayer& Builder::TileLayer::setOutputPort(const Port &port) {
-    getLayer().getOutputPorts()[0] = port;
+    getLayer()->getOutputPorts()[0] = port;
     return *this;
 }
 
 size_t Builder::TileLayer::getTiles() const {
-    return getLayer().getParameters()["tiles"].asUInt();
+    return getLayer()->getParameters().at("tiles");
 }
 
 Builder::TileLayer& Builder::TileLayer::setTiles(size_t tiles) {
-    getLayer().getParameters()["tiles"] = tiles;
+    getLayer()->getParameters()["tiles"] = tiles;
     return *this;
 }
 
 size_t Builder::TileLayer::getAxis() const {
-    return getLayer().getParameters()["axis"].asUInt();
+    return getLayer()->getParameters().at("axis");
 }
 
 Builder::TileLayer& Builder::TileLayer::setAxis(size_t axis) {
-    getLayer().getParameters()["axis"] = axis;
+    getLayer()->getParameters()["axis"] = axis;
     return *this;
-}
-\ No newline at end of file
+}
+
+REG_CONVERTER_FOR(SoftMax, [](const CNNLayerPtr& cnnLayer, Builder::Layer& layer) {
+    layer.getParameters()["axis"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("axis"));
+    layer.getParameters()["tiles"] = static_cast<size_t>(cnnLayer->GetParamAsUInt("tiles"));
+});
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/cnn_network_impl.cpp b/inference-engine/src/inference_engine/cnn_network_impl.cpp
index 620fe342b..2918da146 100644
--- a/inference-engine/src/inference_engine/cnn_network_impl.cpp
+++ b/inference-engine/src/inference_engine/cnn_network_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,6 +22,14 @@ using namespace InferenceEngine::details;
 CNNNetworkImpl::CNNNetworkImpl(): _targetDevice(TargetDevice::eDefault), _stats(new CNNNetworkStatsImpl()) {
 }
 
+CNNNetworkImpl::~CNNNetworkImpl() {
+    for (auto& data : _data) {
+        for (auto& input : data.second->getInputTo()) {
+            input.second.reset();
+        }
+    }
+}
+
 void CNNNetworkImpl::getOutputsInfo(std::map<std::string, DataPtr>& out) const noexcept {
     out = _outputData;
 }
@@ -34,6 +42,16 @@ void CNNNetworkImpl::addLayer(const CNNLayerPtr& layer) noexcept {
     _layers[layer->name] = layer;
 }
 
+void CNNNetworkImpl::removeLayer(const string& layerName) {
+    auto it = _layers.find(layerName);
+    if (it != _layers.end()) { _layers.erase(it); }
+}
+
+void CNNNetworkImpl::removeData(const string& dataName) {
+    auto it = _data.find(dataName);
+    if (it != _data.end()) { _data.erase(it); }
+}
+
 void CNNNetworkImpl::validate(int version) {
     if (version != 1) {
         std::set<std::string> layerNames;
diff --git a/inference-engine/src/inference_engine/cnn_network_impl.hpp b/inference-engine/src/inference_engine/cnn_network_impl.hpp
index d2d9ae128..87ac2e5f1 100644
--- a/inference-engine/src/inference_engine/cnn_network_impl.hpp
+++ b/inference-engine/src/inference_engine/cnn_network_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -28,6 +28,7 @@ namespace details {
 class INFERENCE_ENGINE_API_CLASS(CNNNetworkImpl) : public ICNNNetwork {
 public:
     CNNNetworkImpl();
+    ~CNNNetworkImpl() override;
     Precision getPrecision() const noexcept override {
         return precision;
     }
@@ -52,6 +53,10 @@ public:
         _inputData[data->name()] = data;
     }
 
+    void removeInputInfo(const std::string& name) {
+        _inputData.erase(name);
+    }
+
     void getName(char* pName, size_t len) const noexcept override {
         // Description buffer will preserve garbage if external pointer not initialized
         if (len < 1) return;
@@ -85,6 +90,10 @@ public:
 
     void addLayer(const CNNLayerPtr& layer) noexcept override;
 
+    void removeLayer(const std::string& layerName);
+
+    void removeData(const std::string& dataName);
+
     StatusCode getLayerByName(const char* layerName, CNNLayerPtr& out, ResponseDesc* resp) const noexcept override;
 
     // deprecated, as there is no ResponseDesc to put error message
diff --git a/inference-engine/src/inference_engine/cnn_network_int8_normalizer.cpp b/inference-engine/src/inference_engine/cnn_network_int8_normalizer.cpp
index 58dd61fc1..435c24dee 100644
--- a/inference-engine/src/inference_engine/cnn_network_int8_normalizer.cpp
+++ b/inference-engine/src/inference_engine/cnn_network_int8_normalizer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -44,13 +44,18 @@ CNNStatisticHelper::CNNStatisticHelper(CNNNetwork &network, const std::map<std::
     NormalizeStatistic();
 }
 
-bool CNNStatisticHelper::canLayerBeQuantized(const std::string &layerName) const {
-    // TODO(amalyshe) this verification should be extended to 1) inputs 2) there might not be
-    // statistic for every and each layer, but we might go over layers to search it
-    if (internalNodesStats_.find(layerName) == internalNodesStats_.end()) {
-        return true;
+bool CNNStatisticHelper::canLayerBeQuantized(CNNLayer::Ptr layer) const {
+    // verification of existing statistic for all inputs
+    for (const auto i : layer->insData) {
+        if (internalNodesStats_.find(i.lock()->creatorLayer.lock()->name) == internalNodesStats_.end()) {
+            return false;
+        }
     }
-    return false;
+    // verification if there is a statistic for output of the layer
+    if ((layer->outData.size() > 1) && (internalNodesStats_.find(layer->name) == internalNodesStats_.end())) {
+        return false;
+    }
+    return true;
 }
 
 void CNNStatisticHelper::copyStatistics(const std::string& srcName, const std::string& dstName) {
@@ -75,13 +80,18 @@ InferenceEngine::Blob::Ptr CNNStatisticHelper::getInputScale(CNNLayer::Ptr layer
     std::string inputLayerName = previousLayer->name;
 
     // for case when we have the only average pooling before, we need to take this
-    // statistic from input of avg pooloing to compensate work of average pooling
+    // statistic from input of avg pooling to compensate work of average pooling
     // and to stay in int8 as much as we can
     if (previousLayer->type == "Pooling" && (previousLayer->precision == Precision::I8 || previousLayer->precision == Precision::U8)) {
         // take input name to the pooling
         inputLayerName = previousLayer->insData[0].lock()->creatorLayer.lock()->name;
     }
     size_t inputChannels = layer->insData[0].lock()->getTensorDesc().getDims()[1];
+    if (getStatistic(previousLayer)->_minOutputs.size() != inputChannels
+        || getStatistic(previousLayer)->_maxOutputs.size() != inputChannels) {
+        THROW_IE_EXCEPTION << "min and max sizes should be equal to input channels count for " << previousLayer->name;
+    }
+
     return calculateScaleFactor(inputChannels, getStatistic(previousLayer),
                                 hasNegativeOutput(previousLayer->name) ? maxSign_ : maxUnsign_);
 }
@@ -90,8 +100,13 @@ InferenceEngine::Blob::Ptr CNNStatisticHelper::getOutputScale(CNNLayer::Ptr laye
     // TODO(amalyshe) for now we are looking to precision on the data node
     size_t outputChannels = layer->outData[0]->getTensorDesc().getDims()[1];
     if (layer->outData.size() != 1) {
-        THROW_IE_EXCEPTION << "Trying to get scales after layer having multiple ouptut ports";
+        THROW_IE_EXCEPTION << "Trying to get scales after layer having multiple output ports";
+    }
+    if (getStatistic(layer)->_minOutputs.size() != outputChannels
+        || getStatistic(layer)->_maxOutputs.size() != outputChannels) {
+        THROW_IE_EXCEPTION << "min and max sizes should be equal to output channels count for " << layer->name;
     }
+
     return calculateScaleFactor(outputChannels, getStatistic(layer),
                                 layer->outData[0]->getPrecision() == Precision::I8 ? maxSign_ : maxUnsign_);
 }
@@ -139,7 +154,8 @@ NetworkNodeStatsPtr CNNStatisticHelper::getStatistic(CNNLayer::Ptr layer) const
 
 CNNLayer::Ptr CNNStatisticHelper::getLatestInFuse(CNNLayer::Ptr layer) const {
     if (layer->outData[0]->inputTo.size() == 1 &&
-        CaselessEq<std::string>()(layer->outData[0]->inputTo.begin()->second->type, "relu")) {
+        (CaselessEq<std::string>()(layer->outData[0]->inputTo.begin()->second->type, "relu") ||
+         CNNNetworkInt8Normalizer::isReLULikeClamp(layer->outData[0]->inputTo.begin()->second)))  {
         return layer->outData[0]->inputTo.begin()->second;
     }
     // Conv-Sum-ReLU fuse
@@ -164,14 +180,16 @@ CNNLayer::Ptr CNNStatisticHelper::getLatestInFuse(CNNLayer::Ptr layer) const {
         } else {
             // look to the ports of eltwise
             if (eltwise->insData[1].lock()->creatorLayer.lock() == layer &&
-                CaselessEq<std::string>()(eltwise->insData[0].lock()->creatorLayer.lock()->type, "convolution")) {
+                CaselessEq<std::string>()(eltwise->insData[0].lock()->creatorLayer.lock()->type, "convolution") &&
+                eltwise->insData[0].lock()->inputTo.size() == 1) {
                 // this is a case when two convolutions come to eltwise, the second one will be selected for fuse,
                 // first will be used as sum operator
                 return layer;
             }
             // given layer is a convolution and will be used for fuse, but we need to verify if there is ReLU after eltwise
             if (eltwise->outData[0]->inputTo.size() == 1 &&
-                CaselessEq<std::string>()(eltwise->outData[0]->inputTo.begin()->second->type, "relu")) {
+                (CaselessEq<std::string>()(eltwise->outData[0]->inputTo.begin()->second->type, "relu") ||
+                 CNNNetworkInt8Normalizer::isReLULikeClamp(eltwise->outData[0]->inputTo.begin()->second))) {
                 return eltwise->outData[0]->inputTo.begin()->second;
             }
             return eltwise;
@@ -202,6 +220,7 @@ void CNNStatisticHelper::NormalizeStatistic() {
         for (auto i : l->insData) {
             if (newMap.find(i.lock()->creatorLayer.lock()->name) == newMap.end()) {
                 allInputsHaveStatistics = false;
+                break;
             }
         }
         // if we do not have statistic - verify who is consumer of this layer
@@ -211,12 +230,18 @@ void CNNStatisticHelper::NormalizeStatistic() {
                     if (CaselessEq<std::string>()(it.second->type, "scaleshift") ||
                         CaselessEq<std::string>()(it.second->type, "convolution")) {
                         isStarterLayer = true;
+                        break;
                     }
                 }
             }
         } else {
             isStarterLayer = true;
         }
+        if (CaselessEq<std::string>()(l->type, "scaleshift") ||
+            CaselessEq<std::string>()(l->type, "convolution")) {
+            isStarterLayer = true;
+        }
+
         if (!isStarterLayer) {
             continue;
         }
@@ -230,8 +255,11 @@ void CNNStatisticHelper::NormalizeStatistic() {
 
         bool perChannelScale = true;
 
+
         if (CaselessEq<std::string>()(l->type, "concat")
-            && l->outData.size() == 1 && l->outData[0]->getTensorDesc().getDims().size() == 4) {
+            && l->outData.size() == 1
+            && l->outData[0]->getTensorDesc().getDims().size() == 4
+            && allInputsHaveStatistics) {
             size_t concatLayerIdx = 0;
             for (int k = 0; k < l->insData.size(); k++) {
                 auto prevKLayer = l->insData[k].lock()->creatorLayer.lock();
@@ -246,11 +274,28 @@ void CNNStatisticHelper::NormalizeStatistic() {
                     THROW_IE_EXCEPTION << "We have incomplete statistic for predecessors of concat layer " << l->name;
                 }
             }
+        } else if (CaselessEq<std::string>()(l->type, "resample")) {
+            if (l->insData.size() == 1) {
+                CNNLayerPtr creator = l->insData[0].lock()->getCreatorLayer().lock();
+                if (CaselessEq<std::string>()(creator->type, "concat")) {
+                    auto concatStat = newMap[creator->name];
+                    currentStat->_maxOutputs = concatStat->_maxOutputs;
+                    currentStat->_minOutputs = concatStat->_minOutputs;
+                    newMap[l->name] = currentStat;
+                } else {
+                    auto itOld = internalNodesStats_.find(l->name);
+                    if (itOld != internalNodesStats_.end()) {
+                        currentStat->_maxOutputs = itOld->second->_maxOutputs;
+                        currentStat->_minOutputs = itOld->second->_minOutputs;
+                        newMap[l->name] = currentStat;
+                    }
+                }
+            }
         } else {
             // go over all children until we get convoluition, scaleshift, eltwise or unknown layer
             // layers Pooling and ReLU are passthrough
             // to understand the granularity of the scaling
-            // layer concat is a lyer which produce statistics and waterfall it down
+            // layer concat is a layer which produce statistics and waterfall it down
             std::vector<CNNLayer::Ptr> toAnalyze;
             for (auto it : l->outData[0]->inputTo) {
                 toAnalyze.push_back(it.second);
@@ -264,6 +309,7 @@ void CNNStatisticHelper::NormalizeStatistic() {
                 toAnalyze.pop_back();
                 if (CaselessEq<std::string>()(tl->type, "pooling") ||
                     CaselessEq<std::string>()(tl->type, "relu") ||
+                    CNNNetworkInt8Normalizer::isReLULikeClamp(tl) ||
                     CaselessEq<std::string>()(tl->type, "concat")) {
                     if (tl->outData.size() == 1) {
                         for (auto it : tl->outData[0]->inputTo) {
@@ -282,37 +328,61 @@ void CNNStatisticHelper::NormalizeStatistic() {
             }
 
             auto itOld = internalNodesStats_.find(getLatestInFuse(l)->name);
+            if (itOld == internalNodesStats_.end()) {
+                itOld = internalNodesStats_.find(l->name);
+            }
             if (itOld != internalNodesStats_.end()) {
-                currentStat->_maxOutputs = itOld->second->_maxOutputs;
-                currentStat->_minOutputs = itOld->second->_minOutputs;
-
                 if (!perChannelScale) {
-                    float min = FLT_MAX;
-                    float max = FLT_MIN;
+                    currentStat->_maxOutputs.resize(itOld->second->_maxOutputs.size());
                     if (!itOld->second->_maxOutputs.empty()) {
+                        float max = FLT_MIN;
                         DataStats::GetDataAbsMax(&itOld->second->_maxOutputs[0], itOld->second->_maxOutputs.size(), max);
                         std::fill(currentStat->_maxOutputs.begin(), currentStat->_maxOutputs.end(), max);
                     }
+
+                    currentStat->_minOutputs.resize(itOld->second->_minOutputs.size());
                     if (!itOld->second->_minOutputs.empty()) {
+                        float min = FLT_MAX;
                         DataStats::GetDataMinMax(&itOld->second->_minOutputs[0], itOld->second->_minOutputs.size(), min, dummy);
                         std::fill(currentStat->_minOutputs.begin(), currentStat->_minOutputs.end(), min);
                     }
+                } else {
+                    currentStat->_maxOutputs = itOld->second->_maxOutputs;
+                    currentStat->_minOutputs = itOld->second->_minOutputs;
+                }
+            }
+
+
+            if (l->outData.size() == 1) {
+                size_t outputChannels = l->outData[0]->getTensorDesc().getDims()[1];
+                auto oldStat = internalNodesStats_.find(l->name);
+                if ((oldStat != internalNodesStats_.end()) && outputChannels > 1 && oldStat->second->_minOutputs.size() == 1) {
+                    auto min = oldStat->second->_minOutputs[0];
+                    auto max = oldStat->second->_maxOutputs[0];
+
+                    currentStat->_minOutputs = std::vector<float>(outputChannels);
+                    currentStat->_maxOutputs = std::vector<float>(outputChannels);
+                    std::fill(currentStat->_minOutputs.begin(), currentStat->_minOutputs.end(), min);
+                    std::fill(currentStat->_maxOutputs.begin(), currentStat->_maxOutputs.end(), max);
                 }
             }
         }
 
         // propagate this statistic to all layers without scale in primitives
-        std::vector<CNNLayer::Ptr> toAnalyze;
-        toAnalyze.push_back(l);
-        while (!toAnalyze.empty()) {
-            CNNLayer::Ptr tl = toAnalyze.back();
-            toAnalyze.pop_back();
-            newMap[tl->name] = currentStat;
-            if (tl->outData.size() == 1) {
-                for (auto it : tl->outData[0]->inputTo) {
-                    if (CaselessEq<std::string>()(it.second->type, "pooling") ||
-                        CaselessEq<std::string>()(it.second->type, "relu")) {
-                        toAnalyze.push_back(it.second);
+        if (!currentStat->_maxOutputs.empty() && !currentStat->_minOutputs.empty()) {
+            std::vector<CNNLayer::Ptr> toAnalyze;
+            toAnalyze.push_back(l);
+            while (!toAnalyze.empty()) {
+                CNNLayer::Ptr tl = toAnalyze.back();
+                toAnalyze.pop_back();
+                newMap[tl->name] = currentStat;
+                if (tl->outData.size() == 1) {
+                    for (auto it : tl->outData[0]->inputTo) {
+                        if (CaselessEq<std::string>()(it.second->type, "pooling") ||
+                                CaselessEq<std::string>()(it.second->type, "relu") ||
+                                CNNNetworkInt8Normalizer::isReLULikeClamp(it.second)) {
+                            toAnalyze.push_back(it.second);
+                        }
                     }
                 }
             }
@@ -490,8 +560,9 @@ void CNNNetworkInt8Normalizer::AddScaleShifts(CNNNetwork& net, CNNStatisticHelpe
             for (auto nextIter : iter->outData[l1_out_i]->inputTo) {
                 CNNLayer::Ptr next = nextIter.second;
 
-                // Checking for an INT8 convolution with FP32 output
-                if (iter->type == "Convolution" &&
+                // Checking for an INT8 convolution or fully connected with FP32 output
+                if ((CaselessEq<std::string>()(iter->type, "Convolution") ||
+                     CaselessEq<std::string>()(iter->type, "FullyConnected")) &&
                     iter->precision == Precision::I8 &&
                     next->precision == Precision::FP32 &&
                     iter->outData[l1_out_i]->getPrecision() == Precision::FP32) {
@@ -511,6 +582,29 @@ void CNNNetworkInt8Normalizer::AddScaleShifts(CNNNetwork& net, CNNStatisticHelpe
     }
 }
 
+void CNNNetworkInt8Normalizer::ClampsToReLU(CNNNetwork& net, CNNStatisticHelper& statHelper) {
+    std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
+
+    for (auto iter : sortedLayers) {
+        if (isReLULikeClamp(iter) && (iter->precision == Precision::I8 || iter->precision == Precision::U8)) {
+            std::string layerName = iter->name + "_ReLU";
+            LayerParams ssCnnLayerParams{ layerName, "ReLU", iter->precision };
+            CNNLayerPtr ssCnnLayer(new ReLULayer(ssCnnLayerParams));
+
+            auto previousLayer = iter->insData[0].lock()->creatorLayer.lock();
+            ssCnnLayer->insData.push_back(iter->insData[0]);
+            ssCnnLayer->insData[0].lock()->inputTo.erase(iter->name);
+            ssCnnLayer->insData[0].lock()->inputTo[iter->name] = ssCnnLayer;
+
+            ssCnnLayer->outData.push_back(iter->outData[0]);
+            ssCnnLayer->outData[0]->creatorLayer = ssCnnLayer;
+
+            iter->insData.clear();
+            iter->outData.clear();
+        }
+    }
+}
+
 void CNNNetworkInt8Normalizer::ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob, const std::vector<float>& scales) {
     if (scales.size() == 0 || /*srcblob->size()*/srcSize % scales.size() != 0) {
         THROW_IE_EXCEPTION << "Wrong number of scale factors";
@@ -659,31 +753,35 @@ void CNNNetworkInt8Normalizer::replaceScaleShiftByDWConvolution(CNNNetwork &net)
             && layer->insData[0].lock()->creatorLayer.lock()
             && !CaselessEq<std::string>()(layer->insData[0].lock()->creatorLayer.lock()->type, "input")
             && layer->outData[0]->inputTo.size() > 0) {
-            // verification if this layer does not pass data to PriorBox, if it passes, we do not substitute
-            bool notToPriorBox = true;
-            for (auto o : layer->outData[0]->inputTo) {
-                if (CaselessEq<std::string>()(o.second->type, "priorbox") ||
-                    CaselessEq<std::string>()(o.second->type, "priorboxclustered")) {
-                    notToPriorBox = false;
+            const auto dims = layer->insData[0].lock()->getTensorDesc().getDims();
+            // only four or five dimensions Convolution layers are supported
+            if ((dims.size() == 4) || (dims.size() == 5)) {
+                // verification if this layer does not pass data to PriorBox, if it passes, we do not substitute
+                bool notToPriorBox = true;
+                for (auto o : layer->outData[0]->inputTo) {
+                    if (CaselessEq<std::string>()(o.second->type, "priorbox") ||
+                        CaselessEq<std::string>()(o.second->type, "priorboxclustered")) {
+                        notToPriorBox = false;
+                    }
+                }
+                if (notToPriorBox) {
+                    ScaleShiftLayer *pSS = dynamic_cast<ScaleShiftLayer *>(layer.get());
+                    float *ssWValues = pSS->_weights->buffer().as<float *>();
+                    float *ssSValues = pSS->_biases->buffer().as<float *>();
+                    CNNLayer::Ptr newLayer = createDWConvolutionForScale(layer->name, layer->outData[0]->getTensorDesc().getDims()[1], ssWValues, ssSValues);
+
+                    newLayer->outData = layer->outData;
+                    newLayer->outData[0]->creatorLayer = newLayer;
+                    newLayer->insData = layer->insData;
+                    newLayer->insData[0].lock()->inputTo.erase(layer->name);
+                    newLayer->insData[0].lock()->inputTo[newLayer->name] = newLayer;
                 }
-            }
-            if (notToPriorBox) {
-                ScaleShiftLayer *pSS = dynamic_cast<ScaleShiftLayer *>(layer.get());
-                float *ssWValues = pSS->_weights->buffer().as<float *>();
-                float *ssSValues = pSS->_biases->buffer().as<float *>();
-                CNNLayer::Ptr newLayer = createDWConvolutionForScale(layer->name, layer->outData[0]->getTensorDesc().getDims()[1], ssWValues, ssSValues);
-
-                newLayer->outData = layer->outData;
-                newLayer->outData[0]->creatorLayer = newLayer;
-                newLayer->insData = layer->insData;
-                newLayer->insData[0].lock()->inputTo.erase(layer->name);
-                newLayer->insData[0].lock()->inputTo[newLayer->name] = newLayer;
             }
         }
     }
 }
 
-void CNNNetworkInt8Normalizer::QuantizeConvolution(CNNLayer::Ptr convolution,
+void CNNNetworkInt8Normalizer::QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr convolution,
                                                     CNNStatisticHelper& statHelper) {
     size_t inputChannels = convolution->insData[0].lock()->getTensorDesc().getDims()[1];
     size_t outputChannels = convolution->outData[0]->getTensorDesc().getDims()[1];
@@ -725,20 +823,27 @@ void CNNNetworkInt8Normalizer::QuantizeConvolution(CNNLayer::Ptr convolution,
     if (weights) {
         const float *weight = static_cast<const float *>(weights->buffer());
 
-        ConvolutionLayer *pConv = dynamic_cast<ConvolutionLayer *>(convolution.get());
-        if (pConv->_group == 0) {
+        WeightableLayer *pConv = dynamic_cast<WeightableLayer *>(convolution.get());
+        ConvolutionLayer *pConv1 = dynamic_cast<ConvolutionLayer *>(convolution.get());
+
+        if (pConv1 != nullptr && pConv1->_group == 0) {
             THROW_IE_EXCEPTION << "Convolution '" << convolution->name << "'has wrong groups number == 0";
         }
+        int group = 1;
+        if (pConv1 != nullptr && pConv1->_group != 1) {
+            group = pConv1->_group;
+        }
+
 
         std::vector<float> newWeights;  // "new" weights are weights multiplied by i-scale
 
-        size_t W_CO = outputChannels / pConv->_group,
-        W_CI = inputChannels / pConv->_group,
-        W_HW = weights->size()/ W_CI / W_CO / pConv->_group;
+        size_t W_CO = outputChannels / group,
+        W_CI = inputChannels / group,
+        W_HW = weights->size()/ W_CI / W_CO / group;
 
         {
             float *iScaleMemory = static_cast<float *>(iScale->buffer());
-            for (size_t g = 0; g < pConv->_group; g++) {
+            for (size_t g = 0; g < group; g++) {
                 for (size_t co = 0; co < W_CO; co++) {
                     for (size_t ci = 0; ci < W_CI; ci++) {
                         size_t kernelBase = g * W_CO * W_CI * W_HW + co * W_CI * W_HW + ci * W_HW;
@@ -749,7 +854,7 @@ void CNNNetworkInt8Normalizer::QuantizeConvolution(CNNLayer::Ptr convolution,
                 }
             }
         }
-        size_t outChannelSize = weights->dims()[0] / W_CO / pConv->_group;
+        size_t outChannelSize = weights->dims()[0] / W_CO / group;
 
         // Calculating weights normalization scale factor (w-scale)
         float *weight_convolution;
@@ -790,9 +895,27 @@ void CNNNetworkInt8Normalizer::QuantizeConvolution(CNNLayer::Ptr convolution,
     }
 }
 
-void CNNNetworkInt8Normalizer::returnTailToFP32(CNNLayer::Ptr layer) {
+bool CNNNetworkInt8Normalizer::layerProducesFloat(const CNNLayer::Ptr layer) {
+    // currently we support only case of layers which have one output port
+    if (layer->outData.size() > 1) {
+        return false;
+    }
+
+    bool consumersFP32 = true;
+    for (const auto dOut : layer->outData[0]->inputTo) {
+        if (dOut.second->precision != Precision::FP32) {
+            consumersFP32 = false;
+        }
+    }
+    return consumersFP32;
+}
+
+void CNNNetworkInt8Normalizer::returnTailToFP32(const CNNLayer::Ptr layer) {
     std::set<CNNLayer::Ptr> layersToReturn;
-    layersToReturn.insert(layer);
+    if (layerProducesFloat(layer)) {
+        layersToReturn.insert(layer);
+    }
+
     while (!layersToReturn.empty()) {
         CNNLayer::Ptr layerA = *layersToReturn.begin();
         layersToReturn.erase(layerA);
@@ -806,29 +929,31 @@ void CNNNetworkInt8Normalizer::returnTailToFP32(CNNLayer::Ptr layer) {
         }
 
         if ((CaselessEq<std::string>()(layerA->type, "convolution")
-            || CaselessEq<std::string>()(layerA->type, "relu")) &&
+            || CaselessEq<std::string>()(layerA->type, "fullyconnected")
+            || CaselessEq<std::string>()(layerA->type, "relu")
+            || isReLULikeClamp(layerA)) &&
             layerA->outData.size() == 1) {
             layerA->outData[0]->setPrecision(Precision::FP32);
+            if (CaselessEq<std::string>()(layerA->type, "relu")
+                && isNextFusionAllowed(layerA->insData[0].lock()->creatorLayer.lock())) {
+                layerA->precision = Precision::FP32;
+                layerA->insData[0].lock()->creatorLayer.lock()->outData[0]->setPrecision(Precision::FP32);
+            }
         }
 
 
         // adding parents for analysis
-        if (!CaselessEq<std::string>()(layerA->type, "convolution")) {
-            // for all parrents, if they produce data to only FP32 layers
+        if (!CaselessEq<std::string>()(layerA->type, "convolution") &&
+            !CaselessEq<std::string>()(layerA->type, "fullyconnected")) {
+            // for all parents, if they produce data to only FP32 layers
             for (auto i : layerA->insData) {
                 DataPtr d = i.lock();
                 if (d->creatorLayer.lock()->precision != Precision::FP32
                     && (CaselessEq<std::string>()(layerA->type, "pooling")
                         || CaselessEq<std::string>()(layerA->type, "relu")
+                        || isReLULikeClamp(layerA)
                         || CaselessEq<std::string>()(layerA->type, "concat"))) {
-                    // check if layer produce to only FP32
-                    bool consumersFP32 = true;
-                    for (auto dOut : d->inputTo) {
-                        if (dOut.second->precision != Precision::FP32) {
-                            consumersFP32 = false;
-                        }
-                    }
-                    if (consumersFP32) {
+                    if (layerProducesFloat(d->creatorLayer.lock())) {
                         layersToReturn.insert(d->creatorLayer.lock());
                     }
                 }
@@ -837,8 +962,8 @@ void CNNNetworkInt8Normalizer::returnTailToFP32(CNNLayer::Ptr layer) {
     }
 }
 
-bool CNNNetworkInt8Normalizer::isNextFusionAllowed(CNNLayer::Ptr layer) const {
-    // fusion can happen only if initial layer supplys data to only one layer
+bool CNNNetworkInt8Normalizer::isNextFusionAllowed(const CNNLayer::Ptr& layer) {
+    // fusion can happen only if initial layer supplies data to only one layer
     // if it sends to several layers - it is safe to execute initial layer in any precision
     if (layer->outData[0]->inputTo.size() == 1) {
         std::string aType = layer->outData[0]->inputTo.begin()->second->type;
@@ -847,6 +972,10 @@ bool CNNNetworkInt8Normalizer::isNextFusionAllowed(CNNLayer::Ptr layer) const {
             if (rL->negative_slope != 0.f) {
                 return false;
             }
+        } else if (CaselessEq<std::string>()(aType, "clamp")) {
+            if (!isReLULikeClamp(layer->outData[0]->inputTo.begin()->second)) {
+                return false;
+            }
         } else {
             static const InferenceEngine::details::caseless_set<std::string> nonSuportedActivations =
             {"elu", "clamp", "tanh", "logistic", "square", "abs",
@@ -857,6 +986,17 @@ bool CNNNetworkInt8Normalizer::isNextFusionAllowed(CNNLayer::Ptr layer) const {
     return true;
 }
 
+bool CNNNetworkInt8Normalizer::isReLULikeClamp(CNNLayer::Ptr layer) {
+    if (CaselessEq<std::string>()(layer->type, "Clamp")) {
+        ClampLayer *clamp = dynamic_cast<ClampLayer *>(layer.get());
+        if (clamp == nullptr) {
+            THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer '" << layer->name << "' to Clamp";
+        }
+        return clamp->min_value == 0;
+    }
+    return false;
+}
+
 void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNStatisticHelper &statHelper) {
     std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
 
@@ -866,30 +1006,39 @@ void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNSta
             continue;
         }
 
-        if (statHelper.canLayerBeQuantized(iter->name)) {
+        // Legacy: FullyConnected should not be converted to Int8,
+        // if it isn't explicitly marked to.
+        if (iter->params.find("quantization_level") == iter->params.end() && CaselessEq<std::string>()(iter->type, "fullyconnected")) {
+            continue;
+        }
+
+        if (!statHelper.canLayerBeQuantized(iter)) {
             continue;
         }
 
-        if (CaselessEq<std::string>()(iter->type, "convolution")) {
+        if (CaselessEq<std::string>()(iter->type, "convolution") ||
+            CaselessEq<std::string>()(iter->type, "fullyconnected")) {
             if (isNextFusionAllowed(iter)) {
                 iter->precision = Precision::I8;
                 // we will override I8 to U8 during analysing of Conv-ReLU and Conv-Sum-ReLU fusions
                 iter->outData[0]->setPrecision(Precision::I8);
             }
-        } else if (CaselessEq<std::string>()(iter->type, "relu")) {
+        } else if (CaselessEq<std::string>()(iter->type, "relu") ||
+                   isReLULikeClamp(iter)) {
             // casting to ReLU
             ReLULayer *rL = dynamic_cast<ReLULayer *>(iter.get());
             DataPtr outData = iter->outData.size() ? iter->outData[0] : nullptr;
             if (iter->insData[0].lock()->creatorLayer.lock()->precision != Precision::FP32
                 && outData->getPrecision() == Precision::FP32) {
                 iter->precision = Precision::I8;
-                if (rL->negative_slope != 0.0f) {
+                if (rL != nullptr && rL->negative_slope != 0.0f) {
                     outData->setPrecision(Precision::I8);
                 } else {
                     outData->setPrecision(Precision::U8);
                     // if convolution is a predecessor, change its data to U8 also
                     CNNLayer::Ptr prevLayer = iter->insData[0].lock()->creatorLayer.lock();
-                    if (prevLayer && CaselessEq<std::string>()(prevLayer->type, "convolution")) {
+                    if (prevLayer && (CaselessEq<std::string>()(prevLayer->type, "convolution") ||
+                                      CaselessEq<std::string>()(prevLayer->type, "fullyconnected"))) {
                         iter->insData[0].lock()->setPrecision(Precision::U8);
                     }
                     // if there is a patter A0 -> Eltwise -> ReLU and Convolution -> Eltwise -> ReLU,
@@ -916,9 +1065,12 @@ void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNSta
             }
         } else if (CaselessEq<std::string>()(iter->type, "pooling")) {
             auto pool = dynamic_cast<PoolingLayer *>(iter.get());
-            if (pool && (pool->_type == PoolingLayer::MAX
-                         || (pool->_type == PoolingLayer::AVG
-                             && pool->outData.size() == 1))) {
+            if (pool == nullptr) {
+                THROW_IE_EXCEPTION << "Int8 Normalizer error: cannot cast layer '" << iter->name << "' to pooling";
+            }
+
+            if (pool->_type == PoolingLayer::MAX ||
+                (pool->_type == PoolingLayer::AVG && pool->outData.size() == 1)) {
                 auto prevLayer = iter->insData[0].lock()->creatorLayer.lock();
                 if (prevLayer && (prevLayer->precision == Precision::I8 || prevLayer->precision == Precision::U8)) {
                     iter->precision = Precision::I8;
@@ -1041,7 +1193,7 @@ void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNSta
                         iter->precision = Precision::I8;
                         iter->outData[0]->setPrecision(Precision::I8);
                         // calculate the only scale
-                        Blob::Ptr sumLayerScales = statHelper.getOutputScale(sumLayer);
+                        Blob::Ptr sumLayerScales = statHelper.getOutputScale(statHelper.getLatestInFuse(sumLayer));
                         Blob::Ptr convLayerScales = statHelper.getOutputScale(statHelper.getLatestInFuse(convLayer));
                         float *sumScale = sumLayerScales->buffer().as<float *>();
                         float *convScale = convLayerScales->buffer().as<float *>();
@@ -1055,20 +1207,27 @@ void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNSta
             } else {
                 // if there are convolutions are inputs to this eltwise, we forcedly move them to FP32
                 for (auto i : iter->insData) {
-                    if (CaselessEq<std::string>()(i.lock()->creatorLayer.lock()->type, "convolution")) {
+                    auto type = i.lock()->creatorLayer.lock()->type;
+                    if (CaselessEq<std::string>()(type, "convolution") ||
+                        CaselessEq<std::string>()(type, "fullyconnected")) {
                         i.lock()->creatorLayer.lock()->precision = Precision::FP32;
                         i.lock()->setPrecision(Precision::FP32);
                     }
                 }
             }
+        } else if (CaselessEq<std::string>()(iter->type, "resample")) {
+            iter->precision = Precision::I8;
+            iter->outData[0]->setPrecision(iter->insData[0].lock()->getPrecision());
         }
     }
 
     // quantization of weights/biases
     sortedLayers = CNNNetSortTopologically(net);
     for (auto iter : sortedLayers) {
-        if (iter->precision == Precision::I8 && CaselessEq<std::string>()(iter->type, "convolution")) {
-            QuantizeConvolution(iter, statHelper);
+        if (iter->precision == Precision::I8 &&
+                (CaselessEq<std::string>()(iter->type, "convolution") ||
+                 CaselessEq<std::string>()(iter->type, "fullyconnected"))) {
+            QuantizeConvolutionOrFullyConnected(iter, statHelper);
         }
     }
 
@@ -1080,8 +1239,8 @@ void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNSta
         if (iter->precision == Precision::I8
             && iter->outData.size() == 1) {
             if ((iter->outData[0]->inputTo.size() == 1
-                                               && iter->outData[0]->inputTo.begin()->second->precision == Precision::FP32)
-                                              || iter->outData[0]->inputTo.size() == 0) {
+                && iter->outData[0]->inputTo.begin()->second->precision == Precision::FP32)
+                || iter->outData[0]->inputTo.size() == 0) {
                 returnTailToFP32(iter);
             }
         }
@@ -1091,8 +1250,6 @@ void CNNNetworkInt8Normalizer::DefinesExecutionPrecision(CNNNetwork &net, CNNSta
 void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper) {
     std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(net);
 
-    std::vector<CNNLayer::Ptr> oScaleLayers;
-
     // Moving o-scales down
     for (auto iter : sortedLayers) {
         if (iter->type == "Concat" && iter->precision == Precision::I8) {
@@ -1143,7 +1300,10 @@ void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNS
             if (iter->outData.size() == 1) {
                 for (auto l : iter->outData[0]->inputTo) {
                     if (l.second->precision == Precision::I8 || l.second->precision == Precision::U8) {
-                        if (l.second->type == "Pooling" || l.second->type == "ReLU") {
+                        if (CaselessEq<std::string>()(l.second->type, "Pooling") ||
+                            CaselessEq<std::string>()(l.second->type, "ReLU") ||
+                            CNNNetworkInt8Normalizer::isReLULikeClamp(l.second)
+                        ) {
                             l.second->blobs["o-scale"] = iter->blobs["o-scale"];
                             // debug scales. Need to compare with actual values in FP32 scoring
                             l.second->blobs["ext-scale"] = l.second->blobs["o-scale"];
@@ -1156,6 +1316,25 @@ void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNS
                                 l.second->blobs["o-scale"] = iter->blobs["o-scale"];
                             }
                             int8Consumers++;
+                        } else if ((l.second->precision == Precision::I8 || l.second->precision == Precision::U8) &&
+                                   CaselessEq<std::string>()(l.second->type, "Resample")) {
+                            // If resample has concat as input layer it should inherit it's
+                            // output scale
+                            if (l.second->insData.size() == 1) {
+                                CNNLayerPtr creator = l.second->insData[0].lock()->creatorLayer.lock();
+                                if (CaselessEq<std::string>()(creator->type, "Concat")) {
+                                    l.second->blobs["o-scale"] = creator->blobs["o-scale"];
+                                    l.second->blobs["i-concat-scale"] = l.second->blobs["o-scale"];
+                                }
+                            }
+
+                            // No concat found, let use statistics
+                            if (l.second->blobs.find("o-scale") == l.second->blobs.end()) {
+                                auto oScale = statHelper.getOutputScale(l.second);
+                                l.second->blobs["o-scale"] = oScale;
+                                l.second->blobs["i-concat-scale"] = l.second->blobs["o-scale"];
+                            }
+                            int8Consumers++;
                         } else if ((l.second->precision == Precision::I8) &&
                             CaselessEq<std::string>()(l.second->type, "concat")) {
                             // if concat is i8, we can propagate oscale further to concat.
@@ -1181,7 +1360,8 @@ void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNS
                     fp32Consumers++;
                 }
 
-                if (CaselessEq<std::string>()(iter->type, "Convolution")) {
+                if (CaselessEq<std::string>()(iter->type, "Convolution") ||
+                    CaselessEq<std::string>()(iter->type, "FullyConnected")) {
                     if (int8Consumers) {
                         iter->blobs["oi-scale"] = iter->blobs["o-scale"];
                     } else {
@@ -1227,9 +1407,10 @@ void CNNNetworkInt8Normalizer::PropagateScaleFactors(CNNNetwork& net, const CNNS
                     && curLayer->insData[0].lock()->creatorLayer.lock()->outData.size() == 1
                     && curLayer->insData[0].lock()->inputTo.size() == 1) {
                     curLayer = curLayer->insData[0].lock()->creatorLayer.lock();
-                    if (curLayer->type != "Pooling"
-                        && curLayer->type != "ReLU"
-                        && curLayer->type != "Convolution") {
+                    if (!CaselessEq<std::string>()(curLayer->type, "Pooling")
+                        && !CaselessEq<std::string>()(curLayer->type, "ReLU")
+                        && !isReLULikeClamp(curLayer)
+                        && !CaselessEq<std::string>()(curLayer->type, "Convolution")) {
                         eliminateOScale = false;
                     }
                 } else {
@@ -1309,6 +1490,7 @@ void CNNNetworkInt8Normalizer::NormalizeNetwork(ICNNNetwork& network, ICNNNetwor
 
     DefinesExecutionPrecision(cnnn, statHelper);
     PropagateScaleFactors(cnnn, statHelper);
+    ClampsToReLU(cnnn, statHelper);
     AddScaleShifts(cnnn, statHelper);
 #ifndef NDEBUG
     std::ofstream file("i8_normalized.dot");
diff --git a/inference-engine/src/inference_engine/cnn_network_int8_normalizer.hpp b/inference-engine/src/inference_engine/cnn_network_int8_normalizer.hpp
index 69e94b1a6..4e0b658b0 100644
--- a/inference-engine/src/inference_engine/cnn_network_int8_normalizer.hpp
+++ b/inference-engine/src/inference_engine/cnn_network_int8_normalizer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -45,7 +45,7 @@ public:
     * Returns if we can quantize layer basing on information of existing statistic before and after
     * layers
     */
-    bool canLayerBeQuantized(const std::string &layerName) const;
+    bool canLayerBeQuantized(CNNLayer::Ptr layer) const;
 
     /**
      * The topology is allowed to be changed, we need to modify statistic accordingly
@@ -163,15 +163,15 @@ private:
 
 public:
     /** main function for calling of quantization */
-    void NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats);
+    static void NormalizeNetwork(ICNNNetwork& network, ICNNNetworkStats& netStats);
 
 protected:
     /** Helper function to add scaleshifts and other layers for transformatin of topology */
-    void AddLayerToCNNNetworkBeforeLayer(CNNLayer::Ptr newLayer, CNNLayer::Ptr successor, size_t port);
+    static void AddLayerToCNNNetworkBeforeLayer(CNNLayer::Ptr newLayer, CNNLayer::Ptr successor, size_t port);
     /** Helper function to add scaleshifts and other layers for transformatin of topology */
-    void AddLayerToCNNNetworkAfterData(DataPtr pData, CNNLayer::Ptr layer, const std::string& nextLayerName);
+    static void AddLayerToCNNNetworkAfterData(DataPtr pData, CNNLayer::Ptr layer, const std::string& nextLayerName);
     /**  Adds ScaleShift between two specified layers  */
-    void AddScaleShiftBetween(CNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2, CNNStatisticHelper& statHelper);
+    static void AddScaleShiftBetween(CNNNetwork& net, const CNNLayerPtr layer1, const CNNLayerPtr layer2, CNNStatisticHelper& statHelper);
 
 
     /**
@@ -181,28 +181,31 @@ protected:
      * data
      * o-scale - multiplication on this scale will convert above denormalized fp32 to i8 for next layer
      */
-    void QuantizeConvolution(CNNLayer::Ptr convolution, CNNStatisticHelper& statHelper);
+    static void QuantizeConvolutionOrFullyConnected(CNNLayer::Ptr convolution, CNNStatisticHelper& statHelper);
 
     /**  Adds ScaleShifts everywhere */
-    void AddScaleShifts(CNNNetwork& net, CNNStatisticHelper& statHelper);
+    static void AddScaleShifts(CNNNetwork& net, CNNStatisticHelper& statHelper);
+
+    /**  Convert ReLu-like Clamps to ReLu layers */
+    static void ClampsToReLU(CNNNetwork& net, CNNStatisticHelper& statHelper);
 
     /**
      * Goes over all layers and mark which layers will be executed in FP32/I8 and marks data between
      * layers to I8/U8/FP32
      */
-    void DefinesExecutionPrecision(CNNNetwork& net, CNNStatisticHelper& statHelper);
+    static void DefinesExecutionPrecision(CNNNetwork& net, CNNStatisticHelper& statHelper);
 
     /**
      * Since o-scales exist only for convolutins, we need to propagate them down oever concats and
      * linear layers
      */
-    void PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper);
+    static void PropagateScaleFactors(CNNNetwork& net, const CNNStatisticHelper& statHelper);
 
     /**
      * Normalizes and quantizes srcData using scales for normalization and int8blob precision for
      * quantization
      */
-    void ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob, const std::vector<float>& scales);
+    static void ScaleDataToInt(const float* srcData, size_t srcSize, Blob::Ptr int8blob, const std::vector<float>& scales);
 
     /**
      * Replaces all ScaleShifts layers met in the model to the depth-wise convolution with the same
@@ -216,23 +219,34 @@ protected:
      * This conversion allows to avoid introductin one more i8 primitive - ScaleShift accepting i8 input
      * and producing i8 output
      */
-    void replaceScaleShiftByDWConvolution(CNNNetwork& net);
+    static void replaceScaleShiftByDWConvolution(CNNNetwork& net);
 
     /** Helper function which creates DW/Grouped/regular convolution by passed weights and biases */
-    CNNLayer::Ptr createDWConvolutionForScale(const std::string& layerName, size_t channels, float *weights, float *biases);
+    static CNNLayer::Ptr createDWConvolutionForScale(const std::string& layerName, size_t channels, float *weights, float *biases);
+
+    /**
+     * Verifies if layer produces data to layers which marked as float
+     */
+    static bool layerProducesFloat(const CNNLayer::Ptr layer);
 
     /**
     * Returns tails from I8 to FP32 until convolution - it is the most performed approach because
     * convolution can convert to FP32 for free, while adding one more scale will decrease performance
     */
-    void returnTailToFP32(CNNLayer::Ptr layer);
+    static void returnTailToFP32(const CNNLayer::Ptr layer);
 
     /**
      * Verifies if next layer has type which potentially can be fused with convolution
      * and if activation is supported for int8
      * @return true if layer does not have improper activation for fusion
      */
-    bool isNextFusionAllowed(CNNLayer::Ptr layer) const;
+    static bool isNextFusionAllowed(const CNNLayer::Ptr& layer);
+
+public:
+    /**
+     * Returns true for a "relu-like" clamp layer i.e. a clamp with minimum = 0
+     */
+    static bool isReLULikeClamp(CNNLayer::Ptr layer);
 };
 
 typedef std::shared_ptr<CNNNetworkInt8Normalizer> CNNNetworkNormalizerPtr;
diff --git a/inference-engine/src/inference_engine/cnn_network_stats_impl.cpp b/inference-engine/src/inference_engine/cnn_network_stats_impl.cpp
index dd89fcb0f..0a577ab98 100644
--- a/inference-engine/src/inference_engine/cnn_network_stats_impl.cpp
+++ b/inference-engine/src/inference_engine/cnn_network_stats_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cnn_network_stats_impl.hpp b/inference-engine/src/inference_engine/cnn_network_stats_impl.hpp
index f83aca613..f97e1d896 100644
--- a/inference-engine/src/inference_engine/cnn_network_stats_impl.hpp
+++ b/inference-engine/src/inference_engine/cnn_network_stats_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_executable_network_base.hpp b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_executable_network_base.hpp
index fd9bd1b3c..aceb47974 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_executable_network_base.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_executable_network_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -58,6 +58,10 @@ public:
         TO_STATUS(_impl->GetMappedTopology(deployedTopology));
     }
 
+    StatusCode GetExecGraphInfo(ICNNNetwork::Ptr &graphPtr, ResponseDesc *resp) noexcept override {
+        TO_STATUS(_impl->GetExecGraphInfo(graphPtr));
+    }
+
     StatusCode  QueryState(IMemoryState::Ptr & pState, size_t idx
         , ResponseDesc *resp) noexcept override {
         try {
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_infer_async_request_base.hpp b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_infer_async_request_base.hpp
index 916849a79..6222b1430 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_infer_async_request_base.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_infer_async_request_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_memory_state_base.hpp b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_memory_state_base.hpp
index 9764b75b6..2b448e3ab 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_memory_state_base.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_memory_state_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_plugin_base.hpp b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_plugin_base.hpp
index 33b3f3979..6269dd32e 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/base/ie_plugin_base.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/base/ie_plugin_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/exception2status.hpp b/inference-engine/src/inference_engine/cpp_interfaces/exception2status.hpp
index 0f3462a4c..4015bb166 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/exception2status.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/exception2status.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.cpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.cpp
index 1930937d8..f9533ca3e 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.cpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.hpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.hpp
index 8c4d4d090..17e86e0e0 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_executor_manager.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_itask_executor.hpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_itask_executor.hpp
index 0f9be309e..3b02eff54 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_itask_executor.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_itask_executor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task.cpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task.cpp
index 89e716c33..0df1242f9 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task.cpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task.hpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task.hpp
index 8646da991..c299be45f 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.cpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.cpp
index 8e4c693b1..3868abc9f 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.cpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.hpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.hpp
index ad06a6035..c135a82b1 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_executor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_synchronizer.hpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_synchronizer.hpp
index 3ac5f9f20..160829341 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_synchronizer.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_synchronizer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.cpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.cpp
index 48b2790b1..1e12aca29 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.cpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,7 +14,7 @@ namespace InferenceEngine {
 
 StagedTask::StagedTask() : Task(), _stages(0) {}
 
-StagedTask::StagedTask(std::function<void()> function, size_t stages) : Task(function), _stages(stages) {
+StagedTask::StagedTask(std::function<void()> function, size_t stages) : Task(function), _stages(stages), _stage(0) {
     if (!function) THROW_IE_EXCEPTION << "Failed to create StagedTask object with null function";
     resetStages();
 }
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.hpp b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.hpp
index f9b375513..fff5e5121 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/ie_task_with_stages.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_internal.hpp
index 01e85a3e3..ba3efa3d0 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -59,6 +59,10 @@ public:
         THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
     }
 
+    void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) override {
+        THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
+    }
+
     void SetPointerToPluginInternal(InferencePluginInternalPtr plugin) {
         _plugin = plugin;
     }
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp
index 515a28391..f92d8da1b 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_async_only.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp
index b3c7ad080..88ad125cd 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_executable_network_thread_safe_default.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_internal.hpp
index d194a304e..2a8ffe0e4 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp
index f18a47a7a..3384164c0 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_default.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_internal.hpp
index 96a905f88..04622f05a 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_async_request_thread_safe_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_request_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_request_internal.hpp
index c9afe39ff..c04a5d9ad 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_request_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_infer_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -30,7 +30,8 @@ class InferRequestInternal : virtual public IInferRequestInternal {
 public:
     typedef std::shared_ptr<InferRequestInternal> Ptr;
 
-    InferRequestInternal(InputsDataMap networkInputs, OutputsDataMap networkOutputs) {
+    InferRequestInternal(InputsDataMap networkInputs, OutputsDataMap networkOutputs)
+            : m_curBatch(-1) {
         // We should copy maps in order to avoid modifications in the future.
         for (const auto &it : networkInputs) {
             InputInfo::Ptr newPtr;
@@ -101,6 +102,7 @@ public:
             }
 
             if (foundInput->getPreProcess().getResizeAlgorithm() != ResizeAlgorithm::NO_RESIZE) {
+                PreProcessData::isApplicable(data, _inputs[name]);
                 // Stores the given blob as ROI blob. It will be used to fill in network input during pre-processing.
                 _preProcData[name].setRoiBlob(data);
             } else {
@@ -177,7 +179,8 @@ public:
             if (it != _preProcData.end()) {
                 _preProcData[input.first].execute(input.second,
                                                   _networkInputs[input.first]->getPreProcess().getResizeAlgorithm(),
-                                                  serial);
+                                                  serial,
+                                                  m_curBatch);
             }
         }
     }
@@ -189,6 +192,7 @@ protected:
     InferenceEngine::BlobMap _outputs;
     ExecutableNetworkInternalPtr _exeNetwork;
     std::map<std::string, PreProcessData> _preProcData;  // pre-process data per input
+    int m_curBatch;  // current batch value used in dynamic batching
 
 protected:
     /**
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_memory_state_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_memory_state_internal.hpp
index db3659edf..7d5a9fd85 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_memory_state_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_memory_state_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_plugin_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_plugin_internal.hpp
index d9bee350a..bb261db42 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_plugin_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/impl/ie_plugin_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -13,6 +13,7 @@
 #include <map>
 #include <string>
 #include <blob_factory.hpp>
+#include "graph_transformer.h"
 #include "cpp_interfaces/interface/ie_iplugin_internal.hpp"
 #include "cpp_interfaces/base/ie_executable_network_base.hpp"
 #include "cpp_interfaces/impl/ie_executable_network_internal.hpp"
@@ -47,6 +48,19 @@ public:
         StatusCode sts = _loadedNetwork->CreateInferRequest(_createdInferRequest, &resp);
         if (sts != OK) THROW_IE_EXCEPTION << resp.msg;
     }
+    /**
+     * @brief most plugins successfully consume unreshapable networks - lets do it in base class
+     * WARNING: this functions modifies layers in input network and might affect application, that uses it
+     */
+    virtual ICNNNetwork&  RemoveConstLayers(ICNNNetwork &network) {
+        auto* implNetwork = dynamic_cast<details::CNNNetworkImpl*>(&network);
+        if (implNetwork) {
+            // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network
+            ConstTransformer transformator(implNetwork);
+            transformator.fullTrim();
+        }
+        return network;
+    }
 
     /**
      * @brief Creates an executable network from an pares network object, users can create as many networks as they need and use
@@ -101,7 +115,7 @@ public:
             }
             _networkOutputs[it.first] = newData;
         }
-        auto impl = LoadExeNetworkImpl(network, config);
+        auto impl = LoadExeNetworkImpl(RemoveConstLayers(network), config);
         impl->setNetworkInputs(_networkInputs);
         impl->setNetworkOutputs(_networkOutputs);
         // skip setting shared ptr to avoid curricular dependency: ExecutableNetworkBase -> IExecutableNetworkInternal -> InferencePluginInternal
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp
index cd8a46a2c..eafed12c1 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iexecutable_network_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -60,6 +60,11 @@ public:
      */
     virtual void GetMappedTopology(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &deployedTopology) = 0;
 
+    /**
+    * @brief Get executable graph information from a device
+    * @param graphPtr network ptr to store executable graph information
+    */
+    virtual void GetExecGraphInfo(ICNNNetwork::Ptr &graphPtr) = 0;
 
     virtual std::vector<IMemoryStateInternal::Ptr> QueryState() = 0;
 };
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_async_request_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_async_request_internal.hpp
index 844261ae5..c3162e757 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_async_request_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_async_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_request_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_request_internal.hpp
index e10e6b0d1..24776f109 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_request_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iinfer_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_imemory_state_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_imemory_state_internal.hpp
index a36a91e78..387c19b78 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_imemory_state_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_imemory_state_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.hpp b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.hpp
index 8bac85a06..f7645a571 100644
--- a/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.hpp
+++ b/inference-engine/src/inference_engine/cpp_interfaces/interface/ie_iplugin_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_detector.cpp b/inference-engine/src/inference_engine/cpu_detector.cpp
index d05c6dd93..937377190 100644
--- a/inference-engine/src/inference_engine/cpu_detector.cpp
+++ b/inference-engine/src/inference_engine/cpu_detector.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_detector.hpp b/inference-engine/src/inference_engine/cpu_detector.hpp
index c0ac96f50..021919fa7 100644
--- a/inference-engine/src/inference_engine/cpu_detector.hpp
+++ b/inference-engine/src/inference_engine/cpu_detector.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.cpp b/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.cpp
index f8c16a49e..c426125df 100644
--- a/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.cpp
+++ b/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.hpp b/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.hpp
index 5eeb60ddb..034ed9533 100644
--- a/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.hpp
+++ b/inference-engine/src/inference_engine/cpu_x86_sse42/blob_transform_sse42.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.cpp b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.cpp
index 7d4015759..220280c65 100644
--- a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.cpp
+++ b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.hpp b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.hpp
index 09a53793a..4cc5e7e47 100644
--- a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.hpp
+++ b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_data_sse42.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp
index ea3723513..573aaa09e 100644
--- a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp
+++ b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -888,12 +888,9 @@ void calcRowLinear_32F(float *dst[],
                  const float  alpha[],
                  const int    mapsx[],
                  const float  beta[],
-                       float  tmp[],
                  const Size & inSz,
                  const Size & outSz,
                        int    lpi) {
-    UNUSED(tmp);
-
     bool xRatioEq1 = inSz.width  == outSz.width;
     bool yRatioEq1 = inSz.height == outSz.height;
 
diff --git a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp
index bbb0d6e0f..8a211e489 100644
--- a/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp
+++ b/inference-engine/src/inference_engine/cpu_x86_sse42/ie_preprocess_gapi_kernels_sse42.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -73,7 +73,6 @@ void calcRowLinear_32F(float *dst[],
                  const float  alpha[],
                  const int    mapsx[],
                  const float  beta[],
-                       float  tmp[],
                  const Size & inSz,
                  const Size & outSz,
                        int    lpi);
diff --git a/inference-engine/src/inference_engine/data_stats.cpp b/inference-engine/src/inference_engine/data_stats.cpp
index 58e43a1f7..127be61ec 100644
--- a/inference-engine/src/inference_engine/data_stats.cpp
+++ b/inference-engine/src/inference_engine/data_stats.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/data_stats.h b/inference-engine/src/inference_engine/data_stats.h
index b25f1d08b..38051561b 100644
--- a/inference-engine/src/inference_engine/data_stats.h
+++ b/inference-engine/src/inference_engine/data_stats.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/debug.h b/inference-engine/src/inference_engine/debug.h
index 8c5df8e73..2e9200de1 100644
--- a/inference-engine/src/inference_engine/debug.h
+++ b/inference-engine/src/inference_engine/debug.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -25,7 +25,6 @@
 #include "ie_algorithm.hpp"
 
 #ifdef _WIN32
-#include <winsock2.h>
 #include <windows.h>
 
 #define POSIX_EPOCH_AS_FILETIME 116444736000000000ULL
diff --git a/inference-engine/src/inference_engine/description_buffer.hpp b/inference-engine/src/inference_engine/description_buffer.hpp
index ae2bf3f3f..f814aff5c 100644
--- a/inference-engine/src/inference_engine/description_buffer.hpp
+++ b/inference-engine/src/inference_engine/description_buffer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/dll_main.hpp b/inference-engine/src/inference_engine/dll_main.hpp
index 2860d0344..fa0eefdd6 100644
--- a/inference-engine/src/inference_engine/dll_main.hpp
+++ b/inference-engine/src/inference_engine/dll_main.hpp
@@ -1,7 +1,7 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
-
+// dllmain.cpp : Defines the entry point for the DLL application.
 #pragma once
 
 #ifdef _WIN32
diff --git a/inference-engine/src/inference_engine/exec_graph_info.hpp b/inference-engine/src/inference_engine/exec_graph_info.hpp
new file mode 100644
index 000000000..633d27fb9
--- /dev/null
+++ b/inference-engine/src/inference_engine/exec_graph_info.hpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+
+namespace ExecGraphInfoSerialization {
+/**
+* @brief Executable Graph Info is represented in ICNNNetwork format with general CNNLayer nodes inside
+*        including connections between the nodes. Each node describes an executable hardware-specific
+*        primitive and stores its parameters within CNNLayer::params map.
+*        There is a list of general keys for the parameters map.
+*/
+
+/**
+ * @brief A general key for CNNLayer::params map. Used to get a string of layer names separated by a comma
+ *        from the original IR, which were fused/merged to the current executable primitive.
+ */
+static const char ORIGIN_NAMES[] = "originalFusedLayersNames";
+/**
+ * @brief A general key for CNNLayer::params map. Used to get a type of the executable primitive.
+ */
+static const char IMPL_TYPE[] = "primitiveType";
+/**
+ * @brief A general key for CNNLayer::params map. Used to get a precision of the executable primitive.
+ */
+static const char PRECISION[] = "precision";
+/**
+ * @brief A general key for CNNLayer::params map. Used to get value of execution time of the executable primitive.
+ */
+static const char PERF_COUNTER[] = "execTimeMcs";
+}  // namespace ExecGraphInfoSerialization
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/file_utils.cpp b/inference-engine/src/inference_engine/file_utils.cpp
index 7b38b9f4a..b76c2b7eb 100644
--- a/inference-engine/src/inference_engine/file_utils.cpp
+++ b/inference-engine/src/inference_engine/file_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/file_utils.h b/inference-engine/src/inference_engine/file_utils.h
index a3e2276cb..ce79a9f17 100644
--- a/inference-engine/src/inference_engine/file_utils.h
+++ b/inference-engine/src/inference_engine/file_utils.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,14 +10,17 @@
 
 #include <string>
 #ifdef _WIN32
-#define _WINSOCKAPI_
-#include <windows.h>
-#include <profileapi.h>
+# ifndef NOMINMAX
+#  define NOMINMAX
+# endif
+# define _WINSOCKAPI_
+# include <windows.h>
+# include <profileapi.h>
 #endif
 
 #ifdef __MACH__
-#include <mach/clock.h>
-#include <mach/mach.h>
+# include <mach/clock.h>
+# include <mach/mach.h>
 #endif
 
 #include "ie_api.h"
diff --git a/inference-engine/src/inference_engine/graph_tools.cpp b/inference-engine/src/inference_engine/graph_tools.cpp
index e123c75da..5c20eddc3 100644
--- a/inference-engine/src/inference_engine/graph_tools.cpp
+++ b/inference-engine/src/inference_engine/graph_tools.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -27,4 +27,26 @@ std::vector<CNNLayerPtr> CNNNetSortTopologically(const ICNNNetwork & network) {
 }
 
 }   // namespace details
+
+void CNNNetSubstituteLayer(InferenceEngine::ICNNNetwork &network,
+                           const InferenceEngine::CNNLayerPtr &layer,
+                           const InferenceEngine::CNNLayerPtr &newLayer) {
+    IE_ASSERT(layer->name == newLayer->name);
+
+    // Redirect srd data
+    for (auto& src : layer->insData) {
+        src.lock()->getInputTo()[layer->name] = newLayer;
+    }
+    newLayer->insData = layer->insData;
+
+    // Redirect dst data
+    for (auto& dst : layer->outData) {
+        dst->creatorLayer = newLayer;
+    }
+    newLayer->outData = layer->outData;
+
+    network.addLayer(newLayer);
+}
+
+
 }  // namespace InferenceEngine
 \ No newline at end of file
diff --git a/inference-engine/src/inference_engine/graph_tools.hpp b/inference-engine/src/inference_engine/graph_tools.hpp
index bce8a70f9..2207181fd 100644
--- a/inference-engine/src/inference_engine/graph_tools.hpp
+++ b/inference-engine/src/inference_engine/graph_tools.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -263,14 +263,15 @@ inline std::string  CNNNetPrevLayerName(const InferenceEngine::DataWeakPtr & dat
  * @param idx - index in previous layer collection
  * @param layer
  */
-    inline bool  CNNNetHasPrevLayer(const InferenceEngine::CNNLayer* layer, int idx = 0) {
-        IE_ASSERT(layer != nullptr);
-        if (layer->insData.empty() || layer->insData.size() <= idx) {
-            return false;
-        }
-        auto prevData = layer->insData[idx].lock();
-        return !!prevData->getCreatorLayer().lock();
+inline bool  CNNNetHasPrevLayer(const InferenceEngine::CNNLayer* layer, int idx = 0) {
+    IE_ASSERT(layer != nullptr);
+    if (layer->insData.empty() || layer->insData.size() <= idx) {
+        return false;
     }
+    auto prevData = layer->insData[idx].lock();
+    return !!prevData->getCreatorLayer().lock();
+}
+
 /**
  * @brief pointer of previous layers
  * @param idx - index in previous layer collection
@@ -499,14 +500,36 @@ inline CNNLayerSet CNNNetGetAllInputLayers(const ICNNNetwork &network) {
     if (inputs.empty())
         return inputLayers;
 
-    auto & secondLayers = inputs.begin()->second->getInputData()->getInputTo();
-    if (secondLayers.empty())
-        return inputLayers;
+    for (const auto & input : inputs) {
+        auto &secondLayers = input.second->getInputData()->getInputTo();
 
-    details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
-       if (layer->insData.empty()) {
-           inputLayers.insert(layer);
-       }
+        if (secondLayers.empty())
+            continue;
+
+        details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
+            if (layer->insData.empty()) {
+                inputLayers.insert(layer);
+            }
+        }, false);
+    }
+    return inputLayers;
+}
+
+/**
+ * @brief returns all layers that are input or memory , searc started from arbitrary location in network
+ * @param start layer
+ * @return set of input layers
+ */
+inline CNNLayerSet CNNNetGetAllInputLayers(CNNLayer* layer) {
+    CNNLayerSet inputLayers;
+    std::unordered_set<CNNLayer *> allLayers;
+
+    CNNLayerPtr layerPtr(layer, [](CNNLayer*){});
+
+    details::UnorderedDFS(allLayers, layerPtr, [&](CNNLayerPtr layer) {
+        if (layer->insData.empty()) {
+            inputLayers.insert(layer);
+        }
     }, false);
     return inputLayers;
 }
@@ -703,8 +726,9 @@ inline CNNNetPtr CNNNetCopy(const ICNNNetwork &input) {
  * @param after, insertion happened after this layer, if after is nullptr, insertion happened after all inputLayers for before layer
  * @param before, insertion happened before layer, if before is nullptr, insertion happened before all outputLayers of after layer
  * @param layerToInsert inserted layer
+ * @param outDataIndex optional parameter. You can reduce or improve layer search in some use cases by specifying index data
  */
-inline void CNNNetworkInsertLayer(CNNLayerPtr after, CNNLayerPtr before, CNNLayerPtr layerToInsert) {
+inline void CNNNetworkInsertLayer(CNNLayerPtr after, CNNLayerPtr before, CNNLayerPtr layerToInsert, size_t outDataIndex = 0) {
     if (after == nullptr && before == nullptr) {
         THROW_IE_EXCEPTION << "Cannot Insert Layer: before or after layers should be valid layer pointers";
     }
@@ -713,6 +737,10 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after, CNNLayerPtr before, CNNLaye
     if (after != nullptr) {
         // TODO: only one output data supported
         for (auto && data : after->outData) {
+            if (outDataIndex) {
+                --outDataIndex;
+                continue;
+            }
             for (auto && input : data->inputTo) {
                 if (before != nullptr && input.second.get() != before.get())
                     continue;
@@ -768,4 +796,83 @@ inline void CNNNetworkInsertLayer(CNNLayerPtr after, CNNLayerPtr before, CNNLaye
     }
 }
 
+/**
+ * @brief remove givven layer from topology, currently only layers with one input data and one output data supported
+ */
+inline void CNNNetworkRemoveLayer(CNNLayerPtr layer) {
+    if (!layer) {
+        THROW_IE_EXCEPTION << "Cannot remove layer pointed to NULL";
+    }
+    if (layer->insData.size() != 1) {
+        THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" that has not 1 input";
+    }
+    if (layer->outData.size() != 1) {
+        THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" that has not 1 output";
+    }
+
+    auto isp = layer->insData.front().lock();
+    if (!isp) {
+        THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" cannot get it's input";
+    }
+    // if dimensions of input layer not equal target dimensions - shape infer  or reshape layer required, so skipping those cases
+    auto osp = layer->outData.front();
+    if (isp->getDims() != osp->getDims()) {
+        THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<" its input layer("
+            << isp->getName() << ") and output(" << osp->getName() << ") have incompatible dimensions";
+    }
+
+    // remove isp->layer connection
+    for (auto i = isp->getInputTo().begin(); i != isp->getInputTo().end(); i++) {
+        if (i->second.get() == layer.get()) {
+            isp->getInputTo().erase(i);
+            break;
+        }
+    }
+
+    // remove osp->layer connection
+    for (auto  && outData : osp->getInputTo()) {
+        for (auto i = outData.second->insData.begin(); i != outData.second->insData.end(); i++) {
+            auto insData = i->lock();
+            if (!insData) {
+                THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<", its output layer(" <<
+                    outData.first << " has invalid input configuration";
+            }
+            auto creator = insData->getCreatorLayer().lock();
+            if (!creator) {
+                THROW_IE_EXCEPTION << "Cannot remove layer : "<< layer->name <<", its output layer(" <<
+                    outData.first << " has invalid input configuration";
+            }
+
+            // found layer that need to be removed
+            if (creator.get() == layer.get()) {
+                outData.second->insData.erase(i);
+                break;
+            }
+        }
+    }
+
+    // add isp->osp connections
+    for (auto  && outData : osp->getInputTo()) {
+        // new syntetic name to avoid duplicates in map
+        isp->getInputTo()[layer->name + "_" + outData.first] = outData.second;
+    }
+
+    // add osp->isp connections
+    for (auto  && outData : osp->getInputTo()) {
+        outData.second->insData.push_back(isp);
+    }
+
+    // removing layer->osp, and layer->isp connection not necessary - layer will delete it by itself
+}
+
+/**
+ * @brief Replaces layer with newLayer in network
+ * @param network  - graph containing the layer
+ * @param layer    - layer which need to replace
+ * @param newLayer - new layer instead of layer; it must have same name like a layer for replace
+ */
+void CNNNetSubstituteLayer(InferenceEngine::ICNNNetwork &network,
+    const InferenceEngine::CNNLayerPtr &layer,
+    const InferenceEngine::CNNLayerPtr &newLayer);
+
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/graph_transformer.cpp b/inference-engine/src/inference_engine/graph_transformer.cpp
index af0dd63ca..8c40f0803 100644
--- a/inference-engine/src/inference_engine/graph_transformer.cpp
+++ b/inference-engine/src/inference_engine/graph_transformer.cpp
@@ -1,28 +1,318 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include <assert.h>
+#include <cpp/ie_cnn_network.h>
+#include <details/ie_cnn_network_tools.h>
+#include <details/caseless.hpp>
 #include "graph_transformer.h"
+#include "cnn_network_impl.hpp"
+#include "blob_factory.hpp"
+#include "graph_tools.hpp"
+#include <vector>
+#include <string>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <shape_infer/const_infer/ie_const_infer_holder.hpp>
 
 namespace InferenceEngine {
 
-void replaceLayerWithNewLayer(ICNNNetwork &network, const CNNLayerPtr &layer, const CNNLayerPtr &newLayer) {
-    assert(layer->name == newLayer->name);
+std::vector<std::string>
+ConstTransformer::foldConstSubgraphsInternal(const std::map<std::string, bool>& constLayers, const BlobMap& constData,
+                                               const std::vector<CNNLayerPtr>& sortedLayers) {
+    std::vector<std::string> remainingConstLayers;
+    for (const auto& layer : sortedLayers) {
+        if (constLayers.find(layer->name) != constLayers.end()) {
+            // const layer doesn't need parent connections -> erase them
+            for (const auto& insData : layer->insData) {
+                auto& inputTo = insData.lock()->getInputTo();
+                inputTo.erase(layer->name);
+                // Notr: to resolve corner case above layers can be marked as const with const data, just to be removed properly..
+                // and maybe this logic wouldn't be needed
+                if (inputTo.empty()) {
+                    auto creator = insData.lock()->creatorLayer.lock();
+                    auto it = std::find(creator->outData.begin(), creator->outData.end(), insData.lock());
+                    if (it != creator->outData.end()) {
+                        network->removeData((*it)->name);
+                        creator->outData.erase(it);
+                    }
+                }
+            }
+            layer->insData.clear();
 
-    // Redirect srd data
-    for (auto& src : layer->insData) {
-        src.lock()->getInputTo()[layer->name] = newLayer;
+            if (constLayers.at(layer->name)) {
+                for (const auto& outData : layer->outData) {
+                    for (const auto& inputTo : outData->getInputTo()) {
+                        CNNLayerPtr inputToLayer;
+                        std::string inputToName;
+                        std::tie(inputToName, inputToLayer) = inputTo;
+                        auto& insData = inputToLayer->insData;
+                        auto insDataIt = std::find_if(insData.begin(), insData.end(),
+                                                      [&outData](const DataWeakPtr& current) {
+                                                          return current.lock()->name == outData->name;
+                                                      });
+                        // remove connection with const data, because for const child it's not needed, for dynamic - new one will be created
+                        if (insDataIt != insData.end()) {
+                            insDataIt = inputToLayer->insData.erase(insDataIt);
+                        }
+                    }
+                    network->removeData(outData->name);
+                }
+                network->removeLayer(layer->name);
+            } else {
+                // if only one output data is not const - do nothing, otherwise - run procedure below
+                // note: multiple const output data requires multiple layers with blob["custom"] to keep const data
+                bool keepConstData = layer->outData.size() == 1;
+                if (keepConstData) {
+                    auto outData = layer->outData[0];
+                    for (const auto& inputTo : outData->getInputTo()) {
+                        if (constLayers.find(inputTo.first) != constLayers.end()) {
+                            keepConstData = false;
+                        }
+                    }
+                }
+                if (keepConstData) {
+                    if (!constLayers.at(layer->name)) {
+                        auto outData = layer->outData[0];
+                        if (layer->blobs.find("custom") == layer->blobs.end()) {
+                            // if there's no const data - set it
+                            const auto it = constData.find(outData->name);
+                            if (it != constData.end()) {
+                                layer->blobs["custom"] = it->second;
+                            }
+                        }
+                        if (layer->type != "Const") {
+                            // layer was calculated during the Const Propagation, need to hide its semantic (type, params)
+                            LayerParams layerParams{layer->name + "__" + outData->name + "__Const", "Const",
+                                                    layer->precision};
+                            auto newLayer = std::make_shared<CNNLayer>(layerParams);
+                            for (const auto& data : layer->outData) {
+                                data->creatorLayer = newLayer;
+                            }
+                            newLayer->outData = layer->outData;
+                            newLayer->blobs["custom"] = layer->blobs["custom"];
+                            network->removeLayer(layer->name);
+                            network->addLayer(newLayer);
+                            remainingConstLayers.push_back(newLayer->name);
+                        } else {
+                            // Layer with `Const` type should be also considered on trimming shape inputs
+                            remainingConstLayers.push_back(layer->name);
+                        }
+                    }
+                } else {
+                    for (const auto& outData : layer->outData) {
+                        for (const auto& inputTo : outData->getInputTo()) {
+                            CNNLayerPtr inputToLayer;
+                            std::string inputToName;
+                            std::tie(inputToName, inputToLayer) = inputTo;
+                            auto& insData = inputToLayer->insData;
+                            auto insDataIt = std::find_if(insData.begin(), insData.end(),
+                                                          [&outData](const DataWeakPtr& current) {
+                                                              return current.lock()->name == outData->name;
+                                                          });
+                            // remove connection with const data, because for const child it's not needed, for dynamic - new one will be created
+                            if (insDataIt != insData.end()) {
+                                insDataIt = inputToLayer->insData.erase(insDataIt);
+                            }
+                            if (constLayers.find(inputToName) == constLayers.end()) {
+                                // next layer is not const, need to attach const data to it via blobs["custom"] of new Const layer
+                                LayerParams layerParams{layer->name + "__" + outData->name + "__Const", "Const",
+                                                        layer->precision};
+                                auto newLayer = std::make_shared<CNNLayer>(layerParams);
+                                remainingConstLayers.push_back(newLayer->name);
+                                const auto it = constData.find(outData->name);
+                                if (it != constData.end()) {
+                                    newLayer->blobs["custom"] = it->second;
+                                }
+                                auto newData = std::make_shared<Data>(outData->name + "__" + inputToName,
+                                                                      outData->getTensorDesc());
+                                newData->creatorLayer = newLayer;
+                                newData->inputTo[inputToName] = inputToLayer;
+                                newLayer->outData = {newData};
+                                network->addLayer(newLayer);
+                                network->getData(newData->name) = newData;
+                                inputToLayer->insData.insert(insDataIt, newData);
+                            }
+                        }
+                    }
+                    for (const auto& data : layer->outData) {
+                        network->removeData(data->name);
+                    }
+                    network->removeLayer(layer->name);
+                }
+            }
+        }
     }
-    newLayer->insData = layer->insData;
+    return remainingConstLayers;
+}
+
+const std::map<std::string, bool> ConstTransformer::getConstLayers(const std::vector<CNNLayerPtr>& sortedLayers) {
+    std::map<std::string, bool> mapConstLayers;
+    // collect all const layers, which inputs are const layers.
+    for (const auto& layer : sortedLayers) {
+        // Layers with "Shape" and "Const" type are Const by definition
+        if (layer->type == "Shape" || layer->type == "Const") {
+            mapConstLayers[layer->name] = false;
+        } else {
+            bool isAllInputsConst = true;
+            for (auto const& data : layer->insData) {
+                auto creatorName = data.lock()->creatorLayer.lock()->name;
+                if (mapConstLayers.find(creatorName) == mapConstLayers.end()) {
+                    isAllInputsConst = false;
+                }
+            }
+            if (isAllInputsConst && !layer->insData.empty()) mapConstLayers[layer->name] = false;
+        }
+    }
+    // Add mark for const layers, if it's used for shape taking layers as second input
+    // true - is used and can be deleted from graph, as no influence on data, false - opposite
+    std::map<std::string, bool> mapVisitedLayers = mapConstLayers;
+    for (auto rit = sortedLayers.rbegin(); rit != sortedLayers.rend(); rit++) {
+        auto currentLayer = (*rit);
+        std::string currentLayerName = currentLayer->name;
+        bool isCurrentConst = mapConstLayers.find(currentLayerName) != mapConstLayers.end();
+        for (int i = 0; i < currentLayer->insData.size(); i++) {
+            std::string creatorName;
+            if (currentLayer->insData[i].lock()) {
+                auto creator = currentLayer->insData[i].lock()->creatorLayer.lock();
+                if (creator) {
+                    creatorName = creator->name;
+                }
+            }
+            bool isCreatorConst = mapConstLayers.find(creatorName) != mapConstLayers.end();
+            if (isCreatorConst) {
+                // mark second const input of shape taking layers (Reshape, Interp..), if they wasn't visited before
+                if ((i == 1) && (shapeTaking.find(currentLayer->type)) != shapeTaking.end()) {
+                    if (!mapConstLayers[creatorName]) {
+                        if (!mapVisitedLayers.at(creatorName)) {
+                            mapConstLayers[creatorName] = true;
+                        }
+                    }
+                } else {
+                    if (isCurrentConst) {
+                        if (mapConstLayers.at(currentLayerName)) {
+                            if (!mapConstLayers[creatorName]) {
+                                if (!mapVisitedLayers.at(creatorName)) {
+                                    mapConstLayers[creatorName] = true;
+                                }
+                            }
+                        } else {
+                            mapConstLayers[creatorName] = false;
+                        }
+                    } else {
+                        mapConstLayers[creatorName] = false;
+                    }
+                }
+            }
+            mapVisitedLayers[creatorName] = true;
+        }
+        mapVisitedLayers[currentLayerName] = true;
+    }
+    return mapConstLayers;
+}
+
+const BlobMap ConstTransformer::getConstData(const std::map<std::string, bool>& constLayers, const std::vector<CNNLayerPtr>& sortedLayers) {
+    ShapeInfer::ConstInferHolder holder;
+    BlobMap constData;
+    auto getInputBlobs = [&constData](const std::vector<DataWeakPtr>& insData,
+                                      bool isForShape) -> std::vector<Blob::CPtr> {
+        std::vector<Blob::CPtr> inputBlobs;
+        // special case of Const layers: no inputs, no input blobs
+        if (insData.empty()) {
+            return {};
+        }
+        for (const auto& data : insData) {
+            std::string dataName = data.lock()->name;
+            if (constData.find(dataName) != constData.end()) {
+                // get blobs, inferred before
+                inputBlobs.push_back(constData.at(dataName));
+            } else {
+                // special case of Shape layer: no input data, but blob contains info about dimensions, layout and etc...
+                auto blob = make_blob_with_precision(data.lock()->getTensorDesc());
+                inputBlobs.push_back(blob);
+            }
+        }
+        return inputBlobs;
+    };
+
+    auto getOutputBlobs = [](const std::vector<DataPtr>& outData) -> std::vector<Blob::Ptr> {
+        std::vector<Blob::Ptr> outputBlobs;
+        for (const auto& data : outData) {
+            auto blob = make_blob_with_precision(data->getTensorDesc());
+            blob->allocate();
+            outputBlobs.push_back(blob);
+        }
+        return outputBlobs;
+    };
 
-    // Redirect dst data
-    for (auto& dst : layer->outData) {
-        dst->creatorLayer = newLayer;
+    for (const auto& layer : sortedLayers) {
+        if (constLayers.find(layer->name) != constLayers.end()) {
+            std::string layerName = layer->name;
+            bool isForShape = constLayers.at(layerName);
+            CNNNetwork cnnNetwork(network);
+            auto layer = cnnNetwork.getLayerByName(layerName.c_str());
+            auto implPtr = holder.getConstInferImpl(layer->type);
+            if (!implPtr && !isForShape)
+                THROW_IE_EXCEPTION << "Failed to find reference implementation for `"
+                                      + layer->name + "` Layer with `" + layer->type + "` Type on constant propagation";
+            if (!isForShape) {
+                auto outputBlobs = getOutputBlobs(layer->outData);
+                implPtr->infer(getInputBlobs(layer->insData, isForShape), layer->params, layer->blobs, outputBlobs);
+                for (int i = 0; i < layer->outData.size(); i++) {
+                    std::string dataName = layer->outData[i]->name;
+                    auto shapes = layer->outData[i]->getTensorDesc().getDims();
+                    outputBlobs[i]->Reshape(SizeVector(shapes.rbegin(), shapes.rend()),
+                                            TensorDesc::getLayoutByDims(shapes));
+                    constData[dataName] = outputBlobs[i];
+                }
+            }
+        }
     }
-    newLayer->outData = layer->outData;
+    return constData;
+}
+
+void ConstTransformer::trimShapeInputs(const std::vector<std::string>& constLayers) {
+    for (const auto& layerName : constLayers) {
+        auto layer = cnnNetwork.getLayerByName(layerName.c_str());
+        if (layer->outData.size() == 1 && layer->type == "Const" && layer->insData.empty()) {
+            auto constData = layer->outData[0];
+            std::map<std::string, CNNLayerPtr> inputToMap = constData->getInputTo();
+            for (const auto& inputTo : inputToMap) {
+                CNNLayerPtr inputToLayer = inputTo.second;
+                if (shapeTaking.find(inputToLayer->type) != shapeTaking.end()) {
+                    auto& insData = inputToLayer->insData;
+                    auto it = std::find_if(insData.begin(), insData.end(),
+                                           [&constData](const DataWeakPtr& current) {
+                                               return current.lock()->name == constData->name;
+                                           });
+                    if (it != insData.end() && std::distance(insData.begin(), it) == 1) {
+                        inputToLayer->insData.erase(it);
+                        constData->getInputTo().erase(inputTo.first);
+                    }
+                }
+            }
+            if (constData->inputTo.empty()) {
+                network->removeData(constData->name);
+                network->removeLayer(layer->name);
+            }
+        }
+    }
+}
+
+void ConstTransformer::foldConstSubgraphs() {
+    auto sortedLayers = details::CNNNetSortTopologically(*network);
+    auto constLayers = getConstLayers(sortedLayers);
+    auto constData = getConstData(constLayers, sortedLayers);
+    foldConstSubgraphsInternal(constLayers, constData, sortedLayers);
+}
 
-    network.addLayer(newLayer);
+void ConstTransformer::fullTrim() {
+    auto sortedLayers = details::CNNNetSortTopologically(*network);
+    auto constMapLayers = getConstLayers(sortedLayers);
+    auto constData = getConstData(constMapLayers, sortedLayers);
+    auto constLayers = foldConstSubgraphsInternal(constMapLayers, constData, sortedLayers);
+    trimShapeInputs(constLayers);
 }
 
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/graph_transformer.h b/inference-engine/src/inference_engine/graph_transformer.h
index 9d8014d8a..d98453568 100644
--- a/inference-engine/src/inference_engine/graph_transformer.h
+++ b/inference-engine/src/inference_engine/graph_transformer.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,16 +9,64 @@
 
 #pragma once
 
+#include <map>
+#include <vector>
+#include <string>
 #include <ie_icnn_network.hpp>
+#include <details/caseless.hpp>
+#include "cnn_network_impl.hpp"
 
 namespace InferenceEngine {
 
 /**
- * @brief Replaces layer with newLayer in network
- * @param network  - graph containing the layer
- * @param layer    - layer which need to replace
- * @param newLayer - new layer instead of layer; it must have same name like a layer for replace
+ * @brief TBD
  */
-void replaceLayerWithNewLayer(ICNNNetwork &network, const CNNLayerPtr &layer, const CNNLayerPtr &newLayer);
+class INFERENCE_ENGINE_API_CLASS(ConstTransformer) {
+public:
+    explicit ConstTransformer(details::CNNNetworkImpl* _network) {
+        if (!_network) THROW_IE_EXCEPTION << "[ERROR]: Failed to init ConstTransformer with null pointer of network";
+        network = _network;
+        cnnNetwork = CNNNetwork(network);
+    }
+
+    /**
+     * @brief calculates const layers, combines const subgraph into a single const layers
+     */
+    void foldConstSubgraphs();
+
+    /**
+      * @brief folds Const Subgraphs and removes second input of Reshape-like layers (Interp, Gather, Resample, ...)
+      */
+    void fullTrim();
+
+protected:
+    /**
+     * @brief collect all const layers with marking if it defines shape (1 - for shape, 0 - otherwise)
+     */
+    virtual const std::map<std::string, bool> getConstLayers(const std::vector<CNNLayerPtr>& sortedLayers);
+
+    /**
+     * @brief TBD
+     */
+    virtual const BlobMap
+        getConstData(const std::map<std::string, bool>& constLayers, const std::vector<CNNLayerPtr>& sortedLayers);
+
+    /**
+     * @brief TBD
+     */
+    virtual std::vector<std::string>
+    foldConstSubgraphsInternal(const std::map<std::string, bool>& constLayers, const BlobMap& constData,
+                               const std::vector<CNNLayerPtr>& sortedLayers);
+
+    /**
+     * @brief TBD
+     */
+    virtual void trimShapeInputs(const std::vector<std::string>& constLayers);
+
+private:
+    const details::caseless_set<std::string> shapeTaking = {"Reshape", "Resample", "Interp"};
+    details::CNNNetworkImpl* network;
+    CNNNetwork cnnNetwork;
+};
 
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_algorithm.hpp b/inference-engine/src/inference_engine/ie_algorithm.hpp
index d0c875015..d5662e1bd 100644
--- a/inference-engine/src/inference_engine/ie_algorithm.hpp
+++ b/inference-engine/src/inference_engine/ie_algorithm.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -38,5 +38,11 @@ auto product(TIterator beg, TIterator en) -> typename std::remove_reference<decl
                            static_cast<typename std::remove_reference<decltype(*beg)>::type>(1),
                            std::multiplies<typename std::remove_reference<decltype(*beg)>::type>());
 }
+
+inline void clipping(int* idx, const int min, const int max) {
+    (*idx) = ((*idx) > min) ? (*idx) : min;
+    (*idx) = ((*idx) < max) ? (*idx) : (max - 1);
+}
+
 }  // namespace details
-}  // namespace InferenceEngine
-\ No newline at end of file
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_blob_common.cpp b/inference-engine/src/inference_engine/ie_blob_common.cpp
index ca991c7ac..7098ca283 100644
--- a/inference-engine/src/inference_engine/ie_blob_common.cpp
+++ b/inference-engine/src/inference_engine/ie_blob_common.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_blob_proxy.hpp b/inference-engine/src/inference_engine/ie_blob_proxy.hpp
index b770590ea..cb0615bfa 100644
--- a/inference-engine/src/inference_engine/ie_blob_proxy.hpp
+++ b/inference-engine/src/inference_engine/ie_blob_proxy.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_cnn_layer_builder.cpp b/inference-engine/src/inference_engine/ie_cnn_layer_builder.cpp
new file mode 100644
index 000000000..7e015db2c
--- /dev/null
+++ b/inference-engine/src/inference_engine/ie_cnn_layer_builder.cpp
@@ -0,0 +1,96 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <ie_cnn_layer_builder.h>
+
+using namespace InferenceEngine;
+
+std::map<std::string, std::string> Builder::convertParameters2Strings(const std::map<std::string, Parameter>& parameters) {
+    std::map<std::string, std::string> oldParams;
+    for (const auto& param : parameters) {
+        // skip blobs and ports
+        if (param.second.is<Blob::CPtr>() || param.second.is<Blob::Ptr>() || param.second.is<std::vector<Port>>()
+                || param.second.is<PreProcessInfo>())
+            continue;
+        if (param.second.is<std::string>() || param.second.is<std::vector<std::string>>()) {
+            oldParams[param.first] = Builder::convertParameter2String<std::string>(param.second);
+        } else if (param.second.is<int>() || param.second.is<std::vector<int>>()) {
+            oldParams[param.first] = Builder::convertParameter2String<int>(param.second);
+        } else if (param.second.is<float>() || param.second.is<std::vector<float>>()) {
+            oldParams[param.first] = Builder::convertParameter2String<float>(param.second);
+        } else if (param.second.is<unsigned int>() || param.second.is<std::vector<unsigned int>>()) {
+            oldParams[param.first] = Builder::convertParameter2String<unsigned int>(param.second);
+        } else if (param.second.is<size_t>() || param.second.is<std::vector<size_t>>()) {
+            oldParams[param.first] = Builder::convertParameter2String<size_t>(param.second);
+        } else if (param.second.is<bool>() || param.second.is<std::vector<bool>>()) {
+            oldParams[param.first] = Builder::convertParameter2String<bool>(param.second);
+        } else {
+            THROW_IE_EXCEPTION << "Parameter " << param.first << " has unsupported parameter type!";
+        }
+    }
+    return oldParams;
+}
+
+Builder::Layer Builder::builderFromCNNLayer(const CNNLayerPtr& cnnLayer) {
+    Builder::Layer layer(cnnLayer->type, cnnLayer->name);
+    std::vector<Port> inputPorts;
+    for (const auto& data : cnnLayer->insData) {
+        auto lockedData = data.lock();
+        if (!lockedData)
+            continue;
+        inputPorts.emplace_back(lockedData->getTensorDesc().getDims());
+    }
+
+    std::vector<Port> outputPorts;
+    for (const auto& data : cnnLayer->outData) {
+        outputPorts.emplace_back(data->getTensorDesc().getDims());
+    }
+
+    size_t inputsCount = inputPorts.size();
+    std::map<std::string, Blob::Ptr> blobs = cnnLayer->blobs;
+    if (blobs.find("weights") != blobs.end()) {
+        auto port = Port();
+        port.setParameter("type", "weights");
+        inputPorts.push_back(port);
+    }
+    if (blobs.find("biases") != blobs.end()) {
+        if (inputsCount == inputPorts.size()) {
+            auto port = Port();
+            port.setParameter("type", "weights");
+            inputPorts.push_back(port);
+        }
+
+        auto port = Port();
+        port.setParameter("type", "biases");
+        inputPorts.push_back(port);
+    }
+    for (const auto& it : blobs) {
+        if (it.first == "weights" || it.first == "biases")
+            continue;
+        auto port = Port();
+        port.setParameter("type", it.first);
+        inputPorts.emplace_back(port);
+    }
+
+    std::map<std::string, Parameter> params;
+    for (const auto& it : cnnLayer->params) {
+        params[it.first] = it.second;
+    }
+
+    layer.setInputPorts(inputPorts).setOutputPorts(outputPorts).setParameters(params);
+
+    Builder::ConverterRegister::convert(cnnLayer, layer);
+
+    return layer;
+}
+
+Builder::ConverterRegister::ConverterRegister(const std::string& type, const std::function<void(const CNNLayerPtr&, Layer&)>& converter) {
+    if (getConvertersHolder().converters.find(type) == getConvertersHolder().converters.end())
+        getConvertersHolder().converters[type] = converter;
+}
+
+Builder::ConvertersHolder &Builder::ConverterRegister::getConvertersHolder() {
+    static Builder::ConvertersHolder holder;
+    return holder;
+}
diff --git a/inference-engine/src/inference_engine/ie_cnn_layer_builder.h b/inference-engine/src/inference_engine/ie_cnn_layer_builder.h
index 8cad3ca2e..85d058c41 100644
--- a/inference-engine/src/inference_engine/ie_cnn_layer_builder.h
+++ b/inference-engine/src/inference_engine/ie_cnn_layer_builder.h
@@ -1,20 +1,73 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
 #include <details/caseless.hpp>
-#include <ie_inetwork.hpp>
+#include <ie_network.hpp>
+#include <ie_builders.hpp>
 #include <ie_layers.h>
 #include <ie_blob.h>
 #include <memory>
 #include <string>
+#include <vector>
+#include <map>
 
 namespace InferenceEngine {
 
 namespace Builder {
 
+template<class T>
+inline std::string convertParameter2String(const Parameter& parameter) {
+    if (parameter.is<std::vector<T>>()) {
+        std::vector<T> params = parameter.as<std::vector<T>>();
+        std::string result;
+        for (const auto& param : params) {
+            if (!result.empty())
+                result += ",";
+            result += convertParameter2String<T>(param);
+        }
+        return result;
+    }
+    return std::to_string(parameter.as<T>());
+}
+template<>
+inline std::string convertParameter2String<std::string>(const Parameter& parameter) {
+    return parameter.as<std::string>();
+}
+
+std::map<std::string, std::string> convertParameters2Strings(const std::map<std::string, Parameter>& parameters);
+Layer builderFromCNNLayer(const CNNLayerPtr& cnnLayer);
+
+struct ConvertersHolder {
+    details::caseless_map<std::string, std::function<void(const CNNLayerPtr& cnnLayer, Layer&)>> converters;
+};
+
+/**
+ * @brief This class registers layer validators
+ */
+class ConverterRegister {
+public:
+    /**
+     * @brief The constructor registers new layer validator
+     * @param type Layer type
+     * @param validator Layer validator
+     */
+    explicit ConverterRegister(const std::string& type, const std::function<void(const CNNLayerPtr&, Layer&)>& converter);
+
+    static void convert(const CNNLayerPtr& cnnLayer, Layer& layer) {
+        if (getConvertersHolder().converters.find(layer.getType()) != getConvertersHolder().converters.end())
+            getConvertersHolder().converters[layer.getType()](cnnLayer, layer);
+    }
+
+private:
+    static ConvertersHolder& getConvertersHolder();
+};
+
+#define REG_CONVERTER_FOR(__type, __converter) \
+static InferenceEngine::Builder::ConverterRegister _reg_converter_##__type(#__type, __converter)
+
 class BaseConverter {
 public:
     explicit BaseConverter(const std::string& type): type(type) {}
@@ -37,20 +90,30 @@ public:
 
         auto * weightLayerPtr = dynamic_cast<WeightableLayer *>(res.get());
 
-        for (auto& it : layer->getParameters()->getConstantData()) {
-            res->blobs[it.first] = std::const_pointer_cast<Blob>(it.second);
+        for (const auto& port : layer->getInputPorts()) {
+            if (port.getParameters().find("type") == port.getParameters().end() ||
+                    port.getData()->getData()->cbuffer() == nullptr)
+                continue;
+            res->blobs[port.getParameters().at("type")] = port.getData()->getData();
             if (weightLayerPtr == nullptr)
                 continue;
-            if (it.first == "weights") {
-                weightLayerPtr->_weights =  std::const_pointer_cast<Blob>(it.second);
-            } else if (it.first == "biases") {
-                weightLayerPtr->_biases =  std::const_pointer_cast<Blob>(it.second);
+            if (port.getParameters().at("type").as<std::string>() == "weights") {
+                weightLayerPtr->_weights = port.getData()->getData();
+            } else if (port.getParameters().at("type").as<std::string>() == "biases") {
+                weightLayerPtr->_biases = port.getData()->getData();
             }
         }
 
-        for (const auto& it : layer->getParameters()->getParameters()) {
-            res->params[it.first] = it.second;
+        // For constant layers
+        for (auto& it : layer->getParameters()) {
+            if (it.second.is<Blob::CPtr>()) {
+                res->blobs[it.first] = std::const_pointer_cast<Blob>(it.second.as<Blob::CPtr>());
+            } else if (it.second.is<Blob::Ptr>()) {
+                res->blobs[it.first] = it.second.as<Blob::Ptr>();
+            }
         }
+
+        res->params = convertParameters2Strings(layer->getParameters());
         return res;
     }
 
@@ -75,13 +138,13 @@ public:
                 {"tanh", std::make_shared<LayerConverter<InferenceEngine::CNNLayer>>("TanH")},
         };
 
-        auto typeIt = layer->getParameters()->getParameters().find("type");
-        if (typeIt == layer->getParameters()->getParameters().end())
+        auto typeIt = layer->getParameters().find("type");
+        if (typeIt == layer->getParameters().end())
             THROW_IE_EXCEPTION << "Unsupported Activation layer. Type is unknown.";
 
         auto activationBuilder = activationCreators.find(typeIt->second);
         if (activationBuilder == activationCreators.end()) {
-            THROW_IE_EXCEPTION << "Unsupported Activation layer type: " << typeIt->second.asString();
+            THROW_IE_EXCEPTION << "Unsupported Activation layer type: " << typeIt->second.as<std::string>();
         }
 
         auto activation = activationBuilder->second->createLayer(layer, precision);
@@ -98,5 +161,28 @@ public:
     }
 };
 
+class RNNSequenceConverter: public BaseConverter {
+public:
+    RNNSequenceConverter(): BaseConverter("RNN") {}
+
+    CNNLayer::Ptr createLayer(const std::shared_ptr<const ILayer>& layer, Precision precision) override {
+        auto rnnLayer = LayerConverter<InferenceEngine::RNNSequenceLayer>("RNN").createLayer(layer, precision);
+        rnnLayer->type = "RNN";
+        std::string type = layer->getType();
+        size_t pos = type.find("Sequence");
+        if (pos != std::string::npos)
+            type.erase(pos);
+        rnnLayer->params["cell_type"] = type;
+        return rnnLayer;
+    }
+
+    bool canCreate(const std::string& nodeType) const override {
+        static const details::caseless_set<std::string> supportedRnnTypes {
+            "LSTMSequence", "GRUSequence", "RNNSequence"
+        };
+        return supportedRnnTypes.find(nodeType) != supportedRnnTypes.end();
+    }
+};
+
 }  // namespace Builder
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp b/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp
index 2db4c2aae..12349aa88 100644
--- a/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp
+++ b/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -141,7 +141,7 @@ StatusCode CNNNetReaderImpl::ReadNetwork(pugi::xml_document& xmlDoc) {
 
         _version = GetFileVersion(root);
         if (_version < 1) THROW_IE_EXCEPTION << "deprecated IR version: " << _version;
-        if (_version > 4) THROW_IE_EXCEPTION << "cannot parse future versions: " << _version;
+        if (_version > 5) THROW_IE_EXCEPTION << "cannot parse future versions: " << _version;
         _parser = parserCreator->create(_version);
         network = _parser->Parse(root);
         name = network->getName();
diff --git a/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.h b/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.h
index fb9bd494a..cd9214425 100644
--- a/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.h
+++ b/inference-engine/src/inference_engine/ie_cnn_net_reader_impl.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_context.cpp b/inference-engine/src/inference_engine/ie_context.cpp
index 8f8335b7a..58d727d47 100644
--- a/inference-engine/src/inference_engine/ie_context.cpp
+++ b/inference-engine/src/inference_engine/ie_context.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_data.cpp b/inference-engine/src/inference_engine/ie_data.cpp
index 76266203d..8f9173034 100644
--- a/inference-engine/src/inference_engine/ie_data.cpp
+++ b/inference-engine/src/inference_engine/ie_data.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -57,7 +57,7 @@ const TensorDesc& Data::getTensorDesc() const {
 }
 
 bool Data::isInitialized() const {
-    return !dims.empty() || !tensorDesc.getDims().empty();
+    return !dims.empty() || !tensorDesc.getDims().empty() || layout == SCALAR;
 }
 
 void Data::setDims(const SizeVector &a_dims) {
@@ -84,6 +84,14 @@ void Data::setLayout(Layout layout) {
     this->layout = layout;
 }
 
+void Data::reshape(const SizeVector &a_dims, Layout a_layout) {
+    dims = a_dims;
+    layout = a_layout;
+    std::reverse(dims.begin(), dims.end());
+
+    tensorDesc.reshape(a_dims, layout);
+}
+
 CNNLayerWeakPtr &Data::getCreatorLayer() {
     return creatorLayer;
 }
diff --git a/inference-engine/src/inference_engine/ie_device.cpp b/inference-engine/src/inference_engine/ie_device.cpp
index 3094414e3..2090e7fd5 100644
--- a/inference-engine/src/inference_engine/ie_device.cpp
+++ b/inference-engine/src/inference_engine/ie_device.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -57,6 +57,11 @@ FindPluginResponse InferenceEngine::findPlugin(const FindPluginRequest& req) {
         case TargetDevice::eHETERO:
             pluginVec.push_back("HeteroPlugin");
             break;
+        case TargetDevice::eKMB:
+#ifdef ENABLE_KMB
+            pluginVec.push_back("kmbPlugin");
+#endif
+            break;
 
         default:
             THROW_IE_EXCEPTION << "Cannot find plugin for device: " << getDeviceName(req.device);
diff --git a/inference-engine/src/inference_engine/ie_format_parser.cpp b/inference-engine/src/inference_engine/ie_format_parser.cpp
index 2acd26709..57fa00afd 100644
--- a/inference-engine/src/inference_engine/ie_format_parser.cpp
+++ b/inference-engine/src/inference_engine/ie_format_parser.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,7 +12,6 @@
 #include <fstream>
 #include <sstream>
 #include "ie_icnn_network_stats.hpp"
-#include "ie_layers_prv.h"
 
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
@@ -335,6 +334,7 @@ CNNNetworkImplPtr FormatParser::Parse(pugi::xml_node& root) {
                             pars_info.inputPorts[i].dims,
                             pars_info.inputPorts[i].precision,
                             TensorDesc::getLayoutByDims(pars_info.inputPorts[i].dims)));
+                    data->setDims(pars_info.inputPorts[i].dims);
 
                     layer->insData[i] = data;
                     data->inputTo[layer->name] = layer;
@@ -354,6 +354,17 @@ CNNNetworkImplPtr FormatParser::Parse(pugi::xml_node& root) {
     if (!_network->allLayers().size())
         THROW_IE_EXCEPTION << "Incorrect model! Network doesn't contain layers.";
 
+    size_t inputLayersNum(0);
+    CaselessEq<std::string> cmp;
+    for (const auto& kvp : _network->allLayers()) {
+        const CNNLayer::Ptr& layer = kvp.second;
+        if (cmp(layer->type, "Input") || cmp(layer->type, "Const"))
+            inputLayersNum++;
+    }
+
+    if (!inputLayersNum && !cmp(root.name(), "body"))
+        THROW_IE_EXCEPTION << "Incorrect model! Network doesn't contain input layers.";
+
     // check all input ports are occupied
     for (const auto& kvp : _network->allLayers()) {
         const CNNLayer::Ptr& layer = kvp.second;
@@ -378,7 +389,10 @@ CNNNetworkImplPtr FormatParser::Parse(pugi::xml_node& root) {
     OutputsDataMap outputsInfo;
     _network->getOutputsInfo(outputsInfo);
     for (auto outputInfo : outputsInfo) {
-        outputInfo.second->setPrecision(Precision::FP32);
+        if (outputInfo.second->getPrecision() != Precision::FP32 &&
+            outputInfo.second->getPrecision() != Precision::I32) {
+            outputInfo.second->setPrecision(Precision::FP32);
+        }
     }
 
     if (_version == 1) {
@@ -414,11 +428,13 @@ inline Blob::Ptr GetTypedBlobFromSegment(const TBlob<uint8_t>::Ptr& weights, con
 Blob::Ptr FormatParser::GetBlobFromSegment(const TBlob<uint8_t>::Ptr& weights, const WeightSegment& segment) const {
     if (segment.precision == Precision::FP32) {
         return GetTypedBlobFromSegment<float>(weights, segment);
+    } else if (segment.precision == Precision::I32) {
+        return GetTypedBlobFromSegment<int32_t>(weights, segment);
     } else if (segment.precision == Precision::I16 || segment.precision == Precision::Q78 || segment.precision == Precision::FP16) {
         return GetTypedBlobFromSegment<short>(weights, segment);
     } else if (segment.precision == Precision::U8) {
         return GetTypedBlobFromSegment<uint8_t>(weights, segment);
-    } else if (segment.precision == Precision::I8) {
+    } else if (segment.precision == Precision::I8 || segment.precision == Precision::BIN) {
         return GetTypedBlobFromSegment<int8_t>(weights, segment);
     } else {
         THROW_IE_EXCEPTION << "precision " << segment.precision << " is not supported...";
@@ -436,7 +452,18 @@ void FormatParser::SetWeights(const TBlob<uint8_t>::Ptr& weights) {
         WeightableLayer* pWL = dynamic_cast<WeightableLayer*>(kvp.second.get());
         if (pWL != nullptr) {
             if (lprms.blobs.find("weights") != lprms.blobs.end()) {
-                pWL->_weights = GetBlobFromSegment(weights, lprms.blobs["weights"]);
+                if (lprms.prms.type == "BinaryConvolution") {
+                    auto segment = lprms.blobs["weights"];
+                    if (segment.getEnd() > weights->size())
+                        THROW_IE_EXCEPTION << "segment exceeds given buffer limits. Please, validate weights file";
+                    size_t noOfElement = segment.size;
+                    SizeVector w_dims({noOfElement});
+                    typename TBlobProxy<uint8_t>::Ptr binBlob(new TBlobProxy<uint8_t>(Precision::BIN, Layout::C, weights, segment.start, w_dims));
+
+                    pWL->_weights = binBlob;
+                } else {
+                    pWL->_weights = GetBlobFromSegment(weights, lprms.blobs["weights"]);
+                }
                 pWL->blobs["weights"] = pWL->_weights;
             }
             if (lprms.blobs.find("biases") != lprms.blobs.end()) {
@@ -488,10 +515,6 @@ void FormatParser::ParseDims(SizeVector& dims, const pugi::xml_node &parentNode)
         dims.push_back(dim);
     }
 
-    if (dims.empty()) {
-        THROW_IE_EXCEPTION << "input must have dimensions";
-    }
-
     if (_version == 1)
         dims.insert(dims.begin(), 1);  // for batch, in version 1, in version 2 it is already there.
 }
@@ -670,6 +693,15 @@ const std::vector<std::shared_ptr<BaseCreator> >& FormatParser::getCreators() co
         std::make_shared<LayerCreator<GemmLayer>>("Gemm"),
         std::make_shared<LayerCreator<PadLayer>>("Pad"),
         std::make_shared<LayerCreator<GatherLayer>>("Gather"),
+        std::make_shared<LayerCreator<StridedSliceLayer>>("StridedSlice"),
+        std::make_shared<LayerCreator<ShuffleChannelsLayer>>("ShuffleChannels"),
+        std::make_shared<LayerCreator<DepthToSpaceLayer>>("DepthToSpace"),
+        std::make_shared<LayerCreator<SpaceToDepthLayer>>("SpaceToDepth"),
+        std::make_shared<LayerCreator<ReverseSequenceLayer>>("ReverseSequence"),
+        std::make_shared<LayerCreator<SqueezeLayer>>("Squeeze"),
+        std::make_shared<LayerCreator<UnsqueezeLayer>>("Unsqueeze"),
+        std::make_shared<LayerCreator<RangeLayer>>("Range"),
+        std::make_shared<LayerCreator<ExpandLayer>>("Expand"),
         std::make_shared<LayerCreator<ScaleShiftLayer>>("ScaleShift"),
         std::make_shared<LayerCreator<PReLULayer>>("PReLU"),
         std::make_shared<LayerCreator<CropLayer>>("Crop"),
@@ -680,7 +712,13 @@ const std::vector<std::shared_ptr<BaseCreator> >& FormatParser::getCreators() co
         std::make_shared<LayerCreator<BatchNormalizationLayer>>("BatchNormalization"),
         std::make_shared<TILayerCreator>("TensorIterator"),
         std::make_shared<LayerCreator<LSTMCell>>("LSTMCell"),
-        std::make_shared<LayerCreator<RNNLayer>>("RNN"),
+        std::make_shared<LayerCreator<GRUCell>>("GRUCell"),
+        std::make_shared<LayerCreator<RNNCell>>("RNNCell"),
+        std::make_shared<LayerCreator<RNNSequenceLayer>>("RNNSequence"),
+        std::make_shared<LayerCreator<RNNSequenceLayer>>("GRUSequence"),
+        std::make_shared<LayerCreator<RNNSequenceLayer>>("LSTMSequence"),
+        std::make_shared<LayerCreator<QuantizeLayer>>("Quantize"),
+        std::make_shared<LayerCreator<BinaryConvolutionLayer>>("BinaryConvolution"),
     };
     return creators;
 }
diff --git a/inference-engine/src/inference_engine/ie_format_parser.h b/inference-engine/src/inference_engine/ie_format_parser.h
index 6820b1ec0..11e5f26b5 100644
--- a/inference-engine/src/inference_engine/ie_format_parser.h
+++ b/inference-engine/src/inference_engine/ie_format_parser.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_graph_splitter.cpp b/inference-engine/src/inference_engine/ie_graph_splitter.cpp
index 630287a0f..47b5d940d 100644
--- a/inference-engine/src/inference_engine/ie_graph_splitter.cpp
+++ b/inference-engine/src/inference_engine/ie_graph_splitter.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_graph_splitter.hpp b/inference-engine/src/inference_engine/ie_graph_splitter.hpp
index 30e5f37d8..3252632da 100644
--- a/inference-engine/src/inference_engine/ie_graph_splitter.hpp
+++ b/inference-engine/src/inference_engine/ie_graph_splitter.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_layer_parsers.cpp b/inference-engine/src/inference_engine/ie_layer_parsers.cpp
index 886c759fb..ca86df6df 100644
--- a/inference-engine/src/inference_engine/ie_layer_parsers.cpp
+++ b/inference-engine/src/inference_engine/ie_layer_parsers.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -32,6 +32,7 @@ CNNLayer::Ptr ActivationLayerCreator::CreateLayer(pugi::xml_node& node, LayerPar
 
     static caseless_map<std::string, std::shared_ptr<BaseCreator>> activationCreators = {
         {"relu", std::make_shared<LayerCreator<ReLULayer>>("ReLU")},
+        {"relu6", std::make_shared<LayerCreator<ReLU6Layer>>("ReLU6")},
         {"prelu", std::make_shared<LayerCreator<PReLULayer>>("PReLU")},
         {"clamp", std::make_shared<LayerCreator<ClampLayer>>("Clamp")},
         {"elu", std::make_shared<LayerCreator<CNNLayer>>("ELU")},
diff --git a/inference-engine/src/inference_engine/ie_layer_parsers.h b/inference-engine/src/inference_engine/ie_layer_parsers.h
index f2a7ce968..5af4a03c1 100644
--- a/inference-engine/src/inference_engine/ie_layer_parsers.h
+++ b/inference-engine/src/inference_engine/ie_layer_parsers.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_layer_validators.cpp b/inference-engine/src/inference_engine/ie_layer_validators.cpp
index b39a054cf..86248f104 100644
--- a/inference-engine/src/inference_engine/ie_layer_validators.cpp
+++ b/inference-engine/src/inference_engine/ie_layer_validators.cpp
@@ -1,9 +1,8 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "ie_layers.h"
-#include "ie_layers_prv.h"
 #include "ie_layer_validators.hpp"
 #include "debug.h"
 #include "xml_parse_utils.h"
@@ -11,6 +10,8 @@
 #include <string>
 #include <map>
 #include <vector>
+#include <cmath>
+#include <limits>
 #include <ie_iextension.h>
 #include <ie_format_parser.h>
 
@@ -20,6 +21,8 @@ namespace InferenceEngine {
 
 using namespace details;
 using std::vector;
+using std::string;
+using std::map;
 
 template <typename T, typename P>
 inline bool one_of(T val, P item) { return val == item; }
@@ -44,17 +47,21 @@ void CNNLayer::validateLayer() {
 }
 
 struct WeightableParams {
-    size_t kernel_w, kernel_h, outputs, groups;
-    bool isKernelFromInput;
-
-    WeightableParams(size_t _outputs, bool _isKernelFromInput, size_t _groups = 0, size_t _kernel_h = 0,
-                     size_t _kernel_w = 0) : outputs(_outputs), isKernelFromInput(_isKernelFromInput),
-                                             kernel_h(_kernel_h), kernel_w(_kernel_w),
-                                             groups(_groups) {}
+    std::vector<size_t>  _kernel;
+    size_t _outputs = 0lu;
+    size_t _groups = 1lu;
+    bool _isKernelFromInput = false;
+
+    WeightableParams(size_t outputs, bool isKernelFromInput, size_t groups = 0, const std::vector<size_t>& kernel = {}) :
+                _kernel(kernel),
+                _outputs(outputs),
+                _groups(groups),
+                _isKernelFromInput(isKernelFromInput) {}
 };
 
 void checkWeightable(const std::map<std::string, Blob::Ptr>& blobs,
-                     const vector<SizeVector>& inShapes, WeightableParams params,
+                     const vector<SizeVector>& inShapes,
+                     WeightableParams params,
                      const SizeVector& numDims) {
     if (inShapes.size() != 1)
         THROW_IE_EXCEPTION << "Number of inputs (" << inShapes.size() << ") is not equal to expected ones (1)";
@@ -75,18 +82,18 @@ void checkWeightable(const std::map<std::string, Blob::Ptr>& blobs,
 
     if (firstInputShape.empty()) THROW_IE_EXCEPTION << "Input shape can't be empty";
 
-    size_t KW = 1, KH = 1, IC, OC;
+    size_t IC, OC;
+    std::vector<size_t> kernel;
     IC = firstInputShape[1];
-    if (params.isKernelFromInput) {
-        if (firstInputShape.size() == 4) {
-            KH = firstInputShape[2];
-            KW = firstInputShape[3];
-        }
+    if (params._isKernelFromInput) {
+        for (int i = 1; i <= inputSize - 2; i++)
+            kernel.push_back(firstInputShape[inputSize - i]);
     } else {
-        KH = params.kernel_h;
-        KW = params.kernel_w;
+        for (auto k : params._kernel) {
+            kernel.push_back(k);
+        }
     }
-    OC = params.outputs;
+    OC = params._outputs;
 
     auto it = blobs.find("weights");
     if (it != blobs.end()) {  // TODO: return with fixing shape infer tests: THROW_IE_EXCEPTION << "Invalid blobs: no weights";
@@ -94,12 +101,22 @@ void checkWeightable(const std::map<std::string, Blob::Ptr>& blobs,
         if (weights == nullptr || weights->dims().empty()) THROW_IE_EXCEPTION << "Weights can't be empty";
 
         auto weightsSize = details::product(weights->dims());
-        size_t expectedWeightsSize = OC * KW * KH * IC;
-        if (params.groups) expectedWeightsSize /= params.groups;
+        size_t expectedWeightsSize = OC * IC;
+        for (auto k : kernel) {
+            expectedWeightsSize *= k;
+        }
+        if (params._groups) expectedWeightsSize /= params._groups;
         if (expectedWeightsSize != weightsSize) {
-            THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(firstInputShape) << " make Kernels(" << KH << "x"
-                               << KW << "), Channels(" << IC << "), Output depth(" << OC << "), Groups("
-                               << params.groups << ") not matching weights size: " << weightsSize;
+            std::string ker_str;
+            for (int i = 0; i < params._kernel.size(); i++) {
+                if (!ker_str.empty())
+                    ker_str += "x";
+                ker_str += std::to_string(kernel[i]);
+            }
+            THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(firstInputShape) << " make Kernels(" << ker_str <<
+                               "), Channels(" << IC << "), Output depth(" << OC << "), Groups("
+                               << params._groups << ") not matching weights size: "
+                               << expectedWeightsSize << " vs " << weightsSize;
         }
     }
 
@@ -114,6 +131,30 @@ void checkWeightable(const std::map<std::string, Blob::Ptr>& blobs,
     }
 }
 
+void checkDims(const std::vector<SizeVector>& shapes, const vector<int>& expected_shape_size) {
+    for (auto i : shapes) {
+        if (i.empty()) {
+            THROW_IE_EXCEPTION << " Failed with invalid shapes: dimension is empty";
+        }
+        auto iter = std::find(expected_shape_size.begin(), expected_shape_size.end(), i.size());
+        if (iter == expected_shape_size.end()) {
+            THROW_IE_EXCEPTION << " Failed with invalid shapes: dimension is invalid";
+        }
+    }
+}
+
+void checkNumOfInput(const std::vector<SizeVector>& inShapes, const vector<int>& expected_num_of_shapes) {
+    bool shape_was_found = false;
+    for (const auto& i : expected_num_of_shapes) {
+        if (inShapes.size() == i) {
+            shape_was_found = true;
+        }
+    }
+    if (!shape_was_found) {
+        THROW_IE_EXCEPTION << "Number of inputs (" << inShapes.size() << ") is not equal to expected ones";
+    }
+}
+
 LayerValidators* LayerValidators::getInstance() {
     if (!_instance) {
         _instance = new LayerValidators();
@@ -145,19 +186,27 @@ void FullyConnectedValidator::parseParams(CNNLayer* layer) {
 }
 
 void FullyConnectedValidator::checkParams(const CNNLayer* layer) {
-    // TODO: check that values belong to the scope of the definition according to spec
+    auto casted = dynamic_cast<const FullyConnectedLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of FullyConnectedLayer class";
+    }
+    unsigned int _out_num = casted->GetParamAsUInt("out-size");
 }
 
 void FullyConnectedValidator::checkCorrespondence(const CNNLayer* layer,
                                                   const std::map<std::string, Blob::Ptr>& blobs,
                                                   const vector<SizeVector>& inShapes) const {
     const auto casted = dynamic_cast<const FullyConnectedLayer*>(layer);
-    if (!casted) THROW_IE_EXCEPTION << "Layer is not instance of FullyConnectedLayer class";
-    checkWeightable(blobs, inShapes, {casted->_out_num, true, 1}, {4, 2});
+    if (!casted) THROW_IE_EXCEPTION << "Layer is not instance of FullyConnected layer class";
+    checkWeightable(blobs, inShapes, {casted->_out_num, true, 1}, {2, 4, 5});
 }
 
 FullyConnectedValidator::FullyConnectedValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void FullyConnectedValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void CropValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<CropLayer*>(layer);
     if (!casted) {
@@ -204,9 +253,8 @@ void CropValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>&
         THROW_IE_EXCEPTION << "Layer is not instance of CropLayer class";
     }
     size_t numInputs = inShapes.size();
-    if (numInputs != 1 && numInputs != 2) {
-        THROW_IE_EXCEPTION << "Crop can take only 1 or 2 inputs, but actually it has: " << numInputs;
-    }
+    checkNumOfInput(inShapes, {1, 2});
+
     auto firstShape = inShapes[0];
     size_t shapeSize = firstShape.size();
     for (size_t i = 0; i < casted->axis.size(); i++) {
@@ -326,21 +374,50 @@ void ConvolutionValidator::checkParams(const CNNLayer* layer) {
     if (!casted) {
         THROW_IE_EXCEPTION << "Layer is not instance of ConvolutionLayer class";
     }
-    // TODO: check that values belong to the scope of the definition according to spec
+    casted->GetParamAsUInt("output");
+
+    vector<unsigned int> kernels = casted->GetParamAsUInts("kernel", {});
+    if (kernels.empty()) {
+        // IR_v == 2
+        casted->GetParamAsUInt("kernel-x");
+        casted->GetParamAsUInt("kernel-y");
+        casted->GetParamAsUInt("stride-x", 1u);
+        casted->GetParamAsUInt("stride-y", 1u);
+        casted->GetParamAsUInt("pad-x", 0u);
+        casted->GetParamAsUInt("pad-y", 0u);
+        casted->GetParamAsUInt("pad-r", casted->_padding[X_AXIS]);
+        casted->GetParamAsUInt("pad-b", casted->_padding[Y_AXIS]);
+        casted->GetParamAsUInt("dilation-x", 1u);
+        casted->GetParamAsUInt("dilation-y", 1u);
+    } else {
+        // IR_v > 2
+        vector<unsigned int> default_0 = vector<unsigned int> (casted->_kernel.size(), 0u);
+        vector<unsigned int> default_1 = vector<unsigned int> (casted->_kernel.size(), 1u);
+        casted->GetParamAsUInts("strides", default_1);
+        casted->GetParamAsUInts("pads_begin", default_0);
+        casted->GetParamAsUInts("pads_end", default_0);
+        casted->GetParamAsUInts("dilations", default_1);
+    }
+    casted->GetParamAsString("auto_pad", "");
+    casted->GetParamAsUInt("group", 1);
 }
 
 void ConvolutionValidator::checkCorrespondence(const CNNLayer* layer,
                                                const std::map<std::string, Blob::Ptr>& blobs,
                                                const vector<SizeVector>& inShapes) const {
     auto convLayer = dynamic_cast<const ConvolutionLayer*>(layer);
-    if (!convLayer) THROW_IE_EXCEPTION << "Layer is not instance of ConvolutionLayer class";
-    auto version = BaseCreator::version_;
-    if (version < 3) {
-        checkWeightable(blobs, inShapes, {convLayer->_out_depth, false, convLayer->_group, convLayer->_kernel[Y_AXIS], convLayer->_kernel[X_AXIS]},
-                        {4});
-    } else if (version == 3) {
-        // TODO: implement v2 convolution valitation
-    }
+    if (!convLayer)
+        THROW_IE_EXCEPTION << "Layer is not instance of Convolution layer class";
+
+    std::vector<size_t> krn;
+    for (int i = 0; i < convLayer->_kernel.size(); i++)
+        krn.push_back(convLayer->_kernel[i]);
+    checkWeightable(blobs, inShapes, {convLayer->_out_depth, false, convLayer->_group, krn},
+                    {4, 5});
+}
+
+void ConvolutionValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
 }
 
 void DeconvolutionValidator::parseParams(CNNLayer* layer) {
@@ -352,7 +429,36 @@ void DeconvolutionValidator::parseParams(CNNLayer* layer) {
 }
 
 void DeconvolutionValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const ConvolutionLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of ConvolutionLayer class";
+    }
+    casted->GetParamAsUInt("output");
+
+    vector<unsigned int> kernels = casted->GetParamAsUInts("kernel", {});
+    if (kernels.empty()) {
+        // IR_v == 2
+        casted->GetParamAsUInt("kernel-x");
+        casted->GetParamAsUInt("kernel-y");
+        casted->GetParamAsUInt("stride-x", 1u);
+        casted->GetParamAsUInt("stride-y", 1u);
+        casted->GetParamAsUInt("pad-x", 0u);
+        casted->GetParamAsUInt("pad-y", 0u);
+        casted->GetParamAsUInt("pad-r", casted->_padding[X_AXIS]);
+        casted->GetParamAsUInt("pad-b", casted->_padding[Y_AXIS]);
+        casted->GetParamAsUInt("dilation-x", 1u);
+        casted->GetParamAsUInt("dilation-y", 1u);
+    } else {
+        // IR_v > 2
+        vector<unsigned int> default_0 = vector<unsigned int> (casted->_kernel.size(), 0u);
+        vector<unsigned int> default_1 = vector<unsigned int> (casted->_kernel.size(), 1u);
+        casted->GetParamAsUInts("strides", default_1);
+        casted->GetParamAsUInts("pads_begin", default_0);
+        casted->GetParamAsUInts("pads_end", default_0);
+        casted->GetParamAsUInts("dilations", default_1);
+    }
+    casted->GetParamAsString("auto_pad", "");
+    casted->GetParamAsUInt("group", 1);
 }
 
 DeconvolutionValidator::DeconvolutionValidator(const std::string& _type) : ConvolutionValidator(_type) {}
@@ -360,10 +466,19 @@ DeconvolutionValidator::DeconvolutionValidator(const std::string& _type) : Convo
 void DeconvolutionValidator::checkCorrespondence(const CNNLayer* layer,
                                                  const std::map<std::string, Blob::Ptr>& blobs,
                                                  const vector<SizeVector>& inShapes) const {
-    auto casted = dynamic_cast<const DeconvolutionLayer*>(layer);
-    if (!casted) THROW_IE_EXCEPTION << "Layer is not instance of ConvolutionLayer class";
-    checkWeightable(blobs, inShapes, {casted->_out_depth, false, casted->_group, casted->_kernel[Y_AXIS], casted->_kernel[X_AXIS]},
-                    {4});
+    auto deconv_layer = dynamic_cast<const DeconvolutionLayer*>(layer);
+    if (!deconv_layer)
+        THROW_IE_EXCEPTION << "Layer is not instance of Deconvolution layer class";
+
+    std::vector<size_t> krn;
+    for (int i = 0; i < deconv_layer->_kernel.size(); i++)
+        krn.push_back(deconv_layer->_kernel[i]);
+    checkWeightable(blobs, inShapes, {deconv_layer->_out_depth, false, deconv_layer->_group, krn},
+                    {4, 5});
+}
+
+void DeconvolutionValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
 }
 
 PoolingValidator::PoolingValidator(const std::string& _type) : LayerValidator(_type) {}
@@ -483,6 +598,10 @@ void PoolingValidator::checkParams(const CNNLayer* layer) {
     // TODO: check that values belong to the scope of the definition according to spec
 }
 
+void PoolingValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+}
+
 void BatchNormalizationValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<BatchNormalizationLayer*>(layer);
     if (!casted) {
@@ -492,11 +611,22 @@ void BatchNormalizationValidator::parseParams(CNNLayer* layer) {
 }
 
 void BatchNormalizationValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const BatchNormalizationLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of BatchNormalizationLayer class";
+    }
+    float epsilon = casted->GetParamAsFloat("epsilon");
+    if (epsilon < 0) {
+        THROW_IE_EXCEPTION << "The value of BatchNormalization layer epsilon parameter is invalid";
+    }
 }
 
 BatchNormalizationValidator::BatchNormalizationValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void BatchNormalizationValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void PowerValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<PowerLayer*>(layer);
     if (!casted) {
@@ -513,6 +643,10 @@ void PowerValidator::checkParams(const CNNLayer* layer) {
 
 PowerValidator::PowerValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void PowerValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void PReLUValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<PReLULayer*>(layer);
     if (!casted) {
@@ -527,6 +661,10 @@ void PReLUValidator::checkParams(const CNNLayer* layer) {
 
 PReLUValidator::PReLUValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void PReLUValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void ScaleShiftValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<ScaleShiftLayer*>(layer);
     if (!casted) {
@@ -543,6 +681,10 @@ void ScaleShiftValidator::checkParams(const CNNLayer* layer) {
 
 ScaleShiftValidator::ScaleShiftValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void ScaleShiftValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void TileValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<TileLayer*>(layer);
     if (!casted) {
@@ -553,11 +695,23 @@ void TileValidator::parseParams(CNNLayer* layer) {
 }
 
 void TileValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const TileLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of TileLayer class";
+    }
+    int axis = casted->GetParamAsInt("axis", -1);
+    int tiles = casted->GetParamAsInt("tiles", -1);
+    if (axis < 0 && tiles < 0) {
+        THROW_IE_EXCEPTION << "The value of Tile layer parameters is invalid";
+    }
 }
 
 TileValidator::TileValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void TileValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 ReshapeValidator::ReshapeValidator(const std::string& _type) : LayerValidator(_type) {}
 
 void ReshapeValidator::parseParams(CNNLayer *layer) {
@@ -605,6 +759,36 @@ void EltwiseValidator::parseParams(CNNLayer* layer) {
         casted->_operation = EltwiseLayer::Prod;
     } else if (op == "max") {
         casted->_operation = EltwiseLayer::Max;
+    } else if (op == "sub") {
+        casted->_operation = EltwiseLayer::Sub;
+    } else if (op == "div") {
+        casted->_operation = EltwiseLayer::Div;
+    } else if (op == "min") {
+        casted->_operation = EltwiseLayer::Min;
+    } else if (op == "squared_diff") {
+        casted->_operation = EltwiseLayer::Squared_diff;
+    } else if (op == "equal") {
+        casted->_operation = EltwiseLayer::Equal;
+    } else if (op == "not_equal") {
+        casted->_operation = EltwiseLayer::Not_equal;
+    } else if (op == "less") {
+        casted->_operation = EltwiseLayer::Less;
+    } else if (op == "less_equal") {
+        casted->_operation = EltwiseLayer::Less_equal;
+    } else if (op == "greater") {
+        casted->_operation = EltwiseLayer::Greater;
+    } else if (op == "greater_equal") {
+        casted->_operation = EltwiseLayer::Greater_equal;
+    } else if (op == "logical_and") {
+        casted->_operation = EltwiseLayer::Logical_AND;
+    } else if (op == "logical_or") {
+        casted->_operation = EltwiseLayer::Logical_OR;
+    } else if (op == "logical_xor") {
+        casted->_operation = EltwiseLayer::Logical_XOR;
+    } else if (op == "floor_mod") {
+        casted->_operation = EltwiseLayer::Floor_mod;
+    } else if (op == "pow") {
+        casted->_operation = EltwiseLayer::Pow;
     } else {
         THROW_IE_EXCEPTION << "Unsupported element wise operation: " << op;
     }
@@ -621,7 +805,17 @@ void EltwiseValidator::parseParams(CNNLayer* layer) {
 }
 
 void EltwiseValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const EltwiseLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of EltwiseLayer class";
+    }
+}
+
+void EltwiseValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    if (inShapes.empty()) {
+        THROW_IE_EXCEPTION << "Number of inputs (" << inShapes.size() <<
+                           ") of Eltwise layer is zero";
+    }
 }
 
 EltwiseValidator::EltwiseValidator(const std::string& _type) : LayerValidator(_type) {}
@@ -635,12 +829,13 @@ void ClampValidator::parseParams(CNNLayer* layer) {
     casted->max_value = casted->GetParamAsFloat("max");
 }
 
-void ClampValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
-}
 
 ClampValidator::ClampValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void ClampValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void ReLUValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<ReLULayer*>(layer);
     if (!casted) {
@@ -652,11 +847,24 @@ void ReLUValidator::parseParams(CNNLayer* layer) {
 }
 
 void ReLUValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const ReLULayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of ReLULayer class";
+    }
+    if (!casted->params.empty()) {
+        float negative_slope = casted->GetParamAsFloat("negative_slope");
+        if (negative_slope < 0) {
+            THROW_IE_EXCEPTION << "The value of ReLU layer negative_slope parameter is invalid";
+        }
+    }
 }
 
 ReLUValidator::ReLUValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void ReLUValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+}
+
 void MVNValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<MVNLayer*>(layer);
     if (!casted) {
@@ -667,11 +875,14 @@ void MVNValidator::parseParams(CNNLayer* layer) {
 }
 
 void MVNValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
 }
 
 MVNValidator::MVNValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void MVNValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void GRNValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<GRNLayer*>(layer);
     if (!casted) {
@@ -686,6 +897,10 @@ void GRNValidator::checkParams(const CNNLayer* layer) {
 
 GRNValidator::GRNValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void GRNValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void SoftMaxValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<SoftMaxLayer*>(layer);
     if (!casted) {
@@ -695,11 +910,22 @@ void SoftMaxValidator::parseParams(CNNLayer* layer) {
 }
 
 void SoftMaxValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const SoftMaxLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of SoftMaxLayer class";
+    }
+    int axis = casted->GetParamAsInt("axis", 1);
+    if (axis < 0) {
+        THROW_IE_EXCEPTION << "The value of SoftMax layer axis parameter is invalid";
+    }
 }
 
 SoftMaxValidator::SoftMaxValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void SoftMaxValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 void NormValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<NormLayer*>(layer);
     if (!casted) {
@@ -714,11 +940,23 @@ void NormValidator::parseParams(CNNLayer* layer) {
 }
 
 void NormValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
+    auto casted = dynamic_cast<const NormLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of NormLayer class";
+    }
+    float _alpha = casted->GetParamAsFloat("alpha");
+    float _beta = casted->GetParamAsFloat("beta");
+    if (_alpha < 0 && _beta < 0) {
+        THROW_IE_EXCEPTION << "The value of Norm layer alpha or beta parameters is invalid";
+    }
 }
 
 NormValidator::NormValidator(const std::string& _type) : LayerValidator(_type) {}
 
+void NormValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
 SplitValidator::SplitValidator(const std::string& _type) : LayerValidator(_type) {}
 
 void SplitValidator::parseParams(CNNLayer* layer) {
@@ -733,7 +971,7 @@ void SplitValidator::parseParams(CNNLayer* layer) {
         if (!out_sizes.empty())
             out_sizes += ",";
         if (static_cast<int>(i->getTensorDesc().getDims().size()) <= casted->_axis) {
-            THROW_IE_EXCEPTION << "Internal error - dimensions are emtpy";
+            THROW_IE_EXCEPTION << "Internal error - dimensions are empty";
         }
         out_sizes += std::to_string(i->getTensorDesc().getDims()[casted->_axis]);
     }
@@ -741,19 +979,6 @@ void SplitValidator::parseParams(CNNLayer* layer) {
         casted->params["out_sizes"] = out_sizes;
 }
 
-void checkNumOfInput(const std::vector<SizeVector>& inShapes, const vector<int>& expected_num_of_shapes) {
-    bool shape_was_found = false;
-    for (const auto& i : expected_num_of_shapes) {
-        if (inShapes.size() == i) {
-            shape_was_found = true;
-        }
-    }
-    if (!shape_was_found) {
-        THROW_IE_EXCEPTION << "Number of inputs (" << inShapes.size() << ") is not equal to expected ones";
-    }
-}
-
-
 void SplitValidator::checkParams(const CNNLayer* layer) {
     LayerValidator::checkParams(layer);
     std::vector<int> out_sizes = layer->GetParamAsInts("out_sizes", {});
@@ -768,6 +993,19 @@ void SplitValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVe
         THROW_IE_EXCEPTION << "Layer is not instance of SplitLayer class";
     }
     checkNumOfInput(inShapes, {1});
+    auto version = BaseCreator::version_;
+    if (version > 3) {
+        std::vector<int> out_sizes = layer->GetParamAsInts("out_sizes", {});
+        size_t sum(0);
+        for (const auto& size : out_sizes)
+            sum += size;
+        if (inShapes.empty() || inShapes[0].size() <= casted->_axis)
+            THROW_IE_EXCEPTION << "Layer has incorrect input shapes!";
+        if (sum != inShapes[0][casted->_axis]) {
+            THROW_IE_EXCEPTION << "The sum of the dimensions on the axis(" << casted->_axis
+                               << ") is not equal out_sizes: " << details::dumpVec(out_sizes);
+        }
+    }
 }
 
 ConcatValidator::ConcatValidator(const std::string& _type) : LayerValidator(_type) {}
@@ -781,11 +1019,9 @@ void ConcatValidator::parseParams(CNNLayer* layer) {
 }
 
 void ConcatValidator::checkParams(const CNNLayer* layer) {
-    LayerValidator::checkParams(layer);
 }
 
-void ConcatValidator::checkShapes(const CNNLayer* layer,
-                                  const std::vector<SizeVector>& inShapes) const {
+void ConcatValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
     if (inShapes.empty())
         THROW_IE_EXCEPTION << "Inputs are empty";
 
@@ -812,9 +1048,8 @@ void ConcatValidator::checkShapes(const CNNLayer* layer,
         bool eq_part2 = std::equal(firstShape.begin() + axis + 1, firstShape.end(),
                                    shape.begin() + axis + 1);
         if (!(eq_part1 && eq_part2))
-            THROW_IE_EXCEPTION << "Invalid inputs for Concat layer: dimensions should match in all"
-                               << "positions except axis (" << axis << ") one"
-                               << ") should match : [" << dumpVec(firstShape) << "] vs ["
+            THROW_IE_EXCEPTION << "Invalid inputs for Concat layer: dimensions should match in all "
+                               << "positions except axis (" << axis << ") : [" << dumpVec(firstShape) << "] vs ["
                                << dumpVec(shape) <<"]";
     }
 }
@@ -843,8 +1078,7 @@ void GemmValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>&
     }
 
     size_t numInputs = inShapes.size();
-    if (numInputs != 2 && numInputs != 3)
-        THROW_IE_EXCEPTION << "Gemm can take only 2 or 3 inputs, but actually it has: " << numInputs;
+    checkNumOfInput(inShapes, {2, 3});
 
     auto dims0 = inShapes[0];
     auto dims1 = inShapes[1];
@@ -879,7 +1113,7 @@ PadValidator::PadValidator(const std::string& _type) : LayerValidator(_type) {}
 void PadValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<PadLayer*>(layer);
     if (!casted) {
-        THROW_IE_EXCEPTION << "Layer is not instance of PadLayer class";
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of PadLayer class";
     }
     std::vector<uint32_t> pads_begin = casted->GetParamAsUInts("pads_begin");
     std::vector<uint32_t> pads_end = casted->GetParamAsUInts("pads_end");
@@ -906,7 +1140,7 @@ void PadValidator::parseParams(CNNLayer* layer) {
     } else if (mode == "symmetric") {
         casted->pad_mode = PadLayer::Symmetric;
     } else {
-        THROW_IE_EXCEPTION << "Unsupported pad mode operation: " << mode;
+        THROW_IE_EXCEPTION << layer->name << " Unsupported pad mode operation: " << mode;
     }
 }
 
@@ -917,30 +1151,29 @@ void PadValidator::checkParams(const CNNLayer* layer) {
 void PadValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
     auto casted = dynamic_cast<const PadLayer*>(layer);
     if (!casted) {
-        THROW_IE_EXCEPTION << "Layer is not instance of PadLayer class";
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of PadLayer class";
     }
 
     size_t numInputs = inShapes.size();
-    if (numInputs != 1)
-        THROW_IE_EXCEPTION << "Pad can take only 1 input, but actually it has: " << numInputs;
+    checkNumOfInput(inShapes, {1});
 
     if (inShapes[0].size() != casted->pads_begin.size())
-        THROW_IE_EXCEPTION << "Dimensions count mismatch in layer " << layer->name
+        THROW_IE_EXCEPTION << layer->name << " Dimensions count mismatch in layer " << layer->name
                            << ". Expected: " << casted->pads_begin.size() << " Got: " << inShapes[0].size();
 
     if (inShapes[0].size() != casted->pads_end.size())
-        THROW_IE_EXCEPTION << "Dimensions count mismatch in layer " << layer->name
+        THROW_IE_EXCEPTION << layer->name << " Dimensions count mismatch in layer " << layer->name
                            << ". Expected: " << casted->pads_end.size() << " Got: " << inShapes[0].size();
 
     if (casted->pad_mode == PadLayer::Symmetric || casted->pad_mode == PadLayer::Reflect) {
         for (size_t i = 0; i < inShapes[0].size(); i++) {
             if (inShapes[0][i] < casted->pads_begin[i]) {
-                THROW_IE_EXCEPTION << "Pad can't be grater than input shape in symmetric and reflect modes."
+                THROW_IE_EXCEPTION << layer->name << " Pad can't be grater than input shape in symmetric and reflect modes."
                                    << " For dimension " << i << " pad_begin=" << casted->pads_begin[i]
                                    << " in_shape="<< inShapes[0][i];
             }
             if (inShapes[0][i] < casted->pads_end[i]) {
-                THROW_IE_EXCEPTION << "Pad can't be grater than input shape in symmetric and reflect modes."
+                THROW_IE_EXCEPTION << layer->name << " Pad can't be grater than input shape in symmetric and reflect modes."
                                    << " For dimension " << i << " pad_end=" << casted->pads_end[i]
                                    << " in_shape="<< inShapes[0][i];
             }
@@ -953,7 +1186,7 @@ GatherValidator::GatherValidator(const std::string& _type) : LayerValidator(_typ
 void GatherValidator::parseParams(CNNLayer* layer) {
     auto casted = dynamic_cast<GatherLayer*>(layer);
     if (!casted) {
-        THROW_IE_EXCEPTION << "Layer is not instance of GatherLayer class";
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of GatherLayer class";
     }
 
     casted->axis = casted->GetParamAsInt("axis", 0);
@@ -966,58 +1199,1322 @@ void GatherValidator::checkParams(const CNNLayer* layer) {
 void GatherValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
     auto casted = dynamic_cast<const GatherLayer*>(layer);
     if (!casted) {
-        THROW_IE_EXCEPTION << "Layer is not instance of GatherLayer class";
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of GatherLayer class";
     }
 
     size_t numInputs = inShapes.size();
     if (numInputs != 2)
-        THROW_IE_EXCEPTION << "Gather can take only 2 inputs, but actually it has: " << numInputs;
+        THROW_IE_EXCEPTION << layer->name << " Gather can take only 2 inputs, but actually it has: " << numInputs;
 
-    if (casted->axis > 0 && (inShapes[0].size() - casted->axis) < 1)
-        THROW_IE_EXCEPTION << "Incorrect input dictionary dimensions " << inShapes[0].size()
+    if (casted->axis > 0 && inShapes[0].size() < (1 + casted->axis))
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input dictionary dimensions " << inShapes[0].size()
                            << " and axis number " << casted->axis;
     else if (casted->axis < 0 && (static_cast<int>(inShapes[0].size()) + casted->axis) < 0)
-        THROW_IE_EXCEPTION << "Incorrect input dictionary dimensions " << inShapes[0].size()
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input dictionary dimensions " << inShapes[0].size()
                            << " and axis number " << casted->axis;
 }
 
-RNNValidator::RNNValidator(const std::string& _type) : LayerValidator(_type) {}
+StridedSliceValidator::StridedSliceValidator(const std::string& _type) : LayerValidator(_type) {}
 
-void RNNValidator::parseParams(CNNLayer* layer) {
-    auto casted = dynamic_cast<RNNLayer*>(layer);
-    if (!casted)
-        THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
+void StridedSliceValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<StridedSliceLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of StridedSlice class";
+    }
 
-    std::string cell = layer->GetParamAsString("cell_type");
-    std::string direction = layer->GetParamAsString("direction", "Forward");
-    int axis = layer->GetParamAsInt("axis", 1);
+    casted->begin_mask = layer->GetParamAsString("begin_mask", "");
+    casted->end_mask = layer->GetParamAsString("end_mask", "");
+    casted->ellipsis_mask = layer->GetParamAsString("ellipsis_mask", "");
+    casted->new_axis_mask = layer->GetParamAsString("new_axis_mask", "");
+    casted->shrink_axis_mask = layer->GetParamAsString("shrink_axis_mask", "");
+}
 
-    if (!one_of(cell, "LSTM", "RNN", "GRU"))
-        THROW_IE_EXCEPTION << "Unknown RNN cell type " << cell << ". "
+void StridedSliceValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void StridedSliceValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const StridedSliceLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of StridedSliceLayer class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs > 4)
+        THROW_IE_EXCEPTION << layer->name << " StridedSlice can take up to 4 inputs, but actually it has: " << numInputs;
+
+    size_t ellipsis_mask_counter = 0;
+    for (size_t i = 0; i < casted->ellipsis_mask.size(); ++i) {
+        if (casted->ellipsis_mask[i] == '1')
+            ellipsis_mask_counter++;
+    }
+    if (ellipsis_mask_counter > 1)
+        THROW_IE_EXCEPTION << layer->name << " 'Ellipsis_mask' must be a power of two (only one ellipsis)!";
+}
+
+
+ShuffleChannelsValidator::ShuffleChannelsValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void ShuffleChannelsValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<ShuffleChannelsLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ShuffleChannels class";
+    }
+
+    casted->axis = casted->GetParamAsInt("axis", 1);
+    casted->group = casted->GetParamAsUInt("group", 1);
+}
+
+void ShuffleChannelsValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ShuffleChannelsValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const ShuffleChannelsLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ShuffleChannels class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 1)
+        THROW_IE_EXCEPTION << layer->name << " ShuffleChannels can take only 1 input, but actually it has: " << numInputs;
+
+    if (casted->axis > 0 && inShapes[0].size() < (1 + casted->axis))
+        THROW_IE_EXCEPTION << layer->name << "I ncorrect input tensor dimensions " << inShapes[0].size()
+        << " and axis number " << casted->axis;
+    else if (casted->axis < 0 && (static_cast<int>(inShapes[0].size()) + casted->axis) < 0)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input dictionary dimensions " << inShapes[0].size()
+        << " and axis number " << casted->axis;
+
+    int axis = casted->axis;
+    if (axis < 0)
+        axis += inShapes[0].size();
+
+    if (inShapes[0][axis] % casted->group)
+        THROW_IE_EXCEPTION << layer->name << " Group parameter must evenly divide the channel dimension!";
+
+    size_t dataLength = 1;
+    for (size_t i = axis + 1; i < inShapes[0].size(); i++)
+        dataLength *= inShapes[0][i];
+
+    if (dataLength == 0)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input parameters dimension!";
+}
+
+
+DepthToSpaceValidator::DepthToSpaceValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void DepthToSpaceValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<DepthToSpaceLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of DepthToSpace class";
+    }
+
+    casted->block_size = casted->GetParamAsUInt("block_size", 1);
+}
+
+void DepthToSpaceValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void DepthToSpaceValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const DepthToSpaceLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of DepthToSpace class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 1)
+        THROW_IE_EXCEPTION << layer->name << " DepthToSpace can take only 1 input, but actually it has: " << numInputs;
+
+    if (inShapes[0].size() < 3)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
+
+    if (casted->block_size == 0)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
+
+    if (inShapes[0][inShapes[0].size() - 3] % (casted->block_size * casted->block_size))
+        THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Color dimension size!";
+}
+
+
+SpaceToDepthValidator::SpaceToDepthValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void SpaceToDepthValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<SpaceToDepthLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of SpaceToDepth class";
+    }
+
+    casted->block_size = casted->GetParamAsUInt("block_size", 1);
+}
+
+void SpaceToDepthValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void SpaceToDepthValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const SpaceToDepthLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of SpaceToDepth class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 1)
+        THROW_IE_EXCEPTION << layer->name << " SpaceToDepth can take only 1 input, but actually it has: " << numInputs;
+
+    if (inShapes[0].size() < 2)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of input dimensions!";
+
+    if (casted->block_size == 0)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect block_size parameter is zero!";
+
+    if (inShapes[0][inShapes[0].size() - 1] % casted->block_size)
+        THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor With dimension size!";
+
+    if (inShapes[0][inShapes[0].size() - 2] % casted->block_size)
+        THROW_IE_EXCEPTION << layer->name << " block_size parameter is incompatible with input tensor Height dimension size!";
+}
+
+
+ReverseSequenceValidator::ReverseSequenceValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void ReverseSequenceValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<ReverseSequenceLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ReverseSequence class";
+    }
+
+    casted->seq_axis = casted->GetParamAsInt("seq_axis", 1);
+    casted->batch_axis = casted->GetParamAsInt("batch_axis", 0);
+}
+
+void ReverseSequenceValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ReverseSequenceValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const ReverseSequenceLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of ReverseSequence class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 2)
+        THROW_IE_EXCEPTION << layer->name << " ReverseSequence can take 2 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'seq_lengths' input dimensions!";
+
+    if (casted->seq_axis > 0 && inShapes[0].size() < (1 + casted->seq_axis))
+        THROW_IE_EXCEPTION << layer->name << "Incorrect input tensor dimensions " << inShapes[0].size()
+                           << " and seq_axis number " << casted->seq_axis;
+    else if (casted->seq_axis < 0 && (static_cast<int>(inShapes[0].size()) + casted->seq_axis) < 0)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input dictionary dimensions " << inShapes[0].size()
+                           << " and seq_axis number " << casted->seq_axis;
+
+    if (casted->batch_axis > 0 && inShapes[0].size() < (1 + casted->batch_axis))
+        THROW_IE_EXCEPTION << layer->name << "Incorrect input tensor dimensions " << inShapes[0].size()
+                           << " and batch_axis number " << casted->batch_axis;
+    else if (casted->batch_axis < 0 && (static_cast<int>(inShapes[0].size()) + casted->batch_axis) < 0)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect input dictionary dimensions " << inShapes[0].size()
+                           << " and batch_axis number " << casted->batch_axis;
+
+    int batch_axis = casted->batch_axis;
+    if (batch_axis < 0)
+        batch_axis += inShapes[0].size();
+    if (inShapes[1][0] != inShapes[0][batch_axis])
+        THROW_IE_EXCEPTION << layer->name << " Incorrect 'seq_lengths_dims' parameter dimensions!";
+}
+
+
+SqueezeValidator::SqueezeValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void SqueezeValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<SqueezeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Squeeze class";
+    }
+}
+
+void SqueezeValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void SqueezeValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const SqueezeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Squeeze class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 2)
+        THROW_IE_EXCEPTION << layer->name << " Squeeze can take 2 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'indices_to_squeeze' input dimensions!";
+}
+
+
+UnsqueezeValidator::UnsqueezeValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void UnsqueezeValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<UnsqueezeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Unsqueeze class";
+    }
+}
+
+void UnsqueezeValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void UnsqueezeValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const UnsqueezeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Unsqueeze class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 2)
+        THROW_IE_EXCEPTION << layer->name << " Unsqueeze can take 2 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'indices_to_set' input dimensions!";
+}
+
+
+RangeValidator::RangeValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void RangeValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<RangeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Range class";
+    }
+}
+
+void RangeValidator::checkParams(const CNNLayer* layer) {}
+
+void RangeValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const RangeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Range class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 3)
+        THROW_IE_EXCEPTION << layer->name << " Range can take 3 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[0].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'start' input dimensions!";
+
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'limit' input dimensions!";
+
+    if (inShapes[2].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'delta' input dimensions!";
+}
+
+
+FillValidator::FillValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void FillValidator::parseParams(CNNLayer* layer) {}
+
+void FillValidator::checkParams(const CNNLayer* layer) {}
+
+void FillValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    size_t numInputs = inShapes.size();
+    if (numInputs != 2)
+        THROW_IE_EXCEPTION << layer->name << " Fill can take 2 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[0].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'fill_dims' input dimensions!";
+
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'fill_value' input dimensions!";
+}
+
+
+ExpandValidator::ExpandValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void ExpandValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<ExpandLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Expand class";
+    }
+}
+
+void ExpandValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ExpandValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const ExpandLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << layer->name << " Layer is not instance of Expand class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 2)
+        THROW_IE_EXCEPTION << layer->name << " Expand can take 2 inputs, but actually it has: " << numInputs;
+
+    if (inShapes[1].size() != 1)
+        THROW_IE_EXCEPTION << layer->name << " Incorrect number of 'shape' input dimensions!";
+}
+
+/****************************************/
+/*** RNN specific validators ************/
+/****************************************/
+
+static RNNCellBase::CellType cell_type_from(string type_name) {
+    const vector<string> to_remove {"Cell", "Sequence"};
+    for (auto &sub : to_remove) {
+        auto idx = type_name.find(sub);
+        if (idx != string::npos)
+            type_name.erase(idx);
+    }
+
+    if (!one_of(type_name, "LSTM", "RNN", "GRU"))
+        THROW_IE_EXCEPTION << "Unknown RNN cell type " << type_name << ". "
                            << "Expected one of [ LSTM | RNN | GRU ].";
 
-    if (!one_of(direction, "Forward", "Backward", "Bidirectional"))
-        THROW_IE_EXCEPTION << "Unknown RNN direction type " << direction << ". "
+    return type_name == "LSTM" ? RNNSequenceLayer::LSTM :
+           type_name == "GRU"  ? RNNSequenceLayer::GRU :
+           type_name == "RNN"  ? RNNSequenceLayer::RNN :
+           RNNSequenceLayer::LSTM;
+}
+
+static RNNSequenceLayer::Direction direction_from(string direction_name) {
+    if (!one_of(direction_name, "Forward", "Backward", "Bidirectional"))
+        THROW_IE_EXCEPTION << "Unknown RNN direction type " << direction_name << ". "
                            << "Expected one of [ Forward | Backward | Bidirectional ].";
 
-    casted->axis = axis;
-    casted->cellType = cell;
-    casted->direction = direction == "Forward"  ? RNNLayer::RNN_FWD :
-                        direction == "Backward" ? RNNLayer::RNN_BWD :
-                                                  RNNLayer::RNN_BDR;
+    return direction_name == "Forward" ? RNNSequenceLayer::FWD :
+           direction_name == "Backward" ? RNNSequenceLayer::BWD :
+           direction_name == "Bidirecttional" ? RNNSequenceLayer::BDR :
+           RNNSequenceLayer::FWD;
+}
+
+template<>
+std::vector<std::string>
+RNNBaseValidator<RNNSequenceLayer::LSTM>::def_acts {"sigmoid", "tanh", "tanh"};
+template<>
+std::vector<float>
+RNNBaseValidator<RNNSequenceLayer::LSTM>::def_alpha {0, 0, 0};
+template<>
+std::vector<float>
+RNNBaseValidator<RNNSequenceLayer::LSTM>::def_beta {0, 0, 0};
+template<>
+size_t
+RNNBaseValidator<RNNSequenceLayer::LSTM>::G = 4;
+template<>
+size_t
+RNNBaseValidator<RNNSequenceLayer::LSTM>::NS = 2;
+
+template<>
+std::vector<std::string>
+RNNBaseValidator<RNNSequenceLayer::GRU>::def_acts {"sigmoid", "tanh"};
+template<>
+std::vector<float>
+RNNBaseValidator<RNNSequenceLayer::GRU>::def_alpha {0, 0};
+template<>
+std::vector<float>
+RNNBaseValidator<RNNSequenceLayer::GRU>::def_beta {0, 0};
+template<>
+size_t
+RNNBaseValidator<RNNSequenceLayer::GRU>::G = 3;
+template<>
+size_t
+RNNBaseValidator<RNNSequenceLayer::GRU>::NS = 1;
+
+template<>
+std::vector<std::string>
+RNNBaseValidator<RNNSequenceLayer::RNN>::def_acts {"tanh"};
+template<>
+std::vector<float>
+RNNBaseValidator<RNNSequenceLayer::RNN>::def_alpha {0};
+template<>
+std::vector<float>
+RNNBaseValidator<RNNSequenceLayer::RNN>::def_beta {0};
+template<>
+size_t
+RNNBaseValidator<RNNSequenceLayer::RNN>::G = 1;
+template<>
+size_t
+RNNBaseValidator<RNNSequenceLayer::RNN>::NS = 1;
+
+template<RNNSequenceLayer::CellType CELL>
+RNNBaseValidator<CELL>::RNNBaseValidator(const std::string& _type) : LayerValidator(_type) {}
+
+template<RNNSequenceLayer::CellType CELL>
+void RNNBaseValidator<CELL>::parseParams(CNNLayer* layer) {
+    auto rnn = dynamic_cast<RNNCellBase*>(layer);
+    if (!rnn)
+        THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
+
+    rnn->cellType = cell_type_from(layer->type);
+    rnn->hidden_size = rnn->GetParamAsInt("hidden_size");
+    rnn->clip = rnn->GetParamAsFloat("clip", 0.0f);
+    rnn->activations = rnn->GetParamAsStrings("activations", def_acts);
+    rnn->activation_alpha = rnn->GetParamAsFloats("activation_alpha", def_alpha);
+    rnn->activation_beta = rnn->GetParamAsFloats("activation_beta", def_beta);
+
+    if (rnn->cellType == RNNCellBase::GRU) {
+        auto lbr = rnn->GetParamAsBool("linear_before_reset", false);
+        if (lbr) rnn->cellType = RNNCellBase::GRU_LBR;
+    }
 }
 
-void RNNValidator::checkParams(const InferenceEngine::CNNLayer *layer) {
-    auto casted = dynamic_cast<const RNNLayer*>(layer);
+template<RNNSequenceLayer::CellType CELL>
+void RNNBaseValidator<CELL>::checkParams(const InferenceEngine::CNNLayer *layer) {
+    auto rnn = dynamic_cast<const RNNCellBase*>(layer);
+    if (!rnn)
+        THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
+
+    if (rnn->clip < 0.0f)
+        THROW_IE_EXCEPTION << "Clip parameter should be positive";
+
+    for (auto &act : rnn->activations)
+        if (!one_of(act, "sigmoid", "tanh", "relu"))
+            THROW_IE_EXCEPTION << "Unsupported activation function (" << act << ") for RNN layer.";
+
+    int act_num_required = def_acts.size();
+    if (rnn->activations.size() != act_num_required)
+        THROW_IE_EXCEPTION << "Expected " << act_num_required << " activations, but provided "
+                           << rnn->activations.size();
+
+    if (rnn->activation_alpha.size() != act_num_required)
+        THROW_IE_EXCEPTION << "Expected " << act_num_required << " activation alpha parameters, "
+                           << "but provided " << rnn->activation_alpha.size();
+    if (rnn->activation_beta.size() != act_num_required)
+        THROW_IE_EXCEPTION << "Expected " << act_num_required << " activation beta parameters, "
+                           << "but provided " << rnn->activation_beta.size();
+}
+
+template<RNNSequenceLayer::CellType CELL>
+void RNNBaseValidator<CELL>::checkCorrespondence(const CNNLayer* layer,
+         const map<string, Blob::Ptr>& blobs,
+         const vector<SizeVector>& inShapes) const {
+    auto rnn = dynamic_cast<const RNNCellBase*>(layer);
+    if (!rnn)
+        THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
+
+    if (blobs.size() != 2)
+        THROW_IE_EXCEPTION << "Expected only 2 blobs with trained parameters (weights and biases), "
+                           << "but provided only " << blobs.size();
+    if (inShapes.empty())
+        THROW_IE_EXCEPTION << "No input tensors.";
+
+    size_t D = inShapes[0].back();
+    size_t S = rnn->hidden_size;
+    size_t expectetd_w_size = G*S*(D+S);
+    size_t expectetd_b_size = G*S;
+
+    if (rnn->cellType == RNNCellBase::GRU_LBR)
+        expectetd_b_size = (G + 1)*S;
+
+    auto w = blobs.find("weights");
+    if (w == blobs.end())
+        THROW_IE_EXCEPTION << "Weights blob is not provided";
+
+    if (w->second->size() != expectetd_w_size)
+        THROW_IE_EXCEPTION << "Weights blob has wrang size. Expected " << expectetd_w_size;
+
+    auto b = blobs.find("biases");
+    if (b == blobs.end())
+        THROW_IE_EXCEPTION << "Biases blob is not provided";
+
+    if (b->second->size() != expectetd_b_size)
+        THROW_IE_EXCEPTION << "Biases blob has wrang size. Expected " << expectetd_b_size;
+}
+
+template<RNNSequenceLayer::CellType CELL>
+RNNSequenceValidator<CELL>::RNNSequenceValidator(const std::string& _type) : RNNBaseValidator<CELL>(_type) {}
+
+template<RNNSequenceLayer::CellType CELL>
+void RNNSequenceValidator<CELL>::parseParams(CNNLayer* layer) {
+    RNNBaseValidator<CELL>::parseParams(layer);
+
+    auto casted = dynamic_cast<RNNSequenceLayer*>(layer);
+    if (!casted)
+        THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
+
+    std::string direction = layer->GetParamAsString("direction");
+
+    casted->axis = layer->GetParamAsUInt("axis", 1);
+    casted->direction = direction_from(direction);
+}
+
+template<RNNSequenceLayer::CellType CELL>
+void RNNSequenceValidator<CELL>::checkParams(const InferenceEngine::CNNLayer *layer) {
+    RNNBaseValidator<CELL>::checkParams(layer);
+
+    auto casted = dynamic_cast<const RNNSequenceLayer*>(layer);
     if (!casted)
         THROW_IE_EXCEPTION << "Layer is not instance of RNNLayer class";
 
     if (!one_of(casted->axis, 1, 0))
-        THROW_IE_EXCEPTION << "Unsupported axis for RNN layer iterator. Only 0 and 1 axis are supported.";
+        THROW_IE_EXCEPTION << "Unsupported iteration axis for RNNSequense layer. Only 0 or 1 axis are supported.";
+}
+
+template<RNNSequenceLayer::CellType CELL>
+void RNNSequenceValidator<CELL>::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto rnn = dynamic_cast<const RNNSequenceLayer*>(layer);
+    if (!rnn)
+        THROW_IE_EXCEPTION << "Layer is not instance of RNNSequenceLayer class";
+
+    if (inShapes.empty())
+        THROW_IE_EXCEPTION << "No input tensors.";
+
+    if (inShapes[0].size() != 3)
+        THROW_IE_EXCEPTION << "First input data tensor should be 3D";
+
+    size_t T_axis = rnn->axis;
+    size_t N_axis = (T_axis + 1)%2;
+    size_t N = inShapes[0][N_axis];
+    size_t T = inShapes[0][T_axis];
+    size_t D = inShapes[0].back();
+    size_t S = rnn->hidden_size;
+    size_t NS = RNNSequenceValidator<CELL>::NS;
+
+    SizeVector expected_state_shape {N, S};
+
+    if (inShapes.size() > 1) {  // has an initial state blobs
+        if (inShapes.size() != 1 + NS)
+            THROW_IE_EXCEPTION << "Wrong number of input tensors. Expected 1 (data) or "
+                               << 1 + NS << " (data and states)";
+        if (inShapes[1] != expected_state_shape)
+            THROW_IE_EXCEPTION << "Wrong shape of first initial state tensors.";
+//                             << " Expected " << expected_state_shape << " but provided " << inShapes[1];
+
+        if (NS == 2 && inShapes[2] != expected_state_shape)
+            THROW_IE_EXCEPTION << "Wrong shape of second initial state tensors.";
+//                             << " Expected " << expected_state_shape << " but provided " << inShapes[2];
+    }
+}
+
+template class details::RNNSequenceValidator<RNNSequenceLayer::RNN>;
+template class details::RNNSequenceValidator<RNNSequenceLayer::GRU>;
+template class details::RNNSequenceValidator<RNNSequenceLayer::LSTM>;
+
+template<RNNSequenceLayer::CellType CELL>
+RNNCellValidator<CELL>::RNNCellValidator(const std::string& _type) : RNNBaseValidator<CELL>(_type) {}
+
+template<RNNSequenceLayer::CellType CELL>
+void RNNCellValidator<CELL>::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto rnn = dynamic_cast<const RNNCellBase*>(layer);
+    if (!rnn)
+        THROW_IE_EXCEPTION << "Layer is not instance of RNNSequenceLayer class";
+
+    const size_t &NS = RNNCellValidator<CELL>::NS;
+
+    if (inShapes.size() != NS + 1)
+        THROW_IE_EXCEPTION << "Wrong number of input tensors. Expected " << NS + 1;
+
+    if (inShapes[0].size() != 2)
+        THROW_IE_EXCEPTION << "First input data tensor should be 2D";
+
+    size_t N = inShapes[0][0];
+    size_t D = inShapes[0][1];
+    size_t S = rnn->hidden_size;
+
+    SizeVector expected_state_shape {N, S};
+
+    if (inShapes[1] != expected_state_shape)
+        THROW_IE_EXCEPTION << "Wrong shape of first initial state tensors.";
+//                         << " Expected " << expected_state_shape << " but provided " << inShapes[1];
+
+    if (NS == 2 && inShapes[2] != expected_state_shape)
+        THROW_IE_EXCEPTION << "Wrong shape of second initial state tensors.";
+//                         << " Expected " << expected_state_shape << " but provided " << inShapes[2];
+}
+
+template class details::RNNCellValidator<RNNSequenceLayer::RNN>;
+template class details::RNNCellValidator<RNNSequenceLayer::GRU>;
+template class details::RNNCellValidator<RNNSequenceLayer::LSTM>;
+
+void ArgMaxValidator::checkParams(const CNNLayer* layer) {
+    unsigned int top_k_ = layer->GetParamAsUInt("top_k");
+}
+
+void ArgMaxValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+ArgMaxValidator::ArgMaxValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void CTCGreedyDecoderValidator::checkParams(const CNNLayer* layer) {
+    int flag = layer->GetParamAsInt("ctc_merge_repeated", 0);
+    if (flag != 0 && flag != 1) {
+        THROW_IE_EXCEPTION << "CTCGreedyDecoder layer parameter ctc_merge_repeated is invalid";
+    }
+}
+
+void CTCGreedyDecoderValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+}
+
+CTCGreedyDecoderValidator::CTCGreedyDecoderValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void DetectionOutputValidator::parseParams(CNNLayer* layer) {
+    unsigned int num_classes = layer->GetParamAsUInt("num_classes");
+    if (num_classes == 0) {
+        THROW_IE_EXCEPTION << "num_classes parameter of DetectionOutput layer can't be equal to zero";
+    }
+    float _nms_threshold = layer->GetParamAsFloat("nms_threshold");
+    if (_nms_threshold < 0) {
+        THROW_IE_EXCEPTION << "nms_threshold parameter of DetectionOutput layer can't be less then zero";
+    }
+    int _keep_top_k = layer->GetParamAsUInt("keep_top_k", -1);
+
+    if (layer->CheckParamPresence("background_label_id"))
+        int _background_label_id = layer->GetParamAsUInt("background_label_id", -1);
+    if (layer->CheckParamPresence("top_k"))
+        int _top_k = layer->GetParamAsUInt("top_k", -1);
+    if (layer->CheckParamPresence("variance_encoded_in_target"))
+        bool _variance_encoded_in_target = static_cast<bool>(layer->GetParamAsUInt("variance_encoded_in_target"));
+    if (layer->CheckParamPresence("num_orient_classes"))
+        int _num_orient_classes = layer->GetParamAsUInt("num_orient_classes");
+    if (layer->CheckParamPresence("share_location"))
+        bool _share_location = static_cast<bool>(layer->GetParamAsUInt("share_location"));
+    if (layer->CheckParamPresence("interpolate_orientation"))
+        int _interpolate_orientation = layer->GetParamAsInt("interpolate_orientation");
+    if (layer->CheckParamPresence("confidence_threshold")) {
+        float _confidence_threshold = layer->GetParamAsFloat("confidence_threshold");
+        if (_confidence_threshold < 0) {
+            THROW_IE_EXCEPTION << "_nms_threshold parameter of DetectionOutput layer can't be less then zero";
+        }
+    }
+
+    if (layer->CheckParamPresence("code_type")) {
+        std::string _code_type = layer->GetParamAsString("code_type");
+        std::vector<std::string> code_types = {"caffe.PriorBoxParameter.CENTER_SIZE",
+                                               "caffe.PriorBoxParameter.CORNER"};
+        auto it = std::find(code_types.begin(), code_types.end(), _code_type);
+        if (it == code_types.end()) {
+            THROW_IE_EXCEPTION << "Parameter code_type of DetectionOutput layer ";
+        }
+    }
+}
+
+void DetectionOutputValidator::checkParams(const CNNLayer* layer) {
+    unsigned int num_classes = layer->GetParamAsUInt("num_classes");
+    if (num_classes == 0) {
+        THROW_IE_EXCEPTION << "num_classes parameter of DetectionOutput layer can't be equal to zero";
+    }
+    float _nms_threshold = layer->GetParamAsFloat("nms_threshold");
+    if (_nms_threshold < 0) {
+        THROW_IE_EXCEPTION << "nms_threshold parameter of DetectionOutput layer can't be less then zero";
+    }
+    int _keep_top_k = layer->GetParamAsUInt("keep_top_k", -1);
+
+    if (layer->CheckParamPresence("background_label_id"))
+        int _background_label_id = layer->GetParamAsUInt("background_label_id", -1);
+    if (layer->CheckParamPresence("top_k"))
+        int _top_k = layer->GetParamAsUInt("top_k", -1);
+    if (layer->CheckParamPresence("variance_encoded_in_target"))
+        bool _variance_encoded_in_target = static_cast<bool>(layer->GetParamAsUInt("variance_encoded_in_target"));
+    if (layer->CheckParamPresence("num_orient_classes"))
+        int _num_orient_classes = layer->GetParamAsUInt("num_orient_classes");
+    if (layer->CheckParamPresence("share_location"))
+        bool _share_location = static_cast<bool>(layer->GetParamAsUInt("share_location"));
+    if (layer->CheckParamPresence("interpolate_orientation"))
+        int _interpolate_orientation = layer->GetParamAsInt("interpolate_orientation");
+    if (layer->CheckParamPresence("confidence_threshold")) {
+        float _confidence_threshold = layer->GetParamAsFloat("confidence_threshold");
+        if (_confidence_threshold < 0) {
+            THROW_IE_EXCEPTION << "_nms_threshold parameter of DetectionOutput layer can't be less then zero";
+        }
+    }
+    if (layer->CheckParamPresence("code_type")) {
+        std::string _code_type = layer->GetParamAsString("code_type");
+        std::vector<std::string> code_types = {"caffe.PriorBoxParameter.CENTER_SIZE",
+                                               "caffe.PriorBoxParameter.CORNER"};
+        auto it = std::find(code_types.begin(), code_types.end(), _code_type);
+        if (it == code_types.end()) {
+            THROW_IE_EXCEPTION << "Parameter code_type of DetectionOutput layer ";
+        }
+    }
+}
+
+void DetectionOutputValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {3, 5});
+}
+
+DetectionOutputValidator::DetectionOutputValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void InterpValidator::checkParams(const CNNLayer* layer) {
+}
+
+void InterpValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+    auto IS_ZERO = [](float value) {
+        return std::fabs(value) < std::numeric_limits<float>::epsilon();
+    };
+    if (inShapes.size() != 2) {
+        float factor = layer->GetParamAsFloat("factor", 0);
+        if (factor < 0)
+            THROW_IE_EXCEPTION << "factor parameter of Interp layer can't be less then zero";
+        float shrink_factor = layer->GetParamAsFloat("shrink_factor", 0);
+        if (shrink_factor < 0)
+            THROW_IE_EXCEPTION << "shrink_factor parameter of Interp layer can't be less then zero";
+        float zoom_factor = (layer->GetParamAsFloat("zoom_factor", 0));
+        if (zoom_factor < 0)
+            THROW_IE_EXCEPTION << "zoom_factor parameter of Interp layer can't be less then zero";
+        bool noFactor = IS_ZERO(factor) && IS_ZERO(shrink_factor) && IS_ZERO(zoom_factor);
+
+        auto height = layer->GetParamAsUInt("height", 0);
+        auto width = layer->GetParamAsUInt("width", 0);
+
+        if (noFactor && (height == 0 || width == 0)) {
+            THROW_IE_EXCEPTION
+                    << "Can't reshape without factor, or target resolution. "
+                    << "Supported attributes: factor, shrink_factor, zoom_factor, height, width";
+        }
+    }
+}
+
+InterpValidator::InterpValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void InterpValidator::parseParams(CNNLayer* layer) {
+    float factor = layer->GetParamAsFloat("factor", 0);
+    float shrink_factor = layer->GetParamAsFloat("shrink_factor", 0);
+    float zoom_factor = layer->GetParamAsFloat("zoom_factor", 0);
+
+    auto height = layer->GetParamAsUInt("height", 0);
+    auto width = layer->GetParamAsUInt("width", 0);
+}
+
+    void PermuteValidator::checkParams(const CNNLayer* layer) {
+    std::vector<unsigned int> layerOrder = layer->GetParamAsUInts("order");
+}
+
+void PermuteValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+PermuteValidator::PermuteValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void PriorBoxValidator::checkParams(const CNNLayer* layer) {
+    std::vector<unsigned int> min_sizes = layer->GetParamAsUInts("min_size", {});
+    std::vector<unsigned int> max_sizes = layer->GetParamAsUInts("max_size", {});
+    bool flip = static_cast<bool>(layer->GetParamAsInt("flip"));
+    if (layer->CheckParamPresence("aspect_ratio"))
+        const std::vector<unsigned int> aspect_ratios = layer->GetParamAsUInts("aspect_ratio", {});
+    bool clip_ = static_cast<bool>(layer->GetParamAsInt("clip"));
+    if (layer->CheckParamPresence("variance")) {
+        float variance_ = layer->GetParamAsFloat("variance", 1.0);
+        if (variance_ < 0) {
+            THROW_IE_EXCEPTION << "The value of PriorBox layer variance_ parameter is invalid";
+        }
+    }
+    float step_ = layer->GetParamAsFloat("step", 0);
+    if (step_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer step_ parameter is invalid";
+    }
+    float offset_ = layer->GetParamAsFloat("offset");
+    if (offset_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer offset_ parameter is invalid";
+    }
+}
+
+void PriorBoxValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {2});
+}
+
+PriorBoxValidator::PriorBoxValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void PriorBoxClusteredValidator::checkParams(const CNNLayer* layer) {
+    std::vector<float> widths = layer->GetParamAsFloats("width", {});
+    for (auto i : widths) {
+        if (i < 0) {
+            THROW_IE_EXCEPTION << "The value of PriorBoxClustered layer width parameter is invalid";
+        }
+    }
+    std::vector<float> heights = layer->GetParamAsFloats("height", {});
+    for (auto i : heights) {
+        if (i < 0) {
+            THROW_IE_EXCEPTION << "The value of PriorBoxClustered layer heights parameter is invalid";
+        }
+    }
+    bool flip = static_cast<bool>(layer->GetParamAsInt("flip"));
+    bool clip_ = static_cast<bool>(layer->GetParamAsInt("clip"));
+    float offset_ = layer->GetParamAsFloat("offset");
+    if (offset_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer offset_ parameter is invalid";
+    }
+    if (layer->CheckParamPresence("variance")) {
+        float variance_ = layer->GetParamAsFloat("variance");
+        if (variance_ < 0) {
+            THROW_IE_EXCEPTION << "The value of PriorBox layer variance_ parameter is invalid";
+        }
+    }
+    float step_h_ = layer->GetParamAsFloat("step_h", 0);
+    if (step_h_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer step_h_ parameter is invalid";
+    }
+    float step_w_ = layer->GetParamAsFloat("step_w", 0);
+    if (step_w_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer step_w_ parameter is invalid";
+    }
+    float img_h_ = layer->GetParamAsFloat("img_h", 0);
+    if (img_h_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer img_h_ parameter is invalid";
+    }
+    float  img_w_ = layer->GetParamAsFloat("img_w", 0);
+    if (img_w_ < 0) {
+        THROW_IE_EXCEPTION << "The value of PriorBox layer img_w_ parameter is invalid";
+    }
+}
+
+void PriorBoxClusteredValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {2});
+}
+
+PriorBoxClusteredValidator::PriorBoxClusteredValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ProposalValidator::checkParams(const CNNLayer* layer) {
+    unsigned int post_nms_topn_ = layer->GetParamAsUInt("post_nms_topn");
+
+    if (layer->CheckParamPresence("feat_stride"))
+        unsigned int feat_stride_ = layer->GetParamAsUInt("feat_stride");
+    if (layer->CheckParamPresence("base_size"))
+        unsigned int base_size_ = layer->GetParamAsUInt("base_size");
+    if (layer->CheckParamPresence("min_size"))
+        unsigned int min_size_ = layer->GetParamAsUInt("min_size");
+    if (layer->CheckParamPresence("pre_nms_topn"))
+        unsigned int pre_nms_topn_ = layer->GetParamAsUInt("pre_nms_topn");
+    if (layer->CheckParamPresence("nms_thresh")) {
+        float nms_thresh_ = layer->GetParamAsFloat("nms_thresh");
+        if (nms_thresh_ < 0) {
+            THROW_IE_EXCEPTION << "The value of Proposal layer nms_thresh_ parameter is invalid";
+        }
+    }
+}
+
+void ProposalValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {3});
+}
+
+ProposalValidator::ProposalValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void PSROIPoolingValidator::checkParams(const CNNLayer* layer) {
+    unsigned int output_dim = layer->GetParamAsUInt("output_dim");
+    unsigned int group_size = layer->GetParamAsUInt("group_size");
+    if (layer->CheckParamPresence("spatial_scale")) {
+        float spatial_scale_ = layer->GetParamAsFloat("spatial_scale");
+        if (spatial_scale_ < 0) {
+            THROW_IE_EXCEPTION << "The value of PSROIPooling layer spatial_scale_ parameter is invalid";
+        }
+    }
+}
+
+void PSROIPoolingValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+}
+
+PSROIPoolingValidator::PSROIPoolingValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void RegionYoloValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void RegionYoloValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+RegionYoloValidator::RegionYoloValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ReorgYoloValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ReorgYoloValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+ReorgYoloValidator::ReorgYoloValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ResampleValidator::checkParams(const CNNLayer* layer) {
+    if (layer->CheckParamPresence("antialias")) {
+        auto antialias = static_cast<size_t>(layer->GetParamAsInt("antialias"));
+
+        if (antialias != 0 && antialias != 1) {
+            THROW_IE_EXCEPTION << "The value of resample layer antialias parameter is invalid";
+        }
+    }
+    if (layer->CheckParamPresence("type")) {
+        std::string type = layer->GetParamAsString("type");
+        if (type != "caffe.ResampleParameter.NEAREST" && type != "caffe.ResampleParameter.CUBIC" &&
+            type != "caffe.ResampleParameter.LINEAR") {
+            THROW_IE_EXCEPTION << "The value of resample layer type parameter is invalid";
+        }
+    }
+}
+
+void ResampleValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+}
+
+ResampleValidator::ResampleValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ROIPoolingValidator::checkParams(const CNNLayer* layer) {
+    unsigned int pooled_h = layer->GetParamAsUInt("pooled_h");
+    unsigned int pooled_w = layer->GetParamAsUInt("pooled_w");
+    float spatial_scale = layer->GetParamAsFloat("spatial_scale");
+    if (spatial_scale < 0) {
+        THROW_IE_EXCEPTION << "The value of ROIPooling layer spatial_scale parameter is invalid";
+    }
+}
+
+void ROIPoolingValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 2});
+}
+
+ROIPoolingValidator::ROIPoolingValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void SimplerNMSValidator::checkParams(const CNNLayer* layer) {
+    unsigned int post_nms_topn_ = layer->GetParamAsUInt("post_nms_topn");
+
+    if (layer->CheckParamPresence("min_bbox_size"))
+        unsigned int min_box_size_ = layer->GetParamAsUInt("min_bbox_size");
+    if (layer->CheckParamPresence("feat_stride"))
+        unsigned int feat_stride_ = layer->GetParamAsUInt("feat_stride");
+    if (layer->CheckParamPresence("pre_nms_topn"))
+        unsigned int pre_nms_topn_ = layer->GetParamAsUInt("pre_nms_topn");
+    if (layer->CheckParamPresence("iou_threshold")) {
+        float iou_threshold_ = layer->GetParamAsFloat("iou_threshold");
+        if (iou_threshold_ < 0) {
+            THROW_IE_EXCEPTION << "The value of SimplerNMS layer iou_threshold_ parameter is invalid";
+        }
+    }
+    if (layer->CheckParamPresence("scale"))
+        std::vector<unsigned int> scale = layer->GetParamAsUInts("scale", {});
+    if (layer->CheckParamPresence("cls_threshold")) {
+        float cls_threshold = layer->GetParamAsFloat("cls_threshold");
+        if (cls_threshold < 0) {
+            THROW_IE_EXCEPTION << "The value of SimplerNMS layer cls_threshold parameter is invalid";
+        }
+    }
+}
+
+void SimplerNMSValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {3});
+}
 
-    // TODO: Add more RNN verification..
+SimplerNMSValidator::SimplerNMSValidator(const std::string& _type) : LayerValidator(_type) {
 }
 
-void RNNValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {}
+void SpatialTransformerValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void SpatialTransformerValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {2});
+}
+
+SpatialTransformerValidator::SpatialTransformerValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void UpsamplingValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void UpsamplingValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+UpsamplingValidator::UpsamplingValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void UnpoolingValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void UnpoolingValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+UnpoolingValidator::UnpoolingValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+ActivationValidator::ActivationValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ActivationValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ActivationValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+ConstValidator::ConstValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ConstValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ConstValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {0, 1});
+}
+
+CopyValidator::CopyValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void CopyValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void CopyValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+ELUValidator::ELUValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ELUValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ELUValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+InputValidator::InputValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void InputValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void InputValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {0});
+}
+
+MemoryValidator::MemoryValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void MemoryValidator::checkParams(const CNNLayer* layer) {
+    int size = layer->GetParamAsInt("size");
+    if (size != 2) {
+        THROW_IE_EXCEPTION << "The value of Memory layer size parameter is invalid";
+    }
+}
+
+void MemoryValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1, 0});
+}
+
+NormalizeValidator::NormalizeValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void NormalizeValidator::checkParams(const CNNLayer* layer) {
+    if (layer->CheckParamPresence("eps")) {
+        float eps = layer->GetParamAsFloat("eps");
+        if (eps < 0) {
+            THROW_IE_EXCEPTION << "The value of Normalize layer eps parameter is invalid";
+        }
+    }
+}
+
+void NormalizeValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+PowerFileValidator::PowerFileValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void PowerFileValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void PowerFileValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+ReLU6Validator::ReLU6Validator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void ReLU6Validator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void ReLU6Validator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+SigmoidValidator::SigmoidValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void SigmoidValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void SigmoidValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+TanHValidator::TanHValidator(const std::string& _type) : LayerValidator(_type) {
+}
+
+void TanHValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
+
+QuantizeValidator::QuantizeValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void QuantizeValidator::parseParams(CNNLayer* layer) {
+    auto casted = dynamic_cast<QuantizeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of QuantizeLayer class";
+    }
+
+    casted->levels = casted->GetParamAsInt("levels", 1);
+
+    if (casted->levels <= 1) {
+        THROW_IE_EXCEPTION << layer->name << ": Incorrect value for parameter levels = " << casted->levels
+                           << ". Expected to be > 1.";
+    }
+}
+
+void QuantizeValidator::checkParams(const CNNLayer* layer) {
+    LayerValidator::checkParams(layer);
+}
+
+void QuantizeValidator::checkShapes(const CNNLayer* layer, const vector<SizeVector>& inShapes) const {
+    auto casted = dynamic_cast<const QuantizeLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of QuantizeLayer class";
+    }
+
+    size_t numInputs = inShapes.size();
+    if (numInputs != 5)
+        THROW_IE_EXCEPTION << "Quantize can take only 5 inputs, but actually it has: " << numInputs;
+
+    auto dims0 = inShapes[0];
+    if (dims0.size() < 1) {
+        THROW_IE_EXCEPTION << "Quantize input0 shape must have at least 1 dimension";
+    }
+}
+
+BinaryConvolutionValidator::BinaryConvolutionValidator(const std::string& _type) : LayerValidator(_type) {}
+
+void BinaryConvolutionValidator::parseParams(CNNLayer* layer) {
+    auto binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(layer);
+    if (!binConvLayer) {
+        THROW_IE_EXCEPTION << "Layer is not instance of BinaryConvolutionLayer class";
+    }
+
+    binConvLayer->_pad_value = binConvLayer->GetParamAsFloat("pad_value", -1.f);
+    binConvLayer->_in_depth = binConvLayer->GetParamAsUInt("input");
+    binConvLayer->_mode = BinaryConvolutionLayer::xnor_popcount;
+    std::string mode = binConvLayer->GetParamAsString("mode", "xnor-popcount");
+    if (mode != "xnor-popcount")
+        THROW_IE_EXCEPTION << "Layer with type `" << _type << "` has incorrect mode!";
+
+    binConvLayer->_out_depth = binConvLayer->GetParamAsUInt("output");
+
+    binConvLayer->_kernel.clear();
+    binConvLayer->_stride.clear();
+    binConvLayer->_padding.clear();
+    binConvLayer->_pads_end.clear();
+    binConvLayer->_dilation.clear();
+
+    vector<unsigned int> kernels = binConvLayer->GetParamAsUInts("kernel", {});
+    if (kernels.empty()) {
+        // IR_v == 2
+        binConvLayer->_kernel.insert(X_AXIS, binConvLayer->GetParamAsUInt("kernel-x"));
+        binConvLayer->_kernel.insert(Y_AXIS, binConvLayer->GetParamAsUInt("kernel-y"));
+
+        binConvLayer->_stride.insert(X_AXIS, binConvLayer->GetParamAsUInt("stride-x", 1u));
+        binConvLayer->_stride.insert(Y_AXIS, binConvLayer->GetParamAsUInt("stride-y", 1u));
+        // TODO: maybe just throw exception, why do we change IR?
+        if (0 == binConvLayer->_stride[X_AXIS]) {
+            binConvLayer->_stride[X_AXIS] = 1u;
+            LogError("Warning! in layer %s: Stride x is 0, setting to 1 ", binConvLayer->name.c_str());
+        }
+        if (0 == binConvLayer->_stride[Y_AXIS]) {
+            binConvLayer->_stride[Y_AXIS] = 1u;
+            LogError("Warning! in layer %s: Stride y is 0, setting to 1", binConvLayer->name.c_str());
+        }
+
+        binConvLayer->_padding.insert(X_AXIS, binConvLayer->GetParamAsUInt("pad-x", 0u));
+        binConvLayer->_padding.insert(Y_AXIS, binConvLayer->GetParamAsUInt("pad-y", 0u));
+
+        binConvLayer->_pads_end.insert(X_AXIS, binConvLayer->GetParamAsUInt("pad-r", binConvLayer->_padding[X_AXIS]));
+        binConvLayer->_pads_end.insert(Y_AXIS, binConvLayer->GetParamAsUInt("pad-b", binConvLayer->_padding[Y_AXIS]));
+
+        binConvLayer->_dilation.insert(X_AXIS, binConvLayer->GetParamAsUInt("dilation-x", 1u));
+        binConvLayer->_dilation.insert(Y_AXIS, binConvLayer->GetParamAsUInt("dilation-y", 1u));
+    } else {
+        // IR_v > 2
+        for (int i = 1; i <= kernels.size(); i++) {
+            binConvLayer->_kernel.insert(i - 1, kernels[kernels.size() - i]);
+        }
+
+        vector<unsigned int> default_0 = vector<unsigned int> (binConvLayer->_kernel.size(), 0u);
+        vector<unsigned int> default_1 = vector<unsigned int> (binConvLayer->_kernel.size(), 1u);
+
+        vector<unsigned int> strides = binConvLayer->GetParamAsUInts("strides", default_1);
+        for (int i = 1; i <= strides.size(); i++) {
+            if (strides[strides.size() - i] == 0) {
+                THROW_IE_EXCEPTION << "Stride could not be 0.\nIn layer " << binConvLayer->name;
+            }
+            binConvLayer->_stride.insert(i - 1, strides[strides.size() - i]);
+        }
+
+        vector<unsigned int> pads_begin = binConvLayer->GetParamAsUInts("pads_begin", default_0);
+        for (int i = 1; i <= pads_begin.size(); i++) {
+            binConvLayer->_padding.insert(i - 1, pads_begin[pads_begin.size() - i]);
+        }
+
+        vector<unsigned int> pads_end = binConvLayer->GetParamAsUInts("pads_end", pads_begin);
+        for (int i = 1; i <= pads_end.size(); i++) {
+            binConvLayer->_pads_end.insert(i - 1, pads_end[pads_end.size() - i]);
+        }
+
+        vector<unsigned int> dilations = binConvLayer->GetParamAsUInts("dilations", default_1);
+        for (int i = 1; i <= dilations.size(); i++) {
+            binConvLayer->_dilation.insert(i - 1, dilations[dilations.size() - i]);
+        }
+    }
+
+    binConvLayer->_auto_pad = binConvLayer->GetParamAsString("auto_pad", "");
+    binConvLayer->_group = binConvLayer->GetParamAsUInt("group", 1u);
+}
+
+void BinaryConvolutionValidator::checkParams(const CNNLayer* layer) {
+    auto casted = dynamic_cast<const BinaryConvolutionLayer*>(layer);
+    if (!casted) {
+        THROW_IE_EXCEPTION << "Layer is not instance of BinaryConvolutionLayer class";
+    }
+}
+
+void BinaryConvolutionValidator::checkCorrespondence(const CNNLayer* layer,
+                                               const std::map<std::string, Blob::Ptr>& blobs,
+                                               const vector<SizeVector>& inShapes) const {
+    auto binConvLayer = dynamic_cast<const BinaryConvolutionLayer*>(layer);
+    if (!binConvLayer)
+        THROW_IE_EXCEPTION << "Layer is not instance of BinaryConvolutionLayer class";
+}
+
+void BinaryConvolutionValidator::checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {
+    checkNumOfInput(inShapes, {1});
+}
 
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_layer_validators.hpp b/inference-engine/src/inference_engine/ie_layer_validators.hpp
index 6361b4f64..94a0a670e 100644
--- a/inference-engine/src/inference_engine/ie_layer_validators.hpp
+++ b/inference-engine/src/inference_engine/ie_layer_validators.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -47,8 +47,7 @@ public:
      * @note: This function doesn't touch ins and out Data of the layer.
      * Throws exception if the check fails
      */
-    virtual void checkShapes(const CNNLayer* layer,
-                             const std::vector<SizeVector>& inShapes) const {}
+    virtual void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const {}
 
     /**
      * @brief Checks correspondence of all parameters in the aggregate, except output shapes.
@@ -86,41 +85,6 @@ private:
     InferenceEngine::details::caseless_unordered_map<std::string, LayerValidator::Ptr> _validators;
 };
 
-static void checkWeakData(const DataWeakPtr& data) {
-}
-
-static void checkData(const DataPtr& data) {
-}
-
-
-/**
- * @brief Checks that input Data is not empty and pointers are not null, number of inputs correspond number of input shapes, dimensions in Data are not empty
- */
-static void checkInputs(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) {
-    // TODO: not finished implementation
-    if (layer->insData.size() != inShapes.size())
-        THROW_IE_EXCEPTION << "Number of layer's inputs don't correspond number of new input shapes";
-
-    auto inData = layer->insData[0].lock();
-    bool isCorrect = false;
-    SizeVector inDims, inShape;
-    if (inData) {
-        inDims = inData->getDims();
-        inShape = inShapes[0];
-        isCorrect = inShape.size() == inDims.size() && !inShape.empty() && !inDims.empty();
-    }
-
-    if (!isCorrect)
-        THROW_IE_EXCEPTION << " Failed with invalid shapes: shapes are empty"
-                                  << "new input shape size=" << inShape.size() << ", input shape size in IR="
-                                  << inDims.size();
-}
-
-/**
- * @brief Checks that output Data is not empty and pointers are not null, number of outputs correspond number of output shapes, dimensions in Data are not empty
- */
-static void checkOutputs(const CNNLayer* layer, const std::vector<SizeVector>& outShapes) {}
-
 static void getInOutShapes(const CNNLayer* layer, InOutDims& inOutShapes) {
     inOutShapes.inDims.clear();
     inOutShapes.outDims.clear();
@@ -155,6 +119,8 @@ public:
     void checkCorrespondence(const CNNLayer* layer,
                              const std::map<std::string, Blob::Ptr>& blobs,
                              const std::vector<SizeVector>& inShapes) const override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(DeconvolutionValidator) : public ConvolutionValidator {
@@ -168,6 +134,8 @@ public:
     void checkCorrespondence(const CNNLayer* layer,
                              const std::map<std::string, Blob::Ptr>& blobs,
                              const std::vector<SizeVector>& inShapes) const override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 
@@ -177,6 +145,8 @@ public:
 
     void checkParams(const CNNLayer* layer) override;
 
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+
     explicit PoolingValidator(const std::string& _type);
 };
 
@@ -191,6 +161,8 @@ public:
     void checkCorrespondence(const CNNLayer* layer,
                              const std::map<std::string, Blob::Ptr>& blobs,
                              const std::vector<SizeVector>& inShapes) const override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(CropValidator) : public LayerValidator {
@@ -211,6 +183,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(BatchNormalizationValidator) : public LayerValidator {
@@ -220,6 +194,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(PowerValidator) : public LayerValidator {
@@ -229,6 +205,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(PReLUValidator) : public LayerValidator {
@@ -238,6 +216,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(ScaleShiftValidator) : public LayerValidator {
@@ -247,6 +227,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(ReshapeValidator) : public LayerValidator {
@@ -265,6 +247,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(ClampValidator) : public LayerValidator {
@@ -273,7 +257,7 @@ public:
 
     void parseParams(CNNLayer* layer) override;
 
-    void checkParams(const CNNLayer* layer) override;
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(ReLUValidator) : public LayerValidator {
@@ -283,6 +267,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(MVNValidator) : public LayerValidator {
@@ -292,6 +278,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(GRNValidator) : public LayerValidator {
@@ -301,6 +289,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(SoftMaxValidator) : public LayerValidator {
@@ -310,6 +300,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(NormValidator) : public LayerValidator {
@@ -319,6 +311,8 @@ public:
     void parseParams(CNNLayer* layer) override;
 
     void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
 class INFERENCE_ENGINE_API_CLASS(SplitValidator) : public LayerValidator {
@@ -376,9 +370,31 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
-class INFERENCE_ENGINE_API_CLASS(RNNValidator) : public LayerValidator {
+class INFERENCE_ENGINE_API_CLASS(StridedSliceValidator) : public LayerValidator {
+public:
+    explicit StridedSliceValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ShuffleChannelsValidator) : public LayerValidator {
+public:
+    explicit ShuffleChannelsValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(DepthToSpaceValidator) : public LayerValidator {
 public:
-    explicit RNNValidator(const std::string& _type);
+    explicit DepthToSpaceValidator(const std::string& _type);
 
     void parseParams(CNNLayer* layer) override;
 
@@ -387,6 +403,412 @@ public:
     void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
 };
 
+class INFERENCE_ENGINE_API_CLASS(SpaceToDepthValidator) : public LayerValidator {
+public:
+    explicit SpaceToDepthValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ReverseSequenceValidator) : public LayerValidator {
+public:
+    explicit ReverseSequenceValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(SqueezeValidator) : public LayerValidator {
+public:
+    explicit SqueezeValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(UnsqueezeValidator) : public LayerValidator {
+public:
+    explicit UnsqueezeValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(RangeValidator) : public LayerValidator {
+public:
+    explicit RangeValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(FillValidator) : public LayerValidator {
+public:
+    explicit FillValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ExpandValidator) : public LayerValidator {
+public:
+    explicit ExpandValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+template<RNNSequenceLayer::CellType CELL>
+class INFERENCE_ENGINE_API_CLASS(RNNBaseValidator) : public LayerValidator {
+public:
+    explicit RNNBaseValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkCorrespondence(const CNNLayer* layer,
+                             const std::map<std::string, Blob::Ptr>& blobs,
+                             const std::vector<SizeVector>& inShapes) const override;
+
+protected:
+    static std::vector<std::string> def_acts;  // Default values for cell gate activations
+    static std::vector<float> def_alpha;  // Default activation alpha parameter
+    static std::vector<float> def_beta;   // Default activation beta parameter
+    static size_t G;   // gate number
+    static size_t NS;  // state number
+};
+
+template<RNNSequenceLayer::CellType CELL>
+class INFERENCE_ENGINE_API_CLASS(RNNCellValidator) : public RNNBaseValidator<CELL> {
+public:
+    explicit RNNCellValidator(const std::string& _type);
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+extern template class INFERENCE_ENGINE_API_CLASS(RNNCellValidator)<RNNSequenceLayer::LSTM>;
+extern template class INFERENCE_ENGINE_API_CLASS(RNNCellValidator)<RNNSequenceLayer::GRU>;
+extern template class INFERENCE_ENGINE_API_CLASS(RNNCellValidator)<RNNSequenceLayer::RNN>;
+
+template<RNNSequenceLayer::CellType CELL>
+class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator) : public RNNBaseValidator<CELL> {
+public:
+    explicit RNNSequenceValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+extern template class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator)<RNNSequenceLayer::LSTM>;
+extern template class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator)<RNNSequenceLayer::GRU>;
+extern template class INFERENCE_ENGINE_API_CLASS(RNNSequenceValidator)<RNNSequenceLayer::RNN>;
+
+class INFERENCE_ENGINE_API_CLASS(ArgMaxValidator) : public LayerValidator {
+public:
+    explicit ArgMaxValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(CTCGreedyDecoderValidator) : public LayerValidator {
+public:
+    explicit CTCGreedyDecoderValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(DetectionOutputValidator) : public LayerValidator {
+public:
+    explicit DetectionOutputValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(InterpValidator) : public LayerValidator {
+public:
+    explicit InterpValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(PermuteValidator) : public LayerValidator {
+public:
+    explicit PermuteValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(PriorBoxValidator) : public LayerValidator {
+public:
+    explicit PriorBoxValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(PriorBoxClusteredValidator) : public LayerValidator {
+public:
+    explicit PriorBoxClusteredValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ProposalValidator) : public LayerValidator {
+public:
+    explicit ProposalValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(PSROIPoolingValidator) : public LayerValidator {
+public:
+    explicit PSROIPoolingValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(RegionYoloValidator) : public LayerValidator {
+public:
+    explicit RegionYoloValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ReorgYoloValidator) : public LayerValidator {
+public:
+    explicit ReorgYoloValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ResampleValidator) : public LayerValidator {
+public:
+    explicit ResampleValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ROIPoolingValidator) : public LayerValidator {
+public:
+    explicit ROIPoolingValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(SimplerNMSValidator) : public LayerValidator {
+public:
+    explicit SimplerNMSValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(SpatialTransformerValidator) : public LayerValidator {
+public:
+    explicit SpatialTransformerValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(UpsamplingValidator) : public LayerValidator {
+public:
+    explicit UpsamplingValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ActivationValidator) : public LayerValidator {
+public:
+    explicit ActivationValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ConstValidator) : public LayerValidator {
+public:
+    explicit ConstValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ELUValidator) : public LayerValidator {
+public:
+    explicit ELUValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(InputValidator) : public LayerValidator {
+public:
+    explicit InputValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(MemoryValidator) : public LayerValidator {
+public:
+    explicit MemoryValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(NormalizeValidator) : public LayerValidator {
+public:
+    explicit NormalizeValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(CopyValidator) : public LayerValidator {
+public:
+    explicit CopyValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(PowerFileValidator) : public LayerValidator {
+public:
+    explicit PowerFileValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(ReLU6Validator) : public LayerValidator {
+public:
+    explicit ReLU6Validator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(SigmoidValidator) : public LayerValidator {
+public:
+    explicit SigmoidValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(TanHValidator) : public LayerValidator {
+public:
+    explicit TanHValidator(const std::string& _type);
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(UnpoolingValidator) : public LayerValidator {
+public:
+    explicit UnpoolingValidator(const std::string& _type);
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(QuantizeValidator) : public LayerValidator {
+public:
+    explicit QuantizeValidator(const std::string& _type);
+
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
+class INFERENCE_ENGINE_API_CLASS(BinaryConvolutionValidator) : public LayerValidator {
+public:
+    void parseParams(CNNLayer* layer) override;
+
+    void checkParams(const CNNLayer* layer) override;
+
+    explicit BinaryConvolutionValidator(const std::string& _type);
+
+    void checkCorrespondence(const CNNLayer* layer,
+                             const std::map<std::string, Blob::Ptr>& blobs,
+                             const std::vector<SizeVector>& inShapes) const override;
+
+    void checkShapes(const CNNLayer* layer, const std::vector<SizeVector>& inShapes) const override;
+};
+
 template<typename Validator>
 class ValidatorRegisterBase {
 public:
@@ -398,34 +820,79 @@ public:
 #define REG_LAYER_VALIDATOR_FOR_TYPE(__validator, __type) \
 static ValidatorRegisterBase<__validator> __reg__##__type(#__type)
 
+REG_LAYER_VALIDATOR_FOR_TYPE(ActivationValidator, Activation);
+REG_LAYER_VALIDATOR_FOR_TYPE(ArgMaxValidator, ArgMax);
+REG_LAYER_VALIDATOR_FOR_TYPE(BatchNormalizationValidator, BatchNormalization);
+REG_LAYER_VALIDATOR_FOR_TYPE(CTCGreedyDecoderValidator, CTCGreedyDecoder);
+REG_LAYER_VALIDATOR_FOR_TYPE(ClampValidator, Clamp);
+REG_LAYER_VALIDATOR_FOR_TYPE(ConcatValidator, Concat);
+REG_LAYER_VALIDATOR_FOR_TYPE(ConstValidator, Const);
 REG_LAYER_VALIDATOR_FOR_TYPE(ConvolutionValidator, Convolution);
+REG_LAYER_VALIDATOR_FOR_TYPE(CopyValidator, Copy);
+REG_LAYER_VALIDATOR_FOR_TYPE(CropValidator, Crop);
 REG_LAYER_VALIDATOR_FOR_TYPE(DeconvolutionValidator, Deconvolution);
-REG_LAYER_VALIDATOR_FOR_TYPE(PoolingValidator, Pooling);
+REG_LAYER_VALIDATOR_FOR_TYPE(DetectionOutputValidator, DetectionOutput);
+REG_LAYER_VALIDATOR_FOR_TYPE(ELUValidator, ELU);
+REG_LAYER_VALIDATOR_FOR_TYPE(EltwiseValidator, Eltwise);
 REG_LAYER_VALIDATOR_FOR_TYPE(FullyConnectedValidator, InnerProduct);
 REG_LAYER_VALIDATOR_FOR_TYPE(FullyConnectedValidator, FullyConnected);
-REG_LAYER_VALIDATOR_FOR_TYPE(CropValidator, Crop);
-REG_LAYER_VALIDATOR_FOR_TYPE(BatchNormalizationValidator, BatchNormalization);
-REG_LAYER_VALIDATOR_FOR_TYPE(PowerValidator, Power);
+REG_LAYER_VALIDATOR_FOR_TYPE(GRNValidator, GRN);
+REG_LAYER_VALIDATOR_FOR_TYPE(InputValidator, Input);
+REG_LAYER_VALIDATOR_FOR_TYPE(InterpValidator, Interp);
+REG_LAYER_VALIDATOR_FOR_TYPE(MVNValidator, MVN);
+REG_LAYER_VALIDATOR_FOR_TYPE(MemoryValidator, Memory);
+REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, Norm);
+REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, LRN);
+REG_LAYER_VALIDATOR_FOR_TYPE(NormalizeValidator, Normalize);
 REG_LAYER_VALIDATOR_FOR_TYPE(PReLUValidator, PReLU);
-REG_LAYER_VALIDATOR_FOR_TYPE(ScaleShiftValidator, ScaleShift);
-REG_LAYER_VALIDATOR_FOR_TYPE(TileValidator, Tile);
+REG_LAYER_VALIDATOR_FOR_TYPE(PSROIPoolingValidator, PSROIPooling);
+REG_LAYER_VALIDATOR_FOR_TYPE(PermuteValidator, Permute);
+REG_LAYER_VALIDATOR_FOR_TYPE(PoolingValidator, Pooling);
+REG_LAYER_VALIDATOR_FOR_TYPE(PowerValidator, Power);
+REG_LAYER_VALIDATOR_FOR_TYPE(PowerFileValidator, PowerFile);
+REG_LAYER_VALIDATOR_FOR_TYPE(PriorBoxClusteredValidator, PriorBoxClustered);
+REG_LAYER_VALIDATOR_FOR_TYPE(PriorBoxValidator, PriorBox);
+REG_LAYER_VALIDATOR_FOR_TYPE(ProposalValidator, Proposal);
+REG_LAYER_VALIDATOR_FOR_TYPE(ROIPoolingValidator, ROIPooling);
+REG_LAYER_VALIDATOR_FOR_TYPE(ReLUValidator, ReLU);
+REG_LAYER_VALIDATOR_FOR_TYPE(ReLU6Validator, ReLU6);
+REG_LAYER_VALIDATOR_FOR_TYPE(RegionYoloValidator, RegionYolo);
+REG_LAYER_VALIDATOR_FOR_TYPE(ReorgYoloValidator, ReorgYolo);
+REG_LAYER_VALIDATOR_FOR_TYPE(ResampleValidator, Resample);
 REG_LAYER_VALIDATOR_FOR_TYPE(ReshapeValidator, Reshape);
 REG_LAYER_VALIDATOR_FOR_TYPE(ReshapeValidator, Flatten);
-REG_LAYER_VALIDATOR_FOR_TYPE(EltwiseValidator, Eltwise);
-REG_LAYER_VALIDATOR_FOR_TYPE(ClampValidator, Clamp);
-REG_LAYER_VALIDATOR_FOR_TYPE(ReLUValidator, ReLU);
-REG_LAYER_VALIDATOR_FOR_TYPE(MVNValidator, MVN);
-REG_LAYER_VALIDATOR_FOR_TYPE(GRNValidator, GRN);
+REG_LAYER_VALIDATOR_FOR_TYPE(ScaleShiftValidator, ScaleShift);
+REG_LAYER_VALIDATOR_FOR_TYPE(SigmoidValidator, Sigmoid);
+REG_LAYER_VALIDATOR_FOR_TYPE(SigmoidValidator, Logistic);
+REG_LAYER_VALIDATOR_FOR_TYPE(SimplerNMSValidator, SimplerNMS);
 REG_LAYER_VALIDATOR_FOR_TYPE(SoftMaxValidator, SoftMax);
-REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, Norm);
-REG_LAYER_VALIDATOR_FOR_TYPE(NormValidator, LRN);
+REG_LAYER_VALIDATOR_FOR_TYPE(SpatialTransformerValidator, SpatialTransformer);
 REG_LAYER_VALIDATOR_FOR_TYPE(SplitValidator, Split);
 REG_LAYER_VALIDATOR_FOR_TYPE(SplitValidator, Slice);
-REG_LAYER_VALIDATOR_FOR_TYPE(ConcatValidator, Concat);
 REG_LAYER_VALIDATOR_FOR_TYPE(GemmValidator, Gemm);
 REG_LAYER_VALIDATOR_FOR_TYPE(PadValidator, Pad);
 REG_LAYER_VALIDATOR_FOR_TYPE(GatherValidator, Gather);
-REG_LAYER_VALIDATOR_FOR_TYPE(RNNValidator, RNN);
-
+REG_LAYER_VALIDATOR_FOR_TYPE(StridedSliceValidator, StridedSlice);
+REG_LAYER_VALIDATOR_FOR_TYPE(ShuffleChannelsValidator, ShuffleChannels);
+REG_LAYER_VALIDATOR_FOR_TYPE(DepthToSpaceValidator, DepthToSpace);
+REG_LAYER_VALIDATOR_FOR_TYPE(SpaceToDepthValidator, SpaceToDepth);
+REG_LAYER_VALIDATOR_FOR_TYPE(ReverseSequenceValidator, ReverseSequence);
+REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::RNN>, RNNCell);
+REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::GRU>, GRUCell);
+REG_LAYER_VALIDATOR_FOR_TYPE(RNNCellValidator<RNNSequenceLayer::LSTM>, LSTMCell);
+REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::RNN>, RNNSequence);
+REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::GRU>, GRUSequence);
+REG_LAYER_VALIDATOR_FOR_TYPE(RNNSequenceValidator<RNNSequenceLayer::LSTM>, LSTMSequence);
+REG_LAYER_VALIDATOR_FOR_TYPE(SqueezeValidator, Squeeze);
+REG_LAYER_VALIDATOR_FOR_TYPE(UnsqueezeValidator, Unsqueeze);
+REG_LAYER_VALIDATOR_FOR_TYPE(RangeValidator, Range);
+REG_LAYER_VALIDATOR_FOR_TYPE(FillValidator, Fill);
+REG_LAYER_VALIDATOR_FOR_TYPE(ExpandValidator, Expand);
+REG_LAYER_VALIDATOR_FOR_TYPE(TanHValidator, TanH);
+REG_LAYER_VALIDATOR_FOR_TYPE(TileValidator, Tile);
+REG_LAYER_VALIDATOR_FOR_TYPE(UnpoolingValidator, Unpooling);
+REG_LAYER_VALIDATOR_FOR_TYPE(UpsamplingValidator, Upsampling);
+REG_LAYER_VALIDATOR_FOR_TYPE(QuantizeValidator, Quantize);
+REG_LAYER_VALIDATOR_FOR_TYPE(BinaryConvolutionValidator, BinaryConvolution);
 }  // namespace details
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_layers_internal.cpp b/inference-engine/src/inference_engine/ie_layers_internal.cpp
index 55fb62698..c9959662f 100644
--- a/inference-engine/src/inference_engine/ie_layers_internal.cpp
+++ b/inference-engine/src/inference_engine/ie_layers_internal.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -98,7 +98,8 @@ class PaddingsUpdater {
 
 Paddings getPaddingsImpl(const CNNLayer &layer) {
     Paddings actual;
-    details::visitActualLayer(std::tuple <DeconvolutionLayer*, ConvolutionLayer*, PoolingLayer*, CNNLayer*>(), layer, PaddingsUpdater(actual));
+    details::visitActualLayer(std::tuple <DeconvolutionLayer*, ConvolutionLayer*, BinaryConvolutionLayer*, PoolingLayer*,
+            CNNLayer*>(), layer, PaddingsUpdater(actual));
     return actual;
 }
 
diff --git a/inference-engine/src/inference_engine/ie_layers_internal.hpp b/inference-engine/src/inference_engine/ie_layers_internal.hpp
index 296b565b2..562bacbc0 100644
--- a/inference-engine/src/inference_engine/ie_layers_internal.hpp
+++ b/inference-engine/src/inference_engine/ie_layers_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -28,6 +28,7 @@ template <class T>
 inline  typename std::enable_if<is_one_of<T,
                                           DeconvolutionLayer,
                                           ConvolutionLayer,
+                                          BinaryConvolutionLayer,
                                           PoolingLayer>::value, Paddings>::type
 getPaddings(const T & layer) {
     return getPaddingsImpl(layer);
diff --git a/inference-engine/src/inference_engine/ie_layers_prv.h b/inference-engine/src/inference_engine/ie_layers_prv.h
deleted file mode 100644
index 9ec8c3cc8..000000000
--- a/inference-engine/src/inference_engine/ie_layers_prv.h
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-/**
- * @brief a header file for internal Layers structure
- * @file
- */
-#pragma once
-
-#include "ie_layers.h"
-#include <string>
-
-namespace InferenceEngine {
-
-/**
- * LSTM Cell Layer
- *
- * Inputs:
- *    Xt   {N, D}
- *    Ht-1 {N, S}
- *    Ct-1 {N, S}
- *
- * Outputs:
- *    Ht {N, S}
- *    Ct {N, S}
- *
- * Weights:
- *    W {G=4, S, D+S}
- *    B {G=4, S}
- *
- * G=4 and gate order is [f,i,c,o]
- *
- * Semantic:
- *
- *   *  - matrix mult
- *  (.) - eltwise mult
- *  [,] - concatenation
- *
- *  f = sigmoid
- *  h = tanh
- *
- * - ft = f(Wf*[Ht-1, Xt] + Bf)
- * - it = f(Wi*[Ht-1, Xt] + Bi)
- * - ct = h(Wc*[Ht-1, Xt] + Bc)
- * - ot = f(Wo*[Ht-1, Xt] + Bo)
- * - Ct = ft (.) Ct-1 + it (.) ct
- * - Ht = ot (.) h(Ct)
- */
-class LSTMCell : public WeightableLayer {
-public:
-    using WeightableLayer::WeightableLayer;
-};
-
-/**
- * @brief This class represents RNN-Sequence layer
- *
- * Date shapes and meaning (cellType = "LSTM", axis = 1):
- *   input[0] Xt - {N,T,DC} input data sequence
- *   input[1] H0 - {N,SC}   initial hidden state
- *   input[2] C0 - {N,SC}   initial cell state
- *
- *   output[0] Ht - {N,T,SC} out data sequence
- *   output[1] HT - {N,SC}   last hidden state
- *   output[2] CT - {N,SC}   last cell state
- *
- *   Recurrent formula and weight format are same as from
- *   corresponding Cell primitive.
- */
-class RNNLayer : public WeightableLayer {
-public:
-    /**
-     * @brief Type of RNN cell used sequence layer
-     * Possible values "RNN", "LSTM", "GRU".
-     */
-    std::string cellType = "LSTM";
-
-    /**
-     * @brief An axis by which iteration is performed
-     * axis=0 means first input/output data blob dimension is sequence
-     * axis=1 means first input/output data blob dimension is batch
-     */
-    unsigned int axis = 1;
-
-    /**
-     * @brief Direction of iteration through sequence dimension
-     */
-    enum Direction {
-        RNN_FWD,  /**< Forward mode. Iterate starts from index 0 with step 1.         */
-        RNN_BWD,  /**< Backward mode. Iterate starts from last index with step -1.    */
-        RNN_BDR   /**< Bidirectional mode. First is forward pass, second is backward. */
-    };
-
-    Direction direction = RNN_FWD;
-
-    using WeightableLayer::WeightableLayer;
-};
-
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_layouts.cpp b/inference-engine/src/inference_engine/ie_layouts.cpp
index 63cbc16f8..a0ecfb01b 100644
--- a/inference-engine/src/inference_engine/ie_layouts.cpp
+++ b/inference-engine/src/inference_engine/ie_layouts.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -54,6 +54,9 @@ TensorDesc::TensorDesc(const Precision &precision, SizeVector dims, const Blocki
     layout = Layout::BLOCKED;
     if (dims.size() == blockingDesc.getBlockDims().size()) {
         switch (dims.size()) {
+            case 0:
+                layout = Layout::SCALAR;
+                break;
             case 1:
                 layout = Layout::C;
                 break;
@@ -97,6 +100,7 @@ TensorDesc::TensorDesc(const Precision &precision, SizeVector dims, const Blocki
 
 TensorDesc::TensorDesc() {
     this->layout = Layout::ANY;
+    precision = Precision::UNSPECIFIED;
 }
 
 void TensorDesc::setDims(const SizeVector &dims) {
@@ -129,6 +133,8 @@ bool TensorDesc::operator!=(const TensorDesc &rhs) const {
 
 Layout TensorDesc::getLayoutByDims(SizeVector dims) {
     switch (dims.size()) {
+        case 0:
+            return Layout::SCALAR;
         case 1:
             return Layout::C;
         case 2:
@@ -246,6 +252,7 @@ BlockingDesc::BlockingDesc(const SizeVector& dims, Layout layout): offsetPadding
     SizeVector l_order;
     SizeVector l_dims;
     switch (layout) {
+        case Layout::SCALAR:
         case Layout::ANY:
             return;
         case Layout::C:
diff --git a/inference-engine/src/inference_engine/ie_memcpy.cpp b/inference-engine/src/inference_engine/ie_memcpy.cpp
index 330c0f268..d5b16277d 100644
--- a/inference-engine/src/inference_engine/ie_memcpy.cpp
+++ b/inference-engine/src/inference_engine/ie_memcpy.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_memcpy.h b/inference-engine/src/inference_engine/ie_memcpy.h
index ab174de34..a91adfa09 100644
--- a/inference-engine/src/inference_engine/ie_memcpy.h
+++ b/inference-engine/src/inference_engine/ie_memcpy.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_network.cpp b/inference-engine/src/inference_engine/ie_network.cpp
index 3c92b99c9..c2db48450 100644
--- a/inference-engine/src/inference_engine/ie_network.cpp
+++ b/inference-engine/src/inference_engine/ie_network.cpp
@@ -1,161 +1,126 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "ie_network.hpp"
-#include <details/ie_inetwork_iterator.hpp>
-#include <details/caseless.hpp>
-#include <iterator>
-#include <string>
-#include <vector>
+#include <ie_network.hpp>
 #include <memory>
+#include <string>
+#include <map>
 
 using namespace InferenceEngine;
 
-details::Network &details::Network::operator=(const details::Network &network) {
-    if (this == &network)
-        return *this;
-    name = network.getName();
-    for (const auto& layer : network) {
-        layers.push_back(Layer::Ptr(new details::Layer(*layer)));
-    }
-    for (const auto& connection : network.connections) {
-        connections.push_back(connection);
+PortData::PortData() {
+    createData({});
+}
+
+PortData::PortData(const SizeVector& shape, const Precision& precision) {
+    createData({precision, shape, TensorDesc::getLayoutByDims(shape)});
+}
+
+const Blob::Ptr& PortData::getData() const {
+    return data;
+}
+
+void PortData::setData(const Blob::Ptr& data) {
+    this->data = data;
+}
+
+const std::map<std::string, Parameter>& PortData::getParameters() const noexcept {
+    return parameters;
+}
+
+void PortData::createData(const TensorDesc& desc) {
+    switch (desc.getPrecision()) {
+        case Precision::UNSPECIFIED:
+            data = std::make_shared<InferenceEngine::TBlob<uint8_t>>(desc);
+            break;
+        case Precision::FP32:
+            data = make_shared_blob<PrecisionTrait<Precision::FP32>::value_type>(desc);
+            break;
+        case Precision::FP16:
+            data = make_shared_blob<PrecisionTrait<Precision::FP16>::value_type>(desc);
+            break;
+        case Precision::Q78:
+            data = make_shared_blob<PrecisionTrait<Precision::Q78>::value_type>(desc);
+            break;
+        case Precision::I16:
+            data = make_shared_blob<PrecisionTrait<Precision::I16>::value_type>(desc);
+            break;
+        case Precision::U8:
+            data = make_shared_blob<PrecisionTrait<Precision::U8>::value_type>(desc);
+            break;
+        case Precision::I8:
+            data = make_shared_blob<PrecisionTrait<Precision::I8>::value_type>(desc);
+            break;
+        case Precision::U16:
+            data = make_shared_blob<PrecisionTrait<Precision::U16>::value_type>(desc);
+            break;
+        case Precision::I32:
+            data = make_shared_blob<PrecisionTrait<Precision::I32>::value_type>(desc);
+            break;
+        default:
+            THROW_IE_EXCEPTION << "Unsupported precisions!";
     }
-    return *this;
-}
-
-details::Network &details::Network::operator=(const INetwork &network) {
-    if (this == &network)
-        return *this;
-    name = network.getName();
-    for (const auto& layer : network) {
-        layers.push_back(std::make_shared<details::Layer>(*layer));
-        for (const auto& newConnection : network.getLayerConnections(layer->getId())) {
-            bool connectionFound = false;
-            for (const auto& connection : connections) {
-                if (connection == newConnection) {
-                    connectionFound = true;
-                    break;
-                }
-            }
-            if (!connectionFound)
-                connections.push_back(newConnection);
-        }
-    }
-    return *this;
-}
-
-details::Network::Network(const Context& context, const std::string& name): ctx(context), name(name) {}
-
-details::Network::Network(const Context& context, const details::Network &network): ctx(context) {
-    *this = network;
-}
-
-details::Network::Network(const Context& context, const INetwork &network): ctx(context) {
-    *this = network;
 }
 
-size_t details::Network::size() const noexcept {
-    return static_cast<size_t>(std::distance(std::begin(*this), std::end(*this)));
+void PortData::setShape(const SizeVector& shape) {
+    TensorDesc desc = data->getTensorDesc();
+    if (desc.getDims() == shape)
+        return;
+    if (data->cbuffer() != nullptr) {
+        THROW_IE_EXCEPTION << "Cannot change shape for allocated data!";
+    }
+    createData({desc.getPrecision(), shape, TensorDesc::getLayoutByDims(shape)});
 }
 
-const std::string& details::Network::getName() const noexcept {
-    return name;
+Port::Port() {
+    data = std::make_shared<PortData>();
 }
 
-std::string& details::Network::getName() noexcept {
-    return name;
+Port::Port(const SizeVector& shapes, const Precision& precision) {
+    data = std::make_shared<PortData>(shapes, precision);
 }
-
-const Context& details::Network::getContext() const noexcept {
-    return ctx;
+Port::Port(const Port& port) {
+    parameters = port.parameters;
+    data = port.data;
 }
 
-const ILayer::Ptr details::Network::getLayer(size_t id) const noexcept {
-    for (const auto& layer : layers) {
-        if (layer->getId() == id)
-            return std::static_pointer_cast<ILayer>(layer);
-    }
-    return nullptr;
-}
-
-const std::vector<ILayer::Ptr> details::Network::getInputs() const noexcept {
-    std::vector<ILayer::Ptr> inputs;
-    for (const auto& layer : layers) {
-        bool isInputLayer = true;
-        for (const auto& connection : getLayerConnections(layer->getId())) {
-            if (connection.to().layerId() == layer->getId()) {
-                isInputLayer = false;
-                break;
-            }
-        }
-        if (isInputLayer) {
-            inputs.push_back(layer);
-        }
-    }
-    return inputs;
-}
-
-const std::vector<ILayer::Ptr> details::Network::getOutputs() const noexcept {
-    std::vector<ILayer::Ptr> outputs;
-    for (const auto& layer : layers) {
-        bool isOutputLayer = true;
-        for (const auto& connection : getLayerConnections(layer->getId())) {
-            if (connection.from().layerId() == layer->getId()) {
-                isOutputLayer = false;
-                break;
-            }
-        }
-        if (isOutputLayer) {
-            outputs.push_back(layer);
-        }
-    }
-    return outputs;
-}
 
-const std::vector<Connection>& details::Network::getConnections() const noexcept {
-    return connections;
+bool Port::operator==(const Port& rhs) const {
+    return parameters == rhs.parameters &&
+           data == rhs.data;
 }
 
-details::Layer::Ptr details::Network::getLayer(size_t id) noexcept {
-    for (const auto& layer : layers) {
-        if (layer->getId() == id)
-            return layer;
-    }
-    return nullptr;
+bool Port::operator!=(const Port& rhs) const {
+    return !(rhs == *this);
 }
 
-const std::vector<Connection> details::Network::getLayerConnections(idx_t layerId) const noexcept {
-    std::vector<Connection> layerConnections;
-    for (auto& connection : connections) {
-        if (connection.from().layerId() == layerId || connection.to().layerId() == layerId)
-            layerConnections.push_back(connection);
-    }
-    return layerConnections;
+const SizeVector& Port::shape() const noexcept {
+    return data->getData()->getTensorDesc().getDims();
 }
 
-void details::Network::addLayer(const ILayer::Ptr &layer) noexcept {
-    if (layer)
-        layers.push_back(std::make_shared<Layer>(*layer));
+void Port::setShape(const SizeVector& shape) {
+    data->setShape(shape);
 }
 
-void details::Network::addConnection(const Connection &connection) noexcept {
-    connections.push_back(connection);
+const std::map<std::string, Parameter>& Port::getParameters() const noexcept {
+    return parameters;
 }
 
-INetwork::const_iterator details::Network::begin() const noexcept {
-    return INetwork::const_iterator(this);
+void Port::setParameters(const std::map<std::string, Parameter>& params) noexcept {
+    parameters = params;
 }
 
-INetwork::const_iterator details::Network::end() const noexcept {
-    return INetwork::const_iterator(this, true);
+void Port::setParameter(const std::string& name, const Parameter& param) {
+    parameters[name] = param;
 }
 
-details::Network::iterator details::Network::begin() noexcept {
-    return Network::iterator(this);
+const PortData::Ptr& Port::getData() const noexcept {
+    return data;
 }
 
-details::Network::iterator details::Network::end() noexcept {
-    return Network::iterator(this, true);
-}
+void Port::setData(const PortData::Ptr& data) {
+    if (!data)
+        return;
+    this->data = data;
+}
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/ie_network.hpp b/inference-engine/src/inference_engine/ie_network.hpp
deleted file mode 100644
index 16a80f7d6..000000000
--- a/inference-engine/src/inference_engine/ie_network.hpp
+++ /dev/null
@@ -1,160 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <ie_inetwork.hpp>
-#include <ie_blob.h>
-#include <memory>
-#include <string>
-#include <vector>
-#include <map>
-
-namespace InferenceEngine {
-namespace details {
-
-class Network;
-
-class Parameters: public IParameters {
-public:
-    using Ptr = std::shared_ptr<Parameters>;
-
-    const std::map<std::string, Parameter>& getParameters() const noexcept override {
-        return params;
-    }
-    const std::map<std::string, Blob::CPtr>& getConstantData() const noexcept override {
-        return constData;
-    }
-
-    std::map<std::string, Parameter>& getParameters() {
-        return params;
-    }
-    std::map<std::string, Blob::CPtr>& getConstantData() noexcept {
-        return constData;
-    }
-private:
-    std::map<std::string, Parameter> params;
-    std::map<std::string, InferenceEngine::Blob::CPtr> constData;
-};
-
-class Layer: public ILayer {
-public:
-    using Ptr = std::shared_ptr<Layer>;
-
-    explicit Layer(size_t id): id(id), params(new Parameters()) {}
-    Layer(const Layer& layer) {
-        this->outputs = layer.getOutputPorts();
-        this->inputs = layer.getInputPorts();
-        this->params = layer.getParameters();
-        this->subGraph = layer.getGraph();
-        this->name = layer.getName();
-        this->type = layer.getType();
-        this->id = layer.getId();
-    }
-    explicit Layer(const ILayer& layer) {
-        this->outputs = layer.getOutputPorts();
-        this->inputs = layer.getInputPorts();
-        this->params = layer.getParameters();
-        this->subGraph = layer.getGraph();
-        this->name = layer.getName();
-        this->type = layer.getType();
-        this->id = layer.getId();
-    }
-
-    size_t getId() const noexcept override {
-        return id;
-    }
-    const std::string& getName() const noexcept override {
-        return name;
-    }
-    const std::string& getType() const noexcept override {
-        return type;
-    }
-    const INetwork::Ptr& getGraph() const noexcept override {
-        return subGraph;
-    }
-    const IParameters::Ptr& getParameters() const noexcept override {
-        return params;
-    }
-    const std::vector<Port>& getInputPorts() const noexcept override {
-        return inputs;
-    }
-    const std::vector<Port>& getOutputPorts() const noexcept override {
-        return outputs;
-    }
-
-    std::string& getName() noexcept {
-        return name;
-    }
-
-    std::string& getType() noexcept {
-        return type;
-    }
-    std::shared_ptr<Network> getGraph() noexcept {
-        return std::dynamic_pointer_cast<Network>(subGraph);
-    }
-    void setGraph(const INetwork::Ptr& graph) noexcept {
-        subGraph = graph;
-    }
-    Parameters::Ptr getParameters() noexcept {
-        return std::dynamic_pointer_cast<Parameters>(params);
-    }
-    std::vector<Port>& getInputPorts() noexcept {
-        return inputs;
-    }
-    std::vector<Port>& getOutputPorts() noexcept {
-        return outputs;
-    }
-
-private:
-    idx_t id;
-    std::string name;
-    std::string type;
-    INetwork::Ptr subGraph;
-    IParameters::Ptr params;
-    std::vector<Port> inputs;
-    std::vector<Port> outputs;
-};
-
-class Network: public INetwork {
-public:
-    using Ptr = std::shared_ptr<Network>;
-    using iterator = details::INetworkIterator<Network, Layer>;
-
-    explicit Network(const Context& context, const std::string& name = "");
-    Network(const Context& context, const INetwork& network);
-    Network(const Context& context, const Network& network);
-
-    Network& operator=(const Network& network);
-    Network& operator=(const INetwork& network);
-
-    const_iterator begin() const noexcept override;
-    const_iterator end() const noexcept override;
-    iterator begin() noexcept;
-    iterator end() noexcept;
-
-    const ILayer::Ptr getLayer(size_t id) const noexcept override;
-    const std::vector<ILayer::Ptr> getInputs() const noexcept override;
-    const std::vector<ILayer::Ptr> getOutputs() const noexcept override;
-    const std::vector<Connection> getLayerConnections(idx_t layerId) const noexcept override;
-    size_t size() const noexcept override;
-    const std::string& getName() const noexcept override;
-    const Context& getContext() const noexcept override;
-
-    const std::vector<Connection>& getConnections() const noexcept;
-    Layer::Ptr getLayer(size_t id) noexcept;
-    std::string& getName() noexcept;
-
-    void addLayer(const ILayer::Ptr& layer) noexcept;
-    void addConnection(const Connection& connection) noexcept;
-
-private:
-    const Context ctx;
-    std::string name;
-    std::vector<Layer::Ptr> layers;
-    std::vector<Connection> connections;
-};
-
-}  // namespace details
-}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_preprocess_data.cpp b/inference-engine/src/inference_engine/ie_preprocess_data.cpp
index 11c3f9eec..ca64d4b5b 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_data.cpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_data.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,6 +9,7 @@
 #include "ie_preprocess_data_sse42.hpp"
 #endif
 #include "ie_preprocess_gapi.hpp"
+#include "debug.h"
 
 #include <algorithm>
 
@@ -751,7 +752,8 @@ Blob::Ptr PreProcessData::getRoiBlob() const {
     return _roiBlob;
 }
 
-void PreProcessData::execute(Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm, bool serial) {
+void PreProcessData::execute(Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm, bool serial,
+        int batchSize) {
     IE_PROFILING_AUTO_SCOPE_TASK(perf_preprocessing)
 
     if (algorithm == NO_RESIZE) {
@@ -762,13 +764,28 @@ void PreProcessData::execute(Blob::Ptr &outBlob, const ResizeAlgorithm &algorith
         THROW_IE_EXCEPTION << "Input pre-processing is called without ROI blob set";
     }
 
+    if (batchSize == 0) {
+        THROW_IE_EXCEPTION << "Input pre-processing is called with invalid batch size "
+                           << batchSize;
+    }
+
+    if (batchSize < 0) {
+        // if batch_size is unspecified, process the whole input blob
+        batchSize = static_cast<int>(_roiBlob->getTensorDesc().getDims()[0]);
+    }
+
     if (!_preproc) {
         _preproc.reset(new PreprocEngine);
     }
-    if (_preproc->preprocessWithGAPI(_roiBlob, outBlob, algorithm, serial)) {
+    if (_preproc->preprocessWithGAPI(_roiBlob, outBlob, algorithm, serial, batchSize)) {
         return;
     }
 
+    if (batchSize > 1) {
+        THROW_IE_EXCEPTION <<   "Batch pre-processing is unsupported in this mode. "
+                                "Use default pre-processing instead to process batches.";
+    }
+
     Blob::Ptr res_in, res_out;
     if (_roiBlob->getTensorDesc().getLayout() == NHWC) {
         if (!_tmp1 || _tmp1->size() != _roiBlob->size()) {
@@ -814,4 +831,21 @@ void PreProcessData::execute(Blob::Ptr &outBlob, const ResizeAlgorithm &algorith
     }
 }
 
+void PreProcessData::isApplicable(const Blob::Ptr &src, const Blob::Ptr &dst) {
+    auto &src_dims = src->getTensorDesc().getDims();
+    auto &dst_dims = dst->getTensorDesc().getDims();
+
+    if (src_dims.size() != dst_dims.size())
+        THROW_IE_EXCEPTION << "Preprocessing is not applicable. Source and destination blobs have different "
+                              "number of dimensions";
+
+    if (src_dims.size() != 4)
+        THROW_IE_EXCEPTION << "Preprocessing is not applicable. Only 4D tensors are supported.";
+
+    if (src_dims[0] != dst_dims[0] || src_dims[1] != dst_dims[1])
+        THROW_IE_EXCEPTION << "Preprocessing is not applicable. Wrong shape. Network expected 4D input tensor with "
+                              "shape [" << dst_dims[0] << "," << dst_dims[1] <<",H,W] but provided tensor has "
+                              "shape "  << details::dumpVec(src_dims) << ".";
+}
+
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_preprocess_data.hpp b/inference-engine/src/inference_engine/ie_preprocess_data.hpp
index f5a7730ab..479e542a5 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_data.hpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_data.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -55,8 +55,13 @@ public:
      * @brief Executes input pre-processing with a given resize algorithm.
      * @param outBlob pre-processed output blob to be used for inference.
      * @param algorithm resize algorithm.
+     * @param serial disable OpenMP threading if the value set to true.
+     * @param batchSize batch size for pre-processing.
      */
-    void execute(Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm, bool serial);
+    void execute(Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm, bool serial,
+                 int batchSize = -1);
+
+    static void isApplicable(const Blob::Ptr &src, const Blob::Ptr &dst);
 };
 
 //----------------------------------------------------------------------
diff --git a/inference-engine/src/inference_engine/ie_preprocess_gapi.cpp b/inference-engine/src/inference_engine/ie_preprocess_gapi.cpp
index 31f5983fd..b6624b54d 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_gapi.cpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_gapi.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -72,27 +72,43 @@ inline int get_cv_depth(const InferenceEngine::TensorDesc &ie_desc) {
     }
 }
 
-std::vector<cv::gapi::own::Mat> bind_to_blob(Blob::Ptr &blob) {
+std::vector<std::vector<cv::gapi::own::Mat>> bind_to_blob(Blob::Ptr &blob, int batch_size) {
+    if (batch_size <= 0) {
+        return {};
+    }
+
     const auto& ie_desc     = blob->getTensorDesc();
     const auto& ie_desc_blk = ie_desc.getBlockingDesc();
     const auto     desc     = G::decompose(blob);
     const auto cv_depth     = get_cv_depth(ie_desc);
     const auto stride       = desc.s.H*blob->element_size();
     const auto planeSize    = cv::gapi::own::Size(desc.d.W, desc.d.H);
-
-
-    uint8_t* ptr = static_cast<uint8_t*>(blob->buffer());
-    ptr += blob->element_size()*ie_desc_blk.getOffsetPadding();
-
-    std::vector<cv::gapi::own::Mat> result;
-    if (blob->layout() == NHWC) {
-        result.emplace_back(planeSize.height, planeSize.width, CV_MAKETYPE(cv_depth, desc.d.C), ptr, stride);
-    } else {  // NCHW
-        const auto planeType = CV_MAKETYPE(cv_depth, 1);
-        for (size_t ch = 0; ch < desc.d.C; ch++) {
-            cv::gapi::own::Mat plane(planeSize.height, planeSize.width, planeType, ptr + ch*desc.s.C*blob->element_size(), stride);
-            result.emplace_back(plane);
+    // Note: operating with strides (desc.s) rather than dimensions (desc.d) which is vital for ROI
+    //       blobs (data buffer is shared but dimensions are different due to ROI != original image)
+    const auto batch_offset = desc.s.N * blob->element_size();
+
+    std::vector<std::vector<cv::gapi::own::Mat>> result(batch_size);
+
+    uint8_t* blob_ptr = static_cast<uint8_t*>(blob->buffer());
+    blob_ptr += blob->element_size()*ie_desc_blk.getOffsetPadding();
+
+    for (int i = 0; i < batch_size; ++i) {
+        uint8_t* curr_data_ptr = blob_ptr + i * batch_offset;
+
+        std::vector<cv::gapi::own::Mat> planes;
+        if (blob->layout() == NHWC) {
+            planes.emplace_back(planeSize.height, planeSize.width, CV_MAKETYPE(cv_depth, desc.d.C),
+                curr_data_ptr, stride);
+        } else {  // NCHW
+            const auto planeType = CV_MAKETYPE(cv_depth, 1);
+            for (size_t ch = 0; ch < desc.d.C; ch++) {
+                cv::gapi::own::Mat plane(planeSize.height, planeSize.width, planeType,
+                    curr_data_ptr + ch*desc.s.C*blob->element_size(), stride);
+                planes.emplace_back(plane);
+            }
         }
+
+        result[i] = std::move(planes);
     }
     return result;
 }
@@ -203,13 +219,13 @@ InferenceEngine::PreprocEngine::Update InferenceEngine::PreprocEngine::needUpdat
 
     BlobDesc last_in;
     BlobDesc last_out;
-    ResizeAlgorithm last_algo;
+    ResizeAlgorithm last_algo = ResizeAlgorithm::NO_RESIZE;
     std::tie(last_in, last_out, last_algo) = *_lastCall;
 
     CallDesc newCall = newCallOrig;
     BlobDesc new_in;
     BlobDesc new_out;
-    ResizeAlgorithm new_algo;
+    ResizeAlgorithm new_algo = ResizeAlgorithm::NO_RESIZE;
     std::tie(new_in, new_out, new_algo) = newCall;
 
     // Declare two empty vectors per each call
@@ -259,7 +275,8 @@ InferenceEngine::PreprocEngine::Update InferenceEngine::PreprocEngine::needUpdat
     return Update::NOTHING;
 }
 
-bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm, bool omp_serial) {
+bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob::Ptr &outBlob,
+        const ResizeAlgorithm &algorithm, bool omp_serial, int batch_size) {
     static const bool NO_GAPI = [](const char *str) -> bool {
         std::string var(str ? str : "");
         return var == "N" || var == "NO" || var == "OFF" || var == "0";
@@ -280,6 +297,20 @@ bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob:
         in_desc = G::decompose(inBlob),
         out_desc = G::decompose(outBlob);
 
+    // according to the IE's current design, input blob batch size _must_ match networks's expected
+    // batch size, even if the actual processing batch size (set on infer request) is different.
+    if (in_desc.d.N != out_desc.d.N) {
+        THROW_IE_EXCEPTION  << "Input blob batch size is invalid: (input blob) "
+                            << in_desc.d.N << " != " << out_desc.d.N << " (expected by network)";
+    }
+
+    // sanity check batch_size
+    if (batch_size > in_desc.d.N || batch_size > out_desc.d.N) {
+        THROW_IE_EXCEPTION  << "Provided batch size is invaid: (provided)"
+                            << batch_size << " > " << out_desc.d.N << " (expected by network)";
+    }
+
+    // CallDesc doesn't change within batch
     CallDesc thisCall = CallDesc{ BlobDesc{ in_desc_ie.getPrecision(),
                                             inBlob->layout(),
                                             in_desc_ie.getDims() },
@@ -289,9 +320,6 @@ bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob:
                                   algorithm };
     const Update update = needUpdate(thisCall);
 
-    std::vector<cv::gapi::own::Mat> input_plane_mats  = bind_to_blob(inBlob);
-    std::vector<cv::gapi::own::Mat> output_plane_mats = bind_to_blob(outBlob);
-
     Opt<cv::GComputation> _lastComputation;
     if (Update::REBUILD == update || Update::RESHAPE == update) {
         _lastCall = cv::util::make_optional(std::move(thisCall));
@@ -307,6 +335,8 @@ bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob:
                                                                   get_cv_depth(in_desc_ie)));
         }
     }
+    auto batched_input_plane_mats  = bind_to_blob(inBlob, batch_size);
+    auto batched_output_plane_mats = bind_to_blob(outBlob, batch_size);
 
     const int thread_num =
             #if IE_THREAD == IE_THREAD_OMP
@@ -323,7 +353,7 @@ bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob:
     // that an actual number of threads will be as assumed, so it
     // possible that all slices are processed by the same thread.
     //
-    parallel_nt_static(thread_num , [&, this](int slice_n, const int total_slices){
+    parallel_nt_static(thread_num , [&, this](int slice_n, const int total_slices) {
         IE_PROFILING_AUTO_SCOPE_TASK(_perf_exec_tile);
 
         auto& compiled = _lastComp[slice_n];
@@ -331,21 +361,28 @@ bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob:
             //  need to compile (or reshape) own object for a particular ROI
             IE_PROFILING_AUTO_SCOPE_TASK(_perf_graph_compiling);
 
-            auto meta_of = [](std::vector<cv::gapi::own::Mat> const& ins){
-                std::vector<cv::GMetaArg> rslt{ins.size()}; rslt.clear();
-                for (auto& m : ins) {
-                    rslt.emplace_back(descr_of(m));
-                }
-                return rslt;
-            };
-
             using cv::gapi::own::Rect;
 
-            const auto lines_per_thread = output_plane_mats[0].rows / total_slices;
+            // current design implies all images in batch are equal
+            const auto& input_plane_mats = batched_input_plane_mats[0];
+            const auto& output_plane_mats = batched_output_plane_mats[0];
+
+            auto lines_per_thread = output_plane_mats[0].rows / total_slices;
             const auto remainder = output_plane_mats[0].rows - total_slices * lines_per_thread;
-            const auto roi_height = lines_per_thread + ((slice_n == total_slices -1) ?  remainder : 0);
 
-            auto roi = Rect{0, slice_n * lines_per_thread, output_plane_mats[0].cols, roi_height};
+            // remainder shows how many threads must calculate 1 additional row. now these additions
+            // must also be addressed in rect's Y coordinate:
+            int roi_y = 0;
+            if (slice_n < remainder) {
+                lines_per_thread++;  // 1 additional row
+                roi_y = slice_n * lines_per_thread;  // all previous rois have lines+1 rows
+            } else {
+                // remainder rois have lines+1 rows, the rest prior to slice_n have lines rows
+                roi_y =
+                    remainder * (lines_per_thread + 1) + (slice_n - remainder) * lines_per_thread;
+            }
+
+            auto roi = Rect{0, roi_y, output_plane_mats[0].cols, lines_per_thread};
             std::vector<Rect> rois(output_plane_mats.size(), roi);
 
             // TODO: make a ROI a runtime argument to avoid
@@ -353,20 +390,25 @@ bool InferenceEngine::PreprocEngine::preprocessWithGAPI(Blob::Ptr &inBlob, Blob:
             auto args = cv::compile_args(gapi::preprocKernels(), cv::GFluidOutputRois{std::move(rois)});
             if (Update::REBUILD == update) {
                 auto& computation = _lastComputation.value();
-                compiled = computation.compile(meta_of(input_plane_mats), std::move(args));
+                compiled = computation.compile(descr_of(input_plane_mats), std::move(args));
             } else {
                 IE_ASSERT(compiled);
-                compiled.reshape(meta_of(input_plane_mats), std::move(args));
+                compiled.reshape(descr_of(input_plane_mats), std::move(args));
             }
         }
 
-        cv::GRunArgs call_ins;
-        cv::GRunArgsP call_outs;
-        for (const auto & m : input_plane_mats) { call_ins.emplace_back(m);}
-        for (auto & m : output_plane_mats) { call_outs.emplace_back(&m);}
+        for (int i = 0; i < batch_size; ++i) {
+            const std::vector<cv::gapi::own::Mat>& input_plane_mats = batched_input_plane_mats[i];
+            std::vector<cv::gapi::own::Mat>& output_plane_mats = batched_output_plane_mats[i];
 
-        IE_PROFILING_AUTO_SCOPE_TASK(_perf_exec_graph);
-        compiled(std::move(call_ins), std::move(call_outs));
+            cv::GRunArgs call_ins;
+            cv::GRunArgsP call_outs;
+            for (const auto & m : input_plane_mats) { call_ins.emplace_back(m);}
+            for (auto & m : output_plane_mats) { call_outs.emplace_back(&m);}
+
+            IE_PROFILING_AUTO_SCOPE_TASK(_perf_exec_graph);
+            compiled(std::move(call_ins), std::move(call_outs));
+        }
     });
 
     return true;
diff --git a/inference-engine/src/inference_engine/ie_preprocess_gapi.hpp b/inference-engine/src/inference_engine/ie_preprocess_gapi.hpp
index 5d9168acd..6ac9db2d6 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_gapi.hpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_gapi.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -36,7 +36,8 @@ class PreprocEngine {
 
 public:
     PreprocEngine();
-    bool preprocessWithGAPI(Blob::Ptr &inBlob, Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm, bool omp_serial);
+    bool preprocessWithGAPI(Blob::Ptr &inBlob, Blob::Ptr &outBlob, const ResizeAlgorithm &algorithm,
+        bool omp_serial, int batch_size = -1);
 };
 
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.cpp b/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.cpp
index 4910a2a65..5b282d9fc 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.cpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -585,7 +585,6 @@ static void calcRowLinear(const cv::gapi::fluid::View  & in,
                               reinterpret_cast<const float*>(alpha),
                               reinterpret_cast<const int*>(mapsx),
                               reinterpret_cast<const float*>(beta),
-                              reinterpret_cast<float*>(tmp),
                               inSz, outSz, lpi);
             return;
         }
diff --git a/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.hpp b/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.hpp
index f4875e63e..6213f6e79 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.hpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels_impl.hpp b/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels_impl.hpp
index 11530dc63..be1d985bb 100644
--- a/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels_impl.hpp
+++ b/inference-engine/src/inference_engine/ie_preprocess_gapi_kernels_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_profiling.hpp b/inference-engine/src/inference_engine/ie_profiling.hpp
index 540255b17..6c75d75b3 100644
--- a/inference-engine/src/inference_engine/ie_profiling.hpp
+++ b/inference-engine/src/inference_engine/ie_profiling.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -209,7 +209,7 @@ inline static void annotateEnd(TimeResultsMap& m, TimeSampler& t) {
 #define IE_STR(x) IE_STR_(x)
 #define IE_STR_(x) #x
 
-#define IE_PROFILING_AUTO_SCOPE(NAME) IE_ITT_SCOPE(IE_STR(NAME)); IE_TIMER_SCOPE(IE_STR(NAME));
+#define IE_PROFILING_AUTO_SCOPE(NAME) IE_ITT_SCOPE(IE_STR(NAME)); IE_TIMER_SCOPE(IE_STR(NAME))
 
 struct ProfilingTask {
     std::string name;
@@ -261,7 +261,7 @@ inline static void annotateEnd(IttStatic&, IttProfilingTask& t) {
     #define IE_ITT_TASK_SCOPE(profiling_task)
 #endif
 
-#define IE_PROFILING_AUTO_SCOPE_TASK(PROFILING_TASK) IE_ITT_TASK_SCOPE(PROFILING_TASK); IE_TIMER_SCOPE(PROFILING_TASK.name);
+#define IE_PROFILING_AUTO_SCOPE_TASK(PROFILING_TASK) IE_ITT_TASK_SCOPE(PROFILING_TASK); IE_TIMER_SCOPE(PROFILING_TASK.name)
 
 inline static void anotateSetThreadName(const char* name) {
     #if ENABLE_PROFILING_ITT
diff --git a/inference-engine/src/inference_engine/ie_util_internal.cpp b/inference-engine/src/inference_engine/ie_util_internal.cpp
index 44be1b541..fd0f77231 100644
--- a/inference-engine/src/inference_engine/ie_util_internal.cpp
+++ b/inference-engine/src/inference_engine/ie_util_internal.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -137,6 +137,16 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
         &layerCloneImpl<GemmLayer              >,
         &layerCloneImpl<PadLayer               >,
         &layerCloneImpl<GatherLayer            >,
+        &layerCloneImpl<StridedSliceLayer      >,
+        &layerCloneImpl<ShuffleChannelsLayer   >,
+        &layerCloneImpl<DepthToSpaceLayer      >,
+        &layerCloneImpl<SpaceToDepthLayer      >,
+        &layerCloneImpl<ReverseSequenceLayer   >,
+        &layerCloneImpl<SqueezeLayer           >,
+        &layerCloneImpl<UnsqueezeLayer         >,
+        &layerCloneImpl<RangeLayer             >,
+        &layerCloneImpl<FillLayer              >,
+        &layerCloneImpl<ExpandLayer            >,
         &layerCloneImpl<ClampLayer             >,
         &layerCloneImpl<ReLULayer              >,
         &layerCloneImpl<SoftMaxLayer           >,
@@ -149,6 +159,11 @@ CNNLayerPtr clonelayer(const CNNLayer& source) {
         &layerCloneImpl<PoolingLayer           >,
         &layerCloneImpl<DeconvolutionLayer     >,
         &layerCloneImpl<ConvolutionLayer       >,
+        &layerCloneImpl<TensorIterator         >,
+        &layerCloneImpl<RNNSequenceLayer       >,
+        &layerCloneImpl<RNNCellBase            >,
+        &layerCloneImpl<QuantizeLayer          >,
+        &layerCloneImpl<BinaryConvolutionLayer >,
         &layerCloneImpl<WeightableLayer        >,
         &layerCloneImpl<CNNLayer               >
     };
@@ -169,8 +184,13 @@ details::CNNNetworkImplPtr cloneNet(const ICNNNetwork &network) {
         layers.push_back(*i);
         i++;
     }
+
+    InferenceEngine::ICNNNetworkStats* pstatsSrc = nullptr;
+    if (StatusCode::OK != network.getStats(&pstatsSrc, nullptr)) {
+        pstatsSrc = nullptr;
+    }
     // copy of the network
-    details::CNNNetworkImplPtr net = cloneNet(layers);
+    details::CNNNetworkImplPtr net = cloneNet(layers, pstatsSrc);
     // going over output layers and duplicatig them:
     OutputsDataMap outputs;
     network.getOutputsInfo(outputs);
@@ -194,21 +214,12 @@ details::CNNNetworkImplPtr cloneNet(const ICNNNetwork &network) {
         }
     }
 
-    // cloning of statistics
-    InferenceEngine::ICNNNetworkStats* pstatsSrc = nullptr, *pstatsTarget = nullptr;
-    StatusCode s = network.getStats(&pstatsSrc, nullptr);
-    if (s == StatusCode::OK && pstatsSrc && !pstatsSrc->isEmpty()) {
-        StatusCode st = net->getStats(&pstatsTarget, nullptr);
-        if (st == StatusCode::OK && pstatsTarget) {
-            pstatsTarget->setNodesStats(pstatsSrc->getNodesStats());
-        }
-    }
-
     return net;
 }
 
 
 details::CNNNetworkImplPtr cloneNet(const std::vector<CNNLayerPtr>& layers,
+                                    const ICNNNetworkStats* networkStats,
                                     std::function<CNNLayerPtr(const CNNLayer&)> layerCloner) {
     // TODO layerCloner std::function is heavy and can be replaced with
     // llvm::function_ref-like lightweight callable when we add one
@@ -319,6 +330,15 @@ details::CNNNetworkImplPtr cloneNet(const std::vector<CNNLayerPtr>& layers,
 
     net->resolveOutput();
 
+    // cloning of statistics
+    InferenceEngine::ICNNNetworkStats* pstatsTarget = nullptr;
+    if (networkStats != nullptr && !networkStats->isEmpty()) {
+        StatusCode st = net->getStats(&pstatsTarget, nullptr);
+        if (st == StatusCode::OK && pstatsTarget) {
+            pstatsTarget->setNodesStats(networkStats->getNodesStats());
+        }
+    }
+
     return net;
 }
 
@@ -413,9 +433,10 @@ struct NodePrinter {
     }
 
     string cleanNodeName_(string node_name) const {
-        // remove dot and dash symbols form node name. It is incorrectly displayed in xdot
+        // remove dot and dash symbols from node name. It is incorrectly displayed in xdot
         node_name.erase(remove(node_name.begin(), node_name.end(), '.'), node_name.end());
         std::replace(node_name.begin(), node_name.end(), '-', '_');
+        std::replace(node_name.begin(), node_name.end(), ':', '_');
         return node_name;
     }
 
@@ -462,6 +483,45 @@ struct NodePrinter {
 
             if (negative_slope != 0.0f)
                 printed_properties.emplace_back("negative_slope", std::to_string(negative_slope));
+        } else if (type == "Eltwise") {
+            auto* eltwise = dynamic_cast<EltwiseLayer*>(layer.get());
+
+            std::string operation;
+
+            if (eltwise->_operation == EltwiseLayer::Sum)
+                operation = "Sum";
+            else if (eltwise->_operation == EltwiseLayer::Prod)
+                operation = "Prod";
+            else if (eltwise->_operation == EltwiseLayer::Max)
+                operation = "Max";
+            else if (eltwise->_operation == EltwiseLayer::Sub)
+                operation = "Sub";
+            else if (eltwise->_operation == EltwiseLayer::Min)
+                operation = "Min";
+            else if (eltwise->_operation == EltwiseLayer::Div)
+                operation = "Div";
+            else if (eltwise->_operation == EltwiseLayer::Squared_diff)
+                operation = "Squared_diff";
+            else if (eltwise->_operation == EltwiseLayer::Equal)
+                operation = "Equal";
+            else if (eltwise->_operation == EltwiseLayer::Not_equal)
+                operation = "Not_equal";
+            else if (eltwise->_operation == EltwiseLayer::Less)
+                operation = "Less";
+            else if (eltwise->_operation == EltwiseLayer::Less_equal)
+                operation = "Less_equal";
+            else if (eltwise->_operation == EltwiseLayer::Greater)
+                operation = "Greater";
+            else if (eltwise->_operation == EltwiseLayer::Greater_equal)
+                operation = "Greater_equal";
+            else if (eltwise->_operation == EltwiseLayer::Logical_AND)
+                operation = "Logical_AND";
+            else if (eltwise->_operation == EltwiseLayer::Logical_OR)
+                operation = "Logical_OR";
+            else if (eltwise->_operation == EltwiseLayer::Logical_XOR)
+                operation = "Logical_XOR";
+
+            printed_properties.emplace_back("operation", operation);
         }
 
         if (layer_cb != nullptr) {
@@ -483,9 +543,9 @@ struct NodePrinter {
         };
 
         std::stringstream dims_ss;
-        size_t idx = data->dims.size();
+        size_t idx = data->getTensorDesc().getDims().size();
         dims_ss << '[';
-        for (auto &dim : data->dims) {
+        for (auto &dim : data->getTensorDesc().getDims()) {
             dims_ss << dim << ((--idx) != 0u ? ", " : "");
         }
         dims_ss << ']';
@@ -499,20 +559,20 @@ struct NodePrinter {
     void printNode(string const &node_name, const string &node_title,
                    ordered_properties const &node_properties,
                    ordered_properties const &printed_properties) {
-        // normalization of names, removing all prohinited symbols like "/"
+        // normalization of names, removing all prohibited symbols like "/"
         string nodeNameN = node_name;
         std::replace(nodeNameN.begin(), nodeNameN.end(), '/', '_');
         string dataNameN = node_title;
         std::replace(dataNameN.begin(), dataNameN.end(), '/', '_');
 
         out << '\t' << nodeNameN << " [";
-        for (auto &node_propertie : node_properties) {
-            out << node_propertie.first << "=\"" << node_propertie.second << "\", ";
+        for (auto &node_property : node_properties) {
+            out << node_property.first << "=\"" << node_property.second << "\", ";
         }
 
         out << "label=\"" << node_title;
-        for (auto &printed_propertie : printed_properties) {
-            out << "\\n" << printed_propertie.first << ": " << printed_propertie.second;
+        for (auto &printed_property : printed_properties) {
+            out << "\\n" << printed_property.first << ": " << printed_property.second;
         }
         out << "\"];\n";
     }
@@ -539,17 +599,10 @@ void saveGraphToDot(InferenceEngine::ICNNNetwork &network, std::ostream &out, pr
         }
     }
 
-    std::vector<std::pair<CNNLayerPtr, std::string>> perf_info;
-    auto store_perf_info = [&](CNNLayerPtr layer) {
-        auto perf = layer->params.find("perf");
-        if (perf != layer->params.end()) perf_info.push_back({layer, perf->second});
-    };
-
     out << "strict digraph Network {\n";
     // Traverse graph and print nodes
     for (const auto &layer : details::CNNNetSortTopologically(network)) {
         printer.printLayerNode(layer);
-        store_perf_info(layer);
 
         // Print output Data Object
         for (auto &dataptr : layer->outData) {
@@ -571,28 +624,6 @@ void saveGraphToDot(InferenceEngine::ICNNNetwork &network, std::ostream &out, pr
             printer.printEdge(layer, dataptr, true);
         }
     }
-
-    if (!perf_info.empty()) {
-        out << "// Performance statistic" << std::endl;
-        out << "node [shape=plain, fontsize=24]" << std::endl;
-
-        for (auto &p : perf_info) {
-            auto &perf = p.second;
-            auto &name = p.first->name;
-            auto layer_name = "layer_" + name;
-            auto perf_name = "perf_" + name;
-            // {rank=same; perf_conv1 [label="133  mcs"]; layer_conv1;}
-            out << "{rank=same; " << perf_name << " [label=\"" << perf << "\"]; "
-                << layer_name << ";}" << std::endl;
-        }
-
-        out << std::endl << "edge[style=invis];" << std::endl;
-        auto p = perf_info.begin();
-        out << "perf_" + p->first->name;
-        for (; p != perf_info.end(); p++)
-            out << " -> perf_" + p->first->name;
-    }
-
     out << "}" << std::endl;
 }
 
diff --git a/inference-engine/src/inference_engine/ie_util_internal.hpp b/inference-engine/src/inference_engine/ie_util_internal.hpp
index 1f6e9f63b..61bf95fbd 100644
--- a/inference-engine/src/inference_engine/ie_util_internal.hpp
+++ b/inference-engine/src/inference_engine/ie_util_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -94,6 +94,7 @@ INFERENCE_ENGINE_API_CPP(CNNLayerPtr) clonelayer(const CNNLayer& source);
  */
 INFERENCE_ENGINE_API_CPP(InferenceEngine::details::CNNNetworkImplPtr)
 cloneNet(const std::vector<InferenceEngine::CNNLayerPtr>& layers,
+         const ICNNNetworkStats* networkStats,
          std::function<CNNLayerPtr(const CNNLayer&)> layerCloner = clonelayer);
 
 /**
diff --git a/inference-engine/src/inference_engine/ie_utils.cpp b/inference-engine/src/inference_engine/ie_utils.cpp
index aa8e00944..fd8163298 100644
--- a/inference-engine/src/inference_engine/ie_utils.cpp
+++ b/inference-engine/src/inference_engine/ie_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/ie_version.cpp b/inference-engine/src/inference_engine/ie_version.cpp
index cca54cc25..5473e800a 100644
--- a/inference-engine/src/inference_engine/ie_version.cpp
+++ b/inference-engine/src/inference_engine/ie_version.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,7 +10,7 @@ INFERENCE_ENGINE_API(const Version*) GetInferenceEngineVersion() noexcept {
     // Use local static variable to make sure it is always properly initialized
     // even if called from global constructor
     static Version inferenceEngineVersion = {
-        {1, 4},  // inference engine API version
+        {1, 6},  // inference engine API version
         CI_BUILD_NUMBER
     };
     return &inferenceEngineVersion;
diff --git a/inference-engine/src/inference_engine/layer_transform.hpp b/inference-engine/src/inference_engine/layer_transform.hpp
index fd51793dd..73015521a 100644
--- a/inference-engine/src/inference_engine/layer_transform.hpp
+++ b/inference-engine/src/inference_engine/layer_transform.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -8,7 +8,6 @@
 #include <memory>
 #include <utility>
 #include "ie_layers.h"
-#include "ie_layers_prv.h"
 
 namespace InferenceEngine {
 
@@ -31,6 +30,16 @@ using AllLayers = std::tuple <
     GemmLayer*,
     PadLayer*,
     GatherLayer*,
+    StridedSliceLayer*,
+    ShuffleChannelsLayer*,
+    DepthToSpaceLayer*,
+    SpaceToDepthLayer*,
+    ReverseSequenceLayer*,
+    SqueezeLayer*,
+    UnsqueezeLayer*,
+    RangeLayer*,
+    FillLayer*,
+    ExpandLayer*,
     ConcatLayer*,
     SplitLayer*,
     NormLayer*,
@@ -49,7 +58,11 @@ using AllLayers = std::tuple <
     ClampLayer*,
     TensorIterator*,
     LSTMCell*,
-    RNNLayer*,
+    GRUCell*,
+    RNNCell*,
+    RNNSequenceLayer*,
+    QuantizeLayer*,
+    BinaryConvolutionLayer*,
     WeightableLayer*,
     CNNLayer*
 >;
diff --git a/inference-engine/src/inference_engine/memory_solver.cpp b/inference-engine/src/inference_engine/memory_solver.cpp
index ce31fc18e..e70caabfa 100644
--- a/inference-engine/src/inference_engine/memory_solver.cpp
+++ b/inference-engine/src/inference_engine/memory_solver.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -51,7 +51,7 @@ inline bool popupTogetherWith(MemorySolver::Box &box_new, const MemorySolver::Bo
     }
 }
 
-int MemorySolver::solve() {
+int64_t MemorySolver::solve() {
     maxTopDepth();  // at first make sure that we no need more for boxes sorted by box.start
     std::vector<std::vector<const Box*>> time_slots(_time_duration);
     for (auto & slot : time_slots) slot.reserve(_top_depth);  // 2D array [_time_duration][_top_depth]
@@ -61,11 +61,11 @@ int MemorySolver::solve() {
     std::sort(_boxes.begin(), _boxes.end(), [](const Box& l, const Box& r)
         { return l.size > r.size; });
 
-    int _min_required = 0;
+    int64_t _min_required = 0;
 
     for (Box& box : _boxes) {
         // start from bottom and will lift it up if intersect with other present
-        int id = box.id;
+        int64_t id = box.id;
         box.id = 0;  // id will be used as a temp offset storage
         bool popped_up;
         do {
@@ -91,17 +91,17 @@ int MemorySolver::solve() {
     return _min_required;
 }
 
-int MemorySolver::maxDepth() {
+int64_t MemorySolver::maxDepth() {
     if (_depth == -1) calcDepth();
     return _depth;
 }
 
-int MemorySolver::maxTopDepth() {
+int64_t MemorySolver::maxTopDepth() {
     if (_top_depth == -1) calcDepth();
     return _top_depth;
 }
 
-int MemorySolver::getOffset(int id) const {
+int64_t MemorySolver::getOffset(int id) const {
     auto res = _offsets.find(id);
     if (res == _offsets.end()) THROW_IE_EXCEPTION << "There are no box for provided ID";
     return res->second;
@@ -110,12 +110,12 @@ int MemorySolver::getOffset(int id) const {
 //======== Private =============//
 
 void MemorySolver::calcDepth() {
-    int top_depth = 0;
-    int depth = 0;
-    std::map<int, std::vector<const Box*>> release_at;
+    int64_t top_depth = 0;
+    int64_t depth = 0;
+    std::map<int64_t, std::vector<const Box*>> release_at;
 
     for (const Box& box : _boxes) {
-        int time = box.start;
+        int64_t time = box.start;
         depth += box.size;
         top_depth++;
 
diff --git a/inference-engine/src/inference_engine/memory_solver.hpp b/inference-engine/src/inference_engine/memory_solver.hpp
index 04b8e0652..0881b859f 100644
--- a/inference-engine/src/inference_engine/memory_solver.hpp
+++ b/inference-engine/src/inference_engine/memory_solver.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -57,10 +57,10 @@ public:
         int finish;
 
         /** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */
-        int size;
+        int64_t size;
 
         /** Box identifier, unique for each box. Will be used to querying calculated offset. */
-        int id;
+        int64_t id;
     };
 
     explicit MemorySolver(const std::vector<Box>& boxes);
@@ -69,21 +69,21 @@ public:
      * @brief Solve memory location with maximal reuse.
      * @return Size of common memory blob required for storing all
      */
-    int solve();
+    int64_t solve();
 
     /** Provides calculated offset for specified box id */
-    int getOffset(int id) const;
+    int64_t getOffset(int id) const;
 
     /** Additional info. Max sum of box sizes required for any time stamp. */
-    int maxDepth();
+    int64_t maxDepth();
     /** Additional info. Max num of boxes required for any time stamp. */
-    int maxTopDepth();
+    int64_t maxTopDepth();
 
 private:
     std::vector<Box> _boxes;
-    std::map<int, int> _offsets;
-    int _top_depth = -1;
-    int _depth = -1;
+    std::map<int64_t, int64_t> _offsets;
+    int64_t _top_depth = -1;
+    int64_t _depth = -1;
     int _time_duration = -1;
 
     void calcDepth();
diff --git a/inference-engine/src/inference_engine/net_pass.cpp b/inference-engine/src/inference_engine/net_pass.cpp
index 96ceb63f7..4e7fad27b 100644
--- a/inference-engine/src/inference_engine/net_pass.cpp
+++ b/inference-engine/src/inference_engine/net_pass.cpp
@@ -1,16 +1,25 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "net_pass.h"
-#include "ie_layers_prv.h"
+#include "blob_factory.hpp"
+#include "ie_memcpy.h"
+#include "details/ie_cnn_network_tools.h"
 #include "graph_tools.hpp"
 
 #include <string>
 #include <utility>
+#include <algorithm>
 #include <memory>
+#include <tuple>
+#include <set>
+#include <unordered_map>
 #include <unordered_set>
 
+namespace InferenceEngine {
+namespace NetPass {
+
 template <typename T, typename P>
 inline bool one_of(T val, P item) { return val == item; }
 template <typename T, typename P, typename... Args>
@@ -18,8 +27,124 @@ inline bool one_of(T val, P item, Args... item_others) {
     return val == item || one_of(val, item_others...);
 }
 
-namespace InferenceEngine {
-namespace NetPass {
+/************************************************************/
+/****  TI Utils  ********************************************/
+/************************************************************/
+
+static std::vector<DataPtr> getAllInputs(const std::vector<DataPtr> &heads) {
+    CNNLayerSet inputLayers;
+    std::unordered_set<CNNLayer*> allLayers;
+
+    // Define all start layers
+    for (const auto & data : heads) {
+        auto &secondLayers = data->getInputTo();
+
+        details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
+            if (layer->insData.empty()) {
+                inputLayers.insert(layer);
+            }
+        }, false);
+    }
+
+    std::vector<DataPtr> res = heads;
+    // Add fake input data to point on not achievable
+    // layers from head (like const placeholders)
+    for (auto &starter : inputLayers) {
+        DataPtr holder(new Data(starter->name + ":input_holder", starter->precision));
+        holder->inputTo[starter->name] = starter;
+        res.push_back(holder);
+    }
+
+    return res;
+}
+
+static std::vector<CNNLayerPtr> SortTopologically(const TensorIterator::Body &body) {
+    std::vector<CNNLayerPtr> all_layers;
+
+    auto all_input_layers = getAllInputs(body.inputs);
+    CNNNetForestDFS(all_input_layers, [&](CNNLayerPtr  current){
+        all_layers.push_back(current);
+    }, false);
+    std::reverse(all_layers.begin(), all_layers.end());
+    return all_layers;
+}
+
+static TensorIterator::Body CopyTIBody(ICNNNetwork &net, const TensorIterator::Body &body, std::string suffix = "") {
+    struct NoneStruct {};
+    auto cp = [&](CNNLayerPtr lp) {
+        return injectData<NoneStruct>(lp);
+    };
+
+    const auto all_orig = SortTopologically(body);
+    auto num = all_orig.size();
+
+    std::unordered_map<CNNLayer*, CNNLayerPtr> old2new_l;
+    for (int i = 0; i < num; i++) {
+        auto &orig = all_orig[i];
+        old2new_l[orig.get()] = cp(orig);
+    }
+
+    std::unordered_map<Data*, DataPtr> old2new_d;
+    for (auto &in : body.inputs) {
+        auto new_data = std::make_shared<Data>(*in.get());
+        for (auto &to : new_data->getInputTo())
+            to.second = old2new_l[to.second.get()];
+
+        old2new_d[in.get()] = new_data;
+    }
+
+    for (const auto &old : all_orig) {
+        auto &new_one = old2new_l[old.get()];
+        // remap output data
+        for (int i = 0; i != old->outData.size(); i++) {
+            auto old_data = old->outData[i];
+            auto new_data = new_one->outData[i];
+            new_data->getCreatorLayer() = CNNLayerWeakPtr(new_one);
+            old2new_d[old_data.get()] = new_data;
+
+            for (auto &to : new_data->getInputTo())
+                to.second = old2new_l[to.second.get()];
+        }
+        // remap input data
+        for (int i = 0; i != old->insData.size(); i++) {
+            auto old_data = old->insData[i].lock();
+            auto new_data = old2new_d.at(old_data.get());
+            new_one->insData[i] = new_data;
+        }
+    }
+
+    // Add suffix
+    if (!suffix.empty()) {
+        for (auto &kvp : old2new_l) {
+            auto layer = kvp.second;
+            auto old_name = layer->name;
+            layer->name += suffix;
+            for (auto &ins : layer->insData) {
+                ins.lock()->inputTo.erase(old_name);
+                ins.lock()->inputTo[layer->name] = layer;
+            }
+
+            // And also hold newly created layer in parent network.
+            // TI body may contain isolated constant placeholder layers
+            // which are not achievable from body inputs.
+            net.addLayer(layer);
+        }
+        for (auto &kvp : old2new_d) kvp.second->name += suffix;
+    }
+
+    TensorIterator::Body res;
+    for (auto &in : body.inputs)
+        res.inputs.emplace_back(old2new_d[in.get()]);
+
+    for (auto &out : body.outputs)
+        res.outputs.emplace_back(old2new_d[out.get()]);
+
+    return res;
+}
+
+/************************************************************/
+/****  TI rule helpers  *************************************/
+/************************************************************/
 
 inline bool is_full_ranged(const TensorIterator::PortMap& rule, const DataPtr &data) {
     if (!data)
@@ -39,35 +164,174 @@ inline bool is_full_ranged(const TensorIterator::PortMap& rule, const DataPtr &d
         : begin == size && end == 0;
 }
 
-bool convertToLSTMSequence(CNNLayerPtr cur) {
-    if (cur->type != "TensorIterator") return false;
-    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
+inline int get_num_iteration(const std::shared_ptr<TensorIterator> &ti) {
+    int iter_num = 1;  // 1 means no iteration
+
+    for (auto & rule : ti->input_port_map) {
+        if (rule.axis == -1) continue;
+
+        auto data = ti->insData[rule.from].lock();
+        IE_ASSERT(data);
 
+        auto shape = data->getDims();
+        size_t size = shape[rule.axis];
+        size_t step = std::abs(rule.stride);
+        size_t cur_iter_size = size / step;
+
+        if (iter_num == 1) {
+            iter_num = cur_iter_size;
+        } else {
+            if (iter_num != cur_iter_size)
+                return -1;  // TI is inconsistent
+        }
+    }
+
+    for (auto & rule : ti->output_port_map) {
+        if (rule.axis == -1) continue;
+
+        auto data = ti->outData[rule.from];
+        auto shape = data->getDims();
+
+        size_t size = shape[rule.axis];
+        size_t step = std::abs(rule.stride);
+        size_t cur_iter_size = size / step;
+
+        if (iter_num == 1) {
+            iter_num = cur_iter_size;
+        } else {
+            if (iter_num != cur_iter_size)
+                return -1;  // TI is inconsistent
+        }
+    }
+    return iter_num;
+}
+
+using RuleSet = std::vector<TensorIterator::PortMap>;
+
+std::tuple<RuleSet, RuleSet, RuleSet> ClassifyInRules(const std::shared_ptr<TensorIterator> &ti) {
+    /*
+     * first_class  - which has iteration component
+     * second_class - which has no iteration and there are no backedge connection to the same port
+     * third_class  - which has no iteration and has corresponding backedge
+     */
+    RuleSet first_class_rules, second_class_rules, third_class_rules;
+
+    std::set<int> ports_with_backedge;
+    for (const auto &back_edge : ti->back_edges) ports_with_backedge.insert(back_edge.to);
+
+    for (const auto &rule : ti->input_port_map) {
+        if (rule.axis != -1)
+            first_class_rules.push_back(rule);
+
+        else if (!ports_with_backedge.count(rule.to))
+            second_class_rules.push_back(rule);
+
+        else
+            third_class_rules.push_back(rule);
+    }
+    return std::tuple<RuleSet, RuleSet, RuleSet> {first_class_rules, second_class_rules, third_class_rules};
+}
+
+std::tuple<RuleSet, RuleSet, RuleSet> ClassifyOutRules(const std::shared_ptr<TensorIterator> &ti) {
+    /*
+     * first_class  - which has iteration component
+     * second_class - which has no iteration and there are no backedge connection to the same port
+     * third_class  - which has no iteration and has corresponding backedge
+     */
+    RuleSet first_class_rules, second_class_rules, third_class_rules;
+
+    std::set<int> ports_with_backedge;
+    for (const auto &back_edge : ti->back_edges) ports_with_backedge.insert(back_edge.from);
+
+    for (const auto &rule : ti->output_port_map) {
+        if (rule.axis != -1)
+            first_class_rules.push_back(rule);
+
+        else if (!ports_with_backedge.count(rule.to))
+            second_class_rules.push_back(rule);
+
+        else
+            third_class_rules.push_back(rule);
+    }
+    return std::tuple<RuleSet, RuleSet, RuleSet> {first_class_rules, second_class_rules, third_class_rules};
+}
+
+/**
+ * Merge slave connections into master
+ * @param master
+ * @param slave
+ */
+void CombineData(DataPtr &master, DataPtr &slave) {
+    for (auto &kvp : slave->inputTo) {
+        auto &slave_layer = kvp.second;
+        for (auto &slv_ins_wptr : slave_layer->insData) {
+            auto slv_ins = slv_ins_wptr.lock();
+            // Replace slave ptr with master
+            if (slv_ins == slave) slv_ins_wptr = master;
+        }
+        master->inputTo[slave_layer->name] = slave_layer;
+    }
+}
+
+/************************************************************/
+/****  Converter Passes  ************************************/
+/************************************************************/
+
+static RNNSequenceLayer::CellType cell_type_from_name(std::string &layer_type) {
+    RNNSequenceLayer::CellType res;
+    if (layer_type == "LSTMCell")
+        res = RNNSequenceLayer::LSTM;
+    else if (layer_type == "GRUCell")
+        res = RNNSequenceLayer::GRU;
+    else if (layer_type == "RNNCell")
+        res = RNNSequenceLayer::GRU;
+    else
+        THROW_IE_EXCEPTION << "Unknown Cell type (" << layer_type << "). Expected LSTMCell|GRUCell|RNNCell";
+    return res;
+}
+
+static std::string cell_name(RNNSequenceLayer::CellType type) {
+    std::string res;
+    if (type == RNNSequenceLayer::LSTM)
+        res = "LSTM";
+    else if (type == RNNSequenceLayer::GRU)
+        res = "GRU";
+    else if (type == RNNSequenceLayer::GRU)
+        res = "GRU";
+    else
+        THROW_IE_EXCEPTION << "Unknown Cell type (enum index: " << type << "). Expected LSTM|GRU|RNN";
+    return res;
+}
+
+
+bool convertToRNNSeq(CNNLayerPtr cur, ICNNNetwork &net) {
+    if (cur->type != "TensorIterator") return true;
+
+    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
     IE_ASSERT(ti) << "Cannot cast object with type TensorIterator to TensorIterator object";
 
-    // Topological order
-    std::vector<CNNLayerPtr> all_body_layers;
-    CNNNetForestDFS(ti->body.inputs, [&](CNNLayerPtr  current){
-        all_body_layers.push_back(current);
-    }, false);
-    std::reverse(all_body_layers.begin(), all_body_layers.end());
+    auto all_body_layers = SortTopologically(ti->body);
 
     // Check if body is:  squeeze -> lstm_cell -> unsqueeze
     if (all_body_layers.size() != 3
         || all_body_layers[0]->type != "Reshape"
-        || all_body_layers[1]->type != "LSTMCell"
+        || !one_of(all_body_layers[1]->type, "GRUCell", "RNNCell", "LSTMCell")
         || all_body_layers[2]->type != "Reshape")
         return false;
 
-    auto &rsp1 = all_body_layers[0];
-    auto &lstm = all_body_layers[1];
-    auto &rsp2 = all_body_layers[2];
+    auto rsp1 = std::dynamic_pointer_cast<ReshapeLayer>(all_body_layers[0]);
+    auto cell = std::dynamic_pointer_cast<RNNCellBase>(all_body_layers[1]);
+    auto rsp2 = std::dynamic_pointer_cast<ReshapeLayer>(all_body_layers[2]);
+
+    auto cell_type = cell_type_from_name(all_body_layers[1]->type);
 
-    IE_ASSERT(lstm->insData.size() == 3);  // {data, hidden, cell}
-    IE_ASSERT(lstm->outData.size() == 2);  // {hidden, cell}
+    int NS = cell_type == RNNSequenceLayer::LSTM ? 2 : 1;  // number of states
 
-    if (lstm->insData[0].lock()->creatorLayer.lock() != rsp1 ||
-        lstm->outData[0]->inputTo.begin()->second != rsp2)
+    IE_ASSERT(cell->insData.size() == NS + 1);  // {data, state1, [state2]}
+    IE_ASSERT(cell->outData.size() == NS);  // {state1, [state2]}
+
+    if (cell->insData[0].lock()->creatorLayer.lock() != rsp1 ||
+        cell->outData[0]->inputTo.begin()->second != rsp2)
         return false;
 
     // Check port mapping
@@ -76,16 +340,17 @@ bool convertToLSTMSequence(CNNLayerPtr cur) {
         return indx == scope.size() ? -1 : indx;
     };
 
-    int in_hs_idx = _indx_in(ti->body.inputs, lstm->insData[1].lock());
-    int in_cs_idx = _indx_in(ti->body.inputs, lstm->insData[2].lock());
     int in_dt_idx = _indx_in(ti->body.inputs, rsp1->insData[0].lock());
+    int in_hs_idx = _indx_in(ti->body.inputs, cell->insData[1].lock());
+    int in_cs_idx = NS == 2 ? _indx_in(ti->body.inputs, cell->insData[2].lock()) : -1;
 
-    int out_hs_idx = _indx_in(ti->body.outputs, lstm->outData[0]);
-    int out_cs_idx = _indx_in(ti->body.outputs, lstm->outData[1]);
     int out_dt_idx = _indx_in(ti->body.outputs, rsp2->outData[0]);
+    int out_hs_idx = _indx_in(ti->body.outputs, cell->outData[0]);
+    int out_cs_idx = NS == 2 ? _indx_in(ti->body.outputs, cell->outData[1]) : -1;
 
-    // indexes should be [0,1,2] : sum == 3
-    if (in_hs_idx + in_cs_idx + in_dt_idx != 3 || out_hs_idx + out_cs_idx + out_dt_idx != 3)
+    // indexes should be [0,1,2] : sum == 3 or [0,1,-1] : sum == 0
+    int sum = (NS - 1) * 3;
+    if (in_hs_idx + in_cs_idx + in_dt_idx != sum || out_hs_idx + out_cs_idx + out_dt_idx != sum)
         return false;
 
     std::map<int, TensorIterator::PortMap> i2map, o2map, be2map;
@@ -93,12 +358,11 @@ bool convertToLSTMSequence(CNNLayerPtr cur) {
     for (auto &m : ti->output_port_map) o2map[m.to] = m;
     for (auto &m : ti->back_edges) be2map[m.to] = m;
 
-    if (!one_of(i2map.size(), 3, 1) ||
-        !one_of(o2map.size(), 3, 1) ||
+    if (!one_of(i2map.size(), NS + 1, 1) ||
+        !one_of(o2map.size(), NS + 1, 1) ||
         !one_of(be2map.size(), 2))
         return false;
 
-
     auto in_iter_rule = i2map[in_dt_idx];
     auto in_iter_data = ti->insData[in_iter_rule.from].lock();
 
@@ -122,39 +386,47 @@ bool convertToLSTMSequence(CNNLayerPtr cur) {
     bool no_init_state = i2map.size() == 1;
     bool no_last_state = o2map.size() == 1;
 
-    if (!no_init_state && ( i2map[in_hs_idx].axis != -1 || i2map[in_cs_idx].axis != -1 ))
+    if (!no_init_state && ( i2map[in_hs_idx].axis != -1 || (NS == 2 && i2map[in_cs_idx].axis != -1) ))
         return false;
-    if (!no_last_state && ( o2map[out_hs_idx].axis != -1 || o2map[out_cs_idx].axis != -1 ))
+    if (!no_last_state && ( o2map[out_hs_idx].axis != -1 || (NS == 2 && o2map[out_cs_idx].axis != -1) ))
         return false;
 
-    auto i_order = no_init_state
-            ? std::vector<int>{i2map[in_dt_idx].from}
-            : std::vector<int>{i2map[in_dt_idx].from,
-                               i2map[in_hs_idx].from,
-                               i2map[in_cs_idx].from};
-    auto o_order = no_last_state
-            ? std::vector<int>{o2map[out_dt_idx].from}
-            : std::vector<int>{o2map[out_dt_idx].from,
-                               o2map[out_hs_idx].from,
-                               o2map[out_cs_idx].from};
+    std::vector<int> i_order {i2map[in_dt_idx].from };
+    if (!no_init_state)
+        i_order.push_back(i2map[in_hs_idx].from);
+    if (!no_init_state && NS == 2)
+        i_order.push_back(i2map[in_cs_idx].from);
+
+    std::vector<int> o_order {o2map[out_dt_idx].from};
+    if (!no_last_state)
+        o_order.push_back(o2map[out_hs_idx].from);
+    if (!no_last_state && NS == 2)
+        o_order.push_back(o2map[out_cs_idx].from);
 
     // need swap an i/o ports if it is not in natural order
-    std::string name = lstm->name + "_sequence";
-    auto rnn  = std::make_shared<RNNLayer>(LayerParams{ name, "RNN",  Precision::FP32 });
-    rnn->cellType = "LSTM";
+    std::string name = cell->name + "_sequence";
+    auto rnn  = std::make_shared<RNNSequenceLayer>(LayerParams{ name, cell_name(cell_type) + "Sequence", cell->precision});
+    rnn->cellType = cell_type;
     rnn->axis = in_iter_rule.axis;
     rnn->direction = in_iter_rule.stride == 1
-            ? RNNLayer::RNN_FWD
-            : RNNLayer::RNN_BWD;
+            ? RNNSequenceLayer::FWD
+            : RNNSequenceLayer::BWD;
 
-    rnn->_weights = dynamic_cast<WeightableLayer*>(lstm.get())->_weights;
-    rnn->blobs["weights"] = lstm->blobs["weights"];
-    rnn->_biases = dynamic_cast<WeightableLayer*>(lstm.get())->_biases;
-    rnn->blobs["biases"] = lstm->blobs["biases"];
+    // copy base RNN cell fields
+    rnn->_weights = cell->_weights;
+    rnn->_biases = cell->_biases;
+    rnn->blobs = cell->blobs;
+    rnn->activations = cell->activations;
+    rnn->activation_alpha = cell->activation_alpha;
+    rnn->activation_beta = cell->activation_beta;
+    rnn->hidden_size = cell->hidden_size;
+    rnn->clip = cell->clip;
 
     for (int i : i_order) {
-        rnn->insData.push_back(ti->insData[i]);
-        rnn->insData.back().lock()->inputTo[ti->name] = rnn;
+        auto in_data = ti->insData[i].lock();
+        in_data->inputTo.erase(ti->name);
+        in_data->inputTo[rnn->name] = rnn;
+        rnn->insData.push_back(in_data);
     }
     for (int i : o_order) {
         rnn->outData.push_back(ti->outData[i]);
@@ -164,16 +436,807 @@ bool convertToLSTMSequence(CNNLayerPtr cur) {
     return true;
 }
 
-bool CombineLSTMSeq(const ICNNNetwork &net) {
-    // Apply action for all nodes
-    CNNNetForestDFS(CNNNetGetAllInputLayers(net), &convertToLSTMSequence, true);
+bool unrollTI(CNNLayerPtr cur, ICNNNetwork &net) {
+    if (cur->type != "TensorIterator")
+        return true;
+
+    auto ti = std::dynamic_pointer_cast<TensorIterator>(cur);
+    IE_ASSERT(ti) << "Cannot cast object with type TensorIterator to TensorIterator object";
+
+    int num = get_num_iteration(ti);  // -1 means inconsistent TI
+    if (num == -1) return false;  // TODO: better to throw exception
+
+    const auto &body = ti->body;
+
+    std::vector<TensorIterator::Body> body_list(num);
+    for (int i = 0; i < num; i++) {
+        // copy with additional suffix to each object name
+        body_list[i] = CopyTIBody(net, body, ":" + std::to_string(i));
+    }
+
+    RuleSet first_class, second_class, third_class;
+    std::tie(first_class, second_class, third_class) = ClassifyInRules(ti);
+
+    /** Clean links on TI */
+    for (auto &ins : ti->insData)
+        ins.lock()->inputTo.erase(ti->name);
+    for (auto &outs : ti->outData)
+        outs->creatorLayer.reset();
+
+    /** FIRST class comes */
+    for (int i = 0; i < first_class.size(); i++) {
+        auto &rule = first_class[i];
+        auto in_data = ti->insData[rule.from].lock();
+
+        std::string name = ti->name + ":in_split_" + std::to_string(i);
+        auto split = std::make_shared<SplitLayer>(LayerParams{ name, "Split", cur->precision });
+        split->_axis = rule.axis;
+        split->outData.resize(num);
+        split->insData.emplace_back(in_data);
+        in_data->inputTo[split->name] = split;
+
+        for (int j = 0; j < num; j++) {
+            auto body_idx = rule.stride == 1 ? j : num - 1 - j;
+            auto &chunk = body_list[body_idx].inputs[rule.to];
+            chunk->creatorLayer = split;
+            split->outData[j] = chunk;
+        }
+    }
+
+    /** SECOND class come on */
+    for (const auto &rule : second_class) {
+        auto in_data = ti->insData[rule.from].lock();
+
+        for (int j = 0; j < num; j++) {
+            auto &chunk = body_list[j].inputs[rule.to];
+            CombineData(in_data, chunk);
+        }
+    }
+
+    /** BACK EDGES that's your time */
+    for (const auto &rule : ti->back_edges) {
+        for (int i = 1; i < num; i++) {
+            auto &from_data = body_list[i-1].outputs[rule.from];
+            auto &to_data = body_list[i].inputs[rule.to];
+            CombineData(from_data, to_data);
+        }
+    }
+
+    /** THIRD class end up */
+    for (const auto &rule : third_class) {
+        // first iteration
+        auto from_data = ti->insData[rule.from].lock();
+        auto &to_data = body_list[0].inputs[rule.to];;
+        CombineData(from_data, to_data);
+    }
+
+    /** And the same actions for outputs connections */
+    std::tie(first_class, second_class, third_class) = ClassifyOutRules(ti);
+
+    /** FIRST class comes */
+    for (int i = 0; i < first_class.size(); i++) {
+        auto &rule = first_class[i];
+        auto out_data = ti->outData[rule.from];
+
+        std::string name = ti->name + ":out_concat_" + std::to_string(i);
+        auto concat = std::make_shared<ConcatLayer>(LayerParams{ name, "Concat", cur->precision });
+        concat->_axis = rule.axis;
+        concat->insData.resize(num);
+        concat->outData.emplace_back(out_data);
+        out_data->creatorLayer = concat;
+
+        for (int j = 0; j < num; j++) {
+            auto body_idx = rule.stride == 1 ? j : num - 1 - j;
+            auto &chunk = body_list[body_idx].outputs[rule.to];
+            chunk->inputTo[concat->name] = concat;
+            concat->insData[j] = chunk;
+        }
+    }
+
+    /** SECOND class come on */
+    for (const auto &rule : second_class) {
+        auto out_data = ti->outData[rule.from];
+
+        for (int j = 0; j < num; j++) {
+            auto &chunk = body_list[j].outputs[rule.to];
+            CombineData(chunk, out_data);
+        }
+    }
+
+    /** THIRD class end up */
+    for (const auto &rule : third_class) {
+        // first iteration
+        auto &from_data = ti->outData[rule.from];
+        auto &to_data = body_list[num-1].outputs[rule.to];
+
+        auto parent = to_data->creatorLayer.lock();
+        std::replace(parent->outData.begin(), parent->outData.end(), to_data, from_data);
+        from_data->creatorLayer = parent;
+
+        CombineData(from_data, to_data);
+    }
+    return true;
+}
+
+/************************************************************/
+/****  Builder helpers   ************************************/
+/************************************************************/
+
+static CNNLayerPtr _concat(std::string name, Precision prc, SizeVector dims, int num) {
+    auto res = std::make_shared<ConcatLayer>(LayerParams{name, "Concat", prc});
+    res->_axis = 1;
+
+    res->insData.resize(num);
+    res->outData.resize(1);
+
+    auto out_data = DataPtr(new Data(name,
+            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+    out_data->creatorLayer = res;
+
+    res->outData[0] = out_data;
+    return res;
+}
+
+static CNNLayerPtr _split(std::string name, Precision prc, SizeVector dims, int num) {
+    auto res = std::make_shared<SplitLayer>(LayerParams{name, "Split", prc});
+    res->_axis = 1;
+    res->params["axis"] = res->_axis;
+
+    res->insData.resize(1);
+    res->outData.resize(num);
+
+    for (int i = 0; i < num; i++) {
+        auto out_data = DataPtr(new Data(name + "_part_" + std::to_string(i),
+                TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+        out_data->creatorLayer = res;
+
+        res->outData[i] = out_data;
+    }
+    return res;
+}
+
+static CNNLayerPtr _fc(std::string name, Precision prc, SizeVector dims, Blob::Ptr &W, Blob::Ptr &B) {
+    auto res = std::make_shared<FullyConnectedLayer>(LayerParams{name, "FullyConnected", prc});
+
+    res->_weights = W;
+    res->_biases = B;
+    res->_out_num = dims[1];
+    res->blobs["weights"] = W;
+    res->blobs["biases"] = B;
+    res->params["out-size"] = std::to_string(dims[1]);
+
+    res->insData.resize(1);
+    res->outData.resize(1);
+
+    auto out_data = DataPtr(new Data(name,
+            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+    out_data->creatorLayer = res;
+
+    res->outData[0] = out_data;
+    return res;
+}
+
+static CNNLayerPtr _act(std::string name, Precision prc, SizeVector dims, std::string type) {
+    auto res = std::make_shared<CNNLayer>(LayerParams{name, "Activation", prc});
+
+    res->params["type"] = type;
+
+    res->insData.resize(1);
+    res->outData.resize(1);
+
+    auto out_data = DataPtr(new Data(name,
+            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+    out_data->creatorLayer = res;
+
+    res->outData[0] = out_data;
+    return res;
+}
+
+static CNNLayerPtr _pwr(std::string name, Precision prc, SizeVector dims, float scale, float shift) {
+    auto res = std::make_shared<PowerLayer>(LayerParams{name, "Power", prc});
+
+    res->power = 1.0;
+    res->scale = scale;
+    res->offset = shift;
+    res->params["power"] = res->power;
+    res->params["scale"] = res->scale;
+    res->params["shift"] = res->offset;
+
+    res->insData.resize(1);
+    res->outData.resize(1);
+
+    auto out_data = DataPtr(new Data(name,
+            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+    out_data->creatorLayer = res;
+
+    res->outData[0] = out_data;
+    return res;
+}
+
+
+static CNNLayerPtr _eltw(std::string name, Precision prc, SizeVector dims, std::string type) {
+    auto res = std::make_shared<EltwiseLayer>(LayerParams{name, "Eltwise", prc});
+
+    res->params["operation"] = type;
+    res->_operation = type == "sum" ? EltwiseLayer::Sum : EltwiseLayer::Prod;
+
+    res->insData.resize(2);
+    res->outData.resize(1);
+
+    auto out_data = DataPtr(new Data(name,
+            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+    out_data->creatorLayer = res;
+
+    res->outData[0] = out_data;
+    return res;
+}
+
+static std::shared_ptr<ReshapeLayer> _resh(std::string name, Precision prc, SizeVector dims) {
+    auto res = std::make_shared<ReshapeLayer>(LayerParams{name, "Reshape", prc});
+
+    res->insData.resize(1);
+    res->outData.resize(1);
+
+    auto out_data = DataPtr(new Data(name,
+            TensorDesc { prc, dims, TensorDesc::getLayoutByDims(dims) }));
+    out_data->creatorLayer = res;
+
+    res->outData[0] = out_data;
+    return res;
+}
+
+static std::shared_ptr<RNNCellBase> _cell(std::string name, Precision prc, SizeVector data_dims, SizeVector state_dims, RNNSequenceLayer::CellType type) {
+    std::shared_ptr<RNNCellBase> res;
+    size_t NS = 1;
+    switch (type) {
+        case RNNSequenceLayer::LSTM:
+            res = std::make_shared<LSTMCell>(LayerParams{name, "LSTMCell", prc}); NS = 2;
+            break;
+        case RNNSequenceLayer::GRU:
+        case RNNSequenceLayer::GRU_LBR:
+            res = std::make_shared<GRUCell>(LayerParams{name, "GRUCell", prc});
+            break;
+        case RNNSequenceLayer::RNN:
+            res = std::make_shared<RNNCell>(LayerParams{name, "RNNCell", prc});
+            break;
+    }
+
+    res->cellType = type;
+    res->insData.resize(1 + NS);
+    res->outData.resize(NS);
+
+    auto out_data = DataPtr(new Data(name + ":out_data",
+            TensorDesc { prc, data_dims, TensorDesc::getLayoutByDims(data_dims) }));
+    out_data->creatorLayer = res;
+    res->outData[0] = out_data;
+
+    for (size_t i = 0; i < NS; i++) {
+        auto out_state = DataPtr(new Data(name + ":out_state_" + std::to_string(i),
+                TensorDesc { prc, state_dims, TensorDesc::getLayoutByDims(state_dims) }));
+        out_state->creatorLayer = res;
+        res->outData[i] = out_state;
+    }
+
+    return res;
+}
+
+static std::shared_ptr<TensorIterator> _ti(std::string name, Precision prc, size_t NS) {
+    auto res = std::make_shared<TensorIterator>(LayerParams{name, "TensorIterator", prc});
+
+    res->insData.resize(1 + NS);
+    res->outData.resize(1 + NS);
+
+    return res;
+}
+
+static void _link(CNNLayerPtr src, CNNLayerPtr dst, size_t src_port = 0, size_t dst_port = 0) {
+    auto data = src->outData[src_port];
+    data->inputTo[dst->name] = dst;
+    dst->insData[dst_port] = data;
+}
+
+static void _link(DataPtr &data, CNNLayerPtr dst, size_t dst_port = 0) {
+    data->inputTo[dst->name] = dst;
+    dst->insData[dst_port] = data;
+}
+
+/** Link nodes with clipping data if required (clip_val != 0.0) */
+static void _link_with_clip(CNNLayerPtr src, CNNLayerPtr dst, const float clip_val,
+        size_t src_port = 0, size_t dst_port = 0) {
+    if (clip_val == 0.0f) {
+        _link(src, dst, src_port, dst_port);
+    } else {
+        auto clip_name = dst->name + "_clip";
+        auto clip_prc = dst->precision;
+        auto clip_shape = src->outData[src_port]->getTensorDesc().getDims();
+        auto clip = _act(clip_name, clip_prc, clip_shape, "clamp");
+        clip->params["min"] = std::to_string(-clip_val);
+        clip->params["max"] = std::to_string(clip_val);
+
+        _link(src, clip, src_port, 0);
+        _link(clip, dst, 0, dst_port);
+    }
+}
+
+
+static Blob::Ptr make_partial_copy(Blob::Ptr src, size_t off, size_t size) {
+    auto res = make_plain_blob(src->precision(), {size});
+    res->allocate();
+
+    size_t elem_size = src->precision().size();
+    auto src_ptr = src->buffer().as<uint8_t*>();
+    auto dst_ptr = res->buffer().as<uint8_t*>();
+
+    ie_memcpy(dst_ptr, res->byteSize(), src_ptr + off * elem_size,  size * elem_size);
+
+    return res;
+}
+
+static Blob::Ptr wrap_as_tensor(Blob::Ptr src, SizeVector dims) {
+    auto res = make_blob_with_precision(
+            TensorDesc { src->precision(), dims, plain_layout(dims) },
+            src->buffer());
+    IE_ASSERT(src->size() == res->size());
+    return res;
+}
+
+static Blob::Ptr make_region_copy(Blob::Ptr src, SizeVector region, SizeVector offset) {
+    IE_ASSERT(region.size() == offset.size());
+    IE_ASSERT(region.size() == src->dims().size());
+
+    auto res = make_plain_blob(src->precision(), region);
+    res->allocate();
+
+    size_t elem_size = src->precision().size();
+    auto src_ptr = src->buffer().as<uint8_t*>();
+    auto dst_ptr = res->buffer().as<uint8_t*>();
+
+    auto &dd = src->getTensorDesc().getDims();
+    SizeVector src_dims {1, 1, 1};
+    std::copy(dd.begin(), dd.end(), src_dims.end() - dd.size());
+
+    SizeVector dims {1, 1, 1};
+    std::copy(region.begin(), region.end(), dims.end() - region.size());
+
+    SizeVector off {0, 0, 0};
+    std::copy(offset.begin(), offset.end(), off.end() - offset.size());
+
+    const auto D1 = dims[0];
+    const auto D2 = dims[1];
+    const auto D3 = dims[2];
+    const auto off1 = off[0];
+    const auto off2 = off[1];
+    const auto off3 = off[2];
+    const auto str1 = src_dims[1]*src_dims[2];
+    const auto str2 = src_dims[2];
+
+    for (size_t d1 = 0; d1 < D1; d1++)
+    for (size_t d2 = 0; d2 < D2; d2++) {
+        auto off_src = (off1 + d1)*str1 + (off2 + d2)*str2 + off3;
+        auto off_dst = d1*D2*D3 + d2*D3;
+        ie_memcpy(dst_ptr + off_dst * elem_size, res->byteSize(), src_ptr + off_src * elem_size,  D3 * elem_size);
+    }
+
+    return res;
+}
+
+
+static bool unrollRNNCellBody(CNNLayerPtr cur) {
+    if (cur->type != "RNNCell")
+        return true;
+
+    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
+    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to WeightableLayer object";
+
+    auto name = cell->name;
+
+    auto in_data = cell->insData[0].lock();
+    auto in_h_state = cell->insData[1].lock();
+    auto out_h_state = cell->outData[0];
+
+    auto d_dims = in_data->getTensorDesc().getDims();
+    auto s_dims = in_h_state->getTensorDesc().getDims();
+
+    size_t N = d_dims[0];
+    size_t D = d_dims[1];
+    size_t S = s_dims[1];
+
+    auto prc = cell->precision;
+
+    /** Release links on TI */
+    for (auto &ins : cell->insData)
+        ins.lock()->inputTo.erase(cell->name);
+    for (auto &outs : cell->outData)
+        outs->creatorLayer.reset();
+
+    // operations
+    auto concat = _concat(name + ":concat", prc, {N, D+S}, 2);
+    auto fc = _fc(name + ":fc", prc, {N, S}, cell->_weights, cell->_biases);
+    auto act = _act(name + ":act", prc, {N, S}, cell->activations[0]);
+
+    // Connection
+    _link(in_data, concat, 0);
+    _link(in_h_state, concat, 1);
+    _link(concat, fc);
+    _link_with_clip(fc, act, cell->clip);
+
+    // Output
+    act->outData[0] = out_h_state;
+    out_h_state->creatorLayer = act;
+
+    return true;
+}
+
+static bool unrollLSTMCellBody(CNNLayerPtr cur) {
+    if (cur->type != "LSTMCell")
+        return true;
+
+    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
+    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to WeightableLayer object";
+
+    auto name = cell->name;
+
+    auto in_data = cell->insData[0].lock();
+    auto in_h_state = cell->insData[1].lock();
+    auto in_c_state = cell->insData[2].lock();
+    auto out_h_state = cell->outData[0];
+    auto out_c_state = cell->outData[1];
+
+    auto d_dims = in_data->getTensorDesc().getDims();
+    auto s_dims = in_h_state->getTensorDesc().getDims();
+
+    size_t N = d_dims[0];
+    size_t D = d_dims[1];
+    size_t S = s_dims[1];
+    size_t G = 4;
+
+    auto prc = cell->precision;
+
+    /** Release links on TI */
+    for (auto &ins : cell->insData)
+        ins.lock()->inputTo.erase(cell->name);
+    for (auto &outs : cell->outData)
+        outs->creatorLayer.reset();
+
+    // operations
+    auto concat = _concat(name + ":concat", prc, {N, D+S}, 2);
+    auto split = _split(name + ":split", prc, {N, S}, G);
+    auto fc = _fc(name + ":fc", prc, {N, S*G}, cell->_weights, cell->_biases);
+
+    const std::string _f = cell->activations[0], _g = cell->activations[1], _h = cell->activations[2];
+
+    auto act_f = _act(name + ":act_f", prc, {N, S}, _f);
+    auto act_i = _act(name + ":act_i", prc, {N, S}, _f);
+    auto act_c = _act(name + ":act_c", prc, {N, S}, _g);
+    auto act_o = _act(name + ":act_o", prc, {N, S}, _f);
+    auto act_x = _act(name + ":act_x", prc, {N, S}, _h);
+
+    auto mul_ic = _eltw(name + ":mul_ic", prc, {N, S}, "mul");
+    auto mul_f  = _eltw(name + ":mul_f" , prc, {N, S}, "mul");
+    auto sum    = _eltw(name + ":sum"   , prc, {N, S}, "sum");
+    auto mul    = _eltw(name + ":mul"   , prc, {N, S}, "mul");
+
+    // Connection
+    _link(in_data, concat, 0);
+    _link(in_h_state, concat, 1);
+    _link(concat, fc);
+
+    _link_with_clip(fc, split, cell->clip);
+
+    _link(split, act_f, 0, 0);
+    _link(split, act_i, 1, 0);
+    _link(split, act_c, 2, 0);
+    _link(split, act_o, 3, 0);
+
+    _link(act_i, mul_ic, 0, 0);
+    _link(act_c, mul_ic, 0, 1);
+
+    _link(act_f, mul_f, 0, 0);
+    _link(in_c_state, mul_f, 1);
+
+    _link(mul_f,  sum, 0, 0);
+    _link(mul_ic, sum, 0, 1);
+
+    _link(sum, act_x);
+
+    _link(act_x, mul, 0, 0);
+    _link(act_o, mul, 0, 1);
+
+    // Output
+    mul->outData[0] = out_h_state;
+    out_h_state->creatorLayer = mul;
+
+    CombineData(out_c_state, sum->outData[0]);
+    sum->outData[0] = out_c_state;
+    out_c_state->creatorLayer = sum;
+
+    return true;
+}
+
+static bool unrollGRUCellBody(CNNLayerPtr cur, bool linear_before_reset = false) {
+    if (cur->type != "GRUCell")
+        return true;
+
+    auto cell = std::dynamic_pointer_cast<GRUCell>(cur);
+    IE_ASSERT(cell) << "Cannot cast object with type ***Cell to WeightableLayer object";
+
+    auto name = cell->name;
+
+    auto in_data = cell->insData[0].lock();
+    auto in_h_state = cell->insData[1].lock();
+    auto out_h_state = cell->outData[0];
+
+    auto d_dims = in_data->getTensorDesc().getDims();
+    auto s_dims = in_h_state->getTensorDesc().getDims();
+
+    size_t N = d_dims[0];
+    size_t D = d_dims[1];
+    size_t S = s_dims[1];
+
+    // Split weights UR and O gates. Original gates are URO
+    size_t bG = linear_before_reset ? 4 : 3;
+    auto orig_W = wrap_as_tensor(cell->_weights, {3, S, D+S});
+    auto orig_B = wrap_as_tensor(cell->_biases, {bG, S});
+
+    auto ur_W = make_region_copy(orig_W, {2, S, D+S}, {0, 0, 0});
+    auto o_W  = make_region_copy(orig_W, {1, S, D+S}, {2, 0, 0});
+    auto ur_B = make_region_copy(orig_B, {2, S}, {0, 0});
+    auto o_B  = make_region_copy(orig_B, {1, S}, {2, 0});
+
+    auto prc = cell->precision;
+
+    /** Release links on TI */
+    for (auto &ins : cell->insData)
+        ins.lock()->inputTo.erase(cell->name);
+    for (auto &outs : cell->outData)
+        outs->creatorLayer.reset();
+
+    // operations
+    auto concat = _concat(name + ":concat", prc, {N, D+S}, 2);
+    auto split = _split(name + ":split", prc, {N, S}, 2);
+    auto fc_ur = _fc(name + ":fc_ur", prc, {N, S*2}, ur_W, ur_B);
+
+    const std::string _f = cell->activations[0], _g = cell->activations[1];
+
+    auto act_ur = _act(name + ":act_ur", prc, {N, 2*S}, _f);
+    auto act_o = _act(name + ":act_o", prc, {N, S}, _g);
+
+    auto mul_u = _eltw(name + ":mul_u", prc, {N, S}, "mul");
+    auto mul_r = _eltw(name + ":mul_r", prc, {N, S}, "mul");
+
+    auto pwr_m1 = _pwr(name + ":pwr", prc, {N, S}, -1.0, 1.0);
+
+    auto mul = _eltw(name + ":mul"   , prc, {N, S}, "mul");
+    auto sum = _eltw(name + ":sum"   , prc, {N, S}, "sum");
+
+    /**
+     * - zt = _f(Wz*[Xt + Ht-1] + Bz)
+     * - rt = _f(Wr*[Xt + Ht-1] + Br)
+     * - ht = _g(Wh*[Xt + (rt (.) Ht-1)] + Bh)    # default, when linear_before_reset = 0
+     * - ht = _g(Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr))) # when linear_before_reset != 0
+     * - Ht = (1 - zt) (.) ht + zt (.) Ht-1
+     */
+    _link(in_data, concat, 0);
+    _link(in_h_state, concat, 1);
+    _link(concat, fc_ur);
+    _link_with_clip(fc_ur, act_ur, cell->clip);
+    _link(act_ur, split);  // split[0] - zt,  split[1] - rt
+
+    if (linear_before_reset) {
+        auto lbr_B = wrap_as_tensor(orig_B, {4, S});
+
+        auto whw_W = make_region_copy(o_W, {1, S, D}, {0, 0, 0});
+        auto whr_W = make_region_copy(o_W, {1, S, S}, {0, 0, D});
+        auto whw_B = make_region_copy(lbr_B, {1, S}, {2, 0});
+        auto whr_B = make_region_copy(lbr_B, {1, S}, {3, 0});
+
+        auto fc_whr = _fc(name + ":fc_whr", prc, {N, S}, whr_W, whr_B);
+        auto fc_whw = _fc(name + ":fc_whw", prc, {N, S}, whw_W, whw_B);
+        auto sum_h  = _eltw(name + ":sum_h", prc, {N, S}, "sum");
+
+        _link(in_h_state, fc_whr);                  //                            Whr*Ht-1 + Bhr
+        _link(fc_whr, mul_r, 0);                    //
+        _link(split, mul_r, 1, 1);                  //                    rt (.) (Whr*Ht-1 + Bhr)
+        _link(in_data, fc_whw);                     //    Whw*Xt + Bhw
+        _link(fc_whw, sum_h, 0, 0);                 //
+        _link(mul_r, sum_h, 0, 1);                  //    Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr))
+        _link_with_clip(sum_h, act_o, cell->clip);  // _g(Whw*Xt + Bhw + (rt (.) (Whr*Ht-1 + Bhr)))
+    } else {
+        auto fc_wh = _fc(name + ":fc_o", prc, {N, S}, o_W, o_B);
+        auto concat_h = _concat(name + ":concat_h", prc, {N, D+S}, 2);
+
+        _link(split, mul_r, 1, 0);                  //
+        _link(in_h_state, mul_r, 1);                //              rt (.) Ht-1
+        _link(in_data, concat_h, 0);                //
+        _link(mul_r, concat_h, 0, 1);               //       [Xt + (rt (.) Ht-1)]
+        _link(concat_h, fc_wh);                     //    Wh*[Xt + (rt (.) Ht-1)] + Bh
+        _link_with_clip(fc_wh, act_o, cell->clip);  // _g(Wh*[Xt + (rt (.) Ht-1)] + Bh)
+    }
+
+    _link(split, pwr_m1, 0, 0);   //  1 - zt
+    _link(act_o, mul, 0, 0);      //
+    _link(pwr_m1, mul, 0, 1);     // (1 - zt) (.) ht
+    _link(split, mul_u, 0, 0);    //
+    _link(in_h_state, mul_u, 1);  //                   zt (.) Ht-1
+    _link(mul, sum, 0, 0);        //
+    _link(mul_u, sum, 0, 1);      // (1 - zt) (.) ht + zt (.) Ht-1
+
+    // Output
+    sum->outData[0] = out_h_state;
+    out_h_state->creatorLayer = sum;
+
     return true;
 }
 
-bool UnrollTI(const ICNNNetwork &net) {
+static bool unrollCell(CNNLayerPtr cur, ICNNNetwork &net) {
+    auto cell = std::dynamic_pointer_cast<RNNCellBase>(cur);
+    switch (cell->cellType) {
+        case RNNCellBase::LSTM:    return unrollLSTMCellBody(cur);
+        case RNNCellBase::GRU:     return unrollGRUCellBody(cur);
+        case RNNCellBase::GRU_LBR: return unrollGRUCellBody(cur, true);
+        case RNNCellBase::RNN:     return unrollRNNCellBody(cur);
+    }
     return false;
 }
 
+static bool unrollSeq(CNNLayerPtr cur, ICNNNetwork &net) {
+    if (!one_of(cur->type, "LSTMSequence", "GRUSequence", "RNNSequence"))
+    return true;
+
+    auto seq = std::dynamic_pointer_cast<RNNSequenceLayer>(cur);
+    IE_ASSERT(seq) << "Cannot cast object with type ***Sequence to RNNSequenceLayer object";
+
+    auto name = seq->name;
+
+    auto in_data = seq->insData[0].lock();
+    auto in_h_state = seq->insData[1].lock();
+    auto out_data = seq->outData[0];
+
+    auto in_d_dims = in_data->getTensorDesc().getDims();
+    auto state_dims = in_h_state->getTensorDesc().getDims();
+    auto out_d_dims = out_data->getTensorDesc().getDims();
+
+    const int axis = seq->axis;
+    const auto direct = seq->direction;
+    const auto prc = seq->precision;
+
+    /** Release links on Seq */
+    for (auto &ins : seq->insData)
+    ins.lock()->inputTo.erase(seq->name);
+    for (auto &outs : seq->outData)
+    outs->creatorLayer.reset();
+
+    /** Body subgraph*/
+    auto in_d_body_dims = in_d_dims;
+    in_d_body_dims[axis] = 1;
+
+    auto in_d_body_squeeze_dims = in_d_dims;
+    in_d_body_squeeze_dims.erase(in_d_body_squeeze_dims.begin() + axis);
+
+    auto out_d_body_dims = out_d_dims;
+    out_d_body_dims[axis] = 1;
+
+    auto out_d_body_squeeze_dims = out_d_dims;
+    out_d_body_squeeze_dims.erase(out_d_body_squeeze_dims.begin() + axis);
+
+    auto body_in_data = DataPtr(new Data(name + ":data_in",
+            TensorDesc { prc, in_d_body_dims, TensorDesc::getLayoutByDims(in_d_body_dims) }));
+
+    auto resh1 = _resh(name + ":resh1", prc, in_d_body_squeeze_dims);
+    auto cell  = _cell(name + ":cell", prc, out_d_body_squeeze_dims, state_dims, seq->cellType);
+    auto resh2 = _resh(name + ":resh2", prc, out_d_body_dims);
+
+    _link(body_in_data, resh1);
+    _link(resh1, cell);
+    _link(cell, resh2);
+
+    cell->_weights = seq->_weights;
+    cell->_biases = seq->_biases;
+    cell->hidden_size = seq->hidden_size;
+    cell->clip = seq->clip;
+    cell->activations = seq->activations;
+    cell->activation_alpha = seq->activation_alpha;
+    cell->activation_beta = seq->activation_beta;
+
+    const size_t NS = cell->outData.size();  // num of state
+
+    /** TI layer */
+    auto ti = _ti(name + ":ti", prc, NS);
+    _link(in_data, ti, 0);
+
+    ti->outData[0] = out_data;
+    out_data->creatorLayer = ti;
+
+    ti->body.inputs.push_back(body_in_data);
+    ti->body.outputs.push_back(resh2->outData[0]);
+
+    int start = direct == RNNSequenceLayer::FWD ? 0 : -1;
+    int end = direct == RNNSequenceLayer::FWD ? -1 : 0;
+    int step = direct == RNNSequenceLayer::FWD ? 1 : -1;
+    ti->input_port_map.push_back({0, 0, axis, step, start, end, 1});
+    ti->output_port_map.push_back({0, 0, axis, step, start, end, 1});
+
+    for (size_t i = 0; i < NS; i++) {
+        auto in_state = seq->insData[1 + i].lock();
+        _link(in_state, ti, 1 + i);
+
+        auto out_state = seq->outData[1 + i];
+        ti->outData[1 + i] = out_state;
+        out_state->creatorLayer = ti;
+
+        auto body_in_state = DataPtr(new Data(name + ":state_in_" + std::to_string(i),
+                TensorDesc { prc, state_dims, TensorDesc::getLayoutByDims(state_dims) }));
+
+        _link(body_in_state, cell, 1 + i);
+
+        ti->body.inputs.push_back(body_in_state);
+        ti->body.outputs.push_back(cell->outData[i]);
+
+        const int ii = 1 + static_cast<int>(i);
+        ti->input_port_map.push_back({ii, ii, -1, 0, 0, 0, 0});
+        ti->output_port_map.push_back({ii, ii, -1, 0, 0, 0, 0});
+        ti->back_edges.push_back({ii, ii, -1, 0, 0, 0, 0});
+    }
+
+    unrollTI(ti, net);
+
+    return true;
+}
+
+/************************************************************/
+/****  Converter API  ***************************************/
+/************************************************************/
+
+template <typename T>
+bool ApplyForAll(ICNNNetwork &net, T action) {
+    auto all_layers = details::CNNNetSortTopologically(net);
+    bool sts = true;
+
+    for (auto &layer : all_layers)
+        sts &= action(layer, net);
+
+    return sts;
+}
+
+template <typename T, typename P>
+bool ApplyForAll_if(ICNNNetwork &net, T action, P pred) {
+    auto all_layers = details::CNNNetSortTopologically(net);
+    bool sts = true;
+
+    for (auto &layer : all_layers)
+        if (pred(layer))
+            sts &= action(layer, net);
+
+    return sts;
+}
+
+bool CombineRNNSeq(ICNNNetwork &net) {
+    return ApplyForAll(net, convertToRNNSeq);
+}
+
+bool UnrollTI(ICNNNetwork &net) {
+    return ApplyForAll(net, unrollTI);
+}
+
+bool UnrollRNN_if(ICNNNetwork &net, const std::function<bool(const RNNCellBase&)> pred) {
+    // Filter layers by RNN specific type
+    auto _seq_pred = [&] (CNNLayerPtr layer) {
+        auto rnn = std::dynamic_pointer_cast<RNNSequenceLayer>(layer);
+        if (!rnn) return false;
+        return pred(*rnn.get());
+    };
+    auto _cell_pred = [&] (CNNLayerPtr layer) {
+        auto rnn = std::dynamic_pointer_cast<RNNCellBase>(layer);
+        if (!rnn || !one_of(rnn->type, "LSTMCell", "GRUCell", "RNNCell")) return false;
+        return pred(*rnn.get());
+    };
+
+    bool res = true;
+    res &= ApplyForAll_if(net, unrollSeq, _seq_pred);
+    res &= ApplyForAll_if(net, unrollCell, _cell_pred);
+    return res;
+}
+
 }  // namespace NetPass
 }  // namespace InferenceEngine
 
diff --git a/inference-engine/src/inference_engine/net_pass.h b/inference-engine/src/inference_engine/net_pass.h
index 8b192864c..62e996fff 100644
--- a/inference-engine/src/inference_engine/net_pass.h
+++ b/inference-engine/src/inference_engine/net_pass.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,17 +15,31 @@ namespace NetPass {
 
 /**
  * Try to detect LSTM Sequence pattern inside TI and convert it
+ *
  * @param net network to modify
  * @return true if all Tensor iterator was converted
  */
-INFERENCE_ENGINE_API_CPP(bool) CombineLSTMSeq(const ICNNNetwork &net);
+INFERENCE_ENGINE_API_CPP(bool) CombineRNNSeq(ICNNNetwork &net);
 
 /**
  * Unroll all present Tensor Iterators
+ *
  * @param net network to modify
  * @return true if all Tensor iterator was unrolled successfully
  */
-INFERENCE_ENGINE_API_CPP(bool) UnrollTI(const ICNNNetwork &net);
+INFERENCE_ENGINE_API_CPP(bool) UnrollTI(ICNNNetwork &net);
+
+/**
+ * Unroll all RNN specific layers by predicate
+ *
+ * Will be applied to all RNNSeq and RNNCell layers
+ *
+ * @param net network to modify
+ * @param pred predicate to mark layer to unroll
+ * @return true if all RNN layers was unrolled successfully
+ */
+INFERENCE_ENGINE_API_CPP(bool) UnrollRNN_if(ICNNNetwork &net,
+        std::function<bool(const RNNCellBase&)> pred);
 
 }  // namespace NetPass
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/network_serializer.cpp b/inference-engine/src/inference_engine/network_serializer.cpp
index f530e355d..4ccf4a51f 100644
--- a/inference-engine/src/inference_engine/network_serializer.cpp
+++ b/inference-engine/src/inference_engine/network_serializer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,6 +10,7 @@
 #include "details/ie_cnn_network_tools.h"
 #include "details/caseless.hpp"
 #include "network_serializer.h"
+#include "exec_graph_info.hpp"
 #include "xml_parse_utils.h"
 
 using namespace InferenceEngine;
@@ -38,22 +39,44 @@ void NetworkSerializer::serialize(
     const std::string &xmlPath,
     const std::string &binPath,
     const InferenceEngine::ICNNNetwork& network) {
+    const std::vector<CNNLayerPtr> ordered = CNNNetSortTopologically(network);
 
-    std::ofstream ofsBin(binPath, std::ofstream::out | std::ofstream::binary);
-    if (!ofsBin) {
-        THROW_IE_EXCEPTION << "File '" << binPath << "' is not opened as out file stream";
+    // A flag for serializing executable graph information (not complete IR)
+    bool execGraphInfoSerialization = false;
+    // If first layer has perfCounter parameter set then it's executable graph info serialization.
+    // All other layers must also have this parameter set.
+    if (ordered[0]->params.find(ExecGraphInfoSerialization::PERF_COUNTER) != ordered[0]->params.end()) {
+        execGraphInfoSerialization = true;
+        for (const auto &layer : ordered) {
+            if (layer->params.find(ExecGraphInfoSerialization::PERF_COUNTER) == layer->params.end()) {
+                THROW_IE_EXCEPTION << "Each node must have " << ExecGraphInfoSerialization::PERF_COUNTER
+                                   << " parameter set in case of executable graph info serialization";
+            }
+        }
+    }
+
+    bool dumpWeights = !execGraphInfoSerialization & !binPath.empty();
+    std::ofstream ofsBin;
+    if (dumpWeights) {
+        ofsBin.open(binPath, std::ofstream::out | std::ofstream::binary);
+        if (!ofsBin) {
+            THROW_IE_EXCEPTION << "File '" << binPath << "' is not opened as out file stream";
+        }
     }
 
     pugi::xml_document doc;
-    pugi::xml_node net = doc.append_child("net");
-    net.append_attribute("name").set_value(network.getName().c_str());
-    net.append_attribute("version").set_value("3");
-    net.append_attribute("batch").set_value(network.getBatchSize());
+    pugi::xml_node netXml = doc.append_child("net");
+    netXml.append_attribute("name").set_value(network.getName().c_str());
+
+    // no need to print this information for executable graph information serialization because it is not IR.
+    if (!execGraphInfoSerialization) {
+        netXml.append_attribute("version").set_value("3");
+        netXml.append_attribute("batch").set_value(network.getBatchSize());
+    }
 
-    pugi::xml_node layers = net.append_child("layers");
+    pugi::xml_node layers = netXml.append_child("layers");
 
-    const std::vector<CNNLayerPtr> ordered = CNNNetSortTopologically(network);
-    std::map<CNNLayer::Ptr, int> matching;
+    std::map<CNNLayer::Ptr, size_t> matching;
     for (size_t i = 0; i < ordered.size(); i++) {
         matching[ordered[i]] = i;
     }
@@ -70,18 +93,20 @@ void NetworkSerializer::serialize(
         layer.append_attribute("precision").set_value(precision.name());
         layer.append_attribute("id").set_value(i);
 
-        updateStdLayerParams(node);
+        if (!execGraphInfoSerialization) {
+            updateStdLayerParams(node);
+        }
 
         const auto &params = node->params;
-        if (params.size()) {
+        if (!params.empty()) {
             pugi::xml_node data = layer.append_child(dataName.c_str());
 
-            for (const auto it : params) {
+            for (const auto &it : params) {
                 data.append_attribute(it.first.c_str()).set_value(it.second.c_str());
             }
         }
 
-        if (node->insData.size()) {
+        if (!node->insData.empty()) {
             pugi::xml_node input = layer.append_child("input");
 
             for (size_t iport = 0; iport < node->insData.size(); iport++) {
@@ -95,7 +120,7 @@ void NetworkSerializer::serialize(
                 }
             }
         }
-        if (node->outData.size()) {
+        if (!node->outData.empty()) {
             pugi::xml_node input = layer.append_child("output");
             for (size_t oport = 0; oport < node->outData.size(); oport++) {
                 pugi::xml_node port = input.append_child("port");
@@ -107,9 +132,9 @@ void NetworkSerializer::serialize(
                 }
             }
         }
-        if (node->blobs.size()) {
+        if (dumpWeights && !node->blobs.empty()) {
             auto blobsNode = layer.append_child("blobs");
-            for (const auto dataIt : node->blobs) {
+            for (const auto &dataIt : node->blobs) {
                 const char *dataPtr = dataIt.second->buffer().as<char*>();
 
                 size_t dataSize = dataIt.second->byteSize();
@@ -126,31 +151,33 @@ void NetworkSerializer::serialize(
         }
     }
 
-    ofsBin.close();
-    if (!ofsBin.good()) {
-        THROW_IE_EXCEPTION << "Error during '" << binPath << "' closing";
+    if (dumpWeights) {
+        ofsBin.close();
+        if (!ofsBin.good()) {
+            THROW_IE_EXCEPTION << "Error during '" << binPath << "' closing";
+        }
     }
 
-    pugi::xml_node edges = net.append_child("edges");
+    pugi::xml_node edges = netXml.append_child("edges");
 
-    for (size_t i = 0; i < ordered.size(); i++) {
-        const CNNLayer::Ptr node = ordered[i];
+    for (const auto &ord : ordered) {
+        const CNNLayer::Ptr node = ord;
 
-        if (node->outData.size()) {
+        if (!node->outData.empty()) {
             auto itFrom = matching.find(node);
             if (itFrom == matching.end()) {
                 THROW_IE_EXCEPTION << "Internal error, cannot find " << node->name << " in matching container during serialization of IR";
             }
             for (size_t oport = 0; oport < node->outData.size(); oport++) {
                 const DataPtr outData = node->outData[oport];
-                for (auto inputTo : outData->inputTo) {
+                for (const auto &inputTo : outData->inputTo) {
                     auto itTo = matching.find(inputTo.second);
                     if (itTo == matching.end()) {
                         THROW_IE_EXCEPTION << "Broken edge form layer " << node->name << " to layer "  << inputTo.first<< "during serialization of IR";
                     }
 
-                    size_t foundPort = -1;
-                    for (size_t iport = 0; iport < inputTo.second->insData.size(); iport++) {
+                    int foundPort = -1;
+                    for (int iport = 0; iport < inputTo.second->insData.size(); iport++) {
                         if (inputTo.second->insData[iport].lock() == outData) {
                             foundPort = iport;
                         }
@@ -171,63 +198,10 @@ void NetworkSerializer::serialize(
         }
     }
 
-
-    InputsDataMap inputInfo;
-    network.getInputsInfo(inputInfo);
-
-    // assuming that we have preprocess only for one input
-    for (auto ii : inputInfo) {
-        const PreProcessInfo& pp = ii.second->getPreProcess();
-        size_t  nInChannels = pp.getNumberOfChannels();
-        if (nInChannels) {
-            pugi::xml_node preproc = net.append_child("pre-process");
-
-            preproc.append_attribute("reference-layer-name").set_value(ii.first.c_str());
-            preproc.append_attribute("mean-precision").set_value(Precision(Precision::FP32).name());
-
-            for (size_t ch = 0; ch < nInChannels; ch++) {
-                const PreProcessChannel::Ptr &preProcessChannel = pp[ch];
-                auto channel = preproc.append_child("channel");
-                channel.append_attribute("id").set_value(ch);
-
-                auto mean = channel.append_child("mean");
-
-                if (!preProcessChannel->meanData) {
-                    mean.append_attribute("value").set_value(preProcessChannel->meanValue);
-                } else {
-                    THROW_IE_EXCEPTION << "Mean data is not supported yet for serialization of the model";
-                }
-            }
-        }
-    }
-
-
-    // adding statistic to the file if statistic exists
-    ICNNNetworkStats* netNodesStats = nullptr;
-    auto stats = net.append_child("statistics");
-    network.getStats(&netNodesStats, nullptr);
-    const NetworkStatsMap statsmap =  netNodesStats->getNodesStats();
-
-    auto joinCommas = [&](const std::vector<float>& v) -> std::string {
-        std::string res;
-
-        for (size_t i = 0; i < v.size(); ++i) {
-            res += std::to_string(v[i]);
-            if (i < v.size() - 1) {
-                res += ", ";
-            }
-        }
-
-        return res;
-    };
-
-    for (const auto itStats : statsmap) {
-        auto layer = stats.append_child("layer");
-
-        layer.append_child("name").text().set(itStats.first.c_str());
-
-        layer.append_child("min").text().set(joinCommas(itStats.second->_minOutputs).c_str());
-        layer.append_child("max").text().set(joinCommas(itStats.second->_maxOutputs).c_str());
+    // no need to print this info in case of executable graph info serialization
+    if (!execGraphInfoSerialization) {
+        updatePreProcInfo(network, netXml);
+        updateStatisticsInfo(network, netXml);
     }
 
     if (!doc.save_file(xmlPath.c_str())) {
@@ -235,20 +209,19 @@ void NetworkSerializer::serialize(
     }
 }
 
-
-void NetworkSerializer::updateStdLayerParams(const CNNLayer::Ptr layer) {
+void NetworkSerializer::updateStdLayerParams(const CNNLayer::Ptr &layer) {
     auto layerPtr = layer.get();
     auto &params = layer->params;
 
     if (CaselessEq<std::string>()(layer->type, "power")) {
-        PowerLayer *lr = dynamic_cast<PowerLayer *>(layerPtr);
+        auto *lr = dynamic_cast<PowerLayer *>(layerPtr);
 
         params["scale"] = std::to_string(lr->scale);
         params["shift"] = std::to_string(lr->offset);
         params["power"] = std::to_string(lr->power);
     } else if (CaselessEq<std::string>()(layer->type, "convolution") ||
-        CaselessEq<std::string>()(layer->type, "deconvolution")) {
-        ConvolutionLayer *lr = dynamic_cast<ConvolutionLayer *>(layerPtr);
+               CaselessEq<std::string>()(layer->type, "deconvolution")) {
+        auto *lr = dynamic_cast<ConvolutionLayer *>(layerPtr);
 
         params["kernel"] = arrayRevertToIRProperty(lr->_kernel);
         params["pads_begin"] = arrayRevertToIRProperty(lr->_padding);
@@ -258,20 +231,20 @@ void NetworkSerializer::updateStdLayerParams(const CNNLayer::Ptr layer) {
         params["output"] = std::to_string(lr->_out_depth);
         params["group"] = std::to_string(lr->_group);
     } else if (CaselessEq<std::string>()(layer->type, "relu")) {
-        ReLULayer *lr = dynamic_cast<ReLULayer *>(layerPtr);
+        auto *lr = dynamic_cast<ReLULayer *>(layerPtr);
         if (lr->negative_slope != 0.0f) {
             params["negative_slope"] = std::to_string(lr->negative_slope);
         }
     } else if (CaselessEq<std::string>()(layer->type, "norm") ||
-        CaselessEq<std::string>()(layer->type, "lrn")) {
-        NormLayer *lr = dynamic_cast<NormLayer *>(layerPtr);
+               CaselessEq<std::string>()(layer->type, "lrn")) {
+        auto *lr = dynamic_cast<NormLayer *>(layerPtr);
 
         params["alpha"] = std::to_string(lr->_alpha);
         params["beta"] = std::to_string(lr->_beta);
         params["local-size"] = std::to_string(lr->_size);
         params["region"] = lr->_isAcrossMaps ? "across" : "same";
     } else if (CaselessEq<std::string>()(layer->type, "pooling")) {
-        PoolingLayer *lr = dynamic_cast<PoolingLayer *>(layerPtr);
+        auto *lr = dynamic_cast<PoolingLayer *>(layerPtr);
 
         params["kernel"] = arrayRevertToIRProperty(lr->_kernel);
         params["pads_begin"] = arrayRevertToIRProperty(lr->_padding);
@@ -279,85 +252,85 @@ void NetworkSerializer::updateStdLayerParams(const CNNLayer::Ptr layer) {
         params["strides"] = arrayRevertToIRProperty(lr->_stride);
 
         switch (lr->_type) {
-        case PoolingLayer::MAX:
-            params["pool-method"] = "max";
-            break;
-        case PoolingLayer::AVG:
-            params["pool-method"] = "avg";
-            break;
-
-        default:
-            THROW_IE_EXCEPTION << "Found unsupported pooling method: " << lr->_type;
+            case PoolingLayer::MAX:
+                params["pool-method"] = "max";
+                break;
+            case PoolingLayer::AVG:
+                params["pool-method"] = "avg";
+                break;
+
+            default:
+                THROW_IE_EXCEPTION << "Found unsupported pooling method: " << lr->_type;
         }
     } else if (CaselessEq<std::string>()(layer->type, "split")) {
-        SplitLayer *lr = dynamic_cast<SplitLayer *>(layerPtr);
+        auto *lr = dynamic_cast<SplitLayer *>(layerPtr);
         params["axis"] = std::to_string(lr->_axis);
     } else if (CaselessEq<std::string>()(layer->type, "concat")) {
-        ConcatLayer *lr = dynamic_cast<ConcatLayer *>(layerPtr);
+        auto *lr = dynamic_cast<ConcatLayer *>(layerPtr);
         params["axis"] = std::to_string(lr->_axis);
     } else if (CaselessEq<std::string>()(layer->type, "FullyConnected") ||
-        CaselessEq<std::string>()(layer->type, "InnerProduct")) {
-        FullyConnectedLayer *lr = dynamic_cast<FullyConnectedLayer *>(layerPtr);
+               CaselessEq<std::string>()(layer->type, "InnerProduct")) {
+        auto *lr = dynamic_cast<FullyConnectedLayer *>(layerPtr);
         params["out-size"] = std::to_string(lr->_out_num);
     } else if (CaselessEq<std::string>()(layer->type, "softmax")) {
-        SoftMaxLayer *lr = dynamic_cast<SoftMaxLayer *>(layerPtr);
+        auto *lr = dynamic_cast<SoftMaxLayer *>(layerPtr);
         params["axis"] = std::to_string(lr->axis);
     } else if (CaselessEq<std::string>()(layer->type, "reshape")) {
         // need to add here support of flatten layer if it is created from API
-        ReshapeLayer *lr = dynamic_cast<ReshapeLayer *>(layerPtr);
+        auto *lr = dynamic_cast<ReshapeLayer *>(layerPtr);
         params["dim"] = arrayToIRProperty(lr->shape);
     } else if (CaselessEq<std::string>()(layer->type, "Eltwise")) {
-        EltwiseLayer *lr = dynamic_cast<EltwiseLayer *>(layerPtr);
+        auto *lr = dynamic_cast<EltwiseLayer *>(layerPtr);
 
         std::string op;
 
         switch (lr->_operation) {
-        case EltwiseLayer::Sum:
-            op = "sum";
-            break;
-        case EltwiseLayer::Prod:
-            op = "prod";
-            break;
-        case EltwiseLayer::Max:
-            op = "max";
-            break;
-        default:
-            break;
+            case EltwiseLayer::Sum:
+                op = "sum";
+                break;
+            case EltwiseLayer::Prod:
+                op = "prod";
+                break;
+            case EltwiseLayer::Max:
+                op = "max";
+                break;
+            default:
+                break;
         }
 
         params["operation"] = op;
     } else if (CaselessEq<std::string>()(layer->type, "scaleshift")) {
-        ScaleShiftLayer *lr = dynamic_cast<ScaleShiftLayer *>(layerPtr);
+        auto *lr = dynamic_cast<ScaleShiftLayer *>(layerPtr);
         params["broadcast"] = std::to_string(lr->_broadcast);
     } else if (CaselessEq<std::string>()(layer->type, "crop")) {
-        CropLayer *lr = dynamic_cast<CropLayer *>(layerPtr);
+        auto *lr = dynamic_cast<CropLayer *>(layerPtr);
         params["axis"] = arrayToIRProperty(lr->axis);
         params["offset"] = arrayToIRProperty(lr->offset);
         params["dim"] = arrayToIRProperty(lr->dim);
     } else if (CaselessEq<std::string>()(layer->type, "tile")) {
-        TileLayer *lr = dynamic_cast<TileLayer *>(layerPtr);
+        auto *lr = dynamic_cast<TileLayer *>(layerPtr);
         params["axis"] = std::to_string(lr->axis);
         params["tiles"] = std::to_string(lr->tiles);
     } else if (CaselessEq<std::string>()(layer->type, "prelu")) {
-        PReLULayer *lr = dynamic_cast<PReLULayer *>(layerPtr);
+        auto *lr = dynamic_cast<PReLULayer *>(layerPtr);
         params["channel_shared"] = std::to_string(lr->_channel_shared);
     } else if (CaselessEq<std::string>()(layer->type, "clamp")) {
-        ClampLayer *lr = dynamic_cast<ClampLayer *>(layerPtr);
+        auto *lr = dynamic_cast<ClampLayer *>(layerPtr);
         params["min"] = std::to_string(lr->min_value);
         params["max"] = std::to_string(lr->max_value);
     } else if (CaselessEq<std::string>()(layer->type, "BatchNormalization")) {
-        BatchNormalizationLayer *lr = dynamic_cast<BatchNormalizationLayer *>(layerPtr);
+        auto *lr = dynamic_cast<BatchNormalizationLayer *>(layerPtr);
         params["epsilon"] = std::to_string(lr->epsilon);
     } else if (CaselessEq<std::string>()(layer->type, "grn")) {
-        GRNLayer *lr = dynamic_cast<GRNLayer *>(layerPtr);
+        auto *lr = dynamic_cast<GRNLayer *>(layerPtr);
         params["bias"] = std::to_string(lr->bias);
     } else if (CaselessEq<std::string>()(layer->type, "mvn")) {
-        MVNLayer *lr = dynamic_cast<MVNLayer *>(layerPtr);
+        auto *lr = dynamic_cast<MVNLayer *>(layerPtr);
         params["across_channels"] = std::to_string(lr->across_channels);
         params["normalize_variance"] = std::to_string(lr->normalize);
     } else if (CaselessEq<std::string>()(layer->type, "rnn") ||
-        CaselessEq<std::string>()(layer->type, "TensorIterator") ||
-        CaselessEq<std::string>()(layer->type, "LSTMCell")) {
+               CaselessEq<std::string>()(layer->type, "TensorIterator") ||
+               CaselessEq<std::string>()(layer->type, "LSTMCell")) {
         THROW_IE_EXCEPTION << "Not covered layers for writing to IR";
     }
 
@@ -365,9 +338,8 @@ void NetworkSerializer::updateStdLayerParams(const CNNLayer::Ptr layer) {
         params["quantization_level"] = layer->params["quantization_level"];
     }
 
-
     // update of weightable layers
-    WeightableLayer *pwlayer = dynamic_cast<WeightableLayer *>(layerPtr);
+    auto *pwlayer = dynamic_cast<WeightableLayer *>(layerPtr);
     if (pwlayer) {
         if (pwlayer->_weights) {
             pwlayer->blobs["weights"] = pwlayer->_weights;
@@ -377,3 +349,64 @@ void NetworkSerializer::updateStdLayerParams(const CNNLayer::Ptr layer) {
         }
     }
 }
+
+void NetworkSerializer::updatePreProcInfo(const InferenceEngine::ICNNNetwork& network, pugi::xml_node &netXml) {
+    InputsDataMap inputInfo;
+    network.getInputsInfo(inputInfo);
+
+    // Assume that you preprocess only one input
+    for (auto ii : inputInfo) {
+        const PreProcessInfo &pp = ii.second->getPreProcess();
+        size_t nInChannels = pp.getNumberOfChannels();
+        if (nInChannels) {
+            pugi::xml_node preproc = netXml.append_child("pre-process");
+
+            preproc.append_attribute("reference-layer-name").set_value(ii.first.c_str());
+            preproc.append_attribute("mean-precision").set_value(Precision(Precision::FP32).name());
+
+            for (size_t ch = 0; ch < nInChannels; ch++) {
+                const PreProcessChannel::Ptr &preProcessChannel = pp[ch];
+                auto channel = preproc.append_child("channel");
+                channel.append_attribute("id").set_value(ch);
+
+                auto mean = channel.append_child("mean");
+
+                if (!preProcessChannel->meanData) {
+                    mean.append_attribute("value").set_value(preProcessChannel->meanValue);
+                } else {
+                    THROW_IE_EXCEPTION << "Mean data is not supported yet for serialization of the model";
+                }
+            }
+        }
+    }
+}
+
+void NetworkSerializer::updateStatisticsInfo(const InferenceEngine::ICNNNetwork& network, pugi::xml_node &netXml) {
+    // If statistics exists, add it to the file
+    ICNNNetworkStats *netNodesStats = nullptr;
+    auto stats = netXml.append_child("statistics");
+    network.getStats(&netNodesStats, nullptr);
+    const NetworkStatsMap statsmap = netNodesStats->getNodesStats();
+
+    auto joinCommas = [&](const std::vector<float> &v) -> std::string {
+        std::string res;
+
+        for (size_t i = 0; i < v.size(); ++i) {
+            res += std::to_string(v[i]);
+            if (i < v.size() - 1) {
+                res += ", ";
+            }
+        }
+
+        return res;
+    };
+
+    for (const auto &itStats : statsmap) {
+        auto layer = stats.append_child("layer");
+
+        layer.append_child("name").text().set(itStats.first.c_str());
+
+        layer.append_child("min").text().set(joinCommas(itStats.second->_minOutputs).c_str());
+        layer.append_child("max").text().set(joinCommas(itStats.second->_maxOutputs).c_str());
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/src/inference_engine/network_serializer.h b/inference-engine/src/inference_engine/network_serializer.h
index a67f4f42a..e39ebc047 100644
--- a/inference-engine/src/inference_engine/network_serializer.h
+++ b/inference-engine/src/inference_engine/network_serializer.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,6 +6,8 @@
 
 #include <string>
 
+#include "xml_parse_utils.h"
+
 namespace InferenceEngine {
 namespace details {
 
@@ -17,7 +19,9 @@ public:
     static void serialize(const std::string &xmlPath, const std::string &binPath, const InferenceEngine::ICNNNetwork& network);
 
 private:
-    static void updateStdLayerParams(InferenceEngine::CNNLayer::Ptr layer);
+    static void updateStdLayerParams(const InferenceEngine::CNNLayer::Ptr &layer);
+    static void updatePreProcInfo(const InferenceEngine::ICNNNetwork& network, pugi::xml_node &netXml);
+    static void updateStatisticsInfo(const InferenceEngine::ICNNNetwork& network, pugi::xml_node &netXml);
 };
 
 }  // namespace details
diff --git a/inference-engine/src/inference_engine/parsers.h b/inference-engine/src/inference_engine/parsers.h
index acfe77659..0d830992c 100644
--- a/inference-engine/src/inference_engine/parsers.h
+++ b/inference-engine/src/inference_engine/parsers.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/precision_utils.cpp b/inference-engine/src/inference_engine/precision_utils.cpp
index 99886930d..b1d43ece6 100644
--- a/inference-engine/src/inference_engine/precision_utils.cpp
+++ b/inference-engine/src/inference_engine/precision_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -41,13 +41,20 @@ INFERENCE_ENGINE_API_CPP(void) f32tof16Arrays(short *dst,
 
 // small helper function to represent uint32_t value as float32
 inline float asfloat(uint32_t v) {
-    return *reinterpret_cast<float *>(&v);
+    // Both type-punning casts and unions are UB per C++ spec
+    // But compilers usually only break code with casts
+    union {
+        float f;
+        uint32_t i;
+    };
+    i = v;
+    return f;
 }
 
 // Function to convert F32 into F16
 INFERENCE_ENGINE_API_CPP(float) f16tof32(ie_fp16 x) {
     // this is storage for output result
-    uint32_t u = x;
+    uint32_t u = static_cast<uint32_t>(x);
 
     // get sign in 32bit format
     uint32_t s = ((u & 0x8000) << 16);
@@ -65,8 +72,23 @@ INFERENCE_ENGINE_API_CPP(float) f16tof32(ie_fp16 x) {
         u <<= (23 - 10);
         u |= EXP_MASK_F32;
         u |= s;
-    } else if ((x & EXP_MASK_F16) == 0) {  // check for zero and denormals. both are converted to zero
-        u = s;
+    } else if ((u & EXP_MASK_F16) == 0) {  // check for zero and denormals.
+        uint16_t h_sig = (u & 0x03ffu);
+        if (h_sig == 0) {
+            /* Signed zero */
+            u = s;
+        } else {
+            /* Subnormal */
+            uint16_t h_exp = (u & EXP_MASK_F16);
+            h_sig <<= 1;
+            while ((h_sig & 0x0400u) == 0) {
+                h_sig <<= 1;
+                h_exp++;
+            }
+            uint32_t f_exp = (static_cast<uint32_t>(127 - 15 - h_exp)) << 23;
+            uint32_t f_sig = (static_cast<uint32_t>(h_sig & 0x03ffu)) << 13;
+            u = s + f_exp + f_sig;
+        }
     } else {
         // abs
         u = (u & 0x7FFF);
@@ -82,7 +104,7 @@ INFERENCE_ENGINE_API_CPP(float) f16tof32(ie_fp16 x) {
     }
 
     // finaly represent result as float and return
-    return *reinterpret_cast<float *>(&u);
+    return asfloat(u);
 }
 
 // This function convert f32 to f16 with rounding to nearest value to minimize error
diff --git a/inference-engine/src/inference_engine/precision_utils.h b/inference-engine/src/inference_engine/precision_utils.h
index 3b824f231..be1c935d1 100644
--- a/inference-engine/src/inference_engine/precision_utils.h
+++ b/inference-engine/src/inference_engine/precision_utils.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/range_iterator.hpp b/inference-engine/src/inference_engine/range_iterator.hpp
index 423bd81b3..cf4578f5b 100644
--- a/inference-engine/src/inference_engine/range_iterator.hpp
+++ b/inference-engine/src/inference_engine/range_iterator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_argmax_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_argmax_shape_infer.hpp
index 96a91fb00..8605a8823 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_argmax_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_argmax_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class ArgMaxShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit ArgMaxShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
         auto out_max_val = static_cast<size_t>(cnnLayer.GetParamAsInt("out_max_val", 0));
         auto top_k = static_cast<size_t>(cnnLayer.GetParamAsInt("top_k", 0));
         int axis = 0;
@@ -45,7 +45,7 @@ public:
         size_t num_top_axes = firstInputShape.size();
         if (num_top_axes < 3) num_top_axes = 3;
 
-        SizeVector outputShape(num_top_axes, 1);
+        SizeVector outputShape(num_top_axes, 1lu);
         if (isValidAxis) {
             if (axis < 0) {
                 axis = static_cast<int>(firstInputShape.size() + axis);
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_bin_conv_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_bin_conv_shape_infer.hpp
new file mode 100644
index 000000000..2fd99ef52
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_bin_conv_shape_infer.hpp
@@ -0,0 +1,80 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <description_buffer.hpp>
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <debug.h>
+#include <cmath>
+#include <ie_format_parser.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for BinaryConvolution layer
+ */
+class BinConvShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit BinConvShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        BinaryConvolutionLayer binConvLayer(lp);
+        binConvLayer.params = params;
+        binConvLayer.type = _type;
+        validate(&binConvLayer, inBlobs, params, blobs);
+
+        auto dims = inShapes[0];
+        auto computeSpatialShape = [&](size_t inDim, int axis) {
+            size_t kernel = 0;
+            if (binConvLayer._dilation[axis])
+                kernel = (binConvLayer._kernel[axis] - 1) * binConvLayer._dilation[axis] + 1;
+            else
+                kernel = binConvLayer._kernel[axis];
+            size_t stride = binConvLayer._stride[axis];
+            size_t pad = binConvLayer._padding[axis];
+
+            float outDim;
+            std::string padType = binConvLayer._auto_pad;
+            if (padType == "valid") {
+                outDim = std::ceil((inDim - kernel + 1.f) / stride);
+            } else if (padType == "same_upper") {
+                outDim = std::ceil(1.f * inDim / stride);
+            } else if (padType == "same_lower") {
+                outDim = std::floor(1.f * inDim / stride);
+            } else {
+                int padEnd = binConvLayer._pads_end[axis];
+                outDim = std::floor(1.f * (inDim + pad + padEnd - kernel) / stride) + 1.f;
+            }
+
+            if (outDim < 0)
+                THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
+
+            return static_cast<size_t>(outDim);
+        };
+
+        size_t inputN = dims[0];
+        size_t OC = binConvLayer._out_depth;
+        SizeVector shapes;
+        shapes.push_back(inputN);
+        shapes.push_back(OC);
+        if (dims.size() == 5)
+            shapes.push_back(computeSpatialShape(dims[dims.size() - 3], Z_AXIS));
+        shapes.push_back(computeSpatialShape(dims[dims.size() - 2], Y_AXIS));
+        shapes.push_back(computeSpatialShape(dims[dims.size() - 1], X_AXIS));
+        outShapes.push_back(shapes);
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.cpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.cpp
index 0781df144..bbad1781a 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.cpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -27,7 +27,6 @@
 #include "ie_detection_output_shape_infer.hpp"
 #include "ie_priorbox_clustered_shape_infer.hpp"
 #include "ie_ctc_greedy_decoder_shape_infer.hpp"
-#include "ie_spatial_transformer_shape_infer.hpp"
 #include "ie_inner_product_shape_infer.hpp"
 #include "ie_resample_shape_infer.hpp"
 #include "ie_interp_shape_infer.hpp"
@@ -35,6 +34,22 @@
 #include "ie_gemm_shape_infer.hpp"
 #include "ie_pad_shape_infer.hpp"
 #include "ie_gather_shape_infer.hpp"
+#include "ie_strided_slice_shape_infer.hpp"
+#include "ie_shuffle_channels_shape_infer.hpp"
+#include "ie_depth_to_space_shape_infer.hpp"
+#include "ie_space_to_depth_shape_infer.hpp"
+#include "ie_reverse_sequence_shape_infer.hpp"
+#include "ie_shape_shape_infer.hpp"
+#include "ie_squeeze_shape_infer.hpp"
+#include "ie_unsqueeze_shape_infer.hpp"
+#include "ie_range_shape_infer.hpp"
+#include "ie_fill_shape_infer.hpp"
+#include "ie_expand_shape_infer.hpp"
+#include "ie_rnn_shape_infer.hpp"
+#include "ie_tensor_iterator_shape_infer.hpp"
+#include "ie_rnn_cell_shape_infer.hpp"
+#include "ie_quantize_shape_infer.hpp"
+#include "ie_bin_conv_shape_infer.hpp"
 #include <algorithm>
 #include <memory>
 #include <string>
@@ -132,14 +147,37 @@ REG_SHAPE_INFER_FOR_TYPE(TileShapeProp, Tile);
 REG_SHAPE_INFER_FOR_TYPE(CropShapeProp, Crop);
 REG_SHAPE_INFER_FOR_TYPE(ConcatShapeProp, Concat);
 REG_SHAPE_INFER_FOR_TYPE(EltWiseShapeProp, Eltwise);
+REG_SHAPE_INFER_FOR_TYPE(EltWiseShapeProp, Mul);
+REG_SHAPE_INFER_FOR_TYPE(EltWiseShapeProp, Add);
+REG_SHAPE_INFER_FOR_TYPE(EltWiseShapeProp, Div);
 REG_SHAPE_INFER_FOR_TYPE(CTCGreedyDecoderShapeProp, CTCGreedyDecoder);
 REG_SHAPE_INFER_FOR_TYPE(ProposalShapeProp, Proposal);
 REG_SHAPE_INFER_FOR_TYPE(ReorgYoloShapeProp, ReorgYolo);
 REG_SHAPE_INFER_FOR_TYPE(RegionYoloShapeProp, RegionYolo);
+REG_SHAPE_INFER_FOR_TYPE(RNNShapeProp, RNNSequence);
+REG_SHAPE_INFER_FOR_TYPE(RNNShapeProp, GRUSequence);
+REG_SHAPE_INFER_FOR_TYPE(RNNShapeProp, LSTMSequence);
+REG_SHAPE_INFER_FOR_TYPE(RNNCellShapeProp, RNNCell);
+REG_SHAPE_INFER_FOR_TYPE(GRUCellShapeProp, GRUCell);
+REG_SHAPE_INFER_FOR_TYPE(LSTMCellShapeProp, LSTMCell);
+REG_SHAPE_INFER_FOR_TYPE(TensorIteratorShapeProp, TensorIterator);
 REG_SHAPE_INFER_FOR_TYPE(ArgMaxShapeProp, ArgMax);
 REG_SHAPE_INFER_FOR_TYPE(GemmShapeProp, Gemm);
 REG_SHAPE_INFER_FOR_TYPE(PadShapeProp, Pad);
 REG_SHAPE_INFER_FOR_TYPE(GatherShapeProp, Gather);
+REG_SHAPE_INFER_FOR_TYPE(StridedSliceShapeProp, StridedSlice);
+REG_SHAPE_INFER_FOR_TYPE(ShuffleChannelsShapeProp, ShuffleChannels);
+REG_SHAPE_INFER_FOR_TYPE(DepthToSpaceShapeProp, DepthToSpace);
+REG_SHAPE_INFER_FOR_TYPE(SpaceToDepthShapeProp, SpaceToDepth);
+REG_SHAPE_INFER_FOR_TYPE(ReverseSequenceShapeProp, ReverseSequence);
+REG_SHAPE_INFER_FOR_TYPE(SqueezeShapeProp, Squeeze);
+REG_SHAPE_INFER_FOR_TYPE(UnsqueezeShapeProp, Unsqueeze);
+REG_SHAPE_INFER_FOR_TYPE(RangeShapeProp, Range);
+REG_SHAPE_INFER_FOR_TYPE(FillShapeProp, Fill);
+REG_SHAPE_INFER_FOR_TYPE(ExpandShapeProp, Expand);
+REG_SHAPE_INFER_FOR_TYPE(ShapeShapeProp, Shape);
+REG_SHAPE_INFER_FOR_TYPE(QuantizeShapeProp, Quantize);
+REG_SHAPE_INFER_FOR_TYPE(BinConvShapeProp, BinaryConvolution);
 
 }  // namespace ShapeInfer
 }  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.hpp
index 3cb06100e..84b351041 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_holder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_impl.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_impl.hpp
index 91896738d..39a6b820e 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_impl.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_built_in_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -30,7 +30,7 @@ public:
             THROW_IE_EXCEPTION << "Internal error: failed to find validator for layer with type: " << _type;
     }
 
-    void validate(CNNLayer* layer, const std::vector<SizeVector>& inShapes,
+    void validate(CNNLayer* layer, const std::vector<Blob::CPtr>& inBlobs,
                   const std::map<std::string, std::string>& params,
                   const std::map<std::string, Blob::Ptr>& blobs) {
         _validator->parseParams(layer);
@@ -39,7 +39,7 @@ public:
         _validator->checkCorrespondence(layer, blobs, inShapes);
     }
 
-    virtual void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    virtual void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                                  const std::map<std::string, std::string>& params,
                                  const std::map<std::string, Blob::Ptr>& blobs,
                                  std::vector<SizeVector>& outShapes) = 0;
@@ -49,21 +49,34 @@ public:
                            const std::map<std::string, Blob::Ptr>& blobs,
                            std::vector<SizeVector>& outShapes,
                            ResponseDesc* resp) noexcept override {
+        return DescriptionBuffer(GENERAL_ERROR, resp)
+                << "Unexpected call of deprecated Shape Infer function with input shapes";
+    }
+
+    StatusCode inferShapes(const std::vector<Blob::CPtr>& inBlobs,
+                           const std::map<std::string, std::string>& params,
+                           const std::map<std::string, Blob::Ptr>& blobs,
+                           std::vector<SizeVector>& outShapes,
+                           ResponseDesc* resp) noexcept override {
+        inShapes.clear();
+        for (const auto& blob : inBlobs) {
+            inShapes.push_back(blob->getTensorDesc().getDims());
+        }
         outShapes.clear();
-        std::string errorPrefix = "Failed to infer shapes for " + _type + " layer with error: ";
         try {
-            inferShapesImpl(inShapes, params, blobs, outShapes);
+            inferShapesImpl(inBlobs, params, blobs, outShapes);
             return OK;
         } catch (const std::exception& ex) {
-            return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << errorPrefix + ex.what();
+            return InferenceEngine::DescriptionBuffer(GENERAL_ERROR, resp) << ex.what();
         } catch (...) {
-            return InferenceEngine::DescriptionBuffer(UNEXPECTED) << errorPrefix + " unknown";
+            return InferenceEngine::DescriptionBuffer(UNEXPECTED) << "Unknown error";
         }
     }
 
 protected:
     std::string _type;
     details::LayerValidator::Ptr _validator;
+    std::vector<SizeVector> inShapes;
 };
 
 }  // namespace ShapeInfer
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_concat_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_concat_shape_infer.hpp
index 8d183ea05..0e3688b10 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_concat_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_concat_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ class ConcatShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit ConcatShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,7 +28,7 @@ public:
         ConcatLayer concatLayer(lp);
         concatLayer.params = params;
         concatLayer.type = _type;
-        validate(&concatLayer, inShapes, params, blobs);
+        validate(&concatLayer, inBlobs, params, blobs);
 
         size_t sum(0);
         size_t axis = concatLayer._axis;
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_conv_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_conv_shape_infer.hpp
index 7c1751fb5..a42f81d37 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_conv_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_conv_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -24,7 +24,7 @@ class ConvShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit ConvShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -32,49 +32,51 @@ public:
         ConvolutionLayer convLayer(lp);
         convLayer.params = params;
         convLayer.type = _type;
-        validate(&convLayer, inShapes, params, blobs);
+        validate(&convLayer, inBlobs, params, blobs);
 
-        float OH_temp, OW_temp;
         auto dims = inShapes[0];
+        auto dims_size = dims.size();
+        auto spacial_d_size = dims.size() - 2;
+        float* OD_temp = new float[spacial_d_size];
+        size_t*  KDims = new size_t[spacial_d_size];
         size_t inputN = dims[0];
-        size_t IH = dims[2];
-        size_t IW = dims[3];
-        size_t KH = 0, KW = 0;
-        int PR = -1, PB = -1;
-        if (convLayer._dilation[Y_AXIS])
-            KH = (convLayer._kernel[Y_AXIS] - 1) * convLayer._dilation[Y_AXIS] + 1;
-        else
-            KH = convLayer._kernel[Y_AXIS];
-        if (convLayer._dilation[X_AXIS])
-            KW = (convLayer._kernel[X_AXIS] - 1) * convLayer._dilation[X_AXIS] + 1;
-        else
-            KW = convLayer._kernel[X_AXIS];
-        size_t SH = convLayer._stride[Y_AXIS];
-        size_t SW = convLayer._stride[X_AXIS];
-        size_t PH = convLayer._padding[Y_AXIS];
-        size_t PW = convLayer._padding[X_AXIS];
+        for (int i = 0; i < spacial_d_size; i++) {
+            if (convLayer._dilation[i])
+                KDims[i] = (convLayer._kernel[i] - 1) * convLayer._dilation[i] + 1;
+            else
+                KDims[i] = convLayer._kernel[i];
+        }
         size_t OC = convLayer._out_depth;
         std::string padType = convLayer._auto_pad;
         if (padType == "valid") {
-            OH_temp = std::ceil((IH - KH + 1.f) / SH);
-            OW_temp = std::ceil((IW - KW + 1.f) / SW);
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = std::ceil((dims[dims_size - 1 - i] - KDims[i] + 1.f) / convLayer._stride[i]);
         } else if (padType == "same_upper") {
-            OH_temp = std::ceil(1.f * IH / SH);
-            OW_temp = std::ceil(1.f * IW / SW);
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = std::ceil(1.f * dims[dims_size - 1 - i] / convLayer._stride[i]);
         } else if (padType == "same_lower") {
-            OH_temp = std::floor(1.f * IH / SH);
-            OW_temp = std::floor(1.f * IW / SW);
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = std::floor(1.f * dims[dims_size - 1 - i] / convLayer._stride[i]);
         } else {
-            PR = convLayer._pads_end[X_AXIS];
-            PB = convLayer._pads_end[Y_AXIS];
-            OH_temp = std::floor(1.f * (IH + PH + PB - KH) / SH) + 1.f;
-            OW_temp = std::floor(1.f * (IW + PW + PR - KW) / SW) + 1.f;
+            for (int i = 0; i < spacial_d_size; i++) {
+                OD_temp[i] = std::floor(1.f * (dims[dims_size - 1 - i] +
+                        convLayer._padding[i] + convLayer._pads_end[i] - KDims[i]) /
+                        convLayer._stride[i]) + 1.f;
+            }
         }
-        if (OH_temp < 0 || OW_temp < 0)
-            THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
-        size_t OH = static_cast<size_t>(OH_temp);
-        size_t OW = static_cast<size_t>(OW_temp);
-        outShapes.push_back({inputN, OC, OH, OW});
+
+        for (int i = 0; i < spacial_d_size; i++)
+            if (OD_temp[i] < 0)
+                THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
+
+        SizeVector outShape = {inputN, OC};
+        for (int i = spacial_d_size - 1; i >= 0; i--)
+            outShape.push_back(static_cast<size_t>(OD_temp[i]));
+
+        outShapes.push_back(outShape);
+
+        delete[] OD_temp;
+        delete[] KDims;
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_crop_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_crop_shape_infer.hpp
index 91b72f239..b0bfa2f88 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_crop_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_crop_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ class CropShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit CropShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,7 +28,7 @@ public:
         CropLayer cropLayer(lp);
         cropLayer.params = params;
         cropLayer.type = _type;
-        validate(&cropLayer, inShapes, params, blobs);
+        validate(&cropLayer, inBlobs, params, blobs);
 
         outShapes.push_back(inShapes[0]);
         if (inShapes.size() == 2) {
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_ctc_greedy_decoder_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_ctc_greedy_decoder_shape_infer.hpp
index 29625ff6c..c18a597ec 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_ctc_greedy_decoder_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_ctc_greedy_decoder_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,15 +20,16 @@ class CTCGreedyDecoderShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit CTCGreedyDecoderShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
         outShapes.clear();
         LayerParams lp{};
         CNNLayer cnnLayer(lp);
-        cnnLayer.params = params; cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        cnnLayer.params = params;
+        cnnLayer.type = _type;
+        validate(&cnnLayer, inBlobs, params, blobs);
 
         outShapes.push_back({inShapes[0][1], inShapes[0][0], 1, 1});
     }
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_deconv_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_deconv_shape_infer.hpp
index c4f130a70..2ddf5bd22 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_deconv_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_deconv_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class DeconvShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit DeconvShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,45 +30,45 @@ public:
         DeconvolutionLayer deconvLayer(lp);
         deconvLayer.params = params;
         deconvLayer.type = _type;
-        validate(&deconvLayer, inShapes, params, blobs);
+        validate(&deconvLayer, inBlobs, params, blobs);
 
         auto dims = inShapes[0];
+        auto dims_size = dims.size();
+        auto spacial_d_size = dims.size() - 2;
+        float* OD_temp = new float[spacial_d_size];
+        size_t*  KDims = new size_t[spacial_d_size];
         size_t inputN = dims[0];
-        size_t IH = dims[2];
-        size_t IW = dims[3];
-        int PR = -1, PB = -1;
-        float OHTemp, OWTemp, KH, KW;
-        if (deconvLayer._dilation[Y_AXIS])
-            KH = (deconvLayer._kernel[Y_AXIS] - 1) * deconvLayer._dilation[Y_AXIS] + 1;
-        else
-            KH = deconvLayer._kernel[Y_AXIS];
-        if (deconvLayer._dilation[X_AXIS])
-            KW = (deconvLayer._kernel[X_AXIS] - 1) * deconvLayer._dilation[X_AXIS] + 1;
-        else
-            KW = deconvLayer._kernel[X_AXIS];
-        size_t SH = deconvLayer._stride[Y_AXIS];
-        size_t SW = deconvLayer._stride[X_AXIS];
-        size_t PH = deconvLayer._padding[Y_AXIS];
-        size_t PW = deconvLayer._padding[X_AXIS];
+        for (int i = 0; i < spacial_d_size; i++) {
+            if (deconvLayer._dilation[i])
+                KDims[i] = (deconvLayer._kernel[i] - 1) * deconvLayer._dilation[i] + 1;
+            else
+                KDims[i] = deconvLayer._kernel[i];
+        }
         size_t OC = deconvLayer._out_depth;
         std::string padType = deconvLayer._auto_pad;
         if (padType == "valid") {
-            OHTemp = IH * SH + KH - 1;
-            OWTemp = IW * SW + KW - 1;
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = dims[dims_size - 1 - i] * deconvLayer._stride[i] + KDims[i] - 1;
         } else if ((padType == "same_upper") || (padType == "same_lower")) {
-            OHTemp = IH * SH;
-            OWTemp = IW * SW;
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = dims[dims_size - 1 - i] * deconvLayer._stride[i];
         } else {
-            PR = deconvLayer._pads_end[X_AXIS];
-            PB = deconvLayer._pads_end[Y_AXIS];
-            OHTemp = SH * (IH - 1) + KH - PH - PB;
-            OWTemp = SW * (IW - 1) + KW - PW - PR;
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = deconvLayer._stride[i] * (dims[dims_size - 1 - i] - 1) +
+                        KDims[i] - deconvLayer._padding[i] - deconvLayer._pads_end[i];
         }
-        if (OHTemp < 0 || OWTemp < 0)
-            THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
-        size_t OH = static_cast<size_t>(OHTemp);
-        size_t OW = static_cast<size_t>(OWTemp);
-        outShapes.emplace_back(std::initializer_list<size_t>{inputN, OC, OH, OW});
+        for (int i = 0; i < spacial_d_size; i++)
+            if (OD_temp[i] < 0)
+                THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
+
+        SizeVector outShape = {inputN, OC};
+        for (int i = spacial_d_size - 1; i >= 0; i--)
+            outShape.push_back(static_cast<size_t>(OD_temp[i]));
+
+        outShapes.emplace_back(outShape);
+
+        delete[] OD_temp;
+        delete[] KDims;
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_depth_to_space_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_depth_to_space_shape_infer.hpp
new file mode 100644
index 000000000..9942c05f9
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_depth_to_space_shape_infer.hpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for DepthToSpace layer
+ */
+class DepthToSpaceShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit DepthToSpaceShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        DepthToSpaceLayer depthToSpaceLayer(lp);
+        depthToSpaceLayer.params = params;
+        depthToSpaceLayer.type = _type;
+        validate(&depthToSpaceLayer, inBlobs, params, blobs);
+
+        unsigned int block_size = depthToSpaceLayer.block_size;
+        outShapes = {inShapes[0]};
+
+        outShapes[0][outShapes[0].size() - 1] = inShapes[0][inShapes[0].size() - 1] * block_size;
+        outShapes[0][outShapes[0].size() - 2] = inShapes[0][inShapes[0].size() - 2] * block_size;
+        outShapes[0][outShapes[0].size() - 3] = inShapes[0][inShapes[0].size() - 3] / block_size / block_size;
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_detection_output_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_detection_output_shape_infer.hpp
index eff11ed25..605565592 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_detection_output_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_detection_output_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class DetectionOutputShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit DetectionOutputShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
 
         int top_k = cnnLayer.GetParamAsInt("keep_top_k");
         outShapes.push_back({1, 1, static_cast<size_t>(top_k) * inShapes[0][0], 7});
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_eltwise_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_eltwise_shape_infer.hpp
index ce7248c0d..652f8ab0b 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_eltwise_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_eltwise_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,6 +9,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <algorithm>
 
 namespace InferenceEngine {
 namespace ShapeInfer {
@@ -20,7 +21,7 @@ class EltWiseShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit EltWiseShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,8 +29,23 @@ public:
         EltwiseLayer eltwiseLayer(lp);
         eltwiseLayer.params = params;
         eltwiseLayer.type = _type;
-        validate(&eltwiseLayer, inShapes, params, blobs);
-        outShapes.push_back(inShapes[0]);
+        validate(&eltwiseLayer, inBlobs, params, blobs);
+
+        if (inShapes.size() == 1) {
+            outShapes.push_back(inShapes[0]);
+        } else {
+            SizeVector outShape((std::max)(inShapes[0], inShapes[1]));
+            for (size_t ind = 0; ind < outShape.size(); ++ind) {
+                if (ind < inShapes[0].size() && ind < inShapes[1].size()) {
+                    outShape[ind] = (std::max)(inShapes[0][ind], inShapes[1][ind]);
+                } else if (ind >= inShapes[0].size()) {
+                    outShape[ind] = inShapes[1][ind];
+                } else {
+                    outShape[ind] = inShapes[0][ind];
+                }
+            }
+            outShapes.push_back(outShape);
+        }
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_equal_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_equal_shape_infer.hpp
index 9378aba34..e21de0ec4 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_equal_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_equal_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,7 +21,7 @@ class EqualShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit EqualShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes, const std::map<std::string, std::string>& params,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs, const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs, std::vector<SizeVector>& outShapes) override {
         outShapes = inShapes;
     }
@@ -31,7 +31,7 @@ class DoNothingShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit DoNothingShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes, const std::map<std::string, std::string>& params,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs, const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs, std::vector<SizeVector>& outShapes) override {}
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_expand_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_expand_shape_infer.hpp
new file mode 100644
index 000000000..db2d687e6
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_expand_shape_infer.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Expand layer
+ */
+class ExpandShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit ExpandShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        ExpandLayer unsqueezeLayer(lp);
+        unsqueezeLayer.params = params;
+        unsqueezeLayer.type = _type;
+        validate(&unsqueezeLayer, inBlobs, params, blobs);
+
+        outShapes = {inShapes[0]};
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_fill_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_fill_shape_infer.hpp
new file mode 100644
index 000000000..504d919c2
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_fill_shape_infer.hpp
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Fill layer
+ */
+class FillShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit FillShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        FillLayer fillLayer(lp);
+        fillLayer.params = params;
+        fillLayer.type = _type;
+        validate(&fillLayer, inBlobs, params, blobs);
+
+        auto dimsBlob = *inBlobs.begin();
+        SizeVector shape;
+        SizeVector dims = dimsBlob->getTensorDesc().getDims();
+        auto* buffer = dimsBlob->cbuffer().as<int32_t*>();
+        if (!buffer || dimsBlob->getTensorDesc().getPrecision() != Precision::I32)
+            THROW_IE_EXCEPTION << " Fill dimensions vector should be I32!";
+
+        for (int i = 0; i < dimsBlob->size(); i++) {
+            shape.push_back(buffer[i]);
+        }
+        outShapes = {shape};
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_flatten_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_flatten_shape_infer.hpp
index bdde9761c..be42a6c58 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_flatten_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_flatten_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -24,15 +24,15 @@ class FlattenShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit FlattenShapeProp(const std::string &type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector> &inShapes,
-                         const std::map<std::string, std::string> &params,
-                         const std::map<std::string, Blob::Ptr> &blobs,
-                         std::vector<SizeVector> &outShapes) override {
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
         LayerParams lp{};
         ReshapeLayer reshapeLayer(lp);
         reshapeLayer.params = params;
         reshapeLayer.type = _type;
-        validate(&reshapeLayer, inShapes, params, blobs);
+        validate(&reshapeLayer, inBlobs, params, blobs);
 
         auto inputShape = inShapes[0];
         size_t inputShapeTotal = std::accumulate(inputShape.begin(), inputShape.end(), 1lu, std::multiplies<size_t>());
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_gather_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_gather_shape_infer.hpp
index 41641cb8e..5a37378e2 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_gather_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_gather_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ class GatherShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit GatherShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,7 +28,7 @@ public:
         GatherLayer gatherLayer(lp);
         gatherLayer.params = params;
         gatherLayer.type = _type;
-        validate(&gatherLayer, inShapes, params, blobs);
+        validate(&gatherLayer, inBlobs, params, blobs);
 
         int axis = gatherLayer.axis;
         if (axis < 0)
@@ -36,7 +36,7 @@ public:
 
         outShapes.resize(1);
         outShapes[0].resize(inShapes[0].size() + inShapes[1].size() - 1);
-        for (size_t i = 0; i < axis; i++)
+        for (int i = 0; i < axis; i++)
             outShapes[0][i] = inShapes[0][i];
 
         for (size_t i = 0; i < inShapes[1].size(); i++)
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_gemm_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_gemm_shape_infer.hpp
index 5cac2f5a6..f3474f1f6 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_gemm_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_gemm_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -24,15 +24,16 @@ class GemmShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit GemmShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
+        // TODO: primitive does not support 5D tensor yet
         LayerParams lp{};
         GemmLayer gemmLayer(lp);
         gemmLayer.params = params;
         gemmLayer.type = _type;
-        validate(&gemmLayer, inShapes, params, blobs);
+        validate(&gemmLayer, inBlobs, params, blobs);
 
         auto dims0 = inShapes[0];
         auto dims1 = inShapes[1];
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_inner_product_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_inner_product_shape_infer.hpp
index d65a0d3df..63160d05e 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_inner_product_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_inner_product_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -19,7 +19,7 @@ class InnerProductShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit InnerProductShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -27,7 +27,7 @@ public:
         FullyConnectedLayer fcLayer(lp);
         fcLayer.params = params;
         fcLayer.type = _type;
-        validate(&fcLayer, inShapes, params, blobs);
+        validate(&fcLayer, inBlobs, params, blobs);
         size_t OC, ON;
         ON = inShapes[0][0];
         OC = fcLayer._out_num;
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_interp_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_interp_shape_infer.hpp
index ebca8fffa..a7efae0de 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_interp_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_interp_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,6 +11,7 @@
 #include <memory>
 #include <string>
 #include <vector>
+#include <limits>
 
 namespace InferenceEngine {
 namespace ShapeInfer {
@@ -22,7 +23,7 @@ class InterpShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit InterpShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,60 +31,67 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
-        auto factor = static_cast<size_t>(cnnLayer.GetParamAsInt("factor", 0));
-        auto shrink_factor = static_cast<size_t>(cnnLayer.GetParamAsInt("shrink_factor", 0));
-        auto zoom_factor = static_cast<size_t>(cnnLayer.GetParamAsInt("zoom_factor", 0));
-        auto height = static_cast<size_t>(cnnLayer.GetParamAsInt("height", 0));
-        auto width = static_cast<size_t>(cnnLayer.GetParamAsInt("width", 0));
+        validate(&cnnLayer, inBlobs, params, blobs);
+        SizeVector outShape;
+        if (inBlobs.size() == 2) {
+            auto* buffer = inBlobs[1]->cbuffer().as<float*>();
+            if (buffer != nullptr) {
+                for (int i = 0; i < inBlobs[1]->size(); i++) {
+                    outShape.push_back(static_cast<unsigned long>(buffer[i]));
+                }
+            } else {
+                THROW_IE_EXCEPTION << "Second input must have allocated data";
+            }
+        } else {
+            auto factor = cnnLayer.GetParamAsFloat("factor", 0);
+            auto shrink_factor = cnnLayer.GetParamAsFloat("shrink_factor", 0);
+            auto zoom_factor = cnnLayer.GetParamAsFloat("zoom_factor", 0);
+            auto height = static_cast<size_t>(cnnLayer.GetParamAsInt("height", 0));
+            auto width = static_cast<size_t>(cnnLayer.GetParamAsInt("width", 0));
+
+            auto IS_ZERO = [](float value) {
+                return std::fabs(value) < std::numeric_limits<float>::epsilon();
+            };
+
+        bool noFactor = IS_ZERO(zoom_factor) && IS_ZERO(shrink_factor) && IS_ZERO(factor);
 
-        // TODO: move to validators
-        if (!zoom_factor && !shrink_factor && !factor && (!height || !width)) {
-            THROW_IE_EXCEPTION
-                    << "Can't reshape without factor, or target resolution. "
-                    << "Supported attributes: factor, shrink_factor, zoom_factor, height, width";
-        }
         size_t N, C, H, W;
-        // TODO: validate that only one input
         N = inShapes[0][0];
         C = inShapes[0][1];
         H = inShapes[0][2];
         W = inShapes[0][3];
 
+            auto SETW = [&width, &W](size_t value) {
+                if (width) {
+                    W = width;
+                } else {
+                    W = value;
+                }
+            };
 
-        auto SETW = [&width, &W](size_t value) {
-            if (width) {
-                W = width;
-            } else {
-                W = value;
-            }
-        };
+            auto SETH = [&height, &H](size_t value) {
+                if (height) {
+                    H = height;
+                } else {
+                    H = value;
+                }
+            };
 
-        auto SETH = [&height, &H](size_t value) {
-            if (height) {
-                H = height;
+            if (noFactor) {
+                SETW(width);
+                SETH(height);
             } else {
-                H = value;
-            }
-        };
-
-        if (factor) {
-            SETH(H * factor);
-            SETW(W * factor);
-        } else if (shrink_factor || zoom_factor) {
-            if (shrink_factor) {
-                SETH(H / shrink_factor);
-                SETW(W / shrink_factor);
-            }
-            if (zoom_factor) {
-                SETH(H * zoom_factor);
-                SETW(W * zoom_factor);
+                float actualFactor = factor;
+                if (!IS_ZERO(shrink_factor) || !IS_ZERO(zoom_factor)) {
+                    if (!IS_ZERO(zoom_factor)) actualFactor = zoom_factor;
+                    if (!IS_ZERO(shrink_factor)) actualFactor /= shrink_factor;
+                }
+                SETW(W * actualFactor);
+                SETH(H * actualFactor);
             }
-        } else {
-            SETW(width);
-            SETH(height);
+            outShape = {N, C, H, W};
         }
-        outShapes.push_back({N, C, H, W});
+        outShapes.push_back(outShape);
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_pad_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_pad_shape_infer.hpp
index 2fb1c4961..424ab39dc 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_pad_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_pad_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ class PadShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit PadShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,7 +28,7 @@ public:
         PadLayer padLayer(lp);
         padLayer.params = params;
         padLayer.type = _type;
-        validate(&padLayer, inShapes, params, blobs);
+        validate(&padLayer, inBlobs, params, blobs);
 
         outShapes.push_back(inShapes[0]);
         for (size_t i = 0; i < outShapes[0].size(); i++) {
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_permute_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_permute_shape_infer.hpp
index 46f14565b..cdfa2d7c3 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_permute_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_permute_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class PermuteShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit PermuteShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer permuteLayer(lp);
         permuteLayer.params = params;
         permuteLayer.type = _type;
-        validate(&permuteLayer, inShapes, params, blobs);
+        validate(&permuteLayer, inBlobs, params, blobs);
 
         std::vector<size_t> order;
         std::vector<int> layerOrder = permuteLayer.GetParamAsInts("order");
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_pool_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_pool_shape_infer.hpp
index 4850c8a57..4344a420c 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_pool_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_pool_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class PoolingShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit PoolingShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,32 +30,27 @@ public:
         PoolingLayer poolLayer(lp);
         poolLayer.params = params;
         poolLayer.type = _type;
-        validate(&poolLayer, inShapes, params, blobs);
+        validate(&poolLayer, inBlobs, params, blobs);
 
-        float OHTemp = 1.f, OWTemp = 1.f;
         auto dims = inShapes[0];
-        int PR = -1, PB = -1;
+        auto dims_size = dims.size();
+        auto spacial_d_size = dims.size() - 2;
+        float* OD_temp = new float[spacial_d_size];
+        for (int i = 0; i < spacial_d_size; i++)
+            OD_temp[i] = 1.f;
         size_t inputN = dims[0];
         size_t IC = dims[1];
-        size_t IH = dims[2];
-        size_t IW = dims[3];
-        size_t KH = poolLayer._kernel[Y_AXIS];
-        size_t KW = poolLayer._kernel[X_AXIS];
-        size_t SH = poolLayer._stride[Y_AXIS];
-        size_t SW = poolLayer._stride[X_AXIS];
-        size_t PH = poolLayer._padding[Y_AXIS];
-        size_t PW = poolLayer._padding[X_AXIS];
 
         std::string padType = poolLayer._auto_pad;
         if (padType == "valid") {
-            OHTemp = std::ceil((IH - KH + 1.f) / SH);
-            OWTemp = std::ceil((IW - KW + 1.f) / SW);
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = std::ceil((dims[dims_size - 1 - i] - poolLayer._kernel[i] + 1.f) / poolLayer._stride[i]);
         } else if (padType == "same_upper") {
-            OHTemp = std::ceil(1.f * IH / SH);
-            OWTemp = std::ceil(1.f * IW / SW);
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = std::ceil(1.f * dims[dims_size - 1 - i] / poolLayer._stride[i]);
         } else if (padType == "same_lower") {
-            OHTemp = std::floor(1.f * IH / SH);
-            OWTemp = std::floor(1.f * IW / SW);
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] = std::floor(1.f * dims[dims_size - 1 - i] / poolLayer._stride[i]);
         } else {
             auto it = std::find_if(
                 poolLayer.params.begin(),
@@ -67,25 +62,31 @@ public:
             if (it != poolLayer.params.end()) {
                 if (it->second == "floor") isCeil = false;
             }
-            PR = poolLayer._pads_end[X_AXIS];
-            PB = poolLayer._pads_end[Y_AXIS];
-            OHTemp += 1.f * (IH + PH + PB - KH) / SH;
-            OWTemp += 1.f * (IW + PW + PR - KW) / SW;
+            for (int i = 0; i < spacial_d_size; i++)
+                OD_temp[i] += 1.f * (dims[dims_size - 1 - i] + poolLayer._padding[i] +
+                        poolLayer._pads_end[i] - poolLayer._kernel[i]) / poolLayer._stride[i];
             if (isCeil) {
-                OHTemp = std::ceil(OHTemp);
-                OWTemp = std::ceil(OWTemp);
+                for (int i = 0; i < spacial_d_size; i++)
+                    OD_temp[i] = std::ceil(OD_temp[i]);
             } else {
-                OHTemp = std::floor(OHTemp);
-                OWTemp = std::floor(OWTemp);
+                for (int i = 0; i < spacial_d_size; i++)
+                    OD_temp[i] = std::floor(OD_temp[i]);
             }
-            if ((OHTemp - 1) * SH >= IH + PH) --OHTemp;
-            if ((OWTemp - 1) * SW >= IW + PW) --OWTemp;
+            for (int i = 0; i < spacial_d_size; i++)
+                if ((OD_temp[i] - 1) * poolLayer._stride[i] >= dims[dims_size - 1 - i] +
+                        poolLayer._padding[i]) --OD_temp[i];
         }
-        if (OHTemp < 0 || OWTemp < 0)
-            THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
-        size_t OH = static_cast<size_t>(OHTemp);
-        size_t OW = static_cast<size_t>(OWTemp);
-        outShapes.emplace_back(std::initializer_list<size_t>{inputN, IC, OH, OW});
+        for (int i = 0; i < spacial_d_size; i++)
+            if (OD_temp[i] < 0)
+                THROW_IE_EXCEPTION << "New shapes " << details::dumpVec(dims) << " make output shape negative";
+
+        SizeVector outShape = {inputN, IC};
+        for (int i = spacial_d_size - 1; i >= 0; i--)
+            outShape.push_back(static_cast<size_t>(OD_temp[i]));
+
+        outShapes.emplace_back(outShape);
+
+        delete[] OD_temp;
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_clustered_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_clustered_shape_infer.hpp
index 1aaf3e47c..b7161931a 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_clustered_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_clustered_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,17 +22,19 @@ class PriorBoxClusteredShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit PriorBoxClusteredShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                            const std::map<std::string, std::string>& params,
                            const std::map<std::string, Blob::Ptr>& blobs,
                            std::vector<SizeVector>& outShapes) override {
-                LayerParams lp{};
+        LayerParams lp{};
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
         std::vector<float> widths = cnnLayer.GetParamAsFloats("width", {});
-        size_t res_prod = widths.size() * inShapes[0][2] * inShapes[0][3] * 4;
+        size_t res_prod = widths.size() * 4;
+        for (int i = 2; i < inShapes[0].size(); i++)
+            res_prod *= inShapes[0][i];
         outShapes.push_back({1, 2, res_prod});
     }
 };
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_shape_infer.hpp
index 03a8d9c03..867651d72 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_priorbox_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class PriorBoxShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit PriorBoxShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
         std::vector<float> min_sizes = cnnLayer.GetParamAsFloats("min_size", {});
         std::vector<float> max_sizes = cnnLayer.GetParamAsFloats("max_size", {});
         bool flip = static_cast<bool>(cnnLayer.GetParamAsInt("flip"));
@@ -45,7 +45,9 @@ public:
             num_priors = (flip ? 2 : 1) * aspect_ratios.size() + min_sizes.size() - 1;
         }
 
-        size_t res_prod = num_priors * inShapes[0][2] * inShapes[0][3] * 4;
+        size_t res_prod = num_priors * 4;
+        for (int i = 2; i < inShapes[0].size(); i++)
+            res_prod *= inShapes[0][i];
         outShapes.push_back({1, 2, res_prod});
     }
 };
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_proposal_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_proposal_shape_infer.hpp
index 8058500de..6a09fe564 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_proposal_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_proposal_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ class ProposalShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit ProposalShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,8 +28,8 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
-        size_t post_nms_topn = cnnLayer.GetParamAsInt("post_nms_topn");
+        validate(&cnnLayer, inBlobs, params, blobs);
+        size_t post_nms_topn = static_cast<size_t>(cnnLayer.GetParamAsInt("post_nms_topn"));
         outShapes.push_back({inShapes[0][0] * post_nms_topn, 5});
     }
 };
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_psroi_pooling_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_psroi_pooling_shape_infer.hpp
index f6ce94e8b..c53feb208 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_psroi_pooling_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_psroi_pooling_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class PSRoiPoolingShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit PSRoiPoolingShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
         size_t output_dim = static_cast<size_t>(cnnLayer.GetParamAsInt("output_dim"));
         size_t group_size = static_cast<size_t>(cnnLayer.GetParamAsInt("group_size"));
         outShapes.push_back({inShapes[1][0], output_dim, group_size, group_size});
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_spatial_transformer_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_quantize_shape_infer.hpp
index 8548c9244..5a8ee085d 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_spatial_transformer_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_quantize_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,31 +6,34 @@
 
 #include <description_buffer.hpp>
 #include "ie_built_in_impl.hpp"
-#include <ie_layers.h>
 #include <map>
 #include <memory>
 #include <string>
 #include <vector>
+#include <debug.h>
+#include <cmath>
+#include <algorithm>
 
 namespace InferenceEngine {
 namespace ShapeInfer {
 
 /**
- *@brief Implementation of Shape inference for SpatialTransformer layer
+ *@brief Implementation of Shape inference for quantize layer
  */
-class SpatialTransformerShapeProp : public BuiltInShapeInferImpl {
+class QuantizeShapeProp : public BuiltInShapeInferImpl {
 public:
-    explicit SpatialTransformerShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+    explicit QuantizeShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
         LayerParams lp{};
-        CNNLayer cnnLayer(lp);
-        cnnLayer.params = params;
-        cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        QuantizeLayer quantizeLayer(lp);
+        quantizeLayer.params = params;
+        quantizeLayer.type = _type;
+        validate(&quantizeLayer, inBlobs, params, blobs);
+
         outShapes.push_back(inShapes[0]);
     }
 };
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_range_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_range_shape_infer.hpp
new file mode 100644
index 000000000..4719f0403
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_range_shape_infer.hpp
@@ -0,0 +1,51 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <cmath>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Range layer
+ */
+class RangeShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit RangeShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        RangeLayer rangeLayer(lp);
+        rangeLayer.params = params;
+        rangeLayer.type = _type;
+        validate(&rangeLayer, inBlobs, params, blobs);
+
+        const size_t RANGE_START = 0;
+        const size_t RANGE_LIMIT = 1;
+        const size_t RANGE_DELTA = 2;
+
+        float start = (inBlobs[RANGE_START]->cbuffer().as<float*>() +
+                       inBlobs[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+        float limit = (inBlobs[RANGE_LIMIT]->cbuffer().as<float*>() +
+                       inBlobs[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+        float delta = (inBlobs[RANGE_DELTA]->cbuffer().as<float*>() +
+                       inBlobs[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+        size_t work_amount_dst = std::floor(std::abs((limit - start) / delta));
+        outShapes = {{work_amount_dst}};
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_region_yolo_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_region_yolo_shape_infer.hpp
index 78847a078..bed8123f2 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_region_yolo_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_region_yolo_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,22 +22,43 @@ class RegionYoloShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit RegionYoloShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
         LayerParams lp{};
-        CNNLayer cnnLayer(lp);
-        cnnLayer.params = params;
-        cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        CNNLayer layer(lp);
+        layer.params = params;
+        int classes;
+        int coords;
+        int num;
+        bool do_softmax;
+        std::vector<int> mask;
+        classes = layer.GetParamAsInt("classes", 1);
+        coords = layer.GetParamAsInt("coords", 1);
+        num = layer.GetParamAsInt("num", 1);
+        do_softmax = static_cast<bool>(layer.GetParamAsInt("do_softmax", 1));
+        mask = layer.GetParamAsInts("mask", {});
+        unsigned int axis = layer.GetParamAsUInt("axis", 1);
+        int end_axis = layer.GetParamAsInt("end_axis", 1);
+        if (end_axis < 0) end_axis += inShapes[0].size();
+
         SizeVector outShape;
-        outShape.push_back(inShapes[0][0]);
-        size_t mul(1);
-        for (size_t i = 1; i < inShapes[0].size(); i++) {
-            mul *= inShapes[0][i];
+        if (do_softmax) {
+            size_t flat_dim = 1;
+            for (size_t i = 0; i < axis; i++) {
+                outShape.push_back(inShapes[0][i]);
+            }
+            for (size_t i = axis; i < end_axis + 1; i++) {
+                flat_dim *= inShapes[0][i];
+            }
+            outShape.push_back(flat_dim);
+            for (size_t i = end_axis + 1; i < inShapes[0].size(); i++) {
+                outShape.push_back(inShapes[0][i]);
+            }
+        } else {
+            outShape = {inShapes[0][0], (classes + coords + 1) * mask.size(), inShapes[0][2], inShapes[0][3]};
         }
-        outShape.push_back(mul);
         outShapes.push_back({outShape});
     }
 };
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_reorg_yolo_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_reorg_yolo_shape_infer.hpp
index 82ffafab4..7ae0a80ef 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_reorg_yolo_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_reorg_yolo_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class ReorgYoloShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit ReorgYoloShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
         size_t stride = static_cast<size_t>(cnnLayer.GetParamAsInt("stride"));
         SizeVector outShape;
         for (size_t i = 0; i < inShapes[0].size(); i++) {
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_resample_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_resample_shape_infer.hpp
index 8e67ccf3d..fe06a46d6 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_resample_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_resample_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class ResampleShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit ResampleShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,10 +30,24 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
-        // TODO: validate param and number of inputs (1)
-        auto scale = static_cast<size_t>(cnnLayer.GetParamAsInt("factor"));
-        outShapes.push_back({inShapes[0][0], inShapes[0][1], inShapes[0][2] * scale, inShapes[0][3] * scale});
+        validate(&cnnLayer, inBlobs, params, blobs);
+        SizeVector outShape;
+        if (inBlobs.size() == 2) {
+            auto* buffer = inBlobs[1]->cbuffer().as<float*>();
+            if (buffer != nullptr) {
+                for (int i = 0; i < inBlobs[1]->size(); i++) {
+                    outShape.push_back(static_cast<unsigned long>(buffer[i]));
+                }
+            } else {
+                THROW_IE_EXCEPTION << "Second input must have allocated data";
+            }
+        } else {
+            auto scale = static_cast<size_t>(cnnLayer.GetParamAsInt("factor"));
+            outShape = {inShapes[0][0], inShapes[0][1]};
+            for (int i = 2; i < inShapes[0].size(); i++)
+                outShape.push_back(inShapes[0][i] * scale);
+        }
+        outShapes.push_back(outShape);
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_reshape_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_reshape_shape_infer.hpp
index 97b6571d4..d586f3cec 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_reshape_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_reshape_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,6 +6,7 @@
 
 #include <description_buffer.hpp>
 #include "ie_built_in_impl.hpp"
+#include "precision_utils.h"
 #include <ie_layers.h>
 #include <map>
 #include <memory>
@@ -22,22 +23,48 @@ namespace ShapeInfer {
  */
 class ReshapeShapeProp : public BuiltInShapeInferImpl {
 public:
-    explicit ReshapeShapeProp(const std::string &type) : BuiltInShapeInferImpl(type) {}
+    explicit ReshapeShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector> &inShapes,
-                         const std::map<std::string, std::string> &params,
-                         const std::map<std::string, Blob::Ptr> &blobs,
-                         std::vector<SizeVector> &outShapes) override {
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
         LayerParams lp{};
         ReshapeLayer reshapeLayer(lp);
         reshapeLayer.params = params;
         reshapeLayer.type = _type;
-        validate(&reshapeLayer, inShapes, params, blobs);
+        validate(&reshapeLayer, inBlobs, params, blobs);
 
-        auto inputShape = inShapes[0];
-        size_t inputShapeTotal = std::accumulate(inputShape.begin(), inputShape.end(), 1lu, std::multiplies<size_t>());
         SizeVector outShape;
-        std::vector<int> reshapeMask = reshapeLayer.shape;
+        std::vector<int> reshapeMask;
+        if (inBlobs.size() == 2) {
+            if (inBlobs[1]->precision() == Precision::FP32) {
+                auto* buffer = inBlobs[1]->cbuffer().as<float*>();
+                if (buffer != nullptr) {
+                    for (int i = 0; i < inBlobs[1]->size(); i++) {
+                        reshapeMask.push_back(static_cast<int>(buffer[i]));
+                    }
+                } else {
+                    THROW_IE_EXCEPTION << "Second input must have allocated data";
+                }
+            } else if (inBlobs[1]->precision() == Precision::FP16) {
+                auto* buffer = inBlobs[1]->cbuffer().as<uint16_t*>();
+                if (buffer != nullptr) {
+                    for (int i = 0; i < inBlobs[1]->size(); i++) {
+                        reshapeMask.push_back(static_cast<int>(PrecisionUtils::f16tof32(buffer[i])));
+                    }
+                } else {
+                    THROW_IE_EXCEPTION << "Second input must have allocated data";
+                }
+            } else {
+                THROW_IE_EXCEPTION << "Second input has unsupported precision";
+            }
+        } else {
+            reshapeMask = reshapeLayer.shape;
+        }
+        auto inputShape = inShapes[0];
+        size_t inputShapeTotal = std::accumulate(inputShape.begin(), inputShape.end(), 1lu,
+                                                 std::multiplies<size_t>());
 
         if (reshapeMask.empty()) {
             outShape = {inputShapeTotal};
@@ -60,7 +87,8 @@ public:
                     outShape.push_back(reshapeMask[i]);
                 }
             }
-            size_t outputShapeTotal = std::accumulate(outShape.begin(), outShape.end(), 1lu, std::multiplies<size_t>());
+            size_t outputShapeTotal = std::accumulate(outShape.begin(), outShape.end(), 1lu,
+                                                      std::multiplies<size_t>());
             if (inputShapeTotal != outputShapeTotal)
                 THROW_IE_EXCEPTION << "Invalid reshape mask (dim attribute): number of elements in input: "
                                    << details::dumpVec(inputShape) << " and output: " << details::dumpVec(outShape)
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_reverse_sequence_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_reverse_sequence_shape_infer.hpp
new file mode 100644
index 000000000..858ffa6fb
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_reverse_sequence_shape_infer.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for ReverseSequence layer
+ */
+class ReverseSequenceShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit ReverseSequenceShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        ReverseSequenceLayer reverseSequenceLayer(lp);
+        reverseSequenceLayer.params = params;
+        reverseSequenceLayer.type = _type;
+        validate(&reverseSequenceLayer, inBlobs, params, blobs);
+
+        outShapes = {inShapes[0]};
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_rnn_cell_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_rnn_cell_shape_infer.hpp
new file mode 100644
index 000000000..4869b731f
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_rnn_cell_shape_infer.hpp
@@ -0,0 +1,46 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <description_buffer.hpp>
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for DetectionOutput layer
+ */
+template<class CELL, int S>
+class RNNBaseCellShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit RNNBaseCellShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        CELL cnnLayer(lp);
+        cnnLayer.params = params;
+        cnnLayer.type = _type;
+        validate(&cnnLayer, inBlobs, params, blobs);
+
+        auto state_dims = inShapes[1];
+        for (int i = 0; i < S; i++)
+            outShapes.push_back(state_dims);
+    }
+};
+
+using RNNCellShapeProp  = RNNBaseCellShapeProp<RNNCell,  1>;
+using GRUCellShapeProp  = RNNBaseCellShapeProp<GRUCell,  1>;
+using LSTMCellShapeProp = RNNBaseCellShapeProp<LSTMCell, 2>;
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_rnn_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_rnn_shape_infer.hpp
new file mode 100644
index 000000000..c8763a0ba
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_rnn_shape_infer.hpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <description_buffer.hpp>
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for DetectionOutput layer
+ */
+class RNNShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit RNNShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        RNNSequenceLayer rnn(lp);
+        rnn.params = params;
+        rnn.type = _type;
+        rnn.precision = Precision::FP32;   // FIXME: No ability to discover current precision. Assume fp32
+        validate(&rnn, inBlobs, params, blobs);
+
+        int state_size = rnn.hidden_size;
+
+        auto data_dims = inShapes[0];
+        data_dims[2] = static_cast<size_t>(state_size);
+        outShapes.push_back(data_dims);
+
+        for (int i = 1; i < inShapes.size(); i++) {
+            outShapes.push_back(inShapes[i]);
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_roi_pooling_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_roi_pooling_shape_infer.hpp
index b5f6c85eb..c128469e9 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_roi_pooling_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_roi_pooling_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class RoiPoolingShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit RoiPoolingShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,12 +30,16 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
 
-        int pooled_h = cnnLayer.GetParamAsInt("pooled_h");
-        int pooled_w = cnnLayer.GetParamAsInt("pooled_w");
-        outShapes.push_back(
-                {inShapes[1][0], inShapes[0][1], static_cast<size_t>(pooled_h), static_cast<size_t>(pooled_w)});
+        SizeVector out_shapes = {inShapes[1][0], inShapes[0][1]};
+        for (auto attr : {"pooled_d", "pooled_h", "pooled_w"}) {  // desired IR format: pooled="...,d,h,w"
+            int pooled = cnnLayer.GetParamAsInt(attr, -1);
+            if (pooled >= 0) {
+                out_shapes.push_back(static_cast<size_t>(pooled));
+            }
+        }
+        outShapes.push_back(out_shapes);
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_shape_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_shape_shape_infer.hpp
new file mode 100644
index 000000000..87fbab9e4
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_shape_shape_infer.hpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <description_buffer.hpp>
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <debug.h>
+#include <cmath>
+#include <ie_format_parser.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Shape layer
+ */
+class ShapeShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit ShapeShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        outShapes.push_back({inShapes[0].size()});
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_shuffle_channels_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_shuffle_channels_shape_infer.hpp
new file mode 100644
index 000000000..8bcda8985
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_shuffle_channels_shape_infer.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for ShuffleChannels layer
+ */
+class ShuffleChannelsShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit ShuffleChannelsShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        ShuffleChannelsLayer shuffleChannelsLayer(lp);
+        shuffleChannelsLayer.params = params;
+        shuffleChannelsLayer.type = _type;
+        validate(&shuffleChannelsLayer, inBlobs, params, blobs);
+
+        outShapes = {inShapes[0]};
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_simpler_nms_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_simpler_nms_shape_infer.hpp
index c39755f1e..ddc2eb191 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_simpler_nms_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_simpler_nms_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class SimplerNMSShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit SimplerNMSShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,7 +30,7 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
 
         size_t post_nms_topn = static_cast<size_t>(cnnLayer.GetParamAsInt("post_nms_topn"));
         outShapes.push_back({post_nms_topn, 5});
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_space_to_depth_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_space_to_depth_shape_infer.hpp
new file mode 100644
index 000000000..fdc14a1a6
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_space_to_depth_shape_infer.hpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for SpaceToDepth layer
+ */
+class SpaceToDepthShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit SpaceToDepthShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        SpaceToDepthLayer spaceToDepthLayer(lp);
+        spaceToDepthLayer.params = params;
+        spaceToDepthLayer.type = _type;
+        validate(&spaceToDepthLayer, inBlobs, params, blobs);
+
+        unsigned int block_size = spaceToDepthLayer.block_size;
+        outShapes = {inShapes[0]};
+
+        outShapes[0][outShapes[0].size() - 1] = inShapes[0][inShapes[0].size() - 1] / block_size;
+        outShapes[0][outShapes[0].size() - 2] = inShapes[0][inShapes[0].size() - 2] / block_size;
+        outShapes[0][outShapes[0].size() - 3] = inShapes[0][inShapes[0].size() - 3] * block_size * block_size;
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_split_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_split_shape_infer.hpp
index 94b612fe6..099380b11 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_split_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_split_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,7 +21,7 @@ class SplitShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit SplitShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -29,7 +29,7 @@ public:
         SplitLayer splitLayer(lp);
         splitLayer.params = params;
         splitLayer.type = _type;
-        validate(&splitLayer, inShapes, params, blobs);
+        validate(&splitLayer, inBlobs, params, blobs);
 
         std::vector<int> out_sizes = splitLayer.GetParamAsInts("out_sizes", {});
         if (out_sizes.empty())
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_squeeze_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_squeeze_shape_infer.hpp
new file mode 100644
index 000000000..6e0fe413d
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_squeeze_shape_infer.hpp
@@ -0,0 +1,115 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Squeeze layer
+ */
+class SqueezeShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit SqueezeShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        SqueezeLayer layer(lp);
+        layer.params = params;
+        layer.type = _type;
+        validate(&layer, inBlobs, params, blobs);
+
+        const size_t SQUEEZE_DATA = 0;
+        const size_t SQUEEZE_INDEXES = 1;
+
+        SizeVector data_dims;
+        SizeVector idx_dims;
+
+        idx_dims = inBlobs[SQUEEZE_INDEXES]->getTensorDesc().getDims();
+        if (idx_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Index vector should be 1 dimension";
+
+        if (inBlobs[SQUEEZE_INDEXES]->getTensorDesc().getPrecision() != Precision::I32 &&
+            inBlobs[SQUEEZE_INDEXES]->getTensorDesc().getPrecision() != Precision::FP32)
+            THROW_IE_EXCEPTION << " Incorrect 'indices_to_squeeze' input precision. Only FP32 and I32 are supported!";
+
+        data_dims = inBlobs[SQUEEZE_DATA]->getTensorDesc().getDims();
+
+        if (data_dims.size() <= idx_dims[0] && !(data_dims.size() == 1 && idx_dims[0] == 1))
+            THROW_IE_EXCEPTION << " Incompatible number of data dimensions and indexes vector length!";
+        SizeVector outShape;
+        switch (inBlobs[SQUEEZE_INDEXES]->precision()) {
+            case Precision::FP32: {
+                float* idx_data = inBlobs[SQUEEZE_INDEXES]->cbuffer().as<float*>() +
+                                  inBlobs[SQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                for (size_t i = 0; i < idx_dims[0]; i++) {
+                    float axis = idx_data[i];
+                    if (axis < 0)
+                        axis += data_dims.size();
+
+                    if (axis > data_dims.size()) {
+                        THROW_IE_EXCEPTION << "Index to squeeze exceeds data tensor dimension";
+                    } else if (data_dims[axis] != 1) {
+                        THROW_IE_EXCEPTION << "Index to squeeze of data tensor dimension is not 1";
+                    }
+                }
+                for (size_t j = 0; j < data_dims.size(); j++) {
+                    bool found = false;
+                    for (size_t i = 0; i < inBlobs[SQUEEZE_INDEXES]->size(); i++) {
+                        int32_t axis = idx_data[i];
+                        if (axis < 0)
+                            axis += data_dims.size();
+                        if (j == static_cast<size_t>(axis)) found = true;
+                    }
+                    if (!found) outShape.push_back(data_dims[j]);
+                }
+            }
+                break;
+            case Precision::I32: {
+                int32_t* idx_data = inBlobs[SQUEEZE_INDEXES]->cbuffer().as<int32_t*>() +
+                                    inBlobs[SQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                for (size_t i = 0; i < idx_dims[0]; i++) {
+                    int32_t axis = idx_data[i];
+                    if (axis < 0)
+                        axis += data_dims.size();
+
+                    if (axis > data_dims.size()) {
+                        THROW_IE_EXCEPTION << "Index to squeeze exceeds data tensor dimension";
+                    } else if (data_dims[axis] != 1) {
+                        THROW_IE_EXCEPTION << "Index to squeeze of data tensor dimension is not 1";
+                    }
+                }
+                for (size_t j = 0; j < data_dims.size(); j++) {
+                    bool found = false;
+                    for (size_t i = 0; i < inBlobs[SQUEEZE_INDEXES]->size(); i++) {
+                        int32_t axis = idx_data[i];
+                        if (axis < 0)
+                            axis += data_dims.size();
+                        if (j == static_cast<size_t>(axis)) found = true;
+                    }
+                    if (!found) outShape.push_back(data_dims[j]);
+                }
+            }
+                break;
+            default:
+                THROW_IE_EXCEPTION
+                        << "Incorrect 'indices_to_squeeze' input precision. Only FP32 and I32 are supported!";
+        }
+        outShapes.push_back(outShape);
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_strided_slice_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_strided_slice_shape_infer.hpp
new file mode 100644
index 000000000..074010dbc
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_strided_slice_shape_infer.hpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <algorithm>
+#include <vector>
+#include <shape_infer/const_infer/ie_strided_slice_const_infer.hpp>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for StridedSlice layer
+ */
+class StridedSliceShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit StridedSliceShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        StridedSliceHelper helper(inBlobs, params);
+        outShapes.push_back(helper.getOutputShape());
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_tensor_iterator_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_tensor_iterator_shape_infer.hpp
new file mode 100644
index 000000000..417bbd4ce
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_tensor_iterator_shape_infer.hpp
@@ -0,0 +1,109 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <description_buffer.hpp>
+#include "ie_built_in_impl.hpp"
+#include <shape_infer/ie_reshaper.hpp>
+#include <ie_layers.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for DetectionOutput layer
+ */
+class TensorIteratorShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit TensorIteratorShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void setOriginalLayer(const CNNLayer *layer) {
+        auto ti = dynamic_cast<const TensorIterator*>(layer);
+        if (!ti)
+            THROW_IE_EXCEPTION << "Error during shape infer. Original layer is not TensorIterator.";
+        _original_ti = ti;
+    }
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        TensorIterator ti(lp);
+        ti.params = params;
+        ti.type = _type;
+        ti.body = _original_ti->body;
+        ti.back_edges = _original_ti->back_edges;
+        ti.input_port_map = _original_ti->input_port_map;
+        ti.output_port_map = _original_ti->output_port_map;
+        validate(&ti, inBlobs, params, blobs);
+
+        // TODO: make util function to calculate num of iteration
+        int num_iteration = 1;
+
+        // Prepare input shapes for internal body
+        std::map<std::string, std::vector<size_t>> newInShapes;
+        for (auto &port_map : ti.input_port_map) {
+            int ext_port = port_map.from;
+            int int_port = port_map.to;
+            auto int_name = ti.body.inputs[int_port]->name;
+
+            auto shape = inShapes[ext_port];
+            if (port_map.axis != -1) {
+                int size = shape[port_map.axis];
+                int start = port_map.start < 0
+                        ? port_map.start + size + 1
+                        : port_map.start;
+                int end = port_map.end < 0
+                        ? port_map.end + size + 1
+                        : port_map.end;
+
+                num_iteration = std::abs(end - start) / std::abs(port_map.stride);
+
+                // port with iterating through. Change dimension with iteration
+                shape[port_map.axis] = port_map.part_size;
+            }
+
+            newInShapes[int_name] = shape;
+        }
+
+        // Body shape infer
+        _body_reshaper = std::make_shared<Reshaper>(_original_ti->body.inputs);
+        _body_reshaper->runNoApply(newInShapes);
+
+        outShapes.resize(ti.output_port_map.size());
+        for (auto &port_map : ti.output_port_map) {
+            int ext_port = port_map.from;
+            int int_port = port_map.to;
+            auto &int_out_data = ti.body.outputs[int_port];
+            auto shape = _body_reshaper->getResultShapeFor(int_out_data);
+
+            if (port_map.axis != -1) {
+                // port with iterating through. Change dimension with iteration
+                shape[port_map.axis] *= num_iteration;
+            }
+
+            outShapes[ext_port] = shape;
+        }
+    }
+
+    void apply() {
+        if (!_body_reshaper)
+            THROW_IE_EXCEPTION << "Request of apply reshape results while shape infer was not finished";
+        _body_reshaper->apply();
+    }
+
+
+private:
+    const TensorIterator* _original_ti;
+    std::shared_ptr<Reshaper> _body_reshaper;
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_tile_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_tile_shape_infer.hpp
index ad89d831b..c86654ecb 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_tile_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_tile_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -20,7 +20,7 @@ class TileShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit TileShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -28,7 +28,7 @@ public:
         TileLayer tileLayer(lp);
         tileLayer.params = params;
         tileLayer.type = _type;
-        validate(&tileLayer, inShapes, params, blobs);
+        validate(&tileLayer, inBlobs, params, blobs);
         outShapes.push_back(inShapes[0]);
         outShapes[0][tileLayer.axis] *= tileLayer.tiles;
     }
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_unsqueeze_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_unsqueeze_shape_infer.hpp
new file mode 100644
index 000000000..36dc36702
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_unsqueeze_shape_infer.hpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "ie_built_in_impl.hpp"
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <iostream>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Shape inference for Unsqueeze layer
+ */
+class UnsqueezeShapeProp : public BuiltInShapeInferImpl {
+public:
+    explicit UnsqueezeShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
+
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
+                         const std::map<std::string, std::string>& params,
+                         const std::map<std::string, Blob::Ptr>& blobs,
+                         std::vector<SizeVector>& outShapes) override {
+        LayerParams lp{};
+        UnsqueezeLayer unsqueezeLayer(lp);
+        unsqueezeLayer.params = params;
+        unsqueezeLayer.type = _type;
+        validate(&unsqueezeLayer, inBlobs, params, blobs);
+
+        const size_t UNSQUEEZE_DATA = 0;
+        const size_t UNSQUEEZE_INDEXES = 1;
+
+        SizeVector idx_dims = inBlobs[UNSQUEEZE_INDEXES]->getTensorDesc().getDims();
+        SizeVector data_dims = inBlobs[UNSQUEEZE_DATA]->getTensorDesc().getDims();
+        SizeVector outShape;
+        if (idx_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Index vector should be 1 dimension";
+        if (inBlobs[UNSQUEEZE_INDEXES]->getTensorDesc().getPrecision() != Precision::I32 &&
+            inBlobs[UNSQUEEZE_INDEXES]->getTensorDesc().getPrecision() != Precision::FP32)
+            THROW_IE_EXCEPTION << " Incorrect 'indices_to_squeeze' input precision. Only FP32 and I32 are supported!";
+
+        size_t max = data_dims.size();
+        switch (inBlobs[UNSQUEEZE_INDEXES]->precision()) {
+            case Precision::FP32: {
+                float* idx_data = inBlobs[UNSQUEEZE_INDEXES]->cbuffer().as<float*>() +
+                                  inBlobs[UNSQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+                for (size_t i = 0; i < idx_dims[0]; i++) {
+                    auto axis = static_cast<size_t>(idx_data[i]);
+                    if (axis > max) max = axis;
+                }
+                max++;
+                if ((idx_dims[0] + data_dims.size()) < max) {
+                    THROW_IE_EXCEPTION << "Indices_to_set for unsqueeze layer is out of tensor dimension";
+                }
+                max = inBlobs[UNSQUEEZE_INDEXES]->size() + data_dims.size();
+                for (size_t i = 0, j = 0, k = 0; i < max; i++) {
+                    if (k < inBlobs[UNSQUEEZE_INDEXES]->size() && i == idx_data[k]) {
+                        outShape.push_back(1);
+                        k++;
+                    } else {
+                        outShape.push_back(data_dims[j++]);
+                    }
+                }
+            }
+                break;
+            case Precision::I32: {
+                int32_t* idx_data = inBlobs[UNSQUEEZE_INDEXES]->cbuffer().as<int32_t*>() +
+                                    inBlobs[UNSQUEEZE_INDEXES]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                max = data_dims.size();
+                for (size_t i = 0; i < idx_dims[0]; i++) {
+                    auto axis = static_cast<size_t>(idx_data[i]);
+                    if (axis > max) max = axis;
+                }
+                max++;
+                if ((idx_dims[0] + data_dims.size()) < max) {
+                    THROW_IE_EXCEPTION << "Indices_to_set for unsqueeze layer is out of tensor dimension";
+                }
+                max = inBlobs[UNSQUEEZE_INDEXES]->size() + data_dims.size();
+                for (size_t i = 0, j = 0, k = 0; i < max; i++) {
+                    if (k < inBlobs[UNSQUEEZE_INDEXES]->size() && i == idx_data[k]) {
+                        outShape.push_back(1);
+                        k++;
+                    } else {
+                        outShape.push_back(data_dims[j++]);
+                    }
+                }
+            }
+            default:
+                THROW_IE_EXCEPTION << "Incorrect 'indices_to_set' input precision. Only FP32 and I32 are supported!";
+        }
+        outShapes.push_back(outShape);
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/ie_upsampling_shape_infer.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/ie_upsampling_shape_infer.hpp
index d74a6b92c..d7dc645ce 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/ie_upsampling_shape_infer.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/ie_upsampling_shape_infer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class UpsamplingShapeProp : public BuiltInShapeInferImpl {
 public:
     explicit UpsamplingShapeProp(const std::string& type) : BuiltInShapeInferImpl(type) {}
 
-    void inferShapesImpl(const std::vector<SizeVector>& inShapes,
+    void inferShapesImpl(const std::vector<Blob::CPtr>& inBlobs,
                          const std::map<std::string, std::string>& params,
                          const std::map<std::string, Blob::Ptr>& blobs,
                          std::vector<SizeVector>& outShapes) override {
@@ -30,9 +30,13 @@ public:
         CNNLayer cnnLayer(lp);
         cnnLayer.params = params;
         cnnLayer.type = _type;
-        validate(&cnnLayer, inShapes, params, blobs);
+        validate(&cnnLayer, inBlobs, params, blobs);
         size_t scale = static_cast<size_t>(cnnLayer.GetParamAsInt("scale"));
-        outShapes.push_back({inShapes[0][0], inShapes[0][1], inShapes[0][2] * scale, inShapes[0][3] * scale});
+        SizeVector out_shapes = {inShapes[0][0], inShapes[0][1]};
+        for (int i = 2; i < inShapes[0].size(); i++) {
+            out_shapes.push_back(inShapes[0][i] * scale);
+        }
+        outShapes.push_back(out_shapes);
     }
 };
 
diff --git a/inference-engine/src/inference_engine/shape_infer/built-in/impl_register.hpp b/inference-engine/src/inference_engine/shape_infer/built-in/impl_register.hpp
index 0c40bd426..9939c8fd1 100644
--- a/inference-engine/src/inference_engine/shape_infer/built-in/impl_register.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/built-in/impl_register.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_add_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_add_const_infer.hpp
new file mode 100644
index 000000000..043b0937f
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_add_const_infer.hpp
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for TBD layer
+ */
+class AddConstInfer : public ConstInferImpl {
+public:
+    explicit AddConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        size_t numInputs = inData.size();
+        if (inData.size() != 2)
+            THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". 2 inputs is supported";
+        auto* firstBlobBuffer = inData[0]->cbuffer().as<float*>();
+        auto* secondBlobBuffer = inData[1]->cbuffer().as<float*>();
+
+        if (!firstBlobBuffer || !secondBlobBuffer) {
+            THROW_IE_EXCEPTION << "empty input data";
+        }
+        auto outBlob = *outData.begin();
+        auto* outBuffer = outBlob->buffer().as<float*>();
+        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
+        if (inData[0]->size() != inData[1]->size()) {
+            THROW_IE_EXCEPTION << "inputs with different shapes are not supported";
+        }
+        for (int i = 0; i < outBlob->size(); i++) {
+            outBuffer[i] = firstBlobBuffer[i] + secondBlobBuffer[i];
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_concat_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_concat_const_infer.hpp
new file mode 100644
index 000000000..d14bdeca9
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_concat_const_infer.hpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Tile layer
+ */
+class ConcatConstInfer : public ConstInferImpl {
+public:
+    explicit ConcatConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        ConcatLayer layer(lp);
+        layer.params = params;
+        layer.type = _type;
+        _validator->parseParams(&layer);
+
+        auto outBlob = *outData.begin();
+        SizeVector outShape = outBlob->getTensorDesc().getDims();
+        auto* outBuffer = outBlob->buffer().as<float*>();
+
+        size_t outerSize = 1;
+        for (int i = 0; i < layer._axis; i++)
+            outerSize *= outShape[i];
+
+        size_t outIdx = 0;
+        for (size_t osIdx = 0; osIdx < outerSize; osIdx++) {
+            for (auto& inBlob : inData) {
+                const auto* inBuffer = inBlob->cbuffer().as<float*>();
+                size_t innerSize = inBlob->size() / outerSize;
+
+                for (size_t j = 0; j < innerSize; j++, outIdx++) {
+                    outBuffer[outIdx] = inBuffer[osIdx * innerSize + j];
+                }
+            }
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_const_infer.hpp
new file mode 100644
index 000000000..4ea84b83c
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_const_infer.hpp
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for TBD layer
+ */
+class ConstConstInfer : public ConstInferImpl {
+public:
+    explicit ConstConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+               const std::map<std::string, std::string>& params,
+               const std::map<std::string, Blob::Ptr>& blobs,
+               std::vector<Blob::Ptr>& outData) override {
+        auto it = blobs.find("custom");
+        if (it == blobs.end()) THROW_IE_EXCEPTION << "Missed `custom` blob";
+        // TODO: copy instead of putting pointer?
+        outData[0] = (*it).second;
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.cpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.cpp
new file mode 100644
index 000000000..1e491de04
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.cpp
@@ -0,0 +1,80 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifdef __INTEL_COMPILER
+#pragma warning disable: 2586
+#endif
+
+
+#include "ie_const_infer_holder.hpp"
+#include "ie_mul_const_infer.hpp"
+#include "ie_add_const_infer.hpp"
+#include "ie_div_const_infer.hpp"
+#include "ie_const_const_infer.hpp"
+#include "ie_shape_const_infer.hpp"
+#include "ie_power_const_infer.hpp"
+#include "ie_tile_const_infer.hpp"
+#include "ie_reshape_const_infer.hpp"
+#include "ie_gather_const_infer.hpp"
+#include "ie_split_const_infer.hpp"
+#include "ie_concat_const_infer.hpp"
+#include "ie_in_place_const_infer.hpp"
+#include "ie_strided_slice_const_infer.hpp"
+#include "ie_fill_const_infer.hpp"
+#include "ie_range_const_infer.hpp"
+#include <list>
+#include <memory>
+#include <string>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+ConstInferHolder::ImplsHolder::Ptr ConstInferHolder::GetImplsHolder() {
+    static ImplsHolder::Ptr localHolder;
+    if (localHolder == nullptr) {
+        localHolder = std::make_shared<ImplsHolder>();
+    }
+    return localHolder;
+}
+
+void ConstInferHolder::AddImpl(const std::string& name, const IConstInferImpl::Ptr& impl) {
+    GetImplsHolder()->list[name] = impl;
+}
+
+std::list<std::string> ConstInferHolder::getConstInferTypes() {
+    std::list<std::string> types;
+    auto& factories = GetImplsHolder()->list;
+    for (const auto& factory : factories) {
+        types.push_back(factory.first);
+    }
+    return types;
+}
+
+IConstInferImpl::Ptr ConstInferHolder::getConstInferImpl(const std::string& type) {
+    auto& impls = ConstInferHolder::GetImplsHolder()->list;
+    if (impls.find(type) != impls.end()) {
+        return impls[type];
+    }
+    return nullptr;
+}
+
+REG_CONST_INFER_FOR_TYPE(MulConstInfer, Mul);
+REG_CONST_INFER_FOR_TYPE(AddConstInfer, Add);
+REG_CONST_INFER_FOR_TYPE(DivConstInfer, Div);
+REG_CONST_INFER_FOR_TYPE(ShapeConstInfer, Shape);
+REG_CONST_INFER_FOR_TYPE(ConstConstInfer, Const);
+REG_CONST_INFER_FOR_TYPE(PowerConstInfer, Power);
+REG_CONST_INFER_FOR_TYPE(TileConstInfer, Tile);
+REG_CONST_INFER_FOR_TYPE(ReshapeConstInfer, Reshape);
+REG_CONST_INFER_FOR_TYPE(GatherConstInfer, Gather);
+REG_CONST_INFER_FOR_TYPE(SplitConstInfer, Split);
+REG_CONST_INFER_FOR_TYPE(ConcatConstInfer, Concat);
+REG_CONST_INFER_FOR_TYPE(InPlaceConstInfer, Unsqueeze);
+REG_CONST_INFER_FOR_TYPE(InPlaceConstInfer, Squeeze);
+REG_CONST_INFER_FOR_TYPE(StridedSliceConstInfer, StridedSlice);
+REG_CONST_INFER_FOR_TYPE(FillConstInfer, Fill);
+REG_CONST_INFER_FOR_TYPE(RangeConstInfer, Range);
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.hpp
new file mode 100644
index 000000000..ab3ed0377
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_holder.hpp
@@ -0,0 +1,52 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <list>
+#include <map>
+#include <memory>
+
+#include <ie_iextension.h>
+#include "details/caseless.hpp"
+#include <description_buffer.hpp>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Holder of const infer implementations for build-in IE layers, that plugins support out-of-the-box
+ */
+class INFERENCE_ENGINE_API_CLASS(ConstInferHolder) {
+    struct ImplsHolder {
+        using Ptr = std::shared_ptr<ImplsHolder>;
+        InferenceEngine::details::caseless_map<std::string, IConstInferImpl::Ptr> list;
+    };
+public:
+    std::list<std::string> getConstInferTypes();
+
+    IConstInferImpl::Ptr getConstInferImpl(const std::string& type);
+
+    static void AddImpl(const std::string& name, const IConstInferImpl::Ptr& impl);
+
+private:
+    static ImplsHolder::Ptr GetImplsHolder();
+};
+
+template<typename Impl>
+class ImplRegisterBase {
+public:
+    explicit ImplRegisterBase(const std::string& type) {
+        ConstInferHolder::AddImpl(type, std::make_shared<Impl>(type));
+    }
+};
+
+#define REG_CONST_INFER_FOR_TYPE(__prim, __type) \
+static ImplRegisterBase<__prim> __ci_reg__##__type(#__type)
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.cpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.cpp
new file mode 100644
index 000000000..224b4ed47
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.cpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <map>
+#include <vector>
+#include <string>
+#include "ie_const_infer_impl.hpp"
+
+using namespace InferenceEngine;
+using namespace ShapeInfer;
+
+void ConstInferImpl::infer(const std::vector<Blob::CPtr>& inData,
+                           const std::map<std::string, std::string>& params,
+                           const std::map<std::string, Blob::Ptr>& blobs,
+                           std::vector<Blob::Ptr>& outData) {
+    std::string errorPrefix = "Ref infer error for Layer with `" + _type + "` type: ";
+    if (outData.empty()) THROW_IE_EXCEPTION << errorPrefix + "output data is empty";
+    for (auto const& data : outData) {
+        if (data->buffer() == nullptr) THROW_IE_EXCEPTION << errorPrefix + "output data is not allocated";
+    }
+    // TODO: check for direct (NCHW, NCH, NC) and FP32
+    inferImpl(inData, params, blobs, outData);
+}
+
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.hpp
new file mode 100644
index 000000000..6ed1cbba9
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_const_infer_impl.hpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <map>
+#include <vector>
+#include <memory>
+#include <string>
+#include "ie_layer_validators.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ * @experimental
+ * @class IConstInferImpl
+ * @brief This class provides interface for the layer's implementation to propagate const
+ */
+class IConstInferImpl {
+public:
+    using Ptr = std::shared_ptr<IConstInferImpl>;
+
+    virtual ~IConstInferImpl() = default;
+
+
+    /**
+     * @brief all shapes are valid, blobs are allocated
+     *
+     */
+    virtual void infer(const std::vector<Blob::CPtr>& inData,
+                       const std::map<std::string, std::string>& params,
+                       const std::map<std::string, Blob::Ptr>& blobs,
+                       std::vector<Blob::Ptr>& outData) = 0;
+};
+
+class ConstInferImpl : public IConstInferImpl {
+public:
+    explicit ConstInferImpl(const std::string& type) : _type(type) {
+        _validator = details::LayerValidators::getInstance()->getValidator(_type);
+        if (!_validator)
+            THROW_IE_EXCEPTION << "Internal error: failed to find validator for layer with type: " << _type;
+    }
+
+    virtual void inferImpl(const std::vector<Blob::CPtr>& inData,
+                           const std::map<std::string, std::string>& params,
+                           const std::map<std::string, Blob::Ptr>& blobs,
+                           std::vector<Blob::Ptr>& outData) = 0;
+
+    void infer(const std::vector<Blob::CPtr>& inData,
+               const std::map<std::string, std::string>& params,
+               const std::map<std::string, Blob::Ptr>& blobs,
+               std::vector<Blob::Ptr>& outData) override;
+
+protected:
+    std::string _type;
+    // to get parsed descendant CNNLayer from map<string,string>
+    details::LayerValidator::Ptr _validator;
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
+
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_div_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_div_const_infer.hpp
new file mode 100644
index 000000000..e5da59793
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_div_const_infer.hpp
@@ -0,0 +1,51 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for TBD layer
+ */
+class DivConstInfer : public ConstInferImpl {
+public:
+    explicit DivConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        size_t numInputs = inData.size();
+        if (inData.size() != 2)
+            THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". 2 inputs is supported";
+        auto* firstBlobBuffer = inData[0]->cbuffer().as<float*>();
+        auto* secondBlobBuffer = inData[1]->cbuffer().as<float*>();
+
+        if (!firstBlobBuffer || !secondBlobBuffer) {
+            THROW_IE_EXCEPTION << "empty input data";
+        }
+        auto outBlob = *outData.begin();
+        auto* outBuffer = outBlob->buffer().as<float*>();
+        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
+        if (inData[0]->size() != inData[1]->size()) {
+            THROW_IE_EXCEPTION << "inputs with different shapes are not supported";
+        }
+        for (int i = 0; i < outBlob->size(); i++) {
+            if (secondBlobBuffer[i] == 0) THROW_IE_EXCEPTION << "division by zero";
+            outBuffer[i] = firstBlobBuffer[i] / secondBlobBuffer[i];
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_fill_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_fill_const_infer.hpp
new file mode 100644
index 000000000..0d2dd7b42
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_fill_const_infer.hpp
@@ -0,0 +1,108 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Fill layer
+ */
+class FillConstInfer : public ConstInferImpl {
+public:
+    explicit FillConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        const size_t FILL_DIMS = 0;
+        const size_t FILL_VALUE = 1;
+        if (inData.empty() || outData.empty())
+            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
+
+        if (inData.size() != 2)
+            THROW_IE_EXCEPTION << " Incorrect number of input edges!";
+
+        SizeVector dims = inData[FILL_DIMS]->getTensorDesc().getDims();
+        if (dims.size() > 1)
+            THROW_IE_EXCEPTION << " Fill dimensions vector should be 1 dimension";
+
+        if (inData[FILL_DIMS]->getTensorDesc().getPrecision() != Precision::I32)
+            THROW_IE_EXCEPTION << " Fill dimensions vector should be I32!";
+
+        SizeVector value_dims = inData[FILL_VALUE]->getTensorDesc().getDims();
+        if (value_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Value scalar should have 1 dimension";
+
+        if (!(inData[FILL_VALUE]->getTensorDesc().getPrecision() == Precision::I32 &&
+              outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
+            !(inData[FILL_VALUE]->getTensorDesc().getPrecision() == Precision::FP32 &&
+              outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
+            THROW_IE_EXCEPTION <<
+                               " 'Value' input scalars and output tensor should have same precision and only FP32 and I32 are supported!";
+        }
+
+        int32_t* fill_dims = inData[FILL_DIMS]->cbuffer().as<int32_t*>() +
+                             inData[FILL_DIMS]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        size_t fill_size = inData[FILL_DIMS]->getTensorDesc().getDims()[0];
+        SizeVector dst_dims = outData[0]->getTensorDesc().getDims();
+
+        if (dst_dims.size() != fill_size) {
+            THROW_IE_EXCEPTION << "Output tensor dimension mismatch";
+        }
+
+        size_t work_amount_dst = 1;
+        for (size_t i = 0; i < dst_dims.size(); i++) {
+            work_amount_dst *= fill_dims[i];
+            if (static_cast<int>(dst_dims[i]) != fill_dims[i]) {
+                THROW_IE_EXCEPTION << "Output tensor dimension size mismatch";
+            }
+        }
+
+        switch (outData[0]->precision()) {
+            case Precision::FP32: {
+                float* dst_data = outData[0]->cbuffer().as<float*>() +
+                                  outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                float value = (inData[FILL_VALUE]->cbuffer().as<float*>() +
+                               inData[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+
+                parallel_nt(0, [&](const int ithr, const int nthr) {
+                    size_t start = 0, end = 0;
+                    splitter(work_amount_dst, nthr, ithr, start, end);
+                    std::fill_n(dst_data + start, end - start, value);
+                });
+            }
+                break;
+            case Precision::I32: {
+                int32_t* dst_data = outData[0]->cbuffer().as<int32_t*>() +
+                                    outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+                int32_t value = (inData[FILL_VALUE]->cbuffer().as<int32_t*>() +
+                                 inData[FILL_VALUE]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0];
+
+                parallel_nt(0, [&](const int ithr, const int nthr) {
+                    size_t start = 0, end = 0;
+                    splitter(work_amount_dst, nthr, ithr, start, end);
+                    std::fill_n(dst_data + start, end - start, value);
+                });
+            }
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Incorrect output precision. Only FP32 and I32 are supported!";
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_gather_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_gather_const_infer.hpp
new file mode 100644
index 000000000..23c0b7177
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_gather_const_infer.hpp
@@ -0,0 +1,171 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_algorithm.hpp>
+#include "ie_const_infer_impl.hpp"
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+struct GatherParams {
+    size_t dataLength = 1;
+    int axis = 0;
+    size_t indexRange = 0;
+    size_t numDictionaries = 1;
+};
+
+template<typename data_t>
+void
+gather(data_t* src_dataIdx, const Blob::CPtr& indexes, const Blob::CPtr& dictionary, const Blob::Ptr& output,
+       const GatherParams& p) {
+    size_t src_dataIdxSize = indexes->size();
+    size_t dataSize = sizeof(float) * p.dataLength;
+
+    const float* src_dataDict =
+            dictionary->cbuffer().as<const float*>() + dictionary->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    float* dst_data = output->cbuffer().as<float*>() + output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+    src_dataIdx += indexes->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+    if (p.axis == 0) {
+        parallel_for(src_dataIdxSize, [&](size_t i) {
+            int idx = static_cast<int>(src_dataIdx[i]);
+
+            //  Index clipping
+            details::clipping(&idx, 0, p.indexRange);
+
+            //  Copying data to destination from Dictionary
+            ie_memcpy(&dst_data[p.dataLength * i],
+                      output->byteSize() - (p.dataLength * i),
+                      &src_dataDict[p.dataLength * idx],
+                      dataSize);
+        });
+    } else {
+        parallel_for(src_dataIdxSize, [&](size_t i) {
+            int idx = static_cast<int>(src_dataIdx[i]);
+
+            //  Index clipping
+            details::clipping(&idx, 0, p.indexRange);
+
+            //  Copying data to destination from Dictionary
+            for (size_t j = 0; j < p.numDictionaries; j++) {
+                ie_memcpy(&dst_data[p.dataLength * (i + j * src_dataIdxSize)],
+                          output->byteSize() - (p.dataLength * (i + j * src_dataIdxSize)),
+                          &src_dataDict[p.dataLength * (idx + j * p.indexRange)],
+                          dataSize);
+            }
+        });
+    }
+}
+
+/**
+ *@brief Implementation of Const inference for Gather layer
+ */
+class GatherConstInfer : public ConstInferImpl {
+public:
+    explicit GatherConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        CNNLayer layer(lp);
+        layer.params = params;
+
+
+        const size_t GATHER_DICTIONARY = 0;
+        const size_t GATHER_INDEXES = 1;
+
+        if (inData.size() != 2 || outData.empty())
+            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
+
+        Precision inIdxPrecision = inData[GATHER_INDEXES]->getTensorDesc().getPrecision();
+        if (inIdxPrecision != Precision::FP32 &&
+            inIdxPrecision != Precision::I32 &&
+            inIdxPrecision != Precision::U16 &&
+            inIdxPrecision != Precision::I16 &&
+            inIdxPrecision != Precision::U8 &&
+            inIdxPrecision != Precision::I8)
+            THROW_IE_EXCEPTION << " Incorrect input precision. Only FP32|I32|U16|I16|U8|I8 are supported!";
+
+        //  Remove redundant dimensions
+        const SizeVector& dictionary_dims = inData[GATHER_DICTIONARY]->getTensorDesc().getDims();
+        size_t actualAxis = 0;
+        SizeVector dims_actual;
+        for (size_t i = 0; i < dictionary_dims.size(); i++) {
+            if (dictionary_dims[i] > 1) {
+                for (size_t j = i; j < dictionary_dims.size(); j++)
+                    dims_actual.push_back(dictionary_dims[j]);
+                break;
+            }
+        }
+
+        if (dims_actual.size() == 0)
+            THROW_IE_EXCEPTION << " Incorrect input parameters dimension!";
+
+        GatherParams p;
+        p.axis = static_cast<int>(layer.GetParamAsInt("axis"));
+        // Dictionary must be at least rank axis + 1
+        if (p.axis > 0 && dims_actual.size() < (1 + p.axis))
+            THROW_IE_EXCEPTION << " Incorrect input parameters dimensions and axis number!";
+        else if (p.axis < 0 && (static_cast<int>(dims_actual.size()) + p.axis) < 0)
+            THROW_IE_EXCEPTION << " Incorrect input parameters dimensions and axis number!";
+
+        if (p.axis < 0)
+            p.axis += dims_actual.size();
+
+        //  Find number of dictionaries, index range and data length
+        for (size_t i = 0; i < p.axis; i++)
+            p.numDictionaries *= dims_actual[i];
+        p.indexRange = dims_actual[p.axis];
+        for (size_t i = p.axis + 1; i < dims_actual.size(); i++)
+            p.dataLength *= dims_actual[i];
+
+        if (p.dataLength == 0)
+            THROW_IE_EXCEPTION << " Incorrect input parameters dimension!";
+
+
+        switch (inData[GATHER_INDEXES]->precision()) {
+            case Precision::FP32:
+                gather(inData[GATHER_INDEXES]->cbuffer().as<const float*>(), inData[GATHER_INDEXES],
+                       inData[GATHER_DICTIONARY], outData[0], p);
+                break;
+            case Precision::I32:
+                gather(inData[GATHER_INDEXES]->cbuffer().as<const int32_t*>(), inData[GATHER_INDEXES],
+                       inData[GATHER_DICTIONARY], outData[0], p);
+                break;
+            case Precision::U16:
+                gather(inData[GATHER_INDEXES]->cbuffer().as<const uint16_t*>(), inData[GATHER_INDEXES],
+                       inData[GATHER_DICTIONARY], outData[0], p);
+                break;
+            case Precision::I16:
+                gather(inData[GATHER_INDEXES]->cbuffer().as<const int16_t*>(), inData[GATHER_INDEXES],
+                       inData[GATHER_DICTIONARY], outData[0], p);
+                break;
+            case Precision::U8:
+                gather(inData[GATHER_INDEXES]->cbuffer().as<const uint8_t*>(), inData[GATHER_INDEXES],
+                       inData[GATHER_DICTIONARY], outData[0], p);
+                break;
+            case Precision::I8:
+                gather(inData[GATHER_INDEXES]->cbuffer().as<const int8_t*>(), inData[GATHER_INDEXES],
+                       inData[GATHER_DICTIONARY], outData[0], p);
+                break;
+            default:
+                THROW_IE_EXCEPTION << " Unsupported precision!";
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_in_place_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_in_place_const_infer.hpp
new file mode 100644
index 000000000..abbcd20b2
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_in_place_const_infer.hpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Unsqueeze layer
+ */
+class InPlaceConstInfer : public ConstInferImpl {
+public:
+    explicit InPlaceConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        auto inBlob = inData[0];
+        auto outBlob = outData[0];
+        auto* inBuffer = inBlob->cbuffer().as<uint8_t*>();
+        auto* outBuffer = outBlob->buffer().as<uint8_t*>();
+        ie_memcpy(outBuffer, outData[0]->byteSize(), inBuffer, inBlob->byteSize());
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_mul_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_mul_const_infer.hpp
new file mode 100644
index 000000000..37f398f89
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_mul_const_infer.hpp
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for TBD layer
+ */
+class MulConstInfer : public ConstInferImpl {
+public:
+    explicit MulConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        size_t numInputs = inData.size();
+        if (inData.size() != 2)
+            THROW_IE_EXCEPTION << "Unsupported number of inputs: " << numInputs << ". 2 inputs is supported";
+        auto* firstBlobBuffer = inData[0]->cbuffer().as<float*>();
+        auto* secondBlobBuffer = inData[1]->cbuffer().as<float*>();
+
+        if (!firstBlobBuffer || !secondBlobBuffer) {
+            THROW_IE_EXCEPTION << "empty input data";
+        }
+        auto outBlob = *outData.begin();
+        auto* outBuffer = outBlob->buffer().as<float*>();
+        if (!outBuffer) THROW_IE_EXCEPTION << "empty output data";
+        if (inData[0]->size() != inData[1]->size()) {
+            THROW_IE_EXCEPTION << "inputs with different shapes are not supported";
+        }
+        for (int i = 0; i < outBlob->size(); i++) {
+            outBuffer[i] = firstBlobBuffer[i] * secondBlobBuffer[i];
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_power_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_power_const_infer.hpp
new file mode 100644
index 000000000..d6ce3df3a
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_power_const_infer.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <cmath>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for TBD layer
+ */
+class PowerConstInfer : public ConstInferImpl {
+public:
+    explicit PowerConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        PowerLayer layer(lp);
+        layer.params = params;
+        layer.type = _type;
+        _validator->parseParams(&layer);
+
+        float scale = layer.scale;
+        float power = layer.power;
+        float shift = layer.offset;
+
+        // TODO: check for access and sizes
+        auto* input = inData[0]->cbuffer().as<float*>();
+        auto* output = outData[0]->buffer().as<float*>();
+        size_t dataSize = inData[0]->size();
+
+        if (power == 1.0f) {
+            for (int i = 0; i < dataSize; i++) {
+                output[i] = input[i] * scale + shift;
+            }
+        } else {
+            for (int i = 0; i < dataSize; i++) {
+                output[i] = pow(input[i] * scale + shift, power);
+            }
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_range_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_range_const_infer.hpp
new file mode 100644
index 000000000..dfdd7f8b7
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_range_const_infer.hpp
@@ -0,0 +1,116 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Fill layer
+ */
+class RangeConstInfer : public ConstInferImpl {
+public:
+    explicit RangeConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    template<typename data_t>
+    void range(data_t start, data_t limit, data_t delta, const Blob::Ptr& output) {
+        size_t dst_size = (output->getTensorDesc().getDims())[0];
+        data_t* dst_data = output->cbuffer().as<data_t*>() +
+                           output->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        size_t work_amount_dst = std::floor(std::abs((limit - start) / delta));
+        if (work_amount_dst != dst_size)
+            THROW_IE_EXCEPTION << "Range indexes exceeds data tensor dimension";
+
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t iwork = 0, end = 0;
+            splitter(work_amount_dst, nthr, ithr, iwork, end);
+            data_t dst_value = start + iwork * delta;
+
+            for (; iwork < end; ++iwork, dst_value += delta) {
+                dst_data[iwork] = dst_value;
+            }
+        });
+    }
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        const size_t RANGE_START = 0;
+        const size_t RANGE_LIMIT = 1;
+        const size_t RANGE_DELTA = 2;
+        if (inData.empty() || outData.empty())
+            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
+
+        if (inData.size() != 3)
+            THROW_IE_EXCEPTION << " Incorrect number of input edges!";
+
+        SizeVector start_dims = inData[RANGE_START]->getTensorDesc().getDims();
+        if (start_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Start scalar should have 1 dimension";
+
+        SizeVector limit_dims = inData[RANGE_LIMIT]->getTensorDesc().getDims();
+        if (limit_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Limit scalar should have 1 dimension";
+
+        SizeVector delta_dims = inData[RANGE_DELTA]->getTensorDesc().getDims();
+        if (delta_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Delta scalar should have 1 dimension";
+
+        SizeVector dst_dims = outData[0]->getTensorDesc().getDims();
+        if (dst_dims.size() > 1)
+            THROW_IE_EXCEPTION << " Output vector should have 1 dimension";
+
+        if (!(inData[RANGE_START]->getTensorDesc().getPrecision() == Precision::I32 &&
+              inData[RANGE_LIMIT]->getTensorDesc().getPrecision() == Precision::I32 &&
+              inData[RANGE_DELTA]->getTensorDesc().getPrecision() == Precision::I32 &&
+              outData[0]->getTensorDesc().getPrecision() == Precision::I32) &&
+            !(inData[RANGE_START]->getTensorDesc().getPrecision() == Precision::FP32 &&
+              inData[RANGE_LIMIT]->getTensorDesc().getPrecision() == Precision::FP32 &&
+              inData[RANGE_DELTA]->getTensorDesc().getPrecision() == Precision::FP32 &&
+              outData[0]->getTensorDesc().getPrecision() == Precision::FP32)) {
+            THROW_IE_EXCEPTION <<
+                               " 'Start', 'Limit', 'Delta' input scalars and output tensor should have same precision"
+                               <<
+                               "and only FP32 and I32 are supported!";
+        }
+
+        StatusCode retcode = OK;
+        switch (outData[0]->precision()) {
+            case Precision::FP32: {
+                range((inData[RANGE_START]->cbuffer().as<float*>() +
+                       inData[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                      (inData[RANGE_LIMIT]->cbuffer().as<float*>() +
+                       inData[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                      (inData[RANGE_DELTA]->cbuffer().as<float*>() +
+                       inData[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], outData[0]);
+            }
+                break;
+            case Precision::I32: {
+                range((inData[RANGE_START]->cbuffer().as<int32_t*>() +
+                       inData[RANGE_START]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                      (inData[RANGE_LIMIT]->cbuffer().as<int32_t*>() +
+                       inData[RANGE_LIMIT]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0],
+                      (inData[RANGE_DELTA]->cbuffer().as<int32_t*>() +
+                       inData[RANGE_DELTA]->getTensorDesc().getBlockingDesc().getOffsetPadding())[0], outData[0]);
+            }
+                break;
+            default:
+                THROW_IE_EXCEPTION << "Incorrect output precision. Only FP32 and I32 are supported!";
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_reshape_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_reshape_const_infer.hpp
new file mode 100644
index 000000000..71f470bfc
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_reshape_const_infer.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Tile layer
+ */
+class ReshapeConstInfer : public ConstInferImpl {
+public:
+    explicit ReshapeConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        auto inBlob = *inData.begin();
+        const auto* inBuffer = inBlob->cbuffer().as<uint8_t*>();
+        auto outBlob = *outData.begin();
+        auto* outBuffer = outBlob->buffer().as<uint8_t*>();
+        ie_memcpy(outBuffer, outBlob->byteSize(), inBuffer, inBlob->byteSize());
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_shape_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_shape_const_infer.hpp
new file mode 100644
index 000000000..531104c06
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_shape_const_infer.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for TBD layer
+ */
+class ShapeConstInfer : public ConstInferImpl {
+public:
+    explicit ShapeConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        SizeVector inShape = (*inData.begin())->getTensorDesc().getDims();
+        auto outBlob = *outData.begin();
+        if (inShape.size() != outBlob->size()) THROW_IE_EXCEPTION << "Number of shapes don't match size of output";
+        auto* outBuffer = outBlob->buffer().as<float*>();
+        for (int i = 0; i < outBlob->size(); i++) {
+            outBuffer[i] = inShape[i];
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_split_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_split_const_infer.hpp
new file mode 100644
index 000000000..39135b151
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_split_const_infer.hpp
@@ -0,0 +1,58 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Tile layer
+ */
+class SplitConstInfer : public ConstInferImpl {
+public:
+    explicit SplitConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        SplitLayer layer(lp);
+        layer.params = params;
+        layer.type = _type;
+        _validator->parseParams(&layer);
+
+        auto inBlob = *inData.begin();
+        SizeVector inShape = inBlob->getTensorDesc().getDims();
+        const auto* inBuffer = inBlob->cbuffer().as<float*>();
+
+        size_t outerSize = 1;
+        for (int i = 0; i < layer._axis; i++)
+            outerSize *= inShape[i];
+
+        for (size_t osIdx = 0; osIdx < outerSize; osIdx++) {
+            for (auto& outBlob : outData) {
+                auto* outBuffer = outBlob->buffer().as<float*>();
+                size_t innerSize = outBlob->size() / outerSize;
+
+                for (size_t j = 0; j < innerSize; j++, inBuffer++) {
+                    outBuffer[osIdx * innerSize + j] = *inBuffer;
+                }
+            }
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_strided_slice_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_strided_slice_const_infer.hpp
new file mode 100644
index 000000000..6aee61da9
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_strided_slice_const_infer.hpp
@@ -0,0 +1,384 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#define NOMINMAX
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+#include "ie_parallel.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+class StridedSliceHelper {
+public:
+    StridedSliceHelper(const std::vector<Blob::CPtr>& inData,
+                       const std::map<std::string, std::string>& params) {
+        LayerParams lp{};
+        CNNLayer layer(lp);
+        layer.params = params;
+
+        src_data = inData[STRIDEDSLICE_DATA]->cbuffer().as<const float*>() +
+                   inData[STRIDEDSLICE_DATA]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        if (inData.size() > 4)
+            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
+
+        src_dims = inData[STRIDEDSLICE_DATA]->getTensorDesc().getDims();
+
+        bounds_size = 0;
+        if (inData.size() > 1) {
+            begin_dims = inData[STRIDEDSLICE_BEGIN]->getTensorDesc().getDims();
+            if (inData[STRIDEDSLICE_BEGIN]->getTensorDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << " Incorrect 'begin' input precision. Only I32 is supported!";
+            if (begin_dims.size() > 1)
+                THROW_IE_EXCEPTION << " Begin vector should be 1 dimension";
+            bounds_size = begin_dims[0];
+        }
+
+        if (inData.size() > 2) {
+            end_dims = inData[STRIDEDSLICE_END]->getTensorDesc().getDims();
+            if (inData[STRIDEDSLICE_END]->getTensorDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << " Incorrect 'end' input precision. Only I32 is supported!";
+            if (end_dims.size() > 1)
+                THROW_IE_EXCEPTION << " End vector should be 1 dimension";
+            if (begin_dims[0] != end_dims[0])
+                THROW_IE_EXCEPTION << " Begin vector size should be equal end vectror size";
+        }
+
+        if (inData.size() > 3) {
+            stride_dims = inData[STRIDEDSLICE_STRIDE]->getTensorDesc().getDims();
+            if (inData[STRIDEDSLICE_STRIDE]->getTensorDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << " Incorrect 'strides' input precision. Only I32 is supported!";
+            if (stride_dims.size() > 1)
+                THROW_IE_EXCEPTION << " End vector should be 1 dimension";
+            if (begin_dims[0] != stride_dims[0])
+                THROW_IE_EXCEPTION << " Stride vector size should be equal begin vectror size";
+        }
+
+        std::string::size_type i;
+        std::string begin_mask_str = layer.GetParamAsString("begin_mask", "");
+        for (i = 0; i < begin_mask_str.size(); ++i) {
+            if (begin_mask_str[i] == '1') begin_mask.push_back(1);
+            else if (begin_mask_str[i] == '0') begin_mask.push_back(0);
+        }
+        for (; i < src_dims.size(); ++i) begin_mask.push_back(1);
+
+        std::string end_mask_str = layer.GetParamAsString("end_mask", "");
+        for (i = 0; i < end_mask_str.size(); ++i) {
+            if (end_mask_str[i] == '1') end_mask.push_back(1);
+            else if (end_mask_str[i] == '0') end_mask.push_back(0);
+        }
+        for (; i < src_dims.size(); ++i) end_mask.push_back(1);
+
+        std::string ellipsis_mask_str = layer.GetParamAsString("ellipsis_mask", "");
+        size_t ellipsis_mask_counter = 0;
+        for (i = 0; i < ellipsis_mask_str.size(); ++i) {
+            if (ellipsis_mask_str[i] == '1') {
+                ellipsis_mask_counter++;
+                ellipsis_mask.push_back(1);
+            } else if (ellipsis_mask_str[i] == '0') {
+                ellipsis_mask.push_back(0);
+            }
+        }
+        if (ellipsis_mask_counter > 1)
+            THROW_IE_EXCEPTION << " 'Ellipsis_mask' must be a power of two (only one ellipsis)!";
+        for (; i < src_dims.size(); ++i) ellipsis_mask.push_back(0);
+
+        std::string new_axis_mask_str = layer.GetParamAsString("new_axis_mask", "");
+        for (i = 0; i < new_axis_mask_str.size(); ++i) {
+            if (new_axis_mask_str[i] == '1') new_axis_mask.push_back(1);
+            else if (new_axis_mask_str[i] == '0') new_axis_mask.push_back(0);
+        }
+        for (; i < src_dims.size(); ++i) new_axis_mask.push_back(0);
+
+        std::string shrink_axis_mask_str = layer.GetParamAsString("shrink_axis_mask", "");
+        for (i = 0; i < shrink_axis_mask_str.size(); ++i) {
+            if (shrink_axis_mask_str[i] == '1') shrink_axis_mask.push_back(1);
+            else if (shrink_axis_mask_str[i] == '0') shrink_axis_mask.push_back(0);
+        }
+        for (; i < src_dims.size(); ++i) shrink_axis_mask.push_back(0);
+
+        int new_axis = 0;
+        for (auto& na : new_axis_mask)
+            new_axis += na;
+
+        shrink_axis = 0;
+        for (auto& sa : shrink_axis_mask)
+            shrink_axis += sa;
+        max_dims = src_dims.size() + new_axis;
+
+        //  ellipsis_mask must be a power of two (only one ellipsis), so to take a first position
+        ellipsis_pos1 = ellipsis_pos2 = max_dims;
+        for (i = 0; i < ellipsis_mask.size(); i++) {
+            if (ellipsis_mask[i] > 0) {
+                ellipsis_pos1 = i;
+                break;
+            }
+        }
+        bounds_size -= ellipsis_pos1;
+        if (bounds_size > 0 && (max_dims - bounds_size) > ellipsis_pos1)
+            ellipsis_pos2 = max_dims - bounds_size;
+
+        begin_dms.assign(max_dims, 0);
+        end_dms.assign(max_dims, -1);
+        stride_dms.assign(max_dims, 1);
+
+        srcStrides = inData[STRIDEDSLICE_DATA]->getTensorDesc().getBlockingDesc().getStrides();
+
+        int* begin = nullptr, * end = nullptr, * stride = nullptr;
+        if (begin_dims.size())
+            begin = inData[STRIDEDSLICE_BEGIN]->cbuffer().as<int*>() +
+                    inData[STRIDEDSLICE_BEGIN]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        if (end_dims.size())
+            end = inData[STRIDEDSLICE_END]->cbuffer().as<int*>() +
+                  inData[STRIDEDSLICE_END]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+        if (stride_dims.size())
+            stride = inData[STRIDEDSLICE_STRIDE]->cbuffer().as<int*>() +
+                     inData[STRIDEDSLICE_STRIDE]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        int j, k, bj, ej, sj;
+        for (i = 0, j = 0, k = 0, bj = 0, ej = 0, sj = 0; i < max_dims; i++) {
+            if (i >= ellipsis_pos1 && i < ellipsis_pos2) {
+                if (new_axis_mask.size() > i && new_axis_mask[i] == 1)
+                    end_dms[i] = 0;
+                else
+                    end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j++] + end_dms[i];
+
+                out_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) /
+                                                         static_cast<float>(abs(stride_dms[i])))));
+                our_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) /
+                                                         static_cast<float>(abs(stride_dms[i])))));
+                k = ellipsis_pos1;
+            } else {
+                stride_dms[i] = (stride != nullptr && stride_dims[0] > sj && stride[sj] != 0) ? stride[sj++] : 1;
+
+                if (begin_mask.size() > j && begin_mask[j] == 0)
+                    begin_dms[i] = stride_dms[i] > 0 ? 0 : -1;
+                else
+                    begin_dms[i] = (begin != nullptr && begin_dims[0] > bj) ? begin[bj] : (stride_dms[i] > 0 ? 0 : -1);
+                bj++;
+                begin_dms[i] = begin_dms[i] >= 0 ? begin_dms[i] : src_dims[j] + begin_dms[i];
+                //  Clipping 'begin'
+                details::clipping(&begin_dms[i], 0, src_dims[j]);
+
+                if (end_mask.size() > j && end_mask[j] == 0) {
+                    end_dms[i] = stride_dms[i] > 0 ? -1 : 0;
+                } else {
+                    int end_dms_tmp = (end != nullptr && end_dims[0] > ej) ? (stride_dms[i] > 0 ? end[ej] - 1 : end[ej] + 1)
+                                                                     : end_dms[i];
+                    end_dms[i] = (end != nullptr && end_dims[0] > ej) ? end_dms_tmp : (stride_dms[i] > 0 ? -1 : 0);
+                }
+                ej++;
+                end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j] + end_dms[i];
+                //  Clipping 'end'
+                details::clipping(&end_dms[i], 0, src_dims[j]);
+
+                if (new_axis_mask.size() > i && new_axis_mask[i] == 1)
+                    end_dms[i] = 0;
+                else
+                    j++;
+
+                if (shrink_axis_mask.size() > k && shrink_axis_mask[k] == 1)
+                    end_dms[i] = begin_dms[i];
+                else
+                    out_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) /
+                                                             static_cast<float>(abs(stride_dms[i])))));
+
+                our_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) /
+                                                         static_cast<float>(abs(stride_dms[i])))));
+                k++;
+            }
+        }
+    }
+
+    SizeVector getOutputShape() {
+        return out_dims;
+    }
+
+    void infer(std::vector<Blob::Ptr>& outData) {
+        dst_dims = outData[0]->getTensorDesc().getDims();
+        size_t range = out_dims.size() < dst_dims.size() ? out_dims.size() : dst_dims.size();
+        for (int i = 0; i < range; i++) {
+            if (out_dims[i] != dst_dims[i])
+                THROW_IE_EXCEPTION << "parameter mismatch";
+        }
+        dstStrides = outData[0]->getTensorDesc().getBlockingDesc().getStrides();
+        if (outData.size() != 1)
+            THROW_IE_EXCEPTION << " Incorrect number of input/output edges!";
+        float* dst_data = outData[0]->cbuffer().as<float*>() +
+                          outData[0]->getTensorDesc().getBlockingDesc().getOffsetPadding();
+
+        if (src_dims.size() == max_dims && shrink_axis == 0 && stride_dms[stride_dms.size() - 1] == 1 &&
+            stride_dms.size() > 1)
+            strided_slice_vp(src_data, dst_data);
+        else if (src_dims.size() == max_dims && shrink_axis == 0)
+            strided_slice_p(src_data, dst_data);
+        else
+            strided_slice(src_data, dst_data, our_dims);
+    }
+
+private:
+    void strided_slice(const float* src_data, float* dst_data, std::vector<size_t>& dims) {
+        size_t i;
+        int j;
+        size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+        SizeVector counters(max_dims, 0);
+
+        for (size_t iwork = 0; iwork < work_amount_dst; ++iwork) {
+            int src_idx = 0;
+            for (i = 0, j = 0; i < max_dims; ++i) {
+                src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[j];
+                if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1)) j++;
+            }
+
+            dst_data[iwork] = src_data[src_idx];
+
+            for (j = max_dims - 1; j >= 0; j--) {
+                counters[j]++;
+                if (counters[j] < dims[j])
+                    break;
+                else
+                    counters[j] = 0;
+            }
+        }
+    }
+
+    void strided_slice_vp(const float* src_data, float* dst_data) {
+        //  Vectorized copy
+        size_t dims_size_1 = dst_dims.size() - 1;
+        size_t dataLength = dst_dims[dims_size_1];
+        size_t work_amount_dst = dstStrides[0] * dst_dims[0] / dst_dims[dims_size_1];
+
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t i, start = 0, end = 0;
+            SizeVector counters(dims_size_1, 0);
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            int src_idx = begin_dms[dims_size_1];
+            for (int j = dims_size_1 - 1, i = start; j >= 0; j--) {
+                counters[j] = i % dst_dims[j];
+                src_idx += (begin_dms[j] + counters[j] * stride_dms[j]) * srcStrides[j];
+                i /= dst_dims[j];
+            }
+
+            for (size_t iwork = start, dst_idx = start * dataLength, i = 1;
+                 iwork < end; ++iwork, dst_idx += dataLength) {
+                memcpy(&dst_data[dst_idx], &src_data[src_idx], sizeof(float) * dataLength);
+                for (int j = dims_size_1 - 1; j >= 0; j--) {
+                    counters[j]++;
+                    if (counters[j] < dst_dims[j]) {
+                        src_idx += stride_dms[j] * srcStrides[j];
+                        break;
+                    } else {
+                        counters[j] = i = 0;
+                    }
+                }
+                if (!i) {
+                    for (src_idx = begin_dms[dims_size_1]; i < dims_size_1; ++i)
+                        src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[i];
+                }
+            }
+        });
+    }
+
+    void strided_slice_p(const float* src_data, float* dst_data) {
+        size_t dims_size = dst_dims.size();
+        size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+
+        parallel_nt(0, [&](const int ithr, const int nthr) {
+            size_t i, start = 0, end = 0;
+            SizeVector counters(dims_size, 0);
+            splitter(work_amount_dst, nthr, ithr, start, end);
+            int src_idx = 0;
+            for (int j = dims_size - 1, i = start; j >= 0; j--) {
+                counters[j] = i % dst_dims[j];
+                src_idx += (begin_dms[j] + counters[j] * stride_dms[j]) * srcStrides[j];
+                i /= dst_dims[j];
+            }
+
+            for (size_t iwork = start, dst_idx = start, i = 1; iwork < end; ++iwork, dst_idx++) {
+                dst_data[dst_idx] = src_data[src_idx];
+                for (int j = dims_size - 1; j >= 0; j--) {
+                    counters[j]++;
+                    if (counters[j] < dst_dims[j]) {
+                        src_idx += stride_dms[j] * srcStrides[j];
+                        break;
+                    } else {
+                        counters[j] = i = 0;
+                    }
+                }
+                if (!i) {
+                    for (src_idx = 0; i < dims_size; ++i)
+                        src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[i];
+                }
+            }
+        });
+    }
+
+private:
+    const size_t STRIDEDSLICE_DATA = 0;
+    const size_t STRIDEDSLICE_BEGIN = 1;
+    const size_t STRIDEDSLICE_END = 2;
+    const size_t STRIDEDSLICE_STRIDE = 3;
+
+    SizeVector begin_dims;
+    SizeVector end_dims;
+    SizeVector stride_dims;
+
+    SizeVector begin_mask;
+    SizeVector end_mask;
+    SizeVector ellipsis_mask;
+    SizeVector new_axis_mask;
+    SizeVector shrink_axis_mask;
+    int shrink_axis;
+
+    SizeVector src_dims;
+    SizeVector dst_dims;
+    std::vector<int> begin_dms;
+    std::vector<int> end_dms;
+    std::vector<int> stride_dms;
+    SizeVector srcStrides;
+    SizeVector dstStrides;
+    size_t bounds_size;
+    size_t max_dims;
+    size_t ellipsis_pos1, ellipsis_pos2;
+
+    InferenceEngine::SizeVector out_dims;
+    InferenceEngine::SizeVector our_dims;
+    const float* src_data;
+};
+
+/**
+ *@brief Implementation of Const inference for Tile layer
+ */
+class StridedSliceConstInfer : public ConstInferImpl {
+public:
+    explicit StridedSliceConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        StridedSliceLayer layer(lp);
+        layer.params = params;
+        layer.type = _type;
+        _validator->parseParams(&layer);
+
+        StridedSliceHelper helper(inData, params);
+        helper.infer(outData);
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/const_infer/ie_tile_const_infer.hpp b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_tile_const_infer.hpp
new file mode 100644
index 000000000..3147a45ee
--- /dev/null
+++ b/inference-engine/src/inference_engine/shape_infer/const_infer/ie_tile_const_infer.hpp
@@ -0,0 +1,60 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_blob.h>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <ie_layers.h>
+#include <ie_memcpy.h>
+#include "ie_const_infer_impl.hpp"
+
+namespace InferenceEngine {
+namespace ShapeInfer {
+
+/**
+ *@brief Implementation of Const inference for Tile layer
+ */
+class TileConstInfer : public ConstInferImpl {
+public:
+    explicit TileConstInfer(const std::string& type) : ConstInferImpl(type) {}
+
+    void inferImpl(const std::vector<Blob::CPtr>& inData,
+                   const std::map<std::string, std::string>& params,
+                   const std::map<std::string, Blob::Ptr>& blobs,
+                   std::vector<Blob::Ptr>& outData) override {
+        LayerParams lp{};
+        TileLayer layer(lp);
+        layer.params = params;
+        layer.type = _type;
+        _validator->parseParams(&layer);
+
+        auto inBlob = *inData.begin();
+        SizeVector inShape = inBlob->getTensorDesc().getDims();
+        const auto* inBuffer = inBlob->cbuffer().as<float*>();
+
+        auto outBlob = *outData.begin();
+        auto* outBuffer = outBlob->buffer().as<float*>();
+
+        int m_outer_dim = 1;
+        int m_inner_dim = 1;
+
+        for (int i = 0; i < layer.axis; i++) m_outer_dim *= inShape[i];
+        for (int i = layer.axis; i < inShape.size(); i++) m_inner_dim *= inShape[i];
+
+        for (int i = 0; i < m_outer_dim; ++i) {
+            for (int t = 0; t < layer.tiles; ++t) {
+                ie_memcpy(outBuffer, outBlob->byteSize(), inBuffer, m_inner_dim * sizeof(float));
+                outBuffer += m_inner_dim;
+            }
+            inBuffer += m_inner_dim;
+        }
+    }
+};
+
+}  // namespace ShapeInfer
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.cpp b/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.cpp
index fafd651e7..ed1d3375b 100644
--- a/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.cpp
+++ b/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -7,6 +7,7 @@
 #include <vector>
 #include <ie_layers.h>
 #include <ie_layer_validators.hpp>
+#include <blob_factory.hpp>
 #include "shape_infer/ie_reshape_io_controllers.hpp"
 
 using namespace InferenceEngine;
@@ -28,6 +29,8 @@ InputController::InputController(const std::vector<DataPtr>& dataVec, const std:
             _dataNames.push_back(data->name);
             SizeVector dims = data->getTensorDesc().getDims();
             _irShapes.push_back(dims);
+            // TODO probably need to create blobs with dimensions, not on getBlobs stage
+            _inferedData.push_back(nullptr);
         }
     }
     _shapes = _irShapes;
@@ -38,6 +41,11 @@ void InputController::setShapeByName(const SizeVector& shape, const std::string&
     _shapes[pos] = shape;
 }
 
+SizeVector InputController::getShapeByName(const std::string& dataName) {
+    long pos = getPositionByName(dataName);
+    return _shapes[pos];
+}
+
 std::vector<SizeVector> InputController::getShapes(bool check) {
     if (check) checkCorrespondence();
     return _shapes;
@@ -57,9 +65,6 @@ void InputController::checkCorrespondence() {
                            << ") doesn't match with number of shapes(" << _shapes.size() << ") for layer '"
                            << _layerName << "'!";
     }
-    for (const auto& shape : _shapes) {
-        if (shape.empty()) THROW_IE_EXCEPTION << "ReshapeLauncher error: shape is not set";
-    }
     // TODO: iterate and check for emptiness and size matching
 }
 
@@ -93,6 +98,34 @@ void InputController::setShapeByIndex(const SizeVector& shape, size_t index) {
     _shapes[index] = shape;
 }
 
+bool InputController::isDataAvailable() {
+    if (_inferedData.empty()) return false;
+    for (const auto& data : _inferedData) {
+        if (!data) return false;
+        else if (data->cbuffer() == nullptr) return false;
+    }
+    return true;
+}
+
+std::vector<Blob::CPtr> InputController::getBlobs(bool check) {
+    if (check) checkCorrespondence();
+    for (int i = 0; i < _dataVec.size(); i++) {
+        if (_inferedData[i] == nullptr || _inferedData[i]->cbuffer() == nullptr) {
+            TensorDesc desc = _dataVec[i]->getTensorDesc();
+            desc.setDims(_shapes[i]);
+            // special case of Shape layer: no input data, but blob contains info about dimensions, layout and etc...
+            auto blob = make_blob_with_precision(desc);
+            _inferedData[i] = blob;
+        }
+    }
+    return _inferedData;
+}
+
+void InputController::setBlobByName(const Blob::CPtr& blob, const std::string& dataName) {
+    long pos = getPositionByName(dataName);
+    _inferedData[pos] = blob;
+}
+
 OutputController::OutputController(const std::vector<DataPtr>& data, const std::string& layerName,
                                    const DefaultChecker::Ptr& checker)
         : InputController(data, layerName, checker) {}
@@ -120,6 +153,49 @@ void OutputController::propagateShapes(const std::set<ReshapeLauncher::Ptr>& lau
     }
 }
 
+// Combine with propagate shapes
+void OutputController::propagateBlobs(const std::set<ReshapeLauncher::Ptr>& launchers) {
+    unsigned idx = 0;
+    for (auto const& outData : _dataVec) {
+        for (auto const& inputTo : outData->inputTo) {
+            CNNLayerPtr layer = inputTo.second;
+            if (layer == nullptr) {
+                THROW_IE_EXCEPTION << "Failed to propagate shapes for layer (" << inputTo.first
+                                   << "): connected layer is null";
+            }
+            auto layerName = layer->name;
+            auto foundLauncher = std::find_if(launchers.begin(), launchers.end(),
+                                              [&layerName](const ReshapeLauncher::Ptr& launcher) {
+                                                  return launcher->getLayerName() == layerName;
+                                              });
+            if (foundLauncher == launchers.end())
+                THROW_IE_EXCEPTION << "Failed to find ReshapeLauncher for layer: '" << layerName << "'";
+            (*foundLauncher)->setBlobByName(_inferedData[idx], outData->name);
+        }
+        idx++;
+    }
+}
+
 void OutputController::setShapes(const std::vector<SizeVector>& shapes) {
     _shapes = shapes;
 }
+
+void OutputController::setBlobs(const std::vector<Blob::Ptr>& blobs) {
+    _inferedData.clear();
+    for (const auto& blob : blobs) {
+        _inferedData.push_back(blob);
+    }
+}
+
+std::vector<Blob::Ptr> OutputController::createBlobs() {
+    std::vector<Blob::Ptr> blobs;
+    for (int i = 0; i < _dataVec.size(); i++) {
+        TensorDesc desc = _dataVec[i]->getTensorDesc();
+        desc.setDims(_shapes[i]);
+        auto blob = make_blob_with_precision(desc);
+        blob->allocate();
+        blobs.push_back(blob);
+    }
+    return blobs;
+}
+
diff --git a/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.hpp b/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.hpp
index c553a73a4..f6d104405 100644
--- a/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/ie_reshape_io_controllers.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -54,6 +54,11 @@ public:
     virtual void setShapeByName(const SizeVector& shape, const std::string& dataName);
 
     /**
+     * @brief Return calculated shape for name.
+     */
+    virtual SizeVector getShapeByName(const std::string& dataName);
+
+    /**
      * @brief Set shape for current reshape launcher by corresponding index.
      * @param shape - shape to be set
      * @param index - shape's index
@@ -95,6 +100,12 @@ public:
 
     virtual void checkCorrespondence();
 
+    virtual bool isDataAvailable();
+
+    virtual std::vector<Blob::CPtr> getBlobs(bool check);
+
+    virtual void setBlobByName(const Blob::CPtr& blob, const std::string& name);
+
 private:
     long getPositionByName(const std::string& dataName);
 
@@ -104,6 +115,7 @@ protected:
     std::vector<SizeVector> _irShapes;
     std::vector<std::string> _dataNames;
     std::string _layerName;
+    std::vector<Blob::CPtr> _inferedData;
 };
 
 /**
@@ -122,6 +134,12 @@ public:
     virtual void propagateShapes(const std::set<ReshapeLauncher::Ptr>& launchers);
 
     virtual void setShapes(const std::vector<SizeVector>& shapes);
+
+    virtual void setBlobs(const std::vector<Blob::Ptr>& blobs);
+
+    std::vector<Blob::Ptr> createBlobs();
+
+    void propagateBlobs(const std::set<ReshapeLauncher::Ptr>& set);
 };
 
 }  // namespace ShapeInfer
diff --git a/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.cpp b/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.cpp
index c2651a0f0..d64c3bbbb 100644
--- a/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.cpp
+++ b/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,8 +10,12 @@
 #include <map>
 #include <set>
 #include <details/ie_exception.hpp>
+#include <shape_infer/const_infer/ie_const_infer_holder.hpp>
 #include "shape_infer/ie_reshape_launcher.hpp"
 #include "shape_infer/ie_reshape_io_controllers.hpp"
+#include "ie_reshape_launcher.hpp"
+
+#include "built-in/ie_tensor_iterator_shape_infer.hpp"
 
 using namespace InferenceEngine;
 using namespace ShapeInfer;
@@ -35,8 +39,10 @@ OutputController* DefaultInitializer::createOutputController(const CNNLayer* lay
 }
 
 ReshapeLauncher::ReshapeLauncher(const CNNLayer* layer, const IShapeInferImpl::Ptr& impl,
-                                 const DefaultInitializer::Ptr& initializer) : _layer(layer), _impl(impl) {
+                                 const DefaultInitializer::Ptr& initializer) : _layer(layer), _reshapeImpl(impl) {
     initializer->check(layer, impl);
+    ConstInferHolder holder;
+    if (layer) _inferImpl = holder.getConstInferImpl(layer->type);
     try {
         _iController = initializer->createInputController(layer);
         _oController = initializer->createOutputController(layer);
@@ -59,13 +65,37 @@ void ReshapeLauncher::setShapeByName(const SizeVector& shape, const std::string&
     _iController->setShapeByName(shape, dataName);
 }
 
+void ReshapeLauncher::setBlobByName(const Blob::CPtr& blob, const std::string& dataName) {
+    _iController->setBlobByName(blob, dataName);
+}
+
+SizeVector ReshapeLauncher::getShapeByName(const std::string& dataName) {
+    return _oController->getShapeByName(dataName);
+}
+
 void ReshapeLauncher::reshape(const std::set<ReshapeLauncher::Ptr>& launchers) {
     ResponseDesc resp;
     std::vector<SizeVector> outShapes;
-    auto sts = _impl->inferShapes(_iController->getShapes(true), _layer->params, _layer->blobs, outShapes, &resp);
+
+    // TODO: TensorIterator strongly required original layer instance because body is not presented
+    //       in params map. Original subnetwork body is required for internal shape infer
+    TensorIteratorShapeProp *TI_shaper = dynamic_cast<TensorIteratorShapeProp*>(_reshapeImpl.get());
+    if (TI_shaper) {
+        TI_shaper->setOriginalLayer(_layer);
+    }
+
+    // try to call new API with input blobs
+    auto sts = _reshapeImpl->inferShapes(_iController->getBlobs(true), _layer->params, _layer->blobs, outShapes, &resp);
+    // in case of old custom shape infer function call old API
+    if (sts == NOT_IMPLEMENTED) {
+        sts = _reshapeImpl->inferShapes(_iController->getShapes(true), _layer->params, _layer->blobs, outShapes,
+                                        &resp);
+    }
     _oController->setShapes(outShapes);
     if (sts != OK)
-        THROW_IE_EXCEPTION << resp.msg;
+        THROW_IE_EXCEPTION <<
+                           "Failed to infer shapes for " + _layer->type + " layer (" + _layer->name + ") with error: " +
+                           resp.msg;
     _oController->propagateShapes(launchers);
 }
 
@@ -73,6 +103,23 @@ void ReshapeLauncher::applyChanges(CNNLayer* layer) {
     checkLayer(layer);
     _iController->applyChanges();
     _oController->applyChanges();
+
+    // TODO: Need to finalize result of internal body shape infer and apply
+    //       new shapes to body subnetwork
+    TensorIteratorShapeProp *TI_shaper = dynamic_cast<TensorIteratorShapeProp*>(_reshapeImpl.get());
+    if (TI_shaper) TI_shaper->apply();
+}
+
+void ReshapeLauncher::constInfer(const std::set<ReshapeLauncher::Ptr>& launchers) {
+    if (_iController->isDataAvailable() || _layer->type == "Const" || _layer->type == "Shape") {
+        auto outBlobs = _oController->createBlobs();
+        _oController->setBlobs(outBlobs);
+        if (!_inferImpl)
+            THROW_IE_EXCEPTION << "Failed to find reference implementation for `"
+                                  + _layer->name + "` Layer with `" + _layer->type + "` Type on constant propagation";
+        _inferImpl->infer(_iController->getBlobs(false), _layer->params, _layer->blobs, outBlobs);
+        _oController->propagateBlobs(launchers);
+    }
 }
 
 void ReshapeLauncher::reset() {
@@ -106,7 +153,7 @@ void ReshapeLauncher::setIRShapeByName(const std::string& dataName) {
 }
 
 void ReshapeLauncher::setShapeInferImpl(const IShapeInferImpl::Ptr& impl) {
-    _impl = impl;
+    _reshapeImpl = impl;
 }
 
 const CNNLayer* ReshapeLauncher::getLayer() const {
@@ -178,6 +225,10 @@ void OutputOnlyReshapeLauncher::setShapeByName(const SizeVector& shape, const st
     _oController->setShapeByName(shape, dataName);
 }
 
+void OutputOnlyReshapeLauncher::setBlobByName(const Blob::CPtr& blob, const std::string& dataName) {
+    _oController->setBlobByName(blob, dataName);
+}
+
 void OutputOnlyReshapeLauncher::setIRShapeByName(const std::string& dataName) {
     SizeVector foundShape = _oController->getIRShapeByName(dataName);
     _oController->setShapeByName(foundShape, dataName);
@@ -192,6 +243,23 @@ void OutputOnlyReshapeLauncher::reset() {
     _oController->reset();
 }
 
+void OutputOnlyReshapeLauncher::constInfer(const std::set<ReshapeLauncher::Ptr>& launchers) {
+    if (_layer->type == "Const") {
+        auto outBlobs = _oController->createBlobs();
+        _oController->setBlobs(outBlobs);
+        if (!_inferImpl)
+            THROW_IE_EXCEPTION << "Failed to find reference implementation for `"
+                                  + _layer->name + "` Layer with `" + _layer->type + "` Type on constant propagation";
+        _inferImpl->infer({}, _layer->params, _layer->blobs, outBlobs);
+        auto shapes = _oController->getShapes(true);
+        for (int i = 0; i < outBlobs.size(); i++) {
+            outBlobs[i]->Reshape(SizeVector(shapes[i].rbegin(), shapes[i].rend()), TensorDesc::getLayoutByDims(shapes[i]));
+        }
+        _oController->setBlobs(outBlobs);
+        _oController->propagateBlobs(launchers);
+    }
+}
+
 void InputInitializer::check(const CNNLayer* layer, const IShapeInferImpl::Ptr& impl) {
     OutputOnlyInitializer::check(layer, impl);
     std::string errorBase = "Failed to init reshape launcher: layer type (`" + layer->type + "`) is not";
@@ -263,9 +331,6 @@ OutMemoryReshapeLauncher::OutMemoryReshapeLauncher(const CNNLayer* layer, const
         : ReshapeLauncher(layer, impl, std::make_shared<OutMemoryInitializer>()) {
 }
 
-void OutMemoryReshapeLauncher::reshape(const std::set<ReshapeLauncher::Ptr>& launchers) {
-}
-
 void OutMemoryReshapeLauncher::applyChanges(CNNLayer* layer) {
     checkLayer(layer);
     _iController->applyChanges();
diff --git a/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.hpp b/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.hpp
index 5a9de537f..28083c612 100644
--- a/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/ie_reshape_launcher.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,6 +12,7 @@
 #include <memory>
 
 #include <ie_layers.h>
+#include "shape_infer/const_infer/ie_const_infer_impl.hpp"
 #include "shape_infer/built-in/ie_built_in_holder.hpp"
 
 namespace InferenceEngine {
@@ -60,6 +61,14 @@ public:
      */
     virtual void setShapeByName(const SizeVector& shape, const std::string& dataName);
 
+    virtual void setBlobByName(const Blob::CPtr& blob, const std::string& dataName);
+
+    /**
+     * @brief Return calculated shape for data with requested name.
+     * @return Result shape
+     */
+    virtual SizeVector getShapeByName(const std::string& dataName);
+
     /**
      * @brief Set input shape from IR by Data name. If there's no Data with given name it throws exception
      * @param dataName - name of the corresponding Data.
@@ -74,6 +83,8 @@ public:
      */
     virtual void reshape(const std::set<ReshapeLauncher::Ptr>& launchers);
 
+    virtual void constInfer(const std::set<ReshapeLauncher::Ptr>& launchers);
+
     /**
      * @brief Apply new input shapes, calculated output shapes and changed layer's params to CNNLayer and Data.
      * @param layer - pointer to the layer for setting changes in layer's params
@@ -86,7 +97,6 @@ public:
      */
     virtual void reset();
 
-    // TODO: use layer instead?
     virtual std::string getLayerName() const;
 
     virtual std::string getLayerType() const;
@@ -99,7 +109,8 @@ protected:
     InputController* _iController = nullptr;
     OutputController* _oController = nullptr;
     const CNNLayer* _layer;
-    IShapeInferImpl::Ptr _impl;
+    IShapeInferImpl::Ptr _reshapeImpl;
+    IConstInferImpl::Ptr _inferImpl;
 
 protected:
     /**
@@ -134,6 +145,8 @@ public:
     FakeReshapeLauncher(const CNNLayer* layer, const IShapeInferImpl::Ptr& impl);
 
     void reshape(const std::set<ReshapeLauncher::Ptr>& launchers) override;
+
+    void constInfer(const std::set<ReshapeLauncher::Ptr>& launchers) override {}
 };
 
 class OutputOnlyInitializer : public DefaultInitializer {
@@ -163,6 +176,10 @@ public:
     void applyChanges(CNNLayer* layer) override;
 
     void reset() override;
+
+    void setBlobByName(const Blob::CPtr& blob, const std::string& dataName) override;
+
+    void constInfer(const std::set<ReshapeLauncher::Ptr>& launchers) override;
 };
 
 class InputInitializer : public OutputOnlyInitializer {
@@ -222,11 +239,13 @@ public:
 
     OutMemoryReshapeLauncher(const CNNLayer* layer1, const IShapeInferImpl::Ptr& impl1);
 
-    void reshape(const std::set<ReshapeLauncher::Ptr>& launchers) override;
+    void reshape(const std::set<ReshapeLauncher::Ptr>& launchers) override {}
 
     void applyChanges(CNNLayer* layer) override;
 
     void reset() override;
+
+    void constInfer(const std::set<ReshapeLauncher::Ptr>& launchers) override {}
 };
 
 }  // namespace ShapeInfer
diff --git a/inference-engine/src/inference_engine/shape_infer/ie_reshaper.cpp b/inference-engine/src/inference_engine/shape_infer/ie_reshaper.cpp
index 89dd72e85..53e39fcad 100644
--- a/inference-engine/src/inference_engine/shape_infer/ie_reshaper.cpp
+++ b/inference-engine/src/inference_engine/shape_infer/ie_reshaper.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,20 +10,53 @@
 #include <ie_layers.h>
 #include <graph_tools.hpp>
 #include <debug.h>
+#include <functional>
+#include <blob_factory.hpp>
 
 #include "shape_infer/built-in/ie_built_in_holder.hpp"
 #include "shape_infer/ie_reshaper.hpp"
 #include "details/caseless.hpp"
 #include "details/ie_cnn_network_tools.h"
 #include "ie_reshaper.hpp"
+#include "ie_cnn_layer_builder.h"
 
 using namespace InferenceEngine;
 using namespace InferenceEngine::details;
 using namespace ShapeInfer;
 
-Reshaper::Reshaper(const Context &context, Network::Ptr& network): ctx(context), network(network) {}
+Reshaper::Reshaper(Builder::Network* network): network(network) {}
 
-Reshaper::Reshaper(ICNNNetwork& network, const LauncherCreator::Ptr& launcherCreator) {
+static std::vector<CNNLayerPtr> SortTopologicallyStartsFrom(const std::vector<DataPtr> &inputs) {
+    std::vector<CNNLayerPtr> all_layers;
+    CNNNetForestDFS(inputs, [&](CNNLayerPtr  current){
+        all_layers.push_back(current);
+    }, false);
+    std::reverse(all_layers.begin(), all_layers.end());
+    return all_layers;
+}
+
+Reshaper::Reshaper(std::vector<DataPtr> insDatas, const LauncherCreator::Ptr& launcherCreator): network(nullptr) {
+    auto builtIn = std::make_shared<BuiltInShapeInferHolder>();
+    _allTypes = getTypeNamesFromExtension(builtIn);
+    _extensions.push_back(builtIn);
+
+    _allSortedLayers = SortTopologicallyStartsFrom(insDatas);
+    for (auto &in_data : insDatas) {
+        for (auto layer : in_data->inputTo) {
+            _inputLayers.insert(layer.second);
+        }
+    }
+
+    if (_inputLayers.empty() || _allSortedLayers.empty())
+        THROW_IE_EXCEPTION << "Unsupported model for shape inference: failed to collect inputs and layers";
+
+    for (auto const& currentLayer : _allSortedLayers) {
+        auto createdLauncher = launcherCreator->createNotInputLauncher(currentLayer.get(), _extensions);
+        _launchers.insert(createdLauncher);
+    }
+}
+
+Reshaper::Reshaper(ICNNNetwork& network, const LauncherCreator::Ptr& launcherCreator): network(nullptr) {
     auto builtIn = std::make_shared<BuiltInShapeInferHolder>();
     _allTypes = getTypeNamesFromExtension(builtIn);
     _extensions.push_back(builtIn);
@@ -55,7 +88,7 @@ void Reshaper::AddExtension(const IShapeInferExtensionPtr& extension) {
     if (!extension) THROW_IE_EXCEPTION << "Failed to add empty shape infer extension";
 
     if (network) {
-        ctx.addExtension(extension);
+        network->getContext().addExtension(extension);
         return;
     }
 
@@ -139,8 +172,48 @@ StatusCode Reshaper::run(const std::map<std::string, SizeVector>& inputShapes, R
     for (auto& layer : _allSortedLayers) {
         auto foundLauncher = getLauncherByLayerName(layer->name);
         foundLauncher->reshape(_launchers);
+        foundLauncher->constInfer(_launchers);
+    }
+
+    // apply changes
+    for (auto& layer : _allSortedLayers) {
+        auto foundLauncher = getLauncherByLayerName(layer->name);
+        foundLauncher->applyChanges(layer.get());
+    }
+    return OK;
+}
+
+StatusCode Reshaper::runNoApply(const std::map<std::string, SizeVector>& inputShapes, ResponseDesc* resp) {
+    // Reset all shapes from previous run
+    for (const auto& launcher : _launchers) {
+        launcher->reset();
+    }
+
+    // Set new input shapes
+    for (auto const& input : _inputLayers) {
+        std::string layerName = input->name;
+        for (auto const& inData_w : input->insData) {
+            auto inData = inData_w.lock();
+            auto dataName = inData->name;
+            auto foundShapeIt = inputShapes.find(dataName);
+            auto foundLauncher = getLauncherByLayerName(layerName);
+            if (foundShapeIt != inputShapes.end()) {
+                foundLauncher->setShapeByName(foundShapeIt->second, dataName);
+            } else {
+                foundLauncher->setIRShapeByName(dataName);
+            }
+        }
+    }
+
+    // do reshape
+    for (auto& layer : _allSortedLayers) {
+        auto foundLauncher = getLauncherByLayerName(layer->name);
+        foundLauncher->reshape(_launchers);
     }
+    return OK;
+}
 
+StatusCode Reshaper::apply(ResponseDesc* resp) {
     // apply changes
     for (auto& layer : _allSortedLayers) {
         auto foundLauncher = getLauncherByLayerName(layer->name);
@@ -149,11 +222,21 @@ StatusCode Reshaper::run(const std::map<std::string, SizeVector>& inputShapes, R
     return OK;
 }
 
+SizeVector Reshaper::getResultShapeFor(DataPtr &data, ResponseDesc* resp) {
+    auto creator_layer = data->creatorLayer.lock();
+    std::string creator_layer_name;
+    if (creator_layer) {
+        creator_layer_name = creator_layer->name;
+    }
+    auto foundLauncher = getLauncherByLayerName(creator_layer_name);
+    return foundLauncher->getShapeByName(data->getName());
+}
+
 StatusCode Reshaper::networkShapeInfer(const std::map<std::string, SizeVector>& inputShapes, ResponseDesc* resp) {
     if (!network)
         return DescriptionBuffer(GENERAL_ERROR, resp) << "Cannot infer shapes! Network is not loaded.";
-    std::vector<Layer> propagatedLayers;
-    Network propagatedNetwork(*network);
+    std::vector<Builder::Layer> propagatedLayers;
+    Builder::Network propagatedNetwork(*network);
 
     // Set new input shapes
     for (auto& layer : propagatedNetwork) {
@@ -164,12 +247,78 @@ StatusCode Reshaper::networkShapeInfer(const std::map<std::string, SizeVector>&
         if (layer->getOutputPorts().size() != 1)
             return DescriptionBuffer(GENERAL_ERROR, resp) << "Cannot infer shapes! Input layers can have only one output port.";
 
-        layer->getOutputPorts()[0].shape() = inputShapes.find(layer->getName())->second;
+        layer->getOutputPorts()[0].setShape(inputShapes.find(layer->getName())->second);
+    }
+
+    std::map<idx_t, std::map<std::string, std::string>> preparedParams;
+    // Prepare params for split layer
+    for (auto& layer : propagatedNetwork) {
+        if ((layer->getType() == "Reshape" || layer->getType() == "Flatten") &&
+            layer->getInputPorts().size() != 2 && !layer->getInputPorts()[0].shape().empty() &&
+            layer->getParameters().find("axis") != layer->getParameters().end() &&
+            (layer->getParameters().find("dim") == layer->getParameters().end() ||
+             layer->getParameters().at("dim").as<std::vector<int>>().empty())) {
+            auto inputShape = layer->getInputPorts()[0].shape();
+            size_t inputShapeTotal = std::accumulate(inputShape.begin(), inputShape.end(), 1lu,
+                                                     std::multiplies<size_t>());
+            std::vector<int> dim;
+            size_t axis = layer->getParameters().at("axis");
+            for (size_t i = 0; i < axis; i++) {
+                dim.emplace_back(inputShape[i]);
+                inputShapeTotal /= inputShape[i];
+            }
+            if (dim.size() < inputShape.size())
+                dim.emplace_back(inputShapeTotal);
+            layer->getParameters()["dim"] = dim;
+        }
+
+        std::map<std::string, std::string> params = InferenceEngine::Builder::convertParameters2Strings(layer->getParameters());
+        if (layer->getType() == "Split") {
+            Builder::SplitLayer splitLayer(layer);
+            std::vector<size_t> sizes;
+            size_t axisSize = splitLayer.getInputPort().shape()[splitLayer.getAxis()];
+            size_t uninitOuts(0);
+            for (const auto& port : layer->getOutputPorts()) {
+                if (port.shape().empty()) {
+                    sizes.push_back(0);
+                    uninitOuts++;
+                } else if (port.shape().size() <= splitLayer.getAxis()) {
+                    THROW_IE_EXCEPTION << "Incorrect output shapes in Split layer " << layer->getName();
+                } else {
+                    sizes.push_back(port.shape()[splitLayer.getAxis()]);
+                    axisSize -= port.shape()[splitLayer.getAxis()];
+                }
+            }
+
+            if ((axisSize && !uninitOuts) || (axisSize && uninitOuts && axisSize % uninitOuts))
+                THROW_IE_EXCEPTION << "Incorrect output shapes in Split layer " << layer->getName();
+
+            size_t commonSize = uninitOuts != 0 ? axisSize / uninitOuts : 0;
+            for (size_t i = 0; i < sizes.size() && commonSize; i++) {
+                if (!sizes[i])
+                    sizes[i] = commonSize;
+            }
+
+            std::string out_sizes;
+            for (const auto& size : sizes) {
+                if (!out_sizes.empty())
+                    out_sizes += ",";
+                out_sizes += std::to_string(size);
+            }
+            if (!out_sizes.empty())
+                params["out_sizes"] = out_sizes;
+        }
+
+        preparedParams[layer->getId()] = params;
     }
 
     // Try to propagate shapes
     for (auto& layer : propagatedNetwork) {
-        const auto impl = ctx.getShapeInferImpl(layer->getType());
+        // constant layer does not change during the shape inference and also the Const blob always has C layout and
+        // doesn't know its real shape, so don't run shape propagation for it
+        if (details::CaselessEq<std::string>()(layer->getType(), "Const"))
+            continue;
+        const auto impl = network->getContext().getShapeInferImpl(layer->getType());
         if (!impl)
             return DescriptionBuffer(NOT_FOUND, resp) <<
                         "Cannot infer shapes! Shape infer implementation was not found for type " << layer->getType() << ".";
@@ -178,33 +327,43 @@ StatusCode Reshaper::networkShapeInfer(const std::map<std::string, SizeVector>&
         std::map<std::string, std::string> params;
         std::map<std::string, Blob::Ptr> blobs;
 
+        std::vector<Blob::CPtr> inBlobs;
         for (const auto& inPort : layer->getInputPorts().empty() ? layer->getOutputPorts() : layer->getInputPorts()) {
-            inShapes.push_back(inPort.shape());
-        }
-        if (layer->getParameters()) {
-            for (const auto& it  : layer->getParameters()->getParameters()) {
-                params[it.first] = it.second;
-            }
-            for (const auto& it  : layer->getParameters()->getConstantData()) {
-                blobs[it.first] = std::const_pointer_cast<Blob>(it.second);
+            if (inPort.getParameters().find("type") == inPort.getParameters().end()) {
+                inBlobs.push_back(inPort.getData()->getData());
             }
         }
+        params = preparedParams[layer->getId()];
+
+        for (const auto& port : layer->getInputPorts()) {
+            if (port.getParameters().find("type") == port.getParameters().end() ||
+                    port.getData()->getData()->cbuffer() == nullptr)
+                continue;
+            blobs[port.getParameters().at("type")] = port.getData()->getData();
+        }
+        for (const auto& it  : layer->getParameters()) {
+            if (!it.second.is<Blob::CPtr>())
+                continue;
+            blobs[it.first] = std::const_pointer_cast<Blob>(it.second.as<Blob::CPtr>());
+        }
 
-        StatusCode sts = impl->inferShapes(inShapes, params, blobs, outShapes, resp);
+        StatusCode sts = impl->inferShapes(inBlobs, params, blobs, outShapes, resp);
         if (sts != OK)
             return sts;
 
         if (outShapes.size() != layer->getOutputPorts().size())
-            return DescriptionBuffer(GENERAL_ERROR, resp) << "Cannot infer shapes! The number of output shapes is not equal the number of output ports.";
+            return DescriptionBuffer(GENERAL_ERROR, resp) << "Cannot infer shapes! The number of output shapes is not "
+                                                             "equal the number of output ports for layer "
+                                                             << layer->getName();
 
         for (size_t i = 0; i < outShapes.size(); i++) {
-            layer->getOutputPorts()[i].shape() = outShapes[i];
+            layer->getOutputPorts()[i].setShape(outShapes[i]);
         }
         for (const auto& connection : propagatedNetwork.getLayerConnections(layer->getId())) {
             if (connection.from().layerId() != layer->getId())
                 continue;
             auto nextLayer = propagatedNetwork.getLayer(connection.to().layerId());
-            nextLayer->getInputPorts()[connection.to().portId()].shape() = outShapes[connection.from().portId()];
+            nextLayer->getInputPorts()[connection.to().portId()].setShape(outShapes[connection.from().portId()]);
         }
     }
 
@@ -212,10 +371,10 @@ StatusCode Reshaper::networkShapeInfer(const std::map<std::string, SizeVector>&
     for (auto& layer : *network) {
         const auto& propagatedLayer = propagatedNetwork.getLayer(layer->getId());
         for (size_t i = 0; i < layer->getInputPorts().size(); i++) {
-            layer->getInputPorts()[i].shape() = propagatedLayer->getInputPorts()[i].shape();
+            layer->getInputPorts()[i].setShape(propagatedLayer->getInputPorts()[i].shape());
         }
         for (size_t i = 0; i < layer->getOutputPorts().size(); i++) {
-            layer->getOutputPorts()[i].shape() = propagatedLayer->getOutputPorts()[i].shape();
+            layer->getOutputPorts()[i].setShape(propagatedLayer->getOutputPorts()[i].shape());
         }
     }
     return OK;
diff --git a/inference-engine/src/inference_engine/shape_infer/ie_reshaper.hpp b/inference-engine/src/inference_engine/shape_infer/ie_reshaper.hpp
index 4f1850705..834abe358 100644
--- a/inference-engine/src/inference_engine/shape_infer/ie_reshaper.hpp
+++ b/inference-engine/src/inference_engine/shape_infer/ie_reshaper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -13,7 +13,7 @@
 
 #include <ie_layers.h>
 #include <ie_context.hpp>
-#include "../ie_network.hpp"
+#include <builders/ie_network_builder.hpp>
 #include "details/caseless.hpp"
 #include "ie_reshape_launcher.hpp"
 #include "ie_icnn_network.hpp"
@@ -60,9 +60,12 @@ public:
      * @param network - const reference to the ICNNNetwork for performing shape inference
      */
     explicit Reshaper(ICNNNetwork& network,
-                      const LauncherCreator::Ptr& creator = std::make_shared<LauncherCreator>());
+            const LauncherCreator::Ptr& creator = std::make_shared<LauncherCreator>());
 
-    Reshaper(const Context& context, details::Network::Ptr& network);
+    explicit Reshaper(std::vector<DataPtr> inputs,
+            const LauncherCreator::Ptr& launcherCreator = std::make_shared<LauncherCreator>());
+
+    Reshaper(Builder::Network* network);
 
     virtual ~Reshaper() = default;
 
@@ -78,6 +81,25 @@ public:
      * @param inputShapes - Map of input names (data) to their input shapes.
      */
     StatusCode run(const std::map<std::string, SizeVector>& inputShapes, ResponseDesc* resp = nullptr);
+
+    /**
+     * @brief Perform shape inference for the given input shapes but not apply it.
+     * In case of cusses call apply() method.
+     * @param inputShapes - Map of input names (data) to their input shapes.
+     * @throws exception if shape infer failed without corruption of original shapes
+     */
+    StatusCode runNoApply(const std::map<std::string, SizeVector>& inputShapes, ResponseDesc* resp = nullptr);
+
+    /**
+     * @brief Apply shapes pre calculated by runNoApply() method.
+     */
+    StatusCode apply(ResponseDesc* resp = nullptr);
+
+    /**
+     * @brief Return newly calculated shape for provided data.
+     */
+    SizeVector getResultShapeFor(DataPtr &data, ResponseDesc* resp = nullptr);
+
 private:
     ReshapeLauncher::Ptr getLauncherByLayerName(const std::string& layerName) const;
 
@@ -91,8 +113,7 @@ private:
     std::set<CNNLayerPtr> _inputLayers{};
     InferenceEngine::details::caseless_set<std::string> _allTypes;
 
-    Context ctx;
-    details::Network::Ptr network;
+    Builder::Network* network;
 };
 
 }  // namespace ShapeInfer
diff --git a/inference-engine/src/inference_engine/system_alllocator.cpp b/inference-engine/src/inference_engine/system_alllocator.cpp
index c5e9f45f7..e075219a1 100644
--- a/inference-engine/src/inference_engine/system_alllocator.cpp
+++ b/inference-engine/src/inference_engine/system_alllocator.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/system_alllocator.hpp b/inference-engine/src/inference_engine/system_alllocator.hpp
index bc49a2bc4..b5a3cc755 100644
--- a/inference-engine/src/inference_engine/system_alllocator.hpp
+++ b/inference-engine/src/inference_engine/system_alllocator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/transform/transform_network.cpp b/inference-engine/src/inference_engine/transform/transform_network.cpp
new file mode 100644
index 000000000..5f3983349
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transform_network.cpp
@@ -0,0 +1,353 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <transform/transform_network.hpp>
+#include <limits>
+#include <string>
+#include <vector>
+#include <memory>
+#include <map>
+
+using namespace InferenceEngine;
+
+Transform::Port::Port(Builder::Network& network, PortInfo port, bool isInput)
+    : network(network), port(port), input(isInput) {
+    const auto& layer = network.getLayer(port.layerId());
+    if (isInput) {
+        if (layer->getInputPorts().size() < port.portId())
+            THROW_IE_EXCEPTION << "Cannot find input port "
+                               << port.portId() << " in layer "
+                               << layer->getName();
+    } else {
+        if (layer->getOutputPorts().size() < port.portId())
+            THROW_IE_EXCEPTION << "Cannot find output port "
+                               << port.portId() << " in layer "
+                               << layer->getName();
+    }
+}
+
+PortData::Ptr Transform::Port::getData() const {
+    return input ?
+           network.getLayer(port.layerId())->getInputPorts()[port.portId()].getData() :
+           network.getLayer(port.layerId())->getOutputPorts()[port.portId()].getData();
+}
+
+const std::map<std::string, Parameter> &Transform::Port::getParameters() const {
+    return input ?
+           network.getLayer(port.layerId())->getInputPorts()[port.portId()].getParameters() :
+           network.getLayer(port.layerId())->getOutputPorts()[port.portId()].getParameters();
+}
+
+Transform::Layer Transform::Port::getLayer() const {
+    return Transform::Network(network).getLayer(getPortInfo().layerId());
+}
+
+Transform::Connection Transform::Port::getConnection() const {
+    return Connection(*this);
+}
+
+void Transform::Port::connect(const Port& port) {
+    if (this->input)
+        this->getConnection().setSource(port);
+    else
+        this->getConnection().addDestination(port);
+}
+
+void Transform::Port::disconnect() {
+    getConnection().remove();
+}
+
+const SizeVector& Transform::Port::shape() const {
+    return this->getData()->getData()->getTensorDesc().getDims();
+}
+
+PortInfo Transform::Port::getPortInfo() const {
+    return port;
+}
+
+bool Transform::Port::operator==(const Port& rObj) const {
+    return &network == &rObj.network &&
+           port == rObj.port &&
+           input == rObj.input;
+}
+
+bool Transform::Port::operator!=(const Port& rObj) const {
+    return !(*this == rObj);
+}
+
+
+Transform::Layer::Layer(Builder::Network& network, idx_t id)
+    : network(network), layerId(id) {}
+
+idx_t Transform::Layer::getId() const {
+    return layerId;
+}
+
+std::string Transform::Layer::getName() const {
+    return getLayer()->getName();
+}
+
+std::string Transform::Layer::getType() const {
+    return getLayer()->getType();
+}
+
+Builder::Layer::Ptr Transform::Layer::getLayer() const {
+    return network.getLayer(layerId);
+}
+
+Transform::Layer::operator Builder::Layer::Ptr() const {
+    return getLayer();
+}
+
+Transform::Port Transform::Layer::getInPort() const {
+    if (getLayer()->getInputPorts().size() != 1)
+        THROW_IE_EXCEPTION << "Layer " << getName()
+                           << " has more than 1 input port.";
+    return Transform::Port(network, {layerId, 0}, true);
+}
+
+Transform::Port Transform::Layer::getInPort(idx_t idx) const {
+    if (getLayer()->getInputPorts().size() <= idx)
+        THROW_IE_EXCEPTION << "Layer " << getName()
+                           << " has less than " << idx << " input port(s).";
+    return Transform::Port(network, {layerId, idx}, true);
+}
+
+std::vector<Transform::Port> Transform::Layer::getInPorts() const {
+    std::vector<Transform::Port> ports;
+    for (size_t i = 0; i < getLayer()->getInputPorts().size(); i++) {
+        ports.push_back({network, {layerId, i}, true});
+    }
+    return ports;
+}
+
+Transform::Port Transform::Layer::getOutPort() const {
+    if (getLayer()->getOutputPorts().size() != 1)
+        THROW_IE_EXCEPTION << "Layer " << getName()
+                           << " has more than 1 output port.";
+    return Transform::Port(network, {layerId, 0}, false);
+}
+
+Transform::Port Transform::Layer::getOutPort(idx_t idx) const {
+    if (getLayer()->getOutputPorts().size() <= idx)
+        THROW_IE_EXCEPTION << "Layer " << getName()
+                           << " has less than " << idx << " output port(s).";
+    return Transform::Port(network, {layerId, idx}, false);
+}
+
+std::vector<Transform::Port> Transform::Layer::getOutPorts() const {
+    std::vector<Transform::Port> ports;
+    for (size_t i = 0; i < getLayer()->getInputPorts().size(); i++) {
+        ports.push_back({network, {layerId, i}, false});
+    }
+    return ports;
+}
+
+void Transform::Layer::setParameter(const std::string& key, const Parameter& value) {
+    auto& params = getLayer()->getParameters();
+    params[key] = value;
+}
+
+Parameter& Transform::Layer::getParameter(const std::string& key) const {
+    auto& params = getLayer()->getParameters();
+    if (params.find(key) == params.end())
+        THROW_IE_EXCEPTION << "Layer " << getName() << " has no parameter " << key;
+    return params[key];
+}
+
+Transform::Connection::Connection(const Transform::Port& port)
+    : network(port.network), inPort({(std::numeric_limits<idx_t>::max)(), (std::numeric_limits<idx_t>::max)()}) {
+    if (port.input) {
+        outPorts = {port.getPortInfo()};
+        for (const auto& connection : network.getLayerConnections(port.getPortInfo().layerId())) {
+            if (connection.to() == port.getPortInfo()) {
+                inPort = connection.from();
+                break;
+            }
+        }
+    } else {
+        inPort = port.getPortInfo();
+        for (const auto& connection : network.getLayerConnections(port.getPortInfo().layerId())) {
+            if (connection.from() == port.getPortInfo()) {
+                outPorts.emplace_back(connection.to());
+            }
+        }
+    }
+}
+Transform::Connection::Connection(Builder::Network& network, const InferenceEngine::Connection& connection)
+    : Connection(network, connection.from(), connection.to()) {}
+Transform::Connection::Connection(Builder::Network& network, const PortInfo& inPort, const PortInfo& outPort)
+    : Connection(network, inPort, std::vector<PortInfo>({outPort})) {}
+Transform::Connection::Connection(Builder::Network& network, const PortInfo& inPort, const std::vector<PortInfo>& outPorts)
+    : network(network), inPort(inPort), outPorts(outPorts) {}
+
+Transform::Port Transform::Connection::getSource() const {
+    if (!inPortExist())
+        THROW_IE_EXCEPTION << "Connection doesn't have source port!";
+    return Port(network, inPort, false);
+}
+
+void Transform::Connection::setSource(const Transform::Port &port) {
+    if (inPortExist()) {
+        // disconnect old port
+        for (const auto& outPort : outPorts) {
+            network.disconnect({inPort, outPort});
+        }
+    }
+    inPort = port.getPortInfo();
+    for (const auto& outPort : outPorts) {
+        network.connect(inPort, outPort);
+    }
+}
+
+Transform::Port Transform::Connection::getDestination() const {
+    if (outPorts.size() != 1)
+        THROW_IE_EXCEPTION << "Connection has more than 1 output.";
+    return Transform::Port(network, outPorts[0], true);
+}
+
+Transform::Port Transform::Connection::getDestination(idx_t idx) {
+    if (outPorts.size() <= idx)
+        THROW_IE_EXCEPTION << "Connection has less than "
+                           << idx << " input port(s).";
+    return Transform::Port(network, outPorts[idx], true);
+}
+
+std::vector<Transform::Port> Transform::Connection::getDestinations() const {
+    std::vector<Transform::Port> ports;
+    for (const auto& port : outPorts) {
+        ports.emplace_back(network, port, true);
+    }
+    return ports;
+}
+
+void Transform::Connection::addDestination(const Transform::Port &port) {
+    for (const auto& outPort : outPorts) {
+        if (outPort == port.getPortInfo()) {
+            THROW_IE_EXCEPTION << "Cannot connect twice with one port!";
+        }
+    }
+    outPorts.emplace_back(port.getPortInfo());
+    if (!inPortExist())
+        return;
+    network.connect(inPort, outPorts[outPorts.size() - 1]);
+}
+
+void Transform::Connection::setDestination(const Transform::Port &port) {
+    if (outPorts.size() > 1) {
+        THROW_IE_EXCEPTION << "Cannot set destination for connection which has more than 1 consumer."
+                           << "Please use addDestination or setDestinations methods!";
+    }
+
+    if (!outPorts.empty()) {
+        if (inPortExist())
+            network.disconnect({inPort, outPorts[0]});
+        outPorts.clear();
+    }
+    addDestination(port);
+}
+
+void Transform::Connection::setDestinations(const std::vector<Transform::Port> &ports) {
+    if (!outPorts.empty() && outPorts.size() != ports.size())
+        THROW_IE_EXCEPTION << "Cannot change number of output connections!";
+
+    if (inPortExist()) {
+        for (const auto &port : outPorts) {
+            network.disconnect({inPort, port});
+        }
+    }
+    outPorts.clear();
+    for (const auto &port : ports) {
+        addDestination(port);
+    }
+}
+
+void Transform::Connection::remove() {
+    if (!inPortExist())
+        return;
+    for (const auto& port : outPorts) {
+        network.disconnect({inPort, port});
+    }
+}
+
+bool Transform::Connection::inPortExist() const {
+    static PortInfo uninitPort((std::numeric_limits<idx_t>::max)(), (std::numeric_limits<idx_t>::max)());
+    return inPort != uninitPort;
+}
+
+Transform::Layer Transform::Network::addLayer(const Builder::Layer &layer) {
+    idx_t layerId = network.addLayer(layer);
+    return Transform::Layer(network, layerId);
+}
+
+void Transform::Network::removeLayer(const Transform::Layer &layer) {
+    for (const auto& connection : network.getLayerConnections(layer.getId()))
+        network.disconnect(connection);
+    network.removeLayer(layer.getId());
+}
+
+Transform::Layer Transform::Network::getLayer(const std::string &name) const {
+    for (const auto& layer : network) {
+        if (layer->getName() == name)
+            return Transform::Layer(network, layer->getId());
+    }
+    THROW_IE_EXCEPTION << "Layer with name: " << name << " was not found!";
+}
+
+Transform::Layer Transform::Network::getLayer(idx_t id) const {
+    for (const auto& layer : network) {
+        if (layer->getId() == id)
+            return Transform::Layer(network, layer->getId());
+    }
+    THROW_IE_EXCEPTION << "Layer with id: " << id << " was not found!";
+}
+
+Transform::Connection Transform::Network::connect(const Transform::Layer &src,
+        const Transform::Layer &dst) {
+    Port srcPort = src.getOutPort();
+    Port dstPort = dst.getInPort();
+
+    network.connect(srcPort.getPortInfo(), dstPort.getPortInfo());
+    return Connection(network, srcPort.getPortInfo(), dstPort.getPortInfo());
+}
+
+Transform::Connection Transform::Network::connect(const Transform::Port &src,
+        const Transform::Port &dst) {
+    network.connect(src.getPortInfo(), dst.getPortInfo());
+    return Connection(network, src.getPortInfo(), dst.getPortInfo());
+}
+
+void Transform::Network::disconnect(const Transform::Layer &src, const Transform::Layer &dst) {
+    getConnection(src, dst).remove();
+}
+
+void Transform::Network::disconnect(const Transform::Port &src, const Transform::Port &dst) {
+    getConnection(src, dst).remove();
+}
+
+Builder::Network& Transform::Network::getBuilderNetwork() const {
+    return network;
+}
+
+Transform::Connection Transform::Network::getConnection(const Transform::Layer &src,
+        const Transform::Layer &dst) const {
+    Port srcPort = src.getOutPort();
+    Port dstPort = dst.getInPort();
+
+    for (const auto& connection : network.getConnections()) {
+        if (connection.from() == srcPort.getPortInfo() && connection.to() == dstPort.getPortInfo())
+            return Connection(network, srcPort.getPortInfo(), dstPort.getPortInfo());
+    }
+    THROW_IE_EXCEPTION << "Connection " << src.getName() << " -> " << dst.getName() << " was not found!";
+}
+
+Transform::Connection Transform::Network::getConnection(const Transform::Port &src,
+        const Transform::Port &dst) const {
+    for (const auto& connection : network.getConnections()) {
+        if (connection.from() == src.getPortInfo() && connection.to() == dst.getPortInfo())
+            return Connection(network, src.getPortInfo(), dst.getPortInfo());
+    }
+    THROW_IE_EXCEPTION << "Connection " << getLayer(src.getPortInfo().layerId()).getName()
+        << " -> " << getLayer(dst.getPortInfo().layerId()).getName() << " was not found!";
+}
diff --git a/inference-engine/src/inference_engine/transform/transform_network.hpp b/inference-engine/src/inference_engine/transform/transform_network.hpp
new file mode 100644
index 000000000..fc97c289e
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transform_network.hpp
@@ -0,0 +1,116 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_parameter.hpp>
+#include <ie_builders.hpp>
+#include <string>
+#include <vector>
+#include <memory>
+#include <map>
+
+namespace InferenceEngine {
+namespace Transform {
+
+class Connection;
+class Layer;
+
+class INFERENCE_ENGINE_API_CLASS(Port) {
+public:
+    Port(Builder::Network& network, PortInfo port, bool isInput);
+    PortData::Ptr getData() const;
+    const std::map<std::string, Parameter>& getParameters() const;
+    Layer getLayer() const;
+    Connection getConnection() const;
+    void connect(const Port& port);
+    void disconnect();
+    const SizeVector& shape() const;
+    PortInfo getPortInfo() const;
+    bool operator==(const Port& rObj) const;
+    bool operator!=(const Port& rObj) const;
+
+private:
+    Builder::Network& network;
+    PortInfo port;
+    bool input;
+
+    friend class Connection;
+};
+
+class INFERENCE_ENGINE_API_CLASS(Layer) {
+public:
+    Layer(Builder::Network& network, idx_t id);
+    Port getInPort() const;
+    Port getInPort(idx_t idx) const;
+    std::vector<Port> getInPorts() const;
+    Port getOutPort() const;
+    Port getOutPort(idx_t idx) const;
+    std::vector<Port> getOutPorts() const;
+
+    void setParameter(const std::string& key, const Parameter& value);
+    Parameter& getParameter(const std::string& value) const;
+
+    idx_t getId() const;
+    std::string getName() const;
+    std::string getType() const;
+    operator Builder::Layer::Ptr() const;
+
+private:
+    Builder::Network& network;
+    idx_t layerId;
+
+    Builder::Layer::Ptr getLayer() const;
+};
+
+class INFERENCE_ENGINE_API_CLASS(Connection) {
+public:
+    explicit Connection(const Port& port);
+    Connection(Builder::Network& network, const InferenceEngine::Connection& connection);
+    Connection(Builder::Network& network, const PortInfo& inPort, const PortInfo& outPort);
+    Connection(Builder::Network& network, const PortInfo& inPort, const std::vector<PortInfo>& outPorts);
+
+    Port getSource() const;
+    void setSource(const Port& port);
+    Port getDestination() const;
+    Port getDestination(idx_t idx);
+    std::vector<Port> getDestinations() const;
+    void addDestination(const Port& port);
+    void setDestination(const Port& port);
+    void setDestinations(const std::vector<Port>& ports);
+    void remove();
+
+private:
+    Builder::Network& network;
+    PortInfo inPort;
+    std::vector<PortInfo> outPorts;
+
+    bool inPortExist() const;
+};
+
+class INFERENCE_ENGINE_API_CLASS(Network) {
+public:
+    explicit Network(Builder::Network& network): network(network) {}
+    virtual ~Network() = default;
+
+    Layer addLayer(const Builder::Layer& layer);
+    void removeLayer(const Layer& layer);
+    Layer getLayer(const std::string& name) const;
+    Layer getLayer(idx_t id) const;
+
+    Builder::Network& getBuilderNetwork() const;
+
+    Connection connect(const Layer& src, const Layer& dst);
+    Connection connect(const Port& src, const Port& dst);
+    void disconnect(const Layer& src, const Layer& dst);
+    void disconnect(const Port& src, const Port& dst);
+    Connection getConnection(const Layer& src, const Layer& dst) const;
+    Connection getConnection(const Port& src, const Port& dst) const;
+
+private:
+    Builder::Network& network;
+};
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformation.cpp b/inference-engine/src/inference_engine/transform/transformation.cpp
new file mode 100644
index 000000000..6f82e9881
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformation.cpp
@@ -0,0 +1,20 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <transform/transformation.hpp>
+#include <string>
+
+namespace InferenceEngine {
+namespace Transform {
+
+std::string Transformation::getName() const {
+    return name;
+}
+
+void Transformation::setName(const std::string& name) {
+    this->name = name;
+}
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformation.hpp b/inference-engine/src/inference_engine/transform/transformation.hpp
new file mode 100644
index 000000000..790ad486f
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformation.hpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <transform/transform_network.hpp>
+#include <string>
+#include <vector>
+#include <map>
+
+namespace InferenceEngine {
+namespace Transform {
+
+class Transformation {
+    std::string name;
+public:
+    std::string getName() const;
+    void setName(const std::string& name);
+    virtual ~Transformation() = default;
+    virtual void execute(Network& network) = 0;
+};
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.cpp b/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.cpp
new file mode 100644
index 000000000..27f5d6236
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.cpp
@@ -0,0 +1,68 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "eltwise_broadcast.hpp"
+#include "builders/ie_network_builder.hpp"
+#include "builders/ie_reshape_layer.hpp"
+#include "builders/ie_tile_layer.hpp"
+#include "debug.h"
+#include <string>
+#include <vector>
+#include <iostream>
+
+namespace InferenceEngine {
+namespace Transform {
+
+TransformationEltwiseBroadcast::TransformationEltwiseBroadcast() {
+    this->setName("ie.transform.eltwise_broadcast");
+}
+
+void insertTileOverDimension(Transform::Network& network, Transform::Port& inputPort, size_t axis, size_t tile) {
+    auto tileLayerBuilder = Builder::TileLayer("Tile" + std::to_string(axis) + "_" + std::to_string(tile)).setAxis(axis).setTiles(tile);
+    auto tileLayer = network.addLayer(tileLayerBuilder);
+    inputPort.getConnection().setDestination(tileLayer.getInPort());
+    tileLayer.getOutPort().connect(inputPort);
+}
+
+void TransformationEltwiseBroadcast::execute(Network& network) {
+    for (auto layer : network.getBuilderNetwork()) {
+        if (layer->getType() == "Eltwise") {
+            auto eltwiseLayer = network.getLayer(layer->getName());
+            auto outShape = eltwiseLayer.getOutPort(0).shape();
+            for (auto& eltwiseInPort : eltwiseLayer.getInPorts()) {
+                auto inShape = eltwiseInPort.shape();
+                // if shape lengths are not equal then insert Reshape with shape prepended with ones
+                if (inShape.size() < outShape.size()) {
+                    std::vector<int> reshapeDims(inShape.begin(), inShape.end());
+                    reshapeDims.insert(reshapeDims.begin(), outShape.size() - inShape.size(), 1);
+                    auto reshapeLayerBuilder = Builder::ReshapeLayer(eltwiseInPort.getLayer().getName() + "/Reshape").setDims(reshapeDims);
+                    auto reshapeLayer = network.addLayer(reshapeLayerBuilder);
+                    eltwiseInPort.getConnection().setDestination(reshapeLayer.getInPort());
+                    reshapeLayer.getOutPort().connect(eltwiseInPort);
+                    SizeVector newOutShape(reshapeDims.size());
+                    // update shape of the Port
+                    for (size_t ind = 0; ind < reshapeDims.size(); ++ind)
+                        newOutShape[ind] = reshapeDims[ind];
+                    eltwiseInPort.getData()->setShape(newOutShape);
+                    inShape = newOutShape;
+                }
+                for (size_t axis = 0; axis < inShape.size(); ++axis) {
+                    if (inShape[axis] != outShape[axis]) {
+                        if (inShape[axis] != 1) {
+                            THROW_IE_EXCEPTION << "Layer " << layer->getName()
+                                               << " input has invalid shape "
+                                               << details::dumpVec(inShape)
+                                               << " which can not be broadcasted to output shape "
+                                               << details::dumpVec(outShape);
+                        }
+                        insertTileOverDimension(network, eltwiseInPort, axis, outShape[axis]);
+                    }
+                }
+            }
+        }
+    }
+}
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.hpp b/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.hpp
new file mode 100644
index 000000000..863b34add
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformations/eltwise_broadcast.hpp
@@ -0,0 +1,18 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include <transform/transformation.hpp>
+
+namespace InferenceEngine {
+namespace Transform {
+
+class TransformationEltwiseBroadcast: public Transformation {
+public:
+    TransformationEltwiseBroadcast();
+    void execute(Network& network) override;
+};
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/lrn.cpp b/inference-engine/src/inference_engine/transform/transformations/lrn.cpp
new file mode 100644
index 000000000..710a71e1a
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformations/lrn.cpp
@@ -0,0 +1,63 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "lrn.hpp"
+#include "builders/ie_network_builder.hpp"
+#include "builders/ie_power_layer.hpp"
+#include "builders/ie_eltwise_layer.hpp"
+#include "builders/ie_norm_layer.hpp"
+#include <iostream>
+#include <cmath>
+
+namespace InferenceEngine {
+namespace Transform {
+
+TransformationLRN::TransformationLRN() {
+    this->setName("ie.transform.lrn");
+}
+
+void TransformationLRN::execute(Network& network) {
+    for (auto layer : network.getBuilderNetwork()) {
+        if (layer->getType() == "LRN") {
+            auto lrnLayer = network.getLayer(layer->getName());
+            float scale_value = 1.0f / std::pow(static_cast<float>(lrnLayer.getParameter("bias")),
+                                                static_cast<float>(lrnLayer.getParameter("beta")));
+
+            auto normLayerBuilder = Builder::NormLayer(lrnLayer.getName() + "/Norm").
+                    setAlpha(static_cast<float>(lrnLayer.getParameter("alpha")) / static_cast<float>(lrnLayer.getParameter("bias"))).
+                    setSize(static_cast<unsigned int>(lrnLayer.getParameter("size"))).
+                    setBeta(static_cast<float>(lrnLayer.getParameter("beta"))).
+                    setAcrossMaps(true);
+            auto normLayer = network.addLayer(normLayerBuilder);
+
+            auto mulLayerBuilder = Builder::EltwiseLayer(lrnLayer.getName() + "/Mul").setEltwiseType(
+                    Builder::EltwiseLayer::EltwiseType::MUL);
+            auto mulLayer = network.addLayer(mulLayerBuilder);
+
+            auto tensorDesc = TensorDesc(Precision::FP32, SizeVector(4, 1), Layout::NCHW);
+            auto blob = make_shared_blob<float>(tensorDesc);
+            blob->allocate();
+            float *buffer = blob->buffer().as<PrecisionTrait<Precision::FP32>::value_type *>();
+            buffer[0] = scale_value;
+
+            auto constLayerBuilder = Builder::ConstLayer(mulLayerBuilder.getName() + "/Const").setData(blob);
+            auto constLayer = network.addLayer(constLayerBuilder);
+
+            // re-connect input of LRN layer to input of Norm layer
+            lrnLayer.getInPort().getConnection().setDestination(normLayer.getInPort());
+
+            // multiple output of Norm with a constant
+            mulLayer.getInPort(0).connect(normLayer.getOutPort());
+            mulLayer.getInPort(1).connect(constLayer.getOutPort());
+
+            // connect consumers of LRN with mul
+            lrnLayer.getOutPort().getConnection().setSource(mulLayer.getOutPort());
+
+            network.removeLayer(lrnLayer);
+        }
+    }
+}
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/lrn.hpp b/inference-engine/src/inference_engine/transform/transformations/lrn.hpp
new file mode 100644
index 000000000..040180a55
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformations/lrn.hpp
@@ -0,0 +1,18 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include <transform/transformation.hpp>
+
+namespace InferenceEngine {
+namespace Transform {
+
+class TransformationLRN: public Transformation {
+public:
+    TransformationLRN();
+    void execute(Network& network) override;
+};
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/sub.cpp b/inference-engine/src/inference_engine/transform/transformations/sub.cpp
new file mode 100644
index 000000000..337bb77ee
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformations/sub.cpp
@@ -0,0 +1,47 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "sub.hpp"
+#include "builders/ie_network_builder.hpp"
+#include "builders/ie_power_layer.hpp"
+#include "builders/ie_eltwise_layer.hpp"
+#include <vector>
+#include <string>
+#include <iostream>
+
+namespace InferenceEngine {
+namespace Transform {
+
+TransformationSub::TransformationSub() {
+    this->setName("ie.transform.sub");
+}
+
+void TransformationSub::execute(Network& network) {
+    for (auto layer : network.getBuilderNetwork()) {
+        if (layer->getType() == "Eltwise" && layer->getParameters()["operation"].as<std::string>() == "sub") {
+            auto subLayer = network.getLayer(layer->getName());
+
+            auto powerLayerBuilder = Builder::PowerLayer(subLayer.getName() + "/Power").setPower(1.0f).setScale(-1.0f).setShift(0.0f);
+            auto powerLayer = network.addLayer(powerLayerBuilder);
+
+            auto eltwiseLayerBuilder = Builder::EltwiseLayer(subLayer.getName() + "/Add").setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM);
+            auto eltwiseLayer = network.addLayer(eltwiseLayerBuilder);
+
+            // negate the second input to the sub layer
+            subLayer.getInPort(1).getConnection().setDestination(powerLayer.getInPort());
+
+            // connect new eltwise with sum with two inputs
+            subLayer.getInPort(0).getConnection().setDestination(eltwiseLayer.getInPort(0));
+            eltwiseLayer.getInPort(1).connect(powerLayer.getOutPort());
+
+            // reconnect new eltwise with outputs of all eltwise with sub
+            subLayer.getOutPort().getConnection().setSource(eltwiseLayer.getOutPort());
+
+            network.removeLayer(subLayer);
+        }
+    }
+}
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/transform/transformations/sub.hpp b/inference-engine/src/inference_engine/transform/transformations/sub.hpp
new file mode 100644
index 000000000..c67649d32
--- /dev/null
+++ b/inference-engine/src/inference_engine/transform/transformations/sub.hpp
@@ -0,0 +1,18 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#pragma once
+
+#include <transform/transformation.hpp>
+
+namespace InferenceEngine {
+namespace Transform {
+
+class TransformationSub: public Transformation {
+public:
+    TransformationSub();
+    void execute(Network& network) override;
+};
+
+}  // namespace Transform
+}  // namespace InferenceEngine
diff --git a/inference-engine/src/inference_engine/w_dirent.h b/inference-engine/src/inference_engine/w_dirent.h
index e5243dbb9..d100d5130 100644
--- a/inference-engine/src/inference_engine/w_dirent.h
+++ b/inference-engine/src/inference_engine/w_dirent.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/w_unistd.h b/inference-engine/src/inference_engine/w_unistd.h
index 506458033..18e4d8d11 100644
--- a/inference-engine/src/inference_engine/w_unistd.h
+++ b/inference-engine/src/inference_engine/w_unistd.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/inference_engine/xml_parse_utils.cpp b/inference-engine/src/inference_engine/xml_parse_utils.cpp
index 82327e843..7e8c5a6dd 100644
--- a/inference-engine/src/inference_engine/xml_parse_utils.cpp
+++ b/inference-engine/src/inference_engine/xml_parse_utils.cpp
@@ -1,17 +1,26 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "xml_parse_utils.h"
 #include "details/ie_exception.hpp"
 #include "ie_precision.hpp"
+#include <string>
+#include <limits>
 
 int XMLParseUtils::GetIntAttr(const pugi::xml_node &node, const char *str) {
     auto attr = node.attribute(str);
     if (attr.empty())
         THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
                            << node.offset_debug();
-    return atoi(attr.value());
+    std::string str_value = std::string(attr.value());
+    std::size_t idx = 0;
+    int int_value = std::stoi(str_value, &idx, 10);
+    if (idx != str_value.length())
+        THROW_IE_EXCEPTION << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
+                           << "\" which is not an integer" << " at offset "
+                           << node.offset_debug();
+    return int_value;
 }
 
 uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node &node, const char *str) {
@@ -19,11 +28,14 @@ uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node &node, const char *st
     if (attr.empty())
         THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
                            << node.offset_debug();
-    int64_t value = atoll(attr.value());
-    if (value < 0)
-        THROW_IE_EXCEPTION << "node <" << node.name() << "> has incorrect parameter: " << str << " at offset "
+    std::string str_value = std::string(attr.value());
+    std::size_t idx = 0;
+    long long int_value = std::stoll(str_value, &idx, 10);
+    if (idx != str_value.length() || int_value < 0 || int_value > (std::numeric_limits<uint64_t>::max)())
+        THROW_IE_EXCEPTION << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
+                           << "\" which is not an unsigned 64 bit integer" << " at offset "
                            << node.offset_debug();
-    return static_cast<uint64_t>(value);
+    return static_cast<uint64_t>(int_value);
 }
 
 unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node &node, const char *str) {
@@ -31,11 +43,14 @@ unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node &node, const char *
     if (attr.empty())
         THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
                            << node.offset_debug();
-    int value = atoi(attr.value());
-    if (value < 0)
-        THROW_IE_EXCEPTION << "node <" << node.name() << "> has incorrect parameter: " << str << " at offset "
+    std::string str_value = std::string(attr.value());
+    std::size_t idx = 0;
+    long long int_value = std::stoll(str_value, &idx, 10);
+    if (idx != str_value.length() || int_value < 0 || int_value > (std::numeric_limits<unsigned int>::max)())
+        THROW_IE_EXCEPTION << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
+                           << "\" which is not an unsigned integer" << " at offset "
                            << node.offset_debug();
-    return static_cast<unsigned int>(value);
+    return static_cast<unsigned int>(int_value);
 }
 
 std::string XMLParseUtils::GetStrAttr(const pugi::xml_node &node, const char *str) {
@@ -57,7 +72,14 @@ float XMLParseUtils::GetFloatAttr(const pugi::xml_node &node, const char *str) {
     if (attr.empty())
         THROW_IE_EXCEPTION << "node <" << node.name() << "> is missing mandatory attribute: " << str << " at offset "
                            << node.offset_debug();
-    return static_cast<float>(atof(attr.value()));
+    std::string str_value = std::string(attr.value());
+    std::size_t idx = 0;
+    float float_value = std::stof(str_value, &idx);
+    if (idx != str_value.length())
+        THROW_IE_EXCEPTION << "node <" << node.name() << "> has attribute \"" << str << "\" = \"" << str_value
+                           << "\" which is not a floating point" << " at offset "
+                           << node.offset_debug();
+    return float_value;
 }
 
 InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node &node, const char *str) {
@@ -78,33 +100,25 @@ InferenceEngine::Precision XMLParseUtils::GetPrecisionAttr(const pugi::xml_node
 int XMLParseUtils::GetIntAttr(const pugi::xml_node &node, const char *str, int defVal) {
     auto attr = node.attribute(str);
     if (attr.empty()) return defVal;
-    return atoi(attr.value());
+    return GetIntAttr(node, str);
 }
 
 uint64_t XMLParseUtils::GetUInt64Attr(const pugi::xml_node &node, const char *str, uint64_t defVal) {
     auto attr = node.attribute(str);
     if (attr.empty()) return defVal;
-    int64_t value = atoll(attr.value());
-    if (value < 0)
-        THROW_IE_EXCEPTION << "node <" << node.name() << "> has incorrect parameter: " << str << " at offset "
-                           << node.offset_debug();
-    return static_cast<uint64_t>(value);
+    return GetUInt64Attr(node, str);
 }
 
 unsigned int XMLParseUtils::GetUIntAttr(const pugi::xml_node &node, const char *str, unsigned int defVal) {
     auto attr = node.attribute(str);
     if (attr.empty()) return defVal;
-    int value = atoi(attr.value());
-    if (value < 0)
-        THROW_IE_EXCEPTION << "node <" << node.name() << "> has incorrect parameter: " << str << " at offset "
-                           << node.offset_debug();
-    return static_cast<unsigned int>(value);
+    return GetUIntAttr(node, str);
 }
 
 float XMLParseUtils::GetFloatAttr(const pugi::xml_node &node, const char *str, float defVal) {
     auto attr = node.attribute(str);
     if (attr.empty()) return defVal;
-    return static_cast<float>(atof(attr.value()));
+    return GetFloatAttr(node, str);
 }
 
 int XMLParseUtils::GetIntChild(const pugi::xml_node &node, const char *str, int defVal) {
diff --git a/inference-engine/src/inference_engine/xml_parse_utils.h b/inference-engine/src/inference_engine/xml_parse_utils.h
index 3d2750bf9..77aa9c7bb 100644
--- a/inference-engine/src/inference_engine/xml_parse_utils.h
+++ b/inference-engine/src/inference_engine/xml_parse_utils.h
@@ -1,11 +1,10 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
 #include <cstdlib>
-#include <debug.h>
 #include "pugixml.hpp"
 #include "ie_common.h"
 #include "ie_api.h"
diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 5997f7d4b..df81a5aaa 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -34,7 +34,9 @@ include_directories(
         ${CMAKE_CURRENT_SOURCE_DIR}
         ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn
         ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/common
+        ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/src/cpu
         ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include
+        ${CMAKE_BINARY_DIR}/include/
 )
 
 if (GEMM STREQUAL "MKL")
@@ -64,3 +66,5 @@ target_compile_definitions(test_${TARGET_NAME} PUBLIC -DMKLDNN_THR=${MKLDNN_THR}
 target_link_libraries(test_${TARGET_NAME} PRIVATE inference_engine_s mkldnn)
 
 set_target_properties(test_${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME test_${TARGET_NAME})
+
+add_cpplint_target(${TARGET_NAME}_cpplint FOR_TARGETS ${TARGET_NAME})
diff --git a/inference-engine/src/mkldnn_plugin/config.cpp b/inference-engine/src/mkldnn_plugin/config.cpp
index 4ef10eec2..cfbe1a8a5 100644
--- a/inference-engine/src/mkldnn_plugin/config.cpp
+++ b/inference-engine/src/mkldnn_plugin/config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/config.h b/inference-engine/src/mkldnn_plugin/config.h
index 558ac87ae..46610bd31 100644
--- a/inference-engine/src/mkldnn_plugin/config.h
+++ b/inference-engine/src/mkldnn_plugin/config.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mean_image.cpp b/inference-engine/src/mkldnn_plugin/mean_image.cpp
index f1ac17e9a..dcf11ef34 100644
--- a/inference-engine/src/mkldnn_plugin/mean_image.cpp
+++ b/inference-engine/src/mkldnn_plugin/mean_image.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -72,13 +72,17 @@ void MeanImage::Load(const MKLDNNDims& inputDims, InputInfo::Ptr inputInfo) {
     }
 }
 
-void MeanImage::Subtract(const MKLDNNDims &inputDims, float *input) {
+void MeanImage::Subtract(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout) {
     IE_ASSERT(input != nullptr);
 
     if (inputDims.ndims() != 4) {
         THROW_IE_EXCEPTION << "Expecting input as 4 dimension blob with format NxCxHxW.";
     }
 
+    if (layout != NCHW && layout != NHWC) {
+        THROW_IE_EXCEPTION << "Expecting input layout NCHW or NHWC.";
+    }
+
     int MB = inputDims[0];
     int srcSize = inputDims.size() / MB;
 
@@ -92,8 +96,15 @@ void MeanImage::Subtract(const MKLDNNDims &inputDims, float *input) {
         int C = inputDims[1];
         srcSize /= inputDims[1];
 
-        parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
-            input[srcSize * mb * C + c * srcSize + i] -= meanValues[c];
-        });
+        if (layout == NCHW) {
+            parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
+                input[mb * C * srcSize + c * srcSize + i] -= meanValues[c];
+            });
+        } else if (layout == NHWC) {
+            parallel_for2d(MB, srcSize, [&](int mb, int i) {
+                for (int c = 0; c < C; c++)
+                    input[mb * srcSize * C + i * C + c] -= meanValues[c];
+            });
+        }
     }
 }
diff --git a/inference-engine/src/mkldnn_plugin/mean_image.h b/inference-engine/src/mkldnn_plugin/mean_image.h
index 24dc8163a..eba076200 100644
--- a/inference-engine/src/mkldnn_plugin/mean_image.h
+++ b/inference-engine/src/mkldnn_plugin/mean_image.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -18,16 +18,20 @@ public:
 
 public:
     void Load(const MKLDNNDims& inputDims, InferenceEngine::InputInfo::Ptr inputInfo);
-    void Subtract(const MKLDNNDims &inputDims, float *input);
+    void Subtract(const MKLDNNDims &inputDims, float *input, InferenceEngine::Layout layout);
 
     template<typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
-    void Subtract(const MKLDNNDims &inputDims, T *input) {
+    void Subtract(const MKLDNNDims &inputDims, T *input, InferenceEngine::Layout layout) {
         IE_ASSERT(input != nullptr);
 
         if (inputDims.ndims() != 4) {
             THROW_IE_EXCEPTION << "Expecting input as 4 dimension blob with format NxCxHxW.";
         }
 
+        if (layout != InferenceEngine::NCHW && layout != InferenceEngine::NHWC) {
+            THROW_IE_EXCEPTION << "Expecting input layout NCHW or NHWC.";
+        }
+
         int MB = inputDims[0];
         int srcSize = inputDims.size() / MB;
 
@@ -45,13 +49,25 @@ public:
             int C = inputDims[1];
             srcSize /= inputDims[1];
 
-            InferenceEngine::parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
-                int buf = input[srcSize * mb * C + c * srcSize + i];
-                buf -= meanValues[c];
-                if (buf < std::numeric_limits<T>::min()) buf = std::numeric_limits<T>::min();
-                if (buf > std::numeric_limits<T>::max()) buf = std::numeric_limits<T>::max();
-                input[srcSize * mb * C + c * srcSize + i] = buf;
-            });
+            if (layout == InferenceEngine::NCHW) {
+                InferenceEngine::parallel_for3d(MB, C, srcSize, [&](int mb, int c, int i) {
+                    int buf = input[srcSize * mb * C + c * srcSize + i];
+                    buf -= meanValues[c];
+                    if (buf < std::numeric_limits<T>::min()) buf = std::numeric_limits<T>::min();
+                    if (buf > std::numeric_limits<T>::max()) buf = std::numeric_limits<T>::max();
+                    input[srcSize * mb * C + c * srcSize + i] = buf;
+                });
+            } else if (layout == InferenceEngine::NHWC) {
+                InferenceEngine::parallel_for2d(MB, srcSize, [&](int mb, int i) {
+                    for (int c = 0; c < C; c++) {
+                        int buf = input[mb * srcSize * C + i * C + c];
+                        buf -= meanValues[c];
+                        if (buf < std::numeric_limits<T>::min()) buf = std::numeric_limits<T>::min();
+                        if (buf > std::numeric_limits<T>::max()) buf = std::numeric_limits<T>::max();
+                        input[mb * srcSize * C + i * C + c] = buf;
+                    }
+                });
+            }
         }
     }
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/cpu_engine.h b/inference-engine/src/mkldnn_plugin/mkldnn/cpu_engine.h
index 09ec76c42..e80bf9581 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/cpu_engine.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/cpu_engine.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_layer.h b/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_layer.h
index b3ad3c0c5..d6b099739 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_layer.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_layer.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_tensor.h b/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_tensor.h
index 616f517aa..de42f3604 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_tensor.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/cpu_prim_tensor.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
index 57b6edc35..271bc564c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/desc_iterator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp
index ff3616a44..34a6296c8 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.h b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.h
index 45cca0402..91d5bba97 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/iml_type_mapper.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.cpp b/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.cpp
index 19bc513f6..c79d6a85d 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.h b/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.h
index 65cc216e4..409e55ea4 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/omp_manager.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.cpp b/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.cpp
index 14c3e1d1d..735f819cc 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.h b/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.h
index dfd69bbb4..1c4dc3ab5 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn/os/lin/lin_omp_manager.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp
index ea463a2d5..bb5d4cc4d 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.h b/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.h
index 447787f88..357b43a7a 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_async_infer_request.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp
index bcb47419e..63af551cf 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -53,19 +53,6 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_forward::desc>()
     return typeDesc->getPtr();
 }
 
-MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_relu_forward::desc> desc) {
-    this->desc.reset(new DescFwdImpl<mkldnn::convolution_relu_forward::desc>(desc));
-}
-
-MKLDNNDescriptor::operator std::shared_ptr<mkldnn::convolution_relu_forward::desc>() {
-    DescFwdImpl<mkldnn::convolution_relu_forward::desc> *typeDesc =
-            dynamic_cast<DescFwdImpl<mkldnn::convolution_relu_forward::desc> *>(desc.get());
-    if (typeDesc == nullptr) {
-        THROW_IE_EXCEPTION << "Cannot cast descriptor!";
-    }
-    return typeDesc->getPtr();
-}
-
 MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc> desc,
                                    std::shared_ptr<mkldnn::convolution_forward::primitive_desc> prim) {
     this->desc.reset(
@@ -132,19 +119,6 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::pooling_forward::desc>() {
     return typeDesc->getPtr();
 }
 
-MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::relu_forward::desc> desc) {
-    this->desc.reset(new DescFwdImpl<mkldnn::relu_forward::desc>(desc));
-}
-
-MKLDNNDescriptor::operator std::shared_ptr<mkldnn::relu_forward::desc>() {
-    DescFwdImpl<mkldnn::relu_forward::desc> *typeDesc =
-            dynamic_cast<DescFwdImpl<mkldnn::relu_forward::desc> *>(desc.get());
-    if (typeDesc == nullptr) {
-        THROW_IE_EXCEPTION << "Cannot cast descriptor!";
-    }
-    return typeDesc->getPtr();
-}
-
 MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::roi_pooling_forward::desc> desc) {
     this->desc.reset(new DescFwdImpl<mkldnn::roi_pooling_forward::desc>(desc));
 }
@@ -196,3 +170,40 @@ MKLDNNDescriptor::operator std::shared_ptr<mkldnn::rnn_forward::desc>() {
     }
     return typeDesc->getPtr();
 }
+
+MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::eltwise_forward::desc> desc) {
+    this->desc.reset(new DescFwdImpl<mkldnn::eltwise_forward::desc>(desc));
+}
+
+MKLDNNDescriptor::operator std::shared_ptr<mkldnn::eltwise_forward::desc>() {
+    DescFwdImpl<mkldnn::eltwise_forward::desc> *typeDesc =
+            dynamic_cast<DescFwdImpl<mkldnn::eltwise_forward::desc> *>(desc.get());
+        if (typeDesc == nullptr) {
+        THROW_IE_EXCEPTION << "Cannot cast descriptor!";
+    }
+    return typeDesc->getPtr();
+}
+
+MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::binarization_forward::desc> desc) {
+    this->desc.reset(new DescFwdImpl<mkldnn::binarization_forward::desc>(desc));
+}
+
+MKLDNNDescriptor::operator std::shared_ptr<mkldnn::binarization_forward::desc>() {
+    auto *typeDesc = dynamic_cast<DescFwdImpl<mkldnn::binarization_forward::desc> *>(desc.get());
+    if (typeDesc == nullptr) {
+        THROW_IE_EXCEPTION << "Cannot cast descriptor!";
+    }
+    return typeDesc->getPtr();
+}
+
+MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr<mkldnn::binary_convolution_forward::desc> desc) {
+    this->desc.reset(new DescFwdImpl<mkldnn::binary_convolution_forward::desc>(desc));
+}
+
+MKLDNNDescriptor::operator std::shared_ptr<mkldnn::binary_convolution_forward::desc>() {
+    auto *typeDesc = dynamic_cast<DescFwdImpl<mkldnn::binary_convolution_forward::desc> *>(desc.get());
+    if (typeDesc == nullptr) {
+        THROW_IE_EXCEPTION << "Cannot cast descriptor!";
+    }
+    return typeDesc->getPtr();
+}
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h
index dff072089..4a7865010 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -17,9 +17,6 @@ public:
     explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_forward::desc> desc);
     operator std::shared_ptr<mkldnn::convolution_forward::desc>();
 
-    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_relu_forward::desc> desc);
-    operator std::shared_ptr<mkldnn::convolution_relu_forward::desc>();
-
     MKLDNNDescriptor(std::shared_ptr<mkldnn::convolution_backward_data::desc> desc,
                      std::shared_ptr<mkldnn::convolution_forward::primitive_desc> prim);
     operator std::shared_ptr<mkldnn::convolution_backward_data::desc>();
@@ -34,9 +31,6 @@ public:
     explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::pooling_forward::desc> desc);
     operator std::shared_ptr<mkldnn::pooling_forward::desc>();
 
-    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::relu_forward::desc> desc);
-    operator std::shared_ptr<mkldnn::relu_forward::desc>();
-
     explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::roi_pooling_forward::desc> desc);
     operator std::shared_ptr<mkldnn::roi_pooling_forward::desc>();
 
@@ -49,6 +43,15 @@ public:
     explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::rnn_forward::desc> desc);
     operator std::shared_ptr<mkldnn::rnn_forward::desc>();
 
+    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::eltwise_forward::desc> desc);
+    operator std::shared_ptr<mkldnn::eltwise_forward::desc>();
+
+    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::binarization_forward::desc> desc);
+    operator std::shared_ptr<mkldnn::binarization_forward::desc>();
+
+    explicit MKLDNNDescriptor(std::shared_ptr<mkldnn::binary_convolution_forward::desc> desc);
+    operator std::shared_ptr<mkldnn::binary_convolution_forward::desc>();
+
     mkldnn::primitive_desc_iterator createPrimitiveDescriptorIterator(const mkldnn::engine &engine,
             const mkldnn::primitive_attr &attr = mkldnn::primitive_attr()) const;
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_dims.h b/inference-engine/src/mkldnn_plugin/mkldnn_dims.h
index 06616a8be..62cb10fee 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_dims.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_dims.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -18,18 +18,18 @@ public:
     MKLDNNDims() = default;
 
     explicit MKLDNNDims(const InferenceEngine::SizeVector& size) {
-        dims = std::vector<int>(size.begin(), size.end());
+        dims = std::vector<ptrdiff_t>(size.begin(), size.end());
     }
 
-    explicit MKLDNNDims(const std::vector<int>& dim) {
+    explicit MKLDNNDims(const std::vector<ptrdiff_t>& dim) {
         dims = dim;
     }
 
     MKLDNNDims(const mkldnn_dims_t dnn_dims, int dnn_ndims) {
-        dims = std::vector<int>(dnn_dims, dnn_dims + dnn_ndims);
+        dims = std::vector<ptrdiff_t>(dnn_dims, dnn_dims + dnn_ndims);
     }
 
-    explicit MKLDNNDims(std::initializer_list<int> ilist) : dims(ilist) {}
+    explicit MKLDNNDims(std::initializer_list<ptrdiff_t> ilist) : dims(ilist) {}
     explicit MKLDNNDims(std::initializer_list<size_t > ilist) : dims(ilist.begin(), ilist.end()) {}
 
     InferenceEngine::SizeVector ToSizeVector() const {
@@ -45,12 +45,12 @@ public:
         return dims.size();
     }
 
-    int size() const {
+    ptrdiff_t size() const {
         return size(0);
     }
 
-    int size(int start) const {
-        int size = 1;
+    ptrdiff_t size(int start) const {
+        ptrdiff_t size = 1;
 
         for (int i = start; i < dims.size(); i++) {
             size *= dims[i];
@@ -67,7 +67,7 @@ public:
         return dims;
     }
 
-    bool operator == (const MKLDNNDims& rhs) {
+    bool operator == (const MKLDNNDims& rhs) const {
         if (dims.size() != rhs.dims.size()) {
             return false;
         }
@@ -75,20 +75,20 @@ public:
         return std::equal(rhs.dims.begin(), rhs.dims.end(), dims.begin());
     }
 
-    bool operator != (const MKLDNNDims& rhs) {
+    bool operator != (const MKLDNNDims& rhs) const {
         return !(*this == rhs);
     }
 
-    int& operator[](int idx) {
+    ptrdiff_t& operator[](int idx) {
         return dims[idx];
     }
 
-    int operator[](int idx) const {
+    ptrdiff_t operator[](int idx) const {
         return dims[idx];
     }
 
 private:
-    std::vector<int> dims;
+    std::vector<ptrdiff_t> dims;
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
index 92c8c5ad3..7d13d010c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -8,120 +8,140 @@
 #include <blob_factory.hpp>
 
 using namespace mkldnn;
-using namespace MKLDNNPlugin;
+namespace MKLDNNPlugin {
 
-MKLDNNPlugin::MKLDNNEdge::MKLDNNEdge(const std::shared_ptr<MKLDNNPlugin::MKLDNNNode> &parent,
-                                     const std::shared_ptr<MKLDNNPlugin::MKLDNNNode> &child) {
-    this->parent = parent;
-    this->child = child;
-}
+MKLDNNEdge::MKLDNNEdge(const MKLDNNNodePtr &parent, const MKLDNNNodePtr &child, int pr_port, int ch_port) :
+        parent(parent), child(child), parent_port(pr_port), child_port(ch_port) {}
 
-const std::shared_ptr<MKLDNNPlugin::MKLDNNNode> MKLDNNPlugin::MKLDNNEdge::getParent() const {
+const MKLDNNNodePtr MKLDNNEdge::getParent() const {
     auto parentPtr = parent.lock();
     if (!parentPtr)
         THROW_IE_EXCEPTION << "Edge contains empty parent node";
     return parentPtr;
 }
 
-const std::shared_ptr<MKLDNNPlugin::MKLDNNNode> MKLDNNPlugin::MKLDNNEdge::getChild() const {
+const MKLDNNNodePtr MKLDNNEdge::getChild() const {
     auto childPtr = child.lock();
     if (!childPtr)
         THROW_IE_EXCEPTION << "Edge contains empty child node";
     return childPtr;
 }
 
-bool MKLDNNPlugin::MKLDNNEdge::isDropped() {
-    return getInputNum() == -1 && getOutputNum() == -1;
+bool MKLDNNEdge::isDropped() {
+    bool not_in_parent = true;
+    bool not_in_child = true;
+
+    auto parent_ptr = parent.lock();
+    if (parent_ptr) {
+        for (auto &edge : parent_ptr->childEdges)
+            if (edge.lock().get() == this)
+                not_in_parent = false;
+    }
+
+    auto child_ptr = child.lock();
+    if (child_ptr) {
+        for (auto &edge : child_ptr->parentEdges)
+            if (edge.lock().get() == this)
+                not_in_child = false;
+    }
+    return not_in_parent && not_in_child;
 }
 
-bool MKLDNNPlugin::MKLDNNEdge::needReorder() {
+void MKLDNNEdge::drop() {
+    auto _drop_from = [&] (std::vector<MKLDNNEdgeWeakPtr> &list) {
+        auto myself = std::find_if(list.begin(), list.end(),
+                [&] (MKLDNNEdgeWeakPtr edge) { return edge.lock().get() == this; });
+
+        if (myself != list.end())
+            list.erase(myself);
+    };
+
+    _drop_from(getParent()->childEdges);
+    _drop_from(getChild()->parentEdges);
+}
+
+
+bool MKLDNNEdge::needReorder() {
     bool canBeInPlaceConflicts = false;
     auto parentSPD = getParent()->getSelectedPrimitiveDescriptor();
     auto childSPD = getChild()->getSelectedPrimitiveDescriptor();
     if (!parentSPD || !childSPD)
         THROW_IE_EXCEPTION << "Cannot make a decision about reorder. Primitive descriptors weren't selected.";
 
-    int inputNum = getInputNum();
+    int outNumber = getOutputNum();
+    int inNumber = getInputNum();
     bool in_place = inPlace();
-    if (in_place && !getParent()->getChildEdges().empty()) {
-        for (size_t i = 0; i < getParent()->getChildEdges().size(); i++) {
-            if (i == inputNum)
+    bool childCanChangeMem = childSPD->getConfig().outConfs.empty();
+    for (const auto conf : childSPD->getConfig().outConfs) {
+        if (conf.inPlace == outNumber && outNumber >= 0)
+            childCanChangeMem = true;
+    }
+
+    const auto& detectInPlaceChildsNum = [](const std::vector<MKLDNNEdgePtr>& edges) -> size_t {
+        size_t count = 0;
+        for (const auto& edge : edges) {
+            auto childSPD = edge->getChild()->getSelectedPrimitiveDescriptor();
+            int outNumber = edge->getOutputNum();
+            if (childSPD->getConfig().outConfs.empty())
+                count++;
+            for (const auto conf : childSPD->getConfig().outConfs) {
+                if (conf.inPlace == outNumber)
+                    count++;
+            }
+        }
+        return count;
+    };
+
+    const auto portChildEdges = getParent()->getChildEdgesAtPort(inNumber);
+    if (in_place && detectInPlaceChildsNum(portChildEdges) > 1 && childCanChangeMem)
+        canBeInPlaceConflicts = true;
+    if (!canBeInPlaceConflicts && in_place && !getParent()->getChildEdges().empty()) {
+        for (auto &p_edge_peer : portChildEdges) {
+            if (p_edge_peer.get() == this)
                 continue;
-            if (getParent()->getChildEdgeAt(i)->getChild()->getType() != Reorder && getParent()->getChildEdgeAt(i)->inPlace(LOOK_DOWN))
+            if (p_edge_peer->getChild()->getType() != Reorder && p_edge_peer->inPlace(LOOK_DOWN))
                 canBeInPlaceConflicts = true;
         }
     }
 
     if (in_place) {
-        int outNumber = getOutputNum();
-        int inNumber = getInputNum();
         if (inNumber >= 0 && inNumber < parentSPD->getConfig().outConfs.size() && parentSPD->getConfig().outConfs[inNumber].inPlace >= 0 &&
             outNumber >= 0 && outNumber < childSPD->getConfig().inConfs.size() && childSPD->getConfig().inConfs[outNumber].inPlace >= 0)
             canBeInPlaceConflicts = true;
     }
-    return !MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc()) || canBeInPlaceConflicts;
+    return canBeInPlaceConflicts || !MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc());
 }
 
-InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getInputDesc() {
+InferenceEngine::TensorDesc MKLDNNEdge::getInputDesc() {
     if (inputDesc.getLayout() == InferenceEngine::Layout::ANY) {
         inputDesc = getSpecifiedInputDesc({});
     }
     return inputDesc;
 }
 
-InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getOutputDesc() {
+InferenceEngine::TensorDesc MKLDNNEdge::getOutputDesc() {
     if (outputDesc.getLayout() == InferenceEngine::Layout::ANY) {
         outputDesc = getSpecifiedOutputDesc({});
     }
     return outputDesc;
 }
 
-InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getDesc() {
+InferenceEngine::TensorDesc MKLDNNEdge::getDesc() {
     if (!MKLDNNExtensionUtils::initTensorsAreEqual(getInputDesc(), getOutputDesc()))
         THROW_IE_EXCEPTION << "Cannot get descriptor for edge: " << getParent()->getName() << "->"
                            << getChild()->getName();
     return getInputDesc();
 }
 
-int MKLDNNPlugin::MKLDNNEdge::getInputNum() {
-    return getAllInputNums()[0];
-}
-
-std::vector<int> MKLDNNPlugin::MKLDNNEdge::getAllInputNums() {
-    auto parentPtr = parent.lock();
-    if (!parentPtr)
-        return {-1};
-
-    std::vector<int> res;
-    for (size_t i = 0; i < parentPtr->getChildEdges().size(); i++) {
-        auto childEdge = parentPtr->getChildEdges()[i].lock();
-        if (childEdge && childEdge.get() == this) {
-            res.push_back(static_cast<int>(i));
-        }
-    }
-    return res.empty() ? std::vector<int>{-1} : res;
+int MKLDNNEdge::getInputNum() {
+    return parent_port;
 }
 
-int MKLDNNPlugin::MKLDNNEdge::getOutputNum() {
-    return getAllOutputNums()[0];
+int MKLDNNEdge::getOutputNum() {
+    return child_port;
 }
 
-std::vector<int> MKLDNNPlugin::MKLDNNEdge::getAllOutputNums() {
-    auto childPtr = child.lock();
-    if (!childPtr)
-        return {-1};
-
-    std::vector<int> res;
-    for (size_t i = 0; i < childPtr->getParentEdges().size(); i++) {
-        auto parentEdge = childPtr->getParentEdges()[i].lock();
-        if (parentEdge && parentEdge.get() == this) {
-            res.push_back(static_cast<int>(i));
-        }
-    }
-    return res.empty() ? std::vector<int>{-1} : res;
-}
-
-void MKLDNNPlugin::MKLDNNEdge::allocate(const void* mem_ptr) {
+void MKLDNNEdge::allocate(const void* mem_ptr) {
     if (status != Status::NeedAllocation)
         return;
 
@@ -142,7 +162,7 @@ void MKLDNNPlugin::MKLDNNEdge::allocate(const void* mem_ptr) {
     status = Status::Allocated;
 }
 
-void MKLDNNPlugin::MKLDNNEdge::changeStatus(MKLDNNPlugin::MKLDNNEdge::Status state) {
+void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) {
     if (state == Status::NotAllocated) {
         THROW_IE_EXCEPTION << "Incorrect behaviour! Use method sharedMemFrom()";
     }
@@ -156,7 +176,7 @@ void MKLDNNPlugin::MKLDNNEdge::changeStatus(MKLDNNPlugin::MKLDNNEdge::Status sta
     status = state;
 }
 
-MKLDNNPlugin::MKLDNNDims &MKLDNNPlugin::MKLDNNEdge::getDims() {
+const MKLDNNDims& MKLDNNEdge::getDims() {
     if (!dims.ndims()) {
         MKLDNNDims outDims;
         MKLDNNDims inDims;
@@ -196,11 +216,7 @@ MKLDNNPlugin::MKLDNNDims &MKLDNNPlugin::MKLDNNEdge::getDims() {
     return dims;
 }
 
-void MKLDNNPlugin::MKLDNNEdge::setDims(MKLDNNPlugin::MKLDNNDims &dims) {
-    this->dims = dims;
-}
-
-bool MKLDNNPlugin::MKLDNNEdge::nodeCanChangeDesc(const std::shared_ptr<MKLDNNPlugin::MKLDNNNode> &node) const {
+bool MKLDNNEdge::nodeCanChangeDesc(const MKLDNNNodePtr &node) const {
     PrimitiveDescInfo * selectedPd = node->getSelectedPrimitiveDescriptor();
     if (selectedPd == nullptr)
         THROW_IE_EXCEPTION << "Primitive descriptor for node " << node->getName() << " is not selected.";
@@ -245,7 +261,7 @@ bool MKLDNNPlugin::MKLDNNEdge::nodeCanChangeDesc(const std::shared_ptr<MKLDNNPlu
 /// In we have {any, any, any} -> {any} or {any} -> {any, any, any} or {any} -> {any} it means that
 /// layer doesn't change memory format
 /// We don't support {any, any, nchw} -> {any}
-InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getSpecifiedInputDesc(std::map<mkldnn::memory::format, size_t> formats) {
+InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedInputDesc(std::map<mkldnn::memory::format, size_t> formats) {
     InferenceEngine::TensorDesc inDesc;
     static int enterCount = 0;
     enterCount++;
@@ -370,7 +386,7 @@ InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getSpecifiedInputDesc(std:
     return MKLDNNMemoryDesc(getDims(), inDataType, desc);
 }
 
-InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getSpecifiedOutputDesc(std::map<mkldnn::memory::format, size_t> formats) {
+InferenceEngine::TensorDesc MKLDNNEdge::getSpecifiedOutputDesc(std::map<mkldnn::memory::format, size_t> formats) {
     static int enterCount = 0;
     enterCount++;
     InferenceEngine::TensorDesc outDesc;
@@ -510,7 +526,7 @@ InferenceEngine::TensorDesc MKLDNNPlugin::MKLDNNEdge::getSpecifiedOutputDesc(std
     return childPtr->getSelectedPrimitiveDescriptor()->getConfig().outConfs[outputIdx].desc;
 }
 
-const MKLDNNPlugin::MKLDNNMemory &MKLDNNPlugin::MKLDNNEdge::getMemory() {
+const MKLDNNMemory &MKLDNNEdge::getMemory() {
     if (status == Status::NotAllocated) {
         memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine()));
         memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData());
@@ -521,7 +537,7 @@ const MKLDNNPlugin::MKLDNNMemory &MKLDNNPlugin::MKLDNNEdge::getMemory() {
     return *memoryPtr;
 }
 
-MKLDNNPlugin::MKLDNNMemoryPtr &MKLDNNPlugin::MKLDNNEdge::getMemoryPtr() {
+MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() {
     if (status == Status::NotAllocated) {
         memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine()));
         memoryPtr->Create(MKLDNNMemoryDesc(getDesc()), getSharedEdge()->getMemoryPtr()->GetData());
@@ -545,12 +561,12 @@ InferenceEngine::Blob::Ptr MKLDNNEdge::getBlob() {
     return make_blob_with_precision(desc, memoryPtr->GetData());
 }
 
-void MKLDNNPlugin::MKLDNNEdge::sharedMemFrom(const MKLDNNPlugin::MKLDNNEdgePtr &edge) {
+void MKLDNNEdge::sharedMemFrom(const MKLDNNEdgePtr &edge) {
     memoryFromEdge = edge;
     status = Status::NotAllocated;
 }
 
-void MKLDNNPlugin::MKLDNNEdge::validate() {
+void MKLDNNEdge::validate() {
     if (status == Status::Validated)
         return;
     getMemory();
@@ -563,7 +579,7 @@ void MKLDNNPlugin::MKLDNNEdge::validate() {
     status = Status::Validated;
 }
 
-MKLDNNPlugin::MKLDNNEdgePtr MKLDNNPlugin::MKLDNNEdge::getSharedEdge() const {
+MKLDNNEdgePtr MKLDNNEdge::getSharedEdge() const {
     auto memoryFromEdgePtr = memoryFromEdge.lock();
     if (!memoryFromEdgePtr) {
         THROW_IE_EXCEPTION << "Cannot get memory ptr for edge(" << getParent()->getName() << "->"
@@ -578,44 +594,45 @@ void MKLDNNEdge::init() {
     MKLDNNEdgePtr edgePtr = getBaseEdge();
     if (edgePtr.get() == this) {
         changeStatus(Status::NeedAllocation);
-        if (getInputNum() > 0 && getParent()->getSelectedPrimitiveDescriptor() &&
-            getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= getInputNum() &&
-            edgePtr != getParent()->getChildEdgeAt(0)) {
-            sharedMemFrom(getParent()->getChildEdgeAt(0));
+        auto port = getInputNum();
+        if (port < 0)
+            return;
+        auto edges_at_same_port = getParent()->getChildEdgesAtPort(static_cast<size_t>(port));
+        if (!edges_at_same_port.empty() &&
+            edgePtr != edges_at_same_port[0]) {
+            sharedMemFrom(edges_at_same_port[0]);
         }
     } else {
         sharedMemFrom(edgePtr);
-        if (getInputNum() > 0 && getParent()->getSelectedPrimitiveDescriptor() &&
-                getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size() <= getInputNum() &&
-                edgePtr != getParent()->getChildEdgeAt(0)) {
-            if (getParent()->getChildEdgeAt(0)->getStatus() != Status::NeedAllocation &&
-                    getParent()->getChildEdgeAt(0)->getStatus() != Status::Uninitialized) {
-                if (getParent()->getChildEdgeAt(0)->getSharedEdge() != edgePtr)
+        auto port = getInputNum();
+        if (port < 0)
+            return;
+        auto edges_at_same_port = getParent()->getChildEdgesAtPort(static_cast<size_t>(port));
+        for (auto edge : edges_at_same_port) {
+            if (edge->getStatus() != Status::NeedAllocation && edge->getStatus() != Status::Uninitialized) {
+                if (edge->getSharedEdge() != edgePtr)
                     THROW_IE_EXCEPTION << "Unsupported behavior. Cannot mark edge "
                                        << getParent()->getChildEdgeAt(0)->getParent()->getName() << "->"
                                        << getParent()->getChildEdgeAt(0)->getChild()->getName() << " as not allocated!";
             } else {
-                getParent()->getChildEdgeAt(0)->sharedMemFrom(edgePtr);
+                if (edge != edgePtr)
+                    edge->sharedMemFrom(edgePtr);
             }
         }
     }
 }
 
 /**
- * Should analize graph node dependensies, inplace node information and return root memory(edge) it view on
+ * Should analyze graph node dependencies, inplace node information and return root memory(edge) it view on
  *
  * @param type some magic enum values... description needed
  * @return root of view-on-memory subgraph
  */
-MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(LOOK look) {
+MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(int look) {
     auto parentConfig = getParent()->getSelectedPrimitiveDescriptor()->getConfig();
     auto childConfig = getChild()->getSelectedPrimitiveDescriptor()->getConfig();
     int inputNum = getInputNum();
     int outputNum = getOutputNum();
-    if (inputNum >= parentConfig.outConfs.size())
-        inputNum = 0;
-    if (outputNum >= childConfig.inConfs.size())
-        outputNum = 0;
 
     if (childConfig.inConfs[outputNum].inPlace >= 0 && parentConfig.outConfs[inputNum].inPlace >= 0) {
         inputNum = getInputNum();
@@ -623,37 +640,43 @@ MKLDNNEdgePtr MKLDNNEdge::getBaseEdge(LOOK look) {
     }
 
     if (childConfig.inConfs[outputNum].inPlace >= 0 && (look & LOOK_DOWN)) {
-        int next_edge_ind = childConfig.inConfs[outputNum].inPlace;
-        if (childConfig.outConfs[next_edge_ind].inPlace >= 0) {
-            childConfig.outConfs[next_edge_ind].inPlace = -1;
+        int next_port_idx = childConfig.inConfs[outputNum].inPlace;
+        if (childConfig.outConfs[next_port_idx].inPlace >= 0) {
+            childConfig.outConfs[next_port_idx].inPlace = -1;
             getChild()->initDescriptor(childConfig);
         }
 
-        // this is a WA ... :-(
-        if (childConfig.outConfs.size() <= getChild()->getChildEdges().size()) {
-            // Multiple connection to some out port.
-            // Will try to find implace consumer.
-            for (int i = 0; i< getChild()->getChildEdges().size(); i++) {
-                auto chch_edge = getChild()->getChildEdgeAt(i);
-                auto chch_conf = chch_edge->getChild()->getSelectedPrimitiveDescriptor()->getConfig();
+        auto ch_edges = getChild()->getChildEdgesAtPort(next_port_idx);
+        auto &next_ch_edge = ch_edges[0];
 
+        // Multiple connection to some out port
+        // Will try to find inplace consumer
+        for (auto &ch_edge : ch_edges) {
+            auto &chch_conf = ch_edge->getChild()->getSelectedPrimitiveDescriptor()->getConfig();
 
-                if (chch_conf.inConfs[chch_edge->getOutputNum()].inPlace >= 0) {
-                    next_edge_ind = i;
-                }
-            }
+            if (chch_conf.inConfs[ch_edge->getOutputNum()].inPlace >= 0)
+                next_ch_edge = ch_edge;
         }
-        return getChild()->getChildEdgeAt(next_edge_ind)->getBaseEdge(LOOK_DOWN);
+        return next_ch_edge->getBaseEdge(LOOK_DOWN);
     } else if (parentConfig.outConfs[inputNum].inPlace >= 0 && (look & LOOK_UP)) {
-        if (parentConfig.inConfs[parentConfig.outConfs[inputNum].inPlace].inPlace >= 0) {
-            parentConfig.inConfs[parentConfig.outConfs[inputNum].inPlace].inPlace = -1;
+        int next_port_idx = parentConfig.outConfs[inputNum].inPlace;
+        if (parentConfig.inConfs[next_port_idx].inPlace >= 0) {
+            parentConfig.inConfs[next_port_idx].inPlace = -1;
             getParent()->initDescriptor(parentConfig);
         }
-        return getParent()->getParentEdgeAt(parentConfig.outConfs[inputNum].inPlace)->getBaseEdge(LOOK_UP);
+        return getParent()->getParentEdgesAtPort(next_port_idx)[0]->getBaseEdge(LOOK_UP);
     }
 
-    inputNum = getInputNum();
-    return getParent()->getChildEdgeAt(inputNum);
+    auto edges_for_same_port = getParent()->getChildEdgesAtPort(inputNum);
+    if (!(look & LOOK_NO_RECURRENT)) {
+        for (auto edge : edges_for_same_port) {
+            if (edge.get() != this) {
+                auto base = edge->getBaseEdge(LOOK_BOTH | LOOK_NO_RECURRENT);
+                if (base != edge) return base;
+            }
+        }
+    }
+    return edges_for_same_port[0];
 }
 
 bool MKLDNNEdge::inPlace(LOOK look) {
@@ -671,18 +694,12 @@ bool MKLDNNEdge::inPlace(LOOK look) {
     if (look & LOOK_UP) {
         if (parentSPD->getConfig().outConfs[inputNum].inPlace >= 0)
             return true;
-        for (const auto &inConf : parentSPD->getConfig().inConfs) {
-            if (inConf.inPlace == inputNum)
-                return true;
-        }
     }
     if (look & LOOK_DOWN) {
         if (childSPD->getConfig().inConfs[outputNum].inPlace >= 0)
             return true;
-        for (const auto &outConf : childSPD->getConfig().outConfs) {
-            if (outConf.inPlace == inputNum)
-                return true;
-        }
     }
     return false;
 }
+
+}  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h
index f5364f614..759084bbf 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,6 +21,10 @@ using MKLDNNEdgeWeakPtr = std::weak_ptr<MKLDNNEdge>;
 
 class MKLDNNEdge : public InferenceEngine::details::no_copy {
 public:
+    MKLDNNEdge(const std::shared_ptr<MKLDNNNode>& parent,
+               const std::shared_ptr<MKLDNNNode>& child,
+               int pr_port = 0, int ch_port = 0);
+
     enum class Status {
         Uninitialized,
         NeedAllocation,
@@ -28,9 +32,8 @@ public:
         Allocated,
         Validated
     };
-    MKLDNNEdge(const std::shared_ptr<MKLDNNNode>& parent, const std::shared_ptr<MKLDNNNode>& child);
 
-    inline Status getStatus() noexcept {
+    inline Status getStatus() const noexcept {
         return status;
     }
 
@@ -39,26 +42,23 @@ public:
     virtual void init();
     virtual void allocate(const void* mem_ptr = nullptr);
     virtual void validate();
+    void drop();
 
     const std::shared_ptr<MKLDNNNode> getParent() const;
     const std::shared_ptr<MKLDNNNode> getChild() const;
 
-    bool needReorder();
-
     InferenceEngine::Blob::Ptr getBlob();
+    InferenceEngine::TensorDesc getDesc();
+
+    const MKLDNNDims &getDims();
     const MKLDNNMemory& getMemory();
     MKLDNNMemoryPtr& getMemoryPtr();
 
+    bool needReorder();
     bool isDropped();
 
-    InferenceEngine::TensorDesc getDesc();
     int getInputNum();
     int getOutputNum();
-    std::vector<int> getAllOutputNums();
-    std::vector<int> getAllInputNums();
-
-    MKLDNNDims &getDims();
-    void setDims(MKLDNNDims &dims);
 
     void sharedMemFrom(const MKLDNNEdgePtr& edge);
     MKLDNNEdgePtr getSharedEdge() const;
@@ -66,6 +66,9 @@ public:
 private:
     std::weak_ptr<MKLDNNNode> parent;
     std::weak_ptr<MKLDNNNode> child;
+    int parent_port;
+    int child_port;
+
     MKLDNNEdgeWeakPtr memoryFromEdge;
     MKLDNNDims dims;
     MKLDNNMemoryPtr memoryPtr;
@@ -81,9 +84,9 @@ private:
 
     bool nodeCanChangeDesc(const std::shared_ptr<MKLDNNPlugin::MKLDNNNode>& node) const;
 
-    enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN };
+    enum LOOK { LOOK_UP = 1, LOOK_DOWN = 2, LOOK_BOTH = LOOK_UP | LOOK_DOWN, LOOK_NO_RECURRENT = 4 };
 
-    MKLDNNEdgePtr getBaseEdge(LOOK look = LOOK_BOTH);
+    MKLDNNEdgePtr getBaseEdge(int look = LOOK_BOTH);
     bool inPlace(LOOK look = LOOK_BOTH);
     friend class MKLDNNGraph;
 };
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp
index b362433eb..de757ee27 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h
index f3abd8b4a..5481aa131 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
index 3600ee56c..7b45731c8 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,8 +21,11 @@ uint8_t MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type dataType)
         return 1;
     case mkldnn::memory::data_type::u8:
         return 1;
+    case mkldnn::memory::data_type::bin:
+        return 1;
     case mkldnn::memory::data_type::data_undef:
         return 0;
+
     default:
         THROW_IE_EXCEPTION << "Unsupported data type.";
     }
@@ -40,6 +43,8 @@ memory::data_type MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::P
             return memory::s8;
         case InferenceEngine::Precision::U8:
             return memory::u8;
+        case InferenceEngine::Precision::BIN:
+            return memory::bin;
 
         default: {
             THROW_IE_EXCEPTION << "The plugin does not support " << prec.name();
@@ -59,6 +64,8 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d
             return InferenceEngine::Precision::I8;
         case memory::u8:
             return InferenceEngine::Precision::U8;
+        case memory::bin:
+            return InferenceEngine::Precision::BIN;
 
         default: {
             THROW_IE_EXCEPTION << "Unsupported data type.";
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h
index 8b2994e5f..358a1e722 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
index 9c079efd4..13b8e5f83 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -33,6 +33,7 @@
 #include <blob_factory.hpp>
 #include <ie_util_internal.hpp>
 #include <net_pass.h>
+#include <details/ie_cnn_network_tools.h>
 
 #include <mkldnn_graph_dumper.h>
 
@@ -49,10 +50,16 @@
 #include "utils/blob_dump.h"
 
 /*****************************************************
- * Dump capability
- * Specify path to dump folder in BLOB_DUMP_PATH
+ * Debug capability
+ *  - BLOB_DUMP_PATH : Specify with existing folder name
+ *    to dump intermediate blobs into it
+ *  - PRINT_GRAPH_INFO : Define it to enable printing
+ *    additional information to std output.
+ *
  *****************************************************/
-// #define BLOB_DUMP_PATH "dump"
+// #define BLOB_DUMP_PATH "mkldnn_dump"
+// #define PRINT_GRAPH_INFO
+// #define DUMP_AS_TEXT
 
 #ifdef BLOB_DUMP_PATH
 #   define DUMP_DIR        BLOB_DUMP_PATH
@@ -69,11 +76,15 @@ using namespace InferenceEngine;
 using namespace InferenceEngine::details;
 
 void MKLDNNGraph::CreateGraph(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
-    if (IsReady()) {
+    if (IsReady())
         ForgetGraphData();
-    }
 
-    // go over the inputs and create input primitives
+    Replicate(network, extMgr);
+    InitGraph();
+    status = Ready;
+}
+
+void MKLDNNGraph::Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) {
     InputsDataMap inputs;
     network.getInputsInfo(inputs);
     if (inputs.empty()) {
@@ -86,160 +97,84 @@ void MKLDNNGraph::CreateGraph(const ICNNNetwork &network, const MKLDNNExtensionM
         if (inputLayer) inputLayer->precision = inputLayer->outData[0]->precision;
     }
 
-    for (const auto& input : inputs) {
-        auto inputLayer = input.second->getInputData()->getCreatorLayer().lock();
-        if (!inputLayer) {
-            // For v1 parser
-            inputLayer.reset(new CNNLayer({input.second->getInputData()->getName(),
-                                           "Input",
-                                           input.second->getInputData()->getPrecision()}));
-
-            inputLayer->outData.push_back(input.second->getInputData());
-        }
-
-        const MKLDNNNodePtr inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(inputLayer, getEngine(), extMgr));
-
-        graphNodes.push_back(inputNode);
-        inputNodes[input.first] = inputNode;
-        std::vector<ParsedLayer> queueLayers;
-
-        for (const auto &layer : input.second->getInputData()->getInputTo()) {
-            queueLayers.push_back({inputNode, layer.second, 0});
-        }
+    std::unordered_map<CNNLayerPtr, MKLDNNNodePtr> layer2node;
 
-        while (!queueLayers.empty()) {
-            ParseNode(queueLayers[0].cnnLayer, queueLayers[0].parent, extMgr, queueLayers[0].outIdx, queueLayers);
-            queueLayers.erase(queueLayers.begin());
-        }
+    auto _parent_port = [] (const DataPtr &data) -> int {
+        auto parent = data->creatorLayer.lock();
+        for (int i = 0; parent->outData.size(); i++)
+            if (data == parent->outData[i])
+                return i;
+        return -1;
+    };
 
-        // Loading mean images
-        MKLDNNDims outDims(inputNode->getChildEdgeAt(0)->getDims());
-        if (inputs.find(input.first) != inputs.end()) {
-            InputInfo::Ptr ii = inputs[input.first];
-            if (ii && ii->getPreProcess().getNumberOfChannels()) {
-                _meanImages[input.first].Load(outDims, ii);
-            }
+    // Replicate All Nodes in topological order
+    for (const auto layer : CNNNetSortTopologically(network)) {
+        CNNLayerPtr _layer = layer;
+        if (layer->type == "Memory" && layer->GetParamAsString("index") == "1") {
+            auto memoryId = layer->GetParamAsString("id");
+            _layer.reset(new CNNLayer({layer->name + "/id=" + memoryId, "MemoryInput", layer->precision}));
+            _layer->params = layer->params;
+            _layer->outData = layer->outData;
         }
-    }
 
-    auto allInputs = CNNNetGetAllInputLayers(network);
-    for (const auto& input : allInputs) {
-        auto isRealInput = std::find_if(std::begin(inputs), std::end(inputs), [&](InputsDataMap::value_type& inputInfo){
-            return inputInfo.second->getInputData()->getName() == input->name;
-        });
-        if (isRealInput != std::end(inputs)) {
-            continue;
-        }
+        const MKLDNNNodePtr node(MKLDNNNode::CreateNode(_layer, getEngine(), extMgr));
+        graphNodes.push_back(node);
+        layer2node[layer] = node;
 
-        MKLDNNNodePtr inputNode;
-        CaselessEq<std::string> eq;
+        for (int port = 0; port < layer->insData.size(); port++) {
+            auto data = layer->insData[port].lock();
+            auto parent_layer = data->creatorLayer.lock();
+            if (!parent_layer) continue;  // no parent means that it is input data node (or memory/const layer)
 
-        if (eq(input->type, "Memory")) {
-            auto memoryId = input->GetParamAsString("id");
-            CNNLayerPtr layer(new CNNLayer({input->name + "/id=" + memoryId, "MemoryInput", input->precision}));
-            layer->params = input->params;
-            layer->outData = input->outData;
+            auto parent_node = layer2node[parent_layer];
 
-            inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(layer, getEngine(), extMgr));
-        } else if (eq(input->type, "Const")) {
-            inputNode = MKLDNNNodePtr(MKLDNNNode::CreateNode(input, getEngine(), extMgr));
+            MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port));
+            node->addEdge(edge);
+            graphEdges.push_back(edge);
         }
-        graphNodes.push_back(inputNode);
+    }
 
-        std::vector<ParsedLayer> queueLayers;
-        size_t count_out = 0;
-        for (auto &&outData : input->outData) {
-            for (auto &&layer : outData->getInputTo()) {
-                queueLayers.push_back({inputNode, layer.second, count_out});
-            }
-            count_out++;
-        }
+    std::map<std::string, DataPtr> outputs;
+    network.getOutputsInfo(outputs);
 
-        while (!queueLayers.empty()) {
-            ParseNode(queueLayers[0].cnnLayer, queueLayers[0].parent, extMgr, queueLayers[0].outIdx, queueLayers);
-            queueLayers.erase(queueLayers.begin());
-        }
-    }
+    for (const auto &output : outputs) {
+        const auto data = output.second;
 
-    std::map<std::string, DataPtr> output;
-    network.getOutputsInfo(output);
-
-    for (auto it = output.begin(); it != output.end(); ++it) {
-        const DataPtr& outputDataPtr = it->second;
-
-        MKLDNNNodePtr node = FindNodeWithName(outputDataPtr->getCreatorLayer().lock()->name);
-        if (!node)
-            THROW_IE_EXCEPTION << "Cannot find output layer " << outputDataPtr->getCreatorLayer().lock()->name;
-
-        const std::string name = "out_" + it->first;
-
-        CNNLayerPtr layer(new CNNLayer({name, "Output", outputDataPtr->getCreatorLayer().lock()->outData[0]->getPrecision()}));
-        layer->insData.push_back(outputDataPtr);
-        MKLDNNNodePtr outputLayer(new MKLDNNInputNode(layer, getEngine()));
-        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(node, outputLayer));
-        graphEdges.push_back(edgePtr);
-
-        const std::vector<MKLDNNEdgeWeakPtr>& childEdges = node->getChildEdges();
-        size_t insertBeforeChildEdgeIndex = childEdges.size();
-        if (!childEdges.empty()) {
-            bool outputDataIndexWasFound = false;
-            size_t outputDataIndex = 0;
-            for (size_t i = 0; i < node->getCnnLayer()->outData.size(); ++i) {
-                const DataPtr& otherOutputDataPtr = node->getCnnLayer()->outData[i];
-                if (otherOutputDataPtr->name == it->first) {
-                    outputDataIndexWasFound = true;
-                    outputDataIndex = i;
-                }
-            }
-            IE_ASSERT(outputDataIndexWasFound) << "Node " << node->getName() << " doesn't have output data '" << it->first << "'";
+        auto parent_layer = data->creatorLayer.lock();
+        auto parent_node = layer2node[parent_layer];
 
-            std::unordered_map<Data*, size_t> nodeOutputDataIndexByData;
-            const CNNLayerPtr& nodeLayer = node->getCnnLayer();
-            for (size_t dataIndex = 0; dataIndex < nodeLayer->outData.size(); ++dataIndex) {
-                nodeOutputDataIndexByData.emplace(nodeLayer->outData[dataIndex].get(), dataIndex);
-            }
+        CNNLayerPtr layer(new CNNLayer({"out_" + output.first, "Output", data->precision}));
+        layer->insData.push_back(data);
 
-            auto getOutputDataIndex = [&](const MKLDNNEdgePtr& childEdge) -> size_t {
-                const InferenceEngine::CNNLayerPtr& childNodeLayer = childEdge->getChild()->getCnnLayer();
-                for (const DataWeakPtr& childNodeInsertWeakData : childNodeLayer->insData) {
-                    const DataPtr childNodeInsertData = childNodeInsertWeakData.lock();
-                    if (!childNodeInsertData) {
-                        continue;
-                    }
+        const MKLDNNNodePtr node(MKLDNNNode::CreateNode(layer, getEngine(), extMgr));
 
-                    const auto indexIt = nodeOutputDataIndexByData.find(childNodeInsertData.get());
-                    if (indexIt != nodeOutputDataIndexByData.end()) {
-                        return indexIt->second;
-                    }
-                }
+        MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), 0));
+        node->addEdge(edge);
+        graphEdges.push_back(edge);
 
-                IE_ASSERT(false) << "Node has child edge without insert data";
-            };
+        graphNodes.push_back(node);
+        outputNodes.push_back(node);
+        layer2node[layer] = node;
+    }
 
-            for (size_t childEdgeIndex = 0; childEdgeIndex < childEdges.size(); ++childEdgeIndex) {
-                const MKLDNNEdgePtr childEdge = childEdges[childEdgeIndex].lock();
-                if (!childEdge) {
-                    continue;
-                }
+    // Replicate input nodes
+    for (const auto& input : inputs) {
+        auto inputLayer = input.second->getInputData()->getCreatorLayer().lock();
+        inputNodes[input.first] = layer2node[inputLayer];
 
-                const size_t edgeOutputDataIndex = getOutputDataIndex(childEdge);
-                if (outputDataIndex < edgeOutputDataIndex) {
-                    insertBeforeChildEdgeIndex = childEdgeIndex;
-                    break;
-                }
+        // Loading mean images
+        MKLDNNDims outDims(inputNodes[input.first]->getChildEdgeAt(0)->getDims());
+        if (inputs.find(input.first) != inputs.end()) {
+            InputInfo::Ptr ii = inputs[input.first];
+            if (ii && ii->getPreProcess().getNumberOfChannels()) {
+                _meanImages[input.first].Load(outDims, ii);
             }
         }
-
-        if (insertBeforeChildEdgeIndex < childEdges.size()) {
-            outputLayer->addEdge(edgePtr, 0, insertBeforeChildEdgeIndex, true);
-        } else {
-            outputLayer->addEdge(edgePtr, 0, node->getChildEdges().size());
-        }
-
-        graphNodes.push_back(outputLayer);
-        outputNodes.push_back(outputLayer);
     }
+}
 
+void MKLDNNGraph::InitGraph() {
+    SortTopologically();
     MKLDNNGraphOptimizer optimizer;
     optimizer.ApplyCommonGraphOptimizations(*this);
     SortTopologically();
@@ -259,37 +194,47 @@ void MKLDNNGraph::CreateGraph(const ICNNNetwork &network, const MKLDNNExtensionM
 
     CreatePrimitives();
 
-    // Will do it before cleanup. Because it will lose original layers information
-    if (!config.dumpToDot.empty()) dumpToDotFile(config.dumpToDot + "_init.dot");
+    // Do it before cleanup. Because it will lose original layers information
+    for (auto &graphNode : graphNodes) {
+        auto nodeType = graphNode->getType();
+        if (nodeType == Reorder || nodeType == Output) continue;
+
+        graphNode->addOriginalLayer(graphNode->getCnnLayer());
+        if (graphNode->getFusedWith().size() || graphNode->getMergeWith().size()) {
+            // Original layer names
+            std::vector<MKLDNNNodePtr> internal = graphNode->getFusedWith();
+            auto &merged = graphNode->getMergeWith();
+            internal.insert(internal.end(), merged.begin(), merged.end());
+
+            for (auto &sub_node : internal) {
+                graphNode->addOriginalLayer(sub_node->getCnnLayer());
+            }
+        }
+    }
+    if (!config.dumpToDot.empty())
+        dumpToDotFile(config.dumpToDot + "_init.dot");
 
     for (auto &graphNode : graphNodes) {
         graphNode->cleanup();
     }
 
+#if !defined(NDEBUG) && defined(PRINT_GRAPH_INFO)
     for (auto &graphNode : graphNodes) {
-#ifndef NDEBUG
         std::cout << "name: " << graphNode->getName() << " [ ";
-#endif
         if (graphNode->parentEdges.size() > 0) {
-            auto prnt = graphNode->parentEdges[0].lock();
-#ifndef NDEBUG
-            std::cout << "in: " << prnt->getOutputDesc().getPrecision().name() << "/l="
-                    << prnt->getOutputDesc().getLayout()
+            auto prnt_out_desc = graphNode->parentEdges[0].lock()->getOutputDesc();
+            std::cout << "in: " << prnt_out_desc.getPrecision().name()
+                      << "/l=" << prnt_out_desc.getLayout()
                     << "; ";
-#endif
         }
         if (graphNode->childEdges.size() > 0) {
-            auto chld = graphNode->childEdges[0].lock();
-#ifndef NDEBUG
-            std::cout << "out: " << chld->getInputDesc().getPrecision().name() << "/l="
-                    << chld->getInputDesc().getLayout();
-#endif
+            auto chld_in_desc = graphNode->childEdges[0].lock()->getInputDesc();
+            std::cout << "out: " << chld_in_desc.getPrecision().name()
+                      << "/l=" << chld_in_desc.getLayout();
         }
-#ifndef NDEBUG
         std::cout << " ]"  << std::endl;
-#endif
     }
-
+#endif
 
     mkldnn::stream stream = mkldnn::stream(stream::kind::eager);
     for (auto &graphNode : graphNodes) {
@@ -297,101 +242,6 @@ void MKLDNNGraph::CreateGraph(const ICNNNetwork &network, const MKLDNNExtensionM
             continue;
         graphNode->execute(stream);
     }
-
-    status = Ready;
-}
-
-void MKLDNNGraph::ParseNode(const CNNLayerPtr& cnnLayer, MKLDNNNodePtr& parent,
-                            const MKLDNNExtensionManager::Ptr& extMgr, size_t outIdx,
-                            std::vector<ParsedLayer>& queuelayers) {
-    if (cnnLayer->precision != Precision::FP32 &&
-        cnnLayer->precision != Precision::I8 &&
-        cnnLayer->precision != Precision::U8) {
-        THROW_IE_EXCEPTION << "The plugin does not support " << cnnLayer->precision;
-    }
-
-    MKLDNNNodePtr node = FindNodeWithName(cnnLayer->name);
-    bool exists = false;
-    if (node) {
-        exists = true;
-    } else {
-        node.reset(MKLDNNNode::CreateNode(cnnLayer, getEngine(), extMgr));
-    }
-
-    if (parent) {
-        MKLDNNEdgePtr edgePtr;
-        size_t shift = 0;
-        if (outIdx >= parent->getChildEdges().size() || !parent->getChildEdges()[outIdx].lock()) {
-            edgePtr.reset(new MKLDNNEdge(parent, node));
-            graphEdges.push_back(edgePtr);
-        } else {
-            edgePtr = parent->getChildEdgeAt(outIdx);
-            if (edgePtr->getChild() != node) {
-                edgePtr.reset(new MKLDNNEdge(parent, node));
-                graphEdges.push_back(edgePtr);
-                shift = parent->getChildEdges().size();
-            }
-        }
-
-
-        size_t pIndex = node->getParentEdges().size();
-        if (parent->getCnnLayer() != nullptr) {
-            for (size_t idx = 0; idx < cnnLayer->insData.size(); idx++) {
-                auto cnnLayerIN = cnnLayer->insData[idx].lock();
-                if (cnnLayerIN &&
-                    parent->getCnnLayer()->outData.size() > outIdx &&
-                    cnnLayerIN.get() == parent->getCnnLayer()->outData[outIdx].get()) {
-                    pIndex = idx;
-                    break;
-                }
-            }
-            node->addEdge(edgePtr, pIndex, outIdx + shift);
-            if (cnnLayer->insData.size() > 1) {
-                for (size_t idx = 1; idx < cnnLayer->insData.size(); idx++) {
-                    if (cnnLayer->insData[idx].lock() == cnnLayer->insData[idx - 1].lock()) {
-                        node->addEdge(edgePtr, pIndex + idx, outIdx + shift + idx);
-                    }
-                }
-            }
-        } else {
-            for (size_t idx = 0; idx < cnnLayer->insData.size(); idx++) {
-                if (cnnLayer->insData[idx].lock()->getName() == parent->getName()) {
-                    pIndex = static_cast<int>(idx);
-                    break;
-                }
-            }
-            node->addEdge(edgePtr, pIndex, outIdx + shift);
-        }
-    }
-
-    if (exists)
-        return;
-
-    if (cnnLayer->blobs.find("ext-scale") != cnnLayer->blobs.end())
-        node->ext_scales = cnnLayer->blobs["ext-scale"];
-
-    graphNodes.push_back(node);
-
-    size_t count_out = 0;
-    std::vector<ParsedLayer> remaining;
-    for (const auto &layer : cnnLayer->outData) {
-        bool first = true;
-        for (const auto &data : layer->getInputTo()) {
-            if (first) {
-                queuelayers.push_back({node, data.second, count_out});
-                first = false;
-            } else {
-                // TODO: Just to hide bug with port ordering.
-                //       At first step we visit only first connection
-                //       at port. As second we will visit all remaining.
-                //
-                // Not first connection to the port are stored here
-                remaining.push_back({node, data.second, count_out});
-            }
-        }
-        count_out++;
-    }
-    queuelayers.insert(queuelayers.end(), remaining.begin(), remaining.end());
 }
 
 void MKLDNNGraph::InitNodes() {
@@ -427,44 +277,44 @@ void MKLDNNGraph::InitEdges() {
     size_t numberOfEdges = graphEdges.size();
     for (auto i = 0; i < numberOfEdges; i++) {
         if (graphEdges[i]->needReorder()) {
-            std::string layerName = graphEdges[i]->getParent()->getName() + "_" +
-                    reorderArgs(graphEdges[i]->getInputDesc(), graphEdges[i]->getOutputDesc()) + "_" +
-                    graphEdges[i]->getChild()->getName();
+            auto &edge = graphEdges[i];
+            std::string layerName = edge->getParent()->getName() + "_" +
+                                    reorderArgs(edge->getInputDesc(), edge->getOutputDesc()) + "_" +
+                                    edge->getChild()->getName();
             CNNLayerPtr layer(new CNNLayer({layerName,
                                             "Reorder",
-                                            graphEdges[i]->getInputDesc().getPrecision()}));
+                                            edge->getInputDesc().getPrecision()}));
             MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine()));
             auto *reorderPtr = dynamic_cast<MKLDNNReorderNode *>(newReorder.get());
             if (reorderPtr) {
-                reorderPtr->setDescs(graphEdges[i]->getInputDesc(), graphEdges[i]->getOutputDesc());
+                reorderPtr->setDescs(edge->getInputDesc(), edge->getOutputDesc());
             }
-            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(graphEdges[i]->getParent(), newReorder));
-            beforeNode->setDims(graphEdges[i]->getDims());
-            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, graphEdges[i]->getChild()));
-            afterNode->setDims(graphEdges[i]->getDims());
-
-            auto oIndexes = graphEdges[i]->getAllOutputNums();
-            auto iIndexes = graphEdges[i]->getAllInputNums();
-            if (iIndexes[0] < 0 || oIndexes[0] < 0)
+
+            auto oIndex = edge->getOutputNum();
+            auto iIndex = edge->getInputNum();
+            if (iIndex < 0 || oIndex < 0)
                 THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
-                                   << graphEdges[i]->getParent()->getName() << " and "
-                                   << graphEdges[i]->getChild()->getName() << ".";
+                                   << edge->getParent()->getName() << " and "
+                                   << edge->getChild()->getName() << ".";
+
+            edge->drop();
+
+            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
+            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
 
             // Add edge for beforeNode
             beforeNode->getChild()->parentEdges.push_back(beforeNode);
-            for (auto iIndex : iIndexes) graphEdges[i]->getParent()->childEdges[iIndex] = beforeNode;
+            edge->getParent()->childEdges.push_back(beforeNode);
 
             // Add edge for afterNode
             afterNode->getParent()->childEdges.push_back(afterNode);
-            for (auto oIndex : oIndexes) graphEdges[i]->getChild()->parentEdges[oIndex] = afterNode;
+            edge->getChild()->parentEdges.push_back(afterNode);
 
             newReorder->getSupportedDescriptors();
             newReorder->initSupportedPrimitiveDescriptors();
             newReorder->selectOptimalPrimitiveDescriptor();
 
-            beforeNode->getDesc();
             graphEdges.push_back(beforeNode);
-            afterNode->getDesc();
             graphEdges.push_back(afterNode);
 
             graphNodes.push_back(newReorder);
@@ -492,14 +342,15 @@ void MKLDNNGraph::AllocateWithReuse() {
             for (auto &claster : edge_clasters) {
                 for (auto &element : claster) {
                     if (element == par) {
-                        claster.push_back(edge);
+                        if (std::find(claster.begin(), claster.end(), edge) == claster.end())
+                            claster.push_back(edge);
                         found = true;
                         break;
                     }
                 }
             }
-            if (!found) edge_clasters.push_back({par, edge});
-
+            if (!found)
+                edge_clasters.push_back({par, edge});
         } else {
             bool found = false;
             for (auto &claster : edge_clasters) {
@@ -510,7 +361,8 @@ void MKLDNNGraph::AllocateWithReuse() {
                     }
                 }
             }
-            if (!found) edge_clasters.push_back({edge});
+            if (!found)
+                edge_clasters.push_back({edge});
         }
     }
 
@@ -535,17 +387,17 @@ void MKLDNNGraph::AllocateWithReuse() {
             // remove duplicates in merged claster
             std::sort(base_classter->begin(), base_classter->end());
             base_classter->erase(std::unique(base_classter->begin(), base_classter->end()),
-                    base_classter->end() );
+                                 base_classter->end() );
 
             // remove empty clasters
             edge_clasters.erase(std::remove_if(edge_clasters.begin(), edge_clasters.end(),
-                    [] ( std::vector<MKLDNNEdgePtr> &cls) { return cls.empty(); }),
-                    edge_clasters.end());
+                                               [] ( std::vector<MKLDNNEdgePtr> &cls) { return cls.empty(); }),
+                                edge_clasters.end());
         }
     }
     //======= End of WA ============
 
-    const int alignment = 16;  // 64 bytes or 16 floats
+    const int64_t alignment = 32;  // 32 bytes
 
     std::vector<MemorySolver::Box> boxes(edge_clasters.size());
     for (int i = 0; i < edge_clasters.size(); i++) {
@@ -557,10 +409,12 @@ void MKLDNNGraph::AllocateWithReuse() {
 
             const BlockingDesc block_desk = edge->getDesc().getBlockingDesc();
 
-            int e_size = block_desk.getOffsetPadding() + 1;  // size in elements (from begin of data to last element)
+            int64_t e_size = block_desk.getOffsetPadding() + 1;  // size in bytes (from begin of data to last element)
             for (int j = 0; j < block_desk.getBlockDims().size(); j++)
                 e_size += (block_desk.getBlockDims()[j] - 1) * block_desk.getStrides()[j];
 
+            e_size *= edge->getDesc().getPrecision() == Precision::BIN ? 1 : edge->getDesc().getPrecision().size();
+
             box.start = std::min(e_start, box.start);
             box.finish = std::max(e_finish, box.finish);
             box.size =  std::max(e_size, box.size);
@@ -587,20 +441,20 @@ void MKLDNNGraph::AllocateWithReuse() {
     }
 
     MemorySolver memSolver(boxes);
-    size_t total_size = memSolver.solve() * alignment;
+    size_t total_size = static_cast<size_t>(memSolver.solve()) * alignment;
 
-    memWorkspace.reset(new MKLDNNMemory(eng));
-    memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::FP32, {total_size}, Layout::C)));
-    float* workspace_ptr = static_cast<float*>(memWorkspace->GetData());
+    memWorkspace = std::make_shared<MKLDNNMemory>(eng);
+    memWorkspace->Create(MKLDNNMemoryDesc(TensorDesc(Precision::I8, {total_size}, Layout::C)));
+    auto* workspace_ptr = static_cast<int8_t*>(memWorkspace->GetData());
 
     for (int i = 0; i < edge_clasters.size(); i++) {
         int count = 0;
         for (auto &edge : edge_clasters[i]) {
             if (edge->getStatus() == MKLDNNEdge::Status::NeedAllocation) {
-                int offset = memSolver.getOffset(i);
+                int64_t offset = memSolver.getOffset(i);
                 // !! Fallback to individual memory allocation !!
                 // if you like to check infer without reuse just call this function without arguments.
-                edge->allocate(workspace_ptr + offset * alignment);  // alignment in float
+                edge->allocate(workspace_ptr + offset * alignment);  // alignment in byte
                 count++;
             }
         }
@@ -653,7 +507,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::
         // todo: make sure 'name' exists in this map...
         if (_meanImages.find(name) != _meanImages.end()) {
             if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
-                _meanImages[name].Subtract(outDims, reinterpret_cast<float *>(inter_data_ptr));
+                _meanImages[name].Subtract(outDims, reinterpret_cast<float *>(inter_data_ptr), in->getTensorDesc().getLayout());
             } else {
                 THROW_IE_EXCEPTION << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported";
             }
@@ -734,20 +588,6 @@ void MKLDNNGraph::Infer(int batch) {
     }
 }
 
-MKLDNNNodePtr MKLDNNGraph::FindNodeWithName(const std::string& name) const {
-    if (inputNodes.empty()) {
-        return std::shared_ptr<MKLDNNNode>();
-    }
-
-    const auto children = graphNodes;
-    const auto node = std::find_if(children.begin(), children.end(),
-                             [&name](MKLDNNNodePtr const& item) {
-                                 return item->getName() == name;
-                             });
-
-    return (node == children.end() ? std::shared_ptr<MKLDNNNode>() : *node);
-}
-
 void MKLDNNGraph::VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes) {
     if (node->temporary) {
         return;
@@ -793,12 +633,51 @@ void MKLDNNGraph::SortTopologically() {
 
     graphNodes.erase(graphNodes.begin(), graphNodes.end());
     graphNodes.assign(sorted.begin(), sorted.end());
+
+    // TODO: Sort in/out edges by port index because of backward compatibility
+    //       A lot of plugin logic are build on top of assumption that index in
+    //       vector childEdges/parentEdges is port number. But that is not
+    //       truth anymore. But to keep old logic correct need to simulate ordering.
+    //
+    // Make first N (N == port_num) edge indexes are matched with port index
+    for (auto &node : graphNodes) {
+        {
+            int port_num = node->inDims.size();
+            std::vector<MKLDNNEdgePtr> res(port_num);
+
+            for (int i = 0; i < node->parentEdges.size(); i++) {
+                auto edge = node->getParentEdgeAt(i);
+                int port = edge->getOutputNum();
+                if (!res[port])
+                    res[port] = edge;
+                else
+                    res.push_back(edge);
+            }
+            node->parentEdges = {res.begin(), res.end()};
+        }
+        {
+            int port_num = node->outDims.size();
+            std::vector<MKLDNNEdgePtr> res(port_num);
+
+            for (int i = 0; i < node->childEdges.size(); i++) {
+                auto edge = node->getChildEdgeAt(i);
+                int port = edge->getInputNum();
+                if (!res[port])
+                    res[port] = edge;
+                else
+                    res.push_back(edge);
+            }
+            node->childEdges = {res.begin(), res.end()};
+        }
+    }
 }
 
 void MKLDNNGraph::GetPerfData(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap) const {
+    unsigned i = 0;
     std::function<void(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &, const MKLDNNNodePtr&)>
             getPerfMapFor = [&](std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> &perfMap, const MKLDNNNodePtr& node) {
         InferenceEngine::InferenceEngineProfileInfo &pc = perfMap[node->getName()];
+        pc.execution_index = i++;
         // TODO: Why time counter is signed?
         pc.cpu_uSec = pc.realTime_uSec = (long long) node->PerfCounter().avg();
         pc.status = pc.cpu_uSec > 0 ? InferenceEngine::InferenceEngineProfileInfo::EXECUTED
@@ -863,38 +742,40 @@ void MKLDNNGraph::DropNode(const MKLDNNNodePtr &node) {
             }
         }
     };
-    for (size_t i = 0; i < node->parentEdges.size(); i++) {
-        if (!node->parentEdges[i].lock())
-            continue;
-        auto parent = node->parentEdges[i].lock()->getParent();
-        if (!parent)
-            continue;
 
-        for (size_t j = 0; j < node->childEdges.size(); j++) {
-            if (!node->childEdges[j].lock())
+    auto childs = node->childEdges;
+    auto parents = node->parentEdges;
+
+    for (size_t i = 0; i < parents.size(); i++) {
+        auto p_edge = parents[i].lock();
+        if (!p_edge) continue;
+        auto parent = p_edge->getParent();
+        if (!parent) continue;
+
+        for (size_t j = 0; j < childs.size(); j++) {
+            if (!childs[j].lock())
                 continue;
-            auto child = node->childEdges[j].lock()->getChild();
+            auto child = childs[j].lock()->getChild();
             if (!child)
                 continue;
 
-            MKLDNNEdgePtr remEdge = node->parentEdges[i].lock();
+            MKLDNNEdgePtr &remEdge = p_edge;
             int inNum = 0;
             if (remEdge) {
                 inNum = remEdge->getInputNum();
-                node->removeEdge(remEdge);
+                remEdge->drop();
                 removeEdge(*this, remEdge);
             }
-            inNum += j;
-            remEdge = node->childEdges[j].lock();
+            remEdge = childs[j].lock();
             int outNum = 0;
             if (remEdge) {
                 outNum = remEdge->getOutputNum();
-                node->removeEdge(remEdge);
+                remEdge->drop();
                 removeEdge(*this, remEdge);
             }
-            MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child));
-            this->GetEdges().push_back(newEdge);
-            parent->addEdge(newEdge, outNum, inNum);
+            MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
+            graphEdges.push_back(newEdge);
+            parent->addEdge(newEdge);
         }
     }
 }
@@ -939,7 +820,10 @@ void MKLDNNGraph::dumpToDotFile(std::string file) const {
 void MKLDNNGraph::do_before(const std::string &dir, const MKLDNNNodePtr &node) {
     auto exec_order = std::to_string(node->execIndex);
     std::string nodeName = node->name;
+    std::replace(nodeName.begin(), nodeName.end(), '\\', '_');
     std::replace(nodeName.begin(), nodeName.end(), '/', '_');
+    std::replace(nodeName.begin(), nodeName.end(), ' ', '_');
+    std::replace(nodeName.begin(), nodeName.end(), ':', '_');
 
     auto num_ports = node->getSelectedPrimitiveDescriptor()->getConfig().inConfs.size();
     for (size_t i = 0; i < num_ports; i++) {
@@ -948,18 +832,27 @@ void MKLDNNGraph::do_before(const std::string &dir, const MKLDNNNodePtr &node) {
 
         auto dump_file = dir + "/#" + exec_order + "_" +  nodeName + "_in" + std::to_string(i) + ".ieb";
         TensorDesc desc = prEdge->getDesc();
+        if (desc.getPrecision() == Precision::BIN)
+            return;
         Blob::Ptr blob = make_blob_with_precision(desc, prEdge->getMemoryPtr()->GetData());
 
         BlobDumper dumper(blob);
         if (pr->ext_scales) dumper.withScales(pr->ext_scales);
+#ifdef DUMP_AS_TEXT
+        dumper.dumpAsTxt(dump_file);
+#else
         dumper.dump(dump_file);
+#endif
     }
 }
 
 void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) {
     auto exec_order = std::to_string(node->execIndex);
     auto nodeName = node->name;
+    std::replace(nodeName.begin(), nodeName.end(), '\\', '_');
     std::replace(nodeName.begin(), nodeName.end(), '/', '_');
+    std::replace(nodeName.begin(), nodeName.end(), ' ', '_');
+    std::replace(nodeName.begin(), nodeName.end(), ':', '_');
 
     auto num_ports = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size();
     for (size_t i = 0; i < num_ports; i++) {
@@ -967,15 +860,25 @@ void MKLDNNGraph::do_after(const std::string &dir, const MKLDNNNodePtr &node) {
 
         auto dump_file = dir + "/#" + exec_order + "_" +  nodeName + "_out" + std::to_string(i) + ".ieb";
         TensorDesc desc = childEdge->getDesc();
+        if (desc.getPrecision() == Precision::BIN)
+            return;
         Blob::Ptr blob = make_blob_with_precision(desc, childEdge->getMemoryPtr()->GetData());
 
         BlobDumper dumper(blob);
         if (node->ext_scales) dumper.withScales(node->ext_scales);
 
+#ifdef DUMP_AS_TEXT
+        dumper.dumpAsTxt(dump_file);
+#else
         dumper.dump(dump_file);
+#endif
     }
 }
 
+InferenceEngine::ICNNNetwork::Ptr MKLDNNGraph::dump() const {
+    return dump_graph_as_ie_net(*this);
+}
+
 bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::ICNNNetwork &network) const {
     InputsDataMap inputs;
     network.getInputsInfo(inputs);
@@ -1037,16 +940,26 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
                                      const MKLDNNExtensionManager::Ptr& extMgr) : extensionManager(extMgr) {
     ICNNNetworkStats* pstats = nullptr;
     StatusCode s = network.getStats(&pstats, nullptr);
-    // we are cloning network if we have statistics and we can transform network
-    // in other case we pass original network. Especially because LSTM networks
-    // are not cloned properly
-    details::CNNNetworkImplPtr clonedNetwork;
+    // we are cloning network if we have statistics and we can transform network.
+    auto clonedNetwork = cloneNet(network);
+
     if (s == StatusCode::OK && pstats && !pstats->isEmpty()) {
         CNNNetworkInt8Normalizer cnnorm;
-        clonedNetwork = cloneNet(network);
         cnnorm.NormalizeNetwork(*clonedNetwork, *pstats);
     }
-    bool ti_proc_ok = !NetPass::CombineLSTMSeq(network) ? NetPass::UnrollTI(network) : true;
+
+    bool ti_proc_ok = !NetPass::CombineRNNSeq(*clonedNetwork) ? NetPass::UnrollTI(*clonedNetwork) : true;
+    ti_proc_ok &= NetPass::UnrollRNN_if(*clonedNetwork, [] (RNNCellBase rnn) -> bool {
+        if (rnn.clip != 0.0f)
+            return true;
+        if ((rnn.cellType == RNNCellBase::GRU || rnn.cellType == RNNCellBase::GRU_LBR) &&
+                rnn.activations != std::vector<std::string> {"sigmoid", "tanh"})
+            return true;
+        if (rnn.cellType == RNNCellBase::LSTM &&
+                rnn.activations != std::vector<std::string> {"sigmoid", "tanh", "tanh"})
+            return true;
+        return false;
+    });
     if (!ti_proc_ok)
         THROW_IE_EXCEPTION << "Plugin doesn't support Tensor Iterator in pure form. "
                               "None TI optimization pattern has been applied successfully";
@@ -1054,7 +967,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
 
     if (cfg.batchLimit > 1) {
         // check topology for applicability
-        if (!CanProcessDynBatch(clonedNetwork ? *clonedNetwork : network)) {
+        if (!CanProcessDynBatch(*clonedNetwork)) {
             THROW_IE_EXCEPTION << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!";
         }
     }
@@ -1081,7 +994,7 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::ICNNNetwork &network
             }
 
             _graph->setConfig(cfg);
-            _graph->CreateGraph(clonedNetwork ? *clonedNetwork : network, extensionManager);
+            _graph->CreateGraph(*clonedNetwork, extensionManager);
             if (cfg.throughputStreams > 1)  // for streams, each worker thread has it's own graph
                 MKLDNNPlugin::MultiWorkerTaskExecutor::ptrContext.ptrGraph = _graph;
         });
@@ -1126,3 +1039,7 @@ void MKLDNNExecNetwork::CreateInferRequest(InferenceEngine::IInferRequest::Ptr &
         mkldnnSyncRequest->SetGraph(graphs[0]);
     }
 }
+
+void MKLDNNExecNetwork::GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) {
+    graphPtr = graphs[0]->dump();
+}
+\ No newline at end of file
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
index de026b5ad..7b01c71eb 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -111,8 +111,9 @@ public:
         #endif
     }
 
+    InferenceEngine::ICNNNetwork::Ptr dump() const;
+
 protected:
-    MKLDNNNodePtr FindNodeWithName(const std::string& name) const;
     void VisitNode(MKLDNNNodePtr node, std::vector<MKLDNNNodePtr>& sortedNodes);
     void SortTopologically();
 
@@ -144,6 +145,8 @@ protected:
     #endif
     mkldnn::engine eng;
 
+    void Replicate(const ICNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr);
+    void InitGraph();
     void InitNodes();
     void InitEdges();
     void Allocate();
@@ -164,8 +167,6 @@ private:
         InferenceEngine::CNNLayerPtr cnnLayer;
         size_t outIdx;
     };
-    void ParseNode(const InferenceEngine::CNNLayerPtr& cnnLayer, MKLDNNNodePtr& parent,
-                   const MKLDNNExtensionManager::Ptr& extMgr, size_t outIdx, std::vector<ParsedLayer>& layers);
 };
 
 
@@ -188,6 +189,8 @@ public:
 
     void setProperty(const std::map<std::string, std::string> &properties);
 
+    void GetExecGraphInfo(InferenceEngine::ICNNNetwork::Ptr &graphPtr) override;
+
 protected:
     std::vector<MKLDNNGraph::Ptr> graphs;
     MKLDNNExtensionManager::Ptr extensionManager;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp
index ae24579f6..8b9bcc889 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp
@@ -1,10 +1,23 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "mkldnn_graph_dumper.h"
 #include "cnn_network_impl.hpp"
 #include "ie_util_internal.hpp"
+#include "exec_graph_info.hpp"
 
 #include <vector>
 #include <string>
@@ -33,7 +46,7 @@ std::shared_ptr<ICNNNetwork> dump_graph_as_ie_net(const MKLDNNGraph &graph) {
     auto net = std::make_shared<details::CNNNetworkImpl>();
 
     net->setPrecision(Precision::FP32);
-    net->setName("internal_cpu_graph");
+    net->setName("runtime_cpu_graph");
     std::map<MKLDNNNodePtr, CNNLayerPtr> node2layer;
 
     // Copy all nodes to network
@@ -109,6 +122,7 @@ static std::map<Type, std::string> type_n2l {
     {Lrn, "Lrn"},
     {Pooling, "Pool"},
     {FullyConnected, "FC"},
+    {FullyConnected_Activation, "FC_Activ"},
     {SoftMax, "SoftMax"},
     {Split, "Split"},
     {Concatenation, "Concat"},
@@ -122,37 +136,24 @@ static std::map<Type, std::string> type_n2l {
     {BatchNormalization, "BatchNorm"},
     {Flatten, "Flatten"},
     {Permute, "Permute"},
+    {Quantize, "Quantize"},
+    {BinaryConvolution, "BinaryConvolution"},
     {MemoryOutput, "MemoryIn"},
     {MemoryInput, "MemoryOut"}
 };
 
-static const std::string ORIGIN_NAMES = "origin";
-static const std::string IMPL_TYPE    = "impl";
-static const std::string PRECISION    = "prec";
-static const std::string PERF_COUNTER = "perf";
-
-static const std::string BLUE  = "#D8D9F1";
-static const std::string GREEN = "#D9EAD3";
+static const char BLUE[]  = "#D8D9F1";
+static const char GREEN[] = "#D9EAD3";
 
 void copy_node_metadata(const MKLDNNNodePtr &node, CNNLayer::Ptr &layer) {
     layer->type = type_n2l[node->getType()];
     layer->name = node->getName();  // Is ID
 
-    if (node->getCnnLayer()) {
-        // Original layer names
-        std::vector<MKLDNNNodePtr> internal = node->getFusedWith();
-        auto &merged = node->getMergeWith();
-        internal.insert(internal.end(), merged.begin(), merged.end());
-
-        std::string orig_names = node->getCnnLayer()->name;
-        for (auto &sub_node : internal)
-            orig_names += " " + sub_node->getCnnLayer()->name;
-
-        layer->params[ORIGIN_NAMES] = orig_names;
-    }
+    // Original layers
+    layer->params[ExecGraphInfoSerialization::ORIGIN_NAMES] = node->getOriginalLayers();
 
     // Implementation type name
-    layer->params[IMPL_TYPE] = node->getPrimitiveDescriptorType();
+    layer->params[ExecGraphInfoSerialization::IMPL_TYPE] = node->getPrimitiveDescriptorType();
 
     // Precision
     // TODO: That is not fully correct mapping type to precision.
@@ -169,11 +170,13 @@ void copy_node_metadata(const MKLDNNNodePtr &node, CNNLayer::Ptr &layer) {
     if (impl_type & jit && impl_type & avx512 &&
         node->getParentEdgeAt(0)->getDesc().getPrecision() == Precision::U8)  precision = "INT8";
 
-    layer->params[PRECISION] = precision;
+    layer->params[ExecGraphInfoSerialization::PRECISION] = precision;
 
     // Performance
     if (node->PerfCounter().avg() != 0) {
-        layer->params[PERF_COUNTER] = std::to_string(node->PerfCounter().avg())+ " mcs";
+        layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = std::to_string(node->PerfCounter().avg());
+    } else {
+        layer->params[ExecGraphInfoSerialization::PERF_COUNTER] = "not_executed";  // it means it was not calculated yet
     }
 }
 
@@ -183,25 +186,29 @@ void drawer_callback(const InferenceEngine::CNNLayerPtr layer,
     const auto &params = layer->params;
 
     // Implementation
-    auto impl = params.find(IMPL_TYPE);
+    auto impl = params.find(ExecGraphInfoSerialization::IMPL_TYPE);
     if (impl != params.end()) {
         printed_properties.push_back({"impl", impl->second});
     }
 
     // Original names
-    auto orig = params.find(ORIGIN_NAMES);
+    auto orig = params.find(ExecGraphInfoSerialization::ORIGIN_NAMES);
     if (orig != params.end()) {
         printed_properties.push_back({"originals", orig->second});
     }
 
     // Precision
-    auto prec = params.find(PRECISION);
+    auto prec = params.find(ExecGraphInfoSerialization::PRECISION);
     if (prec != params.end()) {
         printed_properties.push_back({"precision", prec->second});
     }
 
     // Set color
     node_properties.push_back({"fillcolor", prec->second == "FP32" ? GREEN : BLUE});
+
+    // Set xlabel containing PM data if calculated
+    auto perf = layer->params.find(ExecGraphInfoSerialization::PERF_COUNTER);
+    node_properties.push_back({"xlabel", (perf != layer->params.end()) ? perf->second : ""});
 }
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h
index 6ec5ffc45..b419109dd 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
index 6c88ebd6f..472340318 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -17,6 +17,9 @@
 #include <memory>
 #include <set>
 #include <ie_layers_internal.hpp>
+#include <nodes/mkldnn_bin_conv_node.h>
+#include <nodes/mkldnn_quantize_node.h>
+#include "cpu_isa_traits.hpp"
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -28,8 +31,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
     MergeGroupConvolution(graph);
     graph.RemoveDroppedNodes();
 
-//    SLTMTransform(graph);
-//    RemoveDropped(graph);
+    FuseConvolutionAndDepthwise(graph);
+    graph.RemoveDroppedNodes();
 
     FuseConvolutionAndActivation(graph);
     graph.RemoveDroppedNodes();
@@ -40,9 +43,15 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
     FuseConvolutionAndDWConvolution(graph);
     graph.RemoveDroppedNodes();
 
+    FuseBinaryConvolutionAndQuantize(graph);
+    graph.RemoveDroppedNodes();
+
     FuseBatchNormWithScale(graph);
     graph.RemoveDroppedNodes();
 
+    FuseFullyConnectedAndActivation(graph);
+    graph.RemoveDroppedNodes();
+
     RemoveIdentityOperator(graph);
     graph.RemoveDroppedNodes();
 
@@ -113,6 +122,9 @@ void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) {
         conv->inDims[0] = convInDims;
         conv->outDims[0] = convOutDims;
 
+        conv->fuseWith(split);
+        conv->fuseWith(concat);
+
         graph.DropNode(split);
         graph.DropNode(concat);
     }
@@ -167,11 +179,12 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
     };
 
     for (int i = 0; i < graphNodes.size(); i++) {
-        if (graphNodes[i]->getType() == Convolution) {
+        if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) {
             auto conv = graphNodes[i];
 
             auto fuse = [&] (MKLDNNNodePtr relu) {
-                conv->setType(Convolution_Activation);
+                if (graphNodes[i]->getType() != BinaryConvolution)
+                    conv->setType(Convolution_Activation);
                 conv->fuseWith(relu);
             };
 
@@ -215,9 +228,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
     auto& graphNodes = graph.GetNodes();
 
     auto isSutableParentNode = [](MKLDNNNodePtr node) {
-        return (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
-                node->getCnnLayer()->precision == Precision::FP32 &&
-               (node->getChildEdges().size() == 1);
+        bool isSutableConv = (node->getType() == Convolution || node->getType() == Convolution_Activation) &&
+                             node->getCnnLayer()->precision == Precision::FP32;
+        bool isSutableBinConv = node->getType() == BinaryConvolution;
+        return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
     };
 
     auto isSutableChildNode = [](MKLDNNNodePtr node) {
@@ -240,7 +254,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
         if (!isSutableChildNode(depthwise0)) continue;
 
         conv->fuseWith(depthwise0);
-        conv->setType(Convolution_Depthwise);
+        if (conv->type != BinaryConvolution)
+            conv->setType(Convolution_Depthwise);
 
         if (depthwise0->getChildEdges().size() == 1) {
             auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild();
@@ -262,64 +277,163 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
         return node->getType() == Convolution || node->getType() == Convolution_Activation;
     };
 
+    auto isBinaryConvolutionNode = [](MKLDNNNodePtr node) {
+        return node->getType() == BinaryConvolution;
+    };
+
     auto is1x1Convolution = [](ConvolutionLayer* layer) {
         return layer->_kernel[X_AXIS] == 1 && layer->_kernel[Y_AXIS] == 1;
     };
 
     auto isSutableParentConvolution = [&](MKLDNNNodePtr node) {
-        auto* layer = dynamic_cast<ConvolutionLayer*>(node->getCnnLayer().get());
+        if (isBinaryConvolutionNode(node)) {
+            auto *layer = dynamic_cast<BinaryConvolutionLayer *>(node->getCnnLayer().get());
+
+            bool isSupportedParams = layer->_group == 1;
+            if (!isSupportedParams) return false;
+        } else {
+            auto *layer = dynamic_cast<ConvolutionLayer *>(node->getCnnLayer().get());
 
-        bool isSupportedParams = layer->_group == 1 &&
-                                 ((is1x1Convolution(layer) &&
-                                  layer->_stride[X_AXIS] == 1 && layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
-                                  layer->precision == Precision::FP32;;
-        if (!isSupportedParams) return false;
+            bool isSupportedParams = layer->_group == 1 &&
+                                     ((is1x1Convolution(layer) && layer->_stride[X_AXIS] == 1 &&
+                                       layer->_stride[Y_AXIS] == 1) || !is1x1Convolution(layer)) &&
+                                     (layer->precision == Precision::FP32 || layer->precision == Precision::I8);
+            if (!isSupportedParams) return false;
+        }
 
         return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild());
     };
 
-    auto isSutableChildConvolution = [](MKLDNNNodePtr node) {
-        auto* layer = dynamic_cast<ConvolutionLayer*>(node->getCnnLayer().get());
-        auto allPads = getPaddings(*layer);
-        bool isSupportedParams = layer->_out_depth == layer->_group &&
+    auto isSutableChildConvolution = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        auto* childLayer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());
 
-                                 layer->_out_depth != 1 &&
-                                 // Depthwise convolution output should be multiple of 8
+        if (!isBinaryConvolutionNode(parentNode)) {
+            auto* parentLayer = dynamic_cast<ConvolutionLayer*>(parentNode->getCnnLayer().get());
+            if (parentLayer->precision != childLayer->precision)
+                return false;
+        }
 
-                                 layer->_kernel[X_AXIS] == 3 && layer->_kernel[Y_AXIS] == 3 &&
+        auto allPads = getPaddings(*childLayer);
+        bool isSupportedParams = childLayer->_out_depth == childLayer->_group &&
+                                 childLayer->_out_depth != 1 &&
+                                 // Depthwise convolution output should be multiple of 8
+                                 childLayer->_kernel[X_AXIS] == 3 && childLayer->_kernel[Y_AXIS] == 3 &&
                                  allPads.begin[X_AXIS] == 1 && allPads.begin[Y_AXIS] == 1 &&
-                                 layer->_dilation[X_AXIS] == 1 && layer->_dilation[Y_AXIS] == 1 &&
-                                 layer->_biases != nullptr && layer->_biases->size() != 0 &&
-                                 layer->precision == Precision::FP32;
+                                 childLayer->_dilation[X_AXIS] == 1 && childLayer->_dilation[Y_AXIS] == 1 &&
+                                 childLayer->_biases != nullptr && childLayer->_biases->size() != 0;
+
         return isSupportedParams;
     };
 
-    auto isFusingWorthwhile = [](MKLDNNNodePtr node) {
-        auto inDims = node->inDims[0];
-        auto outDims = node->outDims[0];
+    auto isFusingWorthwhile = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        if (isBinaryConvolutionNode(parentNode)) {
+            return true;
+        }
+
+        auto* layer = dynamic_cast<ConvolutionLayer*>(childNode->getCnnLayer().get());
+
+        auto inDims = childNode->inDims[0];
+        auto outDims = childNode->outDims[0];
+        int elemSize = MKLDNNExtensionUtils::sizeOfDataType(MKLDNNExtensionUtils::IEPrecisionToDataType(layer->precision));
 
         int L3_cache_size = mkldnn_get_cache_size(3, false);
-        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * sizeof(float);
-        int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * sizeof(float);
-        return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
+        int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize;
+        int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize;
+
+        bool isInt8 = layer->precision == Precision::I8 || layer->precision == Precision::U8;
+        bool isAVX512NotSupported = !mkldnn::impl::cpu::mayiuse(impl::cpu::cpu_isa_t::avx512_common);
+
+        return isInt8 ? isAVX512NotSupported : (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2);
     };
 
     for (int i = 0; i < graphNodes.size(); i++) {
-        if (!isConvolutionNode(graphNodes[i])) continue;
+        if (!isConvolutionNode(graphNodes[i]) && !isBinaryConvolutionNode(graphNodes[i])) continue;
 
         auto parentConvNode = graphNodes[i];
         if (!isSutableParentConvolution(parentConvNode)) continue;
 
         auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild();
-        if (!isSutableChildConvolution(childConvNode)) continue;
+        if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue;
 
-        if (!isFusingWorthwhile(childConvNode)) continue;
+        if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue;
 
         parentConvNode->fuseWith(childConvNode);
         graph.DropNode(childConvNode);
     }
 }
 
+void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) {
+    auto removeEdge = [](MKLDNNGraph &graph, MKLDNNEdgePtr& edge) {
+        auto& edges = graph.GetEdges();
+        for (auto it = edges.begin(); it != edges.end(); it++) {
+            if ((*it) == edge) {
+                edges.erase(it);
+                return;
+            }
+        }
+    };
+
+    auto& graphNodes = graph.GetNodes();
+
+    auto isSutableParentNode = [](MKLDNNNodePtr node) {
+        bool isSutableBinConv = node->getType() == BinaryConvolution;
+        return isSutableBinConv && node->getChildEdges().size() == 1;
+    };
+
+    auto isSutableChildNode = [](MKLDNNNodePtr node) {
+        if (!node->getCnnLayer())
+            return false;
+
+        auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(node->getCnnLayer().get());
+        bool isSutableQuantize = node->getType() == Quantize && quantizeLayer->levels == 2;
+
+        return isSutableQuantize;
+    };
+
+    for (int i = 0; i < graphNodes.size(); i++) {
+        auto parent = graphNodes[i];
+        if (!isSutableParentNode(parent)) continue;
+
+        auto child = parent->getChildEdgeAt(0)->getChild();
+        if (!isSutableChildNode(child)) continue;
+
+        parent->fuseWith(child);
+
+        auto* binConvNode = dynamic_cast<MKLDNNBinaryConvolutionNode*>(parent.get());
+
+        auto parents = child->parentEdges;
+        for (size_t i = 0; i < parents.size(); i++) {
+            auto p_edge = parents[i].lock();
+            if (p_edge->getParent()->getType() == Input) {
+                InferenceEngine::SizeVector dims;
+                dims.push_back(binConvNode->getChildEdgeAt(0)->getDims()[1]);
+
+                auto InputLowBlob = dynamic_cast<TBlob<float>*>(p_edge->getParent()->getCnnLayer()->blobs["custom"].get());
+
+                auto inputLowData = InputLowBlob->buffer().as<float*>();
+                int inputLowAxis = p_edge->getDims().ndims() == 1 ? 0 : 1;
+                bool isInputLowBroadcasted = p_edge->getDims()[inputLowAxis] != dims[0];
+
+                for (int i = 0; i < dims[0]; i++) {
+                    binConvNode->pushBinarizationThreshold(inputLowData[isInputLowBroadcasted ? 0 : i]);
+                }
+
+                break;
+            }
+        }
+
+        for (size_t i = 0; i < parents.size(); i++) {
+            auto p_edge = parents[i].lock();
+            if (p_edge->getParent()->getType() == BinaryConvolution)
+                continue;
+
+            removeEdge(graph, p_edge);
+        }
+
+        graph.DropNode(child);
+    }
+}
+
 /**
  *  Check if there is a data dependency between parent and child
  *  BFS starting from parent and comparing with child
@@ -417,18 +531,18 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
         if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isSum()) continue;
         if (!std::dynamic_pointer_cast<MKLDNNEltwiseNode>(graphNode)->isUnitScales()) continue;
 
+        auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
+        auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
         // TODO: Enlarge to several inputs
         if (graphNode->getParentEdges().size() != 2 ||
-            (graphNode->getParentEdgeAt(0)->getParent()->getType() != Convolution &&
-                    graphNode->getParentEdgeAt(1)->getParent()->getType() != Convolution))
+            (parent1->getType() != Convolution && parent1->getType() != BinaryConvolution &&
+             parent2->getType() != Convolution && parent2->getType() != BinaryConvolution))
             continue;
 
-        auto parent1 = graphNode->getParentEdgeAt(0)->getParent();
-        auto parent2 = graphNode->getParentEdgeAt(1)->getParent();
-
-        auto mergedConv = (parent1->getType() == Convolution) ? parent1 : parent2;
-        auto peerNode = (parent1->getType() == Convolution) ? parent2 : parent1;
-        if (peerNode->getType() == Convolution && mergedConv->getChildEdges().size() != 1) {
+        auto mergedConv = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent1 : parent2;
+        auto peerNode = (parent1->getType() == Convolution || parent1->getType() == BinaryConvolution) ? parent2 : parent1;
+        if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) &&
+            mergedConv->getChildEdges().size() != 1) {
             mergedConv = parent2;
             peerNode = parent1;
         }
@@ -455,16 +569,23 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
                 isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) {
             auto relu_shared = graphNode->getChildEdgeAt(0)->getChild();
             lastNode = relu_shared;
-            mergedConv->setType(Convolution_Sum_Activation);
+            if (mergedConv->getType() != BinaryConvolution)
+                mergedConv->setType(Convolution_Sum_Activation);
             mergedConv->fuseWith(sum);
         } else {
-            mergedConv->setType(Convolution_Sum);
+            if (mergedConv->getType() != BinaryConvolution)
+                mergedConv->setType(Convolution_Sum);
         }
 
         mergedConv->fuseWith(lastNode);
 
-        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv));
-        graph.GetEdges().push_back(edgePtr);
+        if (mergedConv->fusedWith.size() > 0 &&
+           (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
+            // Merged with DW_conv. Shape may change
+            mergedConv->inDims.push_back(mergedConv->fusedWith[0]->outDims[0]);
+        } else {
+            mergedConv->inDims.push_back(mergedConv->outDims[0]);
+        }
 
         size_t childIdx = 0;
         for (childIdx = 0; childIdx < peerNode->getChildEdges().size(); childIdx++) {
@@ -473,17 +594,29 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
             }
         }
 
-        mergedConv->addEdge(edgePtr, mergedConv->getParentEdges().size(), childIdx);
+        int peer_port = peerNode->getChildEdgeAt(childIdx)->getInputNum();
+        peerNode->getChildEdgeAt(childIdx)->drop();
+
+        MKLDNNEdgePtr edgePtr(new MKLDNNEdge(peerNode, mergedConv, peer_port, 1));
+        graph.GetEdges().push_back(edgePtr);
+
+        mergedConv->addEdge(edgePtr);
 
-        for (size_t j = 0; j < lastNode->getChildEdges().size(); j++) {
-            auto child = lastNode->getChildEdgeAt(j)->getChild();
-            edgePtr = lastNode->getChildEdgeAt(j);
-            int idxParent = edgePtr->getOutputNum();
-            int idxChild = edgePtr->getInputNum();
+        std::vector<MKLDNNEdgeWeakPtr> edges_to_reconnect = lastNode->getChildEdges();
+        for (auto &edge_w : edges_to_reconnect) {
+            auto edge = edge_w.lock();
+            auto child = edge->getChild();
+            int idxParent = edge->getInputNum();
+            int idxChild = edge->getOutputNum();
 
-            MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child));
+            // reconnect after  activation/sum. Port index must be 0
+            IE_ASSERT(idxParent == 0);
+
+            edge->drop();
+
+            MKLDNNEdgePtr newEdge(new MKLDNNEdge(mergedConv, child, idxParent, idxChild));
             graph.GetEdges().push_back(newEdge);
-            child->addEdge(newEdge, idxParent, idxChild);
+            child->addEdge(newEdge);
         }
 
         if (lastNode != sum) {
@@ -493,6 +626,40 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
     }
 }
 
+void MKLDNNGraphOptimizer::FuseFullyConnectedAndActivation(MKLDNNGraph &graph) {
+    auto& graphNodes = graph.GetNodes();
+
+    auto isFusingSupported = [&](MKLDNNNodePtr fc, MKLDNNNodePtr activation) {
+        if (!activation->getCnnLayer())
+            return false;
+
+        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(activation.get());
+
+        // TODO: fuse on fp32 not optimized yet in mkl-dnn
+        return activationNode && fc->getCnnLayer()->precision != Precision::FP32 &&
+            (activationNode->getAlgorithm() == eltwise_relu);
+    };
+
+    for (int i = 0; i < graphNodes.size(); i++) {
+        if (graphNodes[i]->getType() == FullyConnected) {
+            auto fc = graphNodes[i];
+
+            auto fuse = [&] (MKLDNNNodePtr relu) {
+                fc->setType(FullyConnected_Activation);
+                fc->fuseWith(relu);
+            };
+
+            if (fc->getChildEdges().size() == 1) {
+                auto ch1 = fc->getChildEdgeAt(0)->getChild();
+
+                if (isFusingSupported(fc, ch1)) {
+                    fuse(ch1);
+                    graph.DropNode(ch1);
+                }
+            }
+        }
+    }
+}
 
 void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) {
     for (MKLDNNNodePtr& node : graph.GetNodes()) {
@@ -538,6 +705,7 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
             }
 
             MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent();
+            MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild();
 
             auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum();
 
@@ -547,7 +715,12 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
             processed.insert(node);
             processed.insert(nextNode);
 
-            auto edge = p->getChildEdgeAt(oldEdgeNum);
+            MKLDNNEdgePtr edge;
+            for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) {
+                if (cur->getChild() == c)
+                    edge = cur;
+            }
+            if (!edge) THROW_IE_EXCEPTION << "Inappropriate graph processing";
 
 
             std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName();
@@ -560,37 +733,38 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) {
                 reorderPtr->setDescs(n->getInput(), nn->getOutput());
                 reorderPtr->_scales = scales;
             }
-            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder));
-            beforeNode->setDims(edge->getDims());
-            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild()));
-            afterNode->setDims(edge->getDims());
 
-            int oIndex = edge->getOutputNum();
-            int iIndex = edge->getInputNum();
+            // new !!!
+            auto oIndex = edge->getOutputNum();
+            auto iIndex = edge->getInputNum();
             if (iIndex < 0 || oIndex < 0)
                 THROW_IE_EXCEPTION << "Cannot create reorder for nodes: "
                                    << edge->getParent()->getName() << " and "
                                    << edge->getChild()->getName() << ".";
+            edge->drop();
+
+            MKLDNNEdgePtr beforeNode(new MKLDNNEdge(edge->getParent(), newReorder, iIndex, 0));
+            MKLDNNEdgePtr afterNode(new MKLDNNEdge(newReorder, edge->getChild(), 0, oIndex));
 
             // Add edge for beforeNode
-            edge->getParent()->childEdges[iIndex].reset();
-            edge->getParent()->childEdges[iIndex] = beforeNode;
             beforeNode->getChild()->parentEdges.push_back(beforeNode);
+            edge->getParent()->childEdges.push_back(beforeNode);
 
             // Add edge for afterNode
             afterNode->getParent()->childEdges.push_back(afterNode);
-            edge->getChild()->parentEdges[oIndex].reset();
-            edge->getChild()->parentEdges[oIndex] = afterNode;
+            edge->getChild()->parentEdges.push_back(afterNode);
 
             newReorder->getSupportedDescriptors();
             newReorder->initSupportedPrimitiveDescriptors();
             newReorder->selectOptimalPrimitiveDescriptor();
 
-            beforeNode->getDesc();
             graph.GetEdges().push_back(beforeNode);
-            afterNode->getDesc();
             graph.GetEdges().push_back(afterNode);
 
+            // Just to check accordance
+            afterNode->getDesc();
+            beforeNode->getDesc();
+
             newNodes.push_back(newReorder);
             graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end());
         }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
index 6818cc9ae..6a6d7d74c 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,8 +23,10 @@ private:
     void FuseConvolutionAndActivation(MKLDNNGraph &graph);
     void FuseConvolutionAndDepthwise(MKLDNNGraph &graph);
     void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph);
+    void FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph);
     void FuseBatchNormWithScale(MKLDNNGraph& graph);
     void FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph);
+    void FuseFullyConnectedAndActivation(MKLDNNGraph &graph);
     void RemoveIdentityOperator(MKLDNNGraph& graph);
 
     void RemoveIOScaleShifts(MKLDNNGraph& graph);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp
index 95e803925..573ab06d6 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,7 +14,7 @@
 
 MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs,
                                                      InferenceEngine::OutputsDataMap networkOutputs)
-        : InferRequestInternal(networkInputs, networkOutputs), m_curBatch(-1) {}
+        : InferRequestInternal(networkInputs, networkOutputs) {}
 
 
 template <typename T> void MKLDNNPlugin::MKLDNNInferRequest::pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob) {
@@ -218,6 +218,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const char *name, const Inference
         }
 
         if (foundInput->getPreProcess().getResizeAlgorithm() != InferenceEngine::ResizeAlgorithm::NO_RESIZE) {
+            PreProcessData::isApplicable(data, _inputs[name]);
             // Stores the given blob as ROI blob. It will be used to fill in network input during pre-processing.
             _preProcData[name].setRoiBlob(data);
         } else {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h
index 6d88bc8d2..47f1191bb 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -46,7 +46,5 @@ private:
     void changeDefaultPtr();
     MKLDNNGraph::Ptr graph;
     std::map<std::string, void*> externalPtr;
-
-    int m_curBatch;
 };
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
index 1821b88f8..5d9c345ea 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -81,7 +81,7 @@ void MKLDNNMemory::SetData(memory::data_type dataType, memory::format format, co
             GetDataType() != dataType) {
         auto memData = GetDescriptor().data;
 
-        std::vector<int> dims(memData.dims, memData.dims + memData.ndims);
+        std::vector<ptrdiff_t> dims(memData.dims, memData.dims + memData.ndims);
 
         auto dataType = GetDataType();
 
@@ -220,7 +220,7 @@ bool MKLDNNMemory::isConsistant(memory::dims dims, memory::format format) {
 
 bool MKLDNNMemory::IsPlainFormat(memory::format format) {
     std::vector<memory::format> plains = {memory::nc, memory::nchw, memory::ncdhw, memory::nhwc, memory::ndhwc, memory::chwn,
-        memory::oi, memory::io, memory::oihw, memory::oidhw, memory::ihwo,
+        memory::oi, memory::io, memory::oihw, memory::oidhw, memory::ihwo, memory::tnc,
         memory::goihw,
         memory::blocked};
 
@@ -252,6 +252,7 @@ memory::format MKLDNNMemory::GetPlainFormat(memory::dims dims) {
 
 InferenceEngine::Layout MKLDNNMemory::GetPlainLayout(memory::dims dims) {
     switch (dims.size()) {
+        case 0: return Layout::SCALAR;
         case 1: return Layout::C;
         case 2: return Layout::NC;
         case 3: return Layout::CHW;
@@ -290,7 +291,7 @@ void MKLDNNMemory::CreateBlockingDesc(memory::desc &desc) {
         const int prev_idx = perm[ndims - d];
         const int curr_idx = perm[ndims - 1 - d];
 
-        blk.strides[0][curr_idx] = dims[curr_idx] == 0 ? 1 : blk.strides[0][prev_idx] * (std::max)(1, dims[prev_idx]);
+        blk.strides[0][curr_idx] = dims[curr_idx] == 0 ? 1 : blk.strides[0][prev_idx] * (std::max)((ptrdiff_t)1, dims[prev_idx]);
     }
 }
 memory::format MKLDNNMemory::Convert(const InferenceEngine::Layout layout) {
@@ -457,6 +458,9 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
         case mkldnn_s32:
             precision = Precision::I32;
             break;
+        case mkldnn_bin:
+            precision = Precision::BIN;
+            break;
         default:
             THROW_IE_EXCEPTION << "Cannot cast to TensorDesc. Unsupported precision!";
     }
@@ -510,10 +514,17 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const {
         case memory::nhwc:
             layout = Layout::NHWC;
             order = {0, 2, 3, 1};
-            blkDims = {static_cast<size_t>(dims[0]),
-                       static_cast<size_t>(dims[2]),
-                       static_cast<size_t>(dims[3]),
-                       static_cast<size_t>(dims[1])};
+            if (precision == Precision::BIN) {
+                blkDims = {static_cast<size_t>(dims[0]),
+                           static_cast<size_t>(dims[2]),
+                           static_cast<size_t>(dims[3]),
+                           static_cast<size_t>(rnd_up(dims[1], 8))};
+            } else {
+                blkDims = {static_cast<size_t>(dims[0]),
+                           static_cast<size_t>(dims[2]),
+                           static_cast<size_t>(dims[3]),
+                           static_cast<size_t>(dims[1])};
+            }
             break;
         case memory::ndhwc:
             layout = Layout::NDHWC;
@@ -621,7 +632,9 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
         case Precision::I32:
             data_type = mkldnn::memory::data_type::s32;
             break;
-
+        case Precision::BIN:
+            data_type = mkldnn::memory::data_type::bin;
+            break;
         default:
             THROW_IE_EXCEPTION << "Cannot create MKLDNNMemoryDesc from TensorDesc. Unsupported precision!";
     }
@@ -651,6 +664,7 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
         case OIHW:
             mkldnnFormat = memory::format::oihw;
             break;
+        case SCALAR:
         case C:
             mkldnnFormat = memory::format::x;
             break;
@@ -764,7 +778,7 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
             const int prev_idx = perm[realDims.ndims() - d];
             const int curr_idx = perm[realDims.ndims() - 1 - d];
 
-            blk.strides[0][curr_idx] = realDims[curr_idx] == 0 ? 1 : blk.strides[0][prev_idx] * (std::max)(1, realDims[prev_idx]);
+            blk.strides[0][curr_idx] = realDims[curr_idx] == 0 ? 1 : blk.strides[0][prev_idx] * (std::max)((ptrdiff_t)1, realDims[prev_idx]);
         }
     } else {
         desc = MKLDNNMemoryDesc(realDims, data_type, mkldnnFormat);
@@ -772,12 +786,12 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc):
 
     desc.data.layout_desc.blocking.offset_padding = tDesc.getBlockingDesc().getOffsetPadding();
     for (size_t i = 0; i < tDesc.getBlockingDesc().getOffsetPaddingToData().size() && i < TENSOR_MAX_DIMS; i++) {
-        desc.data.layout_desc.blocking.offset_padding_to_data[i] = static_cast<int>(offsetsToData[i]);
+        desc.data.layout_desc.blocking.offset_padding_to_data[i] = static_cast<ptrdiff_t>(offsetsToData[i]);
     }
 
     if (notDefault) {
         for (size_t i = 0; i < strides.size() && i < desc.data.ndims; i++) {
-            desc.data.layout_desc.blocking.strides[0][i] = static_cast<int>(strides[order[i]]);
+            desc.data.layout_desc.blocking.strides[0][i] = static_cast<ptrdiff_t>(strides[order[i]]);
         }
     }
 }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
index 37578e5ff..0a047dd51 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -76,7 +76,10 @@ public:
     }
 
     void* GetData() const {
-        return prim->get_data_handle();
+        void* data = prim->get_data_handle();
+        if (data == nullptr)
+            THROW_IE_EXCEPTION << "Cannot get memory!";
+        return data;
     }
 
     mkldnn::memory::data_type GetDataType() const {
@@ -92,7 +95,7 @@ public:
     mkldnn::memory::dims GetDims() const {
         auto data = GetDescriptor().data;
 
-        return std::vector<int>(data.dims, data.dims + data.ndims);
+        return std::vector<ptrdiff_t>(data.dims, data.dims + data.ndims);
     }
 
     void Create(mkldnn::memory::dims dims, mkldnn::memory::data_type data_type, mkldnn::memory::format format,
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
index 73975b71e..574008000 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -36,6 +36,8 @@
 #include <nodes/mkldnn_permute_node.h>
 #include <nodes/mkldnn_memory_node.hpp>
 #include <nodes/mkldnn_rnn.h>
+#include <nodes/mkldnn_quantize_node.h>
+#include <nodes/mkldnn_bin_conv_node.h>
 #include <mkldnn_types.h>
 #include "mkldnn_extension_utils.h"
 #include "mkldnn_plugin.h"
@@ -70,6 +72,8 @@ MKLDNNNode::Register<MKLDNNSoftMaxNode> MKLDNNSoftMaxNode::reg;
 MKLDNNNode::Register<MKLDNNSplitNode> MKLDNNSplitNode::reg;
 MKLDNNNode::Register<MKLDNNTileNode> MKLDNNTileNode::reg;
 MKLDNNNode::Register<MKLDNNPermuteNode> MKLDNNPermuteNode::reg;
+MKLDNNNode::Register<MKLDNNQuantizeNode> MKLDNNQuantizeNode::reg;
+MKLDNNNode::Register<MKLDNNBinaryConvolutionNode> MKLDNNBinaryConvolutionNode::reg;
 MKLDNNNode::Register<MKLDNNMemoryInputNode> MKLDNNMemoryInputNode::reg;
 MKLDNNNode::Register<MKLDNNMemoryOutputNode> MKLDNNMemoryOutputNode::reg;
 MKLDNNNode::Register<MKLDNNRNN> MKLDNNRNN::reg;
@@ -91,7 +95,6 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
         }
     }
 
-    parentEdges.resize(layer->insData.size());
     for (const auto& inData : layer->insData) {
         inDims.emplace_back(inData.lock()->getDims());
     }
@@ -109,7 +112,7 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::
     }
 }
 
-void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge, size_t pIndex, size_t cIndex, bool insertChildIndex) {
+void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) {
     auto edgePtr = edge.lock();
     if (!edgePtr)
         return;
@@ -117,22 +120,9 @@ void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge, size_t pIndex, size_t cI
     auto childPtr = edgePtr->getChild();
     if (!parentPtr || !childPtr)
         return;
-    if (cIndex < parentPtr->childEdges.size()) {
-        if (insertChildIndex) {
-            parentPtr->childEdges.insert(parentPtr->childEdges.begin() + cIndex, edge);
-        } else {
-            removeEdge(parentPtr->childEdges[cIndex]);
-            parentPtr->childEdges[cIndex] = edge;
-        }
-    } else {
-        parentPtr->childEdges.push_back(edge);
-    }
-    if (pIndex < childPtr->parentEdges.size()) {
-        removeEdge(childPtr->parentEdges[pIndex]);
-        childPtr->parentEdges[pIndex] = edge;
-    } else {
-        childPtr->parentEdges.push_back(edge);
-    }
+
+    parentPtr->childEdges.push_back(edge);
+    childPtr->parentEdges.push_back(edge);
 }
 
 void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) {
@@ -146,24 +136,26 @@ void MKLDNNNode::removeEdge(const MKLDNNEdgeWeakPtr& edge) {
     for (auto it = childPtr->parentEdges.begin(); it != childPtr->parentEdges.end(); it++) {
         auto parentEdge = (*it).lock();
         if (parentEdge && parentEdge->getChild() == childPtr && parentEdge->getParent() == parentPtr) {
-            (*it).reset();
+            childPtr->parentEdges.erase(it);
             break;
         }
     }
     for (auto it = parentPtr->childEdges.begin(); it != parentPtr->childEdges.end(); it++) {
         auto childEdge = (*it).lock();
         if (childEdge && childEdge->getChild() == childPtr && childEdge->getParent() == parentPtr) {
-            (*it).reset();
+            parentPtr->childEdges.erase(it);
             break;
         }
     }
 }
 
 void MKLDNNNode::remove() {
-    for (const auto &parentEdge : parentEdges) {
+    auto parent_edges = parentEdges;
+    for (const auto &parentEdge : parent_edges) {
         removeEdge(parentEdge);
     }
-    for (const auto &childEdge : childEdges) {
+    auto child_edges = childEdges;
+    for (const auto &childEdge : child_edges) {
         removeEdge(childEdge);
     }
 }
@@ -355,11 +347,42 @@ const MKLDNNEdgePtr MKLDNNNode::getChildEdgeAt(size_t idx) const {
     return childEdgePtr;
 }
 
+const std::vector<MKLDNNEdgePtr> MKLDNNNode::getParentEdgesAtPort(size_t idx) const {
+    if (idx >= inDims.size())
+        THROW_IE_EXCEPTION << "Node " << getName() << " contains less input ports than " << idx;
+
+    std::vector<MKLDNNEdgePtr> res;
+    for (auto &edge_w : parentEdges) {
+        auto edge = edge_w.lock();
+        if (!edge)
+            THROW_IE_EXCEPTION << "Node " << getName() << " contains dead weak ptr";
+        if (edge->getOutputNum() == idx) res.push_back(edge);
+    }
+    return res;
+}
+
+const std::vector<MKLDNNEdgePtr> MKLDNNNode::getChildEdgesAtPort(size_t idx) const {
+    if (idx >= outDims.size())
+        THROW_IE_EXCEPTION << "Node " << getName() << " contains less output ports than " << idx;
+
+    std::vector<MKLDNNEdgePtr> res;
+    for (auto &edge_w : childEdges) {
+        auto edge = edge_w.lock();
+        if (!edge)
+            THROW_IE_EXCEPTION << "Node " << getName() << " contains dead weak ptr";
+        if (edge->getInputNum() == idx) res.push_back(edge);
+    }
+    return res;
+}
+
+
 std::vector<memory::format> MKLDNNNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const {
     if (dims.ndims() == 1)
         return {memory::format::x};
     else if (dims.ndims() == 2)
         return {memory::format::nc};
+    else if (dims.ndims() == 3)
+        return {memory::format::tnc, memory::format::ntc};
     else if (dims.ndims() == 4)
         return {memory::format::nchw, memory::format::nChw8c, memory::format::nChw16c};
     else if (dims.ndims() == 5)
@@ -379,7 +402,7 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
 
     for (auto& desc : descs) {
         try {
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine);
+            std::shared_ptr<primitive_desc_iterator> itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
             do {
                 InferenceEngine::LayerConfig config;
                 config.dynBatchSupport = true;
@@ -387,7 +410,7 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
                     InferenceEngine::DataConfig dataConfig;
                     dataConfig.inPlace = -1;
                     dataConfig.constant = false;
-                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(itpd, i));
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getSrcMemDesc(*itpd, i));
                     config.inConfs.push_back(dataConfig);
                 }
 
@@ -395,13 +418,13 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() {
                     InferenceEngine::DataConfig dataConfig;
                     dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                     dataConfig.constant = false;
-                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(itpd, i));
+                    dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(getDstMemDesc(*itpd, i));
                     config.outConfs.push_back(dataConfig);
                 }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+                impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str());
 
                 supportedPrimitiveDescriptors.emplace_back(config, impl_type);
-            } while (itpd.next());
+            } while (itpd->next());
         } catch (std::exception& e) {
             // it throw exception in case of no implementation found
             continue;
@@ -422,12 +445,19 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
         outDescs.push_back(outConf.desc);
     createDescriptor({inDescs}, {outDescs});
 
+    std::shared_ptr<mkldnn::primitive_attr> attr = initPrimitiveAttr();
+
     InferenceEngine::LayerConfig rightConfig = getSelectedPrimitiveDescriptor()->getConfig();
     size_t selected_count = 0;
     for (size_t j = 0; j < descs.size(); j++) {
         try {
             const auto &desc = descs[j];
-            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(engine);
+            std::shared_ptr<primitive_desc_iterator> itpd;
+            if (attr == nullptr) {
+                itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine));
+            } else {
+                itpd = std::make_shared<primitive_desc_iterator>(desc.createPrimitiveDescriptorIterator(engine, *(attr.get())));
+            }
             do {
                 InferenceEngine::LayerConfig cfg;
                 cfg.dynBatchSupport = true;
@@ -435,7 +465,7 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
                     InferenceEngine::DataConfig dataConfig;
                     dataConfig.inPlace = canBeInPlace() ? 0 : -1;
                     dataConfig.constant = false;
-                    dataConfig.desc = getSrcMemDesc(itpd, i);
+                    dataConfig.desc = getSrcMemDesc(*itpd, i);
                     cfg.inConfs.push_back(dataConfig);
                 }
 
@@ -443,10 +473,10 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
                     InferenceEngine::DataConfig dataConfig;
                     dataConfig.inPlace = -1;
                     dataConfig.constant = false;
-                    dataConfig.desc = getDstMemDesc(itpd, i);
+                    dataConfig.desc = getDstMemDesc(*itpd, i);
                     cfg.outConfs.push_back(dataConfig);
                 }
-                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str().c_str());
+                impl_desc_type impl_type = parse_impl_name(itpd->get_impl_info_str().c_str());
                 if (selected_count == selectedPrimitiveDescriptorIndex) {
                     if (impl_type != selectedPD->getImplementationType()) {
                         THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
@@ -459,7 +489,7 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) {
                     }
                 }
                 selected_count++;
-            } while (itpd.next());
+            } while (itpd->next());
         } catch(...) {}
     }
 
@@ -505,31 +535,49 @@ InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeV
         intLayout = InferenceEngine::Layout::OIHW;
 
     InferenceEngine::TensorDesc desc(blb->precision(), dims, intLayout);
-    InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
-    internalBlob->allocate();
-    char *data = internalBlob->buffer();
-    size_t intBuffSize = internalBlob->byteSize();
-
-    size_t offset = blb->byteSize();
-    checkSize(intBuffSize, offset);
-    ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), blb->byteSize());
-    data += blb->byteSize();
-    for (const auto &merged : getMergeWith()) {
-        wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
-        if (wLayer == nullptr)
-            THROW_IE_EXCEPTION << "Cannot convert merged weightable layer for node "
-                               << getName() << ".";
-        blb = weights ? wLayer->_weights : wLayer->_biases;
-
-        if (blb == nullptr)
-            THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";
-        offset += blb->byteSize();
+
+    auto fillInternalBlob = [&](char *data, size_t intBuffSize) {
+        size_t offset = blb->byteSize();
         checkSize(intBuffSize, offset);
-        ie_memcpy(data, internalBlob->byteSize(), blb->buffer(), blb->byteSize());
+        ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
         data += blb->byteSize();
-    }
+        for (const auto &merged : getMergeWith()) {
+            wLayer = dynamic_cast<InferenceEngine::WeightableLayer*>(merged->getCnnLayer().get());
+            if (wLayer == nullptr)
+                THROW_IE_EXCEPTION << "Cannot convert merged weightable layer for node "
+                                   << getName() << ".";
+            blb = weights ? wLayer->_weights : wLayer->_biases;
+
+            if (blb == nullptr)
+                THROW_IE_EXCEPTION << "Cannot get internal blob layer for node " << getName() << ".";
+            offset += blb->byteSize();
+            checkSize(intBuffSize, offset);
+            ie_memcpy(data, intBuffSize, blb->buffer(), blb->byteSize());
+            data += blb->byteSize();
+        }
+    };
 
-    return internalBlob;
+    if (blb->precision() == Precision::BIN) {
+        InferenceEngine::TBlob<int8_t>::Ptr internalBlob = InferenceEngine::make_shared_blob<int8_t>(desc);
+
+        internalBlob->allocate();
+        char *data = internalBlob->buffer();
+        size_t intBuffSize = internalBlob->byteSize();
+
+        fillInternalBlob(data, intBuffSize);
+
+        return internalBlob;
+    } else {
+        InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
+
+        internalBlob->allocate();
+        char *data = internalBlob->buffer();
+        size_t intBuffSize = internalBlob->byteSize();
+
+        fillInternalBlob(data, intBuffSize);
+
+        return internalBlob;
+    }
 }
 
 void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) {
@@ -632,6 +680,15 @@ MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vector<MKLDNN
     return constant;
 }
 
+void MKLDNNNode::addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer) {
+    if (!layer) return;
+    if (originalLayers.empty()) {
+        originalLayers = layer->name;
+    } else {
+        originalLayers += "," + layer->name;
+    }
+}
+
 void MKLDNNNode::cleanup() {
     internalBlobs.clear();
     cnnLayer.reset();
@@ -673,6 +730,8 @@ std::string MKLDNNNode::typeToStr(Type type) {
             return "Pooling";
         case FullyConnected:
             return "FullyConnected";
+        case FullyConnected_Activation:
+            return "FullyConnected_Activation";
         case Gemm:
             return "Gemm";
         case SoftMax:
@@ -707,10 +766,10 @@ std::string MKLDNNNode::typeToStr(Type type) {
             return "MemoryOutput";
         case MemoryInput:
             return "MemoryInput";
-        case RNN:
-            return "RNN";
-        case LSTMCell:
-            return "LSTMCell";
+        case RNNSeq:
+            return "RNNSeq";
+        case RNNCell:
+            return "RNNCell";
 
         default:
             return "Unknown";
@@ -877,7 +936,7 @@ void MKLDNNNode::initOptimalPrimitiveDescriptor() {
             config.outConfs[i].desc = getConfiguredOutputDesc(config, i);
         }
         initDescriptor(config);
-    } else if (getType() != RNN && getType() != LSTMCell) {
+    } else if (getType() != RNNSeq && getType() != RNNCell) {
         initDescriptor(config);
     }
 }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index fe71c665f..b3060f8ac 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -43,6 +43,7 @@ enum Type {
     Lrn,
     Pooling,
     FullyConnected,
+    FullyConnected_Activation,
     SoftMax,
     Split,
     Concatenation,
@@ -60,8 +61,10 @@ enum Type {
     Copy,
     MemoryOutput,
     MemoryInput,
-    LSTMCell,
-    RNN
+    RNNCell,
+    RNNSeq,
+    Quantize,
+    BinaryConvolution
 };
 
 static Type TypeFromName(const std::string type) {
@@ -78,6 +81,8 @@ static Type TypeFromName(const std::string type) {
             { "Logistic", Activation },
             { "TanH", Activation },
             { "ReLU6", Activation },
+            { "Exp", Activation },
+            { "Not", Activation },
             { "Activation", Activation },
             { "ScaleShift", Depthwise },
             { "PReLU", Depthwise },
@@ -105,8 +110,14 @@ static Type TypeFromName(const std::string type) {
             { "Flatten", Flatten },
             { "Permute", Permute },
             { "Copy", Copy },
-            { "LSTMCell", LSTMCell },
-            { "RNN", RNN },
+            { "LSTMCell", RNNCell },
+            { "GRUCell", RNNCell },
+            { "RNNCell", RNNCell },
+            { "LSTMSequence", RNNSeq },
+            { "GRUSequence", RNNSeq },
+            { "RNNSequence", RNNSeq },
+            { "Quantize", Quantize },
+            { "BinaryConvolution", BinaryConvolution },
             { "MemoryInput", MemoryInput},  // for construction from name ctor, arbitrary name is used
             { "Memory", MemoryOutput },  // for construction from layer ctor
     };
@@ -152,7 +163,7 @@ public:
 
     ~MKLDNNNode() override = default;
 
-    void addEdge(const MKLDNNEdgeWeakPtr& edge, size_t pIndex, size_t cIndex, bool insertChildIndex = false);
+    void addEdge(const MKLDNNEdgeWeakPtr& edge);
     void removeEdge(const MKLDNNEdgeWeakPtr& edge);
 
     virtual void cleanup();
@@ -169,6 +180,8 @@ public:
     const MKLDNNEdgePtr getParentEdgeAt(size_t idx) const;
     virtual const MKLDNNEdgePtr getChildEdgeAt(size_t idx) const;
 
+    const std::vector<MKLDNNEdgePtr> getParentEdgesAtPort(size_t idx) const;
+    const std::vector<MKLDNNEdgePtr> getChildEdgesAtPort(size_t idx) const;
 
     bool isDropped() {
         return (isEdgesEmpty(childEdges) && isEdgesEmpty(parentEdges));
@@ -190,6 +203,8 @@ public:
         mergedWith.push_back(merge);
     }
 
+    void addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer);
+
     const std::vector <MKLDNNNodePtr> &getMergeWith() {
         return mergedWith;
     }
@@ -202,6 +217,10 @@ public:
         return name;
     }
 
+    const std::string getOriginalLayers() const {
+        return originalLayers;
+    }
+
     Type getType() const {
         return type;
     }
@@ -309,17 +328,19 @@ public:
         THROW_IE_EXCEPTION << "Primitive descriptor was not found for node " << getName() << ".";
     }
 
-    static void invertVectorCopyUtoI(const InferenceEngine::PropertyVector<unsigned int>& src, std::vector<int>& dst) {
+    static void invertVectorCopyUtoI(const InferenceEngine::PropertyVector<unsigned int>& src, std::vector<ptrdiff_t>& dst) {
         dst.clear();
         for (int i = 1; i <= src.size(); i++) {
-            dst.push_back(static_cast<int>(src[src.size() - i]));
+            dst.push_back(static_cast<ptrdiff_t>(src[src.size() - i]));
         }
     }
 
+    std::vector<MKLDNNDims> inDims;
+
+
 protected:
     // TODO: It is necessary only in order to avoid modifications of cnnLayers and original topology
     std::vector<MKLDNNDims> outDims;
-    std::vector<MKLDNNDims> inDims;
     void setType(Type type) {
         this->type = type;
     }
@@ -331,6 +352,8 @@ protected:
     virtual MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx);
     virtual MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx);
 
+    virtual std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() const { return nullptr; }
+
     typedef std::function<MKLDNNMemoryDesc (mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx)>
             GetPrimitiveMemoryFormatFunc;
     std::vector<GetPrimitiveMemoryFormatFunc> internalBlobDesc;
@@ -339,6 +362,8 @@ protected:
     std::vector <MKLDNNNodePtr> mergedWith;
     std::vector <impl_desc_type> implPriorities;
 
+    std::string originalLayers;  // contains names of the original layers separated by comma
+
     MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng);
 
     int selectedPrimitiveDescriptorIndex = -1;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
index 35a965afa..d5a48aa64 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -92,12 +92,8 @@ void Engine::QueryNetwork(const ICNNNetwork& network, const std::map<std::string
 INFERENCE_PLUGIN_API(StatusCode) CreatePluginEngine(IInferencePlugin*& plugin, ResponseDesc *resp) noexcept {
     try {
         plugin = make_ie_compatible_plugin(
-                {{1, 5},
-#ifdef MKL_VERSION
-                 MKL_VERSION,
-#else
+                {{1, 6},
                  CI_BUILD_NUMBER,
-#endif
                  "MKLDNNPlugin"}, std::make_shared<Engine>());
         return OK;
     }
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.h b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.h
index 383feaa21..6cbed8487 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.cpp
index f9e59f2cc..96672cb04 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h
index 075afff9e..f960d537e 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_primitive.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_streams.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_streams.cpp
index a5198377c..b50552f14 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_streams.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_streams.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -34,7 +34,7 @@ bool check_env_variables() {
 #if !(defined(__APPLE__) || defined(_WIN32))
 /* Get the cores affinity mask for the current process */
 bool get_process_mask(int& ncpus, cpu_set_t*& mask) {
-    for (ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 1024 /* reasonable limit of #cores*/; ncpus <<= 1) {
+    for (ncpus = sizeof(cpu_set_t) / CHAR_BIT; ncpus < 32768 /* reasonable limit of #cores*/; ncpus <<= 1) {
         mask = CPU_ALLOC(ncpus);
         if (!mask) return false;
 
@@ -61,6 +61,8 @@ bool pin_current_thread_by_mask(int ncores, const cpu_set_t* proc_mask) {
 /* Pin thread to a spare core in the round-robin scheme, while respecting the given process mask.
  * The function can also handle the hyper-threading (by populating the physical cores first) */
 bool pin_thread_to_vacant_core(int thr_idx, int hyperthreads, int ncores, const cpu_set_t* proc_mask) {
+    if (proc_mask == nullptr)
+        return false;
     const size_t size = CPU_ALLOC_SIZE(ncores);
     const int num_cpus = CPU_COUNT_S(size, proc_mask);
     thr_idx %= num_cpus;  // To limit unique number in [; num_cpus-1] range
@@ -337,6 +339,7 @@ void MKLDNNPlugin::MKLDNNGraphlessInferRequest::SetBlob(const char *name, const
         }
 
         if (foundInput->getPreProcess().getResizeAlgorithm() != InferenceEngine::ResizeAlgorithm::NO_RESIZE) {
+            PreProcessData::isApplicable(data, _inputs[name]);
             // Stores the given blob as ROI blob. It will be used to fill in network input during pre-processing.
             _preProcData[name].setRoiBlob(data);
         } else {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_streams.h b/inference-engine/src/mkldnn_plugin/mkldnn_streams.h
index 31558fee2..baa7c8d3e 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_streams.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_streams.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.cpp
index d23b12e3b..4379d8a5c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -76,6 +76,16 @@ caseless_map<std::string, std::function<void(GenericLayer*, mkldnn::algorithm&,
             alpha = activationLayer->GetParamAsFloat("max", 1.0f);
             beta = activationLayer->GetParamAsFloat("min", 0.0f);
             algorithm = eltwise_clamp;
+        }},
+        {"exp", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
+            alpha = 0.0f;
+            beta = 0.0f;
+            algorithm = eltwise_exp;
+        }},
+        {"not", [](GenericLayer* activationLayer, mkldnn::algorithm& algorithm, float& alpha, float& beta) {
+            alpha = 0.0f;
+            beta = 0.0f;
+            algorithm = eltwise_not;
         }}
 };
 
@@ -107,9 +117,9 @@ void MKLDNNActivationNode::createPrimitive() {
     if (prim)
         return;
 
-    auto prim_desc = createPrimitiveDescriptor<relu_forward::primitive_desc, relu_forward::desc>();
+    auto prim_desc = createPrimitiveDescriptor<eltwise_forward::primitive_desc, eltwise_forward::desc>();
 
-    prim.reset(new relu_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
+    prim.reset(new eltwise_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                 getChildEdgeAt(0)->getMemory().GetPrimitive()));
 }
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.h
index 9dac1507c..3b9cc7e99 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_activation_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
index 173df1c24..d1f777ffd 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -71,6 +71,8 @@ void MKLDNNBatchNormalizationNode::getSupportedDescriptors() {
         InferenceEngine::TBlob<float>::Ptr internalBlob = InferenceEngine::make_shared_blob<float>(desc);
         internalBlob->allocate();
         float * data = internalBlob->buffer();
+        if (data == nullptr)
+            THROW_IE_EXCEPTION << "Cannot get memory!";
 
         InferenceEngine::Blob::Ptr blb = scshLayer->_weights;
         if (blb == nullptr)
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h
index c7d9d3e17..b306f5e98 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
new file mode 100644
index 000000000..b1e3ac23f
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp
@@ -0,0 +1,461 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mkldnn_bin_conv_node.h"
+#include "mkldnn_reorder_node.h"
+#include "mkldnn_input_node.h"
+#include "mkldnn_activation_node.h"
+#include "desc_iterator.hpp"
+#include "mkldnn_eltwise_node.h"
+#include "mkldnn_depthwise_node.h"
+#include "mkldnn_quantize_node.h"
+#include "mkldnn_conv_node.h"
+#include <ie_layers.h>
+#include <string>
+#include <vector>
+#include <mkldnn_types.h>
+#include <mkldnn_extension_utils.h>
+#include <ie_layers_internal.hpp>
+
+using namespace mkldnn;
+using namespace MKLDNNPlugin;
+using namespace InferenceEngine;
+
+MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
+        : MKLDNNNode(layer, eng) {
+    internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
+        return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
+    });
+}
+
+void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() {
+    if (!descs.empty())
+        return;
+
+    auto* binConvLayer = dynamic_cast<BinaryConvolutionLayer*>(getCnnLayer().get());
+    if (binConvLayer == nullptr)
+        THROW_IE_EXCEPTION << "Cannot convert convolution layer.";
+
+    if (getChildEdges().empty())
+        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
+
+    if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) {
+        THROW_IE_EXCEPTION << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
+    }
+
+    isMerged = (!getMergeWith().empty());  // grouped convolution was constructed from split->concat subgraph
+    isGrouped = binConvLayer->_group != 1;  // group info available from IR
+    if (isMerged && isGrouped)
+        THROW_IE_EXCEPTION << "Convolution initialization. Group splitted mode are used together with direct group specification.";
+
+    // default values. Can be replaced in next steps
+    size_t groupNum = binConvLayer->_group;
+    pad_value = binConvLayer->_pad_value;
+    size_t groupIC = binConvLayer->_in_depth;
+    size_t groupOC = binConvLayer->_out_depth;
+
+    isDW = groupNum == groupOC && groupNum == groupIC;
+
+    if (isMerged) {
+        groupNum = getMergeWith().size() + 1;
+    }
+    if (isGrouped) {
+        groupIC /= groupNum;
+        groupOC /= groupNum;
+    }
+
+    weightDims.clear();
+    weightDims.push_back(groupOC);
+    weightDims.push_back(groupIC);
+    for (int i = 1; i <= binConvLayer->_kernel.size(); i++) {
+        weightDims.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]);
+    }
+    biasesDims = { groupOC * groupNum };
+
+    if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum);
+
+    internalBlobs.push_back(createInternalBlob(weightDims, true));
+
+    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
+
+    invertVectorCopyUtoI(binConvLayer->_stride, stride);
+    for (int i = 1; i <= binConvLayer->_dilation.size(); i++) {
+        dilation.push_back(static_cast<int>(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1);
+    }
+
+    auto allPads = getPaddings(*binConvLayer);
+    invertVectorCopyUtoI(allPads.begin, paddingL);
+    invertVectorCopyUtoI(allPads.end, paddingR);
+
+    MKLDNNDims weightsDims = MKLDNNDims(weightDims);
+
+    for (int i = 0; i < paddingR.size(); i++) {
+        int with_group = (isGrouped || isMerged) ? 1 : 0;
+        int krn = weightsDims[with_group + 2 + i];
+        int src = getParentEdgeAt(0)->getDims()[2 + i];
+        int dst = getChildEdgeAt(0)->getDims()[2 + i];
+
+        krn = (krn - 1)*(dilation[i] + 1) + 1;
+        int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1;
+        paddingR[i] = (dst - calc_dst) * stride[i];
+    }
+
+    withSum = false;
+    withBinarization = false;
+    for (auto &node : fusedWith) {
+        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode*>(node.get());
+        if (convolutionNode) {
+            auto *convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
+            dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2];
+            dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1];
+            dw_conv_oc = convLayer->_out_depth;
+            for (int i = 0; i < convLayer->_kernel.size(); i++) {
+                dw_conv_kernel.push_back(convLayer->_kernel[i]);
+            }
+            for (int i = 0; i < convLayer->_stride.size(); i++) {
+                dw_conv_strides.push_back(convLayer->_stride[i]);
+            }
+        }
+
+        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode*>(node.get());
+        if (eltwiseNode) {
+            withSum = true;
+        }
+
+        auto* quantizationNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
+        if (quantizationNode) {
+            withBinarization = true;
+        }
+    }
+
+    if ((!withSum && getParentEdges().size() != 1) || (withSum && getParentEdges().size() != 2))
+        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
+
+    auto inputDataType = memory::bin;
+    auto outputDataType = withBinarization ? memory::bin : memory::f32;
+
+    MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::nhwc);
+    MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::nhwc);
+    createDescriptor({in_candidate}, {out_candidate});
+}
+
+void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) {
+    int blob_idx = 0;
+    mkldnn::post_ops ops;
+
+    for (auto &node : fusedWith) {
+        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
+        if (eltwiseNode) {
+            if (eltwiseNode->getCnnLayer()->precision == Precision::I8) {
+                auto it = eltwiseNode->getCnnLayer()->blobs.find("eltwise-sum-scale");
+                if (it != eltwiseNode->getCnnLayer()->blobs.end()) {
+                    // currently there is the only one scale while we need scale by channel :(
+                    ops.append_sum(it->second->buffer().as<float*>()[0]);
+                }
+            } else {
+                ops.append_sum(1.0);
+            }
+            continue;
+        }
+
+        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
+        if (activationNode) {
+            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
+                               activationNode->getBeta());
+            continue;
+        }
+
+        auto* depthwiseNode = dynamic_cast<MKLDNNDepthwiseNode *>(node.get());
+        if (depthwiseNode) {
+            auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
+
+            if (initWeights) {
+                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
+
+                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
+
+                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
+                                                             depthwiseLayer->_weights->buffer(),
+                                                             depthwiseLayer->_weights->size() *
+                                                             MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
+
+                if (depthwiseNode->isBroadcast()) {
+                    float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[0];
+                    for (int i = 1; i < PostOpsIntBlobMemory[blob_idx]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
+                        static_cast<float *>(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue;
+                    }
+                }
+
+                if (depthwiseNode->getAlgorithm() == depthwise_scale_shift) {
+                    PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                    PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32,
+                                                                memory::format::x);
+                    PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
+                                                                 depthwiseLayer->_biases->buffer(),
+                                                                 depthwiseLayer->_biases->size() *
+                                                                 MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
+
+                    if (depthwiseNode->isBroadcast()) {
+                        float broadcastValue = static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[0];
+                        for (int i = 1; i < PostOpsIntBlobMemory[blob_idx + 1]->GetPrimitiveDescriptor().desc().data.dims[0]; i++) {
+                            static_cast<float *>(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue;
+                        }
+                    }
+
+                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
+                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
+                                         (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+
+                    blob_idx += 2;
+                } else {
+                    ops.append_depthwise(depthwiseNode->getAlgorithm(),
+                                         (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
+                                         nullptr);
+
+                    blob_idx += 1;
+                }
+            } else {
+                ops.append_depthwise(depthwiseNode->getAlgorithm(),
+                                     nullptr,
+                                     nullptr);
+            }
+
+            continue;
+        }
+
+        auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode *>(node.get());
+        if (quantizeNode) {
+            if (initWeights) {
+                MKLDNNDims binarizationDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
+
+                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                PostOpsIntBlobMemory[blob_idx]->Create(binarizationDims, memory::data_type::f32, memory::format::x);
+
+                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x,
+                                                        &binarizationThresholds[0],
+                                                        binarizationThresholds.size() *
+                                                        MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
+
+                ops.append_binarization(binarization_depthwise, (const float*)PostOpsIntBlobMemory[blob_idx]->GetData());
+
+                blob_idx += 1;
+            } else {
+                ops.append_binarization(binarization_depthwise, nullptr);
+            }
+        }
+
+        auto* convolutionNode = dynamic_cast<MKLDNNConvolutionNode *>(node.get());
+        if (convolutionNode) {
+            auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
+
+            if (initWeights) {
+                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
+                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, memory::data_type::f32,
+                                                            memory::format::Goihw8g);
+
+                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::goihw,
+                                                             convLayer->_weights->buffer(),
+                                                             dwWeightsDims.size() *
+                                                             MKLDNNExtensionUtils::sizeOfDataType(
+                                                                     memory::data_type::f32));
+
+                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                MKLDNNDims dwBiasesDims({dw_conv_oc});
+                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, memory::data_type::f32,
+                                                                memory::format::x);
+                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
+                                                                 convLayer->_biases->buffer(),
+                                                                 dwBiasesDims.size() *
+                                                                 MKLDNNExtensionUtils::sizeOfDataType(
+                                                                         memory::data_type::f32));
+                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+                                   (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
+                                   (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+
+                blob_idx += 2;
+            } else {
+                ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
+                                   dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
+                                   nullptr,
+                                   nullptr);
+            }
+            for (auto &dwConvFusedNode : convolutionNode->getFusedWith()) {
+                auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
+                if (dwConvActivationNode) {
+                    ops.append_eltwise(1.0, dwConvActivationNode->getAlgorithm(), dwConvActivationNode->getAlpha(),
+                                       dwConvActivationNode->getBeta());
+                }
+            }
+
+            continue;
+        }
+    }
+
+    attr.set_post_ops(ops);
+}
+
+void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() {
+    if (!supportedPrimitiveDescriptors.empty())
+        return;
+
+    mkldnn::primitive_attr attr;
+    setPostOps(attr);
+
+    for (auto& desc : descs) {
+        try {
+            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+            do {
+                InferenceEngine::LayerConfig config;
+                config.dynBatchSupport = true;
+                for (size_t i = 0; i < desc.inputNumbers(); i++) {
+                    InferenceEngine::DataConfig dataConfig;
+                    dataConfig.inPlace = -1;
+                    dataConfig.constant = false;
+                    dataConfig.desc = getSrcMemDesc(itpd, i);
+                    if (!isGrouped)
+                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                    config.inConfs.push_back(dataConfig);
+                }
+
+                for (size_t i = 0; i < desc.outputNumbers(); i++) {
+                    InferenceEngine::DataConfig dataConfig;
+                    if (withSum) {
+                        dataConfig.inPlace = 1;
+                    }
+
+                    dataConfig.constant = false;
+                    dataConfig.desc = getDstMemDesc(itpd, i);
+                    if (!isGrouped)
+                        dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
+                    config.outConfs.push_back(dataConfig);
+
+                    if (withSum) {
+                        dataConfig.inPlace = -1;
+                        config.inConfs.push_back(dataConfig);
+                    }
+                }
+                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+
+                supportedPrimitiveDescriptors.emplace_back(config, impl_type);
+            } while (itpd.next());
+        } catch (std::exception& e) {
+            // it throw exception in case of no implementation found
+            continue;
+        }
+    }
+}
+
+
+void MKLDNNBinaryConvolutionNode::createPrimitive() {
+    if (prim)
+        return;
+
+    mkldnn::primitive_attr attr;
+    setPostOps(attr, true);
+
+    auto prim_desc = createPrimitiveDescriptor<binary_convolution_forward::primitive_desc,
+            binary_convolution_forward::desc>(attr);
+
+    prim.reset(new binary_convolution_forward(prim_desc,
+                                       getParentEdgeAt(0)->getMemory().GetPrimitive(),
+                                       internalBlobMemory[0]->GetPrimitive(),
+                                       getChildEdgeAt(0)->getMemory().GetPrimitive()));
+}
+
+bool MKLDNNBinaryConvolutionNode::created() const {
+    return getType() == BinaryConvolution;
+}
+
+void MKLDNNBinaryConvolutionNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
+                                                   const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
+    TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
+    mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
+
+    MKLDNNMemoryDesc in_candidate(inDesc);
+    MKLDNNMemoryDesc out_candidate(outDesc);
+
+    // grouping and autoblocking is not compatible
+    if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
+        return;
+
+    MKLDNNDims blocked_weightDims(weightDims);
+    MKLDNNDims blocked_biasesDims(biasesDims);
+    MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::any};
+
+    std::shared_ptr<mkldnn::binary_convolution_forward::desc> bin_conv_desc;
+    bin_conv_desc.reset(new binary_convolution_forward::desc(prop_kind::forward_scoring, algorithm::binary_convolution_direct,
+                                                             in_candidate, wgh_candidate, out_candidate, stride, dilation,
+                                                             paddingL, paddingR, pad_value));
+
+    descs.emplace_back(bin_conv_desc);
+}
+
+void MKLDNNBinaryConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) {
+    auto* selectedPD = getSelectedPrimitiveDescriptor();
+    if (!selectedPD) {
+        return;
+    }
+
+    createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc});
+
+    mkldnn::primitive_attr attr;
+    setPostOps(attr);
+
+    InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig();
+    size_t selected_count = 0;
+    for (size_t i = 0; i < descs.size(); i++) {
+        const auto& desc = descs[i];
+        try {
+            primitive_desc_iterator itpd = desc.createPrimitiveDescriptorIterator(getEngine(), attr);
+            do {
+                InferenceEngine::LayerConfig cfg;
+                cfg.dynBatchSupport = true;
+                for (size_t j = 0; j < desc.inputNumbers(); j++) {
+                    InferenceEngine::DataConfig dataConfig;
+                    dataConfig.inPlace = -1;
+                    dataConfig.constant = false;
+                    dataConfig.desc = getSrcMemDesc(itpd, j);
+                    cfg.inConfs.push_back(dataConfig);
+                }
+
+                for (size_t j = 0; j < desc.outputNumbers(); j++) {
+                    InferenceEngine::DataConfig dataConfig;
+                    dataConfig.inPlace = -1;
+                    if (withSum) {
+                        cfg.inConfs.push_back(dataConfig);
+                        dataConfig.inPlace = 1;
+                    }
+                    dataConfig.constant = false;
+                    dataConfig.desc = getDstMemDesc(itpd, j);
+
+                    cfg.outConfs.push_back(dataConfig);
+                }
+                impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+
+                if (selected_count == selectedPrimitiveDescriptorIndex) {
+                    if (impl_type != selectedPD->getImplementationType()) {
+                        THROW_IE_EXCEPTION << "Cannot get the original layer configuration!";
+                    }
+                    rightConfig = cfg;
+                }
+                if (i == descs.size() - 1) {
+                    if (impl_type == selectedPD->getImplementationType()) {
+                        rightConfig = config;
+                    }
+                }
+                selected_count++;
+            } while (itpd.next());
+        } catch (std::exception& e) {
+            continue;
+        }
+    }
+    selectedPD->getConfig() = rightConfig;
+}
+
+void MKLDNNBinaryConvolutionNode::pushBinarizationThreshold(float value) {
+    binarizationThresholds.push_back(value);
+}
+\ No newline at end of file
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h
new file mode 100644
index 000000000..659345d05
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h
@@ -0,0 +1,60 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_common.h>
+#include <mkldnn_node.h>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace MKLDNNPlugin {
+
+class MKLDNNBinaryConvolutionNode : public MKLDNNNode {
+public:
+    MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng);
+    ~MKLDNNBinaryConvolutionNode() override = default;
+
+    void getSupportedDescriptors() override;
+    void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
+                          const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
+    void initDescriptor(const InferenceEngine::LayerConfig& config) override;
+    void createPrimitive() override;
+    void initSupportedPrimitiveDescriptors() override;
+    bool created() const override;
+    bool canBeInPlace() const override {
+        return false;
+    }
+    void setPostOps(mkldnn::primitive_attr &attr, bool initWeights);
+    void pushBinarizationThreshold(float value);
+
+private:
+    static Register<MKLDNNBinaryConvolutionNode> reg;
+    bool withSum;
+    bool withBinarization;
+    bool isDW;
+    bool isMerged;
+    bool isGrouped;
+    std::vector<ptrdiff_t> stride;
+    std::vector<ptrdiff_t> dilation;
+    std::vector<ptrdiff_t> paddingL;
+    std::vector<ptrdiff_t> paddingR;
+    InferenceEngine::SizeVector weightDims;
+    InferenceEngine::SizeVector biasesDims;
+
+    ptrdiff_t dw_conv_oc;
+    ptrdiff_t dw_conv_ih;
+    ptrdiff_t dw_conv_iw;
+    std::vector<ptrdiff_t> dw_conv_kernel;
+    std::vector<ptrdiff_t> dw_conv_strides;
+    std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
+
+    float pad_value;
+
+    std::vector<float> binarizationThresholds;
+};
+
+}  // namespace MKLDNNPlugin
+
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp
index fd2893e95..ec370ee24 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -16,6 +16,7 @@
 #include "mkldnn_dims.h"
 #include "mkldnn_edge.h"
 #include "mkldnn_memory.h"
+#include "ie_parallel.hpp"
 #include <limits>
 
 using namespace mkldnn;
@@ -509,3 +510,46 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() {
     }
     initDescriptor(config);
 }
+
+void MKLDNNConcatNode::execute(mkldnn::stream strm) {
+    if (isOptimized()) {
+        return;
+    }
+
+    const MKLDNNMemory& dst_memory = getChildEdgeAt(0)->getMemory();
+    const mkldnn::memory::data_type data_type = dst_memory.GetDataType();
+
+    const bool isInt8 = (data_type == mkldnn_s8 || data_type == mkldnn_u8);
+
+    if (isInt8) {
+        uint8_t* dst_ptr = reinterpret_cast<uint8_t*>(dst_memory.GetData());
+
+        const size_t num_src = getParentEdges().size();
+
+        std::vector<size_t> channels;
+        size_t channels_size = 0;
+        std::vector<const uint8_t*> src_ptrs;
+        std::vector<uint8_t*> dst_ptrs;
+
+        for (size_t i = 0; i < num_src; i++) {
+            const MKLDNNMemory& src_mem = getParentEdgeAt(i)->getMemory();
+            const size_t num_channels = src_mem.GetDims()[1];
+
+            channels.push_back(num_channels);
+            src_ptrs.push_back(reinterpret_cast<const uint8_t*>(src_mem.GetData()));
+            dst_ptrs.push_back(dst_ptr + channels_size);
+            channels_size += num_channels;
+        }
+
+        const size_t iter_count = getParentEdgeAt(0)->getMemory().GetSize() / channels[0];
+
+        parallel_for(iter_count, [&](int i) {
+            const size_t dst_off = i * channels_size;
+            for (int j = 0; j < num_src; j++) {
+                memcpy(dst_ptrs[j] + dst_off, src_ptrs[j] + i * channels[j], channels[j]);
+            }
+        });
+    } else {
+        MKLDNNNode::execute(strm);
+    }
+}
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h
index 9aa51d7cd..5af4a1022 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,6 +21,7 @@ public:
     void createPrimitive() override;
     void selectOptimalPrimitiveDescriptor() override;
     bool created() const override;
+    void execute(mkldnn::stream strm) override;
 
     bool isOptimized() const;
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
index ea1aee821..18e98a7b0 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,7 +21,8 @@ using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
 
 MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng)
-        : MKLDNNNode(layer, eng), withBiases(false) {
+        : MKLDNNNode(layer, eng), withBiases(false), withSum(false),  dw_conv_iw(0), dw_conv_ih(0),
+        dw_conv_oc(0), isDW(false), isMerged(false), withActivation(false), convLayer(nullptr), isGrouped(false) {
     internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc {
         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(0).desc());
     });
@@ -41,7 +42,7 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr&
         auto ois = layer->blobs.find("oi-scale");
         if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8)
             && ois == layer->blobs.end()) {
-            THROW_IE_EXCEPTION << "Internal error of graph quantization - missmatch of intermediate scales and next layer type for convolution "
+            THROW_IE_EXCEPTION << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution "
                 << getCnnLayer()->name;
         }
         if (ois != layer->blobs.end()) {
@@ -262,7 +263,7 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
             auto* depthwiseLayer = reinterpret_cast<WeightableLayer*>(depthwiseNode->getCnnLayer().get());
 
             if (initWeights) {
-                MKLDNNDims depthwiseDims({static_cast<int>(rnd_up(biasesDims[0], 16))});
+                MKLDNNDims depthwiseDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
 
                 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                 PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format::x);
@@ -320,27 +321,25 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
         if (convolutionNode) {
             auto* convLayer = reinterpret_cast<ConvolutionLayer*>(convolutionNode->getCnnLayer().get());
 
+            auto weightsPrc = MKLDNNExtensionUtils::IEPrecisionToDataType(convLayer->precision);
+            auto biasPrc = memory::data_type::s32;
+
             if (initWeights) {
                 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
-                MKLDNNDims dwWeightsDims({dw_conv_oc, 1, 1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
-                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, memory::data_type::f32,
-                                                            memory::format::Goihw8g);
+                MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]});
+                PostOpsIntBlobMemory[blob_idx]->Create(dwWeightsDims, weightsPrc, memory::format::Goihw8g);
+
+                Blob::Ptr weights = convLayer->blobs.find("weights")->second;
+                Blob::Ptr biases = convLayer->blobs.find("biases")->second;
 
-                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::goihw,
-                                                             convLayer->_weights->buffer(),
-                                                             dwWeightsDims.size() *
-                                                             MKLDNNExtensionUtils::sizeOfDataType(
-                                                                     memory::data_type::f32));
+                PostOpsIntBlobMemory[blob_idx]->SetData(weightsPrc, memory::goihw, weights->buffer(),
+                                                        dwWeightsDims.size() * MKLDNNExtensionUtils::sizeOfDataType(weightsPrc));
 
                 PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
                 MKLDNNDims dwBiasesDims({dw_conv_oc});
-                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, memory::data_type::f32,
-                                                                memory::format::x);
-                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x,
-                                                                 convLayer->_biases->buffer(),
-                                                                 dwBiasesDims.size() *
-                                                                 MKLDNNExtensionUtils::sizeOfDataType(
-                                                                         memory::data_type::f32));
+                PostOpsIntBlobMemory[blob_idx + 1]->Create(dwBiasesDims, biasPrc, memory::format::x);
+                PostOpsIntBlobMemory[blob_idx + 1]->SetData(biasPrc, memory::x, biases->buffer(),
+                                                            dwBiasesDims.size() * MKLDNNExtensionUtils::sizeOfDataType(biasPrc));
                 ops.append_dw_conv(dw_conv_ih, dw_conv_iw, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS],
                                    dw_conv_strides[Y_AXIS], dw_conv_strides[X_AXIS],
                                    (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(),
@@ -353,6 +352,46 @@ void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWe
                                    nullptr,
                                    nullptr);
             }
+
+            if (convolutionNode->wScale != nullptr) {
+                float* wScaleData = static_cast<float*>(convolutionNode->wScale->buffer());
+
+                std::vector<float> oScaleDataVector;
+                std::vector<float> oShiftDataVector;
+                if (convolutionNode->getCnnLayer()->precision == Precision::I8 &&
+                    convolutionNode->getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
+                    float *oScaleData = static_cast<float *>(convolutionNode->oScale->buffer());
+
+                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
+                        oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
+                        oShiftDataVector.push_back(0.f);
+                    }
+                } else {
+                    for (size_t c = 0; c < convolutionNode->wScale->size(); c++) {
+                        oScaleDataVector.push_back(wScaleData[c]);
+                        oShiftDataVector.push_back(0.f);
+                    }
+                }
+
+                MKLDNNDims oScaleDims({static_cast<ptrdiff_t>(rnd_up(biasesDims[0], 16))});
+
+                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                PostOpsIntBlobMemory[blob_idx]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
+                PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::x, &oScaleDataVector[0],
+                                                        oScaleDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
+
+                PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine())));
+                PostOpsIntBlobMemory[blob_idx + 1]->Create(oScaleDims, memory::data_type::f32, memory::format::x);
+                PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::x, &oShiftDataVector[0],
+                                                            oShiftDataVector.size() * MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32));
+
+                ops.append_depthwise(depthwise_scale_shift,
+                                     (const float *)PostOpsIntBlobMemory[blob_idx]->GetData(),
+                                     (const float *)PostOpsIntBlobMemory[blob_idx + 1]->GetData());
+
+                blob_idx += 2;
+            }
+
             for (auto &dwConvFusedNode : convolutionNode->fusedWith) {
                 auto* dwConvActivationNode = dynamic_cast<MKLDNNActivationNode *>(dwConvFusedNode.get());
                 if (dwConvActivationNode) {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
index 19191ee45..45d45e29f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -40,18 +40,18 @@ private:
     bool isDW;
     bool isMerged;
     bool isGrouped;
-    std::vector<int> stride;
-    std::vector<int> dilation;
-    std::vector<int> paddingL;
-    std::vector<int> paddingR;
+    std::vector<ptrdiff_t> stride;
+    std::vector<ptrdiff_t> dilation;
+    std::vector<ptrdiff_t> paddingL;
+    std::vector<ptrdiff_t> paddingR;
     InferenceEngine::SizeVector weightDims;
     InferenceEngine::SizeVector biasesDims;
 
-    int dw_conv_oc;
-    int dw_conv_ih;
-    int dw_conv_iw;
-    std::vector<int> dw_conv_kernel;
-    std::vector<int> dw_conv_strides;
+    ptrdiff_t dw_conv_oc;
+    ptrdiff_t dw_conv_ih;
+    ptrdiff_t dw_conv_iw;
+    std::vector<ptrdiff_t> dw_conv_kernel;
+    std::vector<ptrdiff_t> dw_conv_strides;
     std::vector<MKLDNNMemoryPtr> PostOpsIntBlobMemory;
 
     InferenceEngine::ConvolutionLayer* convLayer;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp
index 8b11c296f..25fa018b5 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h
index f74ab297e..08965b9a2 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
index 38ca06ce8..497da396c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
index e32a66a73..aad12edb6 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -35,10 +35,10 @@ private:
     bool withGroups;
     bool isDW;
     size_t groupNum = 1;
-    std::vector<int> stride;
-    std::vector<int> paddingL;
-    std::vector<int> dilation;
-    std::vector<int> paddingR;
+    std::vector<ptrdiff_t> stride;
+    std::vector<ptrdiff_t> paddingL;
+    std::vector<ptrdiff_t> dilation;
+    std::vector<ptrdiff_t> paddingR;
     MKLDNNDims weightsDims;
     static Register<MKLDNNDeconvolutionNode> reg;
     InferenceEngine::Blob::Ptr biases;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.cpp
index 6b1097a62..03e4473c4 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -35,6 +35,11 @@ void MKLDNNDepthwiseNode::getSupportedDescriptors() {
 
     auto parentOutDims = getParentEdgeAt(0)->getDims();
 
+    if (getParentEdges().size() != 1)
+        THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect number of inputs!";
+    if (parentOutDims != getChildEdgeAt(0)->getDims())
+        THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect dimensions!";
+
     SizeVector weightDims = { (long unsigned int)parentOutDims[1] };
     MKLDNNDims blocked_weightDims(weightDims);
 
@@ -76,7 +81,7 @@ void MKLDNNDepthwiseNode::createPrimitive() {
 
     if (isBroadcast()) {
         float broadcastValue = static_cast<float*>(internalBlobMemory[0]->GetData())[0];
-        int blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
+        size_t blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
         for (int i = 1; i < blbSize && realWeightSize != blbSize; i++) {
             static_cast<float*>(internalBlobMemory[0]->GetData())[i] = broadcastValue;
         }
@@ -88,6 +93,15 @@ void MKLDNNDepthwiseNode::createPrimitive() {
                 static_cast<float*>(internalBlobMemory[1]->GetData())[i] = broadcastValue;
             }
         }
+    } else {
+        size_t blbSize = internalBlobMemory[0]->GetPrimitiveDescriptor().desc().data.dims[0];
+        if (realWeightSize != blbSize)
+            THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect weights!";
+        if (isWithBiases()) {
+            blbSize = internalBlobMemory[1]->GetPrimitiveDescriptor().desc().data.dims[0];
+            if (realBiasSize != blbSize)
+                THROW_IE_EXCEPTION << "Cannot create layer " << getName() << ": Incorrect biases!";
+        }
     }
 
     if (isWithBiases()) {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.h
index 16bd3a505..00b60abc4 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depthwise_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
index 111196817..fdb5eeb78 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -12,12 +12,15 @@
 #include <mkldnn_types.h>
 #include <mkldnn_extension_utils.h>
 #include "ie_parallel.hpp"
+#include <map>
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
 
-MKLDNNEltwiseNode::MKLDNNEltwiseNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
+MKLDNNEltwiseNode::MKLDNNEltwiseNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {
+    op = EltwiseLayer::Sum;
+}
 
 bool MKLDNNEltwiseNode::isSum() {
     auto * eltwiseLayer = dynamic_cast<EltwiseLayer*>(getCnnLayer().get());
@@ -45,16 +48,36 @@ void MKLDNNEltwiseNode::getSupportedDescriptors() {
         THROW_IE_EXCEPTION << "Cannot convert eltwise layer.";
     op = eltwiseLayer->_operation;
 
-    if (getParentEdges().empty())
+    if (getParentEdges().size() < 2)
         THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
     if (getChildEdges().empty())
         THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
+    if (op == EltwiseLayer::Squared_diff)
+        if (getParentEdges().size() != 2)
+            THROW_IE_EXCEPTION  << "Incorrect number of input edges for layer " << getName() << " for operation squared_diff.\n"
+                << "Expected: 2\n" << "Actual: " << getParentEdges().size();
 
-    auto outDims = getParentEdgeAt(0)->getDims();
-    for (size_t i = 1; i < getParentEdges().size(); i++) {
-        auto oDims = getParentEdgeAt(i)->getDims();
-        if (outDims.size() != oDims.size() || outDims.ndims() != oDims.ndims())
-            THROW_IE_EXCEPTION << "Dimentions of input layers are not equal for " << eltwiseLayer->name;
+    auto outDims = getChildEdgeAt(0)->getDims();
+    for (size_t i = 0; i < getParentEdges().size(); i++) {
+        auto inDims = getParentEdgeAt(i)->getDims();
+        for (size_t j = 1; j <= inDims.ndims(); j++) {
+            if (outDims[outDims.ndims() - j] != inDims[inDims.ndims() - j]) {
+                if (inDims[inDims.ndims() - j] == 1) {
+                    broadcast = true;
+                } else {
+                    THROW_IE_EXCEPTION << "Incorrect dimentions for broadcasting for " << eltwiseLayer->name;
+                }
+            }
+        }
+    }
+
+    if (broadcast) {
+        auto outDims = getChildEdgeAt(0)->getDims();
+        for (size_t i = 0; i < getParentEdges().size(); i++) {
+            auto inDims = getParentEdgeAt(i)->getDims();
+            if (inDims.ndims() > 5 || outDims.ndims() > 5)
+                THROW_IE_EXCEPTION << "Eltwise node in broadcasting mode doesn't support more than 5 dims for blobs";
+        }
     }
 
     bool with_coeffs = !eltwiseLayer->coeff.empty();
@@ -64,6 +87,9 @@ void MKLDNNEltwiseNode::getSupportedDescriptors() {
     if (with_coeffs && eltwiseLayer->coeff.size() != getParentEdges().size())
         THROW_IE_EXCEPTION << "Number of provided coefficients is not equal to number of operands";
 
+    if (with_coeffs && eltwiseLayer->precision != Precision::FP32)
+        THROW_IE_EXCEPTION << "Sum with coefficients supports only FP32 precision";
+
     sum_scales.clear();
     for (int i = 0; i < getParentEdges().size(); i++)
         sum_scales.push_back(with_coeffs ? eltwiseLayer->coeff[i] : 1.0f);
@@ -73,33 +99,38 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    auto same = [&] (mkldnn::memory::data_type inputDT, mkldnn::memory::data_type outputDT, memory::format fmt) -> PrimitiveDescInfo {
+    auto initDesc = [&] (mkldnn::memory::data_type inputDT, mkldnn::memory::data_type outputDT, memory::format format) -> PrimitiveDescInfo {
         InferenceEngine::LayerConfig config;
         config.dynBatchSupport = true;
         for (size_t i = 0; i < getParentEdges().size(); i++) {
             InferenceEngine::DataConfig dataConfig;
             dataConfig.inPlace = (!i && canBeInPlace()) ? 0 : -1;
             dataConfig.constant = false;
-            dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDT, fmt);
-            config.inConfs.push_back(dataConfig);
+
+            if (getParentEdgeAt(i)->getDims().ndims() == getChildEdgeAt(0)->getDims().ndims()) {
+                dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDT, format);
+                config.inConfs.push_back(dataConfig);
+            } else {
+                // Broadcasting support
+                if (MKLDNNMemory::IsPlainFormat(format)) {
+                    dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDT, MKLDNNMemory::GetPlainFormat(getParentEdgeAt(i)->getDims()));
+                    config.inConfs.push_back(dataConfig);
+                }
+            }
         }
 
         InferenceEngine::DataConfig dataConfig;
             dataConfig.inPlace = -1;
             dataConfig.constant = false;
-            dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDT, fmt);
+            dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDT, format);
             config.outConfs.push_back(dataConfig);
         return {config, impl_desc_type::ref};
     };
 
     for (const auto& format : getAvailableFormatsForDims(getChildEdgeAt(0)->getDims())) {
-        if (getCnnLayer()->precision == Precision::FP32) {
-            mkldnn::memory::data_type inputDT = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32);
-            mkldnn::memory::data_type outputDT = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32);
-            supportedPrimitiveDescriptors.push_back(same(inputDT, outputDT, format));
-        } else {
-            THROW_IE_EXCEPTION << "Invalid Eltwise layer precision: " << getCnnLayer()->name;
-        }
+        mkldnn::memory::data_type inputDT = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->precision);
+        mkldnn::memory::data_type outputDT = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->precision);
+        supportedPrimitiveDescriptors.push_back(initDesc(inputDT, outputDT, format));
     }
 }
 
@@ -127,10 +158,10 @@ void MKLDNNEltwiseNode::createPrimitive() {
             srcs_p.emplace_back(srcMemPtr->GetPrimitive());
         }
     }
-    if (op == EltwiseLayer::Sum) {
+    if (op == EltwiseLayer::Sum && !broadcast) {
         try {
-            auto primitive_desc = sum::primitive_desc(dstMemPtr->GetDescriptor(), sum_scales, srcs_pd);
-            prim = std::shared_ptr<sum>(new sum(primitive_desc, srcs_p, dstMemPtr->GetPrimitive()));
+            auto primitive_desc = mkldnn::sum::primitive_desc(dstMemPtr->GetDescriptor(), sum_scales, srcs_pd);
+            prim = std::shared_ptr<mkldnn::sum>(new mkldnn::sum(primitive_desc, srcs_p, dstMemPtr->GetPrimitive()));
         } catch (...) {
             std::cerr << "Handle this problem correctly!" << std::endl;
             prim = nullptr;
@@ -158,101 +189,1797 @@ void MKLDNNEltwiseNode::initOptimalPrimitiveDescriptor() {
     }
 }
 
-template <typename T0, typename T1> void MKLDNNEltwiseNode::ref_eltwise(int in0, int in1) {
-    IE_ASSERT(getParentEdges().size() > 1);
+void MKLDNNEltwiseNode::dims_calc(int *dims, const MKLDNNDims &edge_dims) {
+    for (int i = 0; i < 5; i++)
+        dims[i] = 1;
+    int ndims = edge_dims.ndims();
+    if (ndims > 5) {
+        THROW_IE_EXCEPTION << "ndims should be less then 5";
+    }
+    for (int i = 0; i < ndims; i++) {
+        dims[4 - i] = edge_dims[ndims - 1 - i];
+    }
+    dims[5 - ndims] = std::min(dims[5 - ndims], batchToProcess());
+}
 
-    auto& srcMemory0 = getParentEdgeAt(in0)->getMemory();
-    auto& srcMemory1 = getParentEdgeAt(in1)->getMemory();
-    const T0 *src0_ptr = reinterpret_cast<const T0*>(srcMemory0.GetData()) +
-            srcMemory0.GetDescriptor().data.layout_desc.blocking.offset_padding;
-    const T1 *src1_ptr = reinterpret_cast<const T1*>(srcMemory1.GetData()) +
-            srcMemory1.GetDescriptor().data.layout_desc.blocking.offset_padding;
-    T0 *dst_ptr = reinterpret_cast<T0*>(getChildEdgeAt(0)->getMemory().GetData()) +
-            getChildEdgeAt(0)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
-    const size_t dst_data_size = srcMemory0.GetSize() / sizeof(T0) / srcMemory0.GetDims()[0] * batchToProcess();
+void MKLDNNEltwiseNode::offset_out_calc(int *offset, int *dims) {
+    int k = 1;
+    for (int i = 4; i >= 0; i--) {
+        offset[i] = k;
+        k *= dims[i];
+    }
+}
+
+void MKLDNNEltwiseNode::offset_in_calc(int *offset, int *dims_in, int *dims_out) {
+    int k = 1;
+    for (int i = 4; i >= 0; i--) {
+        offset[i] = (dims_in[i] == dims_out[i]) ? k : 0;
+        k *= dims_in[i];
+    }
+}
 
-    if (op == EltwiseLayer::Prod) {
+// Intel C++ Compiler 18.0 for Windows contains bug that doesn't allow to use templates to generate eltwise implementations
+// and to avoid all copypaste below
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_add(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
 #ifdef _WIN32
-        for (int i = 0; i < dst_data_size; i++)
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] + src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] + src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] + src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] + src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] + src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] + src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] + src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] + src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_prod(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
             dst_ptr[i] = src0_ptr[i] * src1_ptr[i];
+    }
 #else
-        parallel_for(dst_data_size, [&](int i) {
+        parallel_for(dst_data_size, [&](size_t i) {
             dst_ptr[i] = src0_ptr[i] * src1_ptr[i];
         });
 #endif
-
         for (int j = 2; j < getParentEdges().size(); j++) {
-            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(j)->getMemory().GetData()) +
-                    getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
 #ifdef _WIN32
-            for (int i = 0; i < dst_data_size; i++)
+            for (size_t i = 0; i < dst_data_size; i++) {
                 dst_ptr[i] = dst_ptr[i] * src_ptr[i];
+            }
 #else
-            parallel_for(dst_data_size, [&](int i) {
+            parallel_for(dst_data_size, [&](size_t i) {
                 dst_ptr[i] = dst_ptr[i] * src_ptr[i];
             });
 #endif
         }
-    } else if (op == EltwiseLayer::Max)  {
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] * src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] * src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] * src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] * src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_max(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
 #ifdef _WIN32
-        for (int i = 0; i < dst_data_size; i++)
+        for (size_t i = 0; i < dst_data_size; i++) {
             dst_ptr[i] = std::max(src0_ptr[i], (T0)src1_ptr[i]);
+    }
 #else
-        parallel_for(dst_data_size, [&](int i) {
-            dst_ptr[i] = std::max(src0_ptr[i], (T0) src1_ptr[i]);
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = std::max(src0_ptr[i], (T0)src1_ptr[i]);
         });
 #endif
         for (int j = 2; j < getParentEdges().size(); j++) {
             const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
-                    getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
 #ifdef _WIN32
-            for (int i = 0; i < dst_data_size; i++)
+            for (size_t i = 0; i < dst_data_size; i++) {
                 dst_ptr[i] = std::max(dst_ptr[i], (T0)src_ptr[i]);
+            }
 #else
-            parallel_for(dst_data_size, [&](int i) {
-                dst_ptr[i] = std::max(dst_ptr[i], (T0) src_ptr[i]);
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = std::max(dst_ptr[i], (T0)src_ptr[i]);
             });
 #endif
         }
-    } else if (op == EltwiseLayer::Sum)  {
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
 #ifdef _WIN32
-        for (int i = 0; i < dst_data_size; i++)
-            dst_ptr[i] = src0_ptr[i] + src1_ptr[i];
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = std::max(src0_ptr[index_in0], (T0)src1_ptr[index_in1]);
+                    }
+                }
+            }
+        }
+    }
 #else
-        parallel_for(dst_data_size, [&](int i) {
-            dst_ptr[i] = src0_ptr[i] + src1_ptr[i];
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = std::max(src0_ptr[index_in0], (T0)src1_ptr[index_in1]);
         });
 #endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
 
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = std::max(dst_ptr[index_out], (T0)src_ptr[index_in]);
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = std::max(dst_ptr[index_out], (T0)src_ptr[index_in]);
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_sub(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] - src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] - src1_ptr[i];
+        });
+#endif
         for (int j = 2; j < getParentEdges().size(); j++) {
             const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
-                    getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
 #ifdef _WIN32
-            for (int i = 0; i < dst_data_size; i++)
-                dst_ptr[i] = dst_ptr[i] + src_ptr[i];
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] - src_ptr[i];
+            }
 #else
-            parallel_for(dst_data_size, [&](int i) {
-                dst_ptr[i] = dst_ptr[i] + src_ptr[i];
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] - src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] - src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] - src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] - src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] - src_ptr[index_in];
             });
 #endif
         }
     }
 }
 
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_min(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = std::min(src0_ptr[i], (T0)src1_ptr[i]);
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = std::min(src0_ptr[i], (T0)src1_ptr[i]);
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = std::min(dst_ptr[i], (T0)src_ptr[i]);
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = std::min(dst_ptr[i], (T0)src_ptr[i]);
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = std::min(src0_ptr[index_in0], (T0)src1_ptr[index_in1]);
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = std::min(src0_ptr[index_in0], (T0)src1_ptr[index_in1]);
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = std::min(dst_ptr[index_out], (T0)src_ptr[index_in]);
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = std::min(dst_ptr[index_out], (T0)src_ptr[index_in]);
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_div(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] / src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] / src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] / src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] / src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] / src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] / src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] / src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] / src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_squared_diff(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = (src0_ptr[i] - src1_ptr[i]) * (src0_ptr[i] - src1_ptr[i]);
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = (src0_ptr[i] - src1_ptr[i]) * (src0_ptr[i] - src1_ptr[i]);
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = (dst_ptr[i] - src_ptr[i]) * (dst_ptr[i] - src_ptr[i]);
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = (dst_ptr[i] - src_ptr[i]) * (dst_ptr[i] - src_ptr[i]);
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = (src0_ptr[index_in0] - src1_ptr[index_in1]) * (src0_ptr[index_in0] - src1_ptr[index_in1]);
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = (src0_ptr[index_in0] - src1_ptr[index_in1]) * (src0_ptr[index_in0] - src1_ptr[index_in1]);
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = (dst_ptr[index_out] - src_ptr[index_in]) * (dst_ptr[index_out] - src_ptr[index_in]);
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = (dst_ptr[index_out] - src_ptr[index_in]) * (dst_ptr[index_out] - src_ptr[index_in]);
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_floor_mod(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] - src0_ptr[i] / src1_ptr[i] * src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] - src0_ptr[i] / src1_ptr[i] * src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] - dst_ptr[i] / src_ptr[i] * src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] - dst_ptr[i] / src_ptr[i] * src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] - src0_ptr[index_in1] / src1_ptr[index_in0] * src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] - src0_ptr[index_in1] / src1_ptr[index_in0] * src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] - dst_ptr[index_in] / src_ptr[index_out] * src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] - dst_ptr[index_in] / src_ptr[index_out] * src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_pow(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = std::pow(src0_ptr[i], src1_ptr[i]);
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = std::pow(src0_ptr[i], src1_ptr[i]);
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = std::pow(dst_ptr[i], src_ptr[i]);
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = std::pow(dst_ptr[i], src_ptr[i]);
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = std::pow(src0_ptr[index_in0], src1_ptr[index_in1]);
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = std::pow(src0_ptr[index_in0], src1_ptr[index_in1]);
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = std::pow(dst_ptr[index_out], src_ptr[index_in]);
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = std::pow(dst_ptr[index_out], src_ptr[index_in]);
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_equal(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] == src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] == src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] == src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] == src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] == src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] == src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] == src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] == src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_not_equal(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] != src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] != src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] != src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] != src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] != src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] != src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] != src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] != src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_less(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] < src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] < src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] < src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] < src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] < src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] < src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] < src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] < src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_less_equal(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] <= src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] <= src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] <= src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] <= src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] <= src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] <= src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] <= src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] <= src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_greater(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] > src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] > src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] > src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] > src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] > src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] > src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] > src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] > src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_greater_equal(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] >= src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] >= src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] >= src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] >= src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] >= src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] >= src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] >= src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] >= src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_logical_and(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] && src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] && src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] && src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] && src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] && src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] && src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] && src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] && src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_logical_or(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = src0_ptr[i] || src1_ptr[i];
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = src0_ptr[i] || src1_ptr[i];
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = dst_ptr[i] || src_ptr[i];
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = dst_ptr[i] || src_ptr[i];
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = src0_ptr[index_in0] || src1_ptr[index_in1];
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = src0_ptr[index_in0] || src1_ptr[index_in1];
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = dst_ptr[index_out] || src_ptr[index_in];
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = dst_ptr[index_out] || src_ptr[index_in];
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::eltwise_logical_xor(
+        const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, const size_t dst_data_size) {
+    if (!broadcast) {
+#ifdef _WIN32
+        for (size_t i = 0; i < dst_data_size; i++) {
+            dst_ptr[i] = (src0_ptr[i] || src1_ptr[i]) - (src0_ptr[i] && src1_ptr[i]);
+    }
+#else
+        parallel_for(dst_data_size, [&](size_t i) {
+            dst_ptr[i] = (src0_ptr[i] || src1_ptr[i]) - (src0_ptr[i] && src1_ptr[i]);
+        });
+#endif
+        for (int j = 2; j < getParentEdges().size(); j++) {
+            const T1 *src_ptr = reinterpret_cast<const T1*>(getParentEdgeAt(j)->getMemory().GetData()) +
+                                getParentEdgeAt(j)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+#ifdef _WIN32
+            for (size_t i = 0; i < dst_data_size; i++) {
+                dst_ptr[i] = (dst_ptr[i] || src_ptr[i]) - (dst_ptr[i] && src_ptr[i]);
+            }
+#else
+            parallel_for(dst_data_size, [&](size_t i) {
+                dst_ptr[i] = (dst_ptr[i] || src_ptr[i]) - (dst_ptr[i] && src_ptr[i]);
+            });
+#endif
+        }
+    } else {
+        int dims_out[5], dims_in0[5], dims_in1[5];
+        int offset_out[5], offset_in0[5], offset_in1[5];
+        auto& child_edge_dims = getChildEdgeAt(0)->getDims();
+        auto& parent0_edge_dims = getParentEdgeAt(0)->getDims();
+        auto& parent1_edge_dims = getParentEdgeAt(1)->getDims();
+        dims_calc(dims_out, child_edge_dims);
+        dims_calc(dims_in0, parent0_edge_dims);
+        dims_calc(dims_in1, parent1_edge_dims);
+        offset_out_calc(offset_out, dims_out);
+        offset_in_calc(offset_in0, dims_in0, dims_out);
+        offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+        for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+        for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+            for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                    for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                        size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                        size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+                        size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                        dst_ptr[index_out] = (src0_ptr[index_in0] || src1_ptr[index_in1]) - (src0_ptr[index_in0] && src1_ptr[index_in1]);
+                    }
+                }
+            }
+        }
+    }
+#else
+        parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+            size_t index_in0 = i0 * offset_in0[0] + i1 * offset_in0[1] + i2 * offset_in0[2] + i3 * offset_in0[3] + i4 * offset_in0[4];
+            size_t index_in1 = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+            dst_ptr[index_out] = (src0_ptr[index_in0] || src1_ptr[index_in1]) - (src0_ptr[index_in0] && src1_ptr[index_in1]);
+        });
+#endif
+        for (size_t n = 2; n < getParentEdges().size(); n++) {
+            const T1 *src_ptr = reinterpret_cast<const T1 *>(getParentEdgeAt(n)->getMemory().GetData()) +
+                                getParentEdgeAt(n)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+            auto& parent_edge_dims = getParentEdgeAt(n)->getDims();
+            dims_calc(dims_in1, parent_edge_dims);
+            offset_in_calc(offset_in1, dims_in1, dims_out);
+
+#ifdef _WIN32
+            for (size_t i0 = 0; i0 < dims_out[0]; i0++) {
+            for (size_t i1 = 0; i1 < dims_out[1]; i1++) {
+                for (size_t i2 = 0; i2 < dims_out[2]; i2++) {
+                    for (size_t i3 = 0; i3 < dims_out[3]; i3++) {
+                        for (size_t i4 = 0; i4 < dims_out[4]; i4++) {
+                            size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                            size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                            dst_ptr[index_out] = (dst_ptr[index_out] || src_ptr[index_in]) - (dst_ptr[index_out] && src_ptr[index_in]);
+                        }
+                    }
+                }
+            }
+        }
+#else
+            parallel_for5d(dims_out[0], dims_out[1], dims_out[2], dims_out[3], dims_out[4], [&](size_t i0, size_t i1, size_t i2, size_t i3, size_t i4) {
+                size_t index_out = i0 * offset_out[0] + i1 * offset_out[1] + i2 * offset_out[2] + i3 * offset_out[3] + i4 * offset_out[4];
+                size_t index_in = i0 * offset_in1[0] + i1 * offset_in1[1] + i2 * offset_in1[2] + i3 * offset_in1[3] + i4 * offset_in1[4];
+                dst_ptr[index_out] = (dst_ptr[index_out] || src_ptr[index_in]) - (dst_ptr[index_out] && src_ptr[index_in]);
+            });
+#endif
+        }
+    }
+}
+
+template <typename T0, typename T1> void MKLDNNEltwiseNode::ref_eltwise(int in0, int in1) {
+    IE_ASSERT(getParentEdges().size() > 1);
+
+    auto& srcMemory0 = getParentEdgeAt(in0)->getMemory();
+    auto& srcMemory1 = getParentEdgeAt(in1)->getMemory();
+    const T0 *src0_ptr = reinterpret_cast<const T0*>(srcMemory0.GetData()) +
+            srcMemory0.GetDescriptor().data.layout_desc.blocking.offset_padding;
+    const T1 *src1_ptr = reinterpret_cast<const T1*>(srcMemory1.GetData()) +
+            srcMemory1.GetDescriptor().data.layout_desc.blocking.offset_padding;
+    T0 *dst_ptr = reinterpret_cast<T0*>(getChildEdgeAt(0)->getMemory().GetData()) +
+            getChildEdgeAt(0)->getMemory().GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+    const size_t dst_data_size = srcMemory0.GetSize() / sizeof(T0) / srcMemory0.GetDims()[0] * batchToProcess();
+
+    switch (op) {
+        case EltwiseLayer::eOperation::Sum: eltwise_add(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Prod: eltwise_prod(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Max: eltwise_max(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Sub: eltwise_sub(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Min: eltwise_min(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Div: eltwise_div(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Squared_diff: eltwise_squared_diff(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Floor_mod: eltwise_floor_mod(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Pow: eltwise_pow(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Equal: eltwise_equal(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Not_equal: eltwise_not_equal(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Less: eltwise_less(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Less_equal: eltwise_less_equal(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Greater: eltwise_greater(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Greater_equal: eltwise_greater_equal(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Logical_AND: eltwise_logical_and(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Logical_OR: eltwise_logical_or(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        case EltwiseLayer::eOperation::Logical_XOR: eltwise_logical_xor(src0_ptr, src1_ptr, dst_ptr, dst_data_size); break;
+        default: THROW_IE_EXCEPTION << "Unsupported operation type for Eltwise node";
+    }
+}
 
 void MKLDNNEltwiseNode::execute(mkldnn::stream strm) {
     if (prim) {
         MKLDNNNode::execute(strm);
     } else {
+        if (op == EltwiseLayer::Floor_mod) {
+            for (size_t i = 0; i < getParentEdges().size(); i++)
+                if (getParentEdgeAt(i)->getDesc().getPrecision() != Precision::I32)
+                    THROW_IE_EXCEPTION << "Floor_mod supports only I32 precision of inputs";
+            if (getChildEdgeAt(0)->getDesc().getPrecision() != Precision::I32)
+                THROW_IE_EXCEPTION << "Floor_mod supports only I32 precision of output";
+        }
         if (getParentEdges().size() > 2) {
-            // Only float supported in this case
-            for (int i = 0; i < getParentEdges().size(); i++) {
-                if (getParentEdgeAt(i)->getDesc().getPrecision() != Precision::FP32) {
-                    THROW_IE_EXCEPTION << "If ref eltwise has more than 2 inputs, only FP32 inputs are supported";
-                }
+            Precision pi = getParentEdgeAt(0)->getDesc().getPrecision();
+            Precision po = getChildEdgeAt(0)->getDesc().getPrecision();
+            for (int i = 1; i < getParentEdges().size(); i++) {
+                if (getParentEdgeAt(i)->getDesc().getPrecision() != pi)
+                    THROW_IE_EXCEPTION << "If Eltwise node has more than 2 inputs, all inputs must have same precision";
             }
-
-            ref_eltwise<float, float>(0, 1);
+            if (pi != po) {
+                THROW_IE_EXCEPTION << "If Eltwise node has more than 2 inputs, all inputs and output must have same precision";
+            }
+            if (pi == Precision::FP32)
+                ref_eltwise<float, float>(0, 1);
+            else if (pi == Precision::I32)
+                ref_eltwise<int32_t, int32_t>(0, 1);
+            else if (pi == Precision::I8)
+                ref_eltwise<int8_t, int8_t>(0, 1);
+            else if (pi == Precision::U8)
+                ref_eltwise<uint8_t, uint8_t>(0, 1);
+            else
+                THROW_IE_EXCEPTION << "If Eltwise node has more than 2 inputs, only FP32, I32, I8, U8 are supported";
             return;
         }
 
@@ -278,6 +2005,8 @@ void MKLDNNEltwiseNode::execute(mkldnn::stream strm) {
             ref_eltwise<int8_t, uint8_t>(0, 1);
         } else if (po == Precision::I8 && pi1 == po && pi0 == Precision::U8) {
             ref_eltwise<int8_t, uint8_t>(1, 0);
+        } else if (po == Precision::I32 && pi0 == po && pi1 == po) {
+            ref_eltwise<int32_t, int32_t>(0, 1);
         }
     }
 }
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h
index 0395cd432..2a6e3f5aa 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -31,8 +31,31 @@ private:
     static Register<MKLDNNEltwiseNode> reg;
     InferenceEngine::EltwiseLayer::eOperation op;
     std::vector<float> sum_scales;
+    bool broadcast = false;
 
     template <typename T0, typename T1> void ref_eltwise(int in0, int in1);
+    void dims_calc(int *dims, const MKLDNNDims &edge_dims);
+    void offset_out_calc(int *offset, int *dims);
+    void offset_in_calc(int *offset, int *dims_in, int *dims_out);
+
+    template <typename T0, typename T1> void eltwise_add(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_prod(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_max(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_sub(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_min(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_div(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_squared_diff(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_floor_mod(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_pow(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_equal(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_not_equal(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_less(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_less_equal(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_greater(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_greater_equal(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_logical_and(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_logical_or(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
+    template <typename T0, typename T1> void eltwise_logical_xor(const T0 *src0_ptr, const T1 *src1_ptr, T0 *dst_ptr, size_t dst_data_size);
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp
index 75b814e81..a777b5449 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp
@@ -1,13 +1,15 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "mkldnn_fullyconnected_node.h"
+#include "mkldnn_activation_node.h"
 #include "desc_iterator.hpp"
 #include <ie_layers.h>
 #include <string>
 #include <vector>
 #include <mkldnn_extension_utils.h>
+#include <mkldnn.hpp>
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -22,6 +24,25 @@ MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const InferenceEngine::CNNLay
             return MKLDNNMemoryDesc();
         return MKLDNNMemoryDesc(primitive_desc_it.weights_primitive_desc(1).desc());
     });
+
+    auto ws = layer->blobs.find("w-scale");
+    if (ws != layer->blobs.end()) {
+        wScale = ws->second;
+    }
+
+    // Trying to find oi-scale
+    if (getCnnLayer()->type == "FullyConnected" && getCnnLayer()->precision == Precision::I8) {
+        auto ois = layer->blobs.find("oi-scale");
+        if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8)
+            && ois == layer->blobs.end()) {
+            THROW_IE_EXCEPTION << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for fully connected "
+                << getCnnLayer()->name;
+        }
+        if (ois != layer->blobs.end()) {
+            // If we can find an oi-scale, then the next layer has to be an INT8.
+            oScale = ois->second;
+        }
+    }
 }
 
 void MKLDNNFullyConnectedNode::getSupportedDescriptors() {
@@ -29,12 +50,8 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() {
         return;
 
     InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
-        precision = InferenceEngine::Precision::FP32;
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
     precision = getCnnLayer()->outData[0]->getPrecision();
-    if (precision != InferenceEngine::Precision::FP32)
-        precision = InferenceEngine::Precision::FP32;
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
 
     auto * fcLayer = dynamic_cast<FullyConnectedLayer*>(getCnnLayer().get());
@@ -75,6 +92,27 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() {
         internalBlobs.push_back(createInternalBlob(biasesDims, false));
     }
 
+    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
+    if (weights->precision() == Precision::I8) {
+        // The weights blob has incorrect dims, so we have to fix it
+        TensorDesc wdesc = internalBlobs[0]->getTensorDesc();
+        wdesc.setPrecision(Precision::I8);
+        InferenceEngine::TBlob<int8_t>::Ptr reshapedInt8Weights =
+                InferenceEngine::TBlob<int8_t>::Ptr(
+                        new InferenceEngine::TBlob<int8_t>(wdesc, static_cast<int8_t*>(weights->buffer()), weights->byteSize()));
+
+        internalBlobs[0] = reshapedInt8Weights;
+        if (withBiases) {
+            Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second;
+            TensorDesc bdesc = internalBlobs[1]->getTensorDesc();
+            bdesc.setPrecision(Precision::I32);
+            InferenceEngine::TBlob<int32_t>::Ptr reshapedInt32Biases =
+                    InferenceEngine::TBlob<int32_t>::Ptr(
+                            new InferenceEngine::TBlob<int32_t>(bdesc, static_cast<int32_t*>(biases->buffer()), biases->byteSize()));
+            internalBlobs[1] = reshapedInt32Biases;
+        }
+    }
+
     for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) {
         MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format);
         MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, memory::any);
@@ -87,16 +125,24 @@ void MKLDNNFullyConnectedNode::createPrimitive() {
     if (prim)
         return;
 
-    auto prim_desc = createPrimitiveDescriptor<inner_product_forward::primitive_desc, inner_product_forward::desc>();
+    std::shared_ptr<mkldnn::primitive_attr> attr = initPrimitiveAttr();
+    std::shared_ptr<inner_product_forward::primitive_desc> prim_desc;
+    if (attr == nullptr) {
+        prim_desc = std::make_shared<inner_product_forward::primitive_desc>(
+                createPrimitiveDescriptor<inner_product_forward::primitive_desc, inner_product_forward::desc>(*attr));
+    } else {
+        prim_desc = std::make_shared<inner_product_forward::primitive_desc>(
+                createPrimitiveDescriptor<inner_product_forward::primitive_desc, inner_product_forward::desc>(*attr));
+    }
 
     if (internalBlobs.size() > 1) {
-        prim.reset(new inner_product_forward(prim_desc,
+        prim.reset(new inner_product_forward(*prim_desc,
                                              getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                              internalBlobMemory[0]->GetPrimitive(),
                                              internalBlobMemory[1]->GetPrimitive(),
                                              getChildEdgeAt(0)->getMemory().GetPrimitive()));
     } else {
-        prim.reset(new inner_product_forward(prim_desc,
+        prim.reset(new inner_product_forward(*prim_desc,
                                              getParentEdgeAt(0)->getMemory().GetPrimitive(),
                                              internalBlobMemory[0]->GetPrimitive(),
                                              getChildEdgeAt(0)->getMemory().GetPrimitive()));
@@ -104,7 +150,8 @@ void MKLDNNFullyConnectedNode::createPrimitive() {
 }
 
 bool MKLDNNFullyConnectedNode::created() const {
-    return getType() == FullyConnected;
+    return getType() == FullyConnected ||
+            getType() == FullyConnected_Activation;
 }
 
 memory::format MKLDNNFullyConnectedNode::weightsFormatForSrcFormat(memory::format sourceFormat) {
@@ -164,16 +211,74 @@ const std::vector<impl_desc_type>& MKLDNNFullyConnectedNode::getPrimitivesPriori
     return implPriorities;
 }
 
+std::shared_ptr<mkldnn::primitive_attr> MKLDNNFullyConnectedNode::initPrimitiveAttr() const {
+    auto attr = std::make_shared<mkldnn::primitive_attr>(mkldnn::primitive_attr());
+    bool scaled = false;
+    if (wScale != nullptr) {
+       float* wScaleData = static_cast<float*>(wScale->buffer());
+
+       std::vector<float> oScaleDataVector;
+       if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) {
+           float *oScaleData = static_cast<float *>(oScale->buffer());
+
+           for (size_t c = 0; c < wScale->size(); c++) {
+               oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]);
+           }
+       } else {
+           for (size_t c = 0; c < wScale->size(); c++) {
+               oScaleDataVector.push_back(wScaleData[c]);
+           }
+       }
+
+       attr->set_int_output_round_mode(mkldnn::round_nearest);
+       attr->set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector);
+    }
+    mkldnn::post_ops ops;
+    for (auto &node : fusedWith) {
+        auto* activationNode = dynamic_cast<MKLDNNActivationNode *>(node.get());
+        if (activationNode) {
+            ops.append_eltwise(1.0, activationNode->getAlgorithm(), activationNode->getAlpha(),
+                               activationNode->getBeta());
+        }
+        attr->set_post_ops(ops);
+    }
+    return attr;
+}
+
 void MKLDNNFullyConnectedNode::createDescriptor(const std::vector<InferenceEngine::TensorDesc> &inputDesc,
                                                 const std::vector<InferenceEngine::TensorDesc> &outputDesc) {
-    MKLDNNMemoryDesc in_candidate(inputDesc[0]);
-    MKLDNNMemoryDesc out_candidate(outputDesc[0]);
+    TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0];
+    mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
+    mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision());
+
+    Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second;
+
+    if (weights->precision() == Precision::I8) {
+        wdt = memory::s8;
+        bdt = memory::s32;
+
+        Precision outPrec;
+        if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) {
+            outPrec = Precision::FP32;
+        } else {
+            // define precision accordninly normalizer
+            // TODO(amalyshe) do we need to have separate flow for last in int8 chain or not?
+            outPrec = outDesc.getPrecision();
+        }
+
+        inDesc = TensorDesc(inDesc.getPrecision() , inputDesc[0].getDims(), inputDesc[0].getBlockingDesc());
+        outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), Layout::NC/*, outputDesc[0].getBlockingDesc()*/);
+    }
+
+    MKLDNNMemoryDesc in_candidate(inDesc);
+    MKLDNNMemoryDesc out_candidate(outDesc);
+
     memory::format weights_fmt = weightsFormatForSrcFormat(in_candidate.getFormat());
 
-    MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), in_candidate.getDataType(), weights_fmt);
-    MKLDNNMemoryDesc bias_candidate(MKLDNNDims(biasesDims), in_candidate.getDataType(), memory::any);
+    MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), wdt, weights_fmt);
 
     if (internalBlobs.size() > 1) {
+        MKLDNNMemoryDesc bias_candidate(MKLDNNDims(biasesDims), bdt, memory::any);
         MKLDNNDescriptor desc(std::shared_ptr<inner_product_forward::desc>(
                 new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate,
                                                 bias_candidate, out_candidate)));
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h
index 73c06f7ce..3e6c5fbdd 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -28,11 +28,16 @@ public:
     void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
                           const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
 
+protected:
+    std::shared_ptr<mkldnn::primitive_attr> initPrimitiveAttr() const override;
+
 private:
     static Register<MKLDNNFullyConnectedNode> reg;
     InferenceEngine::SizeVector weightsDims;
     InferenceEngine::SizeVector biasesDims;
     mkldnn::memory::format weightsFormatForSrcFormat(mkldnn::memory::format sourceFormat);
+
+    InferenceEngine::Blob::Ptr wScale, oScale;
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
index 2874d9dfe..2ff862f49 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h
index da171a0da..94c4e15f2 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp
index b31b491e1..bc5d6e574 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h
index 7bdd4a0f3..71f86f0db 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
index 9b42bee6b..69ab33652 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h
index 99b4c8657..9640e5048 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp
index 4b1192b85..0675c3235 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h
index 9d85dabd3..52de049ff 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
index a37a2530b..09cb566a0 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp
index ebc67748f..cca5fb301 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp
index c23ce6ee5..9a25c0fcd 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -299,7 +299,7 @@ static void permute_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPt
     }
 }
 
-std::map<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
+std::multimap<InferenceEngine::SizeVector, MKLDNNPermuteNode::PermuteImpl> MKLDNNPermuteNode::OptimizedCases = {
         {{0, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_0231, [](MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) {
             return true;
         })},  // NCHW -> NHWC case
@@ -329,26 +329,28 @@ void MKLDNNPermuteNode::execute(mkldnn::stream strm) {
     auto src_data = reinterpret_cast<const float *>(srcMemPtr->GetData());
     auto dst_data = reinterpret_cast<float *>(dstMemPtr->GetData());
 
-    auto perm = OptimizedCases.find(order);
-    if (perm != OptimizedCases.end() && perm->second.isValidParams(srcMemPtr, dstMemPtr)) {
-        perm->second.execute(batchToProcess(), srcMemPtr, dstMemPtr);
-    } else {
-        auto srcBlob = getParentEdgeAt(0)->getBlob();
-        TensorDesc srcDesc = srcBlob->getTensorDesc();
-
-        SizeVector& dims = srcDesc.getDims();
-        InferenceEngine::SizeVector orderedDims;
-        for (auto ord : order) {
-            orderedDims.push_back(dims[ord]);
+    for (const auto &impl : OptimizedCases) {
+        if (impl.first == order && impl.second.isValidParams(srcMemPtr, dstMemPtr)) {
+            impl.second.execute(batchToProcess(), srcMemPtr, dstMemPtr);
+            return;
         }
-        TensorDesc dstDesc(InferenceEngine::Precision::FP32, dims, {orderedDims, order});
+    }
 
-        int dataSize = srcBlob->size() / srcDesc.getDims()[0] * batchToProcess();
+    auto srcBlob = getParentEdgeAt(0)->getBlob();
+    TensorDesc srcDesc = srcBlob->getTensorDesc();
 
-        parallel_for(dataSize, [&](int i) {
-            dst_data[dstDesc.offset(i)] = src_data[srcDesc.offset(i)];
-        });
+    SizeVector& dims = srcDesc.getDims();
+    InferenceEngine::SizeVector orderedDims;
+    for (auto ord : order) {
+        orderedDims.push_back(dims[ord]);
     }
+    TensorDesc dstDesc(InferenceEngine::Precision::FP32, dims, {orderedDims, order});
+
+    int dataSize = srcBlob->size() / srcDesc.getDims()[0] * batchToProcess();
+
+    parallel_for(dataSize, [&](int i) {
+        dst_data[dstDesc.offset(i)] = src_data[srcDesc.offset(i)];
+    });
 }
 
 bool MKLDNNPermuteNode::created() const {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h
index 9c0ce0d49..cad6f908e 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -40,7 +40,7 @@ private:
         isApplicable isValidParams;
     };
 
-    static std::map<InferenceEngine::SizeVector, PermuteImpl> OptimizedCases;
+    static std::multimap<InferenceEngine::SizeVector, PermuteImpl> OptimizedCases;
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
index 82e3eac50..e501bbac3 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h
index e5309f494..cee6404e9 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -30,10 +30,10 @@ private:
     static Register<MKLDNNPoolingNode> reg;
     InferenceEngine::PoolingLayer::PoolType type;
     bool exclude_pad;
-    std::vector<int> stride;
-    std::vector<int> paddingL;
-    std::vector<int> paddingR;
-    std::vector<int> kernel;
+    std::vector<ptrdiff_t> stride;
+    std::vector<ptrdiff_t> paddingL;
+    std::vector<ptrdiff_t> paddingR;
+    std::vector<ptrdiff_t> kernel;
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.cpp
index 01ae0e6fd..974ededeb 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -89,7 +89,7 @@ void MKLDNNPowerNode::createPrimitive() {
 void MKLDNNPowerNode::execute(mkldnn::stream strm) {
     auto& srcMemory = getParentEdgeAt(0)->getMemory();
     auto& dstMemory = getChildEdgeAt(0)->getMemory();
-    const int data_size = srcMemory.GetSize() / sizeof(float) / srcMemory.GetDims()[0] * batchToProcess();
+    const size_t data_size = srcMemory.GetSize() / sizeof(float) / srcMemory.GetDims()[0] * batchToProcess();
 
     const auto *src_ptr = reinterpret_cast<const float*>(srcMemory.GetData()) +
             srcMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
@@ -97,11 +97,11 @@ void MKLDNNPowerNode::execute(mkldnn::stream strm) {
             dstMemory.GetDescriptor().data.layout_desc.blocking.offset_padding;
 
     if (power == 1.0f) {
-        parallel_for(data_size, [&](int i) {
+        parallel_for(data_size, [&](size_t i) {
             dst_ptr[i] = src_ptr[i] * scale + shift;
         });
     } else {
-        parallel_for(data_size, [&](int i) {
+        parallel_for(data_size, [&](size_t i) {
             dst_ptr[i] = pow(src_ptr[i] * scale + shift, power);
         });
     }
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.h
index a6fce5cbd..0bd33d218 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_power_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
new file mode 100644
index 000000000..85e006797
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp
@@ -0,0 +1,229 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "mkldnn_quantize_node.h"
+#include "desc_iterator.hpp"
+#include <ie_layers.h>
+#include <string>
+#include <vector>
+#include <mkldnn_types.h>
+#include <mkldnn_extension_utils.h>
+#include <ie_memcpy.h>
+#include "details/caseless.hpp"
+
+using namespace mkldnn;
+using namespace MKLDNNPlugin;
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+MKLDNNQuantizeNode::MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {}
+
+void MKLDNNQuantizeNode::getSupportedDescriptors() {
+    InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
+    if (precision != InferenceEngine::Precision::FP32)
+        THROW_IE_EXCEPTION << "Quantize layer " << getName() << " supports only FP32 precision";
+
+    auto* quantizeLayer = dynamic_cast<QuantizeLayer*>(getCnnLayer().get());
+    if (quantizeLayer == nullptr)
+        THROW_IE_EXCEPTION << "Cannot convert Quantize layer " << getName();
+
+    levels = quantizeLayer->levels;
+    if (levels <= 1)
+        THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only parameter levels > 1";
+
+    if (getParentEdges().size() != 5)
+        THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
+    if (getChildEdges().empty())
+        THROW_IE_EXCEPTION << "Incorrect number of output edges for layer " << getName();
+
+    if (getParentEdgeAt(0)->getDims().ndims() != 4) {
+        THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only 4D input at edge 0";
+    }
+
+    for (int i = 1; i < 5; i++) {
+        if (getParentEdgeAt(i)->getDims().ndims() != 1 && getParentEdgeAt(i)->getDims().ndims() != 4) {
+            THROW_IE_EXCEPTION << "Quantize layer " << getName() << "supports only 1D or 4D inputs at edge " << i;
+        }
+    }
+
+    canStorePacked = getChildEdges().size() == 1 && getChildEdgeAt(0)->getChild()->getType() == BinaryConvolution;
+
+    if (canStorePacked) {
+        mkldnn::memory::data_type idt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
+        mkldnn::memory::data_type ddt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN);
+        mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
+
+        MKLDNNMemoryDesc in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), idt, memory::nhwc);
+        MKLDNNMemoryDesc out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), ddt, memory::nhwc);
+
+        InferenceEngine::SizeVector weightDims;
+        weightDims.push_back(getParentEdgeAt(0)->getDims()[1]);
+        MKLDNNDims blocked_weightDims(weightDims);
+        MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::x};
+
+
+        std::shared_ptr<mkldnn::binarization_forward::desc> bin_conv_desc;
+        bin_conv_desc.reset(new binarization_forward::desc(prop_kind::forward_scoring, algorithm::binarization_depthwise,
+                                                           in_candidate, wgh_candidate, out_candidate));
+
+        descs.emplace_back(bin_conv_desc);
+
+        InferenceEngine::SizeVector dims;
+        dims.push_back(getParentEdgeAt(0)->getDims()[1]);
+
+        auto InputLowBlob = dynamic_cast<TBlob<float>*>(getParentEdgeAt(1)->getParent()->getCnnLayer()->blobs["custom"].get());
+
+        auto inputLowData = InputLowBlob->buffer().as<float*>();
+        int inputLowAxis = getParentEdgeAt(1)->getDims().ndims() == 1 ? 0 : 1;
+        bool isInputLowBroadcasted = getParentEdgeAt(1)->getDims()[inputLowAxis] != dims[0];
+
+        for (int i = 0; i < dims[0]; i++) {
+            binarizationThresholds.push_back(inputLowData[isInputLowBroadcasted ? 0 : i]);
+        }
+    }
+}
+
+void MKLDNNQuantizeNode::initSupportedPrimitiveDescriptors() {
+    if (!supportedPrimitiveDescriptors.empty())
+        return;
+
+    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
+    auto outputDataType = canStorePacked ? MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::BIN)
+                                         : MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32);
+
+
+
+    auto same = [&] (memory::format fmt, impl_desc_type impl) -> PrimitiveDescInfo {
+        InferenceEngine::LayerConfig config;
+        config.dynBatchSupport = true;
+        for (size_t i = 0; i < getParentEdges().size(); i++) {
+            InferenceEngine::DataConfig dataConfig;
+            dataConfig.inPlace = -1;
+            dataConfig.constant = false;
+
+            if (i == 0) {
+                dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt);
+            } else {
+                dataConfig.desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType,
+                        getParentEdgeAt(i)->getDims().ndims() == 1 ? memory::x : memory::nchw);
+            }
+            config.inConfs.push_back(dataConfig);
+        }
+
+        InferenceEngine::DataConfig dataConfig;
+            dataConfig.inPlace = -1;
+            dataConfig.constant = false;
+            dataConfig.desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt);
+            config.outConfs.push_back(dataConfig);
+        return {config, impl};
+    };
+
+    supportedPrimitiveDescriptors.push_back(same(memory::nhwc, ref_any));
+
+    if (canStorePacked) {
+        primitive_desc_iterator itpd = descs[0].createPrimitiveDescriptorIterator(getEngine());
+        do {
+            impl_desc_type impl_type = parse_impl_name(itpd.get_impl_info_str());
+            supportedPrimitiveDescriptors.push_back(same(memory::nhwc, impl_type));
+        } while (itpd.next());
+    }
+}
+
+void MKLDNNQuantizeNode::createPrimitive() {
+    if (prim)
+        return;
+
+    auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr();
+    auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr();
+    if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr())
+        THROW_IE_EXCEPTION << "Destination memory isn't allocated.";
+    if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr())
+        THROW_IE_EXCEPTION << "Input memory isn't allocated.";
+    if (getSelectedPrimitiveDescriptor() == nullptr)
+        THROW_IE_EXCEPTION << "Preferable primitive descriptor isn't set.";
+
+    if (canStorePacked) {
+        auto prim_desc = createPrimitiveDescriptor<binarization_forward::primitive_desc, binarization_forward::desc>();
+
+        MKLDNNMemoryDesc binarizationDataDesc = {{getParentEdgeAt(0)->getDims()[1]}, memory::f32, memory::x};
+        auto binarizationDataMem = std::make_shared<MKLDNNMemory>(getEngine());
+        binarizationDataMem->Create(binarizationDataDesc, &binarizationThresholds[0]);
+        internalBlobMemory.push_back(binarizationDataMem);
+
+        prim.reset(new binarization_forward(prim_desc, getParentEdgeAt(0)->getMemory().GetPrimitive(),
+                                            internalBlobMemory[0]->GetPrimitive(),
+                                            getChildEdgeAt(0)->getMemory().GetPrimitive()));
+    }
+}
+
+void MKLDNNQuantizeNode::execute(mkldnn::stream strm) {
+    if (prim) {
+        MKLDNNNode::execute(strm);
+    } else {
+        auto &srcMemory = getParentEdgeAt(0)->getMemoryPtr();
+        auto &inputLowMemory = getParentEdgeAt(1)->getMemoryPtr();
+        auto &inputHighMemory = getParentEdgeAt(2)->getMemoryPtr();
+        auto &outputLowMemory = getParentEdgeAt(3)->getMemoryPtr();
+        auto &outputHighMemory = getParentEdgeAt(4)->getMemoryPtr();
+        auto &dstMemory = getChildEdgeAt(0)->getMemoryPtr();
+
+        auto srcData = reinterpret_cast<const float *>(srcMemory->GetData());
+        auto inputLowData = reinterpret_cast<const float *>(inputLowMemory->GetData());
+        auto inputHighData = reinterpret_cast<const float *>(inputHighMemory->GetData());
+        auto outputLowData = reinterpret_cast<const float *>(outputLowMemory->GetData());
+        auto outputHighData = reinterpret_cast<const float *>(outputHighMemory->GetData());
+        auto dstData = reinterpret_cast<float *>(dstMemory->GetData());
+
+        srcData += srcMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
+        inputLowData += inputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
+        inputHighData += inputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
+        outputLowData += outputLowMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
+        outputHighData += outputHighMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
+        dstData += dstMemory->GetDescriptor().data.layout_desc.blocking.offset_padding;
+
+        size_t N = static_cast<size_t>(batchToProcess());
+        size_t C = static_cast<size_t>(srcMemory->GetDims()[1]);
+        size_t H = static_cast<size_t>(srcMemory->GetDims()[2]);
+        size_t W = static_cast<size_t>(srcMemory->GetDims()[3]);
+
+        int inputLowAxis = inputLowMemory->GetDims().size() == 1 ? 0 : 1;
+        bool isInputLowBroadcasted = inputLowMemory->GetDims()[inputLowAxis] != C;
+
+        int inputHighAxis = inputHighMemory->GetDims().size() == 1 ? 0 : 1;
+        bool isInputHighBroadcasted = inputHighMemory->GetDims()[inputHighAxis] != C;
+
+        int outputLowAxis = outputLowMemory->GetDims().size() == 1 ? 0 : 1;
+        bool isOutputLowBroadcasted = outputLowMemory->GetDims()[outputLowAxis] != C;
+
+        int outputHighAxis = outputHighMemory->GetDims().size() == 1 ? 0 : 1;
+        bool isOutputHighBroadcasted = outputHighMemory->GetDims()[outputHighAxis] != C;
+
+        for (int n = 0; n < N; n++) {
+            for (int h = 0; h < H; h++) {
+                for (int w = 0; w < W; w++) {
+                    for (int c = 0; c < C; c++) {
+                        size_t idx = n * H * W * C + h * W * C + w * C + c;
+
+                        float inputLow = inputLowData[isInputLowBroadcasted ? 0 : c];
+                        float inputHigh = inputHighData[isInputHighBroadcasted ? 0 : c];
+                        float outputLow = outputLowData[isOutputLowBroadcasted ? 0 : c];
+                        float outputHigh = outputHighData[isOutputHighBroadcasted ? 0 : c];
+
+                        if (srcData[idx] <= inputLow)
+                            dstData[idx] = outputLow;
+                        else if (srcData[idx] > inputHigh)
+                            dstData[idx] = outputHigh;
+                        else
+                            dstData[idx] = roundf((srcData[idx] - inputLow) / (inputHigh - inputLow) * (levels - 1)) /
+                                           (levels - 1) * (outputHigh - outputLow) + outputLow;
+                    }
+                }
+            }
+        }
+    }
+}
+
+bool MKLDNNQuantizeNode::created() const {
+    return getType() == Quantize;
+}
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
new file mode 100644
index 000000000..644926c17
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <ie_common.h>
+#include <mkldnn_node.h>
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace MKLDNNPlugin {
+
+class MKLDNNQuantizeNode : public MKLDNNNode {
+public:
+    MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng);
+    ~MKLDNNQuantizeNode() override = default;
+
+    void initSupportedPrimitiveDescriptors() override;
+    void getSupportedDescriptors() override;
+    void createPrimitive() override;
+    bool created() const override;
+    void execute(mkldnn::stream strm) override;
+
+
+private:
+    static Register<MKLDNNQuantizeNode> reg;
+
+    bool canStorePacked;
+    int levels;
+
+    std::vector<float> binarizationThresholds;
+};
+
+}  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
index 345b21536..103f49d53 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h
index 7a228ecec..32c373606 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
index d959aa5f9..4d2c34b26 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h
index bb30099c9..b172ef83b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
index ba3228543..af117631a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp
@@ -1,11 +1,10 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #include "mkldnn_rnn.h"
 #include "mkldnn_extension_utils.h"
 #include "desc_iterator.hpp"
-#include <ie_layers_prv.h>
 
 #include <string>
 #include <utility>
@@ -22,19 +21,40 @@ inline bool one_of(T val, P item, Args... item_others) {
     return val == item || one_of(val, item_others...);
 }
 
-rnn_direction ie2mkl(RNNLayer::Direction &direction) {
-    return direction == RNNLayer::RNN_FWD ? unidirectional_left2right
-         : direction == RNNLayer::RNN_BWD ? unidirectional_right2left
-         : direction == RNNLayer::RNN_BDR ? bidirectional_concat
-                                          : unidirectional;
+using _RNN = RNNSequenceLayer;  // alias
+
+static rnn_direction ie2mkl(_RNN::Direction &direction) {
+    return direction == _RNN::FWD ? unidirectional_left2right
+         : direction == _RNN::BWD ? unidirectional_right2left
+         : direction == _RNN::BDR ? bidirectional_concat
+         : unidirectional;
+}
+
+static algorithm ie2mkl(std::string act_type) {
+    return act_type == "sigmoid" ? eltwise_logistic
+         : act_type == "tanh"    ? eltwise_tanh
+         : act_type == "relu"    ? eltwise_relu
+         : algorithm_undef;
+}
+
+static algorithm ie2mkl(RNNCellBase::CellType cell_type) {
+    switch (cell_type) {
+        case RNNCellBase::LSTM: return vanilla_lstm;
+        case RNNCellBase::GRU:  return vanilla_gru;
+        case RNNCellBase::GRU_LBR:  return gru_linear_before_reset;
+        case RNNCellBase::RNN:  return vanilla_rnn;
+        default:
+            THROW_IE_EXCEPTION << "Unsoupported cell type";
+            return algorithm_undef;
+    }
 }
 
 MKLDNNRNN::MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng) : MKLDNNNode(layer, eng) {
-    is_cell = layer->type == "LSTMCell";
+    is_cell = one_of(layer->type, "LSTMCell", "GRUCell", "RNNCell");
 }
 
 bool MKLDNNRNN::created() const {
-    return getType() == (is_cell ? LSTMCell : RNN);
+    return getType() == (is_cell ? RNNCell : RNNSeq);
 }
 
 void MKLDNNRNN::getSupportedDescriptors() {
@@ -46,48 +66,59 @@ void MKLDNNRNN::getSupportedDescriptors() {
 
 void MKLDNNRNN::fillCellDesc() {
     if (!descs.empty()) return;
-    auto cellLayer = std::dynamic_pointer_cast<InferenceEngine::LSTMCell>(getCnnLayer());
+    auto cellLayer = std::dynamic_pointer_cast<RNNCellBase>(getCnnLayer());
 
     if (!cellLayer)
-        THROW_IE_EXCEPTION << "Wrong RNN layer representation. Cannot cast to RNNLayer.";
+        THROW_IE_EXCEPTION << "No original layer for RNNCell.";
+
+    algorithm cell_type = ie2mkl(cellLayer->cellType);
+    algorithm cell_act = ie2mkl(cellLayer->activations[0]);  // Works only for RNN with one gate
+
+    cell_desc = {cell_type, cell_act};
+    if (cellLayer->clip != 0.0f)
+        cell_desc.set_clipping(cellLayer->clip);
 
     auto &ins = cellLayer->insData;
     auto &outs = cellLayer->outData;
 
-    if (ins.size() != 3)
+    if (!one_of(ins.size(), 3, 2))
         THROW_IE_EXCEPTION << "Incorrect number of input ports for layer " << getName();
-    if (outs.size() != 2)
+    if (!one_of(outs.size(), 2, 1))
         THROW_IE_EXCEPTION << "Incorrect number of output ports for layer " << getName();
 
     auto in_data_dims = getParentEdgeAt(0)->getDims();
     auto in_h_state_dims = getParentEdgeAt(1)->getDims();
-    auto in_c_state_dims = getParentEdgeAt(2)->getDims();
-
     auto out_h_state_dims = getChildEdgeAt(0)->getDims();
-    auto out_c_state_dims = getChildEdgeAt(1)->getDims();
 
-    if (in_data_dims.ndims() != 2
-        || in_h_state_dims.ndims() != 2
-        || in_c_state_dims.ndims() != 2
-        || out_h_state_dims.ndims() != 2
-        || out_c_state_dims.ndims() != 2)
+    if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2)
         THROW_IE_EXCEPTION << "Incorrect shape of input/output ports for layer " << getName();
 
+    G = cell_desc.get_gates_count();
+    S = cell_desc.get_state_count();
     T = 1;
     N  = in_data_dims[0];
     DC = in_data_dims[1];
     SC = in_h_state_dims[1];
 
+    Gb = (cell_type != gru_linear_before_reset) ? G : G + 1;
+
     // Expected shapes
     MKLDNNDims D_shape {N, DC}, S_shape {N, SC};
 
     if (in_data_dims != D_shape
         || in_h_state_dims != S_shape
-        || in_c_state_dims != S_shape
-        || out_h_state_dims != S_shape
-        || out_c_state_dims != S_shape)
+        || out_h_state_dims != S_shape)
         THROW_IE_EXCEPTION << "Incorrect shape of input/output ports for layer " << getName();
 
+    if (S == 2) {
+        auto in_c_state_dims = getParentEdgeAt(2)->getDims();
+        auto out_c_state_dims = getChildEdgeAt(1)->getDims();
+
+        if (in_c_state_dims != S_shape
+            || out_c_state_dims != S_shape)
+            THROW_IE_EXCEPTION << "Incorrect shape of input/output ports for layer " << getName();
+    }
+
     auto blobs = cellLayer->blobs;
     Blob::Ptr weights, bias;
     if (blobs.find("weights") != blobs.end()) weights = blobs["weights"];
@@ -99,7 +130,7 @@ void MKLDNNRNN::fillCellDesc() {
     if (weights->size() != G*SC*(SC+DC))
         THROW_IE_EXCEPTION << "RNN Layer. Weights size is not correct. Expected size:" << G*SC*(SC+DC);
 
-    if (bias && bias->size() != G*SC)
+    if (bias && bias->size() != Gb*SC)
         THROW_IE_EXCEPTION << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
 
     // Shapes and Attributes are correct. Can start internal stuff initialization.
@@ -114,44 +145,55 @@ void MKLDNNRNN::fillCellDesc() {
     w_state_d  = {{L, D, SC, G, SC}, memory::f32, memory::ldigo};
 
     if (bias)
-        w_bias_d = {{L, D, G, SC}, memory::f32, memory::ldgo};
+        w_bias_d = {{L, D, Gb, SC}, memory::f32, memory::ldgo};
 
-    std::vector<TensorDesc> in_candidate;
+    std::vector<TensorDesc> in_candidate, out_candidate;
     in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, memory::f32, memory::nc});
     in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
-    in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
-
-    std::vector<TensorDesc> out_candidate;
-    out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
     out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
 
+    if (S == 2) {
+        in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
+        out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
+    }
+
     createDescriptor(in_candidate, out_candidate);
 }
 
 void MKLDNNRNN::fillSeqDesc() {
     if (!descs.empty()) return;
-    auto rnnLayer = std::dynamic_pointer_cast<RNNLayer>(getCnnLayer());
+    auto rnnLayer = std::dynamic_pointer_cast<RNNSequenceLayer>(getCnnLayer());
 
     if (!rnnLayer)
-        THROW_IE_EXCEPTION << "Wrong RNN layer representation. Cannot cast to RNNLayer.";
+        THROW_IE_EXCEPTION << "Wrong RNN layer representation. Cannot cast to RNNSequenceLayer.";
+
+    if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN))
+        THROW_IE_EXCEPTION << "RNN layer supports only LSTM/GRU/RNN cell";
 
-    if (!one_of(rnnLayer->cellType, "LSTM"))
-        THROW_IE_EXCEPTION << "RNN layer supports only LSTM like cell";
+    algorithm cell_type = ie2mkl(rnnLayer->cellType);
+    algorithm cell_act = algorithm_undef;
+    if (!rnnLayer->activations.empty())
+        cell_act = ie2mkl(rnnLayer->activations[0]);  // Works only for RNN with one gate
+
+    cell_desc = {cell_type, cell_act};
+
+    if (rnnLayer->clip != 0.0f)
+        cell_desc.set_clipping(rnnLayer->clip);
 
     if (!one_of(rnnLayer->axis, 0, 1))
         THROW_IE_EXCEPTION << "RNN layer supports only sequence axis 0 or 1";
     nativeOrder = rnnLayer->axis == 0;
 
-    if (!one_of(rnnLayer->direction, RNNLayer::RNN_FWD, RNNLayer::RNN_BWD))
+    if (!one_of(rnnLayer->direction, _RNN::FWD, _RNN::BWD))
         THROW_IE_EXCEPTION << "RNN layer supports only unidirectional RNN layer";
     direction = ie2mkl(rnnLayer->direction);
 
     auto &ins = rnnLayer->insData;
     auto &outs = rnnLayer->outData;
 
-    if (!one_of(ins.size(), 3, 1))
+    if (!one_of(ins.size(), 3, 2, 1))
         THROW_IE_EXCEPTION << "Incorrect number of input ports for layer " << getName();
-    if (!one_of(outs.size(), 3, 1))
+    if (!one_of(outs.size(), 3, 2, 1))
         THROW_IE_EXCEPTION << "Incorrect number of output ports for layer " << getName();
 
     auto in_data_dims = getParentEdgeAt(0)->getDims();
@@ -165,32 +207,32 @@ void MKLDNNRNN::fillSeqDesc() {
         std::swap(out_data_dims[0], out_data_dims[1]);
     }
 
+    G = cell_desc.get_gates_count();
+    S = cell_desc.get_state_count();
     T = in_data_dims[0];
     N = in_data_dims[1];
     DC = in_data_dims[2];
     SC = out_data_dims[2];
 
+    Gb = (cell_type != gru_linear_before_reset) ? G : G + 1;
+
     MKLDNNDims ID_shape {T, N, DC}, OD_shape {T, N, SC}, S_shape {N, SC};
 
     if (out_data_dims != OD_shape)
         THROW_IE_EXCEPTION << "Incorrect shape of input/output ports for layer " << getName();
 
-    if (ins.size() == 3) {
-        auto state_dims1 = getParentEdgeAt(1)->getDims();
-        auto stats_dims2 = getParentEdgeAt(2)->getDims();
-
-        if (state_dims1 != S_shape || stats_dims2 != S_shape)
-            THROW_IE_EXCEPTION << "Incorrect shape of state ports for layer " << getName();
+    if (ins.size() > 1) {
+        for (int i = 1; i < ins.size(); i++)
+            if (getParentEdgeAt(i)->getDims() != S_shape)
+                THROW_IE_EXCEPTION << "Incorrect shape of state ports for layer " << getName();
 
         in_state_d = {{L, D, S, N, SC}, memory::f32, memory::ldsnc};
     }
 
-    if (outs.size() == 3) {
-        auto state_dims1 = getChildEdgeAt(1)->getDims();
-        auto stats_dims2 = getChildEdgeAt(2)->getDims();
-
-        if (state_dims1 != S_shape || stats_dims2 != S_shape)
-            THROW_IE_EXCEPTION << "Incorrect shape of state ports for layer " << getName();
+    if (outs.size() > 1) {
+        for (int i = 1; i < outs.size(); i++)
+            if (getChildEdgeAt(i)->getDims() != S_shape)
+                THROW_IE_EXCEPTION << "Incorrect shape of state ports for layer " << getName();
 
         out_state_d = {{L, D, S, N, SC}, memory::f32, memory::ldsnc};
     }
@@ -209,11 +251,11 @@ void MKLDNNRNN::fillSeqDesc() {
     w_data_d  = {{L, D, DC, G, SC}, memory::f32, memory::ldigo};
     w_state_d = {{L, D, SC, G, SC}, memory::f32, memory::ldigo};
 
-    if (bias && bias->size() != G*SC)
+    if (bias && bias->size() != Gb*SC)
         THROW_IE_EXCEPTION << "RNN Layer. Biases size is not correct. Expected size:" << G*SC;
 
     if (bias)
-        w_bias_d = {{L, D, G, SC}, memory::f32, memory::ldgo};
+        w_bias_d = {{L, D, Gb, SC}, memory::f32, memory::ldgo};
 
     // Try to create descriptor and corresponding configuration
     in_data_d = {in_data_dims, memory::f32, memory::tnc};
@@ -225,10 +267,8 @@ void MKLDNNRNN::fillSeqDesc() {
     else
         in_candidate.push_back(MKLDNNMemoryDesc{{N, T, DC}, memory::f32, memory::ntc});
 
-    if (ins.size() == 3) {
-        in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
+    for (int i = 1; i < ins.size(); i++)
         in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
-    }
 
     std::vector<TensorDesc> out_candidate;
     if (nativeOrder)
@@ -236,10 +276,8 @@ void MKLDNNRNN::fillSeqDesc() {
     else
         out_candidate.push_back(MKLDNNMemoryDesc{{N, T, SC}, memory::f32, memory::ntc});
 
-    if (outs.size() == 3) {
-        out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
+    for (int i = 1; i < outs.size(); i++)
         out_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, memory::f32, memory::nc});
-    }
 
     createDescriptor(in_candidate, out_candidate);
 }
@@ -247,8 +285,7 @@ void MKLDNNRNN::fillSeqDesc() {
 void MKLDNNRNN::createDescriptor(const std::vector<TensorDesc> &inputDesc,
                                  const std::vector<TensorDesc> &outputDesc) {
     MKLDNNDescriptor desc(std::shared_ptr<rnn_forward::desc>(
-            new rnn_forward::desc(forward_scoring,
-                    {algorithm::vanilla_lstm, algorithm::eltwise_tanh },
+            new rnn_forward::desc(forward_scoring, cell_desc,
                     direction,
                     /* In Data       */ in_data_d,
                     /* In State      */ in_state_d,
@@ -305,7 +342,6 @@ void MKLDNNRNN::createPrimitive() {
 
     {
         /* Copy Weight data
-         *
          * IE format:
          *   W - [gates, out_state_size, in_data_size + in_state_size]
          *   B - [gates, out_state_size]
@@ -316,11 +352,46 @@ void MKLDNNRNN::createPrimitive() {
          *   B - [gates, out_state_size]
          *
          *   Gate order
+         *   ====== LSTM ======
          *   Caffe - IFOC, ONNX   - IOFC
          *   IE    - FICO, mkldnn - IFCO
+         *
+         *   ====== GRU ======
+         *   IE - URO, mkldnn - URO
          */
-        // FICO -> IFCO
-        const int gate_map[] = {1, 0, 2, 3};
+        const int gate_map_lstm[] = {1, 0, 2, 3};  // FICO -> IFCO
+        const int gate_map_gru[]  = {0, 1, 2, 3};
+        const int gate_map_rnn[]  = {0};
+        const int *gate_map;
+        const int gate_map_lstm_size = sizeof(gate_map_lstm) / sizeof(int);
+        const int gate_map_gru_size = sizeof(gate_map_gru) / sizeof(int);
+        const int gate_map_rnn_size = sizeof(gate_map_rnn) / sizeof(int);
+        if (cell_desc.get_cell_kind() == vanilla_lstm) {
+            gate_map = gate_map_lstm;
+            if (G > gate_map_lstm_size) {
+                THROW_IE_EXCEPTION << "G isn't equal to the size of gate_map";
+            }
+        } else if (cell_desc.get_cell_kind() == vanilla_gru) {
+            gate_map = gate_map_gru;
+            if (G > gate_map_gru_size) {
+                THROW_IE_EXCEPTION << "G isn't equal to the size of gate_map";
+            }
+        } else if (cell_desc.get_cell_kind() == gru_linear_before_reset) {
+            gate_map = gate_map_gru;
+            if (G > gate_map_gru_size) {
+                THROW_IE_EXCEPTION << "G isn't equal to the size of gate_map";
+            }
+        } else if (cell_desc.get_cell_kind() == vanilla_rnn) {
+            gate_map = gate_map_rnn;
+            if (G > gate_map_rnn_size) {
+                THROW_IE_EXCEPTION << "G isn't equal to the size of gate_map";
+            }
+        } else {
+            gate_map = gate_map_gru;
+            if (G > gate_map_gru_size) {
+                THROW_IE_EXCEPTION << "G isn't equal to the size of gate_map";
+            }
+        }
 
         auto ie_w_ptr = getCnnLayer()->blobs["weights"]->buffer().as<const float*>();
         auto w_ptr = static_cast<float*>(w_data_mem->GetData());
@@ -348,7 +419,7 @@ void MKLDNNRNN::createPrimitive() {
         if (w_bias_d) {
             auto ie_b_ptr = getCnnLayer()->blobs["biases"]->buffer().as<const float*>();
             auto b_ptr = static_cast<float*>(w_bias_mem->GetData());
-            for (int g = 0; g < G; g++) {
+            for (int g = 0; g < Gb; g++) {
                 float *l_b_ptr = b_ptr + gate_map[g]*SC;
                 for (int out_i = 0; out_i < SC; out_i++) {
                     *l_b_ptr = *ie_b_ptr;
@@ -363,53 +434,44 @@ void MKLDNNRNN::createPrimitive() {
     src_state_mem->Create(in_state_d);
     internalBlobMemory.push_back(src_state_mem);
     if (in_state_d) {
-        /* create copy/concat primitive */
-        auto src_stat_1 = getParentEdgeAt(1)->getMemory().GetPrimitive();
-        auto src_stat_2 = getParentEdgeAt(2)->getMemory().GetPrimitive();
-
-        auto low_half_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-        low_half_state_mem->Create(
-                src_stat_1.get_primitive_desc().desc(),
-                src_state_mem->GetPrimitive().get_data_handle());
-        internalBlobMemory.push_back(low_half_state_mem);
-
-        auto high_half_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-        high_half_state_mem->Create(
-                src_stat_2.get_primitive_desc().desc(),
-                static_cast<uint8_t*>(src_state_mem->GetPrimitive().get_data_handle()) +
-                src_stat_1.get_primitive_desc().get_size());
-        internalBlobMemory.push_back(high_half_state_mem);
-
-        exec_before.emplace_back(src_stat_1, low_half_state_mem->GetPrimitive());
-        exec_before.emplace_back(src_stat_2, high_half_state_mem->GetPrimitive());
+        int offset = 0;
+        for (int i = 0; i < S; i++) {
+            /* create copy/concat primitive */
+            auto src_stat = getParentEdgeAt(i+1)->getMemory().GetPrimitive();
+
+            auto state_mem = std::make_shared<MKLDNNMemory>(getEngine());
+            state_mem->Create(
+                    src_stat.get_primitive_desc().desc(),
+                    static_cast<uint8_t *>(src_state_mem->GetPrimitive().get_data_handle()) + offset);
+            offset += src_stat.get_primitive_desc().get_size();
+
+            internalBlobMemory.push_back(state_mem);
+
+            exec_before.emplace_back(src_stat, state_mem->GetPrimitive());
+        }
     }
 
     auto dst_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
     dst_state_mem->Create(out_state_d);
     internalBlobMemory.push_back(dst_state_mem);
     if (out_state_d) {
-        int idx_H = is_cell ? 0 : 1;
-        int idx_C = is_cell ? 1 : 2;
-        /* create copy/split primitive */
-        auto dst_stat_1 = getChildEdgeAt(idx_H)->getMemory().GetPrimitive();
-        auto dst_stat_2 = getChildEdgeAt(idx_C)->getMemory().GetPrimitive();
-
-        auto low_half_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-        low_half_state_mem->Create(
-                dst_stat_1.get_primitive_desc().desc(),
-                dst_state_mem->GetPrimitive().get_data_handle());
-        internalBlobMemory.push_back(low_half_state_mem);
-
-        auto high_half_state_mem = std::make_shared<MKLDNNMemory>(getEngine());
-        high_half_state_mem->Create(
-                dst_stat_2.get_primitive_desc().desc(),
-                static_cast<uint8_t*>(dst_state_mem->GetPrimitive().get_data_handle()) +
-                        dst_stat_1.get_primitive_desc().get_size());
-        internalBlobMemory.push_back(high_half_state_mem);
-
-
-        if (!is_cell) exec_after.emplace_back(low_half_state_mem->GetPrimitive(),  dst_stat_1);
-        exec_after.emplace_back(high_half_state_mem->GetPrimitive(), dst_stat_2);
+        int offset = 0;
+        int idx_start = is_cell ? 0 : 1;
+        for (int i = 0; i < S; i++) {
+            /* create copy/split primitive */
+            auto dst_stat = getChildEdgeAt(idx_start + i)->getMemory().GetPrimitive();
+
+            auto state_mem = std::make_shared<MKLDNNMemory>(getEngine());
+            state_mem->Create(
+                    dst_stat.get_primitive_desc().desc(),
+                    static_cast<uint8_t *>(dst_state_mem->GetPrimitive().get_data_handle()) + offset);
+            offset += dst_stat.get_primitive_desc().get_size();
+
+            internalBlobMemory.push_back(state_mem);
+
+            if (is_cell && i == 0) continue;
+            exec_after.emplace_back(state_mem->GetPrimitive(), dst_stat);
+        }
     }
 
     auto workspace_mem = std::make_shared<MKLDNNMemory>(getEngine());
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
index 4399c306a..640459661 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -42,15 +42,19 @@ private:
     /** Direction of iteration through sequence dimension */
     mkldnn::rnn_direction direction = mkldnn::unidirectional;
 
+    /** RNN Cell desc (type/activation_alg/clip)*/
+    mkldnn::rnn_cell::desc cell_desc { mkldnn::algorithm::vanilla_lstm };
+
     // Internal attributes
-    int N = 0;   /**< Batch value */
-    int T = 0;   /**< Sequence value */
-    int DC = 0;  /**< Input data channel size */
-    int SC = 0;  /**< State channel size value */
-    const int G = 4;   /**< Gate size. 4 for LSTM */
-    const int L = 1;   /**< What is it??. Constant for mkldnn impl */
-    const int D = 1;   /**< Num of direction. 1 or 2 */
-    const int S = 2;   /**< Num of state. 2 for LSTM (hidden and sell state). */
+    ptrdiff_t N = 0;   /**< Batch value */
+    ptrdiff_t T = 0;   /**< Sequence value */
+    ptrdiff_t DC = 0;  /**< Input data channel size */
+    ptrdiff_t SC = 0;  /**< State channel size value */
+    ptrdiff_t G = 0;   /**< Gate size. LSTM - 4, GRU - 3, RNN - 1 */
+    ptrdiff_t Gb = 0;  /**< Gate size for biases. Gb = GRU_lbr ? G+1 : G */
+    ptrdiff_t S = 2;   /**< Num of state. LSTM - 2, GRU & RNN - 1 */
+    const ptrdiff_t L = 1;   /**< What is it??. Constant for mkldnn impl */
+    const ptrdiff_t D = 1;   /**< Num of direction. 1 or 2 */
 
     MKLDNNMemoryDesc in_data_d;
     MKLDNNMemoryDesc out_data_d;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp
index 4088a1f7a..2843a6f01 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h
index ca2bafd4f..34333d593 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
index 752172733..949815cc1 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h
index 8e199f377..be9a54231 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp
index 90cf4f401..cce726439 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -31,17 +31,6 @@ void MKLDNNSplitNode::getSupportedDescriptors() {
     axis = splitLayer->_axis;
     if (axis >= getParentEdgeAt(0)->getDims().ndims())
         THROW_IE_EXCEPTION << "Invalid value of axis parameter in split layer";
-
-    // WA. Check applicability and limitations
-    for (size_t i = 1; i < getCnnLayer()->outData.size(); i++) {
-        int num_port_connection = getCnnLayer()->outData[i]->inputTo.size();
-        // limitation. If num of edges more than num of ports,
-        // we connect it to first port. So check that all ports [1:]
-        // have only one connection.
-        if (num_port_connection > 1)
-            THROW_IE_EXCEPTION << "Unsupported topology. Split layer \"" << getCnnLayer()->name << "\" "
-                               << "has output edges more than output ports.";
-    }
 }
 
 void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
@@ -65,7 +54,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
     config.inConfs[0].inPlace = -1;
     config.inConfs[0].constant = false;
     config.inConfs[0].desc = MKLDNNMemoryDesc(srcDims, inputDataType, memory::format::any);
-    config.outConfs.resize(getChildEdges().size());
+    config.outConfs.resize(outDims.size());
 
     if (srcDims.ndims() < 2)
         THROW_IE_EXCEPTION << "Split " << getName() << " isn't supported 1d blobs";
@@ -114,11 +103,11 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
     }
 
     config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {srcDims.ToSizeVector(), order, offset, offsets, strides});
-    for (size_t i = 0; i < getChildEdges().size(); i++) {
-        auto outDims = getChildEdgeAt(i)->getDims();
+    for (size_t i = 0; i < outDims.size(); i++) {
+        auto dims = outDims[i].ToSizeVector();
         config.outConfs[i].inPlace = 0;
-        config.outConfs[i].desc = TensorDesc(Precision::FP32, outDims.ToSizeVector(),
-                                            {outDims.ToSizeVector(), order, offset, offsets, strides});
+        config.outConfs[i].desc = TensorDesc(Precision::FP32, dims,
+                                            {dims, order, offset, offsets, strides});
     }
     supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
 
@@ -149,9 +138,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
         config.inConfs[0].desc = TensorDesc(Precision::FP32, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides});
 
         bool canInplace = true;
-        for (size_t i = 0; i < getChildEdges().size(); i++) {
-            auto outDims = getChildEdgeAt(i)->getDims().ToSizeVector();
-            blkDims = outDims;
+        for (size_t i = 0; i < outDims.size(); i++) {
+            auto dims = outDims[i].ToSizeVector();
+            blkDims = dims;
 
             if (blkDims[1] % sizeS) {
                 canInplace = false;
@@ -159,7 +148,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() {
             }
             blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu);
             blkDims.push_back(sizeS);
-            config.outConfs[i].desc = TensorDesc(Precision::FP32, outDims, {blkDims, order, offset, offsets, strides});
+            config.outConfs[i].desc = TensorDesc(Precision::FP32, dims, {blkDims, order, offset, offsets, strides});
         }
         if (canInplace)
             supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
@@ -408,37 +397,19 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() {
     const auto& cnnLayer = getCnnLayer();
     if (!cnnLayer)
         THROW_IE_EXCEPTION << "Cannot create Split layer " << getName() << " without CNNLayer!";
-    if (config.outConfs.size() != getChildEdges().size())
+    if (config.outConfs.size() != outDims.size())
         THROW_IE_EXCEPTION << "Invalid config for Split layer " << getName();
     size_t offset = 0;
     for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
-        size_t confNum(0);
-        bool found = false;
-        for (size_t j = i; j < getChildEdges().size(); j++) {
-            if (cnnLayer->outData[i]->inputTo.find(getChildEdgeAt(j)->getChild()->getName()) == cnnLayer->outData[i]->inputTo.end())
-                continue;
-            confNum = j;
-            config.outConfs[j].desc = InferenceEngine::TensorDesc(config.outConfs[j].desc.getPrecision(),
-                                                                  config.outConfs[j].desc.getDims(), {
-                                                                          config.outConfs[j].desc.getBlockingDesc().getBlockDims(),
-                                                                          config.outConfs[j].desc.getBlockingDesc().getOrder(),
-                                                                          config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset,
-                                                                          config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(),
-                                                                          config.inConfs[0].desc.getBlockingDesc().getStrides()
-                                                                  });
-            found = true;
-        }
-        if (!found) {
-            confNum = i;
-            config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
-                                                                  config.outConfs[i].desc.getDims(), {
-                                                                          config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
-                                                                          config.outConfs[i].desc.getBlockingDesc().getOrder(),
-                                                                          config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset,
-                                                                          config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(),
-                                                                          config.inConfs[0].desc.getBlockingDesc().getStrides()
-                                                                  });
-        }
+        size_t confNum = i;
+        config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(),
+                                                              config.outConfs[i].desc.getDims(), {
+                                                                      config.outConfs[i].desc.getBlockingDesc().getBlockDims(),
+                                                                      config.outConfs[i].desc.getBlockingDesc().getOrder(),
+                                                                      config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset,
+                                                                      config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(),
+                                                                      config.inConfs[0].desc.getBlockingDesc().getStrides()
+                                                              });
         size_t axisSize = 1;
         for (size_t j = axis; j < config.outConfs[confNum].desc.getBlockingDesc().getBlockDims().size(); j++) {
             axisSize *= config.outConfs[confNum].desc.getBlockingDesc().getBlockDims()[j];
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h
index 905f8069c..3fca021e8 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
index 122671681..b7cdd403b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h
index d6a75941f..572a98aec 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/perf_count.h b/inference-engine/src/mkldnn_plugin/perf_count.h
index 3770a2435..988054d9d 100644
--- a/inference-engine/src/mkldnn_plugin/perf_count.h
+++ b/inference-engine/src/mkldnn_plugin/perf_count.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
index 24d2931af..7f61fcea4 100644
--- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
+++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "blob_dump.h"
diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h
index 4130d53a7..1390c18f1 100644
--- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h
+++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/tests/CMakeLists.txt b/inference-engine/tests/CMakeLists.txt
index 4fa0b4477..2918415f9 100644
--- a/inference-engine/tests/CMakeLists.txt
+++ b/inference-engine/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -11,35 +11,18 @@ set (CMAKE_CXX_STANDARD_REQUIRED ON)
 
 set (gtest_force_shared_crt ON CACHE BOOL "disable static CRT for google test")
 
-#detecting regex support
-if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
-    add_definitions(-DUSE_BOOST_RE)
-    set(USE_BOOST_RE ON)
-else()
-    set(USE_BOOST_RE OFF)
-endif()
-
 add_subdirectory(mock_engine)
 
-add_subdirectory(libs/gtest)
+#####################################################################################################
+#                                     SETUP GOOGLE TESTS                                            #
+#####################################################################################################
 
-include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}
-        ${gmock_SOURCE_DIR}/include
-        ${gmock_SOURCE_DIR}
-        ${IE_MAIN_SOURCE_DIR}/include
-        ${IE_MAIN_SOURCE_DIR}/src
-        ${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src
-        ${IE_MAIN_SOURCE_DIR}/tests/helpers
-        ${IE_MAIN_SOURCE_DIR}/samples/common
-        ${IE_MAIN_SOURCE_DIR}/samples/common/format_reader
-        ${MKLDNN}/include)
+add_subdirectory(libs/gtest)
 
 #####################################################################################################
 #                                     SETUP GOOGLE TESTS                                            #
 #####################################################################################################
 enable_testing()
 
-link_directories(${LIBRARY_OUTPUT_DIRECTORY})
-
 add_subdirectory(helpers)
 add_subdirectory(unit)
diff --git a/inference-engine/tests/helpers/CMakeLists.txt b/inference-engine/tests/helpers/CMakeLists.txt
index 4ab1278a3..9c1e1972c 100644
--- a/inference-engine/tests/helpers/CMakeLists.txt
+++ b/inference-engine/tests/helpers/CMakeLists.txt
@@ -1,8 +1,7 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
 set(TARGET_NAME helpers)
 
 file(GLOB HELPERS_SRC
@@ -15,16 +14,49 @@ file (GLOB HELPERS_INCLUDES
       )
 
 ## Enable Models multiple search pathes
-message("configuring file: ${PROJECT_BINARY_DIR}/test_model_repo.h")
-configure_file(test_model_repo.hpp.in ${PROJECT_BINARY_DIR}/test_model_repo.hpp @ONLY)
+message("configuring file: ${CMAKE_CURRENT_BINARY_DIR}/test_model_repo.h")
+configure_file(test_model_repo.hpp.in ${CMAKE_CURRENT_BINARY_DIR}/test_model_repo.hpp @ONLY)
 
 
 add_library(${TARGET_NAME} STATIC
             ${HELPERS_SRC}
             ${HELPERS_HEADERS})
 
-target_include_directories(${TARGET_NAME} PUBLIC ${PROJECT_BINARY_DIR})
-target_compile_definitions(${TARGET_NAME} PUBLIC -DMODELS_PATH=\"${MODELS_PATH}\")
+# detecting regex support
+if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
+    target_compile_definitions(${TARGET_NAME} PUBLIC USE_BOOST_RE)
+
+    debug_message(STATUS "Adding boost dependency")
+    if (VERBOSE_BUILD)
+        set(Boost_DEBUG on)
+    endif ()
+    find_package(Boost REQUIRED COMPONENTS regex)
+    target_link_libraries(${TARGET_NAME} PUBLIC ${Boost_REGEX_LIBRARY})
+    target_include_directories(${TARGET_NAME} PUBLIC ${Boost_INCLUDE_DIRS})
+endif()
+
+if(MSVC)
+    set(PUGI pugixml_mt)
+else()
+    set(PUGI pugixml)
+endif()
+
+if(WIN32)
+    target_include_directories(${TARGET_NAME} PUBLIC "${IE_MAIN_SOURCE_DIR}/samples/common")
+endif()
+
+target_link_libraries(${TARGET_NAME} PUBLIC inference_engine ${PUGI})
+
+target_include_directories(${TARGET_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}" "${gtest_SOURCE_DIR}/include"
+                                                 "${IE_MAIN_SOURCE_DIR}/src" "${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src"
+                                                 "${gmock_SOURCE_DIR}/include"
+										                      PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
+
+# TODO: eliminate dependency on samples
+target_include_directories(${TARGET_NAME} PUBLIC 
+    "${IE_MAIN_SOURCE_DIR}/samples/common")
+
+target_compile_definitions(${TARGET_NAME} PUBLIC MODELS_PATH=\"${MODELS_PATH}\" DATA_PATH=\"${VALIDATION_SET}\")
 
 set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 11)
 set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD_REQUIRED ON)
diff --git a/inference-engine/tests/helpers/disable_tests.hpp b/inference-engine/tests/helpers/disable_tests.hpp
index d0f0949ce..04c9b683f 100644
--- a/inference-engine/tests/helpers/disable_tests.hpp
+++ b/inference-engine/tests/helpers/disable_tests.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/ir_gen_helper.cpp b/inference-engine/tests/helpers/ir_gen_helper.cpp
index 40a05c451..3679d2993 100644
--- a/inference-engine/tests/helpers/ir_gen_helper.cpp
+++ b/inference-engine/tests/helpers/ir_gen_helper.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "ir_gen_helper.hpp"
diff --git a/inference-engine/tests/helpers/ir_gen_helper.hpp b/inference-engine/tests/helpers/ir_gen_helper.hpp
index db8bff547..bdb0e1605 100644
--- a/inference-engine/tests/helpers/ir_gen_helper.hpp
+++ b/inference-engine/tests/helpers/ir_gen_helper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -19,7 +19,7 @@ namespace single_layer_tests {
                                   const std::string& precision,
                                   const std::string& layers, 
                                   const std::string& edges,
-                                  const unsigned ir_version = 4u);
+                                  const unsigned ir_version = 5u);
     };
 
 } // namespace single_layer_tests
diff --git a/inference-engine/tests/helpers/single_layer_common.cpp b/inference-engine/tests/helpers/single_layer_common.cpp
index 434d3f28b..d0310b01d 100644
--- a/inference-engine/tests/helpers/single_layer_common.cpp
+++ b/inference-engine/tests/helpers/single_layer_common.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/single_layer_common.hpp b/inference-engine/tests/helpers/single_layer_common.hpp
index 1354129fb..a5cc9680d 100644
--- a/inference-engine/tests/helpers/single_layer_common.hpp
+++ b/inference-engine/tests/helpers/single_layer_common.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -54,6 +54,7 @@ struct conv_common_params {
     std::string auto_pad;
     size_t group;
     size_t out_c;
+    bool with_bias;
 };
 
 struct pool_common_params {
@@ -66,6 +67,11 @@ struct pool_common_params {
     bool exclude_pad;
 };
 
+struct eltwise_common_params {
+    std::string operation;
+    std::vector<float> coeff;
+};
+
 #define PRETTY_PARAM(name, type)                                                            \
     class name                                                                              \
     {                                                                                       \
@@ -104,7 +110,7 @@ template<int Version = 3>
 inline InferenceEngine::details::CNNNetworkImplPtr
 buildSingleLayerNetworkCommon(InferenceEngine::details::IFormatParser *parser,
                               const std::string &layerType,
-                              const testing::InOutData &inOutShapes,
+                              const testing::InOutShapes &inOutShapes,
                               std::map<std::string, std::string> *params,
                               const std::string &layerDataName = "data",
                               const InferenceEngine::Precision &precision = InferenceEngine::Precision::FP32,
diff --git a/inference-engine/tests/helpers/test_assertions.hpp b/inference-engine/tests/helpers/test_assertions.hpp
index 5e2ee36c0..44bbdb0da 100644
--- a/inference-engine/tests/helpers/test_assertions.hpp
+++ b/inference-engine/tests/helpers/test_assertions.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/test_model_path.hpp b/inference-engine/tests/helpers/test_model_path.hpp
index 73f4fc643..a0acd93d5 100644
--- a/inference-engine/tests/helpers/test_model_path.hpp
+++ b/inference-engine/tests/helpers/test_model_path.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/test_model_repo.hpp.in b/inference-engine/tests/helpers/test_model_repo.hpp.in
index 5356f9886..6c3f3be4b 100644
--- a/inference-engine/tests/helpers/test_model_repo.hpp.in
+++ b/inference-engine/tests/helpers/test_model_repo.hpp.in
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/tests/helpers/test_models_path.cpp b/inference-engine/tests/helpers/test_models_path.cpp
index 69d97b880..cef74825c 100644
--- a/inference-engine/tests/helpers/test_models_path.cpp
+++ b/inference-engine/tests/helpers/test_models_path.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/tests_common.hpp b/inference-engine/tests/helpers/tests_common.hpp
index d9698ae7d..08135acf8 100644
--- a/inference-engine/tests/helpers/tests_common.hpp
+++ b/inference-engine/tests/helpers/tests_common.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -27,8 +27,83 @@
 	#include "Psapi.h"
 #endif
 
+class BaseTestCreator {
+protected:
+    std::string _type;
+public:
+    explicit BaseTestCreator(const std::string& type) : _type(type) {}
+
+    virtual InferenceEngine::CNNLayerPtr create(const std::string& type)  = 0;
+
+    virtual bool shouldCreate(const std::string& type) = 0;
+};
+
+template<class LT>
+class LayerTestCreator : public BaseTestCreator {
+public:
+    explicit LayerTestCreator(const std::string& type) : BaseTestCreator(type) {}
+
+    InferenceEngine::CNNLayerPtr create(const std::string& type) override {
+        InferenceEngine::LayerParams params;
+        params.type = type;
+        return std::make_shared<LT>(params);
+    }
+
+    bool shouldCreate(const std::string& type) override {
+        return type == _type;
+    }
+};
+
 class TestsCommon : public ::testing::Test {
+private:
+    static std::vector<std::shared_ptr<BaseTestCreator>>& getCreators() {
+        // there should be unique_ptr but it cant be used with initializer lists
+        static std::vector<std::shared_ptr<BaseTestCreator> > creators = {
+                std::make_shared<LayerTestCreator<InferenceEngine::PowerLayer>>("Power"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ConvolutionLayer>>("Convolution"),
+                std::make_shared<LayerTestCreator<InferenceEngine::DeconvolutionLayer>>("Deconvolution"),
+                std::make_shared<LayerTestCreator<InferenceEngine::PoolingLayer>>("Pooling"),
+                std::make_shared<LayerTestCreator<InferenceEngine::FullyConnectedLayer>>("InnerProduct"),
+                std::make_shared<LayerTestCreator<InferenceEngine::FullyConnectedLayer>>("FullyConnected"),
+                std::make_shared<LayerTestCreator<InferenceEngine::NormLayer>>("LRN"),
+                std::make_shared<LayerTestCreator<InferenceEngine::NormLayer>>("Norm"),
+                std::make_shared<LayerTestCreator<InferenceEngine::SoftMaxLayer>>("Softmax"),
+                std::make_shared<LayerTestCreator<InferenceEngine::SoftMaxLayer>>("SoftMax"),
+                std::make_shared<LayerTestCreator<InferenceEngine::GRNLayer>>("GRN"),
+                std::make_shared<LayerTestCreator<InferenceEngine::MVNLayer>>("MVN"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ReLULayer>>("ReLU"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ClampLayer>>("Clamp"),
+                std::make_shared<LayerTestCreator<InferenceEngine::SplitLayer>>("Split"),
+                std::make_shared<LayerTestCreator<InferenceEngine::SplitLayer>>("Slice"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ConcatLayer>>("Concat"),
+                std::make_shared<LayerTestCreator<InferenceEngine::EltwiseLayer>>("Eltwise"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ScaleShiftLayer>>("ScaleShift"),
+                std::make_shared<LayerTestCreator<InferenceEngine::PReLULayer>>("PReLU"),
+                std::make_shared<LayerTestCreator<InferenceEngine::CropLayer>>("Crop"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ReshapeLayer>>("Reshape"),
+                std::make_shared<LayerTestCreator<InferenceEngine::TileLayer>>("Tile"),
+                std::make_shared<LayerTestCreator<InferenceEngine::BatchNormalizationLayer>>("BatchNormalization"),
+                std::make_shared<LayerTestCreator<InferenceEngine::GemmLayer>>("Gemm"),
+                std::make_shared<LayerTestCreator<InferenceEngine::PadLayer>>("Pad"),
+                std::make_shared<LayerTestCreator<InferenceEngine::GatherLayer>>("Gather"),
+                std::make_shared<LayerTestCreator<InferenceEngine::StridedSliceLayer>>("StridedSlice"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ShuffleChannelsLayer>>("ShuffleChannels"),
+                std::make_shared<LayerTestCreator<InferenceEngine::DepthToSpaceLayer>>("DepthToSpace"),
+                std::make_shared<LayerTestCreator<InferenceEngine::ReverseSequenceLayer>>("ReverseSequence")
+        };
+        return creators;
+    }
 public:
+    static InferenceEngine::CNNLayer::Ptr createLayer(const std::string& type) {
+        for (auto& creator : getCreators()) {
+            if (!creator->shouldCreate(type))
+                continue;
+            return creator->create(type);
+        }
+        static LayerTestCreator<InferenceEngine::GenericLayer> genericCreator("");
+        return genericCreator.create(type);
+    }
+
     static size_t parseLine(char* line) {
         // This assumes that a digit will be found and the line ends in " Kb".
         size_t i = strlen(line);
@@ -56,12 +131,12 @@ public:
         return result;
     }
 #ifdef _WIN32
-	static size_t getVmSizeInKBWin() {
-		PROCESS_MEMORY_COUNTERS pmc;
-		pmc.cb = sizeof(PROCESS_MEMORY_COUNTERS);
-		GetProcessMemoryInfo(GetCurrentProcess(),&pmc, pmc.cb);
-		return pmc.WorkingSetSize;
-	}
+        static size_t getVmSizeInKBWin() {
+                PROCESS_MEMORY_COUNTERS pmc;
+                pmc.cb = sizeof(PROCESS_MEMORY_COUNTERS);
+                GetProcessMemoryInfo(GetCurrentProcess(),&pmc, pmc.cb);
+                return pmc.WorkingSetSize;
+	    }
 #endif
 
  public:
@@ -135,8 +210,8 @@ public:
         return make_so_name(input);
     }
 
-    static void fill_data(InferenceEngine::Blob::Ptr blob) {
-        fill_data(blob->buffer().as<float*>(), blob->size());
+    static void fill_data(InferenceEngine::Blob::Ptr& blob) {
+        fill_data(blob->buffer().as<float*>(), blob->byteSize() / sizeof(float));
     }
 
     static void fill_data(float *data, size_t size, size_t duty_ratio = 10) {
@@ -149,6 +224,25 @@ public:
         }
     }
 
+    static void fill_data_non_zero(int32_t *data, size_t size, int n) {
+        for (size_t i = 0; i < size; i++) {
+            data[i] = n*i%254+1;
+        }
+    }
+
+    static void fill_data_bin(float *data, size_t size) {
+        for (size_t i = 0; i < size; i++) {
+            data[i] = sinf((float)i) > 0.f ? 1.f : -1.f;
+        }
+    }
+
+    static void fill_data_bin_packed(int8_t *data, size_t size) {
+        int nbits = 8;
+        for (size_t i = 0; i < div_up(size, nbits); i++) {
+            data[i] = static_cast<int8_t>(i % 255);
+        }
+    }
+
     static void fill_data_sine(float *data, size_t size, float center, float ampl, float omega) {
         for (size_t i = 0; i < size; i++) {
             data[i] = center + ampl * sin((float)i * omega);
@@ -168,7 +262,6 @@ public:
     }
 
     static void compare(InferenceEngine::Blob &res, InferenceEngine::Blob &ref, float max_diff = 0.01f) {
-
         float *res_ptr = res.buffer().as<float*>();
         size_t res_size = res.size();
 
@@ -183,7 +276,6 @@ public:
     }
 
     static void compare_NRMSD(InferenceEngine::Blob &res, InferenceEngine::Blob &ref, float max_nrmsd = 0.01f) {
-
         float *res_ptr = res.buffer().as<float*>();
         size_t res_size = res.size();
 
@@ -224,8 +316,7 @@ public:
         }
     }
 
-    void replace(std::string& str, const std::string& from, const std::string& to)
-    {
+    void replace(std::string& str, const std::string& from, const std::string& to) {
         std::string::size_type pos = 0;
 
         while((pos = str.find(from, pos)) != std::string::npos) {
@@ -326,6 +417,11 @@ public:
         return sts;
     }
 
+    template <typename T, typename U>
+    static inline T div_up(const T a, const U b) {
+        assert(b);
+        return (a + b - 1) / b;
+    }
 };
 
 
diff --git a/inference-engine/tests/helpers/tests_common_func.hpp b/inference-engine/tests/helpers/tests_common_func.hpp
index 387d5a639..66e0ed84b 100644
--- a/inference-engine/tests/helpers/tests_common_func.hpp
+++ b/inference-engine/tests/helpers/tests_common_func.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/tests_file_utils.cpp b/inference-engine/tests/helpers/tests_file_utils.cpp
index b23e72679..8bc061250 100644
--- a/inference-engine/tests/helpers/tests_file_utils.cpp
+++ b/inference-engine/tests/helpers/tests_file_utils.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/tests_file_utils.hpp b/inference-engine/tests/helpers/tests_file_utils.hpp
index dbfa50cc8..3abb89111 100644
--- a/inference-engine/tests/helpers/tests_file_utils.hpp
+++ b/inference-engine/tests/helpers/tests_file_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/tests_utils.hpp b/inference-engine/tests/helpers/tests_utils.hpp
index 3a44889a8..21351ed6e 100644
--- a/inference-engine/tests/helpers/tests_utils.hpp
+++ b/inference-engine/tests/helpers/tests_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/version_printer.cpp b/inference-engine/tests/helpers/version_printer.cpp
index 7448c99d6..7aa1ba447 100644
--- a/inference-engine/tests/helpers/version_printer.cpp
+++ b/inference-engine/tests/helpers/version_printer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/xml_father.hpp b/inference-engine/tests/helpers/xml_father.hpp
index 90b7d732a..243a38b8d 100644
--- a/inference-engine/tests/helpers/xml_father.hpp
+++ b/inference-engine/tests/helpers/xml_father.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/xml_helper.hpp b/inference-engine/tests/helpers/xml_helper.hpp
index 75cc1314b..85d389fc7 100644
--- a/inference-engine/tests/helpers/xml_helper.hpp
+++ b/inference-engine/tests/helpers/xml_helper.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/helpers/xml_net_builder.cpp b/inference-engine/tests/helpers/xml_net_builder.cpp
index 45f967270..e313ba08a 100644
--- a/inference-engine/tests/helpers/xml_net_builder.cpp
+++ b/inference-engine/tests/helpers/xml_net_builder.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -21,7 +21,7 @@ void IDManager::reset() {
     portID = layerID = 0;
 }
 
-LayerDesc::LayerDesc(std::string type, InOutData& shapes, IDManager &id_manager) : _type(std::move(type)) {
+LayerDesc::LayerDesc(std::string type, InOutShapes& shapes, IDManager &id_manager) : _type(std::move(type)) {
     _layerID = id_manager.getNextLayerID();
     auto inDims = shapes.inDims;
     auto outDims = shapes.outDims;
diff --git a/inference-engine/tests/helpers/xml_net_builder.hpp b/inference-engine/tests/helpers/xml_net_builder.hpp
index 81fa21dc5..ba9f1a7c8 100644
--- a/inference-engine/tests/helpers/xml_net_builder.hpp
+++ b/inference-engine/tests/helpers/xml_net_builder.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,11 +22,11 @@ struct CropData {
 
 typedef std::vector<CropData> CropParams;
 
-struct InOutData {
+struct InOutShapes {
     std::vector<std::vector<size_t>> inDims;
     std::vector<std::vector<size_t>> outDims;
 
-    friend std::ostream& operator<<(std::ostream& os, InOutData const& inout) {
+    friend std::ostream& operator<<(std::ostream& os, InOutShapes const& inout) {
         auto dumpVec = [](const std::vector<size_t>& vec) -> std::string {
             if (vec.empty()) return "[]";
             std::stringstream oss;
@@ -137,7 +137,7 @@ public:
      * @param type - string with type of the layer
      * @param shapes - reference to the structure with input and output shapes
      */
-    explicit LayerDesc(std::string type, InOutData& shapes, IDManager &id_manager);
+    explicit LayerDesc(std::string type, InOutShapes& shapes, IDManager &id_manager);
 
     /**
      * @brief Resets current input and output ports to iterate over all input and output ports
@@ -252,7 +252,7 @@ public:
         return EdgesBuilder(exp.node("edges"), layersDesc);
     }
 
-    XmlNetBuilder& cropLayer(CropParams params, const InOutData& inout) {
+    XmlNetBuilder& cropLayer(CropParams params, const InOutShapes& inout) {
         std::map<std::string, std::string> generalParams;
         for (CropData crop : params) {
             generalParams["axis"] = std::to_string(crop.axis);
@@ -262,7 +262,7 @@ public:
         return addLayer("Crop", "", &generalParams, inout, 0, 0, "crop-data");
     }
 
-    XmlNetBuilder& convolutionLayer(const std::string& precision, const InOutData& inout) {
+    XmlNetBuilder& convolutionLayer(const std::string& precision, const InOutShapes& inout) {
         std::map<std::string, std::string> params{
                 {"stride-x", "4"},
                 {"stride-y", "4"},
@@ -275,7 +275,7 @@ public:
         return addLayer("Convolution", precision, &params, inout, 0, 0, "convolution_data");
     }
 
-    XmlNetBuilder& poolingLayer(const InOutData& inout) {
+    XmlNetBuilder& poolingLayer(const InOutShapes& inout) {
         std::map<std::string, std::string> params{
                 {"stride-x", "4"},
                 {"stride-y", "4"},
@@ -289,7 +289,7 @@ public:
 
     struct TIPortMap { int from_l, from_p, to_l, to_p, axis, stride, start, end; };
 
-    XmlNetBuilder& TILayer(InOutData inout,
+    XmlNetBuilder& TILayer(InOutShapes inout,
                            std::string body,
                            std::vector<TIPortMap> inMap,
                            std::vector<TIPortMap> outMap,
@@ -329,7 +329,7 @@ public:
     XmlNetBuilder& addLayer(const std::string& type,
                             const std::string& precision,
                             std::map<std::string, std::string>* params,
-                            InOutData inout,
+                            InOutShapes inout,
                             int weightsSize = 0,
                             int biasesSize = 0,
                             std::string layerDataName = "data",
@@ -361,7 +361,7 @@ public:
     }
 
     XmlNetBuilder& addInputLayer(const std::string& precision, const std::vector<size_t>& out) {
-        InOutData inout{};
+        InOutShapes inout{};
         inout.outDims.push_back(out);
         return addLayer("Input", precision, nullptr, inout);
     }
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2005/gmock.sln b/inference-engine/tests/libs/gtest/googlemock/msvc/2005/gmock.sln
new file mode 100644
index 000000000..0cf57a35a
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2005/gmock.sln
@@ -0,0 +1,32 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock", "gmock.vcproj", "{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock_test", "gmock_test.vcproj", "{F10D22F8-AC7B-4213-8720-608E7D878CD2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock_main", "gmock_main.vcproj", "{E4EF614B-30DF-4954-8C53-580A0BF6B589}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Release|Win32 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|Win32.ActiveCfg = Debug|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|Win32.Build.0 = Debug|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|Win32.ActiveCfg = Release|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|Win32.Build.0 = Release|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|Win32.ActiveCfg = Debug|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|Win32.Build.0 = Debug|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|Win32.ActiveCfg = Release|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|Win32.Build.0 = Release|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|Win32.ActiveCfg = Debug|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|Win32.Build.0 = Debug|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|Win32.ActiveCfg = Release|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|Win32.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock.sln b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock.sln
new file mode 100644
index 000000000..f192bd2b1
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock.sln
@@ -0,0 +1,46 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual C++ Express 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock", "gmock.vcxproj", "{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock_test", "gmock_test.vcxproj", "{F10D22F8-AC7B-4213-8720-608E7D878CD2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock_main", "gmock_main.vcxproj", "{E4EF614B-30DF-4954-8C53-580A0BF6B589}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|Win32.ActiveCfg = Debug|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|Win32.Build.0 = Debug|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|x64.ActiveCfg = Debug|x64
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|x64.Build.0 = Debug|x64
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|Win32.ActiveCfg = Release|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|Win32.Build.0 = Release|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|x64.ActiveCfg = Release|x64
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|x64.Build.0 = Release|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|Win32.ActiveCfg = Debug|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|Win32.Build.0 = Debug|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|x64.ActiveCfg = Debug|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|x64.Build.0 = Debug|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|Win32.ActiveCfg = Release|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|Win32.Build.0 = Release|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|x64.ActiveCfg = Release|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|x64.Build.0 = Release|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|Win32.ActiveCfg = Debug|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|Win32.Build.0 = Debug|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|x64.ActiveCfg = Debug|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|x64.Build.0 = Debug|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|Win32.ActiveCfg = Release|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|Win32.Build.0 = Release|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|x64.ActiveCfg = Release|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock.vcxproj b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock.vcxproj
new file mode 100644
index 000000000..eea87dca2
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock.vcxproj
@@ -0,0 +1,145 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}</ProjectGuid>
+    <RootNamespace>gmock</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gmock-all.cc" />
+    <ClCompile Include="$(GTestDir)\src\gtest-all.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock_main.vcxproj b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock_main.vcxproj
new file mode 100644
index 000000000..991687acf
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock_main.vcxproj
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{E4EF614B-30DF-4954-8C53-580A0BF6B589}</ProjectGuid>
+    <RootNamespace>gmock_main</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="gmock.vcxproj">
+      <Project>{34681f0d-ce45-415d-b5f2-5c662dfe3bd5}</Project>
+      <CopyLocalSatelliteAssemblies>true</CopyLocalSatelliteAssemblies>
+      <ReferenceOutputAssembly>true</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gmock_main.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock_test.vcxproj b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock_test.vcxproj
new file mode 100644
index 000000000..0a6559740
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2010/gmock_test.vcxproj
@@ -0,0 +1,176 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{F10D22F8-AC7B-4213-8720-608E7D878CD2}</ProjectGuid>
+    <RootNamespace>gmock_test</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="gmock_main.vcxproj">
+      <Project>{e4ef614b-30df-4954-8c53-580a0bf6b589}</Project>
+      <CopyLocalSatelliteAssemblies>true</CopyLocalSatelliteAssemblies>
+      <ReferenceOutputAssembly>true</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gmock_all_test.cc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock.sln b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock.sln
new file mode 100644
index 000000000..d4203a844
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock.sln
@@ -0,0 +1,46 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock", "gmock.vcxproj", "{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock_test", "gmock_test.vcxproj", "{F10D22F8-AC7B-4213-8720-608E7D878CD2}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gmock_main", "gmock_main.vcxproj", "{E4EF614B-30DF-4954-8C53-580A0BF6B589}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|Win32.ActiveCfg = Debug|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|Win32.Build.0 = Debug|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|x64.ActiveCfg = Debug|x64
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Debug|x64.Build.0 = Debug|x64
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|Win32.ActiveCfg = Release|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|Win32.Build.0 = Release|Win32
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|x64.ActiveCfg = Release|x64
+		{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}.Release|x64.Build.0 = Release|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|Win32.ActiveCfg = Debug|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|Win32.Build.0 = Debug|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|x64.ActiveCfg = Debug|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Debug|x64.Build.0 = Debug|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|Win32.ActiveCfg = Release|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|Win32.Build.0 = Release|Win32
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|x64.ActiveCfg = Release|x64
+		{F10D22F8-AC7B-4213-8720-608E7D878CD2}.Release|x64.Build.0 = Release|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|Win32.ActiveCfg = Debug|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|Win32.Build.0 = Debug|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|x64.ActiveCfg = Debug|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Debug|x64.Build.0 = Debug|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|Win32.ActiveCfg = Release|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|Win32.Build.0 = Release|Win32
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|x64.ActiveCfg = Release|x64
+		{E4EF614B-30DF-4954-8C53-580A0BF6B589}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock.vcxproj b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock.vcxproj
new file mode 100644
index 000000000..c6b56e612
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock.vcxproj
@@ -0,0 +1,145 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{34681F0D-CE45-415D-B5F2-5C662DFE3BD5}</ProjectGuid>
+    <RootNamespace>gmock</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gmock-all.cc" />
+    <ClCompile Include="$(GTestDir)\src\gtest-all.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock_main.vcxproj b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock_main.vcxproj
new file mode 100644
index 000000000..42381dfa3
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock_main.vcxproj
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{E4EF614B-30DF-4954-8C53-580A0BF6B589}</ProjectGuid>
+    <RootNamespace>gmock_main</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <AdditionalIncludeDirectories>../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="gmock.vcxproj">
+      <Project>{34681f0d-ce45-415d-b5f2-5c662dfe3bd5}</Project>
+      <CopyLocalSatelliteAssemblies>true</CopyLocalSatelliteAssemblies>
+      <ReferenceOutputAssembly>true</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gmock_main.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">../../include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock_test.vcxproj b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock_test.vcxproj
new file mode 100644
index 000000000..01d1f201b
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googlemock/msvc/2015/gmock_test.vcxproj
@@ -0,0 +1,176 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{F10D22F8-AC7B-4213-8720-608E7D878CD2}</ProjectGuid>
+    <RootNamespace>gmock_test</RootNamespace>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>Unicode</CharacterSet>
+    <PlatformToolset>v140</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+    <Import Project="gmock_config.props" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <Optimization>Disabled</Optimization>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalIncludeDirectories>..\..\include;..\..;$(GTestDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ProjectReference Include="gmock_main.vcxproj">
+      <Project>{e4ef614b-30df-4954-8c53-580a0bf6b589}</Project>
+      <CopyLocalSatelliteAssemblies>true</CopyLocalSatelliteAssemblies>
+      <ReferenceOutputAssembly>true</ReferenceOutputAssembly>
+    </ProjectReference>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gmock_all_test.cc" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.sln b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.sln
new file mode 100644
index 000000000..e36b33b62
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.sln
@@ -0,0 +1,55 @@
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual C++ Express 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest-md", "gtest-md.vcxproj", "{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest_main-md", "gtest_main-md.vcxproj", "{3AF54C8A-10BF-4332-9147-F68ED9862033}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest_prod_test-md", "gtest_prod_test-md.vcxproj", "{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest_unittest-md", "gtest_unittest-md.vcxproj", "{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Debug|Win32.Build.0 = Debug|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Debug|x64.ActiveCfg = Debug|x64
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Debug|x64.Build.0 = Debug|x64
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Release|Win32.ActiveCfg = Release|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Release|Win32.Build.0 = Release|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Release|x64.ActiveCfg = Release|x64
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}.Release|x64.Build.0 = Release|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Debug|Win32.ActiveCfg = Debug|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Debug|Win32.Build.0 = Debug|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Debug|x64.ActiveCfg = Debug|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Debug|x64.Build.0 = Debug|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Release|Win32.ActiveCfg = Release|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Release|Win32.Build.0 = Release|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Release|x64.ActiveCfg = Release|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862033}.Release|x64.Build.0 = Release|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Debug|Win32.ActiveCfg = Debug|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Debug|Win32.Build.0 = Debug|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Debug|x64.ActiveCfg = Debug|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Debug|x64.Build.0 = Debug|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Release|Win32.ActiveCfg = Release|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Release|Win32.Build.0 = Release|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Release|x64.ActiveCfg = Release|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}.Release|x64.Build.0 = Release|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Debug|Win32.ActiveCfg = Debug|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Debug|Win32.Build.0 = Debug|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Debug|x64.ActiveCfg = Debug|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Debug|x64.Build.0 = Debug|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Release|Win32.ActiveCfg = Release|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Release|Win32.Build.0 = Release|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Release|x64.ActiveCfg = Release|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.vcxproj
new file mode 100644
index 000000000..16a6ff12f
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.vcxproj
@@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{C8F6C172-56F2-4E76-B5FA-C3B423B31BE8}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtestd</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>gtestd</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>gtest</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest-all.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.vcxproj.filters
new file mode 100644
index 000000000..69edeff23
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest-md.vcxproj.filters
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest-all.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.sln b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.sln
new file mode 100644
index 000000000..cacd5c0ce
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.sln
@@ -0,0 +1,55 @@
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual C++ Express 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest", "gtest.vcxproj", "{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest_main", "gtest_main.vcxproj", "{3AF54C8A-10BF-4332-9147-F68ED9862032}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest_unittest", "gtest_unittest.vcxproj", "{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "gtest_prod_test", "gtest_prod_test.vcxproj", "{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Debug|Win32.ActiveCfg = Debug|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Debug|Win32.Build.0 = Debug|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Debug|x64.ActiveCfg = Debug|x64
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Debug|x64.Build.0 = Debug|x64
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Release|Win32.ActiveCfg = Release|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Release|Win32.Build.0 = Release|Win32
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Release|x64.ActiveCfg = Release|x64
+		{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}.Release|x64.Build.0 = Release|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Debug|Win32.ActiveCfg = Debug|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Debug|Win32.Build.0 = Debug|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Debug|x64.ActiveCfg = Debug|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Debug|x64.Build.0 = Debug|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Release|Win32.ActiveCfg = Release|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Release|Win32.Build.0 = Release|Win32
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Release|x64.ActiveCfg = Release|x64
+		{3AF54C8A-10BF-4332-9147-F68ED9862032}.Release|x64.Build.0 = Release|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Debug|Win32.ActiveCfg = Debug|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Debug|Win32.Build.0 = Debug|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Debug|x64.ActiveCfg = Debug|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Debug|x64.Build.0 = Debug|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Release|Win32.ActiveCfg = Release|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Release|Win32.Build.0 = Release|Win32
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Release|x64.ActiveCfg = Release|x64
+		{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}.Release|x64.Build.0 = Release|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Debug|Win32.ActiveCfg = Debug|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Debug|Win32.Build.0 = Debug|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Debug|x64.ActiveCfg = Debug|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Debug|x64.Build.0 = Debug|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Release|Win32.ActiveCfg = Release|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Release|Win32.Build.0 = Release|Win32
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Release|x64.ActiveCfg = Release|x64
+		{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.vcxproj
new file mode 100644
index 000000000..a46f5c7af
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.vcxproj
@@ -0,0 +1,149 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{C8F6C172-56F2-4E76-B5FA-C3B423B31BE7}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+    <TargetName>gtestd</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+    <TargetName>gtest</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>gtestd</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>gtest</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest-all.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.vcxproj.filters
new file mode 100644
index 000000000..69edeff23
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest.vcxproj.filters
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest-all.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main-md.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main-md.vcxproj
new file mode 100644
index 000000000..3d773895b
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main-md.vcxproj
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{3AF54C8A-10BF-4332-9147-F68ED9862033}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest_maind</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest_main</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>gtest_maind</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>gtest_main</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib />
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest_main.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="gtest-md.vcxproj">
+      <Project>{c8f6c172-56f2-4e76-b5fa-c3b423b31be8}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main-md.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main-md.vcxproj.filters
new file mode 100644
index 000000000..726c773cc
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main-md.vcxproj.filters
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest_main.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main.vcxproj
new file mode 100644
index 000000000..8fb25897c
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main.vcxproj
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{3AF54C8A-10BF-4332-9147-F68ED9862032}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+    <TargetName>gtest_maind</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+    <TargetName>gtest_main</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>gtest_maind</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>gtest_main</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(ProjectName)d.lib</OutputFile>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(ProjectName)d.lib</OutputFile>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(ProjectName).lib</OutputFile>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Lib>
+      <OutputFile>$(OutDir)$(ProjectName).lib</OutputFile>
+    </Lib>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest_main.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="gtest.vcxproj">
+      <Project>{c8f6c172-56f2-4e76-b5fa-c3b423b31be7}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main.vcxproj.filters
new file mode 100644
index 000000000..726c773cc
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_main.vcxproj.filters
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\src\gtest_main.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test-md.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test-md.vcxproj
new file mode 100644
index 000000000..830e5dce4
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test-md.vcxproj
@@ -0,0 +1,199 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{24848551-EF4F-47E8-9A9D-EA4D49BC3ECB}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest_prod_test</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest_prod_test</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>gtest_prod_test</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>gtest_prod_test</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_prod_test.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_prod_test.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_prod_test.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </PrecompiledHeader>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="..\..\test\production.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </PrecompiledHeader>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\test\production.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="gtest_main-md.vcxproj">
+      <Project>{3af54c8a-10bf-4332-9147-f68ed9862033}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test-md.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test-md.vcxproj.filters
new file mode 100644
index 000000000..ac367310a
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test-md.vcxproj.filters
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_prod_test.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\test\production.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\test\production.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test.vcxproj
new file mode 100644
index 000000000..d42e13511
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test.vcxproj
@@ -0,0 +1,191 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{24848551-EF4F-47E8-9A9D-EA4D49BC3ECA}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_prod_test.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_prod_test.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_prod_test.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </PrecompiledHeader>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </PrecompiledHeader>
+    </ClCompile>
+    <ClCompile Include="..\..\test\production.cc">
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </PrecompiledHeader>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\test\production.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="gtest_main.vcxproj">
+      <Project>{3af54c8a-10bf-4332-9147-f68ed9862032}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test.vcxproj.filters
new file mode 100644
index 000000000..ac367310a
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_prod_test.vcxproj.filters
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_prod_test.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\test\production.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\..\test\production.h">
+      <Filter>Header Files</Filter>
+    </ClInclude>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest-md.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest-md.vcxproj
new file mode 100644
index 000000000..93b0dc4e1
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest-md.vcxproj
@@ -0,0 +1,188 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{4D9FDFB5-986A-4139-823C-F4EE0ED481A2}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest_unittest</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)$(ProjectName)\</IntDir>
+    <TargetName>gtest_unittest</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <TargetName>gtest_unittest</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <TargetName>gtest_unittest</TargetName>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_unittest.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_unittest.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_unittest.cc">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">MinSpace</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MinSpace</Optimization>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Default</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Default</BasicRuntimeChecks>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </PrecompiledHeader>
+      <DebugInformationFormat Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ProgramDatabase</DebugInformationFormat>
+      <DebugInformationFormat Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="gtest_main-md.vcxproj">
+      <Project>{3af54c8a-10bf-4332-9147-f68ed9862033}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest-md.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest-md.vcxproj.filters
new file mode 100644
index 000000000..047dae513
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest-md.vcxproj.filters
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_unittest.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest.vcxproj b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest.vcxproj
new file mode 100644
index 000000000..ec6abde7d
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest.vcxproj
@@ -0,0 +1,180 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{4D9FDFB5-986A-4139-823C-F4EE0ED481A1}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <CharacterSet>MultiByte</CharacterSet>
+    <PlatformToolset>v100</PlatformToolset>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
+    <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)temp\$(ProjectName)\</IntDir>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <OutDir>$(SolutionDir)$(SolutionName)\$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(OutDir)temp\$(ProjectName)\</IntDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <MinimalRebuild>true</MinimalRebuild>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_unittest.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <ProgramDatabaseFile>$(OutDir)gtest_unittest.pdb</ProgramDatabaseFile>
+      <SubSystem>Console</SubSystem>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <TargetMachine>MachineX86</TargetMachine>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <PreprocessorDefinitions>WIN32;_VARIADIC_MAX=10;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <PrecompiledHeader>Use</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>..\..\include;..\..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <SubSystem>Console</SubSystem>
+      <OptimizeReferences>true</OptimizeReferences>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_unittest.cc">
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">MinSpace</Optimization>
+      <Optimization Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">MinSpace</Optimization>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Default</BasicRuntimeChecks>
+      <BasicRuntimeChecks Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Default</BasicRuntimeChecks>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+      </PrecompiledHeader>
+      <DebugInformationFormat Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ProgramDatabase</DebugInformationFormat>
+      <DebugInformationFormat Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..;..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+      </PrecompiledHeader>
+      <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+      </PrecompiledHeader>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ProjectReference Include="gtest_main.vcxproj">
+      <Project>{3af54c8a-10bf-4332-9147-f68ed9862032}</Project>
+    </ProjectReference>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest.vcxproj.filters b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest.vcxproj.filters
new file mode 100644
index 000000000..047dae513
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/msvc/2010/gtest_unittest.vcxproj.filters
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\..\test\gtest_unittest.cc">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Config/DebugProject.xcconfig b/inference-engine/tests/libs/gtest/googletest/xcode/Config/DebugProject.xcconfig
new file mode 100644
index 000000000..3d68157d5
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Config/DebugProject.xcconfig
@@ -0,0 +1,30 @@
+//
+//  DebugProject.xcconfig
+//
+//  These are Debug Configuration project settings for the gtest framework and
+//  examples. It is set in the "Based On:" dropdown in the "Project" info
+//  dialog.
+//  This file is based on the Xcode Configuration files in:
+//  http://code.google.com/p/google-toolbox-for-mac/
+// 
+
+#include "General.xcconfig"
+
+// No optimization
+GCC_OPTIMIZATION_LEVEL = 0
+
+// Deployment postprocessing is what triggers Xcode to strip, turn it off
+DEPLOYMENT_POSTPROCESSING = NO
+
+// Dead code stripping off
+DEAD_CODE_STRIPPING = NO
+
+// Debug symbols should be on obviously
+GCC_GENERATE_DEBUGGING_SYMBOLS = YES
+
+// Define the DEBUG macro in all debug builds
+OTHER_CFLAGS = $(OTHER_CFLAGS) -DDEBUG=1
+
+// These are turned off to avoid STL incompatibilities with client code
+// // Turns on special C++ STL checks to "encourage" good STL use
+// GCC_PREPROCESSOR_DEFINITIONS = $(GCC_PREPROCESSOR_DEFINITIONS) _GLIBCXX_DEBUG_PEDANTIC _GLIBCXX_DEBUG _GLIBCPP_CONCEPT_CHECKS
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Config/FrameworkTarget.xcconfig b/inference-engine/tests/libs/gtest/googletest/xcode/Config/FrameworkTarget.xcconfig
new file mode 100644
index 000000000..357b1c8fb
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Config/FrameworkTarget.xcconfig
@@ -0,0 +1,17 @@
+//
+//  FrameworkTarget.xcconfig
+//
+//  These are Framework target settings for the gtest framework and examples. It
+//  is set in the "Based On:" dropdown in the "Target" info dialog.
+//  This file is based on the Xcode Configuration files in:
+//  http://code.google.com/p/google-toolbox-for-mac/
+// 
+
+// Dynamic libs need to be position independent
+GCC_DYNAMIC_NO_PIC = NO
+
+// Dynamic libs should not have their external symbols stripped.
+STRIP_STYLE = non-global
+
+// Let the user install by specifying the $DSTROOT with xcodebuild
+SKIP_INSTALL = NO
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Config/General.xcconfig b/inference-engine/tests/libs/gtest/googletest/xcode/Config/General.xcconfig
new file mode 100644
index 000000000..f23e32227
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Config/General.xcconfig
@@ -0,0 +1,41 @@
+//
+//  General.xcconfig
+//
+//  These are General configuration settings for the gtest framework and
+//  examples.
+//  This file is based on the Xcode Configuration files in:
+//  http://code.google.com/p/google-toolbox-for-mac/
+// 
+
+// Build for PPC and Intel, 32- and 64-bit
+ARCHS = i386 x86_64 ppc ppc64
+
+// Zerolink prevents link warnings so turn it off
+ZERO_LINK = NO
+
+// Prebinding considered unhelpful in 10.3 and later
+PREBINDING = NO
+
+// Strictest warning policy
+WARNING_CFLAGS = -Wall -Werror -Wendif-labels -Wnewline-eof -Wno-sign-compare -Wshadow
+
+// Work around Xcode bugs by using external strip. See:
+// http://lists.apple.com/archives/Xcode-users/2006/Feb/msg00050.html
+SEPARATE_STRIP = YES
+
+// Force C99 dialect
+GCC_C_LANGUAGE_STANDARD = c99
+
+// not sure why apple defaults this on, but it's pretty risky
+ALWAYS_SEARCH_USER_PATHS = NO
+
+// Turn on position dependent code for most cases (overridden where appropriate)
+GCC_DYNAMIC_NO_PIC = YES
+
+// Default SDK and minimum OS version is 10.4
+SDKROOT = $(DEVELOPER_SDK_DIR)/MacOSX10.4u.sdk
+MACOSX_DEPLOYMENT_TARGET = 10.4
+GCC_VERSION = 4.0
+
+// VERSIONING BUILD SETTINGS (used in Info.plist)
+GTEST_VERSIONINFO_ABOUT =  © 2008 Google Inc.
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Config/ReleaseProject.xcconfig b/inference-engine/tests/libs/gtest/googletest/xcode/Config/ReleaseProject.xcconfig
new file mode 100644
index 000000000..5349f0a04
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Config/ReleaseProject.xcconfig
@@ -0,0 +1,32 @@
+//
+//  ReleaseProject.xcconfig
+//
+//  These are Release Configuration project settings for the gtest framework
+//  and examples. It is set in the "Based On:" dropdown in the "Project" info
+//  dialog.
+//  This file is based on the Xcode Configuration files in:
+//  http://code.google.com/p/google-toolbox-for-mac/
+// 
+
+#include "General.xcconfig"
+
+// subconfig/Release.xcconfig
+
+// Optimize for space and size (Apple recommendation)
+GCC_OPTIMIZATION_LEVEL = s
+
+// Deploment postprocessing is what triggers Xcode to strip
+DEPLOYMENT_POSTPROCESSING = YES
+
+// No symbols
+GCC_GENERATE_DEBUGGING_SYMBOLS = NO
+
+// Dead code strip does not affect ObjC code but can help for C
+DEAD_CODE_STRIPPING = YES
+
+// NDEBUG is used by things like assert.h, so define it for general compat.
+// ASSERT going away in release tends to create unused vars.
+OTHER_CFLAGS = $(OTHER_CFLAGS) -DNDEBUG=1 -Wno-unused-variable
+
+// When we strip we want to strip all symbols in release, but save externals.
+STRIP_STYLE = all
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Config/StaticLibraryTarget.xcconfig b/inference-engine/tests/libs/gtest/googletest/xcode/Config/StaticLibraryTarget.xcconfig
new file mode 100644
index 000000000..3922fa51d
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Config/StaticLibraryTarget.xcconfig
@@ -0,0 +1,18 @@
+//
+//  StaticLibraryTarget.xcconfig
+//
+//  These are static library target settings for libgtest.a. It
+//  is set in the "Based On:" dropdown in the "Target" info dialog.
+//  This file is based on the Xcode Configuration files in:
+//  http://code.google.com/p/google-toolbox-for-mac/
+// 
+
+// Static libs can be included in bundles so make them position independent
+GCC_DYNAMIC_NO_PIC = NO
+
+// Static libs should not have their internal globals or external symbols
+// stripped.
+STRIP_STYLE = debugging
+
+// Let the user install by specifying the $DSTROOT with xcodebuild
+SKIP_INSTALL = NO
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Config/TestTarget.xcconfig b/inference-engine/tests/libs/gtest/googletest/xcode/Config/TestTarget.xcconfig
new file mode 100644
index 000000000..e6652ba85
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Config/TestTarget.xcconfig
@@ -0,0 +1,8 @@
+//
+//  TestTarget.xcconfig
+//
+//  These are Test target settings for the gtest framework and examples. It
+//  is set in the "Based On:" dropdown in the "Target" info dialog.
+
+PRODUCT_NAME = $(TARGET_NAME)
+HEADER_SEARCH_PATHS = ../include
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Resources/Info.plist b/inference-engine/tests/libs/gtest/googletest/xcode/Resources/Info.plist
new file mode 100644
index 000000000..9dd28ea14
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Resources/Info.plist
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>English</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIconFile</key>
+	<string></string>
+	<key>CFBundleIdentifier</key>
+	<string>com.google.${PRODUCT_NAME}</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>GTEST_VERSIONINFO_LONG</string>
+	<key>CFBundleShortVersionString</key>
+	<string>GTEST_VERSIONINFO_SHORT</string>
+	<key>CFBundleGetInfoString</key>
+	<string>${PRODUCT_NAME} GTEST_VERSIONINFO_LONG, ${GTEST_VERSIONINFO_ABOUT}</string>
+	<key>NSHumanReadableCopyright</key>
+	<string>${GTEST_VERSIONINFO_ABOUT}</string>
+	<key>CSResourcesFileMapped</key>
+	<true/>
+</dict>
+</plist>
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/Info.plist b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/Info.plist
new file mode 100644
index 000000000..f3852edea
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/Info.plist
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>English</string>
+	<key>CFBundleExecutable</key>
+	<string>${EXECUTABLE_NAME}</string>
+	<key>CFBundleIconFile</key>
+	<string></string>
+	<key>CFBundleIdentifier</key>
+	<string>com.google.gtest.${PRODUCT_NAME:identifier}</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>${PRODUCT_NAME}</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleSignature</key>
+	<string>????</string>
+	<key>CFBundleVersion</key>
+	<string>1.0</string>
+	<key>CSResourcesFileMapped</key>
+	<true/>
+</dict>
+</plist>
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/WidgetFramework.xcodeproj/project.pbxproj b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/WidgetFramework.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..497617eb6
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/WidgetFramework.xcodeproj/project.pbxproj
@@ -0,0 +1,457 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 42;
+	objects = {
+
+/* Begin PBXAggregateTarget section */
+		4024D162113D7D2400C7059E /* Test */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 4024D169113D7D4600C7059E /* Build configuration list for PBXAggregateTarget "Test" */;
+			buildPhases = (
+				4024D161113D7D2400C7059E /* ShellScript */,
+			);
+			dependencies = (
+				4024D166113D7D3100C7059E /* PBXTargetDependency */,
+			);
+			name = Test;
+			productName = TestAndBuild;
+		};
+		4024D1E9113D83FF00C7059E /* TestAndBuild */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 4024D1F0113D842B00C7059E /* Build configuration list for PBXAggregateTarget "TestAndBuild" */;
+			buildPhases = (
+			);
+			dependencies = (
+				4024D1ED113D840900C7059E /* PBXTargetDependency */,
+				4024D1EF113D840D00C7059E /* PBXTargetDependency */,
+			);
+			name = TestAndBuild;
+			productName = TestAndBuild;
+		};
+/* End PBXAggregateTarget section */
+
+/* Begin PBXBuildFile section */
+		3B7EB1250E5AEE3500C7F239 /* widget.cc in Sources */ = {isa = PBXBuildFile; fileRef = 3B7EB1230E5AEE3500C7F239 /* widget.cc */; };
+		3B7EB1260E5AEE3500C7F239 /* widget.h in Headers */ = {isa = PBXBuildFile; fileRef = 3B7EB1240E5AEE3500C7F239 /* widget.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		3B7EB1280E5AEE4600C7F239 /* widget_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 3B7EB1270E5AEE4600C7F239 /* widget_test.cc */; };
+		3B7EB1480E5AF3B400C7F239 /* Widget.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8D07F2C80486CC7A007CD1D0 /* Widget.framework */; };
+		4024D188113D7D7800C7059E /* libgtest.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 4024D185113D7D5500C7059E /* libgtest.a */; };
+		4024D189113D7D7A00C7059E /* libgtest_main.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 4024D183113D7D5500C7059E /* libgtest_main.a */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+		3B07BDF00E3F3FAE00647869 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 8D07F2BC0486CC7A007CD1D0;
+			remoteInfo = gTestExample;
+		};
+		4024D165113D7D3100C7059E /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 3B07BDE90E3F3F9E00647869;
+			remoteInfo = WidgetFrameworkTest;
+		};
+		4024D1EC113D840900C7059E /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 8D07F2BC0486CC7A007CD1D0;
+			remoteInfo = WidgetFramework;
+		};
+		4024D1EE113D840D00C7059E /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 4024D162113D7D2400C7059E;
+			remoteInfo = Test;
+		};
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXFileReference section */
+		3B07BDEA0E3F3F9E00647869 /* WidgetFrameworkTest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = WidgetFrameworkTest; sourceTree = BUILT_PRODUCTS_DIR; };
+		3B7EB1230E5AEE3500C7F239 /* widget.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = widget.cc; sourceTree = "<group>"; };
+		3B7EB1240E5AEE3500C7F239 /* widget.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = widget.h; sourceTree = "<group>"; };
+		3B7EB1270E5AEE4600C7F239 /* widget_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = widget_test.cc; sourceTree = "<group>"; };
+		4024D183113D7D5500C7059E /* libgtest_main.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libgtest_main.a; path = /usr/local/lib/libgtest_main.a; sourceTree = "<absolute>"; };
+		4024D185113D7D5500C7059E /* libgtest.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libgtest.a; path = /usr/local/lib/libgtest.a; sourceTree = "<absolute>"; };
+		4024D1E2113D838200C7059E /* runtests.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = runtests.sh; sourceTree = "<group>"; };
+		8D07F2C70486CC7A007CD1D0 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist; path = Info.plist; sourceTree = "<group>"; };
+		8D07F2C80486CC7A007CD1D0 /* Widget.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Widget.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		3B07BDE80E3F3F9E00647869 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				4024D189113D7D7A00C7059E /* libgtest_main.a in Frameworks */,
+				4024D188113D7D7800C7059E /* libgtest.a in Frameworks */,
+				3B7EB1480E5AF3B400C7F239 /* Widget.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		8D07F2C30486CC7A007CD1D0 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		034768DDFF38A45A11DB9C8B /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				8D07F2C80486CC7A007CD1D0 /* Widget.framework */,
+				3B07BDEA0E3F3F9E00647869 /* WidgetFrameworkTest */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		0867D691FE84028FC02AAC07 /* gTestExample */ = {
+			isa = PBXGroup;
+			children = (
+				4024D1E1113D836C00C7059E /* Scripts */,
+				08FB77ACFE841707C02AAC07 /* Source */,
+				089C1665FE841158C02AAC07 /* Resources */,
+				3B07BE350E4094E400647869 /* Test */,
+				0867D69AFE84028FC02AAC07 /* External Frameworks and Libraries */,
+				034768DDFF38A45A11DB9C8B /* Products */,
+			);
+			name = gTestExample;
+			sourceTree = "<group>";
+		};
+		0867D69AFE84028FC02AAC07 /* External Frameworks and Libraries */ = {
+			isa = PBXGroup;
+			children = (
+				4024D183113D7D5500C7059E /* libgtest_main.a */,
+				4024D185113D7D5500C7059E /* libgtest.a */,
+			);
+			name = "External Frameworks and Libraries";
+			sourceTree = "<group>";
+		};
+		089C1665FE841158C02AAC07 /* Resources */ = {
+			isa = PBXGroup;
+			children = (
+				8D07F2C70486CC7A007CD1D0 /* Info.plist */,
+			);
+			name = Resources;
+			sourceTree = "<group>";
+		};
+		08FB77ACFE841707C02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				3B7EB1230E5AEE3500C7F239 /* widget.cc */,
+				3B7EB1240E5AEE3500C7F239 /* widget.h */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		3B07BE350E4094E400647869 /* Test */ = {
+			isa = PBXGroup;
+			children = (
+				3B7EB1270E5AEE4600C7F239 /* widget_test.cc */,
+			);
+			name = Test;
+			sourceTree = "<group>";
+		};
+		4024D1E1113D836C00C7059E /* Scripts */ = {
+			isa = PBXGroup;
+			children = (
+				4024D1E2113D838200C7059E /* runtests.sh */,
+			);
+			name = Scripts;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		8D07F2BD0486CC7A007CD1D0 /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				3B7EB1260E5AEE3500C7F239 /* widget.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		3B07BDE90E3F3F9E00647869 /* WidgetFrameworkTest */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 3B07BDF40E3F3FB600647869 /* Build configuration list for PBXNativeTarget "WidgetFrameworkTest" */;
+			buildPhases = (
+				3B07BDE70E3F3F9E00647869 /* Sources */,
+				3B07BDE80E3F3F9E00647869 /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				3B07BDF10E3F3FAE00647869 /* PBXTargetDependency */,
+			);
+			name = WidgetFrameworkTest;
+			productName = gTestExampleTest;
+			productReference = 3B07BDEA0E3F3F9E00647869 /* WidgetFrameworkTest */;
+			productType = "com.apple.product-type.tool";
+		};
+		8D07F2BC0486CC7A007CD1D0 /* WidgetFramework */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 4FADC24208B4156D00ABE55E /* Build configuration list for PBXNativeTarget "WidgetFramework" */;
+			buildPhases = (
+				8D07F2C10486CC7A007CD1D0 /* Sources */,
+				8D07F2C30486CC7A007CD1D0 /* Frameworks */,
+				8D07F2BD0486CC7A007CD1D0 /* Headers */,
+				8D07F2BF0486CC7A007CD1D0 /* Resources */,
+				8D07F2C50486CC7A007CD1D0 /* Rez */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = WidgetFramework;
+			productInstallPath = "$(HOME)/Library/Frameworks";
+			productName = gTestExample;
+			productReference = 8D07F2C80486CC7A007CD1D0 /* Widget.framework */;
+			productType = "com.apple.product-type.framework";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		0867D690FE84028FC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			buildConfigurationList = 4FADC24608B4156D00ABE55E /* Build configuration list for PBXProject "WidgetFramework" */;
+			compatibilityVersion = "Xcode 2.4";
+			hasScannedForEncodings = 1;
+			mainGroup = 0867D691FE84028FC02AAC07 /* gTestExample */;
+			productRefGroup = 034768DDFF38A45A11DB9C8B /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				8D07F2BC0486CC7A007CD1D0 /* WidgetFramework */,
+				3B07BDE90E3F3F9E00647869 /* WidgetFrameworkTest */,
+				4024D162113D7D2400C7059E /* Test */,
+				4024D1E9113D83FF00C7059E /* TestAndBuild */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		8D07F2BF0486CC7A007CD1D0 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXRezBuildPhase section */
+		8D07F2C50486CC7A007CD1D0 /* Rez */ = {
+			isa = PBXRezBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXRezBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		4024D161113D7D2400C7059E /* ShellScript */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "/bin/bash $SRCROOT/runtests.sh $BUILT_PRODUCTS_DIR/WidgetFrameworkTest\n";
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		3B07BDE70E3F3F9E00647869 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				3B7EB1280E5AEE4600C7F239 /* widget_test.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		8D07F2C10486CC7A007CD1D0 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				3B7EB1250E5AEE3500C7F239 /* widget.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+		3B07BDF10E3F3FAE00647869 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 8D07F2BC0486CC7A007CD1D0 /* WidgetFramework */;
+			targetProxy = 3B07BDF00E3F3FAE00647869 /* PBXContainerItemProxy */;
+		};
+		4024D166113D7D3100C7059E /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 3B07BDE90E3F3F9E00647869 /* WidgetFrameworkTest */;
+			targetProxy = 4024D165113D7D3100C7059E /* PBXContainerItemProxy */;
+		};
+		4024D1ED113D840900C7059E /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 8D07F2BC0486CC7A007CD1D0 /* WidgetFramework */;
+			targetProxy = 4024D1EC113D840900C7059E /* PBXContainerItemProxy */;
+		};
+		4024D1EF113D840D00C7059E /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 4024D162113D7D2400C7059E /* Test */;
+			targetProxy = 4024D1EE113D840D00C7059E /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+		3B07BDEC0E3F3F9F00647869 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = WidgetFrameworkTest;
+			};
+			name = Debug;
+		};
+		3B07BDED0E3F3F9F00647869 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = WidgetFrameworkTest;
+			};
+			name = Release;
+		};
+		4024D163113D7D2400C7059E /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = TestAndBuild;
+			};
+			name = Debug;
+		};
+		4024D164113D7D2400C7059E /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = TestAndBuild;
+			};
+			name = Release;
+		};
+		4024D1EA113D83FF00C7059E /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = TestAndBuild;
+			};
+			name = Debug;
+		};
+		4024D1EB113D83FF00C7059E /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				PRODUCT_NAME = TestAndBuild;
+			};
+			name = Release;
+		};
+		4FADC24308B4156D00ABE55E /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				FRAMEWORK_VERSION = A;
+				INFOPLIST_FILE = Info.plist;
+				INSTALL_PATH = "@loader_path/../Frameworks";
+				PRODUCT_NAME = Widget;
+			};
+			name = Debug;
+		};
+		4FADC24408B4156D00ABE55E /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				FRAMEWORK_VERSION = A;
+				INFOPLIST_FILE = Info.plist;
+				INSTALL_PATH = "@loader_path/../Frameworks";
+				PRODUCT_NAME = Widget;
+			};
+			name = Release;
+		};
+		4FADC24708B4156D00ABE55E /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = 4.0;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+			};
+			name = Debug;
+		};
+		4FADC24808B4156D00ABE55E /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = 4.0;
+				SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		3B07BDF40E3F3FB600647869 /* Build configuration list for PBXNativeTarget "WidgetFrameworkTest" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				3B07BDEC0E3F3F9F00647869 /* Debug */,
+				3B07BDED0E3F3F9F00647869 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4024D169113D7D4600C7059E /* Build configuration list for PBXAggregateTarget "Test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4024D163113D7D2400C7059E /* Debug */,
+				4024D164113D7D2400C7059E /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4024D1F0113D842B00C7059E /* Build configuration list for PBXAggregateTarget "TestAndBuild" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4024D1EA113D83FF00C7059E /* Debug */,
+				4024D1EB113D83FF00C7059E /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4FADC24208B4156D00ABE55E /* Build configuration list for PBXNativeTarget "WidgetFramework" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4FADC24308B4156D00ABE55E /* Debug */,
+				4FADC24408B4156D00ABE55E /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4FADC24608B4156D00ABE55E /* Build configuration list for PBXProject "WidgetFramework" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4FADC24708B4156D00ABE55E /* Debug */,
+				4FADC24808B4156D00ABE55E /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 0867D690FE84028FC02AAC07 /* Project object */;
+}
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/runtests.sh b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/runtests.sh
new file mode 100644
index 000000000..4a0d413e5
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/runtests.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Executes the samples and tests for the Google Test Framework.
+
+# Help the dynamic linker find the path to the libraries.
+export DYLD_FRAMEWORK_PATH=$BUILT_PRODUCTS_DIR
+export DYLD_LIBRARY_PATH=$BUILT_PRODUCTS_DIR
+
+# Create some executables.
+test_executables=$@
+
+# Now execute each one in turn keeping track of how many succeeded and failed.
+succeeded=0
+failed=0
+failed_list=()
+for test in ${test_executables[*]}; do
+  "$test"
+  result=$?
+  if [ $result -eq 0 ]; then
+    succeeded=$(( $succeeded + 1 ))
+  else
+    failed=$(( failed + 1 ))
+    failed_list="$failed_list $test"
+  fi
+done
+
+# Report the successes and failures to the console.
+echo "Tests complete with $succeeded successes and $failed failures."
+if [ $failed -ne 0 ]; then
+  echo "The following tests failed:"
+  echo $failed_list
+fi
+exit $failed
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget.cc b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget.cc
new file mode 100644
index 000000000..bfc4e7fcf
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget.cc
@@ -0,0 +1,63 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: preston.a.jackson@gmail.com (Preston Jackson)
+//
+// Google Test - FrameworkSample
+// widget.cc
+//
+
+// Widget is a very simple class used for demonstrating the use of gtest
+
+#include "widget.h"
+
+Widget::Widget(int number, const std::string& name)
+    : number_(number),
+      name_(name) {}
+
+Widget::~Widget() {}
+
+float Widget::GetFloatValue() const {
+  return number_;
+}
+
+int Widget::GetIntValue() const {
+  return static_cast<int>(number_);
+}
+
+std::string Widget::GetStringValue() const {
+  return name_;
+}
+
+void Widget::GetCharPtrValue(char* buffer, size_t max_size) const {
+  // Copy the char* representation of name_ into buffer, up to max_size.
+  strncpy(buffer, name_.c_str(), max_size-1);
+  buffer[max_size-1] = '\0';
+  return;
+}
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget.h b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget.h
new file mode 100644
index 000000000..0c55cdc8c
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget.h
@@ -0,0 +1,59 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: preston.a.jackson@gmail.com (Preston Jackson)
+//
+// Google Test - FrameworkSample
+// widget.h
+//
+
+// Widget is a very simple class used for demonstrating the use of gtest. It
+// simply stores two values a string and an integer, which are returned via
+// public accessors in multiple forms.
+
+#import <string>
+
+class Widget {
+ public:
+  Widget(int number, const std::string& name);
+  ~Widget();
+
+  // Public accessors to number data
+  float GetFloatValue() const;
+  int GetIntValue() const;
+
+  // Public accessors to the string data
+  std::string GetStringValue() const;
+  void GetCharPtrValue(char* buffer, size_t max_size) const;
+
+ private:
+  // Data members
+  float number_;
+  std::string name_;
+};
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget_test.cc b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget_test.cc
new file mode 100644
index 000000000..872599421
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Samples/FrameworkSample/widget_test.cc
@@ -0,0 +1,68 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: preston.a.jackson@gmail.com (Preston Jackson)
+//
+// Google Test - FrameworkSample
+// widget_test.cc
+//
+
+// This is a simple test file for the Widget class in the Widget.framework
+
+#include <string>
+#include "gtest/gtest.h"
+
+#include <Widget/widget.h>
+
+// This test verifies that the constructor sets the internal state of the
+// Widget class correctly.
+TEST(WidgetInitializerTest, TestConstructor) {
+  Widget widget(1.0f, "name");
+  EXPECT_FLOAT_EQ(1.0f, widget.GetFloatValue());
+  EXPECT_EQ(std::string("name"), widget.GetStringValue());
+}
+
+// This test verifies the conversion of the float and string values to int and
+// char*, respectively.
+TEST(WidgetInitializerTest, TestConversion) {
+  Widget widget(1.0f, "name");
+  EXPECT_EQ(1, widget.GetIntValue());
+
+  size_t max_size = 128;
+  char buffer[max_size];
+  widget.GetCharPtrValue(buffer, max_size);
+  EXPECT_STREQ("name", buffer);
+}
+
+// Use the Google Test main that is linked into the framework. It does something
+// like this:
+// int main(int argc, char** argv) {
+//   testing::InitGoogleTest(&argc, argv);
+//   return RUN_ALL_TESTS();
+// }
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Scripts/runtests.sh b/inference-engine/tests/libs/gtest/googletest/xcode/Scripts/runtests.sh
new file mode 100644
index 000000000..3fc229f1d
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Scripts/runtests.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Executes the samples and tests for the Google Test Framework.
+
+# Help the dynamic linker find the path to the libraries.
+export DYLD_FRAMEWORK_PATH=$BUILT_PRODUCTS_DIR
+export DYLD_LIBRARY_PATH=$BUILT_PRODUCTS_DIR
+
+# Create some executables.
+test_executables=("$BUILT_PRODUCTS_DIR/gtest_unittest-framework"
+                  "$BUILT_PRODUCTS_DIR/gtest_unittest"
+                  "$BUILT_PRODUCTS_DIR/sample1_unittest-framework"
+                  "$BUILT_PRODUCTS_DIR/sample1_unittest-static")
+
+# Now execute each one in turn keeping track of how many succeeded and failed. 
+succeeded=0
+failed=0
+failed_list=()
+for test in ${test_executables[*]}; do
+  "$test"
+  result=$?
+  if [ $result -eq 0 ]; then
+    succeeded=$(( $succeeded + 1 ))
+  else
+    failed=$(( failed + 1 ))
+    failed_list="$failed_list $test"
+  fi
+done
+
+# Report the successes and failures to the console.
+echo "Tests complete with $succeeded successes and $failed failures."
+if [ $failed -ne 0 ]; then
+  echo "The following tests failed:"
+  echo $failed_list
+fi
+exit $failed
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/Scripts/versiongenerate.py b/inference-engine/tests/libs/gtest/googletest/xcode/Scripts/versiongenerate.py
new file mode 100644
index 000000000..16791d253
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/Scripts/versiongenerate.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+#
+# Copyright 2008, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""A script to prepare version informtion for use the gtest Info.plist file.
+
+  This script extracts the version information from the configure.ac file and
+  uses it to generate a header file containing the same information. The
+  #defines in this header file will be included in during the generation of
+  the Info.plist of the framework, giving the correct value to the version
+  shown in the Finder.
+
+  This script makes the following assumptions (these are faults of the script,
+  not problems with the Autoconf):
+    1. The AC_INIT macro will be contained within the first 1024 characters
+       of configure.ac
+    2. The version string will be 3 integers separated by periods and will be
+       surrounded by square brackets, "[" and "]" (e.g. [1.0.1]). The first
+       segment represents the major version, the second represents the minor
+       version and the third represents the fix version.
+    3. No ")" character exists between the opening "(" and closing ")" of
+       AC_INIT, including in comments and character strings.
+"""
+
+import sys
+import re
+
+# Read the command line argument (the output directory for Version.h)
+if (len(sys.argv) < 3):
+  print "Usage: versiongenerate.py input_dir output_dir"
+  sys.exit(1)
+else:
+  input_dir = sys.argv[1]
+  output_dir = sys.argv[2]
+
+# Read the first 1024 characters of the configure.ac file
+config_file = open("%s/configure.ac" % input_dir, 'r')
+buffer_size = 1024
+opening_string = config_file.read(buffer_size)
+config_file.close()
+
+# Extract the version string from the AC_INIT macro
+#   The following init_expression means:
+#     Extract three integers separated by periods and surrounded by square
+#     brackets(e.g. "[1.0.1]") between "AC_INIT(" and ")". Do not be greedy
+#     (*? is the non-greedy flag) since that would pull in everything between
+#     the first "(" and the last ")" in the file.
+version_expression = re.compile(r"AC_INIT\(.*?\[(\d+)\.(\d+)\.(\d+)\].*?\)",
+                                re.DOTALL)
+version_values = version_expression.search(opening_string)
+major_version = version_values.group(1)
+minor_version = version_values.group(2)
+fix_version = version_values.group(3)
+
+# Write the version information to a header file to be included in the
+# Info.plist file.
+file_data = """//
+// DO NOT MODIFY THIS FILE (but you can delete it)
+//
+// This file is autogenerated by the versiongenerate.py script. This script
+// is executed in a "Run Script" build phase when creating gtest.framework. This
+// header file is not used during compilation of C-source. Rather, it simply
+// defines some version strings for substitution in the Info.plist. Because of
+// this, we are not restricted to C-syntax nor are we using include guards.
+//
+
+#define GTEST_VERSIONINFO_SHORT %s.%s
+#define GTEST_VERSIONINFO_LONG %s.%s.%s
+
+""" % (major_version, minor_version, major_version, minor_version, fix_version)
+version_file = open("%s/Version.h" % output_dir, 'w')
+version_file.write(file_data)
+version_file.close()
diff --git a/inference-engine/tests/libs/gtest/googletest/xcode/gtest.xcodeproj/project.pbxproj b/inference-engine/tests/libs/gtest/googletest/xcode/gtest.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..003bff8cb
--- /dev/null
+++ b/inference-engine/tests/libs/gtest/googletest/xcode/gtest.xcodeproj/project.pbxproj
@@ -0,0 +1,1182 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 46;
+	objects = {
+
+/* Begin PBXAggregateTarget section */
+		3B238F5F0E828B5400846E11 /* Check */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 3B238FA30E828BB600846E11 /* Build configuration list for PBXAggregateTarget "Check" */;
+			buildPhases = (
+				3B238F5E0E828B5400846E11 /* ShellScript */,
+			);
+			dependencies = (
+				40899F9D0FFA740F000B29AE /* PBXTargetDependency */,
+				40C849F7101A43440083642A /* PBXTargetDependency */,
+				4089A0980FFAD34A000B29AE /* PBXTargetDependency */,
+				40C849F9101A43490083642A /* PBXTargetDependency */,
+			);
+			name = Check;
+			productName = Check;
+		};
+		40C44ADC0E3798F4008FCC51 /* Version Info */ = {
+			isa = PBXAggregateTarget;
+			buildConfigurationList = 40C44AE40E379905008FCC51 /* Build configuration list for PBXAggregateTarget "Version Info" */;
+			buildPhases = (
+				40C44ADB0E3798F4008FCC51 /* Generate Version.h */,
+			);
+			comments = "The generation of Version.h must be performed in its own target. Since the Info.plist is preprocessed before any of the other build phases in gtest, the Version.h file would not be ready if included as a build phase of that target.";
+			dependencies = (
+			);
+			name = "Version Info";
+			productName = Version.h;
+		};
+/* End PBXAggregateTarget section */
+
+/* Begin PBXBuildFile section */
+		224A12A30E9EADCC00BD17FD /* gtest-test-part.h in Headers */ = {isa = PBXBuildFile; fileRef = 224A12A20E9EADCC00BD17FD /* gtest-test-part.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		3BF6F2A00E79B5AD000F2EEE /* gtest-type-util.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 3BF6F29F0E79B5AD000F2EEE /* gtest-type-util.h */; };
+		3BF6F2A50E79B616000F2EEE /* gtest-typed-test.h in Headers */ = {isa = PBXBuildFile; fileRef = 3BF6F2A40E79B616000F2EEE /* gtest-typed-test.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		404884380E2F799B00CF7658 /* gtest-death-test.h in Headers */ = {isa = PBXBuildFile; fileRef = 404883DB0E2F799B00CF7658 /* gtest-death-test.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		404884390E2F799B00CF7658 /* gtest-message.h in Headers */ = {isa = PBXBuildFile; fileRef = 404883DC0E2F799B00CF7658 /* gtest-message.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		4048843A0E2F799B00CF7658 /* gtest-spi.h in Headers */ = {isa = PBXBuildFile; fileRef = 404883DD0E2F799B00CF7658 /* gtest-spi.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		4048843B0E2F799B00CF7658 /* gtest.h in Headers */ = {isa = PBXBuildFile; fileRef = 404883DE0E2F799B00CF7658 /* gtest.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		4048843C0E2F799B00CF7658 /* gtest_pred_impl.h in Headers */ = {isa = PBXBuildFile; fileRef = 404883DF0E2F799B00CF7658 /* gtest_pred_impl.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		4048843D0E2F799B00CF7658 /* gtest_prod.h in Headers */ = {isa = PBXBuildFile; fileRef = 404883E00E2F799B00CF7658 /* gtest_prod.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		404884500E2F799B00CF7658 /* README.md in Resources */ = {isa = PBXBuildFile; fileRef = 404883F60E2F799B00CF7658 /* README.md */; };
+		404884A00E2F7BE600CF7658 /* gtest-death-test-internal.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 404883E20E2F799B00CF7658 /* gtest-death-test-internal.h */; };
+		404884A10E2F7BE600CF7658 /* gtest-filepath.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 404883E30E2F799B00CF7658 /* gtest-filepath.h */; };
+		404884A20E2F7BE600CF7658 /* gtest-internal.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 404883E40E2F799B00CF7658 /* gtest-internal.h */; };
+		404884A30E2F7BE600CF7658 /* gtest-port.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 404883E50E2F799B00CF7658 /* gtest-port.h */; };
+		404884A40E2F7BE600CF7658 /* gtest-string.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 404883E60E2F799B00CF7658 /* gtest-string.h */; };
+		404884AC0E2F7CD900CF7658 /* CHANGES in Resources */ = {isa = PBXBuildFile; fileRef = 404884A90E2F7CD900CF7658 /* CHANGES */; };
+		404884AD0E2F7CD900CF7658 /* CONTRIBUTORS in Resources */ = {isa = PBXBuildFile; fileRef = 404884AA0E2F7CD900CF7658 /* CONTRIBUTORS */; };
+		404884AE0E2F7CD900CF7658 /* LICENSE in Resources */ = {isa = PBXBuildFile; fileRef = 404884AB0E2F7CD900CF7658 /* LICENSE */; };
+		40899F3A0FFA70D4000B29AE /* gtest-all.cc in Sources */ = {isa = PBXBuildFile; fileRef = 224A12A10E9EADA700BD17FD /* gtest-all.cc */; };
+		40899F500FFA7281000B29AE /* gtest-tuple.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 40899F4D0FFA7271000B29AE /* gtest-tuple.h */; };
+		40899F530FFA72A0000B29AE /* gtest_unittest.cc in Sources */ = {isa = PBXBuildFile; fileRef = 3B238C120E7FE13C00846E11 /* gtest_unittest.cc */; };
+		4089A0440FFAD1BE000B29AE /* sample1.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4089A02C0FFACF7F000B29AE /* sample1.cc */; };
+		4089A0460FFAD1BE000B29AE /* sample1_unittest.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4089A02E0FFACF7F000B29AE /* sample1_unittest.cc */; };
+		40C848FF101A21150083642A /* gtest-all.cc in Sources */ = {isa = PBXBuildFile; fileRef = 224A12A10E9EADA700BD17FD /* gtest-all.cc */; };
+		40C84915101A21DF0083642A /* gtest_main.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4048840D0E2F799B00CF7658 /* gtest_main.cc */; };
+		40C84916101A235B0083642A /* libgtest_main.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 40C8490B101A217E0083642A /* libgtest_main.a */; };
+		40C84921101A23AD0083642A /* libgtest_main.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 40C8490B101A217E0083642A /* libgtest_main.a */; };
+		40C84978101A36540083642A /* libgtest_main.a in Resources */ = {isa = PBXBuildFile; fileRef = 40C8490B101A217E0083642A /* libgtest_main.a */; };
+		40C84980101A36850083642A /* gtest_unittest.cc in Sources */ = {isa = PBXBuildFile; fileRef = 3B238C120E7FE13C00846E11 /* gtest_unittest.cc */; };
+		40C84982101A36850083642A /* libgtest.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 40C848FA101A209C0083642A /* libgtest.a */; };
+		40C84983101A36850083642A /* libgtest_main.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 40C8490B101A217E0083642A /* libgtest_main.a */; };
+		40C8498F101A36A60083642A /* sample1.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4089A02C0FFACF7F000B29AE /* sample1.cc */; };
+		40C84990101A36A60083642A /* sample1_unittest.cc in Sources */ = {isa = PBXBuildFile; fileRef = 4089A02E0FFACF7F000B29AE /* sample1_unittest.cc */; };
+		40C84992101A36A60083642A /* libgtest.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 40C848FA101A209C0083642A /* libgtest.a */; };
+		40C84993101A36A60083642A /* libgtest_main.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 40C8490B101A217E0083642A /* libgtest_main.a */; };
+		40C849A2101A37050083642A /* gtest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4539C8FF0EC27F6400A70F4C /* gtest.framework */; };
+		40C849A4101A37150083642A /* gtest.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4539C8FF0EC27F6400A70F4C /* gtest.framework */; };
+		4539C9340EC280AE00A70F4C /* gtest-param-test.h in Headers */ = {isa = PBXBuildFile; fileRef = 4539C9330EC280AE00A70F4C /* gtest-param-test.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		4539C9380EC280E200A70F4C /* gtest-linked_ptr.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 4539C9350EC280E200A70F4C /* gtest-linked_ptr.h */; };
+		4539C9390EC280E200A70F4C /* gtest-param-util-generated.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 4539C9360EC280E200A70F4C /* gtest-param-util-generated.h */; };
+		4539C93A0EC280E200A70F4C /* gtest-param-util.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = 4539C9370EC280E200A70F4C /* gtest-param-util.h */; };
+		4567C8181264FF71007740BE /* gtest-printers.h in Headers */ = {isa = PBXBuildFile; fileRef = 4567C8171264FF71007740BE /* gtest-printers.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		F67D4F3E1C7F5D8B0017C729 /* gtest-port-arch.h in Headers */ = {isa = PBXBuildFile; fileRef = F67D4F3D1C7F5D8B0017C729 /* gtest-port-arch.h */; };
+		F67D4F3F1C7F5DA70017C729 /* gtest-port-arch.h in Copy Headers Internal */ = {isa = PBXBuildFile; fileRef = F67D4F3D1C7F5D8B0017C729 /* gtest-port-arch.h */; };
+		F67D4F441C7F5DD00017C729 /* gtest-port.h in Headers */ = {isa = PBXBuildFile; fileRef = F67D4F411C7F5DD00017C729 /* gtest-port.h */; };
+		F67D4F451C7F5DD00017C729 /* gtest-printers.h in Headers */ = {isa = PBXBuildFile; fileRef = F67D4F421C7F5DD00017C729 /* gtest-printers.h */; };
+		F67D4F461C7F5DD00017C729 /* gtest.h in Headers */ = {isa = PBXBuildFile; fileRef = F67D4F431C7F5DD00017C729 /* gtest.h */; };
+		F67D4F481C7F5E160017C729 /* gtest-port.h in Copy Headers Internal Custom */ = {isa = PBXBuildFile; fileRef = F67D4F411C7F5DD00017C729 /* gtest-port.h */; };
+		F67D4F491C7F5E260017C729 /* gtest-printers.h in Copy Headers Internal Custom */ = {isa = PBXBuildFile; fileRef = F67D4F421C7F5DD00017C729 /* gtest-printers.h */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+		40899F9C0FFA740F000B29AE /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40899F420FFA7184000B29AE;
+			remoteInfo = gtest_unittest;
+		};
+		4089A0970FFAD34A000B29AE /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 4089A0120FFACEFC000B29AE;
+			remoteInfo = sample1_unittest;
+		};
+		408BEC0F1046CFE900DEF522 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C848F9101A209C0083642A;
+			remoteInfo = "gtest-static";
+		};
+		40C44AE50E379922008FCC51 /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C44ADC0E3798F4008FCC51;
+			remoteInfo = Version.h;
+		};
+		40C8497C101A36850083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C848F9101A209C0083642A;
+			remoteInfo = "gtest-static";
+		};
+		40C8497E101A36850083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C8490A101A217E0083642A;
+			remoteInfo = "gtest_main-static";
+		};
+		40C8498B101A36A60083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C848F9101A209C0083642A;
+			remoteInfo = "gtest-static";
+		};
+		40C8498D101A36A60083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C8490A101A217E0083642A;
+			remoteInfo = "gtest_main-static";
+		};
+		40C8499B101A36DC0083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C8490A101A217E0083642A;
+			remoteInfo = "gtest_main-static";
+		};
+		40C8499D101A36E50083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 8D07F2BC0486CC7A007CD1D0;
+			remoteInfo = "gtest-framework";
+		};
+		40C8499F101A36F10083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 8D07F2BC0486CC7A007CD1D0;
+			remoteInfo = "gtest-framework";
+		};
+		40C849F6101A43440083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C8497A101A36850083642A;
+			remoteInfo = "gtest_unittest-static";
+		};
+		40C849F8101A43490083642A /* PBXContainerItemProxy */ = {
+			isa = PBXContainerItemProxy;
+			containerPortal = 0867D690FE84028FC02AAC07 /* Project object */;
+			proxyType = 1;
+			remoteGlobalIDString = 40C84989101A36A60083642A;
+			remoteInfo = "sample1_unittest-static";
+		};
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		404884A50E2F7C0400CF7658 /* Copy Headers Internal */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = Headers/internal;
+			dstSubfolderSpec = 6;
+			files = (
+				F67D4F3F1C7F5DA70017C729 /* gtest-port-arch.h in Copy Headers Internal */,
+				404884A00E2F7BE600CF7658 /* gtest-death-test-internal.h in Copy Headers Internal */,
+				404884A10E2F7BE600CF7658 /* gtest-filepath.h in Copy Headers Internal */,
+				404884A20E2F7BE600CF7658 /* gtest-internal.h in Copy Headers Internal */,
+				4539C9380EC280E200A70F4C /* gtest-linked_ptr.h in Copy Headers Internal */,
+				4539C9390EC280E200A70F4C /* gtest-param-util-generated.h in Copy Headers Internal */,
+				4539C93A0EC280E200A70F4C /* gtest-param-util.h in Copy Headers Internal */,
+				404884A30E2F7BE600CF7658 /* gtest-port.h in Copy Headers Internal */,
+				404884A40E2F7BE600CF7658 /* gtest-string.h in Copy Headers Internal */,
+				40899F500FFA7281000B29AE /* gtest-tuple.h in Copy Headers Internal */,
+				3BF6F2A00E79B5AD000F2EEE /* gtest-type-util.h in Copy Headers Internal */,
+			);
+			name = "Copy Headers Internal";
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		F67D4F471C7F5DF60017C729 /* Copy Headers Internal Custom */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = Headers/internal/custom;
+			dstSubfolderSpec = 6;
+			files = (
+				F67D4F491C7F5E260017C729 /* gtest-printers.h in Copy Headers Internal Custom */,
+				F67D4F481C7F5E160017C729 /* gtest-port.h in Copy Headers Internal Custom */,
+			);
+			name = "Copy Headers Internal Custom";
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		224A12A10E9EADA700BD17FD /* gtest-all.cc */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = "gtest-all.cc"; sourceTree = "<group>"; };
+		224A12A20E9EADCC00BD17FD /* gtest-test-part.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = "gtest-test-part.h"; sourceTree = "<group>"; };
+		3B238C120E7FE13C00846E11 /* gtest_unittest.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = gtest_unittest.cc; sourceTree = "<group>"; };
+		3B87D2100E96B92E000D1852 /* runtests.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; path = runtests.sh; sourceTree = "<group>"; };
+		3BF6F29F0E79B5AD000F2EEE /* gtest-type-util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-type-util.h"; sourceTree = "<group>"; };
+		3BF6F2A40E79B616000F2EEE /* gtest-typed-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-typed-test.h"; sourceTree = "<group>"; };
+		403EE37C0E377822004BD1E2 /* versiongenerate.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; path = versiongenerate.py; sourceTree = "<group>"; };
+		404883DB0E2F799B00CF7658 /* gtest-death-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-death-test.h"; sourceTree = "<group>"; };
+		404883DC0E2F799B00CF7658 /* gtest-message.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-message.h"; sourceTree = "<group>"; };
+		404883DD0E2F799B00CF7658 /* gtest-spi.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-spi.h"; sourceTree = "<group>"; };
+		404883DE0E2F799B00CF7658 /* gtest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = gtest.h; sourceTree = "<group>"; };
+		404883DF0E2F799B00CF7658 /* gtest_pred_impl.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = gtest_pred_impl.h; sourceTree = "<group>"; };
+		404883E00E2F799B00CF7658 /* gtest_prod.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = gtest_prod.h; sourceTree = "<group>"; };
+		404883E20E2F799B00CF7658 /* gtest-death-test-internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-death-test-internal.h"; sourceTree = "<group>"; };
+		404883E30E2F799B00CF7658 /* gtest-filepath.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-filepath.h"; sourceTree = "<group>"; };
+		404883E40E2F799B00CF7658 /* gtest-internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-internal.h"; sourceTree = "<group>"; };
+		404883E50E2F799B00CF7658 /* gtest-port.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-port.h"; sourceTree = "<group>"; };
+		404883E60E2F799B00CF7658 /* gtest-string.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-string.h"; sourceTree = "<group>"; };
+		404883F60E2F799B00CF7658 /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = README.md; path = ../README.md; sourceTree = SOURCE_ROOT; };
+		4048840D0E2F799B00CF7658 /* gtest_main.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = gtest_main.cc; sourceTree = "<group>"; };
+		404884A90E2F7CD900CF7658 /* CHANGES */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = CHANGES; path = ../CHANGES; sourceTree = SOURCE_ROOT; };
+		404884AA0E2F7CD900CF7658 /* CONTRIBUTORS */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = CONTRIBUTORS; path = ../CONTRIBUTORS; sourceTree = SOURCE_ROOT; };
+		404884AB0E2F7CD900CF7658 /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = LICENSE; path = ../LICENSE; sourceTree = SOURCE_ROOT; };
+		40899F430FFA7184000B29AE /* gtest_unittest-framework */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "gtest_unittest-framework"; sourceTree = BUILT_PRODUCTS_DIR; };
+		40899F4D0FFA7271000B29AE /* gtest-tuple.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-tuple.h"; sourceTree = "<group>"; };
+		40899FB30FFA7567000B29AE /* StaticLibraryTarget.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = StaticLibraryTarget.xcconfig; sourceTree = "<group>"; };
+		4089A0130FFACEFC000B29AE /* sample1_unittest-framework */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "sample1_unittest-framework"; sourceTree = BUILT_PRODUCTS_DIR; };
+		4089A02C0FFACF7F000B29AE /* sample1.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sample1.cc; sourceTree = "<group>"; };
+		4089A02D0FFACF7F000B29AE /* sample1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sample1.h; sourceTree = "<group>"; };
+		4089A02E0FFACF7F000B29AE /* sample1_unittest.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sample1_unittest.cc; sourceTree = "<group>"; };
+		40C848FA101A209C0083642A /* libgtest.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libgtest.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		40C8490B101A217E0083642A /* libgtest_main.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libgtest_main.a; sourceTree = BUILT_PRODUCTS_DIR; };
+		40C84987101A36850083642A /* gtest_unittest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = gtest_unittest; sourceTree = BUILT_PRODUCTS_DIR; };
+		40C84997101A36A60083642A /* sample1_unittest-static */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "sample1_unittest-static"; sourceTree = BUILT_PRODUCTS_DIR; };
+		40D4CDF10E30E07400294801 /* DebugProject.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = DebugProject.xcconfig; sourceTree = "<group>"; };
+		40D4CDF20E30E07400294801 /* FrameworkTarget.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = FrameworkTarget.xcconfig; sourceTree = "<group>"; };
+		40D4CDF30E30E07400294801 /* General.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = General.xcconfig; sourceTree = "<group>"; };
+		40D4CDF40E30E07400294801 /* ReleaseProject.xcconfig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.xcconfig; path = ReleaseProject.xcconfig; sourceTree = "<group>"; };
+		40D4CF510E30F5E200294801 /* Info.plist */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		4539C8FF0EC27F6400A70F4C /* gtest.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = gtest.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		4539C9330EC280AE00A70F4C /* gtest-param-test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-param-test.h"; sourceTree = "<group>"; };
+		4539C9350EC280E200A70F4C /* gtest-linked_ptr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-linked_ptr.h"; sourceTree = "<group>"; };
+		4539C9360EC280E200A70F4C /* gtest-param-util-generated.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-param-util-generated.h"; sourceTree = "<group>"; };
+		4539C9370EC280E200A70F4C /* gtest-param-util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-param-util.h"; sourceTree = "<group>"; };
+		4567C8171264FF71007740BE /* gtest-printers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-printers.h"; sourceTree = "<group>"; };
+		F67D4F3D1C7F5D8B0017C729 /* gtest-port-arch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-port-arch.h"; sourceTree = "<group>"; };
+		F67D4F411C7F5DD00017C729 /* gtest-port.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-port.h"; sourceTree = "<group>"; };
+		F67D4F421C7F5DD00017C729 /* gtest-printers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "gtest-printers.h"; sourceTree = "<group>"; };
+		F67D4F431C7F5DD00017C729 /* gtest.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = gtest.h; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		40899F410FFA7184000B29AE /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C849A4101A37150083642A /* gtest.framework in Frameworks */,
+				40C84916101A235B0083642A /* libgtest_main.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		4089A0110FFACEFC000B29AE /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C849A2101A37050083642A /* gtest.framework in Frameworks */,
+				40C84921101A23AD0083642A /* libgtest_main.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		40C84981101A36850083642A /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C84982101A36850083642A /* libgtest.a in Frameworks */,
+				40C84983101A36850083642A /* libgtest_main.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		40C84991101A36A60083642A /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C84992101A36A60083642A /* libgtest.a in Frameworks */,
+				40C84993101A36A60083642A /* libgtest_main.a in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		034768DDFF38A45A11DB9C8B /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				4539C8FF0EC27F6400A70F4C /* gtest.framework */,
+				40C848FA101A209C0083642A /* libgtest.a */,
+				40C8490B101A217E0083642A /* libgtest_main.a */,
+				40899F430FFA7184000B29AE /* gtest_unittest-framework */,
+				40C84987101A36850083642A /* gtest_unittest */,
+				4089A0130FFACEFC000B29AE /* sample1_unittest-framework */,
+				40C84997101A36A60083642A /* sample1_unittest-static */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		0867D691FE84028FC02AAC07 /* gtest */ = {
+			isa = PBXGroup;
+			children = (
+				40D4CDF00E30E07400294801 /* Config */,
+				08FB77ACFE841707C02AAC07 /* Source */,
+				40D4CF4E0E30F5E200294801 /* Resources */,
+				403EE37B0E377822004BD1E2 /* Scripts */,
+				034768DDFF38A45A11DB9C8B /* Products */,
+			);
+			name = gtest;
+			sourceTree = "<group>";
+		};
+		08FB77ACFE841707C02AAC07 /* Source */ = {
+			isa = PBXGroup;
+			children = (
+				404884A90E2F7CD900CF7658 /* CHANGES */,
+				404884AA0E2F7CD900CF7658 /* CONTRIBUTORS */,
+				404884AB0E2F7CD900CF7658 /* LICENSE */,
+				404883F60E2F799B00CF7658 /* README.md */,
+				404883D90E2F799B00CF7658 /* include */,
+				4089A02F0FFACF84000B29AE /* samples */,
+				404884070E2F799B00CF7658 /* src */,
+				3B238BF00E7FE13B00846E11 /* test */,
+			);
+			name = Source;
+			sourceTree = "<group>";
+		};
+		3B238BF00E7FE13B00846E11 /* test */ = {
+			isa = PBXGroup;
+			children = (
+				3B238C120E7FE13C00846E11 /* gtest_unittest.cc */,
+			);
+			name = test;
+			path = ../test;
+			sourceTree = SOURCE_ROOT;
+		};
+		403EE37B0E377822004BD1E2 /* Scripts */ = {
+			isa = PBXGroup;
+			children = (
+				403EE37C0E377822004BD1E2 /* versiongenerate.py */,
+				3B87D2100E96B92E000D1852 /* runtests.sh */,
+			);
+			path = Scripts;
+			sourceTree = "<group>";
+		};
+		404883D90E2F799B00CF7658 /* include */ = {
+			isa = PBXGroup;
+			children = (
+				404883DA0E2F799B00CF7658 /* gtest */,
+			);
+			name = include;
+			path = ../include;
+			sourceTree = SOURCE_ROOT;
+		};
+		404883DA0E2F799B00CF7658 /* gtest */ = {
+			isa = PBXGroup;
+			children = (
+				404883E10E2F799B00CF7658 /* internal */,
+				224A12A20E9EADCC00BD17FD /* gtest-test-part.h */,
+				404883DB0E2F799B00CF7658 /* gtest-death-test.h */,
+				404883DC0E2F799B00CF7658 /* gtest-message.h */,
+				4539C9330EC280AE00A70F4C /* gtest-param-test.h */,
+				4567C8171264FF71007740BE /* gtest-printers.h */,
+				404883DD0E2F799B00CF7658 /* gtest-spi.h */,
+				404883DE0E2F799B00CF7658 /* gtest.h */,
+				404883DF0E2F799B00CF7658 /* gtest_pred_impl.h */,
+				404883E00E2F799B00CF7658 /* gtest_prod.h */,
+				3BF6F2A40E79B616000F2EEE /* gtest-typed-test.h */,
+			);
+			path = gtest;
+			sourceTree = "<group>";
+		};
+		404883E10E2F799B00CF7658 /* internal */ = {
+			isa = PBXGroup;
+			children = (
+				F67D4F401C7F5DD00017C729 /* custom */,
+				404883E20E2F799B00CF7658 /* gtest-death-test-internal.h */,
+				404883E30E2F799B00CF7658 /* gtest-filepath.h */,
+				404883E40E2F799B00CF7658 /* gtest-internal.h */,
+				4539C9350EC280E200A70F4C /* gtest-linked_ptr.h */,
+				4539C9360EC280E200A70F4C /* gtest-param-util-generated.h */,
+				4539C9370EC280E200A70F4C /* gtest-param-util.h */,
+				404883E50E2F799B00CF7658 /* gtest-port.h */,
+				F67D4F3D1C7F5D8B0017C729 /* gtest-port-arch.h */,
+				404883E60E2F799B00CF7658 /* gtest-string.h */,
+				40899F4D0FFA7271000B29AE /* gtest-tuple.h */,
+				3BF6F29F0E79B5AD000F2EEE /* gtest-type-util.h */,
+			);
+			path = internal;
+			sourceTree = "<group>";
+		};
+		404884070E2F799B00CF7658 /* src */ = {
+			isa = PBXGroup;
+			children = (
+				224A12A10E9EADA700BD17FD /* gtest-all.cc */,
+				4048840D0E2F799B00CF7658 /* gtest_main.cc */,
+			);
+			name = src;
+			path = ../src;
+			sourceTree = SOURCE_ROOT;
+		};
+		4089A02F0FFACF84000B29AE /* samples */ = {
+			isa = PBXGroup;
+			children = (
+				4089A02C0FFACF7F000B29AE /* sample1.cc */,
+				4089A02D0FFACF7F000B29AE /* sample1.h */,
+				4089A02E0FFACF7F000B29AE /* sample1_unittest.cc */,
+			);
+			name = samples;
+			path = ../samples;
+			sourceTree = SOURCE_ROOT;
+		};
+		40D4CDF00E30E07400294801 /* Config */ = {
+			isa = PBXGroup;
+			children = (
+				40D4CDF10E30E07400294801 /* DebugProject.xcconfig */,
+				40D4CDF20E30E07400294801 /* FrameworkTarget.xcconfig */,
+				40D4CDF30E30E07400294801 /* General.xcconfig */,
+				40D4CDF40E30E07400294801 /* ReleaseProject.xcconfig */,
+				40899FB30FFA7567000B29AE /* StaticLibraryTarget.xcconfig */,
+			);
+			path = Config;
+			sourceTree = "<group>";
+		};
+		40D4CF4E0E30F5E200294801 /* Resources */ = {
+			isa = PBXGroup;
+			children = (
+				40D4CF510E30F5E200294801 /* Info.plist */,
+			);
+			path = Resources;
+			sourceTree = "<group>";
+		};
+		F67D4F401C7F5DD00017C729 /* custom */ = {
+			isa = PBXGroup;
+			children = (
+				F67D4F411C7F5DD00017C729 /* gtest-port.h */,
+				F67D4F421C7F5DD00017C729 /* gtest-printers.h */,
+				F67D4F431C7F5DD00017C729 /* gtest.h */,
+			);
+			path = custom;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		8D07F2BD0486CC7A007CD1D0 /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				F67D4F451C7F5DD00017C729 /* gtest-printers.h in Headers */,
+				404884380E2F799B00CF7658 /* gtest-death-test.h in Headers */,
+				404884390E2F799B00CF7658 /* gtest-message.h in Headers */,
+				4539C9340EC280AE00A70F4C /* gtest-param-test.h in Headers */,
+				F67D4F461C7F5DD00017C729 /* gtest.h in Headers */,
+				F67D4F441C7F5DD00017C729 /* gtest-port.h in Headers */,
+				4567C8181264FF71007740BE /* gtest-printers.h in Headers */,
+				F67D4F3E1C7F5D8B0017C729 /* gtest-port-arch.h in Headers */,
+				3BF6F2A50E79B616000F2EEE /* gtest-typed-test.h in Headers */,
+				4048843A0E2F799B00CF7658 /* gtest-spi.h in Headers */,
+				4048843B0E2F799B00CF7658 /* gtest.h in Headers */,
+				4048843C0E2F799B00CF7658 /* gtest_pred_impl.h in Headers */,
+				4048843D0E2F799B00CF7658 /* gtest_prod.h in Headers */,
+				224A12A30E9EADCC00BD17FD /* gtest-test-part.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		40899F420FFA7184000B29AE /* gtest_unittest-framework */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 40899F4A0FFA71BC000B29AE /* Build configuration list for PBXNativeTarget "gtest_unittest-framework" */;
+			buildPhases = (
+				40899F400FFA7184000B29AE /* Sources */,
+				40899F410FFA7184000B29AE /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				40C849A0101A36F10083642A /* PBXTargetDependency */,
+			);
+			name = "gtest_unittest-framework";
+			productName = gtest_unittest;
+			productReference = 40899F430FFA7184000B29AE /* gtest_unittest-framework */;
+			productType = "com.apple.product-type.tool";
+		};
+		4089A0120FFACEFC000B29AE /* sample1_unittest-framework */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 4089A0240FFACF01000B29AE /* Build configuration list for PBXNativeTarget "sample1_unittest-framework" */;
+			buildPhases = (
+				4089A0100FFACEFC000B29AE /* Sources */,
+				4089A0110FFACEFC000B29AE /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				40C8499E101A36E50083642A /* PBXTargetDependency */,
+			);
+			name = "sample1_unittest-framework";
+			productName = sample1_unittest;
+			productReference = 4089A0130FFACEFC000B29AE /* sample1_unittest-framework */;
+			productType = "com.apple.product-type.tool";
+		};
+		40C848F9101A209C0083642A /* gtest-static */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 40C84902101A212E0083642A /* Build configuration list for PBXNativeTarget "gtest-static" */;
+			buildPhases = (
+				40C848F7101A209C0083642A /* Sources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "gtest-static";
+			productName = "gtest-static";
+			productReference = 40C848FA101A209C0083642A /* libgtest.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+		40C8490A101A217E0083642A /* gtest_main-static */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 40C84912101A21D20083642A /* Build configuration list for PBXNativeTarget "gtest_main-static" */;
+			buildPhases = (
+				40C84908101A217E0083642A /* Sources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "gtest_main-static";
+			productName = "gtest_main-static";
+			productReference = 40C8490B101A217E0083642A /* libgtest_main.a */;
+			productType = "com.apple.product-type.library.static";
+		};
+		40C8497A101A36850083642A /* gtest_unittest-static */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 40C84984101A36850083642A /* Build configuration list for PBXNativeTarget "gtest_unittest-static" */;
+			buildPhases = (
+				40C8497F101A36850083642A /* Sources */,
+				40C84981101A36850083642A /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				40C8497B101A36850083642A /* PBXTargetDependency */,
+				40C8497D101A36850083642A /* PBXTargetDependency */,
+			);
+			name = "gtest_unittest-static";
+			productName = gtest_unittest;
+			productReference = 40C84987101A36850083642A /* gtest_unittest */;
+			productType = "com.apple.product-type.tool";
+		};
+		40C84989101A36A60083642A /* sample1_unittest-static */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 40C84994101A36A60083642A /* Build configuration list for PBXNativeTarget "sample1_unittest-static" */;
+			buildPhases = (
+				40C8498E101A36A60083642A /* Sources */,
+				40C84991101A36A60083642A /* Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				40C8498A101A36A60083642A /* PBXTargetDependency */,
+				40C8498C101A36A60083642A /* PBXTargetDependency */,
+			);
+			name = "sample1_unittest-static";
+			productName = sample1_unittest;
+			productReference = 40C84997101A36A60083642A /* sample1_unittest-static */;
+			productType = "com.apple.product-type.tool";
+		};
+		8D07F2BC0486CC7A007CD1D0 /* gtest-framework */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = 4FADC24208B4156D00ABE55E /* Build configuration list for PBXNativeTarget "gtest-framework" */;
+			buildPhases = (
+				8D07F2C10486CC7A007CD1D0 /* Sources */,
+				8D07F2BD0486CC7A007CD1D0 /* Headers */,
+				404884A50E2F7C0400CF7658 /* Copy Headers Internal */,
+				F67D4F471C7F5DF60017C729 /* Copy Headers Internal Custom */,
+				8D07F2BF0486CC7A007CD1D0 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+				40C44AE60E379922008FCC51 /* PBXTargetDependency */,
+				408BEC101046CFE900DEF522 /* PBXTargetDependency */,
+				40C8499C101A36DC0083642A /* PBXTargetDependency */,
+			);
+			name = "gtest-framework";
+			productInstallPath = "$(HOME)/Library/Frameworks";
+			productName = gtest;
+			productReference = 4539C8FF0EC27F6400A70F4C /* gtest.framework */;
+			productType = "com.apple.product-type.framework";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		0867D690FE84028FC02AAC07 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0460;
+			};
+			buildConfigurationList = 4FADC24608B4156D00ABE55E /* Build configuration list for PBXProject "gtest" */;
+			compatibilityVersion = "Xcode 3.2";
+			developmentRegion = English;
+			hasScannedForEncodings = 1;
+			knownRegions = (
+				English,
+				Japanese,
+				French,
+				German,
+				en,
+			);
+			mainGroup = 0867D691FE84028FC02AAC07 /* gtest */;
+			productRefGroup = 034768DDFF38A45A11DB9C8B /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				8D07F2BC0486CC7A007CD1D0 /* gtest-framework */,
+				40C848F9101A209C0083642A /* gtest-static */,
+				40C8490A101A217E0083642A /* gtest_main-static */,
+				40899F420FFA7184000B29AE /* gtest_unittest-framework */,
+				40C8497A101A36850083642A /* gtest_unittest-static */,
+				4089A0120FFACEFC000B29AE /* sample1_unittest-framework */,
+				40C84989101A36A60083642A /* sample1_unittest-static */,
+				3B238F5F0E828B5400846E11 /* Check */,
+				40C44ADC0E3798F4008FCC51 /* Version Info */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		8D07F2BF0486CC7A007CD1D0 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				404884500E2F799B00CF7658 /* README.md in Resources */,
+				404884AC0E2F7CD900CF7658 /* CHANGES in Resources */,
+				404884AD0E2F7CD900CF7658 /* CONTRIBUTORS in Resources */,
+				404884AE0E2F7CD900CF7658 /* LICENSE in Resources */,
+				40C84978101A36540083642A /* libgtest_main.a in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		3B238F5E0E828B5400846E11 /* ShellScript */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+			);
+			outputPaths = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "# Remember, this \"Run Script\" build phase will be executed from $SRCROOT\n/bin/bash Scripts/runtests.sh";
+		};
+		40C44ADB0E3798F4008FCC51 /* Generate Version.h */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"$(SRCROOT)/Scripts/versiongenerate.py",
+				"$(SRCROOT)/../configure.ac",
+			);
+			name = "Generate Version.h";
+			outputPaths = (
+				"$(PROJECT_TEMP_DIR)/Version.h",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "# Remember, this \"Run Script\" build phase will be executed from $SRCROOT\n/usr/bin/python Scripts/versiongenerate.py ../ $PROJECT_TEMP_DIR";
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		40899F400FFA7184000B29AE /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40899F530FFA72A0000B29AE /* gtest_unittest.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		4089A0100FFACEFC000B29AE /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				4089A0440FFAD1BE000B29AE /* sample1.cc in Sources */,
+				4089A0460FFAD1BE000B29AE /* sample1_unittest.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		40C848F7101A209C0083642A /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C848FF101A21150083642A /* gtest-all.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		40C84908101A217E0083642A /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C84915101A21DF0083642A /* gtest_main.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		40C8497F101A36850083642A /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C84980101A36850083642A /* gtest_unittest.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		40C8498E101A36A60083642A /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40C8498F101A36A60083642A /* sample1.cc in Sources */,
+				40C84990101A36A60083642A /* sample1_unittest.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+		8D07F2C10486CC7A007CD1D0 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				40899F3A0FFA70D4000B29AE /* gtest-all.cc in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+		40899F9D0FFA740F000B29AE /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40899F420FFA7184000B29AE /* gtest_unittest-framework */;
+			targetProxy = 40899F9C0FFA740F000B29AE /* PBXContainerItemProxy */;
+		};
+		4089A0980FFAD34A000B29AE /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 4089A0120FFACEFC000B29AE /* sample1_unittest-framework */;
+			targetProxy = 4089A0970FFAD34A000B29AE /* PBXContainerItemProxy */;
+		};
+		408BEC101046CFE900DEF522 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C848F9101A209C0083642A /* gtest-static */;
+			targetProxy = 408BEC0F1046CFE900DEF522 /* PBXContainerItemProxy */;
+		};
+		40C44AE60E379922008FCC51 /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C44ADC0E3798F4008FCC51 /* Version Info */;
+			targetProxy = 40C44AE50E379922008FCC51 /* PBXContainerItemProxy */;
+		};
+		40C8497B101A36850083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C848F9101A209C0083642A /* gtest-static */;
+			targetProxy = 40C8497C101A36850083642A /* PBXContainerItemProxy */;
+		};
+		40C8497D101A36850083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C8490A101A217E0083642A /* gtest_main-static */;
+			targetProxy = 40C8497E101A36850083642A /* PBXContainerItemProxy */;
+		};
+		40C8498A101A36A60083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C848F9101A209C0083642A /* gtest-static */;
+			targetProxy = 40C8498B101A36A60083642A /* PBXContainerItemProxy */;
+		};
+		40C8498C101A36A60083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C8490A101A217E0083642A /* gtest_main-static */;
+			targetProxy = 40C8498D101A36A60083642A /* PBXContainerItemProxy */;
+		};
+		40C8499C101A36DC0083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C8490A101A217E0083642A /* gtest_main-static */;
+			targetProxy = 40C8499B101A36DC0083642A /* PBXContainerItemProxy */;
+		};
+		40C8499E101A36E50083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 8D07F2BC0486CC7A007CD1D0 /* gtest-framework */;
+			targetProxy = 40C8499D101A36E50083642A /* PBXContainerItemProxy */;
+		};
+		40C849A0101A36F10083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 8D07F2BC0486CC7A007CD1D0 /* gtest-framework */;
+			targetProxy = 40C8499F101A36F10083642A /* PBXContainerItemProxy */;
+		};
+		40C849F7101A43440083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C8497A101A36850083642A /* gtest_unittest-static */;
+			targetProxy = 40C849F6101A43440083642A /* PBXContainerItemProxy */;
+		};
+		40C849F9101A43490083642A /* PBXTargetDependency */ = {
+			isa = PBXTargetDependency;
+			target = 40C84989101A36A60083642A /* sample1_unittest-static */;
+			targetProxy = 40C849F8101A43490083642A /* PBXContainerItemProxy */;
+		};
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+		3B238F600E828B5400846E11 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				COPY_PHASE_STRIP = NO;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				PRODUCT_NAME = Check;
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		3B238F610E828B5400846E11 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				COPY_PHASE_STRIP = YES;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				PRODUCT_NAME = Check;
+				SDKROOT = macosx;
+				ZERO_LINK = NO;
+			};
+			name = Release;
+		};
+		40899F450FFA7185000B29AE /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = ../;
+				PRODUCT_NAME = "gtest_unittest-framework";
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		40899F460FFA7185000B29AE /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = ../;
+				PRODUCT_NAME = "gtest_unittest-framework";
+				SDKROOT = macosx;
+			};
+			name = Release;
+		};
+		4089A0150FFACEFD000B29AE /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				PRODUCT_NAME = "sample1_unittest-framework";
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		4089A0160FFACEFD000B29AE /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				PRODUCT_NAME = "sample1_unittest-framework";
+				SDKROOT = macosx;
+			};
+			name = Release;
+		};
+		40C44ADF0E3798F4008FCC51 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				MACOSX_DEPLOYMENT_TARGET = 10.7;
+				PRODUCT_NAME = gtest;
+				SDKROOT = macosx;
+				TARGET_NAME = gtest;
+			};
+			name = Debug;
+		};
+		40C44AE00E3798F4008FCC51 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				MACOSX_DEPLOYMENT_TARGET = 10.7;
+				PRODUCT_NAME = gtest;
+				SDKROOT = macosx;
+				TARGET_NAME = gtest;
+			};
+			name = Release;
+		};
+		40C848FB101A209D0083642A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40899FB30FFA7567000B29AE /* StaticLibraryTarget.xcconfig */;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
+				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = (
+					../,
+					../include/,
+				);
+				PRODUCT_NAME = gtest;
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		40C848FC101A209D0083642A /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40899FB30FFA7567000B29AE /* StaticLibraryTarget.xcconfig */;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				GCC_INLINES_ARE_PRIVATE_EXTERN = YES;
+				GCC_SYMBOLS_PRIVATE_EXTERN = YES;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = (
+					../,
+					../include/,
+				);
+				PRODUCT_NAME = gtest;
+				SDKROOT = macosx;
+			};
+			name = Release;
+		};
+		40C8490E101A217F0083642A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40899FB30FFA7567000B29AE /* StaticLibraryTarget.xcconfig */;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = (
+					../,
+					../include/,
+				);
+				PRODUCT_NAME = gtest_main;
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		40C8490F101A217F0083642A /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40899FB30FFA7567000B29AE /* StaticLibraryTarget.xcconfig */;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = (
+					../,
+					../include/,
+				);
+				PRODUCT_NAME = gtest_main;
+				SDKROOT = macosx;
+			};
+			name = Release;
+		};
+		40C84985101A36850083642A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = ../;
+				PRODUCT_NAME = gtest_unittest;
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		40C84986101A36850083642A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = ../;
+				PRODUCT_NAME = gtest_unittest;
+				SDKROOT = macosx;
+			};
+			name = Release;
+		};
+		40C84995101A36A60083642A /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				PRODUCT_NAME = "sample1_unittest-static";
+				SDKROOT = macosx;
+			};
+			name = Debug;
+		};
+		40C84996101A36A60083642A /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				PRODUCT_NAME = "sample1_unittest-static";
+				SDKROOT = macosx;
+			};
+			name = Release;
+		};
+		4FADC24308B4156D00ABE55E /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40D4CDF20E30E07400294801 /* FrameworkTarget.xcconfig */;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = (
+					../,
+					../include/,
+				);
+				INFOPLIST_FILE = Resources/Info.plist;
+				INFOPLIST_PREFIX_HEADER = "$(PROJECT_TEMP_DIR)/Version.h";
+				INFOPLIST_PREPROCESS = YES;
+				PRODUCT_NAME = gtest;
+				SDKROOT = macosx;
+				VERSIONING_SYSTEM = "apple-generic";
+			};
+			name = Debug;
+		};
+		4FADC24408B4156D00ABE55E /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40D4CDF20E30E07400294801 /* FrameworkTarget.xcconfig */;
+			buildSettings = {
+				COMBINE_HIDPI_IMAGES = YES;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+				HEADER_SEARCH_PATHS = (
+					../,
+					../include/,
+				);
+				INFOPLIST_FILE = Resources/Info.plist;
+				INFOPLIST_PREFIX_HEADER = "$(PROJECT_TEMP_DIR)/Version.h";
+				INFOPLIST_PREPROCESS = YES;
+				PRODUCT_NAME = gtest;
+				SDKROOT = macosx;
+				VERSIONING_SYSTEM = "apple-generic";
+			};
+			name = Release;
+		};
+		4FADC24708B4156D00ABE55E /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40D4CDF10E30E07400294801 /* DebugProject.xcconfig */;
+			buildSettings = {
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				MACOSX_DEPLOYMENT_TARGET = 10.7;
+			};
+			name = Debug;
+		};
+		4FADC24808B4156D00ABE55E /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 40D4CDF40E30E07400294801 /* ReleaseProject.xcconfig */;
+			buildSettings = {
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
+				CLANG_CXX_LIBRARY = "libc++";
+				MACOSX_DEPLOYMENT_TARGET = 10.7;
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		3B238FA30E828BB600846E11 /* Build configuration list for PBXAggregateTarget "Check" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				3B238F600E828B5400846E11 /* Debug */,
+				3B238F610E828B5400846E11 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		40899F4A0FFA71BC000B29AE /* Build configuration list for PBXNativeTarget "gtest_unittest-framework" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				40899F450FFA7185000B29AE /* Debug */,
+				40899F460FFA7185000B29AE /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4089A0240FFACF01000B29AE /* Build configuration list for PBXNativeTarget "sample1_unittest-framework" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4089A0150FFACEFD000B29AE /* Debug */,
+				4089A0160FFACEFD000B29AE /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		40C44AE40E379905008FCC51 /* Build configuration list for PBXAggregateTarget "Version Info" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				40C44ADF0E3798F4008FCC51 /* Debug */,
+				40C44AE00E3798F4008FCC51 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		40C84902101A212E0083642A /* Build configuration list for PBXNativeTarget "gtest-static" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				40C848FB101A209D0083642A /* Debug */,
+				40C848FC101A209D0083642A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		40C84912101A21D20083642A /* Build configuration list for PBXNativeTarget "gtest_main-static" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				40C8490E101A217F0083642A /* Debug */,
+				40C8490F101A217F0083642A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		40C84984101A36850083642A /* Build configuration list for PBXNativeTarget "gtest_unittest-static" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				40C84985101A36850083642A /* Debug */,
+				40C84986101A36850083642A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		40C84994101A36A60083642A /* Build configuration list for PBXNativeTarget "sample1_unittest-static" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				40C84995101A36A60083642A /* Debug */,
+				40C84996101A36A60083642A /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4FADC24208B4156D00ABE55E /* Build configuration list for PBXNativeTarget "gtest-framework" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4FADC24308B4156D00ABE55E /* Debug */,
+				4FADC24408B4156D00ABE55E /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		4FADC24608B4156D00ABE55E /* Build configuration list for PBXProject "gtest" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				4FADC24708B4156D00ABE55E /* Debug */,
+				4FADC24808B4156D00ABE55E /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = 0867D690FE84028FC02AAC07 /* Project object */;
+}
diff --git a/inference-engine/tests/mock_engine/CMakeLists.txt b/inference-engine/tests/mock_engine/CMakeLists.txt
index dc1edfb5e..a0f77cf3d 100644
--- a/inference-engine/tests/mock_engine/CMakeLists.txt
+++ b/inference-engine/tests/mock_engine/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -17,24 +17,23 @@ file (GLOB LIBRARY_HEADERS
 if(UNIX)
     list(REMOVE_ITEM LIBRARY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/dllmain.cpp)
 endif()
-add_definitions(-DIMPLEMENT_INFERENCE_ENGINE_API)
 
 # Create named folders for the sources within the .vcproj
 # Empty name lists them directly under the .vcproj
 source_group("src" FILES ${LIBRARY_SRC})
 source_group("include" FILES ${LIBRARY_HEADERS})
 
-# Properties->C/C++->General->Additional Include Directories
-include_directories (${IE_MAIN_SOURCE_DIR}/include
-        ${IE_MAIN_SOURCE_DIR}/src/inference_engine
-        ${IE_MAIN_SOURCE_DIR}/include
-        ${IE_MAIN_SOURCE_DIR}/thirdparty/pugixml/src)
-
 # Create library file from sources.
 add_library(${TARGET_NAME} SHARED
             ${LIBRARY_SRC}
             ${LIBRARY_HEADERS})
 
+target_include_directories (${TARGET_NAME} PRIVATE
+        "${IE_MAIN_SOURCE_DIR}/src/inference_engine")
+
+target_link_libraries(${TARGET_NAME} PRIVATE inference_engine)
+
+target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_INFERENCE_ENGINE_API)
 
 set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 11)
 set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD_REQUIRED ON)
diff --git a/inference-engine/tests/mock_engine/dllmain.cpp b/inference-engine/tests/mock_engine/dllmain.cpp
index a9dd58a52..88a881567 100644
--- a/inference-engine/tests/mock_engine/dllmain.cpp
+++ b/inference-engine/tests/mock_engine/dllmain.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 // dllmain.cpp : Defines the entry point for the DLL application.
diff --git a/inference-engine/tests/mock_engine/mock_plugin.cpp b/inference-engine/tests/mock_engine/mock_plugin.cpp
index 0d344c8ba..587d224d4 100644
--- a/inference-engine/tests/mock_engine/mock_plugin.cpp
+++ b/inference-engine/tests/mock_engine/mock_plugin.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/mock_engine/mock_plugin.hpp b/inference-engine/tests/mock_engine/mock_plugin.hpp
index 970638134..3a2c95233 100644
--- a/inference-engine/tests/mock_engine/mock_plugin.hpp
+++ b/inference-engine/tests/mock_engine/mock_plugin.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/mock_engine/stub_inference_engine.xpp b/inference-engine/tests/mock_engine/stub_inference_engine.xpp
index fa2d9de1f..008e9a046 100644
--- a/inference-engine/tests/mock_engine/stub_inference_engine.xpp
+++ b/inference-engine/tests/mock_engine/stub_inference_engine.xpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <random>
diff --git a/inference-engine/tests/unit/CMakeLists.txt b/inference-engine/tests/unit/CMakeLists.txt
index 4761e8336..9c0f53962 100644
--- a/inference-engine/tests/unit/CMakeLists.txt
+++ b/inference-engine/tests/unit/CMakeLists.txt
@@ -1,31 +1,15 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
-cmake_minimum_required(VERSION 2.8)
 cmake_policy(SET CMP0054 NEW)
 
 set(TARGET_NAME InferenceEngineUnitTests)
 
 #rpath enabled for unit tests only
-SET (CMAKE_SKIP_RPATH FALSE)
-
-if (UNIX AND NOT APPLE)
-    set(ARCH_SPECIFIC_FOLDER_TBB /gcc4.4)
-    set(ARCH_SPECIFIC_FOLDER intel64_lin)
-    if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-        set(ARCH_SPECIFIC_FOLDER intel64_lin)
-    else ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-        set(ARCH_SPECIFIC_FOLDER intel32_lin)
-    endif ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-else ()
-    set(ARCH_SPECIFIC_FOLDER_TBB /vc_mt)
-    if ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-        set(ARCH_SPECIFIC_FOLDER intel64_win)
-    else ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-        set(ARCH_SPECIFIC_FOLDER ia32_win)
-    endif ("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
-endif ()
+SET (CMAKE_SKIP_RPATH OFF)
+
+# collect sources
 
 file(GLOB
         TEST_SRC
@@ -34,6 +18,8 @@ file(GLOB
         inference_engine_tests/cpp_interfaces/*.cpp
         mem_solver/*.cpp
         cnn_network/*.cpp
+        builders/*.cpp
+        transformations/*.cpp
         shape_infer/*.cpp
         shape_infer/built-in/*.cpp
         topology_verification_tests/*.cpp
@@ -57,7 +43,7 @@ endif()
 if (ENABLE_MKL_DNN)
     if (GEMM STREQUAL "MKL")
         add_definitions(-DUSE_MKL)
-     endif ()
+    endif ()
     file(GLOB
             MKLDNN_TESTS
             engines/mkldnn/*.cpp
@@ -68,80 +54,49 @@ if (ENABLE_MKL_DNN)
     file(GLOB
             MKLDNN_TESTS_INCLUDE engines/mkldnn/graph/*.hpp)
 
-    if (USE_BOOST_RE)
-        debug_message(STATUS "Adding boost dependency")
-        if (VERBOSE_BUILD)
-            set(Boost_DEBUG on)
-        endif ()
-        find_package(Boost REQUIRED COMPONENTS regex)
-        link_directories(${Boost_LIBRARY_DIRS})
-        include_directories(${Boost_INCLUDE_DIRS})
-    endif ()
-
     include_directories(
             ${IE_MAIN_SOURCE_DIR}/thirdparty/mkl-dnn/include
-            engines/mkldnn/graph)
+            engines/mkldnn/graph
+            ${CMAKE_BINARY_DIR}/include/)
 
     source_group("mkldnn" FILES ${MKLDNN_TESTS} ${MKLDNN_TESTS_INCLUDE})
 endif ()
 
 file(GLOB
         TEST_INCLUDE
-        ${IE_MAIN_SOURCE_DIR}/tests/helpers/*.hpp
         shape_infer/*.hpp)
 
 source_group("src" FILES ${TEST_SRC})
 source_group("include" FILES ${TEST_INCLUDE})
 
-include_directories(
-        ${IE_MAIN_SOURCE_DIR}/include
-        ${IE_MAIN_SOURCE_DIR}/src/inference_engine
+# create target
+
+add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE} ${MKLDNN_TESTS} ${MKLDNN_TESTS_INCLUDE} ${DLAI_TESTS} transformations/sub_test.cpp transformations/tranformations_test.hpp)
+set_ie_threading_interface_for(${TARGET_NAME})
+
+target_include_directories(${TARGET_NAME} PRIVATE
         ${IE_MAIN_SOURCE_DIR}/src/mkldnn_plugin
         ${IE_MAIN_SOURCE_DIR}/src/gna_plugin
+        ${IE_MAIN_SOURCE_DIR}/src/inference_engine
         ${IE_MAIN_SOURCE_DIR}/src/extension
         ${IE_MAIN_SOURCE_DIR}/src/extension/common
-        ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/gflags/include
-        mocks)
-add_executable(${TARGET_NAME} ${TEST_SRC} ${TEST_INCLUDE} ${MKLDNN_TESTS} ${MKLDNN_TESTS_INCLUDE} ${DLAI_TESTS})
-set_ie_threading_interface_for(${TARGET_NAME})
+        "${CMAKE_CURRENT_SOURCE_DIR}/mocks")
 
-set_target_properties(${TARGET_NAME} PROPERTIES "CMAKE_CXX_FLAGS" "${CMAKE_CXX_FLAGS} -fPIE"
-COMPILE_PDB_NAME ${TARGET_NAME})
+set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_PDB_NAME ${TARGET_NAME})
 
 ## Mock macros doesn't use "override" specificator
 target_compile_options(${TARGET_NAME} PRIVATE $<$<CXX_COMPILER_ID:Clang>: -Wno-inconsistent-missing-override >)
-
-if (FALSE)
-    add_custom_command(
-            TARGET ${TARGET_NAME}
-            POST_BUILD COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/data/*.xml ${LIBRARY_OUTPUT_DIRECTORY}
-            POST_BUILD COMMAND cp -R ${IE_MAIN_SOURCE_DIR}/temp/models ${LIBRARY_OUTPUT_DIRECTORY}/models
-    )
-endif ()
-
-if (MSVC)
-    set(PUGI pugixml_mt)
-else ()
-    set(PUGI pugixml)
-endif ()
-
-add_definitions(-DMODELS_PATH=\"${MODELS_PATH}\" -DDATA_PATH=\"${IE_MAIN_SOURCE_DIR}/tests/data\")
+target_compile_options(${TARGET_NAME} PRIVATE $<$<CXX_COMPILER_ID:AppleClang>: -Wno-inconsistent-missing-override >)
 
 target_link_libraries(${TARGET_NAME} PRIVATE
-        gtest
-        gmock
-        gtest_main
-        inference_engine_s
-        ie_cpu_extension
-        helpers
-        ${PUGI}
-        ${LIB_DL}
-        ${MKLDNN_STATIC_ENGINE}
-        ${INTEL_ITT_LIBS}
-        ${Boost_REGEX_LIBRARY}
-        ${TBB_LIBRARY}
-        ${TBBMALLOC_LIBRARY}
-        ${GNA_TEST_ENGINE})
+    gtest
+    gtest_main
+    gmock
+    gflags
+    inference_engine_s
+    helpers
+    ${CMAKE_DL_LIBS}
+    ${GNA_TEST_ENGINE})
 
 add_dependencies(${TARGET_NAME} ie_cpu_extension)
 
diff --git a/inference-engine/tests/unit/builders/argmax_layer_test.cpp b/inference-engine/tests/unit/builders/argmax_layer_test.cpp
new file mode 100644
index 000000000..40e1595f9
--- /dev/null
+++ b/inference-engine/tests/unit/builders/argmax_layer_test.cpp
@@ -0,0 +1,47 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_argmax_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ArgMaxLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ArgMaxLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network network("network");
+    Builder::ArgMaxLayer argMaxLayer("ArgMax layer");
+    argMaxLayer.setAxis(1);
+    argMaxLayer.setOutMaxVal(0);
+    argMaxLayer.setTopK(20);
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(argMaxLayer));
+    Builder::ArgMaxLayer layerFromNetwork(network.getLayer(ind));
+    ASSERT_EQ(argMaxLayer.getAxis(), layerFromNetwork.getAxis());
+    ASSERT_EQ(argMaxLayer.getOutMaxVal(), layerFromNetwork.getOutMaxVal());
+    ASSERT_EQ(argMaxLayer.getTopK(), layerFromNetwork.getTopK());
+}
+
+TEST_F(ArgMaxLayerBuilderTest, cannotAddLayerWithWrongAxis) {
+    Builder::Network network("network");
+    Builder::ArgMaxLayer argMaxLayer("ArgMax layer");
+    argMaxLayer.setAxis(500);  // here
+    argMaxLayer.setOutMaxVal(0);
+    argMaxLayer.setTopK(20);
+    ASSERT_THROW(network.addLayer(argMaxLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ArgMaxLayerBuilderTest, cannotAddLayerWithWrongOutMaxVal) {
+    Builder::Network network("network");
+    Builder::ArgMaxLayer argMaxLayer("ArgMax layer");
+    argMaxLayer.setAxis(1);
+    argMaxLayer.setOutMaxVal(500);  // here
+    argMaxLayer.setTopK(20);
+    ASSERT_THROW(network.addLayer(argMaxLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/batch_normalization_layer_test.cpp b/inference-engine/tests/unit/builders/batch_normalization_layer_test.cpp
index 5d55c17b0..1ae7f4676 100644
--- a/inference-engine/tests/unit/builders/batch_normalization_layer_test.cpp
+++ b/inference-engine/tests/unit/builders/batch_normalization_layer_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -14,23 +14,23 @@ using namespace InferenceEngine;
 
 class BatchNormalizationLayerBuilderTest : public BuilderTestCommon {};
 
-TEST_F(BatchNormalizationLayerBuilderTest, cannotCreateBatchNormalizationWithoutWeightOrBiases) {
-    ASSERT_THROW(((Builder::Layer)Builder::BatchNormalizationLayer("in1")), InferenceEngine::details::InferenceEngineException);
-    ASSERT_THROW(((Builder::Layer)Builder::BatchNormalizationLayer("in1")
-            .setWeights(generateBlob(Precision::FP32, {3}, Layout::C))), InferenceEngine::details::InferenceEngineException);
-    ASSERT_THROW(((Builder::Layer)Builder::BatchNormalizationLayer("in1")
-            .setBiases(generateBlob(Precision::FP32, {3}, Layout::C))), InferenceEngine::details::InferenceEngineException);
-}
+//TEST_F(BatchNormalizationLayerBuilderTest, cannotCreateBatchNormalizationWithoutWeightOrBiases) {
+//    ASSERT_THROW(((Builder::Layer)Builder::BatchNormalizationLayer("in1")), InferenceEngine::details::InferenceEngineException);
+//    ASSERT_THROW(((Builder::Layer)Builder::BatchNormalizationLayer("in1")
+//            .setWeights(generateBlob(Precision::FP32, {3}, Layout::C))), InferenceEngine::details::InferenceEngineException);
+//    ASSERT_THROW(((Builder::Layer)Builder::BatchNormalizationLayer("in1")
+//            .setBiases(generateBlob(Precision::FP32, {3}, Layout::C))), InferenceEngine::details::InferenceEngineException);
+//}
 
 TEST_F(BatchNormalizationLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
     Builder::Network network("Test");
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {3}, Layout::C)));
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {3}, Layout::C)));
     Builder::BatchNormalizationLayer bnBuilder("bn");
-    bnBuilder.setWeights(generateBlob(Precision::FP32, {3}, Layout::C));
-    bnBuilder.setBiases(generateBlob(Precision::FP32, {3}, Layout::C));
-    size_t bnId = network.addLayer(bnBuilder);
+    idx_t bnId = network.addLayer({{0}, {weightsId}, {biasesId}}, bnBuilder);
     Builder::BatchNormalizationLayer bnBuilderFromNetwork(network.getLayer(bnId));
     ASSERT_EQ(bnBuilderFromNetwork.getEpsilon(), bnBuilder.getEpsilon());
     bnBuilderFromNetwork.setEpsilon(2);
     ASSERT_NE(bnBuilderFromNetwork.getEpsilon(), bnBuilder.getEpsilon());
-    ASSERT_EQ(bnBuilderFromNetwork.getEpsilon(), network.getLayer(bnId).getParameters()["epsilon"].asFloat());
+    ASSERT_EQ(bnBuilderFromNetwork.getEpsilon(), network.getLayer(bnId)->getParameters()["epsilon"].as<float>());
 }
 \ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/builder_test.hpp b/inference-engine/tests/unit/builders/builder_test.hpp
index 28ef342c8..728a346f1 100644
--- a/inference-engine/tests/unit/builders/builder_test.hpp
+++ b/inference-engine/tests/unit/builders/builder_test.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/builders/clamp_layer_test.cpp b/inference-engine/tests/unit/builders/clamp_layer_test.cpp
new file mode 100644
index 000000000..d912b2630
--- /dev/null
+++ b/inference-engine/tests/unit/builders/clamp_layer_test.cpp
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_clamp_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ClampLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ClampLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::ClampLayer clampLayer("clampLayer");
+    clampLayer.setMinValue(0.1).setMaxValue(0.2);
+    size_t ind = net.addLayer(clampLayer);
+    Builder::ClampLayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(layerFromNet.getMinValue(), clampLayer.getMinValue());
+    ASSERT_EQ(layerFromNet.getMaxValue(), clampLayer.getMaxValue());
+}
+
+TEST_F(ClampLayerBuilderTest, cannotCreateLayerWithWrongMinValue) {
+    Builder::Network net("network");
+    Builder::ClampLayer clampLayer("clampLayer");
+    clampLayer.setMinValue(0).setMaxValue(0.2);
+    ASSERT_NO_THROW(net.addLayer(clampLayer));
+}
+
+TEST_F(ClampLayerBuilderTest, cannotCreateLayerWithWrongMaxValue) {
+    Builder::Network net("network");
+    Builder::ClampLayer clampLayer("clampLayer");
+    clampLayer.setMinValue(10).setMaxValue(-0.2);
+    ASSERT_THROW(net.addLayer(clampLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ClampLayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeClampLayerPtr = std::make_shared<Builder::Layer>("Clamp", "Clamp layer");
+    fakeClampLayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeClampLayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::ClampLayer clampLayer(fakeClampLayerPtr);
+    clampLayer.setMinValue(0.0f).setMaxValue(1.0f);
+    ASSERT_THROW(net.addLayer(clampLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/concat_layer_test.cpp b/inference-engine/tests/unit/builders/concat_layer_test.cpp
new file mode 100644
index 000000000..3c2ba9069
--- /dev/null
+++ b/inference-engine/tests/unit/builders/concat_layer_test.cpp
@@ -0,0 +1,151 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_concat_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ConcatLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ConcatLayerBuilderTest, getExistsLayerFromNetworkBuilderAxis) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(0);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 55})});
+    layer.setOutputPort(Port({1 + 3, 2, 55, 55}));
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    network.getLayer(ind)->validate(false);
+    ASSERT_NO_THROW(network.getLayer(ind)->validate(false));
+    Builder::ConcatLayer layerFromNet(network.getLayer(ind));
+
+    ASSERT_EQ(layer.getAxis(), layerFromNet.getAxis());
+    ASSERT_EQ(layer.getInputPorts(), layerFromNet.getInputPorts());
+    ASSERT_EQ(layer.getOutputPort(), layerFromNet.getOutputPort());
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithNoInputPorts) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(1);
+    layer.setOutputPort(Port({1, 2 + 4, 55, 55}));
+    // here should be layer.setInputPort(...)
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithOneInputPort) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(1);
+    layer.setInputPorts({Port({1, 2, 55, 55})});  // here
+    layer.setOutputPort(Port({1, 2 + 4, 55, 55}));
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithWrongAxis) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(50);  // here
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 55})});
+    layer.setOutputPort(Port({1 + 3, 2, 55, 55}));
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithUnalignedPorts1) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(0);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 55})});
+    layer.setOutputPort(Port({1 + 3, 2, 55, 155}));  // should be {1 + 3, 2, 55, 55}
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithUnalignedPorts2) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(0);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 55})});
+    layer.setOutputPort(Port({1 + 3, 2, 155, 55}));  // should be {1 + 3, 2, 55, 55}
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithUnalignedPorts3) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(0);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 55})});
+    layer.setOutputPort(Port({100, 2, 55, 55}));  // should be {1 + 3, 2, 55, 55}
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithUnalignedPorts4) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(1);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 55})});
+    layer.setOutputPort(Port({1, 100, 55, 55}));  // should be {1, 2 + 4, 55, 55}
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithDifferentInputPorts1) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(0);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 55, 155})});  // here
+    layer.setOutputPort(Port({1 + 3, 4, 55, 55}));
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConcatLayerBuilderTest, cannotCreateLayerWithDifferentInputPorts2) {
+    Builder::Network network("network");
+    Builder::ConcatLayer layer("concat layer");
+
+    layer.setAxis(0);
+    layer.setInputPorts({Port({1, 2, 55, 55}), Port({3, 2, 155, 55})});  // here
+    layer.setOutputPort(Port({1 + 3, 4, 55, 55}));
+
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/const_layer_test.cpp b/inference-engine/tests/unit/builders/const_layer_test.cpp
new file mode 100644
index 000000000..1905096cf
--- /dev/null
+++ b/inference-engine/tests/unit/builders/const_layer_test.cpp
@@ -0,0 +1,30 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_const_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ConstLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ConstLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::ConstLayer layer("const layer");
+    layer.setData(generateBlob(Precision::FP32, {3}, Layout::C));
+    const size_t ind = net.addLayer(layer);
+    ASSERT_NO_THROW(net.getLayer(ind)->validate(false));
+}
+
+TEST_F(ConstLayerBuilderTest, cannotCreateLayerWithoutData) {
+    Builder::Network net("network");
+    Builder::ConstLayer layer("const layer");
+    ASSERT_THROW(net.addLayer(layer),
+            InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/convolution_layer_test.cpp b/inference-engine/tests/unit/builders/convolution_layer_test.cpp
new file mode 100644
index 000000000..0b1ca8e92
--- /dev/null
+++ b/inference-engine/tests/unit/builders/convolution_layer_test.cpp
@@ -0,0 +1,307 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_convolution_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ConvolutionLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithoutWeight) {
+    Builder::Network network("Test");
+
+    Builder::ConvolutionLayer convBuilder("Convolution");
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+    convBuilder.setDilation({1, 1});
+    size_t ind = network.addLayer(convBuilder);
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, getExistsLayerFromNetworkBuilderWithInputPort) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+    convBuilder.setDilation({1, 1});
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    Builder::ConvolutionLayer convBuilderFromNetwork(network.getLayer(convId));
+
+    ASSERT_EQ(convBuilderFromNetwork.getStrides(), convBuilder.getStrides());
+    ASSERT_EQ(convBuilderFromNetwork.getKernel(), convBuilder.getKernel());
+    ASSERT_EQ(convBuilderFromNetwork.getPaddingsEnd(), convBuilder.getPaddingsEnd());
+    ASSERT_EQ(convBuilderFromNetwork.getPaddingsBegin(), convBuilder.getPaddingsBegin());
+    ASSERT_EQ(convBuilderFromNetwork.getOutDepth(), convBuilder.getOutDepth());
+    ASSERT_EQ(convBuilderFromNetwork.getDilation(), convBuilder.getDilation());
+}
+
+TEST_F(ConvolutionLayerBuilderTest, getExistsLayerFromNetworkBuilderWithoutInputPort) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setDilation({1, 1});
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    Builder::ConvolutionLayer convBuilderFromNetwork(network.getLayer(convId));
+
+    ASSERT_EQ(convBuilderFromNetwork.getStrides(), convBuilder.getStrides());
+    ASSERT_EQ(convBuilderFromNetwork.getKernel(), convBuilder.getKernel());
+    ASSERT_EQ(convBuilderFromNetwork.getPaddingsEnd(), convBuilder.getPaddingsEnd());
+    ASSERT_EQ(convBuilderFromNetwork.getPaddingsBegin(), convBuilder.getPaddingsBegin());
+    ASSERT_EQ(convBuilderFromNetwork.getOutDepth(), convBuilder.getOutDepth());
+    ASSERT_EQ(convBuilderFromNetwork.getDilation(), convBuilder.getDilation());
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongNumberOfInputChannels) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 64, 225, 225}));  // here
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_THROW(network.getLayer(convId)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, canCreateCorrcetConvolution) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));  // here
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_NO_THROW(network.getLayer(convId)->validate(false));
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithGroup) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setGroup(2);
+    convBuilder.setInputPort(Port({1, 6, 225, 225}));
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 6, 11, 11}, Layout::OIHW)));
+    // should be {96, 6 / 2, 11, 11}
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_THROW(network.getLayer(convId)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, canCreateConvolution) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setGroup(2);
+    convBuilder.setInputPort(Port({1, 6, 225, 225}));  // here
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_NO_THROW(network.getLayer(convId)->validate(false));
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongOutDepth) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(4);  // here
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+
+    idx_t convId = network.addLayer(convBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_THROW(network.getLayer(convId)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongStrides) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 0});  // here
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+    convBuilder.setPaddingsEnd({0, 0});
+    convBuilder.setPaddingsBegin({0, 0});
+    convBuilder.setDilation({0, 0});
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongKernel1) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 0});  // here
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongKernel2) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11, 11});  // here
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongDilation1) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+    convBuilder.setDilation({1, 0});  // here
+
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongDilation2) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convBuilder("Convolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+    convBuilder.setDilation({1, 1, 1});  // here
+
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ConvolutionLayerBuilderTest, canCreateLayerWithNumberOfGroupDividingNumberOfInputChannels) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convLayer("Convolution");
+
+    size_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 2, 11, 11}, Layout::OIHW)));
+    size_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+
+    convLayer.setStrides({4, 4});
+    convLayer.setKernel({11, 11});
+    convLayer.setOutDepth(96);
+    convLayer.setInputPort(Port({1, 6, 225, 225}));
+    convLayer.setDilation({1, 1});
+
+    convLayer.setGroup(3);
+    size_t convId = network.addLayer(convLayer);
+    network.connect({weightsId}, {convId, 1});
+    network.connect({biasesId}, {convId, 2});
+    ASSERT_NO_THROW(network.getLayer(convId)->validate(false));
+}
+
+TEST_F(ConvolutionLayerBuilderTest, canCreateLayerWithWeightsNotAvailableForGroup) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convLayer("Convolution");
+
+    size_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 5, 11, 11}, Layout::OIHW)));
+    size_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+
+    convLayer.setStrides({4, 4});
+    convLayer.setKernel({11, 11});
+    convLayer.setOutDepth(96);
+    convLayer.setInputPort(Port({1, 6, 225, 225}));
+    convLayer.setDilation({1, 1});
+
+    convLayer.setGroup(3);
+    ASSERT_THROW(network.addLayer({{weightsId}, {biasesId}}, convLayer),
+                 InferenceEngine::details::InferenceEngineException);  // 6 / 3 != 5
+}
+
+TEST_F(ConvolutionLayerBuilderTest, cannotCreateLayerWithNumberOfGroupNotDividingNumberOfInputChannels) {
+    Builder::Network network("Test");
+    Builder::ConvolutionLayer convLayer("Convolution");
+
+    size_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 2, 11, 11}, Layout::OIHW)));
+    size_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+
+    convLayer.setStrides({4, 4});
+    convLayer.setKernel({11, 11});
+    convLayer.setOutDepth(96);
+    convLayer.setInputPort(Port({1, 6, 225, 225}));
+    convLayer.setDilation({1, 1});
+
+    convLayer.setGroup(4);
+    ASSERT_THROW(network.addLayer({{weightsId}, {biasesId}}, convLayer),
+                 InferenceEngine::details::InferenceEngineException);  // 6 % 4 == 2
+}
+
diff --git a/inference-engine/tests/unit/builders/crop_layer_test.cpp b/inference-engine/tests/unit/builders/crop_layer_test.cpp
new file mode 100644
index 000000000..c098bd643
--- /dev/null
+++ b/inference-engine/tests/unit/builders/crop_layer_test.cpp
@@ -0,0 +1,84 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_crop_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class CropLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(CropLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network network("network");
+    Builder::CropLayer cropLayer("Crop layer");
+    std::vector<Port> input_ports;
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    cropLayer.setInputPorts(input_ports);
+    cropLayer.setOutputPort(Port({1, 21, 44, 44}));
+    cropLayer.setAxis({2, 3});
+    cropLayer.setOffset({0, 0});
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(cropLayer));
+    Builder::CropLayer layerFromNet(network.getLayer(ind));
+    ASSERT_EQ(layerFromNet.getAxis(), cropLayer.getAxis());
+    ASSERT_EQ(layerFromNet.getOffset(), cropLayer.getOffset());
+}
+
+TEST_F(CropLayerBuilderTest, cannotCreateLayerWithOneInputShape) {
+    Builder::Network network("network");
+    Builder::CropLayer cropLayer("Crop layer");
+    std::vector<Port> input_ports;
+    input_ports.push_back(Port({1, 21, 44, 44}));  // here
+    cropLayer.setInputPorts(input_ports);
+    cropLayer.setOutputPort(Port({1, 21, 44, 44}));
+    cropLayer.setAxis({2, 3});
+    cropLayer.setOffset({0, 0});
+    ASSERT_THROW(network.addLayer(cropLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CropLayerBuilderTest, cannotCreateLayerWithThreeInputShapes) {
+    Builder::Network network("network");
+    Builder::CropLayer cropLayer("Crop layer");
+    std::vector<Port> input_ports;
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    input_ports.push_back(Port({1, 21, 44, 44}));  // here
+    cropLayer.setInputPorts(input_ports);
+    cropLayer.setOutputPort(Port({1, 21, 44, 44}));
+    cropLayer.setAxis({2, 3});
+    cropLayer.setOffset({0, 0});
+    ASSERT_THROW(network.addLayer(cropLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CropLayerBuilderTest, cannotCreateLayerWithDifferentSizeOfAxisAndOffset) {
+    Builder::Network network("network");
+    Builder::CropLayer cropLayer("Crop layer");
+    std::vector<Port> input_ports;
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    cropLayer.setInputPorts(input_ports);
+    cropLayer.setOutputPort(Port({1, 21, 44, 44}));
+    cropLayer.setAxis({2, 3});
+    cropLayer.setOffset({0, 0, 0});  // here
+    ASSERT_THROW(network.addLayer(cropLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CropLayerBuilderTest, cannotCreateLayerWithSoBigOffset) {
+    Builder::Network network("network");
+    Builder::CropLayer cropLayer("Crop layer");
+    std::vector<Port> input_ports;
+    input_ports.push_back(Port({1, 21, 44, 44}));
+    input_ports.push_back(Port({1, 21, 34, 34}));
+    cropLayer.setInputPorts(input_ports);
+    cropLayer.setOutputPort(Port({1, 21, 34, 34}));
+    cropLayer.setAxis({2, 3});
+    cropLayer.setOffset({0, 50});  // here
+    ASSERT_THROW(network.addLayer(cropLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/ctc_greedy_decoder_layer_test.cpp b/inference-engine/tests/unit/builders/ctc_greedy_decoder_layer_test.cpp
new file mode 100644
index 000000000..a8e7bf51e
--- /dev/null
+++ b/inference-engine/tests/unit/builders/ctc_greedy_decoder_layer_test.cpp
@@ -0,0 +1,42 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_ctc_greedy_decoder_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class CTCGreedyDecoderLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(CTCGreedyDecoderLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network network("network");
+    Builder::CTCGreedyDecoderLayer ctcGreedyDecoderLayer("CTCGreedyDecoder");
+    ctcGreedyDecoderLayer.setInputPorts({Port({88, 1, 71}), Port({88, 1})});
+    ctcGreedyDecoderLayer.setOutputPort(Port({1, 88, 1, 1}));
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(ctcGreedyDecoderLayer));
+    Builder::CTCGreedyDecoderLayer layerFromNet(network.getLayer(ind));
+    ASSERT_EQ(ctcGreedyDecoderLayer.getInputPorts(), layerFromNet.getInputPorts());
+    ASSERT_EQ(ctcGreedyDecoderLayer.getOutputPort(), layerFromNet.getOutputPort());
+}
+
+TEST_F(CTCGreedyDecoderLayerBuilderTest, cannotCreateLayerWithoutInputPorts) {
+    Builder::Network network("network");
+    Builder::CTCGreedyDecoderLayer ctcGreedyDecoderLayer("CTCGreedyDecoder");
+    ctcGreedyDecoderLayer.setOutputPort(Port({1, 88, 1, 1}));
+    ASSERT_THROW(network.addLayer(ctcGreedyDecoderLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(CTCGreedyDecoderLayerBuilderTest, cannotCreateLayerWithThreeInputPorts) {
+    Builder::Network network("network");
+    Builder::CTCGreedyDecoderLayer ctcGreedyDecoderLayer("CTCGreedyDecoder");
+    ctcGreedyDecoderLayer.setInputPorts({Port({88, 1, 71}), Port({88, 1}), Port({88, 1})});
+    ctcGreedyDecoderLayer.setOutputPort(Port({1, 88, 1, 1}));
+    ASSERT_THROW(network.addLayer(ctcGreedyDecoderLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/deconvolution_layer_test.cpp b/inference-engine/tests/unit/builders/deconvolution_layer_test.cpp
new file mode 100644
index 000000000..73a9657bc
--- /dev/null
+++ b/inference-engine/tests/unit/builders/deconvolution_layer_test.cpp
@@ -0,0 +1,306 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_deconvolution_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class DeconvolutionLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithoutWeight) {
+    Builder::Network network("Test");
+
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));
+    deconvBuilder.setDilation({1, 1});
+    size_t ind = network.addLayer(deconvBuilder);
+    ASSERT_THROW(network.getLayer(ind)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, getExistsLayerFromNetworkBuilderWithInputPort) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));
+    deconvBuilder.setDilation({1, 1});
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    Builder::DeconvolutionLayer deconvBuilderFromNetwork(network.getLayer(convId));
+
+    ASSERT_EQ(deconvBuilderFromNetwork.getStrides(), deconvBuilder.getStrides());
+    ASSERT_EQ(deconvBuilderFromNetwork.getKernel(), deconvBuilder.getKernel());
+    ASSERT_EQ(deconvBuilderFromNetwork.getPaddingsEnd(), deconvBuilder.getPaddingsEnd());
+    ASSERT_EQ(deconvBuilderFromNetwork.getPaddingsBegin(), deconvBuilder.getPaddingsBegin());
+    ASSERT_EQ(deconvBuilderFromNetwork.getOutDepth(), deconvBuilder.getOutDepth());
+    ASSERT_EQ(deconvBuilderFromNetwork.getDilation(), deconvBuilder.getDilation());
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, getExistsLayerFromNetworkBuilderWithoutInputPort) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setDilation({1, 1});
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    Builder::DeconvolutionLayer deconvBuilderFromNetwork(network.getLayer(convId));
+
+    ASSERT_EQ(deconvBuilderFromNetwork.getStrides(), deconvBuilder.getStrides());
+    ASSERT_EQ(deconvBuilderFromNetwork.getKernel(), deconvBuilder.getKernel());
+    ASSERT_EQ(deconvBuilderFromNetwork.getPaddingsEnd(), deconvBuilder.getPaddingsEnd());
+    ASSERT_EQ(deconvBuilderFromNetwork.getPaddingsBegin(), deconvBuilder.getPaddingsBegin());
+    ASSERT_EQ(deconvBuilderFromNetwork.getOutDepth(), deconvBuilder.getOutDepth());
+    ASSERT_EQ(deconvBuilderFromNetwork.getDilation(), deconvBuilder.getDilation());
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongNumberOfInputChannels) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 64, 225, 225}));  // here
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_THROW(network.getLayer(convId)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, canCreateCorrcetConvolution) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));  // here
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_NO_THROW(network.getLayer(convId)->validate(false));
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithGroup) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setGroup(2);
+    deconvBuilder.setInputPort(Port({1, 6, 225, 225}));
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 6, 11, 11}, Layout::OIHW)));
+    // should be {96, 6 / 2, 11, 11}
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_THROW(network.getLayer(convId)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, canCreateConvolution) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setGroup(2);
+    deconvBuilder.setInputPort(Port({1, 6, 225, 225}));  // here
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_NO_THROW(network.getLayer(convId)->validate(false));
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongOutDepth) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(4);  // here
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));
+
+    idx_t convId = network.addLayer(deconvBuilder);
+
+    idx_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    network.connect({weightsId}, {convId, 1});
+
+    idx_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    network.connect({biasesId}, {convId, 2});
+
+    ASSERT_THROW(network.getLayer(convId)->validate(false), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongStrides) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 0});  // here
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));
+    deconvBuilder.setPaddingsEnd({0, 0});
+    deconvBuilder.setPaddingsBegin({0, 0});
+    deconvBuilder.setDilation({0, 0});
+    ASSERT_THROW(network.addLayer(deconvBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongKernel1) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 0});  // here
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));
+
+    ASSERT_THROW(network.addLayer(deconvBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongKernel2) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer convBuilder("Deconvolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11, 11});  // here
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongDilation1) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 3, 225, 225}));
+    deconvBuilder.setDilation({1, 0});  // here
+
+    ASSERT_THROW(network.addLayer(deconvBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateConvolutionWithWrongDilation2) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer convBuilder("Deconvolution");
+
+    convBuilder.setStrides({4, 4});
+    convBuilder.setKernel({11, 11});
+    convBuilder.setOutDepth(96);
+    convBuilder.setInputPort(Port({1, 3, 225, 225}));
+    convBuilder.setDilation({1, 1, 1});  // here
+
+    ASSERT_THROW(network.addLayer(convBuilder), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, canCreateLayerWithNumberOfGroupDividingNumberOfInputChannels) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    size_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 2, 11, 11}, Layout::OIHW)));
+    size_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 6, 225, 225}));
+    deconvBuilder.setDilation({1, 1});
+
+    deconvBuilder.setGroup(3);
+    size_t convId = network.addLayer(deconvBuilder);
+    network.connect({weightsId}, {convId, 1});
+    network.connect({biasesId}, {convId, 2});
+    ASSERT_NO_THROW(network.getLayer(convId)->validate(false));
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, canCreateLayerWithWeightsNotAvailableForGroup) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    size_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 5, 11, 11}, Layout::OIHW)));
+    size_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 6, 225, 225}));
+    deconvBuilder.setDilation({1, 1});
+
+    deconvBuilder.setGroup(3);
+    ASSERT_THROW(network.addLayer({{weightsId}, {biasesId}}, deconvBuilder),
+                 InferenceEngine::details::InferenceEngineException);  // 6 / 3 != 5
+}
+
+TEST_F(DeconvolutionLayerBuilderTest, cannotCreateLayerWithNumberOfGroupNotDividingNumberOfInputChannels) {
+    Builder::Network network("Test");
+    Builder::DeconvolutionLayer deconvBuilder("Deconvolution");
+
+    size_t weightsId = network.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 2, 11, 11}, Layout::OIHW)));
+    size_t biasesId = network.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+
+    deconvBuilder.setStrides({4, 4});
+    deconvBuilder.setKernel({11, 11});
+    deconvBuilder.setOutDepth(96);
+    deconvBuilder.setInputPort(Port({1, 6, 225, 225}));
+    deconvBuilder.setDilation({1, 1});
+
+    deconvBuilder.setGroup(4);
+    ASSERT_THROW(network.addLayer({{weightsId}, {biasesId}}, deconvBuilder),
+                 InferenceEngine::details::InferenceEngineException);  // 6 % 4 == 2
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/detection_output_layer_test.cpp b/inference-engine/tests/unit/builders/detection_output_layer_test.cpp
new file mode 100644
index 000000000..e636be928
--- /dev/null
+++ b/inference-engine/tests/unit/builders/detection_output_layer_test.cpp
@@ -0,0 +1,117 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_detection_output_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class DetectionOutputLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(DetectionOutputLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network network("network");
+    Builder::DetectionOutputLayer layer("detection output layer");
+    layer.setNumClasses(2);
+    layer.setShareLocation(true);
+    layer.setBackgroudLabelId(-1);
+    layer.setNMSThreshold(0.45);
+    layer.setTopK(400);
+    layer.setCodeType("caffe.PriorBoxParameter.CENTER_SIZE");
+    layer.setVariantEncodedInTarget(false);
+    layer.setKeepTopK(200);
+    layer.setConfidenceThreshold(0.01);
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = network.addLayer(layer));
+    Builder::DetectionOutputLayer layerFromNet(network.getLayer(ind));
+    ASSERT_EQ(layerFromNet.getName(), layer.getName());
+    ASSERT_EQ(layerFromNet.getNumClasses(), layer.getNumClasses());
+    ASSERT_EQ(layerFromNet.getShareLocation(), layer.getShareLocation());
+    ASSERT_EQ(layerFromNet.getBackgroudLabelId(), layer.getBackgroudLabelId());
+    ASSERT_EQ(layerFromNet.getNMSThreshold(), layer.getNMSThreshold());
+    ASSERT_EQ(layerFromNet.getTopK(), layer.getTopK());
+    ASSERT_EQ(layerFromNet.getCodeType(), layer.getCodeType());
+    ASSERT_EQ(layerFromNet.getVariantEncodedInTarget(), layer.getVariantEncodedInTarget());
+    ASSERT_EQ(layerFromNet.getKeepTopK(), layer.getKeepTopK());
+    ASSERT_EQ(layerFromNet.getConfidenceThreshold(), layer.getConfidenceThreshold());
+}
+
+TEST_F(DetectionOutputLayerBuilderTest, cannotCreateLayerWithWrongNumClasses) {
+    Builder::Network network("network");
+    Builder::DetectionOutputLayer layer("detection output layer");
+    layer.setNumClasses(0);  // here
+    layer.setShareLocation(true);
+    layer.setBackgroudLabelId(-1);
+    layer.setNMSThreshold(0.45);
+    layer.setTopK(400);
+    layer.setCodeType("caffe.PriorBoxParameter.CENTER_SIZE");
+    layer.setVariantEncodedInTarget(false);
+    layer.setKeepTopK(200);
+    layer.setConfidenceThreshold(0.01);
+    ASSERT_THROW(network.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DetectionOutputLayerBuilderTest, cannotCreateLayerWithWrongCodeType) {
+    Builder::Network network("network");
+    Builder::DetectionOutputLayer layer("detection output layer");
+    layer.setNumClasses(2);
+    layer.setShareLocation(true);
+    layer.setBackgroudLabelId(-1);
+    layer.setNMSThreshold(0.45);
+    layer.setTopK(400);
+    layer.setCodeType("trololo");  // here
+    layer.setVariantEncodedInTarget(false);
+    layer.setKeepTopK(200);
+    layer.setConfidenceThreshold(0.01);
+    ASSERT_THROW(network.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DetectionOutputLayerBuilderTest, cannotCreateLayerWithWrongBackLabelId) {
+    Builder::Network network("network");
+    Builder::DetectionOutputLayer layer("detection output layer");
+    layer.setNumClasses(2);
+    layer.setShareLocation(true);
+    layer.setBackgroudLabelId(-100);  // here
+    layer.setNMSThreshold(0.45);
+    layer.setTopK(400);
+    layer.setCodeType("caffe.PriorBoxParameter.CENTER_SIZE");
+    layer.setVariantEncodedInTarget(false);
+    layer.setKeepTopK(200);
+    layer.setConfidenceThreshold(0.01);
+    ASSERT_THROW(network.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DetectionOutputLayerBuilderTest, cannotCreateLayerWithWrongNMSThreshold) {
+    Builder::Network network("network");
+    Builder::DetectionOutputLayer layer("detection output layer");
+    layer.setNumClasses(2);
+    layer.setShareLocation(true);
+    layer.setBackgroudLabelId(-1);
+    layer.setNMSThreshold(0);  // here
+    layer.setTopK(400);
+    layer.setCodeType("caffe.PriorBoxParameter.CENTER_SIZE");
+    layer.setVariantEncodedInTarget(false);
+    layer.setKeepTopK(200);
+    layer.setConfidenceThreshold(0.01);
+    ASSERT_THROW(network.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(DetectionOutputLayerBuilderTest, cannotCreateLayerWithWrongConfidenceThreshold) {
+    Builder::Network network("network");
+    Builder::DetectionOutputLayer layer("detection output layer");
+    layer.setNumClasses(2);
+    layer.setShareLocation(true);
+    layer.setBackgroudLabelId(-1);
+    layer.setNMSThreshold(0.45);
+    layer.setTopK(400);
+    layer.setCodeType("caffe.PriorBoxParameter.CENTER_SIZE");
+    layer.setVariantEncodedInTarget(false);
+    layer.setKeepTopK(200);
+    layer.setConfidenceThreshold(0);  // here
+    ASSERT_THROW(network.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/eltwise_layer_test.cpp b/inference-engine/tests/unit/builders/eltwise_layer_test.cpp
new file mode 100644
index 000000000..d85595a89
--- /dev/null
+++ b/inference-engine/tests/unit/builders/eltwise_layer_test.cpp
@@ -0,0 +1,102 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_eltwise_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class EltwiseLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(EltwiseLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::EltwiseLayer layer("Eltwise layer");
+
+    layer.setInputPorts({Port({1, 2, 3, 4}), Port({1, 2, 3, 4})});
+    layer.setOutputPort(Port({1, 2, 3, 4}));
+    size_t ind = 0;
+    ASSERT_NO_THROW(ind = net.addLayer(layer));
+    Builder::EltwiseLayer layerFromNet(net.getLayer(ind));
+
+    ASSERT_EQ(layer.getInputPorts(), layerFromNet.getInputPorts());
+    ASSERT_EQ(layer.getOutputPort(), layerFromNet.getOutputPort());
+    ASSERT_EQ(layer.getEltwiseType(), layerFromNet.getEltwiseType());
+}
+
+TEST_F(EltwiseLayerBuilderTest, checkOnlineEltwiseTypeChanging) {
+    Builder::Network net("network");
+    Builder::EltwiseLayer layer("Eltwise layer");
+
+    layer.setInputPorts({Port({1, 2, 3}), Port({1, 2, 3})});
+    layer.setOutputPort(Port({1, 2, 3}));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::MAX);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::MAX);
+    ASSERT_NO_THROW(net.addLayer(layer));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::DIV);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::DIV);
+    ASSERT_NO_THROW(net.addLayer(layer));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::MIN);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::MIN);
+    ASSERT_NO_THROW(net.addLayer(layer));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::MUL);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::MUL);
+    ASSERT_NO_THROW(net.addLayer(layer));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::SQUARED_DIFF);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::SQUARED_DIFF);
+    ASSERT_NO_THROW(net.addLayer(layer));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUB);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::SUB);
+    ASSERT_NO_THROW(net.addLayer(layer));
+
+    layer.setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM);
+    ASSERT_EQ(layer.getEltwiseType(), Builder::EltwiseLayer::EltwiseType::SUM);
+    ASSERT_NO_THROW(net.addLayer(layer));
+}
+
+TEST_F(EltwiseLayerBuilderTest, cannotCreateLayerWithOneInputPort) {
+    Builder::Network net("network");
+    Builder::EltwiseLayer layer("Eltwise layer");
+
+    layer.setInputPorts({Port({1, 2, 3, 4})});   // here
+    layer.setOutputPort(Port({1, 2, 3, 4}));
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(EltwiseLayerBuilderTest, cannotCreateLayerWithThreeInputPort) {
+    Builder::Network net("network");
+    Builder::EltwiseLayer layer("Eltwise layer");
+
+    layer.setInputPorts({Port({1, 2, 3, 4}), Port({1, 2, 3, 4}), Port({1, 2, 3, 4})});   // here
+    layer.setOutputPort(Port({1, 2, 3, 4}));
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(EltwiseLayerBuilderTest, cannotCreateLayerWithDifferentInputPorts) {
+    Builder::Network net("network");
+    Builder::EltwiseLayer layer("Eltwise layer");
+
+    layer.setInputPorts({Port({1, 2, 3, 4}), Port({1, 2, 3, 1000})});   // here
+    layer.setOutputPort(Port({1, 2, 3, 4}));
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(EltwiseLayerBuilderTest, cannotCreateLayerWithDifferentInputAndOutputPorts) {
+    Builder::Network net("network");
+    Builder::EltwiseLayer layer("Eltwise layer");
+
+    layer.setInputPorts({Port({1, 2, 3, 4}), Port({1, 2, 3, 4})});
+    layer.setOutputPort(Port({1, 2, 3, 100}));   // here
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
diff --git a/inference-engine/tests/unit/builders/elu_layer_test.cpp b/inference-engine/tests/unit/builders/elu_layer_test.cpp
new file mode 100644
index 000000000..4ddbda3c8
--- /dev/null
+++ b/inference-engine/tests/unit/builders/elu_layer_test.cpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_elu_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ELULayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ELULayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::ELULayer eluLayer("ELU_layer");
+    eluLayer.setAlpha(100);
+    size_t ind = net.addLayer(eluLayer);
+    Builder::ELULayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(eluLayer.getAlpha(), layerFromNet.getAlpha());
+}
+
+TEST_F(ELULayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeELULayerPtr = std::make_shared<Builder::Layer>("ELU", "ELU layer");
+    fakeELULayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeELULayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::ELULayer eluLayer(fakeELULayerPtr);
+    eluLayer.setAlpha(100);
+    ASSERT_THROW(net.addLayer(eluLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(ELULayerBuilderTest, cannotCreateLayerWithWrongAlpha) {
+    Builder::Network net("network");
+    Builder::ELULayer eluLayer("ELU_layer");
+    eluLayer.setAlpha(-100);
+    ASSERT_THROW(net.addLayer(eluLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/input_layer_test.cpp b/inference-engine/tests/unit/builders/input_layer_test.cpp
index 6a30fdb64..2e840de1c 100644
--- a/inference-engine/tests/unit/builders/input_layer_test.cpp
+++ b/inference-engine/tests/unit/builders/input_layer_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -27,6 +27,6 @@ TEST_F(InputLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
     ASSERT_EQ(inBuilderFromNetwork.getPort().shape(), Port({1, 3, 3, 3}).shape());
     inBuilderFromNetwork.setPort(Port({1, 3, 4, 4}));
     ASSERT_EQ(inBuilderFromNetwork.getPort().shape(), Port({1, 3, 4, 4}).shape());
-    ASSERT_EQ(network.getLayer(inId).getOutputPorts()[0].shape(), Port({1, 3, 4, 4}).shape());
+    ASSERT_EQ(network.getLayer(inId)->getOutputPorts()[0].shape(), Port({1, 3, 4, 4}).shape());
     ASSERT_EQ(inBuilder.getPort().shape(), Port({1, 3, 3, 3}).shape());
 }
 \ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/mvn_layer_test.cpp b/inference-engine/tests/unit/builders/mvn_layer_test.cpp
new file mode 100644
index 000000000..01cf4481e
--- /dev/null
+++ b/inference-engine/tests/unit/builders/mvn_layer_test.cpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_mvn_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class MVNLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(MVNLayerBuilderTest, getExistsLayerFromNetworkBuilder1) {
+    Builder::Network net("network");
+    Builder::MVNLayer mvnLayer("MVN_layer");
+    mvnLayer.setEpsilon(99.9).setAcrossChannels(true).setNormalize(true);
+    size_t ind = net.addLayer(mvnLayer);
+    Builder::MVNLayer layerFromNet(net.getLayer(ind));
+}
+
+TEST_F(MVNLayerBuilderTest, getExistsLayerFromNetworkBuilder2) {
+    Builder::Network net("network");
+    Builder::MVNLayer mvnLayer("MVN_layer");
+    mvnLayer.setEpsilon(99.9).setAcrossChannels(true).setNormalize(false);
+    size_t ind = net.addLayer(mvnLayer);
+    Builder::MVNLayer layerFromNet(net.getLayer(ind));
+}
+
+TEST_F(MVNLayerBuilderTest, getExistsLayerFromNetworkBuilder3) {
+    Builder::Network net("network");
+    Builder::MVNLayer mvnLayer("MVN_layer");
+    mvnLayer.setEpsilon(99.9).setAcrossChannels(false).setNormalize(true);
+    size_t ind = net.addLayer(mvnLayer);
+    Builder::MVNLayer layerFromNet(net.getLayer(ind));
+}
+
+TEST_F(MVNLayerBuilderTest, getExistsLayerFromNetworkBuilder4) {
+    Builder::Network net("network");
+    Builder::MVNLayer mvnLayer("MVN_layer");
+    mvnLayer.setEpsilon(99.9).setAcrossChannels(false).setNormalize(false);
+    size_t ind = net.addLayer(mvnLayer);
+    Builder::MVNLayer layerFromNet(net.getLayer(ind));
+}
+
+TEST_F(MVNLayerBuilderTest, cannotCreateLayerWithWrongEpsion) {
+    Builder::Network net("network");
+    Builder::MVNLayer mvnLayer("MVN_layer");
+    mvnLayer.setEpsilon(-100).setAcrossChannels(true).setNormalize(true);  // here
+    ASSERT_THROW(net.addLayer(mvnLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(MVNLayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeMVNLayerPtr = std::make_shared<Builder::Layer>("MVN", "MVN layer");
+    fakeMVNLayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeMVNLayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::MVNLayer mvnLayer(fakeMVNLayerPtr);
+    mvnLayer.setEpsilon(100).setAcrossChannels(true).setNormalize(true);
+    ASSERT_THROW(net.addLayer(mvnLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/network_builder_test.cpp b/inference-engine/tests/unit/builders/network_builder_test.cpp
index 3b53f1271..45a18a1c7 100644
--- a/inference-engine/tests/unit/builders/network_builder_test.cpp
+++ b/inference-engine/tests/unit/builders/network_builder_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -43,61 +43,65 @@ protected:
 
 public:
 
-    Builder::Network prepateAlexnetBuilder() {
+    Builder::Network prepateAlexnetBuilder(Precision precision = Precision::FP32) {
         Context ctx;
         Builder::Network builder(ctx, "AlexNet");
+        idx_t weightsId, biasesId;
         idx_t layerId = builder.addLayer(Builder::InputLayer(alexNetNames[0]).setPort(Port({1,3, 227, 227})));
-        layerId = builder.addLayer({{layerId}}, Builder::ScaleShiftLayer(alexNetNames[1]).setBiases(generateBlob(Precision::FP32, {3}, Layout::C)));
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer(alexNetNames[2]).setKernel({11, 11}).setStrides({4, 4}).setOutDepth(96)
-                .setWeights(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW))
-                .setBiases(generateBlob(Precision::FP32, {96}, Layout::C)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {3}, Layout::C)));
+        layerId = builder.addLayer({{layerId}}, Builder::ScaleShiftLayer(alexNetNames[1]));
+        builder.connect({biasesId}, {layerId, 2});
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {96, 3, 11, 11}, Layout::OIHW)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {96}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[2]).setKernel({11, 11})
+                .setStrides({4, 4}).setOutDepth(96));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[3]));
         layerId = builder.addLayer({{layerId}}, Builder::NormLayer(alexNetNames[4]).setAlpha(9.999999747378752e-05f).setBeta(0.75f).setSize(5).setAcrossMaps(true));
         layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer(alexNetNames[5]).setExcludePad(false).setKernel({3, 3}).setPaddingsBegin({0, 0})
                 .setPaddingsEnd({0, 0}).setPoolingType(Builder::PoolingLayer::PoolingType::MAX).setStrides({2, 2}));
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer(alexNetNames[6]).setKernel({5, 5}).setStrides({1, 1}).setOutDepth(256)
-                .setPaddingsBegin({2, 2}).setPaddingsEnd({2, 2}).setGroup(2).setDilation({1, 1})
-                .setWeights(generateBlob(Precision::FP32, {96, 256, 5, 5}, Layout::OIHW))
-                .setBiases(generateBlob(Precision::FP32, {256}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {256, 96 / 2, 5, 5}, Layout::OIHW)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {256}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[6]).setKernel({5, 5}).setStrides({1, 1}).setOutDepth(256)
+                .setPaddingsBegin({2, 2}).setPaddingsEnd({2, 2}).setGroup(2).setDilation({1, 1}));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[7]));
         layerId = builder.addLayer({{layerId}}, Builder::NormLayer(alexNetNames[8]).setAlpha(9.999999747378752e-05f).setBeta(0.75f).setSize(5).setAcrossMaps(true));
         layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer(alexNetNames[9]).setExcludePad(false).setKernel({3, 3}).setPaddingsBegin({0, 0})
                 .setPaddingsEnd({0, 0}).setPoolingType(Builder::PoolingLayer::PoolingType::MAX).setStrides({2, 2}));
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer(alexNetNames[10]).setKernel({3, 3}).setStrides({1, 1}).setOutDepth(384)
-                .setPaddingsBegin({1, 1}).setPaddingsEnd({1, 1}).setGroup(1).setDilation({1, 1})
-                .setWeights(generateBlob(Precision::FP32, {256, 384, 3, 3}, Layout::OIHW))
-                .setBiases(generateBlob(Precision::FP32, {384}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {256, 384, 3, 3}, Layout::OIHW)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {384}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[10]).setKernel({3, 3})
+                .setStrides({1, 1}).setOutDepth(384).setPaddingsBegin({1, 1}).setPaddingsEnd({1, 1}).setGroup(1).setDilation({1, 1}));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[11]));
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer(alexNetNames[12]).setKernel({3, 3}).setStrides({1, 1}).setOutDepth(384)
-                .setPaddingsBegin({1, 1}).setPaddingsEnd({1, 1}).setGroup(2).setDilation({1, 1})
-                .setWeights(generateBlob(Precision::FP32, {384, 384, 3, 3}, Layout::OIHW))
-                .setBiases(generateBlob(Precision::FP32, {384}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {384, 384 / 2, 3, 3}, Layout::OIHW)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {384}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[12]).setKernel({3, 3})
+                .setStrides({1, 1}).setOutDepth(384).setPaddingsBegin({1, 1}).setPaddingsEnd({1, 1}).setGroup(2).setDilation({1, 1}));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[13]));
-        layerId = builder.addLayer({{layerId}}, Builder::ConvolutionLayer(alexNetNames[14]).setKernel({3, 3}).setStrides({1, 1}).setOutDepth(256)
-                .setPaddingsBegin({1, 1}).setPaddingsEnd({1, 1}).setGroup(2).setDilation({1, 1})
-                .setWeights(generateBlob(Precision::FP32, {256, 384, 3, 3}, Layout::OIHW))
-                .setBiases(generateBlob(Precision::FP32, {384}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {256, 384 / 2, 3, 3}, Layout::OIHW)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {256}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[14]).setKernel({3, 3})
+                .setStrides({1, 1}).setOutDepth(256).setPaddingsBegin({1, 1}).setPaddingsEnd({1, 1}).setGroup(2).setDilation({1, 1}));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[15]));
         layerId = builder.addLayer({{layerId}}, Builder::PoolingLayer(alexNetNames[16]).setExcludePad(false).setKernel({3, 3}).setPaddingsBegin({0, 0})
                 .setPaddingsEnd({0, 0}).setPoolingType(Builder::PoolingLayer::PoolingType::MAX).setStrides({2, 2}));
-        layerId = builder.addLayer({{layerId}}, Builder::FullyConnectedLayer(alexNetNames[17]).setOutputNum(4096)
-                .setWeights(generateBlob(Precision::FP32, {4096, 256, 6, 6}, Layout::OIHW))
-                .setBiases(generateBlob(Precision::FP32, {4096}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {4096, 256, 6, 6}, Layout::OIHW)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {4096}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer(alexNetNames[17]).setOutputNum(4096));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[18]));
-        layerId = builder.addLayer({{layerId}}, Builder::FullyConnectedLayer(alexNetNames[19]).setOutputNum(4096)
-                .setWeights(generateBlob(Precision::FP32, {4096, 4096}, Layout::NC))
-                .setBiases(generateBlob(Precision::FP32, {4096}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {4096, 4096}, Layout::NC)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {4096}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer(alexNetNames[19]).setOutputNum(4096));
         layerId = builder.addLayer({{layerId}}, Builder::ReLULayer(alexNetNames[20]));
-        layerId = builder.addLayer({{layerId}}, Builder::FullyConnectedLayer(alexNetNames[21]).setOutputNum(1000)
-                .setWeights(generateBlob(Precision::FP32, {1000, 4096}, Layout::NC))
-                .setBiases(generateBlob(Precision::FP32, {1000}, Layout::C)));
+        weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(precision, {1000, 4096}, Layout::NC)));
+        biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(precision, {1000}, Layout::C)));
+        layerId = builder.addLayer({{layerId}, {weightsId}, {biasesId}}, Builder::FullyConnectedLayer(alexNetNames[21]).setOutputNum(1000));
         layerId = builder.addLayer({{layerId}}, Builder::SoftMaxLayer(alexNetNames[22]).setAxis(1));
 
         idx_t outputId = builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer(alexNetNames[23]));
         return builder;
     }
 
-    const INetwork::Ptr createAlexnet() {
+    const INetwork::CPtr createAlexnet() {
         return prepateAlexnetBuilder().build();
     }
 
@@ -106,12 +110,11 @@ public:
             auto connections = network.getLayerConnections(layer->getId());
             CNNLayerPtr cnnLayer;
             StatusCode sts = cnnNetwork.getLayerByName(layer->getName().c_str(), cnnLayer, nullptr);
-            if (sts != OK && layer->getType() == "Output")
+            if (sts != OK && (layer->getType() == "Output" || layer->getType() == "Const"))
                 continue;
             else if (sts != OK)
                 THROW_IE_EXCEPTION << "Cannot find CNNLayer by name: " << layer->getName();
 
-
             // Output connections
             for (size_t i = 0; i < cnnLayer->outData.size(); i++) {
                 for (const auto& it : cnnLayer->outData[i]->inputTo) {
@@ -124,9 +127,16 @@ public:
                     }
 
                     for (auto conIt = connections.begin(); conIt != connections.end(); conIt++) {
+                        const auto& inputPorts = network.getLayer(conIt->to().layerId())->getInputPorts();
+                        idx_t realPortId(0);
+                        for (size_t q = 0; q < conIt->to().portId() && q < inputPorts.size(); q++) {
+                            if (inputPorts[q].getParameters().find("type") == inputPorts[q].getParameters().end())
+                                realPortId++;
+                        }
+
                         if (conIt->from().layerId() == layer->getId() && conIt->from().portId() == i &&
-                            network.getLayer(conIt->to().layerId())->getName() == it.second->name &&
-                            conIt->to().portId() == j) {
+                                network.getLayer(conIt->to().layerId())->getName() == it.second->name &&
+                                realPortId == j) {
                             connections.erase(conIt);
                             break;
                         }
@@ -162,7 +172,20 @@ public:
             if (connections.size() == 1 && network.getLayer(connections[0].to().layerId())->getType() == "Output")
                 connections.erase(connections.begin());
 
-            if (!connections.empty())
+            bool connectionsConnected = true;
+            for (const auto& connection : connections) {
+                if (connection.to().layerId() != layer->getId()) {
+                    connectionsConnected = false;
+                    break;
+                }
+                const auto& port = layer->getInputPorts()[connection.to().portId()];
+                if (port.getParameters().find("type") == port.getParameters().end()) {
+                    connectionsConnected = false;
+                    break;
+                }
+            }
+
+            if (!connectionsConnected)
                 THROW_IE_EXCEPTION << "Not all connections were connected.";
         }
     }
@@ -282,18 +305,22 @@ TEST_F(NetworkBuilderTest, checkReshapeAlexNet) {
 
     Builder::Network builder = prepateAlexnetBuilder();
     for (const auto &layer : builder.getLayers()) {
-        if (layer.getType() == "Input") {
-            ASSERT_EQ(outPorts[layer.getName()][0], layer.getOutputPorts()[0].shape());
-        } else {
-            for (size_t j = 0; j < layer.getOutputPorts().size(); j++) {
-                ASSERT_TRUE(layer.getOutputPorts()[j].shape().empty());
+        if (layer->getType() == "Input") {
+            ASSERT_EQ(outPorts[layer->getName()][0], layer->getOutputPorts()[0].shape());
+        } else if (layer->getType() != "Const") {
+            for (const auto &port : layer->getOutputPorts()) {
+                ASSERT_TRUE(port.shape().empty());
             }
         }
     }
-    INetwork::Ptr graph;
+    INetwork::CPtr graph;
     ASSERT_NO_THROW(graph = builder.build());
     for (const auto &layer : *graph) {
+        if (layer->getType() == "Const")
+            continue;
         for (size_t i = 0; i < layer->getInputPorts().size(); i++) {
+            if (layer->getInputPorts()[i].getParameters().find("type") != layer->getInputPorts()[i].getParameters().end())
+                continue;
             ASSERT_EQ(inPorts[layer->getName()][i], layer->getInputPorts()[i].shape());
         }
         for (size_t i = 0; i < layer->getOutputPorts().size(); i++) {
@@ -306,10 +333,10 @@ TEST_F(NetworkBuilderTest, checkNoImplWithCorrectPorts) {
     Context ctx;
     Builder::Network builder(ctx, "TestAlexNet");
     idx_t inId = builder.addLayer(Builder::InputLayer(alexNetNames[0]).setPort(Port({1,3, 227, 227})));
-    idx_t convId = builder.addLayer({{inId}}, Builder::ConvolutionLayer(alexNetNames[2]).setKernel({11, 11}).setStrides({4, 4}).setOutDepth(96)
-            .setInputPort(Port({1,3, 227, 227})).setOutputPort(Port({1, 96, 55, 55}))
-            .setWeights(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW))
-            .setBiases(generateBlob(Precision::FP32, {96}, Layout::C)));
+    idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    idx_t biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    idx_t convId = builder.addLayer({{inId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[2]).setKernel({11, 11})
+            .setStrides({4, 4}).setOutDepth(96).setInputPort(Port({1,3, 227, 227})).setOutputPort(Port({1, 96, 55, 55})));
     idx_t testLayerId = builder.addLayer({PortInfo(convId)}, Builder::Layer("TestLayer", "testPort")
             .setInputPorts({Port({1, 96, 55, 55})}).setOutputPorts({Port({1, 96, 55, 55})}));
     idx_t outputId = builder.addLayer({PortInfo(testLayerId)}, Builder::OutputLayer("out").setPort({Port({1, 96, 55, 55})}));
@@ -321,33 +348,34 @@ TEST_F(NetworkBuilderTest, checkNoImplWithIncorrectPorts) {
     Context ctx;
     Builder::Network builder(ctx, "TestAlexNet");
     idx_t inId = builder.addLayer(Builder::InputLayer(alexNetNames[0]).setPort(Port({1,3, 227, 227})));
-    idx_t convId = builder.addLayer({{inId}}, Builder::ConvolutionLayer(alexNetNames[2]).setKernel({11, 11}).setStrides({4, 4}).setOutDepth(96)
-            .setInputPort(Port({1,3, 227, 227})).setOutputPort(Port({1, 96, 55, 55}))
-            .setWeights(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW))
-            .setBiases(generateBlob(Precision::FP32, {96}, Layout::C)));
-    idx_t testLayerId = builder.addLayer({PortInfo(convId)}, Builder::Layer("TestLayer", "testPort")
-            .setInputPorts({Port({1, 3, 55, 55})}).setOutputPorts({Port({1, 96, 55, 55})}));
-
-    ASSERT_THROW(builder.build(), InferenceEngine::details::InferenceEngineException);
+    idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {96, 3, 11, 11}, Layout::OIHW)));
+    idx_t biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {96}, Layout::C)));
+    idx_t convId = builder.addLayer({{inId}, {weightsId}, {biasesId}}, Builder::ConvolutionLayer(alexNetNames[2]).setKernel({11, 11})
+            .setStrides({4, 4}).setOutDepth(96).setInputPort(Port({1,3, 227, 227})).setOutputPort(Port({1, 96, 55, 55})));
+    ASSERT_THROW(builder.addLayer({PortInfo(convId)}, Builder::Layer("TestLayer", "testPort")
+            .setInputPorts({Port({1, 3, 55, 55})}).setOutputPorts({Port({1, 96, 55, 55})})),
+                    InferenceEngine::details::InferenceEngineException);
 }
 
 TEST_F(NetworkBuilderTest, createNetworkIterator) {
-    const INetwork::Ptr graph = createAlexnet();
+    const INetwork::CPtr graph = createAlexnet();
 
     ASSERT_NO_THROW(graph->begin());
 }
 
 TEST_F(NetworkBuilderTest, checkNetworkSize) {
-    const INetwork::Ptr graph = createAlexnet();
+    const INetwork::CPtr graph = createAlexnet();
 
-    ASSERT_EQ(24, graph->size());
+    ASSERT_EQ(41, graph->size());
 }
 
 TEST_F(NetworkBuilderTest, iterateNetworkForeach) {
-    const INetwork::Ptr graph = createAlexnet();
+    const INetwork::CPtr graph = createAlexnet();
 
     size_t idx = 0;
     for (const auto& layer : *graph) {
+        if (layer->getType() == "Const")
+            continue;
         ASSERT_NE(idx, alexNetNames.size());
         ASSERT_EQ(alexNetNames[idx], layer->getName());
         idx++;
@@ -355,10 +383,12 @@ TEST_F(NetworkBuilderTest, iterateNetworkForeach) {
 }
 
 TEST_F(NetworkBuilderTest, iterateNetworkFor) {
-    const INetwork::Ptr graph = createAlexnet();
+    const INetwork::CPtr graph = createAlexnet();
 
     size_t idx = 0;
     for (auto it = graph->begin(); it != graph->end(); it++) {
+        if ((*it)->getType() == "Const")
+            continue;
         ASSERT_EQ(alexNetNames[idx], (*it)->getName());
         idx++;
     }
@@ -522,7 +552,7 @@ TEST_F(NetworkBuilderTest, convertFromICNNNetwork) {
     InferenceEngine::TBlob<uint8_t>::Ptr weights_ptr = InferenceEngine::TBlob<uint8_t>::Ptr(weights);
 
     net_reader.SetWeights(weights_ptr);
-    INetwork::Ptr network = Builder::Network(net_reader.getNetwork()).build();
+    INetwork::CPtr network = Builder::Network(net_reader.getNetwork()).build();
 
     try {
         compareWithICNNNetwork(*network, net_reader.getNetwork());
@@ -801,26 +831,35 @@ TEST_F(NetworkBuilderTest, connectTwoNetworks) {
     // Find output
     idx_t lastLayerId(0);
     for (const auto& layer : originalNetwork.getLayers()) {
-        if (layer.getType() != "Output")
+        if (layer->getType() != "Output")
             continue;
-        const auto connections = originalNetwork.getLayerConnections(layer.getId());
+        const auto connections = originalNetwork.getLayerConnections(layer->getId());
         ASSERT_EQ(1, connections.size());
-        ASSERT_EQ(layer.getId(), connections[0].to().layerId());
+        ASSERT_EQ(layer->getId(), connections[0].to().layerId());
         ASSERT_EQ(0, connections[0].from().portId());
         lastLayerId = connections[0].from().layerId();
         originalNetwork.disconnect(connections[0]);
-        originalNetwork.removeLayer(layer.getId());
+        originalNetwork.removeLayer(layer->getId());
         break;
     }
 
     std::map<idx_t, idx_t> oldNewId;
-    for (const auto& layer : addNetwork.getLayers()) {
-        if (layer.getType() == "Input") {
-            oldNewId[layer.getId()] = lastLayerId;
+    for (const auto& layer : addNetwork) {
+        if (layer->getType() == "Input") {
+            oldNewId[layer->getId()] = lastLayerId;
             continue;
         }
-        oldNewId[layer.getId()] = originalNetwork.addLayer(layer);
-        const auto connections = addNetwork.getLayerConnections(layer.getId());
+        auto newLayer = layer;
+        if (newLayer->getType() != "Const") {
+            for (size_t i = 0; i < newLayer->getInputPorts().size(); i++) {
+                newLayer->getInputPorts()[i].setData(std::make_shared<PortData>());
+            }
+            for (size_t i = 0; i < newLayer->getOutputPorts().size(); i++) {
+                newLayer->getOutputPorts()[i].setData(std::make_shared<PortData>());
+            }
+        }
+        oldNewId[layer->getId()] = originalNetwork.addLayer(*newLayer);
+        const auto connections = addNetwork.getLayerConnections(layer->getId());
         for (const auto& connection : connections) {
             if (oldNewId.find(connection.from().layerId()) == oldNewId.end() ||
                     oldNewId.find(connection.to().layerId()) == oldNewId.end())
@@ -829,8 +868,15 @@ TEST_F(NetworkBuilderTest, connectTwoNetworks) {
                     {oldNewId[connection.to().layerId()], connection.to().portId()});
         }
 
-        if (layer.getType() == "Convolution") {
-            Builder::ConvolutionLayer(originalNetwork.getLayer(oldNewId[layer.getId()])).setWeights(generateBlob(Precision::FP32, {16, 32, 7, 7}, Layout::OIHW));
+        if (layer->getType() == "Convolution") {
+            idx_t weightsId = originalNetwork.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {16, 32, 7, 7}, Layout::OIHW)));
+            for (const auto& connection : originalNetwork.getLayerConnections(oldNewId[layer->getId()])) {
+                if (connection.to().layerId() != oldNewId[layer->getId()] || connection.to().portId() != 1)
+                    continue;
+                originalNetwork.removeLayer(connection.from().layerId());
+                originalNetwork.disconnect(connection);
+            }
+            originalNetwork.connect({weightsId}, {oldNewId[layer->getId()], 1});
         }
     }
     ASSERT_NO_THROW(originalNetwork.build());
@@ -855,29 +901,41 @@ TEST_F(NetworkBuilderTest, createLayersWithTheSameNames) {
     ieLayer.setPaddingsEnd({0, 0, 0, 0});
     ieLayer.setGroup(1);
     ieLayer.setOutDepth(outCn);
-    auto convLayerId = netBuilder.addLayer({inpLayerId}, ieLayer);
+    idx_t weightsId = netBuilder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {1, 1, 3, 3}, Layout::OIHW)));
+    auto convLayerId = netBuilder.addLayer({{inpLayerId}, {weightsId}}, ieLayer);
 
     // Connect convolution layer with it's output
     InferenceEngine::Builder::OutputLayer outLayer("conv1");
     auto convOutLayerId = netBuilder.addLayer({convLayerId}, outLayer);
-    ASSERT_NE(netBuilder.getLayer(convLayerId).getName(), netBuilder.getLayer(convOutLayerId).getName());
+    ASSERT_NE(netBuilder.getLayer(convLayerId)->getName(), netBuilder.getLayer(convOutLayerId)->getName());
     InferenceEngine::Builder::ReLULayer reLULayer("relu1");
     reLULayer.setNegativeSlope(0);
     auto reluLayerId = netBuilder.addLayer({convLayerId}, reLULayer);
     InferenceEngine::Builder::OutputLayer outReLULayer("relu1");
     auto reluOutLayerId = netBuilder.addLayer({reluLayerId}, outReLULayer);
-    ASSERT_NE(netBuilder.getLayer(reluLayerId).getName(), netBuilder.getLayer(reluOutLayerId).getName());
+    ASSERT_NE(netBuilder.getLayer(reluLayerId)->getName(), netBuilder.getLayer(reluOutLayerId)->getName());
 
     ASSERT_NO_THROW(netBuilder.build());
 }
 
 TEST_F(NetworkBuilderTest, RemoveLayerAndBuild) {
     auto builder = prepateAlexnetBuilder();
-    builder.removeLayer(builder.getLayers()[2].getId());
+    builder.removeLayer(builder.getLayers()[2]->getId());
 
     ASSERT_THROW(builder.build(), InferenceEngine::details::InferenceEngineException);
 }
 
+TEST_F(NetworkBuilderTest, CheckConnectionsData) {
+    auto builder = prepateAlexnetBuilder();
+
+    for (const auto& connection : builder.getConnections()) {
+        const auto srcPort = builder.getLayer(connection.from().layerId())->getOutputPorts()[connection.from().portId()];
+        const auto dstPort = builder.getLayer(connection.to().layerId())->getInputPorts()[connection.to().portId()];
+
+        ASSERT_EQ(srcPort.getData(), dstPort.getData());
+    }
+}
+
 TEST_F(NetworkBuilderTest, DocumentationExample) {
     // Create graph with name
     InferenceEngine::Builder::Network graph("Example1");
@@ -897,11 +955,12 @@ TEST_F(NetworkBuilderTest, DocumentationExample) {
     data[0] = 1;
     data[1] = 2;
     data[2] = 3;
-    idx_t scaleShiftId = graph.addLayer(Builder::ScaleShiftLayer("scaleShift1").setBiases(blobWithScaleShiftBiases));
+    idx_t biasesId = graph.addLayer(Builder::ConstLayer("biases").setData(blobWithScaleShiftBiases));
+    idx_t scaleShiftId = graph.addLayer(Builder::ScaleShiftLayer("scaleShift1"));
 
     // Connect ScaleShift layer with relu1
     graph.connect({relu1Id}, {scaleShiftId}); // Also port indexes could be defined (0 is default value) builder.connect({layerId, outPortIdx}, {scaleShiftId, inPortIdx});
-
+    graph.connect({biasesId}, {scaleShiftId, 2});
     // Create ReLU layer with a negative slope 0.2 using generic layer builder and connect it with scaleShift
     idx_t relu2Id = graph.addLayer({{scaleShiftId}}, Builder::Layer("ReLU", "relu2").setParameters({{"negative_slope", 0.2f}}).setOutputPorts({Port()}).setInputPorts({Port()}));
 
@@ -909,7 +968,7 @@ TEST_F(NetworkBuilderTest, DocumentationExample) {
     idx_t outId = graph.addLayer({{relu2Id, 0}}, Builder::OutputLayer("out"));
 
     // Build original network
-    InferenceEngine::INetwork::Ptr finalNetwork = graph.build();
+    InferenceEngine::INetwork::CPtr finalNetwork = graph.build();
     std::shared_ptr<InferenceEngine::ICNNNetwork> cnnNetwork = InferenceEngine::Builder::convertToICNNNetwork(finalNetwork);
 
     // Modify network
@@ -923,5 +982,255 @@ TEST_F(NetworkBuilderTest, DocumentationExample) {
     // Connect scaleShift1 and out
     graph.connect({scaleShiftId}, {outId});
     // Build network without relu2
-    InferenceEngine::INetwork::Ptr changedNetwork = graph.build();
+    InferenceEngine::INetwork::CPtr changedNetwork = graph.build();
+}
+
+TEST_F(NetworkBuilderTest, CreateFullyConnectedWithoutBiases) {
+    Builder::Network builder("network");
+    Builder::FullyConnectedLayer fcBuilder("FullyConnected");
+
+    SizeVector inputDims = {1, 2, 16, 16}; // 1 KB
+
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(inputDims)));
+
+    idx_t weightsId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32,
+                                                                                           {1024, 2, 16, 16}, Layout::OIHW)));
+
+    layerId = builder.addLayer({{layerId}, {weightsId} }, Builder::FullyConnectedLayer("FullyConnected").setOutputNum(1024 * 1));
+
+    builder.addLayer({PortInfo(layerId)}, Builder::OutputLayer("output"));
+
+    ASSERT_NO_THROW(std::shared_ptr<InferenceEngine::ICNNNetwork> cnnNetwork = InferenceEngine::Builder::convertToICNNNetwork(builder.build()));
+}
+
+TEST_F(NetworkBuilderTest, CreateAndConvertNetworkWithoutWeightsWithConst) {
+    Builder::Network builder("network");
+
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port({1, 1, 10, 10})));
+    layerId = builder.addLayer({layerId}, Builder::PoolingLayer("pool").setKernel({2, 2}).setStrides({2, 2})
+            .setPoolingType(Builder::PoolingLayer::PoolingType::MAX));
+    builder.addLayer({layerId}, Builder::OutputLayer("output"));
+
+
+    layerId = builder.addLayer(Builder::ConstLayer("constWA").setData(generateBlob(Precision::FP16, {1}, Layout::C)));
+    builder.addLayer({layerId}, Builder::OutputLayer("output_const"));
+
+    auto cnnNetwork = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(builder.build()));
+    ASSERT_EQ(Precision::FP16, cnnNetwork.getPrecision());
+}
+
+TEST_F(NetworkBuilderTest, CreateAndConvertNetworkWithoutWeights) {
+    Builder::Network builder("network");
+
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port({1, 1, 10, 10}, Precision::FP16)));
+    layerId = builder.addLayer({layerId}, Builder::PoolingLayer("pool").setKernel({2, 2}).setStrides({2, 2})
+            .setPoolingType(Builder::PoolingLayer::PoolingType::MAX));
+    builder.addLayer({layerId}, Builder::OutputLayer("output"));
+
+    auto cnnNetwork = InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(builder.build()));
+    ASSERT_EQ(Precision::FP16, cnnNetwork.getPrecision());
+}
+
+TEST_F(NetworkBuilderTest, CreateAndNetworkWithPadLayer) {
+    Builder::Network builder("network");
+
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port({1, 2, 3, 4})));
+    Builder::Layer padLayer("Pad", "padding");
+    padLayer.getParameters()["pads_begin"] = std::vector<int>({0, 0, 1, 1});
+    padLayer.getParameters()["pads_end"] = std::vector<int>({0, 0, 1, 1});
+    padLayer.getParameters()["pad_mode"] = std::string("constant");
+    padLayer.getParameters()["pad_value"] = 0;
+    padLayer.setInputPorts(std::vector<InferenceEngine::Port>(1));
+    padLayer.setOutputPorts(std::vector<InferenceEngine::Port>(1));
+    layerId = builder.addLayer({layerId}, padLayer);
+    builder.addLayer({layerId}, Builder::OutputLayer("output"));
+
+    ASSERT_NO_THROW(InferenceEngine::CNNNetwork(InferenceEngine::Builder::convertToICNNNetwork(builder.build())));
+}
+
+TEST_F(NetworkBuilderTest, CreateLSTMFromBuilder) {
+    std::string model = R"V0G0N(
+<net name="LSTMTINet" precision="FP32" version="2" batch="1">
+    <layers>
+        <layer name="Input0" precision="FP32" type="Input" id="0">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>3</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="Input1" precision="FP32" type="Input" id="1">
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="Input2" precision="FP32" type="Input" id="2">
+            <output>
+                <port id="2">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="RNN3" precision="FP32" type="RNN" id="3">
+            <data axis="1" direction="Backward" hidden_size="5"></data>
+            <input>
+                <port id="3">
+                    <dim>1</dim>
+                    <dim>3</dim>
+                    <dim>10</dim>
+                </port>
+                <port id="4">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                </port>
+                <port id="5">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                </port>
+            </input>
+            <output>
+                <port id="6">
+                    <dim>1</dim>
+                    <dim>3</dim>
+                    <dim>5</dim>
+                </port>
+                <port id="7">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                </port>
+                <port id="8">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                </port>
+            </output>
+
+            <weights offset="0" size="1200"></weights>
+            <biases offset="1200" size="80"></biases>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="3" to-port="3"></edge>
+        <edge from-layer="1" from-port="1" to-layer="3" to-port="4"></edge>
+        <edge from-layer="2" from-port="2" to-layer="3" to-port="5"></edge>
+    </edges>
+</net>
+    )V0G0N";
+
+    InferenceEngine::CNNNetReader net_reader;
+    ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+    Builder::Network builder("LSTMTINet");
+    idx_t in0 = builder.addLayer(Builder::InputLayer("Input0").setPort(Port({1, 3, 10})));
+    idx_t in1 = builder.addLayer(Builder::InputLayer("Input1").setPort(Port({1, 5})));
+    idx_t in2 = builder.addLayer(Builder::InputLayer("Input2").setPort(Port({1, 5})));
+    idx_t weightId = builder.addLayer(Builder::ConstLayer("weights").setData(generateBlob(Precision::FP32, {300}, Layout::C)));
+    idx_t biasesId = builder.addLayer(Builder::ConstLayer("biases").setData(generateBlob(Precision::FP32, {20}, Layout::C)));
+    idx_t lstm = builder.addLayer({{in0}, {weightId}, {biasesId}},
+            Builder::LSTMSequenceLayer("RNN3")
+            .setDirection("Backward")
+            .setHiddenSize(5));
+    builder.getLayer(lstm)->getOutputPorts()[0].setShape({1, 3, 5});
+    builder.getLayer(lstm)->getOutputPorts()[1].setShape({1, 5});
+    builder.getLayer(lstm)->getOutputPorts()[2].setShape({1, 5});
+    builder.connect({in1}, {lstm, 4});
+    builder.connect({in2}, {lstm, 5});
+
+    builder.addLayer({{lstm, 0}}, Builder::OutputLayer("output0"));
+    builder.addLayer({{lstm, 1}}, Builder::OutputLayer("output1"));
+    builder.addLayer({{lstm, 2}}, Builder::OutputLayer("output2"));
+    const auto network = Builder::convertToICNNNetwork(builder.build());
+    try {
+        compareICNNNetworks(*network, net_reader.getNetwork());
+    } catch (InferenceEngine::details::InferenceEngineException &ex) {
+        FAIL() << ex.what();
+    }
 }
+
+TEST_F(NetworkBuilderTest, Fp16AlexNetInputPrecision) {
+    auto cnnNetwork = Builder::convertToICNNNetwork(prepateAlexnetBuilder(Precision::FP16).build());
+
+    OutputsDataMap outputs;
+    InputsDataMap inputs;
+
+    cnnNetwork->getInputsInfo(inputs);
+    cnnNetwork->getOutputsInfo(outputs);
+
+    auto input = inputs.begin()->second;
+    auto output = outputs.begin()->second;
+    ASSERT_EQ(Precision::FP32, input->getPrecision());
+    ASSERT_EQ(Precision::FP32, output->getPrecision());
+}
+
+TEST_F(NetworkBuilderTest, CheckPreProcessAlexNet) {
+    auto cnnNetwork = Builder::convertToICNNNetwork(createAlexnet());
+
+    InputsDataMap inputs;
+
+    cnnNetwork->getInputsInfo(inputs);
+
+    auto input = inputs.begin()->second;
+    ASSERT_NE(input->getPreProcess().getResizeAlgorithm(), ResizeAlgorithm::RESIZE_BILINEAR);
+    input->getPreProcess().setResizeAlgorithm(ResizeAlgorithm::RESIZE_BILINEAR);
+
+    auto newCnnNetwork = Builder::convertToICNNNetwork(Builder::Network(*cnnNetwork).build());
+    newCnnNetwork->getInputsInfo(inputs);
+    input = inputs.begin()->second;
+    ASSERT_EQ(input->getPreProcess().getResizeAlgorithm(), ResizeAlgorithm::RESIZE_BILINEAR);
+}
+
+TEST_F(NetworkBuilderTest, ReshapeNetworkTest) {
+    std::string model = R"V0G0N(
+<net name="Reshape" version="2" batch="1">
+    <layers>
+        <layer name="data" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>1000</dim>
+                    <dim>1</dim>
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="1" name="flatten" precision="FP32" type="Reshape">
+			<data axis="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>1000</dim>
+					<dim>1</dim>
+					<dim>1</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>1000</dim>
+				</port>
+			</output>
+		</layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+    </edges>
+</net>)V0G0N";
+
+    InferenceEngine::CNNNetReader net_reader;
+    ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+    auto network = Builder::convertToICNNNetwork(Builder::Network(net_reader.getNetwork()).build());
+
+    CNNLayerPtr layer;
+    network->getLayerByName("flatten", layer, nullptr);
+    ASSERT_EQ(layer->outData[0]->getDims().size(), 2);
+    try {
+        compareICNNNetworks(*network, net_reader.getNetwork());
+    } catch (InferenceEngine::details::InferenceEngineException &ex) {
+        FAIL() << ex.what();
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/norm_layer_test.cpp b/inference-engine/tests/unit/builders/norm_layer_test.cpp
new file mode 100644
index 000000000..72f2581ef
--- /dev/null
+++ b/inference-engine/tests/unit/builders/norm_layer_test.cpp
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_norm_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class NormLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(NormLayerBuilderTest, getExistsLayerFromNetworkBuilderWithAcrossMapsEqualTrue) {
+    Builder::Network net("Test");
+    auto layer = Builder::NormLayer("NormLayer").setAlpha(9.999999747378752e-05f).setBeta(0.75f).setSize(5).setAcrossMaps(true).setPort(Port({10, 10, 100, 100}));
+    size_t id = net.addLayer(layer);
+    Builder::NormLayer layerFromNetwork(net.getLayer(id));
+    ASSERT_EQ(layer.getAlpha(), layerFromNetwork.getAlpha());
+    ASSERT_EQ(layer.getBeta(), layerFromNetwork.getBeta());
+    ASSERT_EQ(layer.getAcrossMaps(), layerFromNetwork.getAcrossMaps());
+}
+
+TEST_F(NormLayerBuilderTest, getExistsLayerFromNetworkBuilderWithAcrossMapsEqualFalse) {
+    Builder::Network net("Test");
+    auto layer = Builder::NormLayer("NormLayer").setAlpha(9.999999747378752e-05f).setBeta(0.75f).setSize(5).setAcrossMaps(false).setPort(Port({10, 10, 100, 100}));
+    size_t id = net.addLayer(layer);
+    Builder::NormLayer layerFromNetwork(net.getLayer(id));
+    ASSERT_EQ(layer.getAlpha(), layerFromNetwork.getAlpha());
+    ASSERT_EQ(layer.getBeta(), layerFromNetwork.getBeta());
+    ASSERT_EQ(layer.getAcrossMaps(), layerFromNetwork.getAcrossMaps());
+}
+
+TEST_F(NormLayerBuilderTest, cannotCreateNormLayerWithWrongAlpha) {
+    Builder::Network net("Test");
+    auto layer = Builder::NormLayer("NormLayer").setAlpha(0).setBeta(0.75f).setSize(5).setAcrossMaps(true).setPort(Port({10, 10, 100, 100}));
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormLayerBuilderTest, cannotCreateNormLayerWithWrongBeta) {
+    Builder::Network net("Test");
+    auto layer = Builder::NormLayer("NormLayer").setAlpha(1).setBeta(0).setSize(5).setAcrossMaps(true).setPort(Port({10, 10, 100, 100}));
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormLayerBuilderTest, cannotCreateNormLayerWithWrongSize) {
+    Builder::Network net("Test");
+    auto layer = Builder::NormLayer("NormLayer").setAlpha(1).setBeta(1).setSize(0).setAcrossMaps(true).setPort(Port({10, 10, 100, 100}));
+    ASSERT_THROW(net.addLayer(layer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormLayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeNormLayerPtr = std::make_shared<Builder::Layer>("Norm", "Norm layer");
+    fakeNormLayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeNormLayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::NormLayer normLayer(fakeNormLayerPtr);
+    normLayer.setAlpha(1).setBeta(0).setSize(5).setAcrossMaps(true);
+    ASSERT_THROW(net.addLayer(normLayer), InferenceEngine::details::InferenceEngineException);
+}
+
diff --git a/inference-engine/tests/unit/builders/normalize_layer_test.cpp b/inference-engine/tests/unit/builders/normalize_layer_test.cpp
new file mode 100644
index 000000000..809f2b1bd
--- /dev/null
+++ b/inference-engine/tests/unit/builders/normalize_layer_test.cpp
@@ -0,0 +1,89 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_normalize_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class NormalizeLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(NormalizeLayerBuilderTest, getExistsLayerFromNetworkBuilder1) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0.1).setChannelShared(true).setAcrossMaps(true);
+    size_t ind = net.addLayer(normalizeLayer);
+    Builder::NormalizeLayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(normalizeLayer.getEpsilon(), layerFromNet.getEpsilon());
+}
+
+TEST_F(NormalizeLayerBuilderTest, getExistsLayerFromNetworkBuilder2) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0.1).setChannelShared(true).setAcrossMaps(false);
+    size_t ind = net.addLayer(normalizeLayer);
+    Builder::NormalizeLayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(normalizeLayer.getEpsilon(), layerFromNet.getEpsilon());
+}
+
+TEST_F(NormalizeLayerBuilderTest, getExistsLayerFromNetworkBuilder3) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0.1).setChannelShared(false).setAcrossMaps(true);
+    size_t ind = net.addLayer(normalizeLayer);
+    Builder::NormalizeLayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(normalizeLayer.getEpsilon(), layerFromNet.getEpsilon());
+}
+
+TEST_F(NormalizeLayerBuilderTest, getExistsLayerFromNetworkBuilder4) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0.1).setChannelShared(false).setAcrossMaps(false);
+    size_t ind = net.addLayer(normalizeLayer);
+    Builder::NormalizeLayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(normalizeLayer.getEpsilon(), layerFromNet.getEpsilon());
+}
+
+TEST_F(NormalizeLayerBuilderTest, cannotCreateLayerWithWrongEpsilon1) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0).setChannelShared(true).setAcrossMaps(true);
+    ASSERT_THROW(net.addLayer(normalizeLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormalizeLayerBuilderTest, cannotCreateLayerWithWrongEpsilon2) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0).setChannelShared(true).setAcrossMaps(false);
+    ASSERT_THROW(net.addLayer(normalizeLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormalizeLayerBuilderTest, cannotCreateLayerWithWrongEpsilon3) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0).setChannelShared(false).setAcrossMaps(true);
+    ASSERT_THROW(net.addLayer(normalizeLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormalizeLayerBuilderTest, cannotCreateLayerWithWrongEpsilon4) {
+    Builder::Network net("network");
+    Builder::NormalizeLayer normalizeLayer("normalizeLayer");
+    normalizeLayer.setEpsilon(0).setChannelShared(false).setAcrossMaps(false);
+    ASSERT_THROW(net.addLayer(normalizeLayer), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(NormalizeLayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeNormalizeLayerPtr = std::make_shared<Builder::Layer>("Normalize", "Normalize layer");
+    fakeNormalizeLayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeNormalizeLayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::NormalizeLayer normalizeLayer(fakeNormalizeLayerPtr);
+    normalizeLayer.setEpsilon(0.1).setChannelShared(true).setAcrossMaps(true);
+    ASSERT_THROW(net.addLayer(normalizeLayer), InferenceEngine::details::InferenceEngineException);
+}
diff --git a/inference-engine/tests/unit/builders/output_layer_test.cpp b/inference-engine/tests/unit/builders/output_layer_test.cpp
new file mode 100644
index 000000000..dc2b91bda
--- /dev/null
+++ b/inference-engine/tests/unit/builders/output_layer_test.cpp
@@ -0,0 +1,25 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_output_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class OutputLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(OutputLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network network("network");
+    Builder::OutputLayer layer("output layer");
+    layer.setPort(Port({1, 1, 1, 1}));
+    size_t ind = network.addLayer(layer);
+    Builder::OutputLayer layerFromNet(network.getLayer(ind));
+    ASSERT_EQ(layer.getPort().shape(), layerFromNet.getPort().shape());
+    ASSERT_EQ(layer.getPort().shape(), Port({1, 1, 1, 1}).shape());
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/relu6_layer_test.cpp b/inference-engine/tests/unit/builders/relu6_layer_test.cpp
new file mode 100644
index 000000000..a0e934069
--- /dev/null
+++ b/inference-engine/tests/unit/builders/relu6_layer_test.cpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_relu6_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ReLU6LayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ReLU6LayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::ReLU6Layer relu6Layer("relu6layer");
+    relu6Layer.setN(100);
+    size_t ind = net.addLayer(relu6Layer);
+    Builder::ReLU6Layer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(relu6Layer.getN(), layerFromNet.getN());
+}
+
+TEST_F(ReLU6LayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeReLU6LayerPtr = std::make_shared<Builder::Layer>("ReLU6", "ReLU6 layer");
+    fakeReLU6LayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeReLU6LayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::ReLU6Layer reLU6Layer(fakeReLU6LayerPtr);
+    reLU6Layer.setN(10);
+    ASSERT_THROW(net.addLayer(reLU6Layer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/relu_layer_test.cpp b/inference-engine/tests/unit/builders/relu_layer_test.cpp
new file mode 100644
index 000000000..a05a5d9c3
--- /dev/null
+++ b/inference-engine/tests/unit/builders/relu_layer_test.cpp
@@ -0,0 +1,41 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_relu_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ReLULayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ReLULayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::ReLULayer reluLayer("ReLU_layer");
+    reluLayer.setNegativeSlope(100);
+    size_t ind = net.addLayer(reluLayer);
+    Builder::ReLULayer layerFromNet(net.getLayer(ind));
+    ASSERT_EQ(reluLayer.getNegativeSlope(), layerFromNet.getNegativeSlope());
+}
+
+TEST_F(ReLULayerBuilderTest, cannotCreateLayerWithWrongNegativeSlope) {
+    Builder::Network net("network");
+    Builder::ReLULayer reluLayer("ReLU_layer");
+    reluLayer.setNegativeSlope(-10);
+    ASSERT_NO_THROW(net.addLayer(reluLayer));
+}
+
+TEST_F(ReLULayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeReLULayerPtr = std::make_shared<Builder::Layer>("ReLU", "ReLU layer");
+    fakeReLULayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeReLULayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::ReLULayer reluLayer(fakeReLULayerPtr);
+    reluLayer.setNegativeSlope(100);
+    ASSERT_THROW(net.addLayer(reluLayer), InferenceEngine::details::InferenceEngineException);
+}
diff --git a/inference-engine/tests/unit/builders/resample_layer_test.cpp b/inference-engine/tests/unit/builders/resample_layer_test.cpp
new file mode 100644
index 000000000..059108001
--- /dev/null
+++ b/inference-engine/tests/unit/builders/resample_layer_test.cpp
@@ -0,0 +1,35 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class ResampleLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(ResampleLayerBuilderTest, checkTypeParameter) {
+    InferenceEngine::Builder::Layer ieLayer("Resample", "upsample");
+    ieLayer.getParameters()["type"] = std::string("caffe.ResampleParameter.NEAREST");
+    ieLayer.getParameters()["antialias"] = false;
+    ieLayer.getParameters()["factor"] = 2.0f;
+    ieLayer.getParameters()["width"] = 10;
+    ieLayer.getParameters()["height"] = 10;
+
+    ASSERT_EQ("Resample", ieLayer.getType());
+    ASSERT_EQ("caffe.ResampleParameter.NEAREST", ieLayer.getParameters()["type"].as<std::string>());
+
+    InferenceEngine::Builder::ResampleLayer resampleLayer("upsample");
+    resampleLayer.setResampleType("caffe.ResampleParameter.NEAREST");
+    resampleLayer.setAntialias(false);
+    resampleLayer.setFactor(2);
+    resampleLayer.setWidth(10);
+    resampleLayer.setHeight(10);
+    ASSERT_EQ("Resample", resampleLayer.getType());
+    ASSERT_EQ("caffe.ResampleParameter.NEAREST", resampleLayer.getResampleType());
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/split_layer_test.cpp b/inference-engine/tests/unit/builders/split_layer_test.cpp
new file mode 100644
index 000000000..145295ed9
--- /dev/null
+++ b/inference-engine/tests/unit/builders/split_layer_test.cpp
@@ -0,0 +1,83 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class SplitLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(SplitLayerBuilderTest, CreateIdentitySplitLayer) {
+    Builder::Network builder("network");
+    SizeVector shape = {1, 4, 3, 4};
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(shape, Precision::FP16)));
+    layerId = builder.addLayer({layerId}, Builder::SplitLayer("identity").setOutputPorts({Port()}));
+    builder.addLayer({layerId}, Builder::OutputLayer("output"));
+
+    const auto network = builder.build();
+    ASSERT_EQ(shape, network->getLayer(layerId)->getOutputPorts()[0].shape());
+}
+
+TEST_F(SplitLayerBuilderTest, CreateSplitLayerWithTwoOutputs) {
+    Builder::Network builder("network");
+    SizeVector shape = {1, 4, 3, 4};
+    SizeVector outShape = {1, 2, 3, 4};
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(shape, Precision::FP16)));
+    layerId = builder.addLayer({layerId}, Builder::SplitLayer("split").setOutputPorts({Port(), Port()}));
+    builder.addLayer({{layerId}}, Builder::OutputLayer("output1"));
+    builder.addLayer({{layerId, 1}}, Builder::OutputLayer("output2"));
+
+    const auto network = builder.build();
+    ASSERT_EQ(outShape, network->getLayer(layerId)->getOutputPorts()[0].shape());
+    ASSERT_EQ(outShape, network->getLayer(layerId)->getOutputPorts()[1].shape());
+}
+
+TEST_F(SplitLayerBuilderTest, CreateSplitLayerWithTwoOutputsAndOneInitialized) {
+    Builder::Network builder("network");
+    SizeVector shape = {1, 4, 3, 4};
+    SizeVector outShape1 = {1, 3, 3, 4};
+    SizeVector outShape2 = {1, 1, 3, 4};
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(shape, Precision::FP16)));
+    layerId = builder.addLayer({layerId}, Builder::SplitLayer("split").setOutputPorts({Port(outShape1), Port()}));
+    builder.addLayer({{layerId}}, Builder::OutputLayer("output1"));
+    builder.addLayer({{layerId, 1}}, Builder::OutputLayer("output2"));
+
+    const auto network = builder.build();
+    ASSERT_EQ(outShape1, network->getLayer(layerId)->getOutputPorts()[0].shape());
+    ASSERT_EQ(outShape2, network->getLayer(layerId)->getOutputPorts()[1].shape());
+}
+
+TEST_F(SplitLayerBuilderTest, CreateSplitLayerWithTwoOutputsAxis3) {
+    Builder::Network builder("network");
+    SizeVector shape = {1, 4, 3, 4};
+    SizeVector outShape = {1, 4, 3, 2};
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(shape, Precision::FP16)));
+    layerId = builder.addLayer({layerId}, Builder::SplitLayer("split").setAxis(3).setOutputPorts({Port(), Port()}));
+    builder.addLayer({{layerId}}, Builder::OutputLayer("output1"));
+    builder.addLayer({{layerId, 1}}, Builder::OutputLayer("output2"));
+
+    const auto network = builder.build();
+    ASSERT_EQ(outShape, network->getLayer(layerId)->getOutputPorts()[0].shape());
+    ASSERT_EQ(outShape, network->getLayer(layerId)->getOutputPorts()[1].shape());
+}
+
+TEST_F(SplitLayerBuilderTest, CreateSplitLayerWithTwoOutputsAxis3AndOneInitialized) {
+    Builder::Network builder("network");
+    SizeVector shape = {1, 4, 3, 4};
+    SizeVector outShape1 = {1, 4, 3, 1};
+    SizeVector outShape2 = {1, 4, 3, 3};
+    idx_t layerId = builder.addLayer(Builder::InputLayer("input").setPort(Port(shape, Precision::FP16)));
+    layerId = builder.addLayer({layerId}, Builder::SplitLayer("split").setAxis(3).setOutputPorts({Port(outShape1), Port()}));
+    builder.addLayer({{layerId}}, Builder::OutputLayer("output1"));
+    builder.addLayer({{layerId, 1}}, Builder::OutputLayer("output2"));
+
+    const auto network = builder.build();
+    ASSERT_EQ(outShape1, network->getLayer(layerId)->getOutputPorts()[0].shape());
+    ASSERT_EQ(outShape2, network->getLayer(layerId)->getOutputPorts()[1].shape());
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/tanh_layer_test.cpp b/inference-engine/tests/unit/builders/tanh_layer_test.cpp
new file mode 100644
index 000000000..0e37aa508
--- /dev/null
+++ b/inference-engine/tests/unit/builders/tanh_layer_test.cpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <ie_builders.hpp>
+#include <builders/ie_tanh_layer.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class TanHLayerBuilderTest : public BuilderTestCommon {};
+
+TEST_F(TanHLayerBuilderTest, getExistsLayerFromNetworkBuilder) {
+    Builder::Network net("network");
+    Builder::TanHLayer tanhLayer("TanH_layer");
+    size_t ind = net.addLayer(tanhLayer);
+    Builder::TanHLayer layerFromNet(net.getLayer(ind));
+}
+
+TEST_F(TanHLayerBuilderTest, cannotCreateLayerWithWrongShapes) {
+    Builder::Network net("network");
+    Builder::Layer::Ptr fakeTanHLayerPtr = std::make_shared<Builder::Layer>("TanH", "TanH layer");
+    fakeTanHLayerPtr->getInputPorts().push_back(Port({1, 1, 1, 1}));
+    fakeTanHLayerPtr->getOutputPorts().push_back(Port({1, 1, 1, 2}));
+    Builder::TanHLayer tanhLayer(fakeTanHLayerPtr);
+    ASSERT_THROW(net.addLayer(tanhLayer), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/builders/transform_network_test.cpp b/inference-engine/tests/unit/builders/transform_network_test.cpp
new file mode 100644
index 000000000..2ae996843
--- /dev/null
+++ b/inference-engine/tests/unit/builders/transform_network_test.cpp
@@ -0,0 +1,185 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <transform/transform_network.hpp>
+#include <ie_builders.hpp>
+
+#include "builder_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class TransformNetworkTest: public BuilderTestCommon {};
+
+TEST_F(TransformNetworkTest, AddNewLayer) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    ASSERT_EQ(0, builder.size());
+    network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    ASSERT_EQ(1, builder.size());
+}
+
+TEST_F(TransformNetworkTest, RemoveLayer) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    ASSERT_EQ(0, builder.size());
+    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    ASSERT_EQ(1, builder.size());
+
+    network.removeLayer(layer);
+    ASSERT_EQ(0, builder.size());
+}
+
+TEST_F(TransformNetworkTest, GetIncorrectPort) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    ASSERT_THROW(layer.getInPort(), InferenceEngine::details::InferenceEngineException);
+    ASSERT_THROW(layer.getOutPort(1), InferenceEngine::details::InferenceEngineException);
+}
+
+
+TEST_F(TransformNetworkTest, GetCorrectPort) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    ASSERT_NO_THROW(layer.getOutPort());
+    ASSERT_NO_THROW(layer.getOutPort(0));
+}
+
+TEST_F(TransformNetworkTest, GetLayerById) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer layer = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    ASSERT_NO_THROW(network.getLayer(layer.getId()));
+}
+
+TEST_F(TransformNetworkTest, GetLayerByName) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    ASSERT_NO_THROW(network.getLayer("in1"));
+}
+
+TEST_F(TransformNetworkTest, ConnectTwoLayers) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
+    ASSERT_EQ(2, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    network.connect(input, relu);
+    ASSERT_EQ(1, builder.getConnections().size());
+}
+
+TEST_F(TransformNetworkTest, ConnectTwoPorts) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
+    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
+    ASSERT_EQ(2, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    network.connect(inputPort, reluPort);
+    ASSERT_EQ(1, builder.getConnections().size());
+}
+
+TEST_F(TransformNetworkTest, DisconnectTwoLayers) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
+    ASSERT_EQ(2, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    network.connect(input, relu);
+    ASSERT_EQ(1, builder.getConnections().size());
+    network.disconnect(input, relu);
+    ASSERT_EQ(0, builder.getConnections().size());
+}
+
+TEST_F(TransformNetworkTest, DisonnectTwoPorts) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
+    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
+    ASSERT_EQ(2, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    network.connect(inputPort, reluPort);
+    ASSERT_EQ(1, builder.getConnections().size());
+    network.disconnect(inputPort, reluPort);
+    ASSERT_EQ(0, builder.getConnections().size());
+}
+
+TEST_F(TransformNetworkTest, RemoveLayerAndConnection) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
+    network.connect(input, relu);
+    ASSERT_EQ(1, builder.getConnections().size());
+    ASSERT_EQ(2, builder.size());
+    network.removeLayer(relu);
+    ASSERT_EQ(0, builder.getConnections().size());
+    ASSERT_EQ(1, builder.size());
+}
+
+TEST_F(TransformNetworkTest, GetInitializedConnection) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
+    network.connect(input, relu);
+    ASSERT_EQ(input.getOutPort(), relu.getInPort().getConnection().getSource());
+}
+
+TEST_F(TransformNetworkTest, GetIncorrectConnections) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Layer input = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27})));
+    Transform::Layer relu = network.addLayer(Builder::ReLULayer("relu1"));
+    ASSERT_THROW(relu.getInPort().getConnection().getSource(), InferenceEngine::details::InferenceEngineException);
+    ASSERT_THROW(input.getOutPort().getConnection().getDestination(), InferenceEngine::details::InferenceEngineException);
+    ASSERT_NO_THROW(input.getOutPort().getConnection().getSource());
+    ASSERT_NO_THROW(relu.getInPort().getConnection().getDestination());
+}
+
+TEST_F(TransformNetworkTest, ConnectToSourcePortsFromConnection) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
+    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
+    ASSERT_EQ(2, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    ASSERT_NO_THROW(inputPort.getConnection().setDestination(reluPort));
+    ASSERT_EQ(1, builder.getConnections().size());
+}
+
+TEST_F(TransformNetworkTest, ConnectWithTwoDestinations) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
+    Transform::Port reluPort1 = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
+    Transform::Port reluPort2 = network.addLayer(Builder::ReLULayer("relu2")).getInPort();
+    ASSERT_EQ(3, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    ASSERT_NO_THROW(inputPort.getConnection().setDestination(reluPort1));
+    ASSERT_NO_THROW(inputPort.getConnection().addDestination(reluPort2));
+    ASSERT_THROW(inputPort.getConnection().addDestination(reluPort2), InferenceEngine::details::InferenceEngineException);
+    ASSERT_EQ(2, builder.getConnections().size());
+    ASSERT_THROW(inputPort.getConnection().setDestination(reluPort2), InferenceEngine::details::InferenceEngineException);
+    ASSERT_NO_THROW(inputPort.getConnection().setDestinations({reluPort2, reluPort1}));
+    ASSERT_EQ(2, builder.getConnections().size());
+}
+
+TEST_F(TransformNetworkTest, ConnectToDestinationPortsFromConnection) {
+    Builder::Network builder("test");
+    Transform::Network network(builder);
+    Transform::Port inputPort = network.addLayer(Builder::InputLayer("in1").setPort(Port({1, 3, 27, 27}))).getOutPort();
+    Transform::Port reluPort = network.addLayer(Builder::ReLULayer("relu1")).getInPort();
+    ASSERT_EQ(2, builder.size());
+    ASSERT_EQ(0, builder.getConnections().size());
+    reluPort.getConnection().setSource(inputPort);
+    ASSERT_EQ(1, builder.getConnections().size());
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/cnn_network/cnn_layer_validation_tests.cpp b/inference-engine/tests/unit/cnn_network/cnn_layer_validation_tests.cpp
new file mode 100644
index 000000000..d06687e4d
--- /dev/null
+++ b/inference-engine/tests/unit/cnn_network/cnn_layer_validation_tests.cpp
@@ -0,0 +1,99 @@
+/*
+* INTEL CONFIDENTIAL
+* Copyright (C) 2018-2019 Intel Corporation.
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel Corporation
+* or its suppliers or licensors. Title to the Material remains with
+* Intel Corporation or its suppliers and licensors. The Material may
+* contain trade secrets and proprietary and confidential information
+* of Intel Corporation and its suppliers and licensors, and is protected
+* by worldwide copyright and trade secret laws and treaty provisions.
+* No part of the Material may be used, copied, reproduced, modified,
+* published, uploaded, posted, transmitted, distributed, or disclosed
+* in any way without Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you by
+* disclosure or delivery of the Materials, either expressly, by implication,
+* inducement, estoppel or otherwise. Any license under such intellectual
+* property rights must be express and approved by Intel in writing.
+*
+* Include any supplier copyright notices as supplier requires Intel to use.
+*
+* Include supplier trademarks or logos as supplier requires Intel to use,
+* preceded by an asterisk. An asterisked footnote can be added as follows:
+* *Third Party trademarks are the property of their respective owners.
+*
+* Unless otherwise agreed by Intel in writing, you may not remove or alter
+* this notice or any other notice embedded in Materials by Intel or Intel's
+* suppliers or licensors in any way.
+*/
+#include <gtest/gtest.h>
+#include <xml_net_builder.hpp>
+#include <inference_engine/cnn_network_impl.hpp>
+#include <inference_engine/ie_format_parser.h>
+#include <inference_engine/ie_layer_validators.hpp>
+#include <xml_helper.hpp>
+#include <../shape_infer/built_in_shape_infer_general_test.hpp>
+#include <memory>
+#include <../include/ie_data.h>
+
+#include "layer_builder.h"
+#include "shapes.h"
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+TEST_P(CNNLayerValidationTests, checkValidParams) {
+
+    assertThat(type)->setParams(valid_params);
+    auto layer = getLayer();
+    LayerValidator::Ptr validator = LayerValidators::getInstance()->getValidator(type);
+
+    ASSERT_NO_THROW(validator->parseParams(layer.get()));
+    ASSERT_NO_THROW(validator->checkParams(layer.get()));
+}
+
+TEST_P(CNNLayerValidationTests, checkInvalidParams) {
+
+    assertThat(type);
+    int numberOfParams = getNumOfParams();
+    LayerValidator::Ptr validator = LayerValidators::getInstance()->getValidator(type);
+    auto layer_ = getLayer();
+    for (int i = 0; i < numberOfParams; ++i) {
+        layer->setParams(!valid_params);
+        ASSERT_THROW(validator->parseParams(layer_.get()), InferenceEngineException);
+        ASSERT_THROW(validator->checkParams(layer_.get()), InferenceEngineException);
+    }
+}
+
+TEST_P(CNNLayerValidationTests, checkInvalidInputShapes) {
+    LayerValidator::Ptr validator = LayerValidators::getInstance()->getValidator(type);
+    std::vector<DataPtr> spData;
+    assertThat(type)->setShapes(spData, !valid_input);
+
+    auto layer_ = getLayer();
+    InOutDims shapes;
+    InferenceEngine::details::getInOutShapes(layer_.get(), shapes);
+    ASSERT_THROW(validator->checkShapes(layer_.get(), shapes.inDims), InferenceEngineException);
+}
+
+TEST_P(CNNLayerValidationTests, checkValidShapes) {
+
+    std::vector<DataPtr> spData;
+    assertThat(type)->setShapes(spData, valid_input);
+    auto layer = getLayer();
+    LayerValidator::Ptr validator = LayerValidators::getInstance()->getValidator(type);
+    InOutDims shapes;
+    InferenceEngine::details::getInOutShapes(layer.get(), shapes);
+    ASSERT_NO_THROW(validator->checkShapes(layer.get(), shapes.inDims));
+}
+
+INSTANTIATE_TEST_CASE_P(
+        InstantiationName, CNNLayerValidationTests,
+        ::testing::Values(
+                "Convolution"
+                ,"Deconvolution"
+                ,"DetectionOutput"
+        )
+);
diff --git a/inference-engine/tests/unit/cnn_network/cnn_net_reader_impl_test.cpp b/inference-engine/tests/unit/cnn_network/cnn_net_reader_impl_test.cpp
index e33362f19..d3c96be6c 100644
--- a/inference-engine/tests/unit/cnn_network/cnn_net_reader_impl_test.cpp
+++ b/inference-engine/tests/unit/cnn_network/cnn_net_reader_impl_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -10,6 +10,8 @@
 #include <gmock/gmock-more-actions.h>
 #include "cnn_network_impl.hpp"
 #include "mock_iformat_parser.hpp"
+#include <test_assertions.hpp>
+#include <single_layer_common.hpp>
 
 using namespace testing;
 using namespace InferenceEngine;
@@ -26,6 +28,7 @@ struct MockFormatParserCreator : public FormatParserCreator {
     MockFormatParserCreator() {
         _parser = make_shared<MockIFormatParser>();
     }
+
     std::shared_ptr<IFormatParser> create(int version) override {
         return _parser;
     }
@@ -1697,49 +1700,49 @@ TEST_F(CNNNetReaderImplTest, cycleIsDetectedInReader) {
 
 TEST_F(CNNNetReaderImplTest, canRead3DConvolution) {
     std::string model =
-        "<net batch=\"1\" name=\"Convolution_only\" version=\"3\">"
-        "    <layers>"
-        "        <layer id=\"0\" name=\"1\" precision=\"FP32\" type=\"Input\">"
-        "            <output>"
-        "                <port id=\"0\">"
-        "                    <dim>1</dim>"
-        "                    <dim>3</dim>"
-        "                    <dim>16</dim>"
-        "                    <dim>112</dim>"
-        "                    <dim>112</dim>"
-        "                </port>"
-        "            </output>"
-        "        </layer>"
-        "        <layer id=\"1\" name=\"3D_conv\" precision=\"FP32\" type=\"Convolution\">"
-        "            <data dilations=\"1,3,5\" group=\"1\" kernel=\"1,3,5\" output=\"64\" pads_begin=\"1,3,5\" pads_end=\"1,3,5\" strides=\"1,3,5\"/>"
-        "            <input>"
-        "                <port id=\"0\">"
-        "                    <dim>1</dim>"
-        "                    <dim>3</dim>"
-        "                    <dim>16</dim>"
-        "                    <dim>112</dim>"
-        "                    <dim>112</dim>"
-        "                </port>"
-        "            </input>"
-        "            <output>"
-        "                <port id=\"1\">"
-        "                    <dim>1</dim>"
-        "                    <dim>64</dim>"
-        "                    <dim>16</dim>"
-        "                    <dim>56</dim>"
-        "                    <dim>56</dim>"
-        "                </port>"
-        "            </output>"
-        "            <blobs>"
-        "                <weights offset=\"0\" size=\"263424\"/>"
-        "                <biases offset=\"263424\" size=\"256\"/>"
-        "            </blobs>"
-        "        </layer>"
-        "    </layers>"
-        "    <edges>"
-        "        <edge from-layer=\"0\" from-port=\"0\" to-layer=\"1\" to-port=\"0\"/>"
-        "    </edges>"
-        "</net>";
+            "<net batch=\"1\" name=\"Convolution_only\" version=\"3\">"
+            "    <layers>"
+            "        <layer id=\"0\" name=\"1\" precision=\"FP32\" type=\"Input\">"
+            "            <output>"
+            "                <port id=\"0\">"
+            "                    <dim>1</dim>"
+            "                    <dim>3</dim>"
+            "                    <dim>16</dim>"
+            "                    <dim>112</dim>"
+            "                    <dim>112</dim>"
+            "                </port>"
+            "            </output>"
+            "        </layer>"
+            "        <layer id=\"1\" name=\"3D_conv\" precision=\"FP32\" type=\"Convolution\">"
+            "            <data dilations=\"1,3,5\" group=\"1\" kernel=\"1,3,5\" output=\"64\" pads_begin=\"1,3,5\" pads_end=\"1,3,5\" strides=\"1,3,5\"/>"
+            "            <input>"
+            "                <port id=\"0\">"
+            "                    <dim>1</dim>"
+            "                    <dim>3</dim>"
+            "                    <dim>16</dim>"
+            "                    <dim>112</dim>"
+            "                    <dim>112</dim>"
+            "                </port>"
+            "            </input>"
+            "            <output>"
+            "                <port id=\"1\">"
+            "                    <dim>1</dim>"
+            "                    <dim>64</dim>"
+            "                    <dim>16</dim>"
+            "                    <dim>56</dim>"
+            "                    <dim>56</dim>"
+            "                </port>"
+            "            </output>"
+            "            <blobs>"
+            "                <weights offset=\"0\" size=\"263424\"/>"
+            "                <biases offset=\"263424\" size=\"256\"/>"
+            "            </blobs>"
+            "        </layer>"
+            "    </layers>"
+            "    <edges>"
+            "        <edge from-layer=\"0\" from-port=\"0\" to-layer=\"1\" to-port=\"0\"/>"
+            "    </edges>"
+            "</net>";
 
     CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
     ASSERT_EQ(OK, reader.ReadNetwork(model.data(), model.length(), &resp));
@@ -1748,7 +1751,7 @@ TEST_F(CNNNetReaderImplTest, canRead3DConvolution) {
 
     CNNLayerPtr layer;
     ASSERT_EQ(OK, network->getLayerByName("3D_conv", layer, nullptr));
-    auto *conv = dynamic_cast<ConvolutionLayer *>(layer.get());
+    auto* conv = dynamic_cast<ConvolutionLayer*>(layer.get());
     ASSERT_NE(nullptr, conv);
     ASSERT_EQ(conv->_kernel[X_AXIS], 5);
     ASSERT_EQ(conv->_kernel[Y_AXIS], 3);
@@ -1769,45 +1772,45 @@ TEST_F(CNNNetReaderImplTest, canRead3DConvolution) {
 
 TEST_F(CNNNetReaderImplTest, canRead3DPooling) {
     std::string model =
-        "<net batch=\"1\" name=\"Pooling_only\" version=\"3\">"
-        "    <layers>"
-        "        <layer id=\"0\" name=\"1\" precision=\"FP32\" type=\"Input\">"
-        "            <output>"
-        "                <port id=\"0\">"
-        "                    <dim>1</dim>"
-        "                    <dim>3</dim>"
-        "                    <dim>16</dim>"
-        "                    <dim>112</dim>"
-        "                    <dim>112</dim>"
-        "                </port>"
-        "            </output>"
-        "        </layer>"
-        "        <layer id=\"1\" name=\"3D_pooling\" precision=\"FP32\" type=\"Pooling\">"
-        "            <data exclude-pad=\"true\" kernel=\"1,3,5\" pads_begin=\"1,3,5\" pads_end=\"1,3,5\" pool-method=\"max\" rounding_type=\"ceil\" strides=\"1,3,5\"/>"
-        "            <input>"
-        "                <port id=\"0\">"
-        "                    <dim>1</dim>"
-        "                    <dim>3</dim>"
-        "                    <dim>16</dim>"
-        "                    <dim>112</dim>"
-        "                    <dim>112</dim>"
-        "                </port>"
-        "            </input>"
-        "            <output>"
-        "                <port id=\"1\">"
-        "                    <dim>1</dim>"
-        "                    <dim>64</dim>"
-        "                    <dim>8</dim>"
-        "                    <dim>28</dim>"
-        "                    <dim>28</dim>"
-        "                </port>"
-        "            </output>"
-        "        </layer>"
-        "    </layers>"
-        "    <edges>"
-        "        <edge from-layer=\"0\" from-port=\"0\" to-layer=\"1\" to-port=\"0\"/>"
-        "    </edges>"
-        "</net>";
+            "<net batch=\"1\" name=\"Pooling_only\" version=\"3\">"
+            "    <layers>"
+            "        <layer id=\"0\" name=\"1\" precision=\"FP32\" type=\"Input\">"
+            "            <output>"
+            "                <port id=\"0\">"
+            "                    <dim>1</dim>"
+            "                    <dim>3</dim>"
+            "                    <dim>16</dim>"
+            "                    <dim>112</dim>"
+            "                    <dim>112</dim>"
+            "                </port>"
+            "            </output>"
+            "        </layer>"
+            "        <layer id=\"1\" name=\"3D_pooling\" precision=\"FP32\" type=\"Pooling\">"
+            "            <data exclude-pad=\"true\" kernel=\"1,3,5\" pads_begin=\"1,3,5\" pads_end=\"1,3,5\" pool-method=\"max\" rounding_type=\"ceil\" strides=\"1,3,5\"/>"
+            "            <input>"
+            "                <port id=\"0\">"
+            "                    <dim>1</dim>"
+            "                    <dim>3</dim>"
+            "                    <dim>16</dim>"
+            "                    <dim>112</dim>"
+            "                    <dim>112</dim>"
+            "                </port>"
+            "            </input>"
+            "            <output>"
+            "                <port id=\"1\">"
+            "                    <dim>1</dim>"
+            "                    <dim>64</dim>"
+            "                    <dim>8</dim>"
+            "                    <dim>28</dim>"
+            "                    <dim>28</dim>"
+            "                </port>"
+            "            </output>"
+            "        </layer>"
+            "    </layers>"
+            "    <edges>"
+            "        <edge from-layer=\"0\" from-port=\"0\" to-layer=\"1\" to-port=\"0\"/>"
+            "    </edges>"
+            "</net>";
 
     CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
     ASSERT_EQ(OK, reader.ReadNetwork(model.data(), model.length(), &resp));
@@ -1817,7 +1820,7 @@ TEST_F(CNNNetReaderImplTest, canRead3DPooling) {
     CNNLayerPtr layer;
 
     ASSERT_EQ(OK, network->getLayerByName("3D_pooling", layer, nullptr));
-    auto *pool = dynamic_cast<PoolingLayer *>(layer.get());
+    auto* pool = dynamic_cast<PoolingLayer*>(layer.get());
     ASSERT_NE(nullptr, pool);
     ASSERT_EQ(pool->_kernel[X_AXIS], 5);
     ASSERT_EQ(pool->_kernel[Y_AXIS], 3);
@@ -1862,22 +1865,7 @@ TEST_F(CNNNetReaderImplTest, canParseWithoutInput_1to2) {
 
     CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
     sts = reader.ReadNetwork(model.data(), model.length(), &resp);
-    ASSERT_EQ(OK, sts) << resp.msg;
-
-    auto net = reader.getNetwork(&resp);
-    ASSERT_NE(nullptr, net ) << resp.msg;
-
-    InputsDataMap in_map;
-    OutputsDataMap out_map;
-    net->getInputsInfo(in_map);
-    net->getOutputsInfo(out_map);
-
-    ASSERT_EQ(in_map.size(), 1); auto i = in_map.begin();
-    ASSERT_EQ(i++->second->name(), "Boo");
-
-    ASSERT_EQ(out_map.size(), 2); auto o = out_map.begin();
-    ASSERT_EQ(o++->second->getName(), "Boo.0");
-    ASSERT_EQ(o++->second->getName(), "Boo.1");
+    ASSERT_EQ(GENERAL_ERROR, sts) << resp.msg;
 }
 
 TEST_F(CNNNetReaderImplTest, canParseWithoutInput_2to1) {
@@ -1909,26 +1897,11 @@ TEST_F(CNNNetReaderImplTest, canParseWithoutInput_2to1) {
 
     CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
     sts = reader.ReadNetwork(model.data(), model.length(), &resp);
-    ASSERT_EQ(OK, sts) << resp.msg;
-
-    auto net = reader.getNetwork(&resp);
-    ASSERT_NE(nullptr, net ) << resp.msg;
-
-    InputsDataMap in_map;
-    OutputsDataMap out_map;
-    net->getInputsInfo(in_map);
-    net->getOutputsInfo(out_map);
-
-    ASSERT_EQ(in_map.size(), 2); auto i = in_map.begin();
-    ASSERT_EQ(i++->second->name(), "Foo.0");
-    ASSERT_EQ(i++->second->name(), "Foo.1");
-
-    ASSERT_EQ(out_map.size(), 1); auto o = out_map.begin();
-    ASSERT_EQ(o++->second->getName(), "Foo");
+    ASSERT_EQ(GENERAL_ERROR, sts) << resp.msg;
 }
 
 TEST_F(CNNNetReaderImplTest, canParseSimpleTI) {
-        std::string model = R"V0G0N(
+    std::string model = R"V0G0N(
 <net batch="1" name="Simple_TI" version="4">
     <layers>
         <layer id="0" name="input" precision="FP32" type="Input">
@@ -2046,50 +2019,122 @@ TEST_F(CNNNetReaderImplTest, canParseSimpleTI) {
 </net>
     )V0G0N";
 
-        CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
-        sts = reader.ReadNetwork(model.data(), model.length(), &resp);
-        ASSERT_EQ(OK, sts) << resp.msg;
+    CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
+    sts = reader.ReadNetwork(model.data(), model.length(), &resp);
+    ASSERT_EQ(OK, sts) << resp.msg;
 
-        auto network = reader.getNetwork(&resp);
-        ASSERT_NE(nullptr, network ) << resp.msg;
+    auto network = reader.getNetwork(&resp);
+    ASSERT_NE(nullptr, network) << resp.msg;
 
-        CNNLayerPtr layer;
-        sts = network->getLayerByName("SomeTI", layer, &resp);
-        ASSERT_EQ(OK, sts) << resp.msg;
+    CNNLayerPtr layer;
+    sts = network->getLayerByName("SomeTI", layer, &resp);
+    ASSERT_EQ(OK, sts) << resp.msg;
+
+    auto* ti = dynamic_cast<TensorIterator*>(layer.get());
+    ASSERT_NE(nullptr, ti);
+    ASSERT_EQ(ti->type, "TensorIterator");
+
+    //  Check Input port mapping
+    ASSERT_EQ(ti->input_port_map.size(), 2);
+    int i = ti->input_port_map[0].axis == 1 ? 0 : 1;
+    ASSERT_EQ(ti->input_port_map[i].axis, 1);
+    ASSERT_EQ(ti->input_port_map[i].stride, 1);
+    ASSERT_EQ(ti->input_port_map[i].start, 0);
+    ASSERT_EQ(ti->input_port_map[i].end, -1);
+    ASSERT_EQ(ti->input_port_map[i].part_size, 1);
+    ASSERT_EQ(ti->input_port_map[1 - i].axis, -1);
+    ASSERT_EQ(ti->input_port_map[1 - i].stride, 1);
+    ASSERT_EQ(ti->input_port_map[1 - i].start, 0);
+    ASSERT_EQ(ti->input_port_map[1 - i].end, -1);
+    ASSERT_EQ(ti->input_port_map[1 - i].part_size, 1);
+
+    //  Check Output port mapping
+    ASSERT_EQ(ti->output_port_map.size(), 1);
+    ASSERT_EQ(ti->output_port_map[0].axis, 1);
+    ASSERT_EQ(ti->output_port_map[0].stride, 1);
+    ASSERT_EQ(ti->output_port_map[0].start, 0);
+    ASSERT_EQ(ti->output_port_map[0].end, -1);
+    ASSERT_EQ(ti->output_port_map[0].part_size, 1);
+
+    //  No back edges
+    ASSERT_EQ(ti->back_edges.size(), 1);
+    ASSERT_EQ(ti->back_edges[0].from, 0);
+    ASSERT_EQ(ti->back_edges[0].to, 1);
+    ASSERT_EQ(ti->back_edges[0].axis, -1);
+    ASSERT_EQ(ti->back_edges[0].stride, 1);
+    ASSERT_EQ(ti->back_edges[0].start, 0);
+    ASSERT_EQ(ti->back_edges[0].end, -1);
+    ASSERT_EQ(ti->back_edges[0].part_size, 1);
+}
+
+TEST_F(CNNNetReaderImplTest, canParseScalar) {
+    std::string model = R"V0G0N(
+<net batch="1" name="SimpleNet" version="2">
+    <layers>
+        <layer id="0" name="input" precision="FP32" type="Input">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                    <dim>16</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="1" name="scalar" precision="FP32" type="Const">
+            <output>
+                <port id="0"/>
+            </output>
+            <blobs>
+                <custom offset="0" size="4"/>
+            </blobs>
+        </layer>
+        <layer id="2" name="reshape" precision="FP32" type="Reshape">
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>5</dim>
+                    <dim>16</dim>
+                </port>
+                <port id="1"/>
+            </input>
+            <output>
+                <port id="2">
+                    <dim>90</dim>
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+        <edge from-layer="1" from-port="0" to-layer="2" to-port="1"/>
+    </edges>
+</net>
+    )V0G0N";
 
-        auto *ti = dynamic_cast<TensorIterator*>(layer.get());
-        ASSERT_NE(nullptr, ti);
-        ASSERT_EQ(ti->type, "TensorIterator");
+    CNNNetReaderImpl reader(make_shared<V2FormatParserCreator>());
+    sts = reader.ReadNetwork(model.data(), model.length(), &resp);
+    ASSERT_EQ(OK, sts) << resp.msg;
+    auto blob = make_shared_blob<uint8_t>(TensorDesc(Precision::U8, {4}, Layout::C));
+    blob->allocate();
+    auto buffer = blob->buffer().as<float*>();
+    float SCALAR_VALUE = 90;
+    buffer[0] = SCALAR_VALUE;
 
-        //  Check Input port mapping
-        ASSERT_EQ(ti->input_port_map.size(), 2);
-        int i = ti->input_port_map[0].axis == 1 ? 0 : 1;
-        ASSERT_EQ(ti->input_port_map[i].axis, 1);
-        ASSERT_EQ(ti->input_port_map[i].stride, 1);
-        ASSERT_EQ(ti->input_port_map[i].start, 0);
-        ASSERT_EQ(ti->input_port_map[i].end, -1);
-        ASSERT_EQ(ti->input_port_map[i].part_size, 1);
-        ASSERT_EQ(ti->input_port_map[1-i].axis, -1);
-        ASSERT_EQ(ti->input_port_map[1-i].stride, 1);
-        ASSERT_EQ(ti->input_port_map[1-i].start, 0);
-        ASSERT_EQ(ti->input_port_map[1-i].end, -1);
-        ASSERT_EQ(ti->input_port_map[1-i].part_size, 1);
+    sts = reader.SetWeights(blob, &resp);
+    ASSERT_EQ(OK, sts) << resp.msg;
 
-        //  Check Output port mapping
-        ASSERT_EQ(ti->output_port_map.size(), 1);
-        ASSERT_EQ(ti->output_port_map[0].axis, 1);
-        ASSERT_EQ(ti->output_port_map[0].stride, 1);
-        ASSERT_EQ(ti->output_port_map[0].start, 0);
-        ASSERT_EQ(ti->output_port_map[0].end, -1);
-        ASSERT_EQ(ti->output_port_map[0].part_size, 1);
+    auto net = reader.getNetwork(&resp);
 
-        //  No back edges
-        ASSERT_EQ(ti->back_edges.size(), 1);
-        ASSERT_EQ(ti->back_edges[0].from, 0);
-        ASSERT_EQ(ti->back_edges[0].to, 1);
-        ASSERT_EQ(ti->back_edges[0].axis, -1);
-        ASSERT_EQ(ti->back_edges[0].stride, 1);
-        ASSERT_EQ(ti->back_edges[0].start, 0);
-        ASSERT_EQ(ti->back_edges[0].end, -1);
-        ASSERT_EQ(ti->back_edges[0].part_size, 1);
+    ASSERT_NE(nullptr, net) << resp.msg;
+    CNNLayerPtr layer;
+    sts = net->getLayerByName("scalar", layer, &resp);
+    ASSERT_EQ(OK, sts) << resp.msg;
+    ASSERT_NE(nullptr, layer.get());
+    ASSERT_EQ(layer->type, "Const");
+    auto actualBlob = layer->blobs.begin()->second;
+    ASSERT_EQ(actualBlob->buffer().as<float*>()[0], SCALAR_VALUE);
+    auto scalarDesc = layer->outData[0]->getTensorDesc();
+    ASSERT_TRUE(scalarDesc.getDims().empty());
+    ASSERT_EQ(scalarDesc.getLayout(), SCALAR);
+    ASSERT_EQ(scalarDesc.getPrecision(), Precision::FP32);
 }
diff --git a/inference-engine/tests/unit/cnn_network/cnn_network_impl_test.cpp b/inference-engine/tests/unit/cnn_network/cnn_network_impl_test.cpp
index 9a5a47ab1..896333198 100644
--- a/inference-engine/tests/unit/cnn_network/cnn_network_impl_test.cpp
+++ b/inference-engine/tests/unit/cnn_network/cnn_network_impl_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/cnn_network/layer_builder.h b/inference-engine/tests/unit/cnn_network/layer_builder.h
new file mode 100644
index 000000000..2de6472e9
--- /dev/null
+++ b/inference-engine/tests/unit/cnn_network/layer_builder.h
@@ -0,0 +1,150 @@
+#include <utility>
+
+/*
+* INTEL CONFIDENTIAL
+* Copyright (C) 2018-2019 Intel Corporation.
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel Corporation
+* or its suppliers or licensors. Title to the Material remains with
+* Intel Corporation or its suppliers and licensors. The Material may
+* contain trade secrets and proprietary and confidential information
+* of Intel Corporation and its suppliers and licensors, and is protected
+* by worldwide copyright and trade secret laws and treaty provisions.
+* No part of the Material may be used, copied, reproduced, modified,
+* published, uploaded, posted, transmitted, distributed, or disclosed
+* in any way without Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you by
+* disclosure or delivery of the Materials, either expressly, by implication,
+* inducement, estoppel or otherwise. Any license under such intellectual
+* property rights must be express and approved by Intel in writing.
+*
+* Include any supplier copyright notices as supplier requires Intel to use.
+*
+* Include supplier trademarks or logos as supplier requires Intel to use,
+* preceded by an asterisk. An asterisked footnote can be added as follows:
+* *Third Party trademarks are the property of their respective owners.
+*
+* Unless otherwise agreed by Intel in writing, you may not remove or alter
+* this notice or any other notice embedded in Materials by Intel or Intel's
+* suppliers or licensors in any way.
+*/
+
+#include <gtest/gtest.h>
+#include <tests_common.hpp>
+#include <memory>
+#include "parameters.h"
+#include "shapes.h"
+
+using namespace InferenceEngine;
+using namespace InferenceEngine::details;
+
+class LayerBuilder {
+private:
+    CNNLayerPtr layer;
+    std::string dataName = "data";
+    std::unique_ptr<Parameters> parameters;
+public:
+    explicit LayerBuilder (InferenceEngine::CNNLayer::Ptr createdLayer) : layer(std::move(createdLayer)) {
+        parameters = std::unique_ptr<Parameters>(new Parameters(layer->type));
+    }
+
+    LayerBuilder&  setParams(bool valid) {
+        if (valid) {
+            layer->params = parameters->getValidParameters();
+        } else {
+            layer->params = parameters->getInvalidParameters();
+        }
+        return *this;
+    }
+
+    LayerBuilder&  setShapes(std::vector<DataPtr>& spData, bool valid_input) {
+        testing::InOutShapes shapes;
+        LayersWithNotEqualIO layersWithNotEqualIO;
+        LayersWithEqualIO layersWithEqualIO;
+        LayersWithNIO layersWithNIO;
+        std::vector<Layers*> layers{&layersWithNotEqualIO, &layersWithEqualIO, &layersWithNIO};
+        ShapesHelper* shapesHelper = nullptr;
+        for(const auto& layer : layers) {
+            if (layer->containLayer(this->layer->type)) {
+                shapesHelper = layer->factoryShape();
+                break;
+            }
+        }
+        if (valid_input) {
+            shapes = shapesHelper->getValidShapes();
+        } else {
+            shapes = shapesHelper->getInvalidInputShapes();
+        }
+        for (const auto& inData : shapes.inDims) {
+            DataPtr data = std::make_shared<Data>(dataName, inData, InferenceEngine::Precision::FP32);
+            spData.push_back(data);
+            layer->insData.push_back(data);
+        }
+        for (const auto& outData : shapes.outDims) {
+            layer->outData.push_back(std::make_shared<Data>(dataName, outData, InferenceEngine::Precision::FP32));
+        }
+        delete shapesHelper;
+        return *this;
+    }
+
+    CNNLayerPtr get() {
+        return layer;
+    }
+
+    int getNumOfParams() {
+        return parameters->getNumOfParameters();
+    }
+
+    int getNumOfLayerVariant() {
+        LayersWithNotEqualIO layersWithNotEqualIO;
+        LayersWithEqualIO layersWithEqualIO;
+        LayersWithNIO layersWithNIO;
+        Layers* layers[] = {&layersWithNotEqualIO, &layersWithEqualIO, &layersWithNIO};
+        int cnt = 0;
+        for(const auto& layer : layers) {
+            if (layer->containLayer(this->layer->type)) {
+                cnt++;
+            }
+        }
+        return cnt;
+    }
+};
+
+class CNNLayerValidationTests : public testing::TestWithParam<std::string>{
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        type = params;
+    }
+
+    std::shared_ptr<LayerBuilder>& createConcreteLayer(const std::string& type) {
+        layer = std::make_shared<LayerBuilder>(TestsCommon::createLayer(type));
+        return layer;
+    }
+
+    std::shared_ptr<LayerBuilder>&  getBuilder() {
+        return layer;
+    }
+
+    CNNLayerPtr getLayer() {
+        return layer.get()->get();
+    }
+
+    int getNumOfParams() {
+        return layer.get()->getNumOfParams();
+    }
+
+    int getNumOfLayerVariant() {
+        return layer.get()->getNumOfLayerVariant();
+    }
+protected:
+    std::string type;
+    bool valid_params = true;
+    bool valid_input = true;
+    std::shared_ptr<LayerBuilder> layer;
+};
+
+#define assertThat(type) SCOPED_TRACE("");createConcreteLayer(type)
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/cnn_network/layout_tests.cpp b/inference-engine/tests/unit/cnn_network/layout_tests.cpp
index 11ad64572..49faf87bd 100644
--- a/inference-engine/tests/unit/cnn_network/layout_tests.cpp
+++ b/inference-engine/tests/unit/cnn_network/layout_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/cnn_network/mean_image.cpp b/inference-engine/tests/unit/cnn_network/mean_image.cpp
index 2c31fa1ef..cd7c9226b 100644
--- a/inference-engine/tests/unit/cnn_network/mean_image.cpp
+++ b/inference-engine/tests/unit/cnn_network/mean_image.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/cnn_network/mean_image.h b/inference-engine/tests/unit/cnn_network/mean_image.h
index 3b4ffcedf..5b85aa87b 100644
--- a/inference-engine/tests/unit/cnn_network/mean_image.h
+++ b/inference-engine/tests/unit/cnn_network/mean_image.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/cnn_network/parameters.h b/inference-engine/tests/unit/cnn_network/parameters.h
new file mode 100644
index 000000000..45420d627
--- /dev/null
+++ b/inference-engine/tests/unit/cnn_network/parameters.h
@@ -0,0 +1,319 @@
+/*
+* INTEL CONFIDENTIAL
+* Copyright (C) 2018-2019 Intel Corporation.
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel Corporation
+* or its suppliers or licensors. Title to the Material remains with
+* Intel Corporation or its suppliers and licensors. The Material may
+* contain trade secrets and proprietary and confidential information
+* of Intel Corporation and its suppliers and licensors, and is protected
+* by worldwide copyright and trade secret laws and treaty provisions.
+* No part of the Material may be used, copied, reproduced, modified,
+* published, uploaded, posted, transmitted, distributed, or disclosed
+* in any way without Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you by
+* disclosure or delivery of the Materials, either expressly, by implication,
+* inducement, estoppel or otherwise. Any license under such intellectual
+* property rights must be express and approved by Intel in writing.
+*
+* Include any supplier copyright notices as supplier requires Intel to use.
+*
+* Include supplier trademarks or logos as supplier requires Intel to use,
+* preceded by an asterisk. An asterisked footnote can be added as follows:
+* *Third Party trademarks are the property of their respective owners.
+*
+* Unless otherwise agreed by Intel in writing, you may not remove or alter
+* this notice or any other notice embedded in Materials by Intel or Intel's
+* suppliers or licensors in any way.
+*/
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include <queue>
+#include <random>
+
+enum class ParametersValues {
+    ZERO,
+    INT_POSITIVE,
+    INT_NEGATIVE,
+    FLOAT_POSITIVE,
+    FLOAT_NEGATIVE,
+    STRING
+};
+enum class ParameterRange {
+    SET,
+    SINGLE
+};
+using GoodBadParams = std::pair<std::vector<ParametersValues>, std::vector<ParametersValues>>;
+using Params = std::map<std::string, std::pair<ParameterRange, GoodBadParams>>;
+
+Params operator + (const Params& val1, const Params& val2) {
+    Params result;
+    result.insert(val1.begin(), val1.end());
+    result.insert(val2.begin(), val2.end());
+    return result;
+}
+
+class Parameters {
+private:
+    // Common for Convolution, Deconvolution, Pooling layers
+    Params common {
+            // Parameter name, range, type of good values, type of bad
+            {"stride-x", {ParameterRange::SINGLE,
+                                                  {{ParametersValues::INT_POSITIVE},
+                                                                                                             {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"stride-y", {ParameterRange::SINGLE,
+                                                  {{ParametersValues::INT_POSITIVE},
+                                                                                                             {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"kernel-x", {ParameterRange::SINGLE,
+                                                  {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                                                                                             {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"kernel-y", {ParameterRange::SINGLE, {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE}, {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"pad-x", {ParameterRange::SINGLE,
+                                                  {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                                                                                             {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"pad-y", {ParameterRange::SINGLE,
+                                                  {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                                                                                             {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}}
+    };
+    Params conv {
+            // Parameter name, range, type of good values, type of bad
+            {"dilation-x", {ParameterRange::SINGLE,
+                                   {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                           {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"dilation-y", {ParameterRange::SINGLE,
+                                   {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                           {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"output", {ParameterRange::SINGLE,
+                                   {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                           {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"group", {ParameterRange::SINGLE,
+                                   {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                           {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+    };
+    Params pooling {
+            // Parameter name, range, type of good values, type of bad
+            {"pool-method", {ParameterRange::SINGLE,
+                                    {{ParametersValues::STRING},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"exclude-pad", {ParameterRange::SINGLE,
+                                    {{ParametersValues::STRING},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}}
+    };
+    Params detectionOutput {
+            // Parameter name, range, type of good values, type of bad
+            {"num_classes", {ParameterRange::SINGLE,
+                                    {{ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::ZERO, ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"background_label_id", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"top_k", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"variance_encoded_in_target", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"keep_top_k", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"num_orient_classes", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"code_type", {ParameterRange::SINGLE,
+                                    {{ParametersValues::STRING},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"share_location", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"interpolate_orientation", {ParameterRange::SINGLE,
+                                    {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                            {ParametersValues::STRING}}}},
+            {"nms_threshold", {ParameterRange::SINGLE,
+                                    {{ParametersValues::FLOAT_POSITIVE},
+                                            {ParametersValues::FLOAT_NEGATIVE, ParametersValues::STRING}}}},
+            {"confidence_threshold", {ParameterRange::SINGLE,
+                                    {{ParametersValues::FLOAT_POSITIVE},
+                                            {ParametersValues::FLOAT_NEGATIVE, ParametersValues::STRING}}}}
+    };
+    Params crop {
+            {"axis", {ParameterRange::SET,
+                             {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                     {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"offset", {ParameterRange::SET,
+                             {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                     {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"dim", {ParameterRange::SET,
+                             {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                     {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"crop_begin", {ParameterRange::SET,
+                             {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                     {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"crop_end", {ParameterRange::SET,
+                             {{ParametersValues::ZERO, ParametersValues::INT_POSITIVE},
+                                     {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+    };
+    Params interp {
+            {"height", {ParameterRange::SINGLE,
+                               {{ParametersValues::INT_POSITIVE, ParametersValues::ZERO},
+                                       {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+            {"factor", {ParameterRange::SINGLE,
+                               {{ParametersValues::FLOAT_POSITIVE},
+                                       {ParametersValues::ZERO, ParametersValues::FLOAT_NEGATIVE, ParametersValues::STRING}}}},
+            {"shrink_factor", {ParameterRange::SINGLE,
+                               {{ParametersValues::FLOAT_POSITIVE},
+                                       {ParametersValues::ZERO, ParametersValues::FLOAT_NEGATIVE, ParametersValues::STRING}}}},
+            {"zoom_factor", {ParameterRange::SINGLE,
+                               {{ParametersValues::FLOAT_POSITIVE},
+                                       {ParametersValues::ZERO, ParametersValues::FLOAT_NEGATIVE, ParametersValues::STRING}}}},
+            {"width", {ParameterRange::SINGLE,
+                               {{ParametersValues::INT_POSITIVE, ParametersValues::ZERO},
+                                       {ParametersValues::INT_NEGATIVE, ParametersValues::STRING}}}},
+    };
+    std::map<std::string, std::map<std::string, std::vector<std::string>>> stringParams {
+            {"Eltwise", {{"operation", {"sum", "max", "mul"}}}},
+            {"LRN", {{"region", {"across", "same"}}}},
+            {"Activation", {{"type", {"sigmoid", "tanh", "elu", "relu6"}}}},
+            {"Pooling", {{"pool_method", {"max", "avg"}}, {"exlude_pad", {"true", "false"}}}},
+            {"Resample", {{"type", {"caffe.ResampleParameter.LINEAR", "caffe.ResampleParameter.CUBIC",
+                                    "caffe.ResampleParameter.NEAREST"}}}},
+            {"DetectionOutput", {{"code_type", {"caffe.PriorBoxParameter.CENTER_SIZE", "caffe.PriorBoxParameter.CORNER"}}}}
+    };
+    std::map<std::string, Params> layerParamsNames {
+            // Layer name, Corresponding params names
+            {"Convolution", common + conv},
+            {"Deconvolution", common + conv},
+            {"Pooling", common + pooling},
+            {"DetectionOutput", detectionOutput},
+            {"Crop", crop},
+            {"Interp", interp}
+    };
+    const int zero = 0;
+    std::string type;
+    std::mt19937 gen;
+    std::uniform_int_distribution<int> distIntPositive;
+    std::uniform_int_distribution<int> distIntNegative;
+    std::uniform_real_distribution<float> distFloatNegative;
+    std::uniform_real_distribution<float> distFloatPositive;
+    std::queue<std::string> paramWasInvalid;
+public:
+    Parameters() {}
+    Parameters(const std::string& type) : gen(static_cast<unsigned long>(std::chrono::system_clock::now().time_since_epoch().count())),
+                                          distIntPositive(1, 100),
+                                          distIntNegative(-100, -1),
+                                          distFloatNegative(-10.0, -0.1),
+                                          distFloatPositive(0.1, 10.0) {
+        this->type = type;
+        Params param = getParametersByLayerName();
+        for (auto iter : param) {
+            paramWasInvalid.push(iter.first);
+        }
+    }
+    Params getParametersByLayerName() {
+        return layerParamsNames[type];
+    }
+
+    std::vector<std::string> getDifferentParamValues(const std::vector<ParametersValues>& valuesType,
+                                                     const std::vector<std::string>& stringValues) {
+        int magicNumber = 10;
+        std::vector<std::string> paramsValues = {};
+        for (auto i : valuesType) {
+            switch(i) {
+                case ParametersValues::ZERO: {
+                    paramsValues.push_back("0");
+                    break;
+                }
+                case ParametersValues::INT_POSITIVE: {
+                    for (int j = 0; j < magicNumber; ++j) {
+                        paramsValues.push_back(std::to_string(distIntPositive(gen)));
+                    }
+                    break;
+                }
+                case ParametersValues::INT_NEGATIVE: {
+                    for (int j = 0; j < magicNumber; ++j) {
+                        paramsValues.push_back(std::to_string(distIntNegative(gen)));
+                    }
+                    break;
+                }
+                case ParametersValues::FLOAT_POSITIVE: {
+                    for (int j = 0; j < magicNumber; ++j) {
+                        paramsValues.push_back(std::to_string(distFloatPositive(gen)));
+                    }
+                    break;
+                }
+                case ParametersValues::FLOAT_NEGATIVE: {
+                    for (int j = 0; j < magicNumber; ++j) {
+                        paramsValues.push_back(std::to_string(distFloatNegative(gen)));
+                    }
+                    break;
+                }
+                case ParametersValues::STRING: {
+                    paramsValues.insert(paramsValues.begin(), stringValues.begin(), stringValues.end());
+                    break;
+                }
+            }
+        }
+
+        return  paramsValues;
+    }
+
+    std::map<std::string, std::string> getValidParameters() {
+        Params param = getParametersByLayerName();
+        std::map<std::string, std::string> params;
+        for (auto i : param) {
+            params[i.first] = getCorrectParamValue(i.second, i.first);
+        }
+        return params;
+    }
+
+    std::string getCorrectParamValue(const std::pair<ParameterRange, GoodBadParams>& values,
+                                     const std::string& paramName) {
+        std::string parameter = "";
+        ParameterRange howMany = values.first;
+        std::vector<ParametersValues> valuesType = values.second.first;
+
+        std::vector<std::string> paramsValues = getDifferentParamValues(valuesType, stringParams[type][paramName]);
+
+        std::uniform_int_distribution<int> indexesDist(0, static_cast<int>(paramsValues.size() - 1));
+        if (howMany == ParameterRange::SINGLE) {
+            int index = indexesDist(gen);
+            parameter = paramsValues[index];
+        } else {
+            int numOfDigits = indexesDist(gen);
+            for (int i = 0; i < numOfDigits; i++) {
+                parameter += paramsValues[i] + ", ";
+            }
+        }
+        return parameter;
+    }
+
+    std::string getIncorrectParamValue(const std::pair<ParameterRange, GoodBadParams>& values) {
+        std::string parameter = "";
+        std::vector<ParametersValues> valuesType = values.second.second;
+
+        std::vector<std::string> paramsValues = getDifferentParamValues(valuesType, {"foo", "bar"});
+        std::uniform_int_distribution<int> indexesDist(0, static_cast<int>(paramsValues.size() - 1));
+        int index = indexesDist(gen);
+        parameter = paramsValues[index];
+
+        return parameter;
+    }
+
+    std::map<std::string, std::string> getInvalidParameters() {
+        std::map<std::string, std::string> params = getValidParameters();
+
+        std::string paramName = paramWasInvalid.front();
+        paramWasInvalid.pop();
+        params[paramName] = getIncorrectParamValue(layerParamsNames[type][paramName]);
+        return params;
+    }
+
+    int getNumOfParameters() {
+        return static_cast<int>(layerParamsNames[type].size());
+    }
+};
diff --git a/inference-engine/tests/unit/cnn_network/parser_tests_base.hpp b/inference-engine/tests/unit/cnn_network/parser_tests_base.hpp
index 28c4646b2..866da4ddb 100644
--- a/inference-engine/tests/unit/cnn_network/parser_tests_base.hpp
+++ b/inference-engine/tests/unit/cnn_network/parser_tests_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -56,7 +56,7 @@ class FormatParserTest : public TestsCommon {
     void assertParseFail(const std::string& fileContent) {
         try {
             parse(fileContent);
-            FAIL() << "Parser didn't trow";
+            FAIL() << "Parser didn't throw";
         } catch (const std::exception& ex) {
             SUCCEED() << ex.what();
         }
@@ -69,7 +69,7 @@ class FormatParserTest : public TestsCommon {
     void assertSetWeightsFail(const InferenceEngine::TBlob<uint8_t>::Ptr& binBlob) {
         try {
             parser->SetWeights(binBlob);
-            FAIL() << "Parser didn't trow";
+            FAIL() << "Parser didn't throw";
         } catch (const std::exception& ex) {
             SUCCEED() << ex.what();
         }
diff --git a/inference-engine/tests/unit/cnn_network/shapes.h b/inference-engine/tests/unit/cnn_network/shapes.h
new file mode 100644
index 000000000..87198f6ee
--- /dev/null
+++ b/inference-engine/tests/unit/cnn_network/shapes.h
@@ -0,0 +1,257 @@
+/*
+* INTEL CONFIDENTIAL
+* Copyright (C) 2018-2019 Intel Corporation.
+*
+* The source code contained or described herein and all documents
+* related to the source code ("Material") are owned by Intel Corporation
+* or its suppliers or licensors. Title to the Material remains with
+* Intel Corporation or its suppliers and licensors. The Material may
+* contain trade secrets and proprietary and confidential information
+* of Intel Corporation and its suppliers and licensors, and is protected
+* by worldwide copyright and trade secret laws and treaty provisions.
+* No part of the Material may be used, copied, reproduced, modified,
+* published, uploaded, posted, transmitted, distributed, or disclosed
+* in any way without Intel's prior express written permission.
+*
+* No license under any patent, copyright, trade secret or other
+* intellectual property right is granted to or conferred upon you by
+* disclosure or delivery of the Materials, either expressly, by implication,
+* inducement, estoppel or otherwise. Any license under such intellectual
+* property rights must be express and approved by Intel in writing.
+*
+* Include any supplier copyright notices as supplier requires Intel to use.
+*
+* Include supplier trademarks or logos as supplier requires Intel to use,
+* preceded by an asterisk. An asterisked footnote can be added as follows:
+* *Third Party trademarks are the property of their respective owners.
+*
+* Unless otherwise agreed by Intel in writing, you may not remove or alter
+* this notice or any other notice embedded in Materials by Intel or Intel's
+* suppliers or licensors in any way.
+*/
+
+#ifndef SHAPES_H
+#define SHAPES_H
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include <xml_net_builder.hpp>
+#include <random>
+#include <chrono>
+
+using namespace testing;
+
+struct Maps{
+    std::map<std::string, int> mapOfEqualShapes {
+            // Layer name, Correct num of input, Correct num of output
+            { "Convolution", 1},
+            { "Deconvolution", 1},
+            { "Crop", 1},
+            { "Interp", 1}
+    };
+
+    std::map<std::string, std::pair<int, int>> mapOfUnequalShapes {
+            // Layer name, Correct num of input, Correct num of output
+            { "Crop", {2, 1}},
+            { "DetectionOutput", {3, 1}},
+            { "Interp", {2, 1}}
+    };
+
+    std::map<std::string, std::pair<std::string, std::string>> mapOfContinuousShapes {
+            // Layer name, Correct num of input, Correct num of output
+            { "Slice", {"1", "N"}},
+            { "Eltwise", {"N", "1"}}
+    };
+} maps;
+
+class ShapesHelper {
+protected:
+    std::string type;
+public:
+    ShapesHelper() = default;
+
+    explicit ShapesHelper(std::string& type) {
+        this->type = type;
+    }
+
+    std::string getType() {return type;}
+
+    virtual testing::InOutShapes getValidShapes() = 0;
+    virtual testing::InOutShapes getInvalidInputShapes() = 0;
+
+    std::vector<std::vector<size_t>> generateShapes(const int& numOfShapes) {
+        std::mt19937 gen(static_cast<unsigned long>(std::chrono::high_resolution_clock::now().time_since_epoch().count()));
+        std::uniform_int_distribution<unsigned long> dist(1, 256);
+
+        std::vector<std::vector<size_t>> shape;
+        shape.reserve(static_cast<unsigned long>(numOfShapes));
+        for (int i = 0; i < numOfShapes; ++i) {
+            shape.push_back({dist(gen), dist(gen), dist(gen), 7});
+        }
+        return shape;
+    }
+    virtual ~ShapesHelper() = default;
+};
+
+class EqualIOShapesHelper : public ShapesHelper {
+public:
+    explicit EqualIOShapesHelper(std::string& type) : ShapesHelper(type) {};
+
+    testing::InOutShapes getValidShapes() override {
+        int numOfInput = {maps.mapOfEqualShapes[type]};
+        int numOfOutput = {maps.mapOfEqualShapes[type]};
+        std::vector<std::vector<size_t>> inputs = generateShapes(numOfInput);
+        std::vector<std::vector<size_t>> outputs = generateShapes(numOfOutput);
+        return {inputs, outputs};
+    }
+
+    testing::InOutShapes getInvalidInputShapes()  override {
+        int numOfOutput = maps.mapOfEqualShapes[type];
+        int numOfInput = maps.mapOfEqualShapes[type]  + numOfOutput;
+        std::vector<std::vector<size_t>> inputs = generateShapes(numOfInput);
+        std::vector<std::vector<size_t>> outputs = generateShapes(numOfOutput);
+        return {inputs, outputs};
+    }
+    ~EqualIOShapesHelper() override = default;
+};
+
+class NotEqualConcreteIOShapesHelper : public ShapesHelper {
+public:
+    explicit NotEqualConcreteIOShapesHelper(std::string& type) : ShapesHelper(type) {};
+
+    testing::InOutShapes getValidShapes() override {
+        int numOfInput = maps.mapOfUnequalShapes[type].first;
+        int numOfOutput = maps.mapOfUnequalShapes[type].second;
+        std::vector<std::vector<size_t>> inputs = generateShapes(numOfInput);
+        std::vector<std::vector<size_t>> outputs = generateShapes(numOfOutput);
+        return {inputs, outputs};
+    }
+
+    testing::InOutShapes getInvalidInputShapes()  override {
+        int numOfOutput = maps.mapOfUnequalShapes[type].second;
+        int numOfInput = maps. mapOfUnequalShapes[type].first + numOfOutput;
+
+        std::vector<std::vector<size_t>> inputs = generateShapes(numOfInput);
+        std::vector<std::vector<size_t>> outputs = generateShapes(numOfOutput);
+        return {inputs, outputs};
+    }
+    ~NotEqualConcreteIOShapesHelper() override = default;
+};
+
+class NotEqualIOShapesHelper : public ShapesHelper {
+private:
+    bool is_number(const std::string& s)
+    {
+        return !s.empty() && std::find_if(s.begin(),
+                                          s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
+    }
+
+public:
+
+    explicit NotEqualIOShapesHelper(std::string& type) : ShapesHelper(type) {};
+
+    testing::InOutShapes getValidShapes() override {
+        int numOfInput;
+        int numOfOutput;
+        std::vector<std::vector<size_t>> inputs;
+        std::vector<std::vector<size_t>> outputs;
+        if (is_number(maps.mapOfContinuousShapes[type].first)) {
+            numOfInput = std::stoi(maps.mapOfContinuousShapes[type].first);
+            inputs = generateShapes(numOfInput);
+            outputs = generateShapes(100);
+        } else {
+            numOfOutput = std::stoi(maps.mapOfContinuousShapes[type].second);
+            outputs = generateShapes(numOfOutput);
+            inputs = generateShapes(100);
+        }
+
+        return {inputs, outputs};
+    }
+
+    testing::InOutShapes getInvalidInputShapes()  override {
+        int numOfInput;
+        int numOfOutput;
+        std::vector<std::vector<size_t>> inputs;
+        std::vector<std::vector<size_t>> outputs;
+        if (is_number(maps.mapOfContinuousShapes[type].first)) {
+            numOfInput = std::stoi(maps.mapOfContinuousShapes[type].first) * 2;
+            inputs = generateShapes(numOfInput);
+            outputs = generateShapes(100);
+        } else {
+            numOfOutput = std::stoi(maps.mapOfContinuousShapes[type].second);
+            outputs = generateShapes(numOfOutput);
+            inputs = generateShapes(100);
+        }
+        return {inputs, outputs};
+    }
+
+    ~NotEqualIOShapesHelper() override = default;
+};
+
+class Layers {
+public:
+    virtual bool containLayer(std::string concrete_layer) = 0;
+    virtual ShapesHelper* factoryShape() = 0;
+    virtual ~Layers() = default;
+};
+
+class LayersWithEqualIO : public Layers {
+private:
+    std::string layer = "";
+public:
+    bool containLayer(std::string concrete_layer) override {
+        for (const auto& layer : maps.mapOfEqualShapes) {
+            if (concrete_layer == layer.first) {
+                this->layer = concrete_layer;
+                return true;
+            }
+        }
+        return false;
+    }
+    ShapesHelper* factoryShape() override {
+        return new EqualIOShapesHelper(this->layer);
+    }
+    ~LayersWithEqualIO() override = default;
+};
+
+class LayersWithNotEqualIO : public Layers{
+private:
+    std::string layer = "";
+public:
+    bool containLayer(std::string concrete_layer) override {
+        for (const auto& layer : maps.mapOfUnequalShapes) {
+            if (concrete_layer == layer.first) {
+                this->layer = concrete_layer;
+                return true;
+            }
+        }
+        return false;
+    }
+    ShapesHelper* factoryShape() override {
+        return new NotEqualConcreteIOShapesHelper(this->layer);
+    }
+    ~LayersWithNotEqualIO() override = default;
+};
+
+class LayersWithNIO : public Layers{
+private:
+    std::string layer = "";
+public:
+    bool containLayer(std::string concrete_layer) override {
+        for (const auto& layer : maps.mapOfContinuousShapes) {
+            if (concrete_layer == layer.first) {
+                this->layer = concrete_layer;
+                return true;
+            }
+        }
+        return false;
+    }
+    ShapesHelper* factoryShape() override {
+        return new NotEqualIOShapesHelper(this->layer);
+    }
+    ~LayersWithNIO() override = default;
+};
+
+#endif // SHAPES_H
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/cnn_network/v2_format_parser_test.cpp b/inference-engine/tests/unit/cnn_network/v2_format_parser_test.cpp
index 1b9cdc042..36b49ddff 100644
--- a/inference-engine/tests/unit/cnn_network/v2_format_parser_test.cpp
+++ b/inference-engine/tests/unit/cnn_network/v2_format_parser_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -144,6 +144,24 @@ TEST_F(V2FormatParserTest, failIfIdLessThanZero) {
     ASSERT_NO_FATAL_FAILURE(assertParseFail(content));
 }
 
+TEST_F(V2FormatParserTest, failIfIdNotInteger) {
+    string content = MAKE_ALEXNET_FOR_MEAN_TESTS_V2()
+            .node("channel").attr("id", "0").node("mean").attr("value", "104.5").close()
+            .newnode("channel").attr("id", "1").node("mean").attr("value", "117.8").close()
+            .newnode("channel").attr("id", "2_2").node("mean").attr("value", "123").close();
+
+    ASSERT_NO_FATAL_FAILURE(assertParseFail(content));
+}
+
+TEST_F(V2FormatParserTest, failIfValueNotFloat) {
+    string content = MAKE_ALEXNET_FOR_MEAN_TESTS_V2()
+            .node("channel").attr("id", "0").node("mean").attr("value", "104,5").close()
+            .newnode("channel").attr("id", "1").node("mean").attr("value", "117.8").close()
+            .newnode("channel").attr("id", "2").node("mean").attr("value", "123").close();
+
+    ASSERT_NO_FATAL_FAILURE(assertParseFail(content));
+}
+
 TEST_F(V2FormatParserTest, failIfIdMoreThanNumChannels) {
     string content = MAKE_ALEXNET_FOR_MEAN_TESTS_V2()
             .node("channel").attr("id", "4").node("mean").attr("value", "104.5").close();
@@ -653,4 +671,4 @@ TEST_F(V2FormatParserTest, canConvertActivationLayerAsClamp) {
     ASSERT_EQ(clamp->min_value, -5);
     ASSERT_EQ(clamp->max_value, 5);
     ASSERT_EQ(clamp->params.find("type"), clamp->params.end());
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/tests/unit/cnn_network/v3_format_parser_test.cpp b/inference-engine/tests/unit/cnn_network/v3_format_parser_test.cpp
index b80d2cb20..085c299de 100644
--- a/inference-engine/tests/unit/cnn_network/v3_format_parser_test.cpp
+++ b/inference-engine/tests/unit/cnn_network/v3_format_parser_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/cnn_network/xml_father_tests.cpp b/inference-engine/tests/unit/cnn_network/xml_father_tests.cpp
index 451a15b94..f43021d52 100644
--- a/inference-engine/tests/unit/cnn_network/xml_father_tests.cpp
+++ b/inference-engine/tests/unit/cnn_network/xml_father_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/gna/I8_quantisation_test.cpp b/inference-engine/tests/unit/engines/gna/I8_quantisation_test.cpp
index 8e69a3b63..cb6e8009b 100644
--- a/inference-engine/tests/unit/engines/gna/I8_quantisation_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/I8_quantisation_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <vector>
diff --git a/inference-engine/tests/unit/engines/gna/configuration_test.cpp b/inference-engine/tests/unit/engines/gna/configuration_test.cpp
index e17e6dbee..70229c64a 100644
--- a/inference-engine/tests/unit/engines/gna/configuration_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/configuration_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <vector>
@@ -133,4 +145,10 @@ TEST_F(GNAConfigTest, canMatchWithSingleMultipleOMPThreads) {
         .inNotCompactMode()
         .enable_omp_multithreading()
         .gna().propagate_forward().called_without().pwl_inserted_into_nnet();
+}
+
+TEST_F(GNAConfigTest, failToCreatePluginWithDifferentInputScaleFactors) {
+    assert_that().creating().gna_plugin()
+        .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR))+"_1", 1000)
+        .withGNAConfig(std::string(GNA_CONFIG_KEY(SCALE_FACTOR))+"_2", 2000).throws();
 }
 \ No newline at end of file
diff --git a/inference-engine/tests/unit/engines/gna/fp32_non_quantized_tests.cpp b/inference-engine/tests/unit/engines/gna/fp32_non_quantized_tests.cpp
new file mode 100644
index 000000000..faf574e68
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/fp32_non_quantized_tests.cpp
@@ -0,0 +1,208 @@
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
+//
+
+
+#include <vector>
+#include <gtest/gtest.h>
+#include <inference_engine/layer_transform.hpp>
+#include "gna_plugin/quantization/model_quantizer.hpp"
+#include "gna_plugin/quantization/layer_quantizer.hpp"
+#include "gna_matcher.hpp"
+
+using namespace InferenceEngine;
+using namespace GNAPluginNS;
+using namespace GNATestIRs;
+
+class FP32NonQuantizedTest : public GNATest {
+ protected:
+
+    void SetUp() override  {
+    }
+};
+
+TEST_F(FP32NonQuantizedTest, SplitFollowedByFCAndEltwiseOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {12.0, 12.0, 12.0, 12.0, 12.0,
+                                          12.0, 12.0, 12.0, 12.0, 12.0};
+    assert_that().onInferModel(FCWithPaddingAfterSplitModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SliceFollowedByFCAndEltwiseOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0};
+    assert_that().onInferModel(FCWithPaddingAfterSliceModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SliceFollowedByAlignedFCAndEltwiseOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {18.0, 18.0, 18.0, 18.0};
+    assert_that().onInferModel(SliceModelWithAlignedOutputs())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, DISABLED_SliceFollowedBy2FCsAnd2EltwisesOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {27.0, 27.0, 27.0, 27.0, 27.0, 27.0, 27.0, 27.0};
+    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, SplitAfterFCFollowedByFCAndEltwiseOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {232.0, 232.0, 232.0, 232.0, 232.0,
+                                          232.0, 232.0, 232.0, 232.0, 232.0};
+    assert_that().onInferModel(FCBeforeSplitModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+
+TEST_F(FP32NonQuantizedTest, ConcatPropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {121.0, 121.0, 121.0, 121.0, 121.0,
+                                          121.0, 121.0, 121.0, 121.0, 121.0,
+                                          121.0, 121.0, 121.0, 121.0, 121.0,
+                                          121.0, 121.0, 121.0, 121.0, 121.0};
+
+    assert_that().onInferModel(concatModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, DoubleConcatPropageteForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> expected_result = {141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0,
+                                          141.0, 141.0, 141.0, 141.0, 141.0};
+
+    assert_that().onInferModel(doubleConcatModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, multiple_inputs_correct_results) {
+    std::vector<float> input_data  = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> input2_data = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
+    std::vector<float> result      = {30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0};
+
+    assert_that().onInferModel(two_inputs_to_affine())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with().input("input_1", input_data).And().input("input_2", input2_data).result().equal_to(result);
+}
+
+
+TEST_F(FP32NonQuantizedTest, CropWithoutOffsetPropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<float> expected_result = {11.0, 11.0, 11.0, 11.0, 11.0,
+                                          11.0, 11.0, 11.0, 11.0, 11.0};
+
+    assert_that().onInferModel(cropWithoutOffsetModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, CropWithAlignedOffsetPropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<float> expected_result = {3.0, 3.0, 3.0, 3.0, 3.0,
+                                          3.0, 3.0, 3.0, 3.0, 3.0};
+
+    assert_that().onInferModel(cropWithAlignedOffsetModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, CropWithOffsetPropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<float> expected_result = {7.0, 7.0, 7.0, 7.0, 7.0,
+                                          7.0, 7.0, 7.0, 7.0, 7.0};
+
+    assert_that().onInferModel(cropWithOffsetModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, CropWithMaxOffsetPropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<float> expected_result = {1.0, 1.0, 1.0, 1.0, 1.0,
+                                          1.0, 1.0, 1.0, 1.0, 1.0};
+
+    assert_that().onInferModel(cropWithMaxOffsetModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, CropWithOffsetAfterFCPropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<float> expected_result = {111.0, 111.0, 111.0, 111.0, 111.0,
+                                          111.0, 111.0, 111.0, 111.0, 111.0};
+
+    assert_that().onInferModel(cropWithOffsetExtendedModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+TEST_F(FP32NonQuantizedTest, CopySimpleCasePropagateForwardWithSuccessOnCPU) {
+    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
+    std::vector<float> expected_result = {12.0, 12.0, 12.0, 12.0, 12.0,
+                                          12.0, 12.0, 12.0, 12.0, 12.0,
+                                          11.0, 11.0, 11.0, 11.0, 11.0,
+                                          11.0, 11.0, 11.0, 11.0, 11.0,};
+
+    assert_that().onInferModel(copyModel())
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+
+
+TEST_F(FP32NonQuantizedTest, ScaleShiftWithBroadcastSupported) {
+    std::vector<float> input_data (40, 1.0);
+
+    std::vector<float> expected_result = {2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+                                          2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+                                          2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+                                          2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0,
+                                          2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0};
+
+    assert_that().onInferModel(ScaleShift3DModel()).withWeigthsPattern({1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f})
+        .inNotCompactMode().gna().propagate_forward().onCPU()
+        .called_with_input_and_expected_output(input_data, expected_result);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/engines/gna/gna_allocator_test.cpp b/inference-engine/tests/unit/engines/gna/gna_allocator_test.cpp
index 35ddc770c..d83c1c32b 100644
--- a/inference-engine/tests/unit/engines/gna/gna_allocator_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_allocator_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include "gna_plugin/gna_allocator.hpp"
diff --git a/inference-engine/tests/unit/engines/gna/gna_aminteldnn_test.cpp b/inference-engine/tests/unit/engines/gna/gna_aminteldnn_test.cpp
new file mode 100644
index 000000000..2dfd28847
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/gna_aminteldnn_test.cpp
@@ -0,0 +1,38 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include "gna_matcher.hpp"
+#include "inference_engine.hpp"
+#include "dnn.h"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class GNA_AmIntelDnn_test : public GNATest {
+protected:
+    AmIntelDnn amIntelDnn;
+    intel_nnet_type_t  desc = {};
+};
+
+TEST_F(GNA_AmIntelDnn_test, intel_nnet_type_tDoesNotFreeHisMemory) {
+    desc.pLayers = nullptr;
+    amIntelDnn.component.resize(1);
+    amIntelDnn.component[0].operation = kDnnAffineOp;
+    ASSERT_NO_THROW(amIntelDnn.InitGNAStruct(&desc));  // thirst init is ok
+    ASSERT_THROW(amIntelDnn.InitGNAStruct(&desc), InferenceEngine::details::InferenceEngineException);  // second init involves memory leak
+}
+
+TEST_F(GNA_AmIntelDnn_test, intel_nnet_type_t_ptrIsNullptr) {
+    ASSERT_THROW(amIntelDnn.InitGNAStruct(nullptr), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(GNA_AmIntelDnn_test, intel_nnet_type_t_pLayersIsNotNullptr) {
+    ASSERT_THROW(amIntelDnn.InitGNAStruct(&desc), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(GNA_AmIntelDnn_test, ComponentIsEmpty) {
+    desc.pLayers = nullptr;
+    ASSERT_THROW(amIntelDnn.InitGNAStruct(&desc), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/engines/gna/gna_api_stub.cpp b/inference-engine/tests/unit/engines/gna/gna_api_stub.cpp
index 5417e52ad..0223fc0ba 100644
--- a/inference-engine/tests/unit/engines/gna/gna_api_stub.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_api_stub.cpp
@@ -1,6 +1,24 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//*****************************************************************************
 //
+// INTEL CONFIDENTIAL
+// Copyright (C) 2018-2019 Intel Corporation
+//
+// The source code contained or described herein and all documents related
+// to the source code ("Material") are owned by Intel Corporation or its suppliers
+// or licensors. Title to the Material remains with Intel Corporation or its suppliers
+// and licensors. The Material contains trade secrets and proprietary
+// and confidential information of Intel or its suppliers and licensors.
+// The Material is protected by worldwide copyright and trade secret laws and treaty
+// provisions. No part of the Material may be used, copied, reproduced, modified,
+// published, uploaded, posted, transmitted, distributed, or disclosed in any way
+// without Intel's prior express written permission.
+//
+// No license under any patent, copyright, trade secret or other intellectual
+// property right is granted to or conferred upon you by disclosure or delivery
+// of the Materials, either expressly, by implication, inducement, estoppel
+// or otherwise. Any license under such intellectual property rights must
+// be express and approved by Intel in writing.
+//*****************************************************************************
 
 #define INTEL_GNA_DLLEXPORT 1
 #include <gna-api.h>
diff --git a/inference-engine/tests/unit/engines/gna/gna_cppwraper_test.cpp b/inference-engine/tests/unit/engines/gna/gna_cppwraper_test.cpp
new file mode 100644
index 000000000..de937d2d3
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/gna_cppwraper_test.cpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef _WIN32
+#include <mm_malloc.h>
+#endif
+#include "gna_api_wrapper.hpp"
+#include <gtest/gtest.h>
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class GNA_CPPWrapper_test : public ::testing::Test {};
+
+TEST_F(GNA_CPPWrapper_test, CPPWrapperConstructorCannotWorkWithInputEqualToZero) {
+    ASSERT_THROW(GNAPluginNS::CPPWrapper<intel_nnet_type_t>(0), InferenceEngine::details::InferenceEngineException);
+}
+
+TEST_F(GNA_CPPWrapper_test, CPPWrapperConstructorCanWorkWithInputNotEqualToZero) {
+    ASSERT_NO_THROW(GNAPluginNS::CPPWrapper<intel_nnet_type_t>(3));
+}
+
+TEST_F(GNA_CPPWrapper_test, CPPWrapperConstructorCanWorkWithoutAnyInput) {
+    ASSERT_NO_THROW(GNAPluginNS::CPPWrapper<intel_nnet_type_t>());
+}
+
diff --git a/inference-engine/tests/unit/engines/gna/gna_graph_aot_test.cpp b/inference-engine/tests/unit/engines/gna/gna_graph_aot_test.cpp
index 45385bee3..0add2554e 100644
--- a/inference-engine/tests/unit/engines/gna/gna_graph_aot_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_graph_aot_test.cpp
@@ -1,6 +1,35 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
+
 
 #include <vector>
 #include <gtest/gtest.h>
diff --git a/inference-engine/tests/unit/engines/gna/gna_hardware_precision_test.cpp b/inference-engine/tests/unit/engines/gna/gna_hardware_precision_test.cpp
index b7dba213e..c9f4bce90 100644
--- a/inference-engine/tests/unit/engines/gna/gna_hardware_precision_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_hardware_precision_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <vector>
diff --git a/inference-engine/tests/unit/engines/gna/gna_input_precision_test.cpp b/inference-engine/tests/unit/engines/gna/gna_input_precision_test.cpp
new file mode 100644
index 000000000..d776c03d3
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/gna_input_precision_test.cpp
@@ -0,0 +1,51 @@
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-generated-actions.h>
+#include "gna_matcher.hpp"
+#include "matchers/input_data_matcher.hpp"
+#include "test_irs.hpp"
+
+using namespace std;
+using namespace InferenceEngine;
+using namespace ::testing;
+using namespace GNATestIRs;
+
+class GNAInputPrecisionTest : public GNATest {
+};
+
+TEST_F(GNAInputPrecisionTest, CanProcessU8Input) {
+    std::vector<float> input_init = {128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
+    double scale = 1.f / 128;
+    std::vector<int16_t> input_processed = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+
+    assert_that().onInferModel(Fc2DOutputModel())
+            .inNotCompactMode().gna().propagate_forward().called_with()
+            .preprocessed_input_data(input_init, input_processed, Precision::U8)
+            .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), scale);
+}
+
+TEST_F(GNAInputPrecisionTest, CanProcessFP32Input) {
+    std::vector<float> input_init = {1280, 1280, 1280, 1280, 1280, 1280, 1280, 1280, 1280, 1280};
+    double scale = 1.f / 1280;
+    std::vector<int16_t> input_processed = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+
+    assert_that().onInferModel(Fc2DOutputModel())
+            .inNotCompactMode().gna().propagate_forward().called_with()
+            .preprocessed_input_data(input_init, input_processed, Precision::FP32)
+            .withGNAConfig(GNA_CONFIG_KEY(SCALE_FACTOR), scale);
+}
diff --git a/inference-engine/tests/unit/engines/gna/gna_matcher.cpp b/inference-engine/tests/unit/engines/gna/gna_matcher.cpp
index c609e4e8f..016ae354a 100644
--- a/inference-engine/tests/unit/engines/gna/gna_matcher.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_matcher.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <mock_icnn_network.hpp>
@@ -16,10 +28,14 @@
 #include "matchers/pwl_quantization_metrics_matcher.hpp"
 #include "matchers/conv_matcher.hpp"
 #include "matchers/pool_matcher.hpp"
+#include "matchers/fill_with_data.hpp"
+#include "matchers/weights_matcher.hpp"
 
 #include <gmock/gmock-generated-actions.h>
 #include <gmock/gmock-more-actions.h>
 #include "gmock/gmock.h"
+#include "matchers/input_data_matcher.hpp"
+#include <inference_engine/blob_factory.hpp>
 
 using namespace std;
 using namespace InferenceEngine;
@@ -30,10 +46,10 @@ class NullAllocator : public IAllocator {
  void * ptr = nullptr;
 public:
     NullAllocator() {
-        ptr = malloc(1);
+        ptr = new char[1];
     }
     ~NullAllocator() {
-        free(ptr);
+        delete[] static_cast<char*>(ptr);
     }
     void * lock(void * handle, LockOp = LOCK_FOR_WRITE)  noexcept override {
         return ptr;
@@ -56,8 +72,11 @@ void GNAPropagateMatcher :: match() {
     try {
         // matching gna propagate forward call.
         GNAPlugin plugin(_env.config);
+        plugin.SetPolicy(_env.policy);
         size_t inputSize = 10;
         size_t outputSize = 10;
+        InputsDataMap inputsInfo;
+        OutputsDataMap  outputsInfo;
 
         auto loadNetworkFromIR = [&] () {
             CNNNetReader net_reader;
@@ -90,7 +109,11 @@ void GNAPropagateMatcher :: match() {
             auto weights = make_shared_blob<uint8_t >(Precision::U8, C, {weightsSize});
 
             weights->allocate();
-            GNATest::fillWeights(weights);
+            if (!_env.weightsFillPattern.empty()) {
+                GNATest::fillWeights(weights, _env.weightsFillPattern);
+            } else {
+                GNATest::fillWeights(weights);
+            }
             net_reader.SetWeights(weights);
 
             net_reader.getNetwork().setTargetDevice(_env.target_device);
@@ -101,35 +124,60 @@ void GNAPropagateMatcher :: match() {
             }
 
             plugin.LoadNetwork(net_reader.getNetwork());
+
+            inputsInfo = net_reader.getNetwork().getInputsInfo();
+            outputsInfo = net_reader.getNetwork().getOutputsInfo();
         };
 
         auto loadNetworkFromAOT = [&] () {
-            plugin.ImportNetwork(_env.importedModelFileName);
+            auto sp = plugin.ImportNetwork(_env.importedModelFileName);
+            inputsInfo = plugin.GetInputs();
+            outputsInfo = plugin.GetOutputs();
         };
 
-        TBlob<float>::Ptr input, output;
+        std::map<std::string, Blob::Ptr> input;
+        TBlob<float>::Ptr output;
         size_t in_N = 1;
         size_t out_N = in_N;
         size_t in_C;
         size_t out_C;
 
-
         auto loadNetwork = [&]() {
             if (!_env.importedModelFileName.empty()) {
                 ASSERT_NO_FATAL_FAILURE(loadNetworkFromAOT());
             } else {
                 ASSERT_NO_FATAL_FAILURE(loadNetworkFromIR());
             }
-            in_C = _env.matchOutput == true ? _env.input_init.size(): inputSize;
-            out_C = _env.matchOutput == true ? _env.expected_output.size(): outputSize;
-
-            input.reset(new TBlob<float>(Precision::FP32, NC, {in_C, in_N}));
-            input->allocate();
+            const int channel_idx = 0;
+            bool haveInputs = !_env.input_init.empty();
+            for (auto && info :inputsInfo) {
+                decltype(_env.input_init)::iterator it;
+                auto & inputBlob = input[info.first];
+                if (haveInputs) {
+                    if (inputsInfo.size() != 1) {
+                        ASSERT_NE(it = _env.input_init.find(info.first), _env.input_init.end());
+                    } else {
+                        ASSERT_NE(0, _env.input_init.size());
+                        it = _env.input_init.begin();
+                    }
+                    in_C = it->second.size();
+                    ASSERT_EQ(in_C, info.second->getDims()[channel_idx]);
+                }
 
-            if(_env.matchOutput == true) {
-                std::copy_n(_env.input_init.cbegin(), in_N * in_C, input->buffer().as<float *>());
+                inputBlob = make_blob_with_precision(_env.input_precision, info.second->getLayout(), info.second->getDims());
+                inputBlob->allocate();
+                if (haveInputs) {
+                    if (_env.input_precision == Precision::FP32) {
+                        std::copy_n(it->second.cbegin(), in_N * in_C, inputBlob->buffer().as<float *>());
+                    } else if (_env.input_precision == Precision::U8) {
+                        std::copy_n(it->second.cbegin(), in_N * in_C, inputBlob->buffer().as<uint8_t *>());
+                    } else {
+                        std::logic_error(std::string("Unsupported input precision: ") + _env.input_precision.name());
+                    }
+                }
             }
 
+            out_C = _env.matchOutput == true ? _env.expected_output.size(): outputSize;
             output.reset(new TBlob<float>(Precision::FP32, NC, {out_C, out_N}));
             output->allocate();
         };
@@ -199,6 +247,21 @@ void GNAPropagateMatcher :: match() {
                         EXPECT_CALL(mockApi, GNAPropagateForward(_, _, _, _, _, _))
                             .WillOnce(DoAll(SaveArgPointee<1>(savedNet), Return(GNA_NOERROR)));
                         break;
+                    case GnaPluginTestEnvironment::matchInputData :
+                        combined->add(new InputDataMatcher(_env.input_processed));
+                        break;
+                    case GnaPluginTestEnvironment::fillOutputValues :
+                        combined->add(new OutputFiller(_env.fillValue, _env.fillValue));
+                        break;
+                    case GnaPluginTestEnvironment::matchAffineWeightsTranspose:
+                        HasWeightsTranspozed(combined, _env.transposedData, _env.transposeArgs);
+                        break;
+                    case GnaPluginTestEnvironment::matchAffineWeights:
+                        HasWeightsEq(combined, _env.transposedData);
+                        break;
+                    case GnaPluginTestEnvironment::saveAffineWeights:
+                        SaveWeights(combined, _env.transposedData, _env.transposedArgsForSaving);
+                        break;
                     default:
                         EXPECT_CALL(mockApi, GNAPropagateForward(_, _, _, _, _, _))
                             .WillOnce(Return(GNA_NOERROR));
@@ -211,15 +274,39 @@ void GNAPropagateMatcher :: match() {
         }
 
         loadNetwork();
-        plugin.Infer(*input, *output);
-        if(_env.matchOutput == true) {
+
+        if (!inputsInfo.empty()) {
+            BlobMap  input_blob_map;
+            BlobMap  output_blob_map;
+            for (auto info : inputsInfo) {
+                size_t current_size = InferenceEngine::details::product(info.second->getTensorDesc().getDims());
+                input_blob_map[info.first] = input[info.first];
+            }
+            size_t offset = 0;
+            for (auto info : outputsInfo) {
+                size_t current_size = InferenceEngine::details::product(info.second->getTensorDesc().getDims());
+                output_blob_map[info.first] = make_shared_blob<float>(
+                    info.second->getPrecision(), NC,
+                    {1, details::product(info.second->getDims())}, output->data() + offset, current_size * sizeof(float));
+                offset += current_size;
+            }
+
+            plugin.Infer(input_blob_map, output_blob_map);
+
+        } else {
+            plugin.Infer(*input.begin()->second, *output);
+        }
+
+
+        if (_env.matchOutput) {
             std::vector<float> actual_output(output->size());
 
             std::copy_n(output->cbuffer().as<float *>(), out_C * out_N, actual_output.begin());
 
-            ASSERT_EQ(true,
-                    std::equal(_env.expected_output.begin(), _env.expected_output.end(), actual_output.begin())
-                  );
+            for (auto ref = _env.expected_output.begin(); ref != _env.expected_output.end(); ref++ ) {
+                auto idx = std::distance( _env.expected_output.begin(), ref);
+                ASSERT_FLOAT_EQ(*ref, actual_output[idx]) << "at "<< idx;
+            }
         }
 
         std::map<std::string, InferenceEngine::InferenceEngineProfileInfo> perfMap;
@@ -437,4 +524,4 @@ void GNAQueryStateMatcher :: match() {
     catch(...) {
         FAIL() << "unknown exception thrown";
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/tests/unit/engines/gna/gna_matcher.hpp b/inference-engine/tests/unit/engines/gna/gna_matcher.hpp
index b249aa2a8..cd3680c1e 100644
--- a/inference-engine/tests/unit/engines/gna/gna_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/gna_matcher.hpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
@@ -47,7 +59,12 @@ class GnaPluginTestEnvironment {
         matchPwlQuantizeMetrics,
         matchCopyInserted,
         matchDiagonalInserted,
-        saveArgs
+        saveArgs,
+        matchInputData,
+        fillOutputValues,
+        matchAffineWeightsTranspose,
+        matchAffineWeights,
+        saveAffineWeights
     };
     std::vector<MatchWhat> whatToMatch;
     enum {
@@ -68,14 +85,22 @@ class GnaPluginTestEnvironment {
     bool exportNetworkOnly = false;
     std::function<void (InferenceEngine::CNNNetwork &)> cb;
     std::map<std::string, std::string> config;
+    GNAPluginNS::Policy policy;
     bool matchThrows = false;
     uint32_t proc_type = static_cast<intel_gna_proc_t>(GNA_SOFTWARE & GNA_HARDWARE);
     std::string importedModelFileName;
     bool is_profiling_enabled = false;
     bool matchOutput = false;
     bool is_setup_of_omp_theads_expected = false;
-    std::vector<float> input_init;
+    std::vector<int16_t> input_processed;
+    InferenceEngine::Precision input_precision = InferenceEngine::Precision::FP32;
+    std::map<std::string, std::vector<float>> input_init;
     std::vector<float> expected_output;
+    int16_t fillValue = 0;
+    std::vector<float> weightsFillPattern;
+    std::pair<int, int> transposeArgs;
+    std::pair<int, int> transposedArgsForSaving;
+    std::vector<uint16_t>* transposedData;
 };
 
 class GNATestBase {
@@ -103,7 +128,7 @@ class GNATestConfigurability : public GNATestBase{
         return *dynamic_cast<T*>(this);
     }
     template <class VType>
-    T & withGNAConfig(const std::string keyName, const VType &value) {
+    T & withGNAConfig(const std::string &keyName, const VType &value) {
         std::stringstream ss;
         ss << value;
         _env.config[keyName] = ss.str();
@@ -153,6 +178,22 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
+    GNAPropagateMatcher & returns() {
+        return *this;
+    }
+
+    GNAPropagateMatcher & And() {
+        return *this;
+    }
+
+    GNAPropagateMatcher & that() {
+        return *this;
+    }
+
+    GNAPropagateMatcher & result() {
+        return *this;
+    }
+
     GNAPropagateMatcher & called_with() {
         return *this;
     }
@@ -161,11 +202,35 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         _env.matchInserted = false;
         return *this;
     }
+    /**
+     * @brief gna_propagate_forward will fill all output pointers of 16 bits with this value
+     */
+    GNAPropagateMatcher & filledWith(int16_t valueToFill) {
+        _env.fillValue = valueToFill;
+        getMatcher() = GnaPluginTestEnvironment::fillOutputValues;
+        return *this;
+    }
 
-    GNAPropagateMatcher & called_with_input_and_expected_output(std::vector<float>& input_data,
-                                                                std::vector<float>& expect) {
+    GNAPropagateMatcher & equal_to(const std::vector<float>& expect) {
         _env.matchOutput = true;
-        _env.input_init = input_data;
+        _env.expected_output = expect;
+        return *this;
+    }
+
+    GNAPropagateMatcher & input(const std::string & inputName, const std::vector<float>& inputData) {
+        _env.input_init[inputName] = inputData;
+        return *this;
+    }
+
+    GNAPropagateMatcher & inputScale(const std::string & inputName, float scaleFactor) {
+        _env.config[std::string(GNA_CONFIG_KEY(SCALE_FACTOR)) + "_" + inputName] = std::to_string(scaleFactor);
+        return *this;
+    }
+
+    GNAPropagateMatcher & called_with_input_and_expected_output(const std::vector<float>& input_data,
+                                                                const std::vector<float>& expect) {
+        _env.matchOutput = true;
+        _env.input_init["any_input_name"] = input_data;
         _env.expected_output = expect;
         return *this;
     }
@@ -234,11 +299,47 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
+    GNAPropagateMatcher &preprocessed_input_data(std::vector<float> input_init, std::vector<int16_t> input_processed,
+                                                 InferenceEngine::Precision inputPrecision) {
+        getMatcher() = GnaPluginTestEnvironment::matchInputData;
+        _env.input_processed = std::move(input_processed);
+        _env.input_init["placeholder"] = std::move(input_init);
+        _env.input_precision = inputPrecision;
+        return *this;
+    }
+
     GNAPropagateMatcher & copy_inserted_into_nnet() {
         getMatcher() = GnaPluginTestEnvironment::matchCopyInserted;
         return *this;
     }
 
+
+    GNAPropagateMatcher & affine_weights_transpozed(std::pair<int, int> &&transpozedArgs) {
+        getMatcher() = GnaPluginTestEnvironment::saveAffineWeights;
+        _env.transposedArgsForSaving = std::move(transpozedArgs);
+
+        return *this;
+    }
+
+    GNAPropagateMatcher & affine_weights() {
+        getMatcher() = GnaPluginTestEnvironment::saveAffineWeights;
+        return *this;
+    }
+
+    GNAPropagateMatcher & affine_weights_eq(std::vector<uint16_t> & sourceWeights) {
+        getMatcher() = GnaPluginTestEnvironment::matchAffineWeights;
+        _env.transposedData = &sourceWeights;
+        return *this;
+    }
+
+
+    GNAPropagateMatcher & affine_weights_transposed(std::vector<uint16_t> & sourceWeights, std::pair<int,int> transposeData) {
+        getMatcher() = GnaPluginTestEnvironment::matchAffineWeightsTranspose;
+        _env.transposeArgs = transposeData;
+        _env.transposedData = &sourceWeights;
+        return *this;
+    }
+
     GNAPropagateMatcher & nnet_input_precision(const InferenceEngine::Precision &precision) {
         getMatcher() = GnaPluginTestEnvironment::matchPrecision;
         _env.nnet_precision.input_precision = precision;
@@ -271,6 +372,13 @@ class GNAPropagateMatcher : public GNATestConfigurability<GNAPropagateMatcher> {
         return *this;
     }
 
+    GNAPropagateMatcher & to(std::vector<uint16_t> & sourceWeights) {
+        _env.transposedData = &sourceWeights;
+        return *this;
+    }
+
+
+
     GNAPropagateMatcher & onCPU() {
         _env.target_device = InferenceEngine::TargetDevice::eCPU;
         return *this;
@@ -371,14 +479,29 @@ class GNAQueryStateMatcher : public GNADumpXNNMatcher {
 class GNATest : public ::testing::Test, public GNATestConfigurability<GNATest>  {
     using base = GNATestConfigurability<GNATest>;
     using base::_env;
-    std::list<std::vector<uint8_t>> dataUsedInMatchers;
+    class XStorage {
+     public:
+        std::vector<uint8_t> data;
+        std::function<void (void *)> destroyer;
+       ~XStorage() {
+           destroyer(&data.front());
+       }
+    };
+    std::list<XStorage> dataUsedInMatchers;
     std::list<std::shared_ptr<GNATestBase>> returnedMatchers;
 
  public:
     template <class T>
     T & storage () {
-        dataUsedInMatchers.push_back(std::vector<uint8_t >(sizeof(T)));
-        return *reinterpret_cast<T*> (&dataUsedInMatchers.back().front());
+        dataUsedInMatchers.push_back({std::vector<uint8_t >(sizeof(T)), [](void * toDestroy) {
+            reinterpret_cast<T*>(toDestroy)->~T();
+        }});
+
+        auto ptr = reinterpret_cast<T*> (&dataUsedInMatchers.back().data.front());
+        // sad to say we are not using destructors here so data might leak
+        new(ptr) T;
+
+        return *ptr;
     }
     GNATest()  : base(GnaPluginTestEnvironment()) {}
     GNATest & as() {
@@ -399,6 +522,9 @@ class GNATest : public ::testing::Test, public GNATestConfigurability<GNATest>
         getMatcher() = GnaPluginTestEnvironment::saveArgs;
         return *this;
     }
+    GNATest & save() {
+        return *this;
+    }
 
     GNATest & onInfer1AFModel() {
         _env.model = GNATestIRs::Fc2DOutputModel();
@@ -438,6 +564,10 @@ class GNATest : public ::testing::Test, public GNATestConfigurability<GNATest>
         _env.cb = _cb;
         return *this;
     }
+    GNATest &  withWeigthsPattern(std::vector<float> && initializer) {
+        _env.weightsFillPattern = std::move(initializer);
+        return *this;
+    }
     GNATest & gna() {
         return *this;
     }
@@ -484,7 +614,16 @@ class GNATest : public ::testing::Test, public GNATestConfigurability<GNATest>
         return dynamic_cast<GNAPluginAOTMatcher&>(*returnedMatchers.back());
     }
 
-    static void fillWeights(InferenceEngine::Blob::Ptr weights, float value = 1) {
-        std::fill_n(weights->buffer().as<float*>(), weights->byteSize()/sizeof(float), value);
+    static void fillWeights(InferenceEngine::Blob::Ptr weights, std::vector<float> pattern = {1.f}) {
+        float * p = weights->buffer().as<float *>();
+        float * pEnd = p + weights->byteSize() / sizeof(float);
+
+        for(; p!=pEnd ;) {
+            for (int i = 0; i != (weights->byteSize() / sizeof(float) / 3) + 1; i++) {
+                for (int j = 0; j != pattern.size() && p != pEnd; j++, p++) {
+                    *p = pattern[j];
+                }
+            }
+        }
     }
 };
diff --git a/inference-engine/tests/unit/engines/gna/gna_memory_test.cpp b/inference-engine/tests/unit/engines/gna/gna_memory_test.cpp
index aaf0f5776..3c46c50c2 100644
--- a/inference-engine/tests/unit/engines/gna/gna_memory_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_memory_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <vector>
diff --git a/inference-engine/tests/unit/engines/gna/gna_mock_api.hpp b/inference-engine/tests/unit/engines/gna/gna_mock_api.hpp
index 230c5ab94..20a60c7bd 100644
--- a/inference-engine/tests/unit/engines/gna/gna_mock_api.hpp
+++ b/inference-engine/tests/unit/engines/gna/gna_mock_api.hpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2017-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #pragma once
diff --git a/inference-engine/tests/unit/engines/gna/gna_proc_type_test.cpp b/inference-engine/tests/unit/engines/gna/gna_proc_type_test.cpp
index de17de735..7373c98b0 100644
--- a/inference-engine/tests/unit/engines/gna/gna_proc_type_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_proc_type_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <vector>
diff --git a/inference-engine/tests/unit/engines/gna/gna_pwl_test.cpp b/inference-engine/tests/unit/engines/gna/gna_pwl_test.cpp
index 408deec15..865649f30 100644
--- a/inference-engine/tests/unit/engines/gna/gna_pwl_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_pwl_test.cpp
@@ -1,6 +1,35 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
+
 
 #include <vector>
 #include <gtest/gtest.h>
diff --git a/inference-engine/tests/unit/engines/gna/gna_query_state_tests.cpp b/inference-engine/tests/unit/engines/gna/gna_query_state_tests.cpp
index f61aecd47..27725d60f 100644
--- a/inference-engine/tests/unit/engines/gna/gna_query_state_tests.cpp
+++ b/inference-engine/tests/unit/engines/gna/gna_query_state_tests.cpp
@@ -1,6 +1,35 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
+
 
 #include <vector>
 #include <gtest/gtest.h>
diff --git a/inference-engine/tests/unit/engines/gna/i16_quantisation_test.cpp b/inference-engine/tests/unit/engines/gna/i16_quantisation_test.cpp
index c8767b0b5..cf4259942 100644
--- a/inference-engine/tests/unit/engines/gna/i16_quantisation_test.cpp
+++ b/inference-engine/tests/unit/engines/gna/i16_quantisation_test.cpp
@@ -1,10 +1,23 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <vector>
 #include <gtest/gtest.h>
 #include <inference_engine/layer_transform.hpp>
+#include <gna-api-types-xnn.h>
 #include "gna_plugin/quantization/model_quantizer.hpp"
 #include "gna_plugin/quantization/layer_quantizer.hpp"
 #include "gna_matcher.hpp"
@@ -123,7 +136,7 @@ TEST_F(I16QuantisationTest, DISABLED_outputScaleFactorForAffineIsCorrect){
 
     auto weights = make_shared_blob<uint8_t >(Precision::U8, C, {440});
     weights->allocate();
-    fillWeights(weights, 100);
+    fillWeights(weights, {100});
     net_reader.SetWeights(weights);
 
     auto newNet = q.quantize(net_reader.getNetwork(), 1000);
@@ -190,41 +203,16 @@ TEST_F(I16QuantisationTest, SplitFollowedByActivation_DummyDiagonalAffineInserti
         .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
 }
 
-TEST_F(I16QuantisationTest, SplitFollowedByFCAndEltwiseOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-    std::vector<float> expected_result = {12.0, 12.0, 12.0, 12.0, 12.0,
-                                          12.0, 12.0, 12.0, 12.0, 12.0};
-    assert_that().onInferModel(FCWithPaddingAfterSplitModel())
-        .inNotCompactMode().gna().propagate_forward().onCPU()
-        .called_with_input_and_expected_output(input_data, expected_result);
-}
-
-TEST_F(I16QuantisationTest, SliceFollowedByFCAndEltwiseOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-    std::vector<float> expected_result = {14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0};
-    assert_that().onInferModel(FCWithPaddingAfterSliceModel())
-        .inNotCompactMode().gna().propagate_forward().onCPU()
-        .called_with_input_and_expected_output(input_data, expected_result);
-}
-
-TEST_F(I16QuantisationTest, SliceFollowedByAlignedFCAndEltwiseOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-    std::vector<float> expected_result = {18.0, 18.0, 18.0, 18.0};
-    assert_that().onInferModel(SliceModelWithAlignedOutputs())
-        .inNotCompactMode().gna().propagate_forward().onCPU()
-        .called_with_input_and_expected_output(input_data, expected_result);
+TEST_F(I16QuantisationTest, DISABLED_SliceFollowedBy2FCsAnd2Eltwises_AlignedFilterInsertion) {
+    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
+        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
 }
 
-TEST_F(I16QuantisationTest, SliceFollowedBy2FCsAnd2EltwisesOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-    std::vector<float> expected_result = {27.0, 27.0, 27.0, 27.0, 27.0, 27.0, 27.0, 27.0};
-    assert_that().onInferModel(twoFCWithPaddingAfterSliceModel())
-        .inNotCompactMode().gna().propagate_forward().onCPU()
-        .called_with_input_and_expected_output(input_data, expected_result);
+// ToDo requires implementation of aligning filter for concat inputs and improvement of
+// qunatization/scaling algorithm for concat
+TEST_F(I16QuantisationTest, DISABLED_DoubleConcatPropageteForwardWithSuccess_AlignedFilterInsertion) {
+    assert_that().onInferModel(doubleConcatModel())
+        .inNotCompactMode().gna().propagate_forward().called_with().diagonal_inserted_into_nnet();
 }
 
 TEST_F(I16QuantisationTest, EltwiseSumm_onlyOneIdentityInsertion) {
@@ -253,36 +241,24 @@ TEST_F(I16QuantisationTest, EltwiseMull_willInsertTwoIdentities) {
         .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
 }
 
-TEST_F(I16QuantisationTest, ConcatPropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-    std::vector<float> expected_result = {121.0, 121.0, 121.0, 121.0, 121.0,
-                                          121.0, 121.0, 121.0, 121.0, 121.0,
-                                          121.0, 121.0, 121.0, 121.0, 121.0,
-                                          121.0, 121.0, 121.0, 121.0, 121.0};
-
-    assert_that().onInferModel(concatModel())
-        .inNotCompactMode().gna().propagate_forward().onCPU()
-        .called_with_input_and_expected_output(input_data, expected_result);
+TEST_F(I16QuantisationTest, multiple_inputs_supported) {
+    assert_that().onInferModel(two_inputs_to_affine())
+        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
 }
+TEST_F(I16QuantisationTest, multiple_inputs_can_handle_individual_scale_factors) {
+    std::vector<float> input_data  = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    std::vector<float> input2_data = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
+    std::vector<float> result      = {0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5};
 
-TEST_F(I16QuantisationTest, DoubleConcatPropageteForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
-    std::vector<float> expected_result = {141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0,
-                                          141.0, 141.0, 141.0, 141.0, 141.0};
+    assert_that().onInferModel(two_inputs_to_affine())
+        .inNotCompactMode().gna().propagate_forward()
+        .called_with().inputScale("input_1", 2).And()
+        .inputScale("input_2", 2).returns().result().filledWith(16384).that().equal_to(result);
+}
 
-    assert_that().onInferModel(doubleConcatModel())
-        .inNotCompactMode().gna().propagate_forward().onCPU()
-        .called_with_input_and_expected_output(input_data, expected_result);
+TEST_F(I16QuantisationTest, DISABLED_multiple_inputs_into_concat_supported) {
+    assert_that().onInferModel(two_inputs_to_concat())
+        .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().once();
 }
 
 TEST_F(I16QuantisationTest, ScaleShift_Affine_WillResultInIdentityInsertion) {
@@ -306,76 +282,52 @@ TEST_F(I16QuantisationTest, AffineWith2AffineOutputs_ResultInOnlyOneIdentityInse
         .inNotCompactMode().gna().propagate_forward().called_with().pwl_inserted_into_nnet().twice();
 }
 
+TEST_F(I16QuantisationTest, ScaleShiftWithBroadcast_ResultInDiagonalInsertion) {
+
+    auto & affineWeights = storage<std::vector<uint16_t>>();
+
+    affineWeights = {
+        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+        2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+    };
+
+    assert_that().onInferModel(ScaleShift3DModel()).withWeigthsPattern({1.0f,2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f})
+        .inNotCompactMode().gna().propagate_forward().called_with().called_with().affine_weights_eq(affineWeights);
+}
+
 // TODO: this mode not required in rel life scenarios so far
 TEST_F(I16QuantisationTest, DISABLED_AffineWithOutputToMemoryAndToAnotherNode_ResultInCopyInsertion) {
     assert_that().onInferModel(affineToMemoryModel()).inNotCompactMode().gna().propagate_forward().
         called_with().copy_inserted_into_nnet();
 }
 
-TEST_F(I16QuantisationTest, CropWithoutOffsetPropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    std::vector<float> expected_result = {11.0, 11.0, 11.0, 11.0, 11.0,
-                                          11.0, 11.0, 11.0, 11.0, 11.0};
+TEST_F(I16QuantisationTest, DISABLED_permutationOfWeightsBetweenConvAndAffine) {
+    auto & affineWeights = storage<std::vector<uint16_t>>();
 
-    assert_that().onInferModel(cropWithoutOffsetModel())
-    .inNotCompactMode().gna().propagate_forward().onCPU()
-    .called_with_input_and_expected_output(input_data, expected_result);
-}
+    // least likely that width and height both are multiple of 7
+    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
 
-TEST_F(I16QuantisationTest, CropWithAlignedOffsetPropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    std::vector<float> expected_result = {3.0, 3.0, 3.0, 3.0, 3.0,
-                                          3.0, 3.0, 3.0, 3.0, 3.0};
+    // here weights are transpozed
+    save().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode().from().propagate_forward().affine_weights_transpozed({128, 61}).to(affineWeights);
 
-    assert_that().onInferModel(cropWithAlignedOffsetModel())
-    .inNotCompactMode().gna().propagate_forward().onCPU()
-    .called_with_input_and_expected_output(input_data, expected_result);
+    // here weights shouldn't be transposed
+    assert_that().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_eq(affineWeights);
 }
 
-TEST_F(I16QuantisationTest, CropWithOffsetPropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                     0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    std::vector<float> expected_result = {7.0, 7.0, 7.0, 7.0, 7.0,
-                                          7.0, 7.0, 7.0, 7.0, 7.0};
+TEST_F(I16QuantisationTest, DISABLED_noPermutationOfWeightsBetweenConvAndAffineIfPermuteLayerWithCorrectArgs) {
+    auto & affineWeights = storage<std::vector<uint16_t>>();
 
-    assert_that().onInferModel(cropWithOffsetModel())
-    .inNotCompactMode().gna().propagate_forward().onCPU()
-    .called_with_input_and_expected_output(input_data, expected_result);
-}
-
-TEST_F(I16QuantisationTest, CropWithMaxOffsetPropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    std::vector<float> expected_result = {1.0, 1.0, 1.0, 1.0, 1.0,
-                                          1.0, 1.0, 1.0, 1.0, 1.0};
-
-    assert_that().onInferModel(cropWithMaxOffsetModel())
-    .inNotCompactMode().gna().propagate_forward().onCPU()
-    .called_with_input_and_expected_output(input_data, expected_result);
-}
+    // least likely that width and height both are multiple of 7
+    auto weigthsPattern = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
 
-TEST_F(I16QuantisationTest, CropWithOffsetAfterFCPropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    std::vector<float> expected_result = {111.0, 111.0, 111.0, 111.0, 111.0,
-                                          111.0, 111.0, 111.0, 111.0, 111.0};
+    save().onInferModel(affineAfterConvWithPermute()).withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode().from().propagate_forward().affine_weights().to(affineWeights);
 
-    assert_that().onInferModel(cropWithOffsetExtendedModel())
-    .inNotCompactMode().gna().propagate_forward().onCPU()
-    .called_with_input_and_expected_output(input_data, expected_result);
-}
-
-TEST_F(I16QuantisationTest, CopySimpleCasePropagateForwardWithSuccessOnCPU) {
-    std::vector<float> input_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
-                                     0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    std::vector<float> expected_result = {12.0, 12.0, 12.0, 12.0, 12.0,
-                                          12.0, 12.0, 12.0, 12.0, 12.0,
-                                          11.0, 11.0, 11.0, 11.0, 11.0,
-                                          11.0, 11.0, 11.0, 11.0, 11.0,};
-
-    assert_that().onInferModel(copyModel())
-    .inNotCompactMode().gna().propagate_forward().onCPU()
-    .called_with_input_and_expected_output(input_data, expected_result);
-}
+    assert_that().onInferModel(affineAfterConvNoPermute()).withWeigthsPattern(weigthsPattern)
+        .inNotCompactMode().gna().propagate_forward().called_with().affine_weights_transposed(affineWeights, {128, 61});
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/engines/gna/matchers/conv_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/conv_matcher.hpp
index 4d5947093..db64350b5 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/conv_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/conv_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 
diff --git a/inference-engine/tests/unit/engines/gna/matchers/copy_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/copy_matcher.hpp
index c947ecd9e..4c32f3320 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/copy_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/copy_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 #include "nnet_base_matcher.hpp"
diff --git a/inference-engine/tests/unit/engines/gna/matchers/diag_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/diag_matcher.hpp
index cd6c2469e..e2bb02373 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/diag_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/diag_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 #include"gna-api.h"
diff --git a/inference-engine/tests/unit/engines/gna/matchers/fill_with_data.hpp b/inference-engine/tests/unit/engines/gna/matchers/fill_with_data.hpp
new file mode 100644
index 000000000..d46ab3038
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/matchers/fill_with_data.hpp
@@ -0,0 +1,74 @@
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
+
+ #pragma once
+
+
+class OutputFiller : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+    mutable std::stringstream reason;
+    int32_t fill32BValue;
+    int16_t fill16BValue;
+
+ public:
+    OutputFiller(int32_t fill32BValue, int16_t fill16BValue) : fill32BValue(fill32BValue), fill16BValue(fill16BValue) {}
+
+
+    bool MatchAndExplain(const intel_nnet_type_t* foo, ::testing::MatchResultListener* listener) const override {
+        if (foo == nullptr)
+            return false;
+        reason.str("");
+        // checking pointers are set
+        for (int i=0; i < foo->nLayers; i++) {
+            if (nullptr == foo->pLayers[i].pInputs ||
+                nullptr == foo->pLayers[i].pOutputs) {
+                reason << "input/output pointers in pLayers[" << i << "] shouldn't be null NULL";
+                return false;
+            }
+            auto nElements = foo->pLayers[i].nOutputColumns * foo->pLayers[i].nOutputRows;
+            if (foo->pLayers[i].nBytesPerOutput == 2) {
+                std::fill_n((int16_t *) foo->pLayers[i].pOutputs, nElements, fill16BValue);
+            } else if (foo->pLayers[i].nBytesPerOutput == 4) {
+                std::fill_n((int32_t *) foo->pLayers[i].pOutputs, nElements, fill32BValue);
+            } else {
+                reason << "output bitness of layer [" << i << "] shouldn't be 16 or 32, but was " << foo->pLayers[i].nBytesPerOutput;
+                return false;
+            }
+        }
+        return true;
+    }
+
+    void DescribeTo(::std::ostream *os) const override {
+        *os << "Not a Matcher but a fake, but error happened anyway: " << reason.str();
+    }
+
+};
+
diff --git a/inference-engine/tests/unit/engines/gna/matchers/input_data_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/input_data_matcher.hpp
new file mode 100644
index 000000000..f45f9ee46
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/matchers/input_data_matcher.hpp
@@ -0,0 +1,69 @@
+#include <utility>
+
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
+
+#pragma once
+
+#include <gmock/gmock-matchers.h>
+#include "nnet_base_matcher.hpp"
+
+class InputDataMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t *> {
+    std::vector<int16_t> refInput;
+public:
+
+    explicit InputDataMatcher(const std::vector<int16_t> &_refInput) : refInput(_refInput) {}
+
+    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+        if (foo->pLayers == nullptr) {
+            *listener << "Address of the first layer descriptor is NULL";
+            return false;
+        }
+        auto firstLayer = foo->pLayers[0];
+        auto actualInput = firstLayer.pInputs;
+        if (!actualInput) {
+            *listener << "Input of the first layer is NULL";
+            return false;
+        }
+
+        auto *actualInputI16 = reinterpret_cast<int16_t *>(actualInput);
+        for (int i = 0; i < refInput.size(); i++) {
+            if (actualInputI16[i] != refInput[i]) {
+                *listener << "Actual and reference value of input doesn't match: " << actualInputI16[i] << " vs "
+                          << refInput[i];
+            }
+        }
+        return true;
+    }
+
+    void DescribeTo(::std::ostream *os) const override {}
+};
diff --git a/inference-engine/tests/unit/engines/gna/matchers/nnet_base_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/nnet_base_matcher.hpp
index 7c1f69b15..267777c92 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/nnet_base_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/nnet_base_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 
diff --git a/inference-engine/tests/unit/engines/gna/matchers/pool_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/pool_matcher.hpp
index 009e61c7c..e9b6ae9de 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/pool_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/pool_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 
diff --git a/inference-engine/tests/unit/engines/gna/matchers/precision_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/precision_matcher.hpp
index 9dfdc8780..1d04fadbf 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/precision_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/precision_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 #include "nnet_base_matcher.hpp"
diff --git a/inference-engine/tests/unit/engines/gna/matchers/pwl_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/pwl_matcher.hpp
index 9060cd516..1efba3c5d 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/pwl_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/pwl_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 #include "nnet_base_matcher.hpp"
diff --git a/inference-engine/tests/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp
index cccd94069..c55cad898 100644
--- a/inference-engine/tests/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp
+++ b/inference-engine/tests/unit/engines/gna/matchers/pwl_quantization_metrics_matcher.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 #include <cmath>
diff --git a/inference-engine/tests/unit/engines/gna/matchers/weights_matcher.hpp b/inference-engine/tests/unit/engines/gna/matchers/weights_matcher.hpp
new file mode 100644
index 000000000..3c50f85af
--- /dev/null
+++ b/inference-engine/tests/unit/engines/gna/matchers/weights_matcher.hpp
@@ -0,0 +1,212 @@
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
+
+#pragma once
+#include"gna-api.h"
+#include "nnet_base_matcher.hpp"
+#include "quantization/quantization.h"
+
+using TranspozedData = std::tuple<std::vector<uint16_t>*, int, int>;
+
+class TranspozeIterator {
+    std::pair<int, int> dims;
+    int _offset = 0;
+    int _row = 0;
+    int _col = 0;
+    int _outputRow = 0;
+ public :
+    TranspozeIterator(const std::pair<int, int> & dims) : dims(std::move(dims)) {
+    }
+    TranspozeIterator(const TranspozedData & data) : TranspozeIterator({std::get<1>(data), std::get<2>(data)}) {
+    }
+
+    TranspozeIterator operator ++ (int) {
+        TranspozeIterator c(*this);
+        this->operator++();
+        return c;
+   }
+
+   void reset() {
+       _offset = 0;
+       _row = 0;
+       _col = 0;
+       _outputRow = 0;
+    }
+
+   //  prefix form
+   TranspozeIterator& operator ++ () {
+        if (dims.first == 0 || dims.second == 0) {
+            _offset ++;
+        } else {
+            // step over whole row length
+            _row++;
+            // once number of rows hit max value
+            if (_row == dims.second) {
+                // increment offset within row
+                _col++;
+                // restart from first row
+                _row = 0;
+                // restart from next output channel
+                if (_col == dims.first) {
+                    _outputRow++;
+                    _col = 0;
+                }
+            }
+            _offset = _col + _row * dims.first  +  _outputRow * dims.first * dims.second;
+        }
+        return *this;
+    }
+    // getting index
+    operator int() {
+         return _offset;
+    }
+    int row() const noexcept {
+        return _row;
+    }
+    int col() const noexcept {
+        return _col;
+    }
+    int outputRow() const noexcept{
+        return _outputRow;
+    }
+};
+
+class WeightsMatcher : public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+    enum HowMatch{
+        eNone,
+        eEq,
+        eTranspozed
+    } eMatchKind;
+    TranspozedData  transpozedData;
+
+    mutable std::stringstream error;
+    mutable TranspozeIterator iterator;
+    mutable int actual;
+ public:
+    explicit WeightsMatcher(const TranspozedData & data) :
+        eMatchKind(eTranspozed),
+        transpozedData(data),
+        iterator(data) {
+        if (0 == std::get<1>(transpozedData) || 0 == std::get<2>(transpozedData)) {
+            eMatchKind = eEq;
+        }
+    }
+    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+        if (foo == nullptr)
+            return false;
+        iterator.reset();
+
+        for(int i = 0; i < foo->nLayers; i++) {
+            if (foo->pLayers[i].nLayerKind != INTEL_AFFINE &&
+                foo->pLayers[i].nLayerKind != INTEL_AFFINE_DIAGONAL) continue;
+
+            auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+
+            auto affineWeightsSize = foo->pLayers[i].nOutputRows *
+                foo->pLayers[i].nLayerKind == INTEL_AFFINE_DIAGONAL ? 1 : foo->pLayers[i].nInputRows;
+
+            if (affineWeightsSize != std::get<0>(transpozedData)->size()) {
+                error << "gna-xnn layer(" << i << ") weights size mismatch: expected "
+                      << std::get<0>(transpozedData)->size() << ", but was: " << affineWeightsSize;
+                break;
+            }
+
+            auto pWeights = reinterpret_cast<uint16_t *>(affine->pWeights);
+
+            for (int i = 0; i != affineWeightsSize; i++, iterator++) {
+                auto savedVal = (&std::get<0>(transpozedData)->front())[iterator];
+                if (pWeights[i] != savedVal) {
+                    actual = pWeights[i];
+                    return false;
+                }
+            }
+            return true;
+        }
+        return false;
+    };
+    void DescribeTo(::std::ostream *os) const override {
+        *os << error.str() << std::endl;
+        if (eMatchKind == eEq) {
+            *os << "weights of affine layers are not equal, error at: ";
+        } else {
+            *os << "weights of affine layers are not transpozed, error at: ";
+        }
+        *os << (int)iterator << ", actual=" << actual<<", expected=" << (&std::get<0>(transpozedData)->front())[iterator];
+    }
+};
+
+
+class WeightsSaver: public ::testing::MatcherInterface<const intel_nnet_type_t*> {
+    mutable TranspozeIterator iterator;
+    std::vector<uint16_t>* weights;
+ public:
+    explicit WeightsSaver(TranspozedData data) :
+        weights(std::get<0>(data)), iterator(data) {
+    }
+    bool MatchAndExplain(const intel_nnet_type_t *foo, ::testing::MatchResultListener *listener) const override {
+        if (foo == nullptr)
+            return false;
+        for(int i = 0; i < foo->nLayers; i++) {
+            if (foo->pLayers[i].nLayerKind != INTEL_AFFINE) continue;
+
+            auto affine = (intel_affine_func_t*)foo->pLayers[i].pLayerStruct;
+
+            auto affineWeightsSize = foo->pLayers[i].nOutputRows * foo->pLayers[i].nInputRows;
+            auto pWeights = reinterpret_cast<uint16_t *>(affine->pWeights);
+            weights->resize(affineWeightsSize);
+
+            for (int i=0; i != affineWeightsSize; i++, ++iterator) {
+                (*weights)[i] = pWeights[iterator];
+            }
+
+            return true;
+        }
+        return false;
+    };
+    void DescribeTo(::std::ostream *os) const override {
+        *os << "affine layer not found";
+    }
+};
+
+
+void HasWeightsTranspozed(std::unique_ptr<NNetComponentMatcher>& components,  std::vector<uint16_t>* data, std::pair<int, int> dims) {
+    components->add(new WeightsMatcher(make_tuple(data, dims.first, dims.second)));
+}
+
+void HasWeightsEq(std::unique_ptr<NNetComponentMatcher>& components,  std::vector<uint16_t>* data) {
+    components->add(new WeightsMatcher(make_tuple(data, 0, 0)));
+}
+
+void SaveWeights(std::unique_ptr<NNetComponentMatcher>& components,  std::vector<uint16_t>* data, std::pair<int, int> dims) {
+    components->add(new WeightsSaver(make_tuple(data, dims.first, dims.second)));
+}
+
diff --git a/inference-engine/tests/unit/engines/gna/test_irs.cpp b/inference-engine/tests/unit/engines/gna/test_irs.cpp
index f9a035341..0ab9a0721 100644
--- a/inference-engine/tests/unit/engines/gna/test_irs.cpp
+++ b/inference-engine/tests/unit/engines/gna/test_irs.cpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #include "test_irs.hpp"
 
@@ -342,7 +370,7 @@ std::string eltwiseToMemoryModel() {
 
 std::string activationAfterSplitModel() {
     return R"V0G0N(
-    <Net Name="activationAfterSplit" version="2" precision="FP32" batch="1">
+    <net Name="activationAfterSplit" version="2" precision="FP32" batch="1">
         <layers>
             <layer name="input_1" type="input" id="0" precision="FP32">
                 <output>
@@ -420,7 +448,7 @@ std::string activationAfterSplitModel() {
             <edge from-layer="12" from-port="2" to-layer="38" to-port="82" />
             <edge from-layer="38" from-port="83" to-layer="11" to-port="1" />
         </edges>
-    </Net>
+    </net>
     )V0G0N";
 }
 
@@ -505,6 +533,104 @@ std::string FCWithPaddingAfterSplitModel() {
     )V0G0N";
 }
 
+std::string FCBeforeSplitModel() {
+    return R"V0G0N(
+    <Net Name="FCBeforeSplitModel" version="2" precision="FP32" batch="1">
+        <layers>
+            <layer name="input_1" type="input" id="0" precision="FP32">
+                <output>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>20</dim>
+                    </port>
+                </output>
+            </layer>
+            <layer name="FullyConnected_1" id="1" type="InnerProduct" precision="FP32">
+                <fc out-size="20" />
+                <biases offset="0" size="80" />
+                <weights offset="80" size="1600" />
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>20</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>20</dim>
+                    </port>
+                </output>
+            </layer>
+            <layer name="Split_1" type="Split" id="2" precision="FP32">
+                <data axis="1" />
+                <input>
+                    <port id="0">
+                        <!--connected to input-->
+                        <dim>1</dim>
+                        <dim>20</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <!--connected to eltwise-->
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                    <port id="2">
+                        <!--connected to fc-->
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </output>
+            </layer>
+            <layer name="FullyConnected_2" id="11" type="InnerProduct" precision="FP32">
+                <fc out-size="10" />
+                <biases offset="1600" size="40" />
+                <weights offset="1640" size="400" />
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </output>
+            </layer>
+            <layer name="Eltwise_8" type="Eltwise" id="21" precision="FP32">
+                <data operation="sum" />
+                <input>
+                    <port id="0">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                    <port id="1">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </input>
+                <output>
+                    <port id="2">
+                        <dim>1</dim>
+                        <dim>10</dim>
+                    </port>
+                </output>
+            </layer>
+        </layers>
+        <edges>
+            <edge from-layer="0" from-port="0" to-layer="1" to-port="0" />
+            <edge from-layer="1" from-port="1" to-layer="2" to-port="0" />
+            <edge from-layer="2" from-port="1" to-layer="21" to-port="0" />
+            <edge from-layer="2" from-port="2" to-layer="11" to-port="0" />
+            <edge from-layer="11" from-port="1" to-layer="21" to-port="1" />
+        </edges>
+    </Net>
+    )V0G0N";
+}
 std::string twoFCWithPaddingAfterSliceModel() {
     return R"V0G0N(
     <Net Name="twoFCWithPaddingAfterSliceModel" version="2" precision="FP32" batch="1">
@@ -1803,6 +1929,7 @@ std::string TFLeakyReluModel() {
     </net>
     )V0G0N";
 }
+
 std::string maxpoolAfterRelu() {
     return R"V0G0N(
 <?xml version="1.0" ?>
@@ -2319,6 +2446,7 @@ std::string doubleConcatModel() {
     )V0G0N";
 }
 
+
 std::string cropWithoutOffsetModel() {
     return R"V0G0N(
     <Net Name="cropWithoutOffsetModel" version="2" precision="FP32" batch="1">
@@ -2675,4 +2803,498 @@ std::string copyModel() {
     </Net>
     )V0G0N";
 }
+
+std::string two_inputs_to_concat() {
+    return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="N" version="2">
+	<layers>
+		<layer id="0" name="input_1" precision="FP32" type="input">
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>600</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="1" name="input_2" precision="FP32" type="input">
+			<output>
+				<port id="2">
+					<dim>1</dim>
+					<dim>600</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="2" name="concat" precision="FP32" type="Concat">
+			<data out-size="600"/>
+			<input>
+				<port id="3">
+					<dim>1</dim>
+					<dim>600</dim>
+				</port>
+			</input>
+			<input>
+				<port id="4">
+					<dim>1</dim>
+					<dim>600</dim>
+				</port>
+			</input>
+			<output>
+				<port id="5">
+					<dim>1</dim>
+					<dim>1200</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="5" name="tanh_6" precision="FP32" type="Activation">
+			<data type="tanh"/>
+			<input>
+				<port id="10">
+					<dim>1</dim>
+					<dim>600</dim>
+				</port>
+			</input>
+			<output>
+				<port id="11">
+					<dim>1</dim>
+					<dim>600</dim>
+				</port>
+			</output>
+		</layer>
+	</layers>
+	<edges>
+		<edge from-layer="0" from-port="1" to-layer="2" to-port="3"/>
+		<edge from-layer="1" from-port="2" to-layer="2" to-port="4"/>
+		<edge from-layer="2" from-port="5" to-layer="5" to-port="10"/>
+	</edges>
+</net>
+    )V0G0N";
+
+}
+
+std::string two_inputs_to_affine() {
+    return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="" version="2">
+	<layers>
+		<layer id="0" name="input_1" precision="FP32" type="input">
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="1" name="input_2" precision="FP32" type="input">
+			<output>
+				<port id="2">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="2" name="affinetransform_3" precision="FP32" type="FullyConnected">
+			<data out-size="10"/>
+			<input>
+				<port id="3">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</input>
+			<output>
+				<port id="4">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="0" size="400"/>
+			</blobs>
+		</layer>
+		<layer id="3" name="affinetransform_4" precision="FP32" type="FullyConnected">
+			<data out-size="600"/>
+			<input>
+				<port id="5">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</input>
+			<output>
+				<port id="6">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="400" size="400"/>
+			</blobs>
+		</layer>
+		<layer id="4" name="add_5" precision="FP32" type="Eltwise">
+			<data operation="sum"/>
+			<input>
+				<port id="7">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+				<port id="8">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</input>
+			<output>
+				<port id="9">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+		</layer>
+	</layers>
+	<edges>
+		<edge from-layer="0" from-port="1" to-layer="2" to-port="3"/>
+		<edge from-layer="1" from-port="2" to-layer="3" to-port="5"/>
+		<edge from-layer="2" from-port="4" to-layer="4" to-port="7"/>
+		<edge from-layer="3" from-port="6" to-layer="4" to-port="8"/>
+	</edges>
+</net>
+    )V0G0N";
+
+}
+
+
+std::string affineAfterConvNoPermute() {
+    return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model" version="2">
+	<layers>
+		<layer id="0" name="Placeholder" precision="FP32" type="Input">
+			<output>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>126</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="1" name="conv1" precision="FP32" type="Convolution">
+			<data dilation-x="1" dilation-y="1" group="1" kernel-x="5" kernel-y="1" output="128" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>126</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="0" size="327680"/>
+				<biases offset="327680" size="512"/>
+			</blobs>
+		</layer>
+		<layer id="2" name="conv1_node/Relu" precision="FP32" type="ReLU">
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="3" name="pool1_node/MaxPool" precision="FP32" type="Pooling">
+			<data exclude-pad="true" kernel-x="2" kernel-y="1" pad-x="0" pad-y="0" pool-method="max" stride="1,1,1,2" stride-x="2" stride-y="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>61</dim>
+				</port>
+			</output>
+		</layer>
+
+        <layer id="4" name="Reshape_3" precision="FP32" type="Reshape">
+            <data axis="0" dim="1,7808" num_axes="-1"/>
+            <input>
+            <port id="0">
+                <dim>1</dim>
+                <dim>128</dim>
+                <dim>1</dim>
+                <dim>61</dim>
+            </port>
+            </input>
+            <output>
+            <port id="1">
+                <dim>1</dim>
+                <dim>7808</dim>
+            </port>
+            </output>
+        </layer>
+
+        <layer name="FullyConnected" id="5" type="InnerProduct" precision="FP32">
+
+            <fc out-size="10" />
+
+            <biases offset="328192" size="40" />
+            <weights offset="328232" size="312320" />
+
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>7808</dim>
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        </layers>
+        <edges>
+		<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+		<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
+		<edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
+        <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
+        <edge from-layer="4" from-port="1" to-layer="5" to-port="0"/>
+        </edges>
+    </net>
+
+    )V0G0N";
+}
+
+std::string affineAfterConvWithPermute() {
+    return R"V0G0N(
+<?xml version="1.0" ?>
+<net batch="1" name="model" version="2">
+	<layers>
+		<layer id="0" name="Placeholder" precision="FP32" type="Input">
+			<output>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>126</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="1" name="conv1" precision="FP32" type="Convolution">
+			<data dilation-x="1" dilation-y="1" group="1" kernel-x="5" kernel-y="1" output="128" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>126</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="0" size="327680"/>
+				<biases offset="327680" size="512"/>
+			</blobs>
+		</layer>
+		<layer id="2" name="conv1_node/Relu" precision="FP32" type="ReLU">
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="3" name="pool1_node/MaxPool" precision="FP32" type="Pooling">
+			<data exclude-pad="true" kernel-x="2" kernel-y="1" pad-x="0" pad-y="0" pool-method="max" stride="1,1,1,2" stride-x="2" stride-y="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>122</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>61</dim>
+				</port>
+			</output>
+		</layer>
+
+		<layer id="4" name="maxpoolingcomponent32/Permute" precision="FP32" type="Permute">
+			<data order="0,3,2,1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>128</dim>
+					<dim>1</dim>
+					<dim>61</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>61</dim>
+					<dim>1</dim>
+					<dim>128</dim>
+				</port>
+			</output>
+		</layer>
+
+        <layer id="5" name="Reshape_3" precision="FP32" type="Reshape">
+            <data axis="0" dim="1,7808" num_axes="-1"/>
+            <input>
+            <port id="0">
+                <dim>1</dim>
+                <dim>61</dim>
+                <dim>1</dim>
+                <dim>128</dim>
+            </port>
+            </input>
+            <output>
+            <port id="1">
+                <dim>1</dim>
+                <dim>7808</dim>
+            </port>
+            </output>
+        </layer>
+
+        <layer name="FullyConnected" id="6" type="InnerProduct" precision="FP32">
+
+            <fc out-size="10" />
+
+            <biases offset="328192" size="40" />
+            <weights offset="328232" size="312320" />
+
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>7808</dim>
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>10</dim>
+                </port>
+            </output>
+        </layer>
+        </layers>
+        <edges>
+		<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+		<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
+		<edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
+        <edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
+        <edge from-layer="4" from-port="1" to-layer="5" to-port="0"/>
+        <edge from-layer="5" from-port="1" to-layer="6" to-port="0"/>
+        </edges>
+    </net>
+
+    )V0G0N";
+}
+
+
+
+std::string ScaleShift3DModel() {
+    return R"V0G0N(
+   <?xml version="1.0" ?>
+<net batch="1" name="frozen_model" version="4">
+	<layers>
+		<layer id="0" name="reshape_1_input" precision="FP32" type="Input">
+			<output>
+				<port id="0">
+					<dim>1</dim>
+					<dim>40</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="1" name="conv1d_1/convolution/Squeeze" precision="FP32" type="Reshape">
+			<data dim="1,5,8"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>40</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>5</dim>
+					<dim>8</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="2" name="conv1d_1/add" precision="FP32" type="ScaleShift">
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>5</dim>
+					<dim>8</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>5</dim>
+					<dim>8</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="0" size="32"/>
+				<biases offset="32" size="32"/>
+			</blobs>
+		</layer>
+	</layers>
+	<edges>
+		<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+		<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
+	</edges>
+</net>
+
+    )V0G0N";
+}
+
 }  // namespace GNATestIRs
diff --git a/inference-engine/tests/unit/engines/gna/test_irs.hpp b/inference-engine/tests/unit/engines/gna/test_irs.hpp
index c7b4b0c66..c0194dc05 100644
--- a/inference-engine/tests/unit/engines/gna/test_irs.hpp
+++ b/inference-engine/tests/unit/engines/gna/test_irs.hpp
@@ -1,6 +1,34 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
+/*
+ * INTEL CONFIDENTIAL
+ * Copyright (C) 2018-2019 Intel Corporation.
+ *
+ * The source code contained or described herein and all documents
+ * related to the source code ("Material") are owned by Intel Corporation
+ * or its suppliers or licensors. Title to the Material remains with
+ * Intel Corporation or its suppliers and licensors. The Material may
+ * contain trade secrets and proprietary and confidential information
+ * of Intel Corporation and its suppliers and licensors, and is protected
+ * by worldwide copyright and trade secret laws and treaty provisions.
+ * No part of the Material may be used, copied, reproduced, modified,
+ * published, uploaded, posted, transmitted, distributed, or disclosed
+ * in any way without Intel's prior express written permission.
+ *
+ * No license under any patent, copyright, trade secret or other
+ * intellectual property right is granted to or conferred upon you by
+ * disclosure or delivery of the Materials, either expressly, by implication,
+ * inducement, estoppel or otherwise. Any license under such intellectual
+ * property rights must be express and approved by Intel in writing.
+ *
+ * Include any supplier copyright notices as supplier requires Intel to use.
+ *
+ * Include supplier trademarks or logos as supplier requires Intel to use,
+ * preceded by an asterisk. An asterisked footnote can be added as follows:
+ * *Third Party trademarks are the property of their respective owners.
+ *
+ * Unless otherwise agreed by Intel in writing, you may not remove or alter
+ * this notice or any other notice embedded in Materials by Intel or Intel's
+ * suppliers or licensors in any way.
+ */
 
 #pragma once
 
@@ -17,6 +45,7 @@ std::string activationAfterSplitModel();
 std::string FCWithPaddingAfterSplitModel();
 std::string SliceModelWithAlignedOutputs();
 std::string FCWithPaddingAfterSliceModel();
+std::string FCBeforeSplitModel();
 std::string twoFCWithPaddingAfterSliceModel();
 std::string eltwiseSummModel();
 std::string eltwiseMulModel();
@@ -40,4 +69,9 @@ std::string cropWithOffsetModel();
 std::string cropWithMaxOffsetModel();
 std::string cropWithOffsetExtendedModel();
 std::string copyModel();
+std::string two_inputs_to_affine();
+std::string two_inputs_to_concat();
+std::string affineAfterConvNoPermute();
+std::string affineAfterConvWithPermute();
+std::string ScaleShift3DModel();
 }  // namespace GNATestIRs
diff --git a/inference-engine/tests/unit/engines/mkldnn/constant_propagation_test.cpp b/inference-engine/tests/unit/engines/mkldnn/constant_propagation_test.cpp
index 5d817f8b1..c0c25ebcd 100644
--- a/inference-engine/tests/unit/engines/mkldnn/constant_propagation_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/constant_propagation_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/convert_desc_test.cpp b/inference-engine/tests/unit/engines/mkldnn/convert_desc_test.cpp
index ddd244489..e4fa4fc85 100644
--- a/inference-engine/tests/unit/engines/mkldnn/convert_desc_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/convert_desc_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/dump_test.cpp b/inference-engine/tests/unit/engines/mkldnn/dump_test.cpp
index 042f7ac9f..25ec76c48 100644
--- a/inference-engine/tests/unit/engines/mkldnn/dump_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/dump_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <gtest/gtest.h>
diff --git a/inference-engine/tests/unit/engines/mkldnn/dumper_test.cpp b/inference-engine/tests/unit/engines/mkldnn/dumper_test.cpp
index 383a1e7f1..0fc2eff81 100644
--- a/inference-engine/tests/unit/engines/mkldnn/dumper_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/dumper_test.cpp
@@ -1,5 +1,17 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
+//
+// Copyright 2016-2018 Intel Corporation.
+//
+// This software and the related documents are Intel copyrighted materials,
+// and your use of them is governed by the express license under which they
+// were provided to you (End User License Agreement for the Intel(R) Software
+// Development Products (Version May 2017)). Unless the License provides
+// otherwise, you may not use, modify, copy, publish, distribute, disclose or
+// transmit this software or the related documents without Intel's prior
+// written permission.
+//
+// This software and the related documents are provided as is, with no
+// express or implied warranties, other than those that are expressly
+// stated in the License.
 //
 
 #include <gtest/gtest.h>
@@ -29,7 +41,7 @@ public:
             "SomeNet", {2,3,16,16}, "FP32")) {
         using prm_t = map<string, string>;
 
-        testing::InOutData inout = {{{2,3,16,16}},{{2,16,16,16}}};
+        testing::InOutShapes inout = {{{2,3,16,16}},{{2,16,16,16}}};
 
         prm_t conv_prm = {
                 {"stride-x", std::to_string(1)},
@@ -96,4 +108,4 @@ TEST(MKLDNNLayersTests, DumpSimpleGraphToDot) {
     ASSERT_EQ(std::count(dot.begin(), dot.end(), '['), 10); // 4-node 3-data 3-shape
     ASSERT_EQ(std::count(dot.begin(), dot.end(), ']'), 10);
     ASSERT_EQ(std::count(dot.begin(), dot.end(), '>'), 6); // connection
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp
new file mode 100644
index 000000000..d1224503a
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/depth_to_space_tests.cpp
@@ -0,0 +1,525 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct depth_to_space_test_params {
+    InferenceEngine::SizeVector in_shape;
+    size_t block_size;
+    InferenceEngine::SizeVector out_shape;
+
+    std::vector<float> reference;
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+void ref_depth_to_space(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::TBlob<float> &dst,
+    size_t block_size
+) {
+    size_t i;
+    const float *src_data = src.data();
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+    InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+    float* dst_data = dst.data();
+    InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims();
+    InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
+
+    if (src_dims.size() < 3)
+        FAIL() << " Incorrect number of input dimensions!";
+
+    if (dst_dims.size() < 2)
+        FAIL() << " Incorrect number of output dimensions!";
+
+    if (block_size == 0)
+        FAIL() << " Incorrect block_size parameter is zero!";
+
+    if (src_dims[src_dims.size() - 3] % (block_size * block_size))
+        FAIL() << " block_size parameter is incompatible with input tensor Color dimension size!";
+
+    if (dst_dims.size() > 2 && src_dims[src_dims.size() - 3] != (dst_dims[dst_dims.size() - 3] * block_size * block_size))
+        FAIL() << " Input/Output tensor Color dimension is incompatible with block_size!";
+
+    if (dst_dims[dst_dims.size() - 2] != (src_dims[src_dims.size() - 2] * block_size))
+        FAIL() << " Input/Output tensor Height dimension is incompatible with block_size!";
+
+    if (dst_dims[dst_dims.size() - 1] != (src_dims[src_dims.size() - 1] * block_size))
+        FAIL() << " Input/Output tensor Width dimension is incompatible with block_size!";
+
+    size_t X = 1;
+    for (i = 0; i < (src_dims.size() - 3); i++)
+        X *= src_dims[i];
+
+    size_t C = src_dims[src_dims.size() - 3];
+    size_t H = src_dims[src_dims.size() - 2];
+    size_t W = src_dims[src_dims.size() - 1];
+
+    for (size_t x = 0, k = 0; x < X; ++x) {
+        for (size_t h = 0; h < H; ++h) {
+            for (size_t c = 0; c < C; c += block_size) {
+                for (size_t w = 0; w < W; ++w) {
+                    for (size_t b = 0; b < block_size; ++b) {
+                        size_t idx = x * C*H*W + (c + b) * H*W + h * W + w;
+                        dst_data[k++] = src_data[idx];
+                    }
+                }
+            }
+        }
+    }
+}
+
+void ref_space_to_depth(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::TBlob<float> &dst,
+    size_t block_size
+) {
+    size_t i;
+    const float *src_data = src.data();
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+    InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+    float* dst_data = dst.data();
+    InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims();
+    InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
+
+    if (dst_dims.size() < 3)
+        FAIL() << " Incorrect number of output dimensions!";
+
+    if (src_dims.size() < 2)
+        FAIL() << " Incorrect number of input dimensions!";
+
+    if (block_size == 0)
+        FAIL() << " Incorrect block_size parameter is zero!";
+
+    if (dst_dims[dst_dims.size() - 3] % (block_size * block_size))
+        FAIL() << " block_size parameter is incompatible with input tensor Color dimension size!";
+
+    if (src_dims.size() > 2 && dst_dims[dst_dims.size() - 3] != (src_dims[dst_dims.size() - 3] * block_size * block_size))
+        FAIL() << " Input/Output tensor Color dimension is incompatible with block_size!";
+
+    if (src_dims[src_dims.size() - 2] != (dst_dims[dst_dims.size() - 2] * block_size))
+        FAIL() << " Input/Output tensor Height dimension is incompatible with block_size!";
+
+    if (src_dims[src_dims.size() - 1] != (dst_dims[dst_dims.size() - 1] * block_size))
+        FAIL() << " Input/Output tensor Width dimension is incompatible with block_size!";
+
+    size_t X = 1;
+    for (i = 0; i < (dst_dims.size() - 3); i++)
+        X *= dst_dims[i];
+
+    size_t C = dst_dims[dst_dims.size() - 3];
+    size_t H = dst_dims[dst_dims.size() - 2];
+    size_t W = dst_dims[dst_dims.size() - 1];
+
+    for (size_t x = 0, k = 0; x < X; ++x) {
+        for (size_t h = 0; h < H; ++h) {
+            for (size_t c = 0; c < C; c += block_size) {
+                for (size_t w = 0; w < W; ++w) {
+                    for (size_t b = 0; b < block_size; ++b) {
+                        size_t idx = x * C*H*W + (c + b) * H*W + h * W + w;
+                        dst_data[idx] = src_data[k++];
+                    }
+                }
+            }
+        }
+    }
+}
+
+class MKLDNNCPUExtDepthToSpaceTests : public TestsCommon, public WithParamInterface<depth_to_space_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="DepthToSpace_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>s
+        </layer>
+        <layer name="output" id="2" type="DepthToSpace" precision="FP32">
+            <data block_size="_BS_"/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+           </input>
+            <output>
+                <port id="2">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(depth_to_space_test_params p) {
+        std::string model = model_t;
+        std::string in_shape, out_shape;
+
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+        REPLACE_WITH_NUM(model, "_BS_", p.block_size);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            depth_to_space_test_params p = ::testing::WithParamInterface<depth_to_space_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            // Check results
+            InferenceEngine::SizeVector out_dims;
+            ref_depth_to_space(*srcPtr, dst_ref, p.block_size);
+
+            //  Check results
+            if(p.reference.size())
+                if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+                    FAIL() << "Wrong result with compare TF reference!";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, dst_ref);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+class MKLDNNCPUExtSpaceToDepthTests : public TestsCommon, public WithParamInterface<depth_to_space_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="SpaceToDepth_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>s
+        </layer>
+        <layer name="output" id="2" type="SpaceToDepth" precision="FP32">
+            <data block_size="_BS_"/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+           </input>
+            <output>
+                <port id="2">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(depth_to_space_test_params p) {
+        std::string model = model_t;
+        std::string in_shape, out_shape;
+
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+        REPLACE_WITH_NUM(model, "_BS_", p.block_size);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            depth_to_space_test_params p = ::testing::WithParamInterface<depth_to_space_test_params>::GetParam();
+            std::string model = getModel(p);
+            //std::cout << model;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*) {}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.out_shape) });
+            src->allocate();
+            if (p.reference.size())
+                memcpy(static_cast<float*>(src->buffer()), &p.reference[0], sizeof(float)*p.reference.size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            // Check results
+            InferenceEngine::SizeVector out_dims;
+            ref_space_to_depth(*srcPtr, dst_ref, p.block_size);
+
+            //  Check results
+            if (p.reference.size()) {
+            //    fill_data_dbgval(src->buffer(), src->size());
+            //    if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+            //        FAIL() << "Wrong result with compare TF reference!";
+            }
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, dst_ref);
+        }
+        catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+
+
+class MKLDNNCPUExtDepthToSpaceToDepthTests : public TestsCommon, public WithParamInterface<depth_to_space_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="DepthToSpaceToDepth_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>s
+        </layer>
+        <layer name="intermediate" id="2" type="DepthToSpace" precision="FP32">
+            <data block_size="_BS_"/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+           </input>
+            <output>
+                <port id="2">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="3" type="SpaceToDepth" precision="FP32">
+            <data block_size="_BS_"/>
+            <input>
+                <port id="1">
+                    _OUT_
+                </port>
+           </input>
+            <output>
+                <port id="2">
+                    _IN_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="3" to-port="1"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(depth_to_space_test_params p) {
+        std::string model = model_t;
+        std::string in_shape, out_shape;
+
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+        REPLACE_WITH_NUM(model, "_BS_", p.block_size);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            depth_to_space_test_params p = ::testing::WithParamInterface<depth_to_space_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*) {}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, *src);
+        }
+        catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtDepthToSpaceTests, TestsDepthToSpace) {}
+//  Test data vectors
+static std::vector<float> test0 = { 0.f, 6.f, 1.f, 7.f, 2.f, 8.f, 12.f, 18.f, 13.f, 19.f, 14.f, 20.f, 3.f, 9.f, 4.f, 10.f, 5.f, 11.f, 15.f, 21.f, 16.f, 22.f, 17.f, 23.f};
+INSTANTIATE_TEST_CASE_P(
+    TestsDepthToSpace, MKLDNNCPUExtDepthToSpaceTests,
+            ::testing::Values(
+// Params: in_shape, block_size, out_shape, reference
+                depth_to_space_test_params{ { 1, 4, 2, 3 }, 2, { 1, 1, 4, 6 }, test0 },
+                depth_to_space_test_params{ { 4, 2, 3 }, 2, { 1, 1, 4, 6 }, test0 },
+                depth_to_space_test_params{ { 1, 4, 2, 3 }, 2, { 4, 6 }, test0 },
+                depth_to_space_test_params{ { 4, 2, 3 }, 2, { 4, 6 }, test0 },
+                depth_to_space_test_params{ { 5, 4, 2, 3 }, 2, { 5, 1, 4, 6 }, test0 },
+                depth_to_space_test_params{ { 2, 3, 5, 4, 2, 3 }, 2, { 2, 3, 5, 1, 4, 6 }, test0 }
+));
+
+
+TEST_P(MKLDNNCPUExtDepthToSpaceToDepthTests, TestsDepthToSpaceToDepth) {}
+INSTANTIATE_TEST_CASE_P(
+    TestsDepthToSpaceToDepth, MKLDNNCPUExtDepthToSpaceToDepthTests,
+    ::testing::Values(
+        // Params: in_shape, block_size, out_shape, reference
+        depth_to_space_test_params{ { 1, 9, 2, 3 }, 3,{ 1, 1, 6, 9 },{} },
+        depth_to_space_test_params{ { 16, 2, 3 }, 4,{ 1, 1, 8, 12 },{} },
+        depth_to_space_test_params{ { 1, 25, 4, 3 }, 5,{ 20, 15 },{} },
+        depth_to_space_test_params{ { 72, 10, 3 }, 6,{ 2, 60, 18 },{} },
+        depth_to_space_test_params{ { 5, 8, 2, 3 }, 2,{ 5, 2, 4, 6 },{} },
+        depth_to_space_test_params{ { 2, 3, 5, 16, 2, 3 }, 2,{ 2, 3, 5, 4, 4, 6 },{} }
+));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/expand_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/expand_tests.cpp
new file mode 100644
index 000000000..4db82c952
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/expand_tests.cpp
@@ -0,0 +1,265 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct expand_test_params {
+    std::string                 precision;
+    InferenceEngine::SizeVector in_shape;
+    InferenceEngine::SizeVector out_shape;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+
+template <typename data_t>
+void ref_expand(InferenceEngine::TBlob<data_t> &src, InferenceEngine::TBlob<data_t> &dst) {
+    size_t i;
+    const data_t *src_data = src.data();
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+    InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+    data_t* dst_data = dst.data();
+    InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims();
+    InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
+
+    if (src_dims.size() > dst_dims.size())
+        FAIL() << "Output tensor dimension is smaller then input tensor dimension";
+
+    size_t prefix_size = dst_dims.size() - src_dims.size();
+    for (i = 0; i < src_dims.size(); i++) {
+        if (src_dims[i] != 1 && src_dims[i] != dst_dims[i + prefix_size])
+            FAIL() << "In/Output corresponding dimension must have the same value, or Input dimension is equal to 1";
+    }
+
+    InferenceEngine::SizeVector src_aligned(dst_dims.size());
+    InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size());
+    for (i = 0; i < dst_dims.size(); i++) {
+        if (i < prefix_size) {
+            src_aligned[i] = 1;
+            srcStrides_aligned[i] = srcStrides[0];
+        } else {
+            src_aligned[i] = src_dims[i - prefix_size];
+            srcStrides_aligned[i] = srcStrides[i - prefix_size];
+        }
+    }
+
+    size_t src_idx, work_amount_dst = dstStrides[0] * dst_dims[0];
+    InferenceEngine::SizeVector counters(dst_dims.size(), 0);
+
+    for (size_t iwork = 0; iwork < work_amount_dst; ++iwork) {
+        for (i = 0, src_idx = 0; i < dst_dims.size(); ++i)
+            src_idx += counters[i] ? ((counters[i] % src_aligned[i]) * srcStrides_aligned[i]) : 0;
+
+        dst_data[iwork] = src_data[src_idx];
+
+        for (int j = dst_dims.size() - 1; j >= 0; j--) {
+            counters[j] = (counters[j] + 1) % dst_dims[j];
+            if (counters[j] != 0) break;
+        }
+    }
+}
+
+
+class MKLDNNCPUExtExpandTests : public TestsCommon, public WithParamInterface<expand_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="Expand_net" version="2" precision="_IIDXP_" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="_IIDXP_" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>
+        </layer>
+        <layer name="shape" type="Input" precision="I32" id="2">
+            <output>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="Expand" precision="_IIDXP_">
+            <data/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(expand_test_params p) {
+        std::string model = model_t;
+        std::string in_shape;
+        std::string out_shape;
+
+        REPLACE_WITH_STR(model, "_IIDXP_", p.precision);
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+        REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.out_shape.size());
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            expand_test_params p = ::testing::WithParamInterface<expand_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*) {}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            // Input Data
+            InferenceEngine::Blob::Ptr dims;
+            InferenceEngine::SizeVector vector_dim(1, p.out_shape.size());
+            dims = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) });
+            dims->allocate();
+            for (size_t i = 0; i < p.out_shape.size(); i++) {
+                static_cast<int32_t*>(dims->buffer())[i] = static_cast<int32_t>(p.out_shape[i]);
+            }
+            auto * dimsPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(dims.get());
+            if (dimsPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+            InferenceEngine::BlobMap srcs;
+            InferenceEngine::Blob::Ptr src;
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+            if (p.precision == "I32") {
+                src = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+                src->allocate();
+                for (size_t i = 0; i < src->size(); i++)
+                    static_cast<int32_t*>(src->buffer())[i] = static_cast<int32_t>(i);
+                auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(src.get());
+                if (srcPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("shape", dims));
+
+                // Output Blob
+                InferenceEngine::TBlob<int32_t>::Ptr output;
+                output = InferenceEngine::make_shared_blob<int32_t>(item.second->getTensorDesc());
+                output->allocate();
+                outputBlobs[item.first] = output;
+
+                // Output Reference
+                InferenceEngine::TBlob<int32_t> dst_ref(item.second->getTensorDesc());
+                dst_ref.allocate();
+                ref_expand(*srcPtr, dst_ref);
+
+                // Infer
+                graph.Infer(srcs, outputBlobs);
+                for (int i = 0; i < dst_ref.size(); i++) {
+                    if (dst_ref.data()[i] != (*output).data()[i])
+                        FAIL() << "The difference between res_ptr[i] and ref_ptr[i]";
+                }
+            }
+            else if (p.precision == "FP32") {
+                src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+                src->allocate();
+                fill_data_dbgval(src->buffer(), src->size());
+                auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+                if (srcPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("shape", dims));
+
+                // Output Blob
+                InferenceEngine::TBlob<float>::Ptr output;
+                output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+                output->allocate();
+                outputBlobs[item.first] = output;
+
+                // Output Reference
+                InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+                dst_ref.allocate();
+                ref_expand(*srcPtr, dst_ref);
+
+                // Infer
+                graph.Infer(srcs, outputBlobs);
+                compare(*output, dst_ref);
+            }
+            else {
+                return;
+            }
+        }
+        catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtExpandTests, TestsExpand) {}
+
+INSTANTIATE_TEST_CASE_P(
+    TestsExpand, MKLDNNCPUExtExpandTests,
+    ::testing::Values(
+        // Params: precision, in_shape, out_shape
+        expand_test_params{ "I32", { 1 }, { 2, 3, 4 } },
+        expand_test_params{ "I32", { 4, 1, 2 }, { 4, 2, 2 } },
+        expand_test_params{ "I32", { 4, 2, 1 }, { 4, 2, 2 } },
+        expand_test_params{ "I32", { 4, 2 }, { 2, 4, 2 } },
+        expand_test_params{ "I32", { 4, 1, 1 }, { 4, 2, 1 } },
+        expand_test_params{ "I32", { 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } },
+        expand_test_params{"FP32", { 1 }, { 2, 3, 4 } },
+        expand_test_params{"FP32", { 4, 1, 2 }, { 4, 2, 2 } },
+        expand_test_params{"FP32", { 4, 2, 1 }, { 4, 2, 2 } },
+        expand_test_params{"FP32", { 4, 2 }, { 2, 4, 2 } },
+        expand_test_params{"FP32", { 4, 1, 1 }, { 4, 2, 1 } },
+        expand_test_params{"FP32", { 2, 1, 3, 1 },{ 2, 2, 2, 3, 1 } }
+));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp
index 4e22a72bf..1b9d9367f 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fake_layer.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -44,8 +44,8 @@ class FakeExtensions : public IExtension {
 
     void GetVersion(const Version *&versionInfo) const noexcept override {
         static Version ExtensionDescription = {
-            {1, 0},    // extension API version
-            "1.0",
+            {1, 6},    // extension API version
+            "1.6",
             "ie-cpu-ext"  // extension description message
         };
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp
new file mode 100644
index 000000000..55dc9d3a4
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/fill_tests.cpp
@@ -0,0 +1,202 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct fill_test_params {
+    std::string                 precision;
+    InferenceEngine::SizeVector out_shape;
+    float                       value;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+class MKLDNNCPUExtFillTests : public TestsCommon, public WithParamInterface<fill_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="Fill_net" version="2" precision="_IIDXP_" batch="1">
+    <layers>
+        <layer name="dims" type="Input" precision="I32" id="1">
+            <output>
+                <port id="1">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="value" type="Input" precision="_IIDXP_" id="2">
+            <output>
+                <port id="2">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="Fill" precision="_IIDXP_">
+            <data/>
+            <input>
+                <port id="1">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+                <port id="2">
+                    <dim>1</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(fill_test_params p) {
+        std::string model = model_t;
+        std::string out_shape;
+
+        REPLACE_WITH_STR(model, "_IIDXP_", p.precision);
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+        REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.out_shape.size());
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            fill_test_params p = ::testing::WithParamInterface<fill_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            // Input Data
+            InferenceEngine::Blob::Ptr dims;
+            InferenceEngine::SizeVector vector_dim(1, p.out_shape.size());
+            dims = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, vector_dim, InferenceEngine::TensorDesc::getLayoutByDims(vector_dim) });
+            dims->allocate();
+            for (size_t i = 0; i < p.out_shape.size(); i++) {
+                static_cast<int32_t*>(dims->buffer())[i] = static_cast<int32_t>(p.out_shape[i]);
+            }
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(dims.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+            InferenceEngine::BlobMap srcs;
+            InferenceEngine::Blob::Ptr value_scalar;
+            InferenceEngine::SizeVector value_scalar_dim(1, 1);
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+            if (p.precision == "I32") {
+                value_scalar = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, value_scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(value_scalar_dim) });
+                value_scalar->allocate();
+                static_cast<int32_t*>(value_scalar->buffer())[0] = static_cast<int32_t>(p.value);
+                auto * value_scalarPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(value_scalar.get());
+                if (value_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("dims", dims));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("value", value_scalar));
+
+                // Output Blob
+                InferenceEngine::TBlob<int32_t>::Ptr output;
+                output = InferenceEngine::make_shared_blob<int32_t>(item.second->getTensorDesc());
+                output->allocate();
+                outputBlobs[item.first] = output;
+
+                // Output Reference
+                InferenceEngine::TBlob<int32_t> dst_ref(item.second->getTensorDesc());
+                dst_ref.allocate();
+                std::fill_n(static_cast<int32_t*>(dst_ref.data()), dst_ref.size(), static_cast<int32_t>(p.value));
+
+                // Infer
+                graph.Infer(srcs, outputBlobs);
+                for (int i = 0; i < dst_ref.size(); i++) {
+                    if(dst_ref.data()[i] != (*output).data()[i])
+                        FAIL() << "The difference between res_ptr[i] and ref_ptr[i]";
+                }
+            } else if (p.precision == "FP32") {
+                value_scalar = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, value_scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(value_scalar_dim) });
+                value_scalar->allocate();
+                static_cast<float*>(value_scalar->buffer())[0] = p.value;
+                auto * value_scalarPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(value_scalar.get());
+                if (value_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("dims", dims));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("value", value_scalar));
+
+                // Output Blob
+                InferenceEngine::TBlob<float>::Ptr output;
+                output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+                output->allocate();
+                outputBlobs[item.first] = output;
+
+                // Output Reference
+                InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+                dst_ref.allocate();
+                std::fill_n(static_cast<float*>(dst_ref.data()), dst_ref.size(), p.value);
+
+                // Infer
+                graph.Infer(srcs, outputBlobs);
+                compare(*output, dst_ref);
+            } else {
+                return;
+            }
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtFillTests, TestsFill) {}
+
+INSTANTIATE_TEST_CASE_P(
+    TestsFill, MKLDNNCPUExtFillTests,
+            ::testing::Values(
+// Params: precision, value, out_shape
+                fill_test_params{ "I32", { 1 }, 1.f },
+                fill_test_params{ "I32", { 1, 3, 1 }, 1.f },
+                fill_test_params{ "I32", { 2, 3, 6 }, -1.f },
+                fill_test_params{"FP32", { 2, 3, 6 }, -1.f },
+                fill_test_params{"FP32", { 1 }, 1.f },
+                fill_test_params{"FP32", { 1, 3, 1, 2 }, .5f },
+                fill_test_params{"FP32", { 4, 3, 2, 5, 4, 2 }, .25f }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp
index b4300fba6..d92a4f28d 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/gather_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -32,13 +32,6 @@ struct gather_test_params {
     std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
 };
 
-
-inline void clipping(int *idx, const int min, const int max) {
-    (*idx) = ((*idx) > min) ? (*idx) : min;
-    (*idx) = ((*idx) < max) ? (*idx) : (max - 1);
-    return;
-}
-
 template <typename data_t>
 void ref_gather(InferenceEngine::TBlob<data_t> &srcIdx, InferenceEngine::TBlob<float> &srcDct, InferenceEngine::TBlob<float> &dst, size_t axis) {
     size_t i, j;
@@ -70,15 +63,20 @@ void ref_gather(InferenceEngine::TBlob<data_t> &srcIdx, InferenceEngine::TBlob<f
 
     //  The gathering process
     for (i = 0; i < src_size; i++) {
-        int idx = static_cast<int>(src_dataIdx[i]);
+        unsigned int idx = static_cast<unsigned int>(src_dataIdx[i]);
 
         //  Index clipping
-        clipping(&idx, 0, indexRange);
-
-        //  Copying data to destination from Dictionary
-        for (j = 0; j < numDictionaries; j++) {
-            memcpy(&dst_data[dataLength * (i + j * src_size)],
-                   &src_dataDict[dataLength * (idx + j * indexRange)], sizeof(float)*dataLength);
+        if (idx < indexRange)
+        {
+            //  Copying data to destination from Dictionary
+            for (j = 0; j < numDictionaries; j++) {
+                memcpy(&dst_data[dataLength * (i + j * src_size)],
+                       &src_dataDict[dataLength * (idx + j * indexRange)], sizeof(float) * dataLength);
+            }
+        } else {
+            for (j = 0; j < numDictionaries; j++) {
+                std::fill_n(&dst_data[dataLength * (i + j * src_size)], dataLength, 0.0f);
+            }
         }
     }
 }
@@ -313,9 +311,6 @@ INSTANTIATE_TEST_CASE_P(
             ::testing::Values(
                 gather_test_params{ "FP32", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
                 gather_test_params{  "I32", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{  "I16", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{   "U8", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
-                gather_test_params{   "I8", {1, 1, 12, 256}, {1, 1, 71, 16}, 0, {1, 12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
                 gather_test_params{  "I32", {12, 256}, {71, 16}, 0, {12, 256, 16}, 1, MKLDNNPlugin::impl_desc_type::unknown },
                 gather_test_params{  "I32", {3, 4}, {2, 5, 6}, 0, {3, 4, 5, 6}, 1, MKLDNNPlugin::impl_desc_type::unknown },
                 gather_test_params{  "I32", {3, 4}, {5, 1}, 0, {3, 4, 1}, 1, MKLDNNPlugin::impl_desc_type::unknown },
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp
index 49e62bc6b..793d43a78 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/graph_generic_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp
index 6bc9b757b..94e0d3597 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/interp_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -136,7 +136,7 @@ class MKLDNNCPUExtInterpTests: public TestsCommon, public WithParamInterface<int
             </output>
         </layer>
         <layer name="interp1" id="1" type="Interp" precision="FP32">
-            <data pad_beg="_PB_" pad_end="_PE_"/>
+            <data pad_beg="_PB_" pad_end="_PE_" height="_OH_" width="_OW_"/>
 
             <input>
                 <port id="1">
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp
index 84511a1f9..bb31c09e0 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/mvn_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp
new file mode 100644
index 000000000..292c99bf0
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/range_tests.cpp
@@ -0,0 +1,255 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct range_test_params {
+    std::string                 precision;
+    float                       start;
+    float                       limit;
+    float                       delta;
+    InferenceEngine::SizeVector out_shape;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+template <typename data_t>
+void ref_range(
+    float start,
+    float limit,
+    float delta,
+    InferenceEngine::TBlob<data_t> &dst
+) {
+    data_t* dst_data = dst.data();
+    size_t work_amount_dst = std::floor(std::abs((limit - start) / delta));
+    if (work_amount_dst != dst.size())
+        FAIL() << "Range indexes exceeds data tensor dimension";
+
+    data_t dst_value = static_cast<data_t>(start);
+    for (size_t iwork = 0; iwork < work_amount_dst; ++iwork, dst_value += static_cast<data_t>(delta)) {
+        dst_data[iwork] = dst_value;
+    }
+}
+
+class MKLDNNCPUExtRangeTests : public TestsCommon, public WithParamInterface<range_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="Range_net" version="2" precision="_IIDXP_" batch="1">
+    <layers>
+        <layer name="start" type="Input" precision="_IIDXP_" id="1">
+            <output>
+                <port id="1">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="limit" type="Input" precision="_IIDXP_" id="2">
+            <output>
+                <port id="2">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="delta" type="Input" precision="_IIDXP_" id="3">
+            <output>
+                <port id="3">
+                    <dim>1</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="Range" precision="_IIDXP_">
+            <data/>
+            <input>
+                <port id="1">
+                    <dim>1</dim>
+                </port>
+                <port id="2">
+                    <dim>1</dim>
+                </port>
+                <port id="3">
+                    <dim>1</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+        <edge from-layer="3" from-port="3" to-layer="2" to-port="3"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(range_test_params p) {
+        std::string model = model_t;
+        std::string out_shape;
+
+        REPLACE_WITH_STR(model, "_IIDXP_", p.precision);
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            range_test_params p = ::testing::WithParamInterface<range_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            // Input Data
+            InferenceEngine::Blob::Ptr start_scalar;
+            InferenceEngine::Blob::Ptr limit_scalar;
+            InferenceEngine::Blob::Ptr delta_scalar;
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+            InferenceEngine::SizeVector scalar_dim(1, 1);
+            InferenceEngine::BlobMap srcs;
+            InferenceEngine::SizeVector out_dims;
+            if (p.precision == "I32") {
+                start_scalar = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) });
+                start_scalar->allocate();
+                static_cast<int32_t*>(start_scalar->buffer())[0] = static_cast<int32_t>(p.start);
+                auto * start_scalarPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(start_scalar.get());
+                if (start_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                limit_scalar = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) });
+                limit_scalar->allocate();
+                static_cast<int32_t*>(limit_scalar->buffer())[0] = static_cast<int32_t>(p.limit);
+                auto * limit_scalarPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(limit_scalar.get());
+                if (limit_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                delta_scalar = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) });
+                delta_scalar->allocate();
+                static_cast<int32_t*>(delta_scalar->buffer())[0] = static_cast<int32_t>(p.delta);
+                auto * delta_scalarPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(delta_scalar.get());
+                if (delta_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("start", start_scalar));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("limit", limit_scalar));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("delta", delta_scalar));
+
+                // Output Blob
+                InferenceEngine::TBlob<int32_t>::Ptr output;
+                output = InferenceEngine::make_shared_blob<int32_t>(item.second->getTensorDesc());
+                output->allocate();
+                outputBlobs[item.first] = output;
+
+                // Output Reference
+                InferenceEngine::TBlob<int32_t> dst_ref(item.second->getTensorDesc());
+                dst_ref.allocate();
+                ref_range(p.start, p.limit, p.delta, dst_ref);
+
+                // Infer
+                graph.Infer(srcs, outputBlobs);
+                for (int i = 0; i < dst_ref.size(); i++) {
+                    if (dst_ref.data()[i] != (*output).data()[i])
+                        FAIL() << "The difference between res_ptr[i] and ref_ptr[i]";
+                }
+            } else if (p.precision == "FP32") {
+                start_scalar = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) });
+                start_scalar->allocate();
+                static_cast<float*>(start_scalar->buffer())[0] = p.start;
+                auto * start_scalarPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(start_scalar.get());
+                if (start_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                limit_scalar = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) });
+                limit_scalar->allocate();
+                static_cast<float*>(limit_scalar->buffer())[0] = p.limit;
+                auto * limit_scalarPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(limit_scalar.get());
+                if (limit_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                delta_scalar = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, scalar_dim, InferenceEngine::TensorDesc::getLayoutByDims(scalar_dim) });
+                delta_scalar->allocate();
+                static_cast<float*>(delta_scalar->buffer())[0] = p.delta;
+                auto * delta_scalarPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(delta_scalar.get());
+                if (delta_scalarPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("start", start_scalar));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("limit", limit_scalar));
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("delta", delta_scalar));
+
+                // Output Blob
+                InferenceEngine::Blob::Ptr output;
+                output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+                output->allocate();
+                outputBlobs[item.first] = output;
+
+                // Output Reference
+                InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+                dst_ref.allocate();
+                ref_range(p.start, p.limit, p.delta, dst_ref);
+
+                // Infer
+                graph.Infer(srcs, outputBlobs);
+                compare(*output, dst_ref);
+            } else {
+                return;
+            }
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtRangeTests, TestsRange) {}
+
+INSTANTIATE_TEST_CASE_P(
+    TestsRange, MKLDNNCPUExtRangeTests,
+            ::testing::Values(
+// Params: precision, start, limit, delta, out_shape
+                range_test_params{ "I32", 3.f, 18.f, 3.f, { 5 } },
+                range_test_params{ "I32", 3.f, 1.f, -1.f, { 2 } },
+                range_test_params{ "I32", 3.f, -3.f, -1.f, { 6 } },
+                range_test_params{ "I32", 0.f, 5.f, 1.f, { 5 } },
+                range_test_params{"FP32", 3.f, 18.f, 3.f, { 5 } },
+                range_test_params{"FP32", 3.f, 1.f, -.5f, { 4 } },
+                range_test_params{"FP32", 3.f, -1.f, -.5f, { 8 } },
+                range_test_params{"FP32", 0.f, 5.f, 1.f, { 5 } }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp
index f3e4bad1e..149473156 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/resample_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp
new file mode 100644
index 000000000..66ee38b6f
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/reverse_sequence_tests.cpp
@@ -0,0 +1,273 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+
+struct reverse_sequence_test_params {
+    std::string inIdxPrecision;
+    InferenceEngine::SizeVector in_out_shape;
+    std::vector<int32_t> seq_lengths;
+    int                  seq_axis;
+    int                  batch_axis;
+    std::vector<float> reference;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+template <typename data_t>
+void ref_reverse_sequence(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::TBlob<data_t> &seq_lengths,
+    InferenceEngine::TBlob<float> &dst,
+    int seq_axis,
+    int batch_axis
+) {
+    size_t i, src_idx;
+    const float *src_data = src.data();
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+    InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+    const data_t *seq_lengths_data = seq_lengths.data();
+    InferenceEngine::SizeVector seq_lengths_dims = seq_lengths.getTensorDesc().getDims();
+    float* dst_data = dst.data();
+
+    if (seq_axis < 0)
+        seq_axis += src_dims.size();
+
+    if (seq_axis < 0 || seq_axis >= src_dims.size())
+        FAIL() << "Incorrect 'seq_axis' parameters dimensions and axis number!";
+
+    if (batch_axis < 0)
+        batch_axis += src_dims.size();
+
+    if (batch_axis < 0 || batch_axis >= src_dims.size())
+        FAIL() << "Incorrect 'batch_axis' parameters dimensions and axis number!";
+
+    for (i = 0; i < src_dims[batch_axis]; i++) {
+        if (static_cast<int32_t>(seq_lengths_data[i]) > src_dims[seq_axis])
+            FAIL() << "Incorrect input 'seq_lengths' values!";
+    }
+
+    size_t work_amount_dst = srcStrides[0] * src_dims[0];
+    InferenceEngine::SizeVector counters(src_dims.size(), 0);
+    for (size_t iwork = 0; iwork < work_amount_dst; ++iwork) {
+        for (i = 0, src_idx = 0; i < src_dims.size(); ++i) {
+            size_t idx = counters[i];
+            if (i == seq_axis && idx < static_cast<int32_t>(seq_lengths_data[counters[batch_axis]])) {
+                idx = static_cast<int32_t>(seq_lengths_data[counters[batch_axis]]) - idx - 1;
+            }
+            src_idx += idx * srcStrides[i];
+        }
+
+        dst_data[iwork] = src_data[src_idx];
+
+        for (int j = src_dims.size() - 1; j >= 0; j--) {
+            counters[j] = (counters[j] + 1) % src_dims[j];
+            if (counters[j] != 0) break;
+        }
+    }
+}
+
+class MKLDNNCPUExtReverseSequenceTests : public TestsCommon, public WithParamInterface<reverse_sequence_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="ReverseSequence_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_OUT_
+                </port>
+            </output>
+        </layer>
+        <layer name="seq_lengths" type="Input" precision="_IIDXP_" id="2">
+            <output>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="ReverseSequence" precision="FP32">
+            <data seq_axis="_SA_" batch_axis="_BA_"/>
+            <input>
+                <port id="1">
+                    _IN_OUT_
+                </port>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    _IN_OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(reverse_sequence_test_params p) {
+        std::string model = model_t;
+        std::string in_out_shape;
+        for (size_t i = 0; i < p.in_out_shape.size(); i++) {
+            in_out_shape += "<dim>";
+            in_out_shape += std::to_string(p.in_out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision);
+        REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape);
+        REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.seq_lengths.size());
+        REPLACE_WITH_NUM(model, "_SA_", p.seq_axis);
+        REPLACE_WITH_NUM(model, "_BA_", p.batch_axis);
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            reverse_sequence_test_params p = ::testing::WithParamInterface<reverse_sequence_test_params>::GetParam();
+            std::string model = getModel(p);
+            ////std::cout << model;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            InferenceEngine::Blob::Ptr seq_lengthsIdx;
+            InferenceEngine::SizeVector seq_lengths_dim(1, p.seq_lengths.size());
+            if (p.inIdxPrecision == "I32") {
+                seq_lengthsIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+                seq_lengthsIdx->allocate();
+                if (p.seq_lengths.size())
+                    memcpy(static_cast<int32_t*>(seq_lengthsIdx->buffer()), &p.seq_lengths[0], sizeof(int32_t)*p.seq_lengths.size());
+                auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(seq_lengthsIdx.get());
+                if (seq_lengthsIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                // Check results
+                ref_reverse_sequence(*srcPtr, *seq_lengthsIdxPtr, dst_ref, p.seq_axis, p.batch_axis);
+                if (p.reference.size()) {
+                    if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+                        FAIL() << "Wrong result with compare TF reference!";
+                }
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("seq_lengths", seq_lengthsIdx));
+            } else if (p.inIdxPrecision == "FP32") {
+                seq_lengthsIdx = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+                seq_lengthsIdx->allocate();
+                if (p.seq_lengths.size())
+                    for (size_t i = 0; i < p.seq_lengths.size(); i++) {
+                        static_cast<float *>(seq_lengthsIdx->buffer())[i] = static_cast<float>(p.seq_lengths[i]);
+                    }
+                auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(seq_lengthsIdx.get());
+                if (seq_lengthsIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                // Check results
+                ref_reverse_sequence(*srcPtr, *seq_lengthsIdxPtr, dst_ref, p.seq_axis, p.batch_axis);
+                if (p.reference.size()) {
+                    if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+                        FAIL() << "Wrong result with compare TF reference!";
+                }
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("seq_lengths", seq_lengthsIdx));
+            } else {
+                return;
+            }
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, dst_ref);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+//  Test data vectors
+static std::vector<float> test0 = { 9.f,10.f,11.f,12.f,13.f,14.f,15.f,16.f,17.f,0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f,18.f,19.f,20.f,21.f,22.f,23.f,24.f,25.f,26.f };
+static std::vector<float> test2 = { 3.f,4.f,5.f,0.f,1.f,2.f,6.f,7.f,8.f,12.f,13.f,14.f,9.f,10.f,11.f,15.f,16.f,17.f,21.f,22.f,23.f,18.f,19.f,20.f,24.f,25.f,26.f };
+static std::vector<float> test4 = { 1.f,0.f,2.f,4.f,3.f,5.f,7.f,6.f,8.f,10.f,9.f,11.f,13.f,12.f,14.f,16.f,15.f,17.f,19.f,18.f,20.f,22.f,21.f,23.f,25.f,24.f,26.f };
+static std::vector<float> test6 = { 2.f,1.f,0.f,4.f,3.f,5.f };
+static std::vector<float> test7 = { 0.f,1.f,2.f,3.f,4.f,5.f,6.f,7.f,8.f,12.f,13.f,14.f,9.f,10.f,11.f,15.f,16.f,17.f,24.f,25.f,26.f,21.f,22.f,23.f,18.f,19.f,20.f };
+static std::vector<float> test8 = { 0.f,4.f,8.f,3.f,1.f,5.f,6.f,7.f,2.f,9.f,13.f,17.f,12.f,10.f,14.f,15.f,16.f,11.f,18.f,22.f,26.f,21.f,19.f,23.f,24.f,25.f,20.f };
+
+TEST_P(MKLDNNCPUExtReverseSequenceTests, TestsReverseSequence) {}
+INSTANTIATE_TEST_CASE_P(
+    TestsReverseSequence, MKLDNNCPUExtReverseSequenceTests,
+            ::testing::Values(
+// Params: in_out_shape, seq_lengths, seq_axis, batch_axis, reference
+/*  0 */        reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 },  0, 0, test0 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, -3, 0, test0 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 },  1, 0, test2 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, -2, 0, test2 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 },  2, 1, test4 },
+/*  5 */        reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 2, 2, 2 }, -1, 1, test4 },
+                reverse_sequence_test_params{ "I32", { 2, 3 },{ 3, 2 }, 1, 0, test6 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 1, 2, 3 },  1, 0, test7 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 1, 2, 3 },  1,-3, test7 },
+                reverse_sequence_test_params{ "I32", { 3, 3, 3 },{ 1, 2, 3 },  1, 2, test8 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 },  0, 0, test0 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, -3, 0, test0 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 },  1, 0, test2 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, -2, 0, test2 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 },  2, 1, test4 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 2, 2, 2 }, -1, 1, test4 },
+/* 15 */        reverse_sequence_test_params{"FP32", { 2, 3 },{ 3, 2 }, 1, 0, test6 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 1, 2, 3 },  1, 0, test7 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 1, 2, 3 },  1,-3, test7 },
+                reverse_sequence_test_params{"FP32", { 3, 3, 3 },{ 1, 2, 3 },  1, 2, test8 }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp
new file mode 100644
index 000000000..9d2310d05
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/shuffle_channels_tests.cpp
@@ -0,0 +1,213 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct shuffle_channels_test_params {
+    InferenceEngine::SizeVector in_out_shape;
+    int axis;
+    int group;
+
+    std::vector<float> reference;
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+void ref_shuffle_channels(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::TBlob<float> &dst,
+    int axis,
+    int group
+) {
+    size_t i;
+    const float *src_data = src.data();
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+    InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+    float* dst_data = dst.data();
+    InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims();
+    InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
+
+    if (axis < 0)
+        axis += dst_dims.size();
+
+    if (axis < 0 || axis >= dst_dims.size())
+        FAIL() << "Incorrect input parameters dimensions and axis number!";
+
+    if (dst_dims[axis] % group)
+        FAIL() << "Group parameter must evenly divide the channel dimension!";
+
+    //  Find number of dictionaries, index range and data length
+    size_t numDictionaries = 1;
+    for (i = 0; i <= axis; i++)
+        numDictionaries *= dst_dims[i];
+
+    size_t channelsNum = dst_dims[axis] / group;
+
+    size_t dataLength = 1;
+    for (i = axis + 1; i < dst_dims.size(); i++)
+        dataLength *= dst_dims[i];
+
+    if (dataLength == 0)
+        FAIL() << "Incorrect input parameters dimension!";
+
+    size_t j, k;
+    for (j = 0, k = 0; j < numDictionaries; j += dst_dims[axis]) {
+        for (i = 0; i < (dst_dims[axis] * channelsNum); i += channelsNum, k += dataLength) {
+            int idx = j + i / dst_dims[axis] + i % dst_dims[axis];
+            memcpy(&dst_data[k], &src_data[dataLength * idx], sizeof(float) * dataLength);
+        }
+    }
+}
+
+class MKLDNNCPUExtShuffleChannelsTests : public TestsCommon, public WithParamInterface<shuffle_channels_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="ShuffleChannels_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_OUT_
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="ShuffleChannels" precision="FP32">
+            <data axis="_AX_" group="_GR_"/>
+            <input>
+                <port id="1">
+                    _IN_OUT_
+                </port>
+           </input>
+            <output>
+                <port id="2">
+                    _IN_OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(shuffle_channels_test_params p) {
+        std::string model = model_t;
+        std::string in_out_shape;
+
+        for (size_t i = 0; i < p.in_out_shape.size(); i++) {
+            in_out_shape += "<dim>";
+            in_out_shape += std::to_string(p.in_out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_OUT_", in_out_shape);
+        REPLACE_WITH_NUM(model, "_AX_", p.axis);
+        REPLACE_WITH_NUM(model, "_GR_", p.group);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            shuffle_channels_test_params p = ::testing::WithParamInterface<shuffle_channels_test_params>::GetParam();
+            std::string model = getModel(p);
+            ////std::cout << model;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_out_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_out_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            // Check results
+            InferenceEngine::SizeVector out_dims;
+            ref_shuffle_channels(*srcPtr, dst_ref, p.axis, p.group);
+
+            //  Check results
+            if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+                FAIL() << "Wrong result with compare TF reference!";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, dst_ref);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+
+TEST_P(MKLDNNCPUExtShuffleChannelsTests, TestsShuffleChannels) {}
+
+//  Test data vectors
+static std::vector<float> test0 = { 0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
+                                    4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
+                                    8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f };
+static std::vector<float> test4 = { 0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f, 12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f };
+static std::vector<float> test5 = { 0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f, 12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f };
+static std::vector<float> test6 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f, 12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f };
+static std::vector<float> test7 = { 0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f, 12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f };
+static std::vector<float> test8 = { 0.f, 3.f, 1.f, 4.f, 2.f, 5.f };
+
+INSTANTIATE_TEST_CASE_P(
+    TestsShuffleChannels, MKLDNNCPUExtShuffleChannelsTests,
+            ::testing::Values(
+// Params: in_out_shape, axis, group, reference
+/* 0 */         shuffle_channels_test_params{ { 1, 15, 2, 2 }, 1, 5, test0 },
+                shuffle_channels_test_params{ { 1, 15, 2, 2 }, -3, 5, test0 },
+                shuffle_channels_test_params{ { 15, 2, 2 }, 0, 5, test0 },
+                shuffle_channels_test_params{ { 15, 2, 2 }, -3, 5, test0 },
+                shuffle_channels_test_params{ { 2, 2, 6 }, -1, 3, test4 },
+/* 5 */         shuffle_channels_test_params{ { 2, 6, 2 }, -2, 3, test5 },
+                shuffle_channels_test_params{ { 2, 2, 6 }, -1, 2, test6 },
+                shuffle_channels_test_params{ { 2, 6, 2 }, -2, 2, test7 },
+                shuffle_channels_test_params{ { 6 }, 0, 2, test8 }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/squeeze_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/squeeze_tests.cpp
new file mode 100644
index 000000000..fb315cbb1
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/squeeze_tests.cpp
@@ -0,0 +1,244 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct squeeze_test_params {
+    std::string                 inIdxPrecision;
+    InferenceEngine::SizeVector in_shape;
+    std::vector<int32_t>        indices_to_squeeze;
+    InferenceEngine::SizeVector out_shape;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+void ref_squeeze(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::SizeVector &out_dims,
+    std::vector<int32_t> indices_to_squeeze
+) {
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+
+    if (indices_to_squeeze.size() == 0)
+        FAIL() << " Index vector should be 1 dimension";
+
+    for (size_t i = 0; i < indices_to_squeeze.size(); i++) {
+        int32_t axis = indices_to_squeeze[i];
+        if (axis < 0)
+            axis += src_dims.size();
+
+        if (axis > src_dims.size())
+            FAIL() << " Index to squeeze exceeds data tensor dimension";
+        else if (src_dims[axis] != 1)
+            FAIL() << " Index to squeeze of data tensor dimension is not 1";
+    }
+
+    for (size_t j = 0; j < src_dims.size(); j++) {
+        bool found = false;
+        for (size_t i = 0; i < indices_to_squeeze.size(); i++) {
+            int32_t axis = indices_to_squeeze[i];
+            if (axis < 0)
+                axis += src_dims.size();
+            if (j == static_cast<size_t>(axis)) found = true;
+        }
+        if(!found) out_dims.push_back(src_dims[j]);
+    }
+}
+
+class MKLDNNCPUExtSqueezeTests : public TestsCommon, public WithParamInterface<squeeze_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="Squeeze_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>
+        </layer>
+        <layer name="indices_to_squeeze" type="Input" precision="_IIDXP_" id="2">
+            <output>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="Squeeze" precision="FP32">
+            <data/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(squeeze_test_params p) {
+        std::string model = model_t;
+        std::string in_shape;
+        std::string out_shape;
+
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision);
+        REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.indices_to_squeeze.size());
+        if (p.out_shape.size()) {
+            for (size_t i = 0; i < p.out_shape.size(); i++) {
+                out_shape += "<dim>";
+                out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+            }
+        } else {
+            out_shape = "<dim>1</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            squeeze_test_params p = ::testing::WithParamInterface<squeeze_test_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            InferenceEngine::Blob::Ptr seq_lengthsIdx;
+            InferenceEngine::SizeVector seq_lengths_dim(1, p.indices_to_squeeze.size());
+            if (p.inIdxPrecision == "I32") {
+                seq_lengthsIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+                seq_lengthsIdx->allocate();
+                if (p.indices_to_squeeze.size())
+                    memcpy(static_cast<int32_t*>(seq_lengthsIdx->buffer()), &p.indices_to_squeeze[0], sizeof(int32_t)*p.indices_to_squeeze.size());
+                auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(seq_lengthsIdx.get());
+                if (seq_lengthsIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("indices_to_squeeze", seq_lengthsIdx));
+            } else if (p.inIdxPrecision == "FP32") {
+                seq_lengthsIdx = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+                seq_lengthsIdx->allocate();
+                if (p.indices_to_squeeze.size())
+                    for (size_t i = 0; i < p.indices_to_squeeze.size(); i++) {
+                        static_cast<float *>(seq_lengthsIdx->buffer())[i] = static_cast<float>(p.indices_to_squeeze[i]);
+                    }
+                auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(seq_lengthsIdx.get());
+                if (seq_lengthsIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("indices_to_squeeze", seq_lengthsIdx));
+            }
+            else {
+                return;
+            }
+
+            // Check results
+            InferenceEngine::SizeVector out_dims;
+            ref_squeeze(*srcPtr, out_dims, p.indices_to_squeeze);
+            if (out_dims.size() != p.out_shape.size())
+                FAIL() << "Wrong out_shape size!";
+            for (size_t i = 0; i < p.out_shape.size(); i++) {
+                if (out_dims[i] != p.out_shape[i])
+                    FAIL() << "Wrong out_shape dimensions!";
+            }
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, *src);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtSqueezeTests, TestsSqueeze) {}
+
+INSTANTIATE_TEST_CASE_P(
+    TestsSqueeze, MKLDNNCPUExtSqueezeTests,
+            ::testing::Values(
+// Params: inIdxPrecision, in_shape, indices_to_squeeze, out_shape
+                squeeze_test_params{ "I32",{ 1 },{ 0 },{ } },
+                squeeze_test_params{ "I32",{ 1, 3, 1 },{ 0 },{ 3, 1 } },
+                squeeze_test_params{ "I32",{ 1, 3, 1 },{ 2 },{ 1, 3 } },
+                squeeze_test_params{ "I32",{ 1, 3, 1 },{ 0, 2 },{ 3 } },
+                squeeze_test_params{ "I32",{ 1, 3, 1 },{ -1 },{ 1, 3 } },
+                squeeze_test_params{ "I32",{ 1, 3, 1, 2 },{ 0, 2 },{ 3, 2 } },
+                squeeze_test_params{"FP32",{ 1 },{ 0 },{} },
+                squeeze_test_params{"FP32",{ 1, 3, 1 },{ 0 },{ 3, 1 } },
+                squeeze_test_params{"FP32",{ 1, 3, 1 },{ 2 },{ 1, 3 } },
+                squeeze_test_params{"FP32",{ 1, 3, 1 },{ 0, 2 },{ 3 } },
+                squeeze_test_params{"FP32",{ 1, 3, 1 },{ -1 },{ 1, 3 } },
+                squeeze_test_params{"FP32",{ 1, 3, 1, 2 },{ 0, 2 },{ 3, 2 } }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp
new file mode 100644
index 000000000..f8a588aaf
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/strided_slice_tests.cpp
@@ -0,0 +1,489 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+
+struct strided_slice_test_params {
+    InferenceEngine::SizeVector in_shape;
+    size_t           dim_size;
+    std::vector<int32_t> begin;
+    std::vector<int32_t> end;
+    std::vector<int32_t> stride;
+
+    InferenceEngine::SizeVector begin_mask;
+    InferenceEngine::SizeVector end_mask;
+    InferenceEngine::SizeVector ellipsis_mask;
+    InferenceEngine::SizeVector new_axis_mask;
+    InferenceEngine::SizeVector shrink_axis_mask;
+    InferenceEngine::SizeVector out_shape;
+    std::vector<float> reference;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+inline void clipping(int *idx, const int min, const int max) {
+    (*idx) = ((*idx) > min) ? (*idx) : min;
+    (*idx) = ((*idx) < max) ? (*idx) : (max - 1);
+    return;
+}
+
+void ref_strided_slice(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::TBlob<float> &dst,
+    InferenceEngine::SizeVector &out_dims,
+    std::vector<int> begin,
+    std::vector<int> end,
+    std::vector<int> stride,
+    InferenceEngine::SizeVector begin_mask,
+    InferenceEngine::SizeVector end_mask,
+    InferenceEngine::SizeVector ellipsis_mask,
+    InferenceEngine::SizeVector new_axis_mask,
+    InferenceEngine::SizeVector shrink_axis_mask
+) {
+    size_t i;
+    const float *src_data = src.data();
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+    InferenceEngine::SizeVector srcStrides = src.getTensorDesc().getBlockingDesc().getStrides();
+    float* dst_data = dst.data();
+    InferenceEngine::SizeVector dst_dims = dst.getTensorDesc().getDims();
+    InferenceEngine::SizeVector dstStrides = dst.getTensorDesc().getBlockingDesc().getStrides();
+
+    int new_axis = 0;
+    for (auto& na : new_axis_mask)
+        new_axis += na;
+
+    int shrink_axis = 0;
+    for (auto& sa : shrink_axis_mask)
+        shrink_axis += sa;
+    int max_dims = src_dims.size() + new_axis;
+//    if ((max_dims - shrink_axis) != dst_dims.size())
+//        FAIL() << "Destination dims should be equal source dims + new axis - shrink_axis";
+
+    //  Check beging/end/stride vector sizes
+    int bounds_size = 0;
+    if (begin.size() && end.size() && begin.size() != end.size()) FAIL() << "Begin vector size should be equal end vectror size";
+    if (begin.size() && stride.size() && stride.size() != begin.size()) FAIL() << "Stride vector size should be equal begin vectror size";
+    if (end.size() && stride.size() && stride.size() != end.size()) FAIL() << "Stride vector size should be equal end vectror size";
+
+    if (begin.size()) bounds_size = begin.size();
+    if (end.size()) bounds_size = end.size();
+    if (stride.size()) bounds_size = stride.size();
+
+    //  ellipsis_mask must be a power of two (only one ellipsis), so to take a first position
+    int ellipsis_pos1, ellipsis_pos2;
+    ellipsis_pos1 = ellipsis_pos2 = max_dims;
+    for (i = 0; i < ellipsis_mask.size(); i++) {
+        if (ellipsis_mask[i] > 0) {
+            ellipsis_pos1 = i;
+            break;
+        }
+    }
+    bounds_size -= ellipsis_pos1;
+    if(bounds_size > 0 && (max_dims - bounds_size) > ellipsis_pos1)
+        ellipsis_pos2 = max_dims - bounds_size;
+
+    std::vector<int> begin_dms(max_dims, 0);
+    std::vector<int> end_dms(max_dims, -1);
+    std::vector<int> stride_dms(max_dims, 1);
+
+    int j, k, bj, ej, sj;
+    InferenceEngine::SizeVector our_dims;
+    for (i = 0, j = 0, k = 0, bj = 0, ej = 0, sj = 0; i < max_dims; i++) {
+        if (i >= ellipsis_pos1 && i < ellipsis_pos2) {
+            if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1)) {
+                end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j++] + end_dms[i];
+            } else {
+                //end_dms[i] = 0;
+                end_dms[i] = begin_dms[i];
+            }
+            out_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast<float>(abs(stride_dms[i])))));
+            our_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast<float>(abs(stride_dms[i])))));
+            k = ellipsis_pos1;
+            continue;
+        }
+        stride_dms[i] = (stride.size() > sj && stride[sj] != 0) ? stride[sj++] : 1;
+
+        if (!(begin_mask.size() > j && begin_mask[j] == 0))
+            begin_dms[i] = begin.size() > bj ? begin[bj] : (stride_dms[i] > 0 ? 0 : -1);
+        else
+            begin_dms[i] = stride_dms[i] > 0 ? 0 : -1;
+        bj++;
+        begin_dms[i] = begin_dms[i] >= 0 ? begin_dms[i] : src_dims[j] + begin_dms[i];
+        //  Clipping 'begin'
+        clipping(&begin_dms[i], 0, src_dims[j]);
+
+        if (!(end_mask.size() > j && end_mask[j] == 0)) {
+            int end_dms_tmp = end.size() > ej ? (stride_dms[i] > 0 ? end[ej] - 1 : end[ej] + 1) : end_dms[i];
+            end_dms[i] = end.size() > ej ? end_dms_tmp : (stride_dms[i] > 0 ? -1 : 0);
+        }
+        else {
+            end_dms[i] = stride_dms[i] > 0 ? -1 : 0;
+        }
+        ej++;
+        end_dms[i] = end_dms[i] >= 0 ? end_dms[i] : src_dims[j] + end_dms[i];
+        //  Clipping 'end'
+        clipping(&end_dms[i], 0, src_dims[j]);
+
+        if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1))
+            j++;
+        else
+            end_dms[i] = 0;
+
+        if (shrink_axis_mask.size() > k && shrink_axis_mask[k] == 1)
+            end_dms[i] = begin_dms[i];
+        else
+            out_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast<float>(abs(stride_dms[i])))));
+
+        our_dims.push_back(static_cast<int>(ceil(static_cast<float>(abs(end_dms[i] - begin_dms[i]) + 1) / static_cast<float>(abs(stride_dms[i])))));
+        k++;
+    }
+
+    size_t work_amount_dst = dstStrides[0] * dst_dims[0];
+    InferenceEngine::SizeVector counters(max_dims, 0);
+
+    for (size_t iwork = 0, dst_idx = 0; iwork < work_amount_dst; ++iwork) {
+        int src_idx = 0;
+        for (i = 0, j = 0; i < max_dims; ++i) {
+            src_idx += (begin_dms[i] + counters[i] * stride_dms[i]) * srcStrides[j];
+            if (!(new_axis_mask.size() > i && new_axis_mask[i] == 1)) j++;
+        }
+
+        dst_data[dst_idx++] = src_data[src_idx];
+
+        for (j = max_dims - 1; j >= 0; j--) {
+            counters[j] = (counters[j] + 1) % our_dims[j];
+            if (counters[j] != 0) break;
+        }
+    }
+}
+
+class MKLDNNCPUExtStridedSliceTests : public TestsCommon, public WithParamInterface<strided_slice_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="StridedSlice_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>
+        </layer>
+        <layer name="begin" type="Input" precision="I32" id="2">
+            <output>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="end" type="Input" precision="I32" id="3">
+            <output>
+                <port id="3">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="strides" type="Input" precision="I32" id="4">
+            <output>
+                <port id="4">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="StridedSlice" precision="FP32">
+            <data _BEGIN_ _END_ _ELLIPSIS_ _NEW_AXIS_ _SHRINK_/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+                <port id="3">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+                <port id="4">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </input>
+            <output>
+                <port id="5">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+        <edge from-layer="3" from-port="3" to-layer="2" to-port="3"/>
+        <edge from-layer="4" from-port="4" to-layer="2" to-port="4"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(strided_slice_test_params p) {
+        std::string model = model_t;
+        std::string in_shape;
+        std::string out_shape;
+        std::string begin;
+        std::string end;
+        std::string ellipsis;
+        std::string new_axis;
+        std::string shrink_axis;
+
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        in_shape.pop_back();
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.dim_size);
+
+        if (p.begin_mask.size()) {
+            begin = "begin_mask=\"";
+            for (auto& pb : p.begin_mask)
+                begin += std::to_string(pb) + ",";
+            begin.pop_back();
+            begin += "\"";
+        }
+        REPLACE_WITH_STR(model, "_BEGIN_", begin);
+
+        if (p.end_mask.size()) {
+            end = "end_mask=\"";
+            for (auto& pb : p.end_mask)
+                end += std::to_string(pb) + ",";
+            end.pop_back();
+            end += "\"";
+        }
+        REPLACE_WITH_STR(model, "_END_", end);
+
+        if (p.ellipsis_mask.size()) {
+            ellipsis = "ellipsis_mask=\"";
+            for (auto& pb : p.ellipsis_mask)
+                ellipsis += std::to_string(pb) + ",";
+            ellipsis.pop_back();
+            ellipsis += "\"";
+        }
+        REPLACE_WITH_STR(model, "_ELLIPSIS_", ellipsis);
+
+        if (p.new_axis_mask.size()) {
+            new_axis = "new_axis_mask=\"";
+            for (auto& pb : p.new_axis_mask)
+                new_axis += std::to_string(pb) + ",";
+            new_axis.pop_back();
+            new_axis += "\"";
+        }
+        REPLACE_WITH_STR(model, "_NEW_AXIS_", new_axis);
+
+        if (p.shrink_axis_mask.size()) {
+            shrink_axis = "shrink_axis_mask=\"";
+            for (auto& pb : p.shrink_axis_mask)
+                shrink_axis += std::to_string(pb) + ",";
+            shrink_axis.pop_back();
+            shrink_axis += "\"";
+        }
+        REPLACE_WITH_STR(model, "_SHRINK_", shrink_axis);
+
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        out_shape.pop_back();
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            strided_slice_test_params p = ::testing::WithParamInterface<strided_slice_test_params>::GetParam();
+            std::string model = getModel(p);
+            ////std::cout << model;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            // Input Begin
+            InferenceEngine::Blob::Ptr beginIdx;
+            InferenceEngine::SizeVector begin_dim(1, p.begin.size());
+            beginIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, begin_dim, InferenceEngine::TensorDesc::getLayoutByDims(begin_dim) });
+            beginIdx->allocate();
+            if (p.begin.size())
+                memcpy(static_cast<int32_t*>(beginIdx->buffer()), &p.begin[0], sizeof(int32_t)*p.begin.size());
+            auto * beginIdxPtr = dynamic_cast<InferenceEngine::TBlob<int>*>(beginIdx.get());
+            if (beginIdxPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+            // Input End
+            InferenceEngine::Blob::Ptr endIdx;
+            InferenceEngine::SizeVector end_dim(1, p.end.size());
+            endIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, end_dim, InferenceEngine::TensorDesc::getLayoutByDims(end_dim) });
+            endIdx->allocate();
+            if (p.end.size())
+                memcpy(static_cast<int32_t*>(endIdx->buffer()), &p.end[0], sizeof(int32_t)*p.end.size());
+            auto * endIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(endIdx.get());
+            if (endIdxPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+            // Input Stride
+            InferenceEngine::Blob::Ptr stridesIdx;
+            InferenceEngine::SizeVector strides_dim(1, p.stride.size());
+            stridesIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, strides_dim, InferenceEngine::TensorDesc::getLayoutByDims(strides_dim) });
+            stridesIdx->allocate();
+            if (p.stride.size())
+                memcpy(static_cast<int32_t*>(stridesIdx->buffer()), &p.stride[0], sizeof(int32_t)*p.stride.size());
+            auto * stridesIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(stridesIdx.get());
+            if (stridesIdxPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+            // Check results
+            InferenceEngine::SizeVector out_dims;
+            ref_strided_slice(*srcPtr, dst_ref, out_dims, p.begin, p.end, p.stride, p.begin_mask, p.end_mask, p.ellipsis_mask, p.new_axis_mask, p.shrink_axis_mask);
+
+            //  Check results
+            if(out_dims.size() != p.out_shape.size())
+                FAIL() << "Wrong out_shape size!";
+            for (size_t i = 0; i < p.out_shape.size(); i++) {
+                if (out_dims[i] != p.out_shape[i])
+                    FAIL() << "Wrong out_shape dimensions!";
+            }
+            if (memcmp(dst_ref.data(), &p.reference[0], p.reference.size() * sizeof(float)) != 0)
+                FAIL() << "Wrong result with compare TF reference!";
+
+           InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("begin", beginIdx));
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("end", endIdx));
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("strides", stridesIdx));
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, dst_ref);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+
+//  Test data vectors
+std::vector<float> test0 =  { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f };
+std::vector<float> test2 =  { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f };
+std::vector<float> test5 =  { 5.f, 6.f, 7.f, 8.f };
+std::vector<float> test6 =  { 0.f, 1.f, 2.f, 3.f, 4.f, 5.f };
+std::vector<float> test8 =  { 5.f, 4.f, 3.f, 2.f, 1.f };
+std::vector<float> test9 =  { 5.f, 4.f, 3.f, 2.f, 1.f, 0.f };
+std::vector<float> test10 = { 5.f, 4.f, 3.f };
+std::vector<float> test11 = { 0.f, 2.f, 4.f, 6.f, 8.f };
+std::vector<float> test12 = { 1.f, 3.f, 5.f, 7.f, 9.f };
+std::vector<float> test13 = { 9.f, 8.f, 7.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f, 0.f };
+std::vector<float> test14 = { 9.f, 7.f, 5.f, 3.f, 1.f };
+std::vector<float> test16 = { 0.f, 1.f, 3.f, 4.f };
+std::vector<float> test17 = { 1.f, 4.f };
+std::vector<float> test19 = { 0.f, 1.f, 2.f, 3.f };
+std::vector<float> test20 = { 4.f, 5.f, 6.f, 7.f };
+/*
+0. [0,1,2,3,4,5,6,7,8,9], shape=[10]
+1. [0,1,2,3,4,5,6,7,8,9], shape=[10]
+2. [0,1,2,3,4,5,6,7,8], shape=[9]
+3. [0,1,2,3,4,5,6,7,8], shape=[9]
+4. [0,1,2,3,4,5,6,7,8,9], shape=[10]
+5. [5,6,7,8,9], shape=[5]
+6. [0,1,2,3,4,5], shape=[6]
+7. [5,6,7,8,9], shape=[5]
+8. [5,4,3,2,1], shape=[5]
+9. [5,4,3,2,1,0], shape=[6]
+10. [5,4,3], shape=[3]
+11. [0,2,4,6,8], shape=[5]
+12. [1,3,5,7,9], shape=[5]
+13. [9,8,7,6,5,4,3,2,1,0], shape=[10]
+14. [9,7,5,3,1], shape=[5]
+15. [[0,1,2,3,4,5,6,7,8,9]], shape=[1,10]
+16. [[[0,1,2],[3,4,5]]], shape=[1,2,2]
+17. [[[0,1,2],[3,4,5]]], shape=[1,2,1]
+18. [[[0,1,2],[3,4,5]]], shape=[1,1,2,1]
+19. [[[[0,1],[2,3]],[[4,5],[6,7]]]], shape=[1,2,2]
+20. [[[[0,1],[2,3]],[[4,5],[6,7]]]], shape=[1,2,2]
+21. [[[0,1,2],[3,4,5]]], shape=[1,1,2]
+*/
+
+TEST_P(MKLDNNCPUExtStridedSliceTests, TestsStridedSlice) {}
+INSTANTIATE_TEST_CASE_P(
+    TestsStridedSlice, MKLDNNCPUExtStridedSliceTests,
+            ::testing::Values(
+// Params: in_shape, dim_size, begin, end, stride, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, out_shape, reference
+/* 0 */         strided_slice_test_params{ { 10 }, 1, {}, {}, {}, {}, {}, {}, {}, {}, { 10 }, test0 },
+                strided_slice_test_params{ { 10 }, 1, {0}, {0}, {}, {}, {0}, {}, {}, {}, { 10 }, test0 },
+                strided_slice_test_params{ { 10 }, 1,{ -1 },{ -1 },{},{ 0 },{},{},{},{},{ 9 }, test2 },
+                strided_slice_test_params{ { 10 }, 1,{ 0 },{ -1 },{},{},{},{},{},{},{ 9 }, test2 },
+                strided_slice_test_params{ { 10 }, 1,{ 0 },{ 10 },{},{},{},{},{},{},{ 10 }, test0 },
+/* 5 */         strided_slice_test_params{ { 10 }, 1,{ 5 },{ 10 },{},{},{},{},{},{},{ 5 }, test5 },
+                strided_slice_test_params{ { 10 }, 1,{ 0 },{ 6 },{},{},{},{},{},{},{ 6 }, test6 },
+                strided_slice_test_params{ { 10 }, 1,{ -5 },{ 10 },{},{},{},{},{},{},{ 5 }, test5 },
+                strided_slice_test_params{ { 10 }, 1,{ -5 },{ 0 },{-1},{},{},{},{},{},{ 5 }, test8 },
+                strided_slice_test_params{ { 10 }, 1,{ -5 },{ 0 },{ -1 },{},{0},{},{},{},{ 6 }, test9 },
+/* 10 */        strided_slice_test_params{ { 10 }, 1,{ -5 },{ 2 },{ -1 },{},{},{},{},{},{ 3 }, test10 },
+                strided_slice_test_params{ { 10 }, 1,{ 0 },{ 0 },{ 2 },{},{0},{},{},{},{ 5 }, test11 },
+                strided_slice_test_params{ { 10 }, 1,{ 1 },{ 0 },{ 2 },{},{ 0 },{},{},{},{ 5 }, test12 },
+                strided_slice_test_params{ { 10 }, 1,{ -1 },{ 0 },{ -1 },{},{ 0 },{},{},{},{ 10 }, test13 },
+                strided_slice_test_params{ { 10 }, 1,{ -1 },{ 0 },{ -2 },{},{ 0 },{},{},{},{ 5 }, test14 },
+/* 15 */        strided_slice_test_params{ { 10 }, 1,{ 0 },{ 10 },{},{},{},{},{1},{},{ 1, 10 }, test0 },
+                strided_slice_test_params{ { 1, 2, 3 }, 2,{ 0, 0 },{ 1, 2 },{},{},{},{0, 1},{},{},{ 1, 2, 2 }, test16 },
+                strided_slice_test_params{ { 1, 2, 3 }, 4,{ 0, 0, 0, 1 },{ 2, 3, 2, 2 },{},{},{},{},{ 0,0,1,0 },{ 0,0,0,1 },{ 1,2,1 }, test17 },
+                strided_slice_test_params{ { 1, 2, 3 }, 3,{ 0, 0, 1 },{ 2, 2, 2 },{},{},{},{ 0, 1 },{ 1 },{},{ 1, 1, 2, 1 }, test17 },
+                strided_slice_test_params{ { 1, 2, 2, 2 }, 4,{},{},{},{ 0,1,0,0 },{ 0,1,0,0 },{},{},{ 0,1 },{ 1,2,2 }, test19 },
+/* 20 */        strided_slice_test_params{ { 1, 2, 2, 2 }, 4,{ 0,1,0,0 },{ 1,2,2,2 },{},{ 0,1,0,0 },{ 0,1,0,0 },{},{},{ 0,1,0,0 },{ 1,2,2 }, test20 },
+                strided_slice_test_params{ { 1, 2, 3 }, 3,{ 0, 0, 1 },{ 2, 2, 2 },{},{},{},{ 0, 1 },{ 1 },{ 0, 0, 1 },{ 1, 1, 2 }, test17 }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/unsqueeze_tests.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/unsqueeze_tests.cpp
new file mode 100644
index 000000000..1b073beab
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/extensions/unsqueeze_tests.cpp
@@ -0,0 +1,235 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include <extension/ext_list.hpp>
+#include "tests_common.hpp"
+
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+
+struct unsqueeze_test_params {
+    std::string                 inIdxPrecision;
+    InferenceEngine::SizeVector in_shape;
+    std::vector<int32_t>        indices_to_set;
+    InferenceEngine::SizeVector out_shape;
+
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+void ref_unsqueeze(
+    InferenceEngine::TBlob<float> &src,
+    InferenceEngine::SizeVector &out_dims,
+    std::vector<int32_t> indices_to_set
+) {
+    InferenceEngine::SizeVector src_dims = src.getTensorDesc().getDims();
+
+    if (indices_to_set.size() == 0)
+        FAIL() << " Index vector should be 1 dimension";
+
+    size_t i, j, k, max = src_dims.size();
+    for (size_t i = 0; i < indices_to_set.size(); i++) {
+        if (indices_to_set[i] > max) max = indices_to_set[i];
+    }
+    max++;
+
+    if ((indices_to_set.size() + src_dims.size()) < max)
+        FAIL() << " Indices_to_set for unsqueeze layer is out of tensor dimension";
+
+    max = indices_to_set.size() + src_dims.size();
+    for (i = 0, j = 0, k = 0; i < max; i++) {
+        if (k < indices_to_set.size() && i == indices_to_set[k]) {
+            out_dims.push_back(1);
+            k++;
+        } else {
+            out_dims.push_back(src_dims[j++]);
+        }
+    }
+}
+
+class MKLDNNCPUExtUnsqueezeTests : public TestsCommon, public WithParamInterface<unsqueeze_test_params> {
+    std::string model_t = R"V0G0N(
+<net Name="Unsqueeze_net" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">
+                    _IN_
+                </port>
+            </output>
+        </layer>
+        <layer name="indices_to_set" type="Input" precision="_IIDXP_" id="2">
+            <output>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="output" id="2" type="Unsqueeze" precision="FP32">
+            <data/>
+            <input>
+                <port id="1">
+                    _IN_
+                </port>
+                <port id="2">
+                    <dim>_DIM_SIZE_</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    _OUT_
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="2" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
+
+    std::string getModel(unsqueeze_test_params p) {
+        std::string model = model_t;
+        std::string in_shape;
+        std::string out_shape;
+
+        for (size_t i = 0; i < p.in_shape.size(); i++) {
+            in_shape += "<dim>";
+            in_shape += std::to_string(p.in_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_IN_", in_shape);
+        REPLACE_WITH_STR(model, "_IIDXP_", p.inIdxPrecision);
+        REPLACE_WITH_NUM(model, "_DIM_SIZE_", p.indices_to_set.size());
+        for (size_t i = 0; i < p.out_shape.size(); i++) {
+            out_shape += "<dim>";
+            out_shape += std::to_string(p.out_shape[i]) + "</dim>\n";
+        }
+        REPLACE_WITH_STR(model, "_OUT_", out_shape);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            unsqueeze_test_params p = ::testing::WithParamInterface<unsqueeze_test_params>::GetParam();
+            std::string model = getModel(p);
+            ////std::cout << model;
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            InferenceEngine::Extension cpuExt(make_so_name("cpu_extension"));
+            MKLDNNPlugin::MKLDNNExtensionManager::Ptr extMgr(new MKLDNNPlugin::MKLDNNExtensionManager());
+            extMgr->AddExtension(InferenceEngine::IExtensionPtr(&cpuExt, [](InferenceEngine::IExtension*){}));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork(), extMgr);
+
+            // Output Data
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            // Output Reference
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            // Input Data
+            InferenceEngine::Blob::Ptr src;
+            src = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, p.in_shape, InferenceEngine::TensorDesc::getLayoutByDims(p.in_shape) });
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            auto * srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            InferenceEngine::Blob::Ptr seq_lengthsIdx;
+            InferenceEngine::SizeVector seq_lengths_dim(1, p.indices_to_set.size());
+            if (p.inIdxPrecision == "I32") {
+                seq_lengthsIdx = InferenceEngine::make_shared_blob<int32_t>({ InferenceEngine::Precision::I32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+                seq_lengthsIdx->allocate();
+                if (p.indices_to_set.size())
+                    memcpy(static_cast<int32_t*>(seq_lengthsIdx->buffer()), &p.indices_to_set[0], sizeof(int32_t)*p.indices_to_set.size());
+                auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<int32_t>*>(seq_lengthsIdx.get());
+                if (seq_lengthsIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<int32_t>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("indices_to_set", seq_lengthsIdx));
+            } else if (p.inIdxPrecision == "FP32") {
+                seq_lengthsIdx = InferenceEngine::make_shared_blob<float>({ InferenceEngine::Precision::FP32, seq_lengths_dim, InferenceEngine::TensorDesc::getLayoutByDims(seq_lengths_dim) });
+                seq_lengthsIdx->allocate();
+                if (p.indices_to_set.size())
+                    for (size_t i = 0; i < p.indices_to_set.size(); i++) {
+                        static_cast<float *>(seq_lengthsIdx->buffer())[i] = static_cast<float>(p.indices_to_set[i]);
+                    }
+                auto * seq_lengthsIdxPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(seq_lengthsIdx.get());
+                if (seq_lengthsIdxPtr == nullptr)
+                    FAIL() << "Cannot cast blob to TBlob<float>.";
+
+                srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("indices_to_set", seq_lengthsIdx));
+            }
+            else {
+                return;
+            }
+
+            // Check results
+            InferenceEngine::SizeVector out_dims;
+            ref_unsqueeze(*srcPtr, out_dims, p.indices_to_set);
+            if (out_dims.size() != p.out_shape.size())
+                FAIL() << "Wrong out_shape size!";
+            for (size_t i = 0; i < p.out_shape.size(); i++) {
+                if (out_dims[i] != p.out_shape[i])
+                    FAIL() << "Wrong out_shape dimensions!";
+            }
+
+            // Infer
+            graph.Infer(srcs, outputBlobs);
+            compare(*output, *src);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNCPUExtUnsqueezeTests, TestsUnsqueeze) {}
+
+INSTANTIATE_TEST_CASE_P(
+    TestsUnsqueeze, MKLDNNCPUExtUnsqueezeTests,
+            ::testing::Values(
+// Params: inIdxPrecision, in_shape, indices_to_set, out_shape
+                unsqueeze_test_params{ "I32",{ 3 },{ 0 },{ 1, 3 } },
+                unsqueeze_test_params{ "I32",{ 3 },{ 0, 1, 2 },{ 1, 1, 1, 3 } },
+                unsqueeze_test_params{ "I32",{ 3 },{ 0, 2, 3 },{ 1, 3, 1, 1 } },
+                unsqueeze_test_params{ "I32",{ 2, 3 },{ 0, 3 },{ 1, 2, 3, 1 } },
+                unsqueeze_test_params{ "I32",{ 2, 3 },{ 1 },{ 2, 1, 3 } },
+                unsqueeze_test_params{"FP32",{ 3 },{ 0 },{ 1, 3 } },
+                unsqueeze_test_params{"FP32",{ 3 },{ 0, 1, 2 },{ 1, 1, 1, 3 } },
+                unsqueeze_test_params{"FP32",{ 3 },{ 0, 2, 3 },{ 1, 3, 1, 1 } },
+                unsqueeze_test_params{"FP32",{ 2, 3 },{ 0, 3 },{ 1, 2, 3, 1 } },
+                unsqueeze_test_params{"FP32",{ 2, 3 },{ 1 },{ 2, 1, 3 } }
+            ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp
index a0898b599..227f6323e 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_activation_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp
index 544f51a73..979796f00 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_scaleshift_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp
index 6920b55c4..450abbe3a 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_batchnorm_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp
index 7396700b8..e9c7eec1a 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_concat_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp
index dbfbc06a3..7eae8c435 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_conv_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -356,15 +356,6 @@ INSTANTIATE_TEST_CASE_P(
                                  {3, 3}, {1, 2}, {0, 0}, {0, 0}, 20, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit },
                 conv_test_params{{1, 1, 32, 16},
                                  {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit },
-#ifdef USE_MKL
-                conv_test_params{{1, 9, 16, 32},
-                                 {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 6, MKLDNNPlugin::impl_desc_type::gemm,
-                                 {MKLDNNPlugin::impl_desc_type::gemm_any,
-                                  MKLDNNPlugin::impl_desc_type::gemm_blas,
-                                  MKLDNNPlugin::impl_desc_type::gemm_avx512,
-                                  MKLDNNPlugin::impl_desc_type::gemm_avx2,
-                                  MKLDNNPlugin::impl_desc_type::gemm_sse42} },
-#endif
                 conv_test_params{{1, 9, 32, 16},
                                  {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::ref_any,
                                  {MKLDNNPlugin::impl_desc_type::ref_any} },
@@ -372,7 +363,7 @@ INSTANTIATE_TEST_CASE_P(
                                  {3, 3}, {1, 1}, {1, 1}, {0, 0}, 64, 1, "", 3, MKLDNNPlugin::impl_desc_type::ref_any,
                                  {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd, MKLDNNPlugin::impl_desc_type::ref_any}},
                 // 5D
-        /*9*/   conv_test_params{{1, 3, 15, 20, 20},
+        /*8*/   conv_test_params{{1, 3, 15, 20, 20},
                                  {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::ref_any,
                                  {MKLDNNPlugin::impl_desc_type::ref_any} },
                 conv_test_params{{1, 24, 15, 20, 20},
@@ -385,9 +376,16 @@ INSTANTIATE_TEST_CASE_P(
                                  {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit },
                 conv_test_params{{1, 24, 15, 25, 20},
                                  {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit },
-        /*14*/  conv_test_params{{1, 32, 15, 25, 20},
+        /*13*/  conv_test_params{{1, 32, 15, 25, 20},
                                  {3, 3, 3}, {2, 2, 2}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::jit },
 #ifdef USE_MKL
+                conv_test_params{{1, 9, 16, 32},
+                                 {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 6, MKLDNNPlugin::impl_desc_type::gemm,
+                                 {MKLDNNPlugin::impl_desc_type::gemm_any,
+                                  MKLDNNPlugin::impl_desc_type::gemm_blas,
+                                  MKLDNNPlugin::impl_desc_type::gemm_avx512,
+                                  MKLDNNPlugin::impl_desc_type::gemm_avx2,
+                                  MKLDNNPlugin::impl_desc_type::gemm_sse42} },
                 conv_test_params{{1, 5, 15, 20, 20},
                                  {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, "", 2, MKLDNNPlugin::impl_desc_type::gemm_blas },
                 conv_test_params{{1, 5, 15, 20, 20},
@@ -406,7 +404,6 @@ INSTANTIATE_TEST_CASE_P(
                                  {5, 5, 5}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, 16, 1, "", 2, MKLDNNPlugin::impl_desc_type::ref_any,
                                  {MKLDNNPlugin::impl_desc_type::ref_any} }));
 
-
 class MKLDNNGraphDynBatchConvolutionTests: public MKLDNNGraphConvolutionTests {
 protected:
     virtual void SetUp() {
@@ -515,6 +512,7 @@ INSTANTIATE_TEST_CASE_P(
                 conv_test_params{{1, 1, 32, 16},
                                  {2, 4}, {2, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::jit,
                                  {MKLDNNPlugin::impl_desc_type::jit_avx512_winograd} },
+#ifdef USE_MKL
                 conv_test_params{{1, 9, 16, 32},
                                  {1, 1}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 7, MKLDNNPlugin::impl_desc_type::gemm,
                                  {MKLDNNPlugin::impl_desc_type::gemm_any,
@@ -523,5 +521,6 @@ INSTANTIATE_TEST_CASE_P(
                                   MKLDNNPlugin::impl_desc_type::gemm_avx2,
                                   MKLDNNPlugin::impl_desc_type::gemm_sse42}
                 },
+#endif
                 conv_test_params{{1, 9, 32, 16},
                                  {2, 4}, {1, 1}, {0, 0}, {0, 0}, 17, 1, "", 5, MKLDNNPlugin::impl_desc_type::ref_any, {MKLDNNPlugin::impl_desc_type::ref_any} }));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp
index 545ac154f..13719000a 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_crop_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp
index b26351158..d416f81ed 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_deconv_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,6 +11,7 @@
 #include "single_layer_common.hpp"
 #include <mkldnn_plugin/mkldnn_extension_utils.h>
 #include <inference_engine/cnn_network_impl.hpp>
+#include "ir_gen_helper.hpp"
 #include "tests_common.hpp"
 
 
@@ -18,6 +19,7 @@ using namespace InferenceEngine;
 using namespace ::testing;
 using namespace std;
 using namespace mkldnn;
+using namespace single_layer_tests;
 
 
 struct deconv_test_params {
@@ -69,8 +71,8 @@ void ref_deconv(const InferenceEngine::TBlob<data_t> &src, const InferenceEngine
 
     size_t OC = prm.out_c;
 
-    size_t OW = SW * (IW - 1) + KW - 2 * PW;
-    size_t OH = SH * (IH - 1) + KH - 2 * PH;
+    size_t OW = SW * (IW - 1lu) + KW - 2lu * PW;
+    size_t OH = SH * (IH - 1lu) + KH - 2lu * PH;
     size_t OD = dims_size == 5 ? (SD * (ID - 1) + KD - 2 * PD) : 1u;
 
     const data_t *src_data = src.readOnly();
@@ -86,61 +88,70 @@ void ref_deconv(const InferenceEngine::TBlob<data_t> &src, const InferenceEngine
     size_t CI1 = IH * IW;
     size_t CI2 = CI1 * ID;
     size_t CI3 = CI2 * IC;
+    
+    size_t OC_G = OC / G;
+    size_t IC_G = IC / G;
 
     size_t CK1 = KH * KW;
     size_t CK2 = CK1 * KD;
-    size_t CK3 = CK2 * (OC / G);
-    size_t CK4 = CK3 * (IC / G);
-
-    for (int g = 0; g < G; ++g) {
-        for (int mb = 0; mb < MB; ++mb) {
-            for (int oc = 0; oc < OC / G; ++oc) {
-                for (int od = 0; od < OD; ++od) {
-                    for (int oh = 0; oh < OH; ++oh) {
-                        for (int ow = 0; ow < OW; ++ow) {
-                            size_t didx = mb * CS3
-                                          + (g * OC / G + oc) * CS2
-                                          + od * CS1
-                                          + oh * OW
-                                          + ow;
+    size_t CK3 = CK2 * OC_G;
+    size_t CK4 = CK3 * IC_G;
+
+    for (size_t g = 0lu; g < G; ++g) {
+        size_t g_OC_G = g * OC_G;
+        size_t g_IC_G = g * IC_G;
+        size_t g_CK4 = g * CK4;
+        for (size_t mb = 0lu; mb < MB; ++mb) {
+            size_t mb_CS3 = mb * CS3;
+            size_t mb_CI3 = mb * CI3;
+            for (size_t oc = 0lu; oc < OC_G; ++oc) {
+                size_t g_OC_G_oc = g_OC_G + oc;
+                size_t mb_CS3_g_OC_G_oc_CS2 = mb_CS3 + g_OC_G_oc * CS2;
+                size_t g_CK4_oc_CK2 = g_CK4 + oc * CK2;
+                for (size_t od = 0lu; od < OD; ++od) {
+                    size_t mb_CS3_g_OC_G_oc_CS2_od_CS1 = mb_CS3_g_OC_G_oc_CS2 + od * CS1;
+                    size_t od_PD = od + PD;
+                    for (size_t oh = 0lu; oh < OH; ++oh) {
+                        size_t mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW = mb_CS3_g_OC_G_oc_CS2_od_CS1 + oh * OW;
+                        size_t oh_PH = oh + PH;
+                        for (size_t ow = 0lu; ow < OW; ++ow) {
+                            size_t didx = mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW + ow;
+                            size_t ow_PW = ow + PW;
 
                             dst_data[didx] = data_t(0);
-                            if (prm.with_bias) dst_data[didx] += bias_data[g * OC / G + oc];
-
-                            for (int ic = 0; ic < IC / G; ic++) {
-                                for (int kd = 0; kd < KD; kd++) {
-                                    for (int kh = 0; kh < KH; kh++) {
-                                        for (int kw = 0; kw < KW; kw++) {
-                                            if (ow + PW < kw || oh + PH < kh || od + PD < kd)
-                                                continue;
+                            if (prm.with_bias) dst_data[didx] += bias_data[g_OC_G_oc];
+
+                            for (size_t ic = 0lu; ic < IC_G; ic++) {
+                                size_t mb_CI3_g_IC_G_ic_CI2 = mb_CI3 + (g_IC_G + ic) * CI2;
+                                size_t g_CK4_oc_CK2_ic_CK3 = g_CK4_oc_CK2 + ic * CK3;
+                                for (int kd = 0lu; kd < KD; kd++) {
+                                    if (od_PD < kd) continue;
+                                    size_t id = od_PD - kd;
+                                    if (id % SD != 0) continue;
+                                    id /= SD;
+                                    if (id >= ID) continue;
+                                    size_t mb_CI3_g_IC_G_ic_CI2_id_CI1 = mb_CI3_g_IC_G_ic_CI2 + id * CI1;
+                                    size_t g_CK4_oc_CK2_ic_CK3_kd_CK1 = g_CK4_oc_CK2_ic_CK3 + kd * CK1;
+                                    for (size_t kh = 0lu; kh < KH; kh++) {
+                                        if (oh_PH < kh) continue;
+                                        size_t ih = oh_PH - kh;
+                                        if (ih % SH != 0) continue;
+                                        ih /= SH;
+                                        if (ih >= IH) continue;
+                                        size_t mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW = mb_CI3_g_IC_G_ic_CI2_id_CI1 + ih * IW;
+                                        size_t g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW = g_CK4_oc_CK2_ic_CK3_kd_CK1 + kh * KW;
+                                        for (size_t kw = 0lu; kw < KW; kw++) {
+                                            if (ow_PW < kw) continue;
+                                            size_t iw = ow_PW - kw;
+                                            if (iw % SW != 0) continue;
+                                            iw /= SW;
+                                            if (iw >= IW) continue;
 
-                                            size_t iw = ow - kw + PW;
-                                            size_t ih = oh - kh + PH;
-                                            size_t id = od - kd + PD;
+                                            size_t sidx = mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW + iw;
 
-                                            if (iw % SW != 0 || ih % SH != 0 || id % SD != 0)
-                                                continue;
+                                            size_t widx = g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW + kw;
 
-                                            iw /= SW;
-                                            ih /= SH;
-                                            id /= SD;
-
-                                            if (ih < IH && iw < IW && id < ID) {
-                                                size_t sidx = mb * CI3
-                                                              + (g * IC / G + ic) * CI2
-                                                              + id * CI1
-                                                              + ih * IW
-                                                              + iw;
-
-                                                size_t widx = g * CK4
-                                                              + ic * CK3
-                                                              + oc * CK2
-                                                              + kd * CK1
-                                                              + kh * KW
-                                                              + kw;
-
-                                                dst_data[didx] += src_data[sidx] * weights_data[widx];
-                                            }
+                                            dst_data[didx] += src_data[sidx] * weights_data[widx];
                                         }
                                     }
                                 }
@@ -155,15 +166,7 @@ void ref_deconv(const InferenceEngine::TBlob<data_t> &src, const InferenceEngine
 
 class MKLDNNGraphDeconvolutionalTests: public TestsCommon,
                                      public WithParamInterface<deconv_test_params> {
-    std::string model_t_5D = R"V0G0N(
-<net name="Deconvolution_Only" version="3" precision="FP32" batch="1">
-    <layers>
-        <layer name="in1" type="Input" precision="FP32" id="0">
-            <output>
-                <port id="0">__SRC_DIMS__
-                </port>
-            </output>
-        </layer>
+    std::string layers_t = R"V0G0N(
         <layer name="deconv1" id="1" type="Deconvolution" precision="FP32">
             <deconvolution _AP_ kernel="_K_"
                          pads_begin="_PB_"  pads_end="_PE_"
@@ -174,27 +177,28 @@ class MKLDNNGraphDeconvolutionalTests: public TestsCommon,
             <biases offset="_S1_" size="_S2_" />
 
             <input>
-                <port id="1">__SRC_DIMS__
+                <port id="1">
+                    __SRC_DIMS__
                 </port>
             </input>
             <output>
                 <port id="2">
                     <dim>_IN_</dim>
-                    <dim>_OC_</dim>__DST_DIMS__
+                    <dim>_OC_</dim>
+                    __DST_DIMS__
                 </port>
             </output>
         </layer>
-    </layers>
-    <edges>
+)V0G0N";
+
+    std::string edges_t = R"V0G0N(
         <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
-    </edges>
-</net>
 )V0G0N";
 
 protected:
     std::string getModel(deconv_test_params p) {
-        std::string model = model_t_5D;
-        auto dims_size = p.dims.size();
+        std::string model = layers_t;
+
         std::string s_dims;
         for (auto& dim : p.dims) {
             s_dims += "\n                    <dim>";
@@ -243,6 +247,8 @@ protected:
         }
         REPLACE_WITH_STR(model, "_IMPLS_", impls);
 
+        model = IRTemplateGenerator::getIRTemplate("Deconvolution_Only", p.dims, "FP32", model, edges_t);
+
         return model;
     }
 
@@ -308,16 +314,8 @@ protected:
 
             InferenceEngine::SizeVector dims_src = p.dims;
 
-            InferenceEngine::Layout layout = ANY;
-            switch (p.dims.size()) {
-                case 4:
-                    layout = InferenceEngine::NCHW;
-                    break;
-                case 5:
-                    layout = InferenceEngine::NCDHW;
-                    break;
-            }
-            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
+                    InferenceEngine::Precision::FP32, InferenceEngine::TensorDesc::getLayoutByDims(p.dims), dims_src);
             src->allocate();
             fill_data(src->buffer(), src->size());
 
@@ -362,32 +360,28 @@ INSTANTIATE_TEST_CASE_P(
         ::testing::Values(
         /*0*/   deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{3, 3, 3, 3}, {4, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::jit} },
-                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
-                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
-                deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{2, 8, 5, 5}, {8, 8}, {4, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{2, 8, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
-        /*8*/   deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::jit} },
+        /*5*/   deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{3, 3, 3, 3}, {4, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::jit} },
-                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
-                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
-                deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 8, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{2, 8, 5, 5}, {8, 8}, {4, 4}, {1, 1}, {0, 0}, 8, 8, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{2, 8, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 8, 8, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::ref_any}, 
                                     {MKLDNNPlugin::impl_desc_type::ref_any}},
-        /*17*/  deconv_test_params{{2, 8, 5, 5}, {1, 3}, {1, 1}, {0, 1}, {0, 1}, 8, 8, true, "", 2,
+        /*11*/  deconv_test_params{{2, 8, 5, 5}, {1, 3}, {1, 1}, {0, 1}, {0, 1}, 8, 8, true, "", 2,
                     {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any}},
                 deconv_test_params{{1, 6, 6, 5}, {3, 1}, {1, 1}, {1, 0}, {1, 0}, 9, 3, true, "", 2,
                     {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any}},
-                deconv_test_params{{2, 24, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
-                deconv_test_params{{2, 24, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 1, true, "", 3, {MKLDNNPlugin::impl_desc_type::jit}},
-                deconv_test_params{{2, 72, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 72, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
-                deconv_test_params{{1, 12, 2, 2}, {4, 4}, {2, 2}, {1, 1}, {1, 1}, 12, 12, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
 #ifdef USE_MKL
+                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
+                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
+                deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 2, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm}},
+                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
+                deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
+                deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, true, "", 2, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 2, true, "", 3, {MKLDNNPlugin::impl_desc_type::gemm}},
                 deconv_test_params{{1, 6, 6, 5}, {3, 1}, {1, 1}, {1, 0}, {1, 0}, 9, 3, true, "", 2,
                     {MKLDNNPlugin::impl_desc_type::gemm_blas}},
@@ -396,7 +390,7 @@ INSTANTIATE_TEST_CASE_P(
                 deconv_test_params{{1, 32, 12, 12, 2}, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {1, 0, 0}, 16, 1, true, "", 4, 
                     {MKLDNNPlugin::impl_desc_type::gemm_blas} },
                 deconv_test_params{{1, 25, 1, 1, 1}, {4, 4, 4}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 64, 1, true, "valid", 3,
-                    {MKLDNNPlugin::impl_desc_type::gemm_blas} },
+                    {MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{1, 32, 16, 16, 16}, {4, 4, 4}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, 1, 1, true, "same_upper", 3,
                     {MKLDNNPlugin::impl_desc_type::gemm_blas} },
                 deconv_test_params{{1, 64, 12, 12, 2}, {2, 2, 2}, {2, 2, 2}, {0, 0, 0}, {1, 0, 0}, 32, 1, true, "same_upper", 3,
@@ -404,10 +398,13 @@ INSTANTIATE_TEST_CASE_P(
                 deconv_test_params{{1, 50, 1, 1, 1}, {4, 4, 4}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 128, 1, true, "", 3,
                     {MKLDNNPlugin::impl_desc_type::gemm_blas}, {MKLDNNPlugin::impl_desc_type::gemm_blas}},
 #endif
+                deconv_test_params{{2, 24, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
+                deconv_test_params{{2, 24, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 1, true, "", 3, {MKLDNNPlugin::impl_desc_type::jit}},
+                deconv_test_params{{2, 72, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 72, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
+                deconv_test_params{{1, 12, 2, 2}, {4, 4}, {2, 2}, {1, 1}, {1, 1}, 12, 12, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
                 // 5D
                 deconv_test_params{{1, 2, 8, 5, 5}, {3, 3, 3}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, 4, 1, true, "", 4,
                     {MKLDNNPlugin::impl_desc_type::ref_any}, {MKLDNNPlugin::impl_desc_type::ref_any} }
-
                 // Blocked, with biases
                 // TODO support on jit
 //                deconv_test_params{{2, 24, 5, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 24, 3, true, "", 4, {MKLDNNPlugin::impl_desc_type::jit}},
@@ -471,18 +468,8 @@ protected:
             graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}});
             graph.CreateGraph(net_reader.getNetwork());
 
-            InferenceEngine::SizeVector dims_src = p.dims;
-
-            InferenceEngine::Layout layout = ANY;
-            switch (p.dims.size()) {
-                case 4:
-                    layout = InferenceEngine::NCHW;
-                    break;
-                case 5:
-                    layout = InferenceEngine::NCDHW;
-                    break;
-            }
-            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
+                    InferenceEngine::Precision::FP32, InferenceEngine::TensorDesc::getLayoutByDims(p.dims), p.dims);
             InferenceEngine::TBlob<float>* srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
             if (srcPtr == nullptr)
                 FAIL() << "Cannot cast blob to TBlob<float>.";
@@ -523,10 +510,12 @@ INSTANTIATE_TEST_CASE_P(
         ::testing::Values(
                 deconv_test_params{{1, 3, 3, 3}, {3, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 5, {MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{3, 3, 3, 3}, {4, 3}, {1, 1}, {0, 0}, {0, 0}, 2, 1, false, "", 5, {MKLDNNPlugin::impl_desc_type::jit} },
+#ifdef USE_MKL
                 deconv_test_params{{1, 3, 3, 3}, {4, 3}, {1, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 4, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{1, 3, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{4, 17, 3, 3}, {4, 3}, {2, 2}, {0, 0}, {0, 0}, 2, 1, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm, MKLDNNPlugin::impl_desc_type::jit} },
                 deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 2, false, "", 3, {MKLDNNPlugin::impl_desc_type::gemm}},
+#endif
                 deconv_test_params{{2, 8, 5, 5}, {4, 4}, {2, 2}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{2, 8, 5, 5}, {8, 8}, {4, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}},
                 deconv_test_params{{2, 8, 5, 5}, {4, 8}, {2, 4}, {1, 1}, {0, 0}, 8, 8, false, "", 4, {MKLDNNPlugin::impl_desc_type::jit | MKLDNNPlugin::impl_desc_type::_dw}}
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp
index 27bd24195..f7c1368b8 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_depthwise_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp
index e1d288db2..38f95cad6 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_eltwise_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -19,10 +19,14 @@ using namespace mkldnn;
 
 struct eltwise_test_params {
     // Formats: NCHW, NCDHW
-    vector<size_t> dims;
+    vector<size_t> dims1;
+    vector<size_t> dims2;
+    vector<size_t> dims3;
 
     enum opType {
-        Sum = 0, Prod = 1, Max = 2
+        Sum = 0, Prod, Max, Min, Sub, Div, Squared_diff, Floor_mod, Pow,
+        Logical_AND, Logical_OR, Logical_XOR,
+        Less, Less_equal, Greater, Greater_equal, Equal, Not_equal
     };
 
     opType op;
@@ -55,74 +59,235 @@ void ref_eltwise(const std::vector<InferenceEngine::TBlob<data_t>> &src, Inferen
     data_t *dst_data = dst.data();
 
     const data_t *src_data = src[0].readOnly();
+    auto& dims = dst.getTensorDesc().getDims();
+    auto& dims0 = src[0].dims();
 
-    for (int i = 0; i < src[0].size(); i++) {
-        switch (prm.op) {
-            case eltwise_test_params::Sum: {
-                dst_data[i] = scales[0]*src_data[i];
+    int offset_in[5] = {1, 1, 1, 1, 1};
+    int offset_out[5] = {1, 1, 1, 1, 1};
+
+    for (int i = 0; i < dims0.size(); i++)
+        offset_in[5 - dims0.size() + i] = dims0[i];
+    for (int i = 0; i < dims.size(); i++)
+        offset_out[5 - dims.size() + i] = dims[i];
+
+    unsigned long j = 0, k = 0;
+
+    for (int i0 = 0; i0 < offset_out[0]; i0++) {
+        if (i0 > offset_in[0] - 1) {
+            k -= offset_in[1]*offset_in[2]*offset_in[3]*offset_in[4];
+        }
+        for (int i1 = 0; i1 < offset_out[1]; i1++) {
+            if (i1 > offset_in[1] - 1) {
+                k -= offset_in[2]*offset_in[3]*offset_in[4];
             }
-                break;
-            default: {
-                dst_data[i] = src_data[i];
+            for (int i2 = 0; i2 < offset_out[2]; i2++) {
+                if (i2 > offset_in[2] - 1) {
+                    k -= offset_in[3]*offset_in[4];
+                }
+                for (int i3 = 0; i3 < offset_out[3]; i3++) {
+                    if (i3 > offset_in[3] - 1) {
+                        k -= offset_in[4];
+                    }
+                    for (int i4 = 0; i4 < offset_out[4]; i4++) {
+                        if (i4 > offset_in[4] - 1) {
+                            k -= 1;
+                        }
+                        if (prm.op == eltwise_test_params::Sum) {
+                            dst_data[j++] = scales[0] * src_data[k++];
+                        } else {
+                            dst_data[j++] = src_data[k++];
+                        }
+                    }
+                }
             }
         }
     }
 
     for (int n = 1; n < src.size(); n++) {
+        j = 0;
+        k = 0;
         src_data = src[n].readOnly();
-
-        for (int i = 0; i < src[n].size(); i++) {
-            switch (prm.op) {
-                case eltwise_test_params::Sum: {
-                    dst_data[i] += scales[n]*src_data[i];
-                }
-                    break;
-
-                case eltwise_test_params::Prod: {
-                    dst_data[i] *= src_data[i];
+        auto& dims1 = src[n].dims();
+        int offset_in1[5] = {1, 1, 1, 1, 1};
+        for (int i = 0; i < dims1.size(); i++)
+            offset_in1[5 - dims1.size() + i] = dims1[i];
+
+        for (int i0 = 0; i0 < offset_out[0]; i0++) {
+            if (i0 > offset_in1[0] - 1) {
+                k -= offset_in1[1]*offset_in1[2]*offset_in1[3]*offset_in1[4];
+            }
+            for (int i1 = 0; i1 < offset_out[1]; i1++) {
+                if (i1 > offset_in1[1] - 1) {
+                    k -= offset_in1[2]*offset_in1[3]*offset_in1[4];
                 }
-                    break;
-
-                case eltwise_test_params::Max: {
-                    dst_data[i] = (std::max)(dst_data[i], src_data[i]);
+                for (int i2 = 0; i2 < offset_out[2]; i2++) {
+                    if (i2 > offset_in1[2] - 1) {
+                        k -= offset_in1[3]*offset_in1[4];
+                    }
+                    for (int i3 = 0; i3 < offset_out[3]; i3++) {
+                        if (i3 > offset_in1[3] - 1) {
+                            k -= offset_in1[4];
+                        }
+                        for (int i4 = 0; i4 < offset_out[4]; i4++, j++, k++) {
+                            if (i4 > offset_in1[4] - 1) {
+                                k -= 1;
+                            }
+                            switch (prm.op) {
+                                case eltwise_test_params::Sum:
+                                    dst_data[j] += scales[n] * src_data[k];
+                                    break;
+                                case eltwise_test_params::Sub:
+                                    dst_data[j] = dst_data[j] - src_data[k];
+                                    break;
+                                case eltwise_test_params::Min:
+                                    dst_data[j] = (std::min)(dst_data[j], src_data[k]);
+                                    break;
+                                case eltwise_test_params::Max:
+                                    dst_data[j] = (std::max)(dst_data[j], src_data[k]);
+                                    break;
+                                case eltwise_test_params::Prod:
+                                    dst_data[j] = dst_data[j] * src_data[k];
+                                    break;
+                                case eltwise_test_params::Div:
+                                    dst_data[j] = dst_data[j] / src_data[k];
+                                    break;
+                                case eltwise_test_params::Squared_diff:
+                                    dst_data[j] = (dst_data[j] - src_data[k]) * (dst_data[j] - src_data[k]);
+                                    break;
+                                case eltwise_test_params::Logical_OR:
+                                    dst_data[j] = dst_data[j] || src_data[k];
+                                    break;
+                                case eltwise_test_params::Logical_AND:
+                                    dst_data[j] = dst_data[j] && src_data[k];
+                                    break;
+                                case eltwise_test_params::Logical_XOR:
+                                    dst_data[j] = (dst_data[j] || src_data[k]) - (dst_data[j] && src_data[k]);
+                                    break;
+                                case eltwise_test_params::Less:
+                                    dst_data[j] = dst_data[j] < src_data[k];
+                                    break;
+                                case eltwise_test_params::Less_equal:
+                                    dst_data[j] = dst_data[j] <= src_data[k];
+                                    break;
+                                case eltwise_test_params::Greater:
+                                    dst_data[j] = dst_data[j] > src_data[k];
+                                    break;
+                                case eltwise_test_params::Greater_equal:
+                                    dst_data[j] = dst_data[j] >= src_data[k];
+                                    break;
+                                case eltwise_test_params::Equal:
+                                    dst_data[j] = dst_data[j] == src_data[k];
+                                    break;
+                                case eltwise_test_params::Not_equal:
+                                    dst_data[j] = dst_data[j] != src_data[k];
+                                    break;
+                                case eltwise_test_params::Pow:
+                                    dst_data[j] = std::pow(dst_data[j], src_data[k]);
+                                    break;
+                                case eltwise_test_params::Floor_mod:
+                                    dst_data[j] = dst_data[j] - dst_data[j] / src_data[k] * src_data[k];
+                                    break;
+                            }
+                        }
+                    }
                 }
-                    break;
             }
         }
     }
 }
 
-class MKLDNNGraphEltwiseTests: public TestsCommon,
+std::string select_op(eltwise_test_params::opType op) {
+    std::string str_op;
+    switch(op){
+        case eltwise_test_params::opType::Sum:
+            str_op = "sum";
+            break;
+        case eltwise_test_params::opType::Prod:
+            str_op = "prod";
+            break;
+        case eltwise_test_params::opType::Max:
+            str_op = "max";
+            break;
+        case eltwise_test_params::opType::Min:
+            str_op = "min";
+            break;
+        case eltwise_test_params::opType::Sub:
+            str_op = "sub";
+            break;
+        case eltwise_test_params::opType::Div:
+            str_op = "div";
+            break;
+        case eltwise_test_params::opType::Squared_diff:
+            str_op = "squared_diff";
+            break;
+        case eltwise_test_params::opType::Logical_AND:
+            str_op = "logical_and";
+            break;
+        case eltwise_test_params::opType::Logical_OR:
+            str_op = "logical_or";
+            break;
+        case eltwise_test_params::opType::Logical_XOR:
+            str_op = "logical_xor";
+            break;
+        case eltwise_test_params::opType ::Less:
+            str_op = "less";
+            break;
+        case eltwise_test_params::opType::Less_equal:
+            str_op = "less_equal";
+            break;
+        case eltwise_test_params::opType::Greater:
+            str_op = "greater";
+            break;
+        case eltwise_test_params::opType::Greater_equal:
+            str_op = "greater_equal";
+            break;
+        case eltwise_test_params::opType::Equal:
+            str_op = "equal";
+            break;
+        case eltwise_test_params::opType::Not_equal:
+            str_op = "not_equal";
+            break;
+        case eltwise_test_params::opType::Pow:
+            str_op = "pow";
+            break;
+        case eltwise_test_params::opType::Floor_mod:
+            str_op = "floor_mod";
+            break;
+    }
+    return str_op;
+}
+
+class MKLDNNGraphEltwise3InputsTests: public TestsCommon,
                                      public WithParamInterface<eltwise_test_params> {
     std::string model_t = R"V0G0N(
 <net name="EltwiseOnly" version="3" precision="FP32" batch="1">
     <layers>
         <layer name="in1" type="Input" precision="FP32" id="1">
             <output>
-                <port id="1">__SRC_DIMS__
+                <port id="1">__SRC_DIMS_1__
                 </port>
             </output>
         </layer>
         <layer name="in2" type="Input" precision="FP32" id="2">
             <output>
-                <port id="2">__SRC_DIMS__
+                <port id="2">__SRC_DIMS_2__
                 </port>
             </output>
         </layer>
         <layer name="in3" type="Input" precision="FP32" id="3">
             <output>
-                <port id="3">__SRC_DIMS__
+                <port id="3">__SRC_DIMS_3__
                 </port>
             </output>
         </layer>
         <layer name="con" id="4" type="Eltwise" precision="FP32">
             <data operation="_OP_" _COEFF_/>
             <input>
-                <port id="1">__SRC_DIMS__
+                <port id="1">__SRC_DIMS_1__
                 </port>
-                <port id="2">__SRC_DIMS__
+                <port id="2">__SRC_DIMS_2__
                 </port>
-                <port id="3">__SRC_DIMS__
+                <port id="3">__SRC_DIMS_3__
                 </port>
             </input>
             <output>
@@ -142,22 +307,40 @@ class MKLDNNGraphEltwiseTests: public TestsCommon,
 protected:
     std::string getModel(eltwise_test_params p) {
         std::string model = model_t;
-        std::string op;
-
-        if (p.op == 0) {
-            op = "sum";
-        } else if (p.op == 1) {
-            op = "mul";
-        } else if (p.op == 2) {
-            op = "max";
+        std::string op = select_op(p.op);
+
+        std::string src_dims1;
+        for (auto &dim : p.dims1) {
+            src_dims1 += "\n                    <dim>";
+            src_dims1 += std::to_string(dim) + "</dim>";
+        }
+        REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);
+
+        std::string src_dims2;
+        for (auto &dim : p.dims2) {
+            src_dims2 += "\n                    <dim>";
+            src_dims2 += std::to_string(dim) + "</dim>";
         }
+        REPLACE_WITH_STR(model, "__SRC_DIMS_2__", src_dims2);
+
+        std::string src_dims3;
+        for (auto &dim : p.dims3) {
+            src_dims3 += "\n                    <dim>";
+            src_dims3 += std::to_string(dim) + "</dim>";
+        }
+        REPLACE_WITH_STR(model, "__SRC_DIMS_3__", src_dims3);
 
         std::string src_dims;
-        for (auto& dim : p.dims) {
-                src_dims += "\n                    <dim>";
-                src_dims += std::to_string(dim) + "</dim>";
+        std::vector<size_t> dims = p.dims1;
+        for (int i = 0; i < dims.size(); i++) {
+            dims[i] = std::max(p.dims1[i], p.dims2[i]);
+            dims[i] = std::max(dims[i], p.dims3[i]);
+        }
+        for (auto &dim : dims) {
+            src_dims += "\n                    <dim>";
+            src_dims += std::to_string(dim) + "</dim>";
         }
-	REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);
+        REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);
 
         std::string scale;
         if (!p.scales.empty()) {
@@ -165,6 +348,7 @@ protected:
         }
         REPLACE_WITH_STR(model, "_OP_", op);
         REPLACE_WITH_STR(model, "_COEFF_", scale);
+
         return model;
     }
 
@@ -194,43 +378,61 @@ protected:
                     ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
                 }
             }
-
-            InferenceEngine::SizeVector dims_src = p.dims;
-            InferenceEngine::Layout layout = InferenceEngine::ANY;
-            switch (p.dims.size()) {
+            InferenceEngine::SizeVector dims_src1 = p.dims1;
+            InferenceEngine::Layout layout1 = InferenceEngine::ANY;
+            switch (p.dims1.size()) {
+                case 4:
+                    layout1 = InferenceEngine::NCHW;
+                    break;
+                case 5:
+                    layout1 = InferenceEngine::NCDHW;
+                    break;
+            }
+            InferenceEngine::SizeVector dims_src2 = p.dims2;
+            InferenceEngine::Layout layout2 = InferenceEngine::ANY;
+            switch (p.dims2.size()) {
                 case 4:
-                    layout = InferenceEngine::NCHW;
+                    layout2 = InferenceEngine::NCHW;
                     break;
                 case 5:
-                    layout = InferenceEngine::NCDHW;
+                    layout2 = InferenceEngine::NCDHW;
+                    break;
+            }
+            InferenceEngine::SizeVector dims_src3 = p.dims3;
+            InferenceEngine::Layout layout3 = InferenceEngine::ANY;
+            switch (p.dims3.size()) {
+                case 4:
+                    layout3 = InferenceEngine::NCHW;
+                    break;
+                case 5:
+                    layout3 = InferenceEngine::NCDHW;
                     break;
             }
 
-            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout1, dims_src1);
             src1->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
 
             if (srcPtr1 == nullptr)
                 FAIL() << "Cannot cast blob to TBlob<float>.";
-
-            fill_data(src1->buffer(), src1->size());
-            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);
+            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout2, dims_src2);
             src2->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
 
             if (srcPtr2 == nullptr)
                 FAIL() << "Cannot cast blob to TBlob<float>.";
-            fill_data(src2->buffer(), src2->size());
-            InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            fill_data_sine(src2->buffer(), src2->size(), 0.1, 0.9, 2);
+            InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout3, dims_src3);
             src3->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr3 = dynamic_cast<InferenceEngine::TBlob<float>*>(src3.get());
 
             if (srcPtr3 == nullptr)
                 FAIL() << "Cannot cast blob to TBlob<float>.";
-            fill_data(src3->buffer(), src3->size());
+            fill_data_sine(src3->buffer(), src3->size(), 0.1, 0.9, 3);
             InferenceEngine::BlobMap srcs;
             srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
             srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
@@ -263,33 +465,35 @@ protected:
     }
 };
 
-TEST_P(MKLDNNGraphEltwiseTests, TestsEltwise) {}
+TEST_P(MKLDNNGraphEltwise3InputsTests, TestsEltwise) {}
 
 
 INSTANTIATE_TEST_CASE_P(
-        TestsEltwise, MKLDNNGraphEltwiseTests,
+        TestsEltwise, MKLDNNGraphEltwise3InputsTests,
         ::testing::Values(
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                         [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                             ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                             ASSERT_EQ(3, impl.getConfig().inConfs.size());
                             ASSERT_EQ(1, impl.getConfig().outConfs.size());
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
+                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                         }
                 } },
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
                         [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                             ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                             ASSERT_EQ(3, impl.getConfig().inConfs.size());
                             ASSERT_EQ(1, impl.getConfig().outConfs.size());
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(0).desc.getLayout());
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(1).desc.getLayout());
+                            ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                         }
                 } },
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref, {
                         [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                             ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                             ASSERT_EQ(3, impl.getConfig().inConfs.size());
@@ -300,7 +504,7 @@ INSTANTIATE_TEST_CASE_P(
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                         }
                 } },
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                         [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                             ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                             ASSERT_EQ(3, impl.getConfig().inConfs.size());
@@ -311,7 +515,7 @@ INSTANTIATE_TEST_CASE_P(
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                         }
                 } },
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                         [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                             ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                             ASSERT_EQ(3, impl.getConfig().inConfs.size());
@@ -322,7 +526,7 @@ INSTANTIATE_TEST_CASE_P(
                             ASSERT_EQ(InferenceEngine::Layout::NCHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                         }
                 } },
-                eltwise_test_params{{1, 32, 16, 16, 16}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
+                eltwise_test_params{{1, 32, 16, 16, 16},{1, 32, 16, 16, 16},{1, 32, 16, 16, 16}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref, {
                         [](MKLDNNPlugin::PrimitiveDescInfo impl) {
                             ASSERT_EQ(MKLDNNPlugin::impl_desc_type::ref, impl.getImplementationType());
                             ASSERT_EQ(3, impl.getConfig().inConfs.size());
@@ -332,17 +536,258 @@ INSTANTIATE_TEST_CASE_P(
                             ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().inConfs.at(2).desc.getLayout());
                             ASSERT_EQ(InferenceEngine::Layout::NCDHW, impl.getConfig().outConfs.at(0).desc.getLayout());
                         }
-                } }
+                } },
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
         ));
+        
+class MKLDNNGraphEltwise2InputsTests: public TestsCommon,
+                                     public WithParamInterface<eltwise_test_params> {
+    std::string model_t = R"V0G0N(
+<net name="EltwiseOnly" version="2" precision="FP32" batch="1">
+    <layers>
+        <layer name="in1" type="Input" precision="FP32" id="1">
+            <output>
+                <port id="1">__SRC_DIMS_1__
+                </port>
+            </output>
+        </layer>
+        <layer name="in2" type="Input" precision="FP32" id="2">
+            <output>
+                <port id="2">__SRC_DIMS_2__
+                </port>
+            </output>
+        </layer>
+        <layer name="con" id="3" type="Eltwise" precision="FP32">
+            <data operation="_OP_" _COEFF_/>
+            <input>
+                <port id="1">__SRC_DIMS_1__
+                </port>
+                <port id="2">__SRC_DIMS_2__
+                </port>
+            </input>
+            <output>
+                <port id="3">__SRC_DIMS__
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="1" from-port="1" to-layer="3" to-port="1"/>
+        <edge from-layer="2" from-port="2" to-layer="3" to-port="2"/>
+    </edges>
+</net>
+)V0G0N";
 
-class MKLDNNGraphDynBatchEltwiseTests: public MKLDNNGraphEltwiseTests {
 protected:
+    std::string getModel(eltwise_test_params p) {
+        std::string model = model_t;
+        std::string op = select_op(p.op);
+
+        std::string src_dims1;
+        for (auto &dim : p.dims1) {
+            src_dims1 += "\n                    <dim>";
+            src_dims1 += std::to_string(dim) + "</dim>";
+        }
+        REPLACE_WITH_STR(model, "__SRC_DIMS_1__", src_dims1);
+
+        std::string src_dims2;
+        for (auto &dim : p.dims2) {
+            src_dims2 += "\n                    <dim>";
+            src_dims2 += std::to_string(dim) + "</dim>";
+        }
+        REPLACE_WITH_STR(model, "__SRC_DIMS_2__", src_dims2);
+
+        std::string src_dims;
+        std::vector<size_t> dims = (p.dims1.size() >= p.dims2.size()) ? p.dims1 : p.dims2;
+        int i = dims.size() - 1, j = p.dims1.size() - 1, k = p.dims2.size() - 1;
+        for (; j >= 0 && k >= 0; i--, j--, k-- ) {
+            dims[i] = std::max(p.dims1[j], p.dims2[k]);
+        }
+
+        for (auto &dim : dims) {
+            src_dims += "\n                    <dim>";
+            src_dims += std::to_string(dim) + "</dim>";
+        }
+        REPLACE_WITH_STR(model, "__SRC_DIMS__", src_dims);
+
+        std::string scale;
+        if (!p.scales.empty()) {
+            scale = std::string("coeff=\"") + p.scales + std::string("\"");
+        }
+        REPLACE_WITH_STR(model, "_OP_", op);
+        REPLACE_WITH_STR(model, "_COEFF_", scale);
+
+        return model;
+    }
+
+    virtual void TearDown() {
+    }
+
     virtual void SetUp() {
         try {
             TestsCommon::SetUp();
             eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
             std::string model = getModel(p);
-            size_t MB = p.dims[0];
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork());
+
+            auto& nodes = graph.getNodes();
+            for (int i = 0; i < nodes.size(); i++) {
+                if (nodes[i]->getType() == MKLDNNPlugin::Eltwise) {
+                    ASSERT_EQ(p.num_prim_desc, nodes[i]->getSupportedPrimitiveDescriptors().size());
+                    for (size_t j = 0; j < p.num_prim_desc && j < p.comp.size(); j++) {
+                        p.comp.at(j)(nodes[i]->getSupportedPrimitiveDescriptors().at(j));
+                    }
+                    ASSERT_NE(nullptr, nodes[i]->getSelectedPrimitiveDescriptor());
+                    ASSERT_EQ(p.selectedType, nodes[i]->getSelectedPrimitiveDescriptor()->getImplementationType());
+                }
+            }
+            InferenceEngine::SizeVector dims_src1 = p.dims1;
+            InferenceEngine::Layout layout1 = InferenceEngine::ANY;
+            switch (p.dims1.size()) {
+                case 4:
+                    layout1 = InferenceEngine::NCHW;
+                    break;
+                case 5:
+                    layout1 = InferenceEngine::NCDHW;
+                    break;
+            }
+            InferenceEngine::SizeVector dims_src2 = p.dims2;
+            InferenceEngine::Layout layout2 = InferenceEngine::ANY;
+            switch (p.dims2.size()) {
+                case 4:
+                    layout2 = InferenceEngine::NCHW;
+                    break;
+                case 5:
+                    layout2 = InferenceEngine::NCDHW;
+                    break;
+            }
+
+            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout1, dims_src1);
+            src1->allocate();
+
+            InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
+
+            if (srcPtr1 == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            fill_data_sine(src1->buffer(), src1->size(), 0.1, 0.9, 1);
+            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout2, dims_src2);
+            src2->allocate();
+
+            InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
+
+            if (srcPtr2 == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+            
+            fill_data_sine(src2->buffer(), src2->size(), 0.1, 0.9, 2);
+            
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src1));
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in2", src2));
+
+            InferenceEngine::OutputsDataMap out;
+            out = net_reader.getNetwork().getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            graph.Infer(srcs, outputBlobs);
+
+            InferenceEngine::TBlob<float> dst_ref(item.second->getTensorDesc());
+            dst_ref.allocate();
+
+            std::vector<InferenceEngine::TBlob<float>> src_vec = {*srcPtr1, *srcPtr2};
+
+            ref_eltwise(src_vec, dst_ref, p);
+
+            compare(*output, dst_ref, 0.0005f);
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+
+};
+
+TEST_P(MKLDNNGraphEltwise2InputsTests, TestsEltwise) {}
+
+INSTANTIATE_TEST_CASE_P(
+        TestsEltwise, MKLDNNGraphEltwise2InputsTests,
+        ::testing::Values(
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Squared_diff, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
+        ));
+
+INSTANTIATE_TEST_CASE_P(
+        TestsBroadcasting, MKLDNNGraphEltwise2InputsTests,
+        ::testing::Values(
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Squared_diff, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 1, 3},{1, 1, 3, 3},{}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
+        ));
+
+INSTANTIATE_TEST_CASE_P(
+        TestsDiffDims, MKLDNNGraphEltwise2InputsTests,
+        ::testing::Values(
+                eltwise_test_params{{1},{1, 3},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3},{1},{}, eltwise_test_params::opType::Sum, "", 1, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3},{1},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 2, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3, 3},{1},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3, 3},{1, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3, 3},{1, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3, 3},{1, 3, 3, 3},{}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref}
+        ));
+
+class MKLDNNGraphEltwiseDynBatchTests: public MKLDNNGraphEltwise3InputsTests {
+protected:
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            eltwise_test_params p = ::testing::WithParamInterface<eltwise_test_params>::GetParam();
+            std::string model = getModel(p);
+            size_t MB = p.dims1[0];
             if (MB < 2)
                 MB = 2;
 
@@ -359,18 +804,38 @@ protected:
             graph.setProperty({{InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_ENABLED, InferenceEngine::PluginConfigParams::YES}});
             graph.CreateGraph(net_reader.getNetwork());
 
-            InferenceEngine::SizeVector dims_src = p.dims;
-            InferenceEngine::Layout layout = InferenceEngine::ANY;
-            switch (p.dims.size()) {
+            InferenceEngine::SizeVector dims_src1 = p.dims1;
+            InferenceEngine::Layout layout1 = InferenceEngine::ANY;
+            switch (p.dims1.size()) {
                 case 4:
-                    layout = InferenceEngine::NCHW;
+                    layout1 = InferenceEngine::NCHW;
                     break;
                 case 5:
-                    layout = InferenceEngine::NCDHW;
+                    layout1 = InferenceEngine::NCDHW;
+                    break;
+            }
+            InferenceEngine::SizeVector dims_src2 = p.dims2;
+            InferenceEngine::Layout layout2 = InferenceEngine::ANY;
+            switch (p.dims2.size()) {
+                case 4:
+                    layout2 = InferenceEngine::NCHW;
+                    break;
+                case 5:
+                    layout2 = InferenceEngine::NCDHW;
+                    break;
+            }
+            InferenceEngine::SizeVector dims_src3 = p.dims3;
+            InferenceEngine::Layout layout3 = InferenceEngine::ANY;
+            switch (p.dims3.size()) {
+                case 4:
+                    layout3 = InferenceEngine::NCHW;
+                    break;
+                case 5:
+                    layout3 = InferenceEngine::NCDHW;
                     break;
             }
 
-            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            InferenceEngine::Blob::Ptr src1 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout1, dims_src1);
             src1->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr1 = dynamic_cast<InferenceEngine::TBlob<float>*>(src1.get());
@@ -379,7 +844,7 @@ protected:
                 FAIL() << "Cannot cast blob to TBlob<float>.";
 
             fill_data(src1->buffer(), src1->size());
-            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            InferenceEngine::Blob::Ptr src2 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout2, dims_src2);
             src2->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr2 = dynamic_cast<InferenceEngine::TBlob<float>*>(src2.get());
@@ -387,7 +852,7 @@ protected:
             if (srcPtr2 == nullptr)
                 FAIL() << "Cannot cast blob to TBlob<float>.";
             fill_data(src2->buffer(), src2->size());
-            InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout, dims_src);
+            InferenceEngine::Blob::Ptr src3 = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(InferenceEngine::Precision::FP32, layout3, dims_src3);
             src3->allocate();
 
             InferenceEngine::TBlob<float>* srcPtr3 = dynamic_cast<InferenceEngine::TBlob<float>*>(src3.get());
@@ -424,17 +889,24 @@ protected:
     }
 };
 
-TEST_P(MKLDNNGraphDynBatchEltwiseTests, TestsDynBatchEltwise) {}
+TEST_P(MKLDNNGraphEltwiseDynBatchTests, TestsDynBatchEltwise) {}
 
 INSTANTIATE_TEST_CASE_P(
-        TestsDynBatchEltwise, MKLDNNGraphDynBatchEltwiseTests,
+        TestsDynBatchEltwise, MKLDNNGraphEltwiseDynBatchTests,
         ::testing::Values(
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
-                eltwise_test_params{{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref}));
-
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.0,1.0,1.0", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sum, "1.5,0.5,-2.0", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Prod, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Max, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Sub, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Min, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Div, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Pow, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_AND, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_OR, "", 3, MKLDNNPlugin::impl_desc_type::ref},
+                eltwise_test_params{{1, 3, 3, 3},{1, 3, 3, 3},{1, 3, 3, 3}, eltwise_test_params::opType::Logical_XOR, "", 3, MKLDNNPlugin::impl_desc_type::ref}
+                ));
 
 struct precisions_test_2params {
     struct {
@@ -551,4 +1023,3 @@ INSTANTIATE_TEST_CASE_P(
             precisions_test_2params{ {"FP32",   "U8"}, 5, 1 },
             precisions_test_2params{ {  "U8",   "U8"}, 6, 2 }
         ));
-
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp
index dcf001f33..4b74d640f 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_fullyconnected_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp
index 8a2acf042..59218728d 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_gemm_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp
index 1c1d76dab..e5c147915 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_input_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -359,3 +359,114 @@ protected:
 };
 
 TEST_F(MKLDNNGraphConstInputTests, TestsConstInput) {}
+
+
+struct input_layout_test_params {
+    InferenceEngine::Layout layout;
+    std::vector<float> reference;
+    MKLDNNPlugin::impl_desc_type selectedType;
+    std::vector<std::function<void(MKLDNNPlugin::PrimitiveDescInfo)>> comp;
+};
+
+class MKLDNNGraphInputLayoutTest : public TestsCommon, public WithParamInterface<input_layout_test_params> {
+    std::string model_t = R"V0G0N(
+<net name="InputLayers" version="2" batch="1">
+    <layers>
+        <layer name="input" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>3</dim>
+                    <dim>2</dim>
+                    <dim>2</dim>
+                </port>
+            </output>
+        </layer>
+        <layer name="power1" id="1" type="Power" precision="FP32">
+            <power_data power="1" scale="1" shift="1"/>
+            <input>
+                <port id="1">
+                    <dim>1</dim>
+                    <dim>3</dim>
+                    <dim>2</dim>
+                    <dim>2</dim>
+                </port>
+            </input>
+            <output>
+                <port id="2">
+                    <dim>1</dim>
+                    <dim>3</dim>
+                    <dim>2</dim>
+                    <dim>2</dim>
+                </port>
+            </output>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
+    </edges>
+    <pre-process reference-layer-name="input" mean-precision="FP32">
+        <channel id="0">
+            <mean value="1.0"/>
+        </channel>
+        <channel id="1">
+            <mean value="2.0"/>
+        </channel>
+        <channel id="2">
+            <mean value="3.0"/>
+        </channel>
+    </pre-process>
+</net>
+)V0G0N";
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            input_layout_test_params p = ::testing::WithParamInterface<input_layout_test_params>::GetParam();
+            std::string model = model_t;
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(net_reader.getNetwork());
+
+            InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, { 1, 3, 2, 2 }, p.layout);
+            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float>(desc);
+            src->allocate();
+            fill_data_dbgval(src->buffer(), src->size());
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("input", src));
+
+            InferenceEngine::OutputsDataMap out = net_reader.getNetwork().getOutputsInfo();
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            InferenceEngine::BlobMap outputBlobs;
+            outputBlobs[item.first] = output;
+
+            graph.Infer(srcs, outputBlobs);
+            //  Check results
+            if (memcmp((*output).data(), &p.reference[0], p.reference.size()) != 0)
+                FAIL() << "Wrong result with compare reference!";
+        }
+        catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNGraphInputLayoutTest, TestsLayoutInput) {}
+
+INSTANTIATE_TEST_CASE_P(
+    TestsLayoutInput, MKLDNNGraphInputLayoutTest,
+    ::testing::Values(
+        input_layout_test_params{ InferenceEngine::NCHW, { 0,1,2,3,3,4,5,6,6,7,8,9 }, MKLDNNPlugin::impl_desc_type::unknown },
+        input_layout_test_params{ InferenceEngine::NHWC, { 0,0,0,3,3,3,6,6,6,9,9,9 }, MKLDNNPlugin::impl_desc_type::unknown }
+));
+
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
index 793e3d4c9..885cea503 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp
index 873bae5f3..a36717c5a 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_lrn_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp
index a40add1a7..492f8e586 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_permute_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp
index a1ee6bd25..8286c0102 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_pooling_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp
index 83cde28ed..1ea16efa3 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_power_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp
index ce860c2e7..a55e731ec 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_relu_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp
index c7c13ade1..ab915d3d0 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reorder_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp
index d85aaa58b..3304a333f 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_reshape_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp
index 1706f5788..0c6125567 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_roi_pooling_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp
index 7109bdc9d..8e53244f5 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_simplernms_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp
index 1675b096d..e740124ac 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_softmax_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp
index e253a820e..a3fe7d81f 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_split_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp
index 4bb207dde..5d7ba11da 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/layers/internal/graph_tile_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp
index 2974b3784..330db7b0a 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_conv_depthwise_fusing_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tets.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tets.cpp
new file mode 100644
index 000000000..b3486100f
--- /dev/null
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_deconv_concat_tets.cpp
@@ -0,0 +1,400 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+#include "mkldnn_plugin/mkldnn_graph.h"
+
+#include "test_graph.hpp"
+
+#include "single_layer_common.hpp"
+#include <mkldnn_plugin/mkldnn_extension_utils.h>
+#include "tests_common.hpp"
+#include "ir_gen_helper.hpp"
+
+using namespace ::testing;
+using namespace std;
+using namespace mkldnn;
+using namespace single_layer_tests;
+
+struct concat_params {
+    size_t axis;
+};
+
+struct deconv_concat_params {
+    // Formats: NCHW, NCDHW
+    std::vector<size_t> in;
+
+    conv_common_params deconv;
+    concat_params concat;
+
+    std::vector<MKLDNNPlugin::impl_desc_type> preferTypes;
+};
+
+void ref_deconv_common(const InferenceEngine::Blob &src,
+                       InferenceEngine::Blob &dst,
+                       const float *weights_data,
+                       size_t weights_size,
+                       const float *bias_data,
+                       size_t bias_size,
+                       const conv_common_params &prm) {
+    auto dims_size = src.dims().size();
+
+    size_t G  = prm.group;
+    size_t KW = prm.kernel[InferenceEngine::X_AXIS];
+    size_t KH = prm.kernel[InferenceEngine::Y_AXIS];
+    size_t KD = prm.kernel.size() > InferenceEngine::Z_AXIS ? prm.kernel[InferenceEngine::Z_AXIS] : 1u;
+
+    size_t PW = prm.pads_begin[InferenceEngine::X_AXIS];
+    size_t PH = prm.pads_begin[InferenceEngine::Y_AXIS];
+    size_t PD = prm.pads_begin.size() > InferenceEngine::Z_AXIS ? prm.pads_begin[InferenceEngine::Z_AXIS] : 0u;
+
+    size_t SW = prm.stride[InferenceEngine::X_AXIS];
+    size_t SH = prm.stride[InferenceEngine::Y_AXIS];
+    size_t SD = prm.stride.size() > InferenceEngine::Z_AXIS ? prm.stride[InferenceEngine::Z_AXIS] : 1u;
+
+    size_t IW = src.dims()[dims_size - 1];
+    size_t IH = src.dims()[dims_size - 2];
+    size_t ID = dims_size == 5 ? src.dims()[dims_size - 3] : 1u;
+    size_t IC = src.dims()[1];
+    size_t MB = src.dims()[0];
+
+    size_t OC = prm.out_c;
+
+    size_t OW = SW * (IW - 1lu) + KW - 2lu * PW;
+    size_t OH = SH * (IH - 1lu) + KH - 2lu * PH;
+    size_t OD = dims_size == 5 ? (SD * (ID - 1) + KD - 2 * PD) : 1u;
+
+    const float *src_data = src.cbuffer().as<float *>();
+    float *dst_data = dst.buffer().as<float *>();
+
+    size_t CS1 = OH * OW;
+    size_t CS2 = CS1 * OD;
+    size_t CS3 = CS2 * OC;
+
+    size_t CI1 = IH * IW;
+    size_t CI2 = CI1 * ID;
+    size_t CI3 = CI2 * IC;
+    
+    size_t OC_G = OC / G;
+    size_t IC_G = IC / G;
+
+    size_t CK1 = KH * KW;
+    size_t CK2 = CK1 * KD;
+    size_t CK3 = CK2 * OC_G;
+    size_t CK4 = CK3 * IC_G;
+
+    for (size_t g = 0lu; g < G; ++g) {
+        size_t g_OC_G = g * OC_G;
+        size_t g_IC_G = g * IC_G;
+        size_t g_CK4 = g * CK4;
+        for (size_t mb = 0lu; mb < MB; ++mb) {
+            size_t mb_CS3 = mb * CS3;
+            size_t mb_CI3 = mb * CI3;
+            for (size_t oc = 0lu; oc < OC_G; ++oc) {
+                size_t g_OC_G_oc = g_OC_G + oc;
+                size_t mb_CS3_g_OC_G_oc_CS2 = mb_CS3 + g_OC_G_oc * CS2;
+                size_t g_CK4_oc_CK2 = g_CK4 + oc * CK2;
+                for (size_t od = 0lu; od < OD; ++od) {
+                    size_t mb_CS3_g_OC_G_oc_CS2_od_CS1 = mb_CS3_g_OC_G_oc_CS2 + od * CS1;
+                    size_t od_PD = od + PD;
+                    for (size_t oh = 0lu; oh < OH; ++oh) {
+                        size_t mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW = mb_CS3_g_OC_G_oc_CS2_od_CS1 + oh * OW;
+                        size_t oh_PH = oh + PH;
+                        for (size_t ow = 0lu; ow < OW; ++ow) {
+                            size_t didx = mb_CS3_g_OC_G_oc_CS2_od_CS1_oh_OW + ow;
+                            size_t ow_PW = ow + PW;
+
+                            dst_data[didx] = float(0);
+                            if (prm.with_bias) dst_data[didx] += bias_data[g_OC_G_oc];
+
+                            for (size_t ic = 0lu; ic < IC_G; ic++) {
+                                size_t mb_CI3_g_IC_G_ic_CI2 = mb_CI3 + (g_IC_G + ic) * CI2;
+                                size_t g_CK4_oc_CK2_ic_CK3 = g_CK4_oc_CK2 + ic * CK3;
+                                for (int kd = 0lu; kd < KD; kd++) {
+                                    if (od_PD < kd) continue;
+                                    size_t id = od_PD - kd;
+                                    if (id % SD != 0) continue;
+                                    id /= SD;
+                                    if (id >= ID) continue;
+                                    size_t mb_CI3_g_IC_G_ic_CI2_id_CI1 = mb_CI3_g_IC_G_ic_CI2 + id * CI1;
+                                    size_t g_CK4_oc_CK2_ic_CK3_kd_CK1 = g_CK4_oc_CK2_ic_CK3 + kd * CK1;
+                                    for (size_t kh = 0lu; kh < KH; kh++) {
+                                        if (oh_PH < kh) continue;
+                                        size_t ih = oh_PH - kh;
+                                        if (ih % SH != 0) continue;
+                                        ih /= SH;
+                                        if (ih >= IH) continue;
+                                        size_t mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW = mb_CI3_g_IC_G_ic_CI2_id_CI1 + ih * IW;
+                                        size_t g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW = g_CK4_oc_CK2_ic_CK3_kd_CK1 + kh * KW;
+                                        for (size_t kw = 0lu; kw < KW; kw++) {
+                                            if (ow_PW < kw) continue;
+                                            size_t iw = ow_PW - kw;
+                                            if (iw % SW != 0) continue;
+                                            iw /= SW;
+                                            if (iw >= IW) continue;
+
+                                            size_t sidx = mb_CI3_g_IC_G_ic_CI2_id_CI1_ih_IW + iw;
+
+                                            size_t widx = g_CK4_oc_CK2_ic_CK3_kd_CK1_kh_KW + kw;
+
+                                            dst_data[didx] += src_data[sidx] * weights_data[widx];
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+class MKLDNNDeconvConcatTests: public TestsCommon,
+                                    public WithParamInterface<deconv_concat_params> {
+    std::string layers_t = R"V0G0N(
+        <layer id="2" name="Deconvolution_1" precision="FP32" type="Deconvolution">
+            <data kernel="_K_" strides="_KS_"
+             pads_begin="_PB_" pads_end="_PE_"
+             dilations="1,1,1" output="_OC_" group="_GC_" PrimitivesPriority="_IMPLS_"/>
+            <input>
+                <port id="0">
+                    __INP_DIMS__
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    <dim>_IN_</dim>
+                    <dim>_OC_</dim>
+                    __DECONV_OUT_DIMS__
+                </port>
+            </output>
+            <blobs>
+                <weights offset="0" size="262144"/>
+            </blobs>
+	</layer>
+        <layer id="3" name="concat0" precision="FP32" type="Concat">
+            <data axis="__AXIS__"/>
+            <input>
+                <port id="0">
+                    <dim>_IN_</dim>
+                    <dim>_OC_</dim>
+                    __DECONV_OUT_DIMS__
+                </port>
+                <port id="1">
+                    __INP_DIMS__
+                </port>
+            </input>
+            <output>
+                <port id="2">
+                    __CONCAT_OUT_DIMS__
+                </port>
+            </output>
+        </layer>
+)V0G0N";
+
+    std::string edges_t = R"V0G0N(
+        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+        <edge from-layer="0" from-port="0" to-layer="3" to-port="1"/>
+        <edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
+)V0G0N";
+
+    std::string getModel(deconv_concat_params p) {
+        std::string model = layers_t;
+        
+        std::string s_dims;
+        for (auto& dim : p.in) {
+            s_dims += "\n                    <dim>";
+            s_dims += std::to_string(dim) + "</dim>";
+        }
+	REPLACE_WITH_STR(model, "__INP_DIMS__", s_dims);
+
+        s_dims = "";
+        size_t deconv_axis_val = p.in[p.concat.axis];
+        int k_len = p.deconv.kernel.size();
+        for (size_t i = 2lu; i < p.in.size(); i++) {
+            size_t inx = k_len - i + 1;
+            size_t dim = p.deconv.stride[inx] * (p.in[i] - 1) + p.deconv.kernel[inx] - 2 * p.deconv.pads_begin[inx];
+            s_dims += "\n                    <dim>";
+            s_dims += std::to_string(dim) + "</dim>";
+            if (i == p.concat.axis) {
+                deconv_axis_val = dim;
+            }
+        }
+	REPLACE_WITH_STR(model, "__DECONV_OUT_DIMS__", s_dims);
+
+        s_dims = "";
+        for (size_t i = 0lu; i < p.in.size(); i++) {
+            size_t val = p.in[i];
+            if (i == p.concat.axis) {
+                val += deconv_axis_val;
+            }
+            s_dims += "\n                    <dim>";
+            s_dims += std::to_string(val) + "</dim>";
+        }
+	REPLACE_WITH_STR(model, "__CONCAT_OUT_DIMS__", s_dims);
+
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.deconv.kernel);
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.deconv.stride);
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.deconv.pads_begin);
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.deconv.pads_end);
+        REPLACE_WITH_NUM(model, "_GC_", p.deconv.group);
+        REPLACE_WITH_NUM(model, "_OC_", p.deconv.out_c);
+        REPLACE_WITH_NUM(model, "_IN_", p.in[0]);
+        REPLACE_WITH_NUM(model, "__AXIS__", p.concat.axis);
+
+        std::string impls;
+        for (const auto& preferType : p.preferTypes) {
+            if (!impls.empty())
+                impls += ",";
+            impls += "cpu:" + MKLDNNGraphTestClass::getStrPrimitiveDescriptorType(preferType);
+        }
+        REPLACE_WITH_STR(model, "_IMPLS_", impls);
+
+        model = IRTemplateGenerator::getIRTemplate("Deconvolution_Concat", p.in, "FP32", model, edges_t);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            deconv_concat_params p = ::testing::WithParamInterface<deconv_concat_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            size_t blob_size = p.deconv.out_c * (p.in[1] / p.deconv.group);
+            for (int i = 0 ; i < p.deconv.kernel.size(); i++) {
+                blob_size *= p.deconv.kernel[i];
+            }
+            InferenceEngine::SizeVector dims_weights = { blob_size };
+
+            std::vector<InferenceEngine::Blob::Ptr> blob_to_model;
+            InferenceEngine::Blob::Ptr weights = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, InferenceEngine::C, dims_weights);
+            weights->allocate();
+            fill_data(weights->buffer().as<float*>(), weights->size());
+            blob_to_model.push_back(weights);
+
+            InferenceEngine::Blob::Ptr bias = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, InferenceEngine::C, {p.deconv.out_c});
+            bias->allocate();
+            fill_data(bias->buffer().as<float*>(), bias->size());
+            blob_to_model.push_back(bias);
+
+            size_t total_size_in_bytes = 0;
+            for (InferenceEngine::Blob::Ptr blb : blob_to_model) total_size_in_bytes += blb->byteSize();
+
+            InferenceEngine::TBlob<uint8_t>::Ptr model_blob =
+                    InferenceEngine::make_shared_blob<uint8_t>(InferenceEngine::Precision::U8, InferenceEngine::C, {total_size_in_bytes});
+            model_blob->allocate();
+            uint8_t* model_blob_ptr = model_blob->buffer().as<uint8_t*>();
+            for (InferenceEngine::Blob::Ptr blb : blob_to_model) {
+                memcpy(model_blob_ptr, blb->buffer().as<uint8_t*>(), blb->byteSize());
+                model_blob_ptr += blb->byteSize();
+            }
+            net_reader.SetWeights(model_blob);
+
+            auto network = net_reader.getNetwork();
+            MKLDNNGraphTestClass graph;
+            graph.CreateGraph(network);
+
+            InferenceEngine::SizeVector dims_src = p.in;
+
+            InferenceEngine::Blob::Ptr src = InferenceEngine::make_shared_blob<float, const InferenceEngine::SizeVector>(
+                    InferenceEngine::Precision::FP32, InferenceEngine::TensorDesc::getLayoutByDims(p.in), dims_src);
+            src->allocate();
+            fill_data(src->buffer(), src->size());
+
+            InferenceEngine::TBlob<float>* srcPtr = dynamic_cast<InferenceEngine::TBlob<float>*>(src.get());
+
+            if (srcPtr == nullptr)
+                FAIL() << "Cannot cast blob to TBlob<float>.";
+
+            InferenceEngine::BlobMap srcs;
+            srcs.insert(std::pair<std::string, InferenceEngine::Blob::Ptr>("in1", src));
+
+            InferenceEngine::OutputsDataMap out;
+            out = network.getOutputsInfo();
+            InferenceEngine::BlobMap outputBlobs;
+
+            std::pair<std::string, InferenceEngine::DataPtr> item = *out.begin();
+
+            InferenceEngine::TBlob<float>::Ptr output;
+            output = InferenceEngine::make_shared_blob<float>(item.second->getTensorDesc());
+            output->allocate();
+            outputBlobs[item.first] = output;
+
+            graph.Infer(srcs, outputBlobs);
+
+            // Compare with reference
+
+            auto deconv = network.getLayerByName("Deconvolution_1");
+            InferenceEngine::TBlob<float> deconv_ref(deconv->outData[0]->getTensorDesc());
+            deconv_ref.allocate();
+
+            ref_deconv_common(*srcPtr, deconv_ref, weights->buffer().as<float*>(), weights->size(),
+                    bias->buffer().as<float*>(), bias->size(), p.deconv);
+
+            float *src1_ptr = deconv_ref.buffer();
+            size_t src1_size = deconv_ref.size();
+            float *src2_ptr = src->buffer();
+            size_t src2_size = src->size();
+            float *dst_ptr = output->buffer();
+            size_t dst_size = output->size();
+
+            int len1 = 1, len2 = 1;
+            for (int dim = p.concat.axis; dim < output->dims().size(); dim++) {
+                len1 *= deconv->outData[0]->getTensorDesc().getDims()[dim];
+                len2 *= src->dims()[dim];
+            }
+
+            size_t index1 = 0, index2 = 0, index = 0;
+            float max_diff = 0.0001f;
+            for (size_t cycle = 0lu; cycle < p.concat.axis; cycle ++) {
+                for (int i1 = 0; i1 < len1; i1++) {
+                    if (fabs(src1_ptr[index1] - dst_ptr[index]) > max_diff)
+                    {
+                        FAIL() << "index: " << index << " src: " << src1_ptr[index1] << ", dst: " << dst_ptr[index];
+                    }
+                    index1++; index++;
+                }
+                for (int i2 = 0; i2 < len2; i2++) {
+                    if (fabs(src2_ptr[index2] - dst_ptr[index]) > max_diff)
+                    {
+                        FAIL() << "index: " << index << " src: " << src2_ptr[index2] << ", dst: " << dst_ptr[index];
+                    }
+                    index2++; index++;
+                }
+            }
+
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(MKLDNNDeconvConcatTests, TestsDwConvFusing) {}
+
+INSTANTIATE_TEST_CASE_P(
+        TestsDwConvFusing, MKLDNNDeconvConcatTests,
+        ::testing::Values(
+                deconv_concat_params{{1, 256, 4, 4}, 
+                                     { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false },
+                                     {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}},
+                deconv_concat_params{{2, 256, 4, 4},
+                                     { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 256, false },
+                                     {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}},
+                deconv_concat_params{{1, 256, 4, 4, 4},
+                                     { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false },
+                                     {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}},
+                deconv_concat_params{{2, 256, 4, 4, 4},
+                                     { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 256, false },
+                                     {1}, {MKLDNNPlugin::impl_desc_type::gemm_blas}}
+        ));
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp
index bc653a11e..9078a772f 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_dw_conv_fusing_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp
index 72c0c8ed9..700bf7ad8 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_optimization_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp
index 52bcb45af..363febb58 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/structure/graph_structure_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -11,6 +11,7 @@
 #include "tests_common.hpp"
 #include "../test_graph.hpp"
 #include <ext_list.hpp>
+#include <ie_builders.hpp>
 
 using namespace ::testing;
 using namespace std;
@@ -3001,24 +3002,16 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersRmnet_SSSSD) {
     MKLDNNGraphTestClass graph;
     graph.CreateGraph(net_reader.getNetwork());
 
-    // TODO: WA for ttps://jira01.devtools.intel.com/browse/CVS-10715
-    bool isAvx512 = false;
-
     size_t reorders_num = 0;
     auto& nodes = graph.getNodes();
     for (auto &node : nodes) {
         if (node->getType() == MKLDNNPlugin::Reorder) {
             reorders_num++;
-            if (!isAvx512 && node->getChildEdgeAt(0)->getMemory().GetFormat() == memory::nChw16c)
-                isAvx512 = true;
-            if (!isAvx512)
             ASSERT_EQ(MKLDNNPlugin::Output, node->getChildEdgeAt(0)->getChild()->getType());
         }
     }
-    if (!isAvx512)
-        ASSERT_EQ(reorders_num, 1);
-    else
-        ASSERT_EQ(reorders_num, 3);
+
+    ASSERT_EQ(reorders_num, 1);
 }
 
 TEST_F(MKLDNNGraphStructureTests, TestFailedPartDPN92) {
@@ -3806,7 +3799,6 @@ TEST_F(MKLDNNGraphStructureTests, TestNoRedundantReordersForXceptionTopology) {
 
     net_reader.SetWeights(weights_ptr);
 
-
     MKLDNNGraphTestClass graph;
     graph.CreateGraph(net_reader.getNetwork());
 
@@ -6391,18 +6383,18 @@ TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData) {
 
     const auto& nodes = graph.getNodes();
     ASSERT_EQ(nodes.size(), 12);
-    ASSERT_EQ(nodes[0].get()->getType(), MKLDNNPlugin::Type::Input);
-    ASSERT_EQ(nodes[1].get()->getType(), MKLDNNPlugin::Type::Split);
-    ASSERT_EQ(nodes[2].get()->getType(), MKLDNNPlugin::Type::Reorder);
-    ASSERT_EQ(nodes[3].get()->getType(), MKLDNNPlugin::Type::Reshape);
-    ASSERT_EQ(nodes[4].get()->getType(), MKLDNNPlugin::Type::Output);
-    ASSERT_EQ(nodes[5].get()->getType(), MKLDNNPlugin::Type::Reorder);
-    ASSERT_EQ(nodes[6].get()->getType(), MKLDNNPlugin::Type::Reshape);
-    ASSERT_EQ(nodes[7].get()->getType(), MKLDNNPlugin::Type::Output);
-    ASSERT_EQ(nodes[8].get()->getType(), MKLDNNPlugin::Type::Reorder);
-    ASSERT_EQ(nodes[9].get()->getType(), MKLDNNPlugin::Type::Output);
-    ASSERT_EQ(nodes[10].get()->getType(), MKLDNNPlugin::Type::Reshape);
-    ASSERT_EQ(nodes[11].get()->getType(), MKLDNNPlugin::Type::Output);
+    ASSERT_EQ(nodes[0]->getType(), MKLDNNPlugin::Type::Input);
+    ASSERT_EQ(nodes[1]->getType(), MKLDNNPlugin::Type::Split);
+    ASSERT_EQ(nodes[2]->getType(), MKLDNNPlugin::Type::Reorder);
+    ASSERT_EQ(nodes[3]->getType(), MKLDNNPlugin::Type::Reshape);
+    ASSERT_EQ(nodes[4]->getType(), MKLDNNPlugin::Type::Output);
+    ASSERT_EQ(nodes[5]->getType(), MKLDNNPlugin::Type::Reorder);
+    ASSERT_EQ(nodes[6]->getType(), MKLDNNPlugin::Type::Reshape);
+    ASSERT_EQ(nodes[7]->getType(), MKLDNNPlugin::Type::Output);
+    ASSERT_EQ(nodes[8]->getType(), MKLDNNPlugin::Type::Reorder);
+    ASSERT_EQ(nodes[9]->getType(), MKLDNNPlugin::Type::Reshape);
+    ASSERT_EQ(nodes[10]->getType(), MKLDNNPlugin::Type::Output);
+    ASSERT_EQ(nodes[11]->getType(), MKLDNNPlugin::Type::Output);
 
     InferenceEngine::OutputsDataMap outputs = reader.getNetwork().getOutputsInfo();
     std::vector<std::pair<std::string, InferenceEngine::DataPtr>> outputItems = {
@@ -6451,3 +6443,297 @@ TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData) {
         compare(*outputBlobs[i], *expectedOutputBlobs[i]);
     }
 }
+
+TEST_F(MKLDNNGraphStructureTests, TestCreateGraphWithMultipleData_2) {
+    std::string model = R"V0G0N(
+<net name="net" version="2" batch="1">
+    <layers>
+        <layer name="data" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>2</dim>
+                    <dim>8</dim>
+                    <dim>8</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="1" name="split" precision="FP32" type="Split">
+			<data axis="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>2</dim>
+					<dim>8</dim>
+					<dim>8</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>1</dim>
+					<dim>8</dim>
+					<dim>8</dim>
+				</port>
+				<port id="2">
+					<dim>1</dim>
+					<dim>1</dim>
+					<dim>8</dim>
+					<dim>8</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="2" name="power" precision="FP32" type="Power">
+			<data power="1" scale="-1.0" shift="0.0"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>1</dim>
+					<dim>8</dim>
+					<dim>8</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+                    <dim>1</dim>
+					<dim>1</dim>
+					<dim>8</dim>
+					<dim>8</dim>
+				</port>
+			</output>
+		</layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+        <edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>
+    </edges>
+</net>
+)V0G0N";
+    using namespace InferenceEngine;
+
+    const size_t H = 8;
+    const size_t W = 8;
+    const size_t imgSz = H * W;
+    const float channel1Value = 1.0;
+    const float channel2Value = 2.0;
+
+    const auto weights = std::make_shared<TBlob<uint8_t>>(Precision::U8, Layout::C, SizeVector{0});
+
+    InferenceEngine::CNNNetReader reader;
+    reader.ReadNetwork(model.data(), model.size());
+    reader.SetWeights(weights);
+
+    auto net = reader.getNetwork();
+    net.addOutput("split", 0);
+
+    MKLDNNGraphTestClass graph;
+    graph.CreateGraph(net);
+
+    auto inBlob   = make_shared_blob<float>(Precision::FP32, SizeVector{1, 2, H, W});
+    auto outBlob1 = make_shared_blob<float>(Precision::FP32, SizeVector{1, 1, H, W});
+    auto outBlob2 = make_shared_blob<float>(Precision::FP32, SizeVector{1, 1, H, W});
+    auto outBlob3 = make_shared_blob<float>(Precision::FP32, SizeVector{1, 1, H, W});
+
+    inBlob->allocate();
+    outBlob1->allocate();
+    outBlob2->allocate();
+    outBlob3->allocate();
+
+    auto in_ptr = inBlob->buffer().as<float*>();
+    for (int i = 0; i < imgSz; i++) {
+        in_ptr[i] = channel1Value;
+        in_ptr[i + imgSz] = channel2Value;
+    }
+
+    BlobMap inputBlobMap  = { {"data"   , inBlob  } },
+            outputBlobMap = { {"split.0", outBlob1},
+                              {"split.1", outBlob2},
+                              {"power"  , outBlob3} };
+
+    graph.Infer(inputBlobMap, outputBlobMap);
+
+    auto out_check = [] ( Blob::Ptr blob, float val) {
+        auto size = blob->size();
+        auto ptr = blob->buffer().as<float*>();
+        bool res = true;
+        for (int i = 0; i < size; i++)
+            res &= ( std::abs( ptr[i] - val ) < 0.00001f );
+        return res;
+    };
+
+    EXPECT_TRUE(out_check(outBlob1,  1));
+    EXPECT_TRUE(out_check(outBlob2,  2));
+    EXPECT_TRUE(out_check(outBlob3, -1));
+}
+
+TEST_F(MKLDNNGraphStructureTests, TestCreateGraphAllDataToConcat) {
+    using namespace InferenceEngine;
+    // Build the network.
+    Builder::Network netBuilder("");
+
+    // First input layer
+    idx_t inpId = netBuilder.addLayer(InferenceEngine::Builder::InputLayer("input").setPort(InferenceEngine::Port({1, 1, 4, 5})));
+
+    std::vector<size_t> weightsSize = {1, 1, 1, 1};  // OIHW
+    auto weights = make_shared_blob<float>(Precision::FP32, InferenceEngine::Layout::OIHW, weightsSize);
+    weights->allocate();
+
+    std::vector<float> twos(1, 2);
+    weights->set(twos);
+    idx_t weightsId = netBuilder.addLayer({}, Builder::ConstLayer("weights").setData(weights));
+
+    // Convolution layer
+    idx_t firstConvId = netBuilder.addLayer({{inpId}, {weightsId}}, Builder::ConvolutionLayer("conv").setKernel({1, 1})
+            .setStrides({1, 1}).setDilation({1, 1}).setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setGroup(1).setOutDepth(1));
+
+    weights = make_shared_blob<float>(Precision::FP32, InferenceEngine::Layout::OIHW, weightsSize);
+    weights->allocate();
+
+    std::vector<float> threes(1, 3);
+    weights->set(threes);
+
+    weightsId = netBuilder.addLayer({}, Builder::ConstLayer("weights").setData(weights));
+    // Convolution layer
+    idx_t secondConvId = netBuilder.addLayer({{inpId}, {weightsId}}, Builder::ConvolutionLayer("conv").setKernel({1, 1})
+            .setStrides({1, 1}).setDilation({1, 1}).setPaddingsBegin({0, 0}).setPaddingsEnd({0, 0}).setGroup(1).setOutDepth(1));
+
+    // Concat layer
+    idx_t concatId = netBuilder.addLayer({{inpId}, {firstConvId}, {secondConvId}},
+                                         InferenceEngine::Builder::ConcatLayer("concat").setAxis(1).setInputPorts(std::vector<InferenceEngine::Port>(3)));
+
+    // Output layer
+    InferenceEngine::Builder::OutputLayer outLayer("output");
+    netBuilder.addLayer({concatId}, outLayer);
+
+    auto cnn = CNNNetwork(Builder::convertToICNNNetwork(netBuilder.build()));
+
+    // Load the network
+    std::vector<size_t> inpSize = {5, 4, 1, 1};
+    std::vector<size_t> outSize = {5, 4, 3, 1};
+
+    InferenceEngine::BlobMap inputBlobs;
+    InferenceEngine::BlobMap outputBlobs;
+
+    std::vector<float> inpData(4*5, 1);
+    std::vector<float> outData(3*4*5, 1);
+    for (int i = 0; i < 4*5; ++i)
+    {
+        inpData[i] = i;
+    }
+
+    inputBlobs["input"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, inpSize, &inpData[0]);
+    outputBlobs["concat"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, outSize, &outData[0]);
+
+
+    MKLDNNGraphTestClass graph;
+    graph.CreateGraph(cnn);
+    graph.Infer(inputBlobs, outputBlobs);
+
+    std::vector<float> refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38,
+                                 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57};
+
+    InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(outputBlobs["concat"]->getTensorDesc(), refDst.data());
+
+    compare(*outputBlobs["concat"], *dstOut);
+}
+
+TEST_F(MKLDNNGraphStructureTests, TestCreateGraphAllDataFromInputToConcat) {
+    using namespace InferenceEngine;
+    // Build the network.
+    Builder::Network netBuilder("");
+
+    // First input layer
+    idx_t inpId = netBuilder.addLayer(InferenceEngine::Builder::InputLayer("input").setPort(InferenceEngine::Port({1, 1, 4, 5})));
+
+    // Concat layer
+    idx_t concatId = netBuilder.addLayer({{inpId}, {inpId}, {inpId}},
+                                         InferenceEngine::Builder::ConcatLayer("concat").setAxis(1).setInputPorts(std::vector<InferenceEngine::Port>(3)));
+
+    // Output layer
+    InferenceEngine::Builder::OutputLayer outLayer("output");
+    netBuilder.addLayer({concatId}, outLayer);
+
+    auto cnn = CNNNetwork(Builder::convertToICNNNetwork(netBuilder.build()));
+
+    // Load the network
+    std::vector<size_t> inpSize = {5, 4, 1, 1};
+    std::vector<size_t> outSize = {5, 4, 3, 1};
+
+    InferenceEngine::BlobMap inputBlobs;
+    InferenceEngine::BlobMap outputBlobs;
+
+    std::vector<float> inpData(4*5, 1);
+    std::vector<float> outData(3*4*5, 1);
+    for (int i = 0; i < 4*5; ++i)
+    {
+        inpData[i] = i;
+    }
+
+    inputBlobs["input"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, inpSize, &inpData[0]);
+    outputBlobs["concat"] = InferenceEngine::make_shared_blob<float>(InferenceEngine::Precision::FP32, outSize, &outData[0]);
+
+
+    MKLDNNGraphTestClass graph;
+    graph.CreateGraph(cnn);
+    graph.Infer(inputBlobs, outputBlobs);
+
+    std::vector<float> refDst = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,};
+
+    InferenceEngine::TBlob<float>::Ptr dstOut = InferenceEngine::make_shared_blob<float>(outputBlobs["concat"]->getTensorDesc(), refDst.data());
+
+    compare(*outputBlobs["concat"], *dstOut);
+}
+
+
+TEST_F(MKLDNNGraphStructureTests, TestCheckIncorrectScaleShift) {
+    std::string model = R"V0G0N(
+<net name="net" version="2" batch="1">
+    <layers>
+        <layer name="data" type="Input" precision="FP32" id="0">
+            <output>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>1000</dim>
+                    <dim>16</dim>
+                </port>
+            </output>
+        </layer>
+        <layer id="1" name="test" precision="FP32" type="ScaleShift">
+            <input>
+                <port id="0">
+                    <dim>1</dim>
+                    <dim>1000</dim>
+                    <dim>16</dim>
+                </port>
+            </input>
+            <output>
+                <port id="3">
+                    <dim>1</dim>
+                    <dim>100</dim>
+                    <dim>16</dim>
+                </port>
+            </output>
+            <blobs>
+                <weights offset="0" size="64"/>
+                <biases offset="0" size="64"/>
+            </blobs>
+        </layer>
+    </layers>
+    <edges>
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+    </edges>
+</net>
+)V0G0N";
+    using namespace InferenceEngine;
+    const auto weights = std::make_shared<TBlob<uint8_t>>(Precision::U8, Layout::C, SizeVector{64});
+
+    InferenceEngine::CNNNetReader reader;
+    reader.ReadNetwork(model.data(), model.size());
+    reader.SetWeights(weights);
+
+    MKLDNNGraphTestClass graph;
+    ASSERT_THROW(graph.CreateGraph(reader.getNetwork()), InferenceEngine::details::InferenceEngineException);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/engines/mkldnn/graph/test_graph.hpp b/inference-engine/tests/unit/engines/mkldnn/graph/test_graph.hpp
index b0d7bfb83..e6ca63ea0 100644
--- a/inference-engine/tests/unit/engines/mkldnn/graph/test_graph.hpp
+++ b/inference-engine/tests/unit/engines/mkldnn/graph/test_graph.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -78,7 +78,7 @@ public:
             // todo: make sure 'name' exists in this map...
             if (_meanImages.find(name) != _meanImages.end()) {
                 if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) {
-                    _meanImages[name].Subtract(outDims, reinterpret_cast<float *>(inter_data_ptr));
+                    _meanImages[name].Subtract(outDims, reinterpret_cast<float *>(inter_data_ptr), in->getTensorDesc().getLayout());
                 } else {
                     THROW_IE_EXCEPTION << "Mean image of type " << in->getTensorDesc().getPrecision().name() << " is unsupported";
                 }
@@ -89,13 +89,6 @@ public:
     }
 
     void Infer(const InferenceEngine::BlobMap& inputs, InferenceEngine::BlobMap& result, int batch = -1) {
-        for (auto it = result.begin(); it != result.end(); it++) {
-            InferenceEngine::TBlob<float> *out = dynamic_cast<InferenceEngine::TBlob<float> *>((*it).second.get());
-            if (out == nullptr) {
-                FAIL() << "Output data precision not supported. Expected float.";
-            }
-        }
-
         try {
             // need to retain converted blobs until infer finish
             std::vector<InferenceEngine::Blob::Ptr> convertedInputs;
diff --git a/inference-engine/tests/unit/engines/mkldnn/mkldnn_primitive_test.cpp b/inference-engine/tests/unit/engines/mkldnn/mkldnn_primitive_test.cpp
index fd517de87..518f0d6cd 100644
--- a/inference-engine/tests/unit/engines/mkldnn/mkldnn_primitive_test.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/mkldnn_primitive_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/engines/mkldnn/test_layers.cpp b/inference-engine/tests/unit/engines/mkldnn/test_layers.cpp
index 7db41747c..38164f8ad 100644
--- a/inference-engine/tests/unit/engines/mkldnn/test_layers.cpp
+++ b/inference-engine/tests/unit/engines/mkldnn/test_layers.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/graph_tools/graph_copy_tests.cpp b/inference-engine/tests/unit/graph_tools/graph_copy_tests.cpp
index 2971eb7a3..f3498f3d1 100644
--- a/inference-engine/tests/unit/graph_tools/graph_copy_tests.cpp
+++ b/inference-engine/tests/unit/graph_tools/graph_copy_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -359,4 +359,4 @@ TEST(CNNSpecificGraphCopyTests, copyNetworkWithDeconvolution) {
     auto layer = std::dynamic_pointer_cast<DeconvolutionLayer>(copied_net.getLayerByName("upsample_merged"));
     ASSERT_NE(layer, nullptr) << "Could not perform dynamic cast from base pointer to Deconvolution layer pointer. "
                                  "Net copy could be incorrect.";
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/tests/unit/graph_tools/graph_test_base.hpp b/inference-engine/tests/unit/graph_tools/graph_test_base.hpp
index 94c0876a6..79a1f4ac2 100644
--- a/inference-engine/tests/unit/graph_tools/graph_test_base.hpp
+++ b/inference-engine/tests/unit/graph_tools/graph_test_base.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -73,8 +73,27 @@ class GraphTestsBase : public ::testing::Test {
         }
         return nullptr;
     }
+
+
+    #define ASSERT_N_CONNECTIONS(a, b, n) \
+        ASSERT_EQ(countForwardConnections(#a, #b), n);\
+        ASSERT_EQ(countBackwardConnections(#a, #b), n);
+
     #define ASSERT_CONNECTION(a, b) \
-        ASSERT_TRUE(assertConnection(#a, #b));
+        ASSERT_N_CONNECTIONS(a,b,1);
+
+    #define ASSERT_2_CONNECTIONS(a, b) \
+        ASSERT_N_CONNECTIONS(a,b,2);
+
+    #define ASSERT_3_CONNECTIONS(a, b) \
+        ASSERT_N_CONNECTIONS(a,b,3);
+
+    /**
+     * @brief check connection without direction
+     */
+    #define ASSERT_NO_CONNECTION(a, b) \
+        ASSERT_EQ(countConnections(#a, #b), 0);\
+        ASSERT_EQ(countConnections(#b, #a), 0);\
 
     void ASSERT_DIMS(int x, const SizeVector & dims) {
 
@@ -84,30 +103,51 @@ class GraphTestsBase : public ::testing::Test {
         }
     }
 
-    bool assertConnection(std::string a, std::string b) {
+    int countForwardConnections(std::string a, std::string b) {
+        long int nForward = 0;
+        CNNLayerPtr layerExist;
+        try {
+            layerExist = wrap.getLayerByName(a.c_str());
+            if (!layerExist) {
+                return 0;
+            }
+        } catch(...) {
+            return 0;
+        }
 
-        bool bForward = false;
-        for (auto && outData : wrap.getLayerByName(a.c_str())->outData) {
+        for (auto && outData : layerExist->outData) {
             auto &inputMap = outData->inputTo;
-            auto i =
-                std::find_if(inputMap.begin(), inputMap.end(), [&](std::map<std::string, CNNLayerPtr>::value_type &vt) {
+            nForward +=
+                std::count_if(inputMap.begin(), inputMap.end(), [&](std::map<std::string, CNNLayerPtr>::value_type &vt) {
                     return vt.second->name == b;
                 });
-            if (i != inputMap.end()) {
-                bForward = true;
-                break;
-            }
         }
-        if (!bForward) {
-            return false;
+
+        return nForward;
+    }
+
+    int countBackwardConnections(std::string a, std::string b) {
+        CNNLayerPtr layerExist;
+        try {
+            layerExist = wrap.getLayerByName(b.c_str());
+            if (!layerExist) {
+                return 0;
+            }
+        } catch(...) {
+            return 0;
         }
 
-        auto prevData = wrap.getLayerByName(b.c_str())->insData;
+        auto prevData = layerExist->insData;
 
-        auto j = std::find_if(prevData.begin(), prevData.end(), [&](DataWeakPtr wp) {
+        auto nBackward = std::count_if(prevData.begin(), prevData.end(), [&](DataWeakPtr wp) {
             return wp.lock()->getCreatorLayer().lock()->name == a;
         });
-        return  j != prevData.end();
+
+        return  nBackward;
+    }
+
+    int countConnections(std::string a, std::string b) {
+        return  countForwardConnections(a, b) + countBackwardConnections(a, b);
     }
 
     int numCreated = 0;
@@ -189,6 +229,17 @@ class GraphTestsBase : public ::testing::Test {
         }
     }
 
+    void TearDown() override {
+        // Reset shared_pointer circular dependencies to mitigate memory leaks.
+        for (auto& items : datas) {
+            for (auto& data : items) {
+                for (auto& input : data->getInputTo()) {
+                    input.second.reset();
+                }
+            }
+        }
+    }
+
     int ID(const CNNLayerPtr &ptr) {
         for (int i = 0; i < layers.size(); i++) {
             if (layers[i].get() == ptr.get())
diff --git a/inference-engine/tests/unit/graph_tools/graph_tools_test.cpp b/inference-engine/tests/unit/graph_tools/graph_tools_test.cpp
index 94e9c516a..8c15ffdfe 100644
--- a/inference-engine/tests/unit/graph_tools/graph_tools_test.cpp
+++ b/inference-engine/tests/unit/graph_tools/graph_tools_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -701,7 +701,7 @@ TEST_F(GraphToolsTest, CNNNetworkInsertAllAfterSplit) {
 
     CNNNetworkInsertLayer(wrap.getLayerByName("1"), nullptr, createGenericLayer("5"));
 
-    ASSERT_CONNECTION(1, 5);
+    ASSERT_2_CONNECTIONS(1, 5);
     ASSERT_CONNECTION(5, 2);
     ASSERT_CONNECTION(5, 3);
 }
@@ -729,6 +729,156 @@ TEST_F(GraphToolsTest, CNNNetworkInsert1AfterSplit) {
     ASSERT_CONNECTION(5, 4);
 }
 
+TEST_F(GraphToolsTest, CNNNetworkRemoveNullPointerLayer) {
+
+    CONNECT_FROM_PORT(1, 0, 2);
+    CONNECT_FROM_PORT(1, 1, 3);
+    CONNECT_FROM_PORT(1, 2, 4);
+
+    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+        prepareInputs(maps);
+    })));
+
+    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+        l = layerByName(name);
+        return l== nullptr ? GENERAL_ERROR : OK;
+    })));
+
+    ASSERT_ANY_THROW(CNNNetworkRemoveLayer(nullptr));
+}
+
+TEST_F(GraphToolsTest, CNNNetworkRemoveInputOrOutputLayer) {
+
+    CONNECT_FROM_PORT(1, 0, 2);
+    CONNECT_FROM_PORT(2, 0, 3);
+    CONNECT_FROM_PORT(1, 0, 3);
+
+    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+        prepareInputs(maps);
+    })));
+
+    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+        l = layerByName(name);
+        return l== nullptr ? GENERAL_ERROR : OK;
+    })));
+
+    ASSERT_ANY_THROW(CNNNetworkRemoveLayer(wrap.getLayerByName("1")));
+    ASSERT_ANY_THROW(CNNNetworkRemoveLayer(wrap.getLayerByName("3")));
+}
+
+TEST_F(GraphToolsTest, CNNNetworkRemoveLayerThaHas2Outputs) {
+
+    CONNECT_FROM_PORT(1, 0, 2);
+    CONNECT_FROM_PORT(2, 0, 3);
+    CONNECT_FROM_PORT(2, 0, 4);
+    CONNECT_FROM_PORT(1, 0, 3);
+    CONNECT_FROM_PORT(5, 0, 4);
+
+    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+        prepareInputs(maps);
+    })));
+
+    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+        l = layerByName(name);
+        return l== nullptr ? GENERAL_ERROR : OK;
+    })));
+
+    CNNNetworkRemoveLayer(wrap.getLayerByName("2"));
+
+    ASSERT_2_CONNECTIONS(1, 3);
+    ASSERT_CONNECTION(1, 4);
+    ASSERT_CONNECTION(5, 4);
+
+    // means all remained references removed
+    ASSERT_NO_CONNECTION(1, 2);
+    ASSERT_NO_CONNECTION(2, 2);
+    ASSERT_NO_CONNECTION(3, 2);
+    ASSERT_NO_CONNECTION(4, 2);
+}
+
+TEST_F(GraphToolsTest, CNNNetworkRemoveLayerSplit) {
+
+    CONNECT_FROM_PORT(1, 0, 2);
+    CONNECT_FROM_PORT(1, 1, 3);
+    CONNECT_FROM_PORT(2, 0, 3);
+
+    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+        prepareInputs(maps);
+    })));
+
+    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+        l = layerByName(name);
+        return l== nullptr ? GENERAL_ERROR : OK;
+    })));
+
+    CNNNetworkRemoveLayer(wrap.getLayerByName("2"));
+
+    ASSERT_2_CONNECTIONS(1, 3);
+    // means all remained references removed
+    ASSERT_NO_CONNECTION(1, 2);
+    ASSERT_NO_CONNECTION(2, 2);
+    ASSERT_NO_CONNECTION(3, 2);
+}
+
+TEST_F(GraphToolsTest, CNNNetworkRemoveLayerSplit2) {
+
+    CONNECT_FROM_PORT(1, 0, 2);
+    CONNECT_FROM_PORT(1, 0, 3);
+    CONNECT_FROM_PORT(1, 0, 4);
+    CONNECT_FROM_PORT(1, 1, 4);
+    CONNECT_FROM_PORT(1, 2, 5);
+
+    CONNECT_FROM_PORT(2, 0, 3);
+    CONNECT_FROM_PORT(2, 0, 4);
+    CONNECT_FROM_PORT(2, 0, 5);
+
+    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+        prepareInputs(maps);
+    })));
+
+    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+        l = layerByName(name);
+        return l== nullptr ? GENERAL_ERROR : OK;
+    })));
+
+    CNNNetworkRemoveLayer(wrap.getLayerByName("2"));
+
+    ASSERT_2_CONNECTIONS(1, 3);
+    ASSERT_3_CONNECTIONS(1, 4);
+    ASSERT_2_CONNECTIONS(1, 5);
+
+    // means all remained references removed
+    ASSERT_NO_CONNECTION(1, 2);
+    ASSERT_NO_CONNECTION(2, 2);
+    ASSERT_NO_CONNECTION(3, 2);
+    ASSERT_NO_CONNECTION(4, 2);
+    ASSERT_NO_CONNECTION(5, 2);
+}
+
+TEST_F(GraphToolsTest, CNNNetworkRemoveSimpleLayer) {
+
+    CONNECT_FROM_PORT(1, 0, 2);
+    CONNECT_FROM_PORT(2, 0, 3);
+
+    EXPECT_CALL(mockNet, getInputsInfo(_)).WillRepeatedly(WithArg<0>(Invoke([&](InputsDataMap & maps){
+        prepareInputs(maps);
+    })));
+
+    EXPECT_CALL(mockNet, getLayerByName(_,_,_)).WillRepeatedly(WithArgs<0, 1>(Invoke([&](const char* name, InferenceEngine::CNNLayerPtr& l){
+        l = layerByName(name);
+        return l== nullptr ? GENERAL_ERROR : OK;
+    })));
+
+    CNNNetworkRemoveLayer(wrap.getLayerByName("2"));
+
+    ASSERT_CONNECTION(1, 3);
+
+    // means all remained references removed
+    ASSERT_NO_CONNECTION(1, 2);
+    ASSERT_NO_CONNECTION(2, 2);
+    ASSERT_NO_CONNECTION(3, 2);
+}
+
 
 //TEST_F(GraphToolsTest, CNNNetworkInsertLayerBeforeAll) {
 //    CONNECT(1, 2);
diff --git a/inference-engine/tests/unit/inference_engine_tests/alocator_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/alocator_tests.cpp
index 178f11676..5ae1fb5e8 100644
--- a/inference-engine/tests/unit/inference_engine_tests/alocator_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/alocator_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -25,7 +25,9 @@ public:
 };
 
 TEST_F(SystemAllocatorTests, canAllocate) {
-    EXPECT_NO_THROW(allocator->alloc(100));
+    void* handle = allocator->alloc(100);
+    EXPECT_NE(nullptr, handle);
+    allocator->free(handle);
 }
 
 TEST_F(SystemAllocatorTests, canLockAllocatedMemory) {
@@ -34,4 +36,6 @@ TEST_F(SystemAllocatorTests, canLockAllocatedMemory) {
     char * ptr = (char *)allocator->lock(handle);
     ptr [9999] = 11;
     ASSERT_EQ(ptr[9999], 11);
+    allocator->unlock(ptr);
+    allocator->free(handle);
 }
diff --git a/inference-engine/tests/unit/inference_engine_tests/blob_proxy_test.cpp b/inference-engine/tests/unit/inference_engine_tests/blob_proxy_test.cpp
index 9de222c50..cbe09141d 100644
--- a/inference-engine/tests/unit/inference_engine_tests/blob_proxy_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/blob_proxy_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/blob_test.cpp b/inference-engine/tests/unit/inference_engine_tests/blob_test.cpp
index e104c4cd1..8dbaf4d02 100644
--- a/inference-engine/tests/unit/inference_engine_tests/blob_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/blob_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -311,6 +311,11 @@ TEST_F(BlobTests, canMakeSharedBlob) {
     ASSERT_EQ(blob3->size(), 0);
 }
 
+TEST_F(BlobTests, cannotCreateBlobWithIncorrectPrecision) {
+    InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP16, {1, 3, 227, 227}, Layout::NCHW);
+    ASSERT_THROW(InferenceEngine::make_shared_blob<float>(desc), InferenceEngine::details::InferenceEngineException);
+}
+
 TEST_F(BlobTests, canUseBlobInMoveSemantics) {
 
     TBlob<float> b(Precision::FP32, C);
diff --git a/inference-engine/tests/unit/inference_engine_tests/caslesseq_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/caslesseq_tests.cpp
index b0c23e583..ec88bbee9 100644
--- a/inference-engine/tests/unit/inference_engine_tests/caslesseq_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/caslesseq_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cnn_network_test.cpp b/inference-engine/tests/unit/inference_engine_tests/cnn_network_test.cpp
index 4a4b3d4c6..af75edd6c 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cnn_network_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cnn_network_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_base_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_base_tests.cpp
index 425b06276..5cb6e9a7e 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_base_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_base_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_tests.cpp
index b1c93683e..8f2baa66f 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_default_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_default_tests.cpp
index 594ee1912..011594459 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_default_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_default_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_internal.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_internal.cpp
index 49cdadc5c..09e3aa9f0 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_internal.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/async_infer_request_thread_safe_internal.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/callback_manager_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/callback_manager_tests.cpp
index 7d1013793..06991e430 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/callback_manager_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/callback_manager_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_base_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_base_tests.cpp
index f4c472e72..815f9b84b 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_base_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_base_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_tests.cpp
index 399ec7a52..19a77fcfd 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_async_only_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_async_only_tests.cpp
index 2542017f5..d138cc73c 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_async_only_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_async_only_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_tests.cpp
index 3c09801ec..4dd38b70f 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executable_network_thread_safe_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executor_manager_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executor_manager_tests.cpp
index 450bcd3ec..022cc67bd 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executor_manager_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/executor_manager_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/iinference_plugin_internal_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/iinference_plugin_internal_tests.cpp
index a76857b46..5a4ae544c 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/iinference_plugin_internal_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/iinference_plugin_internal_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -43,7 +43,7 @@ protected:
 
     virtual void SetUp() {
         mock_plugin_impl.reset(new MockInferencePluginInternal());
-        plugin = details::shared_from_irelease(make_ie_compatible_plugin({1, 2, "test", "version"}, mock_plugin_impl));
+        plugin = details::shared_from_irelease(make_ie_compatible_plugin({1, 6, "test", "version"}, mock_plugin_impl));
         mockExeNetworkInternal = make_shared<MockExecutableNetworkInternal>();
     }
 
@@ -183,7 +183,7 @@ protected:
 
     virtual void SetUp() {
         mockPluginImpl = make_shared<MockInferencePluginInternal2>();
-        plugin = details::shared_from_irelease(make_ie_compatible_plugin({1, 2, "test", "version"}, mockPluginImpl));
+        plugin = details::shared_from_irelease(make_ie_compatible_plugin({1, 6, "test", "version"}, mockPluginImpl));
         mockExeNetwork = make_shared<MockIExecutableNetwork>();
     }
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/memory_state_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/memory_state_tests.cpp
index 799f0bd76..8f4426bb5 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/memory_state_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/memory_state_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/plugin_base_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/plugin_base_tests.cpp
index 3df6a6021..6a1004c50 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/plugin_base_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/plugin_base_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,7 +22,7 @@ class PluginBaseTests: public ::testing::Test {
     }
     virtual void SetUp() {
         mock_impl.reset(new MockPluginImpl());
-        plugin = details::shared_from_irelease(make_ie_compatible_plugin({1,2,"test", "version"}, mock_impl));
+        plugin = details::shared_from_irelease(make_ie_compatible_plugin({1,6,"test", "version"}, mock_impl));
     }
 };
 
@@ -33,7 +33,7 @@ TEST_F(PluginBaseTests, canReportVersion) {
     EXPECT_STREQ(V->buildNumber, "test");
     EXPECT_STREQ(V->description, "version");
     EXPECT_EQ(V->apiVersion.major, 1);
-    EXPECT_EQ(V->apiVersion.minor, 2);
+    EXPECT_EQ(V->apiVersion.minor, 6);
 
 }
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_common_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_common_tests.cpp
index e0918ab8b..00fa8769f 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_common_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_common_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_executor_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_executor_tests.cpp
index 0cbc51634..31aaaab2b 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_executor_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_executor_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_synchronizer_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_synchronizer_tests.cpp
index 47b1ef213..9564ff0af 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_synchronizer_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_synchronizer_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests.cpp
index 792e134a8..94f4910a8 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests_utils.hpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests_utils.hpp
index 5f4238f6d..9f6e3ccc8 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests_utils.hpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_tests_utils.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_with_stages_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_with_stages_tests.cpp
index 6f665e651..1f8c445cc 100644
--- a/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_with_stages_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/cpp_interfaces/task_with_stages_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/data_test.cpp b/inference-engine/tests/unit/inference_engine_tests/data_test.cpp
index 883986141..3d3cea100 100644
--- a/inference-engine/tests/unit/inference_engine_tests/data_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/data_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/debug_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/debug_tests.cpp
index 5acaeabe3..6d28b0d91 100644
--- a/inference-engine/tests/unit/inference_engine_tests/debug_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/debug_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/device_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/device_tests.cpp
index c83d89aae..368e8fd28 100644
--- a/inference-engine/tests/unit/inference_engine_tests/device_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/device_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -61,7 +61,8 @@ TEST_F(DeviceTests, returnsProperDeviceName) {
     ASSERT_STREQ(getDeviceName(TargetDevice::eMYRIAD), "MYRIAD");
     ASSERT_STREQ(getDeviceName(TargetDevice::eGNA), "GNA");
     ASSERT_STREQ(getDeviceName(TargetDevice::eHETERO), "HETERO");
+    ASSERT_STREQ(getDeviceName(TargetDevice::eKMB), "KMB");
     ASSERT_STREQ(getDeviceName(static_cast<TargetDevice>(-1)), "Unknown device");
     //off by one test - might not be enough
-    ASSERT_STREQ(getDeviceName(static_cast<TargetDevice>((uint8_t)TargetDevice::eHETERO + 1)), "Unknown device");
+    ASSERT_STREQ(getDeviceName(static_cast<TargetDevice>((uint8_t)TargetDevice::eKMB + 1)), "Unknown device");
 }
diff --git a/inference-engine/tests/unit/inference_engine_tests/exception_test.cpp b/inference-engine/tests/unit/inference_engine_tests/exception_test.cpp
index fc93d4881..0ebe0ce96 100644
--- a/inference-engine/tests/unit/inference_engine_tests/exception_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/exception_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/inference_engine_plugin_test.cpp b/inference-engine/tests/unit/inference_engine_tests/inference_engine_plugin_test.cpp
index a23b74c55..c88e9fb5a 100644
--- a/inference-engine/tests/unit/inference_engine_tests/inference_engine_plugin_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/inference_engine_plugin_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/inference_engine_test.cpp b/inference-engine/tests/unit/inference_engine_tests/inference_engine_test.cpp
index ab307cf40..451f1ee88 100644
--- a/inference-engine/tests/unit/inference_engine_tests/inference_engine_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/inference_engine_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/layer_transform_test.cpp b/inference-engine/tests/unit/inference_engine_tests/layer_transform_test.cpp
index fcb5875da..229af8fca 100644
--- a/inference-engine/tests/unit/inference_engine_tests/layer_transform_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/layer_transform_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/layers_test.cpp b/inference-engine/tests/unit/inference_engine_tests/layers_test.cpp
index 6d18b6422..0b0409b40 100644
--- a/inference-engine/tests/unit/inference_engine_tests/layers_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/layers_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/locked_memory_test.cpp b/inference-engine/tests/unit/inference_engine_tests/locked_memory_test.cpp
index 7a7ee5e7b..7bc095099 100644
--- a/inference-engine/tests/unit/inference_engine_tests/locked_memory_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/locked_memory_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp b/inference-engine/tests/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp
new file mode 100644
index 000000000..1b8da0d48
--- /dev/null
+++ b/inference-engine/tests/unit/inference_engine_tests/normalization/latest_in_fuse_test.cpp
@@ -0,0 +1,163 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <gmock/gmock-spec-builders.h>
+
+#include <cnn_network_int8_normalizer.hpp>
+#include "tests_common.hpp"
+#include "ir_gen_helper.hpp"
+
+using namespace ::testing;
+using namespace single_layer_tests;
+
+struct conv_conv_eltwise_params {
+    // Formats: NCHW, NCDHW
+    std::vector<size_t> in;
+
+    conv_common_params conv;
+    eltwise_common_params eltwise;
+};
+
+class NormalizationConvConvEltwiseTests: public TestsCommon,
+                                    public WithParamInterface<conv_conv_eltwise_params> {
+    std::string layers_t = R"V0G0N(
+        <layer id="1" name="conv_1" precision="FP32" type="Convolution">
+            <data group="_GC_" kernel="_K_" output="_OC_" pads_begin="_PB_" pads_end="_PE_" strides="_KS_"/>
+            <input>
+                <port id="0">
+                    __INP_DIMS__
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    __CONV_OUT_DIMS__
+                </port>
+            </output>
+            <blobs>
+                <weights offset="0" size="1"/>
+                <biases offset="1" size="2"/>
+            </blobs>
+        </layer>
+        <layer id="2" name="conv_2" precision="FP32" type="Convolution">
+            <data group="_GC_" kernel="_K_" output="_OC_" pads_begin="_PB_" pads_end="_PE_" strides="_KS_"/>
+            <input>
+                <port id="0">
+                    __INP_DIMS__
+                </port>
+            </input>
+            <output>
+                <port id="1">
+                    __CONV_OUT_DIMS__
+                </port>
+            </output>
+            <blobs>
+                <weights offset="3" size="4"/>
+                <biases offset="4" size="5"/>
+            </blobs>
+        </layer>
+        <layer id="3" name="eltwise_block" precision="FP32" type="Eltwise">
+            <data coeff="" operation="sum"/>
+            <input>
+                <port id="0">
+                    __CONV_OUT_DIMS__
+                </port>
+                <port id="1">
+                    __CONV_OUT_DIMS__
+                </port>
+            </input>
+            <output>
+                <port id="2">
+                    __CONV_OUT_DIMS__
+                </port>
+            </output>
+        </layer>
+)V0G0N";
+
+    std::string edges_t = R"V0G0N(
+        <edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+        <edge from-layer="0" from-port="0" to-layer="2" to-port="0"/>
+        <edge from-layer="1" from-port="1" to-layer="3" to-port="0"/>
+        <edge from-layer="2" from-port="1" to-layer="3" to-port="1"/>
+)V0G0N";
+
+    std::string getModel(conv_conv_eltwise_params p) {
+        std::string model = layers_t;
+        
+        std::string s_dims;
+        for (auto& dim : p.in) {
+            s_dims += "\n                    <dim>";
+            s_dims += std::to_string(dim) + "</dim>";
+        }
+	REPLACE_WITH_STR(model, "__INP_DIMS__", s_dims);
+
+        s_dims = "\n                    <dim>";
+        s_dims += std::to_string(p.in[0]) + "</dim>";
+        s_dims += "\n                    <dim>";
+        s_dims += std::to_string(p.conv.out_c) + "</dim>";
+        int k_len = p.conv.kernel.size();
+        for (size_t i = 2; i < p.in.size(); i++) {
+            size_t inx = k_len - i + 1;
+            size_t dim = (p.in[i] + 2lu * p.conv.pads_begin[inx] - p.conv.kernel[inx]) / p.conv.stride[inx] + 1lu;
+            s_dims += "\n                    <dim>";
+            s_dims += std::to_string(dim) + "</dim>";
+        }
+	REPLACE_WITH_STR(model, "__CONV_OUT_DIMS__", s_dims);
+
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_K_", p.conv.kernel);
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_KS_", p.conv.stride);
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PB_", p.conv.pads_begin);
+        REPLACE_WITH_NUM_VECTOR_REVERSE(model, "_PE_", p.conv.pads_end);
+        REPLACE_WITH_NUM(model, "_GC_", p.conv.group);
+        REPLACE_WITH_NUM(model, "_OC_", p.conv.out_c);
+
+        model = IRTemplateGenerator::getIRTemplate("Deconvolution_Concat", p.in, "FP32", model, edges_t);
+
+        return model;
+    }
+
+protected:
+    virtual void TearDown() {
+    }
+
+    virtual void SetUp() {
+        try {
+            TestsCommon::SetUp();
+            conv_conv_eltwise_params p = ::testing::WithParamInterface<conv_conv_eltwise_params>::GetParam();
+            std::string model = getModel(p);
+
+            InferenceEngine::CNNNetReader net_reader;
+            ASSERT_NO_THROW(net_reader.ReadNetwork(model.data(), model.length()));
+
+            auto network = net_reader.getNetwork();
+
+            int maxSign = 0x7F;
+            int maxUnsign = 0xFF;
+
+            InferenceEngine::details::CNNStatisticHelper statHelper(network, {}, maxSign, maxUnsign);
+            auto conv_1 = network.getLayerByName("conv_1");
+            auto conv_2 = network.getLayerByName("conv_2");
+            auto eltwise = network.getLayerByName("eltwise_block");
+
+            ASSERT_EQ(eltwise, statHelper.getLatestInFuse(conv_1));
+            ASSERT_EQ(conv_2, statHelper.getLatestInFuse(conv_2));
+            ASSERT_EQ(eltwise, statHelper.getLatestInFuse(eltwise));
+        } catch (const InferenceEngine::details::InferenceEngineException &e) {
+            FAIL() << e.what();
+        }
+    }
+};
+
+TEST_P(NormalizationConvConvEltwiseTests, TestsConvConvEltwise) {}
+
+INSTANTIATE_TEST_CASE_P(
+        TestsConvConvEltwise, NormalizationConvConvEltwiseTests,
+        ::testing::Values(
+                conv_conv_eltwise_params{{1, 16, 4, 4}, 
+                                     { {1, 1}, {1, 1}, {0, 0}, {0, 0}, {1, 1}, "", 1, 32, true },
+                                     {"sum", {}} },
+                conv_conv_eltwise_params{{1, 16, 4, 4, 4},
+                                     { {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, "", 1, 32, true },
+                                     {"sum", {}} }
+        ));
diff --git a/inference-engine/tests/unit/inference_engine_tests/parameter_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/parameter_tests.cpp
new file mode 100644
index 000000000..673d5c7a5
--- /dev/null
+++ b/inference-engine/tests/unit/inference_engine_tests/parameter_tests.cpp
@@ -0,0 +1,292 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <tests_common.hpp>
+#include <ie_parameter.hpp>
+#include <ie_layouts.h>
+
+using namespace InferenceEngine;
+
+class DestructorTest {
+public:
+    DestructorTest() {
+        constructorCount++;
+    }
+
+    DestructorTest(const DestructorTest& c) {
+        constructorCount++;
+    }
+
+    DestructorTest(const DestructorTest&& c) {
+        constructorCount++;
+    }
+
+    ~DestructorTest() {
+        destructorCount++;
+    }
+
+    static size_t destructorCount;
+    static size_t constructorCount;
+};
+size_t DestructorTest::destructorCount = 0;
+size_t DestructorTest::constructorCount = 0;
+
+class ParameterTests : public TestsCommon {
+public:
+    void SetUp() override {
+        TestsCommon::SetUp();
+        DestructorTest::destructorCount = 0;
+        DestructorTest::constructorCount = 0;
+    }
+};
+
+TEST_F(ParameterTests, ParameterAsInt) {
+    Parameter p = 4;
+    ASSERT_TRUE(p.is<int>());
+    int test = p;
+    ASSERT_EQ(4, test);
+}
+
+TEST_F(ParameterTests, ParameterAsUInt) {
+    Parameter p = 4u;
+    ASSERT_TRUE(p.is<unsigned int>());
+    ASSERT_FALSE(p.is<size_t>());
+    unsigned int test = p;
+    ASSERT_EQ(4, test);
+}
+
+TEST_F(ParameterTests, ParameterAsSize_t) {
+    size_t ref = 4;
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<size_t>());
+    size_t test = p;
+    ASSERT_EQ(ref, test);
+}
+
+TEST_F(ParameterTests, ParameterAsFloat) {
+    Parameter p = 4.f;
+    ASSERT_TRUE(p.is<float>());
+    float test = p;
+    ASSERT_EQ(4.f, test);
+}
+
+TEST_F(ParameterTests, ParameterAsString) {
+    std::string ref = "test";
+    Parameter p = ref;
+    std::string test = p;
+    ASSERT_TRUE(p.is<std::string>());
+    ASSERT_EQ(ref, test);
+}
+
+TEST_F(ParameterTests, ParameterAsStringInLine) {
+    Parameter p = "test";
+    std::string test = p;
+    ASSERT_TRUE(p.is<std::string>());
+    ASSERT_EQ("test", test);
+}
+
+TEST_F(ParameterTests, IntParameterAsString) {
+    Parameter p = 4;
+    ASSERT_TRUE(p.is<int>());
+    ASSERT_FALSE(p.is<std::string>());
+    ASSERT_THROW(std::string test = p, std::bad_cast);
+    ASSERT_THROW(std::string test = p.as<std::string>(), std::bad_cast);
+}
+
+TEST_F(ParameterTests, StringParameterAsInt) {
+    Parameter p = "4";
+    ASSERT_FALSE(p.is<int>());
+    ASSERT_TRUE(p.is<std::string>());
+    ASSERT_THROW(int test = p, std::bad_cast);
+    ASSERT_THROW(int test = p.as<int>(), std::bad_cast);
+}
+
+TEST_F(ParameterTests, ParameterAsTensorDesc) {
+    TensorDesc ref(Precision::FP32, {1, 3, 2, 2}, Layout::NCHW);
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<TensorDesc>());
+    TensorDesc test = p;
+    ASSERT_EQ(ref, test);
+}
+
+TEST_F(ParameterTests, ParameterAsInts) {
+    std::vector<int> ref = {1, 2, 3, 4, 5};
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<std::vector<int>>());
+    std::vector<int> test = p;
+    ASSERT_EQ(ref.size(), test.size());
+    for (size_t i = 0; i < ref.size(); i++) {
+        ASSERT_EQ(ref[i], test[i]);
+    }
+}
+
+TEST_F(ParameterTests, ParameterAsUInts) {
+    std::vector<unsigned int> ref = {1, 2, 3, 4, 5};
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<std::vector<unsigned int>>());
+    std::vector<unsigned int> test = p;
+    ASSERT_EQ(ref.size(), test.size());
+    for (size_t i = 0; i < ref.size(); i++) {
+        ASSERT_EQ(ref[i], test[i]);
+    }
+}
+
+TEST_F(ParameterTests, ParameterAsSize_ts) {
+    std::vector<size_t> ref = {1, 2, 3, 4, 5};
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<std::vector<size_t>>());
+    std::vector<size_t> test = p;
+    ASSERT_EQ(ref.size(), test.size());
+    for (size_t i = 0; i < ref.size(); i++) {
+        ASSERT_EQ(ref[i], test[i]);
+    }
+}
+
+TEST_F(ParameterTests, ParameterAsFloats) {
+    std::vector<float> ref = {1, 2, 3, 4, 5};
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<std::vector<float>>());
+    std::vector<float> test = p;
+    ASSERT_EQ(ref.size(), test.size());
+    for (size_t i = 0; i < ref.size(); i++) {
+        ASSERT_EQ(ref[i], test[i]);
+    }
+}
+
+TEST_F(ParameterTests, ParameterAsStrings) {
+    std::vector<std::string> ref = {"test1", "test2", "test3", "test4", "test1"};
+    Parameter p = ref;
+    ASSERT_TRUE(p.is<std::vector<std::string>>());
+    std::vector<std::string> test = p;
+    ASSERT_EQ(ref.size(), test.size());
+    for (size_t i = 0; i < ref.size(); i++) {
+        ASSERT_EQ(ref[i], test[i]);
+    }
+}
+
+TEST_F(ParameterTests, ParameterAsMapOfParameters) {
+    std::map<std::string, Parameter> refMap;
+    refMap["testParamInt"] = 4;
+    refMap["testParamString"] = "test";
+    Parameter p = refMap;
+    bool isMap = p.is<std::map<std::string, Parameter>>();
+    ASSERT_TRUE(isMap);
+    std::map<std::string, Parameter> testMap = p;
+
+    ASSERT_NE(testMap.find("testParamInt"), testMap.end());
+    ASSERT_NE(testMap.find("testParamString"), testMap.end());
+
+    int testInt = testMap["testParamInt"];
+    std::string testString = testMap["testParamString"];
+
+    ASSERT_EQ(refMap["testParamInt"].as<int>(), testInt);
+    ASSERT_EQ(refMap["testParamString"].as<std::string>(), testString);
+}
+
+TEST_F(ParameterTests, ParameterNotEmpty) {
+    Parameter p = 4;
+    ASSERT_FALSE(p.empty());
+}
+
+TEST_F(ParameterTests, ParameterEmpty) {
+    Parameter p;
+    ASSERT_TRUE(p.empty());
+}
+
+TEST_F(ParameterTests, ParameterClear) {
+    Parameter p = 4;
+    ASSERT_FALSE(p.empty());
+    p.clear();
+    ASSERT_TRUE(p.empty());
+}
+
+TEST_F(ParameterTests, ParametersNotEqualByType) {
+    Parameter p1 = 4;
+    Parameter p2 = "string";
+    ASSERT_TRUE(p1 != p2);
+    ASSERT_FALSE(p1 == p2);
+}
+
+TEST_F(ParameterTests, ParametersNotEqualByValue) {
+    Parameter p1 = 4;
+    Parameter p2 = 5;
+    ASSERT_TRUE(p1 != p2);
+    ASSERT_FALSE(p1 == p2);
+}
+
+TEST_F(ParameterTests, ParametersEqual) {
+    Parameter p1 = 4;
+    Parameter p2 = 4;
+    ASSERT_TRUE(p1 == p2);
+    ASSERT_FALSE(p1 != p2);
+}
+
+TEST_F(ParameterTests, CompareParametersWithoutEqualOperator) {
+    class TestClass {
+    public:
+        TestClass(int test, int* testPtr): test(test), testPtr(testPtr) {}
+
+    private:
+        int test;
+        int* testPtr;
+    };
+
+    TestClass a(2, (int *)0x234);
+    TestClass b(2, (int *)0x234);
+    TestClass c(3, (int *)0x234);
+    Parameter parA = a;
+    Parameter parB = b;
+    Parameter parC = c;
+
+    ASSERT_THROW(bool equal = parA == parB, details::InferenceEngineException);
+    ASSERT_THROW(bool equal = parA != parB, details::InferenceEngineException);
+    ASSERT_THROW(bool equal = parA == parC, details::InferenceEngineException);
+    ASSERT_THROW(bool equal = parA != parC, details::InferenceEngineException);
+}
+
+TEST_F(ParameterTests, ParameterRemovedRealObject) {
+    ASSERT_EQ(0, DestructorTest::constructorCount);
+    ASSERT_EQ(0, DestructorTest::destructorCount);
+    {
+        DestructorTest t;
+        Parameter p1 = t;
+    }
+    ASSERT_EQ(2, DestructorTest::constructorCount);
+    ASSERT_EQ(2, DestructorTest::destructorCount);
+}
+
+TEST_F(ParameterTests, ParameterRemovedRealObjectWithDuplication) {
+    ASSERT_EQ(0, DestructorTest::constructorCount);
+    ASSERT_EQ(0, DestructorTest::destructorCount);
+    {
+        DestructorTest t;
+        Parameter p = t;
+        ASSERT_EQ(0, DestructorTest::destructorCount);
+        p = t;
+        ASSERT_EQ(2, DestructorTest::destructorCount);
+    }
+    ASSERT_EQ(4, DestructorTest::constructorCount);
+    ASSERT_EQ(4, DestructorTest::destructorCount);
+}
+
+TEST_F(ParameterTests, ParameterRemovedRealObjectPointerWithDuplication) {
+    ASSERT_EQ(0, DestructorTest::constructorCount);
+    ASSERT_EQ(0, DestructorTest::destructorCount);
+    {
+        auto * t = new DestructorTest();
+        Parameter p = t;
+        ASSERT_EQ(1, DestructorTest::constructorCount);
+        ASSERT_EQ(0, DestructorTest::destructorCount);
+        p = t;
+        ASSERT_TRUE(p.is<DestructorTest *>());
+        DestructorTest* t2 = p;
+        ASSERT_EQ(0, DestructorTest::destructorCount);
+        delete t;
+        auto * t3 = p.as<DestructorTest *>();
+        ASSERT_EQ(t2, t3);
+    }
+    ASSERT_EQ(1, DestructorTest::constructorCount);
+    ASSERT_EQ(1, DestructorTest::destructorCount);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/inference_engine_tests/plugin_dispatcher_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/plugin_dispatcher_tests.cpp
index b54aa383e..b2d2671e7 100644
--- a/inference-engine/tests/unit/inference_engine_tests/plugin_dispatcher_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/plugin_dispatcher_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/pointer_test.cpp b/inference-engine/tests/unit/inference_engine_tests/pointer_test.cpp
index 78985fe6a..374c1b44d 100644
--- a/inference-engine/tests/unit/inference_engine_tests/pointer_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/pointer_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/pre_allocator_test.cpp b/inference-engine/tests/unit/inference_engine_tests/pre_allocator_test.cpp
index 42e06a04d..15a7db4bc 100644
--- a/inference-engine/tests/unit/inference_engine_tests/pre_allocator_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/pre_allocator_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/precision_test.cpp b/inference-engine/tests/unit/inference_engine_tests/precision_test.cpp
index a044b95ee..51cb2fb22 100644
--- a/inference-engine/tests/unit/inference_engine_tests/precision_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/precision_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/preprocess_test.cpp b/inference-engine/tests/unit/inference_engine_tests/preprocess_test.cpp
index 70ef0dccc..5a9fe3dfb 100644
--- a/inference-engine/tests/unit/inference_engine_tests/preprocess_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/preprocess_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp
index 0a10c8cdf..367840a5d 100644
--- a/inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/range_iterator_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/response_buffer_test.cpp b/inference-engine/tests/unit/inference_engine_tests/response_buffer_test.cpp
index 4087637d8..992045f28 100644
--- a/inference-engine/tests/unit/inference_engine_tests/response_buffer_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/response_buffer_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/shared_object_loader_test.cpp b/inference-engine/tests/unit/inference_engine_tests/shared_object_loader_test.cpp
index cdea8de81..41ca78396 100644
--- a/inference-engine/tests/unit/inference_engine_tests/shared_object_loader_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/shared_object_loader_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/so_pointer_tests.cpp b/inference-engine/tests/unit/inference_engine_tests/so_pointer_tests.cpp
index ed0e35249..9398b695c 100644
--- a/inference-engine/tests/unit/inference_engine_tests/so_pointer_tests.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/so_pointer_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/tensor_desc_test.cpp b/inference-engine/tests/unit/inference_engine_tests/tensor_desc_test.cpp
index 16bd43ba3..8010db6ea 100644
--- a/inference-engine/tests/unit/inference_engine_tests/tensor_desc_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/tensor_desc_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.cpp b/inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.cpp
new file mode 100644
index 000000000..1a3e5bb3a
--- /dev/null
+++ b/inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.cpp
@@ -0,0 +1,830 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <initializer_list>
+#include <string>
+#include <utility>
+#include <unordered_set>
+#include <unordered_map>
+
+#include <ie_util_internal.hpp>
+#include <tests_common.hpp>
+#include <graph_transformer.h>
+#include "ie_utils.hpp"
+#include "blob_factory.hpp"
+#include "debug.h"
+#include "util_test.hpp"
+#include "util_const_infer_test.hpp"
+#include <details/ie_cnn_network_tools.h>
+
+namespace IE = InferenceEngine;
+
+void RemoveLayerTests::SetUp() {
+    net = getNetwork();
+    originalLayersNum = net->allLayers().size();
+    testTransformator.reset(new ConstTransformatorTest(net.get()));
+}
+
+//
+// I1-d1-L1-d4              I4
+//       / \  \              \
+//      |  d7  \            d10
+//      |  |    \            /
+//  I2-d2-L2-d5-L4-d6-L5-d9-L10
+//        /           /
+//       /  ____d8___/
+//      /  /
+// I3-d3-L3
+//
+IE::details::CNNNetworkImplPtr RemoveLayerTests::getNetwork() {
+    return netBuilder
+            .data("data1", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data2", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data3", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data4", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data5", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data6", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data7", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data8", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data9", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data10", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data11", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .layer<IE::CNNLayer>(IE::LayerParams{"input1", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"input3", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"input4", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer1", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer2", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer3", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer4", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer5", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer6", "dummy", IE::Precision::FP32})
+            .linkToData("input1", "data1")
+            .linkToData("input2", "data2")
+            .linkToData("input3", "data3")
+            .linkToData("input4", "data10")
+
+            .linkDataTo("data1", "layer1")
+            .linkDataTo("data2", "layer2")
+            .linkDataTo("data2", "layer1")
+            .linkDataTo("data3", "layer3")
+            .linkDataTo("data3", "layer2")
+            .linkDataTo("data10", "layer6")
+
+            .linkToData("layer1", "data4")
+            .linkToData("layer1", "data7")
+            .linkToData("layer2", "data5")
+            .linkToData("layer3", "data8")
+
+            .linkDataTo("data4", "layer4")
+            .linkDataTo("data5", "layer4")
+            .linkDataTo("data8", "layer5")
+            .linkDataTo("data7", "layer2")
+
+            .linkToData("layer4", "data6")
+
+            .linkDataTo("data6", "layer5")
+
+            .linkToData("layer5", "data9")
+
+            .linkDataTo("data9", "layer6")
+
+            .linkToData("layer6", "data11")
+
+            .addInput("data1")
+            .addInput("data2")
+            .addInput("data3")
+            .finalize();
+}
+
+IE::CNNLayerPtr RemoveLayerTests::getLayer(const std::string& name) {
+    const auto& layers = netBuilder.getLayersMap();
+    auto it = layers.find(name);
+    if (it == layers.end()) throw std::logic_error("Failed to find layer: " + name);
+    return it->second;
+}
+
+IE::DataPtr RemoveLayerTests::getData(const std::string& name) {
+    const auto& datas = netBuilder.getDataMap();
+    auto it = datas.find(name);
+    if (it == datas.end()) throw std::logic_error("Failed to find data: " + name);
+    return it->second;
+}
+
+IE::BlobMap RemoveLayerTests::fillConstData(const std::vector<std::string>& constLayers) {
+    IE::BlobMap constData;
+    for (const auto& name:constLayers) {
+        auto layer = getLayer(name);
+        for (const auto& outData:layer->outData) {
+            IE::TensorDesc desc = outData->getTensorDesc();
+            IE::Blob::Ptr blob = make_blob_with_precision(desc);
+            blob->allocate();
+            auto* buffer = blob->buffer().as<float*>();
+            for (int i = 0; i < blob->size(); i++) {
+                buffer[i] = i + 1;
+            }
+            constData[outData->name] = blob;
+        }
+    }
+    return constData;
+}
+
+IE::BlobMap RemoveLayerTests::initConstLayers(const std::vector<std::string>& constLayers) {
+    for (const auto& name : constLayers) {
+        getLayer(name)->type = "Const";
+    }
+    IE::BlobMap customBlobs = fillConstData(constLayers);
+    for (const auto& layerName: constLayers) {
+        auto layer = getLayer(layerName);
+        layer->type = "Const";
+        layer->blobs["custom"] = customBlobs[layer->outData[0]->name];
+    }
+    return customBlobs;
+}
+
+TEST_F(RemoveLayerTests, canTrimL2) {
+    auto layer1 = getLayer("layer1");
+    auto layer4 = getLayer("layer4");
+    auto data2 = getData("data2");
+    auto data3 = getData("data3");
+    auto data7 = getData("data7");
+    auto data5 = getData("data5");
+    std::vector<std::string> constLayers = {"layer2"};
+    std::vector<std::string> refNewLayers = {constLayers[0] + "__data5__Const"};
+    auto constData = fillConstData(constLayers);
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+
+    auto newLayers = testTransformator->foldConstSubgraphsInternal({{constLayers[0], false}}, constData, sortedLayers);
+
+    ASSERT_EQ(newLayers, refNewLayers);
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_THROW(cnnNetwork.getLayerByName("layer2"), IE::NotFound);
+    auto newLayer = cnnNetwork.getLayerByName(refNewLayers[0].c_str());
+    ASSERT_EQ(newLayer->type, "Const");
+    ASSERT_EQ(constData["data5"], newLayer->blobs.at("custom"));
+    ASSERT_EQ(nullptr, net->getData("data7"));
+    net->removeData("data7");
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+    ASSERT_EQ(data2->inputTo.size(), 1);
+    ASSERT_EQ(data2->inputTo.find("layer1")->second, layer1);
+    ASSERT_EQ(data5->creatorLayer.lock(), newLayer);
+    ASSERT_EQ(layer4->insData.size(), 2);
+    ASSERT_EQ(layer4->insData[1].lock(), data5);
+    ASSERT_EQ(layer1->insData.size(), 2);
+    ASSERT_EQ(layer1->insData[0].lock(), getData("data1"));
+    ASSERT_EQ(layer1->insData[1].lock(), data2);
+    ASSERT_EQ(layer1->outData.size(), 1);
+    ASSERT_EQ(layer1->outData[0], getData("data4"));
+    ASSERT_EQ(newLayer->outData.size(), 1);
+    ASSERT_EQ(newLayer->outData[0], data5);
+    ASSERT_EQ(data3->inputTo.size(), 1);
+    ASSERT_EQ(data3->inputTo.find("layer3")->second, getLayer("layer3"));
+}
+
+TEST_F(RemoveLayerTests, canTrimI1andL1) {
+    auto layer4 = getLayer("layer4");
+    auto layer2 = getLayer("layer2");
+    auto data2 = getData("data2");
+    std::vector<std::string> constLayers = {"input1", "layer1"};
+    std::map<std::string, bool> mapConstLayers;
+    for (const auto& it : constLayers) {
+        mapConstLayers[it] = false;
+    }
+    std::vector<std::string> refNewLayers = {(constLayers[1] + "__data4__Const"), (constLayers[1] + "__data7__Const")};
+
+    auto constData = fillConstData(constLayers);
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto newLayers = testTransformator->foldConstSubgraphsInternal(mapConstLayers, constData, sortedLayers);
+
+    ASSERT_EQ(newLayers, refNewLayers);
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_THROW(cnnNetwork.getLayerByName("input1"), IE::NotFound);
+    ASSERT_THROW(cnnNetwork.getLayerByName("layer1"), IE::NotFound);
+    auto newLayerD4 = cnnNetwork.getLayerByName(refNewLayers[0].c_str());
+    auto newLayerD7 = cnnNetwork.getLayerByName(refNewLayers[1].c_str());
+    auto newData4 = net->getData("data4__layer4");
+    auto newData7 = net->getData("data7__layer2");
+    ASSERT_EQ(newLayerD4->type, "Const");
+    ASSERT_EQ(newLayerD7->type, "Const");
+    ASSERT_EQ(constData["data4"], newLayerD4->blobs.at("custom"));
+    ASSERT_EQ(constData["data7"], newLayerD7->blobs.at("custom"));
+    ASSERT_EQ(nullptr, net->getData("data1"));
+    net->removeData("data1");
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+    ASSERT_EQ(data2->inputTo.size(), 1);
+    ASSERT_EQ(data2->inputTo.find("layer2")->second, layer2);
+    ASSERT_EQ(newData4->creatorLayer.lock(), newLayerD4);
+    ASSERT_EQ(newData7->creatorLayer.lock(), newLayerD7);
+    ASSERT_EQ(newLayerD4->outData.size(), 1);
+    ASSERT_EQ(newLayerD7->outData.size(), 1);
+    ASSERT_EQ(newLayerD4->outData[0], newData4);
+    ASSERT_EQ(newLayerD7->outData[0], newData7);
+    ASSERT_EQ(layer4->insData.size(), 2);
+    ASSERT_EQ(layer4->insData[0].lock(), newData4);
+    ASSERT_EQ(layer4->insData[1].lock(), getData("data5"));
+    ASSERT_EQ(layer2->insData.size(), 3);
+    ASSERT_EQ(layer2->insData[0].lock(), data2);
+    ASSERT_EQ(layer2->insData[1].lock(), getData("data3"));
+    ASSERT_EQ(layer2->insData[2].lock(), newData7);
+}
+
+TEST_F(RemoveLayerTests, canFindConstLayers) {
+    getLayer("input1")->type = "Const";
+    getLayer("layer2")->type = "Shape";
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto constLayers = testTransformator->getConstLayers(sortedLayers);
+
+    ASSERT_EQ(constLayers.size(), 2);
+    auto begin = constLayers.begin();
+    auto end = constLayers.end();
+    ASSERT_FALSE(constLayers.at("input1"));
+    ASSERT_FALSE(constLayers.at("layer2"));
+}
+
+TEST_F(RemoveLayerTests, canFindConstLayers2) {
+    getLayer("input3")->type = "Const";
+    getLayer("input2")->type = "Const";
+    getLayer("layer2")->type = "Shape";
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto constLayers = testTransformator->getConstLayers(sortedLayers);
+
+    ASSERT_EQ(constLayers.size(), 4);
+    ASSERT_FALSE(constLayers.at("input3"));
+    ASSERT_FALSE(constLayers.at("layer2"));
+    ASSERT_FALSE(constLayers.at("layer3"));
+    ASSERT_FALSE(constLayers.at("input2"));
+}
+
+TEST_F(RemoveLayerTests, canFindConstLayers3) {
+    getLayer("input3")->type = "Const";
+    getLayer("layer2")->type = "Shape";
+    getLayer("layer1")->type = "Shape";
+    getLayer("layer4")->type = "Reshape";
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto constLayers = testTransformator->getConstLayers(sortedLayers);
+
+    ASSERT_EQ(constLayers.size(), 6);
+    ASSERT_FALSE(constLayers.at("input3"));
+    ASSERT_FALSE(constLayers.at("layer1"));
+    ASSERT_TRUE(constLayers.at("layer2"));
+    ASSERT_FALSE(constLayers.at("layer3"));
+    ASSERT_FALSE(constLayers.at("layer4"));
+    ASSERT_FALSE(constLayers.at("layer5"));
+}
+
+TEST_F(RemoveLayerTests, canFindShapeConstLayers) {
+    getLayer("input3")->type = "Const";
+    getLayer("layer2")->type = "Shape";
+    getLayer("layer1")->type = "Shape";
+    getLayer("layer6")->type = "Interp";
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto constLayers = testTransformator->getConstLayers(sortedLayers);
+
+    ASSERT_EQ(constLayers.size(), 6);
+    ASSERT_TRUE(constLayers.at("input3"));
+    ASSERT_TRUE(constLayers.at("layer1"));
+    ASSERT_TRUE(constLayers.at("layer2"));
+    ASSERT_TRUE(constLayers.at("layer3"));
+    ASSERT_TRUE(constLayers.at("layer4"));
+    ASSERT_TRUE(constLayers.at("layer5"));
+}
+
+TEST_F(RemoveLayerTests, canFindShapeConstLayers2) {
+    getLayer("input3")->type = "Const";
+    getLayer("input2")->type = "Const";
+    getLayer("layer2")->type = "Shape";
+    getLayer("layer1")->type = "Resample";
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto constLayers = testTransformator->getConstLayers(sortedLayers);
+
+    ASSERT_EQ(constLayers.size(), 4);
+    ASSERT_FALSE(constLayers.at("input3"));
+    ASSERT_FALSE(constLayers.at("layer2"));
+    ASSERT_FALSE(constLayers.at("layer3"));
+    ASSERT_FALSE(constLayers.at("input2"));
+}
+
+TEST_F(RemoveLayerTests, canTrimShapeInput) {
+    std::vector<std::string> constLayers = {"input3", "layer3", "input2"};
+    for (const auto& name : constLayers) {
+        getLayer(name)->type = "Const";
+    }
+    getLayer("layer2")->type = "Shape";
+    getLayer("layer1")->type = "Interp";
+    getLayer("layer4")->type = "Reshape";
+    getLayer("layer5")->type = "Reshape";
+    auto layer1 = getLayer("layer1");
+    auto layer4 = getLayer("layer4");
+    auto layer5 = getLayer("layer5");
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto mapConstLayers = testTransformator->getConstLayers(sortedLayers);
+    auto newLayers = testTransformator->foldConstSubgraphsInternal(mapConstLayers, {}, sortedLayers);
+    testTransformator->trimShapeInputs(newLayers);
+
+    ASSERT_EQ(nullptr, net->getData("data5"));
+    ASSERT_EQ(nullptr, net->getData("data2"));
+    net->removeData("data5");
+    net->removeData("data2");
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 3);
+    ASSERT_EQ(layer1->insData.size(), 1);
+    ASSERT_EQ(layer1->insData[0].lock(), getData("data1"));
+    ASSERT_EQ(layer4->insData.size(), 1);
+    ASSERT_EQ(layer4->insData[0].lock(), getData("data4"));
+    ASSERT_EQ(layer5->insData.size(), 2);
+    ASSERT_EQ(layer5->insData[0].lock(), getData("data8"));
+    ASSERT_EQ(layer5->insData[1].lock(), getData("data6"));
+}
+
+TEST_F(RemoveLayerTests, canTrimShapeInput2) {
+    std::vector<std::string> constLayers = {"input3", "input2"};
+    for (const auto& name : constLayers) {
+        getLayer(name)->type = "Const";
+    }
+    auto layer1 = getLayer("layer1");
+    auto layer2 = getLayer("layer2");
+    layer1->type = "Resample";
+    layer2->type = "StridedSlice";
+
+    testTransformator->trimShapeInputs(constLayers);
+
+    auto data6 = net->getData("data6");
+    auto data2 = net->getData("data2");
+    ASSERT_EQ(data2->inputTo.size(), 1);
+    ASSERT_EQ(data2->inputTo.at(layer2->name), layer2);
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+    ASSERT_EQ(layer1->insData.size(), 1);
+    ASSERT_EQ(layer1->insData[0].lock(), getData("data1"));
+    ASSERT_EQ(layer2->insData.size(), 3);
+    ASSERT_EQ(layer2->insData[0].lock(), getData("data2"));
+    ASSERT_EQ(layer2->insData[1].lock(), getData("data3"));
+    ASSERT_EQ(layer2->insData[2].lock(), getData("data7"));
+}
+
+TEST_F(RemoveLayerTests, notTrimFirstConstInput) {
+    std::vector<std::string> testLayers = {"Interp", "Reshape", "Pad", "Gather", "Resample"};
+    std::string constLayer = "input4";
+    getLayer(constLayer)->type = "Const";
+    auto layer6 = getLayer("layer6");
+    auto data10 = getData("data10");
+    for (const auto& name: testLayers) {
+        layer6->type = name;
+
+        testTransformator->trimShapeInputs({constLayer});
+
+        ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+        IE::CNNNetwork cnnNetwork(net);
+        auto input4 = cnnNetwork.getLayerByName(constLayer.c_str());
+        ASSERT_EQ(data10->inputTo.size(), 1);
+        ASSERT_EQ(data10->creatorLayer.lock(), input4);
+        ASSERT_EQ(layer6->insData.size(), 2);
+        ASSERT_EQ(layer6->insData[0].lock(), data10);
+        ASSERT_EQ(layer6->insData[1].lock(), getData("data9"));
+    }
+}
+
+TEST_F(RemoveLayerTests, canSaveConstForEltWise) {
+    auto input2 = getLayer("input2");
+    auto layer1 = getLayer("layer1");
+    auto data2 = getData("data2");
+    input2->type = "Const";
+    layer1->type = "Eltwise";
+
+    testTransformator->trimShapeInputs({input2->name});
+
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_NO_THROW(input2 = cnnNetwork.getLayerByName(input2->name.c_str()));
+    ASSERT_EQ(net->allLayers().size(), 10);
+    ASSERT_EQ(layer1->insData.size(), 2);
+    ASSERT_EQ(layer1->insData[1].lock(), data2);
+    ASSERT_EQ(data2->inputTo.size(), 2);
+    ASSERT_EQ(data2->inputTo.at(layer1->name), layer1);
+    ASSERT_EQ(data2->creatorLayer.lock(), input2);
+}
+
+TEST_F(RemoveLayerTests, canSaveDataWithMultipleInputTo) {
+    auto input3 = getLayer("input3");
+    auto layer2 = getLayer("layer2");
+    auto layer3 = getLayer("layer3");
+    auto data3 = getData("data3");
+    input3->type = "Const";
+    layer2->type = "Reshape";
+
+    testTransformator->trimShapeInputs({input3->name});
+
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_NO_THROW(input3 = cnnNetwork.getLayerByName(input3->name.c_str()));
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+    ASSERT_EQ(layer2->insData.size(), 2);
+    ASSERT_EQ(layer2->insData[0].lock(), getData("data2"));
+    ASSERT_EQ(layer2->insData[1].lock(), getData("data7"));
+    ASSERT_EQ(data3->inputTo.size(), 1);
+    ASSERT_EQ(data3->inputTo.at(layer3->name), layer3);
+    ASSERT_EQ(data3->creatorLayer.lock(), input3);
+    ASSERT_EQ(layer3->insData.size(), 1);
+    ASSERT_EQ(layer3->insData[0].lock(), data3);
+}
+
+TEST_F(RemoveLayerTests, canFoldConstSubgraphToConst) {
+    std::vector<std::string> constLayers = {"input1", "input2", "input3"};
+    std::vector<std::string> refNewLayers = {"layer5__data9__Const"};
+    for (const auto& name : constLayers) {
+        getLayer(name)->type = "Const";
+    }
+    getLayer("layer2")->type = "Shape";
+
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    auto mapConstLayers = testTransformator->getConstLayers(sortedLayers);
+    auto newLayers = testTransformator->foldConstSubgraphsInternal(mapConstLayers, {}, sortedLayers);
+
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 7);
+    ASSERT_EQ(newLayers, refNewLayers);
+    IE::CNNNetwork cnnNetwork(net);
+    auto newLayer = cnnNetwork.getLayerByName(refNewLayers[0].c_str());
+    ASSERT_EQ(newLayer->type, "Const");
+    ASSERT_EQ(newLayer->outData[0], getData("data9"));
+}
+
+TEST_F(RemoveLayerTests, canGetConstData) {
+    std::vector<std::string> constLayers = {"input2", "input3", "layer3"};
+    IE::BlobMap refBlobs = initConstLayers(constLayers);
+    std::map<std::string, bool> mapConstLayers;
+    for (const auto& it : constLayers) {
+        mapConstLayers[it] = false;
+    }
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+
+    auto actBlobs = testTransformator->getConstData(mapConstLayers, sortedLayers);
+
+    ASSERT_EQ(actBlobs.size(), refBlobs.size());
+    for (const auto& it: refBlobs) {
+        ASSERT_EQ(it.second, actBlobs[it.first]);
+    }
+}
+
+TEST_F(RemoveLayerTests, canGetConstDataForUnknownImpl) {
+    initConstLayers({"input1", "input2", "input3"});
+    {
+        getLayer("layer1")->type = "UNKNOWN";
+        getLayer("layer2")->type = "UNKNOWN";
+        getLayer("layer3")->type = "Shape";
+        getLayer("layer4")->type = "UNKNOWN";
+        getLayer("layer5")->type = "Mul";
+        getLayer("layer6")->type = "Reshape";
+    }
+    auto sortedLayers = IE::details::CNNNetSortTopologically(*net);
+    IE::SizeVector refShape = {1, 1, 3};
+
+    auto mapConstLayers = testTransformator->getConstLayers(sortedLayers);
+    auto actBlobs = testTransformator->getConstData(mapConstLayers, sortedLayers);
+
+    ASSERT_EQ(getData("data9")->getTensorDesc().getDims(), refShape);
+}
+
+TEST_F(RemoveLayerTests, canFoldConstSubgraphs) {
+    IE::BlobMap refBlobs = initConstLayers({"input1", "input2", "input3"});
+    std::vector<std::string> refNewLayers = {"layer5__data9__Const"};
+    {   // TODO: method for marking layers
+        getLayer("layer1")->type = "Mul";
+        getLayer("layer2")->type = "Shape";
+        getLayer("layer3")->type = "Power";
+        getLayer("layer3")->params = {{"power", "1"},
+                                      {"scale", "2"},
+                                      {"shift", "-4"}};
+        getLayer("layer4")->type = "Mul";
+        getLayer("layer5")->type = "Mul";
+    }
+    float arr[] = {-2.f, 0.f, 54.f};
+    auto ref5 = make_blob_with_precision(getData("data9")->getTensorDesc(), arr);
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.foldConstSubgraphs();
+
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 7);
+    auto newLayer = cnnNetwork.getLayerByName(refNewLayers[0].c_str());
+    auto actualBlob = newLayer->blobs["custom"];
+    ASSERT_NE(actualBlob, nullptr);
+    ASSERT_FALSE(actualBlob->buffer() == nullptr);
+    TestsCommon::compare(*actualBlob, *ref5);
+    ASSERT_EQ(newLayer->type, "Const");
+}
+
+TEST_F(RemoveLayerTests, canSkipConstCalculation) {
+    IE::BlobMap refBlobs = initConstLayers({"input1", "input2", "input3"});
+    getLayer("layer6")->type = "Reshape";
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.foldConstSubgraphs();
+
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 8);
+}
+
+TEST_F(RemoveLayerTests, canFoldConstWithUnknownImplForShapeDefiningLayers) {
+    IE::BlobMap refBlobs = initConstLayers({"input1", "input2", "input3"});
+    {
+        getLayer("layer1")->type = "UNKNOWN";
+        getLayer("layer2")->type = "UNKNOWN";
+        getLayer("layer3")->type = "Shape";
+        getLayer("layer4")->type = "Reshape";
+        getLayer("layer5")->type = "Mul";
+        getLayer("layer6")->type = "Reshape";
+    }
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.foldConstSubgraphs();
+
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 8);
+    ASSERT_EQ(getLayer("layer6")->insData.size(), 1);
+}
+
+TEST_F(RemoveLayerTests, throwErrorOnFoldWithUnknownImplForNotShapeDefiningLayers) {
+    IE::BlobMap refBlobs = initConstLayers({"input1", "input2", "input3"});
+    {
+        getLayer("layer1")->type = "UNKNOWN";
+        getLayer("layer2")->type = "Shape";
+        getLayer("layer3")->type = "Shape";
+        getLayer("layer4")->type = "Mul";
+        getLayer("layer5")->type = "Mul";
+        getLayer("layer6")->type = "Gather";
+    }
+
+    IE::ConstTransformer transformator(net.get());
+    ASSERT_THROW(transformator.foldConstSubgraphs(), IE::details::InferenceEngineException);
+}
+
+TEST_F(RemoveLayerTests, canFullTrim) {
+    IE::BlobMap refBlobs = initConstLayers({"input1", "input2", "input3"});
+    auto layer6 = getLayer("layer6");
+    {   // TODO: method for marking layers
+        getLayer("layer1")->type = "Mul";
+        getLayer("layer2")->type = "Shape";
+        getLayer("layer3")->type = "Power";
+        getLayer("layer3")->params = {{"power", "1"},
+                                      {"scale", "2"},
+                                      {"shift", "-4"}};
+        getLayer("layer4")->type = "Mul";
+        getLayer("layer5")->type = "Mul";
+        layer6->type = "Reshape";
+    }
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.fullTrim();
+
+    IE::CNNNetwork cnnNetwork(net);
+    std::string newName = "layer5__data9__Const";
+    ASSERT_THROW(cnnNetwork.getLayerByName(newName.c_str()), IE::NotFound);
+    ASSERT_EQ(net->allLayers().size(), 2);
+    ASSERT_EQ(layer6->insData.size(), 1);
+    ASSERT_EQ(layer6->insData[0].lock(), getData("data10"));
+}
+
+TEST_F(RemoveLayerTests, canFullTrimConstToReshape) {
+    IE::BlobMap refBlobs = initConstLayers({"input2"});
+    auto layer1 = getLayer("layer1");
+    layer1->type = "Reshape";
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.fullTrim();
+
+    IE::CNNNetwork cnnNetwork(net);
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+    ASSERT_EQ(layer1->insData.size(), 1);
+    ASSERT_EQ(layer1->insData[0].lock(), getData("data1"));
+}
+
+TEST_F(AdvancedShapeInferTests, canReshape) {
+    //
+    // I2-d2-Shape
+    //         \
+    //         d3
+    //          \
+    //  I1-d1-Reshape-d4
+    //
+    net = netBuilder
+            .data("data1", IE::SizeVector{1, 1, 3}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data2", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data3", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data4", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .layer<IE::CNNLayer>(IE::LayerParams{"input1", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer1", "Reshape", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer2", "Shape", IE::Precision::FP32})
+            .linkToData("input1", "data1")
+            .linkToData("input2", "data2")
+            .linkDataTo("data1", "layer1")
+            .linkDataTo("data2", "layer2")
+            .linkToData("layer2", "data3")
+            .linkDataTo("data3", "layer1")
+            .linkToData("layer1", "data4")
+            .addInput("data1")
+            .addInput("data2")
+            .finalize();
+    originalLayersNum = net->allLayers().size();
+    IE::CNNNetwork cnnNetwork(net);
+    IE::SizeVector newShape = {1, 3, 1};
+    std::map<std::string, IE::SizeVector> inputShapes = {{"data2", newShape}};
+    cnnNetwork.reshape(inputShapes);
+
+    ASSERT_NO_THROW(cnnNetwork.getLayerByName("layer2"));
+    ASSERT_EQ(getData("data3")->getTensorDesc().getDims(), IE::SizeVector{3});
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.fullTrim();
+
+    ASSERT_THROW(cnnNetwork.getLayerByName("layer2"), IE::NotFound);
+    ASSERT_EQ(getData("data4")->getTensorDesc().getDims(), newShape);
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 1);
+}
+
+TEST_F(AdvancedShapeInferTests, canReshape2) {
+    //
+    //                 I3-d3-Shape(L3)-d5
+    //                                  \
+    // I2-d2-Shape(L2)-d4-Power(L4)-d6-Mul(L5)-d7
+    //                                          \
+    //                                   I1-d1-Reshape(L1)-d8
+    //
+    net = netBuilder
+            .data("data1", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data2", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data3", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data4", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data5", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data6", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data7", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data8", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .layer<IE::CNNLayer>(IE::LayerParams{"input1", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"input2", "Input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"input3", "Input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer1", "Reshape", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer2", "Shape", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer3", "Shape", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer4", "Power", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer5", "Mul", IE::Precision::FP32})
+            .linkToData("input1", "data1")
+            .linkToData("input2", "data2")
+            .linkToData("input3", "data3")
+
+            .linkDataTo("data1", "layer1")
+            .linkDataTo("data2", "layer2")
+            .linkDataTo("data3", "layer3")
+
+            .linkToData("layer2", "data4")
+            .linkToData("layer3", "data5")
+
+            .linkDataTo("data4", "layer4")
+
+            .linkToData("layer4", "data6")
+
+            .linkDataTo("data5", "layer5")
+            .linkDataTo("data6", "layer5")
+
+            .linkToData("layer5", "data7")
+
+            .linkDataTo("data7", "layer1")
+
+            .linkToData("layer1", "data8")
+
+            .addInput("data1")
+            .addInput("data2")
+            .addInput("data3")
+            .finalize();
+    originalLayersNum = net->allLayers().size();
+    IE::CNNNetwork cnnNetwork(net);
+    IE::SizeVector newShape = {5, 9, 3};
+    std::map<std::string, IE::SizeVector> inputShapes = {{"data1", {135}},
+                                                         {"data2", {2, 1, 1}},
+                                                         {"data3", {1, 3, 1}}};
+    getLayer("layer4")->params = {{"power", "1"},
+                                  {"scale", "2"},
+                                  {"shift", "1"}};
+
+    cnnNetwork.reshape(inputShapes);
+
+    ASSERT_EQ(getData("data7")->getTensorDesc().getDims(), IE::SizeVector{3});
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.fullTrim();
+
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 4);
+    ASSERT_EQ(getData("data8")->getTensorDesc().getDims(), newShape);
+}
+
+TEST_F(AdvancedShapeInferTests, canReshapeConst) {
+    //
+    //    Const-d2
+    //           \
+    // I1-d1-Reshape(L1)-d3
+    //
+    net = netBuilder
+            .data("data1", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data2", IE::SizeVector{3}, IE::Precision::FP32, IE::Layout::C)
+            .data("data3", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .layer<IE::CNNLayer>(IE::LayerParams{"input1", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"const1", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer1", "Reshape", IE::Precision::FP32})
+            .linkToData("input1", "data1")
+            .linkToData("const1", "data2")
+            .linkDataTo("data1", "layer1")
+            .linkDataTo("data2", "layer1")
+            .linkToData("layer1", "data3")
+            .addInput("data1")
+            .finalize();
+    originalLayersNum = net->allLayers().size();
+    IE::CNNNetwork cnnNetwork(net);
+    initConstLayers({"const1"});
+    IE::SizeVector newOutShape = {1, 2, 3};
+    IE::SizeVector newInShape = {IE::details::product(newOutShape)};
+
+    std::map<std::string, IE::SizeVector> inputShapes = {{"data1", newInShape}};
+
+    cnnNetwork.reshape(inputShapes);
+
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.fullTrim();
+
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 1);
+    ASSERT_EQ(getData("data1")->getTensorDesc().getDims(), newInShape);
+    ASSERT_EQ(getData("data3")->getTensorDesc().getDims(), newOutShape);
+}
+
+TEST_F(AdvancedShapeInferTests, canReshapeCHWConst) {
+    //
+    //    Const-d1-Tile-d2
+    //
+    net = netBuilder
+            .data("data1", IE::SizeVector{3, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .data("data2", IE::SizeVector{1, 1, 1}, IE::Precision::FP32, IE::Layout::CHW)
+            .layer<IE::CNNLayer>(IE::LayerParams{"const", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"tile", "Tile", IE::Precision::FP32})
+            .linkToData("const", "data1")
+            .linkDataTo("data1", "tile")
+            .linkToData("tile", "data2")
+            .addInput("data1")
+            .finalize();
+    getLayer("tile")->params = {{"axis",  "0"},
+                                {"tiles", "2"}};
+    originalLayersNum = net->allLayers().size();
+    IE::CNNNetwork cnnNetwork(net);
+    initConstLayers({"const"});
+
+    cnnNetwork.reshape({});
+
+    IE::SizeVector expectedDims = {2, 1, 3};
+    ASSERT_EQ(getData("data2")->getTensorDesc().getDims(), expectedDims);
+}
+
+TEST_F(AdvancedShapeInferTests, canReshapeWithScalar) {
+    //
+    //   Scalar-d2
+    //           \
+    // I1-d1-Reshape(L1)-d3
+    //
+    net = netBuilder
+            .data("data1", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .data("data2", IE::SizeVector{}, IE::Precision::FP32, IE::Layout::SCALAR)
+            .data("data3", IE::SizeVector{1}, IE::Precision::FP32, IE::Layout::C)
+            .layer<IE::CNNLayer>(IE::LayerParams{"input1", "input", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"scalar", "dummy", IE::Precision::FP32})
+            .layer<IE::CNNLayer>(IE::LayerParams{"layer1", "Reshape", IE::Precision::FP32})
+            .linkToData("input1", "data1")
+            .linkToData("scalar", "data2")
+            .linkDataTo("data1", "layer1")
+            .linkDataTo("data2", "layer1")
+            .linkToData("layer1", "data3")
+            .addInput("data1")
+            .finalize();
+    originalLayersNum = net->allLayers().size();
+    IE::CNNNetwork cnnNetwork(net);
+    initConstLayers({"scalar"});
+    IE::SizeVector newOutShape = {1};
+    IE::SizeVector newInShape = {IE::details::product(newOutShape)};
+
+    std::map<std::string, IE::SizeVector> inputShapes = {{"data1", newInShape}};
+
+    cnnNetwork.reshape(inputShapes);
+
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum);
+
+    IE::ConstTransformer transformator(net.get());
+    transformator.fullTrim();
+
+    ASSERT_EQ(net->allLayers().size(), originalLayersNum - 1);
+    ASSERT_EQ(getData("data1")->getTensorDesc().getDims(), newInShape);
+    ASSERT_EQ(getData("data3")->getTensorDesc().getDims(), newOutShape);
+}
diff --git a/inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.hpp b/inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.hpp
new file mode 100644
index 000000000..b5fe89abe
--- /dev/null
+++ b/inference-engine/tests/unit/inference_engine_tests/util_const_infer_test.hpp
@@ -0,0 +1,86 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <gtest/gtest.h>
+
+#include <initializer_list>
+#include <string>
+#include <utility>
+#include <unordered_set>
+#include <unordered_map>
+
+#include <ie_util_internal.hpp>
+#include <tests_common.hpp>
+#include <graph_transformer.h>
+#include "ie_utils.hpp"
+#include "blob_factory.hpp"
+#include "debug.h"
+#include "util_test.hpp"
+#include <details/ie_cnn_network_tools.h>
+
+namespace IE = InferenceEngine;
+
+class ConstTransformatorTest : public IE::ConstTransformer {
+public:
+    explicit ConstTransformatorTest(IE::details::CNNNetworkImpl* network) : IE::ConstTransformer(network) {}
+
+    const std::map<std::string, bool>
+    getConstLayers(const std::vector<InferenceEngine::CNNLayerPtr>& sortedLayers) override {
+        return ConstTransformer::getConstLayers(sortedLayers);
+    }
+
+    const InferenceEngine::BlobMap getConstData(const std::map<std::string, bool>& constLayers,
+                                                    const std::vector<InferenceEngine::CNNLayerPtr>& sortedLayers) override {
+        return ConstTransformer::getConstData(constLayers, sortedLayers);
+    }
+
+    std::vector<std::string>
+    foldConstSubgraphsInternal(const std::map<std::string, bool>& constLayers, const IE::BlobMap& constData,
+                               const std::vector<IE::CNNLayerPtr>& sortedLayers) override {
+        return ConstTransformer::foldConstSubgraphsInternal(constLayers, constData, sortedLayers);
+    }
+
+    void trimShapeInputs(const std::vector<std::string>& constLayers) override {
+        ConstTransformer::trimShapeInputs(constLayers);
+    }
+
+};
+
+class RemoveLayerTests : public testing::Test {
+protected:
+    void SetUp() override;
+
+    //
+    // I1-d1-L1-d4              I4
+    //       / \  \              \
+    //      |  d7  \            d10
+    //      |  |    \            /
+    //  I2-d2-L2-d5-L4-d6-L5-d9-L10
+    //        /           /
+    //       /  ____d8___/
+    //      /  /
+    // I3-d3-L3
+    //
+    IE::details::CNNNetworkImplPtr getNetwork();
+
+    IE::CNNLayerPtr getLayer(const std::string& name);
+
+    IE::DataPtr getData(const std::string& name);
+
+    IE::BlobMap fillConstData(const std::vector<std::string>& constLayers);
+
+    IE::BlobMap initConstLayers(const std::vector<std::string>& constLayers);
+
+    NetBuilder netBuilder;
+    IE::details::CNNNetworkImplPtr net;
+    size_t originalLayersNum;
+    std::unique_ptr<ConstTransformatorTest> testTransformator;
+};
+
+class AdvancedShapeInferTests : public RemoveLayerTests {
+protected:
+    void SetUp() override {};
+};
diff --git a/inference-engine/tests/unit/inference_engine_tests/util_test.cpp b/inference-engine/tests/unit/inference_engine_tests/util_test.cpp
index d62e0a1af..7c9222edc 100644
--- a/inference-engine/tests/unit/inference_engine_tests/util_test.cpp
+++ b/inference-engine/tests/unit/inference_engine_tests/util_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,124 +15,12 @@
 #include <tests_common.hpp>
 #include <graph_transformer.h>
 #include "ie_utils.hpp"
+#include "util_test.hpp"
+#include "graph_tools.hpp"
 
 namespace IE = InferenceEngine;
 
 namespace {
-class NetBuilder {
-    using LayersMap = std::unordered_map<std::string, IE::CNNLayerPtr>;
-    using DataMap = std::unordered_map<std::string, IE::DataPtr>;
-    using InputsSet = std::unordered_set<IE::InputInfo::Ptr>;
-    LayersMap _layers;
-    DataMap _data;
-    InputsSet _inputs;
-public:
-    NetBuilder() = default;
-    NetBuilder(const NetBuilder&) = delete;
-
-    template<typename... Args>
-    NetBuilder& data(Args&&... args) {
-        auto newData = std::make_shared<IE::Data>(std::forward<Args>(args)...);
-        assert(!IE::contains(_data, newData->getName()));
-        _data[newData->getName()] = newData;
-        return *this;
-    }
-
-    template<typename T,typename... Args>
-    NetBuilder& layer(Args&&... args) {
-        auto newLayer = std::make_shared<T>(std::forward<Args>(args)...);
-        assert(!IE::contains(_layers, newLayer->name));
-        _layers[newLayer->name] = std::static_pointer_cast<IE::CNNLayer>(newLayer);
-        return *this;
-    }
-
-    const LayersMap& getLayersMap() const {
-        return _layers;
-    }
-
-    const DataMap& getDataMap() const {
-        return _data;
-    }
-
-    NetBuilder& linkDataTo(const std::string& dataName,
-                           const std::string& nextlayerName) {
-        assert(IE::contains(_layers, nextlayerName));
-        assert(IE::contains(_data,   dataName));
-
-        auto nextlayer = _layers[nextlayerName];
-        auto data = _data[dataName];
-
-        nextlayer->insData.push_back(data);
-        data->getInputTo().insert({nextlayerName, nextlayer});
-        return *this;
-    }
-
-    NetBuilder& linkToData(const std::string& prevlayerName,
-                           const std::string& dataName) {
-        assert(IE::contains(_layers, prevlayerName));
-        assert(IE::contains(_data,   dataName));
-
-        auto prevlayer = _layers[prevlayerName];
-        auto data = _data[dataName];
-        assert(nullptr == data->getCreatorLayer().lock());
-
-        prevlayer->outData.push_back(data);
-        data->getCreatorLayer() = prevlayer;
-        return *this;
-    }
-
-    NetBuilder& linkLayers(const std::string& prevlayerName,
-                           const std::string& nextlayerName,
-                           const std::string& dataName) {
-        linkToData(prevlayerName, dataName);
-        linkDataTo(dataName, nextlayerName);
-        return *this;
-    }
-
-    NetBuilder& linkData(const std::string& prevDataName,
-                         const std::string& nextDataName,
-                         const std::string& layerName) {
-        linkDataTo(prevDataName, layerName);
-        linkToData(layerName, nextDataName);
-        return *this;
-    }
-
-    template<typename... Args>
-    NetBuilder& addInput(const std::string& dataName, Args&&... args) {
-        assert(!dataName.empty());
-        assert(IE::contains(_data, dataName));
-        auto input = std::make_shared<IE::InputInfo>(
-                         std::forward<Args>(args)...);
-        input->setInputData(_data[dataName]);
-        _inputs.insert(std::move(input));
-        return *this;
-    }
-
-    IE::details::CNNNetworkImplPtr finalize() {
-        auto net = std::make_shared<IE::details::CNNNetworkImpl>();
-
-        for (auto&& it: _data) {
-            auto& data = it.second;
-            net->getData(it.first) = data;
-            if (nullptr == data->getCreatorLayer().lock()) {
-                auto input  = std::make_shared<IE::InputInfo>();
-                input->setInputData(data);
-                net->setInputInfo(input);
-            }
-        }
-        for (auto&& it: _layers) {
-            net->addLayer(it.second);
-        }
-        for (auto& i : _inputs) {
-            net->setInputInfo(std::move(i));
-        }
-
-        net->resolveOutput();
-
-        return net;
-    }
-};
-
 bool checkLayers(const std::vector<IE::CNNLayerPtr>& layers, std::initializer_list<const char*> layersToCheck) {
     if (layers.size() != layersToCheck.size()) {
         return false;
@@ -537,7 +425,7 @@ TEST(UtilTests, cloneNet) {
 
     {
         auto layer = getLayer(net, "layer1");
-        auto cloned = IE::cloneNet({layer});
+        auto cloned = IE::cloneNet({layer}, nullptr);
         EXPECT_EQ(2, cloned->layerCount());
         auto clonedLayer = getLayer(cloned, "layer1");
         ASSERT_NE(nullptr, clonedLayer);
@@ -555,7 +443,7 @@ TEST(UtilTests, cloneNet) {
     {
         auto layer1 = getLayer(net, "layer1");
         auto layer2 = getLayer(net, "layer2");
-        auto cloned = IE::cloneNet({layer1,layer2});
+        auto cloned = IE::cloneNet({layer1,layer2}, nullptr);
         EXPECT_EQ(4, cloned->layerCount());
         auto clonedLayer1 = getLayer(cloned, "layer1");
         auto clonedLayer2 = getLayer(cloned, "layer2");
@@ -576,7 +464,7 @@ TEST(UtilTests, cloneNet) {
     {
         auto layer4 = getLayer(net, "layer4");
         auto layer5 = getLayer(net, "layer5");
-        auto cloned = IE::cloneNet({layer4,layer5});
+        auto cloned = IE::cloneNet({layer4,layer5}, nullptr);
         EXPECT_EQ(4, cloned->layerCount());
         auto clonedLayer4 = getLayer(cloned, "layer4");
         auto clonedLayer5 = getLayer(cloned, "layer5");
@@ -608,7 +496,7 @@ TEST(UtilTests, cloneNet) {
     }
     {
         auto layer3 = getLayer(net, "layer3");
-        auto cloned = IE::cloneNet({layer3});
+        auto cloned = IE::cloneNet({layer3}, nullptr);
         EXPECT_EQ(2, cloned->layerCount());
         auto clonedLayer3 = getLayer(cloned, "layer3");
         ASSERT_NE(nullptr, clonedLayer3);
@@ -638,7 +526,7 @@ TEST(UtilTests, cloneNet) {
         auto layer5 = getLayer(net, "layer5");
         auto layer6 = getLayer(net, "layer6");
         auto layer7 = getLayer(net, "layer7");
-        auto cloned = IE::cloneNet({layer1,layer2,layer3,layer4,layer5,layer6,layer7});
+        auto cloned = IE::cloneNet({layer1,layer2,layer3,layer4,layer5,layer6,layer7}, nullptr);
         EXPECT_EQ(9, cloned->layerCount());
         auto clonedLayer1 = getLayer(cloned, "layer1");
         auto clonedLayer2 = getLayer(cloned, "layer2");
@@ -771,7 +659,7 @@ TEST(UtilTests, cloneNet_input) {
 
     auto cloned = IE::cloneNet({getLayer(net, "layer1"),
                                 getLayer(net, "layer2"),
-                                getLayer(net, "layer3")});
+                                getLayer(net, "layer3")}, nullptr);
 
     ASSERT_EQ(6, cloned->layerCount());
     ASSERT_NE(nullptr, getLayer(cloned, "input1"));
@@ -825,7 +713,7 @@ TEST(UtilTests, cloneNet_const) {
 
     auto cloned = IE::cloneNet({getLayer(net, "layer1"),
                                 getLayer(net, "layer2"),
-                                getLayer(net, "layer3")});
+                                getLayer(net, "layer3")}, nullptr);
 
     ASSERT_EQ(6, cloned->layerCount());
     ASSERT_NE(nullptr, getLayer(cloned, "input1"));
@@ -1673,7 +1561,7 @@ TEST(UtilTests, replaceLayerWithNewLayer) {
         auto newLayer1 = std::make_shared<IE::CNNLayer>(IE::LayerParams{"layer1", "dummy", IE::Precision::UNSPECIFIED});
         auto layer1    = layers.find("layer1");
         EXPECT_TRUE(layer1 != layers.end());
-        IE::replaceLayerWithNewLayer(*net, layer1->second, newLayer1);
+        CNNNetSubstituteLayer(*net, layer1->second, newLayer1);
         IE::CNNLayerPtr layer1Check = nullptr;
         net->getLayerByName("layer1", layer1Check, nullptr);
         ASSERT_EQ(layer1Check, newLayer1);
@@ -1685,7 +1573,7 @@ TEST(UtilTests, replaceLayerWithNewLayer) {
         auto newLayer2 = std::make_shared<IE::CNNLayer>(IE::LayerParams{"layer2", "dummy", IE::Precision::UNSPECIFIED});
         auto layer2    = layers.find("layer2");
         EXPECT_TRUE(layer2 != layers.end());
-        IE::replaceLayerWithNewLayer(*net, layer2->second, newLayer2);
+        CNNNetSubstituteLayer(*net, layer2->second, newLayer2);
         IE::CNNLayerPtr layer2Check = nullptr;
         net->getLayerByName("layer2", layer2Check, nullptr);
         ASSERT_EQ(layer2Check, newLayer2);
@@ -1697,7 +1585,7 @@ TEST(UtilTests, replaceLayerWithNewLayer) {
         auto newLayer3 = std::make_shared<IE::CNNLayer>(IE::LayerParams{"layer3", "dummy", IE::Precision::UNSPECIFIED});
         auto layer3    = layers.find("layer3");
         EXPECT_TRUE(layer3 != layers.end());
-        IE::replaceLayerWithNewLayer(*net, layer3->second, newLayer3);
+        CNNNetSubstituteLayer(*net, layer3->second, newLayer3);
         IE::CNNLayerPtr layer3Check = nullptr;
         net->getLayerByName("layer3", layer3Check, nullptr);
         ASSERT_EQ(layer3Check, newLayer3);
diff --git a/inference-engine/tests/unit/inference_engine_tests/util_test.hpp b/inference-engine/tests/unit/inference_engine_tests/util_test.hpp
new file mode 100644
index 000000000..76225e099
--- /dev/null
+++ b/inference-engine/tests/unit/inference_engine_tests/util_test.hpp
@@ -0,0 +1,121 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+namespace IE = InferenceEngine;
+
+class NetBuilder {
+    using LayersMap = std::unordered_map<std::string, IE::CNNLayerPtr>;
+    using DataMap = std::unordered_map<std::string, IE::DataPtr>;
+    using InputsSet = std::unordered_set<IE::InputInfo::Ptr>;
+    LayersMap _layers;
+    DataMap _data;
+    InputsSet _inputs;
+public:
+    NetBuilder() = default;
+
+    NetBuilder(const NetBuilder&) = delete;
+
+    template<typename... Args>
+    NetBuilder& data(Args&& ... args) {
+        auto newData = std::make_shared<IE::Data>(std::forward<Args>(args)...);
+        assert(!IE::contains(_data, newData->getName()));
+        _data[newData->getName()] = newData;
+        return *this;
+    }
+
+    template<typename T, typename... Args>
+    NetBuilder& layer(Args&& ... args) {
+        auto newLayer = std::make_shared<T>(std::forward<Args>(args)...);
+        assert(!IE::contains(_layers, newLayer->name));
+        _layers[newLayer->name] = std::static_pointer_cast<IE::CNNLayer>(newLayer);
+        return *this;
+    }
+
+    const LayersMap& getLayersMap() const {
+        return _layers;
+    }
+
+    const DataMap& getDataMap() const {
+        return _data;
+    }
+
+    NetBuilder& linkDataTo(const std::string& dataName,
+                           const std::string& nextlayerName) {
+        assert(IE::contains(_layers, nextlayerName));
+        assert(IE::contains(_data, dataName));
+
+        auto nextlayer = _layers[nextlayerName];
+        auto data = _data[dataName];
+
+        nextlayer->insData.push_back(data);
+        data->getInputTo().insert({nextlayerName, nextlayer});
+        return *this;
+    }
+
+    NetBuilder& linkToData(const std::string& prevlayerName,
+                           const std::string& dataName) {
+        assert(IE::contains(_layers, prevlayerName));
+        assert(IE::contains(_data, dataName));
+
+        auto prevlayer = _layers[prevlayerName];
+        auto data = _data[dataName];
+        assert(nullptr == data->getCreatorLayer().lock());
+
+        prevlayer->outData.push_back(data);
+        data->getCreatorLayer() = prevlayer;
+        return *this;
+    }
+
+    NetBuilder& linkLayers(const std::string& prevlayerName,
+                           const std::string& nextlayerName,
+                           const std::string& dataName) {
+        linkToData(prevlayerName, dataName);
+        linkDataTo(dataName, nextlayerName);
+        return *this;
+    }
+
+    NetBuilder& linkData(const std::string& prevDataName,
+                         const std::string& nextDataName,
+                         const std::string& layerName) {
+        linkDataTo(prevDataName, layerName);
+        linkToData(layerName, nextDataName);
+        return *this;
+    }
+
+    template<typename... Args>
+    NetBuilder& addInput(const std::string& dataName, Args&& ... args) {
+        assert(!dataName.empty());
+        assert(IE::contains(_data, dataName));
+        auto input = std::make_shared<IE::InputInfo>(
+                std::forward<Args>(args)...);
+        input->setInputData(_data[dataName]);
+        _inputs.insert(std::move(input));
+        return *this;
+    }
+
+    IE::details::CNNNetworkImplPtr finalize() {
+        auto net = std::make_shared<IE::details::CNNNetworkImpl>();
+
+        for (auto&& it: _data) {
+            auto& data = it.second;
+            net->getData(it.first) = data;
+            if (nullptr == data->getCreatorLayer().lock()) {
+                auto input = std::make_shared<IE::InputInfo>();
+                input->setInputData(data);
+                net->setInputInfo(input);
+            }
+        }
+        for (auto&& it: _layers) {
+            net->addLayer(it.second);
+        }
+        for (auto& i : _inputs) {
+            net->setInputInfo(std::move(i));
+        }
+
+        net->resolveOutput();
+
+        return net;
+    }
+};
diff --git a/inference-engine/tests/unit/mem_solver/mem_solver_test.cpp b/inference-engine/tests/unit/mem_solver/mem_solver_test.cpp
index 8ffab0f61..0f430c0ac 100644
--- a/inference-engine/tests/unit/mem_solver/mem_solver_test.cpp
+++ b/inference-engine/tests/unit/mem_solver/mem_solver_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp
index 5141a1f55..bd49a139d 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_default.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp
index a8adfbb54..c65279d3e 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_thread_safe_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_thread_safe_internal.hpp
index 720c58411..6fdc1d0f6 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_thread_safe_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_async_infer_request_thread_safe_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_network_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_network_internal.hpp
index 08bd36731..8ab78b217 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_network_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_network_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -28,6 +28,6 @@ public:
     MOCK_METHOD1(CreateInferRequest, void(IInferRequest::Ptr &));
     MOCK_METHOD1(Export, void(const std::string &));
     MOCK_METHOD1(GetMappedTopology, void(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &));
-
+    MOCK_METHOD1(GetExecGraphInfo, void(ICNNNetwork::Ptr &));
 };
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_async_only.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_async_only.hpp
index d6658bc7c..587dbea77 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_async_only.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_async_only.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp
index f67323b7e..9e5a25470 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_executable_thread_safe_default.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp
index 42e299ba0..fc40d03f4 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_infer_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp
index bf3b54038..9898c6563 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/impl/mock_inference_plugin_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp
index cf37848e6..9ae7837b7 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iasync_infer_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp
index c5316bf9e..e63016708 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iexecutable_network_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -28,4 +28,5 @@ public:
     MOCK_METHOD1(Export, void(const std::string &));
     MOCK_METHOD1(GetMappedTopology, void(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &));
     MOCK_METHOD0(QueryState, std::vector<IMemoryStateInternal::Ptr>());
+    MOCK_METHOD1(GetExecGraphInfo, void(ICNNNetwork::Ptr &));
 };
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp
index dd1bb497e..253a548f7 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_iinfer_request_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_imemory_state_internal.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_imemory_state_internal.hpp
index 03a4043e2..667a7947b 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_imemory_state_internal.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/interface/mock_imemory_state_internal.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/mock_plugin_impl.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/mock_plugin_impl.hpp
index 66c9910bc..99ba25ba9 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/mock_plugin_impl.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/mock_plugin_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_executor.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_executor.hpp
index 34ccb5cad..1d9b79a46 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_executor.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_executor.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_synchronizer.hpp b/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_synchronizer.hpp
index 2d34f1e42..e8aedba67 100644
--- a/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_synchronizer.hpp
+++ b/inference-engine/tests/unit/mocks/cpp_interfaces/mock_task_synchronizer.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_allocator.hpp b/inference-engine/tests/unit/mocks/mock_allocator.hpp
index ad53afbff..ce632f904 100644
--- a/inference-engine/tests/unit/mocks/mock_allocator.hpp
+++ b/inference-engine/tests/unit/mocks/mock_allocator.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_error_listener.hpp b/inference-engine/tests/unit/mocks/mock_error_listener.hpp
index 420fc228a..e2b27836f 100644
--- a/inference-engine/tests/unit/mocks/mock_error_listener.hpp
+++ b/inference-engine/tests/unit/mocks/mock_error_listener.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_iasync_infer_request.hpp b/inference-engine/tests/unit/mocks/mock_iasync_infer_request.hpp
index 571a7d4b0..e9f1aad4f 100644
--- a/inference-engine/tests/unit/mocks/mock_iasync_infer_request.hpp
+++ b/inference-engine/tests/unit/mocks/mock_iasync_infer_request.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_icnn_network.hpp b/inference-engine/tests/unit/mocks/mock_icnn_network.hpp
index 43337fb46..1bdac7d83 100644
--- a/inference-engine/tests/unit/mocks/mock_icnn_network.hpp
+++ b/inference-engine/tests/unit/mocks/mock_icnn_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_iexecutable_network.hpp b/inference-engine/tests/unit/mocks/mock_iexecutable_network.hpp
index d28d81c14..5ddf2f6a6 100644
--- a/inference-engine/tests/unit/mocks/mock_iexecutable_network.hpp
+++ b/inference-engine/tests/unit/mocks/mock_iexecutable_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -22,4 +22,5 @@ public:
     MOCK_QUALIFIED_METHOD2(GetMappedTopology, noexcept, StatusCode(std::map<std::string, std::vector<PrimitiveInfo::Ptr>> &, ResponseDesc*));
     MOCK_QUALIFIED_METHOD0(Release, noexcept, void ());
     MOCK_QUALIFIED_METHOD3(QueryState, noexcept, StatusCode(IMemoryState::Ptr &, size_t  , ResponseDesc*));
+    MOCK_QUALIFIED_METHOD2(GetExecGraphInfo, noexcept, StatusCode(ICNNNetwork::Ptr &, ResponseDesc*));
 };
diff --git a/inference-engine/tests/unit/mocks/mock_iformat_parser.hpp b/inference-engine/tests/unit/mocks/mock_iformat_parser.hpp
index 12b7c2fb1..750b2f0db 100644
--- a/inference-engine/tests/unit/mocks/mock_iformat_parser.hpp
+++ b/inference-engine/tests/unit/mocks/mock_iformat_parser.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_inference_engine.hpp b/inference-engine/tests/unit/mocks/mock_inference_engine.hpp
index 150629c61..dd3a99158 100644
--- a/inference-engine/tests/unit/mocks/mock_inference_engine.hpp
+++ b/inference-engine/tests/unit/mocks/mock_inference_engine.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/mock_not_empty_icnn_network.hpp b/inference-engine/tests/unit/mocks/mock_not_empty_icnn_network.hpp
index bc71baed6..1edefb728 100644
--- a/inference-engine/tests/unit/mocks/mock_not_empty_icnn_network.hpp
+++ b/inference-engine/tests/unit/mocks/mock_not_empty_icnn_network.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -18,10 +18,16 @@ public:
     static constexpr const char* OUTPUT_BLOB_NAME = "first_output";
     MOCK_QUALIFIED_METHOD0(getPrecision, const noexcept, Precision ());
     void getOutputsInfo(OutputsDataMap& out) const noexcept override {
-        out[OUTPUT_BLOB_NAME] = nullptr;
+        auto data = std::make_shared<Data>("", Precision::UNSPECIFIED);
+        data->getInputTo()[""] = std::make_shared<CNNLayer>(LayerParams{});
+        out[OUTPUT_BLOB_NAME] = data;
     };
     void getInputsInfo(InputsDataMap &inputs) const noexcept override {
-        inputs[INPUT_BLOB_NAME] = nullptr;
+        auto inputInfo = std::make_shared<InputInfo>();
+        auto data = std::make_shared<Data>("", Precision::UNSPECIFIED);
+        data->getInputTo()[""] = std::make_shared<CNNLayer>(LayerParams{});
+        inputInfo->setInputData(data);
+        inputs[INPUT_BLOB_NAME] = inputInfo;
     };
     MOCK_QUALIFIED_METHOD1(getInput, const noexcept, InputInfo::Ptr (const std::string &inputName));
     MOCK_QUALIFIED_METHOD2(getName, const noexcept, void (char* pName, size_t len));
diff --git a/inference-engine/tests/unit/mocks/mock_plugin_dispatcher.hpp b/inference-engine/tests/unit/mocks/mock_plugin_dispatcher.hpp
index 769690fcc..aaa165804 100644
--- a/inference-engine/tests/unit/mocks/mock_plugin_dispatcher.hpp
+++ b/inference-engine/tests/unit/mocks/mock_plugin_dispatcher.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/shape_infer/mock_input_controller.hpp b/inference-engine/tests/unit/mocks/shape_infer/mock_input_controller.hpp
index 4e2c2d4c8..e971ee7be 100644
--- a/inference-engine/tests/unit/mocks/shape_infer/mock_input_controller.hpp
+++ b/inference-engine/tests/unit/mocks/shape_infer/mock_input_controller.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -23,6 +23,8 @@ public:
 
     MOCK_METHOD1(getShapes, std::vector<SizeVector>(bool));
 
+    MOCK_METHOD1(getBlobs, std::vector<Blob::CPtr>(bool));
+
     MOCK_METHOD0(getIRShapes, std::vector<SizeVector>());
 
     MOCK_METHOD1(getIRShapeByName, SizeVector(
diff --git a/inference-engine/tests/unit/mocks/shape_infer/mock_ishape_infer_impl.hpp b/inference-engine/tests/unit/mocks/shape_infer/mock_ishape_infer_impl.hpp
index 75e70dee0..9868310bd 100644
--- a/inference-engine/tests/unit/mocks/shape_infer/mock_ishape_infer_impl.hpp
+++ b/inference-engine/tests/unit/mocks/shape_infer/mock_ishape_infer_impl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,7 +15,7 @@ public:
     using Ptr = std::shared_ptr<MockIShapeInferImpl>;
 
     MOCK_QUALIFIED_METHOD5(inferShapes, noexcept, StatusCode(
-            const std::vector<SizeVector> &, const std::map<std::string, std::string>&, const std::map<std::string, Blob::Ptr>&, std::vector<SizeVector> &, ResponseDesc *));
+            const std::vector<Blob::CPtr> &, const std::map<std::string, std::string>&, const std::map<std::string, Blob::Ptr>&, std::vector<SizeVector> &, ResponseDesc *));
 
 };
 
diff --git a/inference-engine/tests/unit/mocks/shape_infer/mock_output_controller.hpp b/inference-engine/tests/unit/mocks/shape_infer/mock_output_controller.hpp
index a3cc339f9..b7b1b0705 100644
--- a/inference-engine/tests/unit/mocks/shape_infer/mock_output_controller.hpp
+++ b/inference-engine/tests/unit/mocks/shape_infer/mock_output_controller.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/shape_infer/mock_reshaper_launcher.hpp b/inference-engine/tests/unit/mocks/shape_infer/mock_reshaper_launcher.hpp
index 46045469d..7784a0904 100644
--- a/inference-engine/tests/unit/mocks/shape_infer/mock_reshaper_launcher.hpp
+++ b/inference-engine/tests/unit/mocks/shape_infer/mock_reshaper_launcher.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/mocks/shape_infer/mock_shape_infer_extension.hpp b/inference-engine/tests/unit/mocks/shape_infer/mock_shape_infer_extension.hpp
index f579954c4..8ef5152bf 100644
--- a/inference-engine/tests/unit/mocks/shape_infer/mock_shape_infer_extension.hpp
+++ b/inference-engine/tests/unit/mocks/shape_infer/mock_shape_infer_extension.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/opencv_test_gapi/CMakeLists.txt b/inference-engine/tests/unit/opencv_test_gapi/CMakeLists.txt
index 73d3af56b..5a4248ffd 100644
--- a/inference-engine/tests/unit/opencv_test_gapi/CMakeLists.txt
+++ b/inference-engine/tests/unit/opencv_test_gapi/CMakeLists.txt
@@ -1,5 +1,17 @@
-# Copyright (C) 2018 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
+#
+# Copyright (C) 2018-2019 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you (End User License Agreement for the Intel(R) Software
+# Development Products (Version May 2017)). Unless the License provides
+# otherwise, you may not use, modify, copy, publish, distribute, disclose or
+# transmit this software or the related documents without Intel's prior
+# written permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
 #
 
 if(NOT ENABLE_GAPI_TESTS)
@@ -8,24 +20,25 @@ if(NOT ENABLE_GAPI_TESTS)
 endif()
 
 find_package(OpenCV COMPONENTS gapi)
-if(NOT(OpenCV_FOUND))
+if(NOT OpenCV_FOUND)
     message(WARNING "No suitable OpenCV version detected, " ${TARGET_NAME} " skipped")
     return()
 endif()
 
+add_subdirectory(fluid_test_computations)
+
 file(GLOB SOURCES *.cpp common/*.cpp cpu/*.cpp)
 file(GLOB HEADERS *.hpp common/*.hpp cpu/*.hpp)
 
 set(TARGET opencv_test_gapi)
 add_executable(${TARGET} ${SOURCES} ${HEADERS})
 
-target_include_directories(${TARGET}
-  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}"
-  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/common"
-  PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/cpu"
-  PRIVATE "${IE_MAIN_SOURCE_DIR}/thirdparty/fluid/modules/gapi/include/")
+target_include_directories(${TARGET} PRIVATE
+          "${CMAKE_CURRENT_SOURCE_DIR}"
+          "${CMAKE_CURRENT_SOURCE_DIR}/common"
+          "${CMAKE_CURRENT_SOURCE_DIR}/cpu")
 
-target_link_libraries(${TARGET} ${OpenCV_LIBS} inference_engine gtest gtest_main)
+target_link_libraries(${TARGET} PRIVATE ${OpenCV_LIBS} inference_engine_s fluid_test_computations gtest gtest_main)
 
 if(GAPI_TEST_PERF)
   target_compile_definitions(${TARGET} PRIVATE -DPERF_TEST=1)
diff --git a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.cpp b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.cpp
index fb57725af..e46d81a9c 100644
--- a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.cpp
+++ b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.hpp b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.hpp
index 7a251f9d6..884554fb2 100644
--- a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.hpp
+++ b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -13,15 +13,9 @@
 namespace opencv_test
 {
 
-struct ResizeTestGAPI: public testing::TestWithParam<std::tuple<int, int, std::pair<cv::Size, cv::Size>, double, cv::GCompileArgs>> {};
-
-struct Split2TestGAPI: public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct Split3TestGAPI: public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct Split4TestGAPI: public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-
-struct Merge2TestGAPI: public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct Merge3TestGAPI: public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
-struct Merge4TestGAPI: public TestParams<std::tuple<int, cv::Size, cv::GCompileArgs>> {};
+struct ResizeTestGAPI: public testing::TestWithParam<std::tuple<int, int, std::pair<cv::Size, cv::Size>, double>> {};
+struct SplitTestGAPI: public TestParams<std::tuple<int, int, cv::Size>> {};
+struct MergeTestGAPI: public TestParams<std::tuple<int, int, cv::Size>> {};
 
 //------------------------------------------------------------------------------
 
diff --git a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests_inl.hpp b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests_inl.hpp
index 3daaba5ae..9f9244916 100644
--- a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests_inl.hpp
+++ b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_core_tests_inl.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -9,9 +9,7 @@
 
 #include "blob_factory.hpp"
 #include "blob_transform.hpp"
-#include "ie_preprocess.hpp"
 #include "ie_preprocess_data.hpp"
-#include "ie_preprocess_gapi_kernels.hpp"
 
 #include <opencv2/core.hpp>
 #include <opencv2/imgproc.hpp>
@@ -23,7 +21,7 @@
 
 #include <chrono>
 
-#define CV_MAT_CHANNELS(flags) (((flags) >> CV_CN_SHIFT) + 1)
+#include <fluid_test_computations.hpp>
 
 // Can be set externally (via CMake) if built with -DGAPI_TEST_PERF=ON
 #ifndef PERF_TEST
@@ -107,14 +105,27 @@ static cv::String typeToString(int type)
 }
 #endif  // PERF_TEST
 
+namespace {
+
+test::Mat to_test(cv::Mat& mat) { return {mat.rows, mat.cols, mat.type(), mat.data}; }
+std::vector<test::Mat> to_test(std::vector<cv::Mat>& mats)
+{
+    std::vector<test::Mat> test_mats(mats.size());
+    for (int i = 0; i < mats.size(); i++) {
+        test_mats[i] = to_test(mats[i]);
+    }
+    return test_mats;
+}
+
+} // anonymous namespace
+
 TEST_P(ResizeTestGAPI, AccuracyTest)
 {
     int type = 0, interp = 0;
     cv::Size sz_in, sz_out;
     double tolerance = 0.0;
-    cv::GCompileArgs compile_args;
     std::pair<cv::Size, cv::Size> sizes;
-    std::tie(type, interp, sizes, tolerance, compile_args) = GetParam();
+    std::tie(type, interp, sizes, tolerance) = GetParam();
     std::tie(sz_in, sz_out) = sizes;
 
     cv::Mat in_mat1 (sz_in, type );
@@ -127,42 +138,12 @@ TEST_P(ResizeTestGAPI, AccuracyTest)
     cv::Mat out_mat_ocv(sz_out, type);
 
     // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in, out;
-    switch (CV_MAT_CHANNELS(type))
-    {
-    case 1:
-        out = InferenceEngine::gapi::ScalePlane::on(in, type, sz_in, sz_out, interp);
-        break;
-    case 3:
-        {
-        int depth = CV_MAT_DEPTH(type);
-        int type1 = CV_MAKE_TYPE(depth, 1);
-        cv::GMat in0, in1, in2, out0, out1, out2;
-        std::tie(in0, in1, in2) = InferenceEngine::gapi::Split3::on(in);
-        out0 = InferenceEngine::gapi::ScalePlane::on(in0, type1, sz_in, sz_out, interp);
-        out1 = InferenceEngine::gapi::ScalePlane::on(in1, type1, sz_in, sz_out, interp);
-        out2 = InferenceEngine::gapi::ScalePlane::on(in2, type1, sz_in, sz_out, interp);
-        out = InferenceEngine::gapi::Merge3::on(out0, out1, out2);
-        }
-        break;
-    default: CV_Assert(!"ERROR: unsupported number of channels!");
-    }
-
-    cv::GComputation c(in, out);
-
-    // compile graph, and test once
-
-    auto own_in_mat1 = cv::to_own(in_mat1);
-    auto own_out_mat = cv::to_own(out_mat);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat };
-
-    c.apply(v_in, v_out, std::move(compile_args));
+    FluidResizeComputation rc(to_test(in_mat1), to_test(out_mat), interp);
+    rc.warmUp();
 
 #if PERF_TEST
     // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
+    test_ms([&](){ rc.apply(); },
             100, "Resize GAPI %s %s %dx%d -> %dx%d",
             interpToString(interp).c_str(), typeToString(type).c_str(),
             sz_in.width, sz_in.height, sz_out.width, sz_out.height);
@@ -180,299 +161,75 @@ TEST_P(ResizeTestGAPI, AccuracyTest)
     }
 }
 
-TEST_P(Split2TestGAPI, AccuracyTest)
-{
-    int depth = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
-
-    int type1 = CV_MAKE_TYPE(depth, 1);
-    int type2 = CV_MAKE_TYPE(depth, 2);
-    initMatrixRandU(type2, sz_in, type1);
-
-    cv::Mat out_mat2 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat_ocv2 = cv::Mat(sz_in, type1);
-
-    // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in1, out1, out2;
-    std::tie(out1, out2) = InferenceEngine::gapi::Split2::on(in1);
-    cv::GComputation c(cv::GIn(in1), cv::GOut(out1, out2));
-
-    // compile graph, and test once
-
-    auto own_in_mat1      = cv::to_own(in_mat1);
-    auto own_out_mat_gapi = cv::to_own(out_mat_gapi);
-    auto own_out_mat2     = cv::to_own(out_mat2);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat_gapi, own_out_mat2 };
-
-    c.apply(v_in, v_out, std::move(compile_args));
-
-#if PERF_TEST
-    // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
-        400, "Split GAPI %s %dx%d", typeToString(type2).c_str(), sz_in.width, sz_in.height);
-#endif
-
-    // OpenCV code /////////////////////////////////////////////////////////////
-    {
-        std::vector<cv::Mat> out_mats_ocv = {out_mat_ocv, out_mat_ocv2};
-        cv::split(in_mat1, out_mats_ocv);
-    }
-    // Comparison //////////////////////////////////////////////////////////////
-    {
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv  != out_mat_gapi));
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv2 != out_mat2));
-    }
-}
-
-TEST_P(Split3TestGAPI, AccuracyTest)
-{
-    int depth = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
-
-    int type1 = CV_MAKE_TYPE(depth, 1);
-    int type3 = CV_MAKE_TYPE(depth, 3);
-    initMatrixRandU(type3, sz_in, type1);
-
-    cv::Mat out_mat2 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat3 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat_ocv2 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat_ocv3 = cv::Mat(sz_in, type1);
-
-    // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in1, out1, out2, out3;
-    std::tie(out1, out2, out3) = InferenceEngine::gapi::Split3::on(in1);
-    cv::GComputation c(cv::GIn(in1), cv::GOut(out1, out2, out3));
-
-    // compile graph, and test once
-
-    auto own_in_mat1      = cv::to_own(in_mat1);
-    auto own_out_mat_gapi = cv::to_own(out_mat_gapi);
-    auto own_out_mat2     = cv::to_own(out_mat2);
-    auto own_out_mat3     = cv::to_own(out_mat3);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat_gapi, own_out_mat2, own_out_mat3 };
-
-    c.apply(v_in, v_out, std::move(compile_args));
-
-#if PERF_TEST
-    // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
-        400, "Split GAPI %s %dx%d", typeToString(type3).c_str(), sz_in.width, sz_in.height);
-#endif
-
-    // OpenCV code /////////////////////////////////////////////////////////////
-    {
-        std::vector<cv::Mat> out_mats_ocv = {out_mat_ocv, out_mat_ocv2, out_mat_ocv3};
-        cv::split(in_mat1, out_mats_ocv);
-    }
-    // Comparison //////////////////////////////////////////////////////////////
-    {
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv  != out_mat_gapi));
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv2 != out_mat2));
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv3 != out_mat3));
-    }
-}
-
-TEST_P(Split4TestGAPI, AccuracyTest)
+TEST_P(SplitTestGAPI, AccuracyTest)
 {
-    int depth = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
+    const auto params = GetParam();
+    int planes  = std::get<0>(params);
+    int depth   = std::get<1>(params);
+    cv::Size sz = std::get<2>(params);
 
-    int type1 = CV_MAKE_TYPE(depth, 1);
-    int type4 = CV_MAKE_TYPE(depth, 4);
-    initMatrixRandU(type4, sz_in, type1);
-
-    cv::Mat out_mat2 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat3 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat4 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat_ocv2 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat_ocv3 = cv::Mat(sz_in, type1);
-    cv::Mat out_mat_ocv4 = cv::Mat(sz_in, type1);
-
-    // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in1, out1, out2, out3, out4;
-    std::tie(out1, out2, out3, out4) = InferenceEngine::gapi::Split4::on(in1);
-    cv::GComputation c(cv::GIn(in1), cv::GOut(out1, out2, out3, out4));
-
-    // compile graph, and test once
-
-    auto own_in_mat1      = cv::to_own(in_mat1);
-    auto own_out_mat_gapi = cv::to_own(out_mat_gapi);
-    auto own_out_mat2     = cv::to_own(out_mat2);
-    auto own_out_mat3     = cv::to_own(out_mat3);
-    auto own_out_mat4     = cv::to_own(out_mat4);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat_gapi, own_out_mat2,
-                                                  own_out_mat3, own_out_mat4 };
-
-    c.apply(v_in, v_out, std::move(compile_args));
-
-#if PERF_TEST
-    // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
-        400, "Split GAPI %s %dx%d", typeToString(type4).c_str(), sz_in.width, sz_in.height);
-#endif
-
-    // OpenCV code /////////////////////////////////////////////////////////////
-    {
-        std::vector<cv::Mat> out_mats_ocv = {out_mat_ocv, out_mat_ocv2, out_mat_ocv3, out_mat_ocv4};
-        cv::split(in_mat1, out_mats_ocv);
-    }
-    // Comparison //////////////////////////////////////////////////////////////
-    {
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv  != out_mat_gapi));
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv2 != out_mat2));
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv3 != out_mat3));
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv4 != out_mat4));
-    }
-}
+    int srcType = CV_MAKE_TYPE(depth, planes);
+    int dstType = CV_MAKE_TYPE(depth, 1);
 
-TEST_P(Merge2TestGAPI, AccuracyTest)
-{
-    int depth = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
+    cv::Mat in_mat(sz, srcType);
+    cv::randn(in_mat, cv::Scalar::all(127), cv::Scalar::all(40.f));
 
-    int type1 = CV_MAKE_TYPE(depth, 1);
-    int type2 = CV_MAKE_TYPE(depth, 2);
-    initMatsRandU(type1, sz_in, type2);
+    std::vector<cv::Mat> out_mats_gapi(planes, cv::Mat::zeros(sz, dstType));
+    std::vector<cv::Mat> out_mats_ocv (planes, cv::Mat::zeros(sz, dstType));
 
     // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in1, in2;
-    auto out = InferenceEngine::gapi::Merge2::on(in1, in2);
-    cv::GComputation c(cv::GIn(in1, in2), cv::GOut(out));
-
-    // compile graph, and test once
-
-    auto own_in_mat1      = cv::to_own(in_mat1);
-    auto own_in_mat2      = cv::to_own(in_mat2);
-    auto own_out_mat_gapi = cv::to_own(out_mat_gapi);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1, own_in_mat2 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat_gapi };
-
-    c.apply(v_in, v_out, std::move(compile_args));
+    FluidSplitComputation sc(to_test(in_mat), to_test(out_mats_gapi));
+    sc.warmUp();
 
 #if PERF_TEST
     // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
-        400, "Merge GAPI %s %dx%d", typeToString(type2).c_str(), sz_in.width, sz_in.height);
+    test_ms([&](){ sc.apply(); },
+        400, "Split GAPI %s %dx%d", typeToString(srcType).c_str(), sz.width, sz.height);
 #endif
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
-        std::vector<cv::Mat> in_mats_ocv = {in_mat1, in_mat2};
-        cv::merge(in_mats_ocv, out_mat_ocv);
+        cv::split(in_mat, out_mats_ocv);
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
+        for (int p = 0; p < planes; p++) {
+            EXPECT_EQ(0, cv::countNonZero(out_mats_ocv[p]  != out_mats_gapi[p]));
+        }
     }
 }
 
-TEST_P(Merge3TestGAPI, AccuracyTest)
+TEST_P(MergeTestGAPI, AccuracyTest)
 {
-    int depth = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
+    const auto params = GetParam();
+    int planes  = std::get<0>(params);
+    int depth   = std::get<1>(params);
+    cv::Size sz = std::get<2>(params);
 
-    int type1 = CV_MAKE_TYPE(depth, 1);
-    int type3 = CV_MAKE_TYPE(depth, 3);
-    initMatsRandU(type1, sz_in, type3);
+    int srcType = CV_MAKE_TYPE(depth, 1);
+    int dstType = CV_MAKE_TYPE(depth, planes);
 
-    cv::Scalar mean = cv::Scalar::all(127);
-    cv::Scalar stddev = cv::Scalar::all(40.f);
-
-    cv::Mat in_mat3(sz_in,  type1);
-    cv::randn(in_mat3, mean, stddev);
-
-    // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in1, in2, in3;
-    auto out = InferenceEngine::gapi::Merge3::on(in1, in2, in3);
-    cv::GComputation c(cv::GIn(in1, in2, in3), cv::GOut(out));
-
-    // compile graph, and test once
-
-    auto own_in_mat1      = cv::to_own(in_mat1);
-    auto own_in_mat2      = cv::to_own(in_mat2);
-    auto own_in_mat3      = cv::to_own(in_mat3);
-    auto own_out_mat_gapi = cv::to_own(out_mat_gapi);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1, own_in_mat2, own_in_mat3 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat_gapi };
-
-    c.apply(v_in, v_out, std::move(compile_args));
-
-#if PERF_TEST
-    // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
-        400, "Merge GAPI %s %dx%d", typeToString(type3).c_str(), sz_in.width, sz_in.height);
-#endif
-
-    // OpenCV code /////////////////////////////////////////////////////////////
-    {
-        std::vector<cv::Mat> in_mats_ocv = {in_mat1, in_mat2, in_mat3};
-        cv::merge(in_mats_ocv, out_mat_ocv);
+    std::vector<cv::Mat> in_mats(planes, cv::Mat(sz, srcType));
+    for (int p = 0; p < planes; p++) {
+        cv::randn(in_mats[p], cv::Scalar::all(127), cv::Scalar::all(40.f));
     }
-    // Comparison //////////////////////////////////////////////////////////////
-    {
-        EXPECT_EQ(0, cv::countNonZero(out_mat_ocv != out_mat_gapi));
-    }
-}
-
-TEST_P(Merge4TestGAPI, AccuracyTest)
-{
-    int depth = std::get<0>(GetParam());
-    cv::Size sz_in = std::get<1>(GetParam());
-    auto compile_args = std::get<2>(GetParam());
 
-    int type1 = CV_MAKE_TYPE(depth, 1);
-    int type4 = CV_MAKE_TYPE(depth, 4);
-    initMatsRandU(type1, sz_in, type4);
-
-    cv::Scalar mean = cv::Scalar::all(127);
-    cv::Scalar stddev = cv::Scalar::all(40.f);
-
-    cv::Mat in_mat3(sz_in,  type1);
-    cv::Mat in_mat4(sz_in,  type1);
-    cv::randn(in_mat3, mean, stddev);
-    cv::randn(in_mat4, mean, stddev);
+    cv::Mat out_mat_ocv  = cv::Mat::zeros(sz, dstType);
+    cv::Mat out_mat_gapi = cv::Mat::zeros(sz, dstType);
 
     // G-API code //////////////////////////////////////////////////////////////
-    cv::GMat in1, in2, in3, in4;
-    auto out = InferenceEngine::gapi::Merge4::on(in1, in2, in3, in4);
-    cv::GComputation c(cv::GIn(in1, in2, in3, in4), cv::GOut(out));
-
-    // compile graph, and test once
-
-    auto own_in_mat1      = cv::to_own(in_mat1);
-    auto own_in_mat2      = cv::to_own(in_mat2);
-    auto own_in_mat3      = cv::to_own(in_mat3);
-    auto own_in_mat4      = cv::to_own(in_mat4);
-    auto own_out_mat_gapi = cv::to_own(out_mat_gapi);
-
-    std::vector<cv::gapi::own::Mat> v_in  = { own_in_mat1, own_in_mat2, own_in_mat3, own_in_mat4 };
-    std::vector<cv::gapi::own::Mat> v_out = { own_out_mat_gapi };
-
-    c.apply(v_in, v_out, std::move(compile_args));
+    FluidMergeComputation mc(to_test(in_mats), to_test(out_mat_gapi));
+    mc.warmUp();
 
 #if PERF_TEST
     // iterate testing, and print performance
-    test_ms([&](){ c.apply(v_in, v_out); },
-        400, "Merge GAPI %s %dx%d", typeToString(type4).c_str(), sz_in.width, sz_in.height);
+    test_ms([&](){ mc.apply(); },
+        400, "Merge GAPI %s %dx%d", typeToString(dstType).c_str(), sz.width, sz.height);
 #endif
 
     // OpenCV code /////////////////////////////////////////////////////////////
     {
-        std::vector<cv::Mat> in_mats_ocv = {in_mat1, in_mat2, in_mat3, in_mat4};
-        cv::merge(in_mats_ocv, out_mat_ocv);
+        cv::merge(in_mats, out_mat_ocv);
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
@@ -534,11 +291,11 @@ TEST_P(ResizeTestIE, AccuracyTest)
     ResizeAlgorithm algorithm = cv::INTER_AREA == interp ? RESIZE_AREA : RESIZE_BILINEAR;
 
     // test once to warm-up cache
-    preprocess.execute(out_blob, algorithm);
+    preprocess.execute(out_blob, algorithm, false);
 
 #if PERF_TEST
     // iterate testing, and print performance
-    test_ms([&](){ preprocess.execute(out_blob, algorithm); },
+    test_ms([&](){ preprocess.execute(out_blob, algorithm, false); },
             100, "Resize IE %s %s %dx%d -> %dx%d",
             interpToString(interp).c_str(), typeToString(type).c_str(),
             sz_in.width, sz_in.height, sz_out.width, sz_out.height);
@@ -827,7 +584,7 @@ TEST_P(PreprocTest, Performance)
     preprocess.setRoiBlob(in_blob);
 
     // test once to warm-up cache
-    preprocess.execute(out_blob, interp);
+    preprocess.execute(out_blob, interp, false);
 
     switch (prec)
     {
@@ -859,7 +616,7 @@ TEST_P(PreprocTest, Performance)
     const auto in_layout_str = layout_to_str(in_layout);
     const auto out_layout_str = layout_to_str(out_layout);
 
-    test_ms([&]() { preprocess.execute(out_blob, interp); },
+    test_ms([&]() { preprocess.execute(out_blob, interp, false); },
             300,
             "Preproc %s %d %s %s %dx%d %s %dx%d",
             type_str.c_str(),
diff --git a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_tests_common.hpp b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_tests_common.hpp
index 27b43e3c4..f44207628 100644
--- a/inference-engine/tests/unit/opencv_test_gapi/common/gapi_tests_common.hpp
+++ b/inference-engine/tests/unit/opencv_test_gapi/common/gapi_tests_common.hpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/opencv_test_gapi/cpu/gapi_core_tests_fluid.cpp b/inference-engine/tests/unit/opencv_test_gapi/cpu/gapi_core_tests_fluid.cpp
index 31714b608..040dfe6c7 100644
--- a/inference-engine/tests/unit/opencv_test_gapi/cpu/gapi_core_tests_fluid.cpp
+++ b/inference-engine/tests/unit/opencv_test_gapi/cpu/gapi_core_tests_fluid.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -101,45 +101,23 @@ INSTANTIATE_TEST_CASE_P(ResizeTestFluid_U8, ResizeTestGAPI,
                         Combine(Values(CV_8UC1, CV_8UC3),
                                 Values(cv::INTER_LINEAR, cv::INTER_AREA),
                                 Values(TEST_RESIZE_PAIRS),
-                                Values(1), // error not more than 1 unit
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(1))); // error not more than 1 unit
 
 INSTANTIATE_TEST_CASE_P(ResizeTestFluid_F32, ResizeTestGAPI,
                         Combine(Values(CV_32FC1, CV_32FC3),
                                 Values(cv::INTER_LINEAR, cv::INTER_AREA),
                                 Values(TEST_RESIZE_PAIRS),
-                                Values(0.015), // accuracy like ~1.5%
-                                Values(cv::compile_args(CORE_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(Split2TestFluid, Split2TestGAPI,
-                        Combine(Values(CV_8U, CV_32F),
-                                Values(TEST_SIZES),
-                                Values(cv::compile_args(CORE_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(Split3TestFluid, Split3TestGAPI,
-                        Combine(Values(CV_8U, CV_32F),
-                                Values(TEST_SIZES),
-                                Values(cv::compile_args(CORE_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(Split4TestFluid, Split4TestGAPI,
-                        Combine(Values(CV_8U, CV_32F),
-                                Values(TEST_SIZES),
-                                Values(cv::compile_args(CORE_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(Merge2TestFluid, Merge2TestGAPI,
-                        Combine(Values(CV_8U, CV_32F),
-                                Values(TEST_SIZES),
-                                Values(cv::compile_args(CORE_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(Merge3TestFluid, Merge3TestGAPI,
-                        Combine(Values(CV_8U, CV_32F),
-                                Values(TEST_SIZES),
-                                Values(cv::compile_args(CORE_FLUID))));
-
-INSTANTIATE_TEST_CASE_P(Merge4TestFluid, Merge4TestGAPI,
-                        Combine(Values(CV_8U, CV_32F),
-                                Values(TEST_SIZES),
-                                Values(cv::compile_args(CORE_FLUID))));
+                                Values(0.015))); // accuracy like ~1.5%
+
+INSTANTIATE_TEST_CASE_P(SplitTestFluid, SplitTestGAPI,
+                        Combine(Values(2, 3, 4),
+                                Values(CV_8U, CV_32F),
+                                Values(TEST_SIZES)));
+
+INSTANTIATE_TEST_CASE_P(MergeTestFluid, MergeTestGAPI,
+                        Combine(Values(2, 3, 4),
+                                Values(CV_8U, CV_32F),
+                                Values(TEST_SIZES)));
 
 //----------------------------------------------------------------------
 
diff --git a/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/CMakeLists.txt b/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/CMakeLists.txt
new file mode 100644
index 000000000..5ade83ac3
--- /dev/null
+++ b/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/CMakeLists.txt
@@ -0,0 +1,25 @@
+#
+# Copyright 2019 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you (End User License Agreement for the Intel(R) Software
+# Development Products (Version May 2017)). Unless the License provides
+# otherwise, you may not use, modify, copy, publish, distribute, disclose or
+# transmit this software or the related documents without Intel's prior
+# written permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+file(GLOB SRC *.cpp)
+file(GLOB HDR *.hpp)
+
+add_library(fluid_test_computations SHARED ${SRC} ${HDR})
+
+target_include_directories(fluid_test_computations PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
+
+target_link_libraries(fluid_test_computations PRIVATE inference_engine_s
+                                              PRIVATE fluid)
diff --git a/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/fluid_test_computations.cpp b/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/fluid_test_computations.cpp
new file mode 100644
index 000000000..9efd2eee5
--- /dev/null
+++ b/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/fluid_test_computations.cpp
@@ -0,0 +1,133 @@
+#include <fluid_test_computations.hpp>
+#include <opencv2/gapi.hpp>
+#include <ie_preprocess_gapi_kernels.hpp>
+
+#define CV_MAT_CHANNELS(flags) (((flags) >> CV_CN_SHIFT) + 1)
+
+namespace opencv_test
+{
+struct FluidComputation::Priv
+{
+    cv::GComputation m_c;
+    std::vector<cv::gapi::own::Mat> m_v_in;
+    std::vector<cv::gapi::own::Mat> m_v_out;
+};
+
+FluidComputation::FluidComputation(Priv *priv)
+    : m_priv(priv)
+{}
+
+void FluidComputation::warmUp()
+{
+    m_priv->m_c.apply(m_priv->m_v_in, m_priv->m_v_out, cv::compile_args(InferenceEngine::gapi::preprocKernels()));
+}
+
+void FluidComputation::apply()
+{
+    m_priv->m_c.apply(m_priv->m_v_in, m_priv->m_v_out);
+}
+
+namespace
+{
+cv::gapi::own::Mat to_own(test::Mat mat) { return {mat.rows, mat.cols, mat.type, mat.data}; }
+
+std::vector<cv::gapi::own::Mat> to_own(std::vector<test::Mat> mats)
+{
+    std::vector<cv::gapi::own::Mat> own_mats(mats.size());
+    for (int i = 0; i < mats.size(); i++) {
+        own_mats[i] = to_own(mats[i]);
+    }
+    return own_mats;
+}
+
+template<typename... Ts, int... IIs>
+std::vector<cv::GMat> to_vec_impl(std::tuple<Ts...> &&gmats, cv::detail::Seq<IIs...>) {
+    return { std::get<IIs>(gmats)... };
+}
+
+template<typename... Ts>
+std::vector<cv::GMat> to_vec(std::tuple<Ts...> &&gmats) {
+    return to_vec_impl(std::move(gmats), typename cv::detail::MkSeq<sizeof...(Ts)>::type());
+}
+} // anonymous namespace
+
+static cv::GComputation buildResizeComputation(test::Mat inMat, test::Mat outMat, int interp)
+{
+    cv::gapi::own::Size sz_in  { inMat.cols,  inMat.rows};
+    cv::gapi::own::Size sz_out {outMat.cols, outMat.rows};
+    int type = outMat.type;
+    cv::GMat in, out;
+    switch (CV_MAT_CHANNELS(type)) {
+    case 1:
+        out = InferenceEngine::gapi::ScalePlane::on(in, type, sz_in, sz_out, interp);
+        break;
+    case 3:
+        {
+        int depth = CV_MAT_DEPTH(type);
+        int type1 = CV_MAKE_TYPE(depth, 1);
+        cv::GMat in0, in1, in2, out0, out1, out2;
+        std::tie(in0, in1, in2) = InferenceEngine::gapi::Split3::on(in);
+        out0 = InferenceEngine::gapi::ScalePlane::on(in0, type1, sz_in, sz_out, interp);
+        out1 = InferenceEngine::gapi::ScalePlane::on(in1, type1, sz_in, sz_out, interp);
+        out2 = InferenceEngine::gapi::ScalePlane::on(in2, type1, sz_in, sz_out, interp);
+        out = InferenceEngine::gapi::Merge3::on(out0, out1, out2);
+        }
+        break;
+    default: GAPI_Assert(!"ERROR: unsupported number of channels!");
+    }
+
+    return cv::GComputation(in, out);
+}
+
+FluidResizeComputation::FluidResizeComputation(test::Mat inMat, test::Mat outMat, int interp)
+    : FluidComputation(new Priv{buildResizeComputation(inMat, outMat, interp)
+                               ,{to_own(inMat)}
+                               ,{to_own(outMat)}
+                               })
+{}
+
+static cv::GComputation buildSplitComputation(int planes)
+{
+    std::vector<cv::GMat> ins(1);
+    std::vector<cv::GMat> outs(planes);
+
+    switch (planes) {
+    case 2: outs = to_vec(InferenceEngine::gapi::Split2::on(ins[0])); break;
+    case 3: outs = to_vec(InferenceEngine::gapi::Split3::on(ins[0])); break;
+    case 4: outs = to_vec(InferenceEngine::gapi::Split4::on(ins[0])); break;
+    default: GAPI_Assert(false);
+    }
+
+    return cv::GComputation(ins, outs);
+}
+
+FluidSplitComputation::FluidSplitComputation(test::Mat inMat, std::vector<test::Mat> outMats)
+    : FluidComputation(new Priv{buildSplitComputation(outMats.size())
+                               ,{to_own(inMat)}
+                               ,to_own(outMats)
+                               })
+{}
+
+static cv::GComputation buildMergeComputation(int planes)
+{
+    std::vector<cv::GMat> ins(planes);
+    std::vector<cv::GMat> outs(1);
+
+    switch (planes) {
+    case 2: outs[0] = InferenceEngine::gapi::Merge2::on(ins[0], ins[1]); break;
+    case 3: outs[0] = InferenceEngine::gapi::Merge3::on(ins[0], ins[1], ins[2]); break;
+    case 4: outs[0] = InferenceEngine::gapi::Merge4::on(ins[0], ins[1], ins[2], ins[3]); break;
+    default: GAPI_Assert(false);
+    }
+
+    return cv::GComputation(ins, outs);
+}
+
+FluidMergeComputation::FluidMergeComputation(std::vector<test::Mat> inMats, test::Mat outMat)
+    : FluidComputation(new Priv{buildMergeComputation(inMats.size())
+                               ,to_own(inMats)
+                               ,{to_own(outMat)}
+                               })
+{}
+
+} // namespace opencv_test
diff --git a/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/fluid_test_computations.hpp b/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/fluid_test_computations.hpp
new file mode 100644
index 000000000..52a8bf6ca
--- /dev/null
+++ b/inference-engine/tests/unit/opencv_test_gapi/fluid_test_computations/fluid_test_computations.hpp
@@ -0,0 +1,57 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#ifndef FLUID_TEST_COMPUTATIONS_HPP
+#define FLUID_TEST_COMPUTATIONS_HPP
+
+#include <ie_api.h>
+
+#include <memory>
+#include <vector>
+
+namespace opencv_test
+{
+namespace test
+{
+struct Mat
+{
+    int   rows;
+    int   cols;
+    int   type;
+    void* data;
+};
+}
+
+class __attribute__((visibility("default"))) FluidComputation
+{
+protected:
+    struct Priv;
+    std::shared_ptr<Priv> m_priv;
+public:
+    FluidComputation(Priv* priv);
+    void warmUp();
+    void apply();
+};
+
+class __attribute__((visibility("default"))) FluidResizeComputation : public FluidComputation
+{
+public:
+    FluidResizeComputation(test::Mat inMat, test::Mat outMat, int interp);
+};
+
+class __attribute__((visibility("default"))) FluidSplitComputation : public FluidComputation
+{
+public:
+    FluidSplitComputation(test::Mat inMat, std::vector<test::Mat> outMats);
+};
+
+class __attribute__((visibility("default"))) FluidMergeComputation : public FluidComputation
+{
+public:
+    FluidMergeComputation(std::vector<test::Mat> inMats, test::Mat outMat);
+};
+
+} // namespace opencv_test
+
+#endif // FLUID_TEST_COMPUTATIONS_HPP
diff --git a/inference-engine/tests/unit/shape_infer/adult_test.cpp b/inference-engine/tests/unit/shape_infer/adult_test.cpp
new file mode 100644
index 000000000..0dd1c4930
--- /dev/null
+++ b/inference-engine/tests/unit/shape_infer/adult_test.cpp
@@ -0,0 +1,648 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <tuple>
+#include "adult_test.hpp"
+#include "debug.h"
+#include <cmath>
+
+using namespace InferenceEngine;
+using namespace details;
+using namespace ShapeInfer;
+using namespace ShapeInferTests;
+
+void BasicTest::SetUp() {
+    auto params = GetParam();
+    type = std::get<0>(params);
+    inOutData = std::get<1>(params);
+}
+
+void BlobTest::SetUp() {
+    auto params = GetParam();
+    type = std::get<0>(params);
+    inOutData = std::get<1>(params);
+    blobsParam = std::get<2>(params);
+}
+
+void ParamsTest::SetUp() {
+    auto params = GetParam();
+    type = std::get<0>(params);
+    inOutData = std::get<1>(params);
+    strParams = std::get<2>(params);
+}
+
+ASITestBuilder CommonTests::assertThat() {
+    return ASITestBuilder().withType(type).withData(inOutData);
+}
+
+std::vector<Precision> StridedSliceTest::getPrecisions() {
+    size_t size = inOutData.inData.size();
+    std::vector<Precision> result;
+    if (!size) THROW_IE_EXCEPTION << "unsupported number of precisions";
+    result.emplace_back(Precision::FP32);
+    for (int i = 1; i < size; i++) {
+        result.emplace_back(Precision::I32);
+    }
+    return result;
+}
+
+std::vector<float> FillTest::refGen(const InOutData& inOutData) {
+    const size_t FILL_DIMS = 0;
+    const size_t FILL_VALUE = 1;
+    float value = inOutData.inData[FILL_VALUE][0];
+    auto shape = inOutData.inData[FILL_DIMS];
+    return std::vector<float>(product(shape), value);
+}
+
+std::vector<float> RangeTest::refGen(const InOutData& inOutData) {
+    std::vector<float> result;
+    float start = inOutData.inData[0][0];
+    float limit = inOutData.inData[1][0];
+    float delta = inOutData.inData[2][0];
+    size_t work_amount_dst = std::floor(std::abs((limit - start) / delta));
+    if (work_amount_dst != product(inOutData.inOutShapes.outDims[0]))
+        THROW_IE_EXCEPTION << "Range indexes exceeds data tensor dimension";
+
+    float dst_value = start;
+    for (size_t iwork = 0; iwork < work_amount_dst; ++iwork, dst_value += delta) {
+        result.push_back(dst_value);
+    }
+    return result;
+}
+
+TEST_P(BlobTest, impl) {
+    assertThat().constInferResultFor().withBlobs(blobsParam).equals().toData(inOutData.outData);
+}
+
+TEST_P(BasicTest, impl) {
+    assertThat().constInferResultFor().equals().toData(inOutData.outData);
+}
+
+TEST_P(ParamsTest, impl) {
+    assertThat().constInferResultFor().withParams(strParams.data).equals().toData(inOutData.outData);
+}
+
+TEST_P(StridedSliceTest, impl) {
+    assertThat().constInferResultFor().withParams(strParams.data)
+            .withInputPrecisions(getPrecisions()).equals().toData(inOutData.outData);
+}
+
+TEST_P(StridedSliceTest, shapeInfer) {
+    assertThat().shapeInferResultFor().withParams(strParams.data)
+            .withInputPrecisions(getPrecisions())
+            .equals().toShapes(inOutData.inOutShapes.outDims);
+}
+
+TEST_P(BasicAdultTest, impl) {
+    assertThat().shapeInferResultFor().equals().toShapes(inOutData.inOutShapes.outDims);
+}
+
+TEST_P(FillTest, impl) {
+    assertThat().constInferResultFor().withInputPrecisions({Precision::I32, Precision::FP32})
+            .equals().toData({refGen(inOutData)});
+}
+
+TEST_P(FillTest, shapeInfer) {
+    assertThat().shapeInferResultFor().withInputPrecisions({Precision::I32, Precision::FP32})
+            .equals().toShapes(inOutData.inOutShapes.outDims);
+}
+
+TEST_P(RangeTest, impl) {
+    assertThat().constInferResultFor().equals().toData({refGen(inOutData)});
+}
+
+TEST_P(RangeTest, shapeInfer) {
+    assertThat().shapeInferResultFor().equals().toShapes(inOutData.inOutShapes.outDims);
+}
+
+static std::vector<float> singleInputData = {4.f, 8.f, 12.f, 16.f};
+
+static testing::InOutShapes singleSmallShapes = {{{1, 3}},
+                                                 {{1, 3}}};
+static std::vector<float> singleSmallData = {1.f, 2.f, 4.f};
+
+static testing::InOutShapes singleSmall2Shapes = {{{1, 3}, {1, 3}},
+                                                  {{1, 3}}};
+
+static testing::InOutShapes singleInOutShape = {{{4, 8, 12, 16}},
+                                                {{4}}};
+
+static std::vector<float> fourInARow = {1.f, 2.f, 3.f, 4.f};
+
+static SizeVector threeDeuces = {2, 2, 2};
+
+INSTANTIATE_TEST_CASE_P(
+        CheckOutputDirectly, BlobTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Const"), InOutDataParam({singleInOutShape, {}, {singleInputData}}),
+                                      BlobsParam(FloatMap{{"custom", singleInputData}}))
+        )
+);
+
+INSTANTIATE_TEST_CASE_P(
+        CheckOutputDirectly, ParamsTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Power"),
+                                      InOutDataParam({singleSmallShapes,
+                                                      {singleSmallData},
+                                                      {{-2 / 3.f, -2 / 7.f, -2 / 15.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"power", "-1"},
+                                                                                             {"scale", "-2"},
+                                                                                             {"shift", "0.5"}}))),
+                ::testing::make_tuple(LayerType("Power"),
+                                      InOutDataParam({singleSmallShapes,
+                                                      {singleSmallData},
+                                                      {{-3.375f, -1.f, 0.f,}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"power", "3"},
+                                                                                             {"scale", "0.5"},
+                                                                                             {"shift", "-2"}}))),
+                ::testing::make_tuple(LayerType("Power"),
+                                      InOutDataParam({singleSmallShapes,
+                                                      {singleSmallData},
+                                                      {{10.f, 10.f, 10.f,}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"power", "1"},
+                                                                                             {"scale", "0"},
+                                                                                             {"shift", "10"}}))),
+                ::testing::make_tuple(LayerType("Tile"),
+                                      InOutDataParam({{{{2, 1, 2}},
+                                                              {threeDeuces}},
+                                                      {fourInARow},
+                                                      {{1.f, 2.f, 1.f, 2.f, 3.f, 4.f, 3.f, 4.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis",  "1"},
+                                                                                             {"tiles", "2"}}))),
+                ::testing::make_tuple(LayerType("Tile"),
+                                      InOutDataParam({{{{2, 2, 1}},
+                                                              {threeDeuces}},
+                                                      {fourInARow},
+                                                      {{1.f, 1.f, 2.f, 2.f, 3.f, 3.f, 4.f, 4.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis",  "2"},
+                                                                                             {"tiles", "2"}}))),
+                ::testing::make_tuple(LayerType("Tile"),
+                                      InOutDataParam({{{{1, 2, 2}},
+                                                              {threeDeuces}},
+                                                      {fourInARow},
+                                                      {{1.f, 2.f, 3.f, 4.f, 1.f, 2.f, 3.f, 4.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis",  "0"},
+                                                                                             {"tiles", "2"}}))),
+                ::testing::make_tuple(LayerType("Reshape"),
+                                      InOutDataParam({{{{1, 2, 2}}, {{4}}},
+                                                      {fourInARow},
+                                                      {fourInARow}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("Split"),
+                                      InOutDataParam({{{{2, 1, 2}}, {{2, 1, 1}, {2, 1, 1}}},
+                                                      {fourInARow},
+                                                      {{1.f, 3.f},  {2.f,       4.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "2"}}))),
+                ::testing::make_tuple(LayerType("Split"),
+                                      InOutDataParam({{{{2, 1, 2}}, {{1, 1, 2}, {1, 1, 2}}},
+                                                      {fourInARow},
+                                                      {{1.f, 2.f},  {3.f,       4.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}}))),
+                ::testing::make_tuple(LayerType("Split"),
+                                      InOutDataParam({{{{4, 1, 1}}, {{2, 1, 1}, {1, 1, 1}, {1, 1, 1}}},
+                                                      {fourInARow},
+                                                      {{1.f, 2.f},  {3.f}, {4.f}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}}))),
+                ::testing::make_tuple(LayerType("Concat"),
+                                      InOutDataParam({{{{2, 1, 1}, {2, 1, 1}}, {{2, 1, 2}}},
+                                                      {{1.f,       3.f},       {2.f, 4.f}},
+                                                      {fourInARow}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "2"}}))),
+                ::testing::make_tuple(LayerType("Concat"),
+                                      InOutDataParam({{{{1, 1, 2}, {1, 1, 2}}, {{2, 1, 2}}},
+                                                      {{1.f,       2.f},       {3.f, 4.f}},
+                                                      {fourInARow}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}}))),
+                ::testing::make_tuple(LayerType("Concat"),
+                                      InOutDataParam({{{{2, 1, 1}, {1, 1, 1}, {1, 1, 1}}, {{4, 1, 1}}},
+                                                      {{1.f,       2.f},                  {3.f}, {4.f}},
+                                                      {fourInARow}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}})))
+        )
+);
+
+namespace {
+//  Test data vectors
+std::vector<float> in0 = {0.f, 1.f, 1.f, 0.f};
+std::vector<float> in1 = {0.f, 1.f, 2.f, 1.f};
+std::vector<float> dict = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f};
+std::vector<float> dict2D = {1.f, 2.f, 3.f, 4.f}; // 2x2
+std::vector<float> ref_in0_a0_d223 = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 7.f, 8.f, 9.f,
+                                      10.f, 11.f, 12.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f}; // 2x2x2x3
+std::vector<float> ref_in1_a2_d223 = {1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f,
+                                      11.f}; // 2x2x2x2
+std::vector<float> ref_in0_a0_d22 = {1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f}; // 2x2x2
+}
+
+INSTANTIATE_TEST_CASE_P(
+        TestsGather, ParamsTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Gather"),
+                                      InOutDataParam({{{{2, 2}, {1, 4}}, {{1, 4, 2}}},
+                                                      {dict2D,           in0},
+                                                      {ref_in0_a0_d22}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}}))),
+                ::testing::make_tuple(LayerType("Gather"),
+                                      InOutDataParam({{{{2, 2, 3}, {2, 2}}, {{2, 2, 2, 3}}},
+                                                      {dict,                in0},
+                                                      {ref_in0_a0_d223}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}}))),
+                ::testing::make_tuple(LayerType("Gather"),
+                                      InOutDataParam({{{{2, 2, 3}, {2, 2}}, {{2, 2, 2, 3}}},
+                                                      {dict,                in0},
+                                                      {ref_in0_a0_d223}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "-3"}}))),
+                ::testing::make_tuple(LayerType("Gather"),
+                                      InOutDataParam({{{{2, 2, 3}, {2, 2}}, {{2, 2, 2, 2}}},
+                                                      {dict,                in1},
+                                                      {ref_in1_a2_d223}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "2"}})))
+        )
+);
+
+//static testing::InOutShapes eltWiseShapes1 = {{{4}, {1}},
+//                                              {{4}}};
+//static std::vector<std::vector<float>> eltWiseInputs1 = {singleInputData,
+//                                                         {4.f}};
+//
+//static testing::InOutShapes eltWiseShapes2 = {{{2, 3}, {3}},
+//                                              {{2, 3}}};
+//static std::vector<std::vector<float>> eltWiseInputs2 = {{4.f, 8.f, 12.f, 4.f, 8.f, 8.f},
+//                                                         {4.f, 8.f, 4.f}};
+INSTANTIATE_TEST_CASE_P(
+        CheckOutputDirectly, BasicTest,
+        ::testing::Values(
+                ::testing::make_tuple(
+                        LayerType("Shape"),
+                        InOutDataParam({singleInOutShape, {}, {singleInputData}})),
+//                ::testing::make_tuple(
+//                        LayerType("Mul"),
+//                        InOutDataParam({eltWiseShapes1, eltWiseInputs1, {{16.f, 32.f, 48.f, 64.f}}})),
+//                ::testing::make_tuple(
+//                        LayerType("Add"),
+//                        InOutDataParam({eltWiseShapes1, eltWiseInputs1, {{8.f, 12.f, 16.f, 20.f}}})),
+//                ::testing::make_tuple(
+//                        LayerType("Div"),
+//                        InOutDataParam({eltWiseShapes1, eltWiseInputs1, {{1.f, 2.f, 3.f, 4.f}}})),
+//                ::testing::make_tuple(
+//                        LayerType("Mul"),
+//                        InOutDataParam({eltWiseShapes2, eltWiseInputs2, {{16.f, 64.f, 48.f, 16.f, 64.f, 32.f}}})),
+//                ::testing::make_tuple(
+//                        LayerType("Add"),
+//                        InOutDataParam({eltWiseShapes2, eltWiseInputs2, {{8.f, 16.f, 16.f, 8.f, 16.f, 12.f}}})),
+//                ::testing::make_tuple(
+//                        LayerType("Div"),
+//                        InOutDataParam({eltWiseShapes2, eltWiseInputs2, {{1.f, 1.f, 3.f, 1.f, 1.f, 2.f}}})),
+                ::testing::make_tuple(LayerType("Mul"),
+                                      InOutDataParam({singleSmall2Shapes, {singleSmallData, singleSmallData},
+                                                      {{1.f, 4.f, 16.f}}})),
+                ::testing::make_tuple(LayerType("Add"),
+                                      InOutDataParam({singleSmall2Shapes, {singleSmallData, singleSmallData},
+                                                      {{2.f, 4.f, 8.f}}})),
+                ::testing::make_tuple(LayerType("Div"),
+                                      InOutDataParam({singleSmall2Shapes, {singleSmallData, singleSmallData},
+                                                      {{1.f, 1.f, 1.f}}}))
+        )
+);
+
+INSTANTIATE_TEST_CASE_P(
+        SecondInput, BasicAdultTest,
+        ::testing::Combine(::testing::Values(LayerType("Reshape"), LayerType("Interp"), LayerType("Resample")),
+                           ::testing::Values(InOutDataParam({{{{2, 3}, {2}},
+                                                                     {{1, 6}}},
+                                                             {{},    {1.f, 6.f}},
+                                                             {}})))
+);
+
+INSTANTIATE_TEST_CASE_P(
+        DimSemantic, BasicAdultTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Reshape"),
+                                      InOutDataParam({{{{2, 3}, {2}},
+                                                              {{1, 6}}},
+                                                      {{},    {1.f, -1.f}},
+                                                      {}}))
+        )
+);
+
+INSTANTIATE_TEST_CASE_P(
+        SqueezeUnsqueeze, BasicAdultTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{3}, {1}},
+                                                              {{1, 3}}},
+                                                      {{},    {0.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{3}, {3}},
+                                                              {{1, 1, 1, 3}}},
+                                                      {{},    {0.f, 1.f, 2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{3}, {3}},
+                                                              {{1, 3, 1, 1}}},
+                                                      {{},    {0.f, 2.f, 3.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{2, 3}, {2}},
+                                                              {{1, 2, 3, 1}}},
+                                                      {{},    {0.f, 3.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{2, 3}, {1}},
+                                                              {{2, 1, 3}}},
+                                                      {{},    {1.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{3}, {1}},
+                                                              {{1, 3}}},
+                                                      {{},    {0.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{3}, {3}},
+                                                              {{1, 1, 1, 3}}},
+                                                      {{},    {0.f, 1.f, 2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{3}, {3}},
+                                                              {{1, 3, 1, 1}}},
+                                                      {{},    {0.f, 2.f, 3.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{2, 3}, {2}},
+                                                              {{1, 2, 3, 1}}},
+                                                      {{},    {0.f, 3.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Unsqueeze"),
+                                      InOutDataParam({{{{2, 3}, {1}},
+                                                              {{2, 1, 3}}},
+                                                      {{},    {1.f,}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1}, {1}},
+                                                              {{}}},
+                                                      {{},    {0.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {1}},
+                                                              {{3, 1}}},
+                                                      {{},    {0.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {1}},
+                                                              {{1, 3}}},
+                                                      {{},    {2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {2}},
+                                                              {{3}}},
+                                                      {{},    {0.f, 2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {1}},
+                                                              {{1, 3}}},
+                                                      {{},    {-1.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1, 2}, {2}},
+                                                              {{3, 2}}},
+                                                      {{},    {0.f, 2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1}, {1}},
+                                                              {{}}},
+                                                      {{},    {0.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {1}},
+                                                              {{1, 3}}},
+                                                      {{},    {2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {2}},
+                                                              {{3}}},
+                                                      {{},    {0.f, 2.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1}, {1}},
+                                                              {{1, 3}}},
+                                                      {{},    {-1.f}},
+                                                      {}})),
+                ::testing::make_tuple(LayerType("Squeeze"),
+                                      InOutDataParam({{{{1, 3, 1, 2}, {2}},
+                                                              {{3, 2}}},
+                                                      {{},    {0.f, 2.f}},
+                                                      {}}))
+        )
+);
+namespace {
+//  Test data vectors
+std::vector<float> test0 = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
+std::vector<float> test2 = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f};
+std::vector<float> test5 = {5.f, 6.f, 7.f, 8.f};
+std::vector<float> test6 = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
+std::vector<float> test8 = {5.f, 4.f, 3.f, 2.f, 1.f};
+std::vector<float> test9 = {5.f, 4.f, 3.f, 2.f, 1.f, 0.f};
+std::vector<float> test10 = {5.f, 4.f, 3.f};
+std::vector<float> test11 = {0.f, 2.f, 4.f, 6.f, 8.f};
+std::vector<float> test12 = {1.f, 3.f, 5.f, 7.f, 9.f};
+std::vector<float> test13 = {9.f, 8.f, 7.f, 6.f, 5.f, 4.f, 3.f, 2.f, 1.f, 0.f};
+std::vector<float> test14 = {9.f, 7.f, 5.f, 3.f, 1.f};
+std::vector<float> test16 = {0.f, 1.f, 3.f, 4.f};
+std::vector<float> test17 = {1.f, 4.f};
+std::vector<float> test19 = {0.f, 1.f, 2.f, 3.f};
+std::vector<float> test20 = {4.f, 5.f, 6.f, 7.f};
+/*
+0. [0,1,2,3,4,5,6,7,8,9], shape=[10]
+1. [0,1,2,3,4,5,6,7,8,9], shape=[10]
+2. [0,1,2,3,4,5,6,7,8], shape=[9]
+3. [0,1,2,3,4,5,6,7,8], shape=[9]
+4. [0,1,2,3,4,5,6,7,8,9], shape=[10]
+5. [5,6,7,8,9], shape=[5]
+6. [0,1,2,3,4,5], shape=[6]
+7. [5,6,7,8,9], shape=[5]
+8. [5,4,3,2,1], shape=[5]
+9. [5,4,3,2,1,0], shape=[6]
+10. [5,4,3], shape=[3]
+11. [0,2,4,6,8], shape=[5]
+12. [1,3,5,7,9], shape=[5]
+13. [9,8,7,6,5,4,3,2,1,0], shape=[10]
+14. [9,7,5,3,1], shape=[5]
+15. [[0,1,2,3,4,5,6,7,8,9]], shape=[1,10]
+16. [[[0,1,2],[3,4,5]]], shape=[1,2,2]
+17. [[[0,1,2],[3,4,5]]], shape=[1,2,1]
+18. [[[0,1,2],[3,4,5]]], shape=[1,1,2,1]
+19. [[[[0,1],[2,3]],[[4,5],[6,7]]]], shape=[1,2,2]
+20. [[[[0,1],[2,3]],[[4,5],[6,7]]]], shape=[1,2,2]
+21. [[[0,1,2],[3,4,5]]], shape=[1,1,2]
+*/
+}
+
+INSTANTIATE_TEST_CASE_P(
+        StridedSlice, StridedSliceTest,
+        ::testing::Values(
+                /* 0 */
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {}, {}, {}}, {{10}}},
+                                                                                 {{test0},            {}, {}, {}},
+                                                                                 {test0}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{10}}},
+                                                                                 {{test0},              {0.f}, {0.f}, {}},
+                                                                                 {test0}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"end_mask", "0"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{9}}},
+                                                                                 {{test0},              {-1.f}, {-1.f}, {}},
+                                                                                 {test2}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"begin_mask", "0"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{9}}},
+                                                                                 {{test0},              {0.f}, {-1.f}, {}},
+                                                                                 {test2}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{10}}},
+                                                                                 {{test0},              {0.f}, {10.f}, {}},
+                                                                                 {test0}}),
+                                      MapParams(MapStrStr())),
+/* 5 */
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{5}}},
+                                                                                 {{test0},              {5.f}, {10.f}, {}},
+                                                                                 {test5}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{6}}},
+                                                                                 {{test0},              {0.f}, {6.f}, {}},
+                                                                                 {test6}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{5}}},
+                                                                                 {{test0},              {-5.f}, {10.f}, {}},
+                                                                                 {test5}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{5}}},
+                                                                                 {{test0},               {-5.f}, {0.f}, {-1.f}},
+                                                                                 {test8}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{6}}},
+                                                                                 {{test0},               {-5.f}, {0.f}, {-1.f}},
+                                                                                 {test9}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"end_mask", "0"}}))
+                ),
+/* 10 */
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{3}}},
+                                                                                 {{test0},               {-5.f}, {2.f}, {-1.f}},
+                                                                                 {test10}}),
+                                      MapParams(MapStrStr())),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{5}}},
+                                                                                 {{test0},               {0.f}, {0.f}, {2.f}},
+                                                                                 {test11}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"end_mask", "0"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{5}}},
+                                                                                 {{test0},               {1.f}, {0.f}, {2.f}},
+                                                                                 {test12}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"end_mask", "0"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{10}}},
+                                                                                 {{test0},               {-1.f}, {0.f}, {-1.f}},
+                                                                                 {test13}}),
+                                      MapParams(MapStrStr(
+                                              std::map<std::string, std::string>{{"end_mask", "0"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {1}}, {{5}}},
+                                                                                 {{test0},               {-1.f}, {0.f}, {-2.f}},
+                                                                                 {test14}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"end_mask", "0"}}))),
+/* 15 */
+                ::testing::make_tuple(LayerType("StridedSlice"), InOutDataParam({{{{10}, {1}, {1}, {}}, {{1, 10}}},
+                                                                                 {{test0},              {0.f}, {10.f}, {}},
+                                                                                 {test0}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"new_axis_mask", "1"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"),
+                                      InOutDataParam({{{{1, 2, 3}, {2}, {2}, {}}, {{1, 2, 2}}},
+                                                      {{test0},                   {0.f, 0.f}, {1.f, 2.f}, {}},
+                                                      {test16}}),
+                                      MapParams(
+                                              MapStrStr(std::map<std::string, std::string>{{"ellipsis_mask", "0,1"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"),
+                                      InOutDataParam({{{{1, 2, 3}, {4}, {4}, {}}, {{1,   2,   1}}},
+                                                      {{test0},                   {{0.f, 0.f, 0.f, 1.f}}, {2.f, 3.f, 2.f, 2.f}, {}},
+                                                      {test17}}),
+                                      MapParams(
+                                              MapStrStr(std::map<std::string, std::string>{{"new_axis_mask",    "0,0,1,0"},
+                                                                                           {"shrink_axis_mask", "0,0,0,1"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"),
+                                      InOutDataParam({{{{1, 2, 3}, {3}, {3}, {}}, {{1, 1, 2, 1}}},
+                                                      {{test0},                   {0.f, 0.f, 1.f}, {2.f, 2.f, 2.f}, {}},
+                                                      {test17}}),
+                                      MapParams(MapStrStr(
+                                              std::map<std::string, std::string>{{"ellipsis_mask", "0,1"},
+                                                                                 {"new_axis_mask", "1"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"),
+                                      InOutDataParam({{{{1, 2, 2, 2}, {1}, {1}, {1}}, {{1, 2, 2}}},
+                                                      {{test0},                       {-1.f}, {0.f}, {-2.f}},
+                                                      {test19}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"begin_mask",       "0,1,0,0"},
+                                                                                             {"end_mask",         "0,1,0,0"},
+                                                                                             {"shrink_axis_mask", "0,1"}}))),
+/* 20 */
+                ::testing::make_tuple(LayerType("StridedSlice"),
+                                      InOutDataParam({{{{1, 2, 2, 2}, {4}, {4}, {}}, {{1, 2, 2}}},
+                                                      {{test0},                      {0.f, 1.f, 0.f, 0.f}, {1.f, 2.f, 2.f, 2.f}, {}},
+                                                      {test20}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"begin_mask",       "0,1,0,0"},
+                                                                                             {"end_mask",         "0,1,0,0"},
+                                                                                             {"shrink_axis_mask", "0,1,0,0"}}))),
+                ::testing::make_tuple(LayerType("StridedSlice"),
+                                      InOutDataParam({{{{1, 2, 3}, {3}, {3}, {}}, {{1, 1, 2}}},
+                                                      {{test0},                   {0.f, 0.f, 1.f}, {2.f, 2.f, 2.f}, {}},
+                                                      {test17}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"ellipsis_mask",    "0,1"},
+                                                                                             {"new_axis_mask",    "1"},
+                                                                                             {"shrink_axis_mask", "0,0,1"}})))
+        )
+);
+
+INSTANTIATE_TEST_CASE_P(
+        Fill, FillTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Fill"), InOutDataParam({{{{1}, {1}},
+                                                                                 {{1}}},
+                                                                         {{1.f}, {1.f}},
+                                                                         {}})),
+                ::testing::make_tuple(LayerType("Fill"), InOutDataParam({{{{3}, {1}},
+                                                                                 {{1, 3, 1}}},
+                                                                         {{1.f, 3.f, 1.f}, {1.f}},
+                                                                         {}})),
+                ::testing::make_tuple(LayerType("Fill"), InOutDataParam({{{{3}, {1}},
+                                                                                 {{2, 3, 6}}},
+                                                                         {{2.f, 3.f, 6.f}, {-1.f}},
+                                                                         {}})),
+                ::testing::make_tuple(LayerType("Fill"), InOutDataParam({{{{4}, {1}},
+                                                                                 {{1, 3, 1, 2}}},
+                                                                         {{1.f, 3.f, 1.f, 2.f}, {.5f}},
+                                                                         {}})),
+                ::testing::make_tuple(LayerType("Fill"), InOutDataParam({{{{6}, {1}},
+                                                                                 {{4, 3, 2, 5, 4, 2}}},
+                                                                         {{4.f, 3.f, 2.f, 5.f, 4.f, 2.f}, {.25f}},
+                                                                         {}}))
+        )
+);
+
+INSTANTIATE_TEST_CASE_P(
+        Range, RangeTest,
+        ::testing::Values(
+                ::testing::make_tuple(LayerType("Range"), InOutDataParam({{{{1}, {1}, {1}},
+                                                                                  {{5}}},
+                                                                          {{3.f}, {18.f}, {3.f}},
+                                                                          {{}}})),
+                ::testing::make_tuple(LayerType("Range"), InOutDataParam({{{{1}, {1}, {1}},
+                                                                                  {{2}}},
+                                                                          {{3.f}, {1.f}, {-1.f}},
+                                                                          {{}}})),
+                ::testing::make_tuple(LayerType("Range"), InOutDataParam({{{{1}, {1}, {1}},
+                                                                                  {{6}}},
+                                                                          {{3.f}, {-3.f}, {-1.f}},
+                                                                          {{}}})),
+                ::testing::make_tuple(LayerType("Range"), InOutDataParam({{{{1}, {1}, {1}},
+                                                                                  {{5}}},
+                                                                          {{0.f}, {5.f}, {1.f}},
+                                                                          {{}}}))
+        )
+);
diff --git a/inference-engine/tests/unit/shape_infer/adult_test.hpp b/inference-engine/tests/unit/shape_infer/adult_test.hpp
new file mode 100644
index 000000000..44478ad83
--- /dev/null
+++ b/inference-engine/tests/unit/shape_infer/adult_test.hpp
@@ -0,0 +1,74 @@
+#include <utility>
+
+#include <utility>
+
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <gtest/gtest.h>
+#include <inference_engine/shape_infer/const_infer/ie_const_infer_holder.hpp>
+#include "built_in_shape_infer_general_test.hpp"
+#include "adult_test_utils.hpp"
+
+namespace IE = InferenceEngine;
+
+namespace ShapeInferTests {
+
+class CommonTests : public ::testing::Test {
+protected:
+    ASITestBuilder assertThat();
+
+protected:
+    std::string type;
+    InOutData inOutData;
+};
+
+class BasicTest
+        : public CommonTests,
+          public testing::WithParamInterface<std::tuple<LayerType, InOutDataParam>> {
+protected:
+    void SetUp() override;
+};
+
+class BlobTest
+        : public CommonTests,
+          public testing::WithParamInterface<std::tuple<LayerType, InOutDataParam, BlobsParam>> {
+protected:
+    void SetUp() override;
+
+protected:
+    FloatMap blobsParam;
+};
+
+class ParamsTest
+        : public CommonTests,
+          public testing::WithParamInterface<std::tuple<LayerType, InOutDataParam, MapParams>> {
+protected:
+    void SetUp() override;
+
+protected:
+    MapStrStr strParams;
+};
+
+class BasicAdultTest : public BasicTest {
+};
+
+class StridedSliceTest : public ParamsTest {
+public:
+    std::vector<IE::Precision> getPrecisions();
+};
+
+class FillTest : public BasicTest {
+protected:
+    std::vector<float> refGen(const InOutData& inOutData);
+};
+
+class RangeTest : public BasicTest {
+protected:
+    std::vector<float> refGen(const InOutData& inOutData);
+};
+
+}  // namespace ShapeInferTests
diff --git a/inference-engine/tests/unit/shape_infer/adult_test_utils.cpp b/inference-engine/tests/unit/shape_infer/adult_test_utils.cpp
new file mode 100644
index 000000000..208872758
--- /dev/null
+++ b/inference-engine/tests/unit/shape_infer/adult_test_utils.cpp
@@ -0,0 +1,124 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <tuple>
+#include "adult_test.hpp"
+#include "adult_test_utils.hpp"
+
+
+using namespace InferenceEngine;
+using namespace details;
+using namespace ShapeInfer;
+
+void BaseMatcher::compareWithRef(const std::vector<InferenceEngine::Blob::Ptr>& outBlobs,
+                                 const std::vector<std::vector<float>>& refData,
+                                 float tolerance) {
+    for (int outIdx = 0; outIdx < outBlobs.size(); outIdx++) {
+        auto* data = outBlobs[outIdx]->buffer().as<float*>();
+        for (int elemIdx = 0; elemIdx < refData[outIdx].size(); elemIdx++) {
+            ASSERT_NEAR(data[elemIdx], refData[outIdx][elemIdx], tolerance);
+        }
+    }
+}
+
+std::vector<IE::Blob::Ptr>
+BaseMatcher::createBlobs(const std::vector<IE::SizeVector>& shapes, const std::vector<IE::Precision>& precisions) {
+    if (shapes.size() != precisions.size())
+        THROW_IE_EXCEPTION << "Vectors of shapes and precisions can't have different sizes";
+    std::vector<Blob::Ptr> blobs;
+    int i = 0;
+    for (const auto& dims : shapes) {
+        // it's assumed that empty dims = empty data = no blob
+        if (!dims.empty()) {
+            TensorDesc inDesc(precisions[i++], dims, TensorDesc::getLayoutByDims(dims));
+            auto blob = make_blob_with_precision(inDesc);
+            blob->allocate();
+            blobs.push_back(blob);
+        }
+    }
+    return blobs;
+}
+
+void BaseMatcher::fillBlobs(const std::vector<IE::Blob::Ptr>& blobs, const std::vector<std::vector<float>>& data) {
+    if (!data.empty()) {
+        for (int blobIdx = 0; blobIdx < blobs.size(); blobIdx++) {
+            auto blob = blobs[blobIdx];
+            // it's assumed that empty dims = empty data = no blob
+            if (!data[blobIdx].empty()) {
+                switch (blob->precision()) {
+                    case Precision::FP32: {
+                        auto* buffer = blob->buffer().as<float*>();
+                        for (int dataIdx = 0; dataIdx < blob->size(); dataIdx++) {
+                            buffer[dataIdx] = data[blobIdx][dataIdx];
+                        }
+                    }
+                        break;
+                    case Precision::I32: {
+                        auto* buffer = blob->buffer().as<int32_t*>();
+                        for (int dataIdx = 0; dataIdx < blob->size(); dataIdx++) {
+                            buffer[dataIdx] = static_cast<int32_t>(data[blobIdx][dataIdx]);
+                        }
+                    }
+                        break;
+                    default:
+                        THROW_IE_EXCEPTION << "Unsupported precision " << blob->precision() << " to fill blobs";
+                }
+            }
+        }
+    }
+}
+
+void ConstInferMatcher::toData(const std::vector<std::vector<float>>& refData) {
+    auto impl = holder->getConstInferImpl(config.type);
+    ASSERT_NE(nullptr, impl);
+    auto outBlobs = createBlobs(config.inOutData.inOutShapes.outDims, config.outPrecisions);
+    auto inBlobs = createBlobs(config.inOutData.inOutShapes.inDims, config.inPrecisions);
+    fillBlobs(inBlobs, config.inOutData.inData);
+    auto blobs = config.initBlobs(config.floatBlobData);
+    std::vector<Blob::CPtr> inCBlobs;
+    std::copy(inBlobs.begin(), inBlobs.end(), back_inserter(inCBlobs));
+    ASSERT_NO_THROW(impl->infer(inCBlobs, config.strParams, blobs, outBlobs));
+    compareWithRef(outBlobs, refData);
+}
+
+void ShapeInferMatcher::toShapes(const std::vector<IE::SizeVector>& refShape) {
+    siHolder.reset(new IE::ShapeInfer::BuiltInShapeInferHolder());
+    IE::IShapeInferImpl::Ptr impl;
+    std::vector<IE::SizeVector> outShapes;
+    sts = siHolder->getShapeInferImpl(impl, config.type.c_str(), &desc);
+    ASSERT_NE(nullptr, impl);
+    auto inBlobs = createBlobs(config.inOutData.inOutShapes.inDims, config.inPrecisions);
+    fillBlobs(inBlobs, config.inOutData.inData);
+    std::vector<Blob::CPtr> inCBlobs;
+    std::copy(inBlobs.begin(), inBlobs.end(), back_inserter(inCBlobs));
+    auto blobs = config.initBlobs(config.floatBlobData);
+    sts = impl->inferShapes(inCBlobs, config.strParams, blobs, outShapes, &desc);
+    ASSERT_EQ(sts, IE::OK) << desc.msg;
+    ASSERT_EQ(config.inOutData.inOutShapes.outDims, outShapes);
+}
+
+InitBlobsFunc ASITestBuilder::defaultBlobInit() {
+    return [](const FloatMap& blobDataMap) -> BlobMap {
+        BlobMap blobs;
+        for (const auto& it : blobDataMap) {
+            std::string blobName;
+            std::vector<float> data;
+            std::tie(blobName, data) = it;
+            SizeVector blobDims = {data.size()};
+            auto blob = make_shared_blob<float>(Precision::FP32, TensorDesc::getLayoutByDims(blobDims), blobDims,
+                                                data);
+            blobs[blobName] = blob;
+        }
+        return blobs;
+    };
+}
+
+MatcherConfigurator<ConstInferMatcher> ASITestBuilder::constInferResultFor() {
+    return MatcherConfigurator<ConstInferMatcher>(config);
+}
+
+MatcherConfigurator<ShapeInferMatcher> ASITestBuilder::shapeInferResultFor() {
+    return MatcherConfigurator<ShapeInferMatcher>(config);
+}
diff --git a/inference-engine/tests/unit/shape_infer/adult_test_utils.hpp b/inference-engine/tests/unit/shape_infer/adult_test_utils.hpp
new file mode 100644
index 000000000..451799cbd
--- /dev/null
+++ b/inference-engine/tests/unit/shape_infer/adult_test_utils.hpp
@@ -0,0 +1,137 @@
+#include <utility>
+
+#include <utility>
+
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <gtest/gtest.h>
+#include <inference_engine/shape_infer/const_infer/ie_const_infer_holder.hpp>
+#include "built_in_shape_infer_general_test.hpp"
+
+namespace IE = InferenceEngine;
+
+struct InOutData {
+    testing::InOutShapes inOutShapes;
+    std::vector<std::vector<float>> inData;
+    std::vector<std::vector<float>> outData;
+};
+
+using FloatMap = std::map<std::string, std::vector<float>>;
+using InitBlobsFunc = std::function<IE::BlobMap(const FloatMap& inOutData)>;
+
+struct ASIConfig {
+    InOutData inOutData;
+    std::string type;
+    FloatMap floatBlobData;
+    std::map<std::string, std::string> strParams;
+    InitBlobsFunc initBlobs;
+    std::vector<IE::Precision> inPrecisions;
+    std::vector<IE::Precision> outPrecisions;
+};
+
+class BaseMatcher {
+public:
+    explicit BaseMatcher(ASIConfig config) : config(std::move(config)) {}
+
+protected:
+    void compareWithRef(const std::vector<IE::Blob::Ptr>& outBlobs,
+                        const std::vector<std::vector<float>>& refData,
+                        float tolerance = 0.0001);
+
+    std::vector<IE::Blob::Ptr>
+    createBlobs(const std::vector<IE::SizeVector>& shapes, const std::vector<IE::Precision>& precisions);
+
+    void fillBlobs(const std::vector<IE::Blob::Ptr>& blobs, const std::vector<std::vector<float>>& data);
+
+    ASIConfig config;
+};
+
+class ConstInferMatcher : public BaseMatcher {
+public:
+    explicit ConstInferMatcher(const ASIConfig& config) : BaseMatcher(config) {}
+
+    void toData(const std::vector<std::vector<float>>& refData);
+
+private:
+    std::shared_ptr<IE::ShapeInfer::ConstInferHolder> holder;
+};
+
+class ShapeInferMatcher : public BaseMatcher {
+public:
+    explicit ShapeInferMatcher(const ASIConfig& config) : BaseMatcher(config) {}
+
+    void toShapes(const std::vector<IE::SizeVector>& refShape);
+
+private:
+    std::unique_ptr<IE::ShapeInfer::BuiltInShapeInferHolder> siHolder;
+    IE::StatusCode sts;
+    IE::ResponseDesc desc;
+};
+
+template<typename M>
+class MatcherConfigurator {
+public:
+    explicit MatcherConfigurator(ASIConfig config) : config(std::move(config)) {}
+
+    MatcherConfigurator& withParams(const std::map<std::string, std::string>& params) {
+        config.strParams = params;
+        return *this;
+    }
+
+    MatcherConfigurator& withInputPrecisions(const std::vector<IE::Precision>& inputPrecisions) {
+        config.inPrecisions = inputPrecisions;
+        return *this;
+    }
+
+    MatcherConfigurator& withOutputPrecisions(const std::vector<IE::Precision>& outputPrecisions) {
+        config.outPrecisions = outputPrecisions;
+        return *this;
+    }
+
+    MatcherConfigurator& withBlobs(const FloatMap& blobDataMap) {
+        config.floatBlobData = blobDataMap;
+        return *this;
+    }
+
+    M equals() {
+        return M(config);
+    }
+
+private:
+    ASIConfig config;
+};
+
+class ASITestBuilder {
+    ASIConfig config;
+public:
+    ASITestBuilder() {
+        config.initBlobs = defaultBlobInit();
+    }
+
+    ASITestBuilder& withData(const InOutData& data) {
+        config.inOutData = data;
+        config.inPrecisions = {data.inOutShapes.inDims.size(), IE::Precision::FP32};
+        config.outPrecisions = {data.inOutShapes.outDims.size(), IE::Precision::FP32};
+        return *this;
+    }
+
+    ASITestBuilder& withType(const std::string& type) {
+        config.type = type;
+        return *this;
+    }
+
+    MatcherConfigurator<ConstInferMatcher> constInferResultFor();
+
+    MatcherConfigurator<ShapeInferMatcher> shapeInferResultFor();
+
+private:
+    InitBlobsFunc defaultBlobInit();
+};
+
+PRETTY_PARAM(BlobsParam, FloatMap)
+
+PRETTY_PARAM(InOutDataParam, InOutData)
diff --git a/inference-engine/tests/unit/shape_infer/built_in_holder_test.cpp b/inference-engine/tests/unit/shape_infer/built_in_holder_test.cpp
index b8661bd8f..35e16eb34 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_holder_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_holder_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_batch_test.cpp b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_batch_test.cpp
index 9f57e35a3..ebf728a5f 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_batch_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_batch_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_conv_test.cpp b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_conv_test.cpp
index 07aaf7fdc..fefdaebeb 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_conv_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_conv_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,7 +15,7 @@ using namespace InferenceEngine;
 using namespace ShapeInfer;
 
 class BuiltInShapeInferConvImplTest
-        : public BuiltInShapeInferTestWithParam<std::tuple<InOutShapes, kernel, stride, pad, auto_pad, out_channels, group, dilation_factor, NewInOutShapes, CanInfer, padrb, IsTransposed>> {
+        : public BuiltInShapeInferTestWithParam<std::tuple<InOutShapes, kernel, stride, pad, auto_pad, out_channels, group, dilation_factor, NewInOutShapes, CanInfer, pad_end, IsTransposed>> {
 protected:
     void SetUp() override {
         BuiltInShapeInferCommon::SetUp();
@@ -30,7 +30,7 @@ protected:
         dilation_factor = std::get<7>(params);
         newInOutShapes = std::get<8>(params);
         canInfer = std::get<9>(params);
-        padrb = std::get<10>(params);
+        pad_end = std::get<10>(params);
         isTransposed = std::get<11>(params);
         if (isTransposed) {
             type = "Deconvolution";
@@ -40,25 +40,6 @@ protected:
 
     std::map<std::string, std::string> getMapParams() {
         std::map<std::string, std::string> params = {
-                {"kernel-x",   std::to_string(kernel.x)},
-                {"kernel-y",   std::to_string(kernel.y)},
-                {"stride-x",   std::to_string(stride.x)},
-                {"stride-y",   std::to_string(stride.y)},
-                {"pad-x",      std::to_string(pad.x)},
-                {"pad-y",      std::to_string(pad.y)},
-                {"output",     std::to_string(out_channels)},
-                {"group",      std::to_string(group)},
-                {"dilation-x", std::to_string(dilation_factor.x)},
-                {"dilation-y", std::to_string(dilation_factor.y)}
-        };
-        if (!auto_pad.empty()) params["auto_pad"] = auto_pad;
-        if (padrb.x) params["pad-r"] = std::to_string(padrb.x);
-        if (padrb.y) params["pad-b"] = std::to_string(padrb.y);
-        return params;
-    }
-
-    std::map<std::string, std::string> getMapParams_IRv3() {
-        std::map<std::string, std::string> params = {
                 {"kernel",     kernel.toSeparetedRow(",")},
                 {"strides",    stride.toSeparetedRow(",")},
                 {"pads_begin", pad.toSeparetedRow(",")},
@@ -67,21 +48,19 @@ protected:
                 {"dilations",  dilation_factor.toSeparetedRow(",")}
         };
         if (!auto_pad.empty()) params["auto_pad"] = auto_pad;
-        if (padrb.x != 0 && padrb.y != 0) {
-            params["pads_end"] = padrb.toSeparetedRow(",");
-        }
+        if (!pad_end.empty()) params["pads_end"] = pad_end.toSeparetedRow(",");
         return params;
     }
 
 protected:
     std::string type = "Convolution";
     std::string dataName = "convolution_data";
-    testing::InOutData inOutShapes;
-    testing::InOutData newInOutShapes;
+    testing::InOutShapes inOutShapes;
+    testing::InOutShapes newInOutShapes;
     param_size kernel{};
     param_size stride{};
     param_size pad{};
-    param_size padrb{};
+    param_size pad_end{};
     param_size dilation_factor{};
     std::string auto_pad;
     unsigned out_channels{};
@@ -92,20 +71,22 @@ protected:
 
 
 TEST_P(BuiltInShapeInferConvImplTest, impl) {
-    InferenceEngine::details::BaseCreator::version_ = 2;
     auto impl = getShapeInferImpl(type);
     ASSERT_NE(nullptr, impl);
     if (!group) group = 1;
-    SizeVector weightsDim{kernel.x * kernel.y * out_channels * inOutShapes.inDims[0][1] / group};
+    unsigned w_dim = out_channels * inOutShapes.inDims[0][1] / group;
+    for (auto k : kernel.dims)
+        w_dim *= k;
+    SizeVector weightsDim{w_dim};
     blobs["weights"] = make_shared_blob(Precision::fromType<size_t>(), weightsDim);
-    ASSERT_NO_THROW(sts = impl->inferShapes(inOutShapes.inDims, getMapParams(), blobs, outShapes, &resp));
+    ASSERT_NO_THROW(sts = impl->inferShapes(getBlobs(inOutShapes.inDims), getMapParams(), blobs, outShapes, &resp));
     ASSERT_EQ(int(OK), sts) << resp.msg;
     ASSERT_EQ(inOutShapes.outDims, outShapes);
 }
 
 TEST_P(BuiltInShapeInferConvImplTest, batch) {
     auto layerParams = getMapParams();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<2>(type, inOutShapes, &layerParams, dataName);
+    auto cnnNetworkImplPtr = buildSingleLayerNetwork<4>(type, inOutShapes, &layerParams, dataName);
     auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
     sts = cnnNetworkImplPtr->setBatchSizeReshape(BATCH, &resp);
     ASSERT_EQ((int) OK, sts) << resp.msg;
@@ -115,38 +96,7 @@ TEST_P(BuiltInShapeInferConvImplTest, batch) {
 
 TEST_P(BuiltInShapeInferConvImplTest, reshaper) {
     auto layerParams = getMapParams();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<2>(type, inOutShapes, &layerParams, dataName);
-    auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
-    auto inputShapes = setInputShapes(*cnnNetworkImplPtr, newInOutShapes.inDims);
-    reshaper->run(inputShapes);
-    checkNetworkInOut(*cnnNetworkImplPtr, newInOutShapes);
-}
-
-TEST_P(BuiltInShapeInferConvImplTest, impl_IRv3) {
-    InferenceEngine::details::BaseCreator::version_ = 3;
-    auto impl = getShapeInferImpl(type);
-    ASSERT_NE(nullptr, impl);
-    if (!group) group = 1;
-    SizeVector weightsDim{kernel.x * kernel.y * out_channels * inOutShapes.inDims[0][1] / group};
-    blobs["weights"] = make_shared_blob(Precision::fromType<size_t>(), weightsDim);
-    ASSERT_NO_THROW(sts = impl->inferShapes(inOutShapes.inDims, getMapParams_IRv3(), blobs, outShapes, &resp));
-    ASSERT_EQ(int(OK), sts) << resp.msg;
-    ASSERT_EQ(inOutShapes.outDims, outShapes);
-}
-
-TEST_P(BuiltInShapeInferConvImplTest, batch_IRv3) {
-    auto layerParams = getMapParams_IRv3();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<3>(type, inOutShapes, &layerParams, dataName);
-    auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
-    sts = cnnNetworkImplPtr->setBatchSizeReshape(BATCH, &resp);
-    ASSERT_EQ((int) OK, sts) << resp.msg;
-    inOutShapes.inDims[0][0] = inOutShapes.outDims[0][0] = BATCH;
-    checkNetworkInOut(*cnnNetworkImplPtr, inOutShapes);
-}
-
-TEST_P(BuiltInShapeInferConvImplTest, reshaper_IRv3) {
-    auto layerParams = getMapParams_IRv3();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<3>(type, inOutShapes, &layerParams, dataName);
+    auto cnnNetworkImplPtr = buildSingleLayerNetwork<4>(type, inOutShapes, &layerParams, dataName);
     auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
     auto inputShapes = setInputShapes(*cnnNetworkImplPtr, newInOutShapes.inDims);
     reshaper->run(inputShapes);
@@ -162,42 +112,42 @@ INSTANTIATE_TEST_CASE_P(
                                       pad({2, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 229, 115}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
                 // fixate pad + dilation
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 225, 109}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 225, 109}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
                 // fixate pad + right/bottom
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 230, 115}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 230, 115}}}),
-                                      CanInfer(true), padrb({3, 2}), IsTransposed(false)),
+                                      CanInfer(true), pad_end({3, 2}), IsTransposed(false)),
                 // valid + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 227, 113}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
                 // valid + dilation
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 223, 107}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 223, 107}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(false)),
                 // valid + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 4}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 227, 113}}}),
-                                      CanInfer(true), padrb({3, 2}), IsTransposed(false)),
+                                      CanInfer(true), pad_end({3, 2}), IsTransposed(false)),
                 // same_upper + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 114}}}), kernel({4, 2}), stride({2, 1}),
@@ -205,7 +155,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 114}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
                 // same_upper + dilation paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 114}}}), kernel({4, 2}), stride({2, 1}),
@@ -213,7 +163,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 114}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(false)),
                 // same_upper + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 114}}}), kernel({4, 2}), stride({2, 1}),
@@ -221,7 +171,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 114}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(false)),
                 // same_lower + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
@@ -229,7 +179,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 113}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
                 // same_lower + dilation
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
@@ -237,7 +187,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 113}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(false)),
                 // same_lower + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
@@ -245,7 +195,37 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 113}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(false))
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(false)),
+                // 5D tensors
+                // fixate pad
+                ::testing::make_tuple(InOutShapes({{{4, 3, 64, 100, 120}},
+                                                   {{4, 64, 66, 101, 61}}}), kernel({4, 2, 1}), stride({2, 1, 1}),
+                                      pad({2, 1, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 64, 100, 120}},
+                                                      {{1, 64, 66, 101, 61}}}),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
+                // fixate pad + right/bottom
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 128}},
+                                                   {{4, 64, 18, 130, 65}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({2, 1, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 16, 128, 128}},
+                                                      {{1, 64, 18, 130, 65}}}),
+                                      CanInfer(true), pad_end({3, 2, 2}), IsTransposed(false)),
+                // valid + fixated paddings (shouldn't affect)
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 64, 15, 127, 64}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({2, 4, 2}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 64, 15, 127, 64}}}),
+                                      CanInfer(true), pad_end({3, 2, 2}), IsTransposed(false)),
+                // same_lower + empty paddings
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 64, 16, 128, 65}}}), kernel({4, 2, 1}), stride({2, 1, 1}),
+                                      pad({0, 0, 0}), auto_pad("same_lower"), out_channels(64), group(1),
+                                      dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 64, 16, 128, 65}}}),
+                                      CanInfer(true), pad_end(), IsTransposed(false))
         )
 );
 
@@ -258,42 +238,42 @@ INSTANTIATE_TEST_CASE_P(
                                       pad({2, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end(), IsTransposed(true)),
                 // fixate pad + dilation
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 231, 466}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 231, 466}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end(), IsTransposed(true)),
                 // fixate pad + right/bottom
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 226, 453}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 226, 453}}}),
-                                      CanInfer(true), padrb({3, 2}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({3, 2}), IsTransposed(true)),
                 // valid + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 229, 459}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 229, 459}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // valid + dilation
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 233, 471}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 233, 471}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // valid + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3,  228, 228}},
                                                    {{4, 64, 233, 471}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 4}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  228, 228}},
                                                       {{1, 64, 233, 471}}}),
-                                      CanInfer(true), padrb({3, 2}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({3, 2}), IsTransposed(true)),
                 // same_upper + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 454}}}), kernel({4, 2}), stride({2, 1}),
@@ -301,7 +281,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // same_upper + dilation paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 454}}}), kernel({4, 2}), stride({2, 1}),
@@ -309,7 +289,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({5, 5}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // same_upper + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 454}}}), kernel({4, 2}), stride({2, 1}),
@@ -317,7 +297,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // same_lower + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 454}}}), kernel({4, 2}), stride({2, 1}),
@@ -325,7 +305,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // same_lower + dilation
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 454}}}), kernel({4, 2}), stride({2, 1}),
@@ -333,7 +313,7 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true)),
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
                 // same_lower + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3,  227, 227}},
                                                    {{4, 64, 227, 454}}}), kernel({4, 2}), stride({2, 1}),
@@ -341,6 +321,36 @@ INSTANTIATE_TEST_CASE_P(
                                       dilation_factor({0, 0}),
                                       NewInOutShapes({{{1, 3,  227, 227}},
                                                       {{1, 64, 227, 454}}}),
-                                      CanInfer(true), padrb({0, 0}), IsTransposed(true))
+                                      CanInfer(true), pad_end({0, 0}), IsTransposed(true)),
+                // 5D tensors
+                // fixate pad
+                ::testing::make_tuple(InOutShapes({{{4, 3, 64, 100, 120}},
+                                                   {{4, 64, 66, 101, 61}}}), kernel({4, 2, 1}), stride({2, 1, 1}),
+                                      pad({2, 1, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0 ,0}),
+                                      NewInOutShapes({{{1, 3,  64, 100, 120}},
+                                                      {{1, 64, 66, 101, 61}}}),
+                                      CanInfer(true), pad_end(), IsTransposed(false)),
+                // fixate pad + right/bottom
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 64, 14, 126, 257}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({2, 1, 1}), auto_pad(""), out_channels(64), group(1), dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 64, 14, 126, 257 }}}),
+                                      CanInfer(true), pad_end({3, 2, 2}), IsTransposed(true)),
+                // valid + fixated paddings (shouldn't affect)
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 64, 15, 127, 64}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({2, 4, 2}), auto_pad("valid"), out_channels(64), group(1), dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 64, 15, 127, 64}}}),
+                                      CanInfer(true), pad_end({3, 2, 2}), IsTransposed(false)),
+                // same_lower + empty paddings
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 64, 16, 128, 65}}}), kernel({4, 2, 1}), stride({2, 1, 1}),
+                                      pad({0, 0, 0}), auto_pad("same_lower"), out_channels(64), group(1),
+                                      dilation_factor({0, 0, 0}),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 64, 16, 128, 65}}}),
+                                      CanInfer(true), pad_end(), IsTransposed(false))
         )
 );
diff --git a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_fake_test.cpp b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_fake_test.cpp
index 2b66d59f8..17f64c07c 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_fake_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_fake_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.cpp b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.cpp
index a7d3a647d..191436552 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -19,7 +19,8 @@ using namespace ShapeInfer;
 TEST_P(BuiltInShapeInferImplTest, impl) {
     auto impl = getShapeInferImpl(type);
     ASSERT_NE(nullptr, impl);
-    ASSERT_NO_THROW(sts = impl->inferShapes(newInOutShapes.inDims, layerParams.data, blobs, outShapes, &resp));
+    ASSERT_NO_THROW(
+            sts = impl->inferShapes(getBlobs(newInOutShapes.inDims), layerParams.data, blobs, outShapes, &resp));
 
     if (canInfer) {
         ASSERT_EQ(int(OK), sts) << resp.msg;
@@ -33,7 +34,6 @@ TEST_P(BuiltInShapeInferImplTest, reshaper) {
     auto cnnNetworkImplPtr = buildSingleLayerNetwork<3>(type, inOutShapes, &layerParams.data, layerDataName);
     auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
     auto inputShapes = setInputShapes(*cnnNetworkImplPtr.get(), newInOutShapes.inDims);
-
     if (canInfer) {
         reshaper->run(inputShapes);
         checkNetworkInOut(*cnnNetworkImplPtr, newInOutShapes);
@@ -63,6 +63,19 @@ INSTANTIATE_TEST_CASE_P(
 );
 
 INSTANTIATE_TEST_CASE_P(
+        BuiltInMultiImpls, BuiltInShapeInferImplTest,
+        ::testing::Combine(
+                ::testing::Values(LayerType("Mul"), LayerType("Eltwise"), LayerType("Add"), LayerType("Div")),
+                ::testing::Values(InOutShapes({{{1, 1, 1, 1}, {1, 1, 1, 1}},
+                                               {{1, 1, 1, 1}}})),
+                ::testing::Values(NewInOutShapes({{{1, 3, 228, 228}, {1, 3, 228, 228}},
+                                                  {{1, 3, 228, 228}}})),
+                ::testing::Values(MapParams(MapStrStr())),
+                ::testing::Values(LayerDataName("data")),
+                ::testing::Values(CanInfer(true)))
+);
+
+INSTANTIATE_TEST_CASE_P(
         BuiltInGeneralImpls, BuiltInShapeInferImplTest,
         ::testing::Values(
                 ::testing::make_tuple(LayerType("LRN"),
@@ -144,9 +157,9 @@ INSTANTIATE_TEST_CASE_P(
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Reshape"),
-                                      InOutShapes({{{1, 1, 300, 4}},
+                                      InOutShapes({{{1,   1, 300, 4}},
                                                    {{300, 4}}}),
-                                      NewInOutShapes({{{1, 1, 500, 4}},
+                                      NewInOutShapes({{{1,   1, 500, 4}},
                                                       {{500, 4}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"dim", "-1,4"}})),
                                       LayerDataName("data"),
@@ -159,11 +172,11 @@ INSTANTIATE_TEST_CASE_P(
                                       MapParams(MapParams(MapStrStr())),
                                       LayerDataName("data"),
                                       CanInfer(true)),
-                ::testing::make_tuple(LayerType("PriorBoxClustered"),
-                                      InOutShapes({{{2, 1, 4, 5}},
+                ::testing::make_tuple(LayerType("PriorBoxClustered"), // TODO 5D test
+                                      InOutShapes({ {{2, 1, 4, 5}, {2, 4, 5, 6}},
                                                    {{1, 2, 400}}}),
-                                      NewInOutShapes({{{4, 1, 5, 5}},
-                                                      {{1, 2, 500}}}),
+                    NewInOutShapes({ {{4, 1, 5, 5}, {3, 5, 6, 3}},
+                                                      {{1, 2, 500}} }),
                                       MapParams(MapStrStr(
                                               std::map<std::string, std::string>{{"width",  "86.000000,13.000000,57.000000,39.000000,68.000000"},
                                                                                  {"clip",   "0"},
@@ -181,6 +194,7 @@ INSTANTIATE_TEST_CASE_P(
                                                                                  {"max_size",     "315"},
                                                                                  {"clip",         "0"},
                                                                                  {"flip",         "1"},
+                                                                                 { "offset",       "0.5" },
                                                                                  {"aspect_ratio", "2"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
@@ -188,16 +202,16 @@ INSTANTIATE_TEST_CASE_P(
                                       InOutShapes({{{2, 512, 32, 32}, {2, 3, 512, 512}},
                                                    {{1, 2,   16384}}}),
                                       NewInOutShapes({{{2, 512, 32, 32}, {2, 3, 512, 512}},
-                                                   {{1, 2,   16384}}}),
+                                                      {{1, 2,   16384}}}),
                                       MapParams(MapStrStr(
-                                          std::map<std::string, std::string>{{"min_size",     "35.84,52.46464"},
-                                                                             {"max_size",     ""},
-                                                                             {"clip",         "0"},
-                                                                             {"step",         "16"},
-                                                                             {"flip",         "0"},
-                                                                             {"offset",       "0.5"},
-                                                                             {"aspect_ratio", "1.0,2.0,0.5"},
-                                                                             {"scale_all_sizes", "0"}})),
+                                              std::map<std::string, std::string>{{"min_size",        "35.84,52.46464"},
+                                                                                 {"max_size",        ""},
+                                                                                 {"clip",            "0"},
+                                                                                 {"step",            "16"},
+                                                                                 {"flip",            "0"},
+                                                                                 {"offset",          "0.5"},
+                                                                                 {"aspect_ratio",    "1.0,2.0,0.5"},
+                                                                                 {"scale_all_sizes", "0"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("PriorBox"),
@@ -206,20 +220,20 @@ INSTANTIATE_TEST_CASE_P(
                                       NewInOutShapes({{{2, 512, 32, 32}, {2, 3, 512, 512}},
                                                       {{1, 2,   28672}}}),
                                       MapParams(MapStrStr(
-                                          std::map<std::string, std::string>{{"min_size",     "35.84,52.46464"},
-                                                                             {"max_size",     ""},
-                                                                             {"clip",         "0"},
-                                                                             {"step",         "16"},
-                                                                             {"offset",       "0.5"},
-                                                                             {"flip",         "1"},
-                                                                             {"aspect_ratio", "1.0,2.0,0.5"},
-                                                                             {"scale_all_sizes", "0"}})),
+                                              std::map<std::string, std::string>{{"min_size",        "35.84,52.46464"},
+                                                                                 {"max_size",        ""},
+                                                                                 {"clip",            "0"},
+                                                                                 {"step",            "16"},
+                                                                                 {"offset",          "0.5"},
+                                                                                 {"flip",            "1"},
+                                                                                 {"aspect_ratio",    "1.0,2.0,0.5"},
+                                                                                 {"scale_all_sizes", "0"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("DetectionOutput"),
-                                      InOutShapes({{{2, 1, 4,   5}},
+                                      InOutShapes({{{2, 1, 4,   5}, { 2, 1, 4,   5 }, { 2, 1, 4,   5 }},
                                                    {{2, 1, 200, 7}}}),
-                                      NewInOutShapes({{{4, 1, 5,   5}},
+                                      NewInOutShapes({{{4, 1, 5,   5}, { 4, 1, 5,   5 }, { 4, 1, 5,   5 }},
                                                       {{1, 1, 800, 7}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"keep_top_k",    "200"},
                                                                                              {"num_classes",   "21"},
@@ -227,52 +241,41 @@ INSTANTIATE_TEST_CASE_P(
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Interp"),
-                                      InOutShapes({{{2, 2, 33,   65}},
+                                      InOutShapes({{{2, 2, 33,  65}},
                                                    {{2, 2, 257, 513}}}),
-                                      NewInOutShapes({{{2, 2, 33,   65}},
+                                      NewInOutShapes({{{2, 2, 33,  65}},
                                                       {{2, 2, 257, 513}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"align_corners", "1"},
-                                                                                             {"height", "257"},
-                                                                                             {"pad_beg", "0"},
-                                                                                             {"pad_end", "0"},
-                                                                                             {"width", "513"}})),
+                                                                                             {"height",        "257"},
+                                                                                             {"pad_beg",       "0"},
+                                                                                             {"pad_end",       "0"},
+                                                                                             {"width",         "513"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Interp"),
-                                      InOutShapes({{{2, 2, 33,   65}},
+                                      InOutShapes({{{2, 2, 33, 65}},
                                                    {{2, 2, 66, 513}}}),
-                                      NewInOutShapes({{{2, 2, 33,   65}},
+                                      NewInOutShapes({{{2, 2, 33, 65}},
                                                       {{2, 2, 66, 513}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"align_corners", "1"},
-                                                                                             {"factor", "2"},
-                                                                                             {"width", "513"},
-                                                                                             {"pad_beg", "0"},
-                                                                                             {"pad_end", "0"}})),
+                                                                                             {"factor",        "2"},
+                                                                                             {"width",         "513"},
+                                                                                             {"pad_beg",       "0"},
+                                                                                             {"pad_end",       "0"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Interp"),
-                                      InOutShapes({{{2, 2, 33,   65}},
+                                      InOutShapes({{{2, 2, 33,  65}},
                                                    {{2, 2, 257, 130}}}),
-                                      NewInOutShapes({{{2, 2, 33,   65}},
+                                      NewInOutShapes({{{2, 2, 33,  65}},
                                                       {{2, 2, 257, 130}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"align_corners", "1"},
-                                                                                             {"factor", "2"},
-                                                                                             {"height", "257"},
-                                                                                             {"pad_beg", "0"},
-                                                                                             {"pad_end", "0"}})),
+                                                                                             {"factor",        "2"},
+                                                                                             {"height",        "257"},
+                                                                                             {"pad_beg",       "0"},
+                                                                                             {"pad_end",       "0"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
-                ::testing::make_tuple(LayerType("Interp"),
-                                      InOutShapes({{{2, 2, 33,   65}},
-                                                   {{2, 2, 257, 130}}}),
-                                      NewInOutShapes({{{2, 2, 33,   65}},
-                                                      {{2, 2, 257, 130}}}),
-                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"align_corners", "1"},
-                                                                                             {"width", "513"},
-                                                                                             {"pad_beg", "0"},
-                                                                                             {"pad_end", "0"}})),
-                                      LayerDataName("data"),
-                                      CanInfer(false)),
                 ::testing::make_tuple(LayerType("ROIPooling"),
                                       InOutShapes({{{2,   3, 4, 5}, {150, 5}},
                                                    {{150, 3, 6, 6}}}),
@@ -292,7 +295,7 @@ INSTANTIATE_TEST_CASE_P(
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("PSROIPooling"),
-                                      InOutShapes({{{1,   3, 4, 5}, {150, 5}},
+                                      InOutShapes({{{1, 3, 4, 5}, {150, 5}},
                                                    {{150, 2, 6, 6}}}),
                                       NewInOutShapes({{{2,   1, 5, 5}, {200, 5}},
                                                       {{200, 2, 6, 6}}}),
@@ -385,14 +388,6 @@ INSTANTIATE_TEST_CASE_P(
                                                                                              {"out_sizes", "2,4"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
-                ::testing::make_tuple(LayerType("CTCGreedyDecoder"),
-                                      InOutShapes({{{88, 1,  48, 1}},
-                                                   {{1,  88, 1,  1}}}),
-                                      NewInOutShapes({{{88, 2,  48, 1}},
-                                                      {{2,  88, 1,  1}}}),
-                                      MapParams(MapStrStr()),
-                                      LayerDataName("data"),
-                                      CanInfer(true)),
                 ::testing::make_tuple(LayerType("Proposal"),
                                       InOutShapes({{{1,   12, 34, 62}, {1, 24, 34, 62}, {1, 6}},
                                                    {{200, 5}}}),
@@ -416,7 +411,9 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{1, 21125}}}),
                                       NewInOutShapes({{{20, 125, 16, 13}},
                                                       {{20, 26000}}}),
-                                      MapParams(MapStrStr()),
+                                      MapParams(MapStrStr({{"axis",       "1"},
+                                                           {"end_axis",   "-1"},
+                                                           {"do_softmax", "1"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("ArgMax"),
@@ -535,14 +532,14 @@ INSTANTIATE_TEST_CASE_P(
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Pad"),
-                                      InOutShapes({{{3, 3, 15, 10}},
+                                      InOutShapes({{{3, 3,  15, 10}},
                                                    {{9, 11, 25, 22}}}),
-                                      NewInOutShapes({{{4, 2, 20, 15}},
+                                      NewInOutShapes({{{4,  2,  20, 15}},
                                                       {{10, 10, 30, 27}}}),
-                                      MapParams(MapStrStr({{"pads_begin",  "1,2,3,4"},
-                                                           {"pads_end",    "5,6,7,8"},
-                                                           {"pad_mode",    "edge"},
-                                                           {"pad_value",   "1.0f"}})),
+                                      MapParams(MapStrStr({{"pads_begin", "1,2,3,4"},
+                                                           {"pads_end",   "5,6,7,8"},
+                                                           {"pad_mode",   "edge"},
+                                                           {"pad_value",  "1.0f"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Pad"),
@@ -550,33 +547,34 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{16, 18, 25, 22}}}),
                                       NewInOutShapes({{{20, 30, 40, 50}},
                                                       {{26, 38, 40, 50}}}),
-                                      MapParams(MapStrStr({{"pads_begin",  "1,2,0,0"},
-                                                           {"pads_end",    "5,6,0,0"},
-                                                           {"pad_mode",    "reflect"},
-                                                           {"pad_value",   "1.0f"}})),
+                                      MapParams(MapStrStr({{"pads_begin", "1,2,0,0"},
+                                                           {"pads_end",   "5,6,0,0"},
+                                                           {"pad_mode",   "reflect"},
+                                                           {"pad_value",  "1.0f"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Pad"),
                                       InOutShapes({{{10, 10, 15, 10}},
                                                    {{16, 18, 25, 22}}}),
-                                      NewInOutShapes({{{4, 2, 20, 15}},
+                                      NewInOutShapes({{{4,  2,  20, 15}},
                                                       {{10, 10, 30, 27}}}),
-                                      MapParams(MapStrStr({{"pads_begin",  "1,2,3,4"},
-                                                           {"pads_end",    "5,6,7,8"},
-                                                           {"pad_mode",    "reflect"},
-                                                           {"pad_value",   "1.0f"}})),
+                                      MapParams(MapStrStr({{"pads_begin", "1,2,3,4"},
+                                                           {"pads_end",   "5,6,7,8"},
+                                                           {"pad_mode",   "reflect"},
+                                                           {"pad_value",  "1.0f"}})),
                                       LayerDataName("data"),
                                       CanInfer(false))
         )
 );
 
+// There are gtest limitation on tests number: 50
 INSTANTIATE_TEST_CASE_P(
         BuiltInGeneralImpls2, BuiltInShapeInferImplTest,
         ::testing::Values(
                 ::testing::make_tuple(LayerType("Gather"),
                                       InOutShapes({{{7, 16}, {1, 25}},
                                                    {{1, 25, 16}}}),
-                                      NewInOutShapes({{{7, 16}, {12, 25}},
+                                      NewInOutShapes({{{7,  16}, {12, 25}},
                                                       {{12, 25, 16}}}),
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"}})),
                                       LayerDataName("data"),
@@ -597,12 +595,29 @@ INSTANTIATE_TEST_CASE_P(
                                       MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "-1"}})),
                                       LayerDataName("data"),
                                       CanInfer(true)),
+                ::testing::make_tuple(LayerType("CTCGreedyDecoder"),
+                                      InOutShapes({{{88, 1,  48, 1}},
+                                                   {{1,  88, 1,  1}}}),
+                                      NewInOutShapes({{{88, 2,  48, 1}},
+                                                      {{2,  88, 1,  1}}}),
+                                      MapParams(MapStrStr()),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("CTCGreedyDecoder"),
+                                      InOutShapes({{{88, 1, 71}, {88, 1}},
+                                                   {{1,  88, 1, 1}}}),
+                                      NewInOutShapes({{{88, 2, 71}, {88, 2}},
+                                                      {{2,  88, 1,  1}}}),
+                                      MapParams(MapStrStr()),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
                 ::testing::make_tuple(LayerType("Reshape"),
                                       InOutShapes({{{1, 2}},
                                                    {{1, 1}}}),
                                       NewInOutShapes({{{1, 2}},
                                                       {{1, 1}}}),
-                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"dim", "1,1"}})),  // dim doesn't match input
+                                      MapParams(MapStrStr(
+                                              std::map<std::string, std::string>{{"dim", "1,1"}})),  // dim doesn't match input
                                       LayerDataName("data"),
                                       CanInfer(false)),
                 ::testing::make_tuple(LayerType("Flatten"),
@@ -610,7 +625,7 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{40}}}),
                                       NewInOutShapes({{{4, 1, 4, 5}},
                                                       {{80}}}),
-                                      MapParams(MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "0"},
+                                      MapParams(MapParams(MapStrStr(std::map<std::string, std::string>{{"axis",     "0"},
                                                                                                        {"end_axis", "-1"}}))),
                                       LayerDataName("data"),
                                       CanInfer(true)),
@@ -619,7 +634,7 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{2, 8, 5}}}),
                                       NewInOutShapes({{{4, 2, 4, 5}},
                                                       {{4, 8, 5}}}),
-                                      MapParams(MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "1"},
+                                      MapParams(MapParams(MapStrStr(std::map<std::string, std::string>{{"axis",     "1"},
                                                                                                        {"end_axis", "2"}}))),
                                       LayerDataName("data"),
                                       CanInfer(true)),
@@ -628,7 +643,8 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{2, 40}}}),
                                       NewInOutShapes({{{4, 2, 4, 5}},
                                                       {{4, 40}}}),
-                                      MapParams(MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "1"}}))),
+                                      MapParams(
+                                              MapParams(MapStrStr(std::map<std::string, std::string>{{"axis", "1"}}))),
                                       LayerDataName("data"),
                                       CanInfer(true)),
                 ::testing::make_tuple(LayerType("Flatten"),
@@ -636,7 +652,114 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{4, 4, 5}}}),
                                       NewInOutShapes({{{4, 2, 4, 5}},
                                                       {{8, 4, 5}}}),
-                                      MapParams(MapParams(MapStrStr(std::map<std::string, std::string>{{"end_axis", "1"}}))),
+                                      MapParams(MapParams(
+                                              MapStrStr(std::map<std::string, std::string>{{"end_axis", "1"}}))),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Interp"),
+                                      InOutShapes({{{2, 2, 100, 16}},
+                                                   {{2, 2, 25,  4}}}),
+                                      NewInOutShapes({{{2, 2, 201, 33}},
+                                                      {{2, 2, 50,  8}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"align_corners", "1"},
+                                                                                             {"factor",        "0.25"},
+                                                                                             {"pad_beg",       "0"},
+                                                                                             {"pad_end",       "0"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Interp"),
+                                      InOutShapes({{{2, 2, 100, 16}},
+                                                   {{2, 2, 100, 16}}}),
+                                      NewInOutShapes({{{2, 2, 101, 33}},
+                                                      {{2, 2, 101, 33}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"align_corners", "1"},
+                                                                                             {"shrink_factor", "1.5"},
+                                                                                             {"zoom_factor",   "1.5"},
+                                                                                             {"pad_beg",       "0"},
+                                                                                             {"pad_end",       "0"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("ShuffleChannels"),
+                                      InOutShapes({{{1, 2, 3, 4}},
+                                                   {{1, 2, 3, 4}}}),
+                                      NewInOutShapes({{{2, 4, 4, 7}},
+                                                      {{2, 4, 4, 7}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"axis",  "1"},
+                                                                                             {"group", "2"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("DepthToSpace"),
+                                      InOutShapes({{{4, 2, 3}},
+                                                   {{1, 4, 6}}}),
+                                      NewInOutShapes({{{8, 3, 4}},
+                                                      {{2, 6, 8}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"block_size", "2"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("SpaceToDepth"),
+                                      InOutShapes({ { { 1, 4, 6 } },
+                                                    { { 4, 2, 3 } } }),
+                                      NewInOutShapes({ { { 2, 6, 8 } },
+                                                       { { 8, 3, 4 } } }),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{ {"block_size", "2"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("ReverseSequence"),
+                                      InOutShapes({{{3, 4, 5}, {3}},
+                                                   {{3, 4, 5}}}),
+                                      NewInOutShapes({{{4, 8, 9}, {4}},
+                                                      {{4, 8, 9}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"seq_axis",   "1"},
+                                                                                             {"batch_axis", "0"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("RegionYolo"),
+                                      InOutShapes({{{1,       125, 13, 13}},
+                                                   {{1 * 125, 13,  13}}}),
+                                      NewInOutShapes({{{20,       125, 16, 13}},
+                                                      {{20 * 125, 16,  13}}}),
+                                      MapParams(MapStrStr({{"axis",       "0"},
+                                                           {"end_axis",   "1"},
+                                                           {"do_softmax", "1"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("RegionYolo"),
+                                      InOutShapes({{{1,            125, 13, 13}},
+                                                   {{1 * 125 * 13, 13}}}),
+                                      NewInOutShapes({{{20,            125, 16, 13}},
+                                                      {{20 * 125 * 16, 13}}}),
+                                      MapParams(MapStrStr({{"axis",       "0"},
+                                                           {"end_axis",   "2"},
+                                                           {"do_softmax", "1"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("RegionYolo"),
+                                      InOutShapes({{{1, 125,                13, 13}},
+                                                   {{1, (80 + 4 + 1) * 125, 13, 13}}}),
+                                      NewInOutShapes({{{20, 125,                16, 13}},
+                                                      {{20, (80 + 4 + 1) * 3, 16, 13}}}),
+                                      MapParams(MapStrStr({{"axis",       "1"},
+                                                           {"end_axis",   "-1"},
+                                                           {"do_softmax", "0"},
+                                                           {"classes",    "80"},
+                                                           {"coords",     "4"},
+                                                           {"mask",       "6,7,8"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Upsampling"),
+                                      InOutShapes({{{1, 3, 4, 5, 6}},
+                                                   {{1, 3, 8, 10, 12}}}),
+                                      NewInOutShapes({{{2, 1, 7, 5, 5}},
+                                                      {{2, 1, 14, 10, 10}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{{"scale", "2"}})),
+                                      LayerDataName("data"),
+                                      CanInfer(true)),
+                ::testing::make_tuple(LayerType("Quantize"),
+                                      InOutShapes({{{1, 64, 10, 10}, {1, 64, 1, 1}, {1, 64, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}},
+                                                   {{1, 64, 10, 10}}}),
+                                      NewInOutShapes({{{2, 128, 10, 10}, {1, 128, 1, 1}, {1, 128, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}},
+                                                      {{2, 128, 10, 10}}}),
+                                      MapParams(MapStrStr(std::map<std::string, std::string>{ {"levels", "2"}})),
                                       LayerDataName("data"),
                                       CanInfer(true))
         )
@@ -668,3 +791,4 @@ INSTANTIATE_TEST_CASE_P(
                 ::testing::Values(CanInfer())
         )
 );
+
diff --git a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.hpp b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.hpp
index 5eac622be..89f7b5a72 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.hpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_general_test.hpp
@@ -1,55 +1,54 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
 #include <gtest/gtest.h>
+#include <inference_engine/blob_factory.hpp>
 #include <inference_engine/shape_infer/built-in/ie_built_in_holder.hpp>
 #include <utility>
 #include <inference_engine/ie_format_parser.h>
 #include <xml_helper.hpp>
 #include <xml_net_builder.hpp>
 #include <single_layer_common.hpp>
+#include <tests_common.hpp>
 
-class BaseTestCreator {
-protected:
-    std::string _type;
-public:
-    explicit BaseTestCreator(const std::string &type) : _type(type) {}
-
-    virtual InferenceEngine::CNNLayerPtr create(const std::string &type) = 0;
-
-    virtual bool shouldCreate(const std::string &type) = 0;
-};
-
-template<class LT>
-class LayerTestCreator : public BaseTestCreator {
-public:
-    explicit LayerTestCreator(const std::string &type) : BaseTestCreator(type) {}
+namespace IE = InferenceEngine;
 
-    InferenceEngine::CNNLayerPtr create(const std::string &type) override {
-        InferenceEngine::LayerParams params;
-        params.type = type;
-        return std::make_shared<LT>(params);
+struct param_size {
+    // dimensions order: x, y, z, ...
+    std::vector<unsigned> dims;
+    param_size() {}
+//    param_size(const std::vector<unsigned>& dims) {
+//        this->dims = dims;
+//    }
+    param_size(std::initializer_list<unsigned> dims) {
+        this->dims = dims;
     }
-
-    bool shouldCreate(const std::string &type) override {
-        return type == _type;
+    bool empty() {
+        return dims.empty();
     }
-};
-
-struct param_size {
-    unsigned x;
-    unsigned y;
 
     friend std::ostream &operator<<(std::ostream &os, param_size const &paramSize) {
-        os << "x=" << std::to_string(paramSize.x) << ", y=" << std::to_string(paramSize.y);
+        auto d_size = paramSize.dims.size();
+        if (d_size > 0) {
+            os << "dims[" << std::to_string(0) << "]=" << std::to_string(paramSize.dims[0]);
+            for (int i = 1; i < paramSize.dims.size(); i++)
+                os << ", dims[" << std::to_string(i) << "]=" << std::to_string(paramSize.dims[i]);
+        }
         return os;
     };
 
     std::string toSeparetedRow(const char *separator) {
-        std::string res = std::to_string(y) + separator + std::to_string(x);
+        auto d_size = dims.size();
+        std::string res;
+        if (d_size > 0) {
+            res = std::to_string(dims[d_size - 1]);
+            for (int i = d_size - 2; i >= 0; i--) {
+                res += separator + std::to_string(dims[i]);
+            }
+        }
         return res;
     }
 };
@@ -60,7 +59,7 @@ PRETTY_PARAM(stride, param_size);
 
 PRETTY_PARAM(pad, param_size);
 
-PRETTY_PARAM(padrb, param_size);
+PRETTY_PARAM(pad_end, param_size);
 
 PRETTY_PARAM(auto_pad, std::string);
 
@@ -78,9 +77,9 @@ PRETTY_PARAM(LayerType, std::string)
 
 PRETTY_PARAM(LayerDataName, std::string)
 
-PRETTY_PARAM(InOutShapes, testing::InOutData)
+PRETTY_PARAM(InOutShapes, testing::InOutShapes)
 
-PRETTY_PARAM(NewInOutShapes, testing::InOutData)
+PRETTY_PARAM(NewInOutShapes, testing::InOutShapes)
 
 PRETTY_PARAM(MapParams, MapStrStr)
 
@@ -94,107 +93,45 @@ PRETTY_PARAM(ModelPath, std::string);
 
 static size_t BATCH = 100;
 
-class BuiltInShapeInferCommon : public ::testing::Test {
+class BuiltInShapeInferCommon : public TestsCommon {
 protected:
     void SetUp() override {
-        holder = std::make_shared<InferenceEngine::ShapeInfer::BuiltInShapeInferHolder>();
+        holder = std::make_shared<IE::ShapeInfer::BuiltInShapeInferHolder>();
     }
 
-    InferenceEngine::IShapeInferImpl::Ptr getShapeInferImpl(const std::string &type) {
-        InferenceEngine::IShapeInferImpl::Ptr impl;
+    IE::IShapeInferImpl::Ptr getShapeInferImpl(const std::string &type) {
+        IE::IShapeInferImpl::Ptr impl;
         sts = holder->getShapeInferImpl(impl, type.c_str(), &resp);
-        if (sts != InferenceEngine::StatusCode::OK) THROW_IE_EXCEPTION << resp.msg;
+        if (sts != IE::StatusCode::OK) THROW_IE_EXCEPTION << resp.msg;
         return impl;
     }
 
 protected:
-    InferenceEngine::StatusCode sts = InferenceEngine::StatusCode::GENERAL_ERROR;
-    InferenceEngine::ResponseDesc resp;
-    std::shared_ptr<InferenceEngine::IShapeInferExtension> holder;
+    IE::StatusCode sts = IE::StatusCode::GENERAL_ERROR;
+    IE::ResponseDesc resp;
+    std::shared_ptr<IE::IShapeInferExtension> holder;
 };
 
 template<class T>
 class BuiltInShapeInferTestWithParam : public BuiltInShapeInferCommon,
                                        public testing::WithParamInterface<T> {
-    const std::vector<std::shared_ptr<BaseTestCreator>> &getCreators() const {
-        // there should be unique_ptr but it cant be used with initializer lists
-        static std::vector<std::shared_ptr<BaseTestCreator> > creators = {
-                std::make_shared<LayerTestCreator<InferenceEngine::PowerLayer>>("Power"),
-                std::make_shared<LayerTestCreator<InferenceEngine::ConvolutionLayer>>("Convolution"),
-                std::make_shared<LayerTestCreator<InferenceEngine::DeconvolutionLayer>>("Deconvolution"),
-                std::make_shared<LayerTestCreator<InferenceEngine::PoolingLayer>>("Pooling"),
-                std::make_shared<LayerTestCreator<InferenceEngine::FullyConnectedLayer>>("InnerProduct"),
-                std::make_shared<LayerTestCreator<InferenceEngine::FullyConnectedLayer>>("FullyConnected"),
-                std::make_shared<LayerTestCreator<InferenceEngine::NormLayer>>("LRN"),
-                std::make_shared<LayerTestCreator<InferenceEngine::NormLayer>>("Norm"),
-                std::make_shared<LayerTestCreator<InferenceEngine::SoftMaxLayer>>("Softmax"),
-                std::make_shared<LayerTestCreator<InferenceEngine::SoftMaxLayer>>("SoftMax"),
-                std::make_shared<LayerTestCreator<InferenceEngine::GRNLayer>>("GRN"),
-                std::make_shared<LayerTestCreator<InferenceEngine::MVNLayer>>("MVN"),
-                std::make_shared<LayerTestCreator<InferenceEngine::ReLULayer>>("ReLU"),
-                std::make_shared<LayerTestCreator<InferenceEngine::ClampLayer>>("Clamp"),
-                std::make_shared<LayerTestCreator<InferenceEngine::SplitLayer>>("Split"),
-                std::make_shared<LayerTestCreator<InferenceEngine::SplitLayer>>("Slice"),
-                std::make_shared<LayerTestCreator<InferenceEngine::ConcatLayer>>("Concat"),
-                std::make_shared<LayerTestCreator<InferenceEngine::EltwiseLayer>>("Eltwise"),
-                std::make_shared<LayerTestCreator<InferenceEngine::ScaleShiftLayer>>("ScaleShift"),
-                std::make_shared<LayerTestCreator<InferenceEngine::PReLULayer>>("PReLU"),
-                std::make_shared<LayerTestCreator<InferenceEngine::CropLayer>>("Crop"),
-                std::make_shared<LayerTestCreator<InferenceEngine::ReshapeLayer>>("Reshape"),
-                std::make_shared<LayerTestCreator<InferenceEngine::TileLayer>>("Tile"),
-                std::make_shared<LayerTestCreator<InferenceEngine::BatchNormalizationLayer>>("BatchNormalization"),
-                std::make_shared<LayerTestCreator<InferenceEngine::GemmLayer>>("Gemm"),
-                std::make_shared<LayerTestCreator<InferenceEngine::PadLayer>>("Pad"),
-                std::make_shared<LayerTestCreator<InferenceEngine::GatherLayer>>("Gather")
-        };
-        return creators;
-    }
 
 protected:
-    InferenceEngine::DataPtr
-    getNotEmptyData(std::string const &name = "", const InferenceEngine::SizeVector &dims = {}) {
-        InferenceEngine::TensorDesc desc(InferenceEngine::Precision::UNSPECIFIED, dims,
-                                         InferenceEngine::TensorDesc::getLayoutByDims(dims));
-        return std::make_shared<InferenceEngine::Data>(name, desc);
-    }
-
-    InferenceEngine::CNNLayer::Ptr createLayer(const std::string &type) const {
-        for (auto &creator : getCreators()) {
-            if (!creator->shouldCreate(type))
-                continue;
-            return creator->create(type);
-        }
-        static LayerTestCreator<InferenceEngine::GenericLayer> genericCreator("");
-        return genericCreator.create(type);
-    }
-
-    void initLayer(const InferenceEngine::CNNLayerPtr &layer, const testing::InOutData &inOutData) {
-        for (const auto &in:inOutData.inDims) {
-            auto data = getNotEmptyData("", in);
-            _savedData.push_back(data);
-            layer->insData.push_back(data);
-        }
-        for (const auto &out:inOutData.outDims) {
-            layer->outData.push_back(getNotEmptyData("", out));
-        }
-    }
-
-    static testing::InOutData getFakeData(const testing::InOutData &inOutShapes) {
-        testing::InOutData initial = inOutShapes;
-        for (auto &dims : initial.inDims) {
-            std::fill(dims.begin(), dims.end(), 1);
-        }
-        for (auto &dims : initial.outDims) {
-            std::fill(dims.begin(), dims.end(), 1);
+    static std::vector<IE::Blob::CPtr> getBlobs(const std::vector<IE::SizeVector>& shapes) {
+        std::vector<IE::Blob::CPtr> inBlobs;
+        for (auto const& dims : shapes) {
+            IE::TensorDesc desc(IE::Precision::FP32, dims, IE::TensorDesc::getLayoutByDims(dims));
+            auto blob = make_blob_with_precision(desc);
+            inBlobs.push_back(blob);
         }
-        return initial;
+        return inBlobs;
     }
 
-    static InferenceEngine::ICNNNetwork::InputShapes
-    setInputShapes(const InferenceEngine::ICNNNetwork &cnnNetwork,
-                   const std::vector<InferenceEngine::SizeVector> &shapesToSet) {
-        InferenceEngine::ICNNNetwork::InputShapes inputShapes;
-        InferenceEngine::InputsDataMap inputs;
+    static IE::ICNNNetwork::InputShapes
+    setInputShapes(const IE::ICNNNetwork &cnnNetwork,
+                   const std::vector<IE::SizeVector> &shapesToSet) {
+        IE::ICNNNetwork::InputShapes inputShapes;
+        IE::InputsDataMap inputs;
         cnnNetwork.getInputsInfo(inputs);
         for (const auto &pair : inputs) {
             auto info = pair.second;
@@ -212,10 +149,10 @@ protected:
         return inputShapes;
     }
 
-    static void checkNetworkInOut(const InferenceEngine::ICNNNetwork &network,
-                                  const testing::InOutData &inOutData) {
-        InferenceEngine::InputsDataMap inputsDataMap;
-        InferenceEngine::OutputsDataMap outputsDataMap;
+    static void checkNetworkInOut(const IE::ICNNNetwork &network,
+                                  const testing::InOutShapes &inOutData) {
+        IE::InputsDataMap inputsDataMap;
+        IE::OutputsDataMap outputsDataMap;
         network.getInputsInfo(inputsDataMap);
         network.getOutputsInfo(outputsDataMap);
         int i = 0;
@@ -229,20 +166,19 @@ protected:
     }
 
     template<int Version = 3>
-    static InferenceEngine::details::CNNNetworkImplPtr
+    static IE::details::CNNNetworkImplPtr
     buildSingleLayerNetwork(const std::string &layerType,
-                            const testing::InOutData &inOutShapes,
+                            const testing::InOutShapes &inOutShapes,
                             std::map<std::string, std::string> *params,
                             const std::string &layerDataName = "data") {
-        auto *parser = new InferenceEngine::details::FormatParser(Version);
+        auto *parser = new IE::details::FormatParser(Version);
         return buildSingleLayerNetworkCommon<Version>(parser, layerType, inOutShapes, params, layerDataName);
     }
 
 protected:
-    std::vector<InferenceEngine::SizeVector> outShapes;
+    std::vector<IE::SizeVector> outShapes;
     std::map<std::string, std::string> params;
-    std::map<std::string, InferenceEngine::Blob::Ptr> blobs;
-    std::vector<InferenceEngine::DataPtr> _savedData;
+    std::map<std::string, IE::Blob::Ptr> blobs;
 };
 
 class BuiltInShapeInferImplTest
@@ -261,8 +197,8 @@ protected:
 
 protected:
     std::string type;
-    testing::InOutData inOutShapes;
-    testing::InOutData newInOutShapes;
+    testing::InOutShapes inOutShapes;
+    testing::InOutShapes newInOutShapes;
     MapStrStr layerParams;
     std::string layerDataName;
     bool canInfer{};
diff --git a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_pool_test.cpp b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_pool_test.cpp
index 487ff8469..de82eb4d9 100644
--- a/inference-engine/tests/unit/shape_infer/built_in_shape_infer_pool_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/built_in_shape_infer_pool_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -15,7 +15,7 @@ using namespace InferenceEngine;
 using namespace ShapeInfer;
 
 class BuiltInShapeInferPoolImplTest
-        : public BuiltInShapeInferTestWithParam<std::tuple<InOutShapes, kernel, stride, pad, pool_type, exclude_pad, auto_pad, NewInOutShapes, padrb>> {
+        : public BuiltInShapeInferTestWithParam<std::tuple<InOutShapes, kernel, stride, pad, pool_type, exclude_pad, auto_pad, NewInOutShapes, pad_end>> {
 protected:
     void SetUp() override {
         BuiltInShapeInferCommon::SetUp();
@@ -28,27 +28,10 @@ protected:
         exclude_pad = std::get<5>(params);
         auto_pad = std::get<6>(params);
         newInOutShapes = std::get<7>(params);
-        padrb = std::get<8>(params);
+        pad_end = std::get<8>(params);
     }
 
     std::map<std::string, std::string> getMapParams() {
-        std::map<std::string, std::string> params{
-                {"kernel-x",    std::to_string(kernel.x)},
-                {"kernel-y",    std::to_string(kernel.y)},
-                {"stride-x",    std::to_string(stride.x)},
-                {"stride-y",    std::to_string(stride.y)},
-                {"pad-x",       std::to_string(pad.x)},
-                {"pad-y",       std::to_string(pad.y)},
-                {"pool-method", pool_type},
-                {"exclude-pad", exclude_pad ? "false" : "true"},
-        };
-        if (!auto_pad.empty()) params["auto_pad"] = auto_pad;
-        if (padrb.x) params["pad-r"] = std::to_string(padrb.x);
-        if (padrb.y) params["pad-b"] = std::to_string(padrb.y);
-        return params;
-    }
-
-    std::map<std::string, std::string> getMapParams_IRv3() {
         std::map<std::string, std::string> params = {
                 {"kernel",      kernel.toSeparetedRow(",")},
                 {"strides",     stride.toSeparetedRow(",")},
@@ -57,36 +40,34 @@ protected:
                 {"exclude-pad", exclude_pad ? "false" : "true"}
         };
         if (!auto_pad.empty()) params["auto_pad"] = auto_pad;
-        if (padrb.x != 0 && padrb.y != 0) {
-            params["pads_end"] = padrb.toSeparetedRow(",");
-        }
+        if (!pad_end.empty()) params["pads_end"] = pad_end.toSeparetedRow(",");
         return params;
     }
 
 protected:
     std::string type = "Pooling";
-    testing::InOutData inOutShapes;
-    testing::InOutData newInOutShapes;
+    testing::InOutShapes inOutShapes;
+    testing::InOutShapes newInOutShapes;
     param_size kernel;
     param_size stride;
     param_size pad;
     std::string pool_type;
     bool exclude_pad;
     std::string auto_pad;
-    param_size padrb;
+    param_size pad_end;
 };
 
 TEST_P(BuiltInShapeInferPoolImplTest, body) {
     auto impl = getShapeInferImpl(type);
     ASSERT_NE(nullptr, impl);
-    ASSERT_NO_THROW(sts = impl->inferShapes(inOutShapes.inDims, getMapParams(), blobs, outShapes, &resp));
+    ASSERT_NO_THROW(sts = impl->inferShapes(getBlobs(inOutShapes.inDims), getMapParams(), blobs, outShapes, &resp));
     ASSERT_EQ(int(OK), sts) << resp.msg;
     ASSERT_EQ(inOutShapes.outDims, outShapes);
 }
 
 TEST_P(BuiltInShapeInferPoolImplTest, reshaper) {
     auto layerParams = getMapParams();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<2>(type, inOutShapes, &layerParams, "pooling_data");
+    auto cnnNetworkImplPtr = buildSingleLayerNetwork<4>(type, inOutShapes, &layerParams, "pooling_data");
     auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
     auto inputShapes = setInputShapes(*cnnNetworkImplPtr, newInOutShapes.inDims);
     reshaper->run(inputShapes);
@@ -95,34 +76,7 @@ TEST_P(BuiltInShapeInferPoolImplTest, reshaper) {
 
 TEST_P(BuiltInShapeInferPoolImplTest, batch) {
     auto layerParams = getMapParams();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<2>(type, inOutShapes, &layerParams, "pooling_data");
-    auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
-    sts = cnnNetworkImplPtr->setBatchSize(BATCH, &resp);
-    ASSERT_EQ((int)OK, sts) << resp.msg;
-    inOutShapes.inDims[0][0] = inOutShapes.outDims[0][0] = BATCH;
-    checkNetworkInOut(*cnnNetworkImplPtr, inOutShapes);
-}
-
-TEST_P(BuiltInShapeInferPoolImplTest, body_IRv3) {
-    auto impl = getShapeInferImpl(type);
-    ASSERT_NE(nullptr, impl);
-    ASSERT_NO_THROW(sts = impl->inferShapes(inOutShapes.inDims, getMapParams_IRv3(), blobs, outShapes, &resp));
-    ASSERT_EQ(int(OK), sts) << resp.msg;
-    ASSERT_EQ(inOutShapes.outDims, outShapes);
-}
-
-TEST_P(BuiltInShapeInferPoolImplTest, reshaper_IRv3) {
-    auto layerParams = getMapParams_IRv3();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<3>(type, inOutShapes, &layerParams, "pooling_data");
-    auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
-    auto inputShapes = setInputShapes(*cnnNetworkImplPtr, newInOutShapes.inDims);
-    reshaper->run(inputShapes);
-    checkNetworkInOut(*cnnNetworkImplPtr, newInOutShapes);
-}
-
-TEST_P(BuiltInShapeInferPoolImplTest, batch_IRv3) {
-    auto layerParams = getMapParams_IRv3();
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<3>(type, inOutShapes, &layerParams, "pooling_data");
+    auto cnnNetworkImplPtr = buildSingleLayerNetwork<4>(type, inOutShapes, &layerParams, "pooling_data");
     auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
     sts = cnnNetworkImplPtr->setBatchSize(BATCH, &resp);
     ASSERT_EQ((int)OK, sts) << resp.msg;
@@ -138,48 +92,67 @@ INSTANTIATE_TEST_CASE_P(
                                                    {{4, 3, 229, 115}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 1}), pool_type("max"), exclude_pad(true), auto_pad(""),
                                       NewInOutShapes({{{1, 3, 228, 228}},
-                                                      {{1, 3, 229, 115}}}), padrb({0, 0})),
+                                                      {{1, 3, 229, 115}}}), pad_end()),
                 // fixate pad + right/bottom
                 ::testing::make_tuple(InOutShapes({{{4, 3, 228, 228}},
                                                    {{4, 3, 229, 115}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 1}), pool_type("max"), exclude_pad(true), auto_pad(""),
                                       NewInOutShapes({{{1, 3, 228, 228}},
-                                                      {{1, 3, 229, 115}}}), padrb({3, 2})),
+                                                      {{1, 3, 229, 115}}}), pad_end({3, 2})),
                 // valid + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3, 228, 228}},
                                                    {{4, 3, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), pool_type("max"), exclude_pad(true), auto_pad("valid"),
                                       NewInOutShapes({{{1, 3, 228, 228}},
-                                                      {{1, 3, 227, 113}}}), padrb({0, 0})),
+                                                      {{1, 3, 227, 113}}}), pad_end()),
                 // valid + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3, 228, 228}},
                                                    {{4, 3, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 4}), pool_type("max"), exclude_pad(true), auto_pad("valid"),
                                       NewInOutShapes({{{1, 3, 228, 228}},
-                                                      {{1, 3, 227, 113}}}), padrb({2, 1})),
+                                                      {{1, 3, 227, 113}}}), pad_end({2, 1})),
                 // same_upper + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3, 227, 227}},
                                                    {{4, 3, 227, 114}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), pool_type("max"), exclude_pad(true), auto_pad("same_upper"),
                                       NewInOutShapes({{{1, 3, 227, 227}},
-                                                      {{1, 3, 227, 114}}}), padrb({0, 0})),
+                                                      {{1, 3, 227, 114}}}), pad_end()),
                 // same_upper + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3, 227, 227}},
                                                    {{4, 3, 227, 114}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 4}), pool_type("max"), exclude_pad(true), auto_pad("same_upper"),
                                       NewInOutShapes({{{1, 3, 227, 227}},
-                                                      {{1, 3, 227, 114}}}), padrb({0, 0})),
+                                                      {{1, 3, 227, 114}}}), pad_end({0, 0})),
                 // same_lower + empty paddings
                 ::testing::make_tuple(InOutShapes({{{4, 3, 227, 227}},
                                                    {{4, 3, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({0, 0}), pool_type("max"), exclude_pad(true), auto_pad("same_lower"),
                                       NewInOutShapes({{{1, 3, 227, 227}},
-                                                      {{1, 3, 227, 113}}}), padrb({0, 0})),
+                                                      {{1, 3, 227, 113}}}), pad_end({0, 0})),
                 // same_lower + fixated paddings (shouldn't affect)
                 ::testing::make_tuple(InOutShapes({{{4, 3, 227, 227}},
                                                    {{4, 3, 227, 113}}}), kernel({4, 2}), stride({2, 1}),
                                       pad({2, 4}), pool_type("max"), exclude_pad(true), auto_pad("same_lower"),
                                       NewInOutShapes({{{1, 3, 227, 227}},
-                                                      {{1, 3, 227, 113}}}), padrb({0, 0}))
+                                                      {{1, 3, 227, 113}}}), pad_end({0, 0})),
+                // 5D tensors
+                // fixate pad
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 3, 17, 129, 66}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({2, 1, 1}), pool_type("max"), exclude_pad(true), auto_pad(""),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 3, 17, 129, 66}}}), pad_end()),
+                // valid + empty paddings
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 3, 15, 127, 64}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({0, 0, 0}), pool_type("max"), exclude_pad(true), auto_pad("valid"),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 3, 15, 127, 64}}}), pad_end()),
+                // same_upper + empty paddings
+                ::testing::make_tuple(InOutShapes({{{4, 3, 16, 128, 130}},
+                                                   {{4, 3, 16, 128, 65}}}), kernel({4, 2, 2}), stride({2, 1, 1}),
+                                      pad({0, 0, 0}), pool_type("max"), exclude_pad(true), auto_pad("same_upper"),
+                                      NewInOutShapes({{{1, 3, 16, 128, 130}},
+                                                      {{1, 3, 16, 128, 65}}}), pad_end())
         )
 );
diff --git a/inference-engine/tests/unit/shape_infer/cpu_ext_shape_infer_general_test.cpp b/inference-engine/tests/unit/shape_infer/cpu_ext_shape_infer_general_test.cpp
deleted file mode 100644
index 4551dd766..000000000
--- a/inference-engine/tests/unit/shape_infer/cpu_ext_shape_infer_general_test.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-// Copyright (C) 2018 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#include <gtest/gtest.h>
-#include <extension/ext_list.hpp>
-#include <xml_net_builder.hpp>
-#include <inference_engine/cnn_network_impl.hpp>
-#include <inference_engine/shape_infer/ie_reshaper.hpp>
-#include <cpp/ie_cnn_net_reader.h>
-#include <test_model_path.hpp>
-#include <inference_engine/debug.h>
-#include <ie_extension.h>
-#include <tests_common.hpp>
-#include "built_in_shape_infer_general_test.hpp"
-
-using namespace InferenceEngine;
-using namespace InferenceEngine::details;
-using namespace ShapeInfer;
-
-class CPUExtShapeInferTests : public BuiltInShapeInferImplTest {
-protected:
-    InferenceEngine::ShapeInferExtension shapeInferExt;
-    CPUExtShapeInferTests () : shapeInferExt(TestsCommon::make_so_name("cpu_extension")) {}
-
-    void SetUp() override {
-        BuiltInShapeInferImplTest::SetUp();
-        holder = std::shared_ptr<IShapeInferExtension>(&shapeInferExt, [](IShapeInferExtension*){});
-    }
-};
-
-TEST_P(CPUExtShapeInferTests, impl) {
-    auto impl = getShapeInferImpl(type);
-    ASSERT_NE(nullptr, impl);
-    ASSERT_NO_THROW(sts = impl->inferShapes(newInOutShapes.inDims, layerParams.data, blobs, outShapes, &resp));
-
-    if (canInfer) {
-        ASSERT_EQ(int(OK), sts) << resp.msg;
-        ASSERT_EQ(newInOutShapes.outDims, outShapes);
-    } else {
-        ASSERT_EQ(GENERAL_ERROR, sts) << resp.msg;
-    }
-}
-
-TEST_P(CPUExtShapeInferTests, reshaper) {
-    auto cnnNetworkImplPtr = buildSingleLayerNetwork<3>(type, inOutShapes, &layerParams.data, layerDataName);
-    auto reshaper = std::make_shared<Reshaper>(*cnnNetworkImplPtr);
-    auto inputShapes = setInputShapes(*cnnNetworkImplPtr.get(), newInOutShapes.inDims);
-    reshaper->AddExtension(holder);
-
-    if (canInfer) {
-        reshaper->run(inputShapes);
-        checkNetworkInOut(*cnnNetworkImplPtr, newInOutShapes);
-    } else {
-        ASSERT_THROW(reshaper->run(inputShapes), InferenceEngine::details::InferenceEngineException);
-    }
-}
-
-INSTANTIATE_TEST_CASE_P(
-        CPUExtGeneralImpls, CPUExtShapeInferTests,
-        ::testing::Values(
-                ::testing::make_tuple(LayerType("SpatialTransformer"),
-                                      InOutShapes({{{1, 6, 5, 5}, {1, 3}},
-                                                   {{1, 6, 5, 5}}}),
-                                      NewInOutShapes({{{2, 6, 5, 6}, {1, 3}},
-                                                      {{2, 6, 5, 6}}}),
-                                      MapParams(MapStrStr()),
-                                      LayerDataName("data"),
-                                      CanInfer(true))
-        )
-);
diff --git a/inference-engine/tests/unit/shape_infer/input_controller_test.cpp b/inference-engine/tests/unit/shape_infer/input_controller_test.cpp
index c6fc3756b..80b7e8643 100644
--- a/inference-engine/tests/unit/shape_infer/input_controller_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/input_controller_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -39,7 +39,7 @@ TEST_F(InputControllerTest, canPushShapes) {
     ASSERT_NO_THROW(controller.setShapeByName(inDims, TEST_NAME));
 }
 
-TEST_F(InputControllerTest, throwOnGetWithNotEnoughShapes) {
+TEST_F(InputControllerTest, DISABLED_throwOnGetWithNotEnoughShapes) {
     InputController controller({notEmptyData, notEmptyData}, TEST_NAME);
     controller.setShapeByName(inDims, TEST_NAME);
     ASSERT_THROW(controller.getShapes(true), InferenceEngineException);
@@ -57,7 +57,7 @@ TEST_F(InputControllerTest, canGetChanges) {
     ASSERT_NO_THROW(controller.getShapes(true));
 }
 
-TEST_F(InputControllerTest, throwOnApplyWithNotEnoughShapes) {
+TEST_F(InputControllerTest, DISABLED_throwOnApplyWithNotEnoughShapes) {
     InputController controller({notEmptyData, notEmptyData}, TEST_NAME);
     controller.setShapeByName(inDims, TEST_NAME);
     ASSERT_THROW(controller.applyChanges(), InferenceEngineException);
@@ -72,7 +72,7 @@ TEST_F(InputControllerTest, canApplyChanges) {
 TEST_F(InputControllerTest, canResetShapes) {
     InputController controller({notEmptyData}, TEST_NAME);
     controller.setShapeByName(inDims, TEST_NAME);
-    ASSERT_FALSE(controller.getShapes(true).empty());
+    ASSERT_EQ(controller.getShapes(true)[0], inDims);
     ASSERT_NO_THROW(controller.reset());
-    ASSERT_THROW(controller.getShapes(true), InferenceEngineException);
+    ASSERT_NE(controller.getShapes(true)[0], inDims);
 }
diff --git a/inference-engine/tests/unit/shape_infer/input_reshape_launcher_test.cpp b/inference-engine/tests/unit/shape_infer/input_reshape_launcher_test.cpp
index 7d99fcbb9..e877334d1 100644
--- a/inference-engine/tests/unit/shape_infer/input_reshape_launcher_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/input_reshape_launcher_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/shape_infer/output_controller_test.cpp b/inference-engine/tests/unit/shape_infer/output_controller_test.cpp
index 8083875e9..c9c197a96 100644
--- a/inference-engine/tests/unit/shape_infer/output_controller_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/output_controller_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/shape_infer/reshape_launcher_test.cpp b/inference-engine/tests/unit/shape_infer/reshape_launcher_test.cpp
index 372d3f43c..22e49b4a8 100644
--- a/inference-engine/tests/unit/shape_infer/reshape_launcher_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/reshape_launcher_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -6,6 +6,7 @@
 #include <gmock/gmock-matchers.h>
 
 #include <inference_engine/shape_infer/ie_reshape_launcher.hpp>
+#include <inference_engine/blob_factory.hpp>
 #include <shape_infer/mock_ishape_infer_impl.hpp>
 #include <shape_infer/mock_reshaper_launcher.hpp>
 
@@ -20,7 +21,15 @@ protected:
         notEmptyData = getNotEmptyData();
         impl = std::make_shared<MockIShapeInferImpl>();
     };
-
+    std::vector<Blob::CPtr> getBlobs(const std::vector<SizeVector>& shapes) {
+        std::vector<Blob::CPtr> inBlobs;
+        for (auto const& dims : shapes) {
+            TensorDesc desc(Precision::FP32, dims, TensorDesc::getLayoutByDims(dims));
+            auto blob = make_blob_with_precision(desc);
+            inBlobs.push_back(blob);
+        }
+        return inBlobs;
+    }
 public:
     StatusCode sts = GENERAL_ERROR;
     ResponseDesc resp;
@@ -32,7 +41,7 @@ public:
     std::map<std::string, std::string> changedParams{{TEST_NAME, TEST_NAME}};
 public:
     DataPtr getNotEmptyData() {
-        return std::make_shared<Data>(TEST_NAME, Precision::UNSPECIFIED, Layout::C);
+        return std::make_shared<Data>(TEST_NAME, Precision::FP32, Layout::C);
     }
 };
 
@@ -92,7 +101,10 @@ TEST_F(ReshapeLauncherTest, throwOnReshapeWihtNotEnoughShapes) {
     ReshapeLauncher launcher(&layer, impl);
 
     launcher.setShapeByName(inDims, TEST_NAME);
-    ASSERT_THROW(launcher.reshape({}), InferenceEngineException);
+    try {
+        launcher.reshape({});
+        FAIL() << "Reshape should be failed!";
+    } catch (...) {}
 }
 
 TEST_F(ReshapeLauncherTest, implIsCalledOnReshape) {
@@ -103,11 +115,12 @@ TEST_F(ReshapeLauncherTest, implIsCalledOnReshape) {
     auto inputController = initializer->getInputController();
     auto outputController = initializer->getOutputController();
     std::vector<SizeVector> shapes{inDims};
+    auto blobs = getBlobs(shapes);
     EXPECT_CALL(*inputController, setShapeByName(inDims, TEST_NAME));
-    EXPECT_CALL(*inputController, getShapes(true)).WillOnce(Return(shapes));
+    EXPECT_CALL(*inputController, getBlobs(true)).WillOnce(Return(blobs));
     EXPECT_CALL(*outputController, setShapes(_));
     EXPECT_CALL(*outputController, propagateShapes(_));
-    EXPECT_CALL(*impl.get(), inferShapes(shapes, _, _, _, _)).WillOnce(Return(OK));
+    EXPECT_CALL(*impl.get(), inferShapes(blobs, _, _, _, _)).WillOnce(Return(OK));
     launcher.setShapeByName(inDims, TEST_NAME);
     launcher.reshape({});
 }
diff --git a/inference-engine/tests/unit/shape_infer/reshaper_test.cpp b/inference-engine/tests/unit/shape_infer/reshaper_test.cpp
index 86364ea73..0566e1480 100644
--- a/inference-engine/tests/unit/shape_infer/reshaper_test.cpp
+++ b/inference-engine/tests/unit/shape_infer/reshaper_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/stress_tests/stress_tests.cpp b/inference-engine/tests/unit/stress_tests/stress_tests.cpp
index 5bb764f40..28bba2f6f 100644
--- a/inference-engine/tests/unit/stress_tests/stress_tests.cpp
+++ b/inference-engine/tests/unit/stress_tests/stress_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
@@ -8,7 +8,6 @@
 
 using namespace std;
 
-#ifdef ENABLE_STRESS_UNIT_TESTS
 class StressTests : public ::testing::Test {
 protected:
     const std::string DUMMY_FILE_NAME = "Dummy.txt";
@@ -43,4 +42,3 @@ TEST_F(StressTests, checkBigFileSize) {
     DummyFileManager::deleteFile(DUMMY_FILE_NAME);
     ASSERT_EQ(size, BIG_FILE_SIZE);
 }
-#endif //ENABLE_STRESS_UNIT_TESTS
diff --git a/inference-engine/tests/unit/topology_verification_tests/v2_topology_verification_test.cpp b/inference-engine/tests/unit/topology_verification_tests/v2_topology_verification_test.cpp
index 34ff736ea..44457b983 100644
--- a/inference-engine/tests/unit/topology_verification_tests/v2_topology_verification_test.cpp
+++ b/inference-engine/tests/unit/topology_verification_tests/v2_topology_verification_test.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
diff --git a/inference-engine/tests/unit/transformations/eltwise_broadcast_test.cpp b/inference-engine/tests/unit/transformations/eltwise_broadcast_test.cpp
new file mode 100644
index 000000000..83f48ef4a
--- /dev/null
+++ b/inference-engine/tests/unit/transformations/eltwise_broadcast_test.cpp
@@ -0,0 +1,63 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <transform/transform_network.hpp>
+#include <transform/transformations/eltwise_broadcast.hpp>
+#include <ie_builders.hpp>
+
+#include "tranformations_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class TransformNetworkTest: public TransformationTestCommon {};
+
+TEST_F(TransformationTestCommon, EltwiseBroadcastOneDimension) {
+    Builder::Network builder("eltwiseBroadcast");
+
+    idx_t firstInputId = builder.addLayer(Builder::InputLayer("FirstInput").setPort(Port({1, 3, 227, 1})));
+    idx_t secondInputId = builder.addLayer(Builder::InputLayer("SecondInput").setPort(Port({1, 3, 227, 227})));
+    idx_t eltwiseSumId = builder.addLayer({firstInputId, secondInputId}, Builder::EltwiseLayer("Sum").
+                                                                         setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM).
+                                                                         setOutputPort(Port({1, 3, 227, 227})));
+    auto network = Transform::Network(builder);
+
+    Transform::TransformationEltwiseBroadcast transformationEltwiseBroadcast;
+    transformationEltwiseBroadcast.execute(network);
+    auto firstInputLayer = network.getLayer(firstInputId);
+    auto tileLayer = network.getLayer(firstInputId).getOutPort().getConnection().getDestination().getLayer();
+    ASSERT_EQ(tileLayer.getType(), "Tile");
+    ASSERT_EQ(tileLayer.getParameter("axis").as<size_t>(), 3);
+    ASSERT_EQ(tileLayer.getParameter("tiles").as<size_t>(), 227);
+    ASSERT_EQ(firstInputLayer.getOutPort().getConnection().getDestination().getLayer().getId(), tileLayer.getId());
+    ASSERT_EQ(tileLayer.getOutPort().getConnection().getDestination().getLayer().getId(), eltwiseSumId);
+}
+
+TEST_F(TransformationTestCommon, EltwiseBroadcastTwoDimensions) {
+    Builder::Network builder("eltwiseBroadcast");
+
+    idx_t firstInputId = builder.addLayer(Builder::InputLayer("FirstInput").setPort(Port({1, 1, 227, 1})));
+    idx_t secondInputId = builder.addLayer(Builder::InputLayer("SecondInput").setPort(Port({1, 3, 227, 227})));
+    idx_t eltwiseSumId = builder.addLayer({firstInputId, secondInputId}, Builder::EltwiseLayer("Sum").
+                                                                         setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUM).
+                                                                         setOutputPort(Port({1, 3, 227, 227})));
+    auto network = Transform::Network(builder);
+
+    Transform::TransformationEltwiseBroadcast transformationEltwiseBroadcast;
+    transformationEltwiseBroadcast.execute(network);
+    auto firstInputLayer = network.getLayer(firstInputId);
+    auto tile1Layer = network.getLayer(firstInputId).getOutPort().getConnection().getDestination().getLayer();
+    auto tile2Layer = tile1Layer.getOutPort().getConnection().getDestination().getLayer();
+    ASSERT_EQ(tile1Layer.getType(), "Tile");
+    ASSERT_EQ(tile1Layer.getParameter("axis").as<size_t>(), 1);
+    ASSERT_EQ(tile1Layer.getParameter("tiles").as<size_t>(), 3);
+    ASSERT_EQ(tile2Layer.getType(), "Tile");
+    ASSERT_EQ(tile2Layer.getParameter("axis").as<size_t>(), 3);
+    ASSERT_EQ(tile2Layer.getParameter("tiles").as<size_t>(), 227);
+    ASSERT_EQ(firstInputLayer.getOutPort().getConnection().getDestination().getLayer().getId(), tile1Layer.getId());
+    ASSERT_EQ(tile1Layer.getOutPort().getConnection().getDestination().getLayer().getId(), tile2Layer.getId());
+    ASSERT_EQ(tile2Layer.getOutPort().getConnection().getDestination().getLayer().getId(), eltwiseSumId);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/transformations/sub_test.cpp b/inference-engine/tests/unit/transformations/sub_test.cpp
new file mode 100644
index 000000000..9e2f93501
--- /dev/null
+++ b/inference-engine/tests/unit/transformations/sub_test.cpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+#include <string.h>
+#include <transform/transform_network.hpp>
+#include <transform/transformations/sub.hpp>
+#include <ie_builders.hpp>
+
+#include "tranformations_test.hpp"
+
+using namespace testing;
+using namespace InferenceEngine;
+
+class TransformNetworkTest: public TransformationTestCommon {};
+
+TEST_F(TransformationTestCommon, Sub) {
+    Builder::Network builder("sub");
+
+    idx_t firstInputId = builder.addLayer(Builder::InputLayer("FirstInput").setPort(Port({1,3, 227, 227})));
+    idx_t secondInputId = builder.addLayer(Builder::InputLayer("SecondInput").setPort(Port({1,3, 227, 227})));
+    idx_t eltwiseSubId = builder.addLayer({firstInputId, secondInputId}, Builder::EltwiseLayer("Sub").setEltwiseType(Builder::EltwiseLayer::EltwiseType::SUB));
+    idx_t clampId = builder.addLayer({eltwiseSubId}, Builder::ClampLayer("clamp"));
+    auto network = Transform::Network(builder);
+
+    Transform::TransformationSub transformationSub;
+    transformationSub.execute(network);
+    ASSERT_THROW(network.getLayer("Sub"), InferenceEngine::details::InferenceEngineException);
+    auto sumLayer = network.getLayer(firstInputId).getOutPort().getConnection().getDestination().getLayer();
+    auto powerLayer = network.getLayer(secondInputId).getOutPort().getConnection().getDestination().getLayer();
+    ASSERT_EQ(sumLayer.getType(), "Eltwise");
+    ASSERT_EQ(sumLayer.getParameter("operation").as<std::string>(), "sum");
+    ASSERT_EQ(powerLayer.getType(), "Power");
+    ASSERT_EQ(powerLayer.getParameter("power").as<float>(), 1.0f);
+    ASSERT_EQ(powerLayer.getParameter("scale").as<float>(), -1.0f);
+    ASSERT_EQ(powerLayer.getParameter("shift").as<float>(), 0.0f);
+    ASSERT_EQ(sumLayer.getOutPort().getConnection().getDestination().getLayer().getId(), clampId);
+}
+\ No newline at end of file
diff --git a/inference-engine/tests/unit/transformations/tranformations_test.hpp b/inference-engine/tests/unit/transformations/tranformations_test.hpp
new file mode 100644
index 000000000..797c2980d
--- /dev/null
+++ b/inference-engine/tests/unit/transformations/tranformations_test.hpp
@@ -0,0 +1,13 @@
+// Copyright (C) 2018-2019 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <string.h>
+#include <ie_builders.hpp>
+#include <blob_factory.hpp>
+
+#include "../builders/builder_test.hpp"
+
+class TransformationTestCommon : public BuilderTestCommon {
+public:
+};
+\ No newline at end of file
diff --git a/inference-engine/tests/validation_app/CMakeLists.txt b/inference-engine/tests/validation_app/CMakeLists.txt
new file mode 100644
index 000000000..04be08c0b
--- /dev/null
+++ b/inference-engine/tests/validation_app/CMakeLists.txt
@@ -0,0 +1,62 @@
+#
+# Copyright (C) 2018-2019 Intel Corporation.
+#
+# This software and the related documents are Intel copyrighted materials,
+# and your use of them is governed by the express license under which they
+# were provided to you (End User License Agreement for the Intel(R) Software
+# Development Products (Version May 2017)). Unless the License provides
+# otherwise, you may not use, modify, copy, publish, distribute, disclose or
+# transmit this software or the related documents without Intel's prior
+# written permission.
+#
+# This software and the related documents are provided as is, with no
+# express or implied warranties, other than those that are expressly
+# stated in the License.
+#
+
+set (TARGET_NAME "test_validation_app")
+
+# Find OpenCV components if exist
+find_package(OpenCV COMPONENTS imgcodecs)
+if(NOT(OpenCV_FOUND))
+    message(WARNING "No suitable OpenCV version detected, " ${TARGET_NAME} " skipped")
+    return()
+endif()
+
+set(VALIDATION_APP_SOURCE "${IE_MAIN_SOURCE_DIR}/samples/validation_app")
+
+file (GLOB MAIN_SRC
+        ${VALIDATION_APP_SOURCE}/*.cpp
+        ${VALIDATION_APP_SOURCE}/pugixml/*.cpp
+        )
+
+file (GLOB MAIN_HEADERS
+        ${VALIDATION_APP_SOURCE}/*.hpp
+        ${VALIDATION_APP_SOURCE}/pugixml/*.hpp
+        )
+
+# Create named folders for the sources within the .vcproj
+# Empty name lists them directly under the .vcproj
+source_group("src" FILES ${MAIN_SRC})
+source_group("include" FILES ${MAIN_HEADERS})
+
+if (WIN32)
+    if(NOT "${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
+        message(FATAL_ERROR "Only 64-bit supported on Windows")
+    endif()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_SCL_SECURE_NO_WARNINGS -DNOMINMAX")
+endif()
+
+# Properties->C/C++->General->Additional Include Directories
+include_directories (${VALIDATION_APP_SOURCE}/../classification_sample/core
+        ${VALIDATION_APP_SOURCE}/../common
+        ${VALIDATION_APP_SOURCE}/../common/os/windows
+        ${VALIDATION_APP_SOURCE}/../../include)
+
+# Create library file from sources.
+
+list(REMOVE_ITEM MAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp)
+
+add_library(${TARGET_NAME} STATIC ${MAIN_SRC} ${MAIN_HEADERS})
+set_target_properties(${TARGET_NAME} PROPERTIES "COMPILE_PDB_NAME" ${TARGET_NAME})
+target_link_libraries(${TARGET_NAME} gflags ie_cpu_extension ${OpenCV_LIBRARIES})
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/CMakeLists.txt b/inference-engine/thirdparty/CMakeLists.txt
index 8277d6c3b..f65f38c7a 100644
--- a/inference-engine/thirdparty/CMakeLists.txt
+++ b/inference-engine/thirdparty/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
 
@@ -8,12 +8,16 @@ if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
 endif()
 
 add_subdirectory(pugixml)
+export(TARGETS pugixml NAMESPACE IE:: APPEND FILE "${CMAKE_BINARY_DIR}/targets.cmake")
+
 add_subdirectory(stb_lib)
 add_subdirectory(ade)
 
 if (ENABLE_CLDNN)
-    set(CLDNN__OUTPUT_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+    set(CLDNN__OUTPUT_BIN_DIR ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+    set(CLDNN__OUTPUT_LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
     set(CLDNN__INCLUDE_TESTS OFF CACHE BOOL "" FORCE)
+    set(CLDNN__INCLUDE_CORE_INTERNAL_TESTS OFF CACHE BOOL "" FORCE)
     set(CLDNN__INCLUDE_EXAMPLES OFF CACHE BOOL "" FORCE)
     set(CLDNN__INCLUDE_TUTORIAL OFF CACHE BOOL "" FORCE)
     if (WIN32)
@@ -22,9 +26,7 @@ if (ENABLE_CLDNN)
         set(CLDNN__ARCHITECTURE_TARGET "Linux64" CACHE STRING "" FORCE)
     endif()
 
-    remove_definitions(-fvisibility=default)
     add_subdirectory(clDNN)
-    add_definitions(-fvisibility=default)
 endif()
 
 if(ENABLE_MKL_DNN)
diff --git a/inference-engine/thirdparty/clDNN/.gitignore b/inference-engine/thirdparty/clDNN/.gitignore
new file mode 100644
index 000000000..8359a8cbf
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/.gitignore
@@ -0,0 +1,7 @@
+build/*
+*.pyc
+*~
+UnixMk
+**/.idea/*
+src/caps/private/*.inc
+/examples/utils/venv
diff --git a/inference-engine/thirdparty/clDNN/CMakeLists.txt b/inference-engine/thirdparty/clDNN/CMakeLists.txt
index 6ce8119d7..624d95c85 100644
--- a/inference-engine/thirdparty/clDNN/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/CMakeLists.txt
@@ -75,6 +75,9 @@ set(CLDNN__COMMON_DIR    "${CMAKE_CURRENT_SOURCE_DIR}/common")
 # Path which points to directory with interface for framework.
 set(CLDNN__API_DIR       "${CMAKE_CURRENT_SOURCE_DIR}/api")
 
+# Path which points to directory with interface extension for framework.
+set(CLDNN__API_EXTENSION_DIR       "${CMAKE_CURRENT_SOURCE_DIR}/api_extension")
+
 # Path which points to directory with interface for framework.
 set(CLDNN__KERNEL_SELECTOR_DIR       "${CMAKE_CURRENT_SOURCE_DIR}/kernel_selector")
 
@@ -338,7 +341,7 @@ endif()
 if(DEFINED CLDNN__OUTPUT_DIR)
   set(CLDNN__OUTPUT_BIN_DIR "${CLDNN__OUTPUT_DIR}" CACHE PATH "Output directory path where the final exetuables, examples and tests will be stored.")
   set(CLDNN__OUTPUT_LIB_DIR "${CLDNN__OUTPUT_DIR}" CACHE PATH "Output directory path where the final libraries will be stored.")
-else()
+elseif(NOT DEFINED CLDNN__OUTPUT_BIN_DIR AND NOT DEFINED CLDNN__OUTPUT_LIB_DIR)
   # Output directory path where the final libraries, examples and tests will be stored.
   if(CLDNN__MULTI_CFG_GEN)
     # Multi-configuration generators automatically append build type subdirectory.
@@ -382,6 +385,13 @@ mark_as_advanced(CLDNN__INCLUDE_TESTS)
 
 # ======================================================================================================
 
+# Include and build: Core Internal Tests (unit tests and small acceptance tests) for core internal clDNN framework mechanisms.
+set(CLDNN__INCLUDE_CORE_INTERNAL_TESTS ON CACHE BOOL "Include and build: clDNN framework's core internal tests.")
+mark_as_advanced(CLDNN__INCLUDE_CORE_INTERNAL_TESTS)
+
+# ======================================================================================================
+
+
 # Include and build: clDNN tutorial.
 set(CLDNN__INCLUDE_TUTORIAL ON CACHE BOOL "Include and build: clDNN Tutorial.")
 mark_as_advanced(CLDNN__INCLUDE_TUTORIAL)
@@ -394,6 +404,12 @@ mark_as_advanced(CLDNN__RUN_TESTS)
 
 # ======================================================================================================
 
+# Run (requires CLDNN__INCLUDE_CORE_INTERNAL_TESTS to be true): Tests (unit tests and small acceptance core internal tests) for clDNN framework.
+set(CLDNN__RUN_CORE_INTERNAL_TESTS OFF CACHE BOOL "Run: clDNN framework's core internal tests.")
+mark_as_advanced(CLDNN__RUN_CORE_INTERNAL_TESTS)
+
+# ======================================================================================================
+
 # Compile / Link: Use static C++ Runtime library.
 set(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME OFF CACHE BOOL "Compile / Link: Use static version of C++ Runtime library instead of shared one.")
 mark_as_advanced(CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME)
@@ -429,6 +445,14 @@ endif()
 
 # ======================================================================================================
 
+# Checking whether tests can be run.
+if((NOT CLDNN__INCLUDE_CORE_INTERNAL_TESTS) AND CLDNN__RUN_CORE_INTERNAL_TESTS)
+  message(WARNING "[clDNN] CLDNN__INCLUDE_CORE_INTERNAL_TESTS: Selected running of core internal tests, but test are not built. Option will be disabled.")
+  set(CLDNN__RUN_CORE_INTERNAL_TESTS OFF)
+endif()
+
+# ======================================================================================================
+
 # Check for python 2.7 interpreter (required tool).
 find_package(PythonInterp 2.7)
 if(NOT PYTHONINTERP_FOUND)
@@ -534,6 +558,8 @@ unset(__CLDNN_IOclIcdDefaultVersion)
 unset(__CLDNN_IOclIcdVersionIdx)
 
 
+# ======================================================================================================
+set(CLDNN_UTILS__RAPIDJSON_INCDIRS "utils/rapidjson" CACHE INTERNAL "Paths to interface headers for rapidjson.")
 # ====================================== Version Calculation ===========================================
 
 if(EXISTS "${CLDNN__VERSION_FILE_NAME}")
@@ -619,17 +645,19 @@ message(STATUS "[clDNN]")
 message(STATUS "[clDNN]")
 message(STATUS "[clDNN] Advanced:")
 if (CLDNN__IOCL_ICD_USE_EXTERNAL)
-  message(STATUS "[clDNN]  - ICD version used to build:     N/A (installed externally)")
+  message(STATUS "[clDNN]  - ICD version used to build:         N/A (installed externally)")
 else()
-  message(STATUS "[clDNN]  - ICD version used to build:     ${CLDNN__IOCL_ICD_VERSION}")
+  message(STATUS "[clDNN]  - ICD version used to build:         ${CLDNN__IOCL_ICD_VERSION}")
 endif()
 message(STATUS "[clDNN]")
-message(STATUS "[clDNN]  - Include/Build cldnn core:      ${CLDNN__INCLUDE_CORE}")
-message(STATUS "[clDNN]  - Include/Build kernel selector: ${CLDNN__INCLUDE_KERNEL_SELECTOR}")
-message(STATUS "[clDNN]  - Include/Build tests:           ${CLDNN__INCLUDE_TESTS}")
-message(STATUS "[clDNN]  - Include/Build tutorial:        ${CLDNN__INCLUDE_TUTORIAL}")
+message(STATUS "[clDNN]  - Include/Build cldnn core:          ${CLDNN__INCLUDE_CORE}")
+message(STATUS "[clDNN]  - Include/Build kernel selector:     ${CLDNN__INCLUDE_KERNEL_SELECTOR}")
+message(STATUS "[clDNN]  - Include/Build tests:               ${CLDNN__INCLUDE_TESTS}")
+message(STATUS "[clDNN]  - Include/Build core internal tests: ${CLDNN__INCLUDE_CORE_INTERNAL_TESTS}")
+message(STATUS "[clDNN]  - Include/Build tutorial:            ${CLDNN__INCLUDE_TUTORIAL}")
 message(STATUS "[clDNN]")
 message(STATUS "[clDNN]  - Run tests:                     ${CLDNN__RUN_TESTS}")
+message(STATUS "[clDNN]  - Run core internal tests:       ${CLDNN__RUN_CORE_INTERNAL_TESTS}")
 message(STATUS "[clDNN]")
 message(STATUS "[clDNN]  - Use static C++ Runtime:        ${CLDNN__COMPILE_LINK_USE_STATIC_RUNTIME}")
 message(STATUS "[clDNN]  - Allow unsafe size opts:        ${CLDNN__COMPILE_LINK_ALLOW_UNSAFE_SIZE_OPT}")
@@ -659,10 +687,10 @@ set(CLDNN_BUILD__PROJ_LABEL__clDNN "clDNN")
 
 # Old.
 set(EXECUTABLE_OUTPUT_PATH "${CLDNN__OUTPUT_BIN_DIR}")
-set(LIBRARY_OUTPUT_PATH    "${CLDNN__OUTPUT_BIN_DIR}")
+set(LIBRARY_OUTPUT_PATH    "${CLDNN__OUTPUT_LIB_DIR}")
 # New.
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CLDNN__OUTPUT_LIB_DIR}")
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CLDNN__OUTPUT_BIN_DIR}")
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CLDNN__OUTPUT_LIB_DIR}")
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CLDNN__OUTPUT_BIN_DIR}")
 
 
@@ -679,7 +707,7 @@ intel_arch_get_os(__CLDNN_TargetOs "${CLDNN__ARCHITECTURE_TARGET}")
 if(__CLDNN_TargetOs MATCHES "^Darwin$")
   set(CMAKE_INSTALL_RPATH "@executable_path")
 else()
-  set(CMAKE_INSTALL_RPATH "$ORIGIN")
+  set(CMAKE_INSTALL_RPATH "$ORIGIN/lib")
 endif()
 unset(__CLDNN_TargetOs)
 
@@ -931,6 +959,7 @@ if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
         SET_RAW
           -Wl,-z,noexecstack,-z,relro,-z,now
       )
+  list(APPEND CLDNN__SYSTEM_LINK_LIBRARIES "dl")
 endif()
 
 if((CMAKE_C_COMPILER_ID MATCHES "^Clang$") OR (CMAKE_CXX_COMPILER_ID MATCHES "^Clang$"))
@@ -944,7 +973,7 @@ if((CMAKE_C_COMPILER_ID MATCHES "^Clang$") OR (CMAKE_CXX_COMPILER_ID MATCHES "^C
         -Wl,-headerpad_max_install_names
     )
 
-  list(APPEND CLDNN__SYSTEM_LINK_LIBRARIES "c++" "c++abi" "supc++")
+  list(APPEND CLDNN__SYSTEM_LINK_LIBRARIES "c++" "c++abi" "supc++" "dl")
 endif()
 
 unset(__CLDNN_LinkerFlagName)
@@ -995,6 +1024,7 @@ endif()
 
 include_directories(
     ${CLDNN__IOCL_ICD_INCDIRS}
+	${CLDNN_UTILS__RAPIDJSON_INCDIRS}
     "${CLDNN__KHR_CLHPP_DIR}"
     "${CLDNN__CODEGEN_INCDIR}"
   )
@@ -1010,6 +1040,9 @@ endif()
 if(CLDNN__INCLUDE_TESTS)
   add_subdirectory(tests)
 endif()
+if(CLDNN__INCLUDE_CORE_INTERNAL_TESTS)
+  add_subdirectory(tests_core_internal)
+endif()
 if(CLDNN__INCLUDE_KERNEL_SELECTOR)
   add_subdirectory(kernel_selector)
 endif()
diff --git a/inference-engine/thirdparty/clDNN/README.md b/inference-engine/thirdparty/clDNN/README.md
index 6a31eb23f..fc0d77e81 100644
--- a/inference-engine/thirdparty/clDNN/README.md
+++ b/inference-engine/thirdparty/clDNN/README.md
@@ -6,7 +6,7 @@
 *Compute Library for Deep Neural Networks* (*clDNN*) is an open source performance
 library for Deep Learning (DL) applications intended for acceleration of
 DL Inference on Intel® Processor Graphics – including HD Graphics and
-Iris® Graphics.  
+Iris® Graphics.
 *clDNN* includes highly optimized building blocks for implementation of
 convolutional neural networks (CNN) with C and C++ interfaces. We created
 this project to enable the DL community to innovate on Intel® processors.
@@ -25,6 +25,7 @@ clDNN is licensed is licensed under
 clDNN uses 3<sup>rd</sup>-party components licensed under following licenses:
 - *googletest* under [Google\* License](https://github.com/google/googletest/blob/master/googletest/LICENSE)
 - *OpenCL™ ICD and C++ Wrapper* under [Khronos™ License](https://github.com/KhronosGroup/OpenCL-CLHPP/blob/master/LICENSE.txt)
+- *RapidJSON* under [Tencent\* License](https://github.com/Tencent/rapidjson/blob/master/license.txt)
 
 ## Documentation
 The latest clDNN documentation is at [GitHub pages](https://intel.github.io/clDNN/index.html).
@@ -41,8 +42,126 @@ clDNN is released also together with Intel® OpenVino™ Toolkit, which contains
 
 You can find more information [here](https://software.intel.com/en-us/openvino-toolkit/deep-learning-cv).
 
+## OpenVINO specific changes
+    New features:
+    - added `not` activation type
+    - added `depth_to_space` layer
+    - new clip options in `detection_output` (cpu impl) and `proposal` layers
+    - added eltwise `xor` and `squared_diff` operations
+    - added `gather` layer
+    - added `bilinear` mode for position sensitive `roi_pooling` layer
+    - added `shuffle_channels` layer
+    - added `strided_slice` layer
+    - added IE gates ordering for lstm layer
+    - added `reverse_sequence` layer
+    Bug fixes:
+    - fixed unknown bool type error in C API
+    - fixed non-relu activation fusing with conv_eltwise node
+    - fixed infinite performance regression on several topologies
+    - minor internal fixes
+    - unified the permute order with cldnn's tensor order
+    Other:
+    - removed boost
+    - supported compilation with c++11 only
+
+
 ## Changelog
 
+### Drop 13.1
+    New features:
+    - added max mode for contract primitive
+    - added one_hot primitive
+    - optional explicit output data type support for all primitives
+    Bug fixes:
+    - fix for graph optimizer (crop primitive)
+    - fix for processing order (deconvolution primitive)
+    - fix for convolution-eltwise primitive
+    UX:
+    - cache.json is searched in to library directory
+    Performance:
+    - optimizations for lstm_gemm primitive
+
+### Drop 13.0
+    New features:
+    - events pool
+    - group support in convolution and deconvolution primitives
+    - broadcastable inputs support for eltwise primitive
+    - asymmetric padding for convolution primitive
+    - fused convolution-eltwise primitive (API extension)
+    - auto-calculated output shape support for reshape primitive
+    - crop support for i8/s8/i32/i64 types
+    - broadcast axis support for broadcast primitive
+    - logic and comparison operations support for eltwise primitive
+    Bug fixes:
+    - added required alignment checks for some fc implementations
+    - added lstm support for f16 (half) type
+    - reorders for fc moved to graph compiler
+    - primitive fusing and reorder fixes
+    UX:
+    - added internal core tests project
+    - refactored optimizations pass manager and passes
+    Performance:
+    - optimized concatenation during upsampling (unpool)
+    - IMAD-based optimizations for convolution, fc, eltwise and pooling primitives (i8/s8)
+    - convolution-eltwise fusing optimizations
+    - partial writes optimizations for block-based kernels
+
+### Drop 12.1
+	- gtests code refactor
+	- buildbreak fix
+
+### Drop 12.0
+    New features:
+    - pyramidRoiAlign primitive
+    - multiple axes support for reverse mode in index_select
+    - eltwise min/max/mod support for i8/i32/i64
+    - broadcast support for i32/i64
+    Bug fixes:
+    - memory leak fixes
+    - in-place reshape
+    - no padding for output primitives
+    UX:
+    - RapidJSON library for auto-tune cache
+    - less dependencies in program.cpp
+    - do not throw error, when device not validated
+    - global pooling in c API
+    - optimized padding for convolution
+
+### Drop 11.0
+    New features:
+    - throttle hints
+    - extended border and tile
+    - GPU implementation of Detection Output
+	- More cases for BatchNorm primitive
+    Bug fixes:
+    - GEMM fix (align with ONNX)
+	- memory leak fix in memory pool
+	- increase FC precision for fp16 (fp32 accu)
+    Performance:
+    - cache for new topologies and devices
+    - conv1x1 with stride >1 into eltwise optimization
+
+### Drop 10.0
+    New features:
+    - condition primitive
+    - fused convolution with bn and scale (backprop)
+    - scale/shit and mean/var as an output in batch norm
+    - add LSTM output selection
+    Bug fixes:
+    - memory pool fixes
+    UX:
+    - downgrade to cxx11
+    - add support for u8 data type in custom primitive
+    - library size optimizations
+    Performance:
+    - in place concatenation optimization
+    - conv1x1 with stride >1 into eltwise optimization
+
+### Drop 9.2
+	New features
+	- local convolution
+	- eltwise with strie
+
 ### Drop 9.1
     New features:
     - select index primitive
@@ -161,7 +280,7 @@ You can find more information [here](https://software.intel.com/en-us/openvino-t
 	- reorder optimization
 	- concatenation optimization
 	- eltwise optimization
-	- activation fusing 
+	- activation fusing
 
 ### Drop 3.0
 	Added:
@@ -183,7 +302,7 @@ You can find more information [here](https://software.intel.com/en-us/openvino-t
 	- initial drop of clDNN
 
 ## Support
-Please report issues and suggestions 
+Please report issues and suggestions
 [GitHub issues](https://github.com/01org/cldnn/issues).
 
 ## How to Contribute
@@ -224,7 +343,7 @@ clDNN supports Intel® HD Graphics and Intel® Iris® Graphics and is optimized
     * Intel® Iris® Graphics 650 (GT3e, *client* market)
     * Intel® HD Graphics P630 (GT2, *server* market)
     * Intel® Iris® Pro Graphics 630 (GT2, *server* market)
-	
+
 clDNN currently uses OpenCL™ with multiple Intel® OpenCL™ extensions and requires Intel® Graphics Driver to run.
 
 clDNN requires CPU with Intel® SSE/Intel® AVX support.
@@ -232,9 +351,9 @@ clDNN requires CPU with Intel® SSE/Intel® AVX support.
 ---
 
 The software dependencies are:
-- [CMake\*](https://cmake.org/download/) 3.5 or later  
+- [CMake\*](https://cmake.org/download/) 3.5 or later
 - C++ compiler with C++11 standard support compatible with:
-    * GNU\* Compiler Collection 4.8 or later  
+    * GNU\* Compiler Collection 4.8 or later
     * clang 3.5 or later
     * [Intel® C++ Compiler](https://software.intel.com/en-us/intel-parallel-studio-xe) 17.0 or later
     * Visual C++ 2015 (MSVC++ 19.0) or later
@@ -242,10 +361,10 @@ The software dependencies are:
 > Intel® CPU intrinsics header (`<immintrin.h>`) must be available during compilation.
 
 - [python™](https://www.python.org/downloads/) 2.7 or later (scripts are both compatible with python™ 2.7.x and python™ 3.x)
-- *(optional)* [Doxygen\*](http://www.stack.nl/~dimitri/doxygen/download.html) 1.8.13 or later  
+- *(optional)* [Doxygen\*](http://www.stack.nl/~dimitri/doxygen/download.html) 1.8.13 or later
     Needed for manual generation of documentation from inline comments or running `docs` custom target which will generate it automatically.
 
-> [GraphViz\*](http://www.graphviz.org/Download..php) (2.38 or later) is also recommended to generate documentation with all embedded diagrams.  
+> [GraphViz\*](http://www.graphviz.org/Download..php) (2.38 or later) is also recommended to generate documentation with all embedded diagrams.
 (Make sure that `dot` application is visible in the `PATH` environment variable.)
 
 ---
@@ -275,14 +394,14 @@ clDNN uses multiple 3<sup>rd</sup>-party components. They are stored in binary f
 
 ---
 
-clDNN uses a CMake-based build system. You can use CMake command-line tool or CMake GUI (`cmake-gui`) to generate required solution.  
+clDNN uses a CMake-based build system. You can use CMake command-line tool or CMake GUI (`cmake-gui`) to generate required solution.
 For Windows system, you can call in `cmd` (or `powershell`):
 ```shellscript
     @REM Generate 32-bit solution (solution contains multiple build configurations)...
     cmake -E make_directory build && cd build && cmake -G "Visual Studio 14 2015" ..
     @REM Generate 64-bit solution (solution contains multiple build configurations)...
     cmake -E make_directory build && cd build && cmake -G "Visual Studio 14 2015 Win64" ..
-```  
+```
 Created solution can be opened in Visual Studio 2015 or built using appropriate `msbuild` tool
 (you can also use `cmake --build .` to select build tool automatically).
 
@@ -324,7 +443,7 @@ CMake solution offers multiple options which you can specify using normal CMake
 | CLDNN__RUN_TESTS                          | BOOL     | Run tests after building `tests` project. This option requires `CLDNN__INCLUDE_TESTS` option to be `ON`. Default: `OFF` |
 |                                           |          |                                                                              |
 | CLDNN__CMAKE_DEBUG                        | BOOL     | Enable extended debug messages in CMake. Default: `OFF`                      |
-    
+
 ---
 
 clDNN includes unit tests implemented using the googletest framework. To validate your build, run `tests` target, e.g.:
diff --git a/inference-engine/thirdparty/clDNN/api/C/batch_norm.h b/inference-engine/thirdparty/clDNN/api/C/batch_norm.h
index c35351c54..e108a411c 100644
--- a/inference-engine/thirdparty/clDNN/api/C/batch_norm.h
+++ b/inference-engine/thirdparty/clDNN/api/C/batch_norm.h
@@ -37,13 +37,17 @@ extern "C" {
 /// 
 /// <b>Algorithm:</b>
 /// @n global stats can be computed as:
-/// @n out[i] = in[i] - mean[b] / sqrt(variance[b] + epsilon)
+/// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
 
 CLDNN_BEGIN_PRIMITIVE_DESC(batch_norm)
 /// @brief Primitive id containing mean data.
 cldnn_primitive_id mean;
 /// @brief Primitive id containing variance.
 cldnn_primitive_id variance;
+/// @brief Primitive id containing scale.
+cldnn_primitive_id scale;
+/// @brief Primitive id containing shift.
+cldnn_primitive_id shift;
 /// @brief Primitive id containing inverted variance used in future gradient computing.
 cldnn_primitive_id inv_variance;
 /// @brief Epsilon.
diff --git a/inference-engine/thirdparty/clDNN/api/C/border.h b/inference-engine/thirdparty/clDNN/api/C/border.h
index a7b90fb5a..5537ca741 100644
--- a/inference-engine/thirdparty/clDNN/api/C/border.h
+++ b/inference-engine/thirdparty/clDNN/api/C/border.h
@@ -36,18 +36,19 @@ typedef enum /*:int32_t*/
 {
     /// @brief All points in the border are set to constant value.
     cldnn_border_constant,
+    cldnn_border_zero = cldnn_border_constant, /// keep bwd compatibilty 
     /// @brief Border is constructed as an mirror of image (edge is also mirrored).
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension.
     cldnn_border_mirror,
-    /// @brief Border is constructed as an replication of edge.
-    /// @details Size of border in any dimension cannot be larger than size of
-    ///          input in the same dimension.
-    cldnn_border_edge,
     /// @brief Border is constructed as an mirror of image (edge is NOT mirrored).
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension decreased by @c 1.
-    cldnn_border_mirror_101
+    cldnn_border_mirror_101,
+    /// @brief Border is constructed as an replication of edge.
+    /// @details Size of border in any dimension cannot be larger than size of
+    ///          input in the same dimension.
+    cldnn_border_edge
 } cldnn_border_type;
 
 
diff --git a/inference-engine/thirdparty/clDNN/api/C/broadcast.h b/inference-engine/thirdparty/clDNN/api/C/broadcast.h
index d431b5c24..d820de203 100644
--- a/inference-engine/thirdparty/clDNN/api/C/broadcast.h
+++ b/inference-engine/thirdparty/clDNN/api/C/broadcast.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,14 +30,16 @@
 extern "C" {
 #endif
 
-/// @brief Broadcasts input to specified output size (broadcast size).
+/// @brief Broadcasts input to defined by @p broadcast_sizes output. @p broadcast_axes are used to
+///        reinterpret input (reshape) inside algorithm.
 ///
-/// @details Takes input and copies it to output once or multiple times, until output will
-///          reach the sizes specified in @p broadcast_sizes.
+/// @details Takes input, reinterpret it according to @p broadcast_axes
+///          and copies it to output once or multiple times.
 /// @n
-/// @n Lets assume that:
+/// @n Simple example with empty @p broadcast_axes. Lets assume that:
 /// @n      <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
 /// @n      <tt>broadcast_sizes = (bs_b, bs_f, bs_y, bs_x)</tt>
+/// @n      <tt>broadcast_axes = () - empty</tt>
 /// @n The input is broadcasted on each dimension where <tt>bs_{dim} > in_{dim}</tt> and <tt>bs_{dim}</tt>
 ///    is dividable by <tt>in_{dim}</tt> (input is copied <tt>bs_{dim} / in_{dim}</tt> times).
 ///    The dimensions where <tt>bs_{dim}</tt> is equal to <tt>in_{dim}</tt> remain unchanged.
@@ -46,22 +48,36 @@ extern "C" {
 /// @n      <tt>output[(b, f, y, x)] = input[(b % in_b, f % in_f, y % in_y, x % in_x)]</tt>
 /// @n where <tt>(b, f, y, x)</tt> is a position of value in a primitive output.
 /// @n
+/// @n More complicated example with non empty @p broadcast_axes. Lets assume that:
+/// @n      <tt>broadcast_sizes = (bs_b, bs_f, bs_y, bs_x)</tt>
+/// @n      <tt>broadcast_axes = (2)</tt>
+/// @n Taking into account broadcast_axes size (=1) primitive's input must be (4 - 1 = 3):
+/// @n      <tt>primitive input = (1, in_b, in_f, in_x)</tt>
+/// @n Due to broadcast_axes = (2) primitive will interpret input as:
+/// @n      <tt>primitive input(internal representation) = (in_b, in_f, 1, in_x)</tt>
+/// @n Now, you can apply broadcast rules from previous example to modified (reinterpreted)
+///    input and output:
+/// @n      <tt>input_sizes = (in_b, in_f, 1, in_x)</tt>
+/// @n      <tt>output_shape = (bs_b, bs_f, bs_y, bs_x)</tt>
+/// @n      <tt>broadcast_axes = () - empty</tt>
+/// @n
 /// @n@b Requirements:
-/// @n - @p broadcast_sizes must be positive on all dimensions and compatible
-///      with size of input (describe the same dimensions).
-/// @n - @p broadcast_sizes must be greater than or equal to input sizes on
-///      all dimensions. (For any dimension, if @p broadcast_sizes is lower
-///      than input size on the dimension then @p broadcast_sizes will be replaced
-///      by input size on this dimension.)
-/// @n - For any dimension, if @p broadcast_sizes is greater than input size on
-///      the dimension then @p broadcast_sizes must be dividable by input size
-///      on this dimension.
-/// @n Breaking any of these conditions will raise an exeption.
+/// @n - @p broadcast_sizes must be positive on all dimensions.
+/// @n - @p broadcast_axes size (dimensions count) must be within (inclusive) range
+///      0 - 4.
+/// @n - @p broadcast_axes mustn't have duplicate values.
+/// @n - Values of @p broadcast_axes must be within (inclusive) range 0 - 3
+/// @n - @p output_shape must be greater (dividable) than or equal to reinterpreted
+///      input on all dimensions.
+/// @n Breaking any of these conditions will raise an exception.
 CLDNN_BEGIN_PRIMITIVE_DESC(broadcast)
 /// @brief Sizes of broadcast. Output size of current primitive will match broadcast sizes (layout type
 ///        will not change).
-///        If @p broadcast_sizes are not specified (all zeros), the input sizes are used as @p broadcast_sizes.
 cldnn_tensor broadcast_sizes;
+/// @brief Array of axes positions from output shape (0-based, from left to right)
+///        along which broadcast should happen.
+cldnn_uint16_t_arr broadcast_axes;
+
 CLDNN_END_PRIMITIVE_DESC(broadcast)
 
 
diff --git a/inference-engine/thirdparty/clDNN/api/C/cldnn.h b/inference-engine/thirdparty/clDNN/api/C/cldnn.h
index 6a61b9eb1..9b705fb17 100644
--- a/inference-engine/thirdparty/clDNN/api/C/cldnn.h
+++ b/inference-engine/thirdparty/clDNN/api/C/cldnn.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -157,8 +157,10 @@ typedef struct
     const char* engine_log;                             ///< Specifies a file to which engine log should be dumped. Null/empty values means no logging.
     const char* sources_dumps_dir;                      ///< Specifies a directory where sources of cldnn::program objects should be dumped. Null/empty values means no loggins.
     /*cldnn_priority_mode_type*/ int16_t priority_mode; ///< Priority mode (support of OpenCL priority hints in command queue).
-    /*cldnn_throttle_mode_type*/ int16_t throttle_mode; ///< Placeholder for throttle mode (support of throttle hints in command queue). It has no effect for now and should be set to cldnn_throttle_disabled.
+    /*cldnn_throttle_mode_type*/ int16_t throttle_mode; ///< Throttle mode (support of throttle hints in command queue).
     uint32_t enable_memory_pool;                        ///< Enables memory usage optimization. memory objects will be reused when possible. 
+    void* context;
+    const char* tuning_cache_path;                      ///< Enables defining other than default path to tuning cache json 
 }  cldnn_engine_configuration;
 
 /// @brief Information about the engine returned by cldnn_get_engine_info().
@@ -212,7 +214,8 @@ typedef enum /*:int32_t*/
     cldnn_build_option_graph_dumps_dir,         ///< Specifies a directory to which stages of network compilation should be dumped.
     cldnn_build_option_serialization,           ///< Specifies a name of files to which serialization should be dumped.
     cldnn_build_option_load_program,            ///< Specifies a name of load_program process.
-    cldnn_build_option_learning_config          ///< User defined learning parameters.
+    cldnn_build_option_learning_config,         ///< User defined learning parameters.
+    cldnn_build_option_detection_output_gpu     ///< Run detection output layer always on GPU, regardless performance
 } cldnn_build_option_type;
 
 /// @brief Tuning modes.
@@ -275,6 +278,8 @@ typedef enum /*:int32_t*/
     cldnn_format_fyxb,          ///< format not used inside clDNN, but supported in reorder as extension for user provided formats.
     cldnn_format_os_iyx_osv16,  ///< format used only for convolution weights: os - output feature maps slice, i - input feature maps, yx - spatials, sv16 - 16 values of single slice.
                                 ///< \n \image html os_iyx_osv16.jpg
+    cldnn_format_os_iyx_osv32,  ///< format used only for convolution weights: os - output feature maps slice, i - input feature maps, yx - spatials, sv32 - 32 values of single slice.
+    cldnn_format_os_iyx_osv64,  ///< format used only for convolution weights: os - output feature maps slice, i - input feature maps, yx - spatials, sv64 - 64 values of single slice.
     cldnn_format_bs_xs_xsv8_bsv8, ///< format used only for fully connected weights: bs - batch slice, xs - x slice, bsv8 - 8 values of single slice.
                                   ///< \n \image html bs_xs_xsv8_bsv8.jpg
     cldnn_format_bs_xs_xsv8_bsv16,///< format used only for fully connected weights: bs - batch slice, xs - x slice, bsv16 - 16 values of single slice.
@@ -287,10 +292,23 @@ typedef enum /*:int32_t*/
                                       ///< \n \image html image_2d_weights_c4_fyx_b.jpg
     cldnn_format_image_2d_weights_c1_b_fyx, ///< image format for weights, image 2d, single channel, width size is b, height is f*y*x
                                       ///< \n \image html image_2d_weights_c1_b_fyx.jpg
-    cldnn_format_byxf_af32,           /// < \n format for input for primitives using MMAD
-    cldnn_format_fs_bs_yx_bs4_fs32, /// < \n format for batched input for primitives using MMAD
+    cldnn_format_winograd_2x3_s1_data,       ///< format used for input for winograd convolution, F(2,3) -- filter 3x3 with stride 1
+    cldnn_format_winograd_2x3_s1_weights,    ///< format used for weights for winograd non-fused convolution, F(2,3) -- filter 3x3 with stride 1
+    cldnn_format_winograd_2x3_s1_fused_weights,    ///< format used for weights for winograd fused convolution, F(2,3) -- filter 3x3 with stride 1
+    cldnn_format_winograd_6x3_s1_fused_weights,    ///< format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
+    cldnn_format_image_2d_weights_winograd_6x3_s1_fbxyb, ///< image format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
+    cldnn_format_image_2d_weights_winograd_6x3_s1_xfbyb, ///< image format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
+    cldnn_format_byxf_af32,               /// < \n format for input for primitives using MMAD
+    cldnn_format_byx8_f4,                 /// < \n format for input for MMAD convolutions
+    cldnn_format_fs_bs_yx_bs4_fs32,       /// < \n format for batched input for primitives using MMAD
     cldnn_format_os_is_yx_isa8_osv8_isv4, /// < \n format for weights for MMAD convolutions, stored as ((aligned_to_8(O)/8) * (aligned_to_32(I)/32) * Y * X * ( 8 ) * ( 8 ) * ( 4 )
+    cldnn_format_os_is_yx_isa8_osv8_isv4_swizzled_by_4, /// < \n format for weights for MMAD convolutions
     cldnn_format_is_o_yx_isv32, /// < \n format for weights for 1x1 MMAD convolutions 
+    cldnn_format_is_o32_yx_isv32_swizzled_by_4, /// < \n format for weights for 1x1 MMAD convolutions
+    cldnn_format_os_is_y_x8_osv8_isv4, /// < n\ format for weights for MMAD convolutions
+    cldnn_bf_lyx_yx,                      /// < \n format for local convolution weights
+    cldnn_format_b_fs_yx_fsv4,            /// < \n format for input for IMAD convolutions
+    cldnn_format_os_is_yx_osv16_isv4,     /// < \n format for weights for IMAD convolutions
     cldnn_format_format_num,    ///< number of format types
     cldnn_format_any = -1
 } cldnn_format_type;
@@ -301,6 +319,7 @@ typedef enum /*:int32_t*/
 #define CLDNN_TENSOR_BATCH_DIM_MAX 1
 #define CLDNN_TENSOR_FEATURE_DIM_MAX 1
 #define CLDNN_TENSOR_SPATIAL_DIM_MAX 2
+#define CLDNN_TENSOR_LOCAL_DIM_MAX 2
 #define CLDNN_TENSOR_DIM_MAX 8
 
 /// @brief N-dimensional vector. Mostly used to represent memory size.
@@ -309,6 +328,7 @@ typedef struct
     size_t batch_num;
     size_t feature_num;
     size_t spatial_num;
+    size_t local_num;
     int32_t sizes[CLDNN_TENSOR_DIM_MAX];
 } cldnn_tensor;
 
@@ -361,6 +381,13 @@ typedef struct
     size_t size;       ///< Size (in uint16_t) of the array.
 } cldnn_uint16_t_arr;
 
+/// @brief Represents reference to an array of uint8_t.
+typedef struct
+{
+    const uint8_t* data; ///< Pointer to uint8_t array.
+    size_t size;       ///< Size (in uint8_t) of the array.
+} cldnn_uint8_t_arr;
+
 /// @brief Represents reference to an array of tensor.
 typedef struct
 {
@@ -381,6 +408,13 @@ typedef struct
     size_t size;                    ///< Number of ids in the array.
 } cldnn_primitive_id_arr;
 
+typedef struct
+{
+    cldnn_data_type data_type;
+    // No bool type available...
+    char enabled;
+} cldnn_optional_data_type;
+
 /// @brief Custom primitive kernel source code
 typedef const char*  cldnn_kernel_code;
 /// @brief Custom primitive kernel source code array
@@ -434,8 +468,9 @@ typedef enum cldnn_activation_func_t
     activation_acos,                    // acos(val)
     activation_cosh,                    // cosh(val)
     activation_log,                     // log(val)
-	activation_log2,					// log2(val)
+    activation_log2,                    // log2(val)
     activation_exp,                     // exp(val)
+    activation_not                      // !(val)
 } cldnn_activation_func;
 
 /// @brief activation gradient functions
@@ -452,6 +487,17 @@ typedef struct cldnn_activation_additional_params_t
     float a, b;
 } cldnn_activation_additional_params;
 
+/// @brief Axis which index_select primitive will index.
+typedef enum index_select_axis_name_t
+{
+    along_b,
+    along_f,
+    along_y,
+    along_x
+} index_select_axis_name;
+
+/// @brief  Axis which index_select primitive will index array
+typedef const index_select_axis_name* index_select_axis_name_arr;
 
 /// @brief reorder mean operation modes
 typedef enum cldnn_reorder_mean_mode_t
@@ -470,7 +516,8 @@ typedef enum cldnn_reorder_mean_mode_t
     cldnn_primitive_type_id type; /**< @brief Primitive type identificator. */\
     cldnn_primitive_id id;        /**< @brief Primitive id unique within a topology. */\
     cldnn_primitive_id_arr input; /**< @brief Input primitives ids. */\
-    cldnn_padding output_padding; /**< @brief Output padding information. */
+    cldnn_padding output_padding; /**< @brief Output padding information. */\
+    cldnn_optional_data_type output_data_type; /**< @brief If specified, describes an explicit change of the output precision of the primitive. */
 
 /// @brief Close primitive descriptor definition.
 #define CLDNN_END_PRIMITIVE_DESC(PType) };
diff --git a/inference-engine/thirdparty/clDNN/api/C/condition.h b/inference-engine/thirdparty/clDNN/api/C/condition.h
new file mode 100644
index 000000000..425803e5d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/condition.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef CONDITION_H
+#define CONDITION_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @brief Function, which will be used during comparison.
+typedef enum /*:int32_t*/
+{
+    EQUAL,
+    GREATER,
+    LESS
+} cldnn_cond_functions;
+
+/// @brief Adds primitive, which works like "if".
+///
+/// @details
+/// @n   Applies comparision between 2 inputs.
+/// @n   Compare data - sizes of that input specifes the range of the comparison.
+/// @n   Offset - offset in memory, when comparing values.
+CLDNN_BEGIN_PRIMITIVE_DESC(condition)
+/// @brief An identifier of topology, which will be executed when comparison returns true.
+cldnn_topology topology_true;
+/// @brief An identifier of topology, which will be executed when comparison returns false.
+cldnn_topology topology_false;
+/// @brief An identifier of primitive which contains compare values.
+cldnn_primitive_id compare_data;
+/// @brief Used function during comparison.
+cldnn_cond_functions function;
+/// @brief Offset for compare data.
+cldnn_tensor offset;
+
+CLDNN_END_PRIMITIVE_DESC(condition)
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(condition);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // CONDITION_H
diff --git a/inference-engine/thirdparty/clDNN/api/C/contract.h b/inference-engine/thirdparty/clDNN/api/C/contract.h
new file mode 100644
index 000000000..9e12cb8e2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/contract.h
@@ -0,0 +1,89 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef CONTRACT_H
+#define CONTRACT_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    /// @brief Select reduction operation for contract layer ( @CLDNN_PRIMITIVE_DESC{contract} ?).
+    typedef enum /*:int32_t*/
+    {
+        /// @brief Sum reduction.
+        cldnn_contract_sum,
+        /// @brief Product reduction.
+        cldnn_contract_product,
+        /// @brief All reduction.
+        cldnn_contract_all,
+        /// @brief Any reduction.
+        cldnn_contract_any,
+        /// @brief Max reduction.
+        cldnn_contract_max
+    } cldnn_contract_mode;
+
+    /// @brief Reduces input with an operation defined by @p mode along defined
+    ///        by @p reduction_axes dimensions.
+    ///
+    /// @details Reduces the input using the binary operation determined by
+    ///          @p mode. The @p reduction_axes determine the final shape of the
+    ///          output, which is calculated based on the input shape by
+    ///          collapsing the dimensions along which the reduction happens.
+    ///          For example, for the input with
+    /// @n      <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
+    /// @n a reduction with
+    /// @n      <tt>reduction_axes = (2)</tt>
+    /// @n would collapse the Y dimension, producing
+    /// @n      <tt>output_shape = (1, in_b, in_f, in_x)</tt>
+    /// @n where every element is a @p mode reduction of the input elements with
+    /// @n the same B, F and X coordinates.
+    /// @n
+    /// @n@b Requirements:
+    /// @n - @p reduction_axes size (dimensions count) must be within (inclusive) range
+    ///      1 - 4.
+    /// @n - @p reduction_axes mustn't have duplicate values.
+    /// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
+    /// @n Breaking any of these conditions will raise an exception.
+    CLDNN_BEGIN_PRIMITIVE_DESC(contract)
+    /// @brief Reduction mode. See #cldnn_contract_mode.
+    int32_t mode; /*cldnn_contract_mode*/
+    /// @brief Array of axes positions from input shape (0-based, from left to right)
+    ///        along which reduction should happen.
+    cldnn_uint16_t_arr reduction_axes;
+
+    CLDNN_END_PRIMITIVE_DESC(contract)
+
+
+        CLDNN_DECLARE_PRIMITIVE_TYPE_ID(contract);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // CONTRACT_H
diff --git a/inference-engine/thirdparty/clDNN/api/C/convolution.h b/inference-engine/thirdparty/clDNN/api/C/convolution.h
index 4be5c23d3..bd79ed266 100644
--- a/inference-engine/thirdparty/clDNN/api/C/convolution.h
+++ b/inference-engine/thirdparty/clDNN/api/C/convolution.h
@@ -64,6 +64,12 @@ cldnn_primitive_id_arr output_calibration_factors;
 float input_quantization_factor;
 /// @brief Output quantization factor
 float output_quantization_factor;
+/// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
+uint32_t groups;
+/// @param padding_above Defines a padding added to input image on left (x axis) and top (y axis).
+cldnn_tensor padding_above;
+/// @param padding_below Defines a padding added to input image on right (x axis) and bottom (y axis).
+cldnn_tensor padding_below;
 
 CLDNN_END_PRIMITIVE_DESC(convolution)
 
diff --git a/inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h b/inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h
index aacd8ff89..ebf783ba3 100644
--- a/inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h
+++ b/inference-engine/thirdparty/clDNN/api/C/convolution_grad_weights.h
@@ -18,6 +18,7 @@
 #ifndef CONVOLUTION_GRAD_WEIGHTS_H
 #define CONVOLUTION_GRAD_WEIGHTS_H
 
+#include <stdbool.h>
 #include "cldnn.h"
 /// @addtogroup c_api C API
 /// @{
@@ -54,6 +55,9 @@ cldnn_primitive_id conv_grad;
 cldnn_primitive_id_arr prev_weights_grad;
 /// @brief Array of primitive ids containing bias gradient data calculated in previous iteration. Amount of primitives and their memory sizes should be same as biases.
 cldnn_primitive_id_arr prev_bias_grad;
+/// @brief Should primitive give weights gradient (delta) as an output
+bool output_grad_w;
+
 CLDNN_END_PRIMITIVE_DESC(convolution_grad_weights)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(convolution_grad_weights);
diff --git a/inference-engine/thirdparty/clDNN/api/C/crop.h b/inference-engine/thirdparty/clDNN/api/C/crop.h
index fd977f0fd..caa7bf1cc 100644
--- a/inference-engine/thirdparty/clDNN/api/C/crop.h
+++ b/inference-engine/thirdparty/clDNN/api/C/crop.h
@@ -31,7 +31,8 @@ extern "C" {
 #endif
 
 /// @brief Performs crop operation on input.
-/// @details Crops the input to the shape of reference_input accross all dimensions taking into account specified input offsets.
+/// @details Crops the input to the shape of reference_input across all dimensions taking into account specified input offsets.
+/// @n       Borders variant calculated output shape from input shape minus the specified borders.
 /// @n
 /// @n\b Examples
 /// @n Crop without offset example:
@@ -39,17 +40,24 @@ extern "C" {
 /// @n Crop with offset example:
 /// \image html crop_w_offset.jpg
 /// @n
-/// @n\b Requirements 
-/// @n - Input, reference and offset layout (order) has to be the same
+/// @n\b Requirements (reference size variant)
 /// @n - Input size cannot be greater than reference size in any dimension
 /// @n - All sizes have to have positive numbers
 /// @n - Reference size plus offset cannot exceed input size
-/// @n Breaking any of this conditions will cause exeption throw.
-
+/// @n
+/// @n\b Requirements (borders variant)
+/// @n - Borders support batch, feature and spatial dimensions (rest of dimensions ignored).
+/// @n - Input size cannot be greater than reference size in any dimension
+/// @n - All sizes specified in borders have to have non-negative values (positive or @c 0).
+/// @n - Sum of sizes of opposite borders must be lower than input size (on all non-ignored dimensions).
+/// @n
+/// @n Breaking any of this conditions will cause exception throw.
 CLDNN_BEGIN_PRIMITIVE_DESC(crop)
-/// @brief Reference input tensor with the required dimensions.
+/// @brief Reference input tensor with the required dimensions (if positive) or
+///        negated value of right/bottom/upper border size (if non-positive).
 cldnn_tensor reference_input;
-/// @brief Input offsets.
+/// @brief Input offsets (reference_input is positive) or left/top/lower border
+///        size (reference_input is negative).
 cldnn_tensor offsets;
 CLDNN_END_PRIMITIVE_DESC(crop)
 
diff --git a/inference-engine/thirdparty/clDNN/api/C/deconvolution.h b/inference-engine/thirdparty/clDNN/api/C/deconvolution.h
index dd1b8e512..a1f034768 100644
--- a/inference-engine/thirdparty/clDNN/api/C/deconvolution.h
+++ b/inference-engine/thirdparty/clDNN/api/C/deconvolution.h
@@ -54,6 +54,8 @@ cldnn_primitive_id_arr weights;
 cldnn_primitive_id_arr bias;
 /// @brief Indicates that deconvolution is used for convolution backward computation (convolution_grad_input)
 uint32_t gradient;
+/// @brief Number of feature groups (grouped deconvolution). If more than 1 then weights/bias count needs to be 1.
+uint32_t groups;
 CLDNN_END_PRIMITIVE_DESC(deconvolution)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(deconvolution);
diff --git a/inference-engine/thirdparty/clDNN/api/C/depth_to_space.h b/inference-engine/thirdparty/clDNN/api/C/depth_to_space.h
new file mode 100644
index 000000000..64e579e8d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/depth_to_space.h
@@ -0,0 +1,49 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef DEPTH_TO_SPACE_H
+#define DEPTH_TO_SPACE_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CLDNN_BEGIN_PRIMITIVE_DESC(depth_to_space)
+/// @brief Size of spatial block in the output tensor. Should be >= 2.
+size_t block_size;
+CLDNN_END_PRIMITIVE_DESC(depth_to_space)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(depth_to_space);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // DEPTH_TO_SPACE_H
diff --git a/inference-engine/thirdparty/clDNN/api/C/detection_output.h b/inference-engine/thirdparty/clDNN/api/C/detection_output.h
index 38d71d5cb..82e1d0319 100644
--- a/inference-engine/thirdparty/clDNN/api/C/detection_output.h
+++ b/inference-engine/thirdparty/clDNN/api/C/detection_output.h
@@ -40,7 +40,7 @@ typedef enum /*:int32_t*/
 
 /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
 /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
-/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1. 
+/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
 CLDNN_BEGIN_PRIMITIVE_DESC(detection_output)
 /// @brief Number of classes to be predicted.
 uint32_t num_classes;
@@ -74,8 +74,10 @@ int32_t input_width;
 int32_t input_height;
 /// @brief Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
 int32_t decrease_label_id;
-/// @brief Clip decoded boxes
-int32_t clip;
+/// @brief Clip decoded boxes right after decoding
+int32_t clip_before_nms;
+/// @brief Clip decoded boxes after nms step
+int32_t clip_after_nms;
 CLDNN_END_PRIMITIVE_DESC(detection_output)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(detection_output);
diff --git a/inference-engine/thirdparty/clDNN/api/C/detection_output_sort.h b/inference-engine/thirdparty/clDNN/api/C/detection_output_sort.h
new file mode 100644
index 000000000..b1e5f3854
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/detection_output_sort.h
@@ -0,0 +1,60 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef DETECTION_OUTPUT_SORT_H
+#define DETECTION_OUTPUT_SORT_H
+
+#include "cldnn.h"
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
+    /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
+    /// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1. 
+    CLDNN_BEGIN_PRIMITIVE_DESC(detection_output_sort)
+        /// @brief Number of classes to be predicted.
+        uint32_t num_classes;
+    /// @brief Number of classes to be predicted.
+    uint32_t num_images;
+    /// @brief Number of total bounding boxes to be kept per image after NMS step.
+    uint32_t keep_top_k;
+    /// @brief If true, bounding box are shared among different classes.
+    uint32_t share_location;
+    /// @brief Maximum number of results to be kept in NMS.
+    int top_k;
+    /// @brief Background label id (-1 if there is no background class).
+    int background_label_id;
+    CLDNN_END_PRIMITIVE_DESC(detection_output_sort)
+
+    CLDNN_DECLARE_PRIMITIVE_TYPE_ID(detection_output_sort);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif /* DETECTION_OUTPUT_SORT_H */
diff --git a/inference-engine/thirdparty/clDNN/api/C/eltwise.h b/inference-engine/thirdparty/clDNN/api/C/eltwise.h
index 1668fddbc..e0f8a796e 100644
--- a/inference-engine/thirdparty/clDNN/api/C/eltwise.h
+++ b/inference-engine/thirdparty/clDNN/api/C/eltwise.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -48,13 +48,34 @@ typedef enum /*:int32_t*/
     /// @brief Eltwise pow.
     cldnn_eltwise_pow,
     /// @brief Eltwise mod.
-    cldnn_eltwise_mod
+    cldnn_eltwise_mod,
+    /// @brief Eltwise equal.
+    cldnn_eltwise_eq,
+    /// @brief Eltwise not equal.
+    cldnn_eltwise_ne,
+    /// @brief Eltwise less.
+    cldnn_eltwise_lt,
+    /// @brief Eltwise less of equal.
+    cldnn_eltwise_le,
+    /// @brief Eltwise greater.
+    cldnn_eltwise_gt,
+    /// @brief Eltwise greater or equal.
+    cldnn_eltwise_ge,
+    /// @brief Eltwise and.
+    cldnn_eltwise_and,
+    /// @brief Eltwise or.
+    cldnn_eltwise_or,
+    /// @brief Eltwise xor.
+    cldnn_eltwise_xor,
+    /// @brief Eltwise squared diff.
+    cldnn_eltwise_squared_diff
 } cldnn_eltwise_mode;
 
 /// @brief Performs elementwise operations (sum, subtract, max or product) on two input primitives
 /// Also supports built-in Relu @CLDNN_PRIMITIVE_DESC{activation} available by setting it in arguments.
 /// @notes
-/// - both inputs have to have equal sizes in all dimensions
+/// - both inputs have to have equal sizes in all dimensions or the input tensors are broadcastable
+///   to the same shape in which the size of each dimention is a max. of input sizes on this dimension)
 /// - format of both inputs has to be the same
 /// - when using integer types, only following eltwise modes are supported: sum, sub, prod, div
 CLDNN_BEGIN_PRIMITIVE_DESC(eltwise)
@@ -70,6 +91,9 @@ cldnn_float_arr coefficients;
 uint32_t with_activation;
 /// @brief Relu activation slope.
 float activation_negative_slope;
+/// @brief Defines shift in input buffers between adjacent calculations of output values.
+cldnn_tensor_arr stride;
+
 CLDNN_END_PRIMITIVE_DESC(eltwise)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(eltwise);
diff --git a/inference-engine/thirdparty/clDNN/api/C/gather.h b/inference-engine/thirdparty/clDNN/api/C/gather.h
new file mode 100644
index 000000000..5457b0c17
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/gather.h
@@ -0,0 +1,58 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef GATHER_H
+#define GATHER_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef enum
+{
+    cldnn_gather_along_b = 0,
+    cldnn_gather_along_f = CLDNN_TENSOR_BATCH_DIM_MAX,
+    cldnn_gather_along_x = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX,
+    cldnn_gather_along_y = cldnn_gather_along_x + 1
+} cldnn_gather_axis;
+
+CLDNN_BEGIN_PRIMITIVE_DESC(gather)
+/// @brief Gathering axis;
+cldnn_gather_axis axis;
+/// @brief Output shape
+cldnn_tensor output_shape;
+CLDNN_END_PRIMITIVE_DESC(gather)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(gather);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // GATHER_H
diff --git a/inference-engine/thirdparty/clDNN/api/C/gemm.h b/inference-engine/thirdparty/clDNN/api/C/gemm.h
index 71690363d..f0311db6f 100644
--- a/inference-engine/thirdparty/clDNN/api/C/gemm.h
+++ b/inference-engine/thirdparty/clDNN/api/C/gemm.h
@@ -34,12 +34,6 @@ extern "C" {
 /// @brief Performs forward attention layer.
 
 CLDNN_BEGIN_PRIMITIVE_DESC(gemm)
-/// @brief Primitive id containing first matrix
-cldnn_primitive_id input1;
-/// @brief Primitive id containing second matrix
-cldnn_primitive_id input2;
-/// @brief Primitive id containing output matrix bias
-cldnn_primitive_id input3;
 /// @brief Variable containing ALPHA parameter
 float alpha;
 /// @brief Variable containing BETA parameter
@@ -48,9 +42,6 @@ float beta;
 bool transpose_input1;
 /// @brief Flag for transposing second input matrix
 bool transpose_input2;
-// NOT SUPPORTED YET
-// /// @brief The sequence output for the hidden. This is not clearly specified in the ONNX definition.
-// uint32_t output_sequence;
 CLDNN_END_PRIMITIVE_DESC(gemm)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(gemm);
diff --git a/inference-engine/thirdparty/clDNN/api/C/index_select.h b/inference-engine/thirdparty/clDNN/api/C/index_select.h
index 907217fc8..d7e1388fd 100644
--- a/inference-engine/thirdparty/clDNN/api/C/index_select.h
+++ b/inference-engine/thirdparty/clDNN/api/C/index_select.h
@@ -18,6 +18,7 @@
 
 #include "cldnn.h"
 
+#include <stdbool.h>
 
 /// @addtogroup c_api C API
 /// @{
@@ -30,15 +31,6 @@
 extern "C" {
 #endif
 
-/// @brief Axis which index_select primitive will index.
-typedef enum /*:int32_t*/
-{
-    cldnn_along_b,
-    cldnn_along_f,
-    cldnn_along_x,
-    cldnn_along_y,
-} cldnn_index_select_axis;
-
 /// @brief Select index, which will be copied to the output..
 ///
 /// @details Applies index selecting along specified dimension. The indices, which will be copied are specifed by 
@@ -64,8 +56,12 @@ typedef enum /*:int32_t*/
 /// @n Breaking any of this conditions will cause exeption throw.
 CLDNN_BEGIN_PRIMITIVE_DESC(index_select)
 
-/// @brief Axis of index selecting.
-cldnn_index_select_axis axis;
+/// @brief A list of axes of index selecting.
+index_select_axis_name_arr axis;
+/// @brief Number of axes of index selecting.
+int axis_num;
+/// @brief Do index_select in reverse order on axis.
+bool reverse;
 
 CLDNN_END_PRIMITIVE_DESC(index_select)
 
diff --git a/inference-engine/thirdparty/clDNN/api/C/lstm.h b/inference-engine/thirdparty/clDNN/api/C/lstm.h
index 10e8eeaaf..fa68f5123 100644
--- a/inference-engine/thirdparty/clDNN/api/C/lstm.h
+++ b/inference-engine/thirdparty/clDNN/api/C/lstm.h
@@ -31,20 +31,43 @@
 extern "C" {
 #endif
 
+/// @brief Weights orders
+/// @details Specifies the order in which the weights are concatenated.
+/// e.g. [i, o, f, z] : [input, output, forget, block]
+/// ONNX order: iofz
+/// Caffe order: ifoz
+/// pyTorch order: izof
+/// IE order: fizo
 typedef enum /*:int32_t*/
 {
-    cldnn_lstm_offset_order_iofz = 0, // ONNX
-    cldnn_lstm_offset_order_ifoz  // Caffe
+    cldnn_lstm_offset_order_iofz = 0,
+    cldnn_lstm_offset_order_ifoz,
+    cldnn_lstm_offset_order_izof,
+    cldnn_lstm_offset_order_fizo
 } cldnn_lstm_offset_order;
 
+/// @brief LSTM Output selection
+/// @details The current implementation allows the use to select the output
+/// of an LSTM node by specifing any of the following options 
+typedef enum /*:int32_t*/
+{
+    /// output the entire hidden sequence
+    cldnn_lstm_output_sequence = 0,
+    /// output just the last hidden value
+    cldnn_lstm_output_hidden,
+    /// output the last hidden and last cell values
+    cldnn_lstm_output_hidden_cell,
+    /// output the hidden sequence concatenated with the last cell
+    cldnn_lstm_output_sequence_cell
+} cldnn_lstm_output;
 
 /// @brief Performs forward Long Short-Term Memory (LSTM) layer.
-/// @details The current implementation of LSTM supports Peepholes.
-///   it = f(Xt*(Wi^T) + Ht-1*Ri + Pi (.) Ct-1 + Wbi + Rbi)
-///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Pf (.) Ct-1 + Wbf + Rbf)
-///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc + Rbc)
+/// @details The current implementation of LSTM is described the following equations.
+///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
+///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Wbf)
+///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc)
 ///   Ct = ft (.) Ct-1 + it (.) ct
-///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Po (.) Ct + Wbo + Rbo)
+///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
 ///   Ht = ot (.) h(Ct)
 /// Where f = Sigmoid, g = Tanh, and h = Tanh.
 CLDNN_BEGIN_PRIMITIVE_DESC(lstm)
@@ -68,10 +91,11 @@ bool input_forget;
 cldnn_activation_func activations[3];
 /// @brief Optional scaling values used by some activation functions. The values are consumed in the order of activation functions.
 cldnn_activation_additional_params activation_params[3];
+/// @brief Output selection. Default the entire hidden sequence is returned
+cldnn_lstm_output output_selection;
 /// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
 cldnn_lstm_offset_order offset_order;
 // NOT SUPPORTED YET
-// /// @brief The sequence output for the hidden. This is not clearly specified in the ONNX definition.
 // uint32_t output_sequence;
 CLDNN_END_PRIMITIVE_DESC(lstm)
 
@@ -113,8 +137,9 @@ cldnn_activation_func activations[3];
 cldnn_activation_additional_params activation_params[3];
 /// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
 cldnn_lstm_offset_order offset_order;
+/// @brief direction default = 0, bidirectional = 1.
+uint32_t direction;
 // NOT SUPPORTED YET
-// /// @brief The sequence output for the hidden. This is not clearly specified in the ONNX definition.
 // uint32_t output_sequence;
 CLDNN_END_PRIMITIVE_DESC(lstm_elt)
 
diff --git a/inference-engine/thirdparty/clDNN/api/C/one_hot.h b/inference-engine/thirdparty/clDNN/api/C/one_hot.h
new file mode 100644
index 000000000..d53cc6c36
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/one_hot.h
@@ -0,0 +1,71 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef ONE_HOT_H
+#define ONE_HOT_H
+
+#include "cldnn.h"
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    /// @brief Creates a one-hot encoding of the input.
+    /// @details Creates a one-hot encoding of the input, putting the new one-hot axis in the position
+    /// @n       specified by the @p one_hot_axis input, using the @p shape tensor as size reference.
+    /// @n       The size of @p shape must be appropriate for adding a one-hot axis to input. For example,
+    /// @n      <tt>input_sizes = (1, in_f, in_y, in_x)</tt> 
+    /// @n expanded with 
+    /// @n      <tt>one_hot_axis = 2</tt> 
+    /// @n would insert the one-hot axis in the Y dimension, requiring
+    /// @n      <tt>shape = (in_f, in_y, one-hot_limit, in_x)</tt> 
+    /// @n The output values would then be determined by input as
+    /// @n      <tt>output[f, y, i, x] = (input[0, f, y, x] == i) ? 1 : 0;</tt>
+    /// @n Since determining whether the input is appropriate (that the one-hot axis
+    /// @n has enough space to fully encode all inputs) requires scanning the whole
+    /// @n input, the primitive doesn't check for that, instead producing all-zeros
+    /// @n output axes for inputs below 0 and greater than the limit set by
+    /// @n @p shape.
+    /// @n
+    /// @n\b Requirements
+    /// @n - @p one_hot_axis must be within (inclusive) range 0 - 3.
+    /// @n - @p shape must fit input sizes (see example above).
+    /// @n - input batch size must be equal to 1.
+    /// @n
+    /// @n Breaking any of this conditions will cause exception throw.
+    CLDNN_BEGIN_PRIMITIVE_DESC(one_hot)
+    /// @brief Output size reference.
+    cldnn_tensor shape;
+    /// @brief One-hot axis position in output shape (0-based, from left to right).
+    uint16_t one_hot_axis;
+    CLDNN_END_PRIMITIVE_DESC(one_hot)
+
+        CLDNN_DECLARE_PRIMITIVE_TYPE_ID(one_hot);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif /* ONE_HOT_H */
+
diff --git a/inference-engine/thirdparty/clDNN/api/C/pooling.h b/inference-engine/thirdparty/clDNN/api/C/pooling.h
index a8148fc84..1078a46cc 100644
--- a/inference-engine/thirdparty/clDNN/api/C/pooling.h
+++ b/inference-engine/thirdparty/clDNN/api/C/pooling.h
@@ -52,6 +52,8 @@ CLDNN_BEGIN_PRIMITIVE_DESC(pooling)
 cldnn_primitive_id argmax;
 /// @brief Pooling method. See #cldnn_pooling_mode.
 int32_t mode;
+/// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
+int8_t global_pooling;
 /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the pooling window should start calculations.
 cldnn_tensor input_offset;
 /// @brief Defines shift in input buffer between adjacent calculations of output values.
diff --git a/inference-engine/thirdparty/clDNN/api/C/proposal.h b/inference-engine/thirdparty/clDNN/api/C/proposal.h
index c57175924..991cae4e7 100644
--- a/inference-engine/thirdparty/clDNN/api/C/proposal.h
+++ b/inference-engine/thirdparty/clDNN/api/C/proposal.h
@@ -47,8 +47,11 @@ CLDNN_BEGIN_PRIMITIVE_DESC(proposal)
     float box_size_scale;
     uint32_t swap_xy;
     uint32_t initial_clip;
+    uint32_t clip_before_nms;
+    uint32_t clip_after_nms;
     uint32_t round_ratios;
     uint32_t shift_anchors;
+    uint32_t normalize;
 CLDNN_END_PRIMITIVE_DESC(proposal)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(proposal);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2.cpp b/inference-engine/thirdparty/clDNN/api/C/pyramid_roi_align.h
index 4ebd2fcb9..e33663ae8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2.cpp
+++ b/inference-engine/thirdparty/clDNN/api/C/pyramid_roi_align.h
@@ -1,4 +1,3 @@
-/*
 // Copyright (c) 2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
@@ -12,17 +11,22 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    //SKL GT2
-    void tuning_cache_1912(tuning_data& td)
-    {
-        tuning_cache_1912_B1_B16(td);
-        tuning_cache_1912_B8(td);
-        tuning_cache_1912_B32_B64(td);
-    }
-}
-\ No newline at end of file
+
+#pragma once
+
+#include "cldnn.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    CLDNN_BEGIN_PRIMITIVE_DESC(pyramid_roi_align)
+
+    CLDNN_END_PRIMITIVE_DESC(pyramid_roi_align)
+
+    CLDNN_DECLARE_PRIMITIVE_TYPE_ID(pyramid_roi_align);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/inference-engine/thirdparty/clDNN/api/C/reorder.h b/inference-engine/thirdparty/clDNN/api/C/reorder.h
index 67c504f35..bfe37a446 100644
--- a/inference-engine/thirdparty/clDNN/api/C/reorder.h
+++ b/inference-engine/thirdparty/clDNN/api/C/reorder.h
@@ -37,8 +37,6 @@ extern "C" {
 CLDNN_BEGIN_PRIMITIVE_DESC(reorder)
 /// @brief Requested memory format.
 cldnn_format_type output_format;
-/// @brief Requested memory data type.
-cldnn_data_type output_data_type;
 /// @brief Primitive id to get mean subtract values. Ignored if subtract_per_featrue is set.
 cldnn_primitive_id mean_subtract;
 /// @brief Array of mean subtract values.
diff --git a/inference-engine/thirdparty/clDNN/api/C/reverse_sequence.h b/inference-engine/thirdparty/clDNN/api/C/reverse_sequence.h
new file mode 100644
index 000000000..7a7ec96b9
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/reverse_sequence.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef REVERSE_SEQUENCE_H
+#define REVERSE_SEQUENCE_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CLDNN_BEGIN_PRIMITIVE_DESC(reverse_sequence)
+/// @brief The axis which is partially reversed.
+int32_t seq_axis;
+/// @brief The axis along which reversal is performed.
+int32_t batch_axis;
+CLDNN_END_PRIMITIVE_DESC(reverse_sequence)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(reverse_sequence);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // REVERSE_SEQUENCE_H
diff --git a/inference-engine/thirdparty/clDNN/api/C/roi_pooling.h b/inference-engine/thirdparty/clDNN/api/C/roi_pooling.h
index 846d1ee94..7ada955ae 100644
--- a/inference-engine/thirdparty/clDNN/api/C/roi_pooling.h
+++ b/inference-engine/thirdparty/clDNN/api/C/roi_pooling.h
@@ -18,6 +18,7 @@
 #ifndef ROI_POOLING_H
 #define ROI_POOLING_H
 
+#include <stdbool.h>
 #include "cldnn.h"
 /// @addtogroup c_api C API
 /// @{
@@ -34,16 +35,20 @@ extern "C" {
 CLDNN_BEGIN_PRIMITIVE_DESC(roi_pooling)
 /// @brief Pooling method. See #cldnn_pooling_mode.
 int32_t mode;
-
+/// @brief True, if pooling is position sensitive (PSROIPoolng)
+bool position_sensitive;
 /// @brief Output width.
 int pooled_width;
 /// @brief Output height.
 int pooled_height;
+/// @brief Count of sub bins in x spatial dimension
+int spatial_bins_x;
+/// @brief Count of sub bins in y spatial dimension
+int spatial_bins_y;
+/// @brief Output features count (applied for position sensitive case only)
+int output_dim;
 /// @brief Ratio of the coordinates used in RoIs to the width (and height) of the input data.
 float spatial_scale;
-
-/// @brief Group size as defined by PSRoIPooling when > 0, else if 0 means regular RoIPooling.
-int group_sz;
 CLDNN_END_PRIMITIVE_DESC(roi_pooling)
 
 CLDNN_DECLARE_PRIMITIVE_TYPE_ID(roi_pooling);
diff --git a/inference-engine/thirdparty/clDNN/api/C/shuffle_channels.h b/inference-engine/thirdparty/clDNN/api/C/shuffle_channels.h
new file mode 100644
index 000000000..a5a4b07ff
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/shuffle_channels.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef SHUFFLE_CHANNELS_H
+#define SHUFFLE_CHANNELS_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CLDNN_BEGIN_PRIMITIVE_DESC(shuffle_channels)
+/// @brief The number of groups to split the channel dimension. This number must evenly divide the channel dimension size.
+int32_t group;
+/// @brief The index of the channel dimension (default is 1).
+int32_t axis;
+CLDNN_END_PRIMITIVE_DESC(shuffle_channels)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(shuffle_channels);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // SHUFFLE_CHANNELS_H
diff --git a/inference-engine/thirdparty/clDNN/api/C/strided_slice.h b/inference-engine/thirdparty/clDNN/api/C/strided_slice.h
new file mode 100644
index 000000000..9f6f0813b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/C/strided_slice.h
@@ -0,0 +1,55 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef STRIDED_SLICE_H
+#define STRIDED_SLICE_H
+
+#include "cldnn.h"
+
+
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CLDNN_BEGIN_PRIMITIVE_DESC(strided_slice)
+/// @brief Array of bits, that provide replace begin[i] to max possible range in that dimension.
+cldnn_uint8_t_arr begin_mask;
+/// @brief Array of bits, that provide replace end[i] to max possible range in that dimension.
+cldnn_uint8_t_arr end_mask;
+/// @brief Array of bits, that provide adding a new length 1 dimension at ith position in the output tensor.
+cldnn_uint8_t_arr new_axis_mask;
+/// @brief Array of bits, that provide shrinks the dimensionality by 1, taking on the value at index begin[i].
+cldnn_uint8_t_arr shrink_axis_mask;
+CLDNN_END_PRIMITIVE_DESC(strided_slice)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(strided_slice);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif // STRIDED_SLICE_H
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/batch_norm.hpp b/inference-engine/thirdparty/clDNN/api/CPP/batch_norm.hpp
index 393320326..7962d7199 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/batch_norm.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/batch_norm.hpp
@@ -35,7 +35,7 @@ namespace cldnn
 /// 
 /// <b>Algorithm:</b>
 /// @n global stats can be computed as:
-/// @n out[i] = (in[i] - mean[b]) / sqrt(variance[b] + epsilon)
+/// @n out[i] = ( (in[i] - mean[b]) / sqrt(variance[b] + epsilon) ) * scale[b] + shift[b]
 
 struct batch_norm : public primitive_base<batch_norm, CLDNN_PRIMITIVE_DESC(batch_norm)>
 {
@@ -63,6 +63,34 @@ struct batch_norm : public primitive_base<batch_norm, CLDNN_PRIMITIVE_DESC(batch
     {
     }
 
+	/// @brief Constructs batch normalization primitive.
+	/// @param id This primitive id.
+	/// @param input Input primitive id.
+	/// @param mean Primitive id containing mean data.
+	/// @param variance Primitive id containing variance.
+	/// @brief scale Primitive id containing scale.
+	/// @brief shift Primitive id containing shift.
+	/// @param epsilon Epsilon.
+	batch_norm(
+		const primitive_id& id,
+		const primitive_id& input,
+		const primitive_id& mean,
+		const primitive_id& variance,
+		const primitive_id& scale,
+		const primitive_id& shift,
+		float epsilon,
+		const padding& output_padding = padding()
+	)
+		:primitive_base(id, { input }, output_padding)
+		, mean(mean)
+		, variance(variance)
+		, scale(scale)
+		, shift(shift)
+		, inv_variance("")
+		, epsilon(epsilon)
+	{
+	}
+
     /// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -83,11 +111,69 @@ struct batch_norm : public primitive_base<batch_norm, CLDNN_PRIMITIVE_DESC(batch
     {
     }
 
+	/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
+	/// @param id This primitive id.
+	/// @param input Input primitive id.
+	/// @brief scale Primitive id containing scale.
+	/// @brief shift Primitive id containing shift.
+	/// @param epsilon Epsilon.
+	/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
+	batch_norm(
+		const primitive_id& id,
+		const primitive_id& input,
+		float epsilon,
+		const primitive_id& scale,
+		const primitive_id& shift,
+		const primitive_id& inv_variance = "",
+		const padding& output_padding = padding()
+	)
+		:primitive_base(id, { input }, output_padding)
+		, mean("")
+		, variance("")
+		, scale(scale)
+		, shift(shift)
+		, inv_variance(inv_variance)
+		, epsilon(epsilon)
+	{
+	}
+
+	/// @brief Constructs batch normalization primitive with mean and variance calculation (used for training).
+	/// @param id This primitive id.
+	/// @param input Input primitive id.
+	/// @brief scale Primitive id containing scale.
+	/// @brief shift Primitive id containing shift.
+	/// @brief mean_out Primitive id containing mean output.
+	/// @brief variance_out Primitive id containing variance output.
+	/// @param epsilon Epsilon.
+	/// @param inv_variance Primitive id containing inverted variance calculated in this primitive. For inference leave empty.
+	batch_norm(
+		const primitive_id& id,
+		const primitive_id& input,
+		float epsilon,
+		const primitive_id& mean_out,
+		const primitive_id& variance_out,
+		const primitive_id& scale,
+		const primitive_id& shift,
+		const primitive_id& inv_variance = "",
+		const padding& output_padding = padding()
+	)
+		:primitive_base(id, { input }, output_padding)
+		, mean(mean_out)
+		, variance(variance_out)
+		, scale(scale)
+		, shift(shift)
+		, inv_variance(inv_variance)
+		, epsilon(epsilon)
+	{
+	}
+
     /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{batch_norm}
     batch_norm(const dto* dto)
         :primitive_base(dto)
         , mean(dto->mean)
         , variance(dto->variance)
+		, scale(dto->scale)
+		, shift(dto->shift)
         , inv_variance(dto->inv_variance)
         , epsilon(dto->epsilon)
     {
@@ -97,20 +183,36 @@ struct batch_norm : public primitive_base<batch_norm, CLDNN_PRIMITIVE_DESC(batch
     primitive_id mean;
     /// @brief Primitive id containing variance.
     primitive_id variance;
+	/// @brief Primitive id containing scale.
+	primitive_id scale;
+	/// @brief Primitive id containing shift.
+	primitive_id shift;
     /// @brief Primitive id containing inverted variance used in future gradient computing.
     primitive_id inv_variance;
     /// @brief Epsilon.
     float epsilon;
 
 protected:
-    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override 
-    { 
-        if (!mean.empty() && !variance.empty())
-            return{ mean, variance };
-        else if (!inv_variance.empty())
-            return{ inv_variance };
-        else
-            return{};
+	std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
+	{
+		std::vector<std::reference_wrapper<const primitive_id>> deps;
+
+		if (!mean.empty() && !variance.empty())
+		{
+			deps.push_back(mean);
+			deps.push_back(variance);
+		}
+
+		if (!scale.empty() && !shift.empty())
+		{
+			deps.push_back(scale);
+			deps.push_back(shift);
+		}
+
+		if (!inv_variance.empty())
+			deps.push_back(inv_variance);
+
+		return deps;
     }
 
     void update_dto(dto& dto) const override
@@ -118,6 +220,8 @@ protected:
         dto.mean = mean.c_str();
         dto.variance = variance.c_str();
         dto.inv_variance = inv_variance.c_str();
+        dto.scale = scale.c_str();
+        dto.shift = shift.c_str();
         dto.epsilon = epsilon;
     }
 };
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/border.hpp b/inference-engine/thirdparty/clDNN/api/CPP/border.hpp
index 6171b6cbe..862421f1b 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/border.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/border.hpp
@@ -33,18 +33,19 @@ enum class border_type : std::int32_t
 {
     /// @brief All points in the border are set to constant value.
     constant = cldnn_border_constant,
+    zero = cldnn_border_zero,
     /// @brief Border is constructed as an mirror of image (edge is also mirrored).
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension.
     mirror = cldnn_border_mirror,
-    /// @brief Border is constructed as an replication of edge.
-    /// @details Size of border in any dimension cannot be larger than size of
-    ///          input in the same dimension.
-    edge = cldnn_border_edge,
     /// @brief Border is constructed as an mirror of image (edge is NOT mirrored).
     /// @details Size of border in any dimension cannot be larger than size of
     ///          input in the same dimension decreased by @c 1.
-    mirror_101 = cldnn_border_mirror_101
+    mirror_101 = cldnn_border_mirror_101,
+    /// @brief Border is constructed as an replication of edge.
+    /// @details Size of border in any dimension cannot be larger than size of
+    ///          input in the same dimension.
+    edge = cldnn_border_edge
 };
 
 
@@ -80,9 +81,9 @@ struct border : public primitive_base<border, CLDNN_PRIMITIVE_DESC(border)>
     border(
         const primitive_id& id,
         const primitive_id& input,
-        const tensor& left_top_sizes,
-        const tensor& right_bottom_sizes,
-        const border_type type,
+        const tensor& left_top_sizes = { 0, 0, 0, 0 },
+        const tensor& right_bottom_sizes = { 0, 0, 0, 0 },
+        const border_type type = border_type::constant,
         const float border_value = 0.0f,
         const padding& output_padding = padding()
     )
@@ -94,6 +95,28 @@ struct border : public primitive_base<border, CLDNN_PRIMITIVE_DESC(border)>
     {
     }
 
+    /// @brief Constructs border primitive / layer.
+    ///
+    /// @param id                 An identifier of new primitive.
+    /// @param input              An identifier of primitive which is an input for newly created
+    ///                           border primitive.
+    /// @param x_y_sizes          Sizes of border that needs to be added from left and right
+    ///                           (in X dimension) and from top and bottom (in Y dimension).
+    ///                           Created border is simmetric (the same size of border applied
+    ///                           from both sides of input).
+    /// @param type               Type of added border.
+    /// @param output_padding     Optional padding for output from primitive.
+    border(
+        const primitive_id& id,
+        const primitive_id& input,
+        const tensor& x_y_sizes,
+        const border_type type = border_type::constant,
+        const padding& output_padding = padding()
+    )
+        : border(id, input, x_y_sizes, x_y_sizes, type, 0.0f, output_padding)
+    {
+    }
+
     /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{border}
     border(const dto* dto)
         : primitive_base(dto),
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/broadcast.hpp b/inference-engine/thirdparty/clDNN/api/CPP/broadcast.hpp
index 686358f21..cc27d7fa6 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/broadcast.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/broadcast.hpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -28,14 +28,16 @@ namespace cldnn
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
-/// @brief Broadcasts input to specified output size (broadcast size).
+/// @brief Broadcasts input to defined by @p broadcast_sizes output. @p broadcast_axes are used to
+///        reinterpret input (reshape) inside algorithm.
 ///
-/// @details Takes input and copies it to output once or multiple times, until output will
-///          reach the sizes specified in @p broadcast_sizes.
+/// @details Takes input, reinterpret it according to @p broadcast_axes
+///          and copies it to output once or multiple times.
 /// @n
-/// @n Lets assume that:
+/// @n Simple example with empty @p broadcast_axes. Lets assume that:
 /// @n      <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
 /// @n      <tt>broadcast_sizes = (bs_b, bs_f, bs_y, bs_x)</tt>
+/// @n      <tt>broadcast_axes = () - empty</tt>
 /// @n The input is broadcasted on each dimension where <tt>bs_{dim} > in_{dim}</tt> and <tt>bs_{dim}</tt>
 ///    is dividable by <tt>in_{dim}</tt> (input is copied <tt>bs_{dim} / in_{dim}</tt> times).
 ///    The dimensions where <tt>bs_{dim}</tt> is equal to <tt>in_{dim}</tt> remain unchanged.
@@ -44,17 +46,28 @@ namespace cldnn
 /// @n      <tt>output[(b, f, y, x)] = input[(b % in_b, f % in_f, y % in_y, x % in_x)]</tt>
 /// @n where <tt>(b, f, y, x)</tt> is a position of value in a primitive output.
 /// @n
+/// @n More complicated example with non empty @p broadcast_axes. Lets assume that:
+/// @n      <tt>broadcast_sizes = (bs_b, bs_f, bs_y, bs_x)</tt>
+/// @n      <tt>broadcast_axes = (2)</tt>
+/// @n Taking into account broadcast_axes size (=1) primitive's input must be (4 - 1 = 3):
+/// @n      <tt>primitive input = (1, in_b, in_f, in_x)</tt>
+/// @n Due to broadcast_axes = (2) primitive will interpret input as:
+/// @n      <tt>primitive input(internal representation) = (in_b, in_f, 1, in_x)</tt>
+/// @n Now, you can apply broadcast rules from previous example to modified (reinterpreted)
+///    input and output:
+/// @n      <tt>input_sizes = (in_b, in_f, 1, in_x)</tt>
+/// @n      <tt>output_shape = (bs_b, bs_f, bs_y, bs_x)</tt>
+/// @n      <tt>broadcast_axes = () - empty</tt>
+/// @n
 /// @n@b Requirements:
-/// @n - @p broadcast_sizes must be positive on all dimensions and compatible
-///      with size of input (describe the same dimensions).
-/// @n - @p broadcast_sizes must be greater than or equal to input sizes on
-///      all dimensions. (For any dimension, if @p broadcast_sizes is lower
-///      than input size on the dimension then @p broadcast_sizes will be replaced
-///      by input size on this dimension.)
-/// @n - For any dimension, if @p broadcast_sizes is greater than input size on
-///      the dimension then @p broadcast_sizes must be dividable by input size
-///      on this dimension.
-/// @n Breaking any of these conditions will raise an exeption.
+/// @n - @p broadcast_sizes must be positive on all dimensions.
+/// @n - @p broadcast_axes size (dimensions count) must be within (inclusive) range
+///      0 - 4.
+/// @n - @p broadcast_axes mustn't have duplicate values.
+/// @n - Values of @p broadcast_axes must be within (inclusive) range 0 - 3
+/// @n - @p output_shape must be greater (dividable) than or equal to reinterpreted
+///      input on all dimensions.
+/// @n Breaking any of these conditions will raise an exception.
 struct broadcast : public primitive_base<broadcast, CLDNN_PRIMITIVE_DESC(broadcast)>
 {
     CLDNN_DECLARE_PRIMITIVE(broadcast)
@@ -66,34 +79,45 @@ struct broadcast : public primitive_base<broadcast, CLDNN_PRIMITIVE_DESC(broadca
     ///                        broadcast primitive.
     /// @param broadcast_sizes Sizes of broadcast. Output size of current primitive
     ///                        will match broadcast sizes (layout type will not change).
-    ///                        If @p broadcast_sizes are not specified, the input sizes
-    ///                        are used as @p broadcast_sizes.
+    /// @param broadcast_axes  Axes positions (0-based, from left to right) in output_shape
+    ///                        that are being broadcast. Values of broadcast_axes on remaining
+    ///                        axes must be greater (dividable) or equal to corresponding input
+    ///                        dimension values.
     /// @param output_padding  Optional padding for output from primitive.
     broadcast(
         const primitive_id& id,
         const primitive_id& input,
-        const tensor& broadcast_sizes = {0, 0, 0, 0},
+        const tensor& broadcast_sizes,
+        const std::vector<uint16_t>& broadcast_axes = {},
         const padding& output_padding = padding()
     )
         : primitive_base(id, {input}, output_padding),
-          broadcast_sizes(broadcast_sizes)
+          broadcast_sizes(broadcast_sizes),
+          broadcast_axes(broadcast_axes)
     {
     }
 
     /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{broadcast}
     broadcast(const dto* dto)
         : primitive_base(dto),
-          broadcast_sizes(dto->broadcast_sizes)
+          broadcast_sizes(dto->broadcast_sizes),
+          broadcast_axes(uint16_t_arr_to_vector(dto->broadcast_axes))
+
     {
     }
 
     /// @brief Expected sizes of output from broadcast primitive.
     tensor broadcast_sizes;
+    /// @brief Array of axes positions from output shape (0-based, from left to right)
+    ///        along which broadcast should happen.
+    std::vector<uint16_t> broadcast_axes;
 
 protected:
     void update_dto(dto& dto) const override
     {
         dto.broadcast_sizes = broadcast_sizes;
+        dto.broadcast_axes = uint16_t_vector_to_arr(broadcast_axes);
+
     }
 };
 /// @}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/cldnn_defs.h b/inference-engine/thirdparty/clDNN/api/CPP/cldnn_defs.h
index 7e82d2c42..7281bd3d6 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/cldnn_defs.h
+++ b/inference-engine/thirdparty/clDNN/api/CPP/cldnn_defs.h
@@ -349,6 +349,18 @@ inline std::vector<uint16_t> uint16_t_arr_to_vector(const cldnn_uint16_t_arr& ar
     return result;
 }
 
+///
+/// \brief Converts C API uint8_t array to std::vector<uint8_t>
+///
+inline std::vector<uint8_t> uint8_t_arr_to_vector(const cldnn_uint8_t_arr& arr)
+{
+    std::vector<uint8_t> result(arr.size);
+    for (size_t i = 0; i < arr.size; i++)
+    {
+        result[i] = arr.data[i];
+    }
+    return result;
+}
 
 ///
 /// \brief Converts std::vector<float> to C API float_array
@@ -367,6 +379,14 @@ inline cldnn_uint16_t_arr uint16_t_vector_to_arr(const std::vector<uint16_t>& st
 }
 
 ///
+/// \brief Converts std::vector<uint8_t> to C API uint8_t array
+///
+inline cldnn_uint8_t_arr uint8_t_vector_to_arr(const std::vector<uint8_t>& stor)
+{
+    return{ stor.data(), stor.size() };
+}
+
+///
 /// \brief Converts std::vector<tensor> to C API tensor_array
 ///
 inline cldnn_tensor_arr tensor_vector_to_arr(const std::vector<cldnn_tensor>& stor)
@@ -374,6 +394,18 @@ inline cldnn_tensor_arr tensor_vector_to_arr(const std::vector<cldnn_tensor>& st
     return cldnn_tensor_arr{ stor.data(), stor.size() };
 }
 
+///
+/// \brief Converts C API tensor_array to std::vector of C API tensor
+///
+inline std::vector<cldnn_tensor> tensor_arr_to_cldnn_vector(const cldnn_tensor_arr& arr)
+{
+    std::vector<cldnn_tensor> result(arr.size);
+    for (size_t i = 0; i < arr.size; i++)
+        result[i] = arr.data[i];
+
+    return result;
+}
+
 /// @}
 
 /// @endcond
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/condition.hpp b/inference-engine/thirdparty/clDNN/api/CPP/condition.hpp
new file mode 100644
index 000000000..0ad6c3e64
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/condition.hpp
@@ -0,0 +1,119 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "../C/condition.h"
+#include "primitive.hpp"
+#include "topology.hpp"
+
+namespace cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+/// @brief Function, which will be used during comparison.
+enum cond_functions : int32_t
+{
+    EQUAL,
+    GREATER,
+    LESS
+};
+
+/// @brief Adds primitive, which works like "if".
+///
+/// @details
+/// @n   Applies comparision between 2 inputs.
+/// @n   Compare data - sizes of that input specifes the range of the comparison.
+/// @n   Offset - offset in memory, when comparing values.
+struct condition : public primitive_base<condition, CLDNN_PRIMITIVE_DESC(condition)>
+{
+    CLDNN_DECLARE_PRIMITIVE(condition)
+
+        /// @brief Constructs condition primitive / layer.
+        ///
+        /// @param id                 An identifier of new primitive.
+        /// @param input              An identifier of primitive which is an input for newly created
+        ///                           condition primitive.
+        /// @param topology_true      Topolgoy containg primitives, which will be executed when comparsion results  
+        ///                           true.
+        /// @param topology_false     Topolgoy containg primitives, which will be executed when comparsion results  
+        ///                           false..
+        /// @param compare_Data       An identifier of primitive which contains compare values
+        /// @param func               Used function during comparison.
+        /// @param offseg             Offset for compare data.
+        /// @param output_padding     Optional padding for output from primitive.
+        condition(
+            const primitive_id& id,
+            const primitive_id& input,
+            const topology& topology_true,
+            const topology& topology_false,
+            const primitive_id& compare_data,
+            const cond_functions& func,
+            const tensor& offset = { 0, 0, 0, 0 },
+            const padding& output_padding = padding()
+        )
+        : primitive_base(id, { input }, output_padding)
+        , topology_true(topology_true)
+        , topology_false(topology_false)
+        , compare_data(compare_data)
+        , function(func)
+        , offset(offset)
+    {}
+
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{condition}
+    condition(const dto* dto)
+        : primitive_base(dto)
+        , topology_true(dto->topology_true)
+        , topology_false(dto->topology_false)
+        , compare_data(dto->compare_data)
+        , function(static_cast<cond_functions>(dto->function))
+        , offset(dto->offset)
+    {}
+
+
+    /// @brief An identifier of topology, which will be executed when comparison returns true.
+    topology topology_true;
+    /// @brief An identifier of topology, which will be executed when comparison returns false.
+    topology topology_false;
+    /// @brief An identifier of primitive which contains compare values.
+    primitive_id compare_data;
+    /// @brief Used function during comparison.
+    cond_functions function;
+    /// @brief Offset for compare data.
+    tensor offset;
+protected:
+    void update_dto(dto& dto) const override
+    {
+        dto.compare_data = compare_data.c_str();
+        dto.function = static_cast<cldnn_cond_functions>(function);
+        dto.offset = offset;
+        dto.topology_true = topology_true.get();
+        dto.topology_false = topology_false.get();
+    }
+
+    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
+    {
+        return { compare_data };
+    }
+};
+}
+/// @}
+/// @}
+/// @}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/contract.hpp b/inference-engine/thirdparty/clDNN/api/CPP/contract.hpp
new file mode 100644
index 000000000..9ce79cabf
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/contract.hpp
@@ -0,0 +1,119 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "../C/contract.h"
+#include "primitive.hpp"
+
+
+namespace cldnn
+{
+    /// @addtogroup cpp_api C++ API
+    /// @{
+    /// @addtogroup cpp_topology Network Topology
+    /// @{
+    /// @addtogroup cpp_primitives Primitives
+    /// @{
+
+    /// @brief Select mode for the @ref contract layer.
+    enum class contract_mode : int32_t
+    {
+        /// @brief Sum reduction.
+        sum = cldnn_contract_sum,
+        /// @brief Product reduction.
+        prod = cldnn_contract_product,
+        /// @brief All reduction.
+        all = cldnn_contract_all,
+        /// @brief Any reduction.
+        any = cldnn_contract_any,
+        /// @brief Max reduction.
+        max = cldnn_contract_max
+    };
+
+    /// @brief Reduces input with an operation defined by @p mode along defined
+    ///        by @p reduction_axes dimensions.
+    ///
+    /// @details Reduces the input using the binary operation determined by
+    ///          @p mode. The @p reduction_axes determine the final shape of the
+    ///          output, which is calculated based on the input shape by
+    ///          collapsing the dimensions along which the reduction happens.
+    ///          For example, for the input with
+    /// @n      <tt>input_sizes = (in_b, in_f, in_y, in_x)</tt>
+    /// @n a reduction with
+    /// @n      <tt>reduction_axes = (2)</tt>
+    /// @n would collapse the Y dimension, producing
+    /// @n      <tt>output_shape = (1, in_b, in_f, in_x)</tt>
+    /// @n where every element is a @p mode reduction of the input elements with
+    /// @n the same B, F and X coordinates.
+    /// @n
+    /// @n@b Requirements:
+    /// @n - @p reduction_axes size (dimensions count) must be within (inclusive) range
+    ///      1 - 4.
+    /// @n - @p reduction_axes mustn't have duplicate values.
+    /// @n - Values of @p reduction_axes must be within (inclusive) range 0 - 3
+    /// @n Breaking any of these conditions will raise an exception.
+    struct contract : public primitive_base<contract, CLDNN_PRIMITIVE_DESC(contract)>
+    {
+        CLDNN_DECLARE_PRIMITIVE(contract)
+
+            /// @brief Constructs contract primitive / layer.
+            ///
+            /// @param id              An identifier of new primitive.
+            /// @param input           An identifier of primitive which is an input for newly created
+            ///                        contract primitive.
+            /// @param mode            Reduction mode.
+            /// @param reduction_axes  Axes positions (0-based, from left to right) in input_shape
+            ///                        that are being reduced.
+            /// @param output_padding  Optional padding for output from primitive.
+            contract(
+                const primitive_id& id,
+                const primitive_id& input,
+                contract_mode mode,
+                const std::vector<uint16_t>& reduction_axes = {},
+                const padding& output_padding = padding()
+            )
+            : primitive_base(id, { input }, output_padding),
+            mode(mode),
+            reduction_axes(reduction_axes)
+        {
+        }
+
+        /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{contract}
+        contract(const dto* dto)
+            : primitive_base(dto),
+            mode(static_cast<contract_mode>(dto->mode)),
+            reduction_axes(uint16_t_arr_to_vector(dto->reduction_axes))
+
+        {
+        }
+
+        /// @param mode Contract mode.
+        contract_mode mode;
+        /// @brief Array of axes positions from input shape (0-based, from left to right)
+        ///        along which reduction should happen.
+        std::vector<uint16_t> reduction_axes;
+
+    protected:
+        void update_dto(dto& dto) const override
+        {
+            dto.mode = static_cast<cldnn_contract_mode>(mode);
+            dto.reduction_axes = uint16_t_vector_to_arr(reduction_axes);
+        }
+    };
+    /// @}
+    /// @}
+    /// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/convolution.hpp b/inference-engine/thirdparty/clDNN/api/CPP/convolution.hpp
index 8efecd89b..a8ae6037a 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/convolution.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/convolution.hpp
@@ -72,6 +72,9 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
         , with_output_size(false)
+        , groups(1)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
         , _weights(weights)
         , _bias(bias)
         , _weights_quantization_factors(std::vector<primitive_id>(0))
@@ -81,6 +84,217 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
             throw std::runtime_error("convolution's weights/bias count does not match");
     }
 
+    /// @brief Constructs convolution primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param bias List of primitive ids containing bias data.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param padding_above Defines a padding added to input image on left (x axis) and top (y axis).
+    /// @param padding_below Defines a padding added to input image on right (x axis) and bottom (y axis).
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        tensor stride,
+        tensor input_offset,
+        tensor dilation,
+        tensor padding_above,
+        tensor padding_below,
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(1)
+        , padding_above(padding_above)
+        , padding_below(padding_below)
+        , _weights(weights)
+        , _bias(bias)
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+        if ((bias.size() != 0) && (weights.size() != bias.size()))
+            throw std::runtime_error("convolution's weights/bias count does not match");
+    }
+
+    /// @brief Constructs convolution primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param groups Number of filter groups.
+    /// @param bias List of primitive ids containing bias data.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param padding_above Defines a padding added to input image on left (x axis) and top (y axis).
+    /// @param padding_below Defines a padding added to input image on right (x axis) and bottom (y axis).
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        uint32_t groups,
+        tensor stride,
+        tensor input_offset,
+        tensor dilation,
+        tensor padding_above,
+        tensor padding_below,
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(groups)
+        , padding_above(padding_above)
+        , padding_below(padding_below)
+        , _weights(weights)
+        , _bias(bias)
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+        if ((bias.size() != 0) && (weights.size() != bias.size()))
+            throw std::runtime_error("convolution's weights/bias count does not match");
+    }
+
+    /// @brief Constructs convolution primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param groups Number of filter groups.
+    /// @param bias List of primitive ids containing bias data.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    /// @param output_size User-defined output data size of the primitive (w/o padding).
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        uint32_t groups,
+        tensor stride,
+        tensor input_offset,
+        tensor dilation,
+        bool with_activation,
+        float activation_slp,
+        tensor output_size,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(true)
+        , output_size(output_size)
+        , groups(groups)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
+        , _weights(weights)
+        , _bias(bias)
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+        if ((bias.size() != 0) && (weights.size() != bias.size()))
+            throw std::runtime_error("convolution's weights/bias count does not match");
+    }
+
+    /// @brief Constructs convolution primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param groups Number of filter groups.
+    /// @param bias List of primitive ids containing bias data.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        uint32_t groups,
+        tensor stride = { 1, 1, 1, 1 },
+        tensor input_offset = { 0,0,0,0 },
+        tensor dilation = { 1, 1, 1, 1 },
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(groups)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
+        , _weights(weights)
+        , _bias(bias)
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+        if ((bias.size() != 0) && (weights.size() != bias.size()))
+            throw std::runtime_error("convolution's weights/bias count does not match");
+        if ((groups > 1) && ((weights.size() != 1) || ((bias.size() != 0) && (bias.size() != 1))))
+            throw std::runtime_error("grouped convolution's weights/bias count must be 1");
+    }
 
     /// @brief Constructs convolution primitive.
     /// @param id This primitive id.
@@ -125,6 +339,9 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
         , with_output_size(false)
+        , groups(1)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
         , _weights(weights)
         , _bias(bias)
         , _weights_quantization_factors(w_quantization_factor)
@@ -180,6 +397,9 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
         , with_output_size(false)
+        , groups(1)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
         , _weights(weights)
         , _bias(bias)
         , _weights_quantization_factors(w_quantization_factor)
@@ -227,6 +447,156 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
         , with_output_size(false)
+        , groups(1)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
+        , _weights(weights)
+        , _bias(std::vector<primitive_id>(0))
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+    }
+
+    /// @brief Constructs convolution primitive (w/o bias).
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param padding_above Defines a padding added to input image on left (x axis) and top (y axis).
+    /// @param padding_below Defines a padding added to input image on right (x axis) and bottom (y axis).
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        tensor stride,
+        tensor input_offset,
+        tensor dilation,
+        tensor padding_above,
+        tensor padding_below,
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(1)
+        , padding_above(padding_above)
+        , padding_below(padding_below)
+        , _weights(weights)
+        , _bias(std::vector<primitive_id>(0))
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+    }
+
+    /// @brief Constructs convolution primitive (w/o bias).
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param groups Number of filter groups.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param padding_above Defines a padding added to input image on left (x axis) and top (y axis).
+    /// @param padding_below Defines a padding added to input image on right (x axis) and bottom (y axis).
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        uint32_t groups,
+        tensor stride,
+        tensor input_offset,
+        tensor dilation,
+        tensor padding_above,
+        tensor padding_below,
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(groups)
+        , padding_above(padding_above)
+        , padding_below(padding_below)
+        , _weights(weights)
+        , _bias(std::vector<primitive_id>(0))
+        , _weights_quantization_factors(std::vector<primitive_id>(0))
+        , _output_calibration_factors(std::vector<primitive_id>(0))
+    {
+    }
+
+    /// @brief Constructs convolution primitive (w/o bias).
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param groups Number of filter groups.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    convolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        uint32_t groups,
+        tensor stride = { 1, 1, 1, 1 },
+        tensor input_offset = { 0,0,0,0 },
+        tensor dilation = { 1, 1, 1, 1 },
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , weights_quantization_factors(_weights_quantization_factors.cpp_ids)
+        , output_calibration_factors(_output_calibration_factors.cpp_ids)
+        , input_quantization_factor(1.0f)
+        , output_quantization_factor(1.0f)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(groups)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
         , _weights(weights)
         , _bias(std::vector<primitive_id>(0))
         , _weights_quantization_factors(std::vector<primitive_id>(0))
@@ -274,6 +644,9 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , activation_negative_slope(activation_slp)
         , with_output_size(true)
         , output_size(output_size)
+        , groups(1)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
         , _weights(weights)
         , _bias(bias)
         , _weights_quantization_factors(std::vector<primitive_id>(0))
@@ -321,6 +694,9 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , activation_negative_slope(activation_slp)
         , with_output_size(true)
         , output_size(output_size)
+        , groups(1)
+        , padding_above(tensor(0, 0, 0, 0))
+        , padding_below(tensor(0, 0, 0, 0))
         , _weights(weights)
         , _bias(std::vector<primitive_id>(0))
         , _weights_quantization_factors(std::vector<primitive_id>(0))
@@ -344,6 +720,9 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
         , activation_negative_slope(dto->activation_negative_slope)
         , with_output_size(dto->with_output_size != 0)
         , output_size(dto->output_size)
+        , groups(dto->groups)
+        , padding_above(dto->padding_above)
+        , padding_below(dto->padding_below)
         , _weights(dto->weights)
         , _bias(dto->bias)
         , _weights_quantization_factors(dto->weights_quantization_factors)
@@ -443,6 +822,12 @@ struct convolution : public primitive_base<convolution, CLDNN_PRIMITIVE_DESC(con
     bool with_output_size;
     /// @brief User-defined output data size of the primitive (w/o padding).
     tensor output_size;
+    /// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
+    uint32_t groups;
+    /// @param padding_above Defines a padding added to input image on left (x axis) and top (y axis).
+    tensor padding_above;
+    /// @param padding_below Defines a padding added to input image on right (x axis) and bottom (y axis).
+    tensor padding_below;
 
     /// @brief On how many cards split the computation to.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
@@ -484,7 +869,9 @@ protected:
         dto.dilation = dilation;
         dto.with_output_size = with_output_size;
         dto.output_size = output_size;
-        
+        dto.groups = groups;
+        dto.padding_above = padding_above;
+        dto.padding_below = padding_below;
     }
 };
 /// @}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/convolution_grad_weights.hpp b/inference-engine/thirdparty/clDNN/api/CPP/convolution_grad_weights.hpp
index 54c361c38..2b485b86f 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/convolution_grad_weights.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/convolution_grad_weights.hpp
@@ -66,6 +66,7 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
         , stride(stride)
         , input_offset(input_offset)
         , dilation(dilation)
+        , output_grad_w(false)
         , _weights(weights)
         , _bias(bias)
         , _prev_weights_grad(std::vector<primitive_id>(0))
@@ -81,6 +82,7 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
     /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution_grad_weights window should start calculations.
     /// @param dilation Defines dilation size.
     /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param Should primitive give weights gradient (delta) as an output
     /// @param conv_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating. Leave empty if primitive is last in backward pass.
     convolution_grad_weights(
         const primitive_id& id,
@@ -90,6 +92,7 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
         tensor stride = { 1, 1, 1, 1 },
         tensor input_offset = { 0, 0, 0, 0 },
         tensor dilation = { 1, 1, 1, 1 },
+        bool output_grad_w = false,
         const primitive_id& conv_grad = "",
         const padding& output_padding = padding()
     )
@@ -102,6 +105,44 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
         , stride(stride)
         , input_offset(input_offset)
         , dilation(dilation)
+        , output_grad_w(output_grad_w)
+        , _weights(weights)
+        , _bias(std::vector<primitive_id>(0))
+        , _prev_weights_grad(std::vector<primitive_id>(0))
+        , _prev_bias_grad(std::vector<primitive_id>(0))
+    {
+    }
+
+    /// @brief Constructs convolution_grad_weights primitive (w/o bias).
+    /// @param id This primitive id.
+    /// @param input Input gradient primitive id.
+    /// @param input Input primitive id from convolution forward pass.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution_grad_weights window should start calculations.
+    /// @param dilation Defines dilation size.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param conv_grad Id of primitive which uses weights and biases updated in this primitive. This is for correct order of calculating. Leave empty if primitive is last in backward pass.
+    convolution_grad_weights(
+        const primitive_id& id,
+        const primitive_id& input_grad,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        tensor stride,
+        tensor input_offset,
+        tensor dilation,
+        const primitive_id& conv_grad = "",
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input_grad, input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , prev_weights_grad(_prev_weights_grad.cpp_ids)
+        , prev_bias_grad(_prev_bias_grad.cpp_ids)
+        , conv_grad(conv_grad)
+        , stride(stride)
+        , input_offset(input_offset)
+        , dilation(dilation)
+        , output_grad_w(false)
         , _weights(weights)
         , _bias(std::vector<primitive_id>(0))
         , _prev_weights_grad(std::vector<primitive_id>(0))
@@ -144,6 +185,7 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
         , stride(stride)
         , input_offset(input_offset)
         , dilation(dilation)
+        , output_grad_w(false)
         , _weights(weights)
         , _bias(bias)
         , _prev_weights_grad(prev_weights_grad)
@@ -162,6 +204,7 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
         , stride(dto->stride)
         , input_offset(dto->input_offset)
         , dilation(dto->dilation)
+        , output_grad_w(dto->output_grad_w)
         , _weights(dto->weights)
         , _bias(dto->bias)
         , _prev_weights_grad(dto->prev_weights_grad)
@@ -189,6 +232,8 @@ struct convolution_grad_weights : public primitive_base<convolution_grad_weights
     /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. 
     /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
     tensor dilation;
+    /// @brief Should primitive give weights gradient (delta) as an output
+    bool output_grad_w;
 
     /// @brief On how many cards split the computation to.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
@@ -226,6 +271,7 @@ protected:
         dto.dilation = dilation;
         dto.split = split();
         dto.stride = stride;
+        dto.output_grad_w = output_grad_w;
         dto.conv_grad = conv_grad.c_str();
         dto.prev_bias_grad = _prev_bias_grad.ref();
         dto.prev_weights_grad = _prev_weights_grad.ref();
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/crop.hpp b/inference-engine/thirdparty/clDNN/api/CPP/crop.hpp
index 7395d1875..3d74c9689 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/crop.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/crop.hpp
@@ -28,8 +28,20 @@ namespace cldnn
 /// @addtogroup cpp_primitives Primitives
 /// @{
 
+
+/// @brief Marker type indicating that instead of reference input size left, top,
+///        right and bottom borders (to cut out) should be specified.
+///
+/// @details Used to differentiate constructors.
+struct crop_borders_t {};
+
+/// @brief Marker indicating that instead of reference input size left, top,
+///        right and bottom borders (to cut out) should be specified.
+constexpr auto crop_borders = crop_borders_t{};
+
 /// @brief Performs crop operation on input.
-/// @details Crops the input to the shape of reference_input accross all dimensions taking into account specified input offsets.
+/// @details Crops the input to the shape of reference_input across all dimensions taking into account specified input offsets.
+/// @n       Borders variant calculated output shape from input shape minus the specified borders.
 /// @n
 /// @n\b Examples
 /// @n Crop without offset example:
@@ -37,13 +49,18 @@ namespace cldnn
 /// @n Crop with offset example:
 /// \image html crop_w_offset.jpg
 /// @n
-/// @n\b Requirements 
-/// @n - Input and reference format has to be same
-/// @n - Input, reference and offset layout (order) has to be the same
+/// @n\b Requirements (reference size variant)
 /// @n - Input size cannot be greater than reference size in any dimension
 /// @n - All sizes have to have positive numbers
 /// @n - Reference size plus offset cannot exceed input size
-/// @n Breaking any of this conditions will cause exeption throw.
+/// @n
+/// @n\b Requirements (borders variant)
+/// @n - Borders support batch, feature and spatial dimensions (rest of dimensions ignored).
+/// @n - Input size cannot be greater than reference size in any dimension
+/// @n - All sizes specified in borders have to have non-negative values (positive or @c 0).
+/// @n - Sum of sizes of opposite borders must be lower than input size (on all non-ignored dimensions).
+/// @n
+/// @n Breaking any of this conditions will cause exception throw.
 struct crop : public primitive_base<crop, CLDNN_PRIMITIVE_DESC(crop)>
 {
     CLDNN_DECLARE_PRIMITIVE(crop)
@@ -66,6 +83,55 @@ struct crop : public primitive_base<crop, CLDNN_PRIMITIVE_DESC(crop)>
     {
     }
 
+    /// @brief Constructs crop primitive (borders variant).
+    ///
+    /// @details Allows to specify borders from each side that should be cut out
+    ///          by the primitive.
+    /// @n       NOTE: Borders variant supports only up to four dimensions.
+    ///
+    /// @param id         Identifier of newly created primitive.
+    /// @param input      Identifier of input primitive which dimensions will be cropped.
+    /// @param lt_borders Border sizes (spatial dimensions define left (X) and top (Y)
+    ///                   borders, non-spatial dimensions - lower borders)
+    /// @param rb_borders Border sizes (spatial dimensions define right (X) and bottom (Y)
+    ///                   borders, non-spatial dimensions - upper borders)
+    crop(
+        const primitive_id& id,
+        const primitive_id& input,
+        const tensor& lt_borders,
+        const tensor& rb_borders,
+        const crop_borders_t,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, {input}, output_padding)
+        , reference_input(rb_borders.negate())
+        , offsets(lt_borders)
+    {
+    }
+
+    /// @brief Constructs crop primitive (symmetric borders variant).
+    ///
+    /// @details Allows to specify borders from each side that should be cut out
+    ///          by the primitive.
+    /// @n       NOTE: Borders variant supports only up to four dimensions.
+    ///
+    /// @param id         Identifier of newly created primitive.
+    /// @param input      Identifier of input primitive which dimensions will be cropped.
+    /// @param xy_borders Border sizes (symmetric; spatial dimensions define left/right (X)
+    ///                   and top/bottom (Y) borders, non-spatial dimensions - lower/upper borders).
+    crop(
+        const primitive_id& id,
+        const primitive_id& input,
+        const tensor& xy_borders,
+        const crop_borders_t,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, {input}, output_padding)
+        , reference_input(xy_borders.negate())
+        , offsets(xy_borders)
+    {
+    }
+
     /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{crop}
     crop(const dto* dto)
         :primitive_base(dto)
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/deconvolution.hpp b/inference-engine/thirdparty/clDNN/api/CPP/deconvolution.hpp
index f1de10df2..21607b18b 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/deconvolution.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/deconvolution.hpp
@@ -63,16 +63,88 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
         , with_output_size(false)
+        , groups(1)
         , _weights(weights)
         , _bias(bias)
         , _gradient(false)
     {
     }
+    /// @brief Constructs deconvolution primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param groups Number of filter groups.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the deconvolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param with_activation Enables Relu activation.
+    /// @param activation_slp Relu activation slope.
+    deconvolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        uint32_t groups,
+        tensor stride = { 1, 1, 1, 1 },
+        tensor input_offset = { 0,0,0,0 },
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , input_offset(input_offset)
+        , stride(stride)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(groups)
+        , _weights(weights)
+        , _bias(bias)
+        , _gradient(false)
+    {
+    }
+
+    /// @brief Constructs deconvolution primitive (w/o bias).
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the deconvolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param with_activation Enables Relu activation.
+    /// @param activation_slp Relu activation slope.
+    deconvolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        tensor stride = { 1, 1, 1, 1 },
+        tensor input_offset = { 0,0,0,0 },
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding(),
+        bool gradient = false
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , input_offset(input_offset)
+        , stride(stride)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , groups(1)
+        , _weights(weights)
+        , _bias(std::vector<primitive_id>(0))
+        , _gradient(gradient)
+    {
+    }
 
     /// @brief Constructs deconvolution primitive (w/o bias).
     /// @param id This primitive id.
     /// @param input Input primitive id.
     /// @param weights List of primitive ids containing weights data.
+    /// @param groups Number of filter groups.
     /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the deconvolution window should start calculations.
     /// @param stride Defines shift in input buffer between adjacent calculations of output values.
     /// @param with_activation Enables Relu activation.
@@ -81,6 +153,7 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
         const primitive_id& id,
         const primitive_id& input,
         const std::vector<primitive_id>& weights,
+        uint32_t groups,
         tensor stride = { 1, 1, 1, 1 },
         tensor input_offset = { 0,0,0,0 },
         bool with_activation = false,
@@ -96,6 +169,7 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
         , with_output_size(false)
+        , groups(groups)
         , _weights(weights)
         , _bias(std::vector<primitive_id>(0))
         , _gradient(gradient)
@@ -133,12 +207,54 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
         , activation_negative_slope(activation_slp)
         , with_output_size(true)
         , output_size(output_size)
+        , groups(1)
+        , _weights(weights)
+        , _bias(bias)
+        , _gradient(false)
+    {
+    }
+
+    /// @brief Constructs deconvolution primitive (computes input paddings to match output size).
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param bias List of primitive ids containing bias data. Provide empty vector if using next parameters without bias.
+    /// @param groups Number of filter groups.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the deconvolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param with_activation Enables Relu activation.
+    /// @param activation_slp Relu activation slope.
+    /// @param output_size User-defined output data size of the primitive (w/o padding).
+    deconvolution(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        uint32_t groups,
+        tensor stride,
+        tensor input_offset,
+        bool with_activation,
+        float activation_slp,
+        tensor output_size,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , input_offset(input_offset)
+        , stride(stride)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(true)
+        , output_size(output_size)
+        , groups(groups)
         , _weights(weights)
         , _bias(bias)
         , _gradient(false)
     {
     }
 
+
     /// @brief Constructs deconvolution primitive (w/o bias, computes input paddings to match output size).
     /// @param id This primitive id.
     /// @param input Input primitive id.
@@ -169,6 +285,7 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
         , activation_negative_slope(activation_slp)
         , with_output_size(true)
         , output_size(output_size)
+        , groups(1)
         , _weights(weights)
         , _bias(std::vector<primitive_id>(0))
         , _gradient(gradient)
@@ -186,6 +303,7 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
         , activation_negative_slope(dto->activation_negative_slope)
         , with_output_size(dto->with_output_size != 0)
         , output_size(dto->output_size)
+        , groups(dto->groups)
         , _weights(dto->weights)
         , _bias(dto->bias)
         , _gradient(dto->gradient != 0)
@@ -264,6 +382,8 @@ struct deconvolution : public primitive_base<deconvolution, CLDNN_PRIMITIVE_DESC
     bool with_output_size;
     /// @brief User-defined output data size of the primitive (w/o padding).
     tensor output_size;
+    /// @brief Number of feature groups (grouped convolution). If more than 1 then weights/bias count needs to be 1.
+    uint32_t groups;
 
     /// @brief On how many cards split the computation to.
     int32_t split() const { return static_cast<int32_t>(weights.size()); }
@@ -299,9 +419,10 @@ protected:
         dto.with_output_size = with_output_size;
         dto.output_size = output_size;
         dto.gradient = _gradient;
+        dto.groups = groups;
     }
 };
 /// @}
 /// @}
 /// @}
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/depth_to_space.hpp b/inference-engine/thirdparty/clDNN/api/CPP/depth_to_space.hpp
new file mode 100644
index 000000000..d0831033e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/depth_to_space.hpp
@@ -0,0 +1,72 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "../C/depth_to_space.h"
+#include "primitive.hpp"
+
+namespace  cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief
+/// @details
+struct depth_to_space : public primitive_base<depth_to_space, CLDNN_PRIMITIVE_DESC(depth_to_space)>
+{
+    CLDNN_DECLARE_PRIMITIVE(depth_to_space)
+
+    /// @brief Constructs depth_to_space primitive.
+    /// @param id This primitive id.
+    /// @param input Input dictionary primitive id.
+    /// @param block_size Block size.
+    depth_to_space(
+        const primitive_id& id,
+        const primitive_id& input,
+        const size_t block_size,
+        const padding& output_padding = padding()
+    )
+        : primitive_base(id, {input}, output_padding)
+        , block_size(block_size)
+    {
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{depth_to_space}
+    depth_to_space(const dto* dto)
+        : primitive_base(dto)
+        , block_size(dto->block_size)
+    {
+    }
+
+    /// @brief Block size.
+    size_t block_size;
+protected:
+
+    void update_dto(dto& dto) const override
+    {
+        dto.block_size = block_size;
+    }
+};
+/// @}
+/// @}
+/// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/detection_output.hpp b/inference-engine/thirdparty/clDNN/api/CPP/detection_output.hpp
index 8d3d75c75..87ea56839 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/detection_output.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/detection_output.hpp
@@ -18,6 +18,7 @@
 #pragma once
 #include <limits>
 #include "../C/detection_output.h"
+#include "../C/detection_output_sort.h"
 #include "primitive.hpp"
 
 namespace cldnn
@@ -39,7 +40,7 @@ enum class prior_box_code_type : int32_t
 
 /// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
 /// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
-/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1. 
+/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
 struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIVE_DESC(detection_output)>
 {
     CLDNN_DECLARE_PRIMITIVE(detection_output)
@@ -80,7 +81,8 @@ struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIV
         const int32_t input_width = -1,
         const int32_t input_height = -1,
         const bool decrease_label_id = false,
-        const bool clip = false,
+        const bool clip_before_nms = false,
+        const bool clip_after_nms = false,
         const padding& output_padding = padding()
         )
         : primitive_base(id, { input_location, input_confidence, input_prior_box }, output_padding)
@@ -100,7 +102,8 @@ struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIV
         , input_width(input_width)
         , input_height(input_height)
         , decrease_label_id(decrease_label_id)
-        , clip(clip)
+        , clip_before_nms(clip_before_nms)
+        , clip_after_nms(clip_after_nms)
     {
         if (decrease_label_id && background_label_id != 0)
             throw std::invalid_argument("Cannot use decrease_label_id and background_label_id parameter simultaneously.");
@@ -125,7 +128,8 @@ struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIV
         , input_width(dto->input_width)
         , input_height(dto->input_height)
         , decrease_label_id(dto->decrease_label_id != 0)
-        , clip(dto->clip != 0)
+        , clip_before_nms(dto->clip_before_nms != 0)
+        , clip_after_nms(dto->clip_after_nms != 0)
     {
         if (decrease_label_id && background_label_id != 0)
             throw std::invalid_argument("Cannot use decrease_label_id and background_label_id parameter simultaneously.");
@@ -163,8 +167,10 @@ struct detection_output : public primitive_base<detection_output, CLDNN_PRIMITIV
     const int32_t input_height;
     /// @brief Decrease label id to skip background label equal to 0. Can't be used simultaneously with background_label_id.
     const bool decrease_label_id;
-    /// @brief Clip decoded boxes
-    const bool clip;
+    /// @brief Clip decoded boxes right after decoding
+    const bool clip_before_nms;
+    /// @brief Clip decoded boxes after nms step
+    const bool clip_after_nms;
 
 protected:
     void update_dto(dto& dto) const override
@@ -185,7 +191,81 @@ protected:
         dto.input_width = input_width;
         dto.input_height = input_height;
         dto.decrease_label_id = decrease_label_id;
-        dto.clip = clip;
+        dto.clip_before_nms = clip_before_nms;
+        dto.clip_after_nms = clip_after_nms;
+    }
+};
+
+/// @brief Generates a list of detections based on location and confidence predictions by doing non maximum suppression.
+/// @details Each row is a 7 dimension vector, which stores: [image_id, label, confidence, xmin, ymin, xmax, ymax].
+/// If number of detections per image is lower than keep_top_k, will write dummy results at the end with image_id=-1.
+struct detection_output_sort : public primitive_base<detection_output_sort, CLDNN_PRIMITIVE_DESC(detection_output_sort)>
+{
+    CLDNN_DECLARE_PRIMITIVE(detection_output_sort)
+
+    /// @brief Constructs detection output primitive.
+    /// @param id This primitive id.
+    /// @param input_bboxes Input bounding boxes primitive id.
+    /// @param num_images Number of images to be predicted.
+    /// @param num_classes Number of classes to be predicted.
+    /// @param keep_top_k Number of total bounding boxes to be kept per image after NMS step.
+    /// @param share_location If true bounding box are shared among different classes.
+    /// @param top_k Maximum number of results to be kept in NMS.
+    /// @param output_padding Output padding.
+    detection_output_sort(
+        const primitive_id& id,
+        const primitive_id& input_bboxes,
+        const uint32_t num_images,
+        const uint32_t num_classes,
+        const uint32_t keep_top_k,
+        const bool share_location = true,
+        const int top_k = -1,
+        const int background_label_id = -1,
+        const padding& output_padding = padding()
+    )
+    : primitive_base(id, { input_bboxes }, output_padding)
+    , num_images(num_images)
+    , num_classes(num_classes)
+    , keep_top_k(keep_top_k)
+    , share_location(share_location)
+    , top_k(top_k)
+    , background_label_id(background_label_id)
+    {}
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{detection_output}
+    detection_output_sort(const dto* dto)
+        : primitive_base(dto)
+        , num_images(dto->num_images)
+        , num_classes(dto->num_classes)
+        , keep_top_k(dto->keep_top_k)
+        , share_location(dto->share_location != 0)
+        , top_k(dto->top_k)
+        , background_label_id(dto->background_label_id)
+    {}
+
+    /// @brief Number of classes to be predicted.
+    const uint32_t num_images;
+    /// @brief Number of classes to be predicted.
+    const uint32_t num_classes;
+    /// @brief Number of total bounding boxes to be kept per image after NMS step.
+    const int keep_top_k;
+    /// @brief If true, bounding box are shared among different classes.
+    const bool share_location;
+    /// @brief Maximum number of results to be kept in NMS.
+    const int top_k;
+    /// @brief Background label id (-1 if there is no background class).
+    const int background_label_id;
+
+
+protected:
+    void update_dto(dto& dto) const override
+    {
+        dto.num_classes = num_classes;
+        dto.num_images = num_images;
+        dto.keep_top_k = keep_top_k;
+        dto.share_location = share_location;
+        dto.top_k = top_k;
+        dto.background_label_id = background_label_id;
     }
 };
 /// @}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/eltwise.hpp b/inference-engine/thirdparty/clDNN/api/CPP/eltwise.hpp
index f1b208410..619be492a 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/eltwise.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/eltwise.hpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -45,14 +45,35 @@ enum class eltwise_mode : int32_t
     min = cldnn_eltwise_min,
     /// @brief Eltwise pow.
     pow = cldnn_eltwise_pow,
+    /// @brief Eltwise squared diff.
+    squared_diff = cldnn_eltwise_squared_diff,
     /// @brief Eltwise mod.
     mod = cldnn_eltwise_mod,
+    /// @brief Eltwise equal.
+    eq = cldnn_eltwise_eq,
+    /// @brief Eltwise not equal.
+    ne = cldnn_eltwise_ne,
+    /// @brief Eltwise less.
+    lt = cldnn_eltwise_lt,
+    /// @brief Eltwise less of equal.
+    le = cldnn_eltwise_le,
+    /// @brief Eltwise greater.
+    gt = cldnn_eltwise_gt,
+    /// @brief Eltwise greater or equal.
+    ge = cldnn_eltwise_ge,
+    /// @brief Eltwise and.
+    logic_and = cldnn_eltwise_and,
+    /// @brief Eltwise or.
+    logic_or = cldnn_eltwise_or,
+    /// @brief Eltwise XOR.
+    logic_xor = cldnn_eltwise_xor
 };
 
 /// @brief Performs elementwise operations (sum, subtract, max or product) on two input primitives
 /// Also supports built-in Relu @ref activation available by setting it in arguments.
 /// @notes
-/// - both inputs have to have equal sizes in all dimensions
+/// - both inputs have to have equal sizes in all dimensions or the input tensors are broadcastable
+///   to the same shape in which the size of each dimention is a max. of input sizes on this dimension)
 /// - format of both inputs has to be the same
 /// - when using integer types, only following eltwise modes are supported: sum, sub, prod, div
 struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
@@ -82,6 +103,38 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(std::vector<float>(0))
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
+    {
+    }
+
+    /// @brief Constructs eltwise primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param input2 Second input primitive id with values needed for eltwise computation.
+    /// @param stride Defines shift in input buffers between adjacent calculations of output values.
+    /// @param mode Eltwise mode.
+    /// @param with_activation Enables Relu activation.
+    /// @param activation_slp Relu activation slope.
+    eltwise(
+        const primitive_id& id,
+        const primitive_id& input,
+        const primitive_id& input2,
+        std::vector<tensor> stride,
+        eltwise_mode mode,
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input, input2 }, output_padding)
+        , output_calibration_factors("")
+        , output_quantization_factor(1.0f)
+        , mode(mode)
+        , coefficients(std::vector<float>(0))
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , stride(stride)
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
     }
 
@@ -106,6 +159,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(std::vector<float>(0))
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
     }
 
@@ -134,6 +189,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(std::vector<float>(0))
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
     }
 
@@ -160,6 +217,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(std::vector<float>(0))
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
     }
 
@@ -188,6 +247,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(std::vector<float>(0))
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
     }
 
@@ -214,6 +275,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(std::vector<float>(0))
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
     }
 
@@ -240,6 +303,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(coefficients)
         , with_activation(with_activation)
         , activation_negative_slope(activation_slp)
+        , stride(std::vector<tensor>(0))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
         if (mode == eltwise_mode::sum && !coefficients.empty() && coefficients.size() != inputs.size())
         {
@@ -260,6 +325,8 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
         , coefficients(float_arr_to_vector(dto->coefficients))
         , with_activation(dto->with_activation != 0)
         , activation_negative_slope(dto->activation_negative_slope)
+        , stride(tensor_arr_to_vector(dto->stride))
+        , _stride(tensor_vector_to_cldnn_vector(stride))
     {
         if (dto->input.size < 2)
             throw std::invalid_argument("eltiwise dto should containt at least two inputs");
@@ -279,8 +346,11 @@ struct eltwise : public primitive_base<eltwise, CLDNN_PRIMITIVE_DESC(eltwise)>
     bool with_activation;
     /// @brief Relu activation slope.
     float activation_negative_slope;
+    /// @brief Defines shift in input buffers between adjacent calculations of output values.
+    std::vector<tensor> stride;
 
 protected:
+    std::vector<cldnn_tensor> _stride;
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
     {
         std::vector<std::reference_wrapper<const primitive_id>> ret;
@@ -298,6 +368,7 @@ protected:
         dto.coefficients = float_vector_to_arr(coefficients);
         dto.with_activation = with_activation;
         dto.activation_negative_slope = activation_negative_slope;
+        dto.stride = tensor_vector_to_arr(_stride);
     }
 };
 /// @}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/embed.hpp b/inference-engine/thirdparty/clDNN/api/CPP/embed.hpp
index 8acb96793..0c1d492c0 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/embed.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/embed.hpp
@@ -56,6 +56,19 @@ namespace cldnn
 			, bias(bias)
 		{}
 
+        /// @brief Constructs embed primitive.
+        /// @param id This primitive id.
+        /// @param input Input primitive id.
+        embed(
+            const primitive_id& id,
+            const primitive_id& input,
+            const primitive_id& weights
+        )
+            : primitive_base(id, { input })
+            , weights(weights)
+            , bias("")
+        {}
+
 		/// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{embed}
 		embed(const dto* dto)
 			:primitive_base(dto)
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/engine.hpp b/inference-engine/thirdparty/clDNN/api/CPP/engine.hpp
index 83090a19b..66fbd2b49 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/engine.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/engine.hpp
@@ -63,8 +63,12 @@ struct engine_configuration
     const std::string engine_log;               ///< Specifies a file to which engine log should be dumped. Empty by default (means no logging).
     const std::string sources_dumps_dir;        ///< Specifies a directory where sources of cldnn::program objects should be dumped. Empty by default (means no dumping).
     const priority_mode_types priority_mode;    ///< Priority mode (support of priority hints in command queue). If cl_khr_priority_hints extension is not supported by current OpenCL implementation, the value must be set to cldnn_priority_disabled.
-    const throttle_mode_types throttle_mode;    ///< Placeholder for throttle mode (support of throttle hints in command queue). It has no effect for now and should be set to cldnn_throttle_disabled.
-    bool enable_memory_pool;              ///< Enables memory usage optimization. memory objects will be reused when possible (switched off for older drivers then NEO).
+
+    const throttle_mode_types throttle_mode;    ///< Throttle mode (support of throttle hints in command queue). If cl_khr_throttle_hints extension is not supported by current OpenCL implementation, the value must be set to cldnn_throttle_disabled.
+
+    bool enable_memory_pool;                    ///< Enables memory usage optimization. memory objects will be reused when possible (switched off for older drivers then NEO).
+    void* context;              ///< Pointer to user context
+    const std::string tuning_cache_path;        ///< Path to tuning kernel cache 
 
     /// @brief Constructs engine configuration with specified options.
     /// @param profiling Enable per-primitive profiling.
@@ -83,7 +87,9 @@ struct engine_configuration
             const std::string& sources_dumps_dir = std::string(),
             priority_mode_types priority_mode = priority_mode_types::disabled,
             throttle_mode_types throttle_mode = throttle_mode_types::disabled,
-            bool memory_pool = true)
+            bool memory_pool = true,
+            void* context = nullptr,
+            const std::string& tuning_cache_path = "cache.json")
         : enable_profiling(profiling)
         , meaningful_kernels_names(decorate_kernel_names)
         , dump_custom_program(dump_custom_program)
@@ -95,6 +101,8 @@ struct engine_configuration
         , priority_mode(priority_mode)
         , throttle_mode(throttle_mode)
         , enable_memory_pool(memory_pool)
+        , context(context)
+        , tuning_cache_path(tuning_cache_path)
     {}
 
     engine_configuration(const cldnn_engine_configuration& c_conf)
@@ -109,6 +117,8 @@ struct engine_configuration
         , priority_mode(static_cast<priority_mode_types>(c_conf.priority_mode))
         , throttle_mode(static_cast<throttle_mode_types>(c_conf.throttle_mode))
         , enable_memory_pool(c_conf.enable_memory_pool != 0)
+        , context(c_conf.context)
+		, tuning_cache_path(c_conf.tuning_cache_path)
     {}
 
     /// @brief Implicit conversion to C API @ref ::cldnn_engine_configuration
@@ -125,7 +135,9 @@ struct engine_configuration
             sources_dumps_dir.c_str(),
             static_cast<int16_t>(priority_mode),
             static_cast<int16_t>(throttle_mode),
-            enable_memory_pool
+            enable_memory_pool,
+            context,
+            tuning_cache_path.c_str()
         };
     }
 };
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/gather.hpp b/inference-engine/thirdparty/clDNN/api/CPP/gather.hpp
new file mode 100644
index 000000000..68669c103
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/gather.hpp
@@ -0,0 +1,88 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "../C/gather.h"
+#include "primitive.hpp"
+
+namespace  cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief
+/// @details
+struct gather : public primitive_base<gather, CLDNN_PRIMITIVE_DESC(gather)>
+{
+    CLDNN_DECLARE_PRIMITIVE(gather)
+
+    enum gather_axis
+    {
+        along_b = cldnn_gather_along_b,
+        along_f = cldnn_gather_along_f,
+        along_x = cldnn_gather_along_x,
+        along_y = cldnn_gather_along_y
+    };
+
+    /// @brief Constructs gather primitive.
+    /// @param id This primitive id.
+    /// @param dict Input dictionary primitive id.
+    /// @param idx Input indexes primitive id.
+    /// @param axis Gathering axis.
+    /// @param output_shape Output shape.
+    gather(
+            const primitive_id& id,
+            const primitive_id& dict,
+            const primitive_id& idx,
+            const gather_axis axis,
+            const tensor& output_shape,
+            const padding& output_padding = padding()
+    )
+            : primitive_base(id, {dict, idx}, output_padding)
+            , axis(axis)
+            , output_shape(output_shape)
+    {
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{gather}
+    gather(const dto* dto)
+            : primitive_base(dto)
+            , axis(static_cast<gather_axis >(dto->axis))
+            , output_shape(dto->output_shape)
+    {
+    }
+
+    /// @brief Gathering axis
+    gather_axis axis;
+    /// @brief Gathering input shape
+    tensor output_shape;
+protected:
+
+    void update_dto(dto& dto) const override
+    {
+        dto.axis = static_cast<cldnn_gather_axis>(axis);
+        dto.output_shape = output_shape;
+    }
+};
+/// @}
+/// @}
+/// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/gemm.hpp b/inference-engine/thirdparty/clDNN/api/CPP/gemm.hpp
index ee25c7017..1c3bc1127 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/gemm.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/gemm.hpp
@@ -49,10 +49,10 @@ struct gemm : public primitive_base<gemm, CLDNN_PRIMITIVE_DESC(gemm)>
         /// @brief Constructs gemm layer.
         /// @brief Primitive id containing first matrix
         /// @brief Primitive id containing second matrix
-        /// @brief Variable containing ALPHA parameter
-        /// @brief Variable containing BETA parameter
         /// @brief Flag for transposing first input matrix
         /// @brief Flag for transposing second input matrix
+        /// @brief Variable containing ALPHA parameter
+        /// @brief Variable containing BETA parameter
 
         gemm(
             const primitive_id& id,
@@ -75,10 +75,11 @@ struct gemm : public primitive_base<gemm, CLDNN_PRIMITIVE_DESC(gemm)>
         /// @brief Primitive id containing first matrix
         /// @brief Primitive id containing second matrix
         /// @brief Primitive id containing third matrix 
-        /// @brief Variable containing ALPHA parameter
-        /// @brief Variable containing BETA parameter
         /// @brief Flag for transposing first input matrix
         /// @brief Flag for transposing second input matrix
+        /// @brief Variable containing ALPHA parameter
+        /// @brief Variable containing BETA parameter
+
         gemm(
             const primitive_id& id,
             const primitive_id& input,
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/index_select.hpp b/inference-engine/thirdparty/clDNN/api/CPP/index_select.hpp
index 11ff25a95..589753310 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/index_select.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/index_select.hpp
@@ -21,15 +21,6 @@
 
 namespace cldnn
 {
-/// @brief Axis which index_select primitive will index.
-enum class index_select_axis_name : int32_t
-{
-    along_b,
-    along_f,
-    along_y,
-    along_x
-};
-
 /// @brief Select index, which will be copied to the output..
 ///
 /// @details Applies index selecting along specified dimension. The indices, which will be copied are specifed by 
@@ -63,7 +54,7 @@ struct index_select : public primitive_base<index_select, CLDNN_PRIMITIVE_DESC(i
     /// @param input              An identifier of primitive, which is an input for newly created
     ///                           index_select primitive.
     /// @param indicies           An identifer of primitive, which have indices in memory distributed along x. 
-    /// @param type               Axis of index selecting.
+    /// @param axis               Axis of index selecting.
     /// @param output_padding     Optional padding for output from primitive.
     index_select(
         const primitive_id& id,
@@ -72,23 +63,65 @@ struct index_select : public primitive_base<index_select, CLDNN_PRIMITIVE_DESC(i
         index_select_axis_name axis = index_select_axis_name::along_b,
         const padding& output_padding = padding()
     )
-        : primitive_base(id, {input, indices}, output_padding)
+        : primitive_base(id, { input, indices }, output_padding)
+        , axis( { axis } )
+        , reverse(false)
+    {}
+
+    /// @brief Constructs index_select primitive / layer.
+    ///
+    /// @param id                 An identifier of new primitive.
+    /// @param input              An identifier of primitive, which is an input for newly created
+    ///                           index_select primitive.
+    /// @param axis               Axis of index selecting.
+    /// @param output_padding     Optional padding for output from primitive.
+    index_select(
+        const primitive_id& id,
+        const primitive_id& input,
+        index_select_axis_name axis = index_select_axis_name::along_b,
+        const padding& output_padding = padding()
+    )
+        : primitive_base(id, { input }, output_padding)
+        , axis( { axis } )
+        , reverse(true)
+    {}
+
+    /// @brief Constructs index_select primitive / layer.
+    ///
+    /// @param id                 An identifier of new primitive.
+    /// @param input              An identifier of primitive, which is an input for newly created
+    ///                           index_select primitive.
+    /// @param axis               Vector of axes of index selecting.
+    /// @param output_padding     Optional padding for output from primitive.
+    index_select(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<index_select_axis_name>& axis = { index_select_axis_name::along_b },
+        const padding& output_padding = padding()
+    )
+        : primitive_base(id, { input }, output_padding)
         , axis(axis)
+        , reverse(true)
     {}
 
     /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{broadcast}
     index_select(const dto* dto)
         : primitive_base(dto)
-        , axis(static_cast<index_select_axis_name>(dto->axis))
+        , axis(dto->axis, dto->axis + dto->axis_num)
+        , reverse(dto->reverse)
     {}
 
-    /// @brief Axis of index selecting.
-    index_select_axis_name axis;
+    /// @brief A list of axes of index selecting
+    std::vector<index_select_axis_name> axis;
+    /// @brief Do index_select in reverse order on axis/axes.
+    bool reverse;
 
 protected:
     void update_dto(dto& dto) const override
     {
-        dto.axis = static_cast<cldnn_index_select_axis>(axis);
+        dto.axis = axis.data();
+        dto.axis_num = (int)axis.size();
+        dto.reverse = reverse;
     }
 };
 /// @}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/layout.hpp b/inference-engine/thirdparty/clDNN/api/CPP/layout.hpp
index 56b199801..1f9438461 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/layout.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/layout.hpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -39,6 +39,40 @@ enum class data_types : size_t
     f32 = cldnn_f32,
 };
 
+class optional_data_type
+{
+    // Must be the same as the undrelying type of `data_types`.
+    using storage_type = size_t;
+
+    // Implicitly assumes that this value is not used in the `data_types`.
+    static constexpr auto non_specified_type =
+        std::numeric_limits<storage_type>::max();
+
+public:
+    optional_data_type()
+        : storage(non_specified_type)
+    {}
+
+    optional_data_type(data_types type)
+        : storage(static_cast<storage_type>(type))
+    {}
+
+    operator bool() const { return storage != non_specified_type; }
+
+    // Similarly to std::optional does *not* verify that the object has the type
+    // set. Unlike it, though, returns the value instead of pointer/reference.
+    data_types operator*() const { return static_cast<data_types>(storage); }
+
+    optional_data_type& operator=(const data_types new_type)
+    {
+        storage = static_cast<storage_type>(new_type);
+        return *this;
+    }
+
+private:
+    storage_type storage;
+};
+
 /// Converts C++ type to @ref data_types .
 template <typename T> struct type_to_data_type;
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -98,6 +132,8 @@ struct data_type_traits
         {
         case data_types::i8:
             return "i8";
+        case data_types::u8:
+            return "u8";
         case data_types::i32:
             return "i32";
         case data_types::i64:
@@ -312,6 +348,11 @@ struct layout
             sizes[3] = align_to(sizes[3], 32);
         }
 
+        if (format == format::byx8_f4)
+        {
+            sizes[3] = align_to(sizes[3], 4);
+            sizes[2] = align_to(sizes[2], 8);
+        }
         std::vector<tensor::value_type> pitches(sizes.size(), tensor::value_type(1));
         std::partial_sum(sizes.rbegin(), sizes.rend() - 1, pitches.rbegin() + 1, std::multiplies<tensor::value_type>());
         return{ format, pitches };
@@ -352,6 +393,14 @@ struct layout
         {
             sizes[0] = align_to(sizes[0], 16);
         }
+        else if (this->format == cldnn::format::os_iyx_osv32 && !is_aligned_to(sizes[0], 32))
+        {
+            sizes[0] = align_to(sizes[0], 32);
+        }
+        else if (this->format == cldnn::format::os_iyx_osv64 && !is_aligned_to(sizes[0], 64))
+        {
+            sizes[0] = align_to(sizes[0], 64);
+        }
         else if (this->format == cldnn::format::bs_xs_xsv8_bsv8 && !(is_aligned_to(sizes[0], 8) && is_aligned_to(sizes[2], 8)))
         {
             sizes[0] = align_to(sizes[0], 8);
@@ -376,20 +425,49 @@ struct layout
         {
             sizes[1] = align_to(sizes[1], 32);
         }
+        else if (this->format == cldnn::format::byx8_f4 && (!is_aligned_to(sizes[1], 4) || !is_aligned_to(sizes[2], 8)))
+        {
+            // for this case we want to make sure, that with padding we're aligned to 8 in x
+            auto lp = data_padding.lower_size().spatial[0];
+            auto up = data_padding.upper_size().spatial[0];
+            sizes[1] = align_to(sizes[1], 4);
+            sizes[2] = align_to(lp + up + sizes[2], 8);
+            sizes[2] -= lp + up;
+        }
         else if (this->format == cldnn::format::fs_bs_yx_bsv4_fsv32 && (!(is_aligned_to(sizes[1], 32)) || !(is_aligned_to(sizes[0], 4)) ) )
         {
             sizes[1] = align_to(sizes[1], 32);
             sizes[0] = align_to(sizes[0], 4);
         }
+        else if (this->format == cldnn::format::b_fs_yx_fsv4 && !(is_aligned_to(sizes[1], 4)))
+        {
+            sizes[1] = align_to(sizes[1], 4);
+        }
         else if (this->format == cldnn::format::os_is_yx_isa8_osv8_isv4 && !(is_aligned_to(sizes[0], 8)) && !(is_aligned_to(sizes[1], 32)))
         {
             sizes[0] = align_to(sizes[0], 8);
             sizes[1] = align_to(sizes[1], 32);
         }
+        else if (this->format == cldnn::format::os_is_yx_isa8_osv8_isv4_swizzled_by_4 && !(is_aligned_to(sizes[0], 32)) && !(is_aligned_to(sizes[1], 32)))
+        {
+            sizes[0] = align_to(sizes[0], 32);
+            sizes[1] = align_to(sizes[1], 32);
+        }
         else if (this->format == cldnn::format::is_o_yx_isv32 && !(is_aligned_to(sizes[1], 32)))
         {
             sizes[1] = align_to(sizes[1], 32);
         }
+        else if (this->format == cldnn::format::is_o32_yx_isv32_swizzled_by_4 && (!is_aligned_to(sizes[1], 32) || !(is_aligned_to(sizes[0], 32))))
+        {
+            sizes[0] = align_to(sizes[0], 32);
+            sizes[1] = align_to(sizes[1], 32);
+        }
+        else if (this->format == cldnn::format::os_is_y_x8_osv8_isv4)
+        {
+            sizes[1] = align_to(sizes[1], 4);
+            sizes[0] = align_to(sizes[0], 8);
+            sizes[2] = align_to(sizes[2], 8);
+        }
         return std::accumulate(
             sizes.begin(),
             sizes.end(),
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/lstm.hpp b/inference-engine/thirdparty/clDNN/api/CPP/lstm.hpp
index dd9e99233..2276616c9 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/lstm.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/lstm.hpp
@@ -29,14 +29,14 @@ namespace cldnn
 /// @{
 
 /// @brief Performs forward Long Short-Term Memory (LSTM) layer.
-/// @details The current implementation of LSTM supports Peepholes.
-///   it = f(Xt*(Wi^T) + Ht-1*Ri + Pi (.) Ct-1 + Wbi + Rbi)
-///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Pf (.) Ct-1 + Wbf + Rbf)
-///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc + Rbc)
+/// @details The current implementation of LSTM is described the following equations.
+///   it = f(Xt*(Wi^T) + Ht-1*Ri + Wbi)
+///   ft = f(Xt*(Wf^T) + Ht-1*Rf + Wbf)
+///   ct = g(Xt*(Wc^T) + Ht-1*Rc + Wbc)
 ///   Ct = ft (.) Ct-1 + it (.) ct
-///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Po (.) Ct + Wbo + Rbo)
+///   ot = f(Xt*(Wo^T) + Ht-1*Ro + Wbo)
 ///   Ht = ot (.) h(Ct)
-/// Where f=Sigmoid, g=Tanh, and h = Tanh.
+/// Where f = Sigmoid, g = Tanh, and h = Tanh.
 struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
 {
     CLDNN_DECLARE_PRIMITIVE(lstm)
@@ -53,6 +53,7 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
     /// @param input_forget Provide 0 if using lstm without coupled input-forget gates.
     /// @param activations Vector of activations. Specify [f, g, h]. Default are [sigmoid, tanh, tanh]
     /// @param activation_params Vector of ativation params. Specify params for each [f, g, h] activation.
+    /// @brief Output selection. Default the entire hidden sequence is returned.
     /// @param offset_order Order of the concatenated weights, recurrent, and bias. ONNX default is iofz [input, output, forget, block].
     lstm(
         const primitive_id& id,
@@ -67,6 +68,7 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
         const bool input_forget = 0,
         const std::vector<cldnn_activation_func>& activations = {},
         const std::vector<cldnn_activation_additional_params> activation_params = {},
+        const cldnn_lstm_output output_selection = cldnn_lstm_output_sequence,
         const cldnn_lstm_offset_order offset_order = cldnn_lstm_offset_order_iofz,
         const padding& output_padding = padding()
         )
@@ -81,6 +83,7 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
         , input_forget(input_forget)
         , activations(activations)
         , activation_params(activation_params)
+        , output_selection(output_selection)
         , offset_order(offset_order)
     {
     }
@@ -98,6 +101,7 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
         , input_forget(dto->input_forget)
 		, activations(dto->activations, std::end(dto->activations))
 		, activation_params(dto->activation_params, std::end(dto->activation_params))
+        , output_selection(dto->output_selection)
         , offset_order(dto->offset_order)
     {
     }
@@ -122,6 +126,8 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
     std::vector<cldnn_activation_func> activations;
     /// @brief Optional scaling values used by some activation functions. The values are consumed in the order of activation functions.
     std::vector<cldnn_activation_additional_params> activation_params;
+    /// @brief Output selection. Default the entire hidden sequence is returned.
+    cldnn_lstm_output output_selection;
     /// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
     cldnn_lstm_offset_order offset_order;
 
@@ -129,7 +135,7 @@ struct lstm : public primitive_base<lstm, CLDNN_PRIMITIVE_DESC(lstm)>
     // /// @brief Optional tensor specifying lengths of the sequences in a batch.
     // /// If not specified - assumed all sequences in the batch to have length `seq_length`. It has shape `[batch_size]`.
     // tensor sequence_lens;
-    // /// @brief The sequence output for the hidden??? This is not clearly specified in the ONNX definition.
+    // /// @brief The sequence output for the hidden.
     // uint32_t output_sequence;
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
@@ -160,6 +166,7 @@ protected:
         dto.peepholes = peepholes.c_str();
         dto.initial_hidden = initial_hidden.c_str();
         dto.initial_cell = initial_cell.c_str();
+        dto.output_selection = output_selection;
         dto.offset_order = offset_order;
         if (activations.size() == 3) {
             std::copy_n(activations.begin(), 3, dto.activations);
@@ -271,6 +278,7 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
         const std::vector<cldnn_activation_func> activations = {},
         const std::vector<cldnn_activation_additional_params> activation_params = {},
         const cldnn_lstm_offset_order offset_order = cldnn_lstm_offset_order_iofz,
+        const uint32_t direction = 0,
         const padding& output_padding = padding()
         )
         : primitive_base(id, {input}, output_padding)
@@ -280,6 +288,7 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
         , activations(activations)
         , activation_params(activation_params)
         , offset_order(offset_order)
+        , direction(direction)
     {
     }
 
@@ -292,6 +301,7 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
 		, activations(dto->activations, std::end(dto->activations))
 		, activation_params(dto->activation_params, std::end(dto->activation_params))
         , offset_order(dto->offset_order)
+        , direction(dto->direction)
     {
     }
 
@@ -307,6 +317,9 @@ struct lstm_elt : public primitive_base<lstm_elt, CLDNN_PRIMITIVE_DESC(lstm_elt)
     std::vector<cldnn_activation_additional_params> activation_params;
     /// @brief Weights, recurrent weights, and biases order. [iofz] : ONNX, [ifoz] : Caffe
     cldnn_lstm_offset_order offset_order;
+    /// @brief direction default = 0, bidirectional = 1.
+    uint32_t direction;
+
 protected:
     std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
     {
@@ -328,6 +341,7 @@ protected:
         if (activation_params.size() == 3) {
             std::copy_n(activation_params.begin(), 3, dto.activation_params);
         }
+        dto.direction = direction;
     }
 };
 
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/one_hot.hpp b/inference-engine/thirdparty/clDNN/api/CPP/one_hot.hpp
new file mode 100644
index 000000000..5f997b27d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/one_hot.hpp
@@ -0,0 +1,103 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "../C/one_hot.h"
+#include "primitive.hpp"
+
+
+namespace cldnn
+{
+    /// @addtogroup cpp_api C++ API
+    /// @{
+    /// @addtogroup cpp_topology Network Topology
+    /// @{
+    /// @addtogroup cpp_primitives Primitives
+    /// @{
+
+    /// @brief Creates a one-hot encoding of the input.
+    /// @details Creates a one-hot encoding of the input, putting the new one-hot axis in the position
+    /// @n       specified by the @p one_hot_axis input, using the @p shape tensor as size reference.
+    /// @n       The size of @p shape must be appropriate for adding a one-hot axis to input. For example,
+    /// @n      <tt>input_sizes = (1, in_f, in_y, in_x)</tt> 
+    /// @n expanded with 
+    /// @n      <tt>one_hot_axis = 2</tt> 
+    /// @n would insert the one-hot axis in the Y dimension, requiring
+    /// @n      <tt>shape = (in_f, in_y, one-hot_limit, in_x)</tt> 
+    /// @n The output values would then be determined by input as
+    /// @n      <tt>output[f, y, i, x] = (input[0, f, y, x] == i) ? 1 : 0;</tt>
+    /// @n Since determining whether the input is appropriate (that the one-hot axis
+    /// @n has enough space to fully encode all inputs) requires scanning the whole
+    /// @n input, the primitive doesn't check for that, instead producing all-zeros
+    /// @n output axes for inputs below 0 and greater than the limit set by
+    /// @n @p shape.
+    /// @n
+    /// @n@b Requirements
+    /// @n - @p one_hot_axis must be within (inclusive) range 0 - 3.
+    /// @n - @p shape must fit input sizes (see example above).
+    /// @n - input batch size must be equal to 1.
+    /// @n
+    /// @n Breaking any of this conditions will cause exception throw.
+    struct one_hot : public primitive_base<one_hot, CLDNN_PRIMITIVE_DESC(one_hot)>
+    {
+        CLDNN_DECLARE_PRIMITIVE(one_hot)
+
+            /// @brief Constructs one-hot primitive / layer.
+            ///
+            /// @param id              An identifier of new primitive.
+            /// @param input           An identifier of primitive which is an input for newly created
+            ///                        one-hot primitive.
+            /// @param shape           Size of the output primitive.
+            /// @param one_hot_axis    One-hot axis position (0-based, from left to right) in shape.
+            /// @param output_padding  Optional padding for output from primitive.
+            one_hot(
+                const primitive_id& id,
+                const primitive_id& input,
+                const tensor& shape,
+                const uint16_t& one_hot_axis,
+                const padding& output_padding = padding()
+            )
+            : primitive_base(id, { input }, output_padding),
+            shape(shape),
+            one_hot_axis(one_hot_axis)
+        {
+        }
+
+        /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{one_hot}
+        one_hot(const dto* dto)
+            : primitive_base(dto),
+            shape(dto->shape),
+            one_hot_axis(dto->one_hot_axis)
+        {
+        }
+
+        /// @brief Output size reference.
+        tensor shape;
+        /// @brief One-hot axis position in output shape (0-based, from left to right).
+        uint16_t one_hot_axis;
+
+    protected:
+        void update_dto(dto& dto) const override
+        {
+            dto.shape = shape;
+            dto.one_hot_axis = one_hot_axis;
+
+        }
+    };
+    /// @}
+    /// @}
+    /// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/pooling.hpp b/inference-engine/thirdparty/clDNN/api/CPP/pooling.hpp
index 1ca6d8fe9..3e60f79f6 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/pooling.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/pooling.hpp
@@ -68,6 +68,7 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)>
         : primitive_base(id, {input}, output_padding)
         , argmax("")
         , mode(static_cast<pooling_mode>(mode))
+        , global_pooling(false)
         , input_offset(input_offset)
         , stride(stride)
         , size(size)
@@ -95,6 +96,7 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)>
         : primitive_base(id, { input }, output_padding)
         , argmax(argmax)
         , mode(static_cast<pooling_mode>(mode))
+        , global_pooling(false)
         , input_offset(input_offset)
         , stride(stride)
         , size(size)
@@ -122,6 +124,7 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)>
         : primitive_base(id, {input}, output_padding)
         , argmax("")
         , mode(static_cast<pooling_mode>(mode))
+        , global_pooling(false)
         , input_offset(input_offset)
         , stride(stride)
         , size(size)
@@ -152,6 +155,7 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)>
         : primitive_base(id, { input }, output_padding)
         , argmax(argmax)
         , mode(static_cast<pooling_mode>(mode))
+        , global_pooling(false)
         , input_offset(input_offset)
         , stride(stride)
         , size(size)
@@ -159,11 +163,32 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)>
         , output_size(output_size)
     {}
 
+    /// @brief Constructs pooling primitive with kernel size equal to the spatial dimension of input tensor.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param mode Pooling mode.
+    pooling(
+        const primitive_id& id,
+        const primitive_id& input,
+        pooling_mode mode,
+        const padding& output_padding = padding()
+    )
+        : primitive_base(id, { input }, output_padding)
+        , argmax("")
+        , mode(static_cast<pooling_mode>(mode))
+        , global_pooling(true)
+        , input_offset(0, 0, 0, 0)
+        , stride(1, 1, 1, 1)
+        , size(0, 0, 0, 0)
+        , with_output_size(false)
+    {}
+
     /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{pooling}
     pooling(const dto* dto)
         : primitive_base(dto)
         , argmax(dto->argmax)
         , mode(static_cast<pooling_mode>(dto->mode))
+        , global_pooling(dto->global_pooling != 0)
         , input_offset(dto->input_offset)
         , stride(dto->stride)
         , size(dto->size)
@@ -223,6 +248,8 @@ struct pooling : public primitive_base<pooling, CLDNN_PRIMITIVE_DESC(pooling)>
     primitive_id argmax;
     /// @brief Pooling mode.
     pooling_mode mode;
+    /// @brief Global pooling (kernel size is equal to the spatial dimension of input tensor)
+    bool global_pooling;
     /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the pooling window should start calculations.
     tensor input_offset;
     /// @brief Defines shift in input buffer between adjacent calculations of output values.
@@ -251,9 +278,10 @@ protected:
         dto.size = size;
         dto.with_output_size = with_output_size;
         dto.output_size = output_size;
+        dto.global_pooling = global_pooling;
     }
 };
 /// @}
 /// @}
 /// @}
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp b/inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp
index 8314afc5a..41fa27d87 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/primitive.hpp
@@ -92,15 +92,30 @@ public:
         const primitive_type_id& type,
         const primitive_id& id,
         const std::vector<primitive_id>& input,
-        const padding& output_padding = padding()
+        const padding& output_padding = padding(),
+        const optional_data_type output_data_type = optional_data_type()
     )
-        :type(type), id(id), input(_input.cpp_ids), output_padding(output_padding), _input(input)
+        : type(type)
+        , id(id)
+        , input(_input.cpp_ids)
+        , output_padding(output_padding)
+        , output_data_type(output_data_type)
+        , _input(input)
     {}
 
     /// @brief Constructs a copy from basic C API @CLDNN_PRIMITIVE_DESC{primitive}
-    primitive(const CLDNN_PRIMITIVE_DESC(primitive)* dto)
-        :type(dto->type), id(dto->id), input(_input.cpp_ids), output_padding(dto->output_padding), _input(dto->input)
-    {}
+    primitive(const CLDNN_PRIMITIVE_DESC(primitive) * dto)
+        : type(dto->type)
+        , id(dto->id)
+        , input(_input.cpp_ids)
+        , output_padding(dto->output_padding)
+        , output_data_type(dto->output_data_type.enabled
+                               ? optional_data_type{static_cast<data_types>(
+                                     dto->output_data_type.data_type)}
+                               : optional_data_type{})
+        , _input(dto->input)
+    {
+    }
 
     virtual ~primitive() = default;
 
@@ -114,7 +129,7 @@ public:
     {
         std::vector<std::reference_wrapper<primitive_id>> result;
         auto&& deps = get_dependencies();
-
+        
         result.reserve(_input.size() + deps.size());
         for (auto& pid : _input.cpp_ids)
             result.push_back(std::ref(pid));
@@ -148,6 +163,9 @@ public:
     /// @brief Requested output padding.
     padding output_padding;
 
+    /// @brief Requested output precision, if any.
+    optional_data_type output_data_type;
+
 protected:
     struct primitive_id_arr
     {
@@ -198,6 +216,9 @@ public:
         _dto.type = type;
         _dto.input = _input.ref();
         _dto.output_padding = output_padding;
+        _dto.output_data_type.enabled = (bool)output_data_type;
+        _dto.output_data_type.data_type =
+            static_cast<cldnn_data_type>(*output_data_type);
 
         //call abstract method to update primitive-specific fields
         update_dto(_dto);
@@ -208,14 +229,15 @@ protected:
     explicit primitive_base(
         const primitive_id& id,
         const std::vector<primitive_id>& input,
-        const padding& output_padding = padding())
-        : primitive(PType::type_id(), id, input, output_padding)
+        const padding& output_padding = padding(),
+        optional_data_type output_data_type = optional_data_type())
+        : primitive(PType::type_id(), id, input, output_padding, output_data_type)
     {}
 
     primitive_base(const DTO* dto)
         : primitive(reinterpret_cast<const CLDNN_PRIMITIVE_DESC(primitive)*>(dto))
     {
-        if (dto->type != PType::type_id())
+        if (dto->type != PType::type_id()) 
             throw std::invalid_argument("DTO type mismatch");
     }
 
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/prior_box.hpp b/inference-engine/thirdparty/clDNN/api/CPP/prior_box.hpp
index a21afda2e..c5ad40a2e 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/prior_box.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/prior_box.hpp
@@ -91,6 +91,9 @@ struct prior_box : public primitive_base<prior_box, CLDNN_PRIMITIVE_DESC(prior_b
             if (!already_exist) {
                 this->aspect_ratios.push_back(new_aspect_ratio);
                 if (flip) {
+                    if (std::fabs(new_aspect_ratio) < std::numeric_limits<float>::epsilon()) {
+                        throw std::runtime_error("prior_box aspect ratio can't be zero!");
+                    }
                     this->aspect_ratios.push_back(1.f / new_aspect_ratio);
                 }
             }
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/program.hpp b/inference-engine/thirdparty/clDNN/api/CPP/program.hpp
index 6657765c5..a8520ad3d 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/program.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/program.hpp
@@ -41,6 +41,9 @@ enum class build_option_type
     /// @brief Enable implicit reordering for user inputs (default: false).
     optimize_data = cldnn_build_option_optimize_data,
 
+    /// @brief Enable running detection output layer always on gpu, regardless performance
+    detection_output_gpu = cldnn_build_option_detection_output_gpu,
+
     /// @brief Enable debug mode (default: false).
     /// @details This option enforce all program primitives to be accessible as outputs.
     debug = cldnn_build_option_debug,
@@ -112,6 +115,9 @@ struct build_option
     /// @brief Enable implicit reordering for user inputs (default: false).
     static std::shared_ptr<const build_option> optimize_data(bool enable = false);
 
+    /// @brief Enable running detection output layer always on GPU, regardless performance (default: false).
+    static std::shared_ptr<const build_option> detection_output_gpu(bool enable = false);
+
     /// @brief Enable debug mode (default: false).
     /// @details This option enforce all program primitives to be accessible as outputs.
     static std::shared_ptr<const build_option> debug(bool enable = false);
@@ -462,6 +468,16 @@ namespace detail
             return std::make_shared<object_type>(option);
         }
     };
+    template<> struct build_option_traits<build_option_type::detection_output_gpu>
+    {
+        typedef build_option_bool<build_option_type::detection_output_gpu> object_type;
+        static std::shared_ptr<const build_option> make_default() { return build_option::detection_output_gpu(); }
+        static std::shared_ptr<const build_option> make_option(const cldnn_build_option& option)
+        {
+            assert(option.type == cldnn_build_option_detection_output_gpu);
+            return std::make_shared<object_type>(option);
+        }
+    };
     template<> struct build_option_traits<build_option_type::debug>
     {
         typedef build_option_bool<build_option_type::debug> object_type;
@@ -547,6 +563,11 @@ inline std::shared_ptr<const build_option> build_option::optimize_data(bool enab
     return std::make_shared<build_option_bool<build_option_type::optimize_data>>(enable);
 }
 
+inline std::shared_ptr<const build_option> build_option::detection_output_gpu(bool enable)
+{
+    return std::make_shared<build_option_bool<build_option_type::detection_output_gpu>>(enable);
+}
+
 inline std::shared_ptr<const build_option> build_option::debug(bool enable)
 {
     return std::make_shared<build_option_bool<build_option_type::debug>>(enable);
@@ -664,10 +685,12 @@ private:
         {
         case cldnn_build_option_fusing:
             return detail::build_option_traits<build_option_type::fusing>::make_option(option);
-		case cldnn_build_option_learning_config:
-			return detail::build_option_traits<build_option_type::learning_config>::make_option(option);
+        case cldnn_build_option_learning_config:
+            return detail::build_option_traits<build_option_type::learning_config>::make_option(option);
         case cldnn_build_option_optimize_data:
             return detail::build_option_traits<build_option_type::optimize_data>::make_option(option);
+        case cldnn_build_option_detection_output_gpu:
+            return detail::build_option_traits<build_option_type::detection_output_gpu>::make_option(option);
         case cldnn_build_option_debug:
             return detail::build_option_traits<build_option_type::debug>::make_option(option);
         case cldnn_build_option_outputs:
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/proposal.hpp b/inference-engine/thirdparty/clDNN/api/CPP/proposal.hpp
index ab4bb3376..8de42da61 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/proposal.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/proposal.hpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017-2018 Intel Corporation
+// Copyright (c) 2017-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,9 +34,9 @@ namespace cldnn
 struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)>
 {
     CLDNN_DECLARE_PRIMITIVE(proposal)
- 
+
     proposal(
-        const primitive_id& id,        
+        const primitive_id& id,
         const primitive_id& cls_scores,
         const primitive_id& bbox_pred,
         const primitive_id& image_info,
@@ -65,8 +65,11 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
                  box_size_scale(1.0f),
                  swap_xy(false),
                  initial_clip(false),
+                 clip_before_nms(true),
+                 clip_after_nms(false),
                  round_ratios(true),
-                 shift_anchors(false)
+                 shift_anchors(false),
+                 normalize(false)
     {
     }
 
@@ -89,8 +92,11 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
         float box_size_scale,
         bool swap_xy,
         bool initial_clip,
+        bool clip_before_nms,
+        bool clip_after_nms,
         bool round_ratios,
         bool shift_anchors,
+        bool normalize,
         const padding& output_padding = padding()
         )
         : primitive_base(id, {cls_scores, bbox_pred, image_info}, output_padding),
@@ -108,8 +114,11 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
                  box_size_scale(box_size_scale),
                  swap_xy(swap_xy),
                  initial_clip(initial_clip),
+                 clip_before_nms(clip_before_nms),
+                 clip_after_nms(clip_after_nms),
                  round_ratios(round_ratios),
-                 shift_anchors(shift_anchors)
+                 shift_anchors(shift_anchors),
+                 normalize(normalize)
     {
     }
 
@@ -129,8 +138,11 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
         box_size_scale(dto->box_size_scale),
         swap_xy(dto->swap_xy != 0),
         initial_clip(dto->initial_clip != 0),
+        clip_before_nms(dto->clip_before_nms != 0),
+        clip_after_nms(dto->clip_after_nms != 0),
         round_ratios(dto->round_ratios != 0),
-        shift_anchors(dto->shift_anchors != 0)
+        shift_anchors(dto->shift_anchors != 0),
+        normalize(dto->normalize != 0)
     {
     }
 
@@ -140,7 +152,7 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
     int min_bbox_size;
     int feature_stride;
     int pre_nms_topn;
-    int post_nms_topn;      
+    int post_nms_topn;
     std::vector<float> ratios;
     std::vector<float> scales;
     float coordinates_offset;
@@ -148,8 +160,11 @@ struct proposal : public primitive_base<proposal, CLDNN_PRIMITIVE_DESC(proposal)
     float box_size_scale;
     bool swap_xy;
     bool initial_clip;
+    bool clip_before_nms;
+    bool clip_after_nms;
     bool round_ratios;
     bool shift_anchors;
+    bool normalize;
 
 protected:
     void update_dto(dto& dto) const override
@@ -168,8 +183,11 @@ protected:
         dto.box_size_scale = box_size_scale;
         dto.swap_xy = swap_xy;
         dto.initial_clip = initial_clip;
+        dto.clip_before_nms = clip_before_nms;
+        dto.clip_after_nms = clip_after_nms;
         dto.round_ratios = round_ratios;
         dto.shift_anchors = shift_anchors;
+        dto.normalize = normalize;
     }
 };
 
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/pyramid_roi_align.hpp b/inference-engine/thirdparty/clDNN/api/CPP/pyramid_roi_align.hpp
new file mode 100644
index 000000000..243c66b37
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/pyramid_roi_align.hpp
@@ -0,0 +1,64 @@
+// Copyright (c) 2016-2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "../C/pyramid_roi_align.h"
+#include "primitive.hpp"
+
+using namespace std;
+
+namespace cldnn {
+
+    struct pyramid_roi_align : public primitive_base<pyramid_roi_align, CLDNN_PRIMITIVE_DESC(pyramid_roi_align)>
+    {
+        CLDNN_DECLARE_PRIMITIVE(pyramid_roi_align)
+
+        pyramid_roi_align(
+            const primitive_id& id,
+            const primitive_id& input,
+            const padding& output_padding = padding()
+        )
+         : primitive_base(id, { input }, output_padding)
+        {}
+
+        pyramid_roi_align(
+            const primitive_id &id_c,
+            const primitive_id &base_str,
+            const primitive_id &meta_str,
+            const primitive_id &P2_str,
+            const primitive_id &P3_str,
+            const primitive_id &P4_str,
+            const primitive_id &P5_str,
+            const primitive_id &pool_size_str,
+            const padding& output_padding = padding()
+        )
+            : primitive_base(std::string(id_c), { 
+                    base_str, meta_str, P2_str, P3_str,
+                    P4_str, P5_str, pool_size_str},
+                    output_padding)
+        {}
+
+        /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{broadcast}
+        pyramid_roi_align(const dto* dto)
+            : primitive_base(dto)
+
+        {}
+
+    protected:
+        void update_dto(dto &) const override
+        {}
+
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/reorder.hpp b/inference-engine/thirdparty/clDNN/api/CPP/reorder.hpp
index cf39f715f..78001efc0 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/reorder.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/reorder.hpp
@@ -49,9 +49,8 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)>
         const std::vector<float>& values_to_subtract = {},
         const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract
     )
-        : primitive_base(id, { input }, output_layout.data_padding)
+        : primitive_base(id, { input }, output_layout.data_padding, { output_layout.data_type })
         , output_format(output_layout.format)
-        , output_data_type(output_layout.data_type)
         , mean("")
         , subtract_per_feature(values_to_subtract)
         , mean_mode(mode)
@@ -70,9 +69,8 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)>
         primitive_id const& mean,
         const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract
     )
-        : primitive_base(id, { input }, output_layout.data_padding)
+        : primitive_base(id, { input }, output_layout.data_padding, { output_layout.data_type })
         , output_format(output_layout.format)
-        , output_data_type(output_layout.data_type)
         , mean(mean)
         , subtract_per_feature(0)
         , mean_mode(mode)
@@ -93,9 +91,8 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)>
         const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract,
         const padding& output_padding = padding()
     )
-        : primitive_base(id, { input }, output_padding)
+        : primitive_base(id, { input }, output_padding, { output_data_type })
         , output_format(output_format)
-        , output_data_type(output_data_type)
         , mean("")
         , subtract_per_feature(values_to_subtract)
         , mean_mode(mode)
@@ -116,9 +113,8 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)>
         const cldnn_reorder_mean_mode mode = cldnn_reorder_mean_mode::mean_subtract,
         const padding& output_padding = padding()
     )
-        : primitive_base(id, { input }, output_padding)
+        : primitive_base(id, { input }, output_padding, { output_data_type })
         , output_format(output_format)
-        , output_data_type(output_data_type)
         , mean(mean)
         , subtract_per_feature(0)
         , mean_mode(mode)
@@ -129,7 +125,6 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)>
     reorder(const dto* dto)
         : primitive_base(dto)
         , output_format(dto->output_format)
-        , output_data_type(static_cast<data_types>(dto->output_data_type))
         , mean(dto->mean_subtract)
         , subtract_per_feature(float_arr_to_vector(dto->subtract_per_feature))
         , mean_mode(dto->mean_mode)
@@ -138,8 +133,6 @@ struct reorder : public primitive_base<reorder, CLDNN_PRIMITIVE_DESC(reorder)>
 
     /// @brief Requested memory format.
     format output_format;
-    /// @brief Requested memory data type.
-    data_types output_data_type;
     /// @brief Primitive id to get mean subtract values. Ignored if subtract_per_featrue is set.
     primitive_id mean;
     /// @brief Array of mean subtract values.
@@ -158,7 +151,6 @@ protected:
     void update_dto(dto& dto) const override
     {
         dto.output_format = static_cast<cldnn_format_type>(output_format.value);
-        dto.output_data_type = static_cast<cldnn_data_type>(output_data_type);
         dto.mean_subtract = mean.c_str();
         dto.subtract_per_feature = float_vector_to_arr(subtract_per_feature);
         dto.mean_mode = mean_mode;
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/reshape.hpp b/inference-engine/thirdparty/clDNN/api/CPP/reshape.hpp
index 233ee91f0..7c834d5a0 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/reshape.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/reshape.hpp
@@ -41,6 +41,8 @@ struct reshape : public primitive_base<reshape, CLDNN_PRIMITIVE_DESC(reshape)>
     /// @param id This primitive id.
     /// @param input Input primitive id.
     /// @param output_shape Requested memory shape (excluding padding).
+    /// A dimension could be 0, in this case,  the value is taken from the input tensor.
+    /// At most one dimension of the new shape can be -1. In this case, the value is inferred from the size of the tensor and the remaining dimensions.
     /// @param output_padding Requested memory padding.
     reshape(
         const primitive_id& id,
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/reverse_sequence.hpp b/inference-engine/thirdparty/clDNN/api/CPP/reverse_sequence.hpp
new file mode 100644
index 000000000..9269e4246
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/reverse_sequence.hpp
@@ -0,0 +1,100 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "../C/reverse_sequence.h"
+#include "primitive.hpp"
+
+namespace  cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief
+/// @details
+struct reverse_sequence : public primitive_base<reverse_sequence, CLDNN_PRIMITIVE_DESC(reverse_sequence)>
+{
+    CLDNN_DECLARE_PRIMITIVE(reverse_sequence)
+
+    /// @brief Constructs reverse_sequence primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param seq_lengths Sequence lengths primitive id.
+    /// @param seq_axis The axis which is partially reversed.
+    /// @param batch_axis The axis along which reversal is performed.
+    reverse_sequence(
+            const primitive_id& id,
+            const primitive_id& input,
+            const primitive_id& seq_lengths,
+            const int32_t seq_axis,
+            const int32_t batch_axis = 0,
+            const padding& output_padding = padding()
+    )
+            : primitive_base(id, {input, seq_lengths}, output_padding)
+            , seq_axis(seq_axis)
+            , batch_axis(batch_axis)
+    {
+        const int32_t number_of_dims = 4;
+
+        int32_t batch_a = batch_axis;
+        int32_t seq_a = seq_axis;
+
+        if (batch_a < 0)
+            batch_a += number_of_dims;
+
+        if (seq_a < 0)
+            seq_a += number_of_dims;
+
+        if (batch_a == seq_a)
+            throw std::runtime_error("Batch axis and sequence axis should not be equal\n");
+
+        if (batch_a < 0 || batch_a >= number_of_dims)
+            throw std::runtime_error("Incorrect batch axis value! Actual axis is" + std::to_string(batch_a));
+
+        if (seq_a < 0 || seq_a >= number_of_dims)
+            throw std::runtime_error("Incorrect sequence axis value! Actual axis is" + std::to_string(seq_a));
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{reverse_sequence}
+    reverse_sequence(const dto* dto)
+            : primitive_base(dto)
+            , seq_axis(dto->seq_axis)
+            , batch_axis(dto->batch_axis)
+    {
+    }
+
+    /// @brief The axis which is partially reversed.
+    int32_t seq_axis;
+    /// @brief The axis along which reversal is performed.
+    int32_t batch_axis;
+protected:
+
+    void update_dto(dto& dto) const override
+    {
+        dto.seq_axis = seq_axis;
+        dto.batch_axis = batch_axis;
+    }
+};
+/// @}
+/// @}
+/// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/roi_pooling.hpp b/inference-engine/thirdparty/clDNN/api/CPP/roi_pooling.hpp
index 3007f8cdc..1b5afa6b2 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/roi_pooling.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/roi_pooling.hpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -39,43 +39,58 @@ struct roi_pooling : public primitive_base<roi_pooling, CLDNN_PRIMITIVE_DESC(roi
         const primitive_id& input_data,
         const primitive_id& input_rois,
         pooling_mode mode,
+        bool position_sensitive,
         int pooled_width,
         int pooled_height,
         float spatial_scale,
-        int group_sz = 0,
+        int output_dim = 0,
+        int spatial_bins_x = 1,
+        int spatial_bins_y = 1,
         const padding& output_padding = padding()
         )
         : primitive_base(id, {input_data, input_rois}, output_padding)
         , mode(mode)
+        , position_sensitive(position_sensitive)
         , pooled_width(pooled_width)
         , pooled_height(pooled_height)
         , spatial_scale(spatial_scale)
-        , group_sz(group_sz)
+        , output_dim(output_dim)
+        , spatial_bins_x(spatial_bins_x)
+        , spatial_bins_y(spatial_bins_y)
     {}
 
     roi_pooling(const dto* dto)
         : primitive_base(dto)
         , mode(static_cast<pooling_mode>(dto->mode))
+        , position_sensitive(dto->position_sensitive)
         , pooled_width(dto->pooled_width)
         , pooled_height(dto->pooled_height)
         , spatial_scale(dto->spatial_scale)
-        , group_sz(dto->group_sz)
+        , output_dim(dto->output_dim)
+        , spatial_bins_x(dto->spatial_bins_x)
+        , spatial_bins_y(dto->spatial_bins_y)
     {}
 
     pooling_mode mode;
+    bool position_sensitive;
     int pooled_width;
     int pooled_height;
     float spatial_scale;
-    int group_sz;
+    int output_dim;
+    int spatial_bins_x;
+    int spatial_bins_y;
 
 protected:
     void update_dto(dto& dto) const override
     {
         dto.mode = static_cast<int32_t>(mode);
+        dto.position_sensitive = position_sensitive;
         dto.pooled_width = pooled_width;
         dto.pooled_height = pooled_height;
         dto.spatial_scale = spatial_scale;
-        dto.group_sz = group_sz;
+        dto.output_dim = output_dim;
+        dto.spatial_bins_x = spatial_bins_x;
+        dto.spatial_bins_y = spatial_bins_y;
     }
 };
 
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/shuffle_channels.hpp b/inference-engine/thirdparty/clDNN/api/CPP/shuffle_channels.hpp
new file mode 100644
index 000000000..03c974eef
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/shuffle_channels.hpp
@@ -0,0 +1,79 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include "../C/shuffle_channels.h"
+#include "primitive.hpp"
+
+namespace  cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief
+/// @details
+struct shuffle_channels : public primitive_base<shuffle_channels, CLDNN_PRIMITIVE_DESC(shuffle_channels)>
+{
+    CLDNN_DECLARE_PRIMITIVE(shuffle_channels)
+
+    /// @brief Constructs shuffle_channels primitive.
+    /// @param id This primitive id.
+    /// @param input Input dictionary primitive id.
+    /// @param group The number of groups to split the channel dimension.
+    /// @param axis The index of the channel dimension.
+    shuffle_channels(
+            const primitive_id& id,
+            const primitive_id& input,
+            const int32_t group,
+            const int32_t axis = 1,
+            const padding& output_padding = padding()
+    )
+            : primitive_base(id, {input}, output_padding)
+            , group(group)
+            , axis(axis)
+    {
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{shuffle_channels}
+    shuffle_channels(const dto* dto)
+            : primitive_base(dto)
+            , group(dto->group)
+            , axis(dto->axis)
+    {
+    }
+
+    /// @brief The number of groups to split the channel dimension. This number must evenly divide the channel dimension size.
+    int32_t group;
+    /// @brief The index of the channel dimension (default is 1).
+    int32_t axis;
+protected:
+
+    void update_dto(dto& dto) const override
+    {
+        dto.group = group;
+        dto.axis = axis;
+    }
+};
+/// @}
+/// @}
+/// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/split.hpp b/inference-engine/thirdparty/clDNN/api/CPP/split.hpp
index 0ed7f22a9..08e3789c7 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/split.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/split.hpp
@@ -112,35 +112,6 @@ protected:
 
         return res;
     }
-
-    static std::vector<tensor> tensor_arr_to_vector(const cldnn_tensor_arr& arr)
-    {
-        std::vector<tensor> result(arr.size);
-        for (size_t i = 0; i < arr.size; i++)
-            result[i] = arr.data[i];
-
-        return result;
-    }
-
-    static std::vector<cldnn_tensor> tensor_arr_to_cldnn_vector(const cldnn_tensor_arr& arr)
-    {
-        std::vector<cldnn_tensor> result(arr.size);
-        for (size_t i = 0; i < arr.size; i++)
-            result[i] = arr.data[i];
-
-        return result;
-    }
-
-    static std::vector<cldnn_tensor> tensor_vector_to_cldnn_vector(const std::vector<tensor>& stor)
-    {
-        std::vector<cldnn_tensor> res;
-        res.resize(stor.size());
-        for (size_t i = 0; i < stor.size(); ++i)
-            res[i] = stor[i];
-
-        return res;
-    }
-
 };
 /// @}
 /// @}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/strided_slice.hpp b/inference-engine/thirdparty/clDNN/api/CPP/strided_slice.hpp
new file mode 100644
index 000000000..98bcc7440
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api/CPP/strided_slice.hpp
@@ -0,0 +1,99 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "../C/strided_slice.h"
+#include "primitive.hpp"
+
+namespace  cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief
+/// @details
+struct strided_slice : public primitive_base<strided_slice, CLDNN_PRIMITIVE_DESC(strided_slice)>
+{
+    CLDNN_DECLARE_PRIMITIVE(strided_slice)
+
+    /// @brief Constructs strided_slice primitive.
+    /// @param id This primitive id.
+    /// @param input Input data primitive id.
+    /// @param begin_id Begin position primitive id.
+    /// @param end_id End position primitive id.
+    /// @param strides_id Step of slicing primitive id.
+    /// @param begin_mask Array of bits, that provide replace begin[i] to max possible range in that dimension.
+    /// @param end_mask Array of bits, that provide replace end[i] to max possible range in that dimension.
+    /// @param new_axis_mask Array of bits, that provide adding a new length 1 dimension at ith position in the output tensor.
+    /// @param shrink_axis_mask Array of bits, that provide shrinks the dimensionality by 1, taking on the value at index begin[i].
+    strided_slice(
+        const primitive_id& id,
+        const primitive_id& input,
+        const primitive_id& begin_id,
+        const primitive_id& end_id,
+        const primitive_id& strides_id,
+        std::vector<uint8_t> begin_mask,
+        std::vector<uint8_t> end_mask,
+        std::vector<uint8_t> new_axis_mask,
+        std::vector<uint8_t> shrink_axis_mask,
+        const padding& output_padding = padding()
+    )
+        : primitive_base(id, {input, begin_id, end_id, strides_id}, output_padding)
+        , begin_mask(begin_mask)
+        , end_mask(end_mask)
+        , new_axis_mask(new_axis_mask)
+        , shrink_axis_mask(shrink_axis_mask)
+    {
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{strided_slice}
+    strided_slice(const dto* dto)
+        : primitive_base(dto)
+        , begin_mask(uint8_t_arr_to_vector(dto->begin_mask))
+        , end_mask(uint8_t_arr_to_vector(dto->end_mask))
+        , new_axis_mask(uint8_t_arr_to_vector(dto->new_axis_mask))
+        , shrink_axis_mask(uint8_t_arr_to_vector(dto->shrink_axis_mask))
+    {
+    }
+
+    /// @param begin_mask Array of bits, that provide replace begin[i] to max possible range in that dimension.
+    std::vector<uint8_t> begin_mask;
+    /// @param end_mask Array of bits, that provide replace end[i] to max possible range in that dimension.
+    std::vector<uint8_t> end_mask;
+    /// @param new_axis_mask Array of bits, that provide adding a new length 1 dimension at ith position in the output tensor.
+    std::vector<uint8_t> new_axis_mask;
+    /// @param shrink_axis_mask Array of bits, that provide shrinks the dimensionality by 1, taking on the value at index begin[i].
+    std::vector<uint8_t> shrink_axis_mask;
+
+protected:
+
+    void update_dto(dto& dto) const override
+    {
+        dto.begin_mask = uint8_t_vector_to_arr(begin_mask);
+        dto.end_mask = uint8_t_vector_to_arr(end_mask);
+        dto.new_axis_mask = uint8_t_vector_to_arr(new_axis_mask);
+        dto.shrink_axis_mask = uint8_t_vector_to_arr(shrink_axis_mask);
+    }
+};
+/// @}
+/// @}
+/// @}
+}
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/tensor.hpp b/inference-engine/thirdparty/clDNN/api/CPP/tensor.hpp
index 2a5439e63..9528f0133 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/tensor.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/tensor.hpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -45,6 +45,8 @@ struct format_traits
     size_t feature_num;
     /// @brief Number of spatial (x,y) dimensions in a format.
     size_t spatial_num;
+    /// @brief Number of local (x,y) dimensions in a format.
+    size_t local_num;
     /// @brief Dimensions changing order from rare to often.
     std::string order;
     /// @brief Dimensions order for internal storage.
@@ -55,12 +57,16 @@ struct format_traits
     static const char* feature_chars() { return "fioc"; }
     /// @brief Characters representing spatial dimensions in an order.
     static const char* spatial_chars() { return "xyzhsw"; }
+    /// @brief Characters representing local dimensions in an order.
+    static const char* local_chars() { return "kl"; }
     /// @brief Checks if @p c represents batch dimension.
     static bool is_batch_char(char c) { return std::string(batch_chars()).find_first_of(c) != std::string::npos; }
     /// @brief Checks if @p c represents feature map/channel dimension.
     static bool is_feature_char(char c) { return std::string(feature_chars()).find_first_of(c) != std::string::npos; }
     /// @brief Checks if @p c represents spatial dimension.
     static bool is_spatial_char(char c) { return std::string(spatial_chars()).find_first_of(c) != std::string::npos; }
+    /// @brief Checks if @p c represents local dimensions.
+    static bool is_local_char(char c) { return std::string(local_chars()).find_first_of(c) != std::string::npos; }
 };
 
 /// @brief Represents memory formats (orders).
@@ -82,6 +88,8 @@ struct format
         fyxb = cldnn_format_fyxb, ///< format not used inside clDNN, but supported in reorder as extension for user provided formats.
         os_iyx_osv16 = cldnn_format_os_iyx_osv16, ///< format used only for convolution weights: os - output feature maps slice, i - input feature maps, yx - spatials, sv16 - 16 values of single slice.
                                                   ///< \n \image html os_iyx_osv16.jpg
+        os_iyx_osv32 = cldnn_format_os_iyx_osv32, ///< format used only for convolution weights: os - output feature maps slice, i - input feature maps, yx - spatials, sv32 - 32 values of single slice.
+        os_iyx_osv64 = cldnn_format_os_iyx_osv64, ///< format used only for convolution weights: os - output feature maps slice, i - input feature maps, yx - spatials, sv64 - 64 values of single slice.
         bs_xs_xsv8_bsv8 = cldnn_format_bs_xs_xsv8_bsv8,  ///< format used only for fully connected weights: bs - batch slice, xs - x slice, bsv8 - 8 values of single slice.
                                                          ///< \n \image html bs_xs_xsv8_bsv8.jpg
         bs_xs_xsv8_bsv16 = cldnn_format_bs_xs_xsv8_bsv16,///< format used only for fully connected weights: bs - batch slice, xs - x slice, bsv16 - 16 values of single slice.
@@ -101,9 +109,16 @@ struct format
         image_2d_weights_winograd_6x3_s1_fbxyb,      ///< image format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
         image_2d_weights_winograd_6x3_s1_xfbyb,      ///< image format used for weights for winograd fused convolution, F(6,3) -- filter 3x3 with stride 1
         os_is_yx_isa8_osv8_isv4,                        /// format for weights for MMAD convolution
+        os_is_yx_isa8_osv8_isv4_swizzled_by_4, /// format for weights for MMAD convolution
         is_o_yx_isv32, /// format for weights for 1x1 MMAD convolutions
+        is_o32_yx_isv32_swizzled_by_4, /// format for weights for 1x1 MMAD convolutions
+        os_is_y_x8_osv8_isv4, /// format for weights for 1x1 MMAD convolutions
         byxf_af32,           /// < \n format for input for primitives using MMAD
+        byx8_f4,             /// < \n format for input for MMAD convolutions
         fs_bs_yx_bsv4_fsv32, /// < \n format for batched input for primitives using MMAD
+        bf_lyx_yx = cldnn_bf_lyx_yx,            /// < \n format for local convolution weights
+        b_fs_yx_fsv4,        /// < \n format for input for IMAD convolutions
+        os_is_yx_osv16_isv4, /// < \n format for weights for IMAD convolutions
         format_num = cldnn_format_format_num, ///< number of format types
         any = cldnn_format_any
     };
@@ -113,27 +128,36 @@ struct format
     {
         static const std::map<type, format_traits> traits
         {
-            { yxfb,{ 1, 1, 2, "yxfb", "bfxy" } },
-            { byxf,{ 1, 1, 2, "byxf", "bfxy" } },
-            { bfyx,{ 1, 1, 2, "bfyx", "bfxy" } },
-            { fyxb,{ 1, 1, 2, "fyxb", "bfxy" } },
-            { os_iyx_osv16, { 1, 1, 2, "bfyx", "bfxy" } },
-            { bs_xs_xsv8_bsv8, { 1, 1, 1, "bx", "b?x?" } },
-            { bs_xs_xsv8_bsv16,{ 1, 1, 1, "bx", "b?x?" } },
-            { bs_x_bsv16, { 1, 1, 1, "bx", "b?x?" } },
-            { bf8_xy16, { 1, 1, 2, "bfyx", "bfxy" }},
-            { image_2d_weights_c4_fyx_b, { 1, 1, 2, "bfyx", "bfxy" } },
-            { image_2d_weights_c1_b_fyx, { 1, 1, 2, "bfyx", "bfxy" } },
-            { winograd_2x3_s1_data, { 1, 1, 2, "bxyf", "bfxy" } },
-            { winograd_2x3_s1_weights, { 1, 1, 2, "bfyx", "bfxy" } },
-            { winograd_2x3_s1_fused_weights, { 1, 1, 2, "xyfb", "bfxy" } },
-            { winograd_6x3_s1_fused_weights,{ 1, 1, 2, "xyfb", "bfxy" } },
-            { image_2d_weights_winograd_6x3_s1_fbxyb,{ 1, 1, 2, "xyfb", "bfxy" } },
-            { image_2d_weights_winograd_6x3_s1_xfbyb,{ 1, 1, 2, "xyfb", "bfxy" } },
-            { os_is_yx_isa8_osv8_isv4, { 1, 1, 2, "bfyx", "bfxy" } },
-            { is_o_yx_isv32 , {1, 1, 2, "byxf", "bfxy" } },
-            { byxf_af32, { 1, 1, 2, "byxf", "bfxy" } },
-            { fs_bs_yx_bsv4_fsv32 , { 1, 1, 2, "fbyx", "bfxy" }}
+            { yxfb,{ 1, 1, 2, 0, "yxfb", "bfxy" } },
+            { byxf,{ 1, 1, 2, 0, "byxf", "bfxy" } },
+            { bfyx,{ 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { fyxb,{ 1, 1, 2, 0, "fyxb", "bfxy" } },
+            { os_iyx_osv16, { 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { os_iyx_osv32,{ 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { os_iyx_osv64,{ 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { bs_xs_xsv8_bsv8, { 1, 1, 1, 0, "bx", "b?x?" } },
+            { bs_xs_xsv8_bsv16,{ 1, 1, 1, 0, "bx", "b?x?" } },
+            { bs_x_bsv16, { 1, 1, 1, 0, "bx", "b?x?" } },
+            { bf8_xy16, { 1, 1, 2, 0, "bfyx", "bfxy" }},
+            { image_2d_weights_c4_fyx_b, { 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { image_2d_weights_c1_b_fyx, { 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { winograd_2x3_s1_data, { 1, 1, 2, 0, "bxyf", "bfxy" } },
+            { winograd_2x3_s1_weights, { 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { winograd_2x3_s1_fused_weights, { 1, 1, 2, 0, "xyfb", "bfxy" } },
+            { winograd_6x3_s1_fused_weights,{ 1, 1, 2, 0, "xyfb", "bfxy" } },
+            { image_2d_weights_winograd_6x3_s1_fbxyb,{ 1, 1, 2, 0, "xyfb", "bfxy" } },
+            { image_2d_weights_winograd_6x3_s1_xfbyb,{ 1, 1, 2, 0, "xyfb", "bfxy" } },
+            { os_is_yx_isa8_osv8_isv4, { 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { os_is_yx_isa8_osv8_isv4_swizzled_by_4,{ 1, 1, 2, 0, "bfyx", "bfxy" } },
+            { byxf_af32, { 1, 1, 2, 0, "byxf", "bfxy" } },
+            { byx8_f4 , { 1, 1, 2, 0, "byxf", "bfyx"} },
+            { fs_bs_yx_bsv4_fsv32 , { 1, 1, 2, 0, "fbyx", "bfxy" }},
+            { is_o_yx_isv32 , {1, 1, 2, 0, "byxf", "bfxy" } },
+            { is_o32_yx_isv32_swizzled_by_4 , {1,1,2,0,"byxf", "bfxy" } },
+            { os_is_y_x8_osv8_isv4 , { 1, 1, 2, 0, "byxf", "bfxy" } },
+            { bf_lyx_yx,{ 1, 1, 2, 2, "bfklyx", "bfklxy" } },
+            { b_fs_yx_fsv4,{ 1, 1, 1, 0, "bfyx", "bfxy" } },
+            { os_is_yx_osv16_isv4,{ 1, 1, 1, 0, "bfxy", "bfxy?" } },
         };
         return traits.at(fmt);
     }
@@ -144,6 +168,8 @@ struct format
     static size_t feature_num(type fmt) { return traits(fmt).feature_num; }
     /// @brief Returns number of spatial dimensions for a @p format.
     static size_t spatial_num(type fmt) { return traits(fmt).spatial_num; }
+    /// @brief Returns number of local dimensions for a @p format.
+    static size_t local_num(type fmt) { return traits(fmt).local_num; }
     /// @brief Returns an order of dimensions for a @ format.
     static const std::string& order(type fmt) { return traits(fmt).order; }
     /// @brief Returns an internal orders of dimensions for a @p format.
@@ -163,6 +189,8 @@ struct format
     size_t feature_num() const { return traits(value).feature_num; }
     /// @brief Returns number of spatial dimensions.
     size_t spatial_num() const { return traits(value).spatial_num; }
+    /// @brief Returns number of local dimensions.
+    size_t local_num() const { return traits(value).local_num; }
     /// @brief Returns an order of dimensions in form of string.
     const std::string& order() const { return traits(value).order; }
     /// @brief Returns an internal orders of dimensions form of string.
@@ -197,7 +225,8 @@ enum class dim_vec_kind
 {
     batch,
     feature,
-    spatial
+    spatial,
+    local
 };
 
 /// @brief template class with max_dimensionalities and dimension offset for dimension kinds
@@ -228,6 +257,13 @@ struct dim_vec_limits<dim_vec_kind::spatial>
     static constexpr int32_t dim_offset = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX;
 };
 
+template <>
+struct dim_vec_limits<dim_vec_kind::local>
+{
+    static constexpr int32_t max_dimentionality = CLDNN_TENSOR_LOCAL_DIM_MAX;
+    static constexpr int32_t dim_offset = CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX;
+};
+
 /// @brief Template class used in tensor constructor using dim_vec_kinds
 template <dim_vec_kind Kind>
 class dim_vec_kind_init
@@ -267,12 +303,19 @@ details::dim_vec_kind_init<details::dim_vec_kind::spatial> spatial(InitTys&& ...
     return details::dim_vec_kind_init<details::dim_vec_kind::spatial>(std::forward<InitTys>(inits) ...);
 }
 
+template <typename ... InitTys>
+details::dim_vec_kind_init<details::dim_vec_kind::local> local(InitTys&& ... inits)
+{
+    return details::dim_vec_kind_init<details::dim_vec_kind::local>(std::forward<InitTys>(inits) ...);
+}
+
 /// @brief N-dimensional vector. Mostly used to represent memory size.
 struct tensor
 {
     friend class details::dim_vec_kind_init<details::dim_vec_kind::batch>;
     friend class details::dim_vec_kind_init<details::dim_vec_kind::feature>;
     friend class details::dim_vec_kind_init<details::dim_vec_kind::spatial>;
+    friend class details::dim_vec_kind_init<details::dim_vec_kind::local>;
 
     typedef int32_t value_type;     ///< Values type stored in tensor.
     //TODO find the way to prevent direct change of following fields.
@@ -280,6 +323,7 @@ struct tensor
     mutable_array_ref<value_type> batch;    ///< Batch dimensions.
     mutable_array_ref<value_type> feature;  ///< Feature maps.
     mutable_array_ref<value_type> spatial;  ///< Spatial dimensions.
+    mutable_array_ref<value_type> local;    ///< Local dimensions.
 
 private:
     value_type _sizes[CLDNN_TENSOR_DIM_MAX];
@@ -292,6 +336,8 @@ public:
         , batch(_sizes, CLDNN_TENSOR_BATCH_DIM_MAX)
         , feature(_sizes + CLDNN_TENSOR_BATCH_DIM_MAX, CLDNN_TENSOR_FEATURE_DIM_MAX)
         , spatial(_sizes + CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX, CLDNN_TENSOR_SPATIAL_DIM_MAX)
+        , local(_sizes + CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX +
+            CLDNN_TENSOR_SPATIAL_DIM_MAX, CLDNN_TENSOR_LOCAL_DIM_MAX)
     {
         std::fill_n(_sizes, CLDNN_TENSOR_DIM_MAX, default_size);
     }
@@ -345,6 +391,32 @@ public:
         _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1] = height;
     }
 
+    /// @brief Constructs @p tensor.
+    /// @details Example:
+    /*! @code
+    *
+    tensor my_tensor( 2, 3, 4, 5, 6, 7 );   // b=2, f=3, x=4, y=5, lx= 6, ly =7
+    cout << my_tensor.batch[0] << endl;           // 2
+    cout << my_tensor.feature[0] << endl;         // 3
+    cout << "x=" << my_tensor.spatial[0] << endl; // x=4
+    cout << "y=" << my_tensor.spatial[1] << endl; // y=5
+	cout << "local x=" << my_tensor.local[0] << endl; // local x=6
+	cout << "loxal y=" << my_tensor.local[1] << endl; // local y=7
+    *
+    * @endcode
+    */
+    tensor(value_type batch_num, value_type feature_num, value_type width,
+        value_type height, value_type local_x, value_type local_y)
+        : tensor(1)
+    {
+        _sizes[0] = batch_num;
+        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX] = feature_num;
+        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX] = width;
+        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1] = height;
+        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX] = local_x;
+        _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX + 1] = local_y;
+    }
+
     /// @brief Constructs @p tensor using vector of sizes.
     /// @param[in] sizes dimensions need to be provided in the following order {batch, feature, spatial_x, spatial_y}.
     /// @param[in] default_size default_size for tensor dimensions.
@@ -366,6 +438,13 @@ public:
         _sizes[CLDNN_TENSOR_BATCH_DIM_MAX] = sizes[CLDNN_TENSOR_BATCH_DIM_MAX];
         _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX] = sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX];
         _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1] = sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + 1];
+        if (sizes.size() == 6)
+        {
+            _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX] =
+                sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX];
+            _sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX + 1] =
+                sizes[CLDNN_TENSOR_BATCH_DIM_MAX + CLDNN_TENSOR_FEATURE_DIM_MAX + CLDNN_TENSOR_SPATIAL_DIM_MAX + 1];
+        }
     }
 
     tensor(format fmt, const std::vector<value_type>& sizes, value_type default_size = 1)
@@ -404,6 +483,7 @@ public:
         result.batch_num = batch.size();
         result.feature_num = feature.size();
         result.spatial_num = spatial.size();
+        result.local_num = local.size();
         std::copy_n(_sizes, CLDNN_TENSOR_DIM_MAX, result.sizes);
         return result;
     }
@@ -664,6 +744,16 @@ public:
             my_sizes[0] = align_to(my_sizes[0], 16);
             adjusted_coords[0] = align_to(adjusted_coords[0], 16);
         }
+        else if (fmt == cldnn::format::os_iyx_osv32 && !is_aligned_to(my_sizes[0], 32))
+        {
+            my_sizes[0] = align_to(my_sizes[0], 32);
+            adjusted_coords[0] = align_to(adjusted_coords[0], 32);
+        }
+        else if (fmt == cldnn::format::os_iyx_osv64 && !is_aligned_to(my_sizes[0], 64))
+        {
+            my_sizes[0] = align_to(my_sizes[0], 64);
+            adjusted_coords[0] = align_to(adjusted_coords[0], 64);
+        }
         else if (fmt == cldnn::format::bs_xs_xsv8_bsv8 && !(is_aligned_to(my_sizes[0], 8) && is_aligned_to(my_sizes[1], 8)))
         {
             my_sizes[0] = align_to(my_sizes[0], 8);
@@ -699,16 +789,43 @@ public:
             adjusted_coords[0] = align_to(adjusted_coords[0], 8);
             adjusted_coords[1] = align_to(adjusted_coords[1], 32);
         }
+        else if (fmt == cldnn::format::os_is_yx_isa8_osv8_isv4_swizzled_by_4 && !(is_aligned_to(my_sizes[0], 32)) && !(is_aligned_to(my_sizes[1], 32)))
+        {
+            my_sizes[0] = align_to(my_sizes[0], 32);
+            my_sizes[1] = align_to(my_sizes[1], 32);
+            adjusted_coords[0] = align_to(adjusted_coords[0], 32);
+            adjusted_coords[1] = align_to(adjusted_coords[1], 32);
+        }
         else if (fmt == cldnn::format::is_o_yx_isv32 && !(is_aligned_to(my_sizes[1], 32)))
         {
             my_sizes[1] = align_to(my_sizes[1], 32);
             adjusted_coords[1] = align_to(adjusted_coords[1], 32);
         }
+        else if (fmt == cldnn::format::is_o32_yx_isv32_swizzled_by_4 && (!is_aligned_to(my_sizes[1], 32) || !is_aligned_to(my_sizes[0], 32)))
+        {
+            my_sizes[0] = align_to(my_sizes[0], 32);
+            my_sizes[1] = align_to(my_sizes[1], 32);
+            adjusted_coords[0] = align_to(adjusted_coords[0], 32);
+            adjusted_coords[1] = align_to(adjusted_coords[1], 32);
+        }
+        else if (fmt == cldnn::format::os_is_y_x8_osv8_isv4)
+        {
+            my_sizes[1] = align_to(my_sizes[1], 4);
+            my_sizes[0] = align_to(my_sizes[0], 8);
+            my_sizes[2] = align_to(my_sizes[2], 8);
+        }
         else if (fmt == cldnn::format::byxf_af32 && !(is_aligned_to(my_sizes[1], 32)))
         {
             my_sizes[1] = align_to(my_sizes[1], 32);
             adjusted_coords[1] = align_to(adjusted_coords[1], 32);
         }
+        else if (fmt == cldnn::format::byx8_f4 && (!(is_aligned_to(my_sizes[1], 4)) || !(is_aligned_to(my_sizes[2], 8))))
+        {
+            my_sizes[1] = align_to(my_sizes[1], 4);
+            my_sizes[2] = align_to(my_sizes[2], 8);
+            adjusted_coords[1] = align_to(adjusted_coords[1], 4);
+            adjusted_coords[2] = align_to(adjusted_coords[2], 8);
+        }
         else if (fmt == cldnn::format::fs_bs_yx_bsv4_fsv32 && (!is_aligned_to(my_sizes[1], 32) || !is_aligned_to(my_sizes[0], 4) ))
         {
             my_sizes[1] = align_to(my_sizes[1], 32);
@@ -764,6 +881,7 @@ private:
     }
 };
 
+#define TensorValue(val) static_cast<cldnn::tensor::value_type>(val)
 
 template<details::dim_vec_kind Kind>
 inline void details::dim_vec_kind_init<Kind>::init_tensor_values(cldnn::tensor & t)
@@ -781,6 +899,26 @@ inline tensor operator*(const tensor& lhs, tensor::value_type rhs) { return lhs.
 /// @brief Divides a @p tensor by a @p scalar
 inline tensor operator/(const tensor& lhs, tensor::value_type rhs) { return lhs.div(rhs); }
 
+///
+/// \brief Converts C API tensor_array to std::vector<tensor>
+///
+inline std::vector<tensor> tensor_arr_to_vector(const cldnn_tensor_arr& arr)
+{
+    std::vector<tensor> result(arr.size);
+    for (size_t i = 0; i < arr.size; i++)
+        result[i] = arr.data[i];
+
+    return result;
+}
+
+///
+/// \brief Converts std::vector<tensor> to std::vector of C API tensor
+///
+inline std::vector<cldnn_tensor> tensor_vector_to_cldnn_vector(const std::vector<tensor>& stor)
+{
+    return std::vector<cldnn_tensor>(stor.begin(), stor.end());
+}
+
 /// @}
 /// @}
 }
diff --git a/inference-engine/thirdparty/clDNN/api/CPP/topology.hpp b/inference-engine/thirdparty/clDNN/api/CPP/topology.hpp
index e5a44e421..37481abce 100644
--- a/inference-engine/thirdparty/clDNN/api/CPP/topology.hpp
+++ b/inference-engine/thirdparty/clDNN/api/CPP/topology.hpp
@@ -61,6 +61,13 @@ struct topology
         return *this;
     }
 
+    /// Construct C++ topology based on C API @p cldnn_topology
+    topology(const cldnn_topology& other) 
+        :_impl(other)
+    {
+        if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null");
+    }
+
     /// @brief Releases wrapped C API @ref cldnn_topology.
     ~topology()
     {
@@ -124,11 +131,6 @@ private:
     friend struct network;
     cldnn_topology _impl;
 
-    topology(cldnn_topology impl) :_impl(impl)
-    {
-        if (_impl == nullptr) throw std::invalid_argument("implementation pointer should not be null");
-    }
-
     void retain()
     {
         check_status<void>("retain topology failed", [=](status_t* status) { cldnn_retain_topology(_impl, status); });
diff --git a/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_bn_scale.h b/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_bn_scale.h
new file mode 100644
index 000000000..a57d75231
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_bn_scale.h
@@ -0,0 +1,73 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef FUSED_CONV_BN_SCALE_H
+#define FUSED_CONV_BN_SCALE_H
+
+#include "api/C/cldnn.h"
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @brief Primitives that fuses convolution, batch norm, scale and optionally Relu.
+CLDNN_BEGIN_PRIMITIVE_DESC(fused_conv_bn_scale)
+/// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+cldnn_tensor input_offset;
+/// @brief Defines shift in input buffer between adjacent calculations of output values.
+cldnn_tensor stride;
+/// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+/// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. 
+/// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+cldnn_tensor dilation;
+/// @brief Enable Relu activation.
+uint32_t with_activation;
+/// @brief Relu activation slope.
+float activation_negative_slope;
+/// @brief On how many cards split the computation to.
+uint32_t split;
+/// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
+cldnn_primitive_id_arr weights;
+/// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split.
+cldnn_primitive_id_arr bias;
+/// @brief Primitive id containing scale bias data for fused convolution.
+cldnn_primitive_id scale_bias;
+/// @brief Primitive id containing inverted variance used in future gradient computing for fused convolution.
+cldnn_primitive_id inv_variance;
+/// @brief Epsilon for fused convolution.
+float epsilon;
+/// @brief Indicates that primitive is fused with batch norm and scale.
+uint32_t fused_batch_norm_scale;
+CLDNN_END_PRIMITIVE_DESC(fused_conv_bn_scale)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fused_conv_bn_scale);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif /* FUSED_CONV_BN_SCALE.H */
+
diff --git a/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_eltwise.h b/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_eltwise.h
new file mode 100644
index 000000000..458648706
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api_extension/C/fused_conv_eltwise.h
@@ -0,0 +1,104 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#ifndef FUSED_CONV_ELTWISE_H
+#define FUSED_CONV_ELTWISE_H
+
+#include "api/C/cldnn.h"
+/// @addtogroup c_api C API
+/// @{
+/// @addtogroup c_topology Network Topology
+/// @{
+/// @addtogroup c_primitives Primitives
+/// @{
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// @brief Performs forward spatial convolution with weight sharing fused with eltwise.
+/// Also supports built-in Relu @CLDNN_PRIMITIVE_DESC{activation} separate for convolution and for eltwise, available by setting it in arguments.
+CLDNN_BEGIN_PRIMITIVE_DESC(fused_conv_eltwise)
+
+struct conv_data
+{
+    /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    cldnn_tensor input_offset;
+    /// @brief Defines shift in input buffer between adjacent calculations of output values.
+    cldnn_tensor stride;
+    /// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. 
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    cldnn_tensor dilation;
+    /// @brief Enable Relu activation.
+    uint32_t with_activation;
+    /// @brief Relu activation slope.
+    float activation_negative_slope;
+    /// @brief On how many cards split the computation to.
+    uint32_t split;
+    /// @brief Indicates that the primitive has user-defined output size (non-zero value).
+    uint32_t with_output_size;
+    /// @brief User-defined output data size of the primitive (w/o padding).
+    cldnn_tensor output_size;
+    /// @brief Array of primitive ids containing weights data. Size of array should be equivalent to @p split.
+    cldnn_primitive_id_arr weights;
+    /// @brief Array of primitive ids containing bias data. Size of array should be equivalent to @p split.
+    cldnn_primitive_id_arr bias;
+    /// @brief List of primitive ids containing weights quanitization factors per output feature map.
+    cldnn_primitive_id_arr weights_quantization_factors;
+    /// @brief List of primitive ids containing output calibration factors per output feature map.
+    cldnn_primitive_id_arr output_calibration_factors;
+    /// @brief Input quantization factor
+    float input_quantization_factor;
+    /// @brief Output quantization factor
+    float output_quantization_factor;
+} conv;
+
+struct eltw_data
+{
+    /// @brief Primitive id containing output quanitization factors per output feature map.
+    cldnn_primitive_id output_calibration_factors;
+    /// @brief Output quantization factor
+    float output_quantization_factor;
+    /// @brief Eltwise mode. See #cldnn_eltwise_mode.
+    int32_t mode; /*cldnn_eltwise_mode*/
+    /// @brief Blob-wise coefficient for SUM operation
+    cldnn_float_arr coefficients;
+    /// @brief Enables Relu activation.
+    uint32_t with_activation;
+    /// @brief Relu activation slope.
+    float activation_negative_slope;
+    /// @brief Defines shift in input buffers between adjacent calculations of output values.
+    cldnn_tensor_arr stride;
+} eltw;
+
+/// @brief Is optimization that output contains data from second input ON ?
+bool second_input_in_output = false;
+
+CLDNN_END_PRIMITIVE_DESC(fused_conv_eltwise)
+
+CLDNN_DECLARE_PRIMITIVE_TYPE_ID(fused_conv_eltwise);
+
+#ifdef __cplusplus
+}
+#endif
+
+/// @}
+/// @}
+/// @}
+#endif /* FUSED_CONV_ELTWISE_H */
+
diff --git a/inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_bn_scale.hpp b/inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_bn_scale.hpp
new file mode 100644
index 000000000..117e4ac55
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_bn_scale.hpp
@@ -0,0 +1,170 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "../C/fused_conv_bn_scale.h"
+#include "api/CPP/primitive.hpp"
+
+namespace cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief Primitives that fuses convolution, batch norm, scale and optionally Relu.
+struct fused_conv_bn_scale : public primitive_base<fused_conv_bn_scale, CLDNN_PRIMITIVE_DESC(fused_conv_bn_scale)>
+{
+    CLDNN_DECLARE_PRIMITIVE(fused_conv_bn_scale)
+
+    /// @brief Constructs convolution primitive fused with batch norm and scale.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param bias List of primitive ids containing bias data.
+    /// @param epsilon Small number to protect from 0 dividing.
+    /// @param scale_input Scale input primitive id with values needed for product computation. Used in fused scale part.
+    /// @param scale_bias Primitive id containing bias data for fused scale part.
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param inv_variance Primitive id containing inverted variance calculated in this primitive. Used in fused batch norm part.
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    fused_conv_bn_scale(
+        const primitive_id& id,
+        const primitive_id& input,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        float epsilon,
+        const primitive_id& scale_input,
+        const primitive_id& scale_bias = "",
+        tensor stride = { 1, 1, 1, 1 },
+        tensor dilation = { 1, 1, 1, 1 },
+        tensor input_offset = { 0,0,0,0 },
+        const primitive_id& inv_variance = "",
+        bool with_activation = false,
+        float activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input, scale_input }, output_padding)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , input_offset(input_offset)
+        , stride(stride)
+        , dilation(dilation)
+        , with_activation(with_activation)
+        , activation_negative_slope(activation_slp)
+        , with_output_size(false)
+        , scale_bias(scale_bias)
+        , inv_variance(inv_variance)
+        , epsilon(epsilon)
+        , _weights(weights)
+        , _bias(bias)
+    {
+        if ((bias.size() != 0) && (weights.size() != bias.size()))
+            throw std::runtime_error("convolution's weights/bias count does not match");
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{fused_conv_bn_scale}
+    fused_conv_bn_scale(const dto* dto)
+        :primitive_base(dto)
+        , weights(_weights.cpp_ids)
+        , bias(_bias.cpp_ids)
+        , input_offset(dto->input_offset)
+        , stride(dto->stride)
+        , dilation(dto->dilation)
+        , with_activation(dto->with_activation != 0)
+        , activation_negative_slope(dto->activation_negative_slope)
+        , scale_bias(dto->scale_bias)
+        , inv_variance(dto->inv_variance)
+        , epsilon(dto->epsilon)
+        , _weights(dto->weights)
+        , _bias(dto->bias)
+    {
+        if (!dto->split || (weights.size() != bias.size() && bias.size() != 0) || dto->split != weights.size())
+            throw std::invalid_argument("Invalid convolution dto: bad split value");
+    }
+
+    /// @brief List of primitive ids containing weights data.
+    fixed_size_vector_ref weights;
+    /// @brief List of primitive ids containing bias data.
+    fixed_size_vector_ref bias;
+    /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    tensor input_offset;
+    /// @brief Defines shift in input buffer between adjacent calculations of output values.
+    tensor stride;
+    /// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. 
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    tensor dilation;
+    /// @brief Enable Relu activation.
+    bool with_activation;
+    /// @brief Relu activation slope.
+    float activation_negative_slope;
+    /// @brief Indicates that the primitive has user-defined output size (non-zero value).
+    bool with_output_size;
+    /// @brief User-defined output data size of the primitive (w/o padding).
+    tensor output_size;
+    /// @brief Primitive id containing scale bias data for fused convolution.
+    primitive_id scale_bias;
+    /// @brief Primitive id containing inverted variance used in future gradient computing for fused convolution.
+    primitive_id inv_variance;
+    /// @brief Epsilon for fused convolution.
+    float epsilon;
+    /// @brief On how many cards split the computation to.
+    int32_t split() const { return static_cast<int32_t>(weights.size()); }
+
+protected:
+    primitive_id_arr _weights;
+    primitive_id_arr _bias;
+
+    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
+    {
+        std::vector<std::reference_wrapper<const primitive_id>> ret;
+        ret.reserve(weights.size() + bias.size() + !scale_bias.empty() + !inv_variance.empty());
+        for (auto& w : weights)
+            ret.push_back(w);
+        for (auto& b : bias)
+            ret.push_back(b);
+        if (!scale_bias.empty())
+            ret.push_back(scale_bias);
+        if (!inv_variance.empty())
+            ret.push_back(inv_variance);
+        return ret;
+    }
+
+    void update_dto(dto& dto) const override
+    {
+        dto.weights = _weights.ref();
+        dto.bias = _bias.ref();
+        dto.input_offset = input_offset;
+        dto.stride = stride;
+        dto.dilation = dilation;
+        dto.split = split();
+        dto.with_activation = with_activation;
+        dto.activation_negative_slope = activation_negative_slope;
+        dto.epsilon = epsilon;
+        dto.inv_variance = inv_variance.c_str();
+        dto.scale_bias = scale_bias.c_str();
+    }
+};
+/// @}
+/// @}
+/// @}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_eltwise.hpp b/inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_eltwise.hpp
new file mode 100644
index 000000000..bc3a2786b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/api_extension/CPP/fused_conv_eltwise.hpp
@@ -0,0 +1,262 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "../C/fused_conv_eltwise.h"
+#include "api/CPP/primitive.hpp"
+#include "api/CPP/eltwise.hpp"
+
+namespace cldnn
+{
+/// @addtogroup cpp_api C++ API
+/// @{
+/// @addtogroup cpp_topology Network Topology
+/// @{
+/// @addtogroup cpp_primitives Primitives
+/// @{
+
+/// @brief Performs forward spatial convolution with fused eltwise and optionally Relu.
+struct fused_conv_eltwise : public primitive_base<fused_conv_eltwise, CLDNN_PRIMITIVE_DESC(fused_conv_eltwise)>
+{
+    CLDNN_DECLARE_PRIMITIVE(fused_conv_eltwise)
+
+    /// @brief Constructs fused_conv_eltwise primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param weights List of primitive ids containing weights data.
+    /// @param bias List of primitive ids containing bias data.
+    /// @param w_quantization_factor List of primitive ids containing weights quanitization factors per output feature map.
+    /// @param output_calibration_factors List of primitive ids output containing calibration factors per output feature map.
+    /// @param i_quantization_factor Input quantization factor
+    /// @param input_offset Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+    /// @param stride Defines shift in input buffer between adjacent calculations of output values.
+    /// @param dilation Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+    /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1.
+    /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+    /// @param with_activation Enable Relu activation.
+    /// @param activation_slp Relu activation slope.
+    fused_conv_eltwise(
+        const primitive_id& id,
+        const primitive_id& input,
+        const primitive_id& input2,
+        eltwise_mode mode,
+        const std::vector<primitive_id>& weights,
+        const std::vector<primitive_id>& bias,
+        const std::vector<primitive_id>& conv_w_quantization_factor,
+        const std::vector<primitive_id>& conv_output_calibration_factors,
+        const float conv_i_quantization_factor,
+        const primitive_id& eltw_output_calibration_factors,
+        const std::vector<tensor>& eltw_stride,
+        tensor stride = { 1, 1, 1, 1 },
+        tensor input_offset = { 0,0,0,0 },
+        tensor dilation = { 1, 1, 1, 1 },
+        bool conv_with_activation = false,
+        float conv_activation_slp = 0.0f,
+        bool eltw_with_activation = false,
+        float eltw_activation_slp = 0.0f,
+        const padding& output_padding = padding()
+    )
+        :primitive_base(id, { input, input2 }, output_padding)
+        , conv(_conv_weights.cpp_ids, _conv_bias.cpp_ids, _conv_weights_quantization_factors.cpp_ids, _conv_output_calibration_factors.cpp_ids)
+        , eltw(eltw_output_calibration_factors)
+        , _conv_weights(weights)
+        , _conv_bias(bias)
+        , _conv_weights_quantization_factors(conv_w_quantization_factor)
+        , _conv_output_calibration_factors(conv_output_calibration_factors)
+    {
+
+        conv.input_quantization_factor = conv_i_quantization_factor;
+        conv.output_quantization_factor = 1.0f;
+
+        conv.input_offset = input_offset;
+        conv.stride = stride;
+        conv.dilation = dilation;
+        conv.with_activation = conv_with_activation;
+        conv.activation_negative_slope = conv_activation_slp;
+        conv.with_output_size = false;
+
+        eltw.mode = mode;
+        eltw.with_activation = eltw_with_activation;
+        eltw.activation_negative_slope = eltw_activation_slp;
+        eltw.stride = eltw_stride;
+
+        if ((bias.size() != 0) && (weights.size() != bias.size()))
+            throw std::runtime_error("convolution's weights/bias count does not match");
+        if (conv.output_calibration_factors.size())
+        {
+            if ((weights.size() != 0) && (weights.size() != conv.weights_quantization_factors.size()))
+                throw std::runtime_error("convolution's weights count does not match quantization factors count");
+        }
+    }
+
+    /// @brief Constructs a copy from C API @CLDNN_PRIMITIVE_DESC{convolution}
+    fused_conv_eltwise(const dto* dto)
+        :primitive_base(dto)
+        , conv(_conv_weights.cpp_ids, _conv_bias.cpp_ids, _conv_weights_quantization_factors.cpp_ids, _conv_output_calibration_factors.cpp_ids)
+        , eltw(dto->eltw.output_calibration_factors)
+        , _conv_weights(dto->conv.weights)
+        , _conv_bias(dto->conv.bias)
+        , _conv_weights_quantization_factors(dto->conv.weights_quantization_factors)
+        , _conv_output_calibration_factors(dto->conv.output_calibration_factors)
+        , _eltw_stride(tensor_vector_to_cldnn_vector(eltw.stride))
+    {
+        conv.input_quantization_factor = dto->conv.input_quantization_factor;
+        conv.output_quantization_factor = dto->conv.output_quantization_factor;
+        conv.input_offset = dto->conv.input_offset;
+        conv.stride = dto->conv.stride;
+        conv.dilation = dto->conv.dilation;
+        conv.with_activation = dto->conv.with_activation != 0;
+        conv.activation_negative_slope = dto->conv.activation_negative_slope;
+        conv.with_output_size = dto->conv.with_output_size != 0;
+        conv.output_size = dto->conv.output_size;
+
+        second_input_in_output = dto->second_input_in_output;
+
+        if (!dto->conv.split || (conv.weights.size() != conv.bias.size() && conv.bias.size() != 0) || dto->conv.split != conv.weights.size())
+            throw std::invalid_argument("Invalid convolution dto: bad split value");
+    }
+
+    struct conv_data
+    {
+        /// @brief List of primitive ids containing weights data.
+        fixed_size_vector_ref weights;
+        /// @brief List of primitive ids containing bias data.
+        fixed_size_vector_ref bias;
+        /// @brief List of primitive ids containing weights quanitization factors per output feature map.
+        fixed_size_vector_ref weights_quantization_factors;
+        /// @brief List of primitive ids containing output quanitization factors per output feature map for convolution.
+        fixed_size_vector_ref output_calibration_factors;
+        /// @brief Input quantization factor for convolution
+        float input_quantization_factor;
+        /// @brief Output quantization factor for convolution
+        float output_quantization_factor;
+        /// @brief Defines a shift, relative to (0,0) position of the input buffer, where (0,0) point of the convolution window should start calculations.
+        tensor input_offset;
+        /// @brief Defines shift in input buffer between adjacent calculations of output values.
+        tensor stride;
+        /// @brief Defines gaps in the input - dilation rate k=1 is normal convolution, k=2 means skipping one pixel per input, k=4 means skipping 3 pixels.
+        /// As an example in one dimension, a filter w of size 3 would compute over input x the following: w[0]*x[0] + w[1]*x[1] + w[2]*x[2] for dilation of 1. 
+        /// For dilation 2 the filter would instead compute w[0]*x[0] + w[1]*x[2] + w[2]*x[4].
+        tensor dilation;
+        /// @brief Enable Relu activation.
+        bool with_activation;
+        /// @brief Relu activation slope.
+        float activation_negative_slope;
+        /// @brief Indicates that the primitive has user-defined output size (non-zero value).
+        bool with_output_size;
+        /// @brief User-defined output data size of the primitive (w/o padding).
+        tensor output_size;
+
+        conv_data(const fixed_size_vector_ref& weights,
+            const fixed_size_vector_ref& bias,
+            const fixed_size_vector_ref& weights_quantization_factors,
+            const fixed_size_vector_ref& output_calibration_factors
+        ) : weights(weights),
+            bias(bias),
+            weights_quantization_factors(weights_quantization_factors),
+            output_calibration_factors(output_calibration_factors)
+        {}
+    } conv;
+
+    struct eltw_data
+    {
+        /// @brief Primitive id containing output quanitization factors per output feature map.
+        primitive_id output_calibration_factors;
+        /// @brief Output quantization factor for eltwise
+        float output_quantization_factor;
+        /// @param mode Eltwise mode.
+        eltwise_mode mode;
+        /// @brief Enable Relu activation.
+        bool with_activation;
+        /// @brief Relu activation slope.
+        float activation_negative_slope;
+        /// @brief Defines shift in input buffers between adjacent calculations of output values.
+        std::vector<tensor> stride;
+
+        eltw_data(const primitive_id& output_calibration_factors)
+            : output_calibration_factors(output_calibration_factors)
+        {}
+    } eltw;
+
+    /// @brief On how many cards split the computation to.
+    int32_t split() const { return static_cast<int32_t>(conv.weights.size()); }
+
+    /// @brief Is optimization that output contains data from second input ON ?
+    bool second_input_in_output = false;
+protected:
+    primitive_id_arr _conv_weights;
+    primitive_id_arr _conv_bias;
+    primitive_id_arr _conv_weights_quantization_factors;
+    primitive_id_arr _conv_output_calibration_factors;
+
+    std::vector<cldnn_tensor> _eltw_stride;
+
+    std::vector<std::reference_wrapper<const primitive_id>> get_dependencies() const override
+    {
+        std::vector<std::reference_wrapper<const primitive_id>> ret;
+        ret.reserve(conv.weights.size()
+            + conv.bias.size()
+            + conv.weights_quantization_factors.size()
+            + conv.output_calibration_factors.size()
+            + (eltw.output_calibration_factors.empty() ? 0 : 1));
+
+        for (auto& w : conv.weights)
+            ret.push_back(w);
+        for (auto& b : conv.bias)
+            ret.push_back(b);
+        for (auto& q : conv.weights_quantization_factors)
+            ret.push_back(q);
+        for (auto& q : conv.output_calibration_factors)
+            ret.push_back(q);
+
+        if (!eltw.output_calibration_factors.empty())
+            ret.push_back(eltw.output_calibration_factors);
+
+        return ret;
+    }
+
+    void update_dto(dto& dto) const override
+    {
+        dto.conv.weights = _conv_weights.ref();
+        dto.conv.bias = _conv_bias.ref();
+        dto.conv.weights_quantization_factors = _conv_weights_quantization_factors.ref();
+        dto.conv.output_calibration_factors = _conv_output_calibration_factors.ref();
+        dto.conv.input_quantization_factor = conv.input_quantization_factor;
+        dto.conv.output_quantization_factor = conv.output_quantization_factor;
+        dto.conv.input_offset = conv.input_offset;
+        dto.conv.stride = conv.stride;
+        dto.conv.split = split();
+        dto.conv.with_activation = conv.with_activation;
+        dto.conv.activation_negative_slope = conv.activation_negative_slope;
+        dto.conv.dilation = conv.dilation;
+        dto.conv.with_output_size = conv.with_output_size;
+        dto.conv.output_size = conv.output_size;
+
+        dto.eltw.output_calibration_factors = eltw.output_calibration_factors.c_str();
+        dto.eltw.output_quantization_factor = eltw.output_quantization_factor;
+        dto.eltw.mode = static_cast<cldnn_eltwise_mode>(eltw.mode);
+        dto.eltw.with_activation = eltw.with_activation;
+        dto.eltw.activation_negative_slope = eltw.activation_negative_slope;
+        dto.eltw.stride = tensor_vector_to_arr(_eltw_stride);
+
+        dto.second_input_in_output = second_input_in_output;
+    }
+};
+/// @}
+/// @}
+/// @}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/common/boost/1.64.0/include/boost-1_64/boost/make_unique.hpp b/inference-engine/thirdparty/clDNN/common/boost/1.64.0/include/boost-1_64/boost/make_unique.hpp
new file mode 100644
index 000000000..7189d6e4e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/common/boost/1.64.0/include/boost-1_64/boost/make_unique.hpp
@@ -0,0 +1,13 @@
+/*
+Copyright 2014 Glen Joseph Fernandes
+(glenjofe@gmail.com)
+
+Distributed under the Boost Software License, Version 1.0.
+(http://www.boost.org/LICENSE_1_0.txt)
+*/
+#ifndef BOOST_MAKE_UNIQUE_HPP_INCLUDED
+#define BOOST_MAKE_UNIQUE_HPP_INCLUDED
+
+#include <boost/smart_ptr/make_unique.hpp>
+
+#endif
diff --git a/inference-engine/thirdparty/clDNN/common/boost/1.64.0/include/boost-1_64/boost/smart_ptr/make_unique.hpp b/inference-engine/thirdparty/clDNN/common/boost/1.64.0/include/boost-1_64/boost/smart_ptr/make_unique.hpp
new file mode 100644
index 000000000..eed503392
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/common/boost/1.64.0/include/boost-1_64/boost/smart_ptr/make_unique.hpp
@@ -0,0 +1,110 @@
+/*
+Copyright 2012-2015 Glen Joseph Fernandes
+(glenjofe@gmail.com)
+
+Distributed under the Boost Software License, Version 1.0.
+(http://www.boost.org/LICENSE_1_0.txt)
+*/
+#ifndef BOOST_SMART_PTR_MAKE_UNIQUE_HPP
+#define BOOST_SMART_PTR_MAKE_UNIQUE_HPP
+
+#include <boost/config.hpp>
+#include <memory>
+#include <utility>
+
+namespace boost {
+namespace detail {
+
+template<class T>
+struct up_if_object {
+    typedef std::unique_ptr<T> type;
+};
+
+template<class T>
+struct up_if_object<T[]> { };
+
+template<class T, std::size_t N>
+struct up_if_object<T[N]> { };
+
+template<class T>
+struct up_if_array { };
+
+template<class T>
+struct up_if_array<T[]> {
+    typedef std::unique_ptr<T[]> type;
+};
+
+template<class T>
+struct up_remove_reference {
+    typedef T type;
+};
+
+template<class T>
+struct up_remove_reference<T&> {
+    typedef T type;
+};
+
+template<class T>
+struct up_remove_reference<T&&> {
+    typedef T type;
+};
+
+template<class T>
+struct up_element { };
+
+template<class T>
+struct up_element<T[]> {
+    typedef T type;
+};
+
+} /* detail */
+
+template<class T>
+inline typename detail::up_if_object<T>::type
+make_unique()
+{
+    return std::unique_ptr<T>(new T());
+}
+
+#if !defined(BOOST_NO_CXX11_VARIADIC_TEMPLATES)
+template<class T, class... Args>
+inline typename detail::up_if_object<T>::type
+make_unique(Args&&... args)
+{
+    return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+#endif
+
+template<class T>
+inline typename detail::up_if_object<T>::type
+make_unique(typename detail::up_remove_reference<T>::type&& value)
+{
+    return std::unique_ptr<T>(new T(std::move(value)));
+}
+
+template<class T>
+inline typename detail::up_if_object<T>::type
+make_unique_noinit()
+{
+    return std::unique_ptr<T>(new T);
+}
+
+template<class T>
+inline typename detail::up_if_array<T>::type
+make_unique(std::size_t size)
+{
+    return std::unique_ptr<T>(new typename
+        detail::up_element<T>::type[size]());
+}
+
+template<class T>
+inline typename detail::up_if_array<T>::type
+make_unique_noinit(std::size_t size)
+{
+    return std::unique_ptr<T>(new typename
+        detail::up_element<T>::type[size]);
+}
+
+} /* boost */
+
+#endif
diff --git a/inference-engine/thirdparty/clDNN/create_msvc_mscc.bat b/inference-engine/thirdparty/clDNN/create_msvc_mscc.bat
index 914979205..156bc08a0 100644
--- a/inference-engine/thirdparty/clDNN/create_msvc_mscc.bat
+++ b/inference-engine/thirdparty/clDNN/create_msvc_mscc.bat
@@ -31,7 +31,7 @@ rmdir /S /Q %SOLUTION_DIR64%\codegen
 echo Creating Visual Studio 2015 (Win32) files in %SOLUTION_DIR32%... && ^
 cd "%ROOT_DIR%" && cmake -E make_directory "%SOLUTION_DIR32%" && cd "%SOLUTION_DIR32%" && cmake -G "Visual Studio 14 2015" "-DCLDNN__ARCHITECTURE_TARGET=%SOLUTION_TARGET32%" "%ROOT_DIR%"
 echo Creating Visual Studio 2015 (x64) files in %SOLUTION_DIR64%... && ^
-cd "%ROOT_DIR%" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio 14 2015 Win64" "-DCLDNN__ARCHITECTURE_TARGET=%SOLUTION_TARGET64%""%ROOT_DIR%"
+cd "%ROOT_DIR%" && cmake -E make_directory "%SOLUTION_DIR64%" && cd "%SOLUTION_DIR64%" && cmake -G "Visual Studio 14 2015 Win64" "-DCLDNN__ARCHITECTURE_TARGET=%SOLUTION_TARGET64%" "%ROOT_DIR%"
 
 echo Done.
 pause
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt b/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt
index f8f68371c..ecaede517 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/CMakeLists.txt
@@ -43,7 +43,7 @@ file(GLOB __CLDNN_Sources__main
     "${__CLDNN_Directory__main}/*.hpp"
     "${__CLDNN_Directory__main}/*.cpp"
   )
-  
+
 set(__CLDNN_Directory__core           "${__CLDNN_Directory__main}/core")
 set(__CLDNN_Label__core               "core")
 file(GLOB __CLDNN_Sources__core
@@ -59,7 +59,7 @@ file(GLOB __CLDNN_Sources__common
     "${__CLDNN_Directory__common}/*.hpp"
     "${__CLDNN_Directory__common}/*.cpp"
   )
-  
+
 set(__CLDNN_Directory__core_common          "${__CLDNN_Directory__core}/common")
 set(__CLDNN_Label__core_common              "${__CLDNN_Label__core}\\common")
 file(GLOB __CLDNN_Sources__core_common
@@ -87,7 +87,7 @@ foreach(__CLDNN_FilePath ${__CLDNN_Sources__actual_kernels})
   string(REPLACE ";" "\;" __CLDNN_FilePath "${__CLDNN_FilePath}") # [WA#1] Must escape ; again if occurred in item.
   get_filename_component(__CLDNN_FileDir "${__CLDNN_FilePath}" DIRECTORY)
   get_filename_component(__CLDNN_DirName "${__CLDNN_FileDir}" NAME)
-  
+
   set(__CLDNN_FileLabel "${__CLDNN_Label__actual_kernels}\\${__CLDNN_DirName}")
   source_group("${__CLDNN_FileLabel}" FILES ${__CLDNN_FilePath})
 endforeach()
@@ -137,7 +137,7 @@ include_directories(
     "${__CLDNN_Directory__main}"
     "${__CLDNN_Directory__core}"
     "${__CLDNN_Directory__core}/common"
-	"${__CLDNN_Directory__core}/cache"
+    "${__CLDNN_Directory__core}/cache"
     "${__CLDNN_Directory__actual_kernels}"
     "${__CLDNN_Directory__common}"
   )
@@ -165,7 +165,6 @@ endif()
 target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES})
 
 # =================================== Custom pre- and post-steps =======================================
-
 add_custom_command(OUTPUT "${__CLDNN_CGDirectory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}"
     COMMAND "${CMAKE_COMMAND}" -E make_directory "${__CLDNN_CGDirectory__cg_cache}"
     COMMAND "${PYTHON_EXECUTABLE}" "${__CLDNN_Directory__core_common}/primitive_db_gen.py" -out_path "${__CLDNN_CGDirectory__cg_cache}" -out_file_name "${__CLDNN_File__cg_cache__prim_db}" -kernels "${__CLDNN_Directory__cl_kernels}"
@@ -177,5 +176,17 @@ add_custom_command(OUTPUT "${__CLDNN_Directory__cg_cache}/${__CLDNN_File__cg_cac
     DEPENDS "${__CLDNN_CGDirectory__cg_cache}/${__CLDNN_File__cg_cache__prim_db}" ${__CLDNN_Sources__cl_kernels} "${__CLDNN_Directory__core_common}/primitive_db_gen.py"
     COMMENT "Updating file if the file changed (${__CLDNN_File__cg_cache__prim_db}) ..."
   )
+if(WIN32)
+  set(CLDNN_CACHE_PATH "${CLDNN__OUTPUT_BIN_DIR}/$<CONFIGURATION>")
+else((NOT ANDROID) AND (UNIX))
+  set(CLDNN_CACHE_PATH "${CLDNN__OUTPUT_LIB_DIR}/")
+endif()
+
+message(STATUS "[CACHE COMMAND]: " "${CMAKE_COMMAND} -E copy_if_different ${__CLDNN_Directory__core}/cache/cache.json ${CLDNN_CACHE_PATH}")
+
+add_custom_command(
+    TARGET "${CLDNN_BUILD__PROJ}" POST_BUILD
+    COMMAND "${CMAKE_COMMAND}" -E copy_if_different ${__CLDNN_Directory__core}/cache/cache.json ${CLDNN_CACHE_PATH})
+
 
 # ======================================================================================================
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_tools.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_tools.h
index 0f23cbeb4..509ead6e1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_tools.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_tools.h
@@ -50,6 +50,7 @@ namespace kernel_selector
         switch (wt)
         {
         case WeightsType::INT8:
+        case WeightsType::UINT8:
             return 1;
         case WeightsType::F16:
             return 2;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
index c24420970..e923c78b2 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/common_types.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -40,6 +40,8 @@ namespace kernel_selector
         ACTIVATION,
         SOFT_MAX,
         ELTWISE,
+        FUSED_CONV_BN_SCALE,
+        FUSED_CONV_ELTWISE,
         TABLE_LOOKUP,
         REORDER,
         RESHAPE,
@@ -63,7 +65,16 @@ namespace kernel_selector
         SELECT,
         BROADCAST,
         GEMM,
-        INDEX_SELECT
+        INDEX_SELECT,
+        PYRAMID_ROI_ALIGN,
+        CONTRACT,
+        ONE_HOT,
+        DETECTION_OUTPUT,
+        GATHER,
+        DEPTH_TO_SPACE,
+        SHUFFLE_CHANNELS,
+        STRIDED_SLICE,
+        REVERSE_SEQUENCE
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -92,6 +103,7 @@ namespace kernel_selector
         F16,
         F32,
         INT8,
+        UINT8,
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -119,9 +131,10 @@ namespace kernel_selector
         ACOS,
         COSH,
         LOG,
-		LOG2,
+        LOG2,
         EXP,
         NONE,
+        NOT,
         NONE_GRAD
     };
 
@@ -243,7 +256,17 @@ namespace kernel_selector
         MODULU,
         SQRT,
         RSQRT,
-        ASSIGN
+        ASSIGN,
+        EQ,
+        NE,
+        LT,
+        LE,
+        GT,
+        GE,
+        LOGIC_AND,
+        LOGIC_OR,
+        LOGIC_XOR,
+        SQUARED_DIFF
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -288,7 +311,7 @@ namespace kernel_selector
     enum class MeanSubtractMode
     {
         NONE,
-        INSIDE_PARAMS, // the index is feature id (modulu size) 
+        INSIDE_PARAMS, // the index is feature id (modulu size)
         IN_BUFFER,
     };
 
@@ -299,7 +322,7 @@ namespace kernel_selector
     {
         NONE,
         SUB,
-        MUL, 
+        MUL,
         DIV,
     };
 
@@ -357,18 +380,6 @@ namespace kernel_selector
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // NonLinearParams
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    struct NonLinearParams
-    {
-        float m = 1.f;
-        float n = 0.f;
-
-        NonLinearParams() = default;
-        NonLinearParams(const float m, const float n) : m(m), n(n) {}
-    };
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // Size
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     template <typename T>
@@ -409,4 +420,27 @@ namespace kernel_selector
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     using uSize  = Size<std::uint32_t>;
     using stSize = Size<std::size_t>;
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // ContractMode
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    enum class ContractMode
+    {
+        SUM,
+        PRODUCT,
+        ALL,
+        ANY,
+        MAX,
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // GatherAxis
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    enum class GatherAxis
+    {
+        X,
+        Y,
+        FEATURE,
+        BATCH,
+    };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp
index 555ca1ea5..4773448ac 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,45 +30,54 @@ namespace kernel_selector
             //X, Y, F, R, B
             {-1,-1, 0,-1, 1 }, // DataLayout::bf
             {-1,-1, 1,-1, 0 }, // DataLayout::fb
-            { 0, 1, 2,-1, 3 },  // DataLayout::bfyx
-            { 2, 3, 1,-1, 0 },  // DataLayout::yxfb
-            { 1, 2, 0,-1, 3 },  // DataLayout::byxf
-            { 1, 2, 3,-1, 0 },  // DataLayout::fyxb
-            {-1,-1, 0,-1, 1 },  // DataLayout::bs_f_bsv8__af8
-            {-1,-1, 0,-1, 1 },  // DataLayout::bs_f_bsv16__af8
-            { 0, 1, 2,-1, 3 },  // DataLayout::bf8_xy16
-            { 0, 1, 2, 3, 4 },  // DataLayout::brfyx
-            { 2, 1, 0,-1, 3 },  // DataLayout::winograd_2x3_s1_data
-            { 1, 2, 0,-1, 3 },  // DataLayout::byxf_af32
-            { 0, 1, 3,-1, 2 },  // DataLayout::fs_bs_yx_bsv4_fsv32
+            { 0, 1, 2,-1, 3 }, // DataLayout::bfyx
+            { 2, 3, 1,-1, 0 }, // DataLayout::yxfb
+            { 1, 2, 0,-1, 3 }, // DataLayout::byxf
+            { 1, 2, 3,-1, 0 }, // DataLayout::fyxb
+            {-1,-1, 0,-1, 1 }, // DataLayout::bs_f_bsv8__af8
+            {-1,-1, 0,-1, 1 }, // DataLayout::bs_f_bsv16__af8
+            { 0, 1, 2,-1, 3 }, // DataLayout::bf8_xy16
+            { 0, 1, 2, 3, 4 }, // DataLayout::brfyx
+            { 2, 1, 0,-1, 3 }, // DataLayout::winograd_2x3_s1_data
+            { 1, 2, 0,-1, 3 }, // DataLayout::byxf_af32
+            { 1, 2, 0,-1, 3 }, // DataLayout::byx8_f8
+            { 0, 1, 3,-1, 2 }, // DataLayout::fs_bs_yx_bsv4_fsv32
+            { 0, 1, 2, -1, 3 },// DataLayout::b_fs_yx_fsv4
         } };
 
-        std::array<std::array<int, 4>, WeightsLayout::WeightsLayoutCount> WeightsTensor::weightsChannelArray
+        std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> WeightsTensor::weightsChannelArray
         { {
-            //X, Y, I, O
-            {-1,-1, 0, 1 },  // WeightsLayout::oi
-            {-1,-1, 1, 0 },  // WeightsLayout::io
-            { 0, 1, 2, 3 },  // WeightsLayout::oiyx
-            { 1, 2, 0, 3 },  // WeightsLayout::oyxi
-            { 1, 2, 3, 0 },  // WeightsLayout::iyxo
-            { 2, 3, 1, 0 },  // WeightsLayout::yxio
-            { 0, 1, 2, 3 },  // WeightsLayout::os_iyx_osv16
-            { 0, 1, 2, 3 },  // WeightsLayout::os_iyx_osv16_rotate_180
-            {-1,-1, 0, 1 },  // WeightsLayout::os_i_osv8__ai8
-            {-1,-1, 0, 1 },  // WeightsLayout::os_i_osv16__ai8
-            {-1,-1, 0, 1 },  // WeightsLayout::os_i_osv16            
-            { 1, 2, 3, 0 },  // WeightsLayout::i_yxs_os_yxsv2_osv16
-            { 1, 2, 3, 0 },  // WeightsLayout::iy_xs_os_xsv2_osv16__ao32
-            { 1, 2, 3, 0 },  // WeightsLayout::iy_xs_os_xsv2_osv8__ao32
-            { 0, 1, 2, 3 },  // WeightsLayout::image_2d_weights_c4_fyx_b
-            { 0, 1, 2, 3 },  // WeightsLayout::image_2d_weights_c1_b_fyx
-            { 3, 2, 1, 0 },  // WeightsLayout::winograd_2x3_s1_weights
-            { 0, 1, 2, 3 },  // WeightsLayout::winograd_2x3_s1_fused_weights
-            { 0, 1, 2, 3 },  // WeightsLayout::winograd_6x3_s1_fused_weights
-            { 0, 1, 2, 3 },  // WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb
-            { 0, 1, 2, 3 },  // WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb
-            { 0, 1, 2, 3 },  // WeightsLayout::os_is_yx_isa8_osv8_isv4
-            { 1, 2, 0, 3 },  // WeightsLayout::is_o_yx_isv32
+            // X, Y,   I,  O, LX, LY,
+            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::oi
+            { -1, -1,  1,  0, -1, -1 }, // WeightsLayout::io
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::oiyx
+            {  1,  2,  0,  3, -1, -1 }, // WeightsLayout::oyxi
+            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::iyxo
+            {  2,  3,  1,  0, -1, -1 }, // WeightsLayout::yxio
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv16
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv32
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv64
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_iyx_osv16_rotate_180
+            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::os_i_osv8__ai8
+            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::os_i_osv16__ai8
+            { -1, -1,  0,  1, -1, -1 }, // WeightsLayout::os_i_osv16
+            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::i_yxs_os_yxsv2_osv16
+            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::iy_xs_os_xsv2_osv16__ao32
+            {  1,  2,  3,  0, -1, -1 }, // WeightsLayout::iy_xs_os_xsv2_osv8__ao32
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_c4_fyx_b
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_c1_b_fyx
+            {  3,  2,  1,  0, -1, -1 }, // WeightsLayout::winograd_2x3_s1_weights
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::winograd_2x3_s1_fused_weights
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::winograd_6x3_s1_fused_weights
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_yx_isa8_osv8_isv4
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4
+            {  1,  2,  0,  3, -1, -1 }, // WeightsLayout::is_o_yx_isv32
+            {  1,  2,  0,  3, -1, -1 }, // WeightsLayout::is_o32_yx_isv32_swizzled_by_4
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_y_x8_osv8_isv4
+            {  0,  1,  2,  3,  4,  5 }, // WeightsLayout::bf_lyx_yx
+            {  0,  1,  2,  3, -1, -1 }, // WeightsLayout::os_is_yx_osv16_isv4
         } };
 
         NDims DataTensor::GetSimpleDims(const std::vector<size_t>& d, DataLayout l)
@@ -98,6 +107,11 @@ namespace kernel_selector
                 assert(newDims.size() == 4);
                 newDims[0] = RoundUp(newDims[0], 32);
                 break;
+            case byx8_f4:
+                assert(newDims.size() == 4);
+                newDims[0] = RoundUp(newDims[0], 4);
+                newDims[1] = RoundUp(newDims[1], 8);
+                break;
             case fs_bs_yx_bsv4_fsv32:
                 assert(newDims.size() == 4);
                 newDims[3] = RoundUp(newDims[3], 32);
@@ -117,7 +131,7 @@ namespace kernel_selector
                 pitch *= newDims[i];
             }
 
-            if (l == byxf_af32 || l == fs_bs_yx_bsv4_fsv32)
+            if (l == byxf_af32 || l == fs_bs_yx_bsv4_fsv32 || l == byx8_f4)
             {
                 ret[0].pitch = 1;
                 ret[1].pitch = ret[0].pitch * newDims[0];
@@ -266,6 +280,14 @@ namespace kernel_selector
                 assert(newDims.size() == 4);
                 newDims[3] = RoundUp(newDims[3], 16);
                 break;
+            case os_iyx_osv32:
+                assert(newDims.size() == 4);
+                newDims[3] = RoundUp(newDims[3], 32);
+                break;
+            case os_iyx_osv64:
+                assert(newDims.size() == 4);
+                newDims[3] = RoundUp(newDims[3], 64);
+                break;
             case os_i_osv8__ai8:
                 assert(newDims.size() == 2);
                 newDims[0] = RoundUp(newDims[0], 8);
@@ -294,10 +316,31 @@ namespace kernel_selector
                 newDims[3] = RoundUp(newDims[3], 8);
                 newDims[2] = RoundUp(newDims[2], 32);
                 break;
+            case os_is_yx_isa8_osv8_isv4_swizzled_by_4:
+                assert(newDims.size() == 4);
+                newDims[3] = RoundUp(newDims[3], 32);
+                newDims[2] = RoundUp(newDims[2], 32);
+                break;
             case is_o_yx_isv32:
                 assert(newDims.size() == 4);
                 newDims[0] = RoundUp(newDims[0], 32);
                 break;
+            case is_o32_yx_isv32_swizzled_by_4:
+                assert(newDims.size() == 4);
+                newDims[0] = RoundUp(newDims[0], 32);
+                newDims[3] = RoundUp(newDims[3], 32);
+                break;
+            case os_is_y_x8_osv8_isv4:
+                assert(newDims.size() == 4);
+                newDims[2] = RoundUp(newDims[2], 4);
+                newDims[3] = RoundUp(newDims[3], 8);
+                newDims[0] = RoundUp(newDims[0], 8);
+                break;
+            case os_is_yx_osv16_isv4:
+                assert(newDims.size() == 4);
+                newDims[2] = RoundUp(newDims[2], 4);
+                newDims[3] = RoundUp(newDims[3], 16);
+                break;
             default:
                 break;
             }
@@ -322,15 +365,20 @@ namespace kernel_selector
             {
                 ret[2].pitch     = RoundUp(ret[1].v, 2) * ret[1].pitch;
                 ret[1].pad.after = newDims[1] - ret[1].v;
-                
+
                 ret[3].pitch     = ret[2].v * ret[2].pitch;
                 ret[2].pad.after = newDims[2] - ret[2].v;
             }
-            else if (l == os_is_yx_isa8_osv8_isv4)
+            else if (l == os_is_yx_isa8_osv8_isv4 || l == os_is_yx_isa8_osv8_isv4_swizzled_by_4)
             {
                 ret[0].pitch = 256;
                 ret[1].pitch = ret[0].pitch * ret[0].v;
             }
+            else if (l == bf_lyx_yx)
+            {
+                ret[2].pitch = ret[0].v * ret[1].v * ret[2].v * ret[3].v;
+                ret[3].pitch = ret[2].pitch * ret[5].v;
+            }
 
             return ret;
         }
@@ -385,6 +433,15 @@ namespace kernel_selector
                 vec[Channelndex(l, WeightsChannelName::IFM)] = dst_ifm;
                 vec[Channelndex(l, WeightsChannelName::OFM)] = OFM().v;
             }
+            else if (src_channels == 6 && dst_channels == 6)
+            {
+                vec[Channelndex(l, WeightsChannelName::X)] = IFM().v;
+                vec[Channelndex(l, WeightsChannelName::Y)] = OFM().v;
+                vec[Channelndex(l, WeightsChannelName::IFM)] = LX().v;
+                vec[Channelndex(l, WeightsChannelName::OFM)] = LY().v;
+                vec[Channelndex(l, WeightsChannelName::LX)] = X().v;
+                vec[Channelndex(l, WeightsChannelName::LY)] = Y().v;
+            }
             else
             {
                 assert(0);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h
index 8331ab0a2..cb3d3e972 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/common/tensor_type.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "common_types.h"
+#include "common_tools.h"
 #include <vector>
 #include <assert.h>
 #include <numeric>
@@ -48,7 +49,9 @@ namespace kernel_selector
             brfyx,              // 4D+batch
             winograd_2x3_s1_data, //winograd convolution input, F(2,3) -- filter 3x3 with stride 1
             byxf_af32, // for MMAD convolution
+            byx8_f4, // for MMAD convolution
             fs_bs_yx_bsv4_fsv32, // for batched MMAD
+            b_fs_yx_fsv4,        // reordering format for swizzled input for convolution using IMAD
             DataLayoutCount // NMBER OF ELEMENTS IN ENUM
         };
 
@@ -64,6 +67,8 @@ namespace kernel_selector
             iyxo,
             yxio,
             os_iyx_osv16,
+            os_iyx_osv32,
+            os_iyx_osv64,
             os_iyx_osv16_rotate_180,
             os_i_osv16,
             os_i_osv8__ai8,         // TODO can we drop the alignment form layout name?
@@ -79,8 +84,13 @@ namespace kernel_selector
             image_2d_weights_winograd_6x3_s1_fbxyb, // image 2d winograd convolution weights for fused kernel, F(2, 3) --filter 3x3 with stride 1
             image_2d_weights_winograd_6x3_s1_xfbyb, // image 2d winograd convolution weights for fused kernel, F(2, 3) --filter 3x3 with stride 1
             os_is_yx_isa8_osv8_isv4, // for MMAD convolution
-            is_o_yx_isv32, // for MMAD 1x1 convolutions
-            WeightsLayoutCount // NMBER OF ELEMENTS IN ENUM
+            os_is_yx_isa8_osv8_isv4_swizzled_by_4, // for MMAD convolution swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
+            is_o_yx_isv32,           // for MMAD 1x1 convolutions
+            is_o32_yx_isv32_swizzled_by_4,           // for MMAD 1x1 convolutions swizzled from ofm 0..7 to 0,4,8,12,16,20,24,28, 1,5...
+            os_is_y_x8_osv8_isv4, // for MMAD convolutions
+            bf_lyx_yx,               // local convolution
+            os_is_yx_osv16_isv4,     // swizzled weights for convolution using IMAD
+            WeightsLayoutCount       // NMBER OF ELEMENTS IN ENUM
         };
 
         ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -126,6 +136,8 @@ namespace kernel_selector
             Y   = 1,
             IFM = 2,
             OFM = 3,
+            LX = 4,
+            LY = 5,
         };
 
         inline bool SimpleLayout(WeightsLayout l)
@@ -495,6 +507,8 @@ namespace kernel_selector
             Dim Y()   const { return Extract(layout, WeightsChannelName::Y, dims); }
             Dim IFM() const { return Extract(layout, WeightsChannelName::IFM, dims); }
             Dim OFM() const { return Extract(layout, WeightsChannelName::OFM, dims); }
+            Dim LX()  const { return Extract(layout, WeightsChannelName::LX, dims); }
+            Dim LY()  const { return Extract(layout, WeightsChannelName::LY, dims); }
 
             static inline Dim Extract(WeightsLayout l, WeightsChannelName channel, const NDims& d)
             {
@@ -512,7 +526,7 @@ namespace kernel_selector
             }
         private:
             static NDims GetSimpleDims(const std::vector<size_t>& d, WeightsLayout l);
-            static std::array<std::array<int, 4>, WeightsLayout::WeightsLayoutCount> weightsChannelArray;
+            static std::array<std::array<int, 6>, WeightsLayout::WeightsLayoutCount> weightsChannelArray;
         };
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
index 358b66d1c..caca72818 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_base.cpp
@@ -53,7 +53,7 @@ namespace kernel_selector
         const auto& inputNlParams = params.inputActivationParams;
 
         jit.AddConstants({
-            MakeJitConstant("PARAMS_NUM", GetActivationAdditionalParamsNumber(params.activationFunc)),
+            MakeJitConstant("PARAMS_NUM", GetActivationAdditionalParamsNumber(params.activation.function)),
         });
 
         if (!inputNlParams.empty())
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h
index e2c6092c7..51d1f9027 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_opt.h
@@ -28,12 +28,12 @@ namespace kernel_selector
         virtual ~ActivationKernelOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         static const int NUM_COLS_WI = 4;
         virtual DispatchData SetDefault(const activation_params& arg) const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
         virtual JitConstants GetJitConstants(const activation_params& params, DispatchData kd) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h
index c07a44982..7dcccfac6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~ActivationKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.h
index 8fef33590..c479a8e2d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/activation/activation_kernel_tutorial.h
@@ -38,13 +38,13 @@ namespace kernel_selector {
         virtual ~ActivationKernel_Tutorial() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
 #ifdef ADVANCED_TUTORIAL
         virtual DispatchData SetDefault(const activation_params& arg) const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
         virtual JitConstants GetJitConstants(const activation_params& params, DispatchData) const override;
 #endif
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.h
index a3b26237b..a3bd10955 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_axis.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~ArgMaxMinKernelAxis() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_gpu_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_gpu_ref.h
index c492e7772..080b912e5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_gpu_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_gpu_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
 		virtual ~ArgMaxMinKernelGPURef() {}
 
 		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
 		virtual ParamsKey GetSupportedKey() const override;
 	};
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.h
index 0b1292332..5f06f4479 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/arg_max_min/arg_max_min_kernel_opt.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~ArgMaxMinKernelOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_gpu_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_gpu_ref.h
index a8ce320d9..a632c565b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_gpu_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/average_unpooling/average_unpooling_kernel_gpu_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~AverageUnpoolingKernelGPURef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
index ebf881f50..064d8a549 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.cpp
@@ -36,6 +36,10 @@ namespace kernel_selector
         jit.AddConstant(MakeJitConstant("EPSILON", params.batchNormParams.epsilon));
         if (params.batchNormParams.with_inv_var)
             jit.AddConstant(MakeJitConstant("FORWARD", 1));
+		if (params.batchNormParams.with_scale_shift)
+			jit.AddConstant(MakeJitConstant("SCALE_SHIFT", 1));
+		if (params.batchNormParams.with_mean_var_out)
+			jit.AddConstant(MakeJitConstant("MEAN_VAR_OUT", 1));
 
         return jit;
     }
@@ -79,7 +83,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        int inputs_num = 1 + orgParams.batchNormParams.with_inv_var;
+        int inputs_num = 1 + orgParams.batchNormParams.with_inv_var + 2*orgParams.batchNormParams.with_scale_shift + 2 * orgParams.batchNormParams.with_mean_var_out;
         FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, inputs_num);
 
         kd.estimatedTime = estimatedTime;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.h
index 30855ef4b..ebc4c05f4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_base.h
@@ -32,6 +32,8 @@ namespace kernel_selector
         {
             float epsilon;
             bool with_inv_var;
+			bool with_scale_shift;
+			bool with_mean_var_out = false;
         };
 
         DedicatedParams batchNormParams;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_ref.h
index ccf80083e..6a7da91e3 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm/batch_norm_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~BatchNormKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_ref.h
index 5c36e1c72..5c858717a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/batch_norm_grad/batch_norm_grad_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~BatchNormGradKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
index fbce8a632..4e778a436 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_base.h
@@ -32,7 +32,9 @@ namespace kernel_selector
 
 
         border_params()
-            : base_params(KernelType::BORDER)
+            : base_params(KernelType::BORDER),
+            b_type(BorderType::CONSTANT),
+            border_value(0.0f)
         {
         }
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
index 9e42901bf..3e513843d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.cpp
@@ -39,9 +39,9 @@ namespace kernel_selector
         k.EnableOutputLayout(DataLayout::yxfb);
         k.EnableOutputLayout(DataLayout::byxf);
 
-        k.EnableBatching();
         k.EnableTensorOffset();
         k.EnableTensorPitches();
+        k.EnableBatching();
 
         return k;
     }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
index 0862ed144..f5f4c8190 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/border/border_kernel_ref.h
@@ -25,6 +25,8 @@ namespace kernel_selector
         BorderKernelRef() : BorderKernelBase("border_gpu_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
index 3d3b2f4d3..795871b37 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.cpp
@@ -23,6 +23,11 @@ namespace kernel_selector
     JitConstants BroadcastKernelBase::GetJitConstants(const broadcast_params& params)
     {
         JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        jit.AddConstants({
+            MakeJitConstant("BROADCAST_ORDER", params.input_order)
+        });
+
         return jit;
     }
 
@@ -63,7 +68,6 @@ namespace kernel_selector
 
         auto& kernel = k_data.kernels[0];
         FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
-
         k_data.estimatedTime = estimated_time;
 
         return {k_data};
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
index cf4865e80..f13192a63 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_base.h
@@ -29,6 +29,8 @@ namespace kernel_selector
             : base_params(KernelType::BROADCAST)
         {
         }
+        std::vector<uint16_t> input_order;
+
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
index 0be42a5e2..f7fe76404 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.cpp
@@ -25,20 +25,22 @@ namespace kernel_selector
         k.EnableInputDataType(Datatype::F32);
         k.EnableInputDataType(Datatype::INT8);
         k.EnableInputDataType(Datatype::UINT8);
+        k.EnableInputDataType(Datatype::INT32);
+        k.EnableInputDataType(Datatype::INT64);
 
         k.EnableOutputDataType(Datatype::F32);
         k.EnableOutputDataType(Datatype::F16);
         k.EnableOutputDataType(Datatype::INT8);
         k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT32);
+        k.EnableOutputDataType(Datatype::INT64);
 
         k.EnableInputLayout(DataLayout::bfyx);
-        k.EnableInputLayout(DataLayout::yxfb);
-        k.EnableInputLayout(DataLayout::byxf);
 
         k.EnableOutputLayout(DataLayout::bfyx);
-        k.EnableOutputLayout(DataLayout::yxfb);
-        k.EnableOutputLayout(DataLayout::byxf);
 
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
         k.EnableBatching();
 
         return k;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
index ccca397ab..3f6fee8bd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/broadcast/broadcast_kernel_ref.h
@@ -25,6 +25,8 @@ namespace kernel_selector
         BroadcastKernelRef() : BroadcastKernelBase("broadcast_gpu_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.h
index 70cba2731..c9e577aa6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_base.h
@@ -46,6 +46,8 @@ namespace kernel_selector
         concatenation_optional_params() : optional_params(KernelType::CONCATENATION) {}
         bool kernelPerInput = true;
 
+
+    protected:
         virtual ParamsKey GetSupportedKey() const
         {
             ParamsKey k = optional_params::GetSupportedKey();
@@ -80,4 +82,4 @@ namespace kernel_selector
         virtual DispatchData SetDefault(const concatenation_params& params) const;
         KernelsData GetCommonKernelsData(const Params& params, const optional_params&) const;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.h
index 2b4036611..f21e56bef 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_depth_bfyx_no_pitch.h
@@ -27,8 +27,10 @@ namespace kernel_selector {
         virtual ~ConcatenationKernel_depth_bfyx_no_pitch() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual DispatchData SetDefault(const concatenation_params& params) const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.h
index 2b7379c07..3020b179d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/concatenation/concatenation_kernel_ref.h
@@ -27,8 +27,8 @@ namespace kernel_selector {
         virtual ~ConcatenationKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const concatenation_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.cpp
new file mode 100644
index 000000000..d5ac28eff
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.cpp
@@ -0,0 +1,138 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "contract_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+    JitConstants ContractKernelBase::GetJitConstants(const contract_params& params)
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        const size_t no_dim_flag = 6;
+        std::vector<size_t> output_dims(4, no_dim_flag);
+        int out_dim = 2;
+        for (int i = 3; i >= 0; --i)
+        {
+            if (std::find(params.reduction_axes.begin(), params.reduction_axes.end(), i) == params.reduction_axes.end())
+                output_dims.at(i) = out_dim--;
+        }
+
+        if (output_dims[3] != no_dim_flag)
+            jit.AddConstants({
+                MakeJitConstant("DIM_X", output_dims.at(3))
+            });
+        if (output_dims[2] != no_dim_flag)
+            jit.AddConstants({
+                MakeJitConstant("DIM_Y", output_dims.at(2))
+            });
+        if (output_dims[1] != no_dim_flag)
+            jit.AddConstants({
+                MakeJitConstant("DIM_F", output_dims.at(1))
+            });
+        if (output_dims[0] != no_dim_flag)
+            jit.AddConstants({
+                MakeJitConstant("DIM_B", output_dims.at(0))
+            });
+
+        jit.AddConstants({
+            MakeJitConstant("REDUCE_X", output_dims.at(3) == no_dim_flag),
+            MakeJitConstant("REDUCE_Y", output_dims.at(2) == no_dim_flag),
+            MakeJitConstant("REDUCE_F", output_dims.at(1) == no_dim_flag),
+            MakeJitConstant("REDUCE_B", output_dims.at(0) == no_dim_flag)
+        });
+
+        switch (params.mode)
+        {
+        case ContractMode::SUM:
+            jit.AddConstants({
+                MakeJitConstant("REDUCE_SEED", "0"),
+                MakeJitConstant("REDUCE_OPERATION(a, b)", "a + b")
+            });
+            break;
+        case ContractMode::PRODUCT:
+            jit.AddConstants({
+                MakeJitConstant("REDUCE_SEED", "1"),
+                MakeJitConstant("REDUCE_OPERATION(a, b)", "a * b")
+            });
+            break;
+        case ContractMode::ALL:
+            jit.AddConstants({
+                MakeJitConstant("REDUCE_SEED", "1"),
+                MakeJitConstant("REDUCE_OPERATION(a, b)", "a && b")
+            });
+            break;
+        case ContractMode::ANY:
+            jit.AddConstants({
+                MakeJitConstant("REDUCE_SEED", "0"),
+                MakeJitConstant("REDUCE_OPERATION(a, b)", "a || b")
+            });
+            break;
+        case ContractMode::MAX:
+            jit.AddConstants({
+                MakeJitConstant("REDUCE_SEED", "UNIT_VAL_MIN"),
+                MakeJitConstant("REDUCE_OPERATION(a, b)", "UNIT_MAX_FUNC(a,b)")
+            });
+            break;
+        }
+
+        return jit;
+    }
+
+    ContractKernelBase::DispatchData ContractKernelBase::SetDefault(const contract_params& params)
+    {
+        const auto& output = params.output;
+
+        DispatchData kd;
+
+        kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+        std::vector<size_t> global{ output.Feature().v, output.Y().v, output.X().v };
+        const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+        kd.gws0 = global[0];
+        kd.gws1 = global[1];
+        kd.gws2 = global[2];
+
+        kd.lws0 = local[0];
+        kd.lws1 = local[1];
+        kd.lws2 = local[2];
+
+        return kd;
+    }
+
+    KernelsData ContractKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+    {
+        assert(params.GetType() == KernelType::CONTRACT);
+
+        const auto& prim_params = static_cast<const contract_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+        auto run_info = SetDefault(prim_params);
+        KernelData k_data = KernelData::Default<contract_params>(params);
+
+        auto cldnn_jit = GetJitConstants(prim_params);
+        auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+        auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = k_data.kernels[0];
+        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+        k_data.estimatedTime = estimated_time;
+
+        return{ k_data };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.h
new file mode 100644
index 000000000..22e308c35
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_base.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // contract_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct contract_params : public base_params
+    {
+        contract_params()
+            : base_params(KernelType::CONTRACT)
+        {
+        }
+        ContractMode mode;
+        std::vector<uint16_t> reduction_axes;
+
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // contract_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct contract_optional_params : optional_params
+    {
+        contract_optional_params()
+            : optional_params(KernelType::CONTRACT)
+        {
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // ContractKernelBase
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class ContractKernelBase : public common_kernel_base
+    {
+    public:
+        using common_kernel_base::common_kernel_base;
+
+        using DispatchData = CommonDispatchData;
+
+    protected:
+        static JitConstants GetJitConstants(const contract_params& params);
+        static DispatchData SetDefault(const contract_params& params);
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_ref.cpp
new file mode 100644
index 000000000..ba42e2808
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_ref.cpp
@@ -0,0 +1,53 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "contract_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+    ParamsKey ContractKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableInputDataType(Datatype::UINT8);
+        k.EnableInputDataType(Datatype::INT32);
+        k.EnableInputDataType(Datatype::INT64);
+
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT32);
+        k.EnableOutputDataType(Datatype::INT64);
+
+        k.EnableInputLayout(DataLayout::bfyx);
+
+        k.EnableOutputLayout(DataLayout::bfyx);
+
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+
+        return k;
+    }
+
+    KernelsData ContractKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_ref.h
new file mode 100644
index 000000000..eb8a6cc8e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_ref.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "contract_kernel_base.h"
+
+
+namespace kernel_selector
+{
+    class ContractKernelRef : public ContractKernelBase
+    {
+    public:
+        ContractKernelRef() : ContractKernelBase("contract_ref") {}
+
+        KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_selector.cpp
new file mode 100644
index 000000000..06d756997
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "contract_kernel_selector.h"
+#include "contract_kernel_ref.h"
+
+namespace kernel_selector
+{
+    contract_kernel_selector::contract_kernel_selector()
+    {
+        Attach<ContractKernelRef>();
+    }
+
+    KernelsData contract_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::CONTRACT);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_selector.h
new file mode 100644
index 000000000..3c9e87a2b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/contract/contract_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+    class contract_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static contract_kernel_selector &Instance() {
+            static contract_kernel_selector instance;
+            return instance;
+        }
+
+        contract_kernel_selector();
+
+        KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.cpp
index ab7690655..67f0a048c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_1x1_gemm_MMAD.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -82,7 +81,7 @@ namespace kernel_selector {
         const auto of_maps = arg.output.Feature().v;
         const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
 
-        runInfo.effiency = FORCE_PRIORITY_1;
+        runInfo.effiency = FORCE_PRIORITY_2;
 
         runInfo.gws0 = RoundUp(arg.output.X().v * arg.output.Y().v, 8) / 8;
         runInfo.gws1 = of_threads_per_batch * arg.output.Batch().v;
@@ -111,6 +110,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_1x1_gemm_MMAD::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.h
index 5c664f639..7596f8683 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_1x1_gemm_MMAD.h
@@ -28,9 +28,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_1x1_gemm_MMAD() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
@@ -41,4 +41,4 @@ namespace kernel_selector {
             };
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.cpp
index 0963f0ba8..f4a3863e7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_MMAD.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -50,7 +49,7 @@ namespace kernel_selector {
         const auto of_maps = arg.output.Feature().v;
         const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
 
-        runInfo.effiency = FORCE_PRIORITY_3;
+        runInfo.effiency = FORCE_PRIORITY_4;
 
         runInfo.gws0 = arg.output.X().v;
         runInfo.gws1 = arg.output.Y().v;
@@ -79,9 +78,9 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_MMAD::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        KernelsData kd = GetCommonKernelsData(params, options);
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options);
         if(!kd.empty())
-            kd[0].estimatedTime = FORCE_PRIORITY_3;
+            kd[0].estimatedTime = FORCE_PRIORITY_4;
         return kd;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.h
index 824fcf73d..1b2bbabab 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD.h
@@ -28,9 +28,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_MMAD() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
@@ -40,4 +40,4 @@ namespace kernel_selector {
             };
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.cpp
index dd2a03c1e..6c892e8c5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_MMAD_blocks.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -25,21 +24,21 @@ namespace kernel_selector
         std::vector<size_t> blockWidthSizes = { 1,2,4,5,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
         std::vector<size_t> blockHeightSizes = { 1,2,3,4,5,6,7,8,9,10 };
         std::vector<size_t> prefetchSizes = { 1,2,3,4,5,6,8,10 };
-        std::vector<std::string> executionModes = { /*AGE_BASED ,*/ ROUND_ROBIN };
+        std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
         const size_t maxBlockSize = 240;
-
-        for (auto blockWidth : blockWidthSizes)
+        for (auto executionMode : executionModes)
         {
-            for (auto blockHeight : blockHeightSizes)
+            for (auto blockWidth : blockWidthSizes)
             {
-                for (auto prefetch : prefetchSizes)
+                for (auto blockHeight : blockHeightSizes)
                 {
-                    for (auto executionMode : executionModes)
+                    for (auto prefetch : prefetchSizes)
                     {
                         if (blockWidth * blockHeight <= maxBlockSize)
                         {
                             autoTuneOptions.emplace_back(AutoTuneOption{ blockWidth, blockHeight, prefetch, executionMode });
                         }
+                   
                     }
                 }
             }
@@ -110,7 +109,7 @@ namespace kernel_selector
         // Sub-group size used by "convolution_gpu_mmad_blocks" kernel.
         constexpr size_t sub_group_size = 16;
 
-        AutoTuneOption option = { 0, 0, 0, ROUND_ROBIN };
+        AutoTuneOption option = { 0, 0, 0, DEFAULT };
 
         const convolution_params& cp = static_cast<const convolution_params&>(p);
 
@@ -255,14 +254,9 @@ namespace kernel_selector
         return jit;
     }
 
-    KernelsData ConvolutionKernel_MMAD_blocks::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
-    {
-        return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex);
-    }
-
     KernelsData ConvolutionKernel_MMAD_blocks::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        KernelsData kd = GetCommonKernelsData(params, options);
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options);
         if (!kd.empty())
             kd[0].estimatedTime = FORCE_PRIORITY_2;
     
@@ -287,9 +281,6 @@ namespace kernel_selector
             }
         }
 
-        KernelsData defaultKds = GetKernelsData(params, options);
-        res.insert(res.end(), defaultKds.begin(), defaultKds.end());
-
         return res;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.h
index 03137b498..a49561394 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_MMAD_blocks.h
@@ -29,10 +29,9 @@ namespace kernel_selector {
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
         virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
-        virtual KernelsData GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, int autoTuneIndex) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
@@ -54,4 +53,4 @@ namespace kernel_selector {
         AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const;
         std::vector<AutoTuneOption> autoTuneOptions = {};
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
index 86bfe937c..d40c8abef 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.cpp
@@ -61,10 +61,11 @@ namespace kernel_selector
             MakeJitConstant("STRIDE",                       params.stride),
             MakeJitConstant("PADDING",                      params.padding),
             MakeJitConstant("DILATION",                     params.dilation),
-            MakeJitConstant("FILTER_ARRAY_NUM",             params.split),
+            MakeJitConstant("FILTER_ARRAY_NUM",             params.split * params.groups),
             MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
-            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      params.depthwiseSeparableOpt),
+            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      params.depthwise_separable_opt),
             MakeJitConstant("QUANTIZATION_TERM",            params.int8_quantization),
+            MakeJitConstant("GROUPED",                      (params.groups > 1) ? 1 : 0),
         });
 
         if (params.int8_quantization)
@@ -82,6 +83,11 @@ namespace kernel_selector
                 mem_consts.AddConstants({ MakeJitConstant("O_QF", params.output_quantization_factor) });
         }
 
+        if (params.local_convolution)
+        {
+            mem_consts.AddConstants({ MakeJitConstant("LOCAL_CONVOLUTION", params.local_convolution) });
+        }
+
         std::vector<uint32_t> unrollLoopParams{
             params.filterSize.x,
             params.filterSize.y,
@@ -249,4 +255,123 @@ namespace kernel_selector
 
         return{ kd };
     }
+
+    bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const DataTensor& reqDesc)
+    {
+        assert(params.inputs.size() == 1);
+
+        bool properPadding =
+            reqDesc.X().pad.before <= params.inputs[0].X().pad.before &&
+            reqDesc.Y().pad.before <= params.inputs[0].Y().pad.before &&
+            reqDesc.Feature().pad.before <= params.inputs[0].Feature().pad.before &&
+            reqDesc.Batch().pad.before <= params.inputs[0].Batch().pad.before;
+
+        properPadding &=
+            reqDesc.X().pad.after <= params.inputs[0].X().pad.after &&
+            reqDesc.Y().pad.after <= params.inputs[0].Y().pad.after &&
+            reqDesc.Feature().pad.after <= params.inputs[0].Feature().pad.after &&
+            reqDesc.Batch().pad.after <= params.inputs[0].Batch().pad.after;
+
+        properPadding &= ((params.padding.x == 0 && params.padding.y == 0) || params.inputs[0].GetPaddedVal() == 0.f);
+
+        return properPadding;
+    }
+
+    static DataTensor GetConvolutionBFYXPaddedTensor(const convolution_params& cp)
+    {
+        assert(cp.inputs.size() == 1);
+        assert(cp.inputs[0].GetDims().size() == 4U);
+
+        DataTensor t = cp.inputs[0];
+        std::vector<Tensor::Pad> pad{ { 0,0 },{ 0,0 },{ 0,0 },{ 0,0 } };
+
+        pad[0].before = cp.padding.x;
+        pad[1].before = cp.padding.y;
+
+        const auto inputLimitX = (cp.output.X().v - 1) * cp.stride.x + (cp.filterSize.x - 1) * cp.dilation.x + 1;
+        const auto inputLimitY = (cp.output.Y().v - 1) * cp.stride.y + (cp.filterSize.y - 1) * cp.dilation.y + 1;
+
+        pad[0].after = (size_t)std::max((int)inputLimitX - (int)t.X().v - (int)pad[0].before, (int)0);
+        pad[1].after = (size_t)std::max((int)inputLimitY - (int)t.Y().v - (int)pad[1].before, (int)0);
+
+        Tensor::NDims dims(4);
+        const Tensor::NDims& orgDims = cp.inputs[0].GetDims();
+        size_t pitch = 1;
+        for (size_t i = 0; i < dims.size(); i++)
+        {
+            dims[i].pad = pad[i];
+            dims[i].v = orgDims[i].v;
+            dims[i].pitch = pitch;
+            pitch *= dims[i].LogicalDimPadded();
+        }
+
+        return{ dims, t.GetDType(), t.GetLayout() };
+    }
+
+    bool CovolutionCheckInput(const Params& p, const optional_params& o)
+    {
+        const convolution_params& params = static_cast<const convolution_params&>(p);
+        const convolution_optional_params& optParams = static_cast<const convolution_optional_params&>(o);
+
+        const auto req_input = GetConvolutionBFYXPaddedTensor(params);
+        const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input);
+        const bool bInputPadded = optParams.allowInputReordering || bProperInputDesc;
+
+        if (!bInputPadded)
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    bool CovolutionUpdateInputParams(convolution_params& params)
+    {
+        const auto req_input = GetConvolutionBFYXPaddedTensor(params);
+        const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input);
+
+        if (!bProperInputDesc)
+        {
+            params.inputs[0] = req_input;
+            return true;
+        }
+
+        return false;
+    }
+
+	    std::string ConvolutionKernelBase::GetAutoTuneOptions(int autoTuneIndex) const
+    {
+        if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
+        {
+            return autoTuneOptions[autoTuneIndex];
+        }
+
+        return DEFAULT;
+    }
+
+    KernelsData ConvolutionKernelBase::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
+    {
+        return GetCommonKernelsData(params, options, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex);
+    }
+
+    KernelsData ConvolutionKernelBase::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelsData res = {};
+
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        
+        return res;
+    }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h
index 4e7c82fb2..d6dc4763a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_base.h
@@ -56,6 +56,11 @@ namespace kernel_selector
                 GEMMStyle  gemmStyle;
             };
         };
+        
+        std::string GetAutoTuneOptions(int autoTuneIndex) const;
+        std::vector<std::string> autoTuneOptions = { DEFAULT, NO_PRERA_SCH, AGE_BASED };
+        virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
+        virtual KernelsData GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, int autoTuneIndex = -1) const override;
     
     protected:
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const = 0;
@@ -66,6 +71,11 @@ namespace kernel_selector
         virtual DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const;
         static bool CheckWorkGroups(const DispatchData&);
         static bool CheckPitchForSplitOnly(const convolution_params& params);
-        KernelsData GetCommonKernelsData(const Params& params, const optional_params& options, const std::string exeMode = ROUND_ROBIN, int autoTuneIndex = -1) const;
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params& options, const std::string exeMode = DEFAULT, int autoTuneIndex = -1) const;
     };
+
+    bool CovolutionCheckInput(const Params& p, const optional_params& o);
+    bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const DataTensor& reqDesc);
+    bool CovolutionUpdateInputParams(convolution_params& params);
+
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp
index cba3ba36e..e0fab58b7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_bfyx_1x1.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -107,6 +106,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_bfyx_1x1::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h
index 7ea745657..0f11ddd85 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1.h
@@ -29,9 +29,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_bfyx_1x1() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
         {
             return{
@@ -42,4 +42,4 @@ namespace kernel_selector {
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp
index a34add2c2..b1c15ae38 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_bfyx_1x1_gemm_buf.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -112,6 +111,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_bfyx_1x1_gemm_buf::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h
index 61eb82628..55ecfbdb8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_gemm_buf.h
@@ -29,9 +29,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_bfyx_1x1_gemm_buf() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
         {
             return{
@@ -42,4 +42,4 @@ namespace kernel_selector {
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp
new file mode 100644
index 000000000..1c08d2acf
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.cpp
@@ -0,0 +1,173 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_bfyx_1x1_opt.h"
+
+namespace kernel_selector 
+{
+
+    convolution_kernel_bfyx_1x1_opt::convolution_kernel_bfyx_1x1_opt() : ConvolutionKernelBase("convolution_gpu_bfyx_1x1_opt")
+    {
+    }
+
+    ParamsKey convolution_kernel_bfyx_1x1_opt::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableSubGroup();
+        k.EnableBiasPerFeature();
+        k.EnableBiasPerOutput();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        return k;
+    }
+
+    struct block_params
+    {
+        int32_t out_width;
+        int32_t out_height;
+        int32_t out_depth;
+    };
+
+    static block_params get_out_block_size(const convolution_params& p)
+    {
+        auto out_depth = 8;
+
+        if (p.output.X().v == 7)
+        {
+            auto gws0 = p.output.X().v / 7;
+            auto gws1 = p.output.Y().v / 1;
+            auto gws2 = 2*(p.output.Feature().v * p.output.Batch().v) / 8 ; // process 8 output channels per Workitem
+
+            auto compute_units = p.engineInfo.computeUnitsCount;
+            auto total_threads = (gws0 * gws1 * gws2) / 64;
+            if (total_threads < compute_units)
+            {
+                out_depth /= 2;
+                total_threads *= 2;
+            }
+            if (total_threads < compute_units)
+            {
+                out_depth /= 2;
+                total_threads *= 2;
+            }
+            return { 7,1,out_depth };
+        }
+        else if (p.output.X().v == 14)
+            return { 7,1,8 };
+        else if (p.output.X().v == 28)
+            return { 7,2,4 };
+        else if (p.output.X().v == 56)
+            return { 8,1,8 };
+
+        return { 1,1,1 };
+    }
+
+
+    ConvolutionKernelBase::DispatchData convolution_kernel_bfyx_1x1_opt::SetDefault(const convolution_params& cp, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+
+        constexpr size_t sub_group_size = 8;
+
+        runInfo.effiency = FORCE_PRIORITY_3;
+
+        auto block = get_out_block_size(cp);
+
+        runInfo.gws0 = cp.output.X().v / block.out_width;
+        runInfo.gws1 = cp.output.Y().v / block.out_height;
+        runInfo.gws2 = 2*(cp.output.Feature().v * cp.output.Batch().v) / block.out_depth; // process 8 output channels per Workitem
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 2*sub_group_size;
+
+        return runInfo;
+    }
+
+    bool convolution_kernel_bfyx_1x1_opt::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ConvolutionKernelBase::Validate(p, o))
+        {
+            return false;
+        }
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        if (cp.output.Feature().v % 64 != 0)
+            return false;
+
+        if (cp.padding.x != 0 || cp.padding.y != 0)
+            return false;
+
+        // if block sizes are 1x1, then this algorithm is probably not the best
+        auto block = get_out_block_size(cp);
+        if (block.out_width == 1 && block.out_height == 1)
+            return false;
+
+        if (cp.output.X().v % block.out_width != 0)
+            return false;
+        if (cp.output.Y().v % block.out_height != 0)
+            return false;
+
+        return true;
+    }
+
+    JitConstants convolution_kernel_bfyx_1x1_opt::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+    {
+        auto jit = Parent::GetJitConstants(params, runInfo);
+
+        auto block = get_out_block_size(params);
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", block.out_height));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_DEPTH", block.out_depth));
+
+        return jit;
+    }
+
+    std::vector<WeightsLayout> convolution_kernel_bfyx_1x1_opt::GetSupportedWeightLayouts(const convolution_params& cp) const
+    {
+        auto block = get_out_block_size(cp);
+        if (block.out_depth == 8)
+            return { WeightsLayout::os_iyx_osv64 };
+        if (block.out_depth == 4)
+            return { WeightsLayout::os_iyx_osv32 };
+        if (block.out_depth == 2)
+            return { WeightsLayout::os_iyx_osv16 };
+        else
+            return{ WeightsLayout::yxio };
+    }
+
+    KernelsData convolution_kernel_bfyx_1x1_opt::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kd = GetCommonKernelsData(params, options);
+        if (!kd.empty())
+            kd[0].estimatedTime = FORCE_PRIORITY_1;
+        return kd;
+    }
+
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h
new file mode 100644
index 000000000..969dadba1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_1x1_opt.h
@@ -0,0 +1,40 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+    class convolution_kernel_bfyx_1x1_opt : public ConvolutionKernelBase
+    {
+    public:
+        using Parent = ConvolutionKernelBase;
+        convolution_kernel_bfyx_1x1_opt();
+        virtual ~convolution_kernel_bfyx_1x1_opt() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&)  const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        bool NeedPaddedInput() const override { return true; }
+        DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
index b92df30b7..9bbfdcb13 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_bfyx_3x3_dw_opt.h"
-#include "kernel_selector_utils.h"
  
 namespace kernel_selector 
 {
@@ -24,7 +23,7 @@ namespace kernel_selector
         // Generate the dispatch options to the auto-tuner.
         std::vector<size_t> tileXDimSizes = { 1,2,4,5,6,8,10,12,14 };
         std::vector<size_t> tileYDimSizes = { 1,2,3,4,5,6,7 };
-        std::vector<std::string> executionModes = { /*AGE_BASED ,*/ ROUND_ROBIN };
+        std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
 
         for (auto tileXDim : tileXDimSizes)
         {
@@ -95,7 +94,7 @@ namespace kernel_selector
 
         constexpr int simdSize = 16;
 
-        return AutoTuneOption{ { simdSize - 2, 7 }, ROUND_ROBIN };
+        return AutoTuneOption{ { simdSize - 2, 7 }, DEFAULT };
     }
 
     ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_3x3_dw_opt::SetDefault(const convolution_params& params, int autoTuneIndex) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h
index 0c9cf0e8d..9606b4e42 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_3x3_dw_opt.h
@@ -30,9 +30,9 @@ namespace kernel_selector
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
         virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
         virtual KernelsData GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, int autoTuneIndex) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&)  const override { return{ WeightsLayout::oiyx }; }
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
@@ -47,4 +47,4 @@ namespace kernel_selector
         AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const;
         std::vector<AutoTuneOption> autoTuneOptions = {};
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
index f6841db94..12478e85a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.cpp
@@ -15,8 +15,7 @@
 */
 
 #include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
-#include "kernel_selector_utils.h"
- 
+
 namespace kernel_selector 
 {
     ParamsKey ConvolutionKernel_bfyx_depthwise_weights_lwg::GetSupportedKey() const
@@ -39,6 +38,7 @@ namespace kernel_selector
         k.EnableSubGroup();
         k.EnableSubGroupShort();
         k.EnableDepthwiseSeparableOpt();
+        k.EnableDilation();
         return k;
     }
 
@@ -51,12 +51,11 @@ namespace kernel_selector
         }
 
        const convolution_params& cp = static_cast<const convolution_params&>(p);
-       if (!cp.depthwiseSeparableOpt)
+       if (!cp.depthwise_separable_opt)
            return false;
-
        if ((cp.filterSize.x > 4) ||
            (cp.filterSize.y > 4) ||
-           (cp.inputs[0].Feature().v != cp.split))
+           ((cp.inputs[0].Feature().v != cp.split) && (cp.inputs[0].Feature().v != cp.groups)))
        {
            return false;
        }
@@ -95,6 +94,6 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_bfyx_depthwise_weights_lwg::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
index b578f8fd5..96a79d277 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_depthwise_weights_lwg.h
@@ -28,12 +28,12 @@ namespace kernel_selector
         virtual ~ConvolutionKernel_bfyx_depthwise_weights_lwg() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&)  const override { return{ WeightsLayout::oiyx }; }
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp
index 95d012bc6..17c9cab8d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "convolution_kernel_bfyx_direct_10_12_16.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector {
 
@@ -111,6 +109,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_bfyx_Direct_10_10_12::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, AGE_BASED);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h
index 68ae13a13..a33782561 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_direct_10_12_16.h
@@ -28,9 +28,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_bfyx_Direct_10_10_12() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::i_yxs_os_yxsv2_osv16 }; }
 
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
@@ -38,4 +38,4 @@ namespace kernel_selector {
         bool NeedPaddedInput() const override { return true; }
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp
index c7127487c..e44b521bd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.cpp
@@ -14,10 +14,7 @@
 // limitations under the License.
 */
 
-#include <cmath>
 #include "convolution_kernel_bfyx_gemm_like.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector 
 {
@@ -137,6 +134,6 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_bfyx_GEMMLike::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, AGE_BASED);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h
index 693687d52..4074f8b6d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_gemm_like.h
@@ -28,9 +28,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_bfyx_GEMMLike() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override;
         std::string GetKernelName(const convolution_params& params) const override;
         bool NeedPaddedInput() const override { return true; }
@@ -38,4 +38,4 @@ namespace kernel_selector {
         bool Validate(const Params& p, const optional_params& o) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp
index 854c12e8b..730f88f99 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "convolution_kernel_bfyx_os_iyx_osv16.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector 
 {
@@ -29,21 +27,21 @@ namespace kernel_selector
         std::vector<size_t> blockWidthSizes = { 1,2,4,5,6,8,10,12,14,16 };
         std::vector<size_t> blockHeightSizes = { 1,2,3,4,5 };
         std::vector<size_t> prefetchSizes = { 1,2,3,4,5,6,8,10 };
-        std::vector<std::string> executionModes = { /*AGE_BASED ,*/ ROUND_ROBIN };
+        std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
         const size_t maxBlockSize = 60;
 
-        for (auto blockWidth : blockWidthSizes)
+        for (auto executionMode : executionModes)
         {
-            for (auto blockHeight : blockHeightSizes)
+            for (auto blockWidth : blockWidthSizes)
             {
-                for (auto prefetch : prefetchSizes)
+                for (auto blockHeight : blockHeightSizes)
                 {
-                    for (auto executionMode : executionModes)
+                    for (auto prefetch : prefetchSizes)
                     {
-                        if (blockWidth * blockHeight <= maxBlockSize)
-                        {
-                            autoTuneOptions.emplace_back(AutoTuneOption{ blockWidth, blockHeight, prefetch, executionMode });
-                        }
+                            if (blockWidth * blockHeight <= maxBlockSize)
+                            {
+                                autoTuneOptions.emplace_back(AutoTuneOption{ blockWidth, blockHeight, prefetch, executionMode });
+                            }
                     }
                 }
             }
@@ -124,7 +122,7 @@ namespace kernel_selector
             return autoTuneOptions[autoTuneIndex];
         }
 
-        AutoTuneOption option = { 0, 0, 0, ROUND_ROBIN };
+        AutoTuneOption option = { 0, 0, 0, DEFAULT };
 
         const convolution_params& cp = static_cast<const convolution_params&>(p);
 
@@ -252,11 +250,6 @@ namespace kernel_selector
         return jit;
     }
 
-    KernelsData ConvolutionKernel_bfyx_os_iyx_osv16::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
-    {
-        return GetCommonKernelsData(params, options, GetAutoTuneOptions(params, autoTuneIndex).exeMode, autoTuneIndex);
-    }
-
     std::vector<WeightsLayout> ConvolutionKernel_bfyx_os_iyx_osv16::GetSupportedWeightLayouts(const convolution_params& params) const
     {
         if (!params.transposed)
@@ -271,7 +264,7 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_bfyx_os_iyx_osv16::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetTunedKernelsDataByIndex(params, options, -1);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 
     KernelsData ConvolutionKernel_bfyx_os_iyx_osv16::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
@@ -283,7 +276,7 @@ namespace kernel_selector
 
         KernelsData res = {};
 
-        for (size_t i = 0 ; i < autoTuneOptions.size(); i++)
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
         {
             KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
             if (!kd.empty())
@@ -292,9 +285,7 @@ namespace kernel_selector
             }
         }
 
-        KernelsData defaultKds = GetKernelsData(params, options);
-        res.insert(res.end(), defaultKds.begin(), defaultKds.end());
-
         return res;
     }
+
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h
index 0b0ebc8c6..4f82540f9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16.h
@@ -29,10 +29,9 @@ namespace kernel_selector {
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
         virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
-        virtual KernelsData GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, int autoTuneIndex) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&)  const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
@@ -52,4 +51,4 @@ namespace kernel_selector {
 
         std::vector<AutoTuneOption> autoTuneOptions = {};
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp
new file mode 100644
index 000000000..3eac16908
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.cpp
@@ -0,0 +1,299 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_bfyx_os_iyx_osv16_2_sg.h"
+
+namespace kernel_selector 
+{
+    // Sub-group size used by "kernel_name_bfyx_os_iyx_osv16" kernel.
+    constexpr size_t sub_group_size = 16;
+
+    ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::ConvolutionKernel_bfyx_os_iyx_osv16_2_sg() : ConvolutionKernelBase("convolution_gpu_bfyx_os_iyx_osv16_2_sg")
+    {
+        // Generate the dispatch options to the auto-tuner.
+        std::vector<size_t> blockWidthSizes = { 1,2,4,5,6,8,10,12,14,16 };
+        std::vector<size_t> blockHeightSizes = { 1,2,3,4,5 };
+        std::vector<size_t> prefetchSizes = { 1,2,3,4,5,6,8,10 };
+        std::vector<std::string> executionModes = ConvolutionKernelBase::autoTuneOptions;
+        const size_t maxBlockSize = 60;
+
+        for (auto executionMode : executionModes)
+        {
+            for (auto blockWidth : blockWidthSizes)
+            {
+                for (auto blockHeight : blockHeightSizes)
+                {
+                    for (auto prefetch : prefetchSizes)
+                    {
+                            if (blockWidth * blockHeight <= maxBlockSize)
+                            {
+                                autoTuneOptions.emplace_back(AutoTuneOption{ blockWidth, blockHeight, prefetch, executionMode });
+                            }
+                    }
+                }
+            }
+        }
+    }
+
+    ParamsKey ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableInputWeightsType(WeightsType::F16);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableSubGroup();
+        k.EnableBiasPerFeature();
+        k.EnableBiasPerOutput();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableSplitSupport();
+        k.EnableDilation();
+        k.EnableTranspose();
+        return k;
+    }
+
+    static std::pair<size_t, size_t> get_bfyx_req_input_block_dims(
+        size_t output_block_width,
+        size_t output_block_height,
+        const uSize& filter_size,
+        const uSize& stride,
+        const uSize& dilation,
+        size_t sg_size = 16,
+        size_t read_chunk_size = 8,
+        size_t min_read_size = 16)
+    {
+        assert(output_block_width > 0 && output_block_height > 0);
+        assert(stride.x > 0 && stride.y > 0);
+        assert(filter_size.x > 0 && filter_size.y > 0);
+
+        // Number of elements in X dimension needed from input to compute output block without re-reading input.
+        size_t input_block_req_width = (output_block_width - 1) * stride.x + (filter_size.x - 1)*dilation.x + 1;
+        // Number of elements in Y dimension needed from input to compute output block without re-reading input.
+        size_t input_block_req_height = (output_block_height - 1) * stride.y + (filter_size.y - 1)*dilation.y + 1;
+
+        // Required number of elements in X dimension rounded to nearest >= read chunk size.
+        size_t input_block_read_width = std::max(RoundUp(input_block_req_width, read_chunk_size), min_read_size);
+        // Number of sub-group-sized vectors of unit type needed to store input block.
+        size_t input_block_array_size = CeilDiv(input_block_req_height * input_block_read_width, sg_size);
+
+        return std::make_pair(input_block_array_size, input_block_read_width);
+    }
+
+    static void shrink_blocks_to_output_size(size_t output_x, size_t output_y, size_t &block_x, size_t &block_y)
+    {
+        // how many elements we will compute in each dimension
+        size_t computed_x = Align(output_x, block_x);
+        size_t computed_y = Align(output_y, block_y);
+        // how many simds we need in each dimension
+        size_t simds_x = computed_x / block_x;
+        size_t simds_y = computed_y / block_y;
+        // how many unused values we have in each dimension
+        size_t unused_x = computed_x - output_x;
+        size_t unused_y = computed_y - output_y;
+
+        block_x -= unused_x / simds_x;
+        block_y -= unused_y / simds_y;
+    }
+
+    ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::AutoTuneOption ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetAutoTuneOptions(const Params& p, int autoTuneIndex) const
+    {
+        if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
+        {
+            return autoTuneOptions[autoTuneIndex];
+        }
+
+        AutoTuneOption option = { 0, 0, 0, DEFAULT };
+
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        if (cp.stride.x == 1 && cp.stride.y == 1)
+        {
+            if (cp.filterSize.x == 1 && cp.filterSize.y == 1)
+            {
+                option.blockWidth = 16;
+                option.blockHeight = 1;
+                option.prefetch = 4;
+            }
+            //if less than 16 values is required to compute one single row of output
+            //then each WI shall compute one single row to maximize reuse within SIMD subgroup (this gives very nice performance results)
+            else if (cp.output.X().v + (cp.filterSize.x - 1)*cp.dilation.x < sub_group_size)
+            {
+                option.blockWidth = cp.output.X().v;
+                option.blockHeight = 1;
+                option.prefetch = 4;
+            }
+            else if (cp.filterSize.x < 5 && cp.filterSize.y < 5)
+            {
+                option.blockWidth = sub_group_size - cp.filterSize.x + 1;
+                option.blockHeight = 2;
+                option.prefetch = 4;
+            }
+            else
+            {
+                option.blockWidth = 4;
+                option.blockHeight = 3;
+                option.prefetch = 4;
+            }
+        }
+        else if (cp.stride.x == 2 && cp.stride.y == 2)
+        {
+            option.blockWidth = 5;
+            option.blockHeight = 4;
+            option.prefetch = 4;
+        }
+        else
+        {
+            option.blockWidth = 4;
+            option.blockHeight = 3;
+            option.prefetch = 5;
+            //run_info.effiency = FORCE_PRIORITY_7; // GEMM is better
+        }
+
+        // if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block sizes
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1 || cp.output.Batch().v != 1)
+        {
+            shrink_blocks_to_output_size(cp.output.X().v, cp.output.Y().v,
+                option.blockWidth, option.blockHeight);
+        }
+
+        return option;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::SetDefault(const convolution_params& cp, int autoTuneIndex) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(cp);
+
+        const auto of_maps = cp.output.Feature().v;
+        const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+
+        runInfo.effiency = FORCE_PRIORITY_3;
+
+        auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
+        runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+        runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+        runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+
+        auto input_block_dims = get_bfyx_req_input_block_dims(
+            runInfo.cldnnStyle.blockWidth,
+            runInfo.cldnnStyle.blockHeight,
+            cp.filterSize,
+            cp.stride,
+            cp.dilation,
+            sub_group_size,
+            runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2,
+            sub_group_size);
+        runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first;
+        runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second;
+
+        runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
+        runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight);
+        runInfo.gws2 = 2 * of_threads_per_batch * cp.output.Batch().v;
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 2*sub_group_size;
+
+        return runInfo;
+    }
+
+    bool ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ConvolutionKernelBase::Validate(p, o) ||
+            !CovolutionCheckInput(p, o))
+        {
+            return false;
+        }
+
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        if (cp.inputs[0].Feature().v % 2 != 0 || cp.inputs[0].Feature().v < 64)
+            return false;
+
+        if (cp.output.Feature().v % 64 != 0)
+            return false;
+
+        return true;
+    }
+
+    JitConstants ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+    {
+        const auto of_maps = params.output.Feature().v;
+        const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+        size_t leftovers = of_threads_per_batch - of_maps;
+
+        auto jit = Parent::GetJitConstants(params, runInfo);
+
+        jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 16));
+        jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth));
+        jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight));
+        jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize));
+        jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth));
+        jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch));
+
+        if (leftovers)
+        {
+            jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers));
+        }
+
+        return jit;
+    }
+
+    std::vector<WeightsLayout> ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetSupportedWeightLayouts(const convolution_params& params) const
+    {
+        if (!params.transposed)
+        {
+            return{ WeightsLayout::os_iyx_osv16 };
+        }
+        else
+        {
+            return{ WeightsLayout::os_iyx_osv16_rotate_180 };
+        }
+    }
+
+    KernelsData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetTunedKernelsDataByIndex(params, options);
+    }
+
+    KernelsData ConvolutionKernel_bfyx_os_iyx_osv16_2_sg::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelsData res = {};
+
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
+    }
+
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h
new file mode 100644
index 000000000..02af55746
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_os_iyx_osv16_2_sg.h
@@ -0,0 +1,54 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+    class ConvolutionKernel_bfyx_os_iyx_osv16_2_sg : public ConvolutionKernelBase
+    {
+    public:
+        using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_bfyx_os_iyx_osv16_2_sg();
+        virtual ~ConvolutionKernel_bfyx_os_iyx_osv16_2_sg() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&)  const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        bool NeedPaddedInput() const override { return true; }
+        DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+
+    private:
+        struct AutoTuneOption
+        {
+            size_t blockWidth;
+            size_t blockHeight;
+            size_t prefetch;
+            std::string exeMode;
+        };
+
+        AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const;
+
+        std::vector<AutoTuneOption> autoTuneOptions = {};
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.cpp
index a625c6458..a4e83b079 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_bfyx_ref.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -47,11 +46,13 @@ namespace kernel_selector {
         k.EnableInt8Quantization();
         k.EnableOutputCalibration();
         k.DisableTuning();
+        k.EnableLocalConvolution();
+        k.EnableGroupedConvolution();
         return k;
     }
 
     KernelsData ConvolutionKernel_bfyx_Ref::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.h
index f005457aa..0835bab38 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_bfyx_ref.h
@@ -27,9 +27,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_bfyx_Ref() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
         {
             return{
@@ -37,7 +37,8 @@ namespace kernel_selector {
                 WeightsLayout::yxio,
                 WeightsLayout::iyxo,
                 WeightsLayout::oyxi,
+                WeightsLayout::bf_lyx_yx,
             };
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..7bbf435a4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,81 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    ParamsKey ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::byx8_f4);
+        k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableDilation();
+        k.EnableBiasPerFeature();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    bool ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!Parent::Validate(p, o))
+        {
+            return false;
+        }
+
+        const convolution_params& params = static_cast<const convolution_params&>(p);
+
+        // this kernel is designed for quantization use case
+        if (!params.int8_quantization)
+            return false;
+
+        return true;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::SetDefault(const convolution_params& arg, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+        runInfo.effiency = FORCE_PRIORITY_1;
+
+        runInfo.gws0 = (arg.output.Batch().v * arg.output.Feature().v) / 4;
+        runInfo.gws1 = arg.output.X().v / 8;
+        runInfo.gws2 = arg.output.Y().v / 4;
+
+        runInfo.lws0 = 8;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    }
+
+    KernelsData ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kd = GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
+        if (!kd.empty())
+            kd[0].estimatedTime = FORCE_PRIORITY_3;
+        return kd;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..312310b08
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+    class ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase
+    {
+    public:
+        using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32() : ConvolutionKernelBase("convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32") {}
+        virtual ~ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+        {
+            return{
+                WeightsLayout::os_is_y_x8_osv8_isv4,
+            };
+        }
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.cpp
index 154b4e547..7c5fe4baf 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_byxf_af32_depthwise.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -52,7 +51,7 @@ namespace kernel_selector {
         const convolution_params& params = static_cast<const convolution_params&>(p);
 
         // this kernel is designed for quantization use case
-        if (!params.depthwiseSeparableOpt)
+        if (!params.depthwise_separable_opt)
             return false;
 
         return true;
@@ -60,7 +59,7 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_byxf_af32_depthiwise::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        KernelsData kd = GetCommonKernelsData(params, options);
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options);
         if(!kd.empty())
             kd[0].estimatedTime = FORCE_PRIORITY_3;
         return kd;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.h
index b71b62912..2b4fdefed 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_af32_depthwise.h
@@ -28,9 +28,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_byxf_af32_depthiwise() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
         {
@@ -42,4 +42,4 @@ namespace kernel_selector {
             };
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.cpp
new file mode 100644
index 000000000..728ab588c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.cpp
@@ -0,0 +1,62 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+    
+    ParamsKey ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::byxf);
+        k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::SetDefault(const convolution_params& arg, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+        runInfo.effiency = FORCE_PRIORITY_1;
+
+        runInfo.gws0 = (arg.output.Batch().v * arg.output.Feature().v) / 4;
+        runInfo.gws1 = arg.output.X().v / 8;
+        runInfo.gws2 = arg.output.Y().v;
+
+        runInfo.lws0 = 8;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    }
+
+    KernelsData ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h
new file mode 100644
index 000000000..18cf868e1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h
@@ -0,0 +1,41 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+ 
+namespace kernel_selector {
+    
+    class ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32 : public ConvolutionKernelBase
+    {
+    public:
+        ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() : ConvolutionKernelBase("convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32") {}
+        virtual ~ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        ConvolutionKernelBase::DispatchData SetDefault(const convolution_params& arg, int) const override;
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+        {
+            return{
+                WeightsLayout::yxio
+            };
+        }
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_1x1.cpp
new file mode 100644
index 000000000..91a42e52b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_1x1.cpp
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_imad_1x1.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+
+namespace kernel_selector {
+
+    JitConstants
+        ConvolutionKernel_imad_1x1::GetJitConstants(
+            const convolution_params& params,
+            const DispatchData&       kd) const
+    {
+        auto mem_consts = Parent::GetJitConstants(params, kd);
+
+        mem_consts.AddConstants({
+            // Block reading optimization is implemented for 3x3 only.
+            // For 1x1 it should be disabled.
+            MakeJitConstant("NON_BLOCK_LOAD", 1),
+        });
+        return mem_consts;
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_1x1.h
new file mode 100644
index 000000000..11c4e06bc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_1x1.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_imad_3x3.h"
+
+namespace kernel_selector {
+
+    // TODO Currently the best 1x1 IMAD convolution kernel is not completely done.
+    // Temporary solution to implement 1x1 using 3x3 IMAD convolution kernel with a
+    // little modifications.
+    class ConvolutionKernel_imad_1x1 : public ConvolutionKernel_imad_3x3
+    {
+    public:
+        using Parent = ConvolutionKernel_imad_3x3;
+        ConvolutionKernel_imad_1x1() : ConvolutionKernel_imad_3x3(1, 1) {}
+        virtual ~ConvolutionKernel_imad_1x1() {}
+
+    protected:
+        // For 3x3 based IMAD convolution only 'GetJitConstants' method is required
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.cpp
new file mode 100644
index 000000000..980e00182
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.cpp
@@ -0,0 +1,305 @@
+/*
+// Copyright (c) 2018-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_imad_3x3.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+
+//
+// Kernel specific constants
+//
+#define SIMD_SIZE             16
+// Threshold value to calculate the block size.
+#define OUT_BLOCK_THRESHOLD   7
+// For images 7x7 it's 7 (default), for 14x14 and above it's 14.
+#define OUT_BLOCK_WIDTH       7
+// For images 7x7 it's 1 (default), for 14x14 and above it's 2.
+#define OUT_BLOCK_HEIGHT      1
+
+static void getOutBlock_WH(size_t inW, size_t Stride, size_t Pad, size_t& outW, size_t& outH)
+{
+    outW = OUT_BLOCK_WIDTH * 2;
+    outH = OUT_BLOCK_HEIGHT * 2;
+
+    if ((inW <= OUT_BLOCK_THRESHOLD) ||
+        (outW * Stride + Pad > SIMD_SIZE)) {
+        outW = OUT_BLOCK_WIDTH;
+        outH = OUT_BLOCK_HEIGHT;
+    }
+    if (outW * Stride + Pad > SIMD_SIZE) {
+        outW = outH = 4;
+    }
+
+    assert(outW * Stride + Pad <= SIMD_SIZE);
+} // getOutBlock_WH
+
+namespace kernel_selector {
+
+    ParamsKey ConvolutionKernel_imad_3x3::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableInputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputWeightsType(WeightsType::UINT8);
+        k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableDifferentInputWeightsTypes();
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableDilation();
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    KernelsData
+    ConvolutionKernel_imad_3x3::GetKernelsData(
+                                    const Params&          params,
+                                    const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options);
+    }
+
+    JitConstants
+    ConvolutionKernel_imad_3x3::GetJitConstants(
+                                    const convolution_params& params,
+                                    const DispatchData&       kd) const
+    {
+        auto mem_consts = Parent::GetJitConstants(params, kd);
+
+        const auto& input = params.inputs[0];
+        const auto& output = params.output;
+
+        const auto& iDims   = input.GetDims();
+        const auto& oDims = output.GetDims();
+        const auto& weights = params.weights;
+        const auto& wDims   = weights.GetDims();
+        const int iX  = DataTensor::Channelndex(
+                            input.GetLayout(), Tensor::DataChannelName::X);
+        const int iY  = DataTensor::Channelndex(
+                            input.GetLayout(), Tensor::DataChannelName::Y);
+        const int iB  = DataTensor::Channelndex(
+                            input.GetLayout(), Tensor::DataChannelName::BATCH);
+        const int iF  = DataTensor::Channelndex(
+                            input.GetLayout(), Tensor::DataChannelName::FEATURE);
+        const int wOD = WeightsTensor::Channelndex(
+                            weights.GetLayout(), Tensor::WeightsChannelName::OFM);
+        const int oX = DataTensor::Channelndex(
+            output.GetLayout(), Tensor::DataChannelName::X);
+        const int oY = DataTensor::Channelndex(
+            output.GetLayout(), Tensor::DataChannelName::Y);
+        mem_consts.AddConstants({
+            MakeJitConstant("_IMAD_DEFINES",   1),
+            //MakeJitConstant("SCALE_FACTOR",     m_ScaleFactor), //(255.0f / 700000.0f);
+            MakeJitConstant("_IW",              iDims[iX].v),
+            MakeJitConstant("_IH",              iDims[iY].v),
+            MakeJitConstant("_ID",              RoundUp(iDims[iF].v, 4)),
+            MakeJitConstant("IWPAD",            iDims[iX].pad.before + iDims[iX].pad.after),
+            MakeJitConstant("IHPAD",            iDims[iY].pad.before + iDims[iY].pad.after),
+            MakeJitConstant("_OW",              oDims[oX].v),
+            MakeJitConstant("_OH",              oDims[oY].v),
+            MakeJitConstant("_OD",              wDims[wOD].v),
+            MakeJitConstant("OWPAD",            oDims[oX].pad.before + oDims[oX].pad.after),
+            MakeJitConstant("OHPAD",            oDims[oY].pad.before + oDims[oY].pad.after),
+            MakeJitConstant("SIMD_SIZE",        SIMD_SIZE),
+            MakeJitConstant("K_HEIGHT",         wDims[iY].v),
+            MakeJitConstant("K_WIDTH",          wDims[iX].v),
+            MakeJitConstant("K_STRIDE",         params.stride.x), // X and Y must be equal
+            MakeJitConstant("BATCH_SIZE",       iDims[iB].v),
+            MakeJitConstant("WORKGROUP_SIZE",   "SIMD_SIZE"),
+        });
+
+        size_t obw, obh;
+        getOutBlock_WH(iDims[iX].v, params.stride.x, iDims[iX].pad.before + iDims[iX].pad.after,
+                       obw, obh);
+        mem_consts.AddConstants({
+            MakeJitConstant("OUT_BLOCK_WIDTH",  obw),
+            MakeJitConstant("OUT_BLOCK_HEIGHT", obh)
+        });
+
+        // FM_TILE definition
+        mem_consts.AddConstants({
+            MakeJitConstant("IMAD_LENGTH", 4),
+            MakeJitConstant("SYSTOLIC_DEPTH", 1),
+            MakeJitConstant("FM_TILE", "(IMAD_LENGTH * SYSTOLIC_DEPTH)")
+        });
+
+        if (input.GetDType() == Datatype::UINT8) {
+            // For unsigned types IMAD convolution kernel should skip
+            // all negative values.
+            mem_consts.AddConstants({
+                MakeJitConstant("CONVO_UNSIGNED", 1)
+            });
+        }
+
+        if (params.output.GetLayout() != DataLayout::b_fs_yx_fsv4) {
+            mem_consts.AddConstants({
+                // Produce unswizzelled results.
+                MakeJitConstant("TO_UNSWIZZLE", 1),
+            });
+        }
+
+        return mem_consts;
+
+    } // GetJitConstants
+
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_imad_3x3::SetDefault(
+                                               const convolution_params& params,
+                                               int) const
+    {
+        DispatchData kd;
+
+        const auto& in      = params.inputs[0];
+        const auto& weights = params.weights;
+        const auto& iDims   = in.GetDims();
+        const auto& wDims   = weights.GetDims();
+        const int iX  = DataTensor::Channelndex(
+                            in.GetLayout(), Tensor::DataChannelName::X);
+        const int iY  = DataTensor::Channelndex(
+                            in.GetLayout(), Tensor::DataChannelName::Y);
+        const int iB  = DataTensor::Channelndex(
+                            in.GetLayout(), Tensor::DataChannelName::BATCH);
+        const int wOD = WeightsTensor::Channelndex(
+                            weights.GetLayout(), Tensor::WeightsChannelName::OFM);
+
+        size_t otw, oth;
+        getOutBlock_WH(iDims[iX].v, params.stride.x, iDims[iX].pad.before + iDims[iX].pad.after,
+                       otw, oth);
+
+        std::vector<size_t> global = {
+            //globalRange[0] = ((_IW / K_STRIDE) + (OTW - 1)) / OTW;
+            // number of tiles needed to cover output width
+            (((iDims[iX].v / params.stride.x) + (otw - 1)) / otw),
+
+            //globalRange[1] = ((_IH / K_STRIDE) + (OTH - 1)) / OTH;
+            // number of tiles needed to cover output height
+            (((iDims[iY].v / params.stride.y) + (oth - 1)) / oth),
+
+            // globalRange[2] = (_OD * _B) + ((_B *_OD) % __WORKGROUP_SIZE);
+            // round depth range up
+            ((wDims[wOD].v * iDims[iB].v) + ((wDims[wOD].v * iDims[iB].v) % SIMD_SIZE))
+        };
+
+        std::vector<size_t> local = {1, 1, SIMD_SIZE};
+
+        kd.gws0 = global[0];
+        kd.gws1 = global[1];
+        kd.gws2 = global[2];
+
+        kd.lws0 = local[0];
+        kd.lws1 = local[1];
+        kd.lws2 = local[2];
+
+        kd.cldnnStyle = { 0 };
+        kd.gemmStyle  = { 0 };
+        kd.effiency   = FORCE_PRIORITY_1;
+
+        return kd;
+
+    } // SetDefault
+
+    bool
+    ConvolutionKernel_imad_3x3::Validate(
+            const Params&          params,
+            const optional_params& options) const
+    {
+        if (!Parent::Validate(params, options))
+        {
+            return false;
+        }
+
+        KernelData kd = KernelData::Default<convolution_params>(params);
+        convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
+
+        if (newParams.stride.x != newParams.stride.y) {
+            // Strides must be equial
+            return false;
+        }
+        else if ((newParams.filterSize.x != m_FilterSizeX) ||
+                 (newParams.filterSize.y != m_FilterSizeY)) {
+            // Kernel does not support such filter size
+            return false;
+        }
+        else {
+            const auto& in = newParams.inputs[0];
+            const auto& iDims = in.GetDims();
+            const int iX = DataTensor::Channelndex(
+                in.GetLayout(), Tensor::DataChannelName::X);
+            if (iDims[iX].v % OUT_BLOCK_THRESHOLD != 0) {
+                // Input size must be multiple of OUT_BLOCK_THRESHOLD
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    KernelsData
+    ConvolutionKernel_imad_3x3::GetCommonKernelsData(
+                                const Params&          params,
+                                const optional_params& options,
+                                const std::string      exeMode,
+                                int                    autoTuneIndex) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelData kd = KernelData::Default<convolution_params>(params);
+        convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());
+        DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
+        if (!CheckWorkGroups(runInfo))
+        {
+            // Internal Error - wrong calculation of global/local work group sizes
+            return{};
+        }
+
+        bool succeed = UpdateWeightsParams(
+            newParams,
+            options,
+            GetSupportedWeightLayouts(newParams),
+            kd.weightsReorderParams,
+            GetSupportedKey());
+
+        if (!succeed)
+        {
+            return{};
+        }
+
+        auto finalKernelName = GetKernelName(newParams);
+        auto cldnnJit = GetJitConstants(newParams, runInfo);
+        auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
+
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
+
+        kd.estimatedTime = runInfo.effiency;
+        kd.autoTuneIndex = autoTuneIndex;
+
+        return{ kd };
+
+    } // GetCommonKernelsData
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.h
new file mode 100644
index 000000000..a25588350
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_3x3.h
@@ -0,0 +1,58 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+    class ConvolutionKernel_imad_3x3 : public ConvolutionKernelBase
+    {
+    public:
+        using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_imad_3x3() : ConvolutionKernelBase("convolution_gpu_imad") {}
+        ConvolutionKernel_imad_3x3(size_t FilterSizeX, size_t FilterSizeY)
+            : ConvolutionKernelBase("convolution_gpu_imad"),
+              m_FilterSizeX(FilterSizeX),
+              m_FilterSizeY(FilterSizeY) {}
+        virtual ~ConvolutionKernel_imad_3x3() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params& options, const std::string exeMode = DEFAULT, int autoTuneIndex = -1) const;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        virtual bool Validate(const Params& params, const optional_params& options) const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
+
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+        {
+            return{
+                WeightsLayout::os_is_yx_osv16_isv4
+            };
+        }
+
+    protected:
+        // This class is base one for several similar classes with different
+        // filter sizes. That's why the actual filters sizes must be explicitly
+        // specified.
+        size_t m_FilterSizeX = 3;
+        size_t m_FilterSizeY = 3;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_7x7.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_7x7.cpp
new file mode 100644
index 000000000..9a6da693e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_7x7.cpp
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_imad_7x7.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+
+namespace kernel_selector {
+
+    JitConstants
+        ConvolutionKernel_imad_7x7::GetJitConstants(
+            const convolution_params& params,
+            const DispatchData&       kd) const
+    {
+        auto mem_consts = Parent::GetJitConstants(params, kd);
+
+        mem_consts.AddConstants({
+            // Block reading optimization is implemented for 3x3 only.
+            // For 7x7 it should be disabled.
+            MakeJitConstant("NON_BLOCK_LOAD", 1),
+        });
+        return mem_consts;
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_7x7.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_7x7.h
new file mode 100644
index 000000000..0e268ffc5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad_7x7.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_imad_3x3.h"
+
+namespace kernel_selector {
+
+    // TODO Currently the best 7x7 IMAD convolution kernel is not completely done.
+    // Temporary solution to implement 7x7 using 3x3 IMAD convolution kernel with a
+    // little modifications.
+    class ConvolutionKernel_imad_7x7 : public ConvolutionKernel_imad_3x3
+    {
+    public:
+        using Parent = ConvolutionKernel_imad_3x3;
+        ConvolutionKernel_imad_7x7() : ConvolutionKernel_imad_3x3(7, 7) {}
+        virtual ~ConvolutionKernel_imad_7x7() {}
+
+    protected:
+        // For 3x3 based IMAD convolution only 'GetJitConstants' method is required
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.cpp
new file mode 100644
index 000000000..9b141ec4c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.cpp
@@ -0,0 +1,187 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    static const size_t _SG_TILE_M = 32;
+    static const size_t _SG_TILE_N = 32;
+    static const size_t _SG_SIZE = 8; // sub group size
+    static const size_t _TILES_PER_SG_X = 1; // Persistent threads
+    static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
+
+	ParamsKey ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetSupportedKey() const
+	{
+		ParamsKey k;
+		k.EnableInputDataType(Datatype::INT8);
+		k.EnableOutputDataType(Datatype::INT8);
+		k.EnableInputWeightsType(WeightsType::INT8);
+		k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableTensorOffset();
+		k.EnableTensorPitches();
+		k.EnableBiasPerFeature();
+		k.EnableBatching();
+		k.EnableInt8Quantization();
+		k.EnableOutputCalibration();
+		k.DisableTuning();
+		return k;
+	}
+
+	bool ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!ConvolutionKernelBase::Validate(p, o) ||
+			!CovolutionCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const convolution_params& cp = static_cast<const convolution_params&>(p);
+		
+        // make sure it's 1x1 conv
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        // make sure stride is 1x1
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        // input padding not supported
+        if (cp.inputs[0].X().pad.Total() != 0 ||
+            cp.inputs[0].Y().pad.Total() != 0 ||
+            cp.inputs[0].Feature().pad.Total() != 0 ||
+            cp.inputs[0].Batch().pad.Total() != 0)
+            return false;
+
+        // input and output spatial sizes must match
+        if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
+            return false;
+
+		const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v ;
+		const auto k = cp.inputs[0].Feature().v;
+		const auto n = cp.output.Feature().v ;
+
+		if (m % 32 != 0 && m % 128 != 0)  // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M=128
+			return false;
+	
+		if (k % 32 != 0)  // Matrix size K, Must be mutliple of 32
+			return false;
+		
+		if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N=128
+			return false;
+		
+		return true;
+	}
+
+
+	ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::SetDefault(const convolution_params& arg, int) const
+	{
+		DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+		runInfo.effiency = FORCE_PRIORITY_1;
+
+		size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
+		size_t mat_n = arg.output.Feature().v;
+		
+		size_t _MATRIX_M = mat_m;
+		size_t _MATRIX_N = mat_n;
+		
+		size_t _WG_TILE_M = 128;
+		size_t _WG_TILE_N = 128;
+
+		// Calculate number of threads needed
+		const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
+		const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y  ;
+
+		// Define execution setup for kernel:
+		size_t globalWorkSize[3] = { threadsX, threadsY, 1 };
+		size_t localWorkSize[3] = { _SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1 };
+		
+		runInfo.gws0 = globalWorkSize[0];
+		runInfo.gws1 = globalWorkSize[1];
+		runInfo.gws2 = globalWorkSize[2]; 
+		
+		runInfo.lws0 = localWorkSize[0];
+		runInfo.lws1 = localWorkSize[1];
+		runInfo.lws2 = localWorkSize[2];
+
+		return runInfo;
+	}
+
+	JitConstants ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+		jit.AddConstant(MakeJitConstant("WG_TILE_M", 128));          // Work-Group tile size M, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("WG_TILE_N", 128));          // Work-Group tile size N, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", 1));       // Persistent threads
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", 1));       // Persistent threads
+
+		// Do not change values below
+        jit.AddConstant(MakeJitConstant("DIM_X", 0));
+        jit.AddConstant(MakeJitConstant("DIM_Y", 1));
+        jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
+        jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
+        jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
+        jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
+        jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
+        jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
+        jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
+        jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
+
+		jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
+		jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
+		jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
+
+		const auto& input = params.inputs[0];
+		const auto& output = params.output;
+
+		auto m = output.X().v * output.Y().v * output.Batch().v;
+		auto k = input.Feature().v;
+		auto n = output.Feature().v;
+
+		jit.AddConstant(MakeJitConstant("MATRIX_M", m));
+		jit.AddConstant(MakeJitConstant("MATRIX_K", k));
+		jit.AddConstant(MakeJitConstant("MATRIX_N", n));
+
+        const size_t out_x_pitch = 32 * 4;
+        const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
+        const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
+        const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
+        const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
+
+        bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
+        jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
+
+		return jit;
+	}
+
+	KernelsData ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8::GetKernelsData(const Params& params, const optional_params& options) const
+	{
+		KernelsData kd = GetCommonKernelsData(params, options);
+		if (!kd.empty())
+			kd[0].estimatedTime = FORCE_PRIORITY_1; //_3 
+		return kd;
+	}
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h
new file mode 100644
index 000000000..6be47486e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h
@@ -0,0 +1,45 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+	class ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8 : public ConvolutionKernelBase
+	{
+	public:
+		using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8() : ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_128x128wg_slm_int8") {}
+	
+		virtual ~ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8() {}
+
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+	protected:
+		virtual ParamsKey GetSupportedKey() const override;
+		bool Validate(const Params& p, const optional_params& o) const override;
+		JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+		DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+		virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+		{
+			return{
+				WeightsLayout::is_o32_yx_isv32_swizzled_by_4,
+			};
+		}
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.cpp
new file mode 100644
index 000000000..5e84d7f4f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.cpp
@@ -0,0 +1,187 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    static const size_t _SG_TILE_M = 32;
+    static const size_t _SG_TILE_N = 32;
+    static const size_t _SG_SIZE = 8; // sub group size
+    static const size_t _TILES_PER_SG_X = 1; // Persistent threads
+    static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
+
+    ParamsKey ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetSupportedKey() const
+	{
+		ParamsKey k;
+		k.EnableInputDataType(Datatype::INT8);
+		k.EnableOutputDataType(Datatype::INT8);
+		k.EnableInputWeightsType(WeightsType::INT8);
+		k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableTensorOffset();
+		k.EnableTensorPitches();
+		k.EnableBiasPerFeature();
+		k.EnableBatching();
+		k.EnableInt8Quantization();
+		k.EnableOutputCalibration();
+		k.DisableTuning();
+		return k;
+	}
+
+	bool ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!ConvolutionKernelBase::Validate(p, o) ||
+			!CovolutionCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const convolution_params& cp = static_cast<const convolution_params&>(p);
+		
+        // make sure it's 1x1 conv
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        // make sure stride is 1x1
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        // input padding not supported
+        if (cp.inputs[0].X().pad.Total() != 0 ||
+            cp.inputs[0].Y().pad.Total() != 0 ||
+            cp.inputs[0].Feature().pad.Total() != 0 ||
+            cp.inputs[0].Batch().pad.Total() != 0)
+            return false;
+
+        // input and output spatial sizes must match
+        if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
+            return false;
+
+		const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v ;
+		const auto k = cp.inputs[0].Feature().v;
+		const auto n = cp.output.Feature().v ;
+
+		if (m % 32 != 0 && m % 224 != 0)  // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M=128
+			return false;
+	
+		if (k % 32 != 0)  // Matrix size K, Must be mutliple of 32
+			return false;
+		
+		if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N=128
+			return false;
+
+		return true;
+	}
+
+
+	ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::SetDefault(const convolution_params& arg, int) const
+	{
+		DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+		runInfo.effiency = FORCE_PRIORITY_1;
+
+		size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
+		size_t mat_n = arg.output.Feature().v;
+		
+		size_t _MATRIX_M = mat_m;
+		size_t _MATRIX_N = mat_n;
+		
+		size_t _WG_TILE_M = 224;
+		size_t _WG_TILE_N = 128;
+
+		// Calculate number of threads needed
+		const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
+		const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y  ;
+
+        // Define execution setup for kernel:
+		size_t globalWorkSize[3] = { threadsX, threadsY, 1 };
+		size_t localWorkSize[3] = { _SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1 };
+
+		runInfo.gws0 = globalWorkSize[0];
+		runInfo.gws1 = globalWorkSize[1];
+		runInfo.gws2 = globalWorkSize[2]; 
+
+		runInfo.lws0 = localWorkSize[0];
+		runInfo.lws1 = localWorkSize[1];
+		runInfo.lws2 = localWorkSize[2]; 
+
+		return runInfo;
+	}
+
+	JitConstants ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+		jit.AddConstant(MakeJitConstant("WG_TILE_M", 224));          // Work-Group tile size M, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("WG_TILE_N", 128));          // Work-Group tile size N, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
+
+		// Do not change values below
+		jit.AddConstant(MakeJitConstant("DIM_X", 0));
+		jit.AddConstant(MakeJitConstant("DIM_Y", 1));
+		jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
+		jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
+		jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
+		jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
+		jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
+		jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
+		jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
+		jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
+
+		jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
+		jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
+		jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
+
+		const auto& input = params.inputs[0];
+		const auto& output = params.output;
+
+		auto m = output.X().v * output.Y().v * output.Batch().v;
+		auto k = input.Feature().v;
+		auto n = output.Feature().v;
+
+		jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
+		jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
+
+        const size_t out_x_pitch = 32 * 4;
+        const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
+        const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
+        const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
+        const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
+
+        bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
+        jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
+        
+        return jit;
+	}
+
+	KernelsData ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8::GetKernelsData(const Params& params, const optional_params& options) const
+	{
+        KernelsData kd = GetCommonKernelsData(params, options);
+        if (!kd.empty())
+			kd[0].estimatedTime = FORCE_PRIORITY_1; //_3 
+		return kd;
+	}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h
new file mode 100644
index 000000000..dd5793748
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h
@@ -0,0 +1,45 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+	class ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8 : public ConvolutionKernelBase
+	{
+	public:
+		using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8() : ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_224x128wg_slm_int8") {}
+		
+		virtual ~ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8() {}
+
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+	protected:
+		virtual ParamsKey GetSupportedKey() const override;
+		bool Validate(const Params& p, const optional_params& o) const override;
+		JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+		DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+		virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+		{
+			return{
+				WeightsLayout::is_o32_yx_isv32_swizzled_by_4,
+			};
+		}
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_slm_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_slm_int8.cpp
new file mode 100644
index 000000000..b9a6e183b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_slm_int8.cpp
@@ -0,0 +1,184 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    static const size_t _SG_TILE_M = 32;
+    static const size_t _SG_TILE_N = 32;
+    static const size_t _SG_SIZE = 8; // sub group size
+    static const size_t _TILES_PER_SG_X = 1; // Persistent threads
+    static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
+
+    ParamsKey ConvolutionKernel_mmad_32x32sg_slm_int8::GetSupportedKey() const
+	{
+		ParamsKey k;
+		k.EnableInputDataType(Datatype::INT8);
+		k.EnableOutputDataType(Datatype::INT8);
+		k.EnableInputWeightsType(WeightsType::INT8);
+		k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableTensorOffset();
+		k.EnableTensorPitches();
+		k.EnableBiasPerFeature();
+		k.EnableBatching();
+		k.EnableInt8Quantization();
+		k.EnableOutputCalibration();
+		k.DisableTuning();
+		return k;
+	}
+
+	bool ConvolutionKernel_mmad_32x32sg_slm_int8::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!ConvolutionKernelBase::Validate(p, o) ||
+			!CovolutionCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const convolution_params& cp = static_cast<const convolution_params&>(p);
+		
+        // make sure it's 1x1 conv
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        // make sure stride is 1x1
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        // input padding not supported
+        if (cp.inputs[0].X().pad.Total() != 0 ||
+            cp.inputs[0].Y().pad.Total() != 0 ||
+            cp.inputs[0].Feature().pad.Total() != 0 ||
+            cp.inputs[0].Batch().pad.Total() != 0)
+            return false;
+
+        // input and output spatial sizes must match
+        if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
+            return false;
+
+		const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v ;
+		const auto k = cp.inputs[0].Feature().v;
+		const auto n = cp.output.Feature().v ;
+
+		if (m % 32 != 0)  // Matrix size M, Must be mutliple of 32
+			return false;
+	
+		if (k % 32 != 0)  // Matrix size K, Must be multiple of 32
+			return false;
+		
+		if (n % 32 != 0) // Matrix size N, Must be mutliple of 32
+			return false;
+
+		return true;
+	}
+
+
+	ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_32x32sg_slm_int8::SetDefault(const convolution_params& arg, int) const
+	{
+		DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+		runInfo.effiency = FORCE_PRIORITY_2;
+
+		size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
+		size_t mat_n = arg.output.Feature().v;
+		
+		size_t _MATRIX_M = mat_m;
+		size_t _MATRIX_N = mat_n;
+		
+		size_t _WG_TILE_M = 32;
+		size_t _WG_TILE_N = 32;
+
+		// Calculate number of threads needed
+		const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
+		const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y  ;
+
+        // Define execution setup for kernel:
+		size_t globalWorkSize[3] = { threadsX, threadsY, 1 };
+		size_t localWorkSize[3] = { _SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1 };
+
+		runInfo.gws0 = globalWorkSize[0];
+		runInfo.gws1 = globalWorkSize[1];
+		runInfo.gws2 = globalWorkSize[2]; 
+
+		runInfo.lws0 = localWorkSize[0];
+		runInfo.lws1 = localWorkSize[1];
+		runInfo.lws2 = localWorkSize[2]; 
+
+		return runInfo;
+	}
+
+	JitConstants ConvolutionKernel_mmad_32x32sg_slm_int8::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+		jit.AddConstant(MakeJitConstant("WG_TILE_M", 32));          // Work-Group tile size M, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("WG_TILE_N", 32));          // Work-Group tile size N, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
+
+		// Do not change values below
+		jit.AddConstant(MakeJitConstant("DIM_X", 0));
+		jit.AddConstant(MakeJitConstant("DIM_Y", 1));
+		jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
+		jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
+		jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
+		jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
+		jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
+		jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
+		jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
+		jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
+
+		jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
+		jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
+		jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
+
+		const auto& input = params.inputs[0];
+		const auto& output = params.output;
+
+		auto m = output.X().v * output.Y().v * output.Batch().v;
+		auto k = input.Feature().v;
+		auto n = output.Feature().v;
+
+		jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
+		jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
+
+        const size_t out_x_pitch = 32 * 4;
+        const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
+        const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
+        const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
+        const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
+
+		return jit;
+	}
+
+	KernelsData ConvolutionKernel_mmad_32x32sg_slm_int8::GetKernelsData(const Params& params, const optional_params& options) const
+	{
+		KernelsData kd = GetCommonKernelsData(params, options);
+		if (!kd.empty())
+			kd[0].estimatedTime = FORCE_PRIORITY_2; //_3 
+		return kd;
+	}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_slm_int8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_slm_int8.h
new file mode 100644
index 000000000..448a657d1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_32x32sg_slm_int8.h
@@ -0,0 +1,45 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+
+namespace kernel_selector {
+
+	class ConvolutionKernel_mmad_32x32sg_slm_int8 : public ConvolutionKernelBase
+	{
+	public:
+		using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_mmad_32x32sg_slm_int8() : ConvolutionKernelBase("convolution_gpu_mmad_32x32sg_slm_int8") {}
+		
+		virtual ~ConvolutionKernel_mmad_32x32sg_slm_int8() {}
+
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+	protected:
+		virtual ParamsKey GetSupportedKey() const override;
+		bool Validate(const Params& p, const optional_params& o) const override;
+		JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+		DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+		virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+		{
+			return{
+				WeightsLayout::is_o_yx_isv32,
+			};
+		}
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
index ce73392ac..178c0781b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_mmad_batched.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
     
@@ -49,7 +48,7 @@ namespace kernel_selector {
         const auto of_maps = arg.output.Feature().v;
         const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
 
-        runInfo.effiency = FORCE_PRIORITY_3;
+        runInfo.effiency = FORCE_PRIORITY_6;
 
         runInfo.gws0 = arg.output.X().v;
         runInfo.gws1 = arg.output.Y().v;
@@ -89,9 +88,9 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        KernelsData kd = GetCommonKernelsData(params, options);
+        KernelsData kd = GetTunedKernelsDataByIndex(params, options);
         if(!kd.empty())
-            kd[0].estimatedTime = FORCE_PRIORITY_3;
+            kd[0].estimatedTime = FORCE_PRIORITY_6;
         return kd;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
index 8a3dda451..366ceb4d7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched.h
@@ -28,9 +28,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_mmad_batched() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
@@ -40,4 +40,4 @@ namespace kernel_selector {
             };
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block.cpp
new file mode 100644
index 000000000..716b89596
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block.cpp
@@ -0,0 +1,157 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_batched_block.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+    
+    ParamsKey ConvolutionKernel_mmad_batched_block::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    struct block_params
+    {
+        int32_t out_width;
+        int32_t out_height;
+        int32_t out_depth;
+    };
+
+    static block_params get_out_block_size(const convolution_params& p)
+    {
+        if (p.filterSize.x == 3 && p.filterSize.y == 3)
+        {
+            if (p.output.X().v == 7)
+                return { 7, 1, 4 };
+            else if (p.output.X().v == 14)
+                return { 7, 1, 4 };
+            else if (p.output.X().v == 28)
+                return { 7, 1, 4 };
+            else if (p.output.X().v == 56)
+                return { 8, 1, 4 };
+        }
+
+        return { 1,1,1 };
+    }
+
+    std::vector<WeightsLayout> ConvolutionKernel_mmad_batched_block::GetSupportedWeightLayouts(const convolution_params& cp) const
+    {
+        auto block = get_out_block_size(cp);
+        if (block.out_depth == 4)
+            return { WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4 };
+        else
+            return { WeightsLayout::os_is_yx_isa8_osv8_isv4 };
+    }
+
+    bool ConvolutionKernel_mmad_batched_block::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ConvolutionKernelBase::Validate(p, o) ||
+            !CovolutionCheckInput(p, o))
+        {
+            return false;
+        }
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        // if block sizes are 1x1, then this algorithm is probably not the best
+        auto block = get_out_block_size(cp);
+        if (block.out_width == 1 && block.out_height == 1)
+            return false;
+
+        if (cp.output.X().v % block.out_width != 0)
+            return false;
+        if (cp.output.Y().v % block.out_height != 0)
+            return false;
+
+        if (cp.filterSize.x == 1)
+            return false;
+
+        return true;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_batched_block::SetDefault(const convolution_params& arg, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+        constexpr size_t sub_group_size = 8;
+
+        runInfo.effiency = FORCE_PRIORITY_5;
+
+        auto block = get_out_block_size(arg);
+
+        runInfo.gws0 = arg.output.X().v / block.out_width;
+        runInfo.gws1 = arg.output.Y().v / block.out_height;
+        runInfo.gws2 = (arg.output.Feature().v) * ((arg.output.Batch().v+3) / 4) / block.out_depth; // process 4 output channels per Workitem
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = sub_group_size;
+
+        return runInfo;
+    }
+
+    JitConstants ConvolutionKernel_mmad_batched_block::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+    {
+        auto jit = Parent::GetJitConstants(params, runInfo);
+
+        jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+
+        // pitch for special block format used in this kernel
+        const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+        const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+        jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+        const size_t in_x_pitch = 32 * 4;
+        const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+        const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+        const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+        const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+        jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+        jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+        auto block = get_out_block_size(params);
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", block.out_height));
+        jit.AddConstant(MakeJitConstant("WEIGHTS_PER_WORKITEM", block.out_depth));
+
+        return jit;
+    }
+
+    KernelsData ConvolutionKernel_mmad_batched_block::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kd = GetCommonKernelsData(params, options);
+        if(!kd.empty())
+            kd[0].estimatedTime = FORCE_PRIORITY_5;
+        return kd;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block.h
new file mode 100644
index 000000000..590287856
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block.h
@@ -0,0 +1,39 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+ 
+namespace kernel_selector {
+    
+    class ConvolutionKernel_mmad_batched_block : public ConvolutionKernelBase
+    {
+    public:
+        using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_mmad_batched_block() : ConvolutionKernelBase("convolution_gpu_mmad_batched_block") {}
+        virtual ~ConvolutionKernel_mmad_batched_block() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block_1x1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block_1x1.cpp
new file mode 100644
index 000000000..1d7987218
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block_1x1.cpp
@@ -0,0 +1,159 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_batched_block_1x1.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    ParamsKey ConvolutionKernel_mmad_batched_block_1x1::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    struct block_params
+    {
+        int32_t out_width;
+        int32_t out_height;
+        int32_t out_depth;
+    };
+
+    static block_params get_out_block_size(const convolution_params& p)
+    {
+        if (p.output.X().v == 7)
+            return { 7,1,4 };
+        else if (p.output.X().v == 14)
+            return { 7,1,4 };
+        else if (p.output.X().v == 28)
+            return { 4,2,4 };
+        else if (p.output.X().v == 56)
+            return { 8,1,4 };
+
+        return { 1,1,1 };
+    }
+
+    std::vector<WeightsLayout> ConvolutionKernel_mmad_batched_block_1x1::GetSupportedWeightLayouts(const convolution_params& cp) const
+    {
+        auto block = get_out_block_size(cp);
+        if (block.out_depth == 4)
+            return { WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4 };
+        else
+            return { WeightsLayout::os_is_yx_isa8_osv8_isv4 };
+    }
+
+    bool ConvolutionKernel_mmad_batched_block_1x1::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ConvolutionKernelBase::Validate(p, o) ||
+            !CovolutionCheckInput(p, o))
+        {
+            return false;
+        }
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        // only for conv 1x1
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        // only for stride 1x1
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        // if block sizes are 1x1, then this algorithm is probably not the best
+        auto block = get_out_block_size(cp);
+        if (block.out_depth != 4)
+            return false;
+
+        if (cp.output.X().v % block.out_width != 0)
+            return false;
+        if (cp.output.Y().v % block.out_height != 0)
+            return false;
+
+        return true;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_batched_block_1x1::SetDefault(const convolution_params& arg, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+        constexpr size_t sub_group_size = 8;
+
+        runInfo.effiency = FORCE_PRIORITY_3;
+
+        auto block = get_out_block_size(arg);
+
+        runInfo.gws0 = arg.output.X().v / block.out_width;
+        runInfo.gws1 = arg.output.Y().v / block.out_height;
+        runInfo.gws2 = (arg.output.Feature().v) * ((arg.output.Batch().v + 3) / 4) / block.out_depth; // process 4 output channels per Workitem
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = sub_group_size;
+
+        return runInfo;
+    }
+
+    JitConstants ConvolutionKernel_mmad_batched_block_1x1::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+    {
+        auto jit = Parent::GetJitConstants(params, runInfo);
+
+        jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+
+        // pitch for special block format used in this kernel
+        const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+        const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+        jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+        const size_t in_x_pitch = 32 * 4;
+        const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+        const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+        const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+        const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+        jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+        jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+        auto block = get_out_block_size(params);
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", block.out_height));
+        jit.AddConstant(MakeJitConstant("WEIGHTS_PER_WORKITEM", block.out_depth));
+
+        return jit;
+    }
+
+    KernelsData ConvolutionKernel_mmad_batched_block_1x1::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kd = GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
+        if (!kd.empty())
+            kd[0].estimatedTime = FORCE_PRIORITY_3;
+        return kd;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block_1x1.h
new file mode 100644
index 000000000..5d3c11a0d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_batched_block_1x1.h
@@ -0,0 +1,39 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+ 
+namespace kernel_selector {
+    
+    class ConvolutionKernel_mmad_batched_block_1x1 : public ConvolutionKernelBase
+    {
+    public:
+        using Parent = ConvolutionKernelBase;
+        ConvolutionKernel_mmad_batched_block_1x1() : ConvolutionKernelBase("convolution_gpu_mmad_batched_block_1x1") {}
+        virtual ~ConvolutionKernel_mmad_batched_block_1x1() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_2x14_rep4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_2x14_rep4.cpp
new file mode 100644
index 000000000..7c660768f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_2x14_rep4.cpp
@@ -0,0 +1,121 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_slm_2x14_rep4.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+    
+    ParamsKey ConvolutionKernel_mmad_slm_2x14_rep4::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableBiasPerOutput();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    bool ConvolutionKernel_mmad_slm_2x14_rep4::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ConvolutionKernelBase::Validate(p, o) ||
+            !CovolutionCheckInput(p, o))
+        {
+            return false;
+        }
+
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        if (cp.filterSize.x != 3 || cp.filterSize.y != 3)
+            return false;
+
+        if (cp.inputs[0].X().v != 56 || cp.inputs[0].Y().v != 56)
+            return false;
+
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        return true;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_slm_2x14_rep4::SetDefault(const convolution_params& arg, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+        runInfo.effiency = FORCE_PRIORITY_1;
+
+        const size_t rep_count = 4;
+        const size_t batch_per_wi = 1;
+        const size_t out_block_width = 14;
+        const size_t out_block_height = 2;
+        runInfo.gws0 = arg.output.Feature().v * (arg.output.Batch().v / (rep_count * batch_per_wi)); // number of tiles needed to cover output width
+        runInfo.gws1 = ((arg.inputs[0].X().v / arg.stride.x) + (out_block_width - 1)) / out_block_width;
+        runInfo.gws2 = ((arg.inputs[0].Y().v / arg.stride.y) + (out_block_height - 1)) / out_block_height;
+
+        runInfo.lws0 = 32; // depth
+        runInfo.lws1 = 1; // width
+        runInfo.lws2 = 4; // height
+
+        return runInfo;
+    }
+
+    JitConstants ConvolutionKernel_mmad_slm_2x14_rep4::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+    {
+        auto jit = ConvolutionKernelBase::GetJitConstants(params, runInfo);
+
+        jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 8));
+
+        // pitch for special block format used in this kernel
+        const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+        const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+        jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+        const size_t in_x_pitch = 32 * 4;
+        const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+        const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+        const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+        const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+        jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+        jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", 14));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", 2));
+        jit.AddConstant(MakeJitConstant("LOCAL_SIZE_X", runInfo.lws0));
+        jit.AddConstant(MakeJitConstant("LOCAL_SIZE_Y", runInfo.lws1));
+        jit.AddConstant(MakeJitConstant("LOCAL_SIZE_Z", runInfo.lws2));
+
+        return jit;
+    }
+
+    KernelsData ConvolutionKernel_mmad_slm_2x14_rep4::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_2x14_rep4.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_2x14_rep4.h
new file mode 100644
index 000000000..b158a9803
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_2x14_rep4.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+ 
+namespace kernel_selector {
+    
+    class ConvolutionKernel_mmad_slm_2x14_rep4 : public ConvolutionKernelBase
+    {
+    public:
+        ConvolutionKernel_mmad_slm_2x14_rep4() : ConvolutionKernelBase("convolution_gpu_mmad_slm_2x14_rep4") {}
+        virtual ~ConvolutionKernel_mmad_slm_2x14_rep4() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+        {
+            return{
+                WeightsLayout::os_is_yx_isa8_osv8_isv4,
+            };
+        }
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_7x7_rep4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_7x7_rep4.cpp
new file mode 100644
index 000000000..bf6863f0f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_7x7_rep4.cpp
@@ -0,0 +1,129 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "convolution_kernel_mmad_slm_7x7_rep4.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+    
+    ParamsKey ConvolutionKernel_mmad_slm_7x7_rep4::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableBiasPerOutput();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.DisableTuning();
+        return k;
+    }
+
+    bool ConvolutionKernel_mmad_slm_7x7_rep4::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ConvolutionKernelBase::Validate(p, o) ||
+            !CovolutionCheckInput(p, o))
+        {
+            return false;
+        }
+
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        if (cp.filterSize.x != 3 || cp.filterSize.y != 3)
+            return false;
+
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        if (cp.inputs[0].X().v == 7 && cp.inputs[0].Y().v == 7)
+            return true;
+
+        if (cp.inputs[0].X().v == 14 && cp.inputs[0].Y().v == 14)
+            return true;
+
+        return false;
+    }
+
+    ConvolutionKernelBase::DispatchData ConvolutionKernel_mmad_slm_7x7_rep4::SetDefault(const convolution_params& arg, int) const
+    {
+        DispatchData runInfo = ConvolutionKernelBase::SetDefault(arg);
+
+        runInfo.effiency = FORCE_PRIORITY_1;
+
+        const size_t rep_count = 4;
+        const size_t batch_per_wi = 4;
+        const size_t out_block_width = 7;
+        //const size_t out_block_height = 1;
+        runInfo.gws0 = (arg.output.Feature().v * arg.output.Batch().v) / (rep_count * batch_per_wi); // number of tiles needed to cover output width
+        runInfo.gws1 = ((arg.inputs[0].X().v / arg.stride.x) + (out_block_width - 1)) / out_block_width;
+        // since this kernel only apply to 7x7 sizes we need to manually set gws2 to 8
+        runInfo.gws2 = Align(arg.inputs[0].Y().v, 8);//8;//((arg.inputs[0].Y().v / arg.stride.y) + (out_block_height - 1)) / out_block_height;
+
+        runInfo.lws0 = 16; // depth
+        runInfo.lws1 = 1; // width
+        runInfo.lws2 = 8; // height
+
+        return runInfo;
+    }
+
+    JitConstants ConvolutionKernel_mmad_slm_7x7_rep4::GetJitConstants(const convolution_params& params, const DispatchData& runInfo) const
+    {
+        auto jit = ConvolutionKernelBase::GetJitConstants(params, runInfo);
+
+        jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", 8));
+
+        // pitch for special block format used in this kernel
+        const size_t ifm_32_aligned = Align(params.weights.IFM().v, 32);
+        const size_t filter_ofm_block_pitch = (ifm_32_aligned / 32) * params.weights.X().v * params.weights.Y().v * 4 * 8 * 8;
+        jit.AddConstant(MakeJitConstant("FILTER_OFM_BLOCK_PITCH", filter_ofm_block_pitch));
+
+        const size_t in_x_pitch = 32 * 4;
+        const size_t in_y_pitch = 32 * 4 * params.inputs[0].X().LogicalDimPadded();
+        const size_t in_b_block_pitch = in_y_pitch * params.inputs[0].Y().LogicalDimPadded();
+        const size_t in_f_block_pitch = in_b_block_pitch * ((params.inputs[0].Batch().v + 3) / 4);
+        const size_t in_offset = in_x_pitch * params.inputs[0].X().pad.before + in_y_pitch * params.inputs[0].Y().pad.before;
+
+        const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
+
+        jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+        jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+        jit.AddConstant(MakeJitConstant("IN_B_BLOCK_PITCH", in_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_F_BLOCK_PITCH", in_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("IN_OFFSET", in_offset));
+
+        jit.AddConstant(MakeJitConstant("OUT_X_PITCH", in_x_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", 7));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", 1));
+        jit.AddConstant(MakeJitConstant("LOCAL_SIZE_X", runInfo.lws0));
+        jit.AddConstant(MakeJitConstant("LOCAL_SIZE_Y", runInfo.lws1));
+        jit.AddConstant(MakeJitConstant("LOCAL_SIZE_Z", 7)); // must be 7 since we process 7 in Y per workgroup
+
+        return jit;
+    }
+
+    KernelsData ConvolutionKernel_mmad_slm_7x7_rep4::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, " -Dcl_intel_subgroups_char");
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_7x7_rep4.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_7x7_rep4.h
new file mode 100644
index 000000000..0bfe238fc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_mmad_slm_7x7_rep4.h
@@ -0,0 +1,43 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "convolution_kernel_base.h"
+ 
+namespace kernel_selector {
+    
+    class ConvolutionKernel_mmad_slm_7x7_rep4 : public ConvolutionKernelBase
+    {
+    public:
+        ConvolutionKernel_mmad_slm_7x7_rep4() : ConvolutionKernelBase("convolution_gpu_mmad_slm_7x7_rep4") {}
+        virtual ~ConvolutionKernel_mmad_slm_7x7_rep4() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
+        DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
+        {
+            return{
+                WeightsLayout::os_is_yx_isa8_osv8_isv4,
+            };
+        }
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
index aa5850593..c87b2b440 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_selector.cpp
@@ -16,16 +16,18 @@
 
 #include "convolution_kernel_selector.h"
 #include "convolution_kernel_bfyx_ref.h"
+#include "convolution_kernel_bfyx_1x1_opt.h"
 #include "convolution_kernel_bfyx_gemm_like.h"
 #include "convolution_kernel_bfyx_direct_10_12_16.h"
 #include "convolution_kernel_bfyx_os_iyx_osv16.h"
+#include "convolution_kernel_bfyx_os_iyx_osv16_2_sg.h"
 #include "convolution_kernel_yxfb_ref.h"
 #include "convolution_kernel_yxfb_yxio_b16.h"
 #include "convolution_kernel_yxfb_yxio_b8.h"
 #include "convolution_kernel_yxfb_yxio_b1_block.h"
 #include "convolution_kernel_yxfb_yxio_b1_block_multiple_x.h"
 #include "convolution_kernel_tutorial.h"
-#include "convolution_kernel_bfyx_3x3_dw_opt.h"
+//#include "convolution_kernel_bfyx_3x3_dw_opt.h"
 #include "convolution_kernel_winograd_2x3_s1.h"
 #include "convolution_kernel_bfyx_1x1.h"
 #include "convolution_kernel_bfyx_1x1_gemm_buf.h"
@@ -37,23 +39,36 @@
 #include "convolution_kernel_byxf_af32_depthwise.h"
 #include "convolution_kernel_mmad_batched.h"
 #include "convolution_kernel_bfyx_depthwise_weights_lwg.h"
+#include "convolution_kernel_mmad_slm_2x14_rep4.h"
+#include "convolution_kernel_mmad_slm_7x7_rep4.h"
+#include "convolution_kernel_byxf_fs_bs_yx_bsv4_fsv32.h"
+#include "convolution_kernel_mmad_batched_block.h"
+#include "convolution_kernel_mmad_batched_block_1x1.h"
+#include "convolution_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
+#include "convolution_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
+#include "convolution_kernel_mmad_32x32sg_slm_int8.h"
+#include "convolution_kernel_byx8_f4__fs_bs_yx_bsv4_fsv32.h"
+#include "convolution_kernel_imad_3x3.h"
+#include "convolution_kernel_imad_1x1.h"
+#include "convolution_kernel_imad_7x7.h"
 
-#include <iostream>
- 
 namespace kernel_selector 
 {
     convolution_kernel_selector::convolution_kernel_selector()
     {
         Attach<ConvolutionKernel_bfyx_Ref>();
+        Attach<convolution_kernel_bfyx_1x1_opt>();
         Attach<ConvolutionKernel_bfyx_GEMMLike>();
         Attach<ConvolutionKernel_bfyx_Direct_10_10_12>();
         Attach<ConvolutionKernel_bfyx_os_iyx_osv16>();
+        // commented out to not get in our way, will enable in future after autotuning
+//        Attach<ConvolutionKernel_bfyx_os_iyx_osv16_2_sg>();
         Attach<ConvolutionKernel_yxfb_Ref>();
         Attach<ConvolutionKernel_yxfb_yxio_b16>();
         Attach<ConvolutionKernel_yxfb_yxio_b8>();
         //Attach<ConvolutionKernel_yxfb_yxio_b1_block>(); // TODO: need to finish integration
         Attach<ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x>();
-        Attach<ConvolutionKernel_bfyx_3x3_dw_opt>();
+        //Attach<ConvolutionKernel_bfyx_3x3_dw_opt>();
         Attach<ConvolutionKernel_Winograd_2x3_s1>();
         Attach<ConvolutionKernel_Winograd_2x3_s1_fused>();
         Attach<ConvolutionKernel_Winograd_6x3_s1_fused>();
@@ -65,13 +80,23 @@ namespace kernel_selector
         Attach<ConvolutionKernel_byxf_af32_depthiwise>();
         Attach<ConvolutionKernel_mmad_batched>();
         Attach<ConvolutionKernel_bfyx_depthwise_weights_lwg>();
+        Attach<ConvolutionKernel_mmad_slm_2x14_rep4>();
+        Attach<ConvolutionKernel_mmad_slm_7x7_rep4>();
+        Attach<ConvolutionKernel_mmad_32x32sg_128x128wg_slm_int8>();
+        Attach<ConvolutionKernel_mmad_32x32sg_224x128wg_slm_int8>();
+        Attach<ConvolutionKernel_byxf_fs_bs_yx_bsv4_fsv32>();
+        Attach<ConvolutionKernel_byx8_f4__fs_bs_yx_bsv4_fsv32>();
+        Attach<ConvolutionKernel_mmad_batched_block>();
+        Attach<ConvolutionKernel_mmad_batched_block_1x1>();
+//        Attach<ConvolutionKernel_mmad_32x32sg_slm_int8>();
         //Attach<ConvolutionKernel_Tutorial>(); //In order to use this implementation for tutorial purposes please uncomment this line
+        Attach<ConvolutionKernel_imad_3x3>();
+        Attach<ConvolutionKernel_imad_1x1>();
+        Attach<ConvolutionKernel_imad_7x7>();
     }
 
     KernelsData convolution_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
     {
-        //const ConvolutionParams& orgParams = static_cast<const ConvolutionParams&>(params);
-        //std::cout << orgParams.to_string() << std::endl;
         return GetAutoTuneBestKernel(params, options, KernelType::CONVOLUTION);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp
index 7d6bda116..fa8ac6ee6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_tutorial.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
 
@@ -181,7 +180,7 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_Tutorial::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 
 #endif
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.h
index e2cbdfe6d..77f7135a5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_tutorial.h
@@ -38,9 +38,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_Tutorial() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
         {
             return{
@@ -57,4 +57,4 @@ namespace kernel_selector {
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1)   const override;
 #endif
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp
index 98876df65..e0190515d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "convolution_kernel_winograd_2x3_s1.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector {
 
@@ -120,6 +118,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_Winograd_2x3_s1::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h
index 04f61acef..491eeb32c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1.h
@@ -28,13 +28,13 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_Winograd_2x3_s1() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::winograd_2x3_s1_weights }; }
 
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp
index f2d59990e..f26abb899 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_winograd_2x3_s1_fused.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
 
@@ -148,6 +147,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_Winograd_2x3_s1_fused::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h
index bb520a065..770f0fadc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_2x3_s1_fused.h
@@ -28,13 +28,13 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_Winograd_2x3_s1_fused() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::winograd_2x3_s1_fused_weights }; }
 
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp
index 1a06f04b4..a93a4b613 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.cpp
@@ -159,6 +159,6 @@ namespace kernel_selector {
 
     KernelsData ConvolutionKernel_Winograd_6x3_s1_fused::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, AGE_BASED);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h
index 39b9fd892..665e5a8e9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_winograd_6x3_s1_fused.h
@@ -28,12 +28,12 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_Winograd_6x3_s1_fused() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.cpp
index 6726433f9..584d34352 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.cpp
@@ -39,11 +39,12 @@ namespace kernel_selector
         k.EnableDilation();
         k.EnableDepthwiseSeparableOpt();
         k.DisableTuning();
+        k.EnableGroupedConvolution();
         return k;
     }
 
     KernelsData ConvolutionKernel_yxfb_Ref::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.h
index 1d6a7dfb1..1f2239fb2 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_ref.h
@@ -27,9 +27,9 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_yxfb_Ref() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override
         { 
             return{ 
@@ -40,4 +40,4 @@ namespace kernel_selector {
             }; 
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp
index 04508ef2e..7dae5c22e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "convolution_kernel_yxfb_yxio_b16.h"
-#include "convolution_params.h"
 
 namespace kernel_selector 
 {
@@ -210,6 +209,6 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_yxfb_yxio_b16::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h
index 9a4c2fcec..e60ceae84 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b16.h
@@ -28,13 +28,13 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_yxfb_yxio_b16() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::yxio }; }
         std::string GetKernelName(const convolution_params&) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp
index 431cfe193..3600917e7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "convolution_kernel_yxfb_yxio_b1_block.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector 
 {
@@ -58,6 +56,6 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_yxfb_yxio_b1_block::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h
index 8d19b7c63..6b170c471 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block.h
@@ -27,11 +27,11 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_yxfb_yxio_b1_block() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::yxio }; }
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp
index 81646f43e..9097311bd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "convolution_kernel_yxfb_yxio_b1_block_multiple_x.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector 
 {
@@ -155,6 +153,6 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h
index 8571eb524..2b77f701a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b1_block_multiple_x.h
@@ -27,12 +27,12 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_yxfb_yxio_b1_block_mulitple_x() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::yxio }; }
         bool Validate(const Params& p, const optional_params& o) const override;
         JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp
index ccee6e669..84dba185b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "convolution_kernel_yxfb_yxio_b8.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector 
 {
@@ -130,6 +128,6 @@ namespace kernel_selector
 
     KernelsData ConvolutionKernel_yxfb_yxio_b8::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options);
+        return GetTunedKernelsDataByIndex(params, options);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h
index dd7f8c52e..4659e2d63 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_yxfb_yxio_b8.h
@@ -27,12 +27,12 @@ namespace kernel_selector {
         virtual ~ConvolutionKernel_yxfb_yxio_b8() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
         virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const convolution_params&) const override { return{ WeightsLayout::yxio }; }
         bool Validate(const Params& p, const optional_params& o) const override;
         DispatchData SetDefault(const convolution_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp
index 9b76961ba..16bbaf22a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp
@@ -56,7 +56,7 @@ namespace kernel_selector
             k.EnableDilation();
         }
 
-        if (depthwiseSeparableOpt)
+        if (depthwise_separable_opt)
         {
             k.EnableDepthwiseSeparableOpt();
         }
@@ -76,6 +76,16 @@ namespace kernel_selector
             k.EnableOutputCalibration();
         }
 
+        if (local_convolution)
+        {
+            k.EnableLocalConvolution();
+        }
+
+        if (groups > 1 && !depthwise_separable_opt)
+        {
+            k.EnableGroupedConvolution();
+        }
+
         return k;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.h
index 91ab4193e..51884989c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.h
@@ -33,12 +33,14 @@ namespace kernel_selector
         uSize    dilation;
         uSize    padding;
         uint32_t split = 1;
-        bool     depthwiseSeparableOpt = false;
+        bool     depthwise_separable_opt = false;
         bool     transposed = false;
         bool     int8_quantization = false;
         bool     output_calibration = false;
+        bool     local_convolution = false;
         float    input_quantization_factor = 1.0f;
         float    output_quantization_factor = 1.0f;
+        uint32_t groups = 1;
 
         MultiDataTensor weights_quantization_factors;
         MultiDataTensor output_calibration_factors;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.h
index aee5a6fc5..28d7828c1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_1x1.h
@@ -28,6 +28,8 @@ namespace kernel_selector {
 
         virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
index 39fcb7e96..c4051e610 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_3x3.h
@@ -28,6 +28,8 @@ namespace kernel_selector {
 
 		virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
 		virtual bool Validate(const Params& p, const optional_params& o) const override;
+
+    protected:
 		virtual ParamsKey GetSupportedKey() const override;
 	};
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
index 286caf5c0..2c9a13420 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_7x7.h
@@ -28,6 +28,8 @@ namespace kernel_selector {
 
         virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
index 1e2cd30a8..e24f696a4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.cpp
@@ -56,7 +56,8 @@ namespace kernel_selector
             MakeJitConstant("DILATION",                     cp.dilation),
             MakeJitConstant("FILTER_ARRAY_NUM",             cp.split),
             MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
-            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      cp.depthwiseSeparableOpt),
+            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      cp.depthwise_separable_opt),
+            MakeJitConstant("OUTPUT_GRAD_W",                cp.output_grad_w),
         });
 
         return jit;
@@ -124,7 +125,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, !orgParams.bias.empty());
         if (newParams.use_momentum)
         {
             kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.h
index bf5100fe6..1331afd2b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_base.h
@@ -33,7 +33,8 @@ namespace kernel_selector
         uSize    dilation;
         uSize    padding;
         uint32_t split = 1;
-        bool     depthwiseSeparableOpt = false;
+        bool     depthwise_separable_opt = false;
+        bool     output_grad_w = false;
 
         virtual std::string to_string() const override;
 
@@ -52,7 +53,7 @@ namespace kernel_selector
                 k.EnableDilation();
             }
 
-            if (depthwiseSeparableOpt)
+            if (depthwise_separable_opt)
             {
                 k.EnableDepthwiseSeparableOpt();
             }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.h
index 3c95c4d60..be09a6619 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_ref.h
@@ -26,6 +26,8 @@ namespace kernel_selector {
         ConvolutionGradWeightsKernelRef() : ConvolutionGradWeightsKernelBase("convolution_grad_weights_ref") {}
         virtual ~ConvolutionGradWeightsKernelRef() {}
 
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.h
index 23a149b06..904884b8d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution_grad_weights/convolution_grad_weights_kernel_yxfb.h
@@ -28,6 +28,8 @@ namespace kernel_selector {
 
 		virtual DispatchData SetDefault(const convolution_grad_weights_params& params) const override;
 		virtual bool Validate(const Params& p, const optional_params& o) const override;
+
+    protected:
 		virtual ParamsKey GetSupportedKey() const override;
 	};
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
index cbc0bd780..242cc9ac1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.cpp
@@ -56,8 +56,9 @@ namespace kernel_selector
             MakeJitConstant("DILATION",                     dp.dilation),
             MakeJitConstant("FILTER_ARRAY_NUM",             dp.split),
             MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
-            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      dp.depthwiseSeparableOpt),
-            MakeJitConstant("FUSED_ELTWISE",                dp.fused_eltwise)
+            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      dp.depthwise_separable_opt),
+            MakeJitConstant("FUSED_ELTWISE",                dp.fused_eltwise),
+            MakeJitConstant("GROUPED",                      (dp.groups > 1) ? 1 : 0)
         });
 
         return jit;
@@ -120,7 +121,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, !newParams.bias.empty());
         kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
         if (orgParams.fused_eltwise)
             kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
index 206614a70..46a1527ce 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_base.h
@@ -33,7 +33,8 @@ namespace kernel_selector
         uSize    dilation;
         uSize    padding;
         uint32_t split = 1;
-        bool     depthwiseSeparableOpt = false;
+        uint32_t groups = 1;
+        bool     depthwise_separable_opt = false;
         bool     fused_eltwise = false;
 
         virtual std::string to_string() const override;
@@ -53,11 +54,16 @@ namespace kernel_selector
                 k.EnableDilation();
             }
 
-            if (depthwiseSeparableOpt)
+            if (depthwise_separable_opt)
             {
                 k.EnableDepthwiseSeparableOpt();
             }
 
+            if (groups > 1 && !depthwise_separable_opt)
+            {
+                k.EnableGroupedConvolution();
+            }
+
             return k;
         }
     };
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.h
index 1c18e15f7..178cb34e3 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_bfyx_opt.h
@@ -26,9 +26,8 @@ namespace kernel_selector {
         DeconvolutionKernel_bfyx_opt() : DeconvolutionKernelBase("deconvolution_gpu_bfyx_opt") {}
         virtual ~DeconvolutionKernel_bfyx_opt() {}
 
-        virtual ParamsKey GetSupportedKey() const override;
-
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         CommonDispatchData SetDefault(const deconvolution_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp
index fd5c28f6d..73e25b250 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.cpp
@@ -42,6 +42,7 @@ namespace kernel_selector
         k.EnableSplitSupport();
         k.EnableDepthwiseSeparableOpt();
         k.EnableGradient();
+        k.EnableGroupedConvolution();
         return k;
     }
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.h
index 2d2c89d8c..ae6a17251 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/deconvolution/deconvolution_kernel_ref.h
@@ -26,10 +26,9 @@ namespace kernel_selector {
         DeconvolutionKernelRef() : DeconvolutionKernelBase("deconvolution_gpu_ref") {}
         virtual ~DeconvolutionKernelRef() {}
 
-        virtual ParamsKey GetSupportedKey() const override;
-
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         CommonDispatchData SetDefault(const deconvolution_params& params) const override;
         JitConstants GetJitConstants(const deconvolution_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.cpp
new file mode 100644
index 000000000..2f6f33885
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.cpp
@@ -0,0 +1,85 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "depth_to_space_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+    ParamsKey DepthToSpaceKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableAllInputLayout();
+        k.EnableAllOutputLayout();
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        return k;
+    }
+
+    CommonDispatchData DepthToSpaceKernelRef::SetDefault(const depth_to_space_params& params, const optional_params&) const
+    {
+        CommonDispatchData runInfo;
+
+        std::vector<size_t> global = { params.output.Batch().v, params.output.Feature().v, params.output.Y().v * params.output.X().v };
+
+        auto local = GetOptimalLocalWorkGroupSizes(global);
+
+        runInfo.gws0 = global[0];
+        runInfo.gws1 = global[1];
+        runInfo.gws2 = global[2];
+
+        runInfo.lws0 = local[0];
+        runInfo.lws1 = local[1];
+        runInfo.lws2 = local[2];
+
+        return runInfo;
+    }
+
+    JitConstants DepthToSpaceKernelRef::GetJitConstants(const depth_to_space_params& params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        jit.AddConstant(MakeJitConstant("BLOCK_SIZE", params.block_size));
+
+        return jit;
+    }
+
+    KernelsData DepthToSpaceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelData kd = KernelData::Default<depth_to_space_params>(params);
+        depth_to_space_params& newParams = *static_cast<depth_to_space_params*>(kd.params.get());
+
+        assert(params.GetType() == KernelType::DEPTH_TO_SPACE);
+
+        auto runInfo = SetDefault(newParams, options);
+        auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+        auto cldnn_jit = GetJitConstants(newParams);
+        std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = kd.kernels[0];
+
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+        kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.h
new file mode 100644
index 000000000..9db06c09e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_ref.h
@@ -0,0 +1,56 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // depth_to_space_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct depth_to_space_params : public base_params
+    {
+        depth_to_space_params() : base_params(KernelType::DEPTH_TO_SPACE) {}
+
+        size_t block_size;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            return base_params::GetParamsKey();
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // depth_to_space_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct depth_to_space_optional_params : optional_params
+    {
+        depth_to_space_optional_params() : optional_params(KernelType::DEPTH_TO_SPACE) {}
+    };
+
+    class DepthToSpaceKernelRef : public common_kernel_base
+    {
+    public:
+        DepthToSpaceKernelRef() : common_kernel_base("depth_to_space_ref") {}
+        virtual ~DepthToSpaceKernelRef() {}
+        virtual JitConstants GetJitConstants(const depth_to_space_params& params) const;
+        virtual CommonDispatchData SetDefault(const depth_to_space_params& params, const optional_params&) const;
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.cpp
new file mode 100644
index 000000000..f50ba4070
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "depth_to_space_kernel_selector.h"
+#include "depth_to_space_kernel_ref.h"
+
+namespace kernel_selector {
+
+    depth_to_space_kernel_selector::depth_to_space_kernel_selector()
+    {
+        Attach<DepthToSpaceKernelRef>();
+    }
+
+    KernelsData depth_to_space_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::DEPTH_TO_SPACE);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.h
new file mode 100644
index 000000000..1ddb54dd7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/depth_to_space/depth_to_space_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+    class depth_to_space_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static depth_to_space_kernel_selector &Instance() {
+            static depth_to_space_kernel_selector instance_;
+            return instance_;
+        }
+
+        depth_to_space_kernel_selector();
+
+        virtual ~depth_to_space_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp
new file mode 100644
index 000000000..3e6a053ef
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.cpp
@@ -0,0 +1,67 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "detection_output_kernel_base.h"
+
+namespace kernel_selector 
+{
+    JitConstants DetectionOutputKernelBase::GetJitConstants(const detection_output_params & params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        const auto& detectOutParams = params.detectOutParams;
+
+        jit.AddConstants({
+            MakeJitConstant("NUM_IMAGES", detectOutParams.num_images),
+            MakeJitConstant("NUM_CLASSES", detectOutParams.num_classes),
+            MakeJitConstant("KEEP_TOP_K", detectOutParams.keep_top_k),
+            MakeJitConstant("TOP_K", detectOutParams.top_k),
+            MakeJitConstant("BACKGROUND_LABEL_ID", detectOutParams.background_label_id),
+            MakeJitConstant("CODE_TYPE", detectOutParams.code_type),
+            MakeJitConstant("CONF_SIZE_X", detectOutParams.conf_size_x),
+            MakeJitConstant("CONF_SIZE_Y", detectOutParams.conf_size_y),
+            MakeJitConstant("CONF_PADDING_X", detectOutParams.conf_padding_x),
+            MakeJitConstant("CONF_PADDING_Y", detectOutParams.conf_padding_y),
+            MakeJitConstant("SHARE_LOCATION", detectOutParams.share_location),
+            MakeJitConstant("VARIANCE_ENCODED_IN_TARGET", detectOutParams.variance_encoded_in_target),
+            MakeJitConstant("NMS_THRESHOLD", detectOutParams.nms_threshold),
+            MakeJitConstant("ETA", detectOutParams.eta),
+            MakeJitConstant("CONFIDENCE_THRESHOLD", detectOutParams.confidence_threshold),
+            MakeJitConstant("IMAGE_WIDTH", detectOutParams.input_width),
+            MakeJitConstant("IMAGE_HEIGH", detectOutParams.input_heigh),
+            MakeJitConstant("ELEMENTS_PER_THREAD", detectOutParams.elements_per_thread),
+            MakeJitConstant("PRIOR_COORD_OFFSET", detectOutParams.prior_coordinates_offset),
+            MakeJitConstant("PRIOR_INFO_SIZE", detectOutParams.prior_info_size),
+            MakeJitConstant("PRIOR_IS_NORMALIZED", detectOutParams.prior_is_normalized),
+        });
+
+        return jit;
+    }
+
+    DetectionOutputKernelBase::DispatchData DetectionOutputKernelBase::SetDefault(const detection_output_params& params) const
+    {
+        DispatchData kd;
+
+        kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+        kd.gws0 = 0;
+        kd.gws1 = 0;
+        kd.gws2 = 0;
+        kd.lws0 = 0;
+        kd.lws1 = 0;
+        kd.lws2 = 0;
+        return kd;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h
new file mode 100644
index 000000000..8d267d177
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_base.h
@@ -0,0 +1,87 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // detection_output_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct detection_output_params : public base_params
+    {
+        detection_output_params() : base_params(KernelType::DETECTION_OUTPUT), detectOutParams() {}
+
+        struct DedicatedParams
+        {
+            uint32_t num_images;
+            uint32_t num_classes;
+            int32_t keep_top_k;
+            int32_t top_k;
+            int32_t background_label_id;
+            int32_t code_type;
+            int32_t conf_size_x;
+            int32_t conf_size_y;
+            int32_t conf_padding_x;
+            int32_t conf_padding_y;
+            int32_t elements_per_thread;
+            int32_t input_width;
+            int32_t input_heigh;
+            int32_t prior_coordinates_offset;
+            int32_t prior_info_size;
+            bool prior_is_normalized;
+            bool share_location;
+            bool variance_encoded_in_target;
+            float nms_threshold;
+            float eta;
+            float confidence_threshold;
+        };
+
+        DedicatedParams detectOutParams;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            return base_params::GetParamsKey();
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // detection_output_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct detection_output_optional_params : optional_params
+    {
+        detection_output_optional_params() : optional_params(KernelType::DETECTION_OUTPUT) {}
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // DetectionOutputKernelBase
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class DetectionOutputKernelBase : public common_kernel_base
+    {
+    public:
+        using common_kernel_base :: common_kernel_base;
+        virtual ~DetectionOutputKernelBase() {}
+
+        using DispatchData = CommonDispatchData;
+    
+    protected:
+        JitConstants GetJitConstants(const detection_output_params& params) const;
+        virtual DispatchData SetDefault(const detection_output_params& params) const;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp
new file mode 100644
index 000000000..b9e34631f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.cpp
@@ -0,0 +1,95 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "detection_output_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+#define PRIOR_BOX_SIZE 4 // Each prior-box consists of [xmin, ymin, xmax, ymax].
+
+namespace kernel_selector
+{
+
+    ParamsKey DetectionOutputKernel::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        return k;
+    }
+
+    CommonDispatchData DetectionOutputKernel::SetDefault(const detection_output_params& params) const
+    {
+        CommonDispatchData runInfo = DetectionOutputKernelBase::SetDefault(params);
+
+        // Number of all work items is set to total number of bounding boxes -
+        // one bounding box is procerssed by one work item
+        size_t num_classes = (params.detectOutParams.share_location)? 1 : params.detectOutParams.num_classes;
+
+        // Size of input0 (input location), if shared loaction it is equal to size of one class, 
+        // else it has size of all items for all classes
+        size_t bboxesNum = params.inputs[0].LogicalSize() / PRIOR_BOX_SIZE / num_classes;
+        // Work group size is set to number of bounding boxes per image for sorting purpose
+        // (access to one table with sorted values)
+        size_t work_group_size = bboxesNum / params.inputs[0].Batch().v;
+
+        if (work_group_size > 256)
+        {
+            work_group_size = work_group_size / ((work_group_size / 256) + 1) + 1;
+        }
+
+        bboxesNum = work_group_size * params.inputs[0].Batch().v;
+
+        runInfo.gws0 = Align(bboxesNum, work_group_size);
+        runInfo.gws1 = 1;
+        runInfo.gws2 = 1;
+
+        runInfo.lws0 = work_group_size;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    }
+
+    KernelsData DetectionOutputKernel::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        assert(params.GetType() == KernelType::DETECTION_OUTPUT &&
+               options.GetType() == KernelType::DETECTION_OUTPUT);
+
+        KernelData kd = KernelData::Default<detection_output_params>(params);
+        const detection_output_params& detectOutParams = static_cast<const detection_output_params&>(params);
+        DispatchData runInfo = SetDefault(detectOutParams);
+
+        auto cldnnJit = GetJitConstants(detectOutParams);
+        auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options);
+        auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
+
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 2 });
+
+        kd.estimatedTime = FORCE_PRIORITY_8;
+
+        return{ kd };
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h
new file mode 100644
index 000000000..42d342aac
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_ref.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "detection_output_kernel_base.h"
+
+namespace kernel_selector {
+
+    class DetectionOutputKernel : public DetectionOutputKernelBase
+    {
+    public:
+        DetectionOutputKernel() : DetectionOutputKernelBase("detection_output") {}
+        virtual ~DetectionOutputKernel() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+
+    private:
+        CommonDispatchData SetDefault(const detection_output_params& params) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp
new file mode 100644
index 000000000..19fe97b7f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.cpp
@@ -0,0 +1,42 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "detection_output_kernel_selector.h"
+#include "detection_output_kernel_ref.h"
+#include "detection_output_kernel_sort.h"
+ 
+namespace kernel_selector
+{
+    detection_output_kernel_selector::detection_output_kernel_selector()
+    {
+        Attach<DetectionOutputKernel>();
+    }
+
+    KernelsData detection_output_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT);
+    }
+
+    detection_output_sort_kernel_selector::detection_output_sort_kernel_selector()
+    {
+        Attach<DetectionOutputKernel_sort>();
+    }
+
+    KernelsData detection_output_sort_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::DETECTION_OUTPUT);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h
new file mode 100644
index 000000000..f2c8db74a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_selector.h
@@ -0,0 +1,52 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+ 
+namespace kernel_selector
+{
+    class detection_output_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static detection_output_kernel_selector &Instance() {
+            static detection_output_kernel_selector instance_;
+            return instance_;
+        }
+
+        detection_output_kernel_selector();
+
+        virtual ~detection_output_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+
+    class detection_output_sort_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static detection_output_sort_kernel_selector &Instance() {
+            static detection_output_sort_kernel_selector instance_;
+            return instance_;
+        }
+
+        detection_output_sort_kernel_selector();
+
+        virtual ~detection_output_sort_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp
new file mode 100644
index 000000000..b1d8fa9a4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.cpp
@@ -0,0 +1,89 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "detection_output_kernel_sort.h"
+#include "kernel_selector_utils.h"
+
+#define DETECTION_OUTPUT_ROW_SIZE 7 // Each detection consists of [image_id, label, confidence, xmin, ymin, xmax, ymax].
+
+namespace kernel_selector
+{
+
+    ParamsKey DetectionOutputKernel_sort::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        return k;
+    }
+
+    CommonDispatchData DetectionOutputKernel_sort::SetDefault(const detection_output_params& params) const
+    {
+        CommonDispatchData runInfo = DetectionOutputKernelBase::SetDefault(params);
+
+        unsigned class_num = params.detectOutParams.num_classes;
+        if (params.detectOutParams.share_location && params.detectOutParams.background_label_id == 0)
+        {
+            class_num -= 1;
+        }
+        const size_t bboxesNum = class_num * params.detectOutParams.num_images;
+        // Work group size is set to number of bounding boxes per image
+        size_t work_group_size = class_num;
+
+        if (work_group_size > 256)
+        {
+            work_group_size = (work_group_size + work_group_size % 2) / (work_group_size / 256 + 1);
+        }
+
+        runInfo.gws0 = Align(bboxesNum, work_group_size);
+        runInfo.gws1 = 1;
+        runInfo.gws2 = 1;
+
+        runInfo.lws0 = work_group_size;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    }
+
+    KernelsData DetectionOutputKernel_sort::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        assert(params.GetType() == KernelType::DETECTION_OUTPUT &&
+               options.GetType() == KernelType::DETECTION_OUTPUT);
+
+        KernelData kd = KernelData::Default<detection_output_params>(params);
+        const detection_output_params& detectOutParams = static_cast<const detection_output_params&>(params);
+        DispatchData runInfo = SetDefault(detectOutParams);
+
+        auto cldnnJit = GetJitConstants(detectOutParams);
+        auto entryPoint = GetEntryPoint(kernelName, detectOutParams.layerID, options);
+        auto jit = CreateJit(kernelName, cldnnJit, entryPoint);
+
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entryPoint);
+
+        kd.estimatedTime = FORCE_PRIORITY_8;
+
+        return{ kd };
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h
new file mode 100644
index 000000000..b06ea1c2e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/detection_output/detection_output_kernel_sort.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "detection_output_kernel_base.h"
+
+namespace kernel_selector {
+
+    class DetectionOutputKernel_sort : public DetectionOutputKernelBase
+    {
+    public:
+        DetectionOutputKernel_sort() : DetectionOutputKernelBase("detection_output_sort") {}
+        virtual ~DetectionOutputKernel_sort() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+
+    private:
+        CommonDispatchData SetDefault(const detection_output_params& params) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp
new file mode 100644
index 000000000..28758b29b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.cpp
@@ -0,0 +1,301 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "eltwise_kernel_b_fs_yx_fsv4.h"
+#include "kernel_selector_utils.h" 
+
+namespace kernel_selector {
+
+    ParamsKey EltwiseKernel_b_fs_yx_fsv4::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableInputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        k.EnableEltwiseStride();
+        return k;
+    }
+
+    EltwiseKernelBase::DispatchData EltwiseKernel_b_fs_yx_fsv4::SetDefault(const eltwise_params& params) const
+    {
+        DispatchData kd;
+
+        // Because of very specific requirements for data, we may linearize the data,
+        // i.e. use only one dimension, e.g. 'X'.
+
+        //GWS:
+        // we process 4*4 (4 int8 bytes per on block_read4 reading) features per workitem
+        kd.gws0 = params.output.X().v * params.output.Y().v *
+                  params.output.Batch().v * params.output.Feature().v / (4*4);
+        kd.gws1 = 1;
+        kd.gws2 = 1;
+        // LWS:
+        kd.lws0 = 8;
+        kd.lws1 = 1;
+        kd.lws2 = 1;
+
+        kd.effiency = FORCE_PRIORITY_1;
+        return kd;
+    }
+
+    bool EltwiseKernel_b_fs_yx_fsv4::Validate(const Params& params, const optional_params& options) const
+    {
+        // Requirents to use 'eltwise_b_fs_yx_fsv4' kernel are below:
+        // 1. No stride
+        // 2. All dimensions for all inputs are the same
+        // 3. No padding
+        // So, it can be linearized
+
+        if (!Parent::Validate(params, options)) {
+            return false;
+        }
+
+        KernelData kd = KernelData::Default<eltwise_params>(params);
+        eltwise_params& newParams = *static_cast<eltwise_params*>(kd.params.get());
+
+        // 1. No stride
+        if (!newParams.stride.empty()) {
+            return false;
+        }
+
+        for (size_t i = 0; i < newParams.inputs.size() - 1; i++)
+        {
+            // 2. All dimensions for all inputs are the same
+            if (!(newParams.inputs[i] == newParams.inputs[i + 1])) {
+                return false;
+            }
+        }
+
+        const auto& in = newParams.inputs[0];
+        for (size_t i = 0; i < in.Dimentions(); i++)
+        {
+            // 3. No padding
+            if ((in.GetDims()[i].pad.before != 0) ||
+                (in.GetDims()[i].pad.after != 0)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    JitConstants EltwiseKernel_b_fs_yx_fsv4::GetJitConstants(const eltwise_params& params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        if (params.inputs[0].GetDType() == Datatype::UINT8) {
+            // Special handler for unsigned types
+            jit.AddConstants({
+                MakeJitConstant("ELTW_UNSIGNED", 1)
+            });
+        }
+
+        ///////////////
+        jit.AddConstants({
+            MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
+            MakeJitConstant("QUANTIZATION_TERM",    params.int8_quantization),
+        });
+
+        if (params.int8_quantization)
+        {
+            if (params.output_calibration)
+            {
+                jit.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.output_calibration));
+                jit.AddConstant(MakeJitConstant("O_QF", params.output_calibration_factors[0]));
+
+            }
+            else
+                jit.AddConstants({ MakeJitConstant("O_QF",       params.output_quantization_factor) });
+        }
+
+        std::string inputs_decls;
+        auto& updateInputs = params.updateInputIds;
+
+        for (size_t i = 0; i < params.inputs.size(); i++)
+        {
+            //const should be added only to inputs which will not be updated
+            std::string const_str = "const";
+            for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+            {
+                if (updateInputs[update_input_idx].inputId == i)
+                {
+                    const_str = "";
+                    break;
+                }
+            }
+
+            inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+        }
+
+        jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
+        jit.AddConstant(MakeJitConstant("ELTWISE_NO_PITCH_SAME_DIMS", CheckInputsOutputNoPitchSameDims(params)));
+
+        std::string do_eltwise;
+
+        auto& operations = params.operations;
+        auto& coefficients = params.coefficients;
+
+        for (size_t op_num = 0; op_num < operations.size(); op_num++)
+        {
+            const std::string op_num_str = std::to_string(op_num);
+            const auto& ew = operations[op_num];
+
+            for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+            {
+                const auto& input = ew.inputs[input_idx];
+                const std::string name = "INPUT_" + op_num_str + "_" + std::to_string(input_idx);
+                switch (input.mode)
+                {
+                case EltwiseInputMode::SCALAR:
+                    jit.AddConstant(MakeJitConstant(name, input.scalar));
+                    break;
+                case EltwiseInputMode::INPUT_BUFFER:
+                    jit.AddConstant(MakeJitConstant(name, "GET_INPUT(input" + std::to_string(input.index) + ", INPUT" + std::to_string(input.index) + ")"));
+                    break;
+                case EltwiseInputMode::OUTPUT_BUFFER:
+                    jit.AddConstant(MakeJitConstant(name, "output[GET_INDEX(OUTPUT, )]"));
+                    break;
+                case EltwiseInputMode::UNORDERED_ACCESS_INPUT_BUFFER:
+                    jit.AddConstant(MakeJitConstant(name, "input" + std::to_string(input.index) + "[(size_t)tmp" + std::to_string(input.tmpIndex) + "]"));
+                    break;
+                case EltwiseInputMode::INTERMEDIATE_RESULTS_INDEX:
+                    jit.AddConstant(MakeJitConstant(name, "tmp" + std::to_string(input.tmpIndex)));
+                    break;
+                default:
+                    break;
+                }
+            }
+            std::string input0_str, input1_str, cast_type, op;
+
+            cast_type = "(int16)";
+            op = "const int16 tmp" + op_num_str + " = ";
+
+            input0_str = cast_type + "INPUT_" + op_num_str + "_0";
+            input1_str = cast_type + "INPUT_" + op_num_str + "_1";
+
+            if (ew.mode == EltwiseMode::ADD)
+            {
+                std::vector<std::string> coeff_strings(ew.inputs.size(), "");
+                for (size_t input_idx = 0; input_idx < ew.inputs.size(); input_idx++)
+                {
+                    const auto& input = ew.inputs[input_idx];
+                    if (input.mode == EltwiseInputMode::INPUT_BUFFER && input.index < coefficients.size())
+                    {
+                        const float c = coefficients[input.index];
+                        if (c != 1.0f)
+                            coeff_strings[input_idx] = cast_type + "(" + std::to_string(c) + ")*";
+                    }
+                }
+
+                input0_str = coeff_strings[0] + input0_str;
+                input1_str = coeff_strings[1] + input1_str;
+            }
+
+
+            switch (ew.mode)
+            {
+            case EltwiseMode::ADD:      op += input0_str + " + " + input1_str; break;
+            case EltwiseMode::SUB:      op += input0_str + " - " + input1_str; break;
+            case EltwiseMode::MUL:      op += input0_str + " * " + input1_str; break;
+            case EltwiseMode::DIV:      op += input0_str + " / " + input1_str; break;
+            case EltwiseMode::MODULU:
+            case EltwiseMode::MIN:
+            case EltwiseMode::MAX:
+            {
+                auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
+                auto input_0_type = params.inputs[0].GetDType();
+                auto input_1_type = params.inputs[1].GetDType();
+
+                // input_0 == int
+                if (input_0_type == kernel_selector::Datatype::INT8 ||
+                    input_0_type == kernel_selector::Datatype::UINT8)
+                {
+                    // input_0 == int && input_1 == int
+                    if (input_1_type == kernel_selector::Datatype::INT8 ||
+                        input_1_type == kernel_selector::Datatype::UINT8)
+                    {
+                        if (ew.mode == EltwiseMode::MODULU)
+                            op += input0_str + " % " + input1_str;
+                        else
+                            op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
+                    }
+                    // input_0 == int && input_1 != int
+                    else
+                    {
+                        op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
+                    }
+                }
+                // input_0 != int && input_1 == int
+                else if (input_1_type == kernel_selector::Datatype::INT8 ||
+                         input_1_type == kernel_selector::Datatype::UINT8)
+                {
+                    op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
+                }
+                // input_0 != int && input_1 != int
+                else
+                {
+                    op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
+                }
+            } break;
+            case EltwiseMode::POW:      op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
+            case EltwiseMode::SQRT:     op += cast_type + "sqrt(" + input0_str + ")"; break;
+            case EltwiseMode::RSQRT:    op += cast_type + "1/sqrt(" + input0_str + ")"; break;
+            case EltwiseMode::ASSIGN:   op += input0_str; break;
+            default:
+                break;
+            }
+
+            std::string opname = "OPERATION" + op_num_str;
+            jit.AddConstant(MakeJitConstant(opname, op));
+            do_eltwise += "\\\n\t" + opname + ";";
+        }
+
+        for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
+            do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) +
+            "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) +
+            ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";";
+
+        do_eltwise += "\\\n\tres = tmp" + std::to_string(operations.size() - 1) + ";";
+
+        jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
+
+        if (params.layoutBased || params.int8_quantization)
+        {
+            jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
+        }
+
+        if (!params.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
+        }
+
+        ///////////////
+        return jit;
+    }
+
+    KernelsData EltwiseKernel_b_fs_yx_fsv4::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.h
new file mode 100644
index 000000000..1032b6872
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_b_fs_yx_fsv4.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "eltwise_kernel_base.h"
+
+namespace kernel_selector
+{
+    class EltwiseKernel_b_fs_yx_fsv4 : public EltwiseKernelBase
+    {
+    public:
+        using Parent = EltwiseKernelBase;
+        EltwiseKernel_b_fs_yx_fsv4() : EltwiseKernelBase("eltwise_b_fs_yx_fsv4") {}
+        virtual ~EltwiseKernel_b_fs_yx_fsv4() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        virtual bool Validate(const Params& params, const optional_params& options) const override;
+        JitConstants GetJitConstants(const eltwise_params& params) const override;
+        virtual DispatchData SetDefault(const eltwise_params& params) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
index 5feac0ca5..85cedc3ce 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
 */
 
 #include "eltwise_kernel_base.h"
-#include "kernel_selector_utils.h" 
+#include "kernel_selector_utils.h"
 
 namespace kernel_selector
 {
@@ -31,6 +31,16 @@ namespace kernel_selector
         case EltwiseMode::MAX:
         case EltwiseMode::POW:
         case EltwiseMode::MODULU:
+        case EltwiseMode::EQ:
+        case EltwiseMode::NE:
+        case EltwiseMode::LT:
+        case EltwiseMode::LE:
+        case EltwiseMode::GT:
+        case EltwiseMode::GE:
+        case EltwiseMode::LOGIC_AND:
+        case EltwiseMode::LOGIC_OR:
+        case EltwiseMode::LOGIC_XOR:
+        case EltwiseMode::SQUARED_DIFF:
             return 2;
         case EltwiseMode::SQRT:
         case EltwiseMode::RSQRT:
@@ -54,6 +64,16 @@ namespace kernel_selector
             k.EnableOutputCalibration();
         }
 
+        if (!stride.empty())
+        {
+            k.EnableEltwiseStride();
+        }
+
+        if (broadcast)
+        {
+            k.EnableEltwiseBroadcast();
+        }
+
         return k;
     }
 
@@ -109,6 +129,7 @@ namespace kernel_selector
         jit.AddConstants({
             MakeJitConstant("ELTWISE_LAYOUT_BASED", params.layoutBased),
             MakeJitConstant("QUANTIZATION_TERM",    params.int8_quantization),
+            MakeJitConstant("ELTWISE_BROADCAST",    params.broadcast),
         });
 
         if (params.int8_quantization)
@@ -140,6 +161,11 @@ namespace kernel_selector
             }
 
             inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+            if (!params.stride.empty())
+            {
+                jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_X", params.stride[i].x));
+                jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_Y", params.stride[i].y));
+            }
             if (useVload8)
             {
                 vload_decls += "\\\n\tconst " + toCLType(params.inputs[i].GetDType()) + "8 in" + std::to_string(i);
@@ -196,7 +222,7 @@ namespace kernel_selector
                 }
             }
 
-            std::string input0_str, input1_str, cast_type, op;
+            std::string input0_str, input1_str, cast_type, output_cast, op;
 
             if (useVload8)
             {
@@ -214,6 +240,11 @@ namespace kernel_selector
                 op = "const UNIT_TYPE tmp" + op_num_str + " = ";
             }
 
+            if (params.output.GetDType() == Datatype::INT8 && !params.int8_quantization) {
+                output_cast = "(char)";
+                cast_type = "(" + toCLType(params.inputs[op_num].GetDType()) + ")";
+            }
+
             input0_str = cast_type + "INPUT_" + op_num_str + "_0";
             input1_str = cast_type + "INPUT_" + op_num_str + "_1";
 
@@ -238,17 +269,67 @@ namespace kernel_selector
 
             switch (ew.mode)
             {
-            case EltwiseMode::ADD:      op += input0_str + " + " + input1_str; break;
-            case EltwiseMode::SUB:      op += input0_str + " - " + input1_str; break;
-            case EltwiseMode::MUL:      op += input0_str + " * " + input1_str; break;
-            case EltwiseMode::DIV:      op += input0_str + " / " + input1_str; break;
-            case EltwiseMode::MODULU:   op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::MIN:      op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::MAX:      op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::POW:      op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::SQRT:     op += cast_type + "sqrt(" + input0_str + ")"; break;
-            case EltwiseMode::RSQRT:    op += cast_type + "1/sqrt(" + input0_str + ")"; break;
-            case EltwiseMode::ASSIGN:   op += input0_str; break;
+            case EltwiseMode::ADD:         op += input0_str + " + " + input1_str; break;
+            case EltwiseMode::SUB:         op += input0_str + " - " + input1_str; break;
+            case EltwiseMode::MUL:         op += input0_str + " * " + input1_str; break;
+            case EltwiseMode::DIV:         op += input0_str + " / " + input1_str; break;
+            case EltwiseMode::MODULU:
+            case EltwiseMode::MIN:
+            case EltwiseMode::MAX:
+            {
+                auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max" ));
+                auto input_0_type = params.inputs[0].GetDType();
+                auto input_1_type = params.inputs[1].GetDType();
+
+                // input_0 == int
+                if (input_0_type == kernel_selector::Datatype::INT8 ||
+                    input_0_type == kernel_selector::Datatype::INT32 ||
+                    input_0_type == kernel_selector::Datatype::INT64)
+                {
+                    // input_0 == int && input_1 == int
+                    if (input_1_type == kernel_selector::Datatype::INT8 ||
+                        input_1_type == kernel_selector::Datatype::INT32 ||
+                        input_1_type == kernel_selector::Datatype::INT64)
+                    {
+                        if (ew.mode == EltwiseMode::MODULU)
+                            op += input0_str + " % " + input1_str;
+                        else
+                            op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
+                    }
+                    // input_0 == int && input_1 != int
+                    else
+                    {
+                        op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
+                    }
+                }
+                // input_0 != int && input_1 == int
+                else if ( input_1_type == kernel_selector::Datatype::INT8 ||
+                          input_1_type == kernel_selector::Datatype::INT32 ||
+                          input_1_type == kernel_selector::Datatype::INT64)
+                {
+                    op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
+                }
+                // input_0 != int && input_1 != int
+                else
+                {
+                    op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
+                }
+            } break;
+            case EltwiseMode::POW:          op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
+            case EltwiseMode::SQRT:         op += cast_type + "sqrt(" + input0_str + ")"; break;
+            case EltwiseMode::RSQRT:        op += cast_type + "1/sqrt(" + input0_str + ")"; break;
+            case EltwiseMode::SQUARED_DIFF: op += cast_type + "((" + input0_str + " - " + input1_str + ")"
+                                                            " * (" + input0_str + " - " + input1_str + "))"; break;
+            case EltwiseMode::EQ:           op += output_cast + "(" + input0_str + " == " + input1_str + ")"; break;
+            case EltwiseMode::NE:           op += output_cast + "(" + input0_str + " != " + input1_str + ")"; break;
+            case EltwiseMode::LT:           op += output_cast + "(" + input0_str + " < " + input1_str + ")"; break;
+            case EltwiseMode::LE:           op += output_cast + "(" + input0_str + " <= " + input1_str + ")"; break;
+            case EltwiseMode::GT:           op += output_cast + "(" + input0_str + " > " + input1_str + ")"; break;
+            case EltwiseMode::GE:           op += output_cast + "(" + input0_str + " >= " + input1_str + ")"; break;
+            case EltwiseMode::LOGIC_AND:    op += output_cast + "(" + input0_str + " && " + input1_str + ")"; break;
+            case EltwiseMode::LOGIC_OR:     op += output_cast + "(" + input0_str + " || " + input1_str + ")"; break;
+            case EltwiseMode::LOGIC_XOR:    op += output_cast + "(!" + input0_str + " != !" + input1_str + ")"; break;
+            case EltwiseMode::ASSIGN:       op += input0_str; break;
             default:
                 break;
             }
@@ -259,7 +340,7 @@ namespace kernel_selector
         }
 
         for (size_t update_input_idx = 0; update_input_idx < updateInputs.size(); update_input_idx++)
-            do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) + 
+            do_eltwise += "\\\n\tinput" + std::to_string(updateInputs[update_input_idx].inputId) +
             "[GET_INDEX(INPUT, " + std::to_string(updateInputs[update_input_idx].inputId) +
             ")] = tmp" + std::to_string(updateInputs[update_input_idx].tmpId) + ";";
 
@@ -267,9 +348,14 @@ namespace kernel_selector
 
         jit.AddConstant(MakeJitConstant("DO_ELTWISE", do_eltwise));
 
-        if (params.layoutBased || params.int8_quantization)
+        if (params.layoutBased || params.int8_quantization || params.broadcast)
+        {
+            jit.Merge(GetTensorFriendlyWorkGroupsJit(params.output));
+        }
+
+        if (!params.stride.empty())
         {
-            jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
+            jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
         }
 
         return jit;
@@ -284,12 +370,17 @@ namespace kernel_selector
     {
         DispatchData kd;
 
-        if (params.layoutBased || params.int8_quantization)
+        if (params.layoutBased || params.int8_quantization || params.broadcast)
         {
-            auto global = GetTensorFriendlyWorkGroups(params.inputs[0]);
+            auto global = GetTensorFriendlyWorkGroups(params.output);
             kd.gws0 = global[0];
             kd.gws1 = global[1];
             kd.gws2 = global[2];
+            if (!params.stride.empty())
+            {
+                kd.gws0 /= params.stride[0].x;
+                kd.gws0 /= params.stride[0].y;
+            }
         }
         else if (CheckInputsOutputNoPitchSameDims(params))
         {
@@ -346,7 +437,7 @@ namespace kernel_selector
         kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
         kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
 
-        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
         kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false, newParams.int8_quantization, newParams.output_calibration);
 
         kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
index 161140849..458f3b9d5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_base.h
@@ -90,12 +90,14 @@ namespace kernel_selector
         std::vector<eltwise_params::Node> operations;
         std::vector<float> coefficients;
         std::vector<UpdateInputData> updateInputIds;
- 
+        std::vector<uSize> stride;
+
         bool  layoutBased = false;
         bool  int8_quantization = false;
         bool  output_calibration = false;
         float output_quantization_factor = 1.0f;
-        
+        bool  broadcast = false;
+
         MultiDataTensor output_calibration_factors;
         virtual ParamsKey GetParamsKey() const;
     };
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
index 571a013ce..e6445056e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2018-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
 */
 
 #include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
-#include "kernel_selector_utils.h" 
+#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
 
@@ -31,6 +31,7 @@ namespace kernel_selector {
         k.EnableBatching();
         k.EnableInt8Quantization();
         k.EnableOutputCalibration();
+        k.EnableEltwiseStride();
         return k;
     }
 
@@ -46,6 +47,7 @@ namespace kernel_selector {
         kd.lws1 = 1;
         kd.lws2 = 8;
 
+        kd.effiency = FORCE_PRIORITY_3;
         return kd;
     }
 
@@ -100,6 +102,12 @@ namespace kernel_selector {
             }
 
             inputs_decls += const_str + " __global " + toCLType(params.inputs[i].GetDType()) + "* input" + std::to_string(i) + ", ";
+
+            if (!params.stride.empty())
+            {
+                jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_X", params.stride[i].x));
+                jit.AddConstant(MakeJitConstant("INPUT" + std::to_string(i) + "_STRIDE_Y", params.stride[i].y));
+            }
         }
 
         jit.AddConstant(MakeJitConstant("INPUTS_DECLS", inputs_decls));
@@ -177,17 +185,67 @@ namespace kernel_selector {
 
             switch (ew.mode)
             {
-            case EltwiseMode::ADD:      op += input0_str + " + " + input1_str; break;
-            case EltwiseMode::SUB:      op += input0_str + " - " + input1_str; break;
-            case EltwiseMode::MUL:      op += input0_str + " * " + input1_str; break;
-            case EltwiseMode::DIV:      op += input0_str + " / " + input1_str; break;
-            case EltwiseMode::MODULU:   op += cast_type + "fmod(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::MIN:      op += cast_type + "fmin(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::MAX:      op += cast_type + "fmax(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::POW:      op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
-            case EltwiseMode::SQRT:     op += cast_type + "sqrt(" + input0_str + ")"; break;
-            case EltwiseMode::RSQRT:    op += cast_type + "1/sqrt(" + input0_str + ")"; break;
-            case EltwiseMode::ASSIGN:   op += input0_str; break;
+            case EltwiseMode::ADD:          op += input0_str + " + " + input1_str; break;
+            case EltwiseMode::SUB:          op += input0_str + " - " + input1_str; break;
+            case EltwiseMode::MUL:          op += input0_str + " * " + input1_str; break;
+            case EltwiseMode::DIV:          op += input0_str + " / " + input1_str; break;
+            case EltwiseMode::MODULU:
+            case EltwiseMode::MIN:
+            case EltwiseMode::MAX:
+            {
+                auto mode = (ew.mode == EltwiseMode::MODULU ? "mod" : (ew.mode == EltwiseMode::MIN ? "min" : "max"));
+                auto input_0_type = params.inputs[0].GetDType();
+                auto input_1_type = params.inputs[1].GetDType();
+
+                // input_0 == int
+                if (input_0_type == kernel_selector::Datatype::INT8 ||
+                    input_0_type == kernel_selector::Datatype::INT32 ||
+                    input_0_type == kernel_selector::Datatype::INT64)
+                {
+                    // input_0 == int && input_1 == int
+                    if (input_1_type == kernel_selector::Datatype::INT8 ||
+                        input_1_type == kernel_selector::Datatype::INT32 ||
+                        input_1_type == kernel_selector::Datatype::INT64)
+                    {
+                        if (ew.mode == EltwiseMode::MODULU)
+                            op += input0_str + " % " + input1_str;
+                        else
+                            op += cast_type + mode + "(" + input0_str + ", " + input1_str + ")";
+                    }
+                    // input_0 == int && input_1 != int
+                    else
+                    {
+                        op += cast_type + "f" + mode + "(convert_float(" + input0_str + "), " + input1_str + ")";
+                    }
+                }
+                // input_0 != int && input_1 == int
+                else if (input_1_type == kernel_selector::Datatype::INT8 ||
+                    input_1_type == kernel_selector::Datatype::INT32 ||
+                    input_1_type == kernel_selector::Datatype::INT64)
+                {
+                    op += cast_type + "f" + mode + "(" + input0_str + ", convert_float(" + input1_str + "))";
+                }
+                // input_0 != int && input_1 != int
+                else
+                {
+                    op += cast_type + "f" + mode + "(" + input0_str + ", " + input1_str + ")";
+                }
+            } break;
+            case EltwiseMode::POW:          op += cast_type + "pow(" + input0_str + ", " + input1_str + ")"; break;
+            case EltwiseMode::SQRT:         op += cast_type + "sqrt(" + input0_str + ")"; break;
+            case EltwiseMode::RSQRT:        op += cast_type + "1/sqrt(" + input0_str + ")"; break;
+            case EltwiseMode::SQUARED_DIFF: op += cast_type + "((" + input0_str + " - " + input1_str + ")"
+                                                            " * (" + input0_str + " - " + input1_str + "))"; break;
+            case EltwiseMode::EQ:           op += cast_type + "(" + input0_str + " == " + input1_str + ")"; break;
+            case EltwiseMode::NE:           op += cast_type + "(" + input0_str + " != " + input1_str + ")"; break;
+            case EltwiseMode::LT:           op += cast_type + "(" + input0_str + " < " + input1_str + ")"; break;
+            case EltwiseMode::LE:           op += cast_type + "(" + input0_str + " <= " + input1_str + ")"; break;
+            case EltwiseMode::GT:           op += cast_type + "(" + input0_str + " > " + input1_str + ")"; break;
+            case EltwiseMode::GE:           op += cast_type + "(" + input0_str + " >= " + input1_str + ")"; break;
+            case EltwiseMode::LOGIC_AND:    op += cast_type + "(" + input0_str + " && " + input1_str + ")"; break;
+            case EltwiseMode::LOGIC_OR:     op += cast_type + "(" + input0_str + " || " + input1_str + ")"; break;
+            case EltwiseMode::LOGIC_XOR:    op += cast_type + "(!" + input0_str + " != !" + input1_str + ")"; break;
+            case EltwiseMode::ASSIGN:       op += input0_str; break;
             default:
                 break;
             }
@@ -211,6 +269,11 @@ namespace kernel_selector {
             jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
         }
 
+        if (!params.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("INPUT_STRIDED", 1));
+        }
+
         ///////////////
         return jit;
     }
@@ -219,4 +282,4 @@ namespace kernel_selector {
     {
         return GetCommonKernelsData(params, options);
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
index b1fb3e950..7cd0fe645 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_fs_bs_yx_bsv4_fsv32.h
@@ -27,9 +27,9 @@ namespace kernel_selector
         virtual ~EltwiseKernel_fs_bs_yx_bsv4_fsv32() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const eltwise_params& params) const override;
         virtual DispatchData SetDefault(const eltwise_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
index 3a7776575..6b1e6eae5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
 */
 
 #include "eltwise_kernel_ref.h"
-#include "kernel_selector_utils.h" 
+#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
 
@@ -40,6 +40,8 @@ namespace kernel_selector {
         k.EnableBatching();
         k.EnableInt8Quantization();
         k.EnableOutputCalibration();
+        k.EnableEltwiseStride();
+        k.EnableEltwiseBroadcast();
         return k;
     }
 
@@ -56,7 +58,8 @@ namespace kernel_selector {
             if (params.inputs[i].GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
                 return false;
         }
-        if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32)
+        if (params.output.GetLayout() == DataLayout::fs_bs_yx_bsv4_fsv32 ||
+            params.output.GetLayout() == DataLayout::b_fs_yx_fsv4)
             return false;
 
         return true;
@@ -66,4 +69,4 @@ namespace kernel_selector {
     {
         return GetCommonKernelsData(params, options);
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
index c2ccf054d..4f89ba43c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_ref.h
@@ -27,9 +27,9 @@ namespace kernel_selector
         virtual ~EltwiseKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
 
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
index cf7565216..1f0e01e28 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_selector.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
 #include "eltwise_kernel_ref.h"
 #include "eltwise_kernel_vload8.h"
 #include "eltwise_kernel_fs_bs_yx_bsv4_fsv32.h"
+#include "eltwise_kernel_b_fs_yx_fsv4.h"
 
 namespace kernel_selector 
 {
@@ -26,6 +27,7 @@ namespace kernel_selector
         Attach<EltwiseKernelRef>();
         Attach<EltwiseKernel_vload8>();
         Attach<EltwiseKernel_fs_bs_yx_bsv4_fsv32>();
+        Attach<EltwiseKernel_b_fs_yx_fsv4>();
     }
 
     KernelsData eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
index 5ceb75084..cd5285e96 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.cpp
@@ -123,7 +123,7 @@ namespace kernel_selector {
         auto& kernel = kd.kernels[0];
         kernel.workGroups.global = { std::max(newParams.inputs[0].LogicalSize()/8, (size_t)1), 1, 1 };
         kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
-        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
         kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
 
         kd.estimatedTime = FORCE_PRIORITY_8;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.h
index 8f716ae98..a369b22bc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/eltwise/eltwise_kernel_vload8.h
@@ -27,10 +27,10 @@ namespace kernel_selector
         virtual ~EltwiseKernel_vload8() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
         virtual JitConstants GetJitConstants(const eltwise_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
index f126daa94..57091fbd7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.cpp
@@ -40,6 +40,7 @@ namespace kernel_selector
 		k.EnableTensorOffset();
 		k.EnableTensorPitches();
 		k.EnableBatching();
+        k.EnableNonBiasTerm();
 		return k;
 	}
 
@@ -58,7 +59,7 @@ namespace kernel_selector
 	EmbedKernelRef::DispatchData EmbedKernelRef::SetDefault(const embed_params& params) const
 	{
 		DispatchData kd;
-		std::vector<size_t> global = { params.inputs[0].Y().v , params.weights.OFM().v, params.inputs[0].Batch().v };
+		std::vector<size_t> global = { params.inputs[0].X().v , params.weights.OFM().v, params.inputs[0].Batch().v };
 		std::vector<size_t> local = GetOptimalLocalWorkGroupSizes(global);
 
 		kd.gws0 = global[0];
@@ -103,7 +104,7 @@ namespace kernel_selector
 
 		auto& kernel = kd.kernels[0];
 
-		FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !newParams.bias.empty());
+		FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, !newParams.bias.empty());
 
 		kd.estimatedTime = runInfo.effiency;
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.h
index 6ff98b037..2df84464e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_kernel_ref.h
@@ -36,10 +36,10 @@ namespace kernel_selector
         {
         };
 
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
         virtual JitConstants GetJitConstants(const embed_params& params) const;
         virtual DispatchData SetDefault(const embed_params& params) const;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_params.h
index bb2a10990..f4b6b4f7c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_params.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/embed/embed_params.h
@@ -17,6 +17,7 @@
 #pragma once
 
 #include "weight_bias_params.h"
+#include <string>
 
 namespace kernel_selector
 {
@@ -28,6 +29,22 @@ namespace kernel_selector
     {
 		embed_params() : weight_bias_params(KernelType::EMBED) {}
 
+
+        std::string to_string() const
+        {
+            std::stringstream s;
+
+            s << base_params::to_string() << "_";
+            if (bias.empty())
+            {
+                s << "no_bias" << "_";
+            }
+            else
+            {
+                s << "bias_" << bias[0].PhysicalSize() << "_";
+            }
+            return s.str();
+        }
         virtual ParamsKey GetParamsKey() const
         {
             return weight_bias_params::GetParamsKey();
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.cpp
index baed45d45..24cc0bac0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_block_kernel_base.cpp
@@ -15,8 +15,6 @@
 */
 
 #include "fully_connected_block_kernel_base.h"
-#include "kernel_selector_utils.h"
-#include "common_tools.h"
 
 namespace kernel_selector 
 {
@@ -32,4 +30,5 @@ namespace kernel_selector
 
         return cldnnJit;
     }
+
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.cpp
index fd2f61782..dd07347f5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_MMAD.h"
-#include "kernel_selector_utils.h"
  
 namespace kernel_selector 
 {
@@ -39,7 +38,7 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnectedKernelMMAD::Parent::DispatchData> FullyConnectedKernelMMAD::SetDefault(const fully_connected_params& params) const
+    FullyConnectedKernelMMAD::DispatchData FullyConnectedKernelMMAD::SetDefault(const fully_connected_params& params, int) const
     {
         auto runInfo = Parent::SetDefault(params);
         
@@ -47,15 +46,15 @@ namespace kernel_selector
         const auto of_maps = params.output.Feature().v;
         const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
 
-        runInfo->gws0 = 1;
-        runInfo->gws1 = 1;
-        runInfo->gws2 = of_threads_per_batch * params.output.Batch().v;
+        runInfo.gws0 = 1;
+        runInfo.gws1 = 1;
+        runInfo.gws2 = of_threads_per_batch * params.output.Batch().v;
 
-        runInfo->lws0 = 1;
-        runInfo->lws1 = 1;
-        runInfo->lws2 = sub_group_size;
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = sub_group_size;
 
-        return std::move(runInfo);
+        return runInfo;
     }
 
     JitConstants FullyConnectedKernelMMAD::GetJitConstants(const fully_connected_params& params, const DispatchData& runInfo) const
@@ -74,8 +73,17 @@ namespace kernel_selector
 
     KernelsData FullyConnectedKernelMMAD::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, DataLayout::byxf_af32,
-        { WeightsLayout::os_is_yx_isa8_osv8_isv4 }
-        );
+
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::byxf_af32,
+                { WeightsLayout::os_is_yx_isa8_osv8_isv4 }, DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.h
index 048ed238b..5004c4032 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_MMAD.h
@@ -28,10 +28,10 @@ namespace kernel_selector {
         FullyConnectedKernelMMAD() : Parent("fully_connected_gpu_MMAD") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-        
+
     protected:
+        ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+        DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
index 20e6e8dca..9b4cbb73f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.cpp
@@ -47,27 +47,27 @@ namespace kernel_selector
         return jit;
     }
 
-    std::unique_ptr<FullyConnectedKernelBase::DispatchData> FullyConnectedKernelBase::SetDefault(const fully_connected_params& params) const
+    FullyConnectedKernelBase::DispatchData FullyConnectedKernelBase::SetDefault(const fully_connected_params& params, int) const
     {
-        std::unique_ptr<DispatchData> dispatchData = std::unique_ptr<DispatchData>(new DispatchData());
-        dispatchData->fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+        DispatchData dispatchData;
+        dispatchData.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
 
         // Determine global work sizes.
-        dispatchData->gws0 = params.output.LogicalSize();
-        dispatchData->gws1 = dispatchData->gws2 = 1;
+        dispatchData.gws0 = params.output.LogicalSize();
+        dispatchData.gws1 = dispatchData.gws2 = 1;
 
         // Find largest positive local work size that is divider for global work size.
-        dispatchData->lws0 = std::min(std::max(dispatchData->gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
-        while (dispatchData->gws0 % dispatchData->lws0 != 0)
+        dispatchData.lws0 = std::min(std::max(dispatchData.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
+        while (dispatchData.gws0 % dispatchData.lws0 != 0)
         {
-            --dispatchData->lws0;
+            --dispatchData.lws0;
         }
-        dispatchData->lws1 = dispatchData->lws2 = 1;
+        dispatchData.lws1 = dispatchData.lws2 = 1;
 
-        return std::move(dispatchData);
+        return dispatchData;
     }
 
-    KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time) const
+    KernelsData FullyConnectedKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const std::string exeMode, int autoTuneIndex) const
     {
         if (!Validate(params, options) ||
             wl.empty())
@@ -117,15 +117,31 @@ namespace kernel_selector
         
         auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
 
-        const std::unique_ptr<DispatchData> runInfo = SetDefault(newParams);
-        auto cldnn_jit = GetJitConstants(newParams, *runInfo.get());
+        const DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
+        auto cldnn_jit = GetJitConstants(newParams, runInfo);
         std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, *runInfo.get(), params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, exeMode, true, !orgParams.bias.empty(), 1, newParams.int8_quantization, newParams.output_calibration);
 
         kd.estimatedTime = estimated_time;
-        kd.autoTuneIndex = -1;
+        kd.autoTuneIndex = autoTuneIndex;
         return{ kd };
     }
+
+    std::string FullyConnectedKernelBase::GetAutoTuneOptions(int autoTuneIndex) const
+    {
+        if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
+        {
+            return autoTuneOptions[autoTuneIndex];
+        }
+
+        return DEFAULT;
+}
+
+    KernelsData FullyConnectedKernelBase::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time, const int autoTuneIndex) const
+    {
+        return GetCommonKernelsData(params, options, dl, wl, estimated_time, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex);
+    }
+
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h
index d7d47e63c..a4f32c27d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_base.h
@@ -31,12 +31,29 @@ namespace kernel_selector
         virtual ~FullyConnectedKernelBase() {}
 
         struct DispatchData : public CommonDispatchData
-        {};
+        {
+            uint32_t    unit_byte_size;
+            const char* chunk_type;
+            uint32_t    chunk_byte_size;
+            uint32_t    units_per_chunk;
+            uint32_t    bytes_per_sg_read;
+            uint32_t    units_per_sg_read;
+            uint32_t    responses_per_sg_exec;
+            uint32_t    in_chunk_prefetch_size;
+            uint32_t    filter_chunk_prefetch_size;
+
+            uint32_t    last_rg_size;
+            uint32_t    rg_count;
+        };
     
+        std::string GetAutoTuneOptions(int autoTuneIndex) const;
+        std::vector<std::string> autoTuneOptions = { DEFAULT, NO_PRERA_SCH, AGE_BASED };
+        virtual KernelsData GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time = DONT_USE_IF_HAVE_SOMETHING_ELSE, int autoTuneIndex = -1) const ;
+
     protected:
         virtual JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const;
-        virtual std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const;
-        KernelsData GetCommonKernelsData(const Params& params, const optional_params& optParams, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time = DONT_USE_IF_HAVE_SOMETHING_ELSE) const;
+        virtual DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const;
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params& optParams, DataLayout dl, std::vector<WeightsLayout> wl, float estimated_time = DONT_USE_IF_HAVE_SOMETHING_ELSE, const std::string exeMode = DEFAULT, int autoTuneIndex = -1) const;
 
         bool Validate(const Params& p, const optional_params&) const override
         {
@@ -48,4 +65,4 @@ namespace kernel_selector
             return true;
         }
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp
index 61d5edc6d..8b762f96d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_bf_io_gemm.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector {
 
@@ -38,9 +37,9 @@ namespace kernel_selector {
         return k;
     }
 
-    std::unique_ptr<FullyConnected_bf_io_GEMM::Parent::DispatchData> FullyConnected_bf_io_GEMM::SetDefault(const fully_connected_params& params) const
+    FullyConnected_bf_io_GEMM::DispatchData FullyConnected_bf_io_GEMM::SetDefault(const fully_connected_params& params, int autoTuneIndex) const
     {
-        auto runInfo = Parent::SetDefault(params);
+        auto runInfo = Parent::SetDefault(params, autoTuneIndex);
 
         const uint32_t localWorkSizeX = 64;
         const uint32_t globalWorkSizeX = localWorkSizeX;
@@ -48,17 +47,17 @@ namespace kernel_selector {
         std::vector<size_t> global = { globalWorkSizeX, params.output.Feature().v, params.output.Batch().v };
         std::vector<size_t> local = { localWorkSizeX, 1, 1 };
 
-        runInfo->gws0 = global[0];
-        runInfo->gws1 = global[1];
-        runInfo->gws2 = 1;
+        runInfo.gws0 = global[0];
+        runInfo.gws1 = global[1];
+        runInfo.gws2 = 1;
 
-        runInfo->lws0 = local[0];
-        runInfo->lws1 = local[1];
-        runInfo->lws2 = 1;
+        runInfo.lws0 = local[0];
+        runInfo.lws1 = local[1];
+        runInfo.lws2 = 1;
 
-        runInfo->effiency = FORCE_PRIORITY_6;
+        runInfo.effiency = FORCE_PRIORITY_6;
 
-        return std::move(runInfo);
+        return runInfo;
     }
 
     JitConstants FullyConnected_bf_io_GEMM::GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const
@@ -89,6 +88,16 @@ namespace kernel_selector {
 
     KernelsData FullyConnected_bf_io_GEMM::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, DataLayout::bf, { WeightsLayout::oiyx }, FORCE_PRIORITY_6);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::bf, { WeightsLayout::oiyx }, FORCE_PRIORITY_6, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h
index 80b799b04..fa56bb3d6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_gemm.h
@@ -17,9 +17,9 @@
 #pragma once
 
 #include "fully_connected_kernel_base.h"
- 
+
 namespace kernel_selector {
-    
+
     class FullyConnected_bf_io_GEMM : public FullyConnectedKernelBase
     {
     public:
@@ -27,10 +27,10 @@ namespace kernel_selector {
         FullyConnected_bf_io_GEMM() : Parent("fully_connected_gpu_bf_io_gemm") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+        ParamsKey GetSupportedKey() const override;
+        DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
         JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp
index b19a92390..383e1b5fd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_bf_io_input_spatial.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -36,18 +35,18 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_bf_io_input_spatial::DispatchData> FullyConnected_bf_io_input_spatial::SetDefault(const fully_connected_params& arg) const
+    FullyConnected_bf_io_input_spatial::DispatchData FullyConnected_bf_io_input_spatial::SetDefault(const fully_connected_params& arg, int ) const
     {
         auto kd = FullyConnectedKernelBase::SetDefault(arg);
 
-        kd->gws0 = Align(arg.output.LogicalSize() / arg.inputs[0].Batch().v, 16);
-        kd->gws1 = arg.inputs[0].Batch().v;
-        kd->gws2 = 1;
-        kd->lws0 = 16;
-        kd->lws1 = 1;
-        kd->lws2 = 1;
+        kd.gws0 = Align(arg.output.LogicalSize() / arg.inputs[0].Batch().v, 16);
+        kd.gws1 = arg.inputs[0].Batch().v;
+        kd.gws2 = 1;
+        kd.lws0 = 16;
+        kd.lws1 = 1;
+        kd.lws2 = 1;
 
-        kd->effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+        kd.effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
 
         const auto &input = arg.inputs[0];
         const auto &output = arg.output;
@@ -56,11 +55,11 @@ namespace kernel_selector
         {
             if ((input.LogicalSize() / output.Batch().v >= 9216) && (output.Feature().v >= 4096))
             {
-                kd->effiency = FORCE_PRIORITY_1;
+                kd.effiency = FORCE_PRIORITY_1;
             }
         }
 
-        return std::move(kd);
+        return kd;
     }
 
     bool FullyConnected_bf_io_input_spatial::Validate(const Params& p, const optional_params& o) const
@@ -85,21 +84,42 @@ namespace kernel_selector
 
     KernelsData FullyConnected_bf_io_input_spatial::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
+        KernelsData res = {};
         const auto& orgParams = static_cast<const fully_connected_params&>(params);
 
         const auto& input = orgParams.inputs[0];
         const auto& output = orgParams.output;
 
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::bf, { WeightsLayout::io }, DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
         if (input.GetLayout() == DataLayout::bfyx)
         {
             if (input.Batch().v == 1 && output.Batch().v == 1)
             {
                 if ((input.LogicalSize() / output.Batch().v >= 9216) && (output.Feature().v >= 4096))
                 {
-                    return GetCommonKernelsData(params, optParams, DataLayout::bf, { WeightsLayout::io }, FORCE_PRIORITY_1);
+                    for (size_t i = 0; i < autoTuneOptions.size(); i++)
+                    {
+                        KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::bf, { WeightsLayout::io }, FORCE_PRIORITY_1, (int)i+3);
+                        if (!kd.empty())
+                        {
+                            res.emplace_back(kd[0]);
+                        }
+                    }
                 }
             }
         }
-        return GetCommonKernelsData(params, optParams, DataLayout::bf, { WeightsLayout::io });
+       
+
+
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.h
index 5c6fddbd2..9d81bd846 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_input_spatial.h
@@ -26,10 +26,10 @@ namespace kernel_selector {
         FullyConnected_bf_io_input_spatial() : FullyConnectedKernelBase("fully_connected_gpu_bf_io_input_spatial") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp
index 0c4efe2e0..3ede9224c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_bf_io_ref.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -38,6 +37,17 @@ namespace kernel_selector
 
     KernelsData FullyConnected_bf_io_ref::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
-        return GetCommonKernelsData(params, optParams, DataLayout::bf, { WeightsLayout::io });
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::bf, { WeightsLayout::io },
+                DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h
index 8d708fda8..2a089388e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bf_io_ref.h
@@ -26,6 +26,8 @@ namespace kernel_selector {
         FullyConnected_bf_io_ref() : FullyConnectedKernelBase("fully_connected_gpu_bf_io_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp
index 0c50aeca5..4c4ddd2eb 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.cpp
@@ -46,27 +46,38 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_bfyx_Ref::Parent::DispatchData> FullyConnected_bfyx_Ref::SetDefault(const fully_connected_params& params) const
+    FullyConnected_bfyx_Ref::DispatchData FullyConnected_bfyx_Ref::SetDefault(const fully_connected_params& params, int ) const
     {
         auto runInfo = Parent::SetDefault(params);
         
         std::vector<size_t> global = { params.output.Feature().v, params.output.Batch().v };
         std::vector<size_t> local  = GetOptimalLocalWorkGroupSizes(global);
 
-        runInfo->gws0 = global[0];
-        runInfo->gws1 = global[1];
-        runInfo->gws2 = 1;
+        runInfo.gws0 = global[0];
+        runInfo.gws1 = global[1];
+        runInfo.gws2 = 1;
 
-        runInfo->lws0 = local[0];
-        runInfo->lws1 = local[1];
-        runInfo->lws2 = 1;
+        runInfo.lws0 = local[0];
+        runInfo.lws1 = local[1];
+        runInfo.lws2 = 1;
 
-        return std::move(runInfo);
+        return runInfo;
     }
 
     KernelsData FullyConnected_bfyx_Ref::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, DataLayout::bfyx,
-        { WeightsLayout::oiyx, WeightsLayout::oyxi, WeightsLayout::iyxo, WeightsLayout::yxio });
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::bfyx,
+                { WeightsLayout::oiyx, WeightsLayout::oyxi, WeightsLayout::iyxo, WeightsLayout::yxio },
+                DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h
index 8ea52d5d9..65dc61104 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bfyx_ref.h
@@ -17,9 +17,9 @@
 #pragma once
 
 #include "fully_connected_kernel_base.h"
- 
+
 namespace kernel_selector {
-    
+
     class FullyConnected_bfyx_Ref : public FullyConnectedKernelBase
     {
     public:
@@ -28,9 +28,9 @@ namespace kernel_selector {
         FullyConnected_bfyx_Ref() : Parent("fully_connected_gpu_bfyx_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-        
+
     protected:
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+        ParamsKey GetSupportedKey() const override;
+        DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp
index a7b77e093..08562dc95 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_bs_f_bsv16_af8.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -36,17 +35,17 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_bs_f_bsv16_af8::DispatchData> FullyConnected_bs_f_bsv16_af8::SetDefault(const fully_connected_params& arg) const
+    FullyConnected_bs_f_bsv16_af8::DispatchData FullyConnected_bs_f_bsv16_af8::SetDefault(const fully_connected_params& arg, int ) const
     {
         auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
 
         size_t groups_per_batches = GetLocalGroupsSize(arg);
-        kd->gws0 = Align(arg.output.LogicalSize() / (GetBatchesPerWorkItem(arg) * groups_per_batches), 16);
-        kd->gws1 = groups_per_batches;
-        kd->lws0 = 16;
-        kd->lws1 = 1;
+        kd.gws0 = Align(arg.output.LogicalSize() / (GetBatchesPerWorkItem(arg) * groups_per_batches), 16);
+        kd.gws1 = groups_per_batches;
+        kd.lws0 = 16;
+        kd.lws1 = 1;
 
-        return std::move(kd);
+        return kd;
     }
     
     static bool check_input_layout(const DataTensor& t)
@@ -86,6 +85,16 @@ namespace kernel_selector
 
     KernelsData FullyConnected_bs_f_bsv16_af8::GetKernelsData(const Params& params, const optional_params& optParams) const
     {   
-        return GetCommonKernelsData(params, optParams, DataLayout::bs_f_bsv16__af8, { WeightsLayout::os_i_osv16__ai8 }, FORCE_PRIORITY_2);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::bs_f_bsv16__af8, { WeightsLayout::os_i_osv16__ai8 }, FORCE_PRIORITY_2, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.h
index 63a507534..57bdef554 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_af8.h
@@ -26,10 +26,10 @@ namespace kernel_selector {
         FullyConnected_bs_f_bsv16_af8() : FullyConnectedBlockKernelBase("fully_connected_gpu_bs_f_bsv16_af8_vload") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
index b98b528a8..eec40ebd7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_bs_f_bsv16_b1.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -58,13 +57,13 @@ namespace kernel_selector
         return cldnn_jit;
     }
 
-    std::unique_ptr<FullyConnected_bs_f_bsv16_b1::FullyConnectedKernelBase::DispatchData> FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg) const
+    FullyConnected_bs_f_bsv16_b1::DispatchData FullyConnected_bs_f_bsv16_b1::SetDefault(const fully_connected_params& arg, int ) const
     {
-        auto run_info = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg)));
+        DispatchData run_info = FullyConnectedKernelBase::SetDefault(arg);
 
         // Properties of chunk and unit.
         const     char*    chunk_type           = "uint";
-        const     uint32_t unit_byte_size       = run_info->fp16UnitUsed ? sizeof(short) : sizeof(float);
+        const     uint32_t unit_byte_size       = run_info.fp16UnitUsed ? sizeof(short) : sizeof(float);
         constexpr uint32_t chunk_byte_size      = sizeof(uint32_t);
         constexpr uint32_t sub_group_size       = 16;
         const     uint32_t units_per_chunk      = chunk_byte_size / unit_byte_size;
@@ -76,28 +75,37 @@ namespace kernel_selector
         const auto response_size = arg.output.Feature().v;
         auto rg_count = CeilDiv(response_size, responses_per_sg_exec);
 
-        run_info->lws0 = sub_group_size;
+        run_info.lws0 = sub_group_size;
         // Number of work items needed to process all response groups.
-        run_info->gws0 = rg_count * sub_group_size;
-        run_info->lws1 = run_info->lws2 = 1;
-        run_info->gws1 = run_info->gws2 = 1;
+        run_info.gws0 = rg_count * sub_group_size;
+        run_info.lws1 = run_info.lws2 = 1;
+        run_info.gws1 = run_info.gws2 = 1;
 
-        auto& kd = run_info;
-        kd->unit_byte_size              = unit_byte_size;
-        kd->chunk_type                  = chunk_type;
-        kd->chunk_byte_size             = chunk_byte_size;
-        kd->units_per_chunk             = units_per_chunk;
-        kd->bytes_per_sg_read           = sub_group_size * chunk_byte_size;
-        kd->units_per_sg_read           = units_per_sg_read;
-        kd->responses_per_sg_exec       = responses_per_sg_exec;
-        kd->in_chunk_prefetch_size      = 2;
-        kd->filter_chunk_prefetch_size  = responses_per_sg_exec;
+        run_info.unit_byte_size              = unit_byte_size;
+        run_info.chunk_type                  = chunk_type;
+        run_info.chunk_byte_size             = chunk_byte_size;
+        run_info.units_per_chunk             = units_per_chunk;
+        run_info.bytes_per_sg_read           = sub_group_size * chunk_byte_size;
+        run_info.units_per_sg_read           = units_per_sg_read;
+        run_info.responses_per_sg_exec       = responses_per_sg_exec;
+        run_info.in_chunk_prefetch_size      = 2;
+        run_info.filter_chunk_prefetch_size  = responses_per_sg_exec;
 
-        return std::move(run_info);
+        return run_info;
     }
 
     KernelsData FullyConnected_bs_f_bsv16_b1::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
-        return GetCommonKernelsData(params, optParams, DataLayout::bf, {WeightsLayout::os_i_osv16}, FORCE_PRIORITY_5);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::bf, { WeightsLayout::os_i_osv16 }, FORCE_PRIORITY_5, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+    }
+}
+
+        return res;
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h
index d440e60a3..4d453dc65 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv16_b1.h
@@ -26,29 +26,10 @@ namespace kernel_selector {
         FullyConnected_bs_f_bsv16_b1() : FullyConnectedKernelBase("fully_connected_gpu_bs_f_bsv16_b1") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-    
-    protected:
-        struct DispatchData : public FullyConnectedKernelBase::DispatchData
-        {
-            DispatchData(const FullyConnectedKernelBase::DispatchData& base_dispatch_data)
-                : FullyConnectedKernelBase::DispatchData(base_dispatch_data),
-                unit_byte_size(0), chunk_type(nullptr), chunk_byte_size(0), units_per_chunk(0), bytes_per_sg_read(0),
-                units_per_sg_read(0), responses_per_sg_exec(0), in_chunk_prefetch_size(0), filter_chunk_prefetch_size(0)
-            {}
-
-            uint32_t    unit_byte_size;
-            const char* chunk_type;
-            uint32_t    chunk_byte_size;
-            uint32_t    units_per_chunk;
-            uint32_t    bytes_per_sg_read;
-            uint32_t    units_per_sg_read;
-            uint32_t    responses_per_sg_exec;
-            uint32_t    in_chunk_prefetch_size;
-            uint32_t    filter_chunk_prefetch_size;
-        };
 
+    protected:
+        ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const fully_connected_params& params, const FullyConnectedKernelBase::DispatchData& kd) const override;
-        std::unique_ptr<FullyConnectedKernelBase::DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp
index 6b8fbfa85..234a9414b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_bs_f_bsv8_af8.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -38,32 +37,32 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_bs_f_bsv8_af8::DispatchData> FullyConnected_bs_f_bsv8_af8::SetDefault(const fully_connected_params& arg) const
+    FullyConnected_bs_f_bsv8_af8::DispatchData FullyConnected_bs_f_bsv8_af8::SetDefault(const fully_connected_params& arg, int ) const
     {
         auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
 
         size_t groups_per_batches = GetLocalGroupsSize(arg);
-        kd->gws0 = Align(arg.output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8);
-        kd->gws1 = groups_per_batches;
-        kd->lws0 = 8;
-        kd->lws1 = 1;
+        kd.gws0 = Align(arg.output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8);
+        kd.gws1 = groups_per_batches;
+        kd.lws0 = 8;
+        kd.lws1 = 1;
 
-        return std::move(kd);
+        return kd;
     }
     
     static bool check_input_layout(const DataTensor& t)
     {
         bool b16_layout = false;
         b16_layout |= t.GetLayout() == DataLayout::bs_f_bsv8__af8;
-        b16_layout |= DataTensor::Channelndex(t.GetLayout(), Tensor::DataChannelName::BATCH) == 0 && (t.Batch().v == 8); // TODO - check f alignment to 8
+        b16_layout |= DataTensor::Channelndex(t.GetLayout(), Tensor::DataChannelName::BATCH) == 0 && (t.Batch().v == 8);
         return b16_layout;
     }
 
     static bool check_output_layout(const DataTensor& t)
     {
         bool b16_layout = false;
-        b16_layout |= (t.GetLayout() == DataLayout::fb);
-        b16_layout |= (t.GetLayout() == DataLayout::bs_f_bsv8__af8) && (t.Batch().v == 8);
+        b16_layout |= (t.GetLayout() == DataLayout::fb) && (t.Batch().v == 8);
+        b16_layout |= (t.GetLayout() == DataLayout::bs_f_bsv8__af8);
         return b16_layout;
     }
 
@@ -85,11 +84,14 @@ namespace kernel_selector
         const bool bProperBatch =
             params.inputs[0].Batch().v >= 8 &&
             params.inputs[0].Batch().v % 8 == 0;
+        const bool bProperFeature =
+            params.inputs[0].Feature().v >= 8 &&
+            params.inputs[0].Feature().v % 8 == 0;
         const bool bProperInput = check_input_layout(params.inputs[0]);
         const bool bProperOutput = check_output_layout(params.output);
         const bool bSupportedLayout = optParams.allowInputReordering || bProperInput;
 
-        if (!bProperBatch || !bSupportedLayout || !bProperOutput)
+        if (!bProperBatch || !bProperFeature || !bSupportedLayout || !bProperOutput)
         {
             return false;
         }
@@ -99,6 +101,16 @@ namespace kernel_selector
 
     KernelsData FullyConnected_bs_f_bsv8_af8::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
-        return GetCommonKernelsData(params, optParams, DataLayout::bs_f_bsv8__af8, { WeightsLayout::os_i_osv8__ai8 }, FORCE_PRIORITY_4);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::bs_f_bsv8__af8, { WeightsLayout::os_i_osv8__ai8 }, FORCE_PRIORITY_4, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.h
index 666df90ca..13799e24b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_bs_f_bsv8_af8.h
@@ -26,10 +26,10 @@ namespace kernel_selector {
         FullyConnected_bs_f_bsv8_af8() : FullyConnectedBlockKernelBase("fully_connected_gpu_bs_f_bsv8_af8_vload") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-    
+
     protected:
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp
index 1a3e98d9e..839f94058 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_fb_io_b8_f8.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -37,19 +36,19 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_fb_io_b8_f8::DispatchData> FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg) const
+    FullyConnected_fb_io_b8_f8::DispatchData FullyConnected_fb_io_b8_f8::SetDefault(const fully_connected_params& arg, int ) const
     {
         auto kd = FullyConnectedBlockKernelBase::SetDefault(arg);
 
         const auto& output = arg.output;
         
         size_t groups_per_batches = GetLocalGroupsSize(arg);
-        kd->gws0 = output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches);
-        kd->gws1 = groups_per_batches;
-        kd->lws0 = 8;
-        kd->lws1 = 1;
+        kd.gws0 = Align(output.LogicalSize() / (GetNeuronsPerWorkItem(arg) * GetBatchesPerWorkItem(arg) * groups_per_batches), 8);
+        kd.gws1 = groups_per_batches;
+        kd.lws0 = 8;
+        kd.lws1 = 1;
 
-        return std::move(kd);
+        return kd;
     }
 
     bool FullyConnected_fb_io_b8_f8::Validate(const Params& p, const optional_params& o) const
@@ -65,11 +64,17 @@ namespace kernel_selector
         const auto batches = output.Batch().v;
         const auto x_size = output.LogicalSize() / batches;
 
+        const auto& input = params.inputs[0];
+        const auto input_x_size = input.LogicalSize() / input.Batch().v;
+        const bool proper_input_aligment = (input_x_size % 8) == 0;
+        const bool proper_output_aligment = (output.LogicalSize() / (GetNeuronsPerWorkItem(params) * GetBatchesPerWorkItem(params) * GetLocalGroupsSize(params)) % 8) == 0;
         const bool bSupportedBatch = (batches % 8) == 0;
         const bool bSupportedFeature = (x_size % 8) == 0;
 
         if (!bSupportedBatch ||
-            !bSupportedFeature)
+            !bSupportedFeature ||
+            !proper_input_aligment ||
+            !proper_output_aligment)
         {
             return false;
         }
@@ -80,13 +85,22 @@ namespace kernel_selector
     KernelsData FullyConnected_fb_io_b8_f8::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
         assert(params.GetType() == KernelType::FULLY_CONNECTED);
-
+        KernelsData res = {};
         const auto& orgParams = static_cast<const fully_connected_params&>(params);
 
         float estimated_time =
             orgParams.inputs[0].GetDType() == Datatype::F16 && orgParams.output.Batch().v >= 16 ?
             FORCE_PRIORITY_3 : FORCE_PRIORITY_5;
         
-        return GetCommonKernelsData(params, optParams, DataLayout::fb, { WeightsLayout::io }, estimated_time);
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::fb, { WeightsLayout::io }, estimated_time, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.h
index d380862c1..2bb0117f4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_b8_f8.h
@@ -26,10 +26,10 @@ namespace kernel_selector {
         FullyConnected_fb_io_b8_f8() : FullyConnectedBlockKernelBase("fully_connected_gpu_fb_io_b8_f8_vload") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
-        std::unique_ptr<FullyConnectedKernelBase::DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
index b32c8a54e..01a70613e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_fb_io_block.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -35,9 +34,10 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_fb_io_block::FullyConnectedKernelBase::DispatchData> FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg) const
+
+    FullyConnected_fb_io_block::DispatchData FullyConnected_fb_io_block::SetDefault(const fully_connected_params& arg, int ) const
     {
-        auto kd = std::unique_ptr<DispatchData>(new DispatchData(*FullyConnectedKernelBase::SetDefault(arg)));
+        auto kd = FullyConnectedKernelBase::SetDefault(arg);
         const auto& output = arg.output;
         
         auto batch_size = output.Batch().v;
@@ -55,38 +55,37 @@ namespace kernel_selector
         // for at least one input data set from batch.
         auto rg_count = CeilDiv(response_size, units_per_sg_read);
 
-        kd->lws0 = sub_group_size;
+        kd.lws0 = sub_group_size;
         // Number of work items needed to process all response groups.
-        kd->gws0 = rg_count * sub_group_size;
-        kd->lws1 = 1;
-        kd->gws1 = batch_size / units_per_sg_read;
-
-        kd->unit_byte_size    = unit_byte_size;
-        kd->chunk_type        = chunk_type;
-        kd->chunk_byte_size   = chunk_byte_size;
-        kd->units_per_chunk   = units_per_chunk;
-        kd->bytes_per_sg_read = sub_group_size * chunk_byte_size;
-        kd->units_per_sg_read = units_per_sg_read;
-        kd->rg_count          = (uint32_t)rg_count;
-        kd->last_rg_size      = response_size % units_per_sg_read;
-        return std::move(kd);
+        kd.gws0 = rg_count * sub_group_size;
+        kd.lws1 = 1;
+        kd.gws1 = batch_size / units_per_sg_read;
+
+        kd.unit_byte_size    = unit_byte_size;
+        kd.chunk_type        = chunk_type;
+        kd.chunk_byte_size   = chunk_byte_size;
+        kd.units_per_chunk   = units_per_chunk;
+        kd.bytes_per_sg_read = sub_group_size * chunk_byte_size;
+        kd.units_per_sg_read = units_per_sg_read;
+        kd.rg_count          = (uint32_t)rg_count;
+        kd.last_rg_size      = response_size % units_per_sg_read;
+        return kd;
     }
 
     JitConstants FullyConnected_fb_io_block::GetJitConstants(const fully_connected_params& params, const FullyConnectedKernelBase::DispatchData& run_info) const
     {
-        auto &d = static_cast<const DispatchData&>(run_info);
         auto cldnn_jit = FullyConnectedKernelBase::GetJitConstants(params, run_info);
         cldnn_jit.AddConstants({
-            MakeJitConstant("SUB_GROUP_SIZE",        d.lws0),
-            MakeJitConstant("WORK_ITEMS_PER_BATCH",  d.gws1),
-            MakeJitConstant("UNIT_BYTE_SIZE",        d.unit_byte_size),
-            MakeJitConstant("CHUNK_TYPE",            d.chunk_type),
-            MakeJitConstant("CHUNK_BYTE_SIZE",       d.chunk_byte_size),
-            MakeJitConstant("UNITS_PER_CHUNK",       d.units_per_chunk),
-            MakeJitConstant("BYTES_PER_SG_READ",     d.bytes_per_sg_read),
-            MakeJitConstant("UNITS_PER_SG_READ",     d.units_per_sg_read),
-            MakeJitConstant("RG_COUNT",              d.rg_count),
-            MakeJitConstant("LAST_RG_SIZE",          d.last_rg_size),
+            MakeJitConstant("SUB_GROUP_SIZE",        run_info.lws0),
+            MakeJitConstant("WORK_ITEMS_PER_BATCH",  run_info.gws1),
+            MakeJitConstant("UNIT_BYTE_SIZE",        run_info.unit_byte_size),
+            MakeJitConstant("CHUNK_TYPE",            run_info.chunk_type),
+            MakeJitConstant("CHUNK_BYTE_SIZE",       run_info.chunk_byte_size),
+            MakeJitConstant("UNITS_PER_CHUNK",       run_info.units_per_chunk),
+            MakeJitConstant("BYTES_PER_SG_READ",     run_info.bytes_per_sg_read),
+            MakeJitConstant("UNITS_PER_SG_READ",     run_info.units_per_sg_read),
+            MakeJitConstant("RG_COUNT",              run_info.rg_count),
+            MakeJitConstant("LAST_RG_SIZE",          run_info.last_rg_size),
         });
         return cldnn_jit;
     }
@@ -144,6 +143,18 @@ namespace kernel_selector
         //       (fb == fyxb flatten fyx, not yxfb flatten yxf).
         //       the order of the add operation cause some numeric changes. in order to avoid them right now we use yxfb/oiyx instead.
         // return GetCommonKernelsData(params, optParams, DataLayout::fb, WeightsLayout::io, estimated_time);
-        return GetCommonKernelsData(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, estimated_time);
-    }
+        //return GetCommonKernelsData(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, estimated_time);
+
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, estimated_time, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+    	}
+
+        return res;
+	}
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h
index c3c433c37..98ced4a69 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_block.h
@@ -26,29 +26,11 @@ namespace kernel_selector {
         FullyConnected_fb_io_block() : FullyConnectedKernelBase("fully_connected_gpu_fb_io_block_fp16") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
-        struct DispatchData : public FullyConnectedKernelBase::DispatchData
-        {
-            DispatchData(const FullyConnectedKernelBase::DispatchData& base_dispatch_data)
-                : FullyConnectedKernelBase::DispatchData(base_dispatch_data),
-                unit_byte_size(0), chunk_type(nullptr), chunk_byte_size(0), units_per_chunk(0),
-                bytes_per_sg_read(0), units_per_sg_read(0), last_rg_size(0), rg_count(0)
-            {}
-
-            uint32_t    unit_byte_size;
-            const char *chunk_type;
-            uint32_t    chunk_byte_size;
-            uint32_t    units_per_chunk;
-            uint32_t    bytes_per_sg_read;
-            uint32_t    units_per_sg_read;
-            uint32_t    last_rg_size;
-            uint32_t    rg_count;
-        };
-
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         JitConstants GetJitConstants(const fully_connected_params& params, const FullyConnectedKernelBase::DispatchData& kd) const override;
-        std::unique_ptr<FullyConnectedKernelBase::DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp
index f91078a04..84e3c80f6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_fb_io_ref.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -42,7 +41,15 @@ namespace kernel_selector
         //       (fb == fyxb flatten fyx, not yxfb flatten yxf).
         //       the order of the add operation cause some numeric changes. in order to avoid them right now we use yxfb/oiyx instead.
         // return GetCommonKernelsData(params, optParams, DataLayout::fb, WeightsLayout::io, FORCE_PRIORITY_6);
-
-        return GetCommonKernelsData(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, FORCE_PRIORITY_6);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::yxfb, { WeightsLayout::yxio }, FORCE_PRIORITY_6, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h
index 9d5e5b5c7..46ee639a9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_io_ref.h
@@ -26,6 +26,8 @@ namespace kernel_selector {
         FullyConnected_fb_io_ref() : FullyConnectedKernelBase("fully_connected_gpu_fb_io_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp
index 5d1c8aa25..8232e5e34 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_fb_oi_b8_ref.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -35,17 +34,17 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_fb_oi_b8_ref::DispatchData> FullyConnected_fb_oi_b8_ref::SetDefault(const fully_connected_params& arg) const
+    FullyConnected_fb_oi_b8_ref::DispatchData FullyConnected_fb_oi_b8_ref::SetDefault(const fully_connected_params& arg, int ) const
     {
         auto kd = FullyConnectedKernelBase::SetDefault(arg);
 
         const auto& output = arg.output;
-        kd->gws0 = output.Batch().v;
-        kd->gws1 = output.LogicalSize() / kd->gws0;
-        kd->lws0 = 8;
-        kd->lws1 = 1;
+        kd.gws0 = output.Batch().v;
+        kd.gws1 = output.LogicalSize() / kd.gws0;
+        kd.lws0 = 8;
+        kd.lws1 = 1;
 
-        return std::move(kd);
+        return kd;
     }
 
     bool FullyConnected_fb_oi_b8_ref::Validate(const Params& p, const optional_params& o) const
@@ -67,6 +66,15 @@ namespace kernel_selector
 
     KernelsData FullyConnected_fb_oi_b8_ref::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
-        return GetCommonKernelsData(params, optParams, DataLayout::fb, { WeightsLayout::oi }, FORCE_PRIORITY_6);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::fb, { WeightsLayout::oi }, FORCE_PRIORITY_6, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.h
index 0c063e23b..f7a3785c6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_b8_ref.h
@@ -26,10 +26,10 @@ namespace kernel_selector {
         FullyConnected_fb_oi_b8_ref() : FullyConnectedKernelBase("fully_connected_gpu_fb_oi_b8_fp32_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
-        std::unique_ptr<FullyConnectedKernelBase::DispatchData> SetDefault(const fully_connected_params& arg) const override;
+        DispatchData SetDefault(const fully_connected_params& arg, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp
index 6d167011e..8ace81254 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_fb_oi_ref.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -38,6 +37,15 @@ namespace kernel_selector
 
     KernelsData FullyConnected_fb_oi_ref::GetKernelsData(const Params& params, const optional_params& optParams) const
     {
-        return GetCommonKernelsData(params, optParams, DataLayout::fb, { WeightsLayout::oi });
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, optParams, DataLayout::fb, { WeightsLayout::oi }, DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h
index 814ad60cb..4f74e77e7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_fb_oi_ref.h
@@ -26,6 +26,8 @@ namespace kernel_selector {
         FullyConnected_fb_oi_ref() : FullyConnectedKernelBase("fully_connected_gpu_fb_oi_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp
new file mode 100644
index 000000000..28f60526c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.cpp
@@ -0,0 +1,116 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fully_connected_kernel_imad.h"
+
+// IMAD Fully_Connected primitive implementation.
+// Limitations are:
+// 1. Input=Fx1x1 with Filter=1x1
+// 2. No data padding
+
+namespace kernel_selector
+{
+    ParamsKey FullyConnectedKernelIMAD::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableInputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableInputWeightsType(WeightsType::INT8);
+        k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableOutputLayout(DataLayout::bf);
+        k.EnableBiasPerOutput();
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableInt8Quantization();
+        k.EnableOutputCalibration();
+        return k;
+    }
+
+    FullyConnectedKernelIMAD::Parent::DispatchData
+    FullyConnectedKernelIMAD::SetDefault(const fully_connected_params& params, int) const
+    {
+        const int simdSize = 16;
+
+        auto runInfo = Parent::SetDefault(params);
+
+        runInfo.gws0 = RoundUp(params.output.Feature().v, simdSize);
+        runInfo.gws1 = params.output.Batch().v;
+        runInfo.gws2 = 1;
+
+        runInfo.lws0 = simdSize;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    } // SetDefault
+
+    bool FullyConnectedKernelIMAD::Validate(const Params& params, const optional_params& options) const
+    {
+        if (!Parent::Validate(params, options)) {
+            return false;
+        }
+
+        const auto& newParams = static_cast<const fully_connected_params&>(params);
+        const auto& in = newParams.inputs[0];
+        const auto& weights = newParams.weights;
+
+        if ((in.X().v != 1) ||
+            (in.Y().v != 1) ||
+            (weights.X().v != 1) ||
+            (weights.Y().v != 1)) {
+            // Currently only Input=Fx1x1 with Filter=1x1 is supported
+            return false;
+        }
+        if ((in.X().pad.before != 0) ||
+            (in.X().pad.after != 0) ||
+            (in.Y().pad.before != 0) ||
+            (in.Y().pad.after != 0)) {
+            // Padding is not supported
+            return false;
+        }
+        if (in.Feature().v % (4 * 8)) {
+            // Algorith requires 4 bytes read as one int
+            // with specific weight format os_is_yx_osv16_isv4
+            // wich will read 8 elements per reading
+            return false;
+        }
+
+        return true;
+    } // Validate
+
+    KernelsData FullyConnectedKernelIMAD::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(
+                                params, options, DataLayout::b_fs_yx_fsv4,
+                                { WeightsLayout::os_is_yx_osv16_isv4 },
+                                FORCE_PRIORITY_1, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h
new file mode 100644
index 000000000..e6c3bf8c1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_imad.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fully_connected_kernel_base.h"
+
+namespace kernel_selector {
+
+    class FullyConnectedKernelIMAD : public FullyConnectedKernelBase
+    {
+    public:
+        using Parent = FullyConnectedKernelBase;
+
+        FullyConnectedKernelIMAD() : Parent("fully_connected_gpu_imad") {}
+
+        KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        ParamsKey GetSupportedKey() const override;
+        virtual bool Validate(const Params& params, const optional_params& options) const override;
+        DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp
index 78bc49785..b7942a929 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.cpp
@@ -40,30 +40,38 @@ namespace kernel_selector
         return k;
     }
 
-    std::unique_ptr<FullyConnected_image_tutorial::Parent::DispatchData> FullyConnected_image_tutorial::SetDefault(const fully_connected_params& params) const
+    FullyConnected_image_tutorial::DispatchData FullyConnected_image_tutorial::SetDefault(const fully_connected_params& params, int ) const
     {
         auto runInfo = Parent::SetDefault(params);
         
         std::vector<size_t> global = { params.output.Feature().v, params.output.Batch().v };
         std::vector<size_t> local  = GetOptimalLocalWorkGroupSizes(global);
 
-        runInfo->gws0 = global[0];
-        runInfo->gws1 = global[1];
-        runInfo->gws2 = 1;
+        runInfo.gws0 = global[0];
+        runInfo.gws1 = global[1];
+        runInfo.gws2 = 1;
 
-        runInfo->lws0 = local[0];
-        runInfo->lws1 = local[1];
-        runInfo->lws2 = 1;
+        runInfo.lws0 = local[0];
+        runInfo.lws1 = local[1];
+        runInfo.lws2 = 1;
 
-        runInfo->effiency = TUTORIAL_PRIORITY;
+        runInfo.effiency = TUTORIAL_PRIORITY;
 
-        return std::move(runInfo);
+        return runInfo;
     }
 
     KernelsData FullyConnected_image_tutorial::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, DataLayout::bfyx,
-        { WeightsLayout::image_2d_weights_c4_fyx_b }
-        );
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::bfyx,
+                { WeightsLayout::image_2d_weights_c4_fyx_b }, DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.h
index 95adf3af5..12b35d037 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_image_tutorial.h
@@ -17,9 +17,9 @@
 #pragma once
 
 #include "fully_connected_kernel_base.h"
- 
+
 namespace kernel_selector {
-    
+
     class FullyConnected_image_tutorial : public FullyConnectedKernelBase
     {
     public:
@@ -28,9 +28,9 @@ namespace kernel_selector {
         FullyConnected_image_tutorial() : Parent("fully_connected_gpu_image_tutorial") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-        
+
     protected:
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+        ParamsKey GetSupportedKey() const override;
+        DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
index 46e4dea8d..ad5739797 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_mmad_batched.h"
-#include "kernel_selector_utils.h"
  
 namespace kernel_selector 
 {
@@ -88,7 +87,7 @@ namespace kernel_selector
         return jit;
     }
 
-    std::unique_ptr<FullyConnected_mmad_batched::Parent::DispatchData> FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params) const
+    FullyConnected_mmad_batched::DispatchData FullyConnected_mmad_batched::SetDefault(const fully_connected_params& params, int) const
     {
         auto runInfo = Parent::SetDefault(params);
         
@@ -97,21 +96,30 @@ namespace kernel_selector
         const auto of_maps = params.output.Feature().v;
         const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
 
-        runInfo->gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG
-        runInfo->gws1 = of_threads_per_batch;
-        runInfo->gws2 = 1;
+        runInfo.gws0 = params.output.Batch().v / 8; // we process 8 batches in a single WG
+        runInfo.gws1 = of_threads_per_batch;
+        runInfo.gws2 = 1;
 
-        runInfo->lws0 = 1;
-        runInfo->lws1 = sub_group_size;
-        runInfo->lws2 = 1;
+        runInfo.lws0 = 1;
+        runInfo.lws1 = sub_group_size;
+        runInfo.lws2 = 1;
 
-        runInfo->effiency = FORCE_PRIORITY_1;
-        return std::move(runInfo);
+        runInfo.effiency = FORCE_PRIORITY_1;
+        return runInfo;
     }
 
     KernelsData FullyConnected_mmad_batched::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, DataLayout::fs_bs_yx_bsv4_fsv32,
-        { WeightsLayout::os_is_yx_isa8_osv8_isv4 }, FORCE_PRIORITY_1);
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::fs_bs_yx_bsv4_fsv32,
+                { WeightsLayout::os_is_yx_isa8_osv8_isv4 }, FORCE_PRIORITY_1, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
index 61af89f19..b08fe3217 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_mmad_batched.h
@@ -28,11 +28,11 @@ namespace kernel_selector {
         FullyConnected_mmad_batched() : Parent("fully_connected_gpu_mmad_batched") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
-        
+
     protected:
+        ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
         JitConstants GetJitConstants(const fully_connected_params& params, const DispatchData& kd) const override;
-        std::unique_ptr<DispatchData> SetDefault(const fully_connected_params& params) const override;
+        DispatchData SetDefault(const fully_connected_params& params, int autoTuneIndex = -1) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
index 529e1ca33..80a345e45 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_selector.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@
 #include "fully_connected_kernel_image_tutorial.h"
 #include "fully_connected_kernel_MMAD.h"
 #include "fully_connected_kernel_mmad_batched.h"
+#include "fully_connected_kernel_imad.h"
 
 namespace kernel_selector {
 
@@ -51,10 +52,11 @@ namespace kernel_selector {
         Attach<FullyConnected_bf_io_input_spatial>();
         Attach<FullyConnectedKernelMMAD>();
         Attach<FullyConnected_mmad_batched>();
+        Attach<FullyConnectedKernelIMAD>();
     }
 
     KernelsData fully_connected_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
     {
         return GetAutoTuneBestKernel(params, options, KernelType::FULLY_CONNECTED);
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp
index 5afb9ca41..9a5d2de77 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.cpp
@@ -15,7 +15,6 @@
 */
 
 #include "fully_connected_kernel_yxfb_ref.h"
-#include "kernel_selector_utils.h"
 
 namespace kernel_selector 
 {
@@ -40,8 +39,16 @@ namespace kernel_selector
 
     KernelsData FullyConnected_yxfb_ref::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        return GetCommonKernelsData(params, options, DataLayout::yxfb,
-            { WeightsLayout::io, WeightsLayout::oi, WeightsLayout::oiyx, WeightsLayout::oyxi, WeightsLayout::iyxo, WeightsLayout::yxio }
-        );
+        KernelsData res = {};
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, DataLayout::yxfb,
+                { WeightsLayout::io, WeightsLayout::oi, WeightsLayout::oiyx, WeightsLayout::oyxi, WeightsLayout::iyxo, WeightsLayout::yxio }, DONT_USE_IF_HAVE_SOMETHING_ELSE, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        return res;
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h
index c76e50bf2..1dcc5d007 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected/fully_connected_kernel_yxfb_ref.h
@@ -26,6 +26,8 @@ namespace kernel_selector {
         FullyConnected_yxfb_ref() : FullyConnectedKernelBase("fully_connected_gpu_yxfb_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
index e40848af7..39d4817f8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_base.cpp
@@ -80,7 +80,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, !orgParams.bias.empty());
         kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
 
         kd.estimatedTime = runInfo.effiency;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_ref.h
index 7d1068b83..c70293c4f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_input/fully_connected_grad_input_kernel_ref.h
@@ -26,6 +26,8 @@ namespace kernel_selector {
         FullyConnectedGradInputKernelRef() : FullyConnectedGradInputKernelBase("fully_connected_grad_input_gpu_ref") {}
         virtual ~FullyConnectedGradInputKernelRef() {}
 
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
index 67328ac99..3af05f93e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_base.cpp
@@ -82,7 +82,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty());
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, !orgParams.bias.empty());
         if (orgParams.use_momentum)
         {
             kernel.arguments.push_back({ ArgumentDescriptor::Types::PREV_WEIGHTS_GRADIENT, 0 });
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.h
index 929128753..78bba9c39 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fully_connected_grad_weights/fully_connected_grad_weights_kernel_ref.h
@@ -26,6 +26,7 @@ namespace kernel_selector {
         FullyConnectedGradWeightsKernelRef() : FullyConnectedGradWeightsKernelBase("fully_connected_grad_weights_gpu_ref") {}
         virtual ~FullyConnectedGradWeightsKernelRef() {}
 
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.cpp
new file mode 100644
index 000000000..e74eb7fb8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.cpp
@@ -0,0 +1,176 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_bn_scale_kernel_base.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+
+namespace kernel_selector 
+{
+    bool fused_conv_bn_scale_kernel_base::Validate(const Params& p, const optional_params& o) const
+    {
+        if (p.GetType() != KernelType::FUSED_CONV_BN_SCALE ||
+            o.GetType() != KernelType::FUSED_CONV_BN_SCALE)
+        {
+            return false;
+        }
+
+        const fused_conv_bn_scale_params& params = static_cast<const fused_conv_bn_scale_params&>(p);
+        const fused_conv_bn_scale_optional_params& optParams = static_cast<const fused_conv_bn_scale_optional_params&>(o);
+
+        bool bSupportedWeightsLayout = false;
+
+        for (WeightsLayout l : GetSupportedWeightLayouts(params))
+        {
+            bSupportedWeightsLayout |= params.weights.GetLayout() == l;
+        }
+
+        const bool bWeightsOK = bSupportedWeightsLayout || optParams.allowStaticInputReordering;
+
+        return bWeightsOK;
+    }
+
+    JitConstants fused_conv_bn_scale_kernel_base::GetJitConstants(const fused_conv_bn_scale_params& params, const DispatchData&) const
+    {
+        JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
+        const auto& padding = params.padding;
+        const auto& input = params.inputs[0];
+
+        int64_t input_offset_with_padding = (int64_t)input.GetFirstElementOffset() - padding.x*input.X().pitch - input.Y().pitch*padding.y;
+        input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
+
+        mem_consts.AddConstants({
+            MakeJitConstant("STRIDE",                       params.stride),
+            MakeJitConstant("PADDING",                      params.padding),
+            MakeJitConstant("FILTER_ARRAY_NUM",             params.split),
+            MakeJitConstant("DILATION",                     params.dilation),
+            MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
+            MakeJitConstant("EPSILON", params.epsilon)
+        });
+
+        if (params.fused_in_training)
+            mem_consts.AddConstant(MakeJitConstant("FUSED_TRAINING", 1));
+        if (params.scale_bias)
+            mem_consts.AddConstant(MakeJitConstant("SCALE_BIAS_TERM", 1));
+
+        return mem_consts;
+    }
+
+    bool fused_conv_bn_scale_kernel_base::CheckWorkGroups(const DispatchData& kd)
+    {
+        if (kd.gws0 == 0 ||
+            kd.gws1 == 0 ||
+            kd.gws2 == 0 ||
+            kd.lws0 == 0 ||
+            kd.lws1 == 0 ||
+            kd.lws2 == 0)
+        {
+            return false;
+        }
+
+        if ((kd.gws0 % kd.lws0) != 0 ||
+            (kd.gws1 % kd.lws1) != 0 ||
+            (kd.gws2 % kd.lws2) != 0)
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    fused_conv_bn_scale_kernel_base::DispatchData fused_conv_bn_scale_kernel_base::SetDefault(const fused_conv_bn_scale_params& params) const
+    {
+        DispatchData kd;
+
+        const auto& out = params.output;
+        kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
+        std::vector<size_t> global;
+        if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf)
+        {
+            global = { out.X().v, out.Y().v, out.Feature().v*out.Batch().v };
+        }
+        else
+        {
+            global = { out.Feature().v*out.Batch().v, out.X().v, out.Y().v };
+        }
+
+        auto local = GetOptimalLocalWorkGroupSizes(global);
+
+        kd.gws0 = global[0];
+        kd.gws1 = global[1];
+        kd.gws2 = global[2];
+
+        kd.lws0 = local[0];
+        kd.lws1 = local[1];
+        kd.lws2 = local[2];
+
+        kd.effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+        return kd;
+    }
+
+    KernelsData fused_conv_bn_scale_kernel_base::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelData kd = KernelData::Default<fused_conv_bn_scale_params>(params);
+        fused_conv_bn_scale_params& newParams = *static_cast<fused_conv_bn_scale_params*>(kd.params.get());
+
+        DispatchData runInfo = SetDefault(newParams);
+        
+        if (!CheckWorkGroups(runInfo))
+        {
+            // Internal Error - wrong calculation of global/local work group sizes
+            return{};
+        }
+
+        bool succeed = UpdateWeightsParams(
+            newParams,
+            options,
+            GetSupportedWeightLayouts(newParams),
+            kd.weightsReorderParams);
+
+        if (!succeed)
+        {
+            return{};
+        }
+
+        auto finalKernelName = GetKernelName(newParams);
+        auto cldnnJit = GetJitConstants(newParams, runInfo);
+        auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
+
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, "", true, !newParams.bias.empty(), 1);
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
+        uint32_t idx = 1;
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
+        if (newParams.scale_bias)
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
+        if (newParams.fused_in_training)
+        {
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx++ });
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, idx });
+        }
+
+        kd.estimatedTime = estimated_time;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.h
new file mode 100644
index 000000000..cdd887878
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.h
@@ -0,0 +1,81 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "weight_bias_kernel_base.h"
+#include "actual_kernels/convolution/convolution_params.h"
+
+namespace kernel_selector 
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // fused_conv_bn_scale_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct fused_conv_bn_scale_params : public weight_bias_params
+    {
+        fused_conv_bn_scale_params() : weight_bias_params(KernelType::FUSED_CONV_BN_SCALE) {}
+
+        uSize    filterSize;
+        uSize    stride;
+        uSize    dilation;
+        uSize    padding;
+        uint32_t split = 1;
+        bool     fused_in_training = false;
+        bool     scale_bias = false;
+        float    epsilon = 0.00001f;
+
+        ParamsKey GetParamsKey() const override
+        {
+            ParamsKey k = weight_bias_params::GetParamsKey();
+
+            if (split > 1)
+            {
+                k.EnableSplitSupport();
+            }
+
+            return k;
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // fused_conv_bn_scale_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct fused_conv_bn_scale_optional_params : weight_bias_optional_params
+    {
+        fused_conv_bn_scale_optional_params() : weight_bias_optional_params(KernelType::FUSED_CONV_BN_SCALE) {}
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // fused_conv_bn_scale_kernel_base
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class fused_conv_bn_scale_kernel_base : public WeightBiasKernelBase
+    {
+    public:
+        using WeightBiasKernelBase::WeightBiasKernelBase;
+        virtual ~fused_conv_bn_scale_kernel_base() {}
+
+        using DispatchData = CommonDispatchData;
+    
+    protected:
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_bn_scale_params&) const = 0;
+        virtual std::string GetKernelName(const fused_conv_bn_scale_params&) const { return kernelName; }
+        virtual bool Validate(const Params& p, const optional_params& o) const override;
+        virtual JitConstants GetJitConstants(const fused_conv_bn_scale_params& params, const DispatchData& kd) const;
+        virtual DispatchData SetDefault(const fused_conv_bn_scale_params& params) const;
+        static bool CheckWorkGroups(const DispatchData&);
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params& options,  float estimated_time) const;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_ref.cpp
new file mode 100644
index 000000000..e3317bfd3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_ref.cpp
@@ -0,0 +1,74 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_bn_scale_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+    
+    ParamsKey fused_conv_bn_scale_kernel_ref::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableSplitSupport();
+        k.EnableBatching();
+        k.DisableTuning();
+        return k;
+    }
+
+    fused_conv_bn_scale_kernel_base::DispatchData fused_conv_bn_scale_kernel_ref::SetDefault(const fused_conv_bn_scale_params& arg) const
+    {
+        DispatchData runInfo = fused_conv_bn_scale_kernel_base::SetDefault(arg);
+
+        runInfo.effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+        runInfo.gws0 = arg.output.Batch().v;
+        runInfo.gws1 = arg.output.Feature().v; 
+        runInfo.gws2 = 1;
+
+        runInfo.lws0 = std::min(std::max(runInfo.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
+        while (runInfo.gws0 % runInfo.lws0 != 0)
+        {
+            --runInfo.lws0;
+        }
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    }
+
+    JitConstants fused_conv_bn_scale_kernel_ref::GetJitConstants(const fused_conv_bn_scale_params& params, const DispatchData& runInfo) const
+    {
+        auto jit = Parent::GetJitConstants(params, runInfo);
+
+        return jit;
+    }
+
+    KernelsData fused_conv_bn_scale_kernel_ref::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kd = GetCommonKernelsData(params, options, DONT_USE_IF_HAVE_SOMETHING_ELSE);
+ 
+        return kd;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_ref.h
new file mode 100644
index 000000000..fc36068e5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_ref.h
@@ -0,0 +1,44 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_bn_scale_kernel_base.h"
+ 
+namespace kernel_selector {
+    
+    class fused_conv_bn_scale_kernel_ref : public fused_conv_bn_scale_kernel_base
+    {
+    public:
+        using Parent = fused_conv_bn_scale_kernel_base;
+
+        fused_conv_bn_scale_kernel_ref() : fused_conv_bn_scale_kernel_base("fused_conv_bn_scale_kernel_ref") {}
+        virtual ~fused_conv_bn_scale_kernel_ref() {}
+
+        KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_bn_scale_params&) const override
+        {
+            return{
+                WeightsLayout::oiyx,
+            };
+        }
+        DispatchData SetDefault(const fused_conv_bn_scale_params& arg) const override;
+        JitConstants GetJitConstants(const fused_conv_bn_scale_params& params, const DispatchData& kd) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B32_B64.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_selector.cpp
index 08b19536b..f51cdc139 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B32_B64.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_selector.cpp
@@ -14,16 +14,18 @@
 // limitations under the License.
 */
 
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
+#include "fused_conv_bn_scale_kernel_selector.h"
+#include "fused_conv_bn_scale_kernel_ref.h"
+ 
 namespace kernel_selector 
 {
-    //SKL GT2
-    void tuning_cache_1912_B32_B64(tuning_data& td)
+    fused_conv_bn_scale_kernel_selector::fused_conv_bn_scale_kernel_selector()
     {
-        td.td.insert({
-            
-            { "9500850790449116723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 16) },
-            });
+        Attach<fused_conv_bn_scale_kernel_ref>();
+    }
+
+    KernelsData fused_conv_bn_scale_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::FUSED_CONV_BN_SCALE);
     }
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_selector.h
new file mode 100644
index 000000000..2b63db74e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_bn_scale/fused_conv_bn_scale_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+ 
+namespace kernel_selector 
+{
+    class fused_conv_bn_scale_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static fused_conv_bn_scale_kernel_selector &Instance() {
+            static fused_conv_bn_scale_kernel_selector instance_;
+            return instance_;
+        }
+
+        fused_conv_bn_scale_kernel_selector();
+
+        virtual ~fused_conv_bn_scale_kernel_selector() {}
+
+        KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp
new file mode 100644
index 000000000..3ac4e9e4d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.cpp
@@ -0,0 +1,464 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_base.h"
+#include "kernel_selector_utils.h"
+#include "common_tools.h"
+
+namespace kernel_selector 
+{
+    std::string fused_conv_eltwise_params::to_string() const
+    {
+        std::stringstream s;
+
+        s << base_params::to_string() << "_";
+        if (bias.empty())
+        {
+            s << "no_bias" << "_";
+        }
+        else
+        {
+            s << "bias_" << bias[0].PhysicalSize() << "_";
+        }
+
+        s << conv.filterSize.x << "_" << conv.filterSize.y << "_";
+        s << conv.stride.x << "_" << conv.stride.y << "_";
+        s << conv.dilation.x << "_" << conv.dilation.y << "_";
+        s << conv.padding.x << "_" << conv.padding.y << "_";
+        s << conv.split;
+
+        return s.str();
+    }
+
+    ParamsKey fused_conv_eltwise_params::GetParamsKey() const
+    {
+        ParamsKey k = weight_bias_params::GetParamsKey();
+
+        if (conv.split > 1)
+        {
+            k.EnableFusedConvEltwSplitSupport();
+        }
+
+        if (conv.dilation.x != 1 ||
+            conv.dilation.y != 1)
+        {
+            k.EnableFusedConvEltwDilation();
+        }
+
+        if (conv.depthwise_separable_opt)
+        {
+            k.EnableFusedConvEltwDepthwiseSeparableOpt();
+        }
+
+        if (conv.transposed)
+        {
+            k.EnableFusedConvEltwTranspose();
+        }
+
+        if (conv.int8_quantization)
+        {
+            k.EnableFusedConvEltwInt8Quantization();
+        }
+
+        if (conv.output_calibration)
+        {
+            k.EnableFusedConvEltwOutputCalibration();
+        }
+
+        if (conv.local_convolution)
+        {
+            k.EnableFusedConvEltwLocalConvolution();
+        }
+
+        if (second_input_in_output)
+        {
+            k.EnableFusedConvEltwiseRWOutOpt();
+        }
+
+        return k;
+    }
+
+    bool fused_conv_eltwise_kernel_base::Validate(const Params& p, const optional_params& o) const
+    {
+        if (p.GetType() != KernelType::FUSED_CONV_ELTWISE ||
+            o.GetType() != KernelType::FUSED_CONV_ELTWISE)
+        {
+            return false;
+        }
+
+        const fused_conv_eltwise_params& params = static_cast<const fused_conv_eltwise_params&>(p);
+        const fused_conv_eltwise_optional_params& optParams = static_cast<const fused_conv_eltwise_optional_params&>(o);
+
+        bool bSupportedWeightsLayout = false;
+
+        for (WeightsLayout l : GetSupportedWeightLayouts(params))
+        {
+            bSupportedWeightsLayout |= params.weights.GetLayout() == l;
+        }
+
+        const bool bWeightsOK = bSupportedWeightsLayout || optParams.allowStaticInputReordering;
+
+        if (!bWeightsOK)
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    JitConstants fused_conv_eltwise_kernel_base::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const
+    {
+        JitConstants mem_consts = WeightBiasKernelBase::GetJitConstants(params);
+        const auto& padding = params.conv.padding;
+        const auto& input = params.inputs[0];
+
+        int64_t input_offset_with_padding = (int64_t)input.GetFirstElementOffset() - padding.x*input.X().pitch - input.Y().pitch*padding.y;
+        input_offset_with_padding = std::max(input_offset_with_padding, (int64_t)0);
+
+        mem_consts.AddConstants({
+            MakeJitConstant("STRIDE",                       params.conv.stride),
+            MakeJitConstant("PADDING",                      params.conv.padding),
+            MakeJitConstant("DILATION",                     params.conv.dilation),
+            MakeJitConstant("FILTER_ARRAY_NUM",             params.conv.split),
+            MakeJitConstant("INPUT0_OFFSET_WITH_PADDING",   input_offset_with_padding),
+            MakeJitConstant("DEPTHWISE_SEPARABLE_OPT",      params.conv.depthwise_separable_opt),
+            MakeJitConstant("QUANTIZATION_TERM",            params.conv.int8_quantization),
+        });
+
+        if (params.conv.int8_quantization)
+        {
+            mem_consts.AddConstants({ MakeJitConstant("W_QF", params.conv.weights_quantization_factors[0]) });
+            mem_consts.AddConstants({ MakeJitConstant("I_QF",params.conv.input_quantization_factor) });
+
+            if (params.conv.output_calibration)
+            {
+                mem_consts.AddConstant(MakeJitConstant("CALIBRATION_TERM", params.conv.output_calibration));
+                mem_consts.AddConstant(MakeJitConstant("O_QF", params.conv.output_calibration_factors[0]));
+
+            }
+            else
+                mem_consts.AddConstants({ MakeJitConstant("O_QF", params.conv.output_quantization_factor) });
+        }
+
+        if (params.conv.local_convolution)
+        {
+            mem_consts.AddConstants({ MakeJitConstant("LOCAL_CONVOLUTION", params.conv.local_convolution) });
+        }
+
+        JitConstants eltw_activations = MakeActivationJitConstants(params.eltw.activation, "_ELTW");
+        mem_consts.Merge(eltw_activations);
+
+        mem_consts.AddConstant(MakeJitConstant("IN_OUT_OPT", params.second_input_in_output ? 1 : 0));
+
+        std::vector<uint32_t> unrollLoopParams{
+            params.conv.filterSize.x,
+            params.conv.filterSize.y,
+            (uint32_t)kd.gemmStyle.globalWorkSizeDX,
+            (uint32_t)kd.gemmStyle.globalWorkSizeDY,
+            (uint32_t)kd.gemmStyle.globalWorkSizeDZ,
+            (uint32_t)kd.gemmStyle.subBlockDimM,
+            (uint32_t)kd.gemmStyle.subBlockDimK,
+            (uint32_t)kd.gemmStyle.subBlockDimN
+        };
+
+        auto loopCount = *std::max_element(unrollLoopParams.begin(), unrollLoopParams.end());
+
+        JitConstants mem_consts_loop = MakeLoopUnrollParamsJitConstants(loopCount);
+        mem_consts.Merge(mem_consts_loop);
+
+        return mem_consts;
+    }
+
+    bool fused_conv_eltwise_kernel_base::CheckWorkGroups(const fused_conv_eltwise_kernel_base::DispatchData& kd)
+    {
+        if (kd.gws0 == 0 ||
+            kd.gws1 == 0 ||
+            kd.gws2 == 0 ||
+            kd.lws0 == 0 ||
+            kd.lws1 == 0 ||
+            kd.lws2 == 0)
+        {
+            return false;
+        }
+
+        if ((kd.gws0 % kd.lws0) != 0 ||
+            (kd.gws1 % kd.lws1) != 0 ||
+            (kd.gws2 % kd.lws2) != 0)
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    namespace
+    {
+        bool CheckTensorForSplit(const DataTensor& t, uint32_t split)
+        {
+            if (t.PitchesDifferFromLogicalDims())
+            {
+                auto feature = t.Feature();
+                auto featureIndex = DataTensor::Channelndex(t.GetLayout(), Tensor::DataChannelName::FEATURE);
+                if (featureIndex >= 0 && featureIndex+1 < (int)DataTensor::ChannelsCount(t.GetLayout()))
+                {
+                    if (feature.v*split <= t.GetDims()[featureIndex+1].pitch)
+                    {
+                        Tensor::NDims newDims = t.GetDims();
+                        newDims[featureIndex].v = feature.v*split;
+                        
+                        DataTensor newTensor{ newDims, t.GetDType(), t.GetLayout(), t.GetViewOffset(), t.PhysicalSize(), t.GetPaddedVal()};
+
+                        if (newTensor.PitchesDifferFromLogicalDims() == false)
+                        {
+                            return true;
+                        }
+                    }
+                }
+
+                return false;
+            }
+
+            return true;
+        }
+    }
+
+    bool fused_conv_eltwise_kernel_base::CheckPitchForSplitOnly(const fused_conv_eltwise_params& params)
+    {
+        // TODO: it's better to add pitch+offset support than handle this case
+        return CheckTensorForSplit(params.inputs[0], params.conv.split);
+    }
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_base::SetDefault(const fused_conv_eltwise_params& params, int) const
+    {
+        DispatchData kd;
+
+        const auto& out = params.output;
+        kd.fp16UnitUsed = out.GetDType() == Datatype::F16;
+        std::vector<size_t> global;
+        if (params.output.GetLayout() == DataLayout::bfyx || params.output.GetLayout() == DataLayout::byxf)
+        {
+            global = { out.X().v, out.Y().v, out.Feature().v*out.Batch().v };
+        }
+        else
+        {
+            global = { out.Feature().v*out.Batch().v, out.X().v, out.Y().v };
+        }
+
+        auto local = GetOptimalLocalWorkGroupSizes(global);
+
+        kd.gws0 = global[0];
+        kd.gws1 = global[1];
+        kd.gws2 = global[2];
+
+        kd.lws0 = local[0];
+        kd.lws1 = local[1];
+        kd.lws2 = local[2];
+
+        kd.cldnnStyle.blockWidth = 1;
+        kd.cldnnStyle.blockHeight = 1;
+        kd.cldnnStyle.prefetch = 0;
+        kd.cldnnStyle.inputBlockArraySize = 0;
+        kd.cldnnStyle.inputBlockWidth = 0;
+
+        kd.gemmStyle.globalWorkSizeDX = 1;
+        kd.gemmStyle.globalWorkSizeDY = 1;
+        kd.gemmStyle.globalWorkSizeDZ = 1;
+        kd.gemmStyle.subBlockDimK = 1;
+        kd.gemmStyle.subBlockDimM = 0;
+        kd.gemmStyle.subBlockDimN = 0;
+        kd.effiency = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+        return kd;
+    }
+
+    KernelsData fused_conv_eltwise_kernel_base::GetCommonKernelsData(const Params& params, const optional_params& options, const std::string exeMode, int autoTuneIndex) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelData kd = KernelData::Default<fused_conv_eltwise_params>(params);
+        fused_conv_eltwise_params& newParams = *static_cast<fused_conv_eltwise_params*>(kd.params.get());
+
+        if (NeedPaddedInput())
+        {
+            kd.reorderInput = CovolutionUpdateInputParams(newParams);
+        }
+        DispatchData runInfo = SetDefault(newParams, autoTuneIndex);
+
+        if (!CheckWorkGroups(runInfo))
+        {
+            // Internal Error - wrong calculation of global/local work group sizes
+            return{};
+        }
+
+        bool succeed = UpdateWeightsParams(
+            newParams,
+            options,
+            GetSupportedWeightLayouts(newParams),
+            kd.weightsReorderParams);
+
+        if (!succeed)
+        {
+            return{};
+        }
+
+        auto finalKernelName = GetKernelName(newParams);
+        auto cldnnJit = GetJitConstants(newParams, runInfo);
+        auto entryPoint = GetEntryPoint(finalKernelName, newParams.layerID, options);
+        auto jit = CreateJit(finalKernelName, cldnnJit, entryPoint);
+
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel, runInfo, params.engineInfo, finalKernelName, jit, entryPoint, exeMode, true, !newParams.bias.empty(), 1, newParams.conv.int8_quantization, newParams.conv.output_calibration);
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::SPLIT, 0 });
+        // eltwise's second input
+        if(newParams.second_input_in_output)
+        {
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::OUTPUT, 0 });
+        }
+        else
+        {
+            kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
+        }
+        if (!newParams.eltw.output_calibration_factors.empty())
+            kernel.arguments.push_back({ArgumentDescriptor::Types::OUTPUT_CALIBRATION_FACTORS, 1});
+
+        kd.estimatedTime = runInfo.effiency;
+        kd.autoTuneIndex = autoTuneIndex;
+
+        return{ kd };
+    }
+
+    std::string fused_conv_eltwise_kernel_base::GetAutoTuneOptions(int autoTuneIndex) const
+    {
+        if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
+        {
+            return autoTuneOptions[autoTuneIndex];
+        }
+
+        return DEFAULT;
+    }
+
+    KernelsData fused_conv_eltwise_kernel_base::GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, const int autoTuneIndex) const
+    {
+        return GetCommonKernelsData(params, options, GetAutoTuneOptions(autoTuneIndex), autoTuneIndex);
+    }
+
+    KernelsData fused_conv_eltwise_kernel_base::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelsData res = {};
+
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+        
+        return res;
+    }
+
+    static DataTensor GetConvolutionBFYXPaddedTensor(const fused_conv_eltwise_params& cp)
+    {
+        assert(cp.inputs[0].GetDims().size() == 4U);
+
+        DataTensor t = cp.inputs[0];
+        std::vector<Tensor::Pad> pad{ { 0,0 },{ 0,0 },{ 0,0 },{ 0,0 } };
+
+        auto& conv = cp.conv;
+
+        pad[0].before = conv.padding.x;
+        pad[1].before = conv.padding.y;
+
+
+        const auto inputLimitX = (cp.output.X().v - 1) * conv.stride.x + (conv.filterSize.x - 1) * conv.dilation.x + 1;
+        const auto inputLimitY = (cp.output.Y().v - 1) * conv.stride.y + (conv.filterSize.y - 1) * conv.dilation.y + 1;
+
+        pad[0].after = (size_t)std::max((int)inputLimitX - (int)t.X().v - (int)pad[0].before, (int)0);
+        pad[1].after = (size_t)std::max((int)inputLimitY - (int)t.Y().v - (int)pad[1].before, (int)0);
+
+        Tensor::NDims dims(4);
+        const Tensor::NDims& orgDims = cp.inputs[0].GetDims();
+        size_t pitch = 1;
+        for (size_t i = 0; i < dims.size(); i++)
+        {
+            dims[i].pad = pad[i];
+            dims[i].v = orgDims[i].v;
+            dims[i].pitch = pitch;
+            pitch *= dims[i].LogicalDimPadded();
+        }
+
+        return{ dims, t.GetDType(), t.GetLayout() };
+    }
+
+    bool CheckConvolutionPaddedInputDesc(const fused_conv_eltwise_params& params, const DataTensor& reqDesc)
+    {
+        bool properPadding =
+            reqDesc.X().pad.before <= params.inputs[0].X().pad.before &&
+            reqDesc.Y().pad.before <= params.inputs[0].Y().pad.before &&
+            reqDesc.Feature().pad.before <= params.inputs[0].Feature().pad.before &&
+            reqDesc.Batch().pad.before <= params.inputs[0].Batch().pad.before;
+
+        properPadding &=
+            reqDesc.X().pad.after <= params.inputs[0].X().pad.after &&
+            reqDesc.Y().pad.after <= params.inputs[0].Y().pad.after &&
+            reqDesc.Feature().pad.after <= params.inputs[0].Feature().pad.after &&
+            reqDesc.Batch().pad.after <= params.inputs[0].Batch().pad.after;
+
+        properPadding &= ((params.conv.padding.x == 0 && params.conv.padding.y == 0) || params.inputs[0].GetPaddedVal() == 0.f);
+
+        return properPadding;
+    }
+
+    bool CovolutionUpdateInputParams(fused_conv_eltwise_params& params)
+    {
+        const auto req_input = GetConvolutionBFYXPaddedTensor(params);
+        const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input);
+
+        if (!bProperInputDesc)
+        {
+            params.inputs[0] = req_input;
+            return true;
+        }
+
+        return false;
+    }
+
+    bool FusedConvolutionEltwiseCheckInput(const Params& p, const optional_params& o)
+    {
+        const fused_conv_eltwise_params& params = static_cast<const fused_conv_eltwise_params&>(p);
+        const fused_conv_eltwise_optional_params& optParams = static_cast<const fused_conv_eltwise_optional_params&>(o);
+
+        const auto req_input = GetConvolutionBFYXPaddedTensor(params);
+        const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input);
+        const bool bInputPadded = optParams.allowInputReordering || bProperInputDesc;
+
+        if (!bInputPadded)
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h
new file mode 100644
index 000000000..1bdebab02
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_base.h
@@ -0,0 +1,138 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "weight_bias_kernel_base.h"
+#include "actual_kernels/convolution/convolution_params.h"
+#include "actual_kernels/eltwise/eltwise_kernel_base.h"
+
+namespace kernel_selector 
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // convolution_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct fused_conv_eltwise_params : public weight_bias_params
+    {
+        fused_conv_eltwise_params() : weight_bias_params(KernelType::FUSED_CONV_ELTWISE) {}
+
+        struct conv_data
+        {
+            uSize    filterSize;
+            uSize    stride;
+            uSize    dilation;
+            uSize    padding;
+            uint32_t split = 1;
+            bool     depthwise_separable_opt = false;
+            bool     transposed = false;
+            bool     int8_quantization = false;
+            bool     output_calibration = false;
+            bool     local_convolution = false;
+            float    input_quantization_factor = 1.0f;
+            float    output_quantization_factor = 1.0f;
+            MultiDataTensor weights_quantization_factors;
+            MultiDataTensor output_calibration_factors;
+        } conv;
+
+        struct eltw_data
+        {
+            std::vector<eltwise_params::Node> operations;
+            std::vector<float> coefficients;
+            std::vector<eltwise_params::UpdateInputData> updateInputIds;
+            std::vector<uSize> stride;
+
+            bool  layoutBased = false;
+            bool  int8_quantization = false;
+            bool  output_calibration = false;
+            float output_quantization_factor = 1.0f;
+
+            MultiDataTensor output_calibration_factors;
+
+            base_activation_params activation;
+        } eltw;
+
+        bool second_input_in_output = false;
+
+        virtual std::string to_string() const override;
+        virtual ParamsKey GetParamsKey() const override;
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // convolution_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct fused_conv_eltwise_optional_params : weight_bias_optional_params
+    {
+        fused_conv_eltwise_optional_params() : weight_bias_optional_params(KernelType::FUSED_CONV_ELTWISE) {}
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // ConvolutionKernelBase
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class fused_conv_eltwise_kernel_base : public WeightBiasKernelBase
+    {
+    public:
+        using WeightBiasKernelBase::WeightBiasKernelBase;
+        virtual ~fused_conv_eltwise_kernel_base() {}
+
+        struct DispatchData : public CommonDispatchData
+        {
+            struct CLDNNStyle
+            {
+                size_t blockWidth, blockHeight; // used for kernels processing blocks
+                size_t prefetch;
+                size_t inputBlockArraySize;     // Number of elements in array of UNIT_TYPE that must be specified in kernel to store/cache input block.
+                size_t inputBlockWidth;         // Number of elements in X dimension stored/cached in input block.
+            };
+
+            struct GEMMStyle
+            {
+                size_t subBlockDimM;
+                size_t subBlockDimK;
+                size_t subBlockDimN;
+                size_t globalWorkSizeDX;
+                size_t globalWorkSizeDY;
+                size_t globalWorkSizeDZ;
+            };
+
+            union
+            {
+                CLDNNStyle cldnnStyle;
+                GEMMStyle  gemmStyle;
+            };
+        };
+        
+        std::string GetAutoTuneOptions(int autoTuneIndex) const;
+        std::vector<std::string> autoTuneOptions = { DEFAULT, NO_PRERA_SCH, AGE_BASED };
+        virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
+        virtual KernelsData GetTunedKernelsDataByIndex(const Params& params, const optional_params& options, int autoTuneIndex = -1) const override;
+    
+    protected:
+        virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&) const = 0;
+        virtual std::string GetKernelName(const fused_conv_eltwise_params&) const { return kernelName; }
+        virtual bool NeedPaddedInput() const { return false; }
+        virtual bool Validate(const Params& p, const optional_params& o) const override;
+        virtual JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const;
+        virtual DispatchData SetDefault(const fused_conv_eltwise_params& params, int autoTuneIndex = -1) const;
+        static bool CheckWorkGroups(const DispatchData&);
+        static bool CheckPitchForSplitOnly(const fused_conv_eltwise_params& params);
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params& options, const std::string exeMode = DEFAULT, int autoTuneIndex = -1) const;
+    };
+
+    bool FusedConvolutionEltwiseCheckInput(const Params& p, const optional_params& o);
+    bool CheckConvolutionPaddedInputDesc(const fused_conv_eltwise_params& params, const DataTensor& reqDesc);
+    bool CovolutionUpdateInputParams(fused_conv_eltwise_params& params);
+
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp
new file mode 100644
index 000000000..8c68a9b01
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.cpp
@@ -0,0 +1,194 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_bfyx_1x1_opt.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    ParamsKey fused_conv_eltwise_kernel_bfyx_1x1_opt::GetSupportedKey() const
+	{
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableSubGroup();
+        //k.EnableSubGroupShort(); // we need it for FP16 only. we check it on the Validate phase
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableFusedConvEltwSplitSupport();
+        k.EnableFusedConvEltwiseRWOutOpt(); // data for second input are already in output
+        return k;
+	}
+
+    struct block_params
+    {
+        int32_t out_width;
+        int32_t out_height;
+        int32_t out_depth;
+    };
+
+    static block_params get_out_block_size(const fused_conv_eltwise_params& p)
+    {
+        auto out_depth = 8;
+
+        if (p.output.X().v == 7)
+        {
+            auto gws0 = p.output.X().v / 7;
+            auto gws1 = p.output.Y().v / 1;
+            auto gws2 = 2 * (p.output.Feature().v * p.output.Batch().v) / 8; // process 8 output channels per Workitem
+
+            auto compute_units = p.engineInfo.computeUnitsCount;
+            auto total_threads = (gws0 * gws1 * gws2) / 64;
+            if (total_threads < compute_units)
+            {
+                out_depth /= 2;
+                total_threads *= 2;
+            }
+            if (total_threads < compute_units)
+            {
+                out_depth /= 2;
+                total_threads *= 2;
+            }
+            return { 7,1,out_depth };
+        }
+        else if (p.output.X().v == 14)
+            return { 7,1,8 };
+        else if (p.output.X().v == 28)
+            return { 7,2,4 };
+        else if (p.output.X().v == 56)
+            return { 8,1,8 };
+
+        return { 1,1,1 };
+    }
+
+    std::string fused_conv_eltwise_kernel_bfyx_1x1_opt::GetKernelName(const fused_conv_eltwise_params& params) const
+    {
+        if (params.inputs[0].GetDType() == Datatype::F32)
+        {
+            return kernelName + "_fp32";
+        }
+        else
+        {
+            return kernelName + "_fp16";
+        }
+    }
+
+	bool fused_conv_eltwise_kernel_bfyx_1x1_opt::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!fused_conv_eltwise_kernel_base::Validate(p, o) ||
+			!FusedConvolutionEltwiseCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const fused_conv_eltwise_params& cp = static_cast<const fused_conv_eltwise_params&>(p);
+		
+        if (cp.conv.stride.x != 1 || cp.conv.stride.y != 1)
+            return false;
+
+        if (cp.conv.filterSize.x != 1 || cp.conv.filterSize.y != 1)
+            return false;
+
+        if (cp.output.Feature().v % 64 != 0)
+            return false;
+
+        if (cp.conv.padding.x != 0 || cp.conv.padding.y != 0)
+            return false;
+
+        // if block sizes are 1x1, then this algorithm is probably not the best
+        auto block = get_out_block_size(cp);
+        if (block.out_width == 1 && block.out_height == 1)
+            return false;
+
+        if (cp.output.X().v % block.out_width != 0)
+            return false;
+        if (cp.output.Y().v % block.out_height != 0)
+            return false;
+
+		return true;
+	}
+
+    std::vector<WeightsLayout> fused_conv_eltwise_kernel_bfyx_1x1_opt::GetSupportedWeightLayouts(const fused_conv_eltwise_params& p) const
+    {
+        auto block = get_out_block_size(p);
+        if (block.out_depth == 8)
+            return { WeightsLayout::os_iyx_osv64 };
+        if (block.out_depth == 4)
+            return { WeightsLayout::os_iyx_osv32 };
+        if (block.out_depth == 2)
+            return { WeightsLayout::os_iyx_osv16 };
+        else
+            return{ WeightsLayout::yxio };
+    }
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_1x1_opt::SetDefault(const fused_conv_eltwise_params& arg, int) const
+	{
+        DispatchData runInfo = Parent::SetDefault(arg);
+
+        constexpr size_t sub_group_size = 8;
+
+        runInfo.effiency = FORCE_PRIORITY_3;
+
+        auto block = get_out_block_size(arg);
+
+        runInfo.gws0 = arg.output.X().v / block.out_width;
+        runInfo.gws1 = arg.output.Y().v / block.out_height;
+        runInfo.gws2 = 2 * (arg.output.Feature().v * arg.output.Batch().v) / block.out_depth; // process 8 output channels per Workitem
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 2 * sub_group_size;
+
+        return runInfo;
+	}
+
+	JitConstants fused_conv_eltwise_kernel_bfyx_1x1_opt::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+        auto block = get_out_block_size(params);
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_WIDTH", block.out_width));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_HEIGHT", block.out_height));
+        jit.AddConstant(MakeJitConstant("OUT_BLOCK_DEPTH", block.out_depth));
+
+        if (!params.eltw.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        }
+        else
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        }
+
+        return jit;
+	}
+
+    KernelsData fused_conv_eltwise_kernel_bfyx_1x1_opt::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kd = GetCommonKernelsData(params, options);
+        if (!kd.empty())
+            kd[0].estimatedTime = FORCE_PRIORITY_1;
+        return kd;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h
new file mode 100644
index 000000000..688c8ede1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_1x1_opt.h
@@ -0,0 +1,42 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+
+namespace kernel_selector {
+
+	class fused_conv_eltwise_kernel_bfyx_1x1_opt : public fused_conv_eltwise_kernel_base
+	{
+	public:
+		using Parent = fused_conv_eltwise_kernel_base;
+        fused_conv_eltwise_kernel_bfyx_1x1_opt() : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_bfyx_1x1_opt") {}
+		
+		virtual ~fused_conv_eltwise_kernel_bfyx_1x1_opt() {}
+
+
+	protected:
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+		virtual ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&) const override;
+        std::string GetKernelName(const fused_conv_eltwise_params& params) const override;
+        bool NeedPaddedInput() const override { return true; }
+        JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp
new file mode 100644
index 000000000..99a8c12db
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.cpp
@@ -0,0 +1,303 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h"
+
+namespace kernel_selector 
+{
+    // Sub-group size used by "kernel_name_bfyx_os_iyx_osv16" kernel.
+    constexpr size_t sub_group_size = 16;
+
+    fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::fused_conv_eltwise_kernel_bfyx_os_iyx_osv16() : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_bfyx_os_iyx_osv16")
+    {
+        // Generate the dispatch options to the auto-tuner.
+        std::vector<size_t> blockWidthSizes = { 1,2,4,5,6,8,10,12,14,16 };
+        std::vector<size_t> blockHeightSizes = { 1,2,3,4,5 };
+        std::vector<size_t> prefetchSizes = { 1,2,3,4,5,6,8,10 };
+        std::vector<std::string> executionModes = fused_conv_eltwise_kernel_base::autoTuneOptions;
+        const size_t maxBlockSize = 60;
+
+        for (auto executionMode : executionModes)
+        {
+            for (auto blockWidth : blockWidthSizes)
+            {
+                for (auto blockHeight : blockHeightSizes)
+                {
+                    for (auto prefetch : prefetchSizes)
+                    {
+                            if (blockWidth * blockHeight <= maxBlockSize)
+                            {
+                                autoTuneOptions.emplace_back(AutoTuneOption{ blockWidth, blockHeight, prefetch, executionMode });
+                            }
+                    }
+                }
+            }
+        }
+    }
+
+    ParamsKey fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableInputWeightsType(WeightsType::F16);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableSubGroup();
+        k.EnableBiasPerFeature();
+        k.EnableBiasPerOutput();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableFusedConvEltwSplitSupport();
+        k.EnableFusedConvEltwDilation();
+        k.EnableFusedConvEltwTranspose();
+        k.EnableFusedConvEltwiseRWOutOpt(); // data for second input are already in output
+        return k;
+    }
+
+    static std::pair<size_t, size_t> get_bfyx_req_input_block_dims(
+        size_t output_block_width,
+        size_t output_block_height,
+        const uSize& filter_size,
+        const uSize& stride,
+        const uSize& dilation,
+        size_t sg_size = 16,
+        size_t read_chunk_size = 8,
+        size_t min_read_size = 16)
+    {
+        assert(output_block_width > 0 && output_block_height > 0);
+        assert(stride.x > 0 && stride.y > 0);
+        assert(filter_size.x > 0 && filter_size.y > 0);
+
+        // Number of elements in X dimension needed from input to compute output block without re-reading input.
+        size_t input_block_req_width = (output_block_width - 1) * stride.x + (filter_size.x - 1)*dilation.x + 1;
+        // Number of elements in Y dimension needed from input to compute output block without re-reading input.
+        size_t input_block_req_height = (output_block_height - 1) * stride.y + (filter_size.y - 1)*dilation.y + 1;
+
+        // Required number of elements in X dimension rounded to nearest >= read chunk size.
+        size_t input_block_read_width = std::max(RoundUp(input_block_req_width, read_chunk_size), min_read_size);
+        // Number of sub-group-sized vectors of unit type needed to store input block.
+        size_t input_block_array_size = CeilDiv(input_block_req_height * input_block_read_width, sg_size);
+
+        return std::make_pair(input_block_array_size, input_block_read_width);
+    }
+
+    static void shrink_blocks_to_output_size(size_t output_x, size_t output_y, size_t &block_x, size_t &block_y)
+    {
+        // how many elements we will compute in each dimension
+        size_t computed_x = Align(output_x, block_x);
+        size_t computed_y = Align(output_y, block_y);
+        // how many simds we need in each dimension
+        size_t simds_x = computed_x / block_x;
+        size_t simds_y = computed_y / block_y;
+        // how many unused values we have in each dimension
+        size_t unused_x = computed_x - output_x;
+        size_t unused_y = computed_y - output_y;
+
+        block_x -= unused_x / simds_x;
+        block_y -= unused_y / simds_y;
+    }
+
+    fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::AutoTuneOption fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetAutoTuneOptions(const Params& p, int autoTuneIndex) const
+    {
+        if ((autoTuneIndex >= 0) && (autoTuneIndex < (int)autoTuneOptions.size()))
+        {
+            return autoTuneOptions[autoTuneIndex];
+        }
+
+        AutoTuneOption option = { 0, 0, 0, DEFAULT };
+
+        const convolution_params& cp = static_cast<const convolution_params&>(p);
+
+        if (cp.stride.x == 1 && cp.stride.y == 1)
+        {
+            if (cp.filterSize.x == 1 && cp.filterSize.y == 1)
+            {
+                option.blockWidth = 16;
+                option.blockHeight = 1;
+                option.prefetch = 4;
+            }
+            //if less than 16 values is required to compute one single row of output
+            //then each WI shall compute one single row to maximize reuse within SIMD subgroup (this gives very nice performance results)
+            else if (cp.output.X().v + (cp.filterSize.x - 1)*cp.dilation.x < sub_group_size)
+            {
+                option.blockWidth = cp.output.X().v;
+                option.blockHeight = 1;
+                option.prefetch = 4;
+            }
+            else if (cp.filterSize.x < 5 && cp.filterSize.y < 5)
+            {
+                option.blockWidth = sub_group_size - cp.filterSize.x + 1;
+                option.blockHeight = 2;
+                option.prefetch = 4;
+            }
+            else
+            {
+                option.blockWidth = 4;
+                option.blockHeight = 3;
+                option.prefetch = 4;
+            }
+        }
+        else if (cp.stride.x == 2 && cp.stride.y == 2)
+        {
+            option.blockWidth = 5;
+            option.blockHeight = 4;
+            option.prefetch = 4;
+        }
+        else
+        {
+            option.blockWidth = 4;
+            option.blockHeight = 3;
+            option.prefetch = 5;
+            //run_info.effiency = FORCE_PRIORITY_7; // GEMM is better
+        }
+
+        // if this is not 1x1 batch1 case then shrink filters, other way we're memory bound and it's best to use 16x1 block sizes
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1 || cp.output.Batch().v != 1)
+        {
+            shrink_blocks_to_output_size(cp.output.X().v, cp.output.Y().v,
+                option.blockWidth, option.blockHeight);
+        }
+
+        return option;
+    }
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::SetDefault(const fused_conv_eltwise_params& cp, int autoTuneIndex) const
+    {
+        DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(cp);
+
+        const auto of_maps = cp.output.Feature().v;
+        const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+
+        runInfo.effiency = FORCE_PRIORITY_3;
+
+        auto tuneOptions = GetAutoTuneOptions(cp, autoTuneIndex);
+        runInfo.cldnnStyle.blockWidth = tuneOptions.blockWidth;
+        runInfo.cldnnStyle.blockHeight = tuneOptions.blockHeight;
+        runInfo.cldnnStyle.prefetch = tuneOptions.prefetch;
+
+        auto input_block_dims = get_bfyx_req_input_block_dims(
+            runInfo.cldnnStyle.blockWidth,
+            runInfo.cldnnStyle.blockHeight,
+            cp.conv.filterSize,
+            cp.conv.stride,
+            cp.conv.dilation,
+            sub_group_size,
+            runInfo.fp16UnitUsed ? sub_group_size : sub_group_size / 2,
+            sub_group_size);
+        runInfo.cldnnStyle.inputBlockArraySize = input_block_dims.first;
+        runInfo.cldnnStyle.inputBlockWidth = input_block_dims.second;
+
+        runInfo.gws0 = CeilDiv(cp.output.X().v, runInfo.cldnnStyle.blockWidth);
+        runInfo.gws1 = CeilDiv(cp.output.Y().v, runInfo.cldnnStyle.blockHeight);
+        runInfo.gws2 = of_threads_per_batch * cp.output.Batch().v;
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = sub_group_size;
+
+        return runInfo;
+    }
+
+    bool fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!fused_conv_eltwise_kernel_base::Validate(p, o) ||
+            !FusedConvolutionEltwiseCheckInput(p, o))
+        {
+            return false;
+        }
+
+        return true;
+    }
+
+    JitConstants fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& runInfo) const
+    {
+        const auto of_maps = params.output.Feature().v;
+        const size_t of_threads_per_batch = RoundUp(of_maps, sub_group_size);
+        size_t leftovers = of_threads_per_batch - of_maps;
+
+        auto jit = Parent::GetJitConstants(params, runInfo);
+
+        jit.AddConstant(MakeJitConstant("SUB_GROUP_SIZE", runInfo.lws2));
+        jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_WIDTH", runInfo.cldnnStyle.blockWidth));
+        jit.AddConstant(MakeJitConstant("OUTPUT_BLOCK_HEIGHT", runInfo.cldnnStyle.blockHeight));
+        jit.AddConstant(MakeJitConstant("IN_BLOCK_ARRAY_SIZE", runInfo.cldnnStyle.inputBlockArraySize));
+        jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", runInfo.cldnnStyle.inputBlockWidth));
+        jit.AddConstant(MakeJitConstant("PREFETCH", runInfo.cldnnStyle.prefetch));
+
+        if (leftovers)
+        {
+            jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers));
+        }
+
+        if (!params.eltw.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        }
+        else
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        }
+
+        return jit;
+    }
+
+    std::vector<WeightsLayout> fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetSupportedWeightLayouts(const fused_conv_eltwise_params& params) const
+    {
+        if (!params.conv.transposed)
+        {
+            return{ WeightsLayout::os_iyx_osv16 };
+        }
+        else
+        {
+            return{ WeightsLayout::os_iyx_osv16_rotate_180 };
+        }
+    }
+
+    KernelsData fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetTunedKernelsDataByIndex(params, options);
+    }
+
+    KernelsData fused_conv_eltwise_kernel_bfyx_os_iyx_osv16::GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const
+    {
+        if (!Validate(params, options))
+        {
+            return{};
+        }
+
+        KernelsData res = {};
+
+        for (size_t i = 0; i < autoTuneOptions.size(); i++)
+        {
+            KernelsData kd = GetTunedKernelsDataByIndex(params, options, (int)i);
+            if (!kd.empty())
+            {
+                res.emplace_back(kd[0]);
+            }
+        }
+
+        return res;
+    }
+
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h
new file mode 100644
index 000000000..9ded5dd5b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h
@@ -0,0 +1,54 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+
+namespace kernel_selector {
+
+    class fused_conv_eltwise_kernel_bfyx_os_iyx_osv16 : public fused_conv_eltwise_kernel_base
+    {
+    public:
+        using Parent = fused_conv_eltwise_kernel_base;
+        fused_conv_eltwise_kernel_bfyx_os_iyx_osv16();
+        virtual ~fused_conv_eltwise_kernel_bfyx_os_iyx_osv16() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual KernelsData GetKernelsDataForAutoTune(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&)  const override;
+        JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        bool NeedPaddedInput() const override { return true; }
+        DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+
+    private:
+        struct AutoTuneOption
+        {
+            size_t blockWidth;
+            size_t blockHeight;
+            size_t prefetch;
+            std::string exeMode;
+        };
+
+        AutoTuneOption GetAutoTuneOptions(const Params& arg, int autoTuneIndex) const;
+
+        std::vector<AutoTuneOption> autoTuneOptions = {};
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_gemm.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_gemm.cpp
new file mode 100644
index 000000000..fefe82bc8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_gemm.cpp
@@ -0,0 +1,164 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_gemm.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    ParamsKey fused_conv_eltwise_kernel_gemm::GetSupportedKey() const
+	{
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableInputWeightsType(WeightsType::F16);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableSubGroup();
+        //k.EnableSubGroupShort(); // we need it for FP16 only. we check it on the Validate phase
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableFusedConvEltwSplitSupport();
+        return k;
+	}
+
+    std::string fused_conv_eltwise_kernel_gemm::GetKernelName(const fused_conv_eltwise_params& params) const
+    {
+        if (params.inputs[0].GetDType() == Datatype::F32)
+        {
+            return kernelName + "_fp32";
+        }
+        else
+        {
+            return kernelName + "_fp16";
+        }
+    }
+
+	bool fused_conv_eltwise_kernel_gemm::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!fused_conv_eltwise_kernel_base::Validate(p, o) ||
+			!FusedConvolutionEltwiseCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const convolution_params& cp = static_cast<const convolution_params&>(p);
+		
+        // make sure it's 1x1 conv
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        // make sure stride is 1x1
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        // input padding not supported
+        if (cp.inputs[0].X().pad.Total() != 0 ||
+            cp.inputs[0].Y().pad.Total() != 0 ||
+            cp.inputs[0].Feature().pad.Total() != 0 ||
+            cp.inputs[0].Batch().pad.Total() != 0)
+            return false;
+
+        // input and output spatial sizes must match
+        if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
+            return false;
+
+		return true;
+	}
+
+    std::vector<WeightsLayout> fused_conv_eltwise_kernel_gemm::GetSupportedWeightLayouts(const fused_conv_eltwise_params& params) const
+    {
+        if (params.inputs[0].GetDType() == Datatype::F16)
+        {
+            return{ WeightsLayout::iy_xs_os_xsv2_osv16__ao32 };
+        }
+        else
+        {
+            return{ WeightsLayout::iy_xs_os_xsv2_osv8__ao32 };
+        }
+    }
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_gemm::SetDefault(const fused_conv_eltwise_params& arg, int) const
+	{
+        DispatchData runInfo = Parent::SetDefault(arg);
+
+        runInfo.lws0 = 1;
+        runInfo.lws2 = 1;
+
+        if (arg.inputs[0].GetDType() == Datatype::F16)
+        {
+            runInfo.gemmStyle = { 1, arg.conv.filterSize.x, 32, 32, 1, 1 };
+            runInfo.lws1 = 16;
+            runInfo.effiency = FORCE_PRIORITY_6;
+        }
+        else
+        {
+            runInfo.gemmStyle = { 2, arg.conv.filterSize.x, 32, 32, 2, 1 };
+            runInfo.lws1 = 8;
+            runInfo.effiency = FORCE_PRIORITY_8;
+        }
+
+        size_t sgemm_m = RoundUp(arg.output.X().v * arg.output.Y().v, runInfo.gemmStyle.subBlockDimM);
+        size_t sgemm_n = RoundUp(arg.output.Feature().v, runInfo.gemmStyle.subBlockDimN);
+
+        runInfo.gws0 = RoundUp(CeilDiv(sgemm_n, runInfo.gemmStyle.globalWorkSizeDX), runInfo.lws0);
+        runInfo.gws1 = RoundUp(CeilDiv(sgemm_m, runInfo.gemmStyle.globalWorkSizeDY), runInfo.lws1);
+        runInfo.gws2 = arg.output.Batch().v;
+
+        return runInfo;
+	}
+
+	JitConstants fused_conv_eltwise_kernel_gemm::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+        jit.AddConstants({
+            MakeJitConstant("ALIGNED_OFM",                  RoundUp(params.output.Feature().v, runInfo.gemmStyle.subBlockDimN)),
+            MakeJitConstant("DX",                           runInfo.gemmStyle.globalWorkSizeDX),
+            MakeJitConstant("DY",                           runInfo.gemmStyle.globalWorkSizeDY),
+            MakeJitConstant("FILTER_SIZE_X_DIV2",           params.conv.filterSize.x / 2),
+            MakeJitConstant("INPUT_BUFFER_WIDTH_PADDED",    ""),    // TODO: enable non padding path again
+            MakeJitConstant("INPUT_BUFFER_HEIGHT_PADDED",   ""),
+            });
+
+        if (CeilDiv(RoundUp(params.output.X().v * params.output.Y().v, runInfo.gemmStyle.subBlockDimM), runInfo.gemmStyle.globalWorkSizeDY) % runInfo.lws1 != 0)
+            jit.AddConstant(MakeJitConstant("LEFTOVERS", 1));
+
+        if (!params.eltw.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        }
+        else
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        }
+
+		return jit;
+	}
+
+    KernelsData fused_conv_eltwise_kernel_gemm::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetTunedKernelsDataByIndex(params, options);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_gemm.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_gemm.h
new file mode 100644
index 000000000..476d87583
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_gemm.h
@@ -0,0 +1,42 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+
+namespace kernel_selector {
+
+	class fused_conv_eltwise_kernel_gemm : public fused_conv_eltwise_kernel_base
+	{
+	public:
+		using Parent = fused_conv_eltwise_kernel_base;
+        fused_conv_eltwise_kernel_gemm() : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_gemm") {}
+		
+		virtual ~fused_conv_eltwise_kernel_gemm() {}
+
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+	protected:
+		virtual ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&) const override;
+        std::string GetKernelName(const fused_conv_eltwise_params& params) const override;
+        bool NeedPaddedInput() const override { return true; }
+        JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.cpp
new file mode 100644
index 000000000..dd218505a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.cpp
@@ -0,0 +1,224 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    static const size_t _SG_TILE_M = 32;
+    static const size_t _SG_TILE_N = 32;
+    static const size_t _SG_SIZE = 8; // sub group size
+    static const size_t _TILES_PER_SG_X = 1; // Persistent threads
+    static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
+
+	ParamsKey fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8::GetSupportedKey() const
+	{
+		ParamsKey k;
+		k.EnableInputDataType(Datatype::INT8);
+		k.EnableOutputDataType(Datatype::INT8);
+		k.EnableInputWeightsType(WeightsType::INT8);
+		k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableTensorOffset();
+		k.EnableTensorPitches();
+		k.EnableBiasPerFeature();
+		k.EnableBatching();
+		k.EnableFusedConvEltwInt8Quantization();
+		k.EnableFusedConvEltwOutputCalibration();
+		k.DisableTuning();
+        k.EnableFusedConvEltwiseRWOutOpt();
+		return k;
+	}
+
+	bool fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!fused_conv_eltwise_kernel_base::Validate(p, o) ||
+			!FusedConvolutionEltwiseCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const fused_conv_eltwise_params& cp = static_cast<const fused_conv_eltwise_params&>(p);
+		
+        // make sure it's 1x1 conv
+        if (cp.conv.filterSize.x != 1 || cp.conv.filterSize.y != 1)
+            return false;
+
+        // make sure stride is 1x1
+        if (cp.conv.stride.x != 1 || cp.conv.stride.y != 1)
+            return false;
+
+        // input padding not supported
+        if (cp.inputs[0].X().pad.Total() != 0 ||
+            cp.inputs[0].Y().pad.Total() != 0 ||
+            cp.inputs[0].Feature().pad.Total() != 0 ||
+            cp.inputs[0].Batch().pad.Total() != 0)
+            return false;
+
+        // input and output spatial sizes must match
+        if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
+            return false;
+
+		const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v ;
+		const auto k = cp.inputs[0].Feature().v;
+		const auto n = cp.output.Feature().v ;
+
+		if (m % 32 != 0 && m % 128 != 0)  // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M=128
+			return false;
+	
+		if (k % 32 != 0)  // Matrix size K, Must be mutliple of 32
+			return false;
+		
+		if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N=128
+			return false;
+		
+		return true;
+	}
+
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8::SetDefault(const fused_conv_eltwise_params& arg, int) const
+	{
+		DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(arg);
+
+		runInfo.effiency = FORCE_PRIORITY_1;
+
+		size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
+		size_t mat_n = arg.output.Feature().v;
+		
+		size_t _MATRIX_M = mat_m;
+		size_t _MATRIX_N = mat_n;
+		
+		size_t _WG_TILE_M = 128;
+		size_t _WG_TILE_N = 128;
+
+		// Calculate number of threads needed
+		const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
+		const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y  ;
+
+		// Define execution setup for kernel:
+		size_t globalWorkSize[3] = { threadsX, threadsY, 1 };
+		size_t localWorkSize[3] = { _SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1 };
+		
+		runInfo.gws0 = globalWorkSize[0];
+		runInfo.gws1 = globalWorkSize[1];
+		runInfo.gws2 = globalWorkSize[2]; 
+		
+		runInfo.lws0 = localWorkSize[0];
+		runInfo.lws1 = localWorkSize[1];
+		runInfo.lws2 = localWorkSize[2];
+
+		return runInfo;
+	}
+
+	JitConstants fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+		jit.AddConstant(MakeJitConstant("WG_TILE_M", 128));          // Work-Group tile size M, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("WG_TILE_N", 128));          // Work-Group tile size N, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", 1));       // Persistent threads
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", 1));       // Persistent threads
+
+		// Do not change values below
+        jit.AddConstant(MakeJitConstant("DIM_X", 0));
+        jit.AddConstant(MakeJitConstant("DIM_Y", 1));
+        jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
+        jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
+        jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
+        jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
+        jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
+        jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
+        jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
+        jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
+
+		jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
+		jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
+		jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
+
+		const auto& input = params.inputs[0];
+		const auto& output = params.output;
+
+		auto m = output.X().v * output.Y().v * output.Batch().v;
+		auto k = input.Feature().v;
+		auto n = output.Feature().v;
+
+		jit.AddConstant(MakeJitConstant("MATRIX_M", m));
+		jit.AddConstant(MakeJitConstant("MATRIX_K", k));
+		jit.AddConstant(MakeJitConstant("MATRIX_N", n));
+
+        const size_t out_x_pitch = 32 * 4;
+        const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
+        const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
+        const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
+        const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
+
+        bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
+        jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
+
+        bool eltw_padding = false;
+        if (!params.second_input_in_output)
+        {
+            // for second input
+            const size_t in2_x_pitch = 32 * 4;
+            const size_t in2_y_pitch = 32 * 4 * params.inputs[1].X().LogicalDimPadded();
+            const size_t in2_b_block_pitch = in2_y_pitch * params.inputs[1].Y().LogicalDimPadded();
+            const size_t in2_f_block_pitch = in2_b_block_pitch * ((params.inputs[1].Batch().v + 3) / 4);
+            const size_t in2_offset = in2_x_pitch * params.inputs[1].X().pad.before + in2_y_pitch * params.inputs[1].Y().pad.before;
+
+            jit.AddConstant(MakeJitConstant("IN2_X_PITCH", in2_x_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_Y_PITCH", in2_y_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_B_BLOCK_PITCH", in2_b_block_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_F_BLOCK_PITCH", in2_f_block_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_OFFSET", in2_offset));
+
+            eltw_padding = params.inputs[1].X().pad.Total() != 0 || params.inputs[1].Y().pad.Total() != 0;;
+        }
+        else
+        {
+            eltw_padding = out_padding;
+        }
+
+        jit.AddConstant(MakeJitConstant("ELTW_WITH_PADDING", eltw_padding));
+
+        if (!params.eltw.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        }
+        else
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        }
+
+		return jit;
+	}
+
+	KernelsData fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8::GetKernelsData(const Params& params, const optional_params& options) const
+	{
+        KernelsData kd = GetCommonKernelsData(params, options);
+        if (!kd.empty())
+			kd[0].estimatedTime = FORCE_PRIORITY_1; //_3 
+		return kd;
+	}
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.h
new file mode 100644
index 000000000..331a50c91
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.h
@@ -0,0 +1,45 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+
+namespace kernel_selector {
+
+	class fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8 : public fused_conv_eltwise_kernel_base
+	{
+	public:
+		using Parent = fused_conv_eltwise_kernel_base;
+        fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8() : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_mmad_32x32sg_128x128wg_slm_int8") {}
+	
+		virtual ~fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8() {}
+
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+	protected:
+		virtual ParamsKey GetSupportedKey() const override;
+		bool Validate(const Params& p, const optional_params& o) const override;
+		JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+		DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+		virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&) const override
+		{
+			return{
+				WeightsLayout::is_o32_yx_isv32_swizzled_by_4,
+			};
+		}
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.cpp
new file mode 100644
index 000000000..f3052eb14
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.cpp
@@ -0,0 +1,224 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    static const size_t _SG_TILE_M = 32;
+    static const size_t _SG_TILE_N = 32;
+    static const size_t _SG_SIZE = 8; // sub group size
+    static const size_t _TILES_PER_SG_X = 1; // Persistent threads
+    static const size_t _TILES_PER_SG_Y = 1; // Persistent threads
+
+    ParamsKey fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8::GetSupportedKey() const
+	{
+		ParamsKey k;
+		k.EnableInputDataType(Datatype::INT8);
+		k.EnableOutputDataType(Datatype::INT8);
+		k.EnableInputWeightsType(WeightsType::INT8);
+		k.EnableInputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableOutputLayout(DataLayout::fs_bs_yx_bsv4_fsv32);
+		k.EnableTensorOffset();
+		k.EnableTensorPitches();
+		k.EnableBiasPerFeature();
+		k.EnableBatching();
+		k.EnableFusedConvEltwInt8Quantization();
+		k.EnableFusedConvEltwOutputCalibration();
+		k.DisableTuning();
+        k.EnableFusedConvEltwiseRWOutOpt();
+		return k;
+	}
+
+	bool fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8::Validate(const Params& p, const optional_params& o) const
+	{
+		if (!fused_conv_eltwise_kernel_base::Validate(p, o) ||
+			!FusedConvolutionEltwiseCheckInput(p, o))
+		{
+			return false;
+		}
+
+		const convolution_params& cp = static_cast<const convolution_params&>(p);
+		
+        // make sure it's 1x1 conv
+        if (cp.filterSize.x != 1 || cp.filterSize.y != 1)
+            return false;
+
+        // make sure stride is 1x1
+        if (cp.stride.x != 1 || cp.stride.y != 1)
+            return false;
+
+        // input padding not supported
+        if (cp.inputs[0].X().pad.Total() != 0 ||
+            cp.inputs[0].Y().pad.Total() != 0 ||
+            cp.inputs[0].Feature().pad.Total() != 0 ||
+            cp.inputs[0].Batch().pad.Total() != 0)
+            return false;
+
+        // input and output spatial sizes must match
+        if (!(cp.output.X().v == cp.inputs[0].X().v) || !(cp.output.Y().v == cp.inputs[0].Y().v))
+            return false;
+
+		const auto m = cp.output.X().v * cp.output.Y().v * cp.output.Batch().v ;
+		const auto k = cp.inputs[0].Feature().v;
+		const auto n = cp.output.Feature().v ;
+
+		if (m % 32 != 0 && m % 224 != 0)  // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M=128
+			return false;
+	
+		if (k % 32 != 0)  // Matrix size K, Must be mutliple of 32
+			return false;
+		
+		if (n % 32 != 0 && n % 128 != 0) // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N=128
+			return false;
+
+		return true;
+	}
+
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8::SetDefault(const fused_conv_eltwise_params& arg, int) const
+	{
+		DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(arg);
+
+		runInfo.effiency = FORCE_PRIORITY_1;
+
+		size_t mat_m = arg.output.X().v * arg.output.Y().v * arg.output.Batch().v;
+		size_t mat_n = arg.output.Feature().v;
+		
+		size_t _MATRIX_M = mat_m;
+		size_t _MATRIX_N = mat_n;
+		
+		size_t _WG_TILE_M = 224;
+		size_t _WG_TILE_N = 128;
+
+		// Calculate number of threads needed
+		const size_t threadsX = (_MATRIX_N / (_SG_TILE_N / _SG_SIZE)) / _TILES_PER_SG_X;
+		const size_t threadsY = (_MATRIX_M / _SG_TILE_M) / _TILES_PER_SG_Y  ;
+
+        // Define execution setup for kernel:
+		size_t globalWorkSize[3] = { threadsX, threadsY, 1 };
+		size_t localWorkSize[3] = { _SG_SIZE * _WG_TILE_N / _SG_TILE_N, _WG_TILE_M / _SG_TILE_M, 1 };
+
+		runInfo.gws0 = globalWorkSize[0];
+		runInfo.gws1 = globalWorkSize[1];
+		runInfo.gws2 = globalWorkSize[2]; 
+
+		runInfo.lws0 = localWorkSize[0];
+		runInfo.lws1 = localWorkSize[1];
+		runInfo.lws2 = localWorkSize[2]; 
+
+		return runInfo;
+	}
+
+	JitConstants fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& runInfo) const
+	{
+		auto jit = Parent::GetJitConstants(params, runInfo);
+
+		jit.AddConstant(MakeJitConstant("WG_TILE_M", 224));          // Work-Group tile size M, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("WG_TILE_N", 128));          // Work-Group tile size N, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_X", _TILES_PER_SG_X));
+		jit.AddConstant(MakeJitConstant("TILES_PER_SG_Y", _TILES_PER_SG_Y));
+
+		// Do not change values below
+		jit.AddConstant(MakeJitConstant("DIM_X", 0));
+		jit.AddConstant(MakeJitConstant("DIM_Y", 1));
+		jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K", 32));
+		jit.AddConstant(MakeJitConstant("MATRIX_SMALL_K_BFLOAT", 16));
+		jit.AddConstant(MakeJitConstant("SG_TILE_M", _SG_TILE_M));
+		jit.AddConstant(MakeJitConstant("SG_TILE_N", _SG_TILE_N));
+		jit.AddConstant(MakeJitConstant("SG_SIZE", _SG_SIZE));
+		jit.AddConstant(MakeJitConstant("SIMD_LANE_M", "SG_TILE_M"));
+		jit.AddConstant(MakeJitConstant("SIMD_LANE_N", "(SG_TILE_N / SG_SIZE)"));
+		jit.AddConstant(MakeJitConstant("WG_SIZE", "(SG_SIZE * WG_TILE_N / SG_TILE_N) * (WG_TILE_M / SG_TILE_M)"));
+
+		jit.AddConstant(MakeJitConstant("COMPILE_KERNELS", ""));
+		jit.AddConstant(MakeJitConstant("TILED_GLOBAL_LAYOUT", ""));
+		jit.AddConstant(MakeJitConstant("OUTPUT_TILED_GLOBAL_LAYOUT", ""));
+
+		const auto& input = params.inputs[0];
+		const auto& output = params.output;
+
+		auto m = output.X().v * output.Y().v * output.Batch().v;
+		auto k = input.Feature().v;
+		auto n = output.Feature().v;
+
+		jit.AddConstant(MakeJitConstant("MATRIX_M", m)); // Matrix size M, Must be mutliple of 32 and multiple of WG_TILE_M
+		jit.AddConstant(MakeJitConstant("MATRIX_K", k)); // Matrix size K, Must be mutliple of 32
+		jit.AddConstant(MakeJitConstant("MATRIX_N", n)); // Matrix size N, Must be mutliple of 32 and multiple of WG_TILE_N
+
+        const size_t out_x_pitch = 32 * 4;
+        const size_t out_y_pitch = 32 * 4 * params.output.X().LogicalDimPadded();
+        const size_t out_b_block_pitch = out_y_pitch * params.output.Y().LogicalDimPadded();
+        const size_t out_f_block_pitch = out_b_block_pitch * ((params.output.Batch().v + 3) / 4);
+        const size_t out_offset = out_x_pitch * params.output.X().pad.before + out_y_pitch * params.output.Y().pad.before;
+
+        jit.AddConstant(MakeJitConstant("OUT_X_PITCH", out_x_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_Y_PITCH", out_y_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_B_BLOCK_PITCH", out_b_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_F_BLOCK_PITCH", out_f_block_pitch));
+        jit.AddConstant(MakeJitConstant("OUT_OFFSET", out_offset));
+
+        bool out_padding = output.X().pad.Total() != 0 || output.Y().pad.Total() != 0;
+        jit.AddConstant(MakeJitConstant("OUT_WITH_PADDING", out_padding));
+
+        bool eltw_padding = false;
+        if (!params.second_input_in_output)
+        {
+            // for second input
+            const size_t in2_x_pitch = 32 * 4;
+            const size_t in2_y_pitch = 32 * 4 * params.inputs[1].X().LogicalDimPadded();
+            const size_t in2_b_block_pitch = in2_y_pitch * params.inputs[1].Y().LogicalDimPadded();
+            const size_t in2_f_block_pitch = in2_b_block_pitch * ((params.inputs[1].Batch().v + 3) / 4);
+            const size_t in2_offset = in2_x_pitch * params.inputs[1].X().pad.before + in2_y_pitch * params.inputs[1].Y().pad.before;
+
+            jit.AddConstant(MakeJitConstant("IN2_X_PITCH", in2_x_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_Y_PITCH", in2_y_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_B_BLOCK_PITCH", in2_b_block_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_F_BLOCK_PITCH", in2_f_block_pitch));
+            jit.AddConstant(MakeJitConstant("IN2_OFFSET", in2_offset));
+
+            eltw_padding = params.inputs[1].X().pad.Total() != 0 || params.inputs[1].Y().pad.Total() != 0;;
+        }
+        else
+        {
+            eltw_padding = out_padding;
+        }
+
+        jit.AddConstant(MakeJitConstant("ELTW_WITH_PADDING", eltw_padding));
+
+        if (!params.eltw.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        }
+        else
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        }
+
+		return jit;
+	}
+
+	KernelsData fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8::GetKernelsData(const Params& params, const optional_params& options) const
+	{
+        KernelsData kd = GetCommonKernelsData(params, options);
+        if (!kd.empty())
+			kd[0].estimatedTime = FORCE_PRIORITY_1; //_3 
+		return kd;
+	}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.h
new file mode 100644
index 000000000..a5ca36c44
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.h
@@ -0,0 +1,45 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+
+namespace kernel_selector {
+
+	class fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8 : public fused_conv_eltwise_kernel_base
+	{
+	public:
+		using Parent = fused_conv_eltwise_kernel_base;
+        fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8() : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_mmad_32x32sg_224x128wg_slm_int8") {}
+		
+		virtual ~fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8() {}
+
+		virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+	protected:
+		virtual ParamsKey GetSupportedKey() const override;
+		bool Validate(const Params& p, const optional_params& o) const override;
+		JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+		DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+		virtual std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&) const override
+		{
+			return{
+				WeightsLayout::is_o32_yx_isv32_swizzled_by_4,
+			};
+		}
+	};
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.cpp
new file mode 100644
index 000000000..670fae853
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.cpp
@@ -0,0 +1,41 @@
+/*
+// Copyright (c) 2016-2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_selector.h"
+#include "fused_conv_eltwise_kernel_gemm.h"
+#include "fused_conv_eltwise_kernel_bfyx_1x1_opt.h"
+#include "fused_conv_eltwise_kernel_bfyx_os_iyx_osv16.h"
+#include "fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8.h"
+#include "fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8.h"
+#include "fused_conv_eltwise_kernel_yxfb_yxio_b16.h"
+
+namespace kernel_selector 
+{
+    fused_conv_eltwise_kernel_selector::fused_conv_eltwise_kernel_selector()
+    {
+//        Attach<fused_conv_eltwise_kernel_gemm>();
+        Attach<fused_conv_eltwise_kernel_yxfb_yxio_b16>();
+        Attach<fused_conv_eltwise_kernel_bfyx_1x1_opt>();
+        Attach<fused_conv_eltwise_kernel_bfyx_os_iyx_osv16>();
+        Attach<fused_conv_eltwise_kernel_mmad_32x32sg_128x128wg_slm_int8>();
+        Attach<fused_conv_eltwise_kernel_mmad_32x32sg_224x128wg_slm_int8>();
+    }
+
+    KernelsData fused_conv_eltwise_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetAutoTuneBestKernel(params, options, KernelType::FUSED_CONV_ELTWISE);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.h
new file mode 100644
index 000000000..94225b8a3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+ 
+namespace kernel_selector 
+{
+    class fused_conv_eltwise_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static fused_conv_eltwise_kernel_selector &Instance() {
+            static fused_conv_eltwise_kernel_selector instance_;
+            return instance_;
+        }
+
+        fused_conv_eltwise_kernel_selector();
+
+        virtual ~fused_conv_eltwise_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp
new file mode 100644
index 000000000..77b2093ec
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.cpp
@@ -0,0 +1,224 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_kernel_yxfb_yxio_b16.h"
+
+namespace kernel_selector 
+{
+
+    ParamsKey fused_conv_eltwise_kernel_yxfb_yxio_b16::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputWeightsType(WeightsType::F16);
+        k.EnableInputWeightsType(WeightsType::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableInputLayout(DataLayout::yxfb);
+        k.EnableOutputLayout(DataLayout::yxfb);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBiasPerFeature();
+        k.EnableNonBiasTerm();
+        k.EnableBatching();
+        k.EnableSplitSupport();
+        k.EnableDilation();
+        k.EnableSubGroup();
+        k.EnableFusedConvEltwiseRWOutOpt();
+        return k;
+    }
+
+    std::string fused_conv_eltwise_kernel_yxfb_yxio_b16::GetKernelName(const fused_conv_eltwise_params& params) const
+    {
+        if (params.inputs[0].GetDType() == Datatype::F32)
+        {
+            return kernelName + "_fp32";
+        }
+        else
+        {
+            return kernelName + "_fp16";
+        }
+    }
+
+    namespace {
+        // how many batches will a single work item compute
+        size_t GetBatchesPerWorkItem(size_t batch_size, Datatype dataType)
+        {
+            if (dataType == Datatype::F16)
+            {
+                const uint32_t min_batches_per_wi = 1;
+                const uint32_t min_lws = 16;
+
+                if (batch_size % (4 * min_batches_per_wi * min_lws) == 0)
+                {
+                    return 4 * min_batches_per_wi; // USE_BLOCK_READ_2 + as_half4
+                }
+                else if (batch_size % (2 * min_batches_per_wi * min_lws) == 0)
+                {
+                    return 2 * min_batches_per_wi; // USE_BLOCK_READ_1 + as_half2
+                }
+                else
+                {
+                    return min_batches_per_wi;
+                }
+            }
+            else
+            {
+                return 2;
+            }
+        }
+
+        size_t GetOfmPerWorkitem(Datatype dataType)
+        {
+            if (dataType == Datatype::F16)
+                return 16;
+            return 8;
+        }
+    }
+
+    fused_conv_eltwise_kernel_base::DispatchData fused_conv_eltwise_kernel_yxfb_yxio_b16::SetDefault(const fused_conv_eltwise_params& arg, int) const
+    {
+        DispatchData runInfo = fused_conv_eltwise_kernel_base::SetDefault(arg);
+
+        const auto filter_ofm_num = arg.weights.OFM().v;
+        const auto batch_size = arg.output.Batch().v;
+        const uint32_t min_lws = 16;
+
+        const size_t batchesPerWorkItem = GetBatchesPerWorkItem(batch_size, arg.inputs[0].GetDType());
+        const size_t ofmPerWorkItem = GetOfmPerWorkitem(arg.inputs[0].GetDType());
+
+        if (arg.inputs[0].GetDType() == Datatype::F16)
+        {
+            runInfo.effiency = FORCE_PRIORITY_7;
+        }
+        else
+        {
+            runInfo.effiency = FORCE_PRIORITY_9;
+        }
+
+        runInfo.lws0 = min_lws;
+        runInfo.gws0 = filter_ofm_num * batch_size / (ofmPerWorkItem * batchesPerWorkItem);
+        
+        return runInfo;
+    }
+
+    bool fused_conv_eltwise_kernel_yxfb_yxio_b16::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!fused_conv_eltwise_kernel_base::Validate(p, o))
+        {
+            return false;
+        }
+        const convolution_params& params = static_cast<const convolution_params&>(p);
+
+        const auto filter_ofm_num = params.weights.OFM().v;
+        const auto batch_size = params.output.Batch().v;
+        const uint32_t min_lws = 16;
+
+        const bool bInputValidated =
+            (filter_ofm_num > 0) &&
+            (batch_size > 0) &&
+            (params.output.Feature().v == filter_ofm_num);
+
+        if (!bInputValidated)
+        {
+            return false;
+        }
+
+        if (params.inputs[0].GetDType() == Datatype::F16)
+        {
+            const uint32_t min_ofm_per_wi = 16;
+            const uint32_t min_batches_per_wi = 1;
+
+            const bool bFilterOK = filter_ofm_num % min_ofm_per_wi == 0;            // Number of output features dividable by minimum number of output features processed inside work item.
+            const bool bBatchOK = batch_size % (min_batches_per_wi * min_lws) == 0; // Batch size dividable by minimum number of batches processed when smallest local work size is used.
+
+            if (!bFilterOK || !bBatchOK)
+            {
+                return false;
+            }
+        }
+        else
+        {
+            if ((filter_ofm_num * batch_size) % min_lws != 0 ||
+                batch_size < 32) // TODO: check why it's not supported
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    JitConstants fused_conv_eltwise_kernel_yxfb_yxio_b16::GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const
+    {
+        auto jit = Parent::GetJitConstants(params, kd);
+
+        const auto local_work_group_size = kd.lws0;
+        const auto batch_size = params.output.Batch().v;
+
+        if (params.inputs[0].GetDType() == Datatype::F32)
+        {
+            // A LITTLE HACK, for convolutions with low number of input features don't use block reads, and it will speed up by 25%
+            // TODO - investigate why is this happening
+            if (params.inputs[0].Feature().v > 4)
+            {
+                jit.AddConstant(MakeJitConstant("USE_BLOCK_READ_2", ""));
+            }
+        }
+        else
+        {
+            const auto batch_pad_before = params.output.Batch().pad.before;
+            const auto feature_pitch = params.output.Feature().pitch;
+
+            if (batch_size >= 64 && (feature_pitch % 2 == 0) && (batch_pad_before % 2 == 0))
+            {
+                jit.AddConstant(MakeJitConstant("USE_BLOCK_READ_2", ""));
+            }
+            else if (batch_size >= 32 && (feature_pitch % 2 == 0) && (batch_pad_before % 2 == 0))
+            {
+                jit.AddConstant(MakeJitConstant("USE_BLOCK_READ_1", ""));
+            }
+        }
+
+        const size_t batchesPerWorkItem = GetBatchesPerWorkItem(batch_size, params.inputs[0].GetDType());
+        const size_t ofmPerWorkItem = GetOfmPerWorkitem(params.inputs[0].GetDType());
+
+        jit.AddConstants({
+            MakeJitConstant("LOCAL_WORK_GROUP_SIZE", kd.lws0),
+            MakeJitConstant("OFM_PER_WORK_ITEM", ofmPerWorkItem),
+            MakeJitConstant("BATCHES_PER_WORK_ITEM",                            batchesPerWorkItem), // how many batches will a single work item compute
+            MakeJitConstant("LOCAL_WORK_GROUPS_PER_SINGLE_BATCHES_ELEMENTS",    std::max(batch_size / batchesPerWorkItem / local_work_group_size, static_cast<size_t>(1))), // how many local work groups we need to compute single element for each batch
+            MakeJitConstant("WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS", batch_size / batchesPerWorkItem), // how many work items we need to compute single element for each batch
+        });
+
+        if (!params.eltw.stride.empty())
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", params.eltw.stride[0].x));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", params.eltw.stride[0].y));
+        }
+        else
+        {
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_X", 1));
+            jit.AddConstant(MakeJitConstant("ELTW_STRIDE_Y", 1));
+        }
+
+        return jit;
+    }
+
+    KernelsData fused_conv_eltwise_kernel_yxfb_yxio_b16::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetTunedKernelsDataByIndex(params, options);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h
new file mode 100644
index 000000000..91d22d1f1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/fused_conv_eltwise/fused_conv_eltwise_kernel_yxfb_yxio_b16.h
@@ -0,0 +1,40 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "fused_conv_eltwise_kernel_base.h"
+
+namespace kernel_selector {
+
+    class fused_conv_eltwise_kernel_yxfb_yxio_b16 : public fused_conv_eltwise_kernel_base
+    {
+    public:
+        using Parent = fused_conv_eltwise_kernel_base;
+        fused_conv_eltwise_kernel_yxfb_yxio_b16() : fused_conv_eltwise_kernel_base("fused_conv_eltwise_gpu_yxfb_yxio_b16") {}
+        virtual ~fused_conv_eltwise_kernel_yxfb_yxio_b16() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        std::vector<WeightsLayout> GetSupportedWeightLayouts(const fused_conv_eltwise_params&) const override { return{ WeightsLayout::yxio }; }
+        std::string GetKernelName(const fused_conv_eltwise_params&) const override;
+        bool Validate(const Params& p, const optional_params& o) const override;
+        JitConstants GetJitConstants(const fused_conv_eltwise_params& params, const DispatchData& kd) const override;
+        DispatchData SetDefault(const fused_conv_eltwise_params& arg, int autoTuneIndex = -1) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp
new file mode 100644
index 000000000..5a9d50b9c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.cpp
@@ -0,0 +1,144 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gather_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+    static int32_t GetGatherChannelIndex(const gather_params& params)
+    {
+        Tensor::DataChannelName name = Tensor::DataChannelName::X;
+
+        switch (params.axis)
+        {
+            case GatherAxis::X:
+                return 3;
+            case GatherAxis::Y:
+                return 2;
+            case GatherAxis::FEATURE:
+                return 1;
+            case GatherAxis::BATCH:
+                return 0;
+            default: break;
+        }
+
+        return DataTensor::Channelndex(params.output.GetLayout(), name);
+    }
+
+    ParamsKey GatherKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableAllInputLayout();
+        k.EnableAllOutputLayout();
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableDifferentTypes();
+        k.EnableLookUpTableIndicesFormat(Datatype::F32);
+        return k;
+    }
+
+    static size_t getPartSize(const gather_params& params, int32_t axis)
+    {
+        size_t partSize = 1;
+        for (size_t i = params.inputs[0].Dimentions() - axis; i > 0; --i)
+            partSize *= params.inputs[0].GetDims()[i-1].v;
+        return partSize;
+    }
+
+    static size_t getNumberOfParts(const gather_params& params, size_t partSize)
+    {
+        return params.inputs[0].LogicalSize() / partSize;
+    }
+
+    static size_t getSliceSize(const gather_params& params, int32_t axis)
+    {
+        size_t numberOfItemsInSlice = 1;
+        for (size_t i = params.inputs[0].Dimentions() - axis - 1; i > 0; --i)
+            numberOfItemsInSlice *= params.inputs[0].GetDims()[i-1].v;
+        return numberOfItemsInSlice;
+    }
+
+    CommonDispatchData GatherKernelRef::SetDefault(const gather_params& params, const optional_params&) const
+    {
+        CommonDispatchData runInfo;
+
+        const int32_t axis = GetGatherChannelIndex(params);
+
+        const size_t numberOfParts = params.inputs[0].LogicalSize() / getPartSize(params, axis);
+
+        size_t gws = numberOfParts * params.inputs[1].LogicalSize();
+
+        const size_t vectorSize = 16;
+
+        runInfo.gws0 = Align(gws, vectorSize);
+        runInfo.gws1 = 1;
+        runInfo.gws2 = 1;
+
+        runInfo.lws0 = vectorSize;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        runInfo.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+        return runInfo;
+    }
+
+    JitConstants GatherKernelRef::GetJitConstants(const gather_params& params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        int32_t axis = GetGatherChannelIndex(params);
+        size_t partSize = getPartSize(params, axis);
+        size_t sliceSize = getSliceSize(params, axis);
+        size_t numberOfParts = getNumberOfParts(params, partSize);
+        size_t numberOfIndexes = params.inputs[1].LogicalSize();
+
+        jit.AddConstant(MakeJitConstant("AXIS", axis));
+        jit.AddConstant(MakeJitConstant("PART_SIZE", partSize));
+        jit.AddConstant(MakeJitConstant("SLICE_SIZE", sliceSize));
+        jit.AddConstant(MakeJitConstant("PARTS_NUMBER", numberOfParts));
+        jit.AddConstant(MakeJitConstant("COMPUTATIONAL_OPERATIONS_NUMBER", numberOfParts * numberOfIndexes));
+
+        return jit;
+    }
+
+    KernelsData GatherKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelData kd = KernelData::Default<gather_params>(params);
+        gather_params& newParams = *static_cast<gather_params*>(kd.params.get());
+
+        assert(params.GetType() == KernelType::GATHER);
+
+        auto runInfo = SetDefault(newParams, options);
+        auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+        auto cldnn_jit = GetJitConstants(newParams);
+        std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = kd.kernels[0];
+
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
+
+        kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.h
new file mode 100644
index 000000000..630cf14e4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_ref.h
@@ -0,0 +1,56 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // gather_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct gather_params : public base_params
+    {
+        gather_params() : base_params(KernelType::GATHER) {}
+
+        GatherAxis axis;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            return base_params::GetParamsKey();
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // gather_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct gather_optional_params : optional_params
+    {
+        gather_optional_params() : optional_params(KernelType::GATHER) {}
+    };
+
+    class GatherKernelRef : public common_kernel_base
+    {
+    public:
+        GatherKernelRef() : common_kernel_base("gather_ref") {}
+        virtual ~GatherKernelRef() {}
+        virtual JitConstants GetJitConstants(const gather_params& params) const;
+        virtual CommonDispatchData SetDefault(const gather_params& params, const optional_params&) const;
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_selector.cpp
new file mode 100644
index 000000000..3f7962a35
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gather_kernel_selector.h"
+#include "gather_kernel_ref.h"
+
+namespace kernel_selector {
+
+    gather_kernel_selector::gather_kernel_selector()
+    {
+        Attach<GatherKernelRef>();
+    }
+
+    KernelsData gather_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::GATHER);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_selector.h
new file mode 100644
index 000000000..630c1efb7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gather/gather_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+    class gather_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static gather_kernel_selector &Instance() {
+            static gather_kernel_selector instance_;
+            return instance_;
+        }
+
+        gather_kernel_selector();
+
+        virtual ~gather_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
index 12af8a1c5..4d5e5d743 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_base.cpp
@@ -89,7 +89,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = k_data.kernels[0];
-        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size());
+        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, (uint32_t)prim_params.inputs.size());
 
         k_data.estimatedTime = estimated_time;
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
index 89727597d..8b7410a5a 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/gemm/gemm_kernel_ref.h
@@ -25,6 +25,8 @@ namespace kernel_selector
         GemmKernelRef() : GemmKernelBase("gemm_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
index c0dc0851c..0aa05d33b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.cpp
@@ -24,7 +24,42 @@ namespace kernel_selector
     {
         JitConstants jit = MakeBaseParamsJitConstants(params);
 
-        jit.AddConstant(MakeJitConstant(toString(params.axis), ""));
+        jit.AddConstant(MakeJitConstant("AXES_NUMBER", params.axes.size()));
+
+        if (params.reverse) {
+            jit.AddConstant(MakeJitConstant("REVERSE", 1));
+        }
+
+        for (size_t i = 0; i < params.axes.size(); i++)
+        {
+            std::string size_name = "REVERSE_AXIS_SIZE";
+            size_t size_value = 0;
+            if (params.axes.size() > 1) {
+                std::stringstream ss;
+                ss << "REVERSE_" << toString(params.axes[i]) << "_SIZE";
+                size_name = ss.str();
+            }
+            jit.AddConstant(MakeJitConstant(toString(params.axes[i]), ""));
+            if (params.reverse) {
+                if (params.axes[i] == IndexSelectAxis::BATCH)
+                {
+                    size_value = params.inputs.at(0).Batch().v;
+                }
+                else if (params.axes[i] == IndexSelectAxis::X)
+                {
+                    size_value = params.inputs.at(0).X().v;
+                }
+                else if (params.axes[i] == IndexSelectAxis::Y)
+                {
+                    size_value = params.inputs.at(0).Y().v;
+                }
+                else if (params.axes[i] == IndexSelectAxis::FEATURE)
+                {
+                    size_value = params.inputs.at(0).Feature().v;
+                }
+            }
+            jit.AddConstant(MakeJitConstant(size_name, size_value));
+        }
 
         return jit;
     }
@@ -32,24 +67,58 @@ namespace kernel_selector
     IndexSelectKernelBase::DispatchData IndexSelectKernelBase::SetDefault(const index_select_params& params)
     {
         const auto& output = params.output;
-        const auto& indices = params.inputs.at(1);
         DispatchData kd;
 
         kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
 
         std::vector<size_t> global;
-        if (params.axis == IndexSelectAxis::BATCH)
-        {
-            global = { 1, indices.X().v, output.Feature().v };
-        }
-        else if (params.axis == IndexSelectAxis::X || params.axis == IndexSelectAxis::Y)
-        {
-            global = { output.Batch().v, indices.X().v, output.Feature().v };
+        
+        if(params.axes.size() == 1) {
+            if (params.reverse)
+            {
+                if (params.axes[0] == IndexSelectAxis::BATCH)
+                {
+                    global = { 1, params.inputs.at(0).Batch().v, output.Feature().v };
+                }
+                else if (params.axes[0] == IndexSelectAxis::X)
+                {
+                    global = { output.Batch().v, params.inputs.at(0).X().v, output.Feature().v };
+                }
+                else if (params.axes[0] == IndexSelectAxis::Y)
+                {
+                    global = { output.Batch().v, params.inputs.at(0).Y().v, output.Feature().v };
+                }
+                else if (params.axes[0] == IndexSelectAxis::FEATURE)
+                {
+                    global = { output.Batch().v, params.inputs.at(0).Feature().v, output.Y().v };
+                }
+            }
+            else
+            {
+                const auto indices = params.inputs.at(1).X().v;
+
+                if (params.axes[0] == IndexSelectAxis::BATCH)
+                {
+                    global = { 1, indices, output.Feature().v };
+                }
+                else if (params.axes[0] == IndexSelectAxis::X || params.axes[0] == IndexSelectAxis::Y)
+                {
+                    global = { output.Batch().v, indices, output.Feature().v };
+                }
+                else if (params.axes[0] == IndexSelectAxis::FEATURE)
+                {
+                    global = { output.Batch().v, indices, output.Y().v };
+                }
+            }
         }
-        else if(params.axis == IndexSelectAxis::FEATURE)
+        else
         {
-            global = { output.Batch().v, indices.X().v, output.Y().v };
+            if (params.reverse)
+            {
+                global = { output.Batch().v, output.Y().v, output.Feature().v };
+            }
         }
+
         const auto& local = GetOptimalLocalWorkGroupSizes(global);
 
         kd.gws0 = global[0];
@@ -77,7 +146,7 @@ namespace kernel_selector
         auto jit         = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = k_data.kernels[0];
-        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, false, false, (uint32_t)prim_params.inputs.size());
+        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, DEFAULT, false, false, (uint32_t)prim_params.inputs.size());
 
         k_data.estimatedTime = estimated_time;
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
index c7abe43bc..2142c6020 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_base.h
@@ -29,7 +29,8 @@ namespace kernel_selector
             : base_params(KernelType::INDEX_SELECT)
         {}
 
-        IndexSelectAxis axis = IndexSelectAxis::BATCH;
+        std::vector<IndexSelectAxis> axes = { IndexSelectAxis::BATCH };
+        bool reverse = false;
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
index 3dd16198f..e3a339be9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/index_select/index_select_kernel_ref.h
@@ -25,6 +25,8 @@ namespace kernel_selector
         IndexSelectKernelRef() : IndexSelectKernelBase("index_select_gpu_ref") {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.h
index 358aa8a39..555531b65 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_axis.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~LookUpTableKernelAxis() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_ref.h
index 8d33d2356..45385b217 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lookup_table/lookup_table_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~LookUpTableKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h
index edaba0a85..0586ce1c8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_multiple_features.h
@@ -26,6 +26,8 @@ namespace kernel_selector
         LRNKernelAcrossChannelMultipleFeatures() : LRNKernelBase("lrn_gpu_across_channel_multiple_features") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
 
     private:
@@ -33,4 +35,4 @@ namespace kernel_selector
         JitConstants GetJitConstants(const lrn_params& params, DispatchData kd) const override;
         CommonDispatchData SetDefault(const lrn_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h
index 629cc3ad9..17d336c77 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_opt_b8.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~LRNKernelAcrossChannel_b8() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
 
     private:
@@ -34,4 +36,4 @@ namespace kernel_selector
         JitConstants GetJitConstants(const lrn_params& params, DispatchData kd) const override;
         CommonDispatchData SetDefault(const lrn_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h
index 20146ac37..7d9e77555 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_across_channel_ref.h
@@ -27,9 +27,11 @@ namespace kernel_selector
         virtual ~LRNKernelAcrossChannelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
 
     private:
         CommonDispatchData SetDefault(const lrn_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h
index f15d49376..9eb4d3705 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_ref.h
@@ -28,6 +28,8 @@ namespace kernel_selector
         virtual ~LRNKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
 
     protected:
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h
index f8eb02734..51dc718f6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_byxf_opt.h
@@ -28,9 +28,9 @@ namespace kernel_selector
         virtual ~LRNKernelWithinChannelByxfOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         virtual JitConstants GetJitConstants(const lrn_params& params, DispatchData kd) const override;
         virtual DispatchData SetDefault(const lrn_params& params) const override;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h
index 0545a1d99..4ae8eab01 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref.h
@@ -27,9 +27,11 @@ namespace kernel_selector
         virtual ~LRNKernelWithinChannel() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
 
     private:
         CommonDispatchData SetDefault(const lrn_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h
index 0fd00b419..ad4221ef9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lrn/lrn_kernel_within_channel_ref_opt.h
@@ -27,9 +27,11 @@ namespace kernel_selector
         virtual ~LRNKernelWithinChannelOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
 
     private:
         CommonDispatchData SetDefault(const lrn_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
index 6170abd46..26fdb9360 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.cpp
@@ -26,7 +26,11 @@ namespace kernel_selector
 
         if (params.has_cell) {
             const auto& cell = params.cell;
-            jit.AddConstants({ MakeJitConstant("CELL_TERM", true), MakeJitConstant("CELL", cell) });
+            jit.AddConstants({
+                MakeJitConstant("CELL_TERM", true),
+                MakeJitConstant("CELL", cell),
+                MakeJitConstant("CELL_DIRECTION", params.cell_direction)
+            });
         }
         if (params.clip > 0) {
             std::string psclip = toCodeString(params.clip);
@@ -40,6 +44,7 @@ namespace kernel_selector
         if (params.input_forget) {
             jit.AddConstants({ MakeJitConstant("INPUT_FORGET", true) });
         }
+        jit.AddConstants({ MakeJitConstant("DIRECTION", params.direction) });
 
         const auto& GEMMInput = params.inputs[0];
         size_t size = GEMMInput.X().v / 4;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.h
index c9082ce0b..c6d16e797 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_base.h
@@ -29,6 +29,8 @@ namespace kernel_selector
         enum order_type : int32_t {
             offset_iofz, // ONNX default
             offset_ifoz, // caffe
+            offset_izof, // pyTorch
+            offset_fizo  // IE default
         };
 
         lstm_elt_params()
@@ -40,11 +42,15 @@ namespace kernel_selector
         order_type gate_order = offset_iofz;
         float clip = 0;
         bool input_forget = false;
+        uint32_t direction = 0;
+        uint32_t cell_direction = 0;
 
         size_t GetOffsetIndex(order_type type, size_t idx) const {
             static const std::map<order_type, std::vector<size_t>> offset_map {
-                {offset_iofz, {0, 1, 2, 3}},
-                {offset_ifoz, {0, 2, 1, 3}}
+                {offset_iofz, { 0, 1, 2, 3}},
+                {offset_ifoz, { 0, 2, 1, 3}},
+                {offset_izof, { 0, 3, 1, 2}},
+                {offset_fizo, { 1, 3, 0, 2}}
             };
             return offset_map.at(type)[idx];
         }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.h
index 8213167f0..356d9e6f6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_elt_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~LSTMEltKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
index 703008546..a6846438f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.cpp
@@ -31,11 +31,15 @@ namespace kernel_selector
             jit.AddConstants({ MakeJitConstant("BIAS", bias), MakeJitConstant("BIAS_TERM", true) });
         }
         if (params.hasHidden) {
-            jit.AddConstants({ MakeJitConstant("HIDDEN", hidden), MakeJitConstant("HIDDEN_TERM", true) , MakeJitConstant("RECURRENT", recurrent) });
+            jit.AddConstants({ MakeJitConstant("HIDDEN", hidden),
+                MakeJitConstant("HIDDEN_TERM", true),
+                MakeJitConstant("RECURRENT", recurrent),
+                MakeJitConstant("HIDDEN_DIRECTION", params.hidden_direction)
+            });
         }
-
         jit.AddConstants({ MakeJitConstant("WEIGHTS", weights)});
         jit.AddConstants({ MakeJitConstant("DIRECTION", params.direction)});
+        jit.AddConstants({ MakeJitConstant("INPUT_DIRECTION", params.input_direction)});
 
         return jit;
     }
@@ -51,7 +55,7 @@ namespace kernel_selector
 
         KernelData kd = KernelData::Default<lstm_gemm_params>(params, orgParams.inputs.size());
 
-        float effiency = FORCE_PRIORITY_1;
+        float effiency = FORCE_PRIORITY_9;
         const auto& input = orgParams.inputs[0];
 
         auto newParams = orgParams;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
index e766120e0..261b8e29d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_base.h
@@ -35,6 +35,8 @@ namespace kernel_selector
         bool hasBias = false;
         bool hasHidden = false;
         uint32_t direction = 0;
+        uint32_t input_direction = 0; // for bidirectional node fusion in stacked LSTMs
+        uint32_t hidden_direction = 0;
 
         void SetBias(const DataTensor& v) {
             bias = v;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.h
index 15488ac6d..b382309e8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~LSTMGemmKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_selector.cpp
index 79296daed..b372bb7ee 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemm_kernel_selector.cpp
@@ -16,12 +16,16 @@
 
 #include "lstm_gemm_kernel_selector.h"
 #include "lstm_gemm_kernel_ref.h"
+#include "lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.h"
+#include "lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.h"
 
 namespace kernel_selector
 {
     lstm_gemm_kernel_selector::lstm_gemm_kernel_selector()
     {
         Attach<LSTMGemmKernelRef>();
+        Attach<LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16>();
+        Attach<LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16>();
     }
 
     KernelsData lstm_gemm_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.cpp
new file mode 100644
index 000000000..fcea58780
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.cpp
@@ -0,0 +1,62 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    ParamsKey LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableDifferentTypes();
+        k.EnableInputLayout(DataLayout::bfyx); 
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableLSTMGEMMBias();
+        k.EnableLSTMGEMMHidden();
+        k.EnableSubGroup();
+        return k;
+    }
+
+    KernelsData LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kernelsData = GetCommonKernelsData(params, options);
+        auto &kernel = kernelsData[0].kernels[0];
+
+        // This kernel is good if 
+        // 1) Batch size is 1
+        // 2) The input size y-x size is 64x1
+        const lstm_gemm_params& orgParams = static_cast<const lstm_gemm_params&>(params);
+        const auto& input = orgParams.inputs[0];
+
+        if (   (input.Batch().v == 1) 
+            && (input.X().v >= 64) 
+            && (input.Y().v == 1)) 
+        {    
+            auto out = orgParams.output;
+
+            kernel.workGroups.global = { 16, out.X().v, out.Batch().v };
+            kernelsData[0].estimatedTime = FORCE_PRIORITY_1;
+        }
+
+        return kernelsData;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.h
new file mode 100644
index 000000000..e0ee836da
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.h
@@ -0,0 +1,32 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "lstm_gemm_kernel_base.h"
+
+namespace kernel_selector
+{
+    class LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16 : public LSTMGemmKernelBase
+    {
+    public:
+        LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16() : LSTMGemmKernelBase("lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16") {}
+        virtual ~LSTMGemvKernel_subgroup1x64_bfyx_ff_SIMD16() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.cpp
new file mode 100644
index 000000000..7d34a10bb
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.cpp
@@ -0,0 +1,62 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    ParamsKey LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableDifferentTypes();
+        k.EnableInputLayout(DataLayout::bfyx); 
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableLSTMGEMMBias();
+        k.EnableLSTMGEMMHidden();
+        k.EnableSubGroup();
+        return k;
+    }
+
+    KernelsData LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelsData kernelsData = GetCommonKernelsData(params, options);
+        auto &kernel = kernelsData[0].kernels[0];
+
+        // This kernel is good if 
+        // 1) Batch size is 1
+        // 2) The input size y-x size is 64x1
+        const lstm_gemm_params& orgParams = static_cast<const lstm_gemm_params&>(params);
+        const auto& input = orgParams.inputs[0];
+
+        if (   (input.Batch().v == 1) 
+            && (input.X().v >= 64) 
+            && (input.Y().v == 1)) 
+        {    
+            auto out = orgParams.output;
+
+            kernel.workGroups.global = { 16, out.X().v, out.Batch().v };
+            kernelsData[0].estimatedTime = FORCE_PRIORITY_1;
+        }
+
+        return kernelsData;
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.h
new file mode 100644
index 000000000..c315a41ad
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/lstm/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.h
@@ -0,0 +1,32 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "lstm_gemm_kernel_base.h"
+
+namespace kernel_selector
+{
+    class LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16 : public LSTMGemmKernelBase
+    {
+    public:
+        LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16() : LSTMGemmKernelBase("lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16") {}
+        virtual ~LSTMGemvKernel_subgroup1x64_bfyx_hh_SIMD16() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h
index eae59763e..e0ba99b9d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/max_unpooling/max_unpooling_kernel_gpu_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~MaxUnpoolingKernelGPURef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h
index 9127187e5..bd9c3fedb 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_bfyx_opt.h
@@ -27,9 +27,11 @@ namespace kernel_selector
         virtual ~MVNKernelBfyxOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         using Parent = MVNKernelBase;
 
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+
     private:
         DispatchData SetDefault(const mvn_params& params) const override;
         JitConstants GetJitConstants(const mvn_params& params, MVNKernelBase::DispatchData kd) const override;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h
index 9a88c8d23..cd0a4fbdc 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/mvn/mvn_kernel_ref.h
@@ -27,9 +27,9 @@ namespace kernel_selector
         virtual ~MVNKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         std::string GetKernelName(const mvn_params&) const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_across_spatial_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_across_spatial_ref.h
index cc202b7a7..b7243f7b4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_across_spatial_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_across_spatial_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~NormalizeKernelAcrossSpatialRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_within_spatial_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_within_spatial_ref.h
index 20f08608a..fd4d36ae2 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_within_spatial_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/normalize/normalize_kernel_within_spatial_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~NormalizeKernelWithinSpatialRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp
new file mode 100644
index 000000000..c36456c40
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.cpp
@@ -0,0 +1,76 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "one_hot_kernel_base.h"
+
+#include "kernel_selector_utils.h"
+
+
+namespace kernel_selector
+{
+    JitConstants OneHotKernelBase::GetJitConstants(const one_hot_params& params)
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        jit.AddConstants({
+            MakeJitConstant("ONE_HOT_AXIS", params.one_hot_axis),
+            MakeJitConstant("ONE_HOT_LIMIT", params.one_hot_limit)
+        });
+
+        return jit;
+    }
+
+    OneHotKernelBase::DispatchData OneHotKernelBase::SetDefault(const one_hot_params& params)
+    {
+        const auto& input = params.inputs[0];
+
+        DispatchData kd;
+
+        kd.fp16UnitUsed = input.GetDType() == Datatype::F16;
+
+        std::vector<size_t> global{ input.Feature().v, input.Y().v, input.X().v };
+        const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+        kd.gws0 = global[0];
+        kd.gws1 = global[1];
+        kd.gws2 = global[2];
+
+        kd.lws0 = local[0];
+        kd.lws1 = local[1];
+        kd.lws2 = local[2];
+
+        return kd;
+    }
+
+    KernelsData OneHotKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+    {
+        assert(params.GetType() == KernelType::ONE_HOT);
+
+        const auto& prim_params = static_cast<const one_hot_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+
+        auto run_info = SetDefault(prim_params);
+        KernelData k_data = KernelData::Default<one_hot_params>(params);
+
+        auto cldnn_jit = GetJitConstants(prim_params);
+        auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+        auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = k_data.kernels[0];
+        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point);
+        k_data.estimatedTime = estimated_time;
+
+        return{ k_data };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.h
new file mode 100644
index 000000000..ab387eaef
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_base.h
@@ -0,0 +1,63 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // one_hot_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct one_hot_params : public base_params
+    {
+        one_hot_params()
+            : base_params(KernelType::ONE_HOT)
+        {
+        }
+        uint16_t one_hot_axis;
+        int32_t one_hot_limit;
+
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // one_hot_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct one_hot_optional_params : optional_params
+    {
+        one_hot_optional_params()
+            : optional_params(KernelType::ONE_HOT)
+        {
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // OneHotKernelBase
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class OneHotKernelBase : public common_kernel_base
+    {
+    public:
+        using common_kernel_base::common_kernel_base;
+
+        using DispatchData = CommonDispatchData;
+
+    protected:
+        static JitConstants GetJitConstants(const one_hot_params& params);
+        static DispatchData SetDefault(const one_hot_params& params);
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_ref.cpp
new file mode 100644
index 000000000..712422e7d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_ref.cpp
@@ -0,0 +1,49 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "one_hot_kernel_ref.h"
+
+
+namespace kernel_selector
+{
+    ParamsKey OneHotKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableInputDataType(Datatype::UINT8);
+        k.EnableInputDataType(Datatype::INT32);
+        k.EnableInputDataType(Datatype::INT64);
+
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT32);
+        k.EnableOutputDataType(Datatype::INT64);
+
+        k.EnableInputLayout(DataLayout::bfyx);
+
+        k.EnableOutputLayout(DataLayout::bfyx);
+
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+
+        return k;
+    }
+
+    KernelsData OneHotKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_ref.h
new file mode 100644
index 000000000..972b7aeac
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_ref.h
@@ -0,0 +1,32 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "one_hot_kernel_base.h"
+
+
+namespace kernel_selector
+{
+    class OneHotKernelRef : public OneHotKernelBase
+    {
+    public:
+        OneHotKernelRef() : OneHotKernelBase("one_hot_ref") {}
+
+        KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_selector.cpp
new file mode 100644
index 000000000..230dd323f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_selector.cpp
@@ -0,0 +1,30 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "one_hot_kernel_selector.h"
+#include "one_hot_kernel_ref.h"
+
+namespace kernel_selector
+{
+    one_hot_kernel_selector::one_hot_kernel_selector()
+    {
+        Attach<OneHotKernelRef>();
+    }
+
+    KernelsData one_hot_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::ONE_HOT);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_selector.h
new file mode 100644
index 000000000..79c8c3489
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/one_hot/one_hot_kernel_selector.h
@@ -0,0 +1,34 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+
+namespace kernel_selector
+{
+    class one_hot_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static one_hot_kernel_selector &Instance() {
+            static one_hot_kernel_selector instance;
+            return instance;
+        }
+
+        one_hot_kernel_selector();
+
+        KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
index ca6977952..14a9c95f4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -42,12 +42,8 @@ namespace kernel_selector
 
     inline JitConstants MakePermuteJitConstants(const permute_params& params)
     {
-        JitConstants jit = MakeBaseParamsJitConstants(params);
-
-        jit.AddConstants({
-            MakeJitConstant("PERMUTE_ORDER", params.order)
-        });
-
+        JitConstants jit = MakeBaseParamsJitConstants(params);;
+        jit.AddConstant(MakeJitConstant("PERMUTE_ORDER", params.order));
         return jit;
     }
 
@@ -65,24 +61,14 @@ namespace kernel_selector
 
         const auto& in = newParams.inputs[0];
         auto& kernel = kd.kernels[0];
-        std::vector<size_t> gws;
-        for (const auto& o : in.GetDims())
-        {
-            gws.push_back(o.v);
-        }
-        
-        for (size_t i = gws.size(); i < 4; i++)
-        {
-            gws.push_back(1U);
-        }
 
-        kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
+        kernel.workGroups.global = { in.Y().v, in.X().v, in.Feature().v * in.Batch().v};
         kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
-        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
         kernel.arguments = GetArgsDesc(1, false, false);
         
         kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
 
         return{ kd };
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.h
index 978717c75..83e4e8b57 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/permute/permute_kernel_ref.h
@@ -53,6 +53,8 @@ namespace kernel_selector
         virtual ~PermuteKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.cpp
index 29822a98e..aa1745545 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.cpp
@@ -41,7 +41,7 @@ namespace kernel_selector
 
         const pooling_params& params = static_cast<const pooling_params&>(p);
 
-        if (params.activationFunc != ActivationFunction::NONE)
+        if (params.activation.function != ActivationFunction::NONE)
         {
             return{};
         }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.h
index 5c46d655b..b343d1059 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_average_opt.h
@@ -27,10 +27,10 @@ namespace kernel_selector
         virtual ~PoolingKernelGPUAverageOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
         DispatchData SetDefault(const pooling_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp
new file mode 100644
index 000000000..3a50ee3e5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.cpp
@@ -0,0 +1,77 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "pooling_kernel_gpu_b_fs_yx_fsv4.h"
+
+namespace kernel_selector
+{
+    ParamsKey PoolingKerneGPU_b_fs_yx_fsv4::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::INT8);
+        k.EnableInputDataType(Datatype::UINT8);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableOutputDataType(Datatype::UINT8);
+        k.EnableInputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnablePoolType(PoolType::MAX);
+        k.EnablePoolType(PoolType::AVG);
+        k.EnablePoolRemainder(PoolRemainder::FLOOR);
+        k.EnablePoolRemainder(PoolRemainder::CEIL);
+        k.EnablePoolKernelDividerMode(KernelDividerMode::FIXED);
+        k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC);
+        k.EnablePoolKernelDividerMode(KernelDividerMode::DYNAMIC_WITH_PADDING);
+        k.EnableDifferentTypes();
+        return k;
+    }
+
+    PoolingKernelBase::DispatchData PoolingKerneGPU_b_fs_yx_fsv4::SetDefault(const pooling_params& params) const
+    {
+        DispatchData runInfo = PoolingKernelBase::SetDefault(params);
+
+        runInfo.gws0 = params.output.X().v;   // X
+        runInfo.gws1 = params.output.Y().v;   // Y
+        // we got b_fs_yx_fsv4 format, we process 4 features per workitem
+        runInfo.gws2 = (params.output.Feature().v * params.output.Batch().v) / 4;
+
+        runInfo.lws0 = 1;
+        runInfo.lws1 = 1;
+        runInfo.lws2 = 1;
+
+        return runInfo;
+    }
+
+    JitConstants PoolingKerneGPU_b_fs_yx_fsv4::GetJitConstants(const pooling_params& params, DispatchData kd) const
+    {
+        auto jit = PoolingKernelBase::GetJitConstants(params, kd);
+
+        const size_t in_x_pitch = 4;
+        const size_t in_y_pitch = 4 * params.inputs[0].X().LogicalDimPadded();
+        jit.AddConstant(MakeJitConstant("IN_X_PITCH", in_x_pitch));
+        jit.AddConstant(MakeJitConstant("IN_Y_PITCH", in_y_pitch));
+
+        return jit;
+    }
+
+    KernelsData PoolingKerneGPU_b_fs_yx_fsv4::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, FORCE_PRIORITY_1);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h
new file mode 100644
index 000000000..43d1f8ac2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_b_fs_yx_fsv4.h
@@ -0,0 +1,36 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "pooling_kernel_base.h"
+
+namespace kernel_selector
+{
+    class PoolingKerneGPU_b_fs_yx_fsv4 : public PoolingKernelBase
+    {
+    public:
+        PoolingKerneGPU_b_fs_yx_fsv4() : PoolingKernelBase("pooling_gpu_b_fs_yx_fsv4") {}
+        virtual ~PoolingKerneGPU_b_fs_yx_fsv4() {}
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        DispatchData SetDefault(const pooling_params& params) const override;
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+        JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
+
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h
index b9831b97e..2dddbf5df 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_bfyx_block_opt.h
@@ -27,10 +27,10 @@ namespace kernel_selector
         virtual ~PoolingKernelGPUBfyxBlockOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
         DispatchData SetDefault(const pooling_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_af32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_af32.h
index c515282ff..a250495f5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_af32.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_af32.h
@@ -27,7 +27,9 @@ namespace kernel_selector
         virtual ~PoolingKerneGPU_byxf_af32() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         DispatchData SetDefault(const pooling_params& params) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h
index 9b3ad118d..75372802b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_opt.h
@@ -25,12 +25,12 @@ namespace kernel_selector
     public:
         PoolingKernelGPUByxfOpt() : PoolingKernelBase("pooling_gpu_byxf_opt") {}
         virtual ~PoolingKernelGPUByxfOpt() {}
-
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
+
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
         DispatchData SetDefault(const pooling_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h
index eb0f0d1f5..dfe6ddc31 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_byxf_padding_opt.h
@@ -27,10 +27,10 @@ namespace kernel_selector
         virtual ~PoolingKernelGPUByxfPaddingOpt() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params&, const optional_params&) const override;
         JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
         DispatchData SetDefault(const pooling_params& params) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
index efb5c67cd..034392d0d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h
@@ -27,10 +27,10 @@ namespace kernel_selector
         virtual ~PoolingKerneGPU_fs_bs_yx_bsv4_fsv32() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         DispatchData SetDefault(const pooling_params& params) const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const pooling_params& params, DispatchData kd) const override;
 
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h
index ec05c081b..b50fad87e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_int8_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~PoolingKernelGPUInt8Ref() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h
index 9bfd68753..3d39e9989 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_gpu_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~PoolingKernelGPURef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
index 91ec4d2dc..6538212ba 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pooling/pooling_kernel_selector.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@
 #include "pooling_kernel_gpu_byxf_af32.h"
 #include "pooling_kernel_gpu_int8_ref.h"
 #include "pooling_kernel_gpu_fs_bs_yx_bsv4_fsv32.h"
+#include "pooling_kernel_gpu_b_fs_yx_fsv4.h"
 
 namespace kernel_selector {
 
@@ -36,6 +37,7 @@ namespace kernel_selector {
         Attach<PoolingKernelGPUInt8Ref>();
         Attach<PoolingKerneGPU_byxf_af32>();
         Attach<PoolingKerneGPU_fs_bs_yx_bsv4_fsv32>();
+        Attach<PoolingKerneGPU_b_fs_yx_fsv4>();
     }
 
     KernelsData pooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp
new file mode 100644
index 000000000..c7bbdbb09
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.cpp
@@ -0,0 +1,67 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pyramid_roi_align_kernel_base.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+    JitConstants PyramidROIAlignKernelBase::GetJitConstants(const PyramidROIAlign_params& params)
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+        return jit;
+    }
+
+    PyramidROIAlignKernelBase::DispatchData PyramidROIAlignKernelBase::SetDefault(const PyramidROIAlign_params& params)
+    {
+        const auto& boxes = params.inputs.at(0);
+        DispatchData kd;
+
+        kd.fp16UnitUsed = params.inputs[0].GetDType() == Datatype::F16;
+
+        std::vector<size_t> global;
+        global = { boxes.Y().v, 1, 1 };
+
+        const auto& local = GetOptimalLocalWorkGroupSizes(global);
+
+        kd.gws0 = global[0];
+        kd.gws1 = global[1];
+        kd.gws2 = global[2];
+
+        kd.lws0 = local[0];
+        kd.lws1 = local[1];
+        kd.lws2 = local[2];
+
+        return kd;
+    }
+
+    KernelsData PyramidROIAlignKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimated_time) const
+    {
+        assert(params.GetType() == KernelType::PYRAMID_ROI_ALIGN);
+
+        const auto& prim_params = static_cast<const PyramidROIAlign_params&>(params); // NOLINT(cppcoreguidelines-pro-type-static-cast-downcast)
+        auto run_info = SetDefault(prim_params);
+        KernelData k_data = KernelData::Default<PyramidROIAlign_params>(params);
+        auto cldnn_jit = GetJitConstants(prim_params);
+        auto entry_point = GetEntryPoint(kernelName, prim_params.layerID, options);
+        auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = k_data.kernels[0];
+        FillCLKernelData(kernel, run_info, params.engineInfo, kernelName, jit, entry_point, "", false, false, (uint32_t)prim_params.inputs.size());
+
+        k_data.estimatedTime = estimated_time;
+
+        return { k_data };
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.h
new file mode 100644
index 000000000..1d7a0f370
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_base.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "common_kernel_base.h"
+#include "kernel_selector_params.h"
+
+namespace kernel_selector {
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // PyramidROIAlign_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct PyramidROIAlign_params : public base_params
+    {
+        PyramidROIAlign_params()
+            : base_params(KernelType::PYRAMID_ROI_ALIGN)
+        {}
+     };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // index_select_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct PyramidROIAlign_optional_params : optional_params
+    {
+        PyramidROIAlign_optional_params()
+            : optional_params(KernelType::PYRAMID_ROI_ALIGN)
+            {}
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // PyramidROIAlignKernelBase
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class PyramidROIAlignKernelBase : public common_kernel_base
+    {
+    public:
+        using common_kernel_base::common_kernel_base;
+        virtual ~PyramidROIAlignKernelBase() {}
+
+        using DispatchData = CommonDispatchData;
+
+    protected:
+        static JitConstants GetJitConstants(const PyramidROIAlign_params& params);
+        static DispatchData SetDefault(const PyramidROIAlign_params& params);
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params&, float estimated_time) const;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp
new file mode 100644
index 000000000..4de5ec110
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.cpp
@@ -0,0 +1,40 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pyramid_roi_align_kernel_ref.h"
+
+namespace kernel_selector {
+    ParamsKey PyramidROIAlignKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableBatching();
+        k.EnableDifferentTypes();
+
+        return k;
+    }
+
+    KernelsData PyramidROIAlignKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.h
new file mode 100644
index 000000000..8194d887c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_ref.h
@@ -0,0 +1,29 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "pyramid_roi_align_kernel_base.h"
+
+namespace kernel_selector {
+    class PyramidROIAlignKernelRef : public PyramidROIAlignKernelBase
+    {
+    public:
+        PyramidROIAlignKernelRef() : PyramidROIAlignKernelBase("pyramid_roi_align_gpu_ref") {}
+        KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
+        ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_selector.cpp
new file mode 100644
index 000000000..90e59127e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_selector.cpp
@@ -0,0 +1,28 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pyramid_roi_align_kernel_selector.h"
+#include "pyramid_roi_align_kernel_ref.h"
+
+namespace kernel_selector {
+    PyramidROIAlign_kernel_selector::PyramidROIAlign_kernel_selector()
+    {
+        Attach<PyramidROIAlignKernelRef>();
+    }
+
+    KernelsData PyramidROIAlign_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::PYRAMID_ROI_ALIGN);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_selector.h
new file mode 100644
index 000000000..82c4f017e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/pyramid_roi_align/pyramid_roi_align_kernel_selector.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector {
+    class PyramidROIAlign_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static PyramidROIAlign_kernel_selector &Instance() {
+            static PyramidROIAlign_kernel_selector instance;
+            return instance;
+        }
+
+        PyramidROIAlign_kernel_selector();
+        KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.h
index 53eb76204..27fae9bd1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/region_yolo/region_yolo_kernel_ref.h
@@ -60,10 +60,10 @@ namespace kernel_selector
 
         using DispatchData = CommonDispatchData;        
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const region_yolo_params& params) const;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.h
index a3081defe..79913b26d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_from_winograd_2x3_kernel.h
@@ -26,8 +26,10 @@ namespace kernel_selector
         ReorderFromWinograd2x3Kernel() : ReorderKernelBase("reorder_from_winograd_2x3_s1") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const reorder_params& params) const override;
         virtual DispatchData SetDefault(const reorder_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.h
index 08d78f429..88a6bde76 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel.h
@@ -27,7 +27,9 @@ namespace kernel_selector
         virtual ~ReorderKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const reorder_params& params) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
index 867a3c8b9..8d0edcb36 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_base.cpp
@@ -26,6 +26,8 @@ namespace kernel_selector
         switch (l)
         {
         case WeightsLayout::os_iyx_osv16:
+        case WeightsLayout::os_iyx_osv32:
+        case WeightsLayout::os_iyx_osv64:
         case WeightsLayout::os_iyx_osv16_rotate_180:
         case WeightsLayout::os_i_osv16:
         case WeightsLayout::os_i_osv16__ai8:
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_byxf_f32_to_byx8_f4_i8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_byxf_f32_to_byx8_f4_i8.cpp
new file mode 100644
index 000000000..e3562ea38
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_byxf_f32_to_byx8_f4_i8.cpp
@@ -0,0 +1,83 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "reorder_kernel_byxf_f32_to_byx8_f4_i8.h"
+#include "kernel_selector_utils.h"
+ 
+namespace kernel_selector 
+{
+    ParamsKey reorder_kernel_byxf_f32_to_byx8_f4_i8::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::INT8);
+        k.EnableDifferentTypes();
+        k.EnableInputLayout(DataLayout::byxf);
+        k.EnableOutputLayout(DataLayout::byx8_f4);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        return k;
+    }
+
+    bool reorder_kernel_byxf_f32_to_byx8_f4_i8::Validate(const Params& p, const optional_params& o) const
+    {
+        if (!ReorderKernelBase::Validate(p, o))
+        {
+            return false;
+        }
+
+        const reorder_params& params = static_cast<const reorder_params&>(p);
+
+        if (params.output.X().v % 16 != 0)
+            return false;
+
+        if (params.inputs[0].Feature().v != 3)
+            return false;
+
+        return true;
+    }
+
+    reorder_kernel_byxf_f32_to_byx8_f4_i8::DispatchData reorder_kernel_byxf_f32_to_byx8_f4_i8::SetDefault(const reorder_params& params) const
+    {
+        DispatchData kd;
+
+        const auto& input = params.inputs[0];
+
+        kd.gws0 = input.X().v;
+        kd.gws1 = input.Y().v;
+        kd.gws2 = input.Batch().v;
+
+        kd.lws0 = 16;
+        kd.lws1 = 1;
+        kd.lws2 = 1;
+
+        return kd;
+    }
+
+    JitConstants reorder_kernel_byxf_f32_to_byx8_f4_i8::GetJitConstants(const reorder_params& params) const
+    {
+        auto jit = ReorderKernelBase::GetJitConstants(params);
+        jit.Merge(GetTensorFriendlyWorkGroupsJit(params.inputs[0]));
+        return jit;
+    }
+
+    KernelsData reorder_kernel_byxf_f32_to_byx8_f4_i8::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        const reorder_params& orgParams = static_cast<const reorder_params&>(params);
+        return GetCommonKernelsData(orgParams, options, FORCE_PRIORITY_5);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_byxf_f32_to_byx8_f4_i8.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_byxf_f32_to_byx8_f4_i8.h
new file mode 100644
index 000000000..1a8882da6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_byxf_f32_to_byx8_f4_i8.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "reorder_kernel_base.h"
+ 
+namespace kernel_selector 
+{    
+    class reorder_kernel_byxf_f32_to_byx8_f4_i8 : public ReorderKernelBase
+    {
+    public:
+        reorder_kernel_byxf_f32_to_byx8_f4_i8() : ReorderKernelBase("reorder_data_byxf_f32_to_byx8_f4_i8") {}
+        virtual ~reorder_kernel_byxf_f32_to_byx8_f4_i8() {}
+
+        virtual bool Validate(const Params& p, const optional_params& o) const override;
+        virtual DispatchData SetDefault(const reorder_params& params) const override;
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual JitConstants GetJitConstants(const reorder_params& params) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.h
index ea1a82882..4a6105fab 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_fast_b1.h
@@ -26,8 +26,10 @@ namespace kernel_selector
         ReorderKernelFastBatch1() : ReorderKernelBase("reorder_data_fast_b1") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const reorder_params& params) const override;
         virtual DispatchData SetDefault(const reorder_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_selector.cpp
index db2b53892..0cad9606e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_selector.cpp
@@ -20,6 +20,7 @@
 #include "reorder_from_winograd_2x3_kernel.h"
 #include "reorder_to_winograd_2x3_kernel.h"
 #include "reorder_kernel_to_yxfb_batched.h"
+#include "reorder_kernel_byxf_f32_to_byx8_f4_i8.h"
 
 namespace kernel_selector {
 
@@ -30,6 +31,7 @@ namespace kernel_selector {
         Attach<ReorderFromWinograd2x3Kernel>();
         Attach<ReorderToWinograd2x3Kernel>();
         Attach<ReorderKernel_to_yxfb_batched>();
+        //Attach<reorder_kernel_byxf_f32_to_byx8_f4_i8>(); // Slower than default!
     }
 
     KernelsData reorder_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.h
index 8bea6efc1..82dd844d7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_kernel_to_yxfb_batched.h
@@ -24,12 +24,12 @@ namespace kernel_selector
     {
     public:
         ReorderKernel_to_yxfb_batched() : ReorderKernelBase("reorder_data_to_yxfb_batched") {}
-        virtual ParamsKey GetSupportedKey() const override;
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const reorder_params& params) const override;
         virtual DispatchData SetDefault(const reorder_params& arg) const override;
         bool Validate(const Params& p, const optional_params& o) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.h
index 1c07f9ebb..1a4382470 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_to_winograd_2x3_kernel.h
@@ -26,8 +26,10 @@ namespace kernel_selector
         ReorderToWinograd2x3Kernel() : ReorderKernelBase("reorder_to_winograd_2x3_s1") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const reorder_params& params) const override;
         virtual DispatchData SetDefault(const reorder_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.h
index a3c021d6c..22de3a4f4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_fyx_b_kernel.h
@@ -26,7 +26,9 @@ namespace kernel_selector
         ReorderWeightsImage_fyx_b_Kernel() : ReorderKernelBase("reorder_weights_image_2d_c4_fyx_b") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual DispatchData SetDefault(const reorder_weights_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.h
index 6cb1c8423..48940dae0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_image_winograd_6x3_kernel.h
@@ -26,7 +26,9 @@ namespace kernel_selector
         ReorderWeightsImageWinograd6x3Kernel() : ReorderKernelBase("reorder_weights_image_winograd_6x3_s1") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual DispatchData SetDefault(const reorder_weights_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_kernel.h
index 635b3466d..f769c1199 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_kernel.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~ReorderWeightsKernel() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.h
index c77354a08..6e4b75db8 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_2x3_kernel.h
@@ -26,7 +26,9 @@ namespace kernel_selector
         ReorderWeightsWinograd2x3Kernel() : ReorderKernelBase("reorder_weights_winograd_2x3_s1") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual DispatchData SetDefault(const reorder_weights_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.h
index 51f86fedd..9ffd3ab9c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorder/reorder_weights_winograd_6x3_kernel.h
@@ -26,7 +26,9 @@ namespace kernel_selector
         ReorderWeightsWinograd6x3Kernel() : ReorderKernelBase("reorder_weights_winograd_6x3_s1") {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         virtual DispatchData SetDefault(const reorder_weights_params& arg) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.h
index 9f5b6db21..05c6fd45d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reorg_yolo/reorg_yolo_kernel_ref.h
@@ -56,10 +56,9 @@ namespace kernel_selector
 
         using DispatchData = CommonDispatchData;        
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
-
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual JitConstants GetJitConstants(const reorg_yolo_params& params) const;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
index 9c9c760ee..e0efa9ce0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.cpp
@@ -66,7 +66,7 @@ namespace kernel_selector
 
         kernel.workGroups.global = { gws[0], gws[1], gws[2] * gws[3] };
         kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
-        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
         kernel.arguments = GetArgsDesc(1, false, false);
         
         kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.h
index 86595fca2..b4d875790 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reshape/reshape_kernel_ref.h
@@ -48,6 +48,8 @@ namespace kernel_selector
         virtual ~ReshapeKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp
new file mode 100644
index 000000000..7a121192c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.cpp
@@ -0,0 +1,87 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "reverse_sequence_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+    ParamsKey ReverseSequenceKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableAllInputLayout();
+        k.EnableAllOutputLayout();
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableDifferentTypes();
+        return k;
+    }
+
+    CommonDispatchData ReverseSequenceKernelRef::SetDefault(const reverse_sequence_params& params, const optional_params&) const
+    {
+        CommonDispatchData runInfo;
+
+        std::vector<size_t> global = { params.output.Batch().v, params.output.Feature().v, params.output.Y().v * params.output.X().v };
+
+        auto local = GetOptimalLocalWorkGroupSizes(global);
+
+        runInfo.gws0 = global[0];
+        runInfo.gws1 = global[1];
+        runInfo.gws2 = global[2];
+
+        runInfo.lws0 = local[0];
+        runInfo.lws1 = local[1];
+        runInfo.lws2 = local[2];
+
+        return runInfo;
+    }
+
+    JitConstants ReverseSequenceKernelRef::GetJitConstants(const reverse_sequence_params& params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        jit.AddConstant(MakeJitConstant("SEQ_AXIS", params.seq_axis));
+        jit.AddConstant(MakeJitConstant("BATCH_AXIS", params.batch_axis));
+
+        return jit;
+    }
+
+    KernelsData ReverseSequenceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelData kd = KernelData::Default<reverse_sequence_params>(params);
+        reverse_sequence_params& newParams = *static_cast<reverse_sequence_params*>(kd.params.get());
+
+        assert(params.GetType() == KernelType::REVERSE_SEQUENCE);
+
+        auto runInfo = SetDefault(newParams, options);
+        auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+        auto cldnn_jit = GetJitConstants(newParams);
+        std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = kd.kernels[0];
+
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, "", false, false, 2);
+
+        kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.h
new file mode 100644
index 000000000..c12a5f96b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_ref.h
@@ -0,0 +1,57 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // reverse_sequence_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct reverse_sequence_params : public base_params
+    {
+        reverse_sequence_params() : base_params(KernelType::REVERSE_SEQUENCE) {}
+
+        int32_t seq_axis;
+        int32_t batch_axis;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            return base_params::GetParamsKey();
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // reverse_sequence_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct reverse_sequence_optional_params : optional_params
+    {
+        reverse_sequence_optional_params() : optional_params(KernelType::REVERSE_SEQUENCE) {}
+    };
+
+    class ReverseSequenceKernelRef : public common_kernel_base
+    {
+    public:
+        ReverseSequenceKernelRef() : common_kernel_base("reverse_sequence_ref") {}
+        virtual ~ReverseSequenceKernelRef() {}
+        virtual JitConstants GetJitConstants(const reverse_sequence_params& params) const;
+        virtual CommonDispatchData SetDefault(const reverse_sequence_params& params, const optional_params&) const;
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_selector.cpp
new file mode 100644
index 000000000..490406bcf
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "reverse_sequence_kernel_selector.h"
+#include "reverse_sequence_kernel_ref.h"
+
+namespace kernel_selector {
+
+    reverse_sequence_kernel_selector::reverse_sequence_kernel_selector()
+    {
+        Attach<ReverseSequenceKernelRef>();
+    }
+
+    KernelsData reverse_sequence_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::REVERSE_SEQUENCE);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_selector.h
new file mode 100644
index 000000000..18067f253
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/reverse_sequence/reverse_sequence_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+    class reverse_sequence_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static reverse_sequence_kernel_selector &Instance() {
+            static reverse_sequence_kernel_selector instance_;
+            return instance_;
+        }
+
+        reverse_sequence_kernel_selector();
+
+        virtual ~reverse_sequence_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp
new file mode 100644
index 000000000..14523fa56
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.cpp
@@ -0,0 +1,83 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "roi_pooling_kernel_base.h"
+
+namespace kernel_selector {
+
+    static ROIPoolingKernelBase::DispatchData SetDefault(const roi_pooling_params& params)
+    {
+        ROIPoolingKernelBase::DispatchData kd;
+
+        kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16);
+
+        // Determine global work sizes.
+        kd.gws0 = params.output.LogicalSize();
+        kd.gws1 = 1;
+        kd.gws2 = 1;
+
+        // Find largest positive local work size that is divider for global work size.
+        kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
+        while (kd.gws0 % kd.lws0 != 0)
+        {
+            --kd.lws0;
+        }
+        kd.lws1 = 1;
+        kd.lws2 = 1;
+
+        return kd;
+    }
+
+    JitConstants ROIPoolingKernelBase::GetJitConstants(const roi_pooling_params& rp) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(rp);
+
+        jit.AddConstants({
+            MakeJitConstant("POOLED_HEIGHT",     rp.pooledHeight),
+            MakeJitConstant("POOLED_WIDTH",      rp.pooledWidth),
+            MakeJitConstant("SPATIAL_SCALE",     rp.spatialScale),
+            MakeJitConstant(toString(rp.mode) + "_POOLING", 1),
+        });
+
+        return jit;
+    }
+
+    KernelsData ROIPoolingKernelBase::GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const
+    {
+        assert(params.GetType() == KernelType::ROI_POOLING);
+        const roi_pooling_params& orgParams = static_cast<const roi_pooling_params&>(params);
+
+        if (orgParams.activation.function != ActivationFunction::NONE)
+        {
+            return{};
+        }
+
+        DispatchData runInfo = SetDefault(orgParams);
+        KernelData kd = KernelData::Default<roi_pooling_params>(params);
+
+        auto cldnn_jit = GetJitConstants(orgParams);
+        auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
+        auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = kd.kernels[0];
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
+
+        kd.estimatedTime = estimatedTime;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.h
new file mode 100644
index 000000000..ca27f4755
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_base.h
@@ -0,0 +1,75 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include <iostream>
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // roi_pooling_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct roi_pooling_params : public base_params
+    {
+        roi_pooling_params() : base_params(KernelType::ROI_POOLING) {}
+
+        PoolType    mode = PoolType::MAX;
+        bool        position_sensitive = false;
+        int         pooledWidth = 0;
+        int         pooledHeight = 0;
+        int         spatial_bins_x = 1;
+        int         spatial_bins_y = 1;
+        float       spatialScale = 1.f;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            auto k = base_params::GetParamsKey();
+            if (position_sensitive)
+            {
+                k.EnablePositionSensitivePooling();
+            }
+            k.EnablePoolType(mode);
+
+            return k;
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // roi_pooling_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct roi_pooling_optional_params : optional_params
+    {
+        roi_pooling_optional_params() : optional_params(KernelType::ROI_POOLING) {}
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // ROIPoolingKernelBase
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class ROIPoolingKernelBase : public common_kernel_base
+    {
+    public:
+        using common_kernel_base::common_kernel_base;
+        virtual ~ROIPoolingKernelBase() {};
+
+        using DispatchData = CommonDispatchData;
+
+        KernelsData GetCommonKernelsData(const Params& params, const optional_params& options, float estimatedTime) const;
+    protected:
+        virtual JitConstants GetJitConstants(const roi_pooling_params& params) const;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ps_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ps_ref.cpp
new file mode 100644
index 000000000..ba22e2190
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ps_ref.cpp
@@ -0,0 +1,55 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "roi_pooling_kernel_ps_ref.h"
+
+namespace kernel_selector {
+
+    ParamsKey PSROIPoolingKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::brfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        k.EnableDifferentTypes();
+        k.EnablePoolType(PoolType::AVG);
+        k.EnablePoolType(PoolType::BILINEAR);
+        k.EnablePositionSensitivePooling();
+        return k;
+    }
+
+    JitConstants PSROIPoolingKernelRef::GetJitConstants(const roi_pooling_params& rp) const
+    {
+        JitConstants jit = ROIPoolingKernelBase::GetJitConstants(rp);
+
+        jit.AddConstants({ MakeJitConstant("SPATIAL_BINS_X", rp.spatial_bins_x),
+                           MakeJitConstant("SPATIAL_BINS_Y", rp.spatial_bins_y),
+                         });
+
+        return jit;
+    }
+
+    KernelsData PSROIPoolingKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ps_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ps_ref.h
new file mode 100644
index 000000000..280f950e6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ps_ref.h
@@ -0,0 +1,40 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "roi_pooling_kernel_base.h"
+
+namespace kernel_selector
+{
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // PSROIPoolingKernelRef
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class PSROIPoolingKernelRef : public ROIPoolingKernelBase
+    {
+    public:
+        PSROIPoolingKernelRef() : ROIPoolingKernelBase("roi_pooling_ps_ref") {}
+        virtual ~PSROIPoolingKernelRef() {}
+
+        using DispatchData = CommonDispatchData;
+
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    protected:
+        JitConstants GetJitConstants(const roi_pooling_params& params) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
index 1e5a0f5fc..375db2d0e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,81 +27,18 @@ namespace kernel_selector {
         k.EnableOutputDataType(Datatype::F32);
         k.EnableInputLayout(DataLayout::bfyx);
         k.EnableOutputLayout(DataLayout::brfyx);
-        k.EnablePoolType(PoolType::MAX);
-        k.EnablePoolType(PoolType::AVG);
-        k.EnablePoolType(PoolType::BILINEAR);
         k.EnableTensorOffset();
         k.EnableTensorPitches();
         k.EnableBatching();
         k.EnableDifferentTypes();
+        k.EnablePoolType(PoolType::MAX);
+        k.EnablePoolType(PoolType::AVG);
+        k.EnablePoolType(PoolType::BILINEAR);
         return k;
     }
 
-    static ROIPoolingKernelRef::DispatchData SetDefault(const roi_pooling_params& params)
-    {
-        ROIPoolingKernelRef::DispatchData kd;
-
-        kd.fp16UnitUsed = (params.inputs[0].GetDType() == Datatype::F16);
-
-        // Determine global work sizes.
-        kd.gws0 = params.output.LogicalSize();
-        kd.gws1 = 1;
-        kd.gws2 = 1;
-
-        // Find largest positive local work size that is divider for global work size.
-        kd.lws0 = std::min(std::max(kd.gws0, static_cast<size_t>(1)), static_cast<size_t>(32));
-        while (kd.gws0 % kd.lws0 != 0)
-        {
-            --kd.lws0;
-        }
-        kd.lws1 = 1;
-        kd.lws2 = 1;
-
-        return kd;
-    }
-
-    JitConstants ROIPoolingKernelRef::GetJitConstants(const roi_pooling_params& rp) const
-    {
-        JitConstants jit = MakeBaseParamsJitConstants(rp);
-
-        jit.AddConstants({
-            MakeJitConstant("POOLED_HEIGHT",     rp.pooledHeight),
-            MakeJitConstant("POOLED_WIDTH",      rp.pooledWidth),
-            MakeJitConstant("SPATIAL_SCALE",     rp.spatialScale),
-            MakeJitConstant("GROUP_SIZE",        rp.groupSize),
-            MakeJitConstant(toString(rp.mode) + "_POOLING", 1),
-        });
-
-        jit.AddConstants({
-            MakeJitConstant("USE_OLD_SCALE_AND_ROUNDING",   rp.groupSize == 0)
-        });
-
-        return jit;
-    }
-
     KernelsData ROIPoolingKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
     {
-        assert(params.GetType() == KernelType::ROI_POOLING);
-        const roi_pooling_params& orgParams = static_cast<const roi_pooling_params&>(params);
-
-        if (orgParams.activationFunc != ActivationFunction::NONE)
-        {
-            return{};
-        }
-
-        DispatchData runInfo = SetDefault(orgParams);
-        KernelData kd = KernelData::Default<roi_pooling_params>(params);
-
-        auto cldnn_jit = GetJitConstants(orgParams);
-        auto entry_point = GetEntryPoint(kernelName, orgParams.layerID, options);
-        auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
-
-        auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
-        kernel.arguments.push_back({ ArgumentDescriptor::Types::INPUT, 1 });
-
-        kd.estimatedTime = FORCE_PRIORITY_9;
-
-        return{ kd };
+        return GetCommonKernelsData(params, options, FORCE_PRIORITY_9);
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.h
index 1bc3c602d..e4c8934fd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_ref.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,52 +16,25 @@
 
 #pragma once
 
-#include "common_kernel_base.h"
+#include "roi_pooling_kernel_base.h"
 
 namespace kernel_selector
 {
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // roi_pooling_params
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    struct roi_pooling_params : public base_params
-    {
-        roi_pooling_params() : base_params(KernelType::ROI_POOLING) {}
-
-        PoolType    mode = PoolType::MAX;
-        size_t      pooledWidth = 0;
-        size_t      pooledHeight = 0;
-        size_t      groupSize = 0;
-        float       spatialScale = 1.f;
-
-        virtual ParamsKey GetParamsKey() const
-        {
-            return base_params::GetParamsKey();
-        }
-    };
-
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    // roi_pooling_optional_params
-    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    struct roi_pooling_optional_params : optional_params
-    {
-        roi_pooling_optional_params() : optional_params(KernelType::ROI_POOLING) {}
-    };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // ROIPoolingKernelRef
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-    class ROIPoolingKernelRef : public common_kernel_base
+    class ROIPoolingKernelRef : public ROIPoolingKernelBase
     {
     public:
-        ROIPoolingKernelRef() : common_kernel_base("roi_pooling_ref") {}
+        ROIPoolingKernelRef() : ROIPoolingKernelBase("roi_pooling_ref") {}
         virtual ~ROIPoolingKernelRef() {}
 
         using DispatchData = CommonDispatchData;
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
-        JitConstants GetJitConstants(const roi_pooling_params& params) const;
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_selector.cpp
index 9dbb71ccb..969362dbe 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/roi_pooling/roi_pooling_kernel_selector.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,16 +16,18 @@
 
 #include "roi_pooling_kernel_selector.h"
 #include "roi_pooling_kernel_ref.h"
- 
-namespace kernel_selector 
+#include "roi_pooling_kernel_ps_ref.h"
+
+namespace kernel_selector
 {
     roi_pooling_kernel_selector::roi_pooling_kernel_selector()
     {
         Attach<ROIPoolingKernelRef>();
+        Attach<PSROIPoolingKernelRef>();
     }
 
     KernelsData roi_pooling_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
     {
         return GetNaiveBestKernel(params, options, KernelType::ROI_POOLING);
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
index 61edddabf..e90f591c1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_base.cpp
@@ -57,7 +57,7 @@ namespace kernel_selector
         auto jit = CreateJit(kernelName, cldnn_jit, entry_point);
 
         auto& kernel = kd.kernels[0];
-        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, ROUND_ROBIN, true, !orgParams.bias.empty(), 2);
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point, DEFAULT, true, !orgParams.bias.empty(), 2);
 
         if (orgParams.use_momentum)
         {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.h
index 59ed5d738..3538572a0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/scale_grad_weights/scale_grad_weights_kernel_ref.h
@@ -26,6 +26,7 @@ namespace kernel_selector {
         ScaleGradWeightsKernelRef() : ScaleGradWeightsKernelBase("scale_grad_weights_gpu_ref") {}
         virtual ~ScaleGradWeightsKernelRef() {}
 
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
index 09b3a0151..43d2e113c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_base.cpp
@@ -167,7 +167,7 @@ namespace kernel_selector
         kernel.workGroups.global = { runInfo.gws0, runInfo.gws1, runInfo.gws2 };
         kernel.workGroups.local = { runInfo.lws0, runInfo.lws1, runInfo.lws2 };
 
-        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
         kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
 
         kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
index a72c0e90a..4663a38e0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/select/select_kernel_ref.h
@@ -27,9 +27,9 @@ namespace kernel_selector
         virtual ~SelectKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         bool Validate(const Params& p, const optional_params& o) const override;
 
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp
new file mode 100644
index 000000000..fd5528f62
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.cpp
@@ -0,0 +1,102 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "shuffle_channels_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+    ParamsKey ShuffleChannelsKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableInputLayout(DataLayout::bfyx);
+        k.EnableOutputLayout(DataLayout::bfyx);
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        return k;
+    }
+
+    CommonDispatchData ShuffleChannelsKernelRef::SetDefault(const shuffle_channels_params& params, const optional_params&) const
+    {
+        CommonDispatchData runInfo;
+
+        std::vector<size_t> global = { params.output.Batch().v, params.output.Feature().v, params.output.Y().v * params.output.X().v };
+
+        auto local = GetOptimalLocalWorkGroupSizes(global);
+
+        runInfo.gws0 = global[0];
+        runInfo.gws1 = global[1];
+        runInfo.gws2 = global[2];
+
+        runInfo.lws0 = local[0];
+        runInfo.lws1 = local[1];
+        runInfo.lws2 = local[2];
+
+        return runInfo;
+    }
+
+    JitConstants ShuffleChannelsKernelRef::GetJitConstants(const shuffle_channels_params& params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        jit.AddConstant(MakeJitConstant("GROUPS_NUMBER", params.group));
+
+        auto getDimSizeByAxis = [](const shuffle_channels_params& params) -> size_t {
+            switch (params.axis) {
+                case 0:
+                    return params.inputs[0].Batch().v;
+                case 1:
+                    return params.inputs[0].Feature().v;
+                case 2:
+                    return params.inputs[0].Y().v;
+                case 3:
+                    return params.inputs[0].X().v;
+            }
+            return 0;
+        };
+
+        jit.AddConstant(MakeJitConstant("GROUP_SIZE", getDimSizeByAxis(params) / params.group));
+        jit.AddConstant(MakeJitConstant("AXIS", params.axis));
+
+        return jit;
+    }
+
+    KernelsData ShuffleChannelsKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelData kd = KernelData::Default<shuffle_channels_params>(params);
+        shuffle_channels_params& newParams = *static_cast<shuffle_channels_params*>(kd.params.get());
+
+        assert(params.GetType() == KernelType::SHUFFLE_CHANNELS);
+
+        auto runInfo = SetDefault(newParams, options);
+        auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+        auto cldnn_jit = GetJitConstants(newParams);
+        std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = kd.kernels[0];
+
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+        kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h
new file mode 100644
index 000000000..6f6f3d041
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_ref.h
@@ -0,0 +1,57 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // shuffle_channels_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct shuffle_channels_params : public base_params
+    {
+        shuffle_channels_params() : base_params(KernelType::SHUFFLE_CHANNELS) {}
+
+        int32_t group;
+        int32_t axis;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            return base_params::GetParamsKey();
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // shuffle_channels_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct shuffle_channels_optional_params : optional_params
+    {
+        shuffle_channels_optional_params() : optional_params(KernelType::SHUFFLE_CHANNELS) {}
+    };
+
+    class ShuffleChannelsKernelRef : public common_kernel_base
+    {
+    public:
+        ShuffleChannelsKernelRef() : common_kernel_base("shuffle_channels_ref") {}
+        virtual ~ShuffleChannelsKernelRef() {}
+        virtual JitConstants GetJitConstants(const shuffle_channels_params& params) const;
+        virtual CommonDispatchData SetDefault(const shuffle_channels_params& params, const optional_params&) const;
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_selector.cpp
new file mode 100644
index 000000000..41088def3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "shuffle_channels_kernel_selector.h"
+#include "shuffle_channels_kernel_ref.h"
+
+namespace kernel_selector {
+
+    shuffle_channels_kernel_selector::shuffle_channels_kernel_selector()
+    {
+        Attach<ShuffleChannelsKernelRef>();
+    }
+
+    KernelsData shuffle_channels_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::SHUFFLE_CHANNELS);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_selector.h
new file mode 100644
index 000000000..dadc63f36
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/shuffle_channels/shuffle_channels_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+    class shuffle_channels_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static shuffle_channels_kernel_selector &Instance() {
+            static shuffle_channels_kernel_selector instance_;
+            return instance_;
+        }
+
+        shuffle_channels_kernel_selector();
+
+        virtual ~shuffle_channels_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
index 4d2c36d39..e0f93f071 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_base.cpp
@@ -105,7 +105,7 @@ namespace kernel_selector
         const softmax_params& params = static_cast<const softmax_params&>(p);
         const auto& input = params.inputs[0];
 
-        if (params.activationFunc != ActivationFunction::NONE)
+        if (params.activation.function != ActivationFunction::NONE)
         {
             return false;
         }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.h
index 52a30f58c..5f96abf9d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_bf.h
@@ -28,7 +28,9 @@ namespace kernel_selector
         virtual ~SoftmaxKernel_bf() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
         DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override;
+
+    protected:
+        virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.h
index 461670ae3..bb9c45c6b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_fb.h
@@ -28,10 +28,10 @@ namespace kernel_selector
         virtual ~SoftmaxKernel_fb() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        virtual ParamsKey GetSupportedKey() const override;
 
     protected:
+        virtual ParamsKey GetSupportedKey() const override;
         virtual bool Validate(const Params& p, const optional_params& o) const override;
         DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h
index 3f3bf6f24..51afb8442 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_items_class_optimized.h
@@ -28,10 +28,10 @@ namespace kernel_selector
         virtual ~SoftmaxKerneItemsClassOptimized() {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
+        ParamsKey GetSupportedKey() const override;
         JitConstants GetJitConstants(const softmax_params& params, DispatchData kd) const override;
         DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.h
index f517a4228..4c6fd10cd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax/softmax_kernel_ref.h
@@ -28,9 +28,9 @@ namespace kernel_selector
         virtual ~SoftmaxKernelRef() {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
-        ParamsKey GetSupportedKey() const override;
 
     protected:
+        ParamsKey GetSupportedKey() const override;
         DispatchData SetDefault(const softmax_params& params, const optional_params& optParams) const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_ref.h
index a30be9baf..b9494e619 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/softmax_loss_grad/softmax_loss_grad_kernel_ref.h
@@ -28,6 +28,8 @@ namespace kernel_selector
         virtual ~SoftmaxLossGradKernelRef() {}
 
         KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp
new file mode 100644
index 000000000..c34d554d3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.cpp
@@ -0,0 +1,104 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "strided_slice_kernel_ref.h"
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector
+{
+    ParamsKey StridedSliceKernelRef::GetSupportedKey() const
+    {
+        ParamsKey k;
+        k.EnableInputDataType(Datatype::F16);
+        k.EnableInputDataType(Datatype::F32);
+        k.EnableOutputDataType(Datatype::F16);
+        k.EnableOutputDataType(Datatype::F32);
+        k.EnableAllInputLayout();
+        k.EnableAllOutputLayout();
+        k.EnableTensorOffset();
+        k.EnableTensorPitches();
+        k.EnableBatching();
+        return k;
+    }
+
+    CommonDispatchData StridedSliceKernelRef::SetDefault(const strided_slice_params& params, const optional_params&) const
+    {
+        CommonDispatchData runInfo;
+        std::vector<size_t> gws;
+
+        // If the new_axis_mask is set, then begin, end, and stride are ignored
+        // and a new length 1 dimension is adding. Input data just copying to output
+        // TODO: remove data copying in case where only shape size changing
+        if (params.new_axis_mask.size() != 0)
+            gws = { params.inputs[0].Batch().v, params.inputs[0].Feature().v, params.inputs[0].Y().v * params.inputs[0].X().v };
+        else
+            gws = { params.output.Batch().v, params.output.Feature().v, params.output.Y().v * params.output.X().v };
+
+        auto lws = GetOptimalLocalWorkGroupSizes(gws);
+
+        runInfo.gws0 = gws[0];
+        runInfo.gws1 = gws[1];
+        runInfo.gws2 = gws[2];
+
+        runInfo.lws0 = lws[0];
+        runInfo.lws1 = lws[1];
+        runInfo.lws2 = lws[2];
+
+        return runInfo;
+    }
+
+    JitConstants StridedSliceKernelRef::GetJitConstants(const strided_slice_params& params) const
+    {
+        JitConstants jit = MakeBaseParamsJitConstants(params);
+
+        auto makeJitConstForParam = [](JitConstants& jit, const std::string name, const std::vector<int32_t> vec) {
+            jit.AddConstant(MakeJitConstant(name + "_SIZES", vec));
+            jit.AddConstant(MakeJitConstant(name + "_BATCH", vec[0]));
+            jit.AddConstant(MakeJitConstant(name + "_FEATURE", vec[1]));
+            jit.AddConstant(MakeJitConstant(name + "_Y", vec[2]));
+            jit.AddConstant(MakeJitConstant(name + "_X", vec[3]));
+        };
+
+        makeJitConstForParam(jit, "SLICE_BEGIN", params.striding_params[0]);
+        makeJitConstForParam(jit, "SLICE_END", params.striding_params[1]);
+        makeJitConstForParam(jit, "SLICE_STEPS", params.striding_params[2]);
+
+        jit.AddConstant(MakeJitConstant("NEW_AXIS_MODE", std::find(params.new_axis_mask.begin(), params.new_axis_mask.end(), 1) != params.new_axis_mask.end()));
+
+        return jit;
+    }
+
+    KernelsData StridedSliceKernelRef::GetKernelsData(const Params& params, const optional_params& options) const
+    {
+        KernelData kd = KernelData::Default<strided_slice_params>(params);
+        strided_slice_params& newParams = *static_cast<strided_slice_params*>(kd.params.get());
+
+        assert(params.GetType() == KernelType::STRIDED_SLICE);
+
+        auto runInfo = SetDefault(newParams, options);
+        auto entry_point = GetEntryPoint(kernelName, newParams.layerID, options);
+        auto cldnn_jit = GetJitConstants(newParams);
+        std::string jit = CreateJit(kernelName, cldnn_jit, entry_point);
+
+        auto& kernel = kd.kernels[0];
+
+        FillCLKernelData(kernel, runInfo, params.engineInfo, kernelName, jit, entry_point);
+
+        kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
+
+        return{ kd };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.h
new file mode 100644
index 000000000..159e658f5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_ref.h
@@ -0,0 +1,61 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "common_kernel_base.h"
+
+namespace kernel_selector
+{
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // strided_slice_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct strided_slice_params : public base_params
+    {
+        strided_slice_params() : base_params(KernelType::STRIDED_SLICE) {}
+
+        std::vector<std::vector<int32_t>> striding_params;
+        std::vector<uint8_t> begin_mask;
+        std::vector<uint8_t> end_mask;
+        std::vector<uint8_t> ellipsis_mask;
+        std::vector<uint8_t> new_axis_mask;
+        std::vector<uint8_t> shrink_axis_mask;
+
+        virtual ParamsKey GetParamsKey() const
+        {
+            return base_params::GetParamsKey();
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // strided_slice_optional_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct strided_slice_optional_params : optional_params
+    {
+        strided_slice_optional_params() : optional_params(KernelType::STRIDED_SLICE) {}
+    };
+
+    class StridedSliceKernelRef : public common_kernel_base
+    {
+    public:
+        StridedSliceKernelRef() : common_kernel_base("strided_slice_ref") {}
+        virtual ~StridedSliceKernelRef() {}
+        virtual JitConstants GetJitConstants(const strided_slice_params& params) const;
+        virtual CommonDispatchData SetDefault(const strided_slice_params& params, const optional_params&) const;
+        virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+        virtual ParamsKey GetSupportedKey() const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_selector.cpp
new file mode 100644
index 000000000..7dfba71a4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_selector.cpp
@@ -0,0 +1,31 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "strided_slice_kernel_selector.h"
+#include "strided_slice_kernel_ref.h"
+
+namespace kernel_selector {
+
+    strided_slice_kernel_selector::strided_slice_kernel_selector()
+    {
+        Attach<StridedSliceKernelRef>();
+    }
+
+    KernelsData strided_slice_kernel_selector::GetBestKernels(const Params& params, const optional_params& options) const
+    {
+        return GetNaiveBestKernel(params, options, KernelType::STRIDED_SLICE);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_selector.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_selector.h
new file mode 100644
index 000000000..6f983b160
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/strided_slice/strided_slice_kernel_selector.h
@@ -0,0 +1,37 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "kernel_selector.h"
+
+namespace kernel_selector
+{
+    class strided_slice_kernel_selector : public kernel_selector_base
+    {
+    public:
+        static strided_slice_kernel_selector &Instance() {
+            static strided_slice_kernel_selector instance_;
+            return instance_;
+        }
+
+        strided_slice_kernel_selector();
+
+        virtual ~strided_slice_kernel_selector() {}
+
+        virtual KernelsData GetBestKernels(const Params& params, const optional_params& options) const override;
+    };
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
index 967dab817..4f08d7a27 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/tile/tile_kernel_ref.h
@@ -53,6 +53,8 @@ namespace kernel_selector
         virtual JitConstants GetJitConstants(const tile_params& params) const;
         virtual CommonDispatchData SetDefault(const tile_params& params, const optional_params&) const;
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
index 889daf8bc..ae696f88d 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_base.cpp
@@ -76,7 +76,7 @@ namespace kernel_selector
 
         kernel.workGroups.global = { out.X().v, out.Y().v, out.Feature().v * out.Batch().v };
         kernel.workGroups.local = GetOptimalLocalWorkGroupSizes(kernel.workGroups.global);
-        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, ROUND_ROBIN);
+        kernel.kernelString = GetKernelString(kernelName, jit, entry_point, params.engineInfo, DEFAULT);
         kernel.arguments = GetArgsDesc((uint32_t)newParams.inputs.size(), false, false);
 
         kd.estimatedTime = DONT_USE_IF_HAVE_SOMETHING_ELSE;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.h
index b7b5596dc..de27559af 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/upsampling/upsampling_kernel_ref.h
@@ -27,6 +27,8 @@ namespace kernel_selector
         virtual ~UpSamplingKernelRef() {}
 
         virtual KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
+
+    protected:
         virtual ParamsKey GetSupportedKey() const override;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp
index d9ccd15cb..307390d24 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.cpp
@@ -15,143 +15,106 @@
 */
 
 #include "auto_tuner.h"
-#include "auto_tuner_offline.h"
 #include <iostream>
 #include <sstream>
 #include <fstream>
+#include <iomanip>
+#include "istreamwrapper.h"
+#include "stringbuffer.h"
+#include "prettywriter.h"
 
- 
-namespace kernel_selector 
+
+namespace kernel_selector
 {
-    std::tuple<std::string, int> AutoTuner::LoadKernelOnline(const TuningMode tuningMode, const std::string& tuningFilePath, const std::string& deviceID, const std::string& driverVersion, const std::string& hostVersion, const std::string& hash)
+    std::tuple<std::string, int> AutoTuner::LoadKernelOnline(const TuningMode tuningMode, const std::string& cacheFilePath, const uint32_t computeUnitsCount,  const std::string& hash)
     {
         std::lock_guard<std::mutex> lock(mutex);
-
-        //First, check if the tuning file has been already loaded to cache
-        auto const& tuningFileCache = onlineCache.find(tuningFilePath);
-        if (tuningFileCache == onlineCache.end())
+        rapidjson::Document cacheData;
+        std::ifstream tuningFile(cacheFilePath);
+        if (tuningFile && tuningFile.good())
         {
-            // Load tuning file to cache
-            onlineCache[tuningFilePath] = {};
-
-            std::ifstream tuningFile(tuningFilePath);
-            std::string cachedDeviceId;
-            std::string cachedDriverVersion;
-            std::string cachedHostVersion;
-            std::string cachedhash;
-            std::string cachedkernelName;
-            int cachedIndex;
-            std::string line;
-
-            if (tuningFile) // Tuning file exists
+            rapidjson::IStreamWrapper isw{ tuningFile };
+            cacheData.ParseStream(isw);
+        }
+        else // Tuning file doesn't exist
+        {
+            if (tuningMode == TuningMode::TUNING_USE_CACHE)
             {
-                // Read device ID
-                tuningFile >> cachedDeviceId;
-                if (!tuningFile.good() || (cachedDeviceId.compare(deviceID) != 0))
-                {
-                    throw std::runtime_error("Tuning file bad structure or wrong device ID. Re-generate cache in TUNE_AND_CACHE mode.");
-                }
-
-                // Read driver version
-                tuningFile >> cachedDriverVersion;
-                if (!tuningFile.good() || (cachedDriverVersion.compare(driverVersion) != 0))
-                {
-                    throw std::runtime_error("Tuning file bad structure or wrong driver version. Re-generate cache in TUNE_AND_CACHE mode.");
-                }
+                throw std::runtime_error("Tuning file: " + cacheFilePath + " could not be read! Must provide a valid cache file in USE_CACHE mode.");
+            }
 
-                // Read host version
-                tuningFile >> cachedHostVersion;
-                if (!tuningFile.good() || (cachedHostVersion.compare(hostVersion) != 0))
-                {
-                    throw std::runtime_error("Tuning file bad structure or wrong host version. Re-generate cache in TUNE_AND_CACHE mode.");
-                }
+            // Create a new tuning file and write the versions
+            std::ofstream newTuningFile(cacheFilePath, std::ofstream::out);
 
-                // Read optimal kernel/config data 
-                while (std::getline(tuningFile, line))
-                {
-                    if (line.empty())
-                    {
-                        continue;
-                    }
-                    std::istringstream iss(line);
-                    iss >> cachedhash >> cachedkernelName >> cachedIndex;
-                    if (iss.fail())
-                    {
-                        throw std::runtime_error("Tuning file bad structure. Re-generate cache in TUNE_AND_CACHE mode.");
-                    }
+        }
+        tuningFile.close();
 
-                    // Update tuning cache 
-                    onlineCache[tuningFilePath].td[cachedhash] = std::make_tuple(cachedkernelName, cachedIndex);
-                }
+        onlineCache = std::make_shared<rapidjson::Document>(std::move(cacheData));
 
-                tuningFile.close();
-            }
-            else // Tuning file doesn't exist
+        // Tuning file is loaded
+        auto computeUnitsStr = std::to_string(computeUnitsCount);
+        if (!onlineCache->IsNull())
+        {
+            auto cacheObject = onlineCache->GetObject();
+            if (onlineCache->HasMember(computeUnitsStr.c_str()))
             {
-                if (tuningMode == TuningMode::TUNING_USE_CACHE)
+                if (cacheObject[computeUnitsStr.c_str()].HasMember(hash.c_str()))
                 {
-                    throw std::runtime_error("Tuning file: " + tuningFilePath + " could not be read! Must provide a valid cache file in USE_CACHE mode.");
+                    const rapidjson::Value& prog = cacheObject[computeUnitsStr.c_str()][hash.c_str()];
+                    return std::make_tuple(prog[0].GetString(), prog[1].GetInt());
                 }
-
-                // Create a new tuning file and write the versions
-                std::ofstream newTuningFile(tuningFilePath, std::ofstream::out);
-
-                newTuningFile << deviceID << "\n";
-                newTuningFile << driverVersion << "\n";
-                newTuningFile << hostVersion << "\n";
             }
         }
+        return std::make_pair("", 0);
+        
+    }
 
-        // Tuning file is loaded
-        auto const& tuningFileData = onlineCache[tuningFilePath];
-        auto const& hashData = tuningFileData.td.find(hash);
-        if (hashData != tuningFileData.td.end())
+    void AutoTuner::StoreKernel(const std::string& cacheFilePath, const std::string& hash, std::string implementationName, const int tuneIndex, const uint32_t computeUnitsCount)
+    {
+        std::lock_guard<std::mutex> lock(mutex);
+        auto computeUnitsStr = std::to_string(computeUnitsCount);
+        rapidjson::Document::AllocatorType& allocator = onlineCache->GetAllocator();
+        rapidjson::Value dataArray(rapidjson::kArrayType);
+        rapidjson::Value hashStr(rapidjson::kStringType);
+        hashStr.Set(hash.c_str(), allocator);
+        dataArray.PushBack(rapidjson::Value().Set(implementationName.c_str(),allocator) , allocator);
+        dataArray.PushBack(rapidjson::Value().SetInt(tuneIndex), allocator);
+
+        rapidjson::Value newVal(rapidjson::kObjectType);
+        newVal.SetObject();
+        if (onlineCache->IsNull())
         {
-            // Tuning data exists for this hash.
-            return hashData->second;
+            onlineCache->Parse("{}");
         }
-        else
+        if (!onlineCache->HasMember(computeUnitsStr.c_str()))
         {
-            // Tuning data doesn't exists for this hash - on-line tuning is needed.
-            return std::make_pair("", 0);
+            onlineCache->AddMember(rapidjson::Value(computeUnitsStr.c_str(), allocator), newVal, allocator);
         }
-    }
-
-    void AutoTuner::StoreKernel(const std::string& tuningFilePath, const std::string& hash, const std::string& implementationName, const int tuneIndex)
-    {
-        std::lock_guard<std::mutex> lock(mutex);
 
-        // Add the new tuning data to cache
-        onlineCache[tuningFilePath].td[hash] = std::make_tuple(implementationName, tuneIndex);
+        auto cache = onlineCache->GetObject();
+        cache[computeUnitsStr.c_str()].AddMember(hashStr, dataArray, allocator);
 
-        // Add the new tuning data to tuning file
-        std::ofstream cachedKernelsFile(tuningFilePath, std::ofstream::out | std::ofstream::app);
-        if (!cachedKernelsFile.good())
-        {
-            throw std::runtime_error("Tuning file: " + tuningFilePath + " could not be written!");
-        }
-        cachedKernelsFile << hash << " ";
-        cachedKernelsFile << implementationName << " ";
-        cachedKernelsFile << tuneIndex << "\n";
+        std::ofstream cachedKernelsFile(cacheFilePath);
+        rapidjson::StringBuffer buffer(0, 1024);
+        rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
+        onlineCache->Accept(writer);
+        auto temp = buffer.GetString();
+        cachedKernelsFile << temp;
         cachedKernelsFile.close();
     }
 
-    std::tuple<std::string, int> AutoTuner::LoadKernelOffline(const std::string& deviceID, const std::string& hash)
+
+    std::tuple<std::string, int> AutoTuner::LoadKernelOffline(std::shared_ptr<rapidjson::Document> deviceCache, const std::string& hash)
     {
-        auto const& deviceCache = auto_tuner_offline::get_instance(deviceID)->get_tuning_data();
-        if (deviceCache.td.empty())
-        {
-            return std::make_pair("", 0);
-        }
-        auto const& deviceCacheData = deviceCache.td;
-        auto const& hashData = deviceCacheData.find(hash);
-        if (hashData == deviceCacheData.end())
+        if (!deviceCache->IsNull())
         {
-            return std::make_pair("", 0);
-        }
-        else
-        {
-            return hashData->second;
+            auto cache = deviceCache->GetObject();
+            if (deviceCache->HasMember(hash.c_str()))
+            {
+                const rapidjson::Value& prog = cache[hash.c_str()];
+                return std::make_tuple(prog[0].GetString(), prog[1].GetInt());
+            }
         }
+        return std::make_tuple("", 0);
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h
index 864e1ce11..7e4a7cda9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner.h
@@ -19,25 +19,23 @@
 #include <atomic>
 #include <mutex>
 #include <map>
-#include "kernel_selector_common.h"
+#include "kernel_selector_common.h" 
+#include "document.h"
+
 
 namespace kernel_selector 
 {
-    struct tuning_data // this could be replaced with 
-    {
-        std::map<std::string, std::tuple<std::string, int>> td;
-    };
 
     class AutoTuner
     {
     public:
         AutoTuner() = default;
-        std::tuple<std::string, int> LoadKernelOnline(const TuningMode tuningMode, const std::string& tuningFilePath, const std::string& deviceID, const std::string& driverVersion, const std::string& hostVersion, const std::string& hash);
-        void StoreKernel(const std::string& tuningFilePath, const std::string& hash, const std::string& implementationName, const int tuneIndex);
-        std::tuple<std::string, int> LoadKernelOffline(const std::string& deviceID, const std::string& hash);
+        std::tuple<std::string, int> LoadKernelOnline(const TuningMode tuningMode, const std::string& tuningFilePath, const uint32_t computeUnitsCount, const std::string& hash);
+        void StoreKernel(const std::string& tuningFilePath, const std::string& hash, std::string implementationName, const int tuneIndex, const uint32_t computeUnitsCount);
+        std::tuple<std::string, int> LoadKernelOffline(std::shared_ptr<rapidjson::Document> cache, const std::string& hash);
 
     private:    
-        std::map<std::string, tuning_data> onlineCache; // Tuning file name -> kernel/config per hash (hash -> [implementation name, tuning index])
+        std::shared_ptr<rapidjson::Document> onlineCache; // Tuning file name -> kernel/config per hash (hash -> [implementation name, tuning index])
         std::mutex mutex; // Mutex to synchronize cache updates
         
         /*
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner_offline.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner_offline.cpp
deleted file mode 100644
index 062138a79..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner_offline.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    std::shared_ptr<auto_tuner_offline> auto_tuner_offline::instance = 0;
-    std::mutex auto_tuner_offline::mutex;
-
-    auto_tuner_offline::auto_tuner_offline(const std::string& hw_id)
-    {
-        std::string temp_hw_id = hw_id;
-        // TODO: this is temporary solution of cases where user has non-tuned configuration. needs to implement better logic
-        // i.e. create table with number of eu's configuration that will point to common cache.
-        if (sku_cache_fillers.count(hw_id) == 0)
-            temp_hw_id = "0x1912";
-        sku_cache_fillers.at(temp_hw_id)(t_data);
-    }
-
-    std::shared_ptr<auto_tuner_offline> auto_tuner_offline::get_instance(const std::string& hw_id)
-    {
-        std::lock_guard<std::mutex> lock(mutex);
-        if (instance == nullptr)
-        {
-            instance = std::make_shared<auto_tuner_offline>(auto_tuner_offline(hw_id));
-        }
-        return instance;
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner_offline.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner_offline.h
deleted file mode 100644
index b7008d688..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/auto_tuner_offline.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#pragma once
-
-#include <string>
-#include <mutex>
-#include "auto_tuner.h"
-#include "kernel_selector_common.h"
-
-namespace kernel_selector 
-{
-    // SKL GT4e
-    void tuning_cache_193B(tuning_data&);
-    void tuning_cache_193B_B1_B16(tuning_data&);
-    void tuning_cache_193B_B8(tuning_data&);
-    void tuning_cache_193B_B32_B64(tuning_data&);
-    //SKL GT2
-    void tuning_cache_1912(tuning_data&);
-    void tuning_cache_1912_B1_B16(tuning_data&);
-    void tuning_cache_1912_B8(tuning_data&);
-    void tuning_cache_1912_B32_B64(tuning_data&);
-    //KBL GT3e
-    void tuning_cache_5927(tuning_data&);
-    void tuning_cache_5927_B1(tuning_data&);
-    //ICL GT2
-    void tuning_cache_8A52(tuning_data&);
-    void tuning_cache_8A52_B1_B16(tuning_data&);
-    //APL 10W
-    void tuning_cache_5A84(tuning_data&);
-    // Device ID for APL E3930.
-    void tuning_cache_5A85(tuning_data&);
-
-    class auto_tuner_offline
-    {
-    private:
-        static std::shared_ptr<auto_tuner_offline> instance;
-        static std::mutex mutex;
-        auto_tuner_offline() = delete;
-        // this is singleton implementation, if called twice with different parameter, 
-        // second call param will be ignored
-        auto_tuner_offline(const std::string& hw_id);
-        tuning_data t_data;
-
-        const std::map<std::string, void(*)(tuning_data&)> sku_cache_fillers
-        {
-            { "0x193B" , tuning_cache_193B },
-            { "0x1912" , tuning_cache_1912 },
-            { "0x5927" , tuning_cache_5927 },
-            { "0x8A52" , tuning_cache_8A52 },
-            { "0x5A84" , tuning_cache_5A84 },
-            { "0x5A85" , tuning_cache_5A84 },
-            { "0x3184" , tuning_cache_5A84 },
-        };
-
-    public:
-        static std::shared_ptr<auto_tuner_offline> get_instance(const std::string& hw_id);
-        tuning_data get_tuning_data() const { return t_data; }
-   };
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache.json b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache.json
new file mode 100644
index 000000000..29cd72ac4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache.json
@@ -0,0 +1,52153 @@
+{
+    "24": {
+        "1447947330145817080": ["convolution_gpu_bfyx_gemm_like", 2],
+        "7822260665195993699": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "8834376889372261135": ["convolution_gpu_bfyx_gemm_like",2],
+        "13198642774931141302": ["convolution_gpu_bfyx_gemm_like",1],
+        "14147966687151087307": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12416108838449201073": ["convolution_gpu_bfyx_gemm_like",2],
+        "2981613830919028333": ["convolution_gpu_bfyx_gemm_like",2],
+        "8083720773671701257": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2088791910163600059": ["convolution_gpu_bfyx_gemm_like",2],
+        "10501842258923285952": ["convolution_gpu_bfyx_gemm_like",2],
+        "18377151309967754698": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11463423774446158264": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "1907052741356343855": ["convolution_gpu_bfyx_gemm_like",2],
+        "17107836795750250005": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12392243022666304830": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "7210665245866922495": ["convolution_gpu_bfyx_gemm_like",1],
+        "15377692880620850674": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "2235284465019694961": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "17891191718277641356": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "11506567689103579136": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "13566452591890409921": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "10984167927862279982": ["convolution_gpu_bfyx_gemm_like",2],
+        "11442013495763732580": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "15472674298322946992": ["convolution_gpu_bfyx_gemm_like",2],
+        "1814045892909314674": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "10888799142381813035": ["convolution_gpu_bfyx_gemm_like",2],
+        "2121110886540804293": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1544283806060575584": ["convolution_gpu_bfyx_gemm_like",2],
+        "3773802352282967589": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3665566135022890729": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2714391204826997965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "683383121058719452": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3963106895592011725": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "956022649859563080": ["convolution_gpu_bfyx_gemm_like",1],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "2623687018437195679": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",166],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "16312223896859176991": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "9367157746678824712": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",470],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8819268903800581706": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11066913713501760080": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",2],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",466],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8376077531098664520": ["convolution_gpu_bfyx_gemm_like",1],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "9780938731831129283": ["convolution_gpu_bfyx_gemm_like",2],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "15591167992985613695": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",2],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",1],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",809],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "1306339989221885682": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",522],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "18424400171776141118": ["convolution_gpu_bfyx_gemm_like",2],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",2],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",2],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10290107543739998181": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2108296560864415762": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "15315327794058441258": ["convolution_gpu_bfyx_gemm_like",2],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "16190949264253468961": ["convolution_gpu_bfyx_gemm_like",1],
+        "15661322183507404821": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1742897526168249500": ["convolution_gpu_bfyx_gemm_like",2],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",1],
+        "9287404618748313247": ["convolution_gpu_bfyx_gemm_like",2],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13842309033760176194": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11267742746905371769": ["convolution_gpu_bfyx_gemm_like",1],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "2543041530639980505": ["convolution_gpu_bfyx_gemm_like",1],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "4652136280940317116": ["convolution_gpu_bfyx_gemm_like",2],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "11806402239500046867": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "12242618640422208652": ["convolution_gpu_bfyx_gemm_like",2],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "7279393739634103483": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5629373398445592781": ["convolution_gpu_bfyx_gemm_like",2],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "11619548409913646265": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "1003101267609305257": ["convolution_gpu_bfyx_gemm_like",2],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",808],
+        "11507538232733291666": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",831],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "10554266898346470422": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "9285566577169147378": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7998930863626763670": ["convolution_gpu_bfyx_gemm_like",2],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "15374625876485618845": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6981537186704688907": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",2],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",1],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",2],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9410978119783758141": ["convolution_gpu_bfyx_gemm_like",2],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",2],
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "13248567106128518549": ["convolution_gpu_bfyx_gemm_like",2],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",2],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "939718260623752240": ["convolution_gpu_bfyx_gemm_like",1],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1081962464388501987": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",525],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "7688176479120305539": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",2],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",2],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "9660812093766156608": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",2],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "14650567822254940018": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "659150305191479097": ["convolution_gpu_bfyx_gemm_like",2],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",2],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",875],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5311718276151327830": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5103094815475470596": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "1077773457856682663": ["convolution_gpu_bfyx_gemm_like",2],
+        "4992668316921598993": ["convolution_gpu_bfyx_gemm_like",2],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17006655627343469372": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6323026044750482867": ["convolution_gpu_bfyx_gemm_like",2],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12427258337646070422": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",1],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "6025872155179042054": ["convolution_gpu_bfyx_gemm_like",2],
+        "75742659105146536": ["convolution_gpu_bfyx_gemm_like",1],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "8560635685184432720": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8906588133431586825": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1051506168926530904": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15026219694198820614": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10642327923162019888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3895088069642140043": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "3277243911383750280": ["convolution_gpu_bfyx_gemm_like",1],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "2651385050387738902": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "10890975553758439233": ["convolution_gpu_bfyx_gemm_like",2],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "6263019986730305851": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "10384537928514123040": ["convolution_gpu_bfyx_gemm_like",2],
+        "5688478347124565305": ["convolution_gpu_bfyx_gemm_like",1],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "4073467095502162430": ["convolution_gpu_bfyx_gemm_like",1],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "12672995204641007004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "15466940145773097237": ["convolution_gpu_bfyx_gemm_like",1],
+        "17790026124881397912": ["fully_connected_gpu_yxfb_ref",2],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16025442470600124062": ["convolution_gpu_bfyx_gemm_like",2],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "4897991181236908768": ["convolution_gpu_bfyx_gemm_like",2],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5953754321266570854": ["convolution_gpu_bfyx_gemm_like",1],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_gemm",2],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8707189142909022305": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17808913959977434594": ["convolution_gpu_bfyx_gemm_like",1],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13300022131572486202": ["convolution_gpu_bfyx_gemm_like",2],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "9191832520273617003": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17343050785312683560": ["convolution_gpu_bfyx_gemm_like",2],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",2],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "3432296808755992670": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "15322019609805777935": ["convolution_gpu_bfyx_gemm_like",2],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11609821372586026178": ["convolution_gpu_bfyx_gemm_like",2],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8153567933591966877": ["convolution_gpu_bfyx_gemm_like",1],
+        "2732519635571994212": ["convolution_gpu_bfyx_gemm_like",2],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "15511138074959300404": ["convolution_gpu_bfyx_gemm_like",2],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",165],
+        "8061914949376516780": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "3526580286148537369": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "6329618009202266591": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1838534101161814609": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "17208186152576814861": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "13468713306678453952": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",2],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "4355933224673863178": ["convolution_gpu_bfyx_gemm_like",2],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "17638692805430115529": ["convolution_gpu_bfyx_gemm_like",2],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "2727219457659794468": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "13102754309439605192": ["convolution_gpu_bfyx_gemm_like",2],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",2],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "8751016391945753900": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "9522661528867955338": ["convolution_gpu_bfyx_gemm_like",2],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",2],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "17015791782274123780": ["convolution_gpu_bfyx_gemm_like",1],
+        "11465965972527519631": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13328449155966085543": ["convolution_gpu_bfyx_gemm_like",2],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "153117141968471446": ["convolution_gpu_bfyx_gemm_like",1],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "10084794570892043447": ["convolution_gpu_bfyx_gemm_like",2],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16076153317792960383": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "2451712485584835395": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12523676912856063091": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8818070832398055086": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "13702692566238948173": ["convolution_gpu_bfyx_gemm_like",1],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "17515573322312447679": ["convolution_gpu_bfyx_gemm_like",2],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "10323345824599612614": ["convolution_gpu_bfyx_gemm_like",2],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14811022197918391667": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",927],
+        "13115589642140732066": ["convolution_gpu_bfyx_gemm_like",1],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8108843303778211282": ["convolution_gpu_bfyx_gemm_like",2],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6644418194983229139": ["convolution_gpu_bfyx_gemm_like",2],
+        "11086699387784339943": ["convolution_gpu_bfyx_gemm_like",2],
+        "7843498978148810586": ["convolution_gpu_bfyx_gemm_like",2],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "54019631544204590": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "4366168099274266975": ["convolution_gpu_bfyx_gemm_like",1],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "11599932445375240727": ["convolution_gpu_bfyx_gemm_like",2],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "5740738339752793113": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",571],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",0],
+        "276407276027553756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "7264756313770306662": ["convolution_gpu_bfyx_gemm_like",2],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "9529614587861271730": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "1138439260035360722": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "1116274074896622552": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "6133592828563353516": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16053585286807864356": ["convolution_gpu_bfyx_gemm_like",2],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "13538051178827008933": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18077281411861416889": ["convolution_gpu_bfyx_gemm_like",1],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "522181557896569275": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "5843679089588930933": ["convolution_gpu_bfyx_os_iyx_osv16",146],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5381578460674280089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10320711719466983961": ["convolution_gpu_bfyx_gemm_like",2],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",2],
+        "15490478608105402679": ["convolution_gpu_bfyx_gemm_like",2],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "10509933181132310969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16955653765071712611": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5275016494706355806": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15579919505002150556": ["convolution_gpu_bfyx_gemm_like",2],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",1],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "288853243482418538": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "16491532291908469567": ["convolution_gpu_bfyx_gemm_like",1],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17382660912493284320": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9100044555742394133": ["convolution_gpu_bfyx_gemm_like",1],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2525260242689556544": ["convolution_gpu_bfyx_gemm_like",2],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",288],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",1],
+        "52089503050497755": ["convolution_gpu_bfyx_gemm_like",2],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17050143605017295447": ["convolution_gpu_bfyx_gemm_like",2],
+        "5047419871737940985": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12796777049340516563": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17243648226968859637": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",2],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",2],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "1996860183441418841": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "8258382025812748961": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "8618835732380720921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "14484890926084856480": ["convolution_gpu_bfyx_gemm_like",1],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",536],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",2],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "9882204352209412039": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2622434279674583815": ["convolution_gpu_bfyx_gemm_like",1],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "789359733867650915": ["convolution_gpu_bfyx_gemm_like",1],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "15378025640603637387": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16683485007140805060": ["fully_connected_gpu_yxfb_ref",2],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",504],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "6129602738379919488": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6438522646185979880": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14077148976508649021": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "13410850301164057911": ["convolution_gpu_bfyx_gemm_like",1],
+        "15993427814066246646": ["convolution_gpu_bfyx_gemm_like",2],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "2893564501191050837": ["convolution_gpu_bfyx_gemm_like",1],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "12190841837604350271": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "16547425454653232058": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",11],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17089801601582809764": ["convolution_gpu_bfyx_gemm_like",1],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13681462437496627948": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "1882052795393187384": ["convolution_gpu_bfyx_gemm_like",1],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "18043340998699622388": ["convolution_gpu_bfyx_gemm_like",2],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "11459784003592366395": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",1],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",2],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "1701609125136907870": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "2294026590516781945": ["convolution_gpu_bfyx_gemm_like",2],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",2],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "16230621843665445228": ["convolution_gpu_bfyx_gemm_like",2],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",9],
+        "5933743119393822386": ["convolution_gpu_bfyx_gemm_like",1],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",1],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13933912937625580405": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",538],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "3349519148124496343": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",2],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "15287650965861631130": ["convolution_gpu_bfyx_gemm_like",2],
+        "16172528828198474326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",2],
+        "11951606039079763598": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",809],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "5912303851874077576": ["convolution_gpu_bfyx_gemm_like",2],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",1],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",613],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3750338655074082587": ["fully_connected_gpu_fb_io_ref",1],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "18012549942299450620": ["convolution_gpu_bfyx_gemm_like",2],
+        "2140514316203117958": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "8025053805734757314": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5622089373755094139": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2730604806511016352": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "15640202505592598653": ["convolution_gpu_bfyx_gemm_like",2],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",1041],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",2],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "17824431042110985323": ["convolution_gpu_bfyx_gemm_like",1],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "10424278617647597641": ["convolution_gpu_bfyx_gemm_like",2],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2571882179292959757": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "8253823502854784432": ["convolution_gpu_bfyx_gemm_like",2],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "4229105529069729944": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",60],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17035903590837750750": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",2],
+        "9707630588260222630": ["convolution_gpu_bfyx_gemm_like",2],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",2],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17522452942286240233": ["convolution_gpu_bfyx_gemm_like",2],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",2],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7744787957569714828": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",2],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "17912189681971987483": ["convolution_gpu_bfyx_gemm_like",2],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5219048275475447369": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",1],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",2],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15997754881872769378": ["convolution_gpu_bfyx_gemm_like",2],
+        "700717277178942679": ["convolution_gpu_bfyx_gemm_like",2],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",942],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "2038505773698938555": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "17825280904760131680": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "16559140502701231107": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "482564204402769504": ["convolution_gpu_bfyx_gemm_like",2],
+        "1089679781525023551": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",1],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",2],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "17713034180977313726": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7903891232234389925": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",2],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "7430073011895298582": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "879896719155824868": ["convolution_gpu_bfyx_gemm_like",2],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "15494543914974994991": ["convolution_gpu_bfyx_gemm_like",1],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",2],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "17310332946322628458": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "7808544677773370430": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "18136765667969393174": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "8434794604559592624": ["convolution_gpu_bfyx_gemm_like",1],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "1103228955716492167": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",2],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",506],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",2],
+        "2534408579674556441": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5643908654122573882": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "10118395047539851751": ["convolution_gpu_bfyx_gemm_like",2],
+        "792684262493086891": ["convolution_gpu_bfyx_gemm_like",1],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",2],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "18386376129938707290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",940],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",390],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "4196367396954155354": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",59],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17723621158215826108": ["convolution_gpu_bfyx_gemm_like",2],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4220826666482500445": ["convolution_gpu_bfyx_gemm_like",2],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "4716188972902735458": ["convolution_gpu_bfyx_gemm_like",2],
+        "2007192658799516915": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12461575861709234385": ["convolution_gpu_bfyx_gemm_like",2],
+        "4766071144928072260": ["convolution_gpu_bfyx_gemm_like",1],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "18384657372655350144": ["convolution_gpu_bfyx_gemm_like",2],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "2345023488044002149": ["convolution_gpu_bfyx_gemm_like",1],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "7211355951470869591": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12644942072153919043": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",2],
+        "8578747191812631883": ["convolution_gpu_bfyx_gemm_like",2],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4239415134522959352": ["convolution_gpu_bfyx_gemm_like",2],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16884228931101540030": ["convolution_gpu_bfyx_gemm_like",2],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "6181272224000872375": ["convolution_gpu_bfyx_gemm_like",2],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "12693511427898130707": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13775529405693629438": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3746573775462003750": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "5409924335138540834": ["convolution_gpu_bfyx_gemm_like",2],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",0],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "11829442945690098558": ["convolution_gpu_bfyx_gemm_like",1],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7590767013583950613": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "5687802882700097624": ["convolution_gpu_bfyx_gemm_like",2],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "3755253206085028904": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "16264774056719724826": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",0],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",23],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15781622938833984014": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12160764253455777655": ["convolution_gpu_bfyx_gemm_like",2],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",5],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "287386909600391846": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",490],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",106],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "10607904718265020949": ["convolution_gpu_bfyx_gemm_like",2],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "9454954846682513038": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2305461098719675735": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "659846949368492111": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5754844816339228920": ["convolution_gpu_bfyx_gemm_like",1],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "12655099960717366198": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "7199295899520406795": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8321769923556905957": ["convolution_gpu_bfyx_gemm_like",2],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "18337160891834020517": ["convolution_gpu_bfyx_gemm_like",2],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1372939511728986224": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",438],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",1],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "14744368497944610864": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10792503079194374004": ["convolution_gpu_bfyx_gemm_like",1],
+        "14335423820860953927": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10433541468308381909": ["convolution_gpu_bfyx_gemm_like",2],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16818714747882774917": ["convolution_gpu_bfyx_gemm_like",2],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",940],
+        "3221221905804708596": ["convolution_gpu_bfyx_gemm_like",2],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "11192356850081328892": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "8451212914744825089": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "4640696923527766618": ["convolution_gpu_bfyx_gemm_like",2],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",348],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3448477246688526708": ["convolution_gpu_bfyx_gemm_like",1],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "14716719350966652036": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",2],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6902644989079870993": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",860],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "12353956380178079089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5287076386757143976": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",164],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",2],
+        "17025268985366223779": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16758962840329202004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "4239133538073498792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",2],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "8939683514448064461": ["convolution_gpu_bfyx_gemm_like",2],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",439],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "9226443907548972870": ["convolution_gpu_bfyx_gemm_like",2],
+        "15114370307779942381": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "15417738436777481469": ["convolution_gpu_bfyx_gemm_like",2],
+        "8057302050645780813": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "14681717813022425567": ["convolution_gpu_bfyx_gemm_like",2],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "8032685176029570383": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "1104489643524273315": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8507854696766492454": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14221578799010900252": ["convolution_gpu_bfyx_gemm_like",2],
+        "14902389080201926109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",0],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",0],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",592],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",2],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12534001599784153836": ["convolution_gpu_bfyx_gemm_like",1],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16710651492402564794": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18375125668176498051": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "7026575758396092435": ["convolution_gpu_bfyx_gemm_like",1],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15129834325410878425": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",91],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "11107930597263802755": ["convolution_gpu_bfyx_gemm_like",2],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",844],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",1],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "8797843396807284399": ["convolution_gpu_bfyx_gemm_like",2],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "17318287523550546026": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",553],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",1],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "12319073009094248232": ["convolution_gpu_bfyx_gemm_like",2],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9040046051053703359": ["convolution_gpu_bfyx_gemm_like",2],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "9447458159095730492": ["convolution_gpu_bfyx_gemm_like",2],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",2],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",1],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",59],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",1],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "6065819201836017182": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1934379409955686502": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2349007644347065353": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "14667209474639064623": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8079376692609682448": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",610],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "11655994466278963438": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",873],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",1],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5019077257951332016": ["convolution_gpu_bfyx_gemm_like",2],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4479979951990338510": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",1],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "529543453251381109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1410630713443793537": ["convolution_gpu_bfyx_gemm_like",1],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "1541754036637209097": ["convolution_gpu_bfyx_gemm_like",2],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14289048840489035546": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5095827462645341808": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "1249137685908951501": ["convolution_gpu_bfyx_gemm_like",1],
+        "8757900457181374694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9226912483632588371": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",898],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "72444706264681262": ["convolution_gpu_bfyx_gemm_like",2],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",90],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "6571438978296387721": ["convolution_gpu_bfyx_gemm_like",2],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",656],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "6522575549211855712": ["convolution_gpu_bfyx_gemm_like",2],
+        "689445825453914111": ["convolution_gpu_bfyx_gemm_like",2],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "4550028191070279999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "9475130054420979752": ["convolution_gpu_bfyx_gemm_like",2],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "14397348576352573007": ["convolution_gpu_bfyx_gemm_like",2],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "11806105193035393795": ["convolution_gpu_bfyx_gemm_like",2],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",1],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",0],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "12493863403516600413": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "7575675354187625951": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14213516751025324346": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1435153323458789173": ["convolution_gpu_bfyx_gemm_like",2],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "18109284647478027063": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",304],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10628725059172743408": ["convolution_gpu_bfyx_gemm_like",2],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2418288192668085805": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",1],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",724],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4217179485243909459": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "13810995219720233595": ["convolution_gpu_bfyx_gemm_like",2],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "7148542290597073512": ["convolution_gpu_bfyx_gemm_like",2],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "18218631037214746168": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16173557782125372935": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3106922888635965020": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13447028922679236865": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11060822686394981344": ["convolution_gpu_bfyx_gemm_like",2],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5334190564423375247": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7712831597869354170": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9759380701896779097": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",2],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11490143853656040028": ["convolution_gpu_bfyx_gemm_like",2],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1697248235682953135": ["convolution_gpu_bfyx_gemm_like",2],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",849],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3150231129728961455": ["convolution_gpu_bfyx_gemm_like",1],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3036808833459559381": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",2],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",188],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "1436052878894538927": ["convolution_gpu_bfyx_gemm_like",2],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "13418701036204748812": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "14810839157236175179": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",2],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "13503688893307029975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8272823732258536202": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "1954052357826969119": ["convolution_gpu_bfyx_gemm_like",1],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",2],
+        "5948701218437980356": ["convolution_gpu_bfyx_gemm_like",2],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",2],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "8195881973746570408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",946],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "3860603464276263676": ["convolution_gpu_bfyx_gemm_like",2],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "9954050478761346921": ["convolution_gpu_bfyx_gemm_like",2],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "16071723603031305677": ["convolution_gpu_bfyx_gemm_like",2],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",2],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16286085532892593349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1907439276166837309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "15857087373591747006": ["convolution_gpu_bfyx_gemm_like",2],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "15156525717629023944": ["convolution_gpu_bfyx_gemm_like",2],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "9404677451270692749": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",2],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "14131851237755716991": ["convolution_gpu_bfyx_gemm_like",2],
+        "3179874645565098825": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3355259926747524578": ["convolution_gpu_bfyx_gemm_like",2],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "9069334144391048686": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17928043901784474130": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "13762042713029963144": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11919129623429545762": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "8528750110601691390": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "2265784112305305260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "12394049027081208902": ["convolution_gpu_bfyx_gemm_like",1],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",761],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "16327433707667075261": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",2],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4927360358387344983": ["convolution_gpu_bfyx_gemm_like",1],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "9562527071055150197": ["convolution_gpu_bfyx_1x1",2],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like",2],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "6288489890578212082": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10864011008000364415": ["convolution_gpu_bfyx_1x1",2],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",2],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13830605041347009953": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "14544219140091420262": ["convolution_gpu_bfyx_gemm_like",1],
+        "3141886504884887200": ["convolution_gpu_bfyx_gemm_like",2],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",2],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7292351660229751817": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "8943913562339525413": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "11706446082856895571": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",2],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "8651641584737798174": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",2],
+        "5941298590926032148": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "2929190644951986399": ["convolution_gpu_bfyx_gemm_like",2],
+        "8421388456873652700": ["convolution_gpu_bfyx_gemm_like",2],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",2],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2242829490403202087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",2],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "12900949103593247293": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",0],
+        "3872151366780051246": ["convolution_gpu_bfyx_gemm_like",1],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",1],
+        "10753540518493641553": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "16182470664818268848": ["convolution_gpu_bfyx_gemm_like",1],
+        "9076758673133996959": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "6726099352298108756": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "2339864165283480961": ["convolution_gpu_bfyx_1x1",2],
+        "1760391741350091665": ["convolution_gpu_bfyx_gemm_like",2],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",2],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",0],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12194037100109755112": ["convolution_gpu_bfyx_gemm_like",2],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",2],
+        "9426665763007611385": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",774],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",882],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",1],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "13328911884191551889": ["convolution_gpu_bfyx_1x1",2],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "584086621952390547": ["convolution_gpu_bfyx_gemm_like",2],
+        "12370729327673204804": ["convolution_gpu_bfyx_gemm_like",2],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",659],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "14349625788399542568": ["convolution_gpu_bfyx_gemm_like",1],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "7870154008378361670": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",2],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "1569043950563130463": ["convolution_gpu_bfyx_gemm_like",1],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like",2],
+        "5012013738970489338": ["convolution_gpu_bfyx_1x1",2],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like",2],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",2],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",1],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",2],
+        "7531346828150129063": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",2],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "11939914680143672459": ["fully_connected_gpu_fb_oi_ref",1],
+        "7106362077449435105": ["convolution_gpu_bfyx_gemm_like",0],
+        "5600807544955072308": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",1],
+        "2296581485980163665": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13809330759308309353": ["convolution_gpu_bfyx_gemm_like",2],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "1345101751956733589": ["convolution_gpu_bfyx_gemm_like",2],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",2],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",1],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",2],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",2],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",1],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",106],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",1],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "4091702228990140696": ["convolution_gpu_bfyx_gemm_like",1],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like",2],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",2],
+        "1632416005093914709": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12972798847556569913": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",2],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "16925721317097534009": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12977678792503377525": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",0],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7015738038963065110": ["convolution_gpu_bfyx_gemm_like",2],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "11132679855317294753": ["convolution_gpu_bfyx_gemm_like",1],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7432142107544210174": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",1],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "2936333406928424760": ["convolution_gpu_bfyx_1x1",2],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",2],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",2],
+        "14115742296883450319": ["convolution_gpu_bfyx_gemm_like",1],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like",2],
+        "12397280593466519809": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",2],
+        "6254141935545262078": ["convolution_gpu_bfyx_gemm_like",1],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",1],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "7481256533438761028": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "12319073009094248232": ["convolution_gpu_bfyx_gemm_like",2],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "1680468564927032670": ["convolution_gpu_bfyx_gemm_like",1],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "13472577372534605883": ["convolution_gpu_bfyx_gemm_like",1],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",2],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",0],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",0],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",1],
+        "6942016672941874829": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9440117898128288296": ["convolution_gpu_bfyx_gemm_like",2],
+        "11324651029379152442": ["convolution_gpu_bfyx_1x1",2],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "394778201589371681": ["convolution_gpu_bfyx_gemm_like",2],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",1],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5720964268093705079": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",2],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",2],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",0],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",2],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",2],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",0],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1186545671730357033": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "15065019229949449623": ["convolution_gpu_bfyx_gemm_like",1],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "7770000755097925765": ["convolution_gpu_bfyx_1x1",2],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "14151747022287993729": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9741607635826869269": ["convolution_gpu_bfyx_gemm_like",1],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",1],
+        "17025324057045572535": ["convolution_gpu_bfyx_gemm_like",1],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",0],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "18035673326929466074": ["convolution_gpu_bfyx_gemm_like",1],
+        "2273992727647793692": ["convolution_gpu_bfyx_gemm_like",1],
+        "677249604491773387": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",2],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",1],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",2],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "17742192339816511494": ["convolution_gpu_bfyx_gemm_like",2],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",2],
+        "6217542346826403576": ["convolution_gpu_bfyx_1x1",2],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",2],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",2],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",2],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",1],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",2],
+        "15271783562528081169": ["convolution_gpu_bfyx_gemm_like",2],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "17084977396231597605": ["convolution_gpu_bfyx_gemm_like",1],
+        "8541982562061181756": ["convolution_gpu_bfyx_gemm_like",1],
+        "13735180250757239202": ["convolution_gpu_bfyx_gemm_like",2],
+        "7375461241315602473": ["convolution_gpu_bfyx_gemm_like",2],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9954050478761346921": ["convolution_gpu_bfyx_gemm_like",2],
+        "13960388312976163971": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "1208161922424418734": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",2],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8712136292276123857": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",2],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",2],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "3563872903821081702": ["convolution_gpu_bfyx_gemm_like",1],
+        "8155268141318893606": ["convolution_gpu_bfyx_gemm_like",1],
+        "13038533272699602337": ["convolution_gpu_bfyx_gemm_like",2],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",2],
+        "15031155621982459860": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5040095338370816349": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",2],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15363606233048272809": ["convolution_gpu_bfyx_1x1",2],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",2],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",2],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "8857763129101380288": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",2],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "4135068756462147853": ["convolution_gpu_bfyx_gemm_like",1],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like",2],
+        "17174919737114915467": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "2527276292172180386": ["convolution_gpu_bfyx_gemm_like",2],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "17318287523550546026": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",1],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",2],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",478],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6300691162962736560": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "632116056424249698": ["convolution_gpu_bfyx_gemm_like",1],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",2],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",2],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",2],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "12501619443242354860": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",2],
+        "5600128039063009632": ["convolution_gpu_bfyx_gemm_like",1],
+        "9737565171095493297": ["convolution_gpu_bfyx_gemm_like",1],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",2],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16949056117405140365": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",1],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",2],
+        "10899110544832584656": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",0],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",2],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "3622409603053918029": ["convolution_gpu_bfyx_gemm_like",1],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",2],
+        "4571404165794634411": ["convolution_gpu_bfyx_1x1",2],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "15106614232165315070": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",2],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",2],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",2],
+        "6114241186364821679": ["convolution_gpu_bfyx_gemm_like",2],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",1],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",2],
+        "9525535670799618110": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "345043289576587800": ["convolution_gpu_bfyx_1x1",2],
+        "14466032674083938714": ["convolution_gpu_bfyx_gemm_like",1],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "8519354640245415816": ["convolution_gpu_bfyx_gemm_like",1],
+        "8761283252495354972": ["convolution_gpu_bfyx_gemm_like",2],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",2],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",1],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",2],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",2],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "721174714308243785": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2321148334382088982": ["convolution_gpu_bfyx_gemm_like",2],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "9803492989444302959": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",2],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",2],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "2173720698351153121": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",0],
+        "1485662490111767875": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "13161997040644039778": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16293465561256937726": ["convolution_gpu_bfyx_gemm_like",2],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",2],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",1],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",2],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1318571118468536310": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "150132162949295379": ["convolution_gpu_bfyx_1x1",2],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",2],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",2],
+        "13602140021189675477": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",79],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "5754396201681434378": ["convolution_gpu_bfyx_1x1",2],
+        "11241838709529552265": ["convolution_gpu_bfyx_gemm_like",2],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "4933831571091731212": ["convolution_gpu_bfyx_gemm_like",1],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "11807282628372660280": ["convolution_gpu_bfyx_1x1",2],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",1],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14147460733160099960": ["convolution_gpu_bfyx_gemm_like",1],
+        "14578867494693499627": ["convolution_gpu_bfyx_gemm_like",2],
+        "3541538046227217664": ["convolution_gpu_bfyx_gemm_like",1],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "17344974951998490453": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "10292349730148518173": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",2],
+        "10292585962794261197": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",2],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "11872943152839631823": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "16364494883229084045": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15924916465272239832": ["convolution_gpu_bfyx_gemm_like",2],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4759671642533786591": ["convolution_gpu_bfyx_gemm_like",2],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",0],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "4079026972040047969": ["convolution_gpu_bfyx_gemm_like",2],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",2],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",2],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "15078168059698267650": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",1],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",2],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",2],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",0],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",1],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "9714508918051740792": ["convolution_gpu_bfyx_gemm_like",1],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",2],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "8787438180071123604": ["convolution_gpu_bfyx_gemm_like",1],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",2],
+        "10471519687597963116": ["convolution_gpu_bfyx_gemm_like",1],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "959260710517842876": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078590909693331731": ["convolution_gpu_bfyx_gemm_like",2],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",2],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",0],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",2],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",2],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "994842991399671507": ["convolution_gpu_bfyx_gemm_like",1],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "1596353239542510685": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",2],
+        "3398322619007806698": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13575423234109624706": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "15809639778580769565": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like",2],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",1038],
+        "1418595171949196661": ["convolution_gpu_bfyx_gemm_like",2],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",2],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",0],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",2],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",1],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",2],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",1],
+        "18184621367843960190": ["convolution_gpu_bfyx_gemm_like",2],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "5911282942658469852": ["convolution_gpu_bfyx_gemm_like",1],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",1],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",2],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",0],
+        "12625112690264223217": ["convolution_gpu_bfyx_gemm_like",2],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",1],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "331661172067077796": ["convolution_gpu_bfyx_1x1",2],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15859493313686060349": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",2],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",2],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",2],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",2],
+        "11800783548769329949": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13124342334495538095": ["convolution_gpu_bfyx_gemm_like",2],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15799159401545270696": ["convolution_gpu_bfyx_gemm_like",1],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "15961487889420208188": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",0],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "13369603621524676979": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",0],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "17224104246148265328": ["convolution_gpu_bfyx_gemm_like",2],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "8543619733732987550": ["convolution_gpu_bfyx_gemm_like",1],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",1],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "14362876471450307424": ["convolution_gpu_bfyx_1x1",2],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",2],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",2],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",2],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "2727175120437582536": ["convolution_gpu_bfyx_gemm_like",1],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",2],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "5044721291675005144": ["convolution_gpu_bfyx_1x1",2],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9314293064351558241": ["convolution_gpu_bfyx_gemm_like",2],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",2],
+        "3699344686791530101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",2],
+        "2204178900998688268": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13833960927635646899": ["convolution_gpu_bfyx_gemm_like",1],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",0],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1982176363226079588": ["convolution_gpu_bfyx_gemm_like",2],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7669403041163460089": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",2],
+        "6817494598328071314": ["convolution_gpu_bfyx_gemm_like",2],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",2],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "9243949750444156746": ["convolution_gpu_bfyx_gemm_like",1],
+        "9545968464906009869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16748662918272106932": ["convolution_gpu_bfyx_gemm_like",1],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "1390379098099686972": ["convolution_gpu_bfyx_1x1",2],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",2],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",2],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",0],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",2],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "2581414750854621875": ["convolution_gpu_bfyx_gemm_like",2],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16911464046178654033": ["convolution_gpu_bfyx_1x1",2],
+        "16341722570340169855": ["convolution_gpu_bfyx_1x1",2],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",2],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",2],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",0],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",2],
+        "3976736548270395981": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "7235358742317442134": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",2],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "8490260671996115530": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",1],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",2],
+        "6603778920476932267": ["convolution_gpu_bfyx_gemm_like",1],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9480653639044390919": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",2],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "1074748462756364699": ["fully_connected_gpu_fb_oi_ref",1],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "16075006181495932250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13468081302022888489": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",2],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",2],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",2],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",2],
+        "6648876837655776653": ["convolution_gpu_bfyx_1x1",2],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",2],
+        "2477849395789783501": ["convolution_gpu_bfyx_gemm_like",2],
+        "17109520309574369561": ["convolution_gpu_bfyx_gemm_like",2],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",2],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "14502856487639608696": ["convolution_gpu_bfyx_gemm_like",2],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",0],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",1],
+        "8700574100180128776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",2],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",2],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",2],
+        "15192230303376521834": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15188570678726970998": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4718716595177056289": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",2],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",0],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",2],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",2],
+        "15773157615731010456": ["convolution_gpu_bfyx_gemm_like",2],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "11130439225010714550": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",1],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3779229442395464456": ["convolution_gpu_bfyx_gemm_like",1],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",0],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like",2],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",1],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",1],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",2],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",2],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",1],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",2],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",0],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "18416908414174464784": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",2],
+        "2817919813339364130": ["convolution_gpu_bfyx_gemm_like",1],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",0],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",2],
+        "3603187029740446600": ["convolution_gpu_bfyx_gemm_like",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_gemm_like",2],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",1],
+        "537074122417021898": ["convolution_gpu_bfyx_gemm_like",2],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "15231987838322151865": ["convolution_gpu_bfyx_1x1",2],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7107677063657303327": ["convolution_gpu_bfyx_1x1",2],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",2],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",1],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",2],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7171904645566467208": ["convolution_gpu_bfyx_gemm_like",2],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11669828823444745889": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "5941092474669713339": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",1],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",0],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",1],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",1],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16610284927818475574": ["convolution_gpu_bfyx_gemm_like",2],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4403753181729432604": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "3820661057776133570": ["convolution_gpu_bfyx_1x1",2],
+        "14985236276429954162": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8096131027165540886": ["convolution_gpu_bfyx_gemm_like",2],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "1984152634309440563": ["convolution_gpu_bfyx_gemm_like",2],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",2],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10893432143734884603": ["convolution_gpu_bfyx_gemm_like",2],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "9328223957245552723": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2173867324489962689": ["convolution_gpu_bfyx_gemm_like",1],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",2],
+        "7232326270078161768": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "13590444711975157776": ["convolution_gpu_bfyx_gemm_like",1],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "10724501418439612080": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "8792202318168046223": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "3477539135137665170": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",0],
+        "4455369117448405874": ["convolution_gpu_bfyx_1x1",2],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",2],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",2],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",1],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",2],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5291011077679733990": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "14204609663091442879": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",0],
+        "16361932270527364507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "14206076551739831333": ["convolution_gpu_bfyx_gemm_like",1],
+        "818998169319147148": ["convolution_gpu_bfyx_gemm_like",1],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",2],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",1],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "4849343880559509889": ["convolution_gpu_bfyx_1x1",2],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",1],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "8561261337239934159": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",2],
+        "4848143712599565301": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",2],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "5963901433137582265": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",2],
+        "2856601829807186494": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "4238885454989272754": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",2],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",1],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",0],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "14885031472057965707": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "216603198215625772": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "7532088618116521936": ["convolution_gpu_bfyx_gemm_like",2],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7075659071934895087": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "15757308772667178999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421204749289937399": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "8130920994920685157": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like",2],
+        "3503893875515897267": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",1],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "13145474177271090694": ["convolution_gpu_bfyx_gemm_like",2],
+        "15967614281807823696": ["convolution_gpu_bfyx_gemm_like",2],
+        "13474805373264874144": ["convolution_gpu_bfyx_1x1",2],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",0],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "3190494353583341446": ["convolution_gpu_bfyx_gemm_like",1],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "16574710115918192418": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "2866656294663853474": ["convolution_gpu_bfyx_1x1",2],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "14001406016806064079": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",1],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",2],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",0],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "3499106702307464480": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3056212889689424946": ["convolution_gpu_bfyx_1x1",2],
+        "13251091004269229867": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4362304842016958728": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like",2],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",2],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like",2],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "6328802691680458752": ["convolution_gpu_bfyx_gemm_like",2],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "9918371346247634545": ["convolution_gpu_bfyx_gemm_like",2],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",2],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "10765280349477640969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4815047491742617397": ["convolution_gpu_bfyx_gemm_like",2],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "2863465257341735941": ["convolution_gpu_bfyx_1x1",2],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",2],
+        "5074273865983613482": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13454265023861566476": ["convolution_gpu_bfyx_gemm_like",2],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "1425953627379976115": ["convolution_gpu_bfyx_gemm_like",1],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",2],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "15739278428190392018": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "10480527638577674825": ["convolution_gpu_bfyx_1x1",2],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",2],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",89],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",2],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",2],
+        "14289082888174784976": ["convolution_gpu_bfyx_gemm_like",2],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",2],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",2],
+        "8984436655107983227": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",0],
+        "4531222427159927606": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "7780140599533242850": ["convolution_gpu_bfyx_gemm_like",1],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7201521533301617290": ["convolution_gpu_bfyx_gemm_like",1],
+        "10509933181132310969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",2],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "1245259979364728404": ["convolution_gpu_bfyx_1x1",2],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",2],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like",2],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "9987415314864002460": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13585163747565192884": ["convolution_gpu_bfyx_gemm_like",2],
+        "3385797925880519845": ["convolution_gpu_bfyx_1x1",2],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "3106922888635965020": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14126906427006602775": ["convolution_gpu_bfyx_1x1",2],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",1],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "12871555773123368130": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "739676584505475609": ["convolution_gpu_bfyx_gemm_like",2],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",2],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like",2],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",2],
+        "14540578324750869319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15989894214714907271": ["convolution_gpu_bfyx_gemm_like",2],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "7132328255408635227": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "3441335188113424896": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "11806402239500046867": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10432365444137108781": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",1],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7683334381958571864": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8931169575495985034": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12166852830214895457": ["convolution_gpu_bfyx_1x1",2],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",2],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",1],
+        "4378422094110940766": ["convolution_gpu_bfyx_gemm_like",1],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",1],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",2],
+        "13760645810144930270": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16681690088928624738": ["convolution_gpu_bfyx_gemm_like",2],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",2],
+        "4708035980731751007": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",2],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "9955939178447682108": ["convolution_gpu_bfyx_1x1",2],
+        "11705756153433897198": ["convolution_gpu_bfyx_1x1",2],
+        "12806934028210472719": ["convolution_gpu_bfyx_gemm_like",2],
+        "10681768474583067517": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",2],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "5657471280535146301": ["convolution_gpu_bfyx_gemm_like",1],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "2984726467649419856": ["convolution_gpu_bfyx_gemm_like",2],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",2],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",1],
+        "2188101366183302888": ["convolution_gpu_bfyx_gemm_like",1],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "3725013268198063198": ["convolution_gpu_bfyx_1x1",2],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "3635446784873718932": ["convolution_gpu_bfyx_gemm_like",2],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",1],
+        "87031578643428011": ["convolution_gpu_bfyx_1x1",2],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",2],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",1],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "18436249934780056991": ["convolution_gpu_bfyx_gemm_like",2],
+        "5965451243366505522": ["convolution_gpu_bfyx_gemm_like",1],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",2],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17207560805775399864": ["convolution_gpu_bfyx_gemm_like",1],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "13403161389559730": ["convolution_gpu_bfyx_gemm_like",2],
+        "7223801044761006523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8837721075413149240": ["convolution_gpu_bfyx_gemm_like",1],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "8609939102588915855": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "10914921540144371519": ["convolution_gpu_bfyx_gemm_like",1],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "15555083739490354527": ["convolution_gpu_bfyx_gemm_like",2],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",2],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",2],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "6845814820599174031": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",1],
+        "10415046594066474634": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "10722677916294015259": ["convolution_gpu_bfyx_gemm_like",2],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "5740745357953479527": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "138379779469699309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",2],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "11910735867274493498": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2439993891369206440": ["convolution_gpu_bfyx_1x1",2],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14764715930784496165": ["convolution_gpu_bfyx_gemm_like",2],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",2],
+        "17647962002015093887": ["convolution_gpu_bfyx_gemm_like",2],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",2],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",2],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "438528596970898721": ["convolution_gpu_bfyx_gemm_like",2],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",1],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "15677717057398875599": ["convolution_gpu_bfyx_gemm_like",2],
+        "12512751736409465214": ["convolution_gpu_bfyx_gemm_like",1],
+        "708747442142592697": ["convolution_gpu_bfyx_gemm_like",2],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",2],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "14263790627243107300": ["convolution_gpu_bfyx_gemm_like",1],
+        "3383222668132648804": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",1],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",1],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",2],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",2],
+        "13264617841270329349": ["convolution_gpu_bfyx_1x1",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4398371999113956082": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "603883331897298932": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",2],
+        "14532519639619315651": ["convolution_gpu_bfyx_gemm_like",2],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "16773645387243701837": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",2],
+        "15980348884716629349": ["convolution_gpu_bfyx_gemm_like",1],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "6020017927557041768": ["convolution_gpu_bfyx_gemm_like",1],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",2],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",2],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "3177304125602972370": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "11637325834858582585": ["convolution_gpu_bfyx_gemm_like",2],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5245526691775741296": ["convolution_gpu_bfyx_gemm_like",1],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",2],
+        "6222595759158615206": ["convolution_gpu_bfyx_gemm_like",1],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3662747857062156477": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",2],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",2],
+        "14079654309452583394": ["convolution_gpu_bfyx_gemm_like",1],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",1],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",2],
+        "16347412180100581330": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",2],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",2],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "2683304757433993300": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "11690533591656807605": ["convolution_gpu_bfyx_gemm_like",2],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",2],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",2],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "10256831975351722184": ["convolution_gpu_bfyx_gemm_like",2],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3336076058264596420": ["convolution_gpu_bfyx_gemm_like",2],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "6109013751635776331": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",2],
+        "1963081583851864291": ["convolution_gpu_bfyx_gemm_like",1],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like",2],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "12755692101476964677": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5156033406916344703": ["convolution_gpu_bfyx_gemm_like",1],
+        "5479761740065152589": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",2],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",2],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",2],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",2],
+        "8069537351442302814": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like",2],
+        "7667898603371717971": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",1],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",1],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",1],
+        "16910952799476896905": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "7183578232279711009": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "11263540528012919947": ["convolution_gpu_bfyx_1x1",2],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like",2],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "4252157815622916471": ["convolution_gpu_bfyx_1x1",2],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",2],
+        "16579057939215877904": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "14558572801374416278": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",0],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",0],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "5459463503840817402": ["convolution_gpu_bfyx_1x1",2],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",2],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10912495395422146386": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",2],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "12866217660635921034": ["convolution_gpu_bfyx_gemm_like",1],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "1089944493540593798": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "7650862961269327235": ["convolution_gpu_bfyx_1x1",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",2],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",2],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",1],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",1],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",904],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5751283221740229986": ["convolution_gpu_bfyx_gemm_like",2],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",803],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",2],
+        "8257103926661643451": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "5941852872160795604": ["convolution_gpu_bfyx_gemm_like",2],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",2],
+        "9585113116232600562": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "3114869763557037270": ["fully_connected_gpu_fb_oi_ref",2],
+        "577844026691991089": ["convolution_gpu_bfyx_gemm_like",1],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "3219408878901707426": ["convolution_gpu_bfyx_gemm_like",1],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12151068022697708126": ["convolution_gpu_bfyx_gemm_like",2],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "10979362792894404338": ["convolution_gpu_bfyx_gemm_like",2],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",2],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",2],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",2],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2114232149447438823": ["convolution_gpu_bfyx_gemm_like",2],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4216958486055161753": ["convolution_gpu_bfyx_gemm_like",2],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "15924583510704449214": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17446505012657609153": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",2],
+        "4773077837537775324": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16986358655784856534": ["convolution_gpu_bfyx_os_iyx_osv16",724],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8794896449397768269": ["convolution_gpu_bfyx_gemm_like",2],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "4126895998426674411": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2608363732937932266": ["convolution_gpu_bfyx_gemm_like",2],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",2],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",2],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",2],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "18255227391100087860": ["convolution_gpu_bfyx_1x1",2],
+        "11587239927319376658": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "10141927023849730720": ["convolution_gpu_bfyx_1x1",2],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",0],
+        "733956743303342862": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "9737833587413114584": ["convolution_gpu_bfyx_gemm_like",1],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "14930789530046665855": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "16243196137456624852": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",2],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",1],
+        "4769003637955328938": ["convolution_gpu_bfyx_gemm_like",1],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6131481289104111211": ["convolution_gpu_bfyx_gemm_like",2],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "10022487076451608714": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",1],
+        "15636128989267984459": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",1],
+        "12917241193304093727": ["convolution_gpu_bfyx_gemm_like",2],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15329680728165965773": ["convolution_gpu_bfyx_gemm_like",2],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "17477062954520561609": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14184895905338394239": ["convolution_gpu_bfyx_gemm_like",2],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",2],
+        "12046017161414846599": ["convolution_gpu_bfyx_1x1",2],
+        "10106454449619141260": ["convolution_gpu_bfyx_1x1",2],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "11626398907755088688": ["convolution_gpu_bfyx_gemm_like",1],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "6664482192233202590": ["convolution_gpu_bfyx_gemm_like",2],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",1],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "12985942652866621579": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",2],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "15497797842820949408": ["convolution_gpu_bfyx_gemm_like",1],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",153],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",2],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12604104383683210104": ["convolution_gpu_bfyx_gemm_like",2],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "2598267743388306204": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",2],
+        "17264010982688979937": ["convolution_gpu_bfyx_1x1",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",0],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "8458082326743351141": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "290134020607738418": ["convolution_gpu_bfyx_gemm_like",1],
+        "7846384623429362522": ["convolution_gpu_bfyx_1x1",2],
+        "2929715823970060874": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "3265415000818832667": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",0],
+        "9542325095876448686": ["convolution_gpu_bfyx_gemm_like",1],
+        "13596876807637507229": ["convolution_gpu_bfyx_1x1",2],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",1],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",2],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like",2],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",2],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "7447163906170805189": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "11768117585574496387": ["convolution_gpu_bfyx_gemm_like",2],
+        "14766477690417085350": ["convolution_gpu_bfyx_1x1",2],
+        "15006321421735686121": ["convolution_gpu_bfyx_gemm_like",2],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",1],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10536316961655703500": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "9439431829175743345": ["convolution_gpu_bfyx_gemm_like",1],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",2],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",2],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2554991397391195611": ["convolution_gpu_bfyx_gemm_like",2],
+        "11634932044447867039": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",0],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "11626402549863483301": ["convolution_gpu_bfyx_gemm_like",2],
+        "7954972694876158422": ["convolution_gpu_bfyx_1x1",2],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like",2],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "5240706676373148280": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "10572945270796129630": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",1],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "15578456771467281881": ["convolution_gpu_bfyx_gemm_like",1],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "13851025202247070979": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "4531222427159927606": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",0],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "18161786710055240343": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6832967250168141428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11705756153433897198": ["convolution_gpu_bfyx_1x1",2],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "994842991399671507": ["convolution_gpu_bfyx_gemm_like",1],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "10480527638577674825": ["convolution_gpu_bfyx_1x1",2],
+        "11679235499894668689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12617736879671137111": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4723919313760470311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "8686733586982652897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17436550598696178210": ["convolution_gpu_yxfb_yxio_b16",2],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "9328585005923667676": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12469992822259989528": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17397600088595751782": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17085927772068621152": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "1281190653081960886": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15548854462657362014": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",2],
+        "4815047491742617397": ["convolution_gpu_bfyx_gemm_like",2],
+        "16711142379173254655": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12703696322769371912": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "13830605041347009953": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "15646081020506130125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "1208161922424418734": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",2],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",2],
+        "5754396201681434378": ["convolution_gpu_bfyx_1x1",2],
+        "6776601719651959634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "18333355024265557430": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",2],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "3536359641225772698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15363606233048272809": ["convolution_gpu_bfyx_1x1",2],
+        "6859143702528475520": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8458082326743351141": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "3477539135137665170": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "12546446257192651407": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "1425953627379976115": ["convolution_gpu_bfyx_gemm_like",1],
+        "12913866095318048752": ["convolution_gpu_bfyx_gemm_like",2],
+        "3582256192870592087": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "8585205898894363799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16270745071180354612": ["convolution_gpu_bfyx_gemm_like",2],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",1],
+        "6962062962411903140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "10005348255972308430": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9974905660671605427": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17344974951998490453": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "8250212706222997384": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3541538046227217664": ["convolution_gpu_bfyx_gemm_like",1],
+        "6438721407426283362": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18433141005552346566": ["convolution_gpu_yxfb_yxio_b16",1],
+        "215512025430490450": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "5977875644245993099": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "4780291919667721265": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15135644084742750702": ["convolution_gpu_bfyx_gemm_like",2],
+        "16214394186337220006": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18416908414174464784": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "5727758374304309350": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9832505855130134649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7949069388917479511": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8974851555526896131": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "14307705501349750896": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15871357525719630224": ["convolution_gpu_bfyx_1x1",2],
+        "9674248159643501374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11807282628372660280": ["convolution_gpu_bfyx_1x1",2],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "3731224822876468602": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",2],
+        "13009381943944182288": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3648713169465596196": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3779229442395464456": ["convolution_gpu_bfyx_gemm_like",1],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "4261192887643002603": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "17045386022302353268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2290965424106255219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11537166370263116277": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "6418500550523945192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "577844026691991089": ["convolution_gpu_bfyx_gemm_like",1],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "8450272092307894299": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5802466130040230797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6950586691727980329": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6445721440921372329": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16901594465545439334": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18033349045324117723": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "10996596479775375564": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15188273255634848057": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1135062632388082485": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",2],
+        "4863644213728386734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5941298590926032148": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "13426254939418471242": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7132328255408635227": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "6208201398783088425": ["convolution_gpu_bfyx_gemm_like",0],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16385915289511951113": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9532499374173117612": ["fully_connected_gpu_fb_oi_ref",2],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "15989894214714907271": ["convolution_gpu_bfyx_gemm_like",2],
+        "9899211365930959346": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "402932154499003993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7393551951402219833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14242202444788213591": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "5155616842071169667": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13810735868750326592": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "4021045600853993587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12397280593466519809": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "417352773179383568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4137738705782981426": ["convolution_gpu_bfyx_gemm_like",2],
+        "6726099352298108756": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "14263605862840500474": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",2],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",1],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",2],
+        "11787674847611032323": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18239740525818575112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17536591931934691648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17051718450741106678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10681768474583067517": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1786105567361070086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14044732537191084187": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14165325329016075285": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "7897973318803646560": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6097086855988597139": ["convolution_gpu_bfyx_1x1",2],
+        "3713558537660711857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5355283113999405036": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2457671437276780303": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7822463130304602936": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13111122805945249561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5938850739683493929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2835909063063272102": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "4261215727469154244": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8096131027165540886": ["convolution_gpu_bfyx_gemm_like",2],
+        "6490907666077364481": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16161974964662774501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6772954924703365345": ["convolution_gpu_bfyx_gemm_like",2],
+        "11921652085115182024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "9737565171095493297": ["convolution_gpu_bfyx_gemm_like",1],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7338932272767555117": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "5668538167635622474": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "8857763129101380288": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9099056013518879466": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "8459380583159325597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "14771341796915983228": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like",2],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "4574541202890196191": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9603926867418680768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11241838709529552265": ["convolution_gpu_bfyx_gemm_like",2],
+        "15799159401545270696": ["convolution_gpu_bfyx_gemm_like",1],
+        "4142555169083069413": ["convolution_gpu_bfyx_gemm_like",0],
+        "11152334947349565403": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "3358616456137155015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",2],
+        "10278515360013727367": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7571716782558859443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7540655869186258692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6115915509370042166": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",1],
+        "13575423234109624706": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "16015963261509760799": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",1],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7894230717547658326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12933785392937626017": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",2],
+        "712495040970043706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11719957578496407410": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6538526180355194359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "6427979320488981912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12721294268595880422": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16293465561256937726": ["convolution_gpu_bfyx_gemm_like",2],
+        "3806806400778685133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7532088618116521936": ["convolution_gpu_bfyx_gemm_like",2],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "14528180674573671874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6344600111737335616": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6331794802915121861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5600128039063009632": ["convolution_gpu_bfyx_gemm_like",1],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8879618489623984140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "14263790627243107300": ["convolution_gpu_bfyx_gemm_like",1],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "4252157815622916471": ["convolution_gpu_bfyx_1x1",2],
+        "17676344219475515993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2283020548041814543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "8004244584949995244": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7027962921778599989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2727175120437582536": ["convolution_gpu_bfyx_gemm_like",1],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",1],
+        "14288463473159113326": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10762489947656697207": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15956352026642286295": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9787359208094141129": ["fully_connected_gpu_fb_oi_ref",1],
+        "12370729327673204804": ["convolution_gpu_bfyx_gemm_like",2],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "14206076551739831333": ["convolution_gpu_bfyx_gemm_like",1],
+        "16996895381161031110": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",0],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "888110783182849535": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",2],
+        "11806402239500046867": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "12977678792503377525": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5519781859090160931": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "5317076157086789437": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18249888571553409563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5584145249514762750": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "6703148006012061136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10463632805036507382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2608363732937932266": ["convolution_gpu_bfyx_gemm_like",2],
+        "4039483032571506874": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like",2],
+        "9401409770128851474": ["convolution_gpu_bfyx_gemm_like",0],
+        "13912728810446567016": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "15449774545834423274": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",2],
+        "16455941573984854254": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6469277112054008613": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",2],
+        "2907572047024872990": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8700574100180128776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "5224252360611200472": ["convolution_gpu_bfyx_gemm_like",2],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6862489207967519978": ["convolution_gpu_bfyx_gemm_like",2],
+        "5149303626508247520": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17082268616134506581": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",1],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "3217674729821898463": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17546090415334871175": ["convolution_gpu_yxfb_yxio_b16",2],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9375272277044782377": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "5257134257307295031": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",2],
+        "9293682866734263821": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9105127035114339269": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",0],
+        "2915777749501772828": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3286250915720444467": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "5041922366297242362": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15199604820473713622": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",1],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",2],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",2],
+        "6400660469217490279": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6902485831441844789": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "10706180189726741161": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17996535939348094624": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17945600479510493949": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",2],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16573597215928075233": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "378292944207609677": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14807466024030301968": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",882],
+        "6820224292713065232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17025324057045572535": ["convolution_gpu_bfyx_gemm_like",1],
+        "2973337989445169388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2135164671985938807": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6822432085522584060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8469874583725132145": ["fully_connected_gpu_fb_oi_ref",2],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "15971340431600153619": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13077012961563218195": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12259611546528256409": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "9987415314864002460": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "7719954202744123391": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "5079381702867378605": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10528894716283673051": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "18120079746729314878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13705072264927031658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "8865700182878875593": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5104519293341299859": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "11910735867274493498": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "740260423018155343": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "3346891393420268502": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3935750066315595083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14149210193687890597": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17965267346493659374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4740585760177040164": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "11342135956789192833": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "9319064434175105168": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17705992851440826353": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1042605521041579458": ["convolution_gpu_yxfb_yxio_b16",2],
+        "975943900172381326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1345101751956733589": ["convolution_gpu_bfyx_gemm_like",2],
+        "5576296603250158603": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",1],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",0],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",2],
+        "8614375489387596119": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",2],
+        "3615052707933370958": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15865753975271064117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2673903488704336606": ["convolution_gpu_bfyx_gemm_like",2],
+        "6260684231055362504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13325762052023866627": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10745099399736462076": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17766628441954343001": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "14126906427006602775": ["convolution_gpu_bfyx_1x1",2],
+        "4805402210873641704": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",2],
+        "968105804060326332": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1697260854781788314": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9017605508157213607": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",2],
+        "10882719585803523032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "12878346173547852969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7650862961269327235": ["convolution_gpu_bfyx_1x1",2],
+        "12680688623162482255": ["convolution_gpu_bfyx_1x1",2],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",2],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",2],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "8638074773026771425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17966898762317477857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5924271203978892761": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2119566651547512543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "465567788283624320": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10538010212480716275": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3135889221160961020": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "144634005596305959": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "13962325395021860937": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2219693989290882970": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14942858162799632403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11184290482439221741": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13130001092233798285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7992077349568239994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16883372966656079608": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like",2],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17228615388053183744": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17264671167892237524": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "5596408142536691534": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",1],
+        "6931062623510631425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17647962002015093887": ["convolution_gpu_bfyx_gemm_like",2],
+        "9162862507585693061": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773077837537775324": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7305582749708309904": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15800554162607246964": ["convolution_gpu_bfyx_gemm_like",2],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "10183537720515608": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8541982562061181756": ["convolution_gpu_bfyx_gemm_like",1],
+        "8890400423799565844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5895417825685090256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",2],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "10850369799801518638": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8113660920207936963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "15275978123703636572": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "12421204749289937399": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "12327057172281102984": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11910900938442124765": ["convolution_gpu_bfyx_gemm_like",1],
+        "15497263259976427714": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",2],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15757308772667178999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2945245652128285151": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "9062781751511609244": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "9079676771143357396": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12714892326998505133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7171904645566467208": ["convolution_gpu_bfyx_gemm_like",2],
+        "6318214731544748245": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",2],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",2],
+        "12185561188335760786": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "5115298857582076692": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "17682152011630274259": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",2],
+        "18279416225045612845": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2506424495656099512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14466032674083938714": ["convolution_gpu_bfyx_gemm_like",1],
+        "1531349457115735845": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "2909347733581487795": ["convolution_gpu_yxfb_yxio_b16",2],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",2],
+        "16709930291825881111": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7946262362930618714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2431241169199693527": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2598267743388306204": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",89],
+        "4759671642533786591": ["convolution_gpu_bfyx_gemm_like",2],
+        "8575833423399668525": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "2685061316482503878": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "7105279481103494151": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6744692937598310090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15656706773401161497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "5465400164581117113": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",0],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",2],
+        "12755692101476964677": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16742058312847401360": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5284456216115118110": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18101509783610609787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6464050901421037006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6509271384550125629": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "11624226818593966530": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",2],
+        "6081038474197004540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3220756134650041028": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11196245220967135443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12512751736409465214": ["convolution_gpu_bfyx_gemm_like",1],
+        "13467831091041327178": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18253299978538051201": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16339187733937346919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13120262386070281193": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",0],
+        "2129742884686884642": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "9312974578711092131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5312269140190538942": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3571330754519284334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13842149852156451845": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1868805550246252143": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7954972694876158422": ["convolution_gpu_bfyx_1x1",2],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",0],
+        "14248622935809594779": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9178915201681884122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",2],
+        "12567935463143860469": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17386047378634216634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3980835859526174461": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8075261051536686307": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "14712972289919865502": ["convolution_gpu_bfyx_gemm_like",2],
+        "3074436655804078403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16013560489115457872": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",2],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",1],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15115780248032030963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6631103268546309714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3724572174214794659": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3364141707903132298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6328802691680458752": ["convolution_gpu_bfyx_gemm_like",2],
+        "5593329151028712439": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",2],
+        "497488185553682238": ["convolution_gpu_bfyx_gemm_like",1],
+        "16341131728764501904": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8768300687476117215": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "249639220178603842": ["convolution_gpu_bfyx_gemm_like",0],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",2],
+        "1786732163438555728": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "7667898603371717971": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "6051877311645456194": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10617442099961865960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12362290144183018227": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5150256051921098637": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16335738565228204503": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3287181725010492879": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16075006181495932250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3427691447288240419": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",0],
+        "18187345248160481425": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "6439316331231400868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "14731054961557547253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "990199360818917334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18035673326929466074": ["convolution_gpu_bfyx_gemm_like",1],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6654167459904026563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",2],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "11545529736818363243": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5854093367753757010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3398322619007806698": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "12867177334690636800": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "1463649546800120847": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12600479027568241746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",2],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",2],
+        "13079058582191027406": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6214677989814002369": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8321204816277460837": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15488532485794545310": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "16052741298509954954": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16541970206584576833": ["convolution_gpu_bfyx_gemm_like",2],
+        "15739756988784344130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17764795635957985989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2936333406928424760": ["convolution_gpu_bfyx_1x1",2],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "10812324504777808014": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "302694026179841870": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "13042938686374926241": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13158449455164143947": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12221101678609734421": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8099100633390626027": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7780336054545552428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10432365444137108781": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8174833187387604731": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10560559646371329711": ["convolution_gpu_bfyx_os_iyx_osv16",377],
+        "16589848737162195829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18279927175542031567": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14363025045807200040": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",2],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14808079119439455357": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15101834579076569231": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7400937639903461446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3914143598803149415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12550985938092975889": ["convolution_gpu_bfyx_1x1",2],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2670216237572554944": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "13618411266808159341": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13472577372534605883": ["convolution_gpu_bfyx_gemm_like",1],
+        "12745552951204330052": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17361714725103230834": ["convolution_gpu_bfyx_os_iyx_osv16",528],
+        "9996142812492415452": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4353583636655606632": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18359731130169236059": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",1],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "16361932270527364507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14517191894006411358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9285202897230250613": ["convolution_gpu_yxfb_yxio_b16",2],
+        "294153950488131608": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8846314870152404018": ["convolution_gpu_bfyx_gemm_like",2],
+        "15438470456977849772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1089944493540593798": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10069896554844445748": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",2],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",1],
+        "13636407347458845915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15811723176266128065": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10572380563704942622": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11669828823444745889": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "16341700680310033430": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6769524481210107636": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",1],
+        "12788611449571149037": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "9306120768594851497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",2],
+        "9263784636194609884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10693837788817206459": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15886016297043613632": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2034811390140488812": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10232809153913700925": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "10704906466618081803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "9463256538942644563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11888011890096886932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1898243736289257252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5319459637051859849": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12637509262827320678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14568618538516685994": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",1],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like",2],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "2328919599530851492": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3349468433721705582": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",2],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "3940619509778739158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13022797264172398260": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "15522099459864628246": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3531786338249174486": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6845814820599174031": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "4342446399224806160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1325669650629605592": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15678768217453692725": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15227189929676013024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9237587440336828595": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1680468564927032670": ["convolution_gpu_bfyx_gemm_like",1],
+        "15267084369543546013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4740864135937875560": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4839205075057964902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17266121859044814533": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9526266653688168429": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "10893628699015898230": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "7669403041163460089": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "3056212889689424946": ["convolution_gpu_bfyx_1x1",2],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17826868890632814593": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8466986812935642059": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "12264240305528403865": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13914239937595549448": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4416793079965040181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "462240909302334133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9162469583721135043": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13101474064130881526": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12081698011407453832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4282756088824939292": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",1],
+        "1198893312653197535": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13075579052866074866": ["convolution_gpu_bfyx_gemm_like",2],
+        "3370082268529091875": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14887465694301281952": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10782169939706303899": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11606895513516475339": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "16768497046700403748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "430132942408244070": ["convolution_gpu_bfyx_gemm_like",2],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",1],
+        "7203620615363933078": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "7826406759309418010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1919535500129437217": ["convolution_gpu_yxfb_yxio_b16",2],
+        "167635075964111628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8652128863605749877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8115522418294960470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3830842631023415233": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17811558714592064184": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14699357144600604190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13408839571805750778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3211829722778368758": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",2],
+        "16870036853278751563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "12256193738921380409": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4280250278457269231": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2931988747601319855": ["convolution_gpu_bfyx_1x1",2],
+        "8898095926967052382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "15833461718320604065": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078168059698267650": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "2269140636553245446": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "9363988379673156863": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14808759315730413993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16582761411084080015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like",2],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "17970424536559595893": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "10701208905236219083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4752129805031267391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "6131481289104111211": ["convolution_gpu_bfyx_gemm_like",2],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "5422432655714154738": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12681408370704556588": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "9073757008455674094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",2],
+        "762634810164167963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8353259929933281349": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "69832608384091511": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14020956765444878761": ["convolution_gpu_bfyx_gemm_like",2],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",2],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",1],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "3265415000818832667": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",2],
+        "12137340921829511472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16620268338434572068": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6102330514901613158": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9737833587413114584": ["convolution_gpu_bfyx_gemm_like",1],
+        "10006197783106691106": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "14337168375989245254": ["convolution_gpu_yxfb_yxio_b16",2],
+        "708201295462256406": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "16312739695844838884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17188004018198554470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",2],
+        "12268912077694742671": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17536482873064844308": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17109520309574369561": ["convolution_gpu_bfyx_gemm_like",2],
+        "10997156099709436375": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "5526223938481098693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "8094920912208664820": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15879385408480411034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "17479773641824222843": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "14264584839702225855": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4633923265089466898": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1933147648540963732": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6926590672771069689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17446505012657609153": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",2],
+        "10899110544832584656": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "7469107606686458209": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7720153213673170931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",2],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",2],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "13809046727894108358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14944590179685661287": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "11298854310398101852": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "6921081008428242060": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",2],
+        "13289438471364352634": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9456645866001656225": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "6022695488769618639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12213908871711628660": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "3574585436812909168": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",2],
+        "14791575777969587370": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "11455055202624479980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14120569486714455490": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13636859714649629789": ["convolution_gpu_yxfb_yxio_b16",2],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "7498614018449036163": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "15597317305719116351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9694701402170070080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13943983517468412332": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",0],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15924583510704449214": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9120374653477510318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",2],
+        "13576010631084066792": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11153522012082333137": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17248329632819747646": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12334522314915706512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14065215389112262561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5401380444992462053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8300290944865904942": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10996429218747311159": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9059418187274548462": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "9542325095876448686": ["convolution_gpu_bfyx_gemm_like",1],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",1],
+        "9588943054777767098": ["convolution_gpu_yxfb_yxio_b16",0],
+        "12353956380178079089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7332664632757815486": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "15231987838322151865": ["convolution_gpu_bfyx_1x1",2],
+        "11626402549863483301": ["convolution_gpu_bfyx_gemm_like",2],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "10722677916294015259": ["convolution_gpu_bfyx_gemm_like",2],
+        "9951951467222189282": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1387945708447092123": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "5671289201458690944": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like",2],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "1173136780324694038": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13387545865482261974": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "11871319147579477936": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2173720698351153121": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",2],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "16469493066700118274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13119040261291835298": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12616205756849913359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4010419602093863685": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8163000689380461611": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17158401628206867933": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14365232561737454031": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "4500107195684703428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",2],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "7211179360844946434": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",0],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",2],
+        "1142968634734769401": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15159534367247036982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18131954418490925431": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5602377914578322577": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "8267783192628619295": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "17077815973022307612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9775648000771985077": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3321251856445833973": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13161997040644039778": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "7986797517722531256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6109013751635776331": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8092673566670222445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18243724217479803107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16986358655784856534": ["convolution_gpu_bfyx_os_iyx_osv16",724],
+        "12248852114219058572": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "14675165976583799157": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3837190939606792435": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "10785252006948647963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15958886009743157242": ["convolution_gpu_bfyx_gemm_like",2],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "10720769054729185991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3141886504884887200": ["convolution_gpu_bfyx_gemm_like",2],
+        "6558436237075337721": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "10939522663236304689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14362876471450307424": ["convolution_gpu_bfyx_1x1",2],
+        "16949056117405140365": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13456967132681889167": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11612044653200304877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4776685525963461501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "1895560603400089814": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7541325258238317885": ["convolution_gpu_yxfb_yxio_b16",2],
+        "688897645422834994": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "8125500765566111746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "9250030880535336888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11908169713247209976": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7493567975736494003": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "13207134083675064956": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "1527126728636583082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4391695940614024479": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17012832508134584917": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14697908554930995949": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14258941821319200170": ["convolution_gpu_yxfb_yxio_b16",2],
+        "867868384380428650": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3647203315640064927": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11587239927319376658": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "2367452220382767844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2807516818436584831": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",1],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",1],
+        "17777248703109395158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "13633048912926365931": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "16925721317097534009": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3364467044587904559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "18148431787172327554": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1485662490111767875": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "959260710517842876": ["convolution_gpu_bfyx_gemm_like",2],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6114241186364821679": ["convolution_gpu_bfyx_gemm_like",2],
+        "14764715930784496165": ["convolution_gpu_bfyx_gemm_like",2],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15666720796968090760": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13862199647000195451": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14689423748560749566": ["fully_connected_gpu_fb_oi_ref",2],
+        "9190054801124577726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4818598834950786080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3894121333485095575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5436553435132026991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",0],
+        "12526988667216482085": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "4588117321438490483": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "14058311587429063829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3423717644513543253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10524079700393212963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6875055157295709098": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14749758365915995876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11226912053840621089": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "11179211757115972103": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "708747442142592697": ["convolution_gpu_bfyx_gemm_like",2],
+        "5931972000452008090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9542795021683486547": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15578456771467281881": ["convolution_gpu_bfyx_gemm_like",1],
+        "14742998604680438008": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "11612209645710419427": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",1],
+        "8456185296386225533": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7178866013527118649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3503893875515897267": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4023281997496669037": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12411075288896909468": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6286349307417232815": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10531218595816974659": ["convolution_gpu_bfyx_gemm_like",2],
+        "9654944848074437064": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "17868834743037242721": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17207560805775399864": ["convolution_gpu_bfyx_gemm_like",1],
+        "10070051133200561606": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18027243127893440568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13598062803968442253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7397376454528841634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3924212595662208655": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "16653412888821076903": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "14311888412221174224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1498389965422474930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",2],
+        "4849343880559509889": ["convolution_gpu_bfyx_1x1",2],
+        "2296581485980163665": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15438530452161762045": ["convolution_gpu_yxfb_yxio_b16",0],
+        "8837721075413149240": ["convolution_gpu_bfyx_gemm_like",1],
+        "6126579157025017808": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10995886682834858002": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11685571068419983048": ["convolution_gpu_bfyx_1x1",2],
+        "18186612931984342471": ["convolution_gpu_yxfb_yxio_b16",2],
+        "632116056424249698": ["convolution_gpu_bfyx_gemm_like",1],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "530973311459168543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "7206226541369793931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9562527071055150197": ["convolution_gpu_bfyx_1x1",2],
+        "5852569526295779497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9433162648796382333": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",1],
+        "16111630594575598044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13200834963067135502": ["fully_connected_gpu_fb_oi_ref",1],
+        "741727668385951462": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "16535378085465418910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7247475218645942682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1141277975467180549": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "3934090072734175564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14769111376729628572": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13124342334495538095": ["convolution_gpu_bfyx_gemm_like",2],
+        "10184417796355593956": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4079026972040047969": ["convolution_gpu_bfyx_gemm_like",2],
+        "17848582668902427291": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12455871938978342189": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16585502133291740543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "223412492545617963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "18322435770607273817": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "11596971301790598405": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17195293614280872622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4982549855424649217": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2191416057399400794": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "8931469268093714938": ["convolution_gpu_yxfb_yxio_b16",2],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",2],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",2],
+        "2542112741645712811": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "1662588605309237309": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14646141746558153748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "252188028702250668": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "10322427853063201289": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "18432421400879260832": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "7612288596055048389": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",2],
+        "6014752258124559691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11822555173696078282": ["convolution_gpu_bfyx_gemm_like",1],
+        "2531597468539205600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4708035980731751007": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "9165817820007469505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18093895673012393740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "42935035304560876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2744566213784972700": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9815961128076948768": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "5834825835421819800": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "5328004363712610999": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11824946481875102910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9523941899498458600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "10914921540144371519": ["convolution_gpu_bfyx_gemm_like",1],
+        "9545968464906009869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8780671766122887951": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9659837320293869285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7600034850149968684": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15581997249051127645": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "1787152688807233651": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9309173544512377803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8257103926661643451": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "8260073247636023575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13124659308711651699": ["convolution_gpu_bfyx_gemm_like",2],
+        "15612334131144235342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3635446784873718932": ["convolution_gpu_bfyx_gemm_like",2],
+        "269167598200943915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "1802510952374368682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",2],
+        "15337841577110104431": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7590390572139249734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14918482938530107806": ["convolution_gpu_bfyx_gemm_like",1],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "5449117614287394433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",1],
+        "8999570321113443117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16347412180100581330": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3463206409786541741": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1354199155380786906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3894130445933963911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1634884284544380004": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12818786388125465101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14847662630748580880": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10961696014697611547": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13493119419114659706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "7440953406601377619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",2],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "3019864917236424168": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "11254744277059719812": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16120120950870908964": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",2],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "16548491024653039967": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "15188570678726970998": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "8609939102588915855": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "12926382190254407283": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3859139031732555228": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8792202318168046223": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "447943521999310356": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14043064718932538557": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",2],
+        "15106614232165315070": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",2],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",1],
+        "7815650257256675477": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "17617204422090117691": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "12896226291465522304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11317843493537672866": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11972290239275366299": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9735280865199145311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3172518362830684966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12933253554354951910": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "8527055001340219573": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "637115537820955017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15303251546207338960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15961487889420208188": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "6511742759171254447": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "3699344686791530101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "5330130011321223525": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2477849395789783501": ["convolution_gpu_bfyx_gemm_like",2],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12319073009094248232": ["convolution_gpu_bfyx_gemm_like",2],
+        "13833960927635646899": ["convolution_gpu_bfyx_gemm_like",1],
+        "12825407709419526493": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "12379166764490359144": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6118737381591369532": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12793347723828876280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11263540528012919947": ["convolution_gpu_bfyx_1x1",2],
+        "14544219140091420262": ["convolution_gpu_bfyx_gemm_like",1],
+        "1157069349112113377": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13861223834466385546": ["convolution_gpu_bfyx_gemm_like",1],
+        "3385797925880519845": ["convolution_gpu_bfyx_1x1",2],
+        "7432142107544210174": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15398380328746287438": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "5291817530552764387": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3231651468686543808": ["convolution_gpu_bfyx_os_iyx_osv16",528],
+        "1088710562928089772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9414927552739380436": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7565006185780806333": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",0],
+        "768820004084041271": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14346466672686303107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14416897092729861207": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18209930746627816139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13531892014108749846": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7463517383354309469": ["convolution_gpu_bfyx_gemm_like",0],
+        "17030051116023319382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11497761673211348612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10555835101752189454": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "11971736882960844905": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14175962333785791005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12131461096501477069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12771841901357553928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2576773809294607971": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6685985905221810743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2487679091192300910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2863465257341735941": ["convolution_gpu_bfyx_1x1",2],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "6141193842171342687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "844576097677576405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "290134020607738418": ["convolution_gpu_bfyx_gemm_like",1],
+        "6692085187697087807": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",2],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",1],
+        "17224655686568797096": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "17651477639302255490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "4651261398203912503": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1638858323987412931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "577842450575835175": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3156783219125679946": ["convolution_gpu_bfyx_1x1",2],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17587625589456309495": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17133376737554844449": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "10055247339012492459": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "18164706399147697716": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7927587739463421727": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11732321796147239597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",2],
+        "15747571668131081693": ["convolution_gpu_yxfb_yxio_b16",0],
+        "574869992355132069": ["convolution_gpu_bfyx_gemm_like",2],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "3061372669831947873": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",2],
+        "11759322316883943989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14280056365441354869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6123707371654753818": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8039645104667120991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",1],
+        "2295659951331099829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11585430081839020501": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11079061135559995449": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15192022454507415969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2014114949154914483": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "9434143681116089888": ["convolution_gpu_bfyx_gemm_like",2],
+        "16096353398003405565": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2789901295967374316": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9439431829175743345": ["convolution_gpu_bfyx_gemm_like",1],
+        "11694428890484758107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8542782888102516498": ["convolution_gpu_yxfb_yxio_b16",2],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",2],
+        "6664482192233202590": ["convolution_gpu_bfyx_gemm_like",2],
+        "6267138247577676996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13199672084171648305": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",2],
+        "15181987458871339815": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "8710469645764612897": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10626018319543075871": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11086464266772450142": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2085467192625870436": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "12287667143602938393": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "8155268141318893606": ["convolution_gpu_bfyx_gemm_like",1],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17763347648779573375": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",659],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "18244966393978155130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3114210363452108737": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14339479547451422762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16674633029045714564": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17054207561525574617": ["convolution_gpu_yxfb_yxio_b16",2],
+        "900243696733233996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "4298242568890525997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "15421280195211166867": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13957350536347764705": ["convolution_gpu_bfyx_gemm_like",2],
+        "2803569867265035123": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4999171487916568471": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7748514992101811029": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11892455357792445192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7762916621662364082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17462996923473002801": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7201521533301617290": ["convolution_gpu_bfyx_gemm_like",1],
+        "6288489890578212082": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like",2],
+        "4169042131399110713": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14884315147107686805": ["convolution_gpu_bfyx_gemm_like",2],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",2],
+        "17277917672233464304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2204178900998688268": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",2],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "15757351352532908153": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "486816652607164926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1310498917952637709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14553577436929219470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7233783054884565746": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "11800783548769329949": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9814647153117279415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",1],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "15317946705199574301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8210092359850191682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1784892318069674949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5965451243366505522": ["convolution_gpu_bfyx_gemm_like",1],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",2],
+        "14770895149190975433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6816632607384969096": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2817919813339364130": ["convolution_gpu_bfyx_gemm_like",1],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",2],
+        "6253009218981124949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5720964268093705079": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "1379758215293949563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "12686330321897091505": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "9169935203300589222": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",1],
+        "5507373575763339429": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6910589963488897537": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "10292349730148518173": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "1161304401293419103": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11856815095538913065": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4445913285957791409": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "75120034961995929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "123026136670202868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3444250649099578792": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15863531785836309247": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "16681690088928624738": ["convolution_gpu_bfyx_gemm_like",2],
+        "7979265448683159733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",2],
+        "8302886228681027388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6479042072492268780": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15531306520021286502": ["convolution_gpu_bfyx_gemm_like",2],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",2],
+        "15813044197987178947": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "12430677767405883160": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",0],
+        "12768933181342249823": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14963614790718019676": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12275528180752359999": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12867038076564517306": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "14262482011051329729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1798440805196304745": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",0],
+        "16513038896689318072": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15464554714318666871": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5751283221740229986": ["convolution_gpu_bfyx_gemm_like",2],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "4242173940230902960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5525691792821548743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "7972861956906521660": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "14670339865153970893": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7861119251077361882": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "12004628115138530335": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14526262781657292025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13051390418571971928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15487730714504758208": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",2],
+        "9871407256481442790": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6808843088626121909": ["convolution_gpu_bfyx_gemm_like",2],
+        "4251673416603443503": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6250785177115691293": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",2],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "563440246018637010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9909564412554801760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14085753024976995311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16182470664818268848": ["convolution_gpu_bfyx_gemm_like",1],
+        "5266313052389515491": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3114869763557037270": ["fully_connected_gpu_fb_oi_ref",2],
+        "7792512829747836997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "12850195004093999773": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11279789373735965856": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16561618767117193109": ["convolution_gpu_bfyx_1x1",2],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "1427040855295681285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7614673554809134631": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7628077869220463202": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "5245526691775741296": ["convolution_gpu_bfyx_gemm_like",1],
+        "15859493313686060349": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "15551338663759394064": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",2],
+        "12871555773123368130": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "17876939980356283351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14968401410355925289": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12407890437443790515": ["convolution_gpu_bfyx_gemm_like",0],
+        "15271783562528081169": ["convolution_gpu_bfyx_gemm_like",2],
+        "7861234698413147249": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like",2],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15967614281807823696": ["convolution_gpu_bfyx_gemm_like",2],
+        "11738360883999461965": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1658174263018326745": ["convolution_gpu_yxfb_yxio_b16",2],
+        "697333686114567307": ["convolution_gpu_bfyx_gemm_like",2],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "522313477023837056": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14273849038400888518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",2],
+        "17737878867906137388": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "15858356755924943957": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5705056256080522960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "636447309806530300": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10309986238001994183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15693204620575485046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4995051972576749717": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9412392168031560549": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13082313288887957490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16610284927818475574": ["convolution_gpu_bfyx_gemm_like",2],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",0],
+        "7106362077449435105": ["convolution_gpu_bfyx_gemm_like",0],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",1],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "17580363505072477558": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6934915634718835911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15594673952484539994": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "6055793483770886264": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8519354640245415816": ["convolution_gpu_bfyx_gemm_like",1],
+        "108442764389420633": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13187657215288939912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2447893458816856522": ["convolution_gpu_bfyx_gemm_like",2],
+        "2644054989263429508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9521715904587435700": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4936961129835214448": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16209868158768307271": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "14193777296032212476": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12179968379663737450": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9314293064351558241": ["convolution_gpu_bfyx_gemm_like",2],
+        "4090512597925170883": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8613740762403897614": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "10465119306486335226": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4116610956045302817": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12960590161485806657": ["convolution_gpu_bfyx_gemm_like",2],
+        "6723804327185132790": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10811224523636009881": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7552544688541855979": ["convolution_gpu_bfyx_gemm_like",2],
+        "8561261337239934159": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2819475920524949313": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14501815053459103515": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18161971781834208343": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9518071423184197213": ["convolution_gpu_bfyx_gemm_like",0],
+        "17487594336237597163": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10113696658040720628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6624079551747071383": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3449007266907948591": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5656320098721954644": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5109636469531439569": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3658599312236344017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",1],
+        "16579057939215877904": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "16428789154716792138": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1390379098099686972": ["convolution_gpu_bfyx_1x1",2],
+        "9065137335863605013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17019474731460049248": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6808980404170272597": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "15457040168177954463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13398986810666238552": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8976966933427522253": ["convolution_gpu_bfyx_gemm_like",0],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",1],
+        "151851883170419907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3080612075440389053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "16617569629839911513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2527018855890902975": ["convolution_gpu_bfyx_gemm_like",1],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9280279544075738476": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7213383384662748578": ["convolution_gpu_yxfb_yxio_b16",2],
+        "568191462231494113": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13182623473102074079": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "7683334381958571864": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "13835859040765465258": ["convolution_gpu_bfyx_gemm_like",0],
+        "9184275066167601343": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "12866217660635921034": ["convolution_gpu_bfyx_gemm_like",1],
+        "14184895905338394239": ["convolution_gpu_bfyx_gemm_like",2],
+        "13058026769607428653": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7226002258982605405": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "17893696934478535385": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9955816463820554626": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "6715523440337925186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5179013491581036103": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "17491825380936802930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1117729599102132243": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16210934187492210542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7274647463152753603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11010673493295430801": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7364084475361144967": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5751553671208192963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "10982526068861394162": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14359026450472189405": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3107655421406621915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12311849904266608701": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11280672272221124024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "3383222668132648804": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "5106072383853469966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16563030700888982979": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17538518333907257868": ["convolution_gpu_bfyx_gemm_like",2],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",0],
+        "1594829714229111215": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "14461365896122393071": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14445031303145992349": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "16912738776771289379": ["convolution_gpu_yxfb_yxio_b16",2],
+        "860852602930021016": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "8611873585228858719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3366647240745174769": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "12989677691575632174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3117673619907511009": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "959666756751640874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",2],
+        "9736684300833719045": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7425369489110576363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12850044341631872743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10864011008000364415": ["convolution_gpu_bfyx_1x1",2],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "17036482252028102703": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "9366100787108468082": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6666210546769702280": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2753393184265405425": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "15669242195570440840": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4121535611334103359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2450251936650841836": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14963449045970262346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5464801565268066541": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7075659071934895087": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "15138641310139776109": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13155570698198686211": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13590444711975157776": ["convolution_gpu_bfyx_gemm_like",1],
+        "264466528528245004": ["convolution_gpu_yxfb_yxio_b16",2],
+        "537074122417021898": ["convolution_gpu_bfyx_gemm_like",2],
+        "13637537549252005181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8117638644045799192": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12829916847670789556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11299021927882809469": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11096750581455917678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",2],
+        "15949311219856917559": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "16129682385980878760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2147896649835170790": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17480277135590489472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15223779293313750042": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13264617841270329349": ["convolution_gpu_bfyx_1x1",2],
+        "2060161076370553192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",774],
+        "6846760451124717672": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3872151366780051246": ["convolution_gpu_bfyx_gemm_like",1],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "6713554643048248003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12984970933638742657": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "3797986765970777456": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17342758321852264926": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2581014920570427861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6677367803113594603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11939914680143672459": ["fully_connected_gpu_fb_oi_ref",1],
+        "14885031472057965707": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "1330337530094825121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9906138392975645747": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16955829428734830876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15974208269240775349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1235864574444794315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",1],
+        "2706024586717944825": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13598984763955239116": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "6880746917399866285": ["convolution_gpu_bfyx_gemm_like",2],
+        "3622778166646258015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15998609626878578708": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10117092543913369513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",2],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "4727628999533330347": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6128157319666849074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "848735117501914374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "816527348871309530": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "6210866413385292851": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "12624762527234542946": ["convolution_gpu_yxfb_yxio_b16",2],
+        "518733575377143679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "5541365322085427177": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4633763257197651352": ["convolution_gpu_yxfb_yxio_b16",2],
+        "501138469231848694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3567607339495161307": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8794896449397768269": ["convolution_gpu_bfyx_gemm_like",2],
+        "12531880391016521628": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11682041005124075890": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9778670810863940690": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9116620473576064051": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1734769856106746136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",0],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",2],
+        "14304497513584420080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15617599138946168772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13962189339706230770": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16404362308829952450": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16128152634974034731": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1982176363226079588": ["convolution_gpu_bfyx_gemm_like",2],
+        "361497145093734608": ["convolution_gpu_bfyx_gemm_like",2],
+        "1596353239542510685": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "11693134363909241514": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14819324687394700033": ["convolution_gpu_bfyx_1x1",2],
+        "13145474177271090694": ["convolution_gpu_bfyx_gemm_like",2],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "3285968426413869315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",0],
+        "2527276292172180386": ["convolution_gpu_bfyx_gemm_like",2],
+        "677249604491773387": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4291531885506213180": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2048528188026477374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14151747022287993729": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4240975186599864955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13530377297525480029": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "4683320313995550908": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4165926748138587705": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16851716501872033211": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "11148428797294511280": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5050273611519516510": ["convolution_gpu_bfyx_gemm_like",2],
+        "13668940862847596363": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11563334365673075610": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13040213971461407125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",0],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "8941904405273405481": ["fully_connected_gpu_fb_io_b8_f8_vload",0],
+        "14349625788399542568": ["convolution_gpu_bfyx_gemm_like",1],
+        "13507437548205340054": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "14616413139039308367": ["fully_connected_gpu_fb_oi_ref",1],
+        "13854845390344305906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8956566633622104099": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "6232452664016831516": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4398371999113956082": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",2],
+        "5924698731432597368": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "17616719165728687438": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",0],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7375461241315602473": ["convolution_gpu_bfyx_gemm_like",2],
+        "16516262096533373158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2133236128630074068": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "12476381811279163147": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4883588237027084166": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "12816344078518706065": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16910952799476896905": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",2],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8655739705298627602": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "17399728556634171321": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3738514326459749974": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6550549654706796887": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",1],
+        "15449715596597016714": ["convolution_gpu_bfyx_gemm_like",1],
+        "11775265110573621330": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13723434004563378589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "17987739992848266169": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6792281830591233968": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13585916416233680276": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5942742563827424666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",2],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",1],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "10717031088082350652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8260024340787818709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",2],
+        "8494725779002762049": ["convolution_gpu_bfyx_gemm_like",0],
+        "8177017967170389275": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15483343060578660278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10174346112533671798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4885504197789468842": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5020763861388859254": ["convolution_gpu_bfyx_gemm_like",1],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12384317536636082264": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",1],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "3835387982926010630": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7369109502608631066": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10979362792894404338": ["convolution_gpu_bfyx_gemm_like",2],
+        "7824075236081312706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17638753020411096694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2759142157812694203": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",2],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "14215394208930955062": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16161112020028389294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",2],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",1],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "14469011068777098822": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "8069537351442302814": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "10577357333308653027": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9207799012657103903": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2149299205144202701": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6942606834115081953": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5421397731090158382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10309586646776223605": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3820661057776133570": ["convolution_gpu_bfyx_1x1",2],
+        "16290551573997593168": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11051684565403294370": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11208625628954179200": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5516518048239364231": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "16950925976172895196": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16995444341569389342": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "10588059104387338398": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2199167704280374654": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999543693712389402": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10572945270796129630": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "7051238664181857633": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "9436893310034662243": ["convolution_gpu_bfyx_gemm_like",1],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "13767500791267563349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15310474203328198827": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11872943152839631823": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "11070968498963106073": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "7168028033666253263": ["convolution_gpu_bfyx_gemm_like",2],
+        "7412772553395852003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8021915447462898777": ["convolution_gpu_bfyx_gemm_like",0],
+        "10833423331830484028": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8717456809499914445": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",2],
+        "18384215264061386089": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "5721096633060535553": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2882493407831196579": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",0],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "2583562092192709891": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4848143712599565301": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4126895998426674411": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4571404165794634411": ["convolution_gpu_bfyx_1x1",2],
+        "17713666626443142908": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6467251764899975676": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "230697511447695268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6959692641873234850": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6469003096932778978": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "10803929517111130153": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",2],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",2],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",2],
+        "11361202190524990711": ["convolution_gpu_bfyx_os_iyx_osv16",882],
+        "7104266560248570112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9504349455215835807": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1454014148777456006": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14689812157592240007": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5008541841892687897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "13731797251725972855": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7107677063657303327": ["convolution_gpu_bfyx_1x1",2],
+        "5632958791318880428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "149810021216592597": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "8506271633579173639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12762301414049772746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15418732002117930760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "603883331897298932": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1338581414403268264": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2603233376890892194": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12656228464579497510": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3986429358782189117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3499106702307464480": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7223737889890738294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6388086351909447495": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11291868421122092629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "6992073477131490452": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "18184621367843960190": ["convolution_gpu_bfyx_gemm_like",2],
+        "6613116267521819997": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10525462454857911293": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "13326339730522937517": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "345043289576587800": ["convolution_gpu_bfyx_1x1",2],
+        "10406201782146034797": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11070446574652704629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6018481198468872040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "13368203360773949292": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4455369117448405874": ["convolution_gpu_bfyx_1x1",2],
+        "14236681916032484600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "14147460733160099960": ["convolution_gpu_bfyx_gemm_like",1],
+        "3965327578193694832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16626502801066228405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12625112690264223217": ["convolution_gpu_bfyx_gemm_like",2],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",2],
+        "14930789530046665855": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "1186545671730357033": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2554991397391195611": ["convolution_gpu_bfyx_gemm_like",2],
+        "2282123636764935353": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6371463287631658789": ["convolution_gpu_bfyx_gemm_like",2],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "2737064424879246276": ["convolution_gpu_bfyx_gemm_like",2],
+        "5012013738970489338": ["convolution_gpu_bfyx_1x1",2],
+        "15710826363434377015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16889886654893884746": ["convolution_gpu_bfyx_1x1",2],
+        "4329042569031331949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5420215220876162902": ["convolution_gpu_yxfb_yxio_b16",1],
+        "824911124897042617": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4208702365182336507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "5963901433137582265": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16527840366172690992": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "415232223198122046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "1984152634309440563": ["convolution_gpu_bfyx_gemm_like",2],
+        "5546447512898130524": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",2],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "7100056605355325582": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8611710048909301596": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16895523130717954500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "5250257911846706612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9096495972770198040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16936968151775497887": ["convolution_gpu_bfyx_gemm_like",2],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15980348884716629349": ["convolution_gpu_bfyx_gemm_like",1],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",2],
+        "14385148066232093878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9714508918051740792": ["convolution_gpu_bfyx_gemm_like",1],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",904],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",2],
+        "4732226322522411018": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "4723643671527109645": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4615766471724791034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10034575179959785704": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1584906448442153128": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17264010982688979937": ["convolution_gpu_bfyx_1x1",2],
+        "10514865654990433040": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "11411413051626428349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2412846055735335136": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "14686272582436109012": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1973819632224480598": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12011606174372081253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12287827551127082597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "1775515808301276388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2356785927637873692": ["convolution_gpu_bfyx_gemm_like",2],
+        "3976736548270395981": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "331661172067077796": ["convolution_gpu_bfyx_1x1",2],
+        "8809438390805488749": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7678226048807568024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17430994325635361377": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15641322340289892344": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3225866261943242708": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2102169562353089558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "6764038061921866053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13809330759308309353": ["convolution_gpu_bfyx_gemm_like",2],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",2],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16252420150239789472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14558572801374416278": ["convolution_gpu_bfyx_gemm_like",2],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "8645965165922150743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "3904383357046705799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11313025178951972247": ["convolution_gpu_bfyx_gemm_like",2],
+        "2095245727814188300": ["convolution_gpu_bfyx_gemm_like",2],
+        "11979032916453246611": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4805194563120934409": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18094592431313771787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8010456208258134834": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12093737479877309006": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "818998169319147148": ["convolution_gpu_bfyx_gemm_like",1],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",2],
+        "16016396784190934729": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "4113061482402915179": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5445584581720919223": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11516184047320372729": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17559750858236255044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4171374172427814762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8372855367097191197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9272405129875537865": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "14281801257982447624": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16247799703932868151": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7287107719392705356": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "9492026326463873766": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7830644361525332797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12536364199388193516": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "5074273865983613482": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "8015885733173521367": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10262850086265676378": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16644952765107909604": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8219179055259247644": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2844794465598309010": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "8726274320876550785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",2],
+        "10613621801998459768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "560996739186313493": ["convolution_gpu_yxfb_yxio_b16",2],
+        "47872288115972996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15281554100135159550": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like",2],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "3793265335909270748": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11002875874008272679": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "9870432551513415176": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "967141158966448909": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",0],
+        "14612206111651511130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13009612703754510124": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2884499360870038648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16966477504105790279": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",2],
+        "13218298785325404589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",1038],
+        "7615563770941714046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "18029396837690671545": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3889456478817717702": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7241156141838776126": ["convolution_gpu_bfyx_gemm_like",2],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17961793197503317952": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9418041909134721047": ["convolution_gpu_bfyx_gemm_like",2],
+        "12439827609628473238": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",2],
+        "17969195175890497912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4683575221310726091": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15555083739490354527": ["convolution_gpu_bfyx_gemm_like",2],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",2],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "10232429887105708502": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",2],
+        "17734480671864478402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "16003914811215141863": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7378840969627751667": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "11411580529501121244": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "18152894191323920027": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "11880337915508207160": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2314579504260247470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12669783714916998842": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "15774073623451382326": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13621771094745539509": ["convolution_gpu_yxfb_yxio_b16",2],
+        "576164857039495839": ["convolution_gpu_yxfb_yxio_b16",0],
+        "757225477250808939": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10271261715175176019": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "16974981142389546385": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1208665743495618456": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17618727959983224888": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2562815925396318565": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5479761740065152589": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "15924916465272239832": ["convolution_gpu_bfyx_gemm_like",2],
+        "2629918844315184499": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3442845193734599342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16188473537674428539": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7210854698870587826": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3336076058264596420": ["convolution_gpu_bfyx_gemm_like",2],
+        "4988480452582288323": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",2],
+        "1646638859396929303": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9079203986633151014": ["convolution_gpu_bfyx_1x1",2],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",2],
+        "17255805293355120219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2321773209766424929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9495192057713157041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",2],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "3406812365298442897": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "14066675688397331406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7447163906170805189": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3396731547696204011": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10217182484138821482": ["convolution_gpu_yxfb_yxio_b16",2],
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",2],
+        "12184235281888559274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14025678657541870252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1068155851494601726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10598099730944525581": ["fully_connected_gpu_fb_io_b8_f8_vload",0],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",2],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "10396788403466463989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like",2],
+        "12972798847556569913": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "17081449111821382308": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3725013268198063198": ["convolution_gpu_bfyx_1x1",2],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "5312413491828906254": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13766070202060785219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10318417166945621015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16184142990117192433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",2],
+        "1095959046309466012": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",2],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",0],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",1],
+        "14651159827389223108": ["convolution_gpu_bfyx_gemm_like",1],
+        "9576962489937466093": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",2],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "11319799002723299753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "932195814187889636": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "14799012895945855878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",2],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",2],
+        "10868287582480518153": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "14199158130218117084": ["convolution_gpu_bfyx_gemm_like",2],
+        "8618627241234406784": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7232326270078161768": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3932617680771387232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18080788888293706149": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12903015669020591018": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1963081583851864291": ["convolution_gpu_bfyx_gemm_like",1],
+        "2683304757433993300": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16911464046178654033": ["convolution_gpu_bfyx_1x1",2],
+        "5313382805395362669": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3680396164645753224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10656486867659934705": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "15497797842820949408": ["convolution_gpu_bfyx_gemm_like",1],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "13509275050322423832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4802009650745059499": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11175936010605958812": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6126073246053235472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16271675466919087248": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18199824206329982249": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",2],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",803],
+        "11498084465186986412": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12867590715338247144": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11614353411428360211": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6214624887470295152": ["convolution_gpu_bfyx_1x1",2],
+        "15534876725099279666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",2],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1450861513159359637": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7837876599690110056": ["convolution_gpu_bfyx_gemm_like",2],
+        "1703738105910059846": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5046089607609787258": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3211956138512889433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13668072006310741601": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",2],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "2740885908397449753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "1854612313463195535": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1318571118468536310": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12600707101000510621": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5940007433515335594": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "3603187029740446600": ["convolution_gpu_bfyx_gemm_like",2],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "15136770992109675092": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "12604104383683210104": ["convolution_gpu_bfyx_gemm_like",2],
+        "12305397676800089268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4216366893358625960": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "1944461047787586724": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6512987867462549101": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "15006321421735686121": ["convolution_gpu_bfyx_gemm_like",2],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",1],
+        "5886032409392368342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "7155796826953849982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1635121016109328853": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8651641584737798174": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "12051398350382954787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "6107700818115209289": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18337762134908554532": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6784853321527374515": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",2],
+        "9763754389347695094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "6300691162962736560": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7441188930428385142": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3662747857062156477": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2180039710632160943": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",1],
+        "8185193068790365354": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "11658751382892761740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11324651029379152442": ["convolution_gpu_bfyx_1x1",2],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",0],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",2],
+        "10509933181132310969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "789202969657820559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5099947445888268507": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5657471280535146301": ["convolution_gpu_bfyx_gemm_like",1],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "155962454315573087": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "16101625311127899143": ["convolution_gpu_bfyx_gemm_like",2],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",2],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",1],
+        "838726445796308454": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "2042946928570163140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "16243196137456624852": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "4438526427135833402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14585000863294748739": ["convolution_gpu_bfyx_gemm_like",0],
+        "5950285227163574810": ["convolution_gpu_bfyx_os_iyx_osv16",528],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "15308196586729169691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "4937688558707451907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10956668791040094584": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6578517057140155080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2215570184121152738": ["convolution_gpu_bfyx_gemm_like",1],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",2],
+        "584086621952390547": ["convolution_gpu_bfyx_gemm_like",2],
+        "6367371992814643260": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4802014352392262053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "7405315582091905378": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12166852830214895457": ["convolution_gpu_bfyx_1x1",2],
+        "8954488655859677891": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17084977396231597605": ["convolution_gpu_bfyx_gemm_like",1],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "10789133352712755945": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "11731277083374465361": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9127827617126714860": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "9811086682271990794": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",2],
+        "12686015414958770329": ["convolution_gpu_bfyx_gemm_like",2],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8952733400567254769": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1771347579022727189": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8325903548627432": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",2],
+        "17958575161092859465": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "216603198215625772": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",2],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "9649445293567537596": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6744044115114192916": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "9482749589540764069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6603778920476932267": ["convolution_gpu_bfyx_gemm_like",1],
+        "8976238022515713641": ["convolution_gpu_bfyx_gemm_like",2],
+        "14840851809642905875": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15131258379753113816": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "10292585962794261197": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "5941095082097535176": ["convolution_gpu_bfyx_gemm_like",2],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",2],
+        "3631332752661975859": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3219408878901707426": ["convolution_gpu_bfyx_gemm_like",1],
+        "18091349188280218186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10820312036555742020": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "17377204616846724192": ["convolution_gpu_bfyx_gemm_like",2],
+        "7998455776901877973": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4562591438007476419": ["convolution_gpu_bfyx_gemm_like",2],
+        "11113125355390956764": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18040183500393090505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16316483048621486077": ["convolution_gpu_bfyx_gemm_like",2],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "16267531927647687641": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6699877220571254719": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9834941975457910988": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15449650271741732512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16364494883229084045": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "14789782064157699768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "208915399644127739": ["convolution_gpu_bfyx_gemm_like",1],
+        "14985236276429954162": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5156033406916344703": ["convolution_gpu_bfyx_gemm_like",1],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "14135593723444205032": ["convolution_gpu_bfyx_gemm_like",0],
+        "7105219760750474587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4656068024153891922": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "11637325834858582585": ["convolution_gpu_bfyx_gemm_like",2],
+        "401304652492444430": ["convolution_gpu_bfyx_gemm_like",2],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like",2],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "18226737525116147628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10358170616931426647": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17040537179740138304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10912495395422146386": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "225809055928705881": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "15206249797344242666": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",2],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "12713821004129672990": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11706446082856895571": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "8787438180071123604": ["convolution_gpu_bfyx_gemm_like",1],
+        "1880137091477870982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",2],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like",2],
+        "13326492157370934949": ["convolution_gpu_bfyx_gemm_like",2],
+        "9182260316973872633": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6888842613779488104": ["convolution_gpu_bfyx_1x1",2],
+        "12267555886404772991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",2],
+        "13291402786934990349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8900977003907025003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2581414750854621875": ["convolution_gpu_bfyx_gemm_like",2],
+        "1044889231088602677": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "6817494598328071314": ["convolution_gpu_bfyx_gemm_like",2],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "14041970415787494000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12985942652866621579": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3022939690177474442": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2031558560788449957": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12487879163561616870": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13821224753538037982": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11007175027950132719": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "17321934232458063571": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2609346307827449622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14502856487639608696": ["convolution_gpu_bfyx_gemm_like",2],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2247717767819293683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13960388312976163971": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "17041468169694105561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "57372993988016244": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4731836216299455047": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5047972486012090625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10041205516209288381": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "1298596164164324360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12501619443242354860": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "13527018660229167386": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1580344438642032807": ["convolution_gpu_bfyx_gemm_like",1],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "7280502812960451465": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17258128299721452811": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "7870154008378361670": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "4216958486055161753": ["convolution_gpu_bfyx_gemm_like",2],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "15409184364121627414": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12879367655655932174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "13077917010686381919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",1],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "3117175697326325371": ["convolution_gpu_bfyx_os_iyx_osv16",505],
+        "8362179886017398479": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",153],
+        "7008873036126556197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "2761862049452027986": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",1],
+        "8470959792634864749": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",2],
+        "6934241437968723825": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14132543442791497311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10471519687597963116": ["convolution_gpu_bfyx_gemm_like",1],
+        "10148067979123062638": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18087356517015630281": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "9440117898128288296": ["convolution_gpu_bfyx_gemm_like",2],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2949545414911764346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12643643553436503069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4154830034576950123": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",2],
+        "16851082749395991194": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3049097498155857895": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",2],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "5578991261564497604": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15997145184054496085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10141558851476164734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11942424927004660476": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7590734607006912544": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "11376953876369788199": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8731079912830889828": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17990326690659802090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "8733371726903473932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12194037100109755112": ["convolution_gpu_bfyx_gemm_like",2],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "4135975804549022456": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9076758673133996959": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "3001162215282339268": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "6981294059746462667": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "17762455138615317884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",2],
+        "13468081302022888489": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3962138884698789654": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7814543122045448412": ["convolution_gpu_bfyx_gemm_like",2],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12818012741490629493": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "9170163372548895531": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3171354702636014224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5672464491301994292": ["convolution_gpu_bfyx_gemm_like",1],
+        "6825390996679224270": ["convolution_gpu_yxfb_yxio_b16",2],
+        "897253033961107413": ["convolution_gpu_yxfb_yxio_b16",2],
+        "512446355173752600": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13648761167622654288": ["fully_connected_gpu_fb_oi_ref",2],
+        "10760094119259477688": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "4165019140664090799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17935612508319394087": ["convolution_gpu_yxfb_yxio_b16",2],
+        "877901260688090160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "17800115051456107658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",1],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "7349168847581850619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8161520217142313996": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10747101719272611563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5832851215142537445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9332701118402940384": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5795524493577277985": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "16426655160932259558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14331554754171207866": ["convolution_gpu_bfyx_gemm_like",2],
+        "3928356751040028375": ["convolution_gpu_bfyx_gemm_like",2],
+        "3451309062150982886": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12389854459474697184": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "3815222814331650224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4201057957682777280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",2],
+        "14943031375539993004": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14204609663091442879": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "5968129546023764583": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",2],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "16898785030254336705": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18385086614524985975": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",1],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5595779343671478945": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",2],
+        "14324166291904435508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",0],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9955939178447682108": ["convolution_gpu_bfyx_1x1",2],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",2],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "18080848057281093190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18071280811713424504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "1154763947184432124": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9827177798112814604": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4089043893927493060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4433497906256257606": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10751633292301177132": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",1],
+        "14345755557418971954": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "13760645810144930270": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "7099035779223341587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",1],
+        "15635250842093678965": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10419440621736450993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12961109385388101976": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "2847490224869294354": ["convolution_gpu_bfyx_gemm_like",2],
+        "9954050478761346921": ["convolution_gpu_bfyx_gemm_like",2],
+        "7600296832974673294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "4894469114343061704": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16989896550094613437": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2984726467649419856": ["convolution_gpu_bfyx_gemm_like",2],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",2],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",2],
+        "6388117241933586388": ["convolution_gpu_bfyx_gemm_like",2],
+        "10645057595080511813": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "5062815196458225737": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "426827405952656362": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2917999294360728537": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",2],
+        "11761545976388416063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",79],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like",2],
+        "14754849694687093032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10002044609138970243": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "3102538312627892960": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8078028207842958010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "6341363789473021047": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",2],
+        "10022487076451608714": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "7902473777019759045": ["convolution_gpu_bfyx_gemm_like",1],
+        "17096735128393723245": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17498483343394902796": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "6260115080574637314": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3010520839193613803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7451154080124553318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1040411949730118556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",2],
+        "15820005010263193043": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",2],
+        "7923576965630818418": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2321148334382088982": ["convolution_gpu_bfyx_gemm_like",2],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "1641111108888949123": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13234055353608734080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1223196405651730260": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "3217295012596892181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16606674008248299103": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7172357320005702833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "13585163747565192884": ["convolution_gpu_bfyx_gemm_like",2],
+        "5112480593385320005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9711184878666366204": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2578325663193624576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "13020331397245585657": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "13325287783358291692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15866935886105967122": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15474155528481683394": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "10753540518493641553": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "9078447949109922472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",2],
+        "14540578324750869319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17052161869014993719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "721174714308243785": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14817801788424046035": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3286496836813087881": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7482459536338668149": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16794854619854992714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10558609844937234631": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "13193898459027972719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1148949417144436507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5602328731722824868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10015368609444108372": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11799180632798787251": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14766694310604777253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2273992727647793692": ["convolution_gpu_bfyx_gemm_like",1],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "2439993891369206440": ["convolution_gpu_bfyx_1x1",2],
+        "101387140804297623": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",2],
+        "15031155621982459860": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "16304192736281226143": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15104727000375811836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2058364830449635556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6648876837655776653": ["convolution_gpu_bfyx_1x1",2],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "13454265023861566476": ["convolution_gpu_bfyx_gemm_like",2],
+        "6799631962511042762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9162359935098885411": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15148442194461613102": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "913861052717410566": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5958300749101873980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13369603621524676979": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18436249934780056991": ["convolution_gpu_bfyx_gemm_like",2],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "17318287523550546026": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7933217973342728190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "9659814105483633858": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10724501418439612080": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "2114232149447438823": ["convolution_gpu_bfyx_gemm_like",2],
+        "11564071490267241224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",2],
+        "10981374120597916521": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "1418595171949196661": ["convolution_gpu_bfyx_gemm_like",2],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4670487436469119872": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9169324504353459004": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10325138269934303618": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8490260671996115530": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like",2],
+        "7126667413990834481": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9452094307760005150": ["convolution_gpu_bfyx_gemm_like",2],
+        "2840794055129352139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3987482581128838173": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1117836569328440439": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13702254392810961772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12780116250427776647": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7737977992444172757": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7095629088416100928": ["convolution_gpu_bfyx_gemm_like",2],
+        "3155353791103196186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7992444232916226938": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15932838442166411183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",2],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13328583512713703122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",2],
+        "7926989875988735079": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12877601016766418505": ["convolution_gpu_bfyx_gemm_like",0],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16574710115918192418": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12107079280128343726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "994252691216116396": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "7777279468029216688": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11109044986816563101": ["convolution_gpu_yxfb_yxio_b16",2],
+        "396580837423299119": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6756771670011959646": ["convolution_gpu_bfyx_gemm_like",2],
+        "981803877097233095": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12477315042623518609": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12892265081710606252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3782308167335660154": ["convolution_gpu_yxfb_yxio_b16",2],
+        "87031578643428011": ["convolution_gpu_bfyx_1x1",2],
+        "12308956927236847009": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6458189051305803360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "6159729136505378486": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2856601829807186494": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "5209144536543011657": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14281154151197472605": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "143667964449473415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6721354194352192662": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16271970578584267980": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8079914471491171372": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1539677456611270609": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10009796094612770326": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "3856976081672275637": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "17192352762166764393": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14931590390643373866": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12561177248542630652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14971506154649368216": ["convolution_gpu_yxfb_yxio_b16",0],
+        "12917241193304093727": ["convolution_gpu_bfyx_gemm_like",2],
+        "8584375748627260395": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "15783429395177379897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11782188262748842182": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",0],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",2],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15128816312559638985": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5337351591182109481": ["convolution_gpu_bfyx_os_iyx_osv16",131],
+        "8931169575495985034": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14216513246096503793": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13123709697607309884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11073090858361674041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10747688146893187959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11563892089503603030": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5551484040302194648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7183578232279711009": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",2],
+        "3419536918610303807": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "15596408854298291433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2016932800158392200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15705908639736679687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5919454297699648428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11500205299047837289": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5643920882179676695": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "17542035367134614728": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9426665763007611385": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "8712136292276123857": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16828388628569377322": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14801210545983960599": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "2052010432187897741": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "835053793432636355": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13512059751838488458": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12002302929446578025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5735703235236456131": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5040095338370816349": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13803790014241837327": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6254141935545262078": ["convolution_gpu_bfyx_gemm_like",1],
+        "104765009188090817": ["convolution_gpu_yxfb_yxio_b16",1],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10504318542015227515": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "14667793472412360981": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15773157615731010456": ["convolution_gpu_bfyx_gemm_like",2],
+        "15060535689318007173": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2206771663823062080": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15070618248849566698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4744578087509837185": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18417830391649460864": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5774841809066688068": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like",2],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13412516623201653283": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "17224104246148265328": ["convolution_gpu_bfyx_gemm_like",2],
+        "14827882251752394500": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7223801044761006523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15367649112776077240": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "5567628205735744449": ["convolution_gpu_yxfb_yxio_b16",2],
+        "359617184733439511": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6948455759869670955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1692473411043262397": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17439276474731842060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",2],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",2],
+        "17822988909419777692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13541382855330226000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",1],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "2705031521944165712": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16773645387243701837": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "7273427309587902237": ["convolution_gpu_bfyx_gemm_like",1],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "1114679698826953542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "3106922888635965020": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10774872391768741315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "8527069404111265568": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "12585864429067596351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",2],
+        "12146979849998627283": ["convolution_gpu_bfyx_gemm_like",2],
+        "3163833930628348446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "5032841266226405428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12810833895438895155": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",2],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "276313536076170391": ["convolution_gpu_bfyx_gemm_like",2],
+        "2920017342405650206": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",2],
+        "1054954263090546905": ["convolution_gpu_yxfb_yxio_b16",2],
+        "968092788032627444": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14289082888174784976": ["convolution_gpu_bfyx_gemm_like",2],
+        "14446688005815492020": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "8556125699591344922": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",2],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15329680728165965773": ["convolution_gpu_bfyx_gemm_like",2],
+        "15739278428190392018": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "6883767567034259453": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "14074996784220709246": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3748621266324665764": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2469579114592379040": ["convolution_gpu_bfyx_gemm_like",2],
+        "3742751561273931407": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2561508262445368003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "7134419022268272901": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12308895602001600327": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "16549854027697846882": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "15295172519920136220": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",1],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12241130380766920378": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "17769159396346490074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2905979727479716212": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3244803973821375252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "8095675456938934982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",2],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "15112393534380347357": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7605652809856543211": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12923298574715329852": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8407012082034007985": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9119268982510599778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13569453018083742128": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12234313962656804631": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "14677968346503677769": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8543619733732987550": ["convolution_gpu_bfyx_gemm_like",1],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "17725637691681205907": ["convolution_gpu_bfyx_gemm_like",2],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "10194187012252949909": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4521622755195947253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "394778201589371681": ["convolution_gpu_bfyx_gemm_like",2],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "10178171262128338408": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5740745357953479527": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2242829490403202087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3325575565536567070": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15222260213708019662": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6167369758442930886": ["convolution_gpu_bfyx_gemm_like",2],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "9480653639044390919": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3911736807429733938": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "7770000755097925765": ["convolution_gpu_bfyx_1x1",2],
+        "11155444222714959508": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "10076885835791159907": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16788162879714733906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",1],
+        "5762631094740444698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14366395926517590797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5017701748886087836": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7742126547476513275": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18129795023552968695": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11698754846673268046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",2],
+        "4925720860007127584": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9827201026276954165": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3805667660217578518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14868677663932902695": ["convolution_gpu_bfyx_gemm_like",2],
+        "824380206255396866": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13390197134230598693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17833304859352483840": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "1099404514975797315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "2147962310424425158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3343020946662226400": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "11730276873446857018": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8553491894663686698": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "15206381185687737007": ["convolution_gpu_bfyx_gemm_like",2],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",2],
+        "15525903155475629518": ["convolution_gpu_bfyx_gemm_like",2],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "18408107772851888061": ["convolution_gpu_bfyx_gemm_like",0],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "14318347197994059448": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13991572769793610416": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4439786737038041995": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6284333183047854748": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8131879590716437354": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6805188858008657978": ["convolution_gpu_bfyx_gemm_like",2],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",2],
+        "15006204461468698734": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15636128989267984459": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "1157388265135592238": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5941852872160795604": ["convolution_gpu_bfyx_gemm_like",2],
+        "1466455001976212160": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11436473937404565094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15681189418847392587": ["convolution_gpu_bfyx_gemm_like",1],
+        "6525496212688896740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "7535571298845832061": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8537824547722216155": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "4830454154838353056": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "12887076860522920405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10547134120307382906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4283886984540574108": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16532743776403877084": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11086471945045031067": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16739031949237426992": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",1],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",2],
+        "16788715253205076219": ["fully_connected_gpu_fb_oi_ref",1],
+        "9617316303048974588": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15599983560500910839": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4299773714254046691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "130427456111826171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3766048787611884529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2895819653081408358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10254566865260697753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6059368508708501002": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "17167229341919111718": ["convolution_gpu_bfyx_gemm_like",2],
+        "10007925729029867733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2173867324489962689": ["convolution_gpu_bfyx_gemm_like",1],
+        "8978764053524288494": ["convolution_gpu_bfyx_gemm_like",2],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",106],
+        "6970636030494405299": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "9900658671239107502": ["convolution_gpu_bfyx_1x1",2],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",2],
+        "9453100135791813000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17433340097721474017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7235358742317442134": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "5240706676373148280": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "17583785768334531086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "17292751972745231011": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1123577455191848310": ["convolution_gpu_bfyx_gemm_like",2],
+        "3928596145340765666": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "10099598062509781441": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",1],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",1],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",2],
+        "10626281431800814406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8881906040469243354": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16837963510205857013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1413558157882728476": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2923543983518895756": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14578867494693499627": ["convolution_gpu_bfyx_gemm_like",2],
+        "10816702874143297564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5924341622384096919": ["convolution_gpu_bfyx_gemm_like",1],
+        "10324485383646920518": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "3957253946857103590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8943651590146149679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17043601935017365442": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13767985623872409391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "11942019076226205097": ["convolution_gpu_yxfb_yxio_b16",2],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",1],
+        "16223356735957394429": ["convolution_gpu_bfyx_gemm_like",0],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "241656278218999298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6070612528095353265": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10783046011829953095": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14244689429217411113": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6944031900067948180": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3177304125602972370": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "231083216612056805": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "7839141505912665157": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",0],
+        "11417406326478154077": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13365950526881732374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",2],
+        "1108229954015380813": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11724732387425614709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "16870110185980402237": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17567012866823126402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14417401878572618236": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13223232888554043645": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3096280563014331836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "14774814395786139876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15050884844653850678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",1],
+        "9918371346247634545": ["convolution_gpu_bfyx_gemm_like",2],
+        "15670767419106537809": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3948843501884284998": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",0],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",2],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "880603384896315783": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "6730447536124542965": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4927360358387344983": ["convolution_gpu_bfyx_gemm_like",1],
+        "8183383667948205424": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10815244730103375973": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16597170760061556882": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16569637518948306471": ["convolution_gpu_bfyx_gemm_like",2],
+        "3602929955785812025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3622409603053918029": ["convolution_gpu_bfyx_gemm_like",1],
+        "10429104188258277773": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "6818140422066151642": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15311930929656759371": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15915715422308762909": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "5884951148427535208": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8093401822846123153": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9655242408142699694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10226095100825845185": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "15985980444340490463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "13602140021189675477": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2343310394723780653": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11861634536583463947": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "8933701347987963693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1304921846760027440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2621495864635590903": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8494385862885499798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13701870576531008278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5638640164891118162": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13520876347177213888": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "4306052436602921234": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11289650463922092775": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",2],
+        "377219085802486361": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17211590259060346125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4367991456894497706": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "7767103488808670253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8509024280905303927": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13966416504547680082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "8130920994920685157": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3058716597925544041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11690533591656807605": ["convolution_gpu_bfyx_gemm_like",2],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",2],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "14001406016806064079": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "15012885932988454455": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13683623172740048376": ["convolution_gpu_bfyx_gemm_like",2],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "6461637373691101671": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15720012960520885263": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4779919236230154165": ["convolution_gpu_bfyx_gemm_like",0],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "12151068022697708126": ["convolution_gpu_bfyx_gemm_like",2],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "3369689552455141157": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4718716595177056289": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "5274929595362413625": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9696168324381001582": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "9738776059655610885": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "15677717057398875599": ["convolution_gpu_bfyx_gemm_like",2],
+        "17718424965214606218": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "921209976738626097": ["convolution_gpu_yxfb_yxio_b16",2],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16341722570340169855": ["convolution_gpu_bfyx_1x1",2],
+        "3176785355296130660": ["convolution_gpu_bfyx_gemm_like",0],
+        "11300415556407923335": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4072951883124129646": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8000679297338683619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15897457705071738591": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "6945787904293959477": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "727216855315869048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",2],
+        "7260204889552803221": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "360064276184684693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_gemm_like",2],
+        "886880682650879171": ["convolution_gpu_bfyx_gemm_like",2],
+        "15269988216002549857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10272016038525930672": ["convolution_gpu_bfyx_gemm_like",2],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",1],
+        "5596441339918073261": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "10106454449619141260": ["convolution_gpu_bfyx_1x1",2],
+        "5291011077679733990": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3701838669605585798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9940761514291929473": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14115742296883450319": ["convolution_gpu_bfyx_gemm_like",1],
+        "12741762570001404232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14034402827496819479": ["convolution_gpu_bfyx_gemm_like",2],
+        "12294364015803004575": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "150132162949295379": ["convolution_gpu_bfyx_1x1",2],
+        "7863319552895863063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1250095876638711647": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "18373951194274306895": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11088324811742486481": ["convolution_gpu_bfyx_gemm_like",2],
+        "9243949750444156746": ["convolution_gpu_bfyx_gemm_like",1],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13403161389559730": ["convolution_gpu_bfyx_gemm_like",2],
+        "4833749391314748606": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13409744191227471760": ["convolution_gpu_bfyx_gemm_like",0],
+        "17817043205731836063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "10256831975351722184": ["convolution_gpu_bfyx_gemm_like",2],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "9541996065561509160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3121704239277217273": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",0],
+        "7395419333138772074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14910911338105922048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15325852281951905610": ["convolution_gpu_bfyx_os_iyx_osv16",803],
+        "1299760574827253811": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "1569043950563130463": ["convolution_gpu_bfyx_gemm_like",1],
+        "8555049634736330391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16502045034098739466": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2737352811173555281": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4030004320208162301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4403753181729432604": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "7444165397413360181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",2],
+        "12714814165247623529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3792276488551864121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16425374300157280628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15298221796479574600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",2],
+        "4098581145478965082": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",1],
+        "13586735166545634506": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2782970766870172398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17140702790441856730": ["convolution_gpu_bfyx_os_iyx_osv16",722],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",2],
+        "3534874664568214253": ["convolution_gpu_bfyx_1x1",2],
+        "7823257556787476006": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12374775091628199854": ["convolution_gpu_bfyx_1x1",2],
+        "7846384623429362522": ["convolution_gpu_bfyx_1x1",2],
+        "5788018146987909930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "17742192339816511494": ["convolution_gpu_bfyx_gemm_like",2],
+        "166091609652531090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7843180034077880658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "382811963722907674": ["convolution_gpu_bfyx_gemm_like",2],
+        "2188101366183302888": ["convolution_gpu_bfyx_gemm_like",1],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",0],
+        "10009559358571629502": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10399620940700804517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "8323669961818535927": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2058172559199858297": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "13717351126657739994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "2173163618947713953": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",0],
+        "7779562434199107586": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1375084615110147615": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11194372303922533529": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5359510718430377298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5285172225938230524": ["convolution_gpu_yxfb_yxio_b16",2],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",1],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8732952254407298868": ["convolution_gpu_bfyx_gemm_like",2],
+        "1245259979364728404": ["convolution_gpu_bfyx_1x1",2],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13038533272699602337": ["convolution_gpu_bfyx_gemm_like",2],
+        "17181874388601550941": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "10049294964307823692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4561778392194061215": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1448440012428740463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4824040283449153298": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2148877522799179369": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",2],
+        "3101885395179993708": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9399511839804500548": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10775271979871646995": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9161616741940575576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5578850952665051661": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1336739931702966228": ["convolution_gpu_yxfb_yxio_b16",2],
+        "415826393421796195": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15640466585550013905": ["convolution_gpu_bfyx_gemm_like",1],
+        "17955326503130437346": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",0],
+        "748236447365453504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3735753364888836383": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7329924387620542330": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "3067930325929862490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "4773482308451190487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12032580551021546487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "8265982881100325775": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3112648799276134590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14830991971271385876": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",2],
+        "15526021915035861514": ["convolution_gpu_bfyx_gemm_like",2],
+        "14173531787508017136": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7565348337952384040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16195893521207315456": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "4099828484175044842": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8348997431940166878": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7346046748383284270": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "9569522500959727054": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7481256533438761028": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "13700014916680753395": ["convolution_gpu_bfyx_gemm_like",2],
+        "14263055580023018733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14916236722843741326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "3308955824300750921": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10612739622648878242": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10878198256414940305": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2459018025887933198": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12745631396795162505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8876704486585503280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15973363403733281926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",1],
+        "16374675547140209181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6136232084354304563": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11557032521956761994": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "17712558058168648648": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11015074526119891710": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",2],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "14807357397951247957": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5899560521070338192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",2],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11634932044447867039": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like",2],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "3684792790546138809": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12046017161414846599": ["convolution_gpu_bfyx_1x1",2],
+        "6546440095044731932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3735605582512535278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17088011073114549679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5600807544955072308": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2314805462821790774": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17439102502195540957": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "9590161922224578217": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2319519208813614116": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2553539191926275121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12900949103593247293": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "3563872903821081702": ["convolution_gpu_bfyx_gemm_like",1],
+        "15600841108426475615": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16242136888057221574": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15765592038173567297": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16601230690171340432": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13634686998599681086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "467070383257529689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16748662918272106932": ["convolution_gpu_bfyx_gemm_like",1],
+        "3365786526859737112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16738951239219589307": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4769003637955328938": ["convolution_gpu_bfyx_gemm_like",1],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "15065019229949449623": ["convolution_gpu_bfyx_gemm_like",1],
+        "11267495078361954131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "16961326251624610778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11626398907755088688": ["convolution_gpu_bfyx_gemm_like",1],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",2],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",2],
+        "480310470450900836": ["convolution_gpu_bfyx_gemm_like",2],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "17107083637007906184": ["convolution_gpu_bfyx_gemm_like",1],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",0],
+        "3242391637018676328": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",1],
+        "9534041402131086717": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "856949500975232838": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "9647916259092117712": ["convolution_gpu_bfyx_gemm_like",2],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "12619739385084492771": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12210280332071091209": ["fully_connected_gpu_fb_oi_ref",1],
+        "6317575981520135028": ["convolution_gpu_bfyx_gemm_like",0],
+        "15293835051273372438": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7788374869410867297": ["convolution_gpu_bfyx_gemm_like",2],
+        "10477588607457125173": ["convolution_gpu_bfyx_gemm_like",2],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "7817691489550523328": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6182829358839578529": ["convolution_gpu_bfyx_gemm_like",2],
+        "13320473279945887641": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2866656294663853474": ["convolution_gpu_bfyx_1x1",2],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "7889602687414497280": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "7724125714360985807": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10879171754021534649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3190494353583341446": ["convolution_gpu_bfyx_gemm_like",1],
+        "2501411300945696806": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15217573782563469232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2026622899016787854": ["convolution_gpu_yxfb_yxio_b16",0],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "13464697394408238115": ["convolution_gpu_yxfb_yxio_b16",2],
+        "488798544312719183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12710794174926396540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13156052826121673994": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "6423354409210936959": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",2],
+        "9291397338108903174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1786821683911142459": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16253244737884854313": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13323186744342557015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15078590909693331731": ["convolution_gpu_bfyx_gemm_like",2],
+        "13777174566683935109": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9589361786336650748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15625374380046476173": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4897690791599638716": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9177211394807412309": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8723078862651154959": ["convolution_gpu_yxfb_yxio_b16",2],
+        "739676584505475609": ["convolution_gpu_bfyx_gemm_like",2],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "4701235352806075765": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "6363788325163726004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2172999245833525797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6820134899097582639": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7292351660229751817": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "16725049805030712400": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1308980444055174254": ["convolution_gpu_bfyx_gemm_like",2],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",0],
+        "12722153168975105360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12121204870979363096": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17272600601478967434": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "14553813154800569861": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2722062599746670336": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12331134162344797761": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2816339200381598722": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",2],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10632933069865171963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "15897300973213364823": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "8339704352841356825": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",0],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "12623375499927200341": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10765280349477640969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13234872695521811652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12303905514885913537": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",2],
+        "4755225554035527185": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10133398220120888583": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13474805373264874144": ["convolution_gpu_bfyx_1x1",2],
+        "5941092474669713339": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",1],
+        "6756679359093569015": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "14188157670969097508": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",2],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "12053562297742437099": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12806934028210472719": ["convolution_gpu_bfyx_gemm_like",2],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like",2],
+        "6942016672941874829": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",2],
+        "13735180250757239202": ["convolution_gpu_bfyx_gemm_like",2],
+        "10065714384927707796": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9589718307719207394": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11175353869874626110": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10718764522366711114": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4430932059574900921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "6963293142152132518": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",2],
+        "15542520725696027828": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13596876807637507229": ["convolution_gpu_bfyx_1x1",2],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",2],
+        "11270855425262923989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4135068756462147853": ["convolution_gpu_bfyx_gemm_like",1],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1973051991518953158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5723759573058003971": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9525535670799618110": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15192230303376521834": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "11262989876326061679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1507504848332592003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8577875628223148806": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",2],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "12458921031453334451": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15604634351310647589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8984436655107983227": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "13369751385866224286": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",2],
+        "7139719632093090046": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "466744273945239777": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5788323787676797805": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",2],
+        "11971853138084108953": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "2917735110073643952": ["convolution_gpu_bfyx_gemm_like",2],
+        "10141927023849730720": ["convolution_gpu_bfyx_1x1",2],
+        "362823013207940830": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10893432143734884603": ["convolution_gpu_bfyx_gemm_like",2],
+        "16437093737761968743": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3101748967012684440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1582751548472076534": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15809639778580769565": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9741607635826869269": ["convolution_gpu_bfyx_gemm_like",1],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "13501352378461071771": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4685236901551256966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4378422094110940766": ["convolution_gpu_bfyx_gemm_like",1],
+        "14532519639619315651": ["convolution_gpu_bfyx_gemm_like",2],
+        "138379779469699309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2929715823970060874": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like",2],
+        "9967611023372430532": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2929190644951986399": ["convolution_gpu_bfyx_gemm_like",2],
+        "3441335188113424896": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4256155212405177844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18075395502550596586": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "6887205509732544213": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12013818650853034767": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8935522915553126640": ["convolution_gpu_bfyx_gemm_like",1],
+        "5044721291675005144": ["convolution_gpu_bfyx_1x1",2],
+        "4422642146063042868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14835641172229643545": ["convolution_gpu_bfyx_gemm_like",2],
+        "7289535479247584635": ["convolution_gpu_bfyx_1x1",2],
+        "5258372022038629529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13149617013851130587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9538863363710651909": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8065866013404161366": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14116275901314596944": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",2],
+        "16072525303202287969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9629460794894999510": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5576305720733717044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16780457022162749898": ["convolution_gpu_bfyx_gemm_like",2],
+        "411016281538345537": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18083803358410976976": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5479590921345335946": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",2],
+        "5568728266639058524": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14091543526898531200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",2],
+        "8193369947544085921": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "7065244994574625911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",2],
+        "15988378956341507229": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8943913562339525413": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "7008509833947166548": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",0],
+        "136349424199140459": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10952045211444638649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13123561937554734618": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12344689711325644622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12023260267201191955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "601430670855155006": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "11130439225010714550": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5564881878876582769": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1197281505560782577": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "10424643336435622408": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8421388456873652700": ["convolution_gpu_bfyx_gemm_like",2],
+        "12181310683533105454": ["fully_connected_gpu_fb_oi_ref",2],
+        "17806747473167329833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3784684114139223050": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10431728173806991521": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16633540487930201533": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "9642965664913867675": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3533556385636018581": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "14897384423894125457": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4091702228990140696": ["convolution_gpu_bfyx_gemm_like",1],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "12787837386653002743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3047407458812880288": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6249875772709398338": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2399313178951511557": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11224051407822914513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18255227391100087860": ["convolution_gpu_bfyx_1x1",2],
+        "5552699731399195573": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4104679489383377966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6871131333562410117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7263796835299019284": ["convolution_gpu_bfyx_gemm_like",2],
+        "7015738038963065110": ["convolution_gpu_bfyx_gemm_like",2],
+        "8616584380583931648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7343590049199309046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9144269202766996508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",2],
+        "13328911884191551889": ["convolution_gpu_bfyx_1x1",2],
+        "16434358667865869005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "1129349074674368869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6602394091385112575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6839795451275143093": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "7162155897369277782": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15848096609835347542": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5995121118186531621": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "18214716801063702171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2339864165283480961": ["convolution_gpu_bfyx_1x1",2],
+        "11052275099129482401": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",478],
+        "393387269914864557": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "4714289593698160876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14892045745899927762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18029395208219861440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "11012427206693842637": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14758040027936817208": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14097394936362526559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",2],
+        "12388894315292201102": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",2],
+        "5857101685300045443": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16632786413927045192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5884802375772043861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2955459120402821540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12027202455592387086": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "9026883911202247185": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16814025114202322376": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3319827933068341610": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17477062954520561609": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "8784358107340738205": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4674504221851042542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "17413191440314817117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "10896935976330351144": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "6709883527730513363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15141893564826036993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9135116285263927211": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2908156087871187676": ["convolution_gpu_yxfb_yxio_b16",1],
+        "698274493570551388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",2],
+        "9922764846020092836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8365255170846178102": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "14616801816838734032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7531346828150129063": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "15228390729175722409": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18435632962969462312": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13978649386370395620": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "15397084091361096354": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",1],
+        "14909506411483112959": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7715937239456300593": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1921500066107090648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12767065362702304803": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7859659993155959174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "13571587312517912280": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "1290180607037086383": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15504618703544589723": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11311839946200066200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "6222595759158615206": ["convolution_gpu_bfyx_gemm_like",1],
+        "8262469434265124590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "166437837813304707": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15352064186447212862": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "9585113116232600562": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14908477489231326997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11768117585574496387": ["convolution_gpu_bfyx_gemm_like",2],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "657356383636782030": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "16094455700371652312": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "11002165738333323413": ["convolution_gpu_yxfb_yxio_b16",2],
+        "875146113874776902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6020017927557041768": ["convolution_gpu_bfyx_gemm_like",1],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",0],
+        "3861351835305151926": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9890700023578477203": ["convolution_gpu_bfyx_gemm_like",2],
+        "16828961272295386615": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",2],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "2227700097134029783": ["convolution_gpu_yxfb_yxio_b16",2],
+        "132437164570900392": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17789969008677638142": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "8205640825965213946": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "4306881509708040723": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15612797125081819500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13797759143769042759": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "15217077412685024074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17178308105985812083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11132679855317294753": ["convolution_gpu_bfyx_gemm_like",1],
+        "13076935351221777993": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3301356450249305137": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4947788161154370784": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "2064464435352777854": ["convolution_gpu_bfyx_gemm_like",2],
+        "4049224463072418218": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5642822685234782052": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2208765794404376467": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6280726148869856021": ["convolution_gpu_yxfb_yxio_b16",2],
+        "733956743303342862": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "10415046594066474634": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3976197003067656339": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2714322766616035858": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4362304842016958728": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1074748462756364699": ["fully_connected_gpu_fb_oi_ref",1],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "17174919737114915467": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14001048251986195179": ["convolution_gpu_bfyx_gemm_like",2],
+        "13927671398099556854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "119047044057950958": ["convolution_gpu_bfyx_gemm_like",1],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",2],
+        "17466963970980708210": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15167962750603978874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5293502980575652171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3059575629482816852": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "334703311738467111": ["convolution_gpu_bfyx_gemm_like",2],
+        "4972952621622984792": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1556975727728498645": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15609860394182767048": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "4597873630741623918": ["convolution_gpu_yxfb_yxio_b16",1],
+        "904355798061005466": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13094289895577333088": ["convolution_gpu_yxfb_yxio_b16",2],
+        "234288286732396704": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10880830033700542216": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15576534481170615301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",2],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "7804715870037416579": ["convolution_gpu_bfyx_gemm_like",0],
+        "13367787254519749641": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14823789570149356458": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",2],
+        "8779960552750034544": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "14752182392048929103": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6217542346826403576": ["convolution_gpu_bfyx_1x1",2],
+        "14113320831418478396": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9435086287598656868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3067001341355453846": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "15424646499666127616": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16953093098789113080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9803492989444302959": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14510495923021693109": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5459463503840817402": ["convolution_gpu_bfyx_1x1",2],
+        "17893181511546734799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",2],
+        "7752913515036871482": ["convolution_gpu_bfyx_gemm_like",0],
+        "14079654309452583394": ["convolution_gpu_bfyx_gemm_like",1],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",2],
+        "2263637493894079492": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",2],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "7585777271711713778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5897564616927353003": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11738780323979052397": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "598214270378842167": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",0],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "720558977788683564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1216021647922150199": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5606914392662771013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",2],
+        "15715029280006557222": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14315760630997175346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10536316961655703500": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5583453364991774426": ["convolution_gpu_yxfb_yxio_b16",2],
+        "248133885018839814": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12808456612606675259": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "3571030800252732358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4238885454989272754": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "6709083009339039603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",1],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",2],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "4933831571091731212": ["convolution_gpu_bfyx_gemm_like",1],
+        "8399668174006528237": ["convolution_gpu_bfyx_gemm_like",2],
+        "7398158542592530232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",1],
+        "16000428520749664687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14971270053929063630": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3652414035262499383": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "7780140599533242850": ["convolution_gpu_bfyx_gemm_like",1],
+        "5911282942658469852": ["convolution_gpu_bfyx_gemm_like",1],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",1],
+        "5157949342388119167": ["convolution_gpu_bfyx_gemm_like",2],
+        "11891319657803057127": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8638227907054657946": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "10718639465064821919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16371608027363202992": ["convolution_gpu_yxfb_yxio_b16",2],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "2797436491596125131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1632416005093914709": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "7317391511452227268": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "438528596970898721": ["convolution_gpu_bfyx_gemm_like",2],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "8063236641629084352": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4241055784642339756": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9812438080378091263": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11157773554806649837": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16459072408799224894": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "9982350570959875159": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16956263773967652552": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2164314506903530487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14766477690417085350": ["convolution_gpu_bfyx_1x1",2],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",2],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",1],
+        "18424611729838147994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15476491807306982382": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",1],
+        "2571289358202565251": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "188830358699960789": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16813995580382709489": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12988253829685880778": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "5577571901049952658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9663847096617096629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6935581283700404601": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11973034261101454380": ["convolution_gpu_yxfb_yxio_b16",2],
+        "185782385623159958": ["convolution_gpu_bfyx_gemm_like",2],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",1],
+        "17270057383792994793": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5928392400230917930": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4627958043707973483": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8735534480653818425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15226556774612169126": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1760391741350091665": ["convolution_gpu_bfyx_gemm_like",2],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15816807118780455948": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10225565543636007389": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",1],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",2],
+        "17434141039341226796": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9328223957245552723": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15750539817895707253": ["convolution_gpu_yxfb_yxio_b16",0],
+        "9515771738501683": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13251091004269229867": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "14497254583210965214": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3377052601059116318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "16836088134347394854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2449586975250543578": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10211403590176354415": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4602232889230956461": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "8761283252495354972": ["convolution_gpu_bfyx_gemm_like",2],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "4776446300552810228": ["convolution_gpu_bfyx_gemm_like",2],
+        "2412069259085234287": ["convolution_gpu_yxfb_yxio_b16",2],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "5327803911898085293": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1265277707626014051": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "16408015571155576773": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "3652749152621176846": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12968458217519563011": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "17066417894262330033": ["convolution_gpu_bfyx_gemm_like",1],
+        "10129351141713628942": ["convolution_gpu_bfyx_gemm_like",2],
+        "18020588962875998441": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10114123606924808948": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12138556002719602750": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",2],
+        "7308442824625238429": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "17993865017392965282": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "12047878068525808907": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16237775310369180101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16565784556269819846": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "13858485871773319706": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9298483238271063853": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6810243879781619546": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17740553615487239243": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "2690771087990667627": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "12175796957622122377": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13343968006718934574": ["convolution_gpu_bfyx_gemm_like",2],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "14010642743400284761": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "460780635491857522": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "14650567822254940018": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "684240994243755872": ["convolution_gpu_bfyx_gemm_like",1],
+        "11257892554921100776": ["convolution_gpu_bfyx_gemm_like",2],
+        "7419216766190700536": ["convolution_gpu_bfyx_gemm_like",1],
+        "5033753554611312392": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "7185832253431234935": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15289017003172341090": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "331390460560782085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "683530182479794259": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "1081969835308672753": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "4494583230309471319": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "3755253206085028904": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18422772756265807456": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "4436265026202671742": ["convolution_gpu_bfyx_gemm_like",0],
+        "9008848676120441863": ["convolution_gpu_bfyx_gemm_like",1],
+        "8986253016099337778": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6661117204204077150": ["convolution_gpu_bfyx_gemm_like",2],
+        "17220204850799701232": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "17520777331163825810": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13326233188936584240": ["convolution_gpu_bfyx_gemm_like",2],
+        "9361149482291015906": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14157776769026046014": ["fully_connected_gpu_fb_oi_ref",0],
+        "6031307393395339699": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9104710269725948935": ["convolution_gpu_bfyx_gemm_like",2],
+        "15132518566122695317": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "11642972419456492482": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9402935157379983392": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10073936467467965122": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1233021176530240722": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1764398518968720486": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "9325064517683111898": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13753670205703732353": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12063837066704136739": ["convolution_gpu_bfyx_gemm_like",2],
+        "9546990560009724329": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11879484013890539145": ["convolution_gpu_bfyx_gemm_like",1],
+        "13301652037182491495": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15706410484838871362": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12815588500303820284": ["convolution_gpu_bfyx_gemm_like",1],
+        "1629816265162728770": ["convolution_gpu_bfyx_gemm_like",2],
+        "10574694721257478408": ["convolution_gpu_bfyx_gemm_like",0],
+        "6178519342290638130": ["convolution_gpu_bfyx_gemm_like",2],
+        "10273183900108661041": ["convolution_gpu_bfyx_gemm_like",0],
+        "11379252854859166206": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13439272015824246074": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10853161782230763798": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "15379873910046172004": ["convolution_gpu_bfyx_gemm_like",1],
+        "9419334015760594582": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "1096929244128185929": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "13804435767468730732": ["convolution_gpu_bfyx_gemm_like",2],
+        "8174734104495927379": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "4903043177313730317": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "3927333491885837374": ["fully_connected_gpu_fb_oi_ref",2],
+        "5920614348521143999": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "2305461098719675735": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13193571607788569533": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18375944751155613159": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "5282780697382984776": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14611470203914805229": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2605525859754242318": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1835975757316320402": ["convolution_gpu_bfyx_gemm_like",1],
+        "404419072921281472": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "15291457825664605611": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "263575476655527355": ["convolution_gpu_bfyx_gemm_like",2],
+        "10898684230183205955": ["convolution_gpu_bfyx_gemm_like",2],
+        "7954822934649213505": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14214141488645257351": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "16580523689587532278": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "7744644472305197412": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13394233139064923018": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6220616397859143111": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9535474159134436170": ["convolution_gpu_bfyx_gemm_like",1],
+        "12493863403516600413": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12793814016409887162": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "17796867588410764794": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "11359020774437470164": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "4254313567858225805": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13065517911798224579": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "13342769641176584743": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "14266210014132784194": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",1],
+        "7558864177789582540": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "13115589642140732066": ["convolution_gpu_bfyx_gemm_like",1],
+        "8008513163448840421": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17794162443307839614": ["convolution_gpu_bfyx_gemm_like",1],
+        "7174790971918109163": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13974740392602492680": ["convolution_gpu_bfyx_gemm_like",2],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "11190259822407791373": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "1996317479484023889": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8854234880878427078": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "10114186450910665716": ["convolution_gpu_bfyx_gemm_like",2],
+        "906587812125311288": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1640358227345963848": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "6210074450403696110": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "841243068178925457": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17900440115872409689": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3469963495451100978": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5275016494706355806": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14524678598440880756": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "15715775011639091549": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14898829474012181950": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9643671820560131959": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14825587275976212624": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "18280672126778847258": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12382761700262813898": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "1093840152689636371": ["convolution_gpu_bfyx_gemm_like",1],
+        "12700051513124813499": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "6214312494103149808": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "15385836287435319028": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14639233649574991406": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "7995002764260542332": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18420783889227814721": ["convolution_gpu_bfyx_gemm_like",1],
+        "11883632480024839484": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9069334144391048686": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8751016391945753900": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4722824701199486161": ["convolution_gpu_bfyx_gemm_like",1],
+        "1653438360841004980": ["fully_connected_gpu_fb_oi_ref",1],
+        "11267742746905371769": ["convolution_gpu_bfyx_gemm_like",1],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14995412997472381785": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "7544565739420583104": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "8159367017950578067": ["convolution_gpu_bfyx_gemm_like",0],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "9609257787066002999": ["convolution_gpu_bfyx_gemm_like",0],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11985789598994479652": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",1],
+        "851140387756761667": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "12713087335581316946": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "8358425189419823078": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",610],
+        "9484428757321765863": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7714783879762659458": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3612493075378459996": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "17765244777397448823": ["convolution_gpu_bfyx_gemm_like",2],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "6740545361286720494": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "9194441947620820715": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15898888434295644774": ["convolution_gpu_bfyx_gemm_like",2],
+        "17821196374523699955": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18136765667969393174": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8420176522157084802": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17221173795372066030": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15576932271488848457": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1961348920992050029": ["convolution_gpu_bfyx_gemm_like",1],
+        "12076322142162382598": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7109332037985838172": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "14854353557342075292": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "7086554406050778468": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "253337639942573142": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "1211404528755199615": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6213353364768643062": ["convolution_gpu_bfyx_gemm_like",1],
+        "9796347091019799053": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "15316782593191029443": ["convolution_gpu_bfyx_gemm_like",1],
+        "1421879144542252228": ["convolution_gpu_bfyx_gemm_like",1],
+        "14271777022638592600": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "6478247863479663432": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9172699707430374863": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5375957124102705020": ["convolution_gpu_bfyx_gemm_like",2],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1231806423322813287": ["convolution_gpu_bfyx_gemm_like",2],
+        "4646795194660982475": ["convolution_gpu_bfyx_gemm_like",2],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "11070696274716018686": ["convolution_gpu_bfyx_os_iyx_osv16",196],
+        "5334190564423375247": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2155348872565175553": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14600700464602327710": ["convolution_gpu_bfyx_gemm_like",2],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "10842505566649585090": ["convolution_gpu_bfyx_gemm_like",1],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "17130630712943165823": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10173382130572498594": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "954347958041231578": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14669219788000023965": ["fully_connected_gpu_fb_oi_ref",1],
+        "15997231252708686870": ["convolution_gpu_bfyx_gemm_like",2],
+        "3963106895592011725": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17692144048680858991": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",164],
+        "10835684445936063871": ["convolution_gpu_bfyx_gemm_like",1],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "779633618375662086": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "4046513842327685203": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7722090560547236852": ["convolution_gpu_bfyx_gemm_like",2],
+        "17864395500488861670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13161798453564436688": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8557939065994799094": ["convolution_gpu_bfyx_gemm_like",2],
+        "17749857812061795980": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11152834864013527469": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "14946519992043402896": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "10660230104888153758": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "16061176355133391199": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "10252133892687581839": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "7026575758396092435": ["convolution_gpu_bfyx_gemm_like",1],
+        "8422808932256100230": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11021014846012559932": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1197101651805223230": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16998662249038174039": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13231291236739587033": ["convolution_gpu_bfyx_gemm_like",2],
+        "11129224786768161139": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "7601006550805536675": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4783866236592802336": ["convolution_gpu_bfyx_gemm_like",2],
+        "7575675354187625951": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14361697687217060995": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1865187811299838654": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12651215303242591871": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "641798291578647186": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9522661528867955338": ["convolution_gpu_bfyx_gemm_like",2],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "16549498607618849252": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "3349108500387301004": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "14213516751025324346": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6104567430127604601": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "15733030371524967129": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "4765132143483233538": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "14253275166085865948": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5045339651649581926": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "9468314291932574827": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "4213330047036138895": ["convolution_gpu_bfyx_gemm_like",2],
+        "17116130466596594359": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "14233388108948021331": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12522364636280164681": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10992999157318221164": ["convolution_gpu_bfyx_gemm_like",0],
+        "1781189282179491198": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15033864286535250007": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "13110173649734084688": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17946191056428828467": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "8260689555974656662": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "16053585286807864356": ["convolution_gpu_bfyx_gemm_like",2],
+        "4588420324030315321": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "4369346833875105372": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5385637020152792781": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13982221711075598070": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8281411537393664160": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12070592804878487941": ["convolution_gpu_bfyx_gemm_like",1],
+        "8528750110601691390": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17761681290527373180": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "7092429446071184360": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14776308019009874809": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "11012846743944132853": ["convolution_gpu_bfyx_gemm_like",2],
+        "7315740838189400004": ["convolution_gpu_bfyx_gemm_like",2],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "14141983383097250411": ["convolution_gpu_bfyx_gemm_like",1],
+        "8925796987351708085": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9875319892082750080": ["convolution_gpu_bfyx_gemm_like",2],
+        "13787118639037730152": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "8730097760819044515": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6373173636869473046": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "9318652504803279936": ["convolution_gpu_bfyx_gemm_like",2],
+        "16306284020664131647": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",1],
+        "37061093840513038": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "11828522357351010810": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "16710651492402564794": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "94012300876418257": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "10362906912545982002": ["convolution_gpu_bfyx_gemm_like",2],
+        "9318550032135064372": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "2678815609451494274": ["convolution_gpu_bfyx_1x1",2],
+        "11704394720448242086": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "8071652278387309042": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",2],
+        "9390919808369333231": ["convolution_gpu_bfyx_gemm_like",2],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "1996860183441418841": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6098207667540641715": ["convolution_gpu_bfyx_gemm_like",2],
+        "10159790066948852390": ["convolution_gpu_bfyx_gemm_like",1],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "772794189370544860": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7080501503636539396": ["convolution_gpu_bfyx_gemm_like",2],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5635500901926740475": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "8470783908138180217": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6542486391263861823": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "7404732699742965436": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6027350558532160900": ["convolution_gpu_bfyx_gemm_like",2],
+        "7228139313323996640": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "12020033193997292057": ["convolution_gpu_bfyx_gemm_like",0],
+        "17523255657410563512": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5536424274663702901": ["convolution_gpu_bfyx_gemm_like",2],
+        "3547275591884493445": ["convolution_gpu_bfyx_gemm_like",1],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9216695884134021401": ["convolution_gpu_bfyx_gemm_like",2],
+        "3234263189133106948": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7678730081652720605": ["convolution_gpu_bfyx_os_iyx_osv16",881],
+        "2691043943297793735": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "17182558720652199559": ["fully_connected_gpu_fb_io_ref",0],
+        "7533669599936874355": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5036963191507722541": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "6984620248108632462": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "14560435854055940143": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "9988347141056982336": ["convolution_gpu_bfyx_gemm_like",2],
+        "11769511287553067221": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1100681675092122613": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "5150467145740542480": ["convolution_gpu_bfyx_gemm_like",2],
+        "14332388011233886083": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "792684262493086891": ["convolution_gpu_bfyx_gemm_like",1],
+        "2055914145961691571": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "11928926429060828408": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "18250076003231973692": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2647922515901529845": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "14167086447992316314": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6474882514032493642": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6983544541444063131": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "17308907916370632622": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9863856393759813897": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "876164657126345894": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "12692563384795319282": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "14599150265057284139": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "4941660917457387098": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "16852207712205172744": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "1269703478898366518": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "80038800201815976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10589803022753839539": ["convolution_gpu_bfyx_gemm_like",2],
+        "4586246090279043149": ["convolution_gpu_bfyx_gemm_like",1],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",0],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "16805562203348924108": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2534408579674556441": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "15781220232431782560": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15743075522781198932": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",439],
+        "8039045580314824307": ["convolution_gpu_bfyx_gemm_like",2],
+        "9377779605078400305": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "1795659014508380077": ["convolution_gpu_bfyx_gemm_like",2],
+        "8228641750970480948": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "4184442166820068862": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "522181557896569275": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8321148793275220552": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "17828453493113919756": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4016652650196255483": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12201437677145858979": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3686062608868674589": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6512006285490280576": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5559417017584278927": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7843833033404155302": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4395247494007025604": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",2],
+        "17754836801944078461": ["convolution_gpu_bfyx_gemm_like",0],
+        "17376180096577763039": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1629280013296592298": ["convolution_gpu_bfyx_gemm_like",2],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3169696741777363811": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13423515205322319913": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",831],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1436052878894538927": ["convolution_gpu_bfyx_gemm_like",2],
+        "12225119940380026093": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "15891529662801690234": ["convolution_gpu_bfyx_gemm_like",2],
+        "13613948678997524330": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5342657840254586591": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3539764293444807886": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "15391215077224693736": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "2148648022160178995": ["convolution_gpu_bfyx_gemm_like",2],
+        "3378088934862423864": ["convolution_gpu_bfyx_gemm_like",1],
+        "3965871278597751318": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9522947878591994913": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4965629769516591986": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11588201241814594642": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "2651385050387738902": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "3827177373408316820": ["convolution_gpu_bfyx_gemm_like",1],
+        "16701880594348935298": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "16091195788712971747": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "12283317230112506089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7706467560568261104": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4077290190620885361": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16661248688859994717": ["convolution_gpu_bfyx_gemm_like",2],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15052127817178941719": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "11855137287698046529": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12561852932488001568": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16802487456370986847": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12465913523583743669": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2335783507270234825": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "801486567558674495": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7512702933193596918": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "12671153706040443724": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "8236792121585073064": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",808],
+        "15743461017318513847": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "9882204352209412039": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11287863182337672053": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "5503904988517480229": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "7552144047474664265": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "2070909131301595402": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16076153317792960383": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13090596133852586482": ["fully_connected_gpu_fb_io_ref",0],
+        "15156805695359911457": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12150109996250730485": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17342603054992556378": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "6897348673467297407": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4862869094913223247": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12888823040206007493": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16327433707667075261": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7148542290597073512": ["convolution_gpu_bfyx_gemm_like",2],
+        "5119087113905313336": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10628989973647855390": ["convolution_gpu_bfyx_gemm_like",2],
+        "13012283016751495099": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3239100076064406977": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "7174804306958128658": ["convolution_gpu_bfyx_gemm_like",1],
+        "13948512795148364852": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "14177187878748170225": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10600040563032392126": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "13776186230202020053": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "16424490086911928793": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9308999849183405794": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6626716013917662606": ["convolution_gpu_bfyx_gemm_like",2],
+        "5164372816534616260": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2836903620603494117": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13731964100893109797": ["convolution_gpu_bfyx_gemm_like",2],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "12318898203127226615": ["convolution_gpu_bfyx_gemm_like",2],
+        "14848351491062336554": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5934211962000091180": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "1950057741678433412": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5481293245081340756": ["convolution_gpu_bfyx_gemm_like",1],
+        "8094836777153039013": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "11044223289209000460": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13276959978962672952": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17575293085957492821": ["convolution_gpu_bfyx_gemm_like",1],
+        "4974435385259831818": ["convolution_gpu_bfyx_gemm_like",2],
+        "10209532888121442060": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "6306539529168638031": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "2590380836212070761": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14168946412009689868": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3455720400625598790": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "1410630713443793537": ["convolution_gpu_bfyx_gemm_like",1],
+        "12211848608269437730": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6129602738379919488": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13073917160317338455": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "11913865086932469909": ["convolution_gpu_bfyx_gemm_like",2],
+        "15199289022783178329": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_gemm_like",2],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "3855151839445505918": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "9669968379760494342": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "1299452063079314341": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1617362484243823916": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "6561864486643226753": ["fully_connected_gpu_fb_io_ref",1],
+        "10213461713478260558": ["convolution_gpu_bfyx_gemm_like",2],
+        "11463162527165083478": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "2705394837952559308": ["convolution_gpu_bfyx_gemm_like",2],
+        "1676419079398771261": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "11075875009517060583": ["convolution_gpu_bfyx_gemm_like",2],
+        "3423392897831164719": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5996261744926399743": ["convolution_gpu_bfyx_gemm_like",2],
+        "12711558966638028352": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12458305535453345462": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17040970955448750876": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14176233347574275776": ["convolution_gpu_bfyx_gemm_like",2],
+        "12669547093826826335": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "9831195630506601660": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14661447197300866468": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9823752892549805496": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12627697289412631340": ["convolution_gpu_bfyx_gemm_like",1],
+        "3839690227347352846": ["convolution_gpu_bfyx_gemm_like",2],
+        "3018306533413795559": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "15615172858007002100": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5031342439443897167": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "4689190485668249985": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2419819939573989749": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "18384657372655350144": ["convolution_gpu_bfyx_gemm_like",2],
+        "12685978195521469707": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "868177350337221377": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7341140956759424033": ["convolution_gpu_bfyx_gemm_like",2],
+        "15656843575192319040": ["convolution_gpu_bfyx_gemm_like",2],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "4407683781177409314": ["convolution_gpu_bfyx_gemm_like",2],
+        "2722965005012667650": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6140789642561898454": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15667487381692577290": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "4244790495090049295": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "14609655423082082099": ["convolution_gpu_bfyx_gemm_like",2],
+        "14221578799010900252": ["convolution_gpu_bfyx_gemm_like",2],
+        "10607904718265020949": ["convolution_gpu_bfyx_gemm_like",2],
+        "9140953654075340568": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "14683086376707577764": ["convolution_gpu_bfyx_gemm_like",1],
+        "6747799061507191246": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17771487895874668302": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "1143558550529121379": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4652136280940317116": ["convolution_gpu_bfyx_gemm_like",2],
+        "2105482100745329286": ["convolution_gpu_bfyx_gemm_like",2],
+        "16285256723517297210": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11796671083187280457": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "16507285966998102421": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "7807704275483318300": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15597522934012485452": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9640773327221702885": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6080989915764831447": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11630475290242283451": ["convolution_gpu_bfyx_gemm_like",2],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "7636001038842031672": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17586562074575968095": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13769943652297353544": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6019638262018414923": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10463896120685306944": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10928995765778560784": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8757900457181374694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "15197400201857680173": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15552287544878243347": ["convolution_gpu_bfyx_gemm_like",1],
+        "8734220847509054149": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5033665285977853779": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "708347829794105085": ["convolution_gpu_bfyx_gemm_like",1],
+        "13721983823460534294": ["convolution_gpu_bfyx_gemm_like",2],
+        "13328449155966085543": ["convolution_gpu_bfyx_gemm_like",2],
+        "10568883265991969648": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6519443541076418301": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "4812064663748033253": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "4011704860949525864": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6335402359295811260": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "16367495521884864886": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",1],
+        "15295261978800289225": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "13296566345005640760": ["convolution_gpu_bfyx_gemm_like",1],
+        "1505929048307200803": ["convolution_gpu_bfyx_gemm_like",2],
+        "2108296560864415762": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17011927973643184196": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "15072402334212221980": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "12895496994338720556": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8124166677361481618": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11407554707582995190": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5339358831190803597": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12584870629297848143": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7203545612536771243": ["convolution_gpu_bfyx_gemm_like",2],
+        "13850920989756588064": ["convolution_gpu_bfyx_gemm_like",2],
+        "8525704362451630717": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2809463221123384600": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5254115874873721374": ["convolution_gpu_bfyx_os_iyx_osv16",844],
+        "18308541794729223940": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "17464785726466943638": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "13146231972557134419": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "14998412675237613013": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7031342689301066532": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "1724222702460860833": ["convolution_gpu_bfyx_gemm_like",2],
+        "4800208854712166990": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6610054713068442549": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "287386909600391846": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "700717277178942679": ["convolution_gpu_bfyx_gemm_like",2],
+        "3280795516668356985": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",2],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "7023033151960653752": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17387764798693150143": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "18273922178875123753": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "204378699575356398": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7712831597869354170": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",504],
+        "12992163255353386581": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9945721344229922405": ["convolution_gpu_bfyx_os_iyx_osv16",485],
+        "8578774826625315147": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "6555440973226014216": ["convolution_gpu_bfyx_gemm_like",2],
+        "6750003965952674453": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",849],
+        "203639177311791127": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "9890252170749328138": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10155417869639270818": ["convolution_gpu_bfyx_gemm_like",2],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "2585176064846114298": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13546898787965086743": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16666383605403885590": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "6364288463529107554": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",2],
+        "10391152927913101404": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5290935680520661218": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16229324496308453344": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "8873424072104563382": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "7530197659550301431": ["convolution_gpu_bfyx_gemm_like",2],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12925156865008155065": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "14862938122758223157": ["convolution_gpu_bfyx_gemm_like",1],
+        "813347941036099284": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2070429718533716882": ["convolution_gpu_bfyx_gemm_like",2],
+        "11883941040326858829": ["convolution_gpu_bfyx_gemm_like",2],
+        "4477250064118514397": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",2],
+        "17543094050285028967": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11778866470635184668": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "11388177266504804841": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "16243813701829982936": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13727643349589056375": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "1313038182637545943": ["convolution_gpu_bfyx_gemm_like",2],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "5341876404211768451": ["convolution_gpu_bfyx_gemm_like",2],
+        "16195252193236429176": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "4503960445974334415": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "4307817040832953223": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6896806672575430025": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "11369389082421346630": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8325686349100774855": ["convolution_gpu_bfyx_gemm_like",1],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4607428643002808173": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "475079717987185580": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "15890749658785957481": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11511221956203704038": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12896164738668798380": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17835134875461003221": ["convolution_gpu_bfyx_gemm_like",2],
+        "9256308629247511374": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16392283136103456949": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "15459849799278480779": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "8995892222116060827": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13112861120841066430": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12952160708294444403": ["convolution_gpu_bfyx_gemm_like",2],
+        "9070474871526366492": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "17802261444972408048": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "15581678976147496970": ["convolution_gpu_bfyx_gemm_like",0],
+        "5934841294975212773": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "1204089510255285420": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "15365776263895633531": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4887564143681507924": ["convolution_gpu_bfyx_gemm_like",2],
+        "8663545677000846511": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4063525218682664832": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "802853291842159625": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "2595273700611743351": ["convolution_gpu_bfyx_gemm_like",2],
+        "18060514966005474708": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1413598669014941757": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "5756918912614763074": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6649759230117795192": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "11583791752668920812": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15993427814066246646": ["convolution_gpu_bfyx_gemm_like",2],
+        "4684985181211883028": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "598745924736700294": ["convolution_gpu_bfyx_gemm_like",2],
+        "18382226420077875582": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2598910952085172410": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10338444429123971258": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12411228585189337571": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5733530388090903847": ["convolution_gpu_bfyx_gemm_like",2],
+        "13019190248083899887": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13345599888287912619": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12944449254981328284": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "3329610414149222728": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17442035600389810700": ["convolution_gpu_bfyx_gemm_like",0],
+        "5047419871737940985": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13122637768866153753": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8818070832398055086": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6898793319624390153": ["convolution_gpu_bfyx_gemm_like",2],
+        "15679696422603106163": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "1824009696938637196": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "777107147173214189": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2915952195141872726": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "14795626641169374231": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "11312481316584327495": ["convolution_gpu_bfyx_gemm_like",0],
+        "3783485901378896953": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "6404731509766519779": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10356951625481502476": ["convolution_gpu_bfyx_gemm_like",2],
+        "8254412626112343365": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13792918179373942640": ["convolution_gpu_bfyx_gemm_like",2],
+        "16507216630035678597": ["convolution_gpu_bfyx_gemm_like",2],
+        "2592242929641774198": ["convolution_gpu_bfyx_gemm_like",0],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7596423139159263456": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13276867073526485069": ["convolution_gpu_bfyx_gemm_like",1],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "18076018773227225156": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14799589725341253463": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "4553508439536472227": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "9974986004361966590": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9880591864624136517": ["convolution_gpu_bfyx_gemm_like",2],
+        "14885519273643841492": ["convolution_gpu_bfyx_gemm_like",0],
+        "9513545197321447870": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "4165920860392215245": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "9235762655002034553": ["convolution_gpu_bfyx_gemm_like",2],
+        "2684971093531227585": ["convolution_gpu_bfyx_gemm_like",2],
+        "10629681722649771498": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15592321818359223008": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "1753515740487760297": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4305170667287274371": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12523676912856063091": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16462602383546733062": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",1],
+        "2772704069752888874": ["convolution_gpu_bfyx_gemm_like",2],
+        "17211272113483906944": ["convolution_gpu_bfyx_gemm_like",2],
+        "11858246418724176452": ["convolution_gpu_bfyx_gemm_like",1],
+        "10263861857115868555": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "3202034075645193740": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8734483136584351066": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "7860086755625626604": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "3688864365328401568": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "14900099988131599740": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "15463873588896650327": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "2888587871912905870": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "12821282158186877473": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14512311371993445906": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "5176939691838030517": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "6307840223437204536": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "14484890926084856480": ["convolution_gpu_bfyx_gemm_like",1],
+        "10153070641942936648": ["convolution_gpu_bfyx_gemm_like",1],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "443863053598769137": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "6513705142577622089": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8545063312289220869": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6587817876244206939": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "4673618329986777239": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "17089801601582809764": ["convolution_gpu_bfyx_gemm_like",1],
+        "1908733355560815063": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5369464352361405510": ["convolution_gpu_bfyx_gemm_like",0],
+        "17646394278957547470": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6412452556355382032": ["convolution_gpu_bfyx_1x1",2],
+        "8468092944055919238": ["convolution_gpu_bfyx_gemm_like",2],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17734437318941312627": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "7930154826818165796": ["convolution_gpu_bfyx_gemm_like",1],
+        "8054562515577756499": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "14652719560551657529": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "2341006744107937832": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "13337315872184544686": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "1615155632991337496": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1607916839270914773": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9806689250758752070": ["convolution_gpu_bfyx_gemm_like",2],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "69884424286147709": ["convolution_gpu_bfyx_gemm_like",2],
+        "3623866842874047894": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "11191005013126286532": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "939718260623752240": ["convolution_gpu_bfyx_gemm_like",1],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",940],
+        "13108356579957761944": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11185041745377164894": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14959281374959998609": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8751967016877067287": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "9266375177690276615": ["convolution_gpu_bfyx_gemm_like",2],
+        "5149553691611520515": ["convolution_gpu_bfyx_gemm_like",2],
+        "75742659105146536": ["convolution_gpu_bfyx_gemm_like",1],
+        "11845013061234102293": ["convolution_gpu_bfyx_gemm_like",2],
+        "15786313441300512560": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15857087373591747006": ["convolution_gpu_bfyx_gemm_like",2],
+        "2255387202504703562": ["convolution_gpu_bfyx_gemm_like",1],
+        "11640468046947233335": ["convolution_gpu_bfyx_gemm_like",2],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "16078334558348380858": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "13728914881583145008": ["convolution_gpu_bfyx_gemm_like",1],
+        "5084402281339667158": ["convolution_gpu_bfyx_gemm_like",0],
+        "2881475011209167644": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3813463368918975003": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "15677832333607749130": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12547252593506448096": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "14086074948200412805": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "13388004363210658650": ["convolution_gpu_bfyx_gemm_like",2],
+        "9968496035529786888": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8648848365873958010": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "14366861063858001106": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7844764086278702374": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4239277257640567966": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17195491464960153261": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10432687907685994204": ["convolution_gpu_bfyx_gemm_like",1],
+        "7864880361674128748": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "1444256562477852389": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "557926911473978758": ["convolution_gpu_bfyx_gemm_like",1],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "11757919563609176713": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "17140704838989242732": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13429534778879474114": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "8489998884193999354": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "1404523328737649536": ["convolution_gpu_bfyx_gemm_like",1],
+        "9692949270906064580": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",1],
+        "4131038864155440038": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1553825475921110392": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "4927139127938739019": ["convolution_gpu_bfyx_gemm_like",2],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "11462394098346770463": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "11981887712163064333": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8132803057215688544": ["convolution_gpu_bfyx_gemm_like",2],
+        "9810703513111623136": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "15598527290222497283": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3409255127071376537": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "6877976003072165363": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3433877094202077256": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "14797994820826922836": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8806330242319534440": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9447458159095730492": ["convolution_gpu_bfyx_gemm_like",2],
+        "4920194716156732643": ["convolution_gpu_bfyx_gemm_like",2],
+        "286393043958202995": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14249346934748369643": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13654408396081513312": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "10168317560306247723": ["convolution_gpu_bfyx_gemm_like",2],
+        "2479282650381163888": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2715131647421221125": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "2808205041095636198": ["convolution_gpu_bfyx_gemm_like",2],
+        "3697631094971930011": ["convolution_gpu_bfyx_gemm_like",2],
+        "1760779615705074283": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "8707189142909022305": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "2084855707532555969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12248119734016401633": ["fully_connected_gpu_fb_io_ref",2],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "2014911634432127630": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3192518239721798250": ["convolution_gpu_bfyx_gemm_like",2],
+        "13064477237937322246": ["convolution_gpu_bfyx_gemm_like",1],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "7289907211627391947": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "2917248122493101477": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "3509811595028801757": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "16496066467505445971": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6728889146307098720": ["convolution_gpu_bfyx_gemm_like",2],
+        "3939805316470672966": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12675858428585873471": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5983610157873969708": ["convolution_gpu_bfyx_gemm_like",2],
+        "13226478376552374040": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "4317173590203436940": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "3568749741838926204": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5049534591553232781": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5754301693527535975": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "13503688893307029975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10400727836871462348": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "12965800692507042874": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6040623414692799116": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1394872024856809266": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "11798081355131440794": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "17723621158215826108": ["convolution_gpu_bfyx_gemm_like",2],
+        "9454954846682513038": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3201851883430682391": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "3572202652824023801": ["convolution_gpu_bfyx_gemm_like",2],
+        "15921072201288695017": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "15045861858500584001": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16091165907421819456": ["convolution_gpu_bfyx_gemm_like",2],
+        "17050143605017295447": ["convolution_gpu_bfyx_gemm_like",2],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "16462029188795652848": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "10286228358844791913": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "6402941068107243403": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "18159049252673770569": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "10972033292930619311": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14634044133573461949": ["convolution_gpu_bfyx_gemm_like",0],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7105622384646913935": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1106762955109168526": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "8050798452111667069": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "6020570210392850503": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "1149548328523286475": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "6225447513745282621": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4960466075321426984": ["convolution_gpu_bfyx_gemm_like",2],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2832331506191733785": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "7808544677773370430": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17622515300258231642": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "17838473675663772639": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5488168361113140102": ["convolution_gpu_bfyx_gemm_like",2],
+        "4665029580355133140": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "15395497315929884637": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15374625876485618845": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "18286006396667126860": ["convolution_gpu_bfyx_gemm_like",1],
+        "14535007186125575064": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15490478608105402679": ["convolution_gpu_bfyx_gemm_like",2],
+        "642256034968512602": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "10412902860958663054": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "169973842603492802": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15661055655577513377": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7177701509002270324": ["convolution_gpu_bfyx_gemm_like",0],
+        "11207578758583923357": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7671440804202996063": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "10445587307296180364": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2616828683870391718": ["convolution_gpu_bfyx_gemm_like",2],
+        "14471867575610362464": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7606277451240586967": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11430797372848621790": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14189775376370027482": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "9448537968809630184": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "814227839929688672": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2376239021851907962": ["convolution_gpu_bfyx_gemm_like",2],
+        "4021097865391343020": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "13845827017732177448": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "523055954326631884": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15184258464890250739": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16703049240941366828": ["convolution_gpu_bfyx_gemm_like",2],
+        "13410850301164057911": ["convolution_gpu_bfyx_gemm_like",1],
+        "7616752360105602320": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "11825209936640729550": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8733109144496806085": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "16483792160297698151": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5966963943739041502": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10856527039674342926": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5246955189449281709": ["convolution_gpu_bfyx_gemm_like",2],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9126242742012768166": ["convolution_gpu_bfyx_gemm_like",2],
+        "17052596472114345717": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1818433662409886324": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "336151670657372877": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "13066019581499650377": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4172485608495372888": ["convolution_gpu_bfyx_gemm_like",1],
+        "14142812374094816721": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12451602623042934613": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6246148818627951104": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "15280273795883244074": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10134411551190003359": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3311449696894745049": ["convolution_gpu_bfyx_os_iyx_osv16",399],
+        "6701235077433821331": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "11148502358361704423": ["convolution_gpu_bfyx_gemm_like",1],
+        "17390307025967314108": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9527075413813342687": ["convolution_gpu_bfyx_gemm_like",2],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "17962578815194404362": ["convolution_gpu_bfyx_gemm_like",1],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "1176958491218281154": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14705457019471647279": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17621284804179990612": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "15911352758031362713": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "342174683264941351": ["convolution_gpu_bfyx_gemm_like",2],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "10683839359385393536": ["convolution_gpu_bfyx_gemm_like",1],
+        "15143544451530667222": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "498239903908845198": ["convolution_gpu_bfyx_gemm_like",2],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15596913527233792996": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "12757564215386697460": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3272776991539782834": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14300946078988784221": ["convolution_gpu_bfyx_gemm_like",1],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",0],
+        "393884269158067083": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "3725060015826635697": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5595802790436774398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13537323999534292650": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "15682441855379046778": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7946776740333736799": ["convolution_gpu_bfyx_gemm_like",1],
+        "8501760360687221821": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6515141738021465336": ["convolution_gpu_bfyx_gemm_like",2],
+        "11446181888102710561": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6478054912653910426": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12673168008792254171": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6275903692904946376": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17503210896556316294": ["convolution_gpu_bfyx_gemm_like",1],
+        "7557446085365037177": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "17924819398394001587": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "1187817806204244044": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "17054734441457769665": ["convolution_gpu_bfyx_gemm_like",2],
+        "1081962464388501987": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9861846661532177405": ["convolution_gpu_bfyx_gemm_like",2],
+        "12714194906146827658": ["convolution_gpu_bfyx_gemm_like",1],
+        "10010921697596131761": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "7154364270315480182": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14810839157236175179": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18386376129938707290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7595481705069674721": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "16218121706393504358": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "18348301285923584995": ["convolution_gpu_bfyx_gemm_like",2],
+        "4011606166408526342": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "2822531372171708171": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10612049417873776481": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2451603338483395600": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "15263499602817313477": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",2],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "17489420766684604600": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",2],
+        "5367634698951188749": ["convolution_gpu_bfyx_gemm_like",2],
+        "17846007967411480006": ["convolution_gpu_bfyx_gemm_like",1],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "11244704751123402754": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",2],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10205696100164492716": ["convolution_gpu_bfyx_gemm_like",2],
+        "17049054004246292085": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "13398875754083902831": ["fully_connected_gpu_fb_oi_ref",1],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16162899163122139501": ["fully_connected_gpu_fb_io_ref",1],
+        "12889351859522118935": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "9805748332775912215": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14905705901815863508": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15893297349596399716": ["convolution_gpu_bfyx_gemm_like",2],
+        "2805931700404492624": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11673506380927771816": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "17014952568021457244": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8550133332738529361": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "17854138024884397413": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "714397516895317906": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1240102354814495870": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "6948696390129114563": ["convolution_gpu_bfyx_gemm_like",2],
+        "6895664772793074050": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "885661562948597780": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6534932244936310237": ["convolution_gpu_bfyx_gemm_like",2],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "15151957983054148973": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "8442368383427915597": ["convolution_gpu_bfyx_gemm_like",2],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "8035784732695264817": ["convolution_gpu_bfyx_os_iyx_osv16",882],
+        "1529658068204046700": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4186957909762095019": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3727796815945431654": ["convolution_gpu_bfyx_gemm_like",2],
+        "11051434650031832658": ["convolution_gpu_bfyx_gemm_like",1],
+        "7247891577022043949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16303870101043861053": ["convolution_gpu_bfyx_gemm_like",2],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "1555841293175143289": ["convolution_gpu_bfyx_gemm_like",1],
+        "11047759270093007856": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "9335016444137172241": ["convolution_gpu_bfyx_gemm_like",2],
+        "6069028745615910182": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "9164584153555521506": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "5085190482265319015": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10928764471719815519": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1147744092130296563": ["convolution_gpu_bfyx_gemm_like",1],
+        "6876164425008541018": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8560635685184432720": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2047041720569246861": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",946],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "4682428771166816734": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13262749073059058405": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16165264024659208580": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "5635504912415420460": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "16912035321030511639": ["convolution_gpu_bfyx_gemm_like",1],
+        "5409924335138540834": ["convolution_gpu_bfyx_gemm_like",2],
+        "6133592828563353516": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "659150305191479097": ["convolution_gpu_bfyx_gemm_like",2],
+        "3811462129131022619": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "8291770994531919371": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "15974241934088373021": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6625355663340809894": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14270450799210365812": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "8682149821028981871": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12906669887096343446": ["convolution_gpu_bfyx_gemm_like",1],
+        "14346703182362139650": ["convolution_gpu_bfyx_gemm_like",0],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5536595882075097311": ["convolution_gpu_bfyx_gemm_like",2],
+        "11510063368067539341": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "12794030011655906930": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "5229688072405810569": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7921388663815287395": ["convolution_gpu_bfyx_gemm_like",1],
+        "14667209474639064623": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6437820621340256996": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15630712601053635938": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17059095074211347838": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "16053383948025511837": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "9552615241912277692": ["convolution_gpu_bfyx_gemm_like",2],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "12024416333474523686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2727219457659794468": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9513218905938141296": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "13842309033760176194": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12319165874575782715": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16304963156448605623": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "153117141968471446": ["convolution_gpu_bfyx_gemm_like",1],
+        "572265264921910408": ["convolution_gpu_bfyx_gemm_like",2],
+        "10736892779278378335": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9907053348268964966": ["convolution_gpu_bfyx_gemm_like",2],
+        "13116746433291181712": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",1],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4600698444492242585": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7480968533463196410": ["convolution_gpu_bfyx_gemm_like",2],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "4010329161090285019": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "15406324750533549980": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14164778301660100413": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14953809073272885651": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "10648806188852074159": ["convolution_gpu_bfyx_gemm_like",2],
+        "15551453802011405101": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6762862978340755053": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15733883474006568340": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "12442273255786121651": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5019077257951332016": ["convolution_gpu_bfyx_gemm_like",2],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13212959214376905822": ["convolution_gpu_bfyx_gemm_like",2],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1183774022668948480": ["convolution_gpu_bfyx_gemm_like",2],
+        "12694001580800313954": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15025260753866131193": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3704618172730076978": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "5488296540132936296": ["convolution_gpu_bfyx_gemm_like",2],
+        "9738285774864435144": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "4788094685976850847": ["convolution_gpu_bfyx_gemm_like",1],
+        "5181206680937070543": ["convolution_gpu_bfyx_1x1",2],
+        "14216698267977999547": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "5695368162557483073": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16688894228380134416": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1882912836250239503": ["convolution_gpu_bfyx_gemm_like",1],
+        "10373791029573299582": ["convolution_gpu_bfyx_gemm_like",0],
+        "5872553335123308034": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5381578460674280089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "213518984547400496": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "2387389473399444503": ["convolution_gpu_bfyx_gemm_like",2],
+        "14296771090926462138": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "15199659885055090985": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3701795558556637835": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13975759856997443246": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "8898449752724034655": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15278336216464964580": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10701231567226563098": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "1735849969339696694": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17006655627343469372": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2268291720177538378": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16857192626139882429": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "4544147798324802817": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "13352151930345854198": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "15444345793124210505": ["convolution_gpu_bfyx_gemm_like",1],
+        "1213958002895787672": ["convolution_gpu_bfyx_gemm_like",2],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "15095146351334328804": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5407778324198159962": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14808831640065476291": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12341291953192305346": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "2096167792705935744": ["convolution_gpu_bfyx_gemm_like",0],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16614092873294424156": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3355259926747524578": ["convolution_gpu_bfyx_gemm_like",2],
+        "3296059171653513862": ["convolution_gpu_bfyx_gemm_like",1],
+        "4574242607119408140": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9760847838439331960": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "17231014023477377001": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12305383126483033452": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",2],
+        "15088940149962496972": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8158000313391713522": ["convolution_gpu_bfyx_gemm_like",2],
+        "8471867907212890827": ["convolution_gpu_bfyx_gemm_like",0],
+        "10134863884423338495": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "8732106543033226791": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "9632178829095307219": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11922163303962372849": ["convolution_gpu_bfyx_gemm_like",1],
+        "447152944190888653": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12445292008737311977": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5338109154207406041": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "2518919454830671073": ["convolution_gpu_bfyx_gemm_like",1],
+        "10158890414412187141": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14445981111412755844": ["convolution_gpu_bfyx_gemm_like",2],
+        "11007100272494557520": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "1663732107639157701": ["convolution_gpu_bfyx_gemm_like",1],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "17680403286850504499": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "871656942964602772": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "412314676462573090": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1944067639361309743": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "12709406234969954619": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "7715520469947900684": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1646362346584649954": ["fully_connected_gpu_fb_io_b8_f8_vload",2],
+        "12654574135415748217": ["convolution_gpu_bfyx_os_iyx_osv16",569],
+        "4691552892932405676": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "2168955429090043259": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "18269382610859905921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "6369089883691693453": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15636407980943172317": ["convolution_gpu_bfyx_gemm_like",1],
+        "15149336254307320187": ["convolution_gpu_bfyx_gemm_like",1],
+        "2287331417346465035": ["convolution_gpu_bfyx_gemm_like",2],
+        "5191016422297403500": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2407509127927738079": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "13874754478479442212": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "16822728519529055454": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8897786294680986991": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "7801270668419570665": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "3432296808755992670": ["convolution_gpu_bfyx_gemm_like",2],
+        "2128062528433088944": ["convolution_gpu_bfyx_gemm_like",1],
+        "17845905249343189063": ["convolution_gpu_bfyx_gemm_like",1],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16067821671414842756": ["convolution_gpu_bfyx_gemm_like",2],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "2248754661513284642": ["convolution_gpu_bfyx_gemm_like",2],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "13985989113434682460": ["convolution_gpu_bfyx_gemm_like",1],
+        "17354626928258309128": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18431307741997030842": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",1],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",2],
+        "8562093724840063781": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9548658329589481069": ["convolution_gpu_bfyx_gemm_like",1],
+        "12570087709404311189": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "7256947320128669983": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "1414092714405352435": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13007534905441600782": ["convolution_gpu_bfyx_gemm_like",2],
+        "4273605292522062969": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "16578265652036967656": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4157063588837576075": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "7291920886894073603": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "13478922504367374201": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "10265955847846166394": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "6621371075123542816": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17118569850095586049": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10643373404881648498": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13683563727561197895": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17732250360268013336": ["convolution_gpu_bfyx_gemm_like",2],
+        "6278892144796112655": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "12879205642236526041": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15952399564161253450": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14077148976508649021": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069245927173134634": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16494403731659808258": ["convolution_gpu_bfyx_os_iyx_osv16",844],
+        "8324250071425605671": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1015184966858657992": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "12541764833974378504": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1587220602242157814": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7471714472577512044": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3664842151999943": ["convolution_gpu_bfyx_gemm_like",0],
+        "15594091060902767607": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3239779684432082106": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "7612252849133077309": ["fully_connected_gpu_fb_oi_ref",1],
+        "15641049130597645936": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11851526665791263153": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "7152107839144357830": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10472893418729915556": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "11579025491409526679": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "142162982878269165": ["convolution_gpu_bfyx_gemm_like",1],
+        "13471241383850968329": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "3800864312883193560": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "5649838591590266046": ["convolution_gpu_bfyx_gemm_like",2],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13839590781642269381": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "3179296883398083696": ["convolution_gpu_bfyx_gemm_like",2],
+        "13492216433886201174": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "16184979150665364486": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5758223108250439377": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",5],
+        "15379595951542162189": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "482564204402769504": ["convolution_gpu_bfyx_gemm_like",2],
+        "15588841557002049726": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12329302439548900551": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "5523778675167321193": ["fully_connected_gpu_fb_oi_ref",0],
+        "4054010905884346287": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "18417880214901227799": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "3475222563515381706": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5605603969528988532": ["convolution_gpu_bfyx_1x1",2],
+        "8435953773852854494": ["convolution_gpu_bfyx_os_iyx_osv16",398],
+        "10147140488258047779": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18229087521018116863": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8474585711383508493": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "26434141991791193": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "18043745678739016406": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "2317476796706098254": ["convolution_gpu_bfyx_gemm_like",2],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "5135353986081664933": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10089588313551601914": ["convolution_gpu_bfyx_gemm_like",2],
+        "17855733925989425515": ["convolution_gpu_bfyx_gemm_like",2],
+        "2935787827649981367": ["convolution_gpu_bfyx_gemm_like",1],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "17494823614269622175": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "17784357412228522825": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "12390011660072693092": ["convolution_gpu_bfyx_gemm_like",2],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "9639014900668946045": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1464276409229103946": ["convolution_gpu_bfyx_gemm_like",2],
+        "3664532426561688336": ["convolution_gpu_bfyx_gemm_like",2],
+        "15529767675448574617": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12318427976031000768": ["convolution_gpu_bfyx_gemm_like",2],
+        "15438623619938843299": ["convolution_gpu_bfyx_gemm_like",2],
+        "2549584578485278083": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6672808203620992802": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "2464201299319518869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18398231411109020099": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14970517289345999487": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9180575279116075400": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14975859027256879948": ["convolution_gpu_bfyx_gemm_like",1],
+        "10207459870439759692": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "5424159498790442193": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12024318713420323349": ["convolution_gpu_bfyx_gemm_like",2],
+        "16684378382033936005": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "12379734005351960619": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "707449835235490641": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16361249849376112433": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2088422904562849807": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "4890932609897686394": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1565612286723277822": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "7004953121070642766": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14460931972510023382": ["convolution_gpu_bfyx_gemm_like",2],
+        "16536775289334717044": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "14897935118679731283": ["convolution_gpu_bfyx_gemm_like",1],
+        "7630776235327261710": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "18026754720065676632": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17934338042329576850": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12828115278384825394": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14350963106032411355": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "10772763339005937717": ["convolution_gpu_bfyx_gemm_like",1],
+        "6505706083205285176": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "15294692035670155801": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "16264774056719724826": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10395191003166536655": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "10202794960937110471": ["convolution_gpu_bfyx_gemm_like",2],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5120274680151325194": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",553],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "13297691763391637265": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "15335516948540868535": ["convolution_gpu_bfyx_gemm_like",2],
+        "7431469348791099474": ["convolution_gpu_bfyx_gemm_like",2],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "12262273765279224456": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "3879520363526481335": ["convolution_gpu_bfyx_gemm_like",2],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "13340998273773542342": ["convolution_gpu_bfyx_gemm_like",1],
+        "1312322903335525510": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8292979162428130363": ["convolution_gpu_bfyx_gemm_like",1],
+        "6819846227498139601": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "4165515078945360525": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "12823842409678756966": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13616241450266119966": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17585210048585855482": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "15924146956535930192": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "14376192291828307385": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "4477135619420651110": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15513894336778253285": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9056812077282494074": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "12136458184046915563": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18166732758694978380": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9053383117071470496": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "609926704263171728": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "18117954008112578376": ["convolution_gpu_bfyx_gemm_like",2],
+        "7088331918128954410": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13356152596085257346": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "311101627084421734": ["convolution_gpu_bfyx_gemm_like",2],
+        "1462775202780029067": ["convolution_gpu_bfyx_gemm_like",2],
+        "15830721134654889992": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "2743892624333411461": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "13833252058258614175": ["convolution_gpu_bfyx_gemm_like",2],
+        "2007192658799516915": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "1894591633696862066": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "11354523117287453982": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1299160913578942012": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "14277843123789500234": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "13680502636898130714": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16837749846151508824": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",2],
+        "14008438372661779490": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "870448505006560377": ["convolution_gpu_bfyx_gemm_like",0],
+        "5087812112020408781": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "6656668362090313451": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18216392915308276053": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "59384288121901543": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9996590003462421281": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "6364765994481977132": ["convolution_gpu_bfyx_gemm_like",2],
+        "10110359677546019738": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "15579919505002150556": ["convolution_gpu_bfyx_gemm_like",2],
+        "17966517080605659454": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "3234567405788241673": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "6889498170947481097": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "9427999492792081454": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "16122815225820081176": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17396226612787250663": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "12478041902013146137": ["convolution_gpu_bfyx_os_iyx_osv16",89],
+        "17361849627958781572": ["convolution_gpu_bfyx_gemm_like",1],
+        "13915749401892931804": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "26773921190137993": ["convolution_gpu_bfyx_gemm_like",1],
+        "11386443944172875185": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6233455595448276342": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11490143853656040028": ["convolution_gpu_bfyx_gemm_like",2],
+        "5691889055008878111": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "15928746165235747659": ["convolution_gpu_bfyx_gemm_like",1],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "16683909937519981313": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "3565702695809105495": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "17499047811775012205": ["convolution_gpu_bfyx_gemm_like",1],
+        "15265621959560796543": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "13855910108498240870": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6505035828719376225": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "4700147248198305671": ["convolution_gpu_bfyx_gemm_like",2],
+        "4879523846205649729": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "3661361503342294227": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4160065196876225262": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16159055229009077435": ["convolution_gpu_bfyx_gemm_like",2],
+        "14130300861965892020": ["convolution_gpu_bfyx_gemm_like",2],
+        "2740287492529009109": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "4737347018334654530": ["convolution_gpu_bfyx_1x1",2],
+        "8332688858465419317": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "17750329428766282997": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "4316519748653705692": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "2184670359551186734": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8571662320744858201": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "13453226687921450129": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4181049793451733466": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8403560033589747065": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6831045740006076251": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "15336590103518398224": ["convolution_gpu_bfyx_gemm_like",1],
+        "6041249121715337066": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "16583563382485459718": ["convolution_gpu_bfyx_gemm_like",2],
+        "4725009116734166168": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "10890538764006500546": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12394049027081208902": ["convolution_gpu_bfyx_gemm_like",1],
+        "8374345306483326015": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "11775667915453535428": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11078289776590382448": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "4316278502963439894": ["convolution_gpu_bfyx_gemm_like",2],
+        "3214253333840552610": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8670512344429807851": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13636129806349817264": ["convolution_gpu_bfyx_gemm_like",1],
+        "15315327794058441258": ["convolution_gpu_bfyx_gemm_like",2],
+        "7282751412088726760": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2384942244346844027": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6410694203929640959": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8155752116518841384": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "8054599744123820194": ["convolution_gpu_bfyx_gemm_like",1],
+        "3349519148124496343": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "8395521198680584245": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11404331488962230130": ["convolution_gpu_bfyx_gemm_like",2],
+        "2415478259408761142": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "11173744709088359283": ["fully_connected_gpu_fb_oi_ref",1],
+        "16419903786705052849": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5970516037710024187": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16022858814676339910": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "5629373398445592781": ["convolution_gpu_bfyx_gemm_like",2],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4212194737559719449": ["convolution_gpu_bfyx_gemm_like",2],
+        "9557728221162137067": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "15012744672096562609": ["convolution_gpu_bfyx_gemm_like",0],
+        "7117825897866941983": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10884966210360699082": ["convolution_gpu_bfyx_gemm_like",2],
+        "11642941943446484202": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12219239604684537521": ["convolution_gpu_bfyx_gemm_like",1],
+        "4036143655651874318": ["convolution_gpu_bfyx_gemm_like",1],
+        "12341247287556387988": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "8262441556572334783": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "16159309494101203811": ["convolution_gpu_bfyx_gemm_like",1],
+        "17325362379118492558": ["convolution_gpu_bfyx_gemm_like",1],
+        "13745327504866194229": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6508892940062336667": ["convolution_gpu_bfyx_gemm_like",2],
+        "11079710960007068860": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "16245852986663960440": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5982637097503543357": ["convolution_gpu_bfyx_gemm_like",2],
+        "218070270815606832": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4455497237293642238": ["convolution_gpu_bfyx_gemm_like",0],
+        "237302155033013557": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18232459663207612727": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "13234170505677988638": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "4238163995861108694": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "14685573786743639408": ["convolution_gpu_bfyx_gemm_like",2],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "16632447105476661928": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "13727585908419292912": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "1127844465496534455": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "452869991150713968": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "981733129438741439": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9457038545823436137": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12345000525470836335": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1346716334208025932": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "11465965972527519631": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12883021432082543848": ["convolution_gpu_bfyx_gemm_like",1],
+        "7883469783245625654": ["convolution_gpu_bfyx_gemm_like",2],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5089359404080552270": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "11782525502250249483": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17119834538806653818": ["convolution_gpu_bfyx_gemm_like",1],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "53692441535283176": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "5566145479615299930": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "56327004269432885": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4265991006340418914": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7338578624767544128": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "3094541981461578435": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "12956535344568057480": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11431776034512615562": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1051506168926530904": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "3956185868703826254": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1127598752149871162": ["convolution_gpu_bfyx_os_iyx_osv16",437],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9040046051053703359": ["convolution_gpu_bfyx_gemm_like",2],
+        "12169148580322697755": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5461649843950745696": ["convolution_gpu_bfyx_gemm_like",2],
+        "7405835196787288054": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "2801984749519758568": ["convolution_gpu_bfyx_gemm_like",2],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "9980945809859857871": ["convolution_gpu_bfyx_gemm_like",1],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "13435416060730279243": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15838058479520696173": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "9999963747832102729": ["convolution_gpu_bfyx_1x1",2],
+        "9803306661531470015": ["fully_connected_gpu_fb_oi_ref",0],
+        "6129884455218252024": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "269334626439013799": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "11002656253983635383": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5245087746877459629": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "14762859593402798050": ["convolution_gpu_bfyx_gemm_like",1],
+        "14707855908416908375": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "16747069131271457481": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "3790881125495367946": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5073623316666025204": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",2],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "8007491455800395118": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "541744773413565297": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "11086699387784339943": ["convolution_gpu_bfyx_gemm_like",2],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "14763015336626099830": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14492935486352505845": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17188170051014066220": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12482312825666761192": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3179874645565098825": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14807299286266923693": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "18414480146618201609": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7455983063685796863": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "9378419102254633989": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2028119808899845451": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "16359282790151128772": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8451901619003558199": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12771805545455650546": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "4565037760028957581": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "16112835627818488034": ["convolution_gpu_bfyx_gemm_like",2],
+        "8885012252853227025": ["convolution_gpu_bfyx_gemm_like",1],
+        "17479614483340719566": ["convolution_gpu_bfyx_gemm_like",2],
+        "9486447779233331380": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "591445875836641836": ["convolution_gpu_bfyx_gemm_like",1],
+        "6263019986730305851": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "6489645404977288242": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "15693851280141842140": ["convolution_gpu_bfyx_gemm_like",2],
+        "390943380079040179": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "14903430454784452446": ["convolution_gpu_bfyx_gemm_like",2],
+        "3885931890288969926": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14004618842373739106": ["convolution_gpu_bfyx_gemm_like",2],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "13017541921351620667": ["convolution_gpu_bfyx_gemm_like",1],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",2],
+        "4682062886371423209": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "12057000101434512661": ["convolution_gpu_bfyx_gemm_like",2],
+        "14184440545916228597": ["convolution_gpu_bfyx_gemm_like",1],
+        "16745988677098035122": ["convolution_gpu_bfyx_gemm_like",2],
+        "1276881030620698911": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14998779987429927952": ["convolution_gpu_bfyx_gemm_like",2],
+        "18113235498360281695": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "17400844732252600825": ["convolution_gpu_bfyx_gemm_like",1],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "17338623890209792485": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11000064679911527524": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18259018980049662870": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "3685556976073096544": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16482763280295827563": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6057433908801727873": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13785621878621289403": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "17223169013008075474": ["convolution_gpu_bfyx_gemm_like",2],
+        "15285236716284874711": ["convolution_gpu_bfyx_gemm_like",1],
+        "16181974394948732584": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "8701639906504450534": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1403373982815401451": ["convolution_gpu_bfyx_gemm_like",1],
+        "11927673108508931485": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "14467326533329852095": ["convolution_gpu_bfyx_gemm_like",1],
+        "12191056298847752438": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7313000297447719088": ["convolution_gpu_bfyx_gemm_like",2],
+        "15329647206594763271": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "9150686862263626364": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "14713376061469695024": ["convolution_gpu_bfyx_gemm_like",2],
+        "13779700363254765602": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "7606097739225472283": ["convolution_gpu_bfyx_gemm_like",2],
+        "9307683865422702618": ["convolution_gpu_bfyx_gemm_like",2],
+        "1109243878358317937": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "7020655100877544328": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "12392988351482826871": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "962311766200741205": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8512711227383782401": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "7033442247935655919": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15936513690378208182": ["convolution_gpu_bfyx_gemm_like",0],
+        "12610854610554906160": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8140242320379485952": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15891746043846062984": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "997155336931700015": ["convolution_gpu_bfyx_gemm_like",2],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5065071428884648135": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4770478662275293849": ["convolution_gpu_bfyx_gemm_like",2],
+        "2832311883163804015": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "712420402191459810": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3855859061709004677": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "16687701987371294908": ["convolution_gpu_bfyx_gemm_like",2],
+        "18418073826375395057": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15156525717629023944": ["convolution_gpu_bfyx_gemm_like",2],
+        "6571438978296387721": ["convolution_gpu_bfyx_gemm_like",2],
+        "3994033185122319003": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "5949713204609055571": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "16039372573821594566": ["convolution_gpu_bfyx_gemm_like",0],
+        "8779947213821605681": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14425082589599804235": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16629493658542781988": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",1],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5510336500642744696": ["convolution_gpu_bfyx_gemm_like",2],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "15862793522143880668": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "2346992541638145615": ["convolution_gpu_bfyx_gemm_like",2],
+        "2580909693815921167": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15452906059667613512": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10993061520709478334": ["convolution_gpu_bfyx_gemm_like",1],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "1601512693620510391": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478496773222604204": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "13772598362521854438": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9165275903833498932": ["convolution_gpu_bfyx_gemm_like",1],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "3609233164979051271": ["convolution_gpu_bfyx_gemm_like",2],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "18066249200906113142": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "937200116534179904": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "16448023768045157448": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12274965963922410259": ["convolution_gpu_bfyx_gemm_like",1],
+        "2424832456352484524": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",2],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "2072252610120557179": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11619548409913646265": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10973647655853229395": ["convolution_gpu_bfyx_gemm_like",1],
+        "8507854696766492454": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "15688260390755491480": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2520734476651273971": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "17824431042110985323": ["convolution_gpu_bfyx_gemm_like",1],
+        "2858694223939965231": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "8357109553923988018": ["convolution_gpu_bfyx_gemm_like",2],
+        "14849708746319190277": ["convolution_gpu_bfyx_gemm_like",2],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14647949921048404551": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "14839051765301295219": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6223991300587768990": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",2],
+        "12996812489446605594": ["convolution_gpu_bfyx_gemm_like",2],
+        "5011190083565902614": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16995919898822376726": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "937763627727362899": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "4554398307153171456": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4220826666482500445": ["convolution_gpu_bfyx_gemm_like",2],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "981276017776678882": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "5648099611567577611": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "12421464739243825246": ["convolution_gpu_bfyx_gemm_like",2],
+        "10186866999254188246": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17716151880660804743": ["convolution_gpu_bfyx_gemm_like",0],
+        "8886676435675463412": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13766538247146238357": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "5515216528474382598": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4279694886527244747": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "142345353315012903": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "9092949297095391463": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5550000568272972532": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "1617907811128880383": ["convolution_gpu_bfyx_gemm_like",2],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12364947728685604753": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "815847426244665239": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7253709516917901897": ["convolution_gpu_bfyx_gemm_like",2],
+        "8481272193490654884": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "11178675492112714513": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "6494837659483504443": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "52089503050497755": ["convolution_gpu_bfyx_gemm_like",2],
+        "6830643729780599672": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "13538051178827008933": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9650737941239265593": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "8281212003098870446": ["convolution_gpu_bfyx_gemm_like",2],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16923874271029636508": ["convolution_gpu_bfyx_gemm_like",2],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "9516102312850256675": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "2888315406857606108": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "12932174902085755507": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10428477376571919905": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "9753436607600877081": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "5124645583449732785": ["convolution_gpu_bfyx_gemm_like",2],
+        "7453661005436415653": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8104509697376352086": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "6355819766289051977": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12582624102297726596": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5195515230960933214": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "11529521968552409482": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15360511165237335684": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3799171258564824874": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11732742421854164761": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17486925527036786359": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17122338330334998991": ["convolution_gpu_bfyx_gemm_like",1],
+        "7107313154723472157": ["convolution_gpu_bfyx_gemm_like",2],
+        "5519835581976587401": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12425310792514818973": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10892053822730512072": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "13448159575961515854": ["convolution_gpu_bfyx_gemm_like",0],
+        "6953478877896677022": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "15076307524263378967": ["convolution_gpu_bfyx_gemm_like",2],
+        "18369396029431709828": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "6695336381467406810": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2827850900421982274": ["convolution_gpu_bfyx_gemm_like",1],
+        "8149815705026829258": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "15783329079045263237": ["convolution_gpu_bfyx_gemm_like",1],
+        "16540183777173974162": ["convolution_gpu_bfyx_gemm_like",1],
+        "10908411570889102154": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14203061085285979556": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "10880656082867082647": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2850279308978256234": ["convolution_gpu_bfyx_gemm_like",2],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9343876424591024597": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13711710595263882397": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "447683677378974131": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "6853844061175773603": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3741411131962514208": ["convolution_gpu_bfyx_gemm_like",0],
+        "11191071895289217783": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11437885274663749440": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "14112695611389738149": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "6296371382672640627": ["convolution_gpu_bfyx_gemm_like",1],
+        "1743672154424707483": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4056723579347929559": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "2102507337684140674": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1713947356482032411": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1226681724476075216": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "1362239912535573615": ["convolution_gpu_bfyx_gemm_like",1],
+        "9146427497025645310": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "17342868362584820356": ["convolution_gpu_bfyx_gemm_like",1],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "11696231285411686761": ["convolution_gpu_bfyx_gemm_like",1],
+        "5186963188234940985": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9001645663675631429": ["fully_connected_gpu_fb_oi_ref",2],
+        "9055254157155243850": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11421235118459218209": ["convolution_gpu_bfyx_gemm_like",1],
+        "17991368786018745231": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13583272198088247606": ["convolution_gpu_bfyx_gemm_like",2],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12951069548510783681": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "2267942216745157485": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "12797434473085560369": ["convolution_gpu_bfyx_gemm_like",2],
+        "6155686980102491192": ["convolution_gpu_bfyx_gemm_like",2],
+        "11246470701714560770": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14559599508798500518": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "3617433210865054182": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "536646811796032046": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "6879801583428507100": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "6924316691569831424": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "11435397993598981900": ["convolution_gpu_bfyx_gemm_like",1],
+        "6322831233548420761": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "12182468247297592907": ["convolution_gpu_bfyx_gemm_like",2],
+        "10305912614137623024": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16244270858428653037": ["convolution_gpu_bfyx_gemm_like",2],
+        "2086001721804797157": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13952295742818866246": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "9579316322704307175": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10983344268706058114": ["convolution_gpu_bfyx_gemm_like",2],
+        "7199295899520406795": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "54019631544204590": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "4131527916449986086": ["convolution_gpu_bfyx_gemm_like",1],
+        "12659539044474018256": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "9771430089730856496": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "15834666915651997510": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "10045446802759419956": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8124881451525075977": ["convolution_gpu_bfyx_gemm_like",2],
+        "2430404993947067949": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "3668065353749623655": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "9358401110755269308": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7771969115805231266": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "5831305777612569716": ["convolution_gpu_bfyx_gemm_like",2],
+        "14174805457643822445": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12176879951537921518": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "9255337426504113924": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16608940349080184786": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14046217730873620907": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "6438522646185979880": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10708706979952421150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",724],
+        "4127717437639868970": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11761085899600261002": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8104609318998060422": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "2367791050032803116": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "4790960977352818689": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15775917744517770768": ["convolution_gpu_bfyx_gemm_like",1],
+        "16131671779145781667": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13485431068391184236": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18260147016899103633": ["convolution_gpu_bfyx_gemm_like",2],
+        "7002547494442875680": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17093159649157277089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16569200335969311660": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "15160322051545035612": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5514520264534847093": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3643056883397245235": ["convolution_gpu_bfyx_gemm_like",2],
+        "6423120553520000795": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "13385026134633096129": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11376522803174788945": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9694891301950867606": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "3102693432769248723": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15434536162164591656": ["convolution_gpu_bfyx_gemm_like",1],
+        "3593665238922509290": ["convolution_gpu_bfyx_gemm_like",1],
+        "6235132681081375078": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3910733479592621526": ["convolution_gpu_bfyx_gemm_like",2],
+        "6603489144277795818": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "9561367273233389233": ["convolution_gpu_bfyx_gemm_like",1],
+        "7890098956860637458": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "5094600092408024387": ["convolution_gpu_bfyx_gemm_like",2],
+        "2215194389847256545": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "11425187789506600967": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "16687215861591748162": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "18331981707436752260": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6778781361481531516": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11739629316219263056": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13661880440426932218": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11341771589317480665": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3646069704724135633": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3803179179802002296": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5458310740719324710": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "2654793073145467058": ["convolution_gpu_bfyx_gemm_like",2],
+        "17236135174912837061": ["convolution_gpu_bfyx_gemm_like",2],
+        "11962382064404466630": ["convolution_gpu_bfyx_gemm_like",1],
+        "3170274732463232729": ["convolution_gpu_bfyx_gemm_like",2],
+        "1137647382605909133": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "2335428826699999827": ["convolution_gpu_bfyx_os_iyx_osv16",884],
+        "18337160891834020517": ["convolution_gpu_bfyx_gemm_like",2],
+        "17908636589626460288": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5948701218437980356": ["convolution_gpu_bfyx_gemm_like",2],
+        "9424928280483728754": ["convolution_gpu_bfyx_gemm_like",2],
+        "143255828863957128": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16254257590403370542": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3816774953143987171": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2854124603710900850": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14981122123483756686": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "5912303851874077576": ["convolution_gpu_bfyx_gemm_like",2],
+        "1698847067049584068": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8618835732380720921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "17851024468934906318": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "2893564501191050837": ["convolution_gpu_bfyx_gemm_like",1],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3442073007560756473": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "10615252189597863928": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "5311718276151327830": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13076725905503922540": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15911434513425038508": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5890683283363730941": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7320142714269929201": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "2844746478867668588": ["convolution_gpu_bfyx_gemm_like",1],
+        "11188849626443657384": ["convolution_gpu_bfyx_gemm_like",2],
+        "13273455049742872922": ["convolution_gpu_bfyx_gemm_like",1],
+        "8575296926578119953": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16302630993799781492": ["convolution_gpu_bfyx_gemm_like",2],
+        "7218689869635572700": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "8647850242104327366": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "8036592210244553232": ["convolution_gpu_bfyx_gemm_like",2],
+        "1242366856673194709": ["convolution_gpu_bfyx_gemm_like",1],
+        "15841489476316341204": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "6737332058785771073": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11599932445375240727": ["convolution_gpu_bfyx_gemm_like",2],
+        "9209450984098528310": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "444533022549215983": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "13199524367893035805": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "3177915003579216846": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "755849895494634465": ["convolution_gpu_bfyx_gemm_like",2],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "5552958912776013600": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "7071991799972799089": ["convolution_gpu_bfyx_gemm_like",1],
+        "3747518910079195578": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "17228877915053571642": ["convolution_gpu_bfyx_gemm_like",1],
+        "1486768204660092247": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6946815194102787268": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "1469048759583678106": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "5024113153979057835": ["convolution_gpu_bfyx_gemm_like",2],
+        "17772882818194611202": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "5658491804782285708": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17425725917335895000": ["convolution_gpu_bfyx_gemm_like",2],
+        "2525260242689556544": ["convolution_gpu_bfyx_gemm_like",2],
+        "2257384183256237750": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "15566108481408840783": ["convolution_gpu_bfyx_gemm_like",2],
+        "1806154107556234": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "8449108317864057899": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "5947492124433175601": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "17920083826450150627": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5219048275475447369": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "5475537064464968733": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5368419079251107469": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3416636940668221406": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13831458435772917577": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17774979615691038302": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "1395293354112586043": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "15752695063119223631": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "5816730482014477109": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2198100074518629980": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14115040663093081148": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "17087143277789116317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3754411063032102107": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9753894415895178843": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "4692951005189464579": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "15180747404865201068": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "3297036980627776719": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3651651926851660222": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9816834679089152140": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "7059729537732609153": ["convolution_gpu_bfyx_os_iyx_osv16",487],
+        "15635018081312614614": ["convolution_gpu_bfyx_gemm_like",1],
+        "16440598510199834213": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "13405310261845268772": ["convolution_gpu_bfyx_gemm_like",1],
+        "7173828525834910425": ["convolution_gpu_bfyx_gemm_like",2],
+        "17907732260451873185": ["convolution_gpu_bfyx_gemm_like",1],
+        "7351443601143314161": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "6213444978855892717": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "9434761058126895612": ["convolution_gpu_bfyx_gemm_like",2],
+        "6149261133858739754": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16510194749934323304": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16978447917682236120": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6431225873891612234": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14502746747899017937": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11534123522633460320": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10961049607808752432": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13054706902087663592": ["convolution_gpu_bfyx_gemm_like",2],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",2],
+        "14218701503304823803": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16179959997108523051": ["convolution_gpu_bfyx_gemm_like",1],
+        "1945630503883822822": ["convolution_gpu_bfyx_gemm_like",2],
+        "6261584163347634965": ["convolution_gpu_bfyx_gemm_like",2],
+        "890897381495317874": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16862485519640051995": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17015421289522369423": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "16674897846232931666": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "7796037793136254198": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14394427817253242611": ["convolution_gpu_bfyx_gemm_like",2],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "3436770797199367854": ["convolution_gpu_bfyx_gemm_like",1],
+        "9100044555742394133": ["convolution_gpu_bfyx_gemm_like",1],
+        "912423125050985716": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7059809764116926828": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14066219153422011272": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "459936950868112292": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14284223645235602230": ["fully_connected_gpu_fb_io_ref",1],
+        "6659313690133629176": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17370560568464798319": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "11210961619302975072": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1557549837620967530": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5953754321266570854": ["convolution_gpu_bfyx_gemm_like",1],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8258382025812748961": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17464465663391774069": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7431069335622070596": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2410828969408182980": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15038779174806415801": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2070351447898375901": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "12352083215873760290": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "8269543491844451750": ["convolution_gpu_bfyx_os_iyx_osv16",189],
+        "5906083739416582743": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "18184154104081850641": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2111049986724040641": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "13366059704398720237": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4040607776348275579": ["convolution_gpu_bfyx_gemm_like",2],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4766071144928072260": ["convolution_gpu_bfyx_gemm_like",1],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "2081318772333460627": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "596528462327775677": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "2730604806511016352": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "13772209672418897120": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7156300614592977977": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "17598441149165536737": ["convolution_gpu_bfyx_gemm_like",2],
+        "11093147488085506266": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "17347387929692736001": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "820777941033224662": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "13045206675957093567": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "4652308622880770983": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6615830390513317821": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11353671464383068485": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "2510093757258898215": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10534355502345993326": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16590893345666612869": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1339402691552717009": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6323504675912413145": ["convolution_gpu_bfyx_gemm_like",2],
+        "5533829915176762003": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12361909180687647792": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "8948718883406304307": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "5066247088968357726": ["convolution_gpu_bfyx_gemm_like",2],
+        "1005880016096298476": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "15568690152071176945": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "14188045559946481097": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "15085980226773631346": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "9993925424761661218": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "10447427622114317323": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "12312291300513951124": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "60749853744407778": ["convolution_gpu_bfyx_gemm_like",0],
+        "16932090423428476170": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9996196793804333253": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5887877259873928726": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "2722601800398376127": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "13524128602135083081": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "123283730755186382": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10922353028117588062": ["convolution_gpu_bfyx_gemm_like",2],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "5845969526791988973": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "737706555781027628": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "14845194064376163156": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "202304354656398848": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2534285363781495903": ["convolution_gpu_bfyx_gemm_like",2],
+        "7883108394284369445": ["convolution_gpu_bfyx_gemm_like",1],
+        "3292554262586950764": ["convolution_gpu_bfyx_gemm_like",2],
+        "10205929431600082124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12440561123106715688": ["convolution_gpu_bfyx_gemm_like",2],
+        "14602509614865844486": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "16646144748089558351": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14725765847498813247": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5795940144756238917": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9133224739401155411": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12443662237620745732": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "4572185168237245759": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8271034912009744989": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "5116633474932727191": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5113313241198299504": ["convolution_gpu_bfyx_gemm_like",2],
+        "7247414730479113619": ["convolution_gpu_bfyx_gemm_like",1],
+        "14247451223653900488": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "13047793996728441528": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6420851258772300332": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "8451212914744825089": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "16511261203374835334": ["convolution_gpu_bfyx_gemm_like",2],
+        "13387766889016280910": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17997314629342774968": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6695224851008237679": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1208534686657112759": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14566544143931267758": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9198073694219066216": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "7217405970420485152": ["convolution_gpu_bfyx_gemm_like",2],
+        "1659851931406041285": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "7431237779891953779": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17942120824047252501": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16031140952379208074": ["convolution_gpu_bfyx_gemm_like",2],
+        "8195881973746570408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2010255131587843361": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "18206785126134139000": ["convolution_gpu_bfyx_gemm_like",2],
+        "2870715678422088243": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15757254795151275190": ["convolution_gpu_bfyx_gemm_like",2],
+        "3683201905077543598": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "4911398420005278258": ["convolution_gpu_bfyx_gemm_like",1],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13285123703712436126": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "970596838400633278": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6362453779168658462": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "13015379405020620466": ["convolution_gpu_bfyx_gemm_like",2],
+        "157852787707383962": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7083152697366621236": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14559552090809408184": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10995424394152951534": ["convolution_gpu_bfyx_gemm_like",2],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "17005088865778247367": ["convolution_gpu_bfyx_gemm_like",2],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "17651949893303962955": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9217386935739152562": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "16307719105384538170": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "5797545757863100286": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "17675227620234837075": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9004823715680825977": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10317038568333963064": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "868488930567226694": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "2789386984431816449": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13674246753382740056": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7569785094993085356": ["convolution_gpu_bfyx_gemm_like",1],
+        "13088023076667575514": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "5834006438103071406": ["convolution_gpu_bfyx_gemm_like",1],
+        "1603703756241612948": ["convolution_gpu_bfyx_gemm_like",2],
+        "10073779356457603252": ["convolution_gpu_bfyx_gemm_like",2],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13980058444317683376": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "1980887257657896260": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6447172410311223671": ["convolution_gpu_bfyx_gemm_like",2],
+        "5490683510357615963": ["convolution_gpu_bfyx_gemm_like",2],
+        "1281814301909101836": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3048753162882302153": ["convolution_gpu_bfyx_gemm_like",1],
+        "9667762333290150436": ["convolution_gpu_bfyx_gemm_like",1],
+        "13141069720428059461": ["convolution_gpu_bfyx_gemm_like",1],
+        "18114029275806885644": ["convolution_gpu_bfyx_1x1",2],
+        "16767657090925788431": ["convolution_gpu_bfyx_gemm_like",2],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2732519635571994212": ["convolution_gpu_bfyx_gemm_like",2],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "5115051214738974496": ["convolution_gpu_bfyx_gemm_like",2],
+        "17258278942367320412": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "9746964858035717775": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2460361970017706505": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "7450915928720828406": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9770300588867836071": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9628702542543622433": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "5230871884758163940": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "294103776081392899": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2986309211691835971": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1564774057733793087": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "9246213432501129631": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "11964639701912187118": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7863886351122918972": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "277151219694781348": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "9352866803638271156": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "13358754652597677285": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "3809343305878998617": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "861944552852043171": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7043547563530810431": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "4679163800360809315": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3746573775462003750": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15943174060386142134": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "7369471926167902143": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "2467766894778630615": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "6954257882806659594": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "11607736973932389832": ["convolution_gpu_bfyx_gemm_like",1],
+        "4145496852718466030": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17912189681971987483": ["convolution_gpu_bfyx_gemm_like",2],
+        "17073183514200378702": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "15591167992985613695": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "18957204268374834": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478421208861550581": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "16956980254113285457": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "10348660503952680688": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3510837206834640871": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15507430010796753396": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17965825642065048619": ["fully_connected_gpu_yxfb_ref",0],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16025442470600124062": ["convolution_gpu_bfyx_gemm_like",2],
+        "18031896952099861060": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14788817017267716113": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15031089621161080026": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "879896719155824868": ["convolution_gpu_bfyx_gemm_like",2],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11205571992835612111": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12309226514391994607": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12608839247035566137": ["convolution_gpu_bfyx_gemm_like",2],
+        "13602299412525111348": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "8143125165478395106": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "3182329375739242693": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14044495589185586465": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "14581447673401303181": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14172081523880352608": ["convolution_gpu_bfyx_os_iyx_osv16",569],
+        "1173321935056172683": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1314612539156304342": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "8707484843981694525": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "4073467095502162430": ["convolution_gpu_bfyx_gemm_like",1],
+        "10073439287681954518": ["convolution_gpu_bfyx_gemm_like",2],
+        "13267743753217317315": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6324194607665787911": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6157727013102138824": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "9947693652506812817": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "17790026124881397912": ["fully_connected_gpu_yxfb_ref",2],
+        "1757047061843709948": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "12571532345206950176": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "14848732804958314374": ["fully_connected_gpu_yxfb_ref",1],
+        "7476503420928065329": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "3457676694935264283": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11210371874006224582": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12715500118796263683": ["convolution_gpu_bfyx_gemm_like",2],
+        "3555204322491340337": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11620960210789252617": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "15896132602902277133": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14936045362442728963": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "8394944698739627742": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1938627662342504660": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17932475157983250382": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "5688623850477433571": ["convolution_gpu_bfyx_gemm_like",2],
+        "15868648764972133201": ["fully_connected_gpu_fb_oi_ref",1],
+        "9896765610231507042": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "10323345824599612614": ["convolution_gpu_bfyx_gemm_like",2],
+        "6381439938385141423": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1471017943056596406": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "2691406689892290663": ["convolution_gpu_bfyx_gemm_like",1],
+        "14864150409380754546": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "13896429056884108617": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16230621843665445228": ["convolution_gpu_bfyx_gemm_like",2],
+        "6639715607290389968": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5187613930764630394": ["convolution_gpu_bfyx_gemm_like",2],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "17212292336626940406": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12850610175882424919": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "12511186263003392018": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",927],
+        "18347915312427917189": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3665837617379468265": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17646712050658428055": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "884923290083082187": ["convolution_gpu_bfyx_gemm_like",2],
+        "2180753144963020203": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "727203296169504486": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4032516698162311723": ["convolution_gpu_bfyx_gemm_like",2],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "5750277248295796439": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6586833064055001967": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "10572208209982879914": ["convolution_gpu_bfyx_gemm_like",1],
+        "1305434952341925041": ["convolution_gpu_bfyx_gemm_like",2],
+        "11544455862638831851": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14336344152455180534": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "9151597254187513724": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "218477594596081189": ["convolution_gpu_bfyx_gemm_like",1],
+        "9794439339209980030": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5109770354438894645": ["convolution_gpu_bfyx_gemm_like",2],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "11646035413147246650": ["convolution_gpu_bfyx_gemm_like",2],
+        "2597435203284675496": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7312862821818362095": ["convolution_gpu_bfyx_gemm_like",2],
+        "2080397907007737054": ["convolution_gpu_bfyx_os_iyx_osv16",337],
+        "13596494923128445274": ["convolution_gpu_bfyx_gemm_like",2],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10831460252334010668": ["convolution_gpu_bfyx_gemm_like",2],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4259929195364411411": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10016243001407196485": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17044070592136685322": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12691733869577147545": ["convolution_gpu_bfyx_gemm_like",0],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2481005139798378616": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "10973267399508186283": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "15925338073584559984": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "13459568779083836506": ["convolution_gpu_bfyx_gemm_like",1],
+        "9380980604821454646": ["convolution_gpu_bfyx_gemm_like",2],
+        "2225233951957105071": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "2702566744272427570": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13058929683986290038": ["convolution_gpu_bfyx_gemm_like",2],
+        "14463983770858421738": ["convolution_gpu_bfyx_gemm_like",1],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "8204962103567653154": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12161602271403760008": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "4590784654677429162": ["convolution_gpu_bfyx_gemm_like",2],
+        "14696479950182046016": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "1917986916390093536": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "16644809154210062742": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6556795059657533200": ["convolution_gpu_bfyx_gemm_like",2],
+        "8100051552977329013": ["convolution_gpu_bfyx_gemm_like",1],
+        "11095908837221722097": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "9802832901508552733": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13502487084912428404": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "364197229238830807": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "10195952041746407559": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "3787897045202294227": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "15383553612351941890": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "5280450544965361875": ["convolution_gpu_bfyx_gemm_like",1],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "4753055238892504599": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2908856453997530641": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14865708345458193472": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",2],
+        "15325302411038679750": ["convolution_gpu_bfyx_gemm_like",2],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "14674266217397415571": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9267417754412894234": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4515798403196565084": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",873],
+        "17713011656078651": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "9963020556968031682": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2892571961726771633": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "11805311302922325617": ["convolution_gpu_bfyx_gemm_like",1],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "1882052795393187384": ["convolution_gpu_bfyx_gemm_like",1],
+        "12792454713887439830": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "12170874893413205000": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "12631324498619207834": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15300588247579013966": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "5116562847410288642": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "16985565646738638215": ["convolution_gpu_bfyx_gemm_like",0],
+        "17902799955139047426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12460004417430913427": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5352896995050401444": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6551173574001309451": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8108843303778211282": ["convolution_gpu_bfyx_gemm_like",2],
+        "1306339989221885682": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6905249031401202060": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "17798626036576472760": ["convolution_gpu_bfyx_os_iyx_osv16",153],
+        "6990161783770805523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9454028594043242985": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12756296523829594388": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17472252137354770318": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10786022075687454490": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "7820430581748383571": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16897485136352617189": ["convolution_gpu_bfyx_gemm_like",1],
+        "17664704673433112966": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7316825051569394089": ["convolution_gpu_bfyx_gemm_like",2],
+        "4563773888811395621": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16149924641081427062": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6124219814856247918": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "16499919609457089685": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10616832946298118456": ["convolution_gpu_bfyx_gemm_like",2],
+        "97332433783610027": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "3075961585045028347": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3276455911598591170": ["convolution_gpu_bfyx_1x1",2],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",613],
+        "499739705596245675": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17372520271370779917": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "14017025411515888007": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "17580970614129952250": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",60],
+        "6973621625148257910": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16705941191876956548": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12823080103951853168": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17128723415461475388": ["convolution_gpu_bfyx_gemm_like",2],
+        "3826083535442459719": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13411431109933021193": ["convolution_gpu_bfyx_gemm_like",1],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16053441017037949431": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7344363094493575878": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",1],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "15918017311798856029": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "7658318862249823838": ["convolution_gpu_bfyx_gemm_like",2],
+        "4136736579788862192": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "2540513729176799897": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2239948568632407776": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "8382509515623938786": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "1801066876009461857": ["convolution_gpu_bfyx_gemm_like",1],
+        "973402921452083017": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "1021364163511049664": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "10756831914332769026": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14969813450703071948": ["convolution_gpu_bfyx_gemm_like",1],
+        "1781619247831135285": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "2062195022363480864": ["convolution_gpu_bfyx_gemm_like",2],
+        "16587078304821304948": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "12461575861709234385": ["convolution_gpu_bfyx_gemm_like",2],
+        "9753702905908744910": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2665148871393634012": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "16953502084939981636": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "9835338452418388180": ["convolution_gpu_bfyx_gemm_like",2],
+        "13400559817638330692": ["convolution_gpu_bfyx_gemm_like",1],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7974614031099580856": ["convolution_gpu_bfyx_gemm_like",1],
+        "15803050672115583478": ["convolution_gpu_bfyx_gemm_like",2],
+        "10437861085319472289": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "11006325877486632502": ["convolution_gpu_bfyx_gemm_like",2],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "9593975471009029134": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "10455850115486014344": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15863083575228705763": ["fully_connected_gpu_fb_oi_ref",1],
+        "5733701901687257088": ["convolution_gpu_bfyx_gemm_like",2],
+        "7490524380333929773": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "13198480749588992978": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17097621900023182992": ["convolution_gpu_bfyx_gemm_like",1],
+        "17800494747865760215": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5280182001774668876": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3318430113631867573": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13155901262605819372": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5414285637221358737": ["convolution_gpu_bfyx_gemm_like",2],
+        "11878200328276635385": ["convolution_gpu_bfyx_gemm_like",2],
+        "8775336277634573074": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "8166976803757624321": ["convolution_gpu_bfyx_gemm_like",1],
+        "1825914669961085928": ["convolution_gpu_bfyx_gemm_like",2],
+        "8354812222032899427": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "7052552351421332490": ["convolution_gpu_bfyx_gemm_like",1],
+        "9780938731831129283": ["convolution_gpu_bfyx_gemm_like",2],
+        "7019316994558628633": ["convolution_gpu_bfyx_gemm_like",2],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15285660674737231657": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "10492401059875127091": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14645023135017806432": ["convolution_gpu_bfyx_gemm_like",2],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "16590030963319267708": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "11192914853196766423": ["convolution_gpu_bfyx_gemm_like",2],
+        "2841749330967314053": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "4412343276595791077": ["convolution_gpu_bfyx_gemm_like",2],
+        "15404352708246779967": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11327678075247102542": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "2004691166378443418": ["convolution_gpu_bfyx_gemm_like",2],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "710166379854475667": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "8012414839721814470": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "2072246877651869428": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11128727891847758901": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "16354698991868048871": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "17340789730321673934": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "15232673324549539143": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9383222411929463824": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16935619230235600309": ["convolution_gpu_bfyx_gemm_like",2],
+        "7390896672639655716": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10523106317496576486": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6641684310751726510": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "12621528958448913800": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14743760934522111296": ["convolution_gpu_bfyx_gemm_like",2],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7576873892262851401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11047327014045909812": ["convolution_gpu_bfyx_gemm_like",2],
+        "14724862072414829490": ["convolution_gpu_bfyx_gemm_like",1],
+        "8017024160145338317": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "11504777464995699839": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "13430897815414587336": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "919788620883613958": ["convolution_gpu_bfyx_gemm_like",2],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",1],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "2001464747481073870": ["convolution_gpu_bfyx_gemm_like",2],
+        "172303227623890951": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15872143905824807656": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",0],
+        "8850600236849718709": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5397783260083330774": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5706423911886410117": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12293705794290797805": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10468562355439385073": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7536267099632318821": ["convolution_gpu_bfyx_gemm_like",1],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",1],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "7768680313873061531": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1294871956977733262": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7917673216808705075": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13470016086265528105": ["convolution_gpu_bfyx_gemm_like",2],
+        "10628725059172743408": ["convolution_gpu_bfyx_gemm_like",2],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "8525631489886320841": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12963601040302529291": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "12324580272733221544": ["convolution_gpu_bfyx_gemm_like",2],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15011507454681836178": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11297512843662536362": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1592619919721912789": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "13480393611172760874": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "17888721282811720634": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "15299926486228458704": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "17575578027095664417": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15411603884973340468": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "7137632495125292608": ["convolution_gpu_bfyx_gemm_like",2],
+        "13367043015761260275": ["convolution_gpu_bfyx_gemm_like",1],
+        "4225955829811705872": ["convolution_gpu_bfyx_gemm_like",1],
+        "4082218299236753259": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "13372079273473545269": ["convolution_gpu_bfyx_gemm_like",2],
+        "3805854200552708060": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4693778191222244259": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "13288357587089816620": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3415589023848700079": ["convolution_gpu_bfyx_gemm_like",2],
+        "11111488580071749965": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "2431923918345445420": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "8703051983346886620": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "480374950802530618": ["convolution_gpu_bfyx_gemm_like",0],
+        "6569793510829850291": ["convolution_gpu_bfyx_gemm_like",1],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16614678178197571772": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "1249137685908951501": ["convolution_gpu_bfyx_gemm_like",1],
+        "14517120053341144411": ["convolution_gpu_bfyx_gemm_like",0],
+        "14733291836016183044": ["convolution_gpu_bfyx_gemm_like",2],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17769940507971546305": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13500369101462555447": ["convolution_gpu_bfyx_gemm_like",2],
+        "10995849055789490935": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15972830392998437739": ["convolution_gpu_bfyx_gemm_like",1],
+        "11984095218733350838": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10322586483496198615": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15818237122613168508": ["convolution_gpu_bfyx_gemm_like",2],
+        "6313048719388952335": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15417738436777481469": ["convolution_gpu_bfyx_gemm_like",2],
+        "7390201584703727318": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "16114623916610925741": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17974200478864274127": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3237680963342495368": ["convolution_gpu_bfyx_gemm_like",2],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "12971833748980664090": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "17128760774072077101": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3913951712614107871": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13282612510005390816": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4604220876945646096": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "2056597791109604534": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4713580645061462578": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12927339938362960563": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14132860735060026066": ["convolution_gpu_bfyx_gemm_like",2],
+        "12642701787250074691": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "3448477246688526708": ["convolution_gpu_bfyx_gemm_like",1],
+        "5751627653496545003": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "11033824757086203326": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "10600884986702650404": ["convolution_gpu_bfyx_gemm_like",1],
+        "9853089109234784643": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "11568162864377479487": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "18372277746801271292": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15173187675372221634": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "18218631037214746168": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13676670925355487305": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1061595672605627170": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4718705504966715203": ["convolution_gpu_bfyx_gemm_like",2],
+        "8007667797556094444": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "601591624187191068": ["convolution_gpu_bfyx_gemm_like",2],
+        "6375149408738336520": ["convolution_gpu_bfyx_gemm_like",2],
+        "16273414163942580140": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15075932061614449973": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15661322183507404821": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2758256770667070477": ["convolution_gpu_bfyx_gemm_like",1],
+        "15603710070700542017": ["convolution_gpu_bfyx_gemm_like",2],
+        "8901432555239515645": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "3610579553304450107": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "5162737590442940024": ["convolution_gpu_bfyx_gemm_like",1],
+        "8981229334098733320": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3828988304073539836": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14945451027055549800": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10084794570892043447": ["convolution_gpu_bfyx_gemm_like",2],
+        "8363432163596927598": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6046380638013542109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2850803473613487020": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "3895088069642140043": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "11004350075893421731": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "659846949368492111": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "768765852586619095": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14421061973479991516": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "846485116335195633": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "13696782397412896129": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15071888879264671307": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "13479754018079206598": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "237384442106085756": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "1879844536951785808": ["convolution_gpu_bfyx_gemm_like",2],
+        "9776332064497085361": ["convolution_gpu_bfyx_gemm_like",2],
+        "4897991181236908768": ["convolution_gpu_bfyx_gemm_like",2],
+        "15959241441689395955": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "16168891366331544806": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "14595102366207856448": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12184558469694708819": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "1522591417942130702": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "11447737411040418462": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "6137405768481559638": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3502889736327580141": ["convolution_gpu_bfyx_gemm_like",1],
+        "5632101951796129342": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16140133852987111783": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "6203602270552179462": ["convolution_gpu_bfyx_gemm_like",1],
+        "2124458313471852768": ["convolution_gpu_bfyx_gemm_like",1],
+        "10033076377998157101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "11623764266322172086": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8569122574675372789": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9883682535839267422": ["convolution_gpu_bfyx_gemm_like",2],
+        "11773726534842908728": ["convolution_gpu_bfyx_gemm_like",2],
+        "16614170159588864300": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "1168311873250200110": ["convolution_gpu_bfyx_gemm_like",2],
+        "3615203440895591147": ["convolution_gpu_bfyx_gemm_like",1],
+        "5435560857659377132": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1027438463802481676": ["convolution_gpu_bfyx_gemm_like",2],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "7303492518741737111": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17631458041591681785": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17691748026963003695": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "17580933462801685507": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15826150125827529199": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17163595630291422874": ["convolution_gpu_bfyx_gemm_like",1],
+        "2737840613867456953": ["convolution_gpu_bfyx_gemm_like",0],
+        "9177395776408296291": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17923035110851963413": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13447028922679236865": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1423297940282476513": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "4296524295134959042": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11807945822985245634": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16070611944881238498": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "17515573322312447679": ["convolution_gpu_bfyx_gemm_like",2],
+        "15308667224953963012": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "11561352430430157770": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "890679620691833367": ["convolution_gpu_bfyx_gemm_like",2],
+        "13194245601015251743": ["fully_connected_gpu_fb_io_ref",0],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "2542984219353153495": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642397690605957294": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "15451193085395494344": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12698546873263218041": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14446441689031758543": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "12169920104076167571": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17309224746854446222": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8703758535351908295": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4030835922805418609": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12916369918132790013": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "6707221689266688389": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "4890442595203749341": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "2294026590516781945": ["convolution_gpu_bfyx_gemm_like",2],
+        "8422748157997350873": ["convolution_gpu_bfyx_gemm_like",2],
+        "6495132856471482043": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "1285313118947640320": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1429370139030130929": ["convolution_gpu_bfyx_gemm_like",1],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_gemm",2],
+        "16957170318200599740": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10134708781744282286": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12466721526829931923": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15470323769252511904": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "15348127927851026409": ["convolution_gpu_bfyx_gemm_like",2],
+        "7926301289570686825": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15221712686851573528": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8790992468693685188": ["fully_connected_gpu_fb_io_ref",1],
+        "6266336185072196699": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "7957927312958744432": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "17810119189318801197": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "8317140711232187781": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "14132290154676895976": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "9185109795156451440": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "5969899876159536205": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18308661808437079996": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5461980510262646821": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "9614300332487270888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17511724795386380064": ["convolution_gpu_bfyx_gemm_like",0],
+        "9666426531743983113": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "5519244962044894877": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9740466267717175474": ["convolution_gpu_bfyx_gemm_like",2],
+        "7562624810837784407": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10271474583233390474": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "10050254009828302053": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "988812830514150932": ["convolution_gpu_bfyx_gemm_like",2],
+        "12063854963434677046": ["convolution_gpu_bfyx_gemm_like",2],
+        "12418390364502912036": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "426267761240826769": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12848303763972625729": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "11269720109905550213": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7440546908141206022": ["convolution_gpu_bfyx_gemm_like",1],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",2],
+        "6578804773136886939": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11589555938436186313": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8374232727884943288": ["convolution_gpu_bfyx_gemm_like",1],
+        "10591159235183381823": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "7819934200255007163": ["fully_connected_gpu_fb_oi_ref",2],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "13892202459701213504": ["convolution_gpu_bfyx_gemm_like",0],
+        "9181826459972753268": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "8612114608666892632": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "1827410519323879183": ["convolution_gpu_bfyx_1x1",2],
+        "12190841837604350271": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "16568662638983972991": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "13979227237506927267": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "11823106525249133834": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1254745727978231148": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "533820672115442982": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "18146068930296529306": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17790622334577372736": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13388424034634316547": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "17171513366028235799": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10031973538398542700": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14869125900405603130": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11522488904021243956": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "11338906515425639970": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1228256819256996416": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "12253987037990618484": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1208243889917809864": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9104236539185546468": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14681705641267917886": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13695012630130671371": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "11932768899981458741": ["convolution_gpu_bfyx_gemm_like",2],
+        "15282806587681892519": ["convolution_gpu_bfyx_gemm_like",1],
+        "16851949759898002809": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "1338534626640014074": ["convolution_gpu_bfyx_gemm_like",2],
+        "7496699438957793920": ["convolution_gpu_bfyx_gemm_like",2],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14234117003504517946": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "15412690778572403180": ["convolution_gpu_bfyx_1x1",2],
+        "16832083703120717402": ["convolution_gpu_bfyx_gemm_like",1],
+        "12507525913398812998": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13143747549517987032": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12013883366396753346": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5718747983756317198": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "1771153051233437607": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3853598651573655548": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11686716391002981733": ["convolution_gpu_bfyx_gemm_like",1],
+        "9513403717116039597": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "10713207196920878995": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "466868648178437688": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12396552020665536506": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "17281198415161259885": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1596472719837608525": ["convolution_gpu_bfyx_gemm_like",2],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "11976258954756052550": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "6985970932645412773": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "498439373962299687": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "9850711648349010674": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8375778282166369933": ["convolution_gpu_bfyx_gemm_like",1],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "10359995612603125965": ["convolution_gpu_bfyx_gemm_like",2],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "16360543923316690540": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "9714770878761308566": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "4122312805832663323": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2096021095904820251": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1677118421195120152": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9285566577169147378": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10168217053882274702": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "16322719022997791344": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "16741985699154392565": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16429816273405099453": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3596159214965874273": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4212697578665550281": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "15646774522467486699": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "7617123358753247310": ["fully_connected_gpu_fb_io_ref",1],
+        "1142725391726703078": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5843291595446603376": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "15972805725107234322": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "4859271780094116779": ["convolution_gpu_bfyx_gemm_like",2],
+        "14811022197918391667": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2438221595194783178": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "5957444113623953990": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7941359635463232326": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10732225577823701543": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16063854283763838910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4292467512797995948": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16628180201355989101": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "1745930004673880589": ["convolution_gpu_bfyx_gemm_like",2],
+        "6377828127090689238": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "1787598049938821496": ["convolution_gpu_bfyx_gemm_like",1],
+        "7380979920013545867": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12978004383198641522": ["convolution_gpu_bfyx_gemm_like",1],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "14815498807515058447": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "5889635603816026293": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "17615365894230830516": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9739077580693165062": ["convolution_gpu_bfyx_gemm_like",2],
+        "17429692714456679999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1924673125135960260": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "12981316015058930198": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "12573987322091254072": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "3446991010350155849": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "3865480446980740412": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10788148990012795028": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6150043972317126583": ["convolution_gpu_bfyx_gemm_like",1],
+        "15995056067568652754": ["convolution_gpu_bfyx_gemm_like",1],
+        "16463454447642623848": ["convolution_gpu_bfyx_gemm_like",2],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "14601912265050074833": ["convolution_gpu_bfyx_gemm_like",2],
+        "2415883693527779570": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "9788704336046308724": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "2599817012641445801": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "4476218615403440835": ["convolution_gpu_bfyx_gemm_like",2],
+        "14463841899941062548": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "82249723699159955": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",809],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14463173937397982331": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "8797661560676476245": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3021451990778420603": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8866164762286856139": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3216604922889072404": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15024023281204917061": ["convolution_gpu_bfyx_gemm_like",2],
+        "5233164031954315264": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "14326748416648598247": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "259085394007031207": ["convolution_gpu_bfyx_gemm_like",2],
+        "7877637636782924097": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13775683667344570223": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",0],
+        "531020979837645217": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13771196685227797262": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "2546472090573813082": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2399812257701033542": ["convolution_gpu_bfyx_gemm_like",2],
+        "11306782565667740785": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "9454146598828084176": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "7486133596762640215": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "12609790757824750429": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "17142080999569154649": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8684867236134349888": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",2],
+        "7999747927804607567": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6522575549211855712": ["convolution_gpu_bfyx_gemm_like",2],
+        "9596656797750683465": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9929060811766882316": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2042821994795163366": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "14365699621119565405": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5054574917425211132": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18085089358509617299": ["convolution_gpu_bfyx_gemm_like",2],
+        "13675314612031135613": ["convolution_gpu_bfyx_gemm_like",1],
+        "8047078039937885319": ["convolution_gpu_bfyx_gemm_like",2],
+        "10897008852059401902": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "12323840136934980793": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "11710299944796838170": ["convolution_gpu_bfyx_gemm_like",2],
+        "16172528828198474326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16026019808764920641": ["convolution_gpu_bfyx_gemm_like",2],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "704262295684441748": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3227725087355827716": ["convolution_gpu_bfyx_gemm_like",2],
+        "9207413252274439059": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "7666505529539001492": ["convolution_gpu_bfyx_gemm_like",2],
+        "2451627421465368826": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14385995236701277049": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "7371498023669344385": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "978154682881866623": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "6210051945051792519": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2078717472711037103": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",2],
+        "15741938682483664203": ["convolution_gpu_bfyx_1x1",2],
+        "9226443907548972870": ["convolution_gpu_bfyx_gemm_like",2],
+        "15322019609805777935": ["convolution_gpu_bfyx_gemm_like",2],
+        "4132087699110753428": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7259373400504003467": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13381441263790184121": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "10409424254454997557": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "360764089318153518": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10792503079194374004": ["convolution_gpu_bfyx_gemm_like",1],
+        "16153434096698006308": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13925839061045347955": ["convolution_gpu_bfyx_gemm_like",1],
+        "2295643314299482773": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "6456426339461437148": ["convolution_gpu_bfyx_gemm_like",2],
+        "16454286604955135655": ["convolution_gpu_bfyx_gemm_like",2],
+        "17721709435558297965": ["convolution_gpu_bfyx_gemm_like",1],
+        "8230144305844912369": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5083776511235413204": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5002362836567498954": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12825029449351875037": ["convolution_gpu_bfyx_gemm_like",1],
+        "4409539711630405776": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4714858252066253834": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11679869968143173159": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "3291900073868076610": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "5961488595080209440": ["convolution_gpu_bfyx_gemm_like",2],
+        "984472462878596435": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "5601320732740276692": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "1090447867763814054": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16081023484008718887": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "10879183694331631189": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8549465639583777774": ["convolution_gpu_bfyx_gemm_like",2],
+        "17500224380474287862": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "14568560907026487922": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4729855738455185191": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "11135894989941122115": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "6983900601570231321": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4239133538073498792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10771178773821148370": ["convolution_gpu_bfyx_gemm_like",2],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9924213107024674692": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16932172538978111342": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "14249486431781112226": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "13987250743654950733": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "8817624284607822971": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6692408578556372014": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",106],
+        "14291113322487568376": ["convolution_gpu_bfyx_gemm_like",2],
+        "4690935789908896751": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9349890134436171288": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10720782649044333851": ["convolution_gpu_bfyx_gemm_like",2],
+        "8159489372517869446": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "16916632481840858091": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5854267518455107328": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4287441125635022306": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "784988240891749445": ["convolution_gpu_bfyx_gemm_like",2],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1855527356709753100": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14387663434151374245": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "9455406830371528486": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2482449683288477640": ["convolution_gpu_bfyx_gemm_like",2],
+        "656536921219262336": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "11918018989601427118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "861813331533609605": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "8855801044538137828": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "8045393243176844621": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "10935410906182995784": ["convolution_gpu_bfyx_gemm_like",1],
+        "16642535448111764945": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "3743573500773847162": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14213127286928643795": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "6750269489578112382": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "6784146431605417954": ["convolution_gpu_bfyx_gemm_like",1],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "15838114628203742383": ["convolution_gpu_bfyx_gemm_like",2],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "8316848551837633169": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "7981376447277193852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "702096475436365058": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15786328370300803713": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4224423702382859092": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "8061914949376516780": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13412296930014397060": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "893885204484374577": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "13174363822969694054": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "7200893702912130808": ["convolution_gpu_bfyx_gemm_like",1],
+        "1608378717397996752": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "8272823732258536202": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2946926779445063554": ["convolution_gpu_bfyx_gemm_like",2],
+        "18010600104565458874": ["convolution_gpu_bfyx_gemm_like",1],
+        "1364905900191854779": ["convolution_gpu_bfyx_gemm_like",2],
+        "3509027370372599394": ["fully_connected_gpu_fb_io_ref",2],
+        "1889773840456761365": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8054185159612481260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12831670701606794888": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18006581941186887676": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1303304215797905198": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17480519865636248903": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8619380242063264016": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12242618640422208652": ["convolution_gpu_bfyx_gemm_like",2],
+        "12360796145248339074": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "10670829898588047148": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7982784766505903515": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13989803206226593565": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "15024130918582332928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15026219694198820614": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17827762625385383658": ["convolution_gpu_bfyx_gemm_like",2],
+        "13939772608127902428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",536],
+        "15953607231296296913": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10890975553758439233": ["convolution_gpu_bfyx_gemm_like",2],
+        "2777614869053822003": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9884646296875511696": ["convolution_gpu_bfyx_gemm_like",2],
+        "3989707993712888760": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4082623789007884063": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1160579996766519752": ["convolution_gpu_bfyx_gemm_like",2],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "3574679673239756551": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10117376369841171716": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "13289306769823703069": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "2079476232214121671": ["convolution_gpu_bfyx_gemm_like",2],
+        "3831257753143317802": ["convolution_gpu_bfyx_gemm_like",2],
+        "18074320074700491416": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9005351264094503686": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5440622601084846974": ["convolution_gpu_bfyx_gemm_like",1],
+        "9833242806281729759": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2411809718611709031": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "5516343490635816913": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8735735614506773179": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "2032438743863827309": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "16691293834516280510": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4466552246808462897": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "6547565989244888354": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",2],
+        "7367814057959247537": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16668140522258646445": ["convolution_gpu_bfyx_gemm_like",2],
+        "4631772220201098020": ["convolution_gpu_bfyx_gemm_like",2],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4417341352109525283": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17955654518744592086": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7146559117784312265": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "699127221549844251": ["convolution_gpu_bfyx_gemm_like",1],
+        "2712946943923358377": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2662628817605495834": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "13625877249040282040": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "8146559042269976123": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8057302050645780813": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13476976389397273052": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "13765632280570725774": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "5643908654122573882": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17254775053427612466": ["fully_connected_gpu_fb_oi_ref",2],
+        "17471843449888763571": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17707294419513060769": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7494124707566708728": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "1589338074286085915": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "18167956836333309556": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "587350550384936211": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "2104529100867065546": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "8709180250014055873": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "15392592805235453180": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "10285802605410795788": ["convolution_gpu_bfyx_gemm_like",2],
+        "9794061741834174000": ["convolution_gpu_bfyx_gemm_like",2],
+        "15594387862678649962": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "15715522462313302642": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "2752322006160986801": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15156015174611610705": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "17850932752450917677": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "498420237272375425": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",1],
+        "1884327428051733366": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "11455732989503244360": ["convolution_gpu_bfyx_gemm_like",1],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "832976844701988460": ["convolution_gpu_bfyx_gemm_like",2],
+        "1054159213127890689": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "6293500642319778096": ["convolution_gpu_bfyx_gemm_like",2],
+        "13255006150107668739": ["convolution_gpu_bfyx_gemm_like",1],
+        "1994707002538257258": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7395593936948809439": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "12009524797137164943": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11151426820269138585": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7762778382848852790": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15963358868537664345": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7277156316894715321": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "4817953977830392054": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "7925721388119083644": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "555153826947872383": ["convolution_gpu_bfyx_gemm_like",2],
+        "8797843396807284399": ["convolution_gpu_bfyx_gemm_like",2],
+        "574359978358296617": ["convolution_gpu_bfyx_gemm_like",2],
+        "7419990519344756626": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5786828339670204894": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10545749454895857995": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7923602459997389254": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13580438297062687335": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "7054270030260701612": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "1265107284215037966": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "9197931868200777891": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "7996470545015324613": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17543625777838573622": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "17361319565503258506": ["convolution_gpu_bfyx_gemm_like",1],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",1041],
+        "2039909180006215069": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14292252222828824305": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "8994777547915132466": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "2521072060867896298": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17550795608527501180": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7439918590741058820": ["convolution_gpu_bfyx_gemm_like",2],
+        "14113510820933411052": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4949865765880884373": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "12361848206190267821": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "6128534975733321186": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "14400339764883906933": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1957975992563882145": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "7963120178142346699": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13941251104772804303": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "7385295618478993079": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14219526370377548492": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "3693042354944382600": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9416285845239621878": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15361186788588226064": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "5933743119393822386": ["convolution_gpu_bfyx_gemm_like",1],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3995098494991567714": ["convolution_gpu_bfyx_gemm_like",0],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "17160724961832795383": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "6145197915306632859": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1336477297334930004": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6719956770229212208": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "8848042913869254179": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3762117189312286955": ["convolution_gpu_bfyx_gemm_like",2],
+        "3142706898070129318": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "16122033101591094139": ["fully_connected_gpu_fb_oi_ref",1],
+        "4992668316921598993": ["convolution_gpu_bfyx_gemm_like",2],
+        "12174729877807876787": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "12706645084970410965": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8375465895534833097": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "2384310584901598995": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "1499841226042523429": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "11050239499079842408": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2325807459008347256": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9899897639161550704": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16443833779968719790": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1592994755823247500": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "18219755699990183812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17197868427757781334": ["convolution_gpu_bfyx_os_iyx_osv16",623],
+        "7573223193924678686": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "6858245954375015939": ["convolution_gpu_bfyx_gemm_like",2],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "10396343030099602596": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11680829908738480957": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11033758130987285174": ["convolution_gpu_bfyx_gemm_like",2],
+        "17332395907621747512": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8221243069068316492": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13468713306678453952": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3039050517419021849": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "18052322665755789573": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "8774613863662947205": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "18429276095695345973": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "800262759663182290": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "2299440282267661763": ["convolution_gpu_bfyx_gemm_like",2],
+        "7353255713834431471": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "17294244481988344762": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "9953329530402569669": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "3623695848220673001": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4104803308438043557": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16149794106807509790": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5326891298755303584": ["convolution_gpu_bfyx_gemm_like",2],
+        "3626743386403140330": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2260718905219541967": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15668791697154389130": ["convolution_gpu_bfyx_gemm_like",2],
+        "14034487492239603874": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "5020605371834958647": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11341287517759485930": ["convolution_gpu_bfyx_gemm_like",2],
+        "12940491379482292807": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "11767263058642131204": ["convolution_gpu_bfyx_gemm_like",0],
+        "5603409300903611279": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13373912451448693522": ["convolution_gpu_bfyx_gemm_like",1],
+        "16001665772103476029": ["convolution_gpu_bfyx_gemm_like",2],
+        "15107740124884150777": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "11215297942420903101": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "10710426249911063154": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15047163348308549816": ["convolution_gpu_bfyx_gemm_like",2],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "12434799432980627966": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "11507538232733291666": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "7550660458541314838": ["convolution_gpu_bfyx_gemm_like",2],
+        "749424160149709131": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14606504543906913119": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "7084794834886364709": ["convolution_gpu_bfyx_gemm_like",0],
+        "7071864660784255328": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3495464175121035222": ["convolution_gpu_bfyx_gemm_like",1],
+        "2388209402010617408": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1114661658519542600": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9743549865786050651": ["convolution_gpu_bfyx_gemm_like",1],
+        "13646026173083209094": ["convolution_gpu_bfyx_gemm_like",1],
+        "4440261013093281358": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7589346100701197023": ["convolution_gpu_bfyx_gemm_like",2],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "13102754309439605192": ["convolution_gpu_bfyx_gemm_like",2],
+        "14094981198645015124": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17829854042305231384": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "1673458534805854479": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9767355861002822967": ["convolution_gpu_bfyx_gemm_like",2],
+        "10040774301055885786": ["convolution_gpu_bfyx_gemm_like",2],
+        "8422541638844255768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10392013312924273545": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6236173564220169058": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "15487686565734149288": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "12044635257539223503": ["convolution_gpu_bfyx_gemm_like",1],
+        "17716065235878633691": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3217555855036660482": ["fully_connected_gpu_fb_io_ref",1],
+        "10000918095695585210": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18021893665721597443": ["convolution_gpu_bfyx_gemm_like",2],
+        "1540459344569916165": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2460365527384422680": ["convolution_gpu_bfyx_1x1",2],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "2820364088001594654": ["convolution_gpu_bfyx_os_iyx_osv16",193],
+        "6536333665377249409": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5329218407413679209": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "2425177545256374371": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "2944333966072327932": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "3007505068107685147": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14727155647330710270": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13600579723542095577": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "8611417708673038653": ["convolution_gpu_bfyx_gemm_like",1],
+        "6876300000441081789": ["convolution_gpu_bfyx_gemm_like",1],
+        "642695492431061226": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7451956047774945675": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1954255299238402738": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "10231289519907741812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7919434905719674781": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "16928564394848059094": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12274268980330855890": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "12270548292992377827": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "8181704316455400709": ["convolution_gpu_bfyx_gemm_like",2],
+        "465434718088281598": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "18009765676050504407": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5409329687010951601": ["convolution_gpu_bfyx_gemm_like",2],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13133323947490009546": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "14600118619533737293": ["fully_connected_gpu_fb_oi_ref",1],
+        "13872507386032159320": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "3223726179820717808": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "12407002532205454767": ["convolution_gpu_bfyx_os_iyx_osv16",1020],
+        "18214405165366931407": ["convolution_gpu_bfyx_gemm_like",2],
+        "13661225837036677371": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "505102470055903237": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "3393657180338401174": ["convolution_gpu_bfyx_gemm_like",2],
+        "3013359852055354405": ["convolution_gpu_bfyx_os_iyx_osv16",659],
+        "7662818300983256668": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "14206328165498357760": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "2688060699200137048": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12397493112115605421": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3095800485689583188": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14890705803637193714": ["fully_connected_gpu_fb_oi_ref",1],
+        "12942776337163777730": ["convolution_gpu_bfyx_gemm_like",2],
+        "4550028191070279999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3060709449176556770": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "7878546319081647695": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13648462079765466923": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10178462061836778766": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "323234725943768094": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "4940950742383121943": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "15630324874714927821": ["convolution_gpu_bfyx_gemm_like",2],
+        "14355612297330229277": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13499476832444042458": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "3910579267273061669": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "1188428190761098784": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8880141633878776982": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2253443114793765536": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14512407261081843554": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "9067207838429479363": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3119045125726216156": ["convolution_gpu_bfyx_gemm_like",2],
+        "374917621051549930": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",2],
+        "6717243674054760598": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11501291170503766805": ["convolution_gpu_bfyx_1x1",2],
+        "7210729932836957540": ["convolution_gpu_bfyx_gemm_like",1],
+        "7202348866484870042": ["convolution_gpu_bfyx_gemm_like",1],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "14579050468883613611": ["convolution_gpu_bfyx_gemm_like",2],
+        "2511318920505993508": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12606196670791209919": ["convolution_gpu_bfyx_gemm_like",2],
+        "10857567623940140266": ["fully_connected_gpu_fb_oi_ref",0],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "3420595282107277905": ["convolution_gpu_bfyx_gemm_like",2],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7978370756654787278": ["convolution_gpu_bfyx_gemm_like",2],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",2],
+        "11855777686733253894": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "1708527842474979709": ["convolution_gpu_bfyx_gemm_like",2],
+        "11352094952907979172": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "11875516764635427358": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "8954139494467782298": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1071663904249509302": ["convolution_gpu_bfyx_gemm_like",1],
+        "14002149958562285929": ["convolution_gpu_bfyx_os_iyx_osv16",722],
+        "10726604761650410429": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "2491079452377917458": ["convolution_gpu_bfyx_gemm_like",2],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",2],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6577754887650563753": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12878858391355259417": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6545814945227676265": ["convolution_gpu_bfyx_gemm_like",1],
+        "2183193161596798350": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14204028212129440429": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4551182180668229945": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "14386256118128644729": ["convolution_gpu_bfyx_gemm_like",2],
+        "11901687795497708884": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15214779483545052950": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "16125965158927145599": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8799427328659766574": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1431307776181554710": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "12260041857695743504": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "5301394322453453489": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "568023964685613279": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "11845504142528424662": ["convolution_gpu_bfyx_gemm_like",2],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "11206468937763516689": ["convolution_gpu_bfyx_gemm_like",2],
+        "3134642518413656360": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15718011075217705480": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "13539754964691689955": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "6108475838757986889": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13505239531682993049": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "10775785602937893911": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14640909901379728455": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4152919461079296700": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14443599718173185176": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4438055737691342460": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2315979511894958580": ["convolution_gpu_bfyx_gemm_like",2],
+        "6673345869874137667": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1166351402218387037": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17495070522944546801": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "14749290801006453098": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "4950144098898276785": ["convolution_gpu_bfyx_gemm_like",2],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",940],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "17818587793483875865": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "18400137500031567479": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6497227130861473497": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "5821887901198535792": ["convolution_gpu_bfyx_gemm_like",1],
+        "16768470780681544910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "10105539975183207700": ["convolution_gpu_bfyx_gemm_like",1],
+        "4834591210311380436": ["convolution_gpu_bfyx_gemm_like",2],
+        "9191832520273617003": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "14793709237400480942": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11322451605795727486": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "16671217333627463205": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "8252948921459286528": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10098661517988566506": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12156683064218448087": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13919146899409616452": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "6366477005383470532": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "11066538564303243604": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14004715832115880216": ["convolution_gpu_bfyx_gemm_like",2],
+        "14156264942337528284": ["convolution_gpu_bfyx_gemm_like",1],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",525],
+        "1640247336720128805": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "11299275869800089824": ["convolution_gpu_bfyx_gemm_like",1],
+        "6549150139619174585": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "17715478364817621621": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "12864338805958186191": ["convolution_gpu_bfyx_gemm_like",1],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "1014934490175718598": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "3889688816787688160": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10131754493574658838": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14185215566042478462": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "868827643007921561": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "14373201903743002596": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "16947456984272008059": ["convolution_gpu_bfyx_gemm_like",2],
+        "8676627474831455650": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "528618206870447012": ["convolution_gpu_bfyx_gemm_like",1],
+        "4091785563304559606": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "15225331270926229394": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "15136557970717196814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7958595516465029682": ["convolution_gpu_bfyx_gemm_like",2],
+        "11696708134796103802": ["convolution_gpu_bfyx_gemm_like",2],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "9938569017948413183": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "18245807830790717634": ["convolution_gpu_bfyx_gemm_like",2],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "5953847130949209741": ["convolution_gpu_bfyx_gemm_like",2],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4705082468295108028": ["convolution_gpu_bfyx_gemm_like",2],
+        "16955653765071712611": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1300605032840412845": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "436514945529747349": ["convolution_gpu_bfyx_gemm_like",1],
+        "13225520357177380691": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1817929353109443200": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1146419220317481042": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18215260982292770252": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14066660382918185188": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13140527131098422428": ["convolution_gpu_bfyx_gemm_like",2],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17796784393519192261": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "2524233418633897945": ["convolution_gpu_bfyx_gemm_like",2],
+        "14548629377527143409": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7345632855842905966": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "15487538714246568015": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10569290125322858127": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7570078010521452080": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "8321769923556905957": ["convolution_gpu_bfyx_gemm_like",2],
+        "15861253904810475842": ["convolution_gpu_bfyx_gemm_like",2],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "6616869272699525153": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2246205611561147645": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "18120169120088482114": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "10931533380146553429": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3524702814173574637": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "6026065914078520895": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "12169896916690963726": ["convolution_gpu_bfyx_gemm_like",2],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7688176479120305539": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7065121716452374910": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "10971971008143485353": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "13748207123919546925": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14944798586094927774": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11333068902248367382": ["convolution_gpu_bfyx_gemm_like",2],
+        "5687802882700097624": ["convolution_gpu_bfyx_gemm_like",2],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11345101652477732928": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1103228955716492167": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "11006013403687198405": ["convolution_gpu_bfyx_gemm_like",1],
+        "17337689605705740533": ["convolution_gpu_bfyx_gemm_like",1],
+        "17035903590837750750": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "6774610647537858980": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9127066823698894015": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "17261237809202428783": ["convolution_gpu_bfyx_gemm_like",2],
+        "16207793515276299964": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "6096189754478965440": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9224223997975166038": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "16763947298003094797": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "7777462936697576463": ["convolution_gpu_bfyx_gemm_like",2],
+        "16349083818768061549": ["convolution_gpu_bfyx_gemm_like",2],
+        "17969061908734583627": ["convolution_gpu_bfyx_gemm_like",2],
+        "5901470393936541758": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14173867073407110501": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17602686382249457351": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "801943727169437597": ["convolution_gpu_bfyx_gemm_like",1],
+        "5261762234237034874": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6258191734224827354": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "1138439260035360722": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "4239273649303286078": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "17002053020454970509": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9305957796037500628": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5547961548101779135": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5756395349044790327": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "765085235448596225": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12664952811642406457": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5688478347124565305": ["convolution_gpu_bfyx_gemm_like",1],
+        "18325123280144403295": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12491350649215984657": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16192971634546462244": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "7962383460496540840": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6383465957427680176": ["convolution_gpu_bfyx_gemm_like",2],
+        "13846039323711897088": ["convolution_gpu_bfyx_gemm_like",2],
+        "10380031655567712558": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",2],
+        "4424258528650299664": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "15467064540951151390": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10036998353100219512": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "6851536988434597530": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "15764181772410734606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6218328594667952152": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1289009275012699560": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",1],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "7670176887560273910": ["convolution_gpu_bfyx_1x1",2],
+        "2687781952021151359": ["convolution_gpu_bfyx_gemm_like",1],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",188],
+        "6500666367043862023": ["convolution_gpu_bfyx_gemm_like",2],
+        "17970855913877771858": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "5011769546010018777": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "7833495651619250213": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9839216696114127569": ["convolution_gpu_bfyx_gemm_like",2],
+        "3221221905804708596": ["convolution_gpu_bfyx_gemm_like",2],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",2],
+        "15695275881213623746": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1089679781525023551": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14211549589070739656": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "10422138282116598013": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16981010901052181199": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "18083041911869525296": ["convolution_gpu_bfyx_gemm_like",0],
+        "7458923250983373160": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4184357870886924038": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "7727871584058599163": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9105388853296359769": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "846177346130290194": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1760830986937165861": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2345023488044002149": ["convolution_gpu_bfyx_gemm_like",1],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "5175845410753897614": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "10743138314323119696": ["convolution_gpu_bfyx_gemm_like",2],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",2],
+        "7578465277886568471": ["convolution_gpu_bfyx_gemm_like",2],
+        "11080118408282076423": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13691555384698806010": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18132981365225439999": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4854802313728023001": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "9833540739021310892": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6640926908025731367": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12311901617815857033": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "11642345039270524373": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11609821372586026178": ["convolution_gpu_bfyx_gemm_like",2],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "16547425454653232058": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16940359862475871276": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3420064118559852968": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12061391584831995030": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5401946420641519048": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8921169563466511475": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10476627457539425144": ["convolution_gpu_bfyx_gemm_like",2],
+        "5570191330195573102": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13182965457868586949": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "8525389694584008001": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2390769652732034937": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3935174650108042053": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "12591586661644753936": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "3810356382905059819": ["convolution_gpu_bfyx_gemm_like",1],
+        "14707884854112495064": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13383524675055536682": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "16896434896068867157": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "496948821475405395": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10509933181132310969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "3604379857905625467": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "3923715765392385764": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "11746829511394166662": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "5013936351898884291": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17633445715900116866": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "1422402723172447295": ["convolution_gpu_bfyx_gemm_like",2],
+        "17778706153204631930": ["convolution_gpu_bfyx_gemm_like",2],
+        "12930435393720466720": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "17807033661138518449": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "11962541545116807979": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "10844622369472649330": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "3598116387801985039": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4607013085883384144": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8739570656208259296": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1077773457856682663": ["convolution_gpu_bfyx_gemm_like",2],
+        "3666268650646000870": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7379959915507694400": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7573459699367415551": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16103653667647559851": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "11140613052840033128": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "11033507346101404633": ["fully_connected_gpu_fb_oi_ref",0],
+        "4586266886779200588": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "18191573176587760698": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7873648177300629037": ["convolution_gpu_bfyx_gemm_like",2],
+        "7386836350136973872": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4476037346005841003": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13758938418512211194": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3154903035376733831": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "2995957440356398418": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17829983167337875463": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15989164585998175871": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",1],
+        "8289989008260635006": ["convolution_gpu_bfyx_gemm_like",2],
+        "948917645960296825": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "2538377242539785672": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "17182839667242694171": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "8116504545035982006": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "374553246608550876": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9010159579786049147": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "9003196270667188479": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3006979228759768702": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7717602860943327535": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "12716923819769400487": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17516369849823844076": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6919081291036849635": ["convolution_gpu_bfyx_gemm_like",1],
+        "14108091242461324109": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "7032373341094904961": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15452996816194024433": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "18044455700176500102": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5983808817108775912": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15178327647765537565": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4766447533088048613": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "18193831330827252971": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13348329768178411596": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14746516289087513444": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10250378821078082800": ["convolution_gpu_bfyx_gemm_like",2],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "16797936364395702812": ["convolution_gpu_bfyx_gemm_like",2],
+        "9559533345689069514": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "4396653960950462197": ["convolution_gpu_bfyx_gemm_like",2],
+        "16968664807495872526": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15168098632351740923": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16688500506096347178": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9410978119783758141": ["convolution_gpu_bfyx_gemm_like",2],
+        "17521647426452186921": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "12836639380579091509": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "4820628266094118650": ["convolution_gpu_bfyx_gemm_like",0],
+        "18232408112396439386": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "17325129240374428839": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "15678329601718218341": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "11863623794400366834": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6948606378949354116": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "9605161323000741578": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13787155972060672772": ["convolution_gpu_bfyx_gemm_like",2],
+        "15051114821536746998": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8713776440298790672": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "6290180140047520382": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18153597620760635012": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "2506154888542197909": ["convolution_gpu_bfyx_gemm_like",1],
+        "311255514995417672": ["convolution_gpu_bfyx_gemm_like",2],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "3119235799568225015": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "276407276027553756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16404059675217592817": ["fully_connected_gpu_fb_oi_ref",2],
+        "13185831669530779595": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "16073578125651112218": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "9905716283229191208": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8390953788659916133": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "2830019939638455400": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16228026045292341333": ["convolution_gpu_bfyx_gemm_like",1],
+        "12046638414686283134": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1354230973143520455": ["convolution_gpu_bfyx_gemm_like",0],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "2349007644347065353": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "14996839491874598555": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13931470674812510958": ["convolution_gpu_bfyx_gemm_like",1],
+        "9404677451270692749": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17176310030469904708": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "4049386115353229125": ["convolution_gpu_bfyx_gemm_like",2],
+        "12960666483922103702": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2212821435607151031": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17693518538833606792": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "14486903620614795721": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12495153386758666911": ["convolution_gpu_bfyx_gemm_like",2],
+        "3856394004079548211": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "7974918595373182037": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14605107834931199380": ["convolution_gpu_bfyx_gemm_like",2],
+        "12705054744767500423": ["fully_connected_gpu_fb_oi_ref",2],
+        "9707630588260222630": ["convolution_gpu_bfyx_gemm_like",2],
+        "17674340174982758744": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10982693252072682414": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "15115440616185035720": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "3504421925108785018": ["convolution_gpu_bfyx_gemm_like",1],
+        "977617597166653416": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8146906136296114696": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "12427258337646070422": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10174752213614931877": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4184940877670248246": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5060817429317741254": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "11897886369869427808": ["convolution_gpu_bfyx_gemm_like",2],
+        "13522405005274414664": ["convolution_gpu_bfyx_gemm_like",2],
+        "16062811901668074268": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "9019451572520595738": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6074997181157712886": ["convolution_gpu_bfyx_gemm_like",2],
+        "10898210758890334465": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12381377111003298809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7877256119877423528": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9454457647272059910": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "2387628682187438903": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "7089077910858800239": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "689445825453914111": ["convolution_gpu_bfyx_gemm_like",2],
+        "6964180083696019970": ["convolution_gpu_bfyx_gemm_like",2],
+        "2038505773698938555": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "1588995902283491029": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "1772363899841601255": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "1350402181555441235": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9899242398980336120": ["convolution_gpu_bfyx_gemm_like",1],
+        "3177362994630209421": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "15378025640603637387": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "7527175223662342321": ["convolution_gpu_bfyx_gemm_like",1],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "5675497261720118479": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7356440848422235031": ["convolution_gpu_bfyx_gemm_like",1],
+        "6557338279391882446": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4186140878816408491": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1062464852330435815": ["convolution_gpu_bfyx_gemm_like",2],
+        "6161072079255825074": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",2],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "232917916392453671": ["convolution_gpu_bfyx_gemm_like",2],
+        "2281043373250691228": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9181466280310872332": ["convolution_gpu_bfyx_gemm_like",2],
+        "11860902750907076009": ["convolution_gpu_bfyx_gemm_like",1],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9680288044487406977": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "17433689016343629925": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "3259455156773630257": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "8751367574402839332": ["convolution_gpu_bfyx_os_iyx_osv16",281],
+        "4578587579993676820": ["convolution_gpu_bfyx_gemm_like",1],
+        "16415344078703911571": ["convolution_gpu_bfyx_gemm_like",2],
+        "17178808153714023980": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4399656162365214694": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "15778476379845872053": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4407164552309929507": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "14915908231779912828": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "8316011587868622301": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13353123037511986804": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "15958017891397409552": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7782443708015375487": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6571473790090353005": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17753585752923130911": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "5172823024549700279": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13336576524443897680": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "582386337144876096": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "104165137500939902": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3980754726678047241": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2146633923143071497": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18383733736250135501": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",2],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "5073980187181521102": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "3561366509539440079": ["convolution_gpu_bfyx_gemm_like",2],
+        "7399775379344444344": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "16190949264253468961": ["convolution_gpu_bfyx_gemm_like",1],
+        "11117529413698667591": ["convolution_gpu_bfyx_gemm_like",2],
+        "11164600098693999456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "1116274074896622552": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "2947060249866633912": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "8789802900075401620": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "7282595712912388754": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "4519609440668743423": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9300767936311837876": ["convolution_gpu_bfyx_gemm_like",0],
+        "621272125402238670": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11704369548723383645": ["convolution_gpu_bfyx_gemm_like",0],
+        "7838176322738051195": ["convolution_gpu_bfyx_gemm_like",1],
+        "4883106423598271822": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "6103824715103416420": ["convolution_gpu_bfyx_gemm_like",2],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "2789137853864057385": ["convolution_gpu_bfyx_gemm_like",1],
+        "17659601542171299562": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "6789547098653828902": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "16132186023443894579": ["convolution_gpu_bfyx_gemm_like",2],
+        "2172636954267255416": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "6612243861034102250": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17080372737840346243": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18146184020578260553": ["convolution_gpu_bfyx_gemm_like",2],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13756024658546934803": ["convolution_gpu_bfyx_gemm_like",2],
+        "14537109978413728476": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "2235888904701517631": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "17641033958594901664": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9110265526128628472": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5911310036550570440": ["convolution_gpu_bfyx_gemm_like",0],
+        "288825580282908143": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16342158355942808662": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "11083777913844441475": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "18109284647478027063": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4366043672240989175": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "3718980061704064547": ["convolution_gpu_bfyx_gemm_like",1],
+        "7287802938269404923": ["convolution_gpu_bfyx_gemm_like",1],
+        "8611997227481032137": ["convolution_gpu_bfyx_os_iyx_osv16",103],
+        "1154469970162137785": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16124818805329568431": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "4310557764929939942": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "10506079835013332412": ["convolution_gpu_bfyx_gemm_like",2],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "12363462562375148101": ["convolution_gpu_bfyx_gemm_like",2],
+        "5408469943982199754": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8616175124735896626": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "12802376937099168127": ["convolution_gpu_bfyx_gemm_like",2],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17366007551797367227": ["convolution_gpu_bfyx_gemm_like",1],
+        "15247278167909654073": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "6902644989079870993": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14523905821262502926": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "11668043528929060706": ["convolution_gpu_bfyx_gemm_like",2],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8906588133431586825": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11071972036962275632": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "16997897512818072938": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "8484380699802533068": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "4113935675071480884": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "7289633911925073088": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6835280231174703662": ["convolution_gpu_bfyx_gemm_like",1],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",390],
+        "11977806053733461574": ["convolution_gpu_bfyx_gemm_like",2],
+        "15191864907092681849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8909239203149651260": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14446344744130895614": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15683344003370367509": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "17795358440179122086": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16766706479910720794": ["convolution_gpu_bfyx_gemm_like",2],
+        "5701438170070600512": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "7336911146060959485": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "15520716279021654196": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "17382660912493284320": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "10838972820886273680": ["convolution_gpu_bfyx_gemm_like",2],
+        "11872894645888259277": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "18337975902615310907": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1019936903773818652": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17744551201434706388": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "4269447138276727632": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17981604038340576961": ["convolution_gpu_bfyx_gemm_like",2],
+        "1018319414633271980": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "6722358544720547260": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "16723949803487501587": ["convolution_gpu_bfyx_gemm_like",1],
+        "4858337483345561292": ["convolution_gpu_bfyx_gemm_like",2],
+        "5842284971563375197": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "10424278617647597641": ["convolution_gpu_bfyx_gemm_like",2],
+        "13201854669827561901": ["convolution_gpu_bfyx_gemm_like",2],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12319073009094248232": ["convolution_gpu_bfyx_gemm_like",2],
+        "9676055912997166605": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18402875771862490280": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "1966540437574889257": ["convolution_gpu_bfyx_gemm_like",1],
+        "12967849866710811070": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16352438188558979362": ["convolution_gpu_bfyx_gemm_like",1],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14257548530334193336": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17774902969414949042": ["convolution_gpu_bfyx_gemm_like",1],
+        "7032409836645019505": ["convolution_gpu_bfyx_gemm_like",2],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "7520300815632157008": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "529543453251381109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17641726060706984007": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "1962479636209947761": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "1179906398014559042": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "517802466588815950": ["convolution_gpu_bfyx_gemm_like",2],
+        "17592646937716566803": ["convolution_gpu_bfyx_os_iyx_osv16",672],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "1755021778097194246": ["convolution_gpu_bfyx_gemm_like",2],
+        "2057345549105608748": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "17259951372033727587": ["convolution_gpu_bfyx_gemm_like",2],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "13418701036204748812": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4355933224673863178": ["convolution_gpu_bfyx_gemm_like",2],
+        "10648332321840733110": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",288],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "6955820760012983739": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "17897500485405386991": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "13205973783895006074": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "12366546292695084543": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "13565027847255501776": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16576300898841314587": ["convolution_gpu_bfyx_gemm_like",2],
+        "12315068368597230211": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2230884858122788172": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "1006828591724642933": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "16884228931101540030": ["convolution_gpu_bfyx_gemm_like",2],
+        "3750338655074082587": ["fully_connected_gpu_fb_io_ref",1],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "8334832698020211623": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "17243648226968859637": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "9981938305144461962": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17224820843490443805": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "15989730594386153813": ["convolution_gpu_bfyx_gemm_like",1],
+        "15603643151057665338": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3034466284781235431": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "484412270668341493": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17489255290900178723": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1149585571789157695": ["convolution_gpu_bfyx_gemm_like",2],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14746900092090885770": ["convolution_gpu_bfyx_gemm_like",1],
+        "14242742178240625833": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6929786386716045077": ["convolution_gpu_bfyx_gemm_like",2],
+        "5215755301612973095": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11389000759226546186": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2564518461717467683": ["convolution_gpu_bfyx_gemm_like",1],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "14322754320861242412": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "6085098225080533278": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5358925179582853152": ["convolution_gpu_bfyx_gemm_like",2],
+        "15492793021506324472": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "582360460084115077": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "15322989486222859378": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10274587614581350261": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "3363675939515208883": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "15887938842582811165": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18259787991864449280": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "1139581213977408268": ["fully_connected_gpu_fb_io_ref",0],
+        "7703363154993904399": ["convolution_gpu_bfyx_gemm_like",2],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13762042713029963144": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12881836161162762524": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7843498978148810586": ["convolution_gpu_bfyx_gemm_like",2],
+        "9451273689649467046": ["convolution_gpu_bfyx_gemm_like",2],
+        "10358359789382196576": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "761183183078910587": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "9501165931845934084": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9497934813418221769": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "5622089373755094139": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10191238133281607150": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14230197617570499447": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14244966672894707129": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "4577872082734403187": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "2305345466244887603": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5222741986856655072": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13244693761392741931": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "13941188114382863776": ["fully_connected_gpu_fb_oi_ref",1],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "3935404533406270186": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17559685912375493682": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "17343050785312683560": ["convolution_gpu_bfyx_gemm_like",2],
+        "17034122796081495259": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1660279112011537957": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "6761884403006803451": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3621070130367713395": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2801141274570069180": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "10565789595834959047": ["convolution_gpu_bfyx_gemm_like",2],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6680398880450269343": ["convolution_gpu_bfyx_gemm_like",2],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "3730238135300250205": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "2930545263523345204": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",875],
+        "16650590194585316886": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1091511312740979158": ["convolution_gpu_bfyx_gemm_like",2],
+        "3661305534604931936": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "2763902728396558645": ["convolution_gpu_bfyx_gemm_like",2],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",761],
+        "5890599002797783437": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "8882042369902399339": ["convolution_gpu_bfyx_gemm_like",2],
+        "18375125668176498051": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "12756432707088842236": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "11192356850081328892": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "1999892441424036372": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "3089303702413279458": ["convolution_gpu_bfyx_gemm_like",2],
+        "5973242004448142604": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2265784112305305260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5850218300545888277": ["convolution_gpu_bfyx_gemm_like",1],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6298422182853095672": ["convolution_gpu_bfyx_gemm_like",2],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",506],
+        "11553355518677163509": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2528245911029869890": ["convolution_gpu_bfyx_gemm_like",2],
+        "2100387626452428743": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "7129623351507828661": ["convolution_gpu_bfyx_gemm_like",2],
+        "4702017956226464806": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "4356806313729405658": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",1],
+        "1541754036637209097": ["convolution_gpu_bfyx_gemm_like",2],
+        "9134203155715293387": ["convolution_gpu_bfyx_gemm_like",2],
+        "4601800315090684242": ["convolution_gpu_bfyx_gemm_like",0],
+        "16975382270657256942": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14151249542292579535": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "14757855448502485216": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "9057158661097863887": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "11851216776536423298": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "1686420552593340731": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "1852269248476496933": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10008202802779981732": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17126714253919198029": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "4245229655273611845": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "8939520209266902800": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14629433964319883917": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "16622402936526588344": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3055842046969432235": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "3200047546714112402": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "9386678255270055573": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2283707846991978126": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "8426489532875918560": ["convolution_gpu_bfyx_gemm_like",2],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13660573428614001128": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10862735194945768250": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "17867620992288101450": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "10156210866362845661": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "8819268903800581706": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8583431477863678969": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "15578217564714846277": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "5158468772356420379": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16328232350072955252": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",438],
+        "15043469350539759410": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "7102173884859438914": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "5379608399492828685": ["convolution_gpu_bfyx_gemm_like",1],
+        "17242820574559628535": ["convolution_gpu_bfyx_gemm_like",2],
+        "6745633232989303110": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "10885752780697269323": ["convolution_gpu_bfyx_gemm_like",1],
+        "7997955859883990923": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "7044087204529042819": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "11066913713501760080": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16761867442537880229": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "10527256963399838405": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3164513064874019611": ["convolution_gpu_bfyx_gemm_like",1],
+        "15219830328945680713": ["convolution_gpu_bfyx_gemm_like",0],
+        "879461985074219072": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "16628679902327485435": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12409554044517232554": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "15402502830461368746": ["convolution_gpu_bfyx_gemm_like",2],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1187224156936080964": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "5962764672151728219": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10107951904294860034": ["convolution_gpu_bfyx_gemm_like",2],
+        "2649948006897488504": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "1355462205983418380": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9419803870518687519": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13993045680928507594": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6754359635395225555": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "7640517221915599813": ["convolution_gpu_bfyx_gemm_like",2],
+        "4460838234035901102": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15846416859925768761": ["convolution_gpu_bfyx_gemm_like",2],
+        "10842828403850880541": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17846557385112426504": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11318404975804457466": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "11914756126771310827": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6365510146855048488": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "13291816522762326802": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "11060822686394981344": ["convolution_gpu_bfyx_gemm_like",2],
+        "6448710747704334053": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "5270599940168849812": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2559310381697374321": ["convolution_gpu_bfyx_gemm_like",1],
+        "7904735292914337507": ["convolution_gpu_bfyx_gemm_like",2],
+        "17368161816774674256": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14681717813022425567": ["convolution_gpu_bfyx_gemm_like",2],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "10252930102508743294": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9519113693008246391": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2159503178414447904": ["convolution_gpu_bfyx_gemm_like",2],
+        "2094546483928406874": ["convolution_gpu_bfyx_gemm_like",1],
+        "11273554217552152172": ["convolution_gpu_bfyx_gemm_like",1],
+        "12266072789949082198": ["convolution_gpu_bfyx_gemm_like",2],
+        "1569111625440278287": ["convolution_gpu_bfyx_gemm_like",2],
+        "411914986559525749": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "710656784939783221": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "2602209853120236226": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8632281866212611140": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",90],
+        "261021128656714770": ["convolution_gpu_bfyx_os_iyx_osv16",659],
+        "12644942072153919043": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "2740834366358352617": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14682537852514419239": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "18278174626712547691": ["convolution_gpu_bfyx_1x1",2],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7657964685067862984": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3198726093355425150": ["convolution_gpu_bfyx_gemm_like",2],
+        "9322808125154719434": ["convolution_gpu_bfyx_gemm_like",0],
+        "13850807749756445264": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "16113766751106329485": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "10462144647439624978": ["convolution_gpu_bfyx_gemm_like",2],
+        "5424164608102708333": ["convolution_gpu_bfyx_gemm_like",2],
+        "9895036366054127607": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6730474465453860479": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "4107088111454348836": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "9339038855869763548": ["convolution_gpu_bfyx_gemm_like",1],
+        "15875968032394961531": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5244441996055494170": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7335403151694644211": ["convolution_gpu_bfyx_gemm_like",1],
+        "15737542477498282367": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9660587580162063066": ["convolution_gpu_bfyx_gemm_like",1],
+        "123132396286232401": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1682486914760867977": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "10838721873837128971": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "8145385916241200820": ["convolution_gpu_bfyx_gemm_like",2],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11864780937861562358": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17172842643607718498": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2754879558245728361": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15260010680436431377": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3242468066266096173": ["fully_connected_gpu_fb_oi_ref",2],
+        "8413117662038329068": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3723082283919334922": ["convolution_gpu_bfyx_gemm_like",1],
+        "11258182961445417799": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10340099951904598712": ["convolution_gpu_bfyx_gemm_like",1],
+        "9198752981132674942": ["convolution_gpu_bfyx_gemm_like",2],
+        "2467535554409643460": ["convolution_gpu_bfyx_gemm_like",1],
+        "2748579123295571094": ["convolution_gpu_bfyx_gemm_like",2],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",1],
+        "6768451741770053089": ["convolution_gpu_bfyx_gemm_like",2],
+        "3046878786712386934": ["convolution_gpu_bfyx_gemm_like",2],
+        "9351428703239678614": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11290558687608213321": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1502236537645808646": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12767115494378788592": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "15432337846778101995": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10049329759351957685": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "1482319750326346549": ["convolution_gpu_bfyx_gemm_like",2],
+        "14744368497944610864": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17602810216393274602": ["convolution_gpu_bfyx_gemm_like",2],
+        "8529571293598502239": ["convolution_gpu_bfyx_gemm_like",2],
+        "11726125778063855770": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "6275318358833298854": ["convolution_gpu_bfyx_gemm_like",2],
+        "10175721494218314250": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11868789283464117390": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "3527012447011885981": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "13891498649894490342": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "12568071362640409835": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3436576388124386308": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "18369668865072009928": ["convolution_gpu_bfyx_gemm_like",1],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "12965552570525926289": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "889943986793446284": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "15129201859573664210": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7198242727502284570": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7826714904736870517": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5702807185231177394": ["convolution_gpu_bfyx_gemm_like",2],
+        "4585891362157592384": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "8503207028307570404": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "2788116002380533417": ["convolution_gpu_bfyx_gemm_like",1],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15660316437768312006": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17357800564047774826": ["convolution_gpu_bfyx_gemm_like",2],
+        "15696133206063951076": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "12814676907278614920": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "28534640470354264": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10190532901392055501": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "13659291428095454839": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "14385181780082014495": ["convolution_gpu_bfyx_gemm_like",2],
+        "13483407708449667171": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "1720791539242542292": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14673890892774965970": ["convolution_gpu_bfyx_gemm_like",2],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "1641881628032037384": ["convolution_gpu_bfyx_gemm_like",1],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8153567933591966877": ["convolution_gpu_bfyx_gemm_like",1],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "830199932582554906": ["convolution_gpu_bfyx_gemm_like",1],
+        "17144223055397369799": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12089505956882731481": ["convolution_gpu_bfyx_gemm_like",2],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",2],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "13599555566632152241": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10599639229366933472": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "152263592822875549": ["convolution_gpu_bfyx_gemm_like",2],
+        "352808518345312040": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "11674630830833831209": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12854110364457722483": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "4887402175773881313": ["convolution_gpu_bfyx_gemm_like",2],
+        "4524347845016978037": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4034250407843183678": ["convolution_gpu_bfyx_gemm_like",1],
+        "4209610989252810404": ["convolution_gpu_bfyx_gemm_like",1],
+        "17221958812979739319": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "2008064690158516711": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10451904743064959757": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "13728180355108851541": ["convolution_gpu_bfyx_gemm_like",2],
+        "3098585338129539028": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "3892873577927627992": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14463506867389575739": ["convolution_gpu_bfyx_gemm_like",1],
+        "7527121935101118719": ["convolution_gpu_bfyx_gemm_like",2],
+        "13764532551476584909": ["convolution_gpu_bfyx_gemm_like",1],
+        "15287650965861631130": ["convolution_gpu_bfyx_gemm_like",2],
+        "5981885264666023260": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13926730608213207277": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13226254161087770253": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12166710900466116000": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6210483922262161762": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7509199936979430017": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11979910991788695837": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "7833280896841707248": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6812025576584060234": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "1878953827218615252": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15227034948424983496": ["convolution_gpu_bfyx_gemm_like",1],
+        "2054895351334936744": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15765198153800696060": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "16071723603031305677": ["convolution_gpu_bfyx_gemm_like",2],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14652791434312888296": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "5553779954745929430": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",942],
+        "16907043223873231356": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "2959008804873881193": ["convolution_gpu_bfyx_gemm_like",1],
+        "331490096600171689": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16202841384048331166": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5922243230245842969": ["convolution_gpu_bfyx_gemm_like",1],
+        "8488789346759658706": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4810979456269693700": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "13770716520774847938": ["convolution_gpu_bfyx_gemm_like",1],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "9600125229193280365": ["convolution_gpu_bfyx_gemm_like",2],
+        "13337122303005980542": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "6740385846687754849": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "1504867045084152953": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4995510103045767117": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14243609293683870669": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "11491172180673411322": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "8376077531098664520": ["convolution_gpu_bfyx_gemm_like",1],
+        "7035625231891242247": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7678168522030142454": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8841627473398015595": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "9493034132406318197": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "16197538586133639338": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "7505608160068471520": ["fully_connected_gpu_fb_io_ref",2],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "3106922888635965020": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4886289616235149731": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14755869345266103764": ["fully_connected_gpu_fb_oi_ref",2],
+        "3277243911383750280": ["convolution_gpu_bfyx_gemm_like",1],
+        "12630173933512965589": ["convolution_gpu_bfyx_gemm_like",2],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "11439519952236570490": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14017106221778585861": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "2816982827037092536": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "6763373100985812924": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8737417433314100353": ["convolution_gpu_bfyx_gemm_like",2],
+        "2303141161423252932": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10412748832841674068": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15083602050538795803": ["convolution_gpu_bfyx_gemm_like",2],
+        "11733721371402545268": ["fully_connected_gpu_fb_io_ref",1],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "11640865562390693266": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "12819626280531787705": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2504018828500488106": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "1362540464632328798": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12791525533856308302": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13993319023992950944": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10485534959656860449": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "9552312946391901745": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "6673690359191617215": ["fully_connected_gpu_fb_oi_ref",2],
+        "3120885087070223590": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6277198010392189880": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "4434505319447395291": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13821388909343378606": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "11882388384272635526": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5251771557248725731": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15929361440504489924": ["convolution_gpu_bfyx_os_iyx_osv16",424],
+        "9940908487812223059": ["convolution_gpu_bfyx_gemm_like",2],
+        "2085738943081638802": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "7111620180131341264": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7748357850995979651": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4830121683809417143": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16477108783154865570": ["convolution_gpu_bfyx_gemm_like",2],
+        "6121673167888047110": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "12279591818557049086": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "13300022131572486202": ["convolution_gpu_bfyx_gemm_like",2],
+        "9492331996847106233": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17358006976602795707": ["convolution_gpu_bfyx_gemm_like",2],
+        "10900962238463588974": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "5398895598407183682": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15096978026328154490": ["convolution_gpu_bfyx_gemm_like",2],
+        "15213473731205734586": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "15092483859565823523": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16488426854651696706": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "14682894856346977838": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "6025872155179042054": ["convolution_gpu_bfyx_gemm_like",2],
+        "11452661262277158611": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3266638956600784732": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2423162087154134021": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8827683910847407160": ["convolution_gpu_bfyx_gemm_like",2],
+        "13353269683286187221": ["convolution_gpu_bfyx_gemm_like",0],
+        "11852328241822224147": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4593862318851730430": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5055133356846736609": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12972406304361050136": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "107527758399960384": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "16071030448801649281": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "3087295384028350107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3558174319433648829": ["convolution_gpu_bfyx_gemm_like",2],
+        "277410555520090949": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15117830538655814853": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "3991584206721185508": ["fully_connected_gpu_fb_oi_ref",1],
+        "12310462218432530363": ["convolution_gpu_bfyx_gemm_like",2],
+        "7879588938300868891": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10869059995205753062": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "17279975778400757791": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "11831092915967558428": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "14097319816812992451": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "11025455960289445816": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "8215519118071138614": ["convolution_gpu_bfyx_gemm_like",2],
+        "91915122883128106": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "15929970324703663357": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13232269620066140073": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "11523864029587161089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8090497202997192142": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17113350507039887381": ["convolution_gpu_bfyx_gemm_like",2],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1034911525083515252": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "4217179485243909459": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8793779433658187978": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10754321688472707825": ["convolution_gpu_bfyx_gemm_like",1],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8320522112821700316": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "1396516976059964423": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "8398760317387811024": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17540928447332229457": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13077961697656030315": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6613282637922219205": ["convolution_gpu_bfyx_gemm_like",1],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "6447357750120537934": ["convolution_gpu_bfyx_gemm_like",2],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "6413565827738894970": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "4344644499804057502": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8027062545185940933": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16958661630307271135": ["convolution_gpu_bfyx_gemm_like",1],
+        "18187262802267413585": ["fully_connected_gpu_fb_oi_ref",2],
+        "615833743936753727": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "2020044486043617858": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7264756313770306662": ["convolution_gpu_bfyx_gemm_like",2],
+        "4479979951990338510": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7730305811644972643": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "9350073350568836719": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5734909305243135224": ["convolution_gpu_bfyx_gemm_like",1],
+        "14520482703619969447": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "5103094815475470596": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10809330882739297269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "2947753291378607664": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4945845875046545967": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "1112828128944231163": ["convolution_gpu_bfyx_gemm_like",2],
+        "17025268985366223779": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "15592248516895826924": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14173804995472477932": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4624363818743696582": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8689463522180659045": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "11907507085694711513": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "16566714514564722975": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11393439616752806572": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3789890554711038921": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "61390148213644186": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "8234878941966364642": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15372944709956866587": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "13311560756985319232": ["convolution_gpu_bfyx_gemm_like",2],
+        "16835545111241063900": ["convolution_gpu_bfyx_gemm_like",1],
+        "3853138649112340419": ["convolution_gpu_bfyx_gemm_like",2],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "7606716827635769887": ["convolution_gpu_bfyx_gemm_like",1],
+        "789359733867650915": ["convolution_gpu_bfyx_gemm_like",1],
+        "6578239603654034233": ["convolution_gpu_bfyx_os_iyx_osv16",122],
+        "6012477132351580695": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "155988420513611659": ["convolution_gpu_bfyx_gemm_like",2],
+        "10025893052937028511": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "475665035119038846": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12307446289692143781": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13410178186827874638": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "16896863928108200897": ["convolution_gpu_bfyx_gemm_like",2],
+        "2103507679502667581": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "8295066904650070896": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "4774186037059137781": ["convolution_gpu_bfyx_1x1",2],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "15858485865603722138": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "14131851237755716991": ["convolution_gpu_bfyx_gemm_like",2],
+        "14911763273270477925": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17089332981370803321": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12615462894236933223": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11026432639515866259": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "16992620579546408448": ["convolution_gpu_bfyx_os_iyx_osv16",128],
+        "15310138877321331399": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "10111038481447198008": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7323343770209750835": ["convolution_gpu_bfyx_gemm_like",1],
+        "13401926003864565026": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "8403919905230540356": ["fully_connected_gpu_fb_io_ref",2],
+        "11757953304204716753": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "4783126652984096700": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "13599438824699346708": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "18057258413318190788": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7211355951470869591": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8566695253227825439": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8697631439739291302": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "15088446688058274991": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1849035883815257432": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13817553830305981296": ["convolution_gpu_bfyx_gemm_like",2],
+        "1605295763358374504": ["convolution_gpu_bfyx_gemm_like",2],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "1258881146411114485": ["convolution_gpu_bfyx_gemm_like",2],
+        "13797057152042581440": ["convolution_gpu_bfyx_gemm_like",1],
+        "14541063954080306476": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "12608653044712562811": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "18076121920579110076": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4831224999851230245": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "2660620513253264815": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "11264412030568042996": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3317498303952226642": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4264078972561407296": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "5983162283897982344": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2838789360952219092": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2839370555757225469": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12465040766199807760": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11549611099429682170": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "17869928048344193660": ["fully_connected_gpu_fb_io_ref",2],
+        "10462203417605590793": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4569416043426963318": ["convolution_gpu_bfyx_gemm_like",1],
+        "12818953631784587919": ["convolution_gpu_bfyx_gemm_like",1],
+        "15622339218175336908": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4669204329917622837": ["convolution_gpu_bfyx_gemm_like",1],
+        "7215460815798365056": ["convolution_gpu_bfyx_gemm_like",2],
+        "10650242500904186542": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "5378151578014945610": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6355678392953568007": ["convolution_gpu_bfyx_gemm_like",2],
+        "7627882727285402176": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4947961640303581107": ["convolution_gpu_bfyx_gemm_like",2],
+        "14670952132900619664": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "390219891876240081": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5230406405159608187": ["convolution_gpu_bfyx_os_iyx_osv16",187],
+        "12711366212612147422": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3704271978133986620": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "9367157746678824712": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11213667690594303395": ["fully_connected_gpu_fb_io_ref",1],
+        "8104522072297740079": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9391102514951576629": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8109572327736409899": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9035867067423437834": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6232596685071671579": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "1334121138243951086": ["convolution_gpu_bfyx_gemm_like",2],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "9287404618748313247": ["convolution_gpu_bfyx_gemm_like",2],
+        "8307147375351882939": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10554266898346470422": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14719871224178118299": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9391425117463100557": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "1501328995320618233": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11725629762660987217": ["convolution_gpu_bfyx_gemm_like",1],
+        "8531836171622495872": ["convolution_gpu_bfyx_gemm_like",1],
+        "14045907210413991971": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "8171897258557801015": ["convolution_gpu_bfyx_gemm_like",2],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13142183299783041623": ["convolution_gpu_bfyx_gemm_like",1],
+        "8578747191812631883": ["convolution_gpu_bfyx_gemm_like",2],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9052153145556623933": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "12163456975896925619": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "755157892988514864": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "7770438611007743835": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "4347494599650425733": ["convolution_gpu_bfyx_gemm_like",1],
+        "10433456687054381828": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10182490653383265979": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "13948873105076070952": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "5797243082477551421": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "16358588755272162237": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7508931961595339477": ["convolution_gpu_bfyx_gemm_like",1],
+        "11263725357444590346": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4185477435943946730": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "12141880589558027223": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "11145411572841972268": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13473730516782884152": ["convolution_gpu_bfyx_gemm_like",0],
+        "14951164724050668856": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "13359643347682243944": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "2932953010695506533": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "18116824232149703772": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4658091014944825771": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "15016406041863758148": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17921616427936768657": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "14407614314124529121": ["convolution_gpu_bfyx_gemm_like",1],
+        "13624106485902414324": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8870736106637803783": ["convolution_gpu_bfyx_os_iyx_osv16",385],
+        "384240534894352154": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "12650986929262866534": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17514082938765137629": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1892198178635468999": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "363330365598760149": ["convolution_gpu_bfyx_gemm_like",1],
+        "12569856169024791306": ["convolution_gpu_bfyx_gemm_like",2],
+        "11091004452522208782": ["convolution_gpu_bfyx_gemm_like",2],
+        "17242442529374722270": ["fully_connected_gpu_fb_oi_ref",1],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "9649533822873928984": ["convolution_gpu_bfyx_gemm_like",1],
+        "7463657272687673896": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",2],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "1751540546502480266": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",1],
+        "2705534741438659581": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "4072967257556128157": ["convolution_gpu_bfyx_gemm_like",2],
+        "8961544327690568390": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8473962320928461448": ["convolution_gpu_bfyx_gemm_like",2],
+        "16170708786673864371": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "18424400171776141118": ["convolution_gpu_bfyx_gemm_like",2],
+        "7000326048755427076": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11298638173197050575": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",490],
+        "2338535084014610258": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "346832567535597247": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "9031338938030715616": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1200162031019105686": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10320711719466983961": ["convolution_gpu_bfyx_gemm_like",2],
+        "13614921331048223116": ["convolution_gpu_bfyx_gemm_like",1],
+        "3526580286148537369": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "7086574330273897976": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",592],
+        "6148794431848761670": ["convolution_gpu_bfyx_gemm_like",2],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "17967188184891337660": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11234976958917093838": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "17419610762909854340": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "13836867092941506302": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "282581251783414872": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "2053428297205345660": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14742909697076926475": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "11715731071598552513": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1967810052096853804": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "340606466693982406": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3088402690095697589": ["convolution_gpu_bfyx_gemm_like",1],
+        "5294364781478821403": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "844278648549884313": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "11560441698542238940": ["convolution_gpu_bfyx_os_iyx_osv16",480],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7020743056013297476": ["convolution_gpu_bfyx_gemm_like",2],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "11862162783632998191": ["convolution_gpu_bfyx_gemm_like",2],
+        "2248628426797793532": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "11868419561534906809": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6739799137687789012": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4463585976112702040": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15929295825192449880": ["convolution_gpu_bfyx_gemm_like",2],
+        "17665874097707161453": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4872433441839808585": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9657585348407617520": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "3917482908041199389": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "18341524156838963264": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12526417587678222534": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10914336346597505098": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",9],
+        "5509852360472061267": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17269318621094624075": ["convolution_gpu_bfyx_gemm_like",1],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13624969243174329965": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4634475069086874260": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9305758766575321575": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "697609699740088622": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "11885660439698926227": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10462797712860969072": ["convolution_gpu_bfyx_gemm_like",2],
+        "13551767519605460627": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4868400250190558111": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "13051342120933385671": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7162701010394257343": ["convolution_gpu_bfyx_gemm_like",2],
+        "13660015013041074867": ["convolution_gpu_bfyx_gemm_like",2],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "4206637285289830669": ["convolution_gpu_bfyx_gemm_like",2],
+        "7157499157310356912": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5930451476167223501": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2881769839926594784": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11362244289696496732": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8938942439963723596": ["convolution_gpu_bfyx_gemm_like",1],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "13286723666743148654": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15112118829970177073": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "17683350638672326642": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",2],
+        "3173044753177123454": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "7693556065684619275": ["convolution_gpu_bfyx_os_iyx_osv16",177],
+        "10160082844961863335": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "1828547823690389920": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "13336847303794450665": ["convolution_gpu_bfyx_gemm_like",2],
+        "2054100643811117871": ["convolution_gpu_bfyx_gemm_like",2],
+        "6899658518070473523": ["convolution_gpu_bfyx_gemm_like",2],
+        "11931909191490706784": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",2],
+        "16996022503617157059": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "3138712043201001156": ["convolution_gpu_bfyx_gemm_like",1],
+        "17790954200356837750": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14786904599410885158": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "1435153323458789173": ["convolution_gpu_bfyx_gemm_like",2],
+        "875552069535001284": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "17420288204511371476": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1650519167046658780": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "17939745299931100048": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "18137301493811026488": ["convolution_gpu_bfyx_gemm_like",1],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "1436830013293669148": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "2479856511929768548": ["convolution_gpu_bfyx_gemm_like",2],
+        "8067518815436853042": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14098084847097251914": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "15922076723067110929": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "1204640737451377030": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "1521992965089360209": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "7441139786825555264": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12924910330295852704": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10446500827044060319": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1056494963618130644": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5825664545247017348": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6680219899975628258": ["convolution_gpu_bfyx_os_iyx_osv16",569],
+        "1922168904767469999": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "7671016314869993705": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1827273736951105482": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "13647773816638053437": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17331582127656317117": ["convolution_gpu_bfyx_gemm_like",2],
+        "11988463489006787939": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4457404272076798129": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3816139128011494515": ["convolution_gpu_bfyx_gemm_like",2],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14704939880642470064": ["convolution_gpu_bfyx_gemm_like",1],
+        "12534755422857294243": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "369250798206414410": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "8360628955300060520": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "12150384018379393131": ["convolution_gpu_bfyx_gemm_like",2],
+        "3930526618478171342": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "7410220112400588068": ["convolution_gpu_bfyx_gemm_like",2],
+        "4563529605364580848": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "11910060331768652144": ["convolution_gpu_bfyx_gemm_like",2],
+        "11327867170377736609": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7053070767227498983": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15257886319670476581": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "68637843533109734": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1145700078649932035": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "9876098429582714576": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14454927839795553295": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "8129414331584785189": ["convolution_gpu_bfyx_gemm_like",2],
+        "2904162348196990593": ["convolution_gpu_bfyx_gemm_like",1],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "1077224320045437593": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "581553908799266285": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "13869279315296163696": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13004055504657277105": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13140254055376365092": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16906866971084527970": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "18156747282906367814": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "18355551625040856531": ["convolution_gpu_bfyx_gemm_like",1],
+        "6256217572152039230": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10087048842366891699": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16801553481899627402": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "4554343896877444783": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8510044123592842725": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "9639125104707961956": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "5136111979773513341": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "16886045176231683312": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12526627889432649075": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "5163641718529821203": ["convolution_gpu_bfyx_gemm_like",2],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5511347850693802982": ["fully_connected_gpu_fb_io_b8_f8_vload",0],
+        "3806791682244402910": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12676139447729343679": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16450345154125804290": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15777551868644801538": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12796777049340516563": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5385395378424322451": ["convolution_gpu_bfyx_gemm_like",2],
+        "4917595053453614536": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "6537576410448334203": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3768977479127609228": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15640202505592598653": ["convolution_gpu_bfyx_gemm_like",2],
+        "10857084376518292379": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10551742525038893508": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "270573524496930135": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "1104098779103065492": ["convolution_gpu_bfyx_gemm_like",1],
+        "8243230863677884952": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "17302671258991071440": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",2],
+        "8655525088525612583": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15711618559677233865": ["convolution_gpu_bfyx_gemm_like",2],
+        "7813041847979170166": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17184638213817814424": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6432519735121751346": ["convolution_gpu_bfyx_gemm_like",1],
+        "11546295514640813785": ["convolution_gpu_bfyx_gemm_like",2],
+        "13959998803881264899": ["convolution_gpu_bfyx_gemm_like",2],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "11409066626289209846": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1128944012801956636": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "9397711809671506538": ["convolution_gpu_bfyx_os_iyx_osv16",106],
+        "12386930130408773521": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3027775502561362722": ["convolution_gpu_bfyx_gemm_like",1],
+        "2778141440914991349": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "18379763351534914922": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "15781622938833984014": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8127853538569353431": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",2],
+        "1766961036311612128": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6261121070004228939": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3191047205441946466": ["convolution_gpu_bfyx_gemm_like",2],
+        "5629582391075745771": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "10392297152843428925": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "14741012384358891350": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7953340333870774815": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6262190151863459214": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "8108939799996498955": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "11223947043157461994": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2352142833866194508": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4370628494554426971": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "11539652577193034099": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "11622271315873664622": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6525052296614701517": ["convolution_gpu_bfyx_gemm_like",1],
+        "3300655231758263066": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "13802834658447955377": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "16170237673140354764": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11054953301882177295": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "14159293183840880884": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "8415763978601237333": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "3638987901025418036": ["convolution_gpu_bfyx_gemm_like",1],
+        "6674643031068271417": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "470101933740495567": ["convolution_gpu_bfyx_gemm_like",2],
+        "5219818570070061892": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16453041919970581620": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "18171940644650760608": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "310584224049735004": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1006721963560645335": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17843570854284772921": ["convolution_gpu_bfyx_gemm_like",2],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "8863731258634577277": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6418748992581951435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "17160915544701715607": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "2215533237231530097": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11208787273440167590": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5906712613621491207": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7289594989625385620": ["convolution_gpu_bfyx_gemm_like",0],
+        "2191939052196737757": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5343186686923330871": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "15228614030349540878": ["convolution_gpu_bfyx_gemm_like",2],
+        "13083412418930786217": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15381551674482810230": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "5955810688179557560": ["convolution_gpu_bfyx_gemm_like",2],
+        "8509882139595784161": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11357813056434049302": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17230103497915224469": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "8337457116169698090": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10222020393925339442": ["convolution_gpu_bfyx_gemm_like",2],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "18190085718345933756": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2043990557089419633": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9679023228597590356": ["convolution_gpu_bfyx_gemm_like",2],
+        "16748743818537812349": ["convolution_gpu_bfyx_gemm_like",2],
+        "18167100055915766856": ["convolution_gpu_bfyx_gemm_like",2],
+        "9400558994532871122": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14659204578478669831": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10784905418636316601": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "2270733937722366926": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "15563546888345388359": ["convolution_gpu_bfyx_gemm_like",2],
+        "16935426150666181858": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "3240428557350945267": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4272417312859966238": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12018398218876712811": ["convolution_gpu_bfyx_gemm_like",1],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",522],
+        "13947140171097868740": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "3811325657214369711": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "16559140502701231107": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9608148784787572220": ["convolution_gpu_bfyx_gemm_like",1],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4208026832369242882": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2008999755215725290": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6878922067845522655": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13297875917250935192": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7848121247546147821": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "5353170440534073482": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16582080251500644069": ["convolution_gpu_bfyx_gemm_like",2],
+        "16715151641337602113": ["convolution_gpu_bfyx_gemm_like",2],
+        "9289375071420565548": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "70244312667395170": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "16385712633367611786": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "17626938391567407401": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5933483880333895572": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "8942548644169090240": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "13810995219720233595": ["convolution_gpu_bfyx_gemm_like",2],
+        "9883719542550391149": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "11098189888598804624": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "5688161172644782612": ["convolution_gpu_bfyx_gemm_like",2],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "4597954342704466825": ["convolution_gpu_bfyx_gemm_like",1],
+        "12325592439309417414": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6491772898618671653": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3069726952591207961": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "17355826643208208691": ["convolution_gpu_bfyx_gemm_like",2],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10117784802089387496": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",2],
+        "9937387440035377216": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "9954853231955573552": ["convolution_gpu_bfyx_1x1",2],
+        "3594327736281012643": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "5433618404351968121": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13441117085490814804": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "10168272404395268951": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17016846635668370921": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17508987219281192918": ["convolution_gpu_bfyx_gemm_like",2],
+        "8964252048679144533": ["convolution_gpu_bfyx_gemm_like",2],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17190698921280188790": ["convolution_gpu_bfyx_gemm_like",2],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "10906417366145323499": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3953213564511738847": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "2173649669339714890": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15509845164085518352": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10397253349562394184": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1552088062654417187": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "17286180622990393912": ["convolution_gpu_bfyx_gemm_like",2],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8035084960535483680": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4239415134522959352": ["convolution_gpu_bfyx_gemm_like",2],
+        "844742962836593299": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "7505966294864890221": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8306931146242110738": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11674725184029885494": ["convolution_gpu_bfyx_gemm_like",1],
+        "6245361626768537926": ["convolution_gpu_bfyx_gemm_like",2],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "10691347880912431064": ["convolution_gpu_bfyx_gemm_like",2],
+        "17610648476343170476": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8549811622247170014": ["fully_connected_gpu_fb_oi_ref",2],
+        "11362615856022848825": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16323870023648254366": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8860685325047463026": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14545322358931928911": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "7483972013701858698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7584912988728072414": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "139367204458861048": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9358320688298379206": ["convolution_gpu_bfyx_gemm_like",1],
+        "6638696743420807294": ["convolution_gpu_bfyx_gemm_like",2],
+        "10433541468308381909": ["convolution_gpu_bfyx_gemm_like",2],
+        "2507750416500565780": ["convolution_gpu_bfyx_1x1",2],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "9068406831482072377": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "41250455178236256": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "9245770108138984525": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "10603542859148554015": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "1372939511728986224": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5040944983588288886": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "1655841524658081889": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "6335628260431943016": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7905503566052181015": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "16283197954769879909": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "12015922610963701033": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "5457559128595532093": ["convolution_gpu_bfyx_gemm_like",1],
+        "16613907066461513431": ["convolution_gpu_bfyx_gemm_like",2],
+        "5902427784683046762": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "5989664002046950385": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "9101571410887509600": ["convolution_gpu_bfyx_gemm_like",1],
+        "13839075443229327158": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6224167817672480442": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14812010622304650503": ["convolution_gpu_bfyx_gemm_like",2],
+        "16934386540875904239": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "13121196588092064246": ["convolution_gpu_bfyx_gemm_like",2],
+        "6352796762984487375": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "9429586951778813053": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "1811357700607919311": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "18434406492564982566": ["convolution_gpu_bfyx_gemm_like",2],
+        "8942221095468681112": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6636049821584137799": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "8641167903508739082": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6753857156025715321": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "2873387231297790075": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "10689303050557631712": ["convolution_gpu_bfyx_gemm_like",2],
+        "3416059550012678486": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "18395970344992997862": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "5556023021504556658": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2382194958531920812": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "15825993019555657125": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "907233163535348999": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "330278641539729021": ["convolution_gpu_bfyx_gemm_like",2],
+        "14301661367597749567": ["convolution_gpu_bfyx_gemm_like",2],
+        "6351924049625723579": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10726830507311062380": ["fully_connected_gpu_fb_io_ref",2],
+        "2094213523530180653": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2664944425727769475": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "16954232936536653281": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "3187628264815974849": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "999907268780362316": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "3649980610274946512": ["fully_connected_gpu_fb_io_ref",1],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6089202061701179659": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13085261987388297912": ["convolution_gpu_bfyx_gemm_like",2],
+        "4650645000018045553": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "12277537216735931250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3859314295530377028": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "578940134826172063": ["convolution_gpu_bfyx_gemm_like",2],
+        "433161293684647032": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "6660221471357497741": ["convolution_gpu_bfyx_gemm_like",2],
+        "14625389915334622267": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "15883541155556528149": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7400370437512056636": ["convolution_gpu_bfyx_gemm_like",2],
+        "8665233719288454405": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9199198661789368378": ["convolution_gpu_bfyx_gemm_like",2],
+        "16620032793356620588": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "12085208566397959149": ["convolution_gpu_bfyx_gemm_like",2],
+        "4718568664715549075": ["convolution_gpu_bfyx_gemm_like",2],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "832830374368320801": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12672995204641007004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16072242340501555867": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "3816979903860227798": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "16256970928603738516": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "32035190068479388": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16042236932298055236": ["convolution_gpu_bfyx_gemm_like",1],
+        "11806105193035393795": ["convolution_gpu_bfyx_gemm_like",2],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "16037225955601275305": ["convolution_gpu_bfyx_gemm_like",1],
+        "3034947396960425753": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "1206646015768146562": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "13094313253457422444": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3896848534552901221": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15365628642332393565": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "7103345484511147373": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7857909522677175325": ["convolution_gpu_bfyx_gemm_like",2],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "14039055710777697188": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5617115485659763469": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "15751445344585167275": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "6439778526899109398": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17287404861045114619": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",348],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",59],
+        "10716913534741102635": ["convolution_gpu_bfyx_gemm_like",1],
+        "14784115394395151055": ["convolution_gpu_bfyx_gemm_like",2],
+        "9261867808456596636": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "14642845734482478360": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "4178614913813882037": ["convolution_gpu_bfyx_gemm_like",2],
+        "8282940696864401735": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10970459222330057357": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "7126601602274920416": ["convolution_gpu_bfyx_gemm_like",0],
+        "13946367911927964830": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4108707041101687664": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "8779987507326777359": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3377472614945731801": ["convolution_gpu_bfyx_gemm_like",2],
+        "1720057192283799086": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10174616678364842740": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2780358937598873103": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1410512481031922864": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13082713280504953535": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6827316954140278736": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "4243114942173293897": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13593258537178247801": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "5023609284081684300": ["convolution_gpu_bfyx_gemm_like",2],
+        "8837079302496539409": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "8178825467227185946": ["convolution_gpu_bfyx_gemm_like",2],
+        "2028273519579688266": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "15381014522874131924": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "15156836293519486753": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14571528890474602715": ["convolution_gpu_bfyx_gemm_like",2],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12921171323911432795": ["convolution_gpu_bfyx_gemm_like",0],
+        "5018845267269043034": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "9475130054420979752": ["convolution_gpu_bfyx_gemm_like",2],
+        "13491221531603384511": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "568114041320772862": ["convolution_gpu_bfyx_gemm_like",2],
+        "11413890625163220846": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2887152687927903549": ["convolution_gpu_bfyx_os_iyx_osv16",153],
+        "5573639264204952559": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "3961000444895975975": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17699579394941627848": ["convolution_gpu_bfyx_gemm_like",2],
+        "1465692634334679413": ["convolution_gpu_bfyx_gemm_like",0],
+        "3513523165606656242": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13906695412889750672": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "8361403425124294653": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10946069941293798874": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "8309889975288645282": ["convolution_gpu_bfyx_1x1",2],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "4679070030774970232": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "11229587372764249222": ["convolution_gpu_bfyx_gemm_like",2],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17608288706234084973": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "12700008320838073774": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "15822975685755664152": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "2930702812469156271": ["fully_connected_gpu_fb_io_ref",2],
+        "14907038741687299621": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15342520770460205985": ["convolution_gpu_bfyx_gemm_like",2],
+        "11795686089670429481": ["convolution_gpu_bfyx_gemm_like",2],
+        "17018377589252417538": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "17039095054151625163": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2446435710311724460": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10835598123347764626": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "304721598975479337": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "7713736987017889212": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "12427490329663434604": ["convolution_gpu_bfyx_gemm_like",2],
+        "3436433254188539886": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "6865406633958213363": ["convolution_gpu_bfyx_gemm_like",2],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "9130971535185609293": ["convolution_gpu_bfyx_gemm_like",2],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7908036427091174081": ["convolution_gpu_bfyx_gemm_like",2],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "13681462437496627948": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14545094765855515974": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3148053731303748054": ["convolution_gpu_bfyx_gemm_like",2],
+        "3036808833459559381": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4082046235109198108": ["convolution_gpu_bfyx_gemm_like",2],
+        "16027853591907232537": ["convolution_gpu_bfyx_gemm_like",1],
+        "3166885953206195915": ["convolution_gpu_bfyx_gemm_like",2],
+        "4242438539626727158": ["convolution_gpu_bfyx_gemm_like",1],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5724069285122500749": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "9239048433297419320": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14269161473352876138": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "170594581804738255": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10175150090660795910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11804035561861841621": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13312401790608349463": ["convolution_gpu_bfyx_gemm_like",1],
+        "1904461959474455864": ["convolution_gpu_bfyx_gemm_like",2],
+        "1847170421455825520": ["convolution_gpu_bfyx_gemm_like",1],
+        "9660812093766156608": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16587387608532583713": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11782514629636023633": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14513925709624513868": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8713639086785023623": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "15652392678782222737": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "3191417938329385213": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10642327923162019888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15888454525088587794": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10736915975072972467": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "17267132595546153629": ["convolution_gpu_bfyx_gemm_like",2],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "5743482411668939203": ["convolution_gpu_bfyx_gemm_like",2],
+        "4251588408225461731": ["convolution_gpu_bfyx_gemm_like",1],
+        "16286085532892593349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3625906783784771100": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "4402303539054523204": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "8749468546606972791": ["convolution_gpu_bfyx_gemm_like",2],
+        "3714179297375678368": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4272784935990323993": ["convolution_gpu_bfyx_gemm_like",1],
+        "10205576142280465189": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17332230377845694888": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13663612869789682704": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16497757978901707098": ["convolution_gpu_bfyx_gemm_like",1],
+        "14853629175426765699": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6078344073564209080": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17287487062245049466": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "4554218761970822728": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "15531908897773912572": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6706802683366112205": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13387602037439694372": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5934532691347082124": ["convolution_gpu_bfyx_gemm_like",1],
+        "4491694127072416122": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "8122815203088327658": ["convolution_gpu_bfyx_gemm_like",2],
+        "770376597027620107": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14309292105974991733": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "685140170576742460": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18169371857833455144": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "18373068999874730591": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "16491532291908469567": ["convolution_gpu_bfyx_gemm_like",1],
+        "17628984504073918701": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "10384537928514123040": ["convolution_gpu_bfyx_gemm_like",2],
+        "5646139101524964833": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9758907700230386910": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2049445812114632861": ["convolution_gpu_bfyx_os_iyx_osv16",529],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "9968478753009937857": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "2668985670745598382": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "14433662482531248989": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8169762955969255618": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17875492671709861777": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13066055561434178894": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7318929661124340248": ["convolution_gpu_bfyx_gemm_like",1],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",2],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "13368477378531148593": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10082542799898846504": ["convolution_gpu_bfyx_gemm_like",2],
+        "265378250397648692": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15571801737237063594": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17825953644228876369": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "16355518852513270001": ["convolution_gpu_bfyx_gemm_like",2],
+        "13610246822402943068": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15939309688773899430": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4017163133829149027": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3518981281605476136": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "786418751322581924": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13002723770137829128": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "13671635457689276237": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1131384986902172221": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "17392347485675658099": ["convolution_gpu_bfyx_gemm_like",1],
+        "9241243727411869340": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16364899406120840449": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "9559550404190168365": ["convolution_gpu_bfyx_gemm_like",2],
+        "18012549942299450620": ["convolution_gpu_bfyx_gemm_like",2],
+        "5513667102916409932": ["convolution_gpu_bfyx_gemm_like",1],
+        "12144421857685107073": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10526411638069090068": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "3752278444736105763": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "6181272224000872375": ["convolution_gpu_bfyx_gemm_like",2],
+        "5167141379778311462": ["convolution_gpu_bfyx_gemm_like",1],
+        "9599667132406949054": ["convolution_gpu_bfyx_gemm_like",2],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "14298701404596322580": ["convolution_gpu_bfyx_gemm_like",2],
+        "1584529435111149552": ["convolution_gpu_bfyx_gemm_like",1],
+        "14261214737408786954": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "4366168099274266975": ["convolution_gpu_bfyx_gemm_like",1],
+        "14436334357815544497": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9464448984918455020": ["fully_connected_gpu_fb_io_ref",1],
+        "16462862831307415504": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11758765408733113291": ["convolution_gpu_bfyx_gemm_like",1],
+        "2140514316203117958": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17310332946322628458": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15465799788109255561": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3659996017773078064": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15808629700189777056": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12584692605608021657": ["fully_connected_gpu_fb_oi_ref",2],
+        "9042812985530274425": ["convolution_gpu_bfyx_gemm_like",2],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4298629909621573311": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "10578656188786691161": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1086052166358768751": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17459500507201824299": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "12022980249970038824": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "14118838785256822389": ["convolution_gpu_bfyx_gemm_like",2],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10972882561062503097": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "6100453836448514115": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1388093734262707746": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "4349976387188497685": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "2817383483458239293": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "8734419426540206087": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "18026468427978643933": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "6493920223660825755": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "262113403359175565": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "3086110559166474482": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "4614700272179482173": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10158184435144178161": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15515233599783472078": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5320623021116851093": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4790599496008369129": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14592395793778583608": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "17500857407975308984": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "17078700948595127028": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "788516646345239698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14244541340756841557": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "13939763360217628282": ["convolution_gpu_bfyx_gemm_like",1],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7157064096682175957": ["convolution_gpu_bfyx_gemm_like",1],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "7846532542186702987": ["convolution_gpu_bfyx_gemm_like",1],
+        "1452841775482537260": ["convolution_gpu_bfyx_gemm_like",2],
+        "3882955134902442387": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "10831204282620894983": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "2004120786408087671": ["convolution_gpu_bfyx_gemm_like",1],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "9696588462876533517": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "14792711236336832808": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "8485845304380573432": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8399477322910720113": ["convolution_gpu_bfyx_gemm_like",0],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "2968144776497288135": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "4839357013731987873": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "435261825003875448": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "14566257978356851712": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "11892210755884128272": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "11327237143350479466": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "9547451431091729288": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17252689774572814142": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "7070374681687005676": ["convolution_gpu_bfyx_gemm_like",2],
+        "3509502334639215181": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3547854341779526869": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "17303981366934280174": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7246177123265734169": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "4091001168041745125": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10118395047539851751": ["convolution_gpu_bfyx_gemm_like",2],
+        "11951606039079763598": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8154794217037682993": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12761366575293006784": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "12052225815821079044": ["fully_connected_gpu_fb_io_ref",1],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "16482301217529090205": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "7753336153932360422": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11561790484526369917": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "4660214425505918397": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2903075619523363020": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "40684756725622867": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "10902108166827340970": ["convolution_gpu_bfyx_gemm_like",2],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10269005969451576527": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "14712137616211915593": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "17405865057155583042": ["convolution_gpu_bfyx_gemm_like",2],
+        "14908665013877276517": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "13775529405693629438": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8240616667079698459": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "8402396502992483524": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "201277063146140086": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "1187622888238643867": ["convolution_gpu_bfyx_gemm_like",2],
+        "12634802060661668222": ["convolution_gpu_bfyx_1x1",1],
+        "4563407231964979217": ["convolution_gpu_bfyx_gemm_like",1],
+        "15890492401334524258": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16164111348549092216": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "17433037267999205350": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "7371339724529362579": ["convolution_gpu_bfyx_gemm_like",2],
+        "18259001228411909210": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "12994819742376207273": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "1630585964216121575": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11576182324195008022": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "14539163960605215528": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17609882667499000436": ["convolution_gpu_bfyx_gemm_like",0],
+        "14251403312385260177": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "17868294056467093895": ["convolution_gpu_bfyx_gemm_like",2],
+        "5754844816339228920": ["convolution_gpu_bfyx_gemm_like",1],
+        "16124702296533772526": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",2],
+        "10624567684389583173": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "14146157492452859667": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13074593348097634731": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "5335250793358473555": ["convolution_gpu_bfyx_gemm_like",1],
+        "10104091044601583658": ["convolution_gpu_bfyx_gemm_like",1],
+        "3020115657931277672": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9549667332801021099": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15816540550252147706": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14484004336536993120": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7122950455826378169": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17208186152576814861": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "5291944277945000781": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "11433534680781300610": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "1925626127045202964": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "15421166985948480394": ["convolution_gpu_bfyx_gemm_like",1],
+        "4640696923527766618": ["convolution_gpu_bfyx_gemm_like",2],
+        "3064765745900772872": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "7744787957569714828": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10500029207807372735": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2894138412746654795": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "11599990834682830362": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17873182129275583020": ["convolution_gpu_bfyx_gemm_like",2],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "2585767464396438954": ["convolution_gpu_bfyx_gemm_like",0],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "166267183356660549": ["convolution_gpu_bfyx_gemm_like",2],
+        "13657522194775317201": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "18404344881797725263": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "12647099325257717945": ["convolution_gpu_bfyx_gemm_like",2],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15718782218800307385": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "6397841935795796056": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "1050570995635673400": ["convolution_gpu_bfyx_gemm_like",2],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "4642402648038764246": ["convolution_gpu_bfyx_gemm_like",2],
+        "1197184887743937394": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5357531127711906072": ["convolution_gpu_bfyx_gemm_like",1],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9468542963649996822": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "14930745998253392722": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "12896159402462325805": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "241860795253927746": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "1484007449719260391": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1891216794223363114": ["convolution_gpu_bfyx_gemm_like",2],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "17096175733187202673": ["convolution_gpu_bfyx_gemm_like",2],
+        "7391591731082133842": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "8643089982608103149": ["convolution_gpu_bfyx_1x1",2],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "2728956755635458379": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4716188972902735458": ["convolution_gpu_bfyx_gemm_like",2],
+        "10093554313775878065": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",538],
+        "4495774394017823312": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "6644418194983229139": ["convolution_gpu_bfyx_gemm_like",2],
+        "3389739049224815652": ["convolution_gpu_bfyx_gemm_like",2],
+        "13027039165868458729": ["convolution_gpu_bfyx_gemm_like",1],
+        "16831114690704826637": ["convolution_gpu_bfyx_gemm_like",1],
+        "8863398172720091880": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "7108596712012465804": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1907439276166837309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10743628077362128751": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11374410888638324212": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2786925522916317149": ["convolution_gpu_bfyx_os_iyx_osv16",761],
+        "4797026040899499511": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "17257466221539644081": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "16468779692009938330": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "10608496431404827757": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12138341287265949399": ["convolution_gpu_bfyx_gemm_like",1],
+        "1682776041247037802": ["convolution_gpu_bfyx_gemm_like",2],
+        "14716719350966652036": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15260448822338206631": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17509205154057032109": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "12840204133991239572": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "3522455279376021211": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "12753199606413122334": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "15183511809138557392": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "6527268791835193134": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10885831773581103653": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1954052357826969119": ["convolution_gpu_bfyx_gemm_like",1],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "2746052215199129520": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "6943519872561469460": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "8367989677286805427": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12534001599784153836": ["convolution_gpu_bfyx_gemm_like",1],
+        "11632948358256249708": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "7982628452987720190": ["convolution_gpu_bfyx_gemm_like",2],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "2929980913168445753": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "17308063122516317342": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "4202116155711873525": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17947613081555491099": ["fully_connected_gpu_fb_oi_ref",1],
+        "8963262014498730146": ["convolution_gpu_bfyx_gemm_like",1],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "8740196547852036537": ["convolution_gpu_bfyx_gemm_like",1],
+        "3503236715353689942": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "77240414396225397": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16081988990653666386": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "15668060723417155782": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2460415719642436412": ["convolution_gpu_bfyx_gemm_like",1],
+        "8335501317577461610": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7811861756798601201": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4196367396954155354": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3910549475873353422": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "3438852523146175580": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12700957546822808929": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3588791913550955553": ["fully_connected_gpu_fb_oi_ref",1],
+        "6303003639592032299": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "8467771025017377254": ["convolution_gpu_bfyx_gemm_like",2],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "9854440591497995284": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9459869325970475576": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "451787079167744428": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "4369680877112803848": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10425622870001886240": ["convolution_gpu_bfyx_gemm_like",2],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "11595387512434355394": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "17021953651379372973": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7853648744637103420": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "16181623411787179429": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11231597775940542830": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "14384062335728088286": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "4860861645314518892": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "14178934083928811388": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8509941319309380587": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "6857064389795419021": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "12314918602191412697": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14349335089732252796": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12923653434892323603": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "10625675062556386448": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17504669611941355931": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4248427635083216412": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "11815825155082424936": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "15325810055037682679": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "11857403052583858392": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2328698995040390396": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "11107930597263802755": ["convolution_gpu_bfyx_gemm_like",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "10127626701775288565": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1579733029852052699": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "4860019935631927113": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "3434842614653335826": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "12882754981683858333": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "17001492460236540325": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "10902747200305475466": ["convolution_gpu_bfyx_gemm_like",0],
+        "9987939079053625302": ["convolution_gpu_bfyx_gemm_like",2],
+        "850343942782057099": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12371817808483211497": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12977141272959735649": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10295400862890021635": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8962502004422485576": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1691020960118022320": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "14397348576352573007": ["convolution_gpu_bfyx_gemm_like",2],
+        "5053369963163583573": ["convolution_gpu_bfyx_gemm_like",1],
+        "13447226378200557777": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13564654155363057485": ["convolution_gpu_bfyx_gemm_like",2],
+        "15496355513574200965": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "14022116362268035779": ["convolution_gpu_bfyx_gemm_like",2],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "17923260699148240081": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "9940300152880498818": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "5284132464580556804": ["convolution_gpu_bfyx_gemm_like",1],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "2830742500858558621": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16596028606733932975": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17392732266843821039": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "10806992251978564302": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "13716836930727272782": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13126786259906598018": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "368147139706197757": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14973411884734235059": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "269829518575229806": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "425222358618423500": ["convolution_gpu_bfyx_gemm_like",2],
+        "8212533074856783509": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "1158407843601379115": ["convolution_gpu_bfyx_gemm_like",0],
+        "17026348860895225619": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "13702914647519703599": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9647713236241614167": ["convolution_gpu_bfyx_gemm_like",2],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15982499072593548907": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "15158468970890089465": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "2116524516810466877": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4600261954762222519": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "14116682822622440033": ["convolution_gpu_bfyx_gemm_like",2],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",0],
+        "13071064509662090710": ["convolution_gpu_bfyx_gemm_like",2],
+        "15178012823756517910": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14702670413549232065": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1818234431954731769": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "4104945759139088078": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "13420802275377435086": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "84858894896261863": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "14893822644567136435": ["convolution_gpu_bfyx_gemm_like",2],
+        "10365519690439054710": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16852690434396099861": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "1034716660124798032": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7561761907958081895": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "835367600773871252": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4151997155802743451": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5699637716202391188": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "238804705672659503": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "5340016094501559693": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "922541506531537121": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "8529170838214082841": ["convolution_gpu_bfyx_gemm_like",2],
+        "6323026044750482867": ["convolution_gpu_bfyx_gemm_like",2],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4398254363079659976": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3340594153142636962": ["convolution_gpu_bfyx_gemm_like",2],
+        "3430998232987873998": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "17526891234501366023": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "15078379507314446744": ["convolution_gpu_bfyx_gemm_like",2],
+        "585914943085061885": ["convolution_gpu_bfyx_gemm_like",2],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "17453621319901961773": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "13657774210341324470": ["convolution_gpu_bfyx_gemm_like",1],
+        "14237815472706635543": ["convolution_gpu_bfyx_gemm_like",2],
+        "16767564582561837873": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "9539616823548370185": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "176148486634277377": ["convolution_gpu_bfyx_gemm_like",2],
+        "1891073256003809934": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "8800251965243080024": ["convolution_gpu_bfyx_gemm_like",2],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "3015996171698570561": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3219239043521617253": ["convolution_gpu_bfyx_gemm_like",2],
+        "6630020506382714373": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "10384416235770656262": ["convolution_gpu_bfyx_gemm_like",1],
+        "4732699611696731044": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8680545947510235993": ["convolution_gpu_bfyx_os_iyx_osv16",667],
+        "13940433448128376511": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "7822148442995976259": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1550689033020233966": ["convolution_gpu_bfyx_gemm_like",2],
+        "10848407542826653699": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "6517802281521111563": ["convolution_gpu_bfyx_gemm_like",1],
+        "9144136375141111897": ["convolution_gpu_bfyx_gemm_like",2],
+        "11307721164906705899": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "1854265455057352782": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "604467633591545941": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11240189248024145687": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "505027953105355818": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4211445170027080823": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4406157095142118884": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "12329909110827539139": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10062957707721107508": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15971924211584724882": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4447895709141687848": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6641348239674215714": ["convolution_gpu_bfyx_gemm_like",2],
+        "544003022213487787": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "7881314798558018337": ["convolution_gpu_bfyx_gemm_like",2],
+        "14718143989976451689": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "2305706332728008948": ["convolution_gpu_bfyx_gemm_like",1],
+        "12451592945087000191": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7877872008801536537": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",860],
+        "1460916897832302487": ["convolution_gpu_bfyx_gemm_like",1],
+        "14156845527754813253": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15659671804906879034": ["convolution_gpu_bfyx_gemm_like",2],
+        "14838067105091112485": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4451257789691974239": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "7410628771323937530": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "3285688984628545255": ["fully_connected_gpu_fb_io_ref",1],
+        "6768322540857745605": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16126210124715599267": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "15911508155433936727": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "10794662801660960189": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "8985531644129639832": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",165],
+        "7396823789595001064": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "10533367671706069274": ["convolution_gpu_bfyx_gemm_like",2],
+        "4007319206075386920": ["convolution_gpu_bfyx_gemm_like",2],
+        "4614042998549572181": ["convolution_gpu_bfyx_gemm_like",2],
+        "13694766887442024878": ["fully_connected_gpu_fb_io_ref",2],
+        "9556219639756304369": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "14269654271903961430": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "13319880343534837963": ["convolution_gpu_bfyx_gemm_like",1],
+        "15643053402284856082": ["convolution_gpu_bfyx_gemm_like",1],
+        "15101986369567160956": ["convolution_gpu_bfyx_gemm_like",2],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "7726714223809300966": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14600034178934274457": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "14248587383098743406": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "3935883681780676157": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "11800958516083095340": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3860603464276263676": ["convolution_gpu_bfyx_gemm_like",2],
+        "1117787205894124896": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "3031115694124492679": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "12194352995334529714": ["convolution_gpu_bfyx_gemm_like",2],
+        "11058082057683584650": ["convolution_gpu_bfyx_gemm_like",2],
+        "5485050451156514865": ["convolution_gpu_bfyx_gemm_like",2],
+        "17886436103211436626": ["convolution_gpu_bfyx_gemm_like",2],
+        "13170031087212196468": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "14433939319502072879": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5134857932624749530": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15737508945513376813": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15809072026388479729": ["convolution_gpu_bfyx_os_iyx_osv16",283],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11367813096511965002": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "3919577663893354177": ["convolution_gpu_bfyx_gemm_like",1],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "12136803297132972709": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3526198034974948081": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "7831542641855749925": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7254869458810021127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "830147122986411443": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2033072905537284499": ["convolution_gpu_bfyx_gemm_like",2],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "15464714725848277081": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5185125307593023170": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "13550337096609413041": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "41672385434660942": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "2933183897022161826": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",59],
+        "13886526360627032217": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "4047806462440750215": ["convolution_gpu_bfyx_gemm_like",1],
+        "17713034180977313726": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9025790715924779508": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "12281346074445607180": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13414375996946350733": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17466025028296506313": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "316225690176910392": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3005178737729927131": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "5011273172385428756": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17715553891959228879": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2301409406426420354": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "7281661441196896385": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "5714538749435744920": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "10888435127006141874": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11312797737791604596": ["convolution_gpu_bfyx_gemm_like",2],
+        "1062508357634542606": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "18009083375897554008": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "13036499105391951007": ["convolution_gpu_bfyx_gemm_like",2],
+        "13132550921538397546": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "10290107543739998181": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "16484600784717969318": ["convolution_gpu_bfyx_gemm_like",1],
+        "6086336348849756671": ["fully_connected_gpu_fb_io_block_fp16",0],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "572155668587252712": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "12576360049619146496": ["convolution_gpu_bfyx_gemm_like",2],
+        "16683485007140805060": ["fully_connected_gpu_yxfb_ref",2],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "3256940792095638732": ["convolution_gpu_bfyx_gemm_like",1],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4428125859693766145": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9655550151067451233": ["convolution_gpu_bfyx_gemm_like",2],
+        "1480287432874335824": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "14366252780310630703": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8333743604646422982": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2932914865200583326": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "9970142663470031403": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15259825477604482502": ["convolution_gpu_bfyx_gemm_like",0],
+        "8939683514448064461": ["convolution_gpu_bfyx_gemm_like",2],
+        "11612998433409522582": ["convolution_gpu_bfyx_gemm_like",2],
+        "6914536960012332706": ["convolution_gpu_bfyx_gemm_like",2],
+        "13076343553185159307": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18086782289842715645": ["convolution_gpu_bfyx_gemm_like",1],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "15561518067918160695": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "4805958162773855302": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "14122018505646948996": ["convolution_gpu_bfyx_gemm_like",2],
+        "17407904982433770732": ["convolution_gpu_bfyx_gemm_like",1],
+        "2623687018437195679": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5127769906401798990": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16689318540732157754": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",2],
+        "6647969101146756031": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "8812763803467512830": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7499082230554771515": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "13510598063226540077": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9963817056423168830": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4171848506399696854": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "7780366826820540504": ["convolution_gpu_bfyx_gemm_like",2],
+        "13206826317378863148": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "8175595372513695437": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "8224143262995973449": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4999210721703970274": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "9937641338455246118": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8161047856682416508": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13443130482173929700": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "281287280558289393": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "10413043556440687328": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "12890207857767896504": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "18157442326218165947": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15384168056682476462": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "557778263661655803": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "14650273075211365393": ["convolution_gpu_bfyx_gemm_like",2],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7559892774312756176": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "14203217958874365062": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "7230623964042057933": ["convolution_gpu_bfyx_gemm_like",2],
+        "3761770343527826418": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9416236213942870134": ["convolution_gpu_bfyx_gemm_like",2],
+        "12096396455109952715": ["convolution_gpu_bfyx_gemm_like",2],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15380105196319354141": ["convolution_gpu_bfyx_gemm_like",1],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9423239651872522813": ["convolution_gpu_bfyx_gemm_like",1],
+        "6326191473779365124": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "17430593168191424639": ["convolution_gpu_bfyx_gemm_like",1],
+        "9516288831713776693": ["convolution_gpu_bfyx_os_iyx_osv16",272],
+        "15364374265752682266": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "14037325204801680738": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "3170336071769787200": ["convolution_gpu_bfyx_gemm_like",2],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11532872181912525509": ["convolution_gpu_bfyx_gemm_like",2],
+        "2652267888871336297": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "2470579932413307757": ["convolution_gpu_bfyx_gemm_like",2],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14904665242518014005": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "4986977887030495943": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14537994197428038805": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12655099960717366198": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6013434489252641471": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "1132353580998754406": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14824758036755713701": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9080269503597463911": ["convolution_gpu_bfyx_gemm_like",2],
+        "8620072463881015653": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14084855778741260863": ["convolution_gpu_bfyx_gemm_like",2],
+        "5185895996350118172": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2532962442388536022": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4545501713797069587": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2152903140704848574": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5871082277006078841": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "5050495757462452653": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3830091089824446164": ["convolution_gpu_bfyx_gemm_like",1],
+        "6981537186704688907": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7005371843527735283": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "15314178289202641916": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "9531730330306606343": ["convolution_gpu_bfyx_os_iyx_osv16",153],
+        "11825205449232126827": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2724007091383127418": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "18242682488017822077": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "6962268765187856246": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12610004507393467447": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5448665190811365701": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",91],
+        "10236258478395201152": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "1895945774251432343": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11102920976866402928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13277308739029064167": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "4834743410195700260": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "397445657349822499": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "16494581774051338901": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5608447459568229694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3819763245853861272": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "12323418436121785375": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8200094670006738584": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15550722997950669458": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16801078648431425148": ["convolution_gpu_bfyx_gemm_like",0],
+        "1995546197385478214": ["convolution_gpu_bfyx_gemm_like",2],
+        "9601849246293120347": ["convolution_gpu_bfyx_gemm_like",2],
+        "6638761803107874904": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "1622731194539871461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13192885349640152576": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4129586781834275070": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "15456771485750114116": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4327450388326573746": ["convolution_gpu_bfyx_gemm_like",2],
+        "12503605837910457108": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "4445912157712391517": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3971456598769336038": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5454796925594082324": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4229105529069729944": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15924144379094505874": ["fully_connected_gpu_fb_oi_ref",1],
+        "17015791782274123780": ["convolution_gpu_bfyx_gemm_like",1],
+        "18139055731468596187": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11806402239500046867": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "12008952324872799824": ["convolution_gpu_bfyx_gemm_like",2],
+        "5939121107940759940": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "10897622326486559468": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "10609980283092655115": ["convolution_gpu_bfyx_gemm_like",1],
+        "3557182643072772598": ["convolution_gpu_bfyx_gemm_like",1],
+        "5589785455223385189": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9410125656044318792": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "1375259485223819020": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "2101721234597882962": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10858234923346500323": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18112958483003382733": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1330842758352650583": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8898910394425958745": ["convolution_gpu_bfyx_gemm_like",1],
+        "3314459110790355757": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15976399554094563736": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "16389826434776949524": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12251901229904154127": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "2264520082689779253": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "8093154215631195896": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9236621881488650027": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10965563190266380694": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "5587539329568150667": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6651097363666320726": ["convolution_gpu_bfyx_gemm_like",2],
+        "3831201505512446456": ["convolution_gpu_bfyx_gemm_like",1],
+        "17086887873464601732": ["convolution_gpu_bfyx_gemm_like",1],
+        "13292923826380958700": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2511072616914149110": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10598995451755327159": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "6925829066248055368": ["convolution_gpu_bfyx_gemm_like",2],
+        "517601465150912854": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "251300311986835571": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1241188741090538769": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "18133614045401867449": ["convolution_gpu_bfyx_gemm_like",2],
+        "10933247456003592661": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "17666004363345457085": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17771447090715962298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9608917563823863132": ["convolution_gpu_bfyx_gemm_like",2],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1006527610094211417": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "15927212142469570269": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8235002440285527553": ["convolution_gpu_bfyx_gemm_like",1],
+        "4192716493303517040": ["convolution_gpu_bfyx_gemm_like",2],
+        "3380653500106294036": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "11453044274130869816": ["convolution_gpu_bfyx_gemm_like",2],
+        "9547404823672679740": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12862797248089361992": ["convolution_gpu_bfyx_gemm_like",2],
+        "17179123144975837983": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2543041530639980505": ["convolution_gpu_bfyx_gemm_like",1],
+        "7439340221097179208": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4101449235783342476": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5032195346490064156": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15754688305730191542": ["convolution_gpu_bfyx_gemm_like",2],
+        "6914775146138105785": ["convolution_gpu_bfyx_gemm_like",2],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "15245792492785141641": ["convolution_gpu_bfyx_gemm_like",2],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "3005276417937854742": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9636232825599826837": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "4646176801168621136": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1104489643524273315": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8131682691875884781": ["convolution_gpu_bfyx_gemm_like",1],
+        "3730207439375250056": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17399103575103078835": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2310549887200001260": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "13491655481292956895": ["convolution_gpu_bfyx_gemm_like",1],
+        "11569367085498045793": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "13729951531199985382": ["convolution_gpu_bfyx_gemm_like",2],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4792657031481471098": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "296202142406900242": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "11140864132614066113": ["convolution_gpu_bfyx_gemm_like",2],
+        "12072890225919159372": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14840301687056551916": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "16211466749116679534": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16304402386608713955": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18332090297993015499": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "11777373751892075391": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "95993272253183796": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3292879092145281224": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "15642549417953837059": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10354305663463607086": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "18160969423211875528": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "4593261844817210660": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15240660399630429406": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3643466095681664346": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "12847879935060092791": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "17769703068450272262": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "232382233865868417": ["convolution_gpu_bfyx_gemm_like",2],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "3244402155461139559": ["convolution_gpu_bfyx_gemm_like",2],
+        "7903891232234389925": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8479047101064948298": ["convolution_gpu_bfyx_gemm_like",0],
+        "7430073011895298582": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16033144151193421543": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4304943753428518690": ["convolution_gpu_bfyx_gemm_like",2],
+        "9756049510998074315": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "592364460086746355": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "8922463054055280800": ["convolution_gpu_bfyx_gemm_like",2],
+        "17116941326889312928": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9040986180016264906": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17185089684685480638": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12386437738920143482": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10804406975968573869": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "2338707843044884352": ["convolution_gpu_bfyx_gemm_like",2],
+        "10773411423039491193": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "15097371415144491976": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "8107597524360102037": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8740183428702591218": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "596934040273798962": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18067353229273804720": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8469338060514215816": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1202020283576886284": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11261619081095309088": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "9573589861499897842": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11459784003592366395": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17517541283617012275": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "2418288192668085805": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17406383217119217230": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2916077416184925232": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "937050062571228573": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7806837641999814363": ["convolution_gpu_bfyx_gemm_like",2],
+        "3939977982577786175": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "5635449856699664273": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4846216894450341698": ["convolution_gpu_bfyx_gemm_like",1],
+        "801864263975761712": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "18349087959351486710": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "9475812329914836280": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17784882947271841103": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "530825424084837479": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "16900305050319129555": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "8913451832923806760": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5287076386757143976": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "88592091379585141": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1838534101161814609": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11970466555294072275": ["convolution_gpu_bfyx_gemm_like",2],
+        "14335423820860953927": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7441199361135503715": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "14731393773801790100": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3012268657922581268": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10835321391911234206": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10532500300200244159": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "8028456017016080468": ["convolution_gpu_bfyx_gemm_like",1],
+        "14289048840489035546": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8653024334982611044": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "4557272439632791722": ["convolution_gpu_bfyx_gemm_like",1],
+        "18134140047840716203": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "8436644625511258721": ["convolution_gpu_bfyx_gemm_like",2],
+        "15516674573659704770": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "2521821959816944292": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17703907155485973486": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "2839767407547705101": ["convolution_gpu_bfyx_gemm_like",2],
+        "11829442945690098558": ["convolution_gpu_bfyx_gemm_like",1],
+        "7132441144511706824": ["convolution_gpu_bfyx_gemm_like",0],
+        "5745481082184931194": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4860779741225078946": ["convolution_gpu_bfyx_gemm_like",1],
+        "5296506025538423220": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17601171646153308079": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "14553738887970260308": ["convolution_gpu_bfyx_gemm_like",2],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",435],
+        "12353956380178079089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "3150231129728961455": ["convolution_gpu_bfyx_gemm_like",1],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "10604830376938742429": ["convolution_gpu_bfyx_gemm_like",2],
+        "3170785962566427770": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11809236497308682596": ["convolution_gpu_bfyx_gemm_like",2],
+        "8997120235555587461": ["convolution_gpu_bfyx_gemm_like",2],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11067412830219638639": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5812274221348979687": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "545425355231744794": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "18445243511250094011": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "796900095669815456": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "12492763342322011136": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13767795972414139958": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "9083686317073801642": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10068502639160680134": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "7504663136669214601": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3968994333196289265": ["convolution_gpu_bfyx_gemm_like",2],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "11673569290324764842": ["convolution_gpu_bfyx_gemm_like",0],
+        "10306169610486701545": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "1474719104479956715": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6496839689453807726": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "9199174367023202640": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7959969582538910953": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13744951984978188201": ["fully_connected_gpu_fb_io_ref",0],
+        "6643161848623134458": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "18235067315439611192": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "12028030221272546172": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "2451712485584835395": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13818587810073749596": ["convolution_gpu_bfyx_gemm_like",1],
+        "9673176853197584682": ["convolution_gpu_bfyx_gemm_like",1],
+        "18076129452098771655": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9765339420071627045": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "17762040448815681058": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "8892991171111842341": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10662239532841666965": ["convolution_gpu_bfyx_gemm_like",1],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "3164422950831542784": ["convolution_gpu_bfyx_gemm_like",2],
+        "17075150439662364176": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11361013180071053597": ["convolution_gpu_bfyx_gemm_like",1],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2150284597332493904": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "1961296939362567851": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9743806043658380623": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "7243161613448507792": ["convolution_gpu_bfyx_gemm_like",1],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "5401523175111660554": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5298952273692538291": ["convolution_gpu_bfyx_gemm_like",1],
+        "7085416207166146240": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "3830787224073518842": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "8305500373806058745": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2577413012740709678": ["convolution_gpu_bfyx_gemm_like",2],
+        "15134268179029323647": ["convolution_gpu_bfyx_gemm_like",2],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4055753250105853003": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6158514925486943212": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8131617570786904723": ["convolution_gpu_bfyx_gemm_like",2],
+        "7510055418609679364": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "11655994466278963438": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15912553971677187913": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "16485921493309285440": ["convolution_gpu_bfyx_gemm_like",2],
+        "9758759365463492505": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "3290503865540626256": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10681304359334525584": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8631194673451861459": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12707748441880165396": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4318632837402329958": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "3475757648408068589": ["convolution_gpu_bfyx_gemm_like",2],
+        "16522546805419218429": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14634279730953549909": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "17446388159565719362": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2251572761614039612": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "14287890401250603057": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4987922194420804256": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "3382494956350224120": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9747165558500755104": ["convolution_gpu_bfyx_gemm_like",2],
+        "15434706304418357961": ["convolution_gpu_bfyx_gemm_like",2],
+        "10624246057883518638": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "18106333667377667797": ["convolution_gpu_bfyx_gemm_like",2],
+        "6329618009202266591": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "16120159001372711511": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "5124241485043124110": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "14108113294744119367": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "6809026385816665583": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2174528711050181972": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "15212317205888563836": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14408266407898585602": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "17993337310288098038": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4586633477264151844": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15180406256083730261": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "9145357433824567384": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "791937929163665770": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "5214678408335388758": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15653223776766070604": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "962676948282027870": ["fully_connected_gpu_fb_io_ref",1],
+        "12843856637642525155": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "5769404877199637961": ["convolution_gpu_bfyx_gemm_like",2],
+        "2056766012044921101": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17358462939783262207": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3658149289395969504": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "17638692805430115529": ["convolution_gpu_bfyx_gemm_like",2],
+        "13462726136352103466": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4538102435488584866": ["convolution_gpu_bfyx_gemm_like",1],
+        "8809794528993445200": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",2],
+        "6133854782246597175": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10686870945055880185": ["convolution_gpu_bfyx_gemm_like",0],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13044020050176766314": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17152100243867367458": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "15511138074959300404": ["convolution_gpu_bfyx_gemm_like",2],
+        "14001920054473316909": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14233219774448115529": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4377137812917082153": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "11494395549955384747": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "5843679089588930933": ["convolution_gpu_bfyx_os_iyx_osv16",146],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "5582107298039488951": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "7394848434332739139": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8515479970005301094": ["convolution_gpu_bfyx_gemm_like",2],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6771637612965430926": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "16150934538381572916": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "12083217714727863832": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "4368522743441422202": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "2238901105639912692": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "14522844693999581518": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "12173409033330010794": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16815680874311765189": ["convolution_gpu_bfyx_gemm_like",1],
+        "9852052796465340830": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "9628735886189157469": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "2348721939771018658": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4833761011498696645": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "11088128828863596806": ["convolution_gpu_bfyx_gemm_like",1],
+        "14322392426975869640": ["convolution_gpu_bfyx_gemm_like",1],
+        "6056291179600370019": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1934379409955686502": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17235360775064303316": ["convolution_gpu_bfyx_gemm_like",2],
+        "4568839461523224811": ["convolution_gpu_bfyx_gemm_like",2],
+        "5680888227752935228": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "3223787640285180270": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15160192060731796225": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "18125075313255528454": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11487565672628286526": ["convolution_gpu_bfyx_gemm_like",0],
+        "9038991914155436715": ["convolution_gpu_bfyx_gemm_like",1],
+        "17274625805315816028": ["convolution_gpu_bfyx_gemm_like",1],
+        "12278842522836720245": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2912984501615111849": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2363414141971004557": ["convolution_gpu_bfyx_gemm_like",1],
+        "10665697051755790682": ["convolution_gpu_bfyx_gemm_like",2],
+        "8162762980597497749": ["convolution_gpu_bfyx_gemm_like",2],
+        "288853243482418538": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11661214901264500438": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17285699593273891901": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4780830855450408093": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7504074736798125353": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11114015660322254541": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421288552109066791": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",2],
+        "5122639094068865656": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15936869458531244961": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11331539079347079374": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "9154705094446538279": ["fully_connected_gpu_fb_oi_ref",0],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9914440875772341708": ["convolution_gpu_bfyx_gemm_like",2],
+        "6660077021779164371": ["convolution_gpu_bfyx_gemm_like",2],
+        "9729771183572950642": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5740738339752793113": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5361028467247182860": ["convolution_gpu_bfyx_gemm_like",2],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "8045697952241865861": ["convolution_gpu_bfyx_gemm_like",2],
+        "10876578967419315028": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "1650080413259413393": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "2281832083123936555": ["convolution_gpu_bfyx_gemm_like",2],
+        "914589847837601900": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6400671582981760192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18186437875509712500": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "12582321591799165205": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "12518571127411736885": ["convolution_gpu_bfyx_gemm_like",1],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "16169024543367503806": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "15993651594402422200": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3447774474841314860": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "3805991105758534542": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "14601915376467155290": ["convolution_gpu_bfyx_gemm_like",0],
+        "15140592697506341614": ["convolution_gpu_bfyx_gemm_like",1],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",2],
+        "7426788519998680898": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "5321807316257768": ["convolution_gpu_bfyx_gemm_like",1],
+        "13629962867123974535": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15470979879166640563": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "6065819201836017182": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9219978118417391687": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10253092389452603623": ["convolution_gpu_bfyx_gemm_like",1],
+        "2950917846016525392": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6489448536745533209": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "15863633107759120207": ["convolution_gpu_bfyx_gemm_like",0],
+        "15816980369722540994": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "11000413508839562976": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16131386739027190836": ["convolution_gpu_bfyx_gemm_like",2],
+        "6553565990795990748": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "4695182996147218495": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "11812216902426327523": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "15158997684077722015": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "10672816826126184746": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",656],
+        "11906319144823550582": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "9781830607177020570": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4534480875955599254": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "682912708716537431": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2346855978590136528": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "6095158932103797740": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11997615422168828775": ["convolution_gpu_bfyx_gemm_like",2],
+        "14423094456821270228": ["convolution_gpu_bfyx_gemm_like",2],
+        "10997029728191881587": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7279393739634103483": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7998930863626763670": ["convolution_gpu_bfyx_gemm_like",2],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6526586547926160627": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11797589297451289242": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "5271530745426214211": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "1876286132660871464": ["convolution_gpu_bfyx_gemm_like",0],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "18043340998699622388": ["convolution_gpu_bfyx_gemm_like",2],
+        "3671753639665974938": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "16035563519857925932": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "6683090495189325653": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "7142195383189497127": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "10381752670329683275": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "7390751298966198773": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8377593240579657721": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2498920887656279332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14577775579978745344": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10054253863699485503": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "928757863265393904": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17845195044080380488": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9444953530704856016": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13221156296791499146": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "1239861345413267621": ["convolution_gpu_bfyx_gemm_like",2],
+        "13219865669259079983": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",0],
+        "8779164026828163571": ["convolution_gpu_bfyx_gemm_like",1],
+        "5094419710576598497": ["convolution_gpu_bfyx_gemm_like",2],
+        "14585370009659482450": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "2794704364476462562": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "562221645849170027": ["convolution_gpu_bfyx_gemm_like",2],
+        "1878679922772738648": ["convolution_gpu_bfyx_gemm_like",2],
+        "5759260743809103651": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "7878217536124016199": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "2213068950786625268": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "9875997976286355123": ["convolution_gpu_bfyx_gemm_like",1],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "2713481951804190325": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "17162489604305127396": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "9322011063845207679": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17025997656996518171": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6251247460381059571": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2930848604606590505": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4141616050120443260": ["convolution_gpu_bfyx_gemm_like",1],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "13248567106128518549": ["convolution_gpu_bfyx_gemm_like",2],
+        "11555678098290364758": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14827538610133799379": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3286476039871096924": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3047710665820732705": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "7014674808417899328": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2656076513222828369": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "17381682740282686038": ["convolution_gpu_bfyx_gemm_like",1],
+        "2510919738337557939": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14121939808880396150": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "8253823502854784432": ["convolution_gpu_bfyx_gemm_like",2],
+        "1373904073013943690": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "17808913959977434594": ["convolution_gpu_bfyx_gemm_like",1],
+        "13103537372248097713": ["convolution_gpu_bfyx_gemm_like",1],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "9207334433308148635": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "838825600917352376": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15148625184033310404": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14705509109623500235": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18443643871208996500": ["convolution_gpu_bfyx_gemm_like",2],
+        "12338760476079493547": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "172584114180442549": ["convolution_gpu_bfyx_gemm_like",1],
+        "1923745286075356181": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17844743590995529463": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "15241191584896579183": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",166],
+        "4107186383182650542": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "15662207751131195569": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3124997104810767514": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13095408117538194584": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "7807168142899312025": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "18265901700619296616": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11213283109763090897": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2420425134749678611": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "4195847890935259046": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4108579755980014185": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17023103136234805388": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9823997593704517392": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "10133406610245448421": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6953499208425592115": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "6431838057506760173": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17551915565459110848": ["convolution_gpu_bfyx_gemm_like",2],
+        "3590316457726550768": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14128599551956588603": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "11049130623091275457": ["convolution_gpu_bfyx_gemm_like",2],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17825280904760131680": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180491232489548313": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14980327142253281498": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8529647257749011908": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13289721141799196039": ["convolution_gpu_bfyx_gemm_like",2],
+        "14200479385082007529": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "11533151357949131860": ["convolution_gpu_bfyx_gemm_like",2],
+        "7761195307416102494": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "10966081583785531511": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "3464774409833295689": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "6430450975098624706": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11378458002317912396": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9910414853336797922": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7617773507561261623": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17546650302679801134": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4301372734564127254": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10540323786245205242": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "2929690114697368478": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12554532636938441328": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8114928396876060694": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "4573547058027867538": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1146282291269334070": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1192709652314183388": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7552049239568474944": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10720525166362537653": ["convolution_gpu_bfyx_gemm_like",2],
+        "3332444589775844154": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5977248663249062384": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11215862132334892351": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "10362264665270226136": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "6999530153839596796": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "6823494099194746145": ["convolution_gpu_bfyx_gemm_like",1],
+        "9141802671320572984": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8032685176029570383": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11260588538207111217": ["convolution_gpu_bfyx_gemm_like",1],
+        "11892088065638996743": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13014443130752087867": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "17177353407003831190": ["convolution_gpu_bfyx_gemm_like",2],
+        "16351593165006175213": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "10254790628108678637": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2431427502927207912": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16808618754363181939": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4695273549696315193": ["convolution_gpu_bfyx_gemm_like",2],
+        "10289725524396556967": ["convolution_gpu_bfyx_gemm_like",2],
+        "5348059680010171141": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "3125577147662589592": ["convolution_gpu_bfyx_gemm_like",2],
+        "3400775107143248024": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9172445047535982729": ["convolution_gpu_bfyx_gemm_like",1],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",466],
+        "6796998865297819946": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "7654445730724243959": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17300963371220857043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",11],
+        "8079376692609682448": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "14276876004054588508": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "8369833730195120673": ["convolution_gpu_bfyx_gemm_like",2],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "8434794604559592624": ["convolution_gpu_bfyx_gemm_like",1],
+        "7942294816235384071": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "5553176511624221429": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "17869697579874327192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "10754450245035836188": ["convolution_gpu_bfyx_gemm_like",2],
+        "9583760104223104233": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8104715661182291749": ["convolution_gpu_bfyx_gemm_like",1],
+        "6577240413312348523": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16440449399643706863": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "16261543808418336089": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "2328951328483718941": ["convolution_gpu_bfyx_gemm_like",1],
+        "5095827462645341808": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14524011013133838054": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15091825614924466766": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "18194662560696168435": ["convolution_gpu_bfyx_gemm_like",1],
+        "15329084374930297871": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12854272540346358832": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "13773898185415904435": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "2737738314051715813": ["convolution_gpu_bfyx_gemm_like",2],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4104062066031480003": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3355824730785179775": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "3069396488274616770": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7473012539094225392": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11878217002671373638": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "20037669704517227": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13403617010417893318": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15378707205730840765": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10904228118889057467": ["convolution_gpu_bfyx_gemm_like",2],
+        "10660722770448981436": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "911927861489659568": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4648739521905300372": ["convolution_gpu_bfyx_gemm_like",2],
+        "14223878376624781235": ["convolution_gpu_bfyx_gemm_like",2],
+        "10131771849139346986": ["fully_connected_gpu_fb_io_ref",1],
+        "7157531901512507924": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15317510501392280831": ["convolution_gpu_bfyx_gemm_like",2],
+        "3745433390861789238": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1671347101986657824": ["convolution_gpu_bfyx_gemm_like",2],
+        "16201999154635899927": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "7457951266863598199": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "6755802278188792577": ["convolution_gpu_bfyx_gemm_like",2],
+        "9782864129820122469": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "14034029872538173432": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "16861900412880466222": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1743572310914695413": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "33889407315234685": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16339114929185730551": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1648021476477101532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8749399240948437294": ["convolution_gpu_bfyx_gemm_like",2],
+        "15598570851049411521": ["convolution_gpu_bfyx_gemm_like",2],
+        "9888097487468905169": ["convolution_gpu_bfyx_gemm_like",2],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14071393823183565145": ["convolution_gpu_bfyx_gemm_like",2],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",844],
+        "14330281759626724494": ["convolution_gpu_bfyx_gemm_like",1],
+        "8139461711635049443": ["convolution_gpu_bfyx_os_iyx_osv16",610],
+        "17372326727957287976": ["convolution_gpu_bfyx_gemm_like",2],
+        "12636120902231094700": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "16396393355098283060": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "18114814167694102037": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "10177466042250039828": ["convolution_gpu_bfyx_gemm_like",2],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17061233750738578337": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "3654489958995965359": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "12884622643701027202": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9324602658580246084": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "13762814538289753428": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3518605747492037670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "8430177853357865174": ["convolution_gpu_bfyx_gemm_like",2],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "13288543822410746011": ["convolution_gpu_bfyx_gemm_like",1],
+        "14800933038795670868": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "6476480727582657308": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "7196214243890296121": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14133509766683767462": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15019050434475217267": ["convolution_gpu_bfyx_gemm_like",1],
+        "6538694526777067399": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1119928633562250911": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10979317886451847755": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15749335301736571135": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "6973224830546378808": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "670951751279091662": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2542506456395240890": ["convolution_gpu_bfyx_gemm_like",2],
+        "4745007371868123765": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1810943242998123550": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "6678101356115372537": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "2317409971670298599": ["convolution_gpu_bfyx_os_iyx_osv16",877],
+        "6296118677770264276": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "18072663736237323230": ["convolution_gpu_bfyx_gemm_like",2],
+        "10002942280571012447": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6476949395889340429": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "8263822658108674162": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3167115892101501516": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10744779302034526105": ["convolution_gpu_bfyx_gemm_like",2],
+        "12010294231983179604": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "8374409021681741916": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "9503908816088325966": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14740550583313186369": ["convolution_gpu_bfyx_gemm_like",2],
+        "10614918790075146626": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "15119063070382146368": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3072535365860940873": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13836645410780461434": ["convolution_gpu_bfyx_gemm_like",2],
+        "16946947983339327902": ["convolution_gpu_bfyx_gemm_like",2],
+        "6744583842563891546": ["convolution_gpu_bfyx_gemm_like",1],
+        "8484526109354576450": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "8528886126454874796": ["convolution_gpu_bfyx_gemm_like",2],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "15466940145773097237": ["convolution_gpu_bfyx_gemm_like",1],
+        "12755991236707113150": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16774186226654475036": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "9277633677927827724": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "6996679663761370444": ["convolution_gpu_bfyx_gemm_like",0],
+        "1724898827344855006": ["convolution_gpu_bfyx_gemm_like",2],
+        "17758354062670710364": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",23],
+        "17620801628577659506": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5776920093461427179": ["convolution_gpu_bfyx_gemm_like",1],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "8858009650512312226": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "2850118175701764737": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "5685381761573686628": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "4942131377140353094": ["convolution_gpu_bfyx_gemm_like",1],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "15741360654354155504": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "16462033126494826292": ["convolution_gpu_bfyx_gemm_like",1],
+        "16312223896859176991": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15637565679147396649": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15489882561480858974": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "16951050796024922417": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11273168411455998347": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "11919129623429545762": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4936968239673204144": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "2469138375598281399": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1593086572473375988": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "10982382214349160582": ["convolution_gpu_bfyx_gemm_like",2],
+        "15271492161940795681": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",470],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",898],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "8465142022921853516": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "3541828356667081528": ["convolution_gpu_bfyx_gemm_like",2],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "14306044182355683449": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8312903198090907576": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6997121306455110286": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18305785425659656349": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "6579950270997373448": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "5828768432282043413": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8483234129545181544": ["convolution_gpu_bfyx_gemm_like",2],
+        "2946518372087114752": ["convolution_gpu_bfyx_gemm_like",2],
+        "9954050478761346921": ["convolution_gpu_bfyx_gemm_like",2],
+        "15939740070666326125": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15154934905173371714": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4003468969524607815": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "13816748148836642416": ["convolution_gpu_bfyx_gemm_like",0],
+        "11047625525388102466": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18178391985193947355": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "11291881629276762730": ["convolution_gpu_bfyx_gemm_like",1],
+        "4678945085654662665": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1655427025346068673": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8382355932367801226": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "1836277956961261472": ["convolution_gpu_bfyx_gemm_like",2],
+        "427362429809315581": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "625469553102754234": ["convolution_gpu_bfyx_gemm_like",2],
+        "5267143428977695208": ["convolution_gpu_bfyx_gemm_like",1],
+        "9724624621108712962": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15532419087060587119": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13091799752362714688": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "15485011864326008444": ["fully_connected_gpu_fb_io_ref",2],
+        "1370827524176794227": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "11062005455602919062": ["convolution_gpu_bfyx_gemm_like",2],
+        "15494543914974994991": ["convolution_gpu_bfyx_gemm_like",1],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",1],
+        "2694529308199677811": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "1081287304647703427": ["convolution_gpu_bfyx_gemm_like",2],
+        "11783851440679657276": ["convolution_gpu_bfyx_gemm_like",2],
+        "10127598593949337541": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "1071090704302849258": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7363788553442810299": ["convolution_gpu_bfyx_gemm_like",2],
+        "3497946462254198388": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "6621483425195088869": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14503814672536990561": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "6794427012971589670": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11307531462784240962": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5032866547826271476": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "5246229312484886433": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5670530004773188380": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4827354455626446376": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17149185480630228380": ["convolution_gpu_bfyx_gemm_like",2],
+        "7866867237563799289": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13395562320893799513": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13041981853634484809": ["convolution_gpu_bfyx_gemm_like",2],
+        "16173557782125372935": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2477866283402053371": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4286652913945761799": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "15114370307779942381": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "11091771531609585709": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15409755591665753258": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1168589063110524328": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "17251021943762069083": ["convolution_gpu_bfyx_gemm_like",1],
+        "104321144590863458": ["convolution_gpu_bfyx_gemm_like",1],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "10068872968385049754": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12811104880512633036": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9352385417006844121": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16818714747882774917": ["convolution_gpu_bfyx_gemm_like",2],
+        "13592532173351964111": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12120302918788959150": ["convolution_gpu_bfyx_gemm_like",2],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "7590767013583950613": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "8202626341817892707": ["convolution_gpu_bfyx_gemm_like",0],
+        "2966185891283165994": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9759380701896779097": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7781809277449433812": ["convolution_gpu_bfyx_gemm_like",2],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "191374388179598660": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10652512666086843369": ["convolution_gpu_bfyx_gemm_like",2],
+        "16843976559933040107": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "6673753637296082820": ["convolution_gpu_bfyx_gemm_like",2],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "8431845338648284548": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12175297963550750804": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "12312934163571823042": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",809],
+        "13432509006553485205": ["convolution_gpu_bfyx_gemm_like",2],
+        "7989188632557972153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17079309368548171402": ["convolution_gpu_bfyx_gemm_like",1],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3392632422002516166": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "5313528120127506058": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1784095455470808903": ["convolution_gpu_bfyx_gemm_like",2],
+        "2110090486638190463": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "14257161696605459633": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "11933283931932057859": ["convolution_gpu_bfyx_gemm_like",1],
+        "15585700465988560560": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "6235096928786525260": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11648841195768568983": ["convolution_gpu_bfyx_gemm_like",1],
+        "17522452942286240233": ["convolution_gpu_bfyx_gemm_like",2],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "5627351109775149477": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "16167185344265573939": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11521288355888665606": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17555040035075346152": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "7331552952865138030": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14902389080201926109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8158983334404475382": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7127306913758514626": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17065380294456704620": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "3107611675766875160": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2821441037530057414": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "8550783999616052522": ["convolution_gpu_bfyx_gemm_like",2],
+        "16627410412068117729": ["convolution_gpu_bfyx_1x1",2],
+        "15210302033167762581": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3861084063403560668": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "10849780273184392468": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17365039759826870533": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "8453402620168400406": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15394217414267195999": ["convolution_gpu_bfyx_os_iyx_osv16",386],
+        "4059085986365258440": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "9968686603153440164": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "11919579121199894437": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "8104331313502492541": ["convolution_gpu_bfyx_gemm_like",2],
+        "2679903779216253668": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "6564126728704461285": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15018685799485128700": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16320454719906370247": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13702692566238948173": ["convolution_gpu_bfyx_gemm_like",1],
+        "9626028243479089234": ["convolution_gpu_bfyx_gemm_like",2],
+        "4864384537857484286": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "6427724955844538652": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "5000147505578625898": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "148355059345569721": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "778175413671462719": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2213990183618003353": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "6724516766412732606": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12018506264719915873": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17959539037614502049": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "2984236836610169934": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "13020929028222837402": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "393130776826919699": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",2],
+        "2571186327837339204": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3574733745204419723": ["convolution_gpu_bfyx_gemm_like",2],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10386584706491193379": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3488828327160968117": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "14668529234172928874": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1190134214210434381": ["convolution_gpu_bfyx_gemm_like",1],
+        "15241636061003642501": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "4620230702710590164": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8058623285594809047": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "3621449131285713809": ["convolution_gpu_bfyx_gemm_like",0],
+        "3435773540391994106": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "11934033658708880765": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "10841786394951910408": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "15670841106242481912": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "16881320590336043120": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8069865332677721685": ["convolution_gpu_bfyx_gemm_like",1],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14706510405720911492": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "11994423635588727210": ["convolution_gpu_bfyx_gemm_like",2],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "11955762239379054277": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1691554843141984381": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "7203566080268546556": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12077176094606956613": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3709364270141803019": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "7385225716957197459": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10136297272678091418": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3296080624478711270": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6236857636305802170": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15534517308430424624": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7518734167761579102": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5951228846460391670": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5301440603380967612": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12160764253455777655": ["convolution_gpu_bfyx_gemm_like",2],
+        "956022649859563080": ["convolution_gpu_bfyx_gemm_like",1],
+        "6612643056203714506": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10944997349682267106": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "173772845058977237": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4198666727524342442": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "2571882179292959757": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "16892873598489732462": ["convolution_gpu_bfyx_gemm_like",0],
+        "5849577829817109757": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "12983461576274227638": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15705195224249560587": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10670104149348964875": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "8394337033015371278": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1898912620350738645": ["convolution_gpu_bfyx_gemm_like",2],
+        "13933912937625580405": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "14852990574796128305": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "2571778193407799664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8672860483905060438": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2124776616364429517": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "118898027441804310": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "6172851296465788161": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6696330836969622824": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11926378988530133568": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "16398511553605808939": ["convolution_gpu_bfyx_gemm_like",2],
+        "4510003738155830628": ["convolution_gpu_bfyx_gemm_like",1],
+        "6706491729783125139": ["convolution_gpu_bfyx_gemm_like",1],
+        "14212924711992025243": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "9034951536385533818": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6066347819693426556": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12153119102645240327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16381344499660251151": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17878271352732707544": ["convolution_gpu_bfyx_gemm_like",2],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "829667328391742224": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2079353700062014100": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "8454760437961964894": ["convolution_gpu_bfyx_gemm_like",2],
+        "1397214434971745171": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "8378690770140438511": ["convolution_gpu_bfyx_os_iyx_osv16",58],
+        "1200058627526593421": ["convolution_gpu_bfyx_gemm_like",2],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "14547907449418439737": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "14417033368952865805": ["convolution_gpu_bfyx_gemm_like",1],
+        "17928043901784474130": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13381833588713493653": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "9644723852089512961": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1013207188944763398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "16758962840329202004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16872172036344096583": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5124291229936820926": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "1233962450359295141": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12602356791053445447": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "16434635675895599016": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3362829461757548683": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "5163965164859517893": ["convolution_gpu_bfyx_gemm_like",2],
+        "11641605357868918146": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4674296632914491946": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3320392060021963536": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10747768416582634270": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "8025053805734757314": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17350963651826443169": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8255732638278792698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14256842018830898376": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "707979507145930311": ["convolution_gpu_bfyx_gemm_like",2],
+        "14880517974968280393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3359547327521773367": ["convolution_gpu_bfyx_gemm_like",2],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "332090597573908506": ["convolution_gpu_bfyx_gemm_like",1],
+        "4134729533276761488": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "4483155585853926891": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "11493371521058673700": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "8717393423378690149": ["convolution_gpu_bfyx_os_iyx_osv16",269],
+        "7937517564893685647": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "11141999085710526242": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "5748047690737461635": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "11314436000791223218": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13073788277284969422": ["convolution_gpu_bfyx_gemm_like",1],
+        "6522974911083412812": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9599099244072080863": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",304],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3703292222363446463": ["convolution_gpu_bfyx_os_iyx_osv16",286],
+        "16728826595086368897": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "11311890411536750673": ["convolution_gpu_bfyx_gemm_like",2],
+        "9574931298183748343": ["convolution_gpu_bfyx_gemm_like",2],
+        "17604747523124060652": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "11571049833132558023": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "12590495767805868405": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "10800323158234163234": ["fully_connected_gpu_fb_oi_ref",2],
+        "1208483520611545642": ["convolution_gpu_bfyx_gemm_like",2],
+        "4200340674281276565": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "5781431860747226742": ["convolution_gpu_bfyx_gemm_like",1],
+        "2588106330058954614": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "18077281411861416889": ["convolution_gpu_bfyx_gemm_like",1],
+        "15887484617041779814": ["convolution_gpu_bfyx_gemm_like",2],
+        "8954957191824520301": ["convolution_gpu_bfyx_gemm_like",2],
+        "8500612796090968552": ["convolution_gpu_bfyx_gemm_like",1],
+        "11740474593275702888": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3141554560840195766": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "10250778203413648582": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10712251675747436685": ["convolution_gpu_bfyx_gemm_like",2],
+        "18243018097656671503": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "9529614587861271730": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16495435651959280198": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6849874726361751307": ["convolution_gpu_bfyx_gemm_like",2],
+        "16937207522545573792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "7145194061073256844": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "10683462376964742177": ["convolution_gpu_bfyx_1x1",2],
+        "14561847633011875566": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5911574919905523294": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9625931001541723278": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "759904421452233375": ["convolution_gpu_bfyx_gemm_like",0],
+        "2656031443043933969": ["convolution_gpu_bfyx_gemm_like",2],
+        "8484176982872847423": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17260550967427796490": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "2841943277631596989": ["convolution_gpu_bfyx_gemm_like",2],
+        "16295742665642026049": ["convolution_gpu_bfyx_gemm_like",0],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "18196676408993954972": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "730498656295487620": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "981197653890885407": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15334769670416409064": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14802650433258854647": ["convolution_gpu_bfyx_gemm_like",2],
+        "13248218293365141596": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3419335618146360217": ["convolution_gpu_bfyx_gemm_like",2],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "9700592037514669700": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "10721811813682112908": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12012860334670244716": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "5169676188205309169": ["convolution_gpu_bfyx_gemm_like",2],
+        "7041670015280138712": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15696864960068112631": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "4332002982390788477": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "16113302464937833403": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5425221744593278983": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "15984373369388044924": ["convolution_gpu_bfyx_gemm_like",2],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "8976474887968287066": ["convolution_gpu_bfyx_gemm_like",1],
+        "17585852525746136080": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15124985846197662243": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "1771663698943903325": ["convolution_gpu_bfyx_gemm_like",2],
+        "4878084041222897879": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "17795554443343871443": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9750510172185801133": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "10994887986667360638": ["convolution_gpu_bfyx_gemm_like",2],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "3807725810350819929": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1118106412799660613": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1898776014554946000": ["convolution_gpu_bfyx_gemm_like",2],
+        "16461300997058854554": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2354885756165078342": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6948147789605707774": ["fully_connected_gpu_fb_io_ref",1],
+        "16847817828600381030": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "10714306166715959794": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "9212091835906796243": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "2343921093633784755": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17356122476662104613": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12478914547444399288": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13395074742046717601": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "8451179695288093195": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "15356995665520295246": ["convolution_gpu_bfyx_gemm_like",1],
+        "13283018618260255620": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12136625628940225638": ["convolution_gpu_bfyx_gemm_like",2],
+        "15461879919099373703": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "13031027103925431505": ["convolution_gpu_bfyx_gemm_like",0],
+        "13387804712929042302": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "2999825793036702585": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "14558850297291634005": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5180223624868784700": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9353412605649860251": ["convolution_gpu_bfyx_gemm_like",2],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",2],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "11612908466465510939": ["convolution_gpu_bfyx_gemm_like",2],
+        "10076578838853982233": ["convolution_gpu_bfyx_gemm_like",1],
+        "17947097500350250352": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14805212478405698245": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "5896089609470353090": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12752101288912456176": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "9524663472084054050": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11684927349056930189": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3404911902272307873": ["convolution_gpu_bfyx_gemm_like",2],
+        "10730856574108806045": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "13809218391763818477": ["convolution_gpu_bfyx_gemm_like",2],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8708323717539569536": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6717268005860715462": ["convolution_gpu_bfyx_gemm_like",2],
+        "8700953648388124963": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13572134043095673708": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "14459249705747952583": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "17608082492919905570": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5351705572686943348": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8347537383976709519": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "6604223938357238686": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "389822325870173489": ["convolution_gpu_bfyx_gemm_like",2],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14083279273292567319": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "1701609125136907870": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13683797097980916261": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6995472847770703647": ["convolution_gpu_bfyx_gemm_like",2],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "6735135795253013220": ["convolution_gpu_bfyx_gemm_like",1],
+        "9340606088243696490": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11541706477255587105": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14277432520333139165": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "4680261350523889008": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2753702428731469792": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "8319405652132127420": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "11215766166462244180": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "13881505737488515065": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "3113016029551460773": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "8336494030011542852": ["convolution_gpu_bfyx_gemm_like",2],
+        "18439017855540532958": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2945414822360653904": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "414342067295883061": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "7289940394271052757": ["convolution_gpu_bfyx_gemm_like",1],
+        "6148022455516485135": ["convolution_gpu_bfyx_gemm_like",2],
+        "2481473548445286504": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "8509748651922589684": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "15230961192722285950": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14514450640485628836": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12878631058803628679": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5061053593616346116": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17393241435373906917": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "15385506288692289568": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10918743320372308981": ["convolution_gpu_bfyx_gemm_like",1],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "16615858951735101760": ["fully_connected_gpu_fb_oi_ref",2],
+        "6603817696964851209": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "5507708258753405429": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14445520478857662586": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "5297273225749803700": ["convolution_gpu_bfyx_gemm_like",0],
+        "11195875185591819437": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13810716860158972470": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "12942085219027232135": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "12529210672030682764": ["convolution_gpu_bfyx_gemm_like",1],
+        "8176520928011006903": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10613156984920928792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9105431502075531641": ["convolution_gpu_bfyx_gemm_like",2],
+        "2220961811760955456": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9105949910901552052": ["convolution_gpu_bfyx_gemm_like",1],
+        "9116206094279111365": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "810244829776621501": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "10270203686708782941": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "15479071839425218367": ["convolution_gpu_bfyx_gemm_like",2],
+        "17835592722977214177": ["convolution_gpu_bfyx_gemm_like",2],
+        "13994738382469480124": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11915835787294686201": ["fully_connected_gpu_fb_io_ref",2],
+        "13535031376667778809": ["convolution_gpu_bfyx_gemm_like",1],
+        "15905812449037427213": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "11992158790035075804": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6748628505489041229": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "15222823942088272038": ["convolution_gpu_bfyx_gemm_like",2],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8844619836383523698": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "12440883214879663043": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "676641023579624117": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "17303584953298149285": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "10191980053492569024": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "16125365972873290572": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13816380312874384117": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10340626080611300806": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "16763335832616216769": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7876355212013100281": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "14579042972443651846": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16917495876041966553": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6489074577147494118": ["convolution_gpu_bfyx_gemm_like",1],
+        "761984225415608773": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "621927597604688551": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14074914477149374595": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2297846338452062425": ["convolution_gpu_bfyx_gemm_like",2],
+        "14234254258925470171": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2768512766772748723": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9758033083211570158": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11802527991096689252": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "1919460437053604108": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "11198378813600875939": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "12301464827222654105": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "12565318283493666631": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "4370027682980493159": ["convolution_gpu_bfyx_gemm_like",1],
+        "14880029436467076847": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14555366228958374512": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "774981050284188673": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4004333174619528327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2567809041240246707": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "14013561425708390846": ["convolution_gpu_bfyx_gemm_like",2],
+        "15687441275464931484": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",1],
+        "2590143768280076032": ["convolution_gpu_bfyx_gemm_like",2],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "726898338396698172": ["convolution_gpu_bfyx_gemm_like",2],
+        "2307629242354292362": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4536811685836767511": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14122647818827599984": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "10394041365384258612": ["convolution_gpu_bfyx_gemm_like",2],
+        "11882021989615795558": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "14786800939708939361": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4959718589070770515": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "18136968124686255108": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "7630342538679060038": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "17614929666625976544": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15911644545988936270": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11734299455885510243": ["convolution_gpu_bfyx_os_iyx_osv16",663],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4334698056820320220": ["convolution_gpu_bfyx_gemm_like",1],
+        "10420516636613025222": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "13558603350852076889": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4276712095427918904": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "11857822504978122919": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "15198419554644505600": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "7536287105029319189": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "5698743977411325127": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16134637021630473012": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2098357709530580176": ["convolution_gpu_bfyx_gemm_like",1],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13348855287761849180": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "10011668671963948912": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17523210737277743952": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "8065408380801722040": ["convolution_gpu_bfyx_gemm_like",1],
+        "140463250258747810": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8394085742794617896": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1400089266180918877": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8725673763972618034": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3828569468687251275": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17318287523550546026": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13498795599230228492": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "9061025737181218101": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "4445257000541366640": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "3126316723202463622": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "17264554677210911187": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "859377216693940737": ["convolution_gpu_bfyx_gemm_like",2],
+        "1742897526168249500": ["convolution_gpu_bfyx_gemm_like",2],
+        "17399542571019639128": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "10186942318345695432": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "4010650902230520983": ["convolution_gpu_bfyx_gemm_like",1],
+        "3324979924867461126": ["convolution_gpu_bfyx_gemm_like",0],
+        "13083981648347252910": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "16159852373972174245": ["convolution_gpu_bfyx_gemm_like",1],
+        "9226912483632588371": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16794102497779310636": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6581494673640781863": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7517800202981394755": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12631385844456089132": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6638154580507569953": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7557439160429040689": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "8069829594586311016": ["convolution_gpu_bfyx_gemm_like",2],
+        "3281411665507625899": ["convolution_gpu_bfyx_gemm_like",2],
+        "9368244029111057323": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15692223101958737604": ["convolution_gpu_bfyx_gemm_like",2],
+        "6893451271566946459": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6370629727707634189": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "6873973504717201270": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",2],
+        "15121608487896365221": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4319047524534407016": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15677062663215157168": ["convolution_gpu_bfyx_gemm_like",2],
+        "498221230041656321": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "460346381952024719": ["convolution_gpu_bfyx_gemm_like",2],
+        "13267438341255312172": ["convolution_gpu_bfyx_gemm_like",2],
+        "6053594232298534345": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4750897775273897282": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "18204971481718743856": ["convolution_gpu_bfyx_gemm_like",2],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "18096803908321982720": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "141166664952282933": ["convolution_gpu_bfyx_gemm_like",2],
+        "13506060627438652817": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "15616954046484566002": ["convolution_gpu_bfyx_gemm_like",1],
+        "264371219192743152": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "15258215535586455016": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15262493122847269333": ["convolution_gpu_bfyx_gemm_like",2],
+        "18265020664540913473": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12602193792076781600": ["convolution_gpu_bfyx_gemm_like",2],
+        "9700098364581157575": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7945923871349397386": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6453222793515233963": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2446257282140830646": ["convolution_gpu_bfyx_gemm_like",2],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "8951503172834790833": ["convolution_gpu_bfyx_gemm_like",2],
+        "11986642867827682648": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2999633429402781278": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "9357359875134299131": ["convolution_gpu_bfyx_gemm_like",1],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",2],
+        "8075453526439606224": ["convolution_gpu_bfyx_gemm_like",2],
+        "14403780921831769097": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "16605697831520435304": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "4024491643929554510": ["convolution_gpu_bfyx_gemm_like",2],
+        "16815373779430857324": ["convolution_gpu_bfyx_gemm_like",1],
+        "16541535256432192398": ["convolution_gpu_bfyx_gemm_like",2],
+        "1194267934213722567": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "10071611039987219440": ["convolution_gpu_bfyx_gemm_like",1],
+        "7227174766917523481": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "12808154347573074859": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12412224630798427948": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "7058458405375602606": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "13186342942242476803": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "15747538142554815480": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "1350953652678789564": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "15783558375979538895": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "11232261979256657934": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8997817508830449863": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1003101267609305257": ["convolution_gpu_bfyx_gemm_like",2],
+        "7017157908391870084": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "1896394898744191046": ["convolution_gpu_bfyx_gemm_like",2],
+        "16865271154583564899": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "72444706264681262": ["convolution_gpu_bfyx_gemm_like",2],
+        "13139953964389811410": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "11602830611894444581": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8511244943596227719": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15220874718853723626": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10093371683053539916": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "15671873744670386067": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "8576229375621297412": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2686152083115758704": ["convolution_gpu_bfyx_gemm_like",1],
+        "18415227597391874233": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7650874310714729923": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "17508515605648584094": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11665313746896806563": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11275526584835606578": ["convolution_gpu_bfyx_gemm_like",1],
+        "13150876648527896999": ["convolution_gpu_bfyx_gemm_like",1],
+        "17767784103977797843": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15618891972122000521": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4049276089777687996": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2772149704821395618": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2069311169819696343": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "45545661884854912": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "6227066883925046010": ["convolution_gpu_bfyx_gemm_like",2],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "15387047026300787039": ["convolution_gpu_bfyx_gemm_like",2],
+        "5567670507334783760": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9569446666675696513": ["convolution_gpu_bfyx_gemm_like",1],
+        "5509631031571317557": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5384134329664434112": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "1617993599154234262": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "14585144905582599299": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "16695020005258780885": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12038525298168664305": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9654726486719966937": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10942743767167283370": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17243953172314194409": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "7263339400190408379": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "4241640917176830862": ["convolution_gpu_bfyx_gemm_like",2],
+        "8770858724416759637": ["convolution_gpu_bfyx_gemm_like",2],
+        "2622434279674583815": ["convolution_gpu_bfyx_gemm_like",1],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4035015193331696438": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "5061795324735006354": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "8623022306922454565": ["convolution_gpu_bfyx_gemm_like",1],
+        "6542417269641204414": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "16021335552443492452": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "2602811890459789252": ["convolution_gpu_bfyx_gemm_like",2],
+        "7963529808900784906": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "10468108569766167175": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "15479549936562568596": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1467428583618467133": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13553045975561262752": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12744887771237881196": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13485140643204970345": ["convolution_gpu_bfyx_gemm_like",2],
+        "12693511427898130707": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18213389163198755626": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "15129834325410878425": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2574815123023594315": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "759816003617478606": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3051823462382231650": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7107513718824525169": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",2],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",571],
+        "16774728502960825097": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8942942026369874093": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4916569245937189632": ["convolution_gpu_bfyx_gemm_like",2],
+        "3006428377575478529": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "8833400244933346226": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2764034841399585177": ["fully_connected_gpu_fb_oi_ref",1],
+        "4294879469633231552": ["convolution_gpu_bfyx_gemm_like",2],
+        "65349392124461285": ["convolution_gpu_bfyx_gemm_like",2],
+        "1426606766274640878": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14313201046801286869": ["convolution_gpu_bfyx_gemm_like",2],
+        "6801897580177846120": ["convolution_gpu_bfyx_os_iyx_osv16",656],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "9497269191159495932": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "11253790393313445931": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8021962180961047152": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "10208132281050693649": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "15997754881872769378": ["convolution_gpu_bfyx_gemm_like",2],
+        "10344489318472060767": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "15938703221521364046": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4147006350295905486": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "3664562521273273709": ["convolution_gpu_bfyx_os_iyx_osv16",208
+        ]
+    },
+    "48": {
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",0],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "150132162949295379": ["convolution_gpu_bfyx_1x1",2],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6603778920476932267": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "15661322183507404821": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "10893432143734884603": ["convolution_gpu_bfyx_gemm_like",2],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",1039],
+        "5629373398445592781": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "10005177465075197768": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",125],
+        "4238885454989272754": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10890975553758439233": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13878967140838761911": ["convolution_gpu_bfyx_1x1",2],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",2],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3860603464276263676": ["convolution_gpu_bfyx_gemm_like",2],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "9404677451270692749": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "9522661528867955338": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",642],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "5287076386757143976": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",2],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",2],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14766477690417085350": ["convolution_gpu_bfyx_1x1",2],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "16773645387243701837": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "1632416005093914709": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",0],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",2],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",871],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "3177304125602972370": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "17225552472711821360": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "2732519635571994212": ["convolution_gpu_bfyx_gemm_like",2],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",1],
+        "6109013751635776331": ["convolution_gpu_bfyx_gemm_like",2],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "7375461241315602473": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "17522452942286240233": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "1954052357826969119": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "9987415314864002460": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13762042713029963144": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "5740738339752793113": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "14540578324750869319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",2],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",2],
+        "13590444711975157776": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",2],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10914921540144371519": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "5311718276151327830": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17109520309574369561": ["convolution_gpu_bfyx_os_iyx_osv16",298],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "9454954846682513038": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6571438978296387721": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "12427258337646070422": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5012013738970489338": ["convolution_gpu_bfyx_1x1",2],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "8943913562339525413": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "14532519639619315651": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "13115589642140732066": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4216958486055161753": ["convolution_gpu_bfyx_gemm_like",2],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",2],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "482564204402769504": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "577844026691991089": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8263423704888556491": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "4084516853815444743": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9226912483632588371": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",2],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10084794570892043447": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",391],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "3622409603053918029": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10601684126917601680": ["convolution_gpu_bfyx_gemm_like",2],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",2],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",1],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",2],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",2],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",0],
+        "6339908713513858301": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",2],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "6585223640997887253": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "11669828823444745889": ["convolution_gpu_bfyx_gemm_like",2],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1103228955716492167": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "10536316961655703500": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "14222482954865351228": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12353956380178079089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "7199295899520406795": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9191832520273617003": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "7759812946257541251": ["convolution_gpu_bfyx_os_iyx_osv16",109],
+        "2188101366183302888": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "6438522646185979880": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "4584970211859494304": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16",109],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "15494543914974994991": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "14383657211047876136": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",2],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "11622925573287101001": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15555083739490354527": ["convolution_gpu_bfyx_gemm_like",2],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "4759671642533786591": ["convolution_gpu_bfyx_os_iyx_osv16",553],
+        "15781622938833984014": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "7132328255408635227": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "5044721291675005144": ["convolution_gpu_bfyx_1x1",2],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11043866034742707103": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",1],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "14558572801374416278": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "14204609663091442879": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "2345023488044002149": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "14872992823083730615": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like",2],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14362876471450307424": ["convolution_gpu_bfyx_1x1",0],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",1],
+        "4079026972040047969": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",304],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "4154403364889130045": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15967614281807823696": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "3024355261291518180": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",4],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "14466032674083938714": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",470],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7624259732952222597": ["convolution_gpu_bfyx_gemm_like",2],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",1],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "879896719155824868": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "8618835732380720921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "6025872155179042054": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "15578456771467281881": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8787438180071123604": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",2],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "17723621158215826108": ["convolution_gpu_bfyx_gemm_like",2],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",2],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",0],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "394778201589371681": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",2],
+        "1369161172432667462": ["convolution_gpu_bfyx_gemm_like",0],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "10642327923162019888": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",2],
+        "17243648226968859637": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",2],
+        "11893419236649064317": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1003101267609305257": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",0],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",0],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14221578799010900252": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7590767013583950613": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "12194037100109755112": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "17224104246148265328": ["convolution_gpu_bfyx_gemm_like",2],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "18384657372655350144": ["convolution_gpu_bfyx_gemm_like",2],
+        "13176385389367548697": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "2339864165283480961": ["convolution_gpu_bfyx_1x1",2],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",1],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "994842991399671507": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",125],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",2],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13538051178827008933": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",2],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "9480653639044390919": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "12512751736409465214": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "11768117585574496387": ["convolution_gpu_bfyx_gemm_like",2],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "4897991181236908768": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",2],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "2242829490403202087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "1996860183441418841": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12166852830214895457": ["convolution_gpu_bfyx_1x1",2],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",1],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14184895905338394239": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",643],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",2],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "1435153323458789173": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16617945088781950664": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13596876807637507229": ["convolution_gpu_bfyx_1x1",2],
+        "16327433707667075261": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "5688478347124565305": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11619548409913646265": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "10118395047539851751": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",0],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",1],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "10724501418439612080": ["convolution_gpu_bfyx_os_iyx_osv16",643],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",2],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "2984726467649419856": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "1742897526168249500": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "15322019609805777935": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "5657471280535146301": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",2],
+        "7954972694876158422": ["convolution_gpu_bfyx_1x1",2],
+        "11951606039079763598": ["convolution_gpu_bfyx_gemm_like",1],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "3872151366780051246": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",0],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "216603198215625772": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8792202318168046223": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4229105529069729944": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",196],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",0],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4911903898045460096": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4531222427159927606": ["convolution_gpu_bfyx_gemm_like",2],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "5911282942658469852": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16020916772006653269": ["convolution_gpu_bfyx_1x1",2],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",2],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7903891232234389925": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "12917241193304093727": ["convolution_gpu_bfyx_gemm_like",2],
+        "12382399034878624010": ["convolution_gpu_bfyx_gemm_like",2],
+        "14902389080201926109": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",2],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",2],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",0],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3036808833459559381": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",0],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "11459784003592366395": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "2007192658799516915": ["fully_connected_gpu_bf_io_gemm",1],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",1],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "10433541468308381909": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",642],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2439993891369206440": ["convolution_gpu_bfyx_1x1",2],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "2920840796593281126": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "4252157815622916471": ["convolution_gpu_bfyx_1x1",2],
+        "17824431042110985323": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "1485662490111767875": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "9525535670799618110": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",621],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11275109735493317886": ["convolution_gpu_bfyx_gemm_like",2],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",2],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10290107543739998181": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "6254141935545262078": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "15672624168541469192": ["convolution_gpu_bfyx_gemm_like",2],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "3563872903821081702": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like",2],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",2],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8321769923556905957": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18436249934780056991": ["convolution_gpu_bfyx_os_iyx_osv16",1048],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "708747442142592697": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "11609821372586026178": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "3820661057776133570": ["convolution_gpu_bfyx_1x1",2],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",132],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10722677916294015259": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16",486],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",2],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "10292585962794261197": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",1],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",2],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "7748233564411787605": ["convolution_gpu_bfyx_gemm_like",2],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "11797601971796699898": ["convolution_gpu_bfyx_gemm_like",2],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "331661172067077796": ["convolution_gpu_bfyx_1x1",2],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "5334190564423375247": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",2],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",2],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",1],
+        "3265415000818832667": ["convolution_gpu_bfyx_gemm_like",2],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",0],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "721174714308243785": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "6065819201836017182": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "10572945270796129630": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",2],
+        "4455369117448405874": ["convolution_gpu_bfyx_1x1",1],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",1],
+        "8857763129101380288": ["convolution_gpu_bfyx_os_iyx_osv16",267],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17207560805775399864": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2608363732937932266": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "1697248235682953135": ["convolution_gpu_bfyx_gemm_like",1],
+        "16884228931101540030": ["convolution_gpu_bfyx_os_iyx_osv16",1018],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14458851250685872417": ["convolution_gpu_bfyx_gemm_like",0],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",2],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",0],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9562527071055150197": ["convolution_gpu_bfyx_1x1",1],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",1],
+        "14744368497944610864": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "17015791782274123780": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4769003637955328938": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18035673326929466074": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",0],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",0],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",1],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",2],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "18012549942299450620": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "13468081302022888489": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "522181557896569275": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "8709632541892447149": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",570],
+        "959260710517842876": ["convolution_gpu_bfyx_gemm_like",1],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "3916913157877412361": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "9941035405796680081": ["convolution_gpu_bfyx_1x1",2],
+        "9737565171095493297": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "72444706264681262": ["convolution_gpu_bfyx_gemm_like",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",1],
+        "16075006181495932250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",0],
+        "4718716595177056289": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16",486],
+        "13124342334495538095": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "10022487076451608714": ["convolution_gpu_bfyx_gemm_like",2],
+        "537074122417021898": ["convolution_gpu_bfyx_gemm_like",2],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5963901433137582265": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",924],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",2],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",1],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",0],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",1],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",316],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",0],
+        "529543453251381109": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",2],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",2],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "10330180429524641331": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7264756313770306662": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",840],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "2554991397391195611": ["convolution_gpu_bfyx_gemm_like",2],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",1],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "9741607635826869269": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",12],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",1],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",2],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",2],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "7697369026397443797": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "4073467095502162430": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "12972798847556569913": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8272823732258536202": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "14484890926084856480": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4180325737406616940": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "17174919737114915467": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7481256533438761028": ["convolution_gpu_bfyx_os_iyx_osv16",1069],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",2],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",1],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "15289152041466330689": ["convolution_gpu_bfyx_os_iyx_osv16",267],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "178353385245384751": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "11987564534722442223": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "15231987838322151865": ["convolution_gpu_bfyx_1x1",2],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",1],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",1],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",1050],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",393],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "17713034180977313726": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",2],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",1],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",0],
+        "1520529227443340435": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "15924583510704449214": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "13851851281384416649": ["convolution_gpu_bfyx_1x1",1],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "11806105193035393795": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "16011429608661242565": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13248567106128518549": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13735180250757239202": ["convolution_gpu_bfyx_gemm_like",2],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",0],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2038505773698938555": ["fully_connected_gpu_bf_io_gemm",2],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",114],
+        "12625112690264223217": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15857087373591747006": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "17310332946322628458": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16",486],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",1],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",2],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",122],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",0],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "15006321421735686121": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9328223957245552723": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14301049621912707511": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "6798405629870473128": ["convolution_gpu_bfyx_1x1",2],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "16911464046178654033": ["convolution_gpu_bfyx_1x1",2],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",2],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",2],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",2],
+        "5754844816339228920": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "4339711224604149541": ["convolution_gpu_bfyx_gemm_like",2],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",1],
+        "3441335188113424896": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",666],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4716188972902735458": ["convolution_gpu_bfyx_gemm_like",2],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "13503688893307029975": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "13681462437496627948": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",0],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7870154008378361670": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",2],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "9243949750444156746": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "8541982562061181756": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "17025268985366223779": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",764],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "5074273865983613482": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",0],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",903],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5933743119393822386": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16",485],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",753],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",491],
+        "8561261337239934159": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "15961487889420208188": ["convolution_gpu_bfyx_gemm_like",0],
+        "4239415134522959352": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "14263790627243107300": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",1],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "15374625876485618845": ["convolution_gpu_bfyx_gemm_like",2],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",2],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",2],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "739676584505475609": ["convolution_gpu_bfyx_gemm_like",2],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "10384537928514123040": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "9737833587413114584": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "1318571118468536310": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",2],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "1680468564927032670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "6644418194983229139": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",2],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",1],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",2],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",1],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",0],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",2],
+        "4091702228990140696": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",398],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1104489643524273315": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "14335423820860953927": ["convolution_gpu_bfyx_os_iyx_osv16",881],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "818998169319147148": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "1885075753696445410": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16230621843665445228": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",0],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "11807282628372660280": ["convolution_gpu_bfyx_1x1",2],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "15591167992985613695": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9447458159095730492": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "5953754321266570854": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",2],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",1],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16910952799476896905": ["convolution_gpu_bfyx_gemm_like",2],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",2],
+        "3398322619007806698": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "12242618640422208652": ["convolution_gpu_bfyx_gemm_like",1],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "9757389422721488173": ["convolution_gpu_bfyx_1x1",1],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "13208778119673683349": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16025442470600124062": ["convolution_gpu_bfyx_gemm_like",1],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",623],
+        "1963081583851864291": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4220826666482500445": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1249137685908951501": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5622089373755094139": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like",2],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",2],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2929715823970060874": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",1],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "11086699387784339943": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "1509728225855233852": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "6845814820599174031": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",722],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "11595465382166985232": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",2],
+        "1569043950563130463": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13161997040644039778": ["convolution_gpu_bfyx_gemm_like",2],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",0],
+        "10935309102034762723": ["convolution_gpu_bfyx_1x1",1],
+        "8543619733732987550": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "659846949368492111": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "2730604806511016352": ["convolution_gpu_bfyx_gemm_like",2],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "5040095338370816349": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "3412573508101980656": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1208161922424418734": ["convolution_gpu_bfyx_gemm_like",2],
+        "733956743303342862": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "13369603621524676979": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4672441137336208890": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "7292351660229751817": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2727175120437582536": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",0],
+        "16781127329510211966": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "1507839533611760093": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "14213516751025324346": ["convolution_gpu_bfyx_gemm_like",2],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",623],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3385797925880519845": ["convolution_gpu_bfyx_1x1",2],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",2],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "755577773771316277": ["convolution_gpu_bfyx_1x1",2],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "6329618009202266591": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "8257103926661643451": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",149],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "11587239927319376658": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9714508918051740792": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "4773077837537775324": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "5941852872160795604": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",2],
+        "7171904645566467208": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "15315327794058441258": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "15636128989267984459": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15773157615731010456": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3662747857062156477": ["convolution_gpu_bfyx_gemm_like",2],
+        "15031155621982459860": ["convolution_gpu_bfyx_gemm_like",2],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3362190082518348071": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9440117898128288296": ["convolution_gpu_bfyx_gemm_like",2],
+        "12225380215512887632": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "7211355951470869591": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",2],
+        "1934379409955686502": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "1700222876284611258": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "5754396201681434378": ["convolution_gpu_bfyx_1x1",2],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "6288489890578212082": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",1069],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1077773457856682663": ["convolution_gpu_bfyx_gemm_like",2],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",2],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",2],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",0],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",881],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5115007207028125638": ["convolution_gpu_bfyx_os_iyx_osv16",643],
+        "13264617841270329349": ["convolution_gpu_bfyx_1x1",2],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "7279393739634103483": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",2],
+        "16076153317792960383": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "9759380701896779097": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",1],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",2],
+        "4550028191070279999": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "10628725059172743408": ["convolution_gpu_bfyx_gemm_like",1],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2893564501191050837": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",136],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",0],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",2],
+        "3334339484693730802": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7201521533301617290": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",0],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",0],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "138379779469699309": ["convolution_gpu_bfyx_gemm_like",2],
+        "2571882179292959757": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",0],
+        "14810839157236175179": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",2],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "13038533272699602337": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16053585286807864356": ["convolution_gpu_bfyx_gemm_like",1],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "15924916465272239832": ["convolution_gpu_bfyx_gemm_like",2],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",0],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",2],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2727219457659794468": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",0],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11130439225010714550": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",1],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "12151068022697708126": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "13160712904661288567": ["convolution_gpu_bfyx_1x1",2],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",2],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",1],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15106614232165315070": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",398],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "11507538232733291666": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "2349007644347065353": ["convolution_gpu_bfyx_gemm_like",2],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9040046051053703359": ["convolution_gpu_bfyx_gemm_like",2],
+        "8794896449397768269": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11878734040194151073": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "17343050785312683560": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17912189681971987483": ["convolution_gpu_bfyx_gemm_like",0],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",1],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "6942016672941874829": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",1],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "8079376692609682448": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "12796777049340516563": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "8700574100180128776": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "5781098222688514465": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",490],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",2],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",1],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "9707630588260222630": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "6020017927557041768": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",2],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "17084977396231597605": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",2],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "2108296560864415762": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",2],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18218631037214746168": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "2273992727647793692": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "5288793454052261767": ["convolution_gpu_bfyx_os_iyx_osv16",1020],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",1],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9475130054420979752": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "4135068756462147853": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2543041530639980505": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "17742192339816511494": ["convolution_gpu_bfyx_gemm_like",2],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "15078168059698267650": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "8819268903800581706": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "2817919813339364130": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",2],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",2],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",2],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "700717277178942679": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17790026124881397912": ["fully_connected_gpu_yxfb_ref",0],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",0],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16173557782125372935": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",1],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",2],
+        "3976736548270395981": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16469788155263456039": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "5926747396493954633": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",623],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",2],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",664],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "2265784112305305260": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16",1018],
+        "4617347486560666277": ["convolution_gpu_bfyx_1x1",2],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like",2],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",0],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",1],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",2],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",0],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1889911210088209867": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",1],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3190494353583341446": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6254161707168091438": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2534408579674556441": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",1],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like",2],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like",2],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "15511138074959300404": ["convolution_gpu_bfyx_gemm_like",2],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",1],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",2],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",2],
+        "9226443907548972870": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",1],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",2],
+        "13760645810144930270": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",2],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6107031848283462574": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "13960388312976163971": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "8174040194088942964": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",2],
+        "14650567822254940018": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15378025640603637387": ["convolution_gpu_bfyx_os_iyx_osv16",284],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",1],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "4239133538073498792": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",2],
+        "13102754309439605192": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",2],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14678312911245000804": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "14525127290591744848": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8751016391945753900": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "7808544677773370430": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9426665763007611385": ["convolution_gpu_bfyx_gemm_like",2],
+        "3355259926747524578": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",1],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",17],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527276292172180386": ["convolution_gpu_bfyx_gemm_like",0],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "6664482192233202590": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "12831298482349900359": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",2],
+        "5720964268093705079": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5019077257951332016": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "2204178900998688268": ["convolution_gpu_bfyx_gemm_like",2],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "7223801044761006523": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "2173867324489962689": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1701609125136907870": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",2],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",2],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",2],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",1],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11066913713501760080": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "2296581485980163665": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "3383222668132648804": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "3499106702307464480": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11948858355027908365": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",2],
+        "15287650965861631130": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "13809330759308309353": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",1],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",1],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",0],
+        "3477539135137665170": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",0],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",0],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3755253206085028904": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16172528828198474326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7107677063657303327": ["convolution_gpu_bfyx_1x1",2],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",11],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",2],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "15739278428190392018": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",2],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "13300022131572486202": ["convolution_gpu_bfyx_gemm_like",2],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",881],
+        "8032685176029570383": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",1],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",1],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",2],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3963106895592011725": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "6300691162962736560": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "12421204749289937399": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "9918371346247634545": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",0],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",2],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "14578867494693499627": ["convolution_gpu_bfyx_gemm_like",2],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14905520834426630145": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "4274425737610351312": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7575675354187625951": ["convolution_gpu_bfyx_gemm_like",2],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",2],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",0],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "10509933181132310969": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7843498978148810586": ["convolution_gpu_bfyx_gemm_like",2],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",2],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",848],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1857923215589370245": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",0],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14985236276429954162": ["convolution_gpu_bfyx_gemm_like",1],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",1],
+        "4640696923527766618": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15026219694198820614": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",2],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "8376077531098664520": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "8939683514448064461": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11655994466278963438": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "3503893875515897267": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",0],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "632116056424249698": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "4056979460327024961": ["convolution_gpu_bfyx_gemm_like",0],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17477062954520561609": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "2418288192668085805": ["convolution_gpu_bfyx_gemm_like",2],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "11263540528012919947": ["convolution_gpu_bfyx_1x1",2],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",1],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",2],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",0],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12534001599784153836": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",0],
+        "7232326270078161768": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5409924335138540834": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "18386376129938707290": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "2623687018437195679": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "16182470664818268848": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "8025053805734757314": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17344974951998490453": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",0],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",2],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "1306339989221885682": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",2],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7667898603371717971": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",0],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",2],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",2],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",2],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",2],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13702692566238948173": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10765280349477640969": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",2],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4933831571091731212": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",0],
+        "14764715930784496165": ["convolution_gpu_bfyx_gemm_like",2],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",1],
+        "5479761740065152589": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5815789824950542164": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "1186545671730357033": ["convolution_gpu_bfyx_os_iyx_osv16",654],
+        "3699344686791530101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "3141886504884887200": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",2],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2140514316203117958": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",1],
+        "3635446784873718932": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",2],
+        "18184621367843960190": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "9989055862610193828": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",120],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "16341722570340169855": ["convolution_gpu_bfyx_1x1",0],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "671453551040072499": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "3221221905804708596": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",0],
+        "12866217660635921034": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "856877003890134554": ["convolution_gpu_bfyx_gemm_like",0],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "6323026044750482867": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "54019631544204590": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "3541538046227217664": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "4992668316921598993": ["convolution_gpu_bfyx_os_iyx_osv16",283],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",2],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",2],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8130920994920685157": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "14077148976508649021": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "52089503050497755": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "17928043901784474130": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",490],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",1],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",1],
+        "5156033406916344703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11634932044447867039": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17638692805430115529": ["convolution_gpu_bfyx_gemm_like",1],
+        "8507854696766492454": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",0],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5381578460674280089": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",2],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7075659071934895087": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",2],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "13357431438267043322": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "603883331897298932": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8153567933591966877": ["convolution_gpu_bfyx_gemm_like",1],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",395],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",1],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",1],
+        "8560635685184432720": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like",2],
+        "13251091004269229867": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",2],
+        "6760797535531423152": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "13403161389559730": ["convolution_gpu_bfyx_gemm_like",2],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",2],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",2],
+        "3725013268198063198": ["convolution_gpu_bfyx_1x1",2],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "16818206615424635387": ["convolution_gpu_bfyx_1x1",1],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10753540518493641553": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "13328449155966085543": ["convolution_gpu_bfyx_gemm_like",2],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",2],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "18122858611264877646": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",2],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",736],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "7688176479120305539": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5240706676373148280": ["convolution_gpu_bfyx_gemm_like",0],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2465684728484709259": ["convolution_gpu_bfyx_1x1",1],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",1],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",0],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18416908414174464784": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "14289082888174784976": ["convolution_gpu_bfyx_gemm_like",0],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",1],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",2],
+        "7531346828150129063": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8528750110601691390": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7712831597869354170": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",553],
+        "12672995204641007004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1089944493540593798": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "16264774056719724826": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6222595759158615206": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13503555814874045782": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15417738436777481469": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "1345101751956733589": ["convolution_gpu_bfyx_gemm_like",2],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "2305461098719675735": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "584086621952390547": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "1882052795393187384": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "2525260242689556544": ["convolution_gpu_bfyx_gemm_like",2],
+        "13447028922679236865": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "6131481289104111211": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "5965451243366505522": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10435566004514173951": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11265079350845539239": ["convolution_gpu_bfyx_gemm_like",0],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",2],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "1541754036637209097": ["convolution_gpu_bfyx_gemm_like",2],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",1],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",1],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2114232149447438823": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",0],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like",2],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "16818714747882774917": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "2451712485584835395": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",661],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "9780938731831129283": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "15497797842820949408": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",1],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",2],
+        "6328802691680458752": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",2],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",1],
+        "4610200388191607540": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "8057302050645780813": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",1],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "276407276027553756": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "9585113116232600562": ["convolution_gpu_bfyx_gemm_like",2],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "17025324057045572535": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "16610284927818475574": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18077281411861416889": ["convolution_gpu_bfyx_os_iyx_osv16",667],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10792503079194374004": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16547425454653232058": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",870],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",2],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5135539474649575477": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "12394049027081208902": ["convolution_gpu_bfyx_gemm_like",1],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16",486],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2683304757433993300": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",0],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",571],
+        "8712136292276123857": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",1],
+        "16758962840329202004": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16",109],
+        "16710651492402564794": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",0],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "7026575758396092435": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3603187029740446600": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "1138439260035360722": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18446245971488003004": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "9660812093766156608": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "9545968464906009869": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",2],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "15989894214714907271": ["convolution_gpu_bfyx_os_iyx_osv16",150],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",1],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",2],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14502856487639608696": ["convolution_gpu_bfyx_gemm_like",2],
+        "13810995219720233595": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",1],
+        "15799159401545270696": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "10979362792894404338": ["convolution_gpu_bfyx_gemm_like",2],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "13418701036204748812": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "5843679089588930933": ["convolution_gpu_bfyx_os_iyx_osv16",900],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1425953627379976115": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",1],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",1],
+        "288853243482418538": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "14681717813022425567": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "8761283252495354972": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",2],
+        "9076758673133996959": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",2],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "11599932445375240727": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "15329680728165965773": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "8168240543278779314": ["convolution_gpu_bfyx_1x1",1],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",1],
+        "11919129623429545762": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",2],
+        "7669403041163460089": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",2],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16681690088928624738": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "4126895998426674411": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12900949103593247293": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "17208186152576814861": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "4708035980731751007": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",0],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "16312223896859176991": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",1],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",2],
+        "3150231129728961455": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2477849395789783501": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1778345646142852816": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",2],
+        "14147460733160099960": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",1],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "3779229442395464456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",901],
+        "13830605041347009953": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "14206076551739831333": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8931169575495985034": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "956022649859563080": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",2],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16849652692746541462": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",2],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",2],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",2],
+        "18255227391100087860": ["convolution_gpu_bfyx_1x1",2],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10415046594066474634": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "10864011008000364415": ["convolution_gpu_bfyx_1x1",2],
+        "4196367396954155354": ["convolution_gpu_bfyx_gemm_like",0],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",2],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "11690533591656807605": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "3432296808755992670": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "12523676912856063091": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "13328911884191551889": ["convolution_gpu_bfyx_1x1",2],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",2],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like",2],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",1],
+        "16293465561256937726": ["convolution_gpu_bfyx_os_iyx_osv16",664],
+        "8651641584737798174": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "16347412180100581330": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8519354640245415816": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13410850301164057911": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11626398907755088688": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",2],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "9955939178447682108": ["convolution_gpu_bfyx_1x1",0],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",2],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",2],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "4571404165794634411": ["convolution_gpu_bfyx_1x1",2],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",1],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",1],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "11060822686394981344": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15757308772667178999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",1],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10912495395422146386": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "11324651029379152442": ["convolution_gpu_bfyx_1x1",2],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",2],
+        "11241838709529552265": ["convolution_gpu_bfyx_gemm_like",2],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "87031578643428011": ["convolution_gpu_bfyx_1x1",2],
+        "6114241186364821679": ["convolution_gpu_bfyx_gemm_like",2],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",1021],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "5740745357953479527": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7683334381958571864": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "10432365444137108781": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",869],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",2],
+        "14811022197918391667": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "14268594692585922659": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "15156525717629023944": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",2],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15490478608105402679": ["convolution_gpu_bfyx_gemm_like",1],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "10141927023849730720": ["convolution_gpu_bfyx_1x1",1],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",91],
+        "13472577372534605883": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11267742746905371769": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "153117141968471446": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",2],
+        "7780140599533242850": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8458082326743351141": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5948701218437980356": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6981537186704688907": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "9069334144391048686": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "438528596970898721": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",2],
+        "6522575549211855712": ["convolution_gpu_bfyx_gemm_like",2],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "11465965972527519631": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3219408878901707426": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16915857558806082023": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "1089679781525023551": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3349519148124496343": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "13738442755456366277": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14079654309452583394": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",0],
+        "2832268621630415376": ["convolution_gpu_bfyx_gemm_like",0],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "1081962464388501987": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7009459929666511861": ["convolution_gpu_bfyx_1x1",1],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13654895364175354091": ["convolution_gpu_bfyx_1x1",2],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",1],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "6217542346826403576": ["convolution_gpu_bfyx_1x1",2],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",1],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "7770000755097925765": ["convolution_gpu_bfyx_1x1",2],
+        "11626402549863483301": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "3895088069642140043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16574710115918192418": ["convolution_gpu_bfyx_os_iyx_osv16",643],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",2],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",149],
+        "9542325095876448686": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",592],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "15997754881872769378": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "3106922888635965020": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "1116274074896622552": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",420],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "12604104383683210104": ["convolution_gpu_bfyx_gemm_like",1],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "16243196137456624852": ["convolution_gpu_bfyx_gemm_like",2],
+        "3746573775462003750": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",2],
+        "14001406016806064079": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",150],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",0],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",149],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",0],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "5459463503840817402": ["convolution_gpu_bfyx_1x1",2],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "16361932270527364507": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",2],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",2],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "8984436655107983227": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11872943152839631823": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "13468713306678453952": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",1],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",2],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",2],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",2],
+        "12397280593466519809": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "10766317990628501609": ["convolution_gpu_bfyx_os_iyx_osv16",153],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "789359733867650915": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "7430073011895298582": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",2],
+        "2929190644951986399": ["convolution_gpu_bfyx_os_iyx_osv16",664],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "14026570177552137240": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",116],
+        "18043340998699622388": ["convolution_gpu_bfyx_gemm_like",2],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "16949056117405140365": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15363606233048272809": ["convolution_gpu_bfyx_1x1",2],
+        "7647236080048602591": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10681768474583067517": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "17006655627343469372": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "16559140502701231107": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",2],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",0],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",2],
+        "17264010982688979937": ["convolution_gpu_bfyx_1x1",2],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15114370307779942381": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5055568897499186908": ["convolution_gpu_bfyx_gemm_like",0],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10106454449619141260": ["convolution_gpu_bfyx_1x1",2],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4766071144928072260": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "1984152634309440563": ["convolution_gpu_bfyx_gemm_like",2],
+        "290134020607738418": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15065019229949449623": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",2],
+        "5941298590926032148": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14151747022287993729": ["convolution_gpu_bfyx_gemm_like",0],
+        "12977678792503377525": ["convolution_gpu_bfyx_gemm_like",0],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "8096131027165540886": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",1],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",0],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8483523994859880782": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",2],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",0],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15466940145773097237": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",2],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "4378422094110940766": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12860222041026638681": ["convolution_gpu_bfyx_os_iyx_osv16",660],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "1338705434700924127": ["convolution_gpu_bfyx_1x1",1],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17647962002015093887": ["convolution_gpu_bfyx_gemm_like",2],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",492],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "1907439276166837309": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11132679855317294753": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "4403753181729432604": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "1372939511728986224": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",0],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12755692101476964677": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "1410630713443793537": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "1838534101161814609": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12461575861709234385": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12655099960717366198": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4927360358387344983": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "8253823502854784432": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7840966363183459431": ["convolution_gpu_bfyx_os_iyx_osv16",467],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",1],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "5752292348709244393": ["convolution_gpu_bfyx_gemm_like",1],
+        "6902644989079870993": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18136765667969393174": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "689445825453914111": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",1],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "16364494883229084045": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",1],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",0],
+        "8108843303778211282": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "12046017161414846599": ["convolution_gpu_bfyx_1x1",0],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "16917253324065998643": ["convolution_gpu_bfyx_1x1",1],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13933912937625580405": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "5291011077679733990": ["convolution_gpu_bfyx_gemm_like",2],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",0],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",2],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",2],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",1],
+        "3179874645565098825": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "16925721317097534009": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "8578747191812631883": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "14885031472057965707": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "12061567381160185735": ["convolution_gpu_bfyx_1x1",1],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",2],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "2581414750854621875": ["convolution_gpu_bfyx_gemm_like",2],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "9439431829175743345": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",23],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "2866656294663853474": ["convolution_gpu_bfyx_1x1",2],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "8906588133431586825": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "8195881973746570408": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "786401653335542559": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "12493863403516600413": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "10480527638577674825": ["convolution_gpu_bfyx_1x1",2],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11192356850081328892": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14115742296883450319": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13833960927635646899": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1390379098099686972": ["convolution_gpu_bfyx_1x1",2],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",2],
+        "4217179485243909459": ["convolution_gpu_bfyx_gemm_like",1],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "13454265023861566476": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",1],
+        "15579919505002150556": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "3277243911383750280": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "8390889357546397717": ["convolution_gpu_bfyx_1x1",0],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",2],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",2],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",196],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13105192484434299621": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",2],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16683485007140805060": ["fully_connected_gpu_yxfb_ref",0],
+        "4848143712599565301": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5643908654122573882": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",0],
+        "16044646335477470657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12806934028210472719": ["convolution_gpu_bfyx_gemm_like",2],
+        "14930789530046665855": ["convolution_gpu_bfyx_gemm_like",2],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "17825280904760131680": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "12707946849050970702": ["convolution_gpu_bfyx_gemm_like",2],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "8058419689646625853": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4849343880559509889": ["convolution_gpu_bfyx_1x1",2],
+        "10320711719466983961": ["convolution_gpu_bfyx_os_iyx_osv16",610],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "3336076058264596420": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",2],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "12278786796362166070": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",0],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",2],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "792684262493086891": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "12501619443242354860": ["convolution_gpu_bfyx_gemm_like",2],
+        "3114869763557037270": ["fully_connected_gpu_fb_oi_ref",2],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",0],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10424278617647597641": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "11939914680143672459": ["fully_connected_gpu_fb_oi_ref",2],
+        "11705756153433897198": ["convolution_gpu_bfyx_1x1",2],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",1],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10554266898346470422": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "2321148334382088982": ["convolution_gpu_bfyx_gemm_like",2],
+        "9529614587861271730": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "15271783562528081169": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "9285566577169147378": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "7532088618116521936": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",2],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "6263019986730305851": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12871555773123368130": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",150],
+        "13842309033760176194": ["convolution_gpu_bfyx_gemm_like",0],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5047419871737940985": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8837721075413149240": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "11490143853656040028": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "17446505012657609153": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",871],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",288],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14544219140091420262": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",0],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",0],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",150],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "2245166025103475783": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17035903590837750750": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",1],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",2],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "4366168099274266975": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",2],
+        "15859493313686060349": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "18337160891834020517": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "6133592828563353516": ["convolution_gpu_bfyx_os_iyx_osv16",306],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",5],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",2],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",2],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",1],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "12693511427898130707": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9524303276541517389": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",1],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "677249604491773387": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",2],
+        "12370729327673204804": ["convolution_gpu_bfyx_gemm_like",2],
+        "14206125678667603810": ["convolution_gpu_bfyx_1x1",2],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",1],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6726099352298108756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10292349730148518173": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "1351633819648952297": ["convolution_gpu_bfyx_1x1",2],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "13585163747565192884": ["convolution_gpu_bfyx_gemm_like",0],
+        "14015062122217462983": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11706446082856895571": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",0],
+        "345043289576587800": ["convolution_gpu_bfyx_1x1",2],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",666],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",2],
+        "9367157746678824712": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "9865252947376418804": ["convolution_gpu_bfyx_1x1",2],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",2],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",2],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",2],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4815047491742617397": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "7447163906170805189": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",2],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "8155268141318893606": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",2],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14126906427006602775": ["convolution_gpu_bfyx_1x1",2],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12190841837604350271": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5275016494706355806": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "7744787957569714828": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "5751283221740229986": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "15078590909693331731": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",395],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12985942652866621579": ["fully_connected_gpu_fb_io_ref",1],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "10899110544832584656": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17382660912493284320": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1760391741350091665": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",1],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",1],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "14397348576352573007": ["convolution_gpu_bfyx_gemm_like",1],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",2],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",387],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18424400171776141118": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9100044555742394133": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",1030],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",1],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like",2],
+        "5245526691775741296": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "18375125668176498051": ["convolution_gpu_bfyx_gemm_like",2],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16190949264253468961": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",1],
+        "8434794604559592624": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "13775529405693629438": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",0],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "9803492989444302959": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "4479979951990338510": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "2294026590516781945": ["convolution_gpu_bfyx_gemm_like",1],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "5600128039063009632": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",1],
+        "2173720698351153121": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "8818070832398055086": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",2],
+        "11910735867274493498": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",2],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",2],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "13602140021189675477": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "14349625788399542568": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15192230303376521834": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5103094815475470596": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",0],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",1],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",2],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",2],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like",2],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",2],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",1],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like",2],
+        "8707189142909022305": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",1],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "6129602738379919488": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9410978119783758141": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17515573322312447679": ["convolution_gpu_bfyx_gemm_like",2],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",662],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11107930597263802755": ["convolution_gpu_bfyx_gemm_like",2],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",124],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "15129834325410878425": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15640202505592598653": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",0],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "15677717057398875599": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",2],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "8609939102588915855": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",809],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",0],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289048840489035546": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",2],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "16286085532892593349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "8757900457181374694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11637325834858582585": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1701412735970485849": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "4674416595144505741": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "7472330881076141262": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7015738038963065110": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",900],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "17808913959977434594": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "18109284647478027063": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "7148542290597073512": ["convolution_gpu_bfyx_gemm_like",0],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "6648876837655776653": ["convolution_gpu_bfyx_1x1",0],
+        "4482135524904874942": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",2],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "8421388456873652700": ["convolution_gpu_bfyx_gemm_like",2],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "7432142107544210174": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",2],
+        "16748662918272106932": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",1],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "9926384320714453815": ["convolution_gpu_bfyx_1x1",1],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",2],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",100],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "17050143605017295447": ["convolution_gpu_bfyx_os_iyx_osv16",150],
+        "9287404618748313247": ["convolution_gpu_bfyx_gemm_like",2],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "11800783548769329949": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "1418595171949196661": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "1597770067928214597": ["convolution_gpu_bfyx_1x1",1],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5687802882700097624": ["convolution_gpu_bfyx_gemm_like",2],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",2],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "13474805373264874144": ["convolution_gpu_bfyx_1x1",2],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "2816353973187452604": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",2],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",2],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",0],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",2],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "1982176363226079588": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7183578232279711009": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",1],
+        "15980348884716629349": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "2856601829807186494": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",0],
+        "1051506168926530904": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "4168273493370024327": ["convolution_gpu_bfyx_1x1",2],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",0],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "11829442945690098558": ["convolution_gpu_bfyx_gemm_like",1],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "16986358655784856534": ["convolution_gpu_bfyx_gemm_like",0],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10471519687597963116": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11919846322488132883": ["convolution_gpu_bfyx_1x1",2],
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",898],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",1],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",2],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4398371999113956082": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13575423234109624706": ["fully_connected_gpu_yxfb_ref",0],
+        "659150305191479097": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "2598267743388306204": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",146],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",1],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "8258382025812748961": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15188570678726970998": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "14667209474639064623": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like",2],
+        "7650862961269327235": ["convolution_gpu_bfyx_1x1",2],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4423866541063606768": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",19],
+        "2651385050387738902": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "1596353239542510685": ["convolution_gpu_bfyx_gemm_like",2],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10256831975351722184": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",1],
+        "6181272224000872375": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16",642],
+        "16955653765071712611": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16982829522704429982": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "9882204352209412039": ["convolution_gpu_bfyx_gemm_like",1],
+        "10838138488789241338": ["convolution_gpu_bfyx_gemm_like",1],
+        "7235358742317442134": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "1245259979364728404": ["convolution_gpu_bfyx_1x1",2],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6817494598328071314": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "8490260671996115530": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "11625231046723308981": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13106818352216009354": ["convolution_gpu_bfyx_gemm_like",2],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",2],
+        "8061914949376516780": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "2863465257341735941": ["convolution_gpu_bfyx_1x1",2],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "10607904718265020949": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",0],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15993427814066246646": ["convolution_gpu_bfyx_gemm_like",2],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",2],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",1],
+        "2936333406928424760": ["convolution_gpu_bfyx_1x1",2],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",0],
+        "14716719350966652036": ["convolution_gpu_bfyx_gemm_like",1],
+        "17347670200862870457": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",1],
+        "8451212914744825089": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",1],
+        "4362304842016958728": ["convolution_gpu_bfyx_gemm_like",2],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "5095827462645341808": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16579057939215877904": ["convolution_gpu_bfyx_os_iyx_osv16",317],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",2],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "3448477246688526708": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "5600807544955072308": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",285],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "9354818521586974021": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",2],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",2],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "3056212889689424946": ["convolution_gpu_bfyx_1x1",2],
+        "5219048275475447369": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "3687215302429221155": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "9314293064351558241": ["convolution_gpu_bfyx_gemm_like",2],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "15809639778580769565": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5941092474669713339": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "287386909600391846": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "13145474177271090694": ["convolution_gpu_bfyx_gemm_like",2],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",2],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16",485],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "7846384623429362522": ["convolution_gpu_bfyx_1x1",2],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",2],
+        "8069537351442302814": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",1],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "12644942072153919043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",2],
+        "1074748462756364699": ["fully_connected_gpu_fb_oi_ref",2],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "7106362077449435105": ["convolution_gpu_bfyx_gemm_like",0],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "18180655791734632264": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",1],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "11856815095538913065": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18209930746627816139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7780336054545552428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7201521533301617290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",1],
+        "3211956138512889433": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6944031900067948180": ["convolution_gpu_yxfb_yxio_b16",0],
+        "5449117614287394433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "87031578643428011": ["convolution_gpu_bfyx_1x1",2],
+        "4833749391314748606": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8450272092307894299": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "13325762052023866627": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "15773157615731010456": ["convolution_gpu_bfyx_gemm_like",2],
+        "6666210546769702280": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "11254744277059719812": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10309586646776223605": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",1],
+        "12151068022697708126": ["convolution_gpu_bfyx_gemm_like",2],
+        "6464050901421037006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12550985938092975889": ["convolution_gpu_bfyx_1x1",2],
+        "3680396164645753224": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",2],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "15705908639736679687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15281554100135159550": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "7226002258982605405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12397280593466519809": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527018855890902975": ["convolution_gpu_bfyx_gemm_like",2],
+        "8555049634736330391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3107655421406621915": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14754849694687093032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15065019229949449623": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17036482252028102703": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "14304497513584420080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "7498614018449036163": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "12221101678609734421": ["convolution_gpu_yxfb_yxio_b16",2],
+        "560996739186313493": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1223196405651730260": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7889602687414497280": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13160712904661288567": ["convolution_gpu_bfyx_1x1",1],
+        "17178308105985812083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17742192339816511494": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "13082313288887957490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "6318214731544748245": ["convolution_gpu_bfyx_gemm_like",2],
+        "5899560521070338192": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "17192352762166764393": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2487679091192300910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14126906427006602775": ["convolution_gpu_bfyx_1x1",2],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "15504618703544589723": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12867038076564517306": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11361202190524990711": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",147],
+        "15231987838322151865": ["convolution_gpu_bfyx_1x1",2],
+        "15897300973213364823": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "2412846055735335136": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "8339704352841356825": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14667793472412360981": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16312739695844838884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11861634536583463947": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "216603198215625772": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12714814165247623529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16883372966656079608": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5274929595362413625": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2242829490403202087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6832967250168141428": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13454265023861566476": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4242173940230902960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3699344686791530101": ["convolution_gpu_bfyx_gemm_like",2],
+        "11342135956789192833": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "10850369799801518638": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4283886984540574108": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12308956927236847009": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1933147648540963732": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6784853321527374515": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9363988379673156863": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",2],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "5328004363712610999": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10429104188258277773": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "13474805373264874144": ["convolution_gpu_bfyx_1x1",2],
+        "2048528188026477374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3797986765970777456": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2148877522799179369": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14085753024976995311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",2],
+        "15800554162607246964": ["convolution_gpu_bfyx_gemm_like",1],
+        "17258128299721452811": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6214624887470295152": ["convolution_gpu_bfyx_1x1",1],
+        "13842149852156451845": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7482459536338668149": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1786821683911142459": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5436553435132026991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14677968346503677769": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",2],
+        "932195814187889636": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13106818352216009354": ["convolution_gpu_bfyx_gemm_like",2],
+        "12933253554354951910": ["convolution_gpu_bfyx_gemm_like",2],
+        "2945245652128285151": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",1],
+        "1697260854781788314": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10816702874143297564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2884499360870038648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "6400660469217490279": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16293465561256937726": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "14165325329016075285": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16912738776771289379": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13702254392810961772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "5276029719268937229": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7822463130304602936": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8065866013404161366": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "968092788032627444": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2531597468539205600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3287181725010492879": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8577875628223148806": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "6764038061921866053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3061372669831947873": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15604634351310647589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10803929517111130153": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5040095338370816349": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3176785355296130660": ["convolution_gpu_bfyx_gemm_like",2],
+        "12810833895438895155": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4154403364889130045": ["convolution_gpu_bfyx_gemm_like",2],
+        "14034402827496819479": ["convolution_gpu_bfyx_gemm_like",2],
+        "16794854619854992714": ["convolution_gpu_yxfb_yxio_b16",1],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "7463517383354309469": ["convolution_gpu_bfyx_gemm_like",0],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4135975804549022456": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",1],
+        "17990326690659802090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "2328919599530851492": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3602929955785812025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10100171358681249181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11184290482439221741": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "9433162648796382333": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9542325095876448686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "3117175697326325371": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1641111108888949123": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15871357525719630224": ["convolution_gpu_bfyx_1x1",1],
+        "10598099730944525581": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "5099947445888268507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15958886009743157242": ["convolution_gpu_bfyx_gemm_like",2],
+        "5551484040302194648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2014114949154914483": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "2095245727814188300": ["convolution_gpu_bfyx_gemm_like",2],
+        "1692473411043262397": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11563892089503603030": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17789969008677638142": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1556975727728498645": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5593329151028712439": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "1963081583851864291": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12308895602001600327": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "741727668385951462": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5884951148427535208": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8976966933427522253": ["convolution_gpu_bfyx_gemm_like",2],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",1],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",2],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "10141558851476164734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2727175120437582536": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8390889357546397717": ["convolution_gpu_bfyx_1x1",1],
+        "16915857558806082023": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17181874388601550941": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8093401822846123153": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8494725779002762049": ["convolution_gpu_bfyx_gemm_like",2],
+        "4165926748138587705": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5911282942658469852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "762634810164167963": ["convolution_gpu_yxfb_yxio_b16",0],
+        "1154763947184432124": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8686733586982652897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9065137335863605013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7203620615363933078": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "12806934028210472719": ["convolution_gpu_bfyx_gemm_like",2],
+        "2459018025887933198": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14128122558476128712": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",2],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",2],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",2],
+        "4744578087509837185": ["convolution_gpu_yxfb_yxio_b16",0],
+        "8407012082034007985": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "5928392400230917930": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6744692937598310090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16437093737761968743": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "959260710517842876": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",642],
+        "14675165976583799157": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8353259929933281349": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3364467044587904559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3385797925880519845": ["convolution_gpu_bfyx_1x1",2],
+        "15924583510704449214": ["convolution_gpu_bfyx_gemm_like",1],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8168240543278779314": ["convolution_gpu_bfyx_1x1",1],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "11175353869874626110": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "757225477250808939": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16516262096533373158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16052741298509954954": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9585113116232600562": ["convolution_gpu_bfyx_gemm_like",1],
+        "7008873036126556197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4137738705782981426": ["convolution_gpu_bfyx_gemm_like",2],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "8954488655859677891": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14058311587429063829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8952733400567254769": ["convolution_gpu_bfyx_gemm_like",2],
+        "3022939690177474442": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7748514992101811029": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4571404165794634411": ["convolution_gpu_bfyx_1x1",2],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16788162879714733906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7923576965630818418": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15641322340289892344": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",1],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4863644213728386734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "16016396784190934729": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14120569486714455490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2180039710632160943": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3396731547696204011": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1427040855295681285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "10183537720515608": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7425369489110576363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14830991971271385876": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "13957350536347764705": ["convolution_gpu_bfyx_gemm_like",2],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11768117585574496387": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "7972861956906521660": ["convolution_gpu_yxfb_yxio_b16",2],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "14366395926517590797": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12818786388125465101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "16247799703932868151": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "16582761411084080015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1157069349112113377": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16836088134347394854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421204749289937399": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",2],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "14699357144600604190": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13767985623872409391": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17386047378634216634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2245166025103475783": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "8193369947544085921": ["convolution_gpu_bfyx_gemm_like",2],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "6284333183047854748": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4121535611334103359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4165019140664090799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15397084091361096354": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "13387545865482261974": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "12722153168975105360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "150132162949295379": ["convolution_gpu_bfyx_1x1",2],
+        "15101834579076569231": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12166852830214895457": ["convolution_gpu_bfyx_1x1",2],
+        "1944461047787586724": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6875055157295709098": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6887205509732544213": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16738951239219589307": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "16889886654893884746": ["convolution_gpu_bfyx_1x1",2],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",1],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "3793265335909270748": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11626398907755088688": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12567935463143860469": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5155616842071169667": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "6942016672941874829": ["convolution_gpu_bfyx_gemm_like",2],
+        "7719954202744123391": ["convolution_gpu_bfyx_gemm_like",2],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2789901295967374316": ["convolution_gpu_yxfb_yxio_b16",2],
+        "188830358699960789": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like",2],
+        "15012885932988454455": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "12179968379663737450": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17096735128393723245": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10424643336435622408": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",2],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",2],
+        "7432142107544210174": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",1],
+        "9955939178447682108": ["convolution_gpu_bfyx_1x1",2],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",2],
+        "18080788888293706149": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4674504221851042542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9401409770128851474": ["convolution_gpu_bfyx_gemm_like",0],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9414927552739380436": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4089043893927493060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "17434141039341226796": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "6109013751635776331": ["convolution_gpu_bfyx_gemm_like",2],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16214394186337220006": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "16339187733937346919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",2],
+        "2314579504260247470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8092673566670222445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14686272582436109012": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6934915634718835911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5293502980575652171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9190054801124577726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14268594692585922659": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "18279927175542031567": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4755225554035527185": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "9328585005923667676": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9541996065561509160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3346891393420268502": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16986358655784856534": ["convolution_gpu_bfyx_gemm_like",2],
+        "14216513246096503793": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17446505012657609153": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16434358667865869005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "7155796826953849982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11545529736818363243": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10613621801998459768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "42935035304560876": ["convolution_gpu_yxfb_yxio_b16",1],
+        "185782385623159958": ["convolution_gpu_bfyx_gemm_like",2],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "2542112741645712811": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "2714322766616035858": ["convolution_gpu_yxfb_yxio_b16",2],
+        "166437837813304707": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5832851215142537445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",1],
+        "9120374653477510318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "12536364199388193516": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15457040168177954463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3019864917236424168": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16974981142389546385": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17167229341919111718": ["convolution_gpu_bfyx_gemm_like",2],
+        "14043064718932538557": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9812438080378091263": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",1],
+        "5995121118186531621": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9177211394807412309": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9144269202766996508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9803492989444302959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18186612931984342471": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "16101625311127899143": ["convolution_gpu_bfyx_gemm_like",2],
+        "10514865654990433040": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "18120079746729314878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5774841809066688068": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "18435632962969462312": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14799012895945855878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8095675456938934982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8561261337239934159": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12887076860522920405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16548491024653039967": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "12275528180752359999": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15199604820473713622": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "3894130445933963911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10041205516209288381": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "7447163906170805189": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "14884315147107686805": ["convolution_gpu_bfyx_gemm_like",1],
+        "8490260671996115530": ["convolution_gpu_bfyx_gemm_like",1],
+        "7667898603371717971": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10775271979871646995": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "768820004084041271": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8161520217142313996": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8542782888102516498": ["convolution_gpu_yxfb_yxio_b16",2],
+        "967141158966448909": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7171904645566467208": ["convolution_gpu_bfyx_gemm_like",2],
+        "1634884284544380004": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "10226095100825845185": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16271675466919087248": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7839141505912665157": ["fully_connected_gpu_fb_oi_ref",1],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13683623172740048376": ["convolution_gpu_bfyx_gemm_like",2],
+        "13991572769793610416": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2058172559199858297": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",2],
+        "7737977992444172757": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16911464046178654033": ["convolution_gpu_bfyx_1x1",2],
+        "4769003637955328938": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1117836569328440439": ["convolution_gpu_yxfb_yxio_b16",2],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9399511839804500548": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11002165738333323413": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "7992077349568239994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14345755557418971954": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7132328255408635227": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",2],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like",2],
+        "132437164570900392": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8865700182878875593": ["convolution_gpu_yxfb_yxio_b16",2],
+        "466744273945239777": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "1117729599102132243": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17025324057045572535": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8348997431940166878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "13586735166545634506": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "10211403590176354415": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "276313536076170391": ["convolution_gpu_bfyx_gemm_like",2],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "15596408854298291433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13042938686374926241": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7375461241315602473": ["convolution_gpu_bfyx_gemm_like",2],
+        "14346466672686303107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9456645866001656225": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17970424536559595893": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "7614673554809134631": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "16851716501872033211": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "15594673952484539994": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "6141193842171342687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "818998169319147148": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",2],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "7894230717547658326": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15522099459864628246": ["convolution_gpu_bfyx_gemm_like",2],
+        "2844794465598309010": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17583785768334531086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11685571068419983048": ["convolution_gpu_bfyx_1x1",2],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",1],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14025678657541870252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18091349188280218186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15974208269240775349": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11152334947349565403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4367991456894497706": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "13833960927635646899": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13700014916680753395": ["convolution_gpu_bfyx_gemm_like",2],
+        "15070618248849566698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1135062632388082485": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "16870110185980402237": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10704906466618081803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8857763129101380288": ["convolution_gpu_bfyx_gemm_like",2],
+        "1775515808301276388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14147460733160099960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4824040283449153298": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "5606914392662771013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "16617569629839911513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6942606834115081953": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",2],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "4588117321438490483": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4342446399224806160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11910735867274493498": ["convolution_gpu_bfyx_gemm_like",2],
+        "1281190653081960886": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9099056013518879466": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "12686330321897091505": ["convolution_gpu_bfyx_gemm_like",2],
+        "8731079912830889828": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15739756988784344130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "6845814820599174031": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "2929715823970060874": ["convolution_gpu_bfyx_gemm_like",1],
+        "9463256538942644563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "13512059751838488458": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10820312036555742020": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "6055793483770886264": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1330337530094825121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6254141935545262078": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",1],
+        "234288286732396704": ["convolution_gpu_yxfb_yxio_b16",1],
+        "467070383257529689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6654167459904026563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "10979362792894404338": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "11693134363909241514": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1921500066107090648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",167],
+        "3117673619907511009": ["convolution_gpu_bfyx_os_iyx_osv16",487],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "17081449111821382308": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",2],
+        "13123561937554734618": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1290180607037086383": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "13022797264172398260": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "3986429358782189117": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",2],
+        "14244689429217411113": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "6546440095044731932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10747101719272611563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",2],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "2917999294360728537": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3948843501884284998": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13124659308711651699": ["convolution_gpu_bfyx_gemm_like",2],
+        "2119566651547512543": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1318571118468536310": ["convolution_gpu_bfyx_gemm_like",2],
+        "9545968464906009869": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "11455055202624479980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5285172225938230524": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11669828823444745889": ["convolution_gpu_bfyx_gemm_like",2],
+        "12184235281888559274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5257134257307295031": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17085927772068621152": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",2],
+        "16818206615424635387": ["convolution_gpu_bfyx_1x1",1],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "11679235499894668689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8655739705298627602": ["convolution_gpu_bfyx_gemm_like",0],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5240706676373148280": ["convolution_gpu_bfyx_gemm_like",2],
+        "3325575565536567070": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1074748462756364699": ["fully_connected_gpu_fb_oi_ref",1],
+        "6926590672771069689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9603926867418680768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9412392168031560549": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16003914811215141863": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8956566633622104099": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "574869992355132069": ["convolution_gpu_bfyx_gemm_like",2],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12933785392937626017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16316483048621486077": ["convolution_gpu_bfyx_gemm_like",2],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14248622935809594779": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14752182392048929103": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6744044115114192916": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16385915289511951113": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "5017701748886087836": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "10878198256414940305": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12287667143602938393": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "697333686114567307": ["convolution_gpu_bfyx_gemm_like",2],
+        "10113696658040720628": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16013560489115457872": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9741607635826869269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12023260267201191955": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4776446300552810228": ["convolution_gpu_bfyx_gemm_like",0],
+        "15329680728165965773": ["convolution_gpu_bfyx_gemm_like",2],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10148067979123062638": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "16072525303202287969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",1],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",0],
+        "6822432085522584060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "7715937239456300593": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2321773209766424929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12710794174926396540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",2],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8466986812935642059": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "1786105567361070086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12877601016766418505": ["convolution_gpu_bfyx_gemm_like",2],
+        "12241130380766920378": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7837876599690110056": ["convolution_gpu_bfyx_gemm_like",2],
+        "17536482873064844308": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "632116056424249698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12146979849998627283": ["convolution_gpu_bfyx_gemm_like",2],
+        "824911124897042617": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",2],
+        "2835909063063272102": ["convolution_gpu_bfyx_gemm_like",2],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",1],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",1],
+        "3101748967012684440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8837721075413149240": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12287827551127082597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "875146113874776902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",2],
+        "1157388265135592238": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11096750581455917678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10325138269934303618": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14359026450472189405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",1],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "6825390996679224270": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "14963614790718019676": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11775265110573621330": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "7552544688541855979": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078590909693331731": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12081698011407453832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5968129546023764583": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3190494353583341446": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5567628205735744449": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11705756153433897198": ["convolution_gpu_bfyx_1x1",2],
+        "10693837788817206459": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17264671167892237524": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",2],
+        "8645965165922150743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6805188858008657978": ["convolution_gpu_bfyx_gemm_like",2],
+        "16901594465545439334": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7541325258238317885": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "3965327578193694832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3742751561273931407": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12469992822259989528": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17292751972745231011": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",2],
+        "3735605582512535278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",2],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "2449586975250543578": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13701870576531008278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6699877220571254719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5079381702867378605": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "14910911338105922048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10528894716283673051": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12793347723828876280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "10399620940700804517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11694428890484758107": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15666720796968090760": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7770000755097925765": ["convolution_gpu_bfyx_1x1",2],
+        "3419536918610303807": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18446245971488003004": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "13927671398099556854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "1880137091477870982": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9076758673133996959": ["convolution_gpu_bfyx_gemm_like",2],
+        "8302886228681027388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7292351660229751817": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",2],
+        "17996535939348094624": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4039483032571506874": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like",2],
+        "2191416057399400794": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2608363732937932266": ["convolution_gpu_bfyx_gemm_like",2],
+        "396580837423299119": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",1],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4116610956045302817": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15476491807306982382": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6102330514901613158": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16898785030254336705": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16094455700371652312": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",2],
+        "16936968151775497887": ["convolution_gpu_bfyx_gemm_like",2],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16188473537674428539": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6756771670011959646": ["convolution_gpu_bfyx_gemm_like",2],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7051238664181857633": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "3571030800252732358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11065709388908213457": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5924341622384096919": ["convolution_gpu_bfyx_gemm_like",2],
+        "13821224753538037982": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "5595779343671478945": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3220756134650041028": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11824946481875102910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14044732537191084187": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9647916259092117712": ["convolution_gpu_bfyx_gemm_like",2],
+        "7008509833947166548": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1099404514975797315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3114869763557037270": ["fully_connected_gpu_fb_oi_ref",1],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",526],
+        "6427979320488981912": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5312269140190538942": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16813995580382709489": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "4090512597925170883": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "7585777271711713778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6721354194352192662": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14771341796915983228": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18416908414174464784": ["convolution_gpu_bfyx_gemm_like",2],
+        "15956352026642286295": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9312974578711092131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",2],
+        "1208161922424418734": ["convolution_gpu_bfyx_gemm_like",2],
+        "12305397676800089268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",2],
+        "17082268616134506581": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",2],
+        "17955326503130437346": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "13962325395021860937": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16589848737162195829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11497761673211348612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "5938850739683493929": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",2],
+        "15188273255634848057": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1413558157882728476": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",1],
+        "8459380583159325597": ["convolution_gpu_yxfb_yxio_b16",1],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",0],
+        "4718716595177056289": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "12107079280128343726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3217295012596892181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12926382190254407283": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8543619733732987550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1787152688807233651": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",1],
+        "4021045600853993587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9654944848074437064": ["convolution_gpu_bfyx_gemm_like",2],
+        "15635250842093678965": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9418041909134721047": ["convolution_gpu_bfyx_gemm_like",2],
+        "2031558560788449957": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14807466024030301968": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14135593723444205032": ["convolution_gpu_bfyx_gemm_like",2],
+        "14646141746558153748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5583453364991774426": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11436473937404565094": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11719957578496407410": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "462240909302334133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1042605521041579458": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",1],
+        "13193898459027972719": ["convolution_gpu_yxfb_yxio_b16",0],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12780116250427776647": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "7949069388917479511": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18161971781834208343": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "16404362308829952450": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "3301356450249305137": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9589361786336650748": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11807282628372660280": ["convolution_gpu_bfyx_1x1",2],
+        "16953093098789113080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10525462454857911293": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "11313025178951972247": ["convolution_gpu_bfyx_gemm_like",1],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2399313178951511557": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "16644952765107909604": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3724572174214794659": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10893628699015898230": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7954972694876158422": ["convolution_gpu_bfyx_1x1",2],
+        "8458082326743351141": ["convolution_gpu_bfyx_gemm_like",2],
+        "13468081302022888489": ["convolution_gpu_bfyx_gemm_like",2],
+        "14789782064157699768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5578991261564497604": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13767500791267563349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5919454297699648428": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6602394091385112575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17777248703109395158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15031155621982459860": ["convolution_gpu_bfyx_gemm_like",2],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "904355798061005466": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7095629088416100928": ["convolution_gpu_bfyx_gemm_like",2],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "3859139031732555228": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12194037100109755112": ["convolution_gpu_bfyx_gemm_like",2],
+        "4500107195684703428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11759322316883943989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "12831298482349900359": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15227189929676013024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3177304125602972370": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15656706773401161497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",1],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4670487436469119872": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8879618489623984140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11797601971796699898": ["convolution_gpu_bfyx_gemm_like",2],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",1039],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "3067001341355453846": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "18027243127893440568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5751283221740229986": ["convolution_gpu_bfyx_gemm_like",1],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9243949750444156746": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "7444165397413360181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6418500550523945192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17826868890632814593": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10271261715175176019": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9738776059655610885": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1304921846760027440": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3059575629482816852": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3156783219125679946": ["convolution_gpu_bfyx_1x1",2],
+        "16739031949237426992": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "5602328731722824868": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "18129795023552968695": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3244803973821375252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12808456612606675259": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",2],
+        "11908169713247209976": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5046089607609787258": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13960388312976163971": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10632933069865171963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "15398380328746287438": ["convolution_gpu_bfyx_gemm_like",2],
+        "8456185296386225533": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13633048912926365931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "345043289576587800": ["convolution_gpu_bfyx_1x1",2],
+        "17413191440314817117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8365255170846178102": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10504318542015227515": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "12407890437443790515": ["convolution_gpu_bfyx_gemm_like",2],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",2],
+        "17052161869014993719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17195293614280872622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",2],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",2],
+        "10745099399736462076": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "3820661057776133570": ["convolution_gpu_bfyx_1x1",2],
+        "9079203986633151014": ["convolution_gpu_bfyx_1x1",1],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16347412180100581330": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "3242391637018676328": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14689812157592240007": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",2],
+        "12989677691575632174": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3444250649099578792": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8174833187387604731": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",2],
+        "12379166764490359144": ["convolution_gpu_yxfb_yxio_b16",2],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",2],
+        "16837963510205857013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9495192057713157041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5727758374304309350": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17762455138615317884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "5313382805395362669": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",2],
+        "17088011073114549679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4135068756462147853": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3731224822876468602": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4362304842016958728": ["convolution_gpu_bfyx_gemm_like",2],
+        "8250212706222997384": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "10536316961655703500": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "9617316303048974588": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17399728556634171321": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7612288596055048389": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8733371726903473932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4651261398203912503": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "9119268982510599778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6328802691680458752": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13218298785325404589": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5754396201681434378": ["convolution_gpu_bfyx_1x1",2],
+        "15989894214714907271": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9922764846020092836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4972952621622984792": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",1],
+        "18199824206329982249": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5157949342388119167": ["convolution_gpu_bfyx_gemm_like",2],
+        "14151747022287993729": ["convolution_gpu_bfyx_gemm_like",2],
+        "10747688146893187959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "75120034961995929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9237587440336828595": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13326492157370934949": ["convolution_gpu_bfyx_gemm_like",2],
+        "1387945708447092123": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "13962189339706230770": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17848582668902427291": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14281801257982447624": ["convolution_gpu_yxfb_yxio_b16",2],
+        "497488185553682238": ["convolution_gpu_bfyx_1x1",1],
+        "16535378085465418910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "8900977003907025003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11690533591656807605": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17133376737554844449": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "5330130011321223525": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3121704239277217273": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7233783054884565746": ["convolution_gpu_bfyx_gemm_like",2],
+        "3080612075440389053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",1],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",2],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",103],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",2],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",2],
+        "11196245220967135443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6820224292713065232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13009381943944182288": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8210092359850191682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15311930929656759371": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5857101685300045443": ["convolution_gpu_yxfb_yxio_b16",1],
+        "334703311738467111": ["convolution_gpu_bfyx_gemm_like",1],
+        "3451309062150982886": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14808079119439455357": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6760797535531423152": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",2],
+        "3928596145340765666": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2164314506903530487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15757351352532908153": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "13249852145471010452": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7926989875988735079": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10896935976330351144": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16469493066700118274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16459072408799224894": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13766070202060785219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5723759573058003971": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2314805462821790774": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9319064434175105168": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2319519208813614116": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5115298857582076692": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4925720860007127584": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8614375489387596119": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14206125678667603810": ["convolution_gpu_bfyx_1x1",1],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16455941573984854254": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2126208024616319501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5795524493577277985": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17491825380936802930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5319459637051859849": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18333355024265557430": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8794896449397768269": ["convolution_gpu_bfyx_gemm_like",2],
+        "16223356735957394429": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5258372022038629529": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6014752258124559691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10256831975351722184": ["convolution_gpu_bfyx_gemm_like",1],
+        "17987739992848266169": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10982526068861394162": ["convolution_gpu_yxfb_yxio_b16",2],
+        "144634005596305959": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "14116275901314596944": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4240975186599864955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16267531927647687641": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5643920882179676695": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9170163372548895531": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",1],
+        "7178866013527118649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",2],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "8325903548627432": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18244966393978155130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8257103926661643451": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4256155212405177844": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5047972486012090625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "10007925729029867733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4430932059574900921": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15449650271741732512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5705056256080522960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12248852114219058572": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "11979032916453246611": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15136770992109675092": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1632416005093914709": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5577571901049952658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",2],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9736684300833719045": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",1],
+        "15303251546207338960": ["convolution_gpu_yxfb_yxio_b16",0],
+        "362823013207940830": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",2],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",1],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "16000428520749664687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "7211179360844946434": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "6692085187697087807": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "4531222427159927606": ["convolution_gpu_bfyx_gemm_like",2],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14817801788424046035": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5958300749101873980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "8575833423399668525": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "1040411949730118556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15542520725696027828": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15961487889420208188": ["convolution_gpu_bfyx_gemm_like",2],
+        "12879367655655932174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "10626281431800814406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3406812365298442897": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "13668940862847596363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11317843493537672866": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "3635446784873718932": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",1],
+        "1108229954015380813": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15449774545834423274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6808843088626121909": ["convolution_gpu_bfyx_gemm_like",2],
+        "3492178441007007033": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "16502045034098739466": ["convolution_gpu_bfyx_gemm_like",2],
+        "975943900172381326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",2],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17277917672233464304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11113125355390956764": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16513038896689318072": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7531346828150129063": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "10701208905236219083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14930789530046665855": ["convolution_gpu_bfyx_gemm_like",2],
+        "14065215389112262561": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13051390418571971928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13797759143769042759": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "8099100633390626027": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7412772553395852003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16428789154716792138": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2477849395789783501": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17800115051456107658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773482308451190487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4830454154838353056": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "9659814105483633858": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8933701347987963693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4216366893358625960": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "14791575777969587370": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16989896550094613437": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7535571298845832061": ["convolution_gpu_yxfb_yxio_b16",1],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4640696923527766618": ["convolution_gpu_bfyx_gemm_like",2],
+        "13585163747565192884": ["convolution_gpu_bfyx_gemm_like",2],
+        "9135116285263927211": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "13058026769607428653": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10431728173806991521": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",2],
+        "2450251936650841836": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "14931590390643373866": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12866217660635921034": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15747571668131081693": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "6106367716877633757": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5963901433137582265": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "18226737525116147628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",2],
+        "17966898762317477857": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",2],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9542795021683486547": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12093737479877309006": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8651641584737798174": ["convolution_gpu_bfyx_gemm_like",2],
+        "10194187012252949909": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15181987458871339815": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "6458189051305803360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12616205756849913359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5941095082097535176": ["convolution_gpu_bfyx_gemm_like",1],
+        "12762301414049772746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8399668174006528237": ["convolution_gpu_bfyx_gemm_like",1],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",2],
+        "7481256533438761028": ["convolution_gpu_bfyx_gemm_like",2],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",2],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "584086621952390547": ["convolution_gpu_bfyx_gemm_like",2],
+        "4439786737038041995": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2034811390140488812": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7767103488808670253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10396788403466463989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "7986797517722531256": ["convolution_gpu_bfyx_gemm_like",2],
+        "13569453018083742128": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2583562092192709891": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8063236641629084352": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6222595759158615206": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2058364830449635556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2644054989263429508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "537074122417021898": ["convolution_gpu_bfyx_os_iyx_osv16",100],
+        "17040537179740138304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7213383384662748578": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13648761167622654288": ["fully_connected_gpu_yxfb_ref",0],
+        "18253299978538051201": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15526021915035861514": ["convolution_gpu_bfyx_gemm_like",1],
+        "14670339865153970893": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9655242408142699694": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",1],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12656228464579497510": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9309173544512377803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13079058582191027406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",2],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "712495040970043706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",1],
+        "9306120768594851497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7777279468029216688": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18029396837690671545": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14909506411483112959": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1734769856106746136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9987415314864002460": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6772954924703365345": ["convolution_gpu_bfyx_gemm_like",2],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "13403161389559730": ["convolution_gpu_bfyx_gemm_like",2],
+        "9811086682271990794": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9674248159643501374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7830644361525332797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2290965424106255219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11208625628954179200": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "15774073623451382326": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "10099598062509781441": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "8618627241234406784": ["convolution_gpu_yxfb_yxio_b16",2],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1784892318069674949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5568728266639058524": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9328223957245552723": ["convolution_gpu_bfyx_gemm_like",2],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "8809438390805488749": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15317946705199574301": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "18093895673012393740": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10232809153913700925": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17763347648779573375": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2895819653081408358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5600128039063009632": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6490907666077364481": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17188004018198554470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13149617013851130587": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11130439225010714550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "10588059104387338398": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "8117638644045799192": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6818140422066151642": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5156033406916344703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15409184364121627414": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3536359641225772698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",0],
+        "13590444711975157776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "9165817820007469505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "17109520309574369561": ["convolution_gpu_bfyx_gemm_like",2],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "886880682650879171": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14263605862840500474": ["convolution_gpu_yxfb_yxio_b16",2],
+        "119047044057950958": ["convolution_gpu_bfyx_gemm_like",1],
+        "11153522012082333137": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3074436655804078403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15271783562528081169": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",2],
+        "7247475218645942682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2753393184265405425": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",2],
+        "12788611449571149037": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12829916847670789556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",2],
+        "17140702790441856730": ["convolution_gpu_bfyx_gemm_like",1],
+        "7792512829747836997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8935522915553126640": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9332701118402940384": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16573597215928075233": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",2],
+        "15424646499666127616": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16561618767117193109": ["convolution_gpu_bfyx_1x1",2],
+        "12374775091628199854": ["convolution_gpu_bfyx_1x1",2],
+        "14416897092729861207": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16569637518948306471": ["convolution_gpu_bfyx_gemm_like",2],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "8792202318168046223": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "18417830391649460864": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7817691489550523328": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10555835101752189454": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15858356755924943957": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16402386400454963239": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "7398158542592530232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "269167598200943915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2629918844315184499": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",2],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "17479773641824222843": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10868287582480518153": ["convolution_gpu_bfyx_gemm_like",2],
+        "11738780323979052397": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14616801816838734032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9589718307719207394": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "5367618411887849711": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "3582256192870592087": ["convolution_gpu_bfyx_os_iyx_osv16",1029],
+        "16253244737884854313": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13076935351221777993": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "7724125714360985807": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "2955459120402821540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4815047491742617397": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "6250785177115691293": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",2],
+        "7106362077449435105": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "18164706399147697716": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",2],
+        "17536591931934691648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15112393534380347357": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "252188028702250668": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5519781859090160931": ["convolution_gpu_bfyx_os_iyx_osv16",760],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",2],
+        "17713666626443142908": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15617599138946168772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11319799002723299753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4154830034576950123": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "12344689711325644622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9757389422721488173": ["convolution_gpu_bfyx_1x1",1],
+        "17833304859352483840": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12625112690264223217": ["convolution_gpu_bfyx_gemm_like",2],
+        "6888842613779488104": ["convolution_gpu_bfyx_1x1",2],
+        "18080848057281093190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10785252006948647963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14827882251752394500": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6713554643048248003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",2],
+        "15131258379753113816": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11658751382892761740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1973051991518953158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14091543526898531200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14887465694301281952": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9426665763007611385": ["convolution_gpu_bfyx_gemm_like",2],
+        "3533556385636018581": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "5291011077679733990": ["convolution_gpu_bfyx_gemm_like",2],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "4282756088824939292": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15967614281807823696": ["convolution_gpu_bfyx_gemm_like",2],
+        "5656320098721954644": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "2571289358202565251": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3449007266907948591": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "8890400423799565844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "12745552951204330052": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16851082749395991194": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "15924916465272239832": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "4280250278457269231": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13326339730522937517": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15799159401545270696": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17255805293355120219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "12818012741490629493": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "18075395502550596586": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3172518362830684966": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14892045745899927762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11263540528012919947": ["convolution_gpu_bfyx_1x1",2],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "9982350570959875159": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15973363403733281926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6820134899097582639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1129349074674368869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7235358742317442134": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16252420150239789472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17041468169694105561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4995051972576749717": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2199167704280374654": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15739278428190392018": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6131481289104111211": ["convolution_gpu_bfyx_gemm_like",2],
+        "10751633292301177132": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3049097498155857895": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9116620473576064051": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",2],
+        "2907572047024872990": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",2],
+        "9169324504353459004": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17272600601478967434": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15949311219856917559": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "848735117501914374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11226912053840621089": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8155268141318893606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8616584380583931648": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "10879171754021534649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4805194563120934409": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1235864574444794315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7979265448683159733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16709930291825881111": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10406201782146034797": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10812324504777808014": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14385148066232093878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8362179886017398479": ["convolution_gpu_bfyx_os_iyx_osv16",8],
+        "4615766471724791034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6511742759171254447": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9523941899498458600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9026883911202247185": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "4779919236230154165": ["convolution_gpu_bfyx_gemm_like",0],
+        "1054954263090546905": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14097394936362526559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10558609844937234631": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10318417166945621015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4988480452582288323": ["convolution_gpu_yxfb_yxio_b16",2],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1354199155380786906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",2],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",2],
+        "10272016038525930672": ["convolution_gpu_bfyx_gemm_like",2],
+        "18337762134908554532": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",2],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "11738360883999461965": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "10997156099709436375": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",2],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10720769054729185991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14365232561737454031": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "10952045211444638649": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14885031472057965707": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "3648713169465596196": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2469579114592379040": ["convolution_gpu_bfyx_gemm_like",2],
+        "13507437548205340054": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11010673493295430801": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3792276488551864121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1336739931702966228": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",2],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6931062623510631425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13723434004563378589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12917241193304093727": ["convolution_gpu_bfyx_gemm_like",2],
+        "17618727959983224888": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2060161076370553192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "12714892326998505133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13040213971461407125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5852569526295779497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11724732387425614709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11892455357792445192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773077837537775324": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3904383357046705799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3286496836813087881": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",2],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14497254583210965214": ["convolution_gpu_yxfb_yxio_b16",2],
+        "72745257233374197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11289650463922092775": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2673903488704336606": ["convolution_gpu_bfyx_gemm_like",2],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "11606895513516475339": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13124342334495538095": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "12546446257192651407": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2856601829807186494": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13412516623201653283": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17868834743037242721": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4805402210873641704": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6816632607384969096": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9899211365930959346": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "10612739622648878242": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4780291919667721265": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "10178171262128338408": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14502856487639608696": ["convolution_gpu_bfyx_gemm_like",2],
+        "4894469114343061704": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "14417401878572618236": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10914921540144371519": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10724501418439612080": ["convolution_gpu_bfyx_gemm_like",1],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7100056605355325582": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "15859493313686060349": ["convolution_gpu_bfyx_gemm_like",2],
+        "14540578324750869319": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565006185780806333": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "15192022454507415969": ["convolution_gpu_yxfb_yxio_b16",1],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "15325852281951905610": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",1],
+        "8519354640245415816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12669783714916998842": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13187657215288939912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6123707371654753818": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7343590049199309046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5526223938481098693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",2],
+        "9521715904587435700": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3058716597925544041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "2431241169199693527": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1425953627379976115": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "6118737381591369532": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9184275066167601343": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "18083803358410976976": ["convolution_gpu_yxfb_yxio_b16",2],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "1299760574827253811": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4126895998426674411": ["convolution_gpu_bfyx_gemm_like",2],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",2],
+        "8421388456873652700": ["convolution_gpu_bfyx_gemm_like",2],
+        "1584906448442153128": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3096280563014331836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12334522314915706512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "9832505855130134649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",2],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",1],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",1],
+        "7015738038963065110": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "6963293142152132518": ["convolution_gpu_bfyx_os_iyx_osv16",165],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16195893521207315456": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6509271384550125629": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12137340921829511472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1760391741350091665": ["convolution_gpu_bfyx_gemm_like",2],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13289438471364352634": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4610200388191607540": ["convolution_gpu_bfyx_gemm_like",2],
+        "10882719585803523032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "13943983517468412332": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5479761740065152589": ["convolution_gpu_bfyx_gemm_like",2],
+        "10133398220120888583": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13156052826121673994": ["convolution_gpu_bfyx_gemm_like",2],
+        "10006197783106691106": ["convolution_gpu_bfyx_gemm_like",2],
+        "4602232889230956461": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15669242195570440840": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13365950526881732374": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14469011068777098822": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16341722570340169855": ["convolution_gpu_bfyx_1x1",2],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",2],
+        "6715523440337925186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "10717031088082350652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "2016932800158392200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13467831091041327178": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2135164671985938807": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9711184878666366204": ["convolution_gpu_yxfb_yxio_b16",1],
+        "968105804060326332": ["convolution_gpu_yxfb_yxio_b16",2],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",1],
+        "17248329632819747646": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11942019076226205097": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6902485831441844789": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4885504197789468842": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13464697394408238115": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10789133352712755945": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "7065244994574625911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",2],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "16129682385980878760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17043601935017365442": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2905979727479716212": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4391695940614024479": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",1],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "708201295462256406": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "9827177798112814604": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "5050273611519516510": ["convolution_gpu_bfyx_gemm_like",1],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",1],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "7628077869220463202": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "11417406326478154077": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12985942652866621579": ["fully_connected_gpu_fb_io_ref",2],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",2],
+        "5638640164891118162": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "603883331897298932": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16633540487930201533": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "3872151366780051246": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "6613116267521819997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3377052601059116318": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13509275050322423832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13119040261291835298": ["convolution_gpu_bfyx_gemm_like",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6799631962511042762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "6959692641873234850": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18152894191323920027": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6512987867462549101": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16371608027363202992": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6210866413385292851": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "465567788283624320": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6756679359093569015": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "5740745357953479527": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "17676344219475515993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",1],
+        "6945787904293959477": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "18243724217479803107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "2227700097134029783": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10309986238001994183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12531880391016521628": ["convolution_gpu_bfyx_gemm_like",2],
+        "15115780248032030963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4298242568890525997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16870036853278751563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13161997040644039778": ["convolution_gpu_bfyx_gemm_like",2],
+        "15833461718320604065": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "7669403041163460089": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",2],
+        "15482685355538566951": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",2],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",2],
+        "6558436237075337721": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1580344438642032807": ["convolution_gpu_bfyx_gemm_like",2],
+        "5578850952665051661": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13493119419114659706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16425374300157280628": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11698754846673268046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12121204870979363096": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",1],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "1448440012428740463": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8976238022515713641": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",1],
+        "10681768474583067517": ["convolution_gpu_bfyx_gemm_like",1],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "359617184733439511": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9366100787108468082": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2269140636553245446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9870432551513415176": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1984152634309440563": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "361497145093734608": ["convolution_gpu_bfyx_gemm_like",2],
+        "13861223834466385546": ["convolution_gpu_bfyx_gemm_like",1],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",1],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9941035405796680081": ["convolution_gpu_bfyx_1x1",1],
+        "12051398350382954787": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14242202444788213591": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "8999570321113443117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "838726445796308454": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",2],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9518071423184197213": ["convolution_gpu_bfyx_gemm_like",2],
+        "8004244584949995244": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17480277135590489472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "167635075964111628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1463649546800120847": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",2],
+        "225809055928705881": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "7998455776901877973": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11596971301790598405": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "5312413491828906254": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12600707101000510621": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7346046748383284270": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7804715870037416579": ["convolution_gpu_bfyx_gemm_like",1],
+        "18433141005552346566": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17893181511546734799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",1],
+        "7647236080048602591": ["convolution_gpu_bfyx_gemm_like",1],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6910589963488897537": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11175936010605958812": ["convolution_gpu_yxfb_yxio_b16",1],
+        "568191462231494113": ["convolution_gpu_yxfb_yxio_b16",2],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "3861351835305151926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",2],
+        "7779562434199107586": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "18385086614524985975": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4731836216299455047": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13878967140838761911": ["convolution_gpu_bfyx_1x1",1],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11888011890096886932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "17961793197503317952": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5720964268093705079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "877901260688090160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1310498917952637709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6871131333562410117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",664],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "7139719632093090046": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "14553577436929219470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16814025114202322376": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11880337915508207160": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6467251764899975676": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16995444341569389342": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "14206076551739831333": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7393551951402219833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "1596353239542510685": ["convolution_gpu_bfyx_gemm_like",2],
+        "2578325663193624576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13426254939418471242": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15337841577110104431": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14868677663932902695": ["convolution_gpu_bfyx_gemm_like",2],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",2],
+        "14766694310604777253": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15135644084742750702": ["convolution_gpu_bfyx_gemm_like",2],
+        "12787837386653002743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6167369758442930886": ["convolution_gpu_bfyx_gemm_like",2],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2026622899016787854": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "16374675547140209181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "888110783182849535": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16683485007140805060": ["fully_connected_gpu_yxfb_ref",2],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",2],
+        "8260024340787818709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10480527638577674825": ["convolution_gpu_bfyx_1x1",2],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9162469583721135043": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4685236901551256966": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",2],
+        "2296581485980163665": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1635121016109328853": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15670767419106537809": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4848143712599565301": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3102538312627892960": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",2],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18408107772851888061": ["convolution_gpu_bfyx_gemm_like",2],
+        "11179211757115972103": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3980835859526174461": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15525903155475629518": ["convolution_gpu_bfyx_gemm_like",2],
+        "14175962333785791005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "290134020607738418": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "9534041402131086717": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "17638753020411096694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18432421400879260832": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "16304192736281226143": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7305582749708309904": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3622409603053918029": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13207134083675064956": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "8931169575495985034": ["convolution_gpu_bfyx_gemm_like",2],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",2],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",2],
+        "12850044341631872743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11086471945045031067": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",1],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5507373575763339429": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13328911884191551889": ["convolution_gpu_bfyx_1x1",2],
+        "5104519293341299859": ["convolution_gpu_yxfb_yxio_b16",2],
+        "249639220178603842": ["convolution_gpu_bfyx_gemm_like",2],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14942858162799632403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5576296603250158603": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "2683304757433993300": ["convolution_gpu_bfyx_gemm_like",2],
+        "16128152634974034731": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15677717057398875599": ["convolution_gpu_bfyx_gemm_like",1],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3349468433721705582": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15863531785836309247": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15367649112776077240": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9492026326463873766": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16925721317097534009": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "3319827933068341610": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12268912077694742671": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "9073757008455674094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8780671766122887951": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "15625374380046476173": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7105219760750474587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16961326251624610778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10076885835791159907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",2],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13541382855330226000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11015074526119891710": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "7897973318803646560": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",2],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "11871319147579477936": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16601230690171340432": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17207560805775399864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9737565171095493297": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "17466963970980708210": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6128157319666849074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "1197281505560782577": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13264617841270329349": ["convolution_gpu_bfyx_1x1",2],
+        "6550549654706796887": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13120262386070281193": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13368203360773949292": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "3615052707933370958": ["convolution_gpu_yxfb_yxio_b16",1],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",1],
+        "2554991397391195611": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "7206226541369793931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",2],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",2],
+        "2949545414911764346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1173136780324694038": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12512751736409465214": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9079676771143357396": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15474155528481683394": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "9649445293567537596": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "3603187029740446600": ["convolution_gpu_bfyx_gemm_like",2],
+        "1418595171949196661": ["convolution_gpu_bfyx_gemm_like",2],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6578517057140155080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16674633029045714564": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "15228390729175722409": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1245259979364728404": ["convolution_gpu_bfyx_1x1",2],
+        "101387140804297623": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",2],
+        "13234055353608734080": ["convolution_gpu_yxfb_yxio_b16",1],
+        "136349424199140459": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9182260316973872633": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",2],
+        "17651477639302255490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8768300687476117215": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14461365896122393071": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",2],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "13596876807637507229": ["convolution_gpu_bfyx_1x1",2],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2208765794404376467": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1345101751956733589": ["convolution_gpu_bfyx_gemm_like",2],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12185561188335760786": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14113320831418478396": ["convolution_gpu_yxfb_yxio_b16",2],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "12637509262827320678": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "5897564616927353003": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "15006204461468698734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13398986810666238552": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11731277083374465361": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6020017927557041768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "10880830033700542216": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6934241437968723825": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4740585760177040164": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7009459929666511861": ["convolution_gpu_bfyx_1x1",1],
+        "5602377914578322577": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",1],
+        "2561508262445368003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17935612508319394087": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6126073246053235472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "786401653335542559": ["convolution_gpu_bfyx_gemm_like",2],
+        "123026136670202868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4999171487916568471": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "14204609663091442879": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12181310683533105454": ["fully_connected_gpu_fb_oi_ref",1],
+        "15765592038173567297": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "15497263259976427714": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "6817494598328071314": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12234313962656804631": ["convolution_gpu_bfyx_gemm_like",2],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",1],
+        "155962454315573087": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",2],
+        "15681189418847392587": ["convolution_gpu_bfyx_os_iyx_osv16",857],
+        "9989055862610193828": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "3779229442395464456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15678768217453692725": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6950586691727980329": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3365786526859737112": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6022695488769618639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11612044653200304877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12960590161485806657": ["convolution_gpu_bfyx_gemm_like",2],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",2],
+        "15223779293313750042": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14749758365915995876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11973034261101454380": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8205640825965213946": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10774872391768741315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11564071490267241224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12604104383683210104": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "3658599312236344017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "8974851555526896131": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",455],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "3976736548270395981": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5576305720733717044": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",1],
+        "8584375748627260395": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",2],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",2],
+        "18040183500393090505": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4291531885506213180": ["convolution_gpu_yxfb_yxio_b16",2],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12476381811279163147": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12972798847556569913": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2936333406928424760": ["convolution_gpu_bfyx_1x1",2],
+        "6081038474197004540": ["convolution_gpu_yxfb_yxio_b16",1],
+        "577842450575835175": ["convolution_gpu_yxfb_yxio_b16",2],
+        "401304652492444430": ["convolution_gpu_bfyx_gemm_like",2],
+        "13009612703754510124": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5020763861388859254": ["convolution_gpu_bfyx_gemm_like",2],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "16681690088928624738": ["convolution_gpu_bfyx_gemm_like",2],
+        "2173163618947713953": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "12477315042623518609": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3067930325929862490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",2],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5012013738970489338": ["convolution_gpu_bfyx_1x1",1],
+        "8735534480653818425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4627958043707973483": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "16610284927818475574": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13618411266808159341": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2188101366183302888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12923298574715329852": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1531349457115735845": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5751553671208192963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "2882493407831196579": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "7232326270078161768": ["convolution_gpu_bfyx_gemm_like",2],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like",2],
+        "4113061482402915179": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8732952254407298868": ["convolution_gpu_bfyx_gemm_like",0],
+        "5564881878876582769": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3217674729821898463": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "15720012960520885263": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3286250915720444467": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "10292585962794261197": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4633923265089466898": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "11270855425262923989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8761283252495354972": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "18187345248160481425": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",2],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",1],
+        "13531892014108749846": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3622778166646258015": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12745631396795162505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6948455759869670955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12721294268595880422": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17439102502195540957": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1198893312653197535": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",2],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "636447309806530300": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "16768497046700403748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15167962750603978874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6267138247577676996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7895030495055232460": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2576773809294607971": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",2],
+        "3962138884698789654": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10547134120307382906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7223801044761006523": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "377219085802486361": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9435086287598656868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "10706180189726741161": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16711142379173254655": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3171354702636014224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11007175027950132719": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11194372303922533529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13705072264927031658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9576962489937466093": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4445913285957791409": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "17734480671864478402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5112480593385320005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6603778920476932267": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6253009218981124949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",2],
+        "14971270053929063630": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",2],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "522313477023837056": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12256193738921380409": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9096495972770198040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3056212889689424946": ["convolution_gpu_bfyx_1x1",2],
+        "426827405952656362": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "12871555773123368130": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11088324811742486481": ["convolution_gpu_bfyx_gemm_like",2],
+        "4398371999113956082": ["convolution_gpu_bfyx_gemm_like",2],
+        "14774814395786139876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16242136888057221574": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13777174566683935109": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5337351591182109481": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "6249875772709398338": ["convolution_gpu_yxfb_yxio_b16",2],
+        "913861052717410566": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1114679698826953542": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "5516518048239364231": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "2581414750854621875": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "12327057172281102984": ["convolution_gpu_yxfb_yxio_b16",2],
+        "576164857039495839": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18184621367843960190": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "6341363789473021047": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1325669650629605592": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5106072383853469966": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7800262579057534804": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10935309102034762723": ["convolution_gpu_bfyx_1x1",1],
+        "9453100135791813000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",2],
+        "3856976081672275637": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "6288489890578212082": ["convolution_gpu_bfyx_gemm_like",2],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "4937688558707451907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6317575981520135028": ["convolution_gpu_bfyx_gemm_like",1],
+        "15531306520021286502": ["convolution_gpu_bfyx_gemm_like",2],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",0],
+        "13809330759308309353": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7683334381958571864": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",2],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "2282123636764935353": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13978649386370395620": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "5942742563827424666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like",2],
+        "2737064424879246276": ["convolution_gpu_bfyx_gemm_like",2],
+        "18087356517015630281": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "3336076058264596420": ["convolution_gpu_bfyx_gemm_like",2],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",2],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12370729327673204804": ["convolution_gpu_bfyx_gemm_like",2],
+        "13251091004269229867": ["convolution_gpu_bfyx_gemm_like",2],
+        "2817919813339364130": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",2],
+        "12768933181342249823": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "2173720698351153121": ["convolution_gpu_bfyx_gemm_like",2],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "11109044986816563101": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17682152011630274259": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18214716801063702171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",2],
+        "15636128989267984459": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13409744191227471760": ["convolution_gpu_bfyx_gemm_like",1],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "57372993988016244": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "13912728810446567016": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "14324166291904435508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15418732002117930760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",1],
+        "9017605508157213607": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17617204422090117691": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2506424495656099512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "3423717644513543253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",1],
+        "3652414035262499383": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",2],
+        "17811558714592064184": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "12680688623162482255": ["convolution_gpu_bfyx_1x1",2],
+        "7824075236081312706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15159534367247036982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14074996784220709246": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8021915447462898777": ["convolution_gpu_bfyx_gemm_like",0],
+        "1973819632224480598": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12411075288896909468": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",2],
+        "2908156087871187676": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",2],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",2],
+        "13851851281384416649": ["convolution_gpu_bfyx_1x1",1],
+        "6217542346826403576": ["convolution_gpu_bfyx_1x1",2],
+        "11557032521956761994": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "13199672084171648305": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3225866261943242708": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10765280349477640969": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "17876939980356283351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",0],
+        "6280726148869856021": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10471519687597963116": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8470959792634864749": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13501352378461071771": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "14908477489231326997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "12767065362702304803": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "8039645104667120991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",1],
+        "563440246018637010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17361714725103230834": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "16182470664818268848": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5668538167635622474": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "7532088618116521936": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13130001092233798285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8185193068790365354": ["convolution_gpu_bfyx_gemm_like",2],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "17225552472711821360": ["convolution_gpu_bfyx_os_iyx_osv16",946],
+        "4759671642533786591": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "14132543442791497311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16341131728764501904": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8183383667948205424": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4701235352806075765": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "13527018660229167386": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8943913562339525413": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4274425737610351312": ["convolution_gpu_bfyx_gemm_like",2],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",2],
+        "4378422094110940766": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5284456216115118110": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "2439993891369206440": ["convolution_gpu_bfyx_1x1",2],
+        "7902473777019759045": ["convolution_gpu_bfyx_gemm_like",2],
+        "10322427853063201289": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "15497797842820949408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7762916621662364082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4072951883124129646": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",2],
+        "12384317536636082264": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15578456771467281881": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12741762570001404232": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2581014920570427861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5854093367753757010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15104727000375811836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13966416504547680082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16620268338434572068": ["convolution_gpu_yxfb_yxio_b16",1],
+        "875296362957469305": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7405315582091905378": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12961109385388101976": ["convolution_gpu_yxfb_yxio_b16",0],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",1],
+        "10463632805036507382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1485662490111767875": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "5552699731399195573": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "13038533272699602337": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12013818650853034767": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "11079061135559995449": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7364084475361144967": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",2],
+        "7441188930428385142": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3830842631023415233": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11073090858361674041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8611873585228858719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",2],
+        "3047407458812880288": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1123577455191848310": ["convolution_gpu_bfyx_gemm_like",2],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "17737878867906137388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "16788715253205076219": ["fully_connected_gpu_fb_oi_ref",1],
+        "17559750858236255044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",2],
+        "10002044609138970243": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11012427206693842637": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "5977875644245993099": ["convolution_gpu_yxfb_yxio_b16",1],
+        "411016281538345537": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10766317990628501609": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "9737833587413114584": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16161974964662774501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7571716782558859443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13291402786934990349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8177017967170389275": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15811723176266128065": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "18033349045324117723": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11500205299047837289": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4947788161154370784": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "13585916416233680276": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "8611710048909301596": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3366647240745174769": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5421397731090158382": ["convolution_gpu_yxfb_yxio_b16",1],
+        "577844026691991089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6962062962411903140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1148949417144436507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1208665743495618456": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5788018146987909930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",2],
+        "3914143598803149415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",1],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11921652085115182024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "586134723922638373": ["convolution_gpu_bfyx_gemm_like",2],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",1],
+        "9955816463820554626": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10560559646371329711": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18279416225045612845": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2819475920524949313": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7861119251077361882": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7369109502608631066": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1216021647922150199": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4238885454989272754": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "2816339200381598722": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "1141277975467180549": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "7107677063657303327": ["convolution_gpu_bfyx_1x1",2],
+        "8079914471491171372": ["convolution_gpu_yxfb_yxio_b16",1],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "14263790627243107300": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",2],
+        "6859143702528475520": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4098581145478965082": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11241838709529552265": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "331661172067077796": ["convolution_gpu_bfyx_1x1",2],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "3932617680771387232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18161786710055240343": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "12388894315292201102": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "6723804327185132790": ["convolution_gpu_bfyx_gemm_like",2],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",2],
+        "15576534481170615301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7223737889890738294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16341700680310033430": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "10996429218747311159": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "5895417825685090256": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "7317391511452227268": ["convolution_gpu_bfyx_gemm_like",2],
+        "2147962310424425158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",2],
+        "9940761514291929473": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5288793454052261767": ["convolution_gpu_bfyx_gemm_like",2],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3806806400778685133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16161112020028389294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "7590734607006912544": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17270057383792994793": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13830605041347009953": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14273849038400888518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "360064276184684693": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4597873630741623918": ["convolution_gpu_yxfb_yxio_b16",1],
+        "69832608384091511": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7260204889552803221": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like",2],
+        "4169042131399110713": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1089944493540593798": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "8931469268093714938": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4208702365182336507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13914239937595549448": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15488532485794545310": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3231651468686543808": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "241656278218999298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10956668791040094584": ["convolution_gpu_yxfb_yxio_b16",2],
+        "844576097677576405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3631332752661975859": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15421280195211166867": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14823789570149356458": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7104266560248570112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "8075261051536686307": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "4142555169083069413": ["convolution_gpu_bfyx_gemm_like",2],
+        "12501619443242354860": ["convolution_gpu_bfyx_gemm_like",2],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11987564534722442223": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "13200834963067135502": ["fully_connected_gpu_fb_oi_ref",1],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",2],
+        "14280056365441354869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3766048787611884529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12878346173547852969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8723078862651154959": ["convolution_gpu_yxfb_yxio_b16",2],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",2],
+        "8115522418294960470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15998609626878578708": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2149299205144202701": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5940007433515335594": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1539677456611270609": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4683320313995550908": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15060535689318007173": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "18239740525818575112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9814647153117279415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",1],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",2],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",1],
+        "15932838442166411183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11971853138084108953": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "721174714308243785": ["convolution_gpu_bfyx_gemm_like",2],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "13636407347458845915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15534876725099279666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9967611023372430532": ["convolution_gpu_bfyx_gemm_like",2],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "4201057957682777280": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "12311849904266608701": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10815244730103375973": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",2],
+        "12526988667216482085": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13077917010686381919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16828961272295386615": ["convolution_gpu_bfyx_os_iyx_osv16",539],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "12046017161414846599": ["convolution_gpu_bfyx_1x1",2],
+        "17344974951998490453": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8622014461615231500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15438470456977849772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1868805550246252143": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7846384623429362522": ["convolution_gpu_bfyx_1x1",1],
+        "6388117241933586388": ["convolution_gpu_bfyx_gemm_like",2],
+        "15188570678726970998": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12002302929446578025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",2],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14808759315730413993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3211829722778368758": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "16950925976172895196": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "3370082268529091875": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "7493567975736494003": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",2],
+        "4683575221310726091": ["convolution_gpu_yxfb_yxio_b16",2],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5401380444992462053": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11052275099129482401": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18249888571553409563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15612334131144235342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3001162215282339268": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16895523130717954500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14236681916032484600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "2339864165283480961": ["convolution_gpu_bfyx_1x1",2],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2247717767819293683": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4818598834950786080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",2],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "16335738565228204503": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "17224104246148265328": ["convolution_gpu_bfyx_gemm_like",2],
+        "13182623473102074079": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11157773554806649837": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12267555886404772991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2847490224869294354": ["convolution_gpu_bfyx_gemm_like",0],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "2215570184121152738": ["convolution_gpu_bfyx_gemm_like",2],
+        "5584145249514762750": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",565],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like",2],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like",2],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "16773645387243701837": ["convolution_gpu_bfyx_gemm_like",2],
+        "4049224463072418218": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",1],
+        "17462996923473002801": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5965451243366505522": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11614353411428360211": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7565348337952384040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3782308167335660154": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "8779960552750034544": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3934090072734175564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "880603384896315783": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1658174263018326745": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2917735110073643952": ["convolution_gpu_bfyx_gemm_like",2],
+        "9280279544075738476": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12131461096501477069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14585000863294748739": ["convolution_gpu_bfyx_gemm_like",2],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "14712972289919865502": ["convolution_gpu_bfyx_gemm_like",1],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11761545976388416063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6769524481210107636": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17107083637007906184": ["convolution_gpu_bfyx_gemm_like",2],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8469874583725132145": ["fully_connected_gpu_fb_oi_ref",1],
+        "4423866541063606768": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",1],
+        "11298854310398101852": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4933831571091731212": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10718764522366711114": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10912495395422146386": ["convolution_gpu_bfyx_gemm_like",2],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",2],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "15886016297043613632": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15980348884716629349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",1],
+        "12389854459474697184": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15693204620575485046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7329924387620542330": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "14528180674573671874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5931972000452008090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "11291868421122092629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2923543983518895756": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8506271633579173639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2759142157812694203": ["convolution_gpu_yxfb_yxio_b16",2],
+        "294153950488131608": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1569043950563130463": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4216958486055161753": ["convolution_gpu_bfyx_os_iyx_osv16",105],
+        "6388086351909447495": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",1],
+        "15267084369543546013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8260073247636023575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",1],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",2],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12713821004129672990": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7780140599533242850": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12816344078518706065": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3499406509137418124": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "5291817530552764387": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12977678792503377525": ["convolution_gpu_bfyx_gemm_like",1],
+        "9827201026276954165": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15971340431600153619": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "9162862507585693061": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14963449045970262346": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16949056117405140365": ["convolution_gpu_bfyx_gemm_like",2],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "17764795635957985989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",2],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "5672464491301994292": ["convolution_gpu_bfyx_gemm_like",2],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1898243736289257252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4617347486560666277": ["convolution_gpu_bfyx_1x1",1],
+        "7273427309587902237": ["convolution_gpu_bfyx_gemm_like",2],
+        "2866656294663853474": ["convolution_gpu_bfyx_1x1",2],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12903015669020591018": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8941904405273405481": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "16290551573997593168": ["convolution_gpu_bfyx_gemm_like",2],
+        "14944590179685661287": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",1],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "8323669961818535927": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12623375499927200341": ["convolution_gpu_bfyx_gemm_like",2],
+        "10141927023849730720": ["convolution_gpu_bfyx_1x1",1],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "1390379098099686972": ["convolution_gpu_bfyx_1x1",2],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "4982549855424649217": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15295172519920136220": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15750539817895707253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12585864429067596351": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16725049805030712400": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3738514326459749974": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",2],
+        "11800783548769329949": ["convolution_gpu_bfyx_gemm_like",2],
+        "13598062803968442253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17498483343394902796": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12027202455592387086": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5834825835421819800": ["convolution_gpu_yxfb_yxio_b16",2],
+        "816527348871309530": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8321204816277460837": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10626018319543075871": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",2],
+        "11891319657803057127": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3308955824300750921": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1095959046309466012": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14184895905338394239": ["convolution_gpu_bfyx_gemm_like",2],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",1],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "11706446082856895571": ["convolution_gpu_bfyx_gemm_like",2],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16563030700888982979": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",1],
+        "430132942408244070": ["convolution_gpu_bfyx_gemm_like",2],
+        "11299021927882809469": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9562527071055150197": ["convolution_gpu_bfyx_1x1",2],
+        "1250095876638711647": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "14079654309452583394": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7742126547476513275": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3343020946662226400": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",1],
+        "2670216237572554944": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "15363606233048272809": ["convolution_gpu_bfyx_1x1",2],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13735180250757239202": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",2],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "7395419333138772074": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "13158449455164143947": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "2782970766870172398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15449715596597016714": ["convolution_gpu_bfyx_gemm_like",2],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "11224051407822914513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",2],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "4897690791599638716": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4776685525963461501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",2],
+        "16742058312847401360": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17266121859044814533": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "14764715930784496165": ["convolution_gpu_bfyx_gemm_like",2],
+        "17580363505072477558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14578867494693499627": ["convolution_gpu_bfyx_gemm_like",2],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",2],
+        "15148442194461613102": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5941298590926032148": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7126667413990834481": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "223412492545617963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13621771094745539509": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9871407256481442790": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",2],
+        "7815650257256675477": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like",2],
+        "13123709697607309884": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14331554754171207866": ["convolution_gpu_bfyx_gemm_like",1],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",2],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16597170760061556882": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "15106614232165315070": ["convolution_gpu_bfyx_gemm_like",2],
+        "12913866095318048752": ["convolution_gpu_bfyx_gemm_like",2],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "8943651590146149679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6469277112054008613": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "18322435770607273817": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6862489207967519978": ["convolution_gpu_bfyx_gemm_like",2],
+        "11051684565403294370": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14262482011051329729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1498389965422474930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14766477690417085350": ["convolution_gpu_bfyx_1x1",2],
+        "14819324687394700033": ["convolution_gpu_bfyx_1x1",2],
+        "4574541202890196191": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5884802375772043861": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9272405129875537865": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14445031303145992349": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "15310474203328198827": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",1],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",2],
+        "17969195175890497912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9162359935098885411": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3364141707903132298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3647203315640064927": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17342758321852264926": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4438526427135833402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",1],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",1],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "130427456111826171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "10572945270796129630": ["fully_connected_gpu_fb_io_ref",1],
+        "4936961129835214448": ["convolution_gpu_bfyx_gemm_like",2],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11148428797294511280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13408839571805750778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14558572801374416278": ["convolution_gpu_bfyx_gemm_like",1],
+        "10005348255972308430": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",2],
+        "8876704486585503280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",2],
+        "10174346112533671798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1527126728636583082": ["convolution_gpu_yxfb_yxio_b16",0],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",2],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9538863363710651909": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5224252360611200472": ["convolution_gpu_bfyx_gemm_like",2],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "9642965664913867675": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7397376454528841634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14742998604680438008": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "6921081008428242060": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "10939522663236304689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13155570698198686211": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "11939914680143672459": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "9590161922224578217": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2797436491596125131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "7600034850149968684": ["convolution_gpu_yxfb_yxio_b16",0],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",2],
+        "6839795451275143093": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",2],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9918371346247634545": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "14258941821319200170": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2447893458816856522": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078168059698267650": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2367452220382767844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3987482581128838173": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "10762489947656697207": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5924698731432597368": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "5422432655714154738": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3285968426413869315": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13320473279945887641": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",1],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "13668072006310741601": ["convolution_gpu_yxfb_yxio_b16",2],
+        "994252691216116396": ["convolution_gpu_yxfb_yxio_b16",1],
+        "149810021216592597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "4633763257197651352": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16209868158768307271": ["convolution_gpu_bfyx_os_iyx_osv16",919],
+        "10572380563704942622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11411413051626428349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8058419689646625853": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7590390572139249734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "720558977788683564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",387],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7814543122045448412": ["convolution_gpu_bfyx_gemm_like",2],
+        "17538518333907257868": ["convolution_gpu_bfyx_gemm_like",2],
+        "2283020548041814543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",1],
+        "14651159827389223108": ["convolution_gpu_bfyx_gemm_like",2],
+        "17224655686568797096": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1703738105910059846": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14215394208930955062": ["convolution_gpu_yxfb_yxio_b16",0],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "7678226048807568024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "1308980444055174254": ["convolution_gpu_bfyx_gemm_like",2],
+        "4727628999533330347": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "14149210193687890597": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14769111376729628572": ["convolution_gpu_yxfb_yxio_b16",2],
+        "501138469231848694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15645112311663561994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5041922366297242362": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16271970578584267980": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "8494385862885499798": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7400937639903461446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18384215264061386089": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",2],
+        "15269988216002549857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8846314870152404018": ["convolution_gpu_bfyx_gemm_like",2],
+        "18373951194274306895": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "3101885395179993708": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14315760630997175346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12331134162344797761": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6254161707168091438": ["convolution_gpu_bfyx_gemm_like",2],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",2],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",2],
+        "897253033961107413": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15206249797344242666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16210934187492210542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18436249934780056991": ["convolution_gpu_bfyx_os_iyx_osv16",296],
+        "4104679489383377966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3713558537660711857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10961696014697611547": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2685061316482503878": ["convolution_gpu_bfyx_gemm_like",2],
+        "12487879163561616870": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10996596479775375564": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "15050884844653850678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12681408370704556588": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",1],
+        "13636859714649629789": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18071280811713424504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "698274493570551388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10034575179959785704": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "17616719165728687438": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15308196586729169691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",2],
+        "10782169939706303899": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17806747473167329833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6438721407426283362": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "9714508918051740792": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "740260423018155343": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "3662747857062156477": ["convolution_gpu_bfyx_gemm_like",2],
+        "13637537549252005181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "12896226291465522304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "9906138392975645747": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11730276873446857018": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",2],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",1],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",275],
+        "15640466585550013905": ["convolution_gpu_bfyx_gemm_like",2],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",1],
+        "14193777296032212476": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6776601719651959634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2863465257341735941": ["convolution_gpu_bfyx_1x1",1],
+        "11634932044447867039": ["convolution_gpu_bfyx_gemm_like",2],
+        "8096131027165540886": ["convolution_gpu_bfyx_gemm_like",2],
+        "15192230303376521834": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "900243696733233996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7927587739463421727": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",2],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like",2],
+        "4252157815622916471": ["convolution_gpu_bfyx_1x1",2],
+        "3135889221160961020": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5886032409392368342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2740885908397449753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",1],
+        "15609860394182767048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11311839946200066200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16910952799476896905": ["convolution_gpu_bfyx_gemm_like",2],
+        "1597770067928214597": ["convolution_gpu_bfyx_1x1",1],
+        "1802510952374368682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10893432143734884603": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "1662588605309237309": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "14020956765444878761": ["convolution_gpu_bfyx_gemm_like",2],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9482749589540764069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12053562297742437099": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6883767567034259453": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17725637691681205907": ["convolution_gpu_bfyx_gemm_like",2],
+        "14446688005815492020": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",2],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "4752129805031267391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14689423748560749566": ["fully_connected_gpu_fb_oi_ref",1],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "3935750066315595083": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13503555814874045782": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "5600807544955072308": ["convolution_gpu_bfyx_gemm_like",2],
+        "8652128863605749877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4521622755195947253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",281],
+        "11732321796147239597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9285202897230250613": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10070051133200561606": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13520876347177213888": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "5671289201458690944": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "486816652607164926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3112648799276134590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17477062954520561609": ["convolution_gpu_bfyx_gemm_like",2],
+        "208915399644127739": ["convolution_gpu_bfyx_gemm_like",2],
+        "5596408142536691534": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5632958791318880428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6159729136505378486": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",2],
+        "9263784636194609884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11942424927004660476": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "6423354409210936959": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "6525496212688896740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",2],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "9890700023578477203": ["convolution_gpu_bfyx_gemm_like",2],
+        "3541538046227217664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15217573782563469232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11587239927319376658": ["convolution_gpu_bfyx_gemm_like",2],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "15985980444340490463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1044889231088602677": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14264584839702225855": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1186545671730357033": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",2],
+        "6469003096932778978": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "11787674847611032323": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15646081020506130125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "9735280865199145311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3265415000818832667": ["convolution_gpu_bfyx_gemm_like",2],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7859659993155959174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",1],
+        "2204178900998688268": ["convolution_gpu_bfyx_gemm_like",2],
+        "10718639465064821919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "1161304401293419103": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12686015414958770329": ["convolution_gpu_bfyx_gemm_like",2],
+        "17051718450741106678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7624259732952222597": ["convolution_gpu_bfyx_gemm_like",2],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "14349625788399542568": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17945600479510493949": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3383222668132648804": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7210854698870587826": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1298596164164324360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7162155897369277782": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5317076157086789437": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9900658671239107502": ["convolution_gpu_bfyx_1x1",2],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",1],
+        "14339479547451422762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13576010631084066792": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4353583636655606632": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16020916772006653269": ["convolution_gpu_bfyx_1x1",1],
+        "5596441339918073261": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "733956743303342862": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6880746917399866285": ["convolution_gpu_bfyx_gemm_like",2],
+        "6992073477131490452": ["convolution_gpu_bfyx_gemm_like",2],
+        "15865753975271064117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15275978123703636572": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "16748662918272106932": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",2],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "8174040194088942964": ["convolution_gpu_bfyx_os_iyx_osv16",945],
+        "4839205075057964902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14501815053459103515": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3684792790546138809": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12032580551021546487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6709083009339039603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16828388628569377322": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "11637325834858582585": ["convolution_gpu_bfyx_gemm_like",2],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "1507504848332592003": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16426655160932259558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "8527069404111265568": ["convolution_gpu_bfyx_os_iyx_osv16",434],
+        "7280502812960451465": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16532743776403877084": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5657471280535146301": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2085467192625870436": ["convolution_gpu_bfyx_gemm_like",2],
+        "7168028033666253263": ["convolution_gpu_bfyx_gemm_like",2],
+        "12303905514885913537": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15612797125081819500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9452094307760005150": ["convolution_gpu_bfyx_gemm_like",2],
+        "13862199647000195451": ["convolution_gpu_yxfb_yxio_b16",2],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "8372855367097191197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14544219140091420262": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17228615388053183744": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16606674008248299103": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "8726274320876550785": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12892265081710606252": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7826406759309418010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8078028207842958010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "6631103268546309714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "231083216612056805": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9951951467222189282": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12755692101476964677": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "2295659951331099829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17019474731460049248": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16579057939215877904": ["convolution_gpu_bfyx_gemm_like",2],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",2],
+        "11279789373735965856": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",1],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3499106702307464480": ["convolution_gpu_bfyx_gemm_like",2],
+        "8541982562061181756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "6808980404170272597": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "13234872695521811652": ["convolution_gpu_yxfb_yxio_b16",1],
+        "921209976738626097": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17321934232458063571": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2042946928570163140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17542035367134614728": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "13634686998599681086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13223232888554043645": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3531786338249174486": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14363025045807200040": ["convolution_gpu_bfyx_os_iyx_osv16",541],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9291397338108903174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17264010982688979937": ["convolution_gpu_bfyx_1x1",2],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7748233564411787605": ["convolution_gpu_bfyx_gemm_like",2],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11972290239275366299": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2553539191926275121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "2133236128630074068": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3571330754519284334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3574585436812909168": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4561778392194061215": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3701838669605585798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14466032674083938714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15783429395177379897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6685985905221810743": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2102169562353089558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "13810735868750326592": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "4883588237027084166": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8219179055259247644": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15548854462657362014": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17769159396346490074": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7263796835299019284": ["convolution_gpu_bfyx_gemm_like",2],
+        "3477539135137665170": ["convolution_gpu_bfyx_gemm_like",2],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "10049294964307823692": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "12900949103593247293": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13760645810144930270": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "15597317305719116351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9778670810863940690": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",2],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "13101474064130881526": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "18029395208219861440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "3441335188113424896": ["convolution_gpu_bfyx_gemm_like",2],
+        "11267495078361954131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like",2],
+        "8300290944865904942": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12561177248542630652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11300415556407923335": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3503893875515897267": ["convolution_gpu_bfyx_gemm_like",2],
+        "7241156141838776126": ["convolution_gpu_bfyx_gemm_like",1],
+        "15138641310139776109": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15483343060578660278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "6051877311645456194": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like",2],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17965267346493659374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "530973311459168543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15988378956341507229": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6648876837655776653": ["convolution_gpu_bfyx_1x1",2],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "10645057595080511813": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3835387982926010630": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4802014352392262053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10577357333308653027": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "1375084615110147615": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14532519639619315651": ["convolution_gpu_bfyx_gemm_like",2],
+        "7027962921778599989": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",2],
+        "6970636030494405299": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "10531218595816974659": ["convolution_gpu_bfyx_gemm_like",2],
+        "15757308772667178999": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1450861513159359637": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5062815196458225737": ["convolution_gpu_bfyx_os_iyx_osv16",487],
+        "5464801565268066541": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "17397600088595751782": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5525691792821548743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11910900938442124765": ["convolution_gpu_bfyx_gemm_like",2],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "13575423234109624706": ["fully_connected_gpu_yxfb_ref",2],
+        "480310470450900836": ["convolution_gpu_bfyx_gemm_like",2],
+        "4656068024153891922": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14616413139039308367": ["fully_connected_gpu_fb_oi_ref",2],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14971506154649368216": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",1],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",1],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",2],
+        "10055247339012492459": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "2356785927637873692": ["convolution_gpu_bfyx_gemm_like",2],
+        "959666756751640874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11002875874008272679": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1582751548472076534": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5032841266226405428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14553813154800569861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12825407709419526493": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9525535670799618110": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289082888174784976": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "5541365322085427177": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17546090415334871175": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10617442099961865960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8509024280905303927": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5924271203978892761": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",2],
+        "9440117898128288296": ["convolution_gpu_bfyx_gemm_like",2],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10009559358571629502": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13571587312517912280": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2705031521944165712": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "12210280332071091209": ["fully_connected_gpu_fb_oi_ref",1],
+        "2761862049452027986": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",904],
+        "11626402549863483301": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "9250030880535336888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10117092543913369513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "708747442142592697": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16075006181495932250": ["convolution_gpu_bfyx_gemm_like",2],
+        "16996895381161031110": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10015368609444108372": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17958575161092859465": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12619739385084492771": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7349168847581850619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14801210545983960599": ["convolution_gpu_yxfb_yxio_b16",2],
+        "488798544312719183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "415826393421796195": ["convolution_gpu_yxfb_yxio_b16",2],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "2321148334382088982": ["convolution_gpu_bfyx_gemm_like",2],
+        "3563872903821081702": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "143667964449473415": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7469107606686458209": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",2],
+        "14943031375539993004": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14307705501349750896": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8094920912208664820": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13111122805945249561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2052010432187897741": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "11822555173696078282": ["convolution_gpu_bfyx_gemm_like",0],
+        "11612209645710419427": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9062781751511609244": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5250257911846706612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8881906040469243354": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5941852872160795604": ["convolution_gpu_bfyx_gemm_like",2],
+        "13598984763955239116": ["convolution_gpu_bfyx_gemm_like",0],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",2],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "7105279481103494151": ["fully_connected_gpu_fb_oi_ref",1],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "2803569867265035123": ["convolution_gpu_bfyx_os_iyx_osv16",1029],
+        "7720153213673170931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4010419602093863685": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7274647463152753603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17030051116023319382": ["convolution_gpu_yxfb_yxio_b16",1],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",2],
+        "4802009650745059499": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "138379779469699309": ["convolution_gpu_bfyx_gemm_like",2],
+        "14968401410355925289": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11132679855317294753": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15600841108426475615": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",2],
+        "3114210363452108737": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1895560603400089814": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11516184047320372729": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15848096609835347542": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8898095926967052382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_gemm_like",1],
+        "17822988909419777692": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15006321421735686121": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "9434143681116089888": ["convolution_gpu_bfyx_gemm_like",2],
+        "17712558058168648648": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "994842991399671507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "8163000689380461611": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14612206111651511130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10065714384927707796": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2915777749501772828": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9588943054777767098": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "13851025202247070979": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13380637319403400851": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3321251856445833973": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13325287783358291692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7863319552895863063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1771347579022727189": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",2],
+        "981803877097233095": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3784684114139223050": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13731797251725972855": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like",2],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "518733575377143679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10324485383646920518": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "14066675688397331406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6730447536124542965": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8537824547722216155": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6344600111737335616": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "2603233376890892194": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12600479027568241746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1379758215293949563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17893696934478535385": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11498084465186986412": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "11411580529501121244": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "9569522500959727054": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6371463287631658789": ["convolution_gpu_bfyx_gemm_like",2],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like",2],
+        "2984726467649419856": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "4740864135937875560": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",2],
+        "10106454449619141260": ["convolution_gpu_bfyx_1x1",2],
+        "1594829714229111215": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "727216855315869048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5044721291675005144": ["convolution_gpu_bfyx_1x1",2],
+        "8712136292276123857": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "9909564412554801760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",1],
+        "8609939102588915855": ["convolution_gpu_bfyx_gemm_like",2],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "3219408878901707426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10022487076451608714": ["convolution_gpu_bfyx_gemm_like",2],
+        "1338705434700924127": ["convolution_gpu_bfyx_1x1",1],
+        "2737352811173555281": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",2],
+        "14311888412221174224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16015963261509760799": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "11376953876369788199": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3463206409786541741": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15217077412685024074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6792281830591233968": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9504349455215835807": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",0],
+        "13602140021189675477": ["convolution_gpu_bfyx_gemm_like",2],
+        "13369751385866224286": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6367371992814643260": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",2],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6260684231055362504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1088710562928089772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",1],
+        "14383657211047876136": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "3163833930628348446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "9999543693712389402": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4099828484175044842": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2172999245833525797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259611546528256409": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9439431829175743345": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10722677916294015259": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13456967132681889167": ["convolution_gpu_yxfb_yxio_b16",2],
+        "104765009188090817": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",1],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "7870154008378361670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",1],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "10783046011829953095": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2609346307827449622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "9314293064351558241": ["convolution_gpu_bfyx_gemm_like",2],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "11155444222714959508": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "5008541841892687897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "5534071639452404412": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11872943152839631823": ["convolution_gpu_bfyx_gemm_like",2],
+        "8717456809499914445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10254566865260697753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13809046727894108358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3534874664568214253": ["convolution_gpu_bfyx_1x1",2],
+        "13717351126657739994": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10432365444137108781": ["convolution_gpu_bfyx_gemm_like",2],
+        "10009796094612770326": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11971736882960844905": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3567607339495161307": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14916236722843741326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16955829428734830876": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9696168324381001582": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "11537166370263116277": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "4261192887643002603": ["convolution_gpu_bfyx_gemm_like",2],
+        "14041970415787494000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12643643553436503069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7440953406601377619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7134419022268272901": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "14362876471450307424": ["convolution_gpu_bfyx_1x1",2],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "11280672272221124024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3442845193734599342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "9105127035114339269": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "5149303626508247520": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",1],
+        "13077012961563218195": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5642822685234782052": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "3748621266324665764": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15915715422308762909": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2909347733581487795": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13075579052866074866": ["convolution_gpu_bfyx_gemm_like",2],
+        "5209144536543011657": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8740268039366363321": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "17430994325635361377": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9293682866734263821": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5459463503840817402": ["convolution_gpu_bfyx_1x1",2],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",2],
+        "3805667660217578518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",2],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "14517191894006411358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1241355545294259810": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "378292944207609677": ["convolution_gpu_yxfb_yxio_b16",2],
+        "248133885018839814": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14697908554930995949": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",2],
+        "5355283113999405036": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8553491894663686698": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "8113660920207936963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6214677989814002369": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16626502801066228405": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",0],
+        "12867590715338247144": ["convolution_gpu_yxfb_yxio_b16",1],
+        "302694026179841870": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "10864011008000364415": ["convolution_gpu_bfyx_1x1",2],
+        "16527840366172690992": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18101509783610609787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4079026972040047969": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "16966477504105790279": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7183578232279711009": ["convolution_gpu_bfyx_gemm_like",2],
+        "4708035980731751007": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14115742296883450319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2343310394723780653": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7451154080124553318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "5788323787676797805": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "230697511447695268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9775648000771985077": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10278515360013727367": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10524079700393212963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14918482938530107806": ["convolution_gpu_bfyx_gemm_like",2],
+        "10262850086265676378": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7289535479247584635": ["convolution_gpu_bfyx_1x1",2],
+        "17377204616846724192": ["convolution_gpu_bfyx_gemm_like",2],
+        "402932154499003993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5179013491581036103": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12362290144183018227": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "6114241186364821679": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17567012866823126402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3889456478817717702": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "3398322619007806698": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "637115537820955017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9763754389347695094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "11782188262748842182": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "2273992727647793692": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "12455871938978342189": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "3957253946857103590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13369603621524676979": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "17817043205731836063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "512446355173752600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3735753364888836383": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",2],
+        "10995886682834858002": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4030004320208162301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11262989876326061679": ["convolution_gpu_yxfb_yxio_b16",0],
+        "748236447365453504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7861234698413147249": ["convolution_gpu_yxfb_yxio_b16",2],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4562591438007476419": ["convolution_gpu_bfyx_gemm_like",2],
+        "15293835051273372438": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8015885733173521367": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "17077815973022307612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",1],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like",2],
+        "5721096633060535553": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12458921031453334451": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15816807118780455948": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",1],
+        "9834941975457910988": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6208201398783088425": ["convolution_gpu_bfyx_gemm_like",2],
+        "8265982881100325775": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8638074773026771425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6846760451124717672": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "5465400164581117113": ["convolution_gpu_bfyx_gemm_like",2],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5359510718430377298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4403753181729432604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "7600296832974673294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3911736807429733938": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1701412735970485849": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "8527055001340219573": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2562815925396318565": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",2],
+        "13323186744342557015": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9659837320293869285": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14188157670969097508": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2457671437276780303": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "6018481198468872040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "835053793432636355": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14337168375989245254": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9127827617126714860": ["fully_connected_gpu_fb_oi_ref",2],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8131879590716437354": ["convolution_gpu_yxfb_yxio_b16",2],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "1982176363226079588": ["convolution_gpu_bfyx_gemm_like",2],
+        "13145474177271090694": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "17174919737114915467": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5420215220876162902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2129742884686884642": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4241055784642339756": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7933217973342728190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5950285227163574810": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",1],
+        "10232429887105708502": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6439316331231400868": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4416793079965040181": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9526266653688168429": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15879385408480411034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",1],
+        "677249604491773387": ["convolution_gpu_bfyx_gemm_like",2],
+        "990199360818917334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4455369117448405874": ["convolution_gpu_bfyx_1x1",2],
+        "3369689552455141157": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5802466130040230797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "14526262781657292025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12984970933638742657": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "9629460794894999510": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "601430670855155006": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12439827609628473238": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",2],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",1],
+        "17158401628206867933": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2114232149447438823": ["convolution_gpu_bfyx_gemm_like",2],
+        "7650862961269327235": ["convolution_gpu_bfyx_1x1",2],
+        "3940619509778739158": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7843180034077880658": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "17718424965214606218": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7287107719392705356": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",2],
+        "14199158130218117084": ["convolution_gpu_bfyx_gemm_like",2],
+        "394778201589371681": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "14807357397951247957": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4171374172427814762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8262469434265124590": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16541970206584576833": ["convolution_gpu_bfyx_gemm_like",2],
+        "12004628115138530335": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",2],
+        "3725013268198063198": ["convolution_gpu_bfyx_1x1",2],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",2],
+        "14897384423894125457": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12011606174372081253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",1],
+        "8010456208258134834": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6538526180355194359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18359731130169236059": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6097086855988597139": ["convolution_gpu_bfyx_1x1",2],
+        "9059418187274548462": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",2],
+        "1680468564927032670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4261215727469154244": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",2],
+        "14001048251986195179": ["convolution_gpu_bfyx_gemm_like",2],
+        "6726099352298108756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10225565543636007389": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13094289895577333088": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15599983560500910839": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18131954418490925431": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "1919535500129437217": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "6624079551747071383": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16364494883229084045": ["convolution_gpu_bfyx_gemm_like",2],
+        "4723919313760470311": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11324651029379152442": ["convolution_gpu_bfyx_1x1",2],
+        "3358616456137155015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "264466528528245004": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4732226322522411018": ["fully_connected_gpu_fb_io_b8_f8_vload",0],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",2],
+        "4168273493370024327": ["convolution_gpu_bfyx_1x1",1],
+        "860852602930021016": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15581997249051127645": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "6136232084354304563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2744566213784972700": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2412069259085234287": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11682041005124075890": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14281154151197472605": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",2],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9433875341212148858": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "5735703235236456131": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "9815961128076948768": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9480653639044390919": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",2],
+        "4329042569031331949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "1646638859396929303": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",1],
+        "789202969657820559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12988253829685880778": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "8978764053524288494": ["convolution_gpu_bfyx_gemm_like",0],
+        "6935581283700404601": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "9436893310034662243": ["convolution_gpu_bfyx_gemm_like",2],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527276292172180386": ["convolution_gpu_bfyx_gemm_like",2],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1786732163438555728": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "2173867324489962689": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13530377297525480029": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "9787359208094141129": ["fully_connected_gpu_fb_oi_ref",1],
+        "6709883527730513363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7172357320005702833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "6331794802915121861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "8784358107340738205": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "2501411300945696806": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7378840969627751667": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14568618538516685994": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6126579157025017808": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8125500765566111746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9663847096617096629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3924212595662208655": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8585205898894363799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "10184417796355593956": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7605652809856543211": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "8267783192628619295": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10358170616931426647": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "14770895149190975433": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2931988747601319855": ["convolution_gpu_bfyx_1x1",2],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "4306052436602921234": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",2],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "17054207561525574617": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",2],
+        "6479042072492268780": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10981374120597916521": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18424611729838147994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7099035779223341587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9207799012657103903": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10811224523636009881": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15866935886105967122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8700574100180128776": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16270745071180354612": ["convolution_gpu_bfyx_gemm_like",2],
+        "10760094119259477688": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9169935203300589222": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15222260213708019662": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15710826363434377015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7332664632757815486": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "2706024586717944825": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6363788325163726004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14847662630748580880": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7075659071934895087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9694701402170070080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7540655869186258692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "16632786413927045192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "12213908871711628660": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3815222814331650224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10833423331830484028": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",2],
+        "151851883170419907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1798440805196304745": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15352064186447212862": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "2722062599746670336": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11070968498963106073": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "856949500975232838": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17705992851440826353": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "12771841901357553928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17084977396231597605": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "7992444232916226938": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4849343880559509889": ["convolution_gpu_bfyx_1x1",2],
+        "11086464266772450142": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15464554714318666871": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11799180632798787251": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9078447949109922472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "108442764389420633": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "15551338663759394064": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10538010212480716275": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14173531787508017136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",2],
+        "12867177334690636800": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "16585502133291740543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11070446574652704629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4023281997496669037": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",2],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "14263055580023018733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "688897645422834994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6232452664016831516": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15715029280006557222": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2840794055129352139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14905520834426630145": ["convolution_gpu_bfyx_gemm_like",2],
+        "16096353398003405565": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15997145184054496085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11585430081839020501": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15820005010263193043": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3141886504884887200": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",2],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "3427691447288240419": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14731054961557547253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "6703148006012061136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "382811963722907674": ["convolution_gpu_bfyx_gemm_like",2],
+        "12617736879671137111": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10419440621736450993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17211590259060346125": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13328583512713703122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "415232223198122046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14985236276429954162": ["convolution_gpu_bfyx_gemm_like",2],
+        "15487730714504758208": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "4299773714254046691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7615563770941714046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6664482192233202590": ["convolution_gpu_bfyx_gemm_like",2],
+        "3155353791103196186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3894121333485095575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "9275371801303143499": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "3928356751040028375": ["convolution_gpu_bfyx_gemm_like",2],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",1],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",2],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "14510495923021693109": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17487594336237597163": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "8638227907054657946": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16574710115918192418": ["convolution_gpu_bfyx_gemm_like",2],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "8130920994920685157": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17647962002015093887": ["convolution_gpu_bfyx_gemm_like",2],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",1],
+        "4714289593698160876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",1],
+        "10415046594066474634": ["convolution_gpu_bfyx_gemm_like",2],
+        "12624762527234542946": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "15809639778580769565": ["convolution_gpu_bfyx_gemm_like",2],
+        "17587625589456309495": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8069537351442302814": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "2147896649835170790": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1142968634734769401": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5245526691775741296": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6300691162962736560": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "16653412888821076903": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5074273865983613482": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9375272277044782377": ["convolution_gpu_bfyx_gemm_like",0],
+        "10465119306486335226": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17436550598696178210": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1854612313463195535": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14758040027936817208": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",2],
+        "2807516818436584831": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1638858323987412931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18035673326929466074": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "417352773179383568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4339711224604149541": ["convolution_gpu_bfyx_gemm_like",2],
+        "5266313052389515491": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7946262362930618714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14835641172229643545": ["convolution_gpu_bfyx_gemm_like",2],
+        "5150256051921098637": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14001406016806064079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10899110544832584656": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6981294059746462667": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",2],
+        "9996142812492415452": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13835859040765465258": ["convolution_gpu_bfyx_gemm_like",1],
+        "13472577372534605883": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5762631094740444698": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "2920017342405650206": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11624226818593966530": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6461637373691101671": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "215512025430490450": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6070612528095353265": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",2],
+        "10656486867659934705": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16111630594575598044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17439276474731842060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10069896554844445748": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "3837190939606792435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "739676584505475609": ["convolution_gpu_bfyx_gemm_like",2],
+        "4306881509708040723": ["convolution_gpu_yxfb_yxio_b16",2],
+        "178353385245384751": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "1466455001976212160": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "15226556774612169126": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18255227391100087860": ["convolution_gpu_bfyx_1x1",2],
+        "16120120950870908964": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2219693989290882970": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4091702228990140696": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10509933181132310969": ["convolution_gpu_bfyx_gemm_like",2],
+        "166091609652531090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "47872288115972996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "18094592431313771787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8787438180071123604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",2],
+        "6445721440921372329": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17012832508134584917": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12264240305528403865": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4433497906256257606": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "7752913515036871482": ["convolution_gpu_bfyx_gemm_like",1],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",1],
+        "13390197134230598693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10217182484138821482": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16780457022162749898": ["convolution_gpu_bfyx_gemm_like",0],
+        "5445584581720919223": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",1],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",2],
+        "6260115080574637314": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12294364015803004575": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "438528596970898721": ["convolution_gpu_bfyx_gemm_like",1],
+        "3976197003067656339": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2263637493894079492": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15813044197987178947": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "17433340097721474017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9515771738501683": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "6182829358839578529": ["convolution_gpu_bfyx_gemm_like",2],
+        "13803790014241837327": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13367787254519749641": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1454014148777456006": ["convolution_gpu_yxfb_yxio_b16",2],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",2],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "867868384380428650": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1068155851494601726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2621495864635590903": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2929190644951986399": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "6115915509370042166": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1338581414403268264": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12850195004093999773": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9532499374173117612": ["fully_connected_gpu_fb_oi_ref",1],
+        "12061567381160185735": ["convolution_gpu_bfyx_1x1",1],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "9178915201681884122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17045386022302353268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6107700818115209289": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15141893564826036993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8984436655107983227": ["convolution_gpu_bfyx_gemm_like",2],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17766628441954343001": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "7823257556787476006": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2973337989445169388": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3010520839193613803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8000679297338683619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2598267743388306204": ["convolution_gpu_bfyx_gemm_like",2],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14840851809642905875": ["convolution_gpu_yxfb_yxio_b16",2],
+        "447943521999310356": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16361932270527364507": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "4422642146063042868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9974905660671605427": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "18148431787172327554": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7338932272767555117": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5941092474669713339": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "598214270378842167": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "824380206255396866": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10753540518493641553": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8613740762403897614": ["convolution_gpu_yxfb_yxio_b16",2],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",2],
+        "6286349307417232815": ["convolution_gpu_yxfb_yxio_b16",2],
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",2],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",2],
+        "5109636469531439569": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "12430677767405883160": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11563334365673075610": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9161616741940575576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10477588607457125173": ["convolution_gpu_bfyx_gemm_like",2],
+        "4723643671527109645": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "16549854027697846882": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like",2],
+        "16243196137456624852": ["convolution_gpu_bfyx_gemm_like",2],
+        "13020331397245585657": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "14318347197994059448": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4251673416603443503": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6677367803113594603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15298221796479574600": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5546447512898130524": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13854845390344305906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16184142990117192433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12703696322769371912": ["convolution_gpu_bfyx_gemm_like",2],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like",2],
+        "2064464435352777854": ["convolution_gpu_bfyx_gemm_like",1],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",1],
+        "16956263773967652552": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4927360358387344983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15438530452161762045": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",2],
+        "14288463473159113326": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15555083739490354527": ["convolution_gpu_bfyx_gemm_like",2],
+        "5479590921345335946": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "14421061973479991516": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "18076121920579110076": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "16998662249038174039": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "1240102354814495870": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "707979507145930311": ["convolution_gpu_bfyx_gemm_like",1],
+        "14795626641169374231": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "8512711227383782401": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6578804773136886939": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180491232489548313": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "3572202652824023801": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "3816979903860227798": ["convolution_gpu_bfyx_gemm_like",2],
+        "4790960977352818689": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "4868400250190558111": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3509502334639215181": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "3697631094971930011": ["convolution_gpu_bfyx_gemm_like",2],
+        "1467428583618467133": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "9335016444137172241": ["convolution_gpu_bfyx_gemm_like",2],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "8127853538569353431": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "1484007449719260391": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "9056812077282494074": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "7127306913758514626": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10209532888121442060": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "17354626928258309128": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1569111625440278287": ["convolution_gpu_bfyx_gemm_like",2],
+        "213518984547400496": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "384240534894352154": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "4732699611696731044": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "7059729537732609153": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "15743461017318513847": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "2778141440914991349": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4588420324030315321": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "885661562948597780": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15687441275464931484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "5358925179582853152": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "3610579553304450107": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "3047710665820732705": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8363432163596927598": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "11758765408733113291": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "5050495757462452653": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8399477322910720113": ["convolution_gpu_bfyx_gemm_like",2],
+        "8921169563466511475": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "12571532345206950176": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "9552615241912277692": ["convolution_gpu_bfyx_gemm_like",2],
+        "16628180201355989101": ["convolution_gpu_bfyx_os_iyx_osv16",884],
+        "17808913959977434594": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "286393043958202995": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "6258191734224827354": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "18043745678739016406": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "17946191056428828467": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6263019986730305851": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9546990560009724329": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "16462602383546733062": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "1350953652678789564": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "330278641539729021": ["convolution_gpu_bfyx_gemm_like",2],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2908856453997530641": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17059095074211347838": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "14668529234172928874": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "14346703182362139650": ["convolution_gpu_bfyx_gemm_like",2],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "10114123606924808948": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5280450544965361875": ["convolution_gpu_bfyx_gemm_like",1],
+        "15025260753866131193": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "604467633591545941": ["convolution_gpu_bfyx_gemm_like",2],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "7256947320128669983": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16256970928603738516": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10269005969451576527": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "6745633232989303110": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12364947728685604753": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "12173409033330010794": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "11128727891847758901": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "1093840152689636371": ["convolution_gpu_bfyx_gemm_like",1],
+        "9714770878761308566": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15083602050538795803": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7527121935101118719": ["convolution_gpu_bfyx_gemm_like",2],
+        "5116562847410288642": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5385395378424322451": ["convolution_gpu_bfyx_gemm_like",2],
+        "11602830611894444581": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "2096167792705935744": ["convolution_gpu_bfyx_gemm_like",2],
+        "3433877094202077256": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "12610004507393467447": ["convolution_gpu_bfyx_gemm_like",2],
+        "15939740070666326125": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8422541638844255768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13082713280504953535": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8961544327690568390": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11883632480024839484": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5769404877199637961": ["convolution_gpu_bfyx_gemm_like",2],
+        "3296059171653513862": ["convolution_gpu_bfyx_gemm_like",2],
+        "9968496035529786888": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "3664842151999943": ["convolution_gpu_bfyx_gemm_like",1],
+        "11539652577193034099": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "2524233418633897945": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "3743573500773847162": ["convolution_gpu_bfyx_os_iyx_osv16",506],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "3813463368918975003": ["convolution_gpu_bfyx_gemm_like",2],
+        "7530197659550301431": ["convolution_gpu_bfyx_gemm_like",2],
+        "9700098364581157575": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4269447138276727632": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "1061595672605627170": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "7569785094993085356": ["convolution_gpu_bfyx_gemm_like",2],
+        "11504777464995699839": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "8224143262995973449": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1501328995320618233": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "16197538586133639338": ["convolution_gpu_bfyx_gemm_like",1],
+        "237384442106085756": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "15972830392998437739": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "15421166985948480394": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "6794427012971589670": ["convolution_gpu_bfyx_gemm_like",2],
+        "2420425134749678611": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "8050798452111667069": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "3668065353749623655": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "4251588408225461731": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16957170318200599740": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12644942072153919043": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17715478364817621621": ["convolution_gpu_bfyx_gemm_like",2],
+        "2854124603710900850": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "9380980604821454646": ["convolution_gpu_bfyx_gemm_like",1],
+        "1879844536951785808": ["convolution_gpu_bfyx_gemm_like",2],
+        "1086052166358768751": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "861813331533609605": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16440449399643706863": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11733721371402545268": ["fully_connected_gpu_fb_io_ref",2],
+        "15816540550252147706": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "13979227237506927267": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "10492401059875127091": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "4600698444492242585": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7157064096682175957": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "14221578799010900252": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "1285313118947640320": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "15858485865603722138": ["convolution_gpu_bfyx_gemm_like",2],
+        "2116524516810466877": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "12182468247297592907": ["convolution_gpu_bfyx_gemm_like",1],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "5582107298039488951": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "11773726534842908728": ["convolution_gpu_bfyx_os_iyx_osv16",187],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "8844619836383523698": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "14548629377527143409": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13366059704398720237": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "18349087959351486710": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15868648764972133201": ["fully_connected_gpu_fb_oi_ref",1],
+        "15322019609805777935": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "4451257789691974239": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1480287432874335824": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "13657522194775317201": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "8032685176029570383": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4334698056820320220": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "15378707205730840765": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5977248663249062384": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3170785962566427770": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2585176064846114298": ["convolution_gpu_bfyx_gemm_like",2],
+        "18337975902615310907": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6768322540857745605": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13657774210341324470": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3072535365860940873": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14230197617570499447": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10049329759351957685": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10305912614137623024": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3896848534552901221": ["convolution_gpu_bfyx_gemm_like",2],
+        "7405835196787288054": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "7020655100877544328": ["convolution_gpu_bfyx_gemm_like",1],
+        "13174363822969694054": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13232269620066140073": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "10317038568333963064": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "2180753144963020203": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15271492161940795681": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12281346074445607180": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "8451179695288093195": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "2085738943081638802": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15563546888345388359": ["convolution_gpu_bfyx_gemm_like",2],
+        "8525389694584008001": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2481005139798378616": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "574359978358296617": ["convolution_gpu_bfyx_gemm_like",2],
+        "15764181772410734606": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9217386935739152562": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "12161602271403760008": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "9758907700230386910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8707189142909022305": ["convolution_gpu_bfyx_gemm_like",2],
+        "1375259485223819020": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9053383117071470496": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6261121070004228939": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1112828128944231163": ["convolution_gpu_bfyx_gemm_like",1],
+        "5843679089588930933": ["convolution_gpu_bfyx_gemm_like",2],
+        "11083777913844441475": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1923745286075356181": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3827177373408316820": ["convolution_gpu_bfyx_gemm_like",1],
+        "5488168361113140102": ["convolution_gpu_bfyx_gemm_like",1],
+        "7982628452987720190": ["convolution_gpu_bfyx_gemm_like",2],
+        "8140242320379485952": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "15615172858007002100": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15210302033167762581": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17392347485675658099": ["convolution_gpu_bfyx_gemm_like",2],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "1231806423322813287": ["convolution_gpu_bfyx_gemm_like",2],
+        "166267183356660549": ["convolution_gpu_bfyx_gemm_like",1],
+        "8281212003098870446": ["convolution_gpu_bfyx_gemm_like",0],
+        "14650273075211365393": ["convolution_gpu_bfyx_gemm_like",1],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "12012860334670244716": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "15646774522467486699": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "18265901700619296616": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1653438360841004980": ["fully_connected_gpu_fb_oi_ref",2],
+        "6103824715103416420": ["convolution_gpu_bfyx_gemm_like",2],
+        "15409755591665753258": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16946947983339327902": ["convolution_gpu_bfyx_gemm_like",2],
+        "6431838057506760173": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "14705457019471647279": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6801897580177846120": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",472],
+        "16801553481899627402": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5339358831190803597": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11732742421854164761": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "14568560907026487922": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4184442166820068862": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "17967188184891337660": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",1],
+        "5109770354438894645": ["convolution_gpu_bfyx_gemm_like",2],
+        "4691552892932405676": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7331552952865138030": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1422402723172447295": ["convolution_gpu_bfyx_gemm_like",1],
+        "14292252222828824305": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "16695020005258780885": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "6129884455218252024": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13772598362521854438": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "9940908487812223059": ["convolution_gpu_bfyx_gemm_like",2],
+        "4753055238892504599": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "15803050672115583478": ["convolution_gpu_bfyx_gemm_like",1],
+        "3154903035376733831": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9191832520273617003": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "7557446085365037177": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "6213353364768643062": ["convolution_gpu_bfyx_gemm_like",2],
+        "4035015193331696438": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "4368522743441422202": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "15974241934088373021": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17254775053427612466": ["fully_connected_gpu_fb_oi_ref",1],
+        "447683677378974131": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14244966672894707129": ["convolution_gpu_bfyx_gemm_like",2],
+        "7946776740333736799": ["convolution_gpu_bfyx_gemm_like",2],
+        "15496355513574200965": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "9239048433297419320": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12971833748980664090": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "1810943242998123550": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9767355861002822967": ["convolution_gpu_bfyx_gemm_like",2],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",0],
+        "11805311302922325617": ["convolution_gpu_bfyx_gemm_like",2],
+        "9788704336046308724": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15383553612351941890": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16590030963319267708": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "6739799137687789012": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "17442035600389810700": ["convolution_gpu_bfyx_gemm_like",2],
+        "10890975553758439233": ["convolution_gpu_bfyx_gemm_like",1],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "12409554044517232554": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "9796347091019799053": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "17508987219281192918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "8670512344429807851": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13206826317378863148": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13727585908419292912": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "6996679663761370444": ["convolution_gpu_bfyx_gemm_like",1],
+        "13915749401892931804": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "16596028606733932975": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "4198666727524342442": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "16125365972873290572": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "9040046051053703359": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11918018989601427118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7312862821818362095": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "8357109553923988018": ["convolution_gpu_bfyx_gemm_like",2],
+        "8730097760819044515": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6218328594667952152": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3939977982577786175": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "18112958483003382733": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "8507854696766492454": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "5556023021504556658": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "17740553615487239243": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8684867236134349888": ["convolution_gpu_bfyx_os_iyx_osv16",193],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "18235067315439611192": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4965629769516591986": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "13537323999534292650": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3214253333840552610": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6478054912653910426": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "9154705094446538279": ["fully_connected_gpu_fb_oi_ref",0],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "6513705142577622089": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "1766961036311612128": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "977617597166653416": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17182558720652199559": ["fully_connected_gpu_fb_io_ref",1],
+        "17854138024884397413": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8426489532875918560": ["convolution_gpu_bfyx_gemm_like",1],
+        "17869697579874327192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10928995765778560784": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "770376597027620107": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17683350638672326642": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17790954200356837750": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "14349335089732252796": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11033507346101404633": ["fully_connected_gpu_fb_oi_ref",2],
+        "13775529405693629438": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "9459869325970475576": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "2542506456395240890": ["convolution_gpu_bfyx_gemm_like",1],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16510194749934323304": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "12952160708294444403": ["convolution_gpu_bfyx_gemm_like",2],
+        "11541706477255587105": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1771663698943903325": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "17771487895874668302": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "412314676462573090": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7367814057959247537": ["convolution_gpu_bfyx_gemm_like",2],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "1192709652314183388": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12427490329663434604": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "13170031087212196468": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12381377111003298809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "16710651492402564794": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13447028922679236865": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "18026468427978643933": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "17285699593273891901": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "9963817056423168830": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "13388424034634316547": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "2780358937598873103": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "587350550384936211": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12534001599784153836": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "4082046235109198108": ["convolution_gpu_bfyx_gemm_like",1],
+        "2317476796706098254": ["convolution_gpu_bfyx_gemm_like",2],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "14296771090926462138": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10853161782230763798": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8390953788659916133": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2310549887200001260": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4854802313728023001": ["convolution_gpu_bfyx_os_iyx_osv16",621],
+        "11264412030568042996": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "5906083739416582743": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8707484843981694525": ["convolution_gpu_bfyx_os_iyx_osv16",1021],
+        "2947753291378607664": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "17585852525746136080": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "2303141161423252932": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14039055710777697188": ["convolution_gpu_bfyx_gemm_like",2],
+        "3919577663893354177": ["convolution_gpu_bfyx_gemm_like",1],
+        "16578265652036967656": ["convolution_gpu_bfyx_gemm_like",2],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5643908654122573882": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",2],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "1465692634334679413": ["convolution_gpu_bfyx_gemm_like",2],
+        "13439272015824246074": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15781220232431782560": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2590380836212070761": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10437861085319472289": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "707449835235490641": ["convolution_gpu_bfyx_gemm_like",1],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "14667209474639064623": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15984373369388044924": ["convolution_gpu_bfyx_gemm_like",2],
+        "1486768204660092247": ["convolution_gpu_bfyx_gemm_like",1],
+        "8360628955300060520": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "12808154347573074859": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1131384986902172221": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11051434650031832658": ["convolution_gpu_bfyx_gemm_like",1],
+        "3623695848220673001": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2172636954267255416": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14522844693999581518": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "12136458184046915563": ["convolution_gpu_bfyx_gemm_like",0],
+        "13115589642140732066": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "2654793073145467058": ["convolution_gpu_bfyx_gemm_like",2],
+        "1967810052096853804": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6796998865297819946": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3314459110790355757": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "13193571607788569533": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15911434513425038508": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4534480875955599254": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11253790393313445931": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",1],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11338906515425639970": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "789359733867650915": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "16173557782125372935": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "14133509766683767462": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "1116274074896622552": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "7995002764260542332": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15380105196319354141": ["convolution_gpu_bfyx_os_iyx_osv16",481],
+        "17732250360268013336": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "1622731194539871461": ["convolution_gpu_bfyx_gemm_like",2],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15115440616185035720": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4927139127938739019": ["convolution_gpu_bfyx_gemm_like",2],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",1],
+        "1081962464388501987": ["convolution_gpu_bfyx_os_iyx_osv16",873],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "1362239912535573615": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "2230884858122788172": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12771805545455650546": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2007192658799516915": ["fully_connected_gpu_bf_io_gemm",1],
+        "6489645404977288242": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "4229105529069729944": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10743138314323119696": ["convolution_gpu_bfyx_gemm_like",2],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "768765852586619095": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16396393355098283060": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12392988351482826871": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10485534959656860449": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "13083981648347252910": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "2248628426797793532": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2498920887656279332": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12864338805958186191": ["convolution_gpu_bfyx_gemm_like",2],
+        "5124645583449732785": ["convolution_gpu_bfyx_gemm_like",2],
+        "15024023281204917061": ["convolution_gpu_bfyx_gemm_like",2],
+        "11331539079347079374": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "11857822504978122919": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11665313746896806563": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14911763273270477925": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2096021095904820251": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12010294231983179604": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "7877256119877423528": ["convolution_gpu_bfyx_os_iyx_osv16",489],
+        "18243018097656671503": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "8061914949376516780": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11314436000791223218": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9516102312850256675": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "14188045559946481097": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "2418288192668085805": ["convolution_gpu_bfyx_gemm_like",2],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14261214737408786954": ["convolution_gpu_bfyx_os_iyx_osv16",621],
+        "7336911146060959485": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11234976958917093838": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "7058458405375602606": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "13654408396081513312": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1593086572473375988": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "13387766889016280910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5966963943739041502": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "13267743753217317315": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",185],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "16103653667647559851": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "17025997656996518171": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10935410906182995784": ["convolution_gpu_bfyx_gemm_like",1],
+        "15749335301736571135": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6362453779168658462": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "14541063954080306476": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",120],
+        "11058082057683584650": ["convolution_gpu_bfyx_gemm_like",2],
+        "6750269489578112382": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17774979615691038302": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "3219239043521617253": ["convolution_gpu_bfyx_gemm_like",2],
+        "10973647655853229395": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "1521992965089360209": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4145496852718466030": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2317409971670298599": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "10966081583785531511": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13745327504866194229": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "390943380079040179": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2999825793036702585": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4692951005189464579": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "9905716283229191208": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4860019935631927113": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1835975757316320402": ["convolution_gpu_bfyx_gemm_like",2],
+        "18265020664540913473": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1444256562477852389": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "8510044123592842725": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "10689303050557631712": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "390219891876240081": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10838972820886273680": ["convolution_gpu_bfyx_gemm_like",2],
+        "15682441855379046778": ["convolution_gpu_bfyx_os_iyx_osv16",130],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "14719871224178118299": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14880517974968280393": ["convolution_gpu_bfyx_gemm_like",2],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9696588462876533517": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "11964639701912187118": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "12190841837604350271": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4254313567858225805": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "1190134214210434381": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "2894138412746654795": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "11378458002317912396": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "18337160891834020517": ["convolution_gpu_bfyx_os_iyx_osv16",151],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "12584692605608021657": ["fully_connected_gpu_fb_oi_ref",1],
+        "907233163535348999": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11510063368067539341": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3164513064874019611": ["convolution_gpu_bfyx_gemm_like",2],
+        "5298952273692538291": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "8382509515623938786": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "14013561425708390846": ["convolution_gpu_bfyx_gemm_like",2],
+        "7801270668419570665": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "11188849626443657384": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13296566345005640760": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "4165920860392215245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7905503566052181015": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15872143905824807656": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "10983344268706058114": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "5553176511624221429": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16033144151193421543": ["convolution_gpu_bfyx_gemm_like",2],
+        "2571882179292959757": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13810716860158972470": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "16264774056719724826": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1919460437053604108": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12767115494378788592": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "9861846661532177405": ["convolution_gpu_bfyx_gemm_like",2],
+        "7419990519344756626": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "13660573428614001128": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2305706332728008948": ["convolution_gpu_bfyx_gemm_like",2],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "1104489643524273315": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "4607013085883384144": ["convolution_gpu_bfyx_gemm_like",2],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "9486447779233331380": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "12096396455109952715": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15509845164085518352": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6525052296614701517": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3475757648408068589": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "14599150265057284139": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "7678168522030142454": ["convolution_gpu_bfyx_gemm_like",2],
+        "8799427328659766574": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15384168056682476462": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "1801066876009461857": ["convolution_gpu_bfyx_gemm_like",1],
+        "13787155972060672772": ["convolution_gpu_bfyx_gemm_like",1],
+        "4974435385259831818": ["convolution_gpu_bfyx_gemm_like",2],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "15184258464890250739": ["convolution_gpu_bfyx_gemm_like",2],
+        "7550660458541314838": ["convolution_gpu_bfyx_gemm_like",2],
+        "11367813096511965002": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "11393439616752806572": ["convolution_gpu_bfyx_gemm_like",2],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "838825600917352376": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9383222411929463824": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3192518239721798250": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478914547444399288": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10036998353100219512": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13212959214376905822": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "1724898827344855006": ["convolution_gpu_bfyx_gemm_like",1],
+        "10890538764006500546": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12978004383198641522": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "18166732758694978380": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7727871584058599163": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14113510820933411052": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "6897348673467297407": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "15191864907092681849": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16569200335969311660": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "11642941943446484202": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "12825029449351875037": ["convolution_gpu_bfyx_gemm_like",1],
+        "12818953631784587919": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "9654726486719966937": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10158890414412187141": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8367989677286805427": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15953607231296296913": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9255337426504113924": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6762862978340755053": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8374345306483326015": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10386584706491193379": ["convolution_gpu_bfyx_gemm_like",2],
+        "18067353229273804720": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3588791913550955553": ["fully_connected_gpu_fb_oi_ref",1],
+        "5047419871737940985": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "6078344073564209080": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "15492793021506324472": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "801486567558674495": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15652392678782222737": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3701795558556637835": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8369833730195120673": ["convolution_gpu_bfyx_gemm_like",2],
+        "7103345484511147373": ["convolution_gpu_bfyx_gemm_like",2],
+        "4412343276595791077": ["convolution_gpu_bfyx_gemm_like",2],
+        "1596472719837608525": ["convolution_gpu_bfyx_gemm_like",2],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "15636407980943172317": ["convolution_gpu_bfyx_gemm_like",2],
+        "2816982827037092536": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "3469963495451100978": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "9386678255270055573": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "172584114180442549": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "3828569468687251275": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "5513667102916409932": ["convolution_gpu_bfyx_gemm_like",2],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "15112118829970177073": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "8566695253227825439": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "13002723770137829128": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8511244943596227719": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1745930004673880589": ["convolution_gpu_bfyx_gemm_like",1],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "12707748441880165396": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3277243911383750280": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15952399564161253450": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12063837066704136739": ["convolution_gpu_bfyx_gemm_like",1],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "1330842758352650583": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "4007319206075386920": ["convolution_gpu_bfyx_gemm_like",2],
+        "1592619919721912789": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "984472462878596435": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "813347941036099284": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "15091825614924466766": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3436433254188539886": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "17997314629342774968": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "3524702814173574637": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "340606466693982406": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "544003022213487787": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6948696390129114563": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4563529605364580848": ["convolution_gpu_bfyx_os_iyx_osv16",131],
+        "2124776616364429517": ["convolution_gpu_bfyx_gemm_like",1],
+        "2946926779445063554": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "11240189248024145687": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "4494583230309471319": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "8104609318998060422": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "16587078304821304948": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "237302155033013557": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "13810995219720233595": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "15299926486228458704": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "10879183694331631189": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "3745433390861789238": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6275903692904946376": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "572265264921910408": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "276407276027553756": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "3747518910079195578": ["convolution_gpu_bfyx_os_iyx_osv16",103],
+        "15198419554644505600": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17073183514200378702": ["convolution_gpu_bfyx_os_iyx_osv16",667],
+        "8611417708673038653": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "8375778282166369933": ["convolution_gpu_bfyx_gemm_like",2],
+        "6831045740006076251": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6577754887650563753": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11775667915453535428": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "8898910394425958745": ["convolution_gpu_bfyx_gemm_like",2],
+        "15781622938833984014": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "8035084960535483680": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "6065819201836017182": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7873648177300629037": ["convolution_gpu_bfyx_gemm_like",2],
+        "18134140047840716203": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "12046638414686283134": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10008202802779981732": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "954347958041231578": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5871082277006078841": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "6137405768481559638": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "9105388853296359769": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",2],
+        "11795686089670429481": ["convolution_gpu_bfyx_gemm_like",2],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "4224423702382859092": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2270733937722366926": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "5646139101524964833": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "3239100076064406977": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3730238135300250205": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "7227174766917523481": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "17772882818194611202": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "18375125668176498051": ["convolution_gpu_bfyx_gemm_like",2],
+        "16091165907421819456": ["convolution_gpu_bfyx_gemm_like",2],
+        "7726714223809300966": ["convolution_gpu_bfyx_gemm_like",1],
+        "13926730608213207277": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11533151357949131860": ["convolution_gpu_bfyx_gemm_like",2],
+        "14805212478405698245": ["convolution_gpu_bfyx_gemm_like",1],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "9468314291932574827": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "8324250071425605671": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6579950270997373448": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1426606766274640878": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "5953754321266570854": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "14827538610133799379": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4920194716156732643": ["convolution_gpu_bfyx_gemm_like",2],
+        "9740466267717175474": ["convolution_gpu_bfyx_gemm_like",2],
+        "6755802278188792577": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "4417341352109525283": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8442368383427915597": ["convolution_gpu_bfyx_gemm_like",1],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "3693042354944382600": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12390011660072693092": ["convolution_gpu_bfyx_gemm_like",1],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "12425310792514818973": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "17331582127656317117": ["convolution_gpu_bfyx_gemm_like",1],
+        "13492216433886201174": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2338535084014610258": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "4396653960950462197": ["convolution_gpu_bfyx_gemm_like",1],
+        "5825664545247017348": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "4624363818743696582": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "152263592822875549": ["convolution_gpu_bfyx_gemm_like",2],
+        "17015791782274123780": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "16831114690704826637": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17350963651826443169": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7277156316894715321": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17559685912375493682": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "9083686317073801642": ["convolution_gpu_bfyx_gemm_like",1],
+        "311101627084421734": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "10322586483496198615": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7154364270315480182": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "9947693652506812817": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "13593258537178247801": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "16758962840329202004": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1077224320045437593": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2999633429402781278": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "2184670359551186734": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17798626036576472760": ["convolution_gpu_bfyx_os_iyx_osv16",545],
+        "14705509109623500235": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "8079376692609682448": ["convolution_gpu_bfyx_gemm_like",0],
+        "4585891362157592384": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "5748047690737461635": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "173772845058977237": ["convolution_gpu_bfyx_os_iyx_osv16",512],
+        "6899658518070473523": ["convolution_gpu_bfyx_gemm_like",2],
+        "9455406830371528486": ["convolution_gpu_bfyx_gemm_like",1],
+        "3027775502561362722": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "1006828591724642933": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "12136625628940225638": ["convolution_gpu_bfyx_gemm_like",2],
+        "14253275166085865948": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "9875997976286355123": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "14017025411515888007": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14077148976508649021": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13140254055376365092": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4476218615403440835": ["convolution_gpu_bfyx_gemm_like",2],
+        "11465965972527519631": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8045697952241865861": ["convolution_gpu_bfyx_gemm_like",2],
+        "8109572327736409899": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12325592439309417414": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "18280672126778847258": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "14725765847498813247": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2014911634432127630": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9835338452418388180": ["convolution_gpu_bfyx_gemm_like",2],
+        "16912035321030511639": ["convolution_gpu_bfyx_gemm_like",1],
+        "5701438170070600512": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "1499841226042523429": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "9823752892549805496": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "9101571410887509600": ["convolution_gpu_bfyx_gemm_like",0],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "9410125656044318792": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1818433662409886324": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2827850900421982274": ["convolution_gpu_bfyx_gemm_like",1],
+        "11507538232733291666": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "536646811796032046": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18167100055915766856": ["convolution_gpu_bfyx_gemm_like",1],
+        "14184440545916228597": ["convolution_gpu_bfyx_gemm_like",2],
+        "9068406831482072377": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16190949264253468961": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "475665035119038846": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "4172485608495372888": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "13696782397412896129": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6056291179600370019": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14492935486352505845": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4316519748653705692": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "16453041919970581620": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "14696479950182046016": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "1925626127045202964": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "16614170159588864300": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "7185832253431234935": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14004715832115880216": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "7157531901512507924": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "14681705641267917886": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "4872433441839808585": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13503688893307029975": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "6914775146138105785": ["convolution_gpu_bfyx_gemm_like",2],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",2],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "2451712485584835395": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "9305957796037500628": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4040607776348275579": ["convolution_gpu_bfyx_gemm_like",2],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "7088331918128954410": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "9377779605078400305": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "6740545361286720494": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15490478608105402679": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9548658329589481069": ["convolution_gpu_bfyx_gemm_like",2],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "5280182001774668876": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1142725391726703078": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7876355212013100281": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "2363414141971004557": ["convolution_gpu_bfyx_gemm_like",2],
+        "9019451572520595738": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2111049986724040641": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "6610054713068442549": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "13163026305514410688": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7338578624767544128": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13491221531603384511": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12038525298168664305": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1015184966858657992": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4010329161090285019": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6722358544720547260": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "5553779954745929430": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "169973842603492802": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14203061085285979556": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5361028467247182860": ["convolution_gpu_bfyx_gemm_like",1],
+        "11630475290242283451": ["convolution_gpu_bfyx_gemm_like",2],
+        "16768470780681544910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "7480968533463196410": ["convolution_gpu_bfyx_gemm_like",2],
+        "13818587810073749596": ["convolution_gpu_bfyx_gemm_like",1],
+        "12700051513124813499": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "18386376129938707290": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "11333068902248367382": ["convolution_gpu_bfyx_gemm_like",2],
+        "13219865669259079983": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "9700592037514669700": ["convolution_gpu_bfyx_gemm_like",2],
+        "10105539975183207700": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "4239415134522959352": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "12170874893413205000": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "8325686349100774855": ["convolution_gpu_bfyx_gemm_like",2],
+        "8413117662038329068": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2904162348196990593": ["convolution_gpu_bfyx_gemm_like",1],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3527012447011885981": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5230406405159608187": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "8779947213821605681": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7505966294864890221": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1213958002895787672": ["convolution_gpu_bfyx_gemm_like",2],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8575296926578119953": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17641033958594901664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "9105431502075531641": ["convolution_gpu_bfyx_gemm_like",2],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "10794662801660960189": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "14579042972443651846": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "13403617010417893318": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18242682488017822077": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "6149261133858739754": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",420],
+        "13088023076667575514": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "14277843123789500234": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1370827524176794227": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "12293705794290797805": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3034947396960425753": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "11680829908738480957": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "316225690176910392": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "17236135174912837061": ["convolution_gpu_bfyx_gemm_like",2],
+        "6851536988434597530": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6612643056203714506": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "3446991010350155849": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15071888879264671307": ["convolution_gpu_bfyx_os_iyx_osv16",104],
+        "1228256819256996416": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17118569850095586049": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16201999154635899927": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "6235096928786525260": ["convolution_gpu_bfyx_os_iyx_osv16",337],
+        "11493371521058673700": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "699127221549844251": ["convolution_gpu_bfyx_gemm_like",2],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "11129224786768161139": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "4631772220201098020": ["convolution_gpu_bfyx_gemm_like",2],
+        "7536287105029319189": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "10412748832841674068": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7385295618478993079": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5934841294975212773": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14815498807515058447": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "13773898185415904435": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16997897512818072938": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12201437677145858979": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "16067821671414842756": ["convolution_gpu_bfyx_gemm_like",1],
+        "11191071895289217783": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6542417269641204414": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9226443907548972870": ["convolution_gpu_bfyx_gemm_like",1],
+        "6948606378949354116": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "4652308622880770983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3285688984628545255": ["fully_connected_gpu_fb_io_ref",1],
+        "17396226612787250663": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4695182996147218495": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "17235360775064303316": ["convolution_gpu_bfyx_gemm_like",2],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6402941068107243403": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12166710900466116000": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17358006976602795707": ["convolution_gpu_bfyx_gemm_like",2],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5680888227752935228": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "13288543822410746011": ["convolution_gpu_bfyx_gemm_like",1],
+        "1603703756241612948": ["convolution_gpu_bfyx_gemm_like",2],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "2820364088001594654": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "14513925709624513868": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "13244693761392741931": ["fully_connected_gpu_fb_oi_ref",0],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "12211848608269437730": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "1898912620350738645": ["convolution_gpu_bfyx_gemm_like",2],
+        "5849577829817109757": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "12811104880512633036": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "10736915975072972467": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15047163348308549816": ["convolution_gpu_bfyx_gemm_like",1],
+        "6673690359191617215": ["fully_connected_gpu_fb_oi_ref",1],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "13019190248083899887": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "9318652504803279936": ["convolution_gpu_bfyx_gemm_like",2],
+        "2345023488044002149": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "9692949270906064580": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6489074577147494118": ["convolution_gpu_bfyx_gemm_like",1],
+        "8271034912009744989": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2248754661513284642": ["convolution_gpu_bfyx_gemm_like",2],
+        "6865406633958213363": ["convolution_gpu_bfyx_gemm_like",2],
+        "14600118619533737293": ["fully_connected_gpu_fb_oi_ref",0],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "13014443130752087867": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "3730207439375250056": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "17113350507039887381": ["convolution_gpu_bfyx_gemm_like",1],
+        "6604223938357238686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2817383483458239293": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "17692144048680858991": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15178327647765537565": ["convolution_gpu_bfyx_os_iyx_osv16",666],
+        "7544565739420583104": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8529571293598502239": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "16328232350072955252": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "14746900092090885770": ["convolution_gpu_bfyx_gemm_like",2],
+        "1200162031019105686": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4510003738155830628": ["convolution_gpu_bfyx_gemm_like",1],
+        "8057302050645780813": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6784146431605417954": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "14084855778741260863": ["convolution_gpu_bfyx_gemm_like",2],
+        "9883719542550391149": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6999530153839596796": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "13412296930014397060": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2062195022363480864": ["convolution_gpu_bfyx_gemm_like",1],
+        "10806992251978564302": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9352385417006844121": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "4890932609897686394": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "484412270668341493": ["convolution_gpu_bfyx_gemm_like",1],
+        "15662207751131195569": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",1],
+        "15183511809138557392": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5733530388090903847": ["convolution_gpu_bfyx_gemm_like",2],
+        "9574931298183748343": ["convolution_gpu_bfyx_gemm_like",2],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8751367574402839332": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "18259787991864449280": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "6373173636869473046": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6012477132351580695": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "16367495521884864886": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "13095408117538194584": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "3020115657931277672": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4941660917457387098": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "2238901105639912692": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "1671347101986657824": ["convolution_gpu_bfyx_gemm_like",2],
+        "12274268980330855890": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "17079309368548171402": ["convolution_gpu_bfyx_gemm_like",1],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",2],
+        "4684985181211883028": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "9882204352209412039": ["convolution_gpu_bfyx_gemm_like",1],
+        "14600700464602327710": ["convolution_gpu_bfyx_gemm_like",2],
+        "1682486914760867977": ["convolution_gpu_bfyx_gemm_like",2],
+        "5013936351898884291": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8292979162428130363": ["convolution_gpu_bfyx_gemm_like",2],
+        "2564518461717467683": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13613948678997524330": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "919788620883613958": ["convolution_gpu_bfyx_os_iyx_osv16",464],
+        "18060514966005474708": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "13044020050176766314": ["convolution_gpu_bfyx_gemm_like",1],
+        "10720782649044333851": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "1966540437574889257": ["convolution_gpu_bfyx_gemm_like",1],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "16076153317792960383": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "12960666483922103702": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "2264520082689779253": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "6220616397859143111": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10857084376518292379": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15487686565734149288": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "6647969101146756031": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12301464827222654105": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "8560635685184432720": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "9694891301950867606": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "13345599888287912619": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2511072616914149110": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "15890749658785957481": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14386256118128644729": ["convolution_gpu_bfyx_gemm_like",2],
+        "7806837641999814363": ["convolution_gpu_bfyx_gemm_like",2],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5164372816534616260": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14895352662503433583": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3889688816787688160": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "16499919609457089685": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "11825209936640729550": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "4366168099274266975": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "11962541545116807979": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "17939745299931100048": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "11583791752668920812": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "14116682822622440033": ["convolution_gpu_bfyx_gemm_like",1],
+        "15178012823756517910": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14276876004054588508": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7627882727285402176": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "1504867045084152953": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8488789346759658706": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2446257282140830646": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "17310332946322628458": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "14905705901815863508": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1553825475921110392": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7335403151694644211": ["convolution_gpu_bfyx_gemm_like",1],
+        "2310159350914289605": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14128599551956588603": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "16614678178197571772": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "16805562203348924108": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "13565027847255501776": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16364899406120840449": ["convolution_gpu_bfyx_os_iyx_osv16",398],
+        "17128760774072077101": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9358401110755269308": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8703758535351908295": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10136297272678091418": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13065517911798224579": ["convolution_gpu_bfyx_os_iyx_osv16",377],
+        "7722090560547236852": ["convolution_gpu_bfyx_gemm_like",1],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1962479636209947761": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "16392283136103456949": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "4438055737691342460": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2520734476651273971": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8569122574675372789": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "8159489372517869446": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "11599990834682830362": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "17825953644228876369": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8885012252853227025": ["convolution_gpu_bfyx_gemm_like",1],
+        "8484526109354576450": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17096175733187202673": ["convolution_gpu_bfyx_gemm_like",2],
+        "9596656797750683465": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12883021432082543848": ["convolution_gpu_bfyx_gemm_like",1],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7504074736798125353": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17184638213817814424": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",1],
+        "13681462437496627948": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11091771531609585709": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2599817012641445801": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "15921072201288695017": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11258182961445417799": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6214312494103149808": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "1673458534805854479": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10944997349682267106": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "2887152687927903549": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13017541921351620667": ["convolution_gpu_bfyx_gemm_like",2],
+        "17626938391567407401": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "517802466588815950": ["convolution_gpu_bfyx_gemm_like",2],
+        "2079476232214121671": ["convolution_gpu_bfyx_gemm_like",1],
+        "2225233951957105071": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16035563519857925932": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "8525704362451630717": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12022980249970038824": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "17230103497915224469": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "17666004363345457085": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6224167817672480442": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12144421857685107073": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6581494673640781863": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5461649843950745696": ["convolution_gpu_bfyx_gemm_like",2],
+        "3718980061704064547": ["convolution_gpu_bfyx_gemm_like",2],
+        "712420402191459810": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "11757919563609176713": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "7808544677773370430": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15997231252708686870": ["convolution_gpu_bfyx_gemm_like",2],
+        "12924910330295852704": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7499082230554771515": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "4702017956226464806": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "10532500300200244159": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "11298638173197050575": ["convolution_gpu_bfyx_os_iyx_osv16",942],
+        "5675497261720118479": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "13845827017732177448": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14854353557342075292": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "8948718883406304307": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7510055418609679364": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2821441037530057414": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "13524128602135083081": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "9707630588260222630": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "9181466280310872332": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "11148502358361704423": ["convolution_gpu_bfyx_gemm_like",1],
+        "7959969582538910953": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6613282637922219205": ["convolution_gpu_bfyx_gemm_like",2],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "12028030221272546172": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "4995510103045767117": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "14707884854112495064": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7323343770209750835": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3292879092145281224": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "15592248516895826924": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9400558994532871122": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15875968032394961531": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4073467095502162430": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "16044646335477470657": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14376192291828307385": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6323026044750482867": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5094419710576598497": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13085261987388297912": ["convolution_gpu_bfyx_gemm_like",1],
+        "7463657272687673896": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "3789890554711038921": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12070592804878487941": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "1208483520611545642": ["convolution_gpu_bfyx_gemm_like",2],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",763],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "2652267888871336297": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5507708258753405429": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "9475812329914836280": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10025893052937028511": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "15221712686851573528": ["convolution_gpu_bfyx_gemm_like",2],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "3703292222363446463": ["convolution_gpu_bfyx_os_iyx_osv16",762],
+        "9608148784787572220": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4036143655651874318": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "7371339724529362579": ["convolution_gpu_bfyx_gemm_like",2],
+        "16847817828600381030": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15334769670416409064": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3910549475873353422": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13762814538289753428": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",2],
+        "16070611944881238498": ["convolution_gpu_bfyx_os_iyx_osv16",884],
+        "9910414853336797922": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15180747404865201068": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "11060822686394981344": ["convolution_gpu_bfyx_gemm_like",1],
+        "18146184020578260553": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "13328449155966085543": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4129586781834275070": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9649533822873928984": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9593975471009029134": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10572208209982879914": ["convolution_gpu_bfyx_gemm_like",0],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_gemm",2],
+        "16124702296533772526": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "14558850297291634005": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "1254745727978231148": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13283018618260255620": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "16992620579546408448": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13352151930345854198": ["convolution_gpu_bfyx_os_iyx_osv16",275],
+        "2690771087990667627": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "1208243889917809864": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "7494124707566708728": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "13564654155363057485": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15160322051545035612": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "7473012539094225392": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "16896863928108200897": ["convolution_gpu_bfyx_gemm_like",2],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "11648841195768568983": ["convolution_gpu_bfyx_gemm_like",0],
+        "13831458435772917577": ["convolution_gpu_bfyx_gemm_like",2],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15378025640603637387": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "16852207712205172744": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15132518566122695317": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "1168311873250200110": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3541828356667081528": ["convolution_gpu_bfyx_gemm_like",1],
+        "9524663472084054050": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7431237779891953779": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "9197931868200777891": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "9451273689649467046": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "6878922067845522655": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "17242820574559628535": ["convolution_gpu_bfyx_gemm_like",1],
+        "15452996816194024433": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "70244312667395170": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11919129623429545762": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17795358440179122086": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8263822658108674162": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "2152903140704848574": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6735135795253013220": ["convolution_gpu_bfyx_gemm_like",2],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5215755301612973095": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "4122312805832663323": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11619548409913646265": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2108296560864415762": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "912423125050985716": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17281198415161259885": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2110090486638190463": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "3240428557350945267": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13491655481292956895": ["convolution_gpu_bfyx_gemm_like",1],
+        "2343921093633784755": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "3148053731303748054": ["convolution_gpu_bfyx_gemm_like",2],
+        "16404059675217592817": ["fully_connected_gpu_fb_oi_ref",1],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "9034951536385533818": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "12756432707088842236": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "523055954326631884": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17850932752450917677": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "14973411884734235059": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "16229324496308453344": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10736892779278378335": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "11261619081095309088": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "13368477378531148593": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "5401523175111660554": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9802832901508552733": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11361013180071053597": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "269334626439013799": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17970855913877771858": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "18332090297993015499": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3665837617379468265": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8316848551837633169": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "17807033661138518449": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6571473790090353005": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "499739705596245675": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5603409300903611279": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14332388011233886083": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6673753637296082820": ["convolution_gpu_bfyx_gemm_like",2],
+        "8528886126454874796": ["convolution_gpu_bfyx_gemm_like",1],
+        "10946069941293798874": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10054253863699485503": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9416285845239621878": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9042812985530274425": ["convolution_gpu_bfyx_gemm_like",2],
+        "12671153706040443724": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "12705054744767500423": ["fully_connected_gpu_fb_io_ref",1],
+        "8503207028307570404": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "5049534591553232781": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6456426339461437148": ["convolution_gpu_bfyx_gemm_like",1],
+        "1289009275012699560": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "3965871278597751318": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13744951984978188201": ["fully_connected_gpu_fb_io_ref",1],
+        "13728180355108851541": ["convolution_gpu_bfyx_gemm_like",2],
+        "4524347845016978037": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "5011273172385428756": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "12283317230112506089": ["convolution_gpu_bfyx_gemm_like",2],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "10175721494218314250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10432687907685994204": ["convolution_gpu_bfyx_gemm_like",1],
+        "13614921331048223116": ["convolution_gpu_bfyx_gemm_like",2],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9765339420071627045": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "10660230104888153758": ["convolution_gpu_bfyx_gemm_like",2],
+        "12386930130408773521": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6706491729783125139": ["convolution_gpu_bfyx_gemm_like",1],
+        "12675858428585873471": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "9888097487468905169": ["convolution_gpu_bfyx_gemm_like",2],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "466868648178437688": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "7282751412088726760": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "14270450799210365812": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7518734167761579102": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9854440591497995284": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13405310261845268772": ["convolution_gpu_bfyx_gemm_like",2],
+        "7715520469947900684": ["convolution_gpu_bfyx_os_iyx_osv16",571],
+        "16408015571155576773": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4783126652984096700": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "13388004363210658650": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "14256842018830898376": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "16114623916610925741": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10397253349562394184": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8007667797556094444": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "15129834325410878425": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "18417880214901227799": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4722824701199486161": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "17011927973643184196": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15212317205888563836": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13802834658447955377": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "6527268791835193134": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "10918743320372308981": ["convolution_gpu_bfyx_gemm_like",2],
+        "2737840613867456953": ["convolution_gpu_bfyx_gemm_like",2],
+        "269829518575229806": ["convolution_gpu_bfyx_gemm_like",2],
+        "2944333966072327932": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "12744887771237881196": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "1242366856673194709": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "17753585752923130911": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "7282595712912388754": ["convolution_gpu_bfyx_os_iyx_osv16",189],
+        "6985970932645412773": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "7930154826818165796": ["convolution_gpu_bfyx_gemm_like",2],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "6953478877896677022": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "10607904718265020949": ["convolution_gpu_bfyx_gemm_like",2],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8008513163448840421": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13221156296791499146": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "15391215077224693736": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "704262295684441748": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "11455732989503244360": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "18424400171776141118": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "4286652913945761799": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "5379608399492828685": ["convolution_gpu_bfyx_gemm_like",1],
+        "4614700272179482173": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7441139786825555264": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "202304354656398848": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "11962382064404466630": ["convolution_gpu_bfyx_gemm_like",1],
+        "5301440603380967612": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "12018398218876712811": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "10898684230183205955": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "2752322006160986801": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15660316437768312006": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15668791697154389130": ["convolution_gpu_bfyx_gemm_like",1],
+        "1139581213977408268": ["fully_connected_gpu_fb_io_ref",2],
+        "6649759230117795192": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "5244441996055494170": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",62],
+        "11070696274716018686": ["convolution_gpu_bfyx_os_iyx_osv16",570],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "8146906136296114696": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "435261825003875448": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "8922463054055280800": ["convolution_gpu_bfyx_gemm_like",1],
+        "13674246753382740056": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14189775376370027482": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8254412626112343365": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13596494923128445274": ["convolution_gpu_bfyx_gemm_like",2],
+        "7085416207166146240": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "10320711719466983961": ["convolution_gpu_bfyx_gemm_like",2],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13624106485902414324": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15566108481408840783": ["convolution_gpu_bfyx_gemm_like",2],
+        "15225331270926229394": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13659291428095454839": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16932090423428476170": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1882912836250239503": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "3442073007560756473": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3609233164979051271": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13108356579957761944": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16027853591907232537": ["convolution_gpu_bfyx_gemm_like",1],
+        "14446344744130895614": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17924819398394001587": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1608378717397996752": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "393884269158067083": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14903430454784452446": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11079710960007068860": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "11815825155082424936": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "2367791050032803116": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "11868789283464117390": ["convolution_gpu_bfyx_gemm_like",2],
+        "11207578758583923357": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17368161816774674256": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "4551182180668229945": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "9001645663675631429": ["fully_connected_gpu_yxfb_ref",2],
+        "18191573176587760698": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "6027350558532160900": ["convolution_gpu_bfyx_gemm_like",2],
+        "11229587372764249222": ["convolution_gpu_bfyx_gemm_like",2],
+        "15838058479520696173": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "7318929661124340248": ["convolution_gpu_bfyx_gemm_like",0],
+        "3177915003579216846": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "7052552351421332490": ["convolution_gpu_bfyx_gemm_like",2],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",1],
+        "5589785455223385189": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5163965164859517893": ["convolution_gpu_bfyx_gemm_like",2],
+        "2268291720177538378": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13205973783895006074": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "11553355518677163509": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "14108113294744119367": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4673618329986777239": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14287890401250603057": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "3448477246688526708": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",91],
+        "2532962442388536022": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14433662482531248989": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2335428826699999827": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "82249723699159955": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "16547425454653232058": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16613907066461513431": ["convolution_gpu_bfyx_gemm_like",0],
+        "11725629762660987217": ["convolution_gpu_bfyx_gemm_like",1],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8171897258557801015": ["convolution_gpu_bfyx_gemm_like",1],
+        "15959241441689395955": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "14585370009659482450": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "15838114628203742383": ["convolution_gpu_bfyx_gemm_like",2],
+        "2399812257701033542": ["convolution_gpu_bfyx_gemm_like",2],
+        "7962383460496540840": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3828988304073539836": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11307531462784240962": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14838067105091112485": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4790599496008369129": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "10358359789382196576": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "16073578125651112218": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17405865057155583042": ["convolution_gpu_bfyx_gemm_like",1],
+        "8312903198090907576": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10173382130572498594": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "331390460560782085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",1],
+        "1811357700607919311": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11986642867827682648": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "11114015660322254541": ["convolution_gpu_bfyx_gemm_like",1],
+        "6420851258772300332": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "14793709237400480942": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "12569856169024791306": ["convolution_gpu_bfyx_gemm_like",2],
+        "2001464747481073870": ["convolution_gpu_bfyx_gemm_like",1],
+        "8863398172720091880": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "15148625184033310404": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10624246057883518638": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "6730474465453860479": ["convolution_gpu_bfyx_os_iyx_osv16",1039],
+        "10073439287681954518": ["convolution_gpu_bfyx_gemm_like",2],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",523],
+        "15465799788109255561": ["convolution_gpu_bfyx_gemm_like",2],
+        "11757953304204716753": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "1306339989221885682": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3198726093355425150": ["convolution_gpu_bfyx_gemm_like",2],
+        "962311766200741205": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16728826595086368897": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "1147744092130296563": ["convolution_gpu_bfyx_gemm_like",1],
+        "7146559117784312265": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13073788277284969422": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "2305461098719675735": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "17521647426452186921": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "5433618404351968121": ["convolution_gpu_bfyx_gemm_like",2],
+        "17794162443307839614": ["convolution_gpu_bfyx_gemm_like",1],
+        "16440598510199834213": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "18009765676050504407": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3509811595028801757": ["convolution_gpu_bfyx_os_iyx_osv16",131],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "1109243878358317937": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "7254869458810021127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12615462894236933223": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11926378988530133568": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2930545263523345204": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "7630776235327261710": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13787118639037730152": ["convolution_gpu_bfyx_os_iyx_osv16",298],
+        "404419072921281472": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "17749857812061795980": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4101449235783342476": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "14385181780082014495": ["convolution_gpu_bfyx_gemm_like",2],
+        "6013434489252641471": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8175595372513695437": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15092483859565823523": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3503236715353689942": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "10831460252334010668": ["convolution_gpu_bfyx_gemm_like",2],
+        "14681717813022425567": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6157727013102138824": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "9823997593704517392": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "3223726179820717808": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "10033076377998157101": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2571778193407799664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "13769943652297353544": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "16031140952379208074": ["convolution_gpu_bfyx_gemm_like",2],
+        "6128534975733321186": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10273183900108661041": ["convolution_gpu_bfyx_gemm_like",2],
+        "8316011587868622301": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "11907507085694711513": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "2004120786408087671": ["convolution_gpu_bfyx_gemm_like",2],
+        "17515573322312447679": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "5162737590442940024": ["convolution_gpu_bfyx_gemm_like",1],
+        "10906417366145323499": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11992158790035075804": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7606097739225472283": ["convolution_gpu_bfyx_gemm_like",2],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7753336153932360422": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6549150139619174585": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "505102470055903237": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "157852787707383962": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "8909239203149651260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14537109978413728476": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",767],
+        "17420288204511371476": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12570087709404311189": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6210483922262161762": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11883941040326858829": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "17303981366934280174": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "4127717437639868970": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17981604038340576961": ["convolution_gpu_bfyx_gemm_like",1],
+        "4301372734564127254": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "2086001721804797157": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "16184979150665364486": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "1934379409955686502": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "11655994466278963438": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "1945630503883822822": ["convolution_gpu_bfyx_gemm_like",1],
+        "15232673324549539143": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "6661117204204077150": ["convolution_gpu_bfyx_gemm_like",2],
+        "10384416235770656262": ["convolution_gpu_bfyx_gemm_like",1],
+        "13716836930727272782": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3819763245853861272": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "7345632855842905966": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2571186327837339204": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "9996196793804333253": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "11246470701714560770": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "8212533074856783509": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "33889407315234685": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17242442529374722270": ["fully_connected_gpu_fb_oi_ref",1],
+        "7496699438957793920": ["convolution_gpu_bfyx_gemm_like",2],
+        "8375465895534833097": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "6476949395889340429": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "18187262802267413585": ["fully_connected_gpu_fb_io_ref",1],
+        "9454146598828084176": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4241640917176830862": ["convolution_gpu_bfyx_gemm_like",2],
+        "10446500827044060319": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "7908036427091174081": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6948147789605707774": ["fully_connected_gpu_fb_io_ref",2],
+        "18159049252673770569": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "10904228118889057467": ["convolution_gpu_bfyx_gemm_like",2],
+        "14266210014132784194": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "5587539329568150667": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "10098661517988566506": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5519244962044894877": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "11777373751892075391": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "17575293085957492821": ["convolution_gpu_bfyx_gemm_like",2],
+        "7145194061073256844": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7243161613448507792": ["convolution_gpu_bfyx_gemm_like",1],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2056597791109604534": ["convolution_gpu_bfyx_gemm_like",2],
+        "2873387231297790075": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "4243114942173293897": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "18232408112396439386": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14335423820860953927": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "1187224156936080964": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "5759260743809103651": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16622402936526588344": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "9061025737181218101": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "670951751279091662": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "13133323947490009546": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10424278617647597641": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "6551173574001309451": ["convolution_gpu_bfyx_gemm_like",1],
+        "397445657349822499": ["convolution_gpu_bfyx_gemm_like",2],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "17016846635668370921": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "10898210758890334465": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "11684927349056930189": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "332090597573908506": ["convolution_gpu_bfyx_gemm_like",1],
+        "4682428771166816734": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "18006581941186887676": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "15661322183507404821": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "14634044133573461949": ["convolution_gpu_bfyx_gemm_like",2],
+        "7714783879762659458": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9806689250758752070": ["convolution_gpu_bfyx_gemm_like",0],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "3166885953206195915": ["convolution_gpu_bfyx_gemm_like",2],
+        "4574242607119408140": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "531020979837645217": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9666426531743983113": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "262113403359175565": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "4634475069086874260": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9397711809671506538": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12008952324872799824": ["convolution_gpu_bfyx_gemm_like",2],
+        "1907439276166837309": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13071064509662090710": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15928746165235747659": ["convolution_gpu_bfyx_gemm_like",2],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8725673763972618034": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7819934200255007163": ["fully_connected_gpu_fb_oi_ref",2],
+        "13051342120933385671": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17664704673433112966": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5353170440534073482": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "8240616667079698459": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4600261954762222519": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7070374681687005676": ["convolution_gpu_bfyx_gemm_like",1],
+        "16968664807495872526": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18404344881797725263": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5267143428977695208": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "7811861756798601201": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "1056494963618130644": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "962676948282027870": ["fully_connected_gpu_fb_io_ref",2],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2788116002380533417": ["convolution_gpu_bfyx_gemm_like",2],
+        "10378966564497668941": ["convolution_gpu_bfyx_os_iyx_osv16",283],
+        "7086574330273897976": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "16244270858428653037": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "11970466555294072275": ["convolution_gpu_bfyx_gemm_like",2],
+        "4586633477264151844": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "6547565989244888354": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "218477594596081189": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "5834006438103071406": ["convolution_gpu_bfyx_gemm_like",2],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "12461575861709234385": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1592994755823247500": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1922168904767469999": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "15718011075217705480": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13485140643204970345": ["convolution_gpu_bfyx_gemm_like",1],
+        "2664944425727769475": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "12314918602191412697": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "11341771589317480665": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "6133854782246597175": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "7394848434332739139": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9937387440035377216": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "11804035561861841621": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9352866803638271156": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "1894591633696862066": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "17580933462801685507": ["convolution_gpu_bfyx_gemm_like",1],
+        "5408469943982199754": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15260448822338206631": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12492763342322011136": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "14975859027256879948": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "615833743936753727": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5982637097503543357": ["convolution_gpu_bfyx_gemm_like",2],
+        "9025790715924779508": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "17078700948595127028": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "10662239532841666965": ["convolution_gpu_bfyx_gemm_like",2],
+        "11049130623091275457": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "7921388663815287395": ["convolution_gpu_bfyx_gemm_like",2],
+        "3811462129131022619": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3555204322491340337": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13047793996728441528": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "4047806462440750215": ["convolution_gpu_bfyx_gemm_like",2],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12518571127411736885": ["convolution_gpu_bfyx_gemm_like",2],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",1],
+        "12248119734016401633": ["fully_connected_gpu_fb_io_ref",1],
+        "7671016314869993705": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "8054562515577756499": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "10732225577823701543": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "2836903620603494117": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "1650080413259413393": ["convolution_gpu_bfyx_gemm_like",2],
+        "7864880361674128748": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2903075619523363020": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "14211549589070739656": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8749468546606972791": ["convolution_gpu_bfyx_gemm_like",2],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "148355059345569721": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "4304943753428518690": ["convolution_gpu_bfyx_gemm_like",1],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "15364374265752682266": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "5136111979773513341": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "15667487381692577290": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "482564204402769504": ["convolution_gpu_bfyx_gemm_like",1],
+        "5983808817108775912": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "14849708746319190277": ["convolution_gpu_bfyx_gemm_like",2],
+        "4646795194660982475": ["convolution_gpu_bfyx_gemm_like",2],
+        "94012300876418257": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "15786313441300512560": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1895945774251432343": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6512006285490280576": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3565702695809105495": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12610854610554906160": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "3895088069642140043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2100387626452428743": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "1362540464632328798": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13012283016751495099": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "436514945529747349": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6719956770229212208": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "12692563384795319282": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "5933743119393822386": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12523676912856063091": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "14744368497944610864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "868177350337221377": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "832976844701988460": ["convolution_gpu_bfyx_gemm_like",1],
+        "14034487492239603874": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12669547093826826335": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "5947492124433175601": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "13276867073526485069": ["convolution_gpu_bfyx_gemm_like",2],
+        "528618206870447012": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "7426788519998680898": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "9803306661531470015": ["fully_connected_gpu_fb_io_ref",2],
+        "6476480727582657308": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "16774728502960825097": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6517802281521111563": ["convolution_gpu_bfyx_gemm_like",1],
+        "10652512666086843369": ["convolution_gpu_bfyx_gemm_like",2],
+        "1452841775482537260": ["convolution_gpu_bfyx_gemm_like",2],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",1],
+        "16285256723517297210": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14852990574796128305": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "8550783999616052522": ["convolution_gpu_bfyx_gemm_like",2],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5733701901687257088": ["convolution_gpu_bfyx_gemm_like",2],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6089202061701179659": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16443833779968719790": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",764],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "15888454525088587794": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "8116504545035982006": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "5275016494706355806": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "15585700465988560560": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "9127066823698894015": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5961488595080209440": ["convolution_gpu_bfyx_gemm_like",2],
+        "4665029580355133140": ["convolution_gpu_bfyx_gemm_like",2],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",1],
+        "5845969526791988973": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12307446289692143781": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "5251771557248725731": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "13758938418512211194": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12700008320838073774": ["convolution_gpu_bfyx_gemm_like",2],
+        "14164778301660100413": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12711558966638028352": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12522364636280164681": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18369668865072009928": ["convolution_gpu_bfyx_gemm_like",2],
+        "727203296169504486": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "5214678408335388758": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17835134875461003221": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8465142022921853516": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "11599932445375240727": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "4601800315090684242": ["convolution_gpu_bfyx_gemm_like",2],
+        "18382226420077875582": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14459249705747952583": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12411228585189337571": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "8995892222116060827": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "12310462218432530363": ["convolution_gpu_bfyx_gemm_like",0],
+        "9776332064497085361": ["convolution_gpu_bfyx_gemm_like",2],
+        "9993925424761661218": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "17824431042110985323": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "17001492460236540325": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9454457647272059910": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "4578587579993676820": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "16113302464937833403": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "17825280904760131680": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1999892441424036372": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "13074593348097634731": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17392732266843821039": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2966185891283165994": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "14566257978356851712": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15783329079045263237": ["convolution_gpu_bfyx_gemm_like",1],
+        "9547451431091729288": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "15149336254307320187": ["convolution_gpu_bfyx_gemm_like",2],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "3961000444895975975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9513545197321447870": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "13031027103925431505": ["convolution_gpu_bfyx_gemm_like",2],
+        "16583563382485459718": ["convolution_gpu_bfyx_gemm_like",1],
+        "4858337483345561292": ["convolution_gpu_bfyx_gemm_like",2],
+        "6536333665377249409": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8374409021681741916": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2307629242354292362": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7670176887560273910": ["convolution_gpu_bfyx_1x1",2],
+        "1847170421455825520": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17407904982433770732": ["convolution_gpu_bfyx_gemm_like",1],
+        "2460415719642436412": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "11437885274663749440": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "5032195346490064156": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18077281411861416889": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "7527175223662342321": ["convolution_gpu_bfyx_gemm_like",1],
+        "68637843533109734": ["convolution_gpu_bfyx_gemm_like",1],
+        "8501760360687221821": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "8906588133431586825": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "5890599002797783437": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "16981010901052181199": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13636129806349817264": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "14900099988131599740": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "17867620992288101450": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "621272125402238670": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13155901262605819372": ["convolution_gpu_bfyx_os_iyx_osv16",292],
+        "5040944983588288886": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10897622326486559468": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "15356995665520295246": ["convolution_gpu_bfyx_gemm_like",0],
+        "17907732260451873185": ["convolution_gpu_bfyx_gemm_like",2],
+        "13762042713029963144": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15365628642332393565": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15777551868644801538": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17680403286850504499": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4833761011498696645": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "17601171646153308079": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "8204962103567653154": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13974740392602492680": ["convolution_gpu_bfyx_gemm_like",2],
+        "2712946943923358377": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5367634698951188749": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "15361186788588226064": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "95993272253183796": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2173649669339714890": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "14355612297330229277": ["convolution_gpu_bfyx_gemm_like",2],
+        "10888435127006141874": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "17754836801944078461": ["convolution_gpu_bfyx_gemm_like",2],
+        "5608447459568229694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "659846949368492111": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "2850118175701764737": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "17093159649157277089": ["convolution_gpu_bfyx_gemm_like",2],
+        "277410555520090949": ["convolution_gpu_bfyx_gemm_like",0],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "10612049417873776481": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9987939079053625302": ["convolution_gpu_bfyx_gemm_like",2],
+        "18341524156838963264": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17784882947271841103": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13367043015761260275": ["convolution_gpu_bfyx_gemm_like",0],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "12983461576274227638": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9747165558500755104": ["convolution_gpu_bfyx_gemm_like",0],
+        "12793814016409887162": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "15653223776766070604": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9194441947620820715": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "15329084374930297871": ["convolution_gpu_bfyx_gemm_like",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "7509199936979430017": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4553508439536472227": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "6638696743420807294": ["convolution_gpu_bfyx_gemm_like",2],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "1720057192283799086": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "13809218391763818477": ["convolution_gpu_bfyx_gemm_like",2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9261867808456596636": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "16568662638983972991": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12024318713420323349": ["convolution_gpu_bfyx_gemm_like",2],
+        "7831542641855749925": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13356152596085257346": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12584870629297848143": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2198100074518629980": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1552088062654417187": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "4407683781177409314": ["convolution_gpu_bfyx_gemm_like",2],
+        "16747069131271457481": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "2213068950786625268": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "17400844732252600825": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "7400370437512056636": ["convolution_gpu_bfyx_gemm_like",2],
+        "1436830013293669148": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8243230863677884952": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4750897775273897282": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14639233649574991406": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13940433448128376511": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10127598593949337541": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "9660812093766156608": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "13764532551476584909": ["convolution_gpu_bfyx_gemm_like",2],
+        "14908665013877276517": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "6555440973226014216": ["convolution_gpu_bfyx_gemm_like",2],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "2032438743863827309": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "17303584953298149285": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8036592210244553232": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "15550722997950669458": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8007491455800395118": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",90],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",2],
+        "14384062335728088286": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "16202841384048331166": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "9427999492792081454": ["convolution_gpu_bfyx_os_iyx_osv16",128],
+        "8469338060514215816": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "13291816522762326802": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8104522072297740079": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10127626701775288565": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "14799589725341253463": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12906669887096343446": ["convolution_gpu_bfyx_gemm_like",2],
+        "17966517080605659454": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11086699387784339943": ["convolution_gpu_bfyx_os_iyx_osv16",495],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17796867588410764794": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18395970344992997862": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9367157746678824712": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11446181888102710561": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "6085098225080533278": ["convolution_gpu_bfyx_gemm_like",2],
+        "4200340674281276565": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "8335501317577461610": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "3991584206721185508": ["fully_connected_gpu_yxfb_ref",2],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "4131527916449986086": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "7505608160068471520": ["fully_connected_gpu_fb_io_ref",2],
+        "6148794431848761670": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "11571049833132558023": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "52089503050497755": ["convolution_gpu_bfyx_os_iyx_osv16",899],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "17130630712943165823": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15240660399630429406": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15531908897773912572": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10771178773821148370": ["convolution_gpu_bfyx_gemm_like",2],
+        "12279591818557049086": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "5290935680520661218": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "16691293834516280510": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "18157442326218165947": ["convolution_gpu_bfyx_gemm_like",2],
+        "15379873910046172004": ["convolution_gpu_bfyx_gemm_like",1],
+        "11345101652477732928": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "5595802790436774398": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17267132595546153629": ["convolution_gpu_bfyx_gemm_like",2],
+        "15887484617041779814": ["convolution_gpu_bfyx_gemm_like",2],
+        "12052225815821079044": ["fully_connected_gpu_fb_io_ref",1],
+        "14112695611389738149": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "12831670701606794888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17778706153204631930": ["convolution_gpu_bfyx_gemm_like",1],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "9447458159095730492": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1334121138243951086": ["convolution_gpu_bfyx_gemm_like",1],
+        "13939763360217628282": ["convolution_gpu_bfyx_gemm_like",2],
+        "16303870101043861053": ["convolution_gpu_bfyx_gemm_like",2],
+        "16237775310369180101": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11421235118459218209": ["convolution_gpu_bfyx_gemm_like",1],
+        "5033753554611312392": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "11269720109905550213": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "517601465150912854": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "5233164031954315264": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "7303492518741737111": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "4134729533276761488": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "5397783260083330774": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "5222741986856655072": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4186140878816408491": ["convolution_gpu_bfyx_os_iyx_osv16",125],
+        "9573589861499897842": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "8623022306922454565": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3237680963342495368": ["convolution_gpu_bfyx_gemm_like",1],
+        "2446435710311724460": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15561518067918160695": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "1852269248476496933": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16001665772103476029": ["convolution_gpu_bfyx_gemm_like",0],
+        "8757900457181374694": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "6902644989079870993": ["convolution_gpu_bfyx_gemm_like",1],
+        "17758354062670710364": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17464785726466943638": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10754321688472707825": ["convolution_gpu_bfyx_gemm_like",2],
+        "13993045680928507594": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "5335250793358473555": ["convolution_gpu_bfyx_gemm_like",1],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "16021335552443492452": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1469048759583678106": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8549811622247170014": ["fully_connected_gpu_fb_io_ref",2],
+        "9816834679089152140": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2054100643811117871": ["convolution_gpu_bfyx_gemm_like",2],
+        "12700957546822808929": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18020588962875998441": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4272417312859966238": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "3714179297375678368": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "498221230041656321": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",0],
+        "17869928048344193660": ["fully_connected_gpu_yxfb_ref",2],
+        "6439778526899109398": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2881475011209167644": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "16934386540875904239": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "8129414331584785189": ["convolution_gpu_bfyx_gemm_like",1],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "3244402155461139559": ["convolution_gpu_bfyx_gemm_like",1],
+        "17602686382249457351": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "15374625876485618845": ["convolution_gpu_bfyx_gemm_like",2],
+        "13083412418930786217": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15262493122847269333": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3291900073868076610": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15993651594402422200": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4265991006340418914": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6080989915764831447": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2649948006897488504": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "9640773327221702885": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "3557182643072772598": ["convolution_gpu_bfyx_gemm_like",2],
+        "6962268765187856246": ["convolution_gpu_bfyx_gemm_like",2],
+        "18402875771862490280": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "6057433908801727873": ["convolution_gpu_bfyx_gemm_like",2],
+        "11828522357351010810": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "15245792492785141641": ["convolution_gpu_bfyx_gemm_like",2],
+        "2668985670745598382": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16642535448111764945": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17366007551797367227": ["convolution_gpu_bfyx_gemm_like",2],
+        "2470579932413307757": ["convolution_gpu_bfyx_gemm_like",1],
+        "13480393611172760874": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "13414375996946350733": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10118395047539851751": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17399103575103078835": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "11210371874006224582": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "10093371683053539916": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15213473731205734586": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "16306284020664131647": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "9140953654075340568": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "659150305191479097": ["convolution_gpu_bfyx_os_iyx_osv16",902],
+        "10186942318345695432": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "15456771485750114116": ["convolution_gpu_bfyx_gemm_like",2],
+        "5011190083565902614": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "3768977479127609228": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9105949910901552052": ["convolution_gpu_bfyx_gemm_like",1],
+        "16195252193236429176": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",1],
+        "10726830507311062380": ["fully_connected_gpu_fb_io_ref",1],
+        "6724516766412732606": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16958661630307271135": ["convolution_gpu_bfyx_gemm_like",1],
+        "1187622888238643867": ["convolution_gpu_bfyx_gemm_like",2],
+        "17796784393519192261": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "14749290801006453098": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "12963601040302529291": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",2],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "1781619247831135285": ["convolution_gpu_bfyx_os_iyx_osv16",305],
+        "4424258528650299664": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "1996860183441418841": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "2662628817605495834": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8641167903508739082": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "15247278167909654073": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "568023964685613279": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "17212292336626940406": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "3202034075645193740": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16355518852513270001": ["convolution_gpu_bfyx_gemm_like",2],
+        "9172445047535982729": ["convolution_gpu_bfyx_gemm_like",2],
+        "17257466221539644081": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16511261203374835334": ["convolution_gpu_bfyx_gemm_like",2],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17845905249343189063": ["convolution_gpu_bfyx_gemm_like",2],
+        "1676419079398771261": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3755253206085028904": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "11696708134796103802": ["convolution_gpu_bfyx_gemm_like",1],
+        "9756049510998074315": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "13182965457868586949": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "1474719104479956715": ["convolution_gpu_bfyx_gemm_like",2],
+        "9464448984918455020": ["fully_connected_gpu_fb_io_ref",0],
+        "10344489318472060767": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "8107597524360102037": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "16349083818768061549": ["convolution_gpu_bfyx_gemm_like",2],
+        "3861084063403560668": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "6534932244936310237": ["convolution_gpu_bfyx_gemm_like",2],
+        "5254115874873721374": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8320522112821700316": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "14980327142253281498": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "10995849055789490935": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "2430404993947067949": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "1100681675092122613": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "956022649859563080": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "13610246822402943068": ["convolution_gpu_bfyx_gemm_like",2],
+        "9559533345689069514": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "7601006550805536675": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "6493509887452943215": ["convolution_gpu_bfyx_gemm_like",1],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "61390148213644186": ["convolution_gpu_bfyx_gemm_like",1],
+        "1183774022668948480": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "2294026590516781945": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12494969618927201911": ["fully_connected_gpu_fb_oi_ref",1],
+        "2740834366358352617": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12156683064218448087": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15581678976147496970": ["convolution_gpu_bfyx_gemm_like",0],
+        "4332002982390788477": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "7844764086278702374": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7650874310714729923": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8484380699802533068": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "10900962238463588974": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13443130482173929700": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4307817040832953223": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2933183897022161826": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "11341287517759485930": ["convolution_gpu_bfyx_gemm_like",2],
+        "11164600098693999456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15718782218800307385": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11767263058642131204": ["convolution_gpu_bfyx_gemm_like",1],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "17251021943762069083": ["convolution_gpu_bfyx_gemm_like",1],
+        "1249137685908951501": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10205929431600082124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1824009696938637196": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2691406689892290663": ["convolution_gpu_bfyx_gemm_like",1],
+        "9144136375141111897": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "14702670413549232065": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11033758130987285174": ["convolution_gpu_bfyx_gemm_like",2],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "609926704263171728": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1312322903335525510": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9241243727411869340": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "7576873892262851401": ["convolution_gpu_bfyx_gemm_like",1],
+        "14936045362442728963": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "16628679902327485435": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "13112861120841066430": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "9974986004361966590": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13775683667344570223": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "15170578644807800052": ["convolution_gpu_bfyx_gemm_like",2],
+        "868827643007921561": ["convolution_gpu_bfyx_gemm_like",2],
+        "12361848206190267821": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1564774057733793087": ["convolution_gpu_bfyx_os_iyx_osv16",97],
+        "10354305663463607086": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9172699707430374863": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "16322719022997791344": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "3221221905804708596": ["convolution_gpu_bfyx_gemm_like",1],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "8146559042269976123": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "18009083375897554008": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "16482301217529090205": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9246213432501129631": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "8733109144496806085": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "3021451990778420603": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "844278648549884313": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10286228358844791913": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13201854669827561901": ["convolution_gpu_bfyx_gemm_like",2],
+        "12184558469694708819": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3803179179802002296": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "13248218293365141596": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "41250455178236256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2730604806511016352": ["convolution_gpu_bfyx_gemm_like",2],
+        "7044087204529042819": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "14001920054473316909": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10093554313775878065": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8108939799996498955": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12503605837910457108": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "32035190068479388": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15971924211584724882": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "16763335832616216769": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "7196214243890296121": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "7102173884859438914": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "16896434896068867157": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "17608288706234084973": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15642549417953837059": ["convolution_gpu_bfyx_gemm_like",2],
+        "8484176982872847423": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6643161848623134458": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2794704364476462562": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "8108843303778211282": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4593862318851730430": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14463983770858421738": ["convolution_gpu_bfyx_gemm_like",2],
+        "8291770994531919371": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "6538694526777067399": ["convolution_gpu_bfyx_gemm_like",1],
+        "14484890926084856480": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "15963358868537664345": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "796900095669815456": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "505027953105355818": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5573639264204952559": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "1106762955109168526": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "16632447105476661928": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3170274732463232729": ["convolution_gpu_bfyx_gemm_like",1],
+        "88592091379585141": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11976258954756052550": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14514450640485628836": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "10134708781744282286": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "3006428377575478529": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "6737332058785771073": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4660214425505918397": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6877976003072165363": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17516369849823844076": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6789547098653828902": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7595481705069674721": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "9805748332775912215": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16580523689587532278": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11407554707582995190": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "8358425189419823078": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17784357412228522825": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "12916369918132790013": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",1037],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",767],
+        "12819626280531787705": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "10231289519907741812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4157063588837576075": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "8751967016877067287": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "10289725524396556967": ["convolution_gpu_bfyx_gemm_like",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13948512795148364852": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "3436770797199367854": ["convolution_gpu_bfyx_gemm_like",1],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",0],
+        "16169024543367503806": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "13140527131098422428": ["convolution_gpu_bfyx_gemm_like",2],
+        "5167141379778311462": ["convolution_gpu_bfyx_gemm_like",2],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8028456017016080468": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "259085394007031207": ["convolution_gpu_bfyx_gemm_like",1],
+        "13959998803881264899": ["convolution_gpu_bfyx_gemm_like",2],
+        "3686062608868674589": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "14727155647330710270": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2415478259408761142": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "14602509614865844486": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "9289375071420565548": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7440546908141206022": ["convolution_gpu_bfyx_gemm_like",2],
+        "15485011864326008444": ["fully_connected_gpu_fb_io_ref",0],
+        "8470783908138180217": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17845195044080380488": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15459849799278480779": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17721709435558297965": ["convolution_gpu_bfyx_gemm_like",1],
+        "14132860735060026066": ["convolution_gpu_bfyx_gemm_like",2],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6983544541444063131": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "13340998273773542342": ["convolution_gpu_bfyx_gemm_like",2],
+        "3134642518413656360": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "12341291953192305346": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4986977887030495943": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "16852690434396099861": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3526198034974948081": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "16053585286807864356": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1882052795393187384": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7617773507561261623": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13459568779083836506": ["convolution_gpu_bfyx_gemm_like",2],
+        "13785621878621289403": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1980887257657896260": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4886289616235149731": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1898776014554946000": ["convolution_gpu_bfyx_gemm_like",2],
+        "4770478662275293849": ["convolution_gpu_bfyx_gemm_like",2],
+        "15117830538655814853": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17178808153714023980": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "1629280013296592298": ["convolution_gpu_bfyx_gemm_like",2],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "13663612869789682704": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14423094456821270228": ["convolution_gpu_bfyx_gemm_like",2],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",523],
+        "15989730594386153813": ["convolution_gpu_bfyx_gemm_like",1],
+        "6095158932103797740": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "7399775379344444344": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "13381833588713493653": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3380653500106294036": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "9981938305144461962": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4519609440668743423": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "15097371415144491976": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "12338760476079493547": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "13933912937625580405": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17126714253919198029": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "2341006744107937832": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "18178391985193947355": ["convolution_gpu_bfyx_gemm_like",2],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "11641605357868918146": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "562221645849170027": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "11561790484526369917": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3658149289395969504": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "8818070832398055086": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5509631031571317557": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5357531127711906072": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8994777547915132466": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "2687781952021151359": ["convolution_gpu_bfyx_gemm_like",1],
+        "18083041911869525296": ["convolution_gpu_bfyx_gemm_like",2],
+        "9876098429582714576": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12466721526829931923": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10848407542826653699": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16808618754363181939": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "7657964685067862984": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13141069720428059461": ["convolution_gpu_bfyx_gemm_like",2],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",1],
+        "1138439260035360722": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "18184154104081850641": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "2338707843044884352": ["convolution_gpu_bfyx_gemm_like",1],
+        "13850920989756588064": ["convolution_gpu_bfyx_gemm_like",2],
+        "17176310030469904708": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9146427497025645310": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11291881629276762730": ["convolution_gpu_bfyx_gemm_like",1],
+        "9850711648349010674": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "700717277178942679": ["convolution_gpu_bfyx_gemm_like",1],
+        "6827316954140278736": ["convolution_gpu_bfyx_os_iyx_osv16",125],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "8509941319309380587": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "16488426854651696706": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "10432925516327889351": ["convolution_gpu_bfyx_gemm_like",1],
+        "10600040563032392126": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "11511221956203704038": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "13839590781642269381": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7508931961595339477": ["convolution_gpu_bfyx_gemm_like",1],
+        "10500029207807372735": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14330281759626724494": ["convolution_gpu_bfyx_gemm_like",2],
+        "7419216766190700536": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "3404911902272307873": ["convolution_gpu_bfyx_gemm_like",2],
+        "17489420766684604600": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "18196676408993954972": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "10186866999254188246": ["convolution_gpu_bfyx_gemm_like",1],
+        "4817953977830392054": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "2930702812469156271": ["fully_connected_gpu_fb_io_ref",1],
+        "16549498607618849252": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11855777686733253894": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "4936968239673204144": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11988463489006787939": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13326233188936584240": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "11290558687608213321": ["convolution_gpu_bfyx_gemm_like",2],
+        "12366546292695084543": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "11267742746905371769": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "16582080251500644069": ["convolution_gpu_bfyx_gemm_like",2],
+        "18113235498360281695": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16851949759898002809": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "14233388108948021331": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12434799432980627966": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16192971634546462244": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "7744644472305197412": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16733587306017341904": ["convolution_gpu_bfyx_gemm_like",2],
+        "10089588313551601914": ["convolution_gpu_bfyx_gemm_like",2],
+        "14397348576352573007": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "11823106525249133834": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13122637768866153753": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10110359677546019738": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12312291300513951124": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5989664002046950385": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "2346855978590136528": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1372939511728986224": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17025268985366223779": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "3971456598769336038": ["convolution_gpu_bfyx_gemm_like",2],
+        "5329218407413679209": ["convolution_gpu_bfyx_gemm_like",2],
+        "18171940644650760608": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "850343942782057099": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "11215862132334892351": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13453226687921450129": ["convolution_gpu_bfyx_gemm_like",2],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "18273922178875123753": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "2028273519579688266": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "6233455595448276342": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11185041745377164894": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "4887402175773881313": ["convolution_gpu_bfyx_gemm_like",1],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "3086110559166474482": ["convolution_gpu_bfyx_gemm_like",2],
+        "3234567405788241673": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "814227839929688672": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7771969115805231266": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "17622515300258231642": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11806105193035393795": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "17715553891959228879": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "11829442945690098558": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9141802671320572984": ["convolution_gpu_bfyx_gemm_like",2],
+        "16170237673140354764": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "8616175124735896626": ["convolution_gpu_bfyx_gemm_like",2],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "17269318621094624075": ["convolution_gpu_bfyx_gemm_like",2],
+        "1529658068204046700": ["convolution_gpu_bfyx_gemm_like",2],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "15317510501392280831": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5688478347124565305": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "447152944190888653": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "14606504543906913119": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "3930526618478171342": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "10455850115486014344": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6210051945051792519": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "15451193085395494344": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "5163641718529821203": ["convolution_gpu_bfyx_gemm_like",1],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",1],
+        "11374410888638324212": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16661248688859994717": ["convolution_gpu_bfyx_gemm_like",2],
+        "3518981281605476136": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "10413043556440687328": ["convolution_gpu_bfyx_gemm_like",2],
+        "911927861489659568": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6561864486643226753": ["fully_connected_gpu_fb_io_ref",1],
+        "17494823614269622175": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "8071652278387309042": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "4805958162773855302": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "16666383605403885590": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "1410512481031922864": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "7033442247935655919": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",1],
+        "12141880589558027223": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2328698995040390396": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "4195847890935259046": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3923715765392385764": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2348721939771018658": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8500612796090968552": ["convolution_gpu_bfyx_gemm_like",1],
+        "13695012630130671371": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "5475537064464968733": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6914536960012332706": ["convolution_gpu_bfyx_gemm_like",0],
+        "3242468066266096173": ["fully_connected_gpu_fb_oi_ref",2],
+        "8817624284607822971": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "8453402620168400406": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",546],
+        "5934211962000091180": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10178462061836778766": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "9810703513111623136": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8870736106637803783": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9628702542543622433": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "14845194064376163156": ["convolution_gpu_bfyx_gemm_like",1],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3436576388124386308": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17152100243867367458": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4614042998549572181": ["convolution_gpu_bfyx_gemm_like",2],
+        "7807168142899312025": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12150109996250730485": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13553045975561262752": ["convolution_gpu_bfyx_gemm_like",2],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16683909937519981313": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "7441199361135503715": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",2],
+        "1091511312740979158": ["convolution_gpu_bfyx_gemm_like",2],
+        "9134203155715293387": ["convolution_gpu_bfyx_gemm_like",2],
+        "17089332981370803321": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "16434635675895599016": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "5186963188234940985": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16951050796024922417": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13395074742046717601": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "14284223645235602230": ["fully_connected_gpu_fb_io_ref",2],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "13248567106128518549": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11739629316219263056": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14797994820826922836": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "1743572310914695413": ["convolution_gpu_bfyx_gemm_like",2],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9579316322704307175": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10131754493574658838": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13273455049742872922": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "15085980226773631346": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "15325810055037682679": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "13447226378200557777": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "3075961585045028347": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11851216776536423298": ["convolution_gpu_bfyx_gemm_like",2],
+        "12251901229904154127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12716923819769400487": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "3438852523146175580": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3638987901025418036": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "10445587307296180364": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6692408578556372014": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16053383948025511837": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7703363154993904399": ["convolution_gpu_bfyx_gemm_like",2],
+        "5632101951796129342": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3666268650646000870": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "10551742525038893508": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "153117141968471446": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "6313048719388952335": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "5981885264666023260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11462394098346770463": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "1698847067049584068": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4046513842327685203": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16181974394948732584": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2431427502927207912": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15119063070382146368": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "5023609284081684300": ["convolution_gpu_bfyx_gemm_like",2],
+        "5797545757863100286": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "3853598651573655548": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "17332395907621747512": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "6981537186704688907": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7162701010394257343": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13383524675055536682": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7105622384646913935": ["convolution_gpu_bfyx_gemm_like",2],
+        "1908733355560815063": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "12278842522836720245": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7210729932836957540": ["convolution_gpu_bfyx_gemm_like",1],
+        "2239948568632407776": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "582386337144876096": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "4569416043426963318": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17921616427936768657": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "2354885756165078342": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "11915835787294686201": ["fully_connected_gpu_fb_io_ref",2],
+        "11588201241814594642": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "17171513366028235799": ["convolution_gpu_bfyx_gemm_like",2],
+        "1313038182637545943": ["convolution_gpu_bfyx_gemm_like",2],
+        "14066660382918185188": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "17810119189318801197": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "884923290083082187": ["convolution_gpu_bfyx_gemm_like",1],
+        "2786925522916317149": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "10701231567226563098": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11260588538207111217": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "3256940792095638732": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "10156210866362845661": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "16482763280295827563": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "13661225837036677371": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "11569367085498045793": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12324580272733221544": ["convolution_gpu_bfyx_gemm_like",2],
+        "10885831773581103653": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "14897935118679731283": ["convolution_gpu_bfyx_gemm_like",2],
+        "6413565827738894970": ["convolution_gpu_bfyx_gemm_like",2],
+        "17221173795372066030": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "18116824232149703772": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10472893418729915556": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13090596133852586482": ["fully_connected_gpu_fb_io_ref",2],
+        "10274587614581350261": ["convolution_gpu_bfyx_gemm_like",2],
+        "10831204282620894983": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "10394041365384258612": ["convolution_gpu_bfyx_gemm_like",1],
+        "16843976559933040107": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1051506168926530904": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",1],
+        "346832567535597247": ["convolution_gpu_bfyx_os_iyx_osv16",515],
+        "17934338042329576850": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15715522462313302642": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "6569793510829850291": ["convolution_gpu_bfyx_gemm_like",2],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "15711618559677233865": ["convolution_gpu_bfyx_gemm_like",2],
+        "15136557970717196814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6603817696964851209": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "9104236539185546468": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "7247414730479113619": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "1314612539156304342": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "5368419079251107469": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "16723949803487501587": ["convolution_gpu_bfyx_gemm_like",1],
+        "15640202505592598653": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17258278942367320412": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "11872894645888259277": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6889498170947481097": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "9667762333290150436": ["convolution_gpu_bfyx_gemm_like",2],
+        "12797434473085560369": ["convolution_gpu_bfyx_gemm_like",1],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "18086782289842715645": ["convolution_gpu_bfyx_gemm_like",2],
+        "10880656082867082647": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "14108091242461324109": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "12478041902013146137": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5375957124102705020": ["convolution_gpu_bfyx_gemm_like",2],
+        "5122639094068865656": ["convolution_gpu_bfyx_gemm_like",2],
+        "3741411131962514208": ["convolution_gpu_bfyx_gemm_like",0],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "8376077531098664520": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17221958812979739319": ["convolution_gpu_bfyx_gemm_like",2],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "11075875009517060583": ["convolution_gpu_bfyx_gemm_like",1],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "4209610989252810404": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "7883469783245625654": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",388],
+        "12814676907278614920": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "15402502830461368746": ["convolution_gpu_bfyx_gemm_like",2],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1104098779103065492": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "6423120553520000795": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "10392297152843428925": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "374553246608550876": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "642256034968512602": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "1701609125136907870": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7059809764116926828": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "15291457825664605611": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1817929353109443200": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10182490653383265979": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "2660620513253264815": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "13116746433291181712": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "8017024160145338317": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "2407509127927738079": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12345000525470836335": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "7107313154723472157": ["convolution_gpu_bfyx_gemm_like",1],
+        "17116130466596594359": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "6096189754478965440": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "9100044555742394133": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "389822325870173489": ["convolution_gpu_bfyx_gemm_like",2],
+        "12608653044712562811": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "17827762625385383658": ["convolution_gpu_bfyx_gemm_like",1],
+        "1103228955716492167": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6744583842563891546": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4830121683809417143": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "14400339764883906933": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "6100453836448514115": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "460780635491857522": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8054185159612481260": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10468108569766167175": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10290107543739998181": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "12881836161162762524": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "10533367671706069274": ["convolution_gpu_bfyx_gemm_like",2],
+        "2616828683870391718": ["convolution_gpu_bfyx_gemm_like",2],
+        "18215260982292770252": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15308667224953963012": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "7678730081652720605": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "7536267099632318821": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "3649980610274946512": ["fully_connected_gpu_fb_io_ref",0],
+        "14642845734482478360": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17550795608527501180": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8090497202997192142": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15300588247579013966": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "12940491379482292807": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "761183183078910587": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "9853089109234784643": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "9151597254187513724": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "592364460086746355": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "14560435854055940143": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "2534408579674556441": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "970596838400633278": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "10131771849139346986": ["fully_connected_gpu_fb_io_ref",1],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12412224630798427948": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "9378419102254633989": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "17543094050285028967": ["convolution_gpu_bfyx_os_iyx_osv16",348],
+        "15095146351334328804": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6763373100985812924": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9126242742012768166": ["convolution_gpu_bfyx_gemm_like",2],
+        "9501165931845934084": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8200094670006738584": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "13091799752362714688": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "7654445730724243959": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "15936513690378208182": ["convolution_gpu_bfyx_gemm_like",2],
+        "2510919738337557939": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14157776769026046014": ["fully_connected_gpu_fb_io_ref",1],
+        "2888587871912905870": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "15107740124884150777": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "8848042913869254179": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "2538377242539785672": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11047625525388102466": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "194324011642969540": ["convolution_gpu_bfyx_gemm_like",1],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16159309494101203811": ["convolution_gpu_bfyx_gemm_like",2],
+        "2299440282267661763": ["convolution_gpu_bfyx_gemm_like",2],
+        "2451603338483395600": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17044070592136685322": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10994887986667360638": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "7450915928720828406": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "2004691166378443418": ["convolution_gpu_bfyx_gemm_like",2],
+        "2595273700611743351": ["convolution_gpu_bfyx_gemm_like",2],
+        "12175796957622122377": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "11190259822407791373": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5116633474932727191": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13821388909343378606": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "8997120235555587461": ["convolution_gpu_bfyx_gemm_like",2],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "16797936364395702812": ["convolution_gpu_bfyx_gemm_like",2],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "5957444113623953990": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14566544143931267758": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7391591731082133842": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15592321818359223008": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15997754881872769378": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "15688260390755491480": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "444533022549215983": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5629373398445592781": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "12930435393720466720": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9300767936311837876": ["convolution_gpu_bfyx_gemm_like",2],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6323504675912413145": ["convolution_gpu_bfyx_gemm_like",2],
+        "6364288463529107554": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3089303702413279458": ["convolution_gpu_bfyx_gemm_like",1],
+        "13418701036204748812": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "8873424072104563382": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "5085190482265319015": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10384537928514123040": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "8075453526439606224": ["convolution_gpu_bfyx_gemm_like",2],
+        "9988347141056982336": ["convolution_gpu_bfyx_gemm_like",2],
+        "18146068930296529306": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3809343305878998617": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "15670841106242481912": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "5516343490635816913": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5552958912776013600": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "6717268005860715462": ["convolution_gpu_bfyx_gemm_like",1],
+        "15154934905173371714": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "6919081291036849635": ["convolution_gpu_bfyx_gemm_like",0],
+        "13599555566632152241": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7431069335622070596": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2105482100745329286": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "4108579755980014185": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "12360796145248339074": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "11318404975804457466": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15596913527233792996": ["convolution_gpu_bfyx_gemm_like",2],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "11674630830833831209": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12089505956882731481": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5970516037710024187": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "6377828127090689238": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "4213330047036138895": ["convolution_gpu_bfyx_gemm_like",2],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "11931909191490706784": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12706645084970410965": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12631385844456089132": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10205576142280465189": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14776308019009874809": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "4557272439632791722": ["convolution_gpu_bfyx_gemm_like",2],
+        "8939683514448064461": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "8307147375351882939": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10997029728191881587": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "17765244777397448823": ["convolution_gpu_bfyx_gemm_like",2],
+        "13906695412889750672": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "12397493112115605421": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "2043990557089419633": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11047327014045909812": ["convolution_gpu_bfyx_gemm_like",2],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",2],
+        "16168891366331544806": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11825205449232126827": ["convolution_gpu_bfyx_gemm_like",2],
+        "6680219899975628258": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "11996551650886043090": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "12691733869577147545": ["convolution_gpu_bfyx_gemm_like",2],
+        "761984225415608773": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14545322358931928911": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3286476039871096924": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "12878858391355259417": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2460361970017706505": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "11623764266322172086": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "9852052796465340830": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "5559417017584278927": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "15470979879166640563": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "4030835922805418609": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13121196588092064246": ["convolution_gpu_bfyx_gemm_like",2],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "1338534626640014074": ["convolution_gpu_bfyx_gemm_like",2],
+        "16112835627818488034": ["convolution_gpu_bfyx_gemm_like",2],
+        "12013883366396753346": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9777638299795801012": ["convolution_gpu_bfyx_gemm_like",2],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5481293245081340756": ["convolution_gpu_bfyx_gemm_like",1],
+        "2888315406857606108": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "2415883693527779570": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "7953340333870774815": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10971971008143485353": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "5842284971563375197": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18076018773227225156": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "832830374368320801": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "2724007091383127418": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "9632178829095307219": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "1429370139030130929": ["convolution_gpu_bfyx_gemm_like",1],
+        "12478496773222604204": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "8467771025017377254": ["convolution_gpu_bfyx_gemm_like",2],
+        "685140170576742460": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14704939880642470064": ["convolution_gpu_bfyx_gemm_like",2],
+        "17264554677210911187": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8549465639583777774": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6181272224000872375": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1350402181555441235": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "9552312946391901745": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13300022131572486202": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "5294364781478821403": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "16985565646738638215": ["convolution_gpu_bfyx_gemm_like",2],
+        "14545094765855515974": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11860902750907076009": ["convolution_gpu_bfyx_gemm_like",1],
+        "3790881125495367946": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2072246877651869428": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "16125965158927145599": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6748628505489041229": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3119235799568225015": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "5094600092408024387": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "3504421925108785018": ["convolution_gpu_bfyx_gemm_like",1],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "14810839157236175179": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10295400862890021635": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "3830787224073518842": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "6586833064055001967": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "5191016422297403500": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15160192060731796225": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "10858234923346500323": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8913451832923806760": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7264756313770306662": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "11878200328276635385": ["convolution_gpu_bfyx_gemm_like",2],
+        "8253823502854784432": ["convolution_gpu_bfyx_gemm_like",2],
+        "12270548292992377827": ["convolution_gpu_bfyx_gemm_like",2],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5018845267269043034": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2183193161596798350": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "2737738314051715813": ["convolution_gpu_bfyx_gemm_like",2],
+        "15434536162164591656": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14743760934522111296": ["convolution_gpu_bfyx_gemm_like",1],
+        "578940134826172063": ["convolution_gpu_bfyx_gemm_like",2],
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "10842828403850880541": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "10754450245035836188": ["convolution_gpu_bfyx_gemm_like",2],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "1226681724476075216": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15012744672096562609": ["convolution_gpu_bfyx_gemm_like",1],
+        "12024416333474523686": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "14366861063858001106": ["convolution_gpu_bfyx_gemm_like",2],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",1],
+        "4104803308438043557": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8557939065994799094": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "3725060015826635697": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "8676627474831455650": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "15342520770460205985": ["convolution_gpu_bfyx_gemm_like",2],
+        "12061391584831995030": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "18420783889227814721": ["convolution_gpu_bfyx_gemm_like",1],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "8751016391945753900": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "5175845410753897614": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4318632837402329958": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12608839247035566137": ["convolution_gpu_bfyx_gemm_like",2],
+        "1081969835308672753": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9340606088243696490": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "8143125165478395106": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "948917645960296825": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15733030371524967129": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18325123280144403295": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "7430073011895298582": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "3006979228759768702": ["convolution_gpu_bfyx_gemm_like",2],
+        "9899897639161550704": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "15516674573659704770": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "17675227620234837075": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8774613863662947205": ["convolution_gpu_bfyx_os_iyx_osv16",113],
+        "411914986559525749": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "13994738382469480124": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "6261584163347634965": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8648848365873958010": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "17358462939783262207": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10117784802089387496": ["convolution_gpu_bfyx_gemm_like",2],
+        "1021364163511049664": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15576932271488848457": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",468],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "8901432555239515645": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17489255290900178723": ["convolution_gpu_bfyx_gemm_like",2],
+        "6819846227498139601": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12967849866710811070": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16484600784717969318": ["convolution_gpu_bfyx_gemm_like",1],
+        "7904735292914337507": ["convolution_gpu_bfyx_gemm_like",2],
+        "1346716334208025932": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "16419903786705052849": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16954232936536653281": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "6140789642561898454": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10396343030099602596": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6522575549211855712": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11306782565667740785": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "8114928396876060694": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "17598441149165536737": ["convolution_gpu_bfyx_gemm_like",2],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "11413890625163220846": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "3811325657214369711": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "11798081355131440794": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14763015336626099830": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2808205041095636198": ["convolution_gpu_bfyx_gemm_like",2],
+        "11006325877486632502": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "11135894989941122115": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16818714747882774917": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "1197101651805223230": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5754301693527535975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9940300152880498818": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15809072026388479729": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "2525260242689556544": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "1462775202780029067": ["convolution_gpu_bfyx_gemm_like",2],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15096978026328154490": ["convolution_gpu_bfyx_gemm_like",2],
+        "14945451027055549800": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2411809718611709031": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "6364765994481977132": ["convolution_gpu_bfyx_gemm_like",2],
+        "7606716827635769887": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "759816003617478606": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "8100051552977329013": ["convolution_gpu_bfyx_gemm_like",2],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "16061176355133391199": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "16801078648431425148": ["convolution_gpu_bfyx_gemm_like",2],
+        "16497757978901707098": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "7230623964042057933": ["convolution_gpu_bfyx_gemm_like",2],
+        "15461879919099373703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "13671635457689276237": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8149815705026829258": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5115051214738974496": ["convolution_gpu_bfyx_gemm_like",2],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17208186152576814861": ["convolution_gpu_bfyx_gemm_like",1],
+        "13502487084912428404": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "12972406304361050136": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "2623687018437195679": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "2451627421465368826": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "1145700078649932035": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11932768899981458741": ["convolution_gpu_bfyx_gemm_like",2],
+        "17188170051014066220": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13073917160317338455": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "18156747282906367814": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "18355551625040856531": ["convolution_gpu_bfyx_gemm_like",1],
+        "9657585348407617520": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10841786394951910408": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8897786294680986991": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "9067207838429479363": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "17430593168191424639": ["convolution_gpu_bfyx_gemm_like",2],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11523864029587161089": ["convolution_gpu_bfyx_gemm_like",0],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",2],
+        "4017163133829149027": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3320392060021963536": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "368147139706197757": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5381578460674280089": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "5523778675167321193": ["fully_connected_gpu_fb_io_ref",0],
+        "2597435203284675496": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2081318772333460627": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "9104710269725948935": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "10447427622114317323": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "10263861857115868555": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "14561847633011875566": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5169676188205309169": ["convolution_gpu_bfyx_gemm_like",2],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "11192356850081328892": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "3170336071769787200": ["convolution_gpu_bfyx_gemm_like",1],
+        "1938627662342504660": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "13505239531682993049": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11327678075247102542": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2585767464396438954": ["convolution_gpu_bfyx_gemm_like",2],
+        "3377472614945731801": ["convolution_gpu_bfyx_gemm_like",2],
+        "7838176322738051195": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "7520300815632157008": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5124291229936820926": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17182839667242694171": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "2098357709530580176": ["convolution_gpu_bfyx_gemm_like",2],
+        "10856527039674342926": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "5658491804782285708": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2510093757258898215": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13817553830305981296": ["convolution_gpu_bfyx_gemm_like",1],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17923035110851963413": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "8468092944055919238": ["convolution_gpu_bfyx_gemm_like",2],
+        "2893564501191050837": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "218070270815606832": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3124997104810767514": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "16565784556269819846": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16429816273405099453": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",1],
+        "11006013403687198405": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "697609699740088622": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "15641049130597645936": ["convolution_gpu_bfyx_gemm_like",2],
+        "17287487062245049466": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7575675354187625951": ["convolution_gpu_bfyx_gemm_like",2],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "18348301285923584995": ["convolution_gpu_bfyx_gemm_like",2],
+        "11098189888598804624": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13015379405020620466": ["convolution_gpu_bfyx_gemm_like",2],
+        "17287404861045114619": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "13045206675957093567": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "2479282650381163888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16053441017037949431": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "10451904743064959757": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5902427784683046762": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1006721963560645335": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17243953172314194409": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "17223169013008075474": ["convolution_gpu_bfyx_gemm_like",2],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "14008438372661779490": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7958595516465029682": ["convolution_gpu_bfyx_gemm_like",2],
+        "426267761240826769": ["convolution_gpu_bfyx_gemm_like",1],
+        "241860795253927746": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7937517564893685647": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "8166976803757624321": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17163595630291422874": ["convolution_gpu_bfyx_gemm_like",2],
+        "3502889736327580141": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17338623890209792485": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "3362829461757548683": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16865271154583564899": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "17185089684685480638": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2702566744272427570": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7712831597869354170": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12219239604684537521": ["convolution_gpu_bfyx_gemm_like",1],
+        "9318550032135064372": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "6494837659483504443": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6621483425195088869": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "5458310740719324710": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "12840204133991239572": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",2],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "15715775011639091549": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "5065071428884648135": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "4296524295134959042": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "18384657372655350144": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "2912984501615111849": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2103507679502667581": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "4597954342704466825": ["convolution_gpu_bfyx_gemm_like",1],
+        "5567670507334783760": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3561366509539440079": ["convolution_gpu_bfyx_gemm_like",1],
+        "1364905900191854779": ["convolution_gpu_bfyx_gemm_like",0],
+        "1339402691552717009": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "15678329601718218341": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9005351264094503686": ["convolution_gpu_bfyx_gemm_like",2],
+        "3518605747492037670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16359282790151128772": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "360764089318153518": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "15834666915651997510": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "11851526665791263153": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "13007534905441600782": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16323870023648254366": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17190698921280188790": ["convolution_gpu_bfyx_gemm_like",2],
+        "9753702905908744910": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "56327004269432885": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15936869458531244961": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "13702914647519703599": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11459784003592366395": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9004823715680825977": ["convolution_gpu_bfyx_gemm_like",2],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "6505706083205285176": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14074914477149374595": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "8131682691875884781": ["convolution_gpu_bfyx_gemm_like",2],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3177362994630209421": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9780938731831129283": ["convolution_gpu_bfyx_gemm_like",2],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "17795554443343871443": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "545425355231744794": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3885931890288969926": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "1054159213127890689": ["convolution_gpu_bfyx_gemm_like",2],
+        "12664952811642406457": ["convolution_gpu_bfyx_os_iyx_osv16",569],
+        "2080397907007737054": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "16494403731659808258": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "14716719350966652036": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1119928633562250911": ["convolution_gpu_bfyx_os_iyx_osv16",947],
+        "7713736987017889212": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "14939750655636313880": ["convolution_gpu_bfyx_gemm_like",2],
+        "1646362346584649954": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13192885349640152576": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "16025442470600124062": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "17142080999569154649": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13394233139064923018": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9410978119783758141": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2047041720569246861": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5454796925594082324": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",0],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1682776041247037802": ["convolution_gpu_bfyx_gemm_like",0],
+        "10624567684389583173": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "17959539037614502049": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1954052357826969119": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "16956980254113285457": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "922541506531537121": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6447172410311223671": ["convolution_gpu_bfyx_gemm_like",1],
+        "15052127817178941719": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11062005455602919062": ["convolution_gpu_bfyx_gemm_like",1],
+        "6351924049625723579": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12925156865008155065": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5291944277945000781": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "311255514995417672": ["convolution_gpu_bfyx_gemm_like",2],
+        "11868419561534906809": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "3664562521273273709": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11299275869800089824": ["convolution_gpu_bfyx_gemm_like",1],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11107930597263802755": ["convolution_gpu_bfyx_gemm_like",2],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",2],
+        "5284132464580556804": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "17309224746854446222": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "8154794217037682993": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18133614045401867449": ["convolution_gpu_bfyx_gemm_like",2],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "3897967722980386263": ["convolution_gpu_bfyx_gemm_like",2],
+        "15088940149962496972": ["convolution_gpu_bfyx_gemm_like",1],
+        "7083152697366621236": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9298483238271063853": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "10625675062556386448": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6121673167888047110": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12541764833974378504": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12923653434892323603": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2567809041240246707": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17942120824047252501": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17912189681971987483": ["convolution_gpu_bfyx_gemm_like",2],
+        "7573459699367415551": ["convolution_gpu_bfyx_os_iyx_osv16",515],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",539],
+        "8489998884193999354": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10356951625481502476": ["convolution_gpu_bfyx_gemm_like",2],
+        "14044495589185586465": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "41672385434660942": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "8262441556572334783": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "16748743818537812349": ["convolution_gpu_bfyx_gemm_like",2],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "8234878941966364642": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "7396823789595001064": ["convolution_gpu_bfyx_gemm_like",2],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2850279308978256234": ["convolution_gpu_bfyx_gemm_like",2],
+        "10068502639160680134": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "4766071144928072260": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "4455497237293642238": ["convolution_gpu_bfyx_gemm_like",2],
+        "3621449131285713809": ["convolution_gpu_bfyx_gemm_like",2],
+        "18044455700176500102": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3623866842874047894": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "17332230377845694888": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3200047546714112402": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "8682149821028981871": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",661],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "6290180140047520382": ["convolution_gpu_bfyx_gemm_like",1],
+        "5135353986081664933": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12138556002719602750": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "15603643151057665338": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "5033665285977853779": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "17433037267999205350": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069334144391048686": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "13199524367893035805": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17279975778400757791": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "13925839061045347955": ["convolution_gpu_bfyx_gemm_like",1],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "3761770343527826418": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9092949297095391463": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10545749454895857995": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4590784654677429162": ["convolution_gpu_bfyx_gemm_like",2],
+        "7981376447277193852": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "6964180083696019970": ["convolution_gpu_bfyx_gemm_like",1],
+        "6496839689453807726": ["convolution_gpu_bfyx_gemm_like",2],
+        "203639177311791127": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "1005880016096298476": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "9750510172185801133": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6905249031401202060": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17575578027095664417": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3036808833459559381": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "3125577147662589592": ["convolution_gpu_bfyx_gemm_like",1],
+        "10708706979952421150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "5756918912614763074": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5973242004448142604": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "9863856393759813897": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9609257787066002999": ["convolution_gpu_bfyx_gemm_like",2],
+        "8454760437961964894": ["convolution_gpu_bfyx_gemm_like",2],
+        "1117787205894124896": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "14471867575610362464": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "3816774953143987171": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8576229375621297412": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "9759380701896779097": ["convolution_gpu_bfyx_gemm_like",2],
+        "17774902969414949042": ["convolution_gpu_bfyx_gemm_like",2],
+        "3882955134902442387": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "16683485007140805060": ["fully_connected_gpu_fb_io_ref",1],
+        "16767564582561837873": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2104529100867065546": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "10158184435144178161": ["convolution_gpu_bfyx_os_iyx_osv16",337],
+        "11892088065638996743": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "9743806043658380623": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "17228877915053571642": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3272776991539782834": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14234117003504517946": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2220961811760955456": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "8850600236849718709": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",831],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "5519835581976587401": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "17040970955448750876": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "4239133538073498792": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "5409924335138540834": ["convolution_gpu_bfyx_os_iyx_osv16",526],
+        "14132290154676895976": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12582624102297726596": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "9454954846682513038": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4950144098898276785": ["convolution_gpu_bfyx_gemm_like",2],
+        "12427258337646070422": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7228139313323996640": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9569446666675696513": ["convolution_gpu_bfyx_gemm_like",1],
+        "7813041847979170166": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17628984504073918701": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2713481951804190325": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "15489882561480858974": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3939805316470672966": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3839690227347352846": ["convolution_gpu_bfyx_gemm_like",2],
+        "17864395500488861670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5036963191507722541": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "261021128656714770": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "12482312825666761192": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "18219755699990183812": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9070474871526366492": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "2841943277631596989": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "5570191330195573102": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "12823842409678756966": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "6262190151863459214": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "69884424286147709": ["convolution_gpu_bfyx_gemm_like",2],
+        "2521821959816944292": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "17471843449888763571": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2777614869053822003": ["convolution_gpu_bfyx_os_iyx_osv16",377],
+        "13126786259906598018": ["convolution_gpu_bfyx_os_iyx_osv16",1026],
+        "13948873105076070952": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "4220826666482500445": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "8422808932256100230": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "6621371075123542816": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",512],
+        "15360511165237335684": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17399542571019639128": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10117376369841171716": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "846177346130290194": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "15336590103518398224": ["convolution_gpu_bfyx_gemm_like",2],
+        "17243648226968859637": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2930848604606590505": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "3621070130367713395": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15411603884973340468": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15016406041863758148": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "13804435767468730732": ["convolution_gpu_bfyx_gemm_like",2],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "10463896120685306944": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15808629700189777056": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17713034180977313726": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "9404677451270692749": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "7942294816235384071": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1865187811299838654": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "17049054004246292085": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "4147006350295905486": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9655550151067451233": ["convolution_gpu_bfyx_gemm_like",2],
+        "9833242806281729759": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "9038991914155436715": ["convolution_gpu_bfyx_gemm_like",1],
+        "10730856574108806045": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "5461980510262646821": ["convolution_gpu_bfyx_gemm_like",2],
+        "4679163800360809315": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12714194906146827658": ["convolution_gpu_bfyx_gemm_like",1],
+        "3859314295530377028": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "7263339400190408379": ["convolution_gpu_bfyx_gemm_like",2],
+        "15532419087060587119": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5536424274663702901": ["convolution_gpu_bfyx_gemm_like",2],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "2746052215199129520": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "17716065235878633691": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5685381761573686628": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8258382025812748961": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "14707855908416908375": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "14650567822254940018": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "15294692035670155801": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "498239903908845198": ["convolution_gpu_bfyx_gemm_like",2],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "15479549936562568596": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7363788553442810299": ["convolution_gpu_bfyx_gemm_like",2],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "15775917744517770768": ["convolution_gpu_bfyx_gemm_like",2],
+        "9899242398980336120": ["convolution_gpu_bfyx_gemm_like",1],
+        "12791525533856308302": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "9256308629247511374": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "11433534680781300610": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10965563190266380694": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "17252689774572814142": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "6158514925486943212": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "14786904599410885158": ["convolution_gpu_bfyx_os_iyx_osv16",465],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "11151426820269138585": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1276881030620698911": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17523210737277743952": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4883106423598271822": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "8800251965243080024": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3382494956350224120": ["convolution_gpu_bfyx_gemm_like",1],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "15322989486222859378": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "801864263975761712": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "9457038545823436137": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "12654574135415748217": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "8131617570786904723": ["convolution_gpu_bfyx_gemm_like",2],
+        "1663732107639157701": ["convolution_gpu_bfyx_gemm_like",2],
+        "6695336381467406810": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "11984095218733350838": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14953809073272885651": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4911398420005278258": ["convolution_gpu_bfyx_gemm_like",1],
+        "4940950742383121943": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "17614929666625976544": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "10683839359385393536": ["convolution_gpu_bfyx_gemm_like",1],
+        "9207334433308148635": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "18153597620760635012": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13373912451448693522": ["convolution_gpu_bfyx_gemm_like",1],
+        "7369471926167902143": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "10076578838853982233": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "2935787827649981367": ["convolution_gpu_bfyx_gemm_like",1],
+        "9198752981132674942": ["convolution_gpu_bfyx_gemm_like",1],
+        "17693518538833606792": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "572155668587252712": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "530825424084837479": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "1655427025346068673": ["convolution_gpu_bfyx_gemm_like",1],
+        "10084794570892043447": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "4495774394017823312": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "13359643347682243944": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "11568162864377479487": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "8155752116518841384": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "7982784766505903515": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "4185477435943946730": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8354812222032899427": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16131386739027190836": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "6277198010392189880": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "11845013061234102293": ["convolution_gpu_bfyx_gemm_like",2],
+        "11287863182337672053": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4716188972902735458": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3704618172730076978": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7768680313873061531": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15265621959560796543": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "9130971535185609293": ["convolution_gpu_bfyx_gemm_like",2],
+        "14930745998253392722": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "8963262014498730146": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",1],
+        "4947961640303581107": ["convolution_gpu_bfyx_gemm_like",2],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "11088128828863596806": ["convolution_gpu_bfyx_gemm_like",2],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "4294879469633231552": ["convolution_gpu_bfyx_gemm_like",2],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "9608917563823863132": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12889351859522118935": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1233021176530240722": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4999210721703970274": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14086074948200412805": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8515479970005301094": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "7071991799972799089": ["convolution_gpu_bfyx_gemm_like",2],
+        "54019631544204590": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12590495767805868405": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "15316782593191029443": ["convolution_gpu_bfyx_gemm_like",2],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17466025028296506313": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "17259951372033727587": ["convolution_gpu_bfyx_gemm_like",2],
+        "15385506288692289568": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17087143277789116317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14811022197918391667": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1423297940282476513": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "16996022503617157059": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10509933181132310969": ["convolution_gpu_bfyx_gemm_like",1],
+        "6225447513745282621": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11195875185591819437": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7863886351122918972": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "17006655627343469372": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "8485845304380573432": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10628725059172743408": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17302671258991071440": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2479856511929768548": ["convolution_gpu_bfyx_gemm_like",1],
+        "702096475436365058": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4327450388326573746": ["convolution_gpu_bfyx_gemm_like",1],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",2],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "7848121247546147821": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1003101267609305257": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10462144647439624978": ["convolution_gpu_bfyx_gemm_like",2],
+        "16170708786673864371": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "5229688072405810569": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8269543491844451750": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "11612998433409522582": ["convolution_gpu_bfyx_gemm_like",2],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "11704369548723383645": ["convolution_gpu_bfyx_gemm_like",2],
+        "16122033101591094139": ["fully_connected_gpu_fb_oi_ref",1],
+        "2094213523530180653": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "5011769546010018777": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "18117954008112578376": ["convolution_gpu_bfyx_gemm_like",2],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "1540459344569916165": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5055133356846736609": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "10608496431404827757": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8797661560676476245": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "582360460084115077": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "8529170838214082841": ["convolution_gpu_bfyx_gemm_like",2],
+        "8378690770140438511": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "3860603464276263676": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13616241450266119966": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "16802487456370986847": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15315327794058441258": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "16063854283763838910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15924144379094505874": ["fully_connected_gpu_fb_io_ref",1],
+        "868488930567226694": ["convolution_gpu_bfyx_gemm_like",2],
+        "10348660503952680688": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10208132281050693649": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "14394427817253242611": ["convolution_gpu_bfyx_gemm_like",2],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17406383217119217230": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "6495132856471482043": ["convolution_gpu_bfyx_os_iyx_osv16",865],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "14094981198645015124": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "11782525502250249483": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5230871884758163940": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "6898793319624390153": ["convolution_gpu_bfyx_gemm_like",2],
+        "13600579723542095577": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "9207413252274439059": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15151957983054148973": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "14366252780310630703": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10428477376571919905": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "18250076003231973692": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6778781361481531516": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4196367396954155354": ["convolution_gpu_bfyx_gemm_like",2],
+        "4406157095142118884": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "15381551674482810230": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "18308661808437079996": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "8732106543033226791": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "10568883265991969648": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1795659014508380077": ["convolution_gpu_bfyx_gemm_like",1],
+        "14141983383097250411": ["convolution_gpu_bfyx_gemm_like",1],
+        "6651097363666320726": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "10902108166827340970": ["convolution_gpu_bfyx_gemm_like",2],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "9357359875134299131": ["convolution_gpu_bfyx_gemm_like",2],
+        "14579050468883613611": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "1876286132660871464": ["convolution_gpu_bfyx_gemm_like",2],
+        "2740287492529009109": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "15285236716284874711": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "1062508357634542606": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "18373068999874730591": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "9831195630506601660": ["convolution_gpu_bfyx_gemm_like",2],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "7019316994558628633": ["convolution_gpu_bfyx_gemm_like",2],
+        "13729951531199985382": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "9643671820560131959": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "15841489476316341204": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "15024130918582332928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14301661367597749567": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "6707221689266688389": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "1303304215797905198": ["convolution_gpu_bfyx_gemm_like",2],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "7658318862249823838": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "4347494599650425733": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "8939520209266902800": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17886436103211436626": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12757564215386697460": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "14959281374959998609": ["convolution_gpu_bfyx_gemm_like",2],
+        "18204971481718743856": ["convolution_gpu_bfyx_gemm_like",2],
+        "7174804306958128658": ["convolution_gpu_bfyx_gemm_like",2],
+        "4550028191070279999": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1605295763358374504": ["convolution_gpu_bfyx_gemm_like",2],
+        "12493863403516600413": ["convolution_gpu_bfyx_gemm_like",1],
+        "8749399240948437294": ["convolution_gpu_bfyx_gemm_like",2],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "16494581774051338901": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "4054010905884346287": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17713011656078651": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15683344003370367509": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "17604747523124060652": ["convolution_gpu_bfyx_gemm_like",2],
+        "7688176479120305539": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "12319165874575782715": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "3935883681780676157": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "17828453493113919756": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "9639014900668946045": ["convolution_gpu_bfyx_gemm_like",2],
+        "15280273795883244074": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7761195307416102494": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5095827462645341808": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "17496371501557652357": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "12085208566397959149": ["convolution_gpu_bfyx_gemm_like",2],
+        "5996261744926399743": ["convolution_gpu_bfyx_gemm_like",2],
+        "6954257882806659594": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "16937207522545573792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10600884986702650404": ["convolution_gpu_bfyx_gemm_like",2],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "4797026040899499511": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1127598752149871162": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5939121107940759940": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "585914943085061885": ["convolution_gpu_bfyx_gemm_like",1],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "11579025491409526679": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "14512407261081843554": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "3963106895592011725": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "2346992541638145615": ["convolution_gpu_bfyx_gemm_like",2],
+        "12655099960717366198": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4859271780094116779": ["convolution_gpu_bfyx_gemm_like",2],
+        "13027039165868458729": ["convolution_gpu_bfyx_gemm_like",2],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "1760830986937165861": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "15551453802011405101": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "2467535554409643460": ["convolution_gpu_bfyx_gemm_like",1],
+        "15124985846197662243": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "3615203440895591147": ["convolution_gpu_bfyx_gemm_like",1],
+        "8230144305844912369": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "7826714904736870517": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17342868362584820356": ["convolution_gpu_bfyx_gemm_like",2],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "13462726136352103466": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10433456687054381828": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2577413012740709678": ["convolution_gpu_bfyx_gemm_like",2],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",2],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "15434706304418357961": ["convolution_gpu_bfyx_gemm_like",2],
+        "12636120902231094700": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6717243674054760598": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "16684378382033936005": ["convolution_gpu_bfyx_gemm_like",2],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "8431845338648284548": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "1410630713443793537": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "1760779615705074283": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "13020929028222837402": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "16228026045292341333": ["convolution_gpu_bfyx_gemm_like",2],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18445243511250094011": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "4860779741225078946": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "12965552570525926289": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "14683086376707577764": ["convolution_gpu_bfyx_gemm_like",1],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "1146282291269334070": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2425177545256374371": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "6995472847770703647": ["convolution_gpu_bfyx_gemm_like",2],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "10270203686708782941": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3150231129728961455": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "14365699621119565405": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11430797372848621790": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "841243068178925457": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "3855151839445505918": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1179906398014559042": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "6578239603654034233": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "11322451605795727486": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7410628771323937530": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7490524380333929773": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "5695368162557483073": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "12136803297132972709": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6526586547926160627": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "11910060331768652144": ["convolution_gpu_bfyx_gemm_like",2],
+        "6603489144277795818": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7356440848422235031": ["convolution_gpu_bfyx_gemm_like",1],
+        "17846557385112426504": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12713087335581316946": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "3831257753143317802": ["convolution_gpu_bfyx_gemm_like",2],
+        "17372520271370779917": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "8860685325047463026": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13731964100893109797": ["convolution_gpu_bfyx_gemm_like",1],
+        "2916077416184925232": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7157499157310356912": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8509748651922589684": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10756831914332769026": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",1],
+        "5369464352361405510": ["convolution_gpu_bfyx_gemm_like",2],
+        "9522661528867955338": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1691554843141984381": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "13797057152042581440": ["convolution_gpu_bfyx_gemm_like",1],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "10205696100164492716": ["convolution_gpu_bfyx_gemm_like",2],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2055914145961691571": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8104331313502492541": ["convolution_gpu_bfyx_gemm_like",1],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "15282806587681892519": ["convolution_gpu_bfyx_gemm_like",1],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15552287544878243347": ["convolution_gpu_bfyx_gemm_like",1],
+        "14156845527754813253": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "6740385846687754849": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "12823080103951853168": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17851024468934906318": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "10190532901392055501": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4113935675071480884": ["convolution_gpu_bfyx_gemm_like",2],
+        "14757855448502485216": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "15857087373591747006": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5352896995050401444": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8701639906504450534": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "17526891234501366023": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14269161473352876138": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "17631458041591681785": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14213516751025324346": ["convolution_gpu_bfyx_gemm_like",2],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "5754844816339228920": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "9324602658580246084": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "10660722770448981436": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9743549865786050651": ["convolution_gpu_bfyx_gemm_like",2],
+        "4356806313729405658": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "5906712613621491207": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9522947878591994913": ["convolution_gpu_bfyx_gemm_like",2],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "13468713306678453952": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "9527075413813342687": ["convolution_gpu_bfyx_gemm_like",2],
+        "11369389082421346630": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "4479979951990338510": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2039909180006215069": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "14174805457643822445": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "533820672115442982": ["convolution_gpu_bfyx_gemm_like",2],
+        "459936950868112292": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "6747799061507191246": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9468542963649996822": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "6108475838757986889": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17769703068450272262": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "17128723415461475388": ["convolution_gpu_bfyx_gemm_like",2],
+        "1713947356482032411": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5887877259873928726": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4607428643002808173": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "16149924641081427062": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "2388209402010617408": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "18043340998699622388": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15511138074959300404": ["convolution_gpu_bfyx_gemm_like",2],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "13348329768178411596": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "1299452063079314341": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9497934813418221769": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "1395293354112586043": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7730305811644972643": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "17514082938765137629": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "18259001228411909210": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "6587817876244206939": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7089077910858800239": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "7289940394271052757": ["convolution_gpu_bfyx_gemm_like",1],
+        "13702692566238948173": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9391425117463100557": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "15695275881213623746": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "4897991181236908768": ["convolution_gpu_bfyx_gemm_like",1],
+        "12582321591799165205": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "1629816265162728770": ["convolution_gpu_bfyx_gemm_like",1],
+        "14740550583313186369": ["convolution_gpu_bfyx_gemm_like",1],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14808831640065476291": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4369346833875105372": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12836639380579091509": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "1650519167046658780": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "1114661658519542600": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18132981365225439999": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2467766894778630615": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",876],
+        "3107611675766875160": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "4202116155711873525": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8509882139595784161": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "3995098494991567714": ["convolution_gpu_bfyx_gemm_like",2],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "363330365598760149": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "10395191003166536655": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11696231285411686761": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289048840489035546": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "8655525088525612583": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11640865562390693266": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5020605371834958647": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "6296371382672640627": ["convolution_gpu_bfyx_gemm_like",1],
+        "13337315872184544686": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "2376239021851907962": ["convolution_gpu_bfyx_gemm_like",1],
+        "1208534686657112759": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "310584224049735004": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "3435773540391994106": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "13676670925355487305": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "17343050785312683560": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "14176233347574275776": ["convolution_gpu_bfyx_gemm_like",1],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "5691889055008878111": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6306539529168638031": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12253987037990618484": ["convolution_gpu_bfyx_gemm_like",1],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "9165275903833498932": ["convolution_gpu_bfyx_gemm_like",2],
+        "15156836293519486753": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7974614031099580856": ["convolution_gpu_bfyx_gemm_like",2],
+        "11928926429060828408": ["convolution_gpu_bfyx_os_iyx_osv16",132],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14755869345266103764": ["fully_connected_gpu_fb_oi_ref",1],
+        "9557728221162137067": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "14417033368952865805": ["convolution_gpu_bfyx_gemm_like",1],
+        "16026019808764920641": ["convolution_gpu_bfyx_gemm_like",2],
+        "16897485136352617189": ["convolution_gpu_bfyx_gemm_like",2],
+        "2688060699200137048": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4834591210311380436": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "13500369101462555447": ["convolution_gpu_bfyx_gemm_like",2],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "5334190564423375247": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "15679696422603106163": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3522455279376021211": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2246205611561147645": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5301394322453453489": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17429692714456679999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",2],
+        "9177395776408296291": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14904665242518014005": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "1565612286723277822": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5718472464360340274": ["convolution_gpu_bfyx_gemm_like",2],
+        "10897008852059401902": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "12676139447729343679": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7142195383189497127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2789137853864057385": ["convolution_gpu_bfyx_gemm_like",2],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",2],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "5797243082477551421": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3574679673239756551": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "8025053805734757314": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "17991368786018745231": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6203602270552179462": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "16559140502701231107": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "13675314612031135613": ["convolution_gpu_bfyx_gemm_like",1],
+        "8962502004422485576": ["convolution_gpu_bfyx_gemm_like",2],
+        "16955653765071712611": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "3217555855036660482": ["fully_connected_gpu_fb_io_ref",2],
+        "8775336277634573074": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "6876300000441081789": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2070429718533716882": ["convolution_gpu_bfyx_gemm_like",2],
+        "13941251104772804303": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "14083279273292567319": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "8336494030011542852": ["convolution_gpu_bfyx_gemm_like",1],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "14010642743400284761": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "498420237272375425": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13765632280570725774": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "9475130054420979752": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "14046217730873620907": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17086887873464601732": ["convolution_gpu_bfyx_gemm_like",1],
+        "8734483136584351066": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3018306533413795559": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3355259926747524578": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "17818587793483875865": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13064477237937322246": ["convolution_gpu_bfyx_gemm_like",1],
+        "18193831330827252971": ["convolution_gpu_bfyx_gemm_like",2],
+        "12044635257539223503": ["convolution_gpu_bfyx_gemm_like",2],
+        "4725009116734166168": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "18232459663207612727": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11327867170377736609": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "1197184887743937394": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9833540739021310892": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16304963156448605623": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "11213667690594303395": ["fully_connected_gpu_fb_io_ref",1],
+        "9368244029111057323": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "1168589063110524328": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "6026065914078520895": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "12083217714727863832": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "5246955189449281709": ["convolution_gpu_bfyx_gemm_like",2],
+        "1724222702460860833": ["convolution_gpu_bfyx_gemm_like",2],
+        "6973621625148257910": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18010600104565458874": ["convolution_gpu_bfyx_gemm_like",2],
+        "11981887712163064333": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "7152107839144357830": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "5955810688179557560": ["convolution_gpu_bfyx_gemm_like",2],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "710166379854475667": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "1579733029852052699": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8680545947510235993": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7380979920013545867": ["convolution_gpu_bfyx_gemm_like",2],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "5488296540132936296": ["convolution_gpu_bfyx_gemm_like",1],
+        "304721598975479337": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "6133592828563353516": ["convolution_gpu_bfyx_gemm_like",1],
+        "8158983334404475382": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "7353255713834431471": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "11280403113463077620": ["convolution_gpu_bfyx_gemm_like",2],
+        "12794030011655906930": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "17361319565503258506": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "3856394004079548211": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12163456975896925619": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "12311901617815857033": ["convolution_gpu_bfyx_gemm_like",1],
+        "10527256963399838405": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8334832698020211623": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17965825642065048619": ["fully_connected_gpu_fb_oi_ref",2],
+        "8235002440285527553": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "4846216894450341698": ["convolution_gpu_bfyx_gemm_like",2],
+        "7878217536124016199": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6537576410448334203": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "7289633911925073088": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13946367911927964830": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12175297963550750804": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "3432296808755992670": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "13836867092941506302": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "9758759365463492505": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12305383126483033452": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "9497269191159495932": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "11979910991788695837": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "12792454713887439830": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "15241191584896579183": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15534517308430424624": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11878217002671373638": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "8045393243176844621": ["convolution_gpu_bfyx_gemm_like",2],
+        "4245229655273611845": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "12315068368597230211": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "12169896916690963726": ["convolution_gpu_bfyx_gemm_like",2],
+        "6674643031068271417": ["convolution_gpu_bfyx_gemm_like",2],
+        "10838721873837128971": ["convolution_gpu_bfyx_os_iyx_osv16",676],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6796758191974756201": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2215194389847256545": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "496948821475405395": ["convolution_gpu_bfyx_gemm_like",2],
+        "18286006396667126860": ["convolution_gpu_bfyx_gemm_like",1],
+        "10713207196920878995": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "17446388159565719362": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6493920223660825755": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "10011668671963948912": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5172823024549700279": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5635449856699664273": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "12451592945087000191": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "3363675939515208883": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "2257384183256237750": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14463173937397982331": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "8402396502992483524": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17888721282811720634": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2410828969408182980": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16928564394848059094": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14742909697076926475": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14807299286266923693": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "7316825051569394089": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "16935619230235600309": ["convolution_gpu_bfyx_gemm_like",2],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "7439340221097179208": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10614918790075146626": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3069396488274616770": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15929361440504489924": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "10591159235183381823": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "7558864177789582540": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_gemm",1],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6895664772793074050": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "14206328165498357760": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13766538247146238357": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "4945845875046545967": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14309292105974991733": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "15214779483545052950": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "792684262493086891": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "5120274680151325194": ["convolution_gpu_bfyx_gemm_like",2],
+        "14848732804958314374": ["fully_connected_gpu_yxfb_ref",0],
+        "1034911525083515252": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13941188114382863776": ["fully_connected_gpu_fb_oi_ref",2],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "1373904073013943690": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "3746573775462003750": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13282612510005390816": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "10073779356457603252": ["convolution_gpu_bfyx_gemm_like",2],
+        "7404732699742965436": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10306169610486701545": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "11007100272494557520": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3752278444736105763": ["convolution_gpu_bfyx_gemm_like",1],
+        "11404331488962230130": ["convolution_gpu_bfyx_gemm_like",1],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "15394217414267195999": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13721983823460534294": ["convolution_gpu_bfyx_gemm_like",2],
+        "937200116534179904": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "5341876404211768451": ["convolution_gpu_bfyx_gemm_like",1],
+        "9953329530402569669": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5872553335123308034": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3434842614653335826": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6232596685071671579": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7173828525834910425": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "3409255127071376537": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1149548328523286475": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "15019050434475217267": ["convolution_gpu_bfyx_gemm_like",2],
+        "11093147488085506266": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "3604379857905625467": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "3447774474841314860": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "16705941191876956548": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "6491772898618671653": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3318430113631867573": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "3416636940668221406": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "6753857156025715321": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "755157892988514864": ["convolution_gpu_bfyx_os_iyx_osv16",136],
+        "16159852373972174245": ["convolution_gpu_bfyx_gemm_like",2],
+        "10168317560306247723": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "4370027682980493159": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "13694766887442024878": ["fully_connected_gpu_fb_io_ref",1],
+        "6556795059657533200": ["convolution_gpu_bfyx_gemm_like",2],
+        "15387047026300787039": ["convolution_gpu_bfyx_gemm_like",2],
+        "875552069535001284": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "364197229238830807": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "6293500642319778096": ["convolution_gpu_bfyx_gemm_like",1],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "2477866283402053371": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "5448665190811365701": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "16689318540732157754": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "598745924736700294": ["convolution_gpu_bfyx_gemm_like",2],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8054599744123820194": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "10159790066948852390": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "2805931700404492624": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4718705504966715203": ["convolution_gpu_bfyx_gemm_like",2],
+        "9444953530704856016": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1677118421195120152": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "16150934538381572916": ["convolution_gpu_bfyx_gemm_like",2],
+        "11004350075893421731": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6849874726361751307": ["convolution_gpu_bfyx_gemm_like",2],
+        "16312223896859176991": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "433161293684647032": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6639715607290389968": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",284],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15737508945513376813": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5743482411668939203": ["convolution_gpu_bfyx_gemm_like",2],
+        "7148542290597073512": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "7281661441196896385": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2542984219353153495": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "6322831233548420761": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "15733883474006568340": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15918017311798856029": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "11522488904021243956": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3420064118559852968": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "2431923918345445420": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "7860086755625626604": ["convolution_gpu_bfyx_gemm_like",2],
+        "10982693252072682414": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "973402921452083017": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "7218689869635572700": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9116206094279111365": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12329909110827539139": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16385712633367611786": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "4063525218682664832": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10881884300766361791": ["convolution_gpu_bfyx_gemm_like",2],
+        "3704271978133986620": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7717602860943327535": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16766706479910720794": ["convolution_gpu_bfyx_gemm_like",2],
+        "10629681722649771498": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1659851931406041285": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17902799955139047426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15737542477498282367": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8550133332738529361": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6626716013917662606": ["convolution_gpu_bfyx_gemm_like",2],
+        "5920614348521143999": ["convolution_gpu_bfyx_os_iyx_osv16",129],
+        "3617433210865054182": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "2772704069752888874": ["convolution_gpu_bfyx_gemm_like",2],
+        "9968686603153440164": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14151249542292579535": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "17947613081555491099": ["fully_connected_gpu_fb_oi_ref",2],
+        "4244790495090049295": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "4554343896877444783": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13599438824699346708": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "937050062571228573": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10250778203413648582": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "10153070641942936648": ["convolution_gpu_bfyx_gemm_like",1],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2838789360952219092": ["convolution_gpu_bfyx_gemm_like",2],
+        "8272823732258536202": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9884646296875511696": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4445912157712391517": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",2],
+        "8153567933591966877": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "7315740838189400004": ["convolution_gpu_bfyx_gemm_like",2],
+        "5060817429317741254": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14724862072414829490": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "981276017776678882": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "10643373404881648498": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3355824730785179775": ["convolution_gpu_bfyx_os_iyx_osv16",899],
+        "1018319414633271980": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "2764034841399585177": ["fully_connected_gpu_fb_oi_ref",2],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "8474585711383508493": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16687701987371294908": ["convolution_gpu_bfyx_gemm_like",2],
+        "15594091060902767607": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "1787598049938821496": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2072252610120557179": ["convolution_gpu_bfyx_gemm_like",2],
+        "6053594232298534345": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "10995424394152951534": ["convolution_gpu_bfyx_gemm_like",2],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "15741360654354155504": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",124],
+        "12878631058803628679": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9531730330306606343": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "1640358227345963848": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "8737417433314100353": ["convolution_gpu_bfyx_gemm_like",2],
+        "14445520478857662586": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6040623414692799116": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "10381752670329683275": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "14066219153422011272": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "3113016029551460773": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "1089679781525023551": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4091785563304559606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "9945721344229922405": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12176879951537921518": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "14173867073407110501": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "277151219694781348": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "14629433964319883917": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "14669219788000023965": ["fully_connected_gpu_fb_oi_ref",0],
+        "889943986793446284": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "15325302411038679750": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10177466042250039828": ["convolution_gpu_bfyx_gemm_like",2],
+        "16140133852987111783": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "15693851280141842140": ["convolution_gpu_bfyx_gemm_like",2],
+        "7562624810837784407": ["convolution_gpu_bfyx_gemm_like",2],
+        "14535007186125575064": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14864150409380754546": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5831305777612569716": ["convolution_gpu_bfyx_gemm_like",2],
+        "6660221471357497741": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10168217053882274702": ["convolution_gpu_bfyx_gemm_like",2],
+        "13874754478479442212": ["convolution_gpu_bfyx_gemm_like",2],
+        "11951606039079763598": ["convolution_gpu_bfyx_gemm_like",2],
+        "5326891298755303584": ["convolution_gpu_bfyx_gemm_like",2],
+        "5550000568272972532": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2387628682187438903": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "72444706264681262": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "14257548530334193336": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13711710595263882397": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14436334357815544497": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11231597775940542830": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4536811685836767511": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8161047856682416508": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15257886319670476581": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12879205642236526041": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7215460815798365056": ["convolution_gpu_bfyx_gemm_like",2],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2881769839926594784": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "11529521968552409482": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6641684310751726510": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17122338330334998991": ["convolution_gpu_bfyx_gemm_like",1],
+        "5185895996350118172": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "714397516895317906": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "13146231972557134419": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7005371843527735283": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "4890442595203749341": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "9216695884134021401": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17382660912493284320": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14652719560551657529": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "4690935789908896751": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "17610648476343170476": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6210074450403696110": ["convolution_gpu_bfyx_gemm_like",2],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2287331417346465035": ["convolution_gpu_bfyx_gemm_like",2],
+        "9235762655002034553": ["convolution_gpu_bfyx_gemm_like",2],
+        "14996839491874598555": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16507285966998102421": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "557778263661655803": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "7344363094493575878": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17947097500350250352": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "8855801044538137828": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18214405165366931407": ["convolution_gpu_bfyx_gemm_like",2],
+        "11095908837221722097": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14902389080201926109": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12526627889432649075": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10340099951904598712": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "6489448536745533209": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "12063854963434677046": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "10931533380146553429": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17021953651379372973": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16907043223873231356": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17633445715900116866": ["convolution_gpu_bfyx_gemm_like",2],
+        "13980058444317683376": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "8039045580314824307": ["convolution_gpu_bfyx_gemm_like",1],
+        "13286723666743148654": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "13277308739029064167": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14203217958874365062": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "15278336216464964580": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5724069285122500749": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12460004417430913427": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1755021778097194246": ["convolution_gpu_bfyx_gemm_like",1],
+        "1062464852330435815": ["convolution_gpu_bfyx_gemm_like",2],
+        "2267942216745157485": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "4766447533088048613": ["convolution_gpu_bfyx_gemm_like",2],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "2705394837952559308": ["convolution_gpu_bfyx_gemm_like",2],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5734909305243135224": ["convolution_gpu_bfyx_gemm_like",0],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "10155417869639270818": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16815373779430857324": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5439738552514649732": ["convolution_gpu_bfyx_gemm_like",2],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "17928043901784474130": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "18216392915308276053": ["convolution_gpu_bfyx_gemm_like",2],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12418390364502912036": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "5821887901198535792": ["convolution_gpu_bfyx_gemm_like",2],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6370629727707634189": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10613156984920928792": ["convolution_gpu_bfyx_gemm_like",1],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "14585144905582599299": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7071864660784255328": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15310138877321331399": ["convolution_gpu_bfyx_gemm_like",2],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "4788094685976850847": ["convolution_gpu_bfyx_gemm_like",1],
+        "5699637716202391188": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "451787079167744428": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "6696330836969622824": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "1617907811128880383": ["convolution_gpu_bfyx_gemm_like",2],
+        "11173744709088359283": ["fully_connected_gpu_fb_oi_ref",2],
+        "15173187675372221634": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "17868294056467093895": ["convolution_gpu_bfyx_gemm_like",2],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "10050254009828302053": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "17390307025967314108": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "7457951266863598199": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14595102366207856448": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "9391102514951576629": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2155348872565175553": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6381439938385141423": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2265784112305305260": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7666505529539001492": ["convolution_gpu_bfyx_gemm_like",2],
+        "17300963371220857043": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9150686862263626364": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "6066347819693426556": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "581553908799266285": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",2],
+        "2543041530639980505": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "7084794834886364709": ["convolution_gpu_bfyx_gemm_like",2],
+        "8977099691399563065": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "15747538142554815480": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "14156264942337528284": ["convolution_gpu_bfyx_gemm_like",2],
+        "893885204484374577": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "7671440804202996063": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "11882388384272635526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9080269503597463911": ["convolution_gpu_bfyx_gemm_like",2],
+        "11985789598994479652": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "861944552852043171": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15076307524263378967": ["convolution_gpu_bfyx_gemm_like",2],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "11646035413147246650": ["convolution_gpu_bfyx_gemm_like",1],
+        "8436644625511258721": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17499047811775012205": ["convolution_gpu_bfyx_gemm_like",1],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "40684756725622867": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15404352708246779967": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17703907155485973486": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "18269382610859905921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "683530182479794259": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "10506079835013332412": ["convolution_gpu_bfyx_gemm_like",2],
+        "10433541468308381909": ["convolution_gpu_bfyx_gemm_like",1],
+        "3652749152621176846": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10747768416582634270": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "14433939319502072879": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12854110364457722483": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10002942280571012447": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14611470203914805229": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "3317498303952226642": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7957927312958744432": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "14784115394395151055": ["convolution_gpu_bfyx_gemm_like",2],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6990161783770805523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12361909180687647792": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9219978118417391687": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15184480575877095737": ["convolution_gpu_bfyx_gemm_like",1],
+        "18400137500031567479": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "10712251675747436685": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "1404523328737649536": ["convolution_gpu_bfyx_gemm_like",1],
+        "10340626080611300806": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11913865086932469909": ["convolution_gpu_bfyx_gemm_like",2],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6955820760012983739": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "5901470393936541758": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "11561352430430157770": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "12659539044474018256": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "17479614483340719566": ["convolution_gpu_bfyx_gemm_like",2],
+        "15630712601053635938": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "15314178289202641916": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15385836287435319028": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13931470674812510958": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "15982499072593548907": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "3805991105758534542": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4810979456269693700": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14387663434151374245": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "8093154215631195896": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "879461985074219072": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16468779692009938330": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "16507216630035678597": ["convolution_gpu_bfyx_gemm_like",1],
+        "8525631489886320841": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "16495435651959280198": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "14017106221778585861": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16620032793356620588": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15668060723417155782": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "15905812449037427213": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15372944709956866587": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "393130776826919699": ["convolution_gpu_bfyx_gemm_like",2],
+        "10710426249911063154": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "17790622334577372736": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12138341287265949399": ["convolution_gpu_bfyx_gemm_like",1],
+        "9110265526128628472": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "14322754320861242412": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "11388177266504804841": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14243609293683870669": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "5385637020152792781": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "17651949893303962955": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "557926911473978758": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9133224739401155411": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6946815194102787268": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "3095800485689583188": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "779633618375662086": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3094541981461578435": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "15444345793124210505": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "2822531372171708171": ["convolution_gpu_bfyx_gemm_like",1],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3787897045202294227": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "9285566577169147378": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "8954139494467782298": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4184940877670248246": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "3013359852055354405": ["convolution_gpu_bfyx_os_iyx_osv16",1049],
+        "15927212142469570269": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "10744779302034526105": ["convolution_gpu_bfyx_gemm_like",1],
+        "10422138282116598013": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6046380638013542109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18169371857833455144": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15140592697506341614": ["convolution_gpu_bfyx_gemm_like",2],
+        "15033864286535250007": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "6925829066248055368": ["convolution_gpu_bfyx_gemm_like",2],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3167115892101501516": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "11379252854859166206": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17829983167337875463": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10409424254454997557": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8435953773852854494": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "10772763339005937717": ["convolution_gpu_bfyx_gemm_like",2],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13771196685227797262": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "6754359635395225555": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "890679620691833367": ["convolution_gpu_bfyx_gemm_like",2],
+        "871656942964602772": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "7458923250983373160": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "18305785425659656349": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "10869059995205753062": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "9626028243479089234": ["convolution_gpu_bfyx_gemm_like",2],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13816380312874384117": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7963120178142346699": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5061053593616346116": ["convolution_gpu_bfyx_gemm_like",2],
+        "801943727169437597": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14503814672536990561": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "12693511427898130707": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1891216794223363114": ["convolution_gpu_bfyx_gemm_like",1],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3201851883430682391": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "914589847837601900": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "1305434952341925041": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "11213283109763090897": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "3290503865540626256": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "6375149408738336520": ["convolution_gpu_bfyx_gemm_like",2],
+        "8094836777153039013": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "774981050284188673": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15529767675448574617": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15464327246951632247": ["convolution_gpu_bfyx_gemm_like",1],
+        "3179874645565098825": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "5776920093461427179": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "8790992468693685188": ["fully_connected_gpu_fb_io_ref",2],
+        "17608082492919905570": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5150467145740542480": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10252930102508743294": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9660587580162063066": ["convolution_gpu_bfyx_gemm_like",2],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "10133406610245448421": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17195491464960153261": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "1557549837620967530": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15197400201857680173": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17376180096577763039": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11353671464383068485": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "14322392426975869640": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "6227066883925046010": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "10578656188786691161": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "11800958516083095340": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "4072967257556128157": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "4292467512797995948": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7287802938269404923": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9180575279116075400": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6404731509766519779": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8195881973746570408": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10792503079194374004": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "8021962180961047152": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "18136765667969393174": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9895036366054127607": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11002656253983635383": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "8481272193490654884": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "4016652650196255483": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "16159055229009077435": ["convolution_gpu_bfyx_gemm_like",2],
+        "4573547058027867538": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16165264024659208580": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "3539764293444807886": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10849780273184392468": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",2],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "7023033151960653752": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7636001038842031672": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2858694223939965231": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "4680261350523889008": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14951164724050668856": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "15594387862678649962": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "11273554217552152172": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6438522646185979880": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "80038800201815976": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1917986916390093536": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2054895351334936744": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "4151997155802743451": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "18213389163198755626": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "12363462562375148101": ["convolution_gpu_bfyx_gemm_like",1],
+        "11312797737791604596": ["convolution_gpu_bfyx_gemm_like",2],
+        "15392592805235453180": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5424159498790442193": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "7390751298966198773": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "6695224851008237679": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3865480446980740412": ["convolution_gpu_bfyx_gemm_like",2],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "1615155632991337496": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14326748416648598247": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "2518919454830671073": ["convolution_gpu_bfyx_gemm_like",2],
+        "17750329428766282997": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "414342067295883061": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9358320688298379206": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "18139055731468596187": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6129602738379919488": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "5922243230245842969": ["convolution_gpu_bfyx_gemm_like",2],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13387804712929042302": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11927673108508931485": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "13429534778879474114": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11066538564303243604": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4440261013093281358": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17325129240374428839": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18074320074700491416": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "12352083215873760290": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5601320732740276692": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10462203417605590793": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12561852932488001568": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17337689605705740533": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "684240994243755872": ["convolution_gpu_bfyx_gemm_like",2],
+        "10973267399508186283": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "8703051983346886620": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3807725810350819929": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14998412675237613013": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17800494747865760215": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "1241188741090538769": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "2605525859754242318": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15743075522781198932": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7903891232234389925": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "1818234431954731769": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "1555841293175143289": ["convolution_gpu_bfyx_gemm_like",2],
+        "2140514316203117958": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1691020960118022320": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15260010680436431377": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11066913713501760080": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6879801583428507100": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "7945923871349397386": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "737706555781027628": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "3826083535442459719": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4460838234035901102": ["convolution_gpu_bfyx_gemm_like",2],
+        "17393241435373906917": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "791937929163665770": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13855910108498240870": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",989],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "4987922194420804256": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "10665697051755790682": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "4004333174619528327": ["convolution_gpu_bfyx_gemm_like",1],
+        "11215297942420903101": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "12260041857695743504": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15220874718853723626": ["convolution_gpu_bfyx_gemm_like",2],
+        "17993337310288098038": ["convolution_gpu_bfyx_gemm_like",2],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "6683090495189325653": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "8065408380801722040": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "17370560568464798319": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1541754036637209097": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13381441263790184121": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "1742897526168249500": ["convolution_gpu_bfyx_gemm_like",1],
+        "17508515605648584094": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3142706898070129318": ["convolution_gpu_bfyx_gemm_like",2],
+        "7833495651619250213": ["convolution_gpu_bfyx_gemm_like",2],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "9549667332801021099": ["convolution_gpu_bfyx_gemm_like",2],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11740474593275702888": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5648099611567577611": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "8162762980597497749": ["convolution_gpu_bfyx_gemm_like",2],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2727219457659794468": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15825993019555657125": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "13186342942242476803": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "13267438341255312172": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16566714514564722975": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "16857192626139882429": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13353123037511986804": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "265378250397648692": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "18260147016899103633": ["convolution_gpu_bfyx_gemm_like",1],
+        "8374232727884943288": ["convolution_gpu_bfyx_gemm_like",1],
+        "2253443114793765536": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16132186023443894579": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "16461300997058854554": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14122647818827599984": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "16091195788712971747": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "14869125900405603130": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "4152919461079296700": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16062811901668074268": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "17761681290527373180": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "12266072789949082198": ["convolution_gpu_bfyx_gemm_like",2],
+        "3349519148124496343": ["fully_connected_gpu_bf_io_gemm",2],
+        "13410178186827874638": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "9226912483632588371": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13426413463253581310": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "10010921697596131761": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "16042236932298055236": ["convolution_gpu_bfyx_gemm_like",0],
+        "8713639086785023623": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "3855859061709004677": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "17873182129275583020": ["convolution_gpu_bfyx_gemm_like",2],
+        "5073980187181521102": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14214141488645257351": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "8700953648388124963": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "263575476655527355": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "16273414163942580140": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "17034122796081495259": ["convolution_gpu_bfyx_gemm_like",2],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "10835684445936063871": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4409539711630405776": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5627351109775149477": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "14204028212129440429": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "2235888904701517631": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13947140171097868740": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "2305345466244887603": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4693778191222244259": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "16126210124715599267": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5440622601084846974": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "6104567430127604601": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "12576360049619146496": ["convolution_gpu_bfyx_gemm_like",2],
+        "7533669599936874355": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "14408266407898585602": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "26434141991791193": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4186957909762095019": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "17075150439662364176": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "12015922610963701033": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",1],
+        "16763947298003094797": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "7476503420928065329": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "2839767407547705101": ["convolution_gpu_bfyx_gemm_like",2],
+        "15466940145773097237": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "11208787273440167590": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14944798586094927774": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "10670829898588047148": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13842309033760176194": ["convolution_gpu_bfyx_gemm_like",2],
+        "2588106330058954614": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4011704860949525864": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16207793515276299964": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",1],
+        "3495464175121035222": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "13869279315296163696": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "14146157492452859667": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "4428125859693766145": ["convolution_gpu_bfyx_gemm_like",2],
+        "18052322665755789573": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",1],
+        "8420176522157084802": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8619380242063264016": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2315979511894958580": ["convolution_gpu_bfyx_gemm_like",2],
+        "8394085742794617896": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "14862938122758223157": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "5084402281339667158": ["convolution_gpu_bfyx_gemm_like",1],
+        "3800864312883193560": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "3643056883397245235": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "5812274221348979687": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "18109284647478027063": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",2],
+        "13485431068391184236": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1096929244128185929": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "10545983240319359348": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "555153826947872383": ["convolution_gpu_bfyx_gemm_like",2],
+        "18194662560696168435": ["convolution_gpu_bfyx_gemm_like",1],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17699579394941627848": ["convolution_gpu_bfyx_gemm_like",2],
+        "18106333667377667797": ["convolution_gpu_bfyx_gemm_like",2],
+        "2424832456352484524": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14559552090809408184": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14350963106032411355": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17347387929692736001": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7917673216808705075": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "6329618009202266591": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "7111620180131341264": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "12711366212612147422": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14605107834931199380": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17381682740282686038": ["convolution_gpu_bfyx_gemm_like",1],
+        "553884705007944190": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13748207123919546925": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7822148442995976259": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7379959915507694400": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2615550169523847175": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13400559817638330692": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17061233750738578337": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "4238163995861108694": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "1961296939362567851": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "11431776034512615562": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11490143853656040028": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "11080118408282076423": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2038505773698938555": ["fully_connected_gpu_bf_io_gemm",1],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "5514520264534847093": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6478247863479663432": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14361697687217060995": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6857064389795419021": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "8332688858465419317": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13094313253457422444": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "2053428297205345660": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16674897846232931666": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13646026173083209094": ["convolution_gpu_bfyx_gemm_like",1],
+        "10253092389452603623": ["convolution_gpu_bfyx_gemm_like",2],
+        "8012414839721814470": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11102920976866402928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14609655423082082099": ["convolution_gpu_bfyx_gemm_like",2],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",1],
+        "3927333491885837374": ["fully_connected_gpu_fb_oi_ref",2],
+        "18136968124686255108": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "656536921219262336": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17140704838989242732": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13891498649894490342": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6625355663340809894": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8382355932367801226": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "7486133596762640215": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "3457676694935264283": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4242438539626727158": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "1188428190761098784": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9996590003462421281": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16614092873294424156": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8964252048679144533": ["convolution_gpu_bfyx_gemm_like",2],
+        "17821196374523699955": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14788817017267716113": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "7578465277886568471": ["convolution_gpu_bfyx_gemm_like",2],
+        "7877872008801536537": ["convolution_gpu_bfyx_gemm_like",2],
+        "12174729877807876787": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12651215303242591871": ["convolution_gpu_bfyx_gemm_like",2],
+        "13499476832444042458": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "7596423139159263456": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "10462797712860969072": ["convolution_gpu_bfyx_gemm_like",2],
+        "12526417587678222534": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6223991300587768990": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5287076386757143976": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "15199659885055090985": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13510598063226540077": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "11232261979256657934": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",2],
+        "8394337033015371278": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11864780937861562358": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "2602209853120236226": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "8104509697376352086": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11863623794400366834": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5490683510357615963": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "2349007644347065353": ["convolution_gpu_bfyx_gemm_like",2],
+        "11769511287553067221": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "16286085532892593349": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7853648744637103420": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "12882754981683858333": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17387764798693150143": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "18026754720065676632": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "10942743767167283370": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "708347829794105085": ["convolution_gpu_bfyx_gemm_like",1],
+        "18372277746801271292": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3046878786712386934": ["convolution_gpu_bfyx_gemm_like",2],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "18012549942299450620": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17119834538806653818": ["convolution_gpu_bfyx_gemm_like",2],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15989164585998175871": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "17274625805315816028": ["convolution_gpu_bfyx_gemm_like",1],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "6366477005383470532": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "4678945085654662665": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "3266638956600784732": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1708527842474979709": ["convolution_gpu_bfyx_gemm_like",2],
+        "15038779174806415801": ["convolution_gpu_bfyx_gemm_like",2],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "8415763978601237333": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "12944449254981328284": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16351593165006175213": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "16496066467505445971": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17480519865636248903": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "2830019939638455400": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3547275591884493445": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "11439519952236570490": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",1],
+        "3497946462254198388": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "13041981853634484809": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "13343968006718934574": ["convolution_gpu_bfyx_gemm_like",2],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",1],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "139367204458861048": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2174528711050181972": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "5566145479615299930": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "10134863884423338495": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "2297846338452062425": ["convolution_gpu_bfyx_gemm_like",2],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17838473675663772639": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "772794189370544860": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "204378699575356398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11275526584835606578": ["convolution_gpu_bfyx_gemm_like",1],
+        "14168946412009689868": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "18259018980049662870": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8403919905230540356": ["fully_connected_gpu_fb_io_ref",2],
+        "17509205154057032109": ["convolution_gpu_bfyx_os_iyx_osv16",471],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7584912988728072414": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",1],
+        "3069726952591207961": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15890492401334524258": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "16916632481840858091": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "9031338938030715616": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "2684971093531227585": ["convolution_gpu_bfyx_gemm_like",2],
+        "9970142663470031403": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12932174902085755507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10681304359334525584": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "15507430010796753396": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "3723082283919334922": ["convolution_gpu_bfyx_gemm_like",2],
+        "17286180622990393912": ["convolution_gpu_bfyx_gemm_like",2],
+        "16881320590336043120": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "11178675492112714513": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "2102507337684140674": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7606277451240586967": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13372079273473545269": ["convolution_gpu_bfyx_gemm_like",2],
+        "12077176094606956613": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16832083703120717402": ["convolution_gpu_bfyx_gemm_like",2],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "5930451476167223501": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14524011013133838054": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "6324194607665787911": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "18057258413318190788": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6858245954375015939": ["convolution_gpu_bfyx_gemm_like",2],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "5740738339752793113": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "7092429446071184360": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "14840301687056551916": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "6307840223437204536": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "2758256770667070477": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "17621284804179990612": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15752695063119223631": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "9322808125154719434": ["convolution_gpu_bfyx_gemm_like",1],
+        "5019077257951332016": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10534355502345993326": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "786418751322581924": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15078379507314446744": ["convolution_gpu_bfyx_gemm_like",2],
+        "11673506380927771816": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4563407231964979217": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "522181557896569275": ["convolution_gpu_bfyx_gemm_like",0],
+        "8954957191824520301": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "3055842046969432235": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "765085235448596225": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "176148486634277377": ["convolution_gpu_bfyx_gemm_like",2],
+        "1743672154424707483": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "9519113693008246391": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "3892873577927627992": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10565789595834959047": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10743628077362128751": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10031973538398542700": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "9236621881488650027": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "12850610175882424919": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "16822728519529055454": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8886676435675463412": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "11726125778063855770": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "7002547494442875680": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15751445344585167275": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "1187817806204244044": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",2],
+        "253337639942573142": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",1],
+        "10254790628108678637": ["convolution_gpu_bfyx_gemm_like",1],
+        "9513218905938141296": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "170594581804738255": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "18415227597391874233": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "17707294419513060769": ["convolution_gpu_bfyx_gemm_like",2],
+        "15861253904810475842": ["convolution_gpu_bfyx_gemm_like",2],
+        "6638761803107874904": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11033824757086203326": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "16767657090925788431": ["convolution_gpu_bfyx_gemm_like",2],
+        "7174790971918109163": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "18096803908321982720": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8938942439963723596": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "6447357750120537934": ["convolution_gpu_bfyx_gemm_like",2],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13476976389397273052": ["convolution_gpu_bfyx_gemm_like",2],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16703049240941366828": ["convolution_gpu_bfyx_gemm_like",2],
+        "14121939808880396150": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "11044223289209000460": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13387602037439694372": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "104165137500939902": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7552144047474664265": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "15598527290222497283": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13881505737488515065": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "1014934490175718598": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15891746043846062984": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "11782514629636023633": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1896394898744191046": ["convolution_gpu_bfyx_gemm_like",1],
+        "9055254157155243850": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "475079717987185580": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "4344644499804057502": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15467064540951151390": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16120159001372711511": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "15643053402284856082": ["convolution_gpu_bfyx_gemm_like",2],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "17517541283617012275": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "8321148793275220552": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11078289776590382448": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "9423239651872522813": ["convolution_gpu_bfyx_gemm_like",2],
+        "1957975992563882145": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "10104091044601583658": ["convolution_gpu_bfyx_gemm_like",2],
+        "2686152083115758704": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "6672808203620992802": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "17172842643607718498": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15911508155433936727": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "529543453251381109": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9305758766575321575": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14555366228958374512": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "7199295899520406795": ["convolution_gpu_bfyx_gemm_like",2],
+        "12796777049340516563": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "7020743056013297476": ["convolution_gpu_bfyx_gemm_like",2],
+        "14071393823183565145": ["convolution_gpu_bfyx_gemm_like",2],
+        "13602299412525111348": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "12394049027081208902": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",812],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "10087048842366891699": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "142345353315012903": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4132087699110753428": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "591445875836641836": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "4960466075321426984": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "15976399554094563736": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11386443944172875185": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5485050451156514865": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "17615365894230830516": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14118838785256822389": ["convolution_gpu_bfyx_gemm_like",2],
+        "8866164762286856139": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "97332433783610027": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "17080372737840346243": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12650986929262866534": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "4477135619420651110": ["convolution_gpu_bfyx_gemm_like",2],
+        "9040986180016264906": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "1413598669014941757": ["convolution_gpu_bfyx_gemm_like",2],
+        "7431469348791099474": ["convolution_gpu_bfyx_gemm_like",2],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "13470016086265528105": ["convolution_gpu_bfyx_gemm_like",1],
+        "5854267518455107328": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15603710070700542017": ["convolution_gpu_bfyx_gemm_like",2],
+        "5219818570070061892": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1601512693620510391": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13297691763391637265": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",1],
+        "5706423911886410117": ["convolution_gpu_bfyx_gemm_like",2],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "12951069548510783681": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15591167992985613695": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4107088111454348836": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "6124219814856247918": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "10062957707721107508": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16179959997108523051": ["convolution_gpu_bfyx_gemm_like",2],
+        "9647713236241614167": ["convolution_gpu_bfyx_gemm_like",2],
+        "10884966210360699082": ["convolution_gpu_bfyx_gemm_like",1],
+        "2728956755635458379": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8578774826625315147": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5414285637221358737": ["convolution_gpu_bfyx_gemm_like",1],
+        "14172081523880352608": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "15786328370300803713": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12647099325257717945": ["convolution_gpu_bfyx_gemm_like",2],
+        "13292923826380958700": ["convolution_gpu_bfyx_gemm_like",2],
+        "18439017855540532958": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "3683201905077543598": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11855137287698046529": ["convolution_gpu_bfyx_gemm_like",2],
+        "15479071839425218367": ["convolution_gpu_bfyx_gemm_like",2],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "9145357433824567384": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11544455862638831851": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3296080624478711270": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "15929970324703663357": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",2],
+        "16181623411787179429": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "7780366826820540504": ["convolution_gpu_bfyx_gemm_like",2],
+        "4538102435488584866": ["convolution_gpu_bfyx_gemm_like",1],
+        "7129623351507828661": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "16629493658542781988": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "14177187878748170225": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4049276089777687996": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "143255828863957128": ["convolution_gpu_bfyx_gemm_like",2],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "8882042369902399339": ["convolution_gpu_bfyx_gemm_like",1],
+        "676641023579624117": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13194245601015251743": ["fully_connected_gpu_fb_io_ref",1],
+        "1641881628032037384": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "9529614587861271730": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "17116941326889312928": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14336344152455180534": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9559550404190168365": ["convolution_gpu_bfyx_gemm_like",2],
+        "8985531644129639832": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "875296362957469305": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "10071611039987219440": ["convolution_gpu_bfyx_gemm_like",2],
+        "17585210048585855482": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13558603350852076889": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "13839075443229327158": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7570078010521452080": ["convolution_gpu_bfyx_gemm_like",1],
+        "7054270030260701612": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "12847879935060092791": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16483792160297698151": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5343186686923330871": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "2438221595194783178": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "294103776081392899": ["convolution_gpu_bfyx_gemm_like",2],
+        "689445825453914111": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4729855738455185191": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "1630585964216121575": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "7630342538679060038": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "18383733736250135501": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15995056067568652754": ["convolution_gpu_bfyx_gemm_like",1],
+        "15129201859573664210": ["convolution_gpu_bfyx_gemm_like",2],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "3012268657922581268": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1160579996766519752": ["convolution_gpu_bfyx_gemm_like",1],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16522546805419218429": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12992163255353386581": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "11071972036962275632": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "1269703478898366518": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",1],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",1],
+        "9052153145556623933": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "14037325204801680738": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "2482449683288477640": ["convolution_gpu_bfyx_gemm_like",2],
+        "6515141738021465336": ["convolution_gpu_bfyx_gemm_like",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "12709406234969954619": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7963529808900784906": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "9890252170749328138": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5053369963163583573": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "14247451223653900488": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "12698546873263218041": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3408249386342406615": ["convolution_gpu_bfyx_gemm_like",1],
+        "9454028594043242985": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "13401926003864565026": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "8058623285594809047": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13624969243174329965": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3810356382905059819": ["convolution_gpu_bfyx_gemm_like",1],
+        "1836277956961261472": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9164584153555521506": ["convolution_gpu_bfyx_gemm_like",2],
+        "10265955847846166394": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7291920886894073603": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3191047205441946466": ["convolution_gpu_bfyx_gemm_like",0],
+        "15862793522143880668": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "8434794604559592624": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "11595387512434355394": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "17035903590837750750": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3510837206834640871": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "14291113322487568376": ["convolution_gpu_bfyx_gemm_like",2],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "11892210755884128272": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "844742962836593299": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "11191005013126286532": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "13727643349589056375": ["convolution_gpu_bfyx_os_iyx_osv16",439],
+        "11273168411455998347": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "3509027370372599394": ["fully_connected_gpu_fb_io_ref",2],
+        "14185215566042478462": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "12927339938362960563": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "1265277707626014051": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "4491694127072416122": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "5340016094501559693": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6150043972317126583": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",21],
+        "281287280558289393": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "4264078972561407296": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "997155336931700015": ["convolution_gpu_bfyx_gemm_like",2],
+        "7552049239568474944": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3280795516668356985": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "5296506025538423220": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "13987250743654950733": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15381014522874131924": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "11026432639515866259": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3625906783784771100": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "9339038855869763548": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "14907038741687299621": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4206637285289830669": ["convolution_gpu_bfyx_gemm_like",1],
+        "9266375177690276615": ["convolution_gpu_bfyx_gemm_like",2],
+        "17543625777838573622": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "5515216528474382598": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "18076129452098771655": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "5750277248295796439": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "12815588500303820284": ["convolution_gpu_bfyx_gemm_like",1],
+        "10809330882739297269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11359020774437470164": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4476037346005841003": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13198480749588992978": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15452906059667613512": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4178614913813882037": ["convolution_gpu_bfyx_gemm_like",2],
+        "1435153323458789173": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13312401790608349463": ["convolution_gpu_bfyx_gemm_like",1],
+        "11919579121199894437": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "7351443601143314161": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2801141274570069180": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "9883682535839267422": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "1686420552593340731": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "8898449752724034655": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "830147122986411443": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8837079302496539409": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "2995957440356398418": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4316278502963439894": ["convolution_gpu_bfyx_gemm_like",2],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "14645023135017806432": ["convolution_gpu_bfyx_gemm_like",2],
+        "13054706902087663592": ["convolution_gpu_bfyx_gemm_like",2],
+        "17372326727957287976": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10554266898346470422": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "10168272404395268951": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9556219639756304369": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "906587812125311288": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15406324750533549980": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6750003965952674453": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "4949865765880884373": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "11622271315873664622": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "6278892144796112655": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1090447867763814054": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11845504142528424662": ["convolution_gpu_bfyx_gemm_like",2],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "16995919898822376726": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "11354523117287453982": ["convolution_gpu_bfyx_gemm_like",2],
+        "3239779684432082106": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "15783558375979538895": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",883],
+        "16605697831520435304": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "625469553102754234": ["convolution_gpu_bfyx_gemm_like",2],
+        "20037669704517227": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2328951328483718941": ["convolution_gpu_bfyx_gemm_like",2],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "9513403717116039597": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12685978195521469707": ["convolution_gpu_bfyx_os_iyx_osv16",189],
+        "12752101288912456176": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "1294871956977733262": ["convolution_gpu_bfyx_gemm_like",2],
+        "15692223101958737604": ["convolution_gpu_bfyx_gemm_like",1],
+        "11453044274130869816": ["convolution_gpu_bfyx_gemm_like",2],
+        "12379734005351960619": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5786828339670204894": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "4010650902230520983": ["convolution_gpu_bfyx_gemm_like",0],
+        "13583272198088247606": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15134268179029323647": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7395593936948809439": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "3349108500387301004": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12407002532205454767": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "7004953121070642766": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "6644418194983229139": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "1027438463802481676": ["convolution_gpu_bfyx_gemm_like",2],
+        "10642327923162019888": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "5061795324735006354": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1421879144542252228": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "16978447917682236120": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6771637612965430926": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "4586246090279043149": ["convolution_gpu_bfyx_gemm_like",2],
+        "17357800564047774826": ["convolution_gpu_bfyx_gemm_like",2],
+        "2008999755215725290": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "981733129438741439": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7211355951470869591": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "5338109154207406041": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5031342439443897167": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "14249486431781112226": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "5424164608102708333": ["convolution_gpu_bfyx_gemm_like",2],
+        "11802527991096689252": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "981197653890885407": ["convolution_gpu_bfyx_gemm_like",1],
+        "8612114608666892632": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1019936903773818652": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13077961697656030315": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8317140711232187781": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8169762955969255618": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "123283730755186382": ["convolution_gpu_bfyx_gemm_like",1],
+        "5083776511235413204": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5510336500642744696": ["convolution_gpu_bfyx_gemm_like",2],
+        "9625931001541723278": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13538051178827008933": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "7744787957569714828": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "3378088934862423864": ["convolution_gpu_bfyx_gemm_like",1],
+        "7978370756654787278": ["convolution_gpu_bfyx_gemm_like",1],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "17340789730321673934": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "7843833033404155302": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5670530004773188380": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",1],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "15031089621161080026": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15156015174611610705": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "8055193939726603877": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10598995451755327159": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13336847303794450665": ["convolution_gpu_bfyx_gemm_like",2],
+        "4992668316921598993": ["convolution_gpu_bfyx_gemm_like",1],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "8578747191812631883": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14915908231779912828": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9676055912997166605": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4369680877112803848": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "4745007371868123765": ["convolution_gpu_bfyx_gemm_like",2],
+        "288825580282908143": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "16932172538978111342": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "13850807749756445264": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "778175413671462719": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "10704037259494193565": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "11734299455885510243": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15395497315929884637": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "17769940507971546305": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7246177123265734169": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "14848351491062336554": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "14443599718173185176": ["convolution_gpu_bfyx_gemm_like",2],
+        "4217179485243909459": ["convolution_gpu_bfyx_gemm_like",1],
+        "13625877249040282040": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like",2],
+        "14257161696605459633": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "12529210672030682764": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "6768451741770053089": ["convolution_gpu_bfyx_gemm_like",2],
+        "15943174060386142134": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "16415344078703911571": ["convolution_gpu_bfyx_gemm_like",2],
+        "15822975685755664152": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "6577240413312348523": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "11668043528929060706": ["convolution_gpu_bfyx_gemm_like",1],
+        "15379595951542162189": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2056766012044921101": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "2384942244346844027": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "6400671582981760192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9746964858035717775": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "9583760104223104233": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13956744866244022582": ["convolution_gpu_bfyx_gemm_like",2],
+        "14403780921831769097": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12956535344568057480": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "1753515740487760297": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10160082844961863335": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "11875516764635427358": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "12242618640422208652": ["convolution_gpu_bfyx_gemm_like",0],
+        "12761366575293006784": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15051114821536746998": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6706802683366112205": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3661305534604931936": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3598116387801985039": ["convolution_gpu_bfyx_os_iyx_osv16",676],
+        "12478421208861550581": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13066055561434178894": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "18160969423211875528": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "104321144590863458": ["convolution_gpu_bfyx_gemm_like",2],
+        "9008848676120441863": ["convolution_gpu_bfyx_gemm_like",2],
+        "4695273549696315193": ["convolution_gpu_bfyx_gemm_like",2],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "4563773888811395621": ["convolution_gpu_bfyx_gemm_like",2],
+        "5351705572686943348": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2647922515901529845": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "296202142406900242": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13342769641176584743": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10468562355439385073": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "4503960445974334415": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "9492331996847106233": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7107513718824525169": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11376522803174788945": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1772363899841601255": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "16715151641337602113": ["convolution_gpu_bfyx_gemm_like",1],
+        "7997955859883990923": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "6474882514032493642": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13348855287761849180": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15922076723067110929": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3980754726678047241": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9794061741834174000": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "7410220112400588068": ["convolution_gpu_bfyx_gemm_like",2],
+        "12323840136934980793": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "13110173649734084688": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13411431109933021193": ["convolution_gpu_bfyx_gemm_like",2],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3590316457726550768": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12942085219027232135": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "8981229334098733320": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "14682537852514419239": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1884327428051733366": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "2301409406426420354": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11091004452522208782": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "12386437738920143482": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "1660279112011537957": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16122815225820081176": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "10599639229366933472": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "11674725184029885494": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "12225119940380026093": ["convolution_gpu_bfyx_os_iyx_osv16",1034],
+        "10908411570889102154": ["convolution_gpu_bfyx_gemm_like",1],
+        "15227034948424983496": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17659601542171299562": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12895496994338720556": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2506154888542197909": ["convolution_gpu_bfyx_os_iyx_osv16",860],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",2],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11534123522633460320": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11922163303962372849": ["convolution_gpu_bfyx_gemm_like",1],
+        "11357813056434049302": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "2950917846016525392": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "15156525717629023944": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "6172851296465788161": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6432519735121751346": ["convolution_gpu_bfyx_gemm_like",1],
+        "14685573786743639408": ["convolution_gpu_bfyx_gemm_like",1],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "11141999085710526242": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8951503172834790833": ["convolution_gpu_bfyx_gemm_like",2],
+        "13498795599230228492": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "16815680874311765189": ["convolution_gpu_bfyx_gemm_like",2],
+        "13886526360627032217": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "10476627457539425144": ["convolution_gpu_bfyx_gemm_like",2],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "17065380294456704620": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13441117085490814804": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1034],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14601912265050074833": ["convolution_gpu_bfyx_gemm_like",2],
+        "5816730482014477109": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10016243001407196485": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1502236537645808646": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "14682894856346977838": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "60749853744407778": ["convolution_gpu_bfyx_gemm_like",2],
+        "5032866547826271476": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "12630173933512965589": ["convolution_gpu_bfyx_gemm_like",2],
+        "3297036980627776719": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17160915544701715607": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13285123703712436126": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "682912708716537431": ["convolution_gpu_bfyx_gemm_like",2],
+        "14454927839795553295": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16039372573821594566": ["convolution_gpu_bfyx_gemm_like",2],
+        "9929060811766882316": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "7043547563530810431": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "11546295514640813785": ["convolution_gpu_bfyx_gemm_like",2],
+        "7693556065684619275": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8618835732380720921": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11906319144823550582": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",2],
+        "815847426244665239": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "4212194737559719449": ["convolution_gpu_bfyx_gemm_like",0],
+        "2352142833866194508": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "568114041320772862": ["convolution_gpu_bfyx_gemm_like",2],
+        "10616832946298118456": ["convolution_gpu_bfyx_gemm_like",2],
+        "14581447673401303181": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "26773921190137993": ["convolution_gpu_bfyx_gemm_like",2],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "3762117189312286955": ["convolution_gpu_bfyx_gemm_like",2],
+        "17453621319901961773": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "4565037760028957581": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "15578217564714846277": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "8697631439739291302": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "7313000297447719088": ["convolution_gpu_bfyx_gemm_like",2],
+        "13993319023992950944": ["convolution_gpu_bfyx_gemm_like",2],
+        "11796671083187280457": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "15637565679147396649": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",4],
+        "14385995236701277049": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "6031307393395339699": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15432337846778101995": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "2722601800398376127": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "15616954046484566002": ["convolution_gpu_bfyx_gemm_like",2],
+        "15830721134654889992": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "7974918595373182037": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8178825467227185946": ["convolution_gpu_bfyx_gemm_like",2],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13102754309439605192": ["convolution_gpu_bfyx_gemm_like",2],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "13691555384698806010": ["convolution_gpu_bfyx_gemm_like",1],
+        "15863633107759120207": ["convolution_gpu_bfyx_gemm_like",1],
+        "12511186263003392018": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "15168098632351740923": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "16650590194585316886": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2743892624333411461": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "17177353407003831190": ["convolution_gpu_bfyx_gemm_like",2],
+        "3292554262586950764": ["convolution_gpu_bfyx_gemm_like",2],
+        "5635504912415420460": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",151],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "11067412830219638639": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "14865708345458193472": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "15464714725848277081": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10716913534741102635": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "3596159214965874273": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "11210961619302975072": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8319405652132127420": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9980945809859857871": ["convolution_gpu_bfyx_gemm_like",2],
+        "13858485871773319706": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13319880343534837963": ["convolution_gpu_bfyx_gemm_like",1],
+        "6983900601570231321": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11897886369869427808": ["convolution_gpu_bfyx_gemm_like",2],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "16540183777173974162": ["convolution_gpu_bfyx_gemm_like",1],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17052596472114345717": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "12458305535453345462": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13255006150107668739": ["convolution_gpu_bfyx_gemm_like",2],
+        "17097621900023182992": ["convolution_gpu_bfyx_gemm_like",2],
+        "14523905821262502926": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5687802882700097624": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "16162899163122139501": ["fully_connected_gpu_fb_io_ref",1],
+        "15891505875671050928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10271474583233390474": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4640696923527766618": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "13473730516782884152": ["convolution_gpu_bfyx_gemm_like",2],
+        "9245770108138984525": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4959718589070770515": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",1],
+        "13234170505677988638": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "45545661884854912": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "5311718276151327830": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12896159402462325805": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "14647949921048404551": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5327803911898085293": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",1],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11012846743944132853": ["convolution_gpu_bfyx_gemm_like",2],
+        "4713580645061462578": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "11576182324195008022": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9673176853197584682": ["convolution_gpu_bfyx_gemm_like",1],
+        "3935404533406270186": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13358754652597677285": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "5246229312484886433": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "15939309688773899430": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3805854200552708060": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5219048275475447369": ["convolution_gpu_bfyx_gemm_like",2],
+        "2832331506191733785": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "710656784939783221": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8306931146242110738": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "118898027441804310": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "7941359635463232326": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "18418073826375395057": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3935174650108042053": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "288853243482418538": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "5342657840254586591": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6025872155179042054": ["convolution_gpu_bfyx_gemm_like",2],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8739570656208259296": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "7086554406050778468": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16342158355942808662": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15887938842582811165": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "4211445170027080823": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15487538714246568015": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17641726060706984007": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "8449108317864057899": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "16536775289334717044": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "13150876648527896999": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "8779987507326777359": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8215519118071138614": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069245927173134634": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "3120885087070223590": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "6728889146307098720": ["convolution_gpu_bfyx_gemm_like",1],
+        "14004618842373739106": ["convolution_gpu_bfyx_gemm_like",2],
+        "16741985699154392565": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8176520928011006903": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14213127286928643795": ["convolution_gpu_bfyx_gemm_like",2],
+        "1336477297334930004": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12565318283493666631": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "11901687795497708884": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "11858246418724176452": ["convolution_gpu_bfyx_gemm_like",1],
+        "17355826643208208691": ["convolution_gpu_bfyx_gemm_like",2],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1204089510255285420": ["convolution_gpu_bfyx_gemm_like",2],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",22],
+        "12621528958448913800": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "2768512766772748723": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15579919505002150556": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "16352438188558979362": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "3594327736281012643": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "8281411537393664160": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "11152834864013527469": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "5384134329664434112": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "17197868427757781334": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "14463841899941062548": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8734220847509054149": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "15597522934012485452": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8770858724416759637": ["convolution_gpu_bfyx_gemm_like",2],
+        "3651651926851660222": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1460916897832302487": ["convolution_gpu_bfyx_gemm_like",2],
+        "2251572761614039612": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17503210896556316294": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "14547907449418439737": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15618891972122000521": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "10380031655567712558": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9516288831713776693": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "13550337096609413041": ["convolution_gpu_bfyx_gemm_like",2],
+        "17459500507201824299": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "18379763351534914922": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "15993427814066246646": ["convolution_gpu_bfyx_gemm_like",1],
+        "7781809277449433812": ["convolution_gpu_bfyx_gemm_like",2],
+        "9003196270667188479": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3034466284781235431": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "11198378813600875939": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5509852360472061267": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "1239861345413267621": ["convolution_gpu_bfyx_gemm_like",2],
+        "1720791539242542292": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "2419819939573989749": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "9390919808369333231": ["convolution_gpu_bfyx_gemm_like",2],
+        "11882021989615795558": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "4003468969524607815": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12169148580322697755": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",2],
+        "14524678598440880756": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "14592395793778583608": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1781189282179491198": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16587387608532583713": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "11205571992835612111": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14674266217397415571": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642397690605957294": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "172303227623890951": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "17855733925989425515": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "13982221711075598070": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "13337122303005980542": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "5134857932624749530": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8740196547852036537": ["convolution_gpu_bfyx_gemm_like",2],
+        "9781830607177020570": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11297512843662536362": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "16071030448801649281": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4034250407843183678": ["convolution_gpu_bfyx_gemm_like",1],
+        "3661361503342294227": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7247891577022043949": ["convolution_gpu_bfyx_gemm_like",2],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "8734419426540206087": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "2559310381697374321": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "6659313690133629176": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7617123358753247310": ["fully_connected_gpu_fb_io_ref",2],
+        "10784905418636316601": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "7999747927804607567": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14670952132900619664": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4276712095427918904": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3806791682244402910": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "11879484013890539145": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "9287404618748313247": ["convolution_gpu_bfyx_gemm_like",1],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "13952295742818866246": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "2094546483928406874": ["convolution_gpu_bfyx_gemm_like",1],
+        "3831201505512446456": ["convolution_gpu_bfyx_gemm_like",0],
+        "14097319816812992451": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "7279393739634103483": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "13661880440426932218": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7590767013583950613": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1617362484243823916": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",813],
+        "8819268903800581706": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "16320454719906370247": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "10972033292930619311": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "16230621843665445228": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "8892991171111842341": ["convolution_gpu_bfyx_gemm_like",2],
+        "323234725943768094": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15287650965861631130": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "15818237122613168508": ["convolution_gpu_bfyx_gemm_like",0],
+        "6542486391263861823": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15938703221521364046": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8333743604646422982": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4800208854712166990": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16590893345666612869": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14733291836016183044": ["convolution_gpu_bfyx_gemm_like",2],
+        "15494543914974994991": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "1081287304647703427": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11609821372586026178": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "10961049607808752432": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6161072079255825074": ["convolution_gpu_bfyx_gemm_like",2],
+        "10392013312924273545": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "10400727836871462348": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11494395549955384747": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "3329610414149222728": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8986253016099337778": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12606196670791209919": ["convolution_gpu_bfyx_gemm_like",2],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "8863731258634577277": ["convolution_gpu_bfyx_gemm_like",2],
+        "2586132860307138964": ["convolution_gpu_bfyx_gemm_like",2],
+        "2844746478867668588": ["convolution_gpu_bfyx_gemm_like",2],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "1400089266180918877": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "17522452942286240233": ["convolution_gpu_bfyx_gemm_like",2],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "15417738436777481469": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4917595053453614536": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "5688623850477433571": ["convolution_gpu_bfyx_gemm_like",2],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",1],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "10773411423039491193": ["convolution_gpu_bfyx_gemm_like",2],
+        "11809236497308682596": ["convolution_gpu_bfyx_gemm_like",1],
+        "2146633923143071497": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "2968144776497288135": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "3311449696894745049": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "17472252137354770318": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7026575758396092435": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "15898888434295644774": ["convolution_gpu_bfyx_gemm_like",1],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",883],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "15661055655577513377": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12965800692507042874": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "287386909600391846": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "14800933038795670868": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10721811813682112908": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14762859593402798050": ["convolution_gpu_bfyx_gemm_like",2],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "15972805725107234322": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "11140864132614066113": ["convolution_gpu_bfyx_gemm_like",2],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16261543808418336089": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "15228614030349540878": ["convolution_gpu_bfyx_gemm_like",1],
+        "6335628260431943016": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "6545814945227676265": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3007505068107685147": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "13722424507812159961": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15659671804906879034": ["convolution_gpu_bfyx_gemm_like",2],
+        "15893297349596399716": ["convolution_gpu_bfyx_gemm_like",1],
+        "6612243861034102250": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "3913951712614107871": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2546472090573813082": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1013207188944763398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2679903779216253668": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11409066626289209846": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "7386836350136973872": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "16211466749116679534": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1403373982815401451": ["convolution_gpu_bfyx_gemm_like",1],
+        "7126601602274920416": ["convolution_gpu_bfyx_gemm_like",2],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",1],
+        "11914756126771310827": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17224820843490443805": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13683563727561197895": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "14159293183840880884": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11885660439698926227": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13448159575961515854": ["convolution_gpu_bfyx_gemm_like",0],
+        "13779700363254765602": ["convolution_gpu_bfyx_gemm_like",2],
+        "18125075313255528454": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2260718905219541967": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5688161172644782612": ["convolution_gpu_bfyx_gemm_like",1],
+        "12896164738668798380": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5635500901926740475": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "17691748026963003695": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "3513523165606656242": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15754688305730191542": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "37061093840513038": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6830643729780599672": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "8779164026828163571": ["convolution_gpu_bfyx_gemm_like",1],
+        "352808518345312040": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16835545111241063900": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9343876424591024597": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3064765745900772872": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2008064690158516711": ["convolution_gpu_bfyx_gemm_like",2],
+        "11447737411040418462": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "16485921493309285440": ["convolution_gpu_bfyx_gemm_like",2],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7589346100701197023": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16615858951735101760": ["fully_connected_gpu_fb_io_ref",1],
+        "13551767519605460627": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "3830091089824446164": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "5758223108250439377": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4399656162365214694": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "15571801737237063594": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6236857636305802170": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "8631194673451861459": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3392632422002516166": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "9402935157379983392": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13680502636898130714": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5503904988517480229": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "9561367273233389233": ["convolution_gpu_bfyx_gemm_like",2],
+        "17495070522944546801": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "5176939691838030517": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "4942131377140353094": ["convolution_gpu_bfyx_gemm_like",0],
+        "14946519992043402896": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "5398895598407183682": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",467],
+        "13753670205703732353": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "2148648022160178995": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "596528462327775677": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "7512702933193596918": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "9644723852089512961": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "264371219192743152": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "8663545677000846511": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7200893702912130808": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "5718747983756317198": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2850803473613487020": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2335783507270234825": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3088402690095697589": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "1211404528755199615": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11521288355888665606": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15816980369722540994": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "5781431860747226742": ["convolution_gpu_bfyx_gemm_like",2],
+        "15365776263895633531": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "3389739049224815652": ["convolution_gpu_bfyx_gemm_like",2],
+        "7877637636782924097": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "18398231411109020099": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17520777331163825810": ["convolution_gpu_bfyx_gemm_like",2],
+        "16462862831307415504": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5348059680010171141": ["convolution_gpu_bfyx_gemm_like",1],
+        "7289907211627391947": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5378151578014945610": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "5629582391075745771": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "11607736973932389832": ["convolution_gpu_bfyx_gemm_like",0],
+        "2598910952085172410": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "17342603054992556378": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3332444589775844154": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4136736579788862192": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13161798453564436688": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "18429276095695345973": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9539616823548370185": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9914440875772341708": ["convolution_gpu_bfyx_gemm_like",1],
+        "14484004336536993120": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "7065121716452374910": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8854234880878427078": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "1194267934213722567": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "2387389473399444503": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "10775785602937893911": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "8124166677361481618": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9267417754412894234": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "5896089609470353090": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "5119087113905313336": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4104062066031480003": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10857567623940140266": ["fully_connected_gpu_fb_io_ref",1],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "5149553691611520515": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11311890411536750673": ["convolution_gpu_bfyx_gemm_like",2],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "859377216693940737": ["convolution_gpu_bfyx_gemm_like",2],
+        "2915952195141872726": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14142812374094816721": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "12994819742376207273": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12057000101434512661": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "11047759270093007856": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "12715500118796263683": ["convolution_gpu_bfyx_gemm_like",2],
+        "2830742500858558621": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12445292008737311977": ["convolution_gpu_bfyx_gemm_like",2],
+        "15158997684077722015": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "13004055504657277105": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8347537383976709519": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "13398875754083902831": ["fully_connected_gpu_yxfb_ref",2],
+        "16450345154125804290": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6418748992581951435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "12642701787250074691": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "4642402648038764246": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "17026348860895225619": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "4554398307153171456": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4445257000541366640": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "4682062886371423209": ["convolution_gpu_bfyx_gemm_like",2],
+        "8337457116169698090": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "14969813450703071948": ["convolution_gpu_bfyx_gemm_like",1],
+        "14167086447992316314": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1961348920992050029": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",2],
+        "14218701503304823803": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",2],
+        "541744773413565297": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13647773816638053437": ["convolution_gpu_bfyx_gemm_like",2],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "3300655231758263066": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13985989113434682460": ["convolution_gpu_bfyx_gemm_like",1],
+        "16576300898841314587": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4082218299236753259": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3138712043201001156": ["convolution_gpu_bfyx_gemm_like",2],
+        "9493034132406318197": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2984236836610169934": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "9351428703239678614": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "17546650302679801134": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5089359404080552270": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5890683283363730941": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6678101356115372537": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1838534101161814609": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "17646394278957547470": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2651385050387738902": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "4792657031481471098": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13423515205322319913": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1431307776181554710": ["convolution_gpu_bfyx_gemm_like",2],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9771430089730856496": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "17308907916370632622": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "13435416060730279243": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "10842505566649585090": ["convolution_gpu_bfyx_gemm_like",1],
+        "6326191473779365124": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2705534741438659581": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "17050143605017295447": ["convolution_gpu_bfyx_gemm_like",2],
+        "11307721164906705899": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "11352094952907979172": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14512311371993445906": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13076343553185159307": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2832311883163804015": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "3182329375739242693": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1077773457856682663": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "17294244481988344762": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5401946420641519048": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10526411638069090068": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "8181704316455400709": ["convolution_gpu_bfyx_gemm_like",2],
+        "16462033126494826292": ["convolution_gpu_bfyx_gemm_like",2],
+        "12547252593506448096": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "5321807316257768": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "1071663904249509302": ["convolution_gpu_bfyx_gemm_like",2],
+        "1878953827218615252": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "8321769923556905957": ["convolution_gpu_bfyx_gemm_like",1],
+        "7053070767227498983": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12318427976031000768": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3060709449176556770": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "14741012384358891350": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3626743386403140330": ["convolution_gpu_bfyx_gemm_like",1],
+        "16134637021630473012": ["convolution_gpu_bfyx_gemm_like",1],
+        "15026219694198820614": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "15671873744670386067": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2870715678422088243": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "5103094815475470596": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3430998232987873998": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1127844465496534455": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15958017891397409552": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "2010255131587843361": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11679869968143173159": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1154469970162137785": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "8260689555974656662": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11206468937763516689": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "1265107284215037966": ["convolution_gpu_bfyx_gemm_like",2],
+        "6616869272699525153": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6953499208425592115": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "879896719155824868": ["convolution_gpu_bfyx_gemm_like",2],
+        "10111038481447198008": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6519443541076418301": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "7253709516917901897": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "10236258478395201152": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15513894336778253285": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "8942221095468681112": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13410850301164057911": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "6571438978296387721": ["convolution_gpu_bfyx_gemm_like",2],
+        "2020044486043617858": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14233219774448115529": ["convolution_gpu_bfyx_gemm_like",2],
+        "9770300588867836071": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "191374388179598660": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4184357870886924038": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "6235132681081375078": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "13297875917250935192": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14577775579978745344": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "9724624621108712962": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "17638692805430115529": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9729771183572950642": ["convolution_gpu_bfyx_gemm_like",1],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "9212091835906796243": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8528750110601691390": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "4737347018334654530": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "17829854042305231384": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "8571662320744858201": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "5828768432282043413": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "3685556976073096544": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "8047078039937885319": ["convolution_gpu_bfyx_gemm_like",2],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4366043672240989175": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6148022455516485135": ["convolution_gpu_bfyx_gemm_like",2],
+        "2932914865200583326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13225520357177380691": ["convolution_gpu_bfyx_gemm_like",2],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "5261762234237034874": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "5409329687010951601": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "10885752780697269323": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4577872082734403187": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "9614300332487270888": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6997121306455110286": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "1071090704302849258": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "937763627727362899": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "5185125307593023170": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "11933283931932057859": ["convolution_gpu_bfyx_gemm_like",1],
+        "18120169120088482114": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "16541535256432192398": ["convolution_gpu_bfyx_gemm_like",2],
+        "4646176801168621136": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3119045125726216156": ["convolution_gpu_bfyx_gemm_like",1],
+        "141166664952282933": ["convolution_gpu_bfyx_gemm_like",2],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8228641750970480948": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "835367600773871252": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15114370307779942381": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17962578815194404362": ["convolution_gpu_bfyx_gemm_like",2],
+        "4831224999851230245": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6812025576584060234": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "9601849246293120347": ["convolution_gpu_bfyx_gemm_like",2],
+        "15156805695359911457": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "4515798403196565084": ["convolution_gpu_bfyx_gemm_like",2],
+        "8122815203088327658": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5962764672151728219": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",1],
+        "8809794528993445200": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13660015013041074867": ["convolution_gpu_bfyx_gemm_like",2],
+        "5948701218437980356": ["convolution_gpu_bfyx_gemm_like",2],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "10800323158234163234": ["fully_connected_gpu_fb_oi_ref",2],
+        "6876164425008541018": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "14652791434312888296": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "12942776337163777730": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16884228931101540030": ["convolution_gpu_bfyx_gemm_like",2],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "18331981707436752260": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15765198153800696060": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9628735886189157469": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",2],
+        "12854272540346358832": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "11831092915967558428": ["convolution_gpu_bfyx_os_iyx_osv16",647
+        ]
+    },
+    "72": {
+        "9226443907548972870": ["convolution_gpu_bfyx_gemm_like",1],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "1051506168926530904": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "17382660912493284320": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",767],
+        "12693511427898130707": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",883],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8079376692609682448": ["convolution_gpu_bfyx_gemm_like",0],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",2],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14810839157236175179": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "17243648226968859637": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "9660812093766156608": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",539],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",2],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_gemm",1],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14335423820860953927": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1103228955716492167": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "16312223896859176991": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "10628725059172743408": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2108296560864415762": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "956022649859563080": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "9780938731831129283": ["convolution_gpu_bfyx_gemm_like",2],
+        "1701609125136907870": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",1],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "7590767013583950613": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "5103094815475470596": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "4229105529069729944": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "13102754309439605192": ["convolution_gpu_bfyx_gemm_like",2],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "3355259926747524578": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "5740738339752793113": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "1003101267609305257": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11267742746905371769": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "10509933181132310969": ["convolution_gpu_bfyx_gemm_like",1],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "8751016391945753900": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11465965972527519631": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like",2],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",2],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "10433541468308381909": ["convolution_gpu_bfyx_gemm_like",1],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "10642327923162019888": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",22],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11806105193035393795": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "11919129623429545762": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11086699387784339943": ["convolution_gpu_bfyx_os_iyx_osv16",495],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "8578747191812631883": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",62],
+        "17638692805430115529": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "8195881973746570408": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "12523676912856063091": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "13503688893307029975": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17025268985366223779": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",91],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "17824431042110985323": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15374625876485618845": ["convolution_gpu_bfyx_gemm_like",2],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13418701036204748812": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "16025442470600124062": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",1],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "11655994466278963438": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3221221905804708596": ["convolution_gpu_bfyx_gemm_like",1],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2305461098719675735": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "4640696923527766618": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",546],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "13775529405693629438": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "1435153323458789173": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "4217179485243909459": ["convolution_gpu_bfyx_gemm_like",1],
+        "875296362957469305": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "17035903590837750750": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "6133592828563353516": ["convolution_gpu_bfyx_gemm_like",1],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",523],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2534408579674556441": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "700717277178942679": ["convolution_gpu_bfyx_gemm_like",1],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1104489643524273315": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7744787957569714828": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7430073011895298582": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",0],
+        "2571882179292959757": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "13248567106128518549": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "9882204352209412039": ["convolution_gpu_bfyx_gemm_like",1],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "3277243911383750280": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4992668316921598993": ["convolution_gpu_bfyx_gemm_like",1],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",1],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",813],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "8061914949376516780": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15466940145773097237": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",0],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",2],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",4],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",661],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",1],
+        "11507538232733291666": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "5019077257951332016": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7199295899520406795": ["convolution_gpu_bfyx_gemm_like",2],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "18077281411861416889": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",812],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7712831597869354170": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15378025640603637387": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "16884228931101540030": ["convolution_gpu_bfyx_gemm_like",2],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3895088069642140043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "1081962464388501987": ["convolution_gpu_bfyx_os_iyx_osv16",873],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15640202505592598653": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "8434794604559592624": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17006655627343469372": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",1],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",2],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4239133538073498792": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "10290107543739998181": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17713034180977313726": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6329618009202266591": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "14289048840489035546": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1089679781525023551": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "54019631544204590": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "5095827462645341808": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "5047419871737940985": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2543041530639980505": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "16264774056719724826": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",2],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12190841837604350271": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "3432296808755992670": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "8560635685184432720": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",831],
+        "18136765667969393174": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "5933743119393822386": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "8939683514448064461": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "11066913713501760080": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",120],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13468713306678453952": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",1],
+        "1996860183441418841": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "17522452942286240233": ["convolution_gpu_bfyx_gemm_like",2],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",468],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",763],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "12534001599784153836": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "16190949264253468961": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12242618640422208652": ["convolution_gpu_bfyx_gemm_like",0],
+        "8818070832398055086": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "13300022131572486202": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",1],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "8025053805734757314": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "1934379409955686502": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "15857087373591747006": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "5311718276151327830": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",2],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "6025872155179042054": ["convolution_gpu_bfyx_gemm_like",2],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",2],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",472],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "8258382025812748961": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",124],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "10890975553758439233": ["convolution_gpu_bfyx_gemm_like",1],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",1],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "18337160891834020517": ["convolution_gpu_bfyx_os_iyx_osv16",151],
+        "14744368497944610864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",767],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "8376077531098664520": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "15322019609805777935": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "13842309033760176194": ["convolution_gpu_bfyx_gemm_like",2],
+        "5643908654122573882": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",0],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",1],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6323026044750482867": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11107930597263802755": ["convolution_gpu_bfyx_gemm_like",2],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4196367396954155354": ["convolution_gpu_bfyx_gemm_like",2],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",1],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17515573322312447679": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",2],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "10554266898346470422": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "12494969618927201911": ["fully_connected_gpu_fb_oi_ref",1],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4550028191070279999": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "8153567933591966877": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "482564204402769504": ["convolution_gpu_bfyx_gemm_like",1],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17208186152576814861": ["convolution_gpu_bfyx_gemm_like",1],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4716188972902735458": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",1],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1306339989221885682": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "14811022197918391667": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "7903891232234389925": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "4479979951990338510": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",1],
+        "13447028922679236865": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "15781622938833984014": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12644942072153919043": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_gemm",2],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "529543453251381109": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2730604806511016352": ["convolution_gpu_bfyx_gemm_like",2],
+        "12655099960717366198": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "15129834325410878425": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",1],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5381578460674280089": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "14681717813022425567": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "8707189142909022305": ["convolution_gpu_bfyx_gemm_like",2],
+        "5275016494706355806": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "1249137685908951501": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "13410850301164057911": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13702692566238948173": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3150231129728961455": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "16044646335477470657": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "9226912483632588371": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",0],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069334144391048686": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "9475130054420979752": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",2],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16955653765071712611": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "11490143853656040028": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8757900457181374694": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "8507854696766492454": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "10424278617647597641": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",1037],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",2],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "4766071144928072260": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",2],
+        "10084794570892043447": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "13681462437496627948": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "17343050785312683560": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "14716719350966652036": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",185],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",1],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1372939511728986224": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "15661322183507404821": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "6065819201836017182": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "16710651492402564794": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "11192356850081328892": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",1],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "689445825453914111": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "18375125668176498051": ["convolution_gpu_bfyx_gemm_like",2],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2140514316203117958": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10384537928514123040": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",1],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "6571438978296387721": ["convolution_gpu_bfyx_gemm_like",2],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "7026575758396092435": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "2294026590516781945": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "1954052357826969119": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",2],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "9454954846682513038": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "15315327794058441258": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "276407276027553756": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",1],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8618835732380720921": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "3349519148124496343": ["fully_connected_gpu_bf_io_gemm",2],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "5334190564423375247": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11060822686394981344": ["convolution_gpu_bfyx_gemm_like",1],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17825280904760131680": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "5409924335138540834": ["convolution_gpu_bfyx_os_iyx_osv16",526],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "9447458159095730492": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "659150305191479097": ["convolution_gpu_bfyx_os_iyx_osv16",902],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",151],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",2],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "153117141968471446": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "3036808833459559381": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "8321769923556905957": ["convolution_gpu_bfyx_gemm_like",1],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "6181272224000872375": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "7279393739634103483": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "879896719155824868": ["convolution_gpu_bfyx_gemm_like",2],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "6129602738379919488": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6981537186704688907": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11619548409913646265": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "288853243482418538": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "13538051178827008933": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "9367157746678824712": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "9040046051053703359": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",420],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9285566577169147378": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15287650965861631130": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1907439276166837309": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16559140502701231107": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9529614587861271730": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",21],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "4220826666482500445": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "2451712485584835395": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8032685176029570383": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16683485007140805060": ["fully_connected_gpu_fb_io_ref",1],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "5629373398445592781": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "17015791782274123780": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15997754881872769378": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17050143605017295447": ["convolution_gpu_bfyx_gemm_like",2],
+        "5953754321266570854": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "5843679089588930933": ["convolution_gpu_bfyx_gemm_like",2],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15993427814066246646": ["convolution_gpu_bfyx_gemm_like",1],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3860603464276263676": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",2],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6644418194983229139": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1138439260035360722": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",388],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",523],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "18043340998699622388": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "10792503079194374004": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "18424400171776141118": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16286085532892593349": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7688176479120305539": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8057302050645780813": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",512],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "792684262493086891": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "1410630713443793537": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "11609821372586026178": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "287386909600391846": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",2],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",284],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "16230621843665445228": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "9404677451270692749": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12493863403516600413": ["convolution_gpu_bfyx_gemm_like",1],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "7148542290597073512": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11951606039079763598": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6902644989079870993": ["convolution_gpu_bfyx_gemm_like",1],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "6438522646185979880": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1541754036637209097": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13810995219720233595": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "18109284647478027063": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",2],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",2],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1034],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "16547425454653232058": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8108843303778211282": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "16076153317792960383": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",1],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14221578799010900252": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",2],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",2],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",2],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "6522575549211855712": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "15417738436777481469": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",764],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "2349007644347065353": ["convolution_gpu_bfyx_gemm_like",2],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "11829442945690098558": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "14902389080201926109": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "9522661528867955338": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "2418288192668085805": ["convolution_gpu_bfyx_gemm_like",2],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",2],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "15026219694198820614": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "2345023488044002149": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "5219048275475447369": ["convolution_gpu_bfyx_gemm_like",2],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "11599932445375240727": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "16818714747882774917": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "13115589642140732066": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15511138074959300404": ["convolution_gpu_bfyx_gemm_like",2],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "15579919505002150556": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "1116274074896622552": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",1],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "17808913959977434594": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "18012549942299450620": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "9759380701896779097": ["convolution_gpu_bfyx_gemm_like",2],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "1882052795393187384": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15464327246951632247": ["convolution_gpu_bfyx_gemm_like",1],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "12394049027081208902": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",1],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "14077148976508649021": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17310332946322628458": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15114370307779942381": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5287076386757143976": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "2525260242689556544": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "11459784003592366395": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",1],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",2],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "2651385050387738902": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4366168099274266975": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "4897991181236908768": ["convolution_gpu_bfyx_gemm_like",1],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",2],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "9287404618748313247": ["convolution_gpu_bfyx_gemm_like",1],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15494543914974994991": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "14484890926084856480": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5948701218437980356": ["convolution_gpu_bfyx_gemm_like",2],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",1],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",876],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3963106895592011725": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",467],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "16173557782125372935": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",989],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "8819268903800581706": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",90],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "72444706264681262": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "5688478347124565305": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",1],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "8253823502854784432": ["convolution_gpu_bfyx_gemm_like",2],
+        "3746573775462003750": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",1],
+        "4239415134522959352": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "3448477246688526708": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "17928043901784474130": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "14397348576352573007": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",1],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "16758962840329202004": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "522181557896569275": ["convolution_gpu_bfyx_gemm_like",0],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9410978119783758141": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "52089503050497755": ["convolution_gpu_bfyx_os_iyx_osv16",899],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "4073467095502162430": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2893564501191050837": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "1742897526168249500": ["convolution_gpu_bfyx_gemm_like",1],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",1],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2623687018437195679": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12796777049340516563": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",1],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "18384657372655350144": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",1],
+        "15490478608105402679": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "12427258337646070422": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "1077773457856682663": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "8906588133431586825": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "17912189681971987483": ["convolution_gpu_bfyx_gemm_like",2],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "18386376129938707290": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",883],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10118395047539851751": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "659846949368492111": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "12461575861709234385": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "9191832520273617003": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "1838534101161814609": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",2],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7264756313770306662": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "3755253206085028904": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",2],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "2265784112305305260": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "789359733867650915": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",1],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7211355951470869591": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "15591167992985613695": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "2007192658799516915": ["fully_connected_gpu_bf_io_gemm",1],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "9707630588260222630": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",1],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",1],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",2],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "14650567822254940018": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "14213516751025324346": ["convolution_gpu_bfyx_gemm_like",2],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "10320711719466983961": ["convolution_gpu_bfyx_gemm_like",2],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "2038505773698938555": ["fully_connected_gpu_bf_io_gemm",1],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15156525717629023944": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2727219457659794468": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5754844816339228920": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",2],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "6263019986730305851": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "7808544677773370430": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",1],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13328449155966085543": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10607904718265020949": ["convolution_gpu_bfyx_gemm_like",2],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "14667209474639064623": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "13933912937625580405": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3179874645565098825": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",2],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",1],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "5687802882700097624": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "9100044555742394133": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",1],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",1],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8528750110601691390": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "7575675354187625951": ["convolution_gpu_bfyx_gemm_like",2],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "13762042713029963144": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",1],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "16053585286807864356": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "8272823732258536202": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11130439225010714550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",1],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",2],
+        "16683485007140805060": ["fully_connected_gpu_yxfb_ref",2],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "7650862961269327235": ["convolution_gpu_bfyx_1x1",2],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",1],
+        "5291011077679733990": ["convolution_gpu_bfyx_gemm_like",2],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",2],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4362304842016958728": ["convolution_gpu_bfyx_gemm_like",2],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",2],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7669403041163460089": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12397280593466519809": ["convolution_gpu_bfyx_gemm_like",2],
+        "2296581485980163665": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7447163906170805189": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "16243196137456624852": ["convolution_gpu_bfyx_gemm_like",2],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",1],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",2],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "9439431829175743345": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10753540518493641553": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",2],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",2],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like",2],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like",2],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",2],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "5288793454052261767": ["convolution_gpu_bfyx_gemm_like",2],
+        "3820661057776133570": ["convolution_gpu_bfyx_1x1",2],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "17025324057045572535": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",2],
+        "5941092474669713339": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14204609663091442879": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",2],
+        "959260710517842876": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",1],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",1],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2929715823970060874": ["convolution_gpu_bfyx_gemm_like",1],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14268594692585922659": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "10471519687597963116": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "15809639778580769565": ["convolution_gpu_bfyx_gemm_like",2],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",1],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "10864011008000364415": ["convolution_gpu_bfyx_1x1",2],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7201521533301617290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",2],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",2],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2114232149447438823": ["convolution_gpu_bfyx_gemm_like",2],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",2],
+        "5754396201681434378": ["convolution_gpu_bfyx_1x1",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "6217542346826403576": ["convolution_gpu_bfyx_1x1",2],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",1],
+        "11324651029379152442": ["convolution_gpu_bfyx_1x1",2],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",2],
+        "18446245971488003004": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "7223801044761006523": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3563872903821081702": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",1],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "13878967140838761911": ["convolution_gpu_bfyx_1x1",1],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",2],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",2],
+        "7106362077449435105": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "4927360358387344983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "603883331897298932": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "11263540528012919947": ["convolution_gpu_bfyx_1x1",2],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "16949056117405140365": ["convolution_gpu_bfyx_gemm_like",2],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",0],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5044721291675005144": ["convolution_gpu_bfyx_1x1",2],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",2],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11690533591656807605": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "11872943152839631823": ["convolution_gpu_bfyx_gemm_like",2],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",455],
+        "2608363732937932266": ["convolution_gpu_bfyx_gemm_like",2],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "12151068022697708126": ["convolution_gpu_bfyx_gemm_like",2],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",2],
+        "9741607635826869269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2173720698351153121": ["convolution_gpu_bfyx_gemm_like",2],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",1],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",1],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",2],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "2984726467649419856": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",2],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",2],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",2],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",1],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like",2],
+        "13403161389559730": ["convolution_gpu_bfyx_gemm_like",2],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",2],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",1],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2273992727647793692": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8421388456873652700": ["convolution_gpu_bfyx_gemm_like",2],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like",2],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "5751283221740229986": ["convolution_gpu_bfyx_gemm_like",1],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like",2],
+        "1318571118468536310": ["convolution_gpu_bfyx_gemm_like",2],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",1],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like",2],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "9941035405796680081": ["convolution_gpu_bfyx_1x1",1],
+        "13833960927635646899": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like",2],
+        "10432365444137108781": ["convolution_gpu_bfyx_gemm_like",2],
+        "13454265023861566476": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",2],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",1],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "14766477690417085350": ["convolution_gpu_bfyx_1x1",2],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5074273865983613482": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "11706446082856895571": ["convolution_gpu_bfyx_gemm_like",2],
+        "4708035980731751007": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "150132162949295379": ["convolution_gpu_bfyx_1x1",2],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",2],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",1],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "9243949750444156746": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",2],
+        "5600128039063009632": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8984436655107983227": ["convolution_gpu_bfyx_gemm_like",2],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",1],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "13106818352216009354": ["convolution_gpu_bfyx_gemm_like",2],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",664],
+        "6254141935545262078": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8174040194088942964": ["convolution_gpu_bfyx_os_iyx_osv16",945],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "8458082326743351141": ["convolution_gpu_bfyx_gemm_like",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",2],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "14206125678667603810": ["convolution_gpu_bfyx_1x1",1],
+        "14147460733160099960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "12972798847556569913": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",1],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "4773077837537775324": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",2],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",2],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",1],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",2],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",2],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "18416908414174464784": ["convolution_gpu_bfyx_gemm_like",2],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "11705756153433897198": ["convolution_gpu_bfyx_1x1",2],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "14764715930784496165": ["convolution_gpu_bfyx_gemm_like",2],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",2],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12831298482349900359": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2856601829807186494": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1338705434700924127": ["convolution_gpu_bfyx_1x1",1],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",2],
+        "6328802691680458752": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12166852830214895457": ["convolution_gpu_bfyx_1x1",2],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",2],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "1208161922424418734": ["convolution_gpu_bfyx_gemm_like",2],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "1418595171949196661": ["convolution_gpu_bfyx_gemm_like",2],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",2],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8837721075413149240": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",2],
+        "11626398907755088688": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8651641584737798174": ["convolution_gpu_bfyx_gemm_like",2],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",2],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17742192339816511494": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1569043950563130463": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",2],
+        "12501619443242354860": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",1],
+        "15924916465272239832": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",2],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",2],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "6845814820599174031": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14289082888174784976": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "2321148334382088982": ["convolution_gpu_bfyx_gemm_like",2],
+        "9757389422721488173": ["convolution_gpu_bfyx_1x1",1],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",2],
+        "8390889357546397717": ["convolution_gpu_bfyx_1x1",1],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",0],
+        "10415046594066474634": ["convolution_gpu_bfyx_gemm_like",2],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",2],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",904],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",2],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",2],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "8943913562339525413": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4216958486055161753": ["convolution_gpu_bfyx_os_iyx_osv16",105],
+        "5240706676373148280": ["convolution_gpu_bfyx_gemm_like",2],
+        "5245526691775741296": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",565],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",2],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",2],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "7432142107544210174": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "9542325095876448686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",2],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",1],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",2],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3699344686791530101": ["convolution_gpu_bfyx_gemm_like",2],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",2],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",1],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",1],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",2],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "7183578232279711009": ["convolution_gpu_bfyx_gemm_like",2],
+        "2204178900998688268": ["convolution_gpu_bfyx_gemm_like",2],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",1],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",1],
+        "7009459929666511861": ["convolution_gpu_bfyx_1x1",1],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",2],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",1],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",1],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",2],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "12194037100109755112": ["convolution_gpu_bfyx_gemm_like",2],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like",2],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",2],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "13830605041347009953": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",2],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3385797925880519845": ["convolution_gpu_bfyx_1x1",2],
+        "1596353239542510685": ["convolution_gpu_bfyx_gemm_like",2],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",2],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",2],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",2],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",2],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10724501418439612080": ["convolution_gpu_bfyx_gemm_like",1],
+        "3976736548270395981": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "15773157615731010456": ["convolution_gpu_bfyx_gemm_like",2],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "3541538046227217664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "10681768474583067517": ["convolution_gpu_bfyx_gemm_like",1],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10979362792894404338": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "9328223957245552723": ["convolution_gpu_bfyx_gemm_like",2],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",2],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "331661172067077796": ["convolution_gpu_bfyx_1x1",2],
+        "13161997040644039778": ["convolution_gpu_bfyx_gemm_like",2],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",1],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "7667898603371717971": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",2],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "6020017927557041768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",2],
+        "1982176363226079588": ["convolution_gpu_bfyx_gemm_like",2],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "6109013751635776331": ["convolution_gpu_bfyx_gemm_like",2],
+        "9076758673133996959": ["convolution_gpu_bfyx_gemm_like",2],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",2],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",2],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",2],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",2],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",2],
+        "584086621952390547": ["convolution_gpu_bfyx_gemm_like",2],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",1],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "16915857558806082023": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",2],
+        "15031155621982459860": ["convolution_gpu_bfyx_gemm_like",2],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",2],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",167],
+        "13160712904661288567": ["convolution_gpu_bfyx_1x1",1],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",2],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",2],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like",2],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7375461241315602473": ["convolution_gpu_bfyx_gemm_like",2],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",2],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",1],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",1],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",2],
+        "10722677916294015259": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",2],
+        "8712136292276123857": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "16818206615424635387": ["convolution_gpu_bfyx_1x1",1],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "1186545671730357033": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "11132679855317294753": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4135068756462147853": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "16293465561256937726": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "15989894214714907271": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",103],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "4339711224604149541": ["convolution_gpu_bfyx_gemm_like",2],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "5911282942658469852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "4398371999113956082": ["convolution_gpu_bfyx_gemm_like",2],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",2],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",2],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",2],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",2],
+        "87031578643428011": ["convolution_gpu_bfyx_1x1",2],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15739278428190392018": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",2],
+        "16075006181495932250": ["convolution_gpu_bfyx_gemm_like",2],
+        "786401653335542559": ["convolution_gpu_bfyx_gemm_like",2],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "9545968464906009869": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",2],
+        "15677717057398875599": ["convolution_gpu_bfyx_gemm_like",1],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "7770000755097925765": ["convolution_gpu_bfyx_1x1",2],
+        "8096131027165540886": ["convolution_gpu_bfyx_gemm_like",2],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "4759671642533786591": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",1],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",2],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "739676584505475609": ["convolution_gpu_bfyx_gemm_like",2],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",1],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",2],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",2],
+        "8155268141318893606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9585113116232600562": ["convolution_gpu_bfyx_gemm_like",1],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "875296362957469305": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",1],
+        "9440117898128288296": ["convolution_gpu_bfyx_gemm_like",2],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "16748662918272106932": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3190494353583341446": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "3499406509137418124": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "17207560805775399864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "8792202318168046223": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "14466032674083938714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",2],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "818998169319147148": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11807282628372660280": ["convolution_gpu_bfyx_1x1",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "13851851281384416649": ["convolution_gpu_bfyx_1x1",1],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",1],
+        "4455369117448405874": ["convolution_gpu_bfyx_1x1",2],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "16364494883229084045": ["convolution_gpu_bfyx_gemm_like",2],
+        "15006321421735686121": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",2],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "11987564534722442223": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",2],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "2936333406928424760": ["convolution_gpu_bfyx_1x1",2],
+        "8541982562061181756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "1345101751956733589": ["convolution_gpu_bfyx_gemm_like",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",1],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",2],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "2339864165283480961": ["convolution_gpu_bfyx_1x1",2],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "17174919737114915467": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",2],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "5657471280535146301": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",2],
+        "11800783548769329949": ["convolution_gpu_bfyx_gemm_like",2],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10509933181132310969": ["convolution_gpu_bfyx_gemm_like",2],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",2],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "345043289576587800": ["convolution_gpu_bfyx_1x1",2],
+        "13264617841270329349": ["convolution_gpu_bfyx_1x1",2],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",2],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "13145474177271090694": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like",2],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "16610284927818475574": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3872151366780051246": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14001406016806064079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "13472577372534605883": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10536316961655703500": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "8130920994920685157": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4718716595177056289": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",0],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",2],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "9525535670799618110": ["convolution_gpu_bfyx_gemm_like",2],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",2],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "10480527638577674825": ["convolution_gpu_bfyx_1x1",2],
+        "3441335188113424896": ["convolution_gpu_bfyx_gemm_like",2],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "14885031472057965707": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",2],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",2],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",1],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "9562527071055150197": ["convolution_gpu_bfyx_1x1",2],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "1089944493540593798": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",2],
+        "3383222668132648804": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",2],
+        "4274425737610351312": ["convolution_gpu_bfyx_gemm_like",2],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",2],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like",2],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",1039],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13474805373264874144": ["convolution_gpu_bfyx_1x1",2],
+        "2245166025103475783": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15363606233048272809": ["convolution_gpu_bfyx_1x1",2],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",1],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2554991397391195611": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "6288489890578212082": ["convolution_gpu_bfyx_gemm_like",2],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "16020916772006653269": ["convolution_gpu_bfyx_1x1",1],
+        "5720964268093705079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17477062954520561609": ["convolution_gpu_bfyx_gemm_like",2],
+        "994842991399671507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",2],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "12046017161414846599": ["convolution_gpu_bfyx_1x1",2],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "5459463503840817402": ["convolution_gpu_bfyx_1x1",2],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",2],
+        "537074122417021898": ["convolution_gpu_bfyx_os_iyx_osv16",100],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",2],
+        "1245259979364728404": ["convolution_gpu_bfyx_1x1",2],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",2],
+        "15188570678726970998": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",2],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",0],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "16347412180100581330": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",2],
+        "3141886504884887200": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",2],
+        "4238885454989272754": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5012013738970489338": ["convolution_gpu_bfyx_1x1",1],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",0],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",1],
+        "13575423234109624706": ["fully_connected_gpu_yxfb_ref",2],
+        "3114869763557037270": ["fully_connected_gpu_fb_oi_ref",1],
+        "10292585962794261197": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "10914921540144371519": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",1],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10766317990628501609": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",1],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",0],
+        "8058419689646625853": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like",2],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",1],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",1],
+        "8519354640245415816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "16773645387243701837": ["convolution_gpu_bfyx_gemm_like",2],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "8069537351442302814": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "138379779469699309": ["convolution_gpu_bfyx_gemm_like",2],
+        "10106454449619141260": ["convolution_gpu_bfyx_1x1",2],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",2],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "16925721317097534009": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",2],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",2],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3603187029740446600": ["convolution_gpu_bfyx_gemm_like",2],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",2],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "7531346828150129063": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "12977678792503377525": ["convolution_gpu_bfyx_gemm_like",1],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5600807544955072308": ["convolution_gpu_bfyx_gemm_like",2],
+        "7481256533438761028": ["convolution_gpu_bfyx_gemm_like",2],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",1],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "12806934028210472719": ["convolution_gpu_bfyx_gemm_like",2],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",2],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "4640696923527766618": ["convolution_gpu_bfyx_gemm_like",2],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",2],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",2],
+        "15497797842820949408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",1],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3635446784873718932": ["convolution_gpu_bfyx_gemm_like",2],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "14126906427006602775": ["convolution_gpu_bfyx_1x1",2],
+        "6664482192233202590": ["convolution_gpu_bfyx_gemm_like",2],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1485662490111767875": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13328911884191551889": ["convolution_gpu_bfyx_1x1",2],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",2],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "10935309102034762723": ["convolution_gpu_bfyx_1x1",1],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",1],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",2],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",1],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "6760797535531423152": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12985942652866621579": ["fully_connected_gpu_fb_io_ref",2],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",2],
+        "13596876807637507229": ["convolution_gpu_bfyx_1x1",2],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",1],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",1],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",1],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "4610200388191607540": ["convolution_gpu_bfyx_gemm_like",2],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like",2],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",2],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "14362876471450307424": ["convolution_gpu_bfyx_1x1",2],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",1],
+        "2727175120437582536": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9314293064351558241": ["convolution_gpu_bfyx_gemm_like",2],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",2],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",1],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",1],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",2],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "6942016672941874829": ["convolution_gpu_bfyx_gemm_like",2],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17344974951998490453": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7870154008378361670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "2439993891369206440": ["convolution_gpu_bfyx_1x1",2],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "4617347486560666277": ["convolution_gpu_bfyx_1x1",1],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",2],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",2],
+        "16361932270527364507": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",1],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",2],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "14930789530046665855": ["convolution_gpu_bfyx_gemm_like",2],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",2],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",2],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "178353385245384751": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",2],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",2],
+        "7171904645566467208": ["convolution_gpu_bfyx_gemm_like",2],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16574710115918192418": ["convolution_gpu_bfyx_gemm_like",2],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",1],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",2],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",2],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "16681690088928624738": ["convolution_gpu_bfyx_gemm_like",2],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "9987415314864002460": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7846384623429362522": ["convolution_gpu_bfyx_1x1",1],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "7235358742317442134": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2817919813339364130": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "4571404165794634411": ["convolution_gpu_bfyx_1x1",2],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",1],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2866656294663853474": ["convolution_gpu_bfyx_1x1",2],
+        "1701412735970485849": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "8168240543278779314": ["convolution_gpu_bfyx_1x1",1],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "9989055862610193828": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "632116056424249698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",2],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",2],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",1],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",2],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",1],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "2173867324489962689": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "8561261337239934159": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8490260671996115530": ["convolution_gpu_bfyx_gemm_like",1],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "14578867494693499627": ["convolution_gpu_bfyx_gemm_like",2],
+        "14905520834426630145": ["convolution_gpu_bfyx_gemm_like",2],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like",2],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "3398322619007806698": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",1],
+        "13602140021189675477": ["convolution_gpu_bfyx_gemm_like",2],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "14184895905338394239": ["convolution_gpu_bfyx_gemm_like",2],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "6114241186364821679": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",275],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7015738038963065110": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",1],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",2],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",2],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",526],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",2],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "10022487076451608714": ["convolution_gpu_bfyx_gemm_like",2],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "6726099352298108756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",1],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "12871555773123368130": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6603778920476932267": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",1],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",1],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078590909693331731": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",1],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "2527276292172180386": ["convolution_gpu_bfyx_gemm_like",2],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "14115742296883450319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2929190644951986399": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "4849343880559509889": ["convolution_gpu_bfyx_1x1",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",2],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",387],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "15859493313686060349": ["convolution_gpu_bfyx_gemm_like",2],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",2],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",1],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",2],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",2],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",2],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",2],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9955939178447682108": ["convolution_gpu_bfyx_1x1",2],
+        "6648876837655776653": ["convolution_gpu_bfyx_1x1",2],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "15329680728165965773": ["convolution_gpu_bfyx_gemm_like",2],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "5040095338370816349": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",2],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",2],
+        "1390379098099686972": ["convolution_gpu_bfyx_1x1",2],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",1],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",1],
+        "290134020607738418": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "9426665763007611385": ["convolution_gpu_bfyx_gemm_like",2],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",1],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "7532088618116521936": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "3177304125602972370": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "216603198215625772": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",2],
+        "4154403364889130045": ["convolution_gpu_bfyx_gemm_like",2],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",2],
+        "3219408878901707426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "3499106702307464480": ["convolution_gpu_bfyx_gemm_like",2],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",2],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",2],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "3056212889689424946": ["convolution_gpu_bfyx_1x1",2],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6817494598328071314": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "15065019229949449623": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11626402549863483301": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",1],
+        "10893432143734884603": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",2],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",2],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "9737833587413114584": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",147],
+        "4815047491742617397": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "3622409603053918029": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15636128989267984459": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "8543619733732987550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1074748462756364699": ["fully_connected_gpu_fb_oi_ref",1],
+        "15924583510704449214": ["convolution_gpu_bfyx_gemm_like",1],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",2],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",2],
+        "4126895998426674411": ["convolution_gpu_bfyx_gemm_like",2],
+        "2477849395789783501": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",2],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "677249604491773387": ["convolution_gpu_bfyx_gemm_like",2],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",2],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",2],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",2],
+        "14263790627243107300": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",642],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",1],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",2],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10912495395422146386": ["convolution_gpu_bfyx_gemm_like",2],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",2],
+        "13124342334495538095": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like",2],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "10899110544832584656": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",1],
+        "577844026691991089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5963901433137582265": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421204749289937399": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",2],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",2],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1597770067928214597": ["convolution_gpu_bfyx_1x1",1],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "11910735867274493498": ["convolution_gpu_bfyx_gemm_like",2],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",2],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4079026972040047969": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "7954972694876158422": ["convolution_gpu_bfyx_1x1",2],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "9803492989444302959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17264010982688979937": ["convolution_gpu_bfyx_1x1",2],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "15106614232165315070": ["convolution_gpu_bfyx_gemm_like",2],
+        "15271783562528081169": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "12604104383683210104": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "16341722570340169855": ["convolution_gpu_bfyx_1x1",2],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",1],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",2],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",2],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",2],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "2188101366183302888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "7232326270078161768": ["convolution_gpu_bfyx_gemm_like",2],
+        "4378422094110940766": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6131481289104111211": ["convolution_gpu_bfyx_gemm_like",2],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14540578324750869319": ["convolution_gpu_bfyx_gemm_like",2],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",1],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",2],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",1],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",2],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",2],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",2],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16182470664818268848": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "3725013268198063198": ["convolution_gpu_bfyx_1x1",2],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2581414750854621875": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",2],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "14544219140091420262": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",2],
+        "9480653639044390919": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",1],
+        "17084977396231597605": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "9737565171095493297": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1632416005093914709": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4531222427159927606": ["convolution_gpu_bfyx_gemm_like",2],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "15231987838322151865": ["convolution_gpu_bfyx_1x1",2],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12512751736409465214": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13590444711975157776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",1],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14383657211047876136": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",2],
+        "8257103926661643451": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "733956743303342862": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "7292351660229751817": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",1],
+        "16986358655784856534": ["convolution_gpu_bfyx_gemm_like",2],
+        "10141927023849730720": ["convolution_gpu_bfyx_1x1",1],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",2],
+        "12755692101476964677": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "8931169575495985034": ["convolution_gpu_bfyx_gemm_like",2],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",2],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "2863465257341735941": ["convolution_gpu_bfyx_1x1",1],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "13760645810144930270": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",2],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "5941852872160795604": ["convolution_gpu_bfyx_gemm_like",2],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "7748233564411787605": ["convolution_gpu_bfyx_gemm_like",2],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",2],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like",2],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "18436249934780056991": ["convolution_gpu_bfyx_os_iyx_osv16",296],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "721174714308243785": ["convolution_gpu_bfyx_gemm_like",2],
+        "13809330759308309353": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "15799159401545270696": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14558572801374416278": ["convolution_gpu_bfyx_gemm_like",1],
+        "6222595759158615206": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13369603621524676979": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",2],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",1],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "12917241193304093727": ["convolution_gpu_bfyx_gemm_like",2],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",2],
+        "15967614281807823696": ["convolution_gpu_bfyx_gemm_like",2],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "438528596970898721": ["convolution_gpu_bfyx_gemm_like",1],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",2],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1984152634309440563": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "7780140599533242850": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "3265415000818832667": ["convolution_gpu_bfyx_gemm_like",2],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",2],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8761283252495354972": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "5965451243366505522": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_gemm_like",1],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8700574100180128776": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",2],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15578456771467281881": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",2],
+        "394778201589371681": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "10256831975351722184": ["convolution_gpu_bfyx_gemm_like",1],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",2],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "14151747022287993729": ["convolution_gpu_bfyx_gemm_like",2],
+        "6300691162962736560": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "3779229442395464456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10765280349477640969": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "6254161707168091438": ["convolution_gpu_bfyx_gemm_like",2],
+        "1760391741350091665": ["convolution_gpu_bfyx_gemm_like",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13038533272699602337": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "4091702228990140696": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4252157815622916471": ["convolution_gpu_bfyx_1x1",2],
+        "9918371346247634545": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "5941298590926032148": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5156033406916344703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "4848143712599565301": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "12900949103593247293": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7624259732952222597": ["convolution_gpu_bfyx_gemm_like",2],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",1],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "3503893875515897267": ["convolution_gpu_bfyx_gemm_like",2],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",1],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "1963081583851864291": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",1],
+        "17647962002015093887": ["convolution_gpu_bfyx_gemm_like",2],
+        "708747442142592697": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16911464046178654033": ["convolution_gpu_bfyx_1x1",2],
+        "18184621367843960190": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",2],
+        "4769003637955328938": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13251091004269229867": ["convolution_gpu_bfyx_gemm_like",2],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",1],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "13585163747565192884": ["convolution_gpu_bfyx_gemm_like",2],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",2],
+        "17224104246148265328": ["convolution_gpu_bfyx_gemm_like",2],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",1],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "11637325834858582585": ["convolution_gpu_bfyx_gemm_like",2],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",1],
+        "7107677063657303327": ["convolution_gpu_bfyx_1x1",2],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",2],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "7075659071934895087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",2],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7683334381958571864": ["convolution_gpu_bfyx_gemm_like",2],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7647236080048602591": ["convolution_gpu_bfyx_gemm_like",1],
+        "12625112690264223217": ["convolution_gpu_bfyx_gemm_like",2],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",2],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15980348884716629349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15555083739490354527": ["convolution_gpu_bfyx_gemm_like",2],
+        "13960388312976163971": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2598267743388306204": ["convolution_gpu_bfyx_gemm_like",2],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",1],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",1],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",2],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",2],
+        "3336076058264596420": ["convolution_gpu_bfyx_gemm_like",2],
+        "12866217660635921034": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "8857763129101380288": ["convolution_gpu_bfyx_gemm_like",2],
+        "5479761740065152589": ["convolution_gpu_bfyx_gemm_like",2],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",2],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",1],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",2],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4423866541063606768": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",1],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",2],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "16910952799476896905": ["convolution_gpu_bfyx_gemm_like",2],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",1],
+        "14502856487639608696": ["convolution_gpu_bfyx_gemm_like",2],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",2],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",1],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",2],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2683304757433993300": ["convolution_gpu_bfyx_gemm_like",2],
+        "1680468564927032670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14206076551739831333": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11634932044447867039": ["convolution_gpu_bfyx_gemm_like",2],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "8794896449397768269": ["convolution_gpu_bfyx_gemm_like",2],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "9714508918051740792": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17446505012657609153": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",2],
+        "11669828823444745889": ["convolution_gpu_bfyx_gemm_like",2],
+        "18035673326929466074": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14985236276429954162": ["convolution_gpu_bfyx_gemm_like",2],
+        "3662747857062156477": ["convolution_gpu_bfyx_gemm_like",2],
+        "11797601971796699898": ["convolution_gpu_bfyx_gemm_like",2],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "12061567381160185735": ["convolution_gpu_bfyx_1x1",1],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",2],
+        "13468081302022888489": ["convolution_gpu_bfyx_gemm_like",2],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "14079654309452583394": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",2],
+        "8609939102588915855": ["convolution_gpu_bfyx_gemm_like",2],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",2],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",2],
+        "8787438180071123604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4933831571091731212": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "2242829490403202087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",2],
+        "16579057939215877904": ["convolution_gpu_bfyx_gemm_like",2],
+        "17109520309574369561": ["convolution_gpu_bfyx_gemm_like",2],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14349625788399542568": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",2],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "4168273493370024327": ["convolution_gpu_bfyx_1x1",1],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",2],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",1],
+        "11241838709529552265": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",281],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "11587239927319376658": ["convolution_gpu_bfyx_gemm_like",2],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",1],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",1],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17225552472711821360": ["convolution_gpu_bfyx_os_iyx_osv16",946],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",2],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "7132328255408635227": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15961487889420208188": ["convolution_gpu_bfyx_gemm_like",2],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",1],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",2],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "15757308772667178999": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4403753181729432604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "15078168059698267650": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "3477539135137665170": ["convolution_gpu_bfyx_gemm_like",2],
+        "1425953627379976115": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "11768117585574496387": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "11939914680143672459": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "13503555814874045782": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5740745357953479527": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "12370729327673204804": ["convolution_gpu_bfyx_gemm_like",2],
+        "15192230303376521834": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",1],
+        "14532519639619315651": ["convolution_gpu_bfyx_gemm_like",2],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",1],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",0],
+        "13735180250757239202": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "10572945270796129630": ["fully_connected_gpu_fb_io_ref",1],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",1],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "18255227391100087860": ["convolution_gpu_bfyx_1x1",2],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",2],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",2],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",1],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "11856815095538913065": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18209930746627816139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7780336054545552428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7201521533301617290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",1],
+        "3211956138512889433": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6944031900067948180": ["convolution_gpu_yxfb_yxio_b16",0],
+        "5449117614287394433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "87031578643428011": ["convolution_gpu_bfyx_1x1",2],
+        "4833749391314748606": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8450272092307894299": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "13325762052023866627": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "15773157615731010456": ["convolution_gpu_bfyx_gemm_like",2],
+        "6666210546769702280": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "11254744277059719812": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10309586646776223605": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",1],
+        "12151068022697708126": ["convolution_gpu_bfyx_gemm_like",2],
+        "6464050901421037006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12550985938092975889": ["convolution_gpu_bfyx_1x1",2],
+        "3680396164645753224": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",2],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "15705908639736679687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15281554100135159550": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16238415425814188039": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "7226002258982605405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12397280593466519809": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527018855890902975": ["convolution_gpu_bfyx_gemm_like",2],
+        "8555049634736330391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3107655421406621915": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14754849694687093032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15065019229949449623": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17036482252028102703": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "14304497513584420080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "7498614018449036163": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "12221101678609734421": ["convolution_gpu_yxfb_yxio_b16",2],
+        "560996739186313493": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1223196405651730260": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7889602687414497280": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13160712904661288567": ["convolution_gpu_bfyx_1x1",1],
+        "17178308105985812083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17742192339816511494": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "13082313288887957490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "6318214731544748245": ["convolution_gpu_bfyx_gemm_like",2],
+        "5899560521070338192": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "17192352762166764393": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2487679091192300910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14126906427006602775": ["convolution_gpu_bfyx_1x1",2],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",1032],
+        "15504618703544589723": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12867038076564517306": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11361202190524990711": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",147],
+        "15231987838322151865": ["convolution_gpu_bfyx_1x1",2],
+        "15897300973213364823": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "2412846055735335136": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "8339704352841356825": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14667793472412360981": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16312739695844838884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11861634536583463947": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "13425251102263428554": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "216603198215625772": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12714814165247623529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16883372966656079608": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5274929595362413625": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2242829490403202087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6832967250168141428": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13454265023861566476": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4242173940230902960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3699344686791530101": ["convolution_gpu_bfyx_gemm_like",2],
+        "11342135956789192833": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "10850369799801518638": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4283886984540574108": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12308956927236847009": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1933147648540963732": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6784853321527374515": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9363988379673156863": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",2],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "5328004363712610999": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10429104188258277773": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "13474805373264874144": ["convolution_gpu_bfyx_1x1",2],
+        "2048528188026477374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3797986765970777456": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2148877522799179369": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14085753024976995311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",2],
+        "15800554162607246964": ["convolution_gpu_bfyx_gemm_like",1],
+        "17258128299721452811": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6214624887470295152": ["convolution_gpu_bfyx_1x1",1],
+        "13842149852156451845": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7482459536338668149": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1786821683911142459": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5436553435132026991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14677968346503677769": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",2],
+        "932195814187889636": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13106818352216009354": ["convolution_gpu_bfyx_gemm_like",2],
+        "12933253554354951910": ["convolution_gpu_bfyx_gemm_like",2],
+        "2945245652128285151": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",1],
+        "1697260854781788314": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10816702874143297564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2884499360870038648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "6400660469217490279": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16293465561256937726": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "14165325329016075285": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16912738776771289379": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13702254392810961772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "5276029719268937229": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7822463130304602936": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8065866013404161366": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "968092788032627444": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2531597468539205600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3287181725010492879": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8577875628223148806": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "6764038061921866053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3061372669831947873": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15604634351310647589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10803929517111130153": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5040095338370816349": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3176785355296130660": ["convolution_gpu_bfyx_gemm_like",2],
+        "12810833895438895155": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4154403364889130045": ["convolution_gpu_bfyx_gemm_like",2],
+        "14034402827496819479": ["convolution_gpu_bfyx_gemm_like",2],
+        "16794854619854992714": ["convolution_gpu_yxfb_yxio_b16",1],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "905526102343710614": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "7463517383354309469": ["convolution_gpu_bfyx_gemm_like",0],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4135975804549022456": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",1],
+        "17990326690659802090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10883992248631603006": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "2328919599530851492": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3602929955785812025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10100171358681249181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11184290482439221741": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "9433162648796382333": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9542325095876448686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "3117175697326325371": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1641111108888949123": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15871357525719630224": ["convolution_gpu_bfyx_1x1",1],
+        "10598099730944525581": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "5099947445888268507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15958886009743157242": ["convolution_gpu_bfyx_gemm_like",2],
+        "5551484040302194648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2014114949154914483": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "2095245727814188300": ["convolution_gpu_bfyx_gemm_like",2],
+        "1692473411043262397": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "11563892089503603030": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17789969008677638142": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1556975727728498645": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5593329151028712439": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "1963081583851864291": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12308895602001600327": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "741727668385951462": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5884951148427535208": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8976966933427522253": ["convolution_gpu_bfyx_gemm_like",2],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",1],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",2],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "10141558851476164734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2727175120437582536": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8390889357546397717": ["convolution_gpu_bfyx_1x1",1],
+        "16915857558806082023": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17181874388601550941": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8093401822846123153": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8494725779002762049": ["convolution_gpu_bfyx_gemm_like",2],
+        "4165926748138587705": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5911282942658469852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "762634810164167963": ["convolution_gpu_yxfb_yxio_b16",0],
+        "1154763947184432124": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8686733586982652897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9065137335863605013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7203620615363933078": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "12806934028210472719": ["convolution_gpu_bfyx_gemm_like",2],
+        "2459018025887933198": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14128122558476128712": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",2],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",2],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",2],
+        "4744578087509837185": ["convolution_gpu_yxfb_yxio_b16",0],
+        "8407012082034007985": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "5928392400230917930": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6744692937598310090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16437093737761968743": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "959260710517842876": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",642],
+        "14675165976583799157": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8353259929933281349": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3364467044587904559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3385797925880519845": ["convolution_gpu_bfyx_1x1",2],
+        "15924583510704449214": ["convolution_gpu_bfyx_gemm_like",1],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8168240543278779314": ["convolution_gpu_bfyx_1x1",1],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "11175353869874626110": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "757225477250808939": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16516262096533373158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16052741298509954954": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9585113116232600562": ["convolution_gpu_bfyx_gemm_like",1],
+        "7008873036126556197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4137738705782981426": ["convolution_gpu_bfyx_gemm_like",2],
+        "6484375582324852109": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "8954488655859677891": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14058311587429063829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8952733400567254769": ["convolution_gpu_bfyx_gemm_like",2],
+        "3022939690177474442": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7748514992101811029": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4571404165794634411": ["convolution_gpu_bfyx_1x1",2],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16788162879714733906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7923576965630818418": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15641322340289892344": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",1],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4863644213728386734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "16016396784190934729": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14120569486714455490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2180039710632160943": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3396731547696204011": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1427040855295681285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "10183537720515608": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7425369489110576363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14830991971271385876": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "13957350536347764705": ["convolution_gpu_bfyx_gemm_like",2],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11768117585574496387": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "7972861956906521660": ["convolution_gpu_yxfb_yxio_b16",2],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "14366395926517590797": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12818786388125465101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "16247799703932868151": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "16582761411084080015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1157069349112113377": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16836088134347394854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "12421204749289937399": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",2],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "14699357144600604190": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13767985623872409391": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17386047378634216634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2245166025103475783": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "8193369947544085921": ["convolution_gpu_bfyx_gemm_like",2],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "6284333183047854748": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4121535611334103359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4165019140664090799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15397084091361096354": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "13387545865482261974": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "12722153168975105360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "150132162949295379": ["convolution_gpu_bfyx_1x1",2],
+        "15101834579076569231": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12166852830214895457": ["convolution_gpu_bfyx_1x1",2],
+        "1944461047787586724": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6875055157295709098": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6887205509732544213": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16738951239219589307": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "16889886654893884746": ["convolution_gpu_bfyx_1x1",2],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",1],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "3793265335909270748": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11626398907755088688": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12567935463143860469": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5155616842071169667": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "6942016672941874829": ["convolution_gpu_bfyx_gemm_like",2],
+        "7719954202744123391": ["convolution_gpu_bfyx_gemm_like",2],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2789901295967374316": ["convolution_gpu_yxfb_yxio_b16",2],
+        "188830358699960789": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "9193880745263317167": ["convolution_gpu_bfyx_gemm_like",2],
+        "15012885932988454455": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "12179968379663737450": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17096735128393723245": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10424643336435622408": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",2],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",2],
+        "7432142107544210174": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",1],
+        "9955939178447682108": ["convolution_gpu_bfyx_1x1",2],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",2],
+        "18080788888293706149": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4674504221851042542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9401409770128851474": ["convolution_gpu_bfyx_gemm_like",0],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9414927552739380436": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4089043893927493060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15822546325822628634": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "17434141039341226796": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "6109013751635776331": ["convolution_gpu_bfyx_gemm_like",2],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16214394186337220006": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "16339187733937346919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",2],
+        "2314579504260247470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8092673566670222445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13302687772426736346": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14686272582436109012": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6934915634718835911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5293502980575652171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9190054801124577726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14268594692585922659": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "18279927175542031567": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4755225554035527185": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "9328585005923667676": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9541996065561509160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3346891393420268502": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16986358655784856534": ["convolution_gpu_bfyx_gemm_like",2],
+        "14216513246096503793": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17446505012657609153": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16434358667865869005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "7155796826953849982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11545529736818363243": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10613621801998459768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "42935035304560876": ["convolution_gpu_yxfb_yxio_b16",1],
+        "185782385623159958": ["convolution_gpu_bfyx_gemm_like",2],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "2542112741645712811": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "2714322766616035858": ["convolution_gpu_yxfb_yxio_b16",2],
+        "166437837813304707": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5832851215142537445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",1],
+        "9120374653477510318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "12536364199388193516": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15457040168177954463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3019864917236424168": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16974981142389546385": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17167229341919111718": ["convolution_gpu_bfyx_gemm_like",2],
+        "14043064718932538557": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9812438080378091263": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",1],
+        "5995121118186531621": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9177211394807412309": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9144269202766996508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9803492989444302959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18186612931984342471": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "16101625311127899143": ["convolution_gpu_bfyx_gemm_like",2],
+        "10514865654990433040": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "18120079746729314878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13006774775034887171": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5774841809066688068": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "18435632962969462312": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14799012895945855878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8095675456938934982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8561261337239934159": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12887076860522920405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16548491024653039967": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "12275528180752359999": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15199604820473713622": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "4890043345392707202": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "3894130445933963911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10041205516209288381": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "7447163906170805189": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "14884315147107686805": ["convolution_gpu_bfyx_gemm_like",1],
+        "8490260671996115530": ["convolution_gpu_bfyx_gemm_like",1],
+        "7667898603371717971": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10775271979871646995": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "768820004084041271": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8161520217142313996": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8542782888102516498": ["convolution_gpu_yxfb_yxio_b16",2],
+        "967141158966448909": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7171904645566467208": ["convolution_gpu_bfyx_gemm_like",2],
+        "1634884284544380004": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "10226095100825845185": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16271675466919087248": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7839141505912665157": ["fully_connected_gpu_fb_oi_ref",1],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13683623172740048376": ["convolution_gpu_bfyx_gemm_like",2],
+        "13991572769793610416": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2058172559199858297": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",2],
+        "7737977992444172757": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16911464046178654033": ["convolution_gpu_bfyx_1x1",2],
+        "4769003637955328938": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1117836569328440439": ["convolution_gpu_yxfb_yxio_b16",2],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9399511839804500548": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11002165738333323413": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "7992077349568239994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14345755557418971954": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7132328255408635227": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",2],
+        "7450417963648518926": ["convolution_gpu_bfyx_gemm_like",2],
+        "132437164570900392": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8865700182878875593": ["convolution_gpu_yxfb_yxio_b16",2],
+        "466744273945239777": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10785966734346479177": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "1117729599102132243": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17025324057045572535": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8348997431940166878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "13586735166545634506": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "10211403590176354415": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "276313536076170391": ["convolution_gpu_bfyx_gemm_like",2],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "15596408854298291433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13042938686374926241": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7375461241315602473": ["convolution_gpu_bfyx_gemm_like",2],
+        "14346466672686303107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9456645866001656225": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17970424536559595893": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "7614673554809134631": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "16851716501872033211": ["fully_connected_gpu_fb_io_block_fp16",1],
+        "15594673952484539994": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "6141193842171342687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "818998169319147148": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",2],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "7894230717547658326": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15522099459864628246": ["convolution_gpu_bfyx_gemm_like",2],
+        "2844794465598309010": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17583785768334531086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11685571068419983048": ["convolution_gpu_bfyx_1x1",2],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",1],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14025678657541870252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18091349188280218186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15974208269240775349": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11152334947349565403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4367991456894497706": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "13833960927635646899": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13700014916680753395": ["convolution_gpu_bfyx_gemm_like",2],
+        "15070618248849566698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18427056032084727710": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1135062632388082485": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "16870110185980402237": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10704906466618081803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8857763129101380288": ["convolution_gpu_bfyx_gemm_like",2],
+        "1775515808301276388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14147460733160099960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4824040283449153298": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "5606914392662771013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "16617569629839911513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6942606834115081953": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",2],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "4588117321438490483": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4342446399224806160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "11910735867274493498": ["convolution_gpu_bfyx_gemm_like",2],
+        "1281190653081960886": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9099056013518879466": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "12686330321897091505": ["convolution_gpu_bfyx_gemm_like",2],
+        "8731079912830889828": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15739756988784344130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "6845814820599174031": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "2929715823970060874": ["convolution_gpu_bfyx_gemm_like",1],
+        "9463256538942644563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "13512059751838488458": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10820312036555742020": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "6055793483770886264": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1330337530094825121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6254141935545262078": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",1],
+        "234288286732396704": ["convolution_gpu_yxfb_yxio_b16",1],
+        "467070383257529689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6654167459904026563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "10979362792894404338": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "11693134363909241514": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1921500066107090648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",167],
+        "3117673619907511009": ["convolution_gpu_bfyx_os_iyx_osv16",487],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "17081449111821382308": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",2],
+        "13123561937554734618": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1290180607037086383": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "13022797264172398260": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "3986429358782189117": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",2],
+        "14244689429217411113": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "6546440095044731932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10747101719272611563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",2],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "2917999294360728537": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3948843501884284998": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13124659308711651699": ["convolution_gpu_bfyx_gemm_like",2],
+        "2119566651547512543": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1318571118468536310": ["convolution_gpu_bfyx_gemm_like",2],
+        "9545968464906009869": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "11455055202624479980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5285172225938230524": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11669828823444745889": ["convolution_gpu_bfyx_gemm_like",2],
+        "12184235281888559274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5257134257307295031": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17085927772068621152": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1359720957005310113": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",2],
+        "16818206615424635387": ["convolution_gpu_bfyx_1x1",1],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "11679235499894668689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8655739705298627602": ["convolution_gpu_bfyx_gemm_like",0],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5240706676373148280": ["convolution_gpu_bfyx_gemm_like",2],
+        "3325575565536567070": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1074748462756364699": ["fully_connected_gpu_fb_oi_ref",1],
+        "6926590672771069689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9603926867418680768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9412392168031560549": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16003914811215141863": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8956566633622104099": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "574869992355132069": ["convolution_gpu_bfyx_gemm_like",2],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12933785392937626017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16316483048621486077": ["convolution_gpu_bfyx_gemm_like",2],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14248622935809594779": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14752182392048929103": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6744044115114192916": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16385915289511951113": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "5017701748886087836": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "10878198256414940305": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12287667143602938393": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "697333686114567307": ["convolution_gpu_bfyx_gemm_like",2],
+        "10113696658040720628": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16013560489115457872": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9741607635826869269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12023260267201191955": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4776446300552810228": ["convolution_gpu_bfyx_gemm_like",0],
+        "15329680728165965773": ["convolution_gpu_bfyx_gemm_like",2],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10148067979123062638": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "16072525303202287969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",1],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",0],
+        "6822432085522584060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "7715937239456300593": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2321773209766424929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12710794174926396540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",2],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8466986812935642059": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "1786105567361070086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12877601016766418505": ["convolution_gpu_bfyx_gemm_like",2],
+        "12241130380766920378": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7837876599690110056": ["convolution_gpu_bfyx_gemm_like",2],
+        "17536482873064844308": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "632116056424249698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12146979849998627283": ["convolution_gpu_bfyx_gemm_like",2],
+        "824911124897042617": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",2],
+        "2835909063063272102": ["convolution_gpu_bfyx_gemm_like",2],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",1],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",1],
+        "3101748967012684440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8837721075413149240": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12287827551127082597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "875146113874776902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",2],
+        "1157388265135592238": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11096750581455917678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10325138269934303618": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14359026450472189405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",1],
+        "14795618530175274538": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "6825390996679224270": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "14963614790718019676": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11775265110573621330": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "7552544688541855979": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078590909693331731": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12081698011407453832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5968129546023764583": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3190494353583341446": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5567628205735744449": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11705756153433897198": ["convolution_gpu_bfyx_1x1",2],
+        "10693837788817206459": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17264671167892237524": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",2],
+        "8645965165922150743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6805188858008657978": ["convolution_gpu_bfyx_gemm_like",2],
+        "16901594465545439334": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7541325258238317885": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "3965327578193694832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3742751561273931407": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12469992822259989528": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17292751972745231011": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",2],
+        "3735605582512535278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",2],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "2449586975250543578": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13701870576531008278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6699877220571254719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5079381702867378605": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "14910911338105922048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10528894716283673051": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12793347723828876280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "10399620940700804517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11694428890484758107": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15666720796968090760": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7770000755097925765": ["convolution_gpu_bfyx_1x1",2],
+        "3419536918610303807": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18446245971488003004": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "13927671398099556854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "1880137091477870982": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9076758673133996959": ["convolution_gpu_bfyx_gemm_like",2],
+        "8302886228681027388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7292351660229751817": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",2],
+        "17996535939348094624": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4039483032571506874": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "15289152041466330689": ["convolution_gpu_bfyx_gemm_like",2],
+        "2191416057399400794": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2608363732937932266": ["convolution_gpu_bfyx_gemm_like",2],
+        "396580837423299119": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",1],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4116610956045302817": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15476491807306982382": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6102330514901613158": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16898785030254336705": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16094455700371652312": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",2],
+        "16936968151775497887": ["convolution_gpu_bfyx_gemm_like",2],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16188473537674428539": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6756771670011959646": ["convolution_gpu_bfyx_gemm_like",2],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7051238664181857633": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "3571030800252732358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11065709388908213457": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5924341622384096919": ["convolution_gpu_bfyx_gemm_like",2],
+        "13821224753538037982": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "5595779343671478945": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3220756134650041028": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11824946481875102910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14044732537191084187": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9647916259092117712": ["convolution_gpu_bfyx_gemm_like",2],
+        "7008509833947166548": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1099404514975797315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3114869763557037270": ["fully_connected_gpu_fb_oi_ref",1],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",526],
+        "6427979320488981912": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5312269140190538942": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16813995580382709489": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "4090512597925170883": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "7585777271711713778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6721354194352192662": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14771341796915983228": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18416908414174464784": ["convolution_gpu_bfyx_gemm_like",2],
+        "15956352026642286295": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9312974578711092131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",2],
+        "1208161922424418734": ["convolution_gpu_bfyx_gemm_like",2],
+        "12305397676800089268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",2],
+        "17082268616134506581": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",2],
+        "17955326503130437346": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "13962325395021860937": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16589848737162195829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11497761673211348612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "5938850739683493929": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",2],
+        "15188273255634848057": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1413558157882728476": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",1],
+        "8459380583159325597": ["convolution_gpu_yxfb_yxio_b16",1],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",0],
+        "4718716595177056289": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "12107079280128343726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3217295012596892181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12926382190254407283": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8543619733732987550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1787152688807233651": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",1],
+        "4021045600853993587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9654944848074437064": ["convolution_gpu_bfyx_gemm_like",2],
+        "15635250842093678965": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9418041909134721047": ["convolution_gpu_bfyx_gemm_like",2],
+        "2031558560788449957": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14807466024030301968": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14135593723444205032": ["convolution_gpu_bfyx_gemm_like",2],
+        "14646141746558153748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5583453364991774426": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11436473937404565094": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11719957578496407410": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "462240909302334133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1042605521041579458": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",1],
+        "13193898459027972719": ["convolution_gpu_yxfb_yxio_b16",0],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12780116250427776647": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "7949069388917479511": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18161971781834208343": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "16404362308829952450": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "3301356450249305137": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9589361786336650748": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11807282628372660280": ["convolution_gpu_bfyx_1x1",2],
+        "16953093098789113080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10525462454857911293": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "11313025178951972247": ["convolution_gpu_bfyx_gemm_like",1],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2399313178951511557": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "16644952765107909604": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3724572174214794659": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10893628699015898230": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7954972694876158422": ["convolution_gpu_bfyx_1x1",2],
+        "8458082326743351141": ["convolution_gpu_bfyx_gemm_like",2],
+        "13468081302022888489": ["convolution_gpu_bfyx_gemm_like",2],
+        "14789782064157699768": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5578991261564497604": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13767500791267563349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5919454297699648428": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6602394091385112575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17777248703109395158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15031155621982459860": ["convolution_gpu_bfyx_gemm_like",2],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "904355798061005466": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "7095629088416100928": ["convolution_gpu_bfyx_gemm_like",2],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "3859139031732555228": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12194037100109755112": ["convolution_gpu_bfyx_gemm_like",2],
+        "4500107195684703428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11759322316883943989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "12831298482349900359": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15227189929676013024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3177304125602972370": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15656706773401161497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",1],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4670487436469119872": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8879618489623984140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11797601971796699898": ["convolution_gpu_bfyx_gemm_like",2],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",1039],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "3067001341355453846": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "18027243127893440568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5751283221740229986": ["convolution_gpu_bfyx_gemm_like",1],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9243949750444156746": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "7444165397413360181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6418500550523945192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17826868890632814593": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10271261715175176019": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9738776059655610885": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1304921846760027440": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3059575629482816852": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3156783219125679946": ["convolution_gpu_bfyx_1x1",2],
+        "16739031949237426992": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "5602328731722824868": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "18129795023552968695": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3244803973821375252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12808456612606675259": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",2],
+        "11908169713247209976": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5046089607609787258": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13960388312976163971": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10632933069865171963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "15398380328746287438": ["convolution_gpu_bfyx_gemm_like",2],
+        "8456185296386225533": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13633048912926365931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "345043289576587800": ["convolution_gpu_bfyx_1x1",2],
+        "17413191440314817117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8365255170846178102": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10504318542015227515": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "12407890437443790515": ["convolution_gpu_bfyx_gemm_like",2],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",2],
+        "17052161869014993719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17195293614280872622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",2],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",2],
+        "10745099399736462076": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "3820661057776133570": ["convolution_gpu_bfyx_1x1",2],
+        "9079203986633151014": ["convolution_gpu_bfyx_1x1",1],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16347412180100581330": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "3242391637018676328": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14689812157592240007": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",2],
+        "12989677691575632174": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3444250649099578792": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8174833187387604731": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",2],
+        "12379166764490359144": ["convolution_gpu_yxfb_yxio_b16",2],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",2],
+        "16837963510205857013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9495192057713157041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5727758374304309350": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17762455138615317884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10709828018763273371": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "5313382805395362669": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",2],
+        "17088011073114549679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4135068756462147853": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3731224822876468602": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4362304842016958728": ["convolution_gpu_bfyx_gemm_like",2],
+        "8250212706222997384": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "10536316961655703500": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "17214254645087272557": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "9617316303048974588": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17399728556634171321": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7612288596055048389": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8733371726903473932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4651261398203912503": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "9119268982510599778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6328802691680458752": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13218298785325404589": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5754396201681434378": ["convolution_gpu_bfyx_1x1",2],
+        "15989894214714907271": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9922764846020092836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4972952621622984792": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",1],
+        "18199824206329982249": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5157949342388119167": ["convolution_gpu_bfyx_gemm_like",2],
+        "14151747022287993729": ["convolution_gpu_bfyx_gemm_like",2],
+        "10747688146893187959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "75120034961995929": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9237587440336828595": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13326492157370934949": ["convolution_gpu_bfyx_gemm_like",2],
+        "1387945708447092123": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "13962189339706230770": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17848582668902427291": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14281801257982447624": ["convolution_gpu_yxfb_yxio_b16",2],
+        "497488185553682238": ["convolution_gpu_bfyx_1x1",1],
+        "16535378085465418910": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "8900977003907025003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11690533591656807605": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17133376737554844449": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "5330130011321223525": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3121704239277217273": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7233783054884565746": ["convolution_gpu_bfyx_gemm_like",2],
+        "3080612075440389053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",1],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",2],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",103],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",2],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",2],
+        "11196245220967135443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6820224292713065232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13009381943944182288": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8210092359850191682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15311930929656759371": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5857101685300045443": ["convolution_gpu_yxfb_yxio_b16",1],
+        "334703311738467111": ["convolution_gpu_bfyx_gemm_like",1],
+        "3451309062150982886": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14808079119439455357": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6760797535531423152": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",2],
+        "3928596145340765666": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2164314506903530487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15757351352532908153": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "13249852145471010452": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7926989875988735079": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10896935976330351144": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16469493066700118274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16459072408799224894": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13766070202060785219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5723759573058003971": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2314805462821790774": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9319064434175105168": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2319519208813614116": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5115298857582076692": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4925720860007127584": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8614375489387596119": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14206125678667603810": ["convolution_gpu_bfyx_1x1",1],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16455941573984854254": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2126208024616319501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5795524493577277985": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17491825380936802930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5319459637051859849": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18333355024265557430": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8794896449397768269": ["convolution_gpu_bfyx_gemm_like",2],
+        "16223356735957394429": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5258372022038629529": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6014752258124559691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10256831975351722184": ["convolution_gpu_bfyx_gemm_like",1],
+        "17987739992848266169": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10982526068861394162": ["convolution_gpu_yxfb_yxio_b16",2],
+        "144634005596305959": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "14116275901314596944": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4240975186599864955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16267531927647687641": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5643920882179676695": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9170163372548895531": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",1],
+        "7178866013527118649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",2],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "8325903548627432": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18244966393978155130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8257103926661643451": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4256155212405177844": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5047972486012090625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "10007925729029867733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4430932059574900921": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15449650271741732512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5705056256080522960": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12248852114219058572": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "11979032916453246611": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15136770992109675092": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1632416005093914709": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5577571901049952658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",2],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9736684300833719045": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",1],
+        "15303251546207338960": ["convolution_gpu_yxfb_yxio_b16",0],
+        "362823013207940830": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",2],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",1],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",173],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "16000428520749664687": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "7211179360844946434": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "6692085187697087807": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "4531222427159927606": ["convolution_gpu_bfyx_gemm_like",2],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14817801788424046035": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5958300749101873980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "8575833423399668525": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "1040411949730118556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15542520725696027828": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15961487889420208188": ["convolution_gpu_bfyx_gemm_like",2],
+        "12879367655655932174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "10626281431800814406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3406812365298442897": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "13668940862847596363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11317843493537672866": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "3635446784873718932": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",1],
+        "1108229954015380813": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15449774545834423274": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6808843088626121909": ["convolution_gpu_bfyx_gemm_like",2],
+        "3492178441007007033": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "16502045034098739466": ["convolution_gpu_bfyx_gemm_like",2],
+        "975943900172381326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",2],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17277917672233464304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11113125355390956764": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16513038896689318072": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7531346828150129063": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "10701208905236219083": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14930789530046665855": ["convolution_gpu_bfyx_gemm_like",2],
+        "14065215389112262561": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13051390418571971928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13797759143769042759": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "8099100633390626027": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7412772553395852003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16428789154716792138": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2477849395789783501": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17800115051456107658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773482308451190487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4830454154838353056": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "9659814105483633858": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8933701347987963693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4216366893358625960": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "14791575777969587370": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16989896550094613437": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7535571298845832061": ["convolution_gpu_yxfb_yxio_b16",1],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4640696923527766618": ["convolution_gpu_bfyx_gemm_like",2],
+        "13585163747565192884": ["convolution_gpu_bfyx_gemm_like",2],
+        "9135116285263927211": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12712071520541638451": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "13058026769607428653": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10431728173806991521": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",2],
+        "2450251936650841836": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "14931590390643373866": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12866217660635921034": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15747571668131081693": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "6106367716877633757": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5963901433137582265": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "18226737525116147628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",2],
+        "17966898762317477857": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",2],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9542795021683486547": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12093737479877309006": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8651641584737798174": ["convolution_gpu_bfyx_gemm_like",2],
+        "10194187012252949909": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15181987458871339815": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "6458189051305803360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12616205756849913359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5941095082097535176": ["convolution_gpu_bfyx_gemm_like",1],
+        "12762301414049772746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8399668174006528237": ["convolution_gpu_bfyx_gemm_like",1],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",2],
+        "7481256533438761028": ["convolution_gpu_bfyx_gemm_like",2],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",2],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "584086621952390547": ["convolution_gpu_bfyx_gemm_like",2],
+        "4439786737038041995": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2034811390140488812": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7767103488808670253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10396788403466463989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "7986797517722531256": ["convolution_gpu_bfyx_gemm_like",2],
+        "13569453018083742128": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2583562092192709891": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8063236641629084352": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6222595759158615206": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2058364830449635556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2644054989263429508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "537074122417021898": ["convolution_gpu_bfyx_os_iyx_osv16",100],
+        "17040537179740138304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7213383384662748578": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13648761167622654288": ["fully_connected_gpu_yxfb_ref",0],
+        "18253299978538051201": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15526021915035861514": ["convolution_gpu_bfyx_gemm_like",1],
+        "14670339865153970893": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9655242408142699694": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",1],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "12656228464579497510": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9309173544512377803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13079058582191027406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",2],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "712495040970043706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",1],
+        "9306120768594851497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7777279468029216688": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18029396837690671545": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14909506411483112959": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1734769856106746136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9987415314864002460": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6772954924703365345": ["convolution_gpu_bfyx_gemm_like",2],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "13403161389559730": ["convolution_gpu_bfyx_gemm_like",2],
+        "9811086682271990794": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9674248159643501374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7830644361525332797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2290965424106255219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11208625628954179200": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "15774073623451382326": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "10099598062509781441": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "8618627241234406784": ["convolution_gpu_yxfb_yxio_b16",2],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1784892318069674949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5568728266639058524": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9328223957245552723": ["convolution_gpu_bfyx_gemm_like",2],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "8809438390805488749": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15317946705199574301": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "18093895673012393740": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10232809153913700925": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17763347648779573375": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2895819653081408358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5600128039063009632": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6490907666077364481": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17188004018198554470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13149617013851130587": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11130439225010714550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "10588059104387338398": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "8117638644045799192": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6818140422066151642": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5156033406916344703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15409184364121627414": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3536359641225772698": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",0],
+        "13590444711975157776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "9165817820007469505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "17109520309574369561": ["convolution_gpu_bfyx_gemm_like",2],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "886880682650879171": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14263605862840500474": ["convolution_gpu_yxfb_yxio_b16",2],
+        "119047044057950958": ["convolution_gpu_bfyx_gemm_like",1],
+        "11153522012082333137": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3074436655804078403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15271783562528081169": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",2],
+        "7247475218645942682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2753393184265405425": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",2],
+        "12788611449571149037": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12829916847670789556": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",2],
+        "17140702790441856730": ["convolution_gpu_bfyx_gemm_like",1],
+        "7792512829747836997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8935522915553126640": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9332701118402940384": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16573597215928075233": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",2],
+        "15424646499666127616": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16561618767117193109": ["convolution_gpu_bfyx_1x1",2],
+        "12374775091628199854": ["convolution_gpu_bfyx_1x1",2],
+        "14416897092729861207": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16569637518948306471": ["convolution_gpu_bfyx_gemm_like",2],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "8792202318168046223": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "18417830391649460864": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7817691489550523328": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10555835101752189454": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15858356755924943957": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16402386400454963239": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "7398158542592530232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "269167598200943915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2629918844315184499": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",2],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "17479773641824222843": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10868287582480518153": ["convolution_gpu_bfyx_gemm_like",2],
+        "11738780323979052397": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14616801816838734032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9589718307719207394": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "5367618411887849711": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "3582256192870592087": ["convolution_gpu_bfyx_os_iyx_osv16",1029],
+        "16253244737884854313": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13076935351221777993": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "7724125714360985807": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "2955459120402821540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4815047491742617397": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "6250785177115691293": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",2],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",2],
+        "7106362077449435105": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "18164706399147697716": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",2],
+        "17536591931934691648": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15112393534380347357": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "252188028702250668": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5519781859090160931": ["convolution_gpu_bfyx_os_iyx_osv16",760],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",2],
+        "17713666626443142908": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15617599138946168772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11319799002723299753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4154830034576950123": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "12344689711325644622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9757389422721488173": ["convolution_gpu_bfyx_1x1",1],
+        "17833304859352483840": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12625112690264223217": ["convolution_gpu_bfyx_gemm_like",2],
+        "6888842613779488104": ["convolution_gpu_bfyx_1x1",2],
+        "18080848057281093190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10785252006948647963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14827882251752394500": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6713554643048248003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",2],
+        "15131258379753113816": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11658751382892761740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1973051991518953158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14091543526898531200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14887465694301281952": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9426665763007611385": ["convolution_gpu_bfyx_gemm_like",2],
+        "3533556385636018581": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "5291011077679733990": ["convolution_gpu_bfyx_gemm_like",2],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "4282756088824939292": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15967614281807823696": ["convolution_gpu_bfyx_gemm_like",2],
+        "5656320098721954644": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "2571289358202565251": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3449007266907948591": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "8890400423799565844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "12745552951204330052": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16851082749395991194": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "15924916465272239832": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "4280250278457269231": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13326339730522937517": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15799159401545270696": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17255805293355120219": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "12818012741490629493": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "18075395502550596586": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "3172518362830684966": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14892045745899927762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11263540528012919947": ["convolution_gpu_bfyx_1x1",2],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "9982350570959875159": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15973363403733281926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6820134899097582639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1129349074674368869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7235358742317442134": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16252420150239789472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17041468169694105561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4995051972576749717": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2199167704280374654": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15739278428190392018": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6131481289104111211": ["convolution_gpu_bfyx_gemm_like",2],
+        "10751633292301177132": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3049097498155857895": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9116620473576064051": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",2],
+        "2907572047024872990": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",2],
+        "9169324504353459004": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17272600601478967434": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15949311219856917559": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "848735117501914374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11226912053840621089": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8155268141318893606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8616584380583931648": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "10879171754021534649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4805194563120934409": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1235864574444794315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7979265448683159733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16709930291825881111": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10406201782146034797": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10812324504777808014": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14385148066232093878": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8362179886017398479": ["convolution_gpu_bfyx_os_iyx_osv16",8],
+        "4615766471724791034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6511742759171254447": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9523941899498458600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9026883911202247185": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "4779919236230154165": ["convolution_gpu_bfyx_gemm_like",0],
+        "1054954263090546905": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14097394936362526559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10558609844937234631": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10318417166945621015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4988480452582288323": ["convolution_gpu_yxfb_yxio_b16",2],
+        "490931535580183607": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1354199155380786906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",2],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",2],
+        "10272016038525930672": ["convolution_gpu_bfyx_gemm_like",2],
+        "18337762134908554532": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",2],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "11738360883999461965": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "10997156099709436375": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",2],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10720769054729185991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14365232561737454031": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "10952045211444638649": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14885031472057965707": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "3648713169465596196": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2469579114592379040": ["convolution_gpu_bfyx_gemm_like",2],
+        "13507437548205340054": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11010673493295430801": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3792276488551864121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1336739931702966228": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",2],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6931062623510631425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13723434004563378589": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12917241193304093727": ["convolution_gpu_bfyx_gemm_like",2],
+        "17618727959983224888": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2060161076370553192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "12714892326998505133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13040213971461407125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5852569526295779497": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11724732387425614709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11892455357792445192": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4773077837537775324": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3904383357046705799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3286496836813087881": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",2],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14497254583210965214": ["convolution_gpu_yxfb_yxio_b16",2],
+        "72745257233374197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11289650463922092775": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2673903488704336606": ["convolution_gpu_bfyx_gemm_like",2],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "11606895513516475339": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13124342334495538095": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "12546446257192651407": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2856601829807186494": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13412516623201653283": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17868834743037242721": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4805402210873641704": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6816632607384969096": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9899211365930959346": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "10612739622648878242": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4780291919667721265": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "10178171262128338408": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14502856487639608696": ["convolution_gpu_bfyx_gemm_like",2],
+        "4894469114343061704": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "14417401878572618236": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10914921540144371519": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10724501418439612080": ["convolution_gpu_bfyx_gemm_like",1],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7100056605355325582": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "15859493313686060349": ["convolution_gpu_bfyx_gemm_like",2],
+        "14540578324750869319": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565006185780806333": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "15192022454507415969": ["convolution_gpu_yxfb_yxio_b16",1],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "15325852281951905610": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",1],
+        "8519354640245415816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12669783714916998842": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13187657215288939912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6123707371654753818": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7343590049199309046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5526223938481098693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",2],
+        "9521715904587435700": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3058716597925544041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "2431241169199693527": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1425953627379976115": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "6118737381591369532": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9184275066167601343": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "18083803358410976976": ["convolution_gpu_yxfb_yxio_b16",2],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "1299760574827253811": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4126895998426674411": ["convolution_gpu_bfyx_gemm_like",2],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",2],
+        "8421388456873652700": ["convolution_gpu_bfyx_gemm_like",2],
+        "1584906448442153128": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3096280563014331836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12334522314915706512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "9832505855130134649": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",2],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",1],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",1],
+        "7015738038963065110": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "6963293142152132518": ["convolution_gpu_bfyx_os_iyx_osv16",165],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16195893521207315456": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6509271384550125629": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "12137340921829511472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1760391741350091665": ["convolution_gpu_bfyx_gemm_like",2],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13289438471364352634": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4610200388191607540": ["convolution_gpu_bfyx_gemm_like",2],
+        "10882719585803523032": ["convolution_gpu_yxfb_yxio_b16",2],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "13943983517468412332": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5479761740065152589": ["convolution_gpu_bfyx_gemm_like",2],
+        "10133398220120888583": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13156052826121673994": ["convolution_gpu_bfyx_gemm_like",2],
+        "10006197783106691106": ["convolution_gpu_bfyx_gemm_like",2],
+        "4602232889230956461": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15669242195570440840": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13365950526881732374": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14469011068777098822": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16341722570340169855": ["convolution_gpu_bfyx_1x1",2],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",2],
+        "6715523440337925186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "10717031088082350652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "2016932800158392200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13467831091041327178": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2135164671985938807": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9711184878666366204": ["convolution_gpu_yxfb_yxio_b16",1],
+        "968105804060326332": ["convolution_gpu_yxfb_yxio_b16",2],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",1],
+        "17248329632819747646": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11942019076226205097": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6902485831441844789": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4885504197789468842": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13464697394408238115": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10789133352712755945": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "7065244994574625911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",2],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "16129682385980878760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17043601935017365442": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2905979727479716212": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4391695940614024479": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",1],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "708201295462256406": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "9827177798112814604": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "5050273611519516510": ["convolution_gpu_bfyx_gemm_like",1],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",1],
+        "17672785701483179117": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4834446692898125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "7628077869220463202": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "11417406326478154077": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12985942652866621579": ["fully_connected_gpu_fb_io_ref",2],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",2],
+        "5638640164891118162": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "603883331897298932": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16633540487930201533": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "3872151366780051246": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17515847111676784130": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "6613116267521819997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3377052601059116318": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13509275050322423832": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13119040261291835298": ["convolution_gpu_bfyx_gemm_like",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6799631962511042762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "6959692641873234850": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18152894191323920027": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6512987867462549101": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16371608027363202992": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6210866413385292851": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "465567788283624320": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6756679359093569015": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "5740745357953479527": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "17676344219475515993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5346898505346646714": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",1],
+        "6945787904293959477": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "18243724217479803107": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "2227700097134029783": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10309986238001994183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12531880391016521628": ["convolution_gpu_bfyx_gemm_like",2],
+        "15115780248032030963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4298242568890525997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16870036853278751563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13161997040644039778": ["convolution_gpu_bfyx_gemm_like",2],
+        "15833461718320604065": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "7669403041163460089": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",2],
+        "15482685355538566951": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",2],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",2],
+        "6558436237075337721": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1580344438642032807": ["convolution_gpu_bfyx_gemm_like",2],
+        "5578850952665051661": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13493119419114659706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16425374300157280628": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11698754846673268046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12121204870979363096": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "12054200116003751590": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",1],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "1448440012428740463": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8976238022515713641": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",1],
+        "10681768474583067517": ["convolution_gpu_bfyx_gemm_like",1],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "359617184733439511": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9366100787108468082": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2269140636553245446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9870432551513415176": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1984152634309440563": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "361497145093734608": ["convolution_gpu_bfyx_gemm_like",2],
+        "13861223834466385546": ["convolution_gpu_bfyx_gemm_like",1],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",1],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9941035405796680081": ["convolution_gpu_bfyx_1x1",1],
+        "12051398350382954787": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14242202444788213591": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "8999570321113443117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "838726445796308454": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",2],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "9518071423184197213": ["convolution_gpu_bfyx_gemm_like",2],
+        "8004244584949995244": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17480277135590489472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "167635075964111628": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1463649546800120847": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",2],
+        "225809055928705881": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "7998455776901877973": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4804533178560338520": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11596971301790598405": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "5312413491828906254": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12600707101000510621": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7346046748383284270": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7804715870037416579": ["convolution_gpu_bfyx_gemm_like",1],
+        "18433141005552346566": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17893181511546734799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",1],
+        "7647236080048602591": ["convolution_gpu_bfyx_gemm_like",1],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6910589963488897537": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11175936010605958812": ["convolution_gpu_yxfb_yxio_b16",1],
+        "568191462231494113": ["convolution_gpu_yxfb_yxio_b16",2],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "3861351835305151926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",2],
+        "7779562434199107586": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "18385086614524985975": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4731836216299455047": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13878967140838761911": ["convolution_gpu_bfyx_1x1",1],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11888011890096886932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "17961793197503317952": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5720964268093705079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "877901260688090160": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1310498917952637709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6871131333562410117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",664],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "7139719632093090046": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "14553577436929219470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16814025114202322376": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11880337915508207160": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6467251764899975676": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16995444341569389342": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "14206076551739831333": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7393551951402219833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "1596353239542510685": ["convolution_gpu_bfyx_gemm_like",2],
+        "2578325663193624576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13426254939418471242": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15337841577110104431": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14868677663932902695": ["convolution_gpu_bfyx_gemm_like",2],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",2],
+        "14766694310604777253": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15135644084742750702": ["convolution_gpu_bfyx_gemm_like",2],
+        "12787837386653002743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6167369758442930886": ["convolution_gpu_bfyx_gemm_like",2],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5118467701668427545": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2026622899016787854": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "16374675547140209181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "888110783182849535": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16683485007140805060": ["fully_connected_gpu_yxfb_ref",2],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",2],
+        "8260024340787818709": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10480527638577674825": ["convolution_gpu_bfyx_1x1",2],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9162469583721135043": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4685236901551256966": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",2],
+        "2296581485980163665": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1635121016109328853": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15670767419106537809": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4848143712599565301": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3102538312627892960": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",2],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "18408107772851888061": ["convolution_gpu_bfyx_gemm_like",2],
+        "11179211757115972103": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3980835859526174461": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15525903155475629518": ["convolution_gpu_bfyx_gemm_like",2],
+        "14175962333785791005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "290134020607738418": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "9534041402131086717": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "17638753020411096694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18432421400879260832": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "16304192736281226143": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7305582749708309904": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3622409603053918029": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13207134083675064956": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "8931169575495985034": ["convolution_gpu_bfyx_gemm_like",2],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",2],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",2],
+        "12850044341631872743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11086471945045031067": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",1],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5507373575763339429": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13328911884191551889": ["convolution_gpu_bfyx_1x1",2],
+        "5104519293341299859": ["convolution_gpu_yxfb_yxio_b16",2],
+        "249639220178603842": ["convolution_gpu_bfyx_gemm_like",2],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14942858162799632403": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5576296603250158603": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "2683304757433993300": ["convolution_gpu_bfyx_gemm_like",2],
+        "16128152634974034731": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15677717057398875599": ["convolution_gpu_bfyx_gemm_like",1],
+        "851057218719456209": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3349468433721705582": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15863531785836309247": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15367649112776077240": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9492026326463873766": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16925721317097534009": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "3319827933068341610": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12268912077694742671": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "9073757008455674094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8780671766122887951": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "15625374380046476173": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7105219760750474587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16961326251624610778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10076885835791159907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",2],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13541382855330226000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11015074526119891710": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "7897973318803646560": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",2],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "11871319147579477936": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16601230690171340432": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17207560805775399864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9737565171095493297": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "17466963970980708210": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6128157319666849074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "1197281505560782577": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13264617841270329349": ["convolution_gpu_bfyx_1x1",2],
+        "6550549654706796887": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13120262386070281193": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13368203360773949292": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "3615052707933370958": ["convolution_gpu_yxfb_yxio_b16",1],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",1],
+        "2554991397391195611": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "7206226541369793931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",2],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",2],
+        "2949545414911764346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1173136780324694038": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12512751736409465214": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9079676771143357396": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15474155528481683394": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "9649445293567537596": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "3603187029740446600": ["convolution_gpu_bfyx_gemm_like",2],
+        "1418595171949196661": ["convolution_gpu_bfyx_gemm_like",2],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6578517057140155080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16674633029045714564": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "15228390729175722409": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1245259979364728404": ["convolution_gpu_bfyx_1x1",2],
+        "101387140804297623": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",2],
+        "13234055353608734080": ["convolution_gpu_yxfb_yxio_b16",1],
+        "136349424199140459": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9182260316973872633": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",2],
+        "17651477639302255490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8768300687476117215": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "6651389480007764007": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14461365896122393071": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",2],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "13596876807637507229": ["convolution_gpu_bfyx_1x1",2],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2208765794404376467": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1345101751956733589": ["convolution_gpu_bfyx_gemm_like",2],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12185561188335760786": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14113320831418478396": ["convolution_gpu_yxfb_yxio_b16",2],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",771],
+        "4098191685457418125": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "12637509262827320678": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "5897564616927353003": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "15006204461468698734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "13398986810666238552": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11731277083374465361": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6020017927557041768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "10880830033700542216": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6934241437968723825": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4740585760177040164": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7009459929666511861": ["convolution_gpu_bfyx_1x1",1],
+        "5602377914578322577": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",1],
+        "2561508262445368003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17935612508319394087": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6126073246053235472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "786401653335542559": ["convolution_gpu_bfyx_gemm_like",2],
+        "123026136670202868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4999171487916568471": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "14204609663091442879": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12181310683533105454": ["fully_connected_gpu_fb_oi_ref",1],
+        "15765592038173567297": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "15497263259976427714": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "6817494598328071314": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12234313962656804631": ["convolution_gpu_bfyx_gemm_like",2],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",1],
+        "155962454315573087": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",2],
+        "15681189418847392587": ["convolution_gpu_bfyx_os_iyx_osv16",857],
+        "9989055862610193828": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "3779229442395464456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15678768217453692725": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6950586691727980329": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3365786526859737112": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6022695488769618639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11612044653200304877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12960590161485806657": ["convolution_gpu_bfyx_gemm_like",2],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",2],
+        "15223779293313750042": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14749758365915995876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11973034261101454380": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8205640825965213946": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10774872391768741315": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11564071490267241224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12604104383683210104": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "3658599312236344017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "8974851555526896131": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",455],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "3976736548270395981": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5576305720733717044": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",1],
+        "8584375748627260395": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",2],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",2],
+        "18040183500393090505": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4291531885506213180": ["convolution_gpu_yxfb_yxio_b16",2],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12476381811279163147": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12972798847556569913": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2936333406928424760": ["convolution_gpu_bfyx_1x1",2],
+        "6081038474197004540": ["convolution_gpu_yxfb_yxio_b16",1],
+        "577842450575835175": ["convolution_gpu_yxfb_yxio_b16",2],
+        "401304652492444430": ["convolution_gpu_bfyx_gemm_like",2],
+        "13009612703754510124": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5020763861388859254": ["convolution_gpu_bfyx_gemm_like",2],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "16681690088928624738": ["convolution_gpu_bfyx_gemm_like",2],
+        "2173163618947713953": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "12477315042623518609": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3067930325929862490": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",2],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5012013738970489338": ["convolution_gpu_bfyx_1x1",1],
+        "8735534480653818425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4627958043707973483": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "16610284927818475574": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13618411266808159341": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2188101366183302888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12923298574715329852": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1531349457115735845": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5751553671208192963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "2882493407831196579": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "7232326270078161768": ["convolution_gpu_bfyx_gemm_like",2],
+        "17778091287904736965": ["convolution_gpu_bfyx_gemm_like",2],
+        "4113061482402915179": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8732952254407298868": ["convolution_gpu_bfyx_gemm_like",0],
+        "5564881878876582769": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3217674729821898463": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "15720012960520885263": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3286250915720444467": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",524],
+        "10292585962794261197": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4633923265089466898": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "11270855425262923989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8761283252495354972": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "18187345248160481425": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",2],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",1],
+        "13531892014108749846": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3622778166646258015": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12745631396795162505": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6948455759869670955": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12721294268595880422": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17439102502195540957": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1198893312653197535": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",2],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "636447309806530300": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "16768497046700403748": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15167962750603978874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6267138247577676996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7895030495055232460": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2576773809294607971": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",2],
+        "3962138884698789654": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10547134120307382906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7223801044761006523": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "377219085802486361": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9435086287598656868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "10706180189726741161": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16711142379173254655": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3171354702636014224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11007175027950132719": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11194372303922533529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13705072264927031658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "9576962489937466093": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4445913285957791409": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "17734480671864478402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5112480593385320005": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6603778920476932267": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6253009218981124949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",2],
+        "14971270053929063630": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",2],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "522313477023837056": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12256193738921380409": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9096495972770198040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3056212889689424946": ["convolution_gpu_bfyx_1x1",2],
+        "426827405952656362": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "12871555773123368130": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11088324811742486481": ["convolution_gpu_bfyx_gemm_like",2],
+        "4398371999113956082": ["convolution_gpu_bfyx_gemm_like",2],
+        "14774814395786139876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16242136888057221574": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13777174566683935109": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5337351591182109481": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "6249875772709398338": ["convolution_gpu_yxfb_yxio_b16",2],
+        "913861052717410566": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1114679698826953542": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "5516518048239364231": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "2581414750854621875": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "12327057172281102984": ["convolution_gpu_yxfb_yxio_b16",2],
+        "576164857039495839": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18184621367843960190": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "6341363789473021047": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1325669650629605592": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5106072383853469966": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7800262579057534804": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10935309102034762723": ["convolution_gpu_bfyx_1x1",1],
+        "9453100135791813000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",2],
+        "3856976081672275637": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "6288489890578212082": ["convolution_gpu_bfyx_gemm_like",2],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "4937688558707451907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6317575981520135028": ["convolution_gpu_bfyx_gemm_like",1],
+        "15531306520021286502": ["convolution_gpu_bfyx_gemm_like",2],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",0],
+        "13809330759308309353": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7683334381958571864": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017824560305532066": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",2],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "2282123636764935353": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13978649386370395620": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "5942742563827424666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "671453551040072499": ["convolution_gpu_bfyx_gemm_like",2],
+        "2737064424879246276": ["convolution_gpu_bfyx_gemm_like",2],
+        "18087356517015630281": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "3336076058264596420": ["convolution_gpu_bfyx_gemm_like",2],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",2],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12370729327673204804": ["convolution_gpu_bfyx_gemm_like",2],
+        "13251091004269229867": ["convolution_gpu_bfyx_gemm_like",2],
+        "2817919813339364130": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",2],
+        "12768933181342249823": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "2173720698351153121": ["convolution_gpu_bfyx_gemm_like",2],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "11109044986816563101": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17682152011630274259": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18214716801063702171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",2],
+        "15636128989267984459": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13409744191227471760": ["convolution_gpu_bfyx_gemm_like",1],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "57372993988016244": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "13912728810446567016": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "14324166291904435508": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15418732002117930760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",1],
+        "9017605508157213607": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17617204422090117691": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2506424495656099512": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "3423717644513543253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",1],
+        "3652414035262499383": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",2],
+        "17811558714592064184": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "12680688623162482255": ["convolution_gpu_bfyx_1x1",2],
+        "7824075236081312706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15159534367247036982": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14074996784220709246": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8021915447462898777": ["convolution_gpu_bfyx_gemm_like",0],
+        "1973819632224480598": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12411075288896909468": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",2],
+        "2908156087871187676": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",2],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",2],
+        "13851851281384416649": ["convolution_gpu_bfyx_1x1",1],
+        "6217542346826403576": ["convolution_gpu_bfyx_1x1",2],
+        "11557032521956761994": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "13199672084171648305": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3225866261943242708": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10765280349477640969": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "17876939980356283351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",0],
+        "6280726148869856021": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10471519687597963116": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8470959792634864749": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13501352378461071771": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "14908477489231326997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "12767065362702304803": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "8039645104667120991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",1],
+        "563440246018637010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17361714725103230834": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "16182470664818268848": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5668538167635622474": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "7532088618116521936": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13130001092233798285": ["convolution_gpu_yxfb_yxio_b16",2],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8185193068790365354": ["convolution_gpu_bfyx_gemm_like",2],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "17225552472711821360": ["convolution_gpu_bfyx_os_iyx_osv16",946],
+        "4759671642533786591": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "14132543442791497311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16341131728764501904": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8183383667948205424": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4701235352806075765": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "13527018660229167386": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8943913562339525413": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4274425737610351312": ["convolution_gpu_bfyx_gemm_like",2],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",2],
+        "4378422094110940766": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5284456216115118110": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "2439993891369206440": ["convolution_gpu_bfyx_1x1",2],
+        "7902473777019759045": ["convolution_gpu_bfyx_gemm_like",2],
+        "10322427853063201289": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "15497797842820949408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7762916621662364082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4072951883124129646": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",2],
+        "12384317536636082264": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15578456771467281881": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12741762570001404232": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2581014920570427861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5854093367753757010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15104727000375811836": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13966416504547680082": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16620268338434572068": ["convolution_gpu_yxfb_yxio_b16",1],
+        "875296362957469305": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7405315582091905378": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12961109385388101976": ["convolution_gpu_yxfb_yxio_b16",0],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",1],
+        "10463632805036507382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1485662490111767875": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "5552699731399195573": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "13038533272699602337": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12013818650853034767": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "11079061135559995449": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7364084475361144967": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",2],
+        "7441188930428385142": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3830842631023415233": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11073090858361674041": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8611873585228858719": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",2],
+        "3047407458812880288": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1123577455191848310": ["convolution_gpu_bfyx_gemm_like",2],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "17737878867906137388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "16788715253205076219": ["fully_connected_gpu_fb_oi_ref",1],
+        "17559750858236255044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",2],
+        "10002044609138970243": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11012427206693842637": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "5977875644245993099": ["convolution_gpu_yxfb_yxio_b16",1],
+        "411016281538345537": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10766317990628501609": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "9737833587413114584": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16161974964662774501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7571716782558859443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13291402786934990349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8177017967170389275": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15811723176266128065": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "18033349045324117723": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11500205299047837289": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4947788161154370784": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "13585916416233680276": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "8611710048909301596": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3366647240745174769": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5421397731090158382": ["convolution_gpu_yxfb_yxio_b16",1],
+        "577844026691991089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6962062962411903140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1148949417144436507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1208665743495618456": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5788018146987909930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",2],
+        "3914143598803149415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",1],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11921652085115182024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "586134723922638373": ["convolution_gpu_bfyx_gemm_like",2],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",1],
+        "9955816463820554626": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10560559646371329711": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18279416225045612845": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2819475920524949313": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7861119251077361882": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7369109502608631066": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1216021647922150199": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4238885454989272754": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "2816339200381598722": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "1141277975467180549": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "7107677063657303327": ["convolution_gpu_bfyx_1x1",2],
+        "8079914471491171372": ["convolution_gpu_yxfb_yxio_b16",1],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "14263790627243107300": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",2],
+        "6859143702528475520": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4098581145478965082": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11241838709529552265": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "331661172067077796": ["convolution_gpu_bfyx_1x1",2],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "3932617680771387232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18161786710055240343": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "12388894315292201102": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "6723804327185132790": ["convolution_gpu_bfyx_gemm_like",2],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",2],
+        "15576534481170615301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7223737889890738294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16341700680310033430": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "10996429218747311159": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "5895417825685090256": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "7317391511452227268": ["convolution_gpu_bfyx_gemm_like",2],
+        "2147962310424425158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",2],
+        "9940761514291929473": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5288793454052261767": ["convolution_gpu_bfyx_gemm_like",2],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3806806400778685133": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16161112020028389294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "7590734607006912544": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17270057383792994793": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13830605041347009953": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14273849038400888518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "360064276184684693": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4597873630741623918": ["convolution_gpu_yxfb_yxio_b16",1],
+        "69832608384091511": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7260204889552803221": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14034525799882831106": ["convolution_gpu_bfyx_gemm_like",2],
+        "4169042131399110713": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1089944493540593798": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "8931469268093714938": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4208702365182336507": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13914239937595549448": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15488532485794545310": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3231651468686543808": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "241656278218999298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10956668791040094584": ["convolution_gpu_yxfb_yxio_b16",2],
+        "844576097677576405": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3631332752661975859": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15421280195211166867": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14823789570149356458": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7104266560248570112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",265],
+        "8075261051536686307": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "4142555169083069413": ["convolution_gpu_bfyx_gemm_like",2],
+        "12501619443242354860": ["convolution_gpu_bfyx_gemm_like",2],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11987564534722442223": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "13200834963067135502": ["fully_connected_gpu_fb_oi_ref",1],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",2],
+        "14280056365441354869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3766048787611884529": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12878346173547852969": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8723078862651154959": ["convolution_gpu_yxfb_yxio_b16",2],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",2],
+        "8115522418294960470": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15998609626878578708": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2149299205144202701": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5940007433515335594": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1539677456611270609": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4683320313995550908": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15060535689318007173": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "18239740525818575112": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9814647153117279415": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",1],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",2],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",1],
+        "15932838442166411183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11971853138084108953": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "721174714308243785": ["convolution_gpu_bfyx_gemm_like",2],
+        "6290584630172122012": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "13636407347458845915": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15534876725099279666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9967611023372430532": ["convolution_gpu_bfyx_gemm_like",2],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "4201057957682777280": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "12311849904266608701": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10815244730103375973": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",2],
+        "12526988667216482085": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13077917010686381919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16828961272295386615": ["convolution_gpu_bfyx_os_iyx_osv16",539],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "12046017161414846599": ["convolution_gpu_bfyx_1x1",2],
+        "17344974951998490453": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8622014461615231500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15438470456977849772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1868805550246252143": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7846384623429362522": ["convolution_gpu_bfyx_1x1",1],
+        "6388117241933586388": ["convolution_gpu_bfyx_gemm_like",2],
+        "15188570678726970998": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12002302929446578025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",2],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14808759315730413993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3211829722778368758": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "16950925976172895196": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "3370082268529091875": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "7493567975736494003": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",2],
+        "4683575221310726091": ["convolution_gpu_yxfb_yxio_b16",2],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5401380444992462053": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11052275099129482401": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "18249888571553409563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15612334131144235342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3001162215282339268": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "16895523130717954500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14236681916032484600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "2339864165283480961": ["convolution_gpu_bfyx_1x1",2],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2247717767819293683": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4818598834950786080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",2],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "16335738565228204503": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "17224104246148265328": ["convolution_gpu_bfyx_gemm_like",2],
+        "13182623473102074079": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11157773554806649837": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12267555886404772991": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2847490224869294354": ["convolution_gpu_bfyx_gemm_like",0],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "2215570184121152738": ["convolution_gpu_bfyx_gemm_like",2],
+        "5584145249514762750": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",565],
+        "10837496380266058422": ["convolution_gpu_bfyx_gemm_like",2],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "7913076120244203725": ["convolution_gpu_bfyx_gemm_like",2],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "16773645387243701837": ["convolution_gpu_bfyx_gemm_like",2],
+        "4049224463072418218": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",1],
+        "17462996923473002801": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5965451243366505522": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "11614353411428360211": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7565348337952384040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3782308167335660154": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "8779960552750034544": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3934090072734175564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "880603384896315783": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1658174263018326745": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2917735110073643952": ["convolution_gpu_bfyx_gemm_like",2],
+        "9280279544075738476": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12131461096501477069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14585000863294748739": ["convolution_gpu_bfyx_gemm_like",2],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "14712972289919865502": ["convolution_gpu_bfyx_gemm_like",1],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11761545976388416063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6769524481210107636": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17107083637007906184": ["convolution_gpu_bfyx_gemm_like",2],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8469874583725132145": ["fully_connected_gpu_fb_oi_ref",1],
+        "4423866541063606768": ["convolution_gpu_bfyx_os_iyx_osv16",949],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",1],
+        "11298854310398101852": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4933831571091731212": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10718764522366711114": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10912495395422146386": ["convolution_gpu_bfyx_gemm_like",2],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",2],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "15886016297043613632": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15980348884716629349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",1],
+        "12389854459474697184": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15693204620575485046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7329924387620542330": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "14528180674573671874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5931972000452008090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "11291868421122092629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2923543983518895756": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8506271633579173639": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2759142157812694203": ["convolution_gpu_yxfb_yxio_b16",2],
+        "294153950488131608": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1569043950563130463": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4216958486055161753": ["convolution_gpu_bfyx_os_iyx_osv16",105],
+        "6388086351909447495": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",1],
+        "15267084369543546013": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8260073247636023575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",1],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",2],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12713821004129672990": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7780140599533242850": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12816344078518706065": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3499406509137418124": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "5291817530552764387": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12977678792503377525": ["convolution_gpu_bfyx_gemm_like",1],
+        "9827201026276954165": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15971340431600153619": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "9162862507585693061": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14963449045970262346": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16949056117405140365": ["convolution_gpu_bfyx_gemm_like",2],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "17764795635957985989": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",2],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "5672464491301994292": ["convolution_gpu_bfyx_gemm_like",2],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1898243736289257252": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4617347486560666277": ["convolution_gpu_bfyx_1x1",1],
+        "7273427309587902237": ["convolution_gpu_bfyx_gemm_like",2],
+        "2866656294663853474": ["convolution_gpu_bfyx_1x1",2],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12903015669020591018": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8941904405273405481": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "16290551573997593168": ["convolution_gpu_bfyx_gemm_like",2],
+        "14944590179685661287": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",1],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "8323669961818535927": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12623375499927200341": ["convolution_gpu_bfyx_gemm_like",2],
+        "10141927023849730720": ["convolution_gpu_bfyx_1x1",1],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "1390379098099686972": ["convolution_gpu_bfyx_1x1",2],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "4982549855424649217": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15295172519920136220": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15750539817895707253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12585864429067596351": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16725049805030712400": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3738514326459749974": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",2],
+        "11800783548769329949": ["convolution_gpu_bfyx_gemm_like",2],
+        "13598062803968442253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17498483343394902796": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12027202455592387086": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5834825835421819800": ["convolution_gpu_yxfb_yxio_b16",2],
+        "816527348871309530": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8321204816277460837": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10626018319543075871": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",2],
+        "11891319657803057127": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3308955824300750921": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1095959046309466012": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14184895905338394239": ["convolution_gpu_bfyx_gemm_like",2],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",1],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "11706446082856895571": ["convolution_gpu_bfyx_gemm_like",2],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16563030700888982979": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",1],
+        "430132942408244070": ["convolution_gpu_bfyx_gemm_like",2],
+        "11299021927882809469": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9562527071055150197": ["convolution_gpu_bfyx_1x1",2],
+        "1250095876638711647": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "14079654309452583394": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7742126547476513275": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3343020946662226400": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",1],
+        "2670216237572554944": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "15363606233048272809": ["convolution_gpu_bfyx_1x1",2],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13735180250757239202": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",2],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "7395419333138772074": ["convolution_gpu_yxfb_yxio_b16",1],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "13158449455164143947": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "2782970766870172398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15449715596597016714": ["convolution_gpu_bfyx_gemm_like",2],
+        "9809458159478958866": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "11224051407822914513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",2],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "4897690791599638716": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4776685525963461501": ["convolution_gpu_yxfb_yxio_b16",2],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",2],
+        "16742058312847401360": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17266121859044814533": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "14764715930784496165": ["convolution_gpu_bfyx_gemm_like",2],
+        "17580363505072477558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14578867494693499627": ["convolution_gpu_bfyx_gemm_like",2],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",2],
+        "15148442194461613102": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5941298590926032148": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7126667413990834481": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "223412492545617963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13621771094745539509": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9871407256481442790": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",2],
+        "7815650257256675477": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "4461989328775275994": ["convolution_gpu_bfyx_gemm_like",2],
+        "13123709697607309884": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14331554754171207866": ["convolution_gpu_bfyx_gemm_like",1],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",2],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16597170760061556882": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "15106614232165315070": ["convolution_gpu_bfyx_gemm_like",2],
+        "12913866095318048752": ["convolution_gpu_bfyx_gemm_like",2],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "8943651590146149679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6469277112054008613": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "18322435770607273817": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6862489207967519978": ["convolution_gpu_bfyx_gemm_like",2],
+        "11051684565403294370": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14262482011051329729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1498389965422474930": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14766477690417085350": ["convolution_gpu_bfyx_1x1",2],
+        "14819324687394700033": ["convolution_gpu_bfyx_1x1",2],
+        "4574541202890196191": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5884802375772043861": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9272405129875537865": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14445031303145992349": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "15310474203328198827": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",1],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",2],
+        "17969195175890497912": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9162359935098885411": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "3364141707903132298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3647203315640064927": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17342758321852264926": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4438526427135833402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",1],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",1],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "130427456111826171": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "10572945270796129630": ["fully_connected_gpu_fb_io_ref",1],
+        "4936961129835214448": ["convolution_gpu_bfyx_gemm_like",2],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11148428797294511280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13408839571805750778": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14558572801374416278": ["convolution_gpu_bfyx_gemm_like",1],
+        "10005348255972308430": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",2],
+        "8876704486585503280": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",2],
+        "10174346112533671798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1527126728636583082": ["convolution_gpu_yxfb_yxio_b16",0],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",2],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9538863363710651909": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5224252360611200472": ["convolution_gpu_bfyx_gemm_like",2],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "9642965664913867675": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7397376454528841634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14742998604680438008": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "6921081008428242060": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "10939522663236304689": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13155570698198686211": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "11939914680143672459": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "9590161922224578217": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2797436491596125131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "7600034850149968684": ["convolution_gpu_yxfb_yxio_b16",0],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",2],
+        "6839795451275143093": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",2],
+        "11277866878590984477": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9918371346247634545": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "14258941821319200170": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2447893458816856522": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078168059698267650": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2367452220382767844": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3987482581128838173": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "10762489947656697207": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5924698731432597368": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "5422432655714154738": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3285968426413869315": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "13320473279945887641": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",1],
+        "5008350851224686853": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "13668072006310741601": ["convolution_gpu_yxfb_yxio_b16",2],
+        "994252691216116396": ["convolution_gpu_yxfb_yxio_b16",1],
+        "149810021216592597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "4633763257197651352": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16209868158768307271": ["convolution_gpu_bfyx_os_iyx_osv16",919],
+        "10572380563704942622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11411413051626428349": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8058419689646625853": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7590390572139249734": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "720558977788683564": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",387],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7814543122045448412": ["convolution_gpu_bfyx_gemm_like",2],
+        "17538518333907257868": ["convolution_gpu_bfyx_gemm_like",2],
+        "2283020548041814543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",1],
+        "14651159827389223108": ["convolution_gpu_bfyx_gemm_like",2],
+        "17224655686568797096": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1703738105910059846": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14215394208930955062": ["convolution_gpu_yxfb_yxio_b16",0],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "7678226048807568024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "1308980444055174254": ["convolution_gpu_bfyx_gemm_like",2],
+        "4727628999533330347": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "14149210193687890597": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14769111376729628572": ["convolution_gpu_yxfb_yxio_b16",2],
+        "501138469231848694": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15645112311663561994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5041922366297242362": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16271970578584267980": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "8494385862885499798": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7400937639903461446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18384215264061386089": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",2],
+        "15269988216002549857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8846314870152404018": ["convolution_gpu_bfyx_gemm_like",2],
+        "18373951194274306895": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "3101885395179993708": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14315760630997175346": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12331134162344797761": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6254161707168091438": ["convolution_gpu_bfyx_gemm_like",2],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",2],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",2],
+        "897253033961107413": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15206249797344242666": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16210934187492210542": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18436249934780056991": ["convolution_gpu_bfyx_os_iyx_osv16",296],
+        "4104679489383377966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3713558537660711857": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10961696014697611547": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2685061316482503878": ["convolution_gpu_bfyx_gemm_like",2],
+        "12487879163561616870": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10996596479775375564": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "15050884844653850678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12681408370704556588": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",1],
+        "13636859714649629789": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18071280811713424504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "698274493570551388": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10034575179959785704": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "17616719165728687438": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15308196586729169691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",2],
+        "10782169939706303899": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17806747473167329833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6438721407426283362": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "9714508918051740792": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "740260423018155343": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "3662747857062156477": ["convolution_gpu_bfyx_gemm_like",2],
+        "13637537549252005181": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "12896226291465522304": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "9906138392975645747": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11730276873446857018": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",2],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",1],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",275],
+        "15640466585550013905": ["convolution_gpu_bfyx_gemm_like",2],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",1],
+        "14193777296032212476": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6776601719651959634": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2863465257341735941": ["convolution_gpu_bfyx_1x1",1],
+        "11634932044447867039": ["convolution_gpu_bfyx_gemm_like",2],
+        "8096131027165540886": ["convolution_gpu_bfyx_gemm_like",2],
+        "15192230303376521834": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "900243696733233996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7927587739463421727": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",2],
+        "10645625090439446714": ["convolution_gpu_bfyx_gemm_like",2],
+        "4252157815622916471": ["convolution_gpu_bfyx_1x1",2],
+        "3135889221160961020": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5886032409392368342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2740885908397449753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",1],
+        "15609860394182767048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11311839946200066200": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16910952799476896905": ["convolution_gpu_bfyx_gemm_like",2],
+        "1597770067928214597": ["convolution_gpu_bfyx_1x1",1],
+        "1802510952374368682": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10893432143734884603": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "1662588605309237309": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "14020956765444878761": ["convolution_gpu_bfyx_gemm_like",2],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9482749589540764069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12053562297742437099": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6883767567034259453": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17725637691681205907": ["convolution_gpu_bfyx_gemm_like",2],
+        "14446688005815492020": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",2],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "4752129805031267391": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14689423748560749566": ["fully_connected_gpu_fb_oi_ref",1],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "3935750066315595083": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13503555814874045782": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "5600807544955072308": ["convolution_gpu_bfyx_gemm_like",2],
+        "8652128863605749877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4521622755195947253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",281],
+        "11732321796147239597": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9285202897230250613": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10070051133200561606": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13520876347177213888": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "5671289201458690944": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "486816652607164926": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3112648799276134590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17477062954520561609": ["convolution_gpu_bfyx_gemm_like",2],
+        "208915399644127739": ["convolution_gpu_bfyx_gemm_like",2],
+        "5596408142536691534": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5632958791318880428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6159729136505378486": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",2],
+        "9263784636194609884": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11942424927004660476": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "6423354409210936959": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "6525496212688896740": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",2],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "9890700023578477203": ["convolution_gpu_bfyx_gemm_like",2],
+        "3541538046227217664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15217573782563469232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11587239927319376658": ["convolution_gpu_bfyx_gemm_like",2],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "15985980444340490463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1044889231088602677": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14264584839702225855": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1186545671730357033": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",2],
+        "6469003096932778978": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "11787674847611032323": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15646081020506130125": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "9735280865199145311": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3265415000818832667": ["convolution_gpu_bfyx_gemm_like",2],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7859659993155959174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",1],
+        "2204178900998688268": ["convolution_gpu_bfyx_gemm_like",2],
+        "10718639465064821919": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "1161304401293419103": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12686015414958770329": ["convolution_gpu_bfyx_gemm_like",2],
+        "17051718450741106678": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7624259732952222597": ["convolution_gpu_bfyx_gemm_like",2],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "14349625788399542568": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17945600479510493949": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3383222668132648804": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7210854698870587826": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1298596164164324360": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7162155897369277782": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5317076157086789437": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9900658671239107502": ["convolution_gpu_bfyx_1x1",2],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",1],
+        "14339479547451422762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13576010631084066792": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4353583636655606632": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16020916772006653269": ["convolution_gpu_bfyx_1x1",1],
+        "5596441339918073261": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "733956743303342862": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6880746917399866285": ["convolution_gpu_bfyx_gemm_like",2],
+        "6992073477131490452": ["convolution_gpu_bfyx_gemm_like",2],
+        "15865753975271064117": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15275978123703636572": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "16748662918272106932": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",2],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "8174040194088942964": ["convolution_gpu_bfyx_os_iyx_osv16",945],
+        "4839205075057964902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14501815053459103515": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3684792790546138809": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12032580551021546487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6709083009339039603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16828388628569377322": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "11637325834858582585": ["convolution_gpu_bfyx_gemm_like",2],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "1507504848332592003": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16426655160932259558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "8527069404111265568": ["convolution_gpu_bfyx_os_iyx_osv16",434],
+        "7280502812960451465": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16532743776403877084": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5657471280535146301": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2085467192625870436": ["convolution_gpu_bfyx_gemm_like",2],
+        "7168028033666253263": ["convolution_gpu_bfyx_gemm_like",2],
+        "12303905514885913537": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15612797125081819500": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9452094307760005150": ["convolution_gpu_bfyx_gemm_like",2],
+        "13862199647000195451": ["convolution_gpu_yxfb_yxio_b16",2],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "8372855367097191197": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14544219140091420262": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17228615388053183744": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16606674008248299103": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "8726274320876550785": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12892265081710606252": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7826406759309418010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8078028207842958010": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "6631103268546309714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "231083216612056805": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9951951467222189282": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12755692101476964677": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "2295659951331099829": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17019474731460049248": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16579057939215877904": ["convolution_gpu_bfyx_gemm_like",2],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",2],
+        "11279789373735965856": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",1],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3499106702307464480": ["convolution_gpu_bfyx_gemm_like",2],
+        "8541982562061181756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "6808980404170272597": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "13234872695521811652": ["convolution_gpu_yxfb_yxio_b16",1],
+        "921209976738626097": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17321934232458063571": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2042946928570163140": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17542035367134614728": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "13634686998599681086": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13223232888554043645": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3531786338249174486": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2242602888499888844": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14363025045807200040": ["convolution_gpu_bfyx_os_iyx_osv16",541],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9291397338108903174": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17264010982688979937": ["convolution_gpu_bfyx_1x1",2],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "4819131094439732065": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7748233564411787605": ["convolution_gpu_bfyx_gemm_like",2],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11972290239275366299": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2553539191926275121": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "2133236128630074068": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3571330754519284334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3574585436812909168": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4561778392194061215": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3701838669605585798": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14466032674083938714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15783429395177379897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6685985905221810743": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2102169562353089558": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "13810735868750326592": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "4883588237027084166": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8219179055259247644": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15548854462657362014": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17769159396346490074": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7263796835299019284": ["convolution_gpu_bfyx_gemm_like",2],
+        "3477539135137665170": ["convolution_gpu_bfyx_gemm_like",2],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "10049294964307823692": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "12900949103593247293": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13760645810144930270": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "15597317305719116351": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "9778670810863940690": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",2],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "13101474064130881526": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "18029395208219861440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "3441335188113424896": ["convolution_gpu_bfyx_gemm_like",2],
+        "11267495078361954131": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1520529227443340435": ["convolution_gpu_bfyx_gemm_like",2],
+        "8300290944865904942": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12561177248542630652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11300415556407923335": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3503893875515897267": ["convolution_gpu_bfyx_gemm_like",2],
+        "7241156141838776126": ["convolution_gpu_bfyx_gemm_like",1],
+        "15138641310139776109": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15483343060578660278": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "6051877311645456194": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5115007207028125638": ["convolution_gpu_bfyx_gemm_like",2],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17965267346493659374": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "530973311459168543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15988378956341507229": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6648876837655776653": ["convolution_gpu_bfyx_1x1",2],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "10645057595080511813": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3835387982926010630": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4802014352392262053": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10577357333308653027": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "1375084615110147615": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14532519639619315651": ["convolution_gpu_bfyx_gemm_like",2],
+        "7027962921778599989": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",2],
+        "6970636030494405299": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "12375919467924385618": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "10531218595816974659": ["convolution_gpu_bfyx_gemm_like",2],
+        "15757308772667178999": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1450861513159359637": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5062815196458225737": ["convolution_gpu_bfyx_os_iyx_osv16",487],
+        "5464801565268066541": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "17397600088595751782": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5525691792821548743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11910900938442124765": ["convolution_gpu_bfyx_gemm_like",2],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "13575423234109624706": ["fully_connected_gpu_yxfb_ref",2],
+        "480310470450900836": ["convolution_gpu_bfyx_gemm_like",2],
+        "4656068024153891922": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14616413139039308367": ["fully_connected_gpu_fb_oi_ref",2],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14971506154649368216": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",1],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",1],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",2],
+        "10055247339012492459": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "2356785927637873692": ["convolution_gpu_bfyx_gemm_like",2],
+        "959666756751640874": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11002875874008272679": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1582751548472076534": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5032841266226405428": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14553813154800569861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12825407709419526493": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9525535670799618110": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289082888174784976": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "5541365322085427177": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17546090415334871175": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10617442099961865960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8509024280905303927": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5924271203978892761": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",2],
+        "9440117898128288296": ["convolution_gpu_bfyx_gemm_like",2],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10009559358571629502": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13571587312517912280": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2705031521944165712": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "12210280332071091209": ["fully_connected_gpu_fb_oi_ref",1],
+        "2761862049452027986": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",904],
+        "11626402549863483301": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "9250030880535336888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",596],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "10117092543913369513": ["convolution_gpu_yxfb_yxio_b16",2],
+        "708747442142592697": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16075006181495932250": ["convolution_gpu_bfyx_gemm_like",2],
+        "16996895381161031110": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10015368609444108372": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17958575161092859465": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12619739385084492771": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7349168847581850619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14801210545983960599": ["convolution_gpu_yxfb_yxio_b16",2],
+        "488798544312719183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "415826393421796195": ["convolution_gpu_yxfb_yxio_b16",2],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "2321148334382088982": ["convolution_gpu_bfyx_gemm_like",2],
+        "3563872903821081702": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "143667964449473415": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7469107606686458209": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",2],
+        "14943031375539993004": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14307705501349750896": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8094920912208664820": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13111122805945249561": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2052010432187897741": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "11822555173696078282": ["convolution_gpu_bfyx_gemm_like",0],
+        "11612209645710419427": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9062781751511609244": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5250257911846706612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8881906040469243354": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5941852872160795604": ["convolution_gpu_bfyx_gemm_like",2],
+        "13598984763955239116": ["convolution_gpu_bfyx_gemm_like",0],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",2],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "7105279481103494151": ["fully_connected_gpu_fb_oi_ref",1],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "2803569867265035123": ["convolution_gpu_bfyx_os_iyx_osv16",1029],
+        "7720153213673170931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4010419602093863685": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7274647463152753603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17030051116023319382": ["convolution_gpu_yxfb_yxio_b16",1],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",2],
+        "4802009650745059499": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "138379779469699309": ["convolution_gpu_bfyx_gemm_like",2],
+        "14968401410355925289": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11132679855317294753": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15600841108426475615": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",2],
+        "3114210363452108737": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "1895560603400089814": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11516184047320372729": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15848096609835347542": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8898095926967052382": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_gemm_like",1],
+        "17822988909419777692": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15006321421735686121": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "9434143681116089888": ["convolution_gpu_bfyx_gemm_like",2],
+        "17712558058168648648": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "994842991399671507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "8163000689380461611": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14612206111651511130": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10065714384927707796": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2915777749501772828": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9588943054777767098": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "13851025202247070979": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13380637319403400851": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3321251856445833973": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13325287783358291692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7863319552895863063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1771347579022727189": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",2],
+        "981803877097233095": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3784684114139223050": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13731797251725972855": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17228810554159747400": ["convolution_gpu_bfyx_gemm_like",2],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "518733575377143679": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10324485383646920518": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "14066675688397331406": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6730447536124542965": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8537824547722216155": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6344600111737335616": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "2603233376890892194": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12600479027568241746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1379758215293949563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17893696934478535385": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11498084465186986412": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "13565691057064774487": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "11411580529501121244": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "9569522500959727054": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6371463287631658789": ["convolution_gpu_bfyx_gemm_like",2],
+        "10330180429524641331": ["convolution_gpu_bfyx_gemm_like",2],
+        "2984726467649419856": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "4740864135937875560": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",2],
+        "10106454449619141260": ["convolution_gpu_bfyx_1x1",2],
+        "1594829714229111215": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "727216855315869048": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5044721291675005144": ["convolution_gpu_bfyx_1x1",2],
+        "8712136292276123857": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "9909564412554801760": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",1],
+        "8609939102588915855": ["convolution_gpu_bfyx_gemm_like",2],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "3219408878901707426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10022487076451608714": ["convolution_gpu_bfyx_gemm_like",2],
+        "1338705434700924127": ["convolution_gpu_bfyx_1x1",1],
+        "2737352811173555281": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",2],
+        "14311888412221174224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16015963261509760799": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "11376953876369788199": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3463206409786541741": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15217077412685024074": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6792281830591233968": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9504349455215835807": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",0],
+        "13602140021189675477": ["convolution_gpu_bfyx_gemm_like",2],
+        "13369751385866224286": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6367371992814643260": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",2],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6260684231055362504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1088710562928089772": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",1],
+        "14383657211047876136": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "3163833930628348446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "9999543693712389402": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4099828484175044842": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2172999245833525797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259611546528256409": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9439431829175743345": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10722677916294015259": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13456967132681889167": ["convolution_gpu_yxfb_yxio_b16",2],
+        "104765009188090817": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",1],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "7870154008378361670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",1],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "10783046011829953095": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2609346307827449622": ["convolution_gpu_yxfb_yxio_b16",2],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "9314293064351558241": ["convolution_gpu_bfyx_gemm_like",2],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "11155444222714959508": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "5008541841892687897": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "5534071639452404412": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11872943152839631823": ["convolution_gpu_bfyx_gemm_like",2],
+        "8717456809499914445": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10254566865260697753": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13809046727894108358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3534874664568214253": ["convolution_gpu_bfyx_1x1",2],
+        "13717351126657739994": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10432365444137108781": ["convolution_gpu_bfyx_gemm_like",2],
+        "10009796094612770326": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11971736882960844905": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3567607339495161307": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14916236722843741326": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16955829428734830876": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9696168324381001582": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "11537166370263116277": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "4261192887643002603": ["convolution_gpu_bfyx_gemm_like",2],
+        "14041970415787494000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12643643553436503069": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7440953406601377619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7134419022268272901": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "14362876471450307424": ["convolution_gpu_bfyx_1x1",2],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "11280672272221124024": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3442845193734599342": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "9105127035114339269": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "5149303626508247520": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",1],
+        "13077012961563218195": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5642822685234782052": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "3748621266324665764": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15915715422308762909": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2909347733581487795": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13075579052866074866": ["convolution_gpu_bfyx_gemm_like",2],
+        "5209144536543011657": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8740268039366363321": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "17430994325635361377": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9293682866734263821": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5459463503840817402": ["convolution_gpu_bfyx_1x1",2],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",2],
+        "3805667660217578518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",2],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "14517191894006411358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1241355545294259810": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "378292944207609677": ["convolution_gpu_yxfb_yxio_b16",2],
+        "248133885018839814": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14697908554930995949": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",2],
+        "5355283113999405036": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8553491894663686698": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "8113660920207936963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6214677989814002369": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16626502801066228405": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",0],
+        "12867590715338247144": ["convolution_gpu_yxfb_yxio_b16",1],
+        "302694026179841870": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "10864011008000364415": ["convolution_gpu_bfyx_1x1",2],
+        "16527840366172690992": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18101509783610609787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4079026972040047969": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "16966477504105790279": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7183578232279711009": ["convolution_gpu_bfyx_gemm_like",2],
+        "4708035980731751007": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14115742296883450319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2343310394723780653": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7451154080124553318": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "5788323787676797805": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "230697511447695268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9775648000771985077": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10278515360013727367": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10524079700393212963": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "14918482938530107806": ["convolution_gpu_bfyx_gemm_like",2],
+        "10262850086265676378": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7289535479247584635": ["convolution_gpu_bfyx_1x1",2],
+        "17377204616846724192": ["convolution_gpu_bfyx_gemm_like",2],
+        "402932154499003993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5179013491581036103": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12362290144183018227": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "6114241186364821679": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17567012866823126402": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3889456478817717702": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "3398322619007806698": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "637115537820955017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9763754389347695094": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "11782188262748842182": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "2273992727647793692": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "12455871938978342189": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "3957253946857103590": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13369603621524676979": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "17817043205731836063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "512446355173752600": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3735753364888836383": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",2],
+        "10995886682834858002": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4030004320208162301": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11262989876326061679": ["convolution_gpu_yxfb_yxio_b16",0],
+        "748236447365453504": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7861234698413147249": ["convolution_gpu_yxfb_yxio_b16",2],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "4562591438007476419": ["convolution_gpu_bfyx_gemm_like",2],
+        "15293835051273372438": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8015885733173521367": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "17077815973022307612": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",1],
+        "10447947790216991304": ["convolution_gpu_bfyx_gemm_like",2],
+        "5721096633060535553": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12458921031453334451": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15816807118780455948": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",1],
+        "9834941975457910988": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6208201398783088425": ["convolution_gpu_bfyx_gemm_like",2],
+        "8265982881100325775": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8638074773026771425": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6846760451124717672": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "5465400164581117113": ["convolution_gpu_bfyx_gemm_like",2],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5359510718430377298": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4403753181729432604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "7600296832974673294": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3911736807429733938": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1701412735970485849": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "8527055001340219573": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2562815925396318565": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",2],
+        "13323186744342557015": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "9659837320293869285": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14188157670969097508": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2457671437276780303": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "6018481198468872040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "835053793432636355": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14337168375989245254": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9127827617126714860": ["fully_connected_gpu_fb_oi_ref",2],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8131879590716437354": ["convolution_gpu_yxfb_yxio_b16",2],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "1982176363226079588": ["convolution_gpu_bfyx_gemm_like",2],
+        "13145474177271090694": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "17174919737114915467": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5420215220876162902": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2129742884686884642": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4241055784642339756": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7933217973342728190": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5950285227163574810": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",1],
+        "10232429887105708502": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6439316331231400868": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4416793079965040181": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9526266653688168429": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15879385408480411034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",1],
+        "677249604491773387": ["convolution_gpu_bfyx_gemm_like",2],
+        "990199360818917334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4455369117448405874": ["convolution_gpu_bfyx_1x1",2],
+        "3369689552455141157": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5802466130040230797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "14526262781657292025": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12984970933638742657": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "9629460794894999510": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "601430670855155006": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12439827609628473238": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",2],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",1],
+        "17158401628206867933": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2114232149447438823": ["convolution_gpu_bfyx_gemm_like",2],
+        "7650862961269327235": ["convolution_gpu_bfyx_1x1",2],
+        "3940619509778739158": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7843180034077880658": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "17718424965214606218": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7287107719392705356": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",2],
+        "14199158130218117084": ["convolution_gpu_bfyx_gemm_like",2],
+        "394778201589371681": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "14807357397951247957": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4171374172427814762": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8262469434265124590": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16541970206584576833": ["convolution_gpu_bfyx_gemm_like",2],
+        "12004628115138530335": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",2],
+        "3725013268198063198": ["convolution_gpu_bfyx_1x1",2],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",2],
+        "14897384423894125457": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12011606174372081253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",1],
+        "8010456208258134834": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6538526180355194359": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18359731130169236059": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6097086855988597139": ["convolution_gpu_bfyx_1x1",2],
+        "9059418187274548462": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",2],
+        "1680468564927032670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4261215727469154244": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",2],
+        "14001048251986195179": ["convolution_gpu_bfyx_gemm_like",2],
+        "6726099352298108756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10225565543636007389": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13094289895577333088": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15599983560500910839": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18131954418490925431": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "1919535500129437217": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5539793555189956907": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "6624079551747071383": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16364494883229084045": ["convolution_gpu_bfyx_gemm_like",2],
+        "4723919313760470311": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11324651029379152442": ["convolution_gpu_bfyx_1x1",2],
+        "3358616456137155015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "264466528528245004": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4732226322522411018": ["fully_connected_gpu_fb_io_b8_f8_vload",0],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",2],
+        "4168273493370024327": ["convolution_gpu_bfyx_1x1",1],
+        "860852602930021016": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15581997249051127645": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "6136232084354304563": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2744566213784972700": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2412069259085234287": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11682041005124075890": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14281154151197472605": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",2],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "9433875341212148858": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "5735703235236456131": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "9815961128076948768": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9480653639044390919": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",2],
+        "4329042569031331949": ["convolution_gpu_yxfb_yxio_b16",2],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "1646638859396929303": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",1],
+        "789202969657820559": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12988253829685880778": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "8978764053524288494": ["convolution_gpu_bfyx_gemm_like",0],
+        "6935581283700404601": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "9436893310034662243": ["convolution_gpu_bfyx_gemm_like",2],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527276292172180386": ["convolution_gpu_bfyx_gemm_like",2],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1786732163438555728": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "12279771749366327372": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "2173867324489962689": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13530377297525480029": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12495003066477974474": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "9787359208094141129": ["fully_connected_gpu_fb_oi_ref",1],
+        "6709883527730513363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7172357320005702833": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16139615240471264488": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "6331794802915121861": ["convolution_gpu_yxfb_yxio_b16",2],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "8784358107340738205": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "2501411300945696806": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7378840969627751667": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14568618538516685994": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6126579157025017808": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8125500765566111746": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9663847096617096629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3924212595662208655": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8585205898894363799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "10184417796355593956": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7605652809856543211": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "8267783192628619295": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10358170616931426647": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "14770895149190975433": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2931988747601319855": ["convolution_gpu_bfyx_1x1",2],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "4306052436602921234": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",2],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "17054207561525574617": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",2],
+        "6479042072492268780": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10981374120597916521": ["convolution_gpu_yxfb_yxio_b16",1],
+        "18424611729838147994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7099035779223341587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9207799012657103903": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10811224523636009881": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15866935886105967122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8700574100180128776": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16270745071180354612": ["convolution_gpu_bfyx_gemm_like",2],
+        "10760094119259477688": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9169935203300589222": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15222260213708019662": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "15710826363434377015": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7332664632757815486": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "2706024586717944825": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6363788325163726004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14847662630748580880": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7075659071934895087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9694701402170070080": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7540655869186258692": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",195],
+        "16632786413927045192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "12213908871711628660": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3815222814331650224": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10833423331830484028": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",2],
+        "151851883170419907": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1798440805196304745": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15352064186447212862": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "2722062599746670336": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11070968498963106073": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "856949500975232838": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17705992851440826353": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "12771841901357553928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17084977396231597605": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "7992444232916226938": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4849343880559509889": ["convolution_gpu_bfyx_1x1",2],
+        "11086464266772450142": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15464554714318666871": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11799180632798787251": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "9078447949109922472": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "108442764389420633": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "15551338663759394064": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10538010212480716275": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14173531787508017136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",2],
+        "12867177334690636800": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "16585502133291740543": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11070446574652704629": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4023281997496669037": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",2],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "14263055580023018733": ["convolution_gpu_yxfb_yxio_b16",2],
+        "688897645422834994": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6232452664016831516": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15715029280006557222": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2840794055129352139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14905520834426630145": ["convolution_gpu_bfyx_gemm_like",2],
+        "16096353398003405565": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15997145184054496085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11585430081839020501": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15820005010263193043": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3141886504884887200": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",2],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "3427691447288240419": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14731054961557547253": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "6703148006012061136": ["convolution_gpu_yxfb_yxio_b16",2],
+        "382811963722907674": ["convolution_gpu_bfyx_gemm_like",2],
+        "12617736879671137111": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10419440621736450993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17211590259060346125": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13328583512713703122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "415232223198122046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14985236276429954162": ["convolution_gpu_bfyx_gemm_like",2],
+        "15487730714504758208": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "4299773714254046691": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "7615563770941714046": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6664482192233202590": ["convolution_gpu_bfyx_gemm_like",2],
+        "3155353791103196186": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3894121333485095575": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "9275371801303143499": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "3928356751040028375": ["convolution_gpu_bfyx_gemm_like",2],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",1],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",2],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "14510495923021693109": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17487594336237597163": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "8638227907054657946": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16574710115918192418": ["convolution_gpu_bfyx_gemm_like",2],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "8130920994920685157": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17647962002015093887": ["convolution_gpu_bfyx_gemm_like",2],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",1],
+        "4714289593698160876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",1],
+        "10415046594066474634": ["convolution_gpu_bfyx_gemm_like",2],
+        "12624762527234542946": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "15809639778580769565": ["convolution_gpu_bfyx_gemm_like",2],
+        "17587625589456309495": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8069537351442302814": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "2147896649835170790": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1142968634734769401": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5245526691775741296": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6300691162962736560": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "16653412888821076903": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5074273865983613482": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9375272277044782377": ["convolution_gpu_bfyx_gemm_like",0],
+        "10465119306486335226": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17436550598696178210": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1854612313463195535": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14758040027936817208": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",2],
+        "2807516818436584831": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1638858323987412931": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18035673326929466074": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "417352773179383568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4339711224604149541": ["convolution_gpu_bfyx_gemm_like",2],
+        "5266313052389515491": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7946262362930618714": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14835641172229643545": ["convolution_gpu_bfyx_gemm_like",2],
+        "5150256051921098637": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14001406016806064079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10899110544832584656": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6981294059746462667": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",2],
+        "9996142812492415452": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13835859040765465258": ["convolution_gpu_bfyx_gemm_like",1],
+        "13472577372534605883": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5762631094740444698": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "2920017342405650206": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11624226818593966530": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6461637373691101671": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "215512025430490450": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6070612528095353265": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",2],
+        "10656486867659934705": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16111630594575598044": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17439276474731842060": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10069896554844445748": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "3837190939606792435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "739676584505475609": ["convolution_gpu_bfyx_gemm_like",2],
+        "4306881509708040723": ["convolution_gpu_yxfb_yxio_b16",2],
+        "178353385245384751": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "1466455001976212160": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "15226556774612169126": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18255227391100087860": ["convolution_gpu_bfyx_1x1",2],
+        "16120120950870908964": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2219693989290882970": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "4091702228990140696": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10509933181132310969": ["convolution_gpu_bfyx_gemm_like",2],
+        "166091609652531090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "47872288115972996": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "18094592431313771787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8787438180071123604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",2],
+        "6445721440921372329": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17012832508134584917": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12264240305528403865": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4433497906256257606": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "7752913515036871482": ["convolution_gpu_bfyx_gemm_like",1],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",1],
+        "13390197134230598693": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10217182484138821482": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16780457022162749898": ["convolution_gpu_bfyx_gemm_like",0],
+        "5445584581720919223": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",1],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",2],
+        "6260115080574637314": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12294364015803004575": ["fully_connected_gpu_fb_io_block_fp16",2],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "438528596970898721": ["convolution_gpu_bfyx_gemm_like",1],
+        "3976197003067656339": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2263637493894079492": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15813044197987178947": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "17433340097721474017": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9515771738501683": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "6182829358839578529": ["convolution_gpu_bfyx_gemm_like",2],
+        "13803790014241837327": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13367787254519749641": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1454014148777456006": ["convolution_gpu_yxfb_yxio_b16",2],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",2],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "867868384380428650": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1068155851494601726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2621495864635590903": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2929190644951986399": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "6115915509370042166": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "1338581414403268264": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12850195004093999773": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9532499374173117612": ["fully_connected_gpu_fb_oi_ref",1],
+        "12061567381160185735": ["convolution_gpu_bfyx_1x1",1],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "9178915201681884122": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17045386022302353268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6107700818115209289": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15141893564826036993": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8984436655107983227": ["convolution_gpu_bfyx_gemm_like",2],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17766628441954343001": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "7823257556787476006": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2973337989445169388": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3010520839193613803": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8000679297338683619": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2598267743388306204": ["convolution_gpu_bfyx_gemm_like",2],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14840851809642905875": ["convolution_gpu_yxfb_yxio_b16",2],
+        "447943521999310356": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16361932270527364507": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "4422642146063042868": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9974905660671605427": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "18148431787172327554": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7338932272767555117": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5941092474669713339": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11604794601689380990": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "598214270378842167": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "824380206255396866": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10753540518493641553": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8613740762403897614": ["convolution_gpu_yxfb_yxio_b16",2],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",2],
+        "6286349307417232815": ["convolution_gpu_yxfb_yxio_b16",2],
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",2],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",2],
+        "5109636469531439569": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "12430677767405883160": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11563334365673075610": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9161616741940575576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10477588607457125173": ["convolution_gpu_bfyx_gemm_like",2],
+        "4723643671527109645": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "16549854027697846882": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16839741351990811959": ["convolution_gpu_bfyx_gemm_like",2],
+        "16243196137456624852": ["convolution_gpu_bfyx_gemm_like",2],
+        "13020331397245585657": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "14318347197994059448": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4251673416603443503": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6677367803113594603": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15298221796479574600": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5546447512898130524": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13854845390344305906": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16184142990117192433": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12703696322769371912": ["convolution_gpu_bfyx_gemm_like",2],
+        "2920840796593281126": ["convolution_gpu_bfyx_gemm_like",2],
+        "2064464435352777854": ["convolution_gpu_bfyx_gemm_like",1],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",1],
+        "16956263773967652552": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4927360358387344983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15438530452161762045": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",2],
+        "14288463473159113326": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15555083739490354527": ["convolution_gpu_bfyx_gemm_like",2],
+        "5479590921345335946": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "14421061973479991516": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "18076121920579110076": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "16998662249038174039": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "1240102354814495870": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "707979507145930311": ["convolution_gpu_bfyx_gemm_like",1],
+        "14795626641169374231": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "8512711227383782401": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6578804773136886939": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180491232489548313": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "3572202652824023801": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "3816979903860227798": ["convolution_gpu_bfyx_gemm_like",2],
+        "4790960977352818689": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "4868400250190558111": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3509502334639215181": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "3697631094971930011": ["convolution_gpu_bfyx_gemm_like",2],
+        "1467428583618467133": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "9335016444137172241": ["convolution_gpu_bfyx_gemm_like",2],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "8127853538569353431": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "1484007449719260391": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "9056812077282494074": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "7127306913758514626": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10209532888121442060": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "17354626928258309128": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1569111625440278287": ["convolution_gpu_bfyx_gemm_like",2],
+        "213518984547400496": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "384240534894352154": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "4732699611696731044": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "7059729537732609153": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "15743461017318513847": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "2778141440914991349": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4588420324030315321": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "885661562948597780": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15687441275464931484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "5358925179582853152": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "3610579553304450107": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "3047710665820732705": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8363432163596927598": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "11758765408733113291": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "5050495757462452653": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8399477322910720113": ["convolution_gpu_bfyx_gemm_like",2],
+        "8921169563466511475": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "12571532345206950176": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "9552615241912277692": ["convolution_gpu_bfyx_gemm_like",2],
+        "16628180201355989101": ["convolution_gpu_bfyx_os_iyx_osv16",884],
+        "17808913959977434594": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "286393043958202995": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "6258191734224827354": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "18043745678739016406": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "17946191056428828467": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6263019986730305851": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9546990560009724329": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "16462602383546733062": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "1350953652678789564": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "330278641539729021": ["convolution_gpu_bfyx_gemm_like",2],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2908856453997530641": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17059095074211347838": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "14668529234172928874": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "14346703182362139650": ["convolution_gpu_bfyx_gemm_like",2],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "10114123606924808948": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5280450544965361875": ["convolution_gpu_bfyx_gemm_like",1],
+        "15025260753866131193": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "604467633591545941": ["convolution_gpu_bfyx_gemm_like",2],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "7256947320128669983": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "16256970928603738516": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10269005969451576527": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "6745633232989303110": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12364947728685604753": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "12173409033330010794": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "11128727891847758901": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "1093840152689636371": ["convolution_gpu_bfyx_gemm_like",1],
+        "9714770878761308566": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15083602050538795803": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7527121935101118719": ["convolution_gpu_bfyx_gemm_like",2],
+        "5116562847410288642": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5385395378424322451": ["convolution_gpu_bfyx_gemm_like",2],
+        "11602830611894444581": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "2096167792705935744": ["convolution_gpu_bfyx_gemm_like",2],
+        "3433877094202077256": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "12610004507393467447": ["convolution_gpu_bfyx_gemm_like",2],
+        "15939740070666326125": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8422541638844255768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13082713280504953535": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8961544327690568390": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11883632480024839484": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5769404877199637961": ["convolution_gpu_bfyx_gemm_like",2],
+        "3296059171653513862": ["convolution_gpu_bfyx_gemm_like",2],
+        "9968496035529786888": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "3664842151999943": ["convolution_gpu_bfyx_gemm_like",1],
+        "11539652577193034099": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "2524233418633897945": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "3743573500773847162": ["convolution_gpu_bfyx_os_iyx_osv16",506],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "3813463368918975003": ["convolution_gpu_bfyx_gemm_like",2],
+        "7530197659550301431": ["convolution_gpu_bfyx_gemm_like",2],
+        "9700098364581157575": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4269447138276727632": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "1061595672605627170": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "7569785094993085356": ["convolution_gpu_bfyx_gemm_like",2],
+        "11504777464995699839": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "8224143262995973449": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1501328995320618233": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "16197538586133639338": ["convolution_gpu_bfyx_gemm_like",1],
+        "237384442106085756": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "15972830392998437739": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "15421166985948480394": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "6794427012971589670": ["convolution_gpu_bfyx_gemm_like",2],
+        "2420425134749678611": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "8050798452111667069": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "3668065353749623655": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "4251588408225461731": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16957170318200599740": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12644942072153919043": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17715478364817621621": ["convolution_gpu_bfyx_gemm_like",2],
+        "2854124603710900850": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "9380980604821454646": ["convolution_gpu_bfyx_gemm_like",1],
+        "1879844536951785808": ["convolution_gpu_bfyx_gemm_like",2],
+        "1086052166358768751": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "861813331533609605": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16440449399643706863": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11733721371402545268": ["fully_connected_gpu_fb_io_ref",2],
+        "15816540550252147706": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "13979227237506927267": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "10492401059875127091": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "4600698444492242585": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7157064096682175957": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "14221578799010900252": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "1285313118947640320": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "15858485865603722138": ["convolution_gpu_bfyx_gemm_like",2],
+        "2116524516810466877": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "12182468247297592907": ["convolution_gpu_bfyx_gemm_like",1],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "5582107298039488951": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "11773726534842908728": ["convolution_gpu_bfyx_os_iyx_osv16",187],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "8844619836383523698": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "14548629377527143409": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13366059704398720237": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "18349087959351486710": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15868648764972133201": ["fully_connected_gpu_fb_oi_ref",1],
+        "15322019609805777935": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "4451257789691974239": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1480287432874335824": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "13657522194775317201": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "8032685176029570383": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4334698056820320220": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "15378707205730840765": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5977248663249062384": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3170785962566427770": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2585176064846114298": ["convolution_gpu_bfyx_gemm_like",2],
+        "18337975902615310907": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6768322540857745605": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13657774210341324470": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3072535365860940873": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14230197617570499447": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "10049329759351957685": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10305912614137623024": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3896848534552901221": ["convolution_gpu_bfyx_gemm_like",2],
+        "7405835196787288054": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "7020655100877544328": ["convolution_gpu_bfyx_gemm_like",1],
+        "13174363822969694054": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13232269620066140073": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "10317038568333963064": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "2180753144963020203": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15271492161940795681": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12281346074445607180": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "8451179695288093195": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "2085738943081638802": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15563546888345388359": ["convolution_gpu_bfyx_gemm_like",2],
+        "8525389694584008001": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2481005139798378616": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "574359978358296617": ["convolution_gpu_bfyx_gemm_like",2],
+        "15764181772410734606": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9217386935739152562": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "12161602271403760008": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "9758907700230386910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8707189142909022305": ["convolution_gpu_bfyx_gemm_like",2],
+        "1375259485223819020": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9053383117071470496": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6261121070004228939": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1112828128944231163": ["convolution_gpu_bfyx_gemm_like",1],
+        "5843679089588930933": ["convolution_gpu_bfyx_gemm_like",2],
+        "11083777913844441475": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1923745286075356181": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3827177373408316820": ["convolution_gpu_bfyx_gemm_like",1],
+        "5488168361113140102": ["convolution_gpu_bfyx_gemm_like",1],
+        "7982628452987720190": ["convolution_gpu_bfyx_gemm_like",2],
+        "8140242320379485952": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "15615172858007002100": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15210302033167762581": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "17392347485675658099": ["convolution_gpu_bfyx_gemm_like",2],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "1231806423322813287": ["convolution_gpu_bfyx_gemm_like",2],
+        "166267183356660549": ["convolution_gpu_bfyx_gemm_like",1],
+        "8281212003098870446": ["convolution_gpu_bfyx_gemm_like",0],
+        "14650273075211365393": ["convolution_gpu_bfyx_gemm_like",1],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "12012860334670244716": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "15646774522467486699": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "18265901700619296616": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1653438360841004980": ["fully_connected_gpu_fb_oi_ref",2],
+        "6103824715103416420": ["convolution_gpu_bfyx_gemm_like",2],
+        "15409755591665753258": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16946947983339327902": ["convolution_gpu_bfyx_gemm_like",2],
+        "6431838057506760173": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "14705457019471647279": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6801897580177846120": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",472],
+        "16801553481899627402": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5339358831190803597": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11732742421854164761": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "14568560907026487922": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4184442166820068862": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "17967188184891337660": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",1],
+        "5109770354438894645": ["convolution_gpu_bfyx_gemm_like",2],
+        "4691552892932405676": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7331552952865138030": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1422402723172447295": ["convolution_gpu_bfyx_gemm_like",1],
+        "14292252222828824305": ["convolution_gpu_bfyx_gemm_like",2],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "16695020005258780885": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "6129884455218252024": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13772598362521854438": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "9940908487812223059": ["convolution_gpu_bfyx_gemm_like",2],
+        "4753055238892504599": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "15803050672115583478": ["convolution_gpu_bfyx_gemm_like",1],
+        "3154903035376733831": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9191832520273617003": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "7557446085365037177": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "6213353364768643062": ["convolution_gpu_bfyx_gemm_like",2],
+        "4035015193331696438": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "4368522743441422202": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "15974241934088373021": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17254775053427612466": ["fully_connected_gpu_fb_oi_ref",1],
+        "447683677378974131": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14244966672894707129": ["convolution_gpu_bfyx_gemm_like",2],
+        "7946776740333736799": ["convolution_gpu_bfyx_gemm_like",2],
+        "15496355513574200965": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "9239048433297419320": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12971833748980664090": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "1810943242998123550": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9767355861002822967": ["convolution_gpu_bfyx_gemm_like",2],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",0],
+        "11805311302922325617": ["convolution_gpu_bfyx_gemm_like",2],
+        "9788704336046308724": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15383553612351941890": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16590030963319267708": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "6739799137687789012": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "17442035600389810700": ["convolution_gpu_bfyx_gemm_like",2],
+        "10890975553758439233": ["convolution_gpu_bfyx_gemm_like",1],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "12409554044517232554": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "9796347091019799053": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "17508987219281192918": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "8670512344429807851": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13206826317378863148": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13727585908419292912": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "6996679663761370444": ["convolution_gpu_bfyx_gemm_like",1],
+        "13915749401892931804": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "16596028606733932975": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "4198666727524342442": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "16125365972873290572": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "9040046051053703359": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11918018989601427118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7312862821818362095": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "8357109553923988018": ["convolution_gpu_bfyx_gemm_like",2],
+        "8730097760819044515": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6218328594667952152": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3939977982577786175": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "18112958483003382733": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "8507854696766492454": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "5556023021504556658": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "17740553615487239243": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8684867236134349888": ["convolution_gpu_bfyx_os_iyx_osv16",193],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "18235067315439611192": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4965629769516591986": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "13537323999534292650": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3214253333840552610": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6478054912653910426": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "9154705094446538279": ["fully_connected_gpu_fb_oi_ref",0],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "6513705142577622089": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "1766961036311612128": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "977617597166653416": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17182558720652199559": ["fully_connected_gpu_fb_io_ref",1],
+        "17854138024884397413": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8426489532875918560": ["convolution_gpu_bfyx_gemm_like",1],
+        "17869697579874327192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10928995765778560784": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "770376597027620107": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17683350638672326642": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17790954200356837750": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "14349335089732252796": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11033507346101404633": ["fully_connected_gpu_fb_oi_ref",2],
+        "13775529405693629438": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "9459869325970475576": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "2542506456395240890": ["convolution_gpu_bfyx_gemm_like",1],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16510194749934323304": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "12952160708294444403": ["convolution_gpu_bfyx_gemm_like",2],
+        "11541706477255587105": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1771663698943903325": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "17771487895874668302": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "412314676462573090": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7367814057959247537": ["convolution_gpu_bfyx_gemm_like",2],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "1192709652314183388": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12427490329663434604": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "13170031087212196468": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "12381377111003298809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "16710651492402564794": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13447028922679236865": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "18026468427978643933": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "17285699593273891901": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "9963817056423168830": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "13388424034634316547": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "2780358937598873103": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "587350550384936211": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12534001599784153836": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "4082046235109198108": ["convolution_gpu_bfyx_gemm_like",1],
+        "2317476796706098254": ["convolution_gpu_bfyx_gemm_like",2],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "14296771090926462138": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "10853161782230763798": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8390953788659916133": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2310549887200001260": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4854802313728023001": ["convolution_gpu_bfyx_os_iyx_osv16",621],
+        "11264412030568042996": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "5906083739416582743": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8707484843981694525": ["convolution_gpu_bfyx_os_iyx_osv16",1021],
+        "2947753291378607664": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "17585852525746136080": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "2303141161423252932": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14039055710777697188": ["convolution_gpu_bfyx_gemm_like",2],
+        "3919577663893354177": ["convolution_gpu_bfyx_gemm_like",1],
+        "16578265652036967656": ["convolution_gpu_bfyx_gemm_like",2],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5643908654122573882": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",2],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "1465692634334679413": ["convolution_gpu_bfyx_gemm_like",2],
+        "13439272015824246074": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15781220232431782560": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2590380836212070761": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10437861085319472289": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "707449835235490641": ["convolution_gpu_bfyx_gemm_like",1],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "14667209474639064623": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15984373369388044924": ["convolution_gpu_bfyx_gemm_like",2],
+        "1486768204660092247": ["convolution_gpu_bfyx_gemm_like",1],
+        "8360628955300060520": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "12808154347573074859": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1131384986902172221": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11051434650031832658": ["convolution_gpu_bfyx_gemm_like",1],
+        "3623695848220673001": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2172636954267255416": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14522844693999581518": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "12136458184046915563": ["convolution_gpu_bfyx_gemm_like",0],
+        "13115589642140732066": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "2654793073145467058": ["convolution_gpu_bfyx_gemm_like",2],
+        "1967810052096853804": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6796998865297819946": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3314459110790355757": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "13193571607788569533": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15911434513425038508": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4534480875955599254": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11253790393313445931": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",1],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11338906515425639970": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "789359733867650915": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "16173557782125372935": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "14133509766683767462": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "1116274074896622552": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "7995002764260542332": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15380105196319354141": ["convolution_gpu_bfyx_os_iyx_osv16",481],
+        "17732250360268013336": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "1622731194539871461": ["convolution_gpu_bfyx_gemm_like",2],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15115440616185035720": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4927139127938739019": ["convolution_gpu_bfyx_gemm_like",2],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",1],
+        "1081962464388501987": ["convolution_gpu_bfyx_os_iyx_osv16",873],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "1362239912535573615": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "2230884858122788172": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12771805545455650546": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2007192658799516915": ["fully_connected_gpu_bf_io_gemm",1],
+        "6489645404977288242": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "4229105529069729944": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10743138314323119696": ["convolution_gpu_bfyx_gemm_like",2],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "768765852586619095": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16396393355098283060": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12392988351482826871": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10485534959656860449": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "13083981648347252910": ["convolution_gpu_bfyx_os_iyx_osv16",511],
+        "2248628426797793532": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2498920887656279332": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12864338805958186191": ["convolution_gpu_bfyx_gemm_like",2],
+        "5124645583449732785": ["convolution_gpu_bfyx_gemm_like",2],
+        "15024023281204917061": ["convolution_gpu_bfyx_gemm_like",2],
+        "11331539079347079374": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "11857822504978122919": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11665313746896806563": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14911763273270477925": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2096021095904820251": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12010294231983179604": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "7877256119877423528": ["convolution_gpu_bfyx_os_iyx_osv16",489],
+        "18243018097656671503": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "8061914949376516780": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11314436000791223218": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9516102312850256675": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "14188045559946481097": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "2418288192668085805": ["convolution_gpu_bfyx_gemm_like",2],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",986],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14261214737408786954": ["convolution_gpu_bfyx_os_iyx_osv16",621],
+        "7336911146060959485": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11234976958917093838": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "7058458405375602606": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "13654408396081513312": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1593086572473375988": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "13387766889016280910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5966963943739041502": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "13267743753217317315": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",185],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "16103653667647559851": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "17025997656996518171": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10935410906182995784": ["convolution_gpu_bfyx_gemm_like",1],
+        "15749335301736571135": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6362453779168658462": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "14541063954080306476": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",120],
+        "11058082057683584650": ["convolution_gpu_bfyx_gemm_like",2],
+        "6750269489578112382": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17774979615691038302": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "3219239043521617253": ["convolution_gpu_bfyx_gemm_like",2],
+        "10973647655853229395": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "1521992965089360209": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4145496852718466030": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2317409971670298599": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "10966081583785531511": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13745327504866194229": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "390943380079040179": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2999825793036702585": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4692951005189464579": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "9905716283229191208": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4860019935631927113": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1835975757316320402": ["convolution_gpu_bfyx_gemm_like",2],
+        "18265020664540913473": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1444256562477852389": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "8510044123592842725": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "10689303050557631712": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "390219891876240081": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10838972820886273680": ["convolution_gpu_bfyx_gemm_like",2],
+        "15682441855379046778": ["convolution_gpu_bfyx_os_iyx_osv16",130],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "14719871224178118299": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14880517974968280393": ["convolution_gpu_bfyx_gemm_like",2],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9696588462876533517": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "11964639701912187118": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "12190841837604350271": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4254313567858225805": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "1190134214210434381": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "2894138412746654795": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "11378458002317912396": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "18337160891834020517": ["convolution_gpu_bfyx_os_iyx_osv16",151],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "12584692605608021657": ["fully_connected_gpu_fb_oi_ref",1],
+        "907233163535348999": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11510063368067539341": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3164513064874019611": ["convolution_gpu_bfyx_gemm_like",2],
+        "5298952273692538291": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "8382509515623938786": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "14013561425708390846": ["convolution_gpu_bfyx_gemm_like",2],
+        "7801270668419570665": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "11188849626443657384": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13296566345005640760": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "4165920860392215245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7905503566052181015": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15872143905824807656": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "10983344268706058114": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "5553176511624221429": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16033144151193421543": ["convolution_gpu_bfyx_gemm_like",2],
+        "2571882179292959757": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13810716860158972470": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "16264774056719724826": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1919460437053604108": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "12767115494378788592": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "9861846661532177405": ["convolution_gpu_bfyx_gemm_like",2],
+        "7419990519344756626": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "13660573428614001128": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2305706332728008948": ["convolution_gpu_bfyx_gemm_like",2],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "1104489643524273315": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "4607013085883384144": ["convolution_gpu_bfyx_gemm_like",2],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "9486447779233331380": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "12096396455109952715": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15509845164085518352": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6525052296614701517": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3475757648408068589": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "14599150265057284139": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "7678168522030142454": ["convolution_gpu_bfyx_gemm_like",2],
+        "8799427328659766574": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15384168056682476462": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "1801066876009461857": ["convolution_gpu_bfyx_gemm_like",1],
+        "13787155972060672772": ["convolution_gpu_bfyx_gemm_like",1],
+        "4974435385259831818": ["convolution_gpu_bfyx_gemm_like",2],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "15184258464890250739": ["convolution_gpu_bfyx_gemm_like",2],
+        "7550660458541314838": ["convolution_gpu_bfyx_gemm_like",2],
+        "11367813096511965002": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "11393439616752806572": ["convolution_gpu_bfyx_gemm_like",2],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "838825600917352376": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9383222411929463824": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "3192518239721798250": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478914547444399288": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10036998353100219512": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13212959214376905822": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "1724898827344855006": ["convolution_gpu_bfyx_gemm_like",1],
+        "10890538764006500546": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12978004383198641522": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "18166732758694978380": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7727871584058599163": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14113510820933411052": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "6897348673467297407": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "15191864907092681849": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16569200335969311660": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "11642941943446484202": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "12825029449351875037": ["convolution_gpu_bfyx_gemm_like",1],
+        "12818953631784587919": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "9654726486719966937": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10158890414412187141": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8367989677286805427": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15953607231296296913": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9255337426504113924": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6762862978340755053": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8374345306483326015": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10386584706491193379": ["convolution_gpu_bfyx_gemm_like",2],
+        "18067353229273804720": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3588791913550955553": ["fully_connected_gpu_fb_oi_ref",1],
+        "5047419871737940985": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "6078344073564209080": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "15492793021506324472": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "801486567558674495": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15652392678782222737": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3701795558556637835": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8369833730195120673": ["convolution_gpu_bfyx_gemm_like",2],
+        "7103345484511147373": ["convolution_gpu_bfyx_gemm_like",2],
+        "4412343276595791077": ["convolution_gpu_bfyx_gemm_like",2],
+        "1596472719837608525": ["convolution_gpu_bfyx_gemm_like",2],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "15636407980943172317": ["convolution_gpu_bfyx_gemm_like",2],
+        "2816982827037092536": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "3469963495451100978": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "9386678255270055573": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "172584114180442549": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "3828569468687251275": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "5513667102916409932": ["convolution_gpu_bfyx_gemm_like",2],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "15112118829970177073": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "8566695253227825439": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "13002723770137829128": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8511244943596227719": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1745930004673880589": ["convolution_gpu_bfyx_gemm_like",1],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "12707748441880165396": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3277243911383750280": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15952399564161253450": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12063837066704136739": ["convolution_gpu_bfyx_gemm_like",1],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "1330842758352650583": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "4007319206075386920": ["convolution_gpu_bfyx_gemm_like",2],
+        "1592619919721912789": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "984472462878596435": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "813347941036099284": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "15091825614924466766": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3436433254188539886": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "17997314629342774968": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "3524702814173574637": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "340606466693982406": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "544003022213487787": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6948696390129114563": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4563529605364580848": ["convolution_gpu_bfyx_os_iyx_osv16",131],
+        "2124776616364429517": ["convolution_gpu_bfyx_gemm_like",1],
+        "2946926779445063554": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "11240189248024145687": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "4494583230309471319": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "8104609318998060422": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "16587078304821304948": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "237302155033013557": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "13810995219720233595": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "15299926486228458704": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "10879183694331631189": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "3745433390861789238": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6275903692904946376": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "572265264921910408": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "276407276027553756": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "3747518910079195578": ["convolution_gpu_bfyx_os_iyx_osv16",103],
+        "15198419554644505600": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17073183514200378702": ["convolution_gpu_bfyx_os_iyx_osv16",667],
+        "8611417708673038653": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "8375778282166369933": ["convolution_gpu_bfyx_gemm_like",2],
+        "6831045740006076251": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6577754887650563753": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11775667915453535428": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "8898910394425958745": ["convolution_gpu_bfyx_gemm_like",2],
+        "15781622938833984014": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "8035084960535483680": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "6065819201836017182": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7873648177300629037": ["convolution_gpu_bfyx_gemm_like",2],
+        "18134140047840716203": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "12046638414686283134": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10008202802779981732": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "954347958041231578": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5871082277006078841": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "6137405768481559638": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "9105388853296359769": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",2],
+        "11795686089670429481": ["convolution_gpu_bfyx_gemm_like",2],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "4224423702382859092": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2270733937722366926": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "5646139101524964833": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "3239100076064406977": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3730238135300250205": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "7227174766917523481": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "17772882818194611202": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "18375125668176498051": ["convolution_gpu_bfyx_gemm_like",2],
+        "16091165907421819456": ["convolution_gpu_bfyx_gemm_like",2],
+        "7726714223809300966": ["convolution_gpu_bfyx_gemm_like",1],
+        "13926730608213207277": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11533151357949131860": ["convolution_gpu_bfyx_gemm_like",2],
+        "14805212478405698245": ["convolution_gpu_bfyx_gemm_like",1],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "9468314291932574827": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "8324250071425605671": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6579950270997373448": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1426606766274640878": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "5953754321266570854": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "14827538610133799379": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4920194716156732643": ["convolution_gpu_bfyx_gemm_like",2],
+        "9740466267717175474": ["convolution_gpu_bfyx_gemm_like",2],
+        "6755802278188792577": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "4417341352109525283": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8442368383427915597": ["convolution_gpu_bfyx_gemm_like",1],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "3693042354944382600": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12390011660072693092": ["convolution_gpu_bfyx_gemm_like",1],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "12425310792514818973": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "17331582127656317117": ["convolution_gpu_bfyx_gemm_like",1],
+        "13492216433886201174": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2338535084014610258": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "4396653960950462197": ["convolution_gpu_bfyx_gemm_like",1],
+        "5825664545247017348": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "4624363818743696582": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "152263592822875549": ["convolution_gpu_bfyx_gemm_like",2],
+        "17015791782274123780": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "16831114690704826637": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17350963651826443169": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7277156316894715321": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17559685912375493682": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "9083686317073801642": ["convolution_gpu_bfyx_gemm_like",1],
+        "311101627084421734": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "10322586483496198615": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7154364270315480182": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "9947693652506812817": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "13593258537178247801": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "16758962840329202004": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1077224320045437593": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2999633429402781278": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "2184670359551186734": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17798626036576472760": ["convolution_gpu_bfyx_os_iyx_osv16",545],
+        "14705509109623500235": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "8079376692609682448": ["convolution_gpu_bfyx_gemm_like",0],
+        "4585891362157592384": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "5748047690737461635": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "173772845058977237": ["convolution_gpu_bfyx_os_iyx_osv16",512],
+        "6899658518070473523": ["convolution_gpu_bfyx_gemm_like",2],
+        "9455406830371528486": ["convolution_gpu_bfyx_gemm_like",1],
+        "3027775502561362722": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "1006828591724642933": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "12136625628940225638": ["convolution_gpu_bfyx_gemm_like",2],
+        "14253275166085865948": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "9875997976286355123": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "14017025411515888007": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14077148976508649021": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13140254055376365092": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4476218615403440835": ["convolution_gpu_bfyx_gemm_like",2],
+        "11465965972527519631": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8045697952241865861": ["convolution_gpu_bfyx_gemm_like",2],
+        "8109572327736409899": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12325592439309417414": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "18280672126778847258": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "14725765847498813247": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "2014911634432127630": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9835338452418388180": ["convolution_gpu_bfyx_gemm_like",2],
+        "16912035321030511639": ["convolution_gpu_bfyx_gemm_like",1],
+        "5701438170070600512": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "1499841226042523429": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "9823752892549805496": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "9101571410887509600": ["convolution_gpu_bfyx_gemm_like",0],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "9410125656044318792": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1818433662409886324": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "2827850900421982274": ["convolution_gpu_bfyx_gemm_like",1],
+        "11507538232733291666": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "536646811796032046": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "18167100055915766856": ["convolution_gpu_bfyx_gemm_like",1],
+        "14184440545916228597": ["convolution_gpu_bfyx_gemm_like",2],
+        "9068406831482072377": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16190949264253468961": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "475665035119038846": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "4172485608495372888": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "13696782397412896129": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6056291179600370019": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14492935486352505845": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4316519748653705692": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "16453041919970581620": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "14696479950182046016": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "1925626127045202964": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "16614170159588864300": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "7185832253431234935": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14004715832115880216": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "7157531901512507924": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "14681705641267917886": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "4872433441839808585": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13503688893307029975": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "6914775146138105785": ["convolution_gpu_bfyx_gemm_like",2],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",2],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "2451712485584835395": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "9305957796037500628": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4040607776348275579": ["convolution_gpu_bfyx_gemm_like",2],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "7088331918128954410": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "9377779605078400305": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "6740545361286720494": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15490478608105402679": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9548658329589481069": ["convolution_gpu_bfyx_gemm_like",2],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "5280182001774668876": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1142725391726703078": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7876355212013100281": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "2363414141971004557": ["convolution_gpu_bfyx_gemm_like",2],
+        "9019451572520595738": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2111049986724040641": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "6610054713068442549": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "13163026305514410688": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7338578624767544128": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13491221531603384511": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12038525298168664305": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1015184966858657992": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4010329161090285019": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6722358544720547260": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "5553779954745929430": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "169973842603492802": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14203061085285979556": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5361028467247182860": ["convolution_gpu_bfyx_gemm_like",1],
+        "11630475290242283451": ["convolution_gpu_bfyx_gemm_like",2],
+        "16768470780681544910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "7480968533463196410": ["convolution_gpu_bfyx_gemm_like",2],
+        "13818587810073749596": ["convolution_gpu_bfyx_gemm_like",1],
+        "12700051513124813499": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "18386376129938707290": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "11333068902248367382": ["convolution_gpu_bfyx_gemm_like",2],
+        "13219865669259079983": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "9700592037514669700": ["convolution_gpu_bfyx_gemm_like",2],
+        "10105539975183207700": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "4239415134522959352": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "12170874893413205000": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "8325686349100774855": ["convolution_gpu_bfyx_gemm_like",2],
+        "8413117662038329068": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2904162348196990593": ["convolution_gpu_bfyx_gemm_like",1],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3527012447011885981": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5230406405159608187": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "8779947213821605681": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7505966294864890221": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "1213958002895787672": ["convolution_gpu_bfyx_gemm_like",2],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8575296926578119953": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17641033958594901664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "9105431502075531641": ["convolution_gpu_bfyx_gemm_like",2],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "10794662801660960189": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "14579042972443651846": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "13403617010417893318": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "18242682488017822077": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "6149261133858739754": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",420],
+        "13088023076667575514": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "14277843123789500234": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1370827524176794227": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "12293705794290797805": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "3034947396960425753": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "11680829908738480957": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "316225690176910392": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "17236135174912837061": ["convolution_gpu_bfyx_gemm_like",2],
+        "6851536988434597530": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6612643056203714506": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "3446991010350155849": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15071888879264671307": ["convolution_gpu_bfyx_os_iyx_osv16",104],
+        "1228256819256996416": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17118569850095586049": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "16201999154635899927": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "6235096928786525260": ["convolution_gpu_bfyx_os_iyx_osv16",337],
+        "11493371521058673700": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "699127221549844251": ["convolution_gpu_bfyx_gemm_like",2],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "11129224786768161139": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "4631772220201098020": ["convolution_gpu_bfyx_gemm_like",2],
+        "7536287105029319189": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "10412748832841674068": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7385295618478993079": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5934841294975212773": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14815498807515058447": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "13773898185415904435": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16997897512818072938": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12201437677145858979": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "16067821671414842756": ["convolution_gpu_bfyx_gemm_like",1],
+        "11191071895289217783": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6542417269641204414": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9226443907548972870": ["convolution_gpu_bfyx_gemm_like",1],
+        "6948606378949354116": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "4652308622880770983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3285688984628545255": ["fully_connected_gpu_fb_io_ref",1],
+        "17396226612787250663": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4695182996147218495": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "17235360775064303316": ["convolution_gpu_bfyx_gemm_like",2],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6402941068107243403": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12166710900466116000": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17358006976602795707": ["convolution_gpu_bfyx_gemm_like",2],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5680888227752935228": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "13288543822410746011": ["convolution_gpu_bfyx_gemm_like",1],
+        "1603703756241612948": ["convolution_gpu_bfyx_gemm_like",2],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "2820364088001594654": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "14513925709624513868": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "13244693761392741931": ["fully_connected_gpu_fb_oi_ref",0],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "12211848608269437730": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "1898912620350738645": ["convolution_gpu_bfyx_gemm_like",2],
+        "5849577829817109757": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "12811104880512633036": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "10736915975072972467": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15047163348308549816": ["convolution_gpu_bfyx_gemm_like",1],
+        "6673690359191617215": ["fully_connected_gpu_fb_oi_ref",1],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "13019190248083899887": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "9318652504803279936": ["convolution_gpu_bfyx_gemm_like",2],
+        "2345023488044002149": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "9692949270906064580": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6489074577147494118": ["convolution_gpu_bfyx_gemm_like",1],
+        "8271034912009744989": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2248754661513284642": ["convolution_gpu_bfyx_gemm_like",2],
+        "6865406633958213363": ["convolution_gpu_bfyx_gemm_like",2],
+        "14600118619533737293": ["fully_connected_gpu_fb_oi_ref",0],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "13014443130752087867": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "3730207439375250056": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "17113350507039887381": ["convolution_gpu_bfyx_gemm_like",1],
+        "6604223938357238686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2817383483458239293": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "17692144048680858991": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15178327647765537565": ["convolution_gpu_bfyx_os_iyx_osv16",666],
+        "7544565739420583104": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8529571293598502239": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "16328232350072955252": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "14746900092090885770": ["convolution_gpu_bfyx_gemm_like",2],
+        "1200162031019105686": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4510003738155830628": ["convolution_gpu_bfyx_gemm_like",1],
+        "8057302050645780813": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6784146431605417954": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "14084855778741260863": ["convolution_gpu_bfyx_gemm_like",2],
+        "9883719542550391149": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6999530153839596796": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "13412296930014397060": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2062195022363480864": ["convolution_gpu_bfyx_gemm_like",1],
+        "10806992251978564302": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9352385417006844121": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "4890932609897686394": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "484412270668341493": ["convolution_gpu_bfyx_gemm_like",1],
+        "15662207751131195569": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",1],
+        "15183511809138557392": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5733530388090903847": ["convolution_gpu_bfyx_gemm_like",2],
+        "9574931298183748343": ["convolution_gpu_bfyx_gemm_like",2],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8751367574402839332": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "18259787991864449280": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "6373173636869473046": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6012477132351580695": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "16367495521884864886": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "13095408117538194584": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "3020115657931277672": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4941660917457387098": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "2238901105639912692": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "1671347101986657824": ["convolution_gpu_bfyx_gemm_like",2],
+        "12274268980330855890": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "17079309368548171402": ["convolution_gpu_bfyx_gemm_like",1],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",2],
+        "4684985181211883028": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "9882204352209412039": ["convolution_gpu_bfyx_gemm_like",1],
+        "14600700464602327710": ["convolution_gpu_bfyx_gemm_like",2],
+        "1682486914760867977": ["convolution_gpu_bfyx_gemm_like",2],
+        "5013936351898884291": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8292979162428130363": ["convolution_gpu_bfyx_gemm_like",2],
+        "2564518461717467683": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13613948678997524330": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "919788620883613958": ["convolution_gpu_bfyx_os_iyx_osv16",464],
+        "18060514966005474708": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "13044020050176766314": ["convolution_gpu_bfyx_gemm_like",1],
+        "10720782649044333851": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "1966540437574889257": ["convolution_gpu_bfyx_gemm_like",1],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "16076153317792960383": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "12960666483922103702": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "2264520082689779253": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "6220616397859143111": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10857084376518292379": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15487686565734149288": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "6647969101146756031": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12301464827222654105": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "8560635685184432720": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "9694891301950867606": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "13345599888287912619": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2511072616914149110": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "15890749658785957481": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "14386256118128644729": ["convolution_gpu_bfyx_gemm_like",2],
+        "7806837641999814363": ["convolution_gpu_bfyx_gemm_like",2],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5164372816534616260": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14895352662503433583": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3889688816787688160": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "16499919609457089685": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "11825209936640729550": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "4366168099274266975": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "11962541545116807979": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "17939745299931100048": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "11583791752668920812": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "14116682822622440033": ["convolution_gpu_bfyx_gemm_like",1],
+        "15178012823756517910": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14276876004054588508": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7627882727285402176": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14912119584313592912": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "1504867045084152953": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8488789346759658706": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2446257282140830646": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "17310332946322628458": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "14905705901815863508": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1553825475921110392": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7335403151694644211": ["convolution_gpu_bfyx_gemm_like",1],
+        "2310159350914289605": ["convolution_gpu_bfyx_gemm_like",2],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14128599551956588603": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "16614678178197571772": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "16805562203348924108": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "13565027847255501776": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16364899406120840449": ["convolution_gpu_bfyx_os_iyx_osv16",398],
+        "17128760774072077101": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9358401110755269308": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8703758535351908295": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10136297272678091418": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13065517911798224579": ["convolution_gpu_bfyx_os_iyx_osv16",377],
+        "7722090560547236852": ["convolution_gpu_bfyx_gemm_like",1],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1962479636209947761": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "16392283136103456949": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "4438055737691342460": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2520734476651273971": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8569122574675372789": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "8159489372517869446": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "11599990834682830362": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "17825953644228876369": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8885012252853227025": ["convolution_gpu_bfyx_gemm_like",1],
+        "8484526109354576450": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17096175733187202673": ["convolution_gpu_bfyx_gemm_like",2],
+        "9596656797750683465": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12883021432082543848": ["convolution_gpu_bfyx_gemm_like",1],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7504074736798125353": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17184638213817814424": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",1],
+        "13681462437496627948": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11091771531609585709": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2599817012641445801": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "15921072201288695017": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11258182961445417799": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6214312494103149808": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "1673458534805854479": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "10944997349682267106": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "2887152687927903549": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13017541921351620667": ["convolution_gpu_bfyx_gemm_like",2],
+        "17626938391567407401": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "517802466588815950": ["convolution_gpu_bfyx_gemm_like",2],
+        "2079476232214121671": ["convolution_gpu_bfyx_gemm_like",1],
+        "2225233951957105071": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16035563519857925932": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "8525704362451630717": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12022980249970038824": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "17230103497915224469": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "17666004363345457085": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6224167817672480442": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12144421857685107073": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6581494673640781863": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5461649843950745696": ["convolution_gpu_bfyx_gemm_like",2],
+        "3718980061704064547": ["convolution_gpu_bfyx_gemm_like",2],
+        "712420402191459810": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "11757919563609176713": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "7808544677773370430": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15997231252708686870": ["convolution_gpu_bfyx_gemm_like",2],
+        "12924910330295852704": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7499082230554771515": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "4702017956226464806": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "10532500300200244159": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "11298638173197050575": ["convolution_gpu_bfyx_os_iyx_osv16",942],
+        "5675497261720118479": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "13845827017732177448": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14854353557342075292": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "8948718883406304307": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7510055418609679364": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2821441037530057414": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "13524128602135083081": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "9707630588260222630": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "9181466280310872332": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "11148502358361704423": ["convolution_gpu_bfyx_gemm_like",1],
+        "7959969582538910953": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6613282637922219205": ["convolution_gpu_bfyx_gemm_like",2],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "12028030221272546172": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "4995510103045767117": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "14707884854112495064": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7323343770209750835": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3292879092145281224": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "15592248516895826924": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9400558994532871122": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15875968032394961531": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4073467095502162430": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "16044646335477470657": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14376192291828307385": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6323026044750482867": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5094419710576598497": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13085261987388297912": ["convolution_gpu_bfyx_gemm_like",1],
+        "7463657272687673896": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "3789890554711038921": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12070592804878487941": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "1208483520611545642": ["convolution_gpu_bfyx_gemm_like",2],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",763],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "2652267888871336297": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5507708258753405429": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "9475812329914836280": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10025893052937028511": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "15221712686851573528": ["convolution_gpu_bfyx_gemm_like",2],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "3703292222363446463": ["convolution_gpu_bfyx_os_iyx_osv16",762],
+        "9608148784787572220": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4036143655651874318": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "7371339724529362579": ["convolution_gpu_bfyx_gemm_like",2],
+        "16847817828600381030": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15334769670416409064": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3910549475873353422": ["convolution_gpu_bfyx_os_iyx_osv16",380],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13762814538289753428": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",2],
+        "16070611944881238498": ["convolution_gpu_bfyx_os_iyx_osv16",884],
+        "9910414853336797922": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15180747404865201068": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "11060822686394981344": ["convolution_gpu_bfyx_gemm_like",1],
+        "18146184020578260553": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "13328449155966085543": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4129586781834275070": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9649533822873928984": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9593975471009029134": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10572208209982879914": ["convolution_gpu_bfyx_gemm_like",0],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_gemm",2],
+        "16124702296533772526": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "14558850297291634005": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "1254745727978231148": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13283018618260255620": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "16992620579546408448": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13352151930345854198": ["convolution_gpu_bfyx_os_iyx_osv16",275],
+        "2690771087990667627": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "1208243889917809864": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "7494124707566708728": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "13564654155363057485": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15160322051545035612": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "7473012539094225392": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "16896863928108200897": ["convolution_gpu_bfyx_gemm_like",2],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",863],
+        "11648841195768568983": ["convolution_gpu_bfyx_gemm_like",0],
+        "13831458435772917577": ["convolution_gpu_bfyx_gemm_like",2],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15378025640603637387": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "16852207712205172744": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15132518566122695317": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "1168311873250200110": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3541828356667081528": ["convolution_gpu_bfyx_gemm_like",1],
+        "9524663472084054050": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7431237779891953779": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "9197931868200777891": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "9451273689649467046": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "6878922067845522655": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "17242820574559628535": ["convolution_gpu_bfyx_gemm_like",1],
+        "15452996816194024433": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "70244312667395170": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11919129623429545762": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "17795358440179122086": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8263822658108674162": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "2152903140704848574": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6735135795253013220": ["convolution_gpu_bfyx_gemm_like",2],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "5215755301612973095": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "4122312805832663323": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11619548409913646265": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2108296560864415762": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "912423125050985716": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17281198415161259885": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2110090486638190463": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "3240428557350945267": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13491655481292956895": ["convolution_gpu_bfyx_gemm_like",1],
+        "2343921093633784755": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "3148053731303748054": ["convolution_gpu_bfyx_gemm_like",2],
+        "16404059675217592817": ["fully_connected_gpu_fb_oi_ref",1],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "9034951536385533818": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "12756432707088842236": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "523055954326631884": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17850932752450917677": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "14973411884734235059": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "16229324496308453344": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10736892779278378335": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "11261619081095309088": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "13368477378531148593": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "5401523175111660554": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9802832901508552733": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11361013180071053597": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "269334626439013799": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17970855913877771858": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "18332090297993015499": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3665837617379468265": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8316848551837633169": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "17807033661138518449": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6571473790090353005": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "499739705596245675": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5603409300903611279": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14332388011233886083": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6673753637296082820": ["convolution_gpu_bfyx_gemm_like",2],
+        "8528886126454874796": ["convolution_gpu_bfyx_gemm_like",1],
+        "10946069941293798874": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10054253863699485503": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9416285845239621878": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9042812985530274425": ["convolution_gpu_bfyx_gemm_like",2],
+        "12671153706040443724": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "12705054744767500423": ["fully_connected_gpu_fb_io_ref",1],
+        "8503207028307570404": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "5049534591553232781": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6456426339461437148": ["convolution_gpu_bfyx_gemm_like",1],
+        "1289009275012699560": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "3965871278597751318": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13744951984978188201": ["fully_connected_gpu_fb_io_ref",1],
+        "13728180355108851541": ["convolution_gpu_bfyx_gemm_like",2],
+        "4524347845016978037": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "5011273172385428756": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "12283317230112506089": ["convolution_gpu_bfyx_gemm_like",2],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "10175721494218314250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10432687907685994204": ["convolution_gpu_bfyx_gemm_like",1],
+        "13614921331048223116": ["convolution_gpu_bfyx_gemm_like",2],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9765339420071627045": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "10660230104888153758": ["convolution_gpu_bfyx_gemm_like",2],
+        "12386930130408773521": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6706491729783125139": ["convolution_gpu_bfyx_gemm_like",1],
+        "12675858428585873471": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "9888097487468905169": ["convolution_gpu_bfyx_gemm_like",2],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "466868648178437688": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "7282751412088726760": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "14270450799210365812": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7518734167761579102": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9854440591497995284": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13405310261845268772": ["convolution_gpu_bfyx_gemm_like",2],
+        "7715520469947900684": ["convolution_gpu_bfyx_os_iyx_osv16",571],
+        "16408015571155576773": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4783126652984096700": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "13388004363210658650": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "14256842018830898376": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "16114623916610925741": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10397253349562394184": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8007667797556094444": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "15129834325410878425": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "18417880214901227799": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4722824701199486161": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "17011927973643184196": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15212317205888563836": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13802834658447955377": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "6527268791835193134": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "10918743320372308981": ["convolution_gpu_bfyx_gemm_like",2],
+        "2737840613867456953": ["convolution_gpu_bfyx_gemm_like",2],
+        "269829518575229806": ["convolution_gpu_bfyx_gemm_like",2],
+        "2944333966072327932": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "12744887771237881196": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "1242366856673194709": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "17753585752923130911": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "7282595712912388754": ["convolution_gpu_bfyx_os_iyx_osv16",189],
+        "6985970932645412773": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "7930154826818165796": ["convolution_gpu_bfyx_gemm_like",2],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "6953478877896677022": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "10607904718265020949": ["convolution_gpu_bfyx_gemm_like",2],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8008513163448840421": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13221156296791499146": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "15391215077224693736": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "704262295684441748": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "11455732989503244360": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "18424400171776141118": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "4286652913945761799": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "5379608399492828685": ["convolution_gpu_bfyx_gemm_like",1],
+        "4614700272179482173": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7441139786825555264": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "202304354656398848": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "11962382064404466630": ["convolution_gpu_bfyx_gemm_like",1],
+        "5301440603380967612": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "12018398218876712811": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "10898684230183205955": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "2752322006160986801": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15660316437768312006": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15668791697154389130": ["convolution_gpu_bfyx_gemm_like",1],
+        "1139581213977408268": ["fully_connected_gpu_fb_io_ref",2],
+        "6649759230117795192": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "5244441996055494170": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",62],
+        "11070696274716018686": ["convolution_gpu_bfyx_os_iyx_osv16",570],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "8146906136296114696": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "435261825003875448": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "8922463054055280800": ["convolution_gpu_bfyx_gemm_like",1],
+        "13674246753382740056": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14189775376370027482": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8254412626112343365": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13596494923128445274": ["convolution_gpu_bfyx_gemm_like",2],
+        "7085416207166146240": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "10320711719466983961": ["convolution_gpu_bfyx_gemm_like",2],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13624106485902414324": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "15566108481408840783": ["convolution_gpu_bfyx_gemm_like",2],
+        "15225331270926229394": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13659291428095454839": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16932090423428476170": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "1882912836250239503": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "3442073007560756473": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3609233164979051271": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13108356579957761944": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16027853591907232537": ["convolution_gpu_bfyx_gemm_like",1],
+        "14446344744130895614": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17924819398394001587": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1608378717397996752": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "393884269158067083": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14903430454784452446": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11079710960007068860": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "11815825155082424936": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "2367791050032803116": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "11868789283464117390": ["convolution_gpu_bfyx_gemm_like",2],
+        "11207578758583923357": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "17368161816774674256": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "4551182180668229945": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "9001645663675631429": ["fully_connected_gpu_yxfb_ref",2],
+        "18191573176587760698": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "6027350558532160900": ["convolution_gpu_bfyx_gemm_like",2],
+        "11229587372764249222": ["convolution_gpu_bfyx_gemm_like",2],
+        "15838058479520696173": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "7318929661124340248": ["convolution_gpu_bfyx_gemm_like",0],
+        "3177915003579216846": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "7052552351421332490": ["convolution_gpu_bfyx_gemm_like",2],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",1],
+        "5589785455223385189": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5163965164859517893": ["convolution_gpu_bfyx_gemm_like",2],
+        "2268291720177538378": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13205973783895006074": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "11553355518677163509": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "14108113294744119367": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4673618329986777239": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14287890401250603057": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "3448477246688526708": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",91],
+        "2532962442388536022": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "14433662482531248989": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2335428826699999827": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "82249723699159955": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "16547425454653232058": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "16613907066461513431": ["convolution_gpu_bfyx_gemm_like",0],
+        "11725629762660987217": ["convolution_gpu_bfyx_gemm_like",1],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8171897258557801015": ["convolution_gpu_bfyx_gemm_like",1],
+        "15959241441689395955": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "14585370009659482450": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "15838114628203742383": ["convolution_gpu_bfyx_gemm_like",2],
+        "2399812257701033542": ["convolution_gpu_bfyx_gemm_like",2],
+        "7962383460496540840": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3828988304073539836": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11307531462784240962": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "14838067105091112485": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4790599496008369129": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "10358359789382196576": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "16073578125651112218": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17405865057155583042": ["convolution_gpu_bfyx_gemm_like",1],
+        "8312903198090907576": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10173382130572498594": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "331390460560782085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",1],
+        "1811357700607919311": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11986642867827682648": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "11114015660322254541": ["convolution_gpu_bfyx_gemm_like",1],
+        "6420851258772300332": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "14793709237400480942": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "12569856169024791306": ["convolution_gpu_bfyx_gemm_like",2],
+        "2001464747481073870": ["convolution_gpu_bfyx_gemm_like",1],
+        "8863398172720091880": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "15148625184033310404": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10624246057883518638": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "6730474465453860479": ["convolution_gpu_bfyx_os_iyx_osv16",1039],
+        "10073439287681954518": ["convolution_gpu_bfyx_gemm_like",2],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",523],
+        "15465799788109255561": ["convolution_gpu_bfyx_gemm_like",2],
+        "11757953304204716753": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "1306339989221885682": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3198726093355425150": ["convolution_gpu_bfyx_gemm_like",2],
+        "962311766200741205": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16728826595086368897": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "1147744092130296563": ["convolution_gpu_bfyx_gemm_like",1],
+        "7146559117784312265": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "13073788277284969422": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "2305461098719675735": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "17521647426452186921": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "5433618404351968121": ["convolution_gpu_bfyx_gemm_like",2],
+        "17794162443307839614": ["convolution_gpu_bfyx_gemm_like",1],
+        "16440598510199834213": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "18009765676050504407": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3509811595028801757": ["convolution_gpu_bfyx_os_iyx_osv16",131],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "1109243878358317937": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "7254869458810021127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12615462894236933223": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11926378988530133568": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2930545263523345204": ["convolution_gpu_bfyx_os_iyx_osv16",542],
+        "7630776235327261710": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13787118639037730152": ["convolution_gpu_bfyx_os_iyx_osv16",298],
+        "404419072921281472": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "17749857812061795980": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4101449235783342476": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "14385181780082014495": ["convolution_gpu_bfyx_gemm_like",2],
+        "6013434489252641471": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8175595372513695437": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15092483859565823523": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3503236715353689942": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "10831460252334010668": ["convolution_gpu_bfyx_gemm_like",2],
+        "14681717813022425567": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6157727013102138824": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "9823997593704517392": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "3223726179820717808": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "10033076377998157101": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2571778193407799664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "13769943652297353544": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "16031140952379208074": ["convolution_gpu_bfyx_gemm_like",2],
+        "6128534975733321186": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10273183900108661041": ["convolution_gpu_bfyx_gemm_like",2],
+        "8316011587868622301": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "11907507085694711513": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "2004120786408087671": ["convolution_gpu_bfyx_gemm_like",2],
+        "17515573322312447679": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "5162737590442940024": ["convolution_gpu_bfyx_gemm_like",1],
+        "10906417366145323499": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11992158790035075804": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7606097739225472283": ["convolution_gpu_bfyx_gemm_like",2],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7753336153932360422": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6549150139619174585": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "505102470055903237": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "157852787707383962": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "8909239203149651260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14537109978413728476": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",767],
+        "17420288204511371476": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12570087709404311189": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6210483922262161762": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11883941040326858829": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "17303981366934280174": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "4127717437639868970": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17981604038340576961": ["convolution_gpu_bfyx_gemm_like",1],
+        "4301372734564127254": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "2086001721804797157": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "16184979150665364486": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "1934379409955686502": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "11655994466278963438": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "1945630503883822822": ["convolution_gpu_bfyx_gemm_like",1],
+        "15232673324549539143": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "6661117204204077150": ["convolution_gpu_bfyx_gemm_like",2],
+        "10384416235770656262": ["convolution_gpu_bfyx_gemm_like",1],
+        "13716836930727272782": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3819763245853861272": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "7345632855842905966": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2571186327837339204": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "9996196793804333253": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "11246470701714560770": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "8212533074856783509": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "33889407315234685": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17242442529374722270": ["fully_connected_gpu_fb_oi_ref",1],
+        "7496699438957793920": ["convolution_gpu_bfyx_gemm_like",2],
+        "8375465895534833097": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "6476949395889340429": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "18187262802267413585": ["fully_connected_gpu_fb_io_ref",1],
+        "9454146598828084176": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4241640917176830862": ["convolution_gpu_bfyx_gemm_like",2],
+        "10446500827044060319": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "7908036427091174081": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6948147789605707774": ["fully_connected_gpu_fb_io_ref",2],
+        "18159049252673770569": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "10904228118889057467": ["convolution_gpu_bfyx_gemm_like",2],
+        "14266210014132784194": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "5587539329568150667": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "10098661517988566506": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5519244962044894877": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "11777373751892075391": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "17575293085957492821": ["convolution_gpu_bfyx_gemm_like",2],
+        "7145194061073256844": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7243161613448507792": ["convolution_gpu_bfyx_gemm_like",1],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2056597791109604534": ["convolution_gpu_bfyx_gemm_like",2],
+        "2873387231297790075": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "4243114942173293897": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "18232408112396439386": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14335423820860953927": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "1187224156936080964": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "5759260743809103651": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16622402936526588344": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "9061025737181218101": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "670951751279091662": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "13133323947490009546": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10424278617647597641": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "6551173574001309451": ["convolution_gpu_bfyx_gemm_like",1],
+        "397445657349822499": ["convolution_gpu_bfyx_gemm_like",2],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "17016846635668370921": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "10898210758890334465": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "11684927349056930189": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "332090597573908506": ["convolution_gpu_bfyx_gemm_like",1],
+        "4682428771166816734": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "18006581941186887676": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "15661322183507404821": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "14634044133573461949": ["convolution_gpu_bfyx_gemm_like",2],
+        "7714783879762659458": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9806689250758752070": ["convolution_gpu_bfyx_gemm_like",0],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "3166885953206195915": ["convolution_gpu_bfyx_gemm_like",2],
+        "4574242607119408140": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "531020979837645217": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9666426531743983113": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "262113403359175565": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "4634475069086874260": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9397711809671506538": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12008952324872799824": ["convolution_gpu_bfyx_gemm_like",2],
+        "1907439276166837309": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "13314092088416047551": ["fully_connected_gpu_yxfb_ref",1],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13071064509662090710": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "15928746165235747659": ["convolution_gpu_bfyx_gemm_like",2],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8725673763972618034": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7819934200255007163": ["fully_connected_gpu_fb_oi_ref",2],
+        "13051342120933385671": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17664704673433112966": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5353170440534073482": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "8240616667079698459": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4600261954762222519": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7070374681687005676": ["convolution_gpu_bfyx_gemm_like",1],
+        "16968664807495872526": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18404344881797725263": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5267143428977695208": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "7811861756798601201": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",925],
+        "1056494963618130644": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "962676948282027870": ["fully_connected_gpu_fb_io_ref",2],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2788116002380533417": ["convolution_gpu_bfyx_gemm_like",2],
+        "10378966564497668941": ["convolution_gpu_bfyx_os_iyx_osv16",283],
+        "7086574330273897976": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "16244270858428653037": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "11970466555294072275": ["convolution_gpu_bfyx_gemm_like",2],
+        "4586633477264151844": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "6547565989244888354": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "218477594596081189": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "5834006438103071406": ["convolution_gpu_bfyx_gemm_like",2],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "12461575861709234385": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1592994755823247500": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1922168904767469999": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "15718011075217705480": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13485140643204970345": ["convolution_gpu_bfyx_gemm_like",1],
+        "2664944425727769475": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "12314918602191412697": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "11341771589317480665": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "6133854782246597175": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "7394848434332739139": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "9937387440035377216": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "11804035561861841621": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9352866803638271156": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "1894591633696862066": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "17580933462801685507": ["convolution_gpu_bfyx_gemm_like",1],
+        "5408469943982199754": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15260448822338206631": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12492763342322011136": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "14975859027256879948": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "615833743936753727": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5982637097503543357": ["convolution_gpu_bfyx_gemm_like",2],
+        "9025790715924779508": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "17078700948595127028": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "10662239532841666965": ["convolution_gpu_bfyx_gemm_like",2],
+        "11049130623091275457": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "7921388663815287395": ["convolution_gpu_bfyx_gemm_like",2],
+        "3811462129131022619": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3555204322491340337": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13047793996728441528": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "4047806462440750215": ["convolution_gpu_bfyx_gemm_like",2],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12518571127411736885": ["convolution_gpu_bfyx_gemm_like",2],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",1],
+        "12248119734016401633": ["fully_connected_gpu_fb_io_ref",1],
+        "7671016314869993705": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "8054562515577756499": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "10732225577823701543": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "2836903620603494117": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "1650080413259413393": ["convolution_gpu_bfyx_gemm_like",2],
+        "7864880361674128748": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2903075619523363020": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "14211549589070739656": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8749468546606972791": ["convolution_gpu_bfyx_gemm_like",2],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "148355059345569721": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "4304943753428518690": ["convolution_gpu_bfyx_gemm_like",1],
+        "17318287523550546026": ["convolution_gpu_bfyx_gemm_like",2],
+        "15364374265752682266": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "5136111979773513341": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "15667487381692577290": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "482564204402769504": ["convolution_gpu_bfyx_gemm_like",1],
+        "5983808817108775912": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "14849708746319190277": ["convolution_gpu_bfyx_gemm_like",2],
+        "4646795194660982475": ["convolution_gpu_bfyx_gemm_like",2],
+        "94012300876418257": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "15786313441300512560": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1895945774251432343": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6512006285490280576": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3565702695809105495": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12610854610554906160": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "3895088069642140043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2100387626452428743": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "1362540464632328798": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13012283016751495099": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "436514945529747349": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6719956770229212208": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "12692563384795319282": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "5933743119393822386": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12523676912856063091": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "14744368497944610864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "868177350337221377": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "832976844701988460": ["convolution_gpu_bfyx_gemm_like",1],
+        "14034487492239603874": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12669547093826826335": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "5947492124433175601": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "13276867073526485069": ["convolution_gpu_bfyx_gemm_like",2],
+        "528618206870447012": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "7426788519998680898": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "9803306661531470015": ["fully_connected_gpu_fb_io_ref",2],
+        "6476480727582657308": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "16774728502960825097": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "6517802281521111563": ["convolution_gpu_bfyx_gemm_like",1],
+        "10652512666086843369": ["convolution_gpu_bfyx_gemm_like",2],
+        "1452841775482537260": ["convolution_gpu_bfyx_gemm_like",2],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",1],
+        "16285256723517297210": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14852990574796128305": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "8550783999616052522": ["convolution_gpu_bfyx_gemm_like",2],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5733701901687257088": ["convolution_gpu_bfyx_gemm_like",2],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6089202061701179659": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16443833779968719790": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",764],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "15888454525088587794": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "8116504545035982006": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "5275016494706355806": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "15585700465988560560": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "9127066823698894015": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5961488595080209440": ["convolution_gpu_bfyx_gemm_like",2],
+        "4665029580355133140": ["convolution_gpu_bfyx_gemm_like",2],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",1],
+        "5845969526791988973": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12307446289692143781": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "5251771557248725731": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "13758938418512211194": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12700008320838073774": ["convolution_gpu_bfyx_gemm_like",2],
+        "14164778301660100413": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12711558966638028352": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12522364636280164681": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "18369668865072009928": ["convolution_gpu_bfyx_gemm_like",2],
+        "727203296169504486": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "5214678408335388758": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17835134875461003221": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8465142022921853516": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "11599932445375240727": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "4601800315090684242": ["convolution_gpu_bfyx_gemm_like",2],
+        "18382226420077875582": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14459249705747952583": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12411228585189337571": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "8995892222116060827": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "12310462218432530363": ["convolution_gpu_bfyx_gemm_like",0],
+        "9776332064497085361": ["convolution_gpu_bfyx_gemm_like",2],
+        "9993925424761661218": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "17824431042110985323": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "17001492460236540325": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9454457647272059910": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "4578587579993676820": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "16113302464937833403": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "17825280904760131680": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1999892441424036372": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "13074593348097634731": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17392732266843821039": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2966185891283165994": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "14566257978356851712": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15783329079045263237": ["convolution_gpu_bfyx_gemm_like",1],
+        "9547451431091729288": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "15149336254307320187": ["convolution_gpu_bfyx_gemm_like",2],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "3961000444895975975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9513545197321447870": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "13031027103925431505": ["convolution_gpu_bfyx_gemm_like",2],
+        "16583563382485459718": ["convolution_gpu_bfyx_gemm_like",1],
+        "4858337483345561292": ["convolution_gpu_bfyx_gemm_like",2],
+        "6536333665377249409": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8374409021681741916": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2307629242354292362": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7670176887560273910": ["convolution_gpu_bfyx_1x1",2],
+        "1847170421455825520": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17407904982433770732": ["convolution_gpu_bfyx_gemm_like",1],
+        "2460415719642436412": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "11437885274663749440": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "5032195346490064156": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "18077281411861416889": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "7527175223662342321": ["convolution_gpu_bfyx_gemm_like",1],
+        "68637843533109734": ["convolution_gpu_bfyx_gemm_like",1],
+        "8501760360687221821": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "8906588133431586825": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "5890599002797783437": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "16981010901052181199": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13636129806349817264": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "14900099988131599740": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "17867620992288101450": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "621272125402238670": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13155901262605819372": ["convolution_gpu_bfyx_os_iyx_osv16",292],
+        "5040944983588288886": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10897622326486559468": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "15356995665520295246": ["convolution_gpu_bfyx_gemm_like",0],
+        "17907732260451873185": ["convolution_gpu_bfyx_gemm_like",2],
+        "13762042713029963144": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "15365628642332393565": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15777551868644801538": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17680403286850504499": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4833761011498696645": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "17601171646153308079": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "8204962103567653154": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13974740392602492680": ["convolution_gpu_bfyx_gemm_like",2],
+        "2712946943923358377": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "5367634698951188749": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "15361186788588226064": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "95993272253183796": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2173649669339714890": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "14355612297330229277": ["convolution_gpu_bfyx_gemm_like",2],
+        "10888435127006141874": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "17754836801944078461": ["convolution_gpu_bfyx_gemm_like",2],
+        "5608447459568229694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "659846949368492111": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "2850118175701764737": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "17093159649157277089": ["convolution_gpu_bfyx_gemm_like",2],
+        "277410555520090949": ["convolution_gpu_bfyx_gemm_like",0],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "10612049417873776481": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9987939079053625302": ["convolution_gpu_bfyx_gemm_like",2],
+        "18341524156838963264": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17784882947271841103": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13367043015761260275": ["convolution_gpu_bfyx_gemm_like",0],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "12983461576274227638": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9747165558500755104": ["convolution_gpu_bfyx_gemm_like",0],
+        "12793814016409887162": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "15653223776766070604": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9194441947620820715": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "15329084374930297871": ["convolution_gpu_bfyx_gemm_like",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "7509199936979430017": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4553508439536472227": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "6638696743420807294": ["convolution_gpu_bfyx_gemm_like",2],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "1720057192283799086": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "13809218391763818477": ["convolution_gpu_bfyx_gemm_like",2],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "9261867808456596636": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "16568662638983972991": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12024318713420323349": ["convolution_gpu_bfyx_gemm_like",2],
+        "7831542641855749925": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13356152596085257346": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12584870629297848143": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2198100074518629980": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1552088062654417187": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "4407683781177409314": ["convolution_gpu_bfyx_gemm_like",2],
+        "16747069131271457481": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "2213068950786625268": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "17400844732252600825": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "7400370437512056636": ["convolution_gpu_bfyx_gemm_like",2],
+        "1436830013293669148": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8243230863677884952": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4750897775273897282": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14639233649574991406": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13940433448128376511": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10127598593949337541": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "9660812093766156608": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "13764532551476584909": ["convolution_gpu_bfyx_gemm_like",2],
+        "14908665013877276517": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "6555440973226014216": ["convolution_gpu_bfyx_gemm_like",2],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "2032438743863827309": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "17303584953298149285": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8036592210244553232": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "15550722997950669458": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8007491455800395118": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",90],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",2],
+        "14384062335728088286": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "16202841384048331166": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "9427999492792081454": ["convolution_gpu_bfyx_os_iyx_osv16",128],
+        "8469338060514215816": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "13291816522762326802": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8104522072297740079": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10127626701775288565": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "14799589725341253463": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12906669887096343446": ["convolution_gpu_bfyx_gemm_like",2],
+        "17966517080605659454": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11086699387784339943": ["convolution_gpu_bfyx_os_iyx_osv16",495],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17796867588410764794": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18395970344992997862": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9367157746678824712": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11446181888102710561": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "6085098225080533278": ["convolution_gpu_bfyx_gemm_like",2],
+        "4200340674281276565": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "8335501317577461610": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "3991584206721185508": ["fully_connected_gpu_yxfb_ref",2],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "4131527916449986086": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "7505608160068471520": ["fully_connected_gpu_fb_io_ref",2],
+        "6148794431848761670": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "11571049833132558023": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "52089503050497755": ["convolution_gpu_bfyx_os_iyx_osv16",899],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "17130630712943165823": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15240660399630429406": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15531908897773912572": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10771178773821148370": ["convolution_gpu_bfyx_gemm_like",2],
+        "12279591818557049086": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "5290935680520661218": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "16691293834516280510": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "18157442326218165947": ["convolution_gpu_bfyx_gemm_like",2],
+        "15379873910046172004": ["convolution_gpu_bfyx_gemm_like",1],
+        "11345101652477732928": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "5595802790436774398": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17267132595546153629": ["convolution_gpu_bfyx_gemm_like",2],
+        "15887484617041779814": ["convolution_gpu_bfyx_gemm_like",2],
+        "12052225815821079044": ["fully_connected_gpu_fb_io_ref",1],
+        "14112695611389738149": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "12831670701606794888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17778706153204631930": ["convolution_gpu_bfyx_gemm_like",1],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "9447458159095730492": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1334121138243951086": ["convolution_gpu_bfyx_gemm_like",1],
+        "13939763360217628282": ["convolution_gpu_bfyx_gemm_like",2],
+        "16303870101043861053": ["convolution_gpu_bfyx_gemm_like",2],
+        "16237775310369180101": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11421235118459218209": ["convolution_gpu_bfyx_gemm_like",1],
+        "5033753554611312392": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "11269720109905550213": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "517601465150912854": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "5233164031954315264": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "7303492518741737111": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "4134729533276761488": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "5397783260083330774": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "5222741986856655072": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4186140878816408491": ["convolution_gpu_bfyx_os_iyx_osv16",125],
+        "9573589861499897842": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "8623022306922454565": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3237680963342495368": ["convolution_gpu_bfyx_gemm_like",1],
+        "2446435710311724460": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15561518067918160695": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "1852269248476496933": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16001665772103476029": ["convolution_gpu_bfyx_gemm_like",0],
+        "8757900457181374694": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "6902644989079870993": ["convolution_gpu_bfyx_gemm_like",1],
+        "17758354062670710364": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "17464785726466943638": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10754321688472707825": ["convolution_gpu_bfyx_gemm_like",2],
+        "13993045680928507594": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "5335250793358473555": ["convolution_gpu_bfyx_gemm_like",1],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "16021335552443492452": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1469048759583678106": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8549811622247170014": ["fully_connected_gpu_fb_io_ref",2],
+        "9816834679089152140": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2054100643811117871": ["convolution_gpu_bfyx_gemm_like",2],
+        "12700957546822808929": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18020588962875998441": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4272417312859966238": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "3714179297375678368": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "498221230041656321": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",0],
+        "17869928048344193660": ["fully_connected_gpu_yxfb_ref",2],
+        "6439778526899109398": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2881475011209167644": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "16934386540875904239": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "8129414331584785189": ["convolution_gpu_bfyx_gemm_like",1],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "3244402155461139559": ["convolution_gpu_bfyx_gemm_like",1],
+        "17602686382249457351": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "15374625876485618845": ["convolution_gpu_bfyx_gemm_like",2],
+        "13083412418930786217": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15262493122847269333": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3291900073868076610": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15993651594402422200": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4265991006340418914": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6080989915764831447": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2649948006897488504": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "9640773327221702885": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "3557182643072772598": ["convolution_gpu_bfyx_gemm_like",2],
+        "6962268765187856246": ["convolution_gpu_bfyx_gemm_like",2],
+        "18402875771862490280": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "6057433908801727873": ["convolution_gpu_bfyx_gemm_like",2],
+        "11828522357351010810": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "15245792492785141641": ["convolution_gpu_bfyx_gemm_like",2],
+        "2668985670745598382": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16642535448111764945": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17366007551797367227": ["convolution_gpu_bfyx_gemm_like",2],
+        "2470579932413307757": ["convolution_gpu_bfyx_gemm_like",1],
+        "13480393611172760874": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "13414375996946350733": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "10118395047539851751": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17399103575103078835": ["convolution_gpu_bfyx_os_iyx_osv16",1089],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "11210371874006224582": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "10093371683053539916": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15213473731205734586": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "16306284020664131647": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "9140953654075340568": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "659150305191479097": ["convolution_gpu_bfyx_os_iyx_osv16",902],
+        "10186942318345695432": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "15456771485750114116": ["convolution_gpu_bfyx_gemm_like",2],
+        "5011190083565902614": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "3768977479127609228": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9105949910901552052": ["convolution_gpu_bfyx_gemm_like",1],
+        "16195252193236429176": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",1],
+        "10726830507311062380": ["fully_connected_gpu_fb_io_ref",1],
+        "6724516766412732606": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16958661630307271135": ["convolution_gpu_bfyx_gemm_like",1],
+        "1187622888238643867": ["convolution_gpu_bfyx_gemm_like",2],
+        "17796784393519192261": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "14749290801006453098": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "12963601040302529291": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",2],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "1781619247831135285": ["convolution_gpu_bfyx_os_iyx_osv16",305],
+        "4424258528650299664": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "1996860183441418841": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "2662628817605495834": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8641167903508739082": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "15247278167909654073": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "568023964685613279": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "17212292336626940406": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "3202034075645193740": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16355518852513270001": ["convolution_gpu_bfyx_gemm_like",2],
+        "9172445047535982729": ["convolution_gpu_bfyx_gemm_like",2],
+        "17257466221539644081": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16511261203374835334": ["convolution_gpu_bfyx_gemm_like",2],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17845905249343189063": ["convolution_gpu_bfyx_gemm_like",2],
+        "1676419079398771261": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3755253206085028904": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "11696708134796103802": ["convolution_gpu_bfyx_gemm_like",1],
+        "9756049510998074315": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "13182965457868586949": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "1474719104479956715": ["convolution_gpu_bfyx_gemm_like",2],
+        "9464448984918455020": ["fully_connected_gpu_fb_io_ref",0],
+        "10344489318472060767": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "8107597524360102037": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "16349083818768061549": ["convolution_gpu_bfyx_gemm_like",2],
+        "3861084063403560668": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "6534932244936310237": ["convolution_gpu_bfyx_gemm_like",2],
+        "5254115874873721374": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8320522112821700316": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "14980327142253281498": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "10995849055789490935": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "2430404993947067949": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "1100681675092122613": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "956022649859563080": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "13610246822402943068": ["convolution_gpu_bfyx_gemm_like",2],
+        "9559533345689069514": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "7601006550805536675": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "6493509887452943215": ["convolution_gpu_bfyx_gemm_like",1],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "61390148213644186": ["convolution_gpu_bfyx_gemm_like",1],
+        "1183774022668948480": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "2294026590516781945": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12494969618927201911": ["fully_connected_gpu_fb_oi_ref",1],
+        "2740834366358352617": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12156683064218448087": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15581678976147496970": ["convolution_gpu_bfyx_gemm_like",0],
+        "4332002982390788477": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "7844764086278702374": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7650874310714729923": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8484380699802533068": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "10900962238463588974": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13443130482173929700": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4307817040832953223": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2933183897022161826": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "11341287517759485930": ["convolution_gpu_bfyx_gemm_like",2],
+        "11164600098693999456": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15718782218800307385": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11767263058642131204": ["convolution_gpu_bfyx_gemm_like",1],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "17251021943762069083": ["convolution_gpu_bfyx_gemm_like",1],
+        "1249137685908951501": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10205929431600082124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1824009696938637196": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2691406689892290663": ["convolution_gpu_bfyx_gemm_like",1],
+        "9144136375141111897": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "14702670413549232065": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11033758130987285174": ["convolution_gpu_bfyx_gemm_like",2],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "609926704263171728": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "1312322903335525510": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9241243727411869340": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "7576873892262851401": ["convolution_gpu_bfyx_gemm_like",1],
+        "14936045362442728963": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "16628679902327485435": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "13112861120841066430": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "9974986004361966590": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13775683667344570223": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "15170578644807800052": ["convolution_gpu_bfyx_gemm_like",2],
+        "868827643007921561": ["convolution_gpu_bfyx_gemm_like",2],
+        "12361848206190267821": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1564774057733793087": ["convolution_gpu_bfyx_os_iyx_osv16",97],
+        "10354305663463607086": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9172699707430374863": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "16322719022997791344": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "3221221905804708596": ["convolution_gpu_bfyx_gemm_like",1],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "8146559042269976123": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "18009083375897554008": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "16482301217529090205": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9246213432501129631": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "8733109144496806085": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "3021451990778420603": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "844278648549884313": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10286228358844791913": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13201854669827561901": ["convolution_gpu_bfyx_gemm_like",2],
+        "12184558469694708819": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3803179179802002296": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "13248218293365141596": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "41250455178236256": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2730604806511016352": ["convolution_gpu_bfyx_gemm_like",2],
+        "7044087204529042819": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "14001920054473316909": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10093554313775878065": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8108939799996498955": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12503605837910457108": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "32035190068479388": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15971924211584724882": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "16763335832616216769": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "7196214243890296121": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "7102173884859438914": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "16896434896068867157": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "17608288706234084973": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15642549417953837059": ["convolution_gpu_bfyx_gemm_like",2],
+        "8484176982872847423": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6643161848623134458": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2794704364476462562": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "8108843303778211282": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4593862318851730430": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14463983770858421738": ["convolution_gpu_bfyx_gemm_like",2],
+        "8291770994531919371": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "6538694526777067399": ["convolution_gpu_bfyx_gemm_like",1],
+        "14484890926084856480": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "15963358868537664345": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "796900095669815456": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "505027953105355818": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5573639264204952559": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "1106762955109168526": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "16632447105476661928": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "3170274732463232729": ["convolution_gpu_bfyx_gemm_like",1],
+        "88592091379585141": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "11976258954756052550": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14514450640485628836": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "10134708781744282286": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "3006428377575478529": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "6737332058785771073": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4660214425505918397": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6877976003072165363": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17516369849823844076": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6789547098653828902": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7595481705069674721": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "9805748332775912215": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16580523689587532278": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11407554707582995190": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "8358425189419823078": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "17784357412228522825": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "12916369918132790013": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",1037],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",767],
+        "12819626280531787705": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "10231289519907741812": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4157063588837576075": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "8751967016877067287": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "10289725524396556967": ["convolution_gpu_bfyx_gemm_like",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13948512795148364852": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "3436770797199367854": ["convolution_gpu_bfyx_gemm_like",1],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",0],
+        "16169024543367503806": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "13140527131098422428": ["convolution_gpu_bfyx_gemm_like",2],
+        "5167141379778311462": ["convolution_gpu_bfyx_gemm_like",2],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8028456017016080468": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "259085394007031207": ["convolution_gpu_bfyx_gemm_like",1],
+        "13959998803881264899": ["convolution_gpu_bfyx_gemm_like",2],
+        "3686062608868674589": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "14727155647330710270": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2415478259408761142": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "14602509614865844486": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "9289375071420565548": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7440546908141206022": ["convolution_gpu_bfyx_gemm_like",2],
+        "15485011864326008444": ["fully_connected_gpu_fb_io_ref",0],
+        "8470783908138180217": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17845195044080380488": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15459849799278480779": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "17721709435558297965": ["convolution_gpu_bfyx_gemm_like",1],
+        "14132860735060026066": ["convolution_gpu_bfyx_gemm_like",2],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6983544541444063131": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "13340998273773542342": ["convolution_gpu_bfyx_gemm_like",2],
+        "3134642518413656360": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "12341291953192305346": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4986977887030495943": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "16852690434396099861": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "3526198034974948081": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "16053585286807864356": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1882052795393187384": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7617773507561261623": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13459568779083836506": ["convolution_gpu_bfyx_gemm_like",2],
+        "13785621878621289403": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1980887257657896260": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4886289616235149731": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1898776014554946000": ["convolution_gpu_bfyx_gemm_like",2],
+        "4770478662275293849": ["convolution_gpu_bfyx_gemm_like",2],
+        "15117830538655814853": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17178808153714023980": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "1629280013296592298": ["convolution_gpu_bfyx_gemm_like",2],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",287],
+        "13663612869789682704": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14423094456821270228": ["convolution_gpu_bfyx_gemm_like",2],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",523],
+        "15989730594386153813": ["convolution_gpu_bfyx_gemm_like",1],
+        "6095158932103797740": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "7399775379344444344": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "13381833588713493653": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3380653500106294036": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "9981938305144461962": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4519609440668743423": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "15097371415144491976": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "12338760476079493547": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "13933912937625580405": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17126714253919198029": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "2341006744107937832": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "18178391985193947355": ["convolution_gpu_bfyx_gemm_like",2],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "11641605357868918146": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "562221645849170027": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "11561790484526369917": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3658149289395969504": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "8818070832398055086": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5509631031571317557": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5357531127711906072": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "8994777547915132466": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "2687781952021151359": ["convolution_gpu_bfyx_gemm_like",1],
+        "18083041911869525296": ["convolution_gpu_bfyx_gemm_like",2],
+        "9876098429582714576": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12466721526829931923": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10848407542826653699": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16808618754363181939": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "7657964685067862984": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13141069720428059461": ["convolution_gpu_bfyx_gemm_like",2],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",1],
+        "1138439260035360722": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "18184154104081850641": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "2338707843044884352": ["convolution_gpu_bfyx_gemm_like",1],
+        "13850920989756588064": ["convolution_gpu_bfyx_gemm_like",2],
+        "17176310030469904708": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9146427497025645310": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11291881629276762730": ["convolution_gpu_bfyx_gemm_like",1],
+        "9850711648349010674": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "700717277178942679": ["convolution_gpu_bfyx_gemm_like",1],
+        "6827316954140278736": ["convolution_gpu_bfyx_os_iyx_osv16",125],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "8509941319309380587": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "16488426854651696706": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "10432925516327889351": ["convolution_gpu_bfyx_gemm_like",1],
+        "10600040563032392126": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "11511221956203704038": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "13839590781642269381": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7508931961595339477": ["convolution_gpu_bfyx_gemm_like",1],
+        "10500029207807372735": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14330281759626724494": ["convolution_gpu_bfyx_gemm_like",2],
+        "7419216766190700536": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "3404911902272307873": ["convolution_gpu_bfyx_gemm_like",2],
+        "17489420766684604600": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "18196676408993954972": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "10186866999254188246": ["convolution_gpu_bfyx_gemm_like",1],
+        "4817953977830392054": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "2930702812469156271": ["fully_connected_gpu_fb_io_ref",1],
+        "16549498607618849252": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "11855777686733253894": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "4936968239673204144": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11988463489006787939": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13326233188936584240": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "11290558687608213321": ["convolution_gpu_bfyx_gemm_like",2],
+        "12366546292695084543": ["convolution_gpu_bfyx_os_iyx_osv16",456],
+        "11267742746905371769": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "16582080251500644069": ["convolution_gpu_bfyx_gemm_like",2],
+        "18113235498360281695": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16851949759898002809": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "14233388108948021331": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12434799432980627966": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16192971634546462244": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "7744644472305197412": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16733587306017341904": ["convolution_gpu_bfyx_gemm_like",2],
+        "10089588313551601914": ["convolution_gpu_bfyx_gemm_like",2],
+        "14397348576352573007": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "11823106525249133834": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13122637768866153753": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10110359677546019738": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12312291300513951124": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5989664002046950385": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "2346855978590136528": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "1372939511728986224": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17025268985366223779": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "3971456598769336038": ["convolution_gpu_bfyx_gemm_like",2],
+        "5329218407413679209": ["convolution_gpu_bfyx_gemm_like",2],
+        "18171940644650760608": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "850343942782057099": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "11215862132334892351": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13453226687921450129": ["convolution_gpu_bfyx_gemm_like",2],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "18273922178875123753": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "2028273519579688266": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "6233455595448276342": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11185041745377164894": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "4887402175773881313": ["convolution_gpu_bfyx_gemm_like",1],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "3086110559166474482": ["convolution_gpu_bfyx_gemm_like",2],
+        "3234567405788241673": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "814227839929688672": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7771969115805231266": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "17622515300258231642": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11806105193035393795": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "17715553891959228879": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "11829442945690098558": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9141802671320572984": ["convolution_gpu_bfyx_gemm_like",2],
+        "16170237673140354764": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "8616175124735896626": ["convolution_gpu_bfyx_gemm_like",2],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "17269318621094624075": ["convolution_gpu_bfyx_gemm_like",2],
+        "1529658068204046700": ["convolution_gpu_bfyx_gemm_like",2],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "15317510501392280831": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5688478347124565305": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "447152944190888653": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "14606504543906913119": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "3930526618478171342": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "10455850115486014344": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6210051945051792519": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "15451193085395494344": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "5163641718529821203": ["convolution_gpu_bfyx_gemm_like",1],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",1],
+        "11374410888638324212": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16661248688859994717": ["convolution_gpu_bfyx_gemm_like",2],
+        "3518981281605476136": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "10413043556440687328": ["convolution_gpu_bfyx_gemm_like",2],
+        "911927861489659568": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6561864486643226753": ["fully_connected_gpu_fb_io_ref",1],
+        "17494823614269622175": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "8071652278387309042": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "4805958162773855302": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "16666383605403885590": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "1410512481031922864": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "7033442247935655919": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",1],
+        "12141880589558027223": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2328698995040390396": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "4195847890935259046": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3923715765392385764": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2348721939771018658": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8500612796090968552": ["convolution_gpu_bfyx_gemm_like",1],
+        "13695012630130671371": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "5475537064464968733": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6914536960012332706": ["convolution_gpu_bfyx_gemm_like",0],
+        "3242468066266096173": ["fully_connected_gpu_fb_oi_ref",2],
+        "8817624284607822971": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "8453402620168400406": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",546],
+        "5934211962000091180": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10178462061836778766": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "9810703513111623136": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8870736106637803783": ["convolution_gpu_bfyx_os_iyx_osv16",43],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9628702542543622433": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "14845194064376163156": ["convolution_gpu_bfyx_gemm_like",1],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3436576388124386308": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17152100243867367458": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4614042998549572181": ["convolution_gpu_bfyx_gemm_like",2],
+        "7807168142899312025": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12150109996250730485": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13553045975561262752": ["convolution_gpu_bfyx_gemm_like",2],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16683909937519981313": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "7441199361135503715": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",2],
+        "1091511312740979158": ["convolution_gpu_bfyx_gemm_like",2],
+        "9134203155715293387": ["convolution_gpu_bfyx_gemm_like",2],
+        "17089332981370803321": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "16434635675895599016": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "5186963188234940985": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16951050796024922417": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13395074742046717601": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "14284223645235602230": ["fully_connected_gpu_fb_io_ref",2],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "13248567106128518549": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11739629316219263056": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14797994820826922836": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "1743572310914695413": ["convolution_gpu_bfyx_gemm_like",2],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9579316322704307175": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10131754493574658838": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "13273455049742872922": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "15085980226773631346": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "15325810055037682679": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "13447226378200557777": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "3075961585045028347": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11851216776536423298": ["convolution_gpu_bfyx_gemm_like",2],
+        "12251901229904154127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12716923819769400487": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "3438852523146175580": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3638987901025418036": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "10445587307296180364": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6692408578556372014": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16053383948025511837": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7703363154993904399": ["convolution_gpu_bfyx_gemm_like",2],
+        "5632101951796129342": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3666268650646000870": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "10551742525038893508": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "153117141968471446": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "6313048719388952335": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "5981885264666023260": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11462394098346770463": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "1698847067049584068": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "4046513842327685203": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16181974394948732584": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2431427502927207912": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15119063070382146368": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "5023609284081684300": ["convolution_gpu_bfyx_gemm_like",2],
+        "5797545757863100286": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "3853598651573655548": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "17332395907621747512": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "6981537186704688907": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7162701010394257343": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13383524675055536682": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7105622384646913935": ["convolution_gpu_bfyx_gemm_like",2],
+        "1908733355560815063": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "12278842522836720245": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7210729932836957540": ["convolution_gpu_bfyx_gemm_like",1],
+        "2239948568632407776": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "582386337144876096": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "4569416043426963318": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "17921616427936768657": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "2354885756165078342": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "11915835787294686201": ["fully_connected_gpu_fb_io_ref",2],
+        "11588201241814594642": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "17171513366028235799": ["convolution_gpu_bfyx_gemm_like",2],
+        "1313038182637545943": ["convolution_gpu_bfyx_gemm_like",2],
+        "14066660382918185188": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "17810119189318801197": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "884923290083082187": ["convolution_gpu_bfyx_gemm_like",1],
+        "2786925522916317149": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "10701231567226563098": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11260588538207111217": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "3256940792095638732": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "10156210866362845661": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "16482763280295827563": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "13661225837036677371": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "11569367085498045793": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "12324580272733221544": ["convolution_gpu_bfyx_gemm_like",2],
+        "10885831773581103653": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "14897935118679731283": ["convolution_gpu_bfyx_gemm_like",2],
+        "6413565827738894970": ["convolution_gpu_bfyx_gemm_like",2],
+        "17221173795372066030": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "18116824232149703772": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10472893418729915556": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13090596133852586482": ["fully_connected_gpu_fb_io_ref",2],
+        "10274587614581350261": ["convolution_gpu_bfyx_gemm_like",2],
+        "10831204282620894983": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "10394041365384258612": ["convolution_gpu_bfyx_gemm_like",1],
+        "16843976559933040107": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1051506168926530904": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",1],
+        "346832567535597247": ["convolution_gpu_bfyx_os_iyx_osv16",515],
+        "17934338042329576850": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15715522462313302642": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "6569793510829850291": ["convolution_gpu_bfyx_gemm_like",2],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "15711618559677233865": ["convolution_gpu_bfyx_gemm_like",2],
+        "15136557970717196814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6603817696964851209": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "9104236539185546468": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "7247414730479113619": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "1314612539156304342": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "5368419079251107469": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "16723949803487501587": ["convolution_gpu_bfyx_gemm_like",1],
+        "15640202505592598653": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "17258278942367320412": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "11872894645888259277": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6889498170947481097": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "9667762333290150436": ["convolution_gpu_bfyx_gemm_like",2],
+        "12797434473085560369": ["convolution_gpu_bfyx_gemm_like",1],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "18086782289842715645": ["convolution_gpu_bfyx_gemm_like",2],
+        "10880656082867082647": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "14108091242461324109": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "12478041902013146137": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "5375957124102705020": ["convolution_gpu_bfyx_gemm_like",2],
+        "5122639094068865656": ["convolution_gpu_bfyx_gemm_like",2],
+        "3741411131962514208": ["convolution_gpu_bfyx_gemm_like",0],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "8376077531098664520": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17221958812979739319": ["convolution_gpu_bfyx_gemm_like",2],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "11075875009517060583": ["convolution_gpu_bfyx_gemm_like",1],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "4209610989252810404": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "7883469783245625654": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",388],
+        "12814676907278614920": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "15402502830461368746": ["convolution_gpu_bfyx_gemm_like",2],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "1104098779103065492": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "6423120553520000795": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "10392297152843428925": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "374553246608550876": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "642256034968512602": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "1701609125136907870": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7059809764116926828": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "15291457825664605611": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1817929353109443200": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10182490653383265979": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "2660620513253264815": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "13116746433291181712": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "8017024160145338317": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "2407509127927738079": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "12345000525470836335": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "7107313154723472157": ["convolution_gpu_bfyx_gemm_like",1],
+        "17116130466596594359": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "6096189754478965440": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "9100044555742394133": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "389822325870173489": ["convolution_gpu_bfyx_gemm_like",2],
+        "12608653044712562811": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "17827762625385383658": ["convolution_gpu_bfyx_gemm_like",1],
+        "1103228955716492167": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6744583842563891546": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "4830121683809417143": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "14400339764883906933": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "6100453836448514115": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "460780635491857522": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8054185159612481260": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10468108569766167175": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10290107543739998181": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "12881836161162762524": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "10533367671706069274": ["convolution_gpu_bfyx_gemm_like",2],
+        "2616828683870391718": ["convolution_gpu_bfyx_gemm_like",2],
+        "18215260982292770252": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15308667224953963012": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "7678730081652720605": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "7536267099632318821": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "3649980610274946512": ["fully_connected_gpu_fb_io_ref",0],
+        "14642845734482478360": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17550795608527501180": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8090497202997192142": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15300588247579013966": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "12940491379482292807": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "761183183078910587": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "9853089109234784643": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "9151597254187513724": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "592364460086746355": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "14560435854055940143": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "2534408579674556441": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "970596838400633278": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "10131771849139346986": ["fully_connected_gpu_fb_io_ref",1],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12412224630798427948": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "9378419102254633989": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "17543094050285028967": ["convolution_gpu_bfyx_os_iyx_osv16",348],
+        "15095146351334328804": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6763373100985812924": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "9126242742012768166": ["convolution_gpu_bfyx_gemm_like",2],
+        "9501165931845934084": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8200094670006738584": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "13091799752362714688": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "7654445730724243959": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "15936513690378208182": ["convolution_gpu_bfyx_gemm_like",2],
+        "2510919738337557939": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14157776769026046014": ["fully_connected_gpu_fb_io_ref",1],
+        "2888587871912905870": ["convolution_gpu_bfyx_os_iyx_osv16",45],
+        "15107740124884150777": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "8848042913869254179": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "2538377242539785672": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11047625525388102466": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "194324011642969540": ["convolution_gpu_bfyx_gemm_like",1],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16159309494101203811": ["convolution_gpu_bfyx_gemm_like",2],
+        "2299440282267661763": ["convolution_gpu_bfyx_gemm_like",2],
+        "2451603338483395600": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17044070592136685322": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10994887986667360638": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "7450915928720828406": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "2004691166378443418": ["convolution_gpu_bfyx_gemm_like",2],
+        "2595273700611743351": ["convolution_gpu_bfyx_gemm_like",2],
+        "12175796957622122377": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "11190259822407791373": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5116633474932727191": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13821388909343378606": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "8997120235555587461": ["convolution_gpu_bfyx_gemm_like",2],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",514],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "16797936364395702812": ["convolution_gpu_bfyx_gemm_like",2],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "5957444113623953990": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14566544143931267758": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7391591731082133842": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15592321818359223008": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15997754881872769378": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "15688260390755491480": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "444533022549215983": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5629373398445592781": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "12930435393720466720": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9300767936311837876": ["convolution_gpu_bfyx_gemm_like",2],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6323504675912413145": ["convolution_gpu_bfyx_gemm_like",2],
+        "6364288463529107554": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3089303702413279458": ["convolution_gpu_bfyx_gemm_like",1],
+        "13418701036204748812": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "8873424072104563382": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "5085190482265319015": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10384537928514123040": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "8075453526439606224": ["convolution_gpu_bfyx_gemm_like",2],
+        "9988347141056982336": ["convolution_gpu_bfyx_gemm_like",2],
+        "18146068930296529306": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3809343305878998617": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "15670841106242481912": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "5516343490635816913": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5552958912776013600": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "6717268005860715462": ["convolution_gpu_bfyx_gemm_like",1],
+        "15154934905173371714": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "6919081291036849635": ["convolution_gpu_bfyx_gemm_like",0],
+        "13599555566632152241": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7431069335622070596": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "2105482100745329286": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "4108579755980014185": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "12360796145248339074": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "11318404975804457466": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15596913527233792996": ["convolution_gpu_bfyx_gemm_like",2],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "11674630830833831209": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12089505956882731481": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5970516037710024187": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "6377828127090689238": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "4213330047036138895": ["convolution_gpu_bfyx_gemm_like",2],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "11931909191490706784": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "12706645084970410965": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12631385844456089132": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10205576142280465189": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14776308019009874809": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "4557272439632791722": ["convolution_gpu_bfyx_gemm_like",2],
+        "8939683514448064461": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "8307147375351882939": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10997029728191881587": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "17765244777397448823": ["convolution_gpu_bfyx_gemm_like",2],
+        "13906695412889750672": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "12397493112115605421": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "2043990557089419633": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11047327014045909812": ["convolution_gpu_bfyx_gemm_like",2],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",2],
+        "16168891366331544806": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11825205449232126827": ["convolution_gpu_bfyx_gemm_like",2],
+        "6680219899975628258": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "11996551650886043090": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "12691733869577147545": ["convolution_gpu_bfyx_gemm_like",2],
+        "761984225415608773": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14545322358931928911": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3286476039871096924": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "12878858391355259417": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2460361970017706505": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "11623764266322172086": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "9852052796465340830": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "5559417017584278927": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "15470979879166640563": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "4030835922805418609": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13121196588092064246": ["convolution_gpu_bfyx_gemm_like",2],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "1338534626640014074": ["convolution_gpu_bfyx_gemm_like",2],
+        "16112835627818488034": ["convolution_gpu_bfyx_gemm_like",2],
+        "12013883366396753346": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9777638299795801012": ["convolution_gpu_bfyx_gemm_like",2],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "5481293245081340756": ["convolution_gpu_bfyx_gemm_like",1],
+        "2888315406857606108": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "2415883693527779570": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "7953340333870774815": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10971971008143485353": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "5842284971563375197": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "18076018773227225156": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "832830374368320801": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "2724007091383127418": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "9632178829095307219": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "1429370139030130929": ["convolution_gpu_bfyx_gemm_like",1],
+        "12478496773222604204": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "8467771025017377254": ["convolution_gpu_bfyx_gemm_like",2],
+        "685140170576742460": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14704939880642470064": ["convolution_gpu_bfyx_gemm_like",2],
+        "17264554677210911187": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8549465639583777774": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6181272224000872375": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1350402181555441235": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "9552312946391901745": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13300022131572486202": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "5294364781478821403": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "16985565646738638215": ["convolution_gpu_bfyx_gemm_like",2],
+        "14545094765855515974": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11860902750907076009": ["convolution_gpu_bfyx_gemm_like",1],
+        "3790881125495367946": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2072246877651869428": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "16125965158927145599": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6748628505489041229": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3119235799568225015": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "5094600092408024387": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "3504421925108785018": ["convolution_gpu_bfyx_gemm_like",1],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "14810839157236175179": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10295400862890021635": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "3830787224073518842": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "6586833064055001967": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "5191016422297403500": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15160192060731796225": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "10858234923346500323": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8913451832923806760": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7264756313770306662": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "11878200328276635385": ["convolution_gpu_bfyx_gemm_like",2],
+        "8253823502854784432": ["convolution_gpu_bfyx_gemm_like",2],
+        "12270548292992377827": ["convolution_gpu_bfyx_gemm_like",2],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5018845267269043034": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "2183193161596798350": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "2737738314051715813": ["convolution_gpu_bfyx_gemm_like",2],
+        "15434536162164591656": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14743760934522111296": ["convolution_gpu_bfyx_gemm_like",1],
+        "578940134826172063": ["convolution_gpu_bfyx_gemm_like",2],
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "10842828403850880541": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "10754450245035836188": ["convolution_gpu_bfyx_gemm_like",2],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "1226681724476075216": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15012744672096562609": ["convolution_gpu_bfyx_gemm_like",1],
+        "12024416333474523686": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "14366861063858001106": ["convolution_gpu_bfyx_gemm_like",2],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",1],
+        "4104803308438043557": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8557939065994799094": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "3725060015826635697": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "8676627474831455650": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "15342520770460205985": ["convolution_gpu_bfyx_gemm_like",2],
+        "12061391584831995030": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "18420783889227814721": ["convolution_gpu_bfyx_gemm_like",1],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "8751016391945753900": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "5175845410753897614": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4318632837402329958": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12608839247035566137": ["convolution_gpu_bfyx_gemm_like",2],
+        "1081969835308672753": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9340606088243696490": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "8143125165478395106": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "948917645960296825": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15733030371524967129": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18325123280144403295": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "7430073011895298582": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "3006979228759768702": ["convolution_gpu_bfyx_gemm_like",2],
+        "9899897639161550704": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "15516674573659704770": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "17675227620234837075": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8774613863662947205": ["convolution_gpu_bfyx_os_iyx_osv16",113],
+        "411914986559525749": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "13994738382469480124": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "6261584163347634965": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8648848365873958010": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "17358462939783262207": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10117784802089387496": ["convolution_gpu_bfyx_gemm_like",2],
+        "1021364163511049664": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15576932271488848457": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",468],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",575],
+        "8901432555239515645": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17489255290900178723": ["convolution_gpu_bfyx_gemm_like",2],
+        "6819846227498139601": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12967849866710811070": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16484600784717969318": ["convolution_gpu_bfyx_gemm_like",1],
+        "7904735292914337507": ["convolution_gpu_bfyx_gemm_like",2],
+        "1346716334208025932": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "16419903786705052849": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16954232936536653281": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "6140789642561898454": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10396343030099602596": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6522575549211855712": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11306782565667740785": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "8114928396876060694": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "17598441149165536737": ["convolution_gpu_bfyx_gemm_like",2],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "11413890625163220846": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "3811325657214369711": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "11798081355131440794": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14763015336626099830": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2808205041095636198": ["convolution_gpu_bfyx_gemm_like",2],
+        "11006325877486632502": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "11135894989941122115": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16818714747882774917": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "1197101651805223230": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5754301693527535975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "9940300152880498818": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15809072026388479729": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "2525260242689556544": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "1462775202780029067": ["convolution_gpu_bfyx_gemm_like",2],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15096978026328154490": ["convolution_gpu_bfyx_gemm_like",2],
+        "14945451027055549800": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2411809718611709031": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "6364765994481977132": ["convolution_gpu_bfyx_gemm_like",2],
+        "7606716827635769887": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "759816003617478606": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "8100051552977329013": ["convolution_gpu_bfyx_gemm_like",2],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "16061176355133391199": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "16801078648431425148": ["convolution_gpu_bfyx_gemm_like",2],
+        "16497757978901707098": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "7230623964042057933": ["convolution_gpu_bfyx_gemm_like",2],
+        "15461879919099373703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "13671635457689276237": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8149815705026829258": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5115051214738974496": ["convolution_gpu_bfyx_gemm_like",2],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "17208186152576814861": ["convolution_gpu_bfyx_gemm_like",1],
+        "13502487084912428404": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "12972406304361050136": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "2623687018437195679": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "2451627421465368826": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "1145700078649932035": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "11932768899981458741": ["convolution_gpu_bfyx_gemm_like",2],
+        "17188170051014066220": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13073917160317338455": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "18156747282906367814": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "18355551625040856531": ["convolution_gpu_bfyx_gemm_like",1],
+        "9657585348407617520": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10841786394951910408": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8897786294680986991": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "9067207838429479363": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "17430593168191424639": ["convolution_gpu_bfyx_gemm_like",2],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11523864029587161089": ["convolution_gpu_bfyx_gemm_like",0],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",2],
+        "4017163133829149027": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3320392060021963536": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "368147139706197757": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5381578460674280089": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "5523778675167321193": ["fully_connected_gpu_fb_io_ref",0],
+        "2597435203284675496": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2081318772333460627": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",564],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "9104710269725948935": ["convolution_gpu_bfyx_os_iyx_osv16",562],
+        "10447427622114317323": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "10263861857115868555": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "14561847633011875566": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5169676188205309169": ["convolution_gpu_bfyx_gemm_like",2],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "11192356850081328892": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "3170336071769787200": ["convolution_gpu_bfyx_gemm_like",1],
+        "1938627662342504660": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "13505239531682993049": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11327678075247102542": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2585767464396438954": ["convolution_gpu_bfyx_gemm_like",2],
+        "3377472614945731801": ["convolution_gpu_bfyx_gemm_like",2],
+        "7838176322738051195": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "7520300815632157008": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5124291229936820926": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "17182839667242694171": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "2098357709530580176": ["convolution_gpu_bfyx_gemm_like",2],
+        "10856527039674342926": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "5658491804782285708": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2510093757258898215": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "13817553830305981296": ["convolution_gpu_bfyx_gemm_like",1],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17923035110851963413": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "8468092944055919238": ["convolution_gpu_bfyx_gemm_like",2],
+        "2893564501191050837": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "218070270815606832": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3124997104810767514": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "16565784556269819846": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16429816273405099453": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",1],
+        "11006013403687198405": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "697609699740088622": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "15641049130597645936": ["convolution_gpu_bfyx_gemm_like",2],
+        "17287487062245049466": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7575675354187625951": ["convolution_gpu_bfyx_gemm_like",2],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "18348301285923584995": ["convolution_gpu_bfyx_gemm_like",2],
+        "11098189888598804624": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13015379405020620466": ["convolution_gpu_bfyx_gemm_like",2],
+        "17287404861045114619": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "13045206675957093567": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "2479282650381163888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16053441017037949431": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "10451904743064959757": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "5902427784683046762": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "1006721963560645335": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17243953172314194409": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "17223169013008075474": ["convolution_gpu_bfyx_gemm_like",2],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "14008438372661779490": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7958595516465029682": ["convolution_gpu_bfyx_gemm_like",2],
+        "426267761240826769": ["convolution_gpu_bfyx_gemm_like",1],
+        "241860795253927746": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7937517564893685647": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "8166976803757624321": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17163595630291422874": ["convolution_gpu_bfyx_gemm_like",2],
+        "3502889736327580141": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17338623890209792485": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "3362829461757548683": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16865271154583564899": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "17185089684685480638": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2702566744272427570": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7712831597869354170": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12219239604684537521": ["convolution_gpu_bfyx_gemm_like",1],
+        "9318550032135064372": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "6494837659483504443": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6621483425195088869": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "5458310740719324710": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "12840204133991239572": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",2],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "15715775011639091549": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "5065071428884648135": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "4296524295134959042": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "18384657372655350144": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "2912984501615111849": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2103507679502667581": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "4597954342704466825": ["convolution_gpu_bfyx_gemm_like",1],
+        "5567670507334783760": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3561366509539440079": ["convolution_gpu_bfyx_gemm_like",1],
+        "1364905900191854779": ["convolution_gpu_bfyx_gemm_like",0],
+        "1339402691552717009": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "15678329601718218341": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "9005351264094503686": ["convolution_gpu_bfyx_gemm_like",2],
+        "3518605747492037670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16359282790151128772": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "360764089318153518": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "15834666915651997510": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "11851526665791263153": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "13007534905441600782": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16323870023648254366": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "17190698921280188790": ["convolution_gpu_bfyx_gemm_like",2],
+        "9753702905908744910": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "56327004269432885": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15936869458531244961": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "13702914647519703599": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11459784003592366395": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9004823715680825977": ["convolution_gpu_bfyx_gemm_like",2],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "6505706083205285176": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14074914477149374595": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "8131682691875884781": ["convolution_gpu_bfyx_gemm_like",2],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3177362994630209421": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9780938731831129283": ["convolution_gpu_bfyx_gemm_like",2],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "17795554443343871443": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "545425355231744794": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3885931890288969926": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "1054159213127890689": ["convolution_gpu_bfyx_gemm_like",2],
+        "12664952811642406457": ["convolution_gpu_bfyx_os_iyx_osv16",569],
+        "2080397907007737054": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "16494403731659808258": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "14716719350966652036": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1119928633562250911": ["convolution_gpu_bfyx_os_iyx_osv16",947],
+        "7713736987017889212": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "14939750655636313880": ["convolution_gpu_bfyx_gemm_like",2],
+        "1646362346584649954": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13192885349640152576": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "16025442470600124062": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "17142080999569154649": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13394233139064923018": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9410978119783758141": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2047041720569246861": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5454796925594082324": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",0],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1682776041247037802": ["convolution_gpu_bfyx_gemm_like",0],
+        "10624567684389583173": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "17959539037614502049": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1954052357826969119": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "16956980254113285457": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "922541506531537121": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6447172410311223671": ["convolution_gpu_bfyx_gemm_like",1],
+        "15052127817178941719": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "11062005455602919062": ["convolution_gpu_bfyx_gemm_like",1],
+        "6351924049625723579": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "12925156865008155065": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5291944277945000781": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "311255514995417672": ["convolution_gpu_bfyx_gemm_like",2],
+        "11868419561534906809": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "3664562521273273709": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11299275869800089824": ["convolution_gpu_bfyx_gemm_like",1],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11107930597263802755": ["convolution_gpu_bfyx_gemm_like",2],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",2],
+        "5284132464580556804": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "17309224746854446222": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "8154794217037682993": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18133614045401867449": ["convolution_gpu_bfyx_gemm_like",2],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "3897967722980386263": ["convolution_gpu_bfyx_gemm_like",2],
+        "15088940149962496972": ["convolution_gpu_bfyx_gemm_like",1],
+        "7083152697366621236": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9298483238271063853": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "10625675062556386448": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6121673167888047110": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12541764833974378504": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12923653434892323603": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "2567809041240246707": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17942120824047252501": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17912189681971987483": ["convolution_gpu_bfyx_gemm_like",2],
+        "7573459699367415551": ["convolution_gpu_bfyx_os_iyx_osv16",515],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",539],
+        "8489998884193999354": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10356951625481502476": ["convolution_gpu_bfyx_gemm_like",2],
+        "14044495589185586465": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "41672385434660942": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "8262441556572334783": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "16748743818537812349": ["convolution_gpu_bfyx_gemm_like",2],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "8234878941966364642": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "7396823789595001064": ["convolution_gpu_bfyx_gemm_like",2],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2850279308978256234": ["convolution_gpu_bfyx_gemm_like",2],
+        "10068502639160680134": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "4766071144928072260": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "4455497237293642238": ["convolution_gpu_bfyx_gemm_like",2],
+        "3621449131285713809": ["convolution_gpu_bfyx_gemm_like",2],
+        "18044455700176500102": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3623866842874047894": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "17332230377845694888": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "3200047546714112402": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "8682149821028981871": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",661],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "6290180140047520382": ["convolution_gpu_bfyx_gemm_like",1],
+        "5135353986081664933": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12138556002719602750": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "15603643151057665338": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "5033665285977853779": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "17433037267999205350": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069334144391048686": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "13199524367893035805": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17279975778400757791": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "13925839061045347955": ["convolution_gpu_bfyx_gemm_like",1],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "3761770343527826418": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "9092949297095391463": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10545749454895857995": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4590784654677429162": ["convolution_gpu_bfyx_gemm_like",2],
+        "7981376447277193852": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "6964180083696019970": ["convolution_gpu_bfyx_gemm_like",1],
+        "6496839689453807726": ["convolution_gpu_bfyx_gemm_like",2],
+        "203639177311791127": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "1005880016096298476": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",42],
+        "9750510172185801133": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6905249031401202060": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17575578027095664417": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3036808833459559381": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "3125577147662589592": ["convolution_gpu_bfyx_gemm_like",1],
+        "10708706979952421150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "5756918912614763074": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5973242004448142604": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "9863856393759813897": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9609257787066002999": ["convolution_gpu_bfyx_gemm_like",2],
+        "8454760437961964894": ["convolution_gpu_bfyx_gemm_like",2],
+        "1117787205894124896": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "14471867575610362464": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "3816774953143987171": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8576229375621297412": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "9759380701896779097": ["convolution_gpu_bfyx_gemm_like",2],
+        "17774902969414949042": ["convolution_gpu_bfyx_gemm_like",2],
+        "3882955134902442387": ["convolution_gpu_bfyx_os_iyx_osv16",720],
+        "16683485007140805060": ["fully_connected_gpu_fb_io_ref",1],
+        "16767564582561837873": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2104529100867065546": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "10158184435144178161": ["convolution_gpu_bfyx_os_iyx_osv16",337],
+        "11892088065638996743": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "9743806043658380623": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "17228877915053571642": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3272776991539782834": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14234117003504517946": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2220961811760955456": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "8850600236849718709": ["convolution_gpu_bfyx_os_iyx_osv16",1024],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",831],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "5519835581976587401": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "17040970955448750876": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "4239133538073498792": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "5409924335138540834": ["convolution_gpu_bfyx_os_iyx_osv16",526],
+        "14132290154676895976": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12582624102297726596": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "9454954846682513038": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4950144098898276785": ["convolution_gpu_bfyx_gemm_like",2],
+        "12427258337646070422": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7228139313323996640": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9569446666675696513": ["convolution_gpu_bfyx_gemm_like",1],
+        "7813041847979170166": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17628984504073918701": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "2713481951804190325": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "15489882561480858974": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "3939805316470672966": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3839690227347352846": ["convolution_gpu_bfyx_gemm_like",2],
+        "17864395500488861670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5036963191507722541": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "261021128656714770": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "12482312825666761192": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "18219755699990183812": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9070474871526366492": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "2841943277631596989": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "5570191330195573102": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "12823842409678756966": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "6262190151863459214": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "69884424286147709": ["convolution_gpu_bfyx_gemm_like",2],
+        "2521821959816944292": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "17471843449888763571": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2777614869053822003": ["convolution_gpu_bfyx_os_iyx_osv16",377],
+        "13126786259906598018": ["convolution_gpu_bfyx_os_iyx_osv16",1026],
+        "13948873105076070952": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "4220826666482500445": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "8422808932256100230": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "6621371075123542816": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",512],
+        "15360511165237335684": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17399542571019639128": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10117376369841171716": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "846177346130290194": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "15336590103518398224": ["convolution_gpu_bfyx_gemm_like",2],
+        "17243648226968859637": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2930848604606590505": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "3621070130367713395": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "15411603884973340468": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15016406041863758148": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "13804435767468730732": ["convolution_gpu_bfyx_gemm_like",2],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "10463896120685306944": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15808629700189777056": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17713034180977313726": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "9404677451270692749": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "7942294816235384071": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "1865187811299838654": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "17049054004246292085": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "4147006350295905486": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "9655550151067451233": ["convolution_gpu_bfyx_gemm_like",2],
+        "9833242806281729759": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "9038991914155436715": ["convolution_gpu_bfyx_gemm_like",1],
+        "10730856574108806045": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "5461980510262646821": ["convolution_gpu_bfyx_gemm_like",2],
+        "4679163800360809315": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12714194906146827658": ["convolution_gpu_bfyx_gemm_like",1],
+        "3859314295530377028": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "7263339400190408379": ["convolution_gpu_bfyx_gemm_like",2],
+        "15532419087060587119": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5536424274663702901": ["convolution_gpu_bfyx_gemm_like",2],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "2746052215199129520": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "17716065235878633691": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5685381761573686628": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8258382025812748961": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "14707855908416908375": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "14650567822254940018": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "15294692035670155801": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "498239903908845198": ["convolution_gpu_bfyx_gemm_like",2],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "15479549936562568596": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7363788553442810299": ["convolution_gpu_bfyx_gemm_like",2],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "15775917744517770768": ["convolution_gpu_bfyx_gemm_like",2],
+        "9899242398980336120": ["convolution_gpu_bfyx_gemm_like",1],
+        "12791525533856308302": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "9256308629247511374": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "11433534680781300610": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10965563190266380694": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "17252689774572814142": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "6158514925486943212": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "14786904599410885158": ["convolution_gpu_bfyx_os_iyx_osv16",465],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "11151426820269138585": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1276881030620698911": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17523210737277743952": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4883106423598271822": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "8800251965243080024": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3382494956350224120": ["convolution_gpu_bfyx_gemm_like",1],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "15322989486222859378": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "801864263975761712": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "9457038545823436137": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "12654574135415748217": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "8131617570786904723": ["convolution_gpu_bfyx_gemm_like",2],
+        "1663732107639157701": ["convolution_gpu_bfyx_gemm_like",2],
+        "6695336381467406810": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "11984095218733350838": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14953809073272885651": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4911398420005278258": ["convolution_gpu_bfyx_gemm_like",1],
+        "4940950742383121943": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "17614929666625976544": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "10683839359385393536": ["convolution_gpu_bfyx_gemm_like",1],
+        "9207334433308148635": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "18153597620760635012": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13373912451448693522": ["convolution_gpu_bfyx_gemm_like",1],
+        "7369471926167902143": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "10076578838853982233": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "2935787827649981367": ["convolution_gpu_bfyx_gemm_like",1],
+        "9198752981132674942": ["convolution_gpu_bfyx_gemm_like",1],
+        "17693518538833606792": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "572155668587252712": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "530825424084837479": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "1655427025346068673": ["convolution_gpu_bfyx_gemm_like",1],
+        "10084794570892043447": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "4495774394017823312": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "13359643347682243944": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "11568162864377479487": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "8155752116518841384": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "7982784766505903515": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "4185477435943946730": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8354812222032899427": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16131386739027190836": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "6277198010392189880": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "11845013061234102293": ["convolution_gpu_bfyx_gemm_like",2],
+        "11287863182337672053": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "4716188972902735458": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "3704618172730076978": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7768680313873061531": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15265621959560796543": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "9130971535185609293": ["convolution_gpu_bfyx_gemm_like",2],
+        "14930745998253392722": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "8963262014498730146": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",1],
+        "4947961640303581107": ["convolution_gpu_bfyx_gemm_like",2],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "11088128828863596806": ["convolution_gpu_bfyx_gemm_like",2],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "4294879469633231552": ["convolution_gpu_bfyx_gemm_like",2],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "9608917563823863132": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12889351859522118935": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "1233021176530240722": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4999210721703970274": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14086074948200412805": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8515479970005301094": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "7071991799972799089": ["convolution_gpu_bfyx_gemm_like",2],
+        "54019631544204590": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12590495767805868405": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "15316782593191029443": ["convolution_gpu_bfyx_gemm_like",2],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17466025028296506313": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "17259951372033727587": ["convolution_gpu_bfyx_gemm_like",2],
+        "15385506288692289568": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17087143277789116317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14811022197918391667": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1423297940282476513": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "16996022503617157059": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10509933181132310969": ["convolution_gpu_bfyx_gemm_like",1],
+        "6225447513745282621": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11195875185591819437": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7863886351122918972": ["convolution_gpu_bfyx_os_iyx_osv16",194],
+        "17006655627343469372": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "8485845304380573432": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10628725059172743408": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17302671258991071440": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2479856511929768548": ["convolution_gpu_bfyx_gemm_like",1],
+        "702096475436365058": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4327450388326573746": ["convolution_gpu_bfyx_gemm_like",1],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "11806402239500046867": ["convolution_gpu_bfyx_gemm_like",2],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "7848121247546147821": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "1003101267609305257": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "10462144647439624978": ["convolution_gpu_bfyx_gemm_like",2],
+        "16170708786673864371": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "5229688072405810569": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8269543491844451750": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "11612998433409522582": ["convolution_gpu_bfyx_gemm_like",2],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "11704369548723383645": ["convolution_gpu_bfyx_gemm_like",2],
+        "16122033101591094139": ["fully_connected_gpu_fb_oi_ref",1],
+        "2094213523530180653": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "5011769546010018777": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "18117954008112578376": ["convolution_gpu_bfyx_gemm_like",2],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "1540459344569916165": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5055133356846736609": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "10608496431404827757": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8797661560676476245": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "582360460084115077": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "8529170838214082841": ["convolution_gpu_bfyx_gemm_like",2],
+        "8378690770140438511": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "3860603464276263676": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13616241450266119966": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "16802487456370986847": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15315327794058441258": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "16063854283763838910": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15924144379094505874": ["fully_connected_gpu_fb_io_ref",1],
+        "868488930567226694": ["convolution_gpu_bfyx_gemm_like",2],
+        "10348660503952680688": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10208132281050693649": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "14394427817253242611": ["convolution_gpu_bfyx_gemm_like",2],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17406383217119217230": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "6495132856471482043": ["convolution_gpu_bfyx_os_iyx_osv16",865],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "14094981198645015124": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "11782525502250249483": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5230871884758163940": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "6898793319624390153": ["convolution_gpu_bfyx_gemm_like",2],
+        "13600579723542095577": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "9207413252274439059": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15151957983054148973": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "14366252780310630703": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10428477376571919905": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "18250076003231973692": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6778781361481531516": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4196367396954155354": ["convolution_gpu_bfyx_gemm_like",2],
+        "4406157095142118884": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "15381551674482810230": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "18308661808437079996": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",280],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "8732106543033226791": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "10568883265991969648": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1795659014508380077": ["convolution_gpu_bfyx_gemm_like",1],
+        "14141983383097250411": ["convolution_gpu_bfyx_gemm_like",1],
+        "6651097363666320726": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "10902108166827340970": ["convolution_gpu_bfyx_gemm_like",2],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "9357359875134299131": ["convolution_gpu_bfyx_gemm_like",2],
+        "14579050468883613611": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "1876286132660871464": ["convolution_gpu_bfyx_gemm_like",2],
+        "2740287492529009109": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "15285236716284874711": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "1062508357634542606": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "18373068999874730591": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "9831195630506601660": ["convolution_gpu_bfyx_gemm_like",2],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "7019316994558628633": ["convolution_gpu_bfyx_gemm_like",2],
+        "13729951531199985382": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "9643671820560131959": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "15841489476316341204": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "15024130918582332928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14301661367597749567": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "6707221689266688389": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "1303304215797905198": ["convolution_gpu_bfyx_gemm_like",2],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "7658318862249823838": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "4347494599650425733": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "8939520209266902800": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17886436103211436626": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "12757564215386697460": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "14959281374959998609": ["convolution_gpu_bfyx_gemm_like",2],
+        "18204971481718743856": ["convolution_gpu_bfyx_gemm_like",2],
+        "7174804306958128658": ["convolution_gpu_bfyx_gemm_like",2],
+        "4550028191070279999": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1605295763358374504": ["convolution_gpu_bfyx_gemm_like",2],
+        "12493863403516600413": ["convolution_gpu_bfyx_gemm_like",1],
+        "8749399240948437294": ["convolution_gpu_bfyx_gemm_like",2],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "16494581774051338901": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "4054010905884346287": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "17713011656078651": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15683344003370367509": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "17604747523124060652": ["convolution_gpu_bfyx_gemm_like",2],
+        "7688176479120305539": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "12319165874575782715": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "3935883681780676157": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "17828453493113919756": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "9639014900668946045": ["convolution_gpu_bfyx_gemm_like",2],
+        "15280273795883244074": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7761195307416102494": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5095827462645341808": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "17496371501557652357": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "12085208566397959149": ["convolution_gpu_bfyx_gemm_like",2],
+        "5996261744926399743": ["convolution_gpu_bfyx_gemm_like",2],
+        "6954257882806659594": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "16937207522545573792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10600884986702650404": ["convolution_gpu_bfyx_gemm_like",2],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "4797026040899499511": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "1127598752149871162": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5939121107940759940": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "585914943085061885": ["convolution_gpu_bfyx_gemm_like",1],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "11579025491409526679": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "14512407261081843554": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "3963106895592011725": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "2346992541638145615": ["convolution_gpu_bfyx_gemm_like",2],
+        "12655099960717366198": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4859271780094116779": ["convolution_gpu_bfyx_gemm_like",2],
+        "13027039165868458729": ["convolution_gpu_bfyx_gemm_like",2],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "1760830986937165861": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "15551453802011405101": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "2467535554409643460": ["convolution_gpu_bfyx_gemm_like",1],
+        "15124985846197662243": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "3615203440895591147": ["convolution_gpu_bfyx_gemm_like",1],
+        "8230144305844912369": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "7826714904736870517": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17342868362584820356": ["convolution_gpu_bfyx_gemm_like",2],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "13462726136352103466": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10433456687054381828": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2577413012740709678": ["convolution_gpu_bfyx_gemm_like",2],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",2],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "15434706304418357961": ["convolution_gpu_bfyx_gemm_like",2],
+        "12636120902231094700": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6717243674054760598": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "16684378382033936005": ["convolution_gpu_bfyx_gemm_like",2],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "8431845338648284548": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "1410630713443793537": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "1760779615705074283": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "13020929028222837402": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "16228026045292341333": ["convolution_gpu_bfyx_gemm_like",2],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18445243511250094011": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "4860779741225078946": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "12965552570525926289": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "14683086376707577764": ["convolution_gpu_bfyx_gemm_like",1],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "1146282291269334070": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2425177545256374371": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "6995472847770703647": ["convolution_gpu_bfyx_gemm_like",2],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "10270203686708782941": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3150231129728961455": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "14365699621119565405": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11430797372848621790": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "841243068178925457": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "3855151839445505918": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "1179906398014559042": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "6578239603654034233": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "11322451605795727486": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7410628771323937530": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7490524380333929773": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "5695368162557483073": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "12136803297132972709": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6526586547926160627": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "11910060331768652144": ["convolution_gpu_bfyx_gemm_like",2],
+        "6603489144277795818": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7356440848422235031": ["convolution_gpu_bfyx_gemm_like",1],
+        "17846557385112426504": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12713087335581316946": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "3831257753143317802": ["convolution_gpu_bfyx_gemm_like",2],
+        "17372520271370779917": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "8860685325047463026": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13731964100893109797": ["convolution_gpu_bfyx_gemm_like",1],
+        "2916077416184925232": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7157499157310356912": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8509748651922589684": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10756831914332769026": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",1],
+        "5369464352361405510": ["convolution_gpu_bfyx_gemm_like",2],
+        "9522661528867955338": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1691554843141984381": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "13797057152042581440": ["convolution_gpu_bfyx_gemm_like",1],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "10205696100164492716": ["convolution_gpu_bfyx_gemm_like",2],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "2055914145961691571": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8104331313502492541": ["convolution_gpu_bfyx_gemm_like",1],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "15282806587681892519": ["convolution_gpu_bfyx_gemm_like",1],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15552287544878243347": ["convolution_gpu_bfyx_gemm_like",1],
+        "14156845527754813253": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "6740385846687754849": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "12823080103951853168": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17851024468934906318": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "10190532901392055501": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4113935675071480884": ["convolution_gpu_bfyx_gemm_like",2],
+        "14757855448502485216": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "15857087373591747006": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5352896995050401444": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8701639906504450534": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "17526891234501366023": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "14269161473352876138": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "17631458041591681785": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14213516751025324346": ["convolution_gpu_bfyx_gemm_like",2],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "5754844816339228920": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "9324602658580246084": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "10660722770448981436": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9743549865786050651": ["convolution_gpu_bfyx_gemm_like",2],
+        "4356806313729405658": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "5906712613621491207": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9522947878591994913": ["convolution_gpu_bfyx_gemm_like",2],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "13468713306678453952": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "9527075413813342687": ["convolution_gpu_bfyx_gemm_like",2],
+        "11369389082421346630": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "4479979951990338510": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2039909180006215069": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "14174805457643822445": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "533820672115442982": ["convolution_gpu_bfyx_gemm_like",2],
+        "459936950868112292": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "6747799061507191246": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9468542963649996822": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "6108475838757986889": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17769703068450272262": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "17128723415461475388": ["convolution_gpu_bfyx_gemm_like",2],
+        "1713947356482032411": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "5887877259873928726": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4607428643002808173": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "16149924641081427062": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "2388209402010617408": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "18043340998699622388": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "15511138074959300404": ["convolution_gpu_bfyx_gemm_like",2],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "13348329768178411596": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "1299452063079314341": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9497934813418221769": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "1395293354112586043": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7730305811644972643": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "17514082938765137629": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "18259001228411909210": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "6587817876244206939": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7089077910858800239": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "7289940394271052757": ["convolution_gpu_bfyx_gemm_like",1],
+        "13702692566238948173": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9391425117463100557": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "15695275881213623746": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "4897991181236908768": ["convolution_gpu_bfyx_gemm_like",1],
+        "12582321591799165205": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "1629816265162728770": ["convolution_gpu_bfyx_gemm_like",1],
+        "14740550583313186369": ["convolution_gpu_bfyx_gemm_like",1],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14808831640065476291": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4369346833875105372": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12836639380579091509": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "1650519167046658780": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "1114661658519542600": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18132981365225439999": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2467766894778630615": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",876],
+        "3107611675766875160": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "4202116155711873525": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "8509882139595784161": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "3995098494991567714": ["convolution_gpu_bfyx_gemm_like",2],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "363330365598760149": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "10395191003166536655": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "11696231285411686761": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289048840489035546": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "8655525088525612583": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11640865562390693266": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "5020605371834958647": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "6296371382672640627": ["convolution_gpu_bfyx_gemm_like",1],
+        "13337315872184544686": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "2376239021851907962": ["convolution_gpu_bfyx_gemm_like",1],
+        "1208534686657112759": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "310584224049735004": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "3435773540391994106": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "13676670925355487305": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "17343050785312683560": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "14176233347574275776": ["convolution_gpu_bfyx_gemm_like",1],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "5691889055008878111": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "6306539529168638031": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12253987037990618484": ["convolution_gpu_bfyx_gemm_like",1],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "9165275903833498932": ["convolution_gpu_bfyx_gemm_like",2],
+        "15156836293519486753": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7974614031099580856": ["convolution_gpu_bfyx_gemm_like",2],
+        "11928926429060828408": ["convolution_gpu_bfyx_os_iyx_osv16",132],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14755869345266103764": ["fully_connected_gpu_fb_oi_ref",1],
+        "9557728221162137067": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "14417033368952865805": ["convolution_gpu_bfyx_gemm_like",1],
+        "16026019808764920641": ["convolution_gpu_bfyx_gemm_like",2],
+        "16897485136352617189": ["convolution_gpu_bfyx_gemm_like",2],
+        "2688060699200137048": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4834591210311380436": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",527],
+        "13500369101462555447": ["convolution_gpu_bfyx_gemm_like",2],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "5334190564423375247": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "15679696422603106163": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3522455279376021211": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2246205611561147645": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5301394322453453489": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17429692714456679999": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",2],
+        "9177395776408296291": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14904665242518014005": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "1565612286723277822": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5718472464360340274": ["convolution_gpu_bfyx_gemm_like",2],
+        "10897008852059401902": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "12676139447729343679": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7142195383189497127": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2789137853864057385": ["convolution_gpu_bfyx_gemm_like",2],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",2],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "5797243082477551421": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3574679673239756551": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "8025053805734757314": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "17991368786018745231": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6203602270552179462": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "16559140502701231107": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "13675314612031135613": ["convolution_gpu_bfyx_gemm_like",1],
+        "8962502004422485576": ["convolution_gpu_bfyx_gemm_like",2],
+        "16955653765071712611": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "3217555855036660482": ["fully_connected_gpu_fb_io_ref",2],
+        "8775336277634573074": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "6876300000441081789": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2070429718533716882": ["convolution_gpu_bfyx_gemm_like",2],
+        "13941251104772804303": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "14083279273292567319": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "8336494030011542852": ["convolution_gpu_bfyx_gemm_like",1],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "14010642743400284761": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "498420237272375425": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13765632280570725774": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "9475130054420979752": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "14046217730873620907": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17086887873464601732": ["convolution_gpu_bfyx_gemm_like",1],
+        "8734483136584351066": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "3018306533413795559": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3355259926747524578": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "17818587793483875865": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13064477237937322246": ["convolution_gpu_bfyx_gemm_like",1],
+        "18193831330827252971": ["convolution_gpu_bfyx_gemm_like",2],
+        "12044635257539223503": ["convolution_gpu_bfyx_gemm_like",2],
+        "4725009116734166168": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "18232459663207612727": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "11327867170377736609": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "1197184887743937394": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9833540739021310892": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16304963156448605623": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "11213667690594303395": ["fully_connected_gpu_fb_io_ref",1],
+        "9368244029111057323": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "1168589063110524328": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "6026065914078520895": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "12083217714727863832": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "5246955189449281709": ["convolution_gpu_bfyx_gemm_like",2],
+        "1724222702460860833": ["convolution_gpu_bfyx_gemm_like",2],
+        "6973621625148257910": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18010600104565458874": ["convolution_gpu_bfyx_gemm_like",2],
+        "11981887712163064333": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "7152107839144357830": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "5955810688179557560": ["convolution_gpu_bfyx_gemm_like",2],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "710166379854475667": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "1579733029852052699": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "8680545947510235993": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7380979920013545867": ["convolution_gpu_bfyx_gemm_like",2],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "5488296540132936296": ["convolution_gpu_bfyx_gemm_like",1],
+        "304721598975479337": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "6133592828563353516": ["convolution_gpu_bfyx_gemm_like",1],
+        "8158983334404475382": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "7353255713834431471": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "11280403113463077620": ["convolution_gpu_bfyx_gemm_like",2],
+        "12794030011655906930": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "17361319565503258506": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "3856394004079548211": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12163456975896925619": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "12311901617815857033": ["convolution_gpu_bfyx_gemm_like",1],
+        "10527256963399838405": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "8334832698020211623": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17965825642065048619": ["fully_connected_gpu_fb_oi_ref",2],
+        "8235002440285527553": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "4846216894450341698": ["convolution_gpu_bfyx_gemm_like",2],
+        "7878217536124016199": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6537576410448334203": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "7289633911925073088": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13946367911927964830": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12175297963550750804": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "3432296808755992670": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "13836867092941506302": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "9758759365463492505": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "12305383126483033452": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "9497269191159495932": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "16011429608661242565": ["convolution_gpu_bfyx_gemm_like",2],
+        "11979910991788695837": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "12792454713887439830": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "15241191584896579183": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15534517308430424624": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11878217002671373638": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "8045393243176844621": ["convolution_gpu_bfyx_gemm_like",2],
+        "4245229655273611845": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "12315068368597230211": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "12169896916690963726": ["convolution_gpu_bfyx_gemm_like",2],
+        "6674643031068271417": ["convolution_gpu_bfyx_gemm_like",2],
+        "10838721873837128971": ["convolution_gpu_bfyx_os_iyx_osv16",676],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6796758191974756201": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2215194389847256545": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "496948821475405395": ["convolution_gpu_bfyx_gemm_like",2],
+        "18286006396667126860": ["convolution_gpu_bfyx_gemm_like",1],
+        "10713207196920878995": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "17446388159565719362": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6493920223660825755": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "10011668671963948912": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5172823024549700279": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5635449856699664273": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "12451592945087000191": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "3363675939515208883": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "2257384183256237750": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14463173937397982331": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "8402396502992483524": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17888721282811720634": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2410828969408182980": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16928564394848059094": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14742909697076926475": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14807299286266923693": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",417],
+        "7316825051569394089": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "16935619230235600309": ["convolution_gpu_bfyx_gemm_like",2],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "7439340221097179208": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10614918790075146626": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "3069396488274616770": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15929361440504489924": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "10591159235183381823": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "7558864177789582540": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_gemm",1],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6895664772793074050": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "14206328165498357760": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13766538247146238357": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "4945845875046545967": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14309292105974991733": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "15214779483545052950": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "792684262493086891": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "5120274680151325194": ["convolution_gpu_bfyx_gemm_like",2],
+        "14848732804958314374": ["fully_connected_gpu_yxfb_ref",0],
+        "1034911525083515252": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13941188114382863776": ["fully_connected_gpu_fb_oi_ref",2],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "1373904073013943690": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "3746573775462003750": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13282612510005390816": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "10073779356457603252": ["convolution_gpu_bfyx_gemm_like",2],
+        "7404732699742965436": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10306169610486701545": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "11007100272494557520": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "3752278444736105763": ["convolution_gpu_bfyx_gemm_like",1],
+        "11404331488962230130": ["convolution_gpu_bfyx_gemm_like",1],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "15394217414267195999": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13721983823460534294": ["convolution_gpu_bfyx_gemm_like",2],
+        "937200116534179904": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "5341876404211768451": ["convolution_gpu_bfyx_gemm_like",1],
+        "9953329530402569669": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5872553335123308034": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3434842614653335826": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6232596685071671579": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7173828525834910425": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "3409255127071376537": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1149548328523286475": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "15019050434475217267": ["convolution_gpu_bfyx_gemm_like",2],
+        "11093147488085506266": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "3604379857905625467": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "3447774474841314860": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "16705941191876956548": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "6491772898618671653": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3318430113631867573": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "3416636940668221406": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "6753857156025715321": ["convolution_gpu_bfyx_os_iyx_osv16",223],
+        "755157892988514864": ["convolution_gpu_bfyx_os_iyx_osv16",136],
+        "16159852373972174245": ["convolution_gpu_bfyx_gemm_like",2],
+        "10168317560306247723": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "4370027682980493159": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "13694766887442024878": ["fully_connected_gpu_fb_io_ref",1],
+        "6556795059657533200": ["convolution_gpu_bfyx_gemm_like",2],
+        "15387047026300787039": ["convolution_gpu_bfyx_gemm_like",2],
+        "875552069535001284": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "364197229238830807": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "6293500642319778096": ["convolution_gpu_bfyx_gemm_like",1],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "2477866283402053371": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "5448665190811365701": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "16689318540732157754": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "598745924736700294": ["convolution_gpu_bfyx_gemm_like",2],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8054599744123820194": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "10159790066948852390": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "2805931700404492624": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4718705504966715203": ["convolution_gpu_bfyx_gemm_like",2],
+        "9444953530704856016": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1677118421195120152": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "16150934538381572916": ["convolution_gpu_bfyx_gemm_like",2],
+        "11004350075893421731": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6849874726361751307": ["convolution_gpu_bfyx_gemm_like",2],
+        "16312223896859176991": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "433161293684647032": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6639715607290389968": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",284],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15737508945513376813": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5743482411668939203": ["convolution_gpu_bfyx_gemm_like",2],
+        "7148542290597073512": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "7281661441196896385": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2542984219353153495": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "6322831233548420761": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "15733883474006568340": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15918017311798856029": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",1],
+        "11522488904021243956": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3420064118559852968": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "2431923918345445420": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "7860086755625626604": ["convolution_gpu_bfyx_gemm_like",2],
+        "10982693252072682414": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "973402921452083017": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "7218689869635572700": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9116206094279111365": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12329909110827539139": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16385712633367611786": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "4063525218682664832": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10881884300766361791": ["convolution_gpu_bfyx_gemm_like",2],
+        "3704271978133986620": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7717602860943327535": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16766706479910720794": ["convolution_gpu_bfyx_gemm_like",2],
+        "10629681722649771498": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1659851931406041285": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17902799955139047426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15737542477498282367": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "8550133332738529361": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6626716013917662606": ["convolution_gpu_bfyx_gemm_like",2],
+        "5920614348521143999": ["convolution_gpu_bfyx_os_iyx_osv16",129],
+        "3617433210865054182": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "2772704069752888874": ["convolution_gpu_bfyx_gemm_like",2],
+        "9968686603153440164": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14151249542292579535": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "17947613081555491099": ["fully_connected_gpu_fb_oi_ref",2],
+        "4244790495090049295": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "4554343896877444783": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13599438824699346708": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "937050062571228573": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10250778203413648582": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "10153070641942936648": ["convolution_gpu_bfyx_gemm_like",1],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2838789360952219092": ["convolution_gpu_bfyx_gemm_like",2],
+        "8272823732258536202": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "9884646296875511696": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4445912157712391517": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",2],
+        "8153567933591966877": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "7315740838189400004": ["convolution_gpu_bfyx_gemm_like",2],
+        "5060817429317741254": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14724862072414829490": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "981276017776678882": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "10643373404881648498": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "3355824730785179775": ["convolution_gpu_bfyx_os_iyx_osv16",899],
+        "1018319414633271980": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "2764034841399585177": ["fully_connected_gpu_fb_oi_ref",2],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "8474585711383508493": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16687701987371294908": ["convolution_gpu_bfyx_gemm_like",2],
+        "15594091060902767607": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "1787598049938821496": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "2072252610120557179": ["convolution_gpu_bfyx_gemm_like",2],
+        "6053594232298534345": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "10995424394152951534": ["convolution_gpu_bfyx_gemm_like",2],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "15741360654354155504": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",124],
+        "12878631058803628679": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9531730330306606343": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "1640358227345963848": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "8737417433314100353": ["convolution_gpu_bfyx_gemm_like",2],
+        "14445520478857662586": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6040623414692799116": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "10381752670329683275": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "14066219153422011272": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "3113016029551460773": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "1089679781525023551": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "4091785563304559606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "9945721344229922405": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12176879951537921518": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "14173867073407110501": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "277151219694781348": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "14629433964319883917": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "14669219788000023965": ["fully_connected_gpu_fb_oi_ref",0],
+        "889943986793446284": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "15325302411038679750": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "10177466042250039828": ["convolution_gpu_bfyx_gemm_like",2],
+        "16140133852987111783": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "15693851280141842140": ["convolution_gpu_bfyx_gemm_like",2],
+        "7562624810837784407": ["convolution_gpu_bfyx_gemm_like",2],
+        "14535007186125575064": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14864150409380754546": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5831305777612569716": ["convolution_gpu_bfyx_gemm_like",2],
+        "6660221471357497741": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10168217053882274702": ["convolution_gpu_bfyx_gemm_like",2],
+        "13874754478479442212": ["convolution_gpu_bfyx_gemm_like",2],
+        "11951606039079763598": ["convolution_gpu_bfyx_gemm_like",2],
+        "5326891298755303584": ["convolution_gpu_bfyx_gemm_like",2],
+        "5550000568272972532": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2387628682187438903": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "72444706264681262": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "14257548530334193336": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13711710595263882397": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14436334357815544497": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11231597775940542830": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4536811685836767511": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8161047856682416508": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "15257886319670476581": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "12879205642236526041": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7215460815798365056": ["convolution_gpu_bfyx_gemm_like",2],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2881769839926594784": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "11529521968552409482": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6641684310751726510": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17122338330334998991": ["convolution_gpu_bfyx_gemm_like",1],
+        "5185895996350118172": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "714397516895317906": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "13146231972557134419": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7005371843527735283": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "4890442595203749341": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "9216695884134021401": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17382660912493284320": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14652719560551657529": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "4690935789908896751": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "17610648476343170476": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6210074450403696110": ["convolution_gpu_bfyx_gemm_like",2],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2287331417346465035": ["convolution_gpu_bfyx_gemm_like",2],
+        "9235762655002034553": ["convolution_gpu_bfyx_gemm_like",2],
+        "14996839491874598555": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16507285966998102421": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "557778263661655803": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "7344363094493575878": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17947097500350250352": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "8855801044538137828": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "18214405165366931407": ["convolution_gpu_bfyx_gemm_like",2],
+        "11095908837221722097": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14902389080201926109": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "12526627889432649075": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "10340099951904598712": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "6489448536745533209": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "12063854963434677046": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "10931533380146553429": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17021953651379372973": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16907043223873231356": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "17633445715900116866": ["convolution_gpu_bfyx_gemm_like",2],
+        "13980058444317683376": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "8039045580314824307": ["convolution_gpu_bfyx_gemm_like",1],
+        "13286723666743148654": ["convolution_gpu_bfyx_os_iyx_osv16",880],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "13277308739029064167": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14203217958874365062": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "15278336216464964580": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5724069285122500749": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12460004417430913427": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1755021778097194246": ["convolution_gpu_bfyx_gemm_like",1],
+        "1062464852330435815": ["convolution_gpu_bfyx_gemm_like",2],
+        "2267942216745157485": ["convolution_gpu_bfyx_os_iyx_osv16",886],
+        "4766447533088048613": ["convolution_gpu_bfyx_gemm_like",2],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "2705394837952559308": ["convolution_gpu_bfyx_gemm_like",2],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5734909305243135224": ["convolution_gpu_bfyx_gemm_like",0],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "10155417869639270818": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "16815373779430857324": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5439738552514649732": ["convolution_gpu_bfyx_gemm_like",2],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "17928043901784474130": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "18216392915308276053": ["convolution_gpu_bfyx_gemm_like",2],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12418390364502912036": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "5821887901198535792": ["convolution_gpu_bfyx_gemm_like",2],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6370629727707634189": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10613156984920928792": ["convolution_gpu_bfyx_gemm_like",1],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "14585144905582599299": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "7071864660784255328": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15310138877321331399": ["convolution_gpu_bfyx_gemm_like",2],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "4788094685976850847": ["convolution_gpu_bfyx_gemm_like",1],
+        "5699637716202391188": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "451787079167744428": ["convolution_gpu_bfyx_os_iyx_osv16",41],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "6696330836969622824": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "1617907811128880383": ["convolution_gpu_bfyx_gemm_like",2],
+        "11173744709088359283": ["fully_connected_gpu_fb_oi_ref",2],
+        "15173187675372221634": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "17868294056467093895": ["convolution_gpu_bfyx_gemm_like",2],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "10050254009828302053": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "17390307025967314108": ["convolution_gpu_bfyx_os_iyx_osv16",718],
+        "7457951266863598199": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "14595102366207856448": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "9391102514951576629": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2155348872565175553": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6381439938385141423": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2265784112305305260": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7666505529539001492": ["convolution_gpu_bfyx_gemm_like",2],
+        "17300963371220857043": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9150686862263626364": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "6066347819693426556": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "581553908799266285": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",2],
+        "2543041530639980505": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "7084794834886364709": ["convolution_gpu_bfyx_gemm_like",2],
+        "8977099691399563065": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "15747538142554815480": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "14156264942337528284": ["convolution_gpu_bfyx_gemm_like",2],
+        "893885204484374577": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "7671440804202996063": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "11882388384272635526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9080269503597463911": ["convolution_gpu_bfyx_gemm_like",2],
+        "11985789598994479652": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "861944552852043171": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "15076307524263378967": ["convolution_gpu_bfyx_gemm_like",2],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "11646035413147246650": ["convolution_gpu_bfyx_gemm_like",1],
+        "8436644625511258721": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17499047811775012205": ["convolution_gpu_bfyx_gemm_like",1],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "40684756725622867": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15404352708246779967": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17703907155485973486": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "18269382610859905921": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "683530182479794259": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "10506079835013332412": ["convolution_gpu_bfyx_gemm_like",2],
+        "10433541468308381909": ["convolution_gpu_bfyx_gemm_like",1],
+        "3652749152621176846": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10747768416582634270": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "14433939319502072879": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12854110364457722483": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "10002942280571012447": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "14611470203914805229": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "3317498303952226642": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7957927312958744432": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "14784115394395151055": ["convolution_gpu_bfyx_gemm_like",2],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "6990161783770805523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12361909180687647792": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9219978118417391687": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15184480575877095737": ["convolution_gpu_bfyx_gemm_like",1],
+        "18400137500031567479": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "10712251675747436685": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "1404523328737649536": ["convolution_gpu_bfyx_gemm_like",1],
+        "10340626080611300806": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11913865086932469909": ["convolution_gpu_bfyx_gemm_like",2],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6955820760012983739": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "5901470393936541758": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "11561352430430157770": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "12659539044474018256": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "17479614483340719566": ["convolution_gpu_bfyx_gemm_like",2],
+        "15630712601053635938": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "15314178289202641916": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15385836287435319028": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "13931470674812510958": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "15982499072593548907": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "3805991105758534542": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4810979456269693700": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14387663434151374245": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "8093154215631195896": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "879461985074219072": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16468779692009938330": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "16507216630035678597": ["convolution_gpu_bfyx_gemm_like",1],
+        "8525631489886320841": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "16495435651959280198": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "14017106221778585861": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "16620032793356620588": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15668060723417155782": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "15905812449037427213": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15372944709956866587": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "393130776826919699": ["convolution_gpu_bfyx_gemm_like",2],
+        "10710426249911063154": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "17790622334577372736": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12138341287265949399": ["convolution_gpu_bfyx_gemm_like",1],
+        "9110265526128628472": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "14322754320861242412": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "11388177266504804841": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14243609293683870669": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "5385637020152792781": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "17651949893303962955": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "557926911473978758": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "9133224739401155411": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6946815194102787268": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "3095800485689583188": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "779633618375662086": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3094541981461578435": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "15444345793124210505": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "2822531372171708171": ["convolution_gpu_bfyx_gemm_like",1],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3787897045202294227": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "9285566577169147378": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "8954139494467782298": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4184940877670248246": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "3013359852055354405": ["convolution_gpu_bfyx_os_iyx_osv16",1049],
+        "15927212142469570269": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "10744779302034526105": ["convolution_gpu_bfyx_gemm_like",1],
+        "10422138282116598013": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6046380638013542109": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18169371857833455144": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15140592697506341614": ["convolution_gpu_bfyx_gemm_like",2],
+        "15033864286535250007": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "6925829066248055368": ["convolution_gpu_bfyx_gemm_like",2],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3167115892101501516": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "11379252854859166206": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17829983167337875463": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "10409424254454997557": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8435953773852854494": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "10772763339005937717": ["convolution_gpu_bfyx_gemm_like",2],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13771196685227797262": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "6754359635395225555": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "890679620691833367": ["convolution_gpu_bfyx_gemm_like",2],
+        "871656942964602772": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "7458923250983373160": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "18305785425659656349": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "10869059995205753062": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "9626028243479089234": ["convolution_gpu_bfyx_gemm_like",2],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13816380312874384117": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7963120178142346699": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "5061053593616346116": ["convolution_gpu_bfyx_gemm_like",2],
+        "801943727169437597": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14503814672536990561": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "12693511427898130707": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "1891216794223363114": ["convolution_gpu_bfyx_gemm_like",1],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3201851883430682391": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "914589847837601900": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "1305434952341925041": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "11213283109763090897": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "3290503865540626256": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "6375149408738336520": ["convolution_gpu_bfyx_gemm_like",2],
+        "8094836777153039013": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "774981050284188673": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "15529767675448574617": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15464327246951632247": ["convolution_gpu_bfyx_gemm_like",1],
+        "3179874645565098825": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "5776920093461427179": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "8790992468693685188": ["fully_connected_gpu_fb_io_ref",2],
+        "17608082492919905570": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "5150467145740542480": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "10252930102508743294": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "9660587580162063066": ["convolution_gpu_bfyx_gemm_like",2],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "10133406610245448421": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17195491464960153261": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "1557549837620967530": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15197400201857680173": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17376180096577763039": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11353671464383068485": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "14322392426975869640": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "6227066883925046010": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "10578656188786691161": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "11800958516083095340": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "4072967257556128157": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "4292467512797995948": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7287802938269404923": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9180575279116075400": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6404731509766519779": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8195881973746570408": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10792503079194374004": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "8021962180961047152": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "18136765667969393174": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "9895036366054127607": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "11002656253983635383": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "8481272193490654884": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "4016652650196255483": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "16159055229009077435": ["convolution_gpu_bfyx_gemm_like",2],
+        "4573547058027867538": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16165264024659208580": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "3539764293444807886": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10849780273184392468": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",2],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "7023033151960653752": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7636001038842031672": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "2858694223939965231": ["convolution_gpu_bfyx_os_iyx_osv16",694],
+        "4680261350523889008": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14951164724050668856": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "15594387862678649962": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "11273554217552152172": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6438522646185979880": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "80038800201815976": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1917986916390093536": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2054895351334936744": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "4151997155802743451": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "18213389163198755626": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "12363462562375148101": ["convolution_gpu_bfyx_gemm_like",1],
+        "11312797737791604596": ["convolution_gpu_bfyx_gemm_like",2],
+        "15392592805235453180": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "5424159498790442193": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "7390751298966198773": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "6695224851008237679": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3865480446980740412": ["convolution_gpu_bfyx_gemm_like",2],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "1615155632991337496": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "14326748416648598247": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "2518919454830671073": ["convolution_gpu_bfyx_gemm_like",2],
+        "17750329428766282997": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "414342067295883061": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9358320688298379206": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "18139055731468596187": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6129602738379919488": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "5922243230245842969": ["convolution_gpu_bfyx_gemm_like",2],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13387804712929042302": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11927673108508931485": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "13429534778879474114": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "11066538564303243604": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "4440261013093281358": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17325129240374428839": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18074320074700491416": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "12352083215873760290": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "5601320732740276692": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "10462203417605590793": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12561852932488001568": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "17337689605705740533": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "684240994243755872": ["convolution_gpu_bfyx_gemm_like",2],
+        "10973267399508186283": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "8703051983346886620": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3807725810350819929": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14998412675237613013": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17800494747865760215": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "1241188741090538769": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "2605525859754242318": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15743075522781198932": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "7903891232234389925": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "1818234431954731769": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "1555841293175143289": ["convolution_gpu_bfyx_gemm_like",2],
+        "2140514316203117958": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "1691020960118022320": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15260010680436431377": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11066913713501760080": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6879801583428507100": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "7945923871349397386": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "737706555781027628": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "3826083535442459719": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4460838234035901102": ["convolution_gpu_bfyx_gemm_like",2],
+        "17393241435373906917": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "791937929163665770": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13855910108498240870": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",989],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "4987922194420804256": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "10665697051755790682": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "4004333174619528327": ["convolution_gpu_bfyx_gemm_like",1],
+        "11215297942420903101": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "12260041857695743504": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "15220874718853723626": ["convolution_gpu_bfyx_gemm_like",2],
+        "17993337310288098038": ["convolution_gpu_bfyx_gemm_like",2],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "6683090495189325653": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "8065408380801722040": ["convolution_gpu_bfyx_os_iyx_osv16",858],
+        "17370560568464798319": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "1541754036637209097": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "13381441263790184121": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "1742897526168249500": ["convolution_gpu_bfyx_gemm_like",1],
+        "17508515605648584094": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3142706898070129318": ["convolution_gpu_bfyx_gemm_like",2],
+        "7833495651619250213": ["convolution_gpu_bfyx_gemm_like",2],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "9549667332801021099": ["convolution_gpu_bfyx_gemm_like",2],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11740474593275702888": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5648099611567577611": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "8162762980597497749": ["convolution_gpu_bfyx_gemm_like",2],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2727219457659794468": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15825993019555657125": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "13186342942242476803": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "13267438341255312172": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16566714514564722975": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "16857192626139882429": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13353123037511986804": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "265378250397648692": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "18260147016899103633": ["convolution_gpu_bfyx_gemm_like",1],
+        "8374232727884943288": ["convolution_gpu_bfyx_gemm_like",1],
+        "2253443114793765536": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16132186023443894579": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "16461300997058854554": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "14122647818827599984": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "16091195788712971747": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "14869125900405603130": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "4152919461079296700": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16062811901668074268": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "17761681290527373180": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "12266072789949082198": ["convolution_gpu_bfyx_gemm_like",2],
+        "3349519148124496343": ["fully_connected_gpu_bf_io_gemm",2],
+        "13410178186827874638": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "9226912483632588371": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "13426413463253581310": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "10010921697596131761": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "16042236932298055236": ["convolution_gpu_bfyx_gemm_like",0],
+        "8713639086785023623": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "3855859061709004677": ["convolution_gpu_bfyx_os_iyx_osv16",969],
+        "17873182129275583020": ["convolution_gpu_bfyx_gemm_like",2],
+        "5073980187181521102": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "14214141488645257351": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "8700953648388124963": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "263575476655527355": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "16273414163942580140": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "17034122796081495259": ["convolution_gpu_bfyx_gemm_like",2],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "10835684445936063871": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4409539711630405776": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5627351109775149477": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "14204028212129440429": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "2235888904701517631": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13947140171097868740": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "2305345466244887603": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4693778191222244259": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "16126210124715599267": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5440622601084846974": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "6104567430127604601": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "12576360049619146496": ["convolution_gpu_bfyx_gemm_like",2],
+        "7533669599936874355": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "14408266407898585602": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "26434141991791193": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4186957909762095019": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "17075150439662364176": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "12015922610963701033": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",1],
+        "16763947298003094797": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "7476503420928065329": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "2839767407547705101": ["convolution_gpu_bfyx_gemm_like",2],
+        "15466940145773097237": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "11208787273440167590": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "14944798586094927774": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "10670829898588047148": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "13842309033760176194": ["convolution_gpu_bfyx_gemm_like",2],
+        "2588106330058954614": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4011704860949525864": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16207793515276299964": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",1],
+        "3495464175121035222": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "13869279315296163696": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "14146157492452859667": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "4428125859693766145": ["convolution_gpu_bfyx_gemm_like",2],
+        "18052322665755789573": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",1],
+        "8420176522157084802": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8619380242063264016": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2315979511894958580": ["convolution_gpu_bfyx_gemm_like",2],
+        "8394085742794617896": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "14862938122758223157": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "5084402281339667158": ["convolution_gpu_bfyx_gemm_like",1],
+        "3800864312883193560": ["convolution_gpu_bfyx_os_iyx_osv16",318],
+        "3643056883397245235": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "5812274221348979687": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "18109284647478027063": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",2],
+        "13485431068391184236": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1096929244128185929": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "10545983240319359348": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "555153826947872383": ["convolution_gpu_bfyx_gemm_like",2],
+        "18194662560696168435": ["convolution_gpu_bfyx_gemm_like",1],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17699579394941627848": ["convolution_gpu_bfyx_gemm_like",2],
+        "18106333667377667797": ["convolution_gpu_bfyx_gemm_like",2],
+        "2424832456352484524": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14559552090809408184": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "14350963106032411355": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17347387929692736001": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7917673216808705075": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "6329618009202266591": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "7111620180131341264": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "12711366212612147422": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14605107834931199380": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17381682740282686038": ["convolution_gpu_bfyx_gemm_like",1],
+        "553884705007944190": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13748207123919546925": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7822148442995976259": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7379959915507694400": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2615550169523847175": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13400559817638330692": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17061233750738578337": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "4238163995861108694": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "1961296939362567851": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "11431776034512615562": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11490143853656040028": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "11080118408282076423": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2038505773698938555": ["fully_connected_gpu_bf_io_gemm",1],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "5514520264534847093": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6478247863479663432": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14361697687217060995": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6857064389795419021": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "8332688858465419317": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13094313253457422444": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "2053428297205345660": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16674897846232931666": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13646026173083209094": ["convolution_gpu_bfyx_gemm_like",1],
+        "10253092389452603623": ["convolution_gpu_bfyx_gemm_like",2],
+        "8012414839721814470": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11102920976866402928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14609655423082082099": ["convolution_gpu_bfyx_gemm_like",2],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",1],
+        "3927333491885837374": ["fully_connected_gpu_fb_oi_ref",2],
+        "18136968124686255108": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "656536921219262336": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "17140704838989242732": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13891498649894490342": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6625355663340809894": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8382355932367801226": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "7486133596762640215": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "3457676694935264283": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4242438539626727158": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "1188428190761098784": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9996590003462421281": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16614092873294424156": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8964252048679144533": ["convolution_gpu_bfyx_gemm_like",2],
+        "17821196374523699955": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14788817017267716113": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "7578465277886568471": ["convolution_gpu_bfyx_gemm_like",2],
+        "7877872008801536537": ["convolution_gpu_bfyx_gemm_like",2],
+        "12174729877807876787": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12651215303242591871": ["convolution_gpu_bfyx_gemm_like",2],
+        "13499476832444042458": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "7596423139159263456": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "10462797712860969072": ["convolution_gpu_bfyx_gemm_like",2],
+        "12526417587678222534": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6223991300587768990": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5287076386757143976": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "15199659885055090985": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13510598063226540077": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "11232261979256657934": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",2],
+        "8394337033015371278": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11864780937861562358": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "2602209853120236226": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "8104509697376352086": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11863623794400366834": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5490683510357615963": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "2349007644347065353": ["convolution_gpu_bfyx_gemm_like",2],
+        "11769511287553067221": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "16286085532892593349": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "7853648744637103420": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "12882754981683858333": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17387764798693150143": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "18026754720065676632": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "10942743767167283370": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "708347829794105085": ["convolution_gpu_bfyx_gemm_like",1],
+        "18372277746801271292": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3046878786712386934": ["convolution_gpu_bfyx_gemm_like",2],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "18012549942299450620": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "17119834538806653818": ["convolution_gpu_bfyx_gemm_like",2],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "15989164585998175871": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "17274625805315816028": ["convolution_gpu_bfyx_gemm_like",1],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "6366477005383470532": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "4678945085654662665": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "3266638956600784732": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "1708527842474979709": ["convolution_gpu_bfyx_gemm_like",2],
+        "15038779174806415801": ["convolution_gpu_bfyx_gemm_like",2],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "8415763978601237333": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "12944449254981328284": ["convolution_gpu_bfyx_os_iyx_osv16",510],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16351593165006175213": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "16496066467505445971": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "17480519865636248903": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "2830019939638455400": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3547275591884493445": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "11439519952236570490": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",1],
+        "3497946462254198388": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "13041981853634484809": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "13343968006718934574": ["convolution_gpu_bfyx_gemm_like",2],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",1],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "139367204458861048": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2174528711050181972": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "5566145479615299930": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "10134863884423338495": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "2297846338452062425": ["convolution_gpu_bfyx_gemm_like",2],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17838473675663772639": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "772794189370544860": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "204378699575356398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11275526584835606578": ["convolution_gpu_bfyx_gemm_like",1],
+        "14168946412009689868": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "18259018980049662870": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8403919905230540356": ["fully_connected_gpu_fb_io_ref",2],
+        "17509205154057032109": ["convolution_gpu_bfyx_os_iyx_osv16",471],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7584912988728072414": ["convolution_gpu_bfyx_os_iyx_osv16",336],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",1],
+        "3069726952591207961": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15890492401334524258": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "16916632481840858091": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "9031338938030715616": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "2684971093531227585": ["convolution_gpu_bfyx_gemm_like",2],
+        "9970142663470031403": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "12932174902085755507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10681304359334525584": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "15507430010796753396": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "3723082283919334922": ["convolution_gpu_bfyx_gemm_like",2],
+        "17286180622990393912": ["convolution_gpu_bfyx_gemm_like",2],
+        "16881320590336043120": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "11178675492112714513": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "2102507337684140674": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7606277451240586967": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13372079273473545269": ["convolution_gpu_bfyx_gemm_like",2],
+        "12077176094606956613": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16832083703120717402": ["convolution_gpu_bfyx_gemm_like",2],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "5930451476167223501": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14524011013133838054": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "6324194607665787911": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "18057258413318190788": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "6858245954375015939": ["convolution_gpu_bfyx_gemm_like",2],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "5740738339752793113": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "7092429446071184360": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "14840301687056551916": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "6307840223437204536": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "2758256770667070477": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "17621284804179990612": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15752695063119223631": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "9322808125154719434": ["convolution_gpu_bfyx_gemm_like",1],
+        "5019077257951332016": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10534355502345993326": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "786418751322581924": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15078379507314446744": ["convolution_gpu_bfyx_gemm_like",2],
+        "11673506380927771816": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4563407231964979217": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "522181557896569275": ["convolution_gpu_bfyx_gemm_like",0],
+        "8954957191824520301": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "3055842046969432235": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "765085235448596225": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "176148486634277377": ["convolution_gpu_bfyx_gemm_like",2],
+        "1743672154424707483": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "9519113693008246391": ["convolution_gpu_bfyx_os_iyx_osv16",1102],
+        "3892873577927627992": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "10565789595834959047": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10743628077362128751": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10031973538398542700": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "9236621881488650027": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "12850610175882424919": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "16822728519529055454": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8886676435675463412": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "11726125778063855770": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "7002547494442875680": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15751445344585167275": ["convolution_gpu_bfyx_os_iyx_osv16",1056],
+        "1187817806204244044": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",2],
+        "253337639942573142": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",1],
+        "10254790628108678637": ["convolution_gpu_bfyx_gemm_like",1],
+        "9513218905938141296": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "170594581804738255": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "18415227597391874233": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "17707294419513060769": ["convolution_gpu_bfyx_gemm_like",2],
+        "15861253904810475842": ["convolution_gpu_bfyx_gemm_like",2],
+        "6638761803107874904": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11033824757086203326": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "16767657090925788431": ["convolution_gpu_bfyx_gemm_like",2],
+        "7174790971918109163": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "18096803908321982720": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "8938942439963723596": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "6447357750120537934": ["convolution_gpu_bfyx_gemm_like",2],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "13476976389397273052": ["convolution_gpu_bfyx_gemm_like",2],
+        "16522364268583242080": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16703049240941366828": ["convolution_gpu_bfyx_gemm_like",2],
+        "14121939808880396150": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "11044223289209000460": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13387602037439694372": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "104165137500939902": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "7552144047474664265": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "15598527290222497283": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13881505737488515065": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "1014934490175718598": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15891746043846062984": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "11782514629636023633": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1896394898744191046": ["convolution_gpu_bfyx_gemm_like",1],
+        "9055254157155243850": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "475079717987185580": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "4344644499804057502": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15467064540951151390": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16120159001372711511": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "15643053402284856082": ["convolution_gpu_bfyx_gemm_like",2],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "17517541283617012275": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "8321148793275220552": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11078289776590382448": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "9423239651872522813": ["convolution_gpu_bfyx_gemm_like",2],
+        "1957975992563882145": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "10104091044601583658": ["convolution_gpu_bfyx_gemm_like",2],
+        "2686152083115758704": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "6672808203620992802": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "17172842643607718498": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15911508155433936727": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "529543453251381109": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9305758766575321575": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "14555366228958374512": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "7199295899520406795": ["convolution_gpu_bfyx_gemm_like",2],
+        "12796777049340516563": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "7020743056013297476": ["convolution_gpu_bfyx_gemm_like",2],
+        "14071393823183565145": ["convolution_gpu_bfyx_gemm_like",2],
+        "13602299412525111348": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "12394049027081208902": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",812],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "10087048842366891699": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "142345353315012903": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4132087699110753428": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "591445875836641836": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "4960466075321426984": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "15976399554094563736": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11386443944172875185": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5485050451156514865": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "17615365894230830516": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "14118838785256822389": ["convolution_gpu_bfyx_gemm_like",2],
+        "8866164762286856139": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "97332433783610027": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "17080372737840346243": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12650986929262866534": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "4477135619420651110": ["convolution_gpu_bfyx_gemm_like",2],
+        "9040986180016264906": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "1413598669014941757": ["convolution_gpu_bfyx_gemm_like",2],
+        "7431469348791099474": ["convolution_gpu_bfyx_gemm_like",2],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "13470016086265528105": ["convolution_gpu_bfyx_gemm_like",1],
+        "5854267518455107328": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15603710070700542017": ["convolution_gpu_bfyx_gemm_like",2],
+        "5219818570070061892": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1601512693620510391": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13297691763391637265": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",1],
+        "5706423911886410117": ["convolution_gpu_bfyx_gemm_like",2],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "12951069548510783681": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15591167992985613695": ["convolution_gpu_bfyx_os_iyx_osv16",503],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4107088111454348836": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "6124219814856247918": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "10062957707721107508": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16179959997108523051": ["convolution_gpu_bfyx_gemm_like",2],
+        "9647713236241614167": ["convolution_gpu_bfyx_gemm_like",2],
+        "10884966210360699082": ["convolution_gpu_bfyx_gemm_like",1],
+        "2728956755635458379": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8578774826625315147": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5414285637221358737": ["convolution_gpu_bfyx_gemm_like",1],
+        "14172081523880352608": ["convolution_gpu_bfyx_os_iyx_osv16",572],
+        "15786328370300803713": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12647099325257717945": ["convolution_gpu_bfyx_gemm_like",2],
+        "13292923826380958700": ["convolution_gpu_bfyx_gemm_like",2],
+        "18439017855540532958": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "3683201905077543598": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11855137287698046529": ["convolution_gpu_bfyx_gemm_like",2],
+        "15479071839425218367": ["convolution_gpu_bfyx_gemm_like",2],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "9145357433824567384": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11544455862638831851": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3296080624478711270": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "15929970324703663357": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",2],
+        "16181623411787179429": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "7780366826820540504": ["convolution_gpu_bfyx_gemm_like",2],
+        "4538102435488584866": ["convolution_gpu_bfyx_gemm_like",1],
+        "7129623351507828661": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "16629493658542781988": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "14177187878748170225": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4049276089777687996": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "143255828863957128": ["convolution_gpu_bfyx_gemm_like",2],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "8882042369902399339": ["convolution_gpu_bfyx_gemm_like",1],
+        "676641023579624117": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13194245601015251743": ["fully_connected_gpu_fb_io_ref",1],
+        "1641881628032037384": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "9529614587861271730": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "17116941326889312928": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14336344152455180534": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9559550404190168365": ["convolution_gpu_bfyx_gemm_like",2],
+        "8985531644129639832": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "875296362957469305": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "10071611039987219440": ["convolution_gpu_bfyx_gemm_like",2],
+        "17585210048585855482": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13558603350852076889": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "13839075443229327158": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "7570078010521452080": ["convolution_gpu_bfyx_gemm_like",1],
+        "7054270030260701612": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "12847879935060092791": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "16483792160297698151": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5343186686923330871": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "2438221595194783178": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "294103776081392899": ["convolution_gpu_bfyx_gemm_like",2],
+        "689445825453914111": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4729855738455185191": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "1630585964216121575": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "7630342538679060038": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "18383733736250135501": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15995056067568652754": ["convolution_gpu_bfyx_gemm_like",1],
+        "15129201859573664210": ["convolution_gpu_bfyx_gemm_like",2],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "3012268657922581268": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1160579996766519752": ["convolution_gpu_bfyx_gemm_like",1],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16522546805419218429": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12992163255353386581": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "11071972036962275632": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "1269703478898366518": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",1],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",1],
+        "9052153145556623933": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "14037325204801680738": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "2482449683288477640": ["convolution_gpu_bfyx_gemm_like",2],
+        "6515141738021465336": ["convolution_gpu_bfyx_gemm_like",2],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "12709406234969954619": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7963529808900784906": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "9890252170749328138": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5053369963163583573": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "14247451223653900488": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "12698546873263218041": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "3408249386342406615": ["convolution_gpu_bfyx_gemm_like",1],
+        "9454028594043242985": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "13401926003864565026": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "8058623285594809047": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13624969243174329965": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3810356382905059819": ["convolution_gpu_bfyx_gemm_like",1],
+        "1836277956961261472": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9164584153555521506": ["convolution_gpu_bfyx_gemm_like",2],
+        "10265955847846166394": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "7291920886894073603": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "3191047205441946466": ["convolution_gpu_bfyx_gemm_like",0],
+        "15862793522143880668": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "8434794604559592624": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "11595387512434355394": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "17035903590837750750": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3510837206834640871": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "14291113322487568376": ["convolution_gpu_bfyx_gemm_like",2],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "11892210755884128272": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "844742962836593299": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "11191005013126286532": ["convolution_gpu_bfyx_os_iyx_osv16",552],
+        "13727643349589056375": ["convolution_gpu_bfyx_os_iyx_osv16",439],
+        "11273168411455998347": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "3509027370372599394": ["fully_connected_gpu_fb_io_ref",2],
+        "14185215566042478462": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "12927339938362960563": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "1265277707626014051": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "4491694127072416122": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "5340016094501559693": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6150043972317126583": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",21],
+        "281287280558289393": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "4264078972561407296": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "997155336931700015": ["convolution_gpu_bfyx_gemm_like",2],
+        "7552049239568474944": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3280795516668356985": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "5296506025538423220": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "13987250743654950733": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15381014522874131924": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "11026432639515866259": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3625906783784771100": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "9339038855869763548": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "14907038741687299621": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4206637285289830669": ["convolution_gpu_bfyx_gemm_like",1],
+        "9266375177690276615": ["convolution_gpu_bfyx_gemm_like",2],
+        "17543625777838573622": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "5515216528474382598": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "18076129452098771655": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "5750277248295796439": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "12815588500303820284": ["convolution_gpu_bfyx_gemm_like",1],
+        "10809330882739297269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11359020774437470164": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4476037346005841003": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13198480749588992978": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15452906059667613512": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4178614913813882037": ["convolution_gpu_bfyx_gemm_like",2],
+        "1435153323458789173": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13312401790608349463": ["convolution_gpu_bfyx_gemm_like",1],
+        "11919579121199894437": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "7351443601143314161": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2801141274570069180": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "9883682535839267422": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "1686420552593340731": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "8898449752724034655": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "830147122986411443": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8837079302496539409": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "2995957440356398418": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4316278502963439894": ["convolution_gpu_bfyx_gemm_like",2],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "14645023135017806432": ["convolution_gpu_bfyx_gemm_like",2],
+        "13054706902087663592": ["convolution_gpu_bfyx_gemm_like",2],
+        "17372326727957287976": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10554266898346470422": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "10168272404395268951": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "9556219639756304369": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "906587812125311288": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15406324750533549980": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "6750003965952674453": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "4949865765880884373": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "11622271315873664622": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "6278892144796112655": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1090447867763814054": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11845504142528424662": ["convolution_gpu_bfyx_gemm_like",2],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "16995919898822376726": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "11354523117287453982": ["convolution_gpu_bfyx_gemm_like",2],
+        "3239779684432082106": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "15783558375979538895": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",883],
+        "16605697831520435304": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "625469553102754234": ["convolution_gpu_bfyx_gemm_like",2],
+        "20037669704517227": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "2328951328483718941": ["convolution_gpu_bfyx_gemm_like",2],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "9513403717116039597": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12685978195521469707": ["convolution_gpu_bfyx_os_iyx_osv16",189],
+        "12752101288912456176": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "1294871956977733262": ["convolution_gpu_bfyx_gemm_like",2],
+        "15692223101958737604": ["convolution_gpu_bfyx_gemm_like",1],
+        "11453044274130869816": ["convolution_gpu_bfyx_gemm_like",2],
+        "12379734005351960619": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5786828339670204894": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "4010650902230520983": ["convolution_gpu_bfyx_gemm_like",0],
+        "13583272198088247606": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15134268179029323647": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "7395593936948809439": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "3349108500387301004": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12407002532205454767": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "7004953121070642766": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "6644418194983229139": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "1027438463802481676": ["convolution_gpu_bfyx_gemm_like",2],
+        "10642327923162019888": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "5061795324735006354": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "1421879144542252228": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "16978447917682236120": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6771637612965430926": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "4586246090279043149": ["convolution_gpu_bfyx_gemm_like",2],
+        "17357800564047774826": ["convolution_gpu_bfyx_gemm_like",2],
+        "2008999755215725290": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "981733129438741439": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7211355951470869591": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "5338109154207406041": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5031342439443897167": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "14249486431781112226": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "5424164608102708333": ["convolution_gpu_bfyx_gemm_like",2],
+        "11802527991096689252": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "981197653890885407": ["convolution_gpu_bfyx_gemm_like",1],
+        "8612114608666892632": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "1019936903773818652": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13077961697656030315": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8317140711232187781": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8169762955969255618": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "123283730755186382": ["convolution_gpu_bfyx_gemm_like",1],
+        "5083776511235413204": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5510336500642744696": ["convolution_gpu_bfyx_gemm_like",2],
+        "9625931001541723278": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "13538051178827008933": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "7744787957569714828": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "3378088934862423864": ["convolution_gpu_bfyx_gemm_like",1],
+        "7978370756654787278": ["convolution_gpu_bfyx_gemm_like",1],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "17340789730321673934": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "7843833033404155302": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "5670530004773188380": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",1],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "15031089621161080026": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "15156015174611610705": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "8055193939726603877": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10598995451755327159": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13336847303794450665": ["convolution_gpu_bfyx_gemm_like",2],
+        "4992668316921598993": ["convolution_gpu_bfyx_gemm_like",1],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "8578747191812631883": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "14915908231779912828": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9676055912997166605": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4369680877112803848": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "4745007371868123765": ["convolution_gpu_bfyx_gemm_like",2],
+        "288825580282908143": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "16932172538978111342": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "13850807749756445264": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "778175413671462719": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "10704037259494193565": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "11734299455885510243": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15395497315929884637": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "17769940507971546305": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "7246177123265734169": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "14848351491062336554": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "14443599718173185176": ["convolution_gpu_bfyx_gemm_like",2],
+        "4217179485243909459": ["convolution_gpu_bfyx_gemm_like",1],
+        "13625877249040282040": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like",2],
+        "14257161696605459633": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "12529210672030682764": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "6768451741770053089": ["convolution_gpu_bfyx_gemm_like",2],
+        "15943174060386142134": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "16415344078703911571": ["convolution_gpu_bfyx_gemm_like",2],
+        "15822975685755664152": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "6577240413312348523": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "11668043528929060706": ["convolution_gpu_bfyx_gemm_like",1],
+        "15379595951542162189": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2056766012044921101": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "2384942244346844027": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "6400671582981760192": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9746964858035717775": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "9583760104223104233": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13956744866244022582": ["convolution_gpu_bfyx_gemm_like",2],
+        "14403780921831769097": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12956535344568057480": ["convolution_gpu_bfyx_os_iyx_osv16",84],
+        "1753515740487760297": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10160082844961863335": ["convolution_gpu_bfyx_os_iyx_osv16",199],
+        "11875516764635427358": ["convolution_gpu_bfyx_os_iyx_osv16",133],
+        "12242618640422208652": ["convolution_gpu_bfyx_gemm_like",0],
+        "12761366575293006784": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "15051114821536746998": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "6706802683366112205": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3661305534604931936": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3598116387801985039": ["convolution_gpu_bfyx_os_iyx_osv16",676],
+        "12478421208861550581": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13066055561434178894": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "18160969423211875528": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "104321144590863458": ["convolution_gpu_bfyx_gemm_like",2],
+        "9008848676120441863": ["convolution_gpu_bfyx_gemm_like",2],
+        "4695273549696315193": ["convolution_gpu_bfyx_gemm_like",2],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "4563773888811395621": ["convolution_gpu_bfyx_gemm_like",2],
+        "5351705572686943348": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2647922515901529845": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "296202142406900242": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "13342769641176584743": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "10468562355439385073": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "4503960445974334415": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "9492331996847106233": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "7107513718824525169": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "11376522803174788945": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1772363899841601255": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "16715151641337602113": ["convolution_gpu_bfyx_gemm_like",1],
+        "7997955859883990923": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "6474882514032493642": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13348855287761849180": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15922076723067110929": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "3980754726678047241": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9794061741834174000": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "7410220112400588068": ["convolution_gpu_bfyx_gemm_like",2],
+        "12323840136934980793": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "13110173649734084688": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "13411431109933021193": ["convolution_gpu_bfyx_gemm_like",2],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3590316457726550768": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "12942085219027232135": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",290],
+        "8981229334098733320": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "14682537852514419239": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1884327428051733366": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "2301409406426420354": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11091004452522208782": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "12386437738920143482": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "1660279112011537957": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "16122815225820081176": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "10599639229366933472": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "11674725184029885494": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "12225119940380026093": ["convolution_gpu_bfyx_os_iyx_osv16",1034],
+        "10908411570889102154": ["convolution_gpu_bfyx_gemm_like",1],
+        "15227034948424983496": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17659601542171299562": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "12895496994338720556": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2506154888542197909": ["convolution_gpu_bfyx_os_iyx_osv16",860],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",2],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "11534123522633460320": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11922163303962372849": ["convolution_gpu_bfyx_gemm_like",1],
+        "11357813056434049302": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "2950917846016525392": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "15156525717629023944": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "6172851296465788161": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "6432519735121751346": ["convolution_gpu_bfyx_gemm_like",1],
+        "14685573786743639408": ["convolution_gpu_bfyx_gemm_like",1],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "11141999085710526242": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8951503172834790833": ["convolution_gpu_bfyx_gemm_like",2],
+        "13498795599230228492": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "16815680874311765189": ["convolution_gpu_bfyx_gemm_like",2],
+        "13886526360627032217": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "10476627457539425144": ["convolution_gpu_bfyx_gemm_like",2],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",376],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "17065380294456704620": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "13441117085490814804": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",1034],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14601912265050074833": ["convolution_gpu_bfyx_gemm_like",2],
+        "5816730482014477109": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "10016243001407196485": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1502236537645808646": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "14682894856346977838": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "60749853744407778": ["convolution_gpu_bfyx_gemm_like",2],
+        "5032866547826271476": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "12630173933512965589": ["convolution_gpu_bfyx_gemm_like",2],
+        "3297036980627776719": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17160915544701715607": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13285123703712436126": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "682912708716537431": ["convolution_gpu_bfyx_gemm_like",2],
+        "14454927839795553295": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "16039372573821594566": ["convolution_gpu_bfyx_gemm_like",2],
+        "9929060811766882316": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "7043547563530810431": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "11546295514640813785": ["convolution_gpu_bfyx_gemm_like",2],
+        "7693556065684619275": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8618835732380720921": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11906319144823550582": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",2],
+        "815847426244665239": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "4212194737559719449": ["convolution_gpu_bfyx_gemm_like",0],
+        "2352142833866194508": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "568114041320772862": ["convolution_gpu_bfyx_gemm_like",2],
+        "10616832946298118456": ["convolution_gpu_bfyx_gemm_like",2],
+        "14581447673401303181": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "26773921190137993": ["convolution_gpu_bfyx_gemm_like",2],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "3762117189312286955": ["convolution_gpu_bfyx_gemm_like",2],
+        "17453621319901961773": ["convolution_gpu_bfyx_os_iyx_osv16",139],
+        "4565037760028957581": ["convolution_gpu_bfyx_os_iyx_osv16",852],
+        "15578217564714846277": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "8697631439739291302": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "7313000297447719088": ["convolution_gpu_bfyx_gemm_like",2],
+        "13993319023992950944": ["convolution_gpu_bfyx_gemm_like",2],
+        "11796671083187280457": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "15637565679147396649": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",4],
+        "14385995236701277049": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "6031307393395339699": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "15432337846778101995": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "2722601800398376127": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "15616954046484566002": ["convolution_gpu_bfyx_gemm_like",2],
+        "15830721134654889992": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "7974918595373182037": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8178825467227185946": ["convolution_gpu_bfyx_gemm_like",2],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "13102754309439605192": ["convolution_gpu_bfyx_gemm_like",2],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "13691555384698806010": ["convolution_gpu_bfyx_gemm_like",1],
+        "15863633107759120207": ["convolution_gpu_bfyx_gemm_like",1],
+        "12511186263003392018": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "15168098632351740923": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "16650590194585316886": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "2743892624333411461": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "17177353407003831190": ["convolution_gpu_bfyx_gemm_like",2],
+        "3292554262586950764": ["convolution_gpu_bfyx_gemm_like",2],
+        "5635504912415420460": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",151],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "11067412830219638639": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "14865708345458193472": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "15464714725848277081": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10716913534741102635": ["convolution_gpu_bfyx_os_iyx_osv16",483],
+        "3596159214965874273": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "11210961619302975072": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8319405652132127420": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9980945809859857871": ["convolution_gpu_bfyx_gemm_like",2],
+        "13858485871773319706": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "13319880343534837963": ["convolution_gpu_bfyx_gemm_like",1],
+        "6983900601570231321": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "11897886369869427808": ["convolution_gpu_bfyx_gemm_like",2],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "16540183777173974162": ["convolution_gpu_bfyx_gemm_like",1],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17052596472114345717": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "12458305535453345462": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13255006150107668739": ["convolution_gpu_bfyx_gemm_like",2],
+        "17097621900023182992": ["convolution_gpu_bfyx_gemm_like",2],
+        "14523905821262502926": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "5687802882700097624": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "16162899163122139501": ["fully_connected_gpu_fb_io_ref",1],
+        "15891505875671050928": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10271474583233390474": ["convolution_gpu_bfyx_os_iyx_osv16",155],
+        "4640696923527766618": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "13473730516782884152": ["convolution_gpu_bfyx_gemm_like",2],
+        "9245770108138984525": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4959718589070770515": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",1],
+        "13234170505677988638": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "45545661884854912": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "5311718276151327830": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "12896159402462325805": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "14647949921048404551": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "5327803911898085293": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",1],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11012846743944132853": ["convolution_gpu_bfyx_gemm_like",2],
+        "4713580645061462578": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "11576182324195008022": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9673176853197584682": ["convolution_gpu_bfyx_gemm_like",1],
+        "3935404533406270186": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13358754652597677285": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "5246229312484886433": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "15939309688773899430": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3805854200552708060": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5219048275475447369": ["convolution_gpu_bfyx_gemm_like",2],
+        "2832331506191733785": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "710656784939783221": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8306931146242110738": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "118898027441804310": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",941],
+        "7941359635463232326": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "18418073826375395057": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3935174650108042053": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "288853243482418538": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "5342657840254586591": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6025872155179042054": ["convolution_gpu_bfyx_gemm_like",2],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "8739570656208259296": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "7086554406050778468": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16342158355942808662": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15887938842582811165": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "4211445170027080823": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "15487538714246568015": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "17641726060706984007": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "8449108317864057899": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "16536775289334717044": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "13150876648527896999": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "8779987507326777359": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8215519118071138614": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069245927173134634": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "3120885087070223590": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "6728889146307098720": ["convolution_gpu_bfyx_gemm_like",1],
+        "14004618842373739106": ["convolution_gpu_bfyx_gemm_like",2],
+        "16741985699154392565": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "8176520928011006903": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14213127286928643795": ["convolution_gpu_bfyx_gemm_like",2],
+        "1336477297334930004": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12565318283493666631": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "11901687795497708884": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "11858246418724176452": ["convolution_gpu_bfyx_gemm_like",1],
+        "17355826643208208691": ["convolution_gpu_bfyx_gemm_like",2],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1204089510255285420": ["convolution_gpu_bfyx_gemm_like",2],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",22],
+        "12621528958448913800": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "2768512766772748723": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "15579919505002150556": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "16352438188558979362": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "3594327736281012643": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "8281411537393664160": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "11152834864013527469": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "5384134329664434112": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "17197868427757781334": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "14463841899941062548": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8734220847509054149": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "15597522934012485452": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "8770858724416759637": ["convolution_gpu_bfyx_gemm_like",2],
+        "3651651926851660222": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1460916897832302487": ["convolution_gpu_bfyx_gemm_like",2],
+        "2251572761614039612": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "17503210896556316294": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "14547907449418439737": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "15618891972122000521": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "10380031655567712558": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "9516288831713776693": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "13550337096609413041": ["convolution_gpu_bfyx_gemm_like",2],
+        "17459500507201824299": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "18379763351534914922": ["convolution_gpu_bfyx_os_iyx_osv16",140],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",2],
+        "15993427814066246646": ["convolution_gpu_bfyx_gemm_like",1],
+        "7781809277449433812": ["convolution_gpu_bfyx_gemm_like",2],
+        "9003196270667188479": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3034466284781235431": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "11198378813600875939": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5509852360472061267": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "1239861345413267621": ["convolution_gpu_bfyx_gemm_like",2],
+        "1720791539242542292": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "2419819939573989749": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "9390919808369333231": ["convolution_gpu_bfyx_gemm_like",2],
+        "11882021989615795558": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "4003468969524607815": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "12169148580322697755": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",2],
+        "14524678598440880756": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "14592395793778583608": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "1781189282179491198": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "16587387608532583713": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "11205571992835612111": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14674266217397415571": ["convolution_gpu_bfyx_gemm_like",2],
+        "8642397690605957294": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "172303227623890951": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "17855733925989425515": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "13982221711075598070": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "13337122303005980542": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "5134857932624749530": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8740196547852036537": ["convolution_gpu_bfyx_gemm_like",2],
+        "9781830607177020570": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "11297512843662536362": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "16071030448801649281": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4034250407843183678": ["convolution_gpu_bfyx_gemm_like",1],
+        "3661361503342294227": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "7247891577022043949": ["convolution_gpu_bfyx_gemm_like",2],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "8734419426540206087": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "2559310381697374321": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "6659313690133629176": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "7617123358753247310": ["fully_connected_gpu_fb_io_ref",2],
+        "10784905418636316601": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "7999747927804607567": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "14670952132900619664": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "4276712095427918904": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "3806791682244402910": ["convolution_gpu_bfyx_os_iyx_osv16",1088],
+        "11879484013890539145": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "9287404618748313247": ["convolution_gpu_bfyx_gemm_like",1],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "13952295742818866246": ["convolution_gpu_bfyx_os_iyx_osv16",885],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "2094546483928406874": ["convolution_gpu_bfyx_gemm_like",1],
+        "3831201505512446456": ["convolution_gpu_bfyx_gemm_like",0],
+        "14097319816812992451": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "7279393739634103483": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "13661880440426932218": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7590767013583950613": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1617362484243823916": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",813],
+        "8819268903800581706": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "16320454719906370247": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "10972033292930619311": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "16230621843665445228": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "8892991171111842341": ["convolution_gpu_bfyx_gemm_like",2],
+        "323234725943768094": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15287650965861631130": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "15818237122613168508": ["convolution_gpu_bfyx_gemm_like",0],
+        "6542486391263861823": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "15938703221521364046": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "8333743604646422982": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4800208854712166990": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16590893345666612869": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14733291836016183044": ["convolution_gpu_bfyx_gemm_like",2],
+        "15494543914974994991": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "1081287304647703427": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "11609821372586026178": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "10961049607808752432": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6161072079255825074": ["convolution_gpu_bfyx_gemm_like",2],
+        "10392013312924273545": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "10400727836871462348": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11494395549955384747": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "3329610414149222728": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8986253016099337778": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "12606196670791209919": ["convolution_gpu_bfyx_gemm_like",2],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "8863731258634577277": ["convolution_gpu_bfyx_gemm_like",2],
+        "2586132860307138964": ["convolution_gpu_bfyx_gemm_like",2],
+        "2844746478867668588": ["convolution_gpu_bfyx_gemm_like",2],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "1400089266180918877": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "17522452942286240233": ["convolution_gpu_bfyx_gemm_like",2],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "15417738436777481469": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4917595053453614536": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "5688623850477433571": ["convolution_gpu_bfyx_gemm_like",2],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",1],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",995],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "10773411423039491193": ["convolution_gpu_bfyx_gemm_like",2],
+        "11809236497308682596": ["convolution_gpu_bfyx_gemm_like",1],
+        "2146633923143071497": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "2968144776497288135": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "3311449696894745049": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "17472252137354770318": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7026575758396092435": ["convolution_gpu_bfyx_os_iyx_osv16",172],
+        "15898888434295644774": ["convolution_gpu_bfyx_gemm_like",1],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",883],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "15661055655577513377": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12965800692507042874": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "287386909600391846": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "14800933038795670868": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10721811813682112908": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14762859593402798050": ["convolution_gpu_bfyx_gemm_like",2],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "15972805725107234322": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "11140864132614066113": ["convolution_gpu_bfyx_gemm_like",2],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16261543808418336089": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "15228614030349540878": ["convolution_gpu_bfyx_gemm_like",1],
+        "6335628260431943016": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "6545814945227676265": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3007505068107685147": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "13722424507812159961": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "15659671804906879034": ["convolution_gpu_bfyx_gemm_like",2],
+        "15893297349596399716": ["convolution_gpu_bfyx_gemm_like",1],
+        "6612243861034102250": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "3913951712614107871": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2546472090573813082": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1013207188944763398": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2679903779216253668": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11409066626289209846": ["convolution_gpu_bfyx_os_iyx_osv16",351],
+        "7386836350136973872": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "16211466749116679534": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "1403373982815401451": ["convolution_gpu_bfyx_gemm_like",1],
+        "7126601602274920416": ["convolution_gpu_bfyx_gemm_like",2],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",1],
+        "11914756126771310827": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17224820843490443805": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13683563727561197895": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "14159293183840880884": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11885660439698926227": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13448159575961515854": ["convolution_gpu_bfyx_gemm_like",0],
+        "13779700363254765602": ["convolution_gpu_bfyx_gemm_like",2],
+        "18125075313255528454": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "2260718905219541967": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "5688161172644782612": ["convolution_gpu_bfyx_gemm_like",1],
+        "12896164738668798380": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "5635500901926740475": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "17691748026963003695": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "3513523165606656242": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "15754688305730191542": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "37061093840513038": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6830643729780599672": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "8779164026828163571": ["convolution_gpu_bfyx_gemm_like",1],
+        "352808518345312040": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16835545111241063900": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "9343876424591024597": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3064765745900772872": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "2008064690158516711": ["convolution_gpu_bfyx_gemm_like",2],
+        "11447737411040418462": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "16485921493309285440": ["convolution_gpu_bfyx_gemm_like",2],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7589346100701197023": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16615858951735101760": ["fully_connected_gpu_fb_io_ref",1],
+        "13551767519605460627": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "3830091089824446164": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "5758223108250439377": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "4399656162365214694": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "15571801737237063594": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6236857636305802170": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "8631194673451861459": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3392632422002516166": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "9402935157379983392": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13680502636898130714": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5503904988517480229": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "9561367273233389233": ["convolution_gpu_bfyx_gemm_like",2],
+        "17495070522944546801": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "5176939691838030517": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "4942131377140353094": ["convolution_gpu_bfyx_gemm_like",0],
+        "14946519992043402896": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "5398895598407183682": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",467],
+        "13753670205703732353": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "2148648022160178995": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "596528462327775677": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "7512702933193596918": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "9644723852089512961": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "264371219192743152": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "8663545677000846511": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7200893702912130808": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "5718747983756317198": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2850803473613487020": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "2335783507270234825": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "3088402690095697589": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "1211404528755199615": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11521288355888665606": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15816980369722540994": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "5781431860747226742": ["convolution_gpu_bfyx_gemm_like",2],
+        "15365776263895633531": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "3389739049224815652": ["convolution_gpu_bfyx_gemm_like",2],
+        "7877637636782924097": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "18398231411109020099": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17520777331163825810": ["convolution_gpu_bfyx_gemm_like",2],
+        "16462862831307415504": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5348059680010171141": ["convolution_gpu_bfyx_gemm_like",1],
+        "7289907211627391947": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5378151578014945610": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "5629582391075745771": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "11607736973932389832": ["convolution_gpu_bfyx_gemm_like",0],
+        "2598910952085172410": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",622],
+        "17342603054992556378": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "3332444589775844154": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "4136736579788862192": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13161798453564436688": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "18429276095695345973": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "9539616823548370185": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9914440875772341708": ["convolution_gpu_bfyx_gemm_like",1],
+        "14484004336536993120": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "7065121716452374910": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8854234880878427078": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "1194267934213722567": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "2387389473399444503": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "10775785602937893911": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "8124166677361481618": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9267417754412894234": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "5896089609470353090": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",641],
+        "5119087113905313336": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4104062066031480003": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10857567623940140266": ["fully_connected_gpu_fb_io_ref",1],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "5149553691611520515": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "11311890411536750673": ["convolution_gpu_bfyx_gemm_like",2],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "859377216693940737": ["convolution_gpu_bfyx_gemm_like",2],
+        "2915952195141872726": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14142812374094816721": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "12994819742376207273": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12057000101434512661": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "11047759270093007856": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "12715500118796263683": ["convolution_gpu_bfyx_gemm_like",2],
+        "2830742500858558621": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "12445292008737311977": ["convolution_gpu_bfyx_gemm_like",2],
+        "15158997684077722015": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "13004055504657277105": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8347537383976709519": ["convolution_gpu_bfyx_os_iyx_osv16",805],
+        "13398875754083902831": ["fully_connected_gpu_yxfb_ref",2],
+        "16450345154125804290": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6418748992581951435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",135],
+        "12642701787250074691": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "4642402648038764246": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "17026348860895225619": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "4554398307153171456": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "4445257000541366640": ["convolution_gpu_bfyx_os_iyx_osv16",416],
+        "4682062886371423209": ["convolution_gpu_bfyx_gemm_like",2],
+        "8337457116169698090": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "14969813450703071948": ["convolution_gpu_bfyx_gemm_like",1],
+        "14167086447992316314": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1961348920992050029": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",2],
+        "14218701503304823803": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",2],
+        "541744773413565297": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13647773816638053437": ["convolution_gpu_bfyx_gemm_like",2],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "3300655231758263066": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "13985989113434682460": ["convolution_gpu_bfyx_gemm_like",1],
+        "16576300898841314587": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "4082218299236753259": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "3138712043201001156": ["convolution_gpu_bfyx_gemm_like",2],
+        "9493034132406318197": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2984236836610169934": ["convolution_gpu_bfyx_os_iyx_osv16",142],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "9351428703239678614": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "17546650302679801134": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5089359404080552270": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "5890683283363730941": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6678101356115372537": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1838534101161814609": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "17646394278957547470": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2651385050387738902": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "4792657031481471098": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13423515205322319913": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "1431307776181554710": ["convolution_gpu_bfyx_gemm_like",2],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9771430089730856496": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "17308907916370632622": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",1070],
+        "13435416060730279243": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "10842505566649585090": ["convolution_gpu_bfyx_gemm_like",1],
+        "6326191473779365124": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2705534741438659581": ["convolution_gpu_bfyx_os_iyx_osv16",475],
+        "17050143605017295447": ["convolution_gpu_bfyx_gemm_like",2],
+        "11307721164906705899": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "11352094952907979172": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "14512311371993445906": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13076343553185159307": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2832311883163804015": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "3182329375739242693": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1077773457856682663": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "17294244481988344762": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5401946420641519048": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "10526411638069090068": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "8181704316455400709": ["convolution_gpu_bfyx_gemm_like",2],
+        "16462033126494826292": ["convolution_gpu_bfyx_gemm_like",2],
+        "12547252593506448096": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "5321807316257768": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "1071663904249509302": ["convolution_gpu_bfyx_gemm_like",2],
+        "1878953827218615252": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "8321769923556905957": ["convolution_gpu_bfyx_gemm_like",1],
+        "7053070767227498983": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12318427976031000768": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3060709449176556770": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",931],
+        "14741012384358891350": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "3626743386403140330": ["convolution_gpu_bfyx_gemm_like",1],
+        "16134637021630473012": ["convolution_gpu_bfyx_gemm_like",1],
+        "15026219694198820614": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "15671873744670386067": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "2870715678422088243": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "5103094815475470596": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3430998232987873998": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "1127844465496534455": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15958017891397409552": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "2010255131587843361": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "11679869968143173159": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "1154469970162137785": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "8260689555974656662": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "11206468937763516689": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "1265107284215037966": ["convolution_gpu_bfyx_gemm_like",2],
+        "6616869272699525153": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "6953499208425592115": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "879896719155824868": ["convolution_gpu_bfyx_gemm_like",2],
+        "10111038481447198008": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "6519443541076418301": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "7253709516917901897": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "10236258478395201152": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15513894336778253285": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "8942221095468681112": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "13410850301164057911": ["convolution_gpu_bfyx_os_iyx_osv16",252],
+        "6571438978296387721": ["convolution_gpu_bfyx_gemm_like",2],
+        "2020044486043617858": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14233219774448115529": ["convolution_gpu_bfyx_gemm_like",2],
+        "9770300588867836071": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "191374388179598660": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4184357870886924038": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "6235132681081375078": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "13297875917250935192": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "14577775579978745344": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "9724624621108712962": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "17638692805430115529": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "9729771183572950642": ["convolution_gpu_bfyx_gemm_like",1],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "9212091835906796243": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "8528750110601691390": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "4737347018334654530": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "17829854042305231384": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "8571662320744858201": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "5828768432282043413": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "3685556976073096544": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "8047078039937885319": ["convolution_gpu_bfyx_gemm_like",2],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "4366043672240989175": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "6148022455516485135": ["convolution_gpu_bfyx_gemm_like",2],
+        "2932914865200583326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13225520357177380691": ["convolution_gpu_bfyx_gemm_like",2],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "5261762234237034874": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "5409329687010951601": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "10885752780697269323": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "4577872082734403187": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "9614300332487270888": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6997121306455110286": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "1071090704302849258": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "937763627727362899": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "5185125307593023170": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "11933283931932057859": ["convolution_gpu_bfyx_gemm_like",1],
+        "18120169120088482114": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "16541535256432192398": ["convolution_gpu_bfyx_gemm_like",2],
+        "4646176801168621136": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3119045125726216156": ["convolution_gpu_bfyx_gemm_like",1],
+        "141166664952282933": ["convolution_gpu_bfyx_gemm_like",2],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "8228641750970480948": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "835367600773871252": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15114370307779942381": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17962578815194404362": ["convolution_gpu_bfyx_gemm_like",2],
+        "4831224999851230245": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6812025576584060234": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "9601849246293120347": ["convolution_gpu_bfyx_gemm_like",2],
+        "15156805695359911457": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "4515798403196565084": ["convolution_gpu_bfyx_gemm_like",2],
+        "8122815203088327658": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "5962764672151728219": ["convolution_gpu_bfyx_os_iyx_osv16",1108],
+        "2622434279674583815": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",1],
+        "8809794528993445200": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13660015013041074867": ["convolution_gpu_bfyx_gemm_like",2],
+        "5948701218437980356": ["convolution_gpu_bfyx_gemm_like",2],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "10800323158234163234": ["fully_connected_gpu_fb_oi_ref",2],
+        "6876164425008541018": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "14652791434312888296": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "12942776337163777730": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "16884228931101540030": ["convolution_gpu_bfyx_gemm_like",2],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "18331981707436752260": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15765198153800696060": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9628735886189157469": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",2],
+        "12854272540346358832": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "11831092915967558428": ["convolution_gpu_bfyx_os_iyx_osv16",647
+        ]
+    },
+    "64": {
+        "12297371032753209816": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",1041],
+        "290134020607738418": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16535378085465418910": ["convolution_gpu_yxfb_yxio_b16",0],
+        "1644335606100150388": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "3391032227732782982": ["convolution_gpu_bfyx_gemm_like",1],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "18180820925685532104": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "11132679855317294753": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10650698451740924172": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "13553263424160050064": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",0],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13970935346154374605": ["convolution_gpu_bfyx_gemm_like",2],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",2],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",97],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",0],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",0],
+        "2096167792705935744": ["convolution_gpu_bfyx_gemm_like",2],
+        "10471519687597963116": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",1],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",841],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "15078168059698267650": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "18041177945345031826": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7430073011895298582": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "2423754482456771339": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "16235115911229280717": ["convolution_gpu_bfyx_gemm_like",2],
+        "9410978119783758141": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "6391201577234440562": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",1],
+        "15781622938833984014": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "276407276027553756": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "1541754036637209097": ["convolution_gpu_bfyx_gemm_like",2],
+        "16292848987976256449": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11609821372586026178": ["convolution_gpu_bfyx_gemm_like",1],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "12028665820838352309": ["convolution_gpu_bfyx_gemm_like",2],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",350],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",424],
+        "5115661026367632863": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",1],
+        "7590767013583950613": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "6870942166356599956": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "9390478179772073718": ["convolution_gpu_bfyx_gemm_like",1],
+        "16728762255357411770": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",845],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",1],
+        "4135068756462147853": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9660812093766156608": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "522181557896569275": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9562291747339451180": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "7432142107544210174": ["convolution_gpu_bfyx_gemm_like",2],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "9407646138658641974": ["convolution_gpu_bfyx_gemm_like",1],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "467975197394411990": ["convolution_gpu_bfyx_gemm_like",1],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "13251091004269229867": ["convolution_gpu_bfyx_gemm_like",2],
+        "3568514382399560386": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "15528692642731712121": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "12246408434917478929": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "5843679089588930933": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "15914512645931208899": ["convolution_gpu_bfyx_gemm_like",2],
+        "8451212914744825089": ["convolution_gpu_bfyx_gemm_like",2],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "2826762745628486040": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",2],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "1171681987783013074": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "14037325204801680738": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17443356777503458523": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "5774841809066688068": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16944335478353845609": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "4640696923527766618": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "1104489643524273315": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "6669808855737023569": ["convolution_gpu_bfyx_gemm_like",1],
+        "14309292105974991733": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",92],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",2],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "2149582237161177965": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "3140230065585683313": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13464226348405628455": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "158222105675022402": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "3159147743553063163": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "15417738436777481469": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "5287076386757143976": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",267],
+        "7235358742317442134": ["convolution_gpu_bfyx_gemm_like",1],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "10670103699537731664": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",1],
+        "9979259596137305973": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15859493313686060349": ["convolution_gpu_bfyx_gemm_like",1],
+        "15988378956341507229": ["convolution_gpu_yxfb_yxio_b16",0],
+        "5156033406916344703": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "13102754309439605192": ["convolution_gpu_bfyx_gemm_like",1],
+        "3563872903821081702": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8792202318168046223": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",679],
+        "10990741293315393791": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "2770397466252831892": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "8609939102588915855": ["convolution_gpu_bfyx_gemm_like",2],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",494],
+        "13558656230312558247": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "15490478608105402679": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "16362139250976572928": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "5184121466994451498": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3509027370372599394": ["fully_connected_gpu_fb_io_ref",2],
+        "14008438372661779490": ["convolution_gpu_bfyx_gemm_like",2],
+        "12394049027081208902": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "15315327794058441258": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "10509933181132310969": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8036474422877454869": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "11800783548769329949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",712],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "778476198101178556": ["convolution_gpu_bfyx_gemm_like",1],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "17046662043776372746": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "2296581485980163665": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",496],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",2],
+        "10765280349477640969": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "6205240287062600210": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "12024143207855886580": ["convolution_gpu_bfyx_gemm_like",2],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "755414184406250882": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "6603778920476932267": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "8843585527713905568": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "16773645387243701837": ["convolution_gpu_bfyx_gemm_like",2],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "9373353053843326128": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "6664432489777052771": ["convolution_gpu_bfyx_gemm_like",2],
+        "805131056816361237": ["convolution_gpu_bfyx_os_iyx_osv16",681],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7744787957569714828": ["convolution_gpu_bfyx_gemm_like",1],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "13161997040644039778": ["convolution_gpu_bfyx_gemm_like",2],
+        "4897991181236908768": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "8690196189594920365": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",1],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",903],
+        "1082574490068006980": ["convolution_gpu_bfyx_gemm_like",2],
+        "9130971535185609293": ["convolution_gpu_bfyx_gemm_like",2],
+        "3662747857062156477": ["convolution_gpu_bfyx_gemm_like",1],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",1],
+        "15155676074658242659": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "16173557782125372935": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "16910952799476896905": ["convolution_gpu_bfyx_gemm_like",1],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17522452942286240233": ["convolution_gpu_bfyx_gemm_like",2],
+        "2727175120437582536": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",298],
+        "4279062247055842367": ["convolution_gpu_bfyx_gemm_like",1],
+        "3398322619007806698": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8740196547852036537": ["convolution_gpu_bfyx_gemm_like",2],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "10607904718265020949": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "6577505360421510286": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",1030],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "13410850301164057911": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "15082818876354718849": ["convolution_gpu_bfyx_gemm_like",1],
+        "9287404618748313247": ["convolution_gpu_bfyx_os_iyx_osv16",1062],
+        "9428176632140441528": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8507854696766492454": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "16182470664818268848": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5219048275475447369": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "4239133538073498792": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "3337625924046561031": ["convolution_gpu_bfyx_gemm_like",1],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "4400247897123856252": ["convolution_gpu_bfyx_gemm_like",2],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "6204183474669103812": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "6733731409232284409": ["convolution_gpu_bfyx_gemm_like",1],
+        "8578747191812631883": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "15901724303713479611": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10570285542015420072": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "2273992727647793692": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3221221905804708596": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "10433541468308381909": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",1],
+        "14424566003632608852": ["convolution_gpu_bfyx_gemm_like",2],
+        "14131851237755716991": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "11872943152839631823": ["convolution_gpu_bfyx_gemm_like",2],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",2],
+        "6458124573210430792": ["convolution_gpu_bfyx_gemm_like",2],
+        "16230621843665445228": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "17015791782274123780": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "4617809377006148936": ["convolution_gpu_bfyx_gemm_like",2],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "2451712485584835395": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "6214194654733781771": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12228610148087508521": ["convolution_gpu_bfyx_gemm_like",2],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",1],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12644942072153919043": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "11640865562390693266": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "6143200133853000387": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "13602140021189675477": ["convolution_gpu_bfyx_gemm_like",2],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "6578908625437515675": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "14896875712028630045": ["convolution_gpu_bfyx_gemm_like",2],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",2],
+        "2746052215199129520": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15129834325410878425": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10118395047539851751": ["convolution_gpu_bfyx_gemm_like",1],
+        "11284755586130392759": ["convolution_gpu_bfyx_os_iyx_osv16",243],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10899110544832584656": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "9069334144391048686": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "3600066510593746268": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "7148542290597073512": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "9274179337770060652": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "386749666417295495": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",1],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "3179874645565098825": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "10178145641713631806": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "10109431802089940590": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "8133587696326295326": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "13448845356783404653": ["convolution_gpu_bfyx_gemm_like",1],
+        "16172528828198474326": ["convolution_gpu_bfyx_os_iyx_osv16",608],
+        "8241070786700614317": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "10628725059172743408": ["convolution_gpu_bfyx_gemm_like",2],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",2],
+        "9737565171095493297": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14349625788399542568": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",2],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",504],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",1019],
+        "16347412180100581330": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",397],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",1],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",2],
+        "14355612297330229277": ["convolution_gpu_bfyx_gemm_like",0],
+        "8707189142909022305": ["convolution_gpu_bfyx_os_iyx_osv16",298],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "5643908654122573882": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "15856268902838573812": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "13810995219720233595": ["convolution_gpu_bfyx_gemm_like",2],
+        "659150305191479097": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13248567106128518549": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "6571438978296387721": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",2],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",2],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "7199295899520406795": ["convolution_gpu_bfyx_gemm_like",2],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",0],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "12972798847556569913": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "8376077531098664520": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "7330202944390548890": ["convolution_gpu_bfyx_gemm_like",1],
+        "4927360358387344983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3816674884393241704": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "2817919813339364130": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",2],
+        "14204609663091442879": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",2],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",592],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",681],
+        "17366007551797367227": ["convolution_gpu_bfyx_gemm_like",2],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3711525118850629466": ["convolution_gpu_bfyx_gemm_like",1],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "15578456771467281881": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "4738743763536059708": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",1],
+        "4046830923427667342": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "15757308772667178999": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4196367396954155354": ["convolution_gpu_bfyx_gemm_like",2],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "12193395770362986433": ["convolution_gpu_bfyx_os_iyx_osv16",684],
+        "10718764522366711114": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",2],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "2026622899016787854": ["convolution_gpu_yxfb_yxio_b16",0],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "14263790627243107300": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "3349519148124496343": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",1],
+        "6133592828563353516": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "15374625876485618845": ["convolution_gpu_bfyx_gemm_like",1],
+        "1425953627379976115": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16888412539296862194": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "3355259926747524578": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "10292585962794261197": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "15156525717629023944": ["convolution_gpu_bfyx_gemm_like",2],
+        "3224352307778512793": ["convolution_gpu_bfyx_gemm_like",1],
+        "3501882025888946886": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "8146945902795164796": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "5582896843095691256": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",1],
+        "15962533525948221648": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "85050336704401597": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "15857087373591747006": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "4091702228990140696": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "13192808619929896995": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",942],
+        "7531346828150129063": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",278],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "13583166868754499339": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",663],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "6522575549211855712": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "1640358227345963848": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "14159596290442764023": ["convolution_gpu_bfyx_gemm_like",1],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "16813995580382709489": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8130920994920685157": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "1591199515536783245": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "1838534101161814609": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "2575631797904040925": ["convolution_gpu_bfyx_gemm_like",2],
+        "689445825453914111": ["convolution_gpu_bfyx_gemm_like",1],
+        "3438296636411972401": ["convolution_gpu_bfyx_gemm_like",2],
+        "4959403414256988744": ["convolution_gpu_bfyx_gemm_like",1],
+        "5933743119393822386": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "8906185843274300447": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9580986168276580598": ["convolution_gpu_bfyx_gemm_like",1],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "1540041682425757361": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3501667344669686338": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "14885031472057965707": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",2],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "9421927854269492263": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "348058686961206025": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",1],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",2],
+        "12523676912856063091": ["convolution_gpu_bfyx_os_iyx_osv16",565],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",1],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12315068368597230211": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "3190494353583341446": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",2],
+        "4220826666482500445": ["convolution_gpu_bfyx_os_iyx_osv16",1029],
+        "11095908837221722097": ["convolution_gpu_bfyx_gemm_like",2],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",2],
+        "11070620435959083971": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "4957638663977636791": ["convolution_gpu_bfyx_gemm_like",2],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",0],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "8560635685184432720": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "2116913943188857359": ["convolution_gpu_bfyx_gemm_like",2],
+        "8316848551837633169": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "18273537339378756543": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",1],
+        "3780320160034246719": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "2929715823970060874": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5159470523468873105": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "8176012042686275874": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "1306339989221885682": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "11421180829679625737": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "8651641584737798174": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "863057075064640334": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "8671491767142900139": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "12348602762263193288": ["convolution_gpu_bfyx_os_iyx_osv16",648],
+        "14088382963493477342": ["convolution_gpu_bfyx_gemm_like",2],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",12],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "4773077837537775324": ["convolution_gpu_bfyx_gemm_like",2],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "8984436655107983227": ["convolution_gpu_bfyx_gemm_like",1],
+        "14811022197918391667": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "16290626406346691996": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "15891662883560480723": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "8155268141318893606": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13972357557211413688": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6644418194983229139": ["convolution_gpu_bfyx_gemm_like",1],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14912119584313592912": ["convolution_gpu_bfyx_gemm_like",1],
+        "15914342421266687768": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "2571882179292959757": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "9643408025778914022": ["convolution_gpu_bfyx_os_iyx_osv16",236],
+        "9585113116232600562": ["convolution_gpu_bfyx_gemm_like",1],
+        "6726099352298108756": ["convolution_gpu_bfyx_gemm_like",1],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4848143712599565301": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7671016314869993705": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",1],
+        "3448477246688526708": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17824431042110985323": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "18150429561058646714": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "11841034668170849494": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "17174919737114915467": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "9999425239167488495": ["convolution_gpu_bfyx_gemm_like",0],
+        "15471470494305051299": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "10151922632636937118": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "338716975932676215": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "11951606039079763598": ["convolution_gpu_bfyx_gemm_like",2],
+        "9447458159095730492": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "14123081378489325832": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "14116800584981026541": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "17154337492545826355": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "1103228955716492167": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13753473508578037346": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "13094402291968806996": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",438],
+        "632116056424249698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12655099960717366198": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "13115589642140732066": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14994322266840011040": ["convolution_gpu_bfyx_gemm_like",1],
+        "9040046051053703359": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "10170577772376890221": ["convolution_gpu_bfyx_gemm_like",1],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "17207560805775399864": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",2],
+        "1332624116953483870": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "16076153317792960383": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11862259122805366807": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "17358006976602795707": ["convolution_gpu_bfyx_gemm_like",2],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",2],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "981733129438741439": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "9454146598828084176": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "9601412379897937608": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "4833749391314748606": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",1],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "16758962840329202004": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "8818070832398055086": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",1],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",0],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",1],
+        "13124342334495538095": ["convolution_gpu_bfyx_gemm_like",1],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",1],
+        "17303408650780384587": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",1033],
+        "12894240573737168362": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",1],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "3963106895592011725": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "582360460084115077": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",570],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",1],
+        "16361932270527364507": ["convolution_gpu_bfyx_os_iyx_osv16",651],
+        "15786328370300803713": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "8505040075968411726": ["convolution_gpu_bfyx_gemm_like",1],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "5115134711994944288": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "2502125887857336825": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "8431759922045602848": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "13927671398099556854": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "6328802691680458752": ["convolution_gpu_bfyx_os_iyx_osv16",643],
+        "11910735867274493498": ["convolution_gpu_bfyx_gemm_like",2],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "5091558853871982858": ["convolution_gpu_bfyx_gemm_like",2],
+        "4531222427159927606": ["convolution_gpu_bfyx_gemm_like",2],
+        "8409488188696700816": ["convolution_gpu_bfyx_gemm_like",1],
+        "3063055767192991776": ["convolution_gpu_bfyx_gemm_like",2],
+        "6708349666663292171": ["fully_connected_gpu_fb_oi_ref",1],
+        "17037462814585846902": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "16393176054374397767": ["convolution_gpu_bfyx_gemm_like",1],
+        "8257103926661643451": ["convolution_gpu_bfyx_os_iyx_osv16",275],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "9040145293899470160": ["convolution_gpu_bfyx_os_iyx_osv16",309],
+        "287386909600391846": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "5433618404351968121": ["convolution_gpu_bfyx_gemm_like",1],
+        "4652136280940317116": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",2],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "11507538232733291666": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "10405183426600618231": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "7605139219344415117": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "6329618009202266591": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "10747988576436391912": ["convolution_gpu_bfyx_gemm_like",1],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",803],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "10323345824599612614": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "10554266898346470422": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15579919505002150556": ["convolution_gpu_bfyx_gemm_like",1],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "2534408579674556441": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12932635875905153141": ["convolution_gpu_bfyx_gemm_like",2],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "1907439276166837309": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",1],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12308359047798183133": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "18337160891834020517": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "7548031489690889629": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "12393385058735194260": ["convolution_gpu_bfyx_gemm_like",1],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "17564338309805484464": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "8528750110601691390": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "15636128989267984459": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7903891232234389925": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",1],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",396],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",1],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "15968821946892330559": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9076758673133996959": ["convolution_gpu_bfyx_gemm_like",2],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "9111988592015450418": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "3830703844770425343": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "1628593159980574595": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "14910223536998380801": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "16559140502701231107": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17825280904760131680": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "1074748462756364699": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",2],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "14352303529756685990": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",197],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",226],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "9383182168277796969": ["convolution_gpu_bfyx_gemm_like",2],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "273242667845386507": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "12228963567837353733": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "15201438563802430490": ["fully_connected_gpu_fb_oi_ref",1],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",308],
+        "13314092088416047551": ["fully_connected_gpu_fb_oi_ref",2],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "726985753660756762": ["convolution_gpu_bfyx_gemm_like",2],
+        "2788116002380533417": ["convolution_gpu_bfyx_gemm_like",2],
+        "13320675959188615441": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "10982526068861394162": ["convolution_gpu_yxfb_yxio_b16",0],
+        "15770767768674603174": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "7304346312452588844": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "16467987800266816984": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "4073467095502162430": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "14213516751025324346": ["convolution_gpu_bfyx_gemm_like",2],
+        "768720470104458759": ["convolution_gpu_bfyx_os_iyx_osv16",1025],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "11565861421381730304": ["convolution_gpu_bfyx_gemm_like",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "10085059621136526248": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "5735608687257018419": ["convolution_gpu_bfyx_os_iyx_osv16",306],
+        "13954821927253849036": ["convolution_gpu_bfyx_gemm_like",2],
+        "15600841108426475615": ["convolution_gpu_yxfb_yxio_b16",0],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "14738573151275130683": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",2],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "7755177205197405275": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "10187930930336324253": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "14418429155823196539": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13831458435772917577": ["convolution_gpu_bfyx_gemm_like",2],
+        "18386376129938707290": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "1237920404306733800": ["convolution_gpu_bfyx_gemm_like",1],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",1],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10883341041912056319": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",2],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "17089801601582809764": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "14630499010941056793": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",424],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "10917498758625273194": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",2],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "5740738339752793113": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7818381040882768404": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "5657471280535146301": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "12388375914105990324": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",701],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "5948701218437980356": ["convolution_gpu_bfyx_gemm_like",1],
+        "6928835003016610382": ["convolution_gpu_bfyx_gemm_like",2],
+        "3325727286860556323": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "17908444616754154471": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "16441830491664937048": ["convolution_gpu_bfyx_gemm_like",2],
+        "17847109385592002207": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",1],
+        "16490405739040977260": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "4217179485243909459": ["convolution_gpu_bfyx_gemm_like",1],
+        "14885109535362957947": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "11756650366229979428": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "4403753181729432604": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "1270307036687208396": ["convolution_gpu_bfyx_gemm_like",1],
+        "9737833587413114584": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "12534001599784153836": ["convolution_gpu_bfyx_os_iyx_osv16",1041],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",0],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "8618835732380720921": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "15287650965861631130": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "10090036431487700311": ["convolution_gpu_bfyx_gemm_like",2],
+        "16327433707667075261": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",528],
+        "3244675355773468991": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "2038505773698938555": ["fully_connected_gpu_bf_io_gemm",1],
+        "5857101685300045443": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",198],
+        "9152433123828445089": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "11007944497812650617": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",2],
+        "17808913959977434594": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",251],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",1],
+        "18381791065890314250": ["convolution_gpu_bfyx_gemm_like",0],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "6351347283201596793": ["convolution_gpu_bfyx_os_iyx_osv16",97],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "9780938731831129283": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",2],
+        "14472187692485966933": ["convolution_gpu_bfyx_os_iyx_osv16",860],
+        "10136369729388564720": ["convolution_gpu_bfyx_gemm_like",2],
+        "59356084516953804": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",1],
+        "937159502066696999": ["convolution_gpu_bfyx_gemm_like",1],
+        "14484890926084856480": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "10536316961655703500": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8108933468437926367": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "12160764253455777655": ["convolution_gpu_bfyx_gemm_like",2],
+        "9226912483632588371": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3538679039078582272": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "852015206582470545": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "1908809004094565452": ["convolution_gpu_bfyx_os_iyx_osv16",134],
+        "7333511810266504718": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "5057534502588100071": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "3432296808755992670": ["convolution_gpu_bfyx_gemm_like",2],
+        "15759530339367380982": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "13546876216568825877": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "1617135706549276688": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "11459784003592366395": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3498490999014554104": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "5321698540631249776": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "2939605281692583169": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",187],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",0],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",148],
+        "84595904778810418": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "18035673326929466074": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5419041493176804960": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "15661322183507404821": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "9775648000771985077": ["convolution_gpu_yxfb_yxio_b16",1],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "187352687850707150": ["convolution_gpu_bfyx_gemm_like",2],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16071723603031305677": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "5269172622193124300": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",2],
+        "14681717813022425567": ["convolution_gpu_bfyx_gemm_like",1],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "4992668316921598993": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "16547425454653232058": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",479],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "1170380397764345558": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",307],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1930929857644673460": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "6294240435687565243": ["convolution_gpu_bfyx_os_iyx_osv16",989],
+        "3341302541468955849": ["convolution_gpu_bfyx_gemm_like",1],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "7208008921815475393": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",2],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",1],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",504],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",1],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "1905758333157310570": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "879005904827468163": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "12174571114411168588": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "8857763129101380288": ["convolution_gpu_bfyx_gemm_like",2],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",1],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "281287280558289393": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "8708643228914766202": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",1],
+        "17790026124881397912": ["fully_connected_gpu_fb_io_ref",1],
+        "11587239927319376658": ["convolution_gpu_bfyx_gemm_like",2],
+        "9328223957245552723": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2108296560864415762": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "5754844816339228920": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "8803037667261582905": ["convolution_gpu_bfyx_gemm_like",1],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "4085450203909854919": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15820359925623438341": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",0],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "11455843788148231615": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "11198908896401597838": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "654122557966242717": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",1],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",1],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",1],
+        "5211831143687501130": ["convolution_gpu_bfyx_gemm_like",1],
+        "16661843849495077745": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",1049],
+        "5766507688771440170": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",2],
+        "14403132596827435096": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "1018687388655376483": ["convolution_gpu_bfyx_gemm_like",1],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "2305706332728008948": ["convolution_gpu_bfyx_gemm_like",2],
+        "18267428053198215471": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "2662628817605495834": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "2128612971571865547": ["convolution_gpu_bfyx_gemm_like",2],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "17585206779958265260": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "7712831597869354170": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "5568753513029409478": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "1697248235682953135": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17343050785312683560": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7447163906170805189": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "482564204402769504": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "7869916853707978306": ["convolution_gpu_bfyx_os_iyx_osv16",834],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "1410630713443793537": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "15678385128478075284": ["convolution_gpu_bfyx_gemm_like",2],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",1],
+        "5245526691775741296": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "577844026691991089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "54019631544204590": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16925721317097534009": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "18136765667969393174": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "9475130054420979752": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "3118602494449249177": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "12004552919019936392": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "14217181622713951411": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "2777318471329665162": ["convolution_gpu_bfyx_gemm_like",2],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "7708321360699824256": ["convolution_gpu_bfyx_gemm_like",1],
+        "4216958486055161753": ["convolution_gpu_bfyx_gemm_like",2],
+        "6713985030102340818": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",1],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "17238880534517721334": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "2968439898708528834": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "12866217660635921034": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "11666250400445971335": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "5047419871737940985": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "9096495972770198040": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17912189681971987483": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",894],
+        "10743138314323119696": ["convolution_gpu_bfyx_gemm_like",2],
+        "1680468564927032670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "3755253206085028904": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "6982733543386888622": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "10071449674652717890": ["convolution_gpu_bfyx_gemm_like",1],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",654],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "7223801044761006523": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "7206226541369793931": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13282951481330978659": ["convolution_gpu_bfyx_gemm_like",2],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",1],
+        "5479761740065152589": ["convolution_gpu_bfyx_gemm_like",2],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",2],
+        "4346591404756288097": ["convolution_gpu_bfyx_gemm_like",0],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "2995134938466176198": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "4387041763614917736": ["convolution_gpu_bfyx_gemm_like",1],
+        "7916244303189113815": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "7171904645566467208": ["convolution_gpu_bfyx_gemm_like",2],
+        "6902644989079870993": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "16190949264253468961": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "12165079289914715018": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "15322609677356616580": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "8712136292276123857": ["convolution_gpu_bfyx_gemm_like",2],
+        "8819268903800581706": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "10100237101982273901": ["convolution_gpu_bfyx_os_iyx_osv16",238],
+        "14077148976508649021": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "6213353364768643062": ["convolution_gpu_bfyx_gemm_like",2],
+        "13842309033760176194": ["convolution_gpu_bfyx_gemm_like",2],
+        "9259437778054905599": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "72444706264681262": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "10424278617647597641": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",424],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "14079654309452583394": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7869779894480025247": ["convolution_gpu_bfyx_gemm_like",0],
+        "14397348576352573007": ["convolution_gpu_bfyx_gemm_like",1],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "7279393739634103483": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "9440117898128288296": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "8906588133431586825": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",2],
+        "3806131437010910920": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "9454954846682513038": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "7565867291827884997": ["convolution_gpu_bfyx_gemm_like",1],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",1],
+        "7585184325339753737": ["convolution_gpu_bfyx_os_iyx_osv16",238],
+        "4313392430539923574": ["convolution_gpu_bfyx_os_iyx_osv16",235],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",2],
+        "16969463538496570528": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",1],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",0],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",726],
+        "3515437649977762166": ["convolution_gpu_bfyx_gemm_like",0],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "14671212883301405408": ["convolution_gpu_bfyx_gemm_like",1],
+        "17113350507039887381": ["convolution_gpu_bfyx_gemm_like",2],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "7183578232279711009": ["convolution_gpu_bfyx_gemm_like",2],
+        "8101977280003030465": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "1318571118468536310": ["convolution_gpu_bfyx_gemm_like",2],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",2],
+        "288853243482418538": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "15589007878875898942": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "7082007579524697455": ["convolution_gpu_bfyx_gemm_like",2],
+        "8656468860180713379": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",1],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17225578855755054959": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "17252589865292797082": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",1],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "5319668297345215520": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "13954144830230671601": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "12242618640422208652": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "10914921540144371519": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14346703182362139650": ["convolution_gpu_bfyx_gemm_like",2],
+        "4550028191070279999": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",4],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "9529614587861271730": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",1],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "12871555773123368130": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "14487842225000203929": ["convolution_gpu_bfyx_gemm_like",2],
+        "4165036357594592683": ["convolution_gpu_bfyx_gemm_like",2],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",2],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "13991205023798493715": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "1934379409955686502": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "1596353239542510685": ["convolution_gpu_bfyx_gemm_like",1],
+        "14985236276429954162": ["convolution_gpu_bfyx_gemm_like",0],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "2969389503332309296": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5688478347124565305": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "14335423820860953927": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8439950151963452285": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "9213563311267466388": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "1116274074896622552": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2893564501191050837": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "2722124265986526212": ["convolution_gpu_bfyx_gemm_like",2],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",51],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "872401732136570312": ["convolution_gpu_bfyx_gemm_like",2],
+        "5965451243366505522": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "597650904461183283": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "15412447128995361859": ["convolution_gpu_bfyx_gemm_like",1],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",2],
+        "11398019086259011063": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "8057302050645780813": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11356842300444410831": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "9340159617983543624": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8757900457181374694": ["convolution_gpu_bfyx_gemm_like",1],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",0],
+        "3541538046227217664": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2047041720569246861": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "6531171505861182429": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "3219408878901707426": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7375461241315602473": ["convolution_gpu_bfyx_gemm_like",2],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",1],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "3265415000818832667": ["convolution_gpu_bfyx_gemm_like",1],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "9542325095876448686": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3699344686791530101": ["convolution_gpu_bfyx_gemm_like",2],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "16587061389996963349": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",1035],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "10930115765550856328": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",2],
+        "2651385050387738902": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9277176009071334860": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "8751016391945753900": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1843555260471832708": ["convolution_gpu_bfyx_gemm_like",1],
+        "17344974951998490453": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",2],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "2622434279674583815": ["convolution_gpu_bfyx_gemm_like",1],
+        "1309867416606346543": ["convolution_gpu_bfyx_os_iyx_osv16",1029],
+        "15997754881872769378": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "7132328255408635227": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "7274179284676568361": ["convolution_gpu_bfyx_os_iyx_osv16",236],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",0],
+        "11619548409913646265": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "8951040603784899163": ["convolution_gpu_bfyx_gemm_like",2],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "9751582946441607796": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "8701248964531180496": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "17512961503976896701": ["convolution_gpu_bfyx_os_iyx_osv16",1059],
+        "9762182215179534181": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "16243196137456624852": ["convolution_gpu_bfyx_gemm_like",2],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "8121179472578287280": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "5311718276151327830": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "3383222668132648804": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12493863403516600413": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "8444259010311137762": ["convolution_gpu_bfyx_gemm_like",2],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "12990341489637414845": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "5687802882700097624": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",1],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",2],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",1],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15031155621982459860": ["convolution_gpu_bfyx_gemm_like",2],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",0],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",613],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "7397341452130124383": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4867937397499803072": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "4523064418696274869": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "3039528482572243879": ["convolution_gpu_bfyx_os_iyx_osv16",238],
+        "16677044352793659175": ["convolution_gpu_bfyx_gemm_like",1],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",724],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "11942736969933408358": ["convolution_gpu_bfyx_gemm_like",2],
+        "14398854364550406668": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "586947787345351152": ["convolution_gpu_bfyx_gemm_like",1],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "9477562342190423343": ["convolution_gpu_bfyx_gemm_like",2],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3563614453014995411": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "12510951219501865365": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "4428101657497677982": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "5352861363832390974": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "5074273865983613482": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "11215297942420903101": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "17517495652165026573": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "12417253210787537988": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",2],
+        "9208964785762052001": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "7211355951470869591": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",2],
+        "2856601829807186494": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",2],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",586],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "859377216693940737": ["convolution_gpu_bfyx_gemm_like",1],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",2],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",2],
+        "15188570678726970998": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "14025235562200209723": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",614],
+        "4398371999113956082": ["convolution_gpu_bfyx_gemm_like",2],
+        "677249604491773387": ["convolution_gpu_bfyx_gemm_like",2],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "5497751772699578150": ["convolution_gpu_bfyx_gemm_like",1],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_gemm_like",2],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "8183383667948205424": ["convolution_gpu_yxfb_yxio_b16",0],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "6830387121684699972": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "12972634653821069685": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13200151444914751729": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "7481256533438761028": ["convolution_gpu_bfyx_gemm_like",2],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "10128390168715530898": ["convolution_gpu_bfyx_gemm_like",2],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "8787438180071123604": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "12461575861709234385": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "16710651492402564794": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",2],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",875],
+        "1230262279011217327": ["convolution_gpu_bfyx_gemm_like",1],
+        "17024388383581997032": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "13702692566238948173": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "9545968464906009869": ["convolution_gpu_bfyx_gemm_like",1],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",420],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12194037100109755112": ["convolution_gpu_bfyx_gemm_like",2],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "11451740938287179908": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "16037141448095945650": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "1608378717397996752": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "16397733032387984819": ["convolution_gpu_bfyx_gemm_like",2],
+        "10384537928514123040": ["convolution_gpu_bfyx_gemm_like",2],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "16527840366172690992": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6129884455218252024": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "14151747022287993729": ["convolution_gpu_bfyx_gemm_like",2],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "18310667924071639899": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "13369603621524676979": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "1801731858063091191": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",1059],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "14114380593731243715": ["convolution_gpu_bfyx_os_iyx_osv16",3],
+        "9702618600245321109": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "13739257060165119132": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "14810839157236175179": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "7692849839965441330": ["convolution_gpu_bfyx_gemm_like",2],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",888],
+        "12293786134765875615": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "5911282942658469852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",2],
+        "2632535010129224704": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16748662918272106932": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",292],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",1],
+        "7106362077449435105": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "5060012838564094182": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",4],
+        "8321769923556905957": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "6509758095668864050": ["convolution_gpu_bfyx_gemm_like",2],
+        "10058165874008941852": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "1139581213977408268": ["fully_connected_gpu_fb_io_ref",1],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",2],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",2],
+        "3141886504884887200": ["convolution_gpu_bfyx_gemm_like",1],
+        "4773123925616969670": ["convolution_gpu_bfyx_gemm_like",1],
+        "6310724136390087834": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",795],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",2],
+        "9522661528867955338": ["convolution_gpu_bfyx_gemm_like",1],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15839295895890205274": ["convolution_gpu_bfyx_gemm_like",2],
+        "18302892230881285207": ["convolution_gpu_bfyx_gemm_like",1],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",2],
+        "17281826959243966826": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "3106922888635965020": ["convolution_gpu_bfyx_gemm_like",2],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "12278364834477923930": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "15739278428190392018": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",266],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "4197617702037834389": ["convolution_gpu_bfyx_gemm_like",1],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16579057939215877904": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "6863331059471727622": ["convolution_gpu_bfyx_gemm_like",2],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "9649445293567537596": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "4021558014531645922": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "6222595759158615206": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7878605163588288309": ["convolution_gpu_bfyx_os_iyx_osv16",469],
+        "6263019986730305851": ["convolution_gpu_bfyx_os_iyx_osv16",857],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "13775529405693629438": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",0],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",1],
+        "73865742350616903": ["convolution_gpu_bfyx_gemm_like",1],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "378801963103874857": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "7264756313770306662": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",0],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",1030],
+        "16364494883229084045": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14248239982355212178": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "16818714747882774917": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "10437367877444543776": ["convolution_gpu_bfyx_gemm_like",0],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "17170858505976681742": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "3034482898462686729": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "14784115394395151055": ["convolution_gpu_bfyx_gemm_like",2],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9065894438656900887": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14001406016806064079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14757749560543979231": ["convolution_gpu_bfyx_gemm_like",2],
+        "6772239376357727149": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6398819277350155011": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "5308128387928804050": ["convolution_gpu_bfyx_gemm_like",2],
+        "3409043224171087168": ["convolution_gpu_bfyx_os_iyx_osv16",272],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "12946531140050029900": ["convolution_gpu_bfyx_gemm_like",2],
+        "10058614204420018541": ["convolution_gpu_bfyx_os_iyx_osv16",379],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "13933912937625580405": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "13781423818051299677": ["convolution_gpu_bfyx_gemm_like",2],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",2],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",1],
+        "9455406830371528486": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",1],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",1],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",684],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",2],
+        "3750338655074082587": ["fully_connected_gpu_fb_io_ref",0],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",1],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "10482582307328548806": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "4229105529069729944": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "6556424924189200804": ["convolution_gpu_bfyx_os_iyx_osv16",236],
+        "3872151366780051246": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "9222744127882324405": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "4780291919667721265": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10792503079194374004": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "10890975553758439233": ["convolution_gpu_bfyx_gemm_like",2],
+        "17477062954520561609": ["convolution_gpu_bfyx_gemm_like",2],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "15530407024531326375": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "3860603464276263676": ["convolution_gpu_bfyx_gemm_like",1],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "5334190564423375247": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",1],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "12450814729547235386": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",892],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "14535007186125575064": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "12190841837604350271": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "2649948006897488504": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "16522364268583242080": ["convolution_gpu_bfyx_gemm_like",2],
+        "13468713306678453952": ["convolution_gpu_bfyx_gemm_like",1],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "7232326270078161768": ["convolution_gpu_bfyx_gemm_like",2],
+        "6254141935545262078": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",1],
+        "15247381586316467097": ["convolution_gpu_bfyx_gemm_like",2],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",0],
+        "17515573322312447679": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "11113256687741667688": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "17310332946322628458": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14744368497944610864": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",298],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12977678792503377525": ["convolution_gpu_bfyx_gemm_like",1],
+        "11829442945690098558": ["convolution_gpu_bfyx_gemm_like",2],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "13856271274572142709": ["convolution_gpu_bfyx_gemm_like",1],
+        "6471563320494376693": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3774285301357006334": ["convolution_gpu_bfyx_gemm_like",1],
+        "13283842370311517843": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",2],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",1],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",2],
+        "17094948685292534952": ["convolution_gpu_bfyx_os_iyx_osv16",923],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "11308583200952256245": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "8354579049246302728": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "8943913562339525413": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "17026284168840448378": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "17178308105985812083": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9813748068195103720": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",1],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "4588420324030315321": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",2],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "7683334381958571864": ["convolution_gpu_bfyx_gemm_like",2],
+        "4265693151382066296": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "7975810844103449438": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "9839670675413379092": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "3503893875515897267": ["convolution_gpu_bfyx_gemm_like",0],
+        "3761770343527826418": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",897],
+        "10771803503544737080": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "563440246018637010": ["convolution_gpu_yxfb_yxio_b16",0],
+        "3150231129728961455": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16853250891250756537": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",2],
+        "7757331094141318304": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "11107930597263802755": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16986358655784856534": ["convolution_gpu_bfyx_gemm_like",2],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "11583017348580874022": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "17635171685500922207": ["convolution_gpu_bfyx_os_iyx_osv16",237],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "14716719350966652036": ["convolution_gpu_bfyx_gemm_like",1],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",2],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",797],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",6],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",1],
+        "17525564757769958678": ["convolution_gpu_bfyx_gemm_like",1],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15779837958180258409": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "16348402367953880206": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1051506168926530904": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "6438522646185979880": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2668729552208169959": ["convolution_gpu_bfyx_gemm_like",2],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16236397968499692493": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "5629373398445592781": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "16312223896859176991": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",2],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "15799159401545270696": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",1],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",1],
+        "5688623850477433571": ["convolution_gpu_bfyx_gemm_like",1],
+        "13468081302022888489": ["convolution_gpu_bfyx_gemm_like",2],
+        "2188101366183302888": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "9541630719145326121": ["convolution_gpu_bfyx_gemm_like",1],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "16574710115918192418": ["convolution_gpu_bfyx_gemm_like",2],
+        "10377729875228238588": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "9729771183572950642": ["convolution_gpu_bfyx_gemm_like",2],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4716188972902735458": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "8032685176029570383": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",2],
+        "9920155432685318259": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "7977195117668583981": ["convolution_gpu_bfyx_gemm_like",2],
+        "4366168099274266975": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13328449155966085543": ["convolution_gpu_bfyx_gemm_like",2],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",658],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",113],
+        "11936530628363072904": ["convolution_gpu_bfyx_gemm_like",0],
+        "10968768803038046390": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "7575675354187625951": ["convolution_gpu_bfyx_gemm_like",2],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "8561261337239934159": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",2],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "5103094815475470596": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",55],
+        "4766071144928072260": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "5409924335138540834": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",2],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "16566128345135114558": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",1],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "14805540705424073865": ["convolution_gpu_bfyx_gemm_like",2],
+        "3673781117412048086": ["convolution_gpu_bfyx_os_iyx_osv16",272],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "11025471731438443683": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15720507574336564201": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",1],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "12309955719964788034": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "8931169575495985034": ["convolution_gpu_bfyx_gemm_like",2],
+        "17025324057045572535": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",1],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "8272823732258536202": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "9671459469252116568": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "5095827462645341808": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "11893541520830049036": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "1103204698908514224": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "543472136359161929": ["convolution_gpu_bfyx_gemm_like",2],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "7669403041163460089": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17723621158215826108": ["convolution_gpu_bfyx_gemm_like",2],
+        "12693511427898130707": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "5019077257951332016": ["convolution_gpu_bfyx_gemm_like",2],
+        "3277243911383750280": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "10432365444137108781": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "3868149953087814447": ["convolution_gpu_bfyx_gemm_like",1],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",1],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16053585286807864356": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "11754316727756881612": ["convolution_gpu_bfyx_os_iyx_osv16",108],
+        "11626402549863483301": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",731],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",1],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "5159738930501638535": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15609627722687211129": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "8809017515482311843": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "15511138074959300404": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",1],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",48],
+        "4769003637955328938": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "2265784112305305260": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3477539135137665170": ["convolution_gpu_bfyx_gemm_like",2],
+        "14466032674083938714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "1509728225855233852": ["convolution_gpu_bfyx_gemm_like",1],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "4914474312076193952": ["convolution_gpu_bfyx_gemm_like",1],
+        "1996860183441418841": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "15690161340392005765": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",2],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "4362304842016958728": ["convolution_gpu_bfyx_os_iyx_osv16",1061],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",2],
+        "15438530452161762045": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13477548641580029772": ["convolution_gpu_bfyx_gemm_like",1],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14221578799010900252": ["convolution_gpu_bfyx_gemm_like",2],
+        "15235409162483701027": ["convolution_gpu_bfyx_gemm_like",1],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "13131740479277027362": ["fully_connected_gpu_bf_io_gemm",2],
+        "11462462742322068863": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "13754540732991287617": ["convolution_gpu_bfyx_gemm_like",2],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "18067291256808591467": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",2],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "8108843303778211282": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "15217573782563469232": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7940369586324090841": ["convolution_gpu_bfyx_gemm_like",2],
+        "17035903590837750750": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "18375125668176498051": ["convolution_gpu_bfyx_gemm_like",2],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "11806402239500046867": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "700717277178942679": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "4238885454989272754": ["convolution_gpu_bfyx_gemm_like",1],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",687],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "13300022131572486202": ["convolution_gpu_bfyx_gemm_like",2],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14540578324750869319": ["convolution_gpu_bfyx_gemm_like",0],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "11626398907755088688": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "17201365233492366678": ["convolution_gpu_bfyx_gemm_like",2],
+        "15115780248032030963": ["convolution_gpu_yxfb_yxio_b16",0],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "733956743303342862": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "1077773457856682663": ["convolution_gpu_bfyx_os_iyx_osv16",178],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15026219694198820614": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",284],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",929],
+        "5600128039063009632": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "792684262493086891": ["convolution_gpu_bfyx_os_iyx_osv16",1042],
+        "15497797842820949408": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",2],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "1186545671730357033": ["convolution_gpu_bfyx_gemm_like",2],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "1742897526168249500": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "14045927407431718832": ["convolution_gpu_bfyx_gemm_like",2],
+        "11130439225010714550": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "1701609125136907870": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "10308431308942416781": ["convolution_gpu_bfyx_gemm_like",2],
+        "5649150695527000655": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "17713034180977313726": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "17649961873981897621": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",2],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "2194607895573544953": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "875400109066360897": ["convolution_gpu_bfyx_gemm_like",2],
+        "15980348884716629349": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12003323477818208825": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "18109284647478027063": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "12868739680413736657": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "10316451248440741901": ["convolution_gpu_bfyx_gemm_like",1],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "1249137685908951501": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "2173720698351153121": ["convolution_gpu_bfyx_gemm_like",2],
+        "12647099325257717945": ["convolution_gpu_bfyx_gemm_like",1],
+        "11086699387784339943": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",136],
+        "2608363732937932266": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "5646139101524964833": ["convolution_gpu_bfyx_gemm_like",1],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "12831123539633580270": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",1049],
+        "3653156933813711765": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",1],
+        "142650579335909103": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "14026537760442360645": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10022487076451608714": ["convolution_gpu_bfyx_gemm_like",2],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "941626985322260281": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "3308770992373192529": ["convolution_gpu_bfyx_gemm_like",2],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "15705908639736679687": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",418],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",1],
+        "4708035980731751007": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11443268857010762276": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",1],
+        "6845814820599174031": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15641537661939240413": ["convolution_gpu_bfyx_gemm_like",2],
+        "9928406318940388716": ["convolution_gpu_bfyx_gemm_like",1],
+        "6065819201836017182": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "8746621720912032145": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "14098811155652990436": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "13384754476437374504": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "14115742296883450319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17738299860390552088": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",184],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "6942016672941874829": ["convolution_gpu_bfyx_gemm_like",2],
+        "9367157746678824712": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "11845189428639322474": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",2],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "6981537186704688907": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "17310409067211414565": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "9404677451270692749": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "7941729567451949422": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289082888174784976": ["convolution_gpu_bfyx_gemm_like",1],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "7624476043779763605": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",2],
+        "9525535670799618110": ["convolution_gpu_bfyx_gemm_like",2],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "17084977396231597605": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4623542918584461522": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",853],
+        "7870154008378361670": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2525260242689556544": ["convolution_gpu_bfyx_gemm_like",2],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",347],
+        "4274801141127703532": ["convolution_gpu_bfyx_os_iyx_osv16",993],
+        "7056030150365552588": ["convolution_gpu_bfyx_gemm_like",2],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "1138439260035360722": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",1],
+        "5720964268093705079": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "3350601287664242323": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "13833960927635646899": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",1052],
+        "879896719155824868": ["convolution_gpu_bfyx_gemm_like",2],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "11706446082856895571": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "7958459862276998225": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "14100870590396726248": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "17329287216741045059": ["convolution_gpu_bfyx_gemm_like",2],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",989],
+        "13538051178827008933": ["convolution_gpu_bfyx_os_iyx_osv16",160],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3746573775462003750": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",2],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15662207751131195569": ["convolution_gpu_bfyx_gemm_like",2],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "18012549942299450620": ["convolution_gpu_bfyx_gemm_like",1],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "4456004887590847716": ["convolution_gpu_bfyx_gemm_like",2],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",2],
+        "3212789693085089063": ["convolution_gpu_bfyx_gemm_like",2],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",1],
+        "17870874477143985774": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "11337525286386930242": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "4435224497850514394": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",2],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "14974730512607138726": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "9133263538092913983": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",494],
+        "9243949750444156746": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",304],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "6181272224000872375": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13865227850818392065": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "13681462437496627948": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "4010419602093863685": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "2623687018437195679": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "10084794570892043447": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",997],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "9131235538209388787": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",1],
+        "16884228931101540030": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "8543619733732987550": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "9468684953949274635": ["convolution_gpu_bfyx_gemm_like",1],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "291868903926685441": ["convolution_gpu_bfyx_gemm_like",2],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "3007637520820789085": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "9480653639044390919": ["convolution_gpu_bfyx_os_iyx_osv16",1061],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",1],
+        "8854234880878427078": ["convolution_gpu_bfyx_gemm_like",2],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "2653651564133701304": ["convolution_gpu_bfyx_gemm_like",2],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",755],
+        "6020017927557041768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "9803492989444302959": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17041468169694105561": ["convolution_gpu_yxfb_yxio_b16",0],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "2322559721899919275": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "6942049339361951275": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "10782611933832492335": ["convolution_gpu_bfyx_gemm_like",2],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13447028922679236865": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "15924583510704449214": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "12467673564660108244": ["convolution_gpu_bfyx_os_iyx_osv16",306],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",1],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "12065769091972094756": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "15106614232165315070": ["convolution_gpu_bfyx_gemm_like",1],
+        "17025268985366223779": ["convolution_gpu_bfyx_os_iyx_osv16",857],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "16039372573821594566": ["convolution_gpu_bfyx_gemm_like",2],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",1],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "12625112690264223217": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",1],
+        "8104309105061227444": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "3372770576629463160": ["convolution_gpu_bfyx_gemm_like",1],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "2884499360870038648": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11267742746905371769": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "16616945998593626851": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "18077281411861416889": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "2305461098719675735": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "5055133356846736609": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "7474639594232203854": ["convolution_gpu_bfyx_os_iyx_osv16",97],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",234],
+        "6962268765187856246": ["convolution_gpu_bfyx_gemm_like",2],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "16711142379173254655": ["convolution_gpu_yxfb_yxio_b16",0],
+        "14444475853714164129": ["convolution_gpu_bfyx_gemm_like",2],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",0],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "9604982746455852556": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "6656593119788274992": ["convolution_gpu_bfyx_gemm_like",1],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1173136780324694038": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "8619526128410675593": ["convolution_gpu_bfyx_gemm_like",2],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "5334566325056222430": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",425],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "15586047342916704364": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",357],
+        "8195881973746570408": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "5922142661777925178": ["convolution_gpu_bfyx_gemm_like",1],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",1],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "9741607635826869269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "1632416005093914709": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",1],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "15739274921308457528": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",122],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "12978370505631031751": ["convolution_gpu_bfyx_gemm_like",2],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "15993427814066246646": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "1882052795393187384": ["convolution_gpu_bfyx_os_iyx_osv16",665],
+        "12802517759474139810": ["convolution_gpu_bfyx_gemm_like",2],
+        "10808909442136736629": ["convolution_gpu_bfyx_gemm_like",2],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "2065752819810364738": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",0],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",0],
+        "10700011669103135203": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6428098122005804378": ["convolution_gpu_bfyx_os_iyx_osv16",651],
+        "15897477855246170861": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "13206826317378863148": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "10415046594066474634": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2683304757433993300": ["convolution_gpu_bfyx_gemm_like",2],
+        "721174714308243785": ["convolution_gpu_bfyx_gemm_like",2],
+        "2204178900998688268": ["convolution_gpu_bfyx_gemm_like",2],
+        "9740466267717175474": ["convolution_gpu_bfyx_gemm_like",1],
+        "8526484907799590618": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "407189201971322683": ["convolution_gpu_bfyx_os_iyx_osv16",792],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",1],
+        "9882204352209412039": ["convolution_gpu_bfyx_gemm_like",1],
+        "3855859061709004677": ["convolution_gpu_bfyx_gemm_like",2],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "5020788604681810984": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "1963081583851864291": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2370837049876630969": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16437124655147660375": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "12397280593466519809": ["convolution_gpu_bfyx_gemm_like",2],
+        "9311802150474489673": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "12058759356433220258": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "3266557807508325807": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",899],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "17928043901784474130": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12641170321047008726": ["convolution_gpu_bfyx_gemm_like",2],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",305],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "10294185397756053636": ["convolution_gpu_bfyx_gemm_like",2],
+        "4161141078006269526": ["convolution_gpu_bfyx_gemm_like",2],
+        "14147460733160099960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12512751736409465214": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "7026575758396092435": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "9692654253261175490": ["convolution_gpu_bfyx_gemm_like",2],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "15354185859262170540": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "15595549493819416194": ["convolution_gpu_bfyx_os_iyx_osv16",105],
+        "2040762223425679479": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "8253823502854784432": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "216603198215625772": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",2],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",0],
+        "18416908414174464784": ["convolution_gpu_bfyx_gemm_like",1],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",1],
+        "11634932044447867039": ["convolution_gpu_bfyx_gemm_like",2],
+        "17037416417174266088": ["convolution_gpu_bfyx_gemm_like",1],
+        "12421204749289937399": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "4678607855896512523": ["convolution_gpu_bfyx_gemm_like",2],
+        "8490260671996115530": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "15640202505592598653": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "15727611564408173858": ["convolution_gpu_bfyx_gemm_like",1],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",547],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "16913004986170202203": ["convolution_gpu_bfyx_gemm_like",2],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12900949103593247293": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "10753540518493641553": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",1],
+        "18043340998699622388": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "14117801387057507639": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "2730604806511016352": ["convolution_gpu_bfyx_os_iyx_osv16",906],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6014752258124559691": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15805087418686802636": ["convolution_gpu_bfyx_gemm_like",1],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",1],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "7201521533301617290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13760645810144930270": ["convolution_gpu_bfyx_os_iyx_osv16",602],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "13762042713029963144": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5941298590926032148": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "6300691162962736560": ["convolution_gpu_bfyx_os_iyx_osv16",1030],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "14667209474639064623": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3236003754884728510": ["convolution_gpu_bfyx_gemm_like",2],
+        "7688176479120305539": ["convolution_gpu_bfyx_os_iyx_osv16",840],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",2],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",793],
+        "8025053805734757314": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "16432425079146486467": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15948383678216076358": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",686],
+        "10320711719466983961": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "13590444711975157776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "8939683514448064461": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "2418288192668085805": ["convolution_gpu_bfyx_gemm_like",2],
+        "6057433908801727873": ["convolution_gpu_bfyx_gemm_like",2],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "1154228007901031779": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11726298758004767743": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "6129602738379919488": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "8737417433314100353": ["convolution_gpu_bfyx_gemm_like",2],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "9226443907548972870": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "4479979951990338510": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "11799179287124317845": ["convolution_gpu_bfyx_gemm_like",1],
+        "2004120786408087671": ["convolution_gpu_bfyx_gemm_like",1],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "1902656726461670148": ["convolution_gpu_bfyx_gemm_like",2],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "13011676362747785816": ["convolution_gpu_bfyx_gemm_like",2],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",1],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "16286085532892593349": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",1078],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",893],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "7780140599533242850": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "805221045541170643": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3388752887767453958": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "8837721075413149240": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",2],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",1],
+        "13646974121952099172": ["convolution_gpu_bfyx_gemm_like",1],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",2],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "8856888761246057127": ["convolution_gpu_bfyx_gemm_like",1],
+        "12812685418923919055": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4861982518177129729": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",1],
+        "17243648226968859637": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "15494543914974994991": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "2495655464941634884": ["convolution_gpu_bfyx_os_iyx_osv16",238],
+        "3036808833459559381": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "8460847842045253466": ["convolution_gpu_bfyx_os_iyx_osv16",11],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",752],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "13575423234109624706": ["fully_connected_gpu_yxfb_ref",1],
+        "17163595630291422874": ["convolution_gpu_bfyx_gemm_like",2],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "18417288692814472127": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "9482749589540764069": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3441335188113424896": ["convolution_gpu_bfyx_gemm_like",1],
+        "12421707187947291166": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "8153567933591966877": ["convolution_gpu_bfyx_gemm_like",2],
+        "10130171279527667782": ["convolution_gpu_bfyx_gemm_like",1],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",497],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6109013751635776331": ["convolution_gpu_bfyx_gemm_like",2],
+        "14650567822254940018": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "18357544235608006954": ["convolution_gpu_bfyx_gemm_like",1],
+        "13503688893307029975": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "8061914949376516780": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",2],
+        "16779678846332091086": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "8761283252495354972": ["convolution_gpu_bfyx_gemm_like",1],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "15216108478837665623": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "939718260623752240": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "9441060601228656341": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_gemm",1],
+        "1353170363915443814": ["convolution_gpu_bfyx_gemm_like",1],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "1854612313463195535": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",619],
+        "12796777049340516563": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",2],
+        "10001963042016663554": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12531580106484042446": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "17993337310288098038": ["convolution_gpu_bfyx_gemm_like",2],
+        "16949056117405140365": ["convolution_gpu_bfyx_gemm_like",2],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "17208186152576814861": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "8479958930889587809": ["fully_connected_gpu_fb_io_ref",2],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",1],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "2140514316203117958": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",645],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4085907608404305515": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "6791806088355877039": ["convolution_gpu_bfyx_gemm_like",0],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "10133054058562198093": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "994842991399671507": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",2],
+        "1760830986937165861": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "13815395589135469450": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "5083163738120585821": ["fully_connected_gpu_fb_io_ref",2],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "3220280315905987373": ["convolution_gpu_bfyx_gemm_like",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",560],
+        "17700958439420868719": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "9119618606914671839": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "10290107543739998181": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "11595387512434355394": ["convolution_gpu_bfyx_gemm_like",2],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "4933831571091731212": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3810356382905059819": ["convolution_gpu_bfyx_gemm_like",1],
+        "4642234334824303290": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "14578291812739325465": ["convolution_gpu_bfyx_os_iyx_osv16",1013],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",375],
+        "8258382025812748961": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "16683485007140805060": ["fully_connected_gpu_fb_io_ref",2],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "3622409603053918029": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",1],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "956022649859563080": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "14206076551739831333": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "10848277915422577656": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",2],
+        "6323026044750482867": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "2627779045483019709": ["convolution_gpu_bfyx_os_iyx_osv16",847],
+        "12494969618927201911": ["fully_connected_gpu_fb_oi_ref",1],
+        "4714289593698160876": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1089944493540593798": ["convolution_gpu_bfyx_gemm_like",1],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",2],
+        "16000753982895054944": ["convolution_gpu_bfyx_gemm_like",1],
+        "5941092474669713339": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "12961109385388101976": ["convolution_gpu_yxfb_yxio_b16",1],
+        "60749853744407778": ["convolution_gpu_bfyx_gemm_like",2],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",1],
+        "12076058470574246054": ["convolution_gpu_bfyx_os_iyx_osv16",273],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",281],
+        "2783577080556699089": ["convolution_gpu_bfyx_gemm_like",1],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "4191326605459754690": ["convolution_gpu_bfyx_os_iyx_osv16",617],
+        "13621339501067135142": ["convolution_gpu_bfyx_gemm_like",2],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "5779388310240896974": ["convolution_gpu_bfyx_os_iyx_osv16",835],
+        "12522495848240087966": ["convolution_gpu_bfyx_gemm_like",1],
+        "878892264408839067": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "17442035600389810700": ["convolution_gpu_bfyx_gemm_like",2],
+        "8859895010324601937": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "4121109463284708890": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "5912303851874077576": ["convolution_gpu_bfyx_gemm_like",2],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "10294610483561043024": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "10544034939133448916": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "10280619408766255552": ["convolution_gpu_bfyx_gemm_like",2],
+        "18313088176414428990": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "17382660912493284320": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",303],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "5145853681977610916": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10702234389482091891": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "12788968383428254917": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "9100044555742394133": ["convolution_gpu_bfyx_os_iyx_osv16",926],
+        "52089503050497755": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "14930789530046665855": ["convolution_gpu_bfyx_gemm_like",0],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "4086556132337751931": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "3177304125602972370": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",1],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "529543453251381109": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "9707630588260222630": ["convolution_gpu_bfyx_gemm_like",2],
+        "17050143605017295447": ["convolution_gpu_bfyx_gemm_like",2],
+        "15065019229949449623": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6288489890578212082": ["convolution_gpu_bfyx_gemm_like",1],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",47],
+        "9987415314864002460": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "3782239800777370325": ["convolution_gpu_bfyx_gemm_like",1],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",347],
+        "11919129623429545762": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "13850920989756588064": ["convolution_gpu_bfyx_gemm_like",0],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",137],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",145],
+        "5291011077679733990": ["convolution_gpu_bfyx_gemm_like",2],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",613],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",397],
+        "9695024256541464964": ["convolution_gpu_bfyx_gemm_like",1],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "60267878504897170": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",1],
+        "3895088069642140043": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "2345023488044002149": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",2],
+        "2908249767551054613": ["convolution_gpu_bfyx_gemm_like",2],
+        "3491333679577961640": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1569043950563130463": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "16264774056719724826": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "14902389080201926109": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "17704040183891532914": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",756],
+        "2294800960010879540": ["convolution_gpu_bfyx_gemm_like",2],
+        "12181889163404078773": ["convolution_gpu_bfyx_gemm_like",2],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "789359733867650915": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "6149673627320838019": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "15591167992985613695": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "11465965972527519631": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "3399406641489305996": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "1954052357826969119": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10690972785852373520": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "1082586642383386489": ["convolution_gpu_bfyx_gemm_like",1],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",846],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",46],
+        "6631816968511312100": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "17446505012657609153": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",0],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",685],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "15078590909693331731": ["convolution_gpu_bfyx_gemm_like",2],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "4450409744922989123": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "1081962464388501987": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "11693134363909241514": ["convolution_gpu_yxfb_yxio_b16",2],
+        "10724501418439612080": ["convolution_gpu_bfyx_gemm_like",2],
+        "3499106702307464480": ["convolution_gpu_bfyx_gemm_like",2],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "16894871557229780934": ["convolution_gpu_bfyx_os_iyx_osv16",141],
+        "13483088320871913126": ["convolution_gpu_bfyx_gemm_like",1],
+        "9191832520273617003": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8482147530539941792": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "15466940145773097237": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",1],
+        "9439431829175743345": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",1],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "5622089373755094139": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "153117141968471446": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",1],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "10642327923162019888": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "17442105631503326136": ["convolution_gpu_bfyx_gemm_like",2],
+        "9533360488591027707": ["fully_connected_gpu_fb_io_b8_f8_vload",2],
+        "1867337342417952506": ["convolution_gpu_bfyx_gemm_like",2],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",803],
+        "15961487889420208188": ["convolution_gpu_bfyx_os_iyx_osv16",683],
+        "3691705516240577130": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "5240706676373148280": ["convolution_gpu_bfyx_gemm_like",2],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",1],
+        "13912843078550000960": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "9759380701896779097": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "8873614802459592665": ["convolution_gpu_bfyx_gemm_like",2],
+        "14289048840489035546": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "12159582810513550491": ["convolution_gpu_bfyx_gemm_like",1],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",2],
+        "16307464696265537356": ["convolution_gpu_bfyx_gemm_like",2],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "9631481972809246378": ["convolution_gpu_bfyx_os_iyx_osv16",211],
+        "5109636469531439569": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18215430801133520364": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "14532844474906286088": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",2],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",1],
+        "14031009077471784948": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",1],
+        "11845013061234102293": ["convolution_gpu_bfyx_gemm_like",2],
+        "16955653765071712611": ["convolution_gpu_bfyx_os_iyx_osv16",268],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",615],
+        "4355933224673863178": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "13820498543284008286": ["convolution_gpu_bfyx_gemm_like",1],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "3976736548270395981": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "75742659105146536": ["convolution_gpu_bfyx_os_iyx_osv16",1036],
+        "13418701036204748812": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4974320417566990034": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "15809639778580769565": ["convolution_gpu_bfyx_gemm_like",2],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "2394023805427701338": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",0],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "2294026590516781945": ["convolution_gpu_bfyx_gemm_like",1],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like",2],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "603883331897298932": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "16025442470600124062": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",554],
+        "17711453305763476458": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",2],
+        "7808544677773370430": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_gemm_like",2],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "9933958860597451711": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "13038533272699602337": ["convolution_gpu_bfyx_gemm_like",1],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "17309326904418811234": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "5740745357953479527": ["convolution_gpu_bfyx_gemm_like",2],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",428],
+        "5448537627319798272": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "2727219457659794468": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",723],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "7075659071934895087": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "4488336106517889531": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "4381329435655511217": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "14544219140091420262": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10279778381617181802": ["convolution_gpu_bfyx_os_iyx_osv16",994],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "2007192658799516915": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "16075006181495932250": ["convolution_gpu_bfyx_gemm_like",1],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "17845905249343189063": ["convolution_gpu_bfyx_gemm_like",2],
+        "6203765709597125063": ["convolution_gpu_bfyx_gemm_like",1],
+        "15271783562528081169": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "5592428580503282095": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "12055647521556218046": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",1],
+        "11490143853656040028": ["convolution_gpu_bfyx_gemm_like",2],
+        "6156831095718536092": ["convolution_gpu_bfyx_os_iyx_osv16",233],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",1],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "13472577372534605883": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8961138963663532667": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "17123153447808465303": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "7369903937189508744": ["convolution_gpu_bfyx_os_iyx_osv16",276],
+        "14224121742920800990": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "1089679781525023551": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "2162882863309264684": ["convolution_gpu_bfyx_gemm_like",2],
+        "1972879521448306536": ["convolution_gpu_bfyx_gemm_like",2],
+        "10869005786136023160": ["convolution_gpu_bfyx_os_iyx_osv16",422],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "18372284940315010254": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "8306337702797456793": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",0],
+        "8541982562061181756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5550969016335082071": ["convolution_gpu_bfyx_gemm_like",0],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "2973436171295280783": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",0],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "12874626654611400042": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "18286006396667126860": ["convolution_gpu_bfyx_gemm_like",1],
+        "4079026972040047969": ["convolution_gpu_bfyx_gemm_like",1],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "6025872155179042054": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "18245935804520236353": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "226601879759378771": ["convolution_gpu_bfyx_os_iyx_osv16",613],
+        "11718418772370938734": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "15410074937424854348": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "2111669705686676421": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "1952863937205473292": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "7119182041840303390": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "10912495395422146386": ["convolution_gpu_bfyx_gemm_like",2],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "5381578460674280089": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "17318287523550546026": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",1],
+        "11033507346101404633": ["fully_connected_gpu_fb_oi_ref",0],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",423],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",2],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "15114370307779942381": ["convolution_gpu_bfyx_os_iyx_osv16",104],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",499],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",1],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "5953754321266570854": ["convolution_gpu_bfyx_os_iyx_osv16",1095],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",2],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2349007644347065353": ["convolution_gpu_bfyx_gemm_like",2],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "138379779469699309": ["convolution_gpu_bfyx_gemm_like",1],
+        "17638692805430115529": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",1],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "11192356850081328892": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12672995204641007004": ["convolution_gpu_bfyx_os_iyx_osv16",608],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",2],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",677],
+        "2844746478867668588": ["convolution_gpu_bfyx_gemm_like",2],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "3409255127071376537": ["convolution_gpu_bfyx_gemm_like",2],
+        "2242829490403202087": ["convolution_gpu_bfyx_gemm_like",1],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "6443517114667332732": ["convolution_gpu_bfyx_os_iyx_osv16",548],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",0],
+        "11060822686394981344": ["convolution_gpu_bfyx_gemm_like",1],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",609],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7667898603371717971": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "2797723586312707948": ["convolution_gpu_bfyx_gemm_like",2],
+        "7602222004475424358": ["convolution_gpu_bfyx_gemm_like",1],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_gemm_like",2],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "12936220888307335332": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2850279308978256234": ["convolution_gpu_bfyx_gemm_like",2],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",803],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",358],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "15838113905712517735": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "17489680436564779197": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "7998930863626763670": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "659846949368492111": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "1653274345637156919": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "18424400171776141118": ["convolution_gpu_bfyx_gemm_like",1],
+        "11806105193035393795": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "7726714223809300966": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",1],
+        "12995903177757437362": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "12069726772532946193": ["convolution_gpu_bfyx_os_iyx_osv16",509],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",654],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "13960388312976163971": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",2],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "15378025640603637387": ["convolution_gpu_bfyx_gemm_like",2],
+        "13809330759308309353": ["convolution_gpu_bfyx_gemm_like",1],
+        "13607830451968188080": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "4466647043226271996": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",2],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",49],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "16011429608661242565": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "4747159205186229582": ["convolution_gpu_bfyx_os_iyx_osv16",484],
+        "1711220333751274603": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "4378422094110940766": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",421],
+        "4239415134522959352": ["convolution_gpu_bfyx_gemm_like",2],
+        "11155444222714959508": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "3779229442395464456": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13734043898517059207": ["convolution_gpu_bfyx_gemm_like",0],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",2],
+        "5963901433137582265": ["convolution_gpu_bfyx_gemm_like",2],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15322019609805777935": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "13919204232414535363": ["convolution_gpu_bfyx_os_iyx_osv16",372],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "2543041530639980505": ["convolution_gpu_bfyx_os_iyx_osv16",292],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "9285566577169147378": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "9714508918051740792": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3797957937905580811": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "9378269524012289175": ["convolution_gpu_bfyx_gemm_like",2],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",1],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",2],
+        "1157947252370351851": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",1],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "9300767936311837876": ["convolution_gpu_bfyx_gemm_like",0],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "9810904714798127155": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "5420766967862917815": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",396],
+        "5275016494706355806": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",1016],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "4792351255949877935": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "2114232149447438823": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10789133352712755945": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11077503608116183709": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1237262535285717993": ["convolution_gpu_bfyx_os_iyx_osv16",272],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",794],
+        "4149728557142033774": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "8090497202997192142": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1094],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",5],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "1435153323458789173": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "18199526506796726885": ["convolution_gpu_bfyx_os_iyx_osv16",241],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",4],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "7272538316511343863": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "8434794604559592624": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",1],
+        "11599932445375240727": ["convolution_gpu_bfyx_os_iyx_osv16",568],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",424],
+        "5374664689223295796": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "8700574100180128776": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "12427258337646070422": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",242],
+        "11655994466278963438": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "17922279129043570176": ["convolution_gpu_bfyx_os_iyx_osv16",985],
+        "6323083153920795679": ["convolution_gpu_bfyx_os_iyx_osv16",239],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",2],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "4356806313729405658": ["convolution_gpu_bfyx_gemm_like",2],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",798],
+        "818998169319147148": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",430],
+        "1372939511728986224": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",1],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",208],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "12255528292506999241": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "2260718905219541967": ["convolution_gpu_bfyx_gemm_like",1],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "8519354640245415816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14985755375924972050": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",1],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",722],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",2],
+        "4356817283284529593": ["convolution_gpu_bfyx_gemm_like",2],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "10756831914332769026": ["convolution_gpu_bfyx_gemm_like",1],
+        "7720939595094113814": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "4200340674281276565": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "3995098494991567714": ["convolution_gpu_bfyx_gemm_like",2],
+        "2983038203471784211": ["convolution_gpu_bfyx_gemm_like",2],
+        "13191096881934434519": ["convolution_gpu_bfyx_gemm_like",2],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",0],
+        "5865480930796299143": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",1],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",878],
+        "1003101267609305257": ["convolution_gpu_bfyx_gemm_like",2],
+        "9453100135791813000": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "9796621763733208035": ["convolution_gpu_bfyx_gemm_like",2],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",1058],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",895],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "5509395737020858006": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "11666226259183201584": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "16626226341188424071": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "3534971503826416049": ["convolution_gpu_bfyx_gemm_like",1],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "2921118493468368908": ["convolution_gpu_bfyx_gemm_like",1],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "8913823292181409151": ["fully_connected_gpu_fb_io_b8_f8_vload",1],
+        "13537323999534292650": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "7545013298074733778": ["convolution_gpu_bfyx_os_iyx_osv16",176],
+        "5040095338370816349": ["convolution_gpu_bfyx_gemm_like",2],
+        "1367483816197881270": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "17006655627343469372": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "4917595053453614536": ["convolution_gpu_bfyx_gemm_like",0],
+        "12253049204822930675": ["convolution_gpu_bfyx_gemm_like",1],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",2],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",2],
+        "9250410390663336388": ["convolution_gpu_bfyx_gemm_like",1],
+        "17152614235879767116": ["convolution_gpu_bfyx_os_iyx_osv16",610],
+        "12755692101476964677": ["convolution_gpu_bfyx_gemm_like",2],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",946],
+        "6214677989814002369": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4915831715914920982": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",1],
+        "14599780481362761532": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12577421746159122264": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "11066913713501760080": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "5955575949957198434": ["convolution_gpu_bfyx_gemm_like",1],
+        "3011188207492335920": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "2173867324489962689": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",2],
+        "2281119269283845320": ["convolution_gpu_bfyx_os_iyx_osv16",220],
+        "18384657372655350144": ["convolution_gpu_bfyx_os_iyx_osv16",944],
+        "8079376692609682448": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",86],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",130],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",2],
+        "9606639214735570069": ["convolution_gpu_bfyx_gemm_like",2],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",990],
+        "7356440848422235031": ["convolution_gpu_bfyx_gemm_like",1],
+        "17085927772068621152": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "9763754389347695094": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4291531885506213180": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6280726148869856021": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8585205898894363799": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6490907666077364481": ["convolution_gpu_yxfb_yxio_b16",0],
+        "8645965165922150743": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13426254939418471242": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13077917010686381919": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5958300749101873980": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16184142990117192433": ["convolution_gpu_yxfb_yxio_b16",0],
+        "18148431787172327554": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6709883527730513363": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16768497046700403748": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12867038076564517306": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6902485831441844789": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13705072264927031658": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3286496836813087881": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11888011890096886932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "417352773179383568": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7178866013527118649": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13821224753538037982": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "17811558714592064184": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3571330754519284334": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13408839571805750778": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10015368609444108372": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6822432085522584060": ["convolution_gpu_yxfb_yxio_b16",0],
+        "5802466130040230797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7742126547476513275": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2761862049452027986": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2629918844315184499": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14501815053459103515": ["convolution_gpu_yxfb_yxio_b16",1],
+        "13493119419114659706": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12051398350382954787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7792512829747836997": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17990326690659802090": ["convolution_gpu_yxfb_yxio_b16",2],
+        "13218298785325404589": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8099100633390626027": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6331794802915121861": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3242391637018676328": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7946262362930618714": ["convolution_gpu_yxfb_yxio_b16",0],
+        "15932838442166411183": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2269140636553245446": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17096735128393723245": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9736684300833719045": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6846760451124717672": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7065244994574625911": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11157773554806649837": ["convolution_gpu_yxfb_yxio_b16",1],
+        "6934241437968723825": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7105219760750474587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12771841901357553928": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5977875644245993099": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8652128863605749877": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3766048787611884529": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10961696014697611547": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "16428789154716792138": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13809046727894108358": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7398158542592530232": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7998455776901877973": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15181987458871339815": ["convolution_gpu_bfyx_os_iyx_osv16",1058],
+        "15311930929656759371": ["convolution_gpu_yxfb_yxio_b16",2],
+        "11761545976388416063": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6546440095044731932": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14497254583210965214": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6126579157025017808": ["convolution_gpu_yxfb_yxio_b16",1],
+        "2135164671985938807": ["convolution_gpu_yxfb_yxio_b16",0],
+        "888110783182849535": ["convolution_gpu_yxfb_yxio_b16",0],
+        "12305397676800089268": ["convolution_gpu_yxfb_yxio_b16",2],
+        "14116275901314596944": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4104679489383377966": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3911736807429733938": ["convolution_gpu_yxfb_yxio_b16",2],
+        "5293502980575652171": ["convolution_gpu_yxfb_yxio_b16",0],
+        "16072525303202287969": ["convolution_gpu_yxfb_yxio_b16",0],
+        "17397600088595751782": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15879385408480411034": ["convolution_gpu_yxfb_yxio_b16",2],
+        "12081698011407453832": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10717031088082350652": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7349168847581850619": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1235864574444794315": ["convolution_gpu_yxfb_yxio_b16",1],
+        "636447309806530300": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8260024340787818709": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11942019076226205097": ["convolution_gpu_yxfb_yxio_b16",1],
+        "848735117501914374": ["convolution_gpu_yxfb_yxio_b16",1],
+        "16516262096533373158": ["convolution_gpu_yxfb_yxio_b16",2],
+        "904355798061005466": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7585777271711713778": ["convolution_gpu_yxfb_yxio_b16",0],
+        "3101748967012684440": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15693204620575485046": ["convolution_gpu_yxfb_yxio_b16",0],
+        "3805667660217578518": ["convolution_gpu_yxfb_yxio_b16",2],
+        "6875055157295709098": ["convolution_gpu_yxfb_yxio_b16",1],
+        "8210092359850191682": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6070612528095353265": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4773482308451190487": ["convolution_gpu_yxfb_yxio_b16",0],
+        "8723078862651154959": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3244803973821375252": ["convolution_gpu_yxfb_yxio_b16",1],
+        "4683320313995550908": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5931972000452008090": ["convolution_gpu_yxfb_yxio_b16",1],
+        "1216021647922150199": ["convolution_gpu_yxfb_yxio_b16",2],
+        "17970424536559595893": ["convolution_gpu_yxfb_yxio_b16",2],
+        "231083216612056805": ["convolution_gpu_yxfb_yxio_b16",1],
+        "3112648799276134590": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12327057172281102984": ["convolution_gpu_yxfb_yxio_b16",0],
+        "7369109502608631066": ["convolution_gpu_yxfb_yxio_b16",1],
+        "7134419022268272901": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2263637493894079492": ["convolution_gpu_yxfb_yxio_b16",1],
+        "5312269140190538942": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9312974578711092131": ["convolution_gpu_yxfb_yxio_b16",0],
+        "18101509783610609787": ["convolution_gpu_yxfb_yxio_b16",2],
+        "18359731130169236059": ["convolution_gpu_yxfb_yxio_b16",0],
+        "13009612703754510124": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10465119306486335226": ["convolution_gpu_yxfb_yxio_b16",0],
+        "6962030848164918578": ["convolution_gpu_bfyx_gemm_like",2],
+        "866962088075892990": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9234877552798111728": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1564644716020135424": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15873670348742608564": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "259019999386390213": ["convolution_gpu_bfyx_gemm_like",2],
+        "3710413162291194839": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "10059412755080252504": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "7775757657060166345": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "7667210091570135646": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "12826353318487441420": ["convolution_gpu_bfyx_gemm_like",2],
+        "9413263409511666221": ["convolution_gpu_bfyx_os_iyx_osv16",651],
+        "6932559254646823380": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "7724893184016174483": ["convolution_gpu_bfyx_os_iyx_osv16",650],
+        "14682047605098567432": ["convolution_gpu_bfyx_gemm_like",2],
+        "14456272420357730548": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "1739161573487933165": ["convolution_gpu_bfyx_gemm_like",1],
+        "10034746179209540014": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "10432925516327889351": ["convolution_gpu_bfyx_os_iyx_osv16",751],
+        "8977099691399563065": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "10462797712860969072": ["convolution_gpu_bfyx_os_iyx_osv16",264],
+        "7020743056013297476": ["convolution_gpu_bfyx_gemm_like",2],
+        "17939745299931100048": ["convolution_gpu_bfyx_os_iyx_osv16",308],
+        "4054010905884346287": ["convolution_gpu_bfyx_gemm_like",2],
+        "384240534894352154": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "16622402936526588344": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "11795686089670429481": ["convolution_gpu_bfyx_os_iyx_osv16",936],
+        "1213958002895787672": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "12715500118796263683": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "10178462061836778766": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "17924819398394001587": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "6831045740006076251": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "17236135174912837061": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "14084855778741260863": ["convolution_gpu_bfyx_os_iyx_osv16",192],
+        "4959718589070770515": ["convolution_gpu_bfyx_os_iyx_osv16",719],
+        "13337122303005980542": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "13947140171097868740": ["convolution_gpu_bfyx_os_iyx_osv16",987],
+        "1168311873250200110": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "5023609284081684300": ["convolution_gpu_bfyx_gemm_like",2],
+        "10159790066948852390": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "17381682740282686038": ["convolution_gpu_bfyx_os_iyx_osv16",649],
+        "2772704069752888874": ["convolution_gpu_bfyx_os_iyx_osv16",277],
+        "12318427976031000768": ["convolution_gpu_bfyx_gemm_like",1],
+        "15891746043846062984": ["convolution_gpu_bfyx_gemm_like",2],
+        "45545661884854912": ["convolution_gpu_bfyx_os_iyx_osv16",1122],
+        "11102920976866402928": ["convolution_gpu_bfyx_os_iyx_osv16",951],
+        "15737508945513376813": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "17869697579874327192": ["convolution_gpu_bfyx_os_iyx_osv16",950],
+        "14674266217397415571": ["convolution_gpu_bfyx_gemm_like",2],
+        "7813041847979170166": ["convolution_gpu_bfyx_gemm_like",2],
+        "1962479636209947761": ["convolution_gpu_bfyx_os_iyx_osv16",674],
+        "11539652577193034099": ["convolution_gpu_bfyx_gemm_like",1],
+        "13140527131098422428": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "498420237272375425": ["convolution_gpu_bfyx_gemm_like",2],
+        "779633618375662086": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "7344363094493575878": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "4186957909762095019": ["convolution_gpu_bfyx_gemm_like",2],
+        "2705394837952559308": ["convolution_gpu_bfyx_os_iyx_osv16",1030],
+        "17902799955139047426": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "7831542641855749925": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "14010642743400284761": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11868789283464117390": ["convolution_gpu_bfyx_os_iyx_osv16",1055],
+        "3598116387801985039": ["convolution_gpu_bfyx_os_iyx_osv16",370],
+        "5461980510262646821": ["convolution_gpu_bfyx_gemm_like",2],
+        "10809330882739297269": ["convolution_gpu_bfyx_os_iyx_osv16",640],
+        "15052127817178941719": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "8616175124735896626": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "12175297963550750804": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "2343921093633784755": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "17358462939783262207": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "17406383217119217230": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "15365628642332393565": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "6638761803107874904": ["convolution_gpu_bfyx_os_iyx_osv16",513],
+        "1630585964216121575": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "16705941191876956548": ["convolution_gpu_bfyx_os_iyx_osv16",512],
+        "13395074742046717601": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "12659539044474018256": ["convolution_gpu_bfyx_os_iyx_osv16",143],
+        "1557549837620967530": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "14322754320861242412": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "11369389082421346630": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "4986977887030495943": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "11962541545116807979": ["convolution_gpu_bfyx_os_iyx_osv16",543],
+        "8730097760819044515": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "11882021989615795558": ["convolution_gpu_bfyx_os_iyx_osv16",378],
+        "16780457022162749898": ["convolution_gpu_bfyx_gemm_like",2],
+        "17140702790441856730": ["convolution_gpu_bfyx_gemm_like",2],
+        "2578325663193624576": ["convolution_gpu_yxfb_yxio_b16",2],
+        "8784358107340738205": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2955459120402821540": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2840794055129352139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7104266560248570112": ["convolution_gpu_yxfb_yxio_b16",0],
+        "11113125355390956764": ["convolution_gpu_yxfb_yxio_b16",1],
+        "9127827617126714860": ["fully_connected_gpu_yxfb_ref",0],
+        "15148442194461613102": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "13520876347177213888": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "241656278218999298": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2164314506903530487": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15985980444340490463": ["convolution_gpu_yxfb_yxio_b16",0],
+        "5284456216115118110": ["convolution_gpu_yxfb_yxio_b16",1],
+        "466744273945239777": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7099035779223341587": ["convolution_gpu_yxfb_yxio_b16",2],
+        "3096280563014331836": ["convolution_gpu_yxfb_yxio_b16",0],
+        "768820004084041271": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15409184364121627414": ["convolution_gpu_yxfb_yxio_b16",0],
+        "4597873630741623918": ["convolution_gpu_yxfb_yxio_b16",1],
+        "11226912053840621089": ["convolution_gpu_yxfb_yxio_b16",0],
+        "18209930746627816139": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15757351352532908153": ["convolution_gpu_bfyx_os_iyx_osv16",1051],
+        "5041922366297242362": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1068155851494601726": ["convolution_gpu_yxfb_yxio_b16",2],
+        "9309173544512377803": ["convolution_gpu_yxfb_yxio_b16",1],
+        "12721294268595880422": ["convolution_gpu_yxfb_yxio_b16",1],
+        "14248622935809594779": ["convolution_gpu_yxfb_yxio_b16",0],
+        "3742751561273931407": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10745099399736462076": ["convolution_gpu_yxfb_yxio_b16",2],
+        "7412772553395852003": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1290180607037086383": ["convolution_gpu_yxfb_yxio_b16",2],
+        "2172999245833525797": ["convolution_gpu_yxfb_yxio_b16",2],
+        "16601230690171340432": ["convolution_gpu_yxfb_yxio_b16",1],
+        "15457040168177954463": ["convolution_gpu_yxfb_yxio_b16",2],
+        "1129349074674368869": ["convolution_gpu_yxfb_yxio_b16",2],
+        "15669242195570440840": ["convolution_gpu_yxfb_yxio_b16",1],
+        "560996739186313493": ["convolution_gpu_yxfb_yxio_b16",1],
+        "10572380563704942622": ["convolution_gpu_yxfb_yxio_b16",0],
+        "2501411300945696806": ["convolution_gpu_yxfb_yxio_b16",2],
+        "4216366893358625960": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "8846314870152404018": ["convolution_gpu_bfyx_gemm_like",2],
+        "15997145184054496085": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5941095082097535176": ["convolution_gpu_bfyx_gemm_like",1],
+        "15281554100135159550": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "430132942408244070": ["convolution_gpu_bfyx_gemm_like",2],
+        "225809055928705881": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11088324811742486481": ["convolution_gpu_bfyx_gemm_like",2],
+        "522313477023837056": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16316483048621486077": ["convolution_gpu_bfyx_gemm_like",2],
+        "14262482011051329729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4824040283449153298": ["convolution_gpu_bfyx_gemm_like",0],
+        "3948843501884284998": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10006197783106691106": ["convolution_gpu_bfyx_gemm_like",0],
+        "2917999294360728537": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7719954202744123391": ["convolution_gpu_bfyx_gemm_like",2],
+        "10399620940700804517": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5735703235236456131": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "8768300687476117215": ["convolution_gpu_bfyx_os_iyx_osv16",345],
+        "7815650257256675477": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "13325762052023866627": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2803569867265035123": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "3856976081672275637": ["convolution_gpu_bfyx_gemm_like",0],
+        "8365255170846178102": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "8075261051536686307": ["convolution_gpu_bfyx_os_iyx_osv16",651],
+        "9184275066167601343": ["convolution_gpu_bfyx_os_iyx_osv16",529],
+        "12248852114219058572": ["convolution_gpu_bfyx_gemm_like",1],
+        "17439102502195540957": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "3059575629482816852": ["convolution_gpu_bfyx_os_iyx_osv16",574],
+        "5516518048239364231": ["convolution_gpu_bfyx_os_iyx_osv16",98],
+        "15833461718320604065": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "16828961272295386615": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "886880682650879171": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "11861634536583463947": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "15325852281951905610": ["convolution_gpu_bfyx_os_iyx_osv16",94],
+        "14365232561737454031": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "7498614018449036163": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "15813044197987178947": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "7287107719392705356": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "2058172559199858297": ["convolution_gpu_bfyx_os_iyx_osv16",383
+        ]
+    },
+    "18": {
+        "14650567822254940018": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15726902746983125797": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "5211831143687501130": ["convolution_gpu_bfyx_os_iyx_osv16",316],
+        "1398177377739338750": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7693459946348737411": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "12494969618927201911": ["fully_connected_gpu_yxfb_ref",0],
+        "6324565723045697080": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "2057158988261512114": ["convolution_gpu_bfyx_1x1",2],
+        "5758133252959371492": ["convolution_gpu_bfyx_gemm_like",2],
+        "1934379409955686502": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3438116423688595487": ["convolution_gpu_bfyx_os_iyx_osv16",1026],
+        "15331103261044247142": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "18008552719153887303": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "16234606052818596502": ["convolution_gpu_bfyx_os_iyx_osv16",99],
+        "12932635875905153141": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "8922929126299811091": ["convolution_gpu_bfyx_1x1",0],
+        "12245096462203481681": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "12085348936192462321": ["convolution_gpu_bfyx_gemm_like",2],
+        "14878347463243157447": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "7474592508575297101": ["convolution_gpu_bfyx_1x1",1],
+        "7807983899017500046": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1081962464388501987": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13681462437496627948": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "17310332946322628458": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9243949750444156746": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17001502418583498926": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "7953255701516490034": ["convolution_gpu_bfyx_os_iyx_osv16",61],
+        "1230262279011217327": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4282661608732125403": ["convolution_gpu_bfyx_os_iyx_osv16",732],
+        "9389555743403158574": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7139714914586273766": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "7370273921473161914": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12590922530749026871": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "4185398348055518182": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "5303970743736042689": ["convolution_gpu_bfyx_gemm_like",2],
+        "11507538232733291666": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "10046663998164493552": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "16992405636352406660": ["convolution_gpu_bfyx_gemm_like",1],
+        "13434576226708227155": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "14136097914489095982": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "13105192484434299621": ["convolution_gpu_bfyx_gemm_like",2],
+        "9423958333298993923": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "14412158605670555579": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "8576733135863336233": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "3511588484597779204": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "14353390922580547467": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "7994179151788368291": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "8971115542951085891": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "17179609670678746034": ["convolution_gpu_bfyx_gemm_like",2],
+        "13268525255152984893": ["convolution_gpu_bfyx_os_iyx_osv16",939],
+        "1383899865465106141": ["convolution_gpu_bfyx_gemm_like",2],
+        "11717348577195224554": ["convolution_gpu_bfyx_gemm_like",2],
+        "135072053401934228": ["convolution_gpu_bfyx_1x1",2],
+        "10555597973766215754": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "14799579913711096584": ["convolution_gpu_bfyx_gemm_like",2],
+        "12813978452097969536": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "11669828823444745889": ["convolution_gpu_bfyx_gemm_like",2],
+        "3128856679264648666": ["convolution_gpu_bfyx_gemm_like",2],
+        "15696910741835640150": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "11806402239500046867": ["convolution_gpu_bfyx_os_iyx_osv16",373],
+        "10320711719466983961": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "4958835037528182801": ["convolution_gpu_bfyx_1x1",2],
+        "3727142736386026852": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "9213563311267466388": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2618108630886857741": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9367157746678824712": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14424566003632608852": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18268811652302076976": ["convolution_gpu_bfyx_gemm_like",2],
+        "9100044555742394133": ["convolution_gpu_bfyx_os_iyx_osv16",171],
+        "15790005937034794347": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "13898284586432291433": ["convolution_gpu_bfyx_gemm_like",2],
+        "3106911159524421371": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "792684262493086891": ["convolution_gpu_bfyx_gemm_like",0],
+        "14885031472057965707": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8398910340371320955": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10264913782610095832": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "13898821685774165645": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "14211903923555028634": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "17912189681971987483": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "15284262113150488297": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "12068974703657294908": ["convolution_gpu_bfyx_1x1",0],
+        "16988275131627316108": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "17983556812075120553": ["convolution_gpu_bfyx_1x1",1],
+        "2581414750854621875": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "2964705957088952872": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "6114147683777615071": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "12894625941923144893": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "12129572274423886770": ["convolution_gpu_bfyx_os_iyx_osv16",627],
+        "6341197991729122563": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "3285520504090196295": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3796274347773622633": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "12214162812589030126": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "7311120574972466702": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16811402686462277562": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "15493488989417521388": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "18133334552107213128": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "17342198739672369885": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15351724241036614758": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "6631816968511312100": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13603318842632052764": ["convolution_gpu_bfyx_os_iyx_osv16",381],
+        "2727219457659794468": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8101977280003030465": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12782191856884962803": ["convolution_gpu_bfyx_gemm_like",2],
+        "2704063557078535883": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "1559798212423183813": ["convolution_gpu_bfyx_os_iyx_osv16",274],
+        "11814740669468421049": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "18270587701371596297": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "4759671642533786591": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "18232278892738147217": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "7727001441358508665": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "3106591708459602370": ["convolution_gpu_bfyx_os_iyx_osv16",942],
+        "2161052921317193579": ["convolution_gpu_bfyx_gemm_like",2],
+        "14883438809987378616": ["convolution_gpu_bfyx_1x1",2],
+        "7590767013583950613": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "16370218798911151331": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "10014448860206587805": ["convolution_gpu_bfyx_gemm_like",2],
+        "5687802882700097624": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "1143214652021653634": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "641417817126876622": ["convolution_gpu_bfyx_gemm_like",2],
+        "13558618754911056302": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "3909551222373722085": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "5592556538784745960": ["convolution_gpu_bfyx_gemm_like",2],
+        "6623182990939010641": ["convolution_gpu_bfyx_gemm_like",1],
+        "11175955260573469979": ["convolution_gpu_bfyx_os_iyx_osv16",104],
+        "15410074937424854348": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "89439319782574517": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "11398019086259011063": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1116274074896622552": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11107930597263802755": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "2100891581797371600": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3522383297921565178": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "18233660940545931789": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "15426960908024585800": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1287490919205560806": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "9285566577169147378": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10572945270796129630": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "288853243482418538": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2421404763191415191": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2103882464623009432": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "5448537627319798272": ["convolution_gpu_bfyx_gemm_like",0],
+        "11657946392097042544": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "2920322372993101148": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16076153317792960383": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "761169277744593430": ["convolution_gpu_bfyx_os_iyx_osv16",105],
+        "385046297070779752": ["convolution_gpu_bfyx_os_iyx_osv16",533],
+        "7407975398526425554": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "534032316469702287": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "6681818065741882453": ["convolution_gpu_bfyx_gemm_like",0],
+        "13468713306678453952": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2806529556090896246": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "15720507574336564201": ["convolution_gpu_bfyx_gemm_like",2],
+        "9533360488591027707": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "14082448162400225052": ["convolution_gpu_bfyx_1x1",2],
+        "17726079670612220433": ["convolution_gpu_bfyx_gemm_like",2],
+        "11334122788337402526": ["convolution_gpu_bfyx_1x1",2],
+        "13558687084677943158": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "6297802534570892679": ["convolution_gpu_bfyx_os_iyx_osv16",158],
+        "7187734276051878356": ["convolution_gpu_bfyx_gemm_like",2],
+        "7868973874302246233": ["convolution_gpu_bfyx_gemm_like",2],
+        "2814805887448339818": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "3002986032379998259": ["convolution_gpu_bfyx_os_iyx_osv16",909],
+        "5597908143491399643": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "17209528805596238905": ["convolution_gpu_bfyx_gemm_like",2],
+        "2968094709908141988": ["convolution_gpu_bfyx_os_iyx_osv16",396],
+        "12796777049340516563": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1126499865206906037": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "5538883245745495145": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "14070988879848388270": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "12421707187947291166": ["convolution_gpu_bfyx_gemm_like",1],
+        "5385316497510064491": ["fully_connected_gpu_fb_oi_ref",2],
+        "8300655194765375060": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "11130439225010714550": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "13503688893307029975": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16044646335477470657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9406763539724266157": ["convolution_gpu_bfyx_1x1",0],
+        "3438296636411972401": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10987953316324712538": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "13104509059416300615": ["convolution_gpu_bfyx_os_iyx_osv16",296],
+        "1208161922424418734": ["convolution_gpu_bfyx_gemm_like",2],
+        "12793908914872030220": ["convolution_gpu_bfyx_gemm_like",2],
+        "2566302789609970663": ["convolution_gpu_bfyx_os_iyx_osv16",815],
+        "10765280349477640969": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "17596685300497748803": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "16103943009195163681": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "15249442550355454201": ["convolution_gpu_bfyx_gemm_like",2],
+        "7964396197946740183": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "17823133607491820214": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "15522785615618973614": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "8761283252495354972": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "11897113890115321056": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "12949204491386872217": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "8133587696326295326": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1036010477232750453": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "724953082687879224": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "182115051096556835": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "11723735945517472199": ["convolution_gpu_bfyx_os_iyx_osv16",668],
+        "265124365266629363": ["convolution_gpu_bfyx_os_iyx_osv16",246],
+        "18269685060032395235": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "5010119207726811326": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3976736548270395981": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "1628593159980574595": ["convolution_gpu_bfyx_gemm_like",1],
+        "187352687850707150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13450061819089402572": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "953306082374100275": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "7393601059996816014": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12136029303893296753": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "13410850301164057911": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "12977678792503377525": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10280619408766255552": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "6928835003016610382": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "10425889533411573166": ["convolution_gpu_bfyx_gemm_like",2],
+        "16995873636564597028": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "13364676690016875118": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "7009735776703529573": ["convolution_gpu_bfyx_os_iyx_osv16",968],
+        "7354234812009979811": ["convolution_gpu_bfyx_os_iyx_osv16",859],
+        "14757749560543979231": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "3336076058264596420": ["convolution_gpu_bfyx_gemm_like",2],
+        "7209217811135076623": ["convolution_gpu_bfyx_gemm_like",2],
+        "12071914115316550349": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "7903891232234389925": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7650375560336513366": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "13649894122307008732": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "10607904718265020949": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "10404725818204494388": ["convolution_gpu_bfyx_gemm_like",1],
+        "16327433707667075261": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10128120599276549920": ["convolution_gpu_bfyx_1x1",2],
+        "863952266514375915": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "10544411879329675593": ["convolution_gpu_bfyx_os_iyx_osv16",382],
+        "8203171222962341018": ["convolution_gpu_bfyx_gemm_like",2],
+        "17796310681498690253": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "12908594497114706897": ["convolution_gpu_bfyx_1x1",1],
+        "13781423818051299677": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "4717620775314557374": ["convolution_gpu_bfyx_gemm_like",2],
+        "4764776977138392550": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "12801481303602178879": ["convolution_gpu_bfyx_gemm_like",2],
+        "9216608098626790565": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "1028160614515220430": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "9589942627115344216": ["convolution_gpu_bfyx_os_iyx_osv16",851],
+        "17790026124881397912": ["fully_connected_gpu_yxfb_ref",2],
+        "10279778381617181802": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13853056718266488510": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "4299492266819967844": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "2490155559809645659": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "15190508870639648203": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3883845471211207871": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "7995820969034996638": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "15271783562528081169": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "742689192890486807": ["convolution_gpu_bfyx_gemm_like",2],
+        "13853630125050609175": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "14711697456265712456": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "2221145174704245189": ["convolution_gpu_bfyx_gemm_like",2],
+        "13038533272699602337": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "6345550009198921347": ["convolution_gpu_bfyx_os_iyx_osv16",998],
+        "3715177305271762194": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "9270950131920019932": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "14150012830816329527": ["convolution_gpu_bfyx_gemm_like",2],
+        "15817443774186015593": ["convolution_gpu_bfyx_1x1",0],
+        "15126660425728872065": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "14484890926084856480": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "15488550074426713959": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "592245952014430043": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "2116913943188857359": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "6860503758000008398": ["convolution_gpu_bfyx_os_iyx_osv16",832],
+        "16027853590391209100": ["convolution_gpu_bfyx_gemm_like",2],
+        "14088382963493477342": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "9019388470685749691": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "5078905972285278557": ["convolution_gpu_bfyx_gemm_like",2],
+        "1500571771538985941": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "11158789938857558596": ["convolution_gpu_bfyx_1x1",0],
+        "14387756025635589673": ["convolution_gpu_bfyx_1x1",0],
+        "4085907608404305515": ["convolution_gpu_bfyx_gemm_like",0],
+        "3782239800777370325": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "9481675228591993785": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "16511393582666965704": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "11970881115757095265": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "11810221946429451169": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14691372262153587653": ["convolution_gpu_bfyx_os_iyx_osv16",1059],
+        "1306339989221885682": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "994489782629179836": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "318377908569897093": ["convolution_gpu_bfyx_gemm_like",2],
+        "3281207855459771997": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "3819990462129075757": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "12512751736409465214": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2030309697153345387": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "12534001599784153836": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "12181607120522804433": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "7806129039150321333": ["convolution_gpu_bfyx_gemm_like",2],
+        "13477416097954638887": ["fully_connected_gpu_bf_io_gemm",1],
+        "13489318651148001664": ["convolution_gpu_bfyx_gemm_like",2],
+        "17306482303091342504": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "7744787957569714828": ["convolution_gpu_bfyx_os_iyx_osv16",1096],
+        "16667887002111125871": ["convolution_gpu_bfyx_gemm_like",2],
+        "3499243120652875549": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "3062101811226530720": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "10672380526821947133": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "18037918102910297531": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "10816637153861630723": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "6614833247756539341": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "4347816192417741558": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "5042176052323856983": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18400379759523099542": ["convolution_gpu_bfyx_gemm_like",2],
+        "2095802691829304676": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "2173720698351153121": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "7130694811424715594": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "17638692805430115529": ["convolution_gpu_bfyx_gemm_like",2],
+        "4056971751486746551": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7549378486471456156": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "13025361884606488732": ["convolution_gpu_bfyx_gemm_like",1],
+        "1299545313185409227": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "6942049339361951275": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "3001615302961701154": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4994591211723226974": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "3621424752591567930": ["convolution_gpu_bfyx_gemm_like",2],
+        "17381516856910544374": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "4670443882075998209": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "6634330132674952638": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "9819596940685093690": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "7164580481046523192": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "840202264034382558": ["convolution_gpu_bfyx_os_iyx_osv16",843],
+        "10784073615329190425": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "15881381297320383917": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "4623542918584461522": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9868561386826862471": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "1450888744802985214": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "12051595062513871723": ["convolution_gpu_bfyx_1x1",0],
+        "4216958486055161753": ["convolution_gpu_bfyx_gemm_like",1],
+        "17881905640473324965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7056030150365552588": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "9860570706348640782": ["convolution_gpu_bfyx_gemm_like",0],
+        "8159303545761286685": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "17542176922797334839": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "5197105253412476591": ["convolution_gpu_bfyx_gemm_like",2],
+        "17729546848373991614": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "875296362957469305": ["convolution_gpu_bfyx_gemm_like",2],
+        "11754316727756881612": ["convolution_gpu_bfyx_gemm_like",1],
+        "2287356884312581209": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "13754408679115174221": ["convolution_gpu_bfyx_gemm_like",2],
+        "10892456883214928095": ["convolution_gpu_bfyx_os_iyx_osv16",565],
+        "12601126285773042005": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "17791024851737594885": ["convolution_gpu_bfyx_1x1",0],
+        "2524029454785583409": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "5482851829165191681": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "8236018377815149638": ["convolution_gpu_bfyx_os_iyx_osv16",247],
+        "2044363708106765326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "488298169768725160": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "2984726467649419856": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "17163158934005653629": ["convolution_gpu_bfyx_os_iyx_osv16",505],
+        "7172604084103519563": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "15112599407339712681": ["convolution_gpu_bfyx_1x1",1],
+        "17928043901784474130": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "2915165824085219545": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "15447513376965243034": ["convolution_gpu_bfyx_os_iyx_osv16",57],
+        "875400109066360897": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "1418595171949196661": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "16611452077660879545": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "16431165572426232677": ["convolution_gpu_bfyx_os_iyx_osv16",434],
+        "12015336418727455195": ["convolution_gpu_bfyx_1x1",0],
+        "13613399861925108148": ["convolution_gpu_bfyx_os_iyx_osv16",501],
+        "13059207969254830451": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "6644418194983229139": ["convolution_gpu_bfyx_gemm_like",2],
+        "4795705973706796563": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "2625969259447793593": ["convolution_gpu_bfyx_1x1",2],
+        "10016815108730511683": ["convolution_gpu_bfyx_gemm_like",2],
+        "10090923790949378407": ["convolution_gpu_bfyx_gemm_like",2],
+        "2527189070714658176": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13611054146745413536": ["convolution_gpu_bfyx_gemm_like",2],
+        "2777318471329665162": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "1354647381212852890": ["convolution_gpu_bfyx_1x1",1],
+        "11192356850081328892": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3633858263279042265": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "15805087418686802636": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "9205978149692979955": ["convolution_gpu_bfyx_gemm_like",2],
+        "30229601562833524": ["convolution_gpu_bfyx_gemm_like",2],
+        "17839839336294937155": ["convolution_gpu_bfyx_gemm_like",1],
+        "4867937397499803072": ["convolution_gpu_bfyx_gemm_like",2],
+        "6075691042233712335": ["convolution_gpu_bfyx_gemm_like",2],
+        "11031358859656806724": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "16884228931101540030": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "2373860353284525265": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "12802517759474139810": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "1518270620354036926": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "10323345824599612614": ["convolution_gpu_bfyx_gemm_like",1],
+        "12278364834477923930": ["convolution_gpu_bfyx_gemm_like",2],
+        "5039037192630609823": ["convolution_gpu_bfyx_gemm_like",2],
+        "3835286851569826052": ["convolution_gpu_bfyx_gemm_like",2],
+        "7875724726741958520": ["fully_connected_gpu_fb_oi_ref",1],
+        "3441335188113424896": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "6067904130482758510": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "528295119724008711": ["convolution_gpu_bfyx_os_iyx_osv16",508],
+        "17208186152576814861": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "9328223957245552723": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "577182964135927041": ["convolution_gpu_bfyx_os_iyx_osv16",35],
+        "8040001390872143271": ["convolution_gpu_bfyx_gemm_like",2],
+        "3160543867929843861": ["convolution_gpu_bfyx_1x1",0],
+        "586947787345351152": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1122856374602590533": ["convolution_gpu_bfyx_1x1",0],
+        "15287650965861631130": ["convolution_gpu_bfyx_gemm_like",2],
+        "15123868617509445149": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "16542318967217020315": ["convolution_gpu_bfyx_gemm_like",2],
+        "7808544677773370430": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7969441643457570812": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "8907982643256296667": ["convolution_gpu_bfyx_1x1",1],
+        "5479761740065152589": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "3114869763557037270": ["fully_connected_gpu_bfyx_ref",2],
+        "2294026590516781945": ["convolution_gpu_bfyx_gemm_like",2],
+        "1249137685908951501": ["convolution_gpu_bfyx_gemm_like",1],
+        "2940027113687311893": ["convolution_gpu_bfyx_gemm_like",2],
+        "15943141845766932879": ["convolution_gpu_bfyx_1x1",1],
+        "17775705003104146872": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "403634422724914329": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "18154019240019929225": ["convolution_gpu_bfyx_gemm_like",2],
+        "9319254979377483709": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4424217045094988504": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "6800893510381991731": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "18436249934780056991": ["convolution_gpu_bfyx_gemm_like",2],
+        "8931169575495985034": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "10916647716124396856": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "8296551195150971668": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "3974589991022739479": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "9120377367517042357": ["convolution_gpu_bfyx_1x1",0],
+        "6817494598328071314": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "14289048840489035546": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8951040603784899163": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14046114605615338907": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "11626402549863483301": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "8057302050645780813": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "6711878663358611849": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "7076937538747704750": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "17891499682354369344": ["convolution_gpu_bfyx_gemm_like",2],
+        "537074122417021898": ["convolution_gpu_bfyx_gemm_like",2],
+        "13993548620104010490": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "16781127329510211966": ["convolution_gpu_bfyx_gemm_like",2],
+        "15239764240622554314": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",0],
+        "15746620724134970969": ["convolution_gpu_bfyx_1x1",0],
+        "15163327502374403643": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "15728009639807698634": ["convolution_gpu_bfyx_os_iyx_osv16",594],
+        "4436244774193918646": ["fully_connected_gpu_fb_oi_ref",0],
+        "2683507674615735878": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10795104632256101599": ["convolution_gpu_bfyx_os_iyx_osv16",214],
+        "2909728331855309274": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "13002363400738122017": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "11007944497812650617": ["convolution_gpu_bfyx_gemm_like",2],
+        "15695415285791951018": ["convolution_gpu_bfyx_gemm_like",0],
+        "14011124615649605281": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16245760498096322525": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "4124478505694604763": ["convolution_gpu_bfyx_1x1",1],
+        "17219920118109316867": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "17353894529222574441": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "15349944413643626251": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "5608133987357542077": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "17739868787095417856": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "9226912483632588371": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9057036344533510776": ["convolution_gpu_bfyx_gemm_like",2],
+        "797387385159110695": ["convolution_gpu_bfyx_gemm_like",2],
+        "13951781924205611716": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "2781309272856442321": ["convolution_gpu_bfyx_1x1",2],
+        "3863816884636503247": ["convolution_gpu_bfyx_gemm_like",2],
+        "4400247897123856252": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "2235210915304938149": ["convolution_gpu_bfyx_gemm_like",2],
+        "15890473622821659630": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "2638131332283395057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6678796313875454849": ["convolution_gpu_bfyx_gemm_like",2],
+        "4542143431130171516": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "17995371099806008878": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "4750755523645265967": ["convolution_gpu_bfyx_gemm_like",2],
+        "6854611304056079417": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "11709992724966310174": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "11988546375476924356": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "14671212883301405408": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14444475853714164129": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "17830290099875088207": ["convolution_gpu_bfyx_gemm_like",2],
+        "11771014003680394135": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11988285441493553006": ["convolution_gpu_bfyx_gemm_like",0],
+        "10408322429232132983": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "16577611471466452776": ["convolution_gpu_bfyx_gemm_like",1],
+        "726985753660756762": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "16172528828198474326": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8127190765748950828": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "15470013032930986062": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "11878734040194151073": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7817036102984218692": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "14046990030104971367": ["convolution_gpu_bfyx_os_iyx_osv16",733],
+        "18059267466971880386": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "12864558900883069118": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13793441296561946357": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "12391792381149655331": ["convolution_gpu_bfyx_gemm_like",2],
+        "5211191663202250117": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "2893564501191050837": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "7706714181281908433": ["convolution_gpu_bfyx_gemm_like",2],
+        "9824678205469832038": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "7603319690872333930": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "11315238071192463859": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8619526128410675593": ["convolution_gpu_bfyx_os_iyx_osv16",1112],
+        "4897991181236908768": ["convolution_gpu_bfyx_gemm_like",2],
+        "11530101016435264783": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "9794456440994218671": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "2653651564133701304": ["convolution_gpu_bfyx_os_iyx_osv16",1097],
+        "4190912926126844643": ["convolution_gpu_bfyx_1x1",1],
+        "17006133396401462698": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "12516911293946682547": ["convolution_gpu_bfyx_os_iyx_osv16",620],
+        "18245935804520236353": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14406070210216948643": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "4885944395876887711": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "7692849839965441330": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "18202222342562516071": ["convolution_gpu_bfyx_os_iyx_osv16",890],
+        "8616686489737649890": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "3106922888635965020": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8995598177504756805": ["convolution_gpu_bfyx_os_iyx_osv16",855],
+        "3063055767192991776": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "4220826666482500445": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "17108987360340581555": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "9541630719145326121": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "12843671306854567956": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "509781001842353609": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "14823616678465136590": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "12644942072153919043": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "8739347545059610410": ["convolution_gpu_bfyx_gemm_like",2],
+        "2371412124305478965": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12790570304622911607": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "7351733901977025859": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "18128162750557822655": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "16710010075465723498": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "6650607472019166205": ["convolution_gpu_bfyx_1x1",1],
+        "8916983923551808409": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "15188570678726970998": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16871004845988227014": ["convolution_gpu_bfyx_1x1",1],
+        "8857763129101380288": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "7430073011895298582": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7181154048972884375": ["convolution_gpu_bfyx_gemm_like",2],
+        "18357544235608006954": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "16084700435355748612": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "8818070832398055086": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15831600396403741571": ["convolution_gpu_bfyx_gemm_like",2],
+        "8075180350084516696": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14898892437285105327": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "10022487076451608714": ["convolution_gpu_bfyx_os_iyx_osv16",695],
+        "17921973525603585874": ["convolution_gpu_bfyx_gemm_like",1],
+        "11311859068168414878": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "10509933181132310969": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11682323163346544125": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "15773157615731010456": ["convolution_gpu_bfyx_os_iyx_osv16",529],
+        "18126685473408206840": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "16720108310653948550": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15223164574152266895": ["convolution_gpu_bfyx_1x1",2],
+        "8881135571874888085": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "10308113903347312964": ["convolution_gpu_bfyx_gemm_like",2],
+        "5091558853871982858": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "5738835498104275267": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "10930640103080573253": ["convolution_gpu_bfyx_1x1",1],
+        "5609922876429907954": ["convolution_gpu_bfyx_gemm_like",0],
+        "15800447082078291243": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "14184895905338394239": ["convolution_gpu_bfyx_os_iyx_osv16",960],
+        "13842309033760176194": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2133849627845285277": ["convolution_gpu_bfyx_os_iyx_osv16",802],
+        "4801117903303888658": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "10890975553758439233": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "3646228701104397128": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "13145474177271090694": ["convolution_gpu_bfyx_gemm_like",2],
+        "2995134938466176198": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12134858519320245809": ["convolution_gpu_bfyx_1x1",2],
+        "18235209540858013173": ["convolution_gpu_bfyx_1x1",0],
+        "11083993858285515074": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "13710319251108632115": ["convolution_gpu_bfyx_1x1",1],
+        "15494543914974994991": ["convolution_gpu_bfyx_gemm_like",2],
+        "4479979951990338510": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1920042803083729276": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "16589607587365212240": ["convolution_gpu_bfyx_gemm_like",2],
+        "7678457226823073886": ["convolution_gpu_bfyx_os_iyx_osv16",187],
+        "3934290309368153435": ["fully_connected_gpu_bf_io_gemm",0],
+        "10151922632636937118": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12566041126392848976": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "14956246091163580499": ["convolution_gpu_bfyx_os_iyx_osv16",480],
+        "9028970753877215614": ["convolution_gpu_bfyx_os_iyx_osv16",861],
+        "7005509036795164602": ["convolution_gpu_bfyx_1x1",0],
+        "9918371346247634545": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "3814584042139408454": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "12177387334053203378": ["convolution_gpu_bfyx_gemm_like",2],
+        "5485749317130402302": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "13471752029049484143": ["convolution_gpu_bfyx_gemm_like",2],
+        "7368916076070115064": ["convolution_gpu_bfyx_os_iyx_osv16",1002],
+        "1044978617045366709": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "14621327324047759584": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "8961138963663532667": ["convolution_gpu_bfyx_gemm_like",1],
+        "9316082753126682958": ["convolution_gpu_bfyx_gemm_like",1],
+        "9058996149754556268": ["convolution_gpu_bfyx_os_iyx_osv16",673],
+        "7880845322716481548": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "14990645740260870030": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "6219075471508685758": ["convolution_gpu_bfyx_gemm_like",0],
+        "16208488491972128275": ["convolution_gpu_bfyx_os_iyx_osv16",814],
+        "6329618009202266591": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10486348549691280032": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "11872943152839631823": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "8560635685184432720": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3873183249402084406": ["convolution_gpu_bfyx_gemm_like",2],
+        "11418379777288974452": ["convolution_gpu_bfyx_gemm_like",0],
+        "5469227748156438008": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "394778201589371681": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13699740641705514374": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16955653765071712611": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1875764913306932583": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "18423051691107460439": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "7082007579524697455": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "3806761527342944195": ["convolution_gpu_bfyx_gemm_like",0],
+        "16243196137456624852": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "7683334381958571864": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "2406816735581074778": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11856266545854830143": ["convolution_gpu_bfyx_gemm_like",2],
+        "16881283637687482989": ["convolution_gpu_bfyx_os_iyx_osv16",458],
+        "8762901342272872498": ["convolution_gpu_bfyx_os_iyx_osv16",62],
+        "13602140021189675477": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "282274448389888221": ["convolution_gpu_bfyx_os_iyx_osv16",659],
+        "15641537661939240413": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "14896875712028630045": ["convolution_gpu_bfyx_os_iyx_osv16",1057],
+        "12427258337646070422": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16131448347558322280": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "677249604491773387": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16710651492402564794": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9525853014023664813": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "11169292427557543138": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "7338229552985076723": ["convolution_gpu_bfyx_gemm_like",2],
+        "18137106379929135901": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "13121297281694293907": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "10499265278415026816": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11062100629646715785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "579781312141502576": ["convolution_gpu_bfyx_1x1",2],
+        "7962991673727743706": ["convolution_gpu_bfyx_os_iyx_osv16",766],
+        "15908673392788376468": ["convolution_gpu_bfyx_os_iyx_osv16",1004],
+        "9462315044265139531": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "397770940444464146": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "15814015810740458605": ["convolution_gpu_bfyx_1x1",0],
+        "10290107543739998181": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "4142978475842207311": ["convolution_gpu_bfyx_gemm_like",2],
+        "12190841837604350271": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "1778345646142852816": ["convolution_gpu_bfyx_gemm_like",2],
+        "1697248235682953135": ["convolution_gpu_bfyx_gemm_like",2],
+        "3041752019114501584": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13698389420396031586": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "12517838703662330663": ["convolution_gpu_bfyx_os_iyx_osv16",15],
+        "5319668297345215520": ["convolution_gpu_bfyx_gemm_like",1],
+        "6213386558868267629": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "9513032457323269513": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "15851356529373376076": ["convolution_gpu_bfyx_os_iyx_osv16",403],
+        "1212319037405620223": ["convolution_gpu_bfyx_gemm_like",2],
+        "1972879521448306536": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "12626014184575881530": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "3138374672801504481": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "14680730265621679042": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "10118395047539851751": ["convolution_gpu_bfyx_gemm_like",2],
+        "13954821927253849036": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "7852144838267007144": ["convolution_gpu_bfyx_os_iyx_osv16",833],
+        "14335423820860953927": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11690334177981352452": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "12213354854947437262": ["convolution_gpu_bfyx_1x1",1],
+        "11829442945690098558": ["convolution_gpu_bfyx_gemm_like",0],
+        "8609939102588915855": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "9076758673133996959": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "16036386660666696362": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "13609660900720370993": ["convolution_gpu_bfyx_1x1",2],
+        "1318571118468536310": ["convolution_gpu_bfyx_os_iyx_osv16",365],
+        "7264274394359484318": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "9213886570531053949": ["convolution_gpu_bfyx_os_iyx_osv16",801],
+        "18186615266760475767": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "9152451371616153112": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6843617687528352801": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "11113256687741667688": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13776178598632392721": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "3374410641320310726": ["convolution_gpu_bfyx_os_iyx_osv16",911],
+        "8594644182487917002": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "7009873605945341897": ["convolution_gpu_bfyx_gemm_like",2],
+        "10001963042016663554": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "8792010676469476740": ["convolution_gpu_bfyx_gemm_like",2],
+        "13282951481330978659": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "15471470494305051299": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17427036330773218054": ["convolution_gpu_bfyx_os_iyx_osv16",291],
+        "3534971503826416049": ["convolution_gpu_bfyx_os_iyx_osv16",736],
+        "11066913713501760080": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "2379484884827231127": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "17536308070854915513": ["convolution_gpu_bfyx_1x1",1],
+        "3644282167178264526": ["convolution_gpu_bfyx_gemm_like",2],
+        "14204609663091442879": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "10782611933832492335": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "1059505639883914386": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "13804221028705631415": ["convolution_gpu_bfyx_gemm_like",2],
+        "10724501418439612080": ["convolution_gpu_bfyx_os_iyx_osv16",1001],
+        "9373353053843326128": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17123463568694499533": ["convolution_gpu_bfyx_gemm_like",2],
+        "7852745450437172519": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "17408275657360833363": ["convolution_gpu_bfyx_1x1",0],
+        "8689206546467098603": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3800011935243649447": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "7481256533438761028": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "16146350476627599543": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "1841155673858789206": ["fully_connected_gpu_fb_oi_ref",2],
+        "16067605128297748820": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18084635102736402756": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "15003778740401601065": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2283157145557154450": ["convolution_gpu_bfyx_1x1",0],
+        "3463959257726925426": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "2908249767551054613": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "8767817856303586064": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "11020315012951440351": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "7877332346656934022": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "12141300895511301068": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "14826791706471872785": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2242915551775617989": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "9798585825695496550": ["convolution_gpu_bfyx_gemm_like",2],
+        "6101196122606108273": ["convolution_gpu_bfyx_gemm_like",2],
+        "12971822824884826169": ["convolution_gpu_bfyx_gemm_like",2],
+        "15006321421735686121": ["convolution_gpu_bfyx_gemm_like",0],
+        "13488495920546871271": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "13163146272900339330": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16711955423531846725": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "14729854278671832528": ["convolution_gpu_bfyx_os_iyx_osv16",236],
+        "2683304757433993300": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "1474271081523145413": ["convolution_gpu_bfyx_gemm_like",2],
+        "15192024816519005250": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "13025323039227543550": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "8258382025812748961": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11686670048744589243": ["convolution_gpu_bfyx_gemm_like",2],
+        "314054598858070952": ["convolution_gpu_bfyx_gemm_like",2],
+        "15809639778580769565": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "4202645222013675478": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "8913823292181409151": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "15839295895890205274": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "11565861421381730304": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "805221045541170643": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10650698451740924172": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "490233152678323691": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "14089893422771228191": ["convolution_gpu_bfyx_os_iyx_osv16",52],
+        "12680339228267704518": ["convolution_gpu_bfyx_os_iyx_osv16",120],
+        "14230493618724018658": ["convolution_gpu_bfyx_gemm_like",2],
+        "3860667078458481972": ["convolution_gpu_bfyx_gemm_like",2],
+        "804195263636995800": ["convolution_gpu_bfyx_gemm_like",2],
+        "10682300249493137042": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "18012549942299450620": ["convolution_gpu_bfyx_gemm_like",2],
+        "15217183882858251099": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "5275016494706355806": ["convolution_gpu_bfyx_gemm_like",0],
+        "2567046336192437734": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "5927467766675317093": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "13852065717057446998": ["convolution_gpu_bfyx_gemm_like",2],
+        "15378025640603637387": ["convolution_gpu_bfyx_gemm_like",2],
+        "11529876081402974396": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "8268533335852735248": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "116291934148608396": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "16949056117405140365": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "8251544171504007740": ["convolution_gpu_bfyx_gemm_like",2],
+        "6419580456182610836": ["convolution_gpu_bfyx_os_iyx_osv16",465],
+        "5834245904292669645": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "8132521728369930959": ["convolution_gpu_bfyx_gemm_like",2],
+        "8124736388338424498": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "2780423409483867058": ["convolution_gpu_bfyx_1x1",0],
+        "12098146032672599222": ["convolution_gpu_bfyx_os_iyx_osv16",667],
+        "4747159205186229582": ["convolution_gpu_bfyx_gemm_like",1],
+        "15967614281807823696": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "13486084204140096478": ["convolution_gpu_bfyx_gemm_like",2],
+        "14716719350966652036": ["convolution_gpu_bfyx_gemm_like",2],
+        "6821855018718422278": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "17599383258252980421": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16683485007140805060": ["fully_connected_gpu_fb_io_ref",1],
+        "3286629188347536485": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "18275601715050791851": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "13272818502368975319": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7800015766976654402": ["convolution_gpu_bfyx_gemm_like",2],
+        "13835908664998757647": ["fully_connected_gpu_fb_oi_ref",1],
+        "12394049027081208902": ["convolution_gpu_bfyx_gemm_like",1],
+        "5245308722062496788": ["convolution_gpu_bfyx_os_iyx_osv16",316],
+        "5754844816339228920": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "10797908931694274013": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "13054405729329143152": ["convolution_gpu_bfyx_os_iyx_osv16",459],
+        "1563987925712579649": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "8260130048649729185": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5303170164698694791": ["fully_connected_gpu_bf_io_gemm",2],
+        "12541834857357563605": ["convolution_gpu_bfyx_os_iyx_osv16",884],
+        "7134654288295280046": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "14429081455612806819": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "16749148369456398030": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "7279393739634103483": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5622089373755094139": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14151747022287993729": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14025496192869856801": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "10436819182310112786": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2294800960010879540": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "11855070245618904113": ["convolution_gpu_bfyx_os_iyx_osv16",943],
+        "11243840588602365090": ["convolution_gpu_bfyx_os_iyx_osv16",757],
+        "1939140810847988694": ["convolution_gpu_bfyx_gemm_like",2],
+        "12028665820838352309": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "419783127503173016": ["convolution_gpu_bfyx_os_iyx_osv16",185],
+        "15947699374684516369": ["convolution_gpu_bfyx_gemm_like",2],
+        "11936419502418995274": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "2934519615045138808": ["convolution_gpu_bfyx_os_iyx_osv16",948],
+        "6293403765897901528": ["convolution_gpu_bfyx_gemm_like",2],
+        "5390559917122707732": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "9195732599757736182": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "11873734271080160669": ["convolution_gpu_bfyx_os_iyx_osv16",190],
+        "3499106702307464480": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "8881150100883636392": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4156384238797998294": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "6343888265369366589": ["convolution_gpu_bfyx_os_iyx_osv16",293],
+        "2369451367723962073": ["convolution_gpu_bfyx_1x1",2],
+        "5594180958505308003": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "10170577772376890221": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "543472136359161929": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "1791615587935799399": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "10483664832302187567": ["convolution_gpu_bfyx_os_iyx_osv16",774],
+        "7715649642603303319": ["convolution_gpu_bfyx_1x1",2],
+        "10626341369865893888": ["convolution_gpu_bfyx_gemm_like",2],
+        "17556238490521153146": ["convolution_gpu_bfyx_os_iyx_osv16",468],
+        "2912098199463107173": ["convolution_gpu_bfyx_1x1",1],
+        "12242618640422208652": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "8317673282128335201": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "10989937450490049763": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "4927360358387344983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13816104794723484993": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "1104489643524273315": ["convolution_gpu_bfyx_os_iyx_osv16",130],
+        "14805540705424073865": ["convolution_gpu_bfyx_os_iyx_osv16",614],
+        "18142462471803295391": ["convolution_gpu_bfyx_1x1",2],
+        "11327097771110264965": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "142329025839464842": ["convolution_gpu_bfyx_1x1",1],
+        "9257078583742821465": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "4865023158176874622": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "3212789693085089063": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "4716188972902735458": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "17490471699618303993": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "3239033622277917802": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "15094664469997373662": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "14959566236432790882": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "1458615259705605525": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "7937870623766562191": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "6727930402459775131": ["convolution_gpu_bfyx_gemm_like",0],
+        "2349007644347065353": ["convolution_gpu_bfyx_gemm_like",2],
+        "7565221050911842393": ["convolution_gpu_bfyx_os_iyx_osv16",110],
+        "17053671692908867872": ["convolution_gpu_bfyx_os_iyx_osv16",341],
+        "16362857896338778056": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "16694984452720336415": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "9381304526221508530": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "14554225625951128811": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "12860222041026638681": ["convolution_gpu_bfyx_gemm_like",2],
+        "709835724029986012": ["convolution_gpu_bfyx_os_iyx_osv16",565],
+        "7604075520418038662": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13890118723041457532": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "4703107905652287491": ["convolution_gpu_bfyx_gemm_like",2],
+        "2543995971214089085": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "12514693341682532560": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "15386715291503303766": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "4750894407873652809": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "11198908896401597838": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15451919862187018297": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "13436376034548670107": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "5374969798377773063": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "11330591026581463934": ["convolution_gpu_bfyx_gemm_like",2],
+        "1596353239542510685": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "3167336012388169649": ["convolution_gpu_bfyx_os_iyx_osv16",531],
+        "2802810524370514276": ["convolution_gpu_bfyx_gemm_like",0],
+        "12118387933632797428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4133424990380177132": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "12174571114411168588": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "10179916356323479080": ["convolution_gpu_bfyx_gemm_like",2],
+        "5926747396493954633": ["convolution_gpu_bfyx_os_iyx_osv16",229],
+        "4628748977913534701": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "2362092095402043749": ["convolution_gpu_bfyx_gemm_like",2],
+        "18375125668176498051": ["convolution_gpu_bfyx_gemm_like",2],
+        "11560634267092054110": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "13809330759308309353": ["convolution_gpu_bfyx_os_iyx_osv16",979],
+        "9891428775774615719": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "7578177053220150569": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "2089730611490367290": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1077773457856682663": ["convolution_gpu_bfyx_gemm_like",2],
+        "17343050785312683560": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "15065925414996398951": ["convolution_gpu_bfyx_1x1",0],
+        "597073780328219388": ["convolution_gpu_bfyx_gemm_like",2],
+        "5873257164958285393": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "3950738240651133849": ["convolution_gpu_bfyx_os_iyx_osv16",736],
+        "14026570177552137240": ["convolution_gpu_bfyx_gemm_like",2],
+        "10565371760124443824": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "4278280309700908015": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1561225943337590599": ["convolution_gpu_bfyx_os_iyx_osv16",518],
+        "17248756229500447131": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "381149736509958403": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "4474697990228400564": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14068780861332616363": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "745009493367761775": ["convolution_gpu_bfyx_gemm_like",2],
+        "1316444335300814745": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "13893808009363736870": ["convolution_gpu_bfyx_gemm_like",2],
+        "2668729552208169959": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8071957466247137919": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "16386955278777720573": ["convolution_gpu_bfyx_os_iyx_osv16",848],
+        "9723314434598141024": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "9314293064351558241": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "9321208819255762521": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "3880189981766119529": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "12052207771201936228": ["convolution_gpu_bfyx_gemm_like",2],
+        "18218755616248669884": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "10900880512948479338": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "11031625790234068916": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "9731370183088819573": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1616603916015535857": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "4674416595144505741": ["convolution_gpu_bfyx_gemm_like",2],
+        "9144487908815767824": ["convolution_gpu_bfyx_1x1",0],
+        "8176012042686275874": ["convolution_gpu_bfyx_gemm_like",2],
+        "6548949901446632697": ["convolution_gpu_bfyx_1x1",2],
+        "2296581485980163665": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "872401732136570312": ["convolution_gpu_bfyx_os_iyx_osv16",342],
+        "14670068483447729857": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "14764715930784496165": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "17517495652165026573": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15884763176333003771": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "13973179950424276578": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "15428591250165788477": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "3988024997010367546": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "18184621367843960190": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "9488453013746383896": ["convolution_gpu_bfyx_gemm_like",1],
+        "4236174000795439083": ["convolution_gpu_bfyx_gemm_like",2],
+        "15914512645931208899": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "16158139166784964096": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "6767159196241633301": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "9421643783312790618": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "5056859994174498686": ["convolution_gpu_bfyx_gemm_like",2],
+        "8790625191540101806": ["convolution_gpu_bfyx_gemm_like",0],
+        "6642767323474835034": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "3491333679577961640": ["convolution_gpu_bfyx_gemm_like",0],
+        "15984885011101717258": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "3336303478756453360": ["convolution_gpu_bfyx_gemm_like",1],
+        "1096671695414716274": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "13775529405693629438": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6391847213494189692": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3325727286860556323": ["convolution_gpu_bfyx_gemm_like",1],
+        "17084977396231597605": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3928266232090746643": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5364060938737428149": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "4465701487417893814": ["convolution_gpu_bfyx_gemm_like",2],
+        "17770104464900126615": ["convolution_gpu_bfyx_1x1",2],
+        "18118237182023167949": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "530491406341772040": ["convolution_gpu_bfyx_gemm_like",2],
+        "13330734840729670622": ["convolution_gpu_bfyx_gemm_like",0],
+        "5782934278345953016": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "3255465741612432300": ["convolution_gpu_bfyx_os_iyx_osv16",814],
+        "2710485608298356329": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17808913959977434594": ["convolution_gpu_bfyx_gemm_like",0],
+        "3833510944499257797": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "10991423760161409883": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "11327228813412934262": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "15975964562807570772": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "1604661321386793876": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "16934879647229234163": ["convolution_gpu_bfyx_gemm_like",2],
+        "7969848911698660033": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6988492019664525206": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "16173557782125372935": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "879939701282942121": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "10892706534058849825": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "12917241193304093727": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "16120988958246503683": ["convolution_gpu_bfyx_os_iyx_osv16",1017],
+        "11726298758004767743": ["convolution_gpu_bfyx_gemm_like",2],
+        "7958443549125799229": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "9660812093766156608": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1907439276166837309": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9796621763733208035": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "6232363902828992968": ["convolution_gpu_bfyx_os_iyx_osv16",735],
+        "5124080536266387783": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "16998508915819714690": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "13663893159182636270": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "2582625260054352916": ["convolution_gpu_bfyx_gemm_like",2],
+        "5334566325056222430": ["convolution_gpu_bfyx_gemm_like",1],
+        "13338594271376045657": ["convolution_gpu_bfyx_gemm_like",2],
+        "17465517455679097501": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "15059549186302099880": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3860603464276263676": ["convolution_gpu_bfyx_gemm_like",2],
+        "2856601829807186494": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "1095495157025479260": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "2597453794298356435": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "18299254635579957284": ["convolution_gpu_bfyx_1x1",1],
+        "6848989271874647093": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "826850797666395121": ["convolution_gpu_bfyx_gemm_like",2],
+        "1403617451623027879": ["convolution_gpu_bfyx_os_iyx_osv16",297],
+        "2321767794934000238": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "4126895998426674411": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "15781622938833984014": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7351401242363888463": ["convolution_gpu_bfyx_gemm_like",2],
+        "8337820318779061494": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "360872770877634346": ["convolution_gpu_bfyx_gemm_like",2],
+        "5656623709782744241": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "4673127824919879657": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "12956726277674279950": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "7026575758396092435": ["convolution_gpu_bfyx_os_iyx_osv16",549],
+        "10792503079194374004": ["convolution_gpu_bfyx_os_iyx_osv16",174],
+        "13497225521878034159": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11215217005872946038": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "12293786134765875615": ["convolution_gpu_bfyx_gemm_like",2],
+        "4720851194954041037": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "472454322186482185": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "10722677916294015259": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "7824524940405130010": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "12232696287029987946": ["convolution_gpu_bfyx_os_iyx_osv16",914],
+        "5406129421969383274": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "6574971185849732667": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "12727541507197887360": ["convolution_gpu_bfyx_os_iyx_osv16",1023],
+        "4957638663977636791": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "846088275031979661": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "9153779186876518773": ["convolution_gpu_bfyx_gemm_like",2],
+        "11706378390483804857": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "5714365398623475983": ["convolution_gpu_bfyx_1x1",0],
+        "5195511638783481084": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "5941092474669713339": ["convolution_gpu_bfyx_gemm_like",1],
+        "13526488884846845330": ["convolution_gpu_bfyx_gemm_like",2],
+        "2659031931257084418": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "6708349666663292171": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "14767888121198814523": ["convolution_gpu_bfyx_os_iyx_osv16",128],
+        "8272823732258536202": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14331658870024759698": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "10432365444137108781": ["convolution_gpu_bfyx_os_iyx_osv16",1063],
+        "7546586420552408243": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "2877521658768725103": ["convolution_gpu_bfyx_gemm_like",2],
+        "13830605041347009953": ["convolution_gpu_bfyx_gemm_like",2],
+        "17434429579652310107": ["convolution_gpu_bfyx_gemm_like",2],
+        "6980201892073961793": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "8794896449397768269": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "951747146164097188": ["convolution_gpu_bfyx_1x1",1],
+        "2597523728660247862": ["convolution_gpu_bfyx_os_iyx_osv16",618],
+        "8376077531098664520": ["convolution_gpu_bfyx_gemm_like",0],
+        "11609821372586026178": ["convolution_gpu_bfyx_gemm_like",2],
+        "17316626950179740845": ["convolution_gpu_bfyx_os_iyx_osv16",561],
+        "8975333906619899020": ["convolution_gpu_bfyx_gemm_like",2],
+        "13189392239349392492": ["convolution_gpu_bfyx_os_iyx_osv16",662],
+        "6769243149577568817": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "7056293586529818253": ["convolution_gpu_bfyx_gemm_like",0],
+        "12707946849050970702": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "11308583200952256245": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9594594523961285945": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "15192230303376521834": ["convolution_gpu_bfyx_gemm_like",2],
+        "16717713360264747483": ["convolution_gpu_bfyx_gemm_like",2],
+        "475043738497218394": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "11110173861174257158": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6780215829176686721": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "13869716373706247686": ["convolution_gpu_bfyx_gemm_like",2],
+        "9323825370872655346": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "10774528268153772208": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "1353170363915443814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1419073145594317633": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "9545968464906009869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1902656726461670148": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "17006095064160484022": ["convolution_gpu_bfyx_os_iyx_osv16",810],
+        "14472187692485966933": ["convolution_gpu_bfyx_gemm_like",0],
+        "2575631797904040925": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "8780604510524622314": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "16925721317097534009": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5751283221740229986": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "13550435052563656432": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "18218631037214746168": ["convolution_gpu_bfyx_gemm_like",2],
+        "1045854873741563331": ["convolution_gpu_bfyx_gemm_like",2],
+        "11115684531624462986": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4617809377006148936": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "6997971129340865650": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "2930898141522848681": ["convolution_gpu_bfyx_1x1",2],
+        "6303682540621797774": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "16117738994809548007": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "4408772370026995920": ["convolution_gpu_bfyx_os_iyx_osv16",578],
+        "7060804814325505165": ["convolution_gpu_bfyx_gemm_like",2],
+        "1774158624592967937": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "123251351612308092": ["convolution_gpu_bfyx_os_iyx_osv16",836],
+        "1718634913016284523": ["convolution_gpu_bfyx_1x1",2],
+        "3101087806792514129": ["convolution_gpu_bfyx_1x1",2],
+        "14230385851791760020": ["convolution_gpu_bfyx_os_iyx_osv16",507],
+        "11430400968543668873": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "7843508201826629532": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "1984152634309440563": ["convolution_gpu_bfyx_gemm_like",2],
+        "3017891343734146267": ["convolution_gpu_bfyx_os_iyx_osv16",102],
+        "2038505773698938555": ["fully_connected_gpu_bs_f_bsv16_b1",2],
+        "13483175684542464385": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "17713034180977313726": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7561096442572829049": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1801731858063091191": ["convolution_gpu_bfyx_gemm_like",1],
+        "16772854836230971016": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "18122858611264877646": ["convolution_gpu_bfyx_gemm_like",2],
+        "8458082326743351141": ["convolution_gpu_bfyx_gemm_like",2],
+        "11634932044447867039": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "8153567933591966877": ["convolution_gpu_bfyx_gemm_like",2],
+        "2715447739580688669": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17377315194963069204": ["fully_connected_gpu_fb_oi_ref",2],
+        "7924408980408826942": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "15767973630744679517": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "959260710517842876": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "6192955702438301372": ["convolution_gpu_bfyx_os_iyx_osv16",647],
+        "17243648226968859637": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "9192665896782282996": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "14916625550370402883": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "12604104383683210104": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "3217246278485567748": ["convolution_gpu_bfyx_gemm_like",2],
+        "13011676362747785816": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17793292063552633023": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "12014527187730671229": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "1999979442136861875": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "14910223536998380801": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15914107501176673997": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "5824801192141531089": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "3277243911383750280": ["convolution_gpu_bfyx_gemm_like",1],
+        "16761856644242716357": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "10681768474583067517": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "9631545863582097486": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "1040030752340209480": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "7688176479120305539": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3499109651698979012": ["convolution_gpu_bfyx_os_iyx_osv16",1107],
+        "10556089809203693400": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "4880150897829846031": ["convolution_gpu_bfyx_1x1",2],
+        "9840495023131952174": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "954796765467489259": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "13810995219720233595": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "6578908625437515675": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1375156980278317418": ["convolution_gpu_bfyx_gemm_like",2],
+        "4750513665628842598": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "11254635684957519432": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "16692569816843207989": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "16566214123371867456": ["convolution_gpu_bfyx_gemm_like",2],
+        "9287404618748313247": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "16402312692470500253": ["convolution_gpu_bfyx_gemm_like",2],
+        "10880081193716628051": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "9942726414918759892": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12351866693978844266": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "5192552432194195116": ["convolution_gpu_bfyx_gemm_like",2],
+        "17089801601582809764": ["convolution_gpu_bfyx_gemm_like",2],
+        "693883892843558363": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "1594612401422787491": ["convolution_gpu_bfyx_gemm_like",2],
+        "16403423801823379909": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "989564341557094953": ["convolution_gpu_bfyx_os_iyx_osv16",69],
+        "2458592904274981909": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "15678385128478075284": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6483208845600234755": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "16487774205195979355": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "12181889163404078773": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "10865695385270390803": ["convolution_gpu_bfyx_os_iyx_osv16",644],
+        "8797843396807284399": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "12558716383635737426": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "16384186388687043048": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "2968031010495399536": ["convolution_gpu_bfyx_gemm_like",2],
+        "17829148383265978140": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "12223993560805441284": ["convolution_gpu_bfyx_gemm_like",2],
+        "8421388456873652700": ["convolution_gpu_bfyx_os_iyx_osv16",267],
+        "7143510787416483146": ["convolution_gpu_bfyx_os_iyx_osv16",271],
+        "13144385730409574259": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "6003409324516527726": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1071007164550012186": ["convolution_gpu_bfyx_os_iyx_osv16",476],
+        "794499287296495726": ["convolution_gpu_bfyx_1x1",0],
+        "1152691534728260611": ["convolution_gpu_bfyx_1x1",0],
+        "7777333052643961206": ["convolution_gpu_bfyx_os_iyx_osv16",932],
+        "16511749893955141055": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12370729327673204804": ["convolution_gpu_bfyx_os_iyx_osv16",111],
+        "5381578460674280089": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "1336940384521633733": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "17423645390621980919": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "14283458015244508428": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5326247361632903583": ["convolution_gpu_bfyx_gemm_like",2],
+        "8708643228914766202": ["convolution_gpu_bfyx_gemm_like",1],
+        "11446745541571732900": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9835739612255048978": ["convolution_gpu_bfyx_os_iyx_osv16",377],
+        "3308770992373192529": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "11992625045241269569": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "7469127846325904854": ["convolution_gpu_bfyx_os_iyx_osv16",230],
+        "12617625046664709483": ["convolution_gpu_bfyx_os_iyx_osv16",1021],
+        "2659712601063515059": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "17906607354577138153": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "2632535010129224704": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15924583510704449214": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "14421898375873029115": ["convolution_gpu_bfyx_1x1",0],
+        "10037086825900566930": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8130920994920685157": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "15101680837342453931": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "14811022197918391667": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11759426200341586247": ["convolution_gpu_bfyx_os_iyx_osv16",121],
+        "13247725847475539658": ["convolution_gpu_bfyx_1x1",2],
+        "4285475880886685878": ["convolution_gpu_bfyx_gemm_like",2],
+        "14487842225000203929": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "18135307303959376082": ["convolution_gpu_bfyx_gemm_like",0],
+        "9437794960375526230": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "13224814158106791463": ["convolution_gpu_bfyx_gemm_like",2],
+        "9232653317479846765": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "5311718276151327830": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9439431829175743345": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16135569134646688251": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "4793007249026943006": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "15160738482264643601": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "15201438563802430490": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "8045367391487213749": ["convolution_gpu_bfyx_1x1",1],
+        "1089679781525023551": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10415046594066474634": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "12159582810513550491": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18253784177599134876": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "11939914680143672459": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "11239541755868028928": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "5592526760253524303": ["convolution_gpu_bfyx_os_iyx_osv16",426],
+        "17193614571243427089": ["convolution_gpu_bfyx_gemm_like",2],
+        "16336482874764861478": ["convolution_gpu_bfyx_gemm_like",2],
+        "3388752887767453958": ["convolution_gpu_bfyx_gemm_like",0],
+        "16081386644309102158": ["convolution_gpu_bfyx_gemm_like",2],
+        "12675313398314286884": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12339692995143159283": ["convolution_gpu_bfyx_gemm_like",2],
+        "3180320769716158201": ["convolution_gpu_bfyx_os_iyx_osv16",977],
+        "18416908414174464784": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "13932662890258900896": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "8170998059688907013": ["convolution_gpu_bfyx_1x1",2],
+        "6911215749850066204": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "4362304842016958728": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "18271689282126907793": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "14821616804286068969": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "7264756313770306662": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "17917978116807564183": ["convolution_gpu_bfyx_gemm_like",2],
+        "16395067736440127496": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "9780938731831129283": ["convolution_gpu_bfyx_gemm_like",2],
+        "2008424849669196225": ["convolution_gpu_bfyx_1x1",1],
+        "12794369485239257709": ["convolution_gpu_bfyx_gemm_like",0],
+        "2007192658799516915": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "9131183544020825260": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "13374993751390784382": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "17224181038411430675": ["convolution_gpu_bfyx_os_iyx_osv16",202],
+        "7398196853452900099": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "10309504812060596568": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "1572991986657256775": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "1345101751956733589": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "14668725050395069435": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "11823205954749139338": ["convolution_gpu_bfyx_gemm_like",2],
+        "6964383468476265892": ["convolution_gpu_bfyx_1x1",2],
+        "3199841714087553410": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "631489011812924153": ["convolution_gpu_bfyx_1x1",0],
+        "669771152920944125": ["convolution_gpu_bfyx_gemm_like",2],
+        "14152716242882609401": ["convolution_gpu_bfyx_gemm_like",0],
+        "14902389080201926109": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14810839157236175179": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3362190082518348071": ["convolution_gpu_bfyx_gemm_like",2],
+        "13723543003759101485": ["convolution_gpu_bfyx_gemm_like",2],
+        "13642146548740074992": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "11022847760121601465": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "8787816339967963727": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "2530317332900569142": ["convolution_gpu_bfyx_os_iyx_osv16",807],
+        "9277610800970567810": ["convolution_gpu_bfyx_gemm_like",2],
+        "7915318733663535312": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "10879218241103462088": ["convolution_gpu_bfyx_gemm_like",2],
+        "4660288622381620227": ["convolution_gpu_bfyx_os_iyx_osv16",1027],
+        "1103204698908514224": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17900257435531434807": ["convolution_gpu_bfyx_gemm_like",2],
+        "8207349115037232863": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "12790788016297794214": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "12672995204641007004": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15486917753097743853": ["convolution_gpu_bfyx_1x1",2],
+        "8642107585829380438": ["convolution_gpu_bfyx_gemm_like",0],
+        "16117448559783537844": ["convolution_gpu_bfyx_os_iyx_osv16",343],
+        "2307310127637739872": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "4513063773753763458": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "4850497746076450913": ["convolution_gpu_bfyx_gemm_like",0],
+        "3226193790517362610": ["convolution_gpu_bfyx_1x1",0],
+        "8650948093564284852": ["convolution_gpu_bfyx_os_iyx_osv16",269],
+        "13735180250757239202": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "5807196005360653656": ["convolution_gpu_bfyx_gemm_like",2],
+        "16043683538361975370": ["convolution_gpu_bfyx_gemm_like",2],
+        "14603590053512154268": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "14045927407431718832": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "11241838709529552265": ["convolution_gpu_bfyx_gemm_like",2],
+        "6129602738379919488": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8096131027165540886": ["convolution_gpu_bfyx_gemm_like",2],
+        "4353842547963164546": ["convolution_gpu_bfyx_1x1",2],
+        "16731107540370927220": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "9748307611165615848": ["convolution_gpu_bfyx_gemm_like",2],
+        "142270860894725256": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "5179760459095053114": ["convolution_gpu_bfyx_os_iyx_osv16",207],
+        "1089944493540593798": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "190530884420224257": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "1997392406402548974": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "2321148334382088982": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "17854578307286932628": ["convolution_gpu_bfyx_gemm_like",2],
+        "12478309735214802531": ["convolution_gpu_bfyx_os_iyx_osv16",191],
+        "17039993918927377002": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "17329287216741045059": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "1107027047188366075": ["convolution_gpu_bfyx_os_iyx_osv16",918],
+        "13990028451169604107": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2738256633362038820": ["convolution_gpu_bfyx_gemm_like",2],
+        "14006248791647711759": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "10647227605517025377": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "822162932339827810": ["convolution_gpu_bfyx_os_iyx_osv16",539],
+        "4492332228252010118": ["convolution_gpu_bfyx_os_iyx_osv16",980],
+        "861419637283812778": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "17651821953342321913": ["convolution_gpu_bfyx_1x1",2],
+        "6020017927557041768": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2198278382394812839": ["convolution_gpu_bfyx_os_iyx_osv16",1007],
+        "3573490922300056520": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6065819201836017182": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16053585286807864356": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "8285478622349266483": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "11910735867274493498": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "16436006771518788093": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "16293465561256937726": ["convolution_gpu_bfyx_gemm_like",1],
+        "10437367877444543776": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4163359403543480821": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "11305232900158601613": ["convolution_gpu_bfyx_1x1",1],
+        "276407276027553756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13761566845514364807": ["convolution_gpu_bfyx_os_iyx_osv16",1043],
+        "14546281065004619074": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "4135003545872878882": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "9525535670799618110": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "5682190700442712936": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "10971070835319242371": ["convolution_gpu_bfyx_os_iyx_osv16",841],
+        "11055049031355432623": ["convolution_gpu_bfyx_gemm_like",2],
+        "11318913630213187720": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "10642327923162019888": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2651385050387738902": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6712698149192186833": ["convolution_gpu_bfyx_gemm_like",2],
+        "1832310305089212990": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "15035800097152337587": ["convolution_gpu_bfyx_gemm_like",2],
+        "7712831597869354170": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12806934028210472719": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "1074748462756364699": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "6714886136800883594": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "16397733032387984819": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17252449599613270108": ["convolution_gpu_bfyx_os_iyx_osv16",741],
+        "17515064188391421150": ["convolution_gpu_bfyx_gemm_like",2],
+        "2762489653422414995": ["convolution_gpu_bfyx_gemm_like",2],
+        "5242271874488296527": ["convolution_gpu_bfyx_gemm_like",1],
+        "3699344686791530101": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2418288192668085805": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10632020369698615114": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "4958222070605478947": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "10136369729388564720": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "13713501506522022845": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4856470441452830056": ["convolution_gpu_bfyx_gemm_like",2],
+        "13786357802945430475": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "10084794570892043447": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "11637325834858582585": ["convolution_gpu_bfyx_gemm_like",2],
+        "5141753233513623264": ["convolution_gpu_bfyx_os_iyx_osv16",816],
+        "9416186718345824095": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "14614844213016502202": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "13597240991532942069": ["convolution_gpu_bfyx_os_iyx_osv16",1057],
+        "14558572801374416278": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "2171768477223405739": ["convolution_gpu_bfyx_os_iyx_osv16",933],
+        "13754540732991287617": ["convolution_gpu_bfyx_os_iyx_osv16",362],
+        "17833517350994024381": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "8990561333549136048": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "708452703070938673": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "8497468192424557348": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "2572395498687401679": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "621915374938805401": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "6146876760962332928": ["convolution_gpu_bfyx_gemm_like",2],
+        "60509335250891515": ["convolution_gpu_bfyx_gemm_like",2],
+        "4304041922043496030": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6767245864232675168": ["convolution_gpu_bfyx_gemm_like",2],
+        "755414184406250882": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8323445733669842657": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10049571207493913006": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "14792528369891965810": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16574710115918192418": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "5349415632630235233": ["convolution_gpu_bfyx_1x1",0],
+        "4429109491655891299": ["convolution_gpu_bfyx_gemm_like",2],
+        "2878824076934639346": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "18393312550272875456": ["convolution_gpu_bfyx_1x1",2],
+        "10978173291465325823": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "11473442921040533207": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "10171373375072694210": ["convolution_gpu_bfyx_1x1",1],
+        "2114599010013594942": ["convolution_gpu_bfyx_gemm_like",2],
+        "6688522645556262131": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "17870874477143985774": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3737552767159920174": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "10751536136794650334": ["convolution_gpu_bfyx_gemm_like",2],
+        "13800387305792597325": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "16431857516454692096": ["convolution_gpu_bfyx_os_iyx_osv16",185],
+        "16862145184923128012": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "15082818876354718849": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "17788367809717898285": ["convolution_gpu_bfyx_os_iyx_osv16",366],
+        "18068050257421269408": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "15914058104244750036": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "11795826875463204296": ["convolution_gpu_bfyx_1x1",0],
+        "9524303276541517389": ["convolution_gpu_bfyx_gemm_like",2],
+        "15322019609805777935": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "15649927926091502215": ["convolution_gpu_bfyx_os_iyx_osv16",62],
+        "15247381586316467097": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "70580716590540876": ["convolution_gpu_bfyx_gemm_like",2],
+        "598390166442977699": ["convolution_gpu_bfyx_os_iyx_osv16",540],
+        "4217179485243909459": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6288489890578212082": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12473600360154597915": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "7242013296950669829": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "10532183096485321729": ["convolution_gpu_bfyx_1x1",1],
+        "12247991248100147706": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "1452597292381229708": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15555083739490354527": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "9584652777232392944": ["convolution_gpu_bfyx_os_iyx_osv16",655],
+        "1867337342417952506": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "6942016672941874829": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "9194788897910888066": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "8264178890341675354": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "244921290040927639": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "5019077257951332016": ["convolution_gpu_bfyx_gemm_like",2],
+        "16969463538496570528": ["convolution_gpu_bfyx_gemm_like",2],
+        "1173986078589662704": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "16698547937652264447": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "6362428985273506890": ["convolution_gpu_bfyx_1x1",0],
+        "17377293745073971167": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "15490478608105402679": ["convolution_gpu_bfyx_gemm_like",2],
+        "11198301748997371475": ["convolution_gpu_bfyx_gemm_like",2],
+        "3316798708399098230": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "11267742746905371769": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "220326805056361171": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "14420809655798184553": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "3782315919331102574": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10783981060353445280": ["convolution_gpu_bfyx_os_iyx_osv16",62],
+        "706049518431331645": ["convolution_gpu_bfyx_gemm_like",2],
+        "17774424004510360936": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "18125732229366977468": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "14686278683380845546": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "2929190644951986399": ["convolution_gpu_bfyx_gemm_like",0],
+        "11640225461345567929": ["convolution_gpu_bfyx_os_iyx_osv16",259],
+        "1954052357826969119": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "8700574100180128776": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "15989894214714907271": ["convolution_gpu_bfyx_gemm_like",2],
+        "14872992823083730615": ["convolution_gpu_bfyx_gemm_like",2],
+        "2114232149447438823": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "17636500109629107732": ["convolution_gpu_bfyx_os_iyx_osv16",346],
+        "8769060267707904998": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "10546430708947911124": ["convolution_gpu_bfyx_gemm_like",2],
+        "12501619443242354860": ["convolution_gpu_bfyx_gemm_like",2],
+        "11087413527078604815": ["convolution_gpu_bfyx_gemm_like",2],
+        "3706994659266083979": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "8295126647635181949": ["convolution_gpu_bfyx_gemm_like",2],
+        "17243576882981097341": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "8701248964531180496": ["convolution_gpu_bfyx_gemm_like",0],
+        "16723478941106779069": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "17546566148752689536": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "13357365044448426880": ["convolution_gpu_bfyx_1x1",0],
+        "7148542290597073512": ["convolution_gpu_bfyx_gemm_like",2],
+        "49948277487706148": ["convolution_gpu_bfyx_1x1",1],
+        "10576856554114055028": ["convolution_gpu_bfyx_gemm_like",2],
+        "10309083227104422150": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "3750338655074082587": ["fully_connected_gpu_yxfb_ref",1],
+        "5951936376654416075": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "4141005390823981166": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10591379189397010097": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "16945184617367657570": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "3723613341885592267": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "17092525789052598917": ["convolution_gpu_bfyx_os_iyx_osv16",122],
+        "9616636708366808604": ["convolution_gpu_bfyx_gemm_like",1],
+        "4980217316169616839": ["convolution_gpu_bfyx_1x1",2],
+        "10019470094545733255": ["convolution_gpu_bfyx_gemm_like",2],
+        "10196332102593337214": ["convolution_gpu_bfyx_gemm_like",2],
+        "3499645386058307669": ["convolution_gpu_bfyx_gemm_like",2],
+        "4792351255949877935": ["convolution_gpu_bfyx_gemm_like",2],
+        "5585398540591396124": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "18062849937960759210": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "13787436604877398090": ["convolution_gpu_bfyx_os_iyx_osv16",599],
+        "12914986936318857086": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "7179714714302073459": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14616969385577243225": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "2108296560864415762": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9426665763007611385": ["convolution_gpu_bfyx_gemm_like",2],
+        "11645116728396933125": ["convolution_gpu_bfyx_gemm_like",0],
+        "14681717813022425567": ["convolution_gpu_bfyx_gemm_like",2],
+        "5308128387928804050": ["convolution_gpu_bfyx_os_iyx_osv16",736],
+        "6719302427415173754": ["convolution_gpu_bfyx_os_iyx_osv16",838],
+        "15430549683839591544": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "5214654427283761256": ["convolution_gpu_bfyx_gemm_like",2],
+        "9065894438656900887": ["convolution_gpu_bfyx_gemm_like",1],
+        "3236003754884728510": ["convolution_gpu_bfyx_os_iyx_osv16",681],
+        "11597391933877736800": ["convolution_gpu_bfyx_gemm_like",2],
+        "12151068022697708126": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "16958329690837977102": ["convolution_gpu_bfyx_gemm_like",2],
+        "3217574161785059951": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "2438374917504708831": ["convolution_gpu_bfyx_gemm_like",2],
+        "12255528292506999241": ["convolution_gpu_bfyx_gemm_like",1],
+        "10947686124973711385": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "11147816119060617810": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "18005721959893562716": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "8002233052700666718": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "9428176632140441528": ["convolution_gpu_bfyx_gemm_like",1],
+        "6131481289104111211": ["convolution_gpu_bfyx_gemm_like",2],
+        "2728938624042183713": ["convolution_gpu_bfyx_gemm_like",2],
+        "14213516751025324346": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10729288973933590396": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "14274685812676150168": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "3737576893817599311": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "16441830491664937048": ["convolution_gpu_bfyx_os_iyx_osv16",605],
+        "8127570953237266335": ["fully_connected_gpu_bf_io_input_spatial",3],
+        "17011363406405852347": ["convolution_gpu_bfyx_gemm_like",2],
+        "17009318615658405230": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "1885075753696445410": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11604111639041106489": ["convolution_gpu_bfyx_os_iyx_osv16",270],
+        "11619548409913646265": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "18356980026934328781": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "10416622008071151225": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "3524531620118359828": ["convolution_gpu_bfyx_os_iyx_osv16",902],
+        "9404953235624894187": ["convolution_gpu_bfyx_os_iyx_osv16",109],
+        "7000486794832106857": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "12655099960717366198": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3024402899381804809": ["convolution_gpu_bfyx_1x1",2],
+        "16266491618150971928": ["convolution_gpu_bfyx_os_iyx_osv16",360],
+        "13296242326766100583": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "1334070221835422461": ["convolution_gpu_bfyx_gemm_like",1],
+        "15961487889420208188": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "1760391741350091665": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1704404203639481753": ["convolution_gpu_bfyx_gemm_like",2],
+        "15193403354218116460": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14906458674793172507": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "1351033666248868977": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "5498839261395459224": ["convolution_gpu_bfyx_gemm_like",1],
+        "1207026216972160297": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "14719421757340260468": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "10679760989906275129": ["convolution_gpu_bfyx_os_iyx_osv16",433],
+        "1478419046264331178": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "2344498602308448450": ["convolution_gpu_bfyx_os_iyx_osv16",502],
+        "9763310312421884308": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "6093575518270471235": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "6673966852801136416": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "7998930863626763670": ["convolution_gpu_bfyx_gemm_like",1],
+        "13608239208821071914": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "8866716292621164810": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "5240181393417899912": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "10112032316939871435": ["convolution_gpu_bfyx_os_iyx_osv16",44],
+        "4818231379191523896": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "3240102173773280414": ["convolution_gpu_bfyx_1x1",0],
+        "2128612971571865547": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "713121569924250372": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13809898858049445969": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "17746215841755337461": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13358283026528078900": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "8275277322582733101": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "13569941893504840630": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "5963901433137582265": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6171845068913882721": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "15924916465272239832": ["convolution_gpu_bfyx_gemm_like",2],
+        "16683169947375504066": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "9173631510896381179": ["convolution_gpu_bfyx_gemm_like",2],
+        "6458124573210430792": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "16947969669087411530": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "17802514063213000148": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "12589440296742583335": ["convolution_gpu_bfyx_1x1",1],
+        "18432787283148809023": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "15464327246951632247": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "16014822406751503249": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "10783630257421062891": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "16037141448095945650": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15602863681196390535": ["convolution_gpu_bfyx_os_iyx_osv16",996],
+        "14973431782875808802": ["convolution_gpu_bfyx_gemm_like",2],
+        "3032921857841371728": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "11077876432364512822": ["fully_connected_gpu_bf_io_input_spatial",3],
+        "5240706676373148280": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8444259010311137762": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "5848293219267886434": ["convolution_gpu_bfyx_os_iyx_osv16",954],
+        "16773645387243701837": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1779941298820543013": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "16247399911710810038": ["convolution_gpu_bfyx_gemm_like",2],
+        "12134712464763856064": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "1497127399271219422": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "12397280593466519809": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "6104380778870471127": ["convolution_gpu_bfyx_1x1",0],
+        "3286330985102373533": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "12218337369633748663": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "938222258370511187": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "10308431308942416781": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "6585223640997887253": ["convolution_gpu_bfyx_gemm_like",2],
+        "3033264172690274208": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "14147460733160099960": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13762042713029963144": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16295660312557315941": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "17742192339816511494": ["convolution_gpu_bfyx_os_iyx_osv16",534],
+        "17087740929472936216": ["convolution_gpu_bfyx_os_iyx_osv16",482],
+        "16601702334097258697": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "17791773192152464021": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16816222375242496370": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "14733510474010040334": ["convolution_gpu_bfyx_gemm_like",2],
+        "12564687330941036772": ["convolution_gpu_bfyx_os_iyx_osv16",643],
+        "15882969506682501496": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10471519687597963116": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5485971317082563152": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "4238885454989272754": ["convolution_gpu_bfyx_gemm_like",2],
+        "14397348576352573007": ["convolution_gpu_bfyx_gemm_like",2],
+        "4229105529069729944": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16833026567865627676": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "5295693108687178880": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "12962558681443556219": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "9182897385081081193": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "1545105800386716684": ["convolution_gpu_bfyx_os_iyx_osv16",205],
+        "3603187029740446600": ["convolution_gpu_bfyx_gemm_like",2],
+        "8578747191812631883": ["convolution_gpu_bfyx_os_iyx_osv16",1009],
+        "5120466856097219243": ["convolution_gpu_bfyx_gemm_like",2],
+        "15548971488532746290": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10791067159964399241": ["convolution_gpu_bfyx_os_iyx_osv16",1045],
+        "5097818987523855112": ["convolution_gpu_bfyx_gemm_like",2],
+        "7958459862276998225": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16884396694505987920": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "15991460001131903561": ["convolution_gpu_bfyx_gemm_like",1],
+        "13468081302022888489": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "16896833230469488924": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "6777045876155144709": ["convolution_gpu_bfyx_os_iyx_osv16",875],
+        "3234107167862677811": ["convolution_gpu_bfyx_os_iyx_osv16",587],
+        "17700958439420868719": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12992061224471212714": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14389915292223442327": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "17832542092610191859": ["convolution_gpu_bfyx_os_iyx_osv16",528],
+        "69439315851965666": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2627779045483019709": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16729849855476690294": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "13733327241591630239": ["convolution_gpu_bfyx_os_iyx_osv16",11],
+        "14985236276429954162": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14050124896329573468": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6090625728451718945": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "14057348639391787117": ["convolution_gpu_bfyx_os_iyx_osv16",327],
+        "2609454334520044465": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "12160764253455777655": ["convolution_gpu_bfyx_os_iyx_osv16",984],
+        "14458851250685872417": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "6729785110495533200": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "787203599734115483": ["convolution_gpu_bfyx_1x1",0],
+        "4772696293208603817": ["convolution_gpu_bfyx_gemm_like",2],
+        "10861525139715322534": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "5093049998173715787": ["convolution_gpu_bfyx_gemm_like",2],
+        "3122997634505472500": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "423221712829930726": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17915846724151945664": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13954144830230671601": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13237050834496100264": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "16610284927818475574": ["convolution_gpu_bfyx_gemm_like",1],
+        "18199526506796726885": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11582534256623549131": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "17370051888730874220": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "13621339501067135142": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "13973028408397200796": ["convolution_gpu_bfyx_os_iyx_osv16",431],
+        "14251848023416168295": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16446533347502650316": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "153117141968471446": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "689445825453914111": ["convolution_gpu_bfyx_gemm_like",2],
+        "3830703844770425343": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3571959174116404960": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "11120846960057008937": ["convolution_gpu_bfyx_os_iyx_osv16",516],
+        "12693511427898130707": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9101018613418825655": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "1451466106918423837": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "1103228955716492167": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12352923639732112511": ["convolution_gpu_bfyx_os_iyx_osv16",75],
+        "17037416417174266088": ["convolution_gpu_bfyx_os_iyx_osv16",225],
+        "5629373398445592781": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "8929453032482114162": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "13485300684443803732": ["convolution_gpu_bfyx_os_iyx_osv16",344],
+        "12976499206227689731": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "14515066741400300669": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "16531824466148265247": ["convolution_gpu_bfyx_os_iyx_osv16",535],
+        "14746359019867963124": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3012566432840424198": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "17147293671640396193": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "9480653639044390919": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12741457056869452536": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "2969389503332309296": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "6509758095668864050": ["convolution_gpu_bfyx_os_iyx_osv16",743],
+        "6407471972820516685": ["fully_connected_gpu_fb_oi_ref",1],
+        "18043340998699622388": ["convolution_gpu_bfyx_os_iyx_osv16",138],
+        "16108759090923335184": ["convolution_gpu_bfyx_gemm_like",2],
+        "4355933224673863178": ["convolution_gpu_bfyx_gemm_like",0],
+        "13092232276822302626": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "12011982029561277581": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "5103094815475470596": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16307464696265537356": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "4967444801764057340": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "16783619135298589974": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "6040286126398028933": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "7375461241315602473": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "3892679716763161057": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1411786954276574458": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "9263063714383940562": ["convolution_gpu_bfyx_os_iyx_osv16",815],
+        "13102754309439605192": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "5524218746051008792": ["convolution_gpu_bfyx_os_iyx_osv16",844],
+        "6928136130626403937": ["convolution_gpu_bfyx_gemm_like",2],
+        "9761573038170759563": ["convolution_gpu_bfyx_os_iyx_osv16",316],
+        "6334639534663495263": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "7966454753124154534": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "10306542963828398049": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "3963106895592011725": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12107262410635772120": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "929378940515745198": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5627834277145735283": ["convolution_gpu_bfyx_os_iyx_osv16",83],
+        "11956435900037329302": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "5389189982064081933": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "9767294641786972359": ["convolution_gpu_bfyx_gemm_like",2],
+        "15511138074959300404": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "6181651715051152713": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "14763982961176216679": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "14907097142953816744": ["convolution_gpu_bfyx_gemm_like",2],
+        "13300022131572486202": ["convolution_gpu_bfyx_gemm_like",2],
+        "6418327009347170687": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "1192279884248226739": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "156456996459945842": ["convolution_gpu_bfyx_os_iyx_osv16",814],
+        "4165036357594592683": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15897477855246170861": ["convolution_gpu_bfyx_gemm_like",2],
+        "16789245987103323406": ["convolution_gpu_bfyx_gemm_like",2],
+        "3448477246688526708": ["convolution_gpu_bfyx_gemm_like",0],
+        "17977676737774695825": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "5352861363832390974": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "875142032423622622": ["convolution_gpu_bfyx_os_iyx_osv16",498],
+        "16924006268301179157": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "9741607635826869269": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10071449674652717890": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "9299299311101549958": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "17301887391757619741": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "11147573971701279689": ["convolution_gpu_bfyx_os_iyx_osv16",268],
+        "4809191606466167229": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "3291180926381314705": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "14316077757957132678": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "6109013751635776331": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "7940369586324090841": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "11069983292783104310": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "4481903208484313806": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4014667229872705228": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "18136135457402651842": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "4197617702037834389": ["convolution_gpu_bfyx_os_iyx_osv16",296],
+        "17222005830854879661": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "4701832665603867798": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "1138439260035360722": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6931953332823066530": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "13472532612464340803": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "2324120381399737261": ["convolution_gpu_bfyx_os_iyx_osv16",267],
+        "5235375820995365354": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "10995907213890714701": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "9502195532658935521": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "580936360000782237": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "3106710091841093202": ["convolution_gpu_bfyx_os_iyx_osv16",991],
+        "12090536142661253835": ["fully_connected_gpu_bf_io_gemm",1],
+        "14352796912241296357": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "16765994345605657100": ["convolution_gpu_bfyx_1x1",0],
+        "9475130054420979752": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "8860443174052454332": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "14487682847898298214": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "8712136292276123857": ["convolution_gpu_bfyx_os_iyx_osv16",603],
+        "4366168099274266975": ["convolution_gpu_bfyx_gemm_like",1],
+        "654122557966242717": ["convolution_gpu_bfyx_gemm_like",1],
+        "9967101735808367971": ["convolution_gpu_bfyx_1x1",0],
+        "18132952464279667664": ["convolution_gpu_bfyx_1x1",2],
+        "14744368497944610864": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8507854696766492454": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16911450336605071390": ["convolution_gpu_bfyx_1x1",2],
+        "438528596970898721": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "3036512701943687724": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "13654816209891478730": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "5353552956675518468": ["convolution_gpu_bfyx_os_iyx_osv16",457],
+        "425744529089575241": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "9274179337770060652": ["convolution_gpu_bfyx_gemm_like",1],
+        "6193161166790398003": ["convolution_gpu_bfyx_gemm_like",1],
+        "16312223896859176991": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16286085532892593349": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11718418772370938734": ["convolution_gpu_bfyx_gemm_like",2],
+        "14363654136811880073": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "7786866732196451977": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "11455518069358829249": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "14171139920084409181": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "5504757952698692953": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "15548847099740441551": ["convolution_gpu_bfyx_1x1",1],
+        "4889188980319017094": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "12929981792125924963": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11857037689248685487": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "15591167992985613695": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13314092088416047551": ["fully_connected_gpu_fb_io_ref",1],
+        "16800575429414554907": ["convolution_gpu_bfyx_os_iyx_osv16",448],
+        "7799984350284425885": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "5600807544955072308": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "14221578799010900252": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "13484950419220835364": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "6557428245898292304": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5157249499936659040": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "5088898934670078153": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1152693503778768433": ["convolution_gpu_bfyx_os_iyx_osv16",963],
+        "4104562704039821482": ["convolution_gpu_bfyx_1x1",1],
+        "3159681096461848644": ["convolution_gpu_bfyx_os_iyx_osv16",295],
+        "9585113116232600562": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "1890739204389692970": ["convolution_gpu_bfyx_os_iyx_osv16",754],
+        "6856130385095139346": ["convolution_gpu_bfyx_os_iyx_osv16",1124],
+        "17104611871050967957": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "5644068493155655611": ["convolution_gpu_bfyx_gemm_like",2],
+        "8709632541892447149": ["convolution_gpu_bfyx_gemm_like",2],
+        "2172121470071868949": ["convolution_gpu_bfyx_gemm_like",2],
+        "16235115911229280717": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "16509472637458153234": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "16820082917500285799": ["convolution_gpu_bfyx_gemm_like",2],
+        "15329680728165965773": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "17882819773586674851": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "9987415314864002460": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13972357557211413688": ["convolution_gpu_bfyx_gemm_like",2],
+        "5940337324384948573": ["convolution_gpu_bfyx_gemm_like",2],
+        "2552187713769926425": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "17525531790109748810": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "8757900457181374694": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11932770338770247767": ["convolution_gpu_bfyx_os_iyx_osv16",438],
+        "7162575953766465459": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "5219399418946822456": ["convolution_gpu_bfyx_gemm_like",2],
+        "16561224775421968533": ["convolution_gpu_bfyx_os_iyx_osv16",759],
+        "11636129433022017868": ["convolution_gpu_bfyx_os_iyx_osv16",600],
+        "14559308665571750465": ["convolution_gpu_bfyx_gemm_like",2],
+        "4499586349553581439": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "17364712285968437405": ["convolution_gpu_bfyx_os_iyx_osv16",1018],
+        "18103534417093702556": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "14133958262039763609": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "5083163738120585821": ["fully_connected_gpu_fb_oi_ref",0],
+        "5649082203775427830": ["convolution_gpu_bfyx_gemm_like",2],
+        "5291011077679733990": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "383721620126444793": ["convolution_gpu_bfyx_gemm_like",2],
+        "11756881293845417212": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "17994361454416813294": ["convolution_gpu_bfyx_os_iyx_osv16",361],
+        "17549411807772646930": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "2649192407401044065": ["convolution_gpu_bfyx_gemm_like",0],
+        "1941341635794709702": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "16108573960501496757": ["convolution_gpu_bfyx_gemm_like",2],
+        "2721793280965260548": ["convolution_gpu_bfyx_os_iyx_osv16",325],
+        "12164298124869114517": ["convolution_gpu_bfyx_os_iyx_osv16",216],
+        "4584970211859494304": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "14281201038135286621": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "4264284648458489052": ["convolution_gpu_bfyx_os_iyx_osv16",221],
+        "6988674007771237080": ["convolution_gpu_bfyx_gemm_like",1],
+        "11800783548769329949": ["convolution_gpu_bfyx_os_iyx_osv16",693],
+        "8656468860180713379": ["convolution_gpu_bfyx_gemm_like",1],
+        "2066731703492755469": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "11772741918108731396": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "10990741293315393791": ["convolution_gpu_bfyx_gemm_like",1],
+        "15187035463799513424": ["convolution_gpu_bfyx_1x1",1],
+        "7575634241190730697": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "3067806959725855130": ["convolution_gpu_bfyx_os_iyx_osv16",152],
+        "12946540633035976364": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "6553736978928374036": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "4862529593282936100": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "9175450649281374948": ["convolution_gpu_bfyx_os_iyx_osv16",860],
+        "2842103889477438816": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "2173867324489962689": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9988801796928462423": ["convolution_gpu_bfyx_os_iyx_osv16",245],
+        "3870539490799697188": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "17854208422879910606": ["convolution_gpu_bfyx_gemm_like",2],
+        "5077214229434392730": ["convolution_gpu_bfyx_os_iyx_osv16",956],
+        "8303211644727914658": ["convolution_gpu_bfyx_1x1",1],
+        "6318228858846223186": ["convolution_gpu_bfyx_1x1",2],
+        "8614534946699754256": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "15485701086886851362": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "17021925795809437171": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5047419871737940985": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "4161612746310931789": ["convolution_gpu_bfyx_gemm_like",2],
+        "1587501521145162454": ["convolution_gpu_bfyx_gemm_like",2],
+        "8913950860101596091": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "8856888761246057127": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "481328129206881674": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "10486000767830001094": ["convolution_gpu_bfyx_1x1",1],
+        "4118073384938355655": ["convolution_gpu_bfyx_os_iyx_osv16",678],
+        "6981537186704688907": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9728611486592854529": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "789359733867650915": ["convolution_gpu_bfyx_gemm_like",1],
+        "17264608538692763688": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1653274345637156919": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "5801429077171542466": ["convolution_gpu_bfyx_os_iyx_osv16",485],
+        "9378269524012289175": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "1075027491444288875": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2912858944747613525": ["convolution_gpu_bfyx_os_iyx_osv16",244],
+        "10930115765550856328": ["convolution_gpu_bfyx_gemm_like",2],
+        "13119479079474639169": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "632116056424249698": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15109847707903824859": ["convolution_gpu_bfyx_1x1",0],
+        "5415319660821122528": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "10702234389482091891": ["convolution_gpu_bfyx_gemm_like",2],
+        "12415368596357091523": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "17392594284473856393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "949330876419581703": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "7314288062932060863": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "18377298651236993830": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "4640028527711211109": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "12725675221990905186": ["convolution_gpu_bfyx_gemm_like",2],
+        "6670327979947471550": ["convolution_gpu_bfyx_os_iyx_osv16",163],
+        "2702144517025248597": ["convolution_gpu_bfyx_gemm_like",2],
+        "5581428998642936688": ["convolution_gpu_bfyx_1x1",2],
+        "6902644989079870993": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "13540002981450186147": ["convolution_gpu_bfyx_os_iyx_osv16",964],
+        "13093429681061786539": ["convolution_gpu_bfyx_os_iyx_osv16",1117],
+        "7208008921815475393": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15997754881872769378": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "15417738436777481469": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "2722124265986526212": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "11992353959766718397": ["convolution_gpu_bfyx_os_iyx_osv16",551],
+        "3759057398165607194": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15689502054035168040": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "11913020016435860608": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "4916769804113823482": ["convolution_gpu_bfyx_1x1",0],
+        "15154700439767512396": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "17975017633455909321": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "18121198117765854866": ["convolution_gpu_bfyx_1x1",2],
+        "9714764457768279762": ["convolution_gpu_bfyx_os_iyx_osv16",746],
+        "11025471731438443683": ["convolution_gpu_bfyx_os_iyx_osv16",536],
+        "11052732052072367261": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "13026555349791486777": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "17281202179589913619": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "7104756264011682902": ["convolution_gpu_bfyx_gemm_like",2],
+        "12700372241799686527": ["convolution_gpu_bfyx_gemm_like",2],
+        "16190949264253468961": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "16579057939215877904": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "17764033613416389758": ["convolution_gpu_bfyx_gemm_like",2],
+        "13046322179198317310": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "15354185859262170540": ["convolution_gpu_bfyx_gemm_like",1],
+        "12676167240795292217": ["convolution_gpu_bfyx_gemm_like",0],
+        "16383540667048742064": ["convolution_gpu_bfyx_gemm_like",2],
+        "287386909600391846": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "956022649859563080": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "6062246008880097669": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "2534408579674556441": ["convolution_gpu_bfyx_os_iyx_osv16",966],
+        "2730604806511016352": ["convolution_gpu_bfyx_os_iyx_osv16",154],
+        "17806712457019493207": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "15914342421266687768": ["convolution_gpu_bfyx_gemm_like",2],
+        "13455881643467418059": ["convolution_gpu_bfyx_gemm_like",2],
+        "11104393974242049153": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1573498199681662714": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "17421991623849671076": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4232250144427804891": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "4492673409319122180": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "7460672405409009037": ["convolution_gpu_bfyx_os_iyx_osv16",652],
+        "16541722316343690197": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "13404888565084206853": ["convolution_gpu_bfyx_os_iyx_osv16",703],
+        "8788703258318141635": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "8751016391945753900": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3159147743553063163": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2497756607567197523": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "8069537351442302814": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1819720745131968914": ["convolution_gpu_bfyx_gemm_like",2],
+        "14389719202147508599": ["convolution_gpu_bfyx_os_iyx_osv16",715],
+        "428659495445490820": ["convolution_gpu_bfyx_os_iyx_osv16",175],
+        "4161001033681779582": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "1120455113299469776": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "15688186132508213638": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13078401519973360182": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13161997040644039778": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15399245700982979379": ["convolution_gpu_bfyx_os_iyx_osv16",597],
+        "16105073808368936420": ["convolution_gpu_bfyx_gemm_like",2],
+        "12026482841341343242": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "10173283505468233128": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "12380856644683171627": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "16986610822918634530": ["convolution_gpu_bfyx_1x1",1],
+        "1003101267609305257": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "9305861997313663528": ["convolution_gpu_bfyx_gemm_like",2],
+        "3134489458855347772": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "14217181622713951411": ["convolution_gpu_bfyx_gemm_like",2],
+        "6620782733027313312": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "593712935037568960": ["convolution_gpu_bfyx_os_iyx_osv16",1068],
+        "14912119584313592912": ["convolution_gpu_bfyx_gemm_like",2],
+        "17026284168840448378": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "1541754036637209097": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "1202292109713947702": ["convolution_gpu_bfyx_gemm_like",2],
+        "12259844988981080505": ["convolution_gpu_bfyx_gemm_like",1],
+        "11528310408333718862": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "1471837664358450291": ["convolution_gpu_bfyx_gemm_like",2],
+        "178353385245384751": ["convolution_gpu_bfyx_gemm_like",2],
+        "14667209474639064623": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14091610802555875119": ["convolution_gpu_bfyx_gemm_like",2],
+        "8651641584737798174": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "17024388383581997032": ["convolution_gpu_bfyx_gemm_like",2],
+        "6340128090694375876": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "4274801141127703532": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3154539627593235077": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "6571438978296387721": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "2305461098719675735": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17705807503894740726": ["convolution_gpu_bfyx_gemm_like",0],
+        "1920070013712913772": ["convolution_gpu_bfyx_os_iyx_osv16",157],
+        "17824431042110985323": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "4290840152278060614": ["convolution_gpu_bfyx_gemm_like",2],
+        "13248567106128518549": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "11622925573287101001": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6788311046557489996": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "17599396373608265826": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "16469788155263456039": ["convolution_gpu_bfyx_gemm_like",2],
+        "7585785802379042424": ["convolution_gpu_bfyx_1x1",1],
+        "4678607855896512523": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13312514874803986753": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "14381420852659789698": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "1889171157980977747": ["convolution_gpu_bfyx_gemm_like",2],
+        "7881579844586294503": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "15069906408448814772": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "6949539207944972855": ["convolution_gpu_bfyx_gemm_like",2],
+        "10522649794540845800": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "15117880293418979489": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16706244336960642883": ["convolution_gpu_bfyx_gemm_like",1],
+        "5912303851874077576": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "10055549084854766170": ["convolution_gpu_bfyx_os_iyx_osv16",973],
+        "5648658688155716974": ["convolution_gpu_bfyx_1x1",1],
+        "12523676912856063091": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17446505012657609153": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "739676584505475609": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "13478984039708550410": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "13320675959188615441": ["convolution_gpu_bfyx_gemm_like",1],
+        "5336120047683197088": ["convolution_gpu_bfyx_gemm_like",2],
+        "1082574490068006980": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "4553409514380460123": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "13124342334495538095": ["convolution_gpu_bfyx_os_iyx_osv16",639],
+        "216603198215625772": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "13902214851539825156": ["convolution_gpu_bfyx_gemm_like",2],
+        "15675968397825708285": ["convolution_gpu_bfyx_os_iyx_osv16",928],
+        "40704767167309552": ["convolution_gpu_bfyx_os_iyx_osv16",206],
+        "969746749329671447": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "17015791782274123780": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "11443268857010762276": ["convolution_gpu_bfyx_gemm_like",2],
+        "17370158297470557151": ["convolution_gpu_bfyx_1x1",2],
+        "7084646429975006971": ["convolution_gpu_bfyx_1x1",0],
+        "5673972310424776040": ["convolution_gpu_bfyx_gemm_like",2],
+        "9148379585489720669": ["convolution_gpu_bfyx_os_iyx_osv16",862],
+        "15031155621982459860": ["convolution_gpu_bfyx_gemm_like",2],
+        "3509487327001107638": ["convolution_gpu_bfyx_gemm_like",2],
+        "12081835728078383819": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "12947341728489226671": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4408600136502382976": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "570683988452622223": ["convolution_gpu_bfyx_os_iyx_osv16",804],
+        "6142707387281700290": ["convolution_gpu_bfyx_gemm_like",1],
+        "15209909241815414156": ["convolution_gpu_bfyx_os_iyx_osv16",183],
+        "529543453251381109": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5132761922124425835": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "11292995457386147494": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "10157866834809927320": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "9383182168277796969": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "5277400567128489977": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "12962552332511702682": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "18221867262301937903": ["convolution_gpu_bfyx_1x1",2],
+        "16294825599850364701": ["convolution_gpu_bfyx_os_iyx_osv16",1005],
+        "14043770215999952932": ["convolution_gpu_bfyx_gemm_like",2],
+        "17419874083634480896": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "15315327794058441258": ["convolution_gpu_bfyx_gemm_like",2],
+        "16567638487719493784": ["convolution_gpu_bfyx_os_iyx_osv16",721],
+        "16094174852600023296": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "7454366978268164047": ["convolution_gpu_bfyx_gemm_like",2],
+        "11834683513280095384": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "10302338806536775954": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "15962137123591591534": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "6149673627320838019": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "1470933384474984858": ["convolution_gpu_bfyx_1x1",0],
+        "3662747857062156477": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "4815047491742617397": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "10887835418423052188": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7878605163588288309": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "18136765667969393174": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "937159502066696999": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "12022152681602871455": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "16985912104363932350": ["convolution_gpu_bfyx_os_iyx_osv16",505],
+        "3087801652564627458": ["convolution_gpu_bfyx_os_iyx_osv16",814],
+        "15643135666029727865": ["convolution_gpu_bfyx_gemm_like",2],
+        "9530116228032101908": ["convolution_gpu_bfyx_1x1",0],
+        "15235409162483701027": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4476928353532757380": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "970768445746568749": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "16833854122884184025": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "856877003890134554": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "75742659105146536": ["convolution_gpu_bfyx_gemm_like",1],
+        "6008613375871089139": ["convolution_gpu_bfyx_os_iyx_osv16",796],
+        "54019631544204590": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2832268621630415376": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "4615708568396290002": ["convolution_gpu_bfyx_1x1",2],
+        "7843498978148810586": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "15295951849706930711": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "8463615810239412362": ["convolution_gpu_bfyx_1x1",2],
+        "435888248913413834": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "1082586642383386489": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "6726099352298108756": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13676654389512816868": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "2162882863309264684": ["convolution_gpu_bfyx_os_iyx_osv16",313],
+        "6450532136308941035": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "5680236635030250712": ["convolution_gpu_bfyx_1x1",0],
+        "15411474884532403722": ["convolution_gpu_bfyx_os_iyx_osv16",404],
+        "13176385389367548697": ["convolution_gpu_bfyx_gemm_like",2],
+        "11661208196482963286": ["convolution_gpu_bfyx_os_iyx_osv16",85],
+        "3752171257634205726": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "1435153323458789173": ["convolution_gpu_bfyx_gemm_like",2],
+        "14841539539334726292": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "17015328096102652908": ["convolution_gpu_bfyx_gemm_like",2],
+        "8061914949376516780": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "17442105631503326136": ["convolution_gpu_bfyx_os_iyx_osv16",628],
+        "14599780481362761532": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14947798627499698329": ["convolution_gpu_bfyx_gemm_like",2],
+        "2526832080529662683": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "787363431787954804": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "13855438905855887272": ["convolution_gpu_bfyx_os_iyx_osv16",517],
+        "6204725118764552662": ["convolution_gpu_bfyx_gemm_like",0],
+        "15650839696475698676": ["convolution_gpu_bfyx_os_iyx_osv16",82],
+        "12277470820821378855": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "8528750110601691390": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "14462438074931673266": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "7862815466573236157": ["convolution_gpu_bfyx_os_iyx_osv16",875],
+        "10702465758376061967": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5572956736535433608": ["convolution_gpu_bfyx_1x1",1],
+        "18017913952946745878": ["convolution_gpu_bfyx_gemm_like",2],
+        "4840004190985490064": ["convolution_gpu_bfyx_gemm_like",2],
+        "9641089659148164809": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4160656836528944651": ["convolution_gpu_bfyx_os_iyx_osv16",750],
+        "1982176363226079588": ["convolution_gpu_bfyx_os_iyx_osv16",612],
+        "1938086876393565238": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "16609136488331186895": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "17035903590837750750": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "4228437925117070319": ["convolution_gpu_bfyx_1x1",0],
+        "6820284286806022849": ["convolution_gpu_bfyx_gemm_like",2],
+        "14923692894655929923": ["fully_connected_gpu_bf_io_gemm",1],
+        "14749947225382670869": ["convolution_gpu_bfyx_os_iyx_osv16",576],
+        "2816353973187452604": ["convolution_gpu_bfyx_gemm_like",2],
+        "8655315308767111198": ["convolution_gpu_bfyx_1x1",1],
+        "3503893875515897267": ["convolution_gpu_bfyx_os_iyx_osv16",1099],
+        "18426893729833771809": ["convolution_gpu_bfyx_1x1",0],
+        "7974670633697926450": ["convolution_gpu_bfyx_1x1",0],
+        "10178951466584845110": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "13883044928774243663": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "10085059621136526248": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4086556132337751931": ["convolution_gpu_bfyx_gemm_like",1],
+        "11627532066884923848": ["convolution_gpu_bfyx_1x1",0],
+        "15963038745470172423": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "1076005730007872492": ["convolution_gpu_bfyx_os_iyx_osv16",81],
+        "7431849514656037251": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "12348135936862667024": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "1622880009460832832": ["convolution_gpu_bfyx_os_iyx_osv16",680],
+        "4569338575782832784": ["convolution_gpu_bfyx_gemm_like",1],
+        "4226968857681929488": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7605139219344415117": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16230621843665445228": ["convolution_gpu_bfyx_gemm_like",2],
+        "5770286476124511234": ["convolution_gpu_bfyx_gemm_like",2],
+        "14930789530046665855": ["convolution_gpu_bfyx_os_iyx_osv16",1071],
+        "5953754321266570854": ["convolution_gpu_bfyx_gemm_like",2],
+        "3854114166348568039": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "16011429608661242565": ["convolution_gpu_bfyx_os_iyx_osv16",981],
+        "13327653786981478088": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "17948637243158994878": ["convolution_gpu_bfyx_gemm_like",0],
+        "13680926356824317761": ["convolution_gpu_bfyx_os_iyx_osv16",429],
+        "18174857480705846286": ["convolution_gpu_bfyx_os_iyx_osv16",579],
+        "9942099207256025216": ["convolution_gpu_bfyx_gemm_like",2],
+        "2598267743388306204": ["convolution_gpu_bfyx_gemm_like",2],
+        "9275303306340702111": ["convolution_gpu_bfyx_gemm_like",2],
+        "13702692566238948173": ["convolution_gpu_bfyx_gemm_like",1],
+        "14532519639619315651": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "10295330953350618042": ["convolution_gpu_bfyx_os_iyx_osv16",0],
+        "5079055505117153635": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "2438261005924916746": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "14962768577232034246": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9631481972809246378": ["convolution_gpu_bfyx_gemm_like",0],
+        "15048584393463312977": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "12725647706191463348": ["convolution_gpu_bfyx_gemm_like",2],
+        "12228610148087508521": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "10861769381993948050": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "3337625924046561031": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "7113777272518482528": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "6882621854468565774": ["convolution_gpu_bfyx_os_iyx_osv16",952],
+        "18034648276860485300": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "15602218079503030465": ["convolution_gpu_bfyx_gemm_like",2],
+        "16865879032845300007": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "4239133538073498792": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12972798847556569913": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "8203550467004532364": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "17422822627612865758": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "9999553425206328238": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "4773123925616969670": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "9191832520273617003": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7638626850074132214": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "7667898603371717971": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5312140481706133684": ["convolution_gpu_bfyx_os_iyx_osv16",806],
+        "7532088618116521936": ["convolution_gpu_bfyx_gemm_like",1],
+        "8728178019712933221": ["convolution_gpu_bfyx_os_iyx_osv16",1054],
+        "11919129623429545762": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15110359240685619357": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "13191096881934434519": ["convolution_gpu_bfyx_os_iyx_osv16",740],
+        "10267260789603562117": ["convolution_gpu_bfyx_os_iyx_osv16",1000],
+        "15225354446874994535": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "5352061583962489055": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "77073286362822723": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "522181557896569275": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "10308175009371219583": ["convolution_gpu_bfyx_os_iyx_osv16",254],
+        "8943913562339525413": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "11461581290174106570": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15320845027635796583": ["convolution_gpu_bfyx_gemm_like",2],
+        "11655994466278963438": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "503369896500284129": ["convolution_gpu_bfyx_1x1",1],
+        "15308578014507211237": ["convolution_gpu_bfyx_os_iyx_osv16",222],
+        "3216877571075556066": ["convolution_gpu_bfyx_os_iyx_osv16",557],
+        "991586070509079617": ["convolution_gpu_bfyx_gemm_like",2],
+        "2265784112305305260": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "8329846097322076175": ["convolution_gpu_bfyx_os_iyx_osv16",204],
+        "2129726780118554358": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11587239927319376658": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "15824189967727245909": ["convolution_gpu_bfyx_gemm_like",2],
+        "4191326605459754690": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "12319073009094248232": ["convolution_gpu_bfyx_os_iyx_osv16",236],
+        "13369603621524676979": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "18419183012101393192": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "3215659303601163167": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "659846949368492111": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14759179293743468995": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "16582132711225619740": ["convolution_gpu_bfyx_os_iyx_osv16",200],
+        "4915831715914920982": ["convolution_gpu_bfyx_gemm_like",0],
+        "17856816245251319111": ["convolution_gpu_bfyx_os_iyx_osv16",938],
+        "10727592780669452048": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "15899192375330393731": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "2012181953284568566": ["convolution_gpu_bfyx_os_iyx_osv16",749],
+        "3341302541468955849": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11465965972527519631": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "10706267011822108376": ["convolution_gpu_bfyx_1x1",0],
+        "2622434279674583815": ["convolution_gpu_bfyx_gemm_like",1],
+        "8464582977975377118": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "3621930417735246405": ["convolution_gpu_bfyx_os_iyx_osv16",260],
+        "4561874206785244358": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "7802311886554362782": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "6438522646185979880": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "10109431802089940590": ["convolution_gpu_bfyx_gemm_like",2],
+        "941829593638869991": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "15129834325410878425": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2451712485584835395": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5779388310240896974": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15106614232165315070": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "11690533591656807605": ["convolution_gpu_bfyx_gemm_like",2],
+        "3041612155708729812": ["convolution_gpu_bfyx_os_iyx_osv16",1120],
+        "8059328623525062913": ["convolution_gpu_bfyx_gemm_like",2],
+        "7171904645566467208": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "15901675909820977223": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "16489624657475712467": ["convolution_gpu_bfyx_os_iyx_osv16",800],
+        "14359530849521980269": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "4999505377862312410": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "10414903047695486119": ["convolution_gpu_bfyx_os_iyx_osv16",675],
+        "5495776091407365966": ["convolution_gpu_bfyx_gemm_like",2],
+        "7372956570616880244": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "9700808806849459216": ["convolution_gpu_bfyx_1x1",2],
+        "3792945601873900927": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "8140094412609934765": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "11929531534620071758": ["convolution_gpu_bfyx_os_iyx_osv16",988],
+        "11955992313739654625": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "6370189612027110022": ["convolution_gpu_bfyx_gemm_like",2],
+        "18092842590142527927": ["convolution_gpu_bfyx_os_iyx_osv16",355],
+        "2903605246599054308": ["convolution_gpu_bfyx_os_iyx_osv16",250],
+        "12040626513219974957": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "14312549767853703411": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "13324157125165576832": ["convolution_gpu_bfyx_os_iyx_osv16",676],
+        "291868903926685441": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "952318454591754214": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "12757611260347801001": ["convolution_gpu_bfyx_os_iyx_osv16",312],
+        "8615481457481938667": ["convolution_gpu_bfyx_os_iyx_osv16",50],
+        "6307939332939714967": ["convolution_gpu_bfyx_1x1",1],
+        "12229574562535756991": ["convolution_gpu_bfyx_gemm_like",2],
+        "4282198629458668761": ["convolution_gpu_bfyx_gemm_like",1],
+        "17050675313067213312": ["convolution_gpu_bfyx_os_iyx_osv16",1044],
+        "10628725059172743408": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "9514210061704584354": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "7223801044761006523": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "938848188161536107": ["convolution_gpu_bfyx_1x1",1],
+        "10218763091060511457": ["convolution_gpu_bfyx_os_iyx_osv16",101],
+        "12355112948013108181": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12353956380178079089": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "13951717514084457087": ["convolution_gpu_bfyx_os_iyx_osv16",709],
+        "4444730303823507621": ["convolution_gpu_bfyx_gemm_like",2],
+        "7866128397931438774": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "6423785822515265784": ["convolution_gpu_bfyx_gemm_like",2],
+        "5495063314176654751": ["convolution_gpu_bfyx_gemm_like",0],
+        "1172103288112689821": ["convolution_gpu_bfyx_os_iyx_osv16",563],
+        "14431607479949498164": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "7732899312577293959": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "16504962609450876148": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "11868551452004726281": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "12987636957813312667": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15114370307779942381": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "10869005786136023160": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "11706446082856895571": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4062706195708729345": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "7243917162812988891": ["convolution_gpu_bfyx_gemm_like",2],
+        "4652136280940317116": ["convolution_gpu_bfyx_gemm_like",0],
+        "13503608041359512": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5601435819039968726": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "10292243973236220688": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "8025053805734757314": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "2891736961665476908": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "15161053469199826008": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9372916528346260712": ["convolution_gpu_bfyx_gemm_like",2],
+        "4892959859293355837": ["convolution_gpu_bfyx_gemm_like",0],
+        "9485825829394109934": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "10128390168715530898": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "9642229389394495047": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "6343396486660315308": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3477539135137665170": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "17658152048177750315": ["convolution_gpu_bfyx_os_iyx_osv16",1119],
+        "15993427814066246646": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "13820498543284008286": ["convolution_gpu_bfyx_os_iyx_osv16",727],
+        "487214150851213303": ["convolution_gpu_bfyx_gemm_like",2],
+        "15078590909693331731": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4447065688824381344": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "13644681270630373984": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "4381329435655511217": ["convolution_gpu_bfyx_gemm_like",0],
+        "1364546124782880196": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "15450609897480659306": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "10237524128771958432": ["convolution_gpu_bfyx_gemm_like",2],
+        "8108843303778211282": ["convolution_gpu_bfyx_os_iyx_osv16",1006],
+        "7662200927459001757": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "6139574161497189424": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "1643241486250690844": ["convolution_gpu_bfyx_os_iyx_osv16",338],
+        "5749536453225343663": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "4806571630436601566": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "8241070786700614317": ["convolution_gpu_bfyx_os_iyx_osv16",1077],
+        "5287076386757143976": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "10968768803038046390": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5796500397424307442": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "10893432143734884603": ["convolution_gpu_bfyx_gemm_like",1],
+        "3265415000818832667": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "16768797136991242472": ["convolution_gpu_bfyx_os_iyx_osv16",972],
+        "8873614802459592665": ["convolution_gpu_bfyx_os_iyx_osv16",363],
+        "8270591002934311024": ["convolution_gpu_bfyx_1x1",0],
+        "17951403431757222177": ["fully_connected_gpu_bs_f_bsv16_af8_vload",2],
+        "13851240591038949807": ["convolution_gpu_bfyx_gemm_like",1],
+        "4137755981477177003": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "5589350202160007768": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "6302958994152837045": ["convolution_gpu_bfyx_os_iyx_osv16",126],
+        "6084775920382972735": ["convolution_gpu_bfyx_os_iyx_osv16",1018],
+        "6087091876057515304": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "13646974121952099172": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "13926122593957480821": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "905780459938651623": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "5381354625969068789": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9399994156762372761": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "3070859615622845671": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "7545013298074733778": ["convolution_gpu_bfyx_gemm_like",0],
+        "4282668574670785584": ["convolution_gpu_bfyx_gemm_like",2],
+        "9529614587861271730": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7977195117668583981": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "13439896617880328331": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "743941460026466526": ["convolution_gpu_bfyx_os_iyx_osv16",601],
+        "12954154886708228545": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "15293727142789007900": ["convolution_gpu_bfyx_os_iyx_osv16",333],
+        "8434794604559592624": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "8107447526839063293": ["convolution_gpu_bfyx_os_iyx_osv16",124],
+        "13403161389559730": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "16767392067294252396": ["convolution_gpu_bfyx_gemm_like",2],
+        "775538461106687677": ["fully_connected_gpu_fb_oi_ref",0],
+        "18210370419559876426": ["convolution_gpu_bfyx_os_iyx_osv16",589],
+        "10894058425957901202": ["convolution_gpu_bfyx_1x1",2],
+        "15579919505002150556": ["convolution_gpu_bfyx_gemm_like",2],
+        "14999920879568237166": ["convolution_gpu_bfyx_1x1",0],
+        "8747430148550634190": ["convolution_gpu_bfyx_gemm_like",2],
+        "14131851237755716991": ["convolution_gpu_bfyx_gemm_like",0],
+        "12198263593657033426": ["convolution_gpu_bfyx_os_iyx_osv16",637],
+        "12985650543127289023": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "2929715823970060874": ["convolution_gpu_bfyx_os_iyx_osv16",1103],
+        "12894240573737168362": ["convolution_gpu_bfyx_gemm_like",2],
+        "973966345068677905": ["convolution_gpu_bfyx_1x1",2],
+        "7465681710653503161": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "4664983769199548480": ["convolution_gpu_bfyx_1x1",1],
+        "9939234037869927090": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "7606728651572102823": ["convolution_gpu_bfyx_os_iyx_osv16",215],
+        "4084026445911476156": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "5170245731599664670": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12308359047798183133": ["convolution_gpu_bfyx_gemm_like",0],
+        "9069334144391048686": ["convolution_gpu_bfyx_os_iyx_osv16",856],
+        "13418701036204748812": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13320828013530046693": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "14483314305369207554": ["convolution_gpu_bfyx_1x1",1],
+        "14435120971846098308": ["convolution_gpu_bfyx_os_iyx_osv16",567],
+        "10492056481694320580": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "3007637520820789085": ["convolution_gpu_bfyx_gemm_like",2],
+        "3889519976910355277": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "15829095120243431195": ["convolution_gpu_bfyx_os_iyx_osv16",1072],
+        "4456004887590847716": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "8079376692609682448": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "9043982883185435219": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "3003526572122876385": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "12024143207855886580": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "17174919737114915467": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10811837819834149164": ["convolution_gpu_bfyx_gemm_like",2],
+        "11583985978586657985": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "1900375942069325499": ["convolution_gpu_bfyx_1x1",2],
+        "7474639594232203854": ["convolution_gpu_bfyx_gemm_like",0],
+        "15489746763312425915": ["convolution_gpu_bfyx_gemm_like",2],
+        "15628121900226431719": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "5912451559447635837": ["convolution_gpu_bfyx_os_iyx_osv16",519],
+        "5221320470007950766": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "6263019986730305851": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "7881187047171099732": ["convolution_gpu_bfyx_gemm_like",0],
+        "2452226948562393335": ["convolution_gpu_bfyx_os_iyx_osv16",427],
+        "1838534101161814609": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5843679089588930933": ["convolution_gpu_bfyx_os_iyx_osv16",156],
+        "10412588668458621135": ["convolution_gpu_bfyx_os_iyx_osv16",13],
+        "2669822154816760632": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "517997325935712670": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "11828175723996627443": ["convolution_gpu_bfyx_os_iyx_osv16",975],
+        "4550028191070279999": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5762290464889692462": ["convolution_gpu_bfyx_os_iyx_osv16",974],
+        "5393510569127725391": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "17525564757769958678": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "15488340031228619748": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "2040762223425679479": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "1760690277175249985": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "11092828091552833150": ["convolution_gpu_bfyx_os_iyx_osv16",874],
+        "10110395703775498948": ["convolution_gpu_bfyx_os_iyx_osv16",765],
+        "3216793152416217495": ["convolution_gpu_bfyx_gemm_like",2],
+        "9454512817077883797": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "13251091004269229867": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "13938466156916423478": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "16347412180100581330": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5643908654122573882": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10090036431487700311": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "738850098651678143": ["convolution_gpu_bfyx_os_iyx_osv16",889],
+        "2554991397391195611": ["convolution_gpu_bfyx_gemm_like",2],
+        "2204178900998688268": ["convolution_gpu_bfyx_gemm_like",2],
+        "8671491767142900139": ["convolution_gpu_bfyx_gemm_like",0],
+        "11850332373794932468": ["convolution_gpu_bfyx_os_iyx_osv16",978],
+        "3831261590121101287": ["convolution_gpu_bfyx_os_iyx_osv16",1003],
+        "4099859307693687554": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "1040650352205493707": ["convolution_gpu_bfyx_os_iyx_osv16",219],
+        "11583017348580874022": ["convolution_gpu_bfyx_gemm_like",2],
+        "17907223570737272640": ["convolution_gpu_bfyx_os_iyx_osv16",384],
+        "5898740235388207878": ["convolution_gpu_bfyx_1x1",2],
+        "17634966178519099371": ["convolution_gpu_bfyx_1x1",1],
+        "1701609125136907870": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12194037100109755112": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "10424278617647597641": ["convolution_gpu_bfyx_gemm_like",2],
+        "6996376303337512293": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "4640696923527766618": ["convolution_gpu_bfyx_os_iyx_osv16",228],
+        "11185156002426041243": ["convolution_gpu_bfyx_os_iyx_osv16",713],
+        "12892693137085610062": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "7024495439434892956": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "18180655791734632264": ["convolution_gpu_bfyx_gemm_like",2],
+        "4644580321919256401": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "8100595788531468781": ["convolution_gpu_bfyx_os_iyx_osv16",383],
+        "181006047500375768": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "14571022040013651253": ["convolution_gpu_bfyx_gemm_like",2],
+        "5115134711994944288": ["convolution_gpu_bfyx_gemm_like",1],
+        "16474284418841532356": ["convolution_gpu_bfyx_gemm_like",2],
+        "14762599606783897222": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "17106086048442658788": ["convolution_gpu_bfyx_gemm_like",2],
+        "17216583849049249733": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "13132804928635689780": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "7575675354187625951": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "3105425187506203551": ["convolution_gpu_bfyx_1x1",0],
+        "1698321314111848001": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "6664482192233202590": ["convolution_gpu_bfyx_gemm_like",2],
+        "12545558125736154584": ["convolution_gpu_bfyx_os_iyx_osv16",556],
+        "16559140502701231107": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "15823825508128158158": ["convolution_gpu_bfyx_gemm_like",2],
+        "8526484907799590618": ["convolution_gpu_bfyx_gemm_like",2],
+        "17825280904760131680": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14054116974002669018": ["convolution_gpu_bfyx_1x1",2],
+        "15197248015210313435": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16075006181495932250": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6669808855737023569": ["convolution_gpu_bfyx_os_iyx_osv16",1114],
+        "17025268985366223779": ["convolution_gpu_bfyx_os_iyx_osv16",224],
+        "649203303142950236": ["convolution_gpu_bfyx_os_iyx_osv16",461],
+        "13800760323805415740": ["convolution_gpu_bfyx_gemm_like",2],
+        "6195916781434462809": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11359409533744011242": ["convolution_gpu_bfyx_gemm_like",2],
+        "18109284647478027063": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5582896843095691256": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3332334993503432420": ["convolution_gpu_bfyx_os_iyx_osv16",716],
+        "5994204139128667921": ["convolution_gpu_bfyx_os_iyx_osv16",958],
+        "9759380701896779097": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "6233612563637601101": ["convolution_gpu_bfyx_os_iyx_osv16",324],
+        "12675840135830047968": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4992668316921598993": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "11086699387784339943": ["convolution_gpu_bfyx_gemm_like",2],
+        "3349519148124496343": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "2128376438627103433": ["convolution_gpu_bfyx_gemm_like",2],
+        "52089503050497755": ["convolution_gpu_bfyx_os_iyx_osv16",638],
+        "16936366288366370882": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "7132328255408635227": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "2862999234347597091": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "14835309921389262864": ["convolution_gpu_bfyx_1x1",0],
+        "11499219760597131534": ["convolution_gpu_bfyx_os_iyx_osv16",323],
+        "3432296808755992670": ["convolution_gpu_bfyx_gemm_like",1],
+        "12822126914959112382": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "913496537924971856": ["convolution_gpu_bfyx_os_iyx_osv16",611],
+        "8402692278765063674": ["convolution_gpu_bfyx_os_iyx_osv16",887],
+        "3221469860582147955": ["convolution_gpu_bfyx_gemm_like",2],
+        "12773693193167844110": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "16491532291908469567": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "13328449155966085543": ["convolution_gpu_bfyx_os_iyx_osv16",359],
+        "17515573322312447679": ["convolution_gpu_bfyx_os_iyx_osv16",1053],
+        "6355395905401306995": ["convolution_gpu_bfyx_gemm_like",2],
+        "628191607060767879": ["convolution_gpu_bfyx_os_iyx_osv16",95],
+        "10554266898346470422": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14335074487552883436": ["convolution_gpu_bfyx_gemm_like",1],
+        "10294185397756053636": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "13814086981499638596": ["convolution_gpu_bfyx_os_iyx_osv16",1098],
+        "15979956159651515122": ["convolution_gpu_bfyx_gemm_like",2],
+        "3109104171383198425": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "541817615957967731": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "2809950092498355574": ["convolution_gpu_bfyx_os_iyx_osv16",299],
+        "2294318010381635693": ["convolution_gpu_bfyx_gemm_like",2],
+        "15847413004526420496": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "12992194515157698316": ["convolution_gpu_bfyx_os_iyx_osv16",717],
+        "16364494883229084045": ["convolution_gpu_bfyx_os_iyx_osv16",311],
+        "5584432943673435454": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "11862259122805366807": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "3643250372952944907": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "16293101831324587788": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1051506168926530904": ["fully_connected_gpu_bf_io_input_spatial",0],
+        "6656593119788274992": ["convolution_gpu_bfyx_os_iyx_osv16",1090],
+        "12946531140050029900": ["convolution_gpu_bfyx_os_iyx_osv16",337],
+        "18150429561058646714": ["convolution_gpu_bfyx_gemm_like",0],
+        "15677717057398875599": ["convolution_gpu_bfyx_gemm_like",2],
+        "18259656768460999562": ["convolution_gpu_bfyx_os_iyx_osv16",339],
+        "14103112843209793966": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "11149782181562145291": ["convolution_gpu_bfyx_gemm_like",2],
+        "11999246609107242706": ["convolution_gpu_bfyx_gemm_like",2],
+        "14540578324750869319": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3930314908786112883": ["convolution_gpu_bfyx_gemm_like",2],
+        "2571882179292959757": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5172712078329324967": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "17829047941256922307": ["convolution_gpu_bfyx_os_iyx_osv16",873],
+        "12515465135362865565": ["convolution_gpu_bfyx_os_iyx_osv16",583],
+        "8195881973746570408": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9133263538092913983": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8906588133431586825": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4672441137336208890": ["convolution_gpu_bfyx_gemm_like",2],
+        "18140951659547259039": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "3220280315905987373": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "10328182165125764988": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "17738299860390552088": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5582450255753679095": ["convolution_gpu_bfyx_1x1",1],
+        "3192332625020432602": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "9062774198518904260": ["convolution_gpu_bfyx_gemm_like",2],
+        "5497751772699578150": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15277856047844308598": ["convolution_gpu_bfyx_gemm_like",2],
+        "16913004986170202203": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "3392693938352572136": ["convolution_gpu_bfyx_gemm_like",2],
+        "3120553928584920777": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "15052577143485630617": ["convolution_gpu_bfyx_1x1",0],
+        "14808895254077106198": ["convolution_gpu_bfyx_gemm_like",2],
+        "17285815901490707654": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "9553032671453999824": ["convolution_gpu_bfyx_os_iyx_osv16",186],
+        "4800587664660105589": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "8048617952947915835": ["convolution_gpu_bfyx_gemm_like",2],
+        "4560479630843098090": ["convolution_gpu_bfyx_gemm_like",2],
+        "8527193566719173253": ["convolution_gpu_bfyx_gemm_like",0],
+        "548663565933738403": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "13071545223094862275": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "1569043950563130463": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6214194654733781771": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "59739211822469868": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",1],
+        "11901740241052104941": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "380316849107383484": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "15374625876485618845": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "11825293922127550847": ["convolution_gpu_bfyx_gemm_like",2],
+        "2797723586312707948": ["convolution_gpu_bfyx_os_iyx_osv16",745],
+        "3759515057574218101": ["convolution_gpu_bfyx_gemm_like",2],
+        "17889864541794448203": ["convolution_gpu_bfyx_1x1",2],
+        "11428599290755097395": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "3635446784873718932": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "14038261392627717712": ["convolution_gpu_bfyx_os_iyx_osv16",581],
+        "11559360678008060513": ["convolution_gpu_bfyx_os_iyx_osv16",80],
+        "2525260242689556544": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "8479958930889587809": ["fully_connected_gpu_yxfb_ref",1],
+        "3116068331849795558": ["convolution_gpu_bfyx_gemm_like",2],
+        "852092858392507925": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "16463823433924519300": ["convolution_gpu_bfyx_os_iyx_osv16",1116],
+        "7353563160591978243": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "8451212914744825089": ["convolution_gpu_bfyx_os_iyx_osv16",738],
+        "15857087373591747006": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "712165731154577189": ["convolution_gpu_bfyx_os_iyx_osv16",606],
+        "1652781065871883392": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "17006655627343469372": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "3036808833459559381": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "10670103699537731664": ["convolution_gpu_bfyx_gemm_like",2],
+        "11164519756679631743": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "14491949194619001237": ["convolution_gpu_bfyx_os_iyx_osv16",209],
+        "14174888981602932979": ["convolution_gpu_bfyx_os_iyx_osv16",460],
+        "578703329577922869": ["convolution_gpu_bfyx_os_iyx_osv16",1028],
+        "10000618285883395700": ["convolution_gpu_bfyx_os_iyx_osv16",1079],
+        "16706121580364790904": ["convolution_gpu_bfyx_gemm_like",2],
+        "15381833359831622179": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "12493863403516600413": ["convolution_gpu_bfyx_os_iyx_osv16",263],
+        "12935563359569230797": ["convolution_gpu_bfyx_os_iyx_osv16",588],
+        "7000524935770116969": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "1532263118203058517": ["convolution_gpu_bfyx_os_iyx_osv16",267],
+        "18077281411861416889": ["convolution_gpu_bfyx_os_iyx_osv16",1046],
+        "1485662490111767875": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "12990527753120735255": ["convolution_gpu_bfyx_gemm_like",2],
+        "8306337702797456793": ["convolution_gpu_bfyx_gemm_like",2],
+        "11872464450773754851": ["fully_connected_gpu_bs_f_bsv8_af8_vload",2],
+        "7025975403069487257": ["convolution_gpu_bfyx_os_iyx_osv16",590],
+        "877436308867220589": ["convolution_gpu_bfyx_gemm_like",2],
+        "3797957937905580811": ["convolution_gpu_bfyx_gemm_like",2],
+        "10384537928514123040": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "9447458159095730492": ["convolution_gpu_bfyx_gemm_like",2],
+        "16475247464223458061": ["convolution_gpu_bfyx_gemm_like",0],
+        "18173314625562011976": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11670430946096342056": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "5040730152867713388": ["convolution_gpu_bfyx_gemm_like",2],
+        "584086621952390547": ["convolution_gpu_bfyx_gemm_like",2],
+        "16025442470600124062": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "5941852872160795604": ["convolution_gpu_bfyx_gemm_like",0],
+        "4356817283284529593": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12667014405537239093": ["convolution_gpu_bfyx_os_iyx_osv16",1093],
+        "3689722043202617487": ["convolution_gpu_bfyx_os_iyx_osv16",330],
+        "1473214668483422172": ["convolution_gpu_bfyx_gemm_like",2],
+        "9803492989444302959": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "15669490019428002270": ["convolution_gpu_bfyx_os_iyx_osv16",1092],
+        "7271236108345900406": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "12609361477548272638": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "10025839973092358719": ["convolution_gpu_bfyx_os_iyx_osv16",326],
+        "7211355951470869591": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13308187548669026714": ["convolution_gpu_bfyx_1x1",1],
+        "14695781272831602408": ["convolution_gpu_bfyx_os_iyx_osv16",632],
+        "17490188677223978661": ["convolution_gpu_bfyx_gemm_like",2],
+        "12553441041059632729": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "8500148569566077929": ["convolution_gpu_bfyx_os_iyx_osv16",616],
+        "998876398773540321": ["convolution_gpu_bfyx_1x1",2],
+        "4428101657497677982": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11931568365395665142": ["convolution_gpu_bfyx_gemm_like",2],
+        "9280431727790048190": ["convolution_gpu_bfyx_1x1",0],
+        "7875272450497189442": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "10899110544832584656": ["convolution_gpu_bfyx_os_iyx_osv16",1082],
+        "5419775002149092646": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "14385185911482960528": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "157805434489791310": ["convolution_gpu_bfyx_os_iyx_osv16",1030],
+        "9707630588260222630": ["convolution_gpu_bfyx_os_iyx_osv16",321],
+        "17542414935564676110": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "11649407835105973949": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "16218339663410630711": ["convolution_gpu_bfyx_gemm_like",2],
+        "12641170321047008726": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "5020788604681810984": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13970935346154374605": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "4544242784357021697": ["convolution_gpu_bfyx_gemm_like",2],
+        "4112696777811320312": ["convolution_gpu_bfyx_os_iyx_osv16",591],
+        "2355214244972870639": ["convolution_gpu_bfyx_os_iyx_osv16",530],
+        "2345023488044002149": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "3179874645565098825": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7869916853707978306": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "3480732841490521799": ["convolution_gpu_bfyx_os_iyx_osv16",128],
+        "18424400171776141118": ["convolution_gpu_bfyx_gemm_like",2],
+        "13454265023861566476": ["convolution_gpu_bfyx_gemm_like",0],
+        "9695024256541464964": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "12788968383428254917": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3526580286148537369": ["convolution_gpu_bfyx_gemm_like",2],
+        "6071668124835539929": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4754967381316623440": ["convolution_gpu_bfyx_gemm_like",2],
+        "6025872155179042054": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "4342360467977736802": ["convolution_gpu_bfyx_gemm_like",2],
+        "16758962840329202004": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "18251360413872841969": ["convolution_gpu_bfyx_os_iyx_osv16",669],
+        "1361159591875955678": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "2613462626256090659": ["fully_connected_gpu_bs_f_bsv16_af8_vload",0],
+        "11031569203645035546": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "9429695343610239088": ["convolution_gpu_bfyx_os_iyx_osv16",905],
+        "14289082888174784976": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "5933743119393822386": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "10730222715353420212": ["convolution_gpu_bfyx_os_iyx_osv16",706],
+        "11951606039079763598": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "13893789954946953427": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "1663285216972929652": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "17477062954520561609": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "8253823502854784432": ["convolution_gpu_bfyx_os_iyx_osv16",340],
+        "6323026044750482867": ["convolution_gpu_bfyx_os_iyx_osv16",1040],
+        "3141886504884887200": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "8490260671996115530": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "16033512206711124104": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "13961773444580398856": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "10387844339156517393": ["convolution_gpu_bfyx_1x1",2],
+        "7603872175048237237": ["convolution_gpu_bfyx_1x1",2],
+        "11724225282274130518": ["convolution_gpu_bfyx_os_iyx_osv16",714],
+        "16352331970945217438": ["convolution_gpu_bfyx_os_iyx_osv16",736],
+        "5864250949922222051": ["convolution_gpu_bfyx_os_iyx_osv16",319],
+        "12988961529988078346": ["convolution_gpu_bfyx_os_iyx_osv16",595],
+        "18386376129938707290": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16071723603031305677": ["convolution_gpu_bfyx_gemm_like",2],
+        "4437258459981739942": ["convolution_gpu_bfyx_os_iyx_osv16",1047],
+        "13933912937625580405": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "5853697372844744672": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "5570311824197099845": ["convolution_gpu_winograd_6x3_s1_fused",1],
+        "7689320135952025041": ["convolution_gpu_bfyx_gemm_like",0],
+        "6491244517639245276": ["convolution_gpu_bfyx_os_iyx_osv16",747],
+        "17564338309805484464": ["convolution_gpu_bfyx_gemm_like",2],
+        "4766071144928072260": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "9091110033424983286": ["convolution_gpu_bfyx_os_iyx_osv16",322],
+        "9354818521586974021": ["convolution_gpu_bfyx_gemm_like",2],
+        "12985942652866621579": ["fully_connected_gpu_bs_f_bsv8_af8_vload",1],
+        "10967218651864700933": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "7334966010680206302": ["convolution_gpu_bfyx_gemm_like",2],
+        "17947818179123182001": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13190888313721073437": ["convolution_gpu_bfyx_os_iyx_osv16",1101],
+        "15078262396281327048": ["convolution_gpu_bfyx_gemm_like",1],
+        "7947870656736319919": ["convolution_gpu_bfyx_os_iyx_osv16",702],
+        "9090828337597312855": ["convolution_gpu_bfyx_gemm_like",2],
+        "3150231129728961455": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "1008476023750261156": ["convolution_gpu_bfyx_1x1",1],
+        "11528417522960871233": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7121708962074176240": ["convolution_gpu_bfyx_1x1",0],
+        "17101789600628162503": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "5893940382830835820": ["convolution_gpu_bfyx_os_iyx_osv16",729],
+        "583303098958523195": ["convolution_gpu_bfyx_os_iyx_osv16",212],
+        "14813178380338948912": ["convolution_gpu_bfyx_os_iyx_osv16",907],
+        "7349880498513046830": ["convolution_gpu_bfyx_1x1",2],
+        "13115589642140732066": ["convolution_gpu_bfyx_os_iyx_osv16",240],
+        "11744368351982723504": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "7394217382008802567": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "9454954846682513038": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "16683089431066989909": ["convolution_gpu_bfyx_gemm_like",2],
+        "6290317420155851465": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "11948858355027908365": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "6114241186364821679": ["convolution_gpu_bfyx_gemm_like",2],
+        "2937907409658060025": ["convolution_gpu_bfyx_os_iyx_osv16",201],
+        "17025182465337728023": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "4084106758501882407": ["fully_connected_gpu_bf_io_input_spatial",2],
+        "3985659568982275663": ["convolution_gpu_bfyx_os_iyx_osv16",1123],
+        "8655883535274781128": ["convolution_gpu_bfyx_gemm_like",2],
+        "12461575861709234385": ["convolution_gpu_bfyx_gemm_like",2],
+        "11768117585574496387": ["convolution_gpu_bfyx_os_iyx_osv16",301],
+        "9438739171104456179": ["convolution_gpu_bfyx_os_iyx_osv16",955],
+        "1742897526168249500": ["convolution_gpu_bfyx_gemm_like",2],
+        "8792202318168046223": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "11490143853656040028": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "4491380839102267034": ["convolution_gpu_bfyx_gemm_like",2],
+        "2967481531952454828": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "10256831975351722184": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "3788462090984291082": ["convolution_gpu_bfyx_os_iyx_osv16",1084],
+        "8106738346643994005": ["convolution_gpu_bfyx_gemm_like",1],
+        "9500850790449116723": ["convolution_gpu_bfyx_os_iyx_osv16",758],
+        "10979362792894404338": ["convolution_gpu_bfyx_gemm_like",0],
+        "3141773224039276177": ["convolution_gpu_bfyx_1x1",1],
+        "16588325081458426169": ["convolution_gpu_bfyx_gemm_like",0],
+        "17647962002015093887": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "12782932626966309185": ["convolution_gpu_bfyx_os_iyx_osv16",213],
+        "1706927777850488363": ["convolution_gpu_bfyx_os_iyx_osv16",1127],
+        "1372939511728986224": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17877776363798202236": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "9056038338958199256": ["convolution_gpu_bfyx_os_iyx_osv16",707],
+        "7708321360699824256": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15713964605078748923": ["convolution_gpu_bfyx_gemm_like",2],
+        "18172711677056449158": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "1218323229202187514": ["convolution_gpu_bfyx_gemm_like",2],
+        "3102816736961785641": ["convolution_gpu_bfyx_os_iyx_osv16",992],
+        "16863960779539003201": ["convolution_gpu_bfyx_os_iyx_osv16",555],
+        "4073467095502162430": ["convolution_gpu_bfyx_os_iyx_osv16",255],
+        "5688478347124565305": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "8921636651939679647": ["convolution_gpu_bfyx_1x1",2],
+        "388828310152538138": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "10292349730148518173": ["convolution_gpu_bfyx_gemm_like",2],
+        "13708979487306970634": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "4196367396954155354": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "7649413902932043811": ["convolution_gpu_bfyx_gemm_like",2],
+        "3926585856863002495": ["convolution_gpu_bfyx_os_iyx_osv16",227],
+        "8501145642605270365": ["convolution_gpu_bfyx_gemm_like",2],
+        "13170441257780067955": ["convolution_gpu_bfyx_os_iyx_osv16",1104],
+        "2983038203471784211": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "12480527132372884168": ["convolution_gpu_bfyx_1x1",2],
+        "17640725195881101275": ["convolution_gpu_bfyx_gemm_like",2],
+        "11716771904412649891": ["convolution_gpu_bfyx_os_iyx_osv16",438],
+        "1436052878894538927": ["convolution_gpu_bfyx_os_iyx_osv16",604],
+        "7918742312252115870": ["convolution_gpu_bfyx_os_iyx_osv16",279],
+        "1626430741965136732": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "16267682394077585279": ["convolution_gpu_bfyx_os_iyx_osv16",538],
+        "13709111882513486557": ["convolution_gpu_bfyx_os_iyx_osv16",999],
+        "11469881811044037340": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "9099720270958987421": ["convolution_gpu_bfyx_1x1",1],
+        "4983880246908724272": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "9657324846330221372": ["convolution_gpu_bfyx_1x1",2],
+        "8860815977851486767": ["convolution_gpu_bfyx_os_iyx_osv16",40],
+        "16426179645101678763": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "16206791915939407806": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "1033385936344875354": ["convolution_gpu_bfyx_gemm_like",2],
+        "16744011463988595802": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "11060822686394981344": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "4897448054295474302": ["convolution_gpu_bfyx_gemm_like",1],
+        "12031180482028822765": ["convolution_gpu_bfyx_gemm_like",1],
+        "16789135236017252073": ["convolution_gpu_bfyx_gemm_like",2],
+        "6942622405269419082": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "6181308879301978465": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "15860915170591763391": ["convolution_gpu_bfyx_os_iyx_osv16",1087],
+        "7447163906170805189": ["convolution_gpu_bfyx_os_iyx_osv16",1065],
+        "6312971928547466668": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "5524215233998361104": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "8032685176029570383": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "9947449295659685973": ["convolution_gpu_bfyx_gemm_like",0],
+        "12937333118472722002": ["convolution_gpu_bfyx_gemm_like",2],
+        "10023279637210292010": ["convolution_gpu_bfyx_os_iyx_osv16",1115],
+        "13183380647506951324": ["convolution_gpu_bfyx_gemm_like",1],
+        "18203935818408469865": ["convolution_gpu_bfyx_os_iyx_osv16",653],
+        "2623687018437195679": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "17723621158215826108": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "3273748387141431306": ["convolution_gpu_bfyx_os_iyx_osv16",93],
+        "18094205332383644037": ["convolution_gpu_bfyx_os_iyx_osv16",181],
+        "16547425454653232058": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "2857337999074313592": ["convolution_gpu_bfyx_os_iyx_osv16",1083],
+        "654821507679356726": ["convolution_gpu_bfyx_os_iyx_osv16",957],
+        "15026219694198820614": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "4533786844080178561": ["convolution_gpu_bfyx_os_iyx_osv16",1106],
+        "5440983284868981549": ["convolution_gpu_bfyx_gemm_like",2],
+        "8532217744217419503": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "6522575549211855712": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "15675903059949404837": ["convolution_gpu_bfyx_1x1",0],
+        "8567667881970262923": ["convolution_gpu_bfyx_os_iyx_osv16",634],
+        "7322472892320910654": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "13139625572508441980": ["convolution_gpu_bfyx_os_iyx_osv16",334],
+        "8965747921518186477": ["convolution_gpu_bfyx_os_iyx_osv16",419],
+        "13210604117940125947": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "4505008254511324231": ["convolution_gpu_bfyx_os_iyx_osv16",744],
+        "10635659193402005820": ["convolution_gpu_bfyx_os_iyx_osv16",737],
+        "10917498758625273194": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12581879452540858313": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "4082229510324076196": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3037042229494600258": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "8036474422877454869": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "15011504472108164173": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "603883331897298932": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "3017411837779243878": ["convolution_gpu_bfyx_gemm_like",2],
+        "10398572248321217585": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "10747988576436391912": ["convolution_gpu_bfyx_os_iyx_osv16",1011],
+        "8791285622784082122": ["convolution_gpu_bfyx_os_iyx_osv16",558],
+        "2041212737963974230": ["convolution_gpu_bfyx_gemm_like",2],
+        "916389941321470163": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "6580334406272192111": ["fully_connected_gpu_fb_io_ref",1],
+        "14343008518525689150": ["convolution_gpu_bfyx_1x1",0],
+        "7472330881076141262": ["convolution_gpu_bfyx_gemm_like",2],
+        "13575423234109624706": ["fully_connected_gpu_bs_f_bsv16_af8_vload",1],
+        "9954050478761346921": ["convolution_gpu_bfyx_os_iyx_osv16",610],
+        "7072606962946873975": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "12408889192918919210": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "4622514167765722873": ["convolution_gpu_bfyx_os_iyx_osv16",15],
+        "13184662326021747000": ["convolution_gpu_bfyx_os_iyx_osv16",577],
+        "5835634465164771899": ["convolution_gpu_bfyx_os_iyx_osv16",257],
+        "15778834188130183853": ["convolution_gpu_bfyx_os_iyx_osv16",935],
+        "994182747184593564": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "11207257238719531888": ["convolution_gpu_bfyx_gemm_like",2],
+        "9692654253261175490": ["convolution_gpu_bfyx_os_iyx_osv16",335],
+        "5629670679897666607": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "5762878778443755104": ["fully_connected_gpu_bs_f_bsv8_af8_vload",0],
+        "2150326211917340956": ["convolution_gpu_bfyx_gemm_like",2],
+        "939718260623752240": ["convolution_gpu_bfyx_gemm_like",1],
+        "11459784003592366395": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "7581174843529024536": ["convolution_gpu_bfyx_os_iyx_osv16",635],
+        "15879172437519876393": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1201692134690347847": ["convolution_gpu_bfyx_os_iyx_osv16",1075],
+        "15916505622570323098": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "10135458965276110244": ["convolution_gpu_bfyx_1x1",0],
+        "11820789223587555410": ["convolution_gpu_bfyx_1x1",0],
+        "15466940145773097237": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "9839670675413379092": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15392077168521832549": ["convolution_gpu_bfyx_os_iyx_osv16",710],
+        "296142385116663420": ["convolution_gpu_bfyx_os_iyx_osv16",374],
+        "14258499419905714808": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "15859493313686060349": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "17444003685761357480": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "5840254078917931433": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "4991419288164762786": ["convolution_gpu_bfyx_os_iyx_osv16",584],
+        "7104309382120208659": ["convolution_gpu_bfyx_gemm_like",2],
+        "2477849395789783501": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "5331173521406046122": ["convolution_gpu_bfyx_os_iyx_osv16",646],
+        "14553856088069405595": ["convolution_gpu_bfyx_os_iyx_osv16",1121],
+        "13379165253894817165": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "4871907623235871050": ["convolution_gpu_bfyx_os_iyx_osv16",837],
+        "9440117898128288296": ["convolution_gpu_bfyx_os_iyx_osv16",1010],
+        "11632275875447013409": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4865102850562917067": ["convolution_gpu_bfyx_os_iyx_osv16",854],
+        "386749666417295495": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "11834361584875491425": ["convolution_gpu_bfyx_1x1",0],
+        "1497560475414454618": ["convolution_gpu_bfyx_gemm_like",2],
+        "3746573775462003750": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16264774056719724826": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15799159401545270696": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "3599823735065658574": ["convolution_gpu_bfyx_os_iyx_osv16",959],
+        "10536316961655703500": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "13447028922679236865": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "6133592828563353516": ["convolution_gpu_bfyx_os_iyx_osv16",310],
+        "9452470718398027950": ["convolution_gpu_bfyx_os_iyx_osv16",1031],
+        "13512863534076172940": ["convolution_gpu_bfyx_gemm_like",2],
+        "12978370505631031751": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "1434535531617424039": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "16986358655784856534": ["convolution_gpu_bfyx_gemm_like",2],
+        "15661322183507404821": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7941729567451949422": ["convolution_gpu_bfyx_os_iyx_osv16",315],
+        "10488269059469838160": ["convolution_gpu_bfyx_os_iyx_osv16",908],
+        "12696412964119109465": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12776081190690731910": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "14994322266840011040": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "11275109735493317886": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "11806105193035393795": ["convolution_gpu_bfyx_gemm_like",2],
+        "13131740479277027362": ["fully_connected_gpu_bs_f_bsv16_b1",1],
+        "1996860183441418841": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "706370730287471796": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "4006884370026272807": ["convolution_gpu_bfyx_gemm_like",2],
+        "5519535335798045279": ["convolution_gpu_bfyx_gemm_like",2],
+        "8141428150264829362": ["convolution_gpu_bfyx_os_iyx_osv16",248],
+        "4803370483104261655": ["convolution_gpu_bfyx_gemm_like",2],
+        "9269175963143039426": ["convolution_gpu_bfyx_os_iyx_osv16",742],
+        "15178921033274918199": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "192209423643075326": ["convolution_gpu_bfyx_gemm_like",2],
+        "7570346182940928159": ["convolution_gpu_bfyx_gemm_like",0],
+        "1251525426317284548": ["convolution_gpu_bfyx_os_iyx_osv16",23],
+        "1509728225855233852": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "12643423612381102003": ["convolution_gpu_bfyx_os_iyx_osv16",830],
+        "8540111719936129376": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "5429130923188159806": ["convolution_gpu_bfyx_os_iyx_osv16",477],
+        "9849272539053219052": ["convolution_gpu_bfyx_os_iyx_osv16",965],
+        "5522698342845820411": ["convolution_gpu_bfyx_os_iyx_osv16",580],
+        "3691705516240577130": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17010172246526353957": ["convolution_gpu_bfyx_1x1",2],
+        "9928406318940388716": ["convolution_gpu_bfyx_os_iyx_osv16",1111],
+        "9999955037598579164": ["convolution_gpu_bfyx_os_iyx_osv16",314],
+        "6403698142681887543": ["convolution_gpu_bfyx_gemm_like",1],
+        "5795073619189010837": ["convolution_gpu_winograd_6x3_s1_fused",2],
+        "15118142492742177336": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "3895088069642140043": ["convolution_gpu_bfyx_os_iyx_osv16",976],
+        "14447191095937730964": ["convolution_gpu_bfyx_os_iyx_osv16",329],
+        "2124033349728954551": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "16522364268583242080": ["convolution_gpu_bfyx_gemm_like",2],
+        "4239415134522959352": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "5658664813683907476": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13738760763969959522": ["convolution_gpu_bfyx_gemm_like",0],
+        "272730229972987861": ["convolution_gpu_bfyx_os_iyx_osv16",734],
+        "4237276338897143680": ["convolution_gpu_bfyx_os_iyx_osv16",353],
+        "17961702508543961900": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "16152775342222431281": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "16425665058951535484": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "10787747981914307179": ["convolution_gpu_bfyx_1x1",2],
+        "11703557271443535142": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "8640150341228170279": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "2096779676054335057": ["convolution_gpu_bfyx_gemm_like",2],
+        "11975047184326016230": ["convolution_gpu_bfyx_gemm_like",1],
+        "8482147530539941792": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "4734389463002799056": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "4129722446574108695": ["convolution_gpu_bfyx_1x1",0],
+        "9795194069954915563": ["convolution_gpu_bfyx_gemm_like",2],
+        "16681690088928624738": ["convolution_gpu_bfyx_os_iyx_osv16",691],
+        "16567486018945740036": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "6664432489777052771": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "8006738296385794413": ["convolution_gpu_bfyx_os_iyx_osv16",1076],
+        "12809199739984715013": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "9366201112659847392": ["convolution_gpu_bfyx_os_iyx_osv16",364],
+        "577844026691991089": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "8866736221671835567": ["convolution_gpu_bfyx_os_iyx_osv16",53],
+        "13839116996827687373": ["convolution_gpu_bfyx_gemm_like",0],
+        "5095827462645341808": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "54975980454651672": ["convolution_gpu_bfyx_os_iyx_osv16",1081],
+        "18277685132620834972": ["convolution_gpu_bfyx_os_iyx_osv16",159],
+        "4914435717288687793": ["convolution_gpu_bfyx_1x1",2],
+        "6584960721513702502": ["convolution_gpu_bfyx_gemm_like",2],
+        "12393385058735194260": ["convolution_gpu_bfyx_os_iyx_osv16",983],
+        "4465781406991476376": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "4135068756462147853": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "10808909442136736629": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "6904130543085920483": ["convolution_gpu_bfyx_os_iyx_osv16",953],
+        "11450378244355788918": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "8618835732380720921": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "2251029128552117936": ["convolution_gpu_bfyx_os_iyx_osv16",369],
+        "17724604495865223459": ["convolution_gpu_bfyx_gemm_like",2],
+        "2226745622763268469": ["convolution_gpu_bfyx_os_iyx_osv16",331],
+        "7307271009495440764": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "138379779469699309": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10912495395422146386": ["convolution_gpu_bfyx_os_iyx_osv16",261],
+        "11239754372812258455": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9519623751582710696": ["convolution_gpu_bfyx_os_iyx_osv16",439],
+        "5041111302824362529": ["convolution_gpu_bfyx_os_iyx_osv16",1105],
+        "12489973984967168447": ["convolution_gpu_bfyx_1x1",2],
+        "16442107352245114876": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "17318287523550546026": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7199295899520406795": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "16461809076899645037": ["convolution_gpu_bfyx_os_iyx_osv16",9],
+        "3682813162987778705": ["convolution_gpu_bfyx_os_iyx_osv16",630],
+        "5876880412336151866": ["convolution_gpu_bfyx_os_iyx_osv16",218],
+        "16430562172386510259": ["convolution_gpu_bfyx_gemm_like",2],
+        "5831419373611158773": ["convolution_gpu_bfyx_os_iyx_osv16",739],
+        "2261453441277654139": ["convolution_gpu_bfyx_os_iyx_osv16",54],
+        "4398371999113956082": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "7106362077449435105": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "4635570915184713874": ["convolution_gpu_bfyx_gemm_like",1],
+        "6048964584602891448": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "1551596771935253711": ["convolution_gpu_bfyx_gemm_like",2],
+        "9522661528867955338": ["convolution_gpu_bfyx_gemm_like",2],
+        "5740738339752793113": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "9220830217525628783": ["convolution_gpu_bfyx_gemm_like",2],
+        "3372770576629463160": ["convolution_gpu_bfyx_os_iyx_osv16",231],
+        "15702382940521972117": ["convolution_gpu_bfyx_os_iyx_osv16",1008],
+        "2052712465925238009": ["convolution_gpu_bfyx_os_iyx_osv16",813],
+        "12864204111424196179": ["convolution_gpu_bfyx_1x1",2],
+        "5637480705139132901": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "4079026972040047969": ["convolution_gpu_bfyx_os_iyx_osv16",262],
+        "3603706453982734995": ["convolution_gpu_bfyx_os_iyx_osv16",573],
+        "1643122514049603104": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "4138968242532400395": ["convolution_gpu_bfyx_gemm_like",2],
+        "16666792471632326054": ["convolution_gpu_bfyx_gemm_like",2],
+        "8220168481755031959": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "3430266954211750407": ["convolution_gpu_bfyx_os_iyx_osv16",352],
+        "13760645810144930270": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "12207503176295152756": ["convolution_gpu_bfyx_1x1",0],
+        "1788455099959676873": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "2140514316203117958": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "38736266675995457": ["convolution_gpu_bfyx_os_iyx_osv16",607],
+        "5339985303398206057": ["convolution_gpu_bfyx_os_iyx_osv16",180],
+        "4161141078006269526": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "17443356777503458523": ["convolution_gpu_bfyx_gemm_like",1],
+        "6410682026872155392": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "13713406612642090169": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "16409729623371222748": ["convolution_gpu_bfyx_os_iyx_osv16",982],
+        "1155389358857780776": ["convolution_gpu_bfyx_os_iyx_osv16",463],
+        "2231648183489019418": ["convolution_gpu_bfyx_os_iyx_osv16",814],
+        "4056979460327024961": ["convolution_gpu_bfyx_os_iyx_osv16",708],
+        "13459514533473657102": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "6181272224000872375": ["convolution_gpu_bfyx_gemm_like",2],
+        "12625112690264223217": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "17109520309574369561": ["convolution_gpu_bfyx_gemm_like",2],
+        "12952980509662451384": ["convolution_gpu_bfyx_os_iyx_osv16",462],
+        "5805383505505929391": ["convolution_gpu_bfyx_os_iyx_osv16",624],
+        "9606639214735570069": ["convolution_gpu_bfyx_os_iyx_osv16",249],
+        "10169992769527680821": ["convolution_gpu_bfyx_os_iyx_osv16",700],
+        "4338023436590582323": ["convolution_gpu_bfyx_os_iyx_osv16",127],
+        "16705621644424684055": ["convolution_gpu_bfyx_os_iyx_osv16",356],
+        "15334195300678132907": ["fully_connected_gpu_bf_io_gemm",0],
+        "4625107584562815965": ["convolution_gpu_bfyx_os_iyx_osv16",585],
+        "11848462434662954749": ["convolution_gpu_bfyx_os_iyx_osv16",1015],
+        "13352000946213986936": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "17759505449240263390": ["convolution_gpu_bfyx_os_iyx_osv16",690],
+        "4026686872534942904": ["convolution_gpu_bfyx_os_iyx_osv16",179],
+        "5183231560876991543": ["convolution_gpu_bfyx_os_iyx_osv16",320],
+        "3391032227732782982": ["convolution_gpu_bfyx_os_iyx_osv16",1113],
+        "7177837234452118325": ["convolution_gpu_bfyx_os_iyx_osv16",123],
+        "14578867494693499627": ["convolution_gpu_bfyx_os_iyx_osv16",182],
+        "9410978119783758141": ["convolution_gpu_bfyx_os_iyx_osv16",625],
+        "3304589333915676807": ["convolution_gpu_bfyx_gemm_like",2],
+        "11728824117049687850": ["convolution_gpu_bfyx_gemm_like",2],
+        "708747442142592697": ["convolution_gpu_bfyx_gemm_like",1],
+        "17798636687709019154": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "15959543980008442942": ["convolution_gpu_bfyx_os_iyx_osv16",1085],
+        "16781187505186394353": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "7183578232279711009": ["convolution_gpu_bfyx_os_iyx_osv16",1064],
+        "12635265188475834607": ["convolution_gpu_bfyx_os_iyx_osv16",532],
+        "2800949804770763798": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "1882052795393187384": ["convolution_gpu_bfyx_os_iyx_osv16",671],
+        "9726913113016874092": ["convolution_gpu_bfyx_gemm_like",2],
+        "9048522050692986204": ["convolution_gpu_bfyx_os_iyx_osv16",371],
+        "15133468875250992696": ["convolution_gpu_bfyx_os_iyx_osv16",349],
+        "10429613013253088132": ["convolution_gpu_bfyx_gemm_like",1],
+        "8083672466967374860": ["convolution_gpu_bfyx_os_iyx_osv16",891],
+        "9882204352209412039": ["convolution_gpu_bfyx_os_iyx_osv16",728],
+        "14104238386345631681": ["convolution_gpu_winograd_6x3_s1_fused",0],
+        "15047676717402283805": ["convolution_gpu_bfyx_os_iyx_osv16",367],
+        "11324851661119942609": ["convolution_gpu_bfyx_os_iyx_osv16",253],
+        "15796677813117622429": ["convolution_gpu_bfyx_gemm_like",2],
+        "3429844423226609965": ["convolution_gpu_bfyx_gemm_like",2],
+        "8843585527713905568": ["convolution_gpu_bfyx_gemm_like",2],
+        "10293186062391000719": ["convolution_gpu_bfyx_os_iyx_osv16",7],
+        "2986189945936592561": ["convolution_gpu_bfyx_os_iyx_osv16",559],
+        "16921939234324970069": ["convolution_gpu_bfyx_os_iyx_osv16",1067],
+        "7900926714874404219": ["convolution_gpu_bfyx_depthwise_weights_lwg",1],
+        "10512507780534402341": ["convolution_gpu_bfyx_os_iyx_osv16",1022],
+        "3755253206085028904": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "7524311370696987092": ["convolution_gpu_bfyx_1x1_hgemm_buf_16x1",2],
+        "14991602704357959545": ["convolution_gpu_bfyx_os_iyx_osv16",699],
+        "10728212277329722684": ["convolution_gpu_bfyx_gemm_like",2],
+        "16758697697363920520": ["convolution_gpu_bfyx_os_iyx_osv16",930],
+        "7272538316511343863": ["convolution_gpu_bfyx_gemm_like",0],
+        "4403753181729432604": ["convolution_gpu_bfyx_os_iyx_osv16",1086],
+        "12365282242489300092": ["convolution_gpu_bfyx_os_iyx_osv16",879],
+        "6205240287062600210": ["convolution_gpu_bfyx_gemm_like",2],
+        "9869959062341950047": ["convolution_gpu_bfyx_1x1",1],
+        "11265472910579659280": ["convolution_gpu_bfyx_gemm_like",2],
+        "1540041682425757361": ["convolution_gpu_bfyx_gemm_like",1],
+        "12068797674575015662": ["convolution_gpu_bfyx_os_iyx_osv16",636],
+        "14418429155823196539": ["convolution_gpu_bfyx_gemm_like",2],
+        "15529757761327002288": ["convolution_gpu_bfyx_os_iyx_osv16",332],
+        "11727227430687227444": ["convolution_gpu_bfyx_os_iyx_osv16",354],
+        "12309132521191764927": ["convolution_gpu_bfyx_os_iyx_osv16",730],
+        "1056009037551688122": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3711525118850629466": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "15352245788978088971": ["convolution_gpu_bfyx_os_iyx_osv16",1014],
+        "8819268903800581706": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "17201365233492366678": ["convolution_gpu_bfyx_os_iyx_osv16",1012],
+        "7457899998356343871": ["convolution_gpu_bfyx_os_iyx_osv16",670],
+        "731825454731954517": ["convolution_gpu_bfyx_gemm_like",2],
+        "12972634653821069685": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "751912075185318190": ["convolution_gpu_bfyx_os_iyx_osv16",210],
+        "883436333317162926": ["convolution_gpu_bfyx_1x1",2],
+        "12024817951074673335": ["convolution_gpu_bfyx_1x1",0],
+        "9101903304994333336": ["convolution_gpu_bfyx_os_iyx_osv16",697],
+        "10573920781439771673": ["convolution_gpu_bfyx_os_iyx_osv16",300],
+        "7840653268996892538": ["convolution_gpu_bfyx_gemm_like",2],
+        "4738743763536059708": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "14077148976508649021": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "13590444711975157776": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "4479117540570599742": ["convolution_gpu_bfyx_gemm_like",2],
+        "10923480230259977438": ["convolution_gpu_bfyx_1x1",1],
+        "1914964404168211864": ["convolution_gpu_bfyx_gemm_like",2],
+        "9040046051053703359": ["convolution_gpu_bfyx_gemm_like",2],
+        "3218248162832023196": ["convolution_gpu_bfyx_os_iyx_osv16",970],
+        "11830297960718214360": ["convolution_gpu_bfyx_os_iyx_osv16",258],
+        "4325081100430903742": ["convolution_gpu_bfyx_gemm_like",2],
+        "3202085450628781999": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "16027456210394993913": ["convolution_gpu_bfyx_os_iyx_osv16",1080],
+        "142486914279119363": ["convolution_gpu_bfyx_os_iyx_osv16",633],
+        "10128143628088846123": ["convolution_gpu_bfyx_os_iyx_osv16",971],
+        "2732519635571994212": ["convolution_gpu_bfyx_os_iyx_osv16",967],
+        "8984436655107983227": ["convolution_gpu_bfyx_os_iyx_osv16",688],
+        "10722782762733112118": ["convolution_gpu_bfyx_1x1",1],
+        "12730339458081890990": ["convolution_gpu_bfyx_os_iyx_osv16",368],
+        "4531222427159927606": ["convolution_gpu_bfyx_os_iyx_osv16",725],
+        "12668149981216388765": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "12087141795291232248": ["convolution_gpu_bfyx_os_iyx_osv16",1118],
+        "5659168916726488798": ["convolution_gpu_bfyx_os_iyx_osv16",1074],
+        "18431306649860116380": ["convolution_gpu_bfyx_gemm_like",2],
+        "10682918518101379579": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "1822096761703761792": ["convolution_gpu_bfyx_1x1",1],
+        "474139120607442270": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "5334190564423375247": ["convolution_gpu_bfyx_direct_10_12_16",2],
+        "760687670112194844": ["convolution_gpu_bfyx_os_iyx_osv16",1073],
+        "13178480813522103091": ["fully_connected_gpu_bf_io_gemm",0],
+        "17382660912493284320": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "5558136691773431495": ["convolution_gpu_bfyx_os_iyx_osv16",598],
+        "6863331059471727622": ["convolution_gpu_bfyx_os_iyx_osv16",692],
+        "941626985322260281": ["convolution_gpu_bfyx_gemm_like",1],
+        "101401523793806394": ["convolution_gpu_bfyx_gemm_like",2],
+        "12557015880639217508": ["convolution_gpu_bfyx_os_iyx_osv16",1091],
+        "5763440554939527411": ["convolution_gpu_bfyx_os_iyx_osv16",626],
+        "6635217802203685464": ["convolution_gpu_bfyx_os_iyx_osv16",961],
+        "14352303529756685990": ["convolution_gpu_bfyx_gemm_like",1],
+        "16882092367103683293": ["convolution_gpu_bfyx_depthwise_weights_lwg",2],
+        "3272017687600371031": ["convolution_gpu_bfyx_gemm_like",2],
+        "18424912460022156378": ["convolution_gpu_bfyx_os_iyx_osv16",962],
+        "16770615142634470903": ["convolution_gpu_bfyx_os_iyx_osv16",696],
+        "1752185056297124917": ["convolution_gpu_bfyx_1x1",2],
+        "13317417676446624018": ["convolution_gpu_bfyx_os_iyx_osv16",748],
+        "11883485911218628865": ["convolution_gpu_bfyx_os_iyx_osv16",302],
+        "7700321970687976931": ["convolution_gpu_bfyx_os_iyx_osv16",711],
+        "9423854233835016530": ["convolution_gpu_bfyx_depthwise_weights_lwg",0],
+        "721174714308243785": ["convolution_gpu_bfyx_os_iyx_osv16",631],
+        "10548792624072794724": ["convolution_gpu_bfyx_os_iyx_osv16",629],
+        "2608363732937932266": ["convolution_gpu_bfyx_gemm_like",2],
+        "946479876892100082": ["convolution_gpu_bfyx_gemm_like",2],
+        "17809920600993699808": ["convolution_gpu_bfyx_os_iyx_osv16",87],
+        "5334291640387922287": ["convolution_gpu_bfyx_os_iyx_osv16",1126],
+        "4894227264080887361": ["convolution_gpu_bfyx_os_iyx_osv16",799],
+        "12238674883388043717": ["convolution_gpu_bfyx_os_iyx_osv16",582],
+        "6733731409232284409": ["convolution_gpu_bfyx_os_iyx_osv16",1109],
+        "14502856487639608696": ["convolution_gpu_bfyx_os_iyx_osv16",934],
+        "13538051178827008933": ["convolution_gpu_bfyx_os_iyx_osv16",107],
+        "15759530339367380982": ["convolution_gpu_bfyx_gemm_like",2],
+        "4903592553439092472": ["convolution_gpu_bfyx_os_iyx_osv16",910],
+        "11163107409437069532": ["convolution_gpu_bfyx_os_iyx_osv16",203],
+        "12179581684777023804": ["convolution_gpu_bfyx_gemm_like",2],
+        "16129296588866116913": ["convolution_gpu_bfyx_os_iyx_osv16",1125],
+        "16065744898134487748": ["convolution_gpu_bfyx_os_iyx_osv16",689],
+        "2543041530639980505": ["convolution_gpu_bfyx_os_iyx_osv16",294],
+        "9751582946441607796": ["convolution_gpu_bfyx_gemm_like",1],
+        "9785114056964539323": ["convolution_gpu_bfyx_os_iyx_osv16",256],
+        "7500192998744460131": ["fully_connected_gpu_bf_io_input_spatial",1],
+        "9622546530872848323": ["convolution_gpu_bfyx_os_iyx_osv16",328],
+        "17154337492545826355": ["convolution_gpu_bfyx_gemm_like",1],
+        "9404677451270692749": ["convolution_gpu_bfyx_direct_10_12_16",0],
+        "12388375914105990324": ["convolution_gpu_bfyx_direct_10_12_16",1],
+        "14811603003184578943": ["convolution_gpu_bfyx_gemm_like",2],
+        "17522452942286240233": ["convolution_gpu_bfyx_os_iyx_osv16",704],
+        "18082422341304348326": ["convolution_gpu_bfyx_os_iyx_osv16",698],
+        "17224104246148265328": ["convolution_gpu_bfyx_os_iyx_osv16",705],
+        "9477562342190423343": ["convolution_gpu_bfyx_os_iyx_osv16",1100],
+        "5003718302026277632": ["convolution_gpu_bfyx_os_iyx_osv16",1066],
+        "13204120207726209723": ["fully_connected_gpu_bf_io_gemm",0],
+        "12112853999307505628": ["convolution_gpu_bfyx_gemm_like",2],
+        "2440366541074371090": ["convolution_gpu_bfyx_os_iyx_osv16",839],
+        "9101334153142718004": ["convolution_gpu_bfyx_gemm_like",2],
+        "15619086801947147359": ["convolution_gpu_bfyx_os_iyx_osv16",764],
+        "18180820925685532104": ["convolution_gpu_bfyx_gemm_like",2],
+        "14122213471825630433": ["convolution_gpu_bfyx_gemm_like",2],
+        "13253775441326432265": ["convolution_gpu_bfyx_os_iyx_osv16",1110],
+        "4013707396889204359": ["convolution_gpu_bfyx_os_iyx_osv16",231
+        ]
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_APL.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_APL.cpp
deleted file mode 100644
index 20699192f..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_APL.cpp
+++ /dev/null
@@ -1,2572 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    //APL 10W
-    void tuning_cache_5A84(tuning_data& td)
-    {
-        td.td.insert({
-            { "4583484812233029888", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7560832358324865221", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7382044526960590018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12372261924257291610", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "1547771611689525848", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3134973665622945888", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18260030211719729324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7416143717989012766", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12028963907131702705", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2464531851392092325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8181308759455478086", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "546062289721803579", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4889405384318695802", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12841232643395100314", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14108361259911144680", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2726453304845436156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "2607416795507802412", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "2175404966338020579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14666883719480623074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3752993663604843837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 56) },
-            { "5274735654559844733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8174421295799601683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "1967655354607438665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15762542971370422224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8183203099539372914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4075343423548891274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "13264497096898621015", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "679058537775669048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "3375634256357960999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2844616672368585285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "14235558866846276172", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "18066867692765966577", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9861424412782371874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "607078314875528651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "6234885984223387670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "7223570329858821704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "17234843749633035510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11516168882438876247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11312664612825940140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "14846039494240217143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3390376200501119384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 115) },
-            { "1113077760071340574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "4614875083188849196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10859023312681572942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5588692131556725717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "7653946972043115920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "9773458066743315157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10491513939202460216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "8140122945471321201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "15079423575410353790", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "12844146569641472927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "13443914015380511668", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "13404457916017756196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 69) },
-            { "6402415801415013013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8595156989254845134", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14493123117003003092", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7391309333582046386", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9935182178960843140", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15422142509105297183", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14849987788569183527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14923132847727661051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13271555597925466454", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6865593216823998846", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12385437755245281331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4530047829451377456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "127643210248119703", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2599051617462913767", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3024020696533545102", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16205377892664082400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "15135655146332608939", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "10848724554175904486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15558120704022404428", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14120354125904513152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8475075092501403968", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15892943371777404347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15078418657871922661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "3502053626453342387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1230316443026403527", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12846418701225147646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "17386994561779281406", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7367684259946371231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5451072983028714092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11758623888547009364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15958650715061024845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13899144453581769028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "10884229860266073967", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6629431845229592220", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13199442294147992119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "5032929712205664246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14871333176552512036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "17890435688048047959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11353661571093800805", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3101908018947919238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "7495240482209084478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "6964506613327100469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "3670645005971806718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "69949758775887534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "13654393413005772278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "11101512074369779300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "2273811004985590823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7457154125218067377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "7709677514862642399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "8010619564572573208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16479793487852125428", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3147355028342035061", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12672939642957531547", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12627961914394914920", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16210688853876861607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "1899485873740458557", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "669151029135558505", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "8912067280071688393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "7714589858275971005", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9794413496918699979", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18350040136091421971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "16931304566154830346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "12816950084297042217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "2359632276970855181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16592641501972654496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7754054384598160936", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18195884921517044108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16256130331524359070", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "13497279823712860029", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "6095972148204769193", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4149964766407000732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10262104071809780712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10707129891337660055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "3585075254981736756", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3181067565488724209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "8636008354706344794", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4966150965920189853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "5569253153294942795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "11521347729886549503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "12399471154320580621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "14851218369956754103", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "14859848826604327499", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "14783159891899899660", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "2369671961317151564", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1691004331056506231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4465288557833228023", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13538111995551348621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "1886751914747841929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "8445964247944285746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "803205084059316676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "12654698468722759675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "13484605287576302088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "2469399061693302590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "14782181149367028912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "1448238652280623323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "2076478920663115306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "7369834759425644726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11702633755046828968", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "2944972038827287015", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8261441437673092886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "16694312773479519523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "5486494868955566721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "8246009573416434030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9457894602447879547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "4598302923247277427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "3116224788980631217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17103527368951412486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10150428063205056209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "16984028253790680977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "17857105233471273424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "2715609009808401074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "232807837985324954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11254635684957519432", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "6729077823331194042", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15961933828477762733", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "827225131390571924", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10429613013253088132", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14629385997654952321", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7546586420552408243", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "11897687507601277182", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "13975409361394567866", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6458124573210430792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "2385616965635993249", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14462438074931673266", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9338654554616107568", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15476402794704488137", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "13654816209891478730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "1680424228660495363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "12929981792125924963", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "4698507050987130777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "1094144958579794349", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12358908585763044267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "9793373151408615612", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "90849151510482266", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17277787450259342076", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "2367877811435050998", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "433942345363552443", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14575816691130255191", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3675622521877371819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7601637686045360430", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9818496628902493298", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "377651990943545344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16357661916741979192", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5417669424921804056", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "854020380490533945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11984602132438314210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "13100228219613095795", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9808704199834907703", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13071373212254908241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "18392748682101174561", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11928475964162658765", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11937547211842355800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4637568849323640167", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16812695025037565299", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10487883723723512839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15315014737515653325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1579905786032546689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4243996335899627971", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2545885699369058867", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16926950874716567095", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6355395905401306995", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2096779676054335057", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4217179485243909459", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17101789600628162503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "6139574161497189424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "16559140502701231107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "11459784003592366395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "7869916853707978306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 94) },
-            //{ "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1841901358010744236", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10888203577545955226", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9799890897264103013", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1827296932806936575", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13010820430079828498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6245781545617904772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8714031312599034571", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12922099252166105096", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3042887030242700493", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1419879016567682338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12870587285162108523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6103433181190121715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3469599265931338557", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14812617666668076833", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14854734265631496499", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "7637441820772916248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "14695781272831602408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "15696910741835640150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17790593820165047954", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1433224983833208570", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16185194021453870096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "10310918050196558188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "14885938077915823034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "14442357887993453368", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4719130523147011420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8870222084473246330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "9350596936816632825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "6183248276225219542", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3757195189216622027", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "8318857994507665384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "12864512857659000129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6615646900347529347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "5208923086986567490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "9390793435913144215", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11997629302296435180", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4858270366437120918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 265) },
-            { "15975176007724247667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "16837473534895641370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "16214153687871223428", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "8858112708913743577", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "8844677471730173649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "9043742986995534354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "15621341038256548867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "18268980125375728709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9757167087033785227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6513982093384445397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "9273893819042428704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "8270840662337272430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "15021512490648380369", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "6578908625437515675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13762042713029963144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "668798769117277023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "10917498758625273194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14335423820860953927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13978750151855895830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "8965747921518186477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 62) },
-            { "4428101657497677982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "8151272056391095510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 62) },
-            { "17656341100957270390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 44) },
-            { "10295330953350618042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "15901675909820977223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "6026876733674266377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "381149736509958403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "7962991673727743706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "10064251191248475177", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16663239694378513014", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4690831975451405214", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "16132498413588349821", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "10609644803793651808", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "16520784657717262379", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2271187702055786721", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1872921634399989626", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12535576637355537200", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7771729980527620398", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14849108908297747749", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14043770215999952932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15277856047844308598", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8048617952947915835", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11446745541571732900", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17422822627612865758", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13954144830230671601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "11198908896401597838", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "5582896843095691256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "8133587696326295326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "9492402787848610840", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10515519878978734341", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1375156980278317418", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13455881643467418059", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12788968383428254917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "12304975739476881266", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3390014193205017427", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "13587202155230938291", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14462744723628661203", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "16109721499545711936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 106) },
-            { "10090923790949378407", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16672038432561840773", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2973773544904290726", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17193614571243427089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2862029728492027826", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11066930104187448422", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2755147389712995637", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10662798624911535617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "2172121470071868949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3579916582911190192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "5495063314176654751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3771003491521695667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "15514370342945522276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "17285639145064557279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "12642574441854544900", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5471430682416582179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "8561154029325525444", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8939900194037985459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "2129726780118554358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15463465056816958579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9268536904925062469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "45977313646881991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "7092246390386193774", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14801984300948838261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "2797723586312707948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "8451212914744825089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "5131348852069018593", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "13619081494170885939", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "8818679285688095197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "4608292692528881356", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "9729987752669765456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1782966703272153440", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15641674846325113216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "18140951659547259039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "3416294810798281053", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "12066560812164094695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "9332596500956923556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15067550526427941795", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15428062440621131394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "8873614802459592665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "3367130693014583254", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14135594471530769414", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14971707650115908544", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3036512701943687724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "5334291640387922287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16245760498096322525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "9928406318940388716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3369894612786523432", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "590505356692040012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "13240472672791632740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "7128145024365641089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "11497327844388026594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "7301757962797024939", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "18121689595247452649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "875296362957469305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "14912119584313592912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "12494969618927201911", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "4640611487944119712", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1692411934657235774", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1673006919995519093", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "10601835610089648700", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "13262672660175739705", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "7639015398436550592", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13867172651521406104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "14587150810299279663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "17271409929705935575", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16744813357455687598", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17215047912921813592", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "4093195092417588676", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17895953872149392740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "5918874715861160937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14498368518428801865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13857947326347149693", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4965619590663772423", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17153828952517174005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "9864812885638557249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "8410695282651246173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "3011957000022205132", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "18202466898415347292", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "11433166800587133728", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4499160027703324879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10225878843410985743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10961131057009777878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "17123897723015586893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3938875063592179645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "7589320923145169660", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 186) },
-            { "13907115679251591389", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5262155845067632954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12323619994816664201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13711624246076632711", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13879644216615040961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "13418213186769741623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "9850414237385072276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13762987373425432087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "15387492794262813616", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "9455446170928387706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "7799083605029182328", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "6416346888102436677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "13401162817870652306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "4574862993950020539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3956303186129893250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "14928794187754412027", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "9892597035419316966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5275016494706355806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "10947686124973711385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 30) },
-            { "3997597867012981671", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4003433148846544263", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "4361250474585164062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10523363119855336043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "1718324808394833635", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8430284238380067998", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "3950448771871155887", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "9033877528655370244", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "8878071105867359307", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "7511984934520363336", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "860443413504997114", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4957638663977636791", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14793503588688729262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9303039486341715392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "8923406201866512905", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "14629889085799380442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "4811310048537439646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "8622985922687454592", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "8611046137980763541", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1879796404388368873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "6928835003016610382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8603207107304593583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6477198553362516437", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6377441002585730862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15761554874575656075", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "118354408955419547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "13601202334102031245", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11716196499333250570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "16984923535088627888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "364996668506826202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "6412527114952548517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "4892959859293355837", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16127331840410137228", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "13335944978055152562", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10715829903767495958", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6065404265303390338", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "3897655522585667381", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1860663592951633878", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5698748062275134041", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15945452307780131237", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6764685582382238740", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4737109912659941670", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9371952894576491521", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "12696412964119109465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17825874529822806486", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8220763890959777277", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17731591992960147987", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8950668477702067729", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14991602704357959545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16383540667048742064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16820082917500285799", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6820284286806022849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17285815901490707654", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "994182747184593564", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6642767323474835034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3215659303601163167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "54975980454651672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "11529876081402974396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3644282167178264526", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "360872770877634346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16720108310653948550", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14353390922580547467", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9868561386826862471", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17465517455679097501", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5570311824197099845", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7524311370696987092", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14070988879848388270", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "8296551195150971668", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14352796912241296357", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9840495023131952174", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4720851194954041037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "17009318615658405230", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-        });
-        td.td.insert({
-            { "9421643783312790618", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "10693348571961406417", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2940027113687311893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6090625728451718945", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5643908654122573882", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8852322966320229583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11185156002426041243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14670068483447729857", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4623542918584461522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 150) },
-            { "3126708271410621754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "1434535531617424039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "17025268985366223779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "15398976608777968810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "7413341807736193935", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6071597471486669736", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1127095963814993729", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8611856835854445891", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11115935318793891293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "5393081375805921525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "17589256877540537468", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4135814997524960840", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15180348902159643465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "2818524781020760666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "4942080349816430490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9263314249867362", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14377032179148581309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15245529372955421912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13595283050046771323", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15667549927492357263", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13827442968070281886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "12076060884099762835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16532386511585070092", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4910582540370962997", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12335148041391647118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 150) },
-            { "10689880083512104726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "8870164706606458004", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9269498023794081940", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6779832349039897240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13942354789498444722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "14294764660016835141", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12323510278692809329", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "5728070995112243570", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5381496395266530071", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9712640406795417230", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15036737419347383878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 185) },
-            { "11552594222313787816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "9399255910184037480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "10594581016504135920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15640487942881889055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "14165417928501578590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "12251989236991754721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 59) },
-            { "6675363512560434713", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9831713940431605743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "6531349504807709133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "2726501303929773572", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "10439704858943788014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "18137994263450376706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "5711991739289045727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "15255831401757117660", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "3906658058160172747", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "15823433297099049221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 343) },
-            { "7829483638597533960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "14092273913846393837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "3746578485711843646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12228183555926126959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "8776893332387904786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "16672299044236704672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 343) },
-            { "13309889945947393850", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "15966815420067673043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "7415938485228396256", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9655590024687998403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "14798289196964890724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "9794684437872784678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "16729204245488754836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15185983488152870534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "13821372148587948765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "4727004015814244856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1738348894912205653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "559491455289877068", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "17312172687490475177", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3470176432841342662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "8950283515337670839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3995072673238444396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1238913228370790536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "928677976151553489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4059887681292863495", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10493952422143348278", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5610465912655751128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "759163065093339795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "11300938516591867859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "12843263740221725967", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "888316366026890514", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8088645310090149658", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15891058658954073255", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "3456538031339928220", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "14187063304165334647", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11593893535334124231", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9218293603091125898", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "3614865264081581688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "8860682105104682547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "3775781894241463386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "8857354069987696352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "5611508857136313396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "10872828113308792940", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "3726594456692340607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "7541331569935741737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5639394073086652531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "11158391063762007051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6319861294308997034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "6893801771793379570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "9015970699147699643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "9252735579930779632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "16237353798629485972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "10916127635689513485", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14631094106016920364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5050075828787158563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "2277573429750402800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "7462044209068160751", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "8879836520351993142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "560198731460537880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6414187394150266523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "15317838148382459105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "11219109605495282242", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "10404790565578782014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "4657890394631454901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "4256171754976506222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "12658039760507507230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9883901352719605734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "5115148310176289236", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4892280615322354003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15491567059821267605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "4716932801711295063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "15559962129967760292", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7866546777503165080", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12994023006726461909", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1869893771689012539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "5635187738652974532", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12849693339574251399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "13233683642200681957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "4310121962651039089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "7620758476872568593", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "10344702612951473525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "1668590302432600271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "8750610033922701675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "6913992575736424382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "17945230226911262869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "7356559449640788577", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 289) },
-            { "18349175655630268884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16817085704588915904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 185) },
-            { "9503107262691437536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15595806193584438610", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "14283867094396458105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "14215445060938730397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "12720976113342879024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "14766625154638709852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "11757187678986741715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "13038212285326297688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "13919423909034348565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "12925256096286953030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "6275163484075546689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "239651884801599911", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "9500211224156027451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "10902538092301362853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "8454943813981348115", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "11722951613064434115", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "13547342611064538960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "15171119202712914112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5451487099025245427", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1814940262511664251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "16341609351317463829", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "14343280871046671393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "15586404971308258630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "15891211707425019144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "15351688973597240327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "1844016761754156672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "17925606428283439978", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "15050158761219834868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "17448180555072943363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "11422222075976800614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "14284377769814732906", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "667777413731244716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "11624071786842686451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3874974512053082278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "5471037497181745651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "6371386660654628561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "5331835606773958814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16163821504542698475", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "5697543838890997891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "6217438921274668801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "2633095809604510774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "11218297661079136641", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "3374196543196230185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17797320202829145544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "17198778757516749818", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "6440981718484677922", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "7643715911083095268", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "137903092932521503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13203019690952060789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "3918152537861570517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8782903242853500098", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18312668164562040079", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "15160703466234996170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "2751241748685218213", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12622728760401804660", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 113) },
-            { "1290624457831957354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "10924946887162830574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 133) },
-            { "10789202693606479024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "3718558874911694616", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "12835389389575311182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17406431092101974143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1400409391266374603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12154660333025778322", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16474284418841532356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18117355153710110681", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8449591498895477846", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2962899568083589487", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3491333679577961640", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13945298510228460890", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4282198629458668761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "779525528509830615", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15002237905129290671", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16833026567865627676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "16991060247581867302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6902644989079870993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "15088285782819494786", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3379661203936923589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "10049571207493913006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "15691689005236690951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "11275109735493317886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "12220860296984467101", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "44210723233569665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "14014987361364503383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "18189351665719757712", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16159032667792855758", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3374037004378790060", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "6765409971512438438", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10098892297878373639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "981877665302032867", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7472350511000146655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "15513971895394346930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "6169721205327431190", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4519054607159036572", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "12750124851833311828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "4333851142313192116", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6041620003527819661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "15091361629922645798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "9348121965341418899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "393951904144235223", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3220084080191614421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "11610588256244825741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2802357220980817497", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8972812517118478580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4207115359813621211", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 75) },
-            { "16582237002610438015", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "772342953072606219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8546247990965609013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "7971830510840138313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3570484486449791727", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11461079340079820563", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12663860560275361463", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8132521728369930959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16108573960501496757", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11086699387784339943", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4013707396889204359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "11850332373794932468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "14763982961176216679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "8207349115037232863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "3273748387141431306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            //{ "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14472322679644532468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "8378137527264154204", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10180255575636684134", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "18242121098885244699", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6178572652675599622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6558074021146321216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "11038938372264857379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 119) },
-            { "17137800360536507200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 148) },
-            { "14016185289182597841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "8970519484272874266", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3289746379259038515", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18389174979070260315", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17666483005735191253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "10845781902676865789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "13646634862315619979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "5072154928583891344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7757331094141318304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16779678846332091086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "12223166874490429642", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "13316017702896072758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "10390896207372295988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "8386498395042623384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "17923632501885139982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "13398326377839777956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "13520557646924372128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "5996787039089786722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "2626376166907387273", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "6147643392694904814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "4311921348668650791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "8046109476498335792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "14190077682825257613", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "5032302126047788183", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15256375572125522238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5083173538217738703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "17269467004855120308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "2901056469731554922", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "9747637051217505111", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7175860674618956918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "9530922411870814200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "11015319643831560673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "5182740559503076121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "7567277014404457462", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "15973842639221447367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "15951492056203075273", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "5283253936050062275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "17650690912303447913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "9614936270604202220", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "13998661469619523378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "10584034255622783869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "2973436171295280783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "1908809004094565452", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "1623383628456201603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "5766507688771440170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "16626226341188424071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "14619055893081624406", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "17222005830854879661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14224121742920800990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5859124386313585730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "9596156698919548146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "2729099061601852493", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8233922303282945338", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7402006230339617617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "8420763628389536977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "8325767678959979628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "7673672840505587739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14166169053627992481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "120923426036313670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "7348084298010357768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "14653065651448352526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "11008522061447263744", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "305505245310584136", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "6472139251351862598", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10586018593856542117", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16706121580364790904", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495776091407365966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16430562172386510259", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5673972310424776040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8797843396807284399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "1698321314111848001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "5762290464889692462", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "3218248162832023196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "12988961529988078346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11683680166617045816", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6252429564537528709", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11717348577195224554", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9275303306340702111", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12245096462203481681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "18439435691655740074", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "15272426400992401555", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9136831791301215059", std::make_tuple("fully_connected_gpu_bfyx_ref", -1) },
-            { "18238669114790278675", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15376246520426368532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 146) },
-            { "16469788155263456039", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9360494451263553093", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7897877428349481398", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9354818521586974021", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5853553261686771766", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6351572488552853754", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11372638316835753193", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6170074103544756465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 150) },
-            { "4544242784357021697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14225108809796795520", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "178353385245384751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2002574142025049539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "11630971824787392820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "11542493210215136239", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9595803435783166868", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17610828776103321939", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "18312069177632970412", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 116) },
-            { "7577483892218843723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4355933224673863178", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15124932296735391043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16888042302987189589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "1584639932403433303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "15516194807992507442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "6614374536332038989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "5097818987523855112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6623182990939010641", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "17001023283013862129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "7935150275452094595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "2326323992207208685", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5072735784865711772", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4683841893192741312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "10341773151035665956", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14109534738984061372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "16706244336960642883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "1967030672241059921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1482100699000420627", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "13632911653636980024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "6198830126915940359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "12125006289181390694", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "14152716242882609401", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16732621354152092286", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17921489101554455214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "7384108582424003436", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13139625572508441980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16491532291908469567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "17788367809717898285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "1509728225855233852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "3816705689596666600", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14157505468412850916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "17366807170224886960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14548509699664316785", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "10404702662303016402", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "16436357970364549479", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4858167644379876157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "11718418772370938734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 160) },
-            { "989564341557094953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8133676065307881979", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14555883089089918919", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14026570177552137240", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11686670048744589243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6678796313875454849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "641417817126876622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9622546530872848323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9194788897910888066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "522181557896569275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3332334993503432420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16131448347558322280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "13852065717057446998", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4342360467977736802", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16336482874764861478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "6075691042233712335", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7570346182940928159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "12971822824884826169", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3033264172690274208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17301887391757619741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "15790005937034794347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "15464327246951632247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5659168916726488798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "8079376692609682448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15160738482264643601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13953639482255428227", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12293786134765875615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "18214412375127043522", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10670103699537731664", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10147266284710177932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11443268857010762276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4659943649635556150", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11726298758004767743", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13896680298436380632", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3797957937905580811", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16294962940703055933", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8526484907799590618", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13723543003759101485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9873647901670251106", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13268525255152984893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "13008742408950833847", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8616686489737649890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "11423865221956815041", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14990645740260870030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "15204453579641378742", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10892456883214928095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "17856816245251319111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "1799430190598598671", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3106591708459602370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "4111904926378218826", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7678457226823073886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "8786249783185140623", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5389189982064081933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "15175088047384943892", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15331103261044247142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "3627273785739110683", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12478309735214802531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "7212944937255713716", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11873734271080160669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "18421820525219154881", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9553032671453999824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "3860603464276263676", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14115313335378184289", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "9519623751582710696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "10288726118862235940", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "2231648183489019418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6316097202867006365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "16208488491972128275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "17978026144659698965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "2566302789609970663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2714742023091949586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "3087801652564627458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "8602155166799218249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "14230385851791760020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "10828719108804915700", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "13973179950424276578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "3668927000317872012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7947870656736319919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-        });
-        td.td.insert({
-            { "14767888121198814523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "10401632438377178271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "8762901342272872498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "10783981060353445280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "12608289345175485333", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "7875272450497189442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "1599725688135122629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "11932770338770247767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "11184047387366978375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "11716771904412649891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "12576157843776905380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "8402692278765063674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6781076363516398481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "3255465741612432300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "9767950219863105043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "7134654288295280046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5821853991835395449", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "3480732841490521799", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6973260260946088987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "15649927926091502215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "7910468668367486698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "156456996459945842", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "1994927850993519406", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "16431165572426232677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2864254144951744544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "5390559917122707732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "8378839908604146288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "17163158934005653629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "1185280691070355160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "1999979442136861875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2527189070714658176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16032797290430373799", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "16783619135298589974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "12785335515281046438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9216608098626790565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "18257496796879980386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "2452226948562393335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "11072545690050335239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12668149981216388765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "714898562476771473", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11115684531624462986", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "8710684853144029787", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3752171257634205726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5243587439683016777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "16881283637687482989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6042976104660344109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7351733901977025859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "9341400376014914418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "13713406612642090169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "683350872280694452", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "10436819182310112786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10269788826827249402", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12558716383635737426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7181186153851700294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "8107447526839063293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "1016414921656805365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "4871907623235871050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15539976365475470623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3880189981766119529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10154958553575016770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "4561874206785244358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "12358640399843058144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12956726277674279950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7177837234452118325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "3160080179644173650", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9832551412183684637", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6347790007333387897", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13219313818719819982", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17780553554354185249", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13315473376247698298", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11815135771923538945", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12465309202808173810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "7171436879576678563", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9407046952012845638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "8805267762044816983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14381377343079009210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "13248818835662551847", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "9300668734746602663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "7706778813807762766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "16991433003318725315", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4584399194832832140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "8558026087297588736", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10198351802037434471", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13257958112171706655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "14722464361594874490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "1544616395544118800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5955569479109539856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "17738708576252096108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "10276056345160651377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "13515249925520423329", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "10055593174764596789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "8707130584661395715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "11161176476048297041", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "6959258479021077609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "6365109451272429541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "8191978674781978488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "11604224659996035116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "1759873215866222608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4559874433048442047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6937259685509040959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "8242732346001884230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "16156727721974657541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "13786314015179226945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "4202371435873473624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 186) },
-            { "10933135228023712253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14467312749536832362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "10557843071473489529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14967016402348718219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "7594056145185406157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "12051754199123379659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "2634827464202220192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "10211888372266149335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "4548339182509526896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "828946941343000506", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "13008375263617223352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "5638301531544801477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "10213021343800816450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "12000084249129063723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "18040104088851490930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "16394608147869554267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "15229178454191871174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4877661058006573128", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7515937801840512449", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4747017546101861376", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8833751655076849826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "16256124470203598218", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2706523860113152678", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14946999257618007034", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13699343107940933196", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2887515984302814699", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4906737644615337997", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4725303208352054390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14955652052550053223", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "862470330257326268", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10381668587006680936", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2593337359555305520", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10774393239130591748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16247780189312707876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4487284881658782961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2811240876735166934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2447678508469638445", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "7454164784767168407", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "18275848121133385773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13759457214873634937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4855959048455906948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7160112985819045832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "6880424067049089394", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "748023061136366353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "15793120434966402276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1932618420321708351", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "3336444565837087463", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "15067224168014815918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4431271266410883917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14115818307364071162", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15250928896997938213", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13013685738525906988", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17607598031220186942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10278583197921433748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "5300123851331202735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10751381988703627540", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9905160045246767203", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2927340528757005274", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16243861301305882872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "15004681374954252324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4496537089364942280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "13357951046545317387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12878719705192625362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "4785466104509327241", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2416244034719176938", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "17785504548342377669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12811319921474895164", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10760000973615798613", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6300105753728135778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5791707725846814784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13246629627758485603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "9400558994532871122", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "17865276008842107020", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9981156409872807880", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4626770940790542333", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1094262369519841857", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5523297987528243797", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1789389636704094004", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13544237579827433636", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17696244668222870549", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8926171136732424790", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16179159307898475953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "2692291137583386471", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "1095433004701276122", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16277739324697771064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15945243427420522827", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12296021067910843036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10673589588224406026", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3585431879296991112", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "3119002388778552316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5322582996019286781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "13225749488949717853", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13207215182979880133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "17730578026124357983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9725306578495355500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "5277508201756602822", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12806959657459851511", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15232478805009654818", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17712227426604098630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "2530975976273876727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "6232318392696042532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "13657818175298160631", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4088603773237062922", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11177710514557128293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "17515272254985846970", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "312130674630486188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "18247095696433793115", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6341728273786101457", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10401462893795799864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "3032101782888447048", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15078331029547630371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "18043541805861795852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "7608435380564752000", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7129337563584588644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12204270722180734542", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "13588405581356678469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "4986281570682617547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "2214420531345686129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "15030725973433075086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "15384520760315696372", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1915712383376159541", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6176816506826300479", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15331830720555178784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17558578036713688769", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "2388815483287403961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15948716167523201661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6787190800192250525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3378135802544446861", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13053802967262518173", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5503306970973862635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9417884304413500664", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7866083951140251349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10084810175406860705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "10342347371769114236", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4063042455950354352", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10055531955039754920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13173341667656398216", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9356247214800869277", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "8630592326601832361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5041676938441886628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15379755045295790608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13410979599123644577", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "13504573816477550406", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4459291258089899503", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13992993617743773278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13395962624719382401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "1535675815795592775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9849036672784280133", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10780684483689207763", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "4060515618437959603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9203467651096078409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9698108593334526558", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3252398754887381352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3120759967333088019", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "2024996599975373573", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14876099702827489987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "8653894569484019347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "4004518396368398824", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12801342874692090364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9387557098916352467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11689587446775003898", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13973363990921590224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "6278030053136901802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "12122586525659611649", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17711197779492504718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "12489342380264260364", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12148845150031891038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "5080727465135503101", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16818862727193981112", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "1827977959922344361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "6291003899324240633", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3002862967523058894", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7222921168135747513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "8696847224485998117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "7453625482178960081", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "1472822945750487574", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2032419134020329477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "805104869568121149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17215312565214990348", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "1737128374457513820", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16263489451695566992", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1608378717397996752", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14346703182362139650", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9744493065276230785", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8331721527098298378", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12097373631649932423", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17442035600389810700", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15953351443307161934", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8740196547852036537", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13809436837912218131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9722172495422643735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "2662628817605495834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6163765140843670080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "15662207751131195569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16494358566119044242", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12641727819019838301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4917595053453614536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14577496472237742721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "4356806313729405658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "14282717676967464809", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7275701540104992761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6459003512612780875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "12791541622557283904", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11882713776717158678", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10982479758700194728", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "8714769962126708854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "7639744043430667021", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6804493132858449665", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5204696395552974337", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "8893913418784905112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13496918758899426996", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4707842387180272918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "425930963222944558", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3844246198992827038", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14280128364139551919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6774493262072228712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "5670860641930464485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10055923266096584825", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13508499324621059445", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "17431631935986646683", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5568431877348597159", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "356320499267651746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "10632294140185068783", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12339584174527699309", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8556999353039153661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "15381427144405510339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8855986581847188591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "13704396706685353016", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "17128550517647168353", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "8625183189646433895", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "7921388663815287395", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4213330047036138895", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17034122796081495259", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13076343553185159307", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5854267518455107328", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13675314612031135613", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12825029449351875037", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9397711809671506538", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12965800692507042874", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2647922515901529845", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "10961049607808752432", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "13988022841867948024", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4612862531793961340", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10950469938532358632", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2228733394430438519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12015814430456201522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12344008430499496640", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9863615330219779441", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3560058786734628608", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16047381404034145819", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9714811479610938662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7306541374689856571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "5689486642279577539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15545653867155770893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14910368344505819159", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3220771309796407003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "12786796142417489350", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "13947140171097868740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "1168311873250200110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3495786143085325748", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2164537487697642190", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10623345643437043886", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15240415102190323330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10321975076426598984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6467563111927343808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4280198021826662216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9464830880142854424", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12113781253211924677", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5410693492803892704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "4844529595057806427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10848097581672953022", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7947428837044782745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11705938507822117867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8334753494554256932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4995468555341975721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6282308289220311358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "18275232300842488846", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15754022314306112499", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15193841338943103284", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "956475051281637098", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "1117811515417136925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "760383787039304033", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "5351526116347538406", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15923292837937693143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "2954421933443715181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "14945079011377285773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9573520179708447727", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11432977101529429562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "8918387046558682780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "16699295198130950587", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17358462939783262207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "17406383217119217230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "14003645277231336821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "6638761803107874904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "1630585964216121575", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10745248353587672572", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "13395074742046717601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "12659539044474018256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "6598024975967050290", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2006890470582854116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 164) },
-            { "11369389082421346630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "4986977887030495943", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "9681320098885387731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "8730097760819044515", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "11882021989615795558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8202324251716703125", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2932157519158822224", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8431962471592709199", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16116546888494787089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "2954606701225038770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "6757752550680050481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "5893257440341358427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "1327911294059513894", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7771820069600757360", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "4618159169098049590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12268432630136256720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "2373658589834410892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8440300225468667909", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14495382595913294626", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "4974435385259831818", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4455497237293642238", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "682912708716537431", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2585176064846114298", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16033144151193421543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12141880589558027223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10098661517988566506", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "16192971634546462244", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "14793709237400480942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "1646362346584649954", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "4874397454627474644", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "6171331678772388712", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "7496699438957793920", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16767657090925788431", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1006721963560645335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "14753245713079865819", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "1779870708816318465", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17157919258161230886", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "12398103047184982980", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "2961249862769657168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12131460825751874564", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "12365814254940023343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "17218545462549916519", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11877919824125633092", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10679711602282897680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17801375178828079914", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15446821602347034830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "16041087076800110589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14102351022029437177", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5786978465690715325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10140124683113804219", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14022671143475909407", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7468500876165989695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4628560194573173205", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "3963065974337687046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "13439359175348786664", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5342116782332968020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9198777289928370963", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "18276472227494448327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15774430281717785574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3887883367078892827", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1004081473410027655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "5460182945235134126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "7932494263344450271", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5596359111431962318", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1116176429672030385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14642276070370158123", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1709508499926680213", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15466995361950304551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "12936512845587590244", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4464844599426088921", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12966090642798680442", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "128970554088066862", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7183620142123364052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "3793885399790365373", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "4932548298968525464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "8248099164876900927", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "249355510483373796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "2837134119351786115", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "141687758281942172", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15718782218800307385", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3191047205441946466", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1564774057733793087", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11134833419828370568", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "1556966764088589197", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "6087676883600048234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15052286556809931759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3377724880784871475", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3452246087500006120", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6840268976700446867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "4278180549747978226", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "17856997406888930289", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "16556093306187145310", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2581594444558181374", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3377472614945731801", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10622082408513122112", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-        });
-        td.td.insert({
-            { "11452807035432891156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "13529174180301001127", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "17184405948599119534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "5921658305530976502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6802655190570100236", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17877430344093804543", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "153771221207255459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "15596913527233792996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "1016967125909374575", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6829653688530177613", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6094638411430816112", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7559615879839693931", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11270266455366424659", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7100226796198950149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "210793817522061488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "11152357292626304216", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5771335481927877060", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14619753612256300695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "2839767407547705101", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4981552552200657366", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "9626028243479089234", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3164513064874019611", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2363414141971004557", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8962502004422485576", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3154903035376733831", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16134637021630473012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "5553176511624221429", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "4890932609897686394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15334769670416409064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5513667102916409932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5351705572686943348", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8200094670006738584", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8100051552977329013", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9004823715680825977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "16179959997108523051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15148625184033310404", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6577754887650563753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13182965457868586949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13839590781642269381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12711366212612147422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7963529808900784906", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12184558469694708819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "3285180770267559354", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6613282637922219205", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17093159649157277089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14660081992091188026", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16228026045292341333", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "269829518575229806", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13023942860659386957", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13291308922240014334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "1187622888238643867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16229324496308453344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "14019704891647234793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "6141637854990273316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "13524128602135083081", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "531020979837645217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "8416686771626338600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "2916077416184925232", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "16862531110856250955", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11352536854890889084", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "1683347645109643149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4374049085310743239", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16159971034327080937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "15779210035964863067", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15153285262450947102", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7049603973253724866", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9389671301472986523", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13891598020647124806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "9315279998737090956", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18261342465838720356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3632541114724731809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17088320301520334100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4352363968456148009", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "1827842275223841485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7548767746018027960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "17750850961096057029", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7606282654661282476", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6201358671959761215", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4829111442270007186", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7267651931396380072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "1279682391530947146", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2655979063469551930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14425547983540742516", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "981419593633555198", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12324657364444167791", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3246153532847702583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4202705710324555180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12272318018055307535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "396815044270978782", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15633173680908856082", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16635731992372618666", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10418466892824851134", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "3244777852750357718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2443758478383854939", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13503934436248311972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2594310972560076285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "2424349375092546581", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7104985983444651979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13518747015059826801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "11675809062974151496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "4725349695436675084", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "17351243519367619322", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17026338651868178077", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8730407034445893642", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "144434691308306757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4114184149613179671", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2558882920723584206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "16481414687792927331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "17756651805686889890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "2228533392085335649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "9038567144062573854", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1345293381483212104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "729683192738752814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "458997435535883643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16955907389221472146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "17927673764274384911", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6418222853479731432", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7539191242110313918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "18014188548165359278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "16640379332042800496", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14856197725306980283", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9279474331309267880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5717588912072437191", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1143426643765799488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "1049385516019456025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "10766144770072425534", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6442062011017461761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "6063490496423709036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3892512749863226006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4970240836537468609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "14668725050395069435", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "7939047354407928586", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "365747554145156596", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18207060402110970301", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11049175652352131465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2982080608393779951", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17216477578093693014", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14116923400742300182", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7029133126202354787", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17420660823086709040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17300489799784213303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "15549100047322521213", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "8342403220432961494", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4600322689355365368", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6432444239720173669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5944283189654634640", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "8682613468075783516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5788340143385910170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14166708932229380784", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9262263820759430835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "10661619519548036109", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11254313793397682889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "8941570659228294791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "3711589321155572550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "13440603011986281192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10072782544067079397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "855625721312733540", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "8643403818712296708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "3482316012102041163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "15966346359387758212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "6179768494274723997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "110891946535801188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13300595681637438535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16686223109098592740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "4196950243745604808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "15357494333788579519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5791271012599760917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3502203881558439278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6615043890071705766", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16602880550249876273", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "8163937071550477896", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4788158788847752998", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11048286378242522780", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15669268280202512868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "11708180973354877349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "7429872600277069485", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "18404744652577257121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "15145594907273468650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "17189550036105947900", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7167054889777381093", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3278181836788028231", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6040360226338233118", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6877955452402826287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "10864271596740164097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14322983802576638073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "9469688466553577331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13052522487775745493", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10990480508394584613", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11406807220585770939", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17014949219411078284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2128641903680430067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8751004549226570175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "12508733516106581272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1535119834165965208", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6537771397615897748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16911666678187393426", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18163247824658143109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3169531413538986325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "1861963470217658786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17175653712131007582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12148428445687813823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "15239273648189016892", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17290692657168386471", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14119365735362663804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4225327120021140533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "673126354575235249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17628454700752918711", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3529846607992358207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "342387360760418341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16353520814579109491", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "3134099148543397372", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3042628567386436226", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "868736197323541759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3241775197578183463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7851643406001230159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "8502552745012743053", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "10353443026537243362", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3104552371734307984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "12807894319350246437", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11258614397356100246", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12946314097679886518", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "12909725304008017600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "6153017925473103663", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12188122150443559128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11983651079897753600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "12988924268115973386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "4891686540869580517", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15196732464112076502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "459391085160518545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "7760457628691335753", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4865678723441158246", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15589245661365969249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "9661616000023492219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "14777607874956018667", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14113322810933328214", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16281761113420371943", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "16988191641007425377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "15844881725957151580", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8059328623525062913", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3662747857062156477", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15121448034928438384", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14122213471825630433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14985236276429954162", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14321283775111180227", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "98795127409553442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14805540705424073865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3788462090984291082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11823068760218786389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "5963105523596432544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "10308431308942416781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "8712136292276123857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "11314582467969020320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "17147293671640396193", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "4465701487417893814", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6144958783262207773", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10467232566885547072", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17262854991782705821", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4635570915184713874", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8706634286501695698", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3863816884636503247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9252629750817485029", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13168267319035362901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "16567638487719493784", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13449466515297095146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10808909442136736629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15172865163331822352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16260483557979578317", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "7469127846325904854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "8783239368699382065", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "3477539135137665170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "605638562926557381", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2613462626256090659", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "12626994817506009929", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12417557233566012737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "14056483847542666300", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "446997309263592434", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16589191615146805668", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "17226649394712507758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "13566885629976429699", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9931266845625995359", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2522707948254032777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "6486250531858548438", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8174273876544952794", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15049304780567617964", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "1321553039928725678", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5105893636044171966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1661430504764145711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10041204026657386200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15969909663367854367", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10956917223944472347", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6060390128414591327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10987291891349907631", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16452573613171944531", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6370356607952251648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "11547588640573840103", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6882259829255167273", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4184283661465100793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "1799277562177870093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16276490504942526329", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "13939380644892198347", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14257398784378656791", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14444423571297570985", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "9708741882115135691", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9374845449632011709", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11907741510409644649", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11833466191385766041", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3276797683943990958", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8494679093555050767", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16807117250109985357", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13642010365337780940", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5622078553841657218", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "3973953743850093759", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "3210709940026980348", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "15122428380000835284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4492743859922847514", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3555469834146426564", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13140141354298916151", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "7110352624440078898", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9527046928040225586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "7797523746053138659", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1478169078874265704", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1264966373832011567", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13715010490012086430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "1470778934882087497", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "12725817227797568697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10024777334075819235", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "364471436103661689", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4052362583575987109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "10657660173790920140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "6557428245898292304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9440117898128288296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15929262283669093154", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6352520536724420824", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1921667815983542102", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6088184848087986042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16602667769746047266", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15953651221917495492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "34011924689025090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "674384870483198184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3555798556624172621", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13793032417416585006", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9019684110208109757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "647849627466319112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11242435114747058327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17302407573266205607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13606281481050014632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "2466805217694531959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "5511298016141559884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "5483150635926637198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6265211373810873425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7643647841451578008", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13254760530618979318", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16709502837180561673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15693956942112465267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17891347169069018262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10521453583707218193", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "14303192614979408043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16609351383660437793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11118586558529856637", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10939847328508611170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "5114254088513267110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "10163486148946687267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13296242326766100583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "12068797674575015662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10978693262040522687", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "10037086825900566930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17216583849049249733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "341552075482632478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "738850098651678143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "7139714914586273766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "3302557590307975559", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7648248878470053116", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4917917708431763965", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12978593897559876761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18064160378597803888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "7689593699365225521", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "15819149710195058441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "18274109287723887410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10269238332775024706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9167138376243583750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5713105609160120586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "150812658537571916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "4485289322925780000", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17268201530818712998", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8747430148550634190", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16986358655784856534", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6109013751635776331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9585113116232600562", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3503893875515897267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13144385730409574259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "743941460026466526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "4492332228252010118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "1920042803083729276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "16436006771518788093", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17567504672169904482", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1989849521691057108", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8203171222962341018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9795194069954915563", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13369603621524676979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "2930658435447859986", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "10721885719016335538", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17663718302088575615", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "10749263296616139689", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "18356235677223229518", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2657828809338947050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "15743750994087974449", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "754596461956525575", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17690103717758388022", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1581136092002053880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "184306359395609972", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4891076250667414900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "10946917656449245131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13963558035989415263", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13239946614209250451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "1076938718721677141", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5851532147278358697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5746129902873132635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11592511763160794565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "8244393417024602494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "10340341966852782124", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10014822679257636832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3975219156915176189", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13536863026622428609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "11408010379683511978", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15458285682224384803", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3407965587245145003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "10514330767826407566", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "4251496064392381805", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2384682907808363130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15705923658253281113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6610298174133949061", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6801247431347692935", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7702208423015808353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "625378771032655972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6542436061498779527", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10220143644047641696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "5009829190055738132", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9863034269936216346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14973431782875808802", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11948858355027908365", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "473983206819135409", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6586872365879203192", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "18412999191021390737", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5274456170971167904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "9275398105290923887", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11340683391412454009", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8100282867486124965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "6361758198448370863", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16431503579923509596", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10280282710562383672", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9138345765585313427", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11117326838088757686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "18222598708685323020", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5198859831430501652", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16644329894881952739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9367630847798077790", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4906856539144714227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14958085423402252319", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9835535945548454398", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "187589970359123667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "678657374277098506", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8434335101659807351", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15928128327390664485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16504425380504793738", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6480587375918509253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "9751235588096143414", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "16866525370343398909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "10160678465371702528", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17188750289444625186", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14811603003184578943", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4363379197393466424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "16403435599807360704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "9367985410929563457", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "10716232679616746794", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "622299920975636640", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10798283054583509534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14179140464588572277", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "351304363117543419", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3499106702307464480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "259619428712608645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3296098567244638489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13593304587712966846", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7572277082530361815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "6379337678256717737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "4513178474272034213", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3390430905253038550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "925607706467451476", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5627536079808515754", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16464493408368412759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "13839116996827687373", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "307874768879227632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "2321767794934000238", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "10308113903347312964", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6712698149192186833", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14930789530046665855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2204178900998688268", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17174919737114915467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15154700439767512396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14916625550370402883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7650375560336513366", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-        });
-        td.td.insert({
-            { "9999553425206328238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17515064188391421150", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10437367877444543776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4362304842016958728", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "383721620126444793", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "138379779469699309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "3759515057574218101", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2856601829807186494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3286330985102373533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8159303545761286685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4056979460327024961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "17823133607491820214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13678741578702922441", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17310844417517474522", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7287895452784411060", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6513788469599330141", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1432487477100132607", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2463151488506537801", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4054850047596998735", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9747825473942435842", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "250084243188516935", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "672634960435241508", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "748301576795035305", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14255457787105784042", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2750476114907782459", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10028244201873254140", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6469067021323571170", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10601714587235375373", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2483181247706575298", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1732853511466309905", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12113297049460198476", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11557224109907477240", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18084899872055349937", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2890305478244125142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "16659638340060273536", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7297768924198851782", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13104971224879807298", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5507252417827285564", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8511924860787648884", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8339235544283885013", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5654030701873405891", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1436723751951975466", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8325439593817651819", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17618112803233960227", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12327651080801123538", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13617891575616631067", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6020885536659393981", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1940159900852645250", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "753809225159529269", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8790166817024820739", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10677449690354999149", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10593983805743674128", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8217088979257009010", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5687085271369421207", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15279061373346657582", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7096501191029978469", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10094312347267495565", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3510084874150710192", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9391986481292718799", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5259220060268012597", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8117066211911522905", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10716559814452841971", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3949211089098986928", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14752151264004665491", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9195500778955925293", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13023666909692825369", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10961696014697611547", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "408602315578383859", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "582954161360487990", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2482190331248449465", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4075769657981876449", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14367142998060454343", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8114910678593187231", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4495451816890445327", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17102726573636919392", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10845009858831745215", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3112081942557253948", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1635689655354995548", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10250301712194120144", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4855747489298888657", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14238766089951260596", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7017830157652362654", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5619751660204221930", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18093663410921658106", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7869191330107002954", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7245974724868795129", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11019243479903456358", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12568255992252373147", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12413024322120393790", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17328716013187434957", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14899206494260920951", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9983462569671477588", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2383983224188083583", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1759538680129620900", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17194386925266836084", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1518413386955573037", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4104380387301024172", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6500468942462159659", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14136370464716049139", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4056919990977544228", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17351367314312762125", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17586380391909451000", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3561558658922596877", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3296755748686779746", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "534789472217562338", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10470060457279511896", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4917360877294344854", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3020953254086476464", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12307245536623707478", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17039711449439313953", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2734182509541824864", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14521225825422360447", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5857101685300045443", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1547471890307888038", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11159429929932958728", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1382911856313970571", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1854612313463195535", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13051406650237455505", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15438530452161762045", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7446661399223808792", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17861183465344343443", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2026622899016787854", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16127482065413259805", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12961109385388101976", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16855828799826043472", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15658859674277700656", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4833749391314748606", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3326691585067800328", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10718764522366711114", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13643973579671217152", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11155444222714959508", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "11544626480076777556", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14277552178674323256", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7792811600696842064", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10622803531832712558", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3079343528005019570", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3521119014097924580", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13643421651252474051", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6947390018658290847", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11697545935437523887", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2179704411405073702", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6886280732774854778", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15841879134365332862", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1489646217778958363", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8996027646503556955", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17569170625753249614", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16686854568163084344", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11187304651899164445", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14540721800838487177", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1979841019103384445", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13241679793873365192", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7013169017932712804", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17389114672554594444", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4157112143322859333", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15217255896294251282", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7606241825090144098", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8254388198068394779", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "755942233998922490", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14018816117251124336", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "12054714986067446052", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "1138657035758391650", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3767246406609050779", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8536612779196342267", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18269766292810651342", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9646020463213439644", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5654817010240784792", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17277846909615605376", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "102220157823566379", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18213629255325554583", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8809496195168645264", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8506262325379391391", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "555647031314007743", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11133391567691287018", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4531238775069637542", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1050921927000835075", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3929145534169458063", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17125607183887169558", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3771153805567862915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5658567026478236676", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14520461267731870642", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15949156027942399242", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14569379143051211142", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5083162050523454050", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13951906075577108679", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9004122893718097099", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4336765005970913285", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1037896951032802088", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12090010131585526347", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9035445496715584647", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5455756262684457251", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7013197348316253486", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "143894893069959052", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13984124581247009793", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17964690428632248307", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5850736343172747247", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12379881923680871705", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11864459706509310150", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15222102499748205072", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1198491147477454704", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11400303472547811086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "11660798111579160734", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12081136231782604198", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14508437224082799436", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2396983035676921683", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11489881652545443112", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5651551840851524311", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11753049051286720239", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "479427514681077218", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10277290426401380976", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4919635200134986619", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8180846581099717076", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6411489040870738143", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4195122768220068448", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "52150349468142798", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4439371893496638788", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4039813343849078927", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16533127286587475454", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10264270523529136771", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9915620237695279980", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7090467930115498252", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15407802086492754450", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4007960934134542892", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2002110062193477745", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10154803388813032920", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1939527596007045209", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18436843102627176620", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "277852397173940175", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6822978927370753017", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10859939917723763131", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9248235209454206632", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2665169698359670120", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18266967379169677646", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5047972486012090625", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "8183383667948205424", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14478151143114959230", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11396985422513105543", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12114476173765693172", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "17041468169694105561", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16498300259966485293", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "1173136780324694038", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15786764202107923723", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8913526950888110377", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15988378956341507229", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10993107955805947401", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6214677989814002369", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10884202393733523875", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4424960026145600447", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9763754389347695094", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "105055722864217258", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9775648000771985077", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15967893151722576439", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "5774841809066688068", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "13402919586406297042", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11335142595937152387", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2689568881580764024", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "6571325912136856822", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "18122652705874970766", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2000008755333069005", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10361998183258703575", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "12348644068948200883", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "736422312606696687", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16240864447025932692", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8589562027950762944", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9162564861963233717", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3167738956362101592", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7260746128189749064", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15308960063718398523", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10129304668926912275", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4853130422682926168", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14453982453535955244", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "16608982023596566351", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2470663389603706356", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4240407752719875080", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4846563120992975368", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "3706088306568590662", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14866563628584464675", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14721943524627076027", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9323941828298277387", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14109366965145192619", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4923997413838231159", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "181017193671999192", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "10757412618207229106", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "6395263375773555188", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5979046470758784946", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3927359449523162508", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18232387132890063687", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6709212639543074230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10086813986911195558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "3109992766790372487", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "794530296606789816", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1249133049911188319", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2006024870459798086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "11914297820344167381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13079795735173763117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "6241224766048532539", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16524474021378494125", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4407550747921719377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "7259905085241841240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "6666210546769702280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7814543122045448412", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13554702187867408038", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2547880010597993852", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9061076702890952738", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15460429275475874158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7724185199575851246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8533091468352267196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15025120359649460106", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "2613575328969629284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7463954007838579697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9151324495773628566", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "651020886445062493", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8237821273547216740", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2875927974837744359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3674322065648064195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "137871170540938640", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3066826388383295007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17483221428915982776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3403906310423395442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3888283018836731569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13928684419408478520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11520548550630007970", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4922714504620931501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15683804450763499599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "12686604223669447758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10746289671948325353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "2487976264999747775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6163010595188500945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10404333823880552577", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "16662409111036688294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "5400706842524705774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "17423097433955762667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "18131954418490925431", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "16549854027697846882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10340073416712988987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "4633923265089466898", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "6808980404170272597", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10592783998150232858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "1594829714229111215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "17361714725103230834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 152) },
-            { "15732140959902969012", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5796974850751105634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10588059104387338398", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "11738780323979052397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "16342972196376030503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "10406201782146034797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "17342758321852264926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "15951978466742016539", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14100026884590707572", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8368507377481570353", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16780457022162749898", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17140702790441856730", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2578325663193624576", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8784358107340738205", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2955459120402821540", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "2840794055129352139", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7104266560248570112", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11113125355390956764", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9127827617126714860", std::make_tuple("fully_connected_gpu_fb_io_b8_f8_vload", -1) },
-            { "2268275392299271167", std::make_tuple("fully_connected_gpu_fb_io_block_fp16", -1) },
-            { "10615831454139478379", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "8205640825965213946", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "14337168375989245254", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "11664399629496237233", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "15750539817895707253", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "921209976738626097", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "8590416145336196354", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "10463632805036507382", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "13637537549252005181", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "7581949584623524395", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "9814647153117279415", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "3444250649099578792", std::make_tuple("convolution_gpu_yxfb_yxio_b16", -1) },
-            { "4039483032571506874", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14309249337788077160", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6254493271976962295", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12387660887222981357", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7723131901316908741", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13963554827358438190", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5001552360784483833", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14201142257504107783", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1066668660701816536", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4664196755018349672", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1103228955716492167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "8618835732380720921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "8391292909068775212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "9488974186647231896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9069334144391048686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "12493863403516600413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13375084585444085517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "18040173797801558071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "15329174116169594863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "14910223536998380801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "18277685132620834972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13090887980792573261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "3067806959725855130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "17791773192152464021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "13603318842632052764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "12072881177966014126", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14413047954443174304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "9118663018352672834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "3558391988878894288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "10047727261970275928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "11527382293059267033", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "8445575388700666150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "4560479630843098090", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12542825714985999760", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4599539412023802059", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "4570119951370893062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "16897917745917378359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "6947523163603267191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "2322126126611987721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "6518845972912144959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "9741774854327055438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "7079854103926842364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "5035895518536085765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4307157272240924516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "142486914279119363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "1532263118203058517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "13529694429433303321", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "2820916926593580316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "11140657515428786448", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9028970753877215614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "14038308632095412386", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15928183143089896780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "14071202918199194502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "12383676694875725364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-        });
-        td.td.insert({
-            // style sample
-            { "108008098283423515", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4060303280041214217", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6537702661107952067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "14207620784078112608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "15507553344802827573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "11202969539314825145", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "17875115440447156357", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "5043345769472136917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "16920049042115931889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "18396735425525918800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "2188753401875518777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "861151538204493788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "6577112081591081699", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "6662263400328602558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "12062286938998602641", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3532486493536194182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "14486900605080248966", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "3986970741207127928", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "12055000818441091810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "6473775431261965926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13358640031183139493", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "6917849789850282518", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "339005357927126796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "3341093105217903149", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "4002803423257090980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "491985190756430188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "7294200033269380787", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "4133961720345934549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "15578894483600472601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "5153485325286526589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11666701706717643100", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4425021395842654484", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "16007037430422291336", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "3766679421476531097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12016934279017055632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "6099288410648891214", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14944495584618629508", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9930151769697976322", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "2751149427305557313", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8308207826619932628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3823293373281864380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10416260780913304720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "2847588473935575710", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "6114169197348303753", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12362870423757408187", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "15330333360513835100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4337663535143862248", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12324726684926692530", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11152914598877675570", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17706702842712421674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "8596083086448639289", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "3826763780015674157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13022765751713107854", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12054929554615967645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "8483866344820602196", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "17257458463329928325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "17396276238049115844", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12139918033335162307", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17613450189830338239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 20) },
-            { "466805001581651681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12375983338952375600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "2835926422026106846", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7562282591986948344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10222410309423438801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "9667626193041507177", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "5918842657011667447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "10197866743342998409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "10481938393331020691", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18128936267842454401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "8968418225456926192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13004007524122679918", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "3587239831348133052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "13594576107143259571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "3622666399417827014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "12576876344393380361", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13160857254841009807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "5445489344860863060", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1213577713645615257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "17692282381799629643", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6196533506278647179", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8978870911977287031", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6336679824344178824", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "6223842516539111057", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "1046547531196124397", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "58154090876617650", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12346479378618214663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "2502439462576713842", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "4182038693129989035", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12115518620344827362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "604454303639822310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "5453339018427413517", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9083797214718240599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 167) },
-            { "5886784323972875305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1077955953397294307", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "1705252754140106824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "14801234233433168563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "6099288410648891214", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9667626193041507177", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "8308207826619932628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14486900605080248966", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "12362870423757408187", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "15507553344802827573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "18128936267842454401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "7491177930963608610", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "74789225791237471", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "13384934269447336301", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "13121630338540122290", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "15469602039104029406", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7160031288662381100", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7329115981778571341", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-        });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_GT3_B1.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_GT3_B1.cpp
deleted file mode 100644
index 980aac5b5..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_GT3_B1.cpp
+++ /dev/null
@@ -1,1937 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // KBL GT3e
-    void tuning_cache_5927_B1(tuning_data& td)
-    {
-        td.td.insert({
-            { "1375156980278317418", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13455881643467418059", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "12788968383428254917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13131740479277027362", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "3390014193205017427", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1270467775674221667", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14462744723628661203", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "8203171222962341018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9795194069954915563", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13369603621524676979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13575423234109624706", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "10721885719016335538", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "14567947256029724271", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "10749263296616139689", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11717348577195224554", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9275303306340702111", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12245096462203481681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "4999505377862312410", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "15272426400992401555", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9325097933807426691", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "18238669114790278675", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6664482192233202590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "7454366978268164047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "16135569134646688251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "10572945270796129630", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "17495198214524203238", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "5221108094913859739", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1092633914190498221", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "2738256633362038820", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16689586259416414782", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1525652349412826502", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17683302016987200208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "5615525527388396983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "3992735701291817771", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13208739898218342989", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9536348721941264933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12803521018213865796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "8854783036772473804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 149) },
-            { "6766480740724769248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "768423629375648579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4044100281521441011", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "873240542570331563", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12875236165672036211", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12008819728839685704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "2486645741683554648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "368578589584714524", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "301201776306602054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "13152181652632422771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "10311747599696543062", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11258322449556590366", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14095734330183410835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "14910223536998380801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "3352689317181436056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "15832740972576959202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 285) },
-            { "14732184525012592889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "8421045774757048067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "941232110069825628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "8975333906619899020", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14800592533315327674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "11816277809167487786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "957781751038897330", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10498289589469975939", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12970943403831707924", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "1300292367195167745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "3399837016486623477", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16740871614208968868", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "71587235425438167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12717047049023783979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "10478482486372389470", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "6056581247196718403", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3780320160034246719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "2819320453491169732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16976464773806576190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "13321672741246923341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "15140532227060261467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 275) },
-            { "9400755775406101904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "10292585962794261197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "13048561902713182858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3658425022428447440", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 272) },
-            { "16947830954662293793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "8397584983137442239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "1071169341660439058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "5326247361632903583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6214194654733781771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "10025839973092358719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16711955423531846725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2915165824085219545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17108987360340581555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11972097635078477347", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16926950874716567095", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1212319037405620223", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12397280593466519809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "2609454334520044465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1336940384521633733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15271783562528081169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9533360488591027707", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6930697835136176263", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14444423571297570985", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "12643423612381102003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 79) },
-            { "18423051691107460439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "15381833359831622179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12040626513219974957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10647227605517025377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8127570953237266335", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12876112384009608387", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12663860560275361463", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12352923639732112511", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 76) },
-            { "708452703070938673", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3217246278485567748", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15713964605078748923", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12293786134765875615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "16043683538361975370", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10670103699537731664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "17854578307286932628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11443268857010762276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "4479117540570599742", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11726298758004767743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "2968031010495399536", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3797957937905580811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "1474271081523145413", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8526484907799590618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "13723543003759101485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11728824117049687850", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13268525255152984893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "14397348576352573007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8616686489737649890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "13176385389367548697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14990645740260870030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "7472330881076141262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "10892456883214928095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "9522661528867955338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "17856816245251319111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "14872992823083730615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3106591708459602370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "11609821372586026178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "7678457226823073886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "10118395047539851751", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5389189982064081933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "1742897526168249500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "15331103261044247142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "6644418194983229139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "12478309735214802531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "18012549942299450620", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11873734271080160669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "10424278617647597641", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9553032671453999824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "3860603464276263676", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1207026216972160297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "9519623751582710696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10328182165125764988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "2231648183489019418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17599383258252980421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "16208488491972128275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13379165253894817165", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2566302789609970663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1478419046264331178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "3087801652564627458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16103943009195163681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14230385851791760020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15293727142789007900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "13973179950424276578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "713121569924250372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7947870656736319919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1663285216972929652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "14767888121198814523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2124033349728954551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "8762901342272872498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17006133396401462698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "10783981060353445280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15110359240685619357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "7875272450497189442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3281207855459771997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11932770338770247767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15860915170591763391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "11716771904412649891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1095495157025479260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8402692278765063674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "509781001842353609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3255465741612432300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13439896617880328331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7134654288295280046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6769243149577568817", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "3480732841490521799", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "18269685060032395235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15649927926091502215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "69439315851965666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "156456996459945842", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3012566432840424198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "16431165572426232677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6324565723045697080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5390559917122707732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5469227748156438008", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "17163158934005653629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2307310127637739872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "1999979442136861875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2527189070714658176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "8329846097322076175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16783619135298589974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "12214162812589030126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9216608098626790565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5179760459095053114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "2452226948562393335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "4499586349553581439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12668149981216388765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2287356884312581209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11115684531624462986", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6483208845600234755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3752171257634205726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "1774158624592967937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16881283637687482989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14749947225382670869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7351733901977025859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "435888248913413834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "13713406612642090169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16582132711225619740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "10436819182310112786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14546281065004619074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12558716383635737426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "12609361477548272638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "8107447526839063293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10995907213890714701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4871907623235871050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7394217382008802567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3880189981766119529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "3759057398165607194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "4561874206785244358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "488298169768725160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12956726277674279950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7177837234452118325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 8) },
-            { "9057036344533510776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5093049998173715787", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13761566845514364807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "1594612401422787491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14603590053512154268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "10136369729388564720", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050675313067213312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "14221578799010900252", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "11723735945517472199", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "13810995219720233595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "2704063557078535883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "10384537928514123040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 310) },
-            { "17427036330773218054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9796621763733208035", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14046114605615338907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "5763440554939527411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12892693137085610062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "17775705003104146872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "14878347463243157447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7368916076070115064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "3499109651698979012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "190530884420224257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "4202645222013675478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "11324851661119942609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "6232363902828992968", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "4299492266819967844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "9481675228591993785", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "11772741918108731396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "18419183012101393192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "17832542092610191859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "11771014003680394135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9192665896782282996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "9763310312421884308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "11430400968543668873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "3430266954211750407", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "7172604084103519563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "10306542963828398049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "5235375820995365354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5091558853871982858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "12914986936318857086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "2265784112305305260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "9019388470685749691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12427258337646070422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15884763176333003771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "7211355951470869591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15399245700982979379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12644942072153919043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5876880412336151866", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "13775529405693629438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "9048522050692986204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "10642327923162019888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6410682026872155392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9454954846682513038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16463823433924519300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "7279393739634103483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13358283026528078900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "8032685176029570383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "949330876419581703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17713034180977313726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "472454322186482185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "2727219457659794468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7852745450437172519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "6065819201836017182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15984885011101717258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "14811022197918391667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16146350476627599543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "16173557782125372935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "296142385116663420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12655099960717366198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7937870623766562191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9367157746678824712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "18062849937960759210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "11919129623429545762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10522649794540845800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1104489643524273315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5419775002149092646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9226912483632588371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4958222070605478947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4479979951990338510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "12022152681602871455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "5740738339752793113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "12087141795291232248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17825280904760131680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3974589991022739479", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1838534101161814609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10046663998164493552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "2305461098719675735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16504962609450876148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "6345550009198921347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11239754372812258455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "4347816192417741558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17809920600993699808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16710010075465723498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17729546848373991614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16998508915819714690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12952980509662451384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2683507674615735878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "13059207969254830451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "16295660312557315941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14089893422771228191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "18034648276860485300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17739868787095417856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10880081193716628051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15916505622570323098", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "9101018613418825655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15650839696475698676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "15628121900226431719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14554225625951128811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "3134489458855347772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "5627834277145735283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10729288973933590396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10869005786136023160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5597908143491399643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "577182964135927041", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16947969669087411530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "861419637283812778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "3643250372952944907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "17977676737774695825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10309504812060596568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8866736221671835567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "2133849627845285277", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "12793908914872030220", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15947699374684516369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4660288622381620227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "15914512645931208899", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7460672405409009037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "1541754036637209097", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "89439319782574517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "14088382963493477342", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "18203935818408469865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "13191096881934434519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "7918742312252115870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "15641537661939240413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "157805434489791310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "7941729567451949422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "10628725059172743408", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4492673409319122180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "15857087373591747006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "13793441296561946357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5172712078329324967", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "8780604510524622314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "1760690277175249985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "13649894122307008732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17546566148752689536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "12675313398314286884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14621327324047759584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "14136097914489095982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7638626850074132214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9399994156762372761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "18068050257421269408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "11830297960718214360", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14959566236432790882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "16884396694505987920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "17947818179123182001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "9381304526221508530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "13932662890258900896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "8268533335852735248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17419874083634480896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "12773693193167844110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "5157249499936659040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "4282661608732125403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3159147743553063163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1706927777850488363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "9839670675413379092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "6780215829176686721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12972634653821069685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16129296588866116913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18202222342562516071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15426960908024585800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "17026284168840448378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "18118237182023167949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "11113256687741667688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "10555597973766215754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17517495652165026573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1832310305089212990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "13855438905855887272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15349944413643626251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4738743763536059708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "16611452077660879545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "8101977280003030465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2012181953284568566", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "2969389503332309296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14515066741400300669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9373353053843326128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "10023279637210292010", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1103204698908514224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "18092842590142527927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12174571114411168588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "14431607479949498164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "10279778381617181802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4237276338897143680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "8083672466967374860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16705621644424684055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "5352861363832390974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "16945184617367657570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "2995134938466176198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11706378390483804857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "7958459862276998225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11703557271443535142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "5020788604681810984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15217183882858251099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "10650698451740924172", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "706370730287471796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18199526506796726885", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9269175963143039426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3691705516240577130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13472532612464340803", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12388375914105990324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11582534256623549131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1653274345637156919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5893940382830835820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "17700958439420868719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "12730339458081890990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "6631816968511312100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7000524935770116969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "386749666417295495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7162575953766465459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "11398019086259011063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3041612155708729812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4274801141127703532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4865023158176874622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18424912460022156378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "10408322429232132983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "5277400567128489977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "6848989271874647093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "10085059621136526248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "12962552332511702682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "751912075185318190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4505008254511324231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4191326605459754690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9824678205469832038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18245935804520236353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "12309132521191764927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "12843671306854567956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "8275277322582733101", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13698389420396031586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "12949204491386872217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7370273921473161914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "941829593638869991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16206791915939407806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "1500571771538985941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "2095802691829304676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17542414935564676110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "12380856644683171627", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1451466106918423837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "8071957466247137919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "11661208196482963286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6635217802203685464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "265124365266629363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "9513032457323269513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11814740669468421049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5221320470007950766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "14359530849521980269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "6181651715051152713", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "1450888744802985214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2842103889477438816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14006248791647711759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7072606962946873975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "3599823735065658574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "11311859068168414878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "17525531790109748810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16749148369456398030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "17556238490521153146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6067904130482758510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "1791615587935799399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "12985650543127289023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "6714886136800883594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "220326805056361171", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "6777045876155144709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "9454512817077883797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14011124615649605281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "994489782629179836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "4338023436590582323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1152693503778768433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "5994204139128667921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "17243576882981097341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "5524218746051008792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2669822154816760632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "7179714714302073459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "13002363400738122017", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "17006095064160484022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13733327241591630239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "2623687018437195679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "14077148976508649021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "8272823732258536202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2451712485584835395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8057302050645780813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7430073011895298582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5095827462645341808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15129834325410878425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "9660812093766156608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15781622938833984014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "1089679781525023551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6129602738379919488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5287076386757143976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "16076153317792960383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2108296560864415762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17006655627343469372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "9404677451270692749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "1372939511728986224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5311718276151327830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "529543453251381109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "15591167992985613695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "15026219694198820614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8258382025812748961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "14810839157236175179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16117738994809548007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "659846949368492111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5211191663202250117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13418701036204748812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "9714764457768279762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17310332946322628458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15975964562807570772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13447028922679236865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8337820318779061494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "18136765667969393174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "14821616804286068969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18386376129938707290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16609136488331186895", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1996860183441418841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6491244517639245276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "16312223896859176991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17833517350994024381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "4226968857681929488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "5141753233513623264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "6860503758000008398", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16489624657475712467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "7862815466573236157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "10679760989906275129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "852092858392507925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6996376303337512293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10978173291465325823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "6670327979947471550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "11318913630213187720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "123251351612308092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "10784073615329190425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "2261453441277654139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2937907409658060025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "7852144838267007144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "4408772370026995920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15411474884532403722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "9462315044265139531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "6419580456182610836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "12277470820821378855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "16865879032845300007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2862999234347597091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15447513376965243034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "14420809655798184553", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12954154886708228545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "7575634241190730697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "2344498602308448450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "4304041922043496030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10971070835319242371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "4862529593282936100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "5312140481706133684", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "15522785615618973614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17798636687709019154", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "1938086876393565238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11897113890115321056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14363654136811880073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "3928266232090746643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "15882969506682501496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16426179645101678763", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "18174857480705846286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "598390166442977699", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5522698342845820411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11559360678008060513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13184662326021747000", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "16037141448095945650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "15094664469997373662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "822162932339827810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "2597453794298356435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "15851356529373376076", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "7966454753124154534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7311120574972466702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "16461809076899645037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "11655994466278963438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6981537186704688907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7903891232234389925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4229105529069729944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "12796777049340516563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "14289048840489035546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4239133538073498792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5103094815475470596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8560635685184432720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16264774056719724826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2571882179292959757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16758962840329202004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4550028191070279999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15661322183507404821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "14650567822254940018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3755253206085028904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8751016391945753900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "288853243482418538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5047419871737940985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8819268903800581706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3746573775462003750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16286085532892593349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16547425454653232058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "8195881973746570408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7712831597869354170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17035903590837750750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1907439276166837309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3036808833459559381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17928043901784474130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "14667209474639064623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1701609125136907870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2140514316203117958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "9366201112659847392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "7808544677773370430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2251029128552117936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9529614587861271730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16811402686462277562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "10554266898346470422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7817036102984218692", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "6329618009202266591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16936366288366370882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "8025053805734757314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "534032316469702287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3963106895592011725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "17994361454416813294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "14902389080201926109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3796274347773622633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1306339989221885682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10900880512948479338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "287386909600391846", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "17542176922797334839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1081962464388501987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5831419373611158773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "3179874645565098825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "14906458674793172507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1934379409955686502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10178951466584845110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12693511427898130707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "18137106379929135901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "11619548409913646265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13317417676446624018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16710651492402564794", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10967218651864700933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "5381578460674280089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13026555349791486777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "11913020016435860608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "8260130048649729185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "14133958262039763609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5585398540591396124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16442107352245114876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "423221712829930726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13550435052563656432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2440366541074371090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "8300655194765375060", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13163146272900339330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "5406129421969383274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "15118142492742177336", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "10727592780669452048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "1076005730007872492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13699740641705514374", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13054405729329143152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13503608041359512", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14385185911482960528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "11215217005872946038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "4099859307693687554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "4408600136502382976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "3037042229494600258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "1155389358857780776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "11461581290174106570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16896833230469488924", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11469881811044037340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "3003526572122876385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14251848023416168295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "17248756229500447131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "929378940515745198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "12962558681443556219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "4481903208484313806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13558618754911056302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11455518069358829249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "15890473622821659630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "6942622405269419082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13890118723041457532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11292995457386147494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5077214229434392730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "17774424004510360936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10412588668458621135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "7334966010680206302", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4161141078006269526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 278) },
-            { "6522575549211855712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5629373398445592781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "13374993751390784382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "12976499206227689731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9882204352209412039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "5041111302824362529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "13869716373706247686", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6438522646185979880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2406816735581074778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "8881150100883636392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "593712935037568960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "11970881115757095265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "5584432943673435454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "4560479630843098090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "15374625876485618845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "13102754309439605192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "17912189681971987483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "8153567933591966877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "1604661321386793876", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8990561333549136048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12278364834477923930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 300) },
-            { "3122997634505472500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15669490019428002270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "116291934148608396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14729854278671832528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "10591379189397010097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "11929531534620071758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 300) },
-            { "1819720745131968914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "10607904718265020949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "913496537924971856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "916389941321470163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1411786954276574458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2730604806511016352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "5843679089588930933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "7304346312452588844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2423754482456771339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3653156933813711765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "10728212277329722684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "877436308867220589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18375125668176498051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6767245864232675168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9287404618748313247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8728178019712933221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "18251360413872841969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "18271689282126907793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "954796765467489259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "13597240991532942069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "5079055505117153635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "4135003545872878882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "11883485911218628865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "2242915551775617989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "10556089809203693400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "3727142736386026852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "1622880009460832832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "4437258459981739942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "14691372262153587653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "12181607120522804433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3159681096461848644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "6729785110495533200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "15322019609805777935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7024495439434892956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "10416622008071151225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "5796500397424307442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "15702382940521972117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "6093575518270471235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5805383505505929391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1801731858063091191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "1559798212423183813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "5594180958505308003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "4766071144928072260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "8650948093564284852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3883845471211207871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4366168099274266975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "578703329577922869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "16863960779539003201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15450609897480659306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "8203550467004532364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "7431849514656037251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14484890926084856480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "7777333052643961206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "4424217045094988504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "7994179151788368291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "15192024816519005250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4747159205186229582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 97) },
-            { "5485971317082563152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "18128162750557822655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "12421707187947291166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "792684262493086891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "941626985322260281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11868551452004726281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14352303529756685990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "10702234389482091891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "3895088069642140043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "5334566325056222430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "8306337702797456793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "15720507574336564201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "3277243911383750280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18150429561058646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "11169292427557543138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8295126647635181949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14213516751025324346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "16509472637458153234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16589607587365212240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6988674007771237080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3448477246688526708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "8507854696766492454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "8906588133431586825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "654122557966242717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10196332102593337214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "15831600396403741571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "17808913959977434594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "15548971488532746290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "13468713306678453952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13613399861925108148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17802514063213000148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "13093429681061786539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "12247991248100147706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14491949194619001237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7590767013583950613", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "13210604117940125947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4670443882075998209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2857337999074313592", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "16036386660666696362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "755414184406250882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 129) },
-            { "12190841837604350271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "10292243973236220688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "17793292063552633023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "7605139219344415117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "787363431787954804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7000486794832106857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "13608239208821071914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17281202179589913619", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "16985912104363932350", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "14744368497944610864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "3737552767159920174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "3792945601873900927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "1364546124782880196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "3689722043202617487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "2632535010129224704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "10968768803038046390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "5353552956675518468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7866128397931438774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "18233660940545931789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "11670430946096342056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "2627779045483019709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11066913713501760080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2552187713769926425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "654821507679356726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "7606728651572102823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7549378486471456156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "15410074937424854348", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15114370307779942381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12112853999307505628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4161612746310931789", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3388752887767453958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "14046990030104971367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "16230621843665445228", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "9274179337770060652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5115134711994944288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13898821685774165645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "3007637520820789085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "16294825599850364701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "14681717813022425567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4915831715914920982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "12894240573737168362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5448537627319798272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14389915292223442327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14274685812676150168", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7732899312577293959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "11956435900037329302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "9263063714383940562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5824801192141531089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "5608133987357542077", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15392077168521832549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "16446533347502650316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14762599606783897222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "709835724029986012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "1572991986657256775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7398196853452900099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "8140094412609934765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "2659031931257084418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4640028527711211109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "18172711677056449158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5183231560876991543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "6821855018718422278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "13237050834496100264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7164580481046523192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "2490155559809645659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "15430549683839591544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4553409514380460123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3041752019114501584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "4161001033681779582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "4764776977138392550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "6882621854468565774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8881135571874888085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14038261392627717712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "628191607060767879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3511588484597779204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6904130543085920483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7924408980408826942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9416186718345824095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14719421757340260468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11936419502418995274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16601702334097258697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5336120047683197088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15897477855246170861", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9780938731831129283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1473214668483422172", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17515573322312447679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "18356980026934328781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "18077281411861416889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "2543041530639980505", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "16370218798911151331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "17316626950179740845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "10414903047695486119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "2809950092498355574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "12011982029561277581", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "11267742746905371769", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "12534001599784153836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "1882052795393187384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "419783127503173016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "14211903923555028634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "10892706534058849825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "2345023488044002149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "5754844816339228920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "17015791782274123780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "3706994659266083979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "13324157125165576832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "12014527187730671229", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "5170245731599664670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6854611304056079417", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1954052357826969119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17824431042110985323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3603706453982734995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "11992353959766718397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "15163327502374403643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "16758697697363920520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "10930115765550856328", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14418429155823196539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1628593159980574595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "15675968397825708285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "9594594523961285945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "6634330132674952638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "8434794604559592624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3150231129728961455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12545558125736154584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "15485701086886851362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "18005721959893562716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "490233152678323691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "4073467095502162430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "5801429077171542466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "14841539539334726292", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "9404953235624894187", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 92) },
-            { "17995371099806008878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "8961138963663532667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "425744529089575241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1316444335300814745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "761169277744593430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3325727286860556323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2526832080529662683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15470013032930986062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12255528292506999241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13119479079474639169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "12813978452097969536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "4991419288164762786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "18210370419559876426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14962768577232034246", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "1452597292381229708", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7104756264011682902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "7744787957569714828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13503688893307029975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "9133263538092913983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1383899865465106141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11829442945690098558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "12394049027081208902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "12159582810513550491", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "17738299860390552088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "797387385159110695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "8757900457181374694", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "6048964584602891448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "17882819773586674851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "17829148383265978140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14711697456265712456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "724953082687879224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "805221045541170643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "8241070786700614317", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "9191832520273617003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "12408889192918919210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4885944395876887711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2651385050387738902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6303682540621797774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "905780459938651623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4476928353532757380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13681462437496627948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "17243648226968859637", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11192356850081328892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "9323825370872655346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "10000618285883395700", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6418327009347170687", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "8528750110601691390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "8061914949376516780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12992194515157698316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "17870874477143985774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "16234606052818596502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 94) },
-            { "9148379585489720669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 98) },
-            { "9270950131920019932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17001502418583498926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11163107409437069532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "11465965972527519631", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2534408579674556441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18109284647478027063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9849272539053219052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17382660912493284320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7877332346656934022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "6323026044750482867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "9761573038170759563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "12098146032672599222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "1403617451623027879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "9058996149754556268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "5864250949922222051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15847413004526420496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "3199841714087553410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "4957638663977636791", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9437794960375526230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "9475130054420979752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13312514874803986753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "15997754881872769378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "1941341635794709702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10157866834809927320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "12308359047798183133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "2986189945936592561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "6928835003016610382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "10084794570892043447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15417738436777481469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "18377298651236993830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7354234812009979811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "8656468860180713379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14472187692485966933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 101) },
-            { "397770940444464146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14258499419905714808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "17599396373608265826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12935563359569230797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4892959859293355837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "2802810524370514276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14907097142953816744", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2525260242689556544", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13328449155966085543", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11856266545854830143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15993427814066246646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "2100891581797371600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "12242618640422208652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "6133592828563353516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 306) },
-            { "18232278892738147217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11992625045241269569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "12601126285773042005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "7457899998356343871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 198) },
-            { "6343888265369366589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "10791067159964399241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "11327097771110264965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "5245308722062496788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 312) },
-            { "10792503079194374004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "4818231379191523896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "2198278382394812839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "3800011935243649447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9631545863582097486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1779941298820543013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "3621930417735246405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "14435120971846098308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "2893564501191050837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "8108843303778211282", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "3682813162987778705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "15494543914974994991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "7565221050911842393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 90) },
-            { "5629670679897666607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "11754316727756881612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 98) },
-            { "10990741293315393791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "17024388383581997032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "10302338806536775954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "7915318733663535312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "13702692566238948173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "2909728331855309274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13071545223094862275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9631481972809246378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13540002981450186147", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7076937538747704750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "18043340998699622388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7148542290597073512", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "9040046051053703359", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1077773457856682663", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4716188972902735458", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17343050785312683560", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "5687802882700097624", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3524531620118359828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "5688478347124565305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "5504757952698692953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "13800387305792597325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "6574971185849732667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "10573920781439771673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "4992668316921598993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "15778834188130183853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "3062101811226530720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "428659495445490820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "956022649859563080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "13410850301164057911", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "17423645390621980919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "7802311886554362782", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "1172103288112689821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "17353894529222574441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "16431857516454692096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "9100044555742394133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "13115589642140732066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "16190949264253468961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "7026575758396092435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "16761856644242716357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 90) },
-            { "6341197991729122563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "17087740929472936216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "10795104632256101599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13327653786981478088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "1096671695414716274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10774528268153772208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "9525853014023664813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "10632020369698615114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3234107167862677811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8708643228914766202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "12415368596357091523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1028160614515220430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5275016494706355806", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "10947686124973711385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "3349519148124496343", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4003433148846544263", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11718418772370938734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 89) },
-            { "989564341557094953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "6942049339361951275", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14555883089089918919", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15320845027635796583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4014667229872705228", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 68) },
-            { "2438374917504708831", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3272017687600371031", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16067605128297748820", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "14150012830816329527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "804195263636995800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11528417522960871233", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "15378025640603637387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12860222041026638681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 285) },
-            { "12725647706191463348", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12553441041059632729", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 68) },
-            { "12782191856884962803", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15824189967727245909", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11149782181562145291", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "2653651564133701304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "3526580286148537369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3985659568982275663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "13642146548740074992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "2349007644347065353", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6146876760962332928", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17434429579652310107", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9447458159095730492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "8655883535274781128", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7272538316511343863", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17564338309805484464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7881187047171099732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15579919505002150556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "11583017348580874022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "17915846724151945664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "5319668297345215520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 129) },
-            { "17208186152576814861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "3633858263279042265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "13853056718266488510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14759179293743468995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "16995873636564597028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9438739171104456179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "14429081455612806819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9819596940685093690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12085348936192462321", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11951606039079763598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "8769060267707904998", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17104611871050967957", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2103882464623009432", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2659712601063515059", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9759380701896779097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "13842309033760176194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2418288192668085805", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14994322266840011040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16402312692470500253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16955653765071712611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "8739347545059610410", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13459514533473657102", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "7824524940405130010", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17796310681498690253", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14823616678465136590", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13816104794723484993", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "846088275031979661", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18125732229366977468", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8464582977975377118", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6290317420155851465", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "12696412964119109465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "4994591211723226974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "1036010477232750453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "13786357802945430475", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1003101267609305257", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14991602704357959545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "6181308879301978465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15488550074426713959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4062706195708729345", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "8594644182487917002", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15881381297320383917", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6040286126398028933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13926122593957480821", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6213386558868267629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4456004887590847716", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9642229389394495047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "18259656768460999562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "4983880246908724272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "7881579844586294503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "5331173521406046122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "3285520504090196295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7143510787416483146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13104509059416300615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "10090923790949378407", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3429844423226609965", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "706049518431331645", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17193614571243427089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3621424752591567930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11066930104187448422", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "209732971447020989", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16044646335477470657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2172121470071868949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3392693938352572136", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495063314176654751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14553856088069405595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "4967444801764057340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "12160764253455777655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "17723621158215826108", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2171768477223405739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12672995204641007004", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5622089373755094139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2129726780118554358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "4160656836528944651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "11052732052072367261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "18432787283148809023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "16172528828198474326", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16327433707667075261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2797723586312707948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "8451212914744825089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7025975403069487257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "8913950860101596091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "15308578014507211237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13132804928635689780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4465781406991476376", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "16266491618150971928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "181006047500375768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "18140951659547259039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "272730229972987861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "14898892437285105327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "17252449599613270108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13436376034548670107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "13787436604877398090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "8873614802459592665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "13663893159182636270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "1361159591875955678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 312) },
-            { "5912303851874077576", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16245760498096322525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "9928406318940388716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "3036512701943687724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "5334291640387922287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "3002986032379998259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "16469788155263456039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8709632541892447149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "9524303276541517389", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9354818521586974021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "16781127329510211966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6351572488552853754", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "907036267078333137", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11855070245618904113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4544242784357021697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18218631037214746168", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "178353385245384751", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "17658152048177750315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "11636129433022017868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "2622434279674583815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "14335074487552883436", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11175955260573469979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "2732519635571994212", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13893789954946953427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 119) },
-            { "4355933224673863178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18037918102910297531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16071723603031305677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "1697248235682953135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "7843498978148810586", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6767159196241633301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 116) },
-            { "5097818987523855112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6623182990939010641", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6711878663358611849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "8671491767142900139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12164298124869114517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "17089801601582809764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "75742659105146536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4652136280940317116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "9751582946441607796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16706244336960642883", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12581879452540858313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "17443356777503458523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "939718260623752240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14131851237755716991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "7474639594232203854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "14152716242882609401", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7998930863626763670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "10323345824599612614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "30229601562833524", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17788367809717898285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1509728225855233852", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13139625572508441980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16491532291908469567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "6355395905401306995", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2096779676054335057", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4217179485243909459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "17101789600628162503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6139574161497189424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16559140502701231107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11459784003592366395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7869916853707978306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "8132521728369930959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16108573960501496757", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11086699387784339943", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4013707396889204359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "11850332373794932468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "14763982961176216679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "8207349115037232863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "3273748387141431306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11254635684957519432", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "16816222375242496370", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12809199739984715013", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5040730152867713388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "10429613013253088132", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15451919862187018297", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7546586420552408243", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14487682847898298214", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3106710091841093202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "6458124573210430792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9182897385081081193", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14462438074931673266", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18133334552107213128", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "38736266675995457", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13654816209891478730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "6263019986730305851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "12929981792125924963", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3138374672801504481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "17009318615658405230", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9421643783312790618", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2294026590516781945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2940027113687311893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6090625728451718945", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5643908654122573882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9065894438656900887", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "11185156002426041243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "14670068483447729857", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4623542918584461522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "1143214652021653634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1434535531617424039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "17025268985366223779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11507538232733291666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16474284418841532356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12461575861709234385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "192209423643075326", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15490478608105402679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "3491333679577961640", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8176012042686275874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "4282198629458668761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "689445825453914111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "969746749329671447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "16833026567865627676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "13046322179198317310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6902644989079870993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "10987953316324712538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "12515465135362865565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10049571207493913006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "3926585856863002495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11275109735493317886", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12238674883388043717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "16108759090923335184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11756881293845417212", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "17839839336294937155", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4703107905652287491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18180820925685532104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3835286851569826052", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7807983899017500046", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "10294185397756053636", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "5519535335798045279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "8701248964531180496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "291868903926685441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "15239764240622554314", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15963038745470172423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "11428599290755097395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3180320769716158201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "583303098958523195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "318377908569897093", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7353563160591978243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "2582625260054352916", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5609922876429907954", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12557015880639217508", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "11528310408333718862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "1471837664358450291", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7351401242363888463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "953306082374100275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "15759530339367380982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13300022131572486202", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15689502054035168040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "16969463538496570528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "10237524128771958432", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7969848911698660033", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "7130694811424715594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8578747191812631883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "5197105253412476591", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3120553928584920777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4750894407873652809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12667014405537239093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13644681270630373984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "7187734276051878356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13253775441326432265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "14733510474010040334", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "3336303478756453360", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16352331970945217438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "13484950419220835364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4674416595144505741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "14559308665571750465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4542143431130171516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13189392239349392492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "7009735776703529573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4220826666482500445", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "14792528369891965810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15287650965861631130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "10308175009371219583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "2903605246599054308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "9213563311267466388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5019077257951332016", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2497756607567197523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "9285566577169147378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3432296808755992670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7688176479120305539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "8818070832398055086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8787816339967963727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "863952266514375915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "5835634465164771899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "15101680837342453931", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 16) },
-            { "1116274074896622552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "12790788016297794214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "13538051178827008933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16403423801823379909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3723613341885592267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "3830703844770425343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "40704767167309552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "13973028408397200796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16561224775421968533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 43) },
-            { "11243840588602365090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "14103112843209793966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "10483664832302187567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "8100595788531468781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6620782733027313312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "5644068493155655611", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4867937397499803072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "2702144517025248597", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3304589333915676807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12894625941923144893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "11649407835105973949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4897991181236908768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "12179581684777023804", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2806529556090896246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11327228813412934262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "5485749317130402302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3499243120652875549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "10916647716124396856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "5749536453225343663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "789359733867650915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "12626014184575881530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1201692134690347847", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "5219399418946822456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "14217181622713951411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 300) },
-            { "13025323039227543550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "6114147683777615071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "2355214244972870639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 152) },
-            { "3167336012388169649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "12218337369633748663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7264756313770306662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10492056481694320580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14281201038135286621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "8127190765748950828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "142486914279119363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1532263118203058517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5482851829165191681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "10548792624072794724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4239415134522959352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "9028970753877215614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "2324120381399737261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "10267260789603562117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9988801796928462423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "12516911293946682547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "9213886570531053949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 53) },
-            { "385046297070779752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "12541834857357563605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "11709992724966310174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "17222005830854879661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "475043738497218394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "1071007164550012186", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "6719302427415173754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "10482582307328548806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "6351347283201596793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 96) },
-            { "6531171505861182429", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "879005904827468163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16290626406346691996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 282) },
-            { "4569338575782832784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7575675354187625951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "5795073619189010837", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15123868617509445149", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5601435819039968726", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14104238386345631681", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17377293745073971167", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12134712464763856064", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5524215233998361104", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1103228955716492167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8618835732380720921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "15908673392788376468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 264) },
-            { "8482147530539941792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "9069334144391048686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "12493863403516600413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 273) },
-            { "16692569816843207989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "3438116423688595487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "15602863681196390535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "18277685132620834972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 152) },
-            { "16541722316343690197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "875142032423622622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "8965747921518186477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "3067806959725855130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "5779388310240896974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11092828091552833150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "10295330953350618042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "17791773192152464021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 96) },
-            { "4894227264080887361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "381149736509958403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "13603318842632052764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "8929453032482114162", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "7662200927459001757", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11473442921040533207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "388828310152538138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "1643241486250690844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "11806105193035393795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "8843585527713905568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13248567106128518549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "13708979487306970634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "14406070210216948643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15352245788978088971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "1435153323458789173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "17638692805430115529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "14068780861332616363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "6656593119788274992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "14695781272831602408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15696910741835640150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "15315327794058441258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7545013298074733778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "4026686872534942904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "6553736978928374036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "12129572274423886770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "9723314434598141024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11031625790234068916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "1138439260035360722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8323445733669842657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "54019631544204590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8971115542951085891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "4584970211859494304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9321208819255762521", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "12617625046664709483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "8264178890341675354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5334190564423375247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14746359019867963124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2044363708106765326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5132761922124425835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "8141428150264829362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "276407276027553756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11878734040194151073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11622925573287101001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3192332625020432602", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "9785114056964539323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "9410978119783758141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "12523676912856063091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5912451559447635837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 133) },
-            { "10264913782610095832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10309083227104422150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "8500148569566077929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "6578908625437515675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 23) },
-            { "13762042713029963144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1561225943337590599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10917498758625273194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "14335423820860953927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4428101657497677982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "15901675909820977223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7962991673727743706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "12141300895511301068", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "17106086048442658788", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12707946849050970702", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17154337492545826355", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "10109431802089940590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9428176632140441528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "52089503050497755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "12297371032753209816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "659150305191479097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "2065752819810364738", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "13583166868754499339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "13991205023798493715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "8939683514448064461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "18337160891834020517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1154228007901031779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15156525717629023944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "7757331094141318304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16779678846332091086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "5409924335138540834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "4149728557142033774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "6443517114667332732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "5419041493176804960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "15948383678216076358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "9604982746455852556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "15739274921308457528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4642234334824303290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "13200151444914751729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "16894871557229780934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "9933958860597451711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17094948685292534952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "9762182215179534181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "18273537339378756543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "7720939595094113814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "5865480930796299143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "10058165874008941852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "17309326904418811234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "5592428580503282095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "16348402367953880206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "13607830451968188080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "9311802150474489673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "5159470523468873105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "7975810844103449438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "11455843788148231615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1410630713443793537", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17303408650780384587", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "12069726772532946193", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "6204183474669103812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "12874626654611400042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "13546876216568825877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "2973436171295280783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1908809004094565452", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2322559721899919275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "5766507688771440170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 53) },
-            { "16626226341188424071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "14224121742920800990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "407189201971322683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "8460847842045253466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "879896719155824868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "5219048275475447369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8707189142909022305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "5948701218437980356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050143605017295447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "8906185843274300447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "8321769923556905957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10433541468308381909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "10405183426600618231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14885109535362957947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "72444706264681262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "16818714747882774917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "16236397968499692493", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "700717277178942679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "482564204402769504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "3221221905804708596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16467987800266816984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 216) },
-            { "11599932445375240727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "5057534502588100071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "15640202505592598653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3355259926747524578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9226443907548972870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8104309105061227444", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "18384657372655350144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "13739257060165119132", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "9810904714798127155", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15609627722687211129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14738573151275130683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9421927854269492263", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15962533525948221648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "15856268902838573812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "4085450203909854919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "2370837049876630969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "13464226348405628455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "12228963567837353733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10377729875228238588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "16362139250976572928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "5420766967862917815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "14578291812739325465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "18310667924071639899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "16853250891250756537", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12990341489637414845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "14630499010941056793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "878892264408839067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "9259437778054905599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "14974730512607138726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "3600066510593746268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "3140230065585683313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "15891662883560480723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "11284755586130392759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "2281119269283845320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 196) },
-            { "12246408434917478929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13283842370311517843", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "13753473508578037346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17123153447808465303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "10700011669103135203", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "9979259596137305973", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17225578855755054959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "6471563320494376693", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "8146945902795164796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "18372284940315010254", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "2194607895573544953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "1332624116953483870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "158222105675022402", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6830387121684699972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "11077503608116183709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17847109385592002207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13384754476437374504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "11462462742322068863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "4265693151382066296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "11070620435959083971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "6982733543386888622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3563614453014995411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "3498490999014554104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15595549493819416194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "14532844474906286088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9562291747339451180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6772239376357727149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10690972785852373520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "4488336106517889531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "10058614204420018541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "13865227850818392065", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14100870590396726248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10848277915422577656", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 43) },
-            { "8121179472578287280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "2502125887857336825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13192808619929896995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "5115661026367632863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "12812685418923919055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "6293403765897901528", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17596685300497748803", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2150326211917340956", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "530491406341772040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15197248015210313435", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2816353973187452604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16383540667048742064", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "16820082917500285799", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6820284286806022849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17285815901490707654", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "994182747184593564", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6642767323474835034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "3215659303601163167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "54975980454651672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "11529876081402974396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "14026570177552137240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "11686670048744589243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "6678796313875454849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "641417817126876622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "9622546530872848323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "9194788897910888066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "522181557896569275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "3332334993503432420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16131448347558322280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "6585223640997887253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "6205240287062600210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "17522452942286240233", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6571438978296387721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "15511138074959300404", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 236) },
-            { "11107930597263802755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "10320711719466983961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "16884228931101540030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "8253823502854784432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "6025872155179042054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "10173283505468233128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16094174852600023296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11077876432364512822", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10586018593856542117", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3109104171383198425", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18136135457402651842", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11834683513280095384", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4806571630436601566", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14849108908297747749", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3644282167178264526", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "360872770877634346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16720108310653948550", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14353390922580547467", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9868561386826862471", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17465517455679097501", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5570311824197099845", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7524311370696987092", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14070988879848388270", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "8296551195150971668", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14352796912241296357", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9840495023131952174", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4720851194954041037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "13852065717057446998", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4342360467977736802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "16336482874764861478", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6075691042233712335", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7570346182940928159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12971822824884826169", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3033264172690274208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "17301887391757619741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "15790005937034794347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15464327246951632247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "5659168916726488798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "8079376692609682448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15160738482264643601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17900257435531434807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16789135236017252073", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13224814158106791463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5078905972285278557", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4196367396954155354", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7009873605945341897", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7199295899520406795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16833854122884184025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "14599780481362761532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2572395498687401679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "11810221946429451169", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18084635102736402756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "59739211822469868", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5240181393417899912", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15962137123591591534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "10989937450490049763", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9798585825695496550", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9220830217525628783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "2235210915304938149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "3930314908786112883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "1334070221835422461", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6681818065741882453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "6980201892073961793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11530101016435264783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4801117903303888658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5782934278345953016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "13951717514084457087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "2721793280965260548", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8124736388338424498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "12223993560805441284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9860570706348640782", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14043770215999952932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15277856047844308598", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8048617952947915835", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11446745541571732900", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17422822627612865758", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13954144830230671601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11198908896401597838", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "5582896843095691256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "8133587696326295326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "9492402787848610840", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10515519878978734341", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16706121580364790904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "5495776091407365966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "16430562172386510259", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5673972310424776040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8797843396807284399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "1698321314111848001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5762290464889692462", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "3218248162832023196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12988961529988078346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11683680166617045816", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6252429564537528709", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_ICL_B1_B16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_ICL_B1_B16.cpp
deleted file mode 100644
index fbf1719ca..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_ICL_B1_B16.cpp
+++ /dev/null
@@ -1,1823 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // KBL GT3e
-    void tuning_cache_8A52_B1_B16(tuning_data& td)
-    {
-        td.td.insert({
-            { "1375156980278317418", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13455881643467418059", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12788968383428254917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13131740479277027362", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "3390014193205017427", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1270467775674221667", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14462744723628661203", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11717348577195224554", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9275303306340702111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "12245096462203481681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "4999505377862312410", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15272426400992401555", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9325097933807426691", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "18238669114790278675", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "2738256633362038820", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16689586259416414782", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1525652349412826502", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17683302016987200208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 312) },
-            { "5615525527388396983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 343) },
-            { "3992735701291817771", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13208739898218342989", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9536348721941264933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12803521018213865796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 277) },
-            { "8854783036772473804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 266) },
-            { "6766480740724769248", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "768423629375648579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "4044100281521441011", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "873240542570331563", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12875236165672036211", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12008819728839685704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "2486645741683554648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "368578589584714524", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "301201776306602054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "13152181652632422771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "10311747599696543062", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11258322449556590366", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14095734330183410835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "14910223536998380801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "3352689317181436056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15832740972576959202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "14732184525012592889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "8421045774757048067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "941232110069825628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5326247361632903583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6214194654733781771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10025839973092358719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16711955423531846725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2915165824085219545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "17108987360340581555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11972097635078477347", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16926950874716567095", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12643423612381102003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "18423051691107460439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15381833359831622179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12040626513219974957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10647227605517025377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8127570953237266335", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12876112384009608387", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12663860560275361463", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3217246278485567748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "15713964605078748923", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12293786134765875615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "16043683538361975370", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10670103699537731664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "17854578307286932628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11443268857010762276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "4479117540570599742", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11726298758004767743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "2968031010495399536", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3797957937905580811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "1474271081523145413", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8526484907799590618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "13723543003759101485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11728824117049687850", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13268525255152984893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14397348576352573007", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8616686489737649890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13176385389367548697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14990645740260870030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "7472330881076141262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "10892456883214928095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "9522661528867955338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "17856816245251319111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "14872992823083730615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "3106591708459602370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "11609821372586026178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7678457226823073886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10118395047539851751", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5389189982064081933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "1742897526168249500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "15331103261044247142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6644418194983229139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12478309735214802531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "18012549942299450620", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "11873734271080160669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10424278617647597641", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "9553032671453999824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3860603464276263676", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1207026216972160297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "9519623751582710696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "10328182165125764988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "2231648183489019418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "17599383258252980421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "16208488491972128275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "13379165253894817165", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "2566302789609970663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "1478419046264331178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "3087801652564627458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "16103943009195163681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14230385851791760020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "15293727142789007900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "13973179950424276578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "713121569924250372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "7947870656736319919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "1663285216972929652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14767888121198814523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "2124033349728954551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "8762901342272872498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "17006133396401462698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "10783981060353445280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "15110359240685619357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7875272450497189442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "3281207855459771997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "11932770338770247767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "15860915170591763391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "11716771904412649891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "1095495157025479260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8402692278765063674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "509781001842353609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "3255465741612432300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "13439896617880328331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "7134654288295280046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "6769243149577568817", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "3480732841490521799", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "18269685060032395235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "15649927926091502215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "69439315851965666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "156456996459945842", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "3012566432840424198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "16431165572426232677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "6324565723045697080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5390559917122707732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "5469227748156438008", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "17163158934005653629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "2307310127637739872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "1999979442136861875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "2527189070714658176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "8329846097322076175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16783619135298589974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "12214162812589030126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9216608098626790565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 20) },
-            { "5179760459095053114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "2452226948562393335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "4499586349553581439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12668149981216388765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2287356884312581209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "11115684531624462986", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "6483208845600234755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "3752171257634205726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "1774158624592967937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "16881283637687482989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "14749947225382670869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7351733901977025859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "435888248913413834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "13713406612642090169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 20) },
-            { "16582132711225619740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10436819182310112786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 20) },
-            { "14546281065004619074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12558716383635737426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "12609361477548272638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "8107447526839063293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10995907213890714701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4871907623235871050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "7394217382008802567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3880189981766119529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 20) },
-            { "3759057398165607194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "4561874206785244358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "488298169768725160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12956726277674279950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 21) },
-            { "7177837234452118325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "9057036344533510776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5093049998173715787", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13761566845514364807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "1594612401422787491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14603590053512154268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "10136369729388564720", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050675313067213312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "14221578799010900252", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11723735945517472199", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "13810995219720233595", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2704063557078535883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "10384537928514123040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17427036330773218054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "9796621763733208035", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14046114605615338907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5763440554939527411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "12892693137085610062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "17775705003104146872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "14878347463243157447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7368916076070115064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "3499109651698979012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "190530884420224257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "4202645222013675478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11324851661119942609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "6232363902828992968", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "4299492266819967844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "9481675228591993785", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "11772741918108731396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "18419183012101393192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "17832542092610191859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 139) },
-            { "11771014003680394135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9192665896782282996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "9763310312421884308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11430400968543668873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "3430266954211750407", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7172604084103519563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "10306542963828398049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5235375820995365354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 137) },
-            { "5091558853871982858", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12914986936318857086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "2265784112305305260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9019388470685749691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "12427258337646070422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15884763176333003771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "7211355951470869591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15399245700982979379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12644942072153919043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5876880412336151866", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "13775529405693629438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "9048522050692986204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "10642327923162019888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "6410682026872155392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "9454954846682513038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "16463823433924519300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "7279393739634103483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13358283026528078900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "8032685176029570383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "949330876419581703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "17713034180977313726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "472454322186482185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "2727219457659794468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7852745450437172519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "6065819201836017182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15984885011101717258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "14811022197918391667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16146350476627599543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "16173557782125372935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "296142385116663420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12655099960717366198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7937870623766562191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "9367157746678824712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "18062849937960759210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "11919129623429545762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "10522649794540845800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1104489643524273315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5419775002149092646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "9226912483632588371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "4958222070605478947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "4479979951990338510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "12022152681602871455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "5740738339752793113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12087141795291232248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "17825280904760131680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3974589991022739479", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "1838534101161814609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "10046663998164493552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "2305461098719675735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16504962609450876148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "6345550009198921347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11239754372812258455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "4347816192417741558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17809920600993699808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16710010075465723498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17729546848373991614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16998508915819714690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12952980509662451384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2683507674615735878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "13059207969254830451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16295660312557315941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14089893422771228191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "18034648276860485300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17739868787095417856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "10880081193716628051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15916505622570323098", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "9101018613418825655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15650839696475698676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "15628121900226431719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "14554225625951128811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "3134489458855347772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "5627834277145735283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "10729288973933590396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "10869005786136023160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "5597908143491399643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "577182964135927041", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16947969669087411530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "861419637283812778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "3643250372952944907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17977676737774695825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "10309504812060596568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "8866736221671835567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2133849627845285277", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "12793908914872030220", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15947699374684516369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4660288622381620227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "15914512645931208899", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7460672405409009037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "1541754036637209097", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "89439319782574517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "14088382963493477342", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18203935818408469865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "13191096881934434519", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7918742312252115870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "15641537661939240413", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "157805434489791310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "7941729567451949422", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10628725059172743408", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4492673409319122180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15857087373591747006", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13793441296561946357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5172712078329324967", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "8780604510524622314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "1760690277175249985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "13649894122307008732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "17546566148752689536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12675313398314286884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14621327324047759584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "14136097914489095982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "7638626850074132214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "9399994156762372761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "18068050257421269408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "11830297960718214360", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14959566236432790882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16884396694505987920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "17947818179123182001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "9381304526221508530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13932662890258900896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "8268533335852735248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "17419874083634480896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "12773693193167844110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5157249499936659040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "4282661608732125403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "3159147743553063163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "1706927777850488363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "9839670675413379092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "6780215829176686721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "12972634653821069685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "16129296588866116913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "18202222342562516071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "15426960908024585800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "17026284168840448378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "18118237182023167949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "11113256687741667688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "10555597973766215754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "17517495652165026573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "1832310305089212990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "13855438905855887272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "15349944413643626251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4738743763536059708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "16611452077660879545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "8101977280003030465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "2012181953284568566", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "2969389503332309296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "14515066741400300669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "9373353053843326128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "10023279637210292010", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "1103204698908514224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "18092842590142527927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "12174571114411168588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "14431607479949498164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "10279778381617181802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "4237276338897143680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "8083672466967374860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "16705621644424684055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "5352861363832390974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "16945184617367657570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "2995134938466176198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "11706378390483804857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "7958459862276998225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "11703557271443535142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "5020788604681810984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "15217183882858251099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "10650698451740924172", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "706370730287471796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "18199526506796726885", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "9269175963143039426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3691705516240577130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "13472532612464340803", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "12388375914105990324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "11582534256623549131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "1653274345637156919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "5893940382830835820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "17700958439420868719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "12730339458081890990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "6631816968511312100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "7000524935770116969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "386749666417295495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "7162575953766465459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "11398019086259011063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "3041612155708729812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "4274801141127703532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "4865023158176874622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "18424912460022156378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "10408322429232132983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "5277400567128489977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "6848989271874647093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "10085059621136526248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "12962552332511702682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "751912075185318190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "4505008254511324231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "4191326605459754690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "9824678205469832038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "18245935804520236353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "12309132521191764927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12843671306854567956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8275277322582733101", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 52) },
-            { "13698389420396031586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12949204491386872217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "7370273921473161914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "941829593638869991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "16206791915939407806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1500571771538985941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "2095802691829304676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17542414935564676110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 52) },
-            { "12380856644683171627", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1451466106918423837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 52) },
-            { "8071957466247137919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11661208196482963286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "6635217802203685464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "265124365266629363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 51) },
-            { "9513032457323269513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11814740669468421049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "5221320470007950766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14359530849521980269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 51) },
-            { "6181651715051152713", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "1450888744802985214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "2842103889477438816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "14006248791647711759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "7072606962946873975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "3599823735065658574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "11311859068168414878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17525531790109748810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 51) },
-            { "16749148369456398030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17556238490521153146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 51) },
-            { "6067904130482758510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1791615587935799399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "12985650543127289023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "6714886136800883594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "220326805056361171", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "6777045876155144709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "9454512817077883797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14011124615649605281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "994489782629179836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4338023436590582323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "1152693503778768433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "5994204139128667921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 51) },
-            { "17243576882981097341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "5524218746051008792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 48) },
-            { "2669822154816760632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "7179714714302073459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 51) },
-            { "13002363400738122017", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17006095064160484022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "13733327241591630239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "2623687018437195679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "14077148976508649021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8272823732258536202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2451712485584835395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8057302050645780813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7430073011895298582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5095827462645341808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "15129834325410878425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9660812093766156608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15781622938833984014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1089679781525023551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "6129602738379919488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "5287076386757143976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16076153317792960383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2108296560864415762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17006655627343469372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9404677451270692749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1372939511728986224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5311718276151327830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "529543453251381109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15591167992985613695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15026219694198820614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8258382025812748961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14810839157236175179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16117738994809548007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "659846949368492111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5211191663202250117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "13418701036204748812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9714764457768279762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "17310332946322628458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "15975964562807570772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "13447028922679236865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8337820318779061494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "18136765667969393174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "14821616804286068969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "18386376129938707290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16609136488331186895", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "1996860183441418841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "6491244517639245276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "16312223896859176991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17833517350994024381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "4226968857681929488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "5141753233513623264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6860503758000008398", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16489624657475712467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "7862815466573236157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "10679760989906275129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "852092858392507925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "6996376303337512293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "10978173291465325823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6670327979947471550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "11318913630213187720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "123251351612308092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10784073615329190425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "2261453441277654139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2937907409658060025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7852144838267007144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "4408772370026995920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15411474884532403722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "9462315044265139531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "6419580456182610836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "12277470820821378855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16865879032845300007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2862999234347597091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15447513376965243034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14420809655798184553", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "12954154886708228545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7575634241190730697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "2344498602308448450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4304041922043496030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "10971070835319242371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "4862529593282936100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "5312140481706133684", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "15522785615618973614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17798636687709019154", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "1938086876393565238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11897113890115321056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "14363654136811880073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "3928266232090746643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "15882969506682501496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16426179645101678763", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "18174857480705846286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "598390166442977699", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "5522698342845820411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "11559360678008060513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13184662326021747000", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16037141448095945650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "15094664469997373662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "822162932339827810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2597453794298356435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15851356529373376076", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7966454753124154534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7311120574972466702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16461809076899645037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "11655994466278963438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "6981537186704688907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7903891232234389925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4229105529069729944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12796777049340516563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14289048840489035546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4239133538073498792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5103094815475470596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8560635685184432720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16264774056719724826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2571882179292959757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16758962840329202004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4550028191070279999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15661322183507404821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14650567822254940018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "3755253206085028904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8751016391945753900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "288853243482418538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5047419871737940985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8819268903800581706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3746573775462003750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16286085532892593349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16547425454653232058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8195881973746570408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7712831597869354170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17035903590837750750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1907439276166837309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "3036808833459559381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17928043901784474130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "14667209474639064623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1701609125136907870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "2140514316203117958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9366201112659847392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "7808544677773370430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "2251029128552117936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "9529614587861271730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16811402686462277562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "10554266898346470422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7817036102984218692", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "6329618009202266591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16936366288366370882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "8025053805734757314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "534032316469702287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "3963106895592011725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "17994361454416813294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "14902389080201926109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3796274347773622633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "1306339989221885682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "10900880512948479338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "287386909600391846", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "17542176922797334839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "1081962464388501987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5831419373611158773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "3179874645565098825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14906458674793172507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "7334966010680206302", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4161141078006269526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 272) },
-            { "6522575549211855712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "5629373398445592781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "13374993751390784382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "12976499206227689731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9882204352209412039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "5041111302824362529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13869716373706247686", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6438522646185979880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "2406816735581074778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "8881150100883636392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "593712935037568960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "11970881115757095265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "5584432943673435454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "4560479630843098090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "15374625876485618845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "12278364834477923930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3122997634505472500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 300) },
-            { "15669490019428002270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "13102754309439605192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "17912189681971987483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "8153567933591966877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "1604661321386793876", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8990561333549136048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "10728212277329722684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "877436308867220589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18375125668176498051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6767245864232675168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9287404618748313247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 310) },
-            { "8728178019712933221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "18251360413872841969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "18271689282126907793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "954796765467489259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "13597240991532942069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "5079055505117153635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 307) },
-            { "4135003545872878882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "11883485911218628865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "2242915551775617989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "10556089809203693400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "3727142736386026852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "1622880009460832832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "4437258459981739942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "14691372262153587653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "12181607120522804433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "3159681096461848644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "6729785110495533200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "15322019609805777935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "7024495439434892956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "10416622008071151225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "5796500397424307442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "15702382940521972117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6093575518270471235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "5805383505505929391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "1801731858063091191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "1559798212423183813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "5594180958505308003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "4766071144928072260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "8650948093564284852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "3883845471211207871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "4366168099274266975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "578703329577922869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 184) },
-            { "16863960779539003201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "15450609897480659306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "8203550467004532364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "7431849514656037251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "14484890926084856480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "7777333052643961206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "4424217045094988504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 265) },
-            { "7994179151788368291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "15192024816519005250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "4747159205186229582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "5485971317082563152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 266) },
-            { "18128162750557822655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 90) },
-            { "12421707187947291166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "792684262493086891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "941626985322260281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "11868551452004726281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14352303529756685990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "10702234389482091891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "3895088069642140043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 105) },
-            { "5334566325056222430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "8306337702797456793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "15720507574336564201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "3277243911383750280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18150429561058646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "11169292427557543138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8295126647635181949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14213516751025324346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16509472637458153234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "16589607587365212240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6988674007771237080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "3448477246688526708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8507854696766492454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "8906588133431586825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "654122557966242717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10196332102593337214", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15831600396403741571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "17808913959977434594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "15548971488532746290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "13468713306678453952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13613399861925108148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "17802514063213000148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "13093429681061786539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "12247991248100147706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "14491949194619001237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "7590767013583950613", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "13210604117940125947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4670443882075998209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "2857337999074313592", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "16036386660666696362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "755414184406250882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "12190841837604350271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "10292243973236220688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "17793292063552633023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7605139219344415117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "787363431787954804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7000486794832106857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "13608239208821071914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17281202179589913619", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "16985912104363932350", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 133) },
-            { "14744368497944610864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "3737552767159920174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3792945601873900927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "1364546124782880196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3689722043202617487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "2632535010129224704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "10968768803038046390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "5353552956675518468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "7866128397931438774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "18233660940545931789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11670430946096342056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2627779045483019709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 92) },
-            { "11066913713501760080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2552187713769926425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "654821507679356726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7606728651572102823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "7549378486471456156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15410074937424854348", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "15114370307779942381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "12112853999307505628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "4161612746310931789", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3388752887767453958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "14046990030104971367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "16230621843665445228", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9274179337770060652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5115134711994944288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "13898821685774165645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3007637520820789085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "16294825599850364701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14681717813022425567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "4915831715914920982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "12894240573737168362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5448537627319798272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14389915292223442327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "14274685812676150168", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7732899312577293959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11956435900037329302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "9263063714383940562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "5824801192141531089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "5608133987357542077", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15392077168521832549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "16446533347502650316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "14762599606783897222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "709835724029986012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1572991986657256775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7398196853452900099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "8140094412609934765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "2659031931257084418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4640028527711211109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "18172711677056449158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5183231560876991543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "6821855018718422278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "13237050834496100264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "7164580481046523192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "2490155559809645659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "15430549683839591544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "4553409514380460123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3041752019114501584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "4161001033681779582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4764776977138392550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "6882621854468565774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8881135571874888085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14038261392627717712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "628191607060767879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3511588484597779204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6904130543085920483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7924408980408826942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "9416186718345824095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14719421757340260468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11936419502418995274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16601702334097258697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5336120047683197088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15897477855246170861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "9780938731831129283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "1473214668483422172", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17515573322312447679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "18356980026934328781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "18077281411861416889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "2543041530639980505", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "16370218798911151331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "17316626950179740845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 144) },
-            { "10414903047695486119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "2809950092498355574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "12011982029561277581", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "11267742746905371769", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "12534001599784153836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "1882052795393187384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "419783127503173016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 144) },
-            { "14211903923555028634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10892706534058849825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "2345023488044002149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "5754844816339228920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "17015791782274123780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "3706994659266083979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "13324157125165576832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "12014527187730671229", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "5170245731599664670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "6854611304056079417", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "1954052357826969119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17824431042110985323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3603706453982734995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "11992353959766718397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 186) },
-            { "15163327502374403643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "16758697697363920520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "10930115765550856328", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14418429155823196539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1628593159980574595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "15675968397825708285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "9594594523961285945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "6634330132674952638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "8434794604559592624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "3150231129728961455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "12545558125736154584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "15485701086886851362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "18005721959893562716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "490233152678323691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "4073467095502162430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5801429077171542466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "14841539539334726292", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "9404953235624894187", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 89) },
-            { "17995371099806008878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "8961138963663532667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "425744529089575241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "1316444335300814745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "761169277744593430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3325727286860556323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2526832080529662683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15470013032930986062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "12255528292506999241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13119479079474639169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "12813978452097969536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4991419288164762786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18210370419559876426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14962768577232034246", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "1452597292381229708", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7104756264011682902", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7744787957569714828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13503688893307029975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "9133263538092913983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1383899865465106141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11829442945690098558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "12394049027081208902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "12159582810513550491", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "17738299860390552088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "797387385159110695", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8757900457181374694", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "6048964584602891448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "17882819773586674851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "17829148383265978140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14711697456265712456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "724953082687879224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "805221045541170643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "8241070786700614317", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "9191832520273617003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "12408889192918919210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "4885944395876887711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "2651385050387738902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "6303682540621797774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "905780459938651623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4476928353532757380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13681462437496627948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "17243648226968859637", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "11192356850081328892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9323825370872655346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "10000618285883395700", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6418327009347170687", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "8528750110601691390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8061914949376516780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12992194515157698316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "17870874477143985774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "16234606052818596502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "9148379585489720669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "9270950131920019932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "17001502418583498926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "11163107409437069532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11465965972527519631", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2534408579674556441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "18109284647478027063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "9849272539053219052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "17382660912493284320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7877332346656934022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 311) },
-            { "6323026044750482867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "9761573038170759563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 119) },
-            { "12098146032672599222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "1403617451623027879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "9058996149754556268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "5864250949922222051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "15847413004526420496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "3199841714087553410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "4957638663977636791", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9437794960375526230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "9475130054420979752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "13312514874803986753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "15997754881872769378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "1941341635794709702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "10157866834809927320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "12308359047798183133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "2986189945936592561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "6928835003016610382", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10084794570892043447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "15417738436777481469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "18377298651236993830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7354234812009979811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "8656468860180713379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "14472187692485966933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "397770940444464146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14258499419905714808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "17599396373608265826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12935563359569230797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4892959859293355837", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2802810524370514276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14907097142953816744", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2525260242689556544", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13328449155966085543", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11856266545854830143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15993427814066246646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "2100891581797371600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "12242618640422208652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "6133592828563353516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "18232278892738147217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "11992625045241269569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "12601126285773042005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "7457899998356343871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 311) },
-            { "6343888265369366589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "10791067159964399241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 312) },
-            { "11327097771110264965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "5245308722062496788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 306) },
-            { "10792503079194374004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "4818231379191523896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "2198278382394812839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "3800011935243649447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "9631545863582097486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "1779941298820543013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "3621930417735246405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "14435120971846098308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "2893564501191050837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "8108843303778211282", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "3682813162987778705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "15494543914974994991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "7565221050911842393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "5629670679897666607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11754316727756881612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "10990741293315393791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17024388383581997032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "10302338806536775954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7915318733663535312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13702692566238948173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "2909728331855309274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13071545223094862275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "9631481972809246378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "13540002981450186147", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 104) },
-            { "7076937538747704750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "18043340998699622388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7148542290597073512", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "9040046051053703359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "1077773457856682663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "4716188972902735458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "17343050785312683560", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "5687802882700097624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "3524531620118359828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "5688478347124565305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "5504757952698692953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "13800387305792597325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 292) },
-            { "6574971185849732667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "10573920781439771673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "4992668316921598993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "15778834188130183853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "3062101811226530720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 151) },
-            { "428659495445490820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "956022649859563080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "13410850301164057911", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "17423645390621980919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7802311886554362782", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1172103288112689821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "17353894529222574441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16431857516454692096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "9100044555742394133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "13115589642140732066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "16190949264253468961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7026575758396092435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "16761856644242716357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 88) },
-            { "6341197991729122563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "17087740929472936216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 101) },
-            { "10795104632256101599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13327653786981478088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "1096671695414716274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "10774528268153772208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9525853014023664813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "10632020369698615114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "3234107167862677811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "8708643228914766202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "12415368596357091523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1028160614515220430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5275016494706355806", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "10947686124973711385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "3349519148124496343", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4003433148846544263", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11718418772370938734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "989564341557094953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "6942049339361951275", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "14555883089089918919", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15320845027635796583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4014667229872705228", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 68) },
-            { "2438374917504708831", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3272017687600371031", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16067605128297748820", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "14150012830816329527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "804195263636995800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11528417522960871233", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "15378025640603637387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12860222041026638681", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12725647706191463348", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12553441041059632729", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "12782191856884962803", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15824189967727245909", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11149782181562145291", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2653651564133701304", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3526580286148537369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3985659568982275663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "13642146548740074992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "2349007644347065353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 119) },
-            { "6146876760962332928", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17434429579652310107", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9447458159095730492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "8655883535274781128", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7272538316511343863", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17564338309805484464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7881187047171099732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15579919505002150556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "11583017348580874022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "17915846724151945664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5319668297345215520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "17208186152576814861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "3633858263279042265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13853056718266488510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "14759179293743468995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "16995873636564597028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 105) },
-            { "9438739171104456179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "14429081455612806819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "9819596940685093690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "12085348936192462321", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11951606039079763598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "8769060267707904998", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17104611871050967957", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2103882464623009432", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2659712601063515059", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9759380701896779097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "13842309033760176194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "2418288192668085805", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14994322266840011040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16402312692470500253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16955653765071712611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "8739347545059610410", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13459514533473657102", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "7824524940405130010", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17796310681498690253", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14823616678465136590", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13816104794723484993", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "846088275031979661", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18125732229366977468", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8464582977975377118", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6290317420155851465", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "12696412964119109465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "4994591211723226974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "1036010477232750453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "13786357802945430475", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "1003101267609305257", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14991602704357959545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "6181308879301978465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15488550074426713959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "4062706195708729345", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "8594644182487917002", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15881381297320383917", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6040286126398028933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13926122593957480821", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6213386558868267629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4456004887590847716", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9642229389394495047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18259656768460999562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4983880246908724272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 264) },
-            { "7881579844586294503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5331173521406046122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "3285520504090196295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7143510787416483146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "13104509059416300615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 307) },
-            { "10090923790949378407", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3429844423226609965", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "706049518431331645", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17193614571243427089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3621424752591567930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11066930104187448422", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "209732971447020989", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16044646335477470657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "2172121470071868949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3392693938352572136", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495063314176654751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14553856088069405595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "4967444801764057340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "12160764253455777655", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17723621158215826108", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2171768477223405739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12672995204641007004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "5622089373755094139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2129726780118554358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "4160656836528944651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "11052732052072367261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "18432787283148809023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16172528828198474326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 232) },
-            { "16327433707667075261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2797723586312707948", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8451212914744825089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7025975403069487257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8913950860101596091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15308578014507211237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "13132804928635689780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4465781406991476376", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16266491618150971928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "181006047500375768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "18140951659547259039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "272730229972987861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "14898892437285105327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "17252449599613270108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "13436376034548670107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "13787436604877398090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8873614802459592665", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13663893159182636270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "1361159591875955678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "5912303851874077576", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16245760498096322525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "9928406318940388716", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3036512701943687724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "5334291640387922287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "3002986032379998259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "16469788155263456039", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8709632541892447149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9524303276541517389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "9354818521586974021", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16781127329510211966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6351572488552853754", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "907036267078333137", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11855070245618904113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "4544242784357021697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18218631037214746168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "178353385245384751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17658152048177750315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "11636129433022017868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "2622434279674583815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "14335074487552883436", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11175955260573469979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "2732519635571994212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 239) },
-            { "13893789954946953427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "4355933224673863178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "18037918102910297531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "16071723603031305677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "1697248235682953135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "7843498978148810586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 239) },
-            { "6767159196241633301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5097818987523855112", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "6623182990939010641", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6711878663358611849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8671491767142900139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12164298124869114517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "17089801601582809764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "75742659105146536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4652136280940317116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "9751582946441607796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "16706244336960642883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12581879452540858313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "17443356777503458523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "939718260623752240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "14131851237755716991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "7474639594232203854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "14152716242882609401", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7998930863626763670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "10323345824599612614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "30229601562833524", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17788367809717898285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "1509728225855233852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "13139625572508441980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "16491532291908469567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "6355395905401306995", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2096779676054335057", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4217179485243909459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 232) },
-            { "17101789600628162503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "6139574161497189424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16559140502701231107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11459784003592366395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7869916853707978306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "8132521728369930959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16108573960501496757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11086699387784339943", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "4013707396889204359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "11850332373794932468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "14763982961176216679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8207349115037232863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3273748387141431306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11254635684957519432", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "16816222375242496370", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12809199739984715013", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5040730152867713388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10429613013253088132", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15451919862187018297", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7546586420552408243", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14487682847898298214", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3106710091841093202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "6458124573210430792", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9182897385081081193", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14462438074931673266", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18133334552107213128", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "38736266675995457", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13654816209891478730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "6263019986730305851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "12929981792125924963", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3138374672801504481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "17009318615658405230", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9421643783312790618", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2294026590516781945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "2940027113687311893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6090625728451718945", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5643908654122573882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "9065894438656900887", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11185156002426041243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "14670068483447729857", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4623542918584461522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "1143214652021653634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1434535531617424039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17025268985366223779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 104) },
-            { "11507538232733291666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 104) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "16474284418841532356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12461575861709234385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "192209423643075326", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15490478608105402679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "3491333679577961640", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8176012042686275874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4282198629458668761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "689445825453914111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "969746749329671447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16833026567865627676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13046322179198317310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "6902644989079870993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "10987953316324712538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "12515465135362865565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "10049571207493913006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "3926585856863002495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11275109735493317886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "12238674883388043717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "16108759090923335184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11756881293845417212", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "17839839336294937155", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4703107905652287491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18180820925685532104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3835286851569826052", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7807983899017500046", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "10294185397756053636", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5519535335798045279", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8701248964531180496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "291868903926685441", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15239764240622554314", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15963038745470172423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "11428599290755097395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "3180320769716158201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "583303098958523195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "318377908569897093", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7353563160591978243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "2582625260054352916", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5609922876429907954", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12557015880639217508", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "11528310408333718862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "1471837664358450291", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7351401242363888463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "953306082374100275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "15759530339367380982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13300022131572486202", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15689502054035168040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "16969463538496570528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "10237524128771958432", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7969848911698660033", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7130694811424715594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "8578747191812631883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "5197105253412476591", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3120553928584920777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "4750894407873652809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12667014405537239093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "13644681270630373984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "7187734276051878356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13253775441326432265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14733510474010040334", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3336303478756453360", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16352331970945217438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "13484950419220835364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4674416595144505741", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14559308665571750465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4542143431130171516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "13189392239349392492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "7009735776703529573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "4220826666482500445", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 273) },
-            { "14792528369891965810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15287650965861631130", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10308175009371219583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "2903605246599054308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "9213563311267466388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "5019077257951332016", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2497756607567197523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "9285566577169147378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "3432296808755992670", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7688176479120305539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 88) },
-            { "8818070832398055086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8787816339967963727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "863952266514375915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "5835634465164771899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "15101680837342453931", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "1116274074896622552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "12790788016297794214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 43) },
-            { "13538051178827008933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "16403423801823379909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "3723613341885592267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "3830703844770425343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "40704767167309552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "13973028408397200796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "16561224775421968533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "11243840588602365090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "14103112843209793966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "10483664832302187567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "8100595788531468781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "6620782733027313312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "5644068493155655611", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4867937397499803072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "2702144517025248597", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3304589333915676807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12894625941923144893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11649407835105973949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4897991181236908768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "12179581684777023804", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2806529556090896246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "11327228813412934262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "5485749317130402302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3499243120652875549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "10916647716124396856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "5749536453225343663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "789359733867650915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12626014184575881530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "1201692134690347847", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5219399418946822456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14217181622713951411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "13025323039227543550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "6114147683777615071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "2355214244972870639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "3167336012388169649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "12218337369633748663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "7264756313770306662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "10492056481694320580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "14281201038135286621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8127190765748950828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "142486914279119363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "1532263118203058517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "5482851829165191681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "10548792624072794724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "4239415134522959352", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9028970753877215614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "2324120381399737261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "10267260789603562117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9988801796928462423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "12516911293946682547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "9213886570531053949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "385046297070779752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "12541834857357563605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "11709992724966310174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "17222005830854879661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 15) },
-            { "475043738497218394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "1071007164550012186", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6719302427415173754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "10482582307328548806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "6351347283201596793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 89) },
-            { "6531171505861182429", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "879005904827468163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "16290626406346691996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "4569338575782832784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7575675354187625951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 307) },
-            { "5795073619189010837", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15123868617509445149", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5601435819039968726", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14104238386345631681", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17377293745073971167", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12134712464763856064", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5524215233998361104", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1103228955716492167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "8618835732380720921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "15908673392788376468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "8482147530539941792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "9069334144391048686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12493863403516600413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "16692569816843207989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "3438116423688595487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "15602863681196390535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "18277685132620834972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "16541722316343690197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "875142032423622622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "8965747921518186477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "3067806959725855130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "5779388310240896974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "11092828091552833150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "10295330953350618042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "17791773192152464021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "4894227264080887361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "381149736509958403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "13603318842632052764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "8929453032482114162", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "7662200927459001757", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11473442921040533207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "388828310152538138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "1643241486250690844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "11806105193035393795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8843585527713905568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13248567106128518549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13708979487306970634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "14406070210216948643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "15352245788978088971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "1435153323458789173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17638692805430115529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14068780861332616363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 340) },
-            { "6656593119788274992", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14695781272831602408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "15696910741835640150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 275) },
-            { "15315327794058441258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7545013298074733778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "4026686872534942904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "6553736978928374036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "12129572274423886770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "9723314434598141024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "11031625790234068916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "1138439260035360722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8323445733669842657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "54019631544204590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8971115542951085891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "4584970211859494304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9321208819255762521", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "12617625046664709483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "8264178890341675354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "5334190564423375247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14746359019867963124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2044363708106765326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5132761922124425835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "8141428150264829362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "276407276027553756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11878734040194151073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11622925573287101001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3192332625020432602", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "9785114056964539323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "9410978119783758141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "12523676912856063091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5912451559447635837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 281) },
-            { "10264913782610095832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "10309083227104422150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "8500148569566077929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 283) },
-            { "6578908625437515675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13762042713029963144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1561225943337590599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 121) },
-            { "10917498758625273194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14335423820860953927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4428101657497677982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15901675909820977223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "7962991673727743706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "12141300895511301068", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "17106086048442658788", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12707946849050970702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 150) },
-            { "17154337492545826355", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "10109431802089940590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9428176632140441528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "52089503050497755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12297371032753209816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "659150305191479097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2065752819810364738", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13583166868754499339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13991205023798493715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "8939683514448064461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18337160891834020517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1154228007901031779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "15156525717629023944", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7757331094141318304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "16779678846332091086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "5409924335138540834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4149728557142033774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "6443517114667332732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "5419041493176804960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15948383678216076358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "9604982746455852556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15739274921308457528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "4642234334824303290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "13200151444914751729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "16894871557229780934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "9933958860597451711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17094948685292534952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "9762182215179534181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "18273537339378756543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "7720939595094113814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "5865480930796299143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "10058165874008941852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "17309326904418811234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "5592428580503282095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "16348402367953880206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13607830451968188080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "9311802150474489673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "5159470523468873105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "7975810844103449438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11455843788148231615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "1410630713443793537", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17303408650780384587", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "12069726772532946193", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "6204183474669103812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "12874626654611400042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "13546876216568825877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "2973436171295280783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1908809004094565452", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2322559721899919275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5766507688771440170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16626226341188424071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14224121742920800990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "407189201971322683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 15) },
-            { "8460847842045253466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "879896719155824868", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5219048275475447369", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8707189142909022305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "5948701218437980356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050143605017295447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "8906185843274300447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "8321769923556905957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "10433541468308381909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "10405183426600618231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14885109535362957947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 101) },
-            { "72444706264681262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "16818714747882774917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "16236397968499692493", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "700717277178942679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "482564204402769504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "3221221905804708596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "16467987800266816984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 234) },
-            { "11599932445375240727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "5057534502588100071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15640202505592598653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 343) },
-            { "3355259926747524578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 343) },
-            { "9226443907548972870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "8104309105061227444", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "18384657372655350144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "13739257060165119132", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "9810904714798127155", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15609627722687211129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14738573151275130683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "9421927854269492263", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "15962533525948221648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "15856268902838573812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "4085450203909854919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "2370837049876630969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "13464226348405628455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "12228963567837353733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "10377729875228238588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16362139250976572928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5420766967862917815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14578291812739325465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "18310667924071639899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "16853250891250756537", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "12990341489637414845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "14630499010941056793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "878892264408839067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "9259437778054905599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14974730512607138726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3600066510593746268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "3140230065585683313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "15891662883560480723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "11284755586130392759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "2281119269283845320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "12246408434917478929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13283842370311517843", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "13753473508578037346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "17123153447808465303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10700011669103135203", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9979259596137305973", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "17225578855755054959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6471563320494376693", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "8146945902795164796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "18372284940315010254", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2194607895573544953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "1332624116953483870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "158222105675022402", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6830387121684699972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "11077503608116183709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "17847109385592002207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13384754476437374504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "11462462742322068863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "4265693151382066296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11070620435959083971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6982733543386888622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3563614453014995411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "3498490999014554104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15595549493819416194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 105) },
-            { "14532844474906286088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9562291747339451180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 23) },
-            { "6772239376357727149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10690972785852373520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "4488336106517889531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "10058614204420018541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "13865227850818392065", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14100870590396726248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10848277915422577656", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 43) },
-            { "8121179472578287280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "2502125887857336825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "13192808619929896995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "5115661026367632863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "12812685418923919055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6293403765897901528", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17596685300497748803", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2150326211917340956", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "530491406341772040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15197248015210313435", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2816353973187452604", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16383540667048742064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16820082917500285799", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6820284286806022849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17285815901490707654", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "994182747184593564", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6642767323474835034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "3215659303601163167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "54975980454651672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "11529876081402974396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "14026570177552137240", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11686670048744589243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6678796313875454849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "641417817126876622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9622546530872848323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9194788897910888066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "522181557896569275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "3332334993503432420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "16131448347558322280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "6585223640997887253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6205240287062600210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 302) },
-            { "17522452942286240233", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "6571438978296387721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "15511138074959300404", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11107930597263802755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "10320711719466983961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "16884228931101540030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "8253823502854784432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "6025872155179042054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "10173283505468233128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16094174852600023296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11077876432364512822", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10586018593856542117", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3109104171383198425", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18136135457402651842", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11834683513280095384", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4806571630436601566", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14849108908297747749", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3644282167178264526", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "360872770877634346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16720108310653948550", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14353390922580547467", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9868561386826862471", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17465517455679097501", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5570311824197099845", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7524311370696987092", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14070988879848388270", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "8296551195150971668", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14352796912241296357", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9840495023131952174", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4720851194954041037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "13852065717057446998", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4342360467977736802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16336482874764861478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "6075691042233712335", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7570346182940928159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "12971822824884826169", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3033264172690274208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "17301887391757619741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15790005937034794347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "15464327246951632247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "5659168916726488798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8079376692609682448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "15160738482264643601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17900257435531434807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16789135236017252073", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13224814158106791463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5078905972285278557", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4196367396954155354", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7009873605945341897", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7199295899520406795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "16833854122884184025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14599780481362761532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2572395498687401679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11810221946429451169", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18084635102736402756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "59739211822469868", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5240181393417899912", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15962137123591591534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "10989937450490049763", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9798585825695496550", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9220830217525628783", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2235210915304938149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3930314908786112883", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1334070221835422461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6681818065741882453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 150) },
-            { "6980201892073961793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "11530101016435264783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "4801117903303888658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "5782934278345953016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "13951717514084457087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2721793280965260548", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8124736388338424498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "12223993560805441284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9860570706348640782", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14043770215999952932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15277856047844308598", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8048617952947915835", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11446745541571732900", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17422822627612865758", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13954144830230671601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "11198908896401597838", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "5582896843095691256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "8133587696326295326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9492402787848610840", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "10515519878978734341", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16706121580364790904", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495776091407365966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16430562172386510259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "5673972310424776040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "8797843396807284399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "1698321314111848001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5762290464889692462", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "3218248162832023196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "12988961529988078346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11683680166617045816", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6252429564537528709", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B1_B16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B1_B16.cpp
deleted file mode 100644
index db3f82754..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B1_B16.cpp
+++ /dev/null
@@ -1,3478 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    //SKL GT2
-    void tuning_cache_1912_B1_B16(tuning_data& td)
-    {
-        td.td.insert({
-            { "1375156980278317418", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13455881643467418059", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "12788968383428254917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "13131740479277027362", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "3390014193205017427", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1270467775674221667", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14462744723628661203", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "8203171222962341018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9795194069954915563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13369603621524676979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13575423234109624706", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "10721885719016335538", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "14567947256029724271", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "10749263296616139689", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11717348577195224554", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9275303306340702111", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12245096462203481681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "4999505377862312410", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "15272426400992401555", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9325097933807426691", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "18238669114790278675", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6664482192233202590", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7454366978268164047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "16135569134646688251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "10572945270796129630", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "17495198214524203238", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "5221108094913859739", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "1092633914190498221", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "2738256633362038820", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16689586259416414782", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1525652349412826502", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17683302016987200208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "5615525527388396983", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3992735701291817771", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13208739898218342989", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9536348721941264933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12803521018213865796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8854783036772473804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 264) },
-            { "6766480740724769248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "768423629375648579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4044100281521441011", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "873240542570331563", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12875236165672036211", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12008819728839685704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "2486645741683554648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "368578589584714524", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "301201776306602054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "13152181652632422771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "10311747599696543062", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11258322449556590366", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14095734330183410835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "14910223536998380801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "3352689317181436056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15832740972576959202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "14732184525012592889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "8421045774757048067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "941232110069825628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8975333906619899020", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14800592533315327674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "11816277809167487786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "957781751038897330", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10498289589469975939", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12970943403831707924", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1300292367195167745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3399837016486623477", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16740871614208968868", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "71587235425438167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "12717047049023783979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10478482486372389470", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "6056581247196718403", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3780320160034246719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 273) },
-            { "2819320453491169732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16976464773806576190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13321672741246923341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "15140532227060261467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "9400755775406101904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "10292585962794261197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "13048561902713182858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 274) },
-            { "3658425022428447440", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "16947830954662293793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "8397584983137442239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "1071169341660439058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "5326247361632903583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6214194654733781771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "10025839973092358719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16711955423531846725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2915165824085219545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17108987360340581555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11972097635078477347", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16926950874716567095", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1212319037405620223", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12397280593466519809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "2609454334520044465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1336940384521633733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15271783562528081169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9533360488591027707", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6930697835136176263", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14444423571297570985", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "12643423612381102003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 79) },
-            { "18423051691107460439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "15381833359831622179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12040626513219974957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10647227605517025377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8127570953237266335", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12876112384009608387", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12663860560275361463", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12352923639732112511", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 79) },
-            { "708452703070938673", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "394778201589371681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2477849395789783501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11637325834858582585", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1485662490111767875", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "4300306345092124175", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3402183863499902145", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3217246278485567748", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15713964605078748923", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12293786134765875615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "16043683538361975370", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10670103699537731664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "17854578307286932628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11443268857010762276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "4479117540570599742", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11726298758004767743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "2968031010495399536", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3797957937905580811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "1474271081523145413", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8526484907799590618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "13723543003759101485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11728824117049687850", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13268525255152984893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "14397348576352573007", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8616686489737649890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13176385389367548697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14990645740260870030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7472330881076141262", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10892456883214928095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9522661528867955338", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17856816245251319111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "14872992823083730615", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3106591708459602370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11609821372586026178", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7678457226823073886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "10118395047539851751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5389189982064081933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "1742897526168249500", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15331103261044247142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "6644418194983229139", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12478309735214802531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "18012549942299450620", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11873734271080160669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "10424278617647597641", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9553032671453999824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "3860603464276263676", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1207026216972160297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "9519623751582710696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "10328182165125764988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2231648183489019418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "17599383258252980421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "16208488491972128275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "13379165253894817165", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2566302789609970663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "1478419046264331178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3087801652564627458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "16103943009195163681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14230385851791760020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "15293727142789007900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13973179950424276578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "713121569924250372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "7947870656736319919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "1663285216972929652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14767888121198814523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2124033349728954551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8762901342272872498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "17006133396401462698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10783981060353445280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "15110359240685619357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7875272450497189442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "3281207855459771997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "11932770338770247767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "15860915170591763391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11716771904412649891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "1095495157025479260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8402692278765063674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 133) },
-            { "509781001842353609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3255465741612432300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "13439896617880328331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7134654288295280046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "6769243149577568817", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3480732841490521799", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "18269685060032395235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "15649927926091502215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 133) },
-            { "69439315851965666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "156456996459945842", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "3012566432840424198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "16431165572426232677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "6324565723045697080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "5390559917122707732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "5469227748156438008", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17163158934005653629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2307310127637739872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "1999979442136861875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2527189070714658176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "8329846097322076175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16783619135298589974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "12214162812589030126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9216608098626790565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 55) },
-            { "5179760459095053114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "2452226948562393335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "4499586349553581439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12668149981216388765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "2287356884312581209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11115684531624462986", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "6483208845600234755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "3752171257634205726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "1774158624592967937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16881283637687482989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "14749947225382670869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7351733901977025859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "435888248913413834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "13713406612642090169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "16582132711225619740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10436819182310112786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 55) },
-            { "14546281065004619074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12558716383635737426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "12609361477548272638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "8107447526839063293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "10995907213890714701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4871907623235871050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "7394217382008802567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "3880189981766119529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "3759057398165607194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "4561874206785244358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 55) },
-            { "488298169768725160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12956726277674279950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 55) },
-            { "7177837234452118325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 8) },
-            { "15031155621982459860", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15223164574152266895", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4834446692898125871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14766477690417085350", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4461989328775275994", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "10141927023849730720", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10837496380266058422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "5012013738970489338", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16839741351990811959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "7846384623429362522", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9193880745263317167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "2863465257341735941", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10447947790216991304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12024817951074673335", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13474805373264874144", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "671453551040072499", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "87031578643428011", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14034525799882831106", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10864011008000364415", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5115007207028125638", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2866656294663853474", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7913076120244203725", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15187035463799513424", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17778091287904736965", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9562527071055150197", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10645625090439446714", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9955939178447682108", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7450417963648518926", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6648876837655776653", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1520529227443340435", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4455369117448405874", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2920840796593281126", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16341722570340169855", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15289152041466330689", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14362876471450307424", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10330180429524641331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12046017161414846599", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17228810554159747400", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14835309921389262864", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11263540528012919947", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16139615240471264488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "3820661057776133570", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17515847111676784130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "4252157815622916471", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4819131094439732065", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "17264010982688979937", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11277866878590984477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "11324651029379152442", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13425251102263428554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "4571404165794634411", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12279771749366327372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "5754396201681434378", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9809458159478958866", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "5459463503840817402", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6484375582324852109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "7005509036795164602", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10785966734346479177", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "15363606233048272809", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4890043345392707202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "345043289576587800", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4804533178560338520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "13328911884191551889", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13302687772426736346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "15231987838322151865", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17214254645087272557", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "4849343880559509889", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "851057218719456209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "331661172067077796", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3017824560305532066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "13596876807637507229", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2242602888499888844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "13264617841270329349", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11604794601689380990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "7770000755097925765", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5008350851224686853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "12166852830214895457", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17672785701483179117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "2439993891369206440", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15822546325822628634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "3056212889689424946", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12712071520541638451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "6217542346826403576", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6290584630172122012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "1245259979364728404", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13006774775034887171", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "3725013268198063198", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1359720957005310113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "1354647381212852890", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10480527638577674825", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10883992248631603006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18255227391100087860", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13565691057064774487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7954972694876158422", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5118467701668427545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2339864165283480961", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "490931535580183607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "150132162949295379", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14795618530175274538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14126906427006602775", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "905526102343710614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3385797925880519845", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16238415425814188039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7107677063657303327", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4098191685457418125", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2936333406928424760", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5539793555189956907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10106454449619141260", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5346898505346646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11807282628372660280", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12375919467924385618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11705756153433897198", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6651389480007764007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16911464046178654033", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12495003066477974474", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7650862961269327235", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10709828018763273371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5044721291675005144", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18427056032084727710", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1390379098099686972", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12054200116003751590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9500850790449116723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 8) },
-            { "9057036344533510776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5093049998173715787", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13761566845514364807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "1594612401422787491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14603590053512154268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "10136369729388564720", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050675313067213312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "14221578799010900252", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11723735945517472199", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "13810995219720233595", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2704063557078535883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "10384537928514123040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17427036330773218054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "9796621763733208035", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14046114605615338907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "5763440554939527411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12892693137085610062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "17775705003104146872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14878347463243157447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "7368916076070115064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "3499109651698979012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "190530884420224257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4202645222013675478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "11324851661119942609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "6232363902828992968", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "4299492266819967844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "9481675228591993785", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "11772741918108731396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "18419183012101393192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "17832542092610191859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11771014003680394135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "9192665896782282996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9763310312421884308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 278) },
-            { "11430400968543668873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3430266954211750407", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 277) },
-            { "7172604084103519563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10306542963828398049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "5235375820995365354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "5091558853871982858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "12914986936318857086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2265784112305305260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9019388470685749691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12427258337646070422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "15884763176333003771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "7211355951470869591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15399245700982979379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "12644942072153919043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5876880412336151866", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "13775529405693629438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9048522050692986204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10642327923162019888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "6410682026872155392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "9454954846682513038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16463823433924519300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "7279393739634103483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "13358283026528078900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "8032685176029570383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "949330876419581703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17713034180977313726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "472454322186482185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "2727219457659794468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7852745450437172519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "6065819201836017182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15984885011101717258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "14811022197918391667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16146350476627599543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "16173557782125372935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "296142385116663420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12655099960717366198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7937870623766562191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9367157746678824712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "18062849937960759210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "11919129623429545762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "10522649794540845800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1104489643524273315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5419775002149092646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9226912483632588371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4958222070605478947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4479979951990338510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "12022152681602871455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "5740738339752793113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "12087141795291232248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17825280904760131680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3974589991022739479", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1838534101161814609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "10046663998164493552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "2305461098719675735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16504962609450876148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "6345550009198921347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11239754372812258455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4347816192417741558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17809920600993699808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16710010075465723498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17729546848373991614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16998508915819714690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12952980509662451384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2683507674615735878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "13059207969254830451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16295660312557315941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "14089893422771228191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "18034648276860485300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17739868787095417856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10880081193716628051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15916505622570323098", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "9101018613418825655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15650839696475698676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15628121900226431719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14554225625951128811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "3134489458855347772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5627834277145735283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10729288973933590396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10869005786136023160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5597908143491399643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "577182964135927041", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16947969669087411530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "861419637283812778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "3643250372952944907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "17977676737774695825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10309504812060596568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "8866736221671835567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2133849627845285277", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "13902214851539825156", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "669771152920944125", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16921939234324970069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "7649413902932043811", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5658664813683907476", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "10071449674652717890", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13352000946213986936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "5291011077679733990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "1458615259705605525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "543472136359161929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "4644580321919256401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "12946531140050029900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "5010119207726811326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "3308770992373192529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "16913004986170202203", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4079026972040047969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "2683304757433993300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "3141886504884887200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "14444475853714164129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "10747988576436391912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "2722124265986526212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "8856888761246057127", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "1902656726461670148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "3337625924046561031", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "10280619408766255552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9695024256541464964", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "6733731409232284409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "15805087418686802636", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7056030150365552588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "13038533272699602337", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "3737576893817599311", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "8761283252495354972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "17549411807772646930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "13124342334495538095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8576733135863336233", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1082586642383386489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "3217574161785059951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "18357544235608006954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "13954821927253849036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "16158139166784964096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13558687084677943158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "13809898858049445969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16862145184923128012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "693883892843558363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5393510569127725391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "4533786844080178561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10128143628088846123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "5295693108687178880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16425665058951535484", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "1398177377739338750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7407975398526425554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "8614534946699754256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "7372956570616880244", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "13676654389512816868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9043982883185435219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "1626430741965136732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "15295951849706930711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "1075027491444288875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "16084700435355748612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "16698547937652264447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "16729849855476690294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "14171139920084409181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "4264284648458489052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "8866716292621164810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "11828175723996627443", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "11164519756679631743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5558136691773431495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "11031569203645035546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "4084026445911476156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "3819990462129075757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "10055549084854766170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "11657946392097042544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16768797136991242472", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12107262410635772120", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "938222258370511187", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "11727227430687227444", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "1040650352205493707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "1563987925712579649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "3870539490799697188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "13170441257780067955", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17490471699618303993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "13993548620104010490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "15728009639807698634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "10991423760161409883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7242013296950669829", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11744368351982723504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "7314288062932060863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9299299311101549958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "4138968242532400395", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4135068756462147853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16247399911710810038", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6020017927557041768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "11265472910579659280", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12512751736409465214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "17015328096102652908", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14147460733160099960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "10811837819834149164", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2173867324489962689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "11198301748997371475", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9741607635826869269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3860667078458481972", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13590444711975157776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "1551596771935253711", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "632116056424249698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3499645386058307669", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10471519687597963116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4429109491655891299", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9439431829175743345", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "70580716590540876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "577844026691991089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3873183249402084406", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15799159401545270696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "18154019240019929225", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1569043950563130463", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "4491380839102267034", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9243949750444156746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4772696293208603817", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4927360358387344983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5770286476124511234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "17084977396231597605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16800575429414554907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "12793908914872030220", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15947699374684516369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4660288622381620227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15914512645931208899", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7460672405409009037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "1541754036637209097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "89439319782574517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "14088382963493477342", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "18203935818408469865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "13191096881934434519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7918742312252115870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "15641537661939240413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "157805434489791310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "7941729567451949422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "10628725059172743408", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4492673409319122180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "15857087373591747006", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13793441296561946357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "5172712078329324967", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "8780604510524622314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "1760690277175249985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13649894122307008732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "17546566148752689536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12675313398314286884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "14621327324047759584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "14136097914489095982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7638626850074132214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "9399994156762372761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "18068050257421269408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "11830297960718214360", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "14959566236432790882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16884396694505987920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "17947818179123182001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9381304526221508530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "13932662890258900896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "8268533335852735248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17419874083634480896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12773693193167844110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "5157249499936659040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "4282661608732125403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "3159147743553063163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "1706927777850488363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "9839670675413379092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6780215829176686721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12972634653821069685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "16129296588866116913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "18202222342562516071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "15426960908024585800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "17026284168840448378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "18118237182023167949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "11113256687741667688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "10555597973766215754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "17517495652165026573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "1832310305089212990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "13855438905855887272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "15349944413643626251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4738743763536059708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "16611452077660879545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "8101977280003030465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "2012181953284568566", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "2969389503332309296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "14515066741400300669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "9373353053843326128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "10023279637210292010", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "1103204698908514224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "18092842590142527927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "12174571114411168588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "14431607479949498164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "10279778381617181802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "4237276338897143680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "8083672466967374860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "16705621644424684055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "5352861363832390974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "16945184617367657570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "2995134938466176198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "11706378390483804857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "7958459862276998225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "11703557271443535142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "5020788604681810984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "15217183882858251099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "10650698451740924172", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "706370730287471796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "18199526506796726885", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "9269175963143039426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "3691705516240577130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "13472532612464340803", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "12388375914105990324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "11582534256623549131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "1653274345637156919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "5893940382830835820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "17700958439420868719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "12730339458081890990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "6631816968511312100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "7000524935770116969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "386749666417295495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "7162575953766465459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "11398019086259011063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "3041612155708729812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "4274801141127703532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "4865023158176874622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "18424912460022156378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "10408322429232132983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "5277400567128489977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "6848989271874647093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "10085059621136526248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "12962552332511702682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "751912075185318190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "4505008254511324231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "4191326605459754690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "9824678205469832038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "18245935804520236353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "12309132521191764927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "12843671306854567956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "8275277322582733101", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "13698389420396031586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "12949204491386872217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7370273921473161914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "941829593638869991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16206791915939407806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "1500571771538985941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2095802691829304676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "17542414935564676110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "12380856644683171627", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1451466106918423837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "8071957466247137919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "11661208196482963286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "6635217802203685464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "265124365266629363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "9513032457323269513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "11814740669468421049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "5221320470007950766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "14359530849521980269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6181651715051152713", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "1450888744802985214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2842103889477438816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "14006248791647711759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "7072606962946873975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "3599823735065658574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "11311859068168414878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "17525531790109748810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "16749148369456398030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "17556238490521153146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "6067904130482758510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "1791615587935799399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "12985650543127289023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "6714886136800883594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "220326805056361171", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "6777045876155144709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "9454512817077883797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "14011124615649605281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "994489782629179836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "4338023436590582323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1152693503778768433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5994204139128667921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "17243576882981097341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "5524218746051008792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2669822154816760632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "7179714714302073459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "13002363400738122017", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "17006095064160484022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "13733327241591630239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "11942736969933408358", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7869779894480025247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5735608687257018419", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "4346591404756288097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "805131056816361237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "16910952799476896905", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17512961503976896701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "4773077837537775324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "12193395770362986433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "5740745357953479527", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9040145293899470160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "12755692101476964677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12467673564660108244", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "7432142107544210174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7232326270078161768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17238880534517721334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7235358742317442134", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7548031489690889629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5040095338370816349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "3816674884393241704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "13919204232414535363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "15589007878875898942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "17711453305763476458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "3501882025888946886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "1171681987783013074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17585206779958265260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17046662043776372746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9208964785762052001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "4435224497850514394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "16728762255357411770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "2968439898708528834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "11845189428639322474", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "16616945998593626851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "16490405739040977260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "4974320417566990034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "6428098122005804378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "17281826959243966826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7369903937189508744", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "9111988592015450418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9119618606914671839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1711220333751274603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "597650904461183283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16888412539296862194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "3350601287664242323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9702618600245321109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "17649961873981897621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "3244675355773468991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "9340159617983543624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "10570285542015420072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "15968821946892330559", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5509395737020858006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "3806131437010910920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "4523064418696274869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "12004552919019936392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "18313088176414428990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "5649150695527000655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "14985755375924972050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "9441060601228656341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11421180829679625737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "15770767768674603174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12055647521556218046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "17908444616754154471", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5568753513029409478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "12417253210787537988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "4046830923427667342", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "8108933468437926367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "84595904778810418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "11756650366229979428", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "1617135706549276688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "3011188207492335920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12450814729547235386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "1157947252370351851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5374664689223295796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "18215430801133520364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12936220888307335332", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "8746621720912032145", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12003323477818208825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "17170858505976681742", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16566128345135114558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "15690161340392005765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "60267878504897170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "3501667344669686338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "8690196189594920365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "1930929857644673460", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9671459469252116568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "3266557807508325807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "18041177945345031826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "18267428053198215471", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "18417288692814472127", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "14031009077471784948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11666250400445971335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "1367483816197881270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "14248239982355212178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "15820359925623438341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "15216108478837665623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "17489680436564779197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "14117801387057507639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "12831123539633580270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "11337525286386930242", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "8431759922045602848", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9601412379897937608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "9152433123828445089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "3118602494449249177", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "5159738930501638535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5060012838564094182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "1905758333157310570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "6870942166356599956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "18067291256808591467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "2826762745628486040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "11841034668170849494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "3034482898462686729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "15838113905712517735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "9407646138658641974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "15636128989267984459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "8409488188696700816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "5720964268093705079", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "5922142661777925178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12900949103593247293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "13483088320871913126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13960388312976163971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "1843555260471832708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "15739278428190392018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "3868149953087814447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6845814820599174031", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "6203765709597125063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "12871555773123368130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "1237920404306733800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7669403041163460089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "6791806088355877039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8561261337239934159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "9580986168276580598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4708035980731751007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "13734043898517059207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3177304125602972370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "15727611564408173858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1632416005093914709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "12253049204822930675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "15078168059698267650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "12522495848240087966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "5074273865983613482", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "11936530628363072904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7870154008378361670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "3774285301357006334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4848143712599565301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "10316451248440741901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "733956743303342862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "16677044352793659175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7075659071934895087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "8803037667261582905", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12421204749289937399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "7330202944390548890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10753540518493641553", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "9999425239167488495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "14001406016806064079", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "7565867291827884997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "5941298590926032148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "10130171279527667782", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17344974951998490453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "5550969016335082071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3398322619007806698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "11356842300444410831", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "2623687018437195679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14077148976508649021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8272823732258536202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2451712485584835395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8057302050645780813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7430073011895298582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5095827462645341808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15129834325410878425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9660812093766156608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15781622938833984014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1089679781525023551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "6129602738379919488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5287076386757143976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16076153317792960383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "2108296560864415762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "17006655627343469372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9404677451270692749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "1372939511728986224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5311718276151327830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "529543453251381109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15591167992985613695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15026219694198820614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8258382025812748961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "14810839157236175179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "16117738994809548007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "659846949368492111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5211191663202250117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13418701036204748812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9714764457768279762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "17310332946322628458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15975964562807570772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13447028922679236865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8337820318779061494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "18136765667969393174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14821616804286068969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18386376129938707290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16609136488331186895", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1996860183441418841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "6491244517639245276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "16312223896859176991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "17833517350994024381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "4226968857681929488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5141753233513623264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6860503758000008398", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16489624657475712467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7862815466573236157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10679760989906275129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "852092858392507925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6996376303337512293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10978173291465325823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6670327979947471550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "11318913630213187720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "123251351612308092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10784073615329190425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2261453441277654139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2937907409658060025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7852144838267007144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4408772370026995920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15411474884532403722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "9462315044265139531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "6419580456182610836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "12277470820821378855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "16865879032845300007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2862999234347597091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15447513376965243034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14420809655798184553", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12954154886708228545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7575634241190730697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "2344498602308448450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4304041922043496030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "10971070835319242371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4862529593282936100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "5312140481706133684", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15522785615618973614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "17798636687709019154", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1938086876393565238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11897113890115321056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14363654136811880073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "3928266232090746643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15882969506682501496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16426179645101678763", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "18174857480705846286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "598390166442977699", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5522698342845820411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11559360678008060513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13184662326021747000", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "16037141448095945650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15094664469997373662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "822162932339827810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2597453794298356435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "15851356529373376076", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "7966454753124154534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "7311120574972466702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16461809076899645037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "1591199515536783245", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "338716975932676215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12165079289914715018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "348058686961206025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17635171685500922207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9643408025778914022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "5145853681977610916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "15155676074658242659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5269172622193124300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "17037462814585846902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "10100237101982273901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "15322609677356616580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "3399406641489305996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "10187930930336324253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "17252589865292797082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "17922279129043570176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "6323083153920795679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9277176009071334860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "4313392430539923574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "10883341041912056319", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17310409067211414565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "863057075064640334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9131235538209388787", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12868739680413736657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "15901724303713479611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "16944335478353845609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "14025235562200209723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "6556424924189200804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "14398854364550406668", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "6577505360421510286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "14098811155652990436", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "15530407024531326375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "4466647043226271996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "4121109463284708890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7916244303189113815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "12309955719964788034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "10133054058562198093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "6294240435687565243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "10178145641713631806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7585184325339753737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9222744127882324405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "9542325095876448686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8155268141318893606", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8541982562061181756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13472577372534605883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15980348884716629349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9737565171095493297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "3622409603053918029", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5657471280535146301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "17025324057045572535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "818998169319147148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "1680468564927032670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "14466032674083938714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "73865742350616903", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "13833960927635646899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "2783577080556699089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "3563872903821081702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4387041763614917736", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "9714508918051740792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15412447128995361859", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5965451243366505522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13856271274572142709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "5156033406916344703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "1018687388655376483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "3779229442395464456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "13448845356783404653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "15578456771467281881", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "18302892230881285207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "9737833587413114584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "467975197394411990", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "994842991399671507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "778476198101178556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "4769003637955328938", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4914474312076193952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "4091702228990140696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "7602222004475424358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "14544219140091420262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4279062247055842367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "6603778920476932267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4959403414256988744", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1425953627379976115", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13477548641580029772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "1963081583851864291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16393176054374397767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "11132679855317294753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16000753982895054944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "2727175120437582536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "2921118493468368908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "11626398907755088688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "3224352307778512793", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7780140599533242850", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "1270307036687208396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "5911282942658469852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8809017515482311843", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 280) },
-            { "11655994466278963438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "6981537186704688907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7903891232234389925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4229105529069729944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12796777049340516563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "14289048840489035546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "4239133538073498792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5103094815475470596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8560635685184432720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16264774056719724826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2571882179292959757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16758962840329202004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4550028191070279999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15661322183507404821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14650567822254940018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3755253206085028904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "8751016391945753900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "288853243482418538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5047419871737940985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8819268903800581706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "3746573775462003750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "16286085532892593349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "16547425454653232058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8195881973746570408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7712831597869354170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17035903590837750750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1907439276166837309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3036808833459559381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17928043901784474130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14667209474639064623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1701609125136907870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "2140514316203117958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9366201112659847392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "7808544677773370430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "2251029128552117936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "9529614587861271730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16811402686462277562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "10554266898346470422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7817036102984218692", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "6329618009202266591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16936366288366370882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "8025053805734757314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "534032316469702287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3963106895592011725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17994361454416813294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "14902389080201926109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3796274347773622633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1306339989221885682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "10900880512948479338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "287386909600391846", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17542176922797334839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1081962464388501987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5831419373611158773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3179874645565098825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14906458674793172507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1934379409955686502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "10178951466584845110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "12693511427898130707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "18137106379929135901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "11619548409913646265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "13317417676446624018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16710651492402564794", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "10967218651864700933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "5381578460674280089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "13026555349791486777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "11913020016435860608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "8260130048649729185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14133958262039763609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5585398540591396124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16442107352245114876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "423221712829930726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13550435052563656432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "2440366541074371090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "8300655194765375060", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13163146272900339330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5406129421969383274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15118142492742177336", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10727592780669452048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1076005730007872492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13699740641705514374", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13054405729329143152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "13503608041359512", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "14385185911482960528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "11215217005872946038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4099859307693687554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "4408600136502382976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "3037042229494600258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "1155389358857780776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "11461581290174106570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16896833230469488924", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11469881811044037340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "3003526572122876385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14251848023416168295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "17248756229500447131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "929378940515745198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "12962558681443556219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "4481903208484313806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13558618754911056302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11455518069358829249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15890473622821659630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "6942622405269419082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "13890118723041457532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11292995457386147494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "5077214229434392730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17774424004510360936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "10412588668458621135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "10771803503544737080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "142650579335909103", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "14116800584981026541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12995903177757437362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "6143200133853000387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "11893541520830049036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "6310724136390087834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "6391201577234440562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12058759356433220258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "17152614235879767116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "2111669705686676421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "7333511810266504718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7397341452130124383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "2939605281692583169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "1644335606100150388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "2394023805427701338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12531580106484042446", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "15586047342916704364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "15779837958180258409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "14123081378489325832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "7818381040882768404", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12510951219501865365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "6156831095718536092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "3568514382399560386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12065769091972094756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "5321698540631249776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "378801963103874857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "2149582237161177965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "2770397466252831892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "3039528482572243879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "12577421746159122264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "13553263424160050064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "4021558014531645922", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "59356084516953804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "1170380397764345558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "13094402291968806996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "6713985030102340818", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "8354579049246302728", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "13815395589135469450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "13558656230312558247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "11666226259183201584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "11451740938287179908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "273242667845386507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16587061389996963349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7119182041840303390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "16292848987976256449", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16437124655147660375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "2495655464941634884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "10294610483561043024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "14403132596827435096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "85050336704401597", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "4450409744922989123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "15528692642731712121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "16661843849495077745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "852015206582470545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "9813748068195103720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "10544034939133448916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "226601879759378771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16432425079146486467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "7274179284676568361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "5184121466994451498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "3538679039078582272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9920155432685318259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "8859895010324601937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "14026537760442360645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "14349625788399542568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15065019229949449623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14115742296883450319", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16748662918272106932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "2273992727647793692", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3190494353583341446", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8837721075413149240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "2817919813339364130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14263790627243107300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "12866217660635921034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "290134020607738418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17207560805775399864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5245526691775741296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4933831571091731212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3872151366780051246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3541538046227217664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16182470664818268848", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8519354640245415816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "6222595759158615206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "7201521533301617290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "15497797842820949408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3219408878901707426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "2188101366183302888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14079654309452583394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9250410390663336388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "8787438180071123604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "11799179287124317845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "14206076551739831333", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9468684953949274635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8543619733732987550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14159596290442764023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "4378422094110940766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8505040075968411726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "10914921540144371519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3515437649977762166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18035673326929466074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9390478179772073718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "6254141935545262078", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5955575949957198434", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "5600128039063009632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14114380593731243715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 168) },
-            { "10728212277329722684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "877436308867220589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18375125668176498051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "6767245864232675168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9287404618748313247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "8728178019712933221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "18251360413872841969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "18271689282126907793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "954796765467489259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "13597240991532942069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "5079055505117153635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "4135003545872878882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "11883485911218628865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "2242915551775617989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 310) },
-            { "10556089809203693400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "3727142736386026852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 311) },
-            { "1622880009460832832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "4437258459981739942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "14691372262153587653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "12181607120522804433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "3159681096461848644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "6729785110495533200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "15322019609805777935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "7024495439434892956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "10416622008071151225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "5796500397424307442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15702382940521972117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6093575518270471235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5805383505505929391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1801731858063091191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "1559798212423183813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "5594180958505308003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "4766071144928072260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "8650948093564284852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3883845471211207871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "4366168099274266975", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "578703329577922869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "16863960779539003201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "15450609897480659306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "8203550467004532364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7431849514656037251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14484890926084856480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 272) },
-            { "7777333052643961206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 274) },
-            { "4424217045094988504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "7994179151788368291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "15192024816519005250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "4747159205186229582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "5485971317082563152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "18128162750557822655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 101) },
-            { "12421707187947291166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "792684262493086891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "941626985322260281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "11868551452004726281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14352303529756685990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "10702234389482091891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "3895088069642140043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "5334566325056222430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "8306337702797456793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "15720507574336564201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "3277243911383750280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18150429561058646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "11169292427557543138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8792010676469476740", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13190888313721073437", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9477562342190423343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "1202292109713947702", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8640150341228170279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "12757611260347801001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "7183578232279711009", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "8984436655107983227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "16397733032387984819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "16364494883229084045", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "11800783548769329949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "16065744898134487748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "15800447082078291243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "10090036431487700311", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "14045927407431718832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "2162882863309264684", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "16579057939215877904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "3988024997010367546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "2066731703492755469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "13781423818051299677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "5211831143687501130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "6863331059471727622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "6403698142681887543", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7481256533438761028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "14091610802555875119", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12024143207855886580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10170577772376890221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "721174714308243785", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "15809639778580769565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16667887002111125871", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12790570304622911607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "8567667881970262923", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "10576856554114055028", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2777318471329665162", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "937159502066696999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11087413527078604815", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18186615266760475767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 275) },
-            { "3833510944499257797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "1218323229202187514", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7683334381958571864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16773645387243701837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16958329690837977102", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9452470718398027950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "16511393582666965704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3216793152416217495", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18416908414174464784", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5498839261395459224", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12198263593657033426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "10014448860206587805", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13330734840729670622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12676167240795292217", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4850497746076450913", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10016815108730511683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "17948637243158994878", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12259844988981080505", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "15078590909693331731", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11988285441493553006", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13851240591038949807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "16588325081458426169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "8642107585829380438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "6219075471508685758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "10546430708947911124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2613462626256090659", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8295126647635181949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14213516751025324346", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "16509472637458153234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "16589607587365212240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "6988674007771237080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "3448477246688526708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "8507854696766492454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "8906588133431586825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "654122557966242717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "10196332102593337214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "15831600396403741571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "17808913959977434594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "15548971488532746290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "13468713306678453952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13613399861925108148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "17802514063213000148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "13093429681061786539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "12247991248100147706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "14491949194619001237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 128) },
-            { "7590767013583950613", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13210604117940125947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "4670443882075998209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "2857337999074313592", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "16036386660666696362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "755414184406250882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12190841837604350271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "10292243973236220688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "17793292063552633023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "7605139219344415117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "787363431787954804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7000486794832106857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13608239208821071914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "17281202179589913619", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "16985912104363932350", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "14744368497944610864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3737552767159920174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3792945601873900927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1364546124782880196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3689722043202617487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "2632535010129224704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "10968768803038046390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "5353552956675518468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7866128397931438774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "18233660940545931789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11670430946096342056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "2627779045483019709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11066913713501760080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2552187713769926425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "654821507679356726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7606728651572102823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "7549378486471456156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15410074937424854348", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15114370307779942381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4803370483104261655", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10415046594066474634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "3441335188113424896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9277610800970567810", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17179609670678746034", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8251544171504007740", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1353170363915443814", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14540578324750869319", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13471752029049484143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9062774198518904260", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17917978116807564183", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3017411837779243878", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12992061224471212714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "13161997040644039778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "11724225282274130518", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "12822126914959112382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "9423958333298993923", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "7307271009495440764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17746215841755337461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "3976736548270395981", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "1192279884248226739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "5538883245745495145", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1173986078589662704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "11031358859656806724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "4238885454989272754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "8943913562339525413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "6931953332823066530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "7799984350284425885", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "14204609663091442879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9091110033424983286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "15829095120243431195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "3239033622277917802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "7578177053220150569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1089944493540593798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "15529757761327002288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18082422341304348326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "17219920118109316867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "12026482841341343242", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "3070859615622845671", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "1778345646142852816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "15188570678726970998", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "4750513665628842598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "3372770576629463160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "2983038203471784211", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "6673966852801136416", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "8792202318168046223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16441830491664937048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "1419073145594317633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17525564757769958678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13468081302022888489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "15914058104244750036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "13760645810144930270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5963901433137582265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "14668725050395069435", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "12112853999307505628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4161612746310931789", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3388752887767453958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14046990030104971367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "16230621843665445228", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9274179337770060652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5115134711994944288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13898821685774165645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3007637520820789085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "16294825599850364701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14681717813022425567", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4915831715914920982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "12894240573737168362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5448537627319798272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14389915292223442327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 164) },
-            { "14274685812676150168", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "7732899312577293959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11956435900037329302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "9263063714383940562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5824801192141531089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5608133987357542077", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15392077168521832549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "16446533347502650316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "14762599606783897222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "709835724029986012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "1572991986657256775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "7398196853452900099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "8140094412609934765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2659031931257084418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "4640028527711211109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "18172711677056449158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "5183231560876991543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6821855018718422278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "13237050834496100264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "7164580481046523192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "2490155559809645659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "15430549683839591544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4553409514380460123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3041752019114501584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "4161001033681779582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4764776977138392550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6882621854468565774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8881135571874888085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14038261392627717712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "628191607060767879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3511588484597779204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6904130543085920483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7924408980408826942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9416186718345824095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "14719421757340260468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11936419502418995274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16601702334097258697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12501619443242354860", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7104309382120208659", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2321148334382088982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4914435717288687793", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4104562704039821482", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13308187548669026714", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3603187029740446600", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7338229552985076723", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2161052921317193579", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6104380778870471127", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13710319251108632115", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8096131027165540886", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11823205954749139338", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13403161389559730", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "998876398773540321", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9280431727790048190", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1152691534728260611", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9101903304994333336", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "142270860894725256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "621915374938805401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "15746620724134970969", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "503369896500284129", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7585785802379042424", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10486348549691280032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "5758133252959371492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "15117880293418979489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9120377367517042357", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4278280309700908015", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "9144487908815767824", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17408275657360833363", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11820789223587555410", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9232653317479846765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "18184621367843960190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "15059549186302099880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16765994345605657100", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9869959062341950047", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14343008518525689150", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3202085450628781999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "17224104246148265328", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "7322472892320910654", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "12480527132372884168", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1008476023750261156", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12589440296742583335", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12604104383683210104", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12782932626966309185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12946540633035976364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18221867262301937903", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10171373375072694210", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17791024851737594885", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "959260710517842876", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16988275131627316108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15048584393463312977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17381516856910544374", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5336120047683197088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15897477855246170861", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9780938731831129283", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1473214668483422172", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17515573322312447679", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18356980026934328781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "18077281411861416889", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2543041530639980505", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16370218798911151331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "17316626950179740845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "10414903047695486119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "2809950092498355574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "12011982029561277581", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "11267742746905371769", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12534001599784153836", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1882052795393187384", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "419783127503173016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "14211903923555028634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "10892706534058849825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "2345023488044002149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5754844816339228920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "17015791782274123780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "3706994659266083979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "13324157125165576832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "12014527187730671229", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "5170245731599664670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "6854611304056079417", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "1954052357826969119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "17824431042110985323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "3603706453982734995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "11992353959766718397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "15163327502374403643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "16758697697363920520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "10930115765550856328", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "14418429155823196539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "1628593159980574595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 163) },
-            { "15675968397825708285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "9594594523961285945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "6634330132674952638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "8434794604559592624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "3150231129728961455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "12545558125736154584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "15485701086886851362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "18005721959893562716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "490233152678323691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "4073467095502162430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "5801429077171542466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 94) },
-            { "14841539539334726292", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "9404953235624894187", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 91) },
-            { "17995371099806008878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "8961138963663532667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "425744529089575241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "1316444335300814745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "761169277744593430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "3325727286860556323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "2526832080529662683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "15470013032930986062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "12255528292506999241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "13119479079474639169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "12813978452097969536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "4991419288164762786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "18210370419559876426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "9748307611165615848", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11147573971701279689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "10865695385270390803", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "11999246609107242706", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4118073384938355655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "12134858519320245809", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2930898141522848681", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4190912926126844643", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2929190644951986399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "1126499865206906037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "13483175684542464385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "1920070013712913772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "10787747981914307179", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7715649642603303319", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5581428998642936688", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7532088618116521936", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18126685473408206840", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "2878824076934639346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "6548949901446632697", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13609660900720370993", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "883436333317162926", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16293465561256937726", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4759671642533786591", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4903592553439092472", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "2581414750854621875", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11627532066884923848", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17983556812075120553", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9099720270958987421", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8106738346643994005", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2554991397391195611", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13121297281694293907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "8220168481755031959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14502856487639608696", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16871004845988227014", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12015336418727455195", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1984152634309440563", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14312549767853703411", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "403634422724914329", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "10751536136794650334", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10135458965276110244", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2008424849669196225", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13735180250757239202", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12351866693978844266", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "6788311046557489996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "14578867494693499627", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11158789938857558596", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9616636708366808604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11069983292783104310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "708747442142592697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2780423409483867058", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3160543867929843861", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11305232900158601613", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12339692995143159283", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9316082753126682958", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15991460001131903561", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17647962002015093887", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4897448054295474302", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14184895905338394239", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15112599407339712681", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10486000767830001094", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14999920879568237166", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14799579913711096584", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6450532136308941035", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14962768577232034246", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "1452597292381229708", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7104756264011682902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "7744787957569714828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "13503688893307029975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "9133263538092913983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "1383899865465106141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "11829442945690098558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "12394049027081208902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "12159582810513550491", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "17738299860390552088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "797387385159110695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "8757900457181374694", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "6048964584602891448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "17882819773586674851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "17829148383265978140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14711697456265712456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "724953082687879224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "805221045541170643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "8241070786700614317", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9191832520273617003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "12408889192918919210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "4885944395876887711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "2651385050387738902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6303682540621797774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "905780459938651623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "4476928353532757380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13681462437496627948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "17243648226968859637", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11192356850081328892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9323825370872655346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "10000618285883395700", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6418327009347170687", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8528750110601691390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "8061914949376516780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "12992194515157698316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "17870874477143985774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16234606052818596502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "9148379585489720669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "9270950131920019932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "17001502418583498926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11163107409437069532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11465965972527519631", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2534408579674556441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18109284647478027063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9849272539053219052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17382660912493284320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17764033613416389758", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18431306649860116380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "3699344686791530101", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "14151747022287993729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "826850797666395121", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13486084204140096478", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2114599010013594942", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13251091004269229867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "5240706676373148280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "17490188677223978661", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17854208422879910606", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "8767817856303586064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10672380526821947133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10730222715353420212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "16683169947375504066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "2964705957088952872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14885031472057965707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11308583200952256245", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "7208008921815475393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7113777272518482528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6334639534663495263", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "10151922632636937118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11560634267092054110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "15914107501176673997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "18218755616248669884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "9987415314864002460", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "7667898603371717971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "4403753181729432604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1040030752340209480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "760687670112194844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "9803492989444302959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "216603198215625772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10899110544832584656", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "14447191095937730964", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11130439225010714550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "4325081100430903742", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4216958486055161753", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4400247897123856252", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "2294800960010879540", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "5195511638783481084", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "9545968464906009869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "12932635875905153141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "16925721317097534009", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "4398371999113956082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "16347412180100581330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "7877332346656934022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "6323026044750482867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "9761573038170759563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "12098146032672599222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "1403617451623027879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "9058996149754556268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "5864250949922222051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "15847413004526420496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "3199841714087553410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "4957638663977636791", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9437794960375526230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "9475130054420979752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "13312514874803986753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "15997754881872769378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "1941341635794709702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10157866834809927320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "12308359047798183133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "2986189945936592561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "6928835003016610382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10084794570892043447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 279) },
-            { "15417738436777481469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 277) },
-            { "18377298651236993830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7354234812009979811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "8656468860180713379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "14472187692485966933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "397770940444464146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14258499419905714808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17599396373608265826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "12935563359569230797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "4892959859293355837", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2802810524370514276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11587239927319376658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "9076758673133996959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "10432365444137108781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "13092232276822302626", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "14896875712028630045", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "3236003754884728510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "12181889163404078773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "4856470441452830056", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10022487076451608714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "14811603003184578943", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11565861421381730304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 312) },
-            { "16577611471466452776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14616969385577243225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "17921973525603585874", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4617809377006148936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12641170321047008726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5940337324384948573", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5738835498104275267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "3499106702307464480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "6942016672941874829", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "2173720698351153121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17201365233492366678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2877521658768725103", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7689320135952025041", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12031180482028822765", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4717620775314557374", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13800760323805415740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "946479876892100082", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5039037192630609823", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13839116996827687373", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17037416417174266088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "2321767794934000238", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14907097142953816744", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2525260242689556544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "13328449155966085543", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11856266545854830143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15993427814066246646", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2100891581797371600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 274) },
-            { "12242618640422208652", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6133592828563353516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 304) },
-            { "18232278892738147217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "11992625045241269569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "12601126285773042005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "7457899998356343871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "6343888265369366589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "10791067159964399241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 305) },
-            { "11327097771110264965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "5245308722062496788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "10792503079194374004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "4818231379191523896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "2198278382394812839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3800011935243649447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9631545863582097486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1779941298820543013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "3621930417735246405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 276) },
-            { "14435120971846098308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 276) },
-            { "2893564501191050837", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8108843303778211282", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 278) },
-            { "3682813162987778705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "15494543914974994991", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7565221050911842393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "5629670679897666607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 272) },
-            { "11754316727756881612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "10990741293315393791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "17024388383581997032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "10302338806536775954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "7915318733663535312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13702692566238948173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2909728331855309274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "13071545223094862275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "9631481972809246378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "13540002981450186147", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "7076937538747704750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "2041212737963974230", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5308128387928804050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "8619526128410675593", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "4792351255949877935", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17759505449240263390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "9584652777232392944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 275) },
-            { "9999955037598579164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "15961487889420208188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "541817615957967731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "13853630125050609175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "4137755981477177003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "16949056117405140365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "16014822406751503249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "7700321970687976931", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "7056293586529818253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3814584042139408454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "16992405636352406660", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17442105631503326136", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9606639214735570069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7940369586324090841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8444259010311137762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "15489746763312425915", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6800893510381991731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "4156384238797998294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "11645116728396933125", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10912495395422146386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "875400109066360897", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16475247464223458061", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12700372241799686527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11640225461345567929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "13183380647506951324", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5242271874488296527", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "9488453013746383896", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9726913113016874092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "15979956159651515122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "9947449295659685973", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "14230493618724018658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "1704404203639481753", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10404725818204494388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9767294641786972359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "4282668574670785584", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18043340998699622388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7148542290597073512", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9040046051053703359", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1077773457856682663", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4716188972902735458", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17343050785312683560", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5687802882700097624", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3524531620118359828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "5688478347124565305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "5504757952698692953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "13800387305792597325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "6574971185849732667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "10573920781439771673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "4992668316921598993", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15778834188130183853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "3062101811226530720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "428659495445490820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 165) },
-            { "956022649859563080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "13410850301164057911", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "17423645390621980919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "7802311886554362782", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "1172103288112689821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "17353894529222574441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "16431857516454692096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "9100044555742394133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "13115589642140732066", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16190949264253468961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7026575758396092435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 163) },
-            { "16761856644242716357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "6341197991729122563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "17087740929472936216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 92) },
-            { "10795104632256101599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13327653786981478088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1096671695414716274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "10774528268153772208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9525853014023664813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "10632020369698615114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "3234107167862677811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "8708643228914766202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "12415368596357091523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "1028160614515220430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "17742192339816511494", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11931568365395665142", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "731825454731954517", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15989894214714907271", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13478984039708550410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "15773157615731010456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16772854836230971016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "2934519615045138808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "4880150897829846031", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17889864541794448203", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11768117585574496387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17906607354577138153", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "18270587701371596297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "18142462471803295391", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4815047491742617397", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4513063773753763458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "2984726467649419856", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11795826875463204296", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15675903059949404837", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15817443774186015593", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14558572801374416278", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15555083739490354527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3854114166348568039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "3216877571075556066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "739676584505475609", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8303211644727914658", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12908594497114706897", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9918371346247634545", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10893432143734884603", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5339985303398206057", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "5941852872160795604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 106) },
-            { "17634966178519099371", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18299254635579957284", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13357365044448426880", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18135307303959376082", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14764715930784496165", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10979362792894404338", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15006321421735686121", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12370729327673204804", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10722677916294015259", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13454265023861566476", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7995820969034996638", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5275016494706355806", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "10947686124973711385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 59) },
-            { "3349519148124496343", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4003433148846544263", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14973431782875808802", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11948858355027908365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "17951403431757222177", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6586872365879203192", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11718418772370938734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "989564341557094953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "6942049339361951275", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14555883089089918919", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14808895254077106198", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13830605041347009953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "11955992313739654625", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "16921026268702574340", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "15320845027635796583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4014667229872705228", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 68) },
-            { "2438374917504708831", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12391792381149655331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12864558900883069118", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "7209217811135076623", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3272017687600371031", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16067605128297748820", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "14150012830816329527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16218339663410630711", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2089730611490367290", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "8907982643256296667", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "804195263636995800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11528417522960871233", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "15378025640603637387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12860222041026638681", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11597391933877736800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5042176052323856983", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "17010172246526353957", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "938848188161536107", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12725647706191463348", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12553441041059632729", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 68) },
-            { "12782191856884962803", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15824189967727245909", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16027853590391209100", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5352061583962489055", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2294318010381635693", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11055049031355432623", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11149782181562145291", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "2653651564133701304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "3526580286148537369", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "3985659568982275663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "13642146548740074992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "17011363406405852347", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "15386715291503303766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "10292349730148518173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "3154539627593235077", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "6856130385095139346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "2349007644347065353", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6146876760962332928", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17434429579652310107", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9447458159095730492", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8655883535274781128", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7272538316511343863", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17564338309805484464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 148) },
-            { "7881187047171099732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15579919505002150556", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11583017348580874022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17915846724151945664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5319668297345215520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "17208186152576814861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3633858263279042265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13853056718266488510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "14759179293743468995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16995873636564597028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "9438739171104456179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "14429081455612806819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "9819596940685093690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9426665763007611385", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "794499287296495726", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4980217316169616839", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16105073808368936420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "9530116228032101908", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8527193566719173253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16566214123371867456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1470933384474984858", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10706267011822108376", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16081386644309102158", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3571959174116404960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "12566041126392848976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7603872175048237237", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18235209540858013173", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14316077757957132678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "10816637153861630723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "9175450649281374948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17370158297470557151", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12051595062513871723", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2967481531952454828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12085348936192462321", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11951606039079763598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "8769060267707904998", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17104611871050967957", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2103882464623009432", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2659712601063515059", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9759380701896779097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "13842309033760176194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "2418288192668085805", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "14994322266840011040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "16402312692470500253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "16955653765071712611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "17830290099875088207", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "603883331897298932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9731370183088819573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2296581485980163665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "15133468875250992696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "12972798847556569913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "17446505012657609153", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "7223801044761006523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "16511749893955141055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "9485825829394109934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "8130920994920685157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "3573490922300056520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "5479761740065152589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9480653639044390919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "8739347545059610410", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13459514533473657102", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "7824524940405130010", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17796310681498690253", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14823616678465136590", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13816104794723484993", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "846088275031979661", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18125732229366977468", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8464582977975377118", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6290317420155851465", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "12696412964119109465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "4994591211723226974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "1036010477232750453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "13786357802945430475", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "1003101267609305257", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14991602704357959545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "7840653268996892538", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15488340031228619748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "5003718302026277632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "7693459946348737411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "10536316961655703500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "10765280349477640969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "7447163906170805189", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "9319254979377483709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "7843508201826629532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "16395067736440127496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13820498543284008286", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12071914115316550349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "12727541507197887360", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "17364712285968437405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "16120988958246503683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "7375461241315602473", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13282951481330978659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "6181308879301978465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15488550074426713959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "4062706195708729345", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "11604111639041106489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "10512507780534402341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "2128612971571865547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "8594644182487917002", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15881381297320383917", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6040286126398028933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13926122593957480821", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6213386558868267629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "4456004887590847716", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9642229389394495047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "18259656768460999562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "4983880246908724272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 344) },
-            { "7881579844586294503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "5331173521406046122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 265) },
-            { "3285520504090196295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "7143510787416483146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 265) },
-            { "18103534417093702556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9328223957245552723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "11706446082856895571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "12625112690264223217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "2114232149447438823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13883044928774243663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "17636500109629107732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "6192955702438301372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "13970935346154374605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "9692654253261175490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "2116913943188857359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "12802517759474139810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13611054146745413536", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13814086981499638596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "3106922888635965020", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "10509933181132310969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "17318287523550546026", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "11806402239500046867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12353956380178079089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "875296362957469305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "14912119584313592912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "12494969618927201911", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "6344802942015047824", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1692411934657235774", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "615341695338735013", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "10601835610089648700", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "13262672660175739705", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "16522364268583242080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "18253784177599134876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 186) },
-            { "12319073009094248232", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "9954050478761346921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "4640696923527766618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "1436052878894538927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "16011429608661242565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "4381329435655511217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "13972357557211413688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "6580334406272192111", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "10437599469161149176", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "4490223883171428014", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "2529786184394804665", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "6995235840871804844", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1671208365782918441", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "13104509059416300615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 266) },
-            { "10090923790949378407", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3429844423226609965", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "706049518431331645", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "17193614571243427089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3621424752591567930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11066930104187448422", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "209732971447020989", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16044646335477470657", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "2172121470071868949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3392693938352572136", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495063314176654751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14553856088069405595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "4967444801764057340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "12160764253455777655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "17723621158215826108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "2171768477223405739", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "12672995204641007004", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5622089373755094139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "2129726780118554358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "4160656836528944651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "11052732052072367261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "18432787283148809023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "16172528828198474326", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16327433707667075261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "2797723586312707948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "8451212914744825089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "7025975403069487257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "8913950860101596091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 361) },
-            { "15308578014507211237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "13132804928635689780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "4465781406991476376", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16266491618150971928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "181006047500375768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "18140951659547259039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "272730229972987861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "14898892437285105327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "17252449599613270108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "13436376034548670107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "13787436604877398090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "8873614802459592665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "13663893159182636270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1361159591875955678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "5912303851874077576", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16245760498096322525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "9928406318940388716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "3036512701943687724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "5334291640387922287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "3002986032379998259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "16469788155263456039", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8709632541892447149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9524303276541517389", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9354818521586974021", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16781127329510211966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6351572488552853754", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "907036267078333137", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11855070245618904113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "4544242784357021697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18218631037214746168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "178353385245384751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17658152048177750315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "11636129433022017868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 217) },
-            { "2622434279674583815", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14335074487552883436", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11175955260573469979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "2732519635571994212", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13893789954946953427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "4355933224673863178", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18037918102910297531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "16071723603031305677", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1697248235682953135", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7843498978148810586", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6767159196241633301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "5097818987523855112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6623182990939010641", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6711878663358611849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8671491767142900139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "12164298124869114517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "17089801601582809764", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "75742659105146536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "4652136280940317116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "9751582946441607796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "16706244336960642883", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12581879452540858313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17443356777503458523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "939718260623752240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "14131851237755716991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "7474639594232203854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "14152716242882609401", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7998930863626763670", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10323345824599612614", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "30229601562833524", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17788367809717898285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1509728225855233852", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13139625572508441980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "16491532291908469567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "6355395905401306995", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2096779676054335057", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4217179485243909459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "17101789600628162503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "6139574161497189424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "16559140502701231107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "11459784003592366395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7869916853707978306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 56) },
-            //{ "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "12794369485239257709", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13338594271376045657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "677249604491773387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "2668729552208169959", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "13011676362747785816", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "4678607855896512523", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4356817283284529593", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1885075753696445410", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17806712457019493207", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11862259122805366807", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "15201438563802430490", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "8132521728369930959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16108573960501496757", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11086699387784339943", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4013707396889204359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "11850332373794932468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14763982961176216679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "8207349115037232863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "3273748387141431306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "17109520309574369561", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13754408679115174221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "16717713360264747483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1045854873741563331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16767392067294252396", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6114241186364821679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "11241838709529552265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "15192230303376521834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 90) },
-            { "5374969798377773063", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "592245952014430043", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3114869763557037270", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11254635684957519432", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "16816222375242496370", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12809199739984715013", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5040730152867713388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "10429613013253088132", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15451919862187018297", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7546586420552408243", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14487682847898298214", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3106710091841093202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6458124573210430792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 343) },
-            { "9182897385081081193", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14462438074931673266", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18133334552107213128", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "38736266675995457", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 209) },
-            { "13654816209891478730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "6263019986730305851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 218) },
-            { "12929981792125924963", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3138374672801504481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "4465701487417893814", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12977678792503377525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "10879218241103462088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2221145174704245189", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4635570915184713874", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16075006181495932250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "3863816884636503247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5440983284868981549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "15428591250165788477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16567638487719493784", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "18059267466971880386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "10808909442136736629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "5682190700442712936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "712165731154577189", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "7469127846325904854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "5926747396493954633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "3477539135137665170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "16235115911229280717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "17009318615658405230", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9421643783312790618", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2294026590516781945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "2940027113687311893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6090625728451718945", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5643908654122573882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9065894438656900887", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11185156002426041243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14670068483447729857", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4623542918584461522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "1143214652021653634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "1434535531617424039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "17025268985366223779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "11507538232733291666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16243196137456624852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "8059328623525062913", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3662747857062156477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "314054598858070952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "14122213471825630433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14985236276429954162", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "3265415000818832667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "856877003890134554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14805540705424073865", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3788462090984291082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "2715447739580688669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7171904645566467208", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10308431308942416781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "8712136292276123857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "8700574100180128776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17147293671640396193", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "16474284418841532356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12461575861709234385", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "192209423643075326", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15490478608105402679", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3491333679577961640", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8176012042686275874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "4282198629458668761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "689445825453914111", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "969746749329671447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "16833026567865627676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13046322179198317310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "6902644989079870993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10987953316324712538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12515465135362865565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10049571207493913006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3926585856863002495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11275109735493317886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12238674883388043717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "101401523793806394", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11007944497812650617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "3240102173773280414", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14883438809987378616", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13320675959188615441", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11975047184326016230", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2608363732937932266", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15943141845766932879", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15486917753097743853", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8317673282128335201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10635659193402005820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11450378244355788918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2625969259447793593", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12207503176295152756", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4625107584562815965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1997392406402548974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2524029454785583409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4615708568396290002", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5349415632630235233", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16108759090923335184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11756881293845417212", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "17839839336294937155", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4703107905652287491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18180820925685532104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "3835286851569826052", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7807983899017500046", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "10294185397756053636", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 337) },
-            { "5519535335798045279", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8701248964531180496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "291868903926685441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15239764240622554314", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15963038745470172423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "11428599290755097395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "3180320769716158201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 221) },
-            { "583303098958523195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "3509487327001107638", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2649192407401044065", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7706714181281908433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "15914342421266687768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "1497560475414454618", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13485300684443803732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "14571022040013651253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "2832268621630415376", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9383182168277796969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "16487774205195979355", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "2226745622763268469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13809330759308309353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "11634932044447867039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "318377908569897093", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7353563160591978243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "2582625260054352916", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5609922876429907954", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12557015880639217508", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "11528310408333718862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "1471837664358450291", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7351401242363888463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "953306082374100275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "15759530339367380982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "13300022131572486202", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15689502054035168040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "16969463538496570528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "10237524128771958432", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7969848911698660033", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7130694811424715594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "8578747191812631883", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5197105253412476591", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3120553928584920777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4750894407873652809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "12667014405537239093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13644681270630373984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "15602218079503030465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3950738240651133849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "9101334153142718004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "15695415285791951018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15493488989417521388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "3391032227732782982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "8951040603784899163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13804221028705631415", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1351033666248868977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11330591026581463934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "6142707387281700290", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16117448559783537844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "4531222427159927606", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "3116068331849795558", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14389719202147508599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "17053671692908867872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "17025182465337728023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "15035800097152337587", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16770615142634470903", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9378269524012289175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6727930402459775131", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16362857896338778056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "7187734276051878356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13253775441326432265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "14733510474010040334", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "3336303478756453360", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16352331970945217438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "13484950419220835364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "4674416595144505741", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14559308665571750465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4542143431130171516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13189392239349392492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "7009735776703529573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "4220826666482500445", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "14792528369891965810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "15287650965861631130", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10308175009371219583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "2903605246599054308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "9213563311267466388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5019077257951332016", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2497756607567197523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "9285566577169147378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3432296808755992670", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7688176479120305539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "8818070832398055086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8787816339967963727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "863952266514375915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "5835634465164771899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "15101680837342453931", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "1116274074896622552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "12790788016297794214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "13538051178827008933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "16403423801823379909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 94) },
-            { "3723613341885592267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "3830703844770425343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "40704767167309552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "13973028408397200796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 55) },
-            { "16561224775421968533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 44) },
-            { "11243840588602365090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "14103112843209793966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10483664832302187567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "8100595788531468781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6620782733027313312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "13526488884846845330", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3534971503826416049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 374) },
-            { "10425889533411573166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "5214654427283761256", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13569941893504840630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "1318571118468536310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "17724604495865223459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12229574562535756991", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7264274394359484318", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15069906408448814772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "11857037689248685487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7977195117668583981", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15678385128478075284", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13025361884606488732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16723478941106779069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "726985753660756762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "586947787345351152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11418379777288974452", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2575631797904040925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "6288489890578212082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5649082203775427830", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8036474422877454869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "3711525118850629466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1875764913306932583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "548663565933738403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "17329287216741045059", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11848462434662954749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "7581174843529024536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11334122788337402526", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7868973874302246233", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17209528805596238905", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7878605163588288309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 90) },
-            { "5941092474669713339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "13738760763969959522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "11988546375476924356", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13680926356824317761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 52) },
-            { "2530317332900569142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "2891736961665476908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 54) },
-            { "18008552719153887303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "1299545313185409227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "17907223570737272640", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "15643135666029727865", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18180655791734632264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "12990527753120735255", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5303970743736042689", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1596353239542510685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 372) },
-            { "8040001390872143271", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12052207771201936228", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9942099207256025216", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "60509335250891515", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11499219760597131534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "6726099352298108756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "597073780328219388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10783630257421062891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6988492019664525206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "7132328255408635227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4006884370026272807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13938466156916423478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "8689206546467098603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "5644068493155655611", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4867937397499803072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "2702144517025248597", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3304589333915676807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12894625941923144893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "11649407835105973949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4897991181236908768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "12179581684777023804", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2806529556090896246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "11327228813412934262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5485749317130402302", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3499243120652875549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10916647716124396856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "5749536453225343663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "789359733867650915", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12626014184575881530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "1201692134690347847", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15249442550355454201", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2598267743388306204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "7181154048972884375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "10930640103080573253", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8458082326743351141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "584086621952390547", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4754967381316623440", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4353842547963164546", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6131481289104111211", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "517997325935712670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "5600807544955072308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "973966345068677905", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8532217744217419503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14614844213016502202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "4126895998426674411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9700808806849459216", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2438261005924916746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "4056971751486746551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "8929453032482114162", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "7662200927459001757", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11473442921040533207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 273) },
-            { "388828310152538138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "1643241486250690844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11806105193035393795", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8843585527713905568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13248567106128518549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "13708979487306970634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 277) },
-            { "14406070210216948643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 277) },
-            { "15352245788978088971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 278) },
-            { "1435153323458789173", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17638692805430115529", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14068780861332616363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "6656593119788274992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "14695781272831602408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "15696910741835640150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15315327794058441258", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7545013298074733778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "4026686872534942904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "6553736978928374036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12129572274423886770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "9723314434598141024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11031625790234068916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1138439260035360722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8323445733669842657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "54019631544204590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8971115542951085891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "4584970211859494304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9321208819255762521", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "12617625046664709483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8264178890341675354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5334190564423375247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "14746359019867963124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "2044363708106765326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "5132761922124425835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8141428150264829362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "276407276027553756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11878734040194151073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "11622925573287101001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "3192332625020432602", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9785114056964539323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9410978119783758141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "12523676912856063091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5912451559447635837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "10264913782610095832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "10309083227104422150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "8500148569566077929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "6578908625437515675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "13762042713029963144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1561225943337590599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 129) },
-            { "10917498758625273194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "14335423820860953927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "875142032423622622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 165) },
-            { "8965747921518186477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "4428101657497677982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "5779388310240896974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 57) },
-            { "11092828091552833150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "10295330953350618042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "15901675909820977223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4894227264080887361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "381149736509958403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "7962991673727743706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "12725675221990905186", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17961702508543961900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "7082007579524697455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1867337342417952506", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "8931169575495985034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "16542318967217020315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10626341369865893888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9090828337597312855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13621339501067135142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13754540732991287617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6669808855737023569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "17640725195881101275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6928136130626403937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15047676717402283805", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1082574490068006980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "6557428245898292304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "9440117898128288296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4672441137336208890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14289082888174784976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "5056859994174498686", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16574710115918192418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "15839295895890205274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "16307464696265537356", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11910735867274493498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "14671212883301405408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12028665820838352309", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "4773123925616969670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13602140021189675477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "7708321360699824256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8609939102588915855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10782611933832492335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "8857763129101380288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1230262279011217327", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "14424566003632608852", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "5497751772699578150", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "9541630719145326121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 250) },
-            { "10724501418439612080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "187352687850707150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3438296636411972401", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "4165036357594592683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15106614232165315070", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "17477062954520561609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "6664432489777052771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3341302541468955849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11626402549863483301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "3522383297921565178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "8651641584737798174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "12473600360154597915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "13296242326766100583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "12068797674575015662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6297802534570892679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10037086825900566930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "17216583849049249733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "1287490919205560806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 153) },
-            { "738850098651678143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "7139714914586273766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "14050124896329573468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 96) },
-            { "5429130923188159806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "7953255701516490034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 22) },
-            { "6195916781434462809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "11025471731438443683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "4622514167765722873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "14680730265621679042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "12141300895511301068", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "17106086048442658788", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12707946849050970702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17154337492545826355", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "10109431802089940590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9428176632140441528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "52089503050497755", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12297371032753209816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "659150305191479097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "2065752819810364738", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "13583166868754499339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "13991205023798493715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "8939683514448064461", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18337160891834020517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "1154228007901031779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "15156525717629023944", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7757331094141318304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16779678846332091086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "5409924335138540834", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4149728557142033774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 196) },
-            { "6443517114667332732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "5419041493176804960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "15948383678216076358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9604982746455852556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "15739274921308457528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "4642234334824303290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "13200151444914751729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "16894871557229780934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "9933958860597451711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "17094948685292534952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "9762182215179534181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "18273537339378756543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "7720939595094113814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "5865480930796299143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "10058165874008941852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "17309326904418811234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "5592428580503282095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "16348402367953880206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "13607830451968188080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "9311802150474489673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "5159470523468873105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "7975810844103449438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "11455843788148231615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1410630713443793537", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17303408650780384587", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "12069726772532946193", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "6204183474669103812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "12874626654611400042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "13546876216568825877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "2973436171295280783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "1908809004094565452", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2322559721899919275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 131) },
-            { "5766507688771440170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "16626226341188424071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11709992724966310174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 161) },
-            { "17222005830854879661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14224121742920800990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "1071007164550012186", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 165) },
-            { "6719302427415173754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "10482582307328548806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "407189201971322683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 23) },
-            { "6531171505861182429", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "879005904827468163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "8460847842045253466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "10488269059469838160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "11359409533744011242", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14813178380338948912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6307939332939714967", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10894058425957901202", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16610284927818475574", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3221469860582147955", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6423785822515265784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "742689192890486807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7349880498513046830", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2369451367723962073", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11690533591656807605", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9205978149692979955", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2728938624042183713", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2781309272856442321", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "579781312141502576", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12564687330941036772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8421388456873652700", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12177387334053203378", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11239541755868028928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "12776081190690731910", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "5648658688155716974", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12213354854947437262", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5680236635030250712", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5751283221740229986", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3646228701104397128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "13776178598632392721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "13364676690016875118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "3141773224039276177", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16384186388687043048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "14421898375873029115", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8922929126299811091", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10256831975351722184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12590922530749026871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "15209909241815414156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "8791285622784082122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "7474592508575297101", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12068974703657294908", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10682300249493137042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "1788455099959676873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "15225354446874994535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "3226193790517362610", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15814015810740458605", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4129722446574108695", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18094205332383644037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "11120846960057008937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "9195732599757736182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "9939234037869927090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "5898740235388207878", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16694984452720336415", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "4889188980319017094", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14412158605670555579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "3463959257726925426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15726902746983125797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8463615810239412362", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16531824466148265247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3374410641320310726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "9589942627115344216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "12864204111424196179", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "840202264034382558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "16386955278777720573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "16267682394077585279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "10544411879329675593", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "9835739612255048978", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6293403765897901528", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17596685300497748803", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2150326211917340956", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1587501521145162454", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7561096442572829049", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "15078262396281327048", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16383540667048742064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16820082917500285799", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6820284286806022849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17285815901490707654", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "994182747184593564", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6642767323474835034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "3215659303601163167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "54975980454651672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11529876081402974396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "10308113903347312964", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6712698149192186833", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "14930789530046665855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "2204178900998688268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "17174919737114915467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15154700439767512396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14916625550370402883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "7650375560336513366", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9999553425206328238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14026570177552137240", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11686670048744589243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6678796313875454849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "641417817126876622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9622546530872848323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "9194788897910888066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "522181557896569275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3332334993503432420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16131448347558322280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15924916465272239832", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11669828823444745889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "7243917162812988891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17891499682354369344", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14532519639619315651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3635446784873718932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18275601715050791851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6997971129340865650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10722782762733112118", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6585223640997887253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6205240287062600210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "17522452942286240233", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "6571438978296387721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "15511138074959300404", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "11107930597263802755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "10320711719466983961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16884228931101540030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "8253823502854784432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "6025872155179042054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "10173283505468233128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "16094174852600023296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "11077876432364512822", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10586018593856542117", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "18436249934780056991", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10179916356323479080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "1760391741350091665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "3109104171383198425", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18136135457402651842", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11834683513280095384", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4806571630436601566", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14849108908297747749", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8490260671996115530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2929715823970060874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "15924583510704449214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "14331658870024759698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6340128090694375876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1120455113299469776", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17268201530818712998", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "3644282167178264526", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "360872770877634346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "16720108310653948550", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14353390922580547467", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9868561386826862471", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17465517455679097501", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5570311824197099845", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7524311370696987092", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14070988879848388270", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "8296551195150971668", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14352796912241296357", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9840495023131952174", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4720851194954041037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17515064188391421150", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10437367877444543776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "4362304842016958728", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "383721620126444793", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "138379779469699309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "3759515057574218101", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2856601829807186494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3286330985102373533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "8159303545761286685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "4056979460327024961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "17823133607491820214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7969441643457570812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "970768445746568749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13852065717057446998", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4342360467977736802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "16336482874764861478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "6075691042233712335", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7570346182940928159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12971822824884826169", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3033264172690274208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17301887391757619741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "15790005937034794347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "15464327246951632247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5659168916726488798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "8079376692609682448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15160738482264643601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "537074122417021898", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3336076058264596420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "1982176363226079588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "15052577143485630617", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9314293064351558241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4958835037528182801", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6817494598328071314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14387756025635589673", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17536308070854915513", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16027456210394993913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8655315308767111198", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4447065688824381344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6843617687528352801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1418595171949196661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17900257435531434807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16789135236017252073", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13224814158106791463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5078905972285278557", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4196367396954155354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "7009873605945341897", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7199295899520406795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "16833854122884184025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "14599780481362761532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2572395498687401679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "11810221946429451169", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18084635102736402756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "59739211822469868", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5240181393417899912", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15962137123591591534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "10989937450490049763", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9798585825695496550", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2362092095402043749", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4444730303823507621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "487214150851213303", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "745009493367761775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "3806761527342944195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "14458851250685872417", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 339) },
-            { "7106362077449435105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "5853697372844744672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7603319690872333930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4628748977913534701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "10565371760124443824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1972879521448306536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "13893808009363736870", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6584960721513702502", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9220830217525628783", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2235210915304938149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3930314908786112883", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1334070221835422461", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6681818065741882453", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6980201892073961793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "11530101016435264783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "4801117903303888658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "5782934278345953016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13951717514084457087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "2721793280965260548", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "8124736388338424498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "12223993560805441284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9860570706348640782", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "991586070509079617", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7060804814325505165", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "787203599734115483", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6193161166790398003", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12806934028210472719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "14043770215999952932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15277856047844308598", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "8048617952947915835", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11446745541571732900", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17422822627612865758", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13954144830230671601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "11198908896401597838", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "5582896843095691256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "8133587696326295326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9492402787848610840", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10515519878978734341", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8747430148550634190", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16986358655784856534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "6109013751635776331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "9585113116232600562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 350) },
-            { "3503893875515897267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "13144385730409574259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "743941460026466526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4492332228252010118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "1920042803083729276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "16436006771518788093", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17567504672169904482", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1989849521691057108", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "16706121580364790904", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495776091407365966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16430562172386510259", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5673972310424776040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8797843396807284399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "1698321314111848001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "5762290464889692462", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3218248162832023196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12988961529988078346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11683680166617045816", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6252429564537528709", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13145474177271090694", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1208161922424418734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 236) },
-            { "2762489653422414995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12937333118472722002", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12917241193304093727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "11020315012951440351", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "1518270620354036926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2567046336192437734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16409729623371222748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1044978617045366709", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "8473037597903277214", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14398366949002972908", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "7334966010680206302", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4161141078006269526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 278) },
-            { "6522575549211855712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5629373398445592781", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13374993751390784382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "12976499206227689731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "9882204352209412039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "5041111302824362529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "13869716373706247686", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6438522646185979880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2406816735581074778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "8881150100883636392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "593712935037568960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "11970881115757095265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "5584432943673435454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 316) },
-            { "4560479630843098090", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15374625876485618845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "13102754309439605192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 351) },
-            { "17912189681971987483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 318) },
-            { "8153567933591966877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "1604661321386793876", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8990561333549136048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "12278364834477923930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3122997634505472500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "15669490019428002270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "116291934148608396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "14729854278671832528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 239) },
-            { "10591379189397010097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "11929531534620071758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 239) },
-            { "1819720745131968914", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10607904718265020949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "913496537924971856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "916389941321470163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1411786954276574458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "2730604806511016352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "5843679089588930933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "7304346312452588844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "2423754482456771339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "3653156933813711765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5219399418946822456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14217181622713951411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "13025323039227543550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "6114147683777615071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "2355214244972870639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "3167336012388169649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "12218337369633748663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7264756313770306662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10492056481694320580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14281201038135286621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8127190765748950828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "142486914279119363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "1532263118203058517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "5482851829165191681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "10548792624072794724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 116) },
-            { "4239415134522959352", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9028970753877215614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2324120381399737261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "10267260789603562117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "9988801796928462423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "12516911293946682547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "9213886570531053949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 125) },
-            { "385046297070779752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "12541834857357563605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 132) },
-            { "475043738497218394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "6351347283201596793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 96) },
-            { "16290626406346691996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "4569338575782832784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7575675354187625951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "5795073619189010837", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15123868617509445149", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5601435819039968726", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14104238386345631681", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17377293745073971167", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12134712464763856064", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5524215233998361104", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1103228955716492167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8618835732380720921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15908673392788376468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "8482147530539941792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "9069334144391048686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 101) },
-            { "12493863403516600413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "16692569816843207989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 277) },
-            { "3438116423688595487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "15602863681196390535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "18277685132620834972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 276) },
-            { "16541722316343690197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 128) },
-            { "3067806959725855130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "17791773192152464021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "13603318842632052764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 14) },
-            { "13131740479277027362", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15334195300678132907", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "2038505773698938555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12090536142661253835", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "4999505377862312410", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3934290309368153435", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "5951936376654416075", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "13204120207726209723", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "17108987360340581555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4795705973706796563", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4084106758501882407", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8127570953237266335", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "7500192998744460131", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2379484884827231127", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3349519148124496343", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13477416097954638887", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6942049339361951275", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5303170164698694791", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "12494969618927201911", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "7875724726741958520", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "13835908664998757647", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "6407471972820516685", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "5385316497510064491", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17377315194963069204", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "6580334406272192111", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "17790026124881397912", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "13314092088416047551", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "8479958930889587809", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "3750338655074082587", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "16683485007140805060", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11077876432364512822", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6062246008880097669", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4806571630436601566", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2458592904274981909", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1051506168926530904", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4163359403543480821", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5415319660821122528", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3286629188347536485", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13575423234109624706", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1841155673858789206", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "6708349666663292171", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "5083163738120585821", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "10572945270796129630", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "4436244774193918646", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "12985942652866621579", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "775538461106687677", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "9533360488591027707", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8913823292181409151", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11583985978586657985", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1485662490111767875", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "11872464450773754851", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5364060938737428149", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2613462626256090659", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14668725050395069435", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17381516856910544374", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "6450532136308941035", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2321767794934000238", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "7995820969034996638", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "17951403431757222177", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1074748462756364699", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11955992313739654625", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "11939914680143672459", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17806712457019493207", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11862259122805366807", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "15201438563802430490", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "5374969798377773063", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "592245952014430043", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3114869763557037270", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17147293671640396193", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6911215749850066204", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2814805887448339818", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "1120455113299469776", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8002233052700666718", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "16436006771518788093", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11083993858285515074", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "4133424990380177132", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1044978617045366709", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "952318454591754214", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5762878778443755104", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B8.cpp
deleted file mode 100644
index f15d59e32..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT2_B8.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    //SKL GT2
-    void tuning_cache_1912_B8(tuning_data& td)
-    {
-        td.td.insert({
-           
-            { "9832505855130134649", std::make_tuple("convolution_gpu_yxfb_yxio_b16", 0) },
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e.cpp
deleted file mode 100644
index bb2c47b7c..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // SKL GT4e
-    void tuning_cache_193B(tuning_data& td)
-    {
-        tuning_cache_193B_B1_B16(td);
-        tuning_cache_193B_B8(td);
-        tuning_cache_193B_B32_B64(td);
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B1_B16.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B1_B16.cpp
deleted file mode 100644
index a020fdf5b..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B1_B16.cpp
+++ /dev/null
@@ -1,3710 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // SKL GT4e
-    void tuning_cache_193B_B1_B16(tuning_data& td)
-    {
-        td.td.insert({
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "10728212277329722684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "877436308867220589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18375125668176498051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14907097142953816744", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2525260242689556544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "13328449155966085543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "11856266545854830143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15993427814066246646", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2100891581797371600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 275) },
-            { "12242618640422208652", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6133592828563353516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 304) },
-            { "18232278892738147217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11992625045241269569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "12601126285773042005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "5079055505117153635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "7457899998356343871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "6343888265369366589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "10791067159964399241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "11327097771110264965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "5245308722062496788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "10792503079194374004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "4818231379191523896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "2198278382394812839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "3800011935243649447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "9631545863582097486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "7777333052643961206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1779941298820543013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "3621930417735246405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14435120971846098308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "2893564501191050837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "8108843303778211282", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3682813162987778705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "15494543914974994991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "7565221050911842393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "5629670679897666607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "11754316727756881612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 96) },
-            { "10990741293315393791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "17024388383581997032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "10302338806536775954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7915318733663535312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13702692566238948173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2909728331855309274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "13071545223094862275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9631481972809246378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "13540002981450186147", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7076937538747704750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6767245864232675168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9287404618748313247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8728178019712933221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "18251360413872841969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "18271689282126907793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "954796765467489259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13597240991532942069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "4135003545872878882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "11883485911218628865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "2242915551775617989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "10556089809203693400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "3727142736386026852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1622880009460832832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "4437258459981739942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "14691372262153587653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "12181607120522804433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3159681096461848644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "6729785110495533200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "15322019609805777935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "7024495439434892956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "10416622008071151225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "5796500397424307442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "15702382940521972117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "6093575518270471235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "5805383505505929391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "1801731858063091191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "1559798212423183813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "5594180958505308003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "4766071144928072260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "8650948093564284852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "3883845471211207871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "4366168099274266975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "578703329577922869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "16863960779539003201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "15450609897480659306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "8203550467004532364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7431849514656037251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "14484890926084856480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "4424217045094988504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "7994179151788368291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "15192024816519005250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "4747159205186229582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "5485971317082563152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "18128162750557822655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 91) },
-            { "12421707187947291166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "792684262493086891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "941626985322260281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "11868551452004726281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14352303529756685990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "10702234389482091891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "3895088069642140043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "5334566325056222430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8306337702797456793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "15720507574336564201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "3277243911383750280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18150429561058646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "11169292427557543138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1375156980278317418", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13455881643467418059", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12788968383428254917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "13131740479277027362", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "3390014193205017427", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1270467775674221667", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14462744723628661203", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "8203171222962341018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9795194069954915563", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13369603621524676979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "13575423234109624706", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "10721885719016335538", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "14567947256029724271", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "10749263296616139689", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "11717348577195224554", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9275303306340702111", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12245096462203481681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "4999505377862312410", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15272426400992401555", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9325097933807426691", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "18238669114790278675", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6664482192233202590", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7454366978268164047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "16135569134646688251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10572945270796129630", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17495198214524203238", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "5221108094913859739", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1092633914190498221", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "2738256633362038820", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16689586259416414782", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1525652349412826502", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17683302016987200208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "5615525527388396983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3992735701291817771", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13208739898218342989", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9536348721941264933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12803521018213865796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8854783036772473804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "6766480740724769248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "768423629375648579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4044100281521441011", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "873240542570331563", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12875236165672036211", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12008819728839685704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "2486645741683554648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "368578589584714524", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 133) },
-            { "301201776306602054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13152181652632422771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "10311747599696543062", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11258322449556590366", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14095734330183410835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "14910223536998380801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "3352689317181436056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15832740972576959202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "14732184525012592889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "8421045774757048067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "941232110069825628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "8975333906619899020", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14800592533315327674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11816277809167487786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "957781751038897330", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10498289589469975939", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12970943403831707924", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1300292367195167745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3399837016486623477", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16740871614208968868", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "71587235425438167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "12717047049023783979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "10478482486372389470", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6056581247196718403", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3780320160034246719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "2819320453491169732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16976464773806576190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "13321672741246923341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 264) },
-            { "15140532227060261467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "9400755775406101904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "10292585962794261197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "13048561902713182858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "3658425022428447440", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "16947830954662293793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "8397584983137442239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "1071169341660439058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5326247361632903583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6214194654733781771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "10025839973092358719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "16711955423531846725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "2915165824085219545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "17108987360340581555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11972097635078477347", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "16926950874716567095", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1212319037405620223", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12397280593466519809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "2609454334520044465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1336940384521633733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15271783562528081169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "9533360488591027707", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6930697835136176263", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14444423571297570985", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "12643423612381102003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 79) },
-            { "18423051691107460439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "15381833359831622179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12040626513219974957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10647227605517025377", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "8127570953237266335", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12876112384009608387", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12663860560275361463", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12352923639732112511", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 79) },
-            { "708452703070938673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "394778201589371681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2477849395789783501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11637325834858582585", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1485662490111767875", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "4300306345092124175", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3402183863499902145", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3217246278485567748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "15713964605078748923", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12293786134765875615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "16043683538361975370", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10670103699537731664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "17854578307286932628", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11443268857010762276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "4479117540570599742", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11726298758004767743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "2968031010495399536", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3797957937905580811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "1474271081523145413", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8526484907799590618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "13723543003759101485", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11728824117049687850", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "13268525255152984893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "14397348576352573007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8616686489737649890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "13176385389367548697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14990645740260870030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "7472330881076141262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10892456883214928095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "9522661528867955338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "17856816245251319111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "14872992823083730615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3106591708459602370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "11609821372586026178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7678457226823073886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "10118395047539851751", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "5389189982064081933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "1742897526168249500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "15331103261044247142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "6644418194983229139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "12478309735214802531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "18012549942299450620", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "11873734271080160669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "10424278617647597641", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "9553032671453999824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "3860603464276263676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "1207026216972160297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "9519623751582710696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10328182165125764988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "2231648183489019418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "17599383258252980421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "16208488491972128275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "13379165253894817165", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "2566302789609970663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1478419046264331178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "3087801652564627458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16103943009195163681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14230385851791760020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15293727142789007900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "13973179950424276578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "713121569924250372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7947870656736319919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1663285216972929652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14767888121198814523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2124033349728954551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "8762901342272872498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17006133396401462698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "10783981060353445280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15110359240685619357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7875272450497189442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3281207855459771997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "11932770338770247767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15860915170591763391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "11716771904412649891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1095495157025479260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "8402692278765063674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "509781001842353609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3255465741612432300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "13439896617880328331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7134654288295280046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6769243149577568817", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "3480732841490521799", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "18269685060032395235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15649927926091502215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "69439315851965666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "156456996459945842", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3012566432840424198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16431165572426232677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6324565723045697080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "5390559917122707732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5469227748156438008", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17163158934005653629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2307310127637739872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1999979442136861875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2527189070714658176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "8329846097322076175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "16783619135298589974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "12214162812589030126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "9216608098626790565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5179760459095053114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "2452226948562393335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "4499586349553581439", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "12668149981216388765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "2287356884312581209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "11115684531624462986", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "6483208845600234755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "3752171257634205726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "1774158624592967937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "16881283637687482989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "14749947225382670869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "7351733901977025859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "435888248913413834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "13713406612642090169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "16582132711225619740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "10436819182310112786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "14546281065004619074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "12558716383635737426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "12609361477548272638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "8107447526839063293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "10995907213890714701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "4871907623235871050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "7394217382008802567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "3880189981766119529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "3759057398165607194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "4561874206785244358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "488298169768725160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "12956726277674279950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 42) },
-            { "7177837234452118325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 2) },
-            { "15031155621982459860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15223164574152266895", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4834446692898125871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14766477690417085350", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4461989328775275994", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "10141927023849730720", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10837496380266058422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5012013738970489338", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16839741351990811959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "7846384623429362522", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9193880745263317167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "2863465257341735941", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10447947790216991304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "12024817951074673335", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13474805373264874144", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "671453551040072499", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "87031578643428011", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14034525799882831106", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10864011008000364415", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5115007207028125638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "2866656294663853474", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7913076120244203725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15187035463799513424", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17778091287904736965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9562527071055150197", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10645625090439446714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9955939178447682108", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7450417963648518926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6648876837655776653", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1520529227443340435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "4455369117448405874", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2920840796593281126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "16341722570340169855", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15289152041466330689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14362876471450307424", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10330180429524641331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12046017161414846599", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17228810554159747400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14835309921389262864", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11263540528012919947", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16139615240471264488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3820661057776133570", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17515847111676784130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4252157815622916471", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4819131094439732065", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "17264010982688979937", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11277866878590984477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11324651029379152442", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13425251102263428554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "4571404165794634411", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12279771749366327372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5754396201681434378", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9809458159478958866", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "5459463503840817402", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6484375582324852109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "7005509036795164602", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10785966734346479177", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15363606233048272809", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4890043345392707202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "345043289576587800", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4804533178560338520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13328911884191551889", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13302687772426736346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15231987838322151865", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17214254645087272557", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4849343880559509889", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "851057218719456209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "331661172067077796", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3017824560305532066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13596876807637507229", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2242602888499888844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13264617841270329349", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11604794601689380990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "7770000755097925765", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5008350851224686853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12166852830214895457", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17672785701483179117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "2439993891369206440", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15822546325822628634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3056212889689424946", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12712071520541638451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "6217542346826403576", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6290584630172122012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "1245259979364728404", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13006774775034887171", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3725013268198063198", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1359720957005310113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "1354647381212852890", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10480527638577674825", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10883992248631603006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18255227391100087860", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13565691057064774487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7954972694876158422", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5118467701668427545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2339864165283480961", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "490931535580183607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "150132162949295379", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14795618530175274538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14126906427006602775", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "905526102343710614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3385797925880519845", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16238415425814188039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7107677063657303327", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4098191685457418125", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2936333406928424760", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5539793555189956907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10106454449619141260", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5346898505346646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11807282628372660280", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12375919467924385618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11705756153433897198", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6651389480007764007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16911464046178654033", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12495003066477974474", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7650862961269327235", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10709828018763273371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5044721291675005144", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18427056032084727710", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1390379098099686972", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12054200116003751590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9500850790449116723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "9057036344533510776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5093049998173715787", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13761566845514364807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "1594612401422787491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14603590053512154268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "10136369729388564720", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050675313067213312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "14221578799010900252", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11723735945517472199", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "13810995219720233595", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2704063557078535883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "10384537928514123040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "17427036330773218054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "9796621763733208035", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "14046114605615338907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5763440554939527411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12892693137085610062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "17775705003104146872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "14878347463243157447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "7368916076070115064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3499109651698979012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "190530884420224257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "4202645222013675478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11324851661119942609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "6232363902828992968", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4299492266819967844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "9481675228591993785", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11772741918108731396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "18419183012101393192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17832542092610191859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "11771014003680394135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "9192665896782282996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "9763310312421884308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 355) },
-            { "11430400968543668873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3430266954211750407", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7172604084103519563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "10306542963828398049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "5235375820995365354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5091558853871982858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "12914986936318857086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2265784112305305260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "9019388470685749691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12427258337646070422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15884763176333003771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7211355951470869591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15399245700982979379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12644942072153919043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5876880412336151866", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "13775529405693629438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "9048522050692986204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "10642327923162019888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6410682026872155392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "9454954846682513038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16463823433924519300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7279393739634103483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "13358283026528078900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "8032685176029570383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "949330876419581703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17713034180977313726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "472454322186482185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "2727219457659794468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7852745450437172519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "6065819201836017182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15984885011101717258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "14811022197918391667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16146350476627599543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16173557782125372935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "296142385116663420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12655099960717366198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7937870623766562191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9367157746678824712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "18062849937960759210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11919129623429545762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10522649794540845800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1104489643524273315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5419775002149092646", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "9226912483632588371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "4958222070605478947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "4479979951990338510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "12022152681602871455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "5740738339752793113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "12087141795291232248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17825280904760131680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3974589991022739479", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "1838534101161814609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10046663998164493552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "2305461098719675735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16504962609450876148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "6345550009198921347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "11239754372812258455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4347816192417741558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "17809920600993699808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16710010075465723498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "17729546848373991614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16998508915819714690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "12952980509662451384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2683507674615735878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "13059207969254830451", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16295660312557315941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "14089893422771228191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "18034648276860485300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17739868787095417856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10880081193716628051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15916505622570323098", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "9101018613418825655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "15650839696475698676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "15628121900226431719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14554225625951128811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "3134489458855347772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "5627834277145735283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10729288973933590396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "10869005786136023160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5597908143491399643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "577182964135927041", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16947969669087411530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "861419637283812778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "3643250372952944907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "17977676737774695825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10309504812060596568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8866736221671835567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2133849627845285277", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "13902214851539825156", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "669771152920944125", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16921939234324970069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "7649413902932043811", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5658664813683907476", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "10071449674652717890", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13352000946213986936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "5291011077679733990", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1458615259705605525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "543472136359161929", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4644580321919256401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "12946531140050029900", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5010119207726811326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "3308770992373192529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16913004986170202203", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4079026972040047969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "2683304757433993300", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3141886504884887200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "14444475853714164129", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10747988576436391912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "2722124265986526212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "8856888761246057127", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "1902656726461670148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "3337625924046561031", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "10280619408766255552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9695024256541464964", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "6733731409232284409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "15805087418686802636", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "7056030150365552588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "13038533272699602337", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "3737576893817599311", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "8761283252495354972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "17549411807772646930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "13124342334495538095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "8576733135863336233", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "1082586642383386489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "3217574161785059951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "18357544235608006954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "13954821927253849036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16158139166784964096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13558687084677943158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "13809898858049445969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16862145184923128012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "693883892843558363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5393510569127725391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "4533786844080178561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10128143628088846123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "5295693108687178880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16425665058951535484", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1398177377739338750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7407975398526425554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "8614534946699754256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7372956570616880244", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "13676654389512816868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9043982883185435219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1626430741965136732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15295951849706930711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1075027491444288875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16084700435355748612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "16698547937652264447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16729849855476690294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14171139920084409181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4264284648458489052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "8866716292621164810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11828175723996627443", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "11164519756679631743", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5558136691773431495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11031569203645035546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4084026445911476156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3819990462129075757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10055549084854766170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11657946392097042544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16768797136991242472", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12107262410635772120", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "938222258370511187", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11727227430687227444", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1040650352205493707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1563987925712579649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3870539490799697188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "13170441257780067955", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17490471699618303993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "13993548620104010490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15728009639807698634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10991423760161409883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7242013296950669829", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11744368351982723504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7314288062932060863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9299299311101549958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4138968242532400395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4135068756462147853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16247399911710810038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "6020017927557041768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "11265472910579659280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "12512751736409465214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "17015328096102652908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "14147460733160099960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "10811837819834149164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "2173867324489962689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11198301748997371475", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "9741607635826869269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "3860667078458481972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "13590444711975157776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1551596771935253711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "632116056424249698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "3499645386058307669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "10471519687597963116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "4429109491655891299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "9439431829175743345", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "70580716590540876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "577844026691991089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3873183249402084406", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "15799159401545270696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "18154019240019929225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "1569043950563130463", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4491380839102267034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "9243949750444156746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4772696293208603817", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4927360358387344983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5770286476124511234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17084977396231597605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16800575429414554907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 10) },
-            { "12793908914872030220", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15947699374684516369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4660288622381620227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "15914512645931208899", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7460672405409009037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "1541754036637209097", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "89439319782574517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "14088382963493477342", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "18203935818408469865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "13191096881934434519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "7918742312252115870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "15641537661939240413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "157805434489791310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "7941729567451949422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "10628725059172743408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "4492673409319122180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "15857087373591747006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "13793441296561946357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "5172712078329324967", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "8780604510524622314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "1760690277175249985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "13649894122307008732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17546566148752689536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "12675313398314286884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "14621327324047759584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "14136097914489095982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "7638626850074132214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "9399994156762372761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "18068050257421269408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "11830297960718214360", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "14959566236432790882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16884396694505987920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17947818179123182001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9381304526221508530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "13932662890258900896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "8268533335852735248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "17419874083634480896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "12773693193167844110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "5157249499936659040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "4282661608732125403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3159147743553063163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "1706927777850488363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "9839670675413379092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6780215829176686721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12972634653821069685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16129296588866116913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "18202222342562516071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15426960908024585800", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "17026284168840448378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "18118237182023167949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11113256687741667688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10555597973766215754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "17517495652165026573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1832310305089212990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "13855438905855887272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "15349944413643626251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4738743763536059708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16611452077660879545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8101977280003030465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "2012181953284568566", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "2969389503332309296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "14515066741400300669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "9373353053843326128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10023279637210292010", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1103204698908514224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "18092842590142527927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "12174571114411168588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "14431607479949498164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10279778381617181802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4237276338897143680", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8083672466967374860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16705621644424684055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "5352861363832390974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "16945184617367657570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "2995134938466176198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "11706378390483804857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7958459862276998225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "11703557271443535142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "5020788604681810984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "15217183882858251099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "10650698451740924172", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "706370730287471796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "18199526506796726885", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "9269175963143039426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "3691705516240577130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13472532612464340803", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12388375914105990324", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "11582534256623549131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1653274345637156919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5893940382830835820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "17700958439420868719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "12730339458081890990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6631816968511312100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7000524935770116969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "386749666417295495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7162575953766465459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "11398019086259011063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3041612155708729812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "4274801141127703532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4865023158176874622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "18424912460022156378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10408322429232132983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "5277400567128489977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "6848989271874647093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "10085059621136526248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "12962552332511702682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "751912075185318190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "4505008254511324231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "4191326605459754690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "9824678205469832038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "18245935804520236353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "12309132521191764927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12843671306854567956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "8275277322582733101", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13698389420396031586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12949204491386872217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "7370273921473161914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "941829593638869991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16206791915939407806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1500571771538985941", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2095802691829304676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17542414935564676110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "12380856644683171627", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1451466106918423837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "8071957466247137919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11661208196482963286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6635217802203685464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "265124365266629363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "9513032457323269513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11814740669468421049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5221320470007950766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "14359530849521980269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "6181651715051152713", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "1450888744802985214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2842103889477438816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "14006248791647711759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7072606962946873975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "3599823735065658574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "11311859068168414878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17525531790109748810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16749148369456398030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17556238490521153146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6067904130482758510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1791615587935799399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "12985650543127289023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "6714886136800883594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "220326805056361171", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "6777045876155144709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "9454512817077883797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14011124615649605281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "994489782629179836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "4338023436590582323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "1152693503778768433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "5994204139128667921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "17243576882981097341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "5524218746051008792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2669822154816760632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "7179714714302073459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13002363400738122017", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17006095064160484022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13733327241591630239", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "11942736969933408358", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7869779894480025247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5735608687257018419", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "4346591404756288097", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "805131056816361237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "16910952799476896905", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17512961503976896701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "4773077837537775324", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12193395770362986433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "5740745357953479527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9040145293899470160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "12755692101476964677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12467673564660108244", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "7432142107544210174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "7232326270078161768", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17238880534517721334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "7235358742317442134", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7548031489690889629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "5040095338370816349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "3816674884393241704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "13919204232414535363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "15589007878875898942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "17711453305763476458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "3501882025888946886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "1171681987783013074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "17585206779958265260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "17046662043776372746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9208964785762052001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "4435224497850514394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16728762255357411770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "2968439898708528834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "11845189428639322474", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "16616945998593626851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16490405739040977260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "4974320417566990034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "6428098122005804378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "17281826959243966826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "7369903937189508744", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "9111988592015450418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9119618606914671839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "1711220333751274603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "597650904461183283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "16888412539296862194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3350601287664242323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9702618600245321109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17649961873981897621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "3244675355773468991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9340159617983543624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "10570285542015420072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15968821946892330559", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "5509395737020858006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3806131437010910920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "4523064418696274869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12004552919019936392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "18313088176414428990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "5649150695527000655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "14985755375924972050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9441060601228656341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "11421180829679625737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15770767768674603174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12055647521556218046", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "17908444616754154471", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "5568753513029409478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "12417253210787537988", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "4046830923427667342", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "8108933468437926367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "84595904778810418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11756650366229979428", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "1617135706549276688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3011188207492335920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12450814729547235386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "1157947252370351851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "5374664689223295796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "18215430801133520364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12936220888307335332", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "8746621720912032145", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "12003323477818208825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "17170858505976681742", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "16566128345135114558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "15690161340392005765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "60267878504897170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "3501667344669686338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "8690196189594920365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "1930929857644673460", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9671459469252116568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "3266557807508325807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "18041177945345031826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "18267428053198215471", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "18417288692814472127", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14031009077471784948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "11666250400445971335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1367483816197881270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "14248239982355212178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "15820359925623438341", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "15216108478837665623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17489680436564779197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "14117801387057507639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "12831123539633580270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "11337525286386930242", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "8431759922045602848", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9601412379897937608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "9152433123828445089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "3118602494449249177", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "5159738930501638535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "5060012838564094182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "1905758333157310570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "6870942166356599956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "18067291256808591467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "2826762745628486040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "11841034668170849494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "3034482898462686729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "15838113905712517735", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "9407646138658641974", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15636128989267984459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8409488188696700816", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5720964268093705079", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "5922142661777925178", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12900949103593247293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13483088320871913126", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13960388312976163971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1843555260471832708", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15739278428190392018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "3868149953087814447", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6845814820599174031", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6203765709597125063", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12871555773123368130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "1237920404306733800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7669403041163460089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6791806088355877039", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8561261337239934159", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "9580986168276580598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "4708035980731751007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "13734043898517059207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "3177304125602972370", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "15727611564408173858", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1632416005093914709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12253049204822930675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "15078168059698267650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "12522495848240087966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5074273865983613482", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11936530628363072904", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7870154008378361670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3774285301357006334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4848143712599565301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10316451248440741901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "733956743303342862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "16677044352793659175", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7075659071934895087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "8803037667261582905", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "12421204749289937399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7330202944390548890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "10753540518493641553", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "9999425239167488495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "14001406016806064079", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "7565867291827884997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "5941298590926032148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "10130171279527667782", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "17344974951998490453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "5550969016335082071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "3398322619007806698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "11356842300444410831", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 50) },
-            { "2623687018437195679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14077148976508649021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8272823732258536202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2451712485584835395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8057302050645780813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7430073011895298582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5095827462645341808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15129834325410878425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "9660812093766156608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15781622938833984014", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1089679781525023551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6129602738379919488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5287076386757143976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16076153317792960383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2108296560864415762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17006655627343469372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "9404677451270692749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1372939511728986224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5311718276151327830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "529543453251381109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15591167992985613695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15026219694198820614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8258382025812748961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14810839157236175179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16117738994809548007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "659846949368492111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5211191663202250117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "13418701036204748812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "9714764457768279762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17310332946322628458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15975964562807570772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "13447028922679236865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "8337820318779061494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "18136765667969393174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14821616804286068969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "18386376129938707290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16609136488331186895", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "1996860183441418841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6491244517639245276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "16312223896859176991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17833517350994024381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "4226968857681929488", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5141753233513623264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "6860503758000008398", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16489624657475712467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "7862815466573236157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10679760989906275129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "852092858392507925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "6996376303337512293", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10978173291465325823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "6670327979947471550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "11318913630213187720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "123251351612308092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10784073615329190425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2261453441277654139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2937907409658060025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "7852144838267007144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4408772370026995920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15411474884532403722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "9462315044265139531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6419580456182610836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "12277470820821378855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "16865879032845300007", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2862999234347597091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "15447513376965243034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "14420809655798184553", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "12954154886708228545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "7575634241190730697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "2344498602308448450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4304041922043496030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "10971070835319242371", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4862529593282936100", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5312140481706133684", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "15522785615618973614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17798636687709019154", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "1938086876393565238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "11897113890115321056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "14363654136811880073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3928266232090746643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "15882969506682501496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 83) },
-            { "16426179645101678763", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "18174857480705846286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "598390166442977699", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5522698342845820411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "11559360678008060513", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13184662326021747000", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "16037141448095945650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "15094664469997373662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "822162932339827810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2597453794298356435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15851356529373376076", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "7966454753124154534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7311120574972466702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16461809076899645037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "1591199515536783245", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "338716975932676215", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12165079289914715018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "348058686961206025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17635171685500922207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9643408025778914022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "5145853681977610916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15155676074658242659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "5269172622193124300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17037462814585846902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10100237101982273901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15322609677356616580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3399406641489305996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10187930930336324253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17252589865292797082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17922279129043570176", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6323083153920795679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9277176009071334860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "4313392430539923574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10883341041912056319", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17310409067211414565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "863057075064640334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9131235538209388787", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12868739680413736657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15901724303713479611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16944335478353845609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14025235562200209723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6556424924189200804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14398854364550406668", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6577505360421510286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14098811155652990436", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15530407024531326375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "4466647043226271996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4121109463284708890", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7916244303189113815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12309955719964788034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10133054058562198093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6294240435687565243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10178145641713631806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7585184325339753737", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9222744127882324405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 369) },
-            { "9542325095876448686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8155268141318893606", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8541982562061181756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13472577372534605883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15980348884716629349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "9737565171095493297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3622409603053918029", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "5657471280535146301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17025324057045572535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "818998169319147148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1680468564927032670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "14466032674083938714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "73865742350616903", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "13833960927635646899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2783577080556699089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "3563872903821081702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "4387041763614917736", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "9714508918051740792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15412447128995361859", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "5965451243366505522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "13856271274572142709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "5156033406916344703", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "1018687388655376483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "3779229442395464456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 102) },
-            { "13448845356783404653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "15578456771467281881", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "18302892230881285207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "9737833587413114584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "467975197394411990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "994842991399671507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "778476198101178556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "4769003637955328938", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "4914474312076193952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "4091702228990140696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7602222004475424358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "14544219140091420262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4279062247055842367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "6603778920476932267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4959403414256988744", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "1425953627379976115", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13477548641580029772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "1963081583851864291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16393176054374397767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "11132679855317294753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16000753982895054944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "2727175120437582536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "2921118493468368908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "11626398907755088688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3224352307778512793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "7780140599533242850", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "1270307036687208396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "5911282942658469852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8809017515482311843", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 10) },
-            { "11655994466278963438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "6981537186704688907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7903891232234389925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "4229105529069729944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "12796777049340516563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14289048840489035546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "4239133538073498792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5103094815475470596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8560635685184432720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16264774056719724826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2571882179292959757", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16758962840329202004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "4550028191070279999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "15661322183507404821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14650567822254940018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3755253206085028904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8751016391945753900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "288853243482418538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5047419871737940985", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8819268903800581706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3746573775462003750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16286085532892593349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16547425454653232058", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "8195881973746570408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7712831597869354170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17035903590837750750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "1907439276166837309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "3036808833459559381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17928043901784474130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14667209474639064623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1701609125136907870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2140514316203117958", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "9366201112659847392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "7808544677773370430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "2251029128552117936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "9529614587861271730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16811402686462277562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "10554266898346470422", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "7817036102984218692", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "6329618009202266591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "16936366288366370882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "8025053805734757314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "534032316469702287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3963106895592011725", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17994361454416813294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "14902389080201926109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "3796274347773622633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "1306339989221885682", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10900880512948479338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "287386909600391846", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17542176922797334839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1081962464388501987", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5831419373611158773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3179874645565098825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "14906458674793172507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1934379409955686502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10178951466584845110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12693511427898130707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "18137106379929135901", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "11619548409913646265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "13317417676446624018", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "16710651492402564794", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "10967218651864700933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "5381578460674280089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "13026555349791486777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "11913020016435860608", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "8260130048649729185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "14133958262039763609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5585398540591396124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16442107352245114876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "423221712829930726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13550435052563656432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2440366541074371090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "8300655194765375060", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13163146272900339330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5406129421969383274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "15118142492742177336", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10727592780669452048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "1076005730007872492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13699740641705514374", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "13054405729329143152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13503608041359512", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "14385185911482960528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "11215217005872946038", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4099859307693687554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "4408600136502382976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "3037042229494600258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "1155389358857780776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "11461581290174106570", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "16896833230469488924", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "11469881811044037340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "3003526572122876385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "14251848023416168295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "17248756229500447131", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "929378940515745198", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "12962558681443556219", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "4481903208484313806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13558618754911056302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "11455518069358829249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "15890473622821659630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "6942622405269419082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13890118723041457532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "11292995457386147494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "5077214229434392730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "17774424004510360936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "10412588668458621135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "10771803503544737080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "142650579335909103", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14116800584981026541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12995903177757437362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6143200133853000387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11893541520830049036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "6310724136390087834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6391201577234440562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12058759356433220258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "17152614235879767116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "2111669705686676421", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7333511810266504718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7397341452130124383", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "2939605281692583169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1644335606100150388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "2394023805427701338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12531580106484042446", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15586047342916704364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15779837958180258409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14123081378489325832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7818381040882768404", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12510951219501865365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6156831095718536092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "3568514382399560386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "12065769091972094756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "5321698540631249776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "378801963103874857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "2149582237161177965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "2770397466252831892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "3039528482572243879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "12577421746159122264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "13553263424160050064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "4021558014531645922", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "59356084516953804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "1170380397764345558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13094402291968806996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6713985030102340818", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8354579049246302728", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "13815395589135469450", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13558656230312558247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "11666226259183201584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11451740938287179908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "273242667845386507", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16587061389996963349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "7119182041840303390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16292848987976256449", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "16437124655147660375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2495655464941634884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10294610483561043024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14403132596827435096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "85050336704401597", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4450409744922989123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "15528692642731712121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16661843849495077745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "852015206582470545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9813748068195103720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "10544034939133448916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "226601879759378771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "16432425079146486467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7274179284676568361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "5184121466994451498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3538679039078582272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "9920155432685318259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8859895010324601937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "14026537760442360645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "14349625788399542568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "15065019229949449623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14115742296883450319", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16748662918272106932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2273992727647793692", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3190494353583341446", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "8837721075413149240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2817919813339364130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14263790627243107300", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "12866217660635921034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "290134020607738418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17207560805775399864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "5245526691775741296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "4933831571091731212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3872151366780051246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "3541538046227217664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "16182470664818268848", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8519354640245415816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "6222595759158615206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7201521533301617290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "15497797842820949408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3219408878901707426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2188101366183302888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14079654309452583394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9250410390663336388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "8787438180071123604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "11799179287124317845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "14206076551739831333", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "9468684953949274635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "8543619733732987550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14159596290442764023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "4378422094110940766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8505040075968411726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "10914921540144371519", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3515437649977762166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "18035673326929466074", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9390478179772073718", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "6254141935545262078", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "5955575949957198434", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "5600128039063009632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "14114380593731243715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 10) },
-            { "10728212277329722684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "877436308867220589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18375125668176498051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6767245864232675168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9287404618748313247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8728178019712933221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "18251360413872841969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "18271689282126907793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "954796765467489259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "13597240991532942069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "5079055505117153635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "4135003545872878882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "11883485911218628865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "2242915551775617989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "10556089809203693400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "3727142736386026852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "1622880009460832832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "4437258459981739942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "14691372262153587653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "12181607120522804433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "3159681096461848644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "6729785110495533200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 286) },
-            { "15322019609805777935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "7024495439434892956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "10416622008071151225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "5796500397424307442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "15702382940521972117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "6093575518270471235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "5805383505505929391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "1801731858063091191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "1559798212423183813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "5594180958505308003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "4766071144928072260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "8650948093564284852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3883845471211207871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "4366168099274266975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "578703329577922869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "16863960779539003201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "15450609897480659306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "8203550467004532364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "7431849514656037251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "14484890926084856480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "7777333052643961206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "4424217045094988504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "7994179151788368291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "15192024816519005250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "4747159205186229582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "5485971317082563152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "18128162750557822655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "12421707187947291166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "792684262493086891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "941626985322260281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "11868551452004726281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14352303529756685990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "10702234389482091891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3895088069642140043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5334566325056222430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8306337702797456793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15720507574336564201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3277243911383750280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "18150429561058646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "11169292427557543138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8792010676469476740", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13190888313721073437", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "9477562342190423343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "1202292109713947702", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8640150341228170279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "12757611260347801001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "7183578232279711009", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8984436655107983227", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16397733032387984819", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16364494883229084045", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "11800783548769329949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "16065744898134487748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "15800447082078291243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "10090036431487700311", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14045927407431718832", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2162882863309264684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16579057939215877904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "3988024997010367546", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "2066731703492755469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "13781423818051299677", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5211831143687501130", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6863331059471727622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6403698142681887543", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7481256533438761028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "14091610802555875119", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12024143207855886580", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10170577772376890221", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "721174714308243785", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15809639778580769565", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16667887002111125871", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12790570304622911607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 185) },
-            { "8567667881970262923", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10576856554114055028", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2777318471329665162", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "937159502066696999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "11087413527078604815", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18186615266760475767", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3833510944499257797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "1218323229202187514", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7683334381958571864", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16773645387243701837", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16958329690837977102", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9452470718398027950", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 185) },
-            { "16511393582666965704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "3216793152416217495", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18416908414174464784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5498839261395459224", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12198263593657033426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "10014448860206587805", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13330734840729670622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12676167240795292217", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4850497746076450913", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10016815108730511683", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17948637243158994878", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12259844988981080505", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "15078590909693331731", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11988285441493553006", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13851240591038949807", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "16588325081458426169", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8642107585829380438", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6219075471508685758", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10546430708947911124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "2613462626256090659", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "8295126647635181949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14213516751025324346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16509472637458153234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "16589607587365212240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6988674007771237080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "3448477246688526708", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "8507854696766492454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "8906588133431586825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "654122557966242717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "10196332102593337214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15831600396403741571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "17808913959977434594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "15548971488532746290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "13468713306678453952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13613399861925108148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 44) },
-            { "17802514063213000148", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "13093429681061786539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12247991248100147706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14491949194619001237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 165) },
-            { "7590767013583950613", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13210604117940125947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "4670443882075998209", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2857337999074313592", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "16036386660666696362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "755414184406250882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 63) },
-            { "12190841837604350271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "10292243973236220688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "17793292063552633023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "7605139219344415117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "787363431787954804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "7000486794832106857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13608239208821071914", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17281202179589913619", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "16985912104363932350", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 62) },
-            { "14744368497944610864", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3737552767159920174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "3792945601873900927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "1364546124782880196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "3689722043202617487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "2632535010129224704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "10968768803038046390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5353552956675518468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "7866128397931438774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "18233660940545931789", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "11670430946096342056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "2627779045483019709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "11066913713501760080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2552187713769926425", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "654821507679356726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "7606728651572102823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "7549378486471456156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "15410074937424854348", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "15114370307779942381", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4803370483104261655", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10415046594066474634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3441335188113424896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "9277610800970567810", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17179609670678746034", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8251544171504007740", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1353170363915443814", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14540578324750869319", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13471752029049484143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9062774198518904260", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17917978116807564183", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3017411837779243878", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12992061224471212714", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13161997040644039778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "11724225282274130518", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "12822126914959112382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "9423958333298993923", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "7307271009495440764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "17746215841755337461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "3976736548270395981", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "1192279884248226739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5538883245745495145", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "1173986078589662704", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "11031358859656806724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "4238885454989272754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8943913562339525413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "6931953332823066530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "7799984350284425885", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "14204609663091442879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "9091110033424983286", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15829095120243431195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "3239033622277917802", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "7578177053220150569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "1089944493540593798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "15529757761327002288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18082422341304348326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "17219920118109316867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "12026482841341343242", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "3070859615622845671", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "1778345646142852816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "15188570678726970998", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4750513665628842598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "3372770576629463160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "2983038203471784211", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "6673966852801136416", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "8792202318168046223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "16441830491664937048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "1419073145594317633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17525564757769958678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "13468081302022888489", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15914058104244750036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "13760645810144930270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "5963901433137582265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "14668725050395069435", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "12112853999307505628", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "4161612746310931789", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3388752887767453958", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14046990030104971367", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 162) },
-            { "16230621843665445228", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "9274179337770060652", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 280) },
-            { "5115134711994944288", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "13898821685774165645", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "3007637520820789085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "16294825599850364701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "14681717813022425567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "4915831715914920982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "12894240573737168362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5448537627319798272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 148) },
-            { "14389915292223442327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "14274685812676150168", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7732899312577293959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "11956435900037329302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "9263063714383940562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "5824801192141531089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5608133987357542077", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "15392077168521832549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "16446533347502650316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "14762599606783897222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "709835724029986012", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1572991986657256775", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "7398196853452900099", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "8140094412609934765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "2659031931257084418", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "4640028527711211109", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "18172711677056449158", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "5183231560876991543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "6821855018718422278", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "13237050834496100264", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "7164580481046523192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "2490155559809645659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15430549683839591544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "4553409514380460123", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "3041752019114501584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "4161001033681779582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4764776977138392550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6882621854468565774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "8881135571874888085", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14038261392627717712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "628191607060767879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "3511588484597779204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "6904130543085920483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "7924408980408826942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "9416186718345824095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "14719421757340260468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 81) },
-            { "11936419502418995274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "16601702334097258697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12501619443242354860", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7104309382120208659", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2321148334382088982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "4914435717288687793", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4104562704039821482", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13308187548669026714", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3603187029740446600", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7338229552985076723", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2161052921317193579", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6104380778870471127", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13710319251108632115", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8096131027165540886", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11823205954749139338", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13403161389559730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "998876398773540321", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9280431727790048190", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1152691534728260611", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9101903304994333336", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "142270860894725256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "621915374938805401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "15746620724134970969", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "503369896500284129", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7585785802379042424", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10486348549691280032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "5758133252959371492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15117880293418979489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "9120377367517042357", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4278280309700908015", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9144487908815767824", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17408275657360833363", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11820789223587555410", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9232653317479846765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "18184621367843960190", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15059549186302099880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "16765994345605657100", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9869959062341950047", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14343008518525689150", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3202085450628781999", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "17224104246148265328", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7322472892320910654", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "12480527132372884168", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1008476023750261156", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12589440296742583335", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12604104383683210104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "12782932626966309185", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12946540633035976364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18221867262301937903", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10171373375072694210", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17791024851737594885", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "959260710517842876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "16988275131627316108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15048584393463312977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17381516856910544374", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5336120047683197088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15897477855246170861", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9780938731831129283", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1473214668483422172", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17515573322312447679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "18356980026934328781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "18077281411861416889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "2543041530639980505", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 281) },
-            { "16370218798911151331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 280) },
-            { "17316626950179740845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "10414903047695486119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "2809950092498355574", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "12011982029561277581", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "11267742746905371769", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "12534001599784153836", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 281) },
-            { "1882052795393187384", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 281) },
-            { "419783127503173016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "14211903923555028634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "10892706534058849825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 286) },
-            { "2345023488044002149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 282) },
-            { "5754844816339228920", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 280) },
-            { "17015791782274123780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 280) },
-            { "3706994659266083979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "13324157125165576832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "12014527187730671229", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5170245731599664670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "6854611304056079417", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "1954052357826969119", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "17824431042110985323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3603706453982734995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "11992353959766718397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "15163327502374403643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "16758697697363920520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "10930115765550856328", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "14418429155823196539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "1628593159980574595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "15675968397825708285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "9594594523961285945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "6634330132674952638", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "8434794604559592624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "3150231129728961455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12545558125736154584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "15485701086886851362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "18005721959893562716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "490233152678323691", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "4073467095502162430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5801429077171542466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "14841539539334726292", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "9404953235624894187", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "17995371099806008878", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "8961138963663532667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "425744529089575241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1316444335300814745", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "761169277744593430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "3325727286860556323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "2526832080529662683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15470013032930986062", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12255528292506999241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13119479079474639169", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12813978452097969536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "4991419288164762786", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "18210370419559876426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "9748307611165615848", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11147573971701279689", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "10865695385270390803", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "11999246609107242706", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4118073384938355655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "12134858519320245809", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2930898141522848681", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4190912926126844643", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2929190644951986399", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1126499865206906037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 114) },
-            { "13483175684542464385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "1920070013712913772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "10787747981914307179", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7715649642603303319", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5581428998642936688", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7532088618116521936", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18126685473408206840", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 114) },
-            { "2878824076934639346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "6548949901446632697", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13609660900720370993", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "883436333317162926", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16293465561256937726", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4759671642533786591", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "4903592553439092472", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "2581414750854621875", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "11627532066884923848", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17983556812075120553", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9099720270958987421", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8106738346643994005", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2554991397391195611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "13121297281694293907", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "8220168481755031959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "14502856487639608696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "16871004845988227014", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12015336418727455195", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "1984152634309440563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "14312549767853703411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "403634422724914329", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "10751536136794650334", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "10135458965276110244", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2008424849669196225", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13735180250757239202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "12351866693978844266", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "6788311046557489996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "14578867494693499627", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "11158789938857558596", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9616636708366808604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "11069983292783104310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "708747442142592697", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "2780423409483867058", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "3160543867929843861", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11305232900158601613", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12339692995143159283", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "9316082753126682958", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15991460001131903561", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17647962002015093887", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "4897448054295474302", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14184895905338394239", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15112599407339712681", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10486000767830001094", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14999920879568237166", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14799579913711096584", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "6450532136308941035", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14962768577232034246", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "1452597292381229708", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7104756264011682902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "7744787957569714828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13503688893307029975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9133263538092913983", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "1383899865465106141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "11829442945690098558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12394049027081208902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "12159582810513550491", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "17738299860390552088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "797387385159110695", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "8757900457181374694", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "6048964584602891448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "17882819773586674851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "17829148383265978140", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "14711697456265712456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "724953082687879224", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "805221045541170643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "8241070786700614317", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9191832520273617003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "12408889192918919210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4885944395876887711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "2651385050387738902", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6303682540621797774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "905780459938651623", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4476928353532757380", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "13681462437496627948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "17243648226968859637", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "11192356850081328892", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "9323825370872655346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10000618285883395700", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "6418327009347170687", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "8528750110601691390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "8061914949376516780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12992194515157698316", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "17870874477143985774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "16234606052818596502", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "9148379585489720669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "9270950131920019932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17001502418583498926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "11163107409437069532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 80) },
-            { "11465965972527519631", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2534408579674556441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18109284647478027063", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9849272539053219052", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "17382660912493284320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17764033613416389758", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18431306649860116380", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3699344686791530101", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14151747022287993729", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "826850797666395121", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13486084204140096478", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2114599010013594942", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13251091004269229867", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5240706676373148280", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17490188677223978661", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17854208422879910606", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8767817856303586064", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10672380526821947133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 342) },
-            { "10730222715353420212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "16683169947375504066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "2964705957088952872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "14885031472057965707", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11308583200952256245", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "7208008921815475393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7113777272518482528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "6334639534663495263", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "10151922632636937118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11560634267092054110", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "15914107501176673997", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "18218755616248669884", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "9987415314864002460", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "7667898603371717971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "4403753181729432604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "1040030752340209480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "760687670112194844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "9803492989444302959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "216603198215625772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10899110544832584656", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14447191095937730964", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "11130439225010714550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4325081100430903742", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "4216958486055161753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4400247897123856252", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "2294800960010879540", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "5195511638783481084", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "9545968464906009869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "12932635875905153141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 220) },
-            { "16925721317097534009", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "4398371999113956082", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16347412180100581330", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "7877332346656934022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 289) },
-            { "6323026044750482867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "9761573038170759563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "12098146032672599222", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "1403617451623027879", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 282) },
-            { "9058996149754556268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 282) },
-            { "5864250949922222051", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "15847413004526420496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 286) },
-            { "3199841714087553410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "4957638663977636791", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9437794960375526230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "9475130054420979752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "13312514874803986753", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 304) },
-            { "15997754881872769378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "1941341635794709702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "10157866834809927320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "12308359047798183133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "2986189945936592561", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "6928835003016610382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "10084794570892043447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "15417738436777481469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "18377298651236993830", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7354234812009979811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "8656468860180713379", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "14472187692485966933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "397770940444464146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "14258499419905714808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "17599396373608265826", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "12935563359569230797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "4892959859293355837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "2802810524370514276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11587239927319376658", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9076758673133996959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10432365444137108781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "13092232276822302626", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "14896875712028630045", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3236003754884728510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 286) },
-            { "12181889163404078773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "4856470441452830056", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10022487076451608714", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14811603003184578943", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11565861421381730304", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16577611471466452776", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14616969385577243225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "17921973525603585874", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4617809377006148936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "12641170321047008726", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5940337324384948573", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5738835498104275267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "3499106702307464480", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6942016672941874829", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2173720698351153121", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17201365233492366678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "2877521658768725103", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7689320135952025041", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12031180482028822765", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4717620775314557374", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 268) },
-            { "13800760323805415740", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 267) },
-            { "946479876892100082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "5039037192630609823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "13839116996827687373", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17037416417174266088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2321767794934000238", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14907097142953816744", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2525260242689556544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "13328449155966085543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "11856266545854830143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15993427814066246646", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2100891581797371600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 370) },
-            { "12242618640422208652", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6133592828563353516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "18232278892738147217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 283) },
-            { "11992625045241269569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 289) },
-            { "12601126285773042005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 289) },
-            { "7457899998356343871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "6343888265369366589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "10791067159964399241", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11327097771110264965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "5245308722062496788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "10792503079194374004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "4818231379191523896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "2198278382394812839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "3800011935243649447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "9631545863582097486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "1779941298820543013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "3621930417735246405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "14435120971846098308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "2893564501191050837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "8108843303778211282", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "3682813162987778705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "15494543914974994991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "7565221050911842393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "5629670679897666607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "11754316727756881612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "10990741293315393791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "17024388383581997032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "10302338806536775954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "7915318733663535312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "13702692566238948173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2909728331855309274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13071545223094862275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9631481972809246378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13540002981450186147", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "7076937538747704750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "2041212737963974230", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5308128387928804050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "8619526128410675593", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "4792351255949877935", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17759505449240263390", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "9584652777232392944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "9999955037598579164", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "15961487889420208188", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 197) },
-            { "541817615957967731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "13853630125050609175", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "4137755981477177003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "16949056117405140365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "16014822406751503249", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "7700321970687976931", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "7056293586529818253", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "3814584042139408454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 313) },
-            { "16992405636352406660", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17442105631503326136", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9606639214735570069", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7940369586324090841", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "8444259010311137762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "15489746763312425915", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6800893510381991731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "4156384238797998294", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 185) },
-            { "11645116728396933125", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10912495395422146386", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "875400109066360897", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16475247464223458061", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12700372241799686527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11640225461345567929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "13183380647506951324", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5242271874488296527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9488453013746383896", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9726913113016874092", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "15979956159651515122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "9947449295659685973", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "14230493618724018658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "1704404203639481753", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10404725818204494388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9767294641786972359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "4282668574670785584", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18043340998699622388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "7148542290597073512", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9040046051053703359", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1077773457856682663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "4716188972902735458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17343050785312683560", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "5687802882700097624", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "3524531620118359828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "5688478347124565305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "5504757952698692953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "13800387305792597325", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "6574971185849732667", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "10573920781439771673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "4992668316921598993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 301) },
-            { "15778834188130183853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "3062101811226530720", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "428659495445490820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "956022649859563080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "13410850301164057911", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "17423645390621980919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "7802311886554362782", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "1172103288112689821", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "17353894529222574441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "16431857516454692096", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "9100044555742394133", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 170) },
-            { "13115589642140732066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "16190949264253468961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "7026575758396092435", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "16761856644242716357", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 167) },
-            { "6341197991729122563", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "17087740929472936216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "10795104632256101599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "13327653786981478088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1096671695414716274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "10774528268153772208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "9525853014023664813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "10632020369698615114", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "3234107167862677811", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "8708643228914766202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "12415368596357091523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "1028160614515220430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "17742192339816511494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "11931568365395665142", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "731825454731954517", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15989894214714907271", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "13478984039708550410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "15773157615731010456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16772854836230971016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "2934519615045138808", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "4880150897829846031", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17889864541794448203", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11768117585574496387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17906607354577138153", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 198) },
-            { "18270587701371596297", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 199) },
-            { "18142462471803295391", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4815047491742617397", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "4513063773753763458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "2984726467649419856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "11795826875463204296", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15675903059949404837", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15817443774186015593", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14558572801374416278", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15555083739490354527", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "3854114166348568039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "3216877571075556066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "739676584505475609", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "8303211644727914658", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12908594497114706897", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9918371346247634545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "10893432143734884603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "5339985303398206057", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "5941852872160795604", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "17634966178519099371", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18299254635579957284", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13357365044448426880", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18135307303959376082", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14764715930784496165", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 227) },
-            { "10979362792894404338", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 230) },
-            { "15006321421735686121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "12370729327673204804", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "10722677916294015259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "13454265023861566476", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "7995820969034996638", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5275016494706355806", std::make_tuple("convolution_gpu_bfyx_direct_10_12_16", -1) },
-            { "10947686124973711385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "3349519148124496343", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "4003433148846544263", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14973431782875808802", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11948858355027908365", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "17951403431757222177", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "6586872365879203192", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11718418772370938734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "989564341557094953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 87) },
-            { "6942049339361951275", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14555883089089918919", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "14808895254077106198", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13830605041347009953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "11955992313739654625", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "16921026268702574340", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "15320845027635796583", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4014667229872705228", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2438374917504708831", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12391792381149655331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12864558900883069118", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "7209217811135076623", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3272017687600371031", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16067605128297748820", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "14150012830816329527", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16218339663410630711", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2089730611490367290", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "8907982643256296667", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "804195263636995800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11528417522960871233", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "15378025640603637387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12860222041026638681", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11597391933877736800", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5042176052323856983", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "17010172246526353957", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "938848188161536107", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12725647706191463348", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12553441041059632729", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "12782191856884962803", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15824189967727245909", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16027853590391209100", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5352061583962489055", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2294318010381635693", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11055049031355432623", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2349007644347065353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 111) },
-            { "6146876760962332928", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17434429579652310107", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9447458159095730492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "8655883535274781128", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7272538316511343863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 225) },
-            { "17564338309805484464", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "7881187047171099732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15579919505002150556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "11583017348580874022", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "17915846724151945664", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5319668297345215520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "17208186152576814861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3633858263279042265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "13853056718266488510", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "14759179293743468995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16995873636564597028", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "9438739171104456179", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 224) },
-            { "14429081455612806819", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "9819596940685093690", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9426665763007611385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "794499287296495726", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4980217316169616839", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16105073808368936420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "9530116228032101908", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8527193566719173253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16566214123371867456", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "1470933384474984858", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10706267011822108376", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16081386644309102158", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3571959174116404960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "12566041126392848976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7603872175048237237", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18235209540858013173", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14316077757957132678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10816637153861630723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "9175450649281374948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 106) },
-            { "17370158297470557151", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12051595062513871723", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2967481531952454828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 108) },
-            { "12085348936192462321", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11951606039079763598", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "8769060267707904998", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17104611871050967957", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2103882464623009432", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2659712601063515059", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "9759380701896779097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "13842309033760176194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "2418288192668085805", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14994322266840011040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16402312692470500253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16955653765071712611", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "17830290099875088207", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "603883331897298932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9731370183088819573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "2296581485980163665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15133468875250992696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "12972798847556569913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "17446505012657609153", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "7223801044761006523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "16511749893955141055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9485825829394109934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "8130920994920685157", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "3573490922300056520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "5479761740065152589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9480653639044390919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "8739347545059610410", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13459514533473657102", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7824524940405130010", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17796310681498690253", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14823616678465136590", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13816104794723484993", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "846088275031979661", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18125732229366977468", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8464582977975377118", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6290317420155851465", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "12696412964119109465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "4994591211723226974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1036010477232750453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13786357802945430475", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "1003101267609305257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14991602704357959545", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "7840653268996892538", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15488340031228619748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "5003718302026277632", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "7693459946348737411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "10536316961655703500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 196) },
-            { "10765280349477640969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "7447163906170805189", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "9319254979377483709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7843508201826629532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16395067736440127496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13820498543284008286", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12071914115316550349", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12727541507197887360", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17364712285968437405", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16120988958246503683", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7375461241315602473", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13282951481330978659", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "6181308879301978465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "15488550074426713959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4062706195708729345", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11604111639041106489", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "10512507780534402341", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2128612971571865547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "8594644182487917002", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15881381297320383917", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6040286126398028933", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13926122593957480821", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6213386558868267629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4456004887590847716", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9642229389394495047", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "18259656768460999562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "4983880246908724272", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 273) },
-            { "7881579844586294503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "5331173521406046122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3285520504090196295", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "7143510787416483146", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "18103534417093702556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "9328223957245552723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11706446082856895571", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12625112690264223217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2114232149447438823", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13883044928774243663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "17636500109629107732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "6192955702438301372", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "13970935346154374605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "9692654253261175490", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "2116913943188857359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12802517759474139810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "13611054146745413536", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13814086981499638596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "3106922888635965020", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "10509933181132310969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "17318287523550546026", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "11806402239500046867", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "12353956380178079089", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "875296362957469305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "14912119584313592912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12494969618927201911", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "6344802942015047824", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "1692411934657235774", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "615341695338735013", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "10601835610089648700", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "13262672660175739705", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "16522364268583242080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "18253784177599134876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "12319073009094248232", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "9954050478761346921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "4640696923527766618", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1436052878894538927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "16011429608661242565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "4381329435655511217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "13972357557211413688", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "13104509059416300615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "10090923790949378407", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3429844423226609965", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "706049518431331645", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17193614571243427089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3621424752591567930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11066930104187448422", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "209732971447020989", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16044646335477470657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "2172121470071868949", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3392693938352572136", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495063314176654751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14553856088069405595", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "4967444801764057340", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "12160764253455777655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "17723621158215826108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "2171768477223405739", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12672995204641007004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 232) },
-            { "5622089373755094139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2129726780118554358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "4160656836528944651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11052732052072367261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "18432787283148809023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16172528828198474326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 232) },
-            { "16327433707667075261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2797723586312707948", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "8451212914744825089", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7025975403069487257", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 201) },
-            { "8913950860101596091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "15308578014507211237", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "13132804928635689780", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "4465781406991476376", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 287) },
-            { "16266491618150971928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "181006047500375768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 286) },
-            { "18140951659547259039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "272730229972987861", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "14898892437285105327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "17252449599613270108", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 282) },
-            { "13436376034548670107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "13787436604877398090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 286) },
-            { "8873614802459592665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "13663893159182636270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "1361159591875955678", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "5912303851874077576", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "16245760498096322525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "9928406318940388716", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "3036512701943687724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5334291640387922287", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "3002986032379998259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 149) },
-            { "16469788155263456039", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8709632541892447149", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9524303276541517389", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9354818521586974021", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16781127329510211966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6351572488552853754", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "907036267078333137", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11855070245618904113", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "4544242784357021697", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18218631037214746168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "178353385245384751", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17658152048177750315", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "11636129433022017868", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "2622434279674583815", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14335074487552883436", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11175955260573469979", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "2732519635571994212", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "13893789954946953427", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "4355933224673863178", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18037918102910297531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "16071723603031305677", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "1697248235682953135", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 362) },
-            { "7843498978148810586", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "6767159196241633301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "5097818987523855112", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6623182990939010641", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6711878663358611849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "8671491767142900139", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12164298124869114517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "17089801601582809764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "75742659105146536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "4652136280940317116", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "9751582946441607796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16706244336960642883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "12581879452540858313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "17443356777503458523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 282) },
-            { "939718260623752240", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 280) },
-            { "14131851237755716991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "7474639594232203854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "14152716242882609401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "7998930863626763670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "10323345824599612614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 336) },
-            { "30229601562833524", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17788367809717898285", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "1509728225855233852", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13139625572508441980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "16491532291908469567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "6355395905401306995", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2096779676054335057", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4217179485243909459", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17101789600628162503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "6139574161497189424", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "16559140502701231107", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "11459784003592366395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "7869916853707978306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            //{ "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            //{ "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "12794369485239257709", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13338594271376045657", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "677249604491773387", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2668729552208169959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13011676362747785816", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4678607855896512523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "4356817283284529593", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1885075753696445410", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "17806712457019493207", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11862259122805366807", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "15201438563802430490", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "8132521728369930959", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16108573960501496757", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11086699387784339943", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 119) },
-            { "4013707396889204359", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11850332373794932468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 215) },
-            { "14763982961176216679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "8207349115037232863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "3273748387141431306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "17109520309574369561", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13754408679115174221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "16717713360264747483", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1045854873741563331", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16767392067294252396", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6114241186364821679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "11241838709529552265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "15192230303376521834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "5374969798377773063", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "592245952014430043", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3114869763557037270", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11254635684957519432", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "16816222375242496370", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12809199739984715013", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5040730152867713388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "10429613013253088132", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15451919862187018297", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7546586420552408243", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14487682847898298214", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3106710091841093202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "6458124573210430792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9182897385081081193", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14462438074931673266", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18133334552107213128", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "38736266675995457", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "13654816209891478730", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "6263019986730305851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "12929981792125924963", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "3138374672801504481", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "4465701487417893814", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12977678792503377525", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "10879218241103462088", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2221145174704245189", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4635570915184713874", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16075006181495932250", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3863816884636503247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5440983284868981549", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15428591250165788477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16567638487719493784", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "18059267466971880386", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 328) },
-            { "10808909442136736629", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "5682190700442712936", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "712165731154577189", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "7469127846325904854", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "5926747396493954633", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "3477539135137665170", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16235115911229280717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "17009318615658405230", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9421643783312790618", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "2294026590516781945", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "2940027113687311893", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6090625728451718945", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5643908654122573882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "9065894438656900887", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "11185156002426041243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "14670068483447729857", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4623542918584461522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "1143214652021653634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "1434535531617424039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "17025268985366223779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11507538232733291666", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "16243196137456624852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "8059328623525062913", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3662747857062156477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "314054598858070952", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14122213471825630433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14985236276429954162", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3265415000818832667", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "856877003890134554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14805540705424073865", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3788462090984291082", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "2715447739580688669", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7171904645566467208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "10308431308942416781", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8712136292276123857", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "8700574100180128776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "17147293671640396193", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "16474284418841532356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12461575861709234385", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "192209423643075326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "15490478608105402679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "3491333679577961640", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8176012042686275874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "4282198629458668761", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "689445825453914111", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "969746749329671447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "16833026567865627676", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "13046322179198317310", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6902644989079870993", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "10987953316324712538", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "12515465135362865565", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "10049571207493913006", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "3926585856863002495", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "11275109735493317886", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "12238674883388043717", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "101401523793806394", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11007944497812650617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "3240102173773280414", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14883438809987378616", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "13320675959188615441", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11975047184326016230", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2608363732937932266", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15943141845766932879", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15486917753097743853", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8317673282128335201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "10635659193402005820", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "11450378244355788918", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "2625969259447793593", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12207503176295152756", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4625107584562815965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "1997392406402548974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "2524029454785583409", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "4615708568396290002", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5349415632630235233", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16108759090923335184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11756881293845417212", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "17839839336294937155", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4703107905652287491", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18180820925685532104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 191) },
-            { "3835286851569826052", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7807983899017500046", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "10294185397756053636", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "5519535335798045279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "8701248964531180496", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "291868903926685441", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "15239764240622554314", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15963038745470172423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "11428599290755097395", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "3180320769716158201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "583303098958523195", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "3509487327001107638", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2649192407401044065", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7706714181281908433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15914342421266687768", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1497560475414454618", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13485300684443803732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14571022040013651253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2832268621630415376", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "9383182168277796969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "16487774205195979355", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "2226745622763268469", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "13809330759308309353", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 228) },
-            { "11634932044447867039", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "318377908569897093", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7353563160591978243", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "2582625260054352916", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5609922876429907954", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12557015880639217508", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "11528310408333718862", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "1471837664358450291", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7351401242363888463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "953306082374100275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "15759530339367380982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "13300022131572486202", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15689502054035168040", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "16969463538496570528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "10237524128771958432", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7969848911698660033", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "7130694811424715594", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8578747191812631883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "5197105253412476591", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3120553928584920777", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "4750894407873652809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12667014405537239093", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13644681270630373984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 364) },
-            { "15602218079503030465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3950738240651133849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "9101334153142718004", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15695415285791951018", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15493488989417521388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3391032227732782982", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8951040603784899163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13804221028705631415", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1351033666248868977", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "11330591026581463934", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6142707387281700290", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16117448559783537844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "4531222427159927606", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3116068331849795558", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14389719202147508599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "17053671692908867872", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "17025182465337728023", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "15035800097152337587", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16770615142634470903", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "9378269524012289175", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6727930402459775131", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16362857896338778056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 368) },
-            { "7187734276051878356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13253775441326432265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "14733510474010040334", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3336303478756453360", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16352331970945217438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "13484950419220835364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "4674416595144505741", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14559308665571750465", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4542143431130171516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13189392239349392492", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "7009735776703529573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "4220826666482500445", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14792528369891965810", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "15287650965861631130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "10308175009371219583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 166) },
-            { "2903605246599054308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "9213563311267466388", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "5019077257951332016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "2497756607567197523", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "9285566577169147378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3432296808755992670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "7688176479120305539", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 163) },
-            { "8818070832398055086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "8787816339967963727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "863952266514375915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "5835634465164771899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "15101680837342453931", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 46) },
-            { "1116274074896622552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "12790788016297794214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13538051178827008933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 86) },
-            { "16403423801823379909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 85) },
-            { "3723613341885592267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "3830703844770425343", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "40704767167309552", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "13973028408397200796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "16561224775421968533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "11243840588602365090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "14103112843209793966", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "10483664832302187567", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "8100595788531468781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "6620782733027313312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "13526488884846845330", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3534971503826416049", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "10425889533411573166", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5214654427283761256", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13569941893504840630", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "1318571118468536310", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17724604495865223459", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12229574562535756991", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7264274394359484318", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15069906408448814772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "11857037689248685487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "7977195117668583981", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15678385128478075284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13025361884606488732", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16723478941106779069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 269) },
-            { "726985753660756762", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "586947787345351152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11418379777288974452", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2575631797904040925", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "6288489890578212082", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5649082203775427830", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8036474422877454869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "3711525118850629466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1875764913306932583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "548663565933738403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "17329287216741045059", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11848462434662954749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "7581174843529024536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "11334122788337402526", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7868973874302246233", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "17209528805596238905", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7878605163588288309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5941092474669713339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "13738760763969959522", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 90) },
-            { "11988546375476924356", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 52) },
-            { "13680926356824317761", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "2530317332900569142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "2891736961665476908", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 53) },
-            { "18008552719153887303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "1299545313185409227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "17907223570737272640", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "15643135666029727865", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18180655791734632264", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12990527753120735255", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5303970743736042689", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1596353239542510685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 359) },
-            { "8040001390872143271", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12052207771201936228", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9942099207256025216", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "60509335250891515", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11499219760597131534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "6726099352298108756", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "597073780328219388", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10783630257421062891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "6988492019664525206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "7132328255408635227", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4006884370026272807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13938466156916423478", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 363) },
-            { "8689206546467098603", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "5644068493155655611", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4867937397499803072", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "2702144517025248597", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3304589333915676807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12894625941923144893", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 284) },
-            { "11649407835105973949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "4897991181236908768", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "12179581684777023804", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2806529556090896246", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "11327228813412934262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5485749317130402302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "3499243120652875549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "10916647716124396856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "5749536453225343663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "789359733867650915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "12626014184575881530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "1201692134690347847", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15249442550355454201", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2598267743388306204", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "7181154048972884375", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10930640103080573253", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8458082326743351141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "584086621952390547", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4754967381316623440", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4353842547963164546", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6131481289104111211", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "517997325935712670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 332) },
-            { "5600807544955072308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "973966345068677905", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8532217744217419503", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "14614844213016502202", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "4126895998426674411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "9700808806849459216", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2438261005924916746", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 366) },
-            { "4056971751486746551", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "8929453032482114162", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "7662200927459001757", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11473442921040533207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "388828310152538138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 356) },
-            { "1643241486250690844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "11806105193035393795", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8843585527713905568", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "13248567106128518549", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13708979487306970634", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14406070210216948643", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15352245788978088971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "1435153323458789173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17638692805430115529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14068780861332616363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 324) },
-            { "6656593119788274992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "14695781272831602408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "15696910741835640150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15315327794058441258", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7545013298074733778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "4026686872534942904", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "6553736978928374036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "12129572274423886770", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "9723314434598141024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "11031625790234068916", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "1138439260035360722", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "8323445733669842657", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "54019631544204590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "8971115542951085891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "4584970211859494304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "9321208819255762521", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "12617625046664709483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "8264178890341675354", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "5334190564423375247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "14746359019867963124", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "2044363708106765326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "5132761922124425835", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "8141428150264829362", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "276407276027553756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11878734040194151073", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11622925573287101001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3192332625020432602", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "9785114056964539323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "9410978119783758141", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "12523676912856063091", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5912451559447635837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "10264913782610095832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "10309083227104422150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "8500148569566077929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 123) },
-            { "6578908625437515675", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "13762042713029963144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "1561225943337590599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "10917498758625273194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "14335423820860953927", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "875142032423622622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 120) },
-            { "8965747921518186477", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 3) },
-            { "4428101657497677982", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "5779388310240896974", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11092828091552833150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 43) },
-            { "10295330953350618042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "15901675909820977223", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "4894227264080887361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "381149736509958403", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "7962991673727743706", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "12725675221990905186", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17961702508543961900", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "7082007579524697455", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1867337342417952506", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8931169575495985034", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16542318967217020315", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10626341369865893888", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9090828337597312855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13621339501067135142", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "13754540732991287617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "6669808855737023569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "17640725195881101275", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6928136130626403937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15047676717402283805", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "1082574490068006980", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "6557428245898292304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "9440117898128288296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4672441137336208890", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14289082888174784976", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 198) },
-            { "5056859994174498686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "16574710115918192418", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15839295895890205274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "16307464696265537356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11910735867274493498", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14671212883301405408", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "12028665820838352309", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4773123925616969670", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "13602140021189675477", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7708321360699824256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8609939102588915855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10782611933832492335", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "8857763129101380288", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1230262279011217327", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14424566003632608852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5497751772699578150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9541630719145326121", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "10724501418439612080", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "187352687850707150", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3438296636411972401", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4165036357594592683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15106614232165315070", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17477062954520561609", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6664432489777052771", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "3341302541468955849", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "11626402549863483301", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "3522383297921565178", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 259) },
-            { "8651641584737798174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 262) },
-            { "12473600360154597915", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "13296242326766100583", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "12068797674575015662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "6297802534570892679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "10037086825900566930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "17216583849049249733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 260) },
-            { "1287490919205560806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "738850098651678143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "7139714914586273766", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "14050124896329573468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "5429130923188159806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 98) },
-            { "7953255701516490034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "6195916781434462809", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "11025471731438443683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "4622514167765722873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "14680730265621679042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "12141300895511301068", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "17106086048442658788", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12707946849050970702", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 143) },
-            { "17154337492545826355", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "10109431802089940590", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "9428176632140441528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 244) },
-            { "52089503050497755", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "12297371032753209816", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 214) },
-            { "659150305191479097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "2065752819810364738", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13583166868754499339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "13991205023798493715", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "8939683514448064461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "18337160891834020517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "1154228007901031779", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 353) },
-            { "15156525717629023944", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "7757331094141318304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "16779678846332091086", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5409924335138540834", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "4149728557142033774", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "6443517114667332732", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "5419041493176804960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "15948383678216076358", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "9604982746455852556", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "15739274921308457528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "4642234334824303290", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "13200151444914751729", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "16894871557229780934", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "9933958860597451711", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "17094948685292534952", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "9762182215179534181", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "18273537339378756543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "7720939595094113814", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "5865480930796299143", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "10058165874008941852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "17309326904418811234", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "5592428580503282095", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "16348402367953880206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 242) },
-            { "13607830451968188080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "9311802150474489673", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "5159470523468873105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "7975810844103449438", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "11455843788148231615", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "1410630713443793537", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "17303408650780384587", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "12069726772532946193", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "6204183474669103812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "12874626654611400042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 135) },
-            { "13546876216568825877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "2973436171295280783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "1908809004094565452", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 62) },
-            { "2322559721899919275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 167) },
-            { "5766507688771440170", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 22) },
-            { "16626226341188424071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 62) },
-            { "11709992724966310174", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "17222005830854879661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14224121742920800990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 84) },
-            { "1071007164550012186", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6719302427415173754", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 44) },
-            { "10482582307328548806", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "407189201971322683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "6531171505861182429", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "879005904827468163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 0) },
-            { "8460847842045253466", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "10488269059469838160", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "11359409533744011242", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14813178380338948912", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "6307939332939714967", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10894058425957901202", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16610284927818475574", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3221469860582147955", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6423785822515265784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "742689192890486807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7349880498513046830", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "2369451367723962073", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "11690533591656807605", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9205978149692979955", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2728938624042183713", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2781309272856442321", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "579781312141502576", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12564687330941036772", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8421388456873652700", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12177387334053203378", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11239541755868028928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "12776081190690731910", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 170) },
-            { "5648658688155716974", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12213354854947437262", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5680236635030250712", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "5751283221740229986", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3646228701104397128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "13776178598632392721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "13364676690016875118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 174) },
-            { "3141773224039276177", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16384186388687043048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 172) },
-            { "14421898375873029115", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "8922929126299811091", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10256831975351722184", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12590922530749026871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15209909241815414156", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8791285622784082122", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "7474592508575297101", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "12068974703657294908", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "10682300249493137042", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "1788455099959676873", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "15225354446874994535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "3226193790517362610", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "15814015810740458605", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4129722446574108695", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "18094205332383644037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "11120846960057008937", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "9195732599757736182", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "9939234037869927090", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5898740235388207878", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16694984452720336415", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "4889188980319017094", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14412158605670555579", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "3463959257726925426", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "15726902746983125797", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "8463615810239412362", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16531824466148265247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "3374410641320310726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "9589942627115344216", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 101) },
-            { "12864204111424196179", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "840202264034382558", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 41) },
-            { "16386955278777720573", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 95) },
-            { "16267682394077585279", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10544411879329675593", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 7) },
-            { "9835739612255048978", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 6) },
-            { "6293403765897901528", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17596685300497748803", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "2150326211917340956", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1587501521145162454", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7561096442572829049", std::make_tuple("convolution_gpu_bfyx_3x3_dw_opt", 69) },
-            { "15078262396281327048", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16383540667048742064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16820082917500285799", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6820284286806022849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17285815901490707654", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "994182747184593564", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "6642767323474835034", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "3215659303601163167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "54975980454651672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "11529876081402974396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "10308113903347312964", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6712698149192186833", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14930789530046665855", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "2204178900998688268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17174919737114915467", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15154700439767512396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "14916625550370402883", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "7650375560336513366", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "9999553425206328238", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "14026570177552137240", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11686670048744589243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6678796313875454849", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "641417817126876622", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9622546530872848323", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9194788897910888066", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "522181557896569275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "3332334993503432420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "16131448347558322280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "15924916465272239832", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11669828823444745889", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "7243917162812988891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "17891499682354369344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14532519639619315651", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3635446784873718932", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "18275601715050791851", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6997971129340865650", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "10722782762733112118", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6585223640997887253", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6205240287062600210", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "17522452942286240233", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6571438978296387721", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "15511138074959300404", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11107930597263802755", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10320711719466983961", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16884228931101540030", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8253823502854784432", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6025872155179042054", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "10173283505468233128", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "16094174852600023296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "11077876432364512822", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10586018593856542117", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "18436249934780056991", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10179916356323479080", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "1760391741350091665", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1418595171949196661", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "15967614281807823696", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "15329680728165965773", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "8794896449397768269", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12151068022697708126", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15959543980008442942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "10861769381993948050", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "3316798708399098230", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4734389463002799056", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6911215749850066204", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "18267175011323462494", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3109104171383198425", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "18136135457402651842", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "11834683513280095384", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4806571630436601566", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14849108908297747749", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8490260671996115530", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "2929715823970060874", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 346) },
-            { "15924583510704449214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 345) },
-            { "14331658870024759698", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "6340128090694375876", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1120455113299469776", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17268201530818712998", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "3644282167178264526", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "360872770877634346", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16720108310653948550", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14353390922580547467", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9868561386826862471", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17465517455679097501", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5570311824197099845", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "7524311370696987092", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "14070988879848388270", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "8296551195150971668", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14352796912241296357", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9840495023131952174", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "4720851194954041037", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "17515064188391421150", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10437367877444543776", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "4362304842016958728", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "383721620126444793", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "138379779469699309", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3759515057574218101", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2856601829807186494", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "3286330985102373533", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "8159303545761286685", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "4056979460327024961", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "17823133607491820214", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "7969441643457570812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "970768445746568749", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "13852065717057446998", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4342360467977736802", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16336482874764861478", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6075691042233712335", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7570346182940928159", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "12971822824884826169", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3033264172690274208", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "17301887391757619741", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 330) },
-            { "15790005937034794347", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 241) },
-            { "15464327246951632247", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5659168916726488798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "8079376692609682448", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "15160738482264643601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "537074122417021898", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3336076058264596420", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 238) },
-            { "1982176363226079588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "15052577143485630617", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "9314293064351558241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "4958835037528182801", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6817494598328071314", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "14387756025635589673", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "17536308070854915513", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16027456210394993913", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "8655315308767111198", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "4447065688824381344", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "6843617687528352801", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "17900257435531434807", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16789135236017252073", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "13224814158106791463", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5078905972285278557", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4196367396954155354", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7009873605945341897", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7199295899520406795", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16833854122884184025", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 347) },
-            { "14599780481362761532", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "2572395498687401679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "11810221946429451169", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "18084635102736402756", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "59739211822469868", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "5240181393417899912", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "15962137123591591534", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "10989937450490049763", std::make_tuple("convolution_gpu_bfyx_1x1_hgemm_buf_16x1", -1) },
-            { "9798585825695496550", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2362092095402043749", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4444730303823507621", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "487214150851213303", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "745009493367761775", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3806761527342944195", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14458851250685872417", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7106362077449435105", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "5853697372844744672", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "7603319690872333930", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "4628748977913534701", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 338) },
-            { "10565371760124443824", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "1972879521448306536", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "13893808009363736870", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6584960721513702502", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9220830217525628783", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2235210915304938149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 190) },
-            { "3930314908786112883", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1334070221835422461", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 157) },
-            { "6681818065741882453", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "6980201892073961793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "11530101016435264783", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "4801117903303888658", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "5782934278345953016", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "13951717514084457087", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 329) },
-            { "2721793280965260548", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "8124736388338424498", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 331) },
-            { "12223993560805441284", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9860570706348640782", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "991586070509079617", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 192) },
-            { "7060804814325505165", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "787203599734115483", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "6193161166790398003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "12806934028210472719", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 189) },
-            { "7465681710653503161", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "7958443549125799229", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "15548847099740441551", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16986610822918634530", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "438528596970898721", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15109847707903824859", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "7121708962074176240", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "16789245987103323406", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6318228858846223186", std::make_tuple("convolution_gpu_bfyx_1x1", -1) },
-            { "14043770215999952932", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15277856047844308598", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8048617952947915835", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "11446745541571732900", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17422822627612865758", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "13954144830230671601", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11198908896401597838", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "5582896843095691256", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "8133587696326295326", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 109) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "9492402787848610840", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "10515519878978734341", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8747430148550634190", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16986358655784856534", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6109013751635776331", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "9585113116232600562", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3503893875515897267", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "13144385730409574259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "743941460026466526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "4492332228252010118", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "1920042803083729276", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 229) },
-            { "16436006771518788093", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17567504672169904482", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1989849521691057108", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "16706121580364790904", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5495776091407365966", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "16430562172386510259", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5673972310424776040", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8797843396807284399", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "1698321314111848001", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "5762290464889692462", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "3218248162832023196", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "12988961529988078346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "11683680166617045816", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6252429564537528709", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13145474177271090694", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "1208161922424418734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "2762489653422414995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12937333118472722002", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "12917241193304093727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "11020315012951440351", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "1518270620354036926", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "2567046336192437734", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 210) },
-            { "16409729623371222748", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 212) },
-            { "1044978617045366709", std::make_tuple("fully_connected_gpu_fb_io_b8_f8_vload", -1) },
-            { "8473037597903277214", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "14398366949002972908", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "7334966010680206302", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "4161141078006269526", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 226) },
-            { "6522575549211855712", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "5629373398445592781", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "13374993751390784382", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 315) },
-            { "12976499206227689731", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "9882204352209412039", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 365) },
-            { "5041111302824362529", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13869716373706247686", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6438522646185979880", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "2406816735581074778", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "8881150100883636392", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "593712935037568960", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "11970881115757095265", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 319) },
-            { "5584432943673435454", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 317) },
-            { "4560479630843098090", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15374625876485618845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 348) },
-            { "13102754309439605192", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "17912189681971987483", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "8153567933591966877", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 352) },
-            { "1604661321386793876", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "8990561333549136048", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 360) },
-            { "12278364834477923930", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3122997634505472500", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "15669490019428002270", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "116291934148608396", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "14729854278671832528", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "10591379189397010097", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 237) },
-            { "11929531534620071758", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 239) },
-            { "1819720745131968914", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "10607904718265020949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "913496537924971856", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 187) },
-            { "916389941321470163", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "1411786954276574458", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 357) },
-            { "2730604806511016352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 107) },
-            { "5843679089588930933", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 110) },
-            { "7304346312452588844", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 231) },
-            { "2423754482456771339", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "3653156933813711765", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "11149782181562145291", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2653651564133701304", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "3526580286148537369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3985659568982275663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "13642146548740074992", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 371) },
-            { "5219399418946822456", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14217181622713951411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 303) },
-            { "13025323039227543550", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "6114147683777615071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "2355214244972870639", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 140) },
-            { "3167336012388169649", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 142) },
-            { "12218337369633748663", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 141) },
-            { "7264756313770306662", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "10492056481694320580", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "14281201038135286621", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "8127190765748950828", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 271) },
-            { "142486914279119363", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "1532263118203058517", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "5482851829165191681", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 270) },
-            { "10548792624072794724", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "4239415134522959352", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "9028970753877215614", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 122) },
-            { "2324120381399737261", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "10267260789603562117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "9988801796928462423", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "12516911293946682547", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 246) },
-            { "9213886570531053949", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 55) },
-            { "385046297070779752", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "12541834857357563605", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 124) },
-            { "475043738497218394", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "6351347283201596793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "16290626406346691996", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 23) },
-            { "4569338575782832784", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "7575675354187625951", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 349) },
-            { "5795073619189010837", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "15123868617509445149", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5601435819039968726", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "14104238386345631681", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "17377293745073971167", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "12134712464763856064", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "5524215233998361104", std::make_tuple("convolution_gpu_winograd_6x3_s1_fused", -1) },
-            { "1103228955716492167", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 247) },
-            { "8618835732380720921", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", -1) },
-            { "15908673392788376468", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 261) },
-            { "8482147530539941792", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 299) },
-            { "9069334144391048686", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "12493863403516600413", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "16692569816843207989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 265) },
-            { "3438116423688595487", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 254) },
-            { "15602863681196390535", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 245) },
-            { "18277685132620834972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "16541722316343690197", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 127) },
-            { "3067806959725855130", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 136) },
-            { "17791773192152464021", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 57) },
-            { "13603318842632052764", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 1) },
-            { "879896719155824868", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5219048275475447369", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8707189142909022305", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "5948701218437980356", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "17050143605017295447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 341) },
-            { "8906185843274300447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "8321769923556905957", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "10433541468308381909", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "10405183426600618231", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 354) },
-            { "14885109535362957947", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "72444706264681262", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "16818714747882774917", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 222) },
-            { "16236397968499692493", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "700717277178942679", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "482564204402769504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "3221221905804708596", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "16467987800266816984", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 213) },
-            { "11599932445375240727", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 223) },
-            { "5057534502588100071", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15640202505592598653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "3355259926747524578", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "9226443907548972870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "8104309105061227444", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "18384657372655350144", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 219) },
-            { "13739257060165119132", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "9810904714798127155", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "15609627722687211129", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 358) },
-            { "14738573151275130683", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "9421927854269492263", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15962533525948221648", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 138) },
-            { "15856268902838573812", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "4085450203909854919", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "2370837049876630969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "13464226348405628455", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 252) },
-            { "12228963567837353733", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "10377729875228238588", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "16362139250976572928", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 154) },
-            { "5420766967862917815", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 194) },
-            { "14578291812739325465", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 155) },
-            { "18310667924071639899", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "16853250891250756537", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "12990341489637414845", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "14630499010941056793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "878892264408839067", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 156) },
-            { "9259437778054905599", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "14974730512607138726", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "3600066510593746268", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 159) },
-            { "3140230065585683313", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "15891662883560480723", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 367) },
-            { "11284755586130392759", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "2281119269283845320", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 158) },
-            { "12246408434917478929", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "13283842370311517843", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 193) },
-            { "13753473508578037346", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 134) },
-            { "17123153447808465303", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "10700011669103135203", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 321) },
-            { "9979259596137305973", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "17225578855755054959", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6471563320494376693", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "8146945902795164796", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "18372284940315010254", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "2194607895573544953", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "1332624116953483870", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "158222105675022402", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6830387121684699972", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "11077503608116183709", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "17847109385592002207", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "13384754476437374504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "11462462742322068863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 322) },
-            { "4265693151382066296", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "11070620435959083971", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "6982733543386888622", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "3563614453014995411", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "3498490999014554104", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 327) },
-            { "15595549493819416194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 99) },
-            { "14532844474906286088", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "9562291747339451180", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "6772239376357727149", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 126) },
-            { "10690972785852373520", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 49) },
-            { "4488336106517889531", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 82) },
-            { "10058614204420018541", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "13865227850818392065", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "14100870590396726248", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 40) },
-            { "10848277915422577656", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 47) },
-            { "8121179472578287280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "2502125887857336825", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "13192808619929896995", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 4) },
-            { "5115661026367632863", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "12812685418923919055", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 5) },
-            { "13131740479277027362", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "15334195300678132907", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "2038505773698938555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12090536142661253835", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "4999505377862312410", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "3934290309368153435", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "5951936376654416075", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "13204120207726209723", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "17108987360340581555", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4795705973706796563", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4084106758501882407", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "8127570953237266335", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "7500192998744460131", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2379484884827231127", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2040762223425679479", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4800587664660105589", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "1616603916015535857", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5927467766675317093", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3349519148124496343", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "13477416097954638887", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6942049339361951275", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "5303170164698694791", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "12494969618927201911", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "7875724726741958520", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "13835908664998757647", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "6407471972820516685", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "5385316497510064491", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17377315194963069204", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "3889519976910355277", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "12081835728078383819", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "14923692894655929923", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "580936360000782237", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "10682918518101379579", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13178480813522103091", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "6149673627320838019", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "11077876432364512822", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6062246008880097669", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4806571630436601566", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2458592904274981909", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "2007192658799516915", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "1051506168926530904", std::make_tuple("fully_connected_gpu_bs_f_bsv16_b1", -1) },
-            { "4163359403543480821", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "4232250144427804891", std::make_tuple("fully_connected_gpu_bf_io_gemm", -1) },
-            { "5415319660821122528", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "3286629188347536485", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "13575423234109624706", std::make_tuple("fully_connected_gpu_yxfb_ref", -1) },
-            { "1841155673858789206", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "6708349666663292171", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "5083163738120585821", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "10572945270796129630", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "4436244774193918646", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "12985942652866621579", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "775538461106687677", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "9533360488591027707", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8913823292181409151", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11583985978586657985", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1485662490111767875", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "11872464450773754851", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5364060938737428149", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2613462626256090659", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "14668725050395069435", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "17381516856910544374", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "6450532136308941035", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2321767794934000238", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "7995820969034996638", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "17951403431757222177", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1074748462756364699", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "11955992313739654625", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "11939914680143672459", std::make_tuple("fully_connected_gpu_fb_io_ref", -1) },
-            { "17806712457019493207", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11862259122805366807", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "15201438563802430490", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "5374969798377773063", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "592245952014430043", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "3114869763557037270", std::make_tuple("fully_connected_gpu_fb_oi_ref", -1) },
-            { "17147293671640396193", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "6911215749850066204", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "2814805887448339818", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "1120455113299469776", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "8002233052700666718", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "16436006771518788093", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "11083993858285515074", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "4133424990380177132", std::make_tuple("fully_connected_gpu_bs_f_bsv16_af8_vload", -1) },
-            { "1044978617045366709", std::make_tuple("fully_connected_gpu_fb_io_b8_f8_vload", -1) },
-            { "952318454591754214", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            { "5762878778443755104", std::make_tuple("fully_connected_gpu_bs_f_bsv8_af8_vload", -1) },
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B32_B64.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B32_B64.cpp
deleted file mode 100644
index c41fd5c7e..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B32_B64.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // SKL GT4e
-    void tuning_cache_193B_B32_B64(tuning_data& td)
-    {
-        td.td.insert({
-            
-            { "10794662801660960189", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 749) },
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B8.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B8.cpp
deleted file mode 100644
index d82ede47d..000000000
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_SKL_GT4e_B8.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
-// Copyright (c) 2018 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // SKL GT4e
-    void tuning_cache_193B_B8(tuning_data& td)
-    {
-        td.td.insert({
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-           /* { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "11207257238719531888", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "12348135936862667024", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 202) },
-            { "1540041682425757361", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "6949539207944972855", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4197617702037834389", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 326) },
-            { "13898284586432291433", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "3715177305271762194", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "481328129206881674", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "10001963042016663554", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "15471470494305051299", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 323) },
-            { "14716719350966652036", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "13404888565084206853", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "1249137685908951501", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 320) },
-            { "4840004190985490064", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "5120466856097219243", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "4701832665603867798", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 243) },
-            { "6181272224000872375", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "2030309697153345387", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 335) },
-            { "10728212277329722684", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "877436308867220589", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "18375125668176498051", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "14907097142953816744", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2525260242689556544", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 375) },
-            { "13328449155966085543", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 373) },
-            { "11856266545854830143", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "15993427814066246646", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "2100891581797371600", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 275) },
-            { "12242618640422208652", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "6133592828563353516", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 304) },
-            { "18232278892738147217", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "11992625045241269569", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "12601126285773042005", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 297) },
-            { "5079055505117153635", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 195) },
-            { "7457899998356343871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "6343888265369366589", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "10791067159964399241", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 309) },
-            { "11327097771110264965", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "5245308722062496788", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 314) },
-            { "10792503079194374004", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "4818231379191523896", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 288) },
-            { "2198278382394812839", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "3800011935243649447", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 298) },
-            { "9631545863582097486", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 255) },
-            { "7777333052643961206", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 251) },
-            { "1779941298820543013", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "3621930417735246405", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 263) },
-            { "14435120971846098308", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 188) },
-            { "2893564501191050837", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 181) },
-            { "8108843303778211282", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 256) },
-            { "3682813162987778705", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "15494543914974994991", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "7565221050911842393", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 93) },
-            { "5629670679897666607", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "11754316727756881612", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 96) },
-            { "10990741293315393791", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "17024388383581997032", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "10302338806536775954", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 204) },
-            { "7915318733663535312", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "13702692566238948173", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "2909728331855309274", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "13071545223094862275", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "9631481972809246378", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 211) },
-            { "13540002981450186147", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "7076937538747704750", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "10290107543739998181", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },
-            { "6767245864232675168", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "9287404618748313247", std::make_tuple("convolution_gpu_bfyx_gemm_like", -1) },
-            { "8728178019712933221", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "18251360413872841969", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "18271689282126907793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 294) },
-            { "954796765467489259", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 325) },
-            { "13597240991532942069", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "4135003545872878882", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "11883485911218628865", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 293) },
-            { "2242915551775617989", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 291) },
-            { "10556089809203693400", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 290) },
-            { "3727142736386026852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 333) },
-            { "1622880009460832832", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 182) },
-            { "4437258459981739942", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 296) },
-            { "14691372262153587653", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "12181607120522804433", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 334) },
-            { "3159681096461848644", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "6729785110495533200", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "15322019609805777935", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 183) },
-            { "7024495439434892956", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 295) },
-            { "10416622008071151225", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 173) },
-            { "5796500397424307442", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "15702382940521972117", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "6093575518270471235", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "5805383505505929391", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "1801731858063091191", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 175) },
-            { "1559798212423183813", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 176) },
-            { "5594180958505308003", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "4766071144928072260", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 177) },
-            { "8650948093564284852", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "3883845471211207871", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 248) },
-            { "4366168099274266975", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 171) },
-            { "578703329577922869", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 253) },
-            { "16863960779539003201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "15450609897480659306", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 178) },
-            { "8203550467004532364", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "7431849514656037251", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 257) },
-            { "14484890926084856480", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 179) },
-            { "4424217045094988504", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "7994179151788368291", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 180) },
-            { "15192024816519005250", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 258) },
-            { "4747159205186229582", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 103) },
-            { "5485971317082563152", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 249) },
-            { "18128162750557822655", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 91) },
-            { "12421707187947291166", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "792684262493086891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "941626985322260281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 207) },
-            { "11868551452004726281", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "14352303529756685990", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 206) },
-            { "10702234389482091891", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "3895088069642140043", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 200) },
-            { "5334566325056222430", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 205) },
-            { "8306337702797456793", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "15720507574336564201", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "3277243911383750280", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 208) },
-            { "18150429561058646714", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 240) },
-            { "11169292427557543138", std::make_tuple("convolution_gpu_bfyx_os_iyx_osv16", 203) },
-            { "13933912937625580405", std::make_tuple("fully_connected_gpu_bf_io_input_spatial", -1) },*/
-           
-            });
-    }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_axis.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_axis.cl
index 9a7691b1a..8e85b9c2f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_axis.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_axis.cl
@@ -12,19 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #include "include/common.cl"
 #include "include/data_types.cl"
     
 #define GLOBAL_SIZE 128
 #define LOCAL_SIZE GLOBAL_SIZE
 
-typedef struct /* Index and Value type that holds index and value used in this kernel */
-{
-    uint index; 
-    UNIT_TYPE value; 
-} iav_type;
-
 #ifdef BATCH_AXIS
     #define GAP_SIZE (INPUT0_FEATURE_NUM * INPUT0_SIZE_X * INPUT0_SIZE_Y)
     #define VALUES_NUM INPUT0_BATCH_NUM
@@ -73,6 +66,7 @@ typedef struct /* Index and Value type that holds index and value used in this k
 __attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
 KERNEL(arg_max_gpu_axis)(const __global UNIT_TYPE* input, __global float* output)
 {
+#include "include/arg_max_min_common.cl"
     uint results[TOP_K];
     __local iav_type scratch[LOCAL_SIZE];
     const uint first_dim_id = (uint)get_global_id(1);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_gpu_ref.cl
index 3ad4ac6d7..7db799b33 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/arg_max_min_gpu_ref.cl
@@ -12,19 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-
 #include "include/common.cl"
 #include "include/data_types.cl"
     
 #define GLOBAL_SIZE 128
 #define LOCAL_SIZE GLOBAL_SIZE
 
-typedef struct /* Index and Value type that holds index and value used in this kernel */
-{
-    uint index; 
-    UNIT_TYPE value; 
-} iav_type;
-
 #ifdef MAX_OUT
     #define COMPARE_SIGN <
     #define UNIT_FILL_VAL UNIT_VAL_MIN
@@ -36,6 +29,7 @@ typedef struct /* Index and Value type that holds index and value used in this k
 __attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
 KERNEL(arg_max_gpu_top_k)(const __global UNIT_TYPE* input, __global float* output)
 {
+#include "include/arg_max_min_common.cl"
     uint results[TOP_K];
     __local iav_type scratch[LOCAL_SIZE];
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/batch_norm_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/batch_norm_gpu_ref.cl
index 7fe1a8a42..aaf60c3ce 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/batch_norm_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/batch_norm_gpu_ref.cl
@@ -20,9 +20,17 @@
 __attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
 KERNEL(batch_norm_gpu)(
     const __global UNIT_TYPE* input,
-#ifdef FORWARD
-     __global UNIT_TYPE* inv_var,
-#endif
+	#ifdef MEAN_VAR_OUT
+		__global UNIT_TYPE* mean_out,
+		__global UNIT_TYPE* variance_out,
+	#endif
+	#ifdef SCALE_SHIFT
+	     __global UNIT_TYPE* scale,
+		 __global UNIT_TYPE* shift,
+	#endif
+	#ifdef FORWARD
+		__global UNIT_TYPE* inv_var,
+	#endif
        __global UNIT_TYPE* output)
 {
     __local ACCUMULATOR_TYPE sum[LOCAL_SIZE];
@@ -56,7 +64,9 @@ KERNEL(batch_norm_gpu)(
     }
 
     UNIT_TYPE mean = sum[0] / (OUTPUT_BATCH_NUM * OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
-
+#ifdef MEAN_VAR_OUT
+		mean_out[f] = mean;
+#endif
     sum[local_idx] = 0;
 
     input_idx = GET_DATA_INDEX(INPUT0, local_idx, f, 0, 0);
@@ -83,7 +93,9 @@ KERNEL(batch_norm_gpu)(
     }
 
     float variance = sum[0] / (OUTPUT_BATCH_NUM * OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
-
+#ifdef MEAN_VAR_OUT
+	variance_out[f] = variance;
+#endif
     float inv_variance = (float)(1.0 / sqrt(variance + EPSILON));
 #ifdef FORWARD
     if (local_idx == 0)
@@ -95,9 +107,15 @@ KERNEL(batch_norm_gpu)(
     {
         for (uint x = 0; x < OUTPUT_SIZE_X; x++)
         {
-            output[out_idx] = inv_variance * (input[out_idx] - mean);
+			#ifdef SCALE_SHIFT
+				output[out_idx] = (inv_variance * (input[out_idx] - mean)) * scale[f] + shift[f];
+			#else
+				output[out_idx] = inv_variance * (input[out_idx] - mean);
+			#endif
             out_idx += OUTPUT_X_PITCH;
         }
         out_idx += OUTPUT_Y_PITCH - OUTPUT_SIZE_X * OUTPUT_X_PITCH;
     }
-}
-\ No newline at end of file
+}
+
+#undef LOCAL_SIZE
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/broadcast_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/broadcast_gpu_ref.cl
index 286608ff5..ecda287cd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/broadcast_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/broadcast_gpu_ref.cl
@@ -16,16 +16,21 @@
 
 
 KERNEL(broadcast_gpu_ref)(
-    const __global UNIT_TYPE* input,
-    __global UNIT_TYPE* output)
+    const __global INPUT0_TYPE* input,
+    __global INPUT0_TYPE* output)
 {
     // [CONSTEXPR]
     // Input sizes:
-    const uint in_sx = INPUT0_SIZE_X;
-    const uint in_sy = INPUT0_SIZE_Y;
-    const uint in_sf = INPUT0_FEATURE_NUM;
-    const uint in_sb = INPUT0_BATCH_NUM;
+    uint4 input_indices;
+    input_indices[0] = INPUT0_BATCH_NUM;
+    input_indices[1] = INPUT0_FEATURE_NUM;
+    input_indices[2] = INPUT0_SIZE_Y;
+    input_indices[3] = INPUT0_SIZE_X;
 
+    const uint in_sx = input_indices[BROADCAST_ORDER[3]];
+    const uint in_sy = input_indices[BROADCAST_ORDER[2]];
+    const uint in_sf = input_indices[BROADCAST_ORDER[1]];
+    const uint in_sb = input_indices[BROADCAST_ORDER[0]];
 
     const uint out_x  = (uint) get_global_id(0);
     const uint out_y  = (uint) get_global_id(1);
@@ -40,9 +45,8 @@ KERNEL(broadcast_gpu_ref)(
     const uint in_f = out_f % in_sf;
     const uint in_b = out_b % in_sb;
 
-    const uint in_pos  = GET_DATA_INDEX(INPUT0, in_b,  in_f,  in_y,  in_x);
+    const uint in_pos =  INPUT0_OFFSET + in_x + in_sx * (in_y + in_sy * (in_f + in_sf * in_b));
     const uint out_pos = GET_DATA_INDEX(OUTPUT, out_b, out_f, out_y, out_x);
 
-
     output[out_pos] = input[in_pos];
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/contract_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/contract_ref.cl
new file mode 100644
index 000000000..b15787539
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/contract_ref.cl
@@ -0,0 +1,64 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+
+KERNEL(contract_ref)(
+    const __global INPUT0_TYPE* input,
+    __global INPUT0_TYPE* output)
+{
+    INPUT0_TYPE out_val = REDUCE_SEED;
+
+#if REDUCE_B
+    for (uint in_b = 0; in_b < INPUT0_BATCH_NUM; ++in_b) {
+#else
+    const uint in_b = (uint) get_global_id(DIM_B);
+#endif
+
+#if REDUCE_F
+    for (uint in_f = 0; in_f < INPUT0_FEATURE_NUM; ++in_f) {
+#else
+    const uint in_f = (uint) get_global_id(DIM_F);
+#endif
+
+#if REDUCE_Y
+    for (uint in_y = 0; in_y < INPUT0_SIZE_Y; ++in_y) {
+#else
+    const uint in_y = (uint) get_global_id(DIM_Y);
+#endif
+
+#if REDUCE_X
+    for (uint in_x = 0; in_x < INPUT0_SIZE_X; ++in_x) {
+#else
+    const uint in_x = (uint) get_global_id(DIM_X);
+#endif
+
+    out_val = REDUCE_OPERATION(out_val, input[GET_DATA_INDEX(INPUT0, in_b, in_f, in_y, in_x)]);
+
+#if REDUCE_X
+    }
+#endif
+#if REDUCE_Y
+    }
+#endif
+#if REDUCE_F
+    }
+#endif
+#if REDUCE_B
+    }
+#endif
+
+    output[GET_DATA_INDEX(OUTPUT, 0, get_global_id(0), get_global_id(1), get_global_id(2))] = out_val;
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1.cl
index bfba2d945..cf2500177 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1.cl
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 #if FP16_UNIT_USED
     #define ALIGNED_BLOCK_READ8(ptr, byte_offset) as_half8(intel_sub_group_block_read_us8((const __global ushort*)(ptr) + (byte_offset)))
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1_opt.cl
new file mode 100644
index 000000000..87736664a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_1x1_opt.cl
@@ -0,0 +1,238 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+#define SIMD_SIZE 8
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+KERNEL(convolution)(
+    __global INPUT0_TYPE* input, 
+    __global OUTPUT_TYPE* output, 
+    __global FILTER_TYPE* weights, 
+#if BIAS_TERM
+    __global BIAS_TYPE* biases,
+#endif
+    uint split_idx)
+{
+    const uint group_x = get_group_id(0) * OUT_BLOCK_WIDTH;
+    const uint group_y = get_group_id(1) * OUT_BLOCK_HEIGHT;
+    const uint f = (get_group_id(2) * SIMD_SIZE * OUT_BLOCK_DEPTH) % OUTPUT_FEATURE_NUM;
+    const uint b = (get_group_id(2) * SIMD_SIZE * OUT_BLOCK_DEPTH) / OUTPUT_FEATURE_NUM;;
+
+    const uint ifm_part = get_sub_group_id();
+    uint ifm_offset = ifm_part* OUT_BLOCK_DEPTH/2;
+
+    UNIT_TYPE in[OUT_BLOCK_HEIGHT];
+    UNIT_TYPE dotProd0[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH/2];
+    UNIT_TYPE dotProd1[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH/2];
+
+    for(uint i = 0; i < OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH/2; i++)
+    {
+        dotProd0[i] = 0;
+        dotProd1[i] = 0;
+    }
+
+#if OUT_BLOCK_DEPTH == 8
+    const uint filter_offset = f * FILTER_IFM_NUM + ifm_part*(64 * FILTER_IFM_NUM/2);
+#elif OUT_BLOCK_DEPTH == 4
+    const uint filter_offset = f * FILTER_IFM_NUM + ifm_part*(32 * FILTER_IFM_NUM/2);
+#elif OUT_BLOCK_DEPTH == 2
+    const uint filter_offset = f * FILTER_IFM_NUM + ifm_part*(16 * FILTER_IFM_NUM/2);
+#else
+    const uint filter_offset = f*FILTER_OFM_PITCH + ifm_part*(FILTER_IFM_NUM/2) * FILTER_IFM_PITCH;
+#endif
+    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + group_x * INPUT0_X_PITCH + group_y * INPUT0_Y_PITCH + ifm_part*(FILTER_IFM_NUM/2) * INPUT0_FEATURE_PITCH;
+
+    //--------------------------------------------------------------------
+    // main computation phase
+    //--------------------------------------------------------------------
+
+    for (uint k = 0; k < FILTER_IFM_NUM/2; ++k)
+    {
+        for(uint i = 0; i < OUT_BLOCK_HEIGHT; i++)
+        {
+            const uint in_offset = input_offset + get_sub_group_local_id() + i * INPUT0_Y_PITCH + k * INPUT0_FEATURE_PITCH;
+            in[i] = input[in_offset];
+        }
+
+#if OUT_BLOCK_DEPTH == 8
+        float8 w = as_float8(intel_sub_group_block_read8((__global uint*)weights + filter_offset + k * 64));
+#elif OUT_BLOCK_DEPTH == 4
+        float4 w = as_float4(intel_sub_group_block_read4((__global uint*)weights + filter_offset + k * 32));
+#elif OUT_BLOCK_DEPTH == 2
+        float2 w = as_float2(intel_sub_group_block_read2((__global uint*)weights + filter_offset + k * 16));
+#endif
+
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                float _in = intel_sub_group_shuffle(in[br], bc);
+                for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+                {
+                    dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += _in * w[bd];
+                    dotProd1[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += _in * w[bd + OUT_BLOCK_DEPTH/2];
+                }
+            }
+        }
+    }
+
+    __local float slm_vals[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH * SIMD_SIZE];
+    __local float* slm_p = &slm_vals[0];
+    //--------------------------------------------------------------------
+    // second sub_group in workgroup task
+    //--------------------------------------------------------------------
+    
+    if(ifm_part == 1)
+    {
+        for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+        {
+            for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+            {
+                for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+                {
+                    slm_vals[bc + OUT_BLOCK_WIDTH * (get_sub_group_local_id() + SIMD_SIZE * (br + OUT_BLOCK_HEIGHT * bd))] = dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+                    dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = dotProd1[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+                }
+            }
+        }
+
+    }
+
+    //--------------------------------------------------------------------
+    // first sub_group in workgroup task
+    //--------------------------------------------------------------------
+    
+    if(ifm_part == 0)
+    {
+        for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+        {
+            for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+            {
+                uint width_offset = 0;
+                #if (OUT_BLOCK_WIDTH) >= 4
+                const uint slm_off = OUT_BLOCK_WIDTH * (get_sub_group_local_id() + SIMD_SIZE * (br + OUT_BLOCK_HEIGHT * (bd + OUT_BLOCK_DEPTH/2) ));
+                float4 tmp = (float4)(dotProd1[width_offset + 0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd1[width_offset + 1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd1[width_offset + 2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd1[width_offset + 3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+                vstore4(tmp, 0, slm_p + slm_off);
+                width_offset += 4;
+                #endif
+                for(uint bc = width_offset; bc < OUT_BLOCK_WIDTH; bc++)
+                {
+                    slm_vals[bc + OUT_BLOCK_WIDTH * (get_sub_group_local_id() + SIMD_SIZE * (br + OUT_BLOCK_HEIGHT * (bd+OUT_BLOCK_DEPTH/2) ))] = dotProd1[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+                }
+            }
+        }
+
+    }
+
+    //--------------------------------------------------------------------
+    // add bias phase
+    //--------------------------------------------------------------------
+    
+    #if BIAS_TERM
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        float _bias = biases[f + (bd + ifm_offset) * SIMD_SIZE + get_sub_group_local_id()];
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += _bias;
+            }
+        }
+    }
+    #endif
+
+    barrier(CLK_LOCAL_MEM_FENCE); // we want to add barrier after biases addition so that the long slm write part latency is shadowed by it
+
+    //--------------------------------------------------------------------
+    // sum sub-group results + activation phase
+    //--------------------------------------------------------------------
+    
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            uint width_offset = 0;
+            #if (OUT_BLOCK_WIDTH) >= 4
+            const uint slm_off = OUT_BLOCK_WIDTH * (get_sub_group_local_id() + SIMD_SIZE * (br + OUT_BLOCK_HEIGHT * (bd + ifm_offset) ));
+            float4 tmp = vload4(0, slm_p + slm_off);
+            dotProd0[0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += tmp[0];
+            dotProd0[1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += tmp[1];
+            dotProd0[2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += tmp[2];
+            dotProd0[3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += tmp[3];
+
+            dotProd0[0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION(dotProd0[0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M, NL_N);;
+            dotProd0[1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION(dotProd0[1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M, NL_N);;
+            dotProd0[2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION(dotProd0[2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M, NL_N);;
+            dotProd0[3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION(dotProd0[3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M, NL_N);;
+
+            width_offset += 4;
+            #endif
+
+            for(uint bc = width_offset; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += slm_vals[bc + OUT_BLOCK_WIDTH * (get_sub_group_local_id() + SIMD_SIZE * (br + OUT_BLOCK_HEIGHT * (bd + ifm_offset) ))];
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION(dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M, NL_N);;
+            }
+        }
+    }
+
+    //--------------------------------------------------------------------
+    // output phase
+    //--------------------------------------------------------------------
+
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            uint dst_index = GET_DATA_INDEX(OUTPUT, b, f + (bd + ifm_offset) * SIMD_SIZE + get_sub_group_local_id(), group_y + br, group_x);
+            uint out_vstore_offset = 0;
+            #if (OUT_BLOCK_WIDTH >= 8)
+            float8 tmp = (float8)(dotProd0[out_vstore_offset + 0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 4 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 5 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 6 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 7 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+            vstore8(tmp, 0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+            out_vstore_offset += 8;
+            #endif
+            #if (OUT_BLOCK_WIDTH % 8) > 3
+            float4 tmp = (float4)(dotProd0[out_vstore_offset + 0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                  dotProd0[out_vstore_offset + 3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+            vstore4(tmp, 0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+            out_vstore_offset += 4;
+            #endif
+            #if (OUT_BLOCK_WIDTH % 4) > 1
+            float2 tmp2 = (float2)(dotProd0[out_vstore_offset + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                 dotProd0[out_vstore_offset+1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+            vstore2(tmp2, 0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+            out_vstore_offset += 2;
+            #endif
+            //dst_index += 4 * OUTPUT_X_PITCH;
+            for(uint bc = out_vstore_offset; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                output[dst_index + bc * OUTPUT_X_PITCH] = dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+            }
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_depthwise_weights_lwg.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_depthwise_weights_lwg.cl
index f21b03d27..9cec96f8e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_depthwise_weights_lwg.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_depthwise_weights_lwg.cl
@@ -103,4 +103,4 @@ KERNEL(convolution_depthwise_weights_lwg)(
     const uint dst_index = GET_DATA_INDEX(OUTPUT, b, f, y, x) + out_split_offset;
     output[dst_index] = ACTIVATION(dotProd, NL_M, NL_N);
     
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl
index c28f328ea..eb8af3d88 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp16.cl
@@ -167,7 +167,7 @@ KERNEL(convolution_f16)(
             #if (PADDING_SIZE_X == 1) && (INPPUT_PADDING_Y == 1) && (FILTER_SIZE_X == 3) && (FILTER_SIZE_Y == 3)
             if ((y_offset +  patch_row < 0) || ((y_offset + patch_row) >= INPUT_SIZE_Y))
             {
-                blockA00 = half_zeros;
+                blockA00 = { 0 };
             }
             else
             {
@@ -178,7 +178,7 @@ KERNEL(convolution_f16)(
             #else
             if ((y_offset +  patch_row < 0) || ((y_offset + patch_row) >= INPUT_SIZE_Y))
             {
-                blockA00 = half_zeros;
+                blockA00 = { 0 };
             }
             else
             {
@@ -193,7 +193,7 @@ KERNEL(convolution_f16)(
             #pragma error
             if ((y_offset +  patch_row < 0) || ((y_offset + patch_row) >= INPUT_SIZE_Y))
             {
-                blockA00 = half_zeros;
+                blockA00 = { 0 };
             }
             else
             {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp32.cl
index 0366f8fc3..0066e6e97 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_gemm_like_fp32.cl
@@ -15,6 +15,7 @@
 */
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 #define TILE_M          2
 #define TILE_K          FILTER_SIZE_X
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl
index e70ca2e5c..07fd6338c 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl
@@ -95,7 +95,7 @@ KERNEL(convolution_gpu_bfyx_os_iyx_osv16)(
 
     uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
     in_addr = batch_idx * INPUT0_BATCH_PITCH;
-    in_addr += in_split_offset + INPUT0_OFFSET_WITH_PADDING + or * STRIDE_SIZE_Y * INPUT0_Y_PITCH + oc * STRIDE_SIZE_X + lid;
+    in_addr += in_split_offset + INPUT0_OFFSET_WITH_PADDING + (or * STRIDE_SIZE_Y * INPUT0_Y_PITCH) + (oc * STRIDE_SIZE_X + lid) * INPUT0_X_PITCH;
 
     for(int kd = 0; kd < FILTER_IFM_NUM; kd++)  // _ID = 3, RGB
     {
@@ -107,7 +107,7 @@ KERNEL(convolution_gpu_bfyx_os_iyx_osv16)(
             // Horizontal position in input block after read.
             const uint in_block_next_x_pos = in_block_pos % IN_BLOCK_WIDTH + SUB_GROUP_SIZE;
 
-            in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+            in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + (in_block_pos % IN_BLOCK_WIDTH) * INPUT0_X_PITCH];
 
             // If we have row break, move to the next row.
             if (in_block_next_x_pos == IN_BLOCK_WIDTH)
@@ -120,7 +120,7 @@ KERNEL(convolution_gpu_bfyx_os_iyx_osv16)(
             const uint in_block_next_x_pos = in_block_pos % IN_BLOCK_WIDTH + SUB_GROUP_SIZE;
 
             if (in_block_next_x_pos <= IN_BLOCK_WIDTH) { //
-                in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+                in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + (in_block_pos % IN_BLOCK_WIDTH) * INPUT0_X_PITCH];
 
                 // If we have row break, move to the next row.
                 if (in_block_next_x_pos == IN_BLOCK_WIDTH)
@@ -132,11 +132,11 @@ KERNEL(convolution_gpu_bfyx_os_iyx_osv16)(
                 const uint sg_br_pos = IN_BLOCK_WIDTH - in_block_pos % IN_BLOCK_WIDTH;
 
                 if (lid < sg_br_pos)
-                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + (in_block_pos % IN_BLOCK_WIDTH) * INPUT0_X_PITCH];
                 // We have row break inside sub-group. Need to move to next line.
                 tmp_in_addr += INPUT0_Y_PITCH;
                 if (lid >= sg_br_pos)
-                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr - sg_br_pos];
+                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr - (sg_br_pos * INPUT0_X_PITCH)];
 
                 // If we have another row break, move to the next row.
                 if (in_block_next_x_pos == 2 * IN_BLOCK_WIDTH)
@@ -211,17 +211,51 @@ KERNEL(convolution_gpu_bfyx_os_iyx_osv16)(
         }
     }
 
+
+//--------------------------------------------------------------------
+// output phase
+//--------------------------------------------------------------------
+
 #ifdef LEFTOVERS
     if (feature_idx < OUTPUT_FEATURE_NUM)
 #endif
     for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
         if(!(or + r >= OUTPUT_SIZE_Y))
         {
+#if (OUTPUT_SIZE_X % OUTPUT_BLOCK_WIDTH) == 0 // in this case we don't need to check if we're outside of X boundaries
+            uint out_vstore_offset = 0;
+            #if (OUT_BLOCK_WIDTH % 8) > 3
+            MAKE_VECTOR_TYPE(UNIT_TYPE, 4) tmp = MAKE_VECTOR_TYPE(UNIT_TYPE, 4)(
+                out[out_vstore_offset + 0 + r * OUTPUT_BLOCK_WIDTH],
+                out[out_vstore_offset + 1 + r * OUTPUT_BLOCK_WIDTH],
+                out[out_vstore_offset + 2 + r * OUTPUT_BLOCK_WIDTH],
+                out[out_vstore_offset + 3 + r * OUTPUT_BLOCK_WIDTH]
+            );
+
+            vstore4(tmp, 0, output + out_addr + r * OUTPUT_Y_PITCH + out_vstore_offset * OUTPUT_X_PITCH);
+            out_vstore_offset += 4;
+            #endif
+
+            #if (OUT_BLOCK_WIDTH % 4) > 1
+            MAKE_VECTOR_TYPE(UNIT_TYPE, 2) tmp2 = MAKE_VECTOR_TYPE(UNIT_TYPE, 2)(
+                out[out_vstore_offset + 0 + r * OUTPUT_BLOCK_WIDTH],
+                out[out_vstore_offset + 1 + r * OUTPUT_BLOCK_WIDTH]
+            );
+
+            vstore2(tmp2, 0, output + out_addr + r * OUTPUT_Y_PITCH + out_vstore_offset * OUTPUT_X_PITCH);
+            out_vstore_offset += 2;
+            #endif
+            for(uint c = out_vstore_offset; c < OUTPUT_BLOCK_WIDTH; c++) {
+                // this does a scattered write to 16 different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
+                output[out_addr + r * OUTPUT_Y_PITCH + c] = out[r * OUTPUT_BLOCK_WIDTH + c];
+            }
+#else
             for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
                 // this does a scattered write to 16 different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
                 if(!(oc + c >= OUTPUT_SIZE_X))
                     output[out_addr + r * OUTPUT_Y_PITCH + c] = out[r * OUTPUT_BLOCK_WIDTH + c];
             }
+#endif
         }
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16_2_sg.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16_2_sg.cl
new file mode 100644
index 000000000..a7566fd1d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16_2_sg.cl
@@ -0,0 +1,254 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/data_types.cl"
+
+#define SIMD_SIZE SUB_GROUP_SIZE
+// ---------------------------------------------------------------------------------------------------------------------
+// Just-in-time macro definitions:
+// ---------------------------------------------------------------------------------------------------------------------
+
+// Required JIT constants:
+//  - INPUT                - [tensor] Input dimensions (batch, spatial and feature).
+//  - OUTPUT               - [tensor] Output dimensions (batch, spatial and feature).
+//  - STRIDE               - [tensor] Stride (only spatial). Factors that describe step size in X or Y dimension of
+//                           input position of application of convolution filter when next ouput value
+//                           (step 1 in in X or Y dimension of output) is computed.
+//  - INPUT0_OFFSET        - [tensor] Offset for the first element
+//                           initial offset input position of application of convolution filter and output position.
+//  - FP16_SUPPORTED       - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
+//  - FP16_UNIT_USED       - [0/1] Value indicating that current kernel should use FP16.
+//  - UNIT_TYPE            - Type of unit of input/output/weight/bias.
+//  - UNIT_VAL_ZERO        - Literal of current UNIT_TYPE that represents 0.
+//  - RELU                 - [0/1] Indicates that ReLU activation function should be used on output.
+//  - NEGATIVE_SLOPE       - [float] Factor for negative output values (required when ReLU is specified).
+//
+//  - SUB_GROUP_SIZE       - [int] Size of used subgroup (SIMD).
+//  - LEFTOVERS            - [int] Optional parameter, required only when number of ofm is not dividable by SUB_GROUP_SIZE
+//                           see comment for FEATURES_THREADS_PER_BATCH for more informations
+
+/*
+gpu::make_jit_constant("OUTPUT_LIMIT",              output_size),
+gpu::make_jit_constant("FILTER",                    filter_mem.argument().size),
+gpu::make_jit_constant("FILTER_ARRAY_NUM",          split),
+gpu::make_jit_constant("OUTPUT_BLOCK_WIDTH",        _kernel_data.block_width));
+gpu::make_jit_constant("OUTPUT_BLOCK_HEIGHT",       _kernel_data.block_height));
+gpu::make_jit_constant("IN_BLOCK_ARRAY_SIZE",       _kernel_data.input_block_array_size));
+gpu::make_jit_constant("IN_BLOCK_WIDTH",            _kernel_data.input_block_width));
+gpu::make_jit_constant("PREFETCH",                  _kernel_data.prefetch));
+if (_kernel_data.leftovers)
+    gpu::make_jit_constant("LEFTOVERS",             _kernel_data.leftovers));
+*/
+
+// FEATURES_THREADS_PER_BATCH defines how many threads in z-dimension are processing single batch.
+// ideally, z-dimension of value n should indicate processing of n-th output feature. however, since
+// threads are stack in groups of SUB_GROUP_SIZE, when number of ofm is not dividable by SUB_GROUP_SIZE
+// there are dummy threads added in z-dimension in count of LEFTOVERS. We need to take them into consideration
+// while calculating batch's id (see lines 86-87). Values calculated by dummy threads are discarded at line 210.
+#ifdef LEFTOVERS
+#define FEATURES_THREADS_PER_BATCH (FILTER_OFM_NUM + LEFTOVERS)
+#else
+#define FEATURES_THREADS_PER_BATCH (FILTER_OFM_NUM)
+#endif
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+__attribute__((reqd_work_group_size(1, 1, 2*SUB_GROUP_SIZE)))
+KERNEL(convolution_gpu_bfyx_os_iyx_osv16_2_sg)(
+    const __global UNIT_TYPE* input,
+    __global UNIT_TYPE* output,
+    const __global UNIT_TYPE* weights,
+#if BIAS_TERM
+    const __global UNIT_TYPE* bias,
+#endif   
+    uint split_idx) // TODO: removing this parameter cause a performance degradation... :)
+{
+    const uint oc  = (uint)get_global_id(0) * OUTPUT_BLOCK_WIDTH;  // oc = Output Column
+    const uint or  = (uint)get_global_id(1) * OUTPUT_BLOCK_HEIGHT; // or = Output Row
+    const uint fm  = get_group_id(2) * SUB_GROUP_SIZE + get_sub_group_local_id();//get_global_id(2);                    // fm = Feature Map = od = Output Depth
+    const uint lid = get_sub_group_local_id();
+
+    const uint ifm_part = get_sub_group_id();
+    __local float slm_vals[OUTPUT_BLOCK_WIDTH * OUTPUT_BLOCK_HEIGHT * SIMD_SIZE];
+
+    uint batch_idx = fm / FEATURES_THREADS_PER_BATCH;
+    uint feature_idx = fm % FEATURES_THREADS_PER_BATCH;
+    uint fmg = feature_idx / SUB_GROUP_SIZE;
+
+    UNIT_TYPE in[IN_BLOCK_ARRAY_SIZE];
+    UNIT_TYPE out[OUTPUT_BLOCK_WIDTH * OUTPUT_BLOCK_HEIGHT];
+    UNIT_TYPE w[PREFETCH];
+    uint in_addr;
+    uint weight_addr = fmg * FILTER_IFM_NUM * FILTER_SIZE_X * FILTER_SIZE_Y * SUB_GROUP_SIZE + lid;
+    weight_addr += ifm_part * SUB_GROUP_SIZE * FILTER_IFM_NUM/2 * FILTER_SIZE_X * FILTER_SIZE_Y;
+
+    for(int i = 0; i < (OUTPUT_BLOCK_WIDTH * OUTPUT_BLOCK_HEIGHT); i++) {
+        out[i] = UNIT_VAL_ZERO;
+    }
+
+    uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
+    in_addr = batch_idx * INPUT0_BATCH_PITCH;
+    in_addr += in_split_offset + INPUT0_OFFSET_WITH_PADDING + or * STRIDE_SIZE_Y * INPUT0_Y_PITCH + oc * STRIDE_SIZE_X + lid;
+    in_addr += ifm_part * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM/2;
+
+    for(int kd = 0; kd < FILTER_IFM_NUM/2; kd++)  // _ID = 3, RGB
+    {
+        uint tmp_in_addr = in_addr;
+
+#if IN_BLOCK_WIDTH % SUB_GROUP_SIZE == 0
+        __attribute__((opencl_unroll_hint(IN_BLOCK_ARRAY_SIZE)))
+        for(uint in_block_pos = 0; in_block_pos < IN_BLOCK_ARRAY_SIZE * SUB_GROUP_SIZE; in_block_pos += SUB_GROUP_SIZE) {
+            // Horizontal position in input block after read.
+            const uint in_block_next_x_pos = in_block_pos % IN_BLOCK_WIDTH + SUB_GROUP_SIZE;
+
+            in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+
+            // If we have row break, move to the next row.
+            if (in_block_next_x_pos == IN_BLOCK_WIDTH)
+                tmp_in_addr += INPUT0_Y_PITCH;
+        }
+#elif (2 * IN_BLOCK_WIDTH) % SUB_GROUP_SIZE == 0
+        __attribute__((opencl_unroll_hint(IN_BLOCK_ARRAY_SIZE)))
+        for(uint in_block_pos = 0; in_block_pos < IN_BLOCK_ARRAY_SIZE * SUB_GROUP_SIZE; in_block_pos += SUB_GROUP_SIZE) {
+            // Horizontal position in input block after read.
+            const uint in_block_next_x_pos = in_block_pos % IN_BLOCK_WIDTH + SUB_GROUP_SIZE;
+
+            if (in_block_next_x_pos <= IN_BLOCK_WIDTH) { //
+                in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+
+                // If we have row break, move to the next row.
+                if (in_block_next_x_pos == IN_BLOCK_WIDTH)
+                    tmp_in_addr += INPUT0_Y_PITCH;
+            }
+            else {
+                // TODO: Generalize this step to relax IN_BLOCK_WIDTH restrictions.
+                // Position in sub-group on which new row need to be read.
+                const uint sg_br_pos = IN_BLOCK_WIDTH - in_block_pos % IN_BLOCK_WIDTH;
+
+                if (lid < sg_br_pos)
+                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+                // We have row break inside sub-group. Need to move to next line.
+                tmp_in_addr += INPUT0_Y_PITCH;
+                if (lid >= sg_br_pos)
+                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr - sg_br_pos];
+
+                // If we have another row break, move to the next row.
+                if (in_block_next_x_pos == 2 * IN_BLOCK_WIDTH)
+                    tmp_in_addr += INPUT0_Y_PITCH;
+            }
+        }
+#else
+    #error IN_BLOCK_WIDTH must be multiple of SUB_GROUP_SIZE or half of SUB_GROUP_SIZE. Other scenarios are not currently implemented.
+#endif
+
+        //move to next filter
+        in_addr += INPUT0_FEATURE_PITCH;
+
+        for(int pf=0; pf<PREFETCH; pf++) {
+            w[pf] = weights[weight_addr]; weight_addr += SUB_GROUP_SIZE;
+        }
+
+        uint wi = 0;
+        uint kr = 0; // kr = Kernel Row
+        LOOP(FILTER_SIZE_Y, kr,  // LOOP is a macro that unrolls the loop.
+        {
+            uint kc = 0; // kc = Kernel Column
+            LOOP(FILTER_SIZE_X, kc,
+            {
+                //w = weights[weight_addr];
+                for(uint br=0; br<OUTPUT_BLOCK_HEIGHT; br++) {
+                    for(uint bc=0; bc<OUTPUT_BLOCK_WIDTH; bc++) {
+
+#if IN_BLOCK_WIDTH != SUB_GROUP_SIZE
+                        //if we fix the programming model, then we could use a nice simple 2d array: val = in[br * STRIDE_SIZE_Y + kr][bc * STRIDE_SIZE_X + kc];
+                        UNIT_TYPE val = intel_sub_group_shuffle( in[(((br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y) * IN_BLOCK_WIDTH) + (bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X)) / SUB_GROUP_SIZE],
+                                                                    (((br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y) * IN_BLOCK_WIDTH) + (bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X)) % SUB_GROUP_SIZE);
+#else
+                        UNIT_TYPE val = intel_sub_group_shuffle( in[br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y], bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X);
+#endif
+
+                        out[br * OUTPUT_BLOCK_WIDTH + bc] = mad(w[wi % PREFETCH], val, out[br * OUTPUT_BLOCK_WIDTH + bc]);
+                    }
+                }
+                w[wi % PREFETCH] = weights[weight_addr];
+                weight_addr += SUB_GROUP_SIZE; // weights must be stored in just the right SIMD swizzled format for this to work, see host code for details.
+                wi++;
+            });
+        });
+        // addr went beyond due to prefetch so move it back to correct location.
+        weight_addr -= PREFETCH * SUB_GROUP_SIZE;
+    }
+
+    if(ifm_part == 1)
+    {
+        for(uint br=0; br<OUTPUT_BLOCK_HEIGHT; br++) {
+            for(uint bc=0; bc<OUTPUT_BLOCK_WIDTH; bc++) {
+                slm_vals[get_sub_group_local_id() + SIMD_SIZE * (bc + OUTPUT_BLOCK_WIDTH * (br) ) ] = out[br * OUTPUT_BLOCK_WIDTH + bc];
+            }
+        }
+    }
+
+    uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
+    uint out_addr = OUTPUT_OFFSET;
+    out_addr += batch_idx * OUTPUT_BATCH_PITCH;
+    out_addr += out_split_offset + feature_idx * OUTPUT_FEATURE_PITCH; // out_addr indices into start of 16 feature maps.
+    out_addr += or * OUTPUT_Y_PITCH + oc;  // offset for the 4x3 block that this workitem is working on;
+
+    if(ifm_part == 0)
+{
+
+#if BIAS_TERM
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+#if BIAS_PER_OUTPUT
+            const unsigned bias_index = feature_idx*OUTPUT_SIZE_X*OUTPUT_SIZE_Y + or*OUTPUT_SIZE_X + oc;
+#else
+            const unsigned bias_index = feature_idx;
+#endif
+            out[r * OUTPUT_BLOCK_WIDTH + c] += bias[bias_index];
+        }
+    }
+#endif
+}
+
+    barrier(CLK_LOCAL_MEM_FENCE); // we want to add barrier after biases addition so that the long slm write part latency is shadowed by it
+
+    if(ifm_part == 0)
+{
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+            out[r * OUTPUT_BLOCK_WIDTH + c] += slm_vals[get_sub_group_local_id() + SIMD_SIZE * (c + OUTPUT_BLOCK_WIDTH * r)];
+            out[r * OUTPUT_BLOCK_WIDTH + c] = ACTIVATION(out[r * OUTPUT_BLOCK_WIDTH + c], NL_M, NL_N);
+        }
+    }
+
+#ifdef LEFTOVERS
+    if (feature_idx < OUTPUT_FEATURE_NUM)
+#endif
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        if(!(or + r >= OUTPUT_SIZE_Y))
+        {
+            for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+                // this does a scattered write to 16 different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
+                if(!(oc + c >= OUTPUT_SIZE_X))
+                    output[out_addr + r * OUTPUT_Y_PITCH + c] = out[r * OUTPUT_BLOCK_WIDTH + c];
+            }
+        }
+    }
+
+}
+
+}
+
+#undef FEATURES_THREADS_PER_BATCH
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_ref.cl
index a36c0209b..0e8a26487 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_bfyx_ref.cl
@@ -52,9 +52,6 @@ KERNEL(convolution)(
 #else
     const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
 #endif
-    const uint filter_offset = f*FILTER_OFM_PITCH;
-    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + in_split_offset;
-
     for (uint k = 0; k < FILTER_IFM_NUM; ++k)
     {
         for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
@@ -71,8 +68,18 @@ KERNEL(convolution)(
 
                     if(!zero_x)
                     {
-                        uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH + k*INPUT0_FEATURE_PITCH;
-                        uint filter_idx = filter_offset + k*FILTER_IFM_PITCH + j*FILTER_Y_PITCH + i*FILTER_X_PITCH;
+                        uint input_idx =
+                            GET_DATA_INDEX(
+                                INPUT0, b, k, input_offset_y, input_offset_x)
+                            + in_split_offset;
+                        uint filter_idx = GET_FILTER_INDEX(FILTER, f, k, j, i);
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+                        filter_idx += split_idx * FILTER_LENGTH;
+#endif
+#ifdef LOCAL_CONVOLUTION
+                        filter_idx += FILTER_SIZE_X * FILTER_SIZE_Y
+                            * (x + OUTPUT_SIZE_X * y);
+#endif
 #if QUANTIZATION_TERM
                         dotProd += (int)input[input_idx] * (int)weights[filter_idx];
 #else
@@ -85,10 +92,15 @@ KERNEL(convolution)(
     }
 
 #if BIAS_TERM
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+    const uint bias_offset = 0;
+#endif
 #if   BIAS_PER_OUTPUT
-    const uint bias_index = GET_DATA_INDEX(BIAS, b, f, y, x);
+    const uint bias_index = bias_offset + GET_DATA_INDEX(BIAS, b, f, y, x);
 #elif BIAS_PER_OFM
-    const uint bias_index = f;
+    const uint bias_index = bias_offset + f;
 #endif
 #if QUANTIZATION_TERM
 #if CALIBRATION_TERM
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32.cl
new file mode 100644
index 000000000..a495e1d3a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_byx8_f4__fs_bs_yx_bsv4_fsv32.cl
@@ -0,0 +1,170 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+
+#include "include/data_types.cl"
+#include "include/fetch.cl"
+#include "include/mmad.cl"
+
+#define FILTER_IFM_SLICES ((FILTER_IFM_NUM + 3) /4)
+#define FILTER_SIZE_X_SLICES ((FILTER_SIZE_X + 7) / 8)
+
+#define OUT_BLOCK_HEIGHT 4
+#define WEIGHTS_PER_WORKITEM 4 // currently needs to be set to 4, check output stage and float4 on quantizations etc.
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION \
+    out[w] = convert_uchar_sat((float)dotProd[w*OUT_BLOCK_HEIGHT + h][i] * SCALE + bias_f[w]);
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION \
+    out[w] = convert_uchar_sat(dotProd[w*OUT_BLOCK_HEIGHT + h][i]);
+
+#else
+
+#define QUANTIZATION \
+    out[w] = as_uchar( ACTIVATION( convert_char( round( ( (float)dotProd[w*OUT_BLOCK_HEIGHT + h][i] * quant_f[w] * I_QF + bias_f[w]) * calib_f[w])), NL_M, NL_N));
+
+#endif
+
+__attribute__((intel_reqd_sub_group_size(8)))
+KERNEL(convolution_gpu_byx8_f4_fs_bs_yx_bsv4_fsv32)(
+    __global INPUT0_TYPE* input, 
+    __global OUTPUT_TYPE* output, 
+    __global FILTER_TYPE* weights, 
+    __global BIAS_TYPE* biases,
+    __global float* quantizations,
+#if CALIBRATION_TERM
+    __global float* calibrations,
+#endif
+    uint split_idx)
+{
+    const uint x = get_group_id(1) * 8;
+    const uint y = get_group_id(2) * OUT_BLOCK_HEIGHT;
+
+    const uint f = (get_group_id(0) * 8 * WEIGHTS_PER_WORKITEM ) % OUTPUT_FEATURE_NUM;
+    const uint b = (get_group_id(0) * 8 * WEIGHTS_PER_WORKITEM) / OUTPUT_FEATURE_NUM;
+
+    int8 dotProd[OUT_BLOCK_HEIGHT * WEIGHTS_PER_WORKITEM] =  { 0 };
+
+    const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    const uint filter_offset = f*FILTER_OFM_PITCH;
+    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET;
+
+    for (uint k = 0; k < FILTER_IFM_SLICES; ++k)
+    {
+        __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+        for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
+        {
+            const int input_offset_y = input_y + j * DILATION_SIZE_Y;
+
+            __attribute__((opencl_unroll_hint(FILTER_SIZE_X_SLICES)))
+            for(uint i = 0; i < FILTER_SIZE_X_SLICES; i++)
+            {
+                int8 act_reg[OUT_BLOCK_HEIGHT]; // activations for MMAD
+
+                // preload spatial data
+                __attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+                for(uint h = 0; h < OUT_BLOCK_HEIGHT; h++)
+                {
+                    uint input_idx = GET_DATA_BYX8_F4_INDEX(INPUT0, b, k * 4, input_offset_y + h * STRIDE_SIZE_Y, input_x + i * 8);
+                    int2 _input_data_01 = as_int2(intel_sub_group_block_read2((__global uint*)(input + input_idx)));
+                    int _input_data_2 = as_int(intel_sub_group_block_read((__global uint*)(input + input_idx + 8 * 8)));
+
+                    act_reg[h][0] = _input_data_01[0];
+                    act_reg[h][1] = intel_sub_group_shuffle_down(_input_data_01[0], _input_data_01[1], STRIDE_SIZE_X * 1);
+                    act_reg[h][2] = intel_sub_group_shuffle_down(_input_data_01[0], _input_data_01[1], STRIDE_SIZE_X * 2);
+                    act_reg[h][3] = intel_sub_group_shuffle_down(_input_data_01[0], _input_data_01[1], STRIDE_SIZE_X * 3);
+                    act_reg[h][4] = _input_data_01[1];
+                    act_reg[h][5] = intel_sub_group_shuffle_down(_input_data_01[1], _input_data_2, STRIDE_SIZE_X * 1);
+                    act_reg[h][6] = intel_sub_group_shuffle_down(_input_data_01[1], _input_data_2, STRIDE_SIZE_X * 2);
+                    act_reg[h][7] = intel_sub_group_shuffle_down(_input_data_01[1], _input_data_2, STRIDE_SIZE_X * 3);
+                }
+
+                __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+                for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++) // iterate over output feature channels for weights
+                {
+                    uint filter_idx = GET_FILTER_OS_IS_Y_X8_OSV8_ISV4(FILTER, f + w * 8, k * 4, j, i * 8);
+                    int8 _w = as_int8(intel_sub_group_block_read8((__global uint*)(weights + filter_idx)));
+
+                    __attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+                    for(uint h = 0; h < OUT_BLOCK_HEIGHT; h++)
+                    {
+                        // MMAD on 8x WEIGHTS_PER_WORKITEM input channels elements for 8x outputs in WI
+                        dotProd[w*OUT_BLOCK_HEIGHT + h] = MMAD_8x8(act_reg[h], _w, dotProd[w*OUT_BLOCK_HEIGHT + h]);
+                    }
+                }
+            }
+        }
+    }
+
+float4 quant_f = as_float4(intel_sub_group_block_read4((__global uint*) (quantizations + f) ));
+float4 bias_f = as_float4(intel_sub_group_block_read4((__global uint*) (biases + f) ));
+#if CALIBRATION_TERM
+float4 calib_f = as_float4(intel_sub_group_block_read4((__global uint*) (calibrations + f) ));
+#endif
+
+__attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+for(uint h = 0; h < OUT_BLOCK_HEIGHT; h++)
+{
+    const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b, f + get_sub_group_local_id(), y + h, x);
+
+    __attribute__((opencl_unroll_hint(8)))
+    for(uint i = 0; i < 8; i++)
+    {
+
+    #if WEIGHTS_PER_WORKITEM == 4
+    
+        uchar4 out;
+        __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+        for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+        {
+            QUANTIZATION;
+        }
+        intel_sub_group_block_write_uc4((__global uchar*)(output + dst_index + 32 * 4 * i), out);
+    
+    #else
+    
+        __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+        for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+        {
+        #if CALIBRATION_TERM
+            dotProd[w*OUT_BLOCK_HEIGHT + h][i] = (UNIT_TYPE)round(((float)dotProd[w*OUT_BLOCK_HEIGHT + h][i] * quant_f[w] * I_QF + bias_f[w]) * calib_f[w]);
+        #else  // CALIBRATION_TERM
+            dotProd[w*OUT_BLOCK_HEIGHT + h][i] = (UNIT_TYPE)round(((float)dotProd[w*OUT_BLOCK_HEIGHT + h][i] * quant_f[w] * I_QF + bias_f[w]) * O_QF);
+        #endif // CALIBRATION_TERM
+            output[dst_index + 32 * 4 * i + 8 * w] = ACTIVATION(convert_char(dotProd[w*OUT_BLOCK_HEIGHT + h][i]), NL_M, NL_N);
+        }
+    
+    #endif
+    }
+}
+
+}
+
+#undef OUT_BLOCK_HEIGHT
+#undef WEIGHTS_PER_WORKITEM
+
+#undef FILTER_SIZE_X_SLICES
+#undef FILTER_IFM_SLICES
+
+#undef SCALE
+#undef QUANTIZATION
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32.cl
new file mode 100644
index 000000000..a240d4b7d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_byxf_fs_bs_yx_bsv4_fsv32.cl
@@ -0,0 +1,105 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+#define OBS 8
+__attribute__((intel_reqd_sub_group_size(8)))
+KERNEL(convolution)(
+    __global INPUT0_TYPE* input, 
+    __global OUTPUT_TYPE* output, 
+    __global FILTER_TYPE* weights, 
+#if BIAS_TERM
+    __global BIAS_TYPE* biases,
+#endif
+#if QUANTIZATION_TERM
+    __global float* quantizations,
+#endif
+#if CALIBRATION_TERM
+    __global float* calibrations,
+#endif
+    uint split_idx)
+{
+    const uint f_pack = (get_group_id(0) * 32) % OUTPUT_FEATURE_NUM;
+    const uint b = (get_group_id(0) * 32) / OUTPUT_FEATURE_NUM;
+
+    const uint x = get_group_id(1) * OBS;
+    const uint y = get_group_id(2);
+
+    int4 dotProd[OBS] = { 0 };
+
+    const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    const uint filter_offset = f_pack*FILTER_OFM_PITCH;
+    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET;
+
+    for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
+    {
+        const int input_offset_y = input_y + j;
+        for (uint i = 0; i < FILTER_SIZE_X ; ++i)
+        {
+            const int input_offset_x = input_x + i + STRIDE_SIZE_X * get_sub_group_local_id();
+            uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH;
+            uint filter_idx = filter_offset + j*FILTER_Y_PITCH + i*FILTER_X_PITCH;
+
+            char input_data[3];
+            char2 _i = vload2(0, input + input_idx);
+            input_data[0] = _i.s0;
+            input_data[1] = _i.s1;
+            input_data[2] = input[input_idx + 2];
+
+            for (uint k = 0; k < FILTER_IFM_NUM; ++k)
+            {
+                char4 w_data = as_char4(intel_sub_group_block_read((const __global uint*)(weights + filter_idx)));
+                for(uint r = 0; r < OBS; r++)
+                {
+                    char in = intel_sub_group_shuffle(input_data[k], r);
+                    for(uint c = 0; c < 4; c++)
+                    {
+                        dotProd[r][c] += (int)in * (int)w_data[c];
+                    }
+                }
+                filter_idx += FILTER_IFM_PITCH;
+            }
+        }
+    }
+
+
+const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b, f_pack, y, x + get_sub_group_local_id());
+const uint _f_idx = f_pack + get_sub_group_local_id() * 4;
+float4 quants = vload4(0, quantizations + _f_idx );
+float4 calibs = vload4(0, calibrations + _f_idx );
+float4 bias = vload4(0, biases + _f_idx );
+for(uint r = 0; r < OBS; r++)
+{
+    char4 char_output;
+    for(uint c = 0; c < 4; c++)
+    {
+        const uint f_idx = f_pack + get_sub_group_local_id() * 4 + c;
+    #if BIAS_TERM
+        const uint bias_index = f_idx;
+    #if CALIBRATION_TERM
+        dotProd[r][c] = (UNIT_TYPE)round(((float)dotProd[r][c] * quants[c] * I_QF + bias[c]) * calibs[c]);
+    #else  // CALIBRATION_TERM
+        dotProd[r][c] = (UNIT_TYPE)round(((float)dotProd[r][c] * quants[c] * I_QF + bias[c]) * O_QF);
+    #endif // CALIBRATION_TERM
+    #endif
+        char_output[c] = ACTIVATION(convert_char(dotProd[r][c]), NL_M, NL_N);
+    }
+    const uint out_idx = intel_sub_group_shuffle(dst_index, r);
+    intel_sub_group_block_write( (__global uint*)(output + out_idx) , as_uint(char_output));
+}
+
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_imad.cl
new file mode 100644
index 000000000..0fa75ddc6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_imad.cl
@@ -0,0 +1,202 @@
+// Copyright (c) 2018-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/common.cl"
+#include "include/fetch.cl"
+#include "include/data_types.cl"
+#include "include/imad.cl"
+
+#ifndef NON_BLOCK_LOAD
+// block loads for inputs and weights should be fastest, but compiler seems
+// to do better with a mix, regular loads for inputs and block loads for weights. 
+#define BLOCK_LOAD_WEIGHTS
+#endif
+// Input reading operation is always blocked.
+#define BLOCK_LOAD_INPUTS
+
+// for now kernel stride is square
+#define K_WSTRIDE K_STRIDE
+#define K_HSTRIDE K_STRIDE
+
+// need KERNEL width for first output + STRIDE more for each additional.
+#define IN_BLOCK_WIDTH  (K_WIDTH  + K_WSTRIDE * (OUT_BLOCK_WIDTH  - 1))
+#define IN_BLOCK_HEIGHT (K_HEIGHT + K_HSTRIDE * (OUT_BLOCK_HEIGHT - 1))
+
+// for imad we are packing 4 8bit activations per 32 bit SIMD lane
+// if we later add 4bit, then PACK would be 8.
+#define PACK 4
+
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+KERNEL (convolution_gpu_imad)(
+    __global uint        *inputs,
+    __global OUTPUT_TYPE *outputs,
+    __global int         *weights
+#if BIAS_TERM
+    ,__global BIAS_TYPE  *biases
+#endif
+#if QUANTIZATION_TERM
+    ,__global float      *quantizations
+#endif
+#if CALIBRATION_TERM
+    ,__global float      *calibrations
+#endif
+)
+{
+    const uint oc = get_global_id(0) * OUT_BLOCK_WIDTH;  // oc = Output Column
+    const uint or = get_global_id(1) * OUT_BLOCK_HEIGHT; // or = Output Row
+    const uint fm = get_global_id(2);                    // fm = Feature Map = od = Output Depth, SIMD is across this dimension, WG is 1x1x16
+    const uint fmg = get_group_id(2);
+    const uint lid = get_local_id(2);
+    const uint batch = fm / _OD;
+
+    uint in[IN_BLOCK_HEIGHT];
+    int  out[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT] = { 0 };  // this is the 32 bit signed accumulator that must be converted to 8 bits before final write.
+
+    #define NUM_FILTERS (K_HEIGHT * K_WIDTH)
+    int w[NUM_FILTERS];
+
+    int in_addr;
+
+#ifdef BLOCK_LOAD_WEIGHTS
+    int weight_addr = (fmg % (_OD / SIMD_SIZE)) * ((_ID * K_HEIGHT * K_WIDTH * SIMD_SIZE) / PACK);
+#else
+    int weight_addr = (fmg % (_OD / SIMD_SIZE)) * ((_ID * K_HEIGHT * K_WIDTH * SIMD_SIZE) / PACK) + lid;
+#endif
+
+    uint input_size = (_ID * (_IH + IHPAD) * (_IW + IWPAD)) / PACK; // dividing by PACK to get right number of 32bit entities.
+
+    __attribute__((opencl_unroll_hint(1)))
+    for(int kd = 0; kd < (_ID / PACK); kd++) // For imad we do 4X less input feature map iterations since we are packing 4 of them in each uchar4.  For now assume _ID is multiple of packing factor.
+    {
+
+#ifdef BLOCK_LOAD_INPUTS
+        in_addr = kd * (_IH + IHPAD) * (_IW + IWPAD) + (or * K_STRIDE) * (_IW + IWPAD) + (oc * K_STRIDE);
+#else
+        in_addr = kd * (_IH + IHPAD) * (_IW + IWPAD) + (or * K_STRIDE) * (_IW + IWPAD) + (oc * K_STRIDE) + lid;
+#endif
+        in_addr += batch * input_size;  // adjust for batching
+
+        for(uint reg = 0; reg < IN_BLOCK_HEIGHT; reg++) {
+#ifdef BLOCK_LOAD_INPUTS
+            in[reg] = intel_sub_group_block_read((const __global uint*) &inputs[in_addr]);
+#else
+            in[reg] = inputs[in_addr];// read SIMD_SIZE elements wide
+#endif
+            in_addr += (_IW + IWPAD);  // move to next row down
+        }
+
+#ifdef BLOCK_LOAD_WEIGHTS
+        *((int8*)&w[0]) = as_int8(intel_sub_group_block_read8((const __global uint*) &weights[weight_addr]));
+        w[8]= as_int(intel_sub_group_block_read((const __global uint*) &weights[weight_addr + (SIMD_SIZE<<3)]));
+        weight_addr += SIMD_SIZE*NUM_FILTERS;
+#else
+        for(int pf=0; pf < NUM_FILTERS; pf++) {
+            w[pf] = weights[weight_addr];
+            weight_addr += SIMD_SIZE;
+        }
+#endif
+
+        int wi = 0;
+        int kr = 0; // kr = Kernel Row
+        LOOP(K_HEIGHT, kr,
+        {
+            int kc = 0; // kc = Kernel Column
+            LOOP(K_WIDTH, kc,
+            {
+                for (int br = 0; br < OUT_BLOCK_HEIGHT; br++) {
+                    for (int bc = 0; bc < OUT_BLOCK_WIDTH; bc++) {
+                        uint input = sub_group_broadcast(in[br * K_HSTRIDE + kr], bc * K_WSTRIDE + kc);
+
+                        out[br * OUT_BLOCK_WIDTH + bc] =
+#ifdef CONVO_UNSIGNED
+                            IMAD(out[br * OUT_BLOCK_WIDTH + bc], as_uchar4(input), as_char4(w[wi]));
+#else
+                            IMAD(out[br * OUT_BLOCK_WIDTH + bc], as_char4(input), as_char4(w[wi]));
+#endif
+                    }
+                }
+                wi++;
+            });
+        });
+    } //for kd
+
+    // Feature maps are an array of slices, each H,W position within the slice contains
+    // four 8bit feature maps, packed like RGBA components into a 32 bit pixel.
+    int row_size_bytes = (_OW + OWPAD) * PACK;
+
+    // Slice_pack is a pack of 4 feature map tiles that are [OH][OW][4]
+    // that are stored within the full [N][C/4][H][W][4] output.
+    int slice_pack_size_bytes = row_size_bytes * (_OH + OHPAD);
+
+    // Dividing the feature map index by 4 gives us the slice_pack_index in each lane
+    // (each lane within block of 4 will have same index).
+    int slice_pack_index = fm / PACK;
+
+    // Each group of 4 simd lanes points to start of it's slice pack.
+    int slice_pack_start_addr_bytes = slice_pack_index * slice_pack_size_bytes;
+
+    // Make each lane within the group of 4(PACK) simd lanes point to an individual byte
+    // witihn the uchar4 at start of slice pack.
+    int slice_pack_addr_bytes = slice_pack_start_addr_bytes + (lid % PACK);
+
+    // Adjust to particular tile that we are working on
+    slice_pack_addr_bytes += (or + OUTPUT_PAD_BEFORE_SIZE_Y) * row_size_bytes
+                             + (oc + OUTPUT_PAD_BEFORE_SIZE_X) * PACK;
+
+    for (int r = 0; r < OUT_BLOCK_HEIGHT; r++) {
+        for (int c = 0; c < OUT_BLOCK_WIDTH; c++) {
+            uint out_idx = slice_pack_addr_bytes + r * row_size_bytes + (c*PACK);
+#if QUANTIZATION_TERM
+            int dotProd       = out[r * OUT_BLOCK_WIDTH + c];
+#else
+            UNIT_TYPE dotProd = out[r * OUT_BLOCK_WIDTH + c];
+#endif
+
+#if BIAS_TERM
+            const uint f = fm % _OD;
+    #if   BIAS_PER_OUTPUT
+            #error convolution_gpu_imad.cl: BIAS_PER_OUTPUT - not supported
+    #elif BIAS_PER_OFM
+            const uint bias_index = f;
+    #endif
+
+    #if QUANTIZATION_TERM
+        #if CALIBRATION_TERM
+
+            dotProd = (UNIT_TYPE)round( ((float)dotProd * quantizations[f] * I_QF + biases[bias_index])
+                                        * calibrations[f] );
+        #else
+            dotProd = (UNIT_TYPE)round( ((float)dotProd * quantizations[f] * I_QF + biases[bias_index])
+                                        * O_QF );
+        #endif // CALIBRATION_TERM
+    #else
+            dotProd += (UNIT_TYPE)biases[bias_index];
+    #endif // QUANTIZATION_TERM
+#endif // BIAS_TERM
+
+#if QUANTIZATION_TERM
+            UNIT_TYPE dotProd_A = ACTIVATION(convert_char(dotProd), NL_M, NL_N);
+#else
+            UNIT_TYPE dotProd_A = ACTIVATION(dotProd, NL_M, NL_N);
+#endif
+
+#ifdef CONVO_UNSIGNED
+            outputs[out_idx] = (uchar)( max((int)dotProd_A , 0) & 0xFF );
+#else
+            outputs[out_idx] = (uchar)dotProd_A & 0xFF;
+#endif
+        } // for (int c = 0; c < OUT_BLOCK_WIDTH; c++)
+    } // for (int r = 0; r < OUT_BLOCK_HEIGHT; r++)
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_128x128wg_slm_int8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_128x128wg_slm_int8.cl
new file mode 100644
index 000000000..381f198e7
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_128x128wg_slm_int8.cl
@@ -0,0 +1,396 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/mmad.cl"
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    {\
+        for(uint z = 0; z < 4; z++)\
+        {\
+            regC_uchar16[z * 4 + 0] = convert_uchar_sat( (regC[0 * 4 + i][idx + z / 4]) * SCALE + bias_f.s0);\
+            regC_uchar16[z * 4 + 1] = convert_uchar_sat( (regC[1 * 4 + i][idx + z / 4]) * SCALE + bias_f.s1);\
+            regC_uchar16[z * 4 + 2] = convert_uchar_sat( (regC[2 * 4 + i][idx + z / 4]) * SCALE + bias_f.s2);\
+            regC_uchar16[z * 4 + 3] = convert_uchar_sat( (regC[3 * 4 + i][idx + z / 4]) * SCALE + bias_f.s3);\
+        }\
+    }
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = convert_uchar_sat(regC[0 * 4 + i][idx]);\
+    regC_uchar16.s1 = convert_uchar_sat(regC[1 * 4 + i][idx]);\
+    regC_uchar16.s2 = convert_uchar_sat(regC[2 * 4 + i][idx]);\
+    regC_uchar16.s3 = convert_uchar_sat(regC[3 * 4 + i][idx]);\
+    \
+    regC_uchar16.s4 = convert_uchar_sat(regC[0 * 4 + i][idx+1]);\
+    regC_uchar16.s5 = convert_uchar_sat(regC[1 * 4 + i][idx+1]);\
+    regC_uchar16.s6 = convert_uchar_sat(regC[2 * 4 + i][idx+1]);\
+    regC_uchar16.s7 = convert_uchar_sat(regC[3 * 4 + i][idx+1]);\
+    \
+    regC_uchar16.s8 = convert_uchar_sat(regC[0 * 4 + i][idx+2]);\
+    regC_uchar16.s9 = convert_uchar_sat(regC[1 * 4 + i][idx+2]);\
+    regC_uchar16.sa = convert_uchar_sat(regC[2 * 4 + i][idx+2]);\
+    regC_uchar16.sb = convert_uchar_sat(regC[3 * 4 + i][idx+2]);\
+    \
+    regC_uchar16.sc = convert_uchar_sat(regC[0 * 4 + i][idx+3]);\
+    regC_uchar16.sd = convert_uchar_sat(regC[1 * 4 + i][idx+3]);\
+    regC_uchar16.se = convert_uchar_sat(regC[2 * 4 + i][idx+3]);\
+    regC_uchar16.sf = convert_uchar_sat(regC[3 * 4 + i][idx+3]);
+
+#else
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+1]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+1]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+1]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+1]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s8 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+2]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s9 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+2]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.sa = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+2]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sb = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+2]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.sc = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+3]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.sd = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+3]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.se = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+3]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sf = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+3]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));
+
+#endif
+
+
+inline uint FUNC(calculate_output_offset_to_account_padding)(uint cOffset)
+{
+#if OUT_WITH_PADDING == 1
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * OUT_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * OUT_B_BLOCK_PITCH;
+    padded_offset += y_idx * OUT_Y_PITCH;
+    padded_offset += x_idx * OUT_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += OUT_OFFSET;
+
+    return padded_offset;
+#else
+    return cOffset;
+#endif
+}
+
+inline void FUNC(mmad_32x32_int8)(  __local uint* l_tileA, const uint l_offsetTileA,
+                                    __local int8* l_tileB, const uint l_offsetTileB_col0,
+                                    const uint l_offsetTileB_col1, const uint l_offsetTileB_col2,
+                                    const uint l_offsetTileB_col3, int8* rowA, int8* colB,
+                                    int8* regC)
+{
+    // Read tile A from SLM to regA
+    uint l_offsetTileATemp = l_offsetTileA;
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        rowA[j] = as_int8(SLM_BLOCK_READ_8(&l_tileA[l_offsetTileATemp]));
+        l_offsetTileATemp += 8 * SG_SIZE;
+    }
+    // Read tile B from SLM to regB and compute mmad
+    colB[0] = l_tileB[l_offsetTileB_col0];
+    colB[1] = l_tileB[l_offsetTileB_col1];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[0*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[0], regC[0*(SIMD_LANE_M / 8) + j]);
+    }
+    colB[0] = l_tileB[l_offsetTileB_col2];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[1*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[1], regC[1*(SIMD_LANE_M / 8) + j] );
+    }
+    colB[1] = l_tileB[l_offsetTileB_col3];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[2*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[0], regC[2*(SIMD_LANE_M / 8) + j]);
+    }
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[3*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[1], regC[3*(SIMD_LANE_M / 8) + j]);
+    }
+}
+
+/*
+ *  \brief GEMM kernel to compute MxN matrix using SLM
+ *  \param g_inA  - Input matrix 
+ *  \param g_inB  - Input matrix 
+ *  \param g_outC - Output matrix
+ */
+
+__attribute__((intel_reqd_sub_group_size(SG_SIZE)))
+KERNEL(Kernel_GEMM_MMAD8_32x32SG_128x128WG_SLM_INT8)
+  (
+  __global char* const g_inA,
+  __global int* g_outC,
+  __global char* const g_inB,
+    #if BIAS_TERM
+        __global BIAS_TYPE* biases,
+    #endif
+        __global float* quantizations,
+    #if CALIBRATION_TERM
+        __global float* calibrations,
+    #endif
+        uint split_idx
+
+   )
+{
+
+    __global int4* const g_matrixA = (__global int4*)g_inA;
+    __global int4* const g_matrixB = (__global int4*)g_inB;
+    __global int8* g_matrixC = (__global int8*)g_outC;
+
+    // Each work-group works to compute 128x128 tile.
+    // Each work-group contains 16 sub-groups.
+    // Each sub-group within the work-group works to compute a 32x32 tile.
+    // 1) All work-items in WG fill SLM with tileA (128x32) and tileB (32x128).
+    // 2) Each sub-group works to compute 32x32 tileC (stored in regC).
+    //    Note that each work-item in the sub-group computes a 32x4 chunk of tileC.
+    // 3) Repeat until tileC is fully computed (while moving tileA and tileB "windows")
+    __local int8 l_workGroupTileA[2 * (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int8)]; // [2*128*32/8] = 1024 
+    __local int8 l_workGroupTileB[2 * (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8)]; // [2*128*32/8] = 1024 
+
+    __local uint* l_workGroupTileA_uint = (__local uint*)l_workGroupTileA;
+    __local int4* l_workGroupTileA_int4 = (__local int4*)l_workGroupTileA;
+    __local int4* l_workGroupTileB_int4 = (__local int4*)l_workGroupTileB;
+
+    const uint l_groupSize = get_local_size(DIM_X) * get_local_size(DIM_Y);
+
+    const uint l_pingPongOffsetA_uint = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(uint);
+    const uint l_pingPongOffsetB_int8 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8);
+    const uint l_pingPongOffsetA_int4 = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int4);
+    const uint l_pingPongOffsetB_int4 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int4);
+
+    // Thread IDs
+    const uint g_tidY = get_global_id(DIM_Y); // 0,...,all_wi_inY
+    const uint g_tidX = get_global_id(DIM_X); // 0,...,all_wi_inX
+    const uint l_tidX = get_local_id(DIM_X);  // 0,...,31 in WG
+    const uint l_tidY = get_local_id(DIM_Y);  // 0,1,2,3  in WG
+    const uint l_tid = l_tidY * get_local_size(DIM_X) + l_tidX; // 0,1,2,...127
+
+    // SubGroup IDs
+    const uint sg_tid = get_sub_group_local_id();            // 0,1,...,8
+    const uint sg_global_idX = (uint)(g_tidX / SG_SIZE);     //{0}/8
+    const uint sg_global_idY = g_tidY;                       //{0}
+
+    const uint sg_local_idX = (uint)(l_tidX / SG_SIZE);      // {0,...,31}/8={0,0,0,0,0...,1,1,1,...,3,3,3}
+    const uint sg_local_idY = l_tidY;                        // 0,1,2,3
+    const uint sg_local_id = sg_local_idY * get_local_size(DIM_X) / SG_SIZE + sg_local_idX;  // get_local_size(DIM_X) / SG_SIZE = 32/8 = 4
+
+    const uint sub_group_id = get_sub_group_id();
+
+
+    // Registers
+    int8 regC[(SIMD_LANE_M / 8) * SIMD_LANE_N] = {0}; // Each work-item responsible for 32x4 ints elts   // (32/8)*4
+    int8 rowA[(SG_TILE_M * MATRIX_SMALL_K / SG_SIZE) / sizeof(int8)]; // each work-item will hold 1/8 of matrixA 
+    int8 colB[2];  // each lane will store 32x4 piece of matrixB
+
+    // SLM indices
+    const uint l_offsetTileA = SG_TILE_M * (MATRIX_SMALL_K / sizeof(uint)) * sg_local_idY;
+    const uint numElements32x32TileB = (MATRIX_SMALL_K * SG_TILE_N) / sizeof(int8);
+    const uint numElements32x8TileB = numElements32x32TileB / 4;
+    const uint l_offsetTileB = numElements32x32TileB * sg_local_idX;
+    const uint l_offsetTileB_col0 = l_offsetTileB + sg_tid;
+    const uint l_offsetTileB_col1 = l_offsetTileB + 1 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col2 = l_offsetTileB + 2 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col3 = l_offsetTileB + 3 * numElements32x8TileB + sg_tid;
+
+    // Global indices
+    uint g_idxA[2];
+    uint g_idxB[2];
+#ifdef TILED_GLOBAL_LAYOUT // 32-row major (matrixA) and 32-col major (matrixB)
+    g_idxA[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_M) * get_group_id(DIM_Y) + l_tid;
+    g_idxB[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_N) * get_group_id(DIM_X) + l_tid;
+    g_idxA[1] = g_idxA[0] + l_groupSize;
+    g_idxB[1] = g_idxB[0] + l_groupSize;
+#else // Row (matrixA) and Col (matrixB) major layout
+    g_idxA[0] = WG_TILE_M * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_Y) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxB[0] = WG_TILE_N * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_X) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxA[1] = g_idxA[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+    g_idxB[1] = g_idxB[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+#endif
+
+    // Initial SLM setup
+    {
+        l_workGroupTileA_int4[l_tid] = g_matrixA[g_idxA[0]];
+        l_workGroupTileB_int4[l_tid] = g_matrixB[g_idxB[0]];
+        l_workGroupTileA_int4[l_tid + l_groupSize] = g_matrixA[g_idxA[1]];
+        l_workGroupTileB_int4[l_tid + l_groupSize] = g_matrixB[g_idxB[1]];
+	   
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    int4 hdcReadValueA[2];
+    int4 hdcReadValueB[2];
+
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < (MATRIX_K / MATRIX_SMALL_K) - 1; k++)
+    {
+        /*
+         * SLM setup - HDC read only
+         */
+        // Overlap HDC reads with mmad compute
+        hdcReadValueA[0] = g_matrixA[g_idxA[0]];
+        hdcReadValueB[0] = g_matrixB[g_idxB[0]];
+        hdcReadValueA[1] = g_matrixA[g_idxA[1]];
+        hdcReadValueB[1] = g_matrixB[g_idxB[1]];
+
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        /*
+         * mmad compute
+         */
+        FUNC_CALL(mmad_32x32_int8)(&l_workGroupTileA_uint[(k % 2) * l_pingPongOffsetA_uint],
+                                l_offsetTileA, &l_workGroupTileB[(k % 2) * l_pingPongOffsetB_int8],
+                                l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2,
+                                l_offsetTileB_col3, rowA, colB, regC);
+
+        /*
+         * SLM setup - SLM write only
+         */
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid] = hdcReadValueA[0];
+        l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid] = hdcReadValueB[0];
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid + l_groupSize] = hdcReadValueA[1];
+        l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid + l_groupSize] = hdcReadValueB[1];
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    } // main outer loop
+
+    /*
+     * Last mmad compute iteration (avoids branching in main loop)
+     */
+
+    FUNC_CALL(mmad_32x32_int8)(
+        &l_workGroupTileA_uint[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetA_uint],
+        l_offsetTileA,
+        &l_workGroupTileB[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetB_int8],
+        l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2, l_offsetTileB_col3, rowA, colB,
+        regC);
+
+#ifdef OUTPUT_TILED_GLOBAL_LAYOUT
+    // Write out in swizzled manner after quantizing
+    __global uchar* g_outC_uchar = (__global uchar*)g_outC;
+    uint cOffset = sg_global_idX * (MATRIX_M * SG_TILE_N / sizeof(uchar)) +
+                   sg_global_idY * (SG_TILE_M * SG_TILE_N / sizeof(uchar));
+
+    uchar16 regC_uchar16;
+    uint offset_uc16 = 0;
+
+    const uint workgroup_id_x = get_group_id(0); 
+    uint feature_off = 32*(sub_group_id % (WG_TILE_N / 32)) + WG_TILE_N*workgroup_id_x; //=32*{0,1,2,3} + WG_TILE_N * workgroup_id_x 
+    uint feature = get_sub_group_local_id()*4 + feature_off;
+
+    float4 quant_f = vload4(0, quantizations + feature);
+    float4 bias_f = vload4(0, biases + feature);
+    float4 calib_f = vload4(0, calibrations + feature);
+
+#if MMAD_SUPPORTED == 1
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+#endif
+    for (uint i = 0; i < SG_TILE_M / (sizeof(int8) / sizeof(int)); i++)
+    {
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            // B0..3, F0..31
+            QUANTIZATION(0);
+        }
+
+        intel_sub_group_block_write4((__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16));
+        cOffset += sizeof(uchar16) * SG_SIZE;
+
+        // now we need to calculate again for other x
+        padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            // B0..3, F0..31
+            QUANTIZATION(4);
+        }
+
+        intel_sub_group_block_write4( (__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16) );
+        cOffset += sizeof(uchar16) * SG_SIZE;
+    }
+#else
+    // Write final accumulated values
+    uint cOffset = sg_global_idX * ((MATRIX_M / 8) * SG_TILE_N) + sg_global_idY * (SG_TILE_M / 8) +
+                   sg_tid * (MATRIX_M / 8);
+    __attribute__((opencl_unroll_hint(SIMD_LANE_N)))
+    for (uint i = 0; i < (SIMD_LANE_N); ++i)
+    {
+        __attribute__((opencl_unroll_hint(SIMD_LANE_M / 8)))
+        for (uint j = 0; j < (SIMD_LANE_M / 8); ++j)
+        {
+            g_matrixC[cOffset + j] = regC[i*(SIMD_LANE_M / 8) + j];
+        }
+        cOffset += SG_SIZE * (MATRIX_M / 8);
+    }
+#endif
+
+}
+
+#undef QUANTIZATION
+#undef SCALE
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_224x128wg_slm_int8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_224x128wg_slm_int8.cl
new file mode 100644
index 000000000..94a38d7ba
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_224x128wg_slm_int8.cl
@@ -0,0 +1,389 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/mmad.cl"
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    {\
+        for(uint z = 0; z < 4; z++)\
+        {\
+            regC_uchar16[z * 4 + 0] = convert_uchar_sat( (regC[0 * 4 + i][idx + z / 4]) * SCALE + bias_f.s0);\
+            regC_uchar16[z * 4 + 1] = convert_uchar_sat( (regC[1 * 4 + i][idx + z / 4]) * SCALE + bias_f.s1);\
+            regC_uchar16[z * 4 + 2] = convert_uchar_sat( (regC[2 * 4 + i][idx + z / 4]) * SCALE + bias_f.s2);\
+            regC_uchar16[z * 4 + 3] = convert_uchar_sat( (regC[3 * 4 + i][idx + z / 4]) * SCALE + bias_f.s3);\
+        }\
+    }
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = convert_uchar_sat(regC[0 * 4 + i][idx]);\
+    regC_uchar16.s1 = convert_uchar_sat(regC[1 * 4 + i][idx]);\
+    regC_uchar16.s2 = convert_uchar_sat(regC[2 * 4 + i][idx]);\
+    regC_uchar16.s3 = convert_uchar_sat(regC[3 * 4 + i][idx]);\
+    \
+    regC_uchar16.s4 = convert_uchar_sat(regC[0 * 4 + i][idx+1]);\
+    regC_uchar16.s5 = convert_uchar_sat(regC[1 * 4 + i][idx+1]);\
+    regC_uchar16.s6 = convert_uchar_sat(regC[2 * 4 + i][idx+1]);\
+    regC_uchar16.s7 = convert_uchar_sat(regC[3 * 4 + i][idx+1]);\
+    \
+    regC_uchar16.s8 = convert_uchar_sat(regC[0 * 4 + i][idx+2]);\
+    regC_uchar16.s9 = convert_uchar_sat(regC[1 * 4 + i][idx+2]);\
+    regC_uchar16.sa = convert_uchar_sat(regC[2 * 4 + i][idx+2]);\
+    regC_uchar16.sb = convert_uchar_sat(regC[3 * 4 + i][idx+2]);\
+    \
+    regC_uchar16.sc = convert_uchar_sat(regC[0 * 4 + i][idx+3]);\
+    regC_uchar16.sd = convert_uchar_sat(regC[1 * 4 + i][idx+3]);\
+    regC_uchar16.se = convert_uchar_sat(regC[2 * 4 + i][idx+3]);\
+    regC_uchar16.sf = convert_uchar_sat(regC[3 * 4 + i][idx+3]);
+
+#else
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+1]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+1]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+1]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+1]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s8 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+2]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s9 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+2]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.sa = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+2]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sb = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+2]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.sc = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+3]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.sd = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+3]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.se = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+3]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sf = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+3]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));
+
+#endif
+
+inline uint FUNC(calculate_output_offset_to_account_padding)(uint cOffset)
+{
+#if OUT_WITH_PADDING == 1
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * OUT_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * OUT_B_BLOCK_PITCH;
+    padded_offset += y_idx * OUT_Y_PITCH;
+    padded_offset += x_idx * OUT_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += OUT_OFFSET;
+
+    return padded_offset;
+#else
+    return cOffset;
+#endif
+}
+
+inline void FUNC(mmad_32x32_int8)(  __local uint* l_tileA, const uint l_offsetTileA,
+                                    __local int8* l_tileB, const uint l_offsetTileB_col0,
+                                    const uint l_offsetTileB_col1, const uint l_offsetTileB_col2,
+                                    const uint l_offsetTileB_col3, int8* rowA, int8* colB,
+                                    int8* regC)
+{
+    // Read tile A from SLM to regA
+    uint l_offsetTileATemp = l_offsetTileA;
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+		rowA[j] = as_int8(SLM_BLOCK_READ_8(&l_tileA[l_offsetTileATemp]));
+        l_offsetTileATemp += 8 * SG_SIZE;
+    }
+    // Read tile B from SLM to regB and compute mmad
+    colB[0] = l_tileB[l_offsetTileB_col0];
+    colB[1] = l_tileB[l_offsetTileB_col1];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[0*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[0], regC[0*(SIMD_LANE_M / 8) + j]);
+    }
+    colB[0] = l_tileB[l_offsetTileB_col2];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[1*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[1], regC[1*(SIMD_LANE_M / 8) + j] );
+	}
+    colB[1] = l_tileB[l_offsetTileB_col3];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[2*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[0], regC[2*(SIMD_LANE_M / 8) + j]);
+    }
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[3*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[1], regC[3*(SIMD_LANE_M / 8) + j]);
+    }
+}
+
+/*
+ *  \brief GEMM kernel to compute MxN matrix using SLM
+ *  \param g_inA  - Input matrix 
+ *  \param g_inB  - Input matrix 
+ *  \param g_outC - Output matrix
+ */
+
+__attribute__((intel_reqd_sub_group_size(SG_SIZE)))
+KERNEL(Kernel_GEMM_MMAD8_32x32SG_224x128WG_SLM_INT8)
+														  (__global char* const g_inA,                                                         
+														  __global int* g_outC,
+														  __global char* const g_inB,                                                           
+ 														    #if BIAS_TERM
+																__global BIAS_TYPE* biases,
+															#endif
+																__global float* quantizations,
+															#if CALIBRATION_TERM
+																__global float* calibrations,
+															#endif
+																uint split_idx
+
+														   )
+{
+
+    __global int4* const g_matrixA = (__global int4*)g_inA;
+    __global int4* const g_matrixB = (__global int4*)g_inB;
+    __global int8* g_matrixC = (__global int8*)g_outC;
+
+    // Each work-group works to compute 128x128 tile.
+    // Each work-group contains 16 sub-groups.
+    // Each sub-group within the work-group works to compute a 32x32 tile.
+    // 1) All work-items in WG fill SLM with tileA (128x32) and tileB (32x128).
+    // 2) Each sub-group works to compute 32x32 tileC (stored in regC).
+    //    Note that each work-item in the sub-group computes a 32x4 chunk of tileC.
+    // 3) Repeat until tileC is fully computed (while moving tileA and tileB "windows")
+    __local int8 l_workGroupTileA[2 * (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int8)];
+    __local int8 l_workGroupTileB[2 * (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8)];
+
+    __local uint* l_workGroupTileA_uint = (__local uint*)l_workGroupTileA;
+    __local int4* l_workGroupTileA_int4 = (__local int4*)l_workGroupTileA;
+    __local int4* l_workGroupTileB_int4 = (__local int4*)l_workGroupTileB;
+
+    const uint l_groupSize = get_local_size(DIM_X) * get_local_size(DIM_Y);
+
+    const uint l_pingPongOffsetA_uint = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(uint);
+    const uint l_pingPongOffsetB_int8 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8);
+    const uint l_pingPongOffsetA_int4 = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int4);
+    const uint l_pingPongOffsetB_int4 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int4);
+
+    // Thread IDs
+    const uint g_tidY = get_global_id(DIM_Y);
+    const uint g_tidX = get_global_id(DIM_X);
+    const uint l_tidX = get_local_id(DIM_X);
+    const uint l_tidY = get_local_id(DIM_Y);
+    const uint l_tid = l_tidY * get_local_size(DIM_X) + l_tidX;
+
+    // SubGroup IDs
+    const uint sg_tid = get_sub_group_local_id();
+    const uint sg_global_idX = (uint)(g_tidX / SG_SIZE);
+    const uint sg_global_idY = g_tidY;
+    const uint sg_local_idX = (uint)(l_tidX / SG_SIZE);
+    const uint sg_local_idY = l_tidY;
+    const uint sg_local_id = sg_local_idY * get_local_size(DIM_X) / SG_SIZE + sg_local_idX;
+
+	const uint sub_group_id = get_sub_group_id();
+
+    // Registers
+    int8 regC[(SIMD_LANE_M / 8) * SIMD_LANE_N] = {0}; // Each work-item responsible for 32x4 ints elts
+    int8 rowA[(SG_TILE_M * MATRIX_SMALL_K / SG_SIZE) / sizeof(int8)]; // each work-item will hold 1/8 of matrixA
+    int8 colB[2];  // each lane will store 32x4 piece of matrixB
+
+    // SLM indices
+    const uint l_offsetTileA = SG_TILE_M * (MATRIX_SMALL_K / sizeof(uint)) * sg_local_idY;
+    const uint numElements32x32TileB = (MATRIX_SMALL_K * SG_TILE_N) / sizeof(int8);
+    const uint numElements32x8TileB = numElements32x32TileB / 4;
+    const uint l_offsetTileB = numElements32x32TileB * sg_local_idX;
+    const uint l_offsetTileB_col0 = l_offsetTileB + sg_tid;
+    const uint l_offsetTileB_col1 = l_offsetTileB + 1 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col2 = l_offsetTileB + 2 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col3 = l_offsetTileB + 3 * numElements32x8TileB + sg_tid;
+
+    // Global indices
+    uint g_idxA[2];
+    uint g_idxB[2];
+#ifdef TILED_GLOBAL_LAYOUT // 32-row major (matrixA) and 32-col major (matrixB)
+    g_idxA[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_M) * get_group_id(DIM_Y) + l_tid;
+    g_idxB[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_N) * get_group_id(DIM_X) + l_tid;
+    g_idxA[1] = g_idxA[0] + l_groupSize;
+    g_idxB[1] = g_idxB[0] + l_groupSize;
+#else // Row (matrixA) and Col (matrixB) major layout
+    g_idxA[0] = WG_TILE_M * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_Y) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxB[0] = WG_TILE_N * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_X) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxA[1] = g_idxA[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+    g_idxB[1] = g_idxB[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+#endif
+    // Initial SLM setup
+    {
+        l_workGroupTileA_int4[l_tid] = g_matrixA[g_idxA[0]];
+        l_workGroupTileB_int4[l_tid] = g_matrixB[g_idxB[0]];
+
+        l_workGroupTileA_int4[l_tid + l_groupSize] = g_matrixA[g_idxA[1]];
+        if (l_tid < 32)
+        {
+            // Not all work-items will be needed to fetch the remaining matrix B
+            l_workGroupTileB_int4[l_tid + l_groupSize] = g_matrixB[g_idxB[1]];
+        }
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    int4 hdcReadValueA[2];
+    int4 hdcReadValueB[2];
+
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < (MATRIX_K / MATRIX_SMALL_K) - 1; k++)
+    {
+        hdcReadValueA[0] = g_matrixA[g_idxA[0]];
+        hdcReadValueB[0] = g_matrixB[g_idxB[0]];
+        hdcReadValueA[1] = g_matrixA[g_idxA[1]];
+        if (l_tid < 32)
+        {
+            // Not all work-items will be needed to fetch the remaining matrix B
+            hdcReadValueB[1] = g_matrixB[g_idxB[1]];
+        }
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+
+        //MMAD compute
+        FUNC_CALL(mmad_32x32_int8)(&l_workGroupTileA_uint[(k % 2) * l_pingPongOffsetA_uint],
+                                l_offsetTileA, &l_workGroupTileB[(k % 2) * l_pingPongOffsetB_int8],
+                                l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2,
+                                l_offsetTileB_col3, rowA, colB, regC);
+
+        //SLM setup - SLM write only
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid] = hdcReadValueA[0];
+        l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid] = hdcReadValueB[0];
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid + l_groupSize] = hdcReadValueA[1];
+        if (l_tid < 32)
+        {
+            // Not all work-items will be needed to fetch the remaining matrix B
+            l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid + l_groupSize] = hdcReadValueB[1];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    } // main outer loop
+
+    //Last MMAD compute iteration (avoids branching in main loop)
+	FUNC_CALL(mmad_32x32_int8)(
+        &l_workGroupTileA_uint[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetA_uint],
+        l_offsetTileA,
+        &l_workGroupTileB[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetB_int8],
+        l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2, l_offsetTileB_col3, rowA, colB,
+        regC);
+
+
+#ifdef OUTPUT_TILED_GLOBAL_LAYOUT
+	
+    // Write out in swizzled manner after quantizing
+    __global uchar* g_outC_uchar = (__global uchar*)g_outC;
+    uint cOffset = sg_global_idX * (MATRIX_M * SG_TILE_N / sizeof(uchar)) +
+                   sg_global_idY * (SG_TILE_M * SG_TILE_N / sizeof(uchar));
+
+    uchar16 regC_uchar16;
+    uint offset_uc16 = 0;
+
+	const uint workgroup_id_x = get_group_id(0); 
+	uint feature_off = 32*(sub_group_id % (WG_TILE_N / 32)) + WG_TILE_N*workgroup_id_x; //=32*{0,1,2,3} + WG_TILE_N * workgroup_id_x 
+	uint feature = get_sub_group_local_id()*4 + feature_off;
+
+    float4 quant_f = vload4(0, quantizations + feature);
+    float4 bias_f = vload4(0, biases + feature);
+    float4 calib_f = vload4(0, calibrations + feature);
+
+#if MMAD_SUPPORTED == 1
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+#endif
+    for (uint i = 0; i < SG_TILE_M / (sizeof(int8) / sizeof(int)); i++)
+    {
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            // B0..3, F0..31		
+            QUANTIZATION(0);
+        }
+
+        intel_sub_group_block_write4((__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16));
+		cOffset += sizeof(uchar16) * SG_SIZE;
+
+        // now we need to calculate again for other x
+        padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            // B0..3, F0..31
+            QUANTIZATION(4);
+        }
+
+        intel_sub_group_block_write4( (__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16) );
+        cOffset += sizeof(uchar16) * SG_SIZE;
+    }
+
+#else
+    // Write final accumulated values
+    uint cOffset = sg_global_idX * ((MATRIX_M / 8) * SG_TILE_N) + sg_global_idY * (SG_TILE_M / 8) +
+                   sg_tid * (MATRIX_M / 8);
+    __attribute__((opencl_unroll_hint(SIMD_LANE_N)))
+    for (uint i = 0; i < (SIMD_LANE_N); ++i)
+    {
+        __attribute__((opencl_unroll_hint(SIMD_LANE_M / 8)))
+        for (uint j = 0; j < (SIMD_LANE_M / 8); ++j)
+        {
+            g_matrixC[cOffset + j] = regC[i*(SIMD_LANE_M / 8) + j];
+        }
+        cOffset += SG_SIZE * (MATRIX_M / 8);
+    }
+#endif
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_slm_int8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_slm_int8.cl
new file mode 100644
index 000000000..0a6d73112
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_32x32sg_slm_int8.cl
@@ -0,0 +1,430 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/mmad.cl"
+
+inline uint FUNC(calculate_output_offset_to_account_padding)(uint cOffset)
+{
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * OUT_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * OUT_B_BLOCK_PITCH;
+    padded_offset += y_idx * OUT_Y_PITCH;
+    padded_offset += x_idx * OUT_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += OUT_OFFSET;
+
+    return padded_offset;
+}
+
+inline void FUNC(mmad_32x32_int8)(  __local uint* l_tileA, const uint l_offsetTileA,
+                                    __local int8* l_tileB, const uint l_offsetTileB_col0,
+                                    const uint l_offsetTileB_col1, const uint l_offsetTileB_col2,
+                                    const uint l_offsetTileB_col3, int8* rowA, int8* colB,
+                                    int8* regC)
+{
+    // Read tile A from SLM to regA
+    uint l_offsetTileATemp = l_offsetTileA;
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+		rowA[j] = as_int8(SLM_BLOCK_READ_8(&l_tileA[l_offsetTileATemp]));
+        l_offsetTileATemp += 8 * SG_SIZE;
+    }
+    // Read tile B from SLM to regB and compute mmad
+    colB[0] = l_tileB[l_offsetTileB_col0];
+    colB[1] = l_tileB[l_offsetTileB_col1];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[0*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[0], regC[0*(SIMD_LANE_M / 8) + j]);
+    }
+    colB[0] = l_tileB[l_offsetTileB_col2];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[1*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[1], regC[1*(SIMD_LANE_M / 8) + j] );
+	}
+    colB[1] = l_tileB[l_offsetTileB_col3];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[2*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[0], regC[2*(SIMD_LANE_M / 8) + j]);
+    }
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[3*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[1], regC[3*(SIMD_LANE_M / 8) + j]);
+    }
+}
+
+/*
+ *  \brief GEMM kernel to compute MxN matrix using SLM
+ *  \param g_inA  - Input matrix 
+ *  \param g_inB  - Input matrix 
+ *  \param g_outC - Output matrix
+ */
+
+__attribute__((intel_reqd_sub_group_size(SG_SIZE)))   
+KERNEL(Kernel_GEMM_MMAD8_32x32SG_128x128WG_SLM_INT8)
+														  (														  
+														  __global char* const g_inA,                                                    
+														  __global int* g_outC,
+														  __global char* const g_inB,                                                     
+ 														    #if BIAS_TERM
+																__global BIAS_TYPE* biases,
+															#endif
+																__global float* quantizations,
+															#if CALIBRATION_TERM
+																__global float* calibrations,
+															#endif
+																uint split_idx
+
+														   )
+{
+
+    __global int4* const g_matrixA = (__global int4*)g_inA;
+    __global int4* const g_matrixB = (__global int4*)g_inB;
+    __global int8* g_matrixC = (__global int8*)g_outC;
+
+    // 1) All work-items in work-group fill SLM with tileA and tileB.
+    // 2) Each sub-group works to compute a 32x32 tileC (stored in regC).
+    //    Note that each work-item in the sub-group computes a 32x4 chunk of tileC.
+    // 3) Repeat until tileC is fully computed (while moving tileA and tileB "windows")
+    __local int8 l_workGroupTileA_0[(WG_TILE_M * MATRIX_SMALL_K) / sizeof(int8)];
+    __local int8 l_workGroupTileB_0[(WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8)];
+    __local uint* l_workGroupTileA_uint_0 = (__local uint*)l_workGroupTileA_0;
+
+    __local int8 l_workGroupTileA_1[(WG_TILE_M * MATRIX_SMALL_K) / sizeof(int8)];
+    __local int8 l_workGroupTileB_1[(WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8)];
+    __local uint* l_workGroupTileA_uint_1 = (__local uint*)l_workGroupTileA_1;
+
+    __local int8* l_workGroupTileA_live =  l_workGroupTileA_0;
+    __local int8* l_workGroupTileB_live =  l_workGroupTileB_0;
+    __local uint* l_workGroupTileA_live_uint = l_workGroupTileA_uint_0;
+
+    __local int4* l_workGroupTileA_0_int4 = (__local int4*)l_workGroupTileA_0;
+    __local int4* l_workGroupTileB_0_int4 = (__local int4*)l_workGroupTileB_0;
+    __local int4* l_workGroupTileA_1_int4 = (__local int4*)l_workGroupTileA_1;
+    __local int4* l_workGroupTileB_1_int4 = (__local int4*)l_workGroupTileB_1;
+
+    const uint l_groupSize = get_local_size(DIM_X) * get_local_size(DIM_Y);
+
+    // Thread IDs
+    const uint g_tidY = get_global_id(DIM_Y);
+    const uint g_tidX = get_global_id(DIM_X);
+    const uint l_tidX = get_local_id(DIM_X);
+    const uint l_tidY = get_local_id(DIM_Y);
+    const uint l_tid = l_tidY * get_local_size(DIM_X) + l_tidX;
+
+    // SubGroup IDs
+    const uint sg_tid = get_sub_group_local_id();
+    const uint sg_global_idX = (uint)(g_tidX / SG_SIZE);
+    const uint sg_global_idY = g_tidY;
+    const uint sg_local_idX = (uint)(l_tidX / SG_SIZE);
+    const uint sg_local_idY = l_tidY;
+    const uint sg_local_id = sg_local_idY * get_local_size(DIM_X) / SG_SIZE + sg_local_idX;
+
+    const uint sub_group_id = get_sub_group_id();
+
+    // Registers
+    int8 regC[(SIMD_LANE_M / 8) * SIMD_LANE_N] = {0}; // Each work-item responsible for 32x4 ints elts
+    int8 rowA[(SG_TILE_M * MATRIX_SMALL_K / SG_SIZE) / sizeof(int8)]; // each work-item will hold 1/8 of matrixA
+    int8 colB[2]; // each lane will store 32x4 piece of matrixB
+
+    // SLM indices
+    const uint l_offsetTileA = SG_TILE_M * (MATRIX_SMALL_K / sizeof(uint)) * sg_local_idY;
+    const uint numElements32x32TileB = (MATRIX_SMALL_K * SG_TILE_N) / sizeof(int8);
+    const uint numElements32x8TileB = numElements32x32TileB / 4;
+    const uint l_offsetTileB = numElements32x32TileB * sg_local_idX;
+    const uint l_offsetTileB_col0 = l_offsetTileB + sg_tid;
+    const uint l_offsetTileB_col1 = l_offsetTileB + 1 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col2 = l_offsetTileB + 2 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col3 = l_offsetTileB + 3 * numElements32x8TileB + sg_tid;
+
+    // Global indices
+#ifdef TILED_GLOBAL_LAYOUT  // 32-row major (matrixA) and 32-col major (matrixB)
+    uint g_idxA = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_M) * get_group_id(DIM_Y) + l_tid;
+    uint g_idxB = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_N) * get_group_id(DIM_X) + l_tid;
+#else  // Row (matrixA) and Col (matrixB) major layout
+    uint g_idxA = WG_TILE_M * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_Y) +
+                  (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    uint g_idxB = WG_TILE_N * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_X) +
+                  (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+#endif
+
+    // Initial SLM setup
+    {
+        uint g_idxATemp = g_idxA;
+        for (uint i = l_tid; i < (WG_TILE_M * MATRIX_SMALL_K / sizeof(int4)); i += WG_SIZE)
+        {
+            l_workGroupTileA_0_int4[i] = g_matrixA[g_idxATemp];
+#ifdef TILED_GLOBAL_LAYOUT
+            g_idxATemp += WG_SIZE;
+#else
+            g_idxATemp += (WG_SIZE / 2) * (MATRIX_K / sizeof(int4));
+#endif
+        }
+
+        uint g_idxBTemp = g_idxB;
+        for (uint i = l_tid; i < (WG_TILE_N * MATRIX_SMALL_K / sizeof(int4)); i += WG_SIZE)
+        {
+            l_workGroupTileB_0_int4[i] = g_matrixB[g_idxBTemp];
+#ifdef TILED_GLOBAL_LAYOUT
+            g_idxBTemp += WG_SIZE;
+#else
+            g_idxBTemp +=  (WG_SIZE / 2) * (MATRIX_K / sizeof(int4));
+#endif
+        }
+
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    int4 hdcReadValueA[(WG_TILE_M * MATRIX_SMALL_K / sizeof(int4)) / WG_SIZE < 1
+                           ? 1
+                           : (WG_TILE_M * MATRIX_SMALL_K / sizeof(int4)) / WG_SIZE];
+    int4 hdcReadValueB[(WG_TILE_N * MATRIX_SMALL_K / sizeof(int4)) / WG_SIZE < 1
+                           ? 1
+                           : (WG_TILE_N * MATRIX_SMALL_K / sizeof(int4)) / WG_SIZE];
+
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < (MATRIX_K / MATRIX_SMALL_K) - 1; k++)
+    {
+        /*
+         * SLM setup - HDC read only
+         */
+
+#if ((MATRIX_K / MATRIX_SMALL_K) > 1)
+        uint g_idxATemp = g_idxA;
+        for (uint i = l_tid, j = 0; i < (WG_TILE_M * MATRIX_SMALL_K / sizeof(int4)); i += WG_SIZE, ++j)
+        {
+            hdcReadValueA[j] = g_matrixA[g_idxATemp];
+#ifdef TILED_GLOBAL_LAYOUT
+            g_idxATemp += WG_SIZE;
+#else
+            g_idxATemp += (WG_SIZE / 2) * (MATRIX_K / sizeof(int4));
+#endif
+        }
+
+        uint g_idxBTemp = g_idxB;
+        for (uint i = l_tid, j = 0; i < (WG_TILE_N * MATRIX_SMALL_K / sizeof(int4)); i += WG_SIZE, ++j)
+        {
+            hdcReadValueB[j] = g_matrixB[g_idxBTemp];
+#ifdef TILED_GLOBAL_LAYOUT
+            g_idxBTemp += WG_SIZE;
+#else
+            g_idxBTemp += (WG_SIZE / 2) * (MATRIX_K / sizeof(int4));
+#endif
+        }
+
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB += MATRIX_SMALL_K / sizeof(int4);
+#endif
+#endif
+
+        /*
+         * MMAD compute
+         */
+
+        FUNC_CALL(mmad_32x32_int8)(l_workGroupTileA_live_uint, l_offsetTileA, l_workGroupTileB_live,
+                                l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2,
+                                l_offsetTileB_col3, rowA, colB, regC);
+
+        /*
+         * SLM setup - SLM write only
+         */
+
+#if ((MATRIX_K / MATRIX_SMALL_K) > 1)
+        if (k % 2 == 0)
+        {
+            for (uint i = l_tid, j = 0; i < (WG_TILE_M * MATRIX_SMALL_K / sizeof(int4));
+                 i += WG_SIZE, ++j)
+            {
+                l_workGroupTileA_1_int4[i] = hdcReadValueA[j];
+            }
+
+            for (uint i = l_tid, j = 0; i < (WG_TILE_N * MATRIX_SMALL_K / sizeof(int4));
+                 i += WG_SIZE, ++j)
+            {
+                l_workGroupTileB_1_int4[i] = hdcReadValueB[j];
+            }
+
+            l_workGroupTileA_live = l_workGroupTileA_1;
+            l_workGroupTileB_live = l_workGroupTileB_1;
+            l_workGroupTileA_live_uint = l_workGroupTileA_uint_1;
+        }
+        else
+        {
+            for (uint i = l_tid, j = 0; i < (WG_TILE_M * MATRIX_SMALL_K / sizeof(int4));
+                 i += WG_SIZE, ++j)
+            {
+                l_workGroupTileA_0_int4[i] = hdcReadValueA[j];
+            }
+
+            for (uint i = l_tid, j = 0; i < (WG_TILE_N * MATRIX_SMALL_K / sizeof(int4));
+                 i += WG_SIZE, ++j)
+            {
+                l_workGroupTileB_0_int4[i] = hdcReadValueB[j];
+            }
+
+            l_workGroupTileA_live = l_workGroupTileA_0;
+            l_workGroupTileB_live = l_workGroupTileB_0;
+            l_workGroupTileA_live_uint = l_workGroupTileA_uint_0;
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+    }
+
+    /*
+     * Last MMAD compute iteration (avoids branching in main loop)
+     */
+    FUNC_CALL(mmad_32x32_int8)(l_workGroupTileA_live_uint, l_offsetTileA, l_workGroupTileB_live,
+                            l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2,
+                            l_offsetTileB_col3, rowA, colB, regC);
+                            
+#ifdef OUTPUT_TILED_GLOBAL_LAYOUT
+    // Write out in swizzled manner after quantizing
+    __global uchar* g_outC_uchar = (__global uchar*)g_outC;
+    uint cOffset = sg_global_idX * (MATRIX_M * SG_TILE_N / sizeof(uchar)) +
+                   sg_global_idY * (SG_TILE_M * SG_TILE_N / sizeof(uchar));
+
+    uchar8 regC_uchar8[SIMD_LANE_M * SIMD_LANE_N / (sizeof(uchar8) / sizeof(uchar))];
+    uint offset_uc8 = 0;
+
+	const uint workgroup_id_x = get_group_id(0); 
+	uint feature_off = 32*(sub_group_id % (WG_TILE_N / 32)) + WG_TILE_N*workgroup_id_x; //=32*{0,1,2,3} + WG_TILE_N * workgroup_id_x 
+	uint feature = get_sub_group_local_id() + feature_off;
+
+    float4 quant_f = as_float4(intel_sub_group_block_read4((__global uint*) (quantizations + feature) ));
+    float4 bias_f = as_float4(intel_sub_group_block_read4((__global uint*) (biases + feature) ));
+    float4 calib_f = as_float4(intel_sub_group_block_read4((__global uint*) (calibrations + feature) ));
+
+#if MMAD_SUPPORTED == 1
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+#endif
+    for (uint i = 0; i < SG_TILE_M / (sizeof(int8) / sizeof(int)); i++)
+    {
+        // begin of account for output PADDING
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        // end of account for padding
+
+        // B0 F0..31
+		regC_uchar8[offset_uc8].s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s0) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s0) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s0) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s0) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));	
+        // B1 F0..31		
+		regC_uchar8[offset_uc8].s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s1) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s1) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s1) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s1) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));	
+
+		FUNC_CALL(sub_group_block_write_uchar8)(&g_outC_uchar[padded_offset], regC_uchar8[offset_uc8]);
+        cOffset += sizeof(uchar8) * SG_SIZE;
+        padded_offset += sizeof(uchar8) * SG_SIZE;
+        offset_uc8++;
+
+        // B2 F0..31
+        regC_uchar8[offset_uc8].s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s2) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s2) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s2) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s2) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));	
+        // B3 F0..31		
+		regC_uchar8[offset_uc8].s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s3) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s3) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s3) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s3) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));	
+		
+		FUNC_CALL(sub_group_block_write_uchar8)(&g_outC_uchar[padded_offset], regC_uchar8[offset_uc8]);
+		cOffset += sizeof(uchar8) * SG_SIZE;
+        offset_uc8++;
+
+        // now we need to calculate again for other x
+        padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        //
+
+        regC_uchar8[offset_uc8].s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s4) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s4) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s4) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s4) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));	
+		
+		regC_uchar8[offset_uc8].s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s5) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s5) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s5) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s5) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));
+
+		FUNC_CALL(sub_group_block_write_uchar8)(&g_outC_uchar[padded_offset], regC_uchar8[offset_uc8]);
+        cOffset += sizeof(uchar8) * SG_SIZE;
+        padded_offset += sizeof(uchar8) * SG_SIZE;
+        offset_uc8++;
+
+        regC_uchar8[offset_uc8].s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s6) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s6) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s6) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s6) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));	
+		
+		regC_uchar8[offset_uc8].s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i].s7) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i].s7) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i].s7) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));
+		regC_uchar8[offset_uc8].s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i].s7) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));
+
+		FUNC_CALL(sub_group_block_write_uchar8)(&g_outC_uchar[padded_offset], regC_uchar8[offset_uc8]);
+        cOffset += sizeof(uchar8) * SG_SIZE;
+        offset_uc8++;
+    }
+#else
+    // Write final accumulated values
+    uint cOffset = sg_global_idX * ((MATRIX_M / 8) * SG_TILE_N) + sg_global_idY * (SG_TILE_M / 8) +
+                   sg_tid * (MATRIX_M / 8);
+    __attribute__((opencl_unroll_hint(SIMD_LANE_N)))
+    for (uint i = 0; i < (SIMD_LANE_N); ++i)
+    {
+        __attribute__((opencl_unroll_hint(SIMD_LANE_M / 8)))
+        for (uint j = 0; j < (SIMD_LANE_M / 8); ++j)
+        {
+            g_matrixC[cOffset + j] = regC[i*(SIMD_LANE_M / 8) + j];
+        }
+        cOffset += SG_SIZE * (MATRIX_M / 8);
+    }
+#endif
+
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_batched_block.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_batched_block.cl
new file mode 100644
index 000000000..0e6505973
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_batched_block.cl
@@ -0,0 +1,194 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/mmad.cl"
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+    
+#define QUANTIZATION \
+    uchar4 out;\
+    out[0] = convert_uchar_sat((float)dotProd[o + OUT_BLOCK_WIDTH * 0][b] * SCALE + bias_f.s0);\
+    out[1] = convert_uchar_sat((float)dotProd[o + OUT_BLOCK_WIDTH * 1][b] * SCALE + bias_f.s1);\
+    out[2] = convert_uchar_sat((float)dotProd[o + OUT_BLOCK_WIDTH * 2][b] * SCALE + bias_f.s2);\
+    out[3] = convert_uchar_sat((float)dotProd[o + OUT_BLOCK_WIDTH * 3][b] * SCALE + bias_f.s3);
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION \
+    uchar4 out;\
+    out[0] = convert_uchar_sat(dotProd[o + OUT_BLOCK_WIDTH * 0][b]);\
+    out[1] = convert_uchar_sat(dotProd[o + OUT_BLOCK_WIDTH * 1][b]);\
+    out[2] = convert_uchar_sat(dotProd[o + OUT_BLOCK_WIDTH * 2][b]);\
+    out[3] = convert_uchar_sat(dotProd[o + OUT_BLOCK_WIDTH * 3][b]);
+
+#else
+
+#define QUANTIZATION \
+    char4 out;\
+    out[0] = ACTIVATION(convert_char(round(((float)dotProd[o + OUT_BLOCK_WIDTH * 0][b] * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N);\
+    out[1] = ACTIVATION(convert_char(round(((float)dotProd[o + OUT_BLOCK_WIDTH * 1][b] * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N);\
+    out[2] = ACTIVATION(convert_char(round(((float)dotProd[o + OUT_BLOCK_WIDTH * 2][b] * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N);\
+    out[3] = ACTIVATION(convert_char(round(((float)dotProd[o + OUT_BLOCK_WIDTH * 3][b] * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N);
+
+#endif
+
+#define FILTER_IFM_MMAD_NUM ((FILTER_IFM_NUM + 31) / 32)
+#define FILTER_OFM_MMAD_NUM ((FILTER_OFM_NUM + 7) / 8)
+#define FILTER_IFM_ALIGNED (FILTER_IFM_MMAD_NUM * 32)
+#define FILTER_OFM_ALIGNED (FILTER_OFM_MMAD_NUM * 8)
+// input data is in blocks 4batch x 32 features
+
+#define NEEDED_INPUT_X ((OUT_BLOCK_WIDTH-1) * (STRIDE_SIZE_X) + (FILTER_SIZE_X - 1) + 1)
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+KERNEL(convolution_mmad_batched_block)(
+    __global INPUT0_TYPE* input, 
+    __global OUTPUT_TYPE* output, 
+    __global FILTER_TYPE* weights, 
+    __global BIAS_TYPE* biases,
+    const __global float* quantizations,
+#if CALIBRATION_TERM
+    const __global float* calibrations,
+#endif
+    uint split_idx)
+{
+    const uint x = get_global_id(0) * OUT_BLOCK_WIDTH;
+    const uint y = get_global_id(1) * OUT_BLOCK_HEIGHT;
+
+#if WEIGHTS_PER_WORKITEM == 4
+    const uint f = (get_group_id(2) * 32 + get_sub_group_local_id() * 4) % FILTER_OFM_ALIGNED;
+#else
+    const uint f = ((get_group_id(2) * WEIGHTS_PER_WORKITEM * 8) + get_sub_group_local_id() ) % FILTER_OFM_ALIGNED;
+#endif
+    const uint b_block = (get_group_id(2) * 8 * WEIGHTS_PER_WORKITEM) / FILTER_OFM_ALIGNED;
+
+    // all accumulators
+    int4 dotProd[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * WEIGHTS_PER_WORKITEM] = { 0 };
+
+    const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    const uint filter_offset = ((get_group_id(2) * WEIGHTS_PER_WORKITEM) % FILTER_OFM_MMAD_NUM) * FILTER_OFM_BLOCK_PITCH;
+    const uint input_offset = IN_OFFSET + IN_B_BLOCK_PITCH * b_block;
+
+    uint filter_idx = filter_offset;
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < FILTER_IFM_MMAD_NUM; ++k)
+    {
+        __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+        for (uint j = 0; j < FILTER_SIZE_Y; ++j)
+        {
+            
+            ////// preloading input data //////
+            int4 preloaded_input[NEEDED_INPUT_X];
+            for(int p = 0; p < NEEDED_INPUT_X; p++)
+            {
+                const int input_offset_y = input_y + j;
+                const int input_offset_x = input_x + p;
+
+                uint input_idx = input_offset + input_offset_y * IN_Y_PITCH + input_offset_x * IN_X_PITCH + k * IN_F_BLOCK_PITCH;
+                preloaded_input[p] = as_int4(intel_sub_group_block_read4((const __global uint*)(input + input_idx)));
+            }
+
+            __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+            for(uint wi = 0; wi < WEIGHTS_PER_WORKITEM; wi++)
+            {
+                ////// preloading weights data //////
+                int8 preloaded_weights[FILTER_SIZE_X];
+                uint tmp_filter_idx = filter_idx;
+                __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+                for(uint w = 0; w < FILTER_SIZE_X; w++)
+                {
+                    preloaded_weights[w] = as_int8(intel_sub_group_block_read8((const __global uint*) (weights + tmp_filter_idx + (wi * FILTER_OFM_BLOCK_PITCH))));
+                    tmp_filter_idx += FILTER_X_PITCH;
+                }
+                ////// computing //////
+                __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+                for (uint i = 0; i < FILTER_SIZE_X; ++i)
+                {
+                    __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+                    for(uint ox = 0; ox < OUT_BLOCK_WIDTH; ox++)
+                    {
+                        const uint out_idx = ox + wi * OUT_BLOCK_WIDTH;
+                        const uint in_idx = ox * STRIDE_SIZE_X + i;
+                        dotProd[out_idx] = MMAD_4x8(preloaded_input[in_idx], preloaded_weights[i], dotProd[out_idx]);
+                    }
+                }
+            }
+            filter_idx += FILTER_X_PITCH * FILTER_SIZE_X;
+        }
+    }
+
+////// QUANTIZE & OUTPUT //////
+
+#if WEIGHTS_PER_WORKITEM == 4
+
+float4 quant_f = vload4(0, quantizations + f);
+float4 bias_f = vload4(0, biases + f);
+float4 calib_f = vload4(0, calibrations + f);
+
+__attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+for(uint o = 0; o < OUT_BLOCK_WIDTH; o++)
+{
+    const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b_block*4, f, y, x + o);
+    uint4 to_output;
+    __attribute__((opencl_unroll_hint(4)))
+    for(uint b = 0; b < 4; b++)
+    {
+        QUANTIZATION;
+        to_output[b] = as_uint(out);
+    }
+    intel_sub_group_block_write4((__global uint*)(output + dst_index), to_output);
+}
+#else
+__attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+{
+    float quant_f = quantizations[f + w * 8];
+    float bias_f = biases[f + w * 8];
+#if CALIBRATION_TERM
+    float calib_f = calibrations[f + w * 8];
+#endif
+    __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+    for(uint o = 0; o < OUT_BLOCK_WIDTH; o++)
+    {
+        const uint out_idx = o + OUT_BLOCK_WIDTH * w;
+        __attribute__((opencl_unroll_hint(4)))
+        for(uint b = 0; b < 4; b++)
+        {
+        #if CALIBRATION_TERM
+            dotProd[out_idx][b] = (UNIT_TYPE)round(((float)dotProd[out_idx][b] * quant_f * I_QF + bias_f) * calib_f);
+        #else  // CALIBRATION_TERM
+            dotProd[out_idx][b] = (UNIT_TYPE)round(((float)dotProd[out_idx][b] * quant_f * I_QF + bias_f) * O_QF);
+        #endif // CALIBRATION_TERM
+
+            const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b_block*4 + b, f + w * 8, y, x + o);
+            output[dst_index] = ACTIVATION(convert_char(dotProd[out_idx][b]), NL_M, NL_N);
+        }
+    }
+}
+#endif
+
+}
+
+#undef FILTER_IFM_MMAD_NUM
+#undef FILTER_OFM_MMAD_NUM
+#undef FILTER_IFM_ALIGNED
+#undef FILTER_OFM_ALIGNED
+
+#undef SCALE
+#undef QUANTIZATION
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_batched_block_1x1.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_batched_block_1x1.cl
new file mode 100644
index 000000000..bc58c70ea
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_batched_block_1x1.cl
@@ -0,0 +1,241 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/fetch.cl"
+#include "include/mmad.cl"
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION \
+    uchar4 out;\
+    out[0] = convert_uchar_sat((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 0][b] * SCALE + bias_f.s0);\
+    out[1] = convert_uchar_sat((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 1][b] * SCALE + bias_f.s1);\
+    out[2] = convert_uchar_sat((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 2][b] * SCALE + bias_f.s2);\
+    out[3] = convert_uchar_sat((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 0][b] * SCALE + bias_f.s3);
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION \
+    uchar4 out;\
+    out[0] = convert_uchar_sat(dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 0][b]);\
+    out[1] = convert_uchar_sat(dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 1][b]);\
+    out[2] = convert_uchar_sat(dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 2][b]);\
+    out[3] = convert_uchar_sat(dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 3][b]);
+
+#else
+
+#define QUANTIZATION \
+    char4 out;\
+    out[0] = ACTIVATION(convert_char(round( ((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 0][b]  * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0 ) ), NL_M, NL_N);\
+    out[1] = ACTIVATION(convert_char(round( ((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 1][b]  * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1 ) ), NL_M, NL_N);\
+    out[2] = ACTIVATION(convert_char(round( ((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 2][b]  * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2 ) ), NL_M, NL_N);\
+    out[3] = ACTIVATION(convert_char(round( ((float)dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * 3][b]  * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3 ) ), NL_M, NL_N);
+
+#endif
+
+#define FILTER_IFM_MMAD_NUM ((FILTER_IFM_NUM + 31) / 32)
+#define FILTER_OFM_MMAD_NUM ((FILTER_OFM_NUM + 7) / 8)
+#define FILTER_IFM_ALIGNED (FILTER_IFM_MMAD_NUM * 32)
+#define FILTER_OFM_ALIGNED (FILTER_OFM_MMAD_NUM * 8)
+// input data is in blocks 4batch x 32 features
+
+#define NEEDED_INPUT_X ((OUT_BLOCK_WIDTH-1) * (STRIDE_SIZE_X) + (FILTER_SIZE_X - 1) + 1)
+#define NEEDED_INPUT_Y ((OUT_BLOCK_HEIGHT-1) * (STRIDE_SIZE_Y) + (FILTER_SIZE_Y - 1) + 1)
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+KERNEL(convolution_mmad_batched_block_1x1)(
+    __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* output,
+    __global FILTER_TYPE* weights,
+    __global BIAS_TYPE* biases,
+    const __global float* quantizations,
+#if CALIBRATION_TERM
+    const __global float* calibrations,
+#endif
+    uint split_idx)
+{
+    const uint x = get_global_id(0) * OUT_BLOCK_WIDTH;
+    const uint y = get_global_id(1) * OUT_BLOCK_HEIGHT;
+
+#if WEIGHTS_PER_WORKITEM == 4
+    const uint f = (get_group_id(2) * 32 + get_sub_group_local_id() * 4) % FILTER_OFM_ALIGNED;
+#else
+    const uint f = ((get_group_id(2) * WEIGHTS_PER_WORKITEM * 8) + get_sub_group_local_id() ) % FILTER_OFM_ALIGNED;
+#endif
+    const uint b_block = (get_group_id(2) * 8 * WEIGHTS_PER_WORKITEM) / FILTER_OFM_ALIGNED;
+
+    int4 dotProd[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * WEIGHTS_PER_WORKITEM] = { 0 };
+
+    const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    const uint filter_offset = ((get_group_id(2) * WEIGHTS_PER_WORKITEM) % FILTER_OFM_MMAD_NUM) * FILTER_OFM_BLOCK_PITCH;
+    const uint input_offset = IN_OFFSET + IN_B_BLOCK_PITCH * b_block;
+
+    uint filter_idx = filter_offset;
+    for (uint k = 0; k < FILTER_IFM_MMAD_NUM; ++k)
+    {
+        ////// preloading input data //////
+        int4 preloaded_input[NEEDED_INPUT_X * NEEDED_INPUT_Y];
+        for(int h = 0; h < NEEDED_INPUT_Y; h++)
+        {
+            for(int p = 0; p < NEEDED_INPUT_X; p++)
+            {
+                const int input_offset_y = input_y + h;
+                const int input_offset_x = input_x + p;
+
+                uint input_idx = input_offset + input_offset_y * IN_Y_PITCH + input_offset_x * IN_X_PITCH + k * IN_F_BLOCK_PITCH;
+                preloaded_input[p + h * NEEDED_INPUT_X] = as_int4(intel_sub_group_block_read4((const __global uint*)(input + input_idx)));
+            }
+        }
+
+        __attribute__((opencl_unroll_hint(FILTER_SIZE_Y)))
+        for (uint j = 0; j < FILTER_SIZE_Y; ++j)
+        {
+            __attribute__((opencl_unroll_hint(FILTER_SIZE_X)))
+            for (uint i = 0; i < FILTER_SIZE_X; ++i)
+            {
+                ////// preloading weights data //////
+                int8 preloaded_weights[WEIGHTS_PER_WORKITEM];
+                __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+                for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+                {
+                    preloaded_weights[w] = as_int8(intel_sub_group_block_read8((const __global uint*) (weights + (filter_idx + w * FILTER_OFM_BLOCK_PITCH) ) ));
+                }
+
+                ////// computing //////
+                __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+                for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+                {
+                    __attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+                    for(uint oy = 0; oy < OUT_BLOCK_HEIGHT; oy++)
+                    {
+                        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+                        for(uint ox = 0; ox < OUT_BLOCK_WIDTH; ox++)
+                        {
+                            const uint out_idx = ox + OUT_BLOCK_WIDTH * (oy + w * OUT_BLOCK_HEIGHT);
+                            const uint preloaded_idx =ox * STRIDE_SIZE_X + i + NEEDED_INPUT_X * (oy * STRIDE_SIZE_Y + j);
+                            dotProd[out_idx] = MMAD_4x8(preloaded_input[preloaded_idx], preloaded_weights[w], dotProd[out_idx]);
+                        }
+                    }
+                }
+                filter_idx += FILTER_X_PITCH;
+            }
+        }
+    }
+
+
+#if WEIGHTS_PER_WORKITEM == 4
+
+float4 quant_f = vload4(0, quantizations + f);
+float4 bias_f = vload4(0, biases + f);
+float4 calib_f = vload4(0, calibrations + f);
+__attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+for(uint h = 0; h < OUT_BLOCK_HEIGHT; h++)
+{
+    __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+    for(uint o = 0; o < OUT_BLOCK_WIDTH; o++)
+    {
+        const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b_block*4, f, y + h, x + o);
+
+        uint4 to_output;
+        __attribute__((opencl_unroll_hint(4)))
+        for(uint b = 0; b < 4; b++)
+        {
+            const uint out_idx = o + OUT_BLOCK_WIDTH * h;
+
+            QUANTIZATION;
+            to_output[b] = as_uint(out);
+        }
+        intel_sub_group_block_write4((__global uint*)(output + dst_index), to_output);
+    }
+}
+
+#else // WEIGHTS_PER_WORKITEM ==4
+
+////// QUANTIZE & OUTPUT //////
+__attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+{
+    float quant_f = quantizations[f + w * 8];
+    float bias_f = biases[f + w * 8];
+#if CALIBRATION_TERM
+    float calib_f = calibrations[f + w * 8];
+#endif
+    __attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+    for(uint h = 0; h < OUT_BLOCK_HEIGHT; h++)
+    {
+        __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+        for(uint o = 0; o < OUT_BLOCK_WIDTH; o++)
+        {
+            const uint out_idx = o + OUT_BLOCK_WIDTH * (h + w * OUT_BLOCK_HEIGHT);
+            for(uint b = 0; b < 4; b++)
+            {
+            #if CALIBRATION_TERM
+                dotProd[out_idx][b] = (UNIT_TYPE)round(((float)dotProd[out_idx][b] * quant_f * I_QF + bias_f) * calib_f);
+            #else  // CALIBRATION_TERM
+                dotProd[out_idx][b] = (UNIT_TYPE)round(((float)dotProd[out_idx][b] * quant_f * I_QF + bias_f) * O_QF);
+            #endif // CALIBRATION_TERM
+            }
+        }
+    }
+}
+
+////// OUTPUT STAGE //////
+__attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+for(uint h = 0; h < OUT_BLOCK_HEIGHT; h++)
+{
+    __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+    for(uint o = 0; o < OUT_BLOCK_WIDTH; o++)
+    {
+        const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b_block*4, f, y + h, x + o);
+        
+        __attribute__((opencl_unroll_hint(4)))
+        for(uint b = 0; b < 4; b++)
+        {
+            #if WEIGHTS_PER_WORKITEM == 2
+                char2 out;
+                const uint out_idx = o + OUT_BLOCK_WIDTH * h;
+                out[0] = ACTIVATION(convert_char(dotProd[out_idx][b]), NL_M, NL_N);
+                out[1] = ACTIVATION(convert_char(dotProd[out_idx + OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT][b]), NL_M, NL_N);
+
+                intel_sub_group_block_write_uc2((__global uchar*)(output + dst_index + b * 32), as_uchar2(out));
+            #else
+            __attribute__((opencl_unroll_hint(WEIGHTS_PER_WORKITEM)))
+            for(uint w = 0; w < WEIGHTS_PER_WORKITEM; w++)
+            {
+                const uint out_idx = o + OUT_BLOCK_WIDTH * (h + w * OUT_BLOCK_HEIGHT);
+                const uint dst_index = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b_block*4, f + w * 8, y + h, x + o);
+                char char_val = ACTIVATION(convert_char(dotProd[out_idx][b]), NL_M, NL_N);
+                output[dst_index + b * 32] = char_val;
+            }
+            #endif
+        }
+    }
+}
+
+#endif // WEIGHTS_PER_WORKITEM ==4
+
+}
+
+#undef FILTER_IFM_MMAD_NUM
+#undef FILTER_OFM_MMAD_NUM
+#undef FILTER_IFM_ALIGNED
+#undef FILTER_OFM_ALIGNED
+
+
+#undef SCALE
+#undef QUANTIZATION
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_slm_2x14_rep4.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_slm_2x14_rep4.cl
new file mode 100644
index 000000000..f9e04cf77
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_slm_2x14_rep4.cl
@@ -0,0 +1,948 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/data_types.cl"
+#include "include/mmad.cl"
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION \
+    slm_write0.s0 = convert_uchar_sat((float)outvec.s0 * SCALE + bias_f);\
+    slm_write0.s1 = convert_uchar_sat((float)outvec.s1 * SCALE + bias_f);\
+    slm_write0.s2 = convert_uchar_sat((float)outvec.s2 * SCALE + bias_f);\
+    slm_write0.s3 = convert_uchar_sat((float)outvec.s3 * SCALE + bias_f);
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    slm_write0.s0 = convert_uchar_sat(outvec.s0);\
+    slm_write0.s1 = convert_uchar_sat(outvec.s1);\
+    slm_write0.s2 = convert_uchar_sat(outvec.s2);\
+    slm_write0.s3 = convert_uchar_sat(outvec.s3);
+
+#else
+
+#define QUANTIZATION \
+    slm_write0.s0 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s0) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));\
+    slm_write0.s1 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s1) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));\
+    slm_write0.s2 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s2) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));\
+    slm_write0.s3 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s3) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));
+
+#endif
+
+// mapping to clDNN
+#define _MMAD_4x8(C, A, B) MMAD_4x8(A, B, C)
+#define _OD OUTPUT_FEATURE_NUM
+#define _OW OUTPUT_SIZE_X
+#define _OH OUTPUT_SIZE_Y
+#define OWPAD (OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X)
+#define OHPAD (OUTPUT_PAD_BEFORE_SIZE_Y + OUTPUT_PAD_AFTER_SIZE_Y)
+#define _IH INPUT0_SIZE_Y
+#define _IW INPUT0_SIZE_X
+#define _ID INPUT0_FEATURE_NUM
+#define K_HEIGHT FILTER_SIZE_Y
+#define K_WIDTH FILTER_SIZE_X
+#define BATCH_SIZE OUTPUT_BATCH_NUM
+
+#define IHPAD (INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y)
+#define IWPAD (INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X)
+#define K_STRIDE STRIDE_SIZE_X
+// end of mapping
+
+// for now kernel stride is square
+#define K_WSTRIDE K_STRIDE
+#define K_HSTRIDE K_STRIDE
+
+#define PACK 32
+#define BATCH_PACK 4
+
+__attribute__((intel_reqd_sub_group_size(8)))
+KERNEL(convolution_mmad_slm_2x14_rep4)(
+__global int8 *inputs,
+__global uchar* outputs,
+__global int8* weights,
+#if BIAS_TERM
+    __global BIAS_TYPE* biases,
+#endif
+#if QUANTIZATION_TERM
+    const __global float* quantizations,
+#endif
+#if CALIBRATION_TERM
+    const __global float* calibrations,
+#endif
+    uint split_idx
+)
+{
+	const uint TILE_H = OUT_BLOCK_HEIGHT*LOCAL_SIZE_Z;
+	const uint TILE_W = OUT_BLOCK_WIDTH*LOCAL_SIZE_Y; 
+
+	ushort fmg     = get_group_id(0);   // Output Depth 
+	ushort group_y = get_group_id(1);   // Output Width
+	ushort group_z = get_group_id(2);   // Output Height
+
+	/* 32,1,4 WG , SIMD8 - 16 HW threads in a WG
+	threads 0-3   (group1) : (lid_x:0-15,lid_y:0,lid_z:0)	
+	threads 4-7   (group2) : (lid_x:0-15,lid_y:0,lid_z:1)
+	threads 8-11  (group3) : (lid_x:0-15,lid_y:0,lid_z:2)
+	threads 12-15  (group4) : (lid_x:0-15,lid_y:0,lid_z:3)
+	
+    Verify sub_group_layout through below printfs 
+	
+	if(group_z == 0 && group_y == 0 && fmg == 0 && get_sub_group_id() == 31) { 
+			printf("\n sub_group_local_id: %d, lid_x: %d, lid_y: %d, lid_z: %d ", get_sub_group_local_id(), get_local_id(0) ,get_local_id(1),get_local_id(2));	
+			printf("\n #WorkgroupsX: %d, #WorkgroupsY: %d, #WorkgroupsZ: %d",get_num_groups(0),get_num_groups(1),get_num_groups(2)); 	
+	}
+	
+	If sub_group_layout is different then derive lid_x, lid_z
+	
+	lid_z: thread_id/4
+	*/
+	
+	/* Thread, local IDs */
+	ushort thread_id 		= get_sub_group_id();
+	ushort threadid_group_4 = thread_id % 4;
+	ushort threadid_mod_2   = thread_id%2;
+	ushort threadid_mod_8   = thread_id % 8;
+
+	ushort lid_x    = get_local_id(0);
+	ushort lid_z    = get_local_id(2);
+
+	uchar  lane_id  = get_sub_group_local_id();
+
+	/* 32-bit signed accumulator for 4 mini-batches , for a thread OUT_BLOCK_WIDTH*HEIGHT*4 registers are used
+	   Will be converted to 8-bits before final write														*/
+	 
+	int4 out[ OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH ] = { 0 } ;
+	
+	/* Account for batching */
+
+	ushort batch = ( fmg*LOCAL_SIZE_X ) /_OD;
+
+	// Size calculated for int8 elements , One Batch processing is [H][W][4N][32C]
+	uint input_size = (_IH + IHPAD) * (_IW + IWPAD) * BATCH_PACK ;  
+	
+	uint in_addr_offset = batch*input_size;
+	
+	/* Goto activation tile for work group, offset is w.r.t int8 array */
+	
+	uint groupy_tile = TILE_W*group_y;
+	uint groupz_tile = TILE_H*group_z;
+	
+     in_addr_offset += (groupz_tile * K_STRIDE) * (_IW + IWPAD) * BATCH_PACK + (groupy_tile * K_STRIDE) * BATCH_PACK;
+	 
+	 	/* SLM space for Activation, Weights
+	       ( 32,1,4 ) Workgroup - 4 tiles along Y direction and 32 different output channels
+	        Activation - 10Wx16Wx4Nx32C Weights -9RSx32Kx32C	*/
+	
+	__local int8 act_slm      [  10*16*4 ];
+	__local int8 weight_slm   [  9*32  ];
+   
+   /* 10Hx16Wx4Nx32C activation tile written into SLM.  Distribute among 16 threads in Workgroup
+	   threads 0-1 write 16x4x32 of H=0, W=0...15 ( 8x4x32 per thread )
+	   threads 2-3 write 16x4x32 of H=1, W=0...15 ( 8x4x32 per thread )
+	   threads 4-5 write 16x4x32 of H=2, W=0...15 ( 8x4x32 per thread )
+	   threads 6-7 write 16x4x32 of H=3, W=0...15 ( 8x4x32 per thread )
+	   threads 8-9 write 16x4x32 of H=4, W=0...15 ( 8x4x32 per thread )
+	   threads 10-11 write 16x4x32 of H=5, W=0...15 ( 8x4x32 per thread )
+	   threads 12 write 16x4x32 of H=6, W=0...15 ( 16x4x32 per thread )
+	   thread 13 writes 16x4x32 of H=7
+	   thread 14 writes 16x4x32 of H=8
+	   thread 15 writes 16x4x32 of H=9
+
+	   Interleaved write to avoid SLM BC
+	   
+	   threads0,1 write 16x4x32 together
+	   thread0 writes first 4x32 block, thread1 writes next 4x32 block etc.
+   */
+
+        
+	/* Goto activation tile for thread in group */
+	
+	uint row_offset   =  thread_id / 2;
+	
+	if ( thread_id >= 12 ) {
+		row_offset = 6 + thread_id - 12 - threadid_mod_2;
+	}
+	
+	// In addr offset for the particular thread
+	in_addr_offset    += row_offset * K_STRIDE * (_IW + IWPAD ) * BATCH_PACK ;
+
+   /* Activation SLM indices */
+    uint act_slm_write =  row_offset * ( TILE_W + 2) * BATCH_PACK;
+	uint act_slm_read  =  OUT_BLOCK_HEIGHT * lid_z * ( TILE_W + 2) * BATCH_PACK ;
+
+	/* Weights 
+	   Weight Global Tensor Order: [K/8][C/32][R][S][8C][8K][4C]
+	*/
+	
+	/* 9RSx32Kx32C Weight Block in SLM
+	   thread0 handles ( reads from global ) w(0,0),w(0,1),w(0,2) of K=0 ( k=0..7)
+	   thread1 handles w(0,0),w(0,1),w(0,2) of K=1 ( k=8..15)
+	   thread2 handles w(1,0),w(1,1) of K=0 ( k=0..7)
+	   thread3 handles w(1,0),w(1,1) of K=1 ( k=8..15)
+	   thread4 handles w(1,2),w(2,0) of K=0 ( k=0..7)
+	   thread5 handles w(1,2),w(2,0) of K=1 ( k=8..15)
+	   thread6 handles w(2,1),w(2,2) of K=0 ( k=0..7)
+	   thread7 handles w(2,1),w(2,2) of K=1 ( k=8..15)
+	   
+	   Similarly threads8-15 handles for K=2,3
+	   
+	   Weight Layout in SLM
+	   
+	   w(R=0,S=0,k=0..7,C=0..15),w(R=0,S=0,k=8..15,C=0..15)
+	   w(R=0,S=0,k=0..7,C=16..31),w(R=0,S=0,k=8..15,C=16..31)
+	   
+	   Above interleaving present to avoid SLM Bank conflicts when fused threads read from SLM
+	   Thread0 will read k=0..7, thread1 will read k=8..15
+	   
+	   First all output channels are present in SLM, then next weight pixel is present in SLM */
+	  
+	 #define NUM_FILTERS (K_HEIGHT * K_WIDTH)
+	  
+	 uint output_depth = fmg % ( _OD / LOCAL_SIZE_X ); 
+	 	  
+	 uint weight_size_CRS =  ( _ID / PACK ) * NUM_FILTERS * 8; //8 output channels packed inside
+	 
+	 // Global weight addr for workgroup
+	 uint weight_global_addr_offset =  output_depth * 4 * weight_size_CRS ; //32 output channels per workgroup
+	 
+	 // Global weight address for thread 
+	 uint weight_global_channel_offset = threadid_mod_2 * weight_size_CRS ;
+	 
+	uint slm_channel_offset = 0;
+	 
+    if ( thread_id >= 8 ) {
+		weight_global_channel_offset +=  2*weight_size_CRS;
+		slm_channel_offset = 1;	
+    }
+	 
+	 uint weight_global_pixel_offset = 0;
+	 uint slm_pixel_offset = 0;
+	 
+    if ( threadid_mod_8 >=2  )
+    {
+		weight_global_pixel_offset = 3*8 +  ( ( (threadid_mod_8/2) - 1 )*2*8 );
+		slm_pixel_offset 		   = 3*LOCAL_SIZE_X + ( ( (threadid_mod_8/2) - 1 )*2*LOCAL_SIZE_X );
+    }
+	 
+	 weight_global_addr_offset += weight_global_channel_offset + weight_global_pixel_offset;
+	 
+	 /* Weight slm write index */
+	 
+	 uint slm_write_weight = threadid_mod_2*4  + slm_pixel_offset + slm_channel_offset * 16;
+	 
+	 /* Weight slm read index */
+	 
+	 uint wt_slm_rd_offset = threadid_group_4*8;
+ 
+    if ( threadid_mod_2 )
+    {
+		wt_slm_rd_offset = wt_slm_rd_offset - 8 + 4;
+    }
+	
+	int kd;
+	
+	__attribute__((opencl_unroll_hint(1)))
+	for(kd = 0; kd <  ( _ID / PACK ) ; kd++) 
+	{
+
+	{
+			/* Load Activation from global to SLM */
+				
+			int in_addr = kd * (_IH + IHPAD) * (_IW + IWPAD) * BATCH_SIZE + in_addr_offset;
+
+			__global uint *activation_tile = (__global uint*)&inputs[ in_addr ];
+			
+			__local uint *act_slm_ptr   = (__local uint *) &act_slm [ act_slm_write  ];
+			
+			/* The odd thread in fused pair will start from next 4x8 block */
+			
+			activation_tile += threadid_mod_2*4*8;
+			act_slm_ptr 	+= threadid_mod_2*4*8;
+					
+			int4 act_col_0 =  as_int4( intel_sub_group_block_read4(activation_tile) );	
+			int4 act_col_1 =  as_int4( intel_sub_group_block_read4(activation_tile + 8*8) );				
+			int4 act_col_2 =  as_int4( intel_sub_group_block_read4(activation_tile + 2*8*8) );				
+			int4 act_col_3 =  as_int4( intel_sub_group_block_read4(activation_tile + 3*8*8) );
+			int4 act_col_4 =  as_int4( intel_sub_group_block_read4(activation_tile + 4*8*8) );			
+			int4 act_col_5 =  as_int4( intel_sub_group_block_read4(activation_tile + 5*8*8) );								
+			int4 act_col_6 =  as_int4( intel_sub_group_block_read4(activation_tile + 6*8*8) );				
+			int4 act_col_7 =  as_int4( intel_sub_group_block_read4(activation_tile + 7*8*8) );				
+
+			SLM_BLOCK_WRITE_4 ( act_slm_ptr , as_uint4 ( act_col_0 ) );				
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 8*8 ) , as_uint4 ( act_col_1 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 2*8*8 ) , as_uint4 ( act_col_2 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 3*8*8 ) , as_uint4 ( act_col_3 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 4*8*8 ) , as_uint4 ( act_col_4 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 5*8*8 ) , as_uint4 ( act_col_5 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 6*8*8 ) , as_uint4 ( act_col_6 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 7*8*8 ) , as_uint4 ( act_col_7 ) );
+
+			if ( thread_id >=12 )
+            {
+				activation_tile = activation_tile + 1 * (_IW + IWPAD ) * BATCH_PACK * 8;
+				act_slm_ptr 	+= 8*8*8;		
+	
+				int4 act_col_9 =  as_int4( intel_sub_group_block_read4(activation_tile) );				
+				int4 act_col_10 =  as_int4( intel_sub_group_block_read4(activation_tile + 8*8) );				
+				int4 act_col_11 =  as_int4( intel_sub_group_block_read4(activation_tile + 2*8*8) );				
+				int4 act_col_12 =  as_int4( intel_sub_group_block_read4(activation_tile + 3*8*8) );
+				int4 act_col_13 =  as_int4( intel_sub_group_block_read4(activation_tile + 4*8*8) );			
+				int4 act_col_14 =  as_int4( intel_sub_group_block_read4(activation_tile + 5*8*8) );								
+				int4 act_col_15 =  as_int4( intel_sub_group_block_read4(activation_tile + 6*8*8) );				
+				int4 act_col_16 =  as_int4( intel_sub_group_block_read4(activation_tile + 7*8*8) );				
+				
+				SLM_BLOCK_WRITE_4 ( act_slm_ptr  , as_uint4 ( act_col_9 ) );				
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 8*8 )   , as_uint4 ( act_col_10 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 2*8*8 ) , as_uint4 ( act_col_11 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 3*8*8 ) , as_uint4 ( act_col_12 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 4*8*8 ) , as_uint4 ( act_col_13 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 5*8*8 ) , as_uint4 ( act_col_14 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 6*8*8 ) , as_uint4 ( act_col_15 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 7*8*8 ) , as_uint4 ( act_col_16 ) );
+			}
+
+		/* load weights from global to weight_slm */
+		
+			int weight_addr = kd * NUM_FILTERS * 8 + weight_global_addr_offset;
+
+			__global uint *weight_tile   = (__global uint*)&weights    [ weight_addr ];
+			__local  uint *wt_slm_ptr    = (__local uint *) &weight_slm [ slm_write_weight  ];
+			
+			int4 w0 = as_int4 ( intel_sub_group_block_read4( weight_tile ) );						
+			int4 w1 = as_int4 ( intel_sub_group_block_read4( weight_tile + 4*8 ) );			
+			int4 w2 = as_int4 ( intel_sub_group_block_read4( weight_tile + 8*8 ) );	
+			int4 w3 = as_int4 ( intel_sub_group_block_read4( weight_tile + 12*8 ) );
+			
+			SLM_BLOCK_WRITE_4 ( wt_slm_ptr , as_uint4 ( w0 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 8*8 )   , as_uint4 ( w1 ) );		
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 32*8 ) , as_uint4 ( w2 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 32*8 + 8*8 ) , as_uint4 ( w3 ) );
+		   
+		   if( threadid_mod_8 < 2 )
+           { 
+				weight_tile += 16*8;
+				wt_slm_ptr  += 2*32*8;
+			
+				int4 w4 = as_int4 ( intel_sub_group_block_read4( weight_tile ) );						
+				int4 w5 = as_int4 ( intel_sub_group_block_read4( weight_tile + 4*8 ) );			
+				
+				SLM_BLOCK_WRITE_4 ( wt_slm_ptr , as_uint4 ( w4 ) );
+				SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 8*8 )   , as_uint4 ( w5 ) );		
+			}
+	}		
+
+		// Synchronize SLM writes across workgroup
+		 barrier(CLK_LOCAL_MEM_FENCE);		
+
+			uint wt_slm_rd = wt_slm_rd_offset;
+		
+			__local uint *slm_ptr0     = (__local uint *) &act_slm[ act_slm_read ];
+			__local uint *slm_ptr1     = (__local uint *) &weight_slm[ wt_slm_rd ];
+			
+			int8 weights_reg0, weights_reg1,weights_reg2;
+			
+			/**********************************************************************************************************
+			  First phase - load first row of weights and for the first activation row - 1Hx8Wx4N inputs at a time
+                          - Weights - 24 registers, Activations - 32 registers: Total 56 registers used	for input data			  
+			***********************************************************************************************************/
+			{ 
+					int4 act_reg[ 8 ];
+	
+	                 /* Load weights from SLM into registers  */
+				{
+					    weights_reg0.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg0.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+						
+						weights_reg1.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg1.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+						
+						weights_reg2.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg2.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+				}
+						
+			/* load first 1Hx8Wx4N inputs - Activation Broadcast will occur since it is same for fused threads */
+			
+				__attribute__((opencl_unroll_hint(8)))
+				for (int ic = 0; ic < 8; ic++)
+				{
+	                 /* Load activations from SLM into registers  */
+					 
+					 uint slm_offset = ic * BATCH_PACK * 8 ;
+					 
+    				 act_reg [ ic ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ; 
+				}
+			
+			/* Convolve */ 
+			
+			   /* order the mmad instructions to minimize dependency on src0,dst - also try to maximise reuse of weights-reg*/
+
+                out[ 0 ] = _MMAD_4x8 ( out[ 0 ], act_reg[0], weights_reg0 );
+				out[ 1 ] = _MMAD_4x8 ( out[ 1 ], act_reg[1], weights_reg0 );
+				out[ 2 ] = _MMAD_4x8 ( out[ 2 ], act_reg[2], weights_reg0 );
+				out[ 3 ] = _MMAD_4x8 ( out[ 3 ], act_reg[3], weights_reg0 );
+				out[ 4 ] = _MMAD_4x8 ( out[ 4 ], act_reg[4], weights_reg0 );
+				out[ 5 ] = _MMAD_4x8 ( out[ 5 ], act_reg[5], weights_reg0 );
+				out[ 6 ] = _MMAD_4x8 ( out[ 6 ], act_reg[6], weights_reg0 );
+				out[ 7 ] = _MMAD_4x8 ( out[ 7 ], act_reg[7], weights_reg0 );
+
+				out[ 0 ] = _MMAD_4x8 ( out[ 0 ], act_reg[1], weights_reg1 );
+				out[ 1 ] = _MMAD_4x8 ( out[ 1 ], act_reg[2], weights_reg1 );
+				out[ 2 ] = _MMAD_4x8 ( out[ 2 ], act_reg[3], weights_reg1 );
+				out[ 3 ] = _MMAD_4x8 ( out[ 3 ], act_reg[4], weights_reg1 );
+				out[ 4 ] = _MMAD_4x8 ( out[ 4 ], act_reg[5], weights_reg1 );
+				out[ 5 ] = _MMAD_4x8 ( out[ 5 ], act_reg[6], weights_reg1 );
+				out[ 6 ] = _MMAD_4x8 ( out[ 6 ], act_reg[7], weights_reg1 );
+				
+				out[ 0 ] = _MMAD_4x8 ( out[ 0 ], act_reg[2], weights_reg2 );
+				out[ 1 ] = _MMAD_4x8 ( out[ 1 ], act_reg[3], weights_reg2 );
+				out[ 2 ] = _MMAD_4x8 ( out[ 2 ], act_reg[4], weights_reg2 );
+				out[ 3 ] = _MMAD_4x8 ( out[ 3 ], act_reg[5], weights_reg2 );
+				out[ 4 ] = _MMAD_4x8 ( out[ 4 ], act_reg[6], weights_reg2 );
+				out[ 5 ] = _MMAD_4x8 ( out[ 5 ], act_reg[7], weights_reg2 );
+			   
+				/* load next 1Hx8Wx4N inputs */
+		
+				__attribute__((opencl_unroll_hint(8)))
+				for (int ic = 8; ic < 16; ic++)
+				{
+					 uint slm_offset = ic * BATCH_PACK * 8;
+					 
+					 act_reg [ ic - 8 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset) ) ; 
+				}
+				
+				/* Convolve */				
+				
+				out[ 6 ] = _MMAD_4x8 ( out[ 6 ], act_reg[0], weights_reg2 );
+				out[ 7 ] = _MMAD_4x8 ( out[ 7 ], act_reg[1], weights_reg2 );
+				out[ 8 ] = _MMAD_4x8 ( out[ 8 ], act_reg[2], weights_reg2 );
+				out[ 9 ] = _MMAD_4x8 ( out[ 9 ], act_reg[3], weights_reg2 );
+				out[ 10 ] = _MMAD_4x8 ( out[ 10 ], act_reg[4], weights_reg2 );
+				out[ 11 ] = _MMAD_4x8 ( out[ 11 ], act_reg[5], weights_reg2 );
+				out[ 12 ] = _MMAD_4x8 ( out[ 12 ], act_reg[6], weights_reg2 );
+				out[ 13 ] = _MMAD_4x8 ( out[ 13 ], act_reg[7], weights_reg2 );
+				
+				out[ 7 ]  =  _MMAD_4x8 ( out[ 7 ], act_reg[0], weights_reg1 );
+				out[ 8 ]  =  _MMAD_4x8 ( out[ 8 ], act_reg[1], weights_reg1 );
+				out[ 9 ]  = _MMAD_4x8 (  out[ 9 ],  act_reg[2], weights_reg1 );
+				out[ 10 ] = _MMAD_4x8 ( out[ 10 ], act_reg[3], weights_reg1 );
+				out[ 11 ] = _MMAD_4x8 ( out[ 11 ], act_reg[4], weights_reg1 );
+				out[ 12 ] = _MMAD_4x8 ( out[ 12 ], act_reg[5], weights_reg1 );
+				out[ 13 ] = _MMAD_4x8 ( out[ 13 ], act_reg[6], weights_reg1 );
+				
+				out[ 8 ] =  _MMAD_4x8 ( out[ 8 ],  act_reg[0], weights_reg0 );
+				out[ 9 ] = _MMAD_4x8 ( out [ 9 ],   act_reg[1], weights_reg0 );
+				out[ 10 ] = _MMAD_4x8 ( out[ 10 ], act_reg[2], weights_reg0 );
+				out[ 11 ] = _MMAD_4x8 ( out[ 11 ], act_reg[3], weights_reg0 );
+				out[ 12 ] = _MMAD_4x8 ( out[ 12 ], act_reg[4], weights_reg0 );
+				out[ 13 ] = _MMAD_4x8 ( out[ 13 ], act_reg[5], weights_reg0 );
+			}	
+			
+			/* Second , Third phase */
+		{
+				int8 weights_reg3, weights_reg4,weights_reg5;
+				int4 act_reg_2[ 6 ];
+
+				/*****************************************************************************************************************************************
+				 Second phase - load second row of weights, now both rows are in registers, for the second activation row - 1Hx6Wx4N inputs at a time
+                              - Weights - 48 registers, Activations - 24 registers: Total 72 registers used	for input data			 
+				******************************************************************************************************************************************/
+				
+				 /* Load weights of row = 1 from SLM into registers  */
+				 {
+				 
+						weights_reg3.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg3.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+						
+						weights_reg4.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg4.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+						
+						weights_reg5.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg5.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+				}
+				
+				/* load input row =1,col=0:1  1Hx2Wx8N  */
+					 
+				uint slm_row_offset_2 	  = 1*(TILE_W + 2)*BATCH_PACK*8;	 
+				
+				act_reg_2 [ 0 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_2) ) ; 
+				act_reg_2 [ 1 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_2 + BATCH_PACK*8) ) ; 
+				
+				out[ 14 ] = _MMAD_4x8 ( out[ 14 ], act_reg_2[0] , weights_reg0 );
+				out[ 0 ]  = _MMAD_4x8 ( out[ 0 ],  act_reg_2[0]  , weights_reg3 );
+				out[ 1 ]  = _MMAD_4x8 ( out[ 1 ],  act_reg_2[1]  , weights_reg3 );
+				out[ 15 ] = _MMAD_4x8 ( out[ 15 ], act_reg_2[1] , weights_reg0 );
+				
+				out[ 14 ] = _MMAD_4x8 ( out[ 14 ], act_reg_2[1], weights_reg1 );
+				out[ 0 ]  = _MMAD_4x8 ( out[ 0 ],  act_reg_2[1], weights_reg4 );			
+				
+				/* load input row =1,col=2:7,8:13,1Hx6Wx4N  */
+				
+				uint col = 2;
+				
+				__attribute__((opencl_unroll_hint(2)))
+				do {
+				
+				uint slm_offset 	  = 1*(TILE_W + 2)*BATCH_PACK*8 + col*BATCH_PACK*8;	 
+	
+				act_reg_2 [ 0 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ; 
+				act_reg_2 [ 1 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset +   BATCH_PACK*8)) ; 
+				act_reg_2 [ 2 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 2*BATCH_PACK*8)) ; 
+				act_reg_2 [ 3 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 3*BATCH_PACK*8) ) ; 
+   				act_reg_2 [ 4 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 4*BATCH_PACK*8) ) ; 
+   				act_reg_2 [ 5 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 5*BATCH_PACK*8) ) ; 
+   
+   				uint first_row_offset   = col - 2;
+				uint second_row_offset  = 14 + col - 2;
+
+   				out [ first_row_offset ]      =  _MMAD_4x8 ( out[ first_row_offset ] ,    act_reg_2[0] , weights_reg5 );
+				out [ first_row_offset + 1 ]  =  _MMAD_4x8 ( out[ first_row_offset + 1] , act_reg_2[0],  weights_reg4 );
+				out [ first_row_offset + 2 ]  =  _MMAD_4x8 ( out[ first_row_offset + 2] , act_reg_2[0],  weights_reg3 );
+				out [ first_row_offset + 3 ]  =  _MMAD_4x8 ( out[ first_row_offset + 3 ], act_reg_2[1], weights_reg3 );
+				
+				out [ second_row_offset ]      =  _MMAD_4x8 ( out[ second_row_offset ] , act_reg_2[0] , weights_reg2 );			
+				out [ second_row_offset + 1 ]  =  _MMAD_4x8 ( out[ second_row_offset + 1] , act_reg_2[0],  weights_reg1 );
+				out [ second_row_offset + 2 ]  =  _MMAD_4x8 ( out[ second_row_offset + 2] , act_reg_2[0],  weights_reg0 );
+				out [ second_row_offset + 3 ]  =  _MMAD_4x8 ( out[ second_row_offset + 3], act_reg_2[1], weights_reg0 );
+				
+				out [ first_row_offset + 1 ]   = _MMAD_4x8 (  out[ first_row_offset + 1 ], act_reg_2[1], weights_reg5 );
+				out [ first_row_offset + 2 ]   = _MMAD_4x8 (  out[ first_row_offset + 2 ], act_reg_2[1], weights_reg4 );
+				out [ first_row_offset + 3 ]   = _MMAD_4x8 ( out[ first_row_offset + 3 ],  act_reg_2[2], weights_reg4 );
+				out [ first_row_offset + 4 ]   = _MMAD_4x8 ( out[ first_row_offset + 4 ],  act_reg_2[2], weights_reg3 );
+				
+				out [ second_row_offset + 1 ]  = _MMAD_4x8 (  out[ second_row_offset + 1 ], act_reg_2[1], weights_reg2 );
+				out [ second_row_offset + 2 ]  = _MMAD_4x8 (  out[ second_row_offset + 2 ], act_reg_2[1], weights_reg1 );
+				out [ second_row_offset + 3 ]   = _MMAD_4x8 ( out[ second_row_offset + 3 ], act_reg_2[2], weights_reg1 );
+				out [ second_row_offset + 4 ]   = _MMAD_4x8 ( out[ second_row_offset + 4 ], act_reg_2[2], weights_reg0 );
+
+				out [ first_row_offset + 2 ]   = _MMAD_4x8 ( out[ first_row_offset + 2], act_reg_2[2], weights_reg5 );				
+				out [ first_row_offset + 3 ]   = _MMAD_4x8 ( out[ first_row_offset + 3], act_reg_2[3], weights_reg5 );
+				out [ first_row_offset + 4 ]   = _MMAD_4x8 ( out[ first_row_offset + 4], act_reg_2[3], weights_reg4 );
+				out [ first_row_offset + 5 ]   = _MMAD_4x8 ( out[ first_row_offset + 5], act_reg_2[3], weights_reg3 );	
+				
+				out [ second_row_offset + 2 ]   = _MMAD_4x8 ( out[ second_row_offset + 2], act_reg_2[2], weights_reg2 );				
+				out [ second_row_offset + 3 ]   = _MMAD_4x8 ( out[ second_row_offset + 3], act_reg_2[3], weights_reg2 );
+				out [ second_row_offset + 4 ]   = _MMAD_4x8 ( out[ second_row_offset + 4], act_reg_2[3], weights_reg1 );
+				out [ second_row_offset + 5 ]   = _MMAD_4x8 ( out[ second_row_offset + 5], act_reg_2[3], weights_reg0 );	
+
+				out [ first_row_offset + 6 ]   = _MMAD_4x8 ( out[ first_row_offset + 6], act_reg_2[4], weights_reg3 );
+				out [ first_row_offset + 7 ]   = _MMAD_4x8 ( out[ first_row_offset + 7], act_reg_2[5], weights_reg3 );
+				out [ first_row_offset + 5 ]   = _MMAD_4x8 ( out[ first_row_offset + 5], act_reg_2[4], weights_reg4 );
+				out [ first_row_offset + 6 ]   = _MMAD_4x8 ( out[ first_row_offset + 6], act_reg_2[5], weights_reg4 );
+				out [ first_row_offset + 4 ]   = _MMAD_4x8 ( out[ first_row_offset + 4], act_reg_2[4], weights_reg5 );
+				out [ first_row_offset + 5 ]   = _MMAD_4x8 ( out[ first_row_offset + 5], act_reg_2[5], weights_reg5 );
+				
+				out [ second_row_offset + 6 ]   = _MMAD_4x8 ( out[ second_row_offset + 6], act_reg_2[4], weights_reg0 );
+				out [ second_row_offset + 7 ]   = _MMAD_4x8 ( out[ second_row_offset + 7], act_reg_2[5], weights_reg0 );
+				out [ second_row_offset + 5 ]   = _MMAD_4x8 ( out[ second_row_offset + 5], act_reg_2[4], weights_reg1 );
+				out [ second_row_offset + 6 ]   = _MMAD_4x8 ( out[ second_row_offset + 6], act_reg_2[5], weights_reg1 );
+				out [ second_row_offset + 4 ]   = _MMAD_4x8 ( out[ second_row_offset + 4], act_reg_2[4], weights_reg2 );				
+				out [ second_row_offset + 5 ]   = _MMAD_4x8 ( out[ second_row_offset + 5], act_reg_2[5], weights_reg2 );
+
+				col +=6;
+				
+				} while ( col < 14 );
+				
+				/* load input row =1,col=14:15  1Hx2Wx4N  */
+
+				uint slm_row_offset_3 	  = 1 * (TILE_W + 2) * BATCH_PACK * 8 + 14 * BATCH_PACK * 8;	
+
+				act_reg_2 [ 0 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_3)) ; 
+				act_reg_2 [ 1 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_3 +   BATCH_PACK*8)) ; 
+				
+				out[ 13 ]  = _MMAD_4x8 ( out[ 13 ],   act_reg_2[0],  weights_reg4 );		
+				out[ 27 ]  = _MMAD_4x8 ( out[ 27 ],   act_reg_2[0],  weights_reg1 );		
+				out[ 26 ]  = _MMAD_4x8 ( out[ 26 ],   act_reg_2[0],  weights_reg2 );	
+				
+				out[ 12 ]  = _MMAD_4x8 ( out[ 12 ],  act_reg_2[0],  weights_reg5 );					
+				out[ 13 ]  = _MMAD_4x8 ( out[ 13 ],  act_reg_2[1],  weights_reg5 );	
+				
+				out[ 27 ]  = _MMAD_4x8 ( out[ 27 ],  act_reg_2[1],  weights_reg2 );
+				
+                /****************************************************************************************************************************************
+				   Third phase - load third row of weights, this replaces first weight row, for the third activation row read 1Hx6Wx4N inputs at a time 
+				               - Weights - 48 registers, Activations - 24 registers: Total 72 registers used for input data			  
+				*****************************************************************************************************************************************/
+				
+				 /* Load weights of row = 2 from SLM into registers - replaces row = 0 weights  */
+				 {
+					    weights_reg0.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg0.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+						
+						weights_reg1.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg1.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+						
+						weights_reg2.s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 ) );
+					    weights_reg2.s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptr1 + 64 ) );
+						slm_ptr1   			   += LOCAL_SIZE_X*8;	
+				}
+				
+				uint slm_row_offset_4 	  = 2*(TILE_W + 2)*BATCH_PACK*8;	 
+				
+				act_reg_2 [ 0 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_4)) ; 
+				act_reg_2 [ 1 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_4 + BATCH_PACK*8)) ; 
+		
+				out[ 14 ] = _MMAD_4x8 ( out[ 14 ], act_reg_2[0] , weights_reg3 );
+				out[ 0 ]  = _MMAD_4x8 ( out[ 0 ],  act_reg_2[0]  , weights_reg0 );
+				out[ 1 ]  = _MMAD_4x8 ( out[ 1 ],  act_reg_2[1]  , weights_reg0 );
+				out[ 15 ] = _MMAD_4x8 ( out[ 15 ], act_reg_2[1] , weights_reg3 );
+				
+				out[ 14 ] = _MMAD_4x8 ( out[ 14 ], act_reg_2[1], weights_reg4 );
+				out[ 0 ]  = _MMAD_4x8 ( out[ 0 ],  act_reg_2[1], weights_reg1 );	
+				
+				/* load input row =2,col=2:7,8:13,1Hx6Wx4N  */
+				
+				uint col_2 = 2;
+				
+				__attribute__((opencl_unroll_hint(2)))
+				do {
+				
+				uint slm_offset 	  = 2*(TILE_W + 2)*BATCH_PACK*8 + col_2*BATCH_PACK*8;	 
+	
+				act_reg_2 [ 0 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ; 
+				act_reg_2 [ 1 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset +   BATCH_PACK*8)) ; 
+				act_reg_2 [ 2 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 2*BATCH_PACK*8)) ; 
+				act_reg_2 [ 3 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 3*BATCH_PACK*8) ) ; 
+   				act_reg_2 [ 4 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 4*BATCH_PACK*8) ) ; 
+   				act_reg_2 [ 5 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset + 5*BATCH_PACK*8) ) ; 
+   
+   				uint first_row_offset   = col_2 - 2;
+				uint second_row_offset  = 14 + col_2 - 2;
+   			
+				out [ first_row_offset + 1 ]  =  _MMAD_4x8 ( out[ first_row_offset + 1] , act_reg_2[0],  weights_reg1 );
+				out [ first_row_offset + 2 ]  =  _MMAD_4x8 ( out[ first_row_offset + 2] , act_reg_2[0],  weights_reg0 );
+				out [ first_row_offset + 3 ]  =  _MMAD_4x8 ( out[ first_row_offset + 3 ], act_reg_2[1], weights_reg0 );
+				out [ first_row_offset ]      =  _MMAD_4x8 ( out[ first_row_offset ] ,    act_reg_2[0] , weights_reg2 );
+				
+				out [ second_row_offset + 1 ]  =  _MMAD_4x8 ( out[ second_row_offset + 1] , act_reg_2[0],  weights_reg4 );
+				out [ second_row_offset + 2 ]  =  _MMAD_4x8 ( out[ second_row_offset + 2] , act_reg_2[0],  weights_reg3 );
+				out [ second_row_offset + 3 ]  =  _MMAD_4x8 ( out[ second_row_offset + 3], act_reg_2[1], weights_reg3 );
+				out [ second_row_offset ]      =  _MMAD_4x8 ( out[ second_row_offset ] , act_reg_2[0] , weights_reg5 );
+				
+				out [ first_row_offset + 1 ]   = _MMAD_4x8 (  out[ first_row_offset + 1 ], act_reg_2[1], weights_reg2 );
+				out [ first_row_offset + 2 ]   = _MMAD_4x8 (  out[ first_row_offset + 2 ], act_reg_2[1], weights_reg1 );
+				out [ first_row_offset + 3 ]   = _MMAD_4x8 ( out[ first_row_offset + 3 ],  act_reg_2[2], weights_reg1 );
+				out [ first_row_offset + 4 ]   = _MMAD_4x8 ( out[ first_row_offset + 4 ],  act_reg_2[2], weights_reg0 );
+				
+				out [ second_row_offset + 1 ]  = _MMAD_4x8 (  out[ second_row_offset + 1 ], act_reg_2[1], weights_reg5 );
+				out [ second_row_offset + 2 ]  = _MMAD_4x8 (  out[ second_row_offset + 2 ], act_reg_2[1], weights_reg4 );
+				out [ second_row_offset + 3 ]   = _MMAD_4x8 ( out[ second_row_offset + 3 ], act_reg_2[2], weights_reg4 );
+				out [ second_row_offset + 4 ]   = _MMAD_4x8 ( out[ second_row_offset + 4 ], act_reg_2[2], weights_reg3 );
+
+				out [ first_row_offset + 5 ]   = _MMAD_4x8 ( out[ first_row_offset + 5], act_reg_2[3], weights_reg0 );	
+				out [ first_row_offset + 2 ]   = _MMAD_4x8 ( out[ first_row_offset + 2], act_reg_2[2], weights_reg2 );				
+				out [ first_row_offset + 3 ]   = _MMAD_4x8 ( out[ first_row_offset + 3], act_reg_2[3], weights_reg2 );
+				out [ first_row_offset + 4 ]   = _MMAD_4x8 ( out[ first_row_offset + 4], act_reg_2[3], weights_reg1 );
+				
+				out [ second_row_offset + 5 ]   = _MMAD_4x8 ( out[ second_row_offset + 5], act_reg_2[3], weights_reg3 );	
+				out [ second_row_offset + 2 ]   = _MMAD_4x8 ( out[ second_row_offset + 2], act_reg_2[2], weights_reg5 );				
+				out [ second_row_offset + 3 ]   = _MMAD_4x8 ( out[ second_row_offset + 3], act_reg_2[3], weights_reg5 );
+				out [ second_row_offset + 4 ]   = _MMAD_4x8 ( out[ second_row_offset + 4], act_reg_2[3], weights_reg4 );
+
+				out [ first_row_offset + 6 ]   = _MMAD_4x8 ( out[ first_row_offset + 6], act_reg_2[4], weights_reg0 );
+				out [ first_row_offset + 7 ]   = _MMAD_4x8 ( out[ first_row_offset + 7], act_reg_2[5], weights_reg0 );
+				out [ first_row_offset + 5 ]   = _MMAD_4x8 ( out[ first_row_offset + 5], act_reg_2[4], weights_reg1 );
+				out [ first_row_offset + 6 ]   = _MMAD_4x8 ( out[ first_row_offset + 6], act_reg_2[5], weights_reg1 );				
+				out [ first_row_offset + 4 ]   = _MMAD_4x8 ( out[ first_row_offset + 4], act_reg_2[4], weights_reg2 );
+				out [ first_row_offset + 5 ]   = _MMAD_4x8 ( out[ first_row_offset + 5], act_reg_2[5], weights_reg2 );
+				
+				out [ second_row_offset + 6 ]   = _MMAD_4x8 ( out[ second_row_offset + 6], act_reg_2[4], weights_reg3 );
+				out [ second_row_offset + 7 ]   = _MMAD_4x8 ( out[ second_row_offset + 7], act_reg_2[5], weights_reg3 );
+				out [ second_row_offset + 5 ]   = _MMAD_4x8 ( out[ second_row_offset + 5], act_reg_2[4], weights_reg4 );
+				out [ second_row_offset + 6 ]   = _MMAD_4x8 ( out[ second_row_offset + 6], act_reg_2[5], weights_reg4 );
+				out [ second_row_offset + 4 ]   = _MMAD_4x8 ( out[ second_row_offset + 4], act_reg_2[4], weights_reg5 );				
+				out [ second_row_offset + 5 ]   = _MMAD_4x8 ( out[ second_row_offset + 5], act_reg_2[5], weights_reg5 );
+				
+				col_2 +=6;
+				
+				} while ( col_2 < 14 );
+				
+				/* load input row =2,col=14:15  1Hx2Wx4N  */
+
+				uint slm_row_offset_5 	  = 2 * (TILE_W + 2) * BATCH_PACK * 8 + 14 * BATCH_PACK * 8;	
+
+				act_reg_2 [ 0 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_5)) ; 
+				act_reg_2 [ 1 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_row_offset_5 +   BATCH_PACK*8)) ; 
+				
+				out[ 13 ]  = _MMAD_4x8 ( out[ 13 ],   act_reg_2[0],  weights_reg1 );		
+				out[ 27 ]  = _MMAD_4x8 ( out[ 27 ],   act_reg_2[0],  weights_reg4 );		
+				out[ 26 ]  = _MMAD_4x8 ( out[ 26 ],   act_reg_2[0],  weights_reg5 );	
+				
+				out[ 12 ]  = _MMAD_4x8 ( out[ 12 ],  act_reg_2[0],  weights_reg2 );					
+				out[ 13 ]  = _MMAD_4x8 ( out[ 13 ],  act_reg_2[1],  weights_reg2 );	
+				
+				out[ 27 ]  = _MMAD_4x8 ( out[ 27 ],  act_reg_2[1],  weights_reg5 );
+	}
+				
+				/*************************************************************************************************
+				   Fourth phase - discard middle weight row, for fourth activation row load 1Hx8Wx4N at a time 
+				                - Weights - 24 registers, Activations - 32 registers: Total 56 registers used for input data			  
+				**************************************************************************************************/
+		{ 
+					int4 act_reg[ 8 ];
+				
+			/* load first 1Hx8Wx4N inputs */
+			
+				uint slm_row_offset_6 =  3 * (TILE_W + 2) * BATCH_PACK * 8 ;
+
+				__attribute__((opencl_unroll_hint(8)))
+				for (int ic = 0; ic < 8; ic++)
+				{
+	                 /* Load activations from SLM into registers  */
+					 uint slm_offset = ic * BATCH_PACK * 8  + slm_row_offset_6;
+    				 act_reg [ ic ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ; 
+				}
+			
+			/* Convolve */ 
+			
+				uint phase_offset = 14;
+				
+				out[ phase_offset + 0 ] = _MMAD_4x8 ( out[ phase_offset +0 ], act_reg[0], weights_reg0 );
+				out[ phase_offset + 1 ] = _MMAD_4x8 ( out[ phase_offset +1 ], act_reg[1], weights_reg0 );
+				out[ phase_offset +2 ] = _MMAD_4x8 ( out[ phase_offset +2 ], act_reg[2], weights_reg0 );
+				out[ phase_offset +3 ] = _MMAD_4x8 ( out[ phase_offset +3 ], act_reg[3], weights_reg0 );
+				out[ phase_offset +4 ] = _MMAD_4x8 ( out[ phase_offset +4 ], act_reg[4], weights_reg0 );
+				out[ phase_offset +5 ] = _MMAD_4x8 ( out[ phase_offset +5 ], act_reg[5], weights_reg0 );
+				out[ phase_offset +6 ] = _MMAD_4x8 ( out[ phase_offset +6 ], act_reg[6], weights_reg0 );
+				out[ phase_offset +7 ] = _MMAD_4x8 ( out[ phase_offset +7 ], act_reg[7], weights_reg0 );
+
+				out[ phase_offset +0 ] = _MMAD_4x8 ( out[ phase_offset +0 ], act_reg[1], weights_reg1 );
+				out[ phase_offset +1 ] = _MMAD_4x8 ( out[ phase_offset +1 ], act_reg[2], weights_reg1 );
+				out[ phase_offset +2 ] = _MMAD_4x8 ( out[ phase_offset +2 ], act_reg[3], weights_reg1 );
+				out[ phase_offset +3 ] = _MMAD_4x8 ( out[ phase_offset +3 ], act_reg[4], weights_reg1 );
+				out[ phase_offset +4 ] = _MMAD_4x8 ( out[ phase_offset +4 ], act_reg[5], weights_reg1 );
+				out[ phase_offset +5 ] = _MMAD_4x8 ( out[ phase_offset +5 ], act_reg[6], weights_reg1 );
+				out[ phase_offset +6 ] = _MMAD_4x8 ( out[ phase_offset +6 ], act_reg[7], weights_reg1 );
+				
+				out[ phase_offset +0 ] = _MMAD_4x8 ( out[ phase_offset +0 ], act_reg[2], weights_reg2 );
+				out[ phase_offset +1 ] = _MMAD_4x8 ( out[ phase_offset +1 ], act_reg[3], weights_reg2 );
+				out[ phase_offset +2 ] = _MMAD_4x8 ( out[ phase_offset +2 ], act_reg[4], weights_reg2 );
+				out[ phase_offset +3 ] = _MMAD_4x8 ( out[ phase_offset +3 ], act_reg[5], weights_reg2 );
+				out[ phase_offset +4 ] = _MMAD_4x8 ( out[ phase_offset +4 ], act_reg[6], weights_reg2 );
+				out[ phase_offset +5 ] = _MMAD_4x8 ( out[ phase_offset +5 ], act_reg[7], weights_reg2 );
+			   
+				/* load next 1Hx8Wx4N inputs */
+		
+				__attribute__((opencl_unroll_hint(8)))
+				for (int ic = 8; ic < 16; ic++)
+				{
+					 uint slm_offset = ic * BATCH_PACK * 8 + slm_row_offset_6;
+					 act_reg [ ic - 8 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ; 
+				}
+				
+				/* Convolve */				
+				
+				out[ phase_offset +6 ] = _MMAD_4x8 ( out[ phase_offset +6 ], act_reg[0], weights_reg2 );
+				out[ phase_offset +7 ] = _MMAD_4x8 ( out[ phase_offset +7 ], act_reg[1], weights_reg2 );
+				out[ phase_offset + 8 ] = _MMAD_4x8 ( out[ phase_offset +8 ], act_reg[2], weights_reg2 );
+				out[ phase_offset +9 ] = _MMAD_4x8 ( out[phase_offset + 9 ], act_reg[3], weights_reg2 );
+				out[ phase_offset +10 ] = _MMAD_4x8 ( out[ phase_offset +10 ], act_reg[4], weights_reg2 );
+				out[ phase_offset +11 ] = _MMAD_4x8 ( out[phase_offset + 11 ], act_reg[5], weights_reg2 );
+				out[ phase_offset +12 ] = _MMAD_4x8 ( out[ phase_offset +12 ], act_reg[6], weights_reg2 );
+				out[ phase_offset +13 ] = _MMAD_4x8 ( out[ phase_offset +13 ], act_reg[7], weights_reg2 );
+				
+				out[ phase_offset +7 ] =  _MMAD_4x8 ( out[ phase_offset +7 ], act_reg[0], weights_reg1 );
+				out[ phase_offset +8 ] =  _MMAD_4x8 ( out[phase_offset + 8 ], act_reg[1], weights_reg1 );
+				out[ phase_offset +9 ] = _MMAD_4x8 ( out[ phase_offset +9 ], act_reg[2], weights_reg1 );
+				out[ phase_offset +10 ] = _MMAD_4x8 ( out[ phase_offset +10 ], act_reg[3], weights_reg1 );
+				out[ phase_offset +11 ] = _MMAD_4x8 ( out[ phase_offset +11 ], act_reg[4], weights_reg1 );
+				out[ phase_offset +12 ] = _MMAD_4x8 ( out[ phase_offset +12 ], act_reg[5], weights_reg1 );
+				out[ phase_offset +13 ] = _MMAD_4x8 ( out[phase_offset + 13 ], act_reg[6], weights_reg1 );
+				
+				out[ phase_offset +8 ] =  _MMAD_4x8 ( out[phase_offset + 8 ],  act_reg[0], weights_reg0 );
+				out[ phase_offset +9 ] = _MMAD_4x8 ( out[ phase_offset +9 ], act_reg[1], weights_reg0 );
+				out[ phase_offset +10 ] = _MMAD_4x8 ( out[ phase_offset +10 ], act_reg[2], weights_reg0 );
+				out[ phase_offset +11 ] = _MMAD_4x8 ( out[phase_offset + 11 ], act_reg[3], weights_reg0 );
+				out[ phase_offset +12 ] = _MMAD_4x8 ( out[ phase_offset +12 ], act_reg[4], weights_reg0 );
+				out[ phase_offset +13 ] = _MMAD_4x8 ( out[phase_offset + 13 ], act_reg[5], weights_reg0 );
+			}	
+			
+			// To make sure all threads in WG have finished compute before next depth tile of activation and weights are loaded into SLM
+			barrier(CLK_LOCAL_MEM_FENCE);	
+			
+	} //for kd
+
+        /****************************************************************************************************************
+		*******************************Output Write Stage****************************************************************
+		****************************************************************************************************************/
+		
+		/* 
+		   Outputs will be passed through activation function and quantized to 8 bits before writing 
+		   Output will be in same format as input [K/32][N/4][P][Q][4N][32K]
+		   Writes are staged in SLM so that 32-bit writes can be done to Global memory 
+		*/	
+			
+		/******************* Write output to SLM *************************************/	
+			
+		/*  Quantize and pack 4x1 byte - from consectuive n-coordinates
+         	Write uint32 from each lane to SLM , the entire thread will write 8-consecutive K-coorindates	
+			Four threads will write 4x8xuint32 for 32 output channels and 4 batches
+			This will be repeated for entire WG-tile
+			
+			Assume one SLM row as 32 uints ( 32 channels , four batches for each channel - 4NK )
+		*/
+
+			 uint out_slm_write        =  lid_z * TILE_W * OUT_BLOCK_HEIGHT * 32 + threadid_group_4 * 8 + lane_id;
+
+			__local uchar4*  out_slm   = (__local uchar4*)  &act_slm;
+			__local uchar4* out_slm_2  = (__local uchar4*)  &out_slm[ out_slm_write ];
+		
+			/* Scale the accumulator down and do the ReLU before converting to 8 bits */
+
+			/*  Real code might do this, but need to get scale right or the convert to uchar saturates and then doesn''t match CPU 
+			float scale = (float)SCALE_FACTOR;
+
+			uchar outchar = (uchar)max(((float)outint) * scale, 0.0f); */
+
+            const uint _feature = ((fmg * 32) % _OD) + get_local_id(0);
+            float quant_f = as_float(intel_sub_group_block_read((__global uint*) (quantizations + _feature) ));
+            float bias_f = as_float(intel_sub_group_block_read((__global uint*) (biases + _feature) ));
+            float calib_f = as_float(intel_sub_group_block_read((__global uint*) (calibrations + _feature) ));
+
+			__attribute__((opencl_unroll_hint(OUT_BLOCK_HEIGHT)))
+			for (int r = 0; r < OUT_BLOCK_HEIGHT; r++)
+            {
+			    __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+				for (int c = 0; c < OUT_BLOCK_WIDTH; c++)
+                {
+					int4 outvec = out[ r * OUT_BLOCK_WIDTH + c];
+			
+					uchar4 slm_write0;
+					
+					int slm_addr = c * 32 + r * TILE_W * 32;
+					
+					/*TODO - Activation & Quantization  code goes here -  presently applying ReLU and  taking lower 8-bits */
+
+                    slm_write0.s0 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s0) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));
+                    slm_write0.s1 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s1) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));
+                    slm_write0.s2 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s2) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));
+                    slm_write0.s3 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec.s3) * quant_f * I_QF + bias_f) * calib_f)), NL_M, NL_N));
+					
+					out_slm_2[ slm_addr ]   = slm_write0;
+
+				} // out_block_width-for loop
+			
+			}  // out_block_height-for loop
+
+			//  Wait till all threads in WG finish placing the output
+			  barrier(CLK_LOCAL_MEM_FENCE);
+			  
+			/******************* Read from SLM & Write to Global *************************************/	
+			
+		    /* Each lane will read uint4 from SLM - 4K x 4N values. Swizzle them into 4N x 4K order
+
+     		   SLM Read Distribution - 8Px14Qx4Nx32K output tile
+			
+			   Threads 0-1 handles row0, col 0-13,
+			   Threads 2-3 handles row1, col 0-13,
+			   ..
+			   Threads 14-15 handles row7, col 0-13 */
+
+			uint row_id =   thread_id / 2;
+			uint col_id =   ( thread_id % 2 )*7;
+			
+			uint out_slm_read =  col_id * 32 + row_id * TILE_W * 32 + lane_id * 4;
+			
+			__local uint4 *out_slm3   = (__local uint4*) &out_slm[ out_slm_read ];
+			
+			/* feature maps are an array of slicePacks, each H,W position within the slice pack contains 32 8bit feature maps(channels) of 8 different batches */
+			uint row_size_bytes        = (_OW + OWPAD) * PACK * BATCH_PACK;
+			
+			/* slice_pack is a pack of 32 feature map tiles that are [OH][OW][4][32] that are stored within the full [K/32][N/4][OH][OW][4][32] output */
+			uint slice_pack_size_bytes = row_size_bytes * (_OH + OHPAD); 
+			
+			/* Each fmg writes [OH][OW][4][32]*/
+		
+		 	uint output_depth_index      =  output_depth;
+
+			uint batch_index			 =  batch;
+			
+			uint slice_pack_addr_bytes  = output_depth_index * slice_pack_size_bytes * ( BATCH_SIZE / BATCH_PACK ) + batch_index * slice_pack_size_bytes + (groupz_tile + row_id ) * row_size_bytes + (groupy_tile + col_id ) * PACK * BATCH_PACK; 
+			
+			__global uint* output_write = (__global uint *) &outputs [ slice_pack_addr_bytes ];
+			
+			/* Each lane writes 4K values of 4 batches and 8 different columns */
+			
+			/* 4K values of K=0..31 */
+			
+			const char  mask_constant = 0xFF;
+            
+			__attribute__((opencl_unroll_hint(7)))
+			for ( int c=0; c<7; c++ )
+            {
+				/* Get 4K4N values in uint4 - each uint containing 4N values of a K
+ 				   swizzle the data and pack into another uint4 containing 4N4K values - each uint containing 4K values of a N. 
+				   Use block_writes for writing uint4 */
+                
+				uint4 out_k4n4 = out_slm3 [ c*8 ];
+
+               	//Pack 4K values of first n
+				uchar4 out_n0k4;
+
+				out_n0k4.s0 = out_k4n4.s0 & mask_constant;
+				out_n0k4.s1 = out_k4n4.s1 & mask_constant;
+				out_n0k4.s2 = out_k4n4.s2 & mask_constant;
+				out_n0k4.s3 = out_k4n4.s3 & mask_constant;
+		
+		        /* Assigning to uchar hence need to get the required bits to lower 8-bits*/
+				
+				//Pack 4K values of second n		
+				uchar4 out_n1k4;
+				
+			    out_n1k4.s0 = (out_k4n4.s0 >> 8) & mask_constant;
+				out_n1k4.s1 = (out_k4n4.s1 >> 8) & mask_constant;
+				out_n1k4.s2 = (out_k4n4.s2 >> 8) & mask_constant;
+				out_n1k4.s3 = (out_k4n4.s3 >> 8) & mask_constant;
+
+		        //Pack 4K values of third n			
+				uchar4 out_n2k4;
+				
+				out_n2k4.s0  = (out_k4n4.s0 >> 16) & mask_constant;
+				out_n2k4.s1  = (out_k4n4.s1 >> 16) & mask_constant;
+				out_n2k4.s2  = (out_k4n4.s2 >> 16) & mask_constant;
+				out_n2k4.s3  = (out_k4n4.s3 >> 16) & mask_constant;
+
+		        //Pack 4K values of fourth n
+				uchar4 out_n3k4;
+
+				out_n3k4.s0 = (out_k4n4.s0 >> 24) & mask_constant;
+				out_n3k4.s1 = (out_k4n4.s1 >> 24) & mask_constant;
+				out_n3k4.s2 = (out_k4n4.s2 >> 24) & mask_constant;
+				out_n3k4.s3 = (out_k4n4.s3 >> 24) & mask_constant;
+				
+				uint4 out_n4k4;
+				
+				out_n4k4.s0 = as_uint ( out_n0k4 );
+				out_n4k4.s1 = as_uint ( out_n1k4 );
+				out_n4k4.s2 = as_uint ( out_n2k4 );
+				out_n4k4.s3 = as_uint ( out_n3k4 );
+								
+			    intel_sub_group_block_write4 ( output_write , out_n4k4 );
+
+				output_write += 4*8;
+			}
+} //end of kernel
+
+#undef SCAL
+#undef QUANTIZATION
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_slm_7x7_rep4.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_slm_7x7_rep4.cl
new file mode 100644
index 000000000..7030a2e96
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_mmad_slm_7x7_rep4.cl
@@ -0,0 +1,1044 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/mmad.cl"
+
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION \
+    out_write_N2K4[0].s0 = convert_uchar_sat((float)outvec0.s0 * SCALE + bias_f.s0); /*K= lane_id,N=0*/ \
+    out_write_N2K4[0].s1 = convert_uchar_sat((float)outvec1.s0 * SCALE + bias_f.s1); /*K= lane_id + 8,N=0*/\
+    out_write_N2K4[0].s2 = convert_uchar_sat((float)outvec2.s0 * SCALE + bias_f.s2); /*K= lane_id + 16,N=0*/\
+    out_write_N2K4[0].s3 = convert_uchar_sat((float)outvec3.s0 * SCALE + bias_f.s3); /*K= lane_id + 24,N=0*/\
+    \    
+    out_write_N2K4[0].s4 = convert_uchar_sat((float)outvec0.s1 * SCALE + bias_f.s0); /*K= lane_id,N=1*/\
+    out_write_N2K4[0].s5 = convert_uchar_sat((float)outvec1.s1 * SCALE + bias_f.s1); /*K= lane_id + 8,N=1*/\
+    out_write_N2K4[0].s6 = convert_uchar_sat((float)outvec2.s1 * SCALE + bias_f.s2); /*K= lane_id + 16,N=1*/\
+    out_write_N2K4[0].s7 = convert_uchar_sat((float)outvec3.s1 * SCALE + bias_f.s3); /*K= lane_id + 24,N=1*/\
+    \
+    out_write_N2K4[1].s0 = convert_uchar_sat((float)outvec0.s2 * SCALE + bias_f.s0); /*K= lane_id,N=2*/\
+    out_write_N2K4[1].s1 = convert_uchar_sat((float)outvec1.s2 * SCALE + bias_f.s1); /*K= lane_id + 8,N=2*/\
+    out_write_N2K4[1].s2 = convert_uchar_sat((float)outvec2.s2 * SCALE + bias_f.s2); /*K= lane_id + 16,N=2*/\
+    out_write_N2K4[1].s3 = convert_uchar_sat((float)outvec3.s2 * SCALE + bias_f.s3); /*K= lane_id + 24,N=2*/\
+    \
+    out_write_N2K4[1].s4 = convert_uchar_sat((float)outvec0.s3 * SCALE + bias_f.s0); /*K= lane_id,N=3*/\
+    out_write_N2K4[1].s5 = convert_uchar_sat((float)outvec1.s3 * SCALE + bias_f.s1); /*K= lane_id + 8,N=3*/\
+    out_write_N2K4[1].s6 = convert_uchar_sat((float)outvec2.s3 * SCALE + bias_f.s2); /*K= lane_id + 16,N=3*/\
+    out_write_N2K4[1].s7 = convert_uchar_sat((float)outvec3.s3 * SCALE + bias_f.s3); /*K= lane_id + 24,N=3*/
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION \
+    out_write_N2K4[0].s0 = convert_uchar_sat(outvec0.s0); /*K= lane_id,N=0*/ \
+    out_write_N2K4[0].s1 = convert_uchar_sat(outvec1.s0); /*K= lane_id + 8,N=0*/\
+    out_write_N2K4[0].s2 = convert_uchar_sat(outvec2.s0); /*K= lane_id + 16,N=0*/\
+    out_write_N2K4[0].s3 = convert_uchar_sat(outvec3.s0); /*K= lane_id + 24,N=0*/\
+    \    
+    out_write_N2K4[0].s4 = convert_uchar_sat(outvec0.s1); /*K= lane_id,N=1*/\
+    out_write_N2K4[0].s5 = convert_uchar_sat(outvec1.s1); /*K= lane_id + 8,N=1*/\
+    out_write_N2K4[0].s6 = convert_uchar_sat(outvec2.s1); /*K= lane_id + 16,N=1*/\
+    out_write_N2K4[0].s7 = convert_uchar_sat(outvec3.s1); /*K= lane_id + 24,N=1*/\
+    \
+    out_write_N2K4[1].s0 = convert_uchar_sat(outvec0.s2); /*K= lane_id,N=2*/\
+    out_write_N2K4[1].s1 = convert_uchar_sat(outvec1.s2); /*K= lane_id + 8,N=2*/\
+    out_write_N2K4[1].s2 = convert_uchar_sat(outvec2.s2); /*K= lane_id + 16,N=2*/\
+    out_write_N2K4[1].s3 = convert_uchar_sat(outvec3.s2); /*K= lane_id + 24,N=2*/\
+    \
+    out_write_N2K4[1].s4 = convert_uchar_sat(outvec0.s3); /*K= lane_id,N=3*/\
+    out_write_N2K4[1].s5 = convert_uchar_sat(outvec1.s3); /*K= lane_id + 8,N=3*/\
+    out_write_N2K4[1].s6 = convert_uchar_sat(outvec2.s3); /*K= lane_id + 16,N=3*/\
+    out_write_N2K4[1].s7 = convert_uchar_sat(outvec3.s3); /*K= lane_id + 24,N=3*/
+
+#else
+
+#define QUANTIZATION \
+    out_write_N2K4[0].s0 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec0.s0) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N)); /*K= lane_id,N=0*/ \
+    out_write_N2K4[0].s1 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec1.s0) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N)); /*K= lane_id + 8,N=0*/\
+    out_write_N2K4[0].s2 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec2.s0) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N)); /*K= lane_id + 16,N=0*/\
+    out_write_N2K4[0].s3 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec3.s0) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N)); /*K= lane_id + 24,N=0*/\
+    \    
+    out_write_N2K4[0].s4 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec0.s1) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N)); /*K= lane_id,N=1*/\
+    out_write_N2K4[0].s5 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec1.s1) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N)); /*K= lane_id + 8,N=1*/\
+    out_write_N2K4[0].s6 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec2.s1) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N)); /*K= lane_id + 16,N=1*/\
+    out_write_N2K4[0].s7 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec3.s1) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N)); /*K= lane_id + 24,N=1*/\
+    \
+    out_write_N2K4[1].s0 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec0.s2) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N)); /*K= lane_id,N=2*/\
+    out_write_N2K4[1].s1 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec1.s2) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N)); /*K= lane_id + 8,N=2*/\
+    out_write_N2K4[1].s2 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec2.s2) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N)); /*K= lane_id + 16,N=2*/\
+    out_write_N2K4[1].s3 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec3.s2) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N)); /*K= lane_id + 24,N=2*/\
+    \
+    out_write_N2K4[1].s4 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec0.s3) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N)); /*K= lane_id,N=3*/\
+    out_write_N2K4[1].s5 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec1.s3) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N)); /*K= lane_id + 8,N=3*/\
+    out_write_N2K4[1].s6 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec2.s3) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N)); /*K= lane_id + 16,N=3*/\
+    out_write_N2K4[1].s7 = as_uchar(ACTIVATION(convert_char(round(((float)(outvec3.s3) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N)); /*K= lane_id + 24,N=3*/
+
+#endif
+
+// mapping to clDNN
+#define _MMAD_4x8(C, A, B) MMAD_4x8(A, B, C)
+#define _OD OUTPUT_FEATURE_NUM
+#define _OW OUTPUT_SIZE_X
+#define _OH OUTPUT_SIZE_Y
+#define OWPAD (OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X)
+#define OHPAD (OUTPUT_PAD_BEFORE_SIZE_Y + OUTPUT_PAD_AFTER_SIZE_Y)
+#define _IH INPUT0_SIZE_Y
+#define _IW INPUT0_SIZE_X
+#define _ID INPUT0_FEATURE_NUM
+#define K_HEIGHT FILTER_SIZE_Y
+#define K_WIDTH FILTER_SIZE_X
+#define BATCH_SIZE OUTPUT_BATCH_NUM
+
+#define IHPAD (INPUT0_PAD_BEFORE_SIZE_Y + INPUT0_PAD_AFTER_SIZE_Y)
+#define IWPAD (INPUT0_PAD_BEFORE_SIZE_X + INPUT0_PAD_AFTER_SIZE_X)
+#define K_STRIDE STRIDE_SIZE_X
+// end of mapping
+
+// for now kernel stride is square
+#define K_WSTRIDE K_STRIDE
+#define K_HSTRIDE K_STRIDE
+
+#define PACK 32
+#define BATCH_PACK 4
+
+__attribute__((intel_reqd_sub_group_size(8)))
+KERNEL(convolution_mmad_slm_2x14_rep4)(
+__global int8 *inputs,
+__global uchar* outputs,
+__global int8* weights,
+#if BIAS_TERM
+    __global BIAS_TYPE* biases,
+#endif
+#if QUANTIZATION_TERM
+    const __global float* quantizations,
+#endif
+#if CALIBRATION_TERM
+    const __global float* calibrations,
+#endif
+    uint split_idx
+)
+{
+	const uint TILE_H = OUT_BLOCK_HEIGHT*LOCAL_SIZE_Z;
+	const uint TILE_W = OUT_BLOCK_WIDTH*LOCAL_SIZE_Y;
+
+	ushort fmg     = get_group_id(0);   // Output Depth
+	ushort group_y = get_group_id(1);   // Output Width
+	ushort group_z = get_group_id(2);   // Output Height
+
+	/* 16,1,8 WG , SIMD8 - 16 HW threads in a WG
+	threads 0-1 : ( lid_x:0-15,lid_y:0,lid_z:0)
+	threads 2-3 : ( lid_x:0-15,lid_y:0,lid_z:1)
+	..
+	threads 12-13: ( lid_x:0-15, lid_y:0,lid_z:6)
+	threads 14-15: ( lid_x:0-15, lid_y:0,lid_z:7)
+	*/
+
+	/* Thread, local IDs */
+	ushort thread_id 		= get_sub_group_id();
+	ushort threadid_mod_2   = thread_id % 2;
+	ushort threadid_mod_8   = thread_id % 8;
+
+	ushort lid_x    = get_local_id(0);
+	ushort lid_z    = get_local_id(2);
+
+	uchar  lane_id  = get_sub_group_local_id();
+
+	/* 32-bit signed accumulator , 112 output registers for 1Px7Qx4Nx32K output tile size
+	   Will be converted to 8-bits before final write */
+
+	int4  out_07 [ OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH ]   = {0}; // For output channels 0-7
+	int4  out_815[ OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH ]   = {0}; // For output channels 8-15
+	int4  out_1623[ OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH ]  = {0}; // For output channels 16-23
+	int4  out_2431[ OUT_BLOCK_HEIGHT * OUT_BLOCK_WIDTH ]  = {0}; // For output channels 24-31
+
+	/* Account for batching */
+
+	ushort batch 	= ( fmg*LOCAL_SIZE_X*4 ) /_OD; // Each thread processing 32 output_channels and each fmg processing 64 output channels , LOCAL_SIZE_X is only 16
+
+	// Size calculated for int8 elements
+	uint input_size = (_IH + IHPAD) * (_IW + IWPAD) * BATCH_PACK ;
+
+	uint in_addr_offset = batch*input_size;
+
+	/* Goto activation tile for work group, offset is w.r.t int8 array */
+
+	uint groupy_tile = TILE_W*group_y;
+	uint groupz_tile = TILE_H*group_z;
+
+    in_addr_offset += (groupz_tile * K_STRIDE) * (_IW + IWPAD) * BATCH_PACK + (groupy_tile * K_STRIDE) * BATCH_PACK;
+
+	 	/* SLM space for Activation, Weights
+	       ( 16,1,8 ) Workgroup - 7 tiles along Y direction and 64 different output channels
+		    2 threads used to load global memory
+	        Activation - 9Hx9Wx4Nx32C Weights -3Rx3Sx64Kx32C	*/
+
+	__local int8 act_slm      [  9*9*4 ];
+	__local int8 weight_slm   [  9*64  ];
+
+   /* 9Hx9Wx4Nx32C activation tile written into SLM.  Distribute among 14 threads in Workgroup
+	   threads 0-1 write 9x4x32 of  H=0, W=0...8
+	   threads 2-3 write 9x4x32 of H=1, W=0...8
+	   threads 4-5 write 9x4x32 of H=2, W=0...8
+	   threads 6-7  write 9x4x32 of H=3, W=0...8
+	   threads 8-9 write 9x4x32 of H=4, W=0...8
+	   threads 10-11 write 9x4x32 of H=5,W=0...8
+	   threads 12-13 write 9x4x32 of H=6,W=0...8
+	   threads 14 write 9x4x32 of H=7,W=0...8
+	   threads 15 write 9x4x32 of H=8,W=0...8 */
+
+	/* Goto activation tile for thread in group */
+
+	uint row_offset   =  thread_id / 2;
+
+	if ( thread_id >= 14 )
+    {
+        row_offset = 7;
+	}
+
+	// In addr offset for the particular thread
+	in_addr_offset    += row_offset * K_STRIDE * (_IW + IWPAD ) * BATCH_PACK ;
+
+   /* Activation SLM indices */
+    uint act_slm_write =  row_offset * ( TILE_W + 2) * BATCH_PACK;
+	uint act_slm_read  =  OUT_BLOCK_HEIGHT * lid_z * ( TILE_W + 2) * BATCH_PACK ;
+
+	/* 9RSx64Kx32C Weight Block in SLM
+	   thread0 handles ( reads from global ) w(0,0),w(0,1),w(0,2) of K=0,1 ( k=0..15 )
+	   thread1 handles w(0,0),w(0,1),w(0,2) of K=2,3 ( k=16..31)
+	   thread2 handles w(1,0),w(1,1) of K=0,1 ( k=0..15)
+	   thread3 handles w(1,0),w(1,1) of K=2,3 ( k=16..31)
+	   thread4 handles w(1,2),w(2,0) of K=0,1 ( k=0..15)
+	   thread5 handles w(1,2),w(2,0) of K=2,3 ( k=16..31)
+	   thread6 handles w(2,1),w(2,2) of K=0,1 ( k=0..15)
+	   thread7 handles w(2,1),w(2,2) of K=2,3 ( k=16..31)
+
+	   Similarly threads8-15 handles for K=4,5,6,7
+
+	   Weight Layout in SLM
+
+	   w(R=0,S=0,k=0..7,C=0..15),w(R=0,S=0,k=32..39,C=0..15)
+	   w(R=0,S=0,k=0..7,C=16..31),w(R=0,S=0,k=32..39,C=16..31)
+
+	   Above interleaving present to avoid SLM Bank conflicts when fused threads read from SLM
+	   Thread0 will read k=0..31, thread1 will read k=32..63
+
+	   First all output channels are present in SLM, then next weight pixel is present in SLM */
+
+	 #define NUM_FILTERS (K_HEIGHT * K_WIDTH)
+
+	 uint output_depth    = fmg % ( _OD / ( LOCAL_SIZE_X * 4 ) ); //LOCAL_SIZE_X=16, 64 output channels used
+
+	 uint weight_size_CRS =  ( _ID / PACK ) * NUM_FILTERS * 8; //8 output channels packed inside
+
+	 // Global weight addr for workgroup
+	 uint weight_global_addr_offset =  output_depth * 8 * weight_size_CRS ; //64 output channels per workgroup
+
+	 /* Global weight address for thread */
+
+	 // Goto appropriate output channel in weights
+	 uint weight_global_channel_offset = threadid_mod_2 * 2 * weight_size_CRS ;
+
+	uint slm_channel_offset     = threadid_mod_2;
+	uint bc_fused_thread_offset = 0;
+
+	 if ( thread_id >= 8 )
+    {
+		bc_fused_thread_offset =  1;
+
+		weight_global_channel_offset =  4 * weight_size_CRS + slm_channel_offset * weight_size_CRS * 2 ;
+    }
+
+	 // Goto appropriate pixel in weights
+
+	 uint weight_global_pixel_offset = 0;
+	 uint slm_pixel_offset = 0;
+
+    if ( threadid_mod_8 >=2  )
+    {
+	 /* First three pixels handled by threads 0-1, then 2 pixels handled by two threads */
+
+		weight_global_pixel_offset = 3*8 +  ( ( (threadid_mod_8/2) - 1 )*2*8 );
+		slm_pixel_offset 		   = 3*64 + ( ( (threadid_mod_8/2) - 1 )*2*64 );
+    }
+
+    weight_global_addr_offset += weight_global_channel_offset + weight_global_pixel_offset;
+
+	 /* Weight slm write index */
+
+	 uint slm_write_weight = slm_pixel_offset + slm_channel_offset * 32 + bc_fused_thread_offset * 4;
+
+	 /* Weight slm read index */
+
+	 /* Thread 0  reads output channels 0-15, thread 1 handles output channels 16-31, data present in interleaved
+	    manner in SLM
+		Data layout in SLM
+
+		w(0,0) C=0..7, K = 0..7 | w(0,0) C=0..7, K = 32..39
+		w(0,0) C=8..15,K=0..7   | w(0,0) C=8..15,K = 32..39
+		w(0,0) C=0..7, K=8..15  | w(0,0) C=0..7, K = 40..47
+		w(0,0) C=8..15,K=8..15  | w(0,0) C=8..15,K=  40..47
+
+		*/
+    uint wt_slm_rd_offset = threadid_mod_2*4;
+
+	int kd;
+
+	__attribute__((opencl_unroll_hint(1)))
+	for(kd = 0; kd <  ( _ID / PACK ) ; kd++)
+	{
+		{
+			/* Load Activation from global to SLM */
+
+			int in_addr = kd * (_IH + IHPAD) * (_IW + IWPAD) * BATCH_SIZE + in_addr_offset;
+
+			__global uint *activation_tile = (__global uint*)&inputs[ in_addr ];
+
+			__local uint *act_slm_ptr   = (__local uint *) &act_slm [ act_slm_write  ];
+
+			/* The odd thread in fused pair will start from next 4x8 block */
+
+			activation_tile += threadid_mod_2*4*8;
+			act_slm_ptr 	+= threadid_mod_2*4*8;
+
+			int4 act_col_0 =  as_int4( intel_sub_group_block_read4(activation_tile) );//col 0
+			int4 act_col_1 =  as_int4( intel_sub_group_block_read4(activation_tile + 8*8) );//col 2
+			int4 act_col_2 =  as_int4( intel_sub_group_block_read4(activation_tile + 2*8*8) );//col 4
+			int4 act_col_3 =  as_int4( intel_sub_group_block_read4(activation_tile + 3*8*8) );//col 6
+
+			SLM_BLOCK_WRITE_4 ( act_slm_ptr , as_uint4 ( act_col_0 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 8*8 ) , as_uint4 ( act_col_1 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 2*8*8 ) , as_uint4 ( act_col_2 ) );
+			SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 3*8*8 ) , as_uint4 ( act_col_3 ) );
+
+			if ( threadid_mod_2  == 0 )
+            {
+				int4 act_col_4 =  as_int4( intel_sub_group_block_read4(activation_tile + 4*8*8) );
+
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 4*8*8 ) , as_uint4 ( act_col_4 ) );
+			}
+
+			if ( thread_id >=14)
+            {
+				activation_tile  = activation_tile + 1 * (_IW + IWPAD ) * BATCH_PACK * 8;
+				act_slm_ptr 	 = act_slm_ptr + (TILE_W + 2)  * BATCH_PACK *8;
+
+				int4 act_col_9 =  as_int4( intel_sub_group_block_read4(activation_tile) );
+				int4 act_col_10 =  as_int4( intel_sub_group_block_read4(activation_tile + 8*8) );
+				int4 act_col_11 =  as_int4( intel_sub_group_block_read4(activation_tile + 2*8*8) );
+				int4 act_col_12 =  as_int4( intel_sub_group_block_read4(activation_tile + 3*8*8) );
+
+				SLM_BLOCK_WRITE_4 ( act_slm_ptr  , as_uint4 ( act_col_9 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 8*8 )   , as_uint4 ( act_col_10 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 2*8*8 ) , as_uint4 ( act_col_11 ) );
+				SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 3*8*8 ) , as_uint4 ( act_col_12 ) );
+
+				if ( threadid_mod_2  == 0 )
+                {
+					int4 act_col_13 =  as_int4( intel_sub_group_block_read4(activation_tile + 4*8*8) );
+
+					SLM_BLOCK_WRITE_4 ( ( act_slm_ptr + 4*8*8 ) , as_uint4 ( act_col_13 ) );
+				}
+			}
+
+		/* load weights from global to weight_slm */
+
+			int weight_addr = kd * NUM_FILTERS * 8 + weight_global_addr_offset;
+
+			__global uint *weight_tile   = (__global uint*)&weights    [ weight_addr ];
+			__local  uint *wt_slm_ptr    = (__local uint *)&weight_slm [ slm_write_weight  ];
+
+			__global uint *weight_tile_2   = weight_tile;
+			__local uint *wt_slm_ptr_2     = wt_slm_ptr;
+
+			int4 w0 = as_int4 ( intel_sub_group_block_read4( weight_tile ) );	// Pixel1 K=0..7 C=0..15
+			int4 w1 = as_int4 ( intel_sub_group_block_read4( weight_tile + 4*8 ) );	// Pixel1 K=0..7 C=16..31
+			int4 w2 = as_int4 ( intel_sub_group_block_read4( weight_tile + 8*8 ) );	// Pixel2 K=0..7 C=0..15
+			int4 w3 = as_int4 ( intel_sub_group_block_read4( weight_tile + 12*8 ) );// Pixel2 K=0..7 C=16..31
+
+			// Goto next output channel
+			weight_tile += weight_size_CRS*8;
+
+			int4 w4 = as_int4 ( intel_sub_group_block_read4( weight_tile ) );	// Pixel1 K=8..15 C=0..15
+			int4 w5 = as_int4 ( intel_sub_group_block_read4( weight_tile + 4*8 ) );	// Pixel1 K=8..15 C=16..31
+			int4 w6 = as_int4 ( intel_sub_group_block_read4( weight_tile + 8*8 ) );	// Pixel2 K=8..15 C=0..15
+			int4 w7 = as_int4 ( intel_sub_group_block_read4( weight_tile + 12*8 ) );// Pixel2 K=8..15 C=16..31
+
+			SLM_BLOCK_WRITE_4 ( wt_slm_ptr, as_uint4 ( w0 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 8*8 ) , as_uint4 ( w1 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 64*8 ), as_uint4 ( w2 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 64*8 + 8*8 ), as_uint4 ( w3 ) );
+
+			wt_slm_ptr  += 16*8;
+
+			SLM_BLOCK_WRITE_4 ( wt_slm_ptr , as_uint4 ( w4 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 8*8 )   , as_uint4 ( w5 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 64*8 ) , as_uint4 ( w6 ) );
+			SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr + 64*8 + 8*8 ) , as_uint4 ( w7 ) );
+
+		   if( threadid_mod_8 < 2 )
+           {
+				// Goto next pixel
+				weight_tile_2 += 16*8;
+				wt_slm_ptr_2  += 2*64*8;
+
+				int4 w0 = as_int4 ( intel_sub_group_block_read4( weight_tile_2 ) );	// Pixel1 K=0..7 C=0..15
+				int4 w1 = as_int4 ( intel_sub_group_block_read4( weight_tile_2 + 4*8 ) );	// Pixel1 K=0..7 C=16..31
+
+				// Goto next output channel
+				weight_tile_2 += weight_size_CRS*8;
+
+				int4 w4 = as_int4 ( intel_sub_group_block_read4( weight_tile_2 ) );	// Pixel1 K=8..15 C=0..15
+				int4 w5 = as_int4 ( intel_sub_group_block_read4( weight_tile_2 + 4*8 ) );	// Pixel1 K=8..15 C=16..31
+
+				SLM_BLOCK_WRITE_4 ( wt_slm_ptr_2, as_uint4 ( w0 ) );
+				SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr_2 + 8*8 ) , as_uint4 ( w1 ) );
+
+				wt_slm_ptr_2  += 16*8;
+
+				SLM_BLOCK_WRITE_4 ( wt_slm_ptr_2 , as_uint4 ( w4 ) );
+				SLM_BLOCK_WRITE_4 ( ( wt_slm_ptr_2 + 8*8 )   , as_uint4 ( w5 ) );
+			}
+	}
+
+		// Synchronize SLM writes across workgroup
+		 barrier(CLK_LOCAL_MEM_FENCE);
+
+		if ( lid_z <= 6 )
+        {
+			uint wt_slm_rd = wt_slm_rd_offset;
+
+			__local uint *slm_ptr0     = (__local uint *) &act_slm[ act_slm_read ];
+			__local uint *slm_ptr1     = (__local uint *) &weight_slm[ wt_slm_rd ];
+
+			/* balancing load of weights, activations   */
+			int8 weights_reg[3]; //24 registers
+			int4 act_reg[18];    //72 registers
+			uint slm_read_pixel_offset = 64*8;
+
+			/**********************************************************************************************************
+			  First phase - multiply first row of weights  and 1st row of activations
+			***********************************************************************************************************/
+
+	                 /* Load weights from SLM into registers - row0, output channels 0..7  */
+
+				{
+					 	__local uint *slm_ptrw0  = slm_ptr1;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+				}
+
+			/* load 1Hx9Wx4N inputs, Activation row0   */
+
+				__attribute__((opencl_unroll_hint(9)))
+				for (int ic = 0; ic < 9; ic++)
+				{
+	                 /* Load activations from SLM into registers  */
+
+					 uint slm_offset = ic * BATCH_PACK * 8 ;
+
+    				 act_reg [ ic ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ;
+				}
+
+			/* Convolve */
+
+			   /* order the mmad instructions to minimize dependency on src0,dst - also try to maximise reuse of weights-reg*/
+
+				/*  Output channels 0-7 */
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[0], weights_reg[0] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[1], weights_reg[0] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[2], weights_reg[0] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[3], weights_reg[0] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[4], weights_reg[0] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[5], weights_reg[0] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[1], weights_reg[1] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[2], weights_reg[1] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[3], weights_reg[1] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[4], weights_reg[1] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[5], weights_reg[1] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[6], weights_reg[1] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[2], weights_reg[2] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[3], weights_reg[2] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[4], weights_reg[2] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[5], weights_reg[2] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[6], weights_reg[2] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[7], weights_reg[2] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[8], weights_reg[2] );
+
+		     /* Load weights from SLM into registers - row0, output channels 8..15  */
+
+				{
+					 	__local uint *slm_ptrw0 = slm_ptr1 + 2*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+				}
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[0], weights_reg[0] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[1], weights_reg[0] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[2], weights_reg[0] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[3], weights_reg[0] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[4], weights_reg[0] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[5], weights_reg[0] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[1], weights_reg[1] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[2], weights_reg[1] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[3], weights_reg[1] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[4], weights_reg[1] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[5], weights_reg[1] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[6], weights_reg[1] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[2], weights_reg[2] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[3], weights_reg[2] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[4], weights_reg[2] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[5], weights_reg[2] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[6], weights_reg[2] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[7], weights_reg[2] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[8], weights_reg[2] );
+
+				/* Load weights from SLM into registers - row0, output channels 16..23  */
+				{
+					 	__local uint *slm_ptrw0 = slm_ptr1 + 4*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+				}
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[0], weights_reg[0] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[1], weights_reg[0] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[2], weights_reg[0] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[3], weights_reg[0] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[4], weights_reg[0] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[5], weights_reg[0] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[6], weights_reg[0] );
+
+				/* load 1Hx9Wx4N inputs, Activation row1   */
+
+				uint slm_row_offset_2 	  = 1*(TILE_W + 2)*BATCH_PACK*8;
+
+				__attribute__((opencl_unroll_hint(9)))
+				for (int ic = 0; ic < 9; ic++)
+				{
+	                 /* Load activations from SLM into registers  */
+
+					 uint slm_offset = slm_row_offset_2 + ic * BATCH_PACK * 8 ;
+
+    				 act_reg [ ic + 9 ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ;
+				}
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[1], weights_reg[1] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[2], weights_reg[1] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[3], weights_reg[1] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[4], weights_reg[1] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[5], weights_reg[1] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[6], weights_reg[1] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[2], weights_reg[2] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[3], weights_reg[2] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[4], weights_reg[2] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[5], weights_reg[2] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[6], weights_reg[2] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[7], weights_reg[2] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[8], weights_reg[2] );
+
+				/* Load weights from SLM into registers - row0, output channels 24..31  */
+				{
+					 	__local uint *slm_ptrw0 = slm_ptr1 + 6*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+						slm_ptrw0   			 += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw0 + 64 ) );
+				}
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[0], weights_reg[0] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[1], weights_reg[0] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[2], weights_reg[0] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[3], weights_reg[0] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[4], weights_reg[0] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[5], weights_reg[0] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[1], weights_reg[1] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[2], weights_reg[1] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[3], weights_reg[1] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[4], weights_reg[1] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[5], weights_reg[1] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[6], weights_reg[1] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[2], weights_reg[2] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[3], weights_reg[2] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[4], weights_reg[2] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[5], weights_reg[2] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[6], weights_reg[2] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[7], weights_reg[2] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[8], weights_reg[2] );
+
+			/**********************************************************************************************************
+			  Second phase - multiply second row of weights  and second row of activations
+			***********************************************************************************************************/
+
+			 /* Load weights from SLM into registers - row1, output channels 0..7  */
+				{
+					 	__local uint *slm_ptrw1  = slm_ptr1 + 3*slm_read_pixel_offset;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			 += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1  			     += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+				}
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[9], weights_reg[0] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[10], weights_reg[0] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[11], weights_reg[0] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[12], weights_reg[0] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[13], weights_reg[0] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[14], weights_reg[0] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[15], weights_reg[0] );
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[10], weights_reg[1] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[11], weights_reg[1] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[12], weights_reg[1] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[13], weights_reg[1] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[14], weights_reg[1] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[15], weights_reg[1] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[16], weights_reg[1] );
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[11], weights_reg[2] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[12], weights_reg[2] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[13], weights_reg[2] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[14], weights_reg[2] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[15], weights_reg[2] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[16], weights_reg[2] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[17], weights_reg[2] );
+
+				    /* Load weights from SLM into registers - row1, output channels 8..15  */
+				{
+					 	__local uint *slm_ptrw1 = slm_ptr1 + 3*slm_read_pixel_offset + 2*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			   += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+				}
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[9], weights_reg[0] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[10], weights_reg[0] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[11], weights_reg[0] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[12], weights_reg[0] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[13], weights_reg[0] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[14], weights_reg[0] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[15], weights_reg[0] );
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[10], weights_reg[1] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[11], weights_reg[1] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[12], weights_reg[1] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[13], weights_reg[1] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[14], weights_reg[1] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[15], weights_reg[1] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[16], weights_reg[1] );
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[11], weights_reg[2] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[12], weights_reg[2] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[13], weights_reg[2] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[14], weights_reg[2] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[15], weights_reg[2] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[16], weights_reg[2] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[17], weights_reg[2] );
+
+				/* Load weights from SLM into registers - row1, output channels 16..23  */
+				{
+					 	__local uint *slm_ptrw1 = slm_ptr1 + 3*slm_read_pixel_offset + 4*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			   += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+				}
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[9], weights_reg[0] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[10], weights_reg[0] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[11], weights_reg[0] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[12], weights_reg[0] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[13], weights_reg[0] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[14], weights_reg[0] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[15], weights_reg[0] );
+
+				/* load 1Hx9Wx4N inputs, Activation row2  */
+
+				uint slm_row_offset_3	  = 2*(TILE_W + 2)*BATCH_PACK*8;
+
+				__attribute__((opencl_unroll_hint(9)))
+				for (int ic = 0; ic < 9; ic++)
+				{
+	                 /* Load activations from SLM into registers  */
+
+					 uint slm_offset = slm_row_offset_3 + ic * BATCH_PACK * 8 ;
+
+    				 act_reg [ ic ] = as_int4 (SLM_BLOCK_READ_4 (slm_ptr0 + slm_offset)) ;
+				}
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[10], weights_reg[1] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[11], weights_reg[1] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[12], weights_reg[1] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[13], weights_reg[1] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[14], weights_reg[1] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[15], weights_reg[1] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[16], weights_reg[1] );
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[11], weights_reg[2] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[12], weights_reg[2] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[13], weights_reg[2] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[14], weights_reg[2] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[15], weights_reg[2] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[16], weights_reg[2] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[17], weights_reg[2] );
+
+				/* Load weights from SLM into registers - row1, output channels 24..31  */
+				{
+					 	__local uint *slm_ptrw1 = slm_ptr1 + 3*slm_read_pixel_offset + 6*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+						slm_ptrw1   			   += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw1 + 64 ) );
+				}
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[9], weights_reg[0] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[10], weights_reg[0] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[11], weights_reg[0] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[12], weights_reg[0] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[13], weights_reg[0] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[14], weights_reg[0] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[15], weights_reg[0] );
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[10], weights_reg[1] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[11], weights_reg[1] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[12], weights_reg[1] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[13], weights_reg[1] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[14], weights_reg[1] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[15], weights_reg[1] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[16], weights_reg[1] );
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[11], weights_reg[2] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[12], weights_reg[2] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[13], weights_reg[2] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[14], weights_reg[2] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[15], weights_reg[2] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[16], weights_reg[2] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[17], weights_reg[2] );
+
+			/**********************************************************************************************************
+			  Third phase - multiply third row of weights  and third row of activations
+			***********************************************************************************************************/
+
+				 /* Load weights from SLM into registers - row2, output channels 0..7  */
+				{
+					 	__local uint *slm_ptrw2  = slm_ptr1 + 6*slm_read_pixel_offset;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2 			     += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+				}
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[0], weights_reg[0] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[1], weights_reg[0] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[2], weights_reg[0] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[3], weights_reg[0] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[4], weights_reg[0] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[5], weights_reg[0] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[1], weights_reg[1] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[2], weights_reg[1] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[3], weights_reg[1] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[4], weights_reg[1] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[5], weights_reg[1] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[6], weights_reg[1] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_07[ 0 ] = _MMAD_4x8 ( out_07[ 0 ], act_reg[2], weights_reg[2] );
+				out_07[ 1 ] = _MMAD_4x8 ( out_07[ 1 ], act_reg[3], weights_reg[2] );
+				out_07[ 2 ] = _MMAD_4x8 ( out_07[ 2 ], act_reg[4], weights_reg[2] );
+				out_07[ 3 ] = _MMAD_4x8 ( out_07[ 3 ], act_reg[5], weights_reg[2] );
+				out_07[ 4 ] = _MMAD_4x8 ( out_07[ 4 ], act_reg[6], weights_reg[2] );
+				out_07[ 5 ] = _MMAD_4x8 ( out_07[ 5 ], act_reg[7], weights_reg[2] );
+				out_07[ 6 ] = _MMAD_4x8 ( out_07[ 6 ], act_reg[8], weights_reg[2] );
+
+				     /* Load weights from SLM into registers - row2, output channels 8..15  */
+				{
+					 	__local uint *slm_ptrw2 = slm_ptr1 + 6*slm_read_pixel_offset + 2*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+				}
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[0], weights_reg[0] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[1], weights_reg[0] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[2], weights_reg[0] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[3], weights_reg[0] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[4], weights_reg[0] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[5], weights_reg[0] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[1], weights_reg[1] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[2], weights_reg[1] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[3], weights_reg[1] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[4], weights_reg[1] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[5], weights_reg[1] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[6], weights_reg[1] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_815[ 0 ] = _MMAD_4x8 ( out_815[ 0 ], act_reg[2], weights_reg[2] );
+				out_815[ 1 ] = _MMAD_4x8 ( out_815[ 1 ], act_reg[3], weights_reg[2] );
+				out_815[ 2 ] = _MMAD_4x8 ( out_815[ 2 ], act_reg[4], weights_reg[2] );
+				out_815[ 3 ] = _MMAD_4x8 ( out_815[ 3 ], act_reg[5], weights_reg[2] );
+				out_815[ 4 ] = _MMAD_4x8 ( out_815[ 4 ], act_reg[6], weights_reg[2] );
+				out_815[ 5 ] = _MMAD_4x8 ( out_815[ 5 ], act_reg[7], weights_reg[2] );
+				out_815[ 6 ] = _MMAD_4x8 ( out_815[ 6 ], act_reg[8], weights_reg[2] );
+
+				/* Load weights from SLM into registers - row2, output channels 16..23  */
+				{
+					 	__local uint *slm_ptrw2 = slm_ptr1 + 6*slm_read_pixel_offset + 4*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+				}
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[0], weights_reg[0] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[1], weights_reg[0] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[2], weights_reg[0] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[3], weights_reg[0] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[4], weights_reg[0] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[5], weights_reg[0] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[1], weights_reg[1] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[2], weights_reg[1] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[3], weights_reg[1] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[4], weights_reg[1] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[5], weights_reg[1] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[6], weights_reg[1] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_1623[ 0 ] = _MMAD_4x8 ( out_1623[ 0 ], act_reg[2], weights_reg[2] );
+				out_1623[ 1 ] = _MMAD_4x8 ( out_1623[ 1 ], act_reg[3], weights_reg[2] );
+				out_1623[ 2 ] = _MMAD_4x8 ( out_1623[ 2 ], act_reg[4], weights_reg[2] );
+				out_1623[ 3 ] = _MMAD_4x8 ( out_1623[ 3 ], act_reg[5], weights_reg[2] );
+				out_1623[ 4 ] = _MMAD_4x8 ( out_1623[ 4 ], act_reg[6], weights_reg[2] );
+				out_1623[ 5 ] = _MMAD_4x8 ( out_1623[ 5 ], act_reg[7], weights_reg[2] );
+				out_1623[ 6 ] = _MMAD_4x8 ( out_1623[ 6 ], act_reg[8], weights_reg[2] );
+
+				/* Load weights from SLM into registers - row3, output channels 24..31  */
+				{
+					 	__local uint *slm_ptrw2 = slm_ptr1 + 6*slm_read_pixel_offset + 6*8*8;
+
+					    weights_reg[0].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[0].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[1].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[1].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+						slm_ptrw2   			   += slm_read_pixel_offset;
+
+						weights_reg[2].s0123     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 ) );
+					    weights_reg[2].s4567     = as_int4 ( SLM_BLOCK_READ_4 ( slm_ptrw2 + 64 ) );
+				}
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[0], weights_reg[0] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[1], weights_reg[0] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[2], weights_reg[0] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[3], weights_reg[0] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[4], weights_reg[0] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[5], weights_reg[0] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[6], weights_reg[0] );
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[1], weights_reg[1] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[2], weights_reg[1] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[3], weights_reg[1] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[4], weights_reg[1] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[5], weights_reg[1] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[6], weights_reg[1] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[7], weights_reg[1] );
+
+				out_2431[ 0 ] = _MMAD_4x8 ( out_2431[ 0 ], act_reg[2], weights_reg[2] );
+				out_2431[ 1 ] = _MMAD_4x8 ( out_2431[ 1 ], act_reg[3], weights_reg[2] );
+				out_2431[ 2 ] = _MMAD_4x8 ( out_2431[ 2 ], act_reg[4], weights_reg[2] );
+				out_2431[ 3 ] = _MMAD_4x8 ( out_2431[ 3 ], act_reg[5], weights_reg[2] );
+				out_2431[ 4 ] = _MMAD_4x8 ( out_2431[ 4 ], act_reg[6], weights_reg[2] );
+				out_2431[ 5 ] = _MMAD_4x8 ( out_2431[ 5 ], act_reg[7], weights_reg[2] );
+				out_2431[ 6 ] = _MMAD_4x8 ( out_2431[ 6 ], act_reg[8], weights_reg[2] );
+		}
+
+			// To make sure all threads in WG have finished compute before next depth tile of activation and weights are loaded into SLM
+			barrier(CLK_LOCAL_MEM_FENCE);
+	} //for kd
+
+        /****************************************************************************************************************
+		*******************************Output Write Stage****************************************************************
+		****************************************************************************************************************/
+			/*
+		   Outputs will be passed through activation function and quantized to 8 bits before writing
+		   Output will be in same format as input [K/32][N/4][P][Q][4N][32K] */
+
+			/******************* Write output to SLM *************************************/
+
+		/*  Quantize and pack 4x1 byte - from consectuive n-coordinates
+			Each thread produces [1P][7Q][4N][32K]
+         	Write uint32 from each lane to SLM , the entire thread will write 32-consecutive K-coorindates
+
+			Assume one SLM row as 32 uints ( 32 channels , four batches for each channel - 4NK )
+			In SLM 7x7x4x32 present first then the next 32 channels
+		*/
+
+		if( lid_z <= 6 )
+        {
+			/* feature maps are an array of slicePacks, each H,W position within the slice pack contains 32 8bit feature maps(channels) of 8 different batches */
+			uint row_size_bytes        = (_OW + OWPAD) * PACK * BATCH_PACK;
+
+			/* slice_pack is a pack of 32 feature map tiles that are [OH][OW][4][32] that are stored within the full [K/32][N/4][OH][OW][4][32] output */
+			uint slice_pack_size_bytes = row_size_bytes * (_OH + OHPAD);
+
+			/* Each output_depth WG writes 64 output channels */
+
+		 	uint output_depth_index      =  output_depth*2 + threadid_mod_2;
+			uint batch_index			 =  batch;
+
+			/* Each WG produces entire 7x7 output, hence no group_y, group_z tiling */
+
+            uint output_offset_x = groupy_tile * OUT_X_PITCH;
+            uint output_offset_y = groupz_tile * OUT_Y_PITCH;
+			uint slice_pack_addr_bytes  = output_depth_index * slice_pack_size_bytes * ( BATCH_SIZE / BATCH_PACK ) + batch_index * slice_pack_size_bytes + lid_z * row_size_bytes;
+						
+			__global uchar* output_write_ptr = (__global uchar *) &outputs [ slice_pack_addr_bytes + output_offset_x + output_offset_y ];
+
+                const uint feature = output_depth_index * 32 + get_sub_group_local_id();
+
+                const float4 quant_f = as_float4(intel_sub_group_block_read4((__global uint*) (quantizations + feature) ));
+                const float4 bias_f = as_float4(intel_sub_group_block_read4((__global uint*) (biases + feature) ));
+                const float4 calib_f = as_float4(intel_sub_group_block_read4((__global uint*) (calibrations + feature) ));
+
+                __attribute__((opencl_unroll_hint(OUT_BLOCK_WIDTH)))
+				for (int col = 0; col < OUT_BLOCK_WIDTH; col++)
+                {
+
+					int4 outvec0 = out_07[col];
+					int4 outvec1 = out_815[col];
+					int4 outvec2 = out_1623[col];
+					int4 outvec3 = out_2431[col];
+
+					/* Non-Linear Activation & Quantization code */
+
+					uchar8 out_write_N2K4[2];
+
+                    QUANTIZATION;
+
+					intel_sub_group_block_write_uc8 (  output_write_ptr  , out_write_N2K4[0] );
+					output_write_ptr += 64;
+					intel_sub_group_block_write_uc8 (  output_write_ptr  , out_write_N2K4[1] );
+					output_write_ptr += 64;
+
+				} // out_block_width-for loop
+		}//lid_z loop
+} //end of kernel
+
+#undef SCAL
+#undef QUANTIZATION
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_winograd_2x3_s1_fused.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_winograd_2x3_s1_fused.cl
index 1623a9559..603e148c5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_winograd_2x3_s1_fused.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_winograd_2x3_s1_fused.cl
@@ -18,7 +18,7 @@
 // Output matrix dimensions: M x N
 // --------------------------------------------------------------------------------------------------------------------------------
 
-#include "include/data_types.cl"
+#include "include/common.cl"
 
 
 #define DOT4i0( _result, _A, _B, i)					\
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_ref.cl
index 03affe9b2..33f88c0b2 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_ref.cl
@@ -39,6 +39,11 @@ KERNEL(convolution_gpu_yxfb_ref)(
     const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
 #endif
     const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint filter_offset = split_idx * FILTER_LENGTH;
+#else
+    const uint filter_offset = 0;
+#endif
 
     for (uint i = 0; i < FILTER_SIZE_Y; i++)
     {
@@ -55,7 +60,7 @@ KERNEL(convolution_gpu_yxfb_ref)(
                 if(!zero)
                 {
                     uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH;
-                    uint filter_idx = ofm_offset*FILTER_OFM_PITCH + i*FILTER_Y_PITCH + j*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + i*FILTER_Y_PITCH + j*FILTER_X_PITCH;
 
                     for (uint h = 0; h < FILTER_IFM_NUM; h++)
                     {
@@ -68,7 +73,12 @@ KERNEL(convolution_gpu_yxfb_ref)(
         }
     }
 #if BIAS_TERM
-    result += bias[ofm_offset];
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+    const uint bias_offset = 0;
+#endif
+    result += bias[ofm_offset + bias_offset];
 #endif
     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
     const uint dst_index = batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH + OUTPUT_OFFSET + out_split_offset;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp16.cl
index edf68f846..2b1fb4cc7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp16.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp16.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 __attribute__((intel_reqd_sub_group_size(16)))
 __attribute__((reqd_work_group_size(16, 1, 1)))
@@ -31,15 +32,15 @@ KERNEL(convolution_gpu_yxfb_yxio_b16)(
     // get_global_size(1) -> Output size in X-dimension.
     // get_global_size(2) -> Output size in Y-dimension.
     // get_global_id(0)   -> Id of work item computing single spatial point of output indicated by get_global_id(1), get_global_id(2).
-    // get_global_id(1)   -> Current x-position in output.
-    // get_global_id(2)   -> Current y-position in output.
+    // get_group_id(1)   -> Current x-position in output.
+    // get_group_id(2)   -> Current y-position in output.
     //
     // WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS -> Number of work items needed to compute entire one batch for at least one feature and one spatial point.
     //                                           (this number in current implementation computes also OFM_PER_WORK_ITEM output features at the same time).
     // FILTER_ARRAY_NUM                       -> Number of filters groups (split size).
 
-    const uint out_x = get_global_id(1);
-    const uint out_y = get_global_id(2);
+    const uint out_x = get_group_id(1);
+    const uint out_y = get_group_id(2);
 
     const uint output_f_size = OUTPUT_PAD_BEFORE_FEATURE_NUM + OUTPUT_FEATURE_NUM + OUTPUT_PAD_AFTER_FEATURE_NUM;
     const uint output_x_size = OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X;
@@ -140,6 +141,15 @@ KERNEL(convolution_gpu_yxfb_yxio_b16)(
     }
 
 #if defined(USE_BLOCK_READ_2) || defined(USE_BLOCK_READ_1)
+    #if BATCHES_PER_WORK_ITEM == 4
+        uint _out_id = OUTPUT_VIEW_OFFSET + out_id;
+        for(uint i = 0; i < 16; i++)
+        {
+            *(__global uint*)(output + _out_id) = as_uint((half2)(_data[0][i], _data[1][i]));
+            *(__global uint*)(output + _out_id + 32) = as_uint((half2)(_data[2][i], _data[3][i]));
+            _out_id += OUTPUT_FEATURE_PITCH;
+        }
+    #else
     for(uint s = 0; s < BATCHES_PER_WORK_ITEM / 2; s++)
     {
         uint _out_id = OUTPUT_VIEW_OFFSET + out_id + chunk_size * s * LOCAL_WORK_GROUP_SIZE;
@@ -160,6 +170,7 @@ KERNEL(convolution_gpu_yxfb_yxio_b16)(
         *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].se, _data[chunk_size * s + 1].se)); _out_id += OUTPUT_FEATURE_PITCH;
         *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].sf, _data[chunk_size * s + 1].sf)); _out_id += OUTPUT_FEATURE_PITCH;
     }
+    #endif
 #else
     for(uint s = 0; s < BATCHES_PER_WORK_ITEM; s++)
     {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp32.cl
index dd869b54b..004f8e03b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b16_fp32.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 KERNEL(convolution_gpu_yxfb_yxio_b16)(
     const __global float* input,
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_fp32.cl
index 181b6193b..a56896db7 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_fp32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_fp32.cl
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 __attribute__((reqd_work_group_size(LOCAL_WORK_GROUP_SIZE, 1, 1)))
 KERNEL(convolution_gpu_yxfb_yxio_b1_block)(
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_multiple_x_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_multiple_x_fp32.cl
index 0f2722f29..85aa75d8e 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_multiple_x_fp32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b1_block_multiple_x_fp32.cl
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 __attribute__((reqd_work_group_size(LOCAL_WORK_GROUP_SIZE, 1, 1)))
 KERNEL(convolution_gpu_yxfb_yxio_b1_block_multiple_x)(
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b8_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b8_fp32.cl
index 519c82226..21fc110b3 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b8_fp32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_gpu_yxfb_yxio_b8_fp32.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 __attribute__((reqd_work_group_size(LOCAL_WORK_GROUP_SIZE, 1, 1)))
 KERNEL(convolution_gpu_yxfb_yxio_b8)(
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_ref.cl
index cba96cbc3..95641e242 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_ref.cl
@@ -93,13 +93,16 @@ KERNEL(convolution_grad_weights_gpu_ref)(
 #endif
     }
 
-#if MOMENTUM
-    float update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
-    filter[weights_idx] -= update_gradient_w;
-    prev_grad_w[weights_idx] = update_gradient_w;
+#if OUTPUT_GRAD_W
+    output[weights_idx] = grad_w;
 #else
-    filter[weights_idx] -= lr * grad_w + DECAY_RATE * lr * filter[weights_idx];
-#endif
+    #if MOMENTUM
+        float update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
+        filter[weights_idx] -= update_gradient_w;
+        prev_grad_w[weights_idx] = update_gradient_w;
+    #else
+        filter[weights_idx] -= lr * grad_w + DECAY_RATE * lr * filter[weights_idx];
+    #endif
 
 #if BIAS_TERM
         if(ifm == 0 && id_x == 0 && id_y == 0)
@@ -114,4 +117,6 @@ KERNEL(convolution_grad_weights_gpu_ref)(
         }
 #endif
 
+#endif
+
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_yxfb.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_yxfb.cl
index 1d6ffeafd..fba71dbdd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_yxfb.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/convolution_grad_weights_yxfb.cl
@@ -90,13 +90,16 @@ KERNEL(convolution_grad_weights_gpu_ref)(
 
     if (local_id == 0)
     {
-#if MOMENTUM
-        UNIT_TYPE update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
-        filter[weights_idx] -= update_gradient_w;
-        prev_grad_w[weights_idx] = update_gradient_w;
+#if OUTPUT_GRAD_W
+        output[weights_idx] = grad_w;
 #else
-        filter[weights_idx] -= lr * (grad_w + DECAY_RATE * filter[weights_idx]);
-#endif
+    #if MOMENTUM
+            UNIT_TYPE update_gradient_w = lr * (grad_w + DECAY_RATE * filter[weights_idx]) + prev_grad_w[weights_idx] * MOMENTUM_FACTOR;
+            filter[weights_idx] -= update_gradient_w;
+            prev_grad_w[weights_idx] = update_gradient_w;
+    #else
+            filter[weights_idx] -= lr * (grad_w + DECAY_RATE * filter[weights_idx]);
+    #endif
 
 #if BIAS_TERM
         if(ifm == 0 && id_x == 0 && id_y == 0)
@@ -110,5 +113,6 @@ KERNEL(convolution_grad_weights_gpu_ref)(
 #endif
         }
 #endif
+#endif
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_bfyx_opt.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_bfyx_opt.cl
index 2b2e0c962..a1dcd67eb 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_bfyx_opt.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_bfyx_opt.cl
@@ -63,6 +63,11 @@ KERNEL(deconvolution_gpu_bfyx_opt)(
     const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
 #endif
     const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint filter_offset = split_idx * FILTER_LENGTH;
+#else
+    const uint filter_offset = 0;
+#endif
 
     for (uint i = start_y; i < FILTER_SIZE_Y; i+=STRIDE_SIZE_Y)
     {
@@ -83,7 +88,7 @@ KERNEL(deconvolution_gpu_bfyx_opt)(
                     uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH;
 
 #if GRADIENT
-                    uint filter_idx = ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                     for (uint h = 0; h < FILTER_OFM_NUM; h++)
                     {
                         result = fma(input[input_idx], filter[filter_idx], result);
@@ -91,7 +96,7 @@ KERNEL(deconvolution_gpu_bfyx_opt)(
                         input_idx += INPUT0_FEATURE_PITCH;
                     }
 #else
-                    uint filter_idx = ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                     for (uint h = 0; h < FILTER_IFM_NUM; h++)
                     {
                         result = fma(input[input_idx], filter[filter_idx], result);
@@ -104,7 +109,12 @@ KERNEL(deconvolution_gpu_bfyx_opt)(
         }
     }
 #if BIAS_TERM
-    result += bias[ofm_offset];
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+    const uint bias_offset = 0;
+#endif
+    result += bias[ofm_offset + bias_offset];
 #endif
     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
     const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + id_y*OUTPUT_Y_PITCH + id_x*OUTPUT_X_PITCH;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl
index d2a369b22..4e8fa0df1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/deconvolution_gpu_ref.cl
@@ -55,6 +55,11 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
     const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
 #endif
     const uint input_offset = INPUT0_OFFSET + batch_offset*INPUT0_BATCH_PITCH + in_split_offset;
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint filter_offset = split_idx * FILTER_LENGTH;
+#else
+    const uint filter_offset = 0;
+#endif
 
     for (uint i = 0; i < FILTER_SIZE_Y; i++)
     {
@@ -74,7 +79,7 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                     uint fixed_input_offset_y = (uint)input_offset_y / STRIDE_SIZE_Y;
                     uint input_idx = input_offset + (uint)fixed_input_offset_x*INPUT0_X_PITCH + (uint)fixed_input_offset_y*INPUT0_Y_PITCH;
 #if GRADIENT
-                    uint filter_idx = ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_IFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                     for (uint h = 0; h < FILTER_OFM_NUM; h++)
                     {
                         result = fma(input[input_idx], filter[filter_idx], result);
@@ -82,7 +87,7 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
                         input_idx += INPUT0_FEATURE_PITCH;
                     }
 #else
-                    uint filter_idx = ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
+                    uint filter_idx = filter_offset + ofm_offset*FILTER_OFM_PITCH + (FILTER_SIZE_Y - i - 1)*FILTER_Y_PITCH + (FILTER_SIZE_X - j - 1)*FILTER_X_PITCH;
                     for (uint h = 0; h < FILTER_IFM_NUM; h++)
                     {
                         result = fma(input[input_idx], filter[filter_idx], result);
@@ -95,7 +100,12 @@ KERNEL(deconvolution_gpu_yxfb_ref)(
         }
     }
 #if BIAS_TERM
-    result += bias[ofm_offset];
+#if GROUPED && !DEPTHWISE_SEPARABLE_OPT
+    const uint bias_offset = split_idx * BIAS_LENGTH;
+#else
+    const uint bias_offset = 0;
+#endif
+    result += bias[ofm_offset + bias_offset];
 #endif
     const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
     const uint dst_index = OUTPUT_OFFSET + out_split_offset + batch_offset*OUTPUT_BATCH_PITCH + ofm_offset*OUTPUT_FEATURE_PITCH + out_y*OUTPUT_Y_PITCH + out_x*OUTPUT_X_PITCH;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/depth_to_space_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/depth_to_space_ref.cl
new file mode 100644
index 000000000..2c96cc4c3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/depth_to_space_ref.cl
@@ -0,0 +1,36 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+KERNEL(depth_to_space_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+{
+    const uint batch = get_global_id(0);
+    const uint feature = get_global_id(1);
+    const uint y = get_global_id(2) / OUTPUT_SIZE_X;
+    const uint x = get_global_id(2) % OUTPUT_SIZE_X;
+
+    const uint input_y = y / BLOCK_SIZE;
+    const uint offset_y = y % BLOCK_SIZE;
+
+    const uint input_x = x / BLOCK_SIZE;
+    const uint offset_x = (x % BLOCK_SIZE);
+    const uint offset_feature = (offset_y * BLOCK_SIZE + offset_x) * OUTPUT_FEATURE_NUM;
+
+    const uint output_index = OUTPUT_OFFSET + (batch * OUTPUT_BATCH_PITCH) + (feature * OUTPUT_FEATURE_PITCH) + (y * OUTPUT_Y_PITCH) + x;
+    const uint input_feature = feature + offset_feature;
+    const uint input_index = INPUT0_OFFSET + (batch * INPUT0_BATCH_PITCH) + (input_feature * INPUT0_FEATURE_PITCH) + (input_y * INPUT0_Y_PITCH) + input_x;
+    output[output_index] = input[input_index];
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl
new file mode 100644
index 000000000..94c14e4e9
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output.cl
@@ -0,0 +1,217 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+#include "include/detection_output_common.cl"
+
+KERNEL (detection_output)(__global UNIT_TYPE* input_location, __global UNIT_TYPE* output, __global UNIT_TYPE* input_confidence, __global UNIT_TYPE* input_prior_box)
+{
+    const uint idx = get_global_id(0);              // bbox idx
+    const uint local_id = get_local_id(0) * NUM_OF_ITEMS; // All bboxes from one image in work group
+    const uint idx_image = idx / NUM_OF_ITERATIONS;  // idx of current image
+
+    __local uint indexes[NUM_OF_PRIORS];
+    __local uint scores_size[NUM_CLASSES * NUM_OF_IMAGES];
+    __local bool stillSorting;
+
+    uint indexes_class_0[NUM_OF_PRIORS];
+
+    int last_bbox_in_class = NUM_OF_ITEMS;
+    bool is_last_bbox_in_class = false;
+    for (uint it = 0; it < NUM_OF_ITEMS; it ++)
+    {
+        if (((local_id + it + 1) % NUM_OF_PRIORS) == 0 )
+        {
+            last_bbox_in_class = it;
+            is_last_bbox_in_class = true;
+            break;
+        }
+    }
+
+    for (uint idx_class = 0; idx_class < NUM_CLASSES; idx_class++)
+    {
+        if (idx_class == BACKGROUND_LABEL_ID)
+        {
+            continue;
+        }
+
+        for (uint it = 0;  it < NUM_OF_ITEMS; it++)
+        {
+            indexes[local_id + it] = local_id + it; 
+        }
+
+        stillSorting = true;
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        bool is_last_bbox_in_image = (is_last_bbox_in_class) && (idx_class == (NUM_CLASSES - 1));
+
+        while(stillSorting)
+        {
+            barrier(CLK_LOCAL_MEM_FENCE);
+            stillSorting = false;
+
+            for (uint i = 0; i < 2; i++)
+            {
+                for (uint it = 0; it < NUM_OF_ITEMS; it++)
+                {
+                    uint item_id = local_id + it;
+     
+                    uint idx1 = indexes[item_id];
+                    uint idx2 = indexes[item_id+1];
+                    bool perform = false;
+                    if ((((i % 2) && (item_id % 2)) ||
+                        ((!(i % 2)) && (!(item_id % 2)))) &&
+                        (it < last_bbox_in_class))
+                    {
+                        perform = true;
+                    }
+
+                    if (perform &&
+                        (FUNC_CALL(get_score)(input_confidence, idx1, idx_class, idx_image) <
+                         FUNC_CALL(get_score)(input_confidence, idx2, idx_class, idx_image)))
+                    {
+                        indexes[item_id] = idx2;
+                        indexes[item_id+1] = idx1;
+                        stillSorting = true;
+                    }
+                    barrier(CLK_LOCAL_MEM_FENCE);
+                }
+            }
+        }
+
+        // Do it only once per class in image
+        if (is_last_bbox_in_class)
+        {
+            UNIT_TYPE adaptive_threshold = NMS_THRESHOLD;
+            uint post_nms_count = 0;
+            const uint shared_class = (SHARE_LOCATION)? 0 : idx_class;
+            scores_size[idx_class] = 0;
+
+            // Do the "keep" algorithm only for classes with confidence greater than CONFIDENCE_THRESHOLD.
+            // Check first, the biggest one (after sort) element in class.
+            if (FUNC_CALL(get_score)(input_confidence, indexes[0], idx_class, idx_image) != 0.0f)
+            {
+                for (uint i = 0; i < SCORES_COUNT; i++)
+                {
+                    const uint bb_idx = indexes[i];
+                    bool keep = true;
+                    for (uint j = 0; j < post_nms_count; j++)
+                    {
+                        if (!keep)
+                        {
+                            break;
+                        }
+
+                        UNIT_TYPE overlap = 0.0;
+                        const uint bb_idx2 = indexes[j];
+
+                        UNIT_TYPE decoded_bbox1[4];
+                        FUNC_CALL(get_decoded_bbox)(decoded_bbox1, input_location, input_prior_box, bb_idx, shared_class, idx_image);
+                        UNIT_TYPE decoded_bbox2[4];
+                        FUNC_CALL(get_decoded_bbox)(decoded_bbox2, input_location, input_prior_box, bb_idx2, shared_class, idx_image);
+                        bool intersecting =
+                            (decoded_bbox1[0] < decoded_bbox2[2]) &
+                            (decoded_bbox2[0] < decoded_bbox1[2]) &
+                            (decoded_bbox1[1] < decoded_bbox2[3]) &
+                            (decoded_bbox2[1] < decoded_bbox1[3]);
+
+                        if (intersecting)
+                        {
+                            const UNIT_TYPE intersect_width = min(decoded_bbox1[2], decoded_bbox2[2]) - max(decoded_bbox1[0], decoded_bbox2[0]);
+                            const UNIT_TYPE intersect_height = min(decoded_bbox1[3], decoded_bbox2[3]) - max(decoded_bbox1[1], decoded_bbox2[1]);
+                            const UNIT_TYPE intersect_size = intersect_width * intersect_height;
+                            const UNIT_TYPE bbox1_area = (decoded_bbox1[2] - decoded_bbox1[0]) * (decoded_bbox1[3] - decoded_bbox1[1]);
+                            const UNIT_TYPE bbox2_area = (decoded_bbox2[2] - decoded_bbox2[0]) * (decoded_bbox2[3] - decoded_bbox2[1]);
+                            overlap = intersect_size / (bbox1_area + bbox2_area - intersect_size);
+                        }
+                        keep = (overlap <= adaptive_threshold);
+                    }
+                    if (keep)
+                    {
+                        indexes[post_nms_count] = indexes[i];
+                        ++post_nms_count;
+                    }
+                    if ((keep) && (ETA < 1) && (adaptive_threshold > 0.5))
+                    {
+                        adaptive_threshold *= ETA;
+                    }
+                }
+            }
+            // Write number of scores to global memory, for proper output order in separated work groups
+            scores_size[idx_class] = post_nms_count;
+        }
+
+        stillSorting = true;
+        // Wait for scores number from all classes in images
+        barrier(CLK_LOCAL_MEM_FENCE);
+
+        uint output_offset = (idx_image * NUM_CLASSES_OUT + idx_class - HIDDEN_CLASS) * SCORES_COUNT;
+
+        for (uint it = 0; it < NUM_OF_ITEMS; it++)
+        {
+            const uint local_id_out = local_id + it;
+            
+            if (local_id_out < scores_size[idx_class])
+            {
+                const uint score_idx = indexes[local_id_out];
+                uint bb_idx = indexes[local_id_out];
+                const uint shared_class = (SHARE_LOCATION)? 0 : idx_class;
+                UNIT_TYPE decoded_bbox[4];
+                FUNC_CALL(get_decoded_bbox)(decoded_bbox, input_location, input_prior_box, bb_idx, shared_class, idx_image);
+
+                const uint out_idx = (local_id_out + output_offset) * OUTPUT_ROW_SIZE + OUTPUT_OFFSET;
+                output[out_idx] = TO_UNIT_TYPE(idx_image);
+                output[out_idx + 1] = TO_UNIT_TYPE(idx_class);
+                output[out_idx + 2] = FUNC_CALL(get_score)(input_confidence, score_idx, idx_class, idx_image);
+                output[out_idx + 3] = decoded_bbox[0];
+                output[out_idx + 4] = decoded_bbox[1];
+                output[out_idx + 5] = decoded_bbox[2];
+                output[out_idx + 6] = decoded_bbox[3];
+            }
+        }
+
+        // If work item is processing last bbox in image (we already know the number of all detections),
+        // use it to fill rest of keep_top_k items if number of detections is smaller
+        if (is_last_bbox_in_class)
+        {
+            uint out_idx = output_offset + scores_size[idx_class];
+
+            uint current_top_k = output_offset + SCORES_COUNT;
+            for (uint i = out_idx; i < current_top_k; i++)
+            {
+                out_idx = i * OUTPUT_ROW_SIZE + OUTPUT_OFFSET;
+                output[out_idx] = -1.0;
+                output[out_idx + 1] = 0.0;
+                output[out_idx + 2] = 0.0;
+                output[out_idx + 3] = 0.0;
+                output[out_idx + 4] = 0.0;
+                output[out_idx + 5] = 0.0;
+                output[out_idx + 6] = 0.0;
+            }
+        }
+
+        // Write number of scores kept in first step of detection output
+        if (is_last_bbox_in_image)
+        {
+            uint scores_sum = 0;
+            for (uint i = 0; i < NUM_CLASSES; i++)
+            {
+                scores_sum += scores_size[i];
+            }
+            output[idx_image] = scores_sum;
+
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl
new file mode 100644
index 000000000..1a74d963a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/detection_output_sort.cl
@@ -0,0 +1,217 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+#include "include/detection_output_common.cl"
+
+UNIT_TYPE FUNC(get_score_sort)(__global UNIT_TYPE* input_bboxes, const uint idx_bbox, const uint idx_image)
+{
+    if (idx_bbox == KEEP_BBOXES_NUM)
+    {
+        // Idx set to dummy value, return -1 to exclude this element from sorting
+        return -1;
+    }
+    else
+    {
+        return input_bboxes[(idx_bbox + idx_image * NUM_OF_IMAGE_BBOXES) * OUTPUT_ROW_SIZE + INPUT_OFFSET + SCORE_OFFSET];
+    }
+}
+
+KERNEL (detection_output_sort)(__global UNIT_TYPE* input_bboxes, __global UNIT_TYPE* output)
+{
+    __local uint indexes[NUM_CLASSES_IN];
+    __local bool stillSorting;
+    __local uint output_count;
+    __local uint num_out_per_class[NUM_CLASSES_IN];
+
+    output_count = 0;
+    num_out_per_class[get_local_id(0)] = 0;
+
+    const uint image_id = get_global_id(0) / NUM_CLASSES_IN;
+    const uint local_id = get_local_id(0) * NUM_OF_ITEMS_SORT; // All bboxes from one image in work group
+
+    uint image_offset_input = image_id * NUM_OF_IMAGE_BBOXES;
+
+    uint count_sum = 0;
+    for (uint i = 0; i < image_id; i++)
+    {
+        count_sum += (input_bboxes[i] < KEEP_TOP_K)? input_bboxes[i] : KEEP_TOP_K;
+    }
+
+    uint image_offset_output = count_sum * OUTPUT_ROW_SIZE;
+
+    // If there is less elements than needed, write input to output
+    if (input_bboxes[image_id] <= KEEP_TOP_K)
+    {
+        if (local_id == 0)
+        {
+            for (uint class = 0; class < NUM_CLASSES_IN; class++)
+            {
+                if (class == BACKGROUND_LABEL_ID && !HIDDEN_CLASS)
+                {
+                    continue;
+                }
+                for (uint i = 0; i < NUM_OF_CLASS_BBOXES; i++)
+                {
+                    uint input_idx = (i + image_offset_input + class * NUM_OF_CLASS_BBOXES) * OUTPUT_ROW_SIZE + INPUT_OFFSET;
+                    if (input_bboxes[input_idx] != -1)
+                    {
+                        uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output;
+
+                        for (uint idx = 0; idx < OUTPUT_ROW_SIZE; idx++)
+                        {
+                            output[out_idx + idx] = input_bboxes[input_idx + idx];
+                        }
+
+                        output_count++;
+                    }
+                    else
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        uint sorted_output[KEEP_TOP_K * NUM_CLASSES_IN];
+
+        for (uint it = 0; it < NUM_OF_ITEMS_SORT; it++)
+        {
+            indexes[local_id + it] = (local_id + it) * NUM_OF_CLASS_BBOXES;
+        }
+
+        while (output_count < KEEP_BBOXES_NUM)
+        {
+            stillSorting = true;
+
+            while(stillSorting)
+            {
+                barrier(CLK_LOCAL_MEM_FENCE);
+                stillSorting = false;
+                for (uint it = 0; it < NUM_OF_ITEMS_SORT; it++)
+                {
+                    uint item_id = local_id + it;
+                    for (uint i = 0; i < 2; i++)
+                    {
+
+                        uint idx1 = indexes[item_id];
+                        uint idx2 = indexes[item_id+1];
+                        bool perform = false;
+                        if ((((i % 2) && (item_id % 2)) ||
+                            ((!(i % 2)) && (!(item_id % 2)))) &&
+                            (item_id != (NUM_CLASSES_IN - 1)))
+                        {
+                            perform = true;
+                        }
+
+                        if (perform &&
+                            (FUNC_CALL(get_score_sort)(input_bboxes, idx1, image_id) <
+                             FUNC_CALL(get_score_sort)(input_bboxes, idx2, image_id)))
+                        {
+                            indexes[item_id] = idx2;
+                            indexes[item_id+1] = idx1;
+                            stillSorting = true;
+                        }
+                        barrier(CLK_LOCAL_MEM_FENCE);
+                    }
+                }
+            }
+
+            if (local_id == 0)
+            {
+                UNIT_TYPE top_score = FUNC_CALL(get_score_sort)(input_bboxes, indexes[0], image_id);
+
+                if (top_score != 0)
+                {
+                    for (uint it = 0; (it < NUM_CLASSES_IN) && (output_count < KEEP_BBOXES_NUM); it++)
+                    {
+                        if (FUNC_CALL(get_score_sort)(input_bboxes, indexes[it], image_id) == top_score)
+                        {
+                            // write to output, create counter, and check if keep_top_k is satisfied.
+                            uint input_idx = (indexes[it] + image_offset_input) * OUTPUT_ROW_SIZE + INPUT_OFFSET;
+                            uint class_idx = input_bboxes[input_idx + 1] - HIDDEN_CLASS;
+
+                            sorted_output[class_idx * KEEP_TOP_K + num_out_per_class[class_idx]] = input_idx;
+                            num_out_per_class[class_idx]++;
+
+                            indexes[it]++;
+                            output_count++;
+
+                            // If all class elements are written to output, set dummy value to exclude class from sorting.
+                            if ((indexes[it] % NUM_OF_CLASS_BBOXES) == 0)
+                            {
+                                indexes[it] = KEEP_BBOXES_NUM;
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    // There is no more significant results to sort.
+                    output_count = KEEP_BBOXES_NUM;
+                }
+            }
+            barrier(CLK_LOCAL_MEM_FENCE);
+        }
+        if (local_id == 0)
+        {
+            output_count = 0;
+            for (uint i = 0; i < NUM_CLASSES_IN; i++)
+            {
+                for (uint j = 0; j < num_out_per_class[i]; j++)
+                {
+
+                    uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output;
+                    for (uint idx = 0; idx < OUTPUT_ROW_SIZE; idx++)
+                    {
+                        output[out_idx + idx] = input_bboxes[sorted_output[i * KEEP_TOP_K + j] + idx];
+                    }
+                    output_count++;
+                }
+           }
+           uint image_count_sum = (input_bboxes[image_id] < KEEP_TOP_K)? input_bboxes[image_id] : KEEP_TOP_K;
+           for (output_count; output_count < image_count_sum; output_count++)
+           {
+                uint out_idx = output_count * OUTPUT_ROW_SIZE + image_offset_output;
+                output[out_idx] = -1.0;
+                output[out_idx + 1] = 0.0;
+                output[out_idx + 2] = 0.0;
+                output[out_idx + 3] = 0.0;
+                output[out_idx + 4] = 0.0;
+                output[out_idx + 5] = 0.0;
+                output[out_idx + 6] = 0.0;
+           }
+        }
+    }
+
+    if (local_id == 0 &&
+        image_id == (NUM_IMAGES - 1))
+    {
+        for (output_count += count_sum; output_count < (KEEP_TOP_K *  NUM_IMAGES); output_count++ )
+        {
+            uint out_idx = output_count * OUTPUT_ROW_SIZE;
+            output[out_idx] = -1.0;
+            output[out_idx + 1] = 0.0;
+            output[out_idx + 2] = 0.0;
+            output[out_idx + 3] = 0.0;
+            output[out_idx + 4] = 0.0;
+            output[out_idx + 5] = 0.0;
+            output[out_idx + 6] = 0.0;
+        }
+    }
+
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv4.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv4.cl
new file mode 100644
index 000000000..2d598ea52
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_b_fs_yx_fsv4.cl
@@ -0,0 +1,100 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "include/include_all.cl"
+
+#define PACK 4
+
+#define SGR_MAX_SIZE   (get_max_sub_group_size())
+#define SGR_LOCAL_ID   (get_sub_group_local_id())
+
+#define GET_INDEX(_x) \
+   ( ((_x / SGR_MAX_SIZE) * SGR_MAX_SIZE /* Normed to max_subgroup_size */)   \
+     * (4 * sizeof(int)                  /* 4xINT32 per sub_group reading */) \
+   )
+
+inline int16 FUNC(get_int16)(const __global UNIT_TYPE* src, uint idx)
+{
+    int4 int_data = as_int4(intel_sub_group_block_read4((const __global uint*)(src + idx)));
+    int16 to_return;
+    for(uint i = 0; i < 4; i++)
+    {
+        for(uint j = 0; j < 4; j++)
+        {
+            to_return[i * 4 + j] = as_char4(int_data[i])[j];
+        }
+    }
+    return to_return;
+}
+#define GET_INPUT(A, B) FUNC_CALL(get_int16)(A, GET_INDEX(x))
+
+
+__attribute__((intel_reqd_sub_group_size(8)))
+KERNEL(eltwise_b_fs_yx_fsv4)(
+    INPUTS_DECLS
+    __global UNIT_TYPE* output
+#if CALIBRATION_TERM
+    , const __global float* calibrations
+#endif
+    )
+{
+    // This kernel works with linearized data w/o strides and padding
+    // so only one dimension 'X' is required
+    const uint x   = get_global_id(0);
+    const uint idx = GET_INDEX(x);
+
+    int16 res;
+
+    DO_ELTWISE;
+
+    for(uint i = 0; i < 4; i++)
+    {
+        const uint out_idx = idx + (sizeof(int) * (SGR_LOCAL_ID + (i * SGR_MAX_SIZE)));
+        char4 char_res;
+
+        for(uint j = 0; j < 4; j++)
+        {
+            int res_tmp = res[i * 4 + j];
+        #if QUANTIZATION_TERM
+        #if CALIBRATION_TERM
+            // Batch:
+            const uint b = out_idx / OUTPUT_BATCH_PITCH;
+            // Feature:
+            // Because of specific data layout Feature  must be normed to PACK size
+            uint d3 = ((out_idx - b * OUTPUT_BATCH_PITCH) / (OUTPUT_FEATURE_PITCH * PACK)) * PACK;
+            res_tmp = (int)round(((float)res_tmp) * calibrations[d3+j]);
+        #else  // CALIBRATION_TERM
+            res_tmp = (int)round(((float)res_tmp) * O_QF);
+        #endif // CALIBRATION_TERM
+        #endif // QUANTIZATION_TERM
+        
+        #ifdef ELTW_UNSIGNED
+            char_res[j] = ACTIVATION(convert_uchar(res_tmp), NL_M, NL_N);
+        #else
+            char_res[j] = ACTIVATION(convert_char(res_tmp), NL_M, NL_N);
+        #endif
+        }
+        // put 4 chars into output
+        // char_result[i] = as_int(char_res);
+        *((__global int*)(output + out_idx)) = as_int(char_res);
+    }
+}
+
+#undef PACK
+#undef SGR_MAX_SIZE
+#undef SGR_LOCAL_ID
+#undef GET_INDEX
+#undef GET_INPUT
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_fs_bs_yx_bsv4_fsv32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_fs_bs_yx_bsv4_fsv32.cl
index fe5e4a8a4..388d50de0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_fs_bs_yx_bsv4_fsv32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/eltwise_fs_bs_yx_bsv4_fsv32.cl
@@ -16,8 +16,13 @@
 
 #include "include/include_all.cl"
 
+#ifdef INPUT_STRIDED
+#define GET_INDEX(src) \
+    GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(src, d4, d3, d2 * CAT(src, _STRIDE_Y), d1 * CAT(src, _STRIDE_X)) 
+#else
 #define GET_INDEX(src) \
     GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(src, d4, d3, d2, d1) 
+#endif
 
 int16 FUNC(get_int16)(const __global UNIT_TYPE* src, uint idx)
 {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/embed_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/embed_ref.cl
index c51a61e3a..f1a5a4e5f 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/embed_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/embed_ref.cl
@@ -12,16 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "include/include_all.cl"
 
-#include "include/common.cl"
-#include "include/data_types.cl"
-
-KERNEL(embed_ref)(const __global UNIT_TYPE* input0, __global UNIT_TYPE* output, __global UNIT_TYPE* weights, __global UNIT_TYPE* biases)
+KERNEL(embed_ref)(const __global UNIT_TYPE* input0,
+    __global UNIT_TYPE* output,
+    const __global UNIT_TYPE* weights
+#if BIAS_TERM
+    ,const __global UNIT_TYPE* biases
+#endif
+)
 {
     const uint x = (uint)get_global_id(0);
 	const uint y = (uint)get_global_id(1);
 	const uint b = (uint)get_global_id(2);
+
 	uint output_idx = (b*INPUT0_ELEMENTS_COUNT*NUM_OUTPUT_SIZE)+(uint)(x*NUM_OUTPUT_SIZE+y);
-    output[output_idx] = weights[(uint)(input0[(b*INPUT0_ELEMENTS_COUNT)+x]*NUM_OUTPUT_SIZE+y)] + biases[y]; 
+    output[output_idx] = weights[(uint)(input0[(b*INPUT0_ELEMENTS_COUNT)+x]*NUM_OUTPUT_SIZE+y)];
+#if BIAS_TERM
+    output[output_idx] += biases[y];
+#endif
 }
-	
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv16_af8_vload.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv16_af8_vload.cl
index e11fb14d3..9a3bac2b0 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv16_af8_vload.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv16_af8_vload.cl
@@ -14,54 +14,30 @@
 
 
 #include "include/include_all.cl"
-
-#if FP16_UNIT_USED
-    // Block read - currently block is 4 bytes aligned.
-    #define ALIGNED_BLOCK_READ8(ptr, byte_offset) as_half8(intel_sub_group_block_read_us8((const __global ushort*)(ptr) + (byte_offset)))
-
-    #define MULTIPLY_BLOCKS_16x8(_result, _blockA, _blockB)  \
-    {   \
-        const half16 acol0 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s0 ); \
-        const half16 acol1 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s1 ); \
-        const half16 acol2 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s2 ); \
-        const half16 acol3 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s3 ); \
-        const half16 acol4 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s4 ); \
-        const half16 acol5 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s5 ); \
-        const half16 acol6 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s6 ); \
-        const half16 acol7 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s7 ); \
-        _result = fma( _blockB.s0, acol0, _result ); \
-        _result = fma( _blockB.s1, acol1, _result ); \
-        _result = fma( _blockB.s2, acol2, _result ); \
-        _result = fma( _blockB.s3, acol3, _result ); \
-        _result = fma( _blockB.s4, acol4, _result ); \
-        _result = fma( _blockB.s5, acol5, _result ); \
-        _result = fma( _blockB.s6, acol6, _result ); \
-        _result = fma( _blockB.s7, acol7, _result ); \
-    }
-#else
-    // Block read - currently block is 4 bytes aligned.
-    #define ALIGNED_BLOCK_READ8(ptr, byte_offset) as_float8(intel_sub_group_block_read8((const __global uint*)(ptr) + (byte_offset)))
-
-    #define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB)  \
-    {   \
-        const float8 acol0 = TRANSPOSE_BLOCK_8( _blockA.s0 ); \
-        const float8 acol1 = TRANSPOSE_BLOCK_8( _blockA.s1 ); \
-        const float8 acol2 = TRANSPOSE_BLOCK_8( _blockA.s2 ); \
-        const float8 acol3 = TRANSPOSE_BLOCK_8( _blockA.s3 ); \
-        const float8 acol4 = TRANSPOSE_BLOCK_8( _blockA.s4 ); \
-        const float8 acol5 = TRANSPOSE_BLOCK_8( _blockA.s5 ); \
-        const float8 acol6 = TRANSPOSE_BLOCK_8( _blockA.s6 ); \
-        const float8 acol7 = TRANSPOSE_BLOCK_8( _blockA.s7 ); \
-        _result = mad( _blockB.s0, acol0, _result ); \
-        _result = mad( _blockB.s1, acol1, _result ); \
-        _result = mad( _blockB.s2, acol2, _result ); \
-        _result = mad( _blockB.s3, acol3, _result ); \
-        _result = mad( _blockB.s4, acol4, _result ); \
-        _result = mad( _blockB.s5, acol5, _result ); \
-        _result = mad( _blockB.s6, acol6, _result ); \
-        _result = mad( _blockB.s7, acol7, _result ); \
-    }
-#endif
+#include "include/sub_group.cl"
+
+// Block read - currently block is 4 bytes aligned.
+#define ALIGNED_BLOCK_READ8(ptr, byte_offset) as_half8(intel_sub_group_block_read_us8((const __global ushort*)(ptr) + (byte_offset)))
+
+#define MULTIPLY_BLOCKS_16x8(_result, _blockA, _blockB)  \
+{   \
+    const half16 acol0 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s0 ); \
+    const half16 acol1 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s1 ); \
+    const half16 acol2 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s2 ); \
+    const half16 acol3 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s3 ); \
+    const half16 acol4 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s4 ); \
+    const half16 acol5 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s5 ); \
+    const half16 acol6 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s6 ); \
+    const half16 acol7 = TRANSPOSE_BLOCK_16_FP16_HALF_TYPE( _blockA.s7 ); \
+    _result = fma( _blockB.s0, acol0, _result ); \
+    _result = fma( _blockB.s1, acol1, _result ); \
+    _result = fma( _blockB.s2, acol2, _result ); \
+    _result = fma( _blockB.s3, acol3, _result ); \
+    _result = fma( _blockB.s4, acol4, _result ); \
+    _result = fma( _blockB.s5, acol5, _result ); \
+    _result = fma( _blockB.s6, acol6, _result ); \
+    _result = fma( _blockB.s7, acol7, _result ); \
+}
 
 #define SUB_GROUP_SIZE 16
 
@@ -115,7 +91,4 @@ KERNEL (fully_connected_gpu_xb_bs_xs_xsv8_bsv16_vload)(
 
 #undef SUB_GROUP_SIZE
 #undef ALIGNED_BLOCK_READ8
-#undef MAKE_VECTOR_TYPE
-#undef CONCAT_TOKEN
-#undef CONCAT_TOKEN_HANDLER1
-#undef MULTIPLY_BLOCKS_16x16
+#undef MULTIPLY_BLOCKS_16x8
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv8_af8_vload.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv8_af8_vload.cl
index 918351999..109829f58 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv8_af8_vload.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_bs_f_bsv8_af8_vload.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 #if FP16_UNIT_USED
     // Block read - currently block is 4 bytes aligned.
@@ -224,7 +225,4 @@ KERNEL (fully_connected_gpu_xb_bs_xs_xsv8_bsv8_vload)(
 
 #undef SUB_GROUP_SIZE
 #undef ALIGNED_BLOCK_READ8
-#undef MAKE_VECTOR_TYPE
-#undef CONCAT_TOKEN
-#undef CONCAT_TOKEN_HANDLER1
 #undef MULTIPLY_BLOCKS_8x8
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8.cl
index 556adecd0..bf212050b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 __attribute__((reqd_work_group_size(8, 1, 1)))
 KERNEL (fully_connected_gpu_xb_xb_b8_x8)(
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8_vload.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8_vload.cl
index ed86d491d..4d596f74b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8_vload.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_fb_io_b8_f8_vload.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/sub_group.cl"
 
 #if FP16_UNIT_USED
     #define MULTIPLY_BLOCKS_8x8(_result, _blockA, _blockB)  \
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl
new file mode 100644
index 000000000..af8a8fb46
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_imad.cl
@@ -0,0 +1,95 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/common.cl"
+
+#include "include/data_types.cl"
+#include "include/fetch.cl"
+#include "include/imad.cl"
+
+#define SIMD_SIZE         16
+#define BYTES_PER_READ    (sizeof(int))
+#define BYTES_PER_READ8   (8 * BYTES_PER_READ)
+
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+KERNEL(fully_connected_gpu_IMAD)(
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* output,
+    const __global FILTER_TYPE* weights
+#if BIAS_TERM
+    , const __global BIAS_TYPE* biases
+#endif
+#if QUANTIZATION_TERM
+    ,const __global float* quantizations
+#endif
+#if CALIBRATION_TERM
+    ,const __global float* calibrations
+#endif
+    )
+{
+    // This kernel works with linearized data w/o strides and padding
+    // so only one dimension 'F' is required
+    const uint f = get_global_id(0);
+    const uint b = get_global_id(1);
+
+    if (f >= OUTPUT_FEATURE_NUM) {
+        return;
+    }
+
+    int dotProd = 0;
+
+    uint idx_w = ((f / SIMD_SIZE) * SIMD_SIZE) * INPUT0_FEATURE_NUM;
+    const __global INPUT0_TYPE* current_input = &input[GET_DATA_INDEX(INPUT0, b, 0, 0, 0)];
+
+    for (uint idx_i = 0; idx_i < INPUT0_FEATURE_NUM; idx_i += BYTES_PER_READ8) {
+        int input_data = as_int(intel_sub_group_block_read((const __global uint*)(current_input + idx_i)));
+        int8 activations;  //activations of all lanes
+        activations.s0 = sub_group_broadcast(input_data, 0);
+        activations.s1 = sub_group_broadcast(input_data, 1);
+        activations.s2 = sub_group_broadcast(input_data, 2);
+        activations.s3 = sub_group_broadcast(input_data, 3);
+        activations.s4 = sub_group_broadcast(input_data, 4);
+        activations.s5 = sub_group_broadcast(input_data, 5);
+        activations.s6 = sub_group_broadcast(input_data, 6);
+        activations.s7 = sub_group_broadcast(input_data, 7);
+
+        int8 weights_data = as_int8(intel_sub_group_block_read8((const __global uint*)(weights + idx_w)));
+        idx_w += SIMD_SIZE * BYTES_PER_READ8;
+
+        for (int i = 0; i < 8; i++) {
+            dotProd = IMAD(dotProd, as_char4(activations[i]), as_char4(weights_data[i]));
+        }
+    }
+
+#if BIAS_TERM
+#if   BIAS_PER_OUTPUT
+    const uint bias_index = GET_DATA_INDEX(BIAS, b, f, y, x);
+#elif BIAS_PER_OFM
+    const uint bias_index = f;
+#endif
+#if CALIBRATION_TERM
+    dotProd = (UNIT_TYPE)round(((float)dotProd * quantizations[f] * I_QF + biases[bias_index]) * calibrations[f]);
+#else  // CALIBRATION_TERM
+    dotProd = (UNIT_TYPE)round(((float)dotProd * quantizations[f] * I_QF + biases[bias_index]) * O_QF);
+#endif // CALIBRATION_TERM
+#endif // BIAS_TERM
+
+    const uint out_index = GET_DATA_INDEX(OUTPUT, b, f, 0, 0);
+    output[out_index] = ACTIVATION(convert_char(dotProd), NL_M, NL_N);
+}
+
+#undef SIMD_SIZE
+#undef BYTES_PER_READ
+#undef BYTES_PER_READ8
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_yxfb_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_yxfb_ref.cl
index e8ea6757b..5c63b79dd 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_yxfb_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fully_connected_gpu_yxfb_ref.cl
@@ -14,6 +14,7 @@
 
 
 #include "include/include_all.cl"
+#include "include/reshape_dims.cl"
 
 // Required JIT constants:
 //  - FP16_SUPPORTED       - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_bn_scale_kernel_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_bn_scale_kernel_ref.cl
new file mode 100644
index 000000000..82e0921c8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_bn_scale_kernel_ref.cl
@@ -0,0 +1,197 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+#define LOCAL_SIZE INPUT0_BATCH_NUM
+
+__attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
+KERNEL(convolution)(
+    __global INPUT0_TYPE* input, 
+    __global OUTPUT_TYPE* output, 
+    __global FILTER_TYPE* weights, 
+#if BIAS_TERM
+    __global BIAS_TYPE* biases,
+#endif
+    uint split_idx,
+    __global INPUT0_TYPE* scale_in
+#if SCALE_BIAS_TERM
+    , __global INPUT0_TYPE* scale_bias
+#endif
+#if FUSED_TRAINING
+    , __global INPUT0_TYPE* inv_var,
+    __global INPUT0_TYPE* conv_output,
+    __global INPUT0_TYPE* bn_output
+#endif
+    )
+{
+    const uint f = get_global_id(1);
+    const uint b = get_global_id(0);
+
+    UNIT_TYPE conv_out = UNIT_VAL_ZERO;
+
+    const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
+
+    const uint filter_offset = f*FILTER_OFM_PITCH;
+    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + in_split_offset;
+
+    for (uint y = 0; y < OUTPUT_SIZE_Y; ++y)
+    {
+        const int input_y = y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+        for (uint x = 0; x < OUTPUT_SIZE_X; ++x)
+        {
+            const int input_x = x * STRIDE_SIZE_X - PADDING_SIZE_X;
+            for (uint k = 0; k < FILTER_IFM_NUM; ++k)
+            {
+                for (uint j = 0; j < FILTER_SIZE_Y ; ++j)
+                {
+                    const int input_offset_y = input_y + j * DILATION_SIZE_Y;
+                    const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
+
+                    if(!zero_y)
+                    {
+                        for (uint i = 0; i < FILTER_SIZE_X ; ++i)
+                        {
+                            const int input_offset_x = input_x + i * DILATION_SIZE_X;
+                            const bool zero_x = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
+
+                            if(!zero_x)
+                            {
+                                uint input_idx = input_offset + (uint)input_offset_x*INPUT0_X_PITCH + (uint)input_offset_y*INPUT0_Y_PITCH + k*INPUT0_FEATURE_PITCH;
+                                uint filter_idx = filter_offset + k*FILTER_IFM_PITCH + j*FILTER_Y_PITCH + i*FILTER_X_PITCH;
+                                conv_out += input[input_idx] * weights[filter_idx];       
+                            }
+                        }
+                    }
+                }
+            }
+#if BIAS_TERM
+                conv_out += (UNIT_TYPE)biases[f];
+#endif
+
+                const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * OUTPUT_FEATURE_NUM;
+                const uint dst_index = GET_DATA_INDEX(OUTPUT, b, f, y, x) + out_split_offset;
+#ifdef FUSED_TRAINING
+                conv_output[dst_index] = conv_out;
+#else
+                output[dst_index] = conv_out;
+#endif
+        }
+    }
+
+
+    // BATCH NORM PART
+    barrier(CLK_LOCAL_MEM_FENCE);
+    
+    __local ACCUMULATOR_TYPE sum[LOCAL_SIZE];
+
+    const uint local_idx = b;
+
+    sum[local_idx] = 0;
+
+    uint input_idx = GET_DATA_INDEX(OUTPUT, local_idx, f, 0, 0);
+    for (uint y = 0; y < OUTPUT_SIZE_Y; y++)
+    {
+        for (uint x = 0; x < OUTPUT_SIZE_X; x++)
+        {
+#ifdef FUSED_TRAINING
+            UNIT_TYPE in = conv_output[input_idx];
+#else
+            UNIT_TYPE in = output[input_idx];
+#endif
+            sum[local_idx] += in;
+            input_idx += OUTPUT_X_PITCH;
+        }
+        input_idx += OUTPUT_Y_PITCH - OUTPUT_SIZE_X * OUTPUT_X_PITCH;
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for(uint offset = LOCAL_SIZE / 2; offset > 0; offset /= 2) 
+    {
+        if (local_idx < offset) 
+        {
+            sum[local_idx] += sum[local_idx + offset];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    UNIT_TYPE mean = sum[0] / (OUTPUT_BATCH_NUM * OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+
+    sum[local_idx] = 0;
+
+    input_idx = GET_DATA_INDEX(OUTPUT, local_idx, f, 0, 0);
+    for (uint y = 0; y < OUTPUT_SIZE_Y; y++)
+    {
+        for (uint x = 0; x < OUTPUT_SIZE_X; x++)
+        {
+#ifdef FUSED_TRAINING
+            UNIT_TYPE in = conv_output[input_idx] - mean;
+#else
+            UNIT_TYPE in = output[input_idx] - mean;
+#endif
+            sum[local_idx] += in * in;
+            input_idx += OUTPUT_X_PITCH;
+        }
+        input_idx += OUTPUT_Y_PITCH - OUTPUT_SIZE_X * OUTPUT_X_PITCH;
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    for(uint offset = LOCAL_SIZE / 2; offset > 0; offset /= 2) 
+    {
+        if (local_idx < offset) 
+        {
+            sum[local_idx] += sum[local_idx + offset];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    float variance = sum[0] / (OUTPUT_BATCH_NUM * OUTPUT_SIZE_X * OUTPUT_SIZE_Y);
+
+    float inv_variance = (float)(1.0 / sqrt(variance + EPSILON));
+
+#ifdef FUSED_TRAINING
+    if (local_idx == 0)
+        inv_var[f] = inv_variance;
+#endif
+
+    uint out_idx = GET_DATA_INDEX(OUTPUT, local_idx, f, 0, 0);
+    for (uint y = 0; y < OUTPUT_SIZE_Y; y++)
+    {
+        for (uint x = 0; x < OUTPUT_SIZE_X; x++)
+        {
+#ifdef FUSED_TRAINING
+            UNIT_TYPE out_val = inv_variance * (conv_output[out_idx] - mean);
+            bn_output[out_idx] = out_val;
+#ifdef SCALE_BIAS_TERM
+            output[out_idx] = ACTIVATION(out_val * scale_in[f] + scale_bias[f], NL_M, NL_N);  
+#else
+            output[out_idx] = ACTIVATION(out_val * scale_in[f], NL_M, NL_N);  
+#endif
+#else
+#ifdef SCALE_BIAS_TERM
+            output[out_idx] = ACTIVATION(inv_variance * (output[out_idx] - mean) * scale_in[f] + scale_bias[f], NL_M, NL_N);  
+#else
+            output[out_idx] = ACTIVATION(inv_variance * (output[out_idx] - mean) * scale_in[f], NL_M, NL_N);
+#endif
+#endif
+            out_idx += OUTPUT_X_PITCH;
+        }
+        out_idx += OUTPUT_Y_PITCH - OUTPUT_SIZE_X * OUTPUT_X_PITCH;
+    }
+
+}
+
+#undef LOCAL_SIZE
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_1x1_opt_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_1x1_opt_fp32.cl
new file mode 100644
index 000000000..b22e2d9eb
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_1x1_opt_fp32.cl
@@ -0,0 +1,254 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+#define SIMD_SIZE 8
+__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
+KERNEL(fused_conv_eltwise_gpu_bfyx_1x1_opt)(
+    __global INPUT0_TYPE* input, 
+    __global OUTPUT_TYPE* output, 
+    __global FILTER_TYPE* weights, 
+#if BIAS_TERM
+    __global BIAS_TYPE* biases,
+#endif
+    uint split_idx,
+    const __global float* src3)
+{
+   const uint group_x = get_group_id(0) * OUT_BLOCK_WIDTH;
+    const uint group_y = get_group_id(1) * OUT_BLOCK_HEIGHT;
+    const uint f = (get_group_id(2) * SIMD_SIZE * OUT_BLOCK_DEPTH) % OUTPUT_FEATURE_NUM;
+    const uint b = (get_group_id(2) * SIMD_SIZE * OUT_BLOCK_DEPTH) / OUTPUT_FEATURE_NUM;;
+
+    const uint ifm_part = get_sub_group_id();
+    uint ifm_offset = ifm_part* OUT_BLOCK_DEPTH/2;
+
+    UNIT_TYPE in[OUT_BLOCK_HEIGHT];
+    UNIT_TYPE dotProd0[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH/2];
+    UNIT_TYPE dotProd1[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH/2];
+
+    for(uint i = 0; i < OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH/2; i++)
+    {
+        dotProd0[i] = 0;
+        dotProd1[i] = 0;
+    }
+
+#if OUT_BLOCK_DEPTH == 8
+    const uint filter_offset = f * FILTER_IFM_NUM + ifm_part*(64 * FILTER_IFM_NUM/2);
+#elif OUT_BLOCK_DEPTH == 4
+    const uint filter_offset = f * FILTER_IFM_NUM + ifm_part*(32 * FILTER_IFM_NUM/2);
+#elif OUT_BLOCK_DEPTH == 2
+    const uint filter_offset = f * FILTER_IFM_NUM + ifm_part*(16 * FILTER_IFM_NUM/2);
+#else
+    const uint filter_offset = f*FILTER_OFM_PITCH + ifm_part*(FILTER_IFM_NUM/2) * FILTER_IFM_PITCH;
+#endif
+    const uint input_offset = b*INPUT0_BATCH_PITCH + INPUT0_OFFSET + group_x * INPUT0_X_PITCH + group_y * INPUT0_Y_PITCH + ifm_part*(FILTER_IFM_NUM/2) * INPUT0_FEATURE_PITCH;
+
+    //--------------------------------------------------------------------
+    // main computation phase
+    //--------------------------------------------------------------------
+
+    for (uint k = 0; k < FILTER_IFM_NUM/2; ++k)
+    {
+        for(uint i = 0; i < OUT_BLOCK_HEIGHT; i++)
+        {
+            const uint in_offset = input_offset + get_sub_group_local_id() + i * INPUT0_Y_PITCH + k * INPUT0_FEATURE_PITCH;
+            in[i] = input[in_offset];
+        }
+
+#if OUT_BLOCK_DEPTH == 8
+        float8 w = as_float8(intel_sub_group_block_read8((__global uint*)weights + filter_offset + k * 64));
+#elif OUT_BLOCK_DEPTH == 4
+        float4 w = as_float4(intel_sub_group_block_read4((__global uint*)weights + filter_offset + k * 32));
+#elif OUT_BLOCK_DEPTH == 2
+        float2 w = as_float2(intel_sub_group_block_read2((__global uint*)weights + filter_offset + k * 16));
+#endif
+
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                float _in = intel_sub_group_shuffle(in[br], bc);
+                for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+                {
+                    dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += _in * w[bd];
+                    dotProd1[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += _in * w[bd + OUT_BLOCK_DEPTH/2];
+                }
+            }
+        }
+    }
+
+    __local float slm_vals[OUT_BLOCK_WIDTH * OUT_BLOCK_HEIGHT * OUT_BLOCK_DEPTH * SIMD_SIZE];
+
+    //--------------------------------------------------------------------
+    // second sub_group in workgroup task
+    //--------------------------------------------------------------------
+    
+    if(ifm_part == 1)
+    {
+        for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+        {
+            for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+            {
+                for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+                {
+                    slm_vals[SIMD_SIZE * (bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)) + get_sub_group_local_id()] = dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+                    dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = dotProd1[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+                }
+            }
+        }
+
+    }
+
+    //--------------------------------------------------------------------
+    // first sub_group in workgroup task
+    //--------------------------------------------------------------------
+    
+    if(ifm_part == 0)
+    {
+        for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+        {
+            for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+            {
+                for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+                {
+                    slm_vals[SIMD_SIZE * (bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * (bd+OUT_BLOCK_DEPTH/2) )) + get_sub_group_local_id()] = dotProd1[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+                }
+            }
+        }
+
+    }
+
+    //--------------------------------------------------------------------
+    // add bias phase
+    //--------------------------------------------------------------------
+    
+    #if BIAS_TERM
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        float _bias = biases[f + (bd + ifm_offset) * SIMD_SIZE + get_sub_group_local_id()];
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += _bias;
+            }
+        }
+    }
+    #endif
+
+    barrier(CLK_LOCAL_MEM_FENCE); // we want to add barrier after biases addition so that the long slm write part latency is shadowed by it
+
+    //--------------------------------------------------------------------
+    // sum sub-group results + activation phase
+    //--------------------------------------------------------------------
+    
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += slm_vals[SIMD_SIZE * (bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * (bd + ifm_offset) )) + get_sub_group_local_id()];
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION(dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M, NL_N);;
+            }
+        }
+    }
+
+    //--------------------------------------------------------------------
+    // eltwise with eltwise activation phase
+    //--------------------------------------------------------------------
+    #if IN_OUT_OPT != 1
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            for(uint bc = 0; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+                uint src3_offset = GET_DATA_INDEX(INPUT1, b, f + (bd + ifm_offset) * SIMD_SIZE + get_sub_group_local_id(), (group_y + br) * ELTW_STRIDE_Y, (group_x + bc) * ELTW_STRIDE_X);
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += src3[src3_offset];
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION_ELTW(dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M_ELTW, NL_N_ELTW);
+            }
+        }
+    }
+    #endif
+
+    //--------------------------------------------------------------------
+    // output phase
+    //--------------------------------------------------------------------
+
+    for(uint bd = 0; bd < OUT_BLOCK_DEPTH/2; bd++)
+    {
+        for(uint br = 0; br < OUT_BLOCK_HEIGHT; br++)
+        {
+            uint dst_index = GET_DATA_INDEX(OUTPUT, b, f + (bd + ifm_offset) * SIMD_SIZE + get_sub_group_local_id(), group_y + br, group_x);
+            uint out_vstore_offset = 0;
+            #if (OUT_BLOCK_WIDTH >= 8)
+            {
+                float8 tmp = (float8)(dotProd0[out_vstore_offset + 0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 4 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 5 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 6 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 7 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+#if IN_OUT_OPT == 1
+                float8 tmp2 = vload8(0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+                tmp += tmp2;
+                tmp = ACTIVATION_ELTW(tmp, NL_M_ELTW, NL_N_ELTW);
+#endif
+                vstore8(tmp, 0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+                out_vstore_offset += 8;
+            }
+            #endif
+            #if (OUT_BLOCK_WIDTH % 8) > 3
+            {
+                float4 tmp = (float4)(dotProd0[out_vstore_offset + 0 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 2 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                      dotProd0[out_vstore_offset + 3 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+#if IN_OUT_OPT == 1
+                float4 tmp2 = vload4(0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+                tmp += tmp2;
+                tmp = ACTIVATION_ELTW(tmp, NL_M_ELTW, NL_N_ELTW);
+#endif
+                vstore4(tmp, 0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+                out_vstore_offset += 4;
+            }
+            #endif
+            #if (OUT_BLOCK_WIDTH % 4) > 1
+            {
+                float2 tmp = (float2)(dotProd0[out_vstore_offset + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)],
+                                       dotProd0[out_vstore_offset+1 + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)]);
+#if IN_OUT_OPT == 1
+                float2 tmp2 = vload2(0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+                tmp += tmp2;
+                tmp = ACTIVATION_ELTW(tmp, NL_M_ELTW, NL_N_ELTW);
+#endif
+                vstore2(tmp, 0, output + dst_index + out_vstore_offset * OUTPUT_X_PITCH);
+                out_vstore_offset += 2;
+            }
+            #endif
+            for(uint bc = out_vstore_offset; bc < OUT_BLOCK_WIDTH; bc++)
+            {
+#if IN_OUT_OPT == 1
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] += output[dst_index + bc * OUTPUT_X_PITCH];
+                dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)] = ACTIVATION_ELTW(dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)], NL_M_ELTW, NL_N_ELTW);
+#endif                
+                output[dst_index + bc * OUTPUT_X_PITCH] = dotProd0[bc + OUT_BLOCK_WIDTH * (br + OUT_BLOCK_HEIGHT * bd)];
+            }
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_os_iyx_osv16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_os_iyx_osv16.cl
new file mode 100644
index 000000000..bd439e093
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_bfyx_os_iyx_osv16.cl
@@ -0,0 +1,252 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/data_types.cl"
+
+
+// ---------------------------------------------------------------------------------------------------------------------
+// Just-in-time macro definitions:
+// ---------------------------------------------------------------------------------------------------------------------
+
+// Required JIT constants:
+//  - INPUT                - [tensor] Input dimensions (batch, spatial and feature).
+//  - OUTPUT               - [tensor] Output dimensions (batch, spatial and feature).
+//  - STRIDE               - [tensor] Stride (only spatial). Factors that describe step size in X or Y dimension of
+//                           input position of application of convolution filter when next ouput value
+//                           (step 1 in in X or Y dimension of output) is computed.
+//  - INPUT0_OFFSET        - [tensor] Offset for the first element
+//                           initial offset input position of application of convolution filter and output position.
+//  - FP16_SUPPORTED       - [0/1] Value indicating whether device supports FP16 OpenCL extension (cl_khr_fp16).
+//  - FP16_UNIT_USED       - [0/1] Value indicating that current kernel should use FP16.
+//  - UNIT_TYPE            - Type of unit of input/output/weight/bias.
+//  - UNIT_VAL_ZERO        - Literal of current UNIT_TYPE that represents 0.
+//  - RELU                 - [0/1] Indicates that ReLU activation function should be used on output.
+//  - NEGATIVE_SLOPE       - [float] Factor for negative output values (required when ReLU is specified).
+//
+//  - SUB_GROUP_SIZE       - [int] Size of used subgroup (SIMD).
+//  - LEFTOVERS            - [int] Optional parameter, required only when number of ofm is not dividable by SUB_GROUP_SIZE
+//                           see comment for FEATURES_THREADS_PER_BATCH for more informations
+
+/*
+gpu::make_jit_constant("OUTPUT_LIMIT",              output_size),
+gpu::make_jit_constant("FILTER",                    filter_mem.argument().size),
+gpu::make_jit_constant("FILTER_ARRAY_NUM",          split),
+gpu::make_jit_constant("OUTPUT_BLOCK_WIDTH",        _kernel_data.block_width));
+gpu::make_jit_constant("OUTPUT_BLOCK_HEIGHT",       _kernel_data.block_height));
+gpu::make_jit_constant("IN_BLOCK_ARRAY_SIZE",       _kernel_data.input_block_array_size));
+gpu::make_jit_constant("IN_BLOCK_WIDTH",            _kernel_data.input_block_width));
+gpu::make_jit_constant("PREFETCH",                  _kernel_data.prefetch));
+if (_kernel_data.leftovers)
+    gpu::make_jit_constant("LEFTOVERS",             _kernel_data.leftovers));
+*/
+
+// FEATURES_THREADS_PER_BATCH defines how many threads in z-dimension are processing single batch.
+// ideally, z-dimension of value n should indicate processing of n-th output feature. however, since
+// threads are stack in groups of SUB_GROUP_SIZE, when number of ofm is not dividable by SUB_GROUP_SIZE
+// there are dummy threads added in z-dimension in count of LEFTOVERS. We need to take them into consideration
+// while calculating batch's id (see lines 86-87). Values calculated by dummy threads are discarded at line 210.
+#ifdef LEFTOVERS
+#define FEATURES_THREADS_PER_BATCH (FILTER_OFM_NUM + LEFTOVERS)
+#else
+#define FEATURES_THREADS_PER_BATCH (FILTER_OFM_NUM)
+#endif
+
+__attribute__((intel_reqd_sub_group_size(SUB_GROUP_SIZE)))
+__attribute__((reqd_work_group_size(1, 1, SUB_GROUP_SIZE)))
+KERNEL(convolution_gpu_bfyx_os_iyx_osv16)(
+    const __global UNIT_TYPE* input,
+    __global UNIT_TYPE* output,
+    const __global UNIT_TYPE* weights,
+#if BIAS_TERM
+    const __global UNIT_TYPE* bias,
+#endif   
+    uint split_idx,
+    const __global UNIT_TYPE* eltw_input) // TODO: removing this parameter cause a performance degradation... :)
+{
+    const uint oc  = (uint)get_global_id(0) * OUTPUT_BLOCK_WIDTH;  // oc = Output Column
+    const uint or  = (uint)get_global_id(1) * OUTPUT_BLOCK_HEIGHT; // or = Output Row
+    const uint fm  = get_global_id(2);                    // fm = Feature Map = od = Output Depth
+    const uint lid = get_sub_group_local_id();
+
+    uint batch_idx = fm / FEATURES_THREADS_PER_BATCH;
+    uint feature_idx = fm % FEATURES_THREADS_PER_BATCH;
+    uint fmg = feature_idx / SUB_GROUP_SIZE;
+
+    UNIT_TYPE in[IN_BLOCK_ARRAY_SIZE];
+    UNIT_TYPE out[OUTPUT_BLOCK_WIDTH * OUTPUT_BLOCK_HEIGHT];
+    UNIT_TYPE w[PREFETCH];
+    uint in_addr;
+    uint weight_addr = fmg * FILTER_IFM_NUM * FILTER_SIZE_X * FILTER_SIZE_Y * SUB_GROUP_SIZE + lid;
+
+    for(int i = 0; i < (OUTPUT_BLOCK_WIDTH * OUTPUT_BLOCK_HEIGHT); i++) {
+        out[i] = UNIT_VAL_ZERO;
+    }
+
+    uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * FILTER_IFM_NUM;
+    in_addr = batch_idx * INPUT0_BATCH_PITCH;
+    in_addr += in_split_offset + INPUT0_OFFSET_WITH_PADDING + or * STRIDE_SIZE_Y * INPUT0_Y_PITCH + oc * STRIDE_SIZE_X + lid;
+
+    for(int kd = 0; kd < FILTER_IFM_NUM; kd++)  // _ID = 3, RGB
+    {
+        uint tmp_in_addr = in_addr;
+
+#if IN_BLOCK_WIDTH % SUB_GROUP_SIZE == 0
+        __attribute__((opencl_unroll_hint(IN_BLOCK_ARRAY_SIZE)))
+        for(uint in_block_pos = 0; in_block_pos < IN_BLOCK_ARRAY_SIZE * SUB_GROUP_SIZE; in_block_pos += SUB_GROUP_SIZE) {
+            // Horizontal position in input block after read.
+            const uint in_block_next_x_pos = in_block_pos % IN_BLOCK_WIDTH + SUB_GROUP_SIZE;
+
+            in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+
+            // If we have row break, move to the next row.
+            if (in_block_next_x_pos == IN_BLOCK_WIDTH)
+                tmp_in_addr += INPUT0_Y_PITCH;
+        }
+#elif (2 * IN_BLOCK_WIDTH) % SUB_GROUP_SIZE == 0
+        __attribute__((opencl_unroll_hint(IN_BLOCK_ARRAY_SIZE)))
+        for(uint in_block_pos = 0; in_block_pos < IN_BLOCK_ARRAY_SIZE * SUB_GROUP_SIZE; in_block_pos += SUB_GROUP_SIZE) {
+            // Horizontal position in input block after read.
+            const uint in_block_next_x_pos = in_block_pos % IN_BLOCK_WIDTH + SUB_GROUP_SIZE;
+
+            if (in_block_next_x_pos <= IN_BLOCK_WIDTH) { //
+                in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+
+                // If we have row break, move to the next row.
+                if (in_block_next_x_pos == IN_BLOCK_WIDTH)
+                    tmp_in_addr += INPUT0_Y_PITCH;
+            }
+            else {
+                // TODO: Generalize this step to relax IN_BLOCK_WIDTH restrictions.
+                // Position in sub-group on which new row need to be read.
+                const uint sg_br_pos = IN_BLOCK_WIDTH - in_block_pos % IN_BLOCK_WIDTH;
+
+                if (lid < sg_br_pos)
+                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr + in_block_pos % IN_BLOCK_WIDTH];
+                // We have row break inside sub-group. Need to move to next line.
+                tmp_in_addr += INPUT0_Y_PITCH;
+                if (lid >= sg_br_pos)
+                    in[in_block_pos / SUB_GROUP_SIZE] = input[tmp_in_addr - sg_br_pos];
+
+                // If we have another row break, move to the next row.
+                if (in_block_next_x_pos == 2 * IN_BLOCK_WIDTH)
+                    tmp_in_addr += INPUT0_Y_PITCH;
+            }
+        }
+#else
+    #error IN_BLOCK_WIDTH must be multiple of SUB_GROUP_SIZE or half of SUB_GROUP_SIZE. Other scenarios are not currently implemented.
+#endif
+
+        //move to next filter
+        in_addr += INPUT0_FEATURE_PITCH;
+
+        for(int pf=0; pf<PREFETCH; pf++) {
+            w[pf] = weights[weight_addr]; weight_addr += SUB_GROUP_SIZE;
+        }
+
+        uint wi = 0;
+        uint kr = 0; // kr = Kernel Row
+        LOOP(FILTER_SIZE_Y, kr,  // LOOP is a macro that unrolls the loop.
+        {
+            uint kc = 0; // kc = Kernel Column
+            LOOP(FILTER_SIZE_X, kc,
+            {
+                //w = weights[weight_addr];
+                for(uint br=0; br<OUTPUT_BLOCK_HEIGHT; br++) {
+                    for(uint bc=0; bc<OUTPUT_BLOCK_WIDTH; bc++) {
+
+#if IN_BLOCK_WIDTH != SUB_GROUP_SIZE
+                        //if we fix the programming model, then we could use a nice simple 2d array: val = in[br * STRIDE_SIZE_Y + kr][bc * STRIDE_SIZE_X + kc];
+                        UNIT_TYPE val = intel_sub_group_shuffle( in[(((br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y) * IN_BLOCK_WIDTH) + (bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X)) / SUB_GROUP_SIZE],
+                                                                    (((br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y) * IN_BLOCK_WIDTH) + (bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X)) % SUB_GROUP_SIZE);
+#else
+                        UNIT_TYPE val = intel_sub_group_shuffle( in[br * STRIDE_SIZE_Y + kr * DILATION_SIZE_Y], bc * STRIDE_SIZE_X + kc * DILATION_SIZE_X);
+#endif
+
+                        out[br * OUTPUT_BLOCK_WIDTH + bc] = mad(w[wi % PREFETCH], val, out[br * OUTPUT_BLOCK_WIDTH + bc]);
+                    }
+                }
+                w[wi % PREFETCH] = weights[weight_addr];
+                weight_addr += SUB_GROUP_SIZE; // weights must be stored in just the right SIMD swizzled format for this to work, see host code for details.
+                wi++;
+            });
+        });
+        // addr went beyond due to prefetch so move it back to correct location.
+        weight_addr -= PREFETCH * SUB_GROUP_SIZE;
+    }
+
+    uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * FILTER_OFM_NUM;
+    uint out_addr = OUTPUT_OFFSET;
+    out_addr += batch_idx * OUTPUT_BATCH_PITCH;
+    out_addr += out_split_offset + feature_idx * OUTPUT_FEATURE_PITCH; // out_addr indices into start of 16 feature maps.
+    out_addr += or * OUTPUT_Y_PITCH + oc;  // offset for the 4x3 block that this workitem is working on;
+
+#if BIAS_TERM
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+#if BIAS_PER_OUTPUT
+            const unsigned bias_index = feature_idx*OUTPUT_SIZE_X*OUTPUT_SIZE_Y + or*OUTPUT_SIZE_X + oc;
+#else
+            const unsigned bias_index = feature_idx;
+#endif
+            out[r * OUTPUT_BLOCK_WIDTH + c] += bias[bias_index];
+        }
+    }
+#endif
+
+
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+            out[r * OUTPUT_BLOCK_WIDTH + c] = ACTIVATION(out[r * OUTPUT_BLOCK_WIDTH + c], NL_M, NL_N);
+        }
+    }
+
+#if IN_OUT_OPT != 1
+    // eltwise part
+    uint eltw_addr = INPUT1_OFFSET;
+    eltw_addr += batch_idx * INPUT1_BATCH_PITCH;
+    eltw_addr += out_split_offset + feature_idx * INPUT1_FEATURE_PITCH; // eltw_addr indices into start of 16 feature maps.
+    eltw_addr += (or * ELTW_STRIDE_Y) * INPUT1_Y_PITCH + (oc * ELTW_STRIDE_X);  // offset for the 4x3 block that this workitem is working on;
+
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+            out[r * OUTPUT_BLOCK_WIDTH + c] += eltw_input[eltw_addr + r * INPUT1_Y_PITCH * ELTW_STRIDE_Y + c * ELTW_STRIDE_X];
+            out[r * OUTPUT_BLOCK_WIDTH + c] = ACTIVATION_ELTW(out[r * OUTPUT_BLOCK_WIDTH + c], NL_M_ELTW, NL_N_ELTW);
+        }
+    }
+    // end of eltwise part
+#endif
+
+#ifdef LEFTOVERS
+    if (feature_idx < OUTPUT_FEATURE_NUM)
+#endif
+    for(uint r = 0; r < OUTPUT_BLOCK_HEIGHT; r++) {
+        if(!(or + r >= OUTPUT_SIZE_Y))
+        {
+            for(uint c = 0; c < OUTPUT_BLOCK_WIDTH; c++) {
+                // this does a scattered write to 16 different feature maps, so that data within one map is contiguous, thus ready for input to next layer.
+                if(!(oc + c >= OUTPUT_SIZE_X))
+                {
+#if IN_OUT_OPT == 1
+                    out[r * OUTPUT_BLOCK_WIDTH + c] += output[out_addr + r * OUTPUT_Y_PITCH + c];
+                    out[r * OUTPUT_BLOCK_WIDTH + c] = ACTIVATION_ELTW(out[r * OUTPUT_BLOCK_WIDTH + c], NL_M_ELTW, NL_N_ELTW);
+#endif
+                    output[out_addr + r * OUTPUT_Y_PITCH + c] = out[r * OUTPUT_BLOCK_WIDTH + c];
+                }
+            }
+        }
+    }
+}
+
+#undef FEATURES_THREADS_PER_BATCH
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_gemm_fp32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_gemm_fp32.cl
new file mode 100644
index 000000000..022431d88
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_gemm_fp32.cl
@@ -0,0 +1,602 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "include/include_all.cl"
+#include "include/sub_group.cl"
+#include "include/fetch.cl"
+
+#define TILE_M          2
+#define TILE_K          FILTER_SIZE_X
+#define TILE_N          32
+
+inline uint FUNC(calculate_eltw_input_offset_based_on_output_offset)(uint out_offset, uint strideX, uint strideY)
+{
+// bfyx
+    uint tmp_idx = out_offset;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    x_idx *= strideX;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    y_idx *= strideY;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint f_idx = tmp_idx % OUTPUT_FEATURE_NUM;
+    tmp_idx /= OUTPUT_FEATURE_NUM;
+    uint b_idx = tmp_idx % OUTPUT_BATCH_NUM;
+
+    return GET_DATA_INDEX(INPUT1, b_idx, f_idx, y_idx, x_idx);
+}
+
+__attribute__((intel_reqd_sub_group_size(8)))
+KERNEL(fused_conv_eltwise_gemm_fp32)(
+    const __global float *src0,
+    __global float *dst,
+    const __global float *src1,
+#if BIAS_TERM
+    const __global float *bias,
+#endif
+    uint split_idx,
+    const __global float* src3)
+{
+#include "include/vec_typedefs.cl"
+
+    const unsigned group_x = get_group_id(0);
+    const unsigned group_y = get_group_id(1);
+    const unsigned global_x = get_global_id(0);
+    const unsigned global_y = get_global_id(1);
+    const unsigned global_z = get_global_id(2);
+
+    unsigned interleaved_y;
+    unsigned kernel_y;
+    unsigned kernel_idx;
+
+    // Result ctile (*dst) is M rows x N columns
+    // LWG size is 1x8.  Thus each thread calculates 8*M rows x N cols of ctile.
+    float8  blockC00 = 0.f;
+    float8  blockC10 = 0.f;
+    float8  blockC20 = 0.f;
+    float8  blockC30 = 0.f;
+    float8  blockC01 = 0.f;
+    float8  blockC11 = 0.f;
+    float8  blockC21 = 0.f;
+    float8  blockC31 = 0.f;
+
+    const uint in_split_offset = split_idx * INPUT0_FEATURE_PITCH * INPUT0_FEATURE_NUM;
+    // Src0 (patch input) is directly used as atile.
+    // Each work item points to the start of a different patch.
+    // atile is M rows x K columns.
+    const uint src0_read_offset0_const = INPUT0_OFFSET_WITH_PADDING + in_split_offset
+     + INPUT0_BATCH_PITCH * global_z                                                         // batch offset
+     + ( ( ( global_y * TILE_M + 0 ) / OUTPUT_SIZE_X ) * STRIDE_SIZE_Y * INPUT0_Y_PITCH )    // y offset
+     + ( ( ( global_y * TILE_M + 0 ) % OUTPUT_SIZE_X ) * STRIDE_SIZE_X );                    // x offset
+    const uint src0_read_offset1_const = INPUT0_OFFSET_WITH_PADDING + in_split_offset
+     + INPUT0_BATCH_PITCH * global_z                                                 // batch offset
+     + ( ( ( global_y * TILE_M + 1 ) / OUTPUT_SIZE_X ) * STRIDE_SIZE_Y * INPUT0_Y_PITCH )    // y offset
+     + ( ( ( global_y * TILE_M + 1 ) % OUTPUT_SIZE_X ) * STRIDE_SIZE_X );                    // x offset
+
+    // Src1 (filter) is directly used as btile.
+    // It starts at the top of src1 and walks down.
+    // btile is K rows x N columns.
+    uint src0_read_offset0 = src0_read_offset0_const;
+    uint src0_read_offset1 = src0_read_offset1_const;
+    uint src1_read_offset = ( global_x * TILE_N * 2);
+
+#define DOT_PRODUCT_8( _result, _rowA, colB )    \
+    {   \
+        _result.s0 = mad( _rowA, sub_group_broadcast( colB,  0 ), _result.s0 );  \
+        _result.s1 = mad( _rowA, sub_group_broadcast( colB,  1 ), _result.s1 );  \
+        _result.s2 = mad( _rowA, sub_group_broadcast( colB,  2 ), _result.s2 );  \
+        _result.s3 = mad( _rowA, sub_group_broadcast( colB,  3 ), _result.s3 );  \
+        _result.s4 = mad( _rowA, sub_group_broadcast( colB,  4 ), _result.s4 );  \
+        _result.s5 = mad( _rowA, sub_group_broadcast( colB,  5 ), _result.s5 );  \
+        _result.s6 = mad( _rowA, sub_group_broadcast( colB,  6 ), _result.s6 );  \
+        _result.s7 = mad( _rowA, sub_group_broadcast( colB,  7 ), _result.s7 );  \
+    }
+
+    // Walk DOWN src0 (patch 0, 1, 2, ...) and DOWN src1.
+    // Inner loop loads and FMADs one row (FILTER_SIZE_X) of each input patch
+    // and FILTER_SIZE_X/2 rows of interleaved filter.
+    unsigned patch_depth = 0;
+    do
+    {
+        unsigned patch_row = 0;
+        do
+        {
+            // Load atile and btile.
+            // Kernel data is partially interleaved.  Every 2 rows are interleaved at float8 granularity.
+            // The exception is that if FILTER_SIZE_X is odd the last row is not interleaved.  The non
+            // interleaved row is padded with zero to ensure same size as interleaved rows. This
+            // interleaving is done to ensure 0% GDR bank conflicts.  For example, this is how the
+            // kernel data would be arranged before/after interleaving for FILTER_SIZE_X=3.
+            // (0, 0) (8, 0) (16, 0) (24, 0) ...       (0, 0) (0, 1) (8, 0) (0, 1) (16, 0) (0, 1) (24, 0) ..
+            // (0, 1) (8, 1) (16, 1) (24, 1) ... =>    (0, 2) (8, 2) (16, 2) (24, 2) ...
+            // (0, 2) (8, 2) (16, 2) (24, 2) ...       ...
+            // ...
+            const bool kernel_width_is_odd = FILTER_SIZE_X % 2 == 1;
+
+            float blockA00[FILTER_SIZE_X];
+            float blockA01[FILTER_SIZE_X];
+            
+            // in case the data is not aligned to sizeof(T)*FILTER_SIZE_X we need to use vload or set the data in a loop
+            {
+                unsigned i = 0;
+                LOOP(FILTER_SIZE_X, i, 
+                {
+#if LEFTOVERS == 1
+                    if(src0_read_offset0_const + (FILTER_SIZE_Y - 1) * INPUT0_Y_PITCH + (INPUT0_FEATURE_NUM - 1) * (INPUT0_FEATURE_PITCH - ( FILTER_SIZE_Y * INPUT0_Y_PITCH )) >= INPUT0_BATCH_NUM * INPUT0_BATCH_PITCH)
+                    {
+                        if(src0_read_offset0 + i < INPUT0_BATCH_NUM * INPUT0_BATCH_PITCH)
+                            blockA00[i] = src0[src0_read_offset0 + i];
+                    }
+                    else
+#endif
+                        blockA00[i] = src0[src0_read_offset0 + i];
+
+#if LEFTOVERS == 1
+                    if(src0_read_offset1_const + (FILTER_SIZE_Y - 1) * INPUT0_Y_PITCH + (INPUT0_FEATURE_NUM - 1) * (INPUT0_FEATURE_PITCH - ( FILTER_SIZE_Y * INPUT0_Y_PITCH )) >= INPUT0_BATCH_NUM * INPUT0_BATCH_PITCH)
+                    {
+                        if(src0_read_offset1 + i < INPUT0_BATCH_NUM * INPUT0_BATCH_PITCH)
+                            blockA01[i] = src0[src0_read_offset1 + i];
+                    }
+                    else
+#endif
+                        blockA01[i] = src0[src0_read_offset1 + i];
+                } )
+            }
+
+            float*  pblockA00 = (float*)(&blockA00);
+            float*  pblockA01 = (float*)(&blockA01);
+
+            src0_read_offset0 += INPUT0_Y_PITCH;
+            src0_read_offset1 += INPUT0_Y_PITCH;
+
+
+            float blockB00[FILTER_SIZE_X*4];
+            float8* p8BlockB00 = (float8*)blockB00;
+            float4* p4BlockB00 = (float4*)blockB00;
+            float*  pBlockB00 =  (float* )blockB00;
+
+            interleaved_y = 0;
+            LOOP(FILTER_SIZE_X_DIV2, interleaved_y,
+            {
+                p8BlockB00[interleaved_y] = as_float8( intel_sub_group_block_read8( (const __global uint*)src1 + src1_read_offset ) );
+                src1_read_offset += ALIGNED_OFM * 2;
+            } )
+            if ( kernel_width_is_odd )
+            {
+                p4BlockB00[FILTER_SIZE_X - 1] = as_float4( intel_sub_group_block_read4( (const __global uint*)src1 + src1_read_offset ) );
+                src1_read_offset += ALIGNED_OFM * 2;
+            }
+
+            // Perform MADs
+            kernel_idx = 0;
+            interleaved_y = 0;
+            LOOP(FILTER_SIZE_X_DIV2, interleaved_y,
+            {
+                kernel_y = interleaved_y * 2;
+                DOT_PRODUCT_8( blockC00, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC01, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC00, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC01, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC10, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC11, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC10, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC11, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC20, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC21, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC20, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC21, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC30, pblockA00[kernel_y    ], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC31, pblockA01[kernel_y    ], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC30, pblockA00[kernel_y + 1], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC31, pblockA01[kernel_y + 1], pBlockB00[kernel_idx] ); kernel_idx++;
+            } )
+            if ( kernel_width_is_odd )
+            {
+                kernel_y = interleaved_y * 2;
+                DOT_PRODUCT_8( blockC00, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC01, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC10, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC11, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC20, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC21, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+                DOT_PRODUCT_8( blockC30, pblockA00[kernel_y], pBlockB00[kernel_idx] );
+                DOT_PRODUCT_8( blockC31, pblockA01[kernel_y], pBlockB00[kernel_idx] ); kernel_idx++;
+            }
+        }
+
+        //while( ++patch_row < 1 ); //debug
+        while( ++patch_row < FILTER_SIZE_Y );
+
+        src0_read_offset0 += INPUT0_FEATURE_PITCH - ( FILTER_SIZE_Y * INPUT0_Y_PITCH ); // reset to start of next slice of patch
+        src0_read_offset1 += INPUT0_FEATURE_PITCH - ( FILTER_SIZE_Y * INPUT0_Y_PITCH ); // reset to start of next slice of patch
+    }
+    //while ( ++patch_depth < 1 );  //debug
+    while ( ++patch_depth < INPUT0_FEATURE_NUM );
+
+    const uint out_split_offset = split_idx * OUTPUT_FEATURE_PITCH * OUTPUT_FEATURE_NUM;
+    // Dst resembles a cube of width x height x (output channel * batches).  Each tile writes:
+    // (SIMD * TILE_M) x 1 x TILE_N.  Partial writes most likely generated if padding used.
+    __global float *out0 = dst + OUTPUT_OFFSET + out_split_offset
+     + global_z * OUTPUT_BATCH_PITCH                                                   // batch offset
+     + ( group_x * TILE_N ) * OUTPUT_FEATURE_PITCH                                     // channel offset
+     + ( ( global_y * TILE_M ) / OUTPUT_SIZE_X ) * OUTPUT_Y_PITCH                      // y offset
+     + ( ( global_y * TILE_M ) % OUTPUT_SIZE_X );                                      // x offset
+    __global float *out1 = dst + OUTPUT_OFFSET + out_split_offset
+     + global_z * OUTPUT_BATCH_PITCH                                                   // batch offset
+     + ( group_x * TILE_N ) * OUTPUT_FEATURE_PITCH                                     // channel offset
+     + ( ( global_y * TILE_M + 1 ) / OUTPUT_SIZE_X ) * OUTPUT_Y_PITCH                  // y offset
+     + ( ( global_y * TILE_M + 1 ) % OUTPUT_SIZE_X );                                  // x offset
+
+    #if BIAS_TERM
+    __global float8* biasPtr = (__global float8*) (bias + group_x * TILE_N);
+    #endif
+    
+    uint out0_offset = OUTPUT_OFFSET + out_split_offset
+     + global_z * OUTPUT_BATCH_PITCH                                                   // batch offset
+     + ( group_x * TILE_N ) * OUTPUT_FEATURE_PITCH                                     // channel offset
+     + ( ( global_y * TILE_M ) / OUTPUT_SIZE_X ) * OUTPUT_Y_PITCH                      // y offset
+     + ( ( global_y * TILE_M ) % OUTPUT_SIZE_X );                                      // x offset
+
+     uint out1_offset = OUTPUT_OFFSET + out_split_offset
+     + global_z * OUTPUT_BATCH_PITCH                                                   // batch offset
+     + ( group_x * TILE_N ) * OUTPUT_FEATURE_PITCH                                     // channel offset
+     + ( ( global_y * TILE_M + 1 ) / OUTPUT_SIZE_X ) * OUTPUT_Y_PITCH                  // y offset
+     + ( ( global_y * TILE_M + 1 ) % OUTPUT_SIZE_X ); 
+
+    //-----------------------------------------------------------------------------------------------//
+    // OUTPUT PHASE
+    //-----------------------------------------------------------------------------------------------//
+    if( global_y * TILE_M < OUTPUT_SIZE_X * OUTPUT_SIZE_Y )
+    {
+        if ( ( OUTPUT_FEATURE_NUM % TILE_N ) == 0 )
+        {
+            #if BIAS_TERM
+            blockC00 += *biasPtr;
+            blockC10 += *(biasPtr + 1);
+            blockC20 += *(biasPtr + 2);
+            blockC30 += *(biasPtr + 3);
+            #endif
+
+            blockC00 = ACTIVATION(blockC00, NL_M, NL_N);
+            blockC10 = ACTIVATION(blockC10, NL_M, NL_N);
+            blockC20 = ACTIVATION(blockC20, NL_M, NL_N);
+            blockC30 = ACTIVATION(blockC30, NL_M, NL_N);
+
+            // eltwise
+            uint src3_offset = FUNC_CALL(calculate_eltw_input_offset_based_on_output_offset)(out0_offset, ELTW_STRIDE_X,ELTW_STRIDE_Y);
+            for(uint i = 0; i < 8; i++)
+            {
+                blockC00[i] += src3[src3_offset + (i + 0 )* INPUT1_FEATURE_PITCH];
+                blockC10[i] += src3[src3_offset + (i + 8 )* INPUT1_FEATURE_PITCH];
+                blockC20[i] += src3[src3_offset + (i + 16)* INPUT1_FEATURE_PITCH];
+                blockC30[i] += src3[src3_offset + (i + 24)* INPUT1_FEATURE_PITCH];
+            }
+
+            blockC00 = ACTIVATION_ELTW(blockC00, NL_M_ELTW, NL_N_ELTW);
+            blockC10 = ACTIVATION_ELTW(blockC10, NL_M_ELTW, NL_N_ELTW);
+            blockC20 = ACTIVATION_ELTW(blockC20, NL_M_ELTW, NL_N_ELTW);
+            blockC30 = ACTIVATION_ELTW(blockC30, NL_M_ELTW, NL_N_ELTW);
+            // end eltwise
+
+            for( unsigned i = 0; i < 8; i++ )
+            {
+                out0[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC00[i];
+                out0[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC10[i];
+                out0[(16+i) * OUTPUT_FEATURE_PITCH] = blockC20[i];
+                out0[(24+i) * OUTPUT_FEATURE_PITCH] = blockC30[i];
+            }
+        }
+        else
+        {
+            if ( ( global_x + 1 ) < get_global_size(0) )
+            {
+                #if BIAS_TERM
+                blockC00 += *biasPtr;
+                blockC10 += *(biasPtr + 1);
+                blockC20 += *(biasPtr + 2);
+                blockC30 += *(biasPtr + 3);
+                #endif
+
+                blockC00 = ACTIVATION(blockC00, NL_M, NL_N);
+                blockC10 = ACTIVATION(blockC10, NL_M, NL_N);
+                blockC20 = ACTIVATION(blockC20, NL_M, NL_N);
+                blockC30 = ACTIVATION(blockC30, NL_M, NL_N);
+
+                // eltwise
+                uint src3_offset = FUNC_CALL(calculate_eltw_input_offset_based_on_output_offset)(out0_offset, ELTW_STRIDE_X,ELTW_STRIDE_Y);
+                for(uint i = 0; i < 8; i++)
+                {
+                    blockC00[i] += src3[src3_offset + (i + 0 )* INPUT1_FEATURE_PITCH];
+                    blockC10[i] += src3[src3_offset + (i + 8 )* INPUT1_FEATURE_PITCH];
+                    blockC20[i] += src3[src3_offset + (i + 16)* INPUT1_FEATURE_PITCH];
+                    blockC30[i] += src3[src3_offset + (i + 24)* INPUT1_FEATURE_PITCH];
+                }
+    
+                blockC00 = ACTIVATION_ELTW(blockC00, NL_M_ELTW, NL_N_ELTW);
+                blockC10 = ACTIVATION_ELTW(blockC10, NL_M_ELTW, NL_N_ELTW);
+                blockC20 = ACTIVATION_ELTW(blockC20, NL_M_ELTW, NL_N_ELTW);
+                blockC30 = ACTIVATION_ELTW(blockC30, NL_M_ELTW, NL_N_ELTW);
+                // end eltwise
+
+                for ( unsigned i = 0; i < 8; i++ )
+                {
+                    out0[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC00[i];
+                    out0[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC10[i];
+                    out0[(16+i) * OUTPUT_FEATURE_PITCH] = blockC20[i];
+                    out0[(24+i) * OUTPUT_FEATURE_PITCH] = blockC30[i];
+                }
+            }
+            else
+            {
+                if ( ( OUTPUT_FEATURE_NUM % TILE_N ) >= 24 )
+                {
+                    #if BIAS_TERM
+                    blockC00 += *biasPtr;
+                    blockC10 += *(biasPtr + 1);
+                    blockC20 += *(biasPtr + 2);
+                    if (( OUTPUT_FEATURE_NUM % TILE_N) > 24 ) blockC30 += *(biasPtr + 3);
+                    #endif
+
+                    blockC00 = ACTIVATION(blockC00, NL_M, NL_N);
+                    blockC10 = ACTIVATION(blockC10, NL_M, NL_N);
+                    blockC20 = ACTIVATION(blockC20, NL_M, NL_N);
+
+                    // remaining output channels
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        blockC30[i] = ACTIVATION(blockC30[i], NL_M, NL_N);
+                    }
+
+                    // eltwise
+                    uint src3_offset = FUNC_CALL(calculate_eltw_input_offset_based_on_output_offset)(out0_offset, ELTW_STRIDE_X,ELTW_STRIDE_Y);
+                    for(uint i = 0; i < 8; i++)
+                    {
+                        blockC00[i] += src3[src3_offset + (i + 0 )* INPUT1_FEATURE_PITCH];
+                        blockC10[i] += src3[src3_offset + (i + 8 )* INPUT1_FEATURE_PITCH];
+                        blockC20[i] += src3[src3_offset + (i + 16)* INPUT1_FEATURE_PITCH];
+                    }
+
+                    // remaining output channels
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        blockC30[i] += src3[src3_offset + (i + 24 )* INPUT1_FEATURE_PITCH];
+                        blockC30[i] = ACTIVATION_ELTW(blockC30[i], NL_M_ELTW, NL_N_ELTW);
+                    }
+        
+                    blockC00 = ACTIVATION_ELTW(blockC00, NL_M_ELTW, NL_N_ELTW);
+                    blockC10 = ACTIVATION_ELTW(blockC10, NL_M_ELTW, NL_N_ELTW);
+                    blockC20 = ACTIVATION_ELTW(blockC20, NL_M_ELTW, NL_N_ELTW);
+                    // end eltwise
+
+                    for (unsigned i = 0; i < 8; i++)
+                    {
+                        out0[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC00[i];
+                        out0[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC10[i];
+                        out0[(16+i) * OUTPUT_FEATURE_PITCH] = blockC20[i];
+                    }
+
+                    // remaining output channels
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out0[(24+i) * OUTPUT_FEATURE_PITCH] = blockC30[i];
+                    }
+                }
+                else if ( ( OUTPUT_FEATURE_NUM % TILE_N ) >= 16 )
+                {
+                    #if BIAS_TERM
+                    blockC00 += *biasPtr;
+                    blockC10 += *(biasPtr + 1);
+                    if (( OUTPUT_FEATURE_NUM % TILE_N) > 16 )
+                        blockC20 += *(biasPtr + 2);
+                    #endif
+
+                    blockC00 = ACTIVATION(blockC00, NL_M, NL_N);
+                    blockC10 = ACTIVATION(blockC10, NL_M, NL_N);
+
+                    for (unsigned i = 0; i < 8; i++)
+                    {
+                        out0[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC00[i];
+                        out0[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC10[i];
+                    }
+
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out0[(16+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC20[i], NL_M, NL_N);
+
+                    }
+                }
+                else if ( ( OUTPUT_FEATURE_NUM % TILE_N ) >= 8 )
+                {
+                    #if BIAS_TERM
+                    blockC00 += *biasPtr;
+                    if (( OUTPUT_FEATURE_NUM % TILE_N) > 8 )
+                        blockC10 += *(biasPtr + 1);
+                    #endif
+
+                    blockC00 = ACTIVATION(blockC00, NL_M, NL_N);
+
+                    for (unsigned i = 0; i < 8; i++)
+                    {
+                        out0[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC00[i];
+                    }
+
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out0[(8+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC10[i], NL_M, NL_N);
+                    }
+                }
+                else
+                {
+                    #if BIAS_TERM
+                    blockC00 += *biasPtr;
+                    #endif
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out0[( 0+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC00[i], NL_M, NL_N);
+                    }
+                }
+            }
+        }
+    }
+
+    if ((global_y * TILE_M + 1) < OUTPUT_SIZE_X * OUTPUT_SIZE_Y )
+    {
+        if ( ( OUTPUT_FEATURE_NUM % TILE_N ) == 0 )
+        {
+            #if BIAS_TERM
+            blockC01 += *biasPtr;
+            blockC11 += *(biasPtr + 1);
+            blockC21 += *(biasPtr + 2);
+            blockC31 += *(biasPtr + 3);
+            #endif
+
+            blockC01 = ACTIVATION(blockC01, NL_M, NL_N);
+            blockC11 = ACTIVATION(blockC11, NL_M, NL_N);
+            blockC21 = ACTIVATION(blockC21, NL_M, NL_N);
+            blockC31 = ACTIVATION(blockC31, NL_M, NL_N);
+
+            // eltwise
+            uint src3_offset = FUNC_CALL(calculate_eltw_input_offset_based_on_output_offset)(out1_offset, ELTW_STRIDE_X,ELTW_STRIDE_Y);
+            for(uint i = 0; i < 8; i++)
+            {
+                blockC01[i] += src3[src3_offset + (i + 0 )* INPUT1_FEATURE_PITCH];
+                blockC11[i] += src3[src3_offset + (i + 8 )* INPUT1_FEATURE_PITCH];
+                blockC21[i] += src3[src3_offset + (i + 16)* INPUT1_FEATURE_PITCH];
+                blockC31[i] += src3[src3_offset + (i + 24)* INPUT1_FEATURE_PITCH];
+            }
+
+            blockC01 = ACTIVATION_ELTW(blockC01, NL_M_ELTW, NL_N_ELTW);
+            blockC11 = ACTIVATION_ELTW(blockC11, NL_M_ELTW, NL_N_ELTW);
+            blockC21 = ACTIVATION_ELTW(blockC21, NL_M_ELTW, NL_N_ELTW);
+            blockC31 = ACTIVATION_ELTW(blockC31, NL_M_ELTW, NL_N_ELTW);
+            // end eltwise
+
+            for( unsigned i = 0; i < 8; i++ )
+            {
+                out1[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC01[i];
+                out1[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC11[i];
+                out1[(16+i) * OUTPUT_FEATURE_PITCH] = blockC21[i];
+                out1[(24+i) * OUTPUT_FEATURE_PITCH] = blockC31[i];
+            }
+        }
+        else
+        {
+            if ( ( global_x + 1 ) < get_global_size(0) )
+            {
+                #if BIAS_TERM
+                blockC01 += *biasPtr;
+                blockC11 += *(biasPtr + 1);
+                blockC21 += *(biasPtr + 2);
+                blockC31 += *(biasPtr + 3);
+                #endif
+
+                blockC01 = ACTIVATION(blockC01, NL_M, NL_N);
+                blockC11 = ACTIVATION(blockC11, NL_M, NL_N);
+                blockC21 = ACTIVATION(blockC21, NL_M, NL_N);
+                blockC31 = ACTIVATION(blockC31, NL_M, NL_N);
+
+                for ( unsigned i = 0; i < 8; i++ )
+                {
+                    out1[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC01[i];
+                    out1[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC11[i];
+                    out1[(16+i) * OUTPUT_FEATURE_PITCH] = blockC21[i];
+                    out1[(24+i) * OUTPUT_FEATURE_PITCH] = blockC31[i];
+                }
+            }
+            else
+            {
+                if ( ( OUTPUT_FEATURE_NUM % TILE_N ) >= 24 )
+                {
+                    #if BIAS_TERM
+                    blockC01 += *biasPtr;
+                    blockC11 += *(biasPtr + 1);
+                    blockC21 += *(biasPtr + 2);
+                    if ( ( OUTPUT_FEATURE_NUM % TILE_N ) > 24 ) blockC31 += *(biasPtr + 3);
+                    #endif
+
+                    blockC01 = ACTIVATION(blockC01, NL_M, NL_N);
+                    blockC11 = ACTIVATION(blockC11, NL_M, NL_N);
+                    blockC21 = ACTIVATION(blockC21, NL_M, NL_N);
+
+                    for (unsigned i = 0; i < 8; i++)
+                    {
+                        out1[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC01[i];
+                        out1[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC11[i];
+                        out1[(16+i) * OUTPUT_FEATURE_PITCH] = blockC21[i];
+                    }
+
+                    // Remaining channels
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out1[(24+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC31[i], NL_M, NL_N);
+                    }
+                }
+                else if ( ( OUTPUT_FEATURE_NUM % TILE_N ) >= 16 )
+                {
+                    #if BIAS_TERM
+                    blockC01 += *biasPtr;
+                    blockC11 += *(biasPtr + 1);
+                    if ( ( OUTPUT_FEATURE_NUM % TILE_N ) > 16 ) blockC21 += *(biasPtr + 2);
+                    #endif
+
+                    blockC01 = ACTIVATION(blockC01, NL_M, NL_N);
+                    blockC11 = ACTIVATION(blockC11, NL_M, NL_N);
+
+                    for (unsigned i = 0; i < 8; i++)
+                    {
+                        out1[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC01[i];
+                        out1[( 8+i) * OUTPUT_FEATURE_PITCH] = blockC11[i];
+                    }
+
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out1[(16+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC21[i], NL_M, NL_N);
+                    }
+                }
+                else if ( ( OUTPUT_FEATURE_NUM % TILE_N ) >= 8 )
+                {
+                    #if BIAS_TERM
+                    blockC01 += *biasPtr;
+                    if ( ( OUTPUT_FEATURE_NUM % TILE_N ) > 8 ) blockC11 += *(biasPtr + 1);
+                    #endif
+
+                    blockC01 = ACTIVATION(blockC01, NL_M, NL_N);
+
+                    for (unsigned i = 0; i < 8; i++)
+                    {
+                        out1[( 0+i) * OUTPUT_FEATURE_PITCH] = blockC01[i];
+                    }
+
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out1[(8+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC11[i], NL_M, NL_N);
+                    }
+                }
+                else
+                {
+                    #if BIAS_TERM
+                    blockC01 += *biasPtr;
+                    #endif
+
+                    for (unsigned i = 0; i < OUTPUT_FEATURE_NUM % 8; i++)
+                    {
+                        out1[( 0+i) * OUTPUT_FEATURE_PITCH] = ACTIVATION(blockC01[i], NL_M, NL_N);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_mmad_32x32sg_128x128wg_slm_int8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_mmad_32x32sg_128x128wg_slm_int8.cl
new file mode 100644
index 000000000..68f3bdf09
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_mmad_32x32sg_128x128wg_slm_int8.cl
@@ -0,0 +1,509 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/mmad.cl"
+
+#define SUM_SCALE 0.11f
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    {\
+        float4 tmp;\
+        for(uint z = 0; z < 4; z++)\
+        {\
+            tmp.s0 = (float)eltw_input_vals[z * 4 + 0] * SUM_SCALE + bias_f.s0;\
+            tmp.s1 = (float)eltw_input_vals[z * 4 + 1] * SUM_SCALE + bias_f.s1;\
+            tmp.s2 = (float)eltw_input_vals[z * 4 + 2] * SUM_SCALE + bias_f.s2;\
+            tmp.s3 = (float)eltw_input_vals[z * 4 + 3] * SUM_SCALE + bias_f.s3;\
+            \
+            regC_uchar16[z * 4 + 0] = convert_uchar_sat( (regC[0 * 4 + i][idx + z / 4]) * SCALE + tmp.s0);\
+            regC_uchar16[z * 4 + 1] = convert_uchar_sat( (regC[1 * 4 + i][idx + z / 4]) * SCALE + tmp.s1);\
+            regC_uchar16[z * 4 + 2] = convert_uchar_sat( (regC[2 * 4 + i][idx + z / 4]) * SCALE + tmp.s2);\
+            regC_uchar16[z * 4 + 3] = convert_uchar_sat( (regC[3 * 4 + i][idx + z / 4]) * SCALE + tmp.s3);\
+        }\
+    }
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = regC[0 * 4 + i][idx];\
+    regC_uchar16.s1 = regC[1 * 4 + i][idx];\
+    regC_uchar16.s2 = regC[2 * 4 + i][idx];\
+    regC_uchar16.s3 = regC[3 * 4 + i][idx];\
+    \
+    regC_uchar16.s4 = regC[0 * 4 + i][idx+1];\
+    regC_uchar16.s5 = regC[1 * 4 + i][idx+1];\
+    regC_uchar16.s6 = regC[2 * 4 + i][idx+1];\
+    regC_uchar16.s7 = regC[3 * 4 + i][idx+1];\
+    \
+    regC_uchar16.s8 = regC[0 * 4 + i][idx+2];\
+    regC_uchar16.s9 = regC[1 * 4 + i][idx+2];\
+    regC_uchar16.sa = regC[2 * 4 + i][idx+2];\
+    regC_uchar16.sb = regC[3 * 4 + i][idx+2];\
+    \
+    regC_uchar16.sc = regC[0 * 4 + i][idx+3];\
+    regC_uchar16.sd = regC[1 * 4 + i][idx+3];\
+    regC_uchar16.se = regC[2 * 4 + i][idx+3];\
+    regC_uchar16.sf = regC[3 * 4 + i][idx+3];\
+    {\
+        int16 sum;\
+        for(uint s = 0; s <16; s++)\
+        {\
+            sum[s] = (int)as_char(regC_uchar16[s]) + (int)as_char(eltw_input_vals[s]);\
+        }\
+        regC_uchar16.s0 = convert_uchar_sat( sum.s0 );\
+        regC_uchar16.s1 = convert_uchar_sat( sum.s1 );\
+        regC_uchar16.s2 = convert_uchar_sat( sum.s2 );\
+        regC_uchar16.s3 = convert_uchar_sat( sum.s3 );\
+        \
+        regC_uchar16.s4 = convert_uchar_sat( sum.s4 );\
+        regC_uchar16.s5 = convert_uchar_sat( sum.s5 );\
+        regC_uchar16.s6 = convert_uchar_sat( sum.s6 );\
+        regC_uchar16.s7 = convert_uchar_sat( sum.s7 );\
+        \
+        regC_uchar16.s8 = convert_uchar_sat( sum.s8 );\
+        regC_uchar16.s9 = convert_uchar_sat( sum.s9 );\
+        regC_uchar16.sa = convert_uchar_sat( sum.sa );\
+        regC_uchar16.sb = convert_uchar_sat( sum.sb );\
+        \
+        regC_uchar16.sc = convert_uchar_sat( sum.sc );\
+        regC_uchar16.sd = convert_uchar_sat( sum.sd );\
+        regC_uchar16.se = convert_uchar_sat( sum.se );\
+        regC_uchar16.sf = convert_uchar_sat( sum.sf );\
+    }
+
+#else
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+1]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+1]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+1]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+1]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s8 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+2]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s9 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+2]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.sa = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+2]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sb = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+2]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.sc = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+3]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.sd = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+3]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.se = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+3]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sf = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+3]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    {\
+        int16 sum;\
+        for(uint s = 0; s <16; s++)\
+        {\
+            sum[s] = (int)as_char(regC_uchar16[s]) + (int)as_char(eltw_input_vals[s]);\
+        }\
+        regC_uchar16.s0 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s0)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s1 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s1)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s2 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s2)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s3 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s3)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+        \
+        regC_uchar16.s4 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s4)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s5 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s5)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s6 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s6)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s7 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s7)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+        \
+        regC_uchar16.s8 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s8)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s9 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s9)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sa = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sa)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sb = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sb)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+        \
+        regC_uchar16.sc = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sc)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sd = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sd)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.se = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.se)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sf = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sf)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+    }
+#endif
+
+
+inline uint FUNC(calculate_output_offset_to_account_padding)(uint cOffset)
+{
+#if OUT_WITH_PADDING == 1
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * OUT_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * OUT_B_BLOCK_PITCH;
+    padded_offset += y_idx * OUT_Y_PITCH;
+    padded_offset += x_idx * OUT_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += OUT_OFFSET;
+
+    return padded_offset;
+#else
+    return cOffset;
+#endif
+}
+
+#if IN_OUT_OPT != 1
+inline uint FUNC(calculate_eltw_input_offset_based_on_output_offset_account_padding)(uint cOffset, uint strideX, uint strideY)
+{
+#if ELTW_WITH_PADDING == 1 || ELTW_STRIDE_X != 1 || ELTW_STRIDE_Y != 1
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    x_idx *= strideX;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    y_idx *= strideY;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * IN2_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * IN2_B_BLOCK_PITCH;
+    padded_offset += y_idx * IN2_Y_PITCH;
+    padded_offset += x_idx * IN2_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += IN2_OFFSET;
+
+    return padded_offset;
+#else
+    return cOffset;
+#endif
+}
+#endif
+
+inline void FUNC(mmad_32x32_int8)(  __local uint* l_tileA, const uint l_offsetTileA,
+                                    __local int8* l_tileB, const uint l_offsetTileB_col0,
+                                    const uint l_offsetTileB_col1, const uint l_offsetTileB_col2,
+                                    const uint l_offsetTileB_col3, int8* rowA, int8* colB,
+                                    int8* regC)
+{
+    // Read tile A from SLM to regA
+    uint l_offsetTileATemp = l_offsetTileA;
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        rowA[j] = as_int8(SLM_BLOCK_READ_8(&l_tileA[l_offsetTileATemp]));
+        l_offsetTileATemp += 8 * SG_SIZE;
+    }
+    // Read tile B from SLM to regB and compute mmad
+    colB[0] = l_tileB[l_offsetTileB_col0];
+    colB[1] = l_tileB[l_offsetTileB_col1];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[0*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[0], regC[0*(SIMD_LANE_M / 8) + j]);
+    }
+    colB[0] = l_tileB[l_offsetTileB_col2];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[1*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[1], regC[1*(SIMD_LANE_M / 8) + j] );
+    }
+    colB[1] = l_tileB[l_offsetTileB_col3];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[2*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[0], regC[2*(SIMD_LANE_M / 8) + j]);
+    }
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[3*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[1], regC[3*(SIMD_LANE_M / 8) + j]);
+    }
+}
+
+/*
+ *  \brief GEMM kernel to compute MxN matrix using SLM
+ *  \param g_inA  - Input matrix 
+ *  \param g_inB  - Input matrix 
+ *  \param g_outC - Output matrix
+ */
+
+__attribute__((intel_reqd_sub_group_size(SG_SIZE)))   
+KERNEL(Kernel_GEMM_MMAD8_32x32SG_128x128WG_SLM_INT8_fused_eltwise)
+  (
+  __global char* const g_inA,
+  __global int* g_outC,
+  __global char* const g_inB,
+    #if BIAS_TERM
+        __global BIAS_TYPE* biases,
+    #endif
+        __global float* quantizations,
+    #if CALIBRATION_TERM
+        __global float* calibrations,
+    #endif
+        uint split_idx,
+  __global char* const input2,
+  __global float* eltw_calibrations
+   )
+{
+
+    __global int4* const g_matrixA = (__global int4*)g_inA;
+    __global int4* const g_matrixB = (__global int4*)g_inB;
+    __global int8* g_matrixC = (__global int8*)g_outC;
+
+    // Each work-group works to compute 128x128 tile.
+    // Each work-group contains 16 sub-groups.
+    // Each sub-group within the work-group works to compute a 32x32 tile.
+    // 1) All work-items in WG fill SLM with tileA (128x32) and tileB (32x128).
+    // 2) Each sub-group works to compute 32x32 tileC (stored in regC).
+    //    Note that each work-item in the sub-group computes a 32x4 chunk of tileC.
+    // 3) Repeat until tileC is fully computed (while moving tileA and tileB "windows")
+    __local int8 l_workGroupTileA[2 * (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int8)]; // [2*128*32/8] = 1024 
+    __local int8 l_workGroupTileB[2 * (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8)]; // [2*128*32/8] = 1024 
+
+    __local uint* l_workGroupTileA_uint = (__local uint*)l_workGroupTileA;
+    __local int4* l_workGroupTileA_int4 = (__local int4*)l_workGroupTileA;
+    __local int4* l_workGroupTileB_int4 = (__local int4*)l_workGroupTileB;
+
+    const uint l_groupSize = get_local_size(DIM_X) * get_local_size(DIM_Y);
+
+    const uint l_pingPongOffsetA_uint = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(uint);
+    const uint l_pingPongOffsetB_int8 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8);
+    const uint l_pingPongOffsetA_int4 = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int4);
+    const uint l_pingPongOffsetB_int4 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int4);
+
+    // Thread IDs
+    const uint g_tidY = get_global_id(DIM_Y); // 0,...,all_wi_inY
+    const uint g_tidX = get_global_id(DIM_X); // 0,...,all_wi_inX
+    const uint l_tidX = get_local_id(DIM_X);  // 0,...,31 in WG
+    const uint l_tidY = get_local_id(DIM_Y);  // 0,1,2,3  in WG
+    const uint l_tid = l_tidY * get_local_size(DIM_X) + l_tidX; // 0,1,2,...127
+
+    // SubGroup IDs
+    const uint sg_tid = get_sub_group_local_id();            // 0,1,...,8
+    const uint sg_global_idX = (uint)(g_tidX / SG_SIZE);     //{0}/8
+    const uint sg_global_idY = g_tidY;                       //{0}
+
+    const uint sg_local_idX = (uint)(l_tidX / SG_SIZE);      // {0,...,31}/8={0,0,0,0,0...,1,1,1,...,3,3,3}
+    const uint sg_local_idY = l_tidY;                        // 0,1,2,3
+    const uint sg_local_id = sg_local_idY * get_local_size(DIM_X) / SG_SIZE + sg_local_idX;  // get_local_size(DIM_X) / SG_SIZE = 32/8 = 4
+
+    const uint sub_group_id = get_sub_group_id();
+
+
+    // Registers
+    int8 regC[(SIMD_LANE_M / 8) * SIMD_LANE_N] = {0}; // Each work-item responsible for 32x4 ints elts   // (32/8)*4
+    int8 rowA[(SG_TILE_M * MATRIX_SMALL_K / SG_SIZE) / sizeof(int8)]; // each work-item will hold 1/8 of matrixA 
+    int8 colB[2];  // each lane will store 32x4 piece of matrixB
+
+    // SLM indices
+    const uint l_offsetTileA = SG_TILE_M * (MATRIX_SMALL_K / sizeof(uint)) * sg_local_idY;
+    const uint numElements32x32TileB = (MATRIX_SMALL_K * SG_TILE_N) / sizeof(int8);
+    const uint numElements32x8TileB = numElements32x32TileB / 4;
+    const uint l_offsetTileB = numElements32x32TileB * sg_local_idX;
+    const uint l_offsetTileB_col0 = l_offsetTileB + sg_tid;
+    const uint l_offsetTileB_col1 = l_offsetTileB + 1 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col2 = l_offsetTileB + 2 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col3 = l_offsetTileB + 3 * numElements32x8TileB + sg_tid;
+
+    // Global indices
+    uint g_idxA[2];
+    uint g_idxB[2];
+#ifdef TILED_GLOBAL_LAYOUT // 32-row major (matrixA) and 32-col major (matrixB)
+    g_idxA[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_M) * get_group_id(DIM_Y) + l_tid;
+    g_idxB[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_N) * get_group_id(DIM_X) + l_tid;
+    g_idxA[1] = g_idxA[0] + l_groupSize;
+    g_idxB[1] = g_idxB[0] + l_groupSize;
+#else // Row (matrixA) and Col (matrixB) major layout
+    g_idxA[0] = WG_TILE_M * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_Y) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxB[0] = WG_TILE_N * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_X) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxA[1] = g_idxA[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+    g_idxB[1] = g_idxB[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+#endif
+
+    // Initial SLM setup
+    {
+        l_workGroupTileA_int4[l_tid] = g_matrixA[g_idxA[0]];
+        l_workGroupTileB_int4[l_tid] = g_matrixB[g_idxB[0]];
+        l_workGroupTileA_int4[l_tid + l_groupSize] = g_matrixA[g_idxA[1]];
+        l_workGroupTileB_int4[l_tid + l_groupSize] = g_matrixB[g_idxB[1]];
+
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    int4 hdcReadValueA[2];
+    int4 hdcReadValueB[2];
+
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < (MATRIX_K / MATRIX_SMALL_K) - 1; k++)
+    {
+        /*
+         * SLM setup - HDC read only
+         */
+        // Overlap HDC reads with mmad compute
+        hdcReadValueA[0] = g_matrixA[g_idxA[0]];
+        hdcReadValueB[0] = g_matrixB[g_idxB[0]];
+        hdcReadValueA[1] = g_matrixA[g_idxA[1]];
+        hdcReadValueB[1] = g_matrixB[g_idxB[1]];
+
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        /*
+         * mmad compute
+         */
+        FUNC_CALL(mmad_32x32_int8)(&l_workGroupTileA_uint[(k % 2) * l_pingPongOffsetA_uint],
+                                l_offsetTileA, &l_workGroupTileB[(k % 2) * l_pingPongOffsetB_int8],
+                                l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2,
+                                l_offsetTileB_col3, rowA, colB, regC);
+
+        /*
+         * SLM setup - SLM write only
+         */
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid] = hdcReadValueA[0];
+        l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid] = hdcReadValueB[0];
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid + l_groupSize] = hdcReadValueA[1];
+        l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid + l_groupSize] = hdcReadValueB[1];
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    } // main outer loop
+
+    /*
+     * Last mmad compute iteration (avoids branching in main loop)
+     */
+
+    FUNC_CALL(mmad_32x32_int8)(
+        &l_workGroupTileA_uint[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetA_uint],
+        l_offsetTileA,
+        &l_workGroupTileB[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetB_int8],
+        l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2, l_offsetTileB_col3, rowA, colB,
+        regC);
+
+#ifdef OUTPUT_TILED_GLOBAL_LAYOUT
+    // Write out in swizzled manner after quantizing
+    __global uchar* g_outC_uchar = (__global uchar*)g_outC;
+    uint cOffset = sg_global_idX * (MATRIX_M * SG_TILE_N / sizeof(uchar)) +
+                   sg_global_idY * (SG_TILE_M * SG_TILE_N / sizeof(uchar));
+
+    uchar16 regC_uchar16;
+    uint offset_uc16 = 0;
+
+    const uint workgroup_id_x = get_group_id(0); 
+    uint feature_off = 32*(sub_group_id % (WG_TILE_N / 32)) + WG_TILE_N*workgroup_id_x; //=32*{0,1,2,3} + WG_TILE_N * workgroup_id_x 
+    uint feature = get_sub_group_local_id()*4 + feature_off;
+
+    float4 quant_f = vload4(0, quantizations + feature);
+    float4 bias_f = vload4(0, biases + feature);
+    float4 calib_f = vload4(0, calibrations + feature);
+
+    // eltwise calibs
+    float4 eltw_calib_f = vload4(0, eltw_calibrations + feature);
+
+    uchar16 eltw[(2*SG_TILE_M) / (sizeof(int8) / sizeof(int))];
+    uint tmpcOff = cOffset;
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+    for (uint i = 0; i < (2*SG_TILE_M) / (sizeof(int8) / sizeof(int)); i++)
+    {
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(tmpcOff);
+#if IN_OUT_OPT == 1
+        eltw[i] = as_uchar16(intel_sub_group_block_read4((__global uint*)(g_outC_uchar + padded_offset)));
+#else
+        const uint eltw_second_input_offset = FUNC_CALL(calculate_eltw_input_offset_based_on_output_offset_account_padding)(tmpcOff, ELTW_STRIDE_X, ELTW_STRIDE_Y);
+        eltw[i] = as_uchar16(intel_sub_group_block_read4((__global uint*)(input2 + eltw_second_input_offset)));
+#endif
+        tmpcOff += sizeof(uchar16) * SG_SIZE;
+    }
+
+#if MMAD_SUPPORTED == 1
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+#endif
+    for (uint i = 0; i < SG_TILE_M / (sizeof(int8) / sizeof(int)); i++)
+    {
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            uchar16 eltw_input_vals = eltw[i * 2];
+            // B0..3, F0..31
+            QUANTIZATION(0);
+        }
+
+        intel_sub_group_block_write4((__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16));
+        cOffset += sizeof(uchar16) * SG_SIZE;
+
+        // now we need to calculate again for other x
+        padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            uchar16 eltw_input_vals = eltw[i * 2 + 1];
+            // B0..3, F0..31
+            QUANTIZATION(4);
+        }
+
+        intel_sub_group_block_write4( (__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16) );
+        cOffset += sizeof(uchar16) * SG_SIZE;
+    }
+#else
+    // Write final accumulated values
+    uint cOffset = sg_global_idX * ((MATRIX_M / 8) * SG_TILE_N) + sg_global_idY * (SG_TILE_M / 8) +
+                   sg_tid * (MATRIX_M / 8);
+    __attribute__((opencl_unroll_hint(SIMD_LANE_N)))
+    for (uint i = 0; i < (SIMD_LANE_N); ++i)
+    {
+        __attribute__((opencl_unroll_hint(SIMD_LANE_M / 8)))
+        for (uint j = 0; j < (SIMD_LANE_M / 8); ++j)
+        {
+            g_matrixC[cOffset + j] = regC[i*(SIMD_LANE_M / 8) + j];
+        }
+        cOffset += SG_SIZE * (MATRIX_M / 8);
+    }
+#endif
+}
+
+#undef SUM_SCALE
+#undef SCALE
+#undef QUANTIZATION
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_mmad_32x32sg_224x128wg_slm_int8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_mmad_32x32sg_224x128wg_slm_int8.cl
new file mode 100644
index 000000000..45148c136
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_mmad_32x32sg_224x128wg_slm_int8.cl
@@ -0,0 +1,505 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/mmad.cl"
+
+#define SUM_SCALE 0.11f
+#define SCALE 0.11f
+
+#ifdef LIGHTWEIGHT_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    {\
+        float4 tmp;\
+        for(uint z = 0; z < 4; z++)\
+        {\
+            tmp.s0 = (float)eltw_input_vals[z * 4 + 0] * SUM_SCALE + bias_f.s0;\
+            tmp.s1 = (float)eltw_input_vals[z * 4 + 1] * SUM_SCALE + bias_f.s1;\
+            tmp.s2 = (float)eltw_input_vals[z * 4 + 2] * SUM_SCALE + bias_f.s2;\
+            tmp.s3 = (float)eltw_input_vals[z * 4 + 3] * SUM_SCALE + bias_f.s3;\
+            \
+            regC_uchar16[z * 4 + 0] = convert_uchar_sat( (regC[0 * 4 + i][idx + z / 4]) * SCALE + tmp.s0);\
+            regC_uchar16[z * 4 + 1] = convert_uchar_sat( (regC[1 * 4 + i][idx + z / 4]) * SCALE + tmp.s1);\
+            regC_uchar16[z * 4 + 2] = convert_uchar_sat( (regC[2 * 4 + i][idx + z / 4]) * SCALE + tmp.s2);\
+            regC_uchar16[z * 4 + 3] = convert_uchar_sat( (regC[3 * 4 + i][idx + z / 4]) * SCALE + tmp.s3);\
+        }\
+    }
+
+#elif NO_QUANTIZATION
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = regC[0 * 4 + i][idx];\
+    regC_uchar16.s1 = regC[1 * 4 + i][idx];\
+    regC_uchar16.s2 = regC[2 * 4 + i][idx];\
+    regC_uchar16.s3 = regC[3 * 4 + i][idx];\
+    \
+    regC_uchar16.s4 = regC[0 * 4 + i][idx+1];\
+    regC_uchar16.s5 = regC[1 * 4 + i][idx+1];\
+    regC_uchar16.s6 = regC[2 * 4 + i][idx+1];\
+    regC_uchar16.s7 = regC[3 * 4 + i][idx+1];\
+    \
+    regC_uchar16.s8 = regC[0 * 4 + i][idx+2];\
+    regC_uchar16.s9 = regC[1 * 4 + i][idx+2];\
+    regC_uchar16.sa = regC[2 * 4 + i][idx+2];\
+    regC_uchar16.sb = regC[3 * 4 + i][idx+2];\
+    \
+    regC_uchar16.sc = regC[0 * 4 + i][idx+3];\
+    regC_uchar16.sd = regC[1 * 4 + i][idx+3];\
+    regC_uchar16.se = regC[2 * 4 + i][idx+3];\
+    regC_uchar16.sf = regC[3 * 4 + i][idx+3];\
+    {\
+        int16 sum;\
+        for(uint s = 0; s <16; s++)\
+        {\
+            sum[s] = (int)as_char(regC_uchar16[s]) + (int)as_char(eltw_input_vals[s]);\
+        }\
+        regC_uchar16.s0 = convert_uchar_sat( sum.s0 );\
+        regC_uchar16.s1 = convert_uchar_sat( sum.s1 );\
+        regC_uchar16.s2 = convert_uchar_sat( sum.s2 );\
+        regC_uchar16.s3 = convert_uchar_sat( sum.s3 );\
+        \
+        regC_uchar16.s4 = convert_uchar_sat( sum.s4 );\
+        regC_uchar16.s5 = convert_uchar_sat( sum.s5 );\
+        regC_uchar16.s6 = convert_uchar_sat( sum.s6 );\
+        regC_uchar16.s7 = convert_uchar_sat( sum.s7 );\
+        \
+        regC_uchar16.s8 = convert_uchar_sat( sum.s8 );\
+        regC_uchar16.s9 = convert_uchar_sat( sum.s9 );\
+        regC_uchar16.sa = convert_uchar_sat( sum.sa );\
+        regC_uchar16.sb = convert_uchar_sat( sum.sb );\
+        \
+        regC_uchar16.sc = convert_uchar_sat( sum.sc );\
+        regC_uchar16.sd = convert_uchar_sat( sum.sd );\
+        regC_uchar16.se = convert_uchar_sat( sum.se );\
+        regC_uchar16.sf = convert_uchar_sat( sum.sf );\
+    }
+
+#else
+
+#define QUANTIZATION(idx) \
+    regC_uchar16.s0 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s1 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s2 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s3 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s4 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+1]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s5 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+1]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.s6 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+1]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.s7 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+1]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.s8 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+2]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.s9 = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+2]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.sa = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+2]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sb = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+2]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    \
+    regC_uchar16.sc = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[0 * 4 + i][idx+3]) * quant_f.s0 * I_QF + bias_f.s0) * calib_f.s0)), NL_M, NL_N));\
+    regC_uchar16.sd = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[1 * 4 + i][idx+3]) * quant_f.s1 * I_QF + bias_f.s1) * calib_f.s1)), NL_M, NL_N));\
+    regC_uchar16.se = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[2 * 4 + i][idx+3]) * quant_f.s2 * I_QF + bias_f.s2) * calib_f.s2)), NL_M, NL_N));\
+    regC_uchar16.sf = as_uchar(ACTIVATION( convert_char(round(( (float)(regC[3 * 4 + i][idx+3]) * quant_f.s3 * I_QF + bias_f.s3) * calib_f.s3)), NL_M, NL_N));\
+    {\
+        int16 sum;\
+        for(uint s = 0; s <16; s++)\
+        {\
+            sum[s] = (int)as_char(regC_uchar16[s]) + (int)as_char(eltw_input_vals[s]);\
+        }\
+        regC_uchar16.s0 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s0)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s1 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s1)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s2 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s2)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s3 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s3)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+        \
+        regC_uchar16.s4 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s4)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s5 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s5)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s6 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s6)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s7 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s7)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+        \
+        regC_uchar16.s8 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s8)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.s9 = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.s9)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sa = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sa)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sb = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sb)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+        \
+        regC_uchar16.sc = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sc)  * eltw_calib_f.s0)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sd = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sd)  * eltw_calib_f.s1)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.se = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.se)  * eltw_calib_f.s2)), NL_M_ELTW, NL_N_ELTW));\
+        regC_uchar16.sf = as_uchar(ACTIVATION_ELTW( convert_char((int)round( (float)(sum.sf)  * eltw_calib_f.s3)), NL_M_ELTW, NL_N_ELTW));\
+    }
+#endif
+
+inline uint FUNC(calculate_output_offset_to_account_padding)(uint cOffset)
+{
+#if OUT_WITH_PADDING == 1
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * OUT_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * OUT_B_BLOCK_PITCH;
+    padded_offset += y_idx * OUT_Y_PITCH;
+    padded_offset += x_idx * OUT_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += OUT_OFFSET;
+
+    return padded_offset;
+#else
+    return cOffset;
+#endif
+}
+
+#if IN_OUT_OPT != 1
+inline uint FUNC(calculate_eltw_input_offset_based_on_output_offset_account_padding)(uint cOffset, uint strideX, uint strideY)
+{
+#if ELTW_WITH_PADDING == 1 || ELTW_STRIDE_X != 1 || ELTW_STRIDE_Y != 1
+    uint tmp_idx = cOffset;
+    uint f_val_idx = tmp_idx % 32;
+    tmp_idx /= 32;
+    uint b_val_idx = tmp_idx % 4;
+    tmp_idx /= 4;
+    uint x_idx = tmp_idx % OUTPUT_SIZE_X;
+    x_idx *= strideX;
+    tmp_idx /= OUTPUT_SIZE_X;
+    uint y_idx = tmp_idx % OUTPUT_SIZE_Y;
+    y_idx *= strideY;
+    tmp_idx /= OUTPUT_SIZE_Y;
+    uint b_slice_idx = tmp_idx % (OUTPUT_BATCH_NUM / 4);
+    tmp_idx /= (OUTPUT_BATCH_NUM / 4);
+    uint f_slice_idx = tmp_idx % (OUTPUT_FEATURE_NUM / 32);
+
+    uint padded_offset = f_slice_idx * IN2_F_BLOCK_PITCH;
+    padded_offset += b_slice_idx * IN2_B_BLOCK_PITCH;
+    padded_offset += y_idx * IN2_Y_PITCH;
+    padded_offset += x_idx * IN2_X_PITCH;
+    padded_offset += b_val_idx * 32;
+    padded_offset += f_val_idx;
+    padded_offset += IN2_OFFSET;
+
+    return padded_offset;
+#else
+    return cOffset;
+#endif
+}
+#endif
+
+inline void FUNC(mmad_32x32_int8)(  __local uint* l_tileA, const uint l_offsetTileA,
+                                    __local int8* l_tileB, const uint l_offsetTileB_col0,
+                                    const uint l_offsetTileB_col1, const uint l_offsetTileB_col2,
+                                    const uint l_offsetTileB_col3, int8* rowA, int8* colB,
+                                    int8* regC)
+{
+    // Read tile A from SLM to regA
+    uint l_offsetTileATemp = l_offsetTileA;
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        rowA[j] = as_int8(SLM_BLOCK_READ_8(&l_tileA[l_offsetTileATemp]));
+        l_offsetTileATemp += 8 * SG_SIZE;
+    }
+    // Read tile B from SLM to regB and compute mmad
+    colB[0] = l_tileB[l_offsetTileB_col0];
+    colB[1] = l_tileB[l_offsetTileB_col1];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[0*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[0], regC[0*(SIMD_LANE_M / 8) + j]);
+    }
+    colB[0] = l_tileB[l_offsetTileB_col2];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[1*(SIMD_LANE_M / 8) + j] = MMAD_8x8( rowA[j], colB[1], regC[1*(SIMD_LANE_M / 8) + j] );
+    }
+    colB[1] = l_tileB[l_offsetTileB_col3];
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[2*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[0], regC[2*(SIMD_LANE_M / 8) + j]);
+    }
+    __attribute__((opencl_unroll_hint(SG_TILE_M / 8)))
+    for (uint j = 0; j < (SG_TILE_M / 8); ++j)
+    {
+        // Compute partial C
+        regC[3*(SIMD_LANE_M / 8) + j] = MMAD_8x8(rowA[j], colB[1], regC[3*(SIMD_LANE_M / 8) + j]);
+    }
+}
+
+/*
+ *  \brief GEMM kernel to compute MxN matrix using SLM
+ *  \param g_inA  - Input matrix 
+ *  \param g_inB  - Input matrix 
+ *  \param g_outC - Output matrix
+ */
+
+__attribute__((intel_reqd_sub_group_size(SG_SIZE)))
+KERNEL(Kernel_GEMM_MMAD8_32x32SG_224x128WG_SLM_INT8_fused_eltwise)
+  (__global char* const g_inA,
+  __global int* g_outC,
+  __global char* const g_inB,
+    #if BIAS_TERM
+        __global BIAS_TYPE* biases,
+    #endif
+        __global float* quantizations,
+    #if CALIBRATION_TERM
+        __global float* calibrations,
+    #endif
+        uint split_idx,
+  __global char* const input2,
+  __global float* eltw_calibrations
+   )
+{
+
+    __global int4* const g_matrixA = (__global int4*)g_inA;
+    __global int4* const g_matrixB = (__global int4*)g_inB;
+    __global int8* g_matrixC = (__global int8*)g_outC;
+
+    // Each work-group works to compute 128x128 tile.
+    // Each work-group contains 16 sub-groups.
+    // Each sub-group within the work-group works to compute a 32x32 tile.
+    // 1) All work-items in WG fill SLM with tileA (128x32) and tileB (32x128).
+    // 2) Each sub-group works to compute 32x32 tileC (stored in regC).
+    //    Note that each work-item in the sub-group computes a 32x4 chunk of tileC.
+    // 3) Repeat until tileC is fully computed (while moving tileA and tileB "windows")
+    __local int8 l_workGroupTileA[2 * (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int8)];
+    __local int8 l_workGroupTileB[2 * (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8)];
+
+    __local uint* l_workGroupTileA_uint = (__local uint*)l_workGroupTileA;
+    __local int4* l_workGroupTileA_int4 = (__local int4*)l_workGroupTileA;
+    __local int4* l_workGroupTileB_int4 = (__local int4*)l_workGroupTileB;
+
+    const uint l_groupSize = get_local_size(DIM_X) * get_local_size(DIM_Y);
+
+    const uint l_pingPongOffsetA_uint = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(uint);
+    const uint l_pingPongOffsetB_int8 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int8);
+    const uint l_pingPongOffsetA_int4 = (WG_TILE_M * MATRIX_SMALL_K) / sizeof(int4);
+    const uint l_pingPongOffsetB_int4 = (WG_TILE_N * MATRIX_SMALL_K) / sizeof(int4);
+
+    // Thread IDs
+    const uint g_tidY = get_global_id(DIM_Y);
+    const uint g_tidX = get_global_id(DIM_X);
+    const uint l_tidX = get_local_id(DIM_X);
+    const uint l_tidY = get_local_id(DIM_Y);
+    const uint l_tid = l_tidY * get_local_size(DIM_X) + l_tidX;
+
+    // SubGroup IDs
+    const uint sg_tid = get_sub_group_local_id();
+    const uint sg_global_idX = (uint)(g_tidX / SG_SIZE);
+    const uint sg_global_idY = g_tidY;
+    const uint sg_local_idX = (uint)(l_tidX / SG_SIZE);
+    const uint sg_local_idY = l_tidY;
+    const uint sg_local_id = sg_local_idY * get_local_size(DIM_X) / SG_SIZE + sg_local_idX;
+
+    const uint sub_group_id = get_sub_group_id();
+
+    // Registers
+    int8 regC[(SIMD_LANE_M / 8) * SIMD_LANE_N] = {0}; // Each work-item responsible for 32x4 ints elts
+    int8 rowA[(SG_TILE_M * MATRIX_SMALL_K / SG_SIZE) / sizeof(int8)]; // each work-item will hold 1/8 of matrixA
+    int8 colB[2];  // each lane will store 32x4 piece of matrixB
+
+    // SLM indices
+    const uint l_offsetTileA = SG_TILE_M * (MATRIX_SMALL_K / sizeof(uint)) * sg_local_idY;
+    const uint numElements32x32TileB = (MATRIX_SMALL_K * SG_TILE_N) / sizeof(int8);
+    const uint numElements32x8TileB = numElements32x32TileB / 4;
+    const uint l_offsetTileB = numElements32x32TileB * sg_local_idX;
+    const uint l_offsetTileB_col0 = l_offsetTileB + sg_tid;
+    const uint l_offsetTileB_col1 = l_offsetTileB + 1 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col2 = l_offsetTileB + 2 * numElements32x8TileB + sg_tid;
+    const uint l_offsetTileB_col3 = l_offsetTileB + 3 * numElements32x8TileB + sg_tid;
+
+    // Global indices
+    uint g_idxA[2];
+    uint g_idxB[2];
+#ifdef TILED_GLOBAL_LAYOUT // 32-row major (matrixA) and 32-col major (matrixB)
+    g_idxA[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_M) * get_group_id(DIM_Y) + l_tid;
+    g_idxB[0] = ((MATRIX_SMALL_K / sizeof(int4)) * WG_TILE_N) * get_group_id(DIM_X) + l_tid;
+    g_idxA[1] = g_idxA[0] + l_groupSize;
+    g_idxB[1] = g_idxB[0] + l_groupSize;
+#else // Row (matrixA) and Col (matrixB) major layout
+    g_idxA[0] = WG_TILE_M * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_Y) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxB[0] = WG_TILE_N * (MATRIX_K / sizeof(int4)) * get_group_id(DIM_X) +
+               (l_tid / 2) * (MATRIX_K / sizeof(int4)) + (l_tid % 2);
+    g_idxA[1] = g_idxA[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+    g_idxB[1] = g_idxB[0] + (l_groupSize / 2) * (MATRIX_K / sizeof(int4));
+#endif
+    // Initial SLM setup
+    {
+        l_workGroupTileA_int4[l_tid] = g_matrixA[g_idxA[0]];
+        l_workGroupTileB_int4[l_tid] = g_matrixB[g_idxB[0]];
+
+        l_workGroupTileA_int4[l_tid + l_groupSize] = g_matrixA[g_idxA[1]];
+        if (l_tid < 32)
+        {
+            // Not all work-items will be needed to fetch the remaining matrix B
+            l_workGroupTileB_int4[l_tid + l_groupSize] = g_matrixB[g_idxB[1]];
+        }
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+    int4 hdcReadValueA[2];
+    int4 hdcReadValueB[2];
+
+    __attribute__((opencl_unroll_hint(1)))
+    for (uint k = 0; k < (MATRIX_K / MATRIX_SMALL_K) - 1; k++)
+    {
+        hdcReadValueA[0] = g_matrixA[g_idxA[0]];
+        hdcReadValueB[0] = g_matrixB[g_idxB[0]];
+        hdcReadValueA[1] = g_matrixA[g_idxA[1]];
+        if (l_tid < 32)
+        {
+            // Not all work-items will be needed to fetch the remaining matrix B
+            hdcReadValueB[1] = g_matrixB[g_idxB[1]];
+        }
+#ifdef TILED_GLOBAL_LAYOUT
+        g_idxA[0] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_M * MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_N * MATRIX_SMALL_K / sizeof(int4);
+#else
+        g_idxA[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[0] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxA[1] += MATRIX_SMALL_K / sizeof(int4);
+        g_idxB[1] += MATRIX_SMALL_K / sizeof(int4);
+#endif
+
+
+        //MMAD compute
+        FUNC_CALL(mmad_32x32_int8)(&l_workGroupTileA_uint[(k % 2) * l_pingPongOffsetA_uint],
+                                l_offsetTileA, &l_workGroupTileB[(k % 2) * l_pingPongOffsetB_int8],
+                                l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2,
+                                l_offsetTileB_col3, rowA, colB, regC);
+
+        //SLM setup - SLM write only
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid] = hdcReadValueA[0];
+        l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid] = hdcReadValueB[0];
+        l_workGroupTileA_int4[((k + 1) % 2 * l_pingPongOffsetA_int4) + l_tid + l_groupSize] = hdcReadValueA[1];
+        if (l_tid < 32)
+        {
+            // Not all work-items will be needed to fetch the remaining matrix B
+            l_workGroupTileB_int4[((k + 1) % 2 * l_pingPongOffsetB_int4) + l_tid + l_groupSize] = hdcReadValueB[1];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    } // main outer loop
+
+    //Last MMAD compute iteration (avoids branching in main loop)
+    FUNC_CALL(mmad_32x32_int8)(
+        &l_workGroupTileA_uint[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetA_uint],
+        l_offsetTileA,
+        &l_workGroupTileB[(((MATRIX_K / MATRIX_SMALL_K) - 1) % 2) * l_pingPongOffsetB_int8],
+        l_offsetTileB_col0, l_offsetTileB_col1, l_offsetTileB_col2, l_offsetTileB_col3, rowA, colB,
+        regC);
+
+
+#ifdef OUTPUT_TILED_GLOBAL_LAYOUT
+
+    // Write out in swizzled manner after quantizing
+    __global uchar* g_outC_uchar = (__global uchar*)g_outC;
+    uint cOffset = sg_global_idX * (MATRIX_M * SG_TILE_N / sizeof(uchar)) +
+                   sg_global_idY * (SG_TILE_M * SG_TILE_N / sizeof(uchar));
+
+    uchar16 regC_uchar16;
+    uint offset_uc16 = 0;
+
+    const uint workgroup_id_x = get_group_id(0); 
+    uint feature_off = 32*(sub_group_id % (WG_TILE_N / 32)) + WG_TILE_N*workgroup_id_x; //=32*{0,1,2,3} + WG_TILE_N * workgroup_id_x 
+    uint feature = get_sub_group_local_id()*4 + feature_off;
+
+    float4 quant_f = vload4(0, quantizations + feature);
+    float4 bias_f = vload4(0, biases + feature);
+    float4 calib_f = vload4(0, calibrations + feature);
+
+    // eltwise calibs
+    float4 eltw_calib_f = vload4(0, eltw_calibrations + feature);
+
+    uchar16 eltw[(2*SG_TILE_M) / (sizeof(int8) / sizeof(int))];
+    uint tmpcOff = cOffset;
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+    for (uint i = 0; i < (2*SG_TILE_M) / (sizeof(int8) / sizeof(int)); i++)
+    {
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(tmpcOff);
+#if IN_OUT_OPT == 1
+        eltw[i] = as_uchar16(intel_sub_group_block_read4((__global uint*)(g_outC_uchar + padded_offset)));
+#else
+        const uint eltw_second_input_offset = FUNC_CALL(calculate_eltw_input_offset_based_on_output_offset_account_padding)(tmpcOff, ELTW_STRIDE_X, ELTW_STRIDE_Y);
+        eltw[i] = as_uchar16(intel_sub_group_block_read4((__global uint*)(input2 + eltw_second_input_offset)));
+#endif
+        tmpcOff += sizeof(uchar16) * SG_SIZE;
+    }
+
+#if MMAD_SUPPORTED == 1
+    __attribute__((opencl_unroll_hint( SG_TILE_M / (sizeof(int8) / sizeof(int)) )))
+#endif
+    for (uint i = 0; i < SG_TILE_M / (sizeof(int8) / sizeof(int)); i++)
+    {
+        uint padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            uchar16 eltw_input_vals = eltw[i * 2];
+            // B0..3, F0..31
+            QUANTIZATION(0);
+        }
+
+        intel_sub_group_block_write4((__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16));
+        cOffset += sizeof(uchar16) * SG_SIZE;
+
+        // now we need to calculate again for other x
+        padded_offset = FUNC_CALL(calculate_output_offset_to_account_padding)(cOffset);
+        {
+            uchar16 eltw_input_vals = eltw[i * 2 + 1];
+            // B0..3, F0..31
+            QUANTIZATION(4);
+        }
+
+        intel_sub_group_block_write4( (__global uint*)(g_outC_uchar + padded_offset), as_uint4(regC_uchar16) );
+        cOffset += sizeof(uchar16) * SG_SIZE;
+    }
+#else
+    // Write final accumulated values
+    uint cOffset = sg_global_idX * ((MATRIX_M / 8) * SG_TILE_N) + sg_global_idY * (SG_TILE_M / 8) +
+                   sg_tid * (MATRIX_M / 8);
+    __attribute__((opencl_unroll_hint(SIMD_LANE_N)))
+    for (uint i = 0; i < (SIMD_LANE_N); ++i)
+    {
+        __attribute__((opencl_unroll_hint(SIMD_LANE_M / 8)))
+        for (uint j = 0; j < (SIMD_LANE_M / 8); ++j)
+        {
+            g_matrixC[cOffset + j] = regC[i*(SIMD_LANE_M / 8) + j];
+        }
+        cOffset += SG_SIZE * (MATRIX_M / 8);
+    }
+#endif
+}
+
+#undef SUM_SCALE
+#undef SCALE
+#undef QUANTIZATION
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_yxfb_yxio_b16_fp16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_yxfb_yxio_b16_fp16.cl
new file mode 100644
index 000000000..241200f66
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/fused_conv_eltwise_gpu_yxfb_yxio_b16_fp16.cl
@@ -0,0 +1,256 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+#include "include/sub_group.cl"
+
+__attribute__((intel_reqd_sub_group_size(16)))
+__attribute__((reqd_work_group_size(16, 1, 1)))
+KERNEL(fused_conv_eltwise_gpu_yxfb_yxio_b16)(
+    const __global UNIT_TYPE* input,
+    __global UNIT_TYPE* output,
+    const __global UNIT_TYPE* filter,
+#if BIAS_TERM
+    const __global UNIT_TYPE* bias,
+#endif
+    uint split_idx,
+    const __global UNIT_TYPE* input2)
+{
+    // get_global_size(0) -> Number of work items needed to compute all features and all batches for single output spatial position
+    //                       (single (x, y) point in output).
+    // get_global_size(1) -> Output size in X-dimension.
+    // get_global_size(2) -> Output size in Y-dimension.
+    // get_global_id(0)   -> Id of work item computing single spatial point of output indicated by get_global_id(1), get_global_id(2).
+    // get_group_id(1)   -> Current x-position in output.
+    // get_group_id(2)   -> Current y-position in output.
+    //
+    // WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS -> Number of work items needed to compute entire one batch for at least one feature and one spatial point.
+    //                                           (this number in current implementation computes also OFM_PER_WORK_ITEM output features at the same time).
+    // FILTER_ARRAY_NUM                       -> Number of filters groups (split size).
+
+    const uint out_x = get_group_id(1);
+    const uint out_y = get_group_id(2);
+
+    const uint output_f_size = OUTPUT_PAD_BEFORE_FEATURE_NUM + OUTPUT_FEATURE_NUM + OUTPUT_PAD_AFTER_FEATURE_NUM;
+    const uint output_x_size = OUTPUT_PAD_BEFORE_SIZE_X + OUTPUT_SIZE_X + OUTPUT_PAD_AFTER_SIZE_X;
+    const uint linear_id_xy = OUTPUT_PAD_BEFORE_SIZE_X + out_x + output_x_size * (out_y + OUTPUT_PAD_BEFORE_SIZE_Y);
+    uint global_id = (((uint)get_global_id(0) / WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS) + (linear_id_xy * FILTER_ARRAY_NUM + split_idx) * (output_f_size / OFM_PER_WORK_ITEM)) * WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS;
+
+    const uint sub_group_id = get_local_id(0);
+
+#if defined(USE_BLOCK_READ_2) || defined(USE_BLOCK_READ_1)
+    const uint chunk_size = sizeof(uint)/sizeof(UNIT_TYPE);
+#else
+    const uint chunk_size = 1;
+#endif
+
+    const uint out_batch_id = chunk_size * sub_group_id + LOCAL_WORK_GROUP_SIZE * BATCHES_PER_WORK_ITEM * ((uint)get_group_id(0) % LOCAL_WORK_GROUPS_PER_SINGLE_BATCHES_ELEMENTS);
+
+    const uint out_id = (global_id / WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS) * OFM_PER_WORK_ITEM * OUTPUT_FEATURE_PITCH + OUTPUT_PAD_BEFORE_FEATURE_NUM * OUTPUT_FEATURE_PITCH + OUTPUT_PAD_BEFORE_BATCH_NUM + out_batch_id;
+
+    const uint ofm_offset = ((global_id * OFM_PER_WORK_ITEM) / WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS) % output_f_size;
+
+#if IN_OUT_OPT != 1 // calculating eltwise offset
+    const uint eltw_x = out_x * ELTW_STRIDE_X;
+    const uint eltw_y = out_y * ELTW_STRIDE_Y;
+
+    const uint eltw_f_size = INPUT1_PAD_BEFORE_FEATURE_NUM + INPUT1_FEATURE_NUM + INPUT1_PAD_AFTER_FEATURE_NUM;
+    const uint eltw_x_size = INPUT1_PAD_BEFORE_SIZE_X + INPUT1_SIZE_X + INPUT1_PAD_AFTER_SIZE_X;
+
+    const uint eltw_linear_id_xy = INPUT1_PAD_BEFORE_SIZE_X + eltw_x + eltw_x_size * (eltw_y + INPUT1_PAD_BEFORE_SIZE_Y);
+
+    uint eltw_global_id = (((uint)get_global_id(0) / WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS) + (eltw_linear_id_xy * FILTER_ARRAY_NUM + split_idx) * (eltw_f_size / OFM_PER_WORK_ITEM)) * WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS;
+    const uint eltw_id = (eltw_global_id / WORK_ITEMS_PER_SINGLE_BATCHES_ELEMENTS) * OFM_PER_WORK_ITEM * INPUT1_FEATURE_PITCH + INPUT1_PAD_BEFORE_FEATURE_NUM * INPUT1_FEATURE_PITCH + INPUT1_PAD_BEFORE_BATCH_NUM + out_batch_id;
+#endif
+
+    // Each component of vector element contains computation for separate output feature.
+    half16 _data[BATCHES_PER_WORK_ITEM];
+    for(uint i = 0; i < BATCHES_PER_WORK_ITEM; i++)
+    {
+        _data[i] = UNIT_VAL_ZERO;
+    }
+
+    const int x = (int)out_x * STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int y = (int)out_y * STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    for (uint i = 0; i < FILTER_SIZE_Y; i++)
+    {
+        const int input_offset_y = y + i * DILATION_SIZE_Y;
+        const bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
+
+        if(!zero_y)
+        {
+            for (uint j = 0; j < FILTER_SIZE_X; j++)
+            {
+                const int input_offset_x = x + j * DILATION_SIZE_X;
+                const bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
+
+                if(!zero)
+                {
+                    uint input_idx = input_offset_x*INPUT0_X_PITCH + input_offset_y*INPUT0_Y_PITCH;
+                    input_idx += INPUT0_OFFSET + split_idx * FILTER_IFM_NUM * INPUT0_FEATURE_PITCH;
+                    input_idx += out_batch_id;
+
+                    //sub_group_id used as offset to make each workitem load different filter, and then shuffle it
+                    // 2 * sub_group_id is used because we group 2 halfs as one uint element.
+                    uint filter_idx = ofm_offset + 2*sub_group_id + i*FILTER_Y_PITCH + j*FILTER_X_PITCH;
+
+                    for (uint h = 0; h < FILTER_IFM_NUM; h++)
+                    {
+#if defined(USE_BLOCK_READ_2)
+                        half4 _input = as_half4(intel_sub_group_block_read2((const __global uint*)(input + input_idx)));
+                        uint filter_val_pair = *(const __global uint*)(filter + filter_idx);
+                        half16 filter_transp = TRANSPOSE_BLOCK_16_FP16(filter_val_pair);
+                        _data[0] = fma(_input.s0, filter_transp, _data[0]);
+                        _data[1] = fma(_input.s1, filter_transp, _data[1]);
+                        _data[2] = fma(_input.s2, filter_transp, _data[2]);
+                        _data[3] = fma(_input.s3, filter_transp, _data[3]);
+                        input_idx += INPUT0_FEATURE_PITCH;
+#elif defined(USE_BLOCK_READ_1)
+                        half2 _input = as_half2(intel_sub_group_block_read((const __global uint*)(input + input_idx)));
+                        uint filter_val_pair = *(const __global uint*)(filter + filter_idx);
+                        half16 filter_transp = TRANSPOSE_BLOCK_16_FP16(filter_val_pair);
+                        _data[0] = fma(_input.s0, filter_transp, _data[0]);
+                        _data[1] = fma(_input.s1, filter_transp, _data[1]);
+                        input_idx += INPUT0_FEATURE_PITCH;
+#else
+                        uint filter_val_pair = *(const __global uint*)(filter + filter_idx);
+                        half16 filter_transp = TRANSPOSE_BLOCK_16_FP16(filter_val_pair);
+                        for(uint s = 0; s < BATCHES_PER_WORK_ITEM; s++)
+                        {
+                            _data[s] = fma(input[input_idx], filter_transp, _data[s]);
+                            input_idx += LOCAL_WORK_GROUP_SIZE;
+                        }
+                        input_idx += INPUT0_FEATURE_PITCH - BATCHES_PER_WORK_ITEM * LOCAL_WORK_GROUP_SIZE;
+#endif
+                        filter_idx += FILTER_IFM_PITCH;
+                    }
+                }
+            }
+        }
+    }
+
+#if BIAS_TERM
+    uint bias_val_pair = *(const __global uint*)(bias + (ofm_offset + 2 * sub_group_id));
+    for(uint s = 0; s < BATCHES_PER_WORK_ITEM; s++)
+    {
+        ADD_BIAS_16_FP16(_data[s], bias_val_pair);
+    }
+#endif
+    for(uint s = 0; s < BATCHES_PER_WORK_ITEM; s++)
+    {
+        _data[s] = ACTIVATION(_data[s], NL_M, NL_N);
+    }
+
+#if defined(USE_BLOCK_READ_2) || defined(USE_BLOCK_READ_1)
+    #if BATCHES_PER_WORK_ITEM == 4
+        uint _out_id = OUTPUT_VIEW_OFFSET + out_id;
+        for(uint i = 0; i < 16; i++)
+        {
+#if IN_OUT_OPT == 1
+            half2 eltw_second_input_data0 = as_half2(*(__global uint*)(output + _out_id ));
+            half2 eltw_second_input_data1 = as_half2(*(__global uint*)(output + _out_id + 32));
+#else
+            uint _eltw_id = INPUT1_VIEW_OFFSET + eltw_id;
+            half2 eltw_second_input_data0 = as_half2(*(__global uint*)(input2 + _eltw_id + i * INPUT1_FEATURE_PITCH));
+            half2 eltw_second_input_data1 = as_half2(*(__global uint*)(input2 + _eltw_id + i * INPUT1_FEATURE_PITCH + 32));
+#endif
+            _data[0][i] += eltw_second_input_data0.s0;
+            _data[1][i] += eltw_second_input_data0.s1;
+            _data[2][i] += eltw_second_input_data1.s0;
+            _data[3][i] += eltw_second_input_data1.s1;
+
+            _data[0][i] = ACTIVATION_ELTW(_data[0][i], NL_M_ELTW, NL_N_ELTW);
+            _data[1][i] = ACTIVATION_ELTW(_data[1][i], NL_M_ELTW, NL_N_ELTW);
+            _data[2][i] = ACTIVATION_ELTW(_data[2][i], NL_M_ELTW, NL_N_ELTW);
+            _data[3][i] = ACTIVATION_ELTW(_data[3][i], NL_M_ELTW, NL_N_ELTW);
+
+            *(__global uint*)(output + _out_id) = as_uint((half2)(_data[0][i], _data[1][i]));
+            *(__global uint*)(output + _out_id + 32) = as_uint((half2)(_data[2][i], _data[3][i]));
+            _out_id += OUTPUT_FEATURE_PITCH;
+        }
+    #else
+    for(uint s = 0; s < BATCHES_PER_WORK_ITEM / 2; s++)
+    {
+        uint _out_id = OUTPUT_VIEW_OFFSET + out_id + chunk_size * s * LOCAL_WORK_GROUP_SIZE;
+
+        for(uint i = 0; i < 16; i++)
+        {
+#if IN_OUT_OPT == 1
+            half2 eltw_second_input_data = as_half2(*(__global uint*)(output + _out_id + i * OUTPUT_FEATURE_PITCH));
+#else
+            uint _eltw_id = INPUT1_VIEW_OFFSET + eltw_id + chunk_size * s * LOCAL_WORK_GROUP_SIZE;
+            half2 eltw_second_input_data = as_half2(*(__global uint*)(input2 + _eltw_id + i * INPUT1_FEATURE_PITCH));
+#endif
+            _data[chunk_size * s][i] += eltw_second_input_data.s0;
+            _data[chunk_size * s + 1][i] += eltw_second_input_data.s1;
+            _data[chunk_size * s][i] = ACTIVATION_ELTW(_data[chunk_size * s][i], NL_M_ELTW, NL_N_ELTW);
+            _data[chunk_size * s + 1][i] = ACTIVATION_ELTW(_data[chunk_size * s + 1][i], NL_M_ELTW, NL_N_ELTW);
+        }
+
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s0, _data[chunk_size * s + 1].s0)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s1, _data[chunk_size * s + 1].s1)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s2, _data[chunk_size * s + 1].s2)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s3, _data[chunk_size * s + 1].s3)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s4, _data[chunk_size * s + 1].s4)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s5, _data[chunk_size * s + 1].s5)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s6, _data[chunk_size * s + 1].s6)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s7, _data[chunk_size * s + 1].s7)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s8, _data[chunk_size * s + 1].s8)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].s9, _data[chunk_size * s + 1].s9)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].sa, _data[chunk_size * s + 1].sa)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].sb, _data[chunk_size * s + 1].sb)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].sc, _data[chunk_size * s + 1].sc)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].sd, _data[chunk_size * s + 1].sd)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].se, _data[chunk_size * s + 1].se)); _out_id += OUTPUT_FEATURE_PITCH;
+        *(__global uint*)(output + _out_id) = as_uint((half2)(_data[chunk_size * s].sf, _data[chunk_size * s + 1].sf)); _out_id += OUTPUT_FEATURE_PITCH;
+    }
+    #endif
+#else
+    for(uint s = 0; s < BATCHES_PER_WORK_ITEM; s++)
+    {
+        uint _out_id = OUTPUT_VIEW_OFFSET + out_id + s * LOCAL_WORK_GROUP_SIZE;
+
+        for(uint i = 0; i < 16; i++)
+        {
+#if IN_OUT_OPT == 1
+            half eltw_second_input_data = output[_out_id + i * OUTPUT_FEATURE_PITCH];
+#else
+            uint _eltw_id = INPUT1_VIEW_OFFSET + eltw_id + s * LOCAL_WORK_GROUP_SIZE;
+            half eltw_second_input_data = output[_eltw_id + i * INPUT1_FEATURE_PITCH];
+#endif
+            _data[s][i] += eltw_second_input_data;
+            _data[s][i] = ACTIVATION_ELTW(_data[s][i], NL_M_ELTW, NL_N_ELTW);
+        }
+
+        output[_out_id] = _data[s].s0; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s1; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s2; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s3; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s4; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s5; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s6; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s7; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s8; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].s9; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].sa; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].sb; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].sc; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].sd; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].se; _out_id += OUTPUT_FEATURE_PITCH;
+        output[_out_id] = _data[s].sf; _out_id += OUTPUT_FEATURE_PITCH;
+    }
+#endif
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_ref.cl
new file mode 100644
index 000000000..ee2adda91
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gather_ref.cl
@@ -0,0 +1,33 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+KERNEL(gather_ref)(const __global UNIT_TYPE* dictionary, const __global float* indices, __global UNIT_TYPE* output)
+{
+    const uint workItemId = get_global_id(0);
+
+    if (workItemId >= COMPUTATIONAL_OPERATIONS_NUMBER)
+        return;
+
+    uint partNumber = workItemId / INPUT1_LENGTH;
+    uint outputIndex = workItemId * SLICE_SIZE;
+    uint index = workItemId - (partNumber * INPUT1_LENGTH);
+
+    for (int k = 0; k < SLICE_SIZE; ++k)
+    {
+        output[outputIndex++] = dictionary[(partNumber * PART_SIZE) + ((uint) indices[index] * SLICE_SIZE) + k];
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_ref.cl
index 26656ab58..a8a29b110 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/gemm_ref.cl
@@ -50,7 +50,7 @@ for (uint i = 0; i < Y1; ++i)
 		value = fma(input0[in0_idx], input1[in1_idx], value);
 	}
 #if TRANSPOSE_INPUT1 && TRANSPOSE_INPUT2
-	uint out_idx = y * X1 + x + b * X1 * Y2;
+	uint out_idx = x * Y2 + y + b * X1 * Y2;
 #elif TRANSPOSE_INPUT1
 	uint out_idx = x * X2 + y + b * X1 * Y1;
 #elif TRANSPOSE_INPUT2
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl
index 4bc9338e5..14db17dad 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/generic_eltwise_ref.cl
@@ -16,13 +16,24 @@
 
 #include "include/include_all.cl"
 
-#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM
+#ifdef INPUT_STRIDED
+
+#define GET_INDEX(prefix, num) \
+    CAT(CAT(prefix, num), _OFFSET) + \
+    ((d1 * CAT(CAT(prefix, num), _STRIDE_X)) % CAT(CAT(prefix, num), _SIZE_X))*CAT(CAT(prefix, num), _X_PITCH) +\
+    ((d2 * CAT(CAT(prefix, num), _STRIDE_Y)) % CAT(CAT(prefix, num), _SIZE_Y))*CAT(CAT(prefix, num), _Y_PITCH) +\
+    (d3 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) + \
+    (d4 % CAT(CAT(prefix, num), _BATCH_NUM  ))*CAT(CAT(prefix, num), _BATCH_PITCH)
+
+#else
+
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
 
 #define GET_INDEX(prefix, num)                                                          \
     CAT(CAT(prefix, num), _OFFSET) +                                                    \
-    (d1 % CAT(CAT(prefix, num), _SIZE_X     ))*CAT(CAT(prefix, num), _X_PITCH) +         \
-    (d2 % CAT(CAT(prefix, num), _SIZE_Y     ))*CAT(CAT(prefix, num), _Y_PITCH) +         \
-    (d3 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) +   \
+    (d1 % CAT(CAT(prefix, num), _SIZE_X     ))*CAT(CAT(prefix, num), _X_PITCH) +        \
+    (d2 % CAT(CAT(prefix, num), _SIZE_Y     ))*CAT(CAT(prefix, num), _Y_PITCH) +        \
+    (d3 % CAT(CAT(prefix, num), _FEATURE_NUM))*CAT(CAT(prefix, num), _FEATURE_PITCH) +  \
     (d4 % CAT(CAT(prefix, num), _BATCH_NUM  ))*CAT(CAT(prefix, num), _BATCH_PITCH)
 
 #elif ELTWISE_NO_PITCH_SAME_DIMS
@@ -40,6 +51,9 @@
 
 #endif
 
+#endif
+
+
 KERNEL(eltwise)(
     INPUTS_DECLS
     __global UNIT_TYPE* output
@@ -48,9 +62,9 @@ KERNEL(eltwise)(
 #endif
     )
 {
-#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM
-    const uint d1 = get_global_id(GWS_YX) % INPUT0_SIZE_X;   // X
-    const uint d2 = get_global_id(GWS_YX) / INPUT0_SIZE_X;   // Y
+#if ELTWISE_LAYOUT_BASED || QUANTIZATION_TERM || ELTWISE_BROADCAST
+    const uint d1 = get_global_id(GWS_YX) % OUTPUT_SIZE_X;  // X
+    const uint d2 = get_global_id(GWS_YX) / OUTPUT_SIZE_X;  // Y
     const uint d3 = get_global_id(GWS_FEATURE);             // Feature
     const uint d4 = get_global_id(GWS_BATCH);               // Batch
 
@@ -67,7 +81,7 @@ KERNEL(eltwise)(
     const uint d2 = get_global_id(1);
     const uint d3 = get_global_id(2) % OUTPUT_SIZES[2];
     const uint d4 = get_global_id(2) / OUTPUT_SIZES[2];
-    
+
     uint output_offset = OUTPUT_OFFSET +
                          d1*OUTPUT_PITCHES[0] +
                          d2*OUTPUT_PITCHES[1] +
@@ -80,7 +94,7 @@ KERNEL(eltwise)(
 #else
     UNIT_TYPE res;
 #endif
-    
+
     DO_ELTWISE;
 
 #if QUANTIZATION_TERM
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_GT3.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/arg_max_min_common.cl
index 4bfd3d1aa..52531ee30 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_GT3.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/arg_max_min_common.cl
@@ -14,13 +14,13 @@
 // limitations under the License.
 */
 
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
-{
-    // KBL GT3e
-    void tuning_cache_5927(tuning_data& td)
+/* Index and Value type that holds index and value used in this kernel */
+
+#ifndef IAV_STRUCT_DEFINED
+    typedef struct 
     {
-        tuning_cache_5927_B1(td);
-    }
-}
-\ No newline at end of file
+        uint index; 
+        UNIT_TYPE value; 
+    } iav_type;
+    #define IAV_STRUCT_DEFINED
+#endif
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/common.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/common.cl
index d5ca25870..24040f2ae 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/common.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/common.cl
@@ -14,6 +14,10 @@
 // limitations under the License.
 */
 
+#if defined(cl_khr_fp16)
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
 #define __CAT(x, y) x##y
 #define CAT(x, y) __CAT(x, y)
 
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/data_types.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/data_types.cl
index 99492168d..8d3559127 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/data_types.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/data_types.cl
@@ -14,10 +14,6 @@
 // limitations under the License.
 */
 
-#if defined(cl_khr_fp16)
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#endif
-
 // TODO: currently we calculate on float32 because it's lot of "add" operation and it stuck on the value "8192.0f"
 #if !defined(ACCUMULATOR_TYPE)
     #define ACCUMULATOR_TYPE float
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl
new file mode 100644
index 000000000..68016af23
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/detection_output_common.cl
@@ -0,0 +1,180 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#define PRIOR_BOX_SIZE 4 // Each prior-box consists of [xmin, ymin, xmax, ymax].
+#define OUTPUT_ROW_SIZE 7 // Each detection consists of [image_id, label, confidence, xmin, ymin, xmax, ymax].
+
+#define CODE_TYPE_CORNER 0
+#define CODE_TYPE_CENTER_SIZE 1
+#define CODE_TYPE_CORNER_SIZE 2
+
+#define HIDDEN_CLASS ((BACKGROUND_LABEL_ID == 0 && SHARE_LOCATION)?  1 : 0)
+#define NUM_OF_IMAGES INPUT0_BATCH_NUM
+#define NUM_LOC_CLASSES ((SHARE_LOCATION)? 1 : NUM_CLASSES)
+#define NUM_CLASSES_OUT ((HIDDEN_CLASS == 1)? NUM_CLASSES - 1 : NUM_CLASSES)
+#define NUM_OF_PRIORS (INPUT0_LENGTH / (NUM_OF_IMAGES * NUM_LOC_CLASSES * PRIOR_BOX_SIZE))
+#define NUM_OF_ITEMS ((NUM_OF_PRIORS / 256) + 1)
+#define NUM_OF_ITERATIONS ((NUM_OF_PRIORS % NUM_OF_ITEMS == 0)? (NUM_OF_PRIORS / NUM_OF_ITEMS) : ((NUM_OF_PRIORS / NUM_OF_ITEMS) + 1))
+
+#define X_SIZE INPUT0_Y_PITCH
+#define Y_SIZE (INPUT0_FEATURE_PITCH/INPUT0_Y_PITCH)
+#define LOCATION_PADDING (INPUT0_PAD_BEFORE_SIZE_Y * X_SIZE + INPUT0_PAD_BEFORE_SIZE_X)
+#define LOC_XY_SIZE_PRODUCT (X_SIZE * Y_SIZE)
+#define CONF_PADDING (CONF_PADDING_Y * CONF_SIZE_X + CONF_PADDING_X)
+#define CONF_XY_SIZE_PRODUCT (CONF_SIZE_X * CONF_SIZE_Y)
+
+#define NUM_OF_PRIOR_COMPONENTS (NUM_OF_PRIORS * PRIOR_BOX_SIZE)
+#define NUM_OF_IMAGE_CONF (INPUT0_LENGTH/NUM_OF_IMAGES/PRIOR_BOX_SIZE)
+
+#define SCORES_COUNT (((TOP_K != -1) && (TOP_K < NUM_OF_PRIORS))? TOP_K : NUM_OF_PRIORS)
+
+#define OUTPUT_OFFSET (((NUM_OF_IMAGES + 15) / 16) * 16)
+#define SCORE_OFFSET 2
+
+#define INPUT_OFFSET (((NUM_IMAGES + 15) / 16) * 16)
+#define INPUT_BBOXES_COUNT ((INPUT0_LENGTH - INPUT_OFFSET) / OUTPUT_ROW_SIZE)
+#define NUM_CLASSES_IN NUM_CLASSES_OUT
+#define BBOXES_NUM_BASED_TOP_K (TOP_K * NUM_CLASSES_IN * NUM_IMAGES)
+#define INPUT_BBOXES_LENGTH (((TOP_K != -1) && (BBOXES_NUM_BASED_TOP_K < INPUT_BBOXES_COUNT))? BBOXES_NUM_BASED_TOP_K : INPUT_BBOXES_COUNT)
+#define NUM_OF_CLASS_BBOXES (INPUT_BBOXES_LENGTH / (NUM_IMAGES * NUM_CLASSES_IN))
+#define NUM_OF_IMAGE_BBOXES (INPUT_BBOXES_LENGTH / NUM_IMAGES)
+#define NUM_OF_ITEMS_SORT ((NUM_CLASSES_IN / 256) + 1)
+
+
+// Number of bboxes to keep in output
+#define KEEP_BBOXES_NUM ((KEEP_TOP_K < NUM_OF_IMAGE_BBOXES)? KEEP_TOP_K : NUM_OF_IMAGE_BBOXES)
+
+void FUNC(get_decoded_bbox)(UNIT_TYPE* decoded_bbox, __global UNIT_TYPE* input_location, __global UNIT_TYPE* input_prior_box, const uint idx_prior, const uint idx_class, const uint idx_image)
+{
+    const uint prior_offset = idx_prior * PRIOR_INFO_SIZE + PRIOR_COORD_OFFSET;
+    uint location_offset =
+        (NUM_LOC_CLASSES * (idx_prior * PRIOR_BOX_SIZE) + idx_image * INPUT0_FEATURE_NUM + idx_class * PRIOR_BOX_SIZE) *
+        LOC_XY_SIZE_PRODUCT +
+        LOCATION_PADDING;
+
+    UNIT_TYPE prior_bboxes[4] = {
+        input_prior_box[prior_offset],
+        input_prior_box[prior_offset + 1],
+        input_prior_box[prior_offset + 2],
+        input_prior_box[prior_offset + 3]};
+
+    if (!PRIOR_IS_NORMALIZED)
+    {
+        prior_bboxes[0] /= IMAGE_WIDTH;
+        prior_bboxes[1] /= IMAGE_HEIGH;
+        prior_bboxes[2] /= IMAGE_WIDTH;
+        prior_bboxes[3] /= IMAGE_HEIGH;
+    }
+
+    if (CODE_TYPE == CODE_TYPE_CORNER)
+    {
+        if (VARIANCE_ENCODED_IN_TARGET)
+        {
+            // variance is encoded in target, we simply need to add the offset predictions.
+            for(uint i = 0; i < PRIOR_BOX_SIZE; i++)
+            {
+                decoded_bbox[i] =
+                    prior_bboxes[i] +
+                    input_location[location_offset];
+
+                location_offset += LOC_XY_SIZE_PRODUCT;
+            }
+        }
+        else
+        {
+            // variance is encoded in bbox, we need to scale the offset accordingly.
+            for(uint i = 0; i < PRIOR_BOX_SIZE; i++)
+            {
+                decoded_bbox[i] = 
+                    mad(input_prior_box[NUM_OF_PRIOR_COMPONENTS + i], // prior variances are places after prior bboxes
+                        input_location[location_offset],
+                        prior_bboxes[i]);
+
+                location_offset += LOC_XY_SIZE_PRODUCT;
+            }
+        }
+    }
+    else if (CODE_TYPE == CODE_TYPE_CENTER_SIZE)
+    {
+        const UNIT_TYPE prior_width = prior_bboxes[2] - prior_bboxes[0];
+        const UNIT_TYPE prior_height = prior_bboxes[3] - prior_bboxes[1];
+        const UNIT_TYPE prior_center_x = (prior_bboxes[0] + prior_bboxes[2]) / 2;
+        const UNIT_TYPE prior_center_y = (prior_bboxes[1] + prior_bboxes[3]) / 2;
+        const UNIT_TYPE bbox_xmin = input_location[location_offset];
+        const UNIT_TYPE bbox_ymin = input_location[location_offset + LOC_XY_SIZE_PRODUCT];
+        const UNIT_TYPE bbox_xmax = input_location[location_offset + 2 * LOC_XY_SIZE_PRODUCT];
+        const UNIT_TYPE bbox_ymax = input_location[location_offset + 3 * LOC_XY_SIZE_PRODUCT];
+        UNIT_TYPE decode_bbox_center_x, decode_bbox_center_y;
+        UNIT_TYPE decode_bbox_width, decode_bbox_height;
+
+        if (VARIANCE_ENCODED_IN_TARGET)
+        {
+            // variance is encoded in target, we simply need to restore the offset predictions.
+            decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
+            decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
+            decode_bbox_width = (exp(bbox_xmax) * prior_width) / 2;
+            decode_bbox_height = (exp(bbox_ymax) * prior_height) / 2;
+        }
+        else
+        {
+            // variance is encoded in bbox, we need to scale the offset accordingly.
+            decode_bbox_center_x = input_prior_box[NUM_OF_PRIOR_COMPONENTS] * bbox_xmin * prior_width + prior_center_x;
+            decode_bbox_center_y = input_prior_box[NUM_OF_PRIOR_COMPONENTS + 1] * bbox_ymin * prior_height + prior_center_y;
+            decode_bbox_width = (exp(input_prior_box[NUM_OF_PRIOR_COMPONENTS + 2] * bbox_xmax) * prior_width) / 2;
+            decode_bbox_height = (exp(input_prior_box[NUM_OF_PRIOR_COMPONENTS + 3] * bbox_ymax) * prior_height) / 2;
+        }
+
+        decoded_bbox[0] = decode_bbox_center_x - decode_bbox_width;
+        decoded_bbox[1] = decode_bbox_center_y - decode_bbox_height;
+        decoded_bbox[2] = decode_bbox_center_x + decode_bbox_width;
+        decoded_bbox[3] = decode_bbox_center_y + decode_bbox_height;
+    }
+    else
+    {
+        const UNIT_TYPE prior_width = prior_bboxes[2] - prior_bboxes[0];
+        const UNIT_TYPE prior_height = prior_bboxes[3] - prior_bboxes[1];
+        const UNIT_TYPE bbox_xmin = input_location[location_offset];
+        const UNIT_TYPE bbox_ymin = input_location[location_offset + LOC_XY_SIZE_PRODUCT];
+        const UNIT_TYPE bbox_xmax = input_location[location_offset + 2 * LOC_XY_SIZE_PRODUCT];
+        const UNIT_TYPE bbox_ymax = input_location[location_offset + 3 * LOC_XY_SIZE_PRODUCT];
+
+        if (VARIANCE_ENCODED_IN_TARGET)
+        {
+            // variance is encoded in target, we simply need to add the offset predictions.
+            decoded_bbox[0] = prior_bboxes[0] + bbox_xmin * prior_width;
+            decoded_bbox[1] = prior_bboxes[1] + bbox_ymin * prior_height;
+            decoded_bbox[2] = prior_bboxes[2] + bbox_xmax * prior_width;
+            decoded_bbox[3] = prior_bboxes[3] + bbox_ymax * prior_height;
+        }
+        else
+        {
+            // variance is encoded in bbox, we need to scale the offset accordingly.
+            decoded_bbox[0] = prior_bboxes[0] + input_prior_box[NUM_OF_PRIOR_COMPONENTS] * bbox_xmin * prior_width;
+            decoded_bbox[1] = prior_bboxes[1] + input_prior_box[NUM_OF_PRIOR_COMPONENTS + 1] * bbox_ymin * prior_height;
+            decoded_bbox[2] = prior_bboxes[2] + input_prior_box[NUM_OF_PRIOR_COMPONENTS + 2] * bbox_xmax * prior_width;
+            decoded_bbox[3] = prior_bboxes[3] + input_prior_box[NUM_OF_PRIOR_COMPONENTS + 3] * bbox_ymax * prior_height;
+        }
+    } 
+}
+
+UNIT_TYPE FUNC(get_score)(__global UNIT_TYPE* input_confidence, const uint idx_prior, const uint idx_class, const uint idx_image)
+{
+    const uint confidence_offset =                    // offset in kernel input 'input_confidence'
+            (idx_prior * NUM_CLASSES + idx_image * NUM_OF_PRIORS * NUM_CLASSES + idx_class) *
+            CONF_XY_SIZE_PRODUCT +
+            CONF_PADDING;
+
+    return (input_confidence[confidence_offset] > CONFIDENCE_THRESHOLD)? input_confidence[confidence_offset] : 0;
+}
+
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/fetch.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/fetch.cl
index 582e9f525..837f4fce3 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/fetch.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/fetch.cl
@@ -72,6 +72,27 @@ inline uint FUNC(get_byxf_af32_index)(uint b, uint f, uint y, uint x, uint y_pit
 		CAT(prefix, _FEATURE_NUM),                 \
 		CAT(prefix, _OFFSET))
 
+inline uint FUNC(get_byx8_f4_index)(uint b, uint f, uint y, uint x,
+    uint x_pitch, uint y_pitch, uint b_pitch, uint f_size, uint x_size, uint offset)
+{
+    const uint f_aligned_to_4 = ((f_size + 3) / 4) * 4;
+    const uint x_aligned_to_8 = ((x_size + 7) / 8) * 8;
+    const uint b_offset = b * b_pitch;
+    const uint xy_offset = x * x_pitch + y * y_pitch;
+    const uint f_offset = f;
+    const size_t idx = offset + xy_offset + b_offset + f_offset;
+    return idx;
+}
+
+#define GET_DATA_BYX8_F4_INDEX(prefix, b, f, y, x)\
+	FUNC_CALL(get_byx8_f4_index)(                 \
+		b, f, y, x, CAT(prefix, _X_PITCH),          \
+		CAT(prefix, _Y_PITCH),                      \
+		CAT(prefix, _BATCH_PITCH),                      \
+		CAT(prefix, _FEATURE_NUM),                 \
+		CAT(prefix, _SIZE_X),                 \
+		CAT(prefix, _OFFSET))
+
 #define GET_DATA_BF8_XY16_INDEX(prefix, b, f, y, x)     \
     FUNC_CALL(get_bf8_xy16_index)(                      \
         b, f, y, x, CAT(prefix, _SIZE_X ),              \
@@ -249,7 +270,35 @@ inline uint FUNC(get_os_is_yx_isa8_osv8_isv4_index)(uint o, uint i, uint y, uint
         CAT(prefix, _OFM_NUM),                                                  \
         CAT(prefix, _OFFSET))
 
+inline uint FUNC(get_os_is_yx_isa8_osv8_isv4_swizzled_by_4_index)(uint o, uint i, uint y, uint x, uint size_x, uint size_y, uint size_ifm, uint size_ofm, uint offset)
+{
+    const uint o_swizzled = (o % 4) * 8 + ((o % 32) / 4) + (o / 32) * 32;
+
+    const uint f_32_aligned = ((size_ifm + 31)/32) * 32;
+	const uint isv2_idx = i % 4;
+	const uint osv_idx = o_swizzled % 8;
+	const uint isv1_idx = (i / 4) % 8;
+	const uint is_idx = i / 32;
+	const uint os_idx = o_swizzled / 8;
 
+	size_t idx = offset + isv2_idx + 4 * (osv_idx + 8 * isv1_idx);
+	idx += x * 4 * 8 * 8;
+	idx += y * size_x * 4 * 8 * 8;
+	idx += is_idx * size_y * size_x * 4 * 8 * 8;
+	idx += os_idx * (f_32_aligned/32) * size_y * size_x * 4 * 8 * 8;
+
+    return idx;
+}
+
+#define GET_FILTER_OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4_INDEX(prefix, o, i, y, x) \
+	FUNC_CALL(get_os_is_yx_isa8_osv8_isv4_swizzled_by_4_index)(                    \
+        o, i, y, x, CAT(prefix, _SIZE_X ),                                      \
+        CAT(prefix, _SIZE_Y),                                                \
+        CAT(prefix, _IFM_NUM),                                                  \
+        CAT(prefix, _OFM_NUM),                                                  \
+        CAT(prefix, _OFFSET))
+
+        
 inline uint FUNC(get_is_o_yx_isv32_index)(uint o, uint i, uint y, uint x, uint i_size, uint o_size, uint x_size, uint y_size)
 {
     const uint i_aligned_to_32 = ((i_size + 31) / 32) * 32;
@@ -266,6 +315,106 @@ inline uint FUNC(get_is_o_yx_isv32_index)(uint o, uint i, uint y, uint x, uint i
         CAT(prefix, _SIZE_X),\
         CAT(prefix, _SIZE_Y))
 
+inline uint FUNC(get_is_o32_yx_isv32_swizzled_by_4_index)(uint o, uint i, uint y, uint x, uint i_size, uint o_size, uint x_size, uint y_size)
+{
+    const uint o_aligned_to_32 = ((o_size + 31) / 32) * 32;
+    const uint o_swizzled = (o % 4) * 8 + ((o % 32) / 4) + (o / 32) * 32;
+    const uint i_aligned_to_32 = ((i_size + 31) / 32) * 32;
+    const uint i_val = i % 32;
+    const uint i_slice = i / 32;
+    const size_t idx = i_val + 32* (x + x_size * (y + y_size * (o_swizzled + o_aligned_to_32 * i_slice) ) );
+    return idx;
+}
+
+#define GET_FILTER_IS_O32_YX_ISV32_SWIZZLED_BY_4(prefix, o, i, y, x)\
+    FUNC_CALL(get_is_o32_yx_isv32_swizzled_by_4_index)(\
+        o, i, y, x, CAT(prefix, _IFM_NUM),\
+        CAT(prefix, _OFM_NUM),\
+        CAT(prefix, _SIZE_X),\
+        CAT(prefix, _SIZE_Y))
+
+inline uint FUNC(get_os_is_y_x8_osv8_isv4_index)(uint o, uint i, uint y, uint x, uint i_size, uint o_size, uint x_size, uint y_size)
+{
+    const uint i_aligned_to_4 = ((i_size + 3) / 4) * 4;
+    const uint o_aligned_to_8 = ((o_size + 7) / 8) * 8;
+    const uint x_aligned_to_8 = ((x_size + 7) / 8) * 8;
+    const uint i_val = i % 4;
+    const uint i_slice = i / 4;
+    const uint o_val = o % 8;
+    const uint o_slice = o / 8;
+    const size_t idx = i_val + 4 * (o_val + 8 * ( x + x_aligned_to_8 * (y + y_size * (i_slice + (i_aligned_to_4/4) * (o_slice)))));
+    return idx;
+}
+
+#define GET_FILTER_OS_IS_Y_X8_OSV8_ISV4(prefix, o, i, y, x)\
+    FUNC_CALL(get_os_is_y_x8_osv8_isv4_index)(\
+        o, i, y, x, CAT(prefix, _IFM_NUM),\
+        CAT(prefix, _OFM_NUM),\
+        CAT(prefix, _SIZE_X),\
+        CAT(prefix, _SIZE_Y))
+
+#define GET_DATA_B_FS_YX_FSV4_INDEX(prefix, o, i, y, x)\
+    FUNC_CALL(get_b_fs_yx_fsv4)(\
+        o, i, y, x,\
+        CAT(prefix, _FEATURE_NUM),\
+        CAT(prefix, _PAD_BEFORE_SIZE_Y), CAT(prefix, _SIZE_Y), CAT(prefix, _PAD_AFTER_SIZE_Y),\
+        CAT(prefix, _PAD_BEFORE_SIZE_X), CAT(prefix, _SIZE_X), CAT(prefix, _PAD_AFTER_SIZE_X))
+
+inline uint FUNC(get_b_fs_yx_fsv4)(uint o, uint i, uint y, uint x,
+                                   uint feature_num,
+                                   uint pad_before_size_y, uint size_y, uint pad_after_size_y,
+                                   uint pad_before_size_x, uint size_x, uint pad_after_size_x)
+{
+    const uint tile = 4;
+    uint id_tile = i / tile;
+    uint id      = i - id_tile * tile;
+
+    const uint feature_num_aligned4 = ((feature_num + 3) / 4) * 4;
+
+    uint idx = o * (feature_num_aligned4 / tile) *
+                   (pad_before_size_y + size_y + pad_after_size_y) *
+                   (pad_before_size_x + size_x + pad_after_size_x) * tile
+               + id_tile * (pad_before_size_y + size_y + pad_after_size_y) *
+                           (pad_before_size_x + size_x + pad_after_size_x) * tile
+               + pad_before_size_y * (pad_before_size_x + size_x + pad_after_size_x) * tile
+               + y * (pad_before_size_x + size_x + pad_after_size_x) * tile
+               + pad_before_size_x * tile
+               + x * tile
+               + id;
+
+    return idx;
+}
+
+#define GET_FILTER_OS_IS_YX_OSV16_ISV4_INDEX(prefix, o, i, y, x)\
+    FUNC_CALL(get_os_is_yx_osv16_isv4)(\
+        o, i, y, x,\
+        CAT(prefix, _IFM_PITCH),\
+        CAT(prefix, _OFM_PITCH),\
+        CAT(prefix, _SIZE_X))
+
+inline uint FUNC(get_os_is_yx_osv16_isv4)(uint o, uint i, uint y, uint x,
+                                          uint i_size,
+                                          uint o_size,
+                                          uint x_size)
+{
+    const uint otd = 16;
+    uint out_depth_tile = o / otd;
+    uint od             = o - out_depth_tile * otd;
+
+    const uint tile = 4;
+    uint id_tile = i / tile;
+    uint id      = i - id_tile * tile;
+
+    uint idx = out_depth_tile * (o_size / tile) * otd * tile
+               + id_tile               * i_size * otd * tile
+               + y                     * x_size * otd * tile
+               + x                              * otd * tile
+               + od                                   * tile
+               + id;
+
+    return idx;
+}
+
 #define DECLARE_SAMPLER const sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST
 
 #if FP16_UNIT_USED
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/imad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/imad.cl
new file mode 100644
index 000000000..d05e20e8f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/imad.cl
@@ -0,0 +1,34 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+inline int FUNC(imad_SW)(int acc, uchar4 input, char4 weight) __attribute__((overloadable)) {
+    acc += input[0] * weight[0];
+    acc += input[1] * weight[1];
+    acc += input[2] * weight[2];
+    acc += input[3] * weight[3];
+    return acc;
+}
+
+inline int FUNC(imad_SW)(int acc, char4 input, char4 weight) __attribute__((overloadable)) {
+    acc += input[0] * weight[0];
+    acc += input[1] * weight[1];
+    acc += input[2] * weight[2];
+    acc += input[3] * weight[3];
+    return acc;
+}
+
+
+#define IMAD(_O, _I, _W) FUNC_CALL(imad_SW)(_O, _I, _W)
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/include_all.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/include_all.cl
index 6b030bc31..cc1c7ea40 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/include_all.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/include_all.cl
@@ -16,6 +16,4 @@
 
 #include "common.cl"
 #include "data_types.cl"
-#include "sub_group.cl"
-#include "reshape_dims.cl"
 #include "fetch.cl"
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/mmad.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/mmad.cl
index 12000752e..4fc07adf1 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/mmad.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/mmad.cl
@@ -14,6 +14,47 @@
 // limitations under the License.
 */
 
+void FUNC(intel_sub_group_block_write_4)( __local uint* p, uint4 data )
+{
+    p[ get_sub_group_local_id() ] = data.s0;
+    p += 8;
+    p[ get_sub_group_local_id() ] = data.s1;
+    p += 8;
+    p[ get_sub_group_local_id() ] = data.s2;
+    p += 8;
+    p[ get_sub_group_local_id() ] = data.s3;
+}
+
+uint4 FUNC(intel_sub_group_block_read_uint4)(const __local uint* p)
+{
+    uint4 ret;
+    uint idx = get_sub_group_local_id();
+
+    ret.s0 = p[idx]; idx += get_max_sub_group_size();
+    ret.s1 = p[idx]; idx += get_max_sub_group_size();
+    ret.s2 = p[idx]; idx += get_max_sub_group_size();
+    ret.s3 = p[idx]; idx += get_max_sub_group_size();
+
+    return ret;
+}
+
+uint8 FUNC(intel_sub_group_block_read_uint8)(const __local uint* p)
+{
+    uint8 ret;
+    uint idx = get_sub_group_local_id();
+
+    ret.s0 = p[idx]; idx += get_max_sub_group_size();
+    ret.s1 = p[idx]; idx += get_max_sub_group_size();
+    ret.s2 = p[idx]; idx += get_max_sub_group_size();
+    ret.s3 = p[idx]; idx += get_max_sub_group_size();
+    ret.s4 = p[idx]; idx += get_max_sub_group_size();
+    ret.s5 = p[idx]; idx += get_max_sub_group_size();
+    ret.s6 = p[idx]; idx += get_max_sub_group_size();
+    ret.s7 = p[idx]; idx += get_max_sub_group_size();
+
+    return ret;
+}
+
 inline int FUNC(mmad_4)(char4 input, char4 weight, int acc)
 {
 	acc += (input[0] * weight[0]);
@@ -75,7 +116,54 @@ inline int8 FUNC(mmad8x8)(int8 A_vectors, int8 B_vectors, int8 acc)
     return ret;
 }
 
+// TODO: remove it when cl_intel_subgroups_char extension will work
+inline void FUNC(sub_group_block_write_uchar8)(__global uchar* outPtr, uchar8 v)
+{
+#ifdef cl_intel_subgroups_char
+    intel_sub_group_block_write_uc8(outPtr, v);
+#else
+    uint idx = get_sub_group_local_id();
+
+	outPtr[idx] = v.s0; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s1; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s2; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s3; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s4; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s5; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s6; idx += get_max_sub_group_size();
+    outPtr[idx] = v.s7; idx += get_max_sub_group_size();
+#endif
+}
+
+inline uchar8 FUNC(sub_group_block_read_uchar8)(const __global uchar* ptr)
+{
+#ifdef cl_intel_subgroups_char
+    return intel_sub_group_block_read_uc8(ptr);
+#else
+    uint idx = get_sub_group_local_id();
+
+    uchar8 ret;
+
+    ret.s0 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s1 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s2 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s3 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s4 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s5 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s6 = ptr[idx]; idx += get_max_sub_group_size();
+    ret.s7 = ptr[idx]; idx += get_max_sub_group_size();
+
+    return ret;
+
+#endif
+}
+
+//
+
 
 #define MMAD_8(A, B, C) FUNC_CALL(mmad8)(A, B, C)
 #define MMAD_4x8(A, B, C) FUNC_CALL(mmad4x8)(A, B, C)
 #define MMAD_8x8(A, B, C) FUNC_CALL(mmad8x8)(A, B, C)
+#define SLM_BLOCK_WRITE_4(A, B) (FUNC_CALL(intel_sub_group_block_write_4)(A, B))
+#define SLM_BLOCK_READ_4(A) (FUNC_CALL(intel_sub_group_block_read_uint4)(A))
+#define SLM_BLOCK_READ_8(A) (FUNC_CALL(intel_sub_group_block_read_uint8)(A))
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/vec_typedefs.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/vec_typedefs.cl
index e0fdb4951..8b50ecbb5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/vec_typedefs.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/include/vec_typedefs.cl
@@ -52,37 +52,3 @@ typedef struct float14 { float s0; float s1; float s2; float s3; float s4; float
 typedef struct float15 { float s0; float s1; float s2; float s3; float s4; float s5;
                          float s6; float s7; float s8; float s9; float sa; float sb; float sc; float sd; float se; } float15;
 typedef struct float0 { float s0; } float0; //never used but makes compiler happy.
-
-#if (KERNEL_WIDTH == 1)
-__constant half1 half_zeros= (half1){0};
-#elif (KERNEL_WIDTH == 2)
-    __constant half2 half_zeros = (half2)(0);
-#elif (KERNEL_WIDTH == 3)
-    __constant half3 half_zeros = (half3)(0);
-#elif (KERNEL_WIDTH == 4)
-    __constant half4 half_zeros = (half4)(0);
-#elif (KERNEL_WIDTH == 5)
-    __constant half5 half_zeros = (half5){0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 6)
-    __constant half6 half_zeros = (half6){0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 7)
-    __constant half7 half_zeros = (half7){0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 8)
-    __constant half8 half_zeros = (half8)(0);
-#elif (KERNEL_WIDTH == 9)
-    __constant half9 half_zeros = (half9){0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 10)
-    __constant half10 half_zeros = (half10){0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 11)
-    __constant half11 half_zeros = (half11){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 12)
-    __constant half12 half_zeros = (half12){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 13)
-    __constant half13 half_zeros = (half13){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 14)
-    __constant half14 half_zeros = (half14){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 15)
-    __constant half15 half_zeros = (half15){0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-#elif (KERNEL_WIDTH == 16)
-    __constant half16 half_zeros = (half16)(0);
-#endif
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/index_select_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/index_select_gpu_ref.cl
index 9862c1a86..33d340337 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/index_select_gpu_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/index_select_gpu_ref.cl
@@ -17,7 +17,9 @@
 
 KERNEL(index_select_gpu_ref)(
     const __global UNIT_TYPE* input,
+#ifndef REVERSE
     const __global int* indices,
+#endif
     __global UNIT_TYPE* output)
 {
     // [CONSTEXPR]:
@@ -29,32 +31,73 @@ KERNEL(index_select_gpu_ref)(
     const uint out_b         = (uint) get_global_id(0);
     const uint indices_idx   = (uint) get_global_id(1);
     const uint feature_idx   = (uint) get_global_id(2);
-    const uint indices_value = indices[indices_idx];
 
+    #if AXES_NUMBER == 1
+        #ifdef REVERSE
+            const uint indices_value = REVERSE_AXIS_SIZE - 1 - indices_idx;
+        #else
+            const uint indices_value = indices[indices_idx];
+        #endif
+    #elif AXES_NUMBER > 1
+        #ifdef REVERSE
+            uint indices_value[4] =  { 
+                #ifdef REVERSE_INDEX_SELECT_AXIS_BATCH_SIZE
+                    REVERSE_INDEX_SELECT_AXIS_BATCH_SIZE - 1 - out_b,
+                #else
+                    out_b,
+                #endif
+                #ifdef REVERSE_INDEX_SELECT_AXIS_FEATURE_SIZE
+                    REVERSE_INDEX_SELECT_AXIS_FEATURE_SIZE - 1 - feature_idx,
+                #else
+                    feature_idx,
+                #endif
+                #ifdef REVERSE_INDEX_SELECT_AXIS_Y_SIZE
+                    REVERSE_INDEX_SELECT_AXIS_Y_SIZE - 1 - indices_idx,
+                #else
+                    indices_idx,
+                #endif             
+                    0     
+             };
+        #endif
+    #endif
+    
     // [LOGIC]:
-#ifdef INDEX_SELECT_AXIS_BATCH
-    for(uint x = 0; x < input_sx; x++)
-    { 
-        for(uint y = 0; y < input_sy; y++)
-        {  
-            output[GET_DATA_INDEX(OUTPUT, indices_idx, feature_idx, y, x)] = input[GET_DATA_INDEX(INPUT0, indices_value, feature_idx, y, x)];
+    #if AXES_NUMBER > 1
+        for(uint x = 0; x < input_sx; x++)
+        {
+            #ifdef REVERSE_INDEX_SELECT_AXIS_X_SIZE
+                indices_value[3] = REVERSE_INDEX_SELECT_AXIS_X_SIZE - 1 - x;
+            #else
+                indices_value[3] = x;
+            #endif
+            output[GET_DATA_INDEX(OUTPUT, out_b, feature_idx, indices_idx, x)] = input[GET_DATA_INDEX(INPUT0, indices_value[0], indices_value[1], indices_value[2], indices_value[3])];
         }
-    }
-#elif defined INDEX_SELECT_AXIS_FEATURE
-    for(uint x = 0; x < input_sx; x++)
-    {
-        output[GET_DATA_INDEX(OUTPUT, out_b, indices_idx, feature_idx, x)] = input[GET_DATA_INDEX(INPUT0, out_b, indices_value, feature_idx, x)];
-    }
-#elif defined INDEX_SELECT_AXIS_X
-    for(uint i = 0; i < input_sx; i++)
-    {
-        output[GET_DATA_INDEX(OUTPUT, out_b, feature_idx, i, indices_idx)] = input[GET_DATA_INDEX(INPUT0, out_b, feature_idx, i, indices_value)];
-    }
-#elif defined INDEX_SELECT_AXIS_Y
+            
+    #else
+        #ifdef INDEX_SELECT_AXIS_BATCH
+            for(uint x = 0; x < input_sx; x++)
+            { 
+                for(uint y = 0; y < input_sy; y++)
+                {  
+                    output[GET_DATA_INDEX(OUTPUT, indices_idx, feature_idx, y, x)] = input[GET_DATA_INDEX(INPUT0, indices_value, feature_idx, y, x)];
+                }
+            }
+        #elif defined INDEX_SELECT_AXIS_FEATURE
+            for(uint x = 0; x < input_sx; x++)
+            {
+                output[GET_DATA_INDEX(OUTPUT, out_b, indices_idx, feature_idx, x)] = input[GET_DATA_INDEX(INPUT0, out_b, indices_value, feature_idx, x)];
+            }
+        #elif defined INDEX_SELECT_AXIS_X
+            for(uint i = 0; i < input_sy; i++)
+            {
+                output[GET_DATA_INDEX(OUTPUT, out_b, feature_idx, i, indices_idx)] = input[GET_DATA_INDEX(INPUT0, out_b, feature_idx, i, indices_value)];
+            }
+        #elif defined INDEX_SELECT_AXIS_Y
 
-    for(uint i = 0; i < input_sx; i++)
-    {
-        output[GET_DATA_INDEX(OUTPUT, out_b, feature_idx, indices_idx, i)] = input[GET_DATA_INDEX(INPUT0, out_b, feature_idx, indices_value, i)];
-    }
-#endif
+            for(uint i = 0; i < input_sx; i++)
+            {
+                output[GET_DATA_INDEX(OUTPUT, out_b, feature_idx, indices_idx, i)] = input[GET_DATA_INDEX(INPUT0, out_b, feature_idx, indices_value, i)];
+            }
+        #endif
+    #endif
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_elt_gpu_bfyx_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_elt_gpu_bfyx_ref.cl
index f4d8f723a..682b83a41 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_elt_gpu_bfyx_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_elt_gpu_bfyx_ref.cl
@@ -18,9 +18,9 @@
 #define ACTIVATION_LOGISTIC(input)                      (UNIT_VAL_ONE/(UNIT_VAL_ONE + exp(-input)))
 #define ACTIVATION_HYPERBOLIC_TAN(input)                (tanh(input))
 
-// tempGEMM = [ batch, direction, 1, 4 * hidden_size ]
-// cell     = [ batch, direction, 1,     hidden_size ] optional
-// output   = [ batch, direction, 2,     hidden_size ] output
+// tempGEMM = [ batch, 1, direction, 4 * hidden_size ]
+// cell     = [ batch, 1, direction, hidden_size ] optional
+// output   = [ batch, 1, direction, hidden_size ] output
 KERNEL(lstm_elt)(
     const __global INPUT0_TYPE* input,
     __global OUTPUT_TYPE* output
@@ -47,9 +47,9 @@ KERNEL(lstm_elt)(
 #endif
 
 #if CELL_TERM
-    val += cell[GET_DATA_INDEX(CELL, b, 0, 0, x)] * ACTIVATION_LOGISTIC(CLIP(ft));
+    val += cell[GET_DATA_INDEX(CELL, b, 0, CELL_DIRECTION, x)] * ACTIVATION_LOGISTIC(CLIP(ft));
 #endif
 
-    output[GET_DATA_INDEX(OUTPUT, b, 0, 0, x)] = ACTIVATION_HYPERBOLIC_TAN(val) * ACTIVATION_LOGISTIC(ot); // hidden
+    output[GET_DATA_INDEX(OUTPUT, b, 0, 0, x)] = (OUTPUT_TYPE)(ACTIVATION_HYPERBOLIC_TAN(val) * ACTIVATION_LOGISTIC(ot)); // hidden
     output[GET_DATA_INDEX(OUTPUT, b, 1, 0, x)] = (OUTPUT_TYPE)val; // cell
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemm_gpu_bfyx_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemm_gpu_bfyx_ref.cl
index 39800750a..90370bd56 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemm_gpu_bfyx_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemm_gpu_bfyx_ref.cl
@@ -43,14 +43,14 @@ KERNEL(lstm_gemm)(
 
     ACCUMULATOR_TYPE dotProd = 0;
     for(uint x = 0; x < INPUT0_SIZE_X; ++x ) {
-      const uint input_idx     = GET_DATA_INDEX(INPUT0, b, 0, 0, x);
+      const uint input_idx     = GET_DATA_INDEX(INPUT0, b, 0, INPUT_DIRECTION, x);
       const uint weights_idx   = GET_DATA_INDEX(WEIGHTS, 0, DIRECTION, y, x);
       dotProd += (ACCUMULATOR_TYPE)(input[input_idx] * weights[weights_idx]);
     }
 
 #if HIDDEN_TERM
     for(uint x = 0; x < HIDDEN_SIZE_X; ++x ) {
-      const uint hidden_idx    = GET_DATA_INDEX(HIDDEN, b, 0, 0, x);
+      const uint hidden_idx    = GET_DATA_INDEX(HIDDEN, b, 0, HIDDEN_DIRECTION, x);
       const uint recurrent_idx = GET_DATA_INDEX(RECURRENT, 0, DIRECTION, y, x);
       dotProd += (ACCUMULATOR_TYPE)(hidden[hidden_idx] * recurrent[recurrent_idx]);
     }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.cl
new file mode 100644
index 000000000..82c3e7fe0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemv_gpu_subgroup1x64_bfyx_ff_SIMD16.cl
@@ -0,0 +1,128 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+#ifndef DIRECTION
+#define DIRECTION 0
+#endif
+
+#ifndef SIMD
+#define SIMD 16
+#endif
+
+// Sums value of result across all subgroups.
+#define SUM_ACROSS_SUB_GROUP(val) \
+ \
+{ \
+    val += intel_sub_group_shuffle(val, x+1); \
+    val += intel_sub_group_shuffle(val, x+2); \ 
+    val += intel_sub_group_shuffle(val, x+4); \ 
+    val += (SIMD > 8) ? intel_sub_group_shuffle(val, x+8) : 0; \ 
+    val += (SIMD > 16) ? intel_sub_group_shuffle(val, x+16) : 0; \ 
+} 
+
+// input     = [    batch,  sequence,               1,      input_size ]
+// weights   = [        1, direction, 4 * hidden_size,      input_size ]
+// recurrent = [        1, direction, 4 * hidden_size,     hidden_size ]
+// biases    = [        1,         1,       direction, 4 * hidden_size ] optional
+// hidden    = [    batch, direction,               1,     hidden_size ] optional
+// tempGEMM  = [    batch, direction,               1, 4 * hidden_size ] output
+
+__attribute__((reqd_work_group_size(SIMD, 1, 1)))
+KERNEL(lstm_gemm)(
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* output,
+    const __global WEIGHTS_TYPE* weights
+#if HIDDEN_TERM
+    , const __global OUTPUT_TYPE* hidden,
+    const __global RECURRENT_TYPE* recurrent
+#endif
+#if BIAS_TERM
+    , const __global BIAS_TYPE* biases
+#endif
+    )
+{
+    const uint x = get_local_id(0);
+    const uint y = get_global_id(1);
+	const int local_sz = get_local_size(0);
+	const int weight_num_rows = get_global_size(1);
+
+	uint K; 	
+	int start_offset;
+	int end_offset;
+	int matrix_offset;  
+	int vector_offset; 
+	float4 sum;
+	float result;
+	
+	K = INPUT0_SIZE_X;  // Width of  weight matrix
+	start_offset = GET_DATA_INDEX(WEIGHTS, 0, DIRECTION, y, 0);  // set as the starting offset of the weight matrix 
+	end_offset = start_offset + K;
+	matrix_offset = start_offset + (x * 4);  // Weight offset for the work item to work on
+	vector_offset = GET_DATA_INDEX(INPUT0, 0, 0, INPUT_DIRECTION, (x*4));  // Input offset for the work item to work on
+	sum = (float4)(0.f);
+	result = 0;
+	for(; matrix_offset < end_offset; matrix_offset += (local_sz * 4), vector_offset += (local_sz * 4))
+	{
+		float4 mask = (float4) (1 , (matrix_offset + 1) < end_offset , (matrix_offset + 2) < end_offset , (matrix_offset + 3) < end_offset);
+		float4 m = (float4) (weights[matrix_offset], weights[matrix_offset + 1], weights[matrix_offset + 2], weights[matrix_offset + 3]);
+		m = m * mask;
+		
+		const float4 v = (float4) (input[vector_offset], input[vector_offset + 1], input[vector_offset + 2], input[vector_offset + 3]);
+		
+		sum = mad(m, v, sum);
+	}
+	
+	result = sum.x + sum.y + sum.z + sum.w;
+
+#if HIDDEN_TERM
+	K = HIDDEN_SIZE_X;  // width of recurrent matrix
+	start_offset =  GET_DATA_INDEX(RECURRENT, 0, DIRECTION, y, 0);  // set as the starting offset of the recurrent matrix 
+	end_offset = start_offset + K;
+	matrix_offset = start_offset + (x * 4);  // recurrent offset for the work item to work on
+	vector_offset = GET_DATA_INDEX(HIDDEN, 0, 0, HIDDEN_DIRECTION, (x*4));  // hidden vector offset for the work item to work on
+	sum = (float4)(0.f);
+	for(; matrix_offset < end_offset; matrix_offset += (local_sz * 4), vector_offset += (local_sz * 4))
+	{
+		float4 mask = (float4) (1 , (matrix_offset + 1) < end_offset , (matrix_offset + 2) < end_offset , (matrix_offset + 3) < end_offset);
+		float4 m = (float4) (recurrent[matrix_offset], recurrent[matrix_offset + 1], recurrent[matrix_offset + 2], recurrent[matrix_offset + 3]);
+		m = m * mask;
+
+		const float4 v = (float4) (hidden[vector_offset], hidden[vector_offset + 1], hidden[vector_offset + 2], hidden[vector_offset + 3]);
+		
+		sum = mad(m, v, sum);
+	}
+	
+	result += sum.x + sum.y + sum.z + sum.w;
+#endif
+	
+	// Add together partial sums contained in each work item's "result" variable
+	SUM_ACROSS_SUB_GROUP(result);
+
+	if(x == 0) 
+	{	
+		output[y] = (OUTPUT_TYPE)result;
+
+#if BIAS_TERM
+		const uint bias_idx = GET_DATA_INDEX(BIAS, 0, 0, DIRECTION, y);
+		float bias = (ACCUMULATOR_TYPE)biases[bias_idx];
+		output[y] += (OUTPUT_TYPE)bias;
+#endif 
+	}
+}
+
+#undef SUM_ACROSS_SUB_GROUP
+#undef SIMD
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.cl
new file mode 100644
index 000000000..0be579bc0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/lstm_gemv_gpu_subgroup1x64_bfyx_hh_SIMD16.cl
@@ -0,0 +1,131 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+#ifndef DIRECTION
+#define DIRECTION 0
+#endif
+
+#ifndef SIMD
+#define SIMD 16
+#endif
+
+// Sums value of result across all subgroups.
+#define SUM_ACROSS_SUB_GROUP(val) \
+ \
+{ \
+    val += intel_sub_group_shuffle(val, x+1); \
+    val += intel_sub_group_shuffle(val, x+2); \ 
+    val += intel_sub_group_shuffle(val, x+4); \ 
+    val += intel_sub_group_shuffle(val, x+8); \
+} 
+
+// input     = [    batch,  sequence,               1,      input_size ]
+// weights   = [        1, direction, 4 * hidden_size,      input_size ]
+// recurrent = [        1, direction, 4 * hidden_size,     hidden_size ]
+// biases    = [        1,         1,       direction, 4 * hidden_size ] optional
+// hidden    = [    batch, direction,               1,     hidden_size ] optional
+// tempGEMM  = [    batch, direction,               1, 4 * hidden_size ] output
+
+__attribute__((reqd_work_group_size(SIMD, 1, 1)))
+KERNEL(lstm_gemm)(
+    const __global INPUT0_TYPE* input,
+    __global OUTPUT_TYPE* output,
+    const __global WEIGHTS_TYPE* weights
+#if HIDDEN_TERM
+    , const __global OUTPUT_TYPE* hidden,
+    const __global RECURRENT_TYPE* recurrent
+#endif
+#if BIAS_TERM
+    , const __global BIAS_TYPE* biases
+#endif
+    )
+{
+    const uint x = get_local_id(0);
+    const uint y = get_global_id(1);
+	const int local_sz = get_local_size(0);
+
+	uint K; 	
+	int start_offset;
+	int end_offset;
+	int matrix_offset;  
+	int vector_offset; 
+	float4 sum;
+	float result;
+	
+	K = INPUT0_SIZE_X;  // Width of  weight matrix
+	start_offset = GET_DATA_INDEX(WEIGHTS, 0, DIRECTION, y, 0);  // set as the starting offset of the weight matrix 
+	end_offset = start_offset + K;
+	matrix_offset = start_offset + (x * 4);  // Weight offset for the work item to work on
+	vector_offset = GET_DATA_INDEX(INPUT0, 0, 0, INPUT_DIRECTION, (x*4));  // Input offset for the work item to work on
+	sum = (float4)(0.f);
+	result = 0;
+	for(; matrix_offset < end_offset; matrix_offset += (local_sz * 4), vector_offset += (local_sz * 4))
+	{
+		half4 mask = (half4) (1 , (matrix_offset + 1) < end_offset , (matrix_offset + 2) < end_offset , (matrix_offset + 3) < end_offset);
+		half4 m = (half4) (weights[matrix_offset], weights[matrix_offset + 1], weights[matrix_offset + 2], weights[matrix_offset + 3]);
+		m = m * mask;
+		
+		const half4 v = (half4)(input[vector_offset], input[vector_offset + 1], input[vector_offset + 2], input[vector_offset + 3]);
+		
+		sum = mad(convert_float4(m), convert_float4(v), sum);
+	}
+	
+	result = sum.x + sum.y + sum.z + sum.w;
+
+#if HIDDEN_TERM
+	K = HIDDEN_SIZE_X;  // width of recurrent matrix
+	start_offset = GET_DATA_INDEX(RECURRENT, 0, DIRECTION, y, 0);  // set as the starting offset of the recurrent matrix 
+	end_offset = start_offset + K;
+	matrix_offset = start_offset + (x * 4);  // recurrent offset for the work item to work on
+	vector_offset = GET_DATA_INDEX(HIDDEN, 0, 0, HIDDEN_DIRECTION, (x*4));  // hidden vector offset for the work item to work on
+	sum = (float4)(0.f);
+	for(; matrix_offset < end_offset; matrix_offset += (local_sz * 4), vector_offset += (local_sz * 4))
+	{
+		half4 mask = (half4) (1 , (matrix_offset + 1) < end_offset , (matrix_offset + 2) < end_offset , (matrix_offset + 3) < end_offset);
+		half4 m = (half4) (recurrent[matrix_offset], recurrent[matrix_offset + 1], recurrent[matrix_offset + 2], recurrent[matrix_offset + 3]);
+		m = m * mask;
+
+		const half4 v = (half4) (hidden[vector_offset], hidden[vector_offset + 1], hidden[vector_offset + 2], hidden[vector_offset + 3]);
+		
+		sum = mad(convert_float4(m), convert_float4(v), sum);
+	}
+	
+	result += sum.x + sum.y + sum.z + sum.w;
+#endif
+	
+	// Add together partial sums contained in each work item's "result" variable
+	SUM_ACROSS_SUB_GROUP(result);
+
+	if(x == 0) 
+	{	
+	    output[y] = 0;// (half)result;
+
+#if BIAS_TERM
+		const uint bias_idx = GET_DATA_INDEX(BIAS, 0, 0, DIRECTION, y);
+		half bias = biases[bias_idx];
+		result += (float)bias;
+#endif 
+
+		output[y] = (half)result;
+		//output[y] = convert_half_rte(result);
+
+
+	}
+}
+
+#undef SUM_ACROSS_SUB_GROUP
+#undef SIMD
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/one_hot_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/one_hot_ref.cl
new file mode 100644
index 000000000..b3f02ae9a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/one_hot_ref.cl
@@ -0,0 +1,39 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+#define GET_COORDS_INDEX(prefix, coords) GET_DATA_INDEX(prefix, coords[0], coords[1], coords[2], coords[3])
+
+KERNEL(one_hot_ref)(
+    const __global INPUT0_TYPE* input,
+    __global INPUT0_TYPE* output)
+{
+    uint in_coords[4] = { 0, get_global_id(0), get_global_id(1), get_global_id(2) };
+    uint out_coords[4] = { 0, get_global_id(0), get_global_id(1), get_global_id(2) };
+    for (uint i = 0; i < ONE_HOT_AXIS; ++i)
+        out_coords[i] = out_coords[i + 1];
+
+    // Fill the output with 0
+    for (out_coords[ONE_HOT_AXIS] = 0; out_coords[ONE_HOT_AXIS] < ONE_HOT_LIMIT; ++out_coords[ONE_HOT_AXIS])
+        output[GET_COORDS_INDEX(OUTPUT, out_coords)] = 0;
+
+    // Put in the 1; ignore bad input values
+    INPUT0_TYPE val = input[GET_COORDS_INDEX(INPUT0, in_coords)];
+    if (val >= 0 && val < ONE_HOT_LIMIT)
+    {
+        out_coords[ONE_HOT_AXIS] = val;
+        output[GET_COORDS_INDEX(OUTPUT, out_coords)] = 1;
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/permute_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/permute_ref.cl
index a980555cd..a85c82f02 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/permute_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/permute_ref.cl
@@ -12,34 +12,29 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "include/common.cl"
-#include "include/data_types.cl"
+#include "include/include_all.cl"
 
 
 KERNEL (permute_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
 {
     uint4 input_indices, output_indices;
     
-    input_indices[0] = get_global_id(0);
-    input_indices[1] = get_global_id(1);
-    input_indices[2] = get_global_id(2) % INPUT0_SIZES[2];
-    input_indices[3] = get_global_id(2) / INPUT0_SIZES[2];
+    //gws(y, x, b*f)
+    //input_indices[b, f, x, y]
+    input_indices[3] = get_global_id(0); 
+    input_indices[2] = get_global_id(1);
+    input_indices[1] = get_global_id(2) % INPUT0_FEATURE_NUM;
+    input_indices[0] = get_global_id(2) / INPUT0_FEATURE_NUM;
     
+    //PERMUTE_ORDER[b, f, x, y]
+    //output_indices[b, f, x, y]
     output_indices[0] = input_indices[PERMUTE_ORDER[0]];
     output_indices[1] = input_indices[PERMUTE_ORDER[1]];
     output_indices[2] = input_indices[PERMUTE_ORDER[2]];
     output_indices[3] = input_indices[PERMUTE_ORDER[3]];
     
-    uint input_offset =  INPUT0_OFFSET +
-                         input_indices[0]*INPUT0_PITCHES[0] +
-                         input_indices[1]*INPUT0_PITCHES[1] +
-                         input_indices[2]*INPUT0_PITCHES[2] +
-                         input_indices[3]*INPUT0_PITCHES[3];
-    uint output_offset = OUTPUT_OFFSET +
-                         output_indices[0]*OUTPUT_PITCHES[0] +
-                         output_indices[1]*OUTPUT_PITCHES[1] +
-                         output_indices[2]*OUTPUT_PITCHES[2] +
-                         output_indices[3]*OUTPUT_PITCHES[3];
+    uint input_offset =  GET_DATA_INDEX(INPUT0, input_indices[0], input_indices[1], input_indices[3], input_indices[2]);
+    uint output_offset = GET_DATA_INDEX(OUTPUT, output_indices[0], output_indices[1], output_indices[3], output_indices[2]);
 
     output[output_offset] = ACTIVATION(input[input_offset], NL_M, NL_N);
 }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_yx_fsv4.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_yx_fsv4.cl
new file mode 100644
index 000000000..a31592d69
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_b_fs_yx_fsv4.cl
@@ -0,0 +1,143 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+#if MAX_POOLING
+    #define INIT_VAL CHAR_MIN
+#elif AVG_POOLING
+    #define INIT_VAL 0
+#else
+#error
+#endif
+
+
+inline int FUNC(apply_pooling)(int tmp, int in)
+{
+#if MAX_POOLING
+    return max(tmp, in);
+#elif AVG_POOLING
+    return tmp + in;
+#endif
+}
+
+KERNEL(pooling_gpu_b_fs_yx_fsv4)(
+    const __global UNIT_TYPE* input,
+    __global UNIT_TYPE* output)
+{
+    const uint x    = (uint)get_global_id(0);
+    const uint y    = (uint)get_global_id(1);
+    const uint bf   = (uint)get_global_id(2);
+    const uint f    = (bf * 4) % INPUT0_FEATURE_NUM;
+    const uint b    = (bf * 4) / INPUT0_FEATURE_NUM;
+
+    const int offset_x = (int)x*STRIDE_SIZE_X - PADDING_SIZE_X;
+    const int offset_y = (int)y*STRIDE_SIZE_Y - PADDING_SIZE_Y;
+
+    int result[4] = { INIT_VAL, INIT_VAL, INIT_VAL, INIT_VAL };
+
+#ifdef CHECK_BOUNDRY
+    if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
+        offset_y + POOL_SIZE_Y < 0 || offset_y >= INPUT0_SIZE_Y)
+    {
+        return;
+    }
+
+#ifdef DYNAMIC_KERNEL_DIVIDER
+    uint num_elementes = 0;
+#endif
+
+    const uint batch_and_feature_offset = GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, 0, 0);
+    for(uint j = 0; j < POOL_SIZE_Y; j++)
+    {
+        int input_offset_y = offset_y + j;
+        bool zero_y = input_offset_y >= INPUT0_SIZE_Y || input_offset_y < 0;
+        if(!zero_y)
+        {
+            for(uint i = 0; i < POOL_SIZE_X; i++)
+            {
+                int input_offset_x = offset_x + i;
+                bool zero = input_offset_x >= INPUT0_SIZE_X || input_offset_x < 0;
+                if(!zero)
+                {
+                    const uint input_idx = batch_and_feature_offset + input_offset_y*IN_Y_PITCH + input_offset_x*IN_X_PITCH;
+
+                    int int_data   = *((const __global int*)(input + input_idx));
+                    char4 ch4_data = as_char4(int_data);
+                    result[0] = FUNC_CALL(apply_pooling)(result[0], (int)ch4_data[0]);
+                    result[1] = FUNC_CALL(apply_pooling)(result[1], (int)ch4_data[1]);
+                    result[2] = FUNC_CALL(apply_pooling)(result[2], (int)ch4_data[2]);
+                    result[3] = FUNC_CALL(apply_pooling)(result[3], (int)ch4_data[3]);
+
+#ifdef DYNAMIC_KERNEL_DIVIDER
+                    num_elementes++;
+#endif
+                }
+            }
+        }
+    }
+#ifdef DYNAMIC_WITH_PADDING_KERNEL_DIVIDER
+    const int hend = min(offset_y + POOL_SIZE_Y, INPUT0_SIZE_Y + PADDING_SIZE_Y);
+    const int wend = min(offset_x + POOL_SIZE_X, INPUT0_SIZE_X + PADDING_SIZE_X);
+    const uint num_elementes = (hend - offset_y) * (wend - offset_x);
+#endif
+#else // !CHECK_BOUNDRY
+    uint input_idx = GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, offset_y, offset_x);
+
+    for(uint j = 0; j < POOL_SIZE_Y; j++)
+    {
+        for(uint i = 0; i < POOL_SIZE_X; i++)
+        {
+            int int_data   = *((const __global int*)(input + input_idx));
+            char4 ch4_data = as_char4(int_data);
+            result[0] = FUNC_CALL(apply_pooling)(result[0], (int)ch4_data[0]);
+            result[1] = FUNC_CALL(apply_pooling)(result[1], (int)ch4_data[1]);
+            result[2] = FUNC_CALL(apply_pooling)(result[2], (int)ch4_data[2]);
+            result[3] = FUNC_CALL(apply_pooling)(result[3], (int)ch4_data[3]);
+
+            input_idx += IN_X_PITCH;
+        }
+        input_idx += (IN_Y_PITCH - POOL_SIZE_X*IN_X_PITCH);
+    }
+
+#if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
+    const uint num_elementes = POOL_SIZE_X*POOL_SIZE_Y;
+#endif
+#endif
+
+#if defined AVG_POOLING
+    #if defined(DYNAMIC_KERNEL_DIVIDER) || defined(DYNAMIC_WITH_PADDING_KERNEL_DIVIDER)
+        for(uint i = 0; i < 4; i++)
+        {
+            result[i] = convert_int(round(((float)result[i] / max(num_elementes, (uint)1)));
+        }
+    #else
+        for(uint i = 0; i < 4; i++)
+        {
+            result[i] = convert_int(round((float)result[i] / (int)(POOL_SIZE_Y * POOL_SIZE_X)));
+        }
+    #endif
+#endif
+
+    char4 char_res;
+    for(uint op = 0; op < 4; op++)
+    {
+        char_res[op] = ACTIVATION(convert_char(result[op]), NL_M ,NL_N);
+    }
+    const uint output_pos = GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, b, f, y, x);
+    *((__global int*)(output + output_pos)) = as_int(char_res);
+}
+
+#undef INIT_VAL
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_fs_bs_yx_bsv4_fsv32.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_fs_bs_yx_bsv4_fsv32.cl
index 130cd8cca..c23652ad9 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_fs_bs_yx_bsv4_fsv32.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pooling_gpu_fs_bs_yx_bsv4_fsv32.cl
@@ -43,8 +43,8 @@ KERNEL(pooling_gpu_fs_bs_yx_bsv4_fsv32)(
     const uint bf   = (uint)get_global_id(2);
 	// we process 4 features per workitem that's why we need to divide it
     const uint aligned32_features = ((INPUT0_FEATURE_NUM + 31) / 32) * 32;
-    const uint f    = 4 * (bf % (aligned32_features / 4));
-    const uint b_block = bf / (aligned32_features / 4);
+    const uint f    = (get_global_id(2) * 4) % aligned32_features;
+    const uint b = 4 * ((get_global_id(2) * 4) / aligned32_features);
     
     if (x >= OUTPUT_SIZE_X)
     {
@@ -54,11 +54,7 @@ KERNEL(pooling_gpu_fs_bs_yx_bsv4_fsv32)(
     const int offset_x = (int)x*STRIDE_SIZE_X - PADDING_SIZE_X;
     const int offset_y = (int)y*STRIDE_SIZE_Y - PADDING_SIZE_Y;
     
-    int4 result[4];
-    for(uint b = 0; b < 4; b++)
-    {
-        result[b] = INIT_VAL;
-    }
+    int4 result[4] = { INIT_VAL };
 
 #ifdef CHECK_BOUNDRY
     if (offset_x + POOL_SIZE_X < 0 || offset_x >= INPUT0_SIZE_X ||
@@ -71,7 +67,7 @@ KERNEL(pooling_gpu_fs_bs_yx_bsv4_fsv32)(
     uint num_elementes = 0;
 #endif
 
-    const uint batch_and_feature_offset = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(INPUT0, b_block * 4, f, 0, 0);
+    const uint batch_and_feature_offset = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(INPUT0, b, f, 0, 0);
     for(uint j = 0; j < POOL_SIZE_Y; j++)
     {
         int input_offset_y = offset_y + j;
@@ -110,7 +106,7 @@ KERNEL(pooling_gpu_fs_bs_yx_bsv4_fsv32)(
     const uint num_elementes = (hend - offset_y) * (wend - offset_x);
 #endif
 #else
-    uint input_idx = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(INPUT0, b_block * 4, f, offset_y, offset_x);
+    uint input_idx = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(INPUT0, b, f, offset_y, offset_x);
 
     for(uint j = 0; j < POOL_SIZE_Y; j++)
     {
@@ -156,14 +152,18 @@ KERNEL(pooling_gpu_fs_bs_yx_bsv4_fsv32)(
     #endif
 #endif
 
-for(uint b = 0; b < 4; b++)
-{
-    for(uint op = 0; op < 4; op++)
+    int4 char_result;
+    for(uint b = 0; b < 4; b++)
     {
-        const uint output_pos = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b_block*4 + b, f+op, y, x);
-        output[output_pos] = ACTIVATION(convert_char(result[b][op]), NL_M ,NL_N);
+        char4 char_res = as_char4(char_result[b]);
+        for(uint op = 0; op < 4; op++)
+        {
+            char_res[op] = ACTIVATION(convert_char(result[b][op]), NL_M ,NL_N);
+        }
+        char_result[b] = as_int(char_res);
     }
-}
+    const uint output_pos = GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b, f, y, x);
+    intel_sub_group_block_write4((__global uint*)(output + output_pos), as_uint4(char_result));																						
 }
 
 #undef INIT_VAL
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pyramid_roi_align_gpu_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pyramid_roi_align_gpu_ref.cl
new file mode 100644
index 000000000..f1b766417
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/pyramid_roi_align_gpu_ref.cl
@@ -0,0 +1,159 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/include_all.cl"
+
+#define META_OFFSET_X                   4
+#define META_OFFSET_Y                   5
+
+#define SIZE_TAB_PARAMETERS             4
+
+struct Parameters
+{
+    int h_source, w_source, f_Size, x_Size, y_Size, offset;
+};
+
+__constant struct Parameters parameters [SIZE_TAB_PARAMETERS] =
+        {
+            { INPUT2_SIZE_Y, INPUT2_SIZE_X, INPUT2_FEATURE_PITCH, INPUT2_X_PITCH, INPUT2_Y_PITCH, INPUT2_OFFSET },
+            { INPUT3_SIZE_Y, INPUT3_SIZE_X, INPUT3_FEATURE_PITCH, INPUT3_X_PITCH, INPUT3_Y_PITCH, INPUT3_OFFSET },
+            { INPUT4_SIZE_Y, INPUT4_SIZE_X, INPUT4_FEATURE_PITCH, INPUT4_X_PITCH, INPUT4_Y_PITCH, INPUT4_OFFSET },
+            { INPUT5_SIZE_Y, INPUT5_SIZE_X, INPUT5_FEATURE_PITCH, INPUT5_X_PITCH, INPUT5_Y_PITCH, INPUT5_OFFSET }
+        };
+
+
+KERNEL(pyramidROIAlign_gpu_ref)(
+    const __global INPUT0_TYPE *boxes,
+    const __global INPUT1_TYPE *image_meta,
+    const __global INPUT2_TYPE *P2,
+    const __global INPUT3_TYPE *P3,
+    const __global INPUT4_TYPE *P4,
+    const __global INPUT5_TYPE *P5,
+    const __global INPUT6_TYPE *dim,
+    __global OUTPUT_TYPE *output)
+{
+    // [CONSTEXPR]:
+    const uint kerNum = (uint) get_global_id(0);
+
+    const __global float *feature_map_Ptr[SIZE_TAB_PARAMETERS];
+    int f_Size;
+
+    INPUT1_TYPE img_dim_X = image_meta[GET_DATA_INDEX(INPUT1, 0, 0, 0, META_OFFSET_X)];
+    INPUT1_TYPE img_dim_Y = image_meta[GET_DATA_INDEX(INPUT1, 0, 0, 0, META_OFFSET_Y)];
+
+    INPUT1_TYPE image_area = img_dim_X * img_dim_Y;
+    INPUT1_TYPE scale = sqrt(image_area) / 224.0;
+
+    INPUT0_TYPE hU = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 2)];
+    INPUT0_TYPE hL = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 0)];
+    INPUT0_TYPE h = hU - hL;
+    INPUT0_TYPE wU = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 3)];
+    INPUT0_TYPE wL = boxes[GET_DATA_INDEX(INPUT0, 0, 0, kerNum, 1)];
+    INPUT0_TYPE w = wU - wL;
+
+    int roi_level = (int)round(log2(sqrt(h*w) * scale));
+
+    // 0 <= roi_level <= 3
+    roi_level = min(3, max(0, 2 + roi_level));
+
+    feature_map_Ptr[0] = P2;
+    feature_map_Ptr[1] = P3;
+    feature_map_Ptr[2] = P4;
+    feature_map_Ptr[3] = P5;
+
+    f_Size = parameters[roi_level].f_Size;
+
+    //calculate cooficients for transformation
+    INPUT0_TYPE y1 = hL * (parameters[roi_level].h_source - 1);
+    INPUT0_TYPE x1 = wL * (parameters[roi_level].w_source - 1);
+    INPUT0_TYPE y2 = hU * (parameters[roi_level].h_source - 1);
+    INPUT0_TYPE x2 = wU * (parameters[roi_level].w_source - 1);
+    INPUT0_TYPE deltaX = (x2 - x1) / (OUTPUT_SIZE_X - 1);
+    INPUT0_TYPE deltaY = (y2 - y1) / (OUTPUT_SIZE_Y - 1);
+    INPUT0_TYPE y = y1;
+
+   //transformation
+    for (int i = 0; i < OUTPUT_SIZE_Y; ++i) //loop by 'y' coordinate
+    {
+        int ya = (int)floor(y);
+        int yb = (int)ceil(y);
+
+        if (ya < 0) ya = 0;
+        if (yb >= parameters[roi_level].h_source) yb = parameters[roi_level].h_source - 1;
+        if (yb - ya == 0)
+        {
+            if (yb + 2 < parameters[roi_level].h_source) ++yb;
+            else --ya;
+        }
+
+        INPUT0_TYPE x = x1;
+
+        for (int j = 0; j < OUTPUT_SIZE_X; ++j) //loop by 'x' coordinate
+        {
+            int xa = (int)floor(x);
+            int xb = (int)ceil(x);
+            if (xa < 0) xa = 0;
+            if (xb >= parameters[roi_level].w_source) xb = parameters[roi_level].w_source - 1;
+            if (xb - xa == 0)
+            {
+                if (xb + 2 < parameters[roi_level].w_source) ++xb;
+                else --xa;
+            }
+
+    /* BILINEAR TRANSFORMATION
+         (xa,yb,f3)*---------------------------------*(xb,yb,f2)
+                   |                                 |
+                   |          *(x,y)                 |
+                   |                                 |
+         (xa,ya,f0)*---------------------------------*(xb,ya,f1)
+   */
+            //cooficients for bilinear transformation
+            INPUT0_TYPE a = yb - y;
+            INPUT0_TYPE b = y - ya;
+            INPUT0_TYPE c = xb - x;
+            INPUT0_TYPE d = x - xa;
+
+            /*#define GET_DATA_INDEX(prefix, b, f, y, x)  \
+                CAT(prefix, _OFFSET) +                  \
+                (x)*CAT(prefix, _X_PITCH) +             \
+                (y)*CAT(prefix, _Y_PITCH) +             \
+                (f)*CAT(prefix, _FEATURE_PITCH) +       \
+                (b)*CAT(prefix, _BATCH_PITCH)
+
+            For P2, P3, P4, P5 batch size is always 0 */
+
+            size_t f0Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * ya + parameters[roi_level].x_Size * xa;
+            size_t f1Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * ya + parameters[roi_level].x_Size * xb;
+            size_t f2Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * yb + parameters[roi_level].x_Size * xb;
+            size_t f3Ind = parameters[roi_level].offset + parameters[roi_level].y_Size * yb + parameters[roi_level].x_Size * xa;
+            size_t ind_out = OUTPUT_OFFSET + i * OUTPUT_Y_PITCH + j * OUTPUT_X_PITCH + kerNum * OUTPUT_BATCH_PITCH;
+
+            for (int k = 0; k < OUTPUT_FEATURE_NUM; ++k) //transformation for every feature
+            {
+                INPUT0_TYPE f0 = feature_map_Ptr[roi_level][k * f_Size + f0Ind];
+                INPUT0_TYPE f1 = feature_map_Ptr[roi_level][k * f_Size + f1Ind];
+                INPUT0_TYPE f2 = feature_map_Ptr[roi_level][k * f_Size + f2Ind];
+                INPUT0_TYPE f3 = feature_map_Ptr[roi_level][k * f_Size + f3Ind];
+
+                INPUT0_TYPE f03 = f3 * b + f0 * a;
+                INPUT0_TYPE f12 = f2 * b + f1 * a;
+                INPUT0_TYPE f = f03 * c + f12 * d;
+
+                output[k * OUTPUT_FEATURE_PITCH + ind_out] = f;
+            }
+            x += deltaX;
+        }
+        y += deltaY;
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data.cl
index 591a07c0c..04a795512 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data.cl
@@ -30,8 +30,12 @@ inline uint FUNC(get_input_index)(uint b, uint f, uint y, uint x)
     return GET_DATA_BF8_XY16_INDEX(INPUT0, b, f, y, x);
 #elif defined INPUT0_LAYOUT_BYXF_AF32
 	return GET_DATA_BYXF_AF32_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BYX8_F4
+	return GET_DATA_BYX8_F4_INDEX(INPUT0, b, f, y, x);
 #elif defined INPUT0_LAYOUT_FS_BS_YX_BSV4_FSV32
     return GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_B_FS_YX_FSV4
+    return GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, y, x);
 #else
 #error reorder_data.cl: input format - not supported
 #endif
@@ -50,8 +54,12 @@ inline uint FUNC(get_output_index)(uint b, uint f, uint y, uint x)
     return GET_DATA_BF8_XY16_INDEX(OUTPUT, b, f, y, x);
 #elif defined OUTPUT_LAYOUT_BYXF_AF32
 	return GET_DATA_BYXF_AF32_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BYX8_F4
+	return GET_DATA_BYX8_F4_INDEX(OUTPUT, b, f, y, x);
 #elif defined OUTPUT_LAYOUT_FS_BS_YX_BSV4_FSV32
     return GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_B_FS_YX_FSV4
+    return GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, b, f, y, x);
 #else
 #error reorder_data.cl: output format - not supported
 #endif
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_byxf_f32_to_byx8_f4_i8.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_byxf_f32_to_byx8_f4_i8.cl
new file mode 100644
index 000000000..0efd2cc33
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_data_byxf_f32_to_byx8_f4_i8.cl
@@ -0,0 +1,136 @@
+// Copyright (c) 2016-2017 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/reshape_dims.cl"
+#include "include/fetch.cl"
+
+#include "include/data_types.cl"
+
+///////////////////////// Input Index /////////////////////////
+inline uint FUNC(get_input_index)(uint b, uint f, uint y, uint x)
+{
+#if   INPUT0_SIMPLE
+    return GET_DATA_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BS_F_BSV8__AF8  || \
+      defined INPUT0_LAYOUT_BS_F_BSV16__AF8
+    return GET_DATA_BS_FYX_BSV8_INDEX(INPUT0, b, f, y, x, SUB_GROUP_SIZE);
+#elif defined INPUT0_LAYOUT_BF8_XY16
+    return GET_DATA_BF8_XY16_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BYXF_AF32
+	return GET_DATA_BYXF_AF32_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_BYX8_F4
+	return GET_DATA_BYX8_F4_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_FS_BS_YX_BSV4_FSV32
+    return GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(INPUT0, b, f, y, x);
+#elif defined INPUT0_LAYOUT_B_FS_YX_FSV4
+    return GET_DATA_B_FS_YX_FSV4_INDEX(INPUT0, b, f, y, x);
+#else
+#error reorder_data.cl: input format - not supported
+#endif
+}
+
+///////////////////////// Output Index /////////////////////////
+
+inline uint FUNC(get_output_index)(uint b, uint f, uint y, uint x)
+{
+#if   OUTPUT_SIMPLE
+    return GET_DATA_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BS_F_BSV8__AF8  || \
+      defined OUTPUT_LAYOUT_BS_F_BSV16__AF8
+    return GET_DATA_BS_FYX_BSV8_INDEX(OUTPUT, b, f, y, x, SUB_GROUP_SIZE);
+#elif defined OUTPUT_LAYOUT_BF8_XY16
+    return GET_DATA_BF8_XY16_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BYXF_AF32
+	return GET_DATA_BYXF_AF32_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_BYX8_F4
+	return GET_DATA_BYX8_F4_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_FS_BS_YX_BSV4_FSV32
+    return GET_DATA_FS_BS_YX_BSV4_FSV32_INDEX(OUTPUT, b, f, y, x);
+#elif defined OUTPUT_LAYOUT_B_FS_YX_FSV4
+    return GET_DATA_B_FS_YX_FSV4_INDEX(OUTPUT, b, f, y, x);
+#else
+#error reorder_data.cl: output format - not supported
+#endif
+}
+
+__attribute__((intel_reqd_sub_group_size(16)))
+KERNEL (reorder_data_byxf_f32_to_byx8_f4_i8)(
+    const __global INPUT_REORDER_TYPE* input, 
+    __global OUTPUT_REORDER_TYPE* output
+#ifdef MEAN_SUBTRACT_IN_BUFFER
+    , __global MEAN_SUBTRACT_TYPE* mean_subtract
+#endif
+    )
+{
+    const uint x = get_global_id(0);
+    const uint y = get_group_id(1);
+    const uint b = get_group_id(2);
+
+    const uint input_idx  = FUNC_CALL(get_input_index)(b, 0, y, x);
+    const uint output_idx = FUNC_CALL(get_output_index)(b, 0, y, x);
+
+#if defined MEAN_SUBTRACT_INSIDE_PARAMS
+    float4 res;
+    res.s0 = TO_MEAN_TYPE(input[input_idx]);
+    res.s0 = MEAN_OP(res.s0, VALUE_TO_SUBTRACT[0 % VALUE_TO_SUBTRACT_SIZE]);
+    res.s1 = TO_MEAN_TYPE(input[input_idx+1]);
+    res.s1 = MEAN_OP(res.s1, VALUE_TO_SUBTRACT[1 % VALUE_TO_SUBTRACT_SIZE]);
+    res.s2 = TO_MEAN_TYPE(input[input_idx+2]);
+    res.s2 = MEAN_OP(res.s2, VALUE_TO_SUBTRACT[2 % VALUE_TO_SUBTRACT_SIZE]);
+    res.s3 = 0;
+#elif defined MEAN_SUBTRACT_IN_BUFFER
+#if defined MEAN_PER_FEATURE
+    MAKE_VECTOR_TYPE(MEAN_SUBTRACT_TYPE, 4) res;
+    res.s0 = TO_MEAN_TYPE(input[input_idx]);
+    res.s0 = MEAN_OP(res.s0, mean_subtract[0]);
+    res.s1 = TO_MEAN_TYPE(input[input_idx+1]);
+    res.s1 = MEAN_OP(res.s1, mean_subtract[1]);
+    res.s2 = TO_MEAN_TYPE(input[input_idx+2]);
+    res.s2 = MEAN_OP(res.s2, mean_subtract[2]);
+    res.s3 = 0
+#else
+    MAKE_VECTOR_TYPE(MEAN_SUBTRACT_TYPE, 4) res;
+    res.s0 = TO_MEAN_TYPE(input[input_idx]);
+    res.s1 = TO_MEAN_TYPE(input[input_idx+1]);
+    res.s2 = TO_MEAN_TYPE(input[input_idx+2]);
+    res.s3 = 0; 
+
+    uint4 msv;
+    msv = FUNC_CALL(reshape_dims)(b,0,y,x, INPUT0_SIZE_Y, INPUT0_SIZE_X, MEAN_SUBTRACT_SIZE_Y, MEAN_SUBTRACT_SIZE_X, INPUT0_DIMS, MEAN_SUBTRACT_DIMS);
+    res.s0 = MEAN_OP(res.s0, mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[0], msv[1], msv[2], msv[3])]);
+
+    msv = FUNC_CALL(reshape_dims)(b,1,y,x, INPUT0_SIZE_Y, INPUT0_SIZE_X, MEAN_SUBTRACT_SIZE_Y, MEAN_SUBTRACT_SIZE_X, INPUT0_DIMS, MEAN_SUBTRACT_DIMS);
+    res.s1 = MEAN_OP(res.s1, mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[0], msv[1], msv[2], msv[3])]);
+
+    msv = FUNC_CALL(reshape_dims)(b,2,y,x, INPUT0_SIZE_Y, INPUT0_SIZE_X, MEAN_SUBTRACT_SIZE_Y, MEAN_SUBTRACT_SIZE_X, INPUT0_DIMS, MEAN_SUBTRACT_DIMS);
+    res.s2 = MEAN_OP(res.s2, mean_subtract[GET_DATA_INDEX_SAFE(MEAN_SUBTRACT, msv[0], msv[1], msv[2], msv[3])]);
+#endif
+#else
+    MAKE_VECTOR_TYPE(CALC_TYPE, 4) res;
+    res.s0 = TO_CALC_TYPE(input[input_idx]);
+    res.s1 = TO_CALC_TYPE(input[input_idx+1]);
+    res.s2 = TO_CALC_TYPE(input[input_idx+2]);
+    res.s3 = 0;
+#endif
+
+    char4 out_vals;
+    out_vals.s0 = ACTIVATION(TO_OUTPUT_REORDER_TYPE(res.s0), NL_M ,NL_N);
+    out_vals.s1 = ACTIVATION(TO_OUTPUT_REORDER_TYPE(res.s1), NL_M ,NL_N);
+    out_vals.s2 = ACTIVATION(TO_OUTPUT_REORDER_TYPE(res.s2), NL_M ,NL_N);
+    out_vals.s3 = 0;
+
+    __global uint* dst = (__global uint*)output;
+    dst[output_idx/4] = as_uint(out_vals);
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_weights.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_weights.cl
index 33a662a12..7caa43db4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_weights.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reorder_weights.cl
@@ -13,7 +13,9 @@
 // limitations under the License.
 
 
-#include "include/include_all.cl"
+#include "include/fetch.cl"
+#include "include/reshape_dims.cl"
+#include "include/data_types.cl"
 
 
 ///////////////////////// Input Index /////////////////////////
@@ -26,6 +28,10 @@ inline uint FUNC(get_input_index)(uint o, uint i, uint y, uint x)
       defined INPUT0_LAYOUT_OS_I_OSV8__AI8  || \
       defined INPUT0_LAYOUT_OS_I_OSV16__AI8
     return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
+#elif defined INPUT0_LAYOUT_IYX_OSV32
+    return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, 32);
+#elif defined INPUT0_LAYOUT_IYX_OSV64
+    return GET_FILTER_OS_IYX_OSV8_INDEX(INPUT0, o, i, y, x, 64);
 #elif defined INPUT0_LAYOUT_OS_IYX_OSV16_ROTATE_180
     return GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(INPUT0, o, i, y, x, SUB_GROUP_SIZE);
 #elif defined INPUT0_LAYOUT_I_YXS_OS_YXSV2_OSV16
@@ -38,6 +44,10 @@ inline uint FUNC(get_input_index)(uint o, uint i, uint y, uint x)
 	return GET_FILTER_OS_IS_YX_ISA8_OSV8_ISV4(INPUT0, o, i, y, x);
 #elif defined INPUT0_LAYOUT_IS_O_YX_ISV32
     return GET_FILTER_IS_O_YX_ISV32(INPUT0, o, i, y, x);
+#elif defined INPUT0_LAYOUT_IS_O32_YX_ISV32_SWIZZLED_BY_4
+    return GET_FILTER_IS_O32_YX_ISV32_SWIZZLED_BY_4(INPUT0, o, i, y, x);
+#elif defined INPUT0_LAYOUT_OS_IS_Y_X8_OSV8_ISV4
+    return GET_FILTER_OS_IS_Y_X8_OSV8_ISV4(INPUT0, o, i, y, x);
 #else
 #error reorder_weights.cl: input format - not supported
 #endif
@@ -54,6 +64,10 @@ inline uint FUNC(get_output_index)(uint o, uint i, uint y, uint x)
       defined OUTPUT_LAYOUT_OS_I_OSV8__AI8  || \
       defined OUTPUT_LAYOUT_OS_I_OSV16__AI8
     return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
+#elif defined OUTPUT_LAYOUT_OS_IYX_OSV32
+    return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, 32);
+#elif defined OUTPUT_LAYOUT_OS_IYX_OSV64
+    return GET_FILTER_OS_IYX_OSV8_INDEX(OUTPUT, o, i, y, x, 64);
 #elif defined OUTPUT_LAYOUT_OS_IYX_OSV16_ROTATE_180
     return GET_FILTER_OS_IYX_OSV8_ROTATE_180_INDEX(OUTPUT, o, i, y, x, SUB_GROUP_SIZE);
 #elif defined OUTPUT_LAYOUT_I_YXS_OS_YXSV2_OSV16
@@ -66,6 +80,14 @@ inline uint FUNC(get_output_index)(uint o, uint i, uint y, uint x)
 	return GET_FILTER_OS_IS_YX_ISA8_OSV8_ISV4(OUTPUT, o, i, y, x);
 #elif defined OUTPUT_LAYOUT_IS_O_YX_ISV32
     return GET_FILTER_IS_O_YX_ISV32(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_IS_O32_YX_ISV32_SWIZZLED_BY_4
+    return GET_FILTER_IS_O32_YX_ISV32_SWIZZLED_BY_4(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_OS_IS_Y_X8_OSV8_ISV4
+    return GET_FILTER_OS_IS_Y_X8_OSV8_ISV4(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_OS_IS_YX_OSV16_ISV4
+    return GET_FILTER_OS_IS_YX_OSV16_ISV4_INDEX(OUTPUT, o, i, y, x);
+#elif defined OUTPUT_LAYOUT_OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4
+    return GET_FILTER_OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4_INDEX(OUTPUT, o, i, y, x);
 #else
 #error reorder_weights.cl: output format - not supported
 #endif
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reverse_sequence_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reverse_sequence_ref.cl
new file mode 100644
index 000000000..96235d610
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/reverse_sequence_ref.cl
@@ -0,0 +1,43 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+KERNEL(reverse_sequence_ref)(const __global UNIT_TYPE* input, const __global float* seq_lengths, __global UNIT_TYPE* output)
+{
+    const uint batch = get_global_id(0);
+    const uint feature = get_global_id(1);
+    const uint y = get_global_id(2) / INPUT0_SIZE_X;
+    const uint x = get_global_id(2) % INPUT0_SIZE_X;
+    uint dimensions[] = { batch, feature, y, x };
+
+    const uint input_index = INPUT0_OFFSET +
+                             batch * INPUT0_BATCH_PITCH +
+                             feature * INPUT0_FEATURE_PITCH +
+                             y * INPUT0_Y_PITCH +
+                             x * INPUT0_X_PITCH;
+
+    const uint length = seq_lengths[dimensions[BATCH_AXIS]];
+    if (dimensions[SEQ_AXIS] < length)
+        dimensions[SEQ_AXIS] = length - dimensions[SEQ_AXIS] - 1;
+
+    const uint output_index = OUTPUT_OFFSET +
+                              dimensions[0] * OUTPUT_BATCH_PITCH +
+                              dimensions[1] * OUTPUT_FEATURE_PITCH +
+                              dimensions[2] * OUTPUT_Y_PITCH +
+                              dimensions[3] * OUTPUT_X_PITCH;
+
+    output[output_index] = input[input_index];
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ps_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ps_ref.cl
new file mode 100644
index 000000000..194b06d95
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ps_ref.cl
@@ -0,0 +1,141 @@
+// Copyright (c) 2016-2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "include/common.cl"
+#include "include/data_types.cl"
+
+// Each RoI is described by 5 elements [batch_id xmin ymin xmax ymax]
+#define ROI_NUM_ELEMENTS 5
+
+#define COORD_T float
+#define ACCUM_T float
+
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define CLAMP(v,l,u) MAX((l),MIN((v),(u)))
+
+KERNEL(roi_pooling_ps_gpu)(const __global INPUT0_TYPE * src_data,
+                                 __global OUTPUT_TYPE * dst_data,
+                           const __global INPUT1_TYPE * src_rois)
+{
+    const size_t i = get_global_id(0);
+
+    const uint x = i % OUTPUT_SIZE_X;
+    const uint y = i / OUTPUT_SIZE_X % OUTPUT_SIZE_Y;
+    const uint c = i / OUTPUT_SIZE_X / OUTPUT_SIZE_Y % OUTPUT_FEATURE_NUM;
+    const uint r = i / OUTPUT_SIZE_X / OUTPUT_SIZE_Y / OUTPUT_FEATURE_NUM % OUTPUT_ROI_NUM;
+
+    const __global INPUT1_TYPE* roi_ptr = &src_rois[INPUT1_BATCH_PITCH * r];
+    const int src_batch_idx = (int)(roi_ptr[0]);
+
+#if BILINEAR_POOLING
+
+    COORD_T roi_start_w = roi_ptr[1] * SPATIAL_SCALE;
+    COORD_T roi_start_h = roi_ptr[2] * SPATIAL_SCALE;
+    COORD_T roi_end_w   = roi_ptr[3] * SPATIAL_SCALE;
+    COORD_T roi_end_h   = roi_ptr[4] * SPATIAL_SCALE;
+
+    COORD_T roi_height = (roi_end_h - roi_start_h);
+    COORD_T roi_width  = (roi_end_w - roi_start_w);
+
+    ACCUM_T res = 0.0f;
+
+    for (int bin_y = 0; bin_y < SPATIAL_BINS_Y; bin_y++)
+    {
+        for (int bin_x = 0; bin_x < SPATIAL_BINS_X; bin_x++)
+        {
+            COORD_T box_xmin = roi_start_w + (bin_x + 0) * (roi_width / SPATIAL_BINS_X);
+            COORD_T box_xmax = roi_start_w + (bin_x + 1) * (roi_width / SPATIAL_BINS_X);
+            COORD_T box_ymin = roi_start_h + (bin_y + 0) * (roi_height / SPATIAL_BINS_Y);
+            COORD_T box_ymax = roi_start_h + (bin_y + 1) * (roi_height / SPATIAL_BINS_Y);
+
+            const uint gc = c + (bin_y*SPATIAL_BINS_X + bin_x)*OUTPUT_FEATURE_NUM;
+            const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*gc;
+            COORD_T height_scale = POOLED_HEIGHT > 1 ? (box_ymax - box_ymin) * (INPUT0_SIZE_Y - 1) / (POOLED_HEIGHT - 1)
+                                                     : 0.0f;
+            COORD_T width_scale = POOLED_WIDTH > 1 ? (box_xmax - box_xmin) * (INPUT0_SIZE_X - 1) / (POOLED_WIDTH - 1)
+                                                   : 0.0f;
+
+            float in_y = POOLED_HEIGHT > 1 ? (y * height_scale + box_ymin * (INPUT0_SIZE_Y - 1))
+                                           : 0.5f * (box_ymin + box_ymax) * (INPUT0_SIZE_Y - 1);
+            float in_x = POOLED_WIDTH > 1 ? (x * width_scale + box_xmin * (INPUT0_SIZE_X - 1))
+                                          : 0.5f * (box_xmin + box_xmax) * (INPUT0_SIZE_X - 1);
+
+            if (!(in_y < 0 || in_y > (COORD_T)(INPUT0_SIZE_Y - 1) ||
+                  in_x < 0 || in_x > (COORD_T)(INPUT0_SIZE_X - 1) || src_batch_idx == -1))
+            {
+                int top_y_index    = (int)(floor(in_y));
+                int bottom_y_index = (int)(min(ceil(in_y), (COORD_T)INPUT0_SIZE_Y - 1));
+                int left_x_index   = (int)(floor(in_x));
+                int right_x_index  = (int)(min(ceil(in_x), (COORD_T)INPUT0_SIZE_X - 1));
+
+                ACCUM_T top_left     = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
+                ACCUM_T top_right    = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
+                ACCUM_T bottom_left  = (ACCUM_T)data[bottom_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
+                ACCUM_T bottom_right = (ACCUM_T)data[bottom_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
+
+                ACCUM_T top    = top_left + (top_right - top_left) * (in_x - left_x_index);
+                ACCUM_T bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index);
+
+                res += top + (bottom - top) * (in_y - top_y_index);
+            }
+        }
+    }
+
+    res /= (SPATIAL_BINS_Y*SPATIAL_BINS_X);
+#elif AVG_POOLING
+    const uint work_c = x + POOLED_WIDTH * (y + POOLED_HEIGHT * c);
+    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*work_c;
+
+    const COORD_T roi_x  = (COORD_T)(round(roi_ptr[1]) + 0.f) * SPATIAL_SCALE;
+    const COORD_T roi_y  = (COORD_T)(round(roi_ptr[2]) + 0.f) * SPATIAL_SCALE;
+    const COORD_T roi_x1 = (COORD_T)(round(roi_ptr[3]) + 1.f) * SPATIAL_SCALE;
+    const COORD_T roi_y1 = (COORD_T)(round(roi_ptr[4]) + 1.f) * SPATIAL_SCALE;
+
+    // The final coordinate is within the ROI and malformed dimensions are treated as 1
+    const COORD_T roi_w = max(roi_x1 - roi_x, .1f);
+    const COORD_T roi_h = max(roi_y1 - roi_y, .1f);
+
+    const COORD_T dx_begin = (x + 0) * (COORD_T)(roi_w / POOLED_WIDTH);
+    const COORD_T dy_begin = (y + 0) * (COORD_T)(roi_h / POOLED_HEIGHT);
+    const COORD_T dx_after = (x + 1) * (COORD_T)(roi_w / POOLED_WIDTH);
+    const COORD_T dy_after = (y + 1) * (COORD_T)(roi_h / POOLED_HEIGHT);
+
+    // clamp in case roi_x or roi_y were unreasonable
+    const int x_begin = CLAMP(floor(roi_x + dx_begin), 0, INPUT0_SIZE_X);
+    const int y_begin = CLAMP(floor(roi_y + dy_begin), 0, INPUT0_SIZE_Y);
+    const int x_after = CLAMP(ceil(roi_x + dx_after), 0, INPUT0_SIZE_X);
+    const int y_after = CLAMP(ceil(roi_y + dy_after), 0, INPUT0_SIZE_Y);
+
+    ACCUM_T res = 0.0f;
+
+    for (int yy = y_begin; yy < y_after; ++yy)
+    {
+        for (int xx = x_begin; xx < x_after; ++xx)
+        {
+            INPUT0_TYPE val = data[xx*INPUT0_X_PITCH + yy*INPUT0_Y_PITCH];
+            res += (ACCUM_T)val;
+        }
+    }
+
+    const COORD_T area = (y_after - y_begin) * (x_after - x_begin);
+    if (area)
+        res /= area;
+
+#else
+#error "Unsupported pooling mode"
+#endif
+    const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
+    dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)(res), NL_M, NL_N);
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl
index 0c006bc0b..2006d571b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/roi_pooling_ref.cl
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,11 +32,7 @@
 #define DST_H POOLED_HEIGHT
 #define PITCH_ROI_R INPUT1_BATCH_PITCH
 
-#if GROUP_SIZE == 0
 #define DST_C INPUT0_FEATURE_NUM
-#else
-#define DST_C (GROUP_SIZE ? (INPUT0_FEATURE_NUM / GROUP_SIZE / GROUP_SIZE) : INPUT0_FEATURE_NUM)
-#endif
 
 // Note: In the non-ROI_OLD case we keep the coordinates in float instead
 //       of using UNIT_TYPE, since with FP16 we might actually lose some
@@ -52,12 +48,6 @@
 #error - unknown ROI_POOLING kernel type
 #endif
 
-/****************************************************************************
- *                                                                          *
- *                                RoI Pooling                               *
- *                                                                          *
- ***************************************************************************/
-
 KERNEL(roi_pooling_gpu)
 (
     const __global INPUT0_TYPE * src_data,
@@ -76,7 +66,9 @@ KERNEL(roi_pooling_gpu)
     //       with SPATIAL_SCALE: It makes sense since the resolution of
     //       the pooled data is limited by its dimensions. (Is this clear?)
 
-    const __global INPUT1_TYPE * roi_ptr = &src_rois[PITCH_ROI_R * r];
+    const __global INPUT1_TYPE* roi_ptr = &src_rois[PITCH_ROI_R * r];
+
+    const int src_batch_idx = (int)(roi_ptr[0]);
 
 #if BILINEAR_POOLING
     const uint output_offset = OUTPUT_OFFSET + x*OUTPUT_X_PITCH + y*OUTPUT_Y_PITCH + c*OUTPUT_FEATURE_PITCH + r*OUTPUT_ROI_PITCH;
@@ -86,13 +78,13 @@ KERNEL(roi_pooling_gpu)
     COORD_T roi_end_w   = roi_ptr[3];
     COORD_T roi_end_h   = roi_ptr[4];
 
-    COORD_T height_scale = (roi_end_h - roi_start_h) * (SRC_H - 1) / (COORD_T)(POOLED_HEIGHT - 1);
-    COORD_T width_scale  = (roi_end_w - roi_start_w) * (SRC_W - 1) / (COORD_T)(POOLED_WIDTH  - 1);
+    COORD_T height_scale = (roi_end_h - roi_start_h) * (SRC_H - 1.0f) / (COORD_T)(POOLED_HEIGHT - 1.0f);
+    COORD_T width_scale  = (roi_end_w - roi_start_w) * (SRC_W - 1.0f) / (COORD_T)(POOLED_WIDTH  - 1.0f);
 
-    COORD_T in_y = y*height_scale + roi_start_h*(COORD_T)(SRC_H - 1);
-    COORD_T in_x = x*width_scale  + roi_start_w*(COORD_T)(SRC_W - 1);
+    COORD_T in_y = y*height_scale + roi_start_h*(COORD_T)(SRC_H - 1.0f);
+    COORD_T in_x = x*width_scale  + roi_start_w*(COORD_T)(SRC_W - 1.0f);
 
-    if (in_y < 0 || in_y > (COORD_T)(SRC_H - 1) || in_x < 0 || in_x > (COORD_T)(SRC_W - 1) || roi_ptr[0] == -1) {
+    if (in_y < 0 || in_y > (COORD_T)(SRC_H - 1) || in_x < 0 || in_x > (COORD_T)(SRC_W - 1) || src_batch_idx == -1) {
         dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)0, NL_M, NL_N);
         return;
     }
@@ -102,7 +94,7 @@ KERNEL(roi_pooling_gpu)
     int left_x_index   = (int)(floor(in_x));
     int right_x_index  = (int)(min(ceil(in_x), (COORD_T)SRC_W - 1));
 
-    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + INPUT0_FEATURE_PITCH*c;
+    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c;
 
     ACCUM_T top_left     = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + left_x_index*INPUT0_X_PITCH];
     ACCUM_T top_right    = (ACCUM_T)data[top_y_index*INPUT0_Y_PITCH + right_x_index*INPUT0_X_PITCH];
@@ -117,7 +109,6 @@ KERNEL(roi_pooling_gpu)
     dst_data[output_offset] = ACTIVATION((OUTPUT_TYPE)res, NL_M, NL_N);
 #else
 
-#if USE_OLD_SCALE_AND_ROUNDING
     const int roi_x  = round(roi_ptr[1] * SPATIAL_SCALE);
     const int roi_y  = round(roi_ptr[2] * SPATIAL_SCALE);
     const int roi_x1 = round(roi_ptr[3] * SPATIAL_SCALE);
@@ -126,16 +117,6 @@ KERNEL(roi_pooling_gpu)
     // The final coordinate is within the ROI and malformed dimensions are treated as 1
     const uint roi_w = max(roi_x1 - roi_x, 0) + 1;
     const uint roi_h = max(roi_y1 - roi_y, 0) + 1;
-#else
-    const COORD_T roi_x  = (COORD_T)(round(roi_ptr[1]) + 0.f) * SPATIAL_SCALE;
-    const COORD_T roi_y  = (COORD_T)(round(roi_ptr[2]) + 0.f) * SPATIAL_SCALE;
-    const COORD_T roi_x1 = (COORD_T)(round(roi_ptr[3]) + 1.f) * SPATIAL_SCALE;
-    const COORD_T roi_y1 = (COORD_T)(round(roi_ptr[4]) + 1.f) * SPATIAL_SCALE;
-
-    // The final coordinate is within the ROI and malformed dimensions are treated as 1
-    const COORD_T roi_w = max(roi_x1 - roi_x, .1f);
-    const COORD_T roi_h = max(roi_y1 - roi_y, .1f);
-#endif
 
     // Note that when the "after" is rounded rounded up else we get the last cell,
     // instead of the cell beyond (For "symmetry").
@@ -145,7 +126,6 @@ KERNEL(roi_pooling_gpu)
     // [0, 1, 3, 4]                                     # as expected
     // >>> [((x + 1) * 6) // 4 for x in [0, 1, 2, 3]]   # "after" values
     // [1, 3, 4 ,6]                                     # [2, 3, 5, 6] expected!
-#if USE_OLD_SCALE_AND_ROUNDING
     const int dx_begin = ((x + 0) * roi_w) / DST_W;
     const int dy_begin = ((y + 0) * roi_h) / DST_H;
     const int dx_after = ((x + 1) * roi_w + (DST_W - 1)) / DST_W;
@@ -156,38 +136,8 @@ KERNEL(roi_pooling_gpu)
     const int y_begin = clamp(roi_y + dy_begin, 0, SRC_H);
     const int x_after = clamp(roi_x + dx_after, 0, SRC_W);
     const int y_after = clamp(roi_y + dy_after, 0, SRC_H);
-#else
-    const COORD_T dx_begin = (x + 0) * (COORD_T)(roi_w / DST_W);
-    const COORD_T dy_begin = (y + 0) * (COORD_T)(roi_h / DST_H);
-    const COORD_T dx_after = (x + 1) * (COORD_T)(roi_w / DST_W);
-    const COORD_T dy_after = (y + 1) * (COORD_T)(roi_h / DST_H);
-
-    // clamp in case roi_x or roi_y were unreasonable
-    const int x_begin = CLAMP(floor(roi_x + dx_begin), 0, SRC_W);
-    const int y_begin = CLAMP(floor(roi_y + dy_begin), 0, SRC_H);
-    const int x_after = CLAMP(ceil(roi_x + dx_after), 0, SRC_W);
-    const int y_after = CLAMP(ceil(roi_y + dy_after), 0, SRC_H);
-#endif
-
-#if GROUP_SIZE == 0
-    const uint work_c = c;
-#else
-
-#if 0
-    const COORD_T group_bin_w = (COORD_T)roi_w / DST_W;
-    const COORD_T group_bin_h = (COORD_T)roi_h / DST_H;
-
-    const uint group_x = CLAMP(x * group_bin_w, 0, GROUP_SIZE - 1);
-    const uint group_y = CLAMP(y * group_bin_h, 0, GROUP_SIZE - 1);
-#else
-    const uint group_x = x;
-    const uint group_y = y;
-#endif
-
-    const uint work_c = group_x + GROUP_SIZE * (group_y + GROUP_SIZE * c);
-#endif
 
-    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + INPUT0_FEATURE_PITCH*work_c;
+    const __global INPUT0_TYPE* data = src_data + INPUT0_OFFSET + src_batch_idx*INPUT0_BATCH_PITCH + INPUT0_FEATURE_PITCH*c;
 
 #if MAX_POOLING
     ACCUM_T res = x_begin < x_after && y_begin < y_after ? -FLT_MAX : 0;
@@ -208,7 +158,6 @@ KERNEL(roi_pooling_gpu)
 
 #if (!MAX_POOLING)
     {
-        //TODO(ruv): again, differs from the standard fixed size area (?)
         const COORD_T area = (y_after - y_begin) * (x_after - x_begin);
         if (area) res /= area;
     }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl
new file mode 100644
index 000000000..77ba6982c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/shuffle_channels_ref.cl
@@ -0,0 +1,43 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+KERNEL(shuffle_channels_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+{
+    const uint batch = get_global_id(0);
+    const uint feature = get_global_id(1);
+    const uint y = get_global_id(2) / OUTPUT_SIZE_X;
+    const uint x = get_global_id(2) % OUTPUT_SIZE_X;
+    const uint dimensions[] = { batch, feature, y, x };
+
+    const uint current_group = dimensions[AXIS] / GROUP_SIZE;
+    const uint position_in_group = dimensions[AXIS] % GROUP_SIZE;
+    const uint input_index = INPUT0_OFFSET + (batch * INPUT0_BATCH_PITCH) + (feature * INPUT0_FEATURE_PITCH) + (y * INPUT0_Y_PITCH) + x;
+
+    uint output_index = OUTPUT_OFFSET;
+
+    for (uint i = 0; i < AXIS; ++i) {
+        output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
+    }
+
+    output_index += (position_in_group * GROUPS_NUMBER + current_group) * INPUT0_PITCHES[INPUT0_DIMS - AXIS - 1];
+
+    for (uint i = AXIS + 1; i < INPUT0_DIMS; ++i) {
+        output_index += dimensions[i] * INPUT0_PITCHES[INPUT0_DIMS - i - 1];
+    }
+
+    output[output_index] = input[input_index];
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/strided_slice_ref.cl b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/strided_slice_ref.cl
new file mode 100644
index 000000000..1fec68a16
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/cl_kernels/strided_slice_ref.cl
@@ -0,0 +1,50 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "include/include_all.cl"
+
+KERNEL(strided_slice_ref)(const __global UNIT_TYPE* input, __global UNIT_TYPE* output)
+{
+    const uint batch = get_global_id(0);
+    const uint feature = get_global_id(1);
+
+#if NEW_AXIS_MODE
+    // If NEW_AXIS_MODE that just copy input to output
+    const uint y_input = get_global_id(2) / INPUT0_SIZE_X;
+    const uint x_input = get_global_id(2) % INPUT0_SIZE_X;
+    const uint input_index = INPUT0_OFFSET +
+        batch * INPUT0_BATCH_PITCH +
+        feature * INPUT0_FEATURE_PITCH +
+        y_input * INPUT0_Y_PITCH +
+        x_input * INPUT0_X_PITCH;
+    output[input_index] = input[input_index];
+#else
+    const uint y = get_global_id(2) / OUTPUT_SIZE_X;
+    const uint x = get_global_id(2) % OUTPUT_SIZE_X;
+    const uint input_index = INPUT0_OFFSET +
+            (SLICE_BEGIN_BATCH + batch * SLICE_STEPS_BATCH) * INPUT0_BATCH_PITCH +
+            (SLICE_BEGIN_FEATURE + feature * SLICE_STEPS_FEATURE) * INPUT0_FEATURE_PITCH +
+            (SLICE_BEGIN_Y + y * SLICE_STEPS_Y) * INPUT0_Y_PITCH +
+            (SLICE_BEGIN_X + x * SLICE_STEPS_X) * INPUT0_X_PITCH;
+
+    const uint output_index = OUTPUT_OFFSET +
+            batch * OUTPUT_BATCH_PITCH +
+            feature * OUTPUT_FEATURE_PITCH +
+            y * OUTPUT_Y_PITCH +
+            x * OUTPUT_X_PITCH;
+
+    output[output_index] = input[input_index];
+#endif
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp
index 4a2344e49..47ab1535b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.cpp
@@ -181,11 +181,11 @@ namespace kernel_selector
         {
             std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS0: " << runInfo.gws0 << " LWS0: " << runInfo.lws0 << std::endl;
         }
-        if (runInfo.gws0 % runInfo.lws0 != 0)
+        if (runInfo.gws1 % runInfo.lws1 != 0)
         {
             std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS1: " << runInfo.gws1 << " LWS1: " << runInfo.lws1 << std::endl;
         }
-        if (runInfo.gws0 % runInfo.lws0 != 0)
+        if (runInfo.gws2 % runInfo.lws2 != 0)
         {
             std::cout << "ERROR: dispatch data for kernel: " << kernelName << " is incorrect: GWS2: " << runInfo.gws2 << " LWS2: " << runInfo.lws2 << std::endl;
         }
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h
index 917b4e5ac..1bc50e864 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/common_kernel_base.h
@@ -49,7 +49,7 @@ namespace kernel_selector
         std::string                     CreateJit(const std::string& template_name, const JitConstants& constants, const std::string& kernel_name) const;
         std::string                     GetEntryPoint(const std::string& templateName, const std::string& layerID, const optional_params& options) const;
         Arguments                       GetArgsDesc(uint32_t num_of_input, bool use_weights, bool use_bias, bool use_quantization = false, bool use_calibration = 0) const;
-        std::shared_ptr<KernelString>   GetKernelString(const std::string& kernel_name, const std::string& jit, const std::string& entry_point, const EngineInfo& engine_info, const std::string& exe_mode = ROUND_ROBIN) const;
-        void                            FillCLKernelData(clKernelData& kernel, const CommonDispatchData& runInfo, const EngineInfo& engine_info, const std::string& kernel_map_name, const std::string& jit, const std::string& entry_point, const std::string& exe_mode = ROUND_ROBIN,
+        std::shared_ptr<KernelString>   GetKernelString(const std::string& kernel_name, const std::string& jit, const std::string& entry_point, const EngineInfo& engine_info, const std::string& exe_mode = DEFAULT) const;
+        void                            FillCLKernelData(clKernelData& kernel, const CommonDispatchData& runInfo, const EngineInfo& engine_info, const std::string& kernel_map_name, const std::string& jit, const std::string& entry_point, const std::string& exe_mode = DEFAULT,
                                                             bool weights = false, bool bias = false, int number_of_inputs = 1, bool quantization = false, bool calibration = false) const;    };
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
index 1a426a06a..0f1cf4dec 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
 */
 
 #include "jitter.h"
+#include "tensor_type.h"
 
 namespace kernel_selector {
 
@@ -23,6 +24,7 @@ namespace kernel_selector {
         switch (wType)
         {
         case WeightsType::INT8: return GetTypeName<int8_t>();
+        case WeightsType::UINT8: return GetTypeName<uint8_t>();
         case WeightsType::F16:  return "half";
         case WeightsType::F32:  return GetTypeName<float>();
         default: return "";
@@ -58,6 +60,28 @@ namespace kernel_selector {
         }
     }
 
+    std::string toCodeString(float val) {
+        if (std::isinf(val))
+            return std::signbit(val) ? "-INFINITY" : "INFINITY";
+        std::stringstream ss;
+        // Workaround GCC compiler/STL bug
+        ss << "as_float(0x" << std::hex << *reinterpret_cast<uint32_t*>(&val) << ")";
+
+        ss << " /*" << std::scientific << val << "*/";
+        return ss.str();
+    }
+
+    std::string toCodeString(double val) {
+        if (std::isinf(val))
+            return std::signbit(val) ? "-INFINITY" : "INFINITY";
+        std::stringstream ss;
+        // Workaround GCC compiler/STL bug
+        ss << "as_double(0x" << std::hex << *reinterpret_cast<uint64_t*>(&val) << ")";
+
+        ss << " /*" << std::scientific << val << "*/";
+        return ss.str();
+    }
+
     JitDefinitions JitConstants::GetDefinitions() const
     {
         JitDefinitions definitons;
@@ -70,6 +94,53 @@ namespace kernel_selector {
         return definitons;
     }
 
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // TensorBaseTJitConstant
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    template<typename DType, typename Layout>
+    class TensorBaseTJitConstant : public JitConstant
+    {
+    protected:
+        TensorBaseTJitConstant(const std::string& name) : JitConstant(name) {}
+
+    public:
+
+        JitDefinitions GetDefinitions(const Tensor::TensorBaseT<DType, Layout>& t) const
+        {
+            JitDefinitions definitions{
+                { _name + "_TYPE",          toCLType(t.GetDType()) },
+            { _name + "_OFFSET",        toCodeString(t.GetFirstElementOffset()) },
+            { _name + "_VIEW_OFFSET",   toCodeString(t.GetViewOffset()) },
+            { _name + "_LENGTH",        toCodeString(t.LogicalSize()) },
+            { _name + "_DIMS",          toCodeString(t.GetDims().size()) },
+            { _name + "_SIMPLE",        toCodeString(t.SimpleLayout()) },
+            { "TO_" + _name + "_TYPE",  "convert_" + toCLType(t.GetDType()) },
+            { _name + "_LAYOUT_" + toString(t.GetLayout()), "1" },
+            };
+
+            definitions.push_back({ _name + "_SIZE",        toCodeString(t.GetDims().size()) });
+            definitions.push_back({ _name + "_SIZES",       toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 1, [](const Tensor::Dim& d) { return d.v; }) });
+            definitions.push_back({ _name + "_PITCHES",     toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 1, [](const Tensor::Dim& d) { return d.pitch; }) });
+            definitions.push_back({ _name + "_PAD_BEFORE",  toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 0, [](const Tensor::Dim& d) { return d.pad.before; }) });
+            definitions.push_back({ _name + "_PAD_AFTER",   toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 0, [](const Tensor::Dim& d) { return d.pad.after; }) });
+
+            return definitions;
+        }
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // DataTensorJitConstant
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class DataTensorJitConstant : public TensorBaseTJitConstant<Datatype, DataLayout>
+    {
+        const DataTensor _tensor;
+
+    public:
+        DataTensorJitConstant(const std::string& name, const DataTensor& t) : TensorBaseTJitConstant(name), _tensor(t) {}
+
+        JitDefinitions GetDefinitions() const override;
+    };
+
     JitDefinitions DataTensorJitConstant::GetDefinitions() const
     {
         JitDefinitions baseDefinitions = TensorBaseTJitConstant::GetDefinitions(_tensor);
@@ -100,19 +171,37 @@ namespace kernel_selector {
         return definitions;
     }
 
+    std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const DataTensor& value)
+    {
+        return std::static_pointer_cast<JitConstant>(std::make_shared<DataTensorJitConstant>(name, value));
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // WeightTensorJitConstant
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    class WeightTensorJitConstant : public TensorBaseTJitConstant<WeightsType, WeightsLayout>
+    {
+        const WeightsTensor _tensor;
+
+    public:
+        WeightTensorJitConstant(const std::string& name, const WeightsTensor& t) : TensorBaseTJitConstant(name), _tensor(t) {}
+
+        JitDefinitions GetDefinitions() const override;
+    };
+
     JitDefinitions WeightTensorJitConstant::GetDefinitions() const
     {
         JitDefinitions baseDefinitions = TensorBaseTJitConstant::GetDefinitions(_tensor);
 
         JitDefinitions definitions{
-        { _name + "_SIZE_X",        toCodeString(_tensor.X().v) },
-        { _name + "_SIZE_Y",        toCodeString(_tensor.Y().v) },
-        { _name + "_IFM_NUM",       toCodeString(_tensor.IFM().v) },
-        { _name + "_OFM_NUM",       toCodeString(_tensor.OFM().v) },
-        { _name + "_X_PITCH",       toCodeString(_tensor.X().pitch) },
-        { _name + "_Y_PITCH",       toCodeString(_tensor.Y().pitch) },
-        { _name + "_IFM_PITCH",     toCodeString(_tensor.IFM().pitch) },
-        { _name + "_OFM_PITCH",     toCodeString(_tensor.OFM().pitch) },
+            { _name + "_SIZE_X",        toCodeString(_tensor.X().v) },
+            { _name + "_SIZE_Y",        toCodeString(_tensor.Y().v) },
+            { _name + "_IFM_NUM",       toCodeString(_tensor.IFM().v) },
+            { _name + "_OFM_NUM",       toCodeString(_tensor.OFM().v) },
+            { _name + "_X_PITCH",       toCodeString(_tensor.X().pitch) },
+            { _name + "_Y_PITCH",       toCodeString(_tensor.Y().pitch) },
+            { _name + "_IFM_PITCH",     toCodeString(_tensor.IFM().pitch) },
+            { _name + "_OFM_PITCH",     toCodeString(_tensor.OFM().pitch) },
         };
 
         definitions.insert(definitions.end(), baseDefinitions.begin(), baseDefinitions.end());
@@ -120,63 +209,71 @@ namespace kernel_selector {
         return definitions;
     }
 
-    std::shared_ptr<JitConstant> MakeActivationJitConstants(ActivationFunction activation_function)
+    std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const WeightsTensor& value)
     {
+        return std::static_pointer_cast<JitConstant>(std::make_shared<WeightTensorJitConstant>(name, value));
+    }
+
+    std::shared_ptr<JitConstant> MakeActivationJitConstants(ActivationFunction activation_function, const std::string& suffix)
+    {
+        std::string name = "ACTIVATION" + suffix;
         // TODO: use native_exp and use cast for APL
         switch (activation_function)
         {
         case ActivationFunction::LOGISTIC:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(UNIT_VAL_ONE/(UNIT_VAL_ONE + exp(-input)))");
+            return MakeJitConstant(name + "(input, m, n)", "(UNIT_VAL_ONE/(UNIT_VAL_ONE + exp(-input)))");
         case ActivationFunction::HYPERBOLIC_TAN:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(tanh(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(tanh(input))");
         case ActivationFunction::RELU:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(UNIT_MAX_FUNC(UNIT_VAL_ZERO, input))");
+            return MakeJitConstant(name + "(input, m, n)", "(UNIT_MAX_FUNC(UNIT_VAL_ZERO, input))");
         case ActivationFunction::RELU_NEGATIVE_SLOPE:
-            return MakeJitConstant("ACTIVATION(input, slope, n)", "isinf(TO_UNIT_TYPE(slope)) ? ((input >= UNIT_VAL_ZERO) ? \
+            return MakeJitConstant(name + "(input, slope, n)", "isinf(TO_UNIT_TYPE(slope)) ? ((input >= UNIT_VAL_ZERO) ? \
                                                         input : -TO_UNIT_TYPE(slope)) : \
                                                         (UNIT_MAX_FUNC(input, UNIT_VAL_ZERO) + TO_UNIT_TYPE(slope) * UNIT_MIN_FUNC(input, UNIT_VAL_ZERO))");
         case ActivationFunction::ELU:
-            return MakeJitConstant("ACTIVATION(input, alpha, n)", "(UNIT_MAX_FUNC(input, UNIT_VAL_ZERO) +  \
+            return MakeJitConstant(name + "(input, alpha, n)", "(UNIT_MAX_FUNC(input, UNIT_VAL_ZERO) +  \
                                                         TO_UNIT_TYPE(alpha) * (exp(UNIT_MIN_FUNC(input, UNIT_VAL_ZERO)) - UNIT_VAL_ONE));");
         case ActivationFunction::CLAMP:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(UNIT_MAX_FUNC(TO_UNIT_TYPE(m), UNIT_MIN_FUNC(TO_UNIT_TYPE(n), input)))");
+            return MakeJitConstant(name + "(input, m, n)", "(UNIT_MAX_FUNC(TO_UNIT_TYPE(m), UNIT_MIN_FUNC(TO_UNIT_TYPE(n), input)))");
         case ActivationFunction::SOFTRELU:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(log(UNIT_VAL_ONE + exp(input)))");
+            return MakeJitConstant(name + "(input, m, n)", "(log(UNIT_VAL_ONE + exp(input)))");
         case ActivationFunction::ABS:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(fabs(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(fabs(input))");
         case ActivationFunction::LINEAR:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(m*input + n)");
+            return MakeJitConstant(name + "(input, m, n)", "(m*input + n)");
         case ActivationFunction::SQUARE:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(input*input)");
+            return MakeJitConstant(name + "(input, m, n)", "(input*input)");
         case ActivationFunction::SQRT:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(sqrt(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(sqrt(input))");
         case ActivationFunction::SIN:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(sin(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(sin(input))");
         case ActivationFunction::ASIN:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(asin(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(asin(input))");
         case ActivationFunction::SINH:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(sinh(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(sinh(input))");
         case ActivationFunction::COS:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(cos(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(cos(input))");
         case ActivationFunction::ACOS:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(acos(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(acos(input))");
         case ActivationFunction::COSH:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(cosh(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(cosh(input))");
         case ActivationFunction::LOG:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(log(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(log(input))");
         case ActivationFunction::LOG2:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(log2(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(log2(input))");
         case ActivationFunction::EXP:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "(exp(input))");
+            return MakeJitConstant(name + "(input, m, n)", "(exp(input))");
+        case ActivationFunction::NOT:
+            return MakeJitConstant(name + "(input, m, n)", "((input != 0) ? UNIT_VAL_ZERO : UNIT_VAL_ONE)");
         case ActivationFunction::RELU_GRAD:
-            return MakeJitConstant("ACTIVATION(input_grad, input, m, n)", "(input_grad * (input > UNIT_VAL_ZERO ? TO_UNIT_TYPE(1) : TO_UNIT_TYPE(0)))");
+            return MakeJitConstant(name + "(input_grad, input, m, n)", "(input_grad * (input > UNIT_VAL_ZERO ? TO_UNIT_TYPE(1) : TO_UNIT_TYPE(0)))");
         case ActivationFunction::RELU_NEGATIVE_SLOPE_GRAD:
-            return MakeJitConstant("ACTIVATION(input_grad, input, slope, n)", "(input_grad * ((input > UNIT_VAL_ZERO ? TO_UNIT_TYPE(1) : TO_UNIT_TYPE(0)) + TO_UNIT_TYPE(slope) * (input <= 0 ? TO_UNIT_TYPE(1) : TO_UNIT_TYPE(0))))");
+            return MakeJitConstant(name + "(input_grad, input, slope, n)", "(input_grad * ((input > UNIT_VAL_ZERO ? TO_UNIT_TYPE(1) : TO_UNIT_TYPE(0)) + TO_UNIT_TYPE(slope) * (input <= 0 ? TO_UNIT_TYPE(1) : TO_UNIT_TYPE(0))))");
         case ActivationFunction::NONE_GRAD:
-            return MakeJitConstant("ACTIVATION(input_grad, input, m, n)", "input_grad");
+            return MakeJitConstant(name + "(input_grad, input, m, n)", "input_grad");
         case ActivationFunction::NONE:
         default:
-            return MakeJitConstant("ACTIVATION(input, m, n)", "input");
+            return MakeJitConstant(name + "(input, m, n)", "input");
         }
     }
 
@@ -195,27 +292,47 @@ namespace kernel_selector {
         case Datatype::INT8:
             unit_type = "char";
             unit_max_val = "CHAR_MAX";
-            unit_min_val = "-UNIT_VAL_MAX";
+            unit_min_val = "CHAR_MIN";
             unit_val_one = "(char) 1";
             unit_val_zero = "(char) 0";
             to_unit_type = "convert_char(v)";
             unit_max_func = "max";
             unit_min_func = "min";
             break;
+        case Datatype::UINT8:
+            unit_type = "uchar";
+            unit_max_val = "UCHAR_MAX";
+            unit_min_val = "0";
+            unit_val_one = "(uchar) 1";
+            unit_val_zero = "(uchar) 0";
+            to_unit_type = "convert_uchar(v)";
+            unit_max_func = "max";
+            unit_min_func = "min";
+            break;
         case Datatype::INT32:
             unit_type = "int";
             unit_max_val = "INT_MAX";
-            unit_min_val = "-UNIT_VAL_MAX";
+            unit_min_val = "INT_MIN";
             unit_val_one = "(int) 1";
             unit_val_zero = "(int) 0";
             to_unit_type = "convert_int(v)";
             unit_max_func = "max";
             unit_min_func = "min";
             break;
+        case Datatype::UINT32:
+            unit_type = "uint";
+            unit_max_val = "UINT_MAX";
+            unit_min_val = "0";
+            unit_val_one = "(uint) 1";
+            unit_val_zero = "(uint) 0";
+            to_unit_type = "convert_uint(v)";
+            unit_max_func = "max";
+            unit_min_func = "min";
+            break;
         case Datatype::INT64:
             unit_type = "long";
             unit_max_val = "LONG_MAX";
-            unit_min_val = "-UNIT_VAL_MAX";
+            unit_min_val = "LONG_MIN";
             unit_val_one = "(long) 1";
             unit_val_zero = "(long) 0";
             to_unit_type = "convert_long(v)";
@@ -256,6 +373,16 @@ namespace kernel_selector {
             MakeJitConstant("UNIT_MIN_FUNC",        unit_min_func),
         };
     }
+
+    JitConstants MakeActivationJitConstants(const base_activation_params& params, const std::string& suffix)
+    {
+        return JitConstants{
+            MakeJitConstant("NL_M" + suffix, params.m),
+            MakeJitConstant("NL_N" + suffix, params.n),
+            MakeActivationJitConstants(params.function, suffix)
+        };
+    }
+
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // MakeBaseParamsJitConstants
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -265,12 +392,16 @@ namespace kernel_selector {
         bool bInt8Used = params.output.GetDType() == Datatype::INT8;
         bool bInt32Used = params.output.GetDType() == Datatype::INT32;
         bool bInt64Used = params.output.GetDType() == Datatype::INT64;
+        bool bUInt8Used = params.output.GetDType() == Datatype::UINT8;
+        bool bUInt32Used = params.output.GetDType() == Datatype::INT32;
         for (const auto& i : params.inputs)
         {
             bFP16Used |= i.GetDType() == Datatype::F16;
             bInt8Used |= i.GetDType() == Datatype::INT8;
             bInt32Used |= i.GetDType() == Datatype::INT32;
             bInt64Used |= i.GetDType() == Datatype::INT64;
+            bUInt8Used |= i.GetDType() == Datatype::UINT8;
+            bUInt32Used |= i.GetDType() == Datatype::UINT32;
         }
 
         JitConstants jit{
@@ -281,16 +412,11 @@ namespace kernel_selector {
             MakeJitConstant("INT8_UNIT_USED",       bInt8Used),
             MakeJitConstant("INT32_UNIT_USED",      bInt32Used),
             MakeJitConstant("INT64_UNIT_USED",      bInt64Used),
+            MakeJitConstant("UINT8_UNIT_USED",      bUInt8Used),
+            MakeJitConstant("UINT32_UNIT_USED",     bUInt32Used),
             MakeJitConstant("GRADIENT",             params.gradient),
         };
 
-        // for activation function
-        jit.AddConstants({
-            MakeJitConstant("NL_M",                 params.activationParams.m),
-            MakeJitConstant("NL_N",                 params.activationParams.n),
-            MakeActivationJitConstants(params.activationFunc),
-            });
-
         if (bInt8Used)
         {
             jit.Merge(MakeUnitTypeJitConstants(Datatype::INT8));
@@ -307,11 +433,22 @@ namespace kernel_selector {
         {
             jit.Merge(MakeUnitTypeJitConstants(Datatype::INT64));
         }
+        else if (bUInt8Used)
+        {
+            jit.Merge(MakeUnitTypeJitConstants(Datatype::UINT8));
+        }
+        else if (bUInt32Used)
+        {
+            jit.Merge(MakeUnitTypeJitConstants(Datatype::UINT32));
+        }
         else
         {
             jit.Merge(MakeUnitTypeJitConstants(Datatype::F32));
         }
 
+        // for activation function
+        jit.Merge(MakeActivationJitConstants(params.activation));
+
         for (size_t i = 0; i < params.inputs.size(); i++)
         {
             jit.AddConstant(MakeJitConstant("INPUT" + toCodeString(i), params.inputs[i]));
@@ -344,4 +481,4 @@ namespace kernel_selector {
         return jit;
     }
 
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h
index 3e65a0bde..3992de908 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/jitter.h
@@ -18,8 +18,6 @@
 #pragma once
 
 #include "kernel_selector_common.h"
-#include "kernel_selector_params.h"
-#include "tensor_type.h"
 
 #include <sstream>
 #include <cmath>
@@ -27,6 +25,8 @@
 
 namespace kernel_selector {
 
+struct base_params;
+
 using JitDefinitions = std::vector<std::pair<std::string, std::string>>;
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -66,69 +66,20 @@ std::string getMeanOpString(MeanOp op);
 template<typename T>
 std::string toCodeString(T val) { return std::to_string(val); }
 
-template<>
-inline std::string toCodeString<std::string>(std::string val) { return val; }
-
-template<>
-inline std::string toCodeString<const char*>(const char* val) { return val; }
-
-template<>
-inline std::string toCodeString<char*>(char* val) { return val; }
-
-template<>
-inline std::string toCodeString<bool>(bool val)
-{
-    std::stringstream ss;
-    ss << static_cast<int>(val);
-    return ss.str();
-}
-
-template<>
-inline std::string toCodeString<const bool>(const bool val)
-{
-    std::stringstream ss;
-    ss << static_cast<int>(val);
-    return ss.str();
-}
-
-template<>
-inline std::string toCodeString<float>(float val) {
-    if (std::isinf(val))
-        return std::signbit(val) ? "-INFINITY" : "INFINITY";
-    std::stringstream ss;
-#ifdef __GNUC__
-    // Workaround GCC compiler/STL bug
-    ss << "as_float(0x" << std::hex << *reinterpret_cast<uint32_t*>(&val) << ")";
-#else
-    ss << std::hexfloat << val << "f";
-#endif
-    ss << " /*" << std::scientific << val << "*/";
-    return ss.str();
-}
-
-template<>
-inline std::string toCodeString<double>(double val) {
-    if (std::isinf(val))
-        return std::signbit(val) ? "-INFINITY" : "INFINITY";
-    std::stringstream ss;
-#ifdef __GNUC__
-    // Workaround GCC compiler/STL bug
-    ss << "as_double(0x" << std::hex << *reinterpret_cast<uint64_t*>(&val) << ")";
-#else
-    ss << std::hexfloat << val;
-#endif
-    ss << " /*" << std::scientific << val << "*/";
-    return ss.str();
-}
+inline std::string toCodeString(const std::string& val) { return val; }
+inline std::string toCodeString(const char* val) { return val; }
+inline std::string toCodeString(bool val) { return val ? "1" : "0"; }
+std::string toCodeString(float val);
+std::string toCodeString(double val);
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // JitConstant
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 template <typename VecT, typename ValT, typename Func>
-inline std::string toVectorString(const VecT& vec, const std::string& vertorType, size_t maxDim, ValT padFillingVal, Func fetchFunc)
+inline std::string toVectorString(const VecT& vec, const std::string& vectorType, size_t maxDim, ValT padFillingVal, Func fetchFunc)
 {
     std::stringstream ss;
-    ss << "(" << vertorType << " []){ ";
+    ss << "(" << vectorType << " []){ ";
     for (size_t i = 0; i < vec.size(); i++)
         ss << toCodeString(fetchFunc(vec[i])) << ",";
     for (size_t i = vec.size(); i < maxDim; i++)
@@ -171,75 +122,8 @@ std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, T value)
     return std::static_pointer_cast<JitConstant>(std::make_shared<simple_jit_constant>(name, toCodeString(value)));
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// TensorBaseTJitConstant
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-template<typename DType, typename Layout>
-class TensorBaseTJitConstant : public JitConstant
-{
-protected:
-    TensorBaseTJitConstant(const std::string& name) : JitConstant(name) {}
-
-public:
-
-    JitDefinitions GetDefinitions(const Tensor::TensorBaseT<DType, Layout>& t) const
-    {
-        JitDefinitions definitions{
-            { _name + "_TYPE",          toCLType(t.GetDType()) },
-            { _name + "_OFFSET",        toCodeString(t.GetFirstElementOffset()) },
-            { _name + "_VIEW_OFFSET",   toCodeString(t.GetViewOffset()) },
-            { _name + "_LENGTH",        toCodeString(t.LogicalSize()) },
-            { _name + "_DIMS",          toCodeString(t.GetDims().size()) },
-            { _name + "_SIMPLE",        toCodeString(t.SimpleLayout()) },
-            { "TO_" + _name + "_TYPE",  "convert_" + toCLType(t.GetDType()) },
-            { _name + "_LAYOUT_" + toString(t.GetLayout()), "1" },
-        };
-
-        definitions.push_back({ _name + "_SIZE",        toCodeString(t.GetDims().size()) });
-        definitions.push_back({ _name + "_SIZES",       toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 1, [](const Tensor::Dim& d) { return d.v; }) });
-        definitions.push_back({ _name + "_PITCHES",     toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 1, [](const Tensor::Dim& d) { return d.pitch; }) });
-        definitions.push_back({ _name + "_PAD_BEFORE",  toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 0, [](const Tensor::Dim& d) { return d.pad.before; }) });
-        definitions.push_back({ _name + "_PAD_AFTER",   toVectorString(t.GetDims(), "size_t", KERNEL_SELECTOR_TENSOR_DIM_MAX, 0, [](const Tensor::Dim& d) { return d.pad.after; }) });
-
-        return definitions;
-    }
-};
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// DataTensorJitConstant
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-class DataTensorJitConstant : public TensorBaseTJitConstant<Datatype, DataLayout>
-{
-    const DataTensor _tensor;
-
-public:
-    DataTensorJitConstant(const std::string& name, const DataTensor& t) : TensorBaseTJitConstant(name), _tensor(t) {}
-
-    JitDefinitions GetDefinitions() const override;
-};
-
-inline std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const DataTensor& value) 
-{
-    return std::static_pointer_cast<JitConstant>(std::make_shared<DataTensorJitConstant>(name, value));
-}
-
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// WeightTensorJitConstant
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-class WeightTensorJitConstant : public TensorBaseTJitConstant<WeightsType, WeightsLayout>
-{
-    const WeightsTensor _tensor;
-
-public:
-    WeightTensorJitConstant(const std::string& name, const WeightsTensor& t) : TensorBaseTJitConstant(name), _tensor(t) {}
-
-    JitDefinitions GetDefinitions() const override;
-};
-
-inline std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const WeightsTensor& value) 
-{
-    return std::static_pointer_cast<JitConstant>(std::make_shared<WeightTensorJitConstant>(name, value));
-}
+std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const struct Tensor::DataTensor& value);
+std::shared_ptr<JitConstant> MakeJitConstant(const std::string& name, const struct Tensor::WeightsTensor& value);
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // VectorDataJitConstant
@@ -354,6 +238,7 @@ public:
     JitDefinitions GetDefinitions() const;
 };
 
+JitConstants MakeActivationJitConstants(const base_activation_params& params, const std::string& suffix="");
 JitConstants MakeBaseParamsJitConstants(const base_params& params);
 JitConstants MakeLoopUnrollParamsJitConstants(uint32_t loopCount);
 JitConstants MakeUnitTypeJitConstants(Datatype dataType);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp
index 92933f832..04607dc99 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.cpp
@@ -21,93 +21,11 @@
 
 namespace kernel_selector {
 
-    bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const DataTensor& reqDesc)
-    {
-        assert(params.inputs.size() == 1);
-
-        bool properPadding =
-            reqDesc.X().pad.before <= params.inputs[0].X().pad.before &&
-            reqDesc.Y().pad.before <= params.inputs[0].Y().pad.before &&
-            reqDesc.Feature().pad.before <= params.inputs[0].Feature().pad.before &&
-            reqDesc.Batch().pad.before <= params.inputs[0].Batch().pad.before;
-
-        properPadding &=
-            reqDesc.X().pad.after <= params.inputs[0].X().pad.after &&
-            reqDesc.Y().pad.after <= params.inputs[0].Y().pad.after &&
-            reqDesc.Feature().pad.after <= params.inputs[0].Feature().pad.after &&
-            reqDesc.Batch().pad.after <= params.inputs[0].Batch().pad.after;
-
-        properPadding &= ((params.padding.x == 0 && params.padding.y == 0) || params.inputs[0].GetPaddedVal() == 0.f);
-
-        return properPadding;
-    }
-
-    DataTensor GetConvolutionBFYXPaddedTensor(const convolution_params& cp)
-    {
-        assert(cp.inputs.size() == 1);
-        assert(cp.inputs[0].GetDims().size() == 4U);
-
-        DataTensor t = cp.inputs[0];
-        std::vector<Tensor::Pad> pad{ { 0,0 },{ 0,0 },{ 0,0 },{ 0,0 } };
-
-        pad[0].before = cp.padding.x;
-        pad[1].before = cp.padding.y;
-
-        const auto inputLimitX = (cp.output.X().v - 1) * cp.stride.x + (cp.filterSize.x - 1) * cp.dilation.x + 1;
-        const auto inputLimitY = (cp.output.Y().v - 1) * cp.stride.y + (cp.filterSize.y - 1) * cp.dilation.y + 1;
-
-        pad[0].after = (size_t)std::max((int)inputLimitX - (int)t.X().v - (int)pad[0].before, (int)0);
-        pad[1].after = (size_t)std::max((int)inputLimitY - (int)t.Y().v - (int)pad[1].before, (int)0);
-
-        Tensor::NDims dims(4);
-        const Tensor::NDims& orgDims = cp.inputs[0].GetDims();
-        size_t pitch = 1;
-        for (size_t i = 0; i < dims.size(); i++)
-        {
-            dims[i].pad = pad[i];
-            dims[i].v = orgDims[i].v;
-            dims[i].pitch = pitch;
-            pitch *= dims[i].LogicalDimPadded();
-        }
-
-        return{ dims, t.GetDType(), t.GetLayout() };
-    }
-
-    bool CovolutionCheckInput(const Params& p, const optional_params& o)
-    {
-        const convolution_params& params = static_cast<const convolution_params&>(p);
-        const convolution_optional_params& optParams = static_cast<const convolution_optional_params&>(o);
-
-        const auto req_input = GetConvolutionBFYXPaddedTensor(params);
-        const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input);
-        const bool bInputPadded = optParams.allowInputReordering || bProperInputDesc;
-
-        if (!bInputPadded)
-        {
-            return false;
-        }
-
-        return true;
-    }
-
-    bool CovolutionUpdateInputParams(convolution_params& params)
-    {
-        const auto req_input = GetConvolutionBFYXPaddedTensor(params);
-        const bool bProperInputDesc = CheckConvolutionPaddedInputDesc(params, req_input);
-
-        if (!bProperInputDesc)
-        {
-            params.inputs[0] = req_input;
-            return true;
-        }
-
-        return false;
-    }
-
-    WeightsType DataTypeToWeightsType(Datatype t)
+    static WeightsType DataTypeToWeightsType(Datatype t)
     {
         switch (t)
         {
+        case Datatype::UINT8:   return WeightsType::UINT8;
         case Datatype::INT8:    return WeightsType::INT8;
         case Datatype::F16:     return WeightsType::F16;
         case Datatype::F32:     return WeightsType::F32;
@@ -116,9 +34,10 @@ namespace kernel_selector {
         }
     }
 
-    bool CheckWeights(const WeightsTensor& tensor, WeightsType reqType, std::vector<WeightsLayout> reqLayouts)
+    static bool CheckWeights(const WeightsTensor& tensor, WeightsType reqType, std::vector<WeightsLayout> reqLayouts, const ParamsKey& paramsKey)
     {
-        if (reqType != tensor.GetDType())
+        if ((reqType != tensor.GetDType()) &&
+            !(paramsKey.isEnabledDifferentInputWeightsTypes()))
         {
             return false;
         }
@@ -170,7 +89,7 @@ namespace kernel_selector {
         return true;
     }
 
-    bool UpdateWeightsParams(weight_bias_params& newParams, const optional_params& options, std::vector<WeightsLayout> layouts, WeightsReorderParams& weightsReorderParams)
+    bool UpdateWeightsParams(weight_bias_params& newParams, const optional_params& options, std::vector<WeightsLayout> layouts, WeightsReorderParams& weightsReorderParams, const ParamsKey& paramsKey)
     {
         //validate if weights type is image and if device supports requested sizes
         for (auto& requested_layout : layouts)
@@ -184,8 +103,8 @@ namespace kernel_selector {
         const weight_bias_optional_params& optParams = static_cast<const weight_bias_optional_params&>(options);
 
         const auto dtype = DataTypeToWeightsType(newParams.inputs[0].GetDType());
-        bool bProperWeights = CheckWeights(newParams.weights, dtype, layouts);
-
+        bool bProperWeights = CheckWeights(
+                                  newParams.weights, dtype, layouts, paramsKey);
         if (!bProperWeights)
         {
             if (!optParams.allowStaticInputReordering)
@@ -274,7 +193,7 @@ namespace kernel_selector {
     std::vector<size_t> GetOptimalLocalWorkGroupSizes(std::vector<size_t> gws)
     {
         const size_t lws_max = 256;
-        const size_t optimal_lws_values[] = { 256, 224, 192, 160, 128, 96, 64, 32, 16, 8, 7, 6, 5, 4, 3, 2, 1 };
+        const size_t optimal_lws_values[] = { 256, 227, 224, 192, 160, 128, 96, 64, 32, 16, 8, 7, 6, 5, 4, 2, 1 };
         size_t total_lws = 1;
         std::vector<size_t> lws;
         for (size_t i = 0; i < gws.size(); ++i)
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.h
index e7cc7cf10..dbd6fe4eb 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/kernel_selector_utils.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -17,22 +17,16 @@
 #pragma once
 
 #include "jitter.h"
-#include "tensor_type.h"
 
 namespace kernel_selector {
 
     struct weight_bias_params;
-    struct convolution_params;
+    struct optional_params;
+    struct WeightsReorderParams;
 
-    bool CheckConvolutionPaddedInputDesc(const convolution_params& params, const DataTensor& reqDesc);
-    DataTensor GetConvolutionBFYXPaddedTensor(const convolution_params& cp);
-    bool CovolutionCheckInput(const Params& p, const optional_params& o);
-    bool CovolutionUpdateInputParams(convolution_params& params);
-    WeightsType DataTypeToWeightsType(Datatype t);
-    bool CheckWeights(const WeightsTensor& tensor, WeightsType reqType, std::vector<WeightsLayout> reqLayouts);
     std::vector<size_t> GetImageSizes(const kernel_selector::WeightsTensor& dimensions, const WeightsLayout layout);
     bool CheckImageSize(const weight_bias_params& newParams, const WeightsLayout layout);
-    bool UpdateWeightsParams(weight_bias_params& newParams, const optional_params& options, std::vector<WeightsLayout> layouts, WeightsReorderParams& weightsReorderParams);
+    bool UpdateWeightsParams(weight_bias_params& newParams, const optional_params& options, std::vector<WeightsLayout> layouts, WeightsReorderParams& weightsReorderParams,  const ParamsKey& paramsKey = ParamsKey());
     JitConstants GetTensorFriendlyWorkGroupsJit(const DataTensor& t);
     std::vector<size_t> GetTensorFriendlyWorkGroups(const DataTensor& t);
     std::vector<size_t> GetOptimalLocalWorkGroupSizes(std::vector<size_t> gws);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/primitive_db_gen.py b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/primitive_db_gen.py
index 22b48feb9..41e78f0a5 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/common/primitive_db_gen.py
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/common/primitive_db_gen.py
@@ -58,13 +58,13 @@ class OpenCL2CHeaders(object):
         self.include_files[filename] = {}
         #kernel_name = name[:name.find('.')]
         kernel_name = name[:name.find('.cl')]
-        res = '{{"{}",\nR"__krnl(\n'.format(kernel_name)
+        res = '{{"{}",\n(std::string) R"__krnl(\n'.format(kernel_name)
         content = self.append_file_content(filename, filename)
         max_lines = 200
 
         for i, line in enumerate(content.split('\n')):
             if i % max_lines == 0:
-                res += ')__krnl"\nR"__krnl('
+                res += ')__krnl"\n + R"__krnl('
             res += line + '\n'
 
         res += ')__krnl"}},\n\n'.format(kernel_name, self.append_file_content(filename, filename))
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h
index 80b501e8d..28450a5a4 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_base.h
@@ -40,7 +40,17 @@ namespace kernel_selector
             return GetKernelsData(params, options);
         }
 
-        virtual ParamsKey GetSupportedKey() const = 0;
+        virtual bool Supports(const Params& params, const optional_params& options) const
+        {
+            const ParamsKey requireKey = params.GetParamsKey().Merge(options.GetSupportedKey());
+            return GetSupportedKey().Support(requireKey);
+        }
+
+        bool SupportsTuning() const
+        {
+            return GetSupportedKey().TuningSupport();
+        }
+
         virtual const std::string GetName() const { return kernelName; }
 
         static const primitive_db& get_db() { return db; }
@@ -50,8 +60,9 @@ namespace kernel_selector
         const std::string kernelName;
 
         static size_t UniqeID() { return counter++; } // TODO: use interlocked
+        virtual ParamsKey GetSupportedKey() const = 0;
         
     private:
         static size_t counter;
     };
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp
index 6e938d006..2968c1085 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector.cpp
@@ -85,11 +85,9 @@ namespace kernel_selector {
         if (params.GetType() == kType &&
             options.GetType() == kType)
         {
-            const ParamsKey requireKey = params.GetParamsKey().Merge(options.GetSupportedKey());
             for (const auto& implementation : implementations)
             {
-                const ParamsKey implKey = implementation->GetSupportedKey();
-                if (implKey.Support(requireKey))
+                if (implementation->Supports(params, options))
                 {
                     try
                     {
@@ -146,25 +144,23 @@ namespace kernel_selector {
     {
         KernelsData kernelsData;
         std::string kernelName;
-
         if (params.GetType() == kType &&
             options.GetType() == kType)
         {
             std::string hash = std::to_string(create_hash(params.to_string()));
-            ParamsKey requireKey = params.GetParamsKey().Merge(options.GetSupportedKey());
-            
             std::tuple<std::string, int> cachedKernelConfig;
             if (options.tuningParams.mode == TuningMode::TUNING_DISABLED) // Try to load kernel/config from offline cache
             {
 #if ENABLE_OFFLINE_TUNING_CACHE
-                cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceId, hash);
+                cachedKernelConfig = autoTuner.LoadKernelOffline(params.engineInfo.deviceCache, hash);
+                
 #else
                 return  GetNaiveBestKernel(params, options, kType);
 #endif
             }
             else // Try to load kernel/config from on-line cache
             {
-                cachedKernelConfig = autoTuner.LoadKernelOnline(options.tuningParams.mode, options.tuningParams.cacheFilePath, params.engineInfo.deviceId, params.engineInfo.driverVersion, params.engineInfo.hostVersion, hash);
+                cachedKernelConfig = autoTuner.LoadKernelOnline(options.tuningParams.mode, options.tuningParams.cacheFilePath, params.engineInfo.computeUnitsCount, hash);
             }       
             bool hashFoundInCache = !std::get<0>(cachedKernelConfig).empty();
 
@@ -179,7 +175,7 @@ namespace kernel_selector {
                     if (implementation->GetName().compare(cachedkernelName) == 0)
                     {            
                         KernelsData kds = implementation->GetTunedKernelsDataByIndex(params, options, autoTuneIndex);
-                        if (kds.size() && kds[0].kernels.size() && implementation->GetSupportedKey().Support(requireKey))
+                        if (kds.size() && kds[0].kernels.size() && implementation->Supports(params, options))
                         {
                             kernelsData = kds;
                             kernelsData[0].kernelName = cachedkernelName;
@@ -208,9 +204,7 @@ namespace kernel_selector {
 
             for (const auto& implementation : implementations)
             {
-                
-                const ParamsKey implKey = implementation->GetSupportedKey();
-                if (implKey.Support(requireKey) && implKey.TuningSupport())
+                if (implementation->Supports(params, options) && implementation->SupportsTuning())
                 {
                     try
                     {
@@ -219,11 +213,11 @@ namespace kernel_selector {
                         
                         for (size_t i = 0; i < kds.size(); i++)
                         {
-                            kds[i].runTime = runTimes[i];  
+                            kds[i].runTime = runTimes[i];
                             if (kernelsData.size() == 0 || kds[i].runTime < kernelsData[0].runTime)
                             {
                                 kernelsData = { kds[i] };
-                                kernelName = implementation->GetName();
+                                kernelName = implementation->GetName();                                
                             }
                         }
                     }
@@ -240,9 +234,8 @@ namespace kernel_selector {
                 for (const auto& implementation : implementations)
                 {
 
-                    const ParamsKey implKey = implementation->GetSupportedKey();
                     //this time, check only implementations that have disabled tuning
-                    if (implKey.Support(requireKey) && !implKey.TuningSupport())
+                    if (implementation->Supports(params, options) && !implementation->SupportsTuning())
                     {
                         try
                         {
@@ -271,10 +264,10 @@ namespace kernel_selector {
             {
                 kernelsData[0].kernelName = kernelName;
                 kernelsData[0].kernels[0].layerID = params.layerID;
-                autoTuner.StoreKernel(options.tuningParams.cacheFilePath, hash, kernelName, kernelsData[0].autoTuneIndex);
+                autoTuner.StoreKernel(options.tuningParams.cacheFilePath, hash, kernelName, kernelsData[0].autoTuneIndex, params.engineInfo.computeUnitsCount);
             }
         } 
 
         return kernelsData;
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
index f441136aa..c35748a48 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.cpp
@@ -17,7 +17,7 @@
 #include "kernel_selector_common.h"
 #include <sstream>
 
-namespace kernel_selector
+namespace kernel_selector 
 {
     std::string GetStringEnv(const char* varName)
     {
@@ -72,6 +72,7 @@ namespace kernel_selector
         case ActivationFunction::LOG:                   method = "LOG"; break;
 		case ActivationFunction::LOG2:                  method = "LOG2"; break;
         case ActivationFunction::EXP:                   method = "EXP"; break;
+        case ActivationFunction::NOT:                   method = "NOT"; break;
         case ActivationFunction::NONE:                  method = "NONE"; break;
         case ActivationFunction::NONE_GRAD:             method = "NONE_GRAD"; break;
         default: break;
@@ -95,7 +96,9 @@ namespace kernel_selector
         case kernel_selector::DataLayout::brfyx:             return "BRFYX";
         case kernel_selector::DataLayout::winograd_2x3_s1_data: return "WINOGRAD_2x3_S1_DATA";
         case kernel_selector::DataLayout::byxf_af32: return "BYXF_AF32";
+        case kernel_selector::DataLayout::byx8_f4: return "BYX8_F4";
         case kernel_selector::DataLayout::fs_bs_yx_bsv4_fsv32: return "FS_BS_YX_BSV4_FSV32";
+        case kernel_selector::DataLayout::b_fs_yx_fsv4:      return "B_FS_YX_FSV4";
         default: return "";
         }
     }
@@ -308,6 +311,8 @@ namespace kernel_selector
         case WeightsLayout::iyxo:                       return "IYXO";
         case WeightsLayout::yxio:                       return "YXIO";
         case WeightsLayout::os_iyx_osv16:               return "OS_IYX_OSV16";
+        case WeightsLayout::os_iyx_osv32:               return "OS_IYX_OSV32";
+        case WeightsLayout::os_iyx_osv64:               return "OS_IYX_OSV64";
         case WeightsLayout::os_iyx_osv16_rotate_180:    return "OS_IYX_OSV16_ROTATE_180";
         case WeightsLayout::os_i_osv16:                 return "OS_I_OSV16";
         case WeightsLayout::os_i_osv8__ai8:             return "OS_I_OSV8__AI8";
@@ -323,7 +328,12 @@ namespace kernel_selector
         case WeightsLayout::image_2d_weights_winograd_6x3_s1_fbxyb: return "IMAGE_2D_WEIGHTS_WINOGRAD_6x3_S1_FBXYB";
         case WeightsLayout::image_2d_weights_winograd_6x3_s1_xfbyb: return "IMAGE_2D_WEIGHTS_WINOGRAD_6x3_S1_XFBYB";
         case WeightsLayout::os_is_yx_isa8_osv8_isv4: return "OS_IS_YX_ISA8_OSV8_ISV4";
+        case WeightsLayout::os_is_yx_isa8_osv8_isv4_swizzled_by_4: return "OS_IS_YX_ISA8_OSV8_ISV4_SWIZZLED_BY_4";
         case WeightsLayout::is_o_yx_isv32: return "IS_O_YX_ISV32";
+        case WeightsLayout::is_o32_yx_isv32_swizzled_by_4: return "IS_O32_YX_ISV32_SWIZZLED_BY_4";
+        case WeightsLayout::os_is_y_x8_osv8_isv4: return "OS_IS_Y_X8_OSV8_ISV4";
+        case WeightsLayout::os_is_yx_osv16_isv4:  return "OS_IS_YX_OSV16_ISV4";
+
         default:
             return "";
             break;
@@ -354,6 +364,18 @@ namespace kernel_selector
         }
     }
 
+    std::string toString(GatherAxis a)
+    {
+        switch (a)
+        {
+            case GatherAxis::X:         return "X";
+            case GatherAxis::Y:         return "Y";
+            case GatherAxis::FEATURE:   return "FEATURE";
+            case GatherAxis::BATCH:     return "BATCH";
+            default: return "";
+        }
+    }
+
     std::string toString(SampleType type)
     {
         switch (type)
@@ -388,13 +410,6 @@ namespace kernel_selector
         }
     }
 
-    std::string toString(NonLinearParams params)
-    {
-        std::stringstream s;
-        s << "m" << params.m << "_n" << params.n;
-        return s.str();
-    }
-
     std::string toString(const Tensor::Dim& dim)
     {
         std::stringstream s;
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h
index ef12a74b9..9f5f30474 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_common.h
@@ -17,7 +17,6 @@
 #pragma once
 
 #include "kernel_selector_params.h"
-#include "primitive_db.h"
 
 #include <cfloat>
 #include <cstdint>
@@ -29,7 +28,8 @@
 #include <vector>
 
 #define AGE_BASED "-cl-no-subgroup-ifp"
-#define ROUND_ROBIN ""
+#define DEFAULT ""
+#define NO_PRERA_SCH "-cl-intel-no-prera-scheduling"
 
 namespace kernel_selector {
 
@@ -279,9 +279,9 @@ namespace kernel_selector {
     std::string toString(WeightsLayout layout);
     std::string toString(ConcatAxis a);
     std::string toString(TileAxis a);
+    std::string toString(GatherAxis a);
     std::string toString(SampleType type);
     std::string toString(const BorderType type);
-    std::string toString(NonLinearParams params);
     std::string toString(const Tensor::Dim& dim);
     std::string toString(const DataTensor& tensor);
     std::string toString(const IndexSelectAxis& axis);
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.cpp b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.cpp
index bd718c17a..fab212737 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.cpp
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.cpp
@@ -347,6 +347,16 @@ namespace kernel_selector {
         }
     }
 
+    void ParamsKey::EnableFusedConvEltwEltwiseStride()
+    {
+        key.restrict.val.dedicated.fused_conv_eltw.stride = 1;
+    }
+
+    void ParamsKey::EnableEltwiseStride()
+    {
+        key.restrict.val.dedicated.eltwise.stride = 1;
+    }
+
     void ParamsKey::EnableArgMaxMinAxis(ArgMaxMinAxis a)
     {
         switch (a)
@@ -400,19 +410,35 @@ namespace kernel_selector {
             key.restrict.val.dedicated.lookt.indicesOther = 1;
     }
 
+    void ParamsKey::EnableFusedConvEltwiseRWOutOpt()
+    {
+        key.restrict.val.dedicated.fused_conv_eltw.rw_out_opt = 1;
+    }
+
     bool ParamsKey::Support(const ParamsKey& k) const
     {
-        return
-            ((key.restrict.raw & k.key.restrict.raw) == k.key.restrict.raw) && // check if this kernel supports this params
-            ((key.machineInfo.raw & k.key.machineInfo.raw) == key.machineInfo.raw) && // check if machine supports this kernel
-            ((key.inputType.raw & k.key.inputType.raw) == k.key.inputType.raw) &&
-            ((key.outputType.raw & k.key.outputType.raw) == k.key.outputType.raw) &&
-            ((key.inputWeightsType.raw & k.key.inputWeightsType.raw) == k.key.inputWeightsType.raw) &&
-            ((key.outputWeightsType.raw & k.key.outputWeightsType.raw) == k.key.outputWeightsType.raw) &&
-            ((key.inputLayout & k.key.inputLayout) != 0 || key.inputLayout == k.key.inputLayout) &&
-            ((key.outputLayout & k.key.outputLayout) != 0 || key.outputLayout == k.key.outputLayout) &&
-            ((key.weightsInputLayout & k.key.weightsInputLayout) != 0 || key.weightsInputLayout == k.key.weightsInputLayout) &&
-            ((key.weightsOutputLayout & k.key.weightsOutputLayout) != 0 || key.weightsOutputLayout == k.key.weightsOutputLayout);
+        if (!((key.restrict.raw & k.key.restrict.raw) == k.key.restrict.raw)) // check if this kernel supports this params
+            return false;
+        if (!((key.machineInfo.raw & k.key.machineInfo.raw) == key.machineInfo.raw)) // check if machine supports this kernel
+            return false;
+        if (!((key.inputType.raw & k.key.inputType.raw) == k.key.inputType.raw))
+            return false;
+        if (!((key.outputType.raw & k.key.outputType.raw) == k.key.outputType.raw))
+            return false;
+        if (!((key.inputWeightsType.raw & k.key.inputWeightsType.raw) == k.key.inputWeightsType.raw))
+            return false;
+        if (!((key.outputWeightsType.raw & k.key.outputWeightsType.raw) == k.key.outputWeightsType.raw))
+            return false;
+        if (!((key.inputLayout & k.key.inputLayout) != 0 || key.inputLayout == k.key.inputLayout))
+            return false;
+        if (!((key.outputLayout & k.key.outputLayout) != 0 || key.outputLayout == k.key.outputLayout))
+            return false;
+        if (!((key.weightsInputLayout & k.key.weightsInputLayout) != 0 || key.weightsInputLayout == k.key.weightsInputLayout))
+            return false;
+        if (!((key.weightsOutputLayout & k.key.weightsOutputLayout) != 0 || key.weightsOutputLayout == k.key.weightsOutputLayout))
+            return false;
+
+        return true;
     }
 
     ParamsKey ParamsKey::Merge(const ParamsKey& k) const
@@ -542,12 +568,18 @@ namespace kernel_selector {
         return k;
     }
 
+    std::string base_activation_params::to_string() const
+    {
+        std::stringstream s;
+        s << "m" << m << "_n" << n << "_" << toString(function);
+        return s.str();
+    }
+
     std::string base_params::to_string() const
     {
         std::stringstream s;
         s << Params::to_string() << "_";
-        s << toString(activationParams) << "_";
-        s << toString(activationFunc) << "_";
+        s << activation.to_string() << "_";
 
         for (auto input : inputs)
         {
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h
index d4351f2b4..d8c51997b 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h
+++ b/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,8 +20,8 @@
 #include <memory>
 #include <cstddef>
 #include "common_types.h"
-#include "common_tools.h"
 #include "tensor_type.h"
+#include "document.h"
 
 namespace kernel_selector
 {
@@ -58,6 +58,7 @@ namespace kernel_selector
                 struct val_t
                 {
                     uint32_t different_types : 1;
+                    uint32_t different_input_weights_types : 1;
                     uint32_t offset : 1;
                     uint32_t pitches : 1;
                     uint32_t batching : 1;
@@ -120,15 +121,18 @@ namespace kernel_selector
                             uint32_t fixedKenrelDivider : 1;
                             uint32_t dynamicKenrelDivider : 1;
                             uint32_t dynamicKenrelDividerWithPadding : 1;
+                            uint32_t position_sensitive : 1;
                         } pooling;
                         struct conv_t
                         {
                             uint32_t split : 1;
                             uint32_t dilation : 1;
-                            uint32_t depthwiseSeparableOpt : 1;
+                            uint32_t depthwise_separable_opt : 1;
                             uint32_t transposed : 1;
                             uint32_t quantization : 1;
                             uint32_t calibration : 1;
+                            uint32_t local : 1;
+                            uint32_t grouped : 1;
                         } conv;
                         struct fc_t {} fc;
                         struct softmax_t
@@ -171,6 +175,11 @@ namespace kernel_selector
                         {
                             uint32_t winograd : 1;
                         } reorder;
+                        struct eltwise_t
+                        {
+                            uint32_t stride : 1;
+                            uint32_t broadcast : 1;
+                        } eltwise;
                         struct lstm_gemm_t {
                             uint32_t bias : 1;
                             uint32_t hidden : 1;
@@ -178,6 +187,21 @@ namespace kernel_selector
                         struct lstm_elt_t {
                             uint32_t cell : 1;
                         } lstm_elt;
+                        struct fused_conv_eltw_t {
+                            // conv
+                            uint32_t split : 1;
+                            uint32_t dilation : 1;
+                            uint32_t depthwise_separable_opt : 1;
+                            uint32_t transposed : 1;
+                            uint32_t quantization : 1;
+                            uint32_t calibration : 1;
+                            uint32_t local : 1;
+                            uint32_t grouped : 1;
+                            // eltw
+                            uint32_t stride : 1;
+                            // fused conv eltw
+                            uint32_t rw_out_opt : 1;
+                        } fused_conv_eltw;
                     } dedicated;
                 } val;
                 uint64_t raw;
@@ -233,6 +257,8 @@ namespace kernel_selector
         void EnableAllOutputWeightsType();
         void EnableFP16Emulation() { key.restrict.val.FP16Emulation = 1; }
         void EnableDifferentTypes() { key.restrict.val.different_types = 1; }
+        void EnableDifferentInputWeightsTypes() {
+            key.restrict.val.different_input_weights_types = 1; }
         void EnableInputLayout(DataLayout l) { key.inputLayout |= (1 << l); }
         void EnableAllInputLayout() { key.inputLayout = 0xffffffff; }
         void EnableOutputLayout(DataLayout l) { key.outputLayout |= (1 << l); }
@@ -261,16 +287,32 @@ namespace kernel_selector
         void EnablePoolKernelDividerMode(KernelDividerMode m);
         void EnablePoolType(PoolType t);
         void EnablePoolRemainder(PoolRemainder r);
+        void EnablePositionSensitivePooling() { key.restrict.val.dedicated.pooling.position_sensitive = 1; }
         void EnableSplitSupport() { key.restrict.val.dedicated.conv.split = 1; }
         void EnableDilation() { key.restrict.val.dedicated.conv.dilation = 1; }
-        void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwiseSeparableOpt = 1; }
+        void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwise_separable_opt = 1; }
+        void EnableLocalConvolution() { key.restrict.val.dedicated.conv.local = 1; }
+        void EnableGroupedConvolution() { key.restrict.val.dedicated.conv.grouped = 1; }
         void EnableTranspose() { key.restrict.val.dedicated.conv.transposed = 1; }
         void EnableInt8Quantization() { key.restrict.val.dedicated.conv.quantization = 1; }
         void EnableOutputCalibration() { key.restrict.val.dedicated.conv.calibration = 1; }
+
+        void EnableFusedConvEltwSplitSupport() { key.restrict.val.dedicated.fused_conv_eltw.split = 1; }
+        void EnableFusedConvEltwDilation() { key.restrict.val.dedicated.fused_conv_eltw.dilation = 1; }
+        void EnableFusedConvEltwDepthwiseSeparableOpt() { key.restrict.val.dedicated.fused_conv_eltw.depthwise_separable_opt = 1; }
+        void EnableFusedConvEltwLocalConvolution() { key.restrict.val.dedicated.fused_conv_eltw.local = 1; }
+        void EnableFusedConvEltwGroupedConvolution() { key.restrict.val.dedicated.fused_conv_eltw.grouped = 1; }
+        void EnableFusedConvEltwTranspose() { key.restrict.val.dedicated.fused_conv_eltw.transposed = 1; }
+        void EnableFusedConvEltwInt8Quantization() { key.restrict.val.dedicated.fused_conv_eltw.quantization = 1; }
+        void EnableFusedConvEltwOutputCalibration() { key.restrict.val.dedicated.fused_conv_eltw.calibration = 1; }
+        void EnableFusedConvEltwEltwiseStride();
+
         void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
         void EnableSoftmaxDim(SoftmaxDim d);
         void EnableConcatAxis(ConcatAxis a);
         void EnableUpSamplingSampleType(SampleType a);
+        void EnableEltwiseStride();
+        void EnableEltwiseBroadcast() { key.restrict.val.dedicated.eltwise.broadcast = 1; }
         void EnableLSTMGEMMBias() { key.restrict.val.dedicated.lstm_gemm.bias = 1; }
         void EnableLSTMGEMMHidden() { key.restrict.val.dedicated.lstm_gemm.hidden = 1; }
         void EnableLSTMEltCell() { key.restrict.val.dedicated.lstm_elt.cell = 1; }
@@ -280,6 +322,7 @@ namespace kernel_selector
         void EnableArgMaxMinAxis(ArgMaxMinAxis a);
         void EnableLookUpTableIndicesFormat(Datatype a);
         void EnableIndexSelectAxis(IndexSelectAxis a);
+        void EnableFusedConvEltwiseRWOutOpt();
         bool Support(const ParamsKey& k) const;
         bool TuningSupport() const
         {
@@ -287,6 +330,9 @@ namespace kernel_selector
                 return true;
             return false;
         }
+        bool isEnabledDifferentInputWeightsTypes() const {
+            return key.restrict.val.different_input_weights_types ? true : false;
+        }
         ParamsKey Merge(const ParamsKey& k) const;
 
     private:
@@ -305,6 +351,7 @@ namespace kernel_selector
         bool bImageSupport = false;
         bool bIMADSupport = false;
         bool bIMMADSupport = false;
+        uint32_t computeUnitsCount = 0;
         uint64_t maxWorkGroupSize = 0;
         uint64_t maxLocalMemSize = 0;
         uint64_t maxImage2dWidth = 0;
@@ -312,6 +359,7 @@ namespace kernel_selector
         std::string deviceId = "";
         std::string driverVersion = "";
         std::string hostVersion = "";
+        std::shared_ptr<rapidjson::Document> deviceCache;
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -336,17 +384,31 @@ namespace kernel_selector
     };
 
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    // base_activation_params
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+    struct base_activation_params
+    {
+        ActivationFunction  function = ActivationFunction::NONE;
+        float m = 1.f;
+        float n = 0.f;
+
+        base_activation_params() = default;
+        base_activation_params(const float m, const float n) : m(m), n(n) {}
+
+        virtual std::string to_string() const;
+    };
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     // base_params
     ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
     struct base_params : public Params
     {
         virtual ~base_params() {}
 
-        ActivationFunction  activationFunc = ActivationFunction::NONE;
-        NonLinearParams     activationParams;
-        MultiDataTensor     inputs;
-        DataTensor          output;
-        bool                gradient = false;
+        base_activation_params activation;
+        MultiDataTensor        inputs;
+        DataTensor             output;
+        bool                   gradient = false;
 
         virtual std::string to_string() const;
         virtual ParamsKey GetParamsKey() const;
diff --git a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
index 0ba989f7d..861a09dab 100644
--- a/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/src/CMakeLists.txt
@@ -77,6 +77,26 @@ file(GLOB __CLDNN_Headers__api__c
     "${__CLDNN_Directory__api__c}/*.hpp"
   )
 
+set(__CLDNN_Label__api_extension       "api_extension")
+file(GLOB __CLDNN_Headers__api_extension
+    "${CLDNN__API_EXTENSION_DIR}/*.h"
+    "${CLDNN__API_EXTENSION_DIR}/*.hpp"
+  )
+
+set(__CLDNN_Directory__api_extension__cpp "${CLDNN__API_EXTENSION_DIR}/CPP")
+set(__CLDNN_Label__api_extension__cpp     "${__CLDNN_Label__api_extension}\\CPP")
+file(GLOB __CLDNN_Headers__api_extension__cpp
+    "${__CLDNN_Directory__api_extension__cpp}/*.h"
+    "${__CLDNN_Directory__api_extension__cpp}/*.hpp"
+  )
+
+set(__CLDNN_Directory__api_extension__c "${CLDNN__API_EXTENSION_DIR}/C")
+set(__CLDNN_Label__api_extension__c     "${__CLDNN_Label__api_extension}\\C")
+file(GLOB __CLDNN_Headers__api_extension__c
+    "${__CLDNN_Directory__api_extension__c}/*.h"
+    "${__CLDNN_Directory__api_extension__c}/*.hpp"
+  )
+
 set(__CLDNN_Label__main                "")
 file(GLOB __CLDNN_Sources__main
     "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
@@ -84,6 +104,14 @@ file(GLOB __CLDNN_Sources__main
     "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
   )
 
+set(__CLDNN_Directory__graph_opt       "${CMAKE_CURRENT_SOURCE_DIR}/graph_optimizer")
+set(__CLDNN_Label__graph_opt           "graph_optimizer")
+file(GLOB __CLDNN_Sources__graph_opt
+    "${__CLDNN_Directory__graph_opt}/*.h"
+    "${__CLDNN_Directory__graph_opt}/*.hpp"
+    "${__CLDNN_Directory__graph_opt}/*.cpp"
+  )
+
 set(__CLDNN_Directory__include         "${CMAKE_CURRENT_SOURCE_DIR}/include")
 set(__CLDNN_Label__include             "include")
 file(GLOB __CLDNN_Headers__include
@@ -146,10 +174,14 @@ set(__CLDNN_Directory__ks_cache          "${__CLDNN_Directory__ks_core}/cache")
 
 set(__CLDNN_AllSources
     ${__CLDNN_Headers__api}
+    ${__CLDNN_Sources__graph_opt}
     ${__CLDNN_Headers__include}
     ${__CLDNN_Sources__caps}
     ${__CLDNN_Headers__api__cpp}
     ${__CLDNN_Headers__api__c}
+    ${__CLDNN_Headers__api_extension}
+    ${__CLDNN_Headers__api_extension__c}
+    ${__CLDNN_Headers__api_extension__cpp}
     ${__CLDNN_Sources__main}
     ${__CLDNN_Sources__gpu}
     ${__CLDNN_Sources__cache}
@@ -161,16 +193,20 @@ set_property(SOURCE ${__CLDNN_Sources__cg_cache} PROPERTY GENERATED TRUE)
 
 # =============================================== Filters ==============================================
 
-source_group("${__CLDNN_Label__api}"        FILES ${__CLDNN_Headers__api})
-source_group("${__CLDNN_Label__api__cpp}"   FILES ${__CLDNN_Headers__api__cpp})
-source_group("${__CLDNN_Label__api__c}"     FILES ${__CLDNN_Headers__api__c})
-source_group("${__CLDNN_Label__include}"    FILES ${__CLDNN_Headers__include})
-source_group("${__CLDNN_Label__caps}"       FILES ${__CLDNN_Sources__caps})
-source_group("${__CLDNN_Label__main}"       FILES ${__CLDNN_Sources__main})
-source_group("${__CLDNN_Label__gpu}"        FILES ${__CLDNN_Sources__gpu})
-source_group("${__CLDNN_Label__cache}"      FILES ${__CLDNN_Sources__cache})
-source_group("${__CLDNN_Label__ch_kernels}" FILES ${__CLDNN_Sources__ch_kernels})
-source_group("${__CLDNN_Label__cg_cache}"   FILES ${__CLDNN_Sources__cg_cache})
+source_group("${__CLDNN_Label__api}"                  FILES ${__CLDNN_Headers__api})
+source_group("${__CLDNN_Label__api__cpp}"             FILES ${__CLDNN_Headers__api__cpp})
+source_group("${__CLDNN_Label__api__c}"               FILES ${__CLDNN_Headers__api__c})
+source_group("${__CLDNN_Label__api_extension}"        FILES ${__CLDNN_Headers__api_extension})
+source_group("${__CLDNN_Label__api_extension__cpp}"   FILES ${__CLDNN_Headers__api_extension__cpp})
+source_group("${__CLDNN_Label__api_extension__c}"     FILES ${__CLDNN_Headers__api_extension__c})
+source_group("${__CLDNN_Label__include}"              FILES ${__CLDNN_Headers__include})
+source_group("${__CLDNN_Label__graph_opt}"            FILES ${__CLDNN_Sources__graph_opt})
+source_group("${__CLDNN_Label__caps}"                 FILES ${__CLDNN_Sources__caps})
+source_group("${__CLDNN_Label__main}"                 FILES ${__CLDNN_Sources__main})
+source_group("${__CLDNN_Label__gpu}"                  FILES ${__CLDNN_Sources__gpu})
+source_group("${__CLDNN_Label__cache}"                FILES ${__CLDNN_Sources__cache})
+source_group("${__CLDNN_Label__ch_kernels}"           FILES ${__CLDNN_Sources__ch_kernels})
+source_group("${__CLDNN_Label__cg_cache}"             FILES ${__CLDNN_Sources__cg_cache})
 
 # ===================================== Include/Link directories =======================================
 
diff --git a/inference-engine/thirdparty/clDNN/src/activation.cpp b/inference-engine/thirdparty/clDNN/src/activation.cpp
index 503c7205c..f4592874c 100644
--- a/inference-engine/thirdparty/clDNN/src/activation.cpp
+++ b/inference-engine/thirdparty/clDNN/src/activation.cpp
@@ -29,6 +29,8 @@ primitive_type_id activation_type_id()
 
 layout activation_inst::calc_output_layout(activation_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for activation_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/activation_grad.cpp b/inference-engine/thirdparty/clDNN/src/activation_grad.cpp
index 9d277c585..ecae7733b 100644
--- a/inference-engine/thirdparty/clDNN/src/activation_grad.cpp
+++ b/inference-engine/thirdparty/clDNN/src/activation_grad.cpp
@@ -29,6 +29,9 @@ primitive_type_id activation_grad_type_id()
 
 layout activation_grad_inst::calc_output_layout(activation_grad_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "activation_grad_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/apply_adam.cpp b/inference-engine/thirdparty/clDNN/src/apply_adam.cpp
index 1b0e9d443..24b659e58 100644
--- a/inference-engine/thirdparty/clDNN/src/apply_adam.cpp
+++ b/inference-engine/thirdparty/clDNN/src/apply_adam.cpp
@@ -27,8 +27,15 @@ primitive_type_id apply_adam_type_id()
     return &instance;
 }
 
+apply_adam_node::typed_program_node(const std::shared_ptr<apply_adam> prim, program_impl& prog)
+    : parent(prim, prog)
+{
+    can_share_buffer(false); //apply adam's output initial val should be either 0 or use same buffer as mutable_data after it (no allocation needed)
+}
 layout apply_adam_inst::calc_output_layout(apply_adam_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for apply_adam_node!");
     return node.input().get_non_padded_output_layout();
 }
 
@@ -72,4 +79,4 @@ apply_adam_inst::typed_primitive_inst(network_impl& network, apply_adam_node con
     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "beta1_power format", beta1_power_format.value, "supported beta1_power formats", format::yxfb, format::bfyx);
     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "beta2_power format", beta2_power_format.value, "supported beta2_power formats", format::yxfb, format::bfyx);
 }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp b/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp
index aa2f0e40b..96cdca3e8 100644
--- a/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp
+++ b/inference-engine/thirdparty/clDNN/src/arg_max_min.cpp
@@ -31,7 +31,10 @@ namespace cldnn
 
 	layout arg_max_min_inst::calc_output_layout(arg_max_min_node const& node)
 	{
-		auto desc = node.get_primitive();
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "arg_max_min_node!");
+        auto desc = node.get_primitive();
 
 		auto input_layout = node.input().get_output_layout();
 
diff --git a/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp b/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp
index aed36d0e6..4badd962a 100644
--- a/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/average_unpooling.cpp
@@ -30,6 +30,9 @@ namespace cldnn
 
     layout average_unpooling_inst::calc_output_layout(average_unpooling_node const& node)
     {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "average_unpooling_node!");
         auto desc = node.get_primitive();
 
         auto input_layout = node.input().get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/batch_norm.cpp b/inference-engine/thirdparty/clDNN/src/batch_norm.cpp
index 0aea3e665..2b972b1bc 100644
--- a/inference-engine/thirdparty/clDNN/src/batch_norm.cpp
+++ b/inference-engine/thirdparty/clDNN/src/batch_norm.cpp
@@ -18,6 +18,7 @@
 #include "primitive_type_base.h"
 #include "error_handler.h"
 #include "json_object.h"
+#include "mutable_data_inst.h"
 
 namespace cldnn
 {
@@ -29,40 +30,43 @@ primitive_type_id batch_norm_type_id()
 
 layout batch_norm_inst::calc_output_layout(batch_norm_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for batch_norm_node!");
     return node.input().get_non_padded_output_layout();
 }
 
 std::string batch_norm_inst::to_string(batch_norm_node const& node)
 {
-    auto desc      = node.get_primitive();
-    auto node_info = node.desc_to_json();
-    auto& mean     = node.mean();
     bool variance_term = node.variance_term();
-    auto& inv_var  = node.inv_variance();
 
     std::stringstream primitive_description;
-
     json_composite batch_norm_info;
     if (node.use_global_stats())
     {
-        batch_norm_info.add("mean_id", mean.id());
+        batch_norm_info.add("mean_id", node.mean().id());
         if (variance_term)
         {
             batch_norm_info.add("variance_id", node.variance().id());
         }
     }
+    if (node.use_scale_shift())
+    {
+        batch_norm_info.add("scale_id", node.scale().id());
+        batch_norm_info.add("shift_id", node.shift().id());
+    }
     if (node.forwad_pass())
     {
-        batch_norm_info.add("inv_var", inv_var.id());
+        batch_norm_info.add("inv_var", node.inv_variance().id());
     }
-    batch_norm_info.add("epsilon", desc->epsilon);
+    batch_norm_info.add("epsilon", node.get_primitive()->epsilon);
 
-    node_info->add("batch norm info", batch_norm_info);
-    node_info->dump(primitive_description);
+    node.desc_to_json()->add("batch norm info", batch_norm_info);
+    node.desc_to_json()->dump(primitive_description);
 
     return primitive_description.str();
 }
 
+
 batch_norm_inst::typed_primitive_inst(network_impl& network, batch_norm_node const& node)
     :parent(network, node) 
 {
@@ -71,8 +75,27 @@ batch_norm_inst::typed_primitive_inst(network_impl& network, batch_norm_node con
         auto mean_format = node.mean().get_output_layout().format;
         auto variance_format = node.variance().get_output_layout().format;
 
-        CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Mean format", mean_format.value, "supported mean formats", format::yxfb, format::bfyx);
-        CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Variance format", variance_format.value, "supported variance formats", format::yxfb, format::bfyx);
+        CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Mean format", mean_format.value, "supported mean formats", format::yxfb, format::bfyx, format::byxf);
+        CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Variance format", variance_format.value, "supported variance formats", format::yxfb, format::bfyx, format::byxf);
+
+		auto is_mean_mutable_data = node.mean().is_type<mutable_data>();
+		auto is_var_mutable_data = node.variance().is_type<mutable_data>();
+
+		CLDNN_ERROR_BOOL(node.id(), "mean and variance are not the same type", (is_mean_mutable_data != is_var_mutable_data), "");
     }
+
+	if (use_scale_shift()) {
+		auto scale_format = node.scale().get_output_layout().format;
+		auto shift_format = node.shift().get_output_layout().format;
+
+		CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Scale format", scale_format.value, "supported scale formats", format::yxfb, format::bfyx, format::byxf);
+		CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Shift format", shift_format.value, "supported shift formats", format::yxfb, format::bfyx, format::byxf);
+	}
+
+	if (forwad_pass())
+	{
+		auto is_inv_var_mutable_data = node.inv_variance().is_type<mutable_data>();
+		CLDNN_ERROR_BOOL(node.id(), "inv_variance is not mutable_data type", !is_inv_var_mutable_data, "");
+	}
+}
 }
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/batch_norm_grad.cpp b/inference-engine/thirdparty/clDNN/src/batch_norm_grad.cpp
index d30e771f8..cadcb7d8a 100644
--- a/inference-engine/thirdparty/clDNN/src/batch_norm_grad.cpp
+++ b/inference-engine/thirdparty/clDNN/src/batch_norm_grad.cpp
@@ -29,6 +29,9 @@ namespace cldnn
 
     layout batch_norm_grad_inst::calc_output_layout(parent::typed_node const& node)
     {
+        assert(
+            (bool)node.get_primitive()->output_data_type == false
+            && "Output data type forcing is not supported for batch_norm_grad_node!");
         return node.input().get_non_padded_output_layout();
     }
 
diff --git a/inference-engine/thirdparty/clDNN/src/border.cpp b/inference-engine/thirdparty/clDNN/src/border.cpp
index b07a1f9e7..2a2c5b648 100644
--- a/inference-engine/thirdparty/clDNN/src/border.cpp
+++ b/inference-engine/thirdparty/clDNN/src/border.cpp
@@ -30,22 +30,24 @@ primitive_type_id border_type_id()
 
 layout border_inst::calc_output_layout(border_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for border_node!");
     auto input_layout = node.input().get_output_layout();
     auto desc         = node.get_primitive();
 
     auto&& new_size = input_layout.size;
-    new_size += desc->left_top_sizes;
-    new_size += desc->right_bottom_sizes;
+    new_size += desc->left_top_sizes.sub({0, 0, 0, 0});
+    new_size += desc->right_bottom_sizes.sub({0, 0, 0, 0});
 
-    return {input_layout.data_type, input_layout.format, new_size};
+    return { input_layout.data_type, input_layout.format, {new_size.batch[0], new_size.feature[0], new_size.spatial[0], new_size.spatial[1]} };
 }
 
 std::string border_inst::to_string(border_node const& node)
 {
     auto desc = node.get_primitive();
 
-    const auto& left_top_sizes     = desc->left_top_sizes;
-    const auto& right_bottom_sizes = desc->right_bottom_sizes;
+    const auto& left_top_sizes     = desc->left_top_sizes.sub({0, 0, 0, 0});
+    const auto& right_bottom_sizes = desc->right_bottom_sizes.sub({0, 0, 0, 0});
     const auto& border_value       = std::to_string(desc->border_value);
 
     const char* border_type_str = "unknown";
@@ -80,8 +82,8 @@ border_inst::typed_primitive_inst(network_impl& network, border_node const& node
     const auto input_format = input_layout.format;
     const auto& input_sizes = input_layout.size;
 
-    auto lt_sizes = argument.left_top_sizes;
-    auto rb_sizes = argument.right_bottom_sizes;
+    auto lt_sizes = argument.left_top_sizes.sub({0, 0, 0, 0});
+    auto rb_sizes = argument.right_bottom_sizes.sub({0, 0, 0, 0});
     auto b_type   = argument.type;
 
     CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input format", input_format.value, "supported border primitive input formats",
diff --git a/inference-engine/thirdparty/clDNN/src/broadcast.cpp b/inference-engine/thirdparty/clDNN/src/broadcast.cpp
index 4113e5340..d7f87383b 100644
--- a/inference-engine/thirdparty/clDNN/src/broadcast.cpp
+++ b/inference-engine/thirdparty/clDNN/src/broadcast.cpp
@@ -30,28 +30,39 @@ primitive_type_id broadcast_type_id()
 
 layout broadcast_inst::calc_output_layout(broadcast_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for broadcast_node!");
     auto input_layout = node.input().get_output_layout();
     auto desc         = node.get_primitive();
 
-    auto&& new_size = tensor::max(desc->broadcast_sizes, input_layout.size);
-    return {input_layout.data_type, input_layout.format, new_size};
+    return {input_layout.data_type, input_layout.format, desc->broadcast_sizes};
 }
 
 std::string broadcast_inst::to_string(broadcast_node const& node)
 {
-    auto desc = node.get_primitive();
+    auto desc                  = node.get_primitive();
+    auto node_info             = node.desc_to_json();
+    const auto& broadcast_sizes   = desc->broadcast_sizes;
+    const auto& broadcast_axes = desc->broadcast_axes;
+    auto& input                = node.input();
 
-    const auto& broadcast_sizes     = desc->broadcast_sizes;
+    std::stringstream primitive_description;
+    std::stringstream ss_broadcast_axes;
+
+    for (size_t i = 0; i < broadcast_axes.size(); ++i)
+    {
+        ss_broadcast_axes << broadcast_axes.at(i);
+        i != (broadcast_axes.size() - 1) ? ss_broadcast_axes << ", " : ss_broadcast_axes << "";
+    }
 
-    auto node_info  = node.desc_to_json();
-   
     json_composite broadcast_info;
-    broadcast_info.add("broadcast sizes", broadcast_sizes.to_string());
+    broadcast_info.add("input id", input.id());
+    broadcast_info.add("broadcast_sizes", broadcast_sizes.to_string());
+    broadcast_info.add("broadcast axes", ss_broadcast_axes.str());
 
     node_info->add("broadcast info", broadcast_info);
-
-    std::stringstream primitive_description;
     node_info->dump(primitive_description);
+
     return primitive_description.str();
 }
 
@@ -60,23 +71,56 @@ broadcast_inst::typed_primitive_inst(network_impl& network, broadcast_node const
 {
     auto input_layout = node.input().get_output_layout();
 
-    const auto input_format = input_layout.format;
     const auto& input_sizes = input_layout.size;
-
-    auto bc_sizes = argument.broadcast_sizes;
-
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input format", input_format.value, "supported broadcast primitive input formats",
-                                  format::bfyx, format::yxfb, format::byxf);
-
-
-    // Check if sizes of broadcast are in proper range.
-    CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "Broadcast sizes", bc_sizes, "0 value", {1, 1, 1, 1},
-                                       "Invalid broadcast size: non-positive value");
-
-    bc_sizes = tensor::max(bc_sizes, input_sizes);
-
-    // Check if sizes of broadcast are compatible with sizes of input.
-    CLDNN_ERROR_TENSOR_SIZES_NOT_DIVIDABLE(node.id(), "Broadcast sizes", bc_sizes, "input sizes", input_sizes,
+    const auto& output_sizes = argument.broadcast_sizes;
+
+    std::vector<tensor::value_type> input_dims = {input_sizes.batch[0], input_sizes.feature[0],
+                                                  input_sizes.spatial[1], input_sizes.spatial[0]};
+    std::vector<tensor::value_type> reordered_input_dims(4, 0);
+    std::set<uint16_t> existing;
+
+    const auto& broadcast_axes = node.get_primitive()->broadcast_axes;
+    size_t broadcast_axes_size = broadcast_axes.size();
+    size_t index = 0;
+    size_t input_index = broadcast_axes_size;
+
+    if (broadcast_axes_size > 4)
+    {
+        CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: broadcast_axes size should be less or equal 4.");
+    }
+    for (size_t i = 0; i < broadcast_axes_size; ++i)
+    {
+        if (broadcast_axes.at(i) >= 4)
+        {
+            CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: broadcast_axes index should be within broadcast_sizes range.");
+        }
+        if (existing.find(broadcast_axes.at(i)) != existing.end())
+        {
+            CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: Duplicate axes numbers was found in broadcast_axes.");
+        }
+        existing.insert(broadcast_axes.at(i));
+    }
+    for (size_t i = 0; i < input_index; ++i)
+    {
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input size on dimension number " + std::to_string(i), input_dims.at(i), "", 1, "Must be equal 1.");
+    }
+    //bfyx format
+    for (size_t i = 0; i < 4; ++i)
+    {
+        if (std::find(broadcast_axes.begin(), broadcast_axes.end(), i) != broadcast_axes.end())
+        {
+            reordered_input_dims.at(i) = input_dims.at(index);
+            ++index;
+        }
+        else
+        {
+            reordered_input_dims.at(i) = input_dims.at(input_index);
+            ++input_index;
+        }
+    }
+    tensor input_sizes_to_compare(reordered_input_dims.at(0), reordered_input_dims.at(1), reordered_input_dims.at(3), reordered_input_dims.at(2));
+
+    CLDNN_ERROR_TENSOR_SIZES_NOT_DIVIDABLE(node.id(), "Broadcast sizes", output_sizes, "input sizes", input_sizes_to_compare,
                                            "Invalid broadcast size: not dividable by input size");
 }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/src/caps/public/gpu_devices.inc b/inference-engine/thirdparty/clDNN/src/caps/public/gpu_devices.inc
deleted file mode 100644
index 06c1554ba..000000000
--- a/inference-engine/thirdparty/clDNN/src/caps/public/gpu_devices.inc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
-// Copyright (c) 2016 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-
-//HD IRIS 5XX series
-GEN_DEVICE(HD510,         0x1906, HD5XX, GEN9, GT1)
-GEN_DEVICE(HD520,         0x1916, HD5XX, GEN9, GT2)
-GEN_DEVICE(HD515,         0x191E, HD5XX, GEN9, GT2)
-GEN_DEVICE(HD530,         0x1912, HD5XX, GEN9, GT2)
-GEN_DEVICE(IRIS_540_550,  0x1926, HD5XX, GEN9, GT3)
-GEN_DEVICE(HD510,         0x1902, HD5XX, GEN9, GT1)
-GEN_DEVICE(IRIS_PRO_580,  0x193A, HD5XX, GEN9, GT4)
-GEN_DEVICE(IRIS_PRO_580,  0x193B, HD5XX, GEN9, GT4)
-GEN_DEVICE(HD530,         0x191B, HD5XX, GEN9, GT2)
-GEN_DEVICE(HD_P530,       0x191D, HD5XX, GEN9, GT2)
-GEN_DEVICE(IRIS_PRO_P580, 0x193D, HD5XX, GEN9, GT4)
-
-GEN_DEVICE(HD_500,        0x5A84, HD500_505, GEN9, GT1)
-GEN_DEVICE(HD_505,        0x5A85, HD500_505, GEN9, GT1)
-GEN_DEVICE(Joule_570x,    0x1A84, HD500_505, GEN9, GT1)
-GEN_DEVICE(Joule_550x,    0x1A85, HD500_505, GEN9, GT1)
-
-//HD IRIS 6XX series
-GEN_DEVICE(HD610,         0x5906, HD6XX, GEN9, GT1)
-GEN_DEVICE(HD620,         0x5916, HD6XX, GEN9, GT2)
-GEN_DEVICE(IRIS_PLUS_640, 0x5926, HD6XX, GEN9, GT3)
-GEN_DEVICE(IRIS_PLUS_650, 0x5927, HD6XX, GEN9, GT3)
-GEN_DEVICE(HD615,         0x591E, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD610,         0x5902, HD6XX, GEN9, GT1)
-GEN_DEVICE(HD630,         0x5912, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD630,         0x591B, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD_P630,       0x591D, HD6XX, GEN9, GT2)
-
-//8th generation
-GEN_DEVICE(HD610,         0x3E90, HD6XX, GEN9, GT1)
-GEN_DEVICE(HD610,         0x3E93, HD6XX, GEN9, GT1)
-GEN_DEVICE(HD620,         0x3E91, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD620,         0x3E92, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD620,         0x3E96, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD620,         0x5917, HD6XX, GEN9, GT2)
-GEN_DEVICE(HD630,         0x3EA5, HD6XX, GEN9, GT3)
-GEN_DEVICE(HD630,         0x3EA6, HD6XX, GEN9, GT3)
-GEN_DEVICE(HD630,         0x3EA7, HD6XX, GEN9, GT3)
-GEN_DEVICE(HD630,         0x3EA8, HD6XX, GEN9, GT3)
-
-GEN_DEVICE(HD605,       0x3184, HD6XX, GEN9, GT2)  
-GEN_DEVICE(HD600   ,       0x3185, HD6XX, GEN9, GT2)
-
-
-
diff --git a/inference-engine/thirdparty/clDNN/src/caps/public/gpu_enums.inc b/inference-engine/thirdparty/clDNN/src/caps/public/gpu_enums.inc
deleted file mode 100644
index b811d6895..000000000
--- a/inference-engine/thirdparty/clDNN/src/caps/public/gpu_enums.inc
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright (c) 2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-
-GPU_CONFIGURATION(GT0,           0)
-GPU_CONFIGURATION(GT1,          10)
-GPU_CONFIGURATION(GT2,          20)
-GPU_CONFIGURATION(GT3,          30)
-GPU_CONFIGURATION(GT4,          40)
-GPU_CONFIGURATION(GT_UNKNOWN, 1000)
-
-GPU_MODEL(HD500_505,   505)
-GPU_MODEL(HD5XX,       599)
-GPU_MODEL(HD6XX,       699)
-GPU_MODEL(FUTURE,    10000)
-
-GPU_ARCHITECTURE(GEN9,          90)
-GPU_ARCHITECTURE(GEN_UNKNOWN, 1000)
-
diff --git a/inference-engine/thirdparty/clDNN/src/caps/public/mode.inc b/inference-engine/thirdparty/clDNN/src/caps/public/mode.inc
index d2f02afd1..965373f70 100644
--- a/inference-engine/thirdparty/clDNN/src/caps/public/mode.inc
+++ b/inference-engine/thirdparty/clDNN/src/caps/public/mode.inc
@@ -14,6 +14,5 @@
 // limitations under the License.
 */
 
-bool public_caps = true;
 bool is_imad_supported(int) { return false; }
 bool is_immad_supported(int) { return false; }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/cldnn.cpp b/inference-engine/thirdparty/clDNN/src/cldnn.cpp
index 2985bef04..a69069aa5 100644
--- a/inference-engine/thirdparty/clDNN/src/cldnn.cpp
+++ b/inference-engine/thirdparty/clDNN/src/cldnn.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -107,45 +107,55 @@ void cldnn_change_input_layout(cldnn_topology topology, cldnn_primitive_id id, c
     });
 }
 
-void cldnn_get_primitive_ids(cldnn_topology topology, char* ids, size_t size, size_t* size_ret, cldnn_status* status)
+static void primitive_id_vector_to_char_array(
+    char* names,
+    size_t size,
+    size_t* size_ret,
+    cldnn_status* status,
+    const std::vector<primitive_id>& vec)
 {
-    return exception_handler(CLDNN_ERROR, status, [&]()
+    *size_ret = std::accumulate(
+        std::begin(vec),
+        std::end(vec),
+        size_t(1), // final zero symbol
+        [](size_t acc, const cldnn::primitive_id& id)
     {
-        SHOULD_NOT_BE_NULL(topology, "Topology");
-        auto ids_size = api_cast(topology)->get_primitives().size();
-        SHOULD_NOT_EQUAL_0(ids_size, "Primitives number");
-        auto& primitives_ids = api_cast(topology)->get_primitives_id();
-        *size_ret = std::accumulate(
-            std::begin(primitives_ids),
-            std::end(primitives_ids),
-            size_t(1), //final zero symbol
-            [](size_t acc, const cldnn::primitive_id& id)
-            {
-                return acc + id.size() + 1; // plus zero symbol
-            });
+        return acc + id.size() + 1; // plus zero symbol
+    });
 
-        if (size < *size_ret)
-        {
-            if (status) *status = CLDNN_INVALID_ARG;
-            return;
-        }
+    if (size < *size_ret)
+    {
+        if (status) *status = CLDNN_INVALID_ARG;
+        return;
+    }
 
-        size_t i = 0;
-        for (auto& id : primitives_ids)
-        {
-            // workaround for Microsoft VC++
+    size_t i = 0;
+    for (auto& id : vec)
+    {
+        // workaround for Microsoft VC++
 #if defined _MSC_VER
 #pragma warning(push)
 #pragma warning(disable: 4996)
 #endif
-            i += id.copy(ids + i, size - i - 2);
+        i += id.copy(names + i, size - i - 2);
 #if defined _MSC_VER
 #pragma warning(pop)
 #endif
-            ids[i++] = 0; // plus zero symbol
-            assert(i < size);
-        }
-        ids[i] = 0; // final zero symbol
+        names[i++] = 0; // plus zero symbol
+        assert(i < size);
+    }
+    names[i] = 0; // final zero symbol
+}
+
+void cldnn_get_primitive_ids(cldnn_topology topology, char* ids, size_t size, size_t* size_ret, cldnn_status* status)
+{
+    return exception_handler(CLDNN_ERROR, status, [&]()
+    {
+        SHOULD_NOT_BE_NULL(topology, "Topology");
+        auto ids_size = api_cast(topology)->get_primitives().size();
+        SHOULD_NOT_EQUAL_0(ids_size, "Primitives number");
+        auto&& primitives_ids = api_cast(topology)->get_primitives_id();
+        primitive_id_vector_to_char_array(ids, size, size_ret, status, primitives_ids);
     });
 }
 
@@ -206,19 +216,19 @@ cldnn_engine cldnn_create_engine(/*cldnn_engine_type*/ int32_t type, uint32_t en
 
 void cldnn_retain_engine(cldnn_engine engine, cldnn_status* status)
 {
-        exception_handler(CLDNN_ERROR, status, [&]() 
-        { 
+        exception_handler(CLDNN_ERROR, status, [&]()
+        {
             SHOULD_NOT_BE_NULL(engine, "Engine");
-            api_cast(engine)->add_ref(); 
+            api_cast(engine)->add_ref();
         });
 }
 
 void cldnn_release_engine(cldnn_engine engine, cldnn_status* status)
 {
-        exception_handler(CLDNN_ERROR, status, [&]() 
-        { 
+        exception_handler(CLDNN_ERROR, status, [&]()
+        {
             SHOULD_NOT_BE_NULL(engine, "Engine");
-            api_cast(engine)->release(); 
+            api_cast(engine)->release();
         });
 }
 
@@ -296,19 +306,19 @@ CLDNN_API int32_t cldnn_is_user_event(cldnn_event event, cldnn_status * status)
 void cldnn_retain_event(cldnn_event event, cldnn_status* status)
 {
 
-    exception_handler(CLDNN_ERROR, status, [&]() 
-    { 
+    exception_handler(CLDNN_ERROR, status, [&]()
+    {
         SHOULD_NOT_BE_NULL(event, "Event");
-        api_cast(event)->add_ref(); 
+        api_cast(event)->add_ref();
     });
 }
 
 void cldnn_release_event(cldnn_event event, cldnn_status* status)
 {
-    exception_handler(CLDNN_ERROR, status, [&]() 
-    { 
+    exception_handler(CLDNN_ERROR, status, [&]()
+    {
         SHOULD_NOT_BE_NULL(event, "Event");
-        api_cast(event)->release(); 
+        api_cast(event)->release();
     });
 }
 
@@ -447,10 +457,10 @@ void cldnn_set_network_input(cldnn_network network, cldnn_primitive_id id, cldnn
 {
     exception_handler(CLDNN_ERROR, status, [&]()
     {
+        SHOULD_NOT_BE_NULL(mem, "Mem");
         auto mem_size = api_cast(mem)->size();
         SHOULD_NOT_BE_NULL(network,     "Network");
         SHOULD_NOT_BE_NULL(id,          "Id");
-        SHOULD_NOT_BE_NULL(mem,         "Mem");
         SHOULD_NOT_EQUAL_0(mem_size,    "Memory size");
         api_cast(network)->set_input_data(id, *api_cast(mem));
     });
@@ -466,7 +476,7 @@ void cldnn_set_learning_rate(cldnn_network network, float lr, cldnn_status* stat
 
 float cldnn_get_learning_rate(cldnn_network network, cldnn_status* status)
 {
-    return exception_handler<float>(CLDNN_ERROR, status, 0, [&]() 
+    return exception_handler<float>(CLDNN_ERROR, status, 0, [&]()
     {
         return api_cast(network)->get_learning_rate();
     });
@@ -485,7 +495,7 @@ cldnn_engine cldnn_get_network_engine(cldnn_network network, cldnn_status* statu
 cldnn_program cldnn_get_network_program(cldnn_network network, cldnn_status* status)
 {
     return exception_handler<cldnn_program>(CLDNN_ERROR, status, nullptr, [&]()
-    {   
+    {
         SHOULD_NOT_BE_NULL(network, "Network");
         refcounted_obj_ptr<cldnn::program_impl> ptr{const_cast<cldnn::program_impl*>(&api_cast(network)->get_program())};
         return api_cast(ptr.detach());
@@ -509,7 +519,7 @@ void cldnn_get_primitive_info(cldnn_network network, cldnn_primitive_id prim_id,
         size_t i = 0;
         for (const auto c : prim_info)
         {
-            info[i++] = c; 
+            info[i++] = c;
             assert(i < size);
         }
         info[i] = 0; // final zero symbol
@@ -520,41 +530,10 @@ void cldnn_get_network_output_names(cldnn_network network, char* names, size_t s
 {
     exception_handler(CLDNN_ERROR, status, [&]()
     {
-        auto output_size = api_cast(network)->get_output_ids().size();
-        SHOULD_NOT_BE_NULL(network,        "Network");
-        SHOULD_NOT_EQUAL_0(output_size, "Output size");
+        SHOULD_NOT_BE_NULL(network, "Network");
         auto&& output_ids = api_cast(network)->get_output_ids();
-        *size_ret = std::accumulate(
-            std::begin(output_ids),
-            std::end(output_ids),
-            size_t(1), // final zero symbol
-            [](size_t acc, const cldnn::primitive_id& id)
-            {
-                return acc + id.size() + 1; // plus zero symbol
-            });
-
-        if(size < *size_ret)
-        {
-            if (status) *status = CLDNN_INVALID_ARG;
-            return;
-        }
-
-        size_t i = 0;
-        for(auto& id: output_ids)
-        {
-// workaround for Microsoft VC++
-#if defined _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4996)
-#endif
-            i += id.copy(names + i, size - i - 2);
-#if defined _MSC_VER
-#pragma warning(pop)
-#endif
-            names[i++] = 0; // plus zero symbol
-            assert(i < size);
-        }
-        names[i] = 0; // final zero symbol
+        SHOULD_NOT_EQUAL_0(output_ids.size(), "Output size");
+        primitive_id_vector_to_char_array(names, size, size_ret, status, output_ids);
     });
 }
 
@@ -562,41 +541,10 @@ void cldnn_get_network_executed_primitive_names(cldnn_network network, char* nam
 {
     exception_handler(CLDNN_ERROR, status, [&]()
     {
-        auto primitives_size = api_cast(network)->get_executed_primitive_ids().size();
         SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_EQUAL_0(primitives_size, "Primitives size");
         auto&& primitive_ids = api_cast(network)->get_executed_primitive_ids();
-        *size_ret = std::accumulate(
-            std::begin(primitive_ids),
-            std::end(primitive_ids),
-            size_t(1), // final zero symbol
-            [](size_t acc, const cldnn::primitive_id& id)
-        {
-            return acc + id.size() + 1; // plus zero symbol
-        });
-
-        if (size < *size_ret)
-        {
-            if (status) *status = CLDNN_INVALID_ARG;
-            return;
-        }
-
-        size_t i = 0;
-        for (auto& id : primitive_ids)
-        {
-            // workaround for Microsoft VC++
-#if defined _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4996)
-#endif
-            i += id.copy(names + i, size - i - 2);
-#if defined _MSC_VER
-#pragma warning(pop)
-#endif
-            names[i++] = 0; // plus zero symbol
-            assert(i < size);
-        }
-        names[i] = 0; // final zero symbol
+        SHOULD_NOT_EQUAL_0(primitive_ids.size(), "Primitives size");
+        primitive_id_vector_to_char_array(names, size, size_ret, status, primitive_ids);
     });
 }
 
@@ -604,41 +552,10 @@ void cldnn_get_network_all_primitive_names(cldnn_network network, char* names, s
 {
     exception_handler(CLDNN_ERROR, status, [&]()
     {
-        auto primitives_size = api_cast(network)->get_all_primitive_ids().size();
         SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_EQUAL_0(primitives_size, "Primitives size");
         auto&& primitive_ids = api_cast(network)->get_all_primitive_ids();
-        *size_ret = std::accumulate(
-            std::begin(primitive_ids),
-            std::end(primitive_ids),
-            size_t(1), // final zero symbol
-            [](size_t acc, const cldnn::primitive_id& id)
-        {
-            return acc + id.size() + 1; // plus zero symbol
-        });
-
-        if (size < *size_ret)
-        {
-            if (status) *status = CLDNN_INVALID_ARG;
-            return;
-        }
-
-        size_t i = 0;
-        for (auto& id : primitive_ids)
-        {
-            // workaround for Microsoft VC++
-#if defined _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4996)
-#endif
-            i += id.copy(names + i, size - i - 2);
-#if defined _MSC_VER
-#pragma warning(pop)
-#endif
-            names[i++] = 0; // plus zero symbol
-            assert(i < size);
-        }
-        names[i] = 0; // final zero symbol
+        SHOULD_NOT_EQUAL_0(primitive_ids.size(), "Primitives size");
+        primitive_id_vector_to_char_array(names, size, size_ret, status, primitive_ids);
     });
 }
 
@@ -646,41 +563,10 @@ void cldnn_get_network_all_primitive_org_names(cldnn_network network, char* name
 {
     exception_handler(CLDNN_ERROR, status, [&]()
     {
-        auto primitives_size = api_cast(network)->get_all_primitive_org_ids().size();
         SHOULD_NOT_BE_NULL(network, "Network");
-        SHOULD_NOT_EQUAL_0(primitives_size, "Primitives size");
         auto&& primitive_ids = api_cast(network)->get_all_primitive_org_ids();
-        *size_ret = std::accumulate(
-            std::begin(primitive_ids),
-            std::end(primitive_ids),
-            size_t(1), // final zero symbol
-            [](size_t acc, const cldnn::primitive_id& id)
-        {
-            return acc + id.size() + 1; // plus zero symbol
-        });
-
-        if (size < *size_ret)
-        {
-            if (status) *status = CLDNN_INVALID_ARG;
-            return;
-        }
-
-        size_t i = 0;
-        for (auto& id : primitive_ids)
-        {
-            // workaround for Microsoft VC++
-#if defined _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4996)
-#endif
-            i += id.copy(names + i, size - i - 2);
-#if defined _MSC_VER
-#pragma warning(pop)
-#endif
-            names[i++] = 0; // plus zero symbol
-            assert(i < size);
-        }
-        names[i] = 0; // final zero symbol
+        SHOULD_NOT_EQUAL_0(primitive_ids.size(), "Primitives size");
+        primitive_id_vector_to_char_array(names, size, size_ret, status, primitive_ids);
     });
 }
 
@@ -770,7 +656,7 @@ cldnn_memory cldnn_attach_memory(cldnn_layout layout, void* pointer, size_t size
     return exception_handler<cldnn_memory>(CLDNN_ERROR, status, nullptr, [&]()
     {
         cldnn::layout layout_obj(layout);
-        if (layout_obj.bytes_count() > size) 
+        if (layout_obj.bytes_count() > size)
             throw std::invalid_argument("buffer size does not match layout size");
         return api_cast(new cldnn::simple_attached_memory(layout_obj, pointer));
     });
@@ -914,6 +800,8 @@ PRIMITIVE_TYPE_ID_CALL_IMPL(deconvolution)
 PRIMITIVE_TYPE_ID_CALL_IMPL(concatenation)
 PRIMITIVE_TYPE_ID_CALL_IMPL(eltwise)
 PRIMITIVE_TYPE_ID_CALL_IMPL(fully_connected)
+PRIMITIVE_TYPE_ID_CALL_IMPL(fused_conv_bn_scale)
+PRIMITIVE_TYPE_ID_CALL_IMPL(fused_conv_eltwise)
 PRIMITIVE_TYPE_ID_CALL_IMPL(input_layout)
 PRIMITIVE_TYPE_ID_CALL_IMPL(lookup_table)
 PRIMITIVE_TYPE_ID_CALL_IMPL(lrn)
@@ -932,6 +820,7 @@ PRIMITIVE_TYPE_ID_CALL_IMPL(proposal)
 PRIMITIVE_TYPE_ID_CALL_IMPL(roi_pooling)
 PRIMITIVE_TYPE_ID_CALL_IMPL(prior_box)
 PRIMITIVE_TYPE_ID_CALL_IMPL(detection_output)
+PRIMITIVE_TYPE_ID_CALL_IMPL(detection_output_sort)
 PRIMITIVE_TYPE_ID_CALL_IMPL(normalize)
 PRIMITIVE_TYPE_ID_CALL_IMPL(generic_layer)
 PRIMITIVE_TYPE_ID_CALL_IMPL(custom_gpu_primitive)
@@ -950,3 +839,12 @@ PRIMITIVE_TYPE_ID_CALL_IMPL(tile)
 PRIMITIVE_TYPE_ID_CALL_IMPL(gemm)
 PRIMITIVE_TYPE_ID_CALL_IMPL(select)
 PRIMITIVE_TYPE_ID_CALL_IMPL(index_select)
+PRIMITIVE_TYPE_ID_CALL_IMPL(condition)
+PRIMITIVE_TYPE_ID_CALL_IMPL(pyramid_roi_align)
+PRIMITIVE_TYPE_ID_CALL_IMPL(contract)
+PRIMITIVE_TYPE_ID_CALL_IMPL(one_hot)
+PRIMITIVE_TYPE_ID_CALL_IMPL(gather)
+PRIMITIVE_TYPE_ID_CALL_IMPL(depth_to_space)
+PRIMITIVE_TYPE_ID_CALL_IMPL(shuffle_channels)
+PRIMITIVE_TYPE_ID_CALL_IMPL(strided_slice)
+PRIMITIVE_TYPE_ID_CALL_IMPL(reverse_sequence)
diff --git a/inference-engine/thirdparty/clDNN/src/concatenation.cpp b/inference-engine/thirdparty/clDNN/src/concatenation.cpp
index a7e445216..7ab7643da 100644
--- a/inference-engine/thirdparty/clDNN/src/concatenation.cpp
+++ b/inference-engine/thirdparty/clDNN/src/concatenation.cpp
@@ -29,6 +29,8 @@ primitive_type_id concatenation_type_id()
 
 layout concatenation_inst::calc_output_layout(concatenation_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for concatenation_node!");
     auto desc = node.get_primitive();
 
     auto input_layout = node.input(0).get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/condition.cpp b/inference-engine/thirdparty/clDNN/src/condition.cpp
new file mode 100644
index 000000000..58be0cd62
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/condition.cpp
@@ -0,0 +1,85 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "condition_inst.h"
+
+#include "error_handler.h"
+#include "json_object.h"
+#include "primitive_type_base.h"
+
+
+namespace cldnn
+{
+primitive_type_id condition_type_id()
+{
+    static primitive_type_base<condition> instance;
+    return &instance;
+}
+/*
+    Calc_output_layout method is called only when output layout is invalidated.
+    It means, that it is called when:
+    1) It has never been called.
+    2) Dependency has changed output layout.
+    In this both cases, we need to recalc branch_true and branch_false.
+    !* We can be sure, that this method was called AT LEAST once during graph compilation.*!
+*/
+layout condition_inst::calc_output_layout(condition_node const& node)
+{
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for condition_node!");
+    node.set_branches();
+
+    auto branch_true_output = node.get_branch_true()->get_outputs();
+    auto branch_false_output = node.get_branch_false()->get_outputs();
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Count of branch true outputs", branch_true_output.size(), "expected outputs size", 1, "Branch true should have one output.");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Count of branch false outputs", branch_false_output.size(), "expected outputs size", 1, "Branch false should have one output.");
+    
+    auto layout_true = branch_true_output.at(0)->get_output_layout();
+    auto layout_false = branch_false_output.at(0)->get_output_layout();
+    CLDNN_ERROR_LAYOUT_MISMATCH(node.id(), "Branch true output layout", layout_true, "branch false output layout", layout_false, "Layout of the branches should be the same.");
+
+    return layout_true;
+}
+
+std::string condition_inst::to_string(condition_node const& node)
+{
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    json_composite condition_info;
+
+    node_info->add("condition info", condition_info);
+
+    std::stringstream primitive_description;
+    node_info->dump(primitive_description);
+    return primitive_description.str();
+}
+
+/*
+Condition primitive is resuing memory with the input.
+*/
+condition_inst::typed_primitive_inst(network_impl& network, condition_node const& node)
+    : parent(network, node)
+    , _net_true(node.get_program().get_engine().allocate_network(*node.get_branch_true(), true))
+    , _net_false(node.get_program().get_engine().allocate_network(*node.get_branch_false(), true))
+{
+    auto compare_tensor = node.compare().get_output_layout().size;
+    auto input_tensor = node.input().get_output_layout().size;
+    CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(), "Compare tensor", compare_tensor, "input tensor", input_tensor, "Compare primitive is too big.");
+
+    auto compare_with_offster_tensor = compare_tensor + node.offset();
+    CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(), "Offset with compare tensor", compare_with_offster_tensor, "input tensor", input_tensor, "Offset is too big.");
+
+}
+}
diff --git a/inference-engine/thirdparty/clDNN/src/constants_propagator.cpp b/inference-engine/thirdparty/clDNN/src/constants_propagator.cpp
deleted file mode 100644
index 2a6cdad7a..000000000
--- a/inference-engine/thirdparty/clDNN/src/constants_propagator.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-// Copyright (c) 2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-#include "constants_propagator.h"
-#include "engine_impl.h"
-#include "program_impl.h"
-#include "network_impl.h"
-#include "memory_impl.h"
-
-#include "api/CPP/input_layout.hpp"
-
-using namespace cldnn;
-
-constants_propagator::constants_propagator(program_impl::ptr program) : prog(program)
-{
-}
-
-void constants_propagator::visit_node(program_node& node)
-{
-    if (node.is_constant())
-        handle_constant(node);
-}
-
-std::list<std::pair<primitive_id, memory_impl::ptr>> constants_propagator::calculate()
-{
-    if (!has_non_trivial_constants)
-        return{};
-
-    build_options bo;
-    bo.set_option(build_option::optimize_data(false));
-    bo.set_option(build_option::outputs(const_outputs));
-    network_impl::ptr net = prog->get_engine().build_network(tpl, bo, true);
-    for (auto& cin : const_inputs)
-        net->set_input_data(cin->id(), cin->get_attached_memory());
-
-    net->execute({});
-    net->reset_execution(true); //wait for computations to complete
-    auto outputs = net->get_outputs();
-
-    std::list<std::pair<primitive_id, memory_impl::ptr>> ret;
-    for (auto& out : outputs)
-        ret.push_back({ out->id(), &out->output_memory() });
-
-    return ret;
-}
-
-void constants_propagator::handle_constant(program_node& node)
-{
-    if (!node.is_type<data>())
-    {
-        add_constant(node);
-        if (node.has_non_const_user())
-            const_outputs.push_back(node.id());
-    }
-}
-
-void constants_propagator::add_constant(program_node& node)
-{
-    if (node.is_type<data>())
-        return;
-
-    tpl.add(node.desc);
-    has_non_trivial_constants = true;
-
-    //if a node is either an endpoint or an output, always add it as an output
-    if (node.is_endpoint() || node.is_output())
-        const_outputs.push_back(node.id());
-
-    //if a non-tirivial constant has a trivial input, add this input as an input for our network
-    add_deps_to_tpl(node.get_dependencies());
-}
-
-void constants_propagator::add_deps_to_tpl(const std::vector<program_node*>& deps)
-{
-     /*   
-        Nodes can share dependencies, if we already have dep in tpl, don't add it again.
-        example:          
-            C   <--- shared dep
-           / \
-          /   \
-         A     B
-     */
-    for (auto& dep : deps)
-    {
-        if (dep->is_type<data>())
-        {
-            if (is_already_in_tpl(dep->id())) continue;
-            tpl.add(std::make_shared<input_layout>(dep->id(), dep->as<data>().get_primitive()->mem.get_layout()));
-            const_inputs.push_back(&dep->as<data>());
-        }
-    }
-}
-
-bool constants_propagator::is_already_in_tpl(const primitive_id& id)
-{
-    for (auto const& id_in_tpl : tpl.get_primitives_id())
-    {
-        if (id == id_in_tpl) return true;
-    }
-    return false;
-}
-\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/contract.cpp b/inference-engine/thirdparty/clDNN/src/contract.cpp
new file mode 100644
index 000000000..020f40434
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/contract.cpp
@@ -0,0 +1,130 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "contract_inst.h"
+
+#include "error_handler.h"
+#include "json_object.h"
+#include "primitive_type_base.h"
+
+
+namespace cldnn
+{
+    primitive_type_id contract_type_id()
+    {
+        static primitive_type_base<contract> instance;
+        return &instance;
+    }
+
+    layout contract_inst::calc_output_layout(contract_node const& node)
+    {
+        auto input_layout = node.input().get_output_layout();
+        const auto& input_sizes = input_layout.size;
+        auto desc = node.get_primitive();
+        auto reduction_axes = desc->reduction_axes;
+
+        std::vector<tensor::value_type> input_dims = { input_sizes.batch[0], input_sizes.feature[0],
+            input_sizes.spatial[1], input_sizes.spatial[0] };
+        std::vector<tensor::value_type> output_sizes(4, 0);
+        int cur_dim = 3;
+        for (int i = 3; i >= 0; --i)
+        {
+            while (std::find(reduction_axes.begin(), reduction_axes.end(), cur_dim) != reduction_axes.end() && cur_dim >= 0)
+                --cur_dim;
+            output_sizes.at(i) = cur_dim >= 0 ? input_dims.at(cur_dim--) : 1;
+        }
+
+        return { input_layout.data_type, input_layout.format, cldnn::tensor(output_sizes[0], output_sizes[1], output_sizes[3], output_sizes[2]) };
+    }
+
+    std::string contract_inst::to_string(contract_node const& node)
+    {
+        auto desc = node.get_primitive();
+        auto node_info = node.desc_to_json();
+        const auto& reduction_axes = desc->reduction_axes;
+        auto& input = node.input();
+
+        std::stringstream primitive_description;
+        std::stringstream ss_reduction_axes;
+
+        for (size_t i = 0; i < reduction_axes.size(); ++i)
+        {
+            ss_reduction_axes << reduction_axes.at(i);
+            i != (reduction_axes.size() - 1) ? ss_reduction_axes << ", " : ss_reduction_axes << "";
+        }
+
+        std::string str_mode;
+        switch (desc->mode)
+        {
+        case contract_mode::sum:
+            str_mode = "sum";
+            break;
+        case contract_mode::prod:
+            str_mode = "product";
+            break;
+        case contract_mode::all:
+            str_mode = "all";
+            break;
+        case contract_mode::any:
+            str_mode = "any";
+            break;
+        case contract_mode::max:
+            str_mode = "max";
+            break;
+        default:
+            str_mode = "not supported mode";
+            break;
+        }
+
+        json_composite contract_info;
+        contract_info.add("input id", input.id());
+        contract_info.add("mode", str_mode);
+        contract_info.add("reduction axes", ss_reduction_axes.str());
+
+        node_info->add("contract info", contract_info);
+        node_info->dump(primitive_description);
+
+        return primitive_description.str();
+    }
+
+    contract_inst::typed_primitive_inst(network_impl& network, contract_node const& node)
+        : parent(network, node)
+    {
+        std::set<uint16_t> existing;
+        const auto& reduction_axes = node.get_primitive()->reduction_axes;
+        size_t reduction_axes_size = reduction_axes.size();
+
+        if (reduction_axes.empty())
+        {
+            CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: reduction_axes should not be empty.");
+        }
+        if (reduction_axes_size > 4)
+        {
+            CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: reduction_axes size should be less or equal 4.");
+        }
+        for (size_t i = 0; i < reduction_axes_size; ++i)
+        {
+            if (reduction_axes.at(i) >= 4)
+            {
+                CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: reduction_axes index should be within reduction_axes range.");
+            }
+            if (existing.find(reduction_axes.at(i)) != existing.end())
+            {
+                CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: Duplicate axes numbers was found in reduction_axes.");
+            }
+            existing.insert(reduction_axes.at(i));
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/convolution.cpp b/inference-engine/thirdparty/clDNN/src/convolution.cpp
index cdb6ff27c..fcdda7f99 100644
--- a/inference-engine/thirdparty/clDNN/src/convolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/convolution.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -31,6 +31,8 @@ primitive_type_id convolution_type_id()
 
 layout convolution_inst::calc_output_layout(convolution_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for convolution_node!");
     auto desc = node.get_primitive();
 
     auto input_layout = node.input().get_output_layout();
@@ -103,8 +105,16 @@ layout convolution_inst::calc_output_layout(convolution_node const& node)
     auto output_range = calc_sliding_window_output_range<swor_mode::all>(
         input_layout.size, filter_size, input_offset, stride, dilation, true, 1);
 
-    tensor output_size(input_layout.size.batch[0], number_of_features,
-                       output_range.spatial[0], output_range.spatial[1]);
+    tensor::value_type output_features = desc->output_size.feature[0] != 0 ? desc->output_size.feature[0] : number_of_features;
+    tensor output_size = tensor(input_layout.size.batch[0], output_features,
+        output_range.spatial[0], output_range.spatial[1]);
+
+    // due to performance reason for using fs_bs_yx_bsv4_fsv32 first convolution have 3 features, so first conv layer will take byxf and return fs_bs_yx_bsv4_fsv32
+    if (input_layout.data_type == data_types::i8 && input_layout.format == format::byx8_f4 && input_layout.size.batch[0] % 4 == 0 && input_layout.size.feature[0] == 3)
+    {
+        return layout{ input_layout.data_type, cldnn::format::fs_bs_yx_bsv4_fsv32, output_size };
+    }
+
     return { input_layout.data_type, input_layout.format, output_size };
 }
 
@@ -122,6 +132,8 @@ std::string convolution_inst::to_string(convolution_node const& node)
     json_composite conv_info;
     conv_info.add("stride", strd.to_string());
     conv_info.add("input offset", desc->input_offset.to_string());
+    conv_info.add("padding above", desc->padding_above.to_string());
+    conv_info.add("padding below", desc->padding_below.to_string());
     conv_info.add("split", split);
     conv_info.add("dilation", dilation.to_string());
     conv_info.add("with activation", activation);
@@ -148,8 +160,8 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
     auto output_inst = node.get_output_layout();
     auto output_size = output_inst.size;
 
-    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input number of dimensions", input_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Input/output dims mismtach");
-    CLDNN_ERROR_NOT_EQUAL(node.id(), "Stride number of dimensions", stride.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "stride/output dims mismtach");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input number of dimensions", input_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Input/output dims mismatch");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Stride number of dimensions", stride.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "stride/output dims mismatch");
 
     auto split = node.get_split();
     for (decltype(split) j = 0; j < split; j++)
@@ -162,18 +174,24 @@ convolution_inst::typed_primitive_inst(network_impl& network, convolution_node c
             CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias feature[0]", bias_inst.size.feature[0], "expected size of feature", 1, "Biases isn't 1D vector.");
             CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[1]", bias_inst.size.spatial[1], "expected size of spatial[1]", 1, "Biases isn't 1D vector.");
           
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[0]", bias_inst.size.spatial[0], "expected feature map number", output_size.feature[0] / split, "Bias/fm mismtach");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[0]", bias_inst.size.spatial[0], "expected feature map number", output_size.feature[0] / split, "Bias/fm mismatch");
         }
 
         auto input_offset = argument.input_offset;
 
-        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights number of dimensions", filter_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Weights/output dims mismtach");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights number of dimensions", filter_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Weights/output dims mismatch");
         CLDNN_ERROR_NOT_EQUAL(node.id(), "Convolution padding mode", node.get_output_layout().data_padding.filling_value(), "padding value", 0.0f, "Unknown padding mode.");
-        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset number of dimensions", input_offset.raw.size(), "input number of dimensions", input_inst.size.raw.size(), "Input offset/ input size mismtach");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset number of dimensions", input_offset.raw.size(), "input number of dimensions", input_inst.size.raw.size(), "Input offset/ input size mismatch");
         CLDNN_ERROR_NOT_EQUAL(node.id(), "Output feature size", output_size.feature.size(), "expected feature size", 1, "Only one-dimensional features are supported");
         CLDNN_ERROR_NOT_EQUAL(node.id(), "Output batch size", output_size.batch.size(), "expected output size", 1, "Only one-dimensional batch size are supported");
         CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights spatial size", filter_inst.size.spatial.size(), "expected weights spatial size", 2, "Weights have to have 2 dimensions in spatial domain.");
-        CLDNN_ERROR_LESS_THAN(node.id(), "Weights feature maps number", (input_inst.size.feature[0] - input_offset.feature[0]) / split, "input feature maps number", filter_inst.size.feature[0], "Weights/ifm mismtach");
+        CLDNN_ERROR_LESS_THAN(node.id(), "Weights feature maps number", (input_inst.size.feature[0] - input_offset.feature[0]) / split, "input feature maps number", filter_inst.size.feature[0], "Weights/ifm mismatch");
+        if (filter_inst.format == format::bf_lyx_yx) // local convolution
+        {
+            auto local = filter_inst.size.local;
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Number of local x dimension", local[0], "output x dimension", output_inst.size.spatial[0], "Weights/output dims mismatch");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Number of local y dimension", local[1], "output y dimension", output_inst.size.spatial[1], "Weights/output dims mismatch");
+        }
     }
 }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/convolution_grad_weights.cpp b/inference-engine/thirdparty/clDNN/src/convolution_grad_weights.cpp
index e8d711645..90be77cb8 100644
--- a/inference-engine/thirdparty/clDNN/src/convolution_grad_weights.cpp
+++ b/inference-engine/thirdparty/clDNN/src/convolution_grad_weights.cpp
@@ -31,9 +31,16 @@ primitive_type_id convolution_grad_weights_type_id()
 
 layout convolution_grad_weights_inst::calc_output_layout(convolution_grad_weights_node const& node)
 {
-    //output buffer will not be used in this primitive
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "convolution_grad_weights_node!");
+    //output buffer will not be used in this primitive unless output gradient weights is set
     auto input_grad_layout_size = node.input(0).get_output_layout();
-    return{ input_grad_layout_size.data_type, input_grad_layout_size.format, { 1, 1, 1, 1 } };
+    tensor output_sizes = { 1, 1, 1, 1 };
+    if (node.output_grad_w())
+        output_sizes = node.weights().get_output_layout().size;
+
+    return{ input_grad_layout_size.data_type, input_grad_layout_size.format, output_sizes };
 }
 
 std::string convolution_grad_weights_inst::to_string(convolution_grad_weights_node const& node)
diff --git a/inference-engine/thirdparty/clDNN/src/crop.cpp b/inference-engine/thirdparty/clDNN/src/crop.cpp
index 01c2e2d4b..e8463face 100644
--- a/inference-engine/thirdparty/clDNN/src/crop.cpp
+++ b/inference-engine/thirdparty/clDNN/src/crop.cpp
@@ -30,23 +30,54 @@ primitive_type_id crop_type_id()
 
 layout crop_inst::calc_output_layout(crop_node const& node)
 {
-    auto input_layout = node.input().get_output_layout();
-    auto result = layout({ input_layout.data_type, input_layout.format, node.get_primitive()->reference_input });
-    return result;
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for crop_node!");
+    const auto& ref_in_sizes = node.get_primitive()->reference_input;
+    const auto in_layout     = node.input().get_output_layout();
+    const auto& in_sizes     = in_layout.size;
+    const auto& offsets      = node.get_primitive()->offsets;
+
+    // Check for borders variant of crop.
+    if (ref_in_sizes.batch[0] < 0 || ref_in_sizes.feature[0] < 0 ||
+        ref_in_sizes.spatial[0] < 0 || ref_in_sizes.spatial[1] < 0)
+    {
+        // Ignore not supported dimensions.
+        const auto rb_sizes = ref_in_sizes.negate().sub({0, 0, 0, 0});
+        const auto lt_sizes = offsets.sub({0, 0, 0, 0});
+
+        const auto out_sizes = in_sizes - (rb_sizes + lt_sizes);
+
+        return layout({in_layout.data_type, in_layout.format, out_sizes});
+    }
+    return layout({in_layout.data_type, in_layout.format, ref_in_sizes});
 }
 
 std::string crop_inst::to_string(crop_node const& node)
 {
-    auto desc       = node.get_primitive();
-    auto offsets    = desc->offsets;
+    const auto& desc       = node.get_primitive();
+    auto ref_in_sizes      = desc->reference_input;
+    const auto& offsets    = desc->offsets;
+    const auto in_layout   = node.input().get_output_layout();
+    const auto& in_sizes   = in_layout.size;
+
     auto node_info  = node.desc_to_json();
-    auto ref_input  = desc->reference_input;
-    
+
+        // Check for borders variant of crop.
+    if (ref_in_sizes.batch[0] < 0 || ref_in_sizes.feature[0] < 0 ||
+        ref_in_sizes.spatial[0] < 0 || ref_in_sizes.spatial[1] < 0)
+    {
+        // Ignore not supported dimensions.
+        const auto rb_sizes = ref_in_sizes.negate().sub({0, 0, 0, 0});
+        const auto lt_sizes = offsets.sub({0, 0, 0, 0});
+
+        ref_in_sizes = in_sizes - (rb_sizes + lt_sizes);
+    }
+
     std::stringstream primitive_description;
 
     json_composite crop_info;
-    crop_info.add("reference input", ref_input.to_string());
-    crop_info.add("offset", offsets.to_string());    
+    crop_info.add("reference input size", ref_in_sizes.to_string());
+    crop_info.add("offset",               offsets.to_string());
 
     node_info->add("crop info", crop_info);
     node_info->dump(primitive_description);
@@ -55,23 +86,39 @@ std::string crop_inst::to_string(crop_node const& node)
 }
 
 crop_inst::typed_primitive_inst(network_impl& network, crop_node const& node)
-    :parent(network, node)
+    : parent(network, node)
 {
-    auto reference_input_sizes = argument.reference_input;
-    auto inp_layout = node.input().get_output_layout();
-    auto input_sizes = inp_layout.size;
-    auto input_format = inp_layout.format;
-    auto offsets = argument.offsets;
+    const auto& ref_in_sizes = argument.reference_input;
+    const auto in_layout     = node.input().get_output_layout();
+    const auto& in_sizes     = in_layout.size;
+    const auto in_format     = in_layout.format;
+    const auto& offsets      = argument.offsets;
+
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input format", in_format.value, "supported crop input formats", format::yxfb, format::bfyx, format::fyxb);
+
+    // Check for borders variant of crop.
+    if (ref_in_sizes.batch[0] < 0 || ref_in_sizes.feature[0] < 0 ||
+        ref_in_sizes.spatial[0] < 0 || ref_in_sizes.spatial[1] < 0)
+    {
+        // Ignore not supported dimensions.
+        const auto rb_sizes = ref_in_sizes.negate().sub({0, 0, 0, 0});
+        const auto lt_sizes = offsets.sub({0, 0, 0, 0});
+
+        const auto out_sizes = in_sizes - (rb_sizes + lt_sizes);
 
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input format", input_format.value, "supported crop input formats", format::yxfb, format::bfyx );
+        CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "Left/top/lower borders",     lt_sizes, "0 value", {}, "Invalid border size: negative");
+        CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "Right/bottom/upper borders", rb_sizes, "0 value", {}, "Invalid border size: negative");
+
+        CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "Input sizes - border sizes", out_sizes, "1 value", {1, 1, 1, 1}, "Invalid border sizes: greater-equal input sizes");
+    }
 
     //check if output sizes matches reference input sizes
-    CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(), "Reference input", reference_input_sizes, "input sizes", input_sizes, "Reference input tensor/ input tensor mismtach");
-    
+    CLDNN_ERROR_TENSOR_SIZES_GREATER_THAN(node.id(), "Reference input", ref_in_sizes, "input sizes", in_sizes, "Reference input tensor/ input tensor mismtach");
+
     //check if offsets do not extend input sizes and if match the output sizes
-    CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "Batch offsets", offsets, "0 value", { 0, 0, 0, 0 }, "Invalid Batch offset: negative value");
-    auto input_size_sub_offsets = input_sizes - offsets;
-    CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "input sizes - offsets", input_size_sub_offsets, "reference input sizes", reference_input_sizes, "Invalid Batch offset: exceeds data for output!");
+    CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "Batch offsets", offsets, "0 value", {}, "Invalid Batch offset: negative value");
+    auto input_size_sub_offsets = in_sizes - offsets;
+    CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(node.id(), "input sizes - offsets", input_size_sub_offsets, "reference input sizes", ref_in_sizes, "Invalid Batch offset: exceeds data for output!");
 
     if (node.can_be_optimized())
     {
@@ -96,4 +143,4 @@ void crop_inst::reuse_input()
 {
     _output = _network.get_engine().reinterpret_buffer(input_memory(), node.get_output_layout());
 }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/src/data.cpp b/inference-engine/thirdparty/clDNN/src/data.cpp
index cccb4e9ed..7b7702e1b 100644
--- a/inference-engine/thirdparty/clDNN/src/data.cpp
+++ b/inference-engine/thirdparty/clDNN/src/data.cpp
@@ -48,6 +48,7 @@ data_node::typed_program_node(const std::shared_ptr<data> dprim, program_impl& p
     : parent(dprim, prog), mem(api_cast(dprim->mem.get()))
 {
     constant = true;
+    can_share_buffer(false);
     recalc_output_layout(false);
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
index 563ff8fc1..6c7dad9ab 100644
--- a/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
+++ b/inference-engine/thirdparty/clDNN/src/deconvolution.cpp
@@ -31,6 +31,8 @@ primitive_type_id deconvolution_type_id()
 
 layout deconvolution_inst::calc_output_layout(deconvolution_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for deconvolution_node!");
     auto desc = node.get_primitive();
 
     auto input_layout = node.input().get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp b/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp
new file mode 100644
index 000000000..7c0b5f047
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/depth_to_space.cpp
@@ -0,0 +1,78 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "depth_to_space_inst.h"
+
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn
+{
+primitive_type_id depth_to_space_type_id()
+{
+    static primitive_type_base<depth_to_space> instance;
+    return &instance;
+}
+
+layout depth_to_space_inst::calc_output_layout(depth_to_space_node const& node)
+{
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input(0).get_output_layout();
+    auto input_format = input_layout.format;
+
+    const size_t block_size = desc->block_size;
+
+    if (block_size < 2)
+        CLDNN_ERROR_MESSAGE(node.id(), "Invalid depthToSpace block_size value (should equal at least two). Actual block size is" +
+            std::to_string(block_size));
+
+    if (input_layout.size.feature[0] % (block_size * block_size) != 0)
+        CLDNN_ERROR_MESSAGE(node.id(), "The depth of the input tensor must be divisible by squared block size. Actual block size is " +
+            std::to_string(block_size));
+
+    const size_t feature = input_layout.size.feature[0] / block_size / block_size;
+    const size_t y = input_layout.size.spatial[1] * block_size;
+    const size_t x = input_layout.size.spatial[0] * block_size;
+
+    return layout{input_layout.data_type, input_format, tensor(TensorValue(input_layout.size.batch[0]), TensorValue(feature), TensorValue(x), TensorValue(y))};
+}
+
+std::string depth_to_space_inst::to_string(depth_to_space_node const& node)
+{
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    auto& input = node.input();
+
+    std::stringstream primitive_description;
+
+    json_composite depth_to_space_info;
+    depth_to_space_info.add("input id", input.id());
+    depth_to_space_info.add("block size", desc->block_size);
+
+    node_info->add("depth_to_space info", depth_to_space_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+depth_to_space_inst::typed_primitive_inst(network_impl& network, depth_to_space_node const& node)
+    : parent(network, node)
+{
+}
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/detection_output.cpp b/inference-engine/thirdparty/clDNN/src/detection_output.cpp
index e8fa392a9..4d121df07 100644
--- a/inference-engine/thirdparty/clDNN/src/detection_output.cpp
+++ b/inference-engine/thirdparty/clDNN/src/detection_output.cpp
@@ -30,16 +30,48 @@ primitive_type_id detection_output_type_id()
 
 layout detection_output_inst::calc_output_layout(detection_output_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "detection_output_node!");
     CLDNN_ERROR_NOT_EQUAL(node.id(), "Detection output layer input number", node.get_dependencies().size(), "expected number of inputs", static_cast<size_t>(3), "");
 
     auto input_layout = node.location().get_output_layout();
 
     // Batch size and feature size are 1.
-    // Number of bounding boxes to be kept is set to keep_top_k*batch size. 
-    // If number of detections is lower than keep_top_k, will write dummy results at the end with image_id=-1. 
+    // Number of bounding boxes to be kept is set to keep_top_k*batch size.
+    // If number of detections is lower than top_k, will write dummy results at the end with image_id=-1.
     // Each row is a 7 dimension vector, which stores:
     // [image_id, label, confidence, xmin, ymin, xmax, ymax]
-    return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, node.get_primitive()->keep_top_k * input_layout.size.batch[0]) };
+    int output_size = (int)input_layout.get_linear_size() / PRIOR_BOX_SIZE;
+    int num_classes = node.get_primitive()->num_classes;
+
+    if (node.get_primitive()->share_location)
+    {
+        num_classes = (node.get_primitive()->background_label_id == 0) ? node.get_primitive()->num_classes - 1 : node.get_primitive()->num_classes;
+        output_size *= num_classes;
+    }
+
+    if (node.get_primitive()->top_k != -1)
+    {
+        int top_k = node.get_primitive()->top_k * num_classes * input_layout.size.batch[0];
+        if (top_k < output_size)
+        {
+            output_size = top_k;
+        }
+    }
+
+    output_size *= DETECTION_OUTPUT_ROW_SIZE;
+    // Add space for number of output results per image - needed in the next detection output step
+    output_size += ((input_layout.size.batch[0] + 15) / 16) * 16;
+
+    if (node.get_program().get_options().get<build_option_type::detection_output_gpu>()->enabled())
+    {
+        return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, 1, output_size) };
+    }
+    else
+    {
+        return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, node.get_primitive()->keep_top_k * input_layout.size.batch[0]) };
+    }
 }
 
 std::string detection_output_inst::to_string(detection_output_node const& node)
@@ -50,12 +82,13 @@ std::string detection_output_inst::to_string(detection_output_node const& node)
     auto variance_encoded    = desc->variance_encoded_in_target ? "true" : "false";
     auto prior_is_normalized = desc->prior_is_normalized ? "true" : "false";
     auto decrease_label_id   = desc->decrease_label_id ? "true" : "false";
-    auto clip                = desc->clip ? "true" : "false";
+    auto clip_before_nms     = desc->clip_before_nms ? "true" : "false";
+    auto clip_after_nms      = desc->clip_after_nms ? "true" : "false";
     auto& input_location     = node.location();
     auto& input_prior_box    = node.prior_box();
     auto& input_confidence   = node.confidence();
 
-    
+
     std::stringstream primitive_description;
     std::string       str_code_type;
 
@@ -74,7 +107,7 @@ std::string detection_output_inst::to_string(detection_output_node const& node)
         str_code_type = "not supported code type";
         break;
     }
-    
+
     json_composite detec_out_info;
     detec_out_info.add("input location id", input_location.id());
     detec_out_info.add("input confidence id", input_confidence.id());
@@ -95,7 +128,8 @@ std::string detection_output_inst::to_string(detection_output_node const& node)
     detec_out_info.add("input_width", desc->input_width);
     detec_out_info.add("input_height", desc->input_height);
     detec_out_info.add("decrease_label_id", decrease_label_id);
-    detec_out_info.add("clip", clip);
+    detec_out_info.add("clip_before_nms", clip_before_nms);
+    detec_out_info.add("clip_after_nms", clip_after_nms);
     detec_out_info.dump(primitive_description);
 
     node_info->add("dection output info", detec_out_info);
@@ -125,11 +159,77 @@ detection_output_inst::typed_primitive_inst(network_impl& network, detection_out
     auto desc              = node.get_primitive();
     int prior_feature_size = desc->variance_encoded_in_target ? 1 : 2;
     tensor prior_box_size = prior_box_layout.size;
-    CLDNN_ERROR_NOT_EQUAL(node.id(), "Prior box batch size", prior_box_size.batch[0], "expected value", 1, "");
     CLDNN_ERROR_NOT_EQUAL(node.id(), "Prior box spatial X", prior_box_size.spatial[0], "expected value", 1, "");
     CLDNN_ERROR_NOT_EQUAL(node.id(), "Prior box feature size", prior_box_size.feature[0], "expected value", prior_feature_size, "");
 
     CLDNN_ERROR_BOOL(node.id(), "Detection output layer padding", node.is_padded(), "Detection output layer doesn't support output padding.");
     CLDNN_ERROR_BOOL(node.id(), "Detection output layer Prior-box input padding", node.get_dependency(2).is_padded(), "Detection output layer doesn't support input padding in Prior-Box input");
 }
+
+/************************ Detection Output keep_top_k part ************************/
+
+primitive_type_id detection_output_sort_type_id()
+{
+    static primitive_type_base<detection_output_sort> instance;
+    return &instance;
+}
+
+layout detection_output_sort_inst::calc_output_layout(detection_output_sort_node const& node)
+{
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "detection_output_sort_node!");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Detection output layer input number", node.get_dependencies().size(), "expected number of inputs", static_cast<size_t>(1), "");
+
+    auto input_layout = node.input().get_output_layout();
+    int keep_top_k = node.as<detection_output_sort>().get_primitive()->keep_top_k;
+    int num_images = node.as<detection_output_sort>().get_primitive()->num_images;
+
+    // If detection output sort is used as a second part of detection output get proper info from detection otput node
+    if (num_images == 0)
+    {
+        CLDNN_ERROR_BOOL(node.id(), "node.get_dependency(0).is_type<detection_output>()", !node.get_dependency(0).is_type<detection_output>(), "Cannot calculate output layout.");
+        input_layout = node.get_dependency(0).as<detection_output>().location().get_output_layout();
+        keep_top_k = node.get_dependency(0).as<detection_output>().get_primitive()->keep_top_k;
+        num_images = input_layout.size.batch[0];
+    }
+    // Batch size and feature size are 1.
+    // Number of bounding boxes to be kept is set to keep_top_k*batch size.
+    // If number of detections is lower than keep_top_k, will write dummy results at the end with image_id=-1.
+    // Each row is a 7 dimension vector, which stores:
+    // [image_id, label, confidence, xmin, ymin, xmax, ymax]
+    return{ input_layout.data_type, cldnn::format::bfyx, cldnn::tensor(1, 1, DETECTION_OUTPUT_ROW_SIZE, keep_top_k * num_images) };
+}
+
+std::string detection_output_sort_inst::to_string(detection_output_sort_node const& node)
+{
+    auto node_info = node.desc_to_json();
+    auto desc = node.get_primitive();
+
+    auto& input_bboxes = node.input();
+
+    std::stringstream primitive_description;
+
+    json_composite detec_out_info;
+    detec_out_info.add("input bboxes id", input_bboxes.id());
+    detec_out_info.add("num_classes:", desc->num_images);
+    detec_out_info.add("num_classes:", desc->num_classes);
+    detec_out_info.add("keep_top_k", desc->keep_top_k);
+    detec_out_info.add("share_location", desc->share_location);
+    detec_out_info.add("top_k", desc->top_k);
+    detec_out_info.dump(primitive_description);
+
+    node_info->add("dection output info", detec_out_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+detection_output_sort_inst::typed_primitive_inst(network_impl& network, detection_output_sort_node const& node)
+    :parent(network, node)
+{
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input memory format", node.get_dependency(0).get_output_layout().format.value, "expected bfyx input format", format::bfyx);
+
+    CLDNN_ERROR_BOOL(node.id(), "Detecion output layer padding", node.is_padded(), "Detection output layer doesn't support output padding.");
+}
 }
diff --git a/inference-engine/thirdparty/clDNN/src/eltwise.cpp b/inference-engine/thirdparty/clDNN/src/eltwise.cpp
index 1ee22cc82..2a6835bca 100644
--- a/inference-engine/thirdparty/clDNN/src/eltwise.cpp
+++ b/inference-engine/thirdparty/clDNN/src/eltwise.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,22 +30,53 @@ primitive_type_id eltwise_type_id()
 
 layout eltwise_inst::calc_output_layout(eltwise_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for eltwise_inst_node!");
+
     auto input_node_layout = node.input().get_non_padded_output_layout();
+
+    auto size = input_node_layout.size;
+    for (size_t i = 1; i < node.inputs_count(); i++)
+    {
+        size = tensor::max(size, node.input(i).get_non_padded_output_layout().size);
+    }
+    auto output_layout = layout(input_node_layout.data_type, input_node_layout.format, size);
+    auto mode = node.get_primitive()->mode;
     //list of operations supported for integer types
     if (input_node_layout.data_type == data_types::i8 ||
         input_node_layout.data_type == data_types::i32 ||
         input_node_layout.data_type == data_types::i64)
     {
-        auto mode = node.get_primitive()->mode;
-        std::vector<eltwise_mode> eltwise_int_modes = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::prod, eltwise_mode::div };
+        std::vector<eltwise_mode> eltwise_int_modes = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::prod, eltwise_mode::div, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod,
+                                                        eltwise_mode::eq, eltwise_mode::ne, eltwise_mode::lt, eltwise_mode::le, eltwise_mode::gt, eltwise_mode::ge,
+                                                        eltwise_mode::logic_and, eltwise_mode::logic_or, eltwise_mode::logic_xor };
         if (std::find(eltwise_int_modes.begin(), eltwise_int_modes.end(), mode) == eltwise_int_modes.end())
             CLDNN_ERROR_MESSAGE(node.id(), "Requested eltwise mode is not supported for integer types.");
     }
 
-    return input_node_layout;
+    // Logic and comparison operations should return i8 for any inputs
+    std::vector<eltwise_mode> eltwise_bool_modes = { eltwise_mode::eq, eltwise_mode::ne, eltwise_mode::lt, eltwise_mode::le,
+                                                     eltwise_mode::gt, eltwise_mode::ge,
+                                                     eltwise_mode::logic_and, eltwise_mode::logic_or, eltwise_mode::logic_xor };
+    if (std::find(eltwise_bool_modes.begin(), eltwise_bool_modes.end(), mode) != eltwise_bool_modes.end())
+    {
+        output_layout.data_type = data_types::i8;
+        if (node.get_primitive()->with_activation)
+            CLDNN_ERROR_MESSAGE(node.id(), "Activations are not supported for logical operations.");
+    }
+
+    auto eltw = std::static_pointer_cast<const eltwise>((node.get_primitive()));
+    if (!eltw->stride.empty())
+    {
+        // we can safely use only first stride, since we're using first input, and input / stride should give exact same value for every input
+        input_node_layout.size.spatial[0] /= eltw->stride[0].spatial[0];
+        input_node_layout.size.spatial[1] /= eltw->stride[0].spatial[1];
+        return input_node_layout;
+    }
+    return output_layout;
 }
 
-static inline std::string stringify_vector(std::vector<float> v)
+static inline std::string stringify_vector(const std::vector<float>& v)
 {
     std::stringstream s;
 
@@ -90,13 +121,43 @@ std::string eltwise_inst::to_string(eltwise_node const& node)
             break;
     case eltwise_mode::min:
             str_mode = "min";
-         break;
+            break;
     case eltwise_mode::pow:
             str_mode = "pow";
             break;
+    case eltwise_mode::squared_diff:
+            str_mode = "squared_diff";
+            break;
     case eltwise_mode::mod:
             str_mode = "mod";
             break;
+    case eltwise_mode::eq:
+            str_mode = "equal";
+            break;
+    case eltwise_mode::ne:
+            str_mode = "not equal";
+            break;
+    case eltwise_mode::lt:
+            str_mode = "less";
+            break;
+    case eltwise_mode::le:
+            str_mode = "less-or-equal";
+            break;
+    case eltwise_mode::gt:
+            str_mode = "greater";
+            break;
+    case eltwise_mode::ge:
+            str_mode = "greater-or-equal";
+            break;
+    case eltwise_mode::logic_and:
+            str_mode = "and";
+            break;
+    case eltwise_mode::logic_or:
+            str_mode = "or";
+            break;
+    case eltwise_mode::logic_xor:
+            str_mode = "xor";
+            break;
     default:
             str_mode = "not supported mode";
             break;
@@ -126,21 +187,78 @@ std::string eltwise_inst::to_string(eltwise_node const& node)
 eltwise_inst::typed_primitive_inst(network_impl& network, eltwise_node const& node)
     :parent(network, node)
 {
-    auto input_layout = node.input().get_output_layout();
-    auto batch_size = input_layout.size.batch[0];
-    auto feature_size = input_layout.size.feature[0];
+    check_inputs_count(node);
+    // check for stride
+    auto prim = node.get_primitive();
+    if (!prim->stride.empty())
+    {
+        // number of strides must match number of inputs
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Eltwise inputs count", node.inputs_count(), "Eltwise strides count", prim->stride.size(), "");
 
-    auto input_batch_size = input_layout.size.batch[0];
-    auto input_feature_size = input_layout.size.feature[0];
+        const auto out_x = node.get_output_layout().size.spatial[0];
+        const auto out_y = node.get_output_layout().size.spatial[1];
+        // check if strides are correctly set. I.e INPUT_SIZE_X / STRIDE_X = OUTPUT_SIZE_X, same for Y dimension
+        for (size_t i = 0; i < node.inputs_count(); i++)
+        {
+            const auto& in_layout = node.input(i).get_output_layout();
+            auto stride = prim->stride[i];
 
-    if (batch_size != 1)
+            const auto in_x_div_stride_x = in_layout.size.spatial[0] / stride.spatial[0];
+            if(in_x_div_stride_x != out_x)
+                CLDNN_ERROR_NOT_EQUAL(node.id(), "Eltwise input_x / stride_x", in_x_div_stride_x, "Eltwise output_x", out_x, "");
+
+            const auto in_y_div_stride_y = in_layout.size.spatial[1] / stride.spatial[1];
+            if(in_y_div_stride_y != out_y)
+                CLDNN_ERROR_NOT_EQUAL(node.id(), "Eltwise inputyx / stride_y", in_y_div_stride_y, "Eltwise output_y", out_y, "");
+        }
+    }
+    else
     {
-        CLDNN_ERROR_NOT_EQUAL(node.id(), "Eltwise batch size", batch_size, "input batch size", input_batch_size, "");
+        std::vector<int32_t> input0_size = node.input().get_output_layout().size.raw.vector();
+        for (size_t i = 1; i < node.inputs_count(); i++)
+        {
+            std::vector<int32_t> input_size = node.input(i).get_output_layout().size.raw.vector();
+            for (size_t d = 0; d < input0_size.size(); d++)
+            {
+                bool sizes_equal = input0_size[d] == input_size[d];
+                bool broadcast = (input0_size[d] == 1 || input_size[d] == 1) && (input0_size[d] != 1 || input_size[d] != 1);
+                CLDNN_ERROR_BOOL(node.id(), "Sizes equal or broadcast is possible", !(sizes_equal || broadcast), "Invalid input shapes");
+            }
+        }
     }
+}
 
-    if (feature_size != 1)
+void eltwise_inst::check_inputs_count(eltwise_node const &node)
+{
+    const size_t inputs_number = node.get_primitive()->input.size();
+    const eltwise_mode mode = node.get_primitive()->mode;
+
+    switch (mode)
     {
-        CLDNN_ERROR_NOT_EQUAL(node.id(), "Eltwise feature size", feature_size, "input feature size", input_feature_size, "");
+        case eltwise_mode::sum:
+        case eltwise_mode::sub:
+        case eltwise_mode::div:
+        case eltwise_mode::prod:
+        case eltwise_mode::max:
+        case eltwise_mode::min:
+        case eltwise_mode::mod:
+        case eltwise_mode::logic_and:
+        case eltwise_mode::logic_or:
+        case eltwise_mode::logic_xor:
+            if (inputs_number < 2)
+                CLDNN_ERROR_MESSAGE(node.id(), "Invalid eltwise inputs number (should be equal at least to 2). Actual: " + std::to_string(inputs_number));
+            break;
+        case eltwise_mode::eq:
+        case eltwise_mode::ne:
+        case eltwise_mode::lt:
+        case eltwise_mode::le:
+        case eltwise_mode::gt:
+        case eltwise_mode::ge:
+        case eltwise_mode::squared_diff:
+        case eltwise_mode::pow:
+            if (inputs_number != 2)
+                CLDNN_ERROR_MESSAGE(node.id(), "Invalid eltwise inputs number (should be equal to 2). Actual: " + std::to_string(inputs_number));
+            break;
     }
 }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/embed.cpp b/inference-engine/thirdparty/clDNN/src/embed.cpp
index b2087b078..b1c61992a 100644
--- a/inference-engine/thirdparty/clDNN/src/embed.cpp
+++ b/inference-engine/thirdparty/clDNN/src/embed.cpp
@@ -31,11 +31,13 @@ namespace cldnn
 
 	layout embed_inst::calc_output_layout(embed_node const& node)
 	{
-		auto input_layout = node.input().get_output_layout();
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for embed_node!");
+        auto input_layout = node.input().get_output_layout();
 		auto desc = node.get_primitive();
 		auto weights_layout = node.weights().get_output_layout();
 
-		auto result = layout(input_layout.data_type, format::bfyx, tensor(input_layout.size.batch[0], input_layout.size.spatial[0] * input_layout.size.spatial[1], weights_layout.size.batch[0], 1));
+		auto result = layout(input_layout.data_type, format::bfyx, tensor(input_layout.size.batch[0], input_layout.size.spatial[0], weights_layout.size.batch[0], 1));
 		return result;
 		
 	}
@@ -66,5 +68,8 @@ namespace cldnn
 		auto output_size = output_memory().get_layout();
 		CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_size.format.value, "expected format", format::yxfb, format::bfyx);
 		CLDNN_ERROR_NOT_EQUAL(node.id(), "Input size", input_size.size.raw.size(), "output size", output_size.size.raw.size(), "");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input batch", input_size.size.batch[0], "output batch", output_size.size.batch[0], "");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input feature", input_size.size.feature[0], "size 1", 1, "");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input y size ", input_size.size.spatial[1], "size 1", 1, "");
 	}
 }
diff --git a/inference-engine/thirdparty/clDNN/src/engine.cpp b/inference-engine/thirdparty/clDNN/src/engine.cpp
index f0e6a5330..f88393843 100644
--- a/inference-engine/thirdparty/clDNN/src/engine.cpp
+++ b/inference-engine/thirdparty/clDNN/src/engine.cpp
@@ -40,6 +40,8 @@ gpu_toolkit_config convert_configuration(const engine_configuration conf)
     result.ocl_sources_dumps_dir = conf.sources_dumps_dir;
     result.priority_mode = static_cast<cldnn_priority_mode_type>(conf.priority_mode);
     result.throttle_mode = static_cast<cldnn_throttle_mode_type>(conf.throttle_mode);
+    result.user_context = static_cast<cl::Context*>(conf.context);
+    result.tuning_cache_path = conf.tuning_cache_path;
     return result;
 }
 
@@ -49,6 +51,15 @@ engine_impl::engine_impl(const engine_configuration& conf)
     , _memory_pool(*this)
 { }
 
+engine_impl::~engine_impl()
+{ 
+    /*
+        Engine, which is main owner of context deallocate events pool manually, because
+        of the event_impl <-> gpu_toolkit dependencies.
+    */
+    _context->release_events_pool();
+}
+
 memory_impl::ptr engine_impl::allocate_memory(layout layout)
 {
     return _memory_pool.get_memory(layout);
@@ -96,7 +107,7 @@ bool engine_impl::is_the_same_buffer(const memory_impl& mem1, const memory_impl&
 event_impl::ptr engine_impl::create_user_event(bool set)
 {
     try {
-        return{ new gpu::user_event(get_context(), set), false };
+        return _context->create_user_event(set);
     }
     catch (cl::Error const& err) {
         throw gpu::ocl_error(err);
@@ -113,19 +124,29 @@ void engine_impl::release_pending_memory()
     get_context()->release_pending_memory();
 }
 
-program_impl::ptr engine_impl::build_program(const topology_impl& topology, const build_options& options, bool is_internal)
+program_impl::ptr engine_impl::build_program(const topology_impl& topology, const build_options& options, bool is_internal, bool no_optimizations)
+{
+    return{ new program_impl(*this, topology, options, is_internal, no_optimizations), false };
+}
+
+program_impl::ptr engine_impl::build_program(const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
+{
+    return{ new program_impl(*this, nodes, options, is_internal), false };
+}
+
+network_impl::ptr engine_impl::build_network(const topology_impl& topology, const build_options& options, bool is_internal)
 {
-    return{ new program_impl(*this, topology, options, is_internal), false };
+    return{ new network_impl(*this, topology, options, is_internal), false };
 }
 
-network_impl::ptr engine_impl::build_network(const topology_impl& topology, const build_options& options, bool internal_network)
+network_impl::ptr engine_impl::build_network(const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
 {
-    return{ new network_impl(*this, topology, options, internal_network), false };
+    return{ new network_impl(*this, nodes, options, is_internal), false };
 }
 
-network_impl::ptr engine_impl::allocate_network(const program_impl& program)
+network_impl::ptr engine_impl::allocate_network(const program_impl& program, bool is_internal)
 {
-    return{ new network_impl(program), false };
+    return{ new network_impl(program, is_internal), false };
 }
 
 void engine_impl::wait_for_events(std::vector<event_impl::ptr> const & events)
diff --git a/inference-engine/thirdparty/clDNN/src/error_handler.cpp b/inference-engine/thirdparty/clDNN/src/error_handler.cpp
index 6a23ca108..74b365266 100644
--- a/inference-engine/thirdparty/clDNN/src/error_handler.cpp
+++ b/inference-engine/thirdparty/clDNN/src/error_handler.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -43,16 +43,16 @@ void err_details::cldnn_print_error_message(const std::string& file, int line, c
 
 void error_message(const std::string& file, int line, const std::string& instance_id, const std::string& message)
 {
-        std::stringstream error_msg;
-        error_msg << message << std::endl;
-        err_details::cldnn_print_error_message(file, line, instance_id, error_msg);
+    std::stringstream error_msg;
+    error_msg << message << std::endl;
+    err_details::cldnn_print_error_message(file, line, instance_id, error_msg);
 }
 
 void error_on_not_supported_fp16(const std::string& file, int line, const std::string& instance_id, uint8_t supp_fp16, bool fp16_used)
 {
-    std::stringstream error_msg;
     if (!supp_fp16 && fp16_used)
     {
+        std::stringstream error_msg;
         error_msg << "GPU device does not support half precision floating-point formats (cl_khr_fp16 extension)" << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg);
     }
@@ -60,20 +60,23 @@ void error_on_not_supported_fp16(const std::string& file, int line, const std::s
 
 void error_on_bool(const std::string& file, int line, const std::string& instance_id, const std::string& condition_id, bool condition, const std::string& additional_message)
 {
-    std::stringstream error_msg;
     if (condition)
     {
+        std::stringstream error_msg;
         auto condition_to_string = [](const bool& condi)->std::string { return condi ? "true" : "false"; };
         error_msg << condition_id << "(" << condition_to_string(condition) << ") should be " << condition_to_string(!condition) << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
 }
 
-void error_on_mismatching_data_types(const std::string& file, int line, const std::string& instance_id, const std::string& data_format_1_id, data_types data_format_1, const std::string& data_format_2_id, data_types data_format_2, const std::string& additional_message)
+void error_on_mismatching_data_types(const std::string& file, int line, const std::string& instance_id, const std::string& data_format_1_id, data_types data_format_1, const std::string& data_format_2_id, data_types data_format_2, const std::string& additional_message, bool ignore_sign)
 {
-    std::stringstream error_msg;
-    if (data_format_1 != data_format_2)
+    if (data_format_1 != data_format_2 &&
+        !ignore_sign &&
+         ((data_format_1 == data_types::i8 && data_format_2 == data_types::u8) ||
+          (data_format_1 == data_types::u8 && data_format_2 == data_types::i8)))
     {
+        std::stringstream error_msg;
         error_msg << "Data formats are incompatible." << std::endl;
         error_msg << data_format_1_id << " format is: " << data_type_traits::name(data_format_1) << ", " << data_format_2_id << " is: " << data_type_traits::name(data_format_2) << std::endl;
         error_msg << "Data formats should be the same!" << std::endl;
@@ -101,18 +104,18 @@ void error_on_tensor_dims_less_than_other_tensor_dims(const std::string& file, i
         errors.push_back("Spatial y");
     }
 
-    std::stringstream error_msg;
     if (!errors.empty())
     {
-                error_msg << tensor_id << " sizes: " << tens << std::endl;
-                error_msg << tensor_to_compare_to_id << " sizes: " << tens_to_compre << std::endl;
-                error_msg << "All " << tensor_id << " dimensions should not be less than " << tensor_to_compare_to_id << " dimensions." << std::endl;
-                error_msg << "Mismatching dimensions: ";
-                for (size_t i = 0; i < errors.size(); i++)
-                {
-                    error_msg << errors.at(i) << std::endl;
-                }
-                err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
+        std::stringstream error_msg;
+        error_msg << tensor_id << " sizes: " << tens << std::endl;
+        error_msg << tensor_to_compare_to_id << " sizes: " << tens_to_compre << std::endl;
+        error_msg << "All " << tensor_id << " dimensions should not be less than " << tensor_to_compare_to_id << " dimensions." << std::endl;
+        error_msg << "Mismatching dimensions: ";
+        for (size_t i = 0; i < errors.size(); i++)
+        {
+            error_msg << errors.at(i) << std::endl;
+        }
+        err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
 }
 
@@ -136,9 +139,9 @@ void error_on_tensor_dims_greater_than_other_tensor_dims(const std::string& file
         errors.push_back("Spatial y");
     }
 
-    std::stringstream error_msg;
     if (!errors.empty())
     {
+        std::stringstream error_msg;
         error_msg << tensor_id << " sizes: " << tens << std::endl;
         error_msg << tensor_to_compare_to_id << " sizes: " << tens_to_compre << std::endl;
         error_msg << "All " << tensor_id << " dimensions should not be greater than " << tensor_to_compare_to_id << std::endl;
@@ -171,9 +174,9 @@ void error_on_tensor_dims_not_dividable_by_other_tensor_dims(const std::string&
         errors.push_back("Spatial y");
     }
 
-    std::stringstream error_msg;
     if (!errors.empty())
     {
+        std::stringstream error_msg;
         error_msg << tensor_id << " sizes: " << tens << std::endl;
         error_msg << tensor_to_compare_to_id << " sizes: " << tens_to_compre << std::endl;
         error_msg << "All " << tensor_id << " dimensions must be dividable by corresponding dimensions from " << tensor_to_compare_to_id << std::endl;
diff --git a/inference-engine/thirdparty/clDNN/src/fully_connected.cpp b/inference-engine/thirdparty/clDNN/src/fully_connected.cpp
index bb960ebec..cba38f2d2 100644
--- a/inference-engine/thirdparty/clDNN/src/fully_connected.cpp
+++ b/inference-engine/thirdparty/clDNN/src/fully_connected.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -58,6 +58,9 @@ bool is_batch_after_spatial(const std::string order)
 
 layout fully_connected_inst::calc_output_layout(fully_connected_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "fully_connected_node!");
     auto desc = node.get_primitive();
     
     auto input_layout = node.input().get_output_layout();
@@ -104,7 +107,7 @@ fully_connected_inst::typed_primitive_inst(network_impl& network, fully_connecte
     auto input_layout = node.input().get_output_layout();
     auto output_layout = node.get_output_layout();
 
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_layout.format.value, "expected format", format::yxfb, format::bfyx, format::byxf_af32, format::fs_bs_yx_bsv4_fsv32);
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_layout.format.value, "expected format", format::yxfb, format::bfyx, format::byxf_af32, format::fs_bs_yx_bsv4_fsv32, format::b_fs_yx_fsv4);
     CLDNN_ERROR_NOT_EQUAL(node.id(), "Input size", input_layout.size.raw.size(), "output size", output_layout.size.raw.size(), "");
 }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/fully_connected_grad_input.cpp b/inference-engine/thirdparty/clDNN/src/fully_connected_grad_input.cpp
index 6a13c2eff..d5d8196c0 100644
--- a/inference-engine/thirdparty/clDNN/src/fully_connected_grad_input.cpp
+++ b/inference-engine/thirdparty/clDNN/src/fully_connected_grad_input.cpp
@@ -30,6 +30,9 @@ primitive_type_id fully_connected_grad_input_type_id()
 
 layout fully_connected_grad_input_inst::calc_output_layout(fully_connected_grad_input_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "fully_connected_grad_input_node!");
     auto desc = node.get_primitive();
     
     auto input_layout = node.input().get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/fully_connected_grad_weights.cpp b/inference-engine/thirdparty/clDNN/src/fully_connected_grad_weights.cpp
index 8332eaaf9..378a3c712 100644
--- a/inference-engine/thirdparty/clDNN/src/fully_connected_grad_weights.cpp
+++ b/inference-engine/thirdparty/clDNN/src/fully_connected_grad_weights.cpp
@@ -30,6 +30,9 @@ primitive_type_id fully_connected_grad_weights_type_id()
 
 layout fully_connected_grad_weights_inst::calc_output_layout(fully_connected_grad_weights_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "fully_connected_grad_weights_node!");
     //output buffer will not be used in this primitive
     auto input_grad_layout_size = node.input().get_output_layout();
     return{ input_grad_layout_size.data_type, input_grad_layout_size.format,{ 1, 1, 1, 1 } };
diff --git a/inference-engine/thirdparty/clDNN/src/fused_conv_bn_scale.cpp b/inference-engine/thirdparty/clDNN/src/fused_conv_bn_scale.cpp
new file mode 100644
index 000000000..d8e36a1cb
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/fused_conv_bn_scale.cpp
@@ -0,0 +1,131 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "fused_conv_bn_scale_inst.h"
+#include "primitive_type_base.h"
+#include "sliding_window_utils.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn
+{
+primitive_type_id fused_conv_bn_scale_type_id()
+{
+    static primitive_type_base<fused_conv_bn_scale> instance;
+    return &instance;
+}
+// TODO: unify this code with regular convolution.
+layout fused_conv_bn_scale_inst::calc_output_layout(fused_conv_bn_scale_node const& node)
+{
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "fused_conv_bn_scale_node!");
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input().get_output_layout();
+    auto weights_layout = node.weights(0).get_output_layout(); //weights are stored after inputs
+
+    auto input_offset = desc->input_offset;
+    auto stride = desc->stride;
+    auto split = desc->weights.size();
+    auto dilation = desc->dilation;
+
+    // compute how many outputs in rows and columns will be generate by filter. 
+    // outp <= (input_size - (2*input_offset) - kernel_size)/ stride 
+    auto filter_size = weights_layout.size;
+
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial X", stride.spatial[0], "value", 0, "Stride spatial X must be positive (>= 1)");
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial Y", stride.spatial[1], "value", 0, "Stride spatial Y must be positive (>= 1)");
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial X", dilation.spatial[0], "value", 0, "Dilatation patial X must be positive (>= 1)");
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial Y", dilation.spatial[1], "value", 0, "Dilatation spatial Y must be positive (>= 1)");
+    CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial X", 2 * input_offset.spatial[0], "input layout spatial X", input_layout.size.spatial[0], "There is no input data to process");
+    CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial Y", 2 * input_offset.spatial[1], "input layout spatial Y", input_layout.size.spatial[1], "There is no input data to process");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset feature", input_offset.feature[0], "", 0, "Input offset in feature is not supported");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset batch", input_offset.batch[0], "", 0, "Input offset in batch is not supported");
+
+    // get output feature map from weights. It should be the same as number of biases. Will be verified in convolution::create()
+    auto number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(split);
+
+    auto output_range = calc_sliding_window_output_range<swor_mode::all>(
+        input_layout.size, filter_size, input_offset, stride, { 1, 1, 1, 1 }, true, 1);
+
+    tensor output_size(input_layout.size.batch[0], number_of_features,
+                       output_range.spatial[0], output_range.spatial[1]);
+    return { input_layout.data_type, input_layout.format, output_size };
+}
+
+std::string fused_conv_bn_scale_inst::to_string(fused_conv_bn_scale_node const& node)
+{    
+    auto desc       = node.get_primitive();
+    auto strd       = desc->stride;
+    auto split      = node.get_split();
+    auto node_info  = node.desc_to_json();
+    auto activation = desc->with_activation ? " true" : "false";
+
+    std::stringstream primitive_description;
+
+    json_composite fuse_info;
+    fuse_info.add("stride", strd.to_string());
+    fuse_info.add("input offset", desc->input_offset.to_string());
+    fuse_info.add("split", split);
+    fuse_info.add("with activation", activation);
+    fuse_info.add("slope", desc->activation_negative_slope);
+
+    node_info->add("fused_conv_bn_scale info", fuse_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+fused_conv_bn_scale_inst::typed_primitive_inst(network_impl& network, fused_conv_bn_scale_node const& node)
+    : parent(network, node)
+{
+    auto stride = argument.stride;
+
+    auto input_inst = node.input().get_output_layout();
+    auto output_inst = node.get_output_layout();
+    auto output_size = output_inst.size;
+
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input number of dimensions", input_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Input/output dims mismtach");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Stride number of dimensions", stride.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "stride/output dims mismtach");
+
+    auto split = node.get_split();
+    for (decltype(split) j = 0; j < split; j++)
+    {
+        auto filter_inst = node.weights(j).get_output_layout(); //convolution filter
+        if (bias_term())
+        {
+            auto bias_inst = node.bias(j).get_output_layout();
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias batch[0]", bias_inst.size.batch[0], "expected size of batch", 1, "Biases isn't 1D vector.");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias feature[0]", bias_inst.size.feature[0], "expected size of feature", 1, "Biases isn't 1D vector.");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[1]", bias_inst.size.spatial[1], "expected size of spatial[1]", 1, "Biases isn't 1D vector.");
+          
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[0]", bias_inst.size.spatial[0], "expected feature map number", output_size.feature[0] / split, "Bias/fm mismtach");
+        }
+
+        auto input_offset = argument.input_offset;
+
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights number of dimensions", filter_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Weights/output dims mismtach");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Convolution padding mode", node.get_output_layout().data_padding.filling_value(), "padding value", 0.0f, "Unknown padding mode.");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset number of dimensions", input_offset.raw.size(), "input number of dimensions", input_inst.size.raw.size(), "Input offset/ input size mismtach");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Output feature size", output_size.feature.size(), "expected feature size", 1, "Only one-dimensional features are supported");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Output batch size", output_size.batch.size(), "expected output size", 1, "Only one-dimensional batch size are supported");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights spatial size", filter_inst.size.spatial.size(), "expected weights spatial size", 2, "Weights have to have 2 dimensions in spatial domain.");
+        CLDNN_ERROR_LESS_THAN(node.id(), "Weights feature maps number", (input_inst.size.feature[0] - input_offset.feature[0]) / split, "input feature maps number", filter_inst.size.feature[0], "Weights/ifm mismtach");
+    }
+}
+}
diff --git a/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp b/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp
new file mode 100644
index 000000000..b1b436faa
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/fused_conv_eltwise.cpp
@@ -0,0 +1,196 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "fused_conv_eltwise_inst.h"
+#include "primitive_type_base.h"
+#include "sliding_window_utils.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn
+{
+primitive_type_id fused_conv_eltwise_type_id()
+{
+    static primitive_type_base<fused_conv_eltwise> instance;
+    return &instance;
+}
+
+layout fused_conv_eltwise_inst::calc_output_layout(fused_conv_eltwise_node const& node)
+{
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "fused_conv_eltwise_node!");
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input().get_output_layout();
+    auto weights_layout = node.weights(0).get_output_layout(); //weights are stored after inputs
+
+    auto input_offset = desc->conv.input_offset;
+    auto stride = desc->conv.stride;
+    auto dilation = desc->conv.dilation;
+    auto split = desc->conv.weights.size();
+
+    // compute how many outputs in rows and columns will be generate by filter. 
+    // outp <= (input_size - (2*input_offset) - kernel_size)/ stride 
+    auto filter_size = weights_layout.size;
+
+    // TODO: Consider moving general parameter verification to arguments constructor.
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial X", stride.spatial[0], "value", 0, "Stride spatial X must be positive (>= 1)");
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Stride spatial Y", stride.spatial[1], "value", 0, "Stride spatial Y must be positive (>= 1)");
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial X", dilation.spatial[0], "value", 0, "Dilatation patial X must be positive (>= 1)");
+    CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "Dilatation spatial Y", dilation.spatial[1], "value", 0, "Dilatation spatial Y must be positive (>= 1)");
+    CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial X", 2 * input_offset.spatial[0], "input layout spatial X", input_layout.size.spatial[0], "There is no input data to process");
+    CLDNN_ERROR_GREATER_THAN(node.id(), "Input offset spatial Y", 2 * input_offset.spatial[1], "input layout spatial Y", input_layout.size.spatial[1], "There is no input data to process");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset feature", input_offset.feature[0], "", 0, "Input offset in feature is not supported");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset batch", input_offset.batch[0], "", 0, "Input offset in batch is not supported");
+
+    // TODO: FCN and SSD used offset larger than convolution size. does it make sense to support it? do we support it on the ref kernels?
+//     CLDNN_ERROR_GREATER_THAN(node.id(), "Negate input offset spatial X", -input_offset.spatial[0], "input window size spatial X", filter_size.spatial[0], "First convolution is outside of image. please reduce input offset X");
+//     CLDNN_ERROR_GREATER_THAN(node.id(), "Negate input offset spatial Y", -input_offset.spatial[1], "input window size spatial Y", filter_size.spatial[1], "First convolution is outside of image. please reduce input offset Y");
+
+    if (input_layout.format == format::winograd_2x3_s1_weights || input_layout.format == format::winograd_2x3_s1_fused_weights ||
+        input_layout.format == format::winograd_6x3_s1_fused_weights || input_layout.format == format::image_2d_weights_winograd_6x3_s1_fbxyb || input_layout.format == format::image_2d_weights_winograd_6x3_s1_xfbyb)
+        CLDNN_ERROR_MESSAGE(node.id(), "Input for convolution should not be in windograd weights format - it is reserved for weights only");
+
+    if (input_layout.format == format::winograd_2x3_s1_data)
+    {
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "convolution split", split, "expected value", 1, "Convolution with winograd input only supports split == 1");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "stride spatial X", stride.spatial[0], "expected value", 1, "Convolution's input in winograd_2x3_s1_data format can only be used with stride 1x1");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "stride spatial Y", stride.spatial[1], "expected value", 1, "Convolution's input in winograd_2x3_s1_data format can only be used with stride 1x1");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Dilatation spatial X", dilation.spatial[0], "expected value", 1, "Winograd 2x3 convolution does not support dilatation");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Dilatation spatial Y", dilation.spatial[1], "expected value", 1, "Winograd 2x3 convolution does not support dilatation");
+        if (input_layout.size.feature[0] % 32 != 0)
+            CLDNN_ERROR_MESSAGE(node.id(), "Input for winograd 2x3 convolution should have features count divisable by 32");
+        if (weights_layout.size.batch[0] % 32 != 0)
+            CLDNN_ERROR_MESSAGE(node.id(), "Number of filters (OFM) for winograd 2x3 convolution should be divisable by 32");
+
+        if (node.get_primitive()->conv.with_activation)
+            CLDNN_ERROR_MESSAGE(node.id(), "Winograd 2x3 convolution should not have activation fused - activation should be performed at transformation from winograd domain stage");
+
+        CLDNN_ERROR_LESS_THAN(node.id(), "input width", input_layout.size.spatial[0], "filter width", 3, "Convolution input is smaller than weights");
+        CLDNN_ERROR_LESS_THAN(node.id(), "input height", input_layout.size.spatial[1], "filter height", 3, "Convolution input is smaller than weights");
+
+        constexpr tensor::value_type filter_height = 3; //by definition of format::winograd_2x3_s1_data (our assumption)
+        constexpr tensor::value_type winograd_filter_height = filter_height; //for this format, winograd filter is considered to be a set of 1d filters so its height should remain the same as original filter's
+
+        return layout{ input_layout.data_type, input_layout.format, tensor{ input_layout.size.batch[0], weights_layout.size.batch[0], input_layout.size.spatial[0], input_layout.size.spatial[1] - winograd_filter_height + 1 }, input_layout.data_padding };
+    }
+
+    // get output feature map from weights. It should be the same as number of biases. Will be verifed in convolution::create()
+    auto number_of_features = weights_layout.size.batch[0] * static_cast<int32_t>(split);
+
+    if (desc->conv.with_output_size)
+    {
+        CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "User defined output spatial X", desc->conv.output_size.spatial[0], "value", 0, "must be positive(>= 1)");
+        CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "User defined output spatial Y", desc->conv.output_size.spatial[1], "value", 0, "must be positive(>= 1)");
+
+        tensor output_size(input_layout.size.batch[0], number_of_features,
+                           desc->conv.output_size.spatial[0], desc->conv.output_size.spatial[1]);
+        return { input_layout.data_type, input_layout.format, output_size };
+    }
+
+    auto output_range = calc_sliding_window_output_range<swor_mode::all>(
+        input_layout.size, filter_size, input_offset, stride, dilation, true, 1);
+
+    tensor output_size(input_layout.size.batch[0], number_of_features,
+                       output_range.spatial[0], output_range.spatial[1]);
+
+    
+    // due to performance reason for using fs_bs_yx_bsv4_fsv32 first convolution have 3 features, so first conv layer will take byxf and return fs_bs_yx_bsv4_fsv32
+    if (input_layout.data_type == data_types::i8 && input_layout.format == format::byx8_f4 && input_layout.size.batch[0] % 4 == 0 && input_layout.size.feature[0] == 3)
+    {
+        return layout{ input_layout.data_type, cldnn::format::fs_bs_yx_bsv4_fsv32, output_size };
+    }
+
+    return { input_layout.data_type, input_layout.format, output_size };
+}
+
+std::string fused_conv_eltwise_inst::to_string(fused_conv_eltwise_node const& node)
+{    
+    auto desc       = node.get_primitive();
+    auto strd       = desc->conv.stride;
+    auto split      = node.get_split();
+    auto dilation   = desc->conv.dilation;
+    auto node_info  = node.desc_to_json();
+    auto activation = desc->conv.with_activation ? " true" : "false";
+
+    std::stringstream primitive_description;
+
+    json_composite conv_info;
+    conv_info.add("stride", strd.to_string());
+    conv_info.add("input offset", desc->conv.input_offset.to_string());
+    conv_info.add("split", split);
+    conv_info.add("dilation", dilation.to_string());
+    conv_info.add("with activation", activation);
+    conv_info.add("slope", desc->conv.activation_negative_slope);
+    if (desc->conv.with_output_size)
+    {
+        json_composite ud_out_size_info;
+        ud_out_size_info.add("size", desc->conv.output_size.to_string());
+        conv_info.add("with user defined output size", ud_out_size_info);
+    }
+
+    node_info->add("convolution info", conv_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+fused_conv_eltwise_inst::typed_primitive_inst(network_impl& network, fused_conv_eltwise_node const& node)
+    : parent(network, node)
+{
+    auto stride = argument.conv.stride;
+
+    auto input_inst = node.input().get_output_layout();
+    auto output_inst = node.get_output_layout();
+    auto output_size = output_inst.size;
+
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Input number of dimensions", input_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Input/output dims mismatch");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Stride number of dimensions", stride.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "stride/output dims mismatch");
+
+    auto split = node.get_split();
+    for (decltype(split) j = 0; j < split; j++)
+    {
+        auto filter_inst = node.weights(j).get_output_layout(); //convolution filter
+        if (bias_term())
+        {
+            auto bias_inst = node.bias(j).get_output_layout();
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias batch[0]", bias_inst.size.batch[0], "expected size of batch", 1, "Biases isn't 1D vector.");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias feature[0]", bias_inst.size.feature[0], "expected size of feature", 1, "Biases isn't 1D vector.");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[1]", bias_inst.size.spatial[1], "expected size of spatial[1]", 1, "Biases isn't 1D vector.");
+          
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Bias spatial[0]", bias_inst.size.spatial[0], "expected feature map number", output_size.feature[0] / split, "Bias/fm mismatch");
+        }
+
+        auto input_offset = argument.conv.input_offset;
+
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights number of dimensions", filter_inst.size.raw.size(), "output number of dimensions", output_inst.size.raw.size(), "Weights/output dims mismatch");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Convolution padding mode", node.get_output_layout().data_padding.filling_value(), "padding value", 0.0f, "Unknown padding mode.");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Input offset number of dimensions", input_offset.raw.size(), "input number of dimensions", input_inst.size.raw.size(), "Input offset/ input size mismatch");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Output feature size", output_size.feature.size(), "expected feature size", 1, "Only one-dimensional features are supported");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Output batch size", output_size.batch.size(), "expected output size", 1, "Only one-dimensional batch size are supported");
+        CLDNN_ERROR_NOT_EQUAL(node.id(), "Weights spatial size", filter_inst.size.spatial.size(), "expected weights spatial size", 2, "Weights have to have 2 dimensions in spatial domain.");
+        CLDNN_ERROR_LESS_THAN(node.id(), "Weights feature maps number", (input_inst.size.feature[0] - input_offset.feature[0]) / split, "input feature maps number", filter_inst.size.feature[0], "Weights/ifm mismatch");
+        if (filter_inst.format == format::bf_lyx_yx) // local convolution
+        {
+            auto local = filter_inst.size.local;
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Number of local x dimension", local[0], "output x dimension", output_inst.size.spatial[0], "Weights/output dims mismatch");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Number of local y dimension", local[1], "output y dimension", output_inst.size.spatial[1], "Weights/output dims mismatch");
+        }
+    }
+}
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gather.cpp b/inference-engine/thirdparty/clDNN/src/gather.cpp
new file mode 100644
index 000000000..121d5730a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gather.cpp
@@ -0,0 +1,68 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gather_inst.h"
+
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn
+{
+primitive_type_id gather_type_id()
+{
+    static primitive_type_base<gather> instance;
+    return &instance;
+}
+
+layout gather_inst::calc_output_layout(gather_node const& node)
+{
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input(1).get_output_layout();
+    auto input_format = input_layout.format;
+
+    auto input_shape = node.get_primitive()->output_shape;
+
+
+    return layout{input_layout.data_type, input_format, input_shape};
+}
+
+std::string gather_inst::to_string(gather_node const& node)
+{
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    auto& input = node.input();
+
+    std::stringstream primitive_description;
+
+    json_composite gather_info;
+    gather_info.add("input id", input.id());
+    gather_info.add("axis", desc->axis);
+    gather_info.add("output shape", desc->output_shape.to_string());
+
+    node_info->add("gather info", gather_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+gather_inst::typed_primitive_inst(network_impl& network, gather_node const& node)
+    : parent(network, node)
+{
+}
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gemm.cpp b/inference-engine/thirdparty/clDNN/src/gemm.cpp
index a8072bc3b..49f8cc7aa 100644
--- a/inference-engine/thirdparty/clDNN/src/gemm.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gemm.cpp
@@ -31,6 +31,8 @@ primitive_type_id gemm_type_id()
 
 layout gemm_inst::calc_output_layout(gemm_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for gemm_node!");
     auto input1_layout = node.input(0).get_output_layout();
     auto input2_layout = node.input(1).get_output_layout();
     bool transpose_input1 = node.get_primitive()->transpose_input1;
@@ -89,8 +91,8 @@ gemm_inst::typed_primitive_inst(network_impl& network, gemm_node const& node)
         if (node.inputs_count() > 2)
         {
             auto input3_layout = node.input(2).get_output_layout();
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[0], "Input2 Rows count", input_layout.size.spatial[1], "");
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[1], "Input2 Rows count", input2_layout.size.spatial[0], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Columns count", input3_layout.size.spatial[0], "Input2 Columns count", input2_layout.size.spatial[0], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Rows count", input3_layout.size.spatial[1], "Input1 Rows count", input_layout.size.spatial[1], "");
         }
     }
 
@@ -100,8 +102,8 @@ gemm_inst::typed_primitive_inst(network_impl& network, gemm_node const& node)
         if (node.inputs_count() > 2)
         {
             auto input3_layout = node.input(2).get_output_layout();
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[0], "Input2 Rows count", input_layout.size.spatial[0], "");
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[1], "Input2 Rows count", input2_layout.size.spatial[0], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input13 Columns count", input3_layout.size.spatial[0], "Input2 Rows count", input2_layout.size.spatial[1], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Rows count", input3_layout.size.spatial[1], "Input1 Rows count", input_layout.size.spatial[1], "");
         }
     }
     else if (transpose_input1 && !transpose_input2)
@@ -110,8 +112,8 @@ gemm_inst::typed_primitive_inst(network_impl& network, gemm_node const& node)
         if (node.inputs_count() > 2)
         {
             auto input3_layout = node.input(2).get_output_layout();
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[0], "Input2 Rows count", input_layout.size.spatial[1], "");
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[1], "Input2 Rows count", input2_layout.size.spatial[1], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Columns count", input3_layout.size.spatial[0], "Input2 Columns count", input2_layout.size.spatial[0], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Rows count", input3_layout.size.spatial[1], "Input1 Columns count", input_layout.size.spatial[0], "");
         }
     }
     else
@@ -120,8 +122,8 @@ gemm_inst::typed_primitive_inst(network_impl& network, gemm_node const& node)
         if (node.inputs_count() > 2)
         {
             auto input3_layout = node.input(2).get_output_layout();
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[0], "Input2 Rows count", input_layout.size.spatial[0], "");
-            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input1 Columns count", input3_layout.size.spatial[1], "Input2 Rows count", input2_layout.size.spatial[1], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Columns count", input3_layout.size.spatial[0], "Input2 Rows count", input2_layout.size.spatial[1], "");
+            CLDNN_ERROR_NOT_EQUAL(node.id(), "Input3 Rows count", input3_layout.size.spatial[1], "Input1 Columns count", input_layout.size.spatial[0], "");
         }
     }
 
diff --git a/inference-engine/thirdparty/clDNN/src/generic_layer.cpp b/inference-engine/thirdparty/clDNN/src/generic_layer.cpp
index 8b5cb6f28..6d1c3c6ef 100644
--- a/inference-engine/thirdparty/clDNN/src/generic_layer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/generic_layer.cpp
@@ -31,6 +31,12 @@ primitive_type_id generic_layer_type_id()
     return &instance;
 }
 
+generic_layer_node::typed_program_node(const std::shared_ptr<generic_layer> prim, program_impl& prog)
+    : parent(prim, prog)
+{
+    can_share_buffer(false);
+}
+
 generic_layer_inst::typed_primitive_inst(network_impl& network, generic_layer_node const& node)
     : parent(network, node)
 {
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
index a6d46a6d8..d4b40a475 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/activation_gpu.cpp
@@ -48,14 +48,14 @@ struct activation_gpu : typed_primitive_gpu_impl<activation>
         auto activation_params = get_default_params<kernel_selector::activation_params>(arg);
         auto activation_optional_params = get_default_optional_params<kernel_selector::activation_optional_params>(arg.get_program());
 
-        convert_new_activation_func(arg.get_primitive(), activation_params);
+        convert_new_activation_func(arg.get_primitive(), activation_params.activation);
 
         if (arg.is_parameterized())
         {
             const auto& slope_layout = arg.slope_input().get_output_layout();
             const auto& output_layout = arg.get_output_layout();
 
-            const auto params_num = kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activationFunc);
+            const auto params_num = kernel_selector::GetActivationAdditionalParamsNumber(activation_params.activation.function);
 
             CLDNN_ERROR_LESS_THAN(arg.id(), "Slope layout size count", slope_layout.size.count(), "output_layout.size.feature[0] * params_num", static_cast<size_t>(output_layout.size.feature[0] * params_num), "Error - not enough data inside additional params buffer");
             
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/activation_grad_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/activation_grad_gpu.cpp
index fefd5361a..a599f0b0f 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/activation_grad_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/activation_grad_gpu.cpp
@@ -52,16 +52,16 @@ struct activation_grad_gpu : typed_primitive_gpu_impl<activation_grad>
 
         activation_grad_params.gradient = true;
         activation_grad_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));
-        activation_grad_params.activationFunc = get_kernel_selector_activation_grad_param(primitive->activation_grad_func);
-        activation_grad_params.activationParams.m = primitive->additional_params.a;
-        activation_grad_params.activationParams.n = primitive->additional_params.b;
+        activation_grad_params.activation.function = get_kernel_selector_activation_grad_param(primitive->activation_grad_func);
+        activation_grad_params.activation.m = primitive->additional_params.a;
+        activation_grad_params.activation.n = primitive->additional_params.b;
 
         if (arg.is_parameterized())
         {
             const auto& slope_layout = arg.slope_input().get_output_layout();
             const auto& output_layout = arg.get_output_layout();
 
-            const auto params_num = kernel_selector::GetActivationAdditionalParamsNumber(activation_grad_params.activationFunc);
+            const auto params_num = kernel_selector::GetActivationAdditionalParamsNumber(activation_grad_params.activation.function);
 
             CLDNN_ERROR_LESS_THAN(arg.id(), "Slope layout size count", slope_layout.size.count(), "output_layout.size.feature[0] * params_num", static_cast<size_t>(output_layout.size.feature[0] * params_num), "Error - not enough data inside additional params buffer");
 
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp
index ec4249e80..a14e1932f 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/arg_max_min_gpu.cpp
@@ -33,13 +33,6 @@ namespace cldnn {
 
 		protected:
 
-			virtual bool validate(typed_primitive_inst<arg_max_min>& instance) const override
-			{
-				bool res = parent::validate(instance);
-
-				return res;
-			}
-
 			virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<arg_max_min>& instance, int32_t) const override
 			{
 				kernel::kernel_arguments_data args = parent::get_arguments(instance, 0);
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/batch_norm_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/batch_norm_gpu.cpp
index 8adb88888..f5364ad45 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/batch_norm_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/batch_norm_gpu.cpp
@@ -37,17 +37,20 @@ protected:
     {
         kernel::kernel_arguments_data args;
 
-        
-        if (!instance.use_global_stats())
-        {
-            args.inputs = { &instance.input_memory() };
-            if (instance.forwad_pass())
-                args.inputs.push_back(&instance.inv_variance_memory());
-        }
-        else
-        {
-            args.inputs = { &instance.input_memory(), &instance.mean_memory(), &instance.variance_memory() };
-        }
+		args.inputs = { &instance.input_memory() };
+
+		if (instance.use_global_stats()) {
+			args.inputs.push_back(&instance.mean_memory());
+			args.inputs.push_back(&instance.variance_memory());
+		}
+
+		if (instance.use_scale_shift()) {
+			args.inputs.push_back(&instance.scale_memory());
+			args.inputs.push_back(&instance.shift_memory());
+		}
+
+		if (instance.forwad_pass())
+			args.inputs.push_back(&instance.inv_variance_memory());
 
         args.output = &instance.output_memory();
 
@@ -58,13 +61,17 @@ public:
 
     static primitive_impl* create(const batch_norm_node &arg) 
     { 
-        if (!arg.use_global_stats())
+        if (!arg.use_global_stats()
+			|| arg.calc_mean_var() )
         {
             auto norm_params = get_default_params<kernel_selector::batch_norm_params>(arg);
             auto norm_optional_params = get_default_optional_params<kernel_selector::batch_norm_optional_params>(arg.get_program());
 
             norm_params.batchNormParams.epsilon = arg.get_primitive()->epsilon;
             norm_params.batchNormParams.with_inv_var = arg.forwad_pass();
+			norm_params.batchNormParams.with_scale_shift = arg.use_scale_shift();
+            if (arg.calc_mean_var())
+			    norm_params.batchNormParams.with_mean_var_out = arg.calc_mean_var();
 
             auto& kernel_selector = kernel_selector::batch_norm_kernel_selector::Instance();
             auto best_kernels = kernel_selector.GetBestKernels(norm_params, norm_optional_params);
@@ -86,7 +93,7 @@ public:
 
             ew_params.inputs.push_back(convert_data_tensor(arg.mean().get_output_layout()));
             ew_params.inputs.push_back(convert_data_tensor(arg.variance().get_output_layout()));
-
+			
             ew_params.operations.push_back({
                 { kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1) },
                 kernel_selector::eltwise_mode::SUB });
@@ -103,6 +110,19 @@ public:
                 { kernel_selector::eltwise_params::InputType::Intermediate(0), kernel_selector::eltwise_params::InputType::Intermediate(2) },
                 kernel_selector::eltwise_mode::MUL });
 
+			if (arg.use_scale_shift()) {
+				ew_params.inputs.push_back(convert_data_tensor(arg.scale().get_output_layout()));
+				ew_params.inputs.push_back(convert_data_tensor(arg.shift().get_output_layout()));
+
+				ew_params.operations.push_back({
+					{ kernel_selector::eltwise_params::InputType::Intermediate(3), kernel_selector::eltwise_params::InputType::Buffer(3) },
+					kernel_selector::eltwise_mode::MUL });
+
+				ew_params.operations.push_back({
+					{ kernel_selector::eltwise_params::InputType::Intermediate(4), kernel_selector::eltwise_params::InputType::Buffer(4) },
+					kernel_selector::eltwise_mode::ADD });
+			}
+
             ew_params.layoutBased = true;
 
             auto& kernel_selector = kernel_selector::eltwise_kernel_selector::Instance();
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp
index 8c72bdcd9..fc3667a20 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/broadcast_gpu.cpp
@@ -35,6 +35,25 @@ struct broadcast_gpu : typed_primitive_gpu_impl<broadcast>
         auto bc_params          = get_default_params<kernel_selector::broadcast_params>(arg, 1);
         auto bc_optional_params = get_default_optional_params<kernel_selector::broadcast_optional_params>(arg.get_program());
 
+        const auto& broadcast_axes = arg.get_primitive()->broadcast_axes;
+        uint16_t index = (uint16_t) 0;
+        uint16_t input_index = (uint16_t) broadcast_axes.size();
+
+        //bfyx format
+        for (size_t i = 0; i < 4; ++i)
+        {
+            if (std::find(broadcast_axes.begin(), broadcast_axes.end(), i) != broadcast_axes.end())
+            {
+                bc_params.input_order.push_back(index);
+                ++index;
+            }
+            else
+            {
+                bc_params.input_order.push_back(input_index);
+                ++input_index;
+            }
+        }
+
         auto& kernel_selector = kernel_selector::broadcast_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(bc_params, bc_optional_params);
 
@@ -49,20 +68,12 @@ namespace {
         attach() {
             auto val_fw = broadcast_gpu::create;
 
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8,  format::yxfb), val_fw);
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8,  format::yxfb), val_fw);
-
             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8,  format::bfyx), val_fw);
             implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8,  format::bfyx), val_fw);
-
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i8,  format::byxf), val_fw);
-            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::u8,  format::byxf), val_fw);
+            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+            implementation_map<broadcast>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
         }
         ~attach() = default;
     };
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp b/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp
new file mode 100644
index 000000000..d8db570b4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.cpp
@@ -0,0 +1,151 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "command_queues_builder.h"
+#include "error_handler.h"
+
+namespace cldnn { namespace gpu{
+
+    command_queues_builder::command_queues_builder(const cl::Context& context, const cl::Device& device, const cl_platform_id& platform_id)
+        : _context(context)
+        , _device(device)
+        , _platform_id(platform_id)
+        , _priority_mode(cldnn_priority_disabled)
+        , _throttle_mode(cldnn_throttle_disabled)
+    {}
+
+    cl_command_queue_properties command_queues_builder::get_properties()
+    {
+        cl_command_queue_properties ret = ((_profiling ? CL_QUEUE_PROFILING_ENABLE : 0) | (_out_of_order ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0));
+        return ret;
+    }
+
+    void command_queues_builder::build()
+    {
+        auto properties = get_properties();
+
+        if (_priority_mode == cldnn_priority_disabled &&
+            _throttle_mode == cldnn_throttle_disabled)
+        {
+            _queue = cl::CommandQueue(_context, _device, properties);
+            return;
+        }
+
+        unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
+
+        switch (_priority_mode)
+        {
+        case cldnn_priority_high:
+            cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
+            break;
+        case cldnn_priority_low:
+            cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
+            break;
+        default:
+            break;
+        }
+
+        unsigned cl_queue_throttle_value = CL_QUEUE_THROTTLE_MED_KHR;
+
+        switch (_throttle_mode)
+        {
+        case cldnn_throttle_high:
+            cl_queue_throttle_value = CL_QUEUE_THROTTLE_HIGH_KHR;
+            break;
+        case cldnn_throttle_low:
+            cl_queue_throttle_value = CL_QUEUE_THROTTLE_LOW_KHR;
+            break;
+        default:
+            break;
+        }
+
+        cl_int error_code = CL_SUCCESS;
+
+        if (_priority_mode != cldnn_priority_disabled &&
+            _throttle_mode != cldnn_throttle_disabled)
+        {
+            cl_queue_properties properties_low[] = {
+                CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value,
+                CL_QUEUE_THROTTLE_KHR, cl_queue_throttle_value,
+                CL_QUEUE_PROPERTIES, properties,
+                0 };
+
+            _queue = clCreateCommandQueueWithProperties(
+                _context.get(),
+                _device.get(),
+                properties_low,
+                &error_code);
+        }
+        else if (_priority_mode != cldnn_priority_disabled)
+        {
+            cl_queue_properties properties_low[] = {
+                CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value,
+                CL_QUEUE_PROPERTIES, properties,
+                0 };
+
+            _queue = clCreateCommandQueueWithProperties(
+                _context.get(),
+                _device.get(),
+                properties_low,
+                &error_code);
+        }
+        else if (_throttle_mode != cldnn_throttle_disabled)
+        {
+            cl_queue_properties properties_low[] = {
+                CL_QUEUE_THROTTLE_KHR, cl_queue_throttle_value,
+                CL_QUEUE_PROPERTIES, properties,
+                0 };
+
+            _queue = clCreateCommandQueueWithProperties(
+                _context.get(),
+                _device.get(),
+                properties_low,
+                &error_code);
+        }
+
+        if (error_code != CL_SUCCESS) {
+            CLDNN_ERROR_MESSAGE("Command queues builders", "clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));
+        }
+    }
+
+    void command_queues_builder::set_priority_mode(cldnn_priority_mode_type priority, bool extension_support)
+    {
+        if (priority != cldnn_priority_disabled && !extension_support)
+        {
+            CLDNN_ERROR_MESSAGE(
+                "Command queues builders - priority_mode",
+                "The param priority_mode is set in engine_configuration,\
+                but cl_khr_priority_hints or cl_khr_create_command_queue\
+                is not supported by current OpenCL implementation.");
+        }
+        _priority_mode = priority;
+    }
+
+    void command_queues_builder::set_throttle_mode(cldnn_throttle_mode_type throttle, bool extension_support)
+    {
+        if (throttle != cldnn_throttle_disabled && !extension_support)
+        {
+            CLDNN_ERROR_MESSAGE(
+                "Command queues builders - throttle_mode",
+                "The param throttle_mode is set in engine_configuration,\
+                but cl_khr_throttle_hints is not supported by current OpenCL implementation.");
+        }
+        _throttle_mode = throttle;
+    }
+}
+}
+
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h b/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h
new file mode 100644
index 000000000..4d375cbd3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/command_queues_builder.h
@@ -0,0 +1,46 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "ocl_toolkit.h"
+
+namespace cldnn { namespace gpu {
+    class command_queues_builder
+    {
+    public:
+        command_queues_builder(const cl::Context& context, const cl::Device& device, const cl_platform_id& platform_id);
+        void build();
+        void set_throttle_mode(cldnn_throttle_mode_type throttle, bool extension_support);
+        void set_priority_mode(cldnn_priority_mode_type priority, bool extension_support);
+        void set_profiling(bool flag) { _profiling = flag; }
+        void set_out_of_order(bool flag) { _out_of_order = flag; }
+        cl::CommandQueue& queue() { return _queue; }
+        cl::CommandQueue queue() const { return _queue; }
+
+    private:
+        cl::CommandQueue _queue;
+        cl::Context _context;
+        cl::Device  _device;
+        cl_platform_id _platform_id;
+        bool _profiling;
+        bool _out_of_order;
+        cldnn_priority_mode_type _priority_mode;
+        cldnn_throttle_mode_type _throttle_mode;
+
+        cl_command_queue_properties get_properties();
+    };
+}}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
index 98d8be2a6..032fa6318 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/concatenation_gpu.cpp
@@ -115,6 +115,8 @@ namespace {
                 { std::make_tuple(engine_types::ocl, data_types::i8,  format::byxf), concatenation_gpu::create },
                 { std::make_tuple(engine_types::ocl, data_types::i32,  format::byxf), concatenation_gpu::create },
                 { std::make_tuple(engine_types::ocl, data_types::i64,  format::byxf), concatenation_gpu::create },
+                { std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), concatenation_gpu::create },
+                { std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), concatenation_gpu::create },
                 // MMAD
                 { std::make_tuple(engine_types::ocl, data_types::i8,  format::byxf_af32), concatenation_gpu::create },
 
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp
new file mode 100644
index 000000000..30d7eaded
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/condition_gpu.cpp
@@ -0,0 +1,144 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "condition_inst.h"
+#include "network_impl.h"
+#include "implementation_map.h"
+#include "math_utils.h"
+
+#include <algorithm>
+
+namespace cldnn { namespace gpu {
+
+struct condition_gpu : typed_primitive_impl<condition>
+{
+    const condition_node& outer;
+
+    condition_gpu(const condition_node& outer)
+        : outer(outer)
+    {}
+
+    event_impl::ptr execute_impl(const std::vector<event_impl::ptr>& events, condition_inst& instance) override
+    {
+        for (auto& a : events)
+        {
+            a->wait();
+        }
+        auto ev = instance.get_network().get_engine().create_user_event(false);
+
+        bool exec_branch = choose_branch_to_exec(instance);
+        memory_impl::ptr memory_to_copy;
+        if (exec_branch)
+            memory_to_copy = &execute_branch(instance.get_net_true(), instance.result_id(), instance.input_memory());
+        else
+            memory_to_copy = &execute_branch(instance.get_net_false(), instance.result_id(), instance.input_memory());
+        //just copy memory
+        mem_lock<float> inp_ptr{ memory_to_copy };
+        mem_lock<float> out_ptr{ instance.output_memory() };
+        std::copy(inp_ptr.begin(), inp_ptr.end(), out_ptr.begin());
+        dynamic_cast<cldnn::user_event*>(ev.get())->set(); // set as complete
+        return ev;
+    }
+
+    static primitive_impl* create(const condition_node& arg)
+    { 
+        return new condition_gpu(arg);
+    }
+
+private:
+    /*
+    Add functions here.
+    */
+    bool check_condition(const float value_1, const float value_2, const cond_functions& func) const
+    {
+        switch (func)
+        {
+        case cond_functions::EQUAL: return value_1 == value_2;
+            break;
+        case cond_functions::GREATER: return value_1 > value_2;
+            break;
+        case cond_functions::LESS: return value_1 < value_2;
+            break;
+        default:
+            throw("Unknown comparision function for: " + outer.id());
+            break;
+        }
+    }
+
+    /*
+    Loop over memory and check condition.
+    Returns boolean flag, which says what branch should be executed.
+    */
+    bool choose_branch_to_exec(condition_inst& instance) const
+    {
+        mem_lock<float> lock_compare_data{ instance.compare_memory() };
+        auto compare_layout = instance.compare_memory().get_layout();
+        auto compare_ptr = lock_compare_data.begin();
+
+        mem_lock<float> lock_input{ instance.input_memory() };
+        auto input_layout = instance.input_memory().get_layout();
+        auto input_ptr = lock_input.begin();
+
+        auto function = instance.argument.function;
+        auto& offset = instance.argument.offset;
+        auto& range = compare_layout.size;
+
+        for (auto b = 0; b < range.batch[0]; b++)
+        {
+            for (auto f = 0; f < range.feature[0]; f++)
+            {
+                for (auto y = 0; y < range.spatial[1]; y++)
+                {
+                    for (auto x = 0; x < range.spatial[0]; x++)
+                    {
+                        auto input_idx = input_layout.get_linear_offset({
+                            b + offset.batch[0],
+                            f + offset.feature[0],
+                            x + offset.spatial[0],
+                            y + offset.spatial[1]
+                            });
+                        auto compare_idx = compare_layout.get_linear_offset({ b, f, x, y });
+                        if (!check_condition(input_ptr[input_idx], compare_ptr[compare_idx], function)) return false;
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    
+
+    memory_impl& execute_branch(network_impl::ptr branch, const primitive_id& input_id, memory_impl& input_memory) const
+    {
+        branch->set_input_data(input_id, input_memory);
+        branch->execute({});
+        return branch->get_outputs().at(0)->output_memory();
+    }
+
+};
+
+namespace {
+    struct attach {
+        attach() {
+            implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx),
+                condition_gpu::create);
+            implementation_map<condition>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb),
+                condition_gpu::create);
+        }
+        ~attach() = default;
+    };
+    attach attach_impl;
+}
+} 
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp b/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp
index e9b4b47a8..c1702a0cd 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/configuration.cpp
@@ -15,7 +15,7 @@
 */
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
-#include "ocl_toolkit.h"
+#include "confiugration.h"
 
 namespace cldnn {
     namespace gpu {
@@ -30,6 +30,8 @@ namespace cldnn {
             , host_out_of_order(false)
             , log("")
             , ocl_sources_dumps_dir("")
+            , user_context(nullptr)            
+            , tuning_cache_path("cache.json")        
         {}
     }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/confiugration.h b/inference-engine/thirdparty/clDNN/src/gpu/confiugration.h
new file mode 100644
index 000000000..3f7b258c0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/confiugration.h
@@ -0,0 +1,50 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include <string>
+#include "api/C/cldnn.h"
+
+namespace cl
+{
+class Context;
+}
+namespace cldnn {
+    namespace gpu {
+        struct configuration
+        {
+            enum device_types { default_device = 0, cpu, gpu, accelerator };
+
+            configuration();
+
+            bool enable_profiling;
+            bool meaningful_kernels_names;
+            bool dump_custom_program;
+            device_types device_type;
+            uint32_t device_vendor;
+            std::string compiler_options;
+            std::string single_kernel_name;
+            bool host_out_of_order;
+            std::string log;
+            std::string ocl_sources_dumps_dir;
+            cldnn_priority_mode_type priority_mode;
+            cldnn_throttle_mode_type throttle_mode;
+            cl::Context* user_context;
+            std::string tuning_cache_path;
+        };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/contract_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/contract_gpu.cpp
new file mode 100644
index 000000000..b7f1c22e5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/contract_gpu.cpp
@@ -0,0 +1,88 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "contract_inst.h"
+
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "error_handler.h"
+#include "contract/contract_kernel_selector.h"
+#include "contract/contract_kernel_base.h"
+
+namespace cldnn {
+    namespace gpu {
+
+        namespace
+        {
+            inline kernel_selector::ContractMode convert_to_contract_mode(contract_mode mode)
+            {
+                switch (mode)
+                {
+                case contract_mode::sum:  return kernel_selector::ContractMode::SUM;
+                case contract_mode::prod:  return kernel_selector::ContractMode::PRODUCT;
+                case contract_mode::all:  return kernel_selector::ContractMode::ALL;
+                case contract_mode::any: return kernel_selector::ContractMode::ANY;
+                case contract_mode::max: return kernel_selector::ContractMode::MAX;
+
+                default:
+                    return kernel_selector::ContractMode::SUM;
+                }
+            }
+        }
+
+        struct contract_gpu : typed_primitive_gpu_impl<contract>
+        {
+            using parent = typed_primitive_gpu_impl<contract>;
+            using parent::parent;
+
+
+            static primitive_impl* create(const contract_node& arg)
+            {
+                auto c_params = get_default_params<kernel_selector::contract_params>(arg, 1);
+                auto c_optional_params = get_default_optional_params<kernel_selector::contract_optional_params>(arg.get_program());
+
+                c_params.reduction_axes = arg.get_primitive()->reduction_axes;
+                c_params.mode = convert_to_contract_mode(arg.get_primitive()->mode);
+
+                auto& kernel_selector = kernel_selector::contract_kernel_selector::Instance();
+                auto best_kernels = kernel_selector.GetBestKernels(c_params, c_optional_params);
+
+                CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+                return new contract_gpu(arg, best_kernels[0]);
+            }
+        };
+
+        namespace {
+            struct attach {
+                attach() {
+                    auto val_fw = contract_gpu::create;
+
+                    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+                    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+                    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+                    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+                    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+                    implementation_map<contract>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+                }
+                ~attach() = default;
+            };
+
+            attach attach_impl;
+
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp
index dd5a004e8..54e63a780 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/convolution_gpu.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,13 +32,17 @@ struct convolution_gpu : typed_primitive_gpu_impl<convolution>
 
 protected:
 
-    virtual bool validate(typed_primitive_inst<convolution>& instance) const override
+    virtual bool validate_impl(const typed_primitive_inst<convolution>& instance) const override
     {
-        bool res = parent::validate(instance);
+        bool res = true;
+
+        auto outer_id = _outer.id();
+        auto data_type = instance.node.input().get_output_layout().data_type;
 
         // Check whether all memory elements use the same unit type (FP16 or FP32).
-        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.node.input().get_output_layout().data_type, "output memory", instance.node.get_output_layout().data_type, "");
-        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.node.input().get_output_layout().data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
+        CLDNN_ERROR_DATA_TYPES_MISMATCH(outer_id, "Input memory", data_type, "output memory", instance.node.get_output_layout().data_type, "");
+        // Integer signed/unsigned is ok for convoluiton
+        CLDNN_ERROR_DATA_TYPES_MISMATCH_IGNORE_SIGN(outer_id, "Input memory", data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
 
         return res;
     }
@@ -59,6 +63,11 @@ protected:
         return _outer.get_split(); 
     }
 
+    virtual uint32_t get_groups() const override
+    {
+        return _outer.get_groups();
+    }
+
 public:
 
     static primitive_impl* create(const convolution_node &arg)
@@ -72,6 +81,7 @@ public:
         const auto& stride          = primitive->stride;
         const auto& dilation        = primitive->dilation;
         const auto& input_offset    = primitive->input_offset;
+        const auto& groups           = primitive->groups;
 
         const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
         const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
@@ -80,22 +90,24 @@ public:
 
         assert(arg.get_output_layout().size.feature[0] / primitive->split() == weights_layout.size.batch[0]);
 
-        auto conv_params = get_weights_bias_default_params<kernel_selector::convolution_params>(arg, actual_split);
+        auto conv_params = get_weights_bias_default_params<kernel_selector::convolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
         auto conv_optional_params = get_default_weights_bias_optional_params<kernel_selector::convolution_optional_params>(arg.get_program());
 
         const auto additional_offset = tensor::max(input_offset, 0);
         if (additional_offset != 0)
         {
-            conv_params.inputs[0] = convert_data_tensor(input_layout, actual_split, additional_offset);
+            conv_params.inputs[0] = convert_data_tensor(input_layout, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, additional_offset);
         }
 
         if(primitive->with_activation)
-            convert_activation_func_params(primitive, conv_params);
+            convert_activation_func_params(primitive, conv_params.activation);
 
-        conv_params.depthwiseSeparableOpt = depthwise_separable_opt;
+        conv_params.depthwise_separable_opt = depthwise_separable_opt;
         conv_params.transposed = transposed;
 
+        conv_params.local_convolution = weights_size.local[0] > 1 || weights_size.local[1] > 1;
         conv_params.split = split;
+        conv_params.groups = groups;
         conv_params.filterSize = {
             (uint32_t)weights_size.spatial[0],
             (uint32_t)weights_size.spatial[1],
@@ -141,8 +153,7 @@ public:
 
         kernel_selector::KernelsData best_kernels = kernel_selector.GetBestKernels(conv_params, conv_optional_params);
 		
-        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
-
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
         auto conv = new convolution_gpu(arg, best_kernels[0]);
 
         return conv;
@@ -165,7 +176,12 @@ namespace{
             implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), convolution_gpu::create);
             // MMAD
             implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byx8_f4), convolution_gpu::create);
+
             implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), convolution_gpu::create);
+            implementation_map<convolution>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), convolution_gpu::create);
         }
         ~attach() {}
     };
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp
index b8bc15734..5f39cac57 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/convolution_grad_weights_gpu.cpp
@@ -31,9 +31,9 @@ struct convolution_grad_weights_gpu : typed_primitive_gpu_impl<convolution_grad_
 
 protected:
 
-    virtual bool validate(typed_primitive_inst<convolution_grad_weights>& instance) const override
+    virtual bool validate_impl(const typed_primitive_inst<convolution_grad_weights>& instance) const override
     {
-        bool res = parent::validate(instance);
+        bool res = true;
 
         CLDNN_ERROR_NOT_EQUAL(_outer.id(), "convolution_grad_weights filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in convolution_grad_weights.");
         // Check whether all memory elements use the same unit type (FP16 or FP32).
@@ -96,13 +96,15 @@ public:
         const tensor dilation = {0,0,1,1};
 #endif
         const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
+        const auto output_grad_w = arg.output_grad_w();
 
         const auto& input_offset = primitive->input_offset;
 
         auto conv_grad_weights_params = get_default_learning_params<kernel_selector::convolution_grad_weights_params>(arg, depthwise_separable_opt ? 1 : split);
         auto conv_grad_weights_optional_params = get_default_learning_optional_params<kernel_selector::convolution_grad_weights_optional_params>(arg.get_program());
 
-        conv_grad_weights_params.depthwiseSeparableOpt = depthwise_separable_opt;
+        conv_grad_weights_params.depthwise_separable_opt = depthwise_separable_opt;
+        conv_grad_weights_params.output_grad_w = output_grad_w;
 
         conv_grad_weights_params.gradient = true;
         conv_grad_weights_params.inputs.push_back(convert_data_tensor(arg.get_dependency(1).get_output_layout()));
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp
index d5638ce3c..86a025563 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/crop_gpu.cpp
@@ -67,10 +67,28 @@ namespace {
 
             implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::yxfb), val_fw);
             implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::yxfb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::yxfb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::yxfb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::yxfb), val_fw);
             implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
             implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
             implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::byxf), val_fw);
             implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::byxf), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::byxf), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::byxf), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::fyxb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::fyxb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fyxb), val_fw);
+            implementation_map<crop>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::fyxb), val_fw);
         }
         ~attach() {}
     };
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp
index a4c940df3..d4256de2a 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/custom_gpu_primitive_gpu.cpp
@@ -98,6 +98,7 @@ static void add_layout_to_jit(kernel_selector::jit_constants& mem_consts, const
     // #define INPUT0_TYPE float 
     static const std::map<data_types, std::string> dataTypeToIndex{
         { data_types::i8    ,"char" },
+        { data_types::u8    ,"uchar" },
         { data_types::i32   ,"int" },
         { data_types::i64   ,"long" },
         { data_types::f16   ,"half" },
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
index 68ffdbeae..7ec6291f2 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/deconvolution_gpu.cpp
@@ -32,9 +32,9 @@ struct deconvolution_gpu : typed_primitive_gpu_impl<deconvolution>
 protected:
 
     // TODO: share it with convolution and fully connected
-    virtual bool validate(typed_primitive_inst<deconvolution>& instance) const override
+    virtual bool validate_impl(const typed_primitive_inst<deconvolution>& instance) const override
     {
-        bool res = parent::validate(instance);
+        bool res = true;
 
         CLDNN_ERROR_NOT_EQUAL(_outer.id(), "deconvolution filling value", _outer.get_output_layout().data_padding.filling_value(), "padding mode", 0.0f, "Unknown padding mode in deconvolution.");
         // Check whether all memory elements use the same unit type (FP16 or FP32).
@@ -64,6 +64,11 @@ protected:
         return _outer.get_split(); 
     }
 
+    virtual uint32_t get_groups() const override
+    {
+        return _outer.get_groups();
+    }
+
 public:
 
     static primitive_impl* create(const deconvolution_node& arg)
@@ -93,18 +98,21 @@ public:
         const tensor dilation = {0,0,1,1};
 #endif
         const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
+        const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
 
         const auto& input_offset = primitive->input_offset;
+        const auto& groups = primitive->groups;
 
-        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, depthwise_separable_opt ? 1 : split);
+        auto deconv_params = get_weights_bias_default_params<kernel_selector::deconvolution_params>(arg, (groups > 1 && !depthwise_separable_opt) ? groups : actual_split, groups);
         auto deconv_optional_params = get_default_weights_bias_optional_params<kernel_selector::deconvolution_optional_params>(arg.get_program());
 
         if(primitive->with_activation)
-            convert_activation_func_params(primitive, deconv_params);
+            convert_activation_func_params(primitive, deconv_params.activation);
 
-        deconv_params.depthwiseSeparableOpt = depthwise_separable_opt;
+        deconv_params.depthwise_separable_opt = depthwise_separable_opt;
 
         deconv_params.split = split;
+        deconv_params.groups = groups;
         deconv_params.filterSize = {
             (uint32_t)weights_size.spatial[0],
             (uint32_t)weights_size.spatial[1],
@@ -136,8 +144,7 @@ public:
         auto& kernel_selector = kernel_selector::deconvolution_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(deconv_params, deconv_optional_params);
 
-        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
-
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
         auto deconv = new deconvolution_gpu(arg, best_kernels[0]);
 
         return deconv;
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp
new file mode 100644
index 000000000..bc29029df
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/depth_to_space_gpu.cpp
@@ -0,0 +1,72 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "depth_to_space_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "depth_to_space/depth_to_space_kernel_selector.h"
+#include "depth_to_space/depth_to_space_kernel_ref.h"
+#include "error_handler.h"
+
+using namespace cldnn;
+
+namespace cldnn
+{
+    namespace gpu
+    {
+        struct depth_to_space_gpu : typed_primitive_gpu_impl<depth_to_space>
+        {
+            using parent = typed_primitive_gpu_impl<depth_to_space>;
+            using parent::parent;
+
+        public:
+
+            static primitive_impl* create(const depth_to_space_node& arg)
+            {
+                auto depth_to_space_params = get_default_params<kernel_selector::depth_to_space_params>(arg);
+                auto depth_to_space_optional_params =
+                        get_default_optional_params<kernel_selector::depth_to_space_optional_params>(arg.get_program());
+
+                depth_to_space_params.block_size = arg.get_primitive()->block_size;
+
+                auto& kernel_selector = kernel_selector::depth_to_space_kernel_selector::Instance();
+                auto best_kernels = kernel_selector.GetBestKernels(depth_to_space_params, depth_to_space_optional_params);
+
+                CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+                auto depth_to_space = new depth_to_space_gpu(arg, best_kernels[0]);
+
+                return depth_to_space;
+            }
+        };
+
+        namespace
+        {
+            struct attach
+            {
+                attach()
+                {
+                    auto val_fw = depth_to_space_gpu::create;
+                    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+                    implementation_map<depth_to_space>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+                }
+                ~attach() = default;
+            };
+            attach attach_impl;
+        }
+    } //namespace cldnn
+} //namespace gpu
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp
new file mode 100644
index 000000000..dab69d1b6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_cpu.cpp
@@ -0,0 +1,652 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "detection_output_inst.h"
+#include "kernel.h"
+#include "network_impl.h"
+#include "implementation_map.h"
+#include "math_utils.h"
+
+#include <algorithm>
+#include <stdexcept>
+#include <string>
+#include <type_traits>
+#include <xmmintrin.h>
+
+#ifdef FIX_OPENMP_RELEASE_ISSUE
+#ifdef OPENMP_FOUND
+#include <omp.h>
+#endif
+#endif
+
+namespace cldnn { namespace gpu {
+
+namespace {
+    struct bounding_box
+    {
+        float xmin;
+        float ymin;
+        float xmax;
+        float ymax;
+
+        bounding_box() : xmin(0), ymin(0), xmax(0), ymax(0) {}
+
+        bounding_box(const float xmin, const float ymin, const float xmax, const float ymax) :
+            xmin(xmin), ymin(ymin), xmax(xmax), ymax(ymax) {}
+
+        // Computes the area of a bounding box.
+        float area() const
+        {
+            return (xmax - xmin) * (ymax - ymin);
+        }
+    };
+}
+
+/************************ Detection Output CPU ************************/
+struct detection_output_cpu : typed_primitive_impl<detection_output>
+{
+    const detection_output_node& outer;
+
+    detection_output_cpu(const detection_output_node& outer)
+        : outer(outer)
+    {}
+
+    static void decode_bounding_box(
+        const bounding_box& prior_bbox, const std::array<float, PRIOR_BOX_SIZE>& prior_variance,
+        const prior_box_code_type code_type, const bool variance_encoded_in_target,
+        const bounding_box& bbox, bounding_box* decoded_bbox,
+        const bool prior_is_normalized, const size_t image_width, const size_t image_height, const bool clip_before_nms)
+    {
+        float prior_bbox_xmin = prior_bbox.xmin;
+        float prior_bbox_ymin = prior_bbox.ymin;
+        float prior_bbox_xmax = prior_bbox.xmax;
+        float prior_bbox_ymax = prior_bbox.ymax;
+
+        float bbox_xmin = bbox.xmin;
+        float bbox_ymin = bbox.ymin;
+        float bbox_xmax = bbox.xmax;
+        float bbox_ymax = bbox.ymax;
+
+        if (!prior_is_normalized) {
+            prior_bbox_xmin /= image_width;
+            prior_bbox_ymin /= image_height;
+            prior_bbox_xmax /= image_width;
+            prior_bbox_ymax /= image_height;
+        }
+
+        switch (code_type)
+        {
+            case prior_box_code_type::corner:
+            {
+                if (variance_encoded_in_target)
+                {
+                    // variance is encoded in target, we simply need to add the offset predictions.
+                    decoded_bbox->xmin = prior_bbox_xmin + bbox_xmin;
+                    decoded_bbox->ymin = prior_bbox_ymin + bbox_ymin;
+                    decoded_bbox->xmax = prior_bbox_xmax + bbox_xmax;
+                    decoded_bbox->ymax = prior_bbox_ymax + bbox_ymax;
+                }
+                else
+                {
+                    // variance is encoded in bbox, we need to scale the offset accordingly.
+                    decoded_bbox->xmin = prior_bbox_xmin + prior_variance[0] * bbox_xmin;
+                    decoded_bbox->ymin = prior_bbox_ymin + prior_variance[1] * bbox_ymin;
+                    decoded_bbox->xmax = prior_bbox_xmax + prior_variance[2] * bbox_xmax;
+                    decoded_bbox->ymax = prior_bbox_ymax + prior_variance[3] * bbox_ymax;
+                }
+                break;
+            }
+            case prior_box_code_type::center_size:
+            {
+                const float prior_width = prior_bbox_xmax - prior_bbox_xmin;
+                assert(prior_width > 0);
+                const float prior_height = prior_bbox_ymax - prior_bbox_ymin;
+                assert(prior_height > 0);
+                const float prior_center_x = (prior_bbox_xmin + prior_bbox_xmax) / 2.f;
+                const float prior_center_y = (prior_bbox_ymin + prior_bbox_ymax) / 2.f;
+                float decode_bbox_center_x, decode_bbox_center_y;
+                float decode_bbox_width, decode_bbox_height;
+                if (variance_encoded_in_target)
+                {
+                    // variance is encoded in target, we simply need to restore the offset predictions.
+                    decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
+                    decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
+                    decode_bbox_width = (exp(bbox_xmax) * prior_width);
+                    decode_bbox_height = (exp(bbox_ymax) * prior_height);
+                }
+                else
+                {
+                    // variance is encoded in bbox, we need to scale the offset accordingly.
+                    decode_bbox_center_x = prior_variance[0] * bbox_xmin * prior_width + prior_center_x;
+                    decode_bbox_center_y = prior_variance[1] * bbox_ymin * prior_height + prior_center_y;
+                    decode_bbox_width = (exp(prior_variance[2] * bbox_xmax) * prior_width);
+                    decode_bbox_height = (exp(prior_variance[3] * bbox_ymax) * prior_height);
+                }
+                decoded_bbox->xmin = decode_bbox_center_x - decode_bbox_width  / 2.0f;
+                decoded_bbox->ymin = decode_bbox_center_y - decode_bbox_height / 2.0f;
+                decoded_bbox->xmax = decode_bbox_center_x + decode_bbox_width  / 2.0f;
+                decoded_bbox->ymax = decode_bbox_center_y + decode_bbox_height / 2.0f;
+                break;
+            }
+            case prior_box_code_type::corner_size:
+            {
+                const float prior_width = prior_bbox_xmax - prior_bbox_xmin;
+                assert(prior_width > 0);
+                const float prior_height = prior_bbox_ymax - prior_bbox_ymin;
+                assert(prior_height > 0);
+                if (variance_encoded_in_target)
+                {
+                    // variance is encoded in target, we simply need to add the offset predictions.
+                    decoded_bbox->xmin = prior_bbox_xmin + bbox_xmin * prior_width;
+                    decoded_bbox->ymin = prior_bbox_ymin + bbox_ymin * prior_height;
+                    decoded_bbox->xmax = prior_bbox_xmax + bbox_xmax * prior_width;
+                    decoded_bbox->ymax = prior_bbox_ymax + bbox_ymax * prior_height;
+                }
+                else
+                {
+                    // variance is encoded in bbox, we need to scale the offset accordingly.
+                    decoded_bbox->xmin = prior_bbox_xmin + prior_variance[0] * bbox_xmin * prior_width;
+                    decoded_bbox->ymin = prior_bbox_ymin + prior_variance[1] * bbox_ymin * prior_height;
+                    decoded_bbox->xmax = prior_bbox_xmax + prior_variance[2] * bbox_xmax * prior_width;
+                    decoded_bbox->ymax = prior_bbox_ymax + prior_variance[3] * bbox_ymax * prior_height;
+                }
+                break;
+            }
+            default:
+            {
+                assert(0);
+            }
+        }
+
+        if (clip_before_nms)
+        {
+            decoded_bbox->xmin = std::max(0.0f, std::min(1.0f, decoded_bbox->xmin));
+            decoded_bbox->ymin = std::max(0.0f, std::min(1.0f, decoded_bbox->ymin));
+            decoded_bbox->xmax = std::max(0.0f, std::min(1.0f, decoded_bbox->xmax));
+            decoded_bbox->ymax = std::max(0.0f, std::min(1.0f, decoded_bbox->ymax));
+        }
+    }
+
+    static void apply_nms(const std::vector<bounding_box>& bboxes,
+        std::vector<std::pair<float,int>>& scores,
+        const float nms_threshold, const float eta, const int top_k)
+    {
+        // Sort the scores in descending order and keep top_k scores if needed.
+        if ((top_k != -1) && ((int)scores.size() > top_k))
+        {
+            std::partial_sort(scores.begin(), scores.begin() + top_k, scores.end(), [](const std::pair<float, int>& p1, const std::pair<float, int>& p2) { return (p1.first > p2.first) || (p1.first == p2.first && p1.second < p2.second); });
+            scores.resize(top_k);
+        }
+        else
+        {
+            std::stable_sort(scores.begin(), scores.end(), [](const std::pair<float, int>& p1, const std::pair<float, int>& p2) { return p1.first > p2.first; });
+        }
+
+        // NMS
+        float adaptive_threshold = nms_threshold;
+        int post_nms_count = 0;
+
+        for (auto score_index : scores)
+        {
+            const int idx = score_index.second;
+            bounding_box box1(bboxes[idx]);
+            bool keep = true;
+            for (int i = 0; i < post_nms_count; ++i)
+            {
+                if (!keep)
+                {
+                    break;
+                }
+                bounding_box box2(bboxes[scores[i].second]);
+                bool intersecting = (box1.xmin < box2.xmax) & (box2.xmin < box1.xmax) & (box1.ymin < box2.ymax) & (box2.ymin < box1.ymax);
+                float overlap = 0.0f;
+                if (intersecting)
+                {
+                    const float intersect_width = std::min(box1.xmax, box2.xmax) - std::max(box1.xmin, box2.xmin);
+                    const float intersect_height = std::min(box1.ymax, box2.ymax) - std::max(box1.ymin, box2.ymin);
+                    const float intersect_size = intersect_width * intersect_height;
+                    overlap = intersect_size / (box1.area() + box2.area() - intersect_size);
+                }
+                keep = (overlap <= adaptive_threshold);
+            }
+            if (keep)
+            {
+                scores[post_nms_count] = score_index;
+                ++post_nms_count;
+            }
+            if (keep && eta < 1 && adaptive_threshold > 0.5)
+            {
+                adaptive_threshold *= eta;
+            }
+        }
+        scores.resize(post_nms_count); // scores holds only the items that were kept after the NMS.
+    }
+
+    template<typename dtype>
+    void generate_detections(const detection_output_inst& instance, const int num_of_images, const std::vector<std::vector<std::vector<bounding_box>>>& all_bboxes, std::vector<std::vector<std::vector<std::pair<float,int>>>>& confidences)
+    {
+        mem_lock<dtype> lock{ instance.output_memory() };
+        auto out_ptr = lock.begin();
+
+        const auto& args = instance.argument;
+        std::vector<std::vector<std::vector<std::pair<float,int>>>> final_detections; // Per image -> For each label: Pair (score, prior index)
+        for (int image = 0; image < num_of_images; ++image)
+        {
+            const std::vector<std::vector<bounding_box> >& bboxes_per_image = all_bboxes[image];
+            std::vector<std::vector<std::pair<float,int>>>& conf_per_image = confidences[image];
+            int num_det = 0;
+#ifdef FIX_OPENMP_RELEASE_ISSUE
+#ifdef OPENMP_FOUND
+            int num_available_threads = omp_get_max_threads();
+            //half available threads usage shows the best perf results for both SKL (4c8t) and APL (4c4t) for this part of detection output
+            int num_threads_to_use = (omp_in_parallel() == 0) ? num_available_threads/2 : 1;
+            #pragma omp parallel for num_threads(num_threads_to_use) reduction(+:num_det)
+#endif
+#endif
+            for (int cls = 0; cls < (int)args.num_classes; ++cls)
+            {
+                if ((int)cls == args.background_label_id)
+                {
+                    conf_per_image[cls].clear();
+                    continue; // Skip background class.
+                }
+                std::vector<std::pair<float,int>>& scores = conf_per_image[cls];
+                const int label = args.share_location ? 0 : cls;
+                apply_nms(bboxes_per_image[label], scores, args.nms_threshold, args.eta, args.top_k);
+                num_det += (int)scores.size();
+            }
+            if (num_det > args.keep_top_k)
+            {
+                std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
+                score_index_pairs.reserve(num_det);
+                for (int label = 0; label < (int)args.num_classes; ++label)
+                {
+                    std::vector<std::pair<float, int>>& scores = confidences[image][label];
+                    for (std::pair<float, int> score_index : scores)
+                    {
+                        score_index_pairs.emplace_back(score_index.first, std::make_pair(label, score_index.second));
+                    }
+                }
+
+                // Keep top k results per image.
+                auto sort_function = [](const std::pair<float, std::pair<int, int>>& p1, const std::pair<float, std::pair<int, int>>& p2) { return p1.first > p2.first; };
+                if ((int)score_index_pairs.size() > args.keep_top_k)
+                {
+                    std::partial_sort(score_index_pairs.begin(), score_index_pairs.begin() + args.keep_top_k, score_index_pairs.end(), sort_function);
+                    score_index_pairs.resize(args.keep_top_k);
+                }
+                else
+                {
+                    std::sort(score_index_pairs.begin(), score_index_pairs.end(), sort_function);
+                }
+
+                // Store the new indices.
+                std::vector<std::vector<std::pair<float,int>>> new_indices(args.num_classes);
+                for (int j = 0; j < (int)score_index_pairs.size(); ++j)
+                {
+                    int label = score_index_pairs[j].second.first;
+                    int idx = score_index_pairs[j].second.second;
+                    new_indices[label].emplace_back(score_index_pairs[j].first, idx);
+                }
+                final_detections.emplace_back(new_indices);
+            }
+            else
+            {
+                final_detections.emplace_back(confidences[image]);
+            }
+        }
+
+        int count = 0;
+        for (int image = 0; image < num_of_images; ++image)
+        {
+            const std::vector<std::vector<bounding_box> >& bboxes_per_image = all_bboxes[image];
+            auto& final_detections_per_image = final_detections[image];
+            for (int label = 0; label < (int)final_detections_per_image.size(); ++label)
+            {
+                int loc_label = args.share_location ? 0 : label;
+                const std::vector<bounding_box>& bboxes = bboxes_per_image[loc_label];
+                const std::vector<std::pair<float,int>>& label_detections = final_detections_per_image[label];
+                for (std::pair<float,int> score_prior : label_detections)
+                {
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (dtype)(float)image;
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 1] = args.decrease_label_id ? ((dtype)((float)label - 1.0f))
+                                                                                            : (dtype)(float)label;
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 2] = (dtype)score_prior.first;
+                    const bounding_box& bbox = bboxes[score_prior.second];
+                    float xmin = bbox.xmin;
+                    float ymin = bbox.ymin;
+                    float xmax = bbox.xmax;
+                    float ymax = bbox.ymax;
+
+                    if (args.clip_after_nms)
+                    {
+                        xmin = std::max(0.0f, std::min(1.0f, xmin));
+                        ymin = std::max(0.0f, std::min(1.0f, ymin));
+                        xmax = std::max(0.0f, std::min(1.0f, xmax));
+                        ymax = std::max(0.0f, std::min(1.0f, ymax));
+                    }
+
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 3] = (dtype)xmin;
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 4] = (dtype)ymin;
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 5] = (dtype)xmax;
+                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 6] = (dtype)ymax;
+                    ++count;
+                }
+            }
+        }
+
+        //In case number of detections is smaller than keep_top_k fill the rest of the buffer with invalid image id (-1).
+        while (count < num_of_images*args.keep_top_k)
+        {
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (dtype)-1.f;
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 1] = (dtype)0.f;
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 2] = (dtype)0.f;
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 3] = (dtype)0.f;
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 4] = (dtype)0.f;
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 5] = (dtype)0.f;
+            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 6] = (dtype)0.f;
+            ++count;
+        }
+    }
+
+    // Compute the linear index taking the padding into account.
+    static inline int get_linear_feature_index(const int batch_id, const int feature_id, const int input_buffer_size_f, const int input_buffer_size_y,
+        const int input_buffer_size_x, const int input_padding_lower_y, const int input_padding_lower_x)
+    {
+        // This helper function assumes input layout with x_size = 1 and y_size = 1;
+        // Location and confidence inputs should be tensors with size {b,f,1,1}.
+        // This is validated in detection output primitive instance creation.
+
+        int input_idx = (batch_id * input_buffer_size_f + feature_id) * input_buffer_size_y * input_buffer_size_x;
+        input_idx += input_padding_lower_y * input_buffer_size_x + input_padding_lower_x;
+
+        return input_idx;
+    }
+
+    template<typename dtype>
+    void extract_locations_per_image(const detection_output_inst& instance, std::vector<std::vector<std::vector<bounding_box>>>& locations, const int num_of_priors, const int num_loc_classes)
+    {
+        const bool share_location = instance.argument.share_location;
+        auto& input_location = instance.location_memory();
+        const int num_of_images = (int)locations.size();
+
+        mem_lock<dtype> lock{ input_location };
+        auto location_data = lock.begin();
+
+        assert(num_of_priors * num_loc_classes * PRIOR_BOX_SIZE == input_location.get_layout().size.feature[0]);
+
+        const auto& input_buffer_size = input_location.get_layout().get_buffer_size();
+        const int input_buffer_size_x = input_buffer_size.spatial[0];
+        const int input_buffer_size_y = input_buffer_size.spatial[1];
+        const int input_buffer_size_f = input_buffer_size.feature[0];
+        const auto& input_padding = input_location.get_layout().data_padding;
+        const int input_padding_lower_x = input_padding.lower_size().spatial[0];
+        const int input_padding_lower_y = input_padding.lower_size().spatial[1];
+
+        for (int image = 0; image < num_of_images; ++image)
+        {
+            std::vector<std::vector<bounding_box>>& label_to_bbox = locations[image];
+            label_to_bbox.resize(num_loc_classes);
+            for (int cls = 0; cls < num_loc_classes; ++cls)
+            {
+                int label = share_location ? 0 : cls;
+                auto & bboxes = label_to_bbox[label];
+                bboxes.resize(num_of_priors);
+
+                for (int prior = 0; prior < num_of_priors; ++prior)
+                {
+                    int idx = prior * num_loc_classes * PRIOR_BOX_SIZE;
+                    bboxes[prior].xmin = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE, input_buffer_size_f, input_buffer_size_y,
+                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
+                    bboxes[prior].ymin = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE + 1, input_buffer_size_f, input_buffer_size_y,
+                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
+                    bboxes[prior].xmax = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE + 2, input_buffer_size_f, input_buffer_size_y,
+                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
+                    bboxes[prior].ymax = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE + 3, input_buffer_size_f, input_buffer_size_y,
+                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
+                }
+            }
+        }
+    }
+
+    template<typename dtype>
+    void extract_prior_boxes_and_variances(const detection_output_inst& instance, const bool variance_encoded_in_target,
+                                           const int32_t prior_info_size, const int32_t prior_coordinates_offset, const int32_t images_count,
+                                           std::vector<bounding_box>& prior_bboxes,  std::vector<std::array<float, PRIOR_BOX_SIZE>>& prior_variances)
+    {
+        auto& input_prior_box = instance.prior_box_memory();
+        const int num_of_priors = (int)prior_bboxes.size() / images_count;
+
+        mem_lock<dtype> lock{ input_prior_box };
+        for (int i = 0; i < images_count; i++)
+        {
+            auto prior_box_data = lock.begin() + i*num_of_priors*prior_info_size * (variance_encoded_in_target ? 1 : 2);
+
+            for (int prior = 0; prior < num_of_priors; ++prior)
+            {
+                int idx = prior * prior_info_size + prior_coordinates_offset;
+                prior_bboxes[i*num_of_priors + prior] = bounding_box((float)(prior_box_data[idx]), (float)(prior_box_data[idx + 1]), (float)(prior_box_data[idx + 2]), (float)(prior_box_data[idx + 3]));
+                idx += num_of_priors * prior_info_size;
+                for (int j = 0; j < PRIOR_BOX_SIZE; ++j)
+                {
+                    prior_variances[i*num_of_priors + prior][j] = variance_encoded_in_target ? 0.0f : (float)(prior_box_data[idx + j]);
+                }
+            }
+
+        }
+    }
+
+    template<typename dtype>
+    void extract_confidences_per_image(const detection_output_inst& instance, std::vector<std::vector<std::vector<std::pair<float,int>>>>& confidences, const int num_of_priors)
+    {
+        const int num_classes = instance.argument.num_classes;
+
+        const int num_of_images = (int)confidences.size();
+        auto& input_confidence = instance.confidence_memory();
+        const float confidence_threshold = instance.argument.confidence_threshold;
+
+        mem_lock<dtype> lock{ &input_confidence };
+        auto confidence_data = lock.begin();
+
+        assert(num_of_priors * num_classes == input_confidence.get_layout().size.feature[0]);
+
+        const auto& input_buffer_size = input_confidence.get_layout().get_buffer_size();
+        const int input_buffer_size_x = input_buffer_size.spatial[0];
+        const int input_buffer_size_y = input_buffer_size.spatial[1];
+        const int input_buffer_size_f = input_buffer_size.feature[0];
+        const auto& input_padding = input_confidence.get_layout().data_padding;
+        const int input_padding_lower_x = input_padding.lower_size().spatial[0];
+        const int input_padding_lower_y = input_padding.lower_size().spatial[1];
+        const int stride = input_buffer_size_y * input_buffer_size_x;
+
+        for (int image = 0; image < num_of_images; ++image)
+        {
+            std::vector<std::vector<std::pair<float,int>>>& label_to_scores = confidences[image];
+            label_to_scores.resize(num_classes);
+            int idx = get_linear_feature_index(image, 0, input_buffer_size_f, input_buffer_size_y,
+                input_buffer_size_x, input_padding_lower_y, input_padding_lower_x);
+
+            if (stride == 1 && std::is_same<dtype, float>::value)
+            {
+                float const* confidence_ptr_float = (float const*)(&(*confidence_data));
+                confidence_ptr_float += idx;
+                __m128 threshold = _mm_load_ps1(&confidence_threshold);
+                for (int prior = 0; prior < num_of_priors; ++prior)
+                {
+                    int cls = 0;
+                    for (; cls + 3 < num_classes; cls += 4)
+                    {
+                        __m128 scores = _mm_loadu_ps(confidence_ptr_float);
+                        confidence_ptr_float += 4;
+                        __m128i mask128 = _mm_castps_si128(_mm_cmpgt_ps(scores, threshold));
+                        if (_mm_testz_si128(mask128, mask128))
+                        {
+                            continue;
+                        }
+                        int mask = _mm_movemask_ps(_mm_castsi128_ps(mask128));
+                        if (mask & 1)
+                        {
+                            label_to_scores[cls + 0].emplace_back(_mm_cvtss_f32(scores), prior);
+                        }
+                        if (mask & 2)
+                        {
+                            int score = _mm_extract_ps(scores, 1);
+                            float s = reinterpret_cast<float&>(score);
+                            label_to_scores[cls + 1].emplace_back(s, prior);
+                        }
+                        if (mask & 4)
+                        {
+                            int score = _mm_extract_ps(scores, 2);
+                            float s = reinterpret_cast<float&>(score);
+                            label_to_scores[cls + 2].emplace_back(s, prior);
+                        }
+                        if (mask & 8)
+                        {
+                            int score = _mm_extract_ps(scores, 3);
+                            float s = reinterpret_cast<float&>(score);
+                            label_to_scores[cls + 3].emplace_back(s, prior);
+                        }
+                    }
+                    for (; cls < num_classes; ++cls)
+                    {
+                        float score = *confidence_ptr_float;
+                        if (score > confidence_threshold)
+                        {
+                            label_to_scores[cls].emplace_back(score, prior);
+                        }
+                        ++confidence_ptr_float;
+                    }
+                }
+            }
+            else
+            {
+                for (int prior = 0; prior < num_of_priors; ++prior)
+                {
+                    for (int cls = 0; cls < num_classes; ++cls)
+                    {
+                        float score = (float)confidence_data[idx];
+                        if (score > confidence_threshold)
+                        {
+                            label_to_scores[cls].emplace_back(score, prior);
+                        }
+                        idx += stride;
+                    }
+                }
+            }
+        }
+    }
+
+    template<typename dtype>
+    void prepare_data(const detection_output_inst& instance, std::vector<std::vector<std::vector<bounding_box>>> &bboxes, std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences)
+    {
+        assert(bboxes.size() == confidences.size());
+
+        const auto& args = instance.argument;
+
+        const int num_of_images = (int)bboxes.size();
+        const int num_of_priors = instance.prior_box_memory().get_layout().size.spatial[1] / args.prior_info_size;
+        const int num_loc_classes = args.share_location ? 1 : args.num_classes;
+
+        // Extract locations per image.
+        std::vector<std::vector<std::vector<bounding_box>>> locations(num_of_images); // Per image : label -> bounding boxes.
+        extract_locations_per_image<dtype>(instance, locations, num_of_priors, num_loc_classes);
+
+        int32_t batches_in_prior_boxes = instance.prior_box_memory().get_layout().size.batch[0];
+        std::vector<bounding_box> prior_bboxes(batches_in_prior_boxes*num_of_priors); // Prior-Boxes (identical for all images since we assume all images in a batch are of same dimension).
+        std::vector<std::array<float, PRIOR_BOX_SIZE>> prior_variances(batches_in_prior_boxes*num_of_priors); // Variances per prior-box (identical for all images since we assume all images in a batch are of same dimension).
+        extract_prior_boxes_and_variances<dtype>(instance, args.variance_encoded_in_target,
+                                                 args.prior_info_size, args.prior_coordinates_offset, batches_in_prior_boxes,
+                                                 prior_bboxes, prior_variances);
+
+        // Create the decoded bounding boxes according to locations predictions and prior-boxes.
+        for (int image = 0; image < num_of_images; ++image)
+        {
+            std::vector<std::vector<bounding_box>>& bboxes_per_image = bboxes[image];
+            bboxes_per_image.resize(num_loc_classes);
+            locations[image].resize(num_loc_classes);
+            for (int cls = 0; cls < num_loc_classes; ++cls)
+            {
+                const int label = args.share_location ? 0 : cls;
+                if (!args.share_location && label == args.background_label_id)
+                {
+                    continue; // Skip background class.
+                }
+                const std::vector<bounding_box>& label_loc_preds = locations[image][label];
+                int label_loc_preds_size = (int)label_loc_preds.size();
+
+                bboxes_per_image[label].clear();
+
+                for (int i = 0; i < label_loc_preds_size; ++i)
+                {
+                    bounding_box decoded_bbox;
+                    int32_t pb_offset = (batches_in_prior_boxes > 1) ? (image*num_of_priors + i) : i;
+                    int32_t var_offset = (batches_in_prior_boxes > 1) ? (image*num_of_priors + i) : i;
+                    decode_bounding_box(prior_bboxes[pb_offset], prior_variances[var_offset],
+                                        args.code_type, args.variance_encoded_in_target, label_loc_preds[i], &decoded_bbox,
+                                        args.prior_is_normalized, args.input_width, args.input_height, args.clip_before_nms);
+                    bboxes_per_image[label].emplace_back(decoded_bbox);
+                }
+            }
+        }
+
+        // Extract confidences per image.
+        extract_confidences_per_image<dtype>(instance, confidences, num_of_priors);
+    }
+
+    event_impl::ptr execute_impl(const std::vector<event_impl::ptr>& events, detection_output_inst& instance) override
+    {
+        for (auto& a : events)
+        {
+            a->wait();
+        }
+
+        auto ev = instance.get_network().get_engine().create_user_event(false);
+
+        const int num_of_images = instance.location_memory().get_layout().size.batch[0]; //batch size
+
+        std::vector<std::vector<std::vector<bounding_box>>> bboxes(num_of_images); // Per image : label -> decoded bounding boxes.
+        std::vector<std::vector<std::vector<std::pair<float, int>>>> confidences(num_of_images); // Per image : class -> confidences per bounding box.
+
+        if (instance.location_memory().get_layout().data_type == data_types::f32)
+        {
+            prepare_data<data_type_to_type<data_types::f32>::type>(instance, bboxes, confidences);
+
+            generate_detections<data_type_to_type<data_types::f32>::type>(instance, num_of_images, bboxes, confidences);
+        }
+        else
+        {
+            prepare_data<data_type_to_type<data_types::f16>::type>(instance, bboxes, confidences);
+
+            generate_detections<data_type_to_type<data_types::f16>::type>(instance, num_of_images, bboxes, confidences);
+        }
+
+        dynamic_cast<cldnn::user_event*>(ev.get())->set(); // set as complete
+        // TODO: consider refactoring create_user_event() to return cldnn::user_event*
+        return ev;
+    }
+
+    static primitive_impl* create(const detection_output_node& arg)
+    {
+        return new detection_output_cpu(arg);
+    }
+};
+
+primitive_impl* runDetectOutCpu(const detection_output_node& arg)
+{
+    return new detection_output_cpu(arg);
+}
+
+}}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp
index 55754a8e9..bfafd180d 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/detection_output_gpu.cpp
@@ -15,17 +15,11 @@
 */
 
 #include "detection_output_inst.h"
-#include "kernel.h"
-#include "kd_selector.h"
-#include "network_impl.h"
-#include "implementation_map.h"
-#include "math_utils.h"
-
-#include <algorithm>
-#include <stdexcept>
-#include <string>
-#include <type_traits>
-#include <xmmintrin.h>
+#include "primitive_gpu_base.h"
+#include "error_handler.h"
+#include "kernel_selector_helper.h"
+#include "detection_output/detection_output_kernel_base.h"
+#include "detection_output/detection_output_kernel_selector.h"
 
 #ifdef FIX_OPENMP_RELEASE_ISSUE
 #ifdef OPENMP_FOUND
@@ -35,606 +29,134 @@
 
 namespace cldnn { namespace gpu {
 
-namespace {
-    struct bounding_box
-    {
-        float xmin;
-        float ymin;
-        float xmax;
-        float ymax;
-
-        bounding_box() : xmin(0), ymin(0), xmax(0), ymax(0) {}
-
-        bounding_box(const float xmin, const float ymin, const float xmax, const float ymax) : 
-            xmin(xmin), ymin(ymin), xmax(xmax), ymax(ymax) {}
-
-        // Computes the area of a bounding box.
-        float area() const
-        {
-            return (xmax - xmin) * (ymax - ymin);
-        }
-    };
-}
-
-struct detection_output_gpu : typed_primitive_impl<detection_output>
+struct detection_output_gpu : typed_primitive_gpu_impl<detection_output>
 {
-    const detection_output_node& outer;
-
-    detection_output_gpu(const detection_output_node& outer)
-        : outer(outer)
-    {}
+    using parent = typed_primitive_gpu_impl<detection_output>;
+    using parent::parent;
 
-    static void decode_bounding_box(
-        const bounding_box& prior_bbox, const std::array<float, PRIOR_BOX_SIZE>& prior_variance,
-        const prior_box_code_type code_type, const bool variance_encoded_in_target,
-        const bounding_box& bbox, bounding_box* decoded_bbox,
-        const bool prior_is_normalized, const size_t image_width, const size_t image_height, const bool clip)
+private:
+    static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams, const detection_output_node& arg)
     {
-        float prior_bbox_xmin = prior_bbox.xmin;
-        float prior_bbox_ymin = prior_bbox.ymin;
-        float prior_bbox_xmax = prior_bbox.xmax;
-        float prior_bbox_ymax = prior_bbox.ymax;
-
-        float bbox_xmin = bbox.xmin;
-        float bbox_ymin = bbox.ymin;
-        float bbox_xmax = bbox.xmax;
-        float bbox_ymax = bbox.ymax;
-
-        if (!prior_is_normalized) {
-            prior_bbox_xmin /= image_width;
-            prior_bbox_ymin /= image_height;
-            prior_bbox_xmax /= image_width;
-            prior_bbox_ymax /= image_height;
-        }
-
-        switch (code_type)
-        {
-            case prior_box_code_type::corner:
-            {
-                if (variance_encoded_in_target)
-                {
-                    // variance is encoded in target, we simply need to add the offset predictions.
-                    decoded_bbox->xmin = prior_bbox_xmin + bbox_xmin;
-                    decoded_bbox->ymin = prior_bbox_ymin + bbox_ymin;
-                    decoded_bbox->xmax = prior_bbox_xmax + bbox_xmax;
-                    decoded_bbox->ymax = prior_bbox_ymax + bbox_ymax;
-                }
-                else
-                {
-                    // variance is encoded in bbox, we need to scale the offset accordingly.
-                    decoded_bbox->xmin = prior_bbox_xmin + prior_variance[0] * bbox_xmin;
-                    decoded_bbox->ymin = prior_bbox_ymin + prior_variance[1] * bbox_ymin;
-                    decoded_bbox->xmax = prior_bbox_xmax + prior_variance[2] * bbox_xmax;
-                    decoded_bbox->ymax = prior_bbox_ymax + prior_variance[3] * bbox_ymax;
-                }
-                break;
-            }
-            case prior_box_code_type::center_size:
-            {
-                const float prior_width = prior_bbox_xmax - prior_bbox_xmin;
-                assert(prior_width > 0);
-                const float prior_height = prior_bbox_ymax - prior_bbox_ymin;
-                assert(prior_height > 0);
-                const float prior_center_x = (prior_bbox_xmin + prior_bbox_xmax) / 2.f;
-                const float prior_center_y = (prior_bbox_ymin + prior_bbox_ymax) / 2.f;
-                float decode_bbox_center_x, decode_bbox_center_y;
-                float decode_bbox_width, decode_bbox_height;
-                if (variance_encoded_in_target)
-                {
-                    // variance is encoded in target, we simply need to restore the offset predictions.
-                    decode_bbox_center_x = bbox_xmin * prior_width + prior_center_x;
-                    decode_bbox_center_y = bbox_ymin * prior_height + prior_center_y;
-                    decode_bbox_width = (exp(bbox_xmax) * prior_width);
-                    decode_bbox_height = (exp(bbox_ymax) * prior_height);
-                }
-                else
-                {
-                    // variance is encoded in bbox, we need to scale the offset accordingly.
-                    decode_bbox_center_x = prior_variance[0] * bbox_xmin * prior_width + prior_center_x;
-                    decode_bbox_center_y = prior_variance[1] * bbox_ymin * prior_height + prior_center_y;
-                    decode_bbox_width = (exp(prior_variance[2] * bbox_xmax) * prior_width);
-                    decode_bbox_height = (exp(prior_variance[3] * bbox_ymax) * prior_height);
-                }
-                decoded_bbox->xmin = decode_bbox_center_x - decode_bbox_width  / 2.0f;
-                decoded_bbox->ymin = decode_bbox_center_y - decode_bbox_height / 2.0f;
-                decoded_bbox->xmax = decode_bbox_center_x + decode_bbox_width  / 2.0f;
-                decoded_bbox->ymax = decode_bbox_center_y + decode_bbox_height / 2.0f;
-                break;
-            }
-            case prior_box_code_type::corner_size:
-            {
-                const float prior_width = prior_bbox_xmax - prior_bbox_xmin;
-                assert(prior_width > 0);
-                const float prior_height = prior_bbox_ymax - prior_bbox_ymin;
-                assert(prior_height > 0);
-                if (variance_encoded_in_target)
-                {
-                    // variance is encoded in target, we simply need to add the offset predictions.
-                    decoded_bbox->xmin = prior_bbox_xmin + bbox_xmin * prior_width;
-                    decoded_bbox->ymin = prior_bbox_ymin + bbox_ymin * prior_height;
-                    decoded_bbox->xmax = prior_bbox_xmax + bbox_xmax * prior_width;
-                    decoded_bbox->ymax = prior_bbox_ymax + bbox_ymax * prior_height;
-                }
-                else
-                {
-                    // variance is encoded in bbox, we need to scale the offset accordingly.
-                    decoded_bbox->xmin = prior_bbox_xmin + prior_variance[0] * bbox_xmin * prior_width;
-                    decoded_bbox->ymin = prior_bbox_ymin + prior_variance[1] * bbox_ymin * prior_height;
-                    decoded_bbox->xmax = prior_bbox_xmax + prior_variance[2] * bbox_xmax * prior_width;
-                    decoded_bbox->ymax = prior_bbox_ymax + prior_variance[3] * bbox_ymax * prior_height;
-                }
-                break;
-            }
-            default:
-            {
-                assert(0);
-            }
-        }
-
-        if (clip)
-        {
-            decoded_bbox->xmin = std::max(0.0f, std::min(1.0f, decoded_bbox->xmin));
-            decoded_bbox->ymin = std::max(0.0f, std::min(1.0f, decoded_bbox->ymin));
-            decoded_bbox->xmax = std::max(0.0f, std::min(1.0f, decoded_bbox->xmax));
-            decoded_bbox->ymax = std::max(0.0f, std::min(1.0f, decoded_bbox->ymax));
-        }
+        auto primitive = arg.get_primitive();
+        detectOutParams.keep_top_k = primitive->keep_top_k;
+        detectOutParams.num_classes = primitive->num_classes;
+        detectOutParams.top_k = primitive->top_k;
+        detectOutParams.background_label_id = primitive->background_label_id;
+        detectOutParams.code_type = (int32_t)primitive->code_type;
+        detectOutParams.share_location = primitive->share_location;
+        detectOutParams.variance_encoded_in_target = primitive->variance_encoded_in_target;
+        detectOutParams.nms_threshold = primitive->nms_threshold;
+        detectOutParams.eta = primitive->eta;
+        detectOutParams.confidence_threshold = primitive->confidence_threshold;
+        detectOutParams.prior_coordinates_offset = primitive->prior_coordinates_offset;
+        detectOutParams.prior_info_size = primitive->prior_info_size;
+        detectOutParams.prior_is_normalized = primitive->prior_is_normalized;
+        detectOutParams.input_width = primitive->input_width;
+        detectOutParams.input_heigh = primitive->input_height;
+        detectOutParams.conf_size_x = arg.confidence().get_output_layout().get_buffer_size().spatial[0];
+        detectOutParams.conf_size_y = arg.confidence().get_output_layout().get_buffer_size().spatial[1];
+        detectOutParams.conf_padding_x = arg.confidence().get_output_layout().data_padding.lower_size().spatial[0];
+        detectOutParams.conf_padding_y = arg.confidence().get_output_layout().data_padding.lower_size().spatial[1];
     }
 
-    static void apply_nms(const std::vector<bounding_box>& bboxes,
-        std::vector<std::pair<float,int>>& scores,
-        const float nms_threshold, const float eta, const int top_k) 
-    {
-        // Sort the scores in descending order and keep top_k scores if needed.
-        if ((top_k != -1) && ((int)scores.size() > top_k))
-        {
-            std::partial_sort(scores.begin(), scores.begin() + top_k, scores.end(), [](const std::pair<float, int>& p1, const std::pair<float, int>& p2) { return (p1.first > p2.first) || (p1.first == p2.first && p1.second < p2.second); });
-            scores.resize(top_k);
-        }
-        else
-        {
-            std::stable_sort(scores.begin(), scores.end(), [](const std::pair<float, int>& p1, const std::pair<float, int>& p2) { return p1.first > p2.first; });
-        }
-
-        // NMS
-        float adaptive_threshold = nms_threshold;
-        int post_nms_count = 0;
 
-        for (auto score_index : scores)
-        {
-            const int idx = score_index.second;
-            bounding_box box1(bboxes[idx]);
-            bool keep = true;
-            for (int i = 0; i < post_nms_count; ++i)
-            {
-                if (!keep)
-                {
-                    break;
-                }
-                bounding_box box2(bboxes[scores[i].second]);
-                bool intersecting = (box1.xmin < box2.xmax) & (box2.xmin < box1.xmax) & (box1.ymin < box2.ymax) & (box2.ymin < box1.ymax);
-                float overlap = 0.0f;
-                if (intersecting)
-                {
-                    const float intersect_width = std::min(box1.xmax, box2.xmax) - std::max(box1.xmin, box2.xmin);
-                    const float intersect_height = std::min(box1.ymax, box2.ymax) - std::max(box1.ymin, box2.ymin);
-                    const float intersect_size = intersect_width * intersect_height;
-                    overlap = intersect_size / (box1.area() + box2.area() - intersect_size);
-                }
-                keep = (overlap <= adaptive_threshold);
-            }
-            if (keep)
-            {
-                scores[post_nms_count] = score_index;
-                ++post_nms_count;
-            }
-            if (keep && eta < 1 && adaptive_threshold > 0.5)
-            {
-                adaptive_threshold *= eta;
-            }
-        }
-        scores.resize(post_nms_count); // scores holds only the items that were kept after the NMS.
-    }
+public:
 
-    template<typename dtype>
-    void generate_detections(const detection_output_inst& instance, const int num_of_images, const std::vector<std::vector<std::vector<bounding_box>>>& all_bboxes, std::vector<std::vector<std::vector<std::pair<float,int>>>>& confidences)
+    static primitive_impl* create(const detection_output_node& arg)
     {
-        mem_lock<dtype> lock{ instance.output_memory() };
-        auto out_ptr = lock.begin();
-
-        const auto& args = instance.argument;
-        std::vector<std::vector<std::vector<std::pair<float,int>>>> final_detections; // Per image -> For each label: Pair (score, prior index)
-        for (int image = 0; image < num_of_images; ++image)
-        {
-            const std::vector<std::vector<bounding_box> >& bboxes_per_image = all_bboxes[image];
-            std::vector<std::vector<std::pair<float,int>>>& conf_per_image = confidences[image];
-            int num_det = 0;
-#ifdef FIX_OPENMP_RELEASE_ISSUE
-#ifdef OPENMP_FOUND
-            int num_available_threads = omp_get_max_threads();
-            //half available threads usage shows the best perf results for both SKL (4c8t) and APL (4c4t) for this part of detection output
-            int num_threads_to_use = (omp_in_parallel() == 0) ? num_available_threads/2 : 1;
-            #pragma omp parallel for num_threads(num_threads_to_use) reduction(+:num_det)
-#endif
-#endif
-            for (int cls = 0; cls < (int)args.num_classes; ++cls)
-            {
-                if ((int)cls == args.background_label_id)
-                {
-                    conf_per_image[cls].clear();
-                    continue; // Skip background class.
-                }
-                std::vector<std::pair<float,int>>& scores = conf_per_image[cls];
-                const int label = args.share_location ? 0 : cls;
-                apply_nms(bboxes_per_image[label], scores, args.nms_threshold, args.eta, args.top_k);
-                num_det += (int)scores.size();
-            }
-            if (num_det > args.keep_top_k)
-            {
-                std::vector<std::pair<float, std::pair<int, int>>> score_index_pairs;
-                score_index_pairs.reserve(num_det);
-                for (int label = 0; label < (int)args.num_classes; ++label)
-                {
-                    std::vector<std::pair<float, int>>& scores = confidences[image][label];
-                    for (std::pair<float, int> score_index : scores)
-                    {
-                        score_index_pairs.emplace_back(score_index.first, std::make_pair(label, score_index.second));
-                    }
-                }
-
-                // Keep top k results per image.
-                auto sort_function = [](const std::pair<float, std::pair<int, int>>& p1, const std::pair<float, std::pair<int, int>>& p2) { return p1.first > p2.first; };
-                if ((int)score_index_pairs.size() > args.keep_top_k)
-                {
-                    std::partial_sort(score_index_pairs.begin(), score_index_pairs.begin() + args.keep_top_k, score_index_pairs.end(), sort_function);
-                    score_index_pairs.resize(args.keep_top_k);
-                }
-                else
-                {
-                    std::sort(score_index_pairs.begin(), score_index_pairs.end(), sort_function);
-                }
-
-                // Store the new indices.
-                std::vector<std::vector<std::pair<float,int>>> new_indices(args.num_classes);
-                for (int j = 0; j < (int)score_index_pairs.size(); ++j)
-                {
-                    int label = score_index_pairs[j].second.first;
-                    int idx = score_index_pairs[j].second.second;
-                    new_indices[label].emplace_back(score_index_pairs[j].first, idx);
-                }
-                final_detections.emplace_back(new_indices);
-            }
-            else
-            {
-                final_detections.emplace_back(confidences[image]);
-            }
-        }
-
-        int count = 0;
-        for (int image = 0; image < num_of_images; ++image)
-        {
-            const std::vector<std::vector<bounding_box> >& bboxes_per_image = all_bboxes[image];
-            auto& final_detections_per_image = final_detections[image];
-            for (int label = 0; label < (int)final_detections_per_image.size(); ++label)
-            {
-                int loc_label = args.share_location ? 0 : label;
-                const std::vector<bounding_box>& bboxes = bboxes_per_image[loc_label];
-                const std::vector<std::pair<float,int>>& label_detections = final_detections_per_image[label];
-                for (std::pair<float,int> score_prior : label_detections)
-                {
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (dtype)(float)image;
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 1] = args.decrease_label_id ? ((dtype)((float)label - 1.0f))
-                                                                                            : (dtype)(float)label;
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 2] = (dtype)score_prior.first;
-                    const bounding_box& bbox = bboxes[score_prior.second];
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 3] = (dtype)bbox.xmin;
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 4] = (dtype)bbox.ymin;
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 5] = (dtype)bbox.xmax;
-                    out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 6] = (dtype)bbox.ymax;
-                    ++count;
-                }
-            }
-        }
-
-        //In case number of detections is smaller than keep_top_k fill the rest of the buffer with invalid image id (-1).
-        while (count < num_of_images*args.keep_top_k)
+        if (!arg.get_program().get_options().get<build_option_type::detection_output_gpu>()->enabled())
         {
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE] = (dtype)-1.f;
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 1] = (dtype)0.f;
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 2] = (dtype)0.f;
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 3] = (dtype)0.f;
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 4] = (dtype)0.f;
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 5] = (dtype)0.f;
-            out_ptr[count * DETECTION_OUTPUT_ROW_SIZE + 6] = (dtype)0.f;
-            ++count;
+            return runDetectOutCpu(arg);
         }
-    }
 
-    // Compute the linear index taking the padding into account.
-    static inline int get_linear_feature_index(const int batch_id, const int feature_id, const int input_buffer_size_f, const int input_buffer_size_y, 
-        const int input_buffer_size_x, const int input_padding_lower_y, const int input_padding_lower_x)
-    {
-        // This helper function assumes input layout with x_size = 1 and y_size = 1;
-        // Location and confidence inputs should be tensors with size {b,f,1,1}.
-        // This is validated in detection output primitive instance creation.
+        auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
+        auto detect_out_optional_params = get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
 
-        int input_idx = (batch_id * input_buffer_size_f + feature_id) * input_buffer_size_y * input_buffer_size_x;
-        input_idx += input_padding_lower_y * input_buffer_size_x + input_padding_lower_x;
+        setDetectOutSpecificParams(detect_out_params.detectOutParams, arg);
 
-        return input_idx;
-    }
+        auto& kernel_selector = kernel_selector::detection_output_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
 
-    template<typename dtype>
-    void extract_locations_per_image(const detection_output_inst& instance, std::vector<std::vector<std::vector<bounding_box>>>& locations, const int num_of_priors, const int num_loc_classes)
-    {
-        const bool share_location = instance.argument.share_location;
-        auto& input_location = instance.location_memory();
-        const int num_of_images = (int)locations.size();
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
 
-        mem_lock<dtype> lock{ input_location };
-        auto location_data = lock.begin();
+        auto detect_out = new detection_output_gpu(arg, best_kernels[0]);
 
-        assert(num_of_priors * num_loc_classes * PRIOR_BOX_SIZE == input_location.get_layout().size.feature[0]);
-
-        const auto& input_buffer_size = input_location.get_layout().get_buffer_size();
-        const int input_buffer_size_x = input_buffer_size.spatial[0];
-        const int input_buffer_size_y = input_buffer_size.spatial[1];
-        const int input_buffer_size_f = input_buffer_size.feature[0];
-        const auto& input_padding = input_location.get_layout().data_padding;
-        const int input_padding_lower_x = input_padding.lower_size().spatial[0];
-        const int input_padding_lower_y = input_padding.lower_size().spatial[1];
-
-        for (int image = 0; image < num_of_images; ++image)
-        {
-            std::vector<std::vector<bounding_box>>& label_to_bbox = locations[image];
-            label_to_bbox.resize(num_loc_classes);
-            for (int cls = 0; cls < num_loc_classes; ++cls)
-            {
-                int label = share_location ? 0 : cls;
-                auto & bboxes = label_to_bbox[label];
-                bboxes.resize(num_of_priors);
-
-                for (int prior = 0; prior < num_of_priors; ++prior)
-                {
-                    int idx = prior * num_loc_classes * PRIOR_BOX_SIZE;
-                    bboxes[prior].xmin = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE, input_buffer_size_f, input_buffer_size_y,
-                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
-                    bboxes[prior].ymin = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE + 1, input_buffer_size_f, input_buffer_size_y,
-                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
-                    bboxes[prior].xmax = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE + 2, input_buffer_size_f, input_buffer_size_y,
-                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
-                    bboxes[prior].ymax = (float)(location_data[get_linear_feature_index(image, idx + cls * PRIOR_BOX_SIZE + 3, input_buffer_size_f, input_buffer_size_y,
-                                                                                        input_buffer_size_x, input_padding_lower_y, input_padding_lower_x)]);
-                }   
-            }
-        }
+        return detect_out;
     }
+};
 
-    template<typename dtype>
-    void extract_prior_boxes_and_variances(const detection_output_inst& instance, const bool variance_encoded_in_target,
-                                           const int32_t prior_info_size, const int32_t prior_coordinates_offset,
-                                           std::vector<bounding_box>& prior_bboxes,  std::vector<std::array<float, PRIOR_BOX_SIZE>>& prior_variances)
-    {
-        auto& input_prior_box = instance.prior_box_memory();
-        const int num_of_priors = (int)prior_bboxes.size();
+primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel)
+{
+    return new detection_output_gpu(arg, kernel);
+}
 
-        mem_lock<dtype> lock{ input_prior_box };
-        auto prior_box_data = lock.begin();
+/************************ Detection Output keep_top_k part ************************/
 
-        for (int prior = 0; prior < num_of_priors; ++prior)
-        {
-            int idx = prior * prior_info_size + prior_coordinates_offset;
-            prior_bboxes[prior] = bounding_box((float)(prior_box_data[idx]), (float)(prior_box_data[idx + 1]), (float)(prior_box_data[idx + 2]), (float)(prior_box_data[idx + 3]));
-            idx += num_of_priors * prior_info_size;
-            for (int j = 0; j < PRIOR_BOX_SIZE; ++j)
-            {
-                prior_variances[prior][j] = variance_encoded_in_target ? 0.0f : (float)(prior_box_data[idx + j]);
-            }
-        }
-    }
+struct detection_output_sort_gpu : typed_primitive_gpu_impl<detection_output_sort>
+{
+    using parent = typed_primitive_gpu_impl<detection_output_sort>;
+    using parent::parent;
 
-    template<typename dtype>
-    void extract_confidences_per_image(const detection_output_inst& instance, std::vector<std::vector<std::vector<std::pair<float,int>>>>& confidences, const int num_of_priors)
+private:
+    static void setDetectOutSpecificParams(kernel_selector::detection_output_params::DedicatedParams& detectOutParams, const detection_output_sort_node& arg)
     {
-        const int num_classes = instance.argument.num_classes;
-
-        const int num_of_images = (int)confidences.size();
-        auto& input_confidence = instance.confidence_memory();
-        const float confidence_threshold = instance.argument.confidence_threshold;
-
-        mem_lock<dtype> lock{ &input_confidence };
-        auto confidence_data = lock.begin();
-
-        assert(num_of_priors * num_classes == input_confidence.get_layout().size.feature[0]);
-
-        const auto& input_buffer_size = input_confidence.get_layout().get_buffer_size();
-        const int input_buffer_size_x = input_buffer_size.spatial[0];
-        const int input_buffer_size_y = input_buffer_size.spatial[1];
-        const int input_buffer_size_f = input_buffer_size.feature[0];
-        const auto& input_padding = input_confidence.get_layout().data_padding;
-        const int input_padding_lower_x = input_padding.lower_size().spatial[0];
-        const int input_padding_lower_y = input_padding.lower_size().spatial[1];
-        const int stride = input_buffer_size_y * input_buffer_size_x;
-
-        for (int image = 0; image < num_of_images; ++image)
+        if (arg.get_dependency(0).is_type<detection_output>())
         {
-            std::vector<std::vector<std::pair<float,int>>>& label_to_scores = confidences[image];
-            label_to_scores.resize(num_classes);
-            int idx = get_linear_feature_index(image, 0, input_buffer_size_f, input_buffer_size_y,
-                input_buffer_size_x, input_padding_lower_y, input_padding_lower_x);
-            
-            if (stride == 1 && std::is_same<dtype, float>::value)
-            {
-                float const* confidence_ptr_float = (float const*)(&(*confidence_data));
-                confidence_ptr_float += idx;
-                __m128 threshold = _mm_load_ps1(&confidence_threshold);
-                for (int prior = 0; prior < num_of_priors; ++prior)
-                {
-                    int cls = 0;
-                    for (; cls + 3 < num_classes; cls += 4)
-                    {
-                        __m128 scores = _mm_loadu_ps(confidence_ptr_float);
-                        confidence_ptr_float += 4;
-                        __m128i mask128 = _mm_castps_si128(_mm_cmpgt_ps(scores, threshold));
-                        if (_mm_testz_si128(mask128, mask128))
-                        {
-                            continue;
-                        }
-                        int mask = _mm_movemask_ps(_mm_castsi128_ps(mask128));
-                        if (mask & 1)
-                        {
-                            label_to_scores[cls + 0].emplace_back(_mm_cvtss_f32(scores), prior);
-                        }
-                        if (mask & 2)
-                        {
-                            int score = _mm_extract_ps(scores, 1);
-                            float s = reinterpret_cast<float&>(score);
-                            label_to_scores[cls + 1].emplace_back(s, prior);
-                        }
-                        if (mask & 4)
-                        {
-                            int score = _mm_extract_ps(scores, 2);
-                            float s = reinterpret_cast<float&>(score);
-                            label_to_scores[cls + 2].emplace_back(s, prior);
-                        }
-                        if (mask & 8)
-                        {
-                            int score = _mm_extract_ps(scores, 3);
-                            float s = reinterpret_cast<float&>(score);
-                            label_to_scores[cls + 3].emplace_back(s, prior);
-                        }
-                    }
-                    for (; cls < num_classes; ++cls)
-                    {
-                        float score = *confidence_ptr_float;
-                        if (score > confidence_threshold)
-                        {
-                            label_to_scores[cls].emplace_back(score, prior);
-                        }
-                        ++confidence_ptr_float;
-                    }
-                }
-            }
-            else
-            {
-                for (int prior = 0; prior < num_of_priors; ++prior)
-                {
-                    for (int cls = 0; cls < num_classes; ++cls)
-                    {
-                        float score = (float)confidence_data[idx];
-                        if (score > confidence_threshold)
-                        {
-                            label_to_scores[cls].emplace_back(score, prior);
-                        }
-                        idx += stride;
-                    }
-                }
-            }
+            auto primitive = arg.get_dependency(0).as<detection_output>().get_primitive();
+            detectOutParams.keep_top_k = primitive->keep_top_k;
+            detectOutParams.num_classes = primitive->num_classes;
+            detectOutParams.num_images = arg.get_dependency(0).as<detection_output>().location().get_output_layout().size.batch[0];
+            detectOutParams.top_k = primitive->top_k;
+            detectOutParams.share_location = primitive->share_location;
+            detectOutParams.background_label_id = primitive->background_label_id;
         }
-    }
-
-    template<typename dtype>
-    void prepare_data(const detection_output_inst& instance, std::vector<std::vector<std::vector<bounding_box>>> &bboxes, std::vector<std::vector<std::vector<std::pair<float, int>>>>& confidences)
-    {
-        assert(bboxes.size() == confidences.size());
-
-        const auto& args = instance.argument;
-
-        const int num_of_images = (int)bboxes.size();
-        const int num_of_priors = instance.prior_box_memory().get_layout().size.spatial[1] / args.prior_info_size;
-        const int num_loc_classes = args.share_location ? 1 : args.num_classes;
-
-        // Extract locations per image.
-        std::vector<std::vector<std::vector<bounding_box>>> locations(num_of_images); // Per image : label -> bounding boxes.
-        extract_locations_per_image<dtype>(instance, locations, num_of_priors, num_loc_classes);
-
-        // Extract prior boxes - same within a batch.
-        std::vector<bounding_box> prior_bboxes(num_of_priors); // Prior-Boxes (identical for all images since we assume all images in a batch are of same dimension).
-        std::vector<std::array<float, PRIOR_BOX_SIZE>> prior_variances(num_of_priors); // Variances per prior-box (identical for all images since we assume all images in a batch are of same dimension).
-        extract_prior_boxes_and_variances<dtype>(instance, args.variance_encoded_in_target,
-                                                 args.prior_info_size, args.prior_coordinates_offset,
-                                                 prior_bboxes, prior_variances);
-
-        // Create the decoded bounding boxes according to locations predictions and prior-boxes. 
-        for (int image = 0; image < num_of_images; ++image)
+        else
         {
-            std::vector<std::vector<bounding_box>>& bboxes_per_image = bboxes[image];
-            bboxes_per_image.resize(num_loc_classes);
-            locations[image].resize(num_loc_classes);
-            for (int cls = 0; cls < num_loc_classes; ++cls)
-            {
-                const int label = args.share_location ? 0 : cls;
-                if (!args.share_location && label == args.background_label_id)
-                {
-                    continue; // Skip background class.
-                }
-                const std::vector<bounding_box>& label_loc_preds = locations[image][label];
-                int label_loc_preds_size = (int)label_loc_preds.size();
-                assert((int)prior_bboxes.size() == label_loc_preds_size);
-                
-                bboxes_per_image[label].clear();
-
-                for (int i = 0; i < label_loc_preds_size; ++i)
-                {
-                    bounding_box decoded_bbox;
-                    decode_bounding_box(prior_bboxes[i], prior_variances[i], args.code_type, args.variance_encoded_in_target, label_loc_preds[i], &decoded_bbox,
-                                        args.prior_is_normalized, args.input_width, args.input_height, args.clip);
-                    bboxes_per_image[label].emplace_back(decoded_bbox);
-                }
-            }
+            auto primitive = arg.get_primitive();
+            detectOutParams.keep_top_k = primitive->keep_top_k;
+            detectOutParams.num_classes = primitive->num_classes;
+            detectOutParams.num_images = primitive->num_images;
+            detectOutParams.top_k = primitive->top_k;
+            detectOutParams.share_location = primitive->share_location;
+            detectOutParams.background_label_id = primitive->background_label_id;
         }
-
-        // Extract confidences per image.
-        extract_confidences_per_image<dtype>(instance, confidences, num_of_priors);
     }
 
-    event_impl::ptr execute_impl(const std::vector<event_impl::ptr>& events, detection_output_inst& instance) override
+public:
+    static primitive_impl* create(const detection_output_sort_node& arg)
     {
-        for (auto& a : events) 
-        {
-            a->wait();
-        }
-
-        auto ev = instance.get_network().get_engine().create_user_event(false);
-
-        const int num_of_images = instance.location_memory().get_layout().size.batch[0]; //batch size
+        auto detect_out_params = get_default_params<kernel_selector::detection_output_params>(arg);
+        auto detect_out_optional_params = get_default_optional_params<kernel_selector::detection_output_optional_params>(arg.get_program());
 
-        std::vector<std::vector<std::vector<bounding_box>>> bboxes(num_of_images); // Per image : label -> decoded bounding boxes.
-        std::vector<std::vector<std::vector<std::pair<float, int>>>> confidences(num_of_images); // Per image : class -> confidences per bounding box.
+        setDetectOutSpecificParams(detect_out_params.detectOutParams, arg);
 
-        if (instance.location_memory().get_layout().data_type == data_types::f32)
-        {
-            prepare_data<data_type_to_type<data_types::f32>::type>(instance, bboxes, confidences);
-
-            generate_detections<data_type_to_type<data_types::f32>::type>(instance, num_of_images, bboxes, confidences);
-        }
-        else
-        {
-            prepare_data<data_type_to_type<data_types::f16>::type>(instance, bboxes, confidences);
+        auto& kernel_selector = kernel_selector::detection_output_sort_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(detect_out_params, detect_out_optional_params);
 
-            generate_detections<data_type_to_type<data_types::f16>::type>(instance, num_of_images, bboxes, confidences);
-        }
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
 
-        dynamic_cast<cldnn::user_event*>(ev.get())->set(); // set as complete
-        // TODO: consider refactoring create_user_event() to return cldnn::user_event*
-        return ev;
-    }
+        auto detect_out = new detection_output_sort_gpu(arg, best_kernels[0]);
 
-    static primitive_impl* create(const detection_output_node& arg)
-    {
-        return new detection_output_gpu(arg);
+        return detect_out;
     }
 };
 
+primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel)
+{
+    return new detection_output_sort_gpu(arg, kernel);
+}
+
 namespace {
     struct attach {
-        attach()
-        {
+        attach() {
             implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_gpu::create);
             implementation_map<detection_output>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_gpu::create);
+            implementation_map<detection_output_sort>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), detection_output_sort_gpu::create);
+            implementation_map<detection_output_sort>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), detection_output_sort_gpu::create);
         }
-
         ~attach() {}
     };
     attach attach_impl;
 }
+
 }}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
index 4833983fa..5219fe971 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/eltwise_gpu.cpp
@@ -1,5 +1,9 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+<<<<<<< HEAD
+// Copyright (c) 2019 Intel Corporation
+=======
+// Copyright (c) 2016-2019 Intel Corporation
+>>>>>>> 0473785... Eltwise operation added: equal, not_equal, less, less_equal, greater, greater_equal, logical_and, logical_or, logical_xor, that produce int output with 0 and 1 values
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -26,7 +30,7 @@ namespace cldnn { namespace gpu {
 
 namespace
 {
-    inline kernel_selector::eltwise_mode convect_to_eltwise_mode(eltwise_mode mode)
+    inline kernel_selector::eltwise_mode convert_to_eltwise_mode(eltwise_mode mode)
     {
         switch (mode)
         {
@@ -38,6 +42,16 @@ namespace
         case eltwise_mode::min: return kernel_selector::eltwise_mode::MIN;
         case eltwise_mode::pow: return kernel_selector::eltwise_mode::POW;
         case eltwise_mode::mod: return kernel_selector::eltwise_mode::MODULU;
+        case eltwise_mode::eq: return kernel_selector::eltwise_mode::EQ;
+        case eltwise_mode::ne: return kernel_selector::eltwise_mode::NE;
+        case eltwise_mode::lt: return kernel_selector::eltwise_mode::LT;
+        case eltwise_mode::le: return kernel_selector::eltwise_mode::LE;
+        case eltwise_mode::gt: return kernel_selector::eltwise_mode::GT;
+        case eltwise_mode::ge: return kernel_selector::eltwise_mode::GE;
+        case eltwise_mode::logic_and: return kernel_selector::eltwise_mode::LOGIC_AND;
+        case eltwise_mode::logic_or: return kernel_selector::eltwise_mode::LOGIC_OR;
+        case eltwise_mode::logic_xor: return kernel_selector::eltwise_mode::LOGIC_XOR;
+        case eltwise_mode::squared_diff: return kernel_selector::eltwise_mode::SQUARED_DIFF;
         default:
             return kernel_selector::eltwise_mode::ADD;
         }
@@ -58,8 +72,8 @@ protected:
     }
 
 public:
-    static primitive_impl* create(const eltwise_node& arg) 
-    { 
+    static primitive_impl* create(const eltwise_node& arg)
+    {
         auto ew_params = get_default_params<kernel_selector::eltwise_params>(arg);
         auto ew_optional_params = get_default_optional_params<kernel_selector::eltwise_optional_params>(arg.get_program());
 
@@ -70,17 +84,17 @@ public:
 
         const auto& primitive = arg.get_primitive();
         if(primitive->with_activation)
-            convert_activation_func_params(primitive, ew_params);
+            convert_activation_func_params(primitive, ew_params.activation);
 
-        ew_params.operations.push_back({ 
+        ew_params.operations.push_back({
             { kernel_selector::eltwise_params::InputType::Buffer(0), kernel_selector::eltwise_params::InputType::Buffer(1) },
-            convect_to_eltwise_mode(primitive->mode) });
+            convert_to_eltwise_mode(primitive->mode) });
 
         for (uint32_t i = 2; i < static_cast<uint32_t>(arg.inputs_count()); i++)
         {
             ew_params.operations.push_back({{ kernel_selector::eltwise_params::InputType::Intermediate(i-2),
                                                             kernel_selector::eltwise_params::InputType::Buffer(i) },
-                                                            convect_to_eltwise_mode(primitive->mode) });
+                                                            convert_to_eltwise_mode(primitive->mode) });
         }
 
         if (primitive->mode == eltwise_mode::sum)
@@ -91,7 +105,53 @@ public:
         for (size_t i = 0; i < ew_params.inputs.size(); i++)
         {
             if (!ew_params.inputs[i].SameDims(ew_params.output))
-                ew_params.layoutBased = true;
+            {
+                std::vector<int32_t> input_size  = arg.input(i).get_output_layout().size.raw.vector();
+                std::vector<int32_t> output_size = arg.get_output_layout().size.raw.vector();
+                bool broadcast = false;
+                for (size_t d = 0; d < output_size.size(); d++)
+                {
+                    if (output_size[d] != 1 || input_size[d] == 1)
+                        broadcast = true;
+                }
+                if (broadcast)
+                {
+                    ew_params.broadcast = true;
+                    break;
+                }
+                else
+                {
+                    ew_params.layoutBased = true;
+                    break;
+                }
+
+            }
+        }
+
+        // stride
+        if (!primitive->stride.empty())
+        {
+            const auto& stride = primitive->stride;
+            ew_params.stride.resize(stride.size());
+            for (size_t i = 0; i < primitive->stride.size(); i++)
+            {
+                ew_params.stride[i] = { (uint32_t)stride[i].spatial[0], (uint32_t)stride[i].spatial[1] };
+            }
+        }
+
+        // check if strides are the same
+        if(!ew_params.stride.empty())
+        {
+            const auto& stride = ew_params.stride[0];
+            for (size_t i = 1; i < ew_params.stride.size(); i++)
+            {
+                if (stride.x != ew_params.stride[i].x || stride.y != ew_params.stride[i].y)
+                    ew_params.layoutBased = true;
+            }
+        }
+        else if (!ew_params.inputs[0].SameDimsSizes(ew_params.inputs[1]))
+        {
+            ew_params.broadcast = true;
         }
 
         if (primitive->output_calibration_factors.size() > 0 || primitive->output_quantization_factor != 1.0f)
@@ -139,7 +199,9 @@ namespace {
                 { std::make_tuple(engine_types::ocl, data_types::i64, format::byxf), eltwise_gpu::create },
                 // MMAD
                 { std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), eltwise_gpu::create },
-                { std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), eltwise_gpu::create }
+                { std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), eltwise_gpu::create },
+                { std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), eltwise_gpu::create },
+                { std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), eltwise_gpu::create }
             });
         }
         ~attach() {}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/engine_info.cpp b/inference-engine/thirdparty/clDNN/src/gpu/engine_info.cpp
index 73d20d633..e693bef9f 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/engine_info.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/engine_info.cpp
@@ -18,6 +18,10 @@
 #include <unordered_map>
 #include <string>
 #include <cassert>
+#include <time.h>
+#include <limits>
+#include <chrono>
+#include "istreamwrapper.h"
 
 #ifdef _WIN32
 #define WIN32_LEAN_AND_MEAN
@@ -25,10 +29,18 @@
 #include <SetupAPI.h>
 #include <devguid.h>
 #include <cstring>
-#elif defined(__linux__)
-#include <fstream>
+#else
+#include <unistd.h>
+#include <limits.h>
+#include <link.h>
+#include <dlfcn.h>
 #endif
+
+#include <fstream>
 #include <iostream>
+#include <utility>
+
+
 namespace cldnn { namespace gpu{
 
 namespace {
@@ -118,40 +130,55 @@ std::string to_string_hex(int val)
     return std::string("0x") + &buf[i];
 }
 
-struct device_info
-{
-    engine_info_internal::models model;
-    engine_info_internal::architectures arch;
-    engine_info_internal::configurations config;
-    std::string code;
-};
-
 #include "mode.inc"
 
-const device_info& get_device_info(int device_id)
-{
-#define GEN_DEVICE(code, dev_id, model, arch, conf) { dev_id, {engine_info_internal::model, engine_info_internal::arch, engine_info_internal::conf, #code} },
-    static const std::unordered_map<int, device_info> device_map
+std::shared_ptr<rapidjson::Document> get_cache_from_file(uint32_t compute_units_count, const gpu_toolkit& context) {
+    std::string tuning_cache_path = context.get_configuration().tuning_cache_path;
+    if (tuning_cache_path.compare("cache.json") == 0)
     {
-#include "gpu_devices.inc"
-    };
-#undef GEN_DEVICE
-    
-    auto it = device_map.find(device_id);
-    if (it == device_map.end())
+#ifdef _WIN32
+        char path[MAX_PATH];
+        HMODULE hm = NULL;
+        GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+                          GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                          (LPCSTR)&get_cache_from_file, &hm);
+        GetModuleFileName(hm, path, sizeof(path));
+        std::string bin_path(path);
+        tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("\\")) + "\\cache.json";
+#else
+        Dl_info dl_info;
+        dladdr((void*)device_info_failed_msg, &dl_info);
+        std::string bin_path(dl_info.dli_fname);
+        tuning_cache_path = bin_path.substr(0, bin_path.find_last_of("/")) + "/cache.json";
+#endif
+    }
+    rapidjson::Document cacheFile;
+    rapidjson::Document cacheDeviceData;
+    auto computeUnits = std::to_string(compute_units_count);
+    std::ifstream f(tuning_cache_path);
+    if (f.good())
     {
-        if (public_caps)
+        rapidjson::IStreamWrapper isw{ f };
+        cacheFile.ParseStream(isw);
+        auto errorCode = cacheFile.GetParseError();
+        if (!cacheFile.HasMember(computeUnits.c_str()) && errorCode == 0)
+        {
+            computeUnits = "24";
+        }
+        if (cacheFile.HasMember(computeUnits.c_str()) && errorCode == 0)
         {
-            throw std::runtime_error(std::string(device_info_failed_msg) + " - unsupported device id: " + to_string_hex(device_id) + ". Note: HD5xx+ devices are supported");
+            cacheDeviceData.CopyFrom(cacheFile[computeUnits.c_str()], cacheDeviceData.GetAllocator());
         }
         else
         {
-            std::cerr << "[WARNING]. Device ID (" << to_string_hex(device_id) << ") not supported. Pretending to behave like SKL GT2." << std::endl;
-            int new_device_id = 6433;
-            return device_map.at(new_device_id);
+            cacheDeviceData.Parse("{}");
         }
     }
-    return device_map.at(device_id);
+    else
+    {
+        cacheDeviceData.Parse("{}");
+    }
+    return std::make_shared < rapidjson::Document>(std::move(cacheDeviceData));
 }
 
 } // namespace <anonymous>
@@ -160,13 +187,17 @@ engine_info_internal::engine_info_internal(const gpu_toolkit& context)
 {
     auto device_id = get_gpu_device_id();
     if (0 == device_id) throw std::runtime_error(device_info_failed_msg);
-    auto& dev_info = get_device_info(device_id);
-    model = dev_info.model;
-    architecture = dev_info.arch;
-    configuration = dev_info.config;
     dev_id = to_string_hex(device_id);
     driver_version = context.device().getInfo<CL_DRIVER_VERSION>();
 
+    compute_units_count = context.device().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
+    try {
+        device_cache = get_cache_from_file(compute_units_count, context);
+    }
+    catch (...){
+        std::cout << "[WARNING] error during parsing cache file, tuning data won't be used" << std::endl;
+        device_cache->Parse("{}");
+    }
     cores_count = static_cast<uint32_t>(context.device().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>());
     core_frequency = static_cast<uint32_t>(context.device().getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>());
 
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/engine_info.h b/inference-engine/thirdparty/clDNN/src/gpu/engine_info.h
index 4ad7d46e5..384eae885 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/engine_info.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/engine_info.h
@@ -15,69 +15,22 @@
 */
 #pragma once
 #include <cstdint>
+#include <memory>
 #include "api/CPP/engine.hpp"
+#include "document.h"
 
-namespace cldnn { namespace gpu {
+
+namespace cldnn {
+    namespace gpu {
 
 class gpu_toolkit;
 struct engine_info_internal : cldnn::engine_info
 {
-    #ifdef GPU_CONFIGURATION
-        #undef GPU_CONFIGURATION
-    #endif
-    #ifdef GPU_MODEL
-        #undef GPU_MODEL
-    #endif
-    #ifdef GPU_ARCHITECTURE
-        #undef GPU_ARCHITECTURE
-    #endif
-
-
-    enum configurations
-    {
-        #define GPU_CONFIGURATION(enum_name, enum_value) enum_name = enum_value,
-        #define GPU_MODEL(enum_name, enum_value)
-        #define GPU_ARCHITECTURE(enum_name, enum_value)
-        #include "gpu_enums.inc"
-        #undef GPU_CONFIGURATION
-        #undef GPU_MODEL
-        #undef GPU_ARCHITECTURE
-    };
-
-    
-
-    enum models
-    {
-        #define GPU_CONFIGURATION(enum_name, enum_value)
-        #define GPU_MODEL(enum_name, enum_value) enum_name = enum_value,
-        #define GPU_ARCHITECTURE(enum_name, enum_value)
-        #include "gpu_enums.inc"
-        #undef GPU_CONFIGURATION
-        #undef GPU_MODEL
-        #undef GPU_ARCHITECTURE
-    };
-
-    
-
-    enum architectures
-    {
-        #define GPU_CONFIGURATION(enum_name, enum_value)
-        #define GPU_MODEL(enum_name, enum_value)
-        #define GPU_ARCHITECTURE(enum_name, enum_value) enum_name = enum_value,
-        #include "gpu_enums.inc"
-        #undef GPU_CONFIGURATION
-        #undef GPU_MODEL
-        #undef GPU_ARCHITECTURE
-    };
-
-    #undef GPU_CONFIGURATION
-
-
-    configurations configuration;
-    models model;
-    architectures architecture;
     std::string dev_id;
     std::string driver_version;
+    std::uint32_t compute_units_count;
+    std::shared_ptr<rapidjson::Document> device_cache; 
+
 private:
     friend class gpu_toolkit;
     explicit engine_info_internal(const gpu_toolkit& context);
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/events_pool.h b/inference-engine/thirdparty/clDNN/src/gpu/events_pool.h
new file mode 100644
index 000000000..11a0e37a8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/events_pool.h
@@ -0,0 +1,139 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "refcounted_obj.h"
+#include "event_impl.h"
+#include "meta_utils.h"
+#include <iostream>
+
+namespace cldnn {
+    namespace gpu {
+
+        class gpu_toolkit;
+
+        template<typename Type,
+            typename U = typename std::enable_if<
+            meta::is_any_of<Type, base_event, user_event, base_events>::value>::type>
+        class event_pool_impl
+        {
+        protected:
+            event_pool_impl() = default;
+
+            using type = Type;
+
+            event_impl::ptr get_from_pool(std::shared_ptr<gpu_toolkit>& ctx)
+            {
+                for (auto& ev : _events)
+                {
+                    if (!ev->is_valid())
+                        return ev;
+                }
+                return allocate({ new Type(ctx), false });
+            }
+
+            void reset_events()
+            {
+                for (auto& ev : _events)
+                    ev->reset();
+            }
+
+        private:
+            std::vector<event_impl::ptr> _events;
+
+            event_impl::ptr allocate(const event_impl::ptr& obj)
+            {
+                _events.emplace_back(obj);
+                return _events.back();
+            }
+        };
+
+        struct base_event_pool : event_pool_impl<base_event>
+        {
+            event_impl::ptr get(std::shared_ptr<gpu_toolkit>& ctx, const cl::Event& ev, const uint64_t q_stamp)
+            {
+                auto ret = get_from_pool(ctx);
+                dynamic_cast<type*>(ret.get())->attach_ocl_event(ev, q_stamp);
+                return ret;
+            }
+            void reset()
+            {
+                reset_events();
+            }
+        };
+
+        struct user_event_pool : event_pool_impl<user_event>
+        {
+            event_impl::ptr get(std::shared_ptr<gpu_toolkit>& ctx, bool set = false)
+            {
+                auto ret = get_from_pool(ctx);
+                dynamic_cast<type*>(ret.get())->attach_event(set);
+                return ret;
+            }
+            void reset()
+            {
+                reset_events();
+            }
+        };
+
+        struct group_event_pool : event_pool_impl<base_events>
+        {
+            event_impl::ptr get(std::shared_ptr<gpu_toolkit>& ctx, const std::vector<event_impl::ptr>& deps)
+            {
+                auto ret_ev = get_from_pool(ctx);
+                dynamic_cast<type*>(ret_ev.get())->attach_events(deps);
+                return ret_ev;
+            }
+            void reset()
+            {
+                reset_events();
+            }
+        };
+
+        class events_pool
+        {
+        public:
+            events_pool() = default;
+
+            event_impl::ptr get_from_base_pool(std::shared_ptr<gpu_toolkit> ctx, const cl::Event& ev, const uint64_t q_stamp)
+            {
+                return _base_pool.get(ctx, ev, q_stamp);
+            }
+
+            event_impl::ptr get_from_user_pool(std::shared_ptr<gpu_toolkit> ctx, bool set = false)
+            {
+                return _user_pool.get(ctx, set);
+            }
+
+            event_impl::ptr get_from_group_pool(std::shared_ptr<gpu_toolkit> ctx, const std::vector<event_impl::ptr>& deps)
+            {
+                return _group_pool.get(ctx, deps);
+            }
+
+            void reset_events()
+            {
+                _base_pool.reset();
+                _user_pool.reset();
+                _group_pool.reset();
+            }
+
+        private:
+            base_event_pool _base_pool;
+            user_event_pool _user_pool;
+            group_event_pool _group_pool;
+        };
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h b/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h
index ca3a8ac73..d16b56e97 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/events_waiter.h
@@ -16,9 +16,8 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
-#include "api/CPP/profiling.hpp"
-#include "ocl_user_event.h"
 #include "ocl_toolkit.h"
+#include "event_impl.h"
 
 namespace cldnn { namespace gpu {
 class events_waiter : public context_holder
@@ -29,12 +28,6 @@ public:
 
     event_impl::ptr run(const std::vector<event_impl::ptr>& dependencies)
     {
-        if (dependencies.size() == 0)
-        {
-            auto ev = new gpu::user_event(context(), true);
-            return{ ev, false };
-        }
-
         if (dependencies.size() == 1)
             return dependencies[0];
 
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp
index 517a73249..649460183 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_gpu.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -92,7 +92,7 @@ public:
         fc_optional_params.allowInputReordering = true;
 
         if(arg.get_primitive()->with_activation)
-            convert_activation_func_params(arg.get_primitive(), fc_params);
+            convert_activation_func_params(arg.get_primitive(), fc_params.activation);
 
         fc_params.output = fc_params.output.FlattenFeatureAndSpatials();
 
@@ -154,6 +154,9 @@ namespace {
                 // MMAD
                 { std::make_tuple(engine_types::ocl, data_types::i8,  format::byxf_af32), val_fw },
                 { std::make_tuple(engine_types::ocl, data_types::i8,  format::fs_bs_yx_bsv4_fsv32), val_fw },
+                // IMAD
+                { std::make_tuple(engine_types::ocl, data_types::i8,  format::b_fs_yx_fsv4), val_fw },
+                { std::make_tuple(engine_types::ocl, data_types::u8,  format::b_fs_yx_fsv4), val_fw },
             });
         }
         ~attach() {}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_weights_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_weights_gpu.cpp
index fb22ef0a4..89a32e1b0 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_weights_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/fully_connected_grad_weights_gpu.cpp
@@ -33,9 +33,9 @@ struct fully_connected_grad_weights_gpu : typed_primitive_gpu_impl<fully_connect
 
 protected:
 
-    virtual bool validate(typed_primitive_inst<fully_connected_grad_weights>& instance) const override
+    virtual bool validate_impl(const typed_primitive_inst<fully_connected_grad_weights>& instance) const override
     {
-        bool res = parent::validate(instance);
+        bool res = true;
 
         if (instance.use_momentum())
         {
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_bn_scale_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_bn_scale_gpu.cpp
new file mode 100644
index 000000000..f51ae8eec
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_bn_scale_gpu.cpp
@@ -0,0 +1,166 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_bn_scale_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "error_handler.h"
+#include "kernel_selector_helper.h"
+#include "kernel_runner.h"
+#include "fused_conv_bn_scale/fused_conv_bn_scale_kernel_selector.h"
+#include "fused_conv_bn_scale/fused_conv_bn_scale_kernel_base.h"
+
+namespace cldnn { namespace gpu {
+
+struct fused_conv_bn_scale_gpu : typed_primitive_gpu_impl<fused_conv_bn_scale>
+{
+    using parent = typed_primitive_gpu_impl<fused_conv_bn_scale>;
+    using parent::parent;
+
+protected:
+
+    virtual bool validate_impl(const typed_primitive_inst<fused_conv_bn_scale>& instance) const override
+    {
+        bool res = true;
+
+        // Check whether all memory elements use the same unit type (FP16 or FP32).
+        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.node.input().get_output_layout().data_type, "output memory", instance.node.get_output_layout().data_type, "");
+        CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.node.input().get_output_layout().data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
+
+        return res;
+    }
+
+    virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<fused_conv_bn_scale>& instance, int32_t split) const override
+    {
+        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
+        auto desc = std::static_pointer_cast<const fused_conv_bn_scale>(instance.desc());
+
+        args.weights              = &instance.weights_memory(split);
+        args.bias                 = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
+
+        if (!desc->scale_bias.empty())
+        {
+            if (instance.is_fused_in_training())
+            {
+                args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 4));
+                args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 3));
+                args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 2));
+                args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 1));
+            }
+            else
+            {
+                args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 1));
+            }
+        }
+        else if (instance.is_fused_in_training())
+        {
+            args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 3));
+            args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 2));
+            args.inputs.push_back(&instance.dep_memory(instance.dependencies().size() - 1));
+        }
+
+        return args;
+    }
+
+    virtual int32_t get_split() const override
+    { 
+        return _outer.get_split(); 
+    }
+
+public:
+
+    static primitive_impl* create(const fused_conv_bn_scale_node &arg)
+    {
+        const auto& primitive       = arg.get_primitive();
+        const auto& input_layout    = arg.input().get_output_layout();
+        const auto& weights_layout  = arg.weights(0).get_output_layout();
+        const auto& weights_size    = weights_layout.size;
+
+        const auto& split           = primitive->split();
+        const auto& stride          = primitive->stride;
+        const auto& input_offset    = primitive->input_offset;
+        const auto& dilation        = primitive->dilation;
+
+        assert(arg.get_output_layout().size.feature[0] / primitive->split() == weights_layout.size.batch[0]);
+
+        auto fuse_params = get_weights_bias_default_params<kernel_selector::fused_conv_bn_scale_params>(arg, split);
+        auto fuse_optional_params = get_default_weights_bias_optional_params<kernel_selector::fused_conv_bn_scale_optional_params>(arg.get_program());
+
+        const auto additional_offset = tensor::max(input_offset, 0);
+        if (additional_offset != 0)
+        {
+            fuse_params.inputs[0] = convert_data_tensor(input_layout, split, additional_offset);
+        }
+
+        fuse_params.epsilon = arg.get_primitive()->epsilon;
+
+        fuse_params.fused_in_training = arg.is_fused_in_training();
+        fuse_params.scale_bias = arg.scale_bias_term();
+
+        if(primitive->with_activation)
+            convert_activation_func_params(primitive, fuse_params.activation);
+
+        fuse_params.split = split;
+        fuse_params.filterSize = {
+            (uint32_t)weights_size.spatial[0],
+            (uint32_t)weights_size.spatial[1],
+        };
+
+        fuse_params.padding = {
+            (uint32_t)std::max(-input_offset.spatial[0], 0),
+            (uint32_t)std::max(-input_offset.spatial[1], 0)
+        };
+
+        fuse_params.stride = {
+            (uint32_t)stride.spatial[0],
+            (uint32_t)stride.spatial[1]
+        };
+
+        fuse_params.dilation = {
+            (uint32_t)dilation.spatial[0],
+            (uint32_t)dilation.spatial[1]
+        };
+
+        auto& kernel_selector = kernel_selector::fused_conv_bn_scale_kernel_selector::Instance();
+
+        const auto& tuning_config = arg.get_program().get_options().get<build_option_type::tuning_config>();
+
+        if (tuning_config->config.mode == tuning_mode::tuning_tune_and_cache)
+        {
+            fuse_optional_params.tuningParams.runner = std::make_shared<gpu::kernel_runner>(arg.get_program().get_engine(), true);
+        }
+
+        kernel_selector::KernelsData best_kernels = kernel_selector.GetBestKernels(fuse_params, fuse_optional_params);
+		
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+        auto fuse = new fused_conv_bn_scale_gpu(arg, best_kernels[0]);
+
+        return fuse;
+    }
+};
+
+namespace{
+    struct attach {
+        attach() {
+            implementation_map<fused_conv_bn_scale>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), fused_conv_bn_scale_gpu::create);
+            implementation_map<fused_conv_bn_scale>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), fused_conv_bn_scale_gpu::create);
+        }
+        ~attach() {}
+    };
+    attach attach_impl;
+}
+} }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp
new file mode 100644
index 000000000..ea619ba49
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/fused_conv_eltwise_gpu.cpp
@@ -0,0 +1,214 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "fused_conv_eltwise_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "error_handler.h"
+#include "kernel_selector_helper.h"
+#include "kernel_runner.h"
+#include "fused_conv_eltwise/fused_conv_eltwise_kernel_selector.h"
+#include "fused_conv_eltwise/fused_conv_eltwise_kernel_base.h"
+
+namespace cldnn { namespace gpu {
+
+struct fused_conv_eltwise_gpu : typed_primitive_gpu_impl<fused_conv_eltwise>
+{
+    using parent = typed_primitive_gpu_impl<fused_conv_eltwise>;
+    using parent::parent;
+
+protected:
+
+    virtual bool validate_impl(const typed_primitive_inst<fused_conv_eltwise>& instance) const override
+    {
+        bool res = true;
+
+        auto outer_id = _outer.id();
+        auto data_type = instance.node.input().get_output_layout().data_type;
+
+        // Check whether all memory elements use the same unit type (FP16 or FP32).
+        CLDNN_ERROR_DATA_TYPES_MISMATCH(outer_id, "Input memory", data_type, "output memory", instance.node.get_output_layout().data_type, "");
+        CLDNN_ERROR_DATA_TYPES_MISMATCH(outer_id, "Input memory", data_type, "filter memory", instance.weights_memory(0).get_layout().data_type, "");
+
+        return res;
+    }
+
+    virtual kernel::kernel_arguments_data get_arguments(typed_primitive_inst<fused_conv_eltwise>& instance, int32_t split) const override
+    {
+        kernel::kernel_arguments_data args = parent::get_arguments(instance, split);
+
+        args.weights              = &instance.weights_memory(split);
+        args.bias                 = instance.bias_term() ? &instance.bias_memory(split) : nullptr;
+        args.weights_quantization_factors = instance.weights_quantization_factors_term() ? &instance.weights_quantization_factors_memory(split) : nullptr;
+        args.output_calibration_factors = instance.conv_output_calibration_factors_term() ? &instance.output_calibration_factors_memory(split) : nullptr;
+        if (instance.eltw_output_calibration_factors_term())
+            args.fused_op_calibration_factors.push_back(&instance.eltw_output_calibration_factors_memory());
+        return args;
+    }
+
+    virtual int32_t get_split() const override
+    { 
+        return _outer.get_split(); 
+    }
+
+public:
+
+    static primitive_impl* create(const fused_conv_eltwise_node &arg)
+    {
+        const auto& primitive       = arg.get_primitive();
+        const auto& input_layout    = arg.input().get_output_layout();
+        const auto& weights_layout  = arg.weights(0).get_output_layout();
+        const auto& weights_size    = weights_layout.size;
+
+        const auto& split           = primitive->split();
+        const auto& stride          = primitive->conv.stride;
+        const auto& dilation        = primitive->conv.dilation;
+        const auto& input_offset    = primitive->conv.input_offset;
+
+        const auto depthwise_separable_opt = arg.get_depthwise_sep_opt();
+        const auto actual_split = depthwise_separable_opt ? (decltype(split))1 : split;
+
+        const auto transposed = arg.get_transposed();
+
+        assert(arg.get_output_layout().size.feature[0] / primitive->split() == weights_layout.size.batch[0]);
+
+        // conv params
+        auto fused_params = get_weights_bias_default_params<kernel_selector::fused_conv_eltwise_params>(arg, actual_split);
+        // add second input for eltwise
+        if (!static_cast<const fused_conv_eltwise*>(arg.get_primitive().get())->second_input_in_output)
+        {
+            fused_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
+        }
+
+        auto& conv_params = fused_params.conv;
+        auto& eltw_params = fused_params.eltw;
+
+        auto conv_optional_params = get_default_weights_bias_optional_params<kernel_selector::fused_conv_eltwise_optional_params>(arg.get_program());
+
+        const auto additional_offset = tensor::max(input_offset, 0);
+        if (additional_offset != 0)
+        {
+            fused_params.inputs[0] = convert_data_tensor(input_layout, actual_split, additional_offset);
+        }
+
+        if (primitive->conv.with_activation)
+        {
+            convert_activation_func_params(&primitive->conv, fused_params.activation);
+        }
+        if (primitive->eltw.with_activation)
+        {
+            convert_activation_func_params(&primitive->eltw, fused_params.eltw.activation);
+        }
+
+        fused_params.conv.depthwise_separable_opt = depthwise_separable_opt;
+        fused_params.conv.transposed = transposed;
+
+        fused_params.second_input_in_output = primitive->second_input_in_output;
+
+        conv_params.local_convolution = weights_size.local[0] > 1 || weights_size.local[1] > 1;
+        conv_params.split = split;
+        conv_params.filterSize = {
+            (uint32_t)weights_size.spatial[0],
+            (uint32_t)weights_size.spatial[1],
+        };
+
+        conv_params.padding = {
+            (uint32_t)std::max(-input_offset.spatial[0], 0),
+            (uint32_t)std::max(-input_offset.spatial[1], 0)
+        };
+
+        conv_params.stride = {
+            (uint32_t)stride.spatial[0],
+            (uint32_t)stride.spatial[1]
+        };
+        conv_params.dilation = {
+            (uint32_t)dilation.spatial[0],
+            (uint32_t)dilation.spatial[1]
+        };
+        
+        if (primitive->conv.weights_quantization_factors.size() > 0)
+        {
+            conv_params.int8_quantization = true;
+            conv_params.weights_quantization_factors.push_back(convert_data_tensor(arg.weights_quantization_factors().get_output_layout()).FlattenFeatureAndSpatials());
+            conv_params.input_quantization_factor = arg.get_conv_input_qf();
+
+            if (primitive->conv.output_calibration_factors.size() > 0)
+            {
+                conv_params.output_calibration = true;
+                conv_params.output_calibration_factors.push_back(convert_data_tensor(arg.conv_output_calibration_factors().get_output_layout()).FlattenFeatureAndSpatials());
+            }
+            else
+                conv_params.output_quantization_factor = arg.get_conv_output_qf();
+        }
+
+        // eltw params
+        if (primitive->eltw.output_calibration_factors.size() > 0 || primitive->eltw.output_quantization_factor != 1.0f)
+        {
+            eltw_params.int8_quantization = true;
+
+            if (primitive->eltw.output_calibration_factors.size() > 0)
+            {
+                eltw_params.output_calibration = true;
+                eltw_params.output_calibration_factors.push_back(convert_data_tensor(arg.eltw_output_calibration_factors().get_output_layout()).FlattenFeatureAndSpatials());
+            }
+            else
+                eltw_params.output_quantization_factor = arg.get_eltw_output_qf();
+        }
+
+        // stride
+        if (!primitive->eltw.stride.empty())
+        {
+            const auto& eltw_stride = primitive->eltw.stride;
+            eltw_params.stride.resize(eltw_stride.size());
+            for (size_t i = 0; i < primitive->eltw.stride.size(); i++)
+            {
+                eltw_params.stride[i] = { (uint32_t)eltw_stride[i].spatial[0], (uint32_t)eltw_stride[i].spatial[1] };
+            }
+        }
+
+        auto& kernel_selector = kernel_selector::fused_conv_eltwise_kernel_selector::Instance();
+
+        const auto& tuning_config = arg.get_program().get_options().get<build_option_type::tuning_config>();
+
+        if (tuning_config->config.mode == tuning_mode::tuning_tune_and_cache)
+        {
+            conv_optional_params.tuningParams.runner = std::make_shared<gpu::kernel_runner>(arg.get_program().get_engine(), true);
+        }
+
+        kernel_selector::KernelsData best_kernels = kernel_selector.GetBestKernels(fused_params, conv_optional_params);
+		
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+        auto conv = new fused_conv_eltwise_gpu(arg, best_kernels[0]);
+
+        return conv;
+    }
+};
+
+namespace{
+    struct attach {
+        attach() {
+            implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), fused_conv_eltwise_gpu::create);
+            implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::yxfb), fused_conv_eltwise_gpu::create);
+            implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), fused_conv_eltwise_gpu::create);
+            // MMAD
+            implementation_map<fused_conv_eltwise>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), fused_conv_eltwise_gpu::create);
+        }
+        ~attach() {}
+    };
+    attach attach_impl;
+}
+} }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp
new file mode 100644
index 000000000..776246f95
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/gather_gpu.cpp
@@ -0,0 +1,86 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "gather_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "gather/gather_kernel_selector.h"
+#include "gather/gather_kernel_ref.h"
+#include "error_handler.h"
+
+using namespace cldnn;
+
+namespace cldnn
+{
+namespace gpu
+{
+    kernel_selector::gather_axis convert_axis(gather::gather_axis axis)
+    {
+        switch (axis)
+        {
+            case gather::along_x: return kernel_selector::gather_axis::X;
+            case gather::along_y: return kernel_selector::gather_axis::Y;
+            case gather::along_f: return kernel_selector::gather_axis::FEATURE;
+            case gather::along_b: return kernel_selector::gather_axis::BATCH;
+            default:
+                return kernel_selector::gather_axis::X;
+        }
+    }
+
+    struct gather_gpu : typed_primitive_gpu_impl<gather>
+    {
+        using parent = typed_primitive_gpu_impl<gather>;
+        using parent::parent;
+
+    public:
+
+        static primitive_impl* create(const gather_node& arg)
+        {
+            auto gather_params = get_default_params<kernel_selector::gather_params>(arg);
+            auto gather_optional_params = get_default_optional_params<kernel_selector::gather_optional_params>(arg.get_program());
+
+            gather_params.axis = convert_axis(arg.get_primitive()->axis);
+
+            gather_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
+
+            auto& kernel_selector = kernel_selector::gather_kernel_selector::Instance();
+            auto best_kernels = kernel_selector.GetBestKernels(gather_params, gather_optional_params);
+
+            CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+            auto gather = new gather_gpu(arg, best_kernels[0]);
+
+            return gather;
+        }
+    };
+
+    namespace
+    {
+        struct attach
+        {
+            attach()
+            {
+                auto val_fw = gather_gpu::create;
+                implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+                implementation_map<gather>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+            }
+            ~attach() = default;
+        };
+        attach attach_impl;
+    }
+} //namespace cldnn
+} //namespace gpu
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/index_select_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/index_select_gpu.cpp
index 0dab9157c..41f826a0c 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/index_select_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/index_select_gpu.cpp
@@ -26,17 +26,22 @@ namespace cldnn { namespace gpu {
 
 namespace
 {
-    inline kernel_selector::IndexSelectAxis convect_to_index_select_axis(index_select_axis_name axis)
+    inline std::vector<kernel_selector::IndexSelectAxis> convert_to_index_select_axis(std::vector<index_select_axis_name> axes)
     {
-        switch (axis)
+        std::vector<kernel_selector::IndexSelectAxis> axes_names = {};
+        for (size_t i = 0; i < axes.size(); i++)
         {
-        case index_select_axis_name::along_b:  return kernel_selector::IndexSelectAxis::BATCH;
-        case index_select_axis_name::along_f:  return kernel_selector::IndexSelectAxis::FEATURE;
-        case index_select_axis_name::along_x:  return kernel_selector::IndexSelectAxis::X;
-        case index_select_axis_name::along_y: return kernel_selector::IndexSelectAxis::Y;
-        default:
-            return kernel_selector::IndexSelectAxis::BATCH;
+            switch (axes[i])
+            {
+            case index_select_axis_name::along_b:  axes_names.push_back(kernel_selector::IndexSelectAxis::BATCH); break;
+            case index_select_axis_name::along_f:  axes_names.push_back(kernel_selector::IndexSelectAxis::FEATURE); break;
+            case index_select_axis_name::along_x:  axes_names.push_back(kernel_selector::IndexSelectAxis::X); break;
+            case index_select_axis_name::along_y:  axes_names.push_back(kernel_selector::IndexSelectAxis::Y); break;
+            default:
+                axes_names.push_back(kernel_selector::IndexSelectAxis::BATCH); break;
+            }
         }
+        return axes_names;
     }
 }
 
@@ -50,8 +55,11 @@ struct index_select_gpu : typed_primitive_gpu_impl<index_select>
         auto index_select_params          = get_default_params<kernel_selector::index_select_params>(arg, 1);
         auto index_select_optional_params = get_default_optional_params<kernel_selector::index_select_optional_params>(arg.get_program());
 
-        index_select_params.inputs.push_back(convert_data_tensor(arg.indices().get_output_layout()));
-        index_select_params.axis = convect_to_index_select_axis(arg.get_axis());
+        if (!arg.get_reverse())
+            index_select_params.inputs.push_back(convert_data_tensor(arg.indices().get_output_layout()));
+
+        index_select_params.axes = convert_to_index_select_axis(arg.get_axes());
+        index_select_params.reverse = arg.get_reverse();
         
         auto& kernel_selector = kernel_selector::index_select_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(index_select_params, index_select_optional_params);
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp
index ad9767042..ca7c24daa 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp
@@ -115,10 +115,22 @@ namespace {
                 }
                 break;
             case kernel_selector::kernel_argument_types::OUTPUT_CALIBRATION_FACTORS:
-                if (data.output_calibration_factors)
+                if (args[i].index == 0)
                 {
-                    status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.output_calibration_factors).get_buffer());
+                    if (data.output_calibration_factors)
+                    {
+                        status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.output_calibration_factors).get_buffer());
+                    }
                 }
+                else
+                {
+                    size_t new_idx = args[i].index - 1;
+                    if (new_idx < data.fused_op_calibration_factors.size() && data.fused_op_calibration_factors[new_idx])
+                    {
+                        status = kernel.setArg(i, dynamic_cast<const gpu::gpu_buffer&>(*data.fused_op_calibration_factors[new_idx]).get_buffer());
+                    }
+                }
+
                 break;
             case kernel_selector::kernel_argument_types::SCALE_TABLE:
                 if (data.scale_table)
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel.h b/inference-engine/thirdparty/clDNN/src/gpu/kernel.h
index 4088b12c9..67a5cf8d3 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/kernel.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel.h
@@ -17,7 +17,8 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-#include "memory_gpu.h"
+#include "ocl_toolkit.h"
+#include "memory_impl.h"
 #include "kernels_cache.h"
 #include "event_impl.h"
 
@@ -69,6 +70,8 @@ public:
         memory_impl::cptr slope;
         memory_impl::cptr prev_weights_grad;
         memory_impl::cptr prev_bias_grad;
+        // used for fused primitives
+        std::vector<memory_impl::cptr> fused_op_calibration_factors;
         int32_t           split          = 0;
         float             lr;
         const kernel_selector::kernel_scalar_arguments* scalars = nullptr;
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h
index e3d7375e6..24fe6dbcc 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.h
@@ -35,7 +35,7 @@ public:
 private:
 
     const int compilation_batch_size = 50;
-    const int runs_per_kernel = 10;
+    const int runs_per_kernel = 3;
 
     void prepare_kernel_args(const kernel_selector::KernelsData& kernels_data, gpu::kernel::kernel_arguments_data& args);
 
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
index ed3c6aa98..8f33b6367 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/kernels_cache.cpp
@@ -354,3 +354,4 @@ void kernels_cache::build_all()
 }
 
 }}
+ 
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lookup_table_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lookup_table_gpu.cpp
index 890a8b605..e6ddab685 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/lookup_table_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/lookup_table_gpu.cpp
@@ -33,9 +33,9 @@ namespace cldnn {
 
         protected:
 
-            virtual bool validate(typed_primitive_inst<lookup_table>& instance) const override
+            virtual bool validate_impl(const typed_primitive_inst<lookup_table>& instance) const override
             {
-                bool res = parent::validate(instance);
+                bool res = true;
 
                 // Check whether all memory elements use the same unit type (FP16 or FP32).
                 CLDNN_ERROR_DATA_TYPES_MISMATCH(_outer.id(), "Input memory", instance.input_memory(1).get_layout().data_type, "output memory", instance.output_memory().get_layout().data_type, "");
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp
index 69baa64ed..b9f8eef10 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/lstm_elt_gpu.cpp
@@ -47,12 +47,6 @@ protected:
         return args;
     }
 
-    virtual bool validate(typed_primitive_inst<lstm_elt>& instance) const override
-    {
-        bool res = parent::validate(instance);
-
-        return res;
-    }
 public:
 
     static primitive_impl* create(const lstm_elt_node& arg)
@@ -64,11 +58,16 @@ public:
         {
             const auto& cell_layout = arg.cell().get_output_layout();
             lstm_elt_params.SetCell(convert_data_tensor(cell_layout));
+            // TODO: make a generic function to get the direction
+            if (cell_layout.size.spatial[1] > 1) {
+                lstm_elt_params.cell_direction = arg.direction();
+            }
         }
 
         lstm_elt_params.SetOffsetOrder(arg.offset_order());
         lstm_elt_params.clip = arg.clip();
         lstm_elt_params.input_forget = arg.input_forget();
+        lstm_elt_params.direction = arg.direction();
 
         auto& kernel_selector = kernel_selector::lstm_elt_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(lstm_elt_params, lstm_elt_optional_params);
@@ -90,6 +89,8 @@ namespace {
             implementation_map<lstm_elt>::add({
                 { std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw },
                 { std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw },
+                { std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw },
+                { std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw },
             });
         }
         ~attach() {}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp
index 7cb6b1176..40d601abf 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/lstm_gemm_gpu.cpp
@@ -50,12 +50,6 @@ protected:
         return args;
     }
 
-    virtual bool validate(typed_primitive_inst<lstm_gemm>& instance) const override
-    {
-        bool res = parent::validate(instance);
-
-        return res;
-    }
 public:
 
     static primitive_impl* create(const lstm_gemm_node& arg)
@@ -78,8 +72,25 @@ public:
 
             const auto& hidden_layout = arg.hidden().get_output_layout();
             lstm_gemm_params.SetHidden(convert_data_tensor(hidden_layout));
+            // TODO: make a generic function to get the direction
+            if (hidden_layout.size.spatial[1] > 1) {
+                lstm_gemm_params.hidden_direction = arg.direction();
+            }
         }
         lstm_gemm_params.direction = arg.direction();
+        
+        // Update the direction of the input for the gemm kernel
+        const auto& input_layout = arg.input().get_output_layout();
+        size_t input_directions = input_layout.size.spatial[1];
+
+        if (input_directions > 1)  // For bidirection input, input direction can be 1 or 0
+        {
+            lstm_gemm_params.input_direction = arg.direction();
+        }
+        else  // For unidirectional input
+        {
+            lstm_gemm_params.input_direction = 0;
+        }
 
         auto lstm_gemm_optional_params = get_default_optional_params<kernel_selector::lstm_gemm_optional_params>(arg.get_program());
 
@@ -103,6 +114,8 @@ namespace {
             implementation_map<lstm_gemm>::add({
                 { std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw },
                 { std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw },
+                { std::make_tuple(engine_types::ocl, data_types::f32, format::fyxb), val_fw },
+                { std::make_tuple(engine_types::ocl, data_types::f16, format::fyxb), val_fw },
             });
         }
         ~attach() {}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
index e4978075c..c50f631c7 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/memory_gpu.cpp
@@ -22,7 +22,7 @@
 namespace cldnn { namespace gpu {
 
 gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
-    : memory_impl(engine, layout)
+    : memory_impl(engine, layout, false)
     , _context(engine->get_context())
     , _lock_count(0)
     , _buffer(_context->context(), CL_MEM_READ_WRITE, size())
@@ -34,7 +34,7 @@ gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layo
 }
 
 gpu_buffer::gpu_buffer(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Buffer& buffer)
-    : memory_impl(engine, new_layout)
+    : memory_impl(engine, new_layout, true)
     , _context(engine->get_context())
     , _lock_count(0)
     , _buffer(buffer)
@@ -67,7 +67,7 @@ void gpu_buffer::fill(unsigned char pattern, event_impl::ptr ev) {
 }
 
 gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& layout)
-    : memory_impl(engine, layout)
+    : memory_impl(engine, layout, false)
     , _context(engine->get_context())
     , _lock_count(0)
     , _mapped_ptr(nullptr)
@@ -110,7 +110,7 @@ gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const la
 }
 
 gpu_image2d::gpu_image2d(const refcounted_obj_ptr<engine_impl>& engine, const layout& new_layout, const cl::Image2D& buffer)
-    : memory_impl(engine, new_layout)
+    : memory_impl(engine, new_layout, true)
     , _context(engine->get_context())
     , _lock_count(0)
     , _buffer(buffer)
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h
index 8e015abdf..7c9f82017 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_base_event.h
@@ -14,7 +14,11 @@ struct profiling_period_ocl_start_stop
 struct ocl_base_event : virtual public event_impl
 {
 public:
-    ocl_base_event(uint64_t queue_stamp = 0) : _queue_stamp(queue_stamp) {}
+    ocl_base_event(uint64_t queue_stamp = 0, bool valid = false) 
+        : _queue_stamp(queue_stamp)
+    {
+        _attached = valid;
+    }
     uint64_t get_queue_stamp() const { return _queue_stamp; }
 protected:
     uint64_t _queue_stamp = 0;
@@ -23,20 +27,31 @@ protected:
 struct base_event : virtual public ocl_base_event
 {
 public:
-    base_event(std::shared_ptr<gpu_toolkit> ctx, cl::Event const& ev, uint64_t queue_stamp = 0) : ocl_base_event(queue_stamp), _ctx(ctx), _event(ev)
+    base_event(std::shared_ptr<gpu_toolkit> ctx, cl::Event const& ev, uint64_t queue_stamp = 0) 
+        : ocl_base_event(queue_stamp, true)
+        , _ctx(ctx)
+        , _event(ev)
+    {}
+
+    base_event(std::shared_ptr<gpu_toolkit> ctx)
+        : ocl_base_event(0, false)
+        , _ctx(ctx)
     {}
 
+    void attach_ocl_event(const cl::Event& ev, const uint64_t q_stamp)
+    {
+        _event = ev;
+        _queue_stamp = q_stamp;
+        _attached = true;
+    }
+
     std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
     cl::Event get() { return _event; }
 
-
 private:
     std::shared_ptr<gpu_toolkit> _ctx;
-    cl::Event _event;
     bool _callback_set = false;
-
     void set_ocl_callback();
-
     static void CL_CALLBACK ocl_event_completion_callback(cl_event, cl_int, void* me);
 
 private:
@@ -46,26 +61,50 @@ private:
     bool get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) override;
 
     friend struct base_events;
+
+protected:
+    cl::Event _event;
 };
 
 struct base_events : virtual public ocl_base_event
 {
 public:
-    base_events(std::shared_ptr<gpu_toolkit> ctx, std::vector<event_impl::ptr> const &ev) : ocl_base_event(0), _ctx(ctx), _events(ev)
+    base_events(std::shared_ptr<gpu_toolkit> ctx, std::vector<event_impl::ptr> const &ev) 
+        : ocl_base_event(0, true)
+        , _ctx(ctx)
+        , _events(ev)
+    {
+        set_queue_stamp();
+    }
+
+    base_events(std::shared_ptr<gpu_toolkit> ctx)
+        : ocl_base_event(0, false)
+        , _ctx(ctx)
+    {}
+
+    void attach_events(const std::vector<event_impl::ptr>& ev) 
+    { 
+        if (_attached)
+            throw std::runtime_error("Trying to attach events to valid event object.");
+        _events = ev;
+        _attached = true;
+        set_queue_stamp();
+    }
+
+    std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
+
+private:
+    void set_queue_stamp()
     {
         uint64_t _queue_stamp_max = 0;
-        for (size_t i = 0; i < ev.size(); i++)
+        for (size_t i = 0; i < _events.size(); i++)
         {
-            auto * _base_event = dynamic_cast<base_event*>(ev[i].get());
+            auto * _base_event = dynamic_cast<base_event*>(_events[i].get());
             if (_base_event->get_queue_stamp() > _queue_stamp_max)
                 _queue_stamp_max = _base_event->get_queue_stamp();
         }
         _queue_stamp = _queue_stamp_max;
     }
-
-    std::shared_ptr<gpu_toolkit> get_context() const { return _ctx; }
-
-private:
     void wait_impl() override;
     bool is_set_impl() override;
 
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp
new file mode 100644
index 000000000..46ba2a1ff
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.cpp
@@ -0,0 +1,178 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "ocl_builder.h"
+#include "confiugration.h"
+
+// NOTE: Due to buggy scope transition of warnings we need to disable warning in place of use/instantation
+//       of some types (even though we already disabled them in scope of definition of these types).
+//       Moreover this warning is pretty much now only for annoyance: it is generated due to lack
+//       of proper support for mangling of custom GCC attributes into type name (usually when used
+//       with templates, even from standard library).
+#if defined __GNUC__ && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+
+namespace cldnn { namespace gpu{
+
+    ocl_builder::ocl_builder(const configuration& config)
+        : _is_user_context(config.user_context != nullptr ? true : false)
+    {
+        if (_is_user_context)
+        {
+            _context = *config.user_context;
+            build_device_from_user_context(config);
+        }
+        else
+        {
+            build_device(config);
+            build_context();
+        }
+        build_platform_id();
+    }
+
+    void ocl_builder::build_device_from_user_context(const configuration& config)
+    {
+        auto all_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
+        auto num_devices = _context.getInfo<CL_CONTEXT_NUM_DEVICES>();
+        if (num_devices != 1)
+        {
+            throw std::runtime_error("[ERROR]. Number of devices from user context is not equal to 1.");
+        }
+        auto device = all_devices.at(0);
+        auto dev_type = device.getInfo<CL_DEVICE_TYPE>();
+        if (dev_type != CL_DEVICE_TYPE_GPU)
+        {
+            throw std::runtime_error("[ERROR]. User defined device is not an gpu device!");
+        }
+
+        std::list<std::string> reasons;
+        if (does_device_match_config(config, device, reasons))
+        {
+            _device = device;
+            return;
+        }
+        else
+        {
+            std::string error_msg = "No OpenCL device found which would match provided configuration:";
+            for (const auto& reason : reasons)
+                error_msg += "\n    " + reason;
+            throw std::invalid_argument(std::move(error_msg));
+        }
+
+    }
+
+    void ocl_builder::build_device(const configuration& config)
+    {
+        std::list<std::string> reasons;
+        cl_uint n = 0;
+
+        // Get number of platforms availible
+        cl_int err = clGetPlatformIDs(0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
+        }
+
+        // Get platform list
+        std::vector<cl_platform_id> platform_ids(n);
+        err = clGetPlatformIDs(n, platform_ids.data(), NULL);
+        if (err != CL_SUCCESS) {
+            throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
+        }
+
+        for (auto& id : platform_ids)
+        {
+            cl::Platform platform = cl::Platform(id);
+            std::vector<cl::Device> devices;
+            platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
+            for (auto& d : devices)
+            {
+                if (does_device_match_config(config, d, reasons))
+                {
+                    _device = d;
+                    return;
+                }
+            }
+        }
+
+        if (reasons.empty())
+            throw std::runtime_error("Could not find any OpenCL device");
+
+        std::string error_msg = "No OpenCL device found which would match provided configuration:";
+        for (const auto& reason : reasons)
+            error_msg += "\n    " + reason;
+
+        throw std::invalid_argument(std::move(error_msg));
+    }
+
+    void ocl_builder::build_context()
+    {
+        _context = cl::Context(_device);
+    }
+
+    bool ocl_builder::does_device_match_config(const configuration& config, const cl::Device& dev, std::list<std::string>& reasons)
+    {
+        auto dev_name = dev.getInfo<CL_DEVICE_NAME>();
+        bool ok = true;
+
+        auto dev_type = dev.getInfo<CL_DEVICE_TYPE>();
+
+        cl_device_type device_types[] = {
+            CL_DEVICE_TYPE_DEFAULT,
+            CL_DEVICE_TYPE_CPU,
+            CL_DEVICE_TYPE_GPU,
+            CL_DEVICE_TYPE_ACCELERATOR };
+
+        if (dev_type != device_types[config.device_type])
+        {
+            reasons.push_back(dev_name + ": invalid device type");
+            ok = false;
+        }
+
+        auto vendor_id = dev.getInfo<CL_DEVICE_VENDOR_ID>();
+        if (vendor_id != config.device_vendor)
+        {
+            reasons.push_back(dev_name + ": invalid vendor type");
+            ok = false;
+        }
+
+        if (config.host_out_of_order)
+        {
+            auto queue_properties = dev.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
+            using cmp_t = std::common_type<decltype(queue_properties), typename std::underlying_type<cl::QueueProperties>::type>::type;
+            if (!(static_cast<cmp_t>(queue_properties) & static_cast<cmp_t>(cl::QueueProperties::OutOfOrder)))
+            {
+                reasons.push_back(dev_name + ": missing out of order support");
+                ok = false;
+            }
+        }
+        return ok;
+    }
+
+    void ocl_builder::build_platform_id()
+    {
+        cl_int err;
+        _platform_id = _device.getInfo<CL_DEVICE_PLATFORM>(&err);
+        if (err != CL_SUCCESS)
+        {
+            throw std::runtime_error("Error getting OpenCL platform_id from device!");
+        }
+    }
+
+}
+}
+
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h
new file mode 100644
index 000000000..0f6f6e0ea
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_builder.h
@@ -0,0 +1,54 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+// we want exceptions
+#define CL_HPP_ENABLE_EXCEPTIONS
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+#include <cl2_wrapper.h>
+#include <list>
+
+namespace cldnn {
+namespace gpu {
+    struct configuration;
+
+    class ocl_builder
+    {
+    public:
+        ocl_builder(const configuration& config);
+        cl::Context get_context() const { return _context; }
+        const cl::Device &get_device() const { return _device; }
+        cl_platform_id get_platform_id() const { return _platform_id; }
+        bool is_user_context() const { return _is_user_context; }
+
+    private:
+        cl::Context _context;
+        cl::Device  _device;
+        cl_platform_id _platform_id;
+        bool _is_user_context;
+
+        void build_device_from_user_context(const configuration& config);
+        void build_device(const configuration& config);
+        void build_context();
+        bool does_device_match_config(const configuration& config, const cl::Device& dev, std::list<std::string>& reasons);
+        void build_platform_id();
+    };
+
+}
+}
+
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
index d74a036f0..0044ec368 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.cpp
@@ -18,6 +18,8 @@
 #include "ocl_toolkit.h"
 #include "ocl_base_event.h"
 #include "ocl_user_event.h"
+#include "command_queues_builder.h"
+#include "events_pool.h"
 
 #include <cassert>
 #include <iomanip>
@@ -70,96 +72,6 @@ ocl_error::ocl_error(cl::Error const & err) : error(err.what() + std::string(",
 {
 }
 
-namespace {
-
-    cl_device_type convert_configuration_device_type(configuration::device_types device_type)
-    {
-        cl_device_type device_types[] = {
-                CL_DEVICE_TYPE_DEFAULT,
-                CL_DEVICE_TYPE_CPU,
-                CL_DEVICE_TYPE_GPU,
-                CL_DEVICE_TYPE_ACCELERATOR };
-        return device_types[device_type];
-    }
- 
-    bool does_device_match_config(cl::Device const& dev, configuration const& config, std::list<std::string>& reasons)
-    {
-        auto dev_name = dev.getInfo<CL_DEVICE_NAME>();
-        bool ok = true;
-
-        auto dev_type = dev.getInfo<CL_DEVICE_TYPE>();
-
-        if (dev_type != convert_configuration_device_type(config.device_type))
-        {
-            reasons.push_back(dev_name + ": invalid device type");
-            ok = false;
-        }
-
-        auto vendor_id = dev.getInfo<CL_DEVICE_VENDOR_ID>();
-        if (vendor_id != config.device_vendor)
-        {
-            reasons.push_back(dev_name + ": invalid vendor type");
-            ok = false;
-        }
-
-        if (config.host_out_of_order)
-        {
-            auto queue_properties = dev.getInfo<CL_DEVICE_QUEUE_PROPERTIES>();
-            using cmp_t = std::common_type<decltype(queue_properties), typename std::underlying_type<cl::QueueProperties>::type>::type;
-            if (!(static_cast<cmp_t>(queue_properties) & static_cast<cmp_t>(cl::QueueProperties::OutOfOrder)))
-            {
-                reasons.push_back(dev_name + ": missing out of order support");
-                ok = false;
-            }
-        }
-
-        return ok;
-    }
-}
-
-cl::Device get_gpu_device(const configuration& config, cl_platform_id& platform_id)
-{
-    std::list<std::string> reasons;
-    cl_uint n = 0;
-
-    // Get number of platforms availible
-    cl_int err = clGetPlatformIDs(0, NULL, &n);
-    if (err != CL_SUCCESS) {
-        throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
-    }
-
-    // Get platform list
-    std::vector<cl_platform_id> platform_ids(n);
-    err = clGetPlatformIDs(n, platform_ids.data(), NULL);
-    if (err != CL_SUCCESS) {
-        throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
-    }
-
-    for (auto& id : platform_ids)
-    {
-        cl::Platform platform = cl::Platform(id);
-        std::vector<cl::Device> devices;
-        platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
-        for (auto& d : devices)
-        {
-            if (does_device_match_config(d, config, reasons))
-            {
-                platform_id = id;
-                return d;
-            }
-        }
-    }
-
-    if (reasons.empty())
-        throw std::runtime_error("Could not find any OpenCL device");
-
-    std::string error_msg = "No OpenCL device found which would match provided configuration:";
-    for (const auto& reason : reasons)
-        error_msg += "\n    " + reason;
-
-    throw std::invalid_argument(std::move(error_msg));
-}
-
 std::shared_ptr<gpu_toolkit> gpu_toolkit::create(const configuration & cfg)
 {
     struct make_shared_wa : public gpu_toolkit { make_shared_wa(const configuration& cfg) : gpu_toolkit(cfg) {} };
@@ -176,116 +88,21 @@ struct gpu_toolkit::ocl_logger
     std::ofstream _log_file;
 };
 
-gpu_toolkit::gpu_toolkit(const configuration& config) 
+gpu_toolkit::gpu_toolkit(const configuration& config)
     : _configuration(config)
-    , _device(get_gpu_device(config, _platform_id))
+    , _ocl_builder(config)
+    , _user_context(_ocl_builder.is_user_context())
     , _neo_driver(strstr(get_device_version().c_str(), "NEO") ? true : false)
-    , _context(_device)
-    , _command_queue(_context,
-                     _device,
-                     (config.enable_profiling
-                        ? cl::QueueProperties::Profiling
-                        : cl::QueueProperties::None) | 
-                     (config.host_out_of_order && _neo_driver
-                        ? cl::QueueProperties::OutOfOrder
-                        : cl::QueueProperties::None))
+    , _context(_ocl_builder.get_context())
+    , _platform_id(_ocl_builder.get_platform_id())
     , _engine_info(*this)
     , _kernels_cache(*this)
+    , _events_pool(new events_pool())
 {
-    _device.getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
-
-    cl_command_queue_properties queue_properties =
-        ((config.enable_profiling) ?
-            CL_QUEUE_PROFILING_ENABLE :
-            0) |
-            ((config.host_out_of_order &&
-                _neo_driver) ?
-                CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE :
-                0);
-
-    if (_configuration.priority_mode != cldnn_priority_disabled)
-    {
-        if (extension_supported("cl_khr_priority_hints") &&
-            extension_supported("cl_intelx_create_command_queue"))
-            // TODO add check when caps will be availible (instead of cl_intelx_create_command_queue)
-            //&& extension_supported("cl_khr_create_command_queue"))
-        {
-            // TODO: When cl_khr_create_command_queue will be availible the
-            // function name will change to clCreateCommandQueueWithPropertiesKHR
-            // in place of clCreateCommandQueueWithPropertiesINTEL.
-#ifndef WIN32
-    #pragma GCC diagnostic push
-    #pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-            pfn_clCreateCommandQueueWithPropertiesINTEL clCreateCommandQueueWithPropertiesINTEL =
-                (pfn_clCreateCommandQueueWithPropertiesINTEL)clGetExtensionFunctionAddressForPlatform(
-                    _platform_id,
-                    "clCreateCommandQueueWithPropertiesINTEL");
-#ifndef WIN32
-    #pragma GCC diagnostic pop
-#endif
-            unsigned cl_queue_priority_value = CL_QUEUE_PRIORITY_MED_KHR;
-
-            switch (_configuration.priority_mode)
-            {
-            case cldnn_priority_high:
-                cl_queue_priority_value = CL_QUEUE_PRIORITY_HIGH_KHR;
-                break;
-            case cldnn_priority_low:
-                cl_queue_priority_value = CL_QUEUE_PRIORITY_LOW_KHR;
-                break;
-            default:
-                break;
-            }
-
-            cl_int error_code = CL_SUCCESS;
-            cl_queue_properties properties_low[] = {
-                CL_QUEUE_PRIORITY_KHR, cl_queue_priority_value,
-                CL_QUEUE_PROPERTIES, queue_properties,
-                0 };
-
-            _command_queue = clCreateCommandQueueWithPropertiesINTEL(
-                _context.get(),
-                _device.get(),
-                properties_low,
-                &error_code);
-
-            if (error_code != CL_SUCCESS) {
-                throw std::runtime_error("clCreateCommandQueueWithPropertiesINTEL error " + std::to_string(error_code));
-            }
-        }
-        else
-        {
-            throw std::invalid_argument(
-                "The param priority_mode is set in engine_configuration,\
-                 but cl_khr_priority_hints or cl_khr_create_command_queue\
-                 is not supported by current OpenCL implementation.");
-        }
-    }
-    else
-    {
-        _command_queue = cl::CommandQueue(_context, _device, queue_properties);
-    }
-
-    if (_configuration.throttle_mode != cldnn_throttle_disabled)
-    {
-        if (extension_supported("cl_khr_throttle_hints"))
-        {
-            throw std::invalid_argument(
-                "The param throttle_mode is set in engine_configuration,\
-                 but it is placeholder for future use. It has no effect for now\
-                 and should be set to cldnn_throttle_disabled");
-        }
-        else
-        {
-            throw std::invalid_argument(
-                "The param throttle_mode is set in engine_configuration,\
-                 but cl_khr_throttle_hints is not supported by current OpenCL implementation.");
-        }
-    }
+    _ocl_builder.get_device().getInfo(CL_DEVICE_EXTENSIONS, &_extensions);
+    build_command_queues(config);
 
     _logger = std::unique_ptr<ocl_logger>(new ocl_logger());
-
     if (logging_enabled())
     {
         open_log()
@@ -303,9 +120,7 @@ gpu_toolkit::gpu_toolkit(const configuration& config)
             << "    engine log: "          << _configuration.log << "\n"
             << "    sources dumps: "       << _configuration.ocl_sources_dumps_dir << "\n"
             << "\nEngine info:\n"
-            << "    configuration: "       << std::to_string(_engine_info.configuration) << "\n"
-            << "    model: "               << std::to_string(_engine_info.model) << "\n"
-            << "    architecture: "        << std::to_string(_engine_info.architecture) << "\n"
+            << "    device id: "           << _engine_info.dev_id << "\n"
             << "    cores count: "         << _engine_info.cores_count << "\n"
             << "    core frequencey: "     << _engine_info.core_frequency << "\n"
             << "    max work group size: " << _engine_info.max_work_group_size << "\n"
@@ -313,10 +128,28 @@ gpu_toolkit::gpu_toolkit(const configuration& config)
             << "    fp16: "                << std::boolalpha << (_engine_info.supports_fp16 != 0) << "\n"
             << "    fp16 denorms: "        << std::boolalpha << (_engine_info.supports_fp16_denorms != 0) << "\n"
             << "    subgroups short: "     << std::boolalpha << (_engine_info.supports_subgroups_short != 0) << "\n"
+            << "    used defined context: "<< std::boolalpha << _user_context << "\n"
             << std::endl;
     }
 }
 
+void gpu_toolkit::build_command_queues(const configuration& config)
+{
+    command_queues_builder queue_builder(_context, _ocl_builder.get_device(), _platform_id);
+    queue_builder.set_profiling(config.enable_profiling);
+    queue_builder.set_out_of_order((config.host_out_of_order && _neo_driver));
+
+    bool priorty_extensions = extension_supported("cl_khr_priority_hints") && extension_supported("cl_khr_create_command_queue");
+    queue_builder.set_priority_mode(config.priority_mode, priorty_extensions);
+
+    bool throttle_extensions = extension_supported("cl_khr_throttle_hints") && extension_supported("cl_khr_create_command_queue");
+    queue_builder.set_throttle_mode(config.throttle_mode, throttle_extensions);
+
+    queue_builder.build();
+
+    _command_queue = queue_builder.queue();
+}
+
 event_impl::ptr gpu_toolkit::enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const & deps)
 {
     std::vector<cl::Event> dep_events;
@@ -358,14 +191,13 @@ event_impl::ptr gpu_toolkit::enqueue_kernel(cl::Kernel const& kern, cl::NDRange
 
         log(_queue_counter + 1, msg);
     }
-
-    return{ new base_event(shared_from_this(), ret_ev, ++_queue_counter), false };
+    return _events_pool->get_from_base_pool(shared_from_this(), ret_ev, ++_queue_counter);
 }
 
 event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const& deps)
 {
     if (deps.empty())
-        return{ new user_event(shared_from_this(), true), false };
+        return _events_pool->get_from_user_pool(shared_from_this(), true);
 
     if (!_configuration.host_out_of_order)
     {
@@ -379,7 +211,7 @@ event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const&
 
             try {
                 _command_queue.enqueueMarkerWithWaitList(&dep_events, &ret_ev);
-            } 
+            }
             catch (cl::Error const& err) {
                 throw ocl_error(err);
             }
@@ -396,19 +228,33 @@ event_impl::ptr gpu_toolkit::enqueue_marker(std::vector<event_impl::ptr> const&
 
         if (logging_enabled())
             log(_queue_counter + 1, "Marker with dependencies: " + events_list_to_string(deps));
-
-        return{ new base_event(shared_from_this(), ret_ev, ++_queue_counter), false };
+        return _events_pool->get_from_base_pool(shared_from_this(), ret_ev, ++_queue_counter);
     }
     else
     {
         sync_events(deps);
-        return{ new base_event(shared_from_this(), _last_barrier_ev, _last_barrier), false };
+        return _events_pool->get_from_base_pool(shared_from_this(), _last_barrier_ev, _last_barrier);
     }
 }
 
 event_impl::ptr gpu_toolkit::group_events(std::vector<event_impl::ptr> const& deps)
 {
-    return{ new base_events(shared_from_this(), deps), false };
+    return _events_pool->get_from_group_pool(shared_from_this(), deps);
+}
+
+event_impl::ptr gpu_toolkit::create_user_event(bool set)
+{
+    return _events_pool->get_from_user_pool(shared_from_this(), set);
+}
+
+void gpu_toolkit::reset_events()
+{
+    _events_pool->reset_events();
+}
+
+void gpu_toolkit::release_events_pool()
+{
+    _events_pool.reset();
 }
 
 void gpu_toolkit::flush()
@@ -419,7 +265,7 @@ void gpu_toolkit::flush()
 }
 void gpu_toolkit::release_pending_memory()
 {
-    /* 
+    /*
     TODO: Temp. solution, untill proper API calls from OpenCL are released.
     */
     void* ptr = nullptr;
@@ -483,14 +329,14 @@ void gpu_toolkit::sync_events(std::vector<event_impl::ptr> const & deps)
     {
         try {
             if (_output_event)
-            { 
+            {
                 _command_queue.enqueueBarrierWithWaitList(nullptr, &_last_barrier_ev);
             }
             else
             {
                 _command_queue.enqueueBarrierWithWaitList(nullptr, nullptr);
             }
-            
+
         }
         catch (cl::Error const& err) {
             throw ocl_error(err);
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
index 50c746093..1a69bd493 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_toolkit.h
@@ -17,25 +17,20 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 #pragma once
 
-// we want exceptions
-#define CL_HPP_ENABLE_EXCEPTIONS
-#define CL_HPP_MINIMUM_OPENCL_VERSION 120
-#define CL_HPP_TARGET_OPENCL_VERSION 120
-#include <cl2_wrapper.h>
+#include "ocl_builder.h"
 
-#include "api/CPP/profiling.hpp"
 #include "kernels_cache.h"
 #include "engine_info.h"
 #include "event_impl.h"
+#include "confiugration.h"
 
 #include <memory>
 #include <chrono>
 
-namespace cldnn { 
+namespace cldnn {
     typedef cl::vector<cl::vector<unsigned char>> kernels_binaries_vector;
-    typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;	
+    typedef cl::vector<kernels_binaries_vector> kernels_binaries_container;
 namespace gpu {
-
 typedef  CL_API_ENTRY cl_command_queue(CL_API_CALL *pfn_clCreateCommandQueueWithPropertiesINTEL)(
     cl_context context,
     cl_device_id device,
@@ -48,26 +43,7 @@ public:
     ocl_error(cl::Error const& err);
 };
 
-struct configuration
-{
-    enum device_types { default_device = 0, cpu, gpu, accelerator };
-
-    configuration();
-
-    bool enable_profiling;
-    bool meaningful_kernels_names;
-    bool dump_custom_program;
-    device_types device_type;
-    uint32_t device_vendor;
-    std::string compiler_options;
-    std::string single_kernel_name;
-    bool host_out_of_order;
-    std::string log;
-    std::string ocl_sources_dumps_dir;
-    cldnn_priority_mode_type priority_mode;
-    cldnn_throttle_mode_type throttle_mode;
-};
-
+class events_pool;
 class gpu_toolkit;
 
 class context_holder
@@ -82,42 +58,18 @@ protected:
 
 };
 
-struct profiling_period_event : instrumentation::profiling_period
-{
-    profiling_period_event(const cl::Event& event, cl_profiling_info start, cl_profiling_info end)
-        : _event(event)
-        , _start(start)
-        , _end(end)
-    {}
-
-    std::chrono::nanoseconds value() const override
-    {
-        cl_ulong start_nanoseconds;
-        _event.getProfilingInfo(_start, &start_nanoseconds);
-        cl_ulong end_nanoseconds;
-        _event.getProfilingInfo(_end, &end_nanoseconds);
-        return std::chrono::nanoseconds(static_cast<long long>(end_nanoseconds - start_nanoseconds));
-    }
-
-private:
-    cl::Event _event;
-    cl_profiling_info _start;
-    cl_profiling_info _end;
-};
-
 class gpu_toolkit : public std::enable_shared_from_this<gpu_toolkit>
 {
     friend class context_holder;
 
 protected:
     gpu_toolkit(const configuration& aconfiguration = configuration());
-
 public:
     static std::shared_ptr<gpu_toolkit> create(const configuration& cfg = configuration());
     const cl::Context& context() const { return _context; }
-    const cl::Device& device() const { return _device; }
+    const cl::Device& device() const { return _ocl_builder.get_device(); }
     const cl::CommandQueue& queue() const { return _command_queue; }
-    
+
     const configuration& get_configuration() const { return _configuration; }
     engine_info_internal get_engine_info() const { return _engine_info; }
     kernels_cache& get_kernels_cache() { return _kernels_cache; }
@@ -125,7 +77,7 @@ public:
     void store_binaries(kernels_binaries_vector binaries) { _binaries.push_back(binaries); }
     bool get_serialization_flag() { return _serialize; }
     void set_serialization_flag(bool serialization_flag) { _serialize = serialization_flag; }
-    
+
     inline bool extension_supported(const std::string ext) { return _extensions.find(ext) != std::string::npos; }
 
     gpu_toolkit(const gpu_toolkit& other) = delete;
@@ -139,6 +91,9 @@ public:
     event_impl::ptr enqueue_kernel(cl::Kernel const& kern, cl::NDRange const& global, cl::NDRange const& local, std::vector<event_impl::ptr> const& deps);
     event_impl::ptr enqueue_marker(std::vector<event_impl::ptr> const& deps);
     event_impl::ptr group_events(std::vector<event_impl::ptr> const& deps);
+    void reset_events();
+    event_impl::ptr create_user_event(bool set);
+    void release_events_pool();
 
     void flush();
     void release_pending_memory();
@@ -147,10 +102,10 @@ public:
     void log(uint64_t id, std::string const& msg);
     bool logging_enabled() const { return !_configuration.log.empty(); }
     bool is_neo_driver() { return _neo_driver; }
-
 private:
     configuration _configuration;
-    cl::Device _device;
+    ocl_builder _ocl_builder;
+    bool _user_context = false;
     bool _neo_driver = false;
     cl::Context _context;
     cl::CommandQueue _command_queue;
@@ -162,6 +117,7 @@ private:
 
     std::atomic<uint64_t> _queue_counter{ 0 };
     std::atomic<uint64_t> _last_barrier{ 0 };
+    std::unique_ptr<events_pool> _events_pool;
     cl::Event _last_barrier_ev;
 
     std::string _extensions;
@@ -174,7 +130,9 @@ private:
     bool _output_event = false;
     std::ofstream& open_log();
 
-    std::string get_device_version() { return _device.getInfo<CL_DEVICE_VERSION>(); }
+    std::string get_device_version() { return _ocl_builder.get_device().getInfo<CL_DEVICE_VERSION>(); }
+
+    void build_command_queues(const configuration& config);
 };
 
 }}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp b/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp
index 5769193a8..c35713475 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.cpp
@@ -1,5 +1,22 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
 #include "ocl_user_event.h"
 
+
 using namespace cldnn::gpu;
 
 void user_event::set_impl()
@@ -10,6 +27,7 @@ void user_event::set_impl()
     static_cast<cl::UserEvent&&>(get()).setStatus(CL_COMPLETE);
     _duration = std::unique_ptr<cldnn::instrumentation::profiling_period_basic>(
                             new cldnn::instrumentation::profiling_period_basic(_timer.uptime()));
+    _attached = true;
 }
 
 bool user_event::get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) {
@@ -20,4 +38,4 @@ bool user_event::get_profiling_info_impl(std::list<cldnn_profiling_interval>& in
     
     info.push_back({ "duration", static_cast<uint64_t>(_duration->value().count()) });
     return true;
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h b/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h
index 8fe269200..6346aadd3 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/ocl_user_event.h
@@ -1,6 +1,24 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+
 #pragma once
 
 #include "ocl_base_event.h"
+#include "api/CPP/profiling.hpp"
 
 #ifdef _WIN32
 #pragma warning(push)
@@ -11,14 +29,23 @@ namespace cldnn { namespace gpu {
 
 struct user_event : public base_event, public cldnn::user_event
 {
-    user_event(std::shared_ptr<gpu_toolkit> ctx, bool auto_set = false) : base_event(ctx, cl::UserEvent(ctx->context())), cldnn::user_event(auto_set)
-    {
-        if (auto_set)
-            user_event::set_impl();
-    }
+    user_event(std::shared_ptr<gpu_toolkit> ctx) 
+        : base_event(ctx)
+        , cldnn::user_event(false)
+    {}
 
     void set_impl() override;
-
+    void attach_event(bool set)
+    {
+        _event = cl::UserEvent(get_context()->context());
+        //we need to reset the timer(since attach_ocl_event is called only when this object is being reused)
+        _timer = cldnn::instrumentation::timer<>(); 
+        if (set)
+        {
+            set_impl();
+            _set = set;
+        }
+    }
     bool get_profiling_info_impl(std::list<cldnn_profiling_interval>& info) override;
 
 protected:
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp
new file mode 100644
index 000000000..8b7c4f1cf
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/one_hot_gpu.cpp
@@ -0,0 +1,72 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "one_hot_inst.h"
+
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "one_hot/one_hot_kernel_selector.h"
+#include "one_hot/one_hot_kernel_base.h"
+#include "error_handler.h"
+
+namespace cldnn {
+    namespace gpu {
+
+        struct one_hot_gpu : typed_primitive_gpu_impl<one_hot>
+        {
+            using parent = typed_primitive_gpu_impl<one_hot>;
+            using parent::parent;
+
+
+            static primitive_impl* create(const one_hot_node& arg)
+            {
+                auto oh_params = get_default_params<kernel_selector::one_hot_params>(arg, 1);
+                auto oh_optional_params = get_default_optional_params<kernel_selector::one_hot_optional_params>(arg.get_program());
+
+                oh_params.one_hot_axis = arg.get_primitive()->one_hot_axis;
+
+                auto output_sizes = arg.get_output_layout().size;
+                std::vector<tensor::value_type> output_dims = { output_sizes.batch[0], output_sizes.feature[0],
+                    output_sizes.spatial[1], output_sizes.spatial[0] };
+                oh_params.one_hot_limit = output_dims[oh_params.one_hot_axis];
+
+                auto& kernel_selector = kernel_selector::one_hot_kernel_selector::Instance();
+                auto best_kernels = kernel_selector.GetBestKernels(oh_params, oh_optional_params);
+
+                CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with these arguments");
+
+                return new one_hot_gpu(arg, best_kernels[0]);
+            }
+        };
+
+        namespace {
+            struct attach {
+                attach() {
+                    auto val_fw = one_hot_gpu::create;
+
+                    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::bfyx), val_fw);
+                    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::bfyx), val_fw);
+                    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i32, format::bfyx), val_fw);
+                    implementation_map<one_hot>::add(std::make_tuple(engine_types::ocl, data_types::i64, format::bfyx), val_fw);
+                }
+                ~attach() = default;
+            };
+
+            attach attach_impl;
+
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp
index 6bf02082b..886562560 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/permute_gpu.cpp
@@ -36,13 +36,8 @@ struct permute_gpu : typed_primitive_gpu_impl<permute>
         auto permute_params = get_default_params<kernel_selector::permute_params>(arg);
         auto permute_optional_params = get_default_optional_params<kernel_selector::permute_optional_params>(arg.get_program());
 
-        uint16_t max_input_index = (uint16_t)(permute_params.inputs[0].GetDims().size() - 1);
         const auto& permute_order = arg.get_primitive()->permute_order;
-        for (size_t i = 0; i < permute_order.size(); i++)
-        {
-            auto order = permute_order[permute_order.size() - 1 - i];
-            permute_params.order.push_back(max_input_index - order);
-        }
+        permute_params.order = permute_order;
         auto& kernel_selector = kernel_selector::permute_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(permute_params, permute_optional_params);
 
@@ -65,4 +60,4 @@ namespace {
     };
     attach attach_impl;
 }
-} }
-\ No newline at end of file
+} }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp
index e21df5199..401b716f4 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/pooling_gpu.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -113,6 +113,11 @@ public:
         pp.poolType                 = cldnn_2_pool_type(primitive->mode);
         pp.remainderAction          = kernel_selector::pool_remainder::CEIL;
 
+        if (primitive->global_pooling) {
+            primitive->size.spatial[0] = input_sizes.spatial[0];
+            primitive->size.spatial[1] = input_sizes.spatial[1];
+        }
+
         //check if last pooling window goes outside of input size + padding. If so the avg pooling size will be adjusted to that.
         auto dynamic_mode = (((output_sizes.spatial[0] - 1) * stride.spatial[0]) + primitive->size.spatial[0]) > -2 * input_offset.spatial[0] + input_sizes.spatial[0] ||
             (((output_sizes.spatial[1] - 1) * stride.spatial[1]) + primitive->size.spatial[1]) > -2 * input_offset.spatial[1] + input_sizes.spatial[1];
@@ -174,6 +179,8 @@ namespace {
             // MMAD
             implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::byxf_af32), pooling_gpu::create);
             implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::fs_bs_yx_bsv4_fsv32), pooling_gpu::create);
+            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::i8, format::b_fs_yx_fsv4), pooling_gpu::create);
+            implementation_map<pooling>::add(std::make_tuple(engine_types::ocl, data_types::u8, format::b_fs_yx_fsv4), pooling_gpu::create);
         }
         ~attach() {}
     };
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.cpp b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.cpp
index 3128f2aff..f11a8ec15 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.cpp
@@ -16,10 +16,6 @@
 
 #include "primitive_gpu_base.h"
 
-#include "detection_output_inst.h"
-#include "proposal_inst.h"
-#include "prior_box_inst.h"
-
 namespace cldnn {
     namespace gpu {
 
@@ -27,12 +23,8 @@ namespace cldnn {
         {
             for (const auto& user : users)
             {
-                if (user->type() == detection_output::type_id() ||
-                    user->type() == prior_box::type_id() ||
-                    user->type() == proposal::type_id())
-                {
+                if (user->get_selected_impl()->is_cpu())
                     return true;
-                }
             }
             return false;
         }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h
index 8343147ce..704b83e1e 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h
+++ b/inference-engine/thirdparty/clDNN/src/gpu/primitive_gpu_base.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include "primitive_inst.h"
+#include "program_impl.h"
 #include "kernel.h"
 #include "events_waiter.h"
 #include "error_handler.h"
@@ -30,8 +31,8 @@ namespace cldnn { namespace gpu
 bool is_any_user_cpu(const std::list<const program_node*>& users);
 
 /*
-Base class for all implementation of specified primitive type.
-For example, all convolution implementations should derive from typed_primitive_impl<convolution>.
+Base class for all GPU implementation of specified primitive type.
+For example, all gpu convolution implementations should derive from typed_primitive_gpu_impl<convolution>.
 */
 template <class PType>
 struct typed_primitive_gpu_impl : public typed_primitive_impl<PType>
@@ -67,13 +68,11 @@ struct typed_primitive_gpu_impl : public typed_primitive_impl<PType>
             auto& eimpl = arg.get_program().get_engine();
             _intermediates_memory.push_back(eimpl.allocate_memory(expected_layout));
         }
-    }
-protected:
 
-    virtual bool validate(typed_primitive_inst<PType>&) const
-    {
-        return true;
     }
+    bool is_cpu() const override { return false; }
+
+protected:
 
     virtual bool optimized_out(typed_primitive_inst<PType>&) const
     {
@@ -99,6 +98,11 @@ protected:
         return 1;
     }
 
+    virtual uint32_t get_groups() const
+    {
+        return 1;
+    }
+
     event_impl::ptr aggregate_events(const std::vector<event_impl::ptr>& events, bool group=false) const
     {
         if (events.size() == 1)
@@ -112,9 +116,6 @@ protected:
 
     virtual event_impl::ptr execute_impl(const std::vector<event_impl::ptr>& events, typed_primitive_inst<PType>& instance) override
     {
-        const bool validated = validate(instance);
-        CLDNN_ERROR_NOT_EQUAL(_outer.id(), "validate", validated, "", true, "not a valid instance.");
-
         if (optimized_out(instance))
         {
             return aggregate_events(events);
@@ -124,6 +125,9 @@ protected:
 
         // TODO - split should be handle in kernel selector by providing multiple kernels.
         auto split = get_split();
+        auto groups = get_groups();
+        if (split == 1)
+            split = groups;
 
         // we iterate over split first in order to be able parallelism with OOOQ mechanism.
         for (size_t k = 0; k < _kernels.size(); ++k)
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp
index 6eb7393ca..d172f84cf 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/proposal_gpu.cpp
@@ -16,7 +16,6 @@
 
 #include "proposal_inst.h"
 #include "kernel.h"
-#include "kd_selector.h"
 #include "implementation_map.h"
 #include "network_impl.h"
 #include "engine_impl.h"
@@ -38,7 +37,7 @@ namespace {
     *                                                                          *
     ****************************************************************************/
 
-    inline const float & clamp(const float & v, const float & lower, const float & upper)
+    inline const float& clamp(const float & v, const float & lower, const float & upper)
     {
         return std::max(lower, std::min(v, upper));
     }
@@ -54,22 +53,22 @@ namespace {
     {
         float x0, y0, x1, y1;
 
-        inline float area() const 
-        { 
-            return std::max(0.f, y1 - y0 + 1.f) * std::max(0.f, x1 - x0 + 1.f); 
+        inline float area() const
+        {
+            return std::max(0.f, y1 - y0 + 1.f) * std::max(0.f, x1 - x0 + 1.f);
         }
     };
 
     struct delta_t { float shift_x, shift_y, log_w, log_h; };
 
-    struct proposal_t 
-    { 
+    struct proposal_t
+    {
         proposal_t() = default;
         proposal_t(const roi_t& r, const float c, const size_t& o) : roi(r), confidence(c), ord(o) {}
 
-        roi_t roi; 
-        float confidence; 
-        size_t ord; 
+        roi_t roi;
+        float confidence;
+        size_t ord;
     };
 
     inline float float_read_helper(const float* mem)
@@ -124,7 +123,8 @@ namespace {
             int img_w,
             int img_h,
             float coordinates_offset,
-            bool initial_clip)
+            bool initial_clip,
+            bool clip_before_nms)
     {
         float x0 = box.start_x + anchor_shift_x;
         float y0 = box.start_y + anchor_shift_y;
@@ -149,10 +149,20 @@ namespace {
         const float half_pred_w = std::exp(delta.log_w) * anchor_w * .5f;
         const float half_pred_h = std::exp(delta.log_h) * anchor_h * .5f;
 
-        return { clamp(pred_center_x - half_pred_w, 0.f, img_w - coordinates_offset),
-                 clamp(pred_center_y - half_pred_h, 0.f, img_h - coordinates_offset),
-                 clamp(pred_center_x + half_pred_w, 0.f, img_w - coordinates_offset),
-                 clamp(pred_center_y + half_pred_h, 0.f, img_h - coordinates_offset) };
+        float new_x0 = pred_center_x - half_pred_w;
+        float new_y0 = pred_center_y - half_pred_h;
+        float new_x1 = pred_center_x + half_pred_w;
+        float new_y1 = pred_center_y + half_pred_h;
+
+        if (clip_before_nms)
+        {
+            new_x0 = clamp(new_x0, 0.f, img_w - coordinates_offset);
+            new_y0 = clamp(new_y0, 0.f, img_h - coordinates_offset);
+            new_x1 = clamp(new_x1, 0.f, img_w - coordinates_offset);
+            new_y1 = clamp(new_y1, 0.f, img_h - coordinates_offset);
+        }
+
+        return { new_x0, new_y0, new_x1, new_y1 };
     }
 
     std::vector<roi_t> perform_nms(
@@ -242,11 +252,13 @@ struct proposal_gpu : typed_primitive_impl<proposal>
 
         bool swap_xy = instance.argument.swap_xy;
         bool initial_clip = instance.argument.initial_clip;
+        bool clip_before_nms = instance.argument.clip_before_nms;
+        bool clip_after_nms = instance.argument.clip_after_nms;
         float coordinates_offset = instance.argument.coordinates_offset;
         float box_coordinate_scale = instance.argument.box_coordinate_scale;
         float box_size_scale = instance.argument.box_size_scale;
 
-        if (image_info.get_layout().count() == 4)
+        if (image_info.get_layout().size.feature[0] == 4)
         {
             img_w = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_width_index) + EPSILON);
             img_h = static_cast<int>(float_read_helper(image_info_mem + proposal_inst::image_info_height_index) + EPSILON);
@@ -262,13 +274,13 @@ struct proposal_gpu : typed_primitive_impl<proposal>
             scaled_min_bbox_size *= img_z;
 
             min_bbox_x = scaled_min_bbox_size;
-            if (image_info.get_layout().count() > proposal_inst::image_info_scale_min_bbox_x)
+            if (image_info.get_layout().size.feature[0] > proposal_inst::image_info_scale_min_bbox_x)
             {
                 min_bbox_x = static_cast<int>(min_bbox_x * float_read_helper(image_info_mem + proposal_inst::image_info_scale_min_bbox_x));
             }
 
             min_bbox_y = scaled_min_bbox_size;
-            if (image_info.get_layout().count() > proposal_inst::image_info_scale_min_bbox_y)
+            if (image_info.get_layout().size.feature[0] > proposal_inst::image_info_scale_min_bbox_y)
             {
                 min_bbox_y = static_cast<int>(min_bbox_y * float_read_helper(image_info_mem + proposal_inst::image_info_scale_min_bbox_y));
             }
@@ -291,67 +303,80 @@ struct proposal_gpu : typed_primitive_impl<proposal>
         const dtype* cls_scores_mem = cls_scores_ptr.data();
         const dtype* bbox_pred_mem  = bbox_pred_ptr.data();
 
-        std::vector<proposal_t> sorted_proposals_confidence;
-        sorted_proposals_confidence.reserve(fm_h * fm_w * anchors_num);
-        for (int y = 0; y < fm_h; ++y)
+        for (int n = 0; n < score_size.batch[0]; n++)
         {
-            for (int x = 0; x < fm_w; ++x)
+            std::vector<proposal_t> sorted_proposals_confidence;
+            size_t num_proposals = fm_h * fm_w * anchors_num;
+            sorted_proposals_confidence.reserve(num_proposals);
+            for (int y = 0; y < fm_h; ++y)
             {
-                const int anchor_shift_x = (swap_xy ? y : x) * instance.argument.feature_stride;
-                const int anchor_shift_y = (swap_xy ? x : y) * instance.argument.feature_stride;
-                const int location_index = y * fm_w + x;
-
-                // we assume proposals are grouped by window location
-                for (unsigned int anchor_index = 0; anchor_index < anchors_num ; anchor_index++)
+                for (int x = 0; x < fm_w; ++x)
                 {
-                    float dx0 = float_read_helper(bbox_pred_mem + location_index + fm_sz * (anchor_index * 4 + 0)) / box_coordinate_scale;
-                    float dy0 = float_read_helper(bbox_pred_mem + location_index + fm_sz * (anchor_index * 4 + 1)) / box_coordinate_scale;
-                    float dx1 = float_read_helper(bbox_pred_mem + location_index + fm_sz * (anchor_index * 4 + 2)) / box_size_scale;
-                    float dy1 = float_read_helper(bbox_pred_mem + location_index + fm_sz * (anchor_index * 4 + 3)) / box_size_scale;
-
-                    delta_t bbox_delta { dx0, dy0, dx1, dy1 };
-
-                    const roi_t& roi = gen_bbox(anchors[anchor_index], bbox_delta, anchor_shift_x, anchor_shift_y,
-                                                img_w, img_h, coordinates_offset, initial_clip);
-
-                    int bbox_w = (int)(roi.x1 - roi.x0 + coordinates_offset);
-                    int bbox_h = (int)(roi.y1 - roi.y0 + coordinates_offset);
-
-                    unsigned int scores_index = location_index + fm_sz * (anchor_index + (unsigned int)anchors_num);
-                    float proposal_confidence = (min_bbox_x <= bbox_w)* (min_bbox_y <= bbox_h) * float_read_helper(cls_scores_mem + scores_index);
-                    sorted_proposals_confidence.emplace_back(roi, proposal_confidence, sorted_proposals_confidence.size());
+                    const int anchor_shift_x = (swap_xy ? y : x) * instance.argument.feature_stride;
+                    const int anchor_shift_y = (swap_xy ? x : y) * instance.argument.feature_stride;
+                    const int location_index = y * fm_w + x;
+
+                    // we assume proposals are grouped by window location
+                    for (unsigned int anchor_index = 0; anchor_index < anchors_num ; anchor_index++)
+                    {
+                        float dx0 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 0)) / box_coordinate_scale;
+                        float dy0 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 1)) / box_coordinate_scale;
+                        float dx1 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 2)) / box_size_scale;
+                        float dy1 = float_read_helper(bbox_pred_mem + n*num_proposals*4 + location_index + fm_sz * (anchor_index * 4 + 3)) / box_size_scale;
+
+                        delta_t bbox_delta { dx0, dy0, dx1, dy1 };
+
+                        const roi_t& roi = gen_bbox(anchors[anchor_index], bbox_delta, anchor_shift_x, anchor_shift_y,
+                                                    img_w, img_h, coordinates_offset, initial_clip, clip_before_nms);
+
+                        int bbox_w = (int)(roi.x1 - roi.x0 + coordinates_offset);
+                        int bbox_h = (int)(roi.y1 - roi.y0 + coordinates_offset);
+
+                        size_t scores_index = n*num_proposals * 2 + location_index + fm_sz * (anchor_index + anchors_num);
+                        float proposal_confidence = (min_bbox_x <= bbox_w)* (min_bbox_y <= bbox_h) * float_read_helper(cls_scores_mem + scores_index);
+                        sorted_proposals_confidence.emplace_back(roi, proposal_confidence, sorted_proposals_confidence.size());
+                    }
                 }
             }
-        }
 
-        size_t pre_nms = std::min(instance.argument.pre_nms_topn, (int)sorted_proposals_confidence.size());
-        sort_and_keep_n_items(sorted_proposals_confidence, pre_nms);
-        const std::vector<roi_t>& res = perform_nms(sorted_proposals_confidence, instance.argument.iou_threshold,
-                                                    instance.argument.post_nms_topn, coordinates_offset);
+            size_t pre_nms = std::min(instance.argument.pre_nms_topn, (int)sorted_proposals_confidence.size());
+            sort_and_keep_n_items(sorted_proposals_confidence, pre_nms);
+            std::vector<roi_t> res = perform_nms(sorted_proposals_confidence, instance.argument.iou_threshold,
+                                                 instance.argument.post_nms_topn, coordinates_offset);
 
-        auto& output = instance.output_memory();
+            auto& output = instance.output_memory();
 
-        mem_lock<dtype> output_ptr{ output };
-        dtype* top_data = output_ptr.data();
+            mem_lock<dtype> output_ptr{ output };
+            dtype* top_data = output_ptr.data() + n*instance.argument.post_nms_topn*5;
 
-        size_t res_num_rois = res.size();
+            size_t res_num_rois = res.size();
 
-        for (size_t i = 0; i < res_num_rois; ++i)
-        {
-            float_write_helper(top_data + 5 * i + 0, 0.0f);
-            float_write_helper(top_data + 5 * i + 1, res[i].x0);
-            float_write_helper(top_data + 5 * i + 2, res[i].y0);
-            float_write_helper(top_data + 5 * i + 3, res[i].x1);
-            float_write_helper(top_data + 5 * i + 4, res[i].y1);
-        }
 
-        for (size_t i = res_num_rois; i < (size_t)instance.argument.post_nms_topn; i++)
-        {
-            float_write_helper(top_data + 5*i + 0, -1.0f);
-            float_write_helper(top_data + 5*i + 1,  0.0f);
-            float_write_helper(top_data + 5*i + 2,  0.0f);
-            float_write_helper(top_data + 5*i + 3,  0.0f);
-            float_write_helper(top_data + 5*i + 4,  0.0f);
+            for (size_t i = 0; i < res_num_rois; ++i)
+            {
+                if (clip_after_nms)
+                {
+                    res[i].x0 = clamp(res[i].x0, 0.0f, float(img_w));
+                    res[i].y0 = clamp(res[i].y0, 0.0f, float(img_h));
+                    res[i].x1 = clamp(res[i].x1, 0.0f, float(img_w));
+                    res[i].y1 = clamp(res[i].y1, 0.0f, float(img_h));
+                }
+
+                float_write_helper(top_data + 5 * i + 0, float(n));
+                float_write_helper(top_data + 5 * i + 1, res[i].x0 / (instance.argument.normalize ? img_w : 1.0f));
+                float_write_helper(top_data + 5 * i + 2, res[i].y0 / (instance.argument.normalize ? img_h : 1.0f));
+                float_write_helper(top_data + 5 * i + 3, res[i].x1 / (instance.argument.normalize ? img_w : 1.0f));
+                float_write_helper(top_data + 5 * i + 4, res[i].y1 / (instance.argument.normalize ? img_h : 1.0f));
+            }
+
+            for (size_t i = res_num_rois; i < (size_t)instance.argument.post_nms_topn; i++)
+            {
+                float_write_helper(top_data + 5*i + 0, -1.0f);
+                float_write_helper(top_data + 5*i + 1,  0.0f);
+                float_write_helper(top_data + 5*i + 2,  0.0f);
+                float_write_helper(top_data + 5*i + 3,  0.0f);
+                float_write_helper(top_data + 5*i + 4,  0.0f);
+            }
         }
     }
 
@@ -380,17 +405,15 @@ struct proposal_gpu : typed_primitive_impl<proposal>
     static primitive_impl* create(const proposal_node& arg)
     {
         const layout & l = arg.image_info().get_output_layout();
-        const size_t count = l.size.count();
+        const size_t count = static_cast<size_t>(l.size.feature[0]);
 
         //Supported image_info sizes and components meaning:
         // - image_info[3] = { img_height, img_width, img_depth }
         // - image_info[4] = { img_height, img_width, scale_min_bbox_y, scale_min_bbox_x }
         // - image_info[6] = { img_height, img_width, img_depth, scale_min_bbox_y, scale_min_bbox_x, scale_depth_index }
-        if ((size_t)l.size.feature[0] != count || (count != 3 && count != 4 && count != 6)) {
+        if (count != 3 && count != 4 && count != 6) {
             CLDNN_ERROR_MESSAGE(arg.id(), "image_info must have either 3, 4 or 6 items");
         }
-        CLDNN_ERROR_BOOL(arg.id(), "Batching", !hasSingleBatchOutput(arg.bbox_pred()), "Proposal doesn't support batching.");
-        CLDNN_ERROR_BOOL(arg.id(), "Batching", !hasSingleBatchOutput(arg.cls_score()), "Proposal doesn't support batching.");
 
         return new proposal_gpu(arg);
     }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp
new file mode 100644
index 000000000..d5164a1af
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/pyramid_roi_align_gpu.cpp
@@ -0,0 +1,76 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pyramid_roi_align_inst.h"
+
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "pyramid_roi_align/pyramid_roi_align_kernel_selector.h"
+#include "pyramid_roi_align/pyramid_roi_align_kernel_base.h"
+#include "error_handler.h"
+#include "pyramid_roi_align_inst.h"
+#include "network_impl.h"
+
+
+#define DEPTH_OF_FEATURE_MAP            4
+#define NUM_COORDINATES                 4
+#define META_OFFSET_X                   4
+#define META_OFFSET_Y                   5
+
+namespace cldnn {  namespace gpu {
+
+struct pyramid_roi_align_gpu : typed_primitive_gpu_impl<pyramid_roi_align>
+{
+    using parent = typed_primitive_gpu_impl<pyramid_roi_align>;
+    using parent::parent;
+
+    static primitive_impl* create(const pyramidROIAlign_node& arg)
+    {
+        auto pyramidROIAlign_params = get_default_params<kernel_selector::PyramidROIAlign_params>(arg, 1);
+        auto pyramidROIAlign_optional_params = get_default_optional_params<kernel_selector::PyramidROIAlign_optional_params>(arg.get_program());
+
+        pyramidROIAlign_params.inputs.push_back(convert_data_tensor(arg.image_meta().get_output_layout()));
+        pyramidROIAlign_params.inputs.push_back(convert_data_tensor(arg.P2().get_output_layout()));
+        pyramidROIAlign_params.inputs.push_back(convert_data_tensor(arg.P3().get_output_layout()));
+        pyramidROIAlign_params.inputs.push_back(convert_data_tensor(arg.P4().get_output_layout()));
+        pyramidROIAlign_params.inputs.push_back(convert_data_tensor(arg.P5().get_output_layout()));
+        pyramidROIAlign_params.inputs.push_back(convert_data_tensor(arg.pool_size().get_output_layout()));
+
+
+        auto& kernel_selector = kernel_selector::PyramidROIAlign_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(pyramidROIAlign_params, pyramidROIAlign_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+        return new pyramid_roi_align_gpu(arg, best_kernels[0]);
+    }
+
+};
+
+namespace {
+    struct attach {
+        attach() {
+            auto val_fw = pyramid_roi_align_gpu::create;
+            implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+            implementation_map<pyramid_roi_align>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+        }
+        ~attach() = default;
+
+    };
+
+    attach attach_impl;
+
+}
+}}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp
new file mode 100644
index 000000000..146a86468
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/reverse_sequence_gpu.cpp
@@ -0,0 +1,71 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "reverse_sequence_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "reverse_sequence/reverse_sequence_kernel_selector.h"
+#include "reverse_sequence/reverse_sequence_kernel_ref.h"
+#include "error_handler.h"
+
+using namespace cldnn;
+
+namespace cldnn { namespace gpu
+{
+struct reverse_sequence_gpu : typed_primitive_gpu_impl<reverse_sequence>
+{
+    using parent = typed_primitive_gpu_impl<reverse_sequence>;
+    using parent::parent;
+
+public:
+
+    static primitive_impl* create(const reverse_sequence_node& arg)
+    {
+        auto reverse_sequence_params = get_default_params<kernel_selector::reverse_sequence_params>(arg);
+        auto reverse_sequence_optional_params = get_default_optional_params<kernel_selector::reverse_sequence_optional_params>(arg.get_program());
+
+        reverse_sequence_params.seq_axis = arg.get_primitive()->seq_axis;
+        reverse_sequence_params.batch_axis = arg.get_primitive()->batch_axis;
+
+        reverse_sequence_params.inputs.push_back(convert_data_tensor(arg.input(1).get_output_layout()));
+
+        auto& kernel_selector = kernel_selector::reverse_sequence_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(reverse_sequence_params, reverse_sequence_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+        auto reverse_sequence = new reverse_sequence_gpu(arg, best_kernels[0]);
+
+        return reverse_sequence;
+    }
+};
+
+namespace
+{
+    struct attach
+    {
+        attach()
+        {
+            auto val_fw = reverse_sequence_gpu::create;
+            implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+            implementation_map<reverse_sequence>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+        }
+        ~attach() = default;
+    };
+    attach attach_impl;
+}
+} }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp
index 3ff7df6f2..d4d5dd67f 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/roi_pooling_gpu.cpp
@@ -83,30 +83,22 @@ public:
         CLDNN_ERROR_NOT_EQUAL(arg.id(), "roi_pooling padding filling value", padding_filling_value, "padding mode", 0.0f, "Unknown padding mode in roi_pooling.");
         CLDNN_ERROR_NOT_PROPER_FORMAT(arg.id(), "Input_layout.format", input_layout.format.value, "output_layout.format", output_layout.format);
 
-        auto group_sz = primitive->group_sz;
-        auto in_feat = input_layout.get_buffer_size().feature[0];
-        auto out_feat = output_layout.get_buffer_size().feature[0];
-
-        CLDNN_ERROR_LESS_THAN(arg.id(), "Group size", group_sz, "value", 0, "");
-        if (group_sz) {
-            CLDNN_ERROR_NOT_EQUAL(arg.id(), "input feture map", in_feat, "group_sz * group_sz * out_feat", group_sz * group_sz * out_feat, "");
-        }
-        CLDNN_ERROR_BOOL(arg.id(), "Batching", !hasSingleBatchOutput(arg.input()), "PS/ RoI Pooling doesn't support batching.");
-
         auto roi_params = get_default_params<kernel_selector::roi_pooling_params>(arg);
         auto roi_optional_params = get_default_optional_params<kernel_selector::roi_pooling_optional_params>(arg.get_program());
 
         const auto& out = roi_params.output;
-        
+
         const auto roi_bfyx = convert_data_tensor(rois_layout);
         const auto roi_bf = roi_bfyx.FlattenFeatureAndSpatials();
         roi_params.inputs.push_back(roi_bf);
         roi_params.output = { out.GetDims(), out.GetDType(), kernel_selector::data_layout::brfyx, out.GetViewOffset(), out.PhysicalSize(), out.GetPaddedVal() }; // TOOD: it's an hack - cldnn doesn't support roi pooling with batching
-        roi_params.mode         = cldnn_2_pool_type(primitive->mode);
-        roi_params.pooledWidth  = primitive->pooled_width;
-        roi_params.pooledHeight = primitive->pooled_height;
-        roi_params.spatialScale = primitive->spatial_scale;
-        roi_params.groupSize    = group_sz;
+        roi_params.mode               = cldnn_2_pool_type(primitive->mode);
+        roi_params.position_sensitive = primitive->position_sensitive;
+        roi_params.pooledWidth        = primitive->pooled_width;
+        roi_params.pooledHeight       = primitive->pooled_height;
+        roi_params.spatialScale       = primitive->spatial_scale;
+        roi_params.spatial_bins_x     = primitive->spatial_bins_x;
+        roi_params.spatial_bins_y     = primitive->spatial_bins_y;
 
         auto& kernel_selector = kernel_selector::roi_pooling_kernel_selector::Instance();
         auto best_kernels = kernel_selector.GetBestKernels(roi_params, roi_optional_params);
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
new file mode 100644
index 000000000..454810f61
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/shuffle_channels_gpu.cpp
@@ -0,0 +1,75 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "shuffle_channels_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "shuffle_channels/shuffle_channels_kernel_selector.h"
+#include "shuffle_channels/shuffle_channels_kernel_ref.h"
+#include "error_handler.h"
+
+using namespace cldnn;
+
+namespace cldnn { namespace gpu {
+
+struct shuffle_channels_gpu : typed_primitive_gpu_impl<shuffle_channels>
+{
+    using parent = typed_primitive_gpu_impl<shuffle_channels>;
+    using parent::parent;
+
+public:
+
+    static primitive_impl* create(const shuffle_channels_node& arg)
+    {
+        auto shuffle_channels_params = get_default_params<kernel_selector::shuffle_channels_params>(arg);
+        auto shuffle_channels_optional_params = get_default_optional_params<kernel_selector::shuffle_channels_optional_params>(arg.get_program());
+
+        const int32_t number_of_dims = 4;
+        int32_t axis = arg.get_primitive()->axis;
+
+        if (axis < 0)
+            axis += number_of_dims;
+
+        shuffle_channels_params.group = arg.get_primitive()->group;
+        shuffle_channels_params.axis = axis;
+
+        auto& kernel_selector = kernel_selector::shuffle_channels_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(shuffle_channels_params, shuffle_channels_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+        auto shuffle_channels = new shuffle_channels_gpu(arg, best_kernels[0]);
+
+        return shuffle_channels;
+    }
+};
+
+namespace
+{
+    struct attach
+    {
+        attach()
+        {
+            auto val_fw = shuffle_channels_gpu::create;
+            implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+            implementation_map<shuffle_channels>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+        }
+        ~attach() = default;
+    };
+    attach attach_impl;
+}
+} }
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp
new file mode 100644
index 000000000..b093ca2b5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/gpu/strided_slice_gpu.cpp
@@ -0,0 +1,99 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "strided_slice_inst.h"
+#include "primitive_gpu_base.h"
+#include "implementation_map.h"
+#include "kernel_selector_helper.h"
+#include "strided_slice/strided_slice_kernel_ref.h"
+#include "strided_slice/strided_slice_kernel_selector.h"
+#include "error_handler.h"
+#include "data_inst.h"
+
+using namespace cldnn;
+
+namespace cldnn
+{
+namespace gpu
+{
+
+struct strided_slice_gpu : typed_primitive_gpu_impl<strided_slice>
+{
+    using parent = typed_primitive_gpu_impl<strided_slice>;
+    using parent::parent;
+public:
+    static primitive_impl* create(const strided_slice_node& arg)
+    {
+        auto strided_slice_params = get_default_params<kernel_selector::strided_slice_params>(arg);
+        auto strided_slice_optional_params = get_default_optional_params<kernel_selector::strided_slice_optional_params>(arg.get_program());
+        const int32_t numberOfDims = 4;
+
+        auto complete_strided_slice_params = [&](std::vector<int32_t>& param) {
+            for (size_t i = param.size(); i < numberOfDims; ++i)
+                param.push_back(1);
+        };
+
+        auto completeStridedSliceMasks = [&](std::vector<uint8_t>& mask) {
+            for (size_t i = mask.size(); i < numberOfDims; ++i)
+                mask.push_back(0);
+        };
+
+        // Getting data from constant inputs. There are 3 args: Begin, End, Stride
+        for (size_t i = 1; i < arg.get_dependencies().size(); ++i) {
+            auto& input = arg.get_dependency(i).as<data>();
+            auto& mem = input.get_attached_memory();
+            int32_t* data = static_cast<int32_t*>(mem.lock());
+            std::vector<int32_t> vData = std::vector<int32_t>(data, data + input.get_output_layout().count());
+            complete_strided_slice_params(vData);
+            strided_slice_params.striding_params.push_back(vData);
+            mem.unlock();
+        }
+
+        strided_slice_params.end_mask = arg.get_primitive()->end_mask;
+        completeStridedSliceMasks(strided_slice_params.end_mask);
+        strided_slice_params.begin_mask = arg.get_primitive()->begin_mask;
+        completeStridedSliceMasks(strided_slice_params.begin_mask);
+        strided_slice_params.new_axis_mask = arg.get_primitive()->new_axis_mask;
+        strided_slice_params.shrink_axis_mask = arg.get_primitive()->shrink_axis_mask;
+        completeStridedSliceMasks(strided_slice_params.shrink_axis_mask);
+
+        auto& kernel_selector = kernel_selector::strided_slice_kernel_selector::Instance();
+        auto best_kernels = kernel_selector.GetBestKernels(strided_slice_params, strided_slice_optional_params);
+
+        CLDNN_ERROR_BOOL(arg.id(), "Best_kernel.empty()", best_kernels.empty(), "Cannot find a proper kernel with this arguments");
+
+        auto strided_slice = new strided_slice_gpu(arg, best_kernels[0]);
+
+        return strided_slice;
+    }
+};
+
+namespace
+{
+    struct attach
+    {
+        attach()
+        {
+            auto val_fw = strided_slice_gpu::create;
+            implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f32, format::bfyx), val_fw);
+            implementation_map<strided_slice>::add(std::make_tuple(engine_types::ocl, data_types::f16, format::bfyx), val_fw);
+        }
+        ~attach() = default;
+    };
+    attach attach_impl;
+}
+} //namespace gpu
+} //namespace cldnn
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/upsampling_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/upsampling_gpu.cpp
index 423c58ecb..aa373059b 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/upsampling_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/upsampling_gpu.cpp
@@ -50,7 +50,7 @@ struct upsampling_gpu : typed_primitive_gpu_impl<upsampling>
         
         const auto& primitive = arg.get_primitive();
         if(primitive->with_activation)
-            convert_activation_func_params(primitive, us_params);
+            convert_activation_func_params(primitive, us_params.activation);
 
         us_params.scale = primitive->scale;
         us_params.num_filter = primitive->num_filter;
diff --git a/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp b/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp
index c116e2a24..30bbf7e4a 100644
--- a/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp
+++ b/inference-engine/thirdparty/clDNN/src/gpu/wait_for_events_gpu.cpp
@@ -36,6 +36,11 @@ public:
         return events_waiter.run(events);
     }
 
+    bool validate(const primitive_inst&) const override
+    {
+        return true;
+    }
+
     static primitive_impl* create_data(const data_node& data)
     {
         return new wait_for_events_gpu(data);
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp
new file mode 100644
index 000000000..d903f5c9f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_required_reorders.cpp
@@ -0,0 +1,143 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <algorithm>
+
+#include "pass_manager.h"
+#include "program_node.h"
+#include "mutable_data_inst.h"
+#include "concatenation_inst.h"
+#include "scale_inst.h"
+#include "tensor_type.h"
+
+/*
+This pass checks if data formats (layouts) of output/input in hidden layers match.
+If not than required reorder is added to the network.
+*/
+
+/*
+Add a reorder in between node and usr with reorder_layout as layout
+*/
+void add_required_reorders::add_reorder(program_impl& p, program_node* node, program_node* usr, layout reorder_layout)
+{
+
+    auto new_reorder = std::make_shared<reorder>(node->id() + "_reorder_" + usr->id(),
+                                                 node->id(),
+                                                 reorder_layout);
+    auto& new_reorder_node = p.get_or_create(new_reorder);
+
+    //ToDo: add a method to program_impl class which adds an intermediate node given a node and its user
+    auto it = std::find(usr->get_dependencies().begin(), usr->get_dependencies().end(), node);
+    if (it == usr->get_dependencies().end())
+    {
+        throw error("Inconcistency in topology description: user of a node is not present among its dependecies.", CLDNN_ERROR);
+    }
+    auto idx = it - usr->get_dependencies().begin();
+    if (idx < 0 || (size_t)idx >= usr->get_dependencies().size())
+    {
+        throw error("Internal Error: container index out of range exception.", CLDNN_ERROR);
+    }
+    p.add_intermediate(new_reorder_node, *usr, idx);
+}
+
+void add_required_reorders::run(program_impl& p)
+{
+    auto usr_itr = p.get_processing_order().begin();
+    while (usr_itr != p.get_processing_order().end())
+    {
+        auto& usr = *usr_itr++;
+        if (usr->get_dependencies().size() == 0)
+            continue;  // only nodes with dependencies
+        if (usr->is_type<internal_primitive>() || usr->is_type<data>())
+            continue;
+        if (usr->type()->does_an_implementation_exist(p.get_engine(), *usr))
+            continue;
+
+        /*
+            First check if there are non data flow dependencies for the primitive
+            if so then choose the same output format as the data
+        */
+        bool correct_layout_selected = false;
+        for (auto& node : usr->get_dependencies())
+        {
+            if (!node->is_in_data_flow())
+            {
+                /*
+                    ToDo: Here we should handle also the situation where primitive usr has data inputs in different formats
+                */
+                layout current_layout(usr->get_output_layout().data_type,
+                    node->get_output_layout().format,
+                    usr->get_output_layout().size);
+                usr->set_output_layout(current_layout);
+                if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr))
+                {
+                    correct_layout_selected = true;
+                    break;
+                }
+                else
+                {
+                    throw error("Internal Error: no layout format available for " + usr->id() +
+                        " comaptible with " + node->id(), CLDNN_ERROR);
+                }
+            }
+        }
+
+        if (!correct_layout_selected) {
+            //This list of preffered layouts has been selected arbitrary due to developers' experience
+            cldnn::format preffered_layout_formats[]{
+                cldnn::format::bfyx,
+                cldnn::format::yxfb,
+                cldnn::format::byxf,
+            };
+
+            for (auto new_layout_format : preffered_layout_formats)
+            {
+                layout current_layout(usr->get_output_layout().data_type,
+                    new_layout_format,
+                    usr->get_output_layout().size);
+                usr->set_output_layout(current_layout);
+                if (usr->type()->does_possible_implementation_exist(p.get_engine(), *usr))
+                {
+                    correct_layout_selected = true;
+                    break;
+                }
+            }
+
+            if (!correct_layout_selected) {
+                throw error("Internal Error: no implementation for " + usr->id() + " kernel which satisfies output format dependecies.", CLDNN_ERROR);
+            }
+        }
+
+        // layout is selected now add required reorders
+        auto dep_itr = usr->get_dependencies().begin();
+        while (dep_itr != usr->get_dependencies().end())
+        {
+            auto node = *dep_itr++;
+            //do not add a reorder if usr or node are reorders or does not belong to data_flow
+            if (!usr->is_type<reorder>() &&
+                !node->is_type<reorder>() &&
+                node->is_in_data_flow())
+            {
+                if ((usr->get_output_layout()!=node->get_output_layout()))
+                {
+                    add_reorder(p, node, usr, usr->get_output_layout());
+                }
+            }
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_reshape_to_primitives.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_reshape_to_primitives.cpp
new file mode 100644
index 000000000..e78cf86c4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/add_reshape_to_primitives.cpp
@@ -0,0 +1,120 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "batch_norm_inst.h"
+#include "reshape_inst.h"
+
+using namespace cldnn;
+
+//Some primitives require a specific shape for thier inputs/parameters.
+//We should check this and add reshape to be compliant with this.
+//
+//Example: batch_norm primitive requires that mean/variance/scale/shift is shape {1, X, 1, 1}
+void add_reshape_to_primitives::run(program_impl& p)
+{
+    auto processing_order = p.get_processing_order();
+
+    for (auto& node : processing_order)
+    {
+        //if node is batch_norm and mean/var are given (i.e. use eltwise kernel to calculate batch_norm)
+        if (node->is_type<batch_norm>() &&
+            (!node->as<batch_norm>().calc_mean_var() && node->as<batch_norm>().use_global_stats()))
+        {
+            auto mean_layout = node->as<batch_norm>().mean().get_output_layout();
+            auto mean_size = mean_layout.size;
+            auto mean_x = mean_size.spatial[0];
+            auto mean_y = mean_size.spatial[1];
+            auto mean_b = mean_size.batch[0];
+
+            if (mean_x != 1
+                || mean_y != 1
+                || mean_b != 1)
+            {
+                auto mean_name = node->as<batch_norm>().mean().id();
+                std::vector<int32_t> mean_sizes = mean_size.sizes();
+                int32_t mean_max_size = *std::max_element(std::begin(mean_sizes), std::end(mean_sizes));
+
+                auto r_prim = std::make_shared<reshape>("reshape_" + mean_name + "_" + node->id(), mean_name, tensor(1, mean_max_size, 1, 1));
+                auto& r_prim_node = p.get_or_create(r_prim);
+
+                p.add_intermediate(r_prim_node, *node, 1, true);
+            }
+
+            auto variance_size = node->as<batch_norm>().variance().get_output_layout().size;
+            auto variance_x = variance_size.spatial[0];
+            auto variance_y = variance_size.spatial[1];
+            auto variance_b = variance_size.batch[0];
+
+            if (variance_x != 1
+                || variance_y != 1
+                || variance_b != 1)
+            {
+                auto variance_name = node->as<batch_norm>().variance().id();
+                std::vector<int32_t> variance_sizes = variance_size.sizes();
+                int32_t variance_max_size = *std::max_element(std::begin(variance_sizes), std::end(variance_sizes));
+
+                auto r_prim = std::make_shared<reshape>("reshape_" + variance_name + "_" + node->id(), variance_name, tensor(1, variance_max_size, 1, 1));
+                auto& r_prim_node = p.get_or_create(r_prim);
+
+                p.add_intermediate(r_prim_node, *node, 2, true);
+            }
+
+            if (node->as<batch_norm>().use_scale_shift())
+            {
+                auto scale_size = node->as<batch_norm>().scale().get_output_layout().size;
+                auto scale_x = scale_size.spatial[0];
+                auto scale_y = scale_size.spatial[1];
+                auto scale_b = scale_size.batch[0];
+
+                if (scale_x != 1
+                    || scale_y != 1
+                    || scale_b != 1)
+                {
+                    auto scale_name = node->as<batch_norm>().scale().id();
+                    std::vector<int32_t> scale_sizes = scale_size.sizes();
+                    int32_t scale_max_size = *std::max_element(std::begin(scale_sizes), std::end(scale_sizes));
+
+                    auto r_prim = std::make_shared<reshape>("reshape_" + scale_name + "_" + node->id(), scale_name, tensor(1, scale_max_size, 1, 1));
+                    auto& r_prim_node = p.get_or_create(r_prim);
+
+                    p.add_intermediate(r_prim_node, *node, 3, true);
+                }
+
+                auto shift_size = node->as<batch_norm>().shift().get_output_layout().size;
+                auto shift_x = shift_size.spatial[0];
+                auto shift_y = shift_size.spatial[1];
+                auto shift_b = shift_size.batch[0];
+
+                if (shift_x != 1
+                    || shift_y != 1
+                    || shift_b != 1)
+                {
+                    auto shift_name = node->as<batch_norm>().shift().id();
+                    std::vector<int32_t> shift_sizes = shift_size.sizes();
+                    int32_t shift_max_size = *std::max_element(std::begin(shift_sizes), std::end(shift_sizes));
+
+                    auto r_prim = std::make_shared<reshape>("reshape_" + shift_name + "_" + node->id(), shift_name, tensor(1, shift_max_size, 1, 1));
+                    auto& r_prim_node = p.get_or_create(r_prim);
+
+                    p.add_intermediate(r_prim_node, *node, 4, true);
+                }
+            }
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp
new file mode 100644
index 000000000..c7e90793c
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/calculate_prior_boxes.cpp
@@ -0,0 +1,47 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "prior_box_inst.h"
+#include "program_node.h"
+#include "program_impl.h"
+
+using namespace cldnn;
+
+void calculate_prior_boxes::run(program_impl& p)
+{
+    auto itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end())
+    {
+        auto& node = (*itr++);
+        if (!node->is_type<prior_box>())
+            continue;
+
+        auto& pb_node = node->as<prior_box>();
+
+        pb_node.calc_result();
+        p.remove_connection(pb_node.input(), pb_node);
+
+        auto& result = pb_node.get_result_buffer();
+        result.add_ref(); // need to inc ref count since we will be assigning this memory as cldnn_memory in next line that is not ref_count_obj
+        auto cpp_mem = details::memory_c_to_cpp_converter::convert(api_cast(&result));
+
+        auto& data_node = p.get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", cpp_mem));
+        p.replace(pb_node, data_node);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp
new file mode 100644
index 000000000..db7c659b8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/compile_graph.cpp
@@ -0,0 +1,39 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "internal_primitive.h"
+#include "data_inst.h"
+#include "mutable_data_inst.h"
+#include "program_node.h"
+#include "engine_impl.h"
+
+using namespace cldnn;
+
+void compile_graph::run(program_impl& p)
+{
+    for (auto& node : p.get_processing_order())
+    {
+        if (!node->is_type<internal_primitive>() && !node->is_type<data>())
+        {
+            node->get_output_layout();
+            if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty()))
+                node->selected_impl = node->type()->choose_impl(p.get_engine(), *node);
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp
new file mode 100644
index 000000000..eea35eec0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_remove_stride.cpp
@@ -0,0 +1,105 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "api/CPP/tensor.hpp"
+
+#include "pass_manager.h"
+
+#include "convolution_inst.h"
+#include "eltwise_inst.h"
+
+#include <memory>
+
+using namespace cldnn;
+
+void eltwise_remove_stride::conv_stride_extend(program_impl& p, program_node& node, cldnn::tensor& tensor)
+{
+    // make sure we have only 1 user
+    if (node.get_users().size() > 1)
+        return;
+
+    const auto conv = std::static_pointer_cast<const convolution>(node.get_primitive());
+    auto weights_node_ptr = p.get_node_ptr(conv->weights[0]);
+    auto filter_size = weights_node_ptr->get_output_layout().size;
+    // make sure this is conv 1x1
+    if (filter_size.spatial[0] == 1 && filter_size.spatial[1] == 1)
+    {
+        auto deps = node.get_dependencies();
+        for (auto dep : deps)
+        {
+            if (dep->is_type<convolution>())
+            {
+                conv_stride_extend(p, *dep, tensor);
+                dep->recalc_output_layout(true);
+                break;
+            }
+        }
+        auto c = const_cast<convolution*>(&(*conv));
+        c->with_output_size = false;
+        node.recalc_output_layout(true);
+    }
+    else
+    {
+        bool can_shrink_x = (filter_size.spatial[0] - (conv->stride.spatial[0] + (tensor.spatial[0] - 1))) >= 0;
+        bool can_shrink_y = (filter_size.spatial[1] - (conv->stride.spatial[1] + (tensor.spatial[1] - 1))) >= 0;
+        if (can_shrink_x && can_shrink_y)
+        {
+            auto c = const_cast<convolution*>(&(*conv));
+            c->stride.spatial[0] += tensor.spatial[0] - 1;
+            c->stride.spatial[1] += tensor.spatial[1] - 1;
+            c->with_output_size = false;
+            node.recalc_output_layout(true);
+            tensor.spatial[0] = 1;
+            tensor.spatial[1] = 1;
+        }
+    }
+}
+
+void eltwise_remove_stride::run(program_impl& p)
+{
+    for (auto& node : p.get_processing_order())
+    {
+        if (node->is_type<eltwise>())
+        {
+            // TODO: make fp16 work
+            if (node->get_output_layout().data_type != data_types::i8 && node->get_output_layout().data_type != data_types::f32)
+            {
+                if (node->get_output_layout().data_type != data_types::f16 || node->get_output_layout().format != format::yxfb)
+                {
+                    continue;
+                }
+            }
+
+            const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
+            if (!eltw->stride.empty())
+            {
+                auto deps = node->get_dependencies();
+                for (size_t i = 0; i < deps.size(); i++)
+                {
+                    auto dep = deps[i];
+                    // TODO: add other primitives beside convolution here
+                    if (dep->is_type<convolution>())
+                    {
+                        auto e = const_cast<eltwise*>(&(*eltw));
+                        conv_stride_extend(p, *dep, e->stride[i]);
+                    }
+                }
+            }
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_shrinking.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_shrinking.cpp
new file mode 100644
index 000000000..b3d0e009a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/eltwise_shrinking.cpp
@@ -0,0 +1,132 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "pass_manager.h"
+#include "eltwise_inst.h"
+
+using namespace cldnn;
+
+void eltwise_shrinking::run(program_impl& p)
+{
+    std::vector<program_node*> convs_to_shrink;
+
+    for (auto& node : p.get_processing_order())
+    {
+        if (node->is_type<eltwise>())
+        {
+            // TODO: make fp16 work
+            if (node->get_output_layout().data_type != data_types::i8 && node->get_output_layout().data_type != data_types::f32)
+            {
+                if (node->get_output_layout().data_type != data_types::f16 || node->get_output_layout().format != format::yxfb)
+                {
+                    continue;
+                }
+            }
+
+            const auto eltw = std::static_pointer_cast<const eltwise>(node->get_primitive());
+            // TODO: support cases which already have stride!
+            if (eltw->stride.empty())
+            {
+                bool can_shrink = true;
+                int32_t stride_x = 0;
+                int32_t stride_y = 0;
+                convs_to_shrink.clear();
+                auto users = node->get_users();
+                for (auto user : users)
+                {
+                    // currently we can shrink only if users are convolutions
+                    if (!user->is_type<convolution>())
+                    {
+                        can_shrink = false;
+                        break;
+                    }
+
+                    if (user->get_output_layout().format == format::b_fs_yx_fsv4)
+                    {
+                        // Workaround for VIS-1079
+                        // Currently, we don't have "conv + eltwise" optimization for
+                        // IMAD and it blocks us to run the whole ResNet-50.i8 topology in IMAD.
+                        // As workaround, this optimization will be temporary switched off for
+                        // "format == b_fs_yx_fsv4"(IMAD specific data layout).
+                        // TODO: Please, remove this code, when VIS - 1079 will be done.
+                        can_shrink = false;
+                        break;
+                    }
+
+                    const auto conv = std::static_pointer_cast<const convolution>(user->get_primitive());
+                    if (conv->weights.size() != 1)
+                    {
+                        can_shrink = false;
+                        break;
+                    }
+
+                    auto weights_node_ptr = p.get_node_ptr(conv->weights[0]);
+                    auto filter_size = weights_node_ptr->get_output_layout().size;
+                    // make sure this is conv 1x1
+                    if (filter_size.spatial[0] != 1 || filter_size.spatial[1] != 1)
+                    {
+                        can_shrink = false;
+                        break;
+                    }
+
+                    // make sure convolution can accept shrinked input by modifying stride
+                    if (conv->stride.spatial[0] > 1 || conv->stride.spatial[1] > 1)
+                    {
+                        if (stride_x == 0)
+                            stride_x = conv->stride.spatial[0];
+                        if (stride_y == 0)
+                            stride_y = conv->stride.spatial[1];
+
+                        // make sure stride across all eltwise's convolution users is the same
+                        if (conv->stride.spatial[0] != stride_x || conv->stride.spatial[1] != stride_y)
+                        {
+                            can_shrink = false;
+                            break;
+                        }
+                        convs_to_shrink.push_back(user);
+                    }
+                    else
+                    {
+                        can_shrink = false;
+                        break;
+                    }
+                }
+                if (can_shrink)
+                {
+                    // add stride for every eltwise's inputs to have shrinked output
+                    auto e = const_cast<eltwise*>(&(*eltw));
+                    for (size_t user = 0; user < node->get_users().size(); user++)
+                    {
+                        e->stride.push_back({ 0,0,stride_x,stride_y });
+                    }
+                    node->recalc_output_layout();
+
+                    // change stride on every convolution
+                    for (size_t i = 0; i < convs_to_shrink.size(); i++)
+                    {
+                        const auto conv = std::static_pointer_cast<const convolution>(convs_to_shrink[i]->get_primitive());
+                        auto c = const_cast<convolution*>(&(*conv));
+                        c->stride.spatial[0] = 1;
+                        c->stride.spatial[1] = 1;
+                        // TODO: remove forcing "false" with_output_size if not needed
+                        c->with_output_size = false;
+                        convs_to_shrink[i]->recalc_output_layout();
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp
new file mode 100644
index 000000000..64e3853fd
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/graph_initializations.cpp
@@ -0,0 +1,641 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_node.h"
+
+#include "split_inst.h"
+#include "convolution_inst.h"
+#include "crop_inst.h"
+#include "lstm_inst.h"
+#include "reshape_inst.h"
+#include "upsampling_inst.h"
+
+#include <iomanip>
+
+using namespace cldnn;
+
+namespace cldnn
+{
+    std::string get_id_string(size_t i) {
+        std::stringstream ss;
+        ss << std::setw(5) << std::setfill('0') << i;
+        return ss.str();
+    }
+
+    // ToDo: rewrite methods in this class the same style (maybe: handle_<primitive_name>() ), 
+    //       is it possible to avoid iterating over all nodes several times?
+    //       do we have any repeated code here, can we make it more readable?
+    void graph_initializations::replace_nodes(program_impl& p)
+    {
+        auto itr = p.nodes_map.begin();
+        while (itr != p.nodes_map.end())
+        {
+            auto node_itr = itr++;
+            auto& node = (*node_itr).second;
+
+            if (node->is_type<split>())
+            {
+                //check if split is not used by any primitive, as it will be optimized
+                if (node->get_users().size() != 0)
+                    throw std::logic_error("Split layer cannot be used directly! Please use split output \"" + node->id() + ":<split_output_id>\"!");
+
+                //get_output size and validate split primitive inputs
+                auto output_layout = node->get_output_layout();
+                auto output_layout_size = output_layout.size;
+
+                auto split_prim = node->as<split>().typed_desc();
+                primitive_id input_id = split_prim->input[0];
+                auto split_num = split_prim->output_offsets.size();
+
+                //create crop for each split ouptut provided
+                for (decltype(split_num) i = 0; i < split_num; i++)
+                {
+                    primitive_id output_id = node->id() + ":" + split_prim->output_ids[i];
+
+                    auto node_ptr = p.nodes_map.find(output_id)->second;
+
+                    //calculate crop reference input size
+                    tensor reference_input_size;
+
+                    // For all the split offsets before the last split offset, the size can be calculated
+                    // size_of_offset[n] = offset[n + 1] - offset[n];
+                    if (i != (split_num - 1))
+                    {
+                        reference_input_size += split_prim->output_offsets[i + 1] - split_prim->output_offsets[i];
+                    }
+                    // For the last split i.e. size[split_num - 1] = split_input.size - offsets[n];
+                    else
+                    {
+                        reference_input_size += output_layout_size - split_prim->output_offsets[i];
+                    }
+
+                    // For all the other dimensions, copy from the split_input
+                    for (int dimension = 0; dimension < CLDNN_TENSOR_DIM_MAX; dimension++)
+                    {
+                        reference_input_size.raw[dimension]
+                            = (reference_input_size.raw[dimension] == 0) ? output_layout_size.raw[dimension] : reference_input_size.raw[dimension];
+                    }
+
+                    //update crop primitive
+                    node_ptr->set_output_padding(output_layout.data_padding);
+                    auto crop_prim = node_ptr->as<crop>().typed_desc();
+                    crop_prim->reference_input = reference_input_size;
+                }
+
+                //remove input->split connection and remove original split node
+                p.remove_connection(node->get_dependency(0), *node);
+                p.optimized_out.push_back(node->id());
+                p.nodes_map.erase(node->id());
+                continue;
+            }
+
+            //find upsampling primitives with bilinear filtering and create deconvolution with proper weights instead
+            if (node->is_type<upsampling>())
+            {
+                auto upsampling_prim = node->as<upsampling>().typed_desc();
+
+                if (upsampling_prim->sample_type != upsampling_sample_type::bilinear)
+                    continue;
+
+                //check if num_filter is not 0 (required for bilinear upsampling)
+                if (upsampling_prim->num_filter == 0)
+                    throw std::logic_error("num_filter in upsampling cannot be 0 in bilinear filtering mode in \"" + node->id() + "\"!");
+
+                primitive_id upsampling_id = node->id();
+                auto& input_node = node->get_dependency(0);
+
+                primitive_id input_id = upsampling_prim->input[0];
+                auto num_filter = upsampling_prim->num_filter;
+
+                //setting deconvolution parameters based on upsampling input
+                auto scale = static_cast<tensor::value_type>(upsampling_prim->scale);
+                tensor stride(1, 1, scale, scale);
+                auto offset = static_cast<tensor::value_type>(std::ceil((scale - 1) / 2.f));
+                tensor input_offset(0, 0, -offset, -offset);
+
+                //setting weights for deconvolution
+                auto kernel_size = static_cast<tensor::value_type>((2 * scale) - (scale % 2));
+                layout weights_layout(data_types::f32, format::bfyx, tensor(1, 1, kernel_size, kernel_size));
+
+                std::vector<primitive_id> weights_vec;
+                for (uint32_t weights_idx = 0; weights_idx < num_filter; weights_idx++)
+                {
+                    memory_impl::ptr data_to_allocate = p.get_engine().allocate_memory(weights_layout);
+                    mem_lock<float> dst{ data_to_allocate };
+                    float *dst_data = dst.data();
+                    //initialize with bilinear weights data
+                    auto f = static_cast<uint32_t>(std::ceil(kernel_size / 2.0f));
+                    float c = (2 * f - 1 - f % 2) / (2.f * f);
+                    float x = 0.f;
+                    float y = 0.f;
+                    for (size_t i = 0; i < weights_layout.count(); ++i) {
+                        x = static_cast<float>(i % kernel_size);
+                        y = static_cast<float>((i / kernel_size) % kernel_size);
+                        dst_data[i] = (1 - std::abs(x / f - c)) * (1 - std::abs(y / f - c));
+                    }
+
+                    //create weights primitive, with dummy memory which will be replaced in firther step
+                    primitive_id weights_id = upsampling_id + "_deconvolution_weights" + std::to_string(weights_idx);
+                    layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
+                    float zero = 0.f;
+                    auto weights_prim = std::make_shared<data>(weights_id, memory::attach(dummy_layout, &zero, 1));
+                    p.get_or_create(weights_prim);
+
+                    weights_vec.push_back(weights_id);
+
+                    auto weights_node_ptr = p.nodes_map.find(weights_id)->second;
+
+                    //attach weights buffer
+                    auto& data_node = weights_node_ptr->as<data>();
+                    data_node.attach_memory(*data_to_allocate, false);
+                }
+
+                //remove upsampling node, rename it and move to the optimized list
+                p.remove_connection(node->get_dependency(0), *node);
+                auto rename_id = upsampling_id + "_tmp";
+                p.rename(*node, rename_id);
+
+                //create deconvolution primitive
+                auto deconv_prim = std::make_shared<deconvolution>(upsampling_id, input_id, weights_vec, stride, input_offset);
+                p.get_or_create(deconv_prim);
+
+                auto deconv_node_ptr = p.nodes_map.find(upsampling_id)->second;
+
+                auto upsampling_node_ptr = p.nodes_map.find(rename_id)->second;
+                p.replace_all_usages(*upsampling_node_ptr, *deconv_node_ptr);
+                p.optimized_out.push_back(rename_id);
+                p.nodes_map.erase(rename_id);
+
+                //add connections input->deconvolution and weights->deconvolution
+                p.add_connection(input_node, *deconv_node_ptr);
+
+                for (uint32_t weights_idx = 0; weights_idx < num_filter; weights_idx++)
+                {
+                    auto weights_node_ptr = p.nodes_map.find(weights_vec[weights_idx])->second;
+                    p.add_connection(*weights_node_ptr, *deconv_node_ptr);
+                }
+                continue;
+            }
+
+            //find deconvolution primitives with stride 1 and change them to convolution with trasposed weights
+            if (node->is_type<deconvolution>())
+            {
+                if (!p.get_options().get<build_option_type::optimize_data>()->enabled())
+                    continue;
+
+                auto deconv_prim = node->as<deconvolution>().typed_desc();
+
+                //limit optimization to stride = 1
+                if (deconv_prim->stride.spatial[0] != 1 || deconv_prim->stride.spatial[1] != 1 || deconv_prim->gradient())
+                    continue;
+
+                primitive_id deconv_id = node->id();
+                auto& input_node = node->get_dependency(0);
+
+                primitive_id input_id = deconv_prim->input[0];
+
+                //setting convolution parameters based on deconvolution params
+                auto stride = deconv_prim->stride;
+                auto weights = deconv_prim->weights;
+                std::vector<primitive_id> weights_vec;
+                for (auto& weights_id : weights)
+                    weights_vec.push_back(weights_id);
+                auto biases = deconv_prim->bias;
+                std::vector<primitive_id> bias_vec;
+                for (auto& bias_id : biases)
+                    bias_vec.push_back(bias_id);
+                auto input_offset = deconv_prim->input_offset;
+                auto with_activation = deconv_prim->with_activation;
+                auto activation_negative_slope = deconv_prim->activation_negative_slope;
+                auto output_padding = deconv_prim->output_padding;
+
+                //remove deconvolution node and its connections to weights and biases, rename it and move to the optimized list
+                tensor filter_size = { 1, 1, 1, 1 };
+                p.remove_connection(node->get_dependency(0), *node);
+                for (auto& weights_id : weights_vec)
+                {
+                    auto weights_node_ptr = p.nodes_map.find(weights_id)->second;
+                    p.remove_connection(*weights_node_ptr, *node);
+                    //get filter spatial sizes for input offset adjustment, perform this only once as all filters shouls have same size
+                    if (weights_id == weights_vec[0])
+                        filter_size = weights_node_ptr->get_output_layout().size;
+                }
+
+                input_offset.spatial[0] = std::abs(input_offset.spatial[0]) - (filter_size.spatial[0] - 1);
+                input_offset.spatial[1] = std::abs(input_offset.spatial[1]) - (filter_size.spatial[1] - 1);
+
+                if (!bias_vec.empty())
+                {
+                    for (auto& bias_id : bias_vec)
+                    {
+                        auto bias_id_node_ptr = p.nodes_map.find(bias_id)->second;
+                        p.remove_connection(*bias_id_node_ptr, *node);
+                    }
+                }
+                auto rename_id = deconv_id + "_tmp";
+                p.rename(*node, rename_id);
+
+                //create convolution primitive
+                if (biases.size() != 0)
+                {
+                    auto conv_prim = std::make_shared<convolution>(deconv_id, input_id, weights_vec, bias_vec,
+                        stride, input_offset, tensor{ 1, 1, 1, 1 }, with_activation, activation_negative_slope, output_padding);
+                    p.get_or_create(conv_prim);
+                }
+                else
+                {
+                    auto conv_prim = std::make_shared<convolution>(deconv_id, input_id, weights_vec,
+                        stride, input_offset, tensor{ 1, 1, 1, 1 }, with_activation, activation_negative_slope, output_padding);
+                    p.get_or_create(conv_prim);
+                }
+
+                auto conv_node_ptr = p.nodes_map.find(deconv_id)->second;
+                auto conv_node = &conv_node_ptr->as<convolution>();
+                conv_node->set_transposed(true);
+
+                //add connections input->convolution, weights->convolution and bias->convolution
+                p.add_connection(input_node, *conv_node_ptr);
+
+                for (auto& weights_id : weights_vec)
+                {
+                    auto weights_node_ptr = p.nodes_map.find(weights_id)->second;
+                    p.add_connection(*weights_node_ptr, *conv_node_ptr);
+                }
+
+                if (!bias_vec.empty())
+                {
+                    for (auto& bias_id : bias_vec)
+                    {
+                        auto bias_id_node_ptr = p.nodes_map.find(bias_id)->second;
+                        p.add_connection(*bias_id_node_ptr, *conv_node_ptr);
+                    }
+                }
+
+                auto deconv_node_ptr = p.nodes_map.find(rename_id)->second;
+                p.replace_all_usages(*deconv_node_ptr, *conv_node_ptr);
+                p.optimized_out.push_back(rename_id);
+                p.nodes_map.erase(rename_id);
+
+                continue;
+            }
+        }
+    }
+
+    void graph_initializations::handle_detection_output(program_impl& p)
+    {
+        auto itr = p.nodes_map.begin(); //note we need to use iterators since currently processed element can be removed
+        while (itr != p.nodes_map.end())
+        {
+            auto node_itr = itr++;
+            auto& node = *(*node_itr).second;
+            // Create second part detection output primitive and replace nodes names - do it only once
+            if ((p.get_options().get<build_option_type::detection_output_gpu>()->enabled()) &&
+                (node.is_type<detection_output>()) &&
+                (node.id().find("_pre") == std::string::npos))    //ToDo: this will fail if user will name the primitive with using _pre like do_pre
+                                                                  //      we need to use node mark() or some other idea to prevent it   
+            {
+                // rename detection output
+                const primitive_id detect_out_node_name = node.id();
+                const primitive_id new_primitive_id = detect_out_node_name + "_pre";
+                p.rename(node, new_primitive_id);
+
+                auto detect_out_prim = node.as<detection_output>().typed_desc();
+                // Create new primitive, "keep top k" part of detection output
+                // ToDo: add a default parameters to the detection_output_sort class constructor to get rid off this initialization from here
+                auto detect_out_sort_prim = std::make_shared<detection_output_sort>(
+                    detect_out_node_name,
+                    node.id(),
+                    // not important params here - it will be set during "primitive_impl* create" func in "detection_output_sort_gpu"
+                    0,      // num_images
+                    0,      // num_classes
+                    0,      // keep_top_k
+                    false,  // share_location
+                    0,      // top_k
+                    -1,     // background_label_id
+                    detect_out_prim->output_padding);
+
+                p.get_or_create(detect_out_sort_prim);
+
+                auto sort_node = p.nodes_map.find(detect_out_node_name)->second;
+
+                // Add connection to second part of detection output
+                if (node.get_users().size())
+                {
+                    p.add_intermediate(*sort_node, *(node.get_users().front()), 0, false);
+                }
+                else
+                {
+                    p.add_connection(node, *sort_node);
+                }
+            }
+        }
+    }
+
+    void graph_initializations::handle_lstm(program_impl& p)
+    {
+        bool has_lstm_children;
+        auto itr = p.nodes_map.begin(); //note we need to use iterators since currently processed element can be removed
+        while (itr != p.nodes_map.end())
+        {
+            auto node_itr = itr++;
+            auto& node = (*node_itr).second;
+            has_lstm_children = false;
+            // replace lstm node with lstm_gemm and lstm_elt nodes
+            if (node->is_type<lstm>()) {
+                bool initial_hidden_term = node->as<lstm>().initial_hidden_term();
+                bool initial_cell_term = node->as<lstm>().initial_cell_term();
+                bool bias_term = node->as<lstm>().bias_term();
+                auto lstm_prim = node->as<lstm>().typed_desc();
+                primitive_id weights_id = lstm_prim->weights;
+                primitive_id recurrent_id = lstm_prim->recurrent;
+                primitive_id bias_id = bias_term ? lstm_prim->bias : "";
+                primitive_id initial_hidden_id = initial_hidden_term ? lstm_prim->initial_hidden : "";
+                primitive_id initial_cell_id = initial_cell_term ? lstm_prim->initial_cell : "";
+
+                //removing connection with weights to get proper dependency order for next operations
+                p.remove_connection(*p.nodes_map.at(weights_id), *node);
+                p.remove_connection(*p.nodes_map.at(recurrent_id), *node);
+                if (bias_term)
+                    p.remove_connection(*p.nodes_map.at(bias_id), *node);
+                if (initial_hidden_term)
+                    p.remove_connection(*p.nodes_map.at(initial_hidden_id), *node);
+                if (initial_cell_term)
+                    p.remove_connection(*p.nodes_map.at(initial_cell_id), *node);
+
+                //calculating sizes
+                auto input_size = node->get_dependency(0).get_output_layout().size;
+                auto recurrent_size = p.nodes_map.at(recurrent_id)->get_output_layout().size;
+
+                // hidden tensor size = [batch, seq, hidden_size, direction]
+                // the output of the element wise operation is cropped and used in the next time step
+                // sequence_len = 1 and direction = 1. The backward pass is separated from the forward pass
+                auto hidden_size = tensor(input_size.batch[0], 1, recurrent_size.spatial[0], 1);
+
+                size_t directions = recurrent_size.feature[0];
+                size_t input_directions = input_size.spatial[1];
+                size_t num_input_dependencies = node->get_dependencies().size();
+                size_t input_vector_size = node->as<lstm>().sequence_len();
+                size_t sequence_len = input_vector_size;
+
+                // Calculate the input sequence length for the lstm node
+                // Case 1: If the input comes in as a concatenated input i.e. the
+                // input is not divided into sequence elements
+                if (input_vector_size == 1 && num_input_dependencies == 1)
+                {
+                    // Either the input actually has 1 sequence element
+                    auto& input = node->get_dependency(0);
+                    auto input_layout = input.get_output_layout();
+                    tensor input_tensor = input_layout.size;
+
+                    // Get the sequence length from the input to LSTM
+                    sequence_len = input_layout.size.feature[0];
+
+                    // If the input's feature/sequence length field is > 1, i.e. If
+                    // the sequence elements are concatenated into one single input
+                    // then it has to be split into individual sequence elements
+                    if (sequence_len > 1)
+                    {
+                        for (size_t sequence_element = 0; sequence_element < sequence_len; sequence_element++)
+                        {
+                            primitive_id crop_id = input.id() + ":crop:" + get_id_string(sequence_element);
+                            tensor crop_tensor{ input_tensor.batch[0], 1, input_tensor.spatial[0], input_tensor.spatial[1] };
+                            tensor offset_tensor{ 0, static_cast<tensor::value_type>(sequence_element), 0, 0 };
+                            auto input_crop = std::make_shared<crop>(crop_id, input.id(), crop_tensor, offset_tensor);
+                            auto& input_crop_node = p.get_or_create(input_crop);
+
+                            // Add the crop nodes as user for input
+                            p.add_connection(node->get_dependency(0), input_crop_node);
+
+                            // Connect crop with lstm
+                            p.add_connection(input_crop_node, *node);
+                        }
+
+                        // We have the sequence elements (cropped inputs) as input to LSTM. 
+                        // The original input is no longer a dependency to LSTM. 
+                        // Remove the input node as a dependency to LSTM
+                        p.remove_connection(node->get_dependency(0), *node);
+
+                        // Update the total no. of input dependecies
+                        num_input_dependencies = node->get_dependencies().size();
+                    }
+                }
+
+                //if the sequence has a single element but it has multiple inputs then
+                //the parent of this lstm is an lstm node. If this is a bidirectional lstm
+                //then the sequence length is the number of dependencies divided by 2.
+                else if (input_vector_size == 1 && num_input_dependencies > 1)
+                {
+                    sequence_len = (directions == 1) ? num_input_dependencies : num_input_dependencies / 2;
+                }
+
+                //check if this lstm node has an lstm child
+                for (auto& user : node->get_users())
+                {
+                    if (user->is_type<lstm>())
+                    {
+                        has_lstm_children = true;
+                    }
+                }
+
+                bool emit_last_cell = lstm_prim->output_selection == cldnn_lstm_output_hidden_cell ||
+                    lstm_prim->output_selection == cldnn_lstm_output_sequence_cell;
+                bool emit_sequence = lstm_prim->output_selection == cldnn_lstm_output_sequence_cell ||
+                    lstm_prim->output_selection == cldnn_lstm_output_sequence;
+
+                std::vector<program_node*> cell_list(directions * sequence_len);
+                std::vector<program_node*> hidden_list(directions * sequence_len);
+                std::map<size_t, std::pair<primitive_id, program_node*>> output_map;
+                auto dependencies = node->get_dependencies();
+
+                //lstm expanding
+                for (size_t dir = 0; dir < directions; ++dir) {
+                    auto hidden_id = initial_hidden_id;
+                    auto cell_id = initial_cell_id;
+                    for (size_t i = 0; i < sequence_len; ++i) {
+                        size_t idx = i + dir * sequence_len;
+                        primitive_id lstm_gemm_id = node->id() + ":lstm_gemm" + get_id_string(idx);
+                        primitive_id lstm_elt_id = node->id() + ":lstm_elt" + get_id_string(idx);
+                        primitive_id crop_id = node->id() + ":crop" + get_id_string(idx);
+
+                        size_t input_idx = i;
+                        //for bidirectional lstms, if first LSTM layer then reverse input
+                        //for subsequent stacked layers the input is strided on the dir dimension
+                        if (directions > 0) {
+                            if (num_input_dependencies > sequence_len) { // stacked layer
+                                input_idx = dir * sequence_len + i;
+                            }
+                            else
+                            {
+                                if ((input_directions < 2) && dir > 0) { // first layer
+                                    input_idx = sequence_len - i - 1;
+                                }
+                            }
+                        }
+
+                        //primitive_id lstm_gemm_input_id = node->get_dependency(input_idx).get_primitive()->id;
+                        //the line below requires an attention: get_org_primitive_id() might not be an actual id of a node (see rename method)
+                        //ToDO: ensure that get_org_primitive_id() is suitable here
+                        primitive_id lstm_gemm_input_id = node->get_dependency(input_idx).get_org_primitive_id();
+
+                        auto lstm_gemm_node = std::make_shared<lstm_gemm>(lstm_gemm_id, lstm_gemm_input_id, weights_id, recurrent_id, bias_id, hidden_id, (uint32_t)dir);
+                        auto &n1 = p.get_or_create(lstm_gemm_node);
+
+                        auto lstm_elt_node = std::make_shared<lstm_elt>(lstm_elt_id, lstm_gemm_id, cell_id, lstm_prim->clip, lstm_prim->input_forget,
+                            lstm_prim->activations, lstm_prim->activation_params, lstm_prim->offset_order, (uint32_t)dir);
+                        auto &n2 = p.get_or_create(lstm_elt_node);
+                        //adding lstm_elt as user
+                        p.add_connection(n1, n2);
+                        //adding dependecy to lstm_gemm node
+                        //input
+                        p.add_connection(node->get_dependency(input_idx), n1);
+                        //adding weights and initial values to lstm_gemm
+                        p.add_connection(*p.nodes_map.at(weights_id), n1);
+                        p.add_connection(*p.nodes_map.at(recurrent_id), n1);
+                        if (bias_term)
+                            p.add_connection(*p.nodes_map.at(bias_id), n1);
+
+                        //adding cell and hiddens as dependencies
+                        if (i > 0)
+                        {
+                            p.add_connection(*cell_list[size_t(i - 1) * directions + dir], n2);
+                            p.add_connection(*hidden_list[size_t(i - 1) * directions + dir], n1);
+                        }
+                        //if initial values are present
+                        else
+                        {
+                            if (initial_hidden_term)
+                                p.add_connection(*p.nodes_map.at(hidden_id), n1);
+                            if (initial_cell_term)
+                                p.add_connection(*p.nodes_map.at(cell_id), n2);
+                        }
+
+                        //lstm_hidden
+                        {
+                            hidden_id = crop_id + ":hidden";
+                            auto crop_hidden = std::make_shared<crop>(hidden_id, lstm_elt_id, hidden_size, tensor{ 0,0,0,0 });
+                            auto &n3 = p.get_or_create(crop_hidden);
+                            //adding eltwise as dependency to hidden
+                            p.add_connection(n2, n3);
+
+                            //if parent is lstm adding hiddens as dependency
+                            if (has_lstm_children)
+                            {
+                                for (auto& user : node->get_users())
+                                {
+                                    p.add_connection(n3, *user);
+                                }
+                            }
+                            hidden_list[i * directions + dir] = &n3;
+                            if (i == sequence_len - 1 || emit_sequence)
+                            {
+                                output_map[i * directions + dir] = { hidden_id, &n3 };
+                            }
+                        }
+
+                        //lstm_cell
+                        if (i < sequence_len - 1 || emit_last_cell)
+                        {
+                            cell_id = crop_id + ":cell";
+                            auto crop_cell = std::make_shared<crop>(cell_id, lstm_elt_id, hidden_size, tensor{ 0,1,0,0 });
+                            auto &n4 = p.get_or_create(crop_cell);
+                            p.add_connection(n2, n4);
+                            cell_list[i * directions + dir] = &n4;
+                            if (i == sequence_len - 1)
+                            {
+                                output_map[sequence_len * directions + dir] = { cell_id, &n4 };
+                            }
+                        }
+                    }
+                }
+                //if there is no next lstm, concatenation is created
+                if (!has_lstm_children)
+                {
+                    std::vector<primitive_id> output_ids_offsets;
+                    for (auto& e : output_map)
+                    {
+                        output_ids_offsets.push_back(e.second.first);
+                    }
+                    primitive_id original_id = node->id();
+                    primitive_id concatenation_id = original_id + ":concat";
+                    auto concatenation_primitive = std::make_shared<concatenation>(concatenation_id, output_ids_offsets, concatenation::along_f);
+                    auto &concatenation_node = p.get_or_create(concatenation_primitive);
+                    for (auto& e : output_map)
+                    {
+                        p.add_connection(*e.second.second, concatenation_node);
+                    }
+                    if (directions == 2) {
+                        // bidirectional support requires concatenations along the direction and sequence axis
+                        // instead we can concatenate along the sequence axis and reshape the tensor to the account
+                        // for the direction
+                        size_t concatenate_len = emit_sequence ? sequence_len : 1;
+                        if (emit_last_cell) concatenate_len++;
+
+                        tensor output_size{ input_size.batch[0], static_cast<int32_t>(concatenate_len), hidden_size.spatial[0], (int32_t)directions };
+                        primitive_id reshape_id = original_id + ":reshape";
+                        auto reshape_primitive = std::make_shared<reshape>(reshape_id, concatenation_id, output_size);
+                        auto &reshape_node = p.get_or_create(reshape_primitive);
+                        p.add_connection(concatenation_node, reshape_node);
+                        p.replace_all_usages(*node, reshape_node);
+                    }
+                    else
+                    {
+                        p.replace_all_usages(*node, concatenation_node);
+                    }
+                }
+                //removing expanded node
+                p.remove_all_connections(*node);
+                p.nodes_map.erase(node->id());
+                continue;
+            }
+        }
+
+    }
+
+    void graph_initializations::set_outputs(program_impl& p)
+    {
+        auto outputs_option = p.get_options().get<build_option_type::outputs>();
+        if (!outputs_option->outputs.empty())
+        {
+            for (auto const& output : outputs_option->outputs)
+            {
+                auto o_node = p.nodes_map.at(output);
+                o_node->set_output(true);
+                p.outputs.push_back(o_node.get());
+            }
+        }
+        else
+        {
+            for (auto& node : p.nodes_map)
+                if (node.second->is_endpoint())
+                {
+                    node.second->set_output(true);
+                    p.outputs.push_back(node.second.get());
+                }
+        }
+    }
+
+    void graph_initializations::run(program_impl& p)
+    {
+        replace_nodes(p);
+        handle_detection_output(p);
+        handle_lstm(p);
+        set_outputs(p);
+        p.get_processing_order().calc_processing_order(p);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp
new file mode 100644
index 000000000..d11dce259
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/handle_input_padding.cpp
@@ -0,0 +1,94 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "border_inst.h"
+#include "convolution_inst.h"
+#include "error_handler.h"
+
+using namespace cldnn;
+
+//Some primitives support padding for input.
+//There are 2 types of padding: symmetric and asymettric.
+//Symmetric padding can be done using input_offset parameter for primitives.
+//Asymmetric padding can be done by adding border primitive before them. It's safe way without modyfing optimized kernels.
+void handle_input_padding::run(program_impl& p)
+{
+    auto processing_order = p.get_processing_order();
+
+    for (auto& node : processing_order)
+    {
+        if (node->is_type<convolution>()
+            && (node->as<convolution>().get_primitive()->padding_above.spatial[0] != 0 || node->as<convolution>().get_primitive()->padding_above.spatial[1] != 0
+                || node->as<convolution>().get_primitive()->padding_below.spatial[0] != 0 || node->as<convolution>().get_primitive()->padding_below.spatial[1] != 0))
+        {
+            auto conv = node->as<convolution>().get_primitive();
+            auto conv_primitive = const_cast<convolution*>(&(*conv));
+
+            //Asymmetric padding
+            if (node->as<convolution>().get_primitive()->padding_above.spatial[0] != node->as<convolution>().get_primitive()->padding_below.spatial[0]
+                || node->as<convolution>().get_primitive()->padding_above.spatial[1] != node->as<convolution>().get_primitive()->padding_below.spatial[1])
+            {
+                primitive_id conv_id = conv_primitive->id;
+                primitive_id input_id = conv_primitive->input[0];
+
+                auto padding_above = conv_primitive->padding_above;
+                auto padding_below = conv_primitive->padding_below;
+
+                CLDNN_ERROR_NOT_EQUAL(node->as<convolution>().id(), "Padding above feature", padding_above.feature[0], "", 0, "Padding above in feature is not supported");
+                CLDNN_ERROR_NOT_EQUAL(node->as<convolution>().id(), "Padding above batch", padding_above.batch[0], "", 0, "Padding above in batch is not supported");
+                CLDNN_ERROR_NOT_EQUAL(node->as<convolution>().id(), "Padding below feature", padding_below.feature[0], "", 0, "Padding below in feature is not supported");
+                CLDNN_ERROR_NOT_EQUAL(node->as<convolution>().id(), "Padding below batch", padding_below.batch[0], "", 0, "Padding below in batch is not supported");
+                
+                CLDNN_ERROR_LESS_THAN(node->as<convolution>().id(), "Padding above X", padding_above.spatial[0], "", 0, "Padding above in X cannot be negative");
+                CLDNN_ERROR_LESS_THAN(node->as<convolution>().id(), "Padding above Y", padding_above.spatial[1], "", 0, "Padding above in Y cannot be negative");
+                CLDNN_ERROR_LESS_THAN(node->as<convolution>().id(), "Padding below X", padding_below.spatial[0], "", 0, "Padding below in X cannot be negative");
+                CLDNN_ERROR_LESS_THAN(node->as<convolution>().id(), "Padding below Y", padding_below.spatial[1], "", 0, "Padding below in Y cannot be negative");
+
+                //set padding_above/padding_below to zeros - border primitive do the job
+                conv_primitive->padding_above = tensor(0, 0, 0, 0);
+                conv_primitive->padding_below = tensor(0, 0, 0, 0);
+
+                //create border primitive
+                primitive_id border_id = input_id + "_border_" + conv_id;
+                auto b_prim = std::make_shared<border>(border_id, input_id,
+                    padding_above,
+                    padding_below,
+                    border_type::constant, 0.0f);
+
+                auto& b_prim_node = p.get_or_create(b_prim);
+
+                p.add_intermediate(b_prim_node, *node, 0, true);
+
+                continue;
+            }
+            //Symmetric padding
+            else
+            {
+                //set input_offset
+                conv_primitive->input_offset = conv_primitive->padding_above.negate().add(conv_primitive->input_offset);
+
+                //set padding_above/padding_below to zeros - input_offset do the job
+                conv_primitive->padding_above = tensor(0, 0, 0, 0);
+                conv_primitive->padding_below = tensor(0, 0, 0, 0);
+
+                node->as<convolution>().recalc_output_layout(true);
+            }
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/mark_nodes.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/mark_nodes.cpp
new file mode 100644
index 000000000..29c17a4b5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/mark_nodes.cpp
@@ -0,0 +1,43 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_impl.h"
+
+using namespace cldnn;
+
+void mark_nodes::run(program_impl& p) {
+    mark_constants(p);
+    mark_data_flow(p);
+}
+
+void mark_nodes::mark_constants(program_impl& p)
+{
+    for (auto& node : p.get_processing_order())
+    {
+        p.mark_if_constant(*node);
+    }
+}
+
+void mark_nodes::mark_data_flow(program_impl& p)
+{
+    for (auto const& node : p.get_processing_order())
+    {
+        p.mark_if_data_flow(*node);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp
new file mode 100644
index 000000000..0a13dc3d4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/post_optimize_weights.cpp
@@ -0,0 +1,131 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "pass_manager.h"
+#include "program_helpers.h"
+#include "api_extension/CPP/fused_conv_eltwise.hpp"
+#include "include/fused_conv_eltwise_inst.h"
+
+namespace cldnn
+{
+
+post_optimize_weights::post_optimize_weights(layout_optimizer& lo_ref) : base_pass("post_optimize_weights"), _lo(lo_ref) {}
+
+void post_optimize_weights::run(program_impl& p) {
+    run(p, _lo);
+}
+
+//function which prepares given primitive for weights optimization
+template <typename T>
+void post_optimize_weights::optimize_weights(T& node, layout_optimizer& lo, program_impl& p)
+{
+    auto weights_offset = node.get_primitive()->input.size();
+    auto bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->weights).size();
+    for (auto i = weights_offset; i < bias_offset; i++)
+    {
+        auto& weights = node.get_dependency(i);
+        auto* impl = node.get_selected_impl().get();
+        auto output_layout = node.get_output_layout();
+        auto& weights_node = node.get_dependency(1);
+        auto weights_layout = weights_node.get_output_layout();
+        const auto weights_type = layout_optimizer::data_type::weights;
+
+        auto reorders = lo.get_generic_layer(
+            impl->_weights_reorder_params,
+            weights.id(),
+            weights_layout,
+            weights_type);
+
+        for (auto& reorder : reorders)
+        {
+            //insert new generic_layer node to topology
+            p.add_intermediate(reorder.first, node, i, !reorder.second);
+            //set generic_layer's node output layout and implementation
+            auto& g_node = node.get_dependency(i);
+            g_node.get_output_layout(false);
+            g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
+        }
+        //set the old output layout and do not invalidate users as change of weights will not affect output layout
+        node.set_output_layout(output_layout, false);
+    }
+}
+
+//function which prepares given primitive for weights optimization
+template <>
+void post_optimize_weights::optimize_weights<fused_conv_eltwise_node>(fused_conv_eltwise_node& node, layout_optimizer& lo, program_impl& p)
+{
+    auto weights_offset = node.get_primitive()->input.size();
+    auto bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->conv.weights).size();
+    for (auto i = weights_offset; i < bias_offset; i++)
+    {
+        auto& weights = node.get_dependency(i);
+        auto* impl = node.get_selected_impl().get();
+        auto output_layout = node.get_output_layout();
+        auto& weights_node = node.get_dependency(1);
+        auto weights_layout = weights_node.get_output_layout();
+        const auto weights_type = layout_optimizer::data_type::weights;
+
+        auto reorders = lo.get_generic_layer(
+            impl->_weights_reorder_params,
+            weights.id(),
+            weights_layout,
+            weights_type);
+
+        for (auto& reorder : reorders)
+        {
+            //insert new generic_layer node to topology
+            p.add_intermediate(reorder.first, node, i, !reorder.second);
+            //set generic_layer's node output layout and implementation
+            auto& g_node = node.get_dependency(i);
+            g_node.get_output_layout(false);
+            g_node.selected_impl = g_node.type()->choose_impl(p.get_engine(), g_node);
+        }
+        //set the old output layout and do not invalidate users as change of weights will not affect output layout
+        node.set_output_layout(output_layout, false);
+    }
+}
+
+template void post_optimize_weights::optimize_weights<convolution_node>(convolution_node& node, layout_optimizer& lo, program_impl& p);
+template void post_optimize_weights::optimize_weights<deconvolution_node>(deconvolution_node& node, layout_optimizer& lo, program_impl& p);
+template void post_optimize_weights::optimize_weights<fully_connected_node>(fully_connected_node& node, layout_optimizer& lo, program_impl& p);
+
+void post_optimize_weights::run(program_impl& p, layout_optimizer& lo)
+{
+     for (auto& node : p.get_processing_order())
+    {
+        if (node->type() == convolution::type_id())
+        {
+            optimize_weights(node->as<convolution>(), lo, p);
+        }
+        else if (node->type() == deconvolution::type_id())
+        {
+            optimize_weights(node->as<deconvolution>(), lo, p);
+        }
+        else if (node->type() == fully_connected::type_id())
+        {
+            optimize_weights(node->as<fully_connected>(), lo, p);
+        }
+        else if (node->type() == fused_conv_eltwise::type_id())
+        {
+            optimize_weights(node->as<fused_conv_eltwise>(), lo, p);
+        }
+    }
+}
+
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_optimize_bias.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_optimize_bias.cpp
new file mode 100644
index 000000000..95e102ea2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/pre_optimize_bias.cpp
@@ -0,0 +1,87 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_node.h"
+#include "layout_optimizer.h"
+#include "program_impl.h"
+#include "program_helpers.h"
+#include "fully_connected_inst.h"
+
+using namespace cldnn;
+
+pre_optimize_bias::pre_optimize_bias(layout_optimizer& lo_ref) : base_pass("pre_optimize_bias"), _lo(lo_ref) {}
+
+void pre_optimize_bias::run(program_impl& p) {
+    run(p, _lo);
+}
+
+//function which prepares given primitive for weights optimization
+template <typename T>
+void pre_optimize_bias::optimize_bias(T& node, layout_optimizer& lo, program_impl& p)
+{
+    layout output_layout = node.get_output_layout();
+
+    size_t weights_offset = node.get_primitive()->input.size();
+    size_t bias_offset = weights_offset + program_helpers::wrap_if_single(node.get_primitive()->weights).size();
+    for (size_t i = bias_offset; i < node.get_dependencies().size(); ++i)
+    {
+        //find weights primitive with given pimitive_id and add it to weights_optimizer
+        const program_node& bias = node.get_dependency(i);
+        const auto bias_type = layout_optimizer::data_type::bias;
+        auto reorder = lo.get_reorder(
+            bias.get_output_layout(),
+            bias.id(),
+            bias_type,
+            node,
+            output_layout);
+
+        if (reorder.first)
+            p.add_intermediate(reorder.first, node, i, !reorder.second);
+    }
+}
+template void pre_optimize_bias::optimize_bias<convolution_node>(convolution_node& node, layout_optimizer& lo, program_impl& p);
+template void pre_optimize_bias::optimize_bias<deconvolution_node>(deconvolution_node& node, layout_optimizer& lo, program_impl& p);
+template void pre_optimize_bias::optimize_bias<fully_connected_node>(fully_connected_node& node, layout_optimizer& lo, program_impl& p);
+template void pre_optimize_bias::optimize_bias<embed_node>(embed_node& node, layout_optimizer& lo, program_impl& p);
+
+
+void pre_optimize_bias::run(program_impl& p, layout_optimizer& lo)
+{
+    for (auto& prim : p.get_processing_order())
+    {
+        if (prim->type() == convolution::type_id())
+        {
+            if (!prim->as<convolution>().weights_quantization_term())
+                optimize_bias(prim->as<convolution>(), lo, p);
+        }
+        else if (prim->type() == deconvolution::type_id())
+        {
+            optimize_bias(prim->as<deconvolution>(), lo, p);
+        }
+        else if (prim->type() == fully_connected::type_id())
+        {
+            if (!prim->as<fully_connected>().weights_quantization_term())
+                optimize_bias(prim->as<fully_connected>(), lo, p);
+        }
+        else if (prim->type() == embed::type_id())
+        {
+            optimize_bias(prim->as<embed>(), lo, p);
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prep_opt_depthwise_sep_post.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prep_opt_depthwise_sep_post.cpp
new file mode 100644
index 000000000..0f0457797
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prep_opt_depthwise_sep_post.cpp
@@ -0,0 +1,100 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_helpers.h"
+
+
+template <typename T>
+void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre(program_impl& p, T& node)
+{
+    if (!node.get_depthwise_sep_opt())
+        return;
+
+    if (node.get_groups() > 1) {
+        if (node.get_groups() >= 16) {
+            node.set_groups(1);  // use one kernel
+        }
+        return; // no concatenations requered
+    }
+
+    const auto& split = node.get_primitive()->split();
+
+    auto dependency_offset = node.get_primitive()->input.size();
+    //concatenate weights
+    {
+        //if weights were optimized it is needed to use the sizes after optimization
+        auto target_layout = program_helpers::get_weights_layout(node.get_dependency(dependency_offset), split);
+        program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
+        dependency_offset++;
+    }
+
+    //concatenate biases
+    if (node.get_primitive()->bias.size() != 0)
+    {
+        const auto& bias_layout = node.get_dependency(dependency_offset).get_output_layout();
+        auto target_layout = layout(bias_layout.data_type, cldnn::format::bfyx, { 1, 1, bias_layout.size.spatial[0] * split, 1 });
+        program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
+        dependency_offset++;
+    }
+
+    if (node.template is_type<convolution>())
+    {
+        auto& prim_node = node.template as<convolution>();
+        const auto& prim = prim_node.get_primitive();
+
+        // concatenate weights quantization factors
+        if (prim->weights_quantization_factors.size() != 0)
+        {
+            const auto& weights_quantization_layout = node.get_dependency(dependency_offset).get_output_layout();
+            auto target_layout = layout(weights_quantization_layout.data_type, cldnn::format::bfyx, { 1, 1, weights_quantization_layout.size.batch[0] * split, 1 });
+            program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
+            dependency_offset++;
+        }
+        // concatenate output callibration factors
+        if (prim->output_calibration_factors.size() != 0)
+        {
+            const auto& output_callibration_layout = node.get_dependency(dependency_offset).get_output_layout();
+            auto target_layout = layout(output_callibration_layout.data_type, cldnn::format::bfyx, { 1, 1, output_callibration_layout.size.batch[0] * split, 1 });
+            program_helpers::merge_buffers(p.get_engine(), node, target_layout, dependency_offset, dependency_offset + split);
+            dependency_offset++;
+        }
+    }
+
+    if (node.get_primitive())
+        //override node split, as only one kernel will be executed
+        node.set_split(1);
+}
+template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<convolution_node>(program_impl& p, convolution_node& node);
+template void prep_opt_depthwise_sep_post::optimize_depthwise_sep_pre<deconvolution_node>(program_impl& p, deconvolution_node& node);
+
+void prep_opt_depthwise_sep_post::run(program_impl& p)
+{
+    //depthwise separated convolution/deconvolution optimization
+    for (auto& prim : p.get_processing_order())
+    {
+        if (prim->type() == convolution::type_id())
+        {
+            optimize_depthwise_sep_pre(p, prim->as<convolution>());
+        }
+        else if (prim->type() == deconvolution::type_id())
+        {
+            optimize_depthwise_sep_pre(p, prim->as<deconvolution>());
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp
new file mode 100644
index 000000000..500a6fee8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_buffer_fusing.cpp
@@ -0,0 +1,321 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "api/CPP/eltwise.hpp"
+#include "api/CPP/pooling.hpp"
+#include "api/CPP/upsampling.hpp"
+#include "primitive_inst.h"
+#include "activation_inst.h"
+#include "concatenation_inst.h"
+#include "crop_inst.h"
+#include "eltwise_inst.h"
+#include "reshape_inst.h"
+#include "scale_inst.h"
+
+#include "pass_manager.h"
+#include "program_helpers.h"
+
+
+using namespace cldnn;
+
+//ToDo remove friendship relation from  program_node 
+
+void prepare_buffer_fusing::run(program_impl& p)
+{
+    bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
+    /*
+    We need to take care of proper ordering by types.
+    1. Concats
+    2. Crops
+    3. Others
+    Concat before crops is needed because of the crop fusing padding requirments. 
+    If crop is before concat there can be padding mismtach, since concat changes padding.
+    */
+    auto can_optimize = [](const program_node* node)
+    {
+        if (node->is_output() ||
+            (node->get_fused_activation_func() != cldnn_activation_func_t::activation_none))
+        {
+            return false;
+        }
+        return true;
+    };
+
+    //[1] First try to optimize all concats
+    auto node_itr = p.get_processing_order().begin();
+    while (node_itr != p.get_processing_order().end())
+    {
+        auto& node = (*node_itr++);
+        if (!can_optimize(node))
+            continue;
+        program_helpers::do_for_types<concatenation>(*node, [&p, is_debug](concatenation_node& node)
+        {
+            // we need to avoid mixing padded and unpadded buffer 
+            bool all_dependencies_padded = true;
+            bool all_dependencies_unpadded = true;
+            for (auto& input : node.get_dependencies()) {
+                layout l = input->get_output_layout();
+                if (static_cast<bool>(l.data_padding))
+                    all_dependencies_unpadded = false;
+                else
+                    all_dependencies_padded = false;
+            }
+            auto concat_axis = node.get_primitive()->axis;
+            auto padd = node.get_output_layout().data_padding;
+
+            tensor lower_padd = padd.lower_size();
+            tensor upper_padd = padd.upper_size();
+
+            auto upper_padd_val = node.get_output_layout().get_buffer_size().raw[concat_axis] - lower_padd.raw[concat_axis];
+            tensor lower_padd_offset = lower_padd;
+
+            std::list<std::pair<const std::vector<program_node*>, tensor>> stack = { std::make_pair(node.get_dependencies(), tensor{ 0, 0, 0, 0 }) };
+            while (!stack.empty())
+            {
+                auto nodes_list = stack.front();
+                stack.pop_front();
+
+                auto cascade_adjustment = nodes_list.second;
+                upper_padd.raw[concat_axis] = upper_padd_val;
+                lower_padd = lower_padd_offset;
+
+                //check if concatenation in place can be applied for inputs set
+                for (auto input : nodes_list.first)
+                {
+                    //if any of this node's inputs is used by more than one primitive and is not optimized concatenation then do not fuse buffers,
+                    //also, if an input is marked as network output, prevent optimizations which would affect a form of its output (unless debug flag is set)
+                    // todo: in future, if this case is problem, it can be optimized further to enable buffer fusing
+                    //       per single input rather than all/none
+                    // + restrict input types to those which support padding on x,y,b and f
+                    if (!input->support_padding() ||
+                        (input->is_output() && !is_debug) ||
+                        input->get_users().size() > 2)
+                        return;
+
+                    if (input->get_users().size() > 1)
+                    {
+                        auto user_count = input->get_users().size();
+                        for (auto& user : input->get_users())
+                            if (user->is_type<concatenation>())
+                                user_count--;
+                        if (user_count != 1) // user_cout == 0 means that input will be used only by concatenations, so we cannot apply concat in place for it
+                            return;
+                    }
+                }
+
+                //apply concatenation in place optimization
+                for (auto input : nodes_list.first)
+                {
+                    auto input_lenght = input->get_output_layout().size.raw[concat_axis];
+
+                    bool optimized_concat_input = false;
+                    if (input->type() == concatenation::type_id() && input->can_be_optimized())
+                    {
+                        if (input->as<concatenation>().get_primitive()->axis != node.get_primitive()->axis)
+                            return;
+                        optimized_concat_input = true;
+                    }
+
+                    // shrink upper pad so it points at the end of the input's buffer
+                    //
+                    //   |--- lower padd ---|                    |---------- upper padd -----------|
+                    //   |-- output padd ---| ----- input1 ------|----- input2 -----|-- out padd --|
+                    upper_padd.raw[concat_axis] -= input_lenght;
+
+                    //adjust padding sizes for cascade concatenations
+                    auto lower_padd_tmp = lower_padd;
+                    lower_padd_tmp.raw[concat_axis] += cascade_adjustment.raw[concat_axis];
+                    auto upper_padd_tmp = upper_padd;
+                    upper_padd_tmp.raw[concat_axis] -= cascade_adjustment.raw[concat_axis];
+
+                    // set new padding for input
+                    input->set_output_padding(padding(lower_padd_tmp.sizes(), upper_padd_tmp.sizes()));
+
+                    // move lower padd further
+                    //
+                    //   |-------------- lower padd -------------|---------- upper padd -----------|
+                    //   |-- output padd ---| ----- input1 ------|----- input2 -----|-- out padd --|
+
+                    lower_padd.raw[concat_axis] += input_lenght;
+
+                    if (optimized_concat_input && !input->get_dependencies().empty())
+                        stack.push_back(std::make_pair(input->get_dependencies(), input->get_output_layout().data_padding.lower_size()));
+                }
+            }
+
+            node.can_be_optimized(true);
+            for (auto dep : node.get_users())
+            {
+                dep->can_share_buffer(false);
+            }
+            if (!all_dependencies_padded && !all_dependencies_unpadded)
+                node.can_share_buffer(false);
+        });
+    }
+
+    //[2] Then try to optimize all crops
+    node_itr = p.get_processing_order().begin();
+    while (node_itr != p.get_processing_order().end())
+    {
+        auto& node = (*node_itr++);
+        if (!can_optimize(node))
+            continue;
+        // zero copy
+        program_helpers::do_for_types<crop>(*node, [&p, is_debug](crop_node& node)
+        {
+            //if the node is marked as network output, prevent optimizations which would affect a form of its output, unless debug flag is set
+            if (node.is_output() && !is_debug)
+                return;
+
+            //do not optimize when next node is concatenation which is not output
+            if (node.get_users().size() == 1 && node.get_users().front()->is_type<concatenation>() && !node.get_users().front()->is_output())
+                return;
+
+            if (node.get_dependencies().size() == 1 &&
+                node.get_users().size() > 0)
+            {
+                // optimization is available for cropping across depth(features) only
+                // if output padding has defined padding across features already it wouldn't
+                // work because it expect to have zeros in the padded area.
+                const auto& crop_layout = node.get_output_layout();
+                auto format = crop_layout.format;
+                auto crop_prim = node.get_primitive();
+                auto input_layout = node.get_dependency(0).get_output_layout();
+                const auto& crop_size = crop_layout.size;
+                const auto& out_padd = crop_layout.data_padding;
+                if (format == format::bfyx &&
+                    crop_size.batch[0] == input_layout.size.batch[0] &&
+                    crop_size.spatial[0] == input_layout.size.spatial[0] &&
+                    crop_size.spatial[1] == input_layout.size.spatial[1] &&
+                    out_padd.lower_size().feature[0] == 0 &&
+                    out_padd.upper_size().feature[0] == 0 &&
+                    out_padd.lower_size().batch[0] == 0 &&
+                    out_padd.upper_size().batch[0] == 0 &&
+                    out_padd.lower_size().spatial[0] == 0 &&
+                    out_padd.lower_size().spatial[1] == 0 &&
+                    out_padd.upper_size().spatial[0] == 0 &&
+                    out_padd.upper_size().spatial[1] == 0)
+                {
+                    //  Regular crop
+                    //  crop input buffer
+                    //  |___________data____________|
+                    //
+                    //  crop output buffer
+                    //  |-------->| offsets[f]  |<--|
+                    //            |_____data____|
+                    //             <------------>
+                    //           reference size
+                    //
+                    //  In-place crop
+                    //  crop output buffer
+                    //  |_low_pad_|__data_size__|___|<-upper pad
+
+                    node.set_output_padding(padding(
+                        { out_padd.lower_size().batch[0], crop_prim->offsets.feature[0], out_padd.lower_size().spatial[0], out_padd.lower_size().spatial[1] },
+                        { out_padd.upper_size().batch[0], input_layout.size.feature[0] - crop_prim->offsets.feature[0] - crop_size.feature[0],
+                            out_padd.upper_size().spatial[0], out_padd.upper_size().spatial[1] }));
+                    node.can_be_optimized(true);
+                }
+            }
+        });
+    }
+
+    //[3] Optimize all other primitives
+    node_itr = p.get_processing_order().begin();
+    while (node_itr != p.get_processing_order().end())
+    {
+        auto& node = (*node_itr++);
+        if (!can_optimize(node))
+            continue;
+        program_helpers::do_for_types<reshape>(*node, [&p](reshape_node& node)
+        {
+            node.get_output_layout();
+            if (node.is_in_place()
+                && node.get_fused_activation_func() == activation_none)
+                node.can_be_optimized(true);
+        });
+        program_helpers::do_for_types<reorder>(*node, [&p](reorder_node& node)
+        {
+            auto& input = node.input();
+            auto output_layout = node.get_output_layout();
+            //This is WA for topologies that due to additional reorders added perform worse with conv1x1 optimization
+            auto remove_bf8_xy_opt = ((input.is_type<pooling>() || input.is_type<concatenation>()) &&
+                output_layout.format == format::bf8_xy16 && input.get_users().size() == 1);
+            //Remove reorder from convolution 1x1 to bfyx in some conditions
+            auto remove_byxf_opt = (input.is_type<convolution>() &&
+                input.get_users().size() == 1 &&
+                input.get_output_layout().format == format::byxf);
+            //check if all inputs user have the same format
+            auto all_users_same_format = true;
+            auto input_user_layout_format = input.get_users().front()->get_output_layout().format;
+            for (auto const& user : input.get_users())
+            {
+                if (user->get_output_layout().format != input_user_layout_format)
+                {
+                    all_users_same_format = false;
+                    break;
+                }
+            }
+            auto same_data_type = input.get_output_layout().data_type == output_layout.data_type;
+            //Optimization only available in case of layers that support different input and output formats.
+            //todo: new api needs to be created to read such caps
+            if (!(input.is_type<pooling>() && (output_layout.format == format::bfyx || output_layout.format == format::yxfb || output_layout.format == format::byxf) && all_users_same_format && same_data_type) &&
+                !remove_bf8_xy_opt &&
+                !(input.is_type<convolution>() && input.get_output_layout().format == format::bf8_xy16) &&
+                !(input.is_type<eltwise>() && (output_layout.format == format::bfyx || output_layout.format == format::yxfb || output_layout.format == format::byxf) && all_users_same_format && same_data_type) &&
+                !(remove_byxf_opt && (node.get_users().front()->is_type<eltwise>() || node.get_users().front()->is_type<pooling>())))
+                return;
+
+            if (remove_bf8_xy_opt)
+            {
+                auto users_user_layout = node.get_users().front()->get_users().front()->get_output_layout();
+                // if users_user_layout is still bf8_yx16 (stacked convolutions) then leave the reorder
+                if (users_user_layout.format == format::bf8_xy16)
+                    return;
+                auto input_layout = input.get_output_layout();
+                auto target_layout = layout(input_layout.data_type, users_user_layout.format, input_layout.size, input_layout.data_padding);
+                input.set_output_layout(target_layout, false);
+            }
+            else if (remove_byxf_opt)
+            {
+                auto user = node.get_users().front();
+                auto users_users = node.get_users().front()->get_users();
+
+                for (auto const& users_user : users_users)
+                {
+                    if (users_user->get_output_layout().format != format::byxf && !users_user->is_type<eltwise>())
+                    {
+                        remove_byxf_opt = false;
+                        break;
+                    }
+                }
+
+                if (remove_byxf_opt)
+                {
+                    auto input_layout = input.get_output_layout();
+                    user->set_output_layout(input_layout, false);
+                }
+            }
+            else
+                input.set_output_layout(output_layout, false);
+
+            node.can_be_optimized(true);
+            p.extract_and_remove(node); //try to remove redundant reorders
+        });
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_depthwise_sep_opt.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_depthwise_sep_opt.cpp
new file mode 100644
index 000000000..dd288a2ce
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_depthwise_sep_opt.cpp
@@ -0,0 +1,70 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_helpers.h"
+
+
+template <typename T>
+void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre(T& node)
+{
+    if (node.get_groups() == 1) {
+        //enable optimization only when IFM / split <= 8 (otherwise scheduling multiple opt kernels is better) and split >= 16
+        if (!(node.get_dependency(0).get_output_layout().size.feature[0] / node.get_primitive()->split() <= 8) ||
+            !(node.get_primitive()->split() >= 16))
+            return;
+
+        //make sure the weights and biases are data type and
+        //are not reused in other primitives as they will be overriden with concatenated ones
+        for (size_t i = 1; i < node.get_dependencies().size(); i++)
+        {
+            auto& weights_or_biases = node.get_dependency(i);
+            if (weights_or_biases.get_users().size() > 1 || weights_or_biases.type() != data::type_id())
+                return;
+        }
+    }
+    else {
+        //enable optimization only when IFM / groups <= 8 (otherwise scheduling multiple opt kernels is better) and groups >= 16
+        if (!(node.get_dependency(0).get_output_layout().size.feature[0] / node.get_groups() <= 8) ||
+            !(node.get_groups() >= 16))
+            return;
+    }
+
+    node.set_depthwise_sep_opt(true);
+}
+
+template void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre<convolution_node>(convolution_node& node);
+template void prepare_depthwise_sep_opt::optimize_depthwise_sep_pre<deconvolution_node>(deconvolution_node& node);
+
+void prepare_depthwise_sep_opt::run(program_impl& p)
+{
+    //depthiwise separated convolution/deconvolution optimization
+    for (auto& prim : p.get_processing_order())
+    {
+        if (prim->type() == convolution::type_id())
+        {
+            optimize_depthwise_sep_pre(prim->as<convolution>());
+        }
+        else if (prim->type() == deconvolution::type_id())
+        {
+            optimize_depthwise_sep_pre(prim->as<deconvolution>());
+        }
+    }
+}
+
+
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp
new file mode 100644
index 000000000..8c536cc40
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_padding.cpp
@@ -0,0 +1,146 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pooling_inst.h"
+#include "program_node.h"
+#include "pass_manager.h"
+#include "convolution_inst.h"
+#include "sliding_window_utils.h"
+
+using namespace cldnn;
+
+void prepare_padding::run(program_impl& p)
+{
+    if (output_size_handling_enabled)
+    {
+        // Prepare upper padding for primitives that support output_size parameter.
+        for (const auto& node : p.get_processing_order())
+        {
+            if (node->is_type<convolution>())
+            {
+                auto& prim_node = node->as<convolution>();
+                const auto& prim = prim_node.get_primitive();
+
+                if (!prim->with_output_size)
+                    continue;
+
+                auto filter_size = prim_node.weights(0).get_output_layout().size;
+
+                auto needed_padding = calc_sliding_window_needed_input_padding(
+                    prim_node.input().get_output_layout(),
+                    prim->output_size, filter_size, prim->input_offset, prim->stride, prim->dilation, false, 1);
+                p.apply_needed_padding(prim_node, prim_node.input(), needed_padding);
+            }
+            else if (node->is_type<deconvolution>())
+            {
+                auto& prim_node = node->as<deconvolution>();
+                const auto& prim = prim_node.get_primitive();
+
+                if (!prim->with_output_size)
+                    continue;
+
+                auto filter_size = prim_node.weights(0).get_output_layout().size;
+
+                auto needed_padding = calc_sliding_window_needed_input_padding(
+                    prim_node.input().get_output_layout(),
+                    prim->output_size, filter_size, prim->input_offset, prim->stride, { 1, 1, 1, 1 }, true, 1);
+
+                p.apply_needed_padding(prim_node, prim_node.input(), needed_padding);
+            }
+            else if (node->is_type<pooling>())
+            {
+                auto& prim_node = node->as<pooling>();
+                const auto& prim = prim_node.get_primitive();
+
+                if (!prim->with_output_size)
+                    continue;
+
+                // NOTE: Currently there is no pooling implementation/pooling mode which does not check input data range.
+                // There is no need to add padding requirements on pooling inputs.
+                //auto needed_padding = calc_sliding_window_needed_input_padding(
+                //    prim_node.input().get_output_layout(),
+                //    prim->output_size, prim->size, prim->input_offset, prim->stride, {1, 1, 1, 1}, false, 1);
+                auto needed_padding = prim_node.input().get_output_layout().data_padding;
+
+                p.apply_needed_padding(prim_node, prim_node.input(), needed_padding);
+            }
+        }
+    }
+
+    // Prepare optimized padding for bfyx convolution.
+    for (auto& pair : p.nodes_map)
+    {
+        if (pair.second->type() != convolution::type_id())
+            continue;
+
+        auto& node = pair.second->as<convolution>();
+        if (node.get_dependencies().empty())
+            continue;
+
+        auto conv = node.get_primitive();
+        auto& conv_input_node = node.get_dependency(0);
+        auto conv_layout = node.get_output_layout();
+
+        // right now output padding optimization is only available for bfyx format and data type = float32
+        if (conv_layout.format != cldnn::format::bfyx
+            && conv_layout.format != cldnn::format::bf8_xy16
+            && conv_layout.format != cldnn::format::byxf_af32
+            && conv_layout.format != cldnn::format::fs_bs_yx_bsv4_fsv32
+            && conv_layout.format != cldnn::format::b_fs_yx_fsv4)
+        {
+            continue;
+        }
+
+        // We shoudn't apply any padding to nodes which are marked as outputs
+        if (conv_input_node.is_output())
+            continue;
+
+        // Calculating input padding needed for convolution
+        auto& filter_node = node.as<convolution>().weights(0);
+        auto filter_prim = filter_node.get_primitive();
+
+        layout filter_layout = filter_node.get_output_layout();
+
+        // convolution have only one input primitive
+        auto prev_prim_output_layout = conv_input_node.get_output_layout();
+
+        // Compute initial required paddings for primitive used as input for convolution.
+        auto input_offset = conv->input_offset;
+        auto stride = conv->stride;
+        auto dilation = conv->dilation;
+
+        auto input_limit_x = input_offset.spatial[0] + (conv_layout.size.spatial[0] - 1) * stride.spatial[0] + (filter_layout.size.spatial[0] - 1) * dilation.spatial[0] + 1;
+        auto input_limit_y = input_offset.spatial[1] + (conv_layout.size.spatial[1] - 1) * stride.spatial[1] + (filter_layout.size.spatial[1] - 1) * dilation.spatial[1] + 1;
+
+        auto left_padding = std::max(-input_offset.spatial[0], 0);
+        auto top_padding = std::max(-input_offset.spatial[1], 0);
+        auto right_padding = std::max(input_limit_x - prev_prim_output_layout.size.spatial[0], 0);
+        auto bottom_padding = std::max(input_limit_y - prev_prim_output_layout.size.spatial[1], 0);
+
+        // Adjust right padding, so entire buffer size in X dimension is properly aligned.
+        // TODO: NOTE: Will be reenabled with next check-in once heuristic for line-aligned algorithm will be added.
+        //auto needed_buffer_size_x = static_cast<cldnn::tensor::value_type>(
+        //    round_up_to(left_padding + prev_prim_output_layout.size.spatial[0] + right_padding, 16));
+        //right_padding = needed_buffer_size_x - left_padding - prev_prim_output_layout.size.spatial[0];
+
+        cldnn::padding needed_padding({ 0, 0, left_padding, top_padding }, { 0, 0, right_padding, bottom_padding }, 0);
+        needed_padding = padding::max(prev_prim_output_layout.data_padding, needed_padding);
+
+        p.apply_needed_padding(node, conv_input_node, needed_padding);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
new file mode 100644
index 000000000..e204b05ae
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/prepare_primitive_fusing.cpp
@@ -0,0 +1,542 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "api/CPP/pooling.hpp"
+#include "api/CPP/proposal.hpp"
+#include "api/CPP/roi_pooling.hpp"
+
+#include "program_helpers.h"
+#include "pass_manager.h"
+
+#include "activation_inst.h"
+#include "batch_norm_inst.h"
+#include "batch_norm_grad_inst.h"
+#include "crop_inst.h"
+#include "eltwise_inst.h"
+#include "fused_conv_bn_scale_inst.h"
+#include "fused_conv_eltwise_inst.h"
+#include "lrn_inst.h"
+#include "mutable_data_inst.h"
+#include "mvn_inst.h"
+#include "normalize_inst.h"
+#include "permute_inst.h"
+#include "reshape_inst.h"
+#include "softmax_inst.h"
+#include "scale_inst.h"
+#include "scale_grad_weights_inst.h"
+#include "upsampling_inst.h"
+
+
+void prepare_primitive_fusing::fuse_skip_layers(program_impl& p, program_node* node)
+{
+    program_helpers::do_for_types<eltwise>(*node, [&p](eltwise_node& node)
+    {
+        if (node.get_primitive()->mode != eltwise_mode::sum || node.inputs_count() != 2)
+            return;
+
+        // both inputs should be deconvolutions
+        if (!(node.input(0).is_type<deconvolution>() && node.input(1).is_type<deconvolution>()))
+        {
+            return;
+        }
+
+        auto& to_fuse_with = node.input(0);
+        int to_fuse_index = 1;
+
+        //remove dependencies and users of elwtise that is going to be extracted
+        p.add_connection(node.input(to_fuse_index), to_fuse_with);
+        p.remove_connection(node.input(to_fuse_index), node);
+
+        p.get_processing_order().erase(&to_fuse_with);
+        p.get_processing_order().insert(&node, &to_fuse_with);
+
+        if (node.get_fused_activation_func() != activation_none)
+            to_fuse_with.set_fused_activation(node.get_fused_activation_func(), node.get_fused_activation_params());
+        to_fuse_with.set_output_padding(node.get_output_layout().data_padding);
+
+        p.extract_and_remove(node);
+    });
+}
+
+template<typename T>
+static bool node_is_type(program_node* n)
+{
+    return n->is_type<T>();
+}
+
+void prepare_primitive_fusing::fuse_conv_bn_scale(program_impl& p, program_node* node)
+{
+    program_helpers::do_for_types<convolution>(*node, [&p](convolution_node& node)
+    {
+        if (node.get_users().size() > 2)
+            return;
+
+        auto found_bn = std::find_if(node.get_users().begin(), node.get_users().end(), node_is_type<batch_norm>);
+        auto bn_node = found_bn != node.get_users().end() ? *found_bn : nullptr;
+        if (bn_node != nullptr)
+        {
+            if (bn_node->get_users().size() > 2)
+                return;
+
+            auto found_scale = std::find_if(bn_node->get_users().begin(), bn_node->get_users().end(), node_is_type<scale>);
+            auto sc_node = found_bn != node.get_users().end() ? *found_scale : nullptr;
+            if (sc_node != nullptr)
+            {
+                int bn_index = int(std::distance(node.get_users().begin(), found_bn));
+                int sc_index = int(std::distance(bn_node->get_users().begin(), found_scale));
+                auto scale_prim = std::static_pointer_cast<const scale>(sc_node->get_primitive());
+                auto bn_prim = std::static_pointer_cast<const batch_norm>(bn_node->get_primitive());
+                auto prim = node.get_primitive();
+                bool training = false;
+
+                if (node.get_users().size() == 2)
+                {
+                    training = true;
+                    float zero = 0.0f;
+                    layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
+
+                    auto bn_backw = node.get_users().begin();
+                    std::advance(bn_backw, bn_index == 0 ? 1 : 0);
+                    if (!(*bn_backw)->is_type<batch_norm_grad>())
+                        return;
+                    auto sc_backw = bn_node->get_users().begin();
+                    std::advance(sc_backw, sc_index == 0 ? 1 : 0);
+                    if (!(*sc_backw)->is_type<scale_grad_weights>())
+                        return;
+
+                    auto conv_out_prim = std::make_shared<mutable_data>(prim->id + "_fused_conv_out", memory::attach(dummy_layout, &zero, 1));
+                    auto& conv_out_node = p.get_or_create(conv_out_prim);
+                    auto conv_out_mem = p.get_engine().allocate_memory(node.get_output_layout());
+                    conv_out_node.as<mutable_data>().attach_memory(*conv_out_mem, false);
+                    p.add_intermediate(conv_out_node, **bn_backw, 1, true);
+
+                    auto bn_out_prim = std::make_shared<mutable_data>(prim->id + "_fused_bn_out", memory::attach(dummy_layout, &zero, 1));
+                    auto& bn_out_node = p.get_or_create(bn_out_prim);
+                    auto bn_out_mem = p.get_engine().allocate_memory(bn_node->get_output_layout());
+                    bn_out_node.as<mutable_data>().attach_memory(*bn_out_mem, false);
+                    p.add_intermediate(bn_out_node, **sc_backw, 0, true);
+                }
+
+                auto new_conv = std::make_shared<fused_conv_bn_scale>(prim->id + "_fused", prim->input[0], prim->weights.ref(), prim->bias.ref(), bn_prim->epsilon,
+                    scale_prim->input[1], scale_prim->bias, prim->stride, prim->dilation, prim->input_offset, bn_prim->inv_variance,
+                    prim->with_activation, prim->activation_negative_slope, prim->output_padding);
+                auto& new_node = p.get_or_create(new_conv);
+                p.replace(node, new_node);
+
+                while (sc_node->get_dependencies().size() > 1)     // ToDo: here we modify users and dependencies,
+                                                                   // It should be done through public methods in program_node/program_impl
+                                                                   // to avoid friend declarations
+                {
+                    auto& dep = sc_node->get_dependency(sc_node->get_dependencies().size() - 1);
+                    p.remove_connection(dep, *sc_node);
+                    dep.users.push_back(&new_node);
+                    if (sc_node->get_dependencies().size() == 1)
+                        new_node.dependencies.insert(new_node.dependencies.begin() + 1, &dep);
+                    else
+                        new_node.dependencies.push_back(&dep);
+                }
+                p.extract_and_remove(*sc_node);
+                while (bn_node->get_dependencies().size() > 1)
+                {
+                    auto& dep = bn_node->get_dependency(bn_node->get_dependencies().size() - 1);
+                    p.remove_connection(dep, *bn_node);
+                    new_node.dependencies.push_back(&dep);
+                }
+                p.extract_and_remove(*bn_node);
+                auto inv_var_node = std::find_if(new_node.dependencies.begin(), new_node.dependencies.end(),
+                    [&new_conv](const program_node* node) { return node->id().find(new_conv->inv_variance) != std::string::npos; });
+                (*inv_var_node)->users.push_back(&new_node);
+
+                if (training)
+                {
+                    auto user = std::find_if(new_node.get_users().begin(), new_node.get_users().end(),
+                        [](const program_node* node) { return node->id().find("_fused_conv_out") != std::string::npos; });
+                    p.reverse_connection(new_node, **user);
+                    user = std::find_if(new_node.get_users().begin(), new_node.get_users().end(),
+                        [](const program_node* node) { return node->id().find("_fused_bn_out") != std::string::npos; });
+                    p.reverse_connection(new_node, **user);
+                    p.get_processing_order().calculate_BFS_processing_order();
+                }
+            }
+        }
+    });
+}
+
+void prepare_conv_eltw_fusing::fuse_conv_eltwise(program_impl& p, program_node* node)
+{
+    // make sure this convolution have only 1 user and it's eltwise
+    // maek sure convolution is not an output
+    if (node->users.size() != 1 ||
+        node->is_output())
+        return;
+
+    if (!(*(node->users.begin()))->is_type<eltwise>())
+        return;
+
+    convolution_node * conv_node = static_cast<convolution_node*>(node);
+    convolution & conv = const_cast<convolution&>(*conv_node->get_primitive());
+
+    // currently works only for this format
+    if ( (conv_node->get_output_layout().format != cldnn::format::fs_bs_yx_bsv4_fsv32 || conv_node->get_output_layout().data_type != cldnn::data_types::i8) &&
+         (conv_node->get_output_layout().format != cldnn::format::bfyx || conv_node->get_output_layout().data_type != cldnn::data_types::f32) &&
+         (conv_node->get_output_layout().format != cldnn::format::yxfb || conv_node->get_output_layout().data_type != cldnn::data_types::f16)
+       )
+        return;
+
+    auto weights_node_ptr = p.nodes_map.find(conv.weights[0])->second;
+    auto filter_size = weights_node_ptr->get_output_layout().size;
+
+    // make sure if this is conv 1x1 its stride is 1x1
+    if (filter_size.spatial[0] == 1 && filter_size.spatial[1] == 1)
+    {
+        if (conv.stride.spatial[0] != 1 || conv.stride.spatial[1] != 1)
+            return;
+    }
+    else
+        return;
+
+    eltwise_node * eltw_node = static_cast<eltwise_node*>(*(node->users.begin()));
+
+    // make sure eltwise have only 2 inputs
+    // make sure eltwise is not an output
+    if (eltw_node->inputs_count() != 2 ||
+        eltw_node->is_output())
+        return;
+
+    // only single ADD operation is currently supported
+    // TODO: enable more
+    eltwise & eltw = const_cast<eltwise&>(*eltw_node->get_primitive());
+    if (eltw.mode != eltwise_mode::sum)
+        return;
+
+    if (eltw_node->get_fused_activation_func() == activation_relu_negative_slope)
+    {
+        eltw.with_activation = true;
+        eltw.activation_negative_slope = eltw_node->get_fused_activation_params().a;
+    }
+    else
+    {
+        return;
+    }
+
+    int eltw_fused_input_idx; // <-- this input gets fused with eltwise
+    int eltw_second_input_idx; // <-- this input is not fused, so we add it in kernel
+    // here we check which input gets execute as last one, and fuse it
+    if (p.processing_order.get_processing_number(&eltw_node->input(0)) < p.processing_order.get_processing_number(&eltw_node->input(1)))
+    {
+        eltw_fused_input_idx = 1;
+        eltw_second_input_idx = 0;
+    }
+    else
+    {
+        eltw_fused_input_idx = 0;
+        eltw_second_input_idx = 1;
+    }
+
+    // we check if input to fuse is convolution that we're right now processing
+    if (eltw_node->input(eltw_fused_input_idx).id() != conv.id)
+        return;
+
+    primitive_id conv_id = conv_node->id();
+
+    // get strides for other than our conv input
+    std::vector<tensor> new_eltw_strides;
+    // conv strides modified by eltwise stride
+    tensor new_conv_stride = conv.stride;
+
+    if (eltw.stride.size() == eltw_node->inputs_count())
+    {
+        // for cases when stride from eltwise must be applied into fused convolution
+        new_conv_stride.spatial[0] *= eltw.stride[eltw_fused_input_idx].spatial[0];
+        new_conv_stride.spatial[1] *= eltw.stride[eltw_fused_input_idx].spatial[1];
+        // stride from non-fused eltwise input
+        new_eltw_strides.push_back(eltw.stride[eltw_second_input_idx]);
+    }
+
+    auto fused_conv_eltw = std::make_shared<fused_conv_eltwise>(
+        conv.id + "_fused_" + eltw.id,
+        conv_node->input().id(),
+        eltw_node->input(eltw_second_input_idx).id(),
+        eltw.mode,
+        conv.weights.ref(),
+        conv.bias.ref(),
+        conv.weights_quantization_factors.ref(),
+        conv.output_calibration_factors.ref(),
+        conv.input_quantization_factor,
+        eltw.output_calibration_factors,
+        new_eltw_strides,
+        new_conv_stride,
+        conv.input_offset,
+        conv.dilation,
+        conv.with_activation,
+        conv.activation_negative_slope,
+        eltw.with_activation,
+        eltw.activation_negative_slope
+        );
+
+    auto& new_node = p.get_or_create(fused_conv_eltw);
+    p.replace(*conv_node, new_node);
+
+    // right now new node's user is eltwise, let's clear users and take eltwise's users
+    new_node.users.clear();
+    p.replace_all_usages(*eltw_node, new_node);
+
+    // TODO: do it better, now it's done in a very ugly way to have good dependency order
+    std::vector<program_node*> updated_deps;
+    updated_deps.push_back(new_node.dependencies[0]);
+
+    // add second input
+    updated_deps.push_back(&eltw_node->input(eltw_second_input_idx));
+    eltw_node->input(eltw_second_input_idx).users.push_back(&new_node);
+
+    for (size_t d = 1; d < new_node.dependencies.size(); d++)
+    {
+        updated_deps.push_back(new_node.dependencies[d]);
+    }
+
+    if (eltw_node->output_calibration_term())
+    {
+        updated_deps.push_back(&eltw_node->output_calibration_factors());
+        eltw_node->output_calibration_factors().users.push_back(&new_node);
+    }
+
+    new_node.dependencies = updated_deps;
+
+    while (eltw_node->dependencies.size() > 1)
+    {
+        auto& dep = eltw_node->get_dependency(eltw_node->get_dependencies().size() - 1);
+        p.remove_connection(dep, *eltw_node);
+    }
+
+    p.extract_and_remove(*eltw_node);
+    new_node.recalc_output_layout();
+}
+
+void prepare_primitive_fusing::run(program_impl& p)
+{
+    bool is_debug = p.get_options().get<build_option_type::debug>()->enabled();
+
+    std::list<program_node*> conv_nodes;
+    auto itr = p.get_processing_order().begin(); //note we need to use iterators since currently processed element can be removed
+    while (itr != p.get_processing_order().end())
+    {
+        auto node_itr = itr++;
+        if ((*node_itr)->is_type<convolution>())
+            conv_nodes.push_back(*node_itr);
+    }
+
+    // Disabled due to kernel being not optimized
+    //itr = conv_nodes.begin();
+    //while (itr != conv_nodes.end())
+    //{
+    //    auto node_itr = itr++;
+    //    auto& node = (*node_itr);
+
+    //    fuse_conv_bn_scale(p, node);
+    //}
+
+    //This loop tries fusing several reorders one by one (if present) into one reorder
+    itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end())
+    {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        if (node->is_output())
+            continue;
+
+        program_helpers::do_for_types<reorder>(*node, [&p, is_debug](reorder_node& node)
+        {
+            auto& input = node.input();
+
+            //Restrictions:
+            // - inputs cannot be padded
+            // - primitives input cannot be output
+            // - input was optimized
+            if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.get_dependencies().size() != 1 ||
+                input.can_be_optimized())
+                return;
+
+            // - check if previous node is reorder with 1 user (and if the layouts are the same - remove reorder)
+            // - do not fuse if current node has mean subtract
+            if (input.get_users().size() != 1 ||
+                (!input.is_type<reorder>() && input.get_output_layout() != node.get_users().front()->get_output_layout()) ||
+                node.has_mean() || !node.get_primitive()->subtract_per_feature.empty())
+                return;
+
+            input.set_output_layout(node.get_output_layout(), false);
+            p.extract_and_remove(node);
+        });
+    }
+
+    itr = p.processing_order.begin();
+    while (itr != p.processing_order.end())
+    {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        program_helpers::do_for_types<activation>(*node, [&p, is_debug](activation_node& node)
+        {
+            auto& input = node.input();
+
+            //Restrictions:
+            // - inputs cannot be padded
+            // - primitives input cannot be output
+            // - no activation additional input
+            // - input was optimized
+            if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
+                node.get_dependencies().size() != 1 || input.can_be_optimized())
+                return;
+
+            // - check if there is no activation fused already
+            // - limit to primitives which implementations support activation fusing
+            if (input.get_users().size() != 1 || input.get_fused_activation_func() != activation_none ||
+                //TODO: new api needs to be created to read such caps
+                //right now use whitelist so no new primitives will be affected in case of lack of fused activation support
+                (!input.is_type<batch_norm>() && !input.is_type<concatenation>() && !input.is_type<convolution>() &&
+                    !input.is_type<crop>() && !input.is_type<deconvolution>() && !input.is_type<eltwise>() &&
+                    !input.is_type<fully_connected>() && !input.is_type<lrn>() && !input.is_type<normalize>() &&
+                    !input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
+                    !input.is_type<reshape>() && !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
+                    !input.is_type<softmax>() && !input.is_type<upsampling>() && !input.is_type<mvn>()))
+                return;
+
+            input.set_fused_activation(node.get_primitive()->activation_func, node.get_primitive()->additional_params);
+            input.set_output_padding(node.get_output_layout().data_padding);
+
+            p.extract_and_remove(node);
+        });
+    }
+
+    //This loop tries fusing eltwise (sum) with deconvolution
+    itr = p.get_processing_order().begin();
+    while (itr != p.get_processing_order().end())
+    {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        fuse_skip_layers(p, node);
+    }
+}
+
+void prepare_conv_eltw_fusing::run(program_impl& p)
+{
+    std::list<program_node*> conv_nodes;
+    auto itr = p.get_processing_order().begin(); //note we need to use iterators since currently processed element can be removed
+    while (itr != p.get_processing_order().end())
+    {
+        auto node_itr = itr++;
+        if ((*node_itr)->is_type<convolution>())
+            conv_nodes.push_back(*node_itr);
+    }
+
+    //fuse conv + eltwise after activations
+    itr = conv_nodes.begin();
+    while (itr != conv_nodes.end())
+    {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        fuse_conv_eltwise(p, node);
+    }
+}
+
+void prepare_conv_eltw_read_write_opt::conv_eltwise_read_write_opt(program_impl& p, program_node* node)
+{
+    fused_conv_eltwise_node * fused_conv_eltw_node = static_cast<fused_conv_eltwise_node*>(node);
+    program_node * second_input_node = &fused_conv_eltw_node->get_dependency(1);
+    // output layouts must match
+    if (fused_conv_eltw_node->get_output_layout() != second_input_node->get_output_layout()) // check whole layout
+    {
+        return;
+    }
+    
+    // buffer shared between primitives, if second input is mutable data, then we can reuse this memory
+    auto shared_buffer_mem = second_input_node->is_type<mutable_data>() ? second_input_node->as<mutable_data>().get_attached_memory_ptr() : p.get_engine().allocate_memory(node->get_output_layout());
+
+    float zero = 0.0f;
+    layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
+    
+    // this one is the first one to write data to
+    auto rw_output_prim0 = std::make_shared<mutable_data>(fused_conv_eltw_node->id() + "_RW_OPT_use", memory::attach(dummy_layout, &zero, 1));
+    // this one already expects data to be inside
+    auto rw_output_prim1 = std::make_shared<mutable_data>(fused_conv_eltw_node->id() + "_RW_OPT_reuse", memory::attach(dummy_layout, &zero, 1));
+
+    auto& rw_output_node0 = p.get_or_create(rw_output_prim0);
+    auto& rw_output_node1 = p.get_or_create(rw_output_prim1);
+
+    rw_output_node0.as<mutable_data>().attach_memory(*shared_buffer_mem, false);
+    rw_output_node1.as<mutable_data>().attach_memory(*shared_buffer_mem, false);
+
+    // add connection between second input node -> rw_output_node0 -> node
+    p.add_intermediate(rw_output_node0, *node, 1, true);
+    // replace other connections with rw_output_node0
+    auto itr = second_input_node->users.begin();
+    while (itr != second_input_node->users.end())
+    {
+        auto& usage = (*itr++);
+        if (usage->id() != rw_output_node0.id() && usage->id() != node->id())
+        {
+            usage->replace_dependency(*second_input_node, rw_output_node0);
+        }
+    }
+    // add connection between node -> rw_output_node1 -> after nodes
+    //first find index in our first user's dependency
+    size_t dep_idx = 0;
+    for (auto dep : (*(node->users.begin()))->dependencies)
+    {
+        if (dep->id() == node->id())
+            break;
+        dep_idx++;
+    }
+    p.add_intermediate(rw_output_node1, **(node->users.begin()), dep_idx, true);
+    // replace other connections with rw_output_node1
+    itr = node->users.begin();
+    while (itr != node->users.end())
+    {
+        auto& usage = (*itr++);
+        if (usage->id() != rw_output_node1.id() && usage->id() != node->id())
+        {
+            usage->replace_dependency(*node, rw_output_node1);
+        }
+    }
+    fused_conv_eltwise* prim = const_cast<fused_conv_eltwise*>((fused_conv_eltw_node->get_primitive().get()));
+    prim->second_input_in_output = true;
+}
+
+void prepare_conv_eltw_read_write_opt::run(program_impl& p)
+{
+    std::list<program_node*> fused_conv_eltw_nodes;
+    auto itr = p.get_processing_order().begin(); //note we need to use iterators since currently processed element can be removed
+    while (itr != p.get_processing_order().end())
+    {
+        auto node_itr = itr++;
+        if ((*node_itr)->is_type<fused_conv_eltwise>())
+            fused_conv_eltw_nodes.push_back(*node_itr);
+    }
+
+    //fuse conv + eltwise after activations
+    itr = fused_conv_eltw_nodes.begin();
+    while (itr != fused_conv_eltw_nodes.end())
+    {
+        auto node_itr = itr++;
+        auto& node = (*node_itr);
+
+        conv_eltwise_read_write_opt(p, node);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp
new file mode 100644
index 000000000..3b7fd3300
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/propagate_constants.cpp
@@ -0,0 +1,194 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_node.h"
+#include "engine_impl.h"
+#include "program_impl.h"
+#include "network_impl.h"
+#include "data_inst.h"
+
+
+using namespace cldnn;
+
+//ToDo remove friendship relation from  program_node and program_impl
+void propagate_constants::run(program_impl& p)
+{
+    for (auto& node : p.get_processing_order())
+    {
+        if (node->is_constant())
+            handle_constant(p, *node);
+    }
+
+    auto&& to_replace = calculate(p.get_engine());
+
+    //remove all nodes which are no longer relevant, i.e. nodes which:
+    // 1. are constants, and
+    // 2. do not have non-const user (so their data are not used during inference), and
+    // 3. are not marked as outputs.
+    // in case if node has either non-const user or is marked as output, it should be replace with cldnn::data rather than removed (see next loop)
+    auto proc_itr = p.get_processing_order().begin();
+    while (proc_itr != p.get_processing_order().end())
+    {
+        auto& node = (*proc_itr++);
+        if (!node->is_constant())
+            continue;
+        if (has_non_const_user(*node) || (node->is_output() && !node->is_type<data>()))
+            continue;
+
+        auto& users = node->users;
+        auto& deps = node->dependencies;
+
+        for (size_t idx = 0; idx < deps.size(); idx++)
+        {
+            deps.at(idx)->users.remove(node);
+        }
+        deps.clear();
+
+        for (auto& usr : users) {
+            auto& usr_deps = usr->dependencies;
+            usr_deps.erase(std::remove(usr_deps.begin(), usr_deps.end(), node), usr_deps.end());
+        }
+        users.clear();
+
+        if (!node->is_output())
+        {
+            auto rem = p.remove_if_dangling(*node);
+            assert(rem && "Non-output constant node which has only constant users should have been removed during constants propagation pass");
+            (void)rem;
+        }
+    }
+
+    //replace all constant nodes which are relevant for inference (either used by non-const user or marked as output) with recomputed cldnn::data
+    for (auto& cout : to_replace)
+    {
+        auto& id_to_replace = cout.first;
+
+        //TODO: do not use API primitives internally and get rid of this last 'cldnn::memory' internal usage
+        memory api_memory = details::memory_c_to_cpp_converter::convert(api_cast(cout.second.get()));
+        //c-cpp converter does not retain since normally it is done inside API-impl layer (cldnn.cpp) so we need to do it manually
+        cout.second->add_ref();
+
+        auto const_data = std::make_shared<data>("_cldnn_const_prop_" + id_to_replace, api_memory /* <<< REMOVE ME WHEN POSSIBLE */);
+        auto& new_node = p.get_or_create(const_data);
+        auto& curr_node = p.get_node(id_to_replace);
+
+        if (!curr_node.is_type<generic_layer>())
+        {
+            auto curr_node_deps = curr_node.get_dependencies();
+            for (auto& dep : curr_node_deps)
+            {
+                auto dep_users = dep->get_users();
+                for (auto& dep_user : dep_users)
+                {
+                    if (dep_user == &curr_node)
+                        p.remove_connection(*dep, curr_node);
+                }
+            }
+        }
+
+        curr_node.dependencies.clear();
+        //remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any dependencies)
+        curr_node.users.erase(
+            std::remove_if(curr_node.users.begin(), curr_node.users.end(), [](program_node* node) { return node->is_constant(); }),
+            curr_node.users.end()
+        );
+        p.replace(curr_node, new_node);
+    }
+}
+
+bool propagate_constants::has_non_const_user(program_node& node) const {
+    if (!node.is_constant()) return true;
+    for (auto &user : node.get_users())
+    {
+        if (!user->is_constant()) return true;
+    }
+    return false;
+}
+
+std::list<std::pair<primitive_id, memory_impl::ptr>> propagate_constants::calculate(engine_impl &engine)
+{
+    if (!has_non_trivial_constants)
+        return{};
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(false));
+    bo.set_option(build_option::outputs(const_outputs));
+    network_impl::ptr net = engine.build_network(nodes, bo, true);
+    for (auto& cin : const_inputs)
+        net->set_input_data(cin->id(), cin->get_attached_memory());
+
+    net->execute({});
+    net->reset_execution(true); //wait for computations to complete
+    auto outputs = net->get_outputs();
+
+    std::list<std::pair<primitive_id, memory_impl::ptr>> ret;
+    for (auto& out : outputs)
+        ret.push_back({ out->id(), &out->output_memory() });
+
+    return ret;
+}
+
+void propagate_constants::handle_constant(program_impl& prog, program_node& node)
+{
+    if (!node.is_type<data>())
+    {
+        add_constant(prog, node);
+        if (has_non_const_user(node))
+            const_outputs.push_back(node.id());
+    }
+}
+
+void propagate_constants::add_constant(program_impl& prog, program_node& node)
+{
+    if (node.is_type<data>())
+        return;
+    nodes.insert(prog.get_node_ptr(node.get_primitive()->id));
+    has_non_trivial_constants = true;
+
+    //if a node is either an endpoint or an output, always add it as an output
+    if (node.is_endpoint() || node.is_output())
+        const_outputs.push_back(node.id());
+
+    //if a non-tirivial constant has a trivial input, add this input as an input for our network
+    add_deps_to_tpl(prog, node.get_dependencies());
+}
+
+void propagate_constants::add_deps_to_tpl(program_impl& prog, const std::vector<program_node*>& deps)
+{
+    /*
+    Nodes can share dependencies, if we already have dep in tpl, don't add it again.
+    example:
+    C   <--- shared dep
+    / \
+    /   \
+    A     B
+    */
+    for (auto& dep : deps)
+    {
+        if (dep->is_type<data>())
+        {
+            auto dep_ptr = prog.get_node_ptr(dep->get_primitive()->id);
+            if (nodes.find(dep_ptr) == nodes.end())
+            {
+                nodes.insert(prog.get_node_ptr(dep->get_primitive()->id));
+                const_inputs.push_back(&dep->as<data>());
+            }
+        }
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp
new file mode 100644
index 000000000..bc36609f4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/remove_redundant_reorders.cpp
@@ -0,0 +1,92 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+#include "program_helpers.h"
+
+using namespace cldnn;
+
+void remove_redundant_reorders::run(program_impl& p)
+{
+    auto itr = p.get_processing_order().begin(); //note we need to use iterators since currently processed element can be removed
+    while (itr != p.get_processing_order().end())
+    {
+        auto& node = (*itr++); //post-inc to avoid invalidation due to possible erase
+        if (!node->is_type<reorder>()) //only care for reorders
+            continue;
+
+        program_node* current_node = node;
+        std::vector<program_node*> r_nodes_to_remove;
+
+        auto optimize = true;
+        while (current_node)
+        {
+            auto& r_node = current_node->as<reorder>();
+            current_node = nullptr;
+
+            if (r_node.has_mean() || !r_node.get_primitive()->subtract_per_feature.empty()  //do not optimize if mean of subtract are present
+                || r_node.is_output()) //do not optimize when both reorder and layer before are outputs
+            {
+                optimize = false;
+                break;
+            }
+
+            r_nodes_to_remove.push_back(&r_node);
+
+            if (r_node.get_dependency(0).is_type<reorder>() && r_node.get_dependencies().size() == 1 && r_node.get_users().size() == 1 && r_node.get_dependency(0).get_users().size() == 1)
+                current_node = &r_node.get_dependency(0);
+        }
+        if (!optimize)
+            continue;
+
+        assert(node->get_dependencies().size() == 1 && "reorder without mean should have exactly one dependecy (input)");
+        auto& r_output = r_nodes_to_remove.front();
+        auto& r_input = r_nodes_to_remove.back()->get_dependency(0);
+        auto o_layout = r_output->get_output_layout();
+        auto i_layout = r_input.get_output_layout();
+
+        auto ident = program_helpers::are_layouts_identical(o_layout, i_layout);
+        if (!ident.second)
+            continue;
+
+        for (auto remove_reorder_node : r_nodes_to_remove)
+        {
+            auto& r_node = remove_reorder_node->as<reorder>();
+
+            if (ident.first && ident.second && r_node.is_output() && r_node.get_dependency(0).is_input()) //do not optimize when reorder is output and layer before is input
+            {
+                optimize = false;
+                break;
+            }
+        }
+        if (!optimize)
+            continue;
+
+        auto rem_itr = r_nodes_to_remove.begin();
+        while (rem_itr != r_nodes_to_remove.end())
+        {
+            auto remove_reorder_node = *rem_itr++;
+            auto& r_node = remove_reorder_node->as<reorder>();
+            //mark as optimized
+            r_node.can_be_optimized(true);
+            r_node.requires_reinterpret(!ident.first);
+            if (ident.first) //no need of reshape
+                p.extract_and_remove(r_node); //try to remove if possible (with respect to r_node not being marked as output)
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
new file mode 100644
index 000000000..757088134
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/reorder_inputs.cpp
@@ -0,0 +1,269 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "api/CPP/proposal.hpp"
+#include "api/CPP/roi_pooling.hpp"
+#include "api/CPP/reorg_yolo.hpp"
+#include "api/CPP/eltwise.hpp"
+#include "upsampling_inst.h"
+#include "pass_manager.h"
+#include "program_node.h"
+#include "layout_optimizer.h"
+#include "program_impl.h"
+#include "program_helpers.h"
+
+using namespace cldnn;
+
+//ToDo remove friendship relation from program_impl
+
+reorder_inputs::reorder_inputs(layout_optimizer& lo_ref) : base_pass("reorder_inputs"), _lo(lo_ref) {}
+
+void reorder_inputs::run(program_impl& p) {
+    run(p, _lo);
+}
+
+void reorder_inputs::run(program_impl& p, layout_optimizer& lo)
+{
+    //first pass to set layout optimization_attributes for topology
+    for (auto& node : p.get_processing_order())
+    {
+        auto& prim = *node;
+        if (prim.type() == cldnn::convolution::type_id())
+        {
+            if (prim.as<convolution>().get_primitive()->split() > 1)
+                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::splitted_convolution, 1);
+        }
+
+        //list of layers that do not support yxfb or perform worse than bfyx
+        if (prim.type() == cldnn::detection_output::type_id() || prim.type() == cldnn::proposal::type_id() ||
+            prim.type() == cldnn::roi_pooling::type_id() || prim.type() == cldnn::deconvolution::type_id() ||
+            prim.type() == cldnn::upsampling::type_id() || prim.type() == cldnn::reorg_yolo::type_id())
+            lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_only_layer, 1);
+    }
+
+    const auto reorder_input = [&p, &lo](typed_program_node<convolution>& conv_node)
+    {
+        auto conv_prim = conv_node.get_primitive();
+        auto& input_node = conv_node.get_dependency(0);
+        auto&& weights_layout = conv_node.weights(0).get_output_layout();
+        auto&& input_layout = input_node.get_output_layout();
+
+        std::shared_ptr<reorder> new_input = nullptr;
+
+        if (input_node.type() == reorder::type_id()) //convolution's input is a reorder
+        {
+            auto reorder_prim = input_node.as<reorder>().typed_desc();
+            auto& reorder_input = input_node.get_dependency(0);
+            auto reorder_layout = input_node.get_output_layout();
+            reorder_layout.data_type = *reorder_prim->output_data_type;
+            new_input = lo.get_reorder(
+                reorder_layout,
+                reorder_prim->id,
+                layout_optimizer::data_type::input,
+                conv_node,
+                weights_layout).first;
+
+            auto reorder_removed = false;
+            if (new_input && new_input->output_format != format::winograd_2x3_s1_data && new_input->output_format != format::bf8_xy16 && new_input->output_format != format::byxf) //output format is not optimal
+            {
+                auto reorder_input_layout = reorder_input.get_output_layout();
+
+                auto opt_layout = layout(*new_input->output_data_type, new_input->output_format, reorder_input_layout.size);
+                if (reorder_input_layout == opt_layout) //reorder 'breaks' optimal format
+                {
+                    if (reorder_prim->subtract_per_feature.empty() &&
+                        reorder_prim->mean.empty() &&
+                        !reorder_prim->output_padding) //just plain reorder
+                    {
+                        conv_node.replace_dependency(0, reorder_input);
+                        if (input_node.get_users().size() == 0 && !input_node.is_output())
+                        {
+                            reorder_removed = p.extract_and_remove(input_node);
+                        }
+                        new_input = nullptr;
+                    }
+                    else //change reorder's output layout
+                    {
+                        reorder_prim->output_format = opt_layout.format;
+                        reorder_prim->output_data_type = opt_layout.data_type;
+                        new_input = nullptr;
+                    }
+                }
+                else //current reorder gives bad output, simply change it
+                {
+                    reorder_prim->output_format = opt_layout.format;
+                    reorder_prim->output_data_type = opt_layout.data_type;
+                    new_input = nullptr;
+                }
+            }
+
+            if (!reorder_removed)
+                input_node.recalc_output_layout();
+            else
+                conv_node.recalc_output_layout();
+        }
+        else
+        {
+            new_input = lo.get_reorder(
+                input_node.get_output_layout(),
+                input_node.id(),
+                layout_optimizer::data_type::input,
+                conv_node,
+                weights_layout).first;
+        }
+
+        if (new_input && new_input->output_format == format::winograd_2x3_s1_data)
+        {
+            auto lower_size = (conv_prim->input_offset.negate() + input_layout.size);
+
+            tensor upper_input_padding = tensor{ 0 };
+            upper_input_padding.spatial[0] = (2 - (lower_size.spatial[0] % 2)) % 2;          //winograd conv requires input's x to be in form 4 + 2n, with restriction that x >= 3, we can shortage it to x % 2 == 0
+            upper_input_padding.spatial[1] = (8 - ((lower_size.spatial[1] - 2) % 8)) % 8;    //for y, y - 2 % 8 == 0 must hold
+
+            p.apply_needed_padding(conv_node, input_node, padding{ conv_prim->input_offset.negate().sizes(), upper_input_padding.sizes() });
+
+            auto winograd_output = std::make_shared<reorder>("_winograd_" + conv_node.id(), conv_node.id(), input_layout.format,
+                input_layout.data_type, std::vector<float>{}, cldnn_reorder_mean_mode::mean_subtract, conv_node.output_layout.data_padding);
+            conv_node.output_layout.data_padding = padding{};
+            program_node& back_node = p.get_or_create(winograd_output);
+            p.get_processing_order().insert_next(&conv_node, &back_node);
+
+            auto bias_term = conv_node.bias_term();
+            //create additional eltwise node after reorder to compute bias
+            if (bias_term)
+            {
+                auto& bias_node = conv_node.get_dependency(2);
+                std::vector<primitive_id> inputs = { back_node.id(), bias_node.id() };
+                auto winograd_output_biases = std::make_shared<eltwise>(back_node.id() + "_bias", inputs,
+                    cldnn::eltwise_mode::sum, conv_prim->with_activation, conv_prim->activation_negative_slope,
+                    back_node.get_output_layout().data_padding);
+                back_node.get_output_layout().data_padding = padding{};
+                auto& back_bias_node = p.get_or_create(winograd_output_biases);
+                p.get_processing_order().insert_next(&back_node, &back_bias_node);
+                p.replace_all_usages(back_node, back_bias_node);
+                p.add_connection(back_node, back_bias_node);
+                p.add_connection(bias_node, back_bias_node);
+                conv_node.invalidate_users();
+                p.replace_all_usages(conv_node, back_bias_node);
+            }
+
+            if (conv_prim->with_activation)
+            {
+                conv_node.typed_desc()->with_activation = false;
+                if (!bias_term)
+                    back_node.set_fused_activation(activation_relu_negative_slope, cldnn_activation_additional_params_t{ conv_prim->activation_negative_slope });
+            }
+
+            if (!bias_term)
+            {
+                conv_node.invalidate_users();
+                p.replace_all_usages(conv_node, back_node);
+            }
+            p.add_connection(conv_node, back_node);
+
+            auto& r_node = p.get_or_create(new_input);
+            r_node.as<reorder>().set_input_offset(conv_prim->input_offset);
+
+            if (!bias_term)
+            {
+                p.swap_names(conv_node, back_node);
+                if (conv_node.is_output())
+                {
+                    conv_node.set_output(false);
+                    back_node.set_output(true);
+                    for (auto& output : p.get_outputs())
+                    {
+                        if (output == &conv_node)
+                        {
+                            output = &back_node;
+                            break;
+                        }
+                    }
+                }
+            }
+            else
+            {
+                conv_node.remove_dependency(2);
+                auto& back_bias_node = *(p.nodes_map.find(back_node.id() + "_bias")->second);
+                p.swap_names(conv_node, back_bias_node);
+                if (conv_node.is_output())
+                {
+                    conv_node.set_output(false);
+                    back_bias_node.set_output(true);
+                    for (auto& output : p.get_outputs())
+                    {
+                        if (output == &conv_node)
+                        {
+                            output = &back_bias_node;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        if (new_input && (new_input->output_format == format::bf8_xy16 || new_input->output_format == format::byxf))
+        {
+            auto conv1x1_output = std::make_shared<reorder>("_conv1x1_reorder_back_" + conv_node.id(), conv_node.id(), input_layout.format, input_layout.data_type);
+            auto& back_node = p.get_or_create(conv1x1_output);
+            p.get_processing_order().insert_next(&conv_node, &back_node);
+            conv_node.invalidate_users();
+            p.replace_all_usages(conv_node, back_node);
+            p.add_connection(conv_node, back_node);
+        }
+
+        if (new_input)
+        {
+            auto& r_node = p.get_or_create(new_input);
+            p.add_intermediate(r_node, conv_node, 0, r_node.get_dependencies().empty());
+            conv_node.recalc_output_layout();
+        }
+    };
+
+    const auto reorder_input_detection_output = [&p, &lo](typed_program_node<detection_output>& detection_output_node)
+    {
+        auto detection_output_prim = detection_output_node.get_primitive();
+
+        for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++)
+        {
+            auto& input = detection_output_node.get_dependency(i);
+            std::shared_ptr<reorder> new_input = lo.get_reorder(
+                input.get_output_layout(),
+                input.id(),
+                layout_optimizer::data_type::input,
+                detection_output_node,
+                layout{ data_types::f32, format::bfyx, tensor{} }).first;
+
+            if (new_input)
+            {
+                p.add_intermediate(new_input, detection_output_node, i);
+            }
+        }
+    };
+
+    for (auto& prim : p.get_processing_order())
+    {
+        //there's an assumption that only convolution will take data/input_layout as input
+        //exception to that rule would be a convolution which takes a reorder as input - see reoder_input above
+        program_helpers::do_for_types<convolution, detection_output>(*prim,
+            reorder_input,                  //case for convolution
+            reorder_input_detection_output  //case for detection-output
+            );
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/graph_optimizer/trim_to_outputs.cpp b/inference-engine/thirdparty/clDNN/src/graph_optimizer/trim_to_outputs.cpp
new file mode 100644
index 000000000..f9ff2f626
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/graph_optimizer/trim_to_outputs.cpp
@@ -0,0 +1,76 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "pass_manager.h"
+
+//ToDo: remove those include with the appropriate code below once we will have support for multiple outputs of a primitive
+#include "batch_norm_inst.h"
+#include "max_unpooling_inst.h"
+#include "pooling_inst.h"
+
+using namespace cldnn;
+
+//This pass optimizes out nodes which have no impact on outputs
+void trim_to_outputs::run(program_impl& p)
+{
+    const size_t actual_nodes = p.get_processing_order().size();
+    if (!actual_nodes) //degenerated case but can happen
+        return;
+
+    if (p.get_outputs().size() == actual_nodes)
+        return;
+
+    //do backward bfs starting from all outputs
+    std::list<const std::vector<program_node*>*> stack = { &(p.get_outputs()) };
+
+    std::vector<program_node*> special_nodes;
+    for (auto& node : p.get_processing_order())
+    {
+        if (node->is_type<input_layout>() ||  //input layout may become disconnected during prior boxes calculations so it may have not been marked at this place but we don't want to remove it
+            node->is_type<max_unpooling>() || // ToDo: remove this after support for multi-outputs in primitives will be implemented.
+            node->is_type<batch_norm>() ||
+            (node->is_type<pooling>() && node->as<pooling>().get_primitive()->mode == pooling_mode::max_with_argmax))
+                special_nodes.push_back(node);
+    }
+    stack.push_back(&special_nodes);
+
+    while (!stack.empty())
+    {
+        auto nodes_list = stack.front();
+        stack.pop_front();
+
+        for (auto& node : *nodes_list)
+        {
+            if (!node->is_marked())
+            {
+                node->mark();
+                if (!node->get_dependencies().empty())
+                    stack.push_back(&node->get_dependencies());
+            }
+        }
+    }
+
+    //all not-marked nodes should be removed
+    std::list<program_node*> to_rem;
+    for (auto& node : p.get_processing_order())
+    {
+        if (!node->is_marked())
+            to_rem.push_back(node);
+    }
+    p.remove_nodes(to_rem);
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/activation_inst.h b/inference-engine/thirdparty/clDNN/src/include/activation_inst.h
index 80a56475c..7ff10cf5c 100644
--- a/inference-engine/thirdparty/clDNN/src/include/activation_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/activation_inst.h
@@ -26,6 +26,7 @@ template <>
 struct typed_program_node<activation> : public typed_program_node_base<activation>
 {
     using parent = typed_program_node_base<activation>;
+    typed_program_node(const std::shared_ptr<activation> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
 
 public:
     using parent::parent;
diff --git a/inference-engine/thirdparty/clDNN/src/include/apply_adam_inst.h b/inference-engine/thirdparty/clDNN/src/include/apply_adam_inst.h
index f9c735815..0ca4cdad3 100644
--- a/inference-engine/thirdparty/clDNN/src/include/apply_adam_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/apply_adam_inst.h
@@ -25,6 +25,7 @@ namespace cldnn
 template <>
 struct typed_program_node<apply_adam> : public typed_program_node_base<apply_adam>
 {
+    typed_program_node(const std::shared_ptr<apply_adam> prim, program_impl& prog);
     using parent = typed_program_node_base<apply_adam>;
 
 public:
diff --git a/inference-engine/thirdparty/clDNN/src/include/batch_norm_inst.h b/inference-engine/thirdparty/clDNN/src/include/batch_norm_inst.h
index 956952777..175dc8ef1 100644
--- a/inference-engine/thirdparty/clDNN/src/include/batch_norm_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/batch_norm_inst.h
@@ -18,6 +18,7 @@
 #pragma once
 #include "api/CPP/batch_norm.hpp"
 #include "primitive_inst.h"
+#include "mutable_data_inst.h"
 
 namespace cldnn
 {
@@ -33,10 +34,34 @@ public:
     program_node& input() const { return get_dependency(0); }
     program_node& mean() const { return get_dependency(1); }
     program_node& variance() const { return get_dependency(2); }
-    program_node& inv_variance() const { return get_dependency(1); };
+    program_node& scale() const 
+	{ 
+		if(get_dependencies().size() >= 5)
+			return get_dependency(3); 
+		else
+			return get_dependency(1);
+	}
+    program_node& shift() const 
+	{ 
+		if (get_dependencies().size() >= 5)
+			return get_dependency(4); 
+		else
+			return get_dependency(2);
+	}
+    program_node& inv_variance() const 
+	{ 
+		if (get_dependencies().size() == 2)
+			return get_dependency(1);
+		else if (get_dependencies().size() == 6)
+			return get_dependency(5);
+		else
+			return get_dependency(3);
+	};
     bool variance_term() const { return !get_primitive()->variance.empty(); }
     bool use_global_stats() const { return !get_primitive()->mean.empty() && !get_primitive()->variance.empty(); };
+    bool use_scale_shift() const { return !get_primitive()->scale.empty() && !get_primitive()->shift.empty(); };
     bool forwad_pass() const { return !get_primitive()->inv_variance.empty(); };
+    bool calc_mean_var() const { return (use_global_stats() && mean().is_type<mutable_data>() && variance().is_type<mutable_data>()); };
 
 };
 
@@ -56,9 +81,33 @@ public:
 
     memory_impl& mean_memory() const { return dep_memory(1); }
     memory_impl& variance_memory() const { return dep_memory(2); }
-    memory_impl& inv_variance_memory() const { return dep_memory(1); };
+    memory_impl& scale_memory() const 
+	{
+		if (dependencies().size() >= 5)
+			return dep_memory(3);
+		else
+			return dep_memory(1);
+	}
+    memory_impl& shift_memory() const 
+	{
+		if (dependencies().size() >= 5)
+			return dep_memory(4);
+		else
+			return dep_memory(2);
+	}
+    memory_impl& inv_variance_memory() const
+	{
+		if (dependencies().size() == 2)
+			return dep_memory(1);
+		else if (dependencies().size() == 6)
+			return dep_memory(5);
+		else
+			return dep_memory(3);
+	};
     bool use_global_stats() const { return !argument.mean.empty() && !argument.variance.empty(); };
+    bool use_scale_shift() const { return !argument.scale.empty() && !argument.scale.empty(); };
     bool forwad_pass() const { return !argument.inv_variance.empty(); };
+    bool calc_mean_var() const { return node.calc_mean_var(); };
 };
 
 using batch_norm_inst = typed_primitive_inst<batch_norm>;
diff --git a/inference-engine/thirdparty/clDNN/src/include/border_inst.h b/inference-engine/thirdparty/clDNN/src/include/border_inst.h
index ff3b28b2a..1190bfc2d 100644
--- a/inference-engine/thirdparty/clDNN/src/include/border_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/border_inst.h
@@ -28,16 +28,15 @@ struct typed_program_node<border> : typed_program_node_base<border>
 private:
     using parent = typed_program_node_base<border>;
 
-
 public:
     using parent::parent;
 
+    typed_program_node(const std::shared_ptr<border> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
     program_node& input() const { return get_dependency(0); }
 };
 
 using border_node = typed_program_node<border>;
 
-
 template <>
 class typed_primitive_inst<border> : public typed_primitive_inst_base<border>
 {
diff --git a/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h b/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h
index f10b562c5..0cc920efe 100644
--- a/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/broadcast_inst.h
@@ -28,10 +28,10 @@ struct typed_program_node<broadcast> : typed_program_node_base<broadcast>
 private:
     using parent = typed_program_node_base<broadcast>;
 
-
 public:
     using parent::parent;
 
+    typed_program_node(const std::shared_ptr<broadcast> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
     program_node& input() const { return get_dependency(0); }
 };
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h b/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h
index 2ef3b1bb9..dfd0dd5c6 100644
--- a/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/concatenation_inst.h
@@ -26,6 +26,7 @@ template <>
 struct typed_program_node<concatenation> : public typed_program_node_base<concatenation>
 {
     using parent = typed_program_node_base<concatenation>;
+    typed_program_node(const std::shared_ptr<concatenation> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
 
 public:
     using parent::parent;
diff --git a/inference-engine/thirdparty/clDNN/src/include/condition_inst.h b/inference-engine/thirdparty/clDNN/src/include/condition_inst.h
new file mode 100644
index 000000000..1d8c1d621
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/condition_inst.h
@@ -0,0 +1,127 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include <api/CPP/condition.hpp>
+
+#include "network_impl.h"
+#include "primitive_inst.h"
+
+namespace cldnn
+{
+namespace details
+{
+
+}
+
+template <>
+struct typed_program_node<condition> : public typed_program_node_base<condition>
+{
+private:
+    using parent = typed_program_node_base<condition>;
+
+    class branch
+    {
+    public:
+        branch(topology_impl& tpl) : _topology(tpl) {}
+
+        void set(const program_node& node)
+        {
+            add_or_change_input_layout(node);
+            _program = node.get_program().get_engine().build_program(_topology, node.get_program().get_options(), true); //rebuild program 
+        }
+        program_impl::ptr get() const { return _program; }
+
+    private:
+        topology_impl & _topology;
+        program_impl::ptr _program = nullptr;
+
+        void add_or_change_input_layout(const program_node& node)
+        {
+            auto layout = node.get_dependency(0).get_output_layout();
+            auto input_id = node.as<condition>().result_id();
+            if (_program == nullptr) //if first run, create input_layout
+            {
+                _topology.add(std::make_shared<input_layout>(input_id, layout));
+                for (auto& prim : _topology.get_primitives())
+                {
+                    for (auto& inp : prim.second->input)
+                    {
+                        if (inp == node.id())
+                            inp = input_id;
+                    }
+                }
+            }
+            else
+            {
+                _topology.change_input_layout(input_id, layout);
+            }
+        }
+    };
+
+public:
+    using parent::parent;
+
+    typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog)
+        : parent(prim, prog)
+        , _branch_true(*api_cast(this->get_primitive()->topology_true.get()))
+        , _branch_false(*api_cast(this->get_primitive()->topology_false.get()))
+    {
+    }
+
+    program_node& input() const { return get_dependency(0); }
+    program_node& compare() const { return get_dependency(1); }
+    cond_functions func() const { return get_primitive()->function; }
+    tensor offset() const { return get_primitive()->offset; }
+    void set_branches() const
+    {
+        _branch_true.set(*this);
+        _branch_false.set(*this);
+    }
+    program_impl::ptr get_branch_true() const { return _branch_true.get(); }
+    program_impl::ptr get_branch_false() const{ return _branch_false.get(); }
+    primitive_id result_id() const { return id() + ":result"; }
+
+private:
+    mutable branch _branch_true;
+    mutable branch _branch_false;
+};
+
+using condition_node = typed_program_node<condition>;
+
+
+template <>
+class typed_primitive_inst<condition> : public typed_primitive_inst_base<condition>
+{
+    using parent = typed_primitive_inst_base<condition>;
+
+public:
+    static layout calc_output_layout(condition_node const& node);
+    static std::string to_string(condition_node const& node);
+    typed_primitive_inst(network_impl& network, condition_node const& node);
+
+    memory_impl& input_memory() const { return dep_memory(0); }
+    memory_impl& compare_memory() const { return dep_memory(1); }
+    network_impl::ptr get_net_true() const { return _net_true; }
+    network_impl::ptr get_net_false() const { return _net_false; }
+    primitive_id result_id() const { return node.result_id(); }
+private:
+    network_impl::ptr _net_true;
+    network_impl::ptr _net_false;
+};
+
+using condition_inst = typed_primitive_inst<condition>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/constants_propagator.h b/inference-engine/thirdparty/clDNN/src/include/constants_propagator.h
deleted file mode 100644
index 7b402f37a..000000000
--- a/inference-engine/thirdparty/clDNN/src/include/constants_propagator.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-// Copyright (c) 2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma once
-
-#include "program_impl.h"
-#include "data_inst.h"
-
-namespace cldnn
-{
-
-class constants_propagator
-{
-public:
-    constants_propagator(program_impl::ptr program);
-
-    void visit_node(program_node& node);
-
-    std::list<std::pair<primitive_id, memory_impl::ptr>> calculate();
-
-private:
-    program_impl::ptr prog;
-    topology_impl tpl;
-    std::list<typed_program_node<data>*> const_inputs;
-    std::vector<primitive_id> const_outputs;
-    bool has_non_trivial_constants = false;
-
-    void handle_constant(program_node& node);
-    void add_constant(program_node& node);
-    void add_deps_to_tpl(const std::vector<program_node*>& node);
-    bool is_already_in_tpl(const primitive_id& id);
-};
-
-}
diff --git a/inference-engine/thirdparty/clDNN/src/include/contract_inst.h b/inference-engine/thirdparty/clDNN/src/include/contract_inst.h
new file mode 100644
index 000000000..bc783bc67
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/contract_inst.h
@@ -0,0 +1,53 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include <api/CPP/contract.hpp>
+
+#include "primitive_inst.h"
+
+
+namespace cldnn
+{
+    template <>
+    struct typed_program_node<contract> : typed_program_node_base<contract>
+    {
+    private:
+        using parent = typed_program_node_base<contract>;
+
+    public:
+        using parent::parent;
+
+        typed_program_node(const std::shared_ptr<contract> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
+        program_node& input() const { return get_dependency(0); }
+    };
+
+    using contract_node = typed_program_node<contract>;
+
+
+    template <>
+    class typed_primitive_inst<contract> : public typed_primitive_inst_base<contract>
+    {
+        using parent = typed_primitive_inst_base<contract>;
+
+    public:
+        static layout calc_output_layout(contract_node const& node);
+        static std::string to_string(contract_node const& node);
+        typed_primitive_inst(network_impl& network, contract_node const& node);
+    };
+
+    using contract_inst = typed_primitive_inst<contract>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/convolution_grad_weights_inst.h b/inference-engine/thirdparty/clDNN/src/include/convolution_grad_weights_inst.h
index ed32f0c8e..cd1571dbe 100644
--- a/inference-engine/thirdparty/clDNN/src/include/convolution_grad_weights_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/convolution_grad_weights_inst.h
@@ -90,6 +90,11 @@ public:
             return false;
     }
 
+    bool output_grad_w() const
+    {
+        return get_primitive()->output_grad_w;
+    }
+
 private:
     int32_t split;
     bool depthwise_sep_opt;
@@ -165,6 +170,11 @@ public:
         else
             return false;
     }
+
+    bool output_grad_w() const
+    {
+        return argument.output_grad_w;
+    }
 };
 
 using convolution_grad_weights_inst = typed_primitive_inst<convolution_grad_weights>;
diff --git a/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h b/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h
index b47e09fab..8366839fb 100644
--- a/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/convolution_inst.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -37,7 +37,9 @@ public:
         , transposed(false)
         , input_qf(this->get_primitive()->input_quantization_factor)
         , output_qf(this->get_primitive()->output_quantization_factor)
+        , groups(this->get_primitive()->groups)
     {
+        support_padding(true);
     }
 
     void set_split(int32_t node_split) { split = node_split; }
@@ -49,6 +51,9 @@ public:
     void set_transposed(bool node_transposed) { transposed = node_transposed; }
     bool get_transposed() const { return transposed; }
 
+    void set_groups(uint32_t node_groups) { groups = node_groups; }
+    uint32_t get_groups() const { return groups; }
+
     program_node& input() const { return get_dependency(0); }
 
     program_node& weights(size_t idx = 0) const
@@ -107,6 +112,7 @@ private:
     bool transposed;
     float input_qf;
     float output_qf;
+    uint32_t groups;
 };
 
 using convolution_node = typed_program_node<convolution>;
@@ -125,34 +131,50 @@ public:
 
     memory_impl& weights_memory(size_t index) const
     {
-        if (static_cast<int32_t>(index) >= node.get_split())
-            throw std::range_error("weights offset too big");
-        
-        return dep_memory(1 + index);
+        if (node.get_groups() == 1) {
+            if (static_cast<int32_t>(index) >= node.get_split())
+                throw std::range_error("weights offset too big");
+            return dep_memory(1 + index);
+        }
+        else { // all weights are in one buffer
+            return dep_memory(1);
+        }
     }
 
     memory_impl& bias_memory(size_t index) const
     { 
-        if (static_cast<int32_t>(index) >= node.get_split())
-            throw std::range_error("bias offset too big");
-
-        return dep_memory(1 + node.get_split() + index);
+        if (node.get_groups() == 1) {
+            if (static_cast<int32_t>(index) >= node.get_split())
+                throw std::range_error("bias offset too big");
+            return dep_memory(1 + node.get_split() + index);
+        }
+        else { // all bias are in one buffer
+            return dep_memory(2);
+        }
     }
 
     memory_impl& weights_quantization_factors_memory(size_t index) const
     {
-        if (static_cast<int32_t>(index) >= node.get_split())
-            throw std::range_error("quantization factors offset too big");
-
-        return dep_memory(1 + 2*node.get_split() + index);
+        if (node.get_groups() == 1) {
+            if (static_cast<int32_t>(index) >= node.get_split())
+                throw std::range_error("quantization factors offset too big");
+            return dep_memory(1 + 2 * node.get_split() + index);
+        }
+        else { // all quantization_factors are in one buffer
+            return dep_memory(3);
+        };
     }
 
     memory_impl& output_calibration_factors_memory(size_t index) const
     {
-        if (static_cast<int32_t>(index) >= node.get_split())
-            throw std::range_error("quantization factors offset too big");
-
-        return dep_memory(1 + 3 * node.get_split() + index);
+        if (node.get_groups() == 1) {
+            if (static_cast<int32_t>(index) >= node.get_split())
+                throw std::range_error("quantization factors offset too big");
+            return dep_memory(1 + 3 * node.get_split() + index);
+        }
+        else { // all calibration_factors are in one buffer
+            return dep_memory(4);
+        }
     }
 
     bool bias_term() const
diff --git a/inference-engine/thirdparty/clDNN/src/include/crop_inst.h b/inference-engine/thirdparty/clDNN/src/include/crop_inst.h
index ef4260f9c..d845aac92 100644
--- a/inference-engine/thirdparty/clDNN/src/include/crop_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/crop_inst.h
@@ -23,13 +23,15 @@ namespace cldnn
 {
 
 template <>
-class typed_program_node<crop> : public typed_program_node_base<crop>
+struct typed_program_node<crop> : public typed_program_node_base<crop>
 {
+private:
     using parent = typed_program_node_base<crop>;
 
 public:
     using parent::parent;
 
+    typed_program_node(const std::shared_ptr<crop> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
     program_node& input() const { return get_dependency(0); }
 };
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h b/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h
index 377be1d10..0e19a23e9 100644
--- a/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/custom_gpu_primitive_inst.h
@@ -42,6 +42,9 @@ class typed_primitive_inst<custom_gpu_primitive> : public typed_primitive_inst_b
 public:
     static layout calc_output_layout(custom_gpu_primitive_node const& node)
     {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "custom_gpu_primitive_node!");
         layout output_layout = node.get_primitive()->output_layout;
         
         // if the output layout format was set to any, it means the layer output format will be the same as the first input
diff --git a/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h b/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h
index a2e1516f2..adfe356ad 100644
--- a/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/deconvolution_inst.h
@@ -32,7 +32,9 @@ public:
         : parent(prim, prog)
         , split(this->get_primitive()->split())
         , depthwise_sep_opt(false)
+        , groups(this->get_primitive()->groups)
     {
+        support_padding(true);
     }
 
     
@@ -42,6 +44,9 @@ public:
     void set_depthwise_sep_opt(bool node_depthwise_sep_opt) { depthwise_sep_opt = node_depthwise_sep_opt; }
     bool get_depthwise_sep_opt() const { return depthwise_sep_opt; }
 
+    void set_groups(uint32_t node_groups) { groups = node_groups; }
+    uint32_t get_groups() const { return groups; }
+
     program_node& input() const { return get_dependency(0); }
 
     program_node& weights(size_t idx = 0) const
@@ -73,21 +78,22 @@ public:
         if (static_cast<int32_t>(idx) > 0)
             throw std::range_error("Only one input for fused sum is supported");
 
-        int d_idx = 1 + this->get_split() + idx;
+        size_t d_idx = 1 + this->get_split() + idx;
         d_idx += bias_term() ? this->get_split() : 0;
         return get_dependency(d_idx);
     }
 
     bool has_fused_sum() const
     {
-        int d_idx = 1 + this->get_split();
+        size_t d_idx = 1 + this->get_split();
         d_idx += bias_term() ? this->get_split() : 0;
-        return static_cast<int>(dependencies.size()) == (d_idx + 1);
+        return dependencies.size() == (d_idx + 1);
     }
 
 private:
     int32_t split;
     bool depthwise_sep_opt;
+    uint32_t groups;
 };
 
 using deconvolution_node = typed_program_node<deconvolution>;
@@ -106,21 +112,28 @@ public:
 
     memory_impl& weights_memory(size_t index) const
     {
-        if (static_cast<int32_t>(index) >= node.get_split())
-            throw std::range_error("weights offset too big");
-
-        return dep_memory(1 + index);
+        if (node.get_groups() == 1) {
+            if (static_cast<int32_t>(index) >= node.get_split())
+                throw std::range_error("weights offset too big");
+            return dep_memory(1 + index);
+        }
+        else { // all weights are in one buffer
+            return dep_memory(1);
+        }
     }
 
     memory_impl& bias_memory(size_t index) const
     {
-        if (argument.bias.size() == 0 && static_cast<int32_t>(index) >= node.get_split())
-            throw std::range_error("no bias data");
-
-        if (static_cast<int32_t>(index) > node.get_split())
-            throw std::range_error("bias offset too big");
-
-        return dep_memory(1 + node.get_split() + index);
+        if (node.get_groups() == 1) {
+            if (argument.bias.size() == 0 && static_cast<int32_t>(index) >= node.get_split())
+                throw std::range_error("no bias data");
+            if (static_cast<int32_t>(index) > node.get_split())
+                throw std::range_error("bias offset too big");
+            return dep_memory(1 + node.get_split() + index);
+        }
+        else { // all bias are in one buffer
+            return dep_memory(2);
+        }
     }
 
     bool bias_term() const
diff --git a/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h b/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h
new file mode 100644
index 000000000..5dda8d43e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/depth_to_space_inst.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api/CPP/depth_to_space.hpp"
+#include "primitive_inst.h"
+
+namespace  cldnn
+{
+template <>
+struct typed_program_node<depth_to_space> : public typed_program_node_base<depth_to_space>
+{
+    using parent = typed_program_node_base<depth_to_space>;
+
+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+};
+
+using depth_to_space_node = typed_program_node<depth_to_space>;
+
+template <>
+class typed_primitive_inst<depth_to_space> : public typed_primitive_inst_base<depth_to_space>
+{
+    using parent = typed_primitive_inst_base<depth_to_space>;
+
+public:
+    static layout calc_output_layout(depth_to_space_node const& node);
+    static std::string to_string(depth_to_space_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, depth_to_space_node const& desc);
+};
+
+using depth_to_space_inst = typed_primitive_inst<depth_to_space>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h b/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h
index f918b6d47..d1d24a785 100644
--- a/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/detection_output_inst.h
@@ -34,6 +34,7 @@ class typed_program_node<detection_output> : public typed_program_node_base<dete
 public:
     using parent::parent;
 
+    program_node& input() const { return get_dependency(0); }
     program_node& location() const { return get_dependency(0); }
     program_node& confidence() const { return get_dependency(1); }
     program_node& prior_box() const { return get_dependency(2); }
@@ -60,4 +61,38 @@ public:
 
 using detection_output_inst = typed_primitive_inst<detection_output>;
 
+template <>
+class typed_program_node<detection_output_sort> : public typed_program_node_base<detection_output_sort>
+{
+    using parent = typed_program_node_base<detection_output_sort>;
+
+public:
+    using parent::parent;
+
+    program_node& input() const { return get_dependency(0); }
+};
+
+using detection_output_sort_node = typed_program_node<detection_output_sort>;
+
+template <>
+class typed_primitive_inst<detection_output_sort> : public typed_primitive_inst_base<detection_output_sort>
+{
+    using parent = typed_primitive_inst_base<detection_output_sort>;
+
+public:
+    static layout calc_output_layout(detection_output_sort_node const& node);
+    static std::string to_string(detection_output_sort_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, detection_output_sort_node const& node);
+};
+
+using detection_output_sort_inst = typed_primitive_inst<detection_output_sort>;
+
+namespace gpu {
+    primitive_impl* runDetectOutCpu(const detection_output_node& arg);
+    primitive_impl* runDetectOutGpu(const detection_output_node& arg, kernel_selector::KernelData kernel);
+    primitive_impl* runDetectOutSortGpu(const detection_output_sort_node& arg, kernel_selector::KernelData kernel);
 }
+
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h b/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h
index f6d8f6115..7b6e6cfc7 100644
--- a/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/eltwise_inst.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -54,6 +54,7 @@ template <>
 class typed_primitive_inst<eltwise> : public typed_primitive_inst_base<eltwise>
 {
     using parent = typed_primitive_inst_base<eltwise>;
+    static void check_inputs_count(eltwise_node const &node);
 
 public:
     static layout calc_output_layout(eltwise_node const& node);
diff --git a/inference-engine/thirdparty/clDNN/src/include/embed_inst.h b/inference-engine/thirdparty/clDNN/src/include/embed_inst.h
index 045522624..11bdc2473 100644
--- a/inference-engine/thirdparty/clDNN/src/include/embed_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/embed_inst.h
@@ -55,4 +55,4 @@ namespace cldnn
 
 	using embed_inst = typed_primitive_inst<embed>;
 
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/engine_impl.h b/inference-engine/thirdparty/clDNN/src/include/engine_impl.h
index ea1234abd..3f81b7657 100644
--- a/inference-engine/thirdparty/clDNN/src/include/engine_impl.h
+++ b/inference-engine/thirdparty/clDNN/src/include/engine_impl.h
@@ -22,7 +22,6 @@
 #include "refcounted_obj.h"
 #include "implementation_map.h"
 #include "memory_pool.h"
-
 #include "gpu/engine_info.h"
 
 #include <memory>
@@ -41,6 +40,7 @@ struct event_impl;
 struct topology_impl;
 struct program_impl;
 struct network_impl;
+struct program_node;
 
 template <class>
 struct typed_program_node;
@@ -49,9 +49,8 @@ struct engine_impl : public refcounted_obj<engine_impl>
 {
 public:
     engine_impl(const engine_configuration& conf);
-
+    ~engine_impl();
     engine_types type() const { return engine_types::ocl; }
-
     refcounted_obj_ptr<memory_impl> allocate_memory(layout layout);
     refcounted_obj_ptr<memory_impl> allocate_memory(layout layout, primitive_id, uint32_t, std::set<primitive_id>, bool reusable = true);
     refcounted_obj_ptr<memory_impl> reinterpret_buffer(const memory_impl& memory, layout new_layout);
@@ -60,11 +59,13 @@ public:
     refcounted_obj_ptr<event_impl> create_user_event(bool set = false);
     void wait_for_events(std::vector<event_impl::ptr> const& events);
 
-    refcounted_obj_ptr<program_impl> build_program(const topology_impl& topology, const build_options& options, bool is_internal = false);
+    refcounted_obj_ptr<program_impl> build_program(const topology_impl& topology, const build_options& options, bool is_internal = false, bool no_optimizations = false);
+    refcounted_obj_ptr<program_impl> build_program(const std::set<std::shared_ptr<program_node>>& nodes, const build_options & options, bool is_internal);
     void compile_program(program_impl& prog);
 
-    refcounted_obj_ptr<network_impl> allocate_network(const program_impl& program);
-    refcounted_obj_ptr<network_impl> build_network(const topology_impl& topology, const build_options& options, bool internal_network = false);
+    refcounted_obj_ptr<network_impl> allocate_network(const program_impl& program, bool is_internal = false);
+    refcounted_obj_ptr<network_impl> build_network(const topology_impl& topology, const build_options& options, bool is_internal = false);
+    refcounted_obj_ptr<network_impl> build_network(const std::set<std::shared_ptr<program_node>>& nodes, const build_options & options, bool is_internal);
     void flush_network();
     void release_pending_memory();
 
@@ -77,7 +78,23 @@ public:
         auto factory = implementation_map<T>::get(type(), node);
         return std::move(std::unique_ptr<primitive_impl>(factory(node)));
     }
-    
+
+    template <class T>
+    bool does_an_implementation_exist(typed_program_node<T> const& node)
+    {
+        if (&node.get_program().get_engine() != this)
+          throw std::invalid_argument("engine_impl::create_primitive_impl: program's engine does not match called engine");
+        return implementation_map<T>::check(type(), node);
+    }
+
+    template <class T>
+    bool does_possible_implementation_exist(typed_program_node<T> const& node)
+    {
+        if (&node.get_program().get_engine() != this)
+            throw std::invalid_argument("engine_impl::create_primitive_impl: program's engine does not match called engine");
+        return implementation_map<T>::check_io_eq(type(), node);
+    }
+
     const engine_configuration& configuration() const { return _configuration; }
     void set_mem_pool(bool flag) { _configuration.enable_memory_pool = flag; }
     std::shared_ptr<gpu_toolkit> get_context() const { return _context; }
@@ -97,4 +114,4 @@ private:
 };
 }
 
-API_CAST(::cldnn_engine, cldnn::engine_impl)
+API_CAST(::cldnn_engine, cldnn::engine_impl)
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/error_handler.h b/inference-engine/thirdparty/clDNN/src/include/error_handler.h
index 36f6bd733..5126d0150 100644
--- a/inference-engine/thirdparty/clDNN/src/include/error_handler.h
+++ b/inference-engine/thirdparty/clDNN/src/include/error_handler.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -34,23 +34,33 @@ namespace err_details
 template<typename N1, typename N2>
 inline void error_on_not_equal(const std::string& file, int line, const std::string& instance_id, const std::string& number_id, N1 number, const std::string& compare_to_id, N2 number_to_compare_to, const std::string& additional_message = "")
 {
-    std::stringstream error_msg;
+    if (number != static_cast<decltype(number)>(number_to_compare_to))
     {
-        if (number != static_cast<decltype(number)>(number_to_compare_to))
-        {
-            error_msg << number_id << "(=" << number << ") is not equal to: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
-            err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
-        }
+        std::stringstream error_msg;
+        error_msg << number_id << "(=" << number << ") is not equal to: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
+        err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
 }
 #define CLDNN_ERROR_NOT_EQUAL(instance_id, number_id, number, compare_to_id, number_to_compare_to, add_msg) error_on_not_equal(__FILE__, __LINE__, instance_id, number_id, number, compare_to_id, number_to_compare_to, add_msg)
 
 template<typename N1, typename N2>
+inline void error_on_equal(const std::string& file, int line, const std::string& instance_id, const std::string& number_id, N1 number, const std::string& compare_to_id, N2 number_to_compare_to, const std::string& additional_message = "")
+{
+    if (number == static_cast<decltype(number)>(number_to_compare_to))
+    {
+        std::stringstream error_msg;
+        error_msg << number_id << "(=" << number << ") is equal to: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
+        err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
+    }
+}
+#define CLDNN_ERROR_EQUAL(instance_id, number_id, number, compare_to_id, number_to_compare_to, add_msg) error_on_equal(__FILE__, __LINE__, instance_id, number_id, number, compare_to_id, number_to_compare_to, add_msg)
+
+template<typename N1, typename N2>
 inline void error_on_greater_than(const std::string& file, int line, const std::string& instance_id, const std::string& number_id, N1 number, const std::string& compare_to_id, N2 number_to_compare_to, const std::string& additional_message = "")
 {
-    std::stringstream error_msg;
     if (number > static_cast<decltype(number)>(number_to_compare_to))
     {
+        std::stringstream error_msg;
         error_msg << number_id << "(=" << number << ") is greater than: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
@@ -60,9 +70,9 @@ inline void error_on_greater_than(const std::string& file, int line, const std::
 template<typename N1, typename N2>
 inline void error_on_less_than(const std::string& file, int line, const std::string& instance_id, const std::string& number_id, N1 number, const std::string& compare_to_id, N2 number_to_compare_to, const std::string& additional_message = "")
 {
-    std::stringstream error_msg;
     if (number < static_cast<decltype(number)>(number_to_compare_to))
     {
+        std::stringstream error_msg;
         error_msg << number_id << "(=" << number << ") is less than: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
@@ -72,9 +82,9 @@ inline void error_on_less_than(const std::string& file, int line, const std::str
 template<typename N1, typename N2>
 inline void error_on_less_or_equal_than(const std::string& file, int line, const std::string& instance_id, const std::string& number_id, N1 number, const std::string& compare_to_id, N2 number_to_compare_to, const std::string& additional_message = "")
 {
-    std::stringstream error_msg;
     if (number <= static_cast<decltype(number)>(number_to_compare_to))
     {
+        std::stringstream error_msg;
         error_msg << number_id << "(=" << number << ") is less or equal than: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
@@ -84,9 +94,9 @@ inline void error_on_less_or_equal_than(const std::string& file, int line, const
 template<typename N1, typename N2>
 inline void error_on_greater_or_equal_than(const std::string& file, int line, const std::string& instance_id, const std::string& number_id, N1 number, const std::string& compare_to_id, N2 number_to_compare_to, const std::string& additional_message = "")
 {
-    std::stringstream error_msg;
     if (number >= static_cast<decltype(number)>(number_to_compare_to))
     {
+        std::stringstream error_msg;
         error_msg << number_id << "(=" << number << ") is greater or equal than: " << compare_to_id << "(=" << number_to_compare_to << ")" << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
@@ -96,9 +106,9 @@ inline void error_on_greater_or_equal_than(const std::string& file, int line, co
 template<typename ptr>
 inline void error_on_nullptr(const std::string& file, int line, const std::string& instance_id, const std::string& condition_id, ptr condition, const std::string& additional_message = "")
 {
-    std::stringstream error_msg;
     if (condition == nullptr)
     {
+        std::stringstream error_msg;
         error_msg << condition_id << " should not be null" << std::endl;
         err_details::cldnn_print_error_message(file, line, instance_id, error_msg, additional_message);
     }
@@ -108,7 +118,6 @@ inline void error_on_nullptr(const std::string& file, int line, const std::strin
 template<typename M = format, typename... Ms>
 inline void error_on_not_proper_enum_values(const std::string& file, int line, const std::string& instance_id, const std::string& mode_id, M mode, const std::string& modes_id, Ms... modes_to_compare_to)
 {
-    std::stringstream error_msg;
     auto enum_value_string = [](const M& mode)->std::string {
         if (std::is_same<M, format::type>::value)
         {
@@ -119,6 +128,7 @@ inline void error_on_not_proper_enum_values(const std::string& file, int line, c
     const std::array<const M, sizeof...(Ms)> modes{ std::forward<Ms>(modes_to_compare_to)... };
     if (std::all_of(modes.begin(), modes.end(), [&](const M& m)->int {return mode != m; }))
     {
+        std::stringstream error_msg;
         error_msg << mode_id << "( " << enum_value_string(mode) << " ) is incompatible with " << modes_id << ". Should be one of: ";
         for (const auto& ms : modes)
         {
@@ -142,8 +152,9 @@ void error_on_mismatch_layout(const std::string& file, int line, const std::stri
 void error_on_bool(const std::string& file, int line, const std::string& instance_id, const std::string& condition_id, bool condition, const std::string& additional_message = "");
 #define CLDNN_ERROR_BOOL(instance_id, condition_id, condition, add_msg) error_on_bool(__FILE__, __LINE__, instance_id, condition_id, condition, add_msg)
 
-void error_on_mismatching_data_types(const std::string& file, int line, const std::string& instance_id, const std::string& data_format_1_id, data_types data_format_1, const std::string& data_format_2_id, data_types data_format_2, const std::string& additional_message = "");
+void error_on_mismatching_data_types(const std::string& file, int line, const std::string& instance_id, const std::string& data_format_1_id, data_types data_format_1, const std::string& data_format_2_id, data_types data_format_2, const std::string& additional_message = "", bool ignore_sign = false);
 #define CLDNN_ERROR_DATA_TYPES_MISMATCH(instance_id, data_format_1_id, data_format_1, data_format_2_id, data_format_2, add_msg) error_on_mismatching_data_types(__FILE__, __LINE__, instance_id, data_format_1_id, data_format_1, data_format_2_id, data_format_2, add_msg)
+#define CLDNN_ERROR_DATA_TYPES_MISMATCH_IGNORE_SIGN(instance_id, data_format_1_id, data_format_1, data_format_2_id, data_format_2, add_msg) error_on_mismatching_data_types(__FILE__, __LINE__, instance_id, data_format_1_id, data_format_1, data_format_2_id, data_format_2, add_msg, true)
 
 void error_on_tensor_dims_less_than_other_tensor_dims(const std::string& file, int line, const std::string& instance_id, const std::string& tensor_id, const tensor& tens, const std::string& tensor_to_compare_to_id, const tensor& tens_to_compre, const std::string& additional_message = "");
 #define CLDNN_ERROR_TENSOR_SIZES_LESS_THAN(instance_id, tensor_id, tensor_1, compare_to_id, tensor_to_compare_to, ...) error_on_tensor_dims_less_than_other_tensor_dims(__FILE__, __LINE__, instance_id, tensor_id, tensor_1, compare_to_id, tensor_to_compare_to, __VA_ARGS__)
diff --git a/inference-engine/thirdparty/clDNN/src/include/event_impl.h b/inference-engine/thirdparty/clDNN/src/include/event_impl.h
index a8adc745f..4e696e284 100644
--- a/inference-engine/thirdparty/clDNN/src/include/event_impl.h
+++ b/inference-engine/thirdparty/clDNN/src/include/event_impl.h
@@ -33,7 +33,8 @@ public:
 
     void wait();
     bool is_set();
-    
+    virtual bool is_valid() const { return _attached; }
+    virtual void reset() { _attached = false; }
     //returns true if handler has been successfully added
     bool add_event_handler(cldnn_event_handler handler, void* data);
     
@@ -48,7 +49,7 @@ private:
 
 protected:
     bool _set = false;
-
+    bool _attached = false; //because ocl event can be attached later, we need mechanism to check if such event was attached
     void call_handlers();
 
     virtual void wait_impl() = 0;
diff --git a/inference-engine/thirdparty/clDNN/src/include/fused_conv_bn_scale_inst.h b/inference-engine/thirdparty/clDNN/src/include/fused_conv_bn_scale_inst.h
new file mode 100644
index 000000000..7e7b572b2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/fused_conv_bn_scale_inst.h
@@ -0,0 +1,149 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api_extension/CPP/fused_conv_bn_scale.hpp"
+#include "primitive_inst.h"
+
+#include <memory>
+
+namespace cldnn
+{
+
+template <>
+struct typed_program_node<fused_conv_bn_scale> : public typed_program_node_base<fused_conv_bn_scale>
+{
+    using parent = typed_program_node_base<fused_conv_bn_scale>;
+
+public:
+    typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog)
+        : parent(prim, prog)
+        , split(this->get_primitive()->split())
+    {
+    }
+
+    void set_split(int32_t node_split) { split = node_split; }
+    int32_t get_split() const { return split; }
+
+    program_node& input(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= static_cast<int32_t>(desc->input.size()))
+            throw std::range_error("input index too big");
+
+        return get_dependency(idx);
+    }
+
+    program_node& weights(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("weights offset too big");
+
+        return get_dependency(desc->input.size() + idx);
+    }
+
+    program_node& bias(size_t idx = 0) const
+    { 
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("bias offset too big");
+
+        return get_dependency(desc->input.size() + this->get_split() + idx);
+    }
+
+    program_node& weights_quantization_factors(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("quantization factor offset too big");
+
+        return get_dependency(desc->input.size() + 2*this->get_split() + idx);
+    }
+
+    program_node& output_calibration_factors(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("calibration factor offset too big");
+
+        return get_dependency(desc->input.size() + 3 * this->get_split() + idx);
+    }
+
+    bool bias_term() const
+    {
+        return get_primitive()->bias.size() > 0;
+    }
+
+    bool scale_bias_term() const
+    {
+        return !get_primitive()->scale_bias.empty();
+    }
+
+    bool is_fused_in_training() const
+    {
+        return !get_primitive()->inv_variance.empty();
+    }
+
+private:
+    int32_t split;
+};
+
+using fused_conv_bn_scale_node = typed_program_node<fused_conv_bn_scale>;
+
+template <>
+class typed_primitive_inst<fused_conv_bn_scale> : public typed_primitive_inst_base<fused_conv_bn_scale>
+{
+    using parent = typed_primitive_inst_base<fused_conv_bn_scale>;
+
+public:
+    static layout calc_output_layout(fused_conv_bn_scale_node const& node);
+    static std::string to_string(fused_conv_bn_scale_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, fused_conv_bn_scale_node const& node);
+
+    memory_impl& weights_memory(size_t index) const
+    {
+        if (static_cast<int32_t>(index) >= node.get_split())
+            throw std::range_error("weights offset too big");
+        
+        return dep_memory(inputs_memory_count() + index);
+    }
+
+    memory_impl& bias_memory(size_t index) const
+    { 
+        if (static_cast<int32_t>(index) >= node.get_split())
+            throw std::range_error("bias offset too big");
+
+        return dep_memory(inputs_memory_count() + node.get_split() + index);
+    }
+
+    bool bias_term() const
+    {
+        return node.bias_term();
+    }
+
+    bool scale_bias_term() const
+    {
+        return node.scale_bias_term();
+    }
+
+    bool is_fused_in_training() const
+    {
+        return node.is_fused_in_training();
+    }
+};
+
+using fused_conv_bn_scale_inst = typed_primitive_inst<fused_conv_bn_scale>;
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h b/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h
new file mode 100644
index 000000000..051ec11b0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/fused_conv_eltwise_inst.h
@@ -0,0 +1,204 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api_extension/CPP/fused_conv_eltwise.hpp"
+#include "primitive_inst.h"
+
+#include <memory>
+
+namespace cldnn
+{
+
+template <>
+struct typed_program_node<fused_conv_eltwise> : public typed_program_node_base<fused_conv_eltwise>
+{
+    using parent = typed_program_node_base<fused_conv_eltwise>;
+
+public:
+    typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog)
+        : parent(prim, prog)
+        , split(this->get_primitive()->split())
+        , depthwise_sep_opt(false)
+        , transposed(false)
+        , conv_input_qf(this->get_primitive()->conv.input_quantization_factor)
+        , conv_output_qf(this->get_primitive()->conv.output_quantization_factor)
+    {
+    }
+
+    void set_split(int32_t node_split) { split = node_split; }
+    int32_t get_split() const { return split; }
+
+    void set_depthwise_sep_opt(bool node_depthwise_sep_opt) { depthwise_sep_opt = node_depthwise_sep_opt; }
+    bool get_depthwise_sep_opt() const { return depthwise_sep_opt; }
+
+    void set_transposed(bool node_transposed) { transposed = node_transposed; }
+    bool get_transposed() const { return transposed; }
+
+    program_node& input(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= static_cast<int32_t>(desc->input.size()))
+            throw std::range_error("input index too big");
+
+        return get_dependency(idx);
+    }
+
+    program_node& weights(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("weights offset too big");
+
+        return get_dependency(desc->input.size() + idx);
+    }
+
+    program_node& bias(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("bias offset too big");
+
+        return get_dependency(desc->input.size() + this->get_split() + idx);
+    }
+
+    program_node& weights_quantization_factors(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("quantization factor offset too big");
+
+        return get_dependency(desc->input.size() + 2 * this->get_split() + idx);
+    }
+
+    program_node& conv_output_calibration_factors(size_t idx = 0) const
+    {
+        if (static_cast<int32_t>(idx) >= this->get_split())
+            throw std::range_error("calibration factor offset too big");
+
+        return get_dependency(desc->input.size() + 3 * this->get_split() + idx);
+    }
+
+    program_node& eltw_output_calibration_factors() const
+    {
+        return get_dependency(desc->input.size() + 4 * this->get_split());
+    }
+
+    bool bias_term() const
+    {
+        return get_primitive()->conv.bias.size() > 0;
+    }
+
+    bool weights_quantization_term() const
+    {
+        return get_primitive()->conv.weights_quantization_factors.size() > 0;
+    }
+
+    bool conv_output_calibration_term() const
+    {
+        return get_primitive()->conv.output_calibration_factors.size() > 0;
+    }
+
+    bool eltw_output_calibration_term() const
+    {
+        return get_primitive()->eltw.output_calibration_factors.size() > 0;
+    }
+
+    float get_conv_input_qf() const { return conv_input_qf; }
+    float get_conv_output_qf() const { return conv_output_qf; }
+    float get_eltw_output_qf() const { return eltw_output_qf; }
+
+private:
+    int32_t split;
+    bool depthwise_sep_opt;
+    bool transposed;
+    float conv_input_qf;
+    float conv_output_qf;
+    float eltw_output_qf;
+};
+
+using fused_conv_eltwise_node = typed_program_node<fused_conv_eltwise>;
+
+template <>
+class typed_primitive_inst<fused_conv_eltwise> : public typed_primitive_inst_base<fused_conv_eltwise>
+{
+    using parent = typed_primitive_inst_base<fused_conv_eltwise>;
+
+public:
+    static layout calc_output_layout(fused_conv_eltwise_node const& node);
+    static std::string to_string(fused_conv_eltwise_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, fused_conv_eltwise_node const& node);
+
+    memory_impl& weights_memory(size_t index) const
+    {
+        if (static_cast<int32_t>(index) >= node.get_split())
+            throw std::range_error("weights offset too big");
+        
+        return dep_memory(2 + index);
+    }
+
+    memory_impl& bias_memory(size_t index) const
+    { 
+        if (static_cast<int32_t>(index) >= node.get_split())
+            throw std::range_error("bias offset too big");
+
+        return dep_memory(2 + node.get_split() + index);
+    }
+
+    memory_impl& weights_quantization_factors_memory(size_t index) const
+    {
+        if (static_cast<int32_t>(index) >= node.get_split())
+            throw std::range_error("quantization factors offset too big");
+
+        return dep_memory(2 + 2*node.get_split() + index);
+    }
+
+    memory_impl& output_calibration_factors_memory(size_t index) const
+    {
+        if (static_cast<int32_t>(index) >= node.get_split())
+            throw std::range_error("quantization factors offset too big");
+
+        return dep_memory(2 + 3 * node.get_split() + index);
+    }
+
+    memory_impl& eltw_output_calibration_factors_memory() const
+    {
+        return dep_memory(2 + 4 * node.get_split());
+    }
+
+    bool bias_term() const
+    {
+        return node.bias_term();
+    }
+
+    bool weights_quantization_factors_term() const
+    {
+        return node.weights_quantization_term();
+    }
+
+    bool conv_output_calibration_factors_term() const
+    {
+        return node.conv_output_calibration_term();
+    }
+
+    bool eltw_output_calibration_factors_term() const
+    {
+        return node.eltw_output_calibration_term();
+    }
+};
+
+using fused_conv_eltwise_inst = typed_primitive_inst<fused_conv_eltwise>;
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/gather_inst.h b/inference-engine/thirdparty/clDNN/src/include/gather_inst.h
new file mode 100644
index 000000000..a2ee8292d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/gather_inst.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api/CPP/gather.hpp"
+#include "primitive_inst.h"
+
+namespace  cldnn
+{
+template <>
+struct typed_program_node<gather> : public typed_program_node_base<gather>
+{
+    using parent = typed_program_node_base<gather>;
+
+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+};
+
+using gather_node = typed_program_node<gather>;
+
+template <>
+class typed_primitive_inst<gather> : public typed_primitive_inst_base<gather>
+{
+    using parent = typed_primitive_inst_base<gather>;
+
+public:
+    static layout calc_output_layout(gather_node const& node);
+    static std::string to_string(gather_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, gather_node const& desc);
+};
+
+using gather_inst = typed_primitive_inst<gather>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h b/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h
index 468591b3e..2bba00133 100644
--- a/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/generic_layer_inst.h
@@ -26,6 +26,7 @@ template <>
 struct typed_program_node<generic_layer> : public typed_program_node_base<generic_layer>
 {
     using parent = typed_program_node_base<generic_layer>;
+    typed_program_node(const std::shared_ptr<generic_layer> prim, program_impl& prog);
 public:
     using parent::parent;
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/implementation_map.h b/inference-engine/thirdparty/clDNN/src/include/implementation_map.h
index 5fc271019..747203886 100644
--- a/inference-engine/thirdparty/clDNN/src/include/implementation_map.h
+++ b/inference-engine/thirdparty/clDNN/src/include/implementation_map.h
@@ -57,6 +57,10 @@ struct implementation_key
     {
         return std::make_tuple(engine_type, primitive.get_dependency(0).get_output_layout().data_type, primitive.get_dependency(0).get_output_layout().format);
     }
+    type operator()(engine_types engine_type, const layout& proposed_layout)
+    {
+        return std::make_tuple(engine_type, proposed_layout.data_type, proposed_layout.format);
+    }
 };
 
 template<>
@@ -67,6 +71,10 @@ struct implementation_key<permute>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
 };
 
 template<>
@@ -77,6 +85,11 @@ struct implementation_key<reorder>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
+
 };
 
 template<>
@@ -87,6 +100,11 @@ struct implementation_key<generic_layer>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
+
 };
 
 template<>
@@ -97,6 +115,11 @@ struct implementation_key<custom_gpu_primitive>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
+
 };
 
 template<>
@@ -107,6 +130,11 @@ struct implementation_key<reshape>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
+
 };
 
 template<>
@@ -117,6 +145,11 @@ struct implementation_key<data>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
+
 };
 
 template<>
@@ -127,6 +160,10 @@ struct implementation_key<mutable_data>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
 };
 
 template<>
@@ -137,6 +174,11 @@ struct implementation_key<input_layout>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
+
 };
 
 template<>
@@ -147,6 +189,10 @@ struct implementation_key<prior_box>
     {
         return engine_type;
     }
+    type operator()(engine_types engine_type, const layout&)
+    {
+        return engine_type;
+    }
 };
 
 template<typename primitive_kind>
@@ -162,12 +208,35 @@ public:
         auto key = key_builder()(engine_type, primitive);
         auto it = map_type::instance().find(key);
         if (it == std::end(map_type::instance())) 
-            throw std::runtime_error(std::string("implementation_map for ")+typeid(primitive_kind).name() +" could not find any implementation to match key");
-
+            throw std::runtime_error(
+                std::string("implementation_map for ") + typeid(primitive_kind).name()
+                    + " could not find any implementation to match key");
         // create implementation & attach it to result 
         return it->second;
     }
 
+    //check if for a given engine and type there exist an implementation
+    static bool check(engine_types engine_type, const typed_program_node<primitive_kind>& primitive)
+    {
+        auto key = key_builder()(engine_type, primitive);
+        auto it = map_type::instance().find(key);
+        if (it == std::end(map_type::instance()))
+            return false;
+        else
+            return true;
+    }
+
+    //check if there exists a kernel implementation of a primitive with output set it primitive's output layout
+    static bool check_io_eq(engine_types engine_type, const typed_program_node<primitive_kind>& primitive)
+    {
+        auto key = key_builder()(engine_type, primitive.get_output_layout());
+        auto it = map_type::instance().find(key);
+        if (it == std::end(map_type::instance()))
+            return false;
+        else
+            return true;
+    }
+
     static void add(typename map_type::key_type key, factory_type factory) {
         map_type::instance().insert({ key, factory });
     }
diff --git a/inference-engine/thirdparty/clDNN/src/include/index_select_inst.h b/inference-engine/thirdparty/clDNN/src/include/index_select_inst.h
index 0d775f65b..b4148495a 100644
--- a/inference-engine/thirdparty/clDNN/src/include/index_select_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/index_select_inst.h
@@ -36,7 +36,8 @@ namespace cldnn
 		}
 		program_node& input() const { return get_dependency(0); }
         program_node& indices() const { return get_dependency(1); }
-        index_select_axis_name get_axis() const { return get_primitive()->axis; }
+        bool get_reverse() const { return get_primitive()->reverse; }
+        std::vector<index_select_axis_name> get_axes() const { return get_primitive()->axis; }
 	};
 
 	using index_select_node = typed_program_node<index_select>;
@@ -53,7 +54,8 @@ namespace cldnn
 
         memory_impl& input() const { return dep_memory(0); }
         memory_impl& indices() const { return dep_memory(1); }
-        index_select_axis_name get_axis() const { return node.get_axis(); }
+        bool get_reverse() const { return node.get_reverse(); }
+        std::vector<index_select_axis_name> get_axes() const { return node.get_axes(); }
 	};
 
 	using index_select_inst = typed_primitive_inst<index_select>;
diff --git a/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h b/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h
index 64d9ba7dc..ef4bbe1c7 100644
--- a/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/input_layout_inst.h
@@ -24,10 +24,12 @@ namespace cldnn
 struct memory_impl;
 
 template <>
-struct typed_program_node<input_layout> : public typed_program_node_base<input_layout>
+struct typed_program_node<input_layout> : public typed_program_node_base<input_layout> 
 {
     using parent = typed_program_node_base<input_layout>;
     using parent::parent;
+
+    typed_program_node(const std::shared_ptr<input_layout> prim, program_impl& prog);
 };
 
 using input_layout_node = typed_program_node<input_layout>;
diff --git a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h
index 6030ccd25..b21729e70 100644
--- a/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h
+++ b/inference-engine/thirdparty/clDNN/src/include/kernel_selector_helper.h
@@ -15,10 +15,7 @@
 #pragma once
 
 #include "api/C/cldnn.h"
-#include "api/CPP/program.hpp"
-
-#include "gpu/ocl_toolkit.h"
-#include "program_impl.h"
+#include "api/CPP/tensor.hpp"
 
 #include "kernel_selector_params.h"
 #include "kernel_selector_common.h"
@@ -28,6 +25,16 @@
 
 using namespace cldnn;
 
+namespace cldnn
+{
+    enum class data_types : size_t;
+    enum class tuning_mode;
+    struct format;
+    struct layout;
+    struct program_impl;
+    struct program_node;
+}
+
 namespace kernel_selector
 {
     using n_dims                            = kernel_selector::Tensor::NDims;
@@ -63,6 +70,7 @@ namespace kernel_selector
     using tuning_mode                       = kernel_selector::TuningMode;
     using sample_type                       = kernel_selector::SampleType;
     using border_type                       = kernel_selector::BorderType;
+    using gather_axis                       = kernel_selector::GatherAxis;
 
     using data_tensor                       = kernel_selector::DataTensor;
     using weights_tensor                    = kernel_selector::WeightsTensor;
@@ -74,6 +82,8 @@ namespace kernel_selector
     using params                            = kernel_selector::Params;
     using weights_reorder_params            = kernel_selector::WeightsReorderParams;
     using generic_kernel_params             = kernel_selector::GenericKernelParams;
+
+    struct training_params;
 }
 
 kernel_selector::data_type to_data_type(data_types dt);
@@ -104,59 +114,45 @@ kernel_selector::dim_tensor<T> convert_dim_vector(const tensor& t)
 }
 
 template <typename p_type>
-inline void convert_activation_func_params(const p_type primitive, kernel_selector::base_params& params)
+inline void convert_activation_func_params(const p_type primitive, kernel_selector::base_activation_params& params)
 {
     const float negative_slope = primitive->activation_negative_slope;
     if (negative_slope != 0.0f)
     {
-        params.activationParams.m = negative_slope;
-        params.activationFunc = kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
+        params.m = negative_slope;
+        params.function = kernel_selector::activation_function::RELU_NEGATIVE_SLOPE;
     }
     else
     {
-        params.activationFunc = kernel_selector::activation_function::RELU;
+        params.function = kernel_selector::activation_function::RELU;
     }
 }
 
 template <typename arg_t>
-inline void convert_fused_activation_func_params(const arg_t& arg, kernel_selector::base_params& params)
+inline void convert_fused_activation_func_params(const arg_t& arg, kernel_selector::base_activation_params& params)
 {
-    params.activationParams.m = arg.get_fused_activation_params().a;
-    params.activationParams.n = arg.get_fused_activation_params().b;
-    params.activationFunc = get_kernel_selector_activation_param(arg.get_fused_activation_func());
+    params.m = arg.get_fused_activation_params().a;
+    params.n = arg.get_fused_activation_params().b;
+    params.function = get_kernel_selector_activation_param(arg.get_fused_activation_func());
 }
 
 template <typename p_type>
-inline void convert_new_activation_func(const p_type primitive, kernel_selector::base_params& params)
+inline void convert_new_activation_func(const p_type primitive, kernel_selector::base_activation_params& params)
 {
-    params.activationFunc = get_kernel_selector_activation_param(primitive->activation_func);
-    params.activationParams.m = primitive->additional_params.a;
-    params.activationParams.n = primitive->additional_params.b;
+    params.function = get_kernel_selector_activation_param(primitive->activation_func);
+    params.m = primitive->additional_params.a;
+    params.n = primitive->additional_params.b;
 }
 
+void set_params(const program_node& node, kernel_selector::params& params);
+
 template <typename params_t, typename arg_t>
 inline params_t get_default_params(const arg_t& arg, uint32_t split = 1)
 {
     params_t params;
 
-    const auto& context = arg.get_program().get_engine().get_context();
-    const auto& engine_info = context->get_engine_info();
-
-    params.engineInfo.bSubGroupSupport      = context->extension_supported("cl_intel_subgroups");
-    params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
-    params.engineInfo.bFP16Support          = context->extension_supported("cl_khr_fp16");
-    params.engineInfo.bFP64Support          = context->extension_supported("cl_khr_fp64");
-    params.engineInfo.bIMADSupport          = engine_info.supports_imad != 0;
-    params.engineInfo.bIMMADSupport         = engine_info.supports_immad != 0;
-    params.engineInfo.bImageSupport         = engine_info.supports_image != 0;
-    params.engineInfo.maxWorkGroupSize      = engine_info.max_work_group_size;
-    params.engineInfo.maxLocalMemSize       = engine_info.max_local_mem_size;
-    params.engineInfo.maxImage2dWidth       = engine_info.max_image2d_width;
-    params.engineInfo.maxImage2dHeight      = engine_info.max_image2d_height;
-    params.engineInfo.deviceId              = engine_info.dev_id;
-    params.engineInfo.driverVersion         = engine_info.driver_version;
-    params.engineInfo.hostVersion           = to_host_version(cldnn::get_version());
-    
+    set_params(arg, params);
+
     const auto& input_layout    = arg.input().get_output_layout();
     const auto& output_layout   = arg.get_output_layout();
 
@@ -165,63 +161,61 @@ inline params_t get_default_params(const arg_t& arg, uint32_t split = 1)
 
     params.layerID = arg.id();
 
-    convert_fused_activation_func_params(arg, params);
+    convert_fused_activation_func_params(arg, params.activation);
 
     return params;
 }
 
 template <typename params_t, typename arg_t>
-inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1)
+inline params_t get_weights_bias_default_params(const arg_t& arg, uint32_t split = 1, uint32_t groups = 1)
 {
     params_t params = get_default_params<params_t>(arg, split);
-
     const auto& weights_layout = arg.weights().get_output_layout();
-    params.weights = convert_weights_tensor(weights_layout);
+    if (groups == 1) {
+        params.weights = convert_weights_tensor(weights_layout);
+    }
+    else {
+        params.weights = convert_weights_tensor(layout(weights_layout.data_type, weights_layout.format,
+            { weights_layout.size.batch[0]/(int)groups, weights_layout.size.feature[0], weights_layout.size.spatial[0], weights_layout.size.spatial[1] }
+        ));
+    }
 
     if (arg.bias_term())
     {
         const auto& bias_layout = arg.bias().get_output_layout();
         // bias per output is not supported on cldnn
-        params.bias.push_back(convert_data_tensor(bias_layout).FlattenFeatureAndSpatials());
+        if (groups == 1) {
+            params.bias.push_back(convert_data_tensor(bias_layout).FlattenFeatureAndSpatials());        }
+        else {
+            params.bias.push_back(convert_data_tensor(
+                layout(
+                    bias_layout.data_type, bias_layout.format,
+                    { bias_layout.size.batch[0], bias_layout.size.feature[0], bias_layout.size.spatial[0]/(int)groups, bias_layout.size.spatial[1] }
+                )).FlattenFeatureAndSpatials()
+            );
+        }
     }
 
     return params;
 }
 
+void set_learning_params(const program_node& node, kernel_selector::training_params& params, bool use_momentum);
+
 template <typename params_t, typename arg_t>
 inline params_t get_default_learning_params(const arg_t& arg, uint32_t split = 1)
 {
 	params_t params = get_weights_bias_default_params<params_t>(arg, split);
-
-	const auto learning_params = arg.get_program().get_options().template get<build_option_type::learning_config>()->params;
-
-	if (arg.use_momentum())
-	{
-		params.use_momentum = true;
-	}
-
-	params.momentum_factor = learning_params.momentum;
-	params.weights_decay = learning_params.weights_decay;
-
+    set_learning_params(arg, params, arg.use_momentum());
 	return params;
 }
 
+void set_optional_params(const program_impl& program, kernel_selector::optional_params& params);
+
 template <typename optional_params_t>
 inline optional_params_t get_default_optional_params(const program_impl& program)
 {
     optional_params_t params;
-    
-    const auto& context = program.get_engine().get_context();
-
-    params.meaningfulKernelsNames       = context->get_configuration().meaningful_kernels_names;
-    params.allowStaticInputReordering   = program.get_options().get<build_option_type::optimize_data>()->enabled();
-    params.allowInputReordering         = false;
-    params.allowOutputReordering        = false;
-    
-    const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
-    params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
-    params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
-
+    set_optional_params(program, params);
     return params;
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h b/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h
index 9530b783e..4d69dda12 100644
--- a/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/lstm_elt_inst.h
@@ -40,6 +40,7 @@ public:
         return clip_val; 
     }
     bool input_forget() const { return get_primitive()->input_forget; }
+    int32_t direction() const { return get_primitive()->direction; }
 };
 
 using lstm_elt_node = typed_program_node<lstm_elt>;
@@ -66,6 +67,7 @@ public:
         return clip_val;
     }
     bool input_forget() const { return argument.input_forget; }
+    uint32_t direction() const { return argument.direction; }
 };
 
 using lstm_elt_inst = typed_primitive_inst<lstm_elt>;
diff --git a/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h b/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h
index 14c449b2f..f03c4fb4d 100644
--- a/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/max_unpooling_inst.h
@@ -26,7 +26,7 @@ template <>
 struct typed_program_node<max_unpooling> : public typed_program_node_base<max_unpooling>
 {
     using parent = typed_program_node_base<max_unpooling>;
-
+    typed_program_node(const std::shared_ptr<max_unpooling> prim, program_impl& prog);
 public:
     using parent::parent;
     program_node& input() const { return get_dependency(0); }
diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h
index 515f6dc49..5c18b7bc8 100644
--- a/inference-engine/thirdparty/clDNN/src/include/memory_impl.h
+++ b/inference-engine/thirdparty/clDNN/src/include/memory_impl.h
@@ -27,10 +27,15 @@ namespace cldnn
 
 struct memory_impl : refcounted_obj<memory_impl>
 {
-    memory_impl(const engine_impl::ptr& engine, layout layout): _engine(engine), _layout(layout){}
+    memory_impl(const engine_impl::ptr& engine, layout layout, bool reused=false)
+        : _engine(engine)
+        , _layout(layout)
+        , _reused(reused)
+    {}
+
     virtual ~memory_impl()
     {
-        if (_engine != nullptr)
+        if (_engine != nullptr && !_reused) 
         {
             _engine->get_memory_pool().subtract_memory_used(_layout.bytes_count());
         }
@@ -45,6 +50,8 @@ struct memory_impl : refcounted_obj<memory_impl>
 protected:
     const engine_impl::ptr _engine;
     const layout _layout;
+private:
+    bool _reused;
 };
 
 struct simple_attached_memory : memory_impl
diff --git a/inference-engine/thirdparty/clDNN/src/include/memory_pool.h b/inference-engine/thirdparty/clDNN/src/include/memory_pool.h
index b5135ebb6..1e835f839 100644
--- a/inference-engine/thirdparty/clDNN/src/include/memory_pool.h
+++ b/inference-engine/thirdparty/clDNN/src/include/memory_pool.h
@@ -110,7 +110,7 @@ struct padded_pool_comparer
 class memory_pool
 {
     memory_pool();
-
+    
     refcounted_obj_ptr<memory_impl> alloc_memory(const layout& layout);
     static bool has_conflict(const memory_set&, const std::set<primitive_id>&, uint32_t);
 
@@ -122,7 +122,7 @@ class memory_pool
     uint64_t _max_peak_memory_used;
 public:
     memory_pool(engine_impl& engine);
-
+    ~memory_pool();
     refcounted_obj_ptr<memory_impl> get_memory(const layout& layout, const primitive_id& id, uint32_t network_id,  const std::set<primitive_id>& restrictions, bool reusable = true); // get from pool or create memory allocation
     refcounted_obj_ptr<memory_impl> get_memory(const layout& layout);
     refcounted_obj_ptr<memory_impl> get_from_non_padded_pool(const layout& layout, const primitive_id& id, uint32_t network_id, const std::set<primitive_id>&);
diff --git a/inference-engine/thirdparty/clDNN/src/include/meta_utils.h b/inference-engine/thirdparty/clDNN/src/include/meta_utils.h
index ad18786e8..de1c55aad 100644
--- a/inference-engine/thirdparty/clDNN/src/include/meta_utils.h
+++ b/inference-engine/thirdparty/clDNN/src/include/meta_utils.h
@@ -62,4 +62,4 @@ struct is_internal_primitive : public std::integral_constant<bool,
                                                     std::is_same<T, typename std::remove_cv<T>::type>::value> {};
 
 }
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/network_impl.h b/inference-engine/thirdparty/clDNN/src/include/network_impl.h
index 4874d37a5..b9b26e099 100644
--- a/inference-engine/thirdparty/clDNN/src/include/network_impl.h
+++ b/inference-engine/thirdparty/clDNN/src/include/network_impl.h
@@ -39,6 +39,7 @@ struct network_impl : public refcounted_obj<network_impl>
 public:
     network_impl(const program_impl& program, bool is_internal = false);
     network_impl(engine_impl& engine, const topology_impl& topo, const build_options& options = build_options(), bool is_internal = false);
+    network_impl(engine_impl& engine, const std::set<std::shared_ptr<program_node>>& nodes, const build_options & options, bool is_internal);
 
     const program_impl& get_program() const { return *_program; }
     engine_impl& get_engine() const { return _program->get_engine(); }
@@ -61,19 +62,19 @@ public:
     std::vector<primitive_id> get_all_primitive_ids() const;
     std::vector<primitive_id> get_all_primitive_org_ids() const;
     void execute(const std::vector<event_impl::ptr>& events);
-
+    void validate_primitives();
     // Implementation specific calls
     std::shared_ptr<primitive_inst> get_primitive(const primitive_id& id);
     std::string get_primitive_info(const primitive_id& id) const;
     const event_impl::ptr& get_primitive_event(const primitive_id& id) const { return _events.at(id); }
     std::vector<std::shared_ptr<primitive_inst>> get_primitives(const std::vector<primitive_id>& ids);
     std::vector<std::shared_ptr<primitive_inst>> get_primitives(const std::vector<program_node*>& nodes);
-    event_impl::ptr execute_primitive(const std::shared_ptr<primitive_inst>& primitive, const std::vector<event_impl::ptr>& events);
+    void execute_primitive(const std::shared_ptr<primitive_inst>& primitive, const std::vector<event_impl::ptr>& events);
     void allocate_primitives();
     void build_insts_deps();
     uint32_t get_id() const { return net_id; }
+    void build_exec_order();    
     bool is_internal() const { return _internal; }
-
 private:
     uint32_t net_id = 0; 
     const program_impl::cptr _program;
@@ -89,6 +90,10 @@ private:
     std::unordered_map<primitive_id, event_impl::ptr> _events;
 
     void allocate_primitive_instance(program_node const& node);
+    void add_to_exec_order(const primitive_id& id);
+    std::shared_ptr<primitive_inst> find_in_internal_networks(const primitive_id& id);
+    std::shared_ptr<primitive_inst> find_primitive(const primitive_id& id);
+    void check_names();
 };
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h b/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h
new file mode 100644
index 000000000..21157be2a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/one_hot_inst.h
@@ -0,0 +1,53 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+
+#include <api/CPP/one_hot.hpp>
+
+#include "primitive_inst.h"
+
+
+namespace cldnn
+{
+    template <>
+    struct typed_program_node<one_hot> : typed_program_node_base<one_hot>
+    {
+    private:
+        using parent = typed_program_node_base<one_hot>;
+
+    public:
+        using parent::parent;
+
+        typed_program_node(const std::shared_ptr<one_hot> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
+        program_node& input() const { return get_dependency(0); }
+    };
+
+    using one_hot_node = typed_program_node<one_hot>;
+
+
+    template <>
+    class typed_primitive_inst<one_hot> : public typed_primitive_inst_base<one_hot>
+    {
+        using parent = typed_primitive_inst_base<one_hot>;
+
+    public:
+        static layout calc_output_layout(one_hot_node const& node);
+        static std::string to_string(one_hot_node const& node);
+        typed_primitive_inst(network_impl& network, one_hot_node const& node);
+    };
+
+    using one_hot_inst = typed_primitive_inst<one_hot>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/pass_manager.h b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h
new file mode 100644
index 000000000..f295d1dcc
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/pass_manager.h
@@ -0,0 +1,276 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#pragma once
+
+#include "program_impl.h"
+#include "layout_optimizer.h"
+
+namespace cldnn
+{
+    class base_pass
+    {
+        friend class pass_manager;
+    public:
+        base_pass(const std::string& pass_name) : name(pass_name) {}
+        virtual void run(program_impl& p) = 0;
+        std::string get_name() { return name; }
+        void clean_marks(program_impl& p) {
+            for (auto& node : p.get_processing_order())
+            {
+                node->unmark();
+            }
+        }
+    private:
+        const std::string name;
+    };
+
+    class pass_manager
+    {
+    public:
+        pass_manager()
+        {
+            pass_count = 0;
+        }
+        void run(program_impl& p, base_pass& pass)
+        {
+            pass.run(p);
+            std::string dump_file_name;
+            if (pass_count < 10)
+                dump_file_name += "0";
+            dump_file_name += std::to_string(pass_count) + "_" + pass.get_name();
+            p.dump_program(dump_file_name.c_str(), true);
+            pass.clean_marks(p);
+            pass_count++;
+        }
+        uint32_t get_pass_count() { return pass_count; }
+        uint32_t inc_pass_count() { return ++pass_count; }
+        ~pass_manager() {}
+    private:
+        uint32_t pass_count;
+    };
+
+    class add_required_reorders : public base_pass
+    {
+    public:
+        add_required_reorders() : base_pass("add_required_reorders") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void add_reorder(program_impl& p, program_node* node, program_node* usr, layout reorder_layout);
+    };
+
+    class add_reshape_to_primitives : public base_pass
+    {
+    public:
+        add_reshape_to_primitives() : base_pass("add_reshape_to_primitives_pass") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+
+    class calculate_prior_boxes : public base_pass
+    {
+    public: 
+        calculate_prior_boxes() : base_pass("calculated_prior_boxes") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+
+    class compile_graph: public base_pass
+    {
+    public:
+        compile_graph() : base_pass("compile_graph") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+
+    class eltwise_shrinking : public base_pass
+    {
+    public:
+        eltwise_shrinking() : base_pass("eltwise_shrinking") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+
+    class eltwise_remove_stride : public base_pass
+    {
+    public:
+        eltwise_remove_stride() : base_pass("eltwise_remove_stride") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void conv_stride_extend(program_impl & p, program_node & node, cldnn::tensor & tensor);
+    };
+
+    class graph_initializations : public base_pass 
+    {
+    public:
+        graph_initializations() : base_pass("init") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void replace_nodes(program_impl& p);
+        void handle_detection_output(program_impl& p);
+        void handle_lstm(program_impl& p);
+        void set_outputs(program_impl& p);  
+    };
+
+    class handle_input_padding : public base_pass
+    {
+    public:
+        handle_input_padding() : base_pass("handle_input_padding") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+
+    class mark_nodes : public base_pass
+    {
+    public:
+        mark_nodes() : base_pass("analyzed_graph") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void mark_constants(program_impl& p);
+        void mark_data_flow(program_impl& p);
+    };
+
+    class prepare_buffer_fusing : public base_pass
+    {
+    public:
+        prepare_buffer_fusing() : base_pass("prepare_buffer_fusing") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+
+    class prepare_conv_eltw_fusing : public base_pass
+    {
+    public:
+        prepare_conv_eltw_fusing() : base_pass("prepare_conv_eltw_fusing") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void fuse_conv_eltwise(program_impl& p, program_node* node);
+    };
+
+    class prepare_conv_eltw_read_write_opt : public base_pass
+    {
+    public:
+        prepare_conv_eltw_read_write_opt() : base_pass("prepare_conv_eltw_read_write_opt") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void conv_eltwise_read_write_opt(program_impl& p, program_node* node);
+    };
+
+    class prepare_depthwise_sep_opt : public base_pass
+    {
+    public:
+        prepare_depthwise_sep_opt() : base_pass("prepare_depthwise_sep_opt") {}
+    private:
+        virtual void run(program_impl& p) override;
+        template <typename T> void optimize_depthwise_sep_pre(T& node);
+    };
+
+    class prep_opt_depthwise_sep_post : public base_pass
+    {
+    public:
+        prep_opt_depthwise_sep_post() : base_pass("prep_opt_depthwise_sep_post") {}
+    private:
+        virtual void run(program_impl& p) override;
+        template <typename T> void optimize_depthwise_sep_pre(program_impl& p, T& node);
+    };
+
+    class prepare_primitive_fusing : public base_pass
+    {
+    public:
+        prepare_primitive_fusing() : base_pass("prepare_primitive_fusing") {}
+    private:
+        virtual void run(program_impl& p) override;
+        void fuse_skip_layers(program_impl& p, program_node* node);
+        void fuse_conv_bn_scale(program_impl& p, program_node* node);
+    };
+
+    class pre_optimize_bias : public base_pass
+    {
+    public:
+        pre_optimize_bias(layout_optimizer& lo_ref);
+    private:
+        virtual void run(program_impl& p) override;
+        virtual void run(program_impl& p, layout_optimizer& lo);
+        template <typename T>
+        void optimize_bias(T& node, layout_optimizer& lo, program_impl& p);
+        layout_optimizer& _lo;
+    };
+
+    class prepare_padding : public base_pass
+    {
+    public:
+        prepare_padding(bool output_size_handling_enabled_switch) : base_pass("prepare_padding"),
+            output_size_handling_enabled(output_size_handling_enabled_switch) {}
+    private:
+        virtual void run(program_impl& p) override;
+        bool output_size_handling_enabled;
+    };
+
+    class post_optimize_weights : public base_pass
+    {
+    public:
+        post_optimize_weights(layout_optimizer& lo_ref);
+    private:
+        virtual void run(program_impl& p) override;
+        virtual void run(program_impl& p, layout_optimizer& lo);
+        template <typename T>
+        void optimize_weights(T& node, layout_optimizer& lo, program_impl& p);
+        layout_optimizer& _lo;
+    };
+
+    class propagate_constants : public base_pass
+    {
+    public:
+        propagate_constants() : base_pass("propagate_constants") {}
+    private:
+        virtual void run(program_impl& p) override;
+        std::list<std::pair<primitive_id, memory_impl::ptr>> calculate(engine_impl &engine);
+        bool has_non_const_user(program_node& node) const;
+        void handle_constant(program_impl& prog, program_node& node);
+        void add_constant(program_impl& prog, program_node& node);
+        void add_deps_to_tpl(program_impl& prog, const std::vector<program_node*>& node);
+
+        bool has_non_trivial_constants = false;
+        std::list<typed_program_node<data>*> const_inputs;
+        std::vector<primitive_id> const_outputs;
+        std::set<std::shared_ptr<program_node>> nodes;
+    };
+
+    class remove_redundant_reorders : public base_pass
+    {
+    public:
+        remove_redundant_reorders() : base_pass("remove_redundant_reorders") {}
+        virtual void run(program_impl& p) override;
+    };
+
+    class reorder_inputs : public base_pass
+    {
+    public:
+        reorder_inputs(layout_optimizer& lo_ref);
+    private:
+        virtual void run(program_impl& p) override;
+        virtual void run(program_impl& p, layout_optimizer& lo);
+        layout_optimizer& _lo;
+    };
+
+    class trim_to_outputs : public base_pass
+    {
+    public:
+        trim_to_outputs() : base_pass("trimmed") {}
+    private:
+        virtual void run(program_impl& p) override;
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/permute_inst.h b/inference-engine/thirdparty/clDNN/src/include/permute_inst.h
index bb76c9a16..e538eda87 100644
--- a/inference-engine/thirdparty/clDNN/src/include/permute_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/permute_inst.h
@@ -26,6 +26,7 @@ template <>
 struct typed_program_node<permute> : public typed_program_node_base<permute>
 {
     using parent = typed_program_node_base<permute>;
+    typed_program_node(const std::shared_ptr<permute> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
 
 public:
     using parent::parent;
diff --git a/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h b/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h
index 2956667b9..2bdc44482 100644
--- a/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/pooling_inst.h
@@ -26,6 +26,7 @@ template <>
 struct typed_program_node<pooling> : public typed_program_node_base<pooling>
 {
     using parent = typed_program_node_base<pooling>;
+    typed_program_node(const std::shared_ptr<pooling> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
 
 public:
     using parent::parent;
diff --git a/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h b/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h
index 563e6d163..0a7b9f347 100644
--- a/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/primitive_inst.h
@@ -21,7 +21,6 @@
 #include "api/CPP/concatenation.hpp"
 
 #include "event_impl.h"
-#include "program_impl.h"
 #include "memory_impl.h"
 #include "meta_utils.h"
 #include "kernel_selector_helper.h"
@@ -53,17 +52,18 @@ struct primitive_impl
     //   A special member function is user-provided if it is user-declared and not explicitly defaulted or deleted
     //   on its first declaration.
     primitive_impl() : _weights_reorder_params() {}
-    primitive_impl(const kernel_selector::weights_reorder_params& params, std::string kernel_name = "") : _weights_reorder_params(params), kernel_name(kernel_name) {}
+    primitive_impl(const kernel_selector::weights_reorder_params& params, std::string kernel_name = "") : _weights_reorder_params(params), _kernel_name(kernel_name) {}
     virtual ~primitive_impl() = default;
 
     virtual event_impl::ptr execute(const std::vector<event_impl::ptr>& events, primitive_inst& instance) = 0;
-
-	std::string get_kernel_name() { return kernel_name; };
-
+    virtual bool validate(const primitive_inst& instance) const = 0;
+	std::string get_kernel_name() const { return _kernel_name; };
     // TODO: added a derived class for weights reordering (maybe for all static data reordering)
     const kernel_selector::weights_reorder_params _weights_reorder_params;
+    // class typed_primitive_gpu_impl override this with return false;
+    virtual bool is_cpu() const { return true; }
 private:
-	std::string kernel_name;
+	std::string _kernel_name;
 };
 
 /*
@@ -92,12 +92,12 @@ public:
     primitive_id id() const { return _node.id(); }
     primitive_id org_id() const { return _node.get_org_primitive_id(); }
     bool can_be_optimized() const { return _node.can_be_optimized(); }
-    const std::shared_ptr<const primitive> desc() const { return _node.get_primitive(); }
+    std::shared_ptr<const primitive> desc() const { return _node.get_primitive(); }
     network_impl& get_network() const { return _network; }
     uint32_t get_network_id() const;
 
     //return pointer to const to prevent arbitrary 'execute' call -> use primitive_inst.execute() instead
-    const primitive_impl* get_impl() const { return _impl.get(); }
+    primitive_impl* get_impl() const { return _impl.get(); }
 
     memory_impl& input_memory(size_t index = 0)  const 
     { 
@@ -107,7 +107,7 @@ public:
     }
 
     event_impl::ptr execute(const std::vector<event_impl::ptr>& events);
-
+    bool validate() const { return _impl->validate(*this); }
     bool output_changed() const { return _output_changed; }
     void reset_output_change() { _output_changed = false; }
 
@@ -150,7 +150,8 @@ protected:
 
 /*
 Base class for all implementation of specified primitive type.
-For example, all convolution implementations should derive from typed_primitive_impl<convolution>.
+For example, all cpu convolution implementations should derive directly from typed_primitive_impl<convolution>.
+GPU implementations should derive from typed_primitive_gpu_impl<convolution>;
 */
 template <class PType>
 struct typed_primitive_impl : public primitive_impl
@@ -158,7 +159,6 @@ struct typed_primitive_impl : public primitive_impl
     static_assert(meta::is_primitive<PType>::value, "PType should be a non-const, non-volatile class derived from primitive");
 
     using primitive_impl::primitive_impl;
-
 private:
     event_impl::ptr execute(const std::vector<refcounted_obj_ptr<event_impl>>& event, primitive_inst& instance) override
     {
@@ -169,8 +169,23 @@ private:
 
         return execute_impl(event, reinterpret_cast<typed_primitive_inst<PType>&>(instance));
     }
-
     virtual event_impl::ptr execute_impl(const std::vector<event_impl::ptr>& event, typed_primitive_inst<PType>& instance) = 0;
+
+    virtual bool validate(const primitive_inst& instance) const override
+    {
+        if (instance.type() != PType::type_id())
+            throw std::invalid_argument("Implementation type does not match primitive type");
+        if (instance.get_impl() != this)
+            throw std::invalid_argument("Trying to validate primitive implementation with mismatching primitive instance");
+
+        return validate_impl(reinterpret_cast<const typed_primitive_inst<PType>&>(instance));
+    }
+    virtual bool validate_impl(const typed_primitive_inst<PType>&) const
+    {
+        return true;
+    }
+
+
 };
 
 namespace details
diff --git a/inference-engine/thirdparty/clDNN/src/include/primitive_type.h b/inference-engine/thirdparty/clDNN/src/include/primitive_type.h
index 2b19e4ac6..1347a4452 100644
--- a/inference-engine/thirdparty/clDNN/src/include/primitive_type.h
+++ b/inference-engine/thirdparty/clDNN/src/include/primitive_type.h
@@ -40,6 +40,8 @@ struct cldnn_primitive_type
     virtual std::shared_ptr<cldnn::program_node> create_node(cldnn::program_impl& program, const std::shared_ptr<cldnn::primitive> prim) const = 0;
     virtual std::shared_ptr<cldnn::primitive_inst> create_instance(cldnn::network_impl& network, const cldnn::program_node& node) const = 0;
     virtual std::unique_ptr<cldnn::primitive_impl> choose_impl(cldnn::engine_impl& engine, const cldnn::program_node& node) const = 0;    
+    virtual bool does_an_implementation_exist(cldnn::engine_impl& engine, const cldnn::program_node& node) const = 0;
+    virtual bool does_possible_implementation_exist(cldnn::engine_impl& engine, const cldnn::program_node& node) const = 0;
     virtual cldnn::layout calc_output_layout(const cldnn::program_node& node) const = 0;
     virtual std::string to_string(const cldnn::program_node& node) const = 0;
 
diff --git a/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h b/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h
index 2f4f74542..91a9dec8c 100644
--- a/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h
+++ b/inference-engine/thirdparty/clDNN/src/include/primitive_type_base.h
@@ -63,6 +63,20 @@ struct primitive_type_base : ::cldnn_primitive_type
         return engine.create_primitive_impl(node.as<PType>());
     }
 
+    bool does_an_implementation_exist(engine_impl& engine, const cldnn::program_node& node) const override
+    {
+        if (node.type() != this)
+            throw std::invalid_argument("primitive_type_base::choose_impl: primitive type mismatch");
+        return engine.does_an_implementation_exist(node.as<PType>());
+    }
+
+    bool does_possible_implementation_exist(engine_impl& engine, const cldnn::program_node& node) const override
+    {
+        if (node.type() != this)
+            throw std::invalid_argument("primitive_type_base::choose_impl: primitive type mismatch");
+        return engine.does_possible_implementation_exist(node.as<PType>());
+    }
+
     cldnn::layout calc_output_layout(const cldnn::program_node& node) const override
     {
         if (node.type() != this)
diff --git a/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h b/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h
index 0ee0e677c..2e61ffd35 100644
--- a/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h
+++ b/inference-engine/thirdparty/clDNN/src/include/program_dump_graph.h
@@ -18,7 +18,7 @@
 
 #include "program_impl.h"
 #include "program_node.h"
-#include "data_inst.h"
+#include "gpu/ocl_toolkit.h"
 #include <fstream>
 
 namespace cldnn
@@ -30,7 +30,4 @@ namespace cldnn
     void dump_graph_processing_order(std::ofstream&, const program_impl&);
     void dump_graph_init(std::ofstream&, const program_impl&, std::function<bool(program_node const&)> const&);
     void dump_graph_info(std::ofstream&, const program_impl&, std::function<bool(program_node const&)> const&);
-    void dump_to_xml(std::ofstream& graph, const program_impl& program, std::function<bool(program_node const&)> const& filter, std::vector<unsigned long long>& offsets, std::vector<std::string>& data_names);
-    void dump_kernels(kernels_binaries_container program_binaries, std::vector<unsigned long long>& offsets, std::vector<std::string>& data_names, std::ofstream& file_stream);
-    void dump_data(memory_impl& mem, std::ofstream& stream, unsigned long long& total_offset, unsigned long long type);
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/program_helpers.h b/inference-engine/thirdparty/clDNN/src/include/program_helpers.h
new file mode 100644
index 000000000..1c7cb1eeb
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/program_helpers.h
@@ -0,0 +1,114 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "program_node.h"
+#include "engine_impl.h"
+#include "program_impl.h"
+
+namespace cldnn
+{
+    struct program_helpers
+    {
+        //helper function which creates single-element array if it's given anything
+        //other than std::vector.
+        //It should be used in generic code when there's a need to force vector usage
+        //in foreach loop over variable which can in one context be a vector or a scalar
+        //in another.
+        //example:
+        // T t;
+        // for (auto& string : wrap_if_single(t.dump()))
+        //depending on type T, t.dump() may return either std::string or std::vector<std::string>,
+        //to ensure compatibility between these cases, wrap_if_single will create single-element
+        //container in case t.dump() would return plain std::string.
+        //
+        // T& case -> returns container which holds T&
+        template <class T>
+        static program_impl::single_element_container<T> wrap_if_single(T& t)
+        {
+            return program_impl::single_element_container<T>(t);
+        }
+
+        //helper function which creates single-element array if it's given anything
+        //other than std::vector.
+        // T const& case -> returns container which holds T const&
+        template <class T>
+        static program_impl::single_element_container<T const> wrap_if_single(T const& t)
+        {
+            return program_impl::single_element_container<T const>(t);
+        }
+
+        //helper function which creates single-element array if it's given anything
+        //other than std::vector.
+        // T&& case -> returns container which holds new instance of T created by moving given param
+        template <class T>
+        static program_impl::single_element_container<T> wrap_if_single(T&& t)
+        {
+            static_assert(meta::always_false<T>::value, "Wrapping temporary object into single_element_container is an error (requires valid reference)");
+            return program_impl::single_element_container<T>(t);
+        }
+
+        //helper function which creates single-element array if it's given anything
+        //other than std::vector.
+        // std::vector case -> does not wrap, returns t as-is
+        static const primitive::fixed_size_vector_ref& wrap_if_single(primitive::fixed_size_vector_ref const& t)
+        {
+            return t;
+        }
+
+        //helper function for selecting function basing on the type of the given primitive
+        //this is the termination case for parameter pack recurrence, see overload below for logic
+        template <class... T>
+        static void do_for_types(program_node&)
+        {
+            return;
+        }
+
+        //helper function for selecting function basing on the type of the given primitive
+        //this function should be explicitly given set of types and implicitly set of functions.
+        //both sets should have equal size. First function will be called if type of the given primitive
+        //will match first explicitly given type, second will be called if it matches second explicitly given
+        //type etc.
+        //Functions given as arguments should themselves take std::shared_ptr<const T> as argument
+        //where T is the type that should be match if this function should be called
+        //
+        //example:
+        // do_for_types<
+        //      convolution,
+        //      pooling
+        //  >(primitive,
+        //      [](typed_program_node<convolution>&){ do something if 'primitive' is a convolution },
+        //      [](typed_program_node<pooling>&)    { do something if 'primitive' is a pooling }
+        //  );
+        template <class T, class... RestOfT, class Func, class... RestOfFuncs>
+        static decltype(static_cast<void>(std::declval<Func>()(std::declval<typed_program_node<T>&>()))) do_for_types(
+            program_node& node,
+            Func const& func,
+            RestOfFuncs const&... rest)
+        {
+            if (node.type() == T::type_id())
+                func(node.as<T>());
+            else
+                do_for_types<RestOfT...>(node, rest...);
+        }
+        static void merge_buffers(engine_impl &engine, program_node &node, layout target_layout, size_t begin_offset, size_t end_offset);
+        static layout get_weights_layout(typed_program_node<cldnn::data> &data_node, int32_t split);
+        static std::pair<bool, bool> are_layouts_identical(layout const& l1, layout const& l2);
+    };
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/program_impl.h b/inference-engine/thirdparty/clDNN/src/include/program_impl.h
index c3cb67329..c518d9ca0 100644
--- a/inference-engine/thirdparty/clDNN/src/include/program_impl.h
+++ b/inference-engine/thirdparty/clDNN/src/include/program_impl.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -15,136 +15,206 @@
 */
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
+
 #pragma once
+
 #include "api/CPP/program.hpp"
 
 #include "refcounted_obj.h"
-#include "topology_impl.h"
 #include "engine_impl.h"
-#include "program_node.h"
-#include "memory_impl.h"
 
 #include <list>
-#include <algorithm>
 
 namespace cldnn
 {
 
+struct topology_impl;
 struct primitive_impl;
+struct program_node;
 class layout_optimizer;
-class constants_propagator;
-
+class pass_manager;
+class program_impl_wrapper;
 /*
     cldnn_program implementation
 */
 struct program_impl : public refcounted_obj<program_impl>
 {
-    friend struct program_node;
-
+    friend class calculate_prior_boxes;             // to be removed when possible
+    friend class graph_initializations;             // to be removed when possible
+    friend class prepare_padding;                   // to be removed when possible
+    friend class propagate_constants;               // to be removed when possible
+    friend class prepare_primitive_fusing;          // to be removed when possible
+    friend class prepare_conv_eltw_fusing;          // to be removed when possible
+    friend class prepare_conv_eltw_read_write_opt;  // to be removed when possible
+    friend class reorder_inputs;                    // to be removed when possible
+    friend class program_impl_wrapper;              // this class is intended to extend the interface of program_impl for 
+                                                    // the usage within tests_core_internal project only
 public:
-    program_impl(engine_impl& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal);
-
-    void dump_memory_pool() const;
-
-    engine_impl& get_engine() const { return *engine; }
-    build_options get_options() const { return options; }
-    bool is_debug_build() const { return options.get<build_option_type::debug>()->enabled(); }
-
-    std::list<std::shared_ptr<program_node>> get_nodes() const;
-    std::list<program_node*> get_processing_order() const { return processing_order; }
-    std::list<primitive_id> get_optimized_out() const { return optimized_out; }
-    program_node& get_node(primitive_id const& id)
+    struct nodes_ordering
     {
-        try 
+    public:
+        typedef std::list<program_node*> list_of_nodes;
+        typedef list_of_nodes::const_iterator const_iterator;
+        typedef list_of_nodes::iterator node_iterator;
+        const_iterator begin() const { return _processing_order.begin(); }
+        const_iterator end() const { return _processing_order.end(); }
+
+        void calc_processing_order_visit(program_node* node);
+        void calc_processing_order(program_impl& p);
+        int32_t get_processing_number(program_node* node) const { return get_processing_number(get_processing_iterator(*node)); }
+        // int32_t get_processing_number(const_iterator iter) const { return 1+(int32_t)std::distance(begin(), iter); }
+        int32_t get_processing_number(node_iterator iter) const { return 1 + (int32_t)std::distance(_processing_order.begin(), const_iterator(iter)); }
+        void calculate_BFS_processing_order();
+        size_t size() { return _processing_order.size(); }
+        bool is_correct(program_node* node);
+        
+        node_iterator get_processing_iterator(program_node& node) const
         {
-            return *nodes_map.at(id);
+            return processing_order_iterators.at(&node);
         }
-        catch (...)
+        void clear()
         {
-            throw std::runtime_error("Program doesn't contain primtive node: " + id);
+            processing_order_iterators.clear();
+            _processing_order.clear();
         }
-    }
 
-    bool has_node(const primitive_id& prim) const
-    {
-        return nodes_map.count(prim) > 0;
-    }
+        void insert(program_node* key_node, program_node* node)
+        {
+            node_iterator _where = processing_order_iterators.at(key_node);
+            processing_order_iterators[node] = _processing_order.insert(_where, node);
+        }
 
-    program_node const& get_node(primitive_id const& id) const
-    {
-        try
+        void insert_next(program_node* key_node, program_node* node)
         {
-            return *nodes_map.at(id);
+            node_iterator _where = std::next(processing_order_iterators.at(key_node));
+            processing_order_iterators[node] = _processing_order.insert(_where, node);
         }
-        catch (...)
+
+        void erase(program_node* key_node)
         {
-            throw std::runtime_error("Program doesn't contain primtive node: " + id);
+            node_iterator i = processing_order_iterators.at(key_node);
+            processing_order_iterators.erase(key_node);
+            _processing_order.erase(i);
         }
-    }
+
+    private:
+        list_of_nodes _processing_order;
+        std::map<program_node*, node_iterator> processing_order_iterators;
+    };
+
+    template <class T>
+    struct single_element_container
+    {
+        single_element_container(T& t) : elem(&t)
+        {}
+        constexpr size_t size() const { return 1; }
+        single_element_container begin() const { return single_element_container(elem); }
+        single_element_container end() const { return single_element_container(nullptr); }
+        single_element_container& operator ++() { elem = nullptr; return *this; }
+        bool operator !=(single_element_container const& sec) { return elem != sec.elem; }
+
+       T operator *() { return *elem; }
+
+    private:
+        single_element_container(T* t) : elem(t)
+        {}
+
+        T* elem;
+    };
+    program_impl(engine_impl& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal, bool no_optimizations=false);
+    /* constructor used to build a program from subset of nodes of other program (used in propagate_constants) */
+    program_impl(engine_impl& engine_ref, std::set<std::shared_ptr<program_node>> const &nodes, build_options const& options, bool is_internal);
+    ~program_impl();
+    engine_impl& get_engine() const { return *engine; }
+    const build_options& get_options() const { return options; }
+    std::list<program_node*>& get_inputs() { return inputs; }     // ToDo: redesign trim to ouptut pass to make it const as_well as get_engine and get options 
+    std::vector<program_node*>& get_outputs() { return outputs; }  // ToDo: redesign reorder-inputs pass to make it const as_well as get_engine and get options 
+    bool is_debug_build() const { return options.get<build_option_type::debug>()->enabled(); }
+    const nodes_ordering& get_processing_order() const;
+    nodes_ordering& get_processing_order();
+    const std::list<primitive_id>& get_optimized_out() const { return optimized_out; }
+    bool has_node(const primitive_id& prim) const { return nodes_map.count(prim) > 0; }
+    program_node& get_node(primitive_id const& id);
+    program_node const& get_node(primitive_id const& id) const;
+    std::shared_ptr<program_node> get_node_ptr(const primitive_id& prim) { return nodes_map.at(prim);  }
+    std::shared_ptr<program_node> get_node_ptr(const primitive_id& prim) const { return nodes_map.at(prim); }
+    void dump_memory_pool() const;
+
+    //returns already existing program_node for given primitive 'prim' (lookup in 'nodes_map')
+    //if it was previously created, otherwise creates and then returns program_node
+    program_node& get_or_create(std::shared_ptr<primitive> prim);
+
+    // Inserts given program_node 'node' as an intermediate node between 'next' and it's
+    //  dependency at 'prev_idx' index.
+    void add_intermediate(program_node& node, program_node& next, size_t prev_idx,
+                          bool connect_int_node_with_old_dep = true,
+                          bool move_usrs_of_prev_to_node = false);
+
+    // Gets or creates program_node for given primitive 'prim' and inserts it as an intermediate
+    // node between 'next' and it's dependency at 'prev_idx' index.
+    void add_intermediate(std::shared_ptr<primitive> prim, program_node& next, size_t prev_idx, 
+        bool connect_int_node_with_old_dep = true,
+        bool move_usrs_of_prev_to_node = false);
+
+    //removes a node from the graph and deletes it afterwards,
+    //prereq: node cannot be marked as output and has to have exactly one dependency
+    //returns if 'node' has been extracted and removed successfully
+    bool extract_and_remove(program_node& node);
+
+    //returns if 'node' has been removed
+    bool remove_if_dangling(program_node& node);
+
+    void mark_if_constant(program_node& node);
+    // mark if the node is in data flow assuming that all dependencies are marked properly
+    void mark_if_data_flow(program_node& node);
+    //Reverses connection - user becomes dependency.
+
+    void remove_nodes(std::list<program_node*>& to_remove);
+    void dump_program(const char* stage, bool with_full_info, std::function<bool(program_node const&)> const& filter = nullptr) const;
 
 private:
     uint32_t prog_id = 0;
-
     engine_impl::ptr engine;
     build_options options;
-
     std::list<program_node*> inputs;
     std::vector<program_node*> outputs;
-    std::list<program_node*> processing_order;
+    nodes_ordering processing_order;
+    std::unique_ptr<pass_manager> pm;
 
     std::map<primitive_id, std::shared_ptr<program_node>> nodes_map;
-
     std::list<primitive_id> optimized_out;
 
-    // TODO: Remove once we will get full support for input/output padding in all primitive implementations.
-    bool output_size_handling_enabled;
-
     /*
     ** High-level functions, in order of usage
     */
-    void init_graph(topology_impl const& topology);
-    void pre_optimize_graph();
-    void post_optimize_graph();
-    void compile_graph();
+    /* build nodes internal structure based on topology */
+    void prepare_nodes(topology_impl const& topology);
+    /* build nodes internal structure based on the subset of nodes of other program  (used in propagate_constants) */
+    void prepare_nodes(std::set<std::shared_ptr<program_node>> const& nodes);
+    void add_node_dependencies(program_node* node_ptr);
+    void copy_node_dependencies(program_node* dest, program_node* src);
+    void build_program(bool is_internal);
+    void init_graph();
+    void set_options();
+
+    void run_graph_compilation();
+    void pre_optimize_graph(bool is_internal);
+    void post_optimize_graph(bool is_internal);
     void cleanup();
 
     /*
-    ** Initialization functions
-    */
-    void set_outputs();
-    void calc_processing_order();
-    void calc_prior_boxes();
-
-    /*
     ** Analysis functions
     */
-    void mark_constants();
-    void mark_data_flow();
     // TODO: Remove once we will get full support for input/output padding in all primitive implementations.
-    void analyze_output_size_handling_need();
-    void replace_nodes_pre();
-    void replace_nodes_post();
-	void handle_lstm();
+    bool analyze_output_size_handling_need();
+
+    // handle split, deconvolution and upsampling
     void handle_reshape();
 
     /*
     ** Optimization functions
     */
-    void trim_to_outputs();
-    void remove_redundant_reorders();
-    void calculate_BFS_processing_order();
-    void reorder_inputs(layout_optimizer& lo);
-    void pre_optimize_bias(layout_optimizer& lo);
-    void post_optimize_weights(layout_optimizer& lo);
     void apply_needed_padding(program_node& node, program_node& prev_node, const padding& needed_padding);
-    void prepare_padding();
-    void propagate_constants();
-    void prepare_buffer_fusing();
-    void fuse_skip_layers(program_node* node);
-    void prepare_primitive_fusing();
-    void prepare_depthwise_sep_opt();
-    void prep_opt_depthwise_sep_post();
-    void update_processing_numbers();
 
     /*
     ** Memory pool functions
@@ -158,57 +228,15 @@ private:
     /*
     ** Utilities
     */
+    void add_split_outputs();
+    // mark if the node is constant assuming that all dependencies are marked properly
+    void reverse_connection(program_node& dep_node, program_node& user_node);
 
-    //returns already existing program_node for given primitive 'prim' (lookup in 'nodes_map')
-    //if it was previously created, otherwise creates and then returns program_node
-    program_node& get_or_create(std::shared_ptr<primitive> prim);
-
-    // Inserts given program_node 'node' as an intermediate node between 'next' and it's
-    //  dependency at 'prev_idx' index.
-    void add_intermediate(program_node& node, program_node& next, size_t prev_idx, bool connect_int_node_with_old_dep = true);
-
-    // Gets or creates program_node for given primitive 'prim' and inserts it as an intermediate
-    // node between 'next' and it's dependency at 'prev_idx' index.
-    void add_intermediate(std::shared_ptr<primitive> prim, program_node& next, size_t prev_idx, bool connect_int_node_with_old_dep = true)
-    {
-        add_intermediate(get_or_create(prim), next, prev_idx, connect_int_node_with_old_dep);
-    }
-
-    void add_connection(program_node& prev, program_node& next)
-    {
-        prev.users.push_back(&next);
-        next.dependencies.push_back(&prev);
-    }
+    void add_connection(program_node& prev, program_node& next);
 
-    void remove_connection(program_node& prev, program_node& next)
-    {
-        prev.users.remove(&next);
-        next.dependencies.erase(std::remove(next.dependencies.begin(), next.dependencies.end(), &prev), next.dependencies.end());
-    }
-
-    void remove_all_connections(program_node& node) {
-        // since the graph is not topological sorted, we need to remove the node from both dependencies and users
-        for (auto &e : node.users) {
-            e->dependencies.erase(std::remove(e->dependencies.begin(), e->dependencies.end(), &node), e->dependencies.end());
-        }
-        for(auto &e : node.dependencies) {
-            e->users.remove(&node);
-        }
-        node.dependencies.clear();
-		node.users.clear();
-    }
+    void remove_connection(program_node& prev, program_node& next);
 
-    bool processing_order_is_correct(program_node* node)
-    {
-        for (auto& dep : node->get_dependencies())
-        {
-            if (node->processing_num < dep->processing_num)
-            {
-                return false;
-            }
-        }
-        return true;
-    }
+    void remove_all_connections(program_node& node);
 
     void rename(program_node & node, primitive_id const & new_id);
     void swap_names(program_node& node1, program_node& node2);
@@ -216,37 +244,9 @@ private:
 
     //old_node - node which will be replaced
     //new_node - node which will replace the old one
-    //replace_whole_branch - if set to true, 'old_node' will be replaced with all its dependencies and new_node will retain its dependencies
-    //  old's dependencies which are post-dominates by 'old_node' will also be removed
-    void replace(program_node& old_node, program_node& new_node, bool replace_whole_branch, bool check_output_layouts_integrity = true);
-
-    //returns if 'node' has been removed
-    bool remove_if_dangling(program_node& node, bool detach_whole_branch = false);
-
-    //removes a node from the graph and deletes it afterwards,
-    //prereq: node cannot be marked as output and has to have exactly one dependency
-    //returns if 'node' has been extracted and removed successfully
-    bool extract_and_remove(program_node& node);
-    void replace_data_with_optimized(std::map<primitive_id, memory_impl::ptr> const& replace_map);
-    void dump_program(const char* stage, bool with_full_info, std::function<bool(program_node const&)> const& filter = nullptr) const;
-    //Dumps weights and biasses in serialization process, not working yet, in progress.
-    void dump_weights_and_biasses(std::vector<unsigned long long>& offsets, std::vector<std::string>& data_names, std::ofstream& file_stream) const;
-    //Makes serialization with given name.
-    //Placeholder, not working yet, in progress.
-    void serialize(std::string network_name, std::function<bool(program_node const&)> const& filter = nullptr) const;
-
-    template <typename T>
-    void optimize_bias(T& node, layout_optimizer& lo);
-
-    template <typename T>
-    void optimize_weights(T& node, layout_optimizer& lo);
-
-    template <typename T>
-    void optimize_depthwise_sep_pre(T& node);
-
-    template <typename T>
-    void optimize_depthwise_sep_post(T& node);
+    void replace(program_node& old_node, program_node& new_node);
 };
+
 }
 
 API_CAST(::cldnn_program, cldnn::program_impl)
diff --git a/inference-engine/thirdparty/clDNN/src/include/program_node.h b/inference-engine/thirdparty/clDNN/src/include/program_node.h
index e9df77a7d..42cec074b 100644
--- a/inference-engine/thirdparty/clDNN/src/include/program_node.h
+++ b/inference-engine/thirdparty/clDNN/src/include/program_node.h
@@ -22,12 +22,12 @@
 
 #include "meta_utils.h"
 
-#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
-
 namespace cldnn
 {
 
 struct program_impl;
+class reorder_inputs;
+class graph_initializations;
 
 template <class T>
 struct typed_program_node;
@@ -51,8 +51,14 @@ class xml_composite;
 */
 struct program_node
 {
-    friend struct program_impl;
-    friend class constants_propagator;
+    friend struct program_impl;                     // to be removed when possible
+    friend class compile_graph;                     // to be removed when possible
+    friend class graph_initializations;             // to be removed when possible
+    friend class prepare_primitive_fusing;          // to be removed when possible
+    friend class prepare_conv_eltw_fusing;          // to be removed when possible
+    friend class prepare_conv_eltw_read_write_opt;  // to be removed when possible
+    friend class propagate_constants;               // to be removed when possible
+    friend class post_optimize_weights;             // to be removed when possible - requires an access to selected_impl
 
     template <class PType>
     friend struct typed_program_node;
@@ -82,10 +88,10 @@ public:
     std::vector<program_node*> const& get_dependencies() const { return dependencies; }
     program_node& get_dependency(size_t idx) const { return *dependencies.at(idx); }
 
-    //replaces idx-th dependency of 'this' with 'new_dep', calls program::remove_if_dangling(old_dep, detach_whole_branch)
-    void replace_dependency(size_t idx, program_node& new_dep, bool detach_whole_branch = false);
-    //searches for 'old_dep' in dependencies list of 'this' and replaces it with 'new_dep', calls program::remove_if_dangling(old_dep, detach_whole_branch)
-    void replace_dependency(program_node const& old_dep, program_node& new_dep, bool detach_whole_branch = false);
+    //replaces idx-th dependency of 'this' with 'new_dep', calls program::remove_if_dangling(old_dep)
+    void replace_dependency(size_t idx, program_node& new_dep);
+    //searches for 'old_dep' in dependencies list of 'this' and replaces it with 'new_dep', calls program::remove_if_dangling(old_dep)
+    void replace_dependency(program_node const& old_dep, program_node& new_dep);
 
     std::vector<primitive_id> get_dependencies_ids() const;
 
@@ -113,8 +119,7 @@ public:
     std::list<const program_node*> const& get_users() const { return reinterpret_cast<const std::list<const program_node*>&>(users); }
 
     std::unique_ptr<json_composite> desc_to_json() const;
-	std::unique_ptr<xml_composite> desc_to_xml() const;
-    //do not modify primitive directly to keep synchronisation wit graph
+    //do not modify primitive directly to keep synchronisation with graph
     std::shared_ptr<const primitive> get_primitive() const { return desc; }
     //primitive modification functions
     void set_output_padding(padding const& padd)
@@ -132,7 +137,7 @@ public:
     //only calculated output layout (for external usage), does not modify/use cached output layout nor invalidate users
     layout calc_output_layout() const;
 
-    //uses cached output layout if vlid, if not calls 'calc_output_layout' and stores its result + invalidate all users if layout has changed and @p invalidate_users_if_changed is set to true
+    //uses cached output layout if valid, if not calls 'calc_output_layout' and stores its result + invalidate all users if layout has changed and @p invalidate_users_if_changed is set to true
     layout get_output_layout(bool invalidate_users_if_changed = true);
     //returns cached output layout if valid, otherwise throws an exception
     layout get_output_layout() const;
@@ -159,7 +164,6 @@ public:
     bool is_output() const { return output; }
 
     bool is_valid_output_layout() const { return valid_output_layout; }
-    uint32_t get_processing_num() const { return processing_num; }
 
     uint8_t mark(uint8_t val = 1) { uint8_t ret = user_mark; user_mark = val; return ret; }
     void unmark() { user_mark = 0; }
@@ -183,19 +187,25 @@ public:
         return fused_activation.additional_params;
     }
 
+    // check/set if the node can be optimized out (removed from the network)
     bool can_be_optimized() const { return optimized; }
     void can_be_optimized(bool opt) { optimized = opt; }
 
+    // check/set if the node's buffer can be shared during the memory pool optimization
+    bool can_share_buffer() const { return share_buffer; }
+    void can_share_buffer(bool share) { share_buffer = share; }
+
+    // check/set if the node support padding in x,y,b and f
+    bool support_padding() const { return _support_padding; }
+    void support_padding(bool support) { _support_padding = support; }
+
     primitive_id get_org_primitive_id() const { return org_id; }
-    void set_org_primitive_id(primitive_id org_prim_id) 
-    {
-        org_id = org_prim_id;
-    }
 
     bool is_constant() const { return constant; }
-    bool has_non_const_user() const { return (!constant || constant_frontier); }
-    //returns true if this node is within main data flow of the network (i.e. it does not describe helper data like convolution's weights etc.)
+    
+    // returns true if this node is within main data flow of the network (i.e. it does not describe helper data like convolution's weights etc.)
     bool is_in_data_flow() const { return data_flow; }
+
     //conversion from generic to specific
     template <class To, class..., class = typename std::enable_if<!std::is_same<To, primitive>::value>::type>
     typed_program_node<To>& as()
@@ -248,28 +258,22 @@ protected:
     std::vector<program_node*> dependencies;
     std::list<program_node*> users;
 
-#if defined(__GNUC__) && (GCC_VERSION < 40900)
-    std::list<program_node*>::iterator processing_itr;
-#else
-    std::list<program_node*>::const_iterator processing_itr;
-#endif
-    uint32_t processing_num = 0;
-
     // list of primitives that can reuse same memory buffers due to execution order conflicts
-    std::set<primitive_id> memory_dependencies;  
+    std::set<primitive_id> memory_dependencies;
 
     bool constant = false;
-    bool constant_frontier = false;
     bool data_flow = false;
 
     bool output = false;
     uint8_t user_mark = 0;
     bool optimized = false;
+    bool share_buffer = true;
+    bool _support_padding = false;
 
     mutable bool has_reused_memory = false;
     mutable uint32_t reused_memory_color = 0;
 
-    primitive_id org_id = "";
+    const primitive_id org_id;
 
     struct fused_activation_params
     {
@@ -288,8 +292,9 @@ namespace details
     struct api_typed_program_node_base : public program_node
     {
         static_assert(meta::is_api_primitive<PType>::value, "PType should name a non-const, non-volatile type derived from cldnn::primitive but not from cldnn::internal_primitive");
+        friend class cldnn::graph_initializations;
         friend struct cldnn::program_impl;
-
+        friend class cldnn::reorder_inputs;
     public:
         using program_node::program_node;
 
@@ -369,4 +374,4 @@ struct typed_program_node : public typed_program_node_base<PType>
     program_node& input() const { return program_node::get_dependency(0); }
 };
 
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h b/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h
new file mode 100644
index 000000000..f87b3f48d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/pyramid_roi_align_inst.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "api/CPP/pyramid_roi_align.hpp"
+#include "primitive_inst.h"
+
+#include <memory>
+
+namespace cldnn {
+    template <>
+    struct typed_program_node<pyramid_roi_align> : public typed_program_node_base<pyramid_roi_align>
+    {
+        using parent = typed_program_node_base<pyramid_roi_align>;
+
+    public:
+        typed_program_node(std::shared_ptr<primitive> prim, program_impl& prog)
+             : parent(prim, prog)
+            {}
+
+        program_node& input() const { return get_dependency(0); }
+        program_node& boxes() const { return get_dependency(0); }
+        program_node& image_meta() const { return get_dependency(1); }
+        program_node& P2() const { return get_dependency(2); }
+        program_node& P3() const { return get_dependency(3); }
+        program_node& P4() const { return get_dependency(4); }
+        program_node& P5() const { return get_dependency(5); }
+        program_node& pool_size() const { return get_dependency(6); }
+    };
+
+    using pyramidROIAlign_node = typed_program_node<pyramid_roi_align>;
+
+    template <>
+    class typed_primitive_inst<pyramid_roi_align> : public typed_primitive_inst_base<pyramid_roi_align>
+    {
+        using parent = typed_primitive_inst_base<pyramid_roi_align>;
+
+    public:
+        static layout calc_output_layout(pyramidROIAlign_node const& node);
+        static std::string to_string(pyramidROIAlign_node const& node);
+        typed_primitive_inst(network_impl& network, pyramidROIAlign_node const& node);
+
+        memory_impl& boxes() const { return dep_memory(0); }
+        memory_impl& image_meta() const { return dep_memory(1); }
+        memory_impl& P2() const { return dep_memory(2); }
+        memory_impl& P3() const { return dep_memory(3); }
+        memory_impl& P4() const { return dep_memory(4); }
+        memory_impl& P5() const { return dep_memory(5); }
+        memory_impl& pool_size() const { return dep_memory(6); }
+    };
+
+    using pyramid_roi_align_inst = typed_primitive_inst<pyramid_roi_align>;
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h b/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h
index 1ac9fdfcb..a97153afe 100644
--- a/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/reshape_inst.h
@@ -26,6 +26,7 @@ template <>
 struct typed_program_node<reshape> : public typed_program_node_base<reshape>
 {
     using parent = typed_program_node_base<reshape>;
+    typed_program_node(const std::shared_ptr<reshape> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
 
 public:
     using parent::parent;
diff --git a/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h b/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h
new file mode 100644
index 000000000..ac02b8ea6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/reverse_sequence_inst.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api/CPP/reverse_sequence.hpp"
+#include "primitive_inst.h"
+
+namespace  cldnn
+{
+    template <>
+    struct typed_program_node<reverse_sequence> : public typed_program_node_base<reverse_sequence>
+    {
+        using parent = typed_program_node_base<reverse_sequence>;
+
+    public:
+        using parent::parent;
+
+        program_node& input(size_t index = 0) const { return get_dependency(index); }
+    };
+
+    using reverse_sequence_node = typed_program_node<reverse_sequence>;
+
+    template <>
+    class typed_primitive_inst<reverse_sequence> : public typed_primitive_inst_base<reverse_sequence>
+    {
+        using parent = typed_primitive_inst_base<reverse_sequence>;
+
+    public:
+        static layout calc_output_layout(reverse_sequence_node const& node);
+        static std::string to_string(reverse_sequence_node const& node);
+
+    public:
+        typed_primitive_inst(network_impl& network, reverse_sequence_node const& desc);
+    };
+
+    using reverse_sequence_inst = typed_primitive_inst<reverse_sequence>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/scale_inst.h b/inference-engine/thirdparty/clDNN/src/include/scale_inst.h
index 405507ace..b239ef16a 100644
--- a/inference-engine/thirdparty/clDNN/src/include/scale_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/scale_inst.h
@@ -25,11 +25,13 @@ namespace cldnn
 template <>
 struct typed_program_node<scale> : public typed_program_node_base<scale>
 {
+private:
     using parent = typed_program_node_base<scale>;
 
 public:
     using parent::parent;
 
+    typed_program_node(const std::shared_ptr<scale> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
     program_node& input() const { return get_dependency(0); }
     program_node& scale_in() const { return get_dependency(1); }
     program_node& bias() const { return get_dependency(2); }
diff --git a/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h b/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h
new file mode 100644
index 000000000..5a633a525
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/shuffle_channels_inst.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api/CPP/shuffle_channels.hpp"
+#include "primitive_inst.h"
+
+namespace  cldnn
+{
+template <>
+struct typed_program_node<shuffle_channels> : public typed_program_node_base<shuffle_channels>
+{
+    using parent = typed_program_node_base<shuffle_channels>;
+
+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+};
+
+using shuffle_channels_node = typed_program_node<shuffle_channels>;
+
+template <>
+class typed_primitive_inst<shuffle_channels> : public typed_primitive_inst_base<shuffle_channels>
+{
+    using parent = typed_primitive_inst_base<shuffle_channels>;
+
+public:
+    static layout calc_output_layout(shuffle_channels_node const& node);
+    static std::string to_string(shuffle_channels_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, shuffle_channels_node const& desc);
+};
+
+using shuffle_channels_inst = typed_primitive_inst<shuffle_channels>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h b/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h
new file mode 100644
index 000000000..a12e536b4
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/include/strided_slice_inst.h
@@ -0,0 +1,51 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "api/CPP/strided_slice.hpp"
+#include "primitive_inst.h"
+
+namespace  cldnn
+{
+template <>
+struct typed_program_node<strided_slice> : public typed_program_node_base<strided_slice>
+{
+    using parent = typed_program_node_base<strided_slice>;
+
+public:
+    using parent::parent;
+
+    program_node& input(size_t index = 0) const { return get_dependency(index); }
+};
+
+using strided_slice_node = typed_program_node<strided_slice>;
+
+template <>
+class typed_primitive_inst<strided_slice> : public typed_primitive_inst_base<strided_slice>
+{
+    using parent = typed_primitive_inst_base<strided_slice>;
+
+public:
+    static layout calc_output_layout(strided_slice_node const& node);
+    static std::string to_string(strided_slice_node const& node);
+
+public:
+    typed_primitive_inst(network_impl& network, strided_slice_node const& desc);
+};
+
+using strided_slice_inst = typed_primitive_inst<strided_slice>;
+}
diff --git a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h
index 5d83c5b2f..f274381b0 100644
--- a/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h
+++ b/inference-engine/thirdparty/clDNN/src/include/to_string_utils.h
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017 Intel Corporation
+// Copyright (c) 2017-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -26,33 +26,15 @@ inline std::string bool_to_str(bool cond)
     return cond ? "true" : "false";
 }
 
-inline std::string get_extr_type(const char* str)
+inline std::string get_extr_type(const std::string& str)
 {
-    if (!str)
-    {
-        return{};
-    }
+    auto begin = str.find('<');
+    auto end = str.find('>');
 
-    while (*str && *str != '<')
-    {
-        ++str;
-    }
-    if (!*str)
-    {
-        return{};
-    }
+    if (begin == std::string::npos || end == std::string::npos)
+        return {};
 
-    auto end = str;
-    while (*end && *end != '>')
-    {
-        ++end;
-    }
-    if (!*end)
-    {
-        return{};
-    }
-
-    return{ str + 1, end };
+    return str.substr(begin + 1, (end - begin) -1);
 }
 
 inline std::string dt_to_str(data_types dt)
@@ -60,6 +42,7 @@ inline std::string dt_to_str(data_types dt)
     switch (dt)
     {
     case data_types::i8: return "i8";
+    case data_types::u8: return "u8";
     case data_types::i32: return "i32";
     case data_types::i64: return "i64";
     case data_types::f16: return "f16";
@@ -73,18 +56,36 @@ inline std::string fmt_to_str(format fmt)
 {
     switch (fmt.value)
     {
-    case format::bfyx: return "bfyx";
-    case format::byxf: return "byxf";
     case format::yxfb: return "yxfb";
+    case format::byxf: return "byxf";
+    case format::bfyx: return "bfyx";
     case format::fyxb: return "fyxb";
-    case format::bs_x_bsv16: return "bs_x_bsv16";
+    case format::os_iyx_osv16: return "os_iyx_osv16";
+    case format::os_iyx_osv32: return "os_iyx_osv32";
+    case format::os_iyx_osv64: return "os_iyx_osv64";
     case format::bs_xs_xsv8_bsv8: return "bs_xs_xsv8_bsv8";
     case format::bs_xs_xsv8_bsv16: return "bs_xs_xsv8_bsv16";
-    case format::os_iyx_osv16: return "os_iyx_osv16";
+    case format::bs_x_bsv16: return "bs_x_bsv16";
+    case format::bf8_xy16: return "bf8_xy16";
+    case format::image_2d_weights_c4_fyx_b: return "image_2d_weights_c4_fyx_b";
+    case format::image_2d_weights_c1_b_fyx: return "image_2d_weights_c1_b_fyx";
+    case format::winograd_2x3_s1_data: return "winograd_2x3_s1_data";
+    case format::winograd_2x3_s1_weights: return "winograd_2x3_s1_weights";
+    case format::winograd_2x3_s1_fused_weights: return "winograd_2x3_s1_fused_weights";
+    case format::winograd_6x3_s1_fused_weights: return "winograd_6x3_s1_fused_weights";
+    case format::image_2d_weights_winograd_6x3_s1_fbxyb: return "image_2d_weights_winograd_6x3_s1_fbxyb";
+    case format::image_2d_weights_winograd_6x3_s1_xfbyb: return "image_2d_weights_winograd_6x3_s1_xfbyb";
     case format::os_is_yx_isa8_osv8_isv4: return "os_is_yx_isa8_osv8_isv4";
+    case format::os_is_yx_isa8_osv8_isv4_swizzled_by_4: return "os_is_yx_isa8_osv8_isv4_swizzled_by_4";
     case format::is_o_yx_isv32: return "is_o_yx_isv32";
+    case format::is_o32_yx_isv32_swizzled_by_4: return "is_o32_yx_isv32_swizzled_by_4";
+    case format::os_is_y_x8_osv8_isv4: return "os_is_y_x8_osv8_isv4";
     case format::byxf_af32: return "byxf_af32";
+    case format::byx8_f4: return "byx8_f4";
     case format::fs_bs_yx_bsv4_fsv32: return "fs_bs_yx_bsv4_fsv32";
+    case format::bf_lyx_yx: return "bf_lyx_yx";
+    case format::b_fs_yx_fsv4: return "b_fs_yx_fs4"; break;
+    case format::os_is_yx_osv16_isv4: return "os_is_yx_osv16_isv4"; break;
     default:
         return "unknown (" + std::to_string(fmt.value) + ")";
     }
diff --git a/inference-engine/thirdparty/clDNN/src/include/upsampling_inst.h b/inference-engine/thirdparty/clDNN/src/include/upsampling_inst.h
index 2cf4d47e8..dd1d3904c 100644
--- a/inference-engine/thirdparty/clDNN/src/include/upsampling_inst.h
+++ b/inference-engine/thirdparty/clDNN/src/include/upsampling_inst.h
@@ -27,6 +27,7 @@ template <>
 struct typed_program_node<upsampling> : public typed_program_node_base<upsampling>
 {
     using parent = typed_program_node_base<upsampling>;
+    typed_program_node(const std::shared_ptr<upsampling> prim, program_impl& prog) : parent(prim, prog) { support_padding(true); }
 
 public:
     using parent::parent;
diff --git a/inference-engine/thirdparty/clDNN/src/include/xml_object.h b/inference-engine/thirdparty/clDNN/src/include/xml_object.h
deleted file mode 100644
index c32eddd91..000000000
--- a/inference-engine/thirdparty/clDNN/src/include/xml_object.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
-// Copyright (c) 2017 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-*/
-#pragma once
-#include <string>
-#include <type_traits>
-#include <unordered_map>
-#include <ostream>
-#include <memory>
-
-namespace cldnn
-{
-    class xml_base;
-    using xml_key = std::string;
-    using xml_base_ptr = std::shared_ptr<xml_base>;
-    using xml_map = std::unordered_map<xml_key, xml_base_ptr>;
-
-    class xml_base
-    {
-    public:
-        virtual void dump(std::ostream& out, int offset) = 0;
-    };
-
-    template<class Type>
-    class xml_leaf : public xml_base
-    {
-    private:
-        Type value;
-    public:
-        xml_leaf(const Type& val) : value(val) {}
-        xml_leaf(Type&& val) : value(std::move(val)) {}
-        void dump(std::ostream& out, int) override
-        {
-            out << value;
-        }
-    };
-
-    template<class Type>
-    class xml_basic_array : public xml_base
-    {
-    private:
-        std::vector<Type> values;
-    public:
-        xml_basic_array(const std::vector<Type>& arr) : values(arr) {}
-        xml_basic_array(std::vector<Type>&& arr) : values(std::move(arr)) {}
-        void dump(std::ostream& out, int) override
-        {
-            const char* delim = "";
-            for (size_t i = 0; i < values.size(); i++)
-            {
-                out << delim << values[i];
-                delim = ",";
-            }
-        }
-    };
-
-    class xml_composite : public xml_base
-    {
-    private:
-        xml_map children;
-    public:
-        void dump(std::ostream& out, int offset = -1) override
-        {
-            offset++;
-            bool first = true;
-            static int offset_temp;
-            std::string spaces(offset * 4, ' ');
-            if (offset!=0) out << "\n";
-            for (const auto& it : children)
-            {
-                if (first)
-                {
-                    out << spaces << "<" << it.first << ">";
-                    first = false;
-                }
-                else
-                    out << "\n" << spaces << "<" << it.first << ">";
-
-                offset_temp = offset;
-                it.second->dump(out, offset);
-
-                std::string spaces_behind(0, ' ');
-                if (offset_temp != offset)
-                    spaces_behind = spaces;
-                out << spaces_behind << "</" << it.first << ">";
-                if (offset == 1)
-                {
-                    out << spaces << "\n";
-                }
-            };
-
-            if (offset > 0)
-            {
-                out << spaces << "\n";
-                offset--;
-            }
-        }
-
-        template<class Type>
-        void add(xml_key key, Type value)
-        {
-            children[key] = std::make_shared<xml_leaf<Type>>(value);
-        }
-        void add(xml_key key, xml_composite comp)
-        {
-            children[key] = std::make_shared<xml_composite>(comp);
-        }
-        template<class Type>
-        void add(xml_key key, std::vector<Type> array)
-        {
-            children[key] = std::make_shared<xml_basic_array<Type>>(array);
-        }
-    };
-
-
-}
-
diff --git a/inference-engine/thirdparty/clDNN/src/index_select.cpp b/inference-engine/thirdparty/clDNN/src/index_select.cpp
index 9c1447022..88acded14 100644
--- a/inference-engine/thirdparty/clDNN/src/index_select.cpp
+++ b/inference-engine/thirdparty/clDNN/src/index_select.cpp
@@ -30,36 +30,44 @@ namespace cldnn
 
 	layout index_select_inst::calc_output_layout(index_select_node const& node)
 	{
-		auto desc = node.get_primitive();
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "index_select_node!");
+        auto desc = node.get_primitive();
 
         auto input_layout = node.input().get_output_layout();
-        auto indices_layout = node.indices().get_output_layout();
-        auto indices_size = indices_layout.size.spatial[0];
-
-        auto axis = node.get_axis();
+        
         
         int32_t output_b = input_layout.size.batch[0];
         int32_t output_f = input_layout.size.feature[0];
         int32_t output_x = input_layout.size.spatial[0];
         int32_t output_y = input_layout.size.spatial[1];
 
-        switch (axis)
-        {
-        case index_select_axis_name::along_b:
-            output_b = indices_size;
-            break;
-        case index_select_axis_name::along_f:
-            output_f = indices_size;
-            break;
-        case index_select_axis_name::along_x:
-            output_x = indices_size;
-            break;
-        case index_select_axis_name::along_y:
-            output_y = indices_size;
-            break;
-        default:
-            CLDNN_ERROR_MESSAGE(node.id(), "UNSPORTTED AXIS");
-            break;
+        if (!node.get_reverse()) {
+            auto indices_layout = node.indices().get_output_layout();
+            auto indices_size = indices_layout.size.spatial[0];
+            auto axes = node.get_axes();
+            for (size_t i = 0; i < axes.size(); i++)
+            {
+                switch (axes[i])
+                {
+                case index_select_axis_name::along_b:
+                    output_b = indices_size;
+                    break;
+                case index_select_axis_name::along_f:
+                    output_f = indices_size;
+                    break;
+                case index_select_axis_name::along_x:
+                    output_x = indices_size;
+                    break;
+                case index_select_axis_name::along_y:
+                    output_y = indices_size;
+                    break;
+                default:
+                    CLDNN_ERROR_MESSAGE(node.id(), "UNSUPPORTED AXIS");
+                    break;
+                }
+            }
         }
         return layout{ input_layout.data_type, input_layout.format, { output_b, output_f, output_x, output_y } };
 	}
@@ -71,27 +79,30 @@ namespace cldnn
 		std::stringstream primitive_description;
 
         std::string axis_str = "";
-        switch (desc->axis)
+        for (size_t i = 0; i < desc->axis.size(); i++)
         {
-        case index_select_axis_name::along_b:
-            axis_str = "along_b";
-            break;
-        case index_select_axis_name::along_f:
-            axis_str = "along_f";
-            break;
-        case index_select_axis_name::along_y:
-            axis_str = "along_y";
-            break;
-        case index_select_axis_name::along_x:
-            axis_str = "along_x";
-            break;
-        default:
-            axis_str = "not supported axis";
-            break;
+            switch (desc->axis.at(i))
+            {
+            case index_select_axis_name::along_b:
+                axis_str += "along_b, ";
+                break;
+            case index_select_axis_name::along_f:
+                axis_str += "along_f, ";
+                break;
+            case index_select_axis_name::along_y:
+                axis_str += "along_y, ";
+                break;
+            case index_select_axis_name::along_x:
+                axis_str += "along_x, ";
+                break;
+            default:
+                axis_str += "not supported axis, ";
+                break;
+            }
         }
 
         json_composite index_select_info;
-        index_select_info.add("axis", axis_str);
+        index_select_info.add("axes", axis_str);
 
         node_info->add("index_select_info", index_select_info);
 		node_info->dump(primitive_description);
@@ -104,17 +115,21 @@ namespace cldnn
 	{
         auto& input = node.input();
         auto input_layout = input.get_output_layout();
-        auto& indices = node.indices();
-        auto indices_layout = indices.get_output_layout();
         auto const node_id = node.id();
 
-        CLDNN_ERROR_DATA_TYPES_MISMATCH(node_id, "indicies data_type", indices_layout.data_type, "i32 data_type ", data_types::i32, "");
         CLDNN_ERROR_NOT_PROPER_FORMAT(node_id, "input_format", input_layout.format, "supported input format", format::bfyx, format::yxfb);
-        CLDNN_ERROR_NOT_PROPER_FORMAT(node_id, "input_format", indices_layout.format, "supported indicies format", format::bfyx, format::yxfb);
-        CLDNN_ERROR_NOT_EQUAL(node_id, "indicies batch_size", indices_layout.size.batch[0], "expected size", 1, "");
-        CLDNN_ERROR_NOT_EQUAL(node_id, "indicies feature_size", indices_layout.size.feature[0], "expected size", 1, "");
-        CLDNN_ERROR_NOT_EQUAL(node_id, "indicies y_size", indices_layout.size.spatial[1], "expected size", 1, "");
-        CLDNN_ERROR_LESS_THAN(node_id, "indicies x_size", indices_layout.size.spatial[0], "expected size", 1, "");
+        
+        if (!node.get_reverse())
+        {
+            auto& indices = node.indices();
+            auto indices_layout = indices.get_output_layout();
 
+            CLDNN_ERROR_DATA_TYPES_MISMATCH(node_id, "indicies data_type", indices_layout.data_type, "i32 data_type ", data_types::i32, "");
+            CLDNN_ERROR_NOT_EQUAL(node_id, "indicies batch_size", indices_layout.size.batch[0], "expected size", 1, "");
+            CLDNN_ERROR_NOT_EQUAL(node_id, "indicies feature_size", indices_layout.size.feature[0], "expected size", 1, "");
+            CLDNN_ERROR_NOT_EQUAL(node_id, "indicies y_size", indices_layout.size.spatial[1], "expected size", 1, "");
+            CLDNN_ERROR_LESS_THAN(node_id, "indicies x_size", indices_layout.size.spatial[0], "expected size", 1, "");
+            CLDNN_ERROR_NOT_PROPER_FORMAT(node_id, "input_format", indices_layout.format, "supported indicies format", format::bfyx, format::yxfb);
+        }
 	}
 }
diff --git a/inference-engine/thirdparty/clDNN/src/input_layout.cpp b/inference-engine/thirdparty/clDNN/src/input_layout.cpp
index 8ec055f29..6fa58610d 100644
--- a/inference-engine/thirdparty/clDNN/src/input_layout.cpp
+++ b/inference-engine/thirdparty/clDNN/src/input_layout.cpp
@@ -29,6 +29,12 @@ primitive_type_id input_layout_type_id()
     return &instance;
 }
 
+input_layout_node::typed_program_node(const std::shared_ptr<input_layout> dprim, program_impl& prog)
+    : parent(dprim, prog)
+{
+    can_share_buffer(false);
+}
+
 input_layout_inst::typed_primitive_inst(network_impl& network, input_layout_node const& node)
     : parent(network, node)
 {
diff --git a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp
index 67be2cebf..d073d8b2e 100644
--- a/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp
+++ b/inference-engine/thirdparty/clDNN/src/kernel_selector_helper.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2016-2018 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -13,6 +13,14 @@
 // limitations under the License.
 
 #include "kernel_selector_helper.h"
+#include "kernel_selector_params.h"
+
+#include "gpu/ocl_toolkit.h"
+
+#include "program_node.h"
+#include "program_impl.h"
+
+#include "training_params.h"
 
 kernel_selector::data_type to_data_type(data_types dt)
 {
@@ -51,6 +59,7 @@ kernel_selector::weights_type to_weights_type(data_types dt)
     switch (dt)
     {
     case cldnn::data_types::i8:     return kernel_selector::weights_type::INT8;
+    case cldnn::data_types::u8:     return kernel_selector::weights_type::UINT8;
     case cldnn::data_types::f16:    return kernel_selector::weights_type::F16;
     case cldnn::data_types::f32:    return kernel_selector::weights_type::F32;
     default:
@@ -64,6 +73,7 @@ data_types from_weights_type(kernel_selector::weights_type dt)
     switch (dt)
     {
     case kernel_selector::weights_type::INT8:   return data_types::i8;
+    case kernel_selector::weights_type::UINT8:  return data_types::u8;
     case kernel_selector::weights_type::F16:    return data_types::f16;
     case kernel_selector::weights_type::F32:    return data_types::f32;
     default:
@@ -86,8 +96,10 @@ kernel_selector::data_layout to_data_layout(format f)
     case format::bf8_xy16:          return kernel_selector::data_layout::bf8_xy16;
     case format::winograd_2x3_s1_data:  return kernel_selector::data_layout::winograd_2x3_s1_data;
     case format::byxf_af32: return kernel_selector::data_layout::byxf_af32;
+    case format::byx8_f4: return kernel_selector::data_layout::byx8_f4;
     case format::fs_bs_yx_bsv4_fsv32: return kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32;
         //     case format::brfyx:          return kernel_selector::data_layout::brfyx;
+    case format::b_fs_yx_fsv4:       return kernel_selector::data_layout::b_fs_yx_fsv4;
     default:
         return kernel_selector::data_layout::bfyx;
     }
@@ -109,6 +121,7 @@ cldnn::format from_data_layout(kernel_selector::data_layout l)
     case kernel_selector::data_layout::brfyx:             return cldnn::format::bfyx;
     case kernel_selector::data_layout::winograd_2x3_s1_data:   return cldnn::format::winograd_2x3_s1_data;
     case kernel_selector::data_layout::byxf_af32: return cldnn::format::byxf_af32;
+    case kernel_selector::data_layout::byx8_f4: return cldnn::format::byx8_f4;
     case kernel_selector::data_layout::fs_bs_yx_bsv4_fsv32: return cldnn::format::fs_bs_yx_bsv4_fsv32;
     default:
         return cldnn::format::bfyx;
@@ -125,6 +138,8 @@ kernel_selector::weights_layout to_weights_layout(format f)
     case format::byxf:              return kernel_selector::weights_layout::oyxi;
     case format::yxfb:              return kernel_selector::weights_layout::yxio;
     case format::os_iyx_osv16:      return kernel_selector::weights_layout::os_iyx_osv16;
+    case format::os_iyx_osv32:      return kernel_selector::weights_layout::os_iyx_osv32;
+    case format::os_iyx_osv64:      return kernel_selector::weights_layout::os_iyx_osv64;
     case format::bs_xs_xsv8_bsv8:   return kernel_selector::weights_layout::os_i_osv8__ai8;
     case format::bs_xs_xsv8_bsv16:  return kernel_selector::weights_layout::os_i_osv16__ai8;
     case format::bs_x_bsv16:        return kernel_selector::weights_layout::os_i_osv16;
@@ -135,8 +150,13 @@ kernel_selector::weights_layout to_weights_layout(format f)
     case format::winograd_6x3_s1_fused_weights: return kernel_selector::weights_layout::winograd_6x3_s1_fused_weights;
     case format::image_2d_weights_winograd_6x3_s1_fbxyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb;
     case format::image_2d_weights_winograd_6x3_s1_xfbyb:     return kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb;
-    case format::os_is_yx_isa8_osv8_isv4: return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
+    case format::os_is_yx_isa8_osv8_isv4:                    return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4;
+    case format::os_is_yx_isa8_osv8_isv4_swizzled_by_4:      return kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
     case format::is_o_yx_isv32: return kernel_selector::weights_layout::is_o_yx_isv32;
+    case format::is_o32_yx_isv32_swizzled_by_4: return kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4;
+    case format::os_is_y_x8_osv8_isv4: return kernel_selector::weights_layout::os_is_y_x8_osv8_isv4;
+    case format::bf_lyx_yx:                                  return kernel_selector::weights_layout::bf_lyx_yx;
+    case format::os_is_yx_osv16_isv4:  return kernel_selector::weights_layout::os_is_yx_osv16_isv4;
     default:
         return kernel_selector::weights_layout::oi;
     }
@@ -147,24 +167,30 @@ cldnn::format::type from_weights_layout(kernel_selector::weights_layout l)
     switch (l)
     {
     case kernel_selector::weights_layout::oi:
-    case kernel_selector::weights_layout::oiyx:               return cldnn::format::bfyx;
-    case kernel_selector::weights_layout::oyxi:               return cldnn::format::byxf;
+    case kernel_selector::weights_layout::oiyx:            return cldnn::format::bfyx;
+    case kernel_selector::weights_layout::oyxi:            return cldnn::format::byxf;
     case kernel_selector::weights_layout::io:
-    case kernel_selector::weights_layout::iyxo:               return cldnn::format::fyxb;
-    case kernel_selector::weights_layout::yxio:               return cldnn::format::yxfb;
-    case kernel_selector::weights_layout::os_iyx_osv16:       return cldnn::format::os_iyx_osv16;
-    case kernel_selector::weights_layout::os_i_osv16:         return cldnn::format::bs_x_bsv16;
-    case kernel_selector::weights_layout::os_i_osv8__ai8:     return cldnn::format::bs_xs_xsv8_bsv8;
-    case kernel_selector::weights_layout::os_i_osv16__ai8:    return cldnn::format::bs_xs_xsv8_bsv16;
-    case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b:        return cldnn::format::image_2d_weights_c4_fyx_b;
-    case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx:        return cldnn::format::image_2d_weights_c1_b_fyx;
-    case kernel_selector::weights_layout::winograd_2x3_s1_weights:          return cldnn::format::winograd_2x3_s1_weights;
-    case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights:    return cldnn::format::winograd_2x3_s1_fused_weights;
-    case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights:    return cldnn::format::winograd_6x3_s1_fused_weights;
-    case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb:        return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
-    case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb:        return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
-    case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4: return cldnn::format::os_is_yx_isa8_osv8_isv4;
-    case kernel_selector::weights_layout::is_o_yx_isv32: return cldnn::format::is_o_yx_isv32;
+    case kernel_selector::weights_layout::iyxo:            return cldnn::format::fyxb;
+    case kernel_selector::weights_layout::yxio:            return cldnn::format::yxfb;
+    case kernel_selector::weights_layout::os_iyx_osv16:    return cldnn::format::os_iyx_osv16;
+    case kernel_selector::weights_layout::os_iyx_osv32:    return cldnn::format::os_iyx_osv32;
+    case kernel_selector::weights_layout::os_iyx_osv64:    return cldnn::format::os_iyx_osv64;
+    case kernel_selector::weights_layout::os_i_osv16:      return cldnn::format::bs_x_bsv16;
+    case kernel_selector::weights_layout::os_i_osv8__ai8:  return cldnn::format::bs_xs_xsv8_bsv8;
+    case kernel_selector::weights_layout::os_i_osv16__ai8: return cldnn::format::bs_xs_xsv8_bsv16;
+    case kernel_selector::weights_layout::image_2d_weights_c4_fyx_b:     return cldnn::format::image_2d_weights_c4_fyx_b;
+    case kernel_selector::weights_layout::image_2d_weights_c1_b_fyx:     return cldnn::format::image_2d_weights_c1_b_fyx;
+    case kernel_selector::weights_layout::winograd_2x3_s1_weights:       return cldnn::format::winograd_2x3_s1_weights;
+    case kernel_selector::weights_layout::winograd_2x3_s1_fused_weights: return cldnn::format::winograd_2x3_s1_fused_weights;
+    case kernel_selector::weights_layout::winograd_6x3_s1_fused_weights: return cldnn::format::winograd_6x3_s1_fused_weights;
+    case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_fbxyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_fbxyb;
+    case kernel_selector::weights_layout::image_2d_weights_winograd_6x3_s1_xfbyb: return cldnn::format::image_2d_weights_winograd_6x3_s1_xfbyb;
+    case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4:                return cldnn::format::os_is_yx_isa8_osv8_isv4;
+    case kernel_selector::weights_layout::os_is_yx_isa8_osv8_isv4_swizzled_by_4:  return cldnn::format::os_is_yx_isa8_osv8_isv4_swizzled_by_4;
+    case kernel_selector::weights_layout::is_o_yx_isv32:                          return cldnn::format::is_o_yx_isv32;
+    case kernel_selector::weights_layout::is_o32_yx_isv32_swizzled_by_4: return cldnn::format::is_o32_yx_isv32_swizzled_by_4;
+    case kernel_selector::weights_layout::os_is_y_x8_osv8_isv4: return cldnn::format::os_is_y_x8_osv8_isv4;
+    case kernel_selector::weights_layout::bf_lyx_yx:                              return cldnn::format::bf_lyx_yx;
     default:
         return cldnn::format::bfyx;
     }
@@ -213,6 +239,11 @@ kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split
         new_vals[3] = align_to(vals[3], 32);
         new_vals[2] = align_to(vals[2], 4);
     }
+    if (ks_layout == kernel_selector::Tensor::byx8_f4)
+    {
+        new_vals[3] = align_to(vals[3], 4);
+        new_vals[2] = align_to(vals[2], 8);
+    }
 
     for (size_t i = 0; i < vec.size(); i++)
     {
@@ -245,9 +276,8 @@ kernel_selector::data_tensor convert_data_tensor(const layout& l, uint32_t split
 
 kernel_selector::weights_tensor convert_weights_tensor(const layout& l)
 {
-    assert(l.format.dimension() == 4);
-    const auto& t = l.size.sizes(format::bfyx);
-    const auto base_layout = kernel_selector::weights_layout::oiyx;
+    const auto& t = l.size.sizes(l.format);
+    const auto base_layout = to_weights_layout(l.format);
     const auto ks_type = to_weights_type(l.data_type);
     const auto ks_layout = to_weights_layout(l.format);
     std::vector<size_t> vec(kernel_selector::WeightsTensor::ChannelsCount(base_layout));
@@ -307,10 +337,12 @@ kernel_selector::activation_function get_kernel_selector_activation_param(cldnn_
         return kernel_selector::activation_function::COSH;
     case activation_log:
         return kernel_selector::activation_function::LOG;
-	case activation_log2:
-		return kernel_selector::activation_function::LOG2;
+    case activation_log2:
+        return kernel_selector::activation_function::LOG2;
     case activation_exp:
         return kernel_selector::activation_function::EXP;
+    case activation_not:
+        return kernel_selector::activation_function::NOT;
     default:
         throw std::runtime_error("Unknown activation function");
         break;
@@ -331,4 +363,54 @@ kernel_selector::activation_function get_kernel_selector_activation_grad_param(c
         throw std::runtime_error("Unknown activation_grad function");
         break;
     }
+}
+
+void set_params(const program_node& node, kernel_selector::params& params)
+{
+    const auto& context = node.get_program().get_engine().get_context();
+    const auto& engine_info = context->get_engine_info();
+
+    params.engineInfo.bSubGroupSupport = context->extension_supported("cl_intel_subgroups");
+    params.engineInfo.bSubGroupShortSupport = context->extension_supported("cl_intel_subgroups_short");
+    params.engineInfo.bFP16Support = context->extension_supported("cl_khr_fp16");
+    params.engineInfo.bFP64Support = context->extension_supported("cl_khr_fp64");
+    params.engineInfo.bIMADSupport = engine_info.supports_imad != 0;
+    params.engineInfo.bIMMADSupport = engine_info.supports_immad != 0;
+    params.engineInfo.bImageSupport = engine_info.supports_image != 0;
+    params.engineInfo.maxWorkGroupSize = engine_info.max_work_group_size;
+    params.engineInfo.maxLocalMemSize = engine_info.max_local_mem_size;
+    params.engineInfo.maxImage2dWidth = engine_info.max_image2d_width;
+    params.engineInfo.maxImage2dHeight = engine_info.max_image2d_height;
+    params.engineInfo.deviceId = engine_info.dev_id;
+    params.engineInfo.computeUnitsCount = engine_info.compute_units_count;
+    params.engineInfo.deviceCache = engine_info.device_cache;
+    params.engineInfo.driverVersion = engine_info.driver_version;
+    params.engineInfo.hostVersion = to_host_version(cldnn::get_version());
+}
+
+void set_learning_params(const program_node& node, kernel_selector::training_params& params, bool use_momentum)
+{
+    const auto learning_params = node.get_program().get_options().template get<build_option_type::learning_config>()->params;
+
+    if (use_momentum)
+    {
+        params.use_momentum = true;
+    }
+
+    params.momentum_factor = learning_params.momentum;
+    params.weights_decay = learning_params.weights_decay;
+}
+
+void set_optional_params(const program_impl& program, kernel_selector::optional_params& params)
+{
+    const auto& context = program.get_engine().get_context();
+
+    params.meaningfulKernelsNames = context->get_configuration().meaningful_kernels_names;
+    params.allowStaticInputReordering = program.get_options().get<build_option_type::optimize_data>()->enabled();
+    params.allowInputReordering = false;
+    params.allowOutputReordering = false;
+
+    const auto& tuning_config = program.get_options().get<build_option_type::tuning_config>();
+    params.tuningParams.mode = to_tuning_mode(tuning_config->config.mode);
+    params.tuningParams.cacheFilePath = tuning_config->config.cache_file_path;
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
index e2723fd17..8adf3d545 100644
--- a/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
+++ b/inference-engine/thirdparty/clDNN/src/layout_optimizer.cpp
@@ -201,6 +201,12 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, data_
             expected_tensor = current_layout.size;
             expected_format = cldnn::format::byxf;
         }
+        // IMAD case
+        else if (current_layout.format == format::b_fs_yx_fsv4 ||
+                 current_layout.format == format::os_is_yx_osv16_isv4)
+        {
+            // Nothing to do, just go out from here.
+        }
         // MMAD case
         else if (current_layout.data_type == data_types::i8)
         {
@@ -211,7 +217,8 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, data_
             || (_output_size_handling_enabled && prim->with_output_size) ||
             node.get_transposed())
         {
-            if (current_layout.data_type == data_types::f32 &&
+            // commented out due to performance reasons, maybe enable in future
+            /*if (current_layout.data_type == data_types::f32 &&
                 current_layout.size.batch[0] % 16 == 0 &&
                 current_layout.format == format::bfyx &&
                 output_or_weights_layout.size.spatial[0] == 1 && output_or_weights_layout.size.spatial[1] == 1 &&
@@ -226,7 +233,7 @@ layout layout_optimizer::get_expected_layout(layout const& current_layout, data_
                     expected_format = cldnn::format::bf8_xy16;
                 }
             }
-            else
+            else*/
             {
                 expected_tensor = current_layout.size;
                 expected_format = cldnn::format::bfyx;
diff --git a/inference-engine/thirdparty/clDNN/src/lookup_table.cpp b/inference-engine/thirdparty/clDNN/src/lookup_table.cpp
index 432bc4437..22cd517fd 100644
--- a/inference-engine/thirdparty/clDNN/src/lookup_table.cpp
+++ b/inference-engine/thirdparty/clDNN/src/lookup_table.cpp
@@ -31,6 +31,9 @@ namespace cldnn
 
     layout lookup_table_inst::calc_output_layout(lookup_table_node const& node)
     {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "lookup_table_node!");
         auto desc = node.get_primitive();
 
         auto input_data_layout = node.input().get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/lrn.cpp b/inference-engine/thirdparty/clDNN/src/lrn.cpp
index b25b6cb86..1fe2b267f 100644
--- a/inference-engine/thirdparty/clDNN/src/lrn.cpp
+++ b/inference-engine/thirdparty/clDNN/src/lrn.cpp
@@ -29,6 +29,8 @@ primitive_type_id lrn_type_id()
 
 layout lrn_inst::calc_output_layout(lrn_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for lrn_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/lstm.cpp b/inference-engine/thirdparty/clDNN/src/lstm.cpp
index 7c8078203..fae374a47 100644
--- a/inference-engine/thirdparty/clDNN/src/lstm.cpp
+++ b/inference-engine/thirdparty/clDNN/src/lstm.cpp
@@ -31,18 +31,21 @@ primitive_type_id lstm_type_id()
 
 layout lstm_inst::calc_output_layout(lstm_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for lstm_node!");
     auto input_layout = node.input().get_output_layout();
     auto hidden_layout = node.inital_hidden().get_output_layout();
 
-    // input     = [        1,  sequence,           batch,      input_size ]
-    // weights   = [        1, direction, 4 * hidden_size,      input_size ]
-    // recurrent = [        1, direction, 4 * hidden_size,     hidden_size ]
-    // biases    = [        1,         1,       direction, 4 * hidden_size ]
-    // hidden    = [        1, direction,           batch,     hidden_size ]
-    // cell      = [        1, direction,           batch,     hidden_size ]
-    // output    = [ sequence, direction,           batch,     hidden_size ]    
+    // input     = [ batch,  sequence,       direction,      input_size ]
+    // weights   = [     1, direction, 4 * hidden_size,      input_size ]
+    // recurrent = [     1, direction, 4 * hidden_size,     hidden_size ]
+    // biases    = [     1,         1,       direction, 4 * hidden_size ]
+    // hidden    = [ batch,         1,       direction,     hidden_size ]
+    // cell      = [ batch,         1,       direction,     hidden_size ]
+    // output    = [ batch,  sequence,       direction,     hidden_size ]
 	auto result = layout(input_layout.data_type, format::bfyx,
-                  tensor(hidden_layout.size.feature[0], input_layout.size.feature[0], hidden_layout.size.spatial[0], hidden_layout.size.spatial[1]));
+                  tensor(hidden_layout.size.feature[0], input_layout.size.feature[0],
+                         hidden_layout.size.spatial[0], hidden_layout.size.spatial[1]));
     return result;
 }
 
@@ -75,10 +78,8 @@ std::string lstm_inst::to_string(lstm_node const& node)
 lstm_inst::typed_primitive_inst(network_impl& network, lstm_node const& node)
     :parent(network, node)
 {
-    // [ARIEL] TODO: That do we need to check here??
-    auto input_size = node.input().get_output_layout();
-    // auto output_size = output_memory().get_layout();
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_size.format.value, "expected format", format::bfyx);
-    //CLDNN_ERROR_NOT_EQUAL(node.id(), "Input size", input_size.size.raw.size(), "output size", output_size.size.raw.size(), "");
+    auto input_layout = node.input().get_output_layout();
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_layout.format.value, "expected format", format::bfyx);
 }
+
 }
diff --git a/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp b/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp
index 718939d8f..d809f8682 100644
--- a/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp
+++ b/inference-engine/thirdparty/clDNN/src/lstm_elt.cpp
@@ -30,6 +30,8 @@ primitive_type_id lstm_elt_type_id()
 
 layout lstm_elt_inst::calc_output_layout(lstm_elt_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for lstm_elt_node!");
     auto desc = node.get_primitive();
     auto input_layout = node.input().get_output_layout();
 
@@ -38,7 +40,7 @@ layout lstm_elt_inst::calc_output_layout(lstm_elt_node const& node)
     // output{bfyx}   = [b: batch, f: 2,         x: direction, y: hidden_size ] output
     // The output of the lstm_elt node is the concatenation of the intermediate [hidden, cell] tensors.
     // A crop/split node is needed to extract each individual tensors
-    auto result = layout(input_layout.data_type, format::bfyx,
+    auto result = layout(input_layout.data_type, input_layout.format,
                     tensor(input_layout.size.batch[0], 2, input_layout.size.spatial[0] / 4, input_layout.size.feature[0]));
     return result;
 }
@@ -63,6 +65,6 @@ lstm_elt_inst::typed_primitive_inst(network_impl& network, lstm_elt_node const&
     :parent(network, node)
 {
     auto input_size = node.input().get_output_layout();
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_size.format.value, "expected format", format::bfyx);
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_size.format.value, "expected format", format::bfyx, format::fyxb);
 }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp b/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp
index 31d36fa7e..e39a271ed 100644
--- a/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp
+++ b/inference-engine/thirdparty/clDNN/src/lstm_gemm.cpp
@@ -31,6 +31,8 @@ primitive_type_id lstm_gemm_type_id()
 
 layout lstm_gemm_inst::calc_output_layout(lstm_gemm_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for lstm_gemm_node!");
     auto desc = node.get_primitive();
     auto input_layout = node.input().get_output_layout();
     auto weights_layout = node.weights().get_output_layout();
@@ -41,8 +43,7 @@ layout lstm_gemm_inst::calc_output_layout(lstm_gemm_node const& node)
     //   biases{bfyx}    = [b: 1,     f:1 ,          x: direction,       y:  4 * hidden_size ]
     //   hidden{bfyx}    = [b: batch, f:  direction, x: 1 ,              y: hidden_size ] optional
     //   tempGEMM{bfyx}  = [b: batch, f: direction,  x: 4*hidden_size,   y: 1] output
-
-    auto result = layout(input_layout.data_type, format::bfyx, tensor(input_layout.size.batch[0], weights_layout.size.feature[0], weights_layout.size.spatial[1], 1));
+    auto result = layout(input_layout.data_type, input_layout.format, tensor(input_layout.size.batch[0], weights_layout.size.feature[0], weights_layout.size.spatial[1], 1));
     return result;
 }
 
@@ -71,7 +72,7 @@ std::string lstm_gemm_inst::to_string(lstm_gemm_node const& node)
 lstm_gemm_inst::typed_primitive_inst(network_impl& network, lstm_gemm_node const& node)
     :parent(network, node)
 {
-    auto input_size = node.input().get_output_layout();
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_size.format.value, "expected format", format::bfyx);
+    auto input_layout = node.input().get_output_layout();
+    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "input format", input_layout.format.value, "expected format", format::bfyx, format::fyxb);
 }
 }
diff --git a/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp b/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp
index da67f0239..5e2d99b78 100644
--- a/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/max_unpooling.cpp
@@ -28,8 +28,16 @@ primitive_type_id max_unpooling_type_id()
     return &instance;
 }
 
+max_unpooling_node::typed_program_node(const std::shared_ptr<max_unpooling> prim, program_impl& prog) 
+    : parent(prim, prog)
+{
+    can_share_buffer(false); // for max_unpooling initial zero values are significant
+}
+
 layout max_unpooling_inst::calc_output_layout(max_unpooling_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+          && "Output data type forcing is not supported for max_unpooling_node!");
     auto desc = node.get_primitive();
 
     auto input_layout = node.input().get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp b/inference-engine/thirdparty/clDNN/src/memory_pool.cpp
index d339492ea..2a36ee19c 100644
--- a/inference-engine/thirdparty/clDNN/src/memory_pool.cpp
+++ b/inference-engine/thirdparty/clDNN/src/memory_pool.cpp
@@ -24,6 +24,8 @@
 #include "memory_impl.h"
 #include "program_impl.h"
 
+#include "program_node.h"
+
 #include "gpu/memory_gpu.h"
 namespace cldnn
 {
@@ -69,6 +71,8 @@ namespace cldnn
             }
         }
     }
+    memory_pool::~memory_pool()
+    { }
 
     bool memory_pool::has_conflict(const memory_set& a, const std::set<primitive_id>& b, uint32_t b_network_id)
     {   
diff --git a/inference-engine/thirdparty/clDNN/src/mutable_data.cpp b/inference-engine/thirdparty/clDNN/src/mutable_data.cpp
index d2deb029d..9ad7fef70 100644
--- a/inference-engine/thirdparty/clDNN/src/mutable_data.cpp
+++ b/inference-engine/thirdparty/clDNN/src/mutable_data.cpp
@@ -49,6 +49,7 @@ mutable_data_node::typed_program_node(const std::shared_ptr<mutable_data> dprim,
     : parent(dprim, prog), mem(api_cast(dprim->mem.get()))
 {
     recalc_output_layout(false);
+    can_share_buffer(false);
     fill_memory();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/mvn.cpp b/inference-engine/thirdparty/clDNN/src/mvn.cpp
index 267437660..d0460a976 100644
--- a/inference-engine/thirdparty/clDNN/src/mvn.cpp
+++ b/inference-engine/thirdparty/clDNN/src/mvn.cpp
@@ -28,6 +28,8 @@ primitive_type_id mvn_type_id()
 
 layout mvn_inst::calc_output_layout(mvn_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for mvn_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/network.cpp b/inference-engine/thirdparty/clDNN/src/network.cpp
index cca47a634..07ade9a3f 100644
--- a/inference-engine/thirdparty/clDNN/src/network.cpp
+++ b/inference-engine/thirdparty/clDNN/src/network.cpp
@@ -26,10 +26,16 @@
 #include "error_handler.h"
 #include "primitive_inst.h"
 #include "input_layout_inst.h"
+#include "condition_inst.h"
 #include "kernel_selector_helper.h"
 #include <algorithm>
 
+#include "gpu/ocl_toolkit.h"
+
+
 //#define DEBUG_DUMP_PATH "/tmp/dump/"
+
+
 #ifdef DEBUG_DUMP_PATH
 #include <iomanip>
 #include <fstream>
@@ -41,7 +47,6 @@
 
 namespace cldnn
 {
-
 #ifdef DEBUG_DUMP_PATH
 static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false)
     {
@@ -142,6 +147,7 @@ static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false
         std::replace(filename.begin(), filename.end(), '\\', '_');
         std::replace(filename.begin(), filename.end(), '/', '_');
         std::replace(filename.begin(), filename.end(), ' ', '_');
+        std::replace(filename.begin(), filename.end(), ':', '_');
         filename = DEBUG_DUMP_PATH + filename + ".txt";
 
         std::ofstream file_stream(filename);
@@ -151,9 +157,8 @@ static float convert_half_to_float(half_t val, bool flush_denorm_to_zero = false
             dump<half_t>(mem, file_stream);
     }
 #endif
-
 /*
-Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by const. propagator).
+Network_impl will always have net_id = 0 when it will be cldnn internal micronetwork (created i.e by propagate_constants opt pass).
 */
 network_impl::network_impl(const program_impl& program, bool is_internal)
     : _program(&program)
@@ -166,8 +171,10 @@ network_impl::network_impl(const program_impl& program, bool is_internal)
     }
 
     allocate_primitives();
+    check_names();
     build_insts_deps();
-
+    build_exec_order();
+    validate_primitives();
     _program->dump_memory_pool();
 }
 
@@ -176,6 +183,20 @@ network_impl::network_impl(engine_impl& engine, const topology_impl& topo, const
 {
 }
 
+network_impl::network_impl(engine_impl& engine, const std::set<std::shared_ptr<program_node>>& nodes, const build_options& options, bool is_internal)
+    : network_impl(*engine.build_program(nodes, options, is_internal), is_internal)
+{
+}
+
+void network_impl::validate_primitives()
+{
+    for (auto const& prim : _exec_order)
+    {
+        bool valid = prim->validate();
+        CLDNN_ERROR_NOT_EQUAL(prim->id(), "validate", valid, "", true, "has not a valid instance.");
+    }
+}
+
 void network_impl::reset_execution(bool wait)
 {
     if (wait && _events.size() > 0)
@@ -198,13 +219,12 @@ void network_impl::reset_execution(bool wait)
 void network_impl::set_input_data(const primitive_id& id, memory_impl& data)
 {
     std::shared_ptr<primitive_inst> primitive_inst;
-    try {
-        primitive_inst = _primitives.at(id);
-    }
-    catch (...)
-    {
+
+    primitive_inst = find_primitive(id);
+
+    if(primitive_inst == nullptr)
         throw std::runtime_error("topology doesn't contain prmitive:" + id);
-    }
+
     if (primitive_inst->type() != input_layout::type_id())
     {
         CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input");
@@ -217,6 +237,46 @@ void network_impl::set_input_data(const primitive_id& id, memory_impl& data)
     input->set_data(data);
 }
 
+void cldnn::network_impl::check_names()
+{
+    for (auto const& prim : _primitives)
+    {
+        if (find_in_internal_networks(prim.first) != nullptr)
+            CLDNN_ERROR_MESSAGE("Network_impl", "Found primitive with id: " + prim.first
+                + "in anotother network.");
+    }
+}
+
+std::shared_ptr<primitive_inst> cldnn::network_impl::find_primitive(const primitive_id& id)
+{
+    std::shared_ptr<primitive_inst> ret;
+
+    if (_primitives.find(id) != _primitives.end())
+        return _primitives.at(id);
+
+    return find_in_internal_networks(id);
+}
+
+std::shared_ptr<primitive_inst> cldnn::network_impl::find_in_internal_networks(const primitive_id& id)
+{
+    std::shared_ptr<primitive_inst> ret;
+
+    for (auto const& prim : _primitives)
+    {
+        if (prim.second->type() == condition::type_id()) //currently only condition inst contains mini networks
+        {
+            auto cond_inst = std::static_pointer_cast<condition_inst>(prim.second);
+            ret = cond_inst->get_net_true()->find_primitive(id);
+            if (ret != nullptr)
+                return ret;
+            ret = cond_inst->get_net_false()->find_primitive(id);
+            if (ret != nullptr)
+                return ret;
+        }
+    }
+    return nullptr;
+}
+
 void network_impl::set_learning_rate(const float lr)
 {
     _learning_rate = lr;
@@ -228,16 +288,18 @@ float network_impl::get_learning_rate()
 }
 
 std::string network_impl::get_primitive_info(const primitive_id& id) const
-{    
+{
     const auto& node = _program->get_node(id);
     return node.type()->to_string(node);
 }
 
 void network_impl::allocate_primitives()
 {
-    auto nodes = _program->get_nodes();
     std::vector<std::shared_ptr<program_node>> nodes_to_allocate{};
-    nodes_to_allocate.insert(nodes_to_allocate.begin(), nodes.begin(), nodes.end());
+    for (auto node : _program->get_processing_order())
+    {
+        nodes_to_allocate.push_back(_program->get_node_ptr(node->id()));
+    }
     std::sort(nodes_to_allocate.begin(), nodes_to_allocate.end(), [](std::shared_ptr<program_node> const& lhs,
                                                                      std::shared_ptr<program_node> const& rhs)
     {
@@ -250,7 +312,6 @@ void network_impl::allocate_primitives()
     }
 }
 
-
 void network_impl::build_insts_deps()
 {
     for (auto& inst : _primitives)
@@ -259,18 +320,32 @@ void network_impl::build_insts_deps()
     }
 }
 
+void network_impl::build_exec_order()
+{
+    for (auto& node : _program->get_processing_order())
+    {
+        if (!node->is_type<data>() &&
+            !(node->is_type<mutable_data>() && node->get_dependencies().empty()))
+        {
+            add_to_exec_order(node->id());
+        }
+    }
+}
+void network_impl::add_to_exec_order(const primitive_id& id)
+{
+    auto inst = get_primitive(id);
+    _exec_order.push_back(inst);
+}
+
 void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& events)
 {
     //Wait for previous execution completion
     reset_execution(false);
 
-    for (auto& inst : _program->get_processing_order())
+    for (auto& inst : _exec_order)
     {
-        if (!inst->is_type<data>() &&
-            !(inst->is_type<mutable_data>() && inst->get_dependencies().empty()))
-        {
 #ifdef DEBUG_DUMP_PATH
-            auto& node = _program->get_node(inst->id());
+        auto& node = _program->get_node(inst->id());
 
         std::string layer_name = node.id();
 #if DUMP_VERBOSE
@@ -287,10 +362,9 @@ void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& ev
             }
         }
 #endif
-            execute_primitive(get_primitive(inst->id()), events);
-            _exec_order.push_back(get_primitive(inst->id()));
+        execute_primitive(inst, events);
 #ifdef DEBUG_DUMP_PATH
-            #if DUMP_SINGLE_LAYER
+#if DUMP_SINGLE_LAYER
         if (layer_name == DUMP_LAYER_NAME)
 #endif
         {
@@ -298,7 +372,6 @@ void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& ev
         }
         get_engine().flush_network();
 #endif
-        }
     }
 
     for (auto& inst : _program->get_processing_order())
@@ -307,10 +380,10 @@ void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& ev
         //the mutable_data can be updated when is both user or dependency.
         if (inst->is_type<mutable_data>())
         {
-            decltype(inst->get_processing_num()) proc_num = 0;
+            decltype(_program->get_processing_order().get_processing_number(inst)) proc_num = 0;
             for (auto& user : inst->get_users())
             {
-                auto user_proc_num = user->get_processing_num();
+                auto user_proc_num = _program->get_processing_order().get_processing_number(user);
                 if (user_proc_num > proc_num)
                 {
                     _events[inst->id()] = _events[user->id()];
@@ -322,7 +395,7 @@ void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& ev
             {
                 for (auto& dep : inst->get_dependencies())
                 {
-                    auto dep_proc_num = dep->get_processing_num();
+                    auto dep_proc_num = _program->get_processing_order().get_processing_number(dep);
                     if (dep_proc_num > proc_num)
                     {
                         _events[inst->id()] = _events[dep->id()];
@@ -343,8 +416,10 @@ void network_impl::execute(const std::vector<refcounted_obj_ptr<event_impl>>& ev
         prim.second->reset_output_change();
     }
 
-    // Using output of previouse network as input to another one may cause hazard (in OOOQ mode) if user would not 
-    // provide proper event to execution. Flushing pipeline should prevent this kind of issues. 
+    get_engine().get_context()->reset_events();
+
+    // Using output of previouse network as input to another one may cause hazard (in OOOQ mode) if user would not
+    // provide proper event to execution. Flushing pipeline should prevent this kind of issues.
     // In scenarios with a big number of very small networks it can provide performance drop.
     get_engine().flush_network();
 }
@@ -363,7 +438,9 @@ std::vector<primitive_id> network_impl::get_executed_primitive_ids() const
     std::vector<primitive_id> ret;
     ret.reserve(_exec_order.size());
     for (auto const& executed_primitive : _exec_order)
+    {
         ret.push_back(executed_primitive->id());
+    }
     return ret;
 }
 
@@ -410,7 +487,7 @@ std::vector<std::shared_ptr<primitive_inst>> network_impl::get_primitives(const
     return result;
 }
 
-refcounted_obj_ptr<event_impl> network_impl::execute_primitive(const std::shared_ptr<primitive_inst>& primitive, const std::vector<refcounted_obj_ptr<event_impl>>& events)
+void network_impl::execute_primitive(const std::shared_ptr<primitive_inst>& primitive, const std::vector<refcounted_obj_ptr<event_impl>>& events)
 {
     auto id = primitive->id();
     auto it = _events.find(id);
@@ -422,9 +499,7 @@ refcounted_obj_ptr<event_impl> network_impl::execute_primitive(const std::shared
         ev = primitive->execute(events);
     else
         ev = get_engine().create_user_event(true);
-
     _events.insert({ id, ev });
-    return ev;
 }
 
 void network_impl::allocate_primitive_instance(program_node const& node)
@@ -443,5 +518,4 @@ void network_impl::allocate_primitive_instance(program_node const& node)
             _data_outputs.push_back(inst);
     }
 }
-
 }
diff --git a/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp b/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp
new file mode 100644
index 000000000..ae19ac5ab
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/nodes_ordering.cpp
@@ -0,0 +1,119 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "program_impl.h"
+#include "program_node.h"
+#include "error_handler.h"
+
+namespace cldnn
+{
+    // helper method for calc_processing order
+    void program_impl::nodes_ordering::calc_processing_order_visit(program_node* node)
+    {
+        if (node->is_marked())
+            return;
+        for (auto user : node->users)
+        {
+            calc_processing_order_visit(user);
+        }
+        node->mark();
+        _processing_order.push_front(node);
+        processing_order_iterators[node] = _processing_order.begin();
+        return;
+    }
+
+    //DFS to sort nodes topologically
+    //any topological sort of nodes is required for further optimizations
+    void program_impl::nodes_ordering::calc_processing_order(program_impl& p)
+    {
+        _processing_order.clear();
+        for (auto input : p.get_inputs())
+        {
+            calc_processing_order_visit(input);
+        }
+        for (auto& node : _processing_order)
+        {
+            node->unmark();
+        }
+        return;
+    }
+
+    /*
+    recalculate processing_order
+    algorithm based on: CLRS 24.5 (critical path in DAG)
+    modifications: adjust for multiple inputs
+    input: any topological order in processing order
+    output: BFS topological order.
+    */
+    void program_impl::nodes_ordering::calculate_BFS_processing_order()
+    {
+        std::map<program_node*, int> distances;
+        for (auto itr : _processing_order)
+        {
+            distances[itr] = -1;
+        }
+        int max_distance = 0;
+        for (auto itr : _processing_order)
+        {
+            //Init
+            if (distances[itr] == -1) {     // this must be an input
+                distances[itr] = 0;         // initialize input
+            }
+            // RELAX
+            for (auto& user : itr->get_users())
+            {
+                distances[user] = std::max(distances[user], distances[itr] + 1);
+                max_distance = std::max(max_distance, distances[user]);
+            }
+        }
+
+        //bucket sort nodes based on their max distance from input
+        std::vector<std::vector<program_node*>> dist_lists;
+        dist_lists.resize(max_distance + 1);
+        for (auto itr : _processing_order)
+        {
+            dist_lists[distances[itr]].push_back(itr);
+        }
+
+        //replace the old processing order by the new one, still topological.
+        _processing_order.clear();
+        for (auto& dist : dist_lists)
+        {
+            for (auto& node : dist)
+            {
+                _processing_order.push_back(node);
+                processing_order_iterators[node] = _processing_order.end();
+                processing_order_iterators[node]--;
+            }
+        }
+        return;
+    }
+
+    //verifies if a given node will be processed before all its dependent nodes
+    bool program_impl::nodes_ordering::is_correct(program_node* node)
+    {
+        for (auto& dep : node->get_dependencies())
+        {
+            if (get_processing_number(node) < get_processing_number(dep))
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/normalize.cpp b/inference-engine/thirdparty/clDNN/src/normalize.cpp
index d9ec578ff..e364575c1 100644
--- a/inference-engine/thirdparty/clDNN/src/normalize.cpp
+++ b/inference-engine/thirdparty/clDNN/src/normalize.cpp
@@ -29,6 +29,8 @@ primitive_type_id normalize_type_id()
 
 layout normalize_inst::calc_output_layout(normalize_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for normalize_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/one_hot.cpp b/inference-engine/thirdparty/clDNN/src/one_hot.cpp
new file mode 100644
index 000000000..a7c1539a9
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/one_hot.cpp
@@ -0,0 +1,97 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "one_hot_inst.h"
+
+#include "error_handler.h"
+#include "json_object.h"
+#include "primitive_type_base.h"
+
+
+namespace cldnn
+{
+    primitive_type_id one_hot_type_id()
+    {
+        static primitive_type_base<one_hot> instance;
+        return &instance;
+    }
+
+    layout one_hot_inst::calc_output_layout(one_hot_node const& node)
+    {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for one_hot_node!");
+        auto input_layout = node.input().get_output_layout();
+        auto desc = node.get_primitive();
+
+        if (desc->one_hot_axis > 3)
+        {
+            CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: one_hot_axis should be less or equal to 3.");
+        }
+
+        return{ input_layout.data_type, input_layout.format, desc->shape };
+    }
+
+    std::string one_hot_inst::to_string(one_hot_node const& node)
+    {
+        auto desc = node.get_primitive();
+        auto node_info = node.desc_to_json();
+        const auto& shape = desc->shape;
+        const auto& one_hot_axis = desc->one_hot_axis;
+        auto& input = node.input();
+
+        std::stringstream primitive_description;
+
+        json_composite one_hot_info;
+        one_hot_info.add("input id", input.id());
+        one_hot_info.add("output shape", shape.to_string());
+        one_hot_info.add("one-hot axis", one_hot_axis);
+
+        node_info->add("one_hot info", one_hot_info);
+        node_info->dump(primitive_description);
+
+        return primitive_description.str();
+    }
+
+    one_hot_inst::typed_primitive_inst(network_impl& network, one_hot_node const& node)
+        : parent(network, node)
+    {
+        auto input_layout = node.input().get_output_layout();
+
+        const auto& input_sizes = input_layout.size;
+        const auto& output_sizes = argument.shape;
+
+        std::vector<tensor::value_type> input_dims = { input_sizes.batch[0], input_sizes.feature[0],
+            input_sizes.spatial[1], input_sizes.spatial[0] };
+        std::vector<tensor::value_type> output_dims = { output_sizes.batch[0], output_sizes.feature[0],
+            output_sizes.spatial[1], output_sizes.spatial[0] };
+
+        const auto& one_hot_axis = node.get_primitive()->one_hot_axis;
+        if (input_dims[0] != 1)
+        {
+            CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: input batch size should be equal to 1.");
+        }
+
+        //bfyx format
+        for (int i = 3, j = 3; i > 0; --i, --j)
+        {
+            if (j == one_hot_axis)
+                --j;
+            if (input_dims[i] != output_dims[j])
+            {
+                CLDNN_ERROR_MESSAGE(node.id(), "Incorrect parameters configuration: shape does not fit input size.");
+            }
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/permute.cpp b/inference-engine/thirdparty/clDNN/src/permute.cpp
index af3259748..38e684e97 100644
--- a/inference-engine/thirdparty/clDNN/src/permute.cpp
+++ b/inference-engine/thirdparty/clDNN/src/permute.cpp
@@ -31,42 +31,18 @@ primitive_type_id permute_type_id()
     return &instance;
 }
 
-static std::vector<uint16_t> get_permute_order(permute_node const& node, format::type fmt)
-{
-
-    CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "node format", fmt, "byxf, yxfb, bfyx, fyxb", format::byxf, format::yxfb, format::bfyx, format::fyxb);
-    switch (fmt)
-    {
-        // For input formats:
-        // 0 - batch (b), 1 - feature (f), 2, 3 - spatial (x -> 2, y -> 3)
-    case format::byxf:
-        return{ 0, 3, 2, 1 };
-
-    case format::yxfb:
-        return{ 3, 2, 1, 0 };
-
-    case format::bfyx:
-        return{ 0, 1, 3, 2 };
-
-    case format::fyxb:
-        return{ 1, 3, 2, 0 };
-
-    default:
-        throw std::invalid_argument("This format is not supported in GPU permute_inst");
-    }
-}
 
 layout permute_inst::calc_output_layout(permute_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for permute_node!");
     auto input_layout = node.input().get_output_layout();
     auto permute_order = node.get_primitive()->permute_order;
-    auto input_sizes_ordered = input_layout.size.sizes(input_layout.format);
-
-    const auto& fmt_2_bfxy = get_permute_order(node, input_layout.format);
     std::vector<tensor::value_type> output_sizes;
-    for (auto i : fmt_2_bfxy)
+
+    for (size_t x = 0; x < permute_order.size(); x++)
     {
-        output_sizes.push_back(input_sizes_ordered[permute_order[i]]);
+        output_sizes.push_back(input_layout.size.raw[permute_order[x]]);
     }
 
     auto input_size = tensor(output_sizes);
diff --git a/inference-engine/thirdparty/clDNN/src/pooling.cpp b/inference-engine/thirdparty/clDNN/src/pooling.cpp
index 6006d1400..18ecebcf8 100644
--- a/inference-engine/thirdparty/clDNN/src/pooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/pooling.cpp
@@ -30,6 +30,8 @@ primitive_type_id pooling_type_id()
 
 layout pooling_inst::calc_output_layout(parent::typed_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for pooling_node!");
     auto desc = node.get_primitive();
 
     auto input_layout = node.input().get_output_layout();
@@ -50,6 +52,11 @@ layout pooling_inst::calc_output_layout(parent::typed_node const& node)
         CLDNN_ERROR_NOT_PROPER_FORMAT(node.id(), "Input_layout.format", input_layout.format.value, "argmax_layout.format", argmax_layout.format);
     }
 
+    if (desc->global_pooling) {
+        window_size.spatial[0] = input_layout.size.spatial[0];
+        window_size.spatial[1] = input_layout.size.spatial[1];	
+    }
+
     // TODO: Consider moving general parameter verification to arguments constructor.
     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "stride spatial X", stride.spatial[0], "", 0, "Stride spatial X must be positive (>= 1)");
     CLDNN_ERROR_LESS_OR_EQUAL_THAN(node.id(), "stride spatial Y", stride.spatial[1], "", 0, "Stride spatial Y must be positive (>= 1)");
diff --git a/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp b/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp
index 32c78618b..30ff83663 100644
--- a/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp
+++ b/inference-engine/thirdparty/clDNN/src/primitive_inst.cpp
@@ -22,6 +22,7 @@
 #include "input_layout_inst.h"
 #include "max_unpooling_inst.h"
 #include "apply_adam_inst.h"
+#include "fused_conv_eltwise_inst.h"
 
 #include "network_impl.h"
 #include "engine_impl.h"
@@ -40,11 +41,12 @@ uint32_t primitive_inst::get_network_id() const
 
 event_impl::ptr primitive_inst::execute(const std::vector<event_impl::ptr>& events)
 {
-    CLDNN_ERROR_BOOL(id(), "Invalid/unset input", !_has_valid_input, "Cannot execute primitive " + id() + " with invalid/unset input");
+    const auto primitive_id = id();
+    CLDNN_ERROR_BOOL(primitive_id, "Invalid/unset input", !_has_valid_input, "Cannot execute primitive " + primitive_id + " with invalid/unset input");
     on_execute();
 
     if (_exec_deps.size() == 0)
-       return _impl->execute(events, *this);      
+        return _impl->execute(events, *this);
 
     std::vector<event_impl::ptr> dependencies;
     dependencies.reserve(_exec_deps.size());
@@ -53,15 +55,15 @@ event_impl::ptr primitive_inst::execute(const std::vector<event_impl::ptr>& even
         auto id = input->id();
         try {
             // if the requested event deos not exits it means that it has not been executed, so the processing_order is wrong or synchronization failed.
-            auto ev = get_network().get_primitive_event(id); 
+            auto ev = get_network().get_primitive_event(id);
             dependencies.emplace_back(ev);
-        }
+            }
         catch (const std::out_of_range& oor) {
-            std::string temp = std::string("internal CLDNN error: execution order corrupted.") + std::string("\n") +  std::string(oor.what() + std::string("\n"));
+            std::string temp = std::string("internal CLDNN error: execution order corrupted.") + std::string("\n") + std::string(oor.what() + std::string("\n"));
             CLDNN_ERROR_MESSAGE(id, temp);
         }
     }
-    return _impl->execute(dependencies, *this);  
+    return _impl->execute(dependencies, *this);
 }
 
 void primitive_inst::build_deps()
@@ -95,6 +97,16 @@ primitive_inst::primitive_inst(network_impl& network, program_node const& node,
             //For certain primitives, it is known which dependency is used for synchronization only
             else if (user->is_type<apply_adam>() && (user->as<apply_adam>().has_additional_dep()) && (user->as<apply_adam>().additional_dep().id() == node.id()))
                 user_count--;
+            else if (user->is_type<fused_conv_eltwise>())
+            {
+                if ((*user->as<fused_conv_eltwise>().get_users().begin())->is_type<mutable_data>())
+                {
+                    if (user->as<fused_conv_eltwise>().get_dependency(1).id() == node.id())
+                    {
+                        user_count--;
+                    }
+                }
+            } 
         }
 
         if (user_count == 1 && mutable_data_count == 1)
@@ -119,15 +131,9 @@ memory_impl::ptr primitive_inst::allocate_output()
         return get_network().get_engine().allocate_memory(layout, _node.id(), get_network_id(), _node.get_memory_dependencies(), false);
     }
     else if (_network.is_internal() ||
-        _node.is_type<data>() ||
-        _node.is_type<mutable_data>() ||
-        _node.is_type<input_layout>() ||
-        //for max_unpooling initial zero values are significant
-        _node.is_type<max_unpooling>() ||
-        //apply adam's output initial val should be either 0 or use same buffer as mutable_data after it (no allocation needed)
-        _node.is_type<apply_adam>() ||
-        _node.can_be_optimized() ||
-        _node.is_output())
+             (!_node.can_share_buffer()) ||
+             _node.can_be_optimized() ||
+            _node.is_output())
     {
         return get_network().get_engine().allocate_memory(layout);
     }
diff --git a/inference-engine/thirdparty/clDNN/src/prior_box.cpp b/inference-engine/thirdparty/clDNN/src/prior_box.cpp
index 6f3678b8c..d4a53fb66 100644
--- a/inference-engine/thirdparty/clDNN/src/prior_box.cpp
+++ b/inference-engine/thirdparty/clDNN/src/prior_box.cpp
@@ -194,6 +194,8 @@ void prior_box_node::calc_result()
 
 layout prior_box_inst::calc_output_layout(prior_box_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for prior_box_node!");
     auto desc = node.get_primitive();
     auto input_layout = node.input().get_output_layout();
     assert(input_layout.size.spatial.size() == 2);
diff --git a/inference-engine/thirdparty/clDNN/src/program.cpp b/inference-engine/thirdparty/clDNN/src/program.cpp
index 005e8833d..731da08e9 100644
--- a/inference-engine/thirdparty/clDNN/src/program.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program.cpp
@@ -16,55 +16,38 @@
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
-#include "program_impl.h"
-#include "primitive_inst.h"
-#include "layout_optimizer.h"
-#include "constants_propagator.h"
-
-#include "primitive_type.h"
-#include "api/CPP/activation.hpp"
-#include "api/CPP/eltwise.hpp"
-#include "api/CPP/input_layout.hpp"
-#include "api/CPP/pooling.hpp"
-#include "api/CPP/proposal.hpp"
-#include "api/CPP/roi_pooling.hpp"
-#include "api/CPP/reorg_yolo.hpp"
-
-#include "activation_inst.h"
-#include "batch_norm_inst.h"
+#include "error_handler.h"
+#include "kernel_selector_helper.h"
 #include "internal_primitive.h"
 #include "internal_primitive_type_base.h"
+#include "layout_optimizer.h"
+#include "pass_manager.h"
+#include "primitive_type.h"
+#include "program_dump_graph.h"
+#include "program_helpers.h"
+#include "program_impl.h"
+#include "sliding_window_utils.h"
+
 #include "convolution_inst.h"
 #include "concatenation_inst.h"
 #include "crop_inst.h"
 #include "data_inst.h"
-#include "mutable_data_inst.h"
 #include "deconvolution_inst.h"
 #include "detection_output_inst.h"
-#include "lrn_inst.h"
-#include "normalize_inst.h"
-#include "permute_inst.h"
+#include "input_layout_inst.h"
+#include "lstm_inst.h"
+#include "lstm_elt_inst.h"
+#include "lstm_gemm_inst.h"
+#include "mutable_data_inst.h"
+#include "pooling_inst.h"
+#include "primitive_inst.h"
 #include "prior_box_inst.h"
+#include "proposal_inst.h"
 #include "reorder_inst.h"
 #include "reshape_inst.h"
-#include "scale_inst.h"
-#include "embed_inst.h"
-#include "softmax_inst.h"
 #include "split_inst.h"
-#include "program_dump_graph.h"
-#include "upsampling_inst.h"
-#include "eltwise_inst.h"
-#include "fully_connected_inst.h"
-#include "mvn_inst.h"
-#include "lstm_inst.h"
-#include "lstm_gemm_inst.h"
-#include "lstm_elt_inst.h"
-#include "embed_inst.h"
 
-#include "network_impl.h"
-#include "kernel_selector_helper.h"
-#include "sliding_window_utils.h"
-#include "error_handler.h"
+#include "gpu/ocl_toolkit.h"
 
 #include <fstream>
 #include <algorithm>
@@ -72,214 +55,55 @@
 #include <iostream>
 #include <sstream>
 #include <iomanip>
+#include <memory>
 
+program_impl::program_impl(engine_impl& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal, bool no_optimizations)
+    : engine(&engine_ref), options(options), processing_order(* new nodes_ordering), pm(std::unique_ptr<pass_manager>(new pass_manager()))
+{
+    set_options();
+    prepare_nodes(topology);
+    if (no_optimizations)
+        init_graph();
+    else
+        build_program(is_internal);
+}
 
-namespace {
-
-    //helper function for selecting function basing on the type of the given primitive
-    //this is the termination case for parameter pack recurrence, see overload below for logic
-    template <class... T>
-    void do_for_types(program_node&)
-    {
-        return;
-    }
-
-    //helper function for selecting function basing on the type of the given primitive
-    //this function should be explicitly given set of types and implicitly set of functions.
-    //both sets should have equal size. First function will be called if type of the given primitive
-    //will match first explicitly given type, second will be called if it matches second explicitly given
-    //type etc.
-    //Functions given as arguments should themselves take std::shared_ptr<const T> as argument
-    //where T is the type that should be match if this function should be called
-    //
-    //example:
-    // do_for_types<
-    //      convolution,
-    //      pooling
-    //  >(primitive,
-    //      [](typed_program_node<convolution>&){ do something if 'primitive' is a convolution },
-    //      [](typed_program_node<pooling>&)    { do something if 'primitive' is a pooling }
-    //  );
-    template <class T, class... RestOfT, class Func, class... RestOfFuncs>
-    decltype(static_cast<void>(std::declval<Func>()(std::declval<typed_program_node<T>&>()))) do_for_types(
-        program_node& node,
-        Func const& func,
-        RestOfFuncs const&... rest)
-    {
-        if (node.type() == T::type_id())
-            func(node.as<T>());
-        else
-            do_for_types<RestOfT...>(node, rest...);
-    }
-
-    template <class T>
-    struct single_element_container
-    {
-        single_element_container(T& t) : elem(&t)
-        {}
-
-        constexpr size_t size() const { return 1; }
-        single_element_container<T> begin() const { return single_element_container(elem); }
-        single_element_container<T> end() const { return single_element_container(nullptr); }
-        single_element_container<T>& operator ++() { elem = nullptr; return *this; }
-        bool operator !=(single_element_container const& sec) { return elem != sec.elem; }
-
-        T operator *() { return *elem; }
-
-    private:
-        single_element_container(T* t) : elem(t)
-        {}
-
-        T* elem;
-    };
-
-    //helper function which creates single-element array if it's given anything
-    //other than std::vector.
-    //It should be used in generic code when there's a need to force vector usage
-    //in foreach loop over variable which can in one context be a vector or a scalar
-    //in another.
-    //example:
-    // T t;
-    // for (auto& string : wrap_if_single(t.dump()))
-    //depending on type T, t.dump() may return either std::string or std::vector<std::string>,
-    //to ensure compatibility between these cases, wrap_if_single will create single-element
-    //container in case t.dump() would return plain std::string.
-    //
-    // T& case -> returns container which holds T&
-    template <class T>
-    single_element_container<T> wrap_if_single(T& t)
-    {
-        return single_element_container<T>(t);
-    }
-
-    //helper function which creates single-element array if it's given anything
-    //other than std::vector.
-    // T const& case -> returns container which holds T const&
-    template <class T>
-    single_element_container<T const> wrap_if_single(T const& t)
-    {
-        return single_element_container<T const>(t);
-    }
-
-    //helper function which creates single-element array if it's given anything
-    //other than std::vector.
-    // T&& case -> returns container which holds new instance of T created by moving given param
-    template <class T>
-    single_element_container<T> wrap_if_single(T&& t)
-    {
-        static_assert(meta::always_false<T>::value, "Wrapping temporary object into single_element_container is an error (requires valid reference)");
-        return single_element_container<T>(t);
-    }
-
-    //helper function which creates single-element array if it's given anything
-    //other than std::vector.
-    // std::vector case -> does not wrap, returns t as-is
-    primitive::fixed_size_vector_ref const& wrap_if_single(primitive::fixed_size_vector_ref const& t)
-    {
-        return t;
-    }
-
-    //helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization
-    void merge_buffers(engine_impl::ptr engine, program_node &node, layout target_layout, size_t begin_offset, size_t end_offset)
-    {
-        memory_impl::ptr data_to_allocate = engine->allocate_memory(target_layout);
-
-        for (size_t i = begin_offset; i < end_offset; i++)
-        {
-            auto& weights = node.get_dependency(i).as<data>();
-            mem_lock<char> src{ weights.get_attached_memory() };
-            mem_lock<char> dst{ data_to_allocate };
-            std::copy(src.begin(), src.end(), dst.begin() + (i - begin_offset)*src.size());
-        }
-
-        for (size_t i = 0; i < end_offset - begin_offset - 1; i++)
-            node.remove_dependency(begin_offset + 1);
+program_impl::program_impl(engine_impl& engine_ref, std::set<std::shared_ptr<program_node>> const& nodes, build_options const& options, bool is_internal)
+    : engine(&engine_ref), options(options), processing_order(*new nodes_ordering), pm(std::unique_ptr<pass_manager>(new pass_manager()))
+{
+    set_options();
+    prepare_nodes(nodes);
+    build_program(is_internal);
+}
 
-        auto& data_node = node.get_dependency(begin_offset).as<data>();
-        data_node.attach_memory(*data_to_allocate, false);
-    }
+program_impl::~program_impl() = default;
 
-    //helper function for getting target layout used in depthwise sep optimization
-    layout get_weights_layout(typed_program_node<cldnn::data> &data_node, int32_t split)
+program_node& program_impl::get_node(primitive_id const& id)
+{
+    try
     {
-        auto mem_layout = data_node.get_output_layout();
-
-        return layout(mem_layout.data_type, mem_layout.format, { split * mem_layout.size.batch[0], mem_layout.size.feature[0], mem_layout.size.spatial[0], mem_layout.size.spatial[1] });
+        return *nodes_map.at(id);
     }
-
-    // pair.first tells whether l1 and l2 are absolutely identical
-    // pair.second tells whether l1 and l2 can be reinterpreted to each other without need of reordering
-    // note: layouts can only be considered identical if data size described by both layouts match (so no data are genereted nor dropped)
-    // note: if layouts describe two buffers with different size, consider them not to be identical even if smaller buffer can be considered to hold subsequence of larger buffer,
-    //       this behavior is required to force buffer allocation for smaller buffer which, currently, should always be performed
-    std::pair<bool, bool> are_layouts_identical(layout const& l1, layout const& l2)
+    catch (...)
     {
-        if (l1 == l2)
-            return{ true, true };
-        if (l1.data_type != l2.data_type)
-            return{ false, false };
-        if (l1.size != l2.size)
-            return{ false, false };
-        if (l1.get_linear_size() != l2.get_linear_size())
-            return{ false, false };
-        if ((l1.format == format::bf8_xy16 && l2.format != format::bf8_xy16) ||
-            (l2.format == format::bf8_xy16 && l1.format != format::bf8_xy16))
-            return{ false, false };
-
-        auto l1_pitch = l1.get_pitches();
-        auto l2_pitch = l2.get_pitches();
-
-        //ignore pitches which will never be used (for dims with size == 1)
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
-            if (l1.size.raw[i] == 1)
-                l1_pitch.raw[i] = 0;
-        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
-            if (l2.size.raw[i] == 1)
-                l2_pitch.raw[i] = 0;
-
-        auto l1_offset = l1.get_linear_offset();
-        auto l2_offset = l2.get_linear_offset();
-        if (l1_pitch == l2_pitch && l1_offset == l2_offset)
-            return{ false, true };
-
-        return{ false, false };
+        throw std::runtime_error("Program doesn't contain primtive node: " + id);
     }
 }
 
-program_impl::program_impl(engine_impl& engine_ref, topology_impl const& topology, build_options const& options, bool is_internal)
-    : engine(&engine_ref), options(options), output_size_handling_enabled(true)
+program_node const& program_impl::get_node(primitive_id const& id) const
 {
-    static std::atomic<uint32_t> id_gen{ 0 };
-    prog_id = ++id_gen;
-    assert(prog_id != 0);
-
-    if ((options.get<build_option_type::tuning_config>()->config.mode == tuning_mode::tuning_tune_and_cache) &&
-        !engine->configuration().enable_profiling)
+    try
     {
-        throw std::invalid_argument("Engine must be created with profiling enabled in tune_and_cache mode!");
+        return *nodes_map.at(id);
     }
-
-    init_graph(topology);
-    pre_optimize_graph();
-    compile_graph();
-    post_optimize_graph();
-
-    engine->compile_program(*this);
-    this->dump_program("13_finished", true);
-
-    //Makes serialization with given name.
-    //Placeholder, not working yet, in progress.
-    auto serialization_network_name = get_serialization_network_name(options);
-    if (!serialization_network_name.empty() && !is_internal)
+    catch (...)
     {
-        this->serialize(serialization_network_name);
+        throw std::runtime_error("Program doesn't contain primtive node: " + id);
     }
-
-    cleanup();
 }
 
 // TODO: Remove once we will get full support for input/output padding in all primitive implementations.
-void program_impl::analyze_output_size_handling_need()
+bool program_impl::analyze_output_size_handling_need()
 {
     bool handling_needed = false;
 
@@ -344,883 +168,362 @@ void program_impl::analyze_output_size_handling_need()
         }
     }
 
-    output_size_handling_enabled = handling_needed;
+    return handling_needed;
 }
 
-std::list<std::shared_ptr<program_node>> program_impl::get_nodes() const
+// create new nodes for a program based on the set of nodes
+// method created to be used by propagate_constants to build sub program from constant nodes 
+void program_impl::prepare_nodes(std::set<std::shared_ptr<program_node>>const &nodes)
 {
-    std::list<std::shared_ptr<program_node>> ret;
-
-    for (auto& node : processing_order)
-        ret.push_back(nodes_map.at(node->id()));
-    return ret;
-}
-
-void program_impl::init_graph(topology_impl const& topology)
-{
-    auto const& topo_map = topology.get_primitives();
-    for (auto const& prim : topo_map)
+    for (const auto& itr : nodes)
     {
-        auto& n = get_or_create(prim.second);
-        inputs.push_back(&n);
+        if (itr.get()->is_type<data>())
+        {
+            get_or_create(
+                std::make_shared<input_layout>(itr.get()->id(), itr.get()->as<data>().get_primitive()->mem.get_layout())
+            );
+        }
+        else
+        {
+            get_or_create(itr->desc);
+        }
     }
-    replace_nodes_pre();
-
-    for (auto itr = inputs.begin(); itr != inputs.end(); )
+    for (const auto& node : nodes_map)
     {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-        auto deps = node->get_primitive()->dependencies();
-        if (deps.empty())
-            continue;
-
-        //add pointers to node's dependencies
-        for (auto& dep : deps)
+        auto node_ptr = node.second;
+        if (node_ptr == nullptr)
+            throw error("NULL pointer in nodes_map.", CLDNN_ERROR);
+        //ToDo: avoid O(n^2) run time here (pass map instead of set?)
+        bool found = false;
+        for (const auto& src_node : nodes)
         {
-            try {
-                auto dep_node = nodes_map.at(dep);
-                node->dependencies.push_back(dep_node.get());
-                dep_node->users.push_back(node);
-            }
-            catch (...) {
-                throw std::runtime_error("Program doesn't contain primitive: " + dep +
-                    " that is input to: " + node->get_primitive()->id);
+            if (src_node == nullptr)
+                throw error("NULL pointer in nodes_map.", CLDNN_ERROR);
+            if (node.first == src_node->get_primitive()->id)
+            {
+                copy_node_dependencies(node_ptr.get(), src_node.get());
+                found = true;
+                break;
             }
         }
-
-        //primitive has dependencies so remove it from 'inputs'
-        inputs.erase(node_itr);
+        if (!found)
+        {
+            add_node_dependencies(node_ptr.get());
+        }
+        if (node_ptr->dependencies.size() == 0)
+            inputs.push_back(node_ptr.get());
     }
-
-    replace_nodes_post();
-    handle_lstm();
-    set_outputs();
-    calc_processing_order();
-
-    dump_program("0_init", true);
-
-    calc_prior_boxes(); dump_program("1_calculated_prior_boxes", true);
-    mark_constants();
-    mark_data_flow();
-    dump_program("2_analyzed_graph", true);
 }
 
-void program_impl::pre_optimize_graph()
+// create all nodes from topology primitives, add dependencies among them and create inputs list
+void program_impl::prepare_nodes(topology_impl const &topology)
 {
-    trim_to_outputs(); dump_program("3_trimmed", true);
-    calculate_BFS_processing_order();
-    analyze_output_size_handling_need();
-    for (auto& node : processing_order)
-    {
-        if (!node->is_type<internal_primitive>() && !node->is_type<data>())
-            node->get_output_layout();
-    }
-
-    if (options.get<build_option_type::optimize_data>()->enabled())
+    auto const& topo_map = topology.get_primitives();
+    for (const auto& prim : topo_map)
     {
-        prepare_primitive_fusing();
-        layout_optimizer lo(output_size_handling_enabled);
-        reorder_inputs(lo);
-        // this code should move to post compilation after kernel selector will support handling reorder bias
-        pre_optimize_bias(lo);
-        dump_program("4_reordered_inputs", true);
+        get_or_create(prim.second);
     }
-
-    handle_reshape();
-    remove_redundant_reorders(); dump_program("5_removed_redundant_reorders", true);
-    prepare_padding();
-    prepare_depthwise_sep_opt();
-
-    propagate_constants(); dump_program("6_propagated_constants", true);
-
-    //try to fuse buffers (i.e. depth_concat in bfyx format) after padding calculations
-    if (options.get<build_option_type::optimize_data>()->enabled())
+    add_split_outputs();
+    for (const auto& node : nodes_map)
     {
-        prepare_buffer_fusing();
+        auto node_ptr = node.second.get();
+        if (node_ptr == nullptr)
+            throw error("NULL pointer in nodes_map.", CLDNN_ERROR);
+        add_node_dependencies(node_ptr);
+        if (node_ptr->dependencies.size()==0)
+        {
+            inputs.push_back(node_ptr);
+        }
     }
-
-    dump_program("7_pre_optimized", true);
 }
 
-void program_impl::compile_graph()
+// add node's dependecies from its primitive dependencies
+void program_impl::add_node_dependencies(program_node* node)
 {
-    for (auto& node : processing_order)
+    auto deps = node->get_primitive()->dependencies();
+    //add pointers to node's dependencies
+    for (auto& dep : deps)
     {
-        if (!node->is_type<internal_primitive>() && !node->is_type<data>())
-        {
-            node->get_output_layout();
-            if (!node->is_type<data>() && !(node->is_type<mutable_data>() && node->get_dependencies().empty()))
-                node->selected_impl = node->type()->choose_impl(*engine, *node);
+        try {
+            auto dep_node = nodes_map.at(dep);
+            node->dependencies.push_back(dep_node.get());
+            dep_node->users.push_back(node);
+        }
+        catch (...) {
+            throw std::runtime_error("Program doesn't contain primitive: " + dep +
+                " that is input to: " + node->get_primitive()->id);
         }
     }
-
-    dump_program("8_compiled", true);
 }
 
-void program_impl::post_optimize_graph()
+/* helper method for program_impl constructor from list of nodes which
+   copies src_node dependecies to the destination node dest_node dependencies.
+   But only to those which appaer in this program implementation nodes_map */
+void program_impl::copy_node_dependencies(program_node* dest_node, program_node* src_node)
 {
-    layout_optimizer lo;
-    post_optimize_weights(lo); dump_program("9_reordered_weights", true);
-    remove_redundant_reorders(); dump_program("10_removed_redundant_reorders", true); //TODO: do we need it at this place also?
-    propagate_constants(); dump_program("11_propagated_constants", true);
-    prep_opt_depthwise_sep_post();
-    update_processing_numbers(); dump_program("12_validated_processing_order", true);
-    prepare_memory_dependencies();
+    if (dest_node->get_primitive()->id != src_node->get_primitive()->id)
+    {
+        throw std::runtime_error("Node " + src_node->get_primitive()->id +  " and its copy " + dest_node->get_primitive()->id + " do not match.");
+    }
+    auto src_deps = src_node->get_dependencies();
+    //add pointers to node's dependencies
+    for (auto& src_dep : src_deps)
+    {
+        // do not copy dependencies to nodes which does not belong to the new (subgraph) topology
+        if (nodes_map.find(src_dep->get_primitive()->id) == nodes_map.end()) continue;
+
+        try {
+            auto dest_dep = nodes_map.at(src_dep->get_primitive()->id);
+            dest_node->dependencies.push_back(dest_dep.get());
+            dest_dep->users.push_back(dest_node);
+        }
+        catch (...) {
+            throw std::runtime_error("Program doesn't contain primitive: " + src_dep->get_primitive()->id +
+                " that is input to: " + src_node->get_primitive()->id);
+        }
+    }
 }
 
-void program_impl::cleanup()
+void program_impl::set_options()
 {
-    for (auto& node : processing_order)
-        if (!node->is_type<internal_primitive>())
-            node->get_output_layout();
+    static std::atomic<uint32_t> id_gen{ 0 };
+    prog_id = ++id_gen;
+    assert(prog_id != 0);
 
-    //in debug build, at the end, mark all nodes as outputs so user can query for buffers of all not-optimized nodes, including internal ones etc.
-    if (is_debug_build())
+    if ((options.get<build_option_type::tuning_config>()->config.mode == tuning_mode::tuning_tune_and_cache) &&
+        !engine->configuration().enable_profiling)
     {
-        for (auto& node : processing_order)
-        {
-            if (!node->is_output())
-            {
-                node->set_output(true);
-                outputs.push_back(node);
-            }
-        }
+        throw std::invalid_argument("Engine must be created with profiling enabled in tune_and_cache mode!");
     }
 }
 
-std::string get_id_string(size_t i) {
-    std::stringstream ss;
-    ss << std::setw(5) << std::setfill('0') << i;
-    return ss.str();
-}
-
-void program_impl::replace_nodes_pre()
+void program_impl::build_program(bool is_internal)
 {
-    auto itr = nodes_map.begin();
-    while (itr != nodes_map.end())
+    init_graph();
     {
-        auto node_itr = itr++;
-        auto& node = (*node_itr).second;
-
-        //find split primitives and create crop primitives out of them
-        if (node->is_type<split>())
-        {
-            auto split_prim = node->as<split>().typed_desc();
-            primitive_id input_id = split_prim->input[0];
-            auto split_num = split_prim->output_offsets.size();
-
-            //create crop for each split ouptut provided
-            for (decltype(split_num) i = 0; i < split_num; i++)
-            {
-                primitive_id output_id = node->id() + ":" + split_prim->output_ids[i];
-
-                //create dummy crop primitive and add it to nodes map
-                auto crop_prim = std::make_shared<crop>(output_id, input_id, tensor{ 1,1,1,1 }, split_prim->output_offsets[i]);
-                get_or_create(crop_prim);
-            }
-        }
+        pre_optimize_graph(is_internal);
+    }
+    run_graph_compilation();
+    {
+        post_optimize_graph(is_internal);
     }
+    engine->compile_program(*this);
+    this->dump_program("finished", true);
+    cleanup();
 }
 
-
-void program_impl::replace_nodes_post()
+void program_impl::init_graph()
 {
-    auto itr = nodes_map.begin(); //note we need to use iterators since currently processed element can be removed
-    while (itr != nodes_map.end())
-    {
-        auto node_itr = itr++;
-        auto& node = (*node_itr).second;
-
-        //find split primitives and create crop primitives out of them
-        if (node->is_type<split>())
-        {
-            //check if split is not used by any primitive, as it will be optimized
-            if (node->get_users().size() != 0)
-                throw std::logic_error("Split layer cannot be used directly! Please use split output \"" + node->id() + ":<split_output_id>\"!");
-
-            //get_output size and validate split primitive inputs
-            auto output_layout = node->get_output_layout();
-            auto output_layout_size = output_layout.size;
-
-            auto split_prim = node->as<split>().typed_desc();
-            primitive_id input_id = split_prim->input[0];
-            auto split_num = split_prim->output_offsets.size();
-
-            //create crop for each split ouptut provided
-            for (decltype(split_num) i = 0; i < split_num; i++)
-            {
-                primitive_id output_id = node->id() + ":" + split_prim->output_ids[i];
-
-                auto node_ptr = nodes_map.find(output_id)->second;
-
-                //calculate crop reference input size
-                tensor reference_input_size;
-
-                for (decltype(split_num) j = 0; j < i; j++)
-                    reference_input_size += split_prim->output_offsets[j + 1] - split_prim->output_offsets[j];
-
-                for (decltype(split_num) j = i; j < split_num - 1; j++)
-                    reference_input_size += split_prim->output_offsets[j + 1] - split_prim->output_offsets[j];
-
-                reference_input_size = output_layout_size - reference_input_size;
-
-                //update crop primitive and add connections
-                node_ptr->set_output_padding(output_layout.data_padding);
-                auto crop_prim = node_ptr->as<crop>().typed_desc();
-                crop_prim->reference_input = reference_input_size;
-
-                add_connection(node->get_dependency(0), *node_ptr);
-            }
-
-            //remove input->split connection and remove original split node
-            remove_connection(node->get_dependency(0), *node);
-            optimized_out.push_back(node->id());
-            nodes_map.erase(node->id());
-            continue;
-        }
-
-        //find upsampling primitives with bilinear filtering and create deconvolution with proper weights instead
-        if (node->is_type<upsampling>())
-        {
-            auto upsampling_prim = node->as<upsampling>().typed_desc();
-
-            if (upsampling_prim->sample_type != upsampling_sample_type::bilinear)
-                continue;
-
-            //check if num_filter is not 0 (required for bilinear upsampling)
-            if (upsampling_prim->num_filter == 0)
-                throw std::logic_error("num_filter in upsampling cannot be 0 in bilinear filtering mode in \"" + node->id() + "\"!");
-
-            primitive_id upsampling_id = node->id();
-            auto& input_node = node->get_dependency(0);
-
-            primitive_id input_id = upsampling_prim->input[0];
-            auto num_filter = upsampling_prim->num_filter;
-
-            //setting deconvolution parameters based on upsampling input
-            auto scale = static_cast<tensor::value_type>(upsampling_prim->scale);
-            tensor stride(1, 1, scale, scale);
-            auto offset = static_cast<tensor::value_type>(std::ceil((scale - 1) / 2.f));
-            tensor input_offset(0, 0, -offset, -offset);
-
-            //setting weights for deconvolution
-            auto kernel_size = static_cast<tensor::value_type>((2 * scale) - (scale % 2));
-            layout weights_layout(data_types::f32, format::bfyx, tensor(1, 1, kernel_size, kernel_size));
-
-            std::vector<primitive_id> weights_vec;
-            for (uint32_t weights_idx = 0; weights_idx < num_filter; weights_idx++)
-            {
-                memory_impl::ptr data_to_allocate = engine->allocate_memory(weights_layout);
-                mem_lock<float> dst{ data_to_allocate };
-                float *dst_data = dst.data();
-                //initialize with bilinear weights data
-                auto f = static_cast<uint32_t>(std::ceil(kernel_size / 2.0f));
-                float c = (2 * f - 1 - f % 2) / (2.f * f);
-                float x = 0.f;
-                float y = 0.f;
-                for (size_t i = 0; i < weights_layout.count(); ++i) {
-                    x = static_cast<float>(i % kernel_size);
-                    y = static_cast<float>((i / kernel_size) % kernel_size);
-                    dst_data[i] = (1 - std::abs(x / f - c)) * (1 - std::abs(y / f - c));
-                }
-
-                //create weights primitive, with dummy memory which will be replaced in firther step
-                primitive_id weights_id = upsampling_id + "_deconvolution_weights" + std::to_string(weights_idx);
-                layout dummy_layout(data_types::f32, format::bfyx, tensor(1, 1, 1, 1));
-                float zero = 0.f;
-                auto weights_prim = std::make_shared<data>(weights_id, memory::attach(dummy_layout, &zero, 1));
-                get_or_create(weights_prim);
-
-                weights_vec.push_back(weights_id);
-
-                auto weights_node_ptr = nodes_map.find(weights_id)->second;
-
-                //attach weights buffer
-                auto& data_node = weights_node_ptr->as<data>();
-                data_node.attach_memory(*data_to_allocate, false);
-            }
-
-            //remove upsampling node, rename it and move to the optimized list
-            remove_connection(node->get_dependency(0), *node);
-            auto rename_id = upsampling_id + "_tmp";
-            rename(*node, rename_id);
+    graph_initializations graph_initializations_pass;
+    pm->run(*this, graph_initializations_pass);
+
+    calculate_prior_boxes calculate_prior_boxes_pass;
+    pm->run(*this, calculate_prior_boxes_pass);
+    
+    mark_nodes mark_nodes_pass;
+    pm->run(*this, mark_nodes_pass);
+}
 
-            //create deconvolution primitive
-            auto deconv_prim = std::make_shared<deconvolution>(upsampling_id, input_id, weights_vec, stride, input_offset);
-            get_or_create(deconv_prim);
+void program_impl::run_graph_compilation() {
+    compile_graph compile_graph_pass;
+    pm->run(*this, compile_graph_pass);
+}
 
-            auto deconv_node_ptr = nodes_map.find(upsampling_id)->second;
+void program_impl::pre_optimize_graph(bool is_internal)
+{
+    trim_to_outputs trim_pass; //trim to outputs
+    pm->run(*this, trim_pass); // ToDo remove hidden dependencies from trimm pass
 
-            auto upsampling_node_ptr = nodes_map.find(rename_id)->second;
-            replace_all_usages(*upsampling_node_ptr, *deconv_node_ptr);
-            optimized_out.push_back(rename_id);
-            nodes_map.erase(rename_id);
+    handle_input_padding handle_input_padding; // handle symmetric and asymmetric padding for input
+    pm->run(*this, handle_input_padding);
 
-            //add connections input->deconvolution and weights->deconvolution
-            add_connection(input_node, *deconv_node_ptr);
+    add_reshape_to_primitives add_reshape_to_primitives_pass; // add reshape to input/parameters for some primitives
+    pm->run(*this, add_reshape_to_primitives_pass);
 
-            for (uint32_t weights_idx = 0; weights_idx < num_filter; weights_idx++)
-            {
-                auto weights_node_ptr = nodes_map.find(weights_vec[weights_idx])->second;
-                add_connection(*weights_node_ptr, *deconv_node_ptr);
-            }
-            continue;
-        }
+    processing_order.calculate_BFS_processing_order(); // this method makes sense only for OOOQ (out of order execution queue)
 
-        //find deconvolution primitives with stride 1 and change them to convolution with trasposed weights
-        if (node->is_type<deconvolution>())
-        {
-            if (!options.get<build_option_type::optimize_data>()->enabled())
-                continue;
+    bool output_size_handling_enabled = analyze_output_size_handling_need();
+    for (auto& node : processing_order)
+    {
+        if (!node->is_type<internal_primitive>() && !node->is_type<data>())
+            node->get_output_layout();
+    }
 
-            auto deconv_prim = node->as<deconvolution>().typed_desc();
+    if (options.get<build_option_type::optimize_data>()->enabled())
+    {
+        prepare_primitive_fusing prepare_primitive_fusing_pass;
+        pm->run(*this, prepare_primitive_fusing_pass);
 
-            //limit optimization to stride = 1
-            if (deconv_prim->stride.spatial[0] != 1 || deconv_prim->stride.spatial[1] != 1 || deconv_prim->gradient())
-                continue;
+        layout_optimizer lo(output_size_handling_enabled);
+        reorder_inputs reorder_inputs_pass(lo);
+        pm->run(*this, reorder_inputs_pass);
 
-            primitive_id deconv_id = node->id();
-            auto& input_node = node->get_dependency(0);
+        // this code should be moved to post compilation after kernel selector will support handling reorder bias
+        pre_optimize_bias pre_optimize_bias_pass(lo);
+        pm->run(*this, pre_optimize_bias_pass);
 
-            primitive_id input_id = deconv_prim->input[0];
-
-            //setting convolution parameters based on deconvolution params
-            auto stride = deconv_prim->stride;
-            auto weights = deconv_prim->weights;
-            std::vector<primitive_id> weights_vec;
-            for (auto& weights_id : weights)
-                weights_vec.push_back(weights_id);
-            auto biases = deconv_prim->bias;
-            std::vector<primitive_id> bias_vec;
-            for (auto& bias_id : biases)
-                bias_vec.push_back(bias_id);
-            auto input_offset = deconv_prim->input_offset;
-            auto with_activation = deconv_prim->with_activation;
-            auto activation_negative_slope = deconv_prim->activation_negative_slope;
-            auto output_padding = deconv_prim->output_padding;
-
-            //remove deconvolution node and its connections to weights and biases, rename it and move to the optimized list
-            tensor filter_size = { 1, 1, 1, 1 };
-            remove_connection(node->get_dependency(0), *node);
-            for (auto& weights_id : weights_vec)
-            {
-                auto weights_node_ptr = nodes_map.find(weights_id)->second;
-                remove_connection(*weights_node_ptr, *node);
-                //get filter spatial sizes for input offset adjustment, perform this only once as all filters shouls have same size
-                if (weights_id == weights_vec[0])
-                    filter_size = weights_node_ptr->get_output_layout().size;
-            }
+        // passes regarding conv + eltwise optimizations
 
-            input_offset.spatial[0] = std::abs(input_offset.spatial[0]) - (filter_size.spatial[0] - 1);
-            input_offset.spatial[1] = std::abs(input_offset.spatial[1]) - (filter_size.spatial[1] - 1);
+        // shrinking eltwise if users are conv 1x1 with stride > 1 optimization
+        eltwise_shrinking eltwise_shrinking_pass;
+        pm->run(*this, eltwise_shrinking_pass);
 
-            if (!bias_vec.empty())
-            {
-                for (auto& bias_id : bias_vec)
-                {
-                    auto bias_id_node_ptr = nodes_map.find(bias_id)->second;
-                    remove_connection(*bias_id_node_ptr, *node);
-                }
-            }
-            auto rename_id = deconv_id + "_tmp";
-            rename(*node, rename_id);
+        // trying to set stride to 1x1 by shrinking convolutions before eltwise if doable
+        eltwise_remove_stride eltwise_remove_stride_pass;
+        pm->run(*this, eltwise_remove_stride_pass);
 
-            //create convolution primitive
-            if (biases.size() != 0)
-            {
-                auto conv_prim = std::make_shared<convolution>(deconv_id, input_id, weights_vec, bias_vec,
-                    stride, input_offset, tensor{ 1, 1, 1, 1 }, with_activation, activation_negative_slope, output_padding);
-                get_or_create(conv_prim);
-            }
-            else
-            {
-                auto conv_prim = std::make_shared<convolution>(deconv_id, input_id, weights_vec,
-                    stride, input_offset, tensor{ 1, 1, 1, 1 }, with_activation, activation_negative_slope, output_padding);
-                get_or_create(conv_prim);
-            }
+        prepare_conv_eltw_fusing prepare_conv_eltw_fusing_pass;
+        pm->run(*this, prepare_conv_eltw_fusing_pass);
 
-            auto conv_node_ptr = nodes_map.find(deconv_id)->second;
-            auto conv_node = &conv_node_ptr->as<convolution>();
-            conv_node->set_transposed(true);
+        prepare_conv_eltw_read_write_opt prepare_conv_eltw_read_write_opt_pass;
+        pm->run(*this, prepare_conv_eltw_read_write_opt_pass);
+    }
 
-            //add connections input->convolution, weights->convolution and bias->convolution
-            add_connection(input_node, *conv_node_ptr);
+    handle_reshape();
 
-            for (auto& weights_id : weights_vec)
-            {
-                auto weights_node_ptr = nodes_map.find(weights_id)->second;
-                add_connection(*weights_node_ptr, *conv_node_ptr);
-            }
+    remove_redundant_reorders remove_redundant_reorders_pass;
+    pm->run(*this, remove_redundant_reorders_pass);
 
-            if (!bias_vec.empty())
-            {
-                for (auto& bias_id : bias_vec)
-                {
-                    auto bias_id_node_ptr = nodes_map.find(bias_id)->second;
-                    add_connection(*bias_id_node_ptr, *conv_node_ptr);
-                }
-            }
+    prepare_padding prepare_padding_pass(output_size_handling_enabled);
+    pm->run(*this, prepare_padding_pass);
 
-            auto deconv_node_ptr = nodes_map.find(rename_id)->second;
-            replace_all_usages(*deconv_node_ptr, *conv_node_ptr);
-            optimized_out.push_back(rename_id);
-            nodes_map.erase(rename_id);
+    prepare_depthwise_sep_opt prepare_depthwise_sep_opt_pass;
+    pm->run(*this, prepare_depthwise_sep_opt_pass);
 
-            continue;
-        }
+    if (!is_internal)
+    {
+        propagate_constants propagate_constants_pass;  // ToDo remove hidden dependencies from propagate_constants pass
+        pm->run(*this, propagate_constants_pass);
     }
-}
 
-void program_impl::handle_lstm()
-{
-    bool has_lstm_children;
-    auto itr = nodes_map.begin(); //note we need to use iterators since currently processed element can be removed
-    while (itr != nodes_map.end())
+    //try to fuse buffers (i.e. depth_concat in bfyx format) after padding calculations
+    if (options.get<build_option_type::optimize_data>()->enabled())
     {
-        auto node_itr = itr++;
-        auto& node = (*node_itr).second;
-        has_lstm_children = false;
-        // replace lstm node with lstm_gemm and lstm_elt nodes
-        if (node->is_type<lstm>()) {
-            bool initial_hidden_term = node->as<lstm>().initial_hidden_term();
-            bool initial_cell_term = node->as<lstm>().initial_cell_term();
-            bool bias_term = node->as<lstm>().bias_term();
-            auto lstm_prim = node->as<lstm>().typed_desc();
-            primitive_id weights_id = lstm_prim->weights;
-            primitive_id recurrent_id = lstm_prim->recurrent;
-            primitive_id bias_id = bias_term ? lstm_prim->bias : "";
-            primitive_id initial_hidden_id = initial_hidden_term ? lstm_prim->initial_hidden : "";
-            primitive_id initial_cell_id = initial_cell_term ? lstm_prim->initial_cell : "";
-            //removing connection with weights to get proper dependency order for next operations
-            remove_connection(*nodes_map.at(weights_id), *node);
-            remove_connection(*nodes_map.at(recurrent_id), *node);
-            if (bias_term)
-                remove_connection(*nodes_map.at(bias_id), *node);
-            if (initial_hidden_term)
-                remove_connection(*nodes_map.at(initial_hidden_id), *node);
-            if (initial_cell_term)
-                remove_connection(*nodes_map.at(initial_cell_id), *node);
-
-            //calculating sizes
-            auto input_size = node->get_dependency(0).get_output_layout().size;
-            auto recurrent_size = nodes_map.at(recurrent_id)->get_output_layout().size;
-            auto hidden_size = tensor(input_size.batch[0], 1, recurrent_size.spatial[0], input_size.feature[0]);
-            size_t directions = recurrent_size.feature[0];
-            size_t input_dependencies = node->get_dependencies().size();
-            size_t sequence_len = node->as<lstm>().sequence_len();
-
-            //if the sequence has a single element but it has multiple inputs then
-            //the parent of this lstm is an lstm node. If this is a bidirectional lstm
-            //then the sequence length is the number of dependencies divided by 2.
-            if (sequence_len == 1 && input_dependencies > 1)
-                sequence_len = (directions == 1) ? input_dependencies : input_dependencies / 2;
-
-            //check if this lstm node has an lstm child
-            for (auto& user : node->get_users())
-            {
-                if (user->is_type<lstm>())
-                {
-                    has_lstm_children = true;
-                }
-            }
-
-            std::vector<program_node*> cell_list(directions * sequence_len);
-            std::vector<program_node*> concat_depends(directions * sequence_len);
-            std::vector<primitive_id> output_ids_offsets(directions * sequence_len);
-
-            primitive_id hidden_fwd_id = initial_hidden_id;
-            primitive_id hidden_bwd_id = initial_hidden_id;
-            primitive_id cell_fwd_id = initial_cell_id;
-            primitive_id cell_bwd_id = initial_cell_id;
-
-            auto split_direction = [&](const std::string gate, bool initial_term, primitive_id& fwd_id, primitive_id& bwd_id) {
-                if (initial_term) {
-                    primitive_id initial_id = fwd_id;
-                    fwd_id = node->id() + ":" + gate + "_fwd";
-                    auto fwd_node = std::make_shared<crop>(fwd_id, initial_id, hidden_size, tensor{ 0,0,0,0 });
-                    auto &n1 = get_or_create(fwd_node);
-                    add_connection(*nodes_map.at(initial_id), n1);
-                    bwd_id = node->id() + ":" + gate + "_bwd";
-                    auto bwd_node = std::make_shared<crop>(bwd_id, initial_id, hidden_size, tensor{ 0,1,0,0 });
-                    auto &n2 = get_or_create(bwd_node);
-                    add_connection(*nodes_map.at(initial_id), n2);
-                }
-            };
-
-            //if bidirectional lstm then initial_hidden and initial_cell terms need to be split
-            if (directions > 1) {
-                split_direction("hidden", initial_hidden_term, hidden_fwd_id, hidden_bwd_id);
-                split_direction("cell", initial_cell_term, cell_fwd_id, cell_bwd_id);
-            }
-
-            //lstm expanding
-            for (size_t dir = 0; dir < directions; ++dir) {
-                auto hidden_id = dir == 0 ? hidden_fwd_id : hidden_bwd_id;
-                auto cell_id = dir == 0 ? cell_fwd_id : cell_bwd_id;
-                for (size_t i = 0; i < sequence_len; ++i) {
-                    size_t idx = i + dir * sequence_len;
-                    primitive_id lstm_gemm_id = node->id() + ":lstm_gemm" + get_id_string(idx);
-                    primitive_id lstm_elt_id = node->id() + ":lstm_elt" + get_id_string(idx);
-                    primitive_id crop_id = node->id() + ":crop" + get_id_string(idx);
-
-                    size_t input_idx = i;
-                    //for bidirectional lstms, if first LSTM layer then reverse input
-                    //for subsequent stacked layers the input is strided on the dir dimension
-                    if (directions > 0) {
-                        if (input_dependencies > sequence_len) { // stacked layer
-                            input_idx = dir * sequence_len + i;
-                        }
-                        else
-                        {
-                            if (dir > 0) { // first layer
-                                input_idx = sequence_len - i - 1;
-                            }
-                        }
-                    }
-                    primitive_id lstm_gemm_input_id = node->get_dependency(input_idx).get_org_primitive_id();
-
-                    auto lstm_gemm_node = std::make_shared<lstm_gemm>(lstm_gemm_id, lstm_gemm_input_id, weights_id, recurrent_id, bias_id, hidden_id, (uint32_t)dir);
-                    auto &n1 = get_or_create(lstm_gemm_node);
-
-                    auto lstm_elt_node = std::make_shared<lstm_elt>(lstm_elt_id, lstm_gemm_id, cell_id, lstm_prim->clip, lstm_prim->input_forget,
-                        lstm_prim->activations, lstm_prim->activation_params, lstm_prim->offset_order);
-                    auto &n2 = get_or_create(lstm_elt_node);
-                    //adding lstm_elt as user
-                    add_connection(n1, n2);
-                    //adding dependecy to lstm_gemm node
-                    //input
-                    add_connection(node->get_dependency(input_idx), n1);
-                    //adding weights and initial values to lstm_gemm
-                    add_connection(*nodes_map.at(weights_id), n1);
-                    add_connection(*nodes_map.at(recurrent_id), n1);
-                    if (bias_term)
-                        add_connection(*nodes_map.at(bias_id), n1);
-
-                    //adding cell and hiddens as dependencies
-                    if (i > 0)
-                    {
-                        add_connection(*cell_list[size_t(i - 1) * directions + dir], n2);
-                        add_connection(*(concat_depends[size_t(i - 1) * directions + dir]), n1);
-                    }
-                    //if initial values are present
-                    else
-                    {
-                        if (initial_hidden_term)
-                            add_connection(*nodes_map.at(hidden_id), n1);
-                        if (initial_cell_term)
-                            add_connection(*nodes_map.at(cell_id), n2);
-                    }
-
-                    //lstm_hidden
-                    hidden_id = crop_id + ":hidden";
-                    auto crop_hidden = std::make_shared<crop>(hidden_id, lstm_elt_id, hidden_size, tensor{ 0,0,0,0 });
-                    auto &n3 = get_or_create(crop_hidden);
-                    //adding eltwise as dependency to hidden
-                    add_connection(n2, n3);
-
-                    //if parent is lstm adding hiddens as dependency
-                    if (has_lstm_children)
-                    {
-                        for (auto& user : node->get_users())
-                        {
-                            add_connection(n3, *user);
-                        }
-                    }
-                    concat_depends[i * directions + dir] = &n3;
-
-                    //lstm_cell
-                    if (i < sequence_len - 1) {
-                        cell_id = crop_id + ":cell";
-                        auto crop_cell = std::make_shared<crop>(cell_id, lstm_elt_id, hidden_size, tensor{ 0,1,0,0 });
-                        auto &n4 = get_or_create(crop_cell);
-                        add_connection(n2, n4);
-                        cell_list[i * directions + dir] = &n4;
-                    }
-                    output_ids_offsets[i * directions + dir] = hidden_id;
-                }
-            }
-
-            //if there is no next lstm, concatenation is created
-            if (!has_lstm_children)
-            {
-                primitive_id original_id = node->id();
-                primitive_id concatenation_id = original_id + ":concat";
-                auto concatenation_primitive = std::make_shared<concatenation>(concatenation_id, output_ids_offsets, concatenation::along_f);
-                auto &concatenation_node = get_or_create(concatenation_primitive);
-                for (auto sub_dependency : concat_depends)
-                {
-                    add_connection(*sub_dependency, concatenation_node);
-                }
-                if (directions == 2) {
-                    // bidirectional support requires concatenations along the direction and sequence axis
-                    // instead we can concatenate along the sequence axis and reshape the tensor to the account
-                    // for the direction
-                    tensor output_size {input_size.batch[0], (int32_t)sequence_len, hidden_size.spatial[0], (int32_t)directions};
-                    primitive_id reshape_id = original_id + ":reshape";
-                    auto reshape_primitive = std::make_shared<reshape>(reshape_id, concatenation_id, output_size);
-                    auto &reshape_node = get_or_create(reshape_primitive);
-                    add_connection(concatenation_node, reshape_node);
-                    for (auto& user : node->get_users())
-                    {
-                        add_connection(reshape_node, *user);
-                    }
-                }
-            }
-
-            //removing expanded node
-            remove_all_connections(*node);
-            nodes_map.erase(node->id());
-            continue;
-        }
+        prepare_buffer_fusing prepare_buffer_fusing_pass;
+        pm->run(*this, prepare_buffer_fusing_pass);
     }
 
+    //check if there exists some layout incompatibilities and add an reorder node if required
+    add_required_reorders add_required_reorders_pass;
+    pm->run(*this, add_required_reorders_pass);
 }
 
-void program_impl::set_outputs()
+void program_impl::post_optimize_graph(bool is_internal)
 {
-    auto outputs_option = options.get<build_option_type::outputs>();
-    if (!outputs_option->outputs.empty())
-    {
-        for (auto const& output : outputs_option->outputs)
-        {
-            auto o_node = nodes_map.at(output);
-            o_node->set_output(true);
-            outputs.push_back(o_node.get());
-        }
-    }
-    else
-    {
-        for (auto& node : nodes_map)
-            if (node.second->is_endpoint())
-            {
-                node.second->set_output(true);
-                outputs.push_back(node.second.get());
-            }
-    }
-}
+    layout_optimizer lo;
+    post_optimize_weights post_optimize_weights_pass(lo);
+    pm->run(*this, post_optimize_weights_pass);
 
-void program_impl::calc_processing_order()
-{
-    processing_order.clear();
+    remove_redundant_reorders remove_redundant_reorders_pass;
+    pm->run(*this, remove_redundant_reorders_pass); //TODO: do we need it at this place also?
 
-    //run dfs to sort nodes topologically
-    for (auto input : inputs)
+    if (!is_internal)
     {
-        if (input->is_marked())
-            continue;
-
-        input->mark();
-        std::list<std::pair<program_node*, std::list<program_node*>::const_iterator>> stack = { std::make_pair(input, input->users.begin()) };
-
-        while (!stack.empty()) //imitate call stack
-        {
-        new_frame:
-            auto& frame = stack.back();
-
-            while (frame.second != frame.first->users.end())
-            {
-                auto successor = *frame.second;
-                ++frame.second;
-
-                if (!successor->is_marked())
-                {
-                    successor->mark();
-
-                    //recurrence call
-                    stack.push_back(std::make_pair(successor, successor->users.begin()));
-                    goto new_frame;
-                }
-            }
-
-            //we have finished processing one node so add it to the processing queue
-            processing_order.push_front(frame.first);
-            frame.first->processing_itr = processing_order.begin();
-
-            //return from call
-            stack.pop_back();
-        }
+        propagate_constants propagate_constants_pass;  // ToDo remove hidden dependencies from propagate_constants pass
+        pm->run(*this, propagate_constants_pass);
     }
 
-    uint32_t idx = 0;
-    for (auto& node : processing_order)
-    {
-        node->processing_num = ++idx;
-        node->unmark();
-    }
+    prep_opt_depthwise_sep_post prep_opt_depthwise_sep_post_pass;
+    pm->run(*this, prep_opt_depthwise_sep_post_pass);
+   
+    prepare_memory_dependencies();
 }
 
-void program_impl::update_processing_numbers()
+// mark if the node is constant assuming that all dependencies are marked properly
+void program_impl::mark_if_constant(program_node& node) 
 {
-    uint32_t idx = 0;
-    for (auto& node : processing_order)
-    {
-        node->processing_num = ++idx;
-    }
-
-    for (auto& node : processing_order)
+    if (node.get_dependencies().empty())
+        return;
+    if (node.is_type<prior_box>())
+        return;
+    node.constant = true;
+    for (auto& dep : node.get_dependencies())
     {
-        if (!processing_order_is_correct(node))
+        if (!dep->constant)
         {
-            CLDNN_ERROR_MESSAGE(node->id(), "Incorrect processing order");
-            return;
+            node.constant = false;
+            break;
         }
     }
 }
 
-void program_impl::calc_prior_boxes()
+// mark if the node is in data flow assuming that all dependencies are marked properly
+void program_impl::mark_if_data_flow(program_node& node) 
 {
-    auto itr = processing_order.begin();
-    while (itr != processing_order.end())
+    if (node.is_type<mutable_data>() || node.is_type<input_layout>())
     {
-        auto& node = (*itr++);
-        if (!node->is_type<prior_box>())
-            continue;
-
-        auto& pb_node = node->as<prior_box>();
-
-        pb_node.calc_result();
-        remove_connection(pb_node.input(), pb_node);
-
-        auto& result = pb_node.get_result_buffer();
-        result.add_ref(); // need to inc ref count since we will be assigning this memory as cldnn_memory in next line that is not ref_count_obj
-        auto cpp_mem = details::memory_c_to_cpp_converter::convert(api_cast(&result));
-
-        auto& data_node = get_or_create(std::make_shared<data>("_cldnn_tmp_" + pb_node.id() + "_result", cpp_mem));
-        replace(pb_node, data_node, false, false);
+        node.data_flow = true;
     }
-}
-
-
-
-void program_impl::mark_constants()
-{
-    for (auto& node : processing_order)
+    else
     {
-        if (node->dependencies.empty())
-            continue;
-        if (node->is_type<prior_box>())
-            continue;
-
-        node->constant = true;
-        for (auto& dep : node->get_dependencies())
+        node.data_flow = false;
+        size_t inputs_count = node.get_dependencies().size();
+        if (node.is_type<detection_output>() || node.is_type<proposal>())
+            inputs_count = 2; //ignore third input as it is related to prior boxes (i.e. concat of prior-boxes)
+        for (size_t idx = 0; idx < inputs_count; idx++)
         {
-            if (!dep->constant)
+            if (node.get_dependency(idx).is_in_data_flow())
             {
-                node->constant = false;
-                break;
+                node.data_flow = true;
+                return;
             }
         }
-
-        if (!node->constant)
-            for (auto& dep : node->get_dependencies())
-                if (dep->constant)
-                    dep->constant_frontier = true;
     }
 }
-
-void program_impl::mark_data_flow()
+void program_impl::cleanup()
 {
-    std::list<program_node*> stack;
-    for (auto const& node : processing_order)
-    {
-        if ((node->is_endpoint() && !node->constant) || node->is_type<mutable_data>())
-        {
-            stack.push_back(node);
-            node->data_flow = true;
-            node->mark();
-        }
-    }
+    for (auto& node : processing_order)
+        if (!node->is_type<internal_primitive>())
+            node->get_output_layout();
 
-    while (!stack.empty())
+    //in debug build, at the end, mark all nodes as outputs so user can query for buffers of all not-optimized nodes, including internal ones etc.
+    if (is_debug_build())
     {
-        auto node = stack.front();
-        stack.pop_front();
-
-        size_t dep_idx = 0;
-        size_t inputs_count = (node->is_type<internal_primitive>() ? node->get_dependencies().size() : node->get_primitive()->input.size());
-        //TODO: remove this hack after addition of constants propagation pass
-        if (node->is_type<detection_output>() || node->is_type<proposal>())
-            inputs_count = 2; //ignore third input as it is related to prior boxes (i.e. concat of prior-boxes)
-
-        for (auto dep : node->get_dependencies())
+        for (auto& node : processing_order)
         {
-            bool data_flow = (dep_idx < inputs_count && !dep->constant);
-            ++dep_idx;
-            if (!data_flow)
-                continue;
-
-            dep->data_flow = data_flow;
-
-            if (dep->is_marked())
-                continue;
-
-            stack.push_back(dep);
-            dep->mark();
+            if (!node->is_output())
+            {
+                node->set_output(true);
+                outputs.push_back(node);
+            }
         }
     }
-
-    for (auto& node : processing_order)
-    {
-        assert(!node->constant || !node->data_flow); //node which is constant cannot be marked as data flow
-        node->unmark();
-    }
 }
 
-void program_impl::trim_to_outputs()
-{
-    size_t actual_nodes = processing_order.size();
-    if (!actual_nodes) //degenerated case but can happen
-        return;
-
-    if (outputs.size() == actual_nodes)
-        return;
-
-    //do backward bfs starting from all outputs
-    std::list<const std::vector<program_node*>*> stack = { &outputs };
-    while (!stack.empty())
+void program_impl::add_split_outputs() {
+    auto itr = nodes_map.begin();
+    while (itr != nodes_map.end())
     {
-        auto nodes_list = stack.front();
-        stack.pop_front();
+        auto node_itr = itr++;
+        auto& node = (*node_itr).second;
 
-        for (auto node : *nodes_list)
+        if (node->is_type<split>())
         {
-            if (!node->is_marked())
+            auto split_prim = node->as<split>().typed_desc();
+            primitive_id input_id = split_prim->input[0];
+            auto split_num = split_prim->output_offsets.size();
+
+            //create crop for each split ouptut provided
+            for (decltype(split_num) i = 0; i < split_num; i++)
             {
-                node->mark();
-                if (!node->get_dependencies().empty())
-                    stack.push_back(&node->get_dependencies());
+                primitive_id output_id = node->id() + ":" + split_prim->output_ids[i];
+
+                //create dummy crop primitive and add it to nodes map
+                auto crop_prim = std::make_shared<crop>(output_id, input_id, tensor{ 1,1,1,1 }, split_prim->output_offsets[i]);
+                get_or_create(crop_prim);
             }
         }
     }
+}
 
-    //all not-marked nodes should be removed
-    std::list<program_node*> to_rem;
-    for (auto node : processing_order)
-    {
-        if (node->is_type<input_layout>()) //input layout may become disconnected during prior boxes calculations so it may have not been marked at this place but we don't want to remove it
-            node->mark();
-        else if (!node->is_marked())
-            to_rem.push_back(node);
-    }
-
-    for (auto const& node : to_rem)
-    {
-        if (node->is_input())
-            inputs.remove(node);
-        else
-        {
-            for (auto dep : node->dependencies)
-                if (dep->is_marked())
-                    dep->users.remove(node);
-        }
-
-        for (auto user : node->users)
-            if (user->is_marked())
-                user->dependencies.erase(std::remove(user->dependencies.begin(), user->dependencies.end(), node), user->dependencies.end());
+program_impl::nodes_ordering& program_impl::get_processing_order()
+{
+    return processing_order;
+}
 
-        optimized_out.push_back(node->id());
-        nodes_map.erase(node->id());
-    }
+const program_impl::nodes_ordering& program_impl::get_processing_order() const
+{
+    return processing_order;
 }
 
 void add_memory_dependency(program_node* node, program_node* dep)
@@ -1273,36 +576,36 @@ void program_impl::basic_memory_dependencies()
     }
 }
 
+
 void program_impl::skipped_branch_memory_dependencies()
 {
-    auto itr = processing_order.begin();
-    // Primitive A can't use primitive B buffer if B->processing_num < A->processing_num and any of B users processing_num > A->processing_num
+    // Primitive A can't use primitive B buffer if processing_num(B) < processing_num(A) and for any usr - the user of B processing_num(usr) > processing_num(A)
     // Otherwise it could override data that has to be used in the future.
-    // TODO: improve algorithm to to O(n*log(n))
-    while (itr != processing_order.end())
+    auto itrB = processing_order.begin();
+    while (itrB != processing_order.end())
     {
-        auto& node = *itr;
-        itr++;
-        auto itr2 = processing_order.begin();
-        if (itr2 == itr)
+        auto& nodeB = *itrB;
+        auto itrA = ++itrB;
+        if (nodeB->get_users().size()==0)
             continue;
-        while (itr2 != processing_order.end())
+
+        // find the last user of B in processing order
+        auto itrUsr = nodeB->get_users().begin();
+        auto lastUsr = itrUsr++;
+        while (itrUsr != nodeB->get_users().end())
         {
-            auto& node2 = *itr2;
-            itr2++;
-            if (node2->get_processing_num() < node->get_processing_num())
-            {
-                // if at least one user will be processed after 'node', node2 has to be added to forbiden list
-                for (auto usr : node2->get_users())
-                {
-                    if (usr->get_processing_num() > node->get_processing_num())
-                    {
-                        add_memory_dependency(node, node2);
-                        add_memory_dependency(node2, node);
-                        break;
-                    }
-                }
-            }
+            if (processing_order.get_processing_number(*lastUsr) < processing_order.get_processing_number(*itrUsr))
+                lastUsr = itrUsr;
+            itrUsr++;
+        }
+
+        //mark all nodes in between B and lastUsr of B as forbidden to share buffer with B
+        while (itrA != processing_order.get_processing_iterator(**lastUsr))
+        {
+            auto& nodeA = *itrA;
+            itrA++;
+            add_memory_dependency(nodeA, nodeB);
+            add_memory_dependency(nodeB, nodeA);
         }
     }
 }
@@ -1314,7 +617,7 @@ void program_impl::oooq_memory_dependencies()
     // Set of nodes between two syncing points will be called sync_region.
     // Major rules is: can't share resource with nodes in my sync_region
 
-    uint32_t last_barrier = 0;
+    int32_t last_barrier = 0;
     bool needs_barrier = false;
     std::vector<cldnn::program_node*> sync_region;
     while (itr != processing_order.end())
@@ -1325,7 +628,7 @@ void program_impl::oooq_memory_dependencies()
         // if any of dep has proccess num after barrier -> needs barrier
         for (auto dep : node->get_dependencies())
         {
-            if (dep->get_processing_num() >= last_barrier)
+            if (processing_order.get_processing_number(dep) >= last_barrier)
             {
                 needs_barrier = true;
                 break;
@@ -1334,7 +637,7 @@ void program_impl::oooq_memory_dependencies()
 
         if (needs_barrier)
         {
-            last_barrier = node->get_processing_num();
+            last_barrier = processing_order.get_processing_number(node);
             needs_barrier = false;
             // add each pair bi-direction dependency
             for (auto nd1 = sync_region.begin(); nd1 + 1 != sync_region.end(); nd1++)
@@ -1392,452 +695,6 @@ std::string program_impl::get_memory_dependencies_string() const
     return mem_dep;
 }
 
-void program_impl::remove_redundant_reorders()
-{
-    auto itr = processing_order.begin(); //note we need to use iterators since currently processed element can be removed
-    while (itr != processing_order.end())
-    {
-        auto& node = (*itr++); //post-inc to avoid invalidation due to possible erase
-        if (!node->is_type<reorder>()) //only care for reorders
-            continue;
-
-        program_node* current_node = node;
-        std::vector<program_node*> r_nodes_to_remove;
-
-        auto optimize = true;
-        while (current_node)
-        {
-            auto& r_node = current_node->as<reorder>();
-            current_node = nullptr;
-
-            if (r_node.has_mean() || !r_node.get_primitive()->subtract_per_feature.empty() ||  //do not optimize if mean of subtract are present
-                (r_node.is_output() && r_node.get_dependency(0).is_output())) //do not optimize when both reorder and layer before are outputs
-            {
-                optimize = false;
-                break;
-            }
-
-            r_nodes_to_remove.push_back(&r_node);
-
-            if (r_node.get_dependency(0).is_type<reorder>() && r_node.get_dependencies().size() == 1 && r_node.get_users().size() == 1 && r_node.get_dependency(0).get_users().size() == 1)
-                current_node = &r_node.get_dependency(0);
-        }
-        if (!optimize)
-            continue;
-
-        assert(node->dependencies.size() == 1 && "reorder without mean should have exactly one dependecy (input)");
-        auto& r_output = r_nodes_to_remove.front();
-        auto& r_input = r_nodes_to_remove.back()->get_dependency(0);
-        auto o_layout = r_output->get_output_layout();
-        auto i_layout = r_input.get_output_layout();
-
-        auto ident = are_layouts_identical(o_layout, i_layout);
-        if (!ident.second)
-            continue;
-
-        for (auto remove_reorder_node : r_nodes_to_remove)
-        {
-            auto& r_node = remove_reorder_node->as<reorder>();
-
-            if (ident.first && ident.second && r_node.is_output() && r_node.get_dependency(0).is_input()) //do not optimize when reorder is output and layer before is input
-            {
-                optimize = false;
-                break;
-            }
-        }
-        if (!optimize)
-            continue;
-
-        for (auto remove_reorder_node : r_nodes_to_remove)
-        {
-            auto& r_node = remove_reorder_node->as<reorder>();
-
-            //mark as optimized
-            r_node.can_be_optimized(true);
-            r_node.requires_reinterpret(!ident.first);
-            if (ident.first) //no need of reshape
-                extract_and_remove(r_node); //try to remove if possible (with respect to r_node not being marked as output)
-        }
-    }
-}
-
-/*
-    recalculate processing_order
-    algorithm based on: CLRS 24.5 (critical path in DAG)
-    modifications: adjust for multiple inputs
-    input: any topological order in processing order
-    output: BFS topological order.
-*/
-
-void program_impl::calculate_BFS_processing_order() {
-    std::map<program_node*, int> distances;
-    for (auto itr : processing_order)
-    {
-        distances[itr] = -1;
-    }
-    int max_distance = 0;
-    for (auto itr : processing_order)
-    {
-        //Init
-        if (distances[itr] == -1) {     // this must be an input
-            distances[itr] = 0;         // initialize input
-        }
-        // RELAX
-        for (auto& user : itr->get_users())
-        {
-            distances[user] = std::max(distances[user], distances[itr] + 1);
-            max_distance = std::max(max_distance, distances[user]);
-        }
-    }
-
-    //bucket sort nodes based on their max distance from input
-    std::vector<std::vector<program_node*>> dist_lists;
-    dist_lists.resize(max_distance + 1);
-    for (auto itr : processing_order)
-    {
-        dist_lists[distances[itr]].push_back(itr);
-    }
-
-    //replace the old processing order by the new one, still topological.
-    processing_order.clear();
-    for (auto& dist : dist_lists)
-    {
-        for (auto& node : dist)
-        {
-            processing_order.push_back(node);
-            node->processing_itr = processing_order.end();
-            node->processing_itr--;
-        }
-    }
-    update_processing_numbers();
-    return;
-}
-
-void program_impl::reorder_inputs(layout_optimizer& lo)
-{
-    //first pass to set layout optimization_attributes for topology
-    for (auto& p : nodes_map)
-    {
-        auto& prim = *p.second;
-        if (prim.type() == cldnn::convolution::type_id())
-        {
-            if (prim.as<convolution>().get_primitive()->split() > 1)
-                lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::splitted_convolution, 1);
-        }
-
-        //list of layers that do not support yxfb or perform worse than bfyx
-        if (prim.type() == cldnn::detection_output::type_id() || prim.type() == cldnn::proposal::type_id() ||
-            prim.type() == cldnn::roi_pooling::type_id() || prim.type() == cldnn::deconvolution::type_id() ||
-            prim.type() == cldnn::upsampling::type_id() || prim.type() == cldnn::reorg_yolo::type_id())
-            lo.set_optimization_attribute(layout_optimizer::optimization_attributes_type::bfyx_only_layer, 1);
-    }
-
-    const auto reorder_input = [this, &lo](typed_program_node<convolution>& conv_node)
-    {
-        auto conv_prim = conv_node.get_primitive();
-        auto& input_node = conv_node.get_dependency(0);
-        auto&& weights_layout = conv_node.weights(0).get_output_layout();
-        auto&& input_layout = input_node.get_output_layout();
-
-        std::shared_ptr<reorder> new_input = nullptr;
-
-        if (input_node.type() == reorder::type_id()) //convolution's input is a reorder
-        {
-            auto reorder_prim = input_node.as<reorder>().typed_desc();
-            auto& reorder_input = input_node.get_dependency(0);
-            auto reorder_layout = input_node.get_output_layout();
-            reorder_layout.data_type = reorder_prim->output_data_type;
-            new_input = lo.get_reorder(
-                reorder_layout,
-                reorder_prim->id,
-                layout_optimizer::data_type::input,
-                conv_node,
-                weights_layout).first;
-
-            auto reorder_removed = false;
-            if (new_input && new_input->output_format != format::winograd_2x3_s1_data && new_input->output_format != format::bf8_xy16 && new_input->output_format != format::byxf) //output format is not optimal
-            {
-                auto reorder_input_layout = reorder_input.get_output_layout();
-
-                auto opt_layout = layout(new_input->output_data_type, new_input->output_format, reorder_input_layout.size);
-                if (reorder_input_layout == opt_layout) //reorder 'breaks' optimal format
-                {
-                    if (reorder_prim->subtract_per_feature.empty() &&
-                        reorder_prim->mean.empty() &&
-                        !reorder_prim->output_padding) //just plain reorder
-                    {
-                        conv_node.replace_dependency(0, reorder_input);
-                        if (input_node.get_users().size() == 0 && !input_node.is_output())
-                        {
-                            reorder_removed = extract_and_remove(input_node);
-                        }
-                        new_input = nullptr;
-                    }
-                    else //change reorder's output layout
-                    {
-                        reorder_prim->output_format = opt_layout.format;
-                        reorder_prim->output_data_type = opt_layout.data_type;
-                        new_input = nullptr;
-                    }
-                }
-                else //current reorder gives bad output, simply change it
-                {
-                    reorder_prim->output_format = opt_layout.format;
-                    reorder_prim->output_data_type = opt_layout.data_type;
-                    new_input = nullptr;
-                }
-            }
-
-            if (!reorder_removed)
-                input_node.recalc_output_layout();
-            else
-                conv_node.recalc_output_layout();
-        }
-        else
-        {
-            new_input = lo.get_reorder(
-                input_node.get_output_layout(),
-                input_node.id(),
-                layout_optimizer::data_type::input,
-                conv_node,
-                weights_layout).first;
-        }
-
-        if (new_input && new_input->output_format == format::winograd_2x3_s1_data)
-        {
-            auto lower_size = (conv_prim->input_offset.negate() + input_layout.size);
-
-            tensor upper_input_padding = tensor{ 0 };
-            upper_input_padding.spatial[0] = (2 - (lower_size.spatial[0] % 2)) % 2;          //winograd conv requires input's x to be in form 4 + 2n, with restriction that x >= 3, we can shortage it to x % 2 == 0
-            upper_input_padding.spatial[1] = (8 - ((lower_size.spatial[1] - 2) % 8)) % 8;    //for y, y - 2 % 8 == 0 must hold
-
-            apply_needed_padding(conv_node, input_node, padding{ conv_prim->input_offset.negate().sizes(), upper_input_padding.sizes() });
-
-            auto winograd_output = std::make_shared<reorder>("_winograd_" + conv_node.id(), conv_node.id(), input_layout.format,
-                input_layout.data_type, std::vector<float>{}, cldnn_reorder_mean_mode::mean_subtract, conv_node.output_layout.data_padding);
-            conv_node.output_layout.data_padding = padding{};
-            auto& back_node = get_or_create(winograd_output);
-            back_node.processing_itr = processing_order.insert(std::next(conv_node.processing_itr), &back_node);
-
-            auto bias_term = conv_node.bias_term();
-            //create additional eltwise node after reorder to compute bias
-            if (bias_term)
-            {
-                auto& bias_node = conv_node.get_dependency(2);
-                std::vector<primitive_id> inputs = { back_node.id(), bias_node.id() };
-                auto winograd_output_biases = std::make_shared<eltwise>(back_node.id() + "_bias", inputs,
-                    cldnn::eltwise_mode::sum, conv_prim->with_activation, conv_prim->activation_negative_slope,
-                    back_node.output_layout.data_padding);
-                back_node.output_layout.data_padding = padding{};
-                auto& back_bias_node = get_or_create(winograd_output_biases);
-                back_bias_node.processing_itr = processing_order.insert(std::next(back_node.processing_itr), &back_bias_node);
-                replace_all_usages(back_node, back_bias_node);
-                add_connection(back_node, back_bias_node);
-                add_connection(bias_node, back_bias_node);
-                conv_node.invalidate_users();
-                replace_all_usages(conv_node, back_bias_node);
-            }
-
-            if (conv_prim->with_activation)
-            {
-                conv_node.typed_desc()->with_activation = false;
-                if (!bias_term)
-                    back_node.set_fused_activation(activation_relu_negative_slope, cldnn_activation_additional_params_t{ conv_prim->activation_negative_slope, 0.0f });
-            }
-
-            if (!bias_term)
-            {
-                conv_node.invalidate_users();
-                replace_all_usages(conv_node, back_node);
-            }
-            add_connection(conv_node, back_node);
-
-            auto& r_node = get_or_create(new_input);
-            r_node.as<reorder>().set_input_offset(conv_prim->input_offset);
-
-            if (!bias_term)
-            {
-                swap_names(conv_node, back_node);
-                if (conv_node.is_output())
-                {
-                    conv_node.set_output(false);
-                    back_node.set_output(true);
-                    for (auto& output : outputs)
-                    {
-                        if (output == &conv_node)
-                        {
-                            output = &back_node;
-                            break;
-                        }
-                    }
-                }
-            }
-            else
-            {
-                conv_node.remove_dependency(2);
-                auto& back_bias_node = *nodes_map.find(back_node.id() + "_bias")->second;
-                swap_names(conv_node, back_bias_node);
-                if (conv_node.is_output())
-                {
-                    conv_node.set_output(false);
-                    back_bias_node.set_output(true);
-                    for (auto& output : outputs)
-                    {
-                        if (output == &conv_node)
-                        {
-                            output = &back_bias_node;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        if (new_input && (new_input->output_format == format::bf8_xy16 || new_input->output_format == format::byxf))
-        {
-            auto conv1x1_output = std::make_shared<reorder>("_conv1x1_reorder_back_" + conv_node.id(), conv_node.id(), input_layout.format, input_layout.data_type);
-            auto& back_node = get_or_create(conv1x1_output);
-            back_node.processing_itr = processing_order.insert(std::next(conv_node.processing_itr), &back_node);
-
-            conv_node.invalidate_users();
-            replace_all_usages(conv_node, back_node);
-            add_connection(conv_node, back_node);
-        }
-
-        if (new_input)
-        {
-            auto& r_node = get_or_create(new_input);
-            add_intermediate(r_node, conv_node, 0, r_node.dependencies.empty());
-            conv_node.recalc_output_layout();
-        }
-    };
-
-    const auto reorder_input_detection_output = [this, &lo](typed_program_node<detection_output>& detection_output_node)
-    {
-        auto detection_output_prim = detection_output_node.get_primitive();
-
-        for (size_t i = 0; i < detection_output_node.get_dependencies().size(); i++)
-        {
-            auto& input = detection_output_node.get_dependency(i);
-            std::shared_ptr<reorder> new_input = lo.get_reorder(
-                input.get_output_layout(),
-                input.id(),
-                layout_optimizer::data_type::input,
-                detection_output_node,
-                layout{ data_types::f32, format::bfyx, tensor{} }).first;
-
-            if (new_input)
-            {
-                add_intermediate(new_input, detection_output_node, i);
-            }
-        }
-    };
-
-    for (auto& prim : processing_order)
-    {
-        //there's an assumption that only convolution will take data/input_layout as input
-        //exception to that rule would be a convolution which takes a reorder as input - see reoder_input above
-        do_for_types<convolution, detection_output>(*prim,
-            reorder_input,                  //case for convolution
-            reorder_input_detection_output  //case for detection-output
-            );
-    }
-}
-
-//function which prepares given primitive for weights optimization
-template <typename T>
-void program_impl::optimize_bias(T& node, layout_optimizer& lo)
-{
-    layout output_layout = node.get_output_layout();
-
-    size_t weights_offset = node.get_primitive()->input.size();
-    size_t bias_offset = weights_offset + wrap_if_single(node.get_primitive()->weights).size();
-    for (size_t i = bias_offset; i < node.get_dependencies().size(); ++i)
-    {
-        //find weights primitive with given pimitive_id and add it to weights_optimizer
-        const program_node& bias = node.get_dependency(i);
-        const auto bias_type = layout_optimizer::data_type::bias;
-        auto reorder = lo.get_reorder(
-                bias.get_output_layout(),
-                bias.id(),
-                bias_type,
-                node,
-                output_layout);
-
-        if (reorder.first)
-            this->add_intermediate(reorder.first, node, i, !reorder.second);
-    }
-}
-template void program_impl::optimize_bias<convolution_node>(convolution_node& node, layout_optimizer& lo);
-template void program_impl::optimize_bias<deconvolution_node>(deconvolution_node& node, layout_optimizer& lo);
-template void program_impl::optimize_bias<fully_connected_node>(fully_connected_node& node, layout_optimizer& lo);
-template void program_impl::optimize_bias<embed_node>(embed_node& node, layout_optimizer& lo);
-
-void program_impl::pre_optimize_bias(layout_optimizer& lo)
-{
-    for (auto& p : nodes_map)
-    {
-        auto& prim = *p.second;
-        if (prim.type() == convolution::type_id())
-        {
-            if (!prim.as<convolution>().weights_quantization_term())
-                optimize_bias(prim.as<convolution>(), lo);
-        }
-        else if (prim.type() == deconvolution::type_id())
-        {
-            optimize_bias(prim.as<deconvolution>(), lo);
-        }
-        else if (prim.type() == fully_connected::type_id())
-        {
-            if (!prim.as<fully_connected>().weights_quantization_term())
-                optimize_bias(prim.as<fully_connected>(), lo);
-        }
-        else if (prim.type() == embed::type_id())
-        {
-            optimize_bias(prim.as<embed>(), lo);
-        }
-    }
-}
-
-template <typename T>
-void program_impl::optimize_depthwise_sep_pre(T& node)
-{
-    //enable optimization only when IFM / split <= 8 (otherwise scheduling multiple opt kernels is better) and split >= 16
-    if (!(node.get_dependency(0).get_output_layout().size.feature[0] / node.get_primitive()->split() <= 8) ||
-        !(node.get_primitive()->split() >= 16))
-        return;
-
-    //make sure the weights and biases are data type and
-    //are not reused in other primitives as they will be overriden with concatenated ones
-    for (size_t i = 1; i < node.get_dependencies().size(); i++)
-    {
-        auto& weights_or_biases = node.get_dependency(i);
-        if (weights_or_biases.get_users().size() > 1 || weights_or_biases.type() != data::type_id())
-            return;
-    }
-
-    node.set_depthwise_sep_opt(true);
-}
-template void program_impl::optimize_depthwise_sep_pre<convolution_node>(convolution_node& node);
-template void program_impl::optimize_depthwise_sep_pre<deconvolution_node>(deconvolution_node& node);
-
-void program_impl::prepare_depthwise_sep_opt()
-{
-    //depthiwise separated convolution/deconvolution optimization
-    for (auto& p : nodes_map)
-    {
-        auto& prim = *p.second;
-        if (prim.type() == convolution::type_id())
-        {
-            optimize_depthwise_sep_pre(prim.as<convolution>());
-        }
-        else if (prim.type() == deconvolution::type_id())
-        {
-            optimize_depthwise_sep_pre(prim.as<deconvolution>());
-        }
-    }
-}
-
 void program_impl::handle_reshape()
 {
     //reshape primitive by definition does not change underlying data, only shape description
@@ -1857,6 +714,10 @@ void program_impl::handle_reshape()
             if (input_node.is_type<reorder>())
                 continue;
 
+            node->get_output_layout();
+            if (node->as<reshape>().is_in_place())
+                node->optimized = true;
+
             //vector for storing nodes that are reorder type, for which splitted primitives are needed (except for the first one where orginal reshape will be used)
             std::vector<program_node*> reorder_node_to_split;
 
@@ -1896,7 +757,7 @@ void program_impl::handle_reshape()
                         auto& new_reshape_node = get_or_create(new_reshape);
                         add_connection(input_node, new_reshape_node);
                         user->replace_dependency(0, new_reshape_node);
-                        new_reshape_node.processing_itr = processing_order.insert(std::next(input_node.processing_itr), &new_reshape_node);
+                        processing_order.insert_next(&input_node, &new_reshape_node);
                         reorder_reshape_nodes.push_back(&new_reshape_node);
                     }
                 }
@@ -1905,9 +766,18 @@ void program_impl::handle_reshape()
                 auto reshape_reorder_id = 0;
                 for (const auto& reorder_node : reorder_node_to_split)
                 {
+                    /*
+                    auto new_reshape = std::make_shared<reshape>("_reshape_split_" + user->id() + "_" + node->id(), input_node.id(), output_shape);
+                    auto& new_reshape_node = get_or_create(new_reshape);
+                    add_connection(input_node, new_reshape_node);
+                    user->replace_dependency(0, new_reshape_node);
+                    processing_order.insert(std::next(processing_order.get_processing_iterator(input_node)), &new_reshape_node);
+                    reorder_reshape_nodes.push_back(&new_reshape_node);
+                    */
                     auto& reorder_reshape_node = reorder_reshape_nodes[reshape_reorder_id];
                     auto reshape_in_layout = reorder_node->get_output_layout();
-                    auto reshape_input = std::make_shared<reorder>("_reshape_input_" + reorder_node->id() + "_" + reorder_reshape_node->id(), input_node.id(), reshape_in_layout.format, reshape_in_layout.data_type);
+                    auto reshape_input = std::make_shared<reorder>("_reshape_input_" + reorder_node->id() + "_" + reorder_reshape_node->id(), input_node.id(),
+                        reshape_in_layout.format, reshape_in_layout.data_type);
                     auto& reshape_input_node = get_or_create(reshape_input);
                     add_intermediate(reshape_input_node, *reorder_reshape_node, 0, reshape_input_node.dependencies.empty());
                     reshape_reorder_id++;
@@ -1920,7 +790,7 @@ void program_impl::handle_reshape()
                 auto bfyx_layout = layout({ reshape_layout.data_type, cldnn::format::bfyx, reshape_layout.size });
                 //when some primitive does an implicit reorder to some other format then we lose the info about pitches in reshape stage
                 //we assume user provides the input vector in bfyx
-                if (!are_layouts_identical(reshape_layout, bfyx_layout).second)
+                if (!program_helpers::are_layouts_identical(reshape_layout, bfyx_layout).second)
                 {
                     auto reshape_input = std::make_shared<reorder>("_reshape_input_" + node->id(), input_node.id(), cldnn::format::bfyx, reshape_layout.data_type);
                     auto& reshape_input_node = get_or_create(reshape_input);
@@ -1948,141 +818,6 @@ void program_impl::handle_reshape()
     }
 }
 
-//function which prepares given primitive for weights optimization
-template <typename T>
-void program_impl::optimize_weights(T& node, layout_optimizer& lo)
-{
-    auto weights_offset = node.get_primitive()->input.size();
-    auto bias_offset = weights_offset + wrap_if_single(node.get_primitive()->weights).size();
-    for (auto i = weights_offset; i < bias_offset; i++)
-    {
-        auto& weights = node.get_dependency(i);
-        auto* impl = node.get_selected_impl().get();
-        auto output_layout = node.get_output_layout();
-        auto& weights_node = node.get_dependency(1);
-        auto weights_layout = weights_node.get_output_layout();
-        const auto weights_type = layout_optimizer::data_type::weights;
-
-        auto reorders = lo.get_generic_layer(
-                impl->_weights_reorder_params,
-                weights.id(),
-                weights_layout,
-                weights_type);
-
-        for (auto& reorder : reorders)
-        {
-            //insert new generic_layer node to topology
-            this->add_intermediate(reorder.first, node, i, !reorder.second);
-            //set generic_layer's node output layout and implementation
-            auto& g_node = node.get_dependency(i);
-            g_node.get_output_layout(false);
-            g_node.selected_impl = g_node.type()->choose_impl(*engine, g_node);
-        }
-        //set the old output layout and do not invalidate users as change of weights will not affect output layout
-        node.set_output_layout(output_layout, false);
-    }
-}
-template void program_impl::optimize_weights<convolution_node>(convolution_node& node, layout_optimizer& lo);
-template void program_impl::optimize_weights<deconvolution_node>(deconvolution_node& node, layout_optimizer& lo);
-template void program_impl::optimize_weights<fully_connected_node>(fully_connected_node& node, layout_optimizer& lo);
-
-void program_impl::post_optimize_weights(layout_optimizer& lo)
-{
-    for (auto& p : nodes_map)
-    {
-        auto& prim = *p.second;
-        if (prim.type() == convolution::type_id())
-        {
-            optimize_weights(prim.as<convolution>(), lo);
-        }
-        else if (prim.type() == deconvolution::type_id())
-        {
-            optimize_weights(prim.as<deconvolution>(), lo);
-        }
-        else if (prim.type() == fully_connected::type_id())
-        {
-            optimize_weights(prim.as<fully_connected>(), lo);
-        }
-        //else if (prim.type() == lstm_gemm::type_id())//TODO: Enable postoptimize weights for lstm
-        //{
-        //    prep_opt(prim.as<lstm_gemm>()); //we should take care of weights and reccurent
-        //}
-    }
-}
-
-template <typename T>
-void program_impl::optimize_depthwise_sep_post(T& node)
-{
-    if (!node.get_depthwise_sep_opt())
-        return;
-
-    const auto& split = node.get_primitive()->split();
-
-    auto dependency_offset = node.get_primitive()->input.size();
-    //concatenate weights
-    {
-        //if weights were optimized it is needed to use the sizes after optimization
-        auto target_layout = get_weights_layout(node.get_dependency(dependency_offset), split);
-        merge_buffers(engine, node, target_layout, dependency_offset, dependency_offset + split);
-        dependency_offset++;
-    }
-
-    //concatenate biases
-    if (node.get_primitive()->bias.size() != 0)
-    {
-        const auto& bias_layout = node.get_dependency(dependency_offset).get_output_layout();
-        auto target_layout = layout(bias_layout.data_type, cldnn::format::bfyx, { 1, 1, bias_layout.size.spatial[0] * split, 1 });
-        merge_buffers(engine, node, target_layout, dependency_offset, dependency_offset + split);
-        dependency_offset++;
-    }
-
-    if (node.template is_type<convolution>())
-    {
-        auto& prim_node = node.template as<convolution>();
-        const auto& prim = prim_node.get_primitive();
-
-        // concatenate weights quantization factors
-        if (prim->weights_quantization_factors.size() != 0)
-        {
-            const auto& weights_quantization_layout = node.get_dependency(dependency_offset).get_output_layout();
-            auto target_layout = layout(weights_quantization_layout.data_type, cldnn::format::bfyx, { 1, 1, weights_quantization_layout.size.batch[0] * split, 1 });
-            merge_buffers(engine, node, target_layout, dependency_offset, dependency_offset + split);
-            dependency_offset++;
-        }
-        // concatenate output callibration factors
-        if (prim->output_calibration_factors.size() != 0)
-        {
-            const auto& output_callibration_layout = node.get_dependency(dependency_offset).get_output_layout();
-            auto target_layout = layout(output_callibration_layout.data_type, cldnn::format::bfyx, { 1, 1, output_callibration_layout.size.batch[0] * split, 1 });
-            merge_buffers(engine, node, target_layout, dependency_offset, dependency_offset + split);
-            dependency_offset++;
-        }
-    }
-
-    if (node.get_primitive())
-        //override node split, as only one kernel will be executed
-        node.set_split(1);
-}
-template void program_impl::optimize_depthwise_sep_post<convolution_node>(convolution_node& node);
-template void program_impl::optimize_depthwise_sep_post<deconvolution_node>(deconvolution_node& node);
-
-void program_impl::prep_opt_depthwise_sep_post()
-{
-    //depthiwise separated convolution/deconvolution optimization
-    for (auto& p : nodes_map)
-    {
-        auto& prim = *p.second;
-        if (prim.type() == convolution::type_id())
-        {
-            optimize_depthwise_sep_post(prim.as<convolution>());
-        }
-        else if (prim.type() == deconvolution::type_id())
-        {
-            optimize_depthwise_sep_post(prim.as<deconvolution>());
-        }
-    }
-}
-
 void program_impl::apply_needed_padding(program_node& node, program_node& prev_node,
     const padding& needed_padding)
 {
@@ -2105,632 +840,96 @@ void program_impl::apply_needed_padding(program_node& node, program_node& prev_n
     prev_node.merge_output_padding(needed_padding);
 }
 
-void program_impl::prepare_padding()
+void program_impl::reverse_connection(program_node& dep_node, program_node& user_node)
 {
-    if (output_size_handling_enabled)
+    if (std::find(dep_node.users.begin(), dep_node.users.end(), &user_node) != dep_node.users.end())
     {
-        // Prepare upper padding for primitives that support output_size parameter.
-        for (const auto& node : processing_order)
-        {
-            if (node->is_type<convolution>())
-            {
-                auto& prim_node = node->as<convolution>();
-                const auto& prim = prim_node.get_primitive();
-
-                if (!prim->with_output_size)
-                    continue;
-
-                auto filter_size = prim_node.weights(0).get_output_layout().size;
-
-                auto needed_padding = calc_sliding_window_needed_input_padding(
-                    prim_node.input().get_output_layout(),
-                    prim->output_size, filter_size, prim->input_offset, prim->stride, prim->dilation, false, 1);
-                apply_needed_padding(prim_node, prim_node.input(), needed_padding);
-            }
-            else if (node->is_type<deconvolution>())
-            {
-                auto& prim_node = node->as<deconvolution>();
-                const auto& prim = prim_node.get_primitive();
-
-                if (!prim->with_output_size)
-                    continue;
-
-                auto filter_size = prim_node.weights(0).get_output_layout().size;
-
-                auto needed_padding = calc_sliding_window_needed_input_padding(
-                    prim_node.input().get_output_layout(),
-                    prim->output_size, filter_size, prim->input_offset, prim->stride, { 1, 1, 1, 1 }, true, 1);
-
-                apply_needed_padding(prim_node, prim_node.input(), needed_padding);
-            }
-            else if (node->is_type<pooling>())
-            {
-                auto& prim_node = node->as<pooling>();
-                const auto& prim = prim_node.get_primitive();
-
-                if (!prim->with_output_size)
-                    continue;
-
-                // NOTE: Currently there is no pooling implementation/pooling mode which does not check input data range.
-                // There is no need to add padding requirements on pooling inputs.
-                //auto needed_padding = calc_sliding_window_needed_input_padding(
-                //    prim_node.input().get_output_layout(),
-                //    prim->output_size, prim->size, prim->input_offset, prim->stride, {1, 1, 1, 1}, false, 1);
-                auto needed_padding = prim_node.input().get_output_layout().data_padding;
-
-                apply_needed_padding(prim_node, prim_node.input(), needed_padding);
-            }
-        }
-    }
-
-    // Prepare optimized padding for bfyx convolution.
-    for (auto& pair : nodes_map)
-    {
-        if (pair.second->type() != convolution::type_id())
-            continue;
-
-        auto& node = pair.second->as<convolution>();
-        if (node.get_dependencies().empty())
-            continue;
-
-        auto conv = node.get_primitive();
-        auto& conv_input_node = node.get_dependency(0);
-        auto conv_layout = node.get_output_layout();
-
-        // right now output padding optimization is only available for bfyx format and data type = float32
-        if (conv_layout.format != cldnn::format::bfyx
-            && conv_layout.format != cldnn::format::bf8_xy16
-            && conv_layout.format != cldnn::format::byxf_af32
-            && conv_layout.format != cldnn::format::fs_bs_yx_bsv4_fsv32)
-        {
-            continue;
-        }
-
-        // Calculating input padding needed for convolution
-        auto& filter_node = node.as<convolution>().weights(0);
-        auto filter_prim = filter_node.get_primitive();
-
-        layout filter_layout = filter_node.get_output_layout();
-
-        // convolution have only one input primitive
-        auto prev_prim_output_layout = conv_input_node.get_output_layout();
-
-        // Compute initial required paddings for primitive used as input for convolution.
-        auto input_offset = conv->input_offset;
-        auto stride = conv->stride;
-        auto dilation = conv->dilation;
-
-        auto input_limit_x = input_offset.spatial[0] + (conv_layout.size.spatial[0] - 1) * stride.spatial[0] + (filter_layout.size.spatial[0] - 1) * dilation.spatial[0] + 1;
-        auto input_limit_y = input_offset.spatial[1] + (conv_layout.size.spatial[1] - 1) * stride.spatial[1] + (filter_layout.size.spatial[1] - 1) * dilation.spatial[1] + 1;
-
-        auto left_padding = std::max(-input_offset.spatial[0], 0);
-        auto top_padding = std::max(-input_offset.spatial[1], 0);
-        auto right_padding = std::max(input_limit_x - prev_prim_output_layout.size.spatial[0], 0);
-        auto bottom_padding = std::max(input_limit_y - prev_prim_output_layout.size.spatial[1], 0);
-
-        // Adjust right padding, so entire buffer size in X dimension is properly aligned.
-        // TODO: NOTE: Will be reenabled with next check-in once heuristic for line-aligned algorithm will be added.
-        //auto needed_buffer_size_x = static_cast<cldnn::tensor::value_type>(
-        //    round_up_to(left_padding + prev_prim_output_layout.size.spatial[0] + right_padding, 16));
-        //right_padding = needed_buffer_size_x - left_padding - prev_prim_output_layout.size.spatial[0];
-
-        cldnn::padding needed_padding({ 0, 0, left_padding, top_padding }, { 0, 0, right_padding, bottom_padding }, 0);
-        needed_padding = padding::max(prev_prim_output_layout.data_padding, needed_padding);
-
-        apply_needed_padding(node, conv_input_node, needed_padding);
+        remove_connection(dep_node, user_node);
+        add_connection(user_node, dep_node);
     }
+    else
+        throw std::runtime_error("Trying to reverse connection, but nodes are wrongly or not connected.");
 }
 
-void program_impl::propagate_constants()
+program_node& program_impl::get_or_create(std::shared_ptr<primitive> prim)
 {
-    constants_propagator prop(this);
+    auto itr = nodes_map.lower_bound(prim->id);
+    if (itr != nodes_map.end() && itr->first == prim->id)
+        return *itr->second;
 
-    for (auto& node : processing_order)
-        prop.visit_node(*node);
+    auto new_node = prim->type->create_node(*this, prim);
+    nodes_map.insert(itr, { prim->id, new_node });
+    return *new_node;
+}
 
-    auto&& to_replace = prop.calculate();
+void program_impl::add_intermediate(program_node& node, program_node& next, size_t prev_idx,
+    bool connect_int_node_with_old_dep, bool move_usrs_of_prev_to_node)
+{
+    if (connect_int_node_with_old_dep && !node.dependencies.empty())
+        throw std::invalid_argument("Node which is about to be added in between two other nodes should not have any existing dependencies");
 
-    //remove all nodes which are no longer relevant, i.e. nodes which:
-    // 1. are constants, and
-    // 2. do not have non-const user (so their data are not used during inference), and
-    // 3. are not marked as outputs.
-    // in case if node has either non-const user or is marked as output, it should be replace with cldnn::data rather than removed (see next loop)
-    auto proc_itr = processing_order.begin();
-    while (proc_itr != processing_order.end())
+    auto& prev = next.get_dependency(prev_idx);
+    //firstly add connection, later replace dependency, so 'prev' won't become dangling and therefore removed
+    if (connect_int_node_with_old_dep)
     {
-        auto& node = (*proc_itr++);
-        if (!node->is_constant())
-            continue;
-        if (node->has_non_const_user() || (node->is_output() && !node->is_type<data>()))
-            continue;
-
-        auto& users = node->users;
-        auto& deps = node->dependencies;
-
-        for (size_t idx = 0; idx < deps.size(); idx++)
-        {
-            deps.at(idx)->users.remove(node);
-        }
-        deps.clear();
-
-        for (auto& usr : users) {
-            auto& usr_deps = usr->dependencies;
-            usr_deps.erase(std::remove(usr_deps.begin(), usr_deps.end(), node), usr_deps.end());
-        }
-        users.clear();
-
-        if (!node->is_output())
+        add_connection(prev, node);
+        if (processing_order.size() != 0)
         {
-            auto rem = remove_if_dangling(*node);
-            assert(rem && "Non-output constant node which has only constant users should have been removed during constants propagation pass");
-            (void)rem;
+            processing_order.insert_next(&prev, &node);
         }
     }
 
-    //replace all constant nodes which are relevant for inference (either used by non-const user or marked as output) with recomputed cldnn::data
-    for (auto& cout : to_replace)
-    {
-        auto& id_to_replace = cout.first;
-
-        //TODO: do not use API primitives internally and get rid of this last 'cldnn::memory' internal usage
-        memory api_memory = details::memory_c_to_cpp_converter::convert(api_cast(cout.second.get()));
-        //c-cpp converter does not retain since normally it is done inside API-impl layer (cldnn.cpp) so we need to do it manually
-        cout.second->add_ref();
-
-        auto const_data = std::make_shared<data>("_cldnn_const_prop_" + id_to_replace, api_memory /* <<< REMOVE ME WHEN POSSIBLE */);
-        auto& new_node = get_or_create(const_data);
-        auto& curr_node = *nodes_map.at(id_to_replace);
-
-        if (!curr_node.is_type<generic_layer>())
+    if (move_usrs_of_prev_to_node) {
+        auto itr = prev.get_users().begin();
+        while(itr!= prev.get_users().end())
         {
-            auto curr_node_deps = curr_node.get_dependencies();
-            for (auto& dep : curr_node_deps)
-            {
-                auto dep_users = dep->get_users();
-                for (auto& dep_user : dep_users)
-                {
-                    if (dep_user == &curr_node)
-                        remove_connection(*dep, curr_node);
-                }
-            }
+            auto usr = *itr;
+            itr++;
+            if (usr->id() != node.id())
+                usr->replace_dependency(prev, node);
         }
-
-        curr_node.dependencies.clear();
-        //remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any dependencies)
-        curr_node.users.erase(
-            std::remove_if(curr_node.users.begin(), curr_node.users.end(), [](program_node* node) { return node->is_constant(); }),
-            curr_node.users.end()
-        );
-        replace(curr_node, new_node, false, false);
+        mark_if_constant(prev);
+        mark_if_constant(node);
+        mark_if_data_flow(prev);
+        mark_if_data_flow(node);
     }
-}
-
-void program_impl::prepare_buffer_fusing()
-{
-    bool is_debug = options.get<build_option_type::debug>()->enabled();
-    auto itr = processing_order.begin();
-    while (itr != processing_order.end())
-    {
-        auto& node = (*itr++);
-
-        // TODO: Move fused activation to previous layer when possible
-        if (node->fused_activation.activation_func != cldnn_activation_func_t::activation_none)
-            continue;
-
-        do_for_types<concatenation>(*node, [this, is_debug](concatenation_node& node)
-        {
-            // buffer fusing should not be performed if one of inputs produces padded output since
-            // it could break desired memory alignment. On the other hand, if this node uses all inputs
-            // exclusively (see check above) they should not have output padding set since concatenation
-            // does not ask for any.
-            if (node.has_padded_dependency())
-                return;
-
-            auto concat_axis = node.get_primitive()->axis;
-            auto padd = node.get_output_layout().data_padding;
-
-            tensor lower_padd = padd.lower_size();
-            tensor upper_padd = padd.upper_size();
-
-            auto upper_padd_val = node.get_output_layout().get_buffer_size().raw[concat_axis] - lower_padd.raw[concat_axis];
-            tensor lower_padd_offset = lower_padd;
-
-            std::list<std::pair<const std::vector<program_node*>, tensor>> stack = { std::make_pair(node.get_dependencies(), tensor{ 0, 0, 0, 0 }) };
-            while (!stack.empty())
-            {
-                auto nodes_list = stack.front();
-                stack.pop_front();
-
-                auto cascade_adjustment = nodes_list.second;
-                upper_padd.raw[concat_axis] = upper_padd_val;
-                lower_padd = lower_padd_offset;
-
-                //check if concatenation in place can be applied for inputs set
-                for (auto input : nodes_list.first)
-                {
-                    //if any of this node's inputs is used by more than one primitive and is not optimized concatenation then do not fuse buffers,
-                    //also, if an input is marked as network output, prevent optimizations which would affect a form of its output (unless debug flag is set)
-                    // todo: in future, if this case is problem, it can be optimized further to enable buffer fusing
-                    //       per single input rather than all/none
-                    // + restrict input types to pooling, convolution and activation only due to problems with output padding on b and f
-                    if ((!input->is_type<pooling>() && !input->is_type<convolution>() && !input->is_type<activation>() && !input->is_type<concatenation>() && !input->is_type<crop>() && !input->is_type<scale>()) ||
-                        (input->is_output() && !is_debug) ||
-                        input->get_users().size() > 2)
-                        return;
-
-                    if (input->get_users().size() > 1)
-                    {
-                        auto user_count = input->get_users().size();
-                        for (auto& user : input->get_users())
-                            if (user->is_type<concatenation>())
-                                user_count--;
-                        if (user_count > 1)
-                            return;
-                    }
-
-                    //check only for spatial paddings. Accept feature and batch
-                    if (input->get_output_layout().data_padding.lower_size().spatial[0] != 0 ||
-                        input->get_output_layout().data_padding.upper_size().spatial[0] != 0 ||
-                        input->get_output_layout().data_padding.lower_size().spatial[1] != 0 ||
-                        input->get_output_layout().data_padding.upper_size().spatial[1] != 0)
-                        return;
-                }
-
-                //apply concatenation in place optimization
-                for (auto input : nodes_list.first)
-                {
-                    auto input_lenght = input->get_output_layout().size.raw[concat_axis];
-
-                    // shrink upper pad so it points at the end of the input's buffer
-                    //
-                    //   |--- lower padd ---|                    |---------- upper padd -----------|
-                    //   |-- output padd ---| ----- input1 ------|----- input2 -----|-- out padd --|
-                    upper_padd.raw[concat_axis] -= input_lenght;
-
-                    //adjust padding sizes for cascade concatenations
-                    auto lower_padd_tmp = lower_padd;
-                    lower_padd_tmp.raw[concat_axis] += cascade_adjustment.raw[concat_axis];
-                    auto upper_padd_tmp = upper_padd;
-                    upper_padd_tmp.raw[concat_axis] -= cascade_adjustment.raw[concat_axis];
-
-                    // set new padding for input
-                    input->set_output_padding(padding(lower_padd_tmp.sizes(), upper_padd_tmp.sizes()));
-
-                    // move lower padd further
-                    //
-                    //   |-------------- lower padd -------------|---------- upper padd -----------|
-                    //   |-- output padd ---| ----- input1 ------|----- input2 -----|-- out padd --|
-
-                    lower_padd.raw[concat_axis] += input_lenght;
-
-                    if (input->type() == concatenation::type_id() && input->can_be_optimized())
-                    {
-                        if (input->as<concatenation>().get_primitive()->axis != node.get_primitive()->axis)
-                            return;
-
-                        if (!input->get_dependencies().empty())
-                            stack.push_back(std::make_pair(input->get_dependencies(), input->get_output_layout().data_padding.lower_size()));
-                    }
-                }
-            }
-
-            node.can_be_optimized(true);
-        });
-
-        // zero copy
-        do_for_types<crop>(*node, [this, is_debug](crop_node& node)
-        {
-            //if the node is marked as network output, prevent optimizations which would affect a form of its output, unless debug flag is set
-            if (node.is_output() && !is_debug)
-                return;
-
-            //do not optimize when next node is concatenation which is not output
-            if (node.get_users().size() == 1 && node.get_users().front()->is_type<concatenation>() && !node.get_users().front()->is_output())
-                return;
-
-            if (node.get_dependencies().size() == 1 &&
-                node.get_users().size() > 0)
-            {
-                // optimization is avaiable for croping across depth(features) only
-                // if output padding has defined padding accross featuers already it wouldn't
-                // work because it expect to have zeros in the padded area.
-                auto format = node.get_output_layout().format;
-                auto crop_prim = node.get_primitive();
-                auto input_layout = node.get_dependency(0).get_output_layout();
-                auto out_padd = node.get_output_layout().data_padding;
-                if (format == format::bfyx &&
-                    crop_prim->reference_input.batch[0] == input_layout.size.batch[0] &&
-                    crop_prim->reference_input.spatial[0] == input_layout.size.spatial[0] &&
-                    crop_prim->reference_input.spatial[1] == input_layout.size.spatial[1] &&
-                    out_padd.lower_size().feature[0] == 0 &&
-                    out_padd.upper_size().feature[0] == 0 &&
-                    out_padd.lower_size().batch[0] == 0 &&
-                    out_padd.upper_size().batch[0] == 0 &&
-                    out_padd.lower_size().spatial[0] == 0 &&
-                    out_padd.lower_size().spatial[1] == 0 &&
-                    out_padd.upper_size().spatial[0] == 0 &&
-                    out_padd.upper_size().spatial[1] == 0)
-                {
-                    //  Regular crop
-                    //  crop input buffer
-                    //  |___________data____________|
-                    //
-                    //  crop output buffer
-                    //  |-------->| offsets[f]  |<--|
-                    //            |_____data____|
-                    //             <------------>
-                    //           reference size
-                    //
-                    //  Inplace crop
-                    //  crop output buffer
-                    //  |_low_pad_|__data_size__|___|<-upper pad
-
-                    node.set_output_padding(padding(
-                    { out_padd.lower_size().batch[0], crop_prim->offsets.feature[0], out_padd.lower_size().spatial[0], out_padd.lower_size().spatial[1] },
-                    { out_padd.upper_size().batch[0], input_layout.size.feature[0] - crop_prim->offsets.feature[0] - crop_prim->reference_input.feature[0],
-                        out_padd.upper_size().spatial[0], out_padd.upper_size().spatial[1] }));
-                    node.can_be_optimized(true);
-                }
-            }
-        });
-
-        do_for_types<reshape>(*node, [this](reshape_node& node)
-        {
-            node.get_output_layout();
-            if (node.is_in_place() && node.get_fused_activation_func() == activation_none)
-                node.can_be_optimized(true);
-        });
-        do_for_types<reorder>(*node, [this](reorder_node& node)
-        {
-            auto& input = node.input();
-            auto output_layout = node.get_output_layout();
-            //This is WA for topologies that due to additional reorders added perform worse with conv1x1 optimization
-            auto remove_bf8_xy_opt = ((input.is_type<pooling>() || input.is_type<concatenation>()) &&
-                output_layout.format == format::bf8_xy16 && input.get_users().size() == 1);
-            //Remove reorder from convolution 1x1 to bfyx in some conditions
-            auto remove_byxf_opt = (input.is_type<convolution>() &&
-                input.get_users().size() == 1 &&
-                input.get_output_layout().format == format::byxf);
-            //check if all inputs user have the same format
-            auto all_users_same_format = true;
-            auto input_user_layout_format = input.get_users().front()->get_output_layout().format;
-            for (auto const& user : input.get_users())
-            {
-                if (user->get_output_layout().format != input_user_layout_format)
-                {
-                    all_users_same_format = false;
-                    break;
-                }
-            }
-            auto same_data_type = input.get_output_layout().data_type == output_layout.data_type;
-            //Optimization only available in case of layers that support different input and output formats.
-            //todo: new api needs to be created to read such caps
-            if (!(input.is_type<pooling>() && (output_layout.format == format::bfyx || output_layout.format == format::yxfb || output_layout.format == format::byxf) && all_users_same_format && same_data_type) &&
-                !remove_bf8_xy_opt &&
-                !(input.is_type<convolution>() && input.get_output_layout().format == format::bf8_xy16) &&
-                !(input.is_type<eltwise>() && (output_layout.format == format::bfyx || output_layout.format == format::yxfb || output_layout.format == format::byxf) && all_users_same_format && same_data_type) &&
-                !(remove_byxf_opt && (node.get_users().front()->is_type<eltwise>() || node.get_users().front()->is_type<pooling>())))
-                return;
-
-            if (remove_bf8_xy_opt)
-            {
-                auto users_user_layout = node.get_users().front()->get_users().front()->get_output_layout();
-				// if users_user_layout is still bf8_yx16 (stacked convolutions) then leave the reorder
-				if (users_user_layout.format == format::bf8_xy16)
-					return;
-                auto input_layout = input.get_output_layout();
-                auto target_layout = layout(input_layout.data_type, users_user_layout.format, input_layout.size, input_layout.data_padding);
-                input.set_output_layout(target_layout, false);
-            }
-            else if (remove_byxf_opt)
-            {
-                auto user = node.get_users().front();
-                auto users_users = node.get_users().front()->get_users();
-
-                for (auto const& users_user : users_users)
-                {
-                    if (users_user->get_output_layout().format != format::byxf && !users_user->is_type<eltwise>())
-                    {
-                        remove_byxf_opt = false;
-                        break;
-                    }
-                }
-
-                if (remove_byxf_opt)
-                {
-                    auto input_layout = input.get_output_layout();
-                    user->set_output_layout(input_layout, false);
-                }
-            }
-            else
-                input.set_output_layout(output_layout, false);
-
-            node.can_be_optimized(true);
-            extract_and_remove(node); //try to remove redundant reorders
-        });
+    else {
+        next.replace_dependency(prev_idx, node);
+        node.constant = prev.constant;
+        node.data_flow = prev.data_flow;
     }
 }
 
-void program_impl::fuse_skip_layers(program_node* node)
+void program_impl::add_intermediate(std::shared_ptr<primitive> prim, program_node& next, size_t prev_idx, 
+    bool connect_int_node_with_old_dep, bool move_usrs_of_prev_to_node)
 {
-    do_for_types<eltwise>(*node, [this](eltwise_node& node)
-    {
-        bool skippable = false;
-        int index = 0;
-        if (node.get_primitive()->mode != eltwise_mode::sum || node.inputs_count() != 2)
-            return;
-
-        if (node.input(0).is_type<deconvolution>())
-        {
-            skippable = true;
-        }
-        else if (node.input(1).is_type<deconvolution>())
-        {
-            skippable = true;
-            index = 1;
-        }
-
-        if (!skippable)
-            return;
-
-        auto& to_fuse_with = node.input(index).as<deconvolution>();
-        int to_fuse_index = index == 0 ? 1 : 0;
-
-        // check that node doesn't have fused eltwise already
-        if (to_fuse_with.has_fused_sum())
-            return;
-
-        //remove dependencies and users of elwtise that is going to be extracted
-        add_connection(node.input(to_fuse_index), to_fuse_with);
-        remove_connection(node.input(to_fuse_index), node);
-
-        //replace processing_num of the node where fusing take place and eltwise
-        auto new_processing_num = node.processing_num;
-        processing_order.erase(to_fuse_with.processing_itr);
-        to_fuse_with.processing_itr = processing_order.insert(node.processing_itr, &to_fuse_with);
-        to_fuse_with.processing_num = new_processing_num;
-
-        //make sure that new fused node's users have higher processing_num than fused node
-        for (auto user : to_fuse_with.get_users())
-        {
-            if (user->processing_num < new_processing_num)
-            {
-                processing_order.erase(user->processing_itr);
-                user->processing_itr = processing_order.insert(std::next(to_fuse_with.processing_itr), user);
-                user->processing_num = new_processing_num + 1;
-            }
-        }
-
-        if (node.get_fused_activation_func() != activation_none)
-            to_fuse_with.set_fused_activation(node.get_fused_activation_func(), node.get_fused_activation_params());
-        to_fuse_with.set_output_padding(node.get_output_layout().data_padding);
-
-        extract_and_remove(node);
-    });
+    add_intermediate(get_or_create(prim), next, prev_idx, connect_int_node_with_old_dep, move_usrs_of_prev_to_node);
 }
 
-void program_impl::prepare_primitive_fusing()
+void program_impl::add_connection(program_node& prev, program_node& next)
 {
-    bool is_debug = options.get<build_option_type::debug>()->enabled();
-
-    auto itr = processing_order.begin(); //note we need to use iterators since currently processed element can be removed
-    while (itr != processing_order.end())
-    {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-
-        do_for_types<activation>(*node, [this, is_debug](activation_node& node)
-        {
-
-            auto& input = node.input();
-
-            //Restrictions:
-            // - inputs cannot be padded
-            // - primitives input cannot be output
-            // - no activation additional input
-            // - input was optimized
-            if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.is_output() ||
-                node.get_dependencies().size() != 1 || input.can_be_optimized())
-                return;
-
-            // - check if there is no activation fused already
-            // - limit to primitives which implementations support activation fusing
-            if (input.get_users().size() != 1 || input.get_fused_activation_func() != activation_none ||
-                //TODO: new api needs to be created to read such caps
-                //right now use whitelist so no new primitives will be affected in case of lack of fused activation support
-                (!input.is_type<batch_norm>() && !input.is_type<concatenation>() && !input.is_type<convolution>() &&
-                    !input.is_type<crop>() && !input.is_type<deconvolution>() && !input.is_type<eltwise>() &&
-                    !input.is_type<fully_connected>() && !input.is_type<lrn>() && !input.is_type<normalize>() &&
-                    !input.is_type<permute>() && !input.is_type<pooling>() && !input.is_type<reorder>() &&
-                    !input.is_type<roi_pooling>() && !input.is_type<scale>() &&
-                    !input.is_type<softmax>() && !input.is_type<upsampling>() && !input.is_type<mvn>()))
-                return;
-
-            input.set_fused_activation(node.get_primitive()->activation_func, node.get_primitive()->additional_params);
-            input.set_output_padding(node.get_output_layout().data_padding);
-
-            extract_and_remove(node);
-        });
-    }
-
-    //Second loop tries fusing several reorders one by one (if present) into one reorder
-    itr = processing_order.begin();
-    while (itr != processing_order.end())
-    {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-
-        do_for_types<reorder>(*node, [this, is_debug](reorder_node& node)
-        {
-            auto& input = node.input();
-
-            //Restrictions:
-            // - inputs cannot be padded
-            // - primitives input cannot be output
-            // - input was optimized
-            if (node.has_padded_dependency() || (input.is_output() && !is_debug) || node.get_dependencies().size() != 1 ||
-                input.can_be_optimized())
-                return;
-
-            // - check if previous node is reorder with 1 user
-            // - do not fuse if current node has mean subtract
-            if (input.get_users().size() != 1 || !input.is_type<reorder>() ||
-                node.has_mean() || !node.get_primitive()->subtract_per_feature.empty())
-                return;
-
-            input.set_output_layout(node.get_output_layout(), false);
-            extract_and_remove(node);
-        });
-    }
-    //Third loop tries fusing eltwise (sum) with deconvolution
-    itr = processing_order.begin();
-    while (itr != processing_order.end())
-    {
-        auto node_itr = itr++;
-        auto& node = (*node_itr);
-
-        fuse_skip_layers(node);
-    }
+    prev.users.push_back(&next);
+    next.dependencies.push_back(&prev);
 }
 
-program_node& program_impl::get_or_create(std::shared_ptr<primitive> prim)
+void program_impl::remove_connection(program_node& prev, program_node& next)
 {
-    auto itr = nodes_map.lower_bound(prim->id);
-    if (itr != nodes_map.end() && itr->first == prim->id)
-        return *itr->second;
-
-    auto new_node = prim->type->create_node(*this, prim);
-    new_node->set_org_primitive_id(new_node->id());
-    nodes_map.insert(itr, { prim->id, new_node });
-    return *new_node;
+    prev.users.remove(&next);
+    next.dependencies.erase(std::remove(next.dependencies.begin(), next.dependencies.end(), &prev), next.dependencies.end());
 }
 
-void program_impl::add_intermediate(program_node& node, program_node& next, size_t prev_idx, bool connect_int_node_with_old_dep)
-{
-    if (connect_int_node_with_old_dep && !node.dependencies.empty())
-        throw std::invalid_argument("Node which is about to be added inbetween two other nodes should not have any existing dependencies");
-
-    auto& prev = next.get_dependency(prev_idx);
-    //firstly add connection, later replace dependency, so 'prev' won't become dangling and therefore removed
-    if (connect_int_node_with_old_dep)
+void program_impl::remove_all_connections(program_node& node) {
+    // since the graph is not topological sorted, we need to remove the node from both dependencies and users
+    for (auto &e : node.users)
     {
-        add_connection(prev, node);
-        if (node.processing_itr != processing_order.end())
-            processing_order.erase(node.processing_itr);
-
-        auto itr = prev.processing_itr;
-        node.processing_itr = processing_order.insert(++itr, &node);
-        node.processing_num = prev.processing_num;
+        e->dependencies.erase(std::remove(e->dependencies.begin(), e->dependencies.end(), &node), e->dependencies.end());
     }
-
-    next.replace_dependency(prev_idx, node);
-    node.constant = prev.constant;
-    node.data_flow = prev.data_flow;
-    if (prev.constant_frontier)
+    for (auto &e : node.dependencies) 
     {
-        node.constant_frontier = true;
-        prev.constant_frontier = false;
+        e->users.remove(&node);
     }
+    node.dependencies.clear();
+    node.users.clear();
 }
 
 void program_impl::rename(program_node & node, primitive_id const & new_id)
@@ -2776,9 +975,9 @@ void program_impl::replace_all_usages(program_node & old_node, program_node & ne
     }
 }
 
-void program_impl::replace(program_node& old_node, program_node& new_node, bool replace_whole_branch, bool check_output_layouts_integrity)
+void program_impl::replace(program_node& old_node, program_node& new_node)
 {
-    if ((!new_node.dependencies.empty() && !replace_whole_branch) || !new_node.users.empty())
+    if (!new_node.dependencies.empty() || !new_node.users.empty())
         throw std::invalid_argument("Node which is about to replace other node should be detached");
 
     if (new_node.is_output())
@@ -2788,15 +987,13 @@ void program_impl::replace(program_node& old_node, program_node& new_node, bool
     new_node.output_layout = old_node.get_output_layout();
     new_node.valid_output_layout = old_node.valid_output_layout;
 
-    if (!replace_whole_branch)
+    
+    //copy old's dependencies
+    while (!old_node.dependencies.empty())
     {
-        //copy old's dependencies
-        while (!old_node.dependencies.empty())
-        {
-            auto& dep = old_node.dependencies.back();
-            add_connection(*dep, new_node);
-            remove_connection(*dep, old_node);
-        }
+        auto& dep = old_node.dependencies.front();
+        add_connection(*dep, new_node);
+        remove_connection(*dep, old_node);
     }
 
     //append users
@@ -2815,9 +1012,6 @@ void program_impl::replace(program_node& old_node, program_node& new_node, bool
 
     old_node.users.clear();
 
-    if (check_output_layouts_integrity && new_node.valid_output_layout)
-        new_node.recalc_output_layout();
-
     bool old_was_output = false;
     //copy node's state
     if (old_node.is_output())
@@ -2832,17 +1026,11 @@ void program_impl::replace(program_node& old_node, program_node& new_node, bool
         inputs.remove(&old_node);
 
     new_node.constant = old_node.constant;
-    new_node.constant_frontier = old_node.constant_frontier;
     new_node.user_mark = old_node.user_mark;
 
-    auto old_news_pos = new_node.processing_itr;
-    new_node.processing_itr = processing_order.insert(old_node.processing_itr, &new_node);
-    new_node.processing_num = old_node.processing_num;
-    if (old_news_pos != processing_order.end())
-        processing_order.erase(old_news_pos);
-    if (old_node.processing_itr != processing_order.end())
-        processing_order.erase(old_node.processing_itr);
-
+    processing_order.insert(&old_node, &new_node);
+    if (processing_order.get_processing_iterator(old_node) != processing_order.end())
+        processing_order.erase(&old_node);
     nodes_map.erase(id);
     rename(new_node, id);
 
@@ -2854,65 +1042,23 @@ void program_impl::replace(program_node& old_node, program_node& new_node, bool
     }
 }
 
-bool program_impl::remove_if_dangling(program_node& node, bool detach_whole_branch)
+bool program_impl::remove_if_dangling(program_node& node)
 {
     if (!node.users.empty())
         return false;
-    if (!detach_whole_branch && !node.dependencies.empty())
+    if (!node.dependencies.empty())
         return false;
 
-    std::list<program_node*> to_remove;
-    std::list<program_node*> marked;
-    if (detach_whole_branch)
+    if (!node.is_output() || is_debug_build())
     {
-        node.mark();
-        std::list<program_node*> queue = { &node };
-        while (!queue.empty())
-        {
-            auto curr = queue.front();
-            queue.pop_front();
-            marked.push_back(curr);
-
-            //remove only if all users also has been marked
-            bool rem = !std::any_of(curr->get_users().begin(), curr->get_users().end(), [](program_node* node) { return !node->is_marked(); });
-            if (rem)
-                to_remove.push_back(curr);
-
-            for (auto dep : curr->get_dependencies())
-            {
-                if (!dep->is_marked())
-                {
-                    dep->mark();
-                    queue.push_back(dep);
-                }
-            }
-        }
-    }
-    else
-        to_remove.push_back(&node);
-
-    for (auto n : marked)
-        n->unmark();
-
-    for (auto rem : to_remove)
-    {
-        if (!rem->is_output() || is_debug_build())
-        {
-            if (detach_whole_branch)
-            {
-                for (auto& user : rem->get_users())
-                    user->remove_dependency(*rem);
-            }
-            if (rem->is_input())
-                inputs.remove(rem);
+        if (node.is_input())
+            inputs.remove(&node);
 
-            if (std::find(processing_order.begin(), processing_order.end(), rem) != processing_order.end())
-                processing_order.erase(rem->processing_itr);
-            optimized_out.push_back(rem->id());
-            nodes_map.erase(rem->id());
-        }
+        if (std::find(processing_order.begin(), processing_order.end(), &node) != processing_order.end())
+            processing_order.erase(&node);
+        optimized_out.push_back(node.id());
+        nodes_map.erase(node.id());
     }
-
     return true;
 }
 
@@ -2943,13 +1089,6 @@ bool program_impl::extract_and_remove(program_node& node)
     node.dependencies.clear();
     input.users.remove(&node);
 
-    if (node.constant_frontier)
-    {
-        assert(node.constant && "Constant frontier should also, by definition, be constant");
-        assert(input.constant && "Input for constant forontier should, by definition, be constant");
-        input.constant_frontier = true;
-    }
-
     if (!node.is_endpoint())
         replace_all_usages(node, input);
     else
@@ -2958,14 +1097,26 @@ bool program_impl::extract_and_remove(program_node& node)
     return true;
 }
 
-void program_impl::replace_data_with_optimized(std::map<primitive_id, memory_impl::ptr> const & replace_map)
+void program_impl::remove_nodes(std::list<program_node*>& to_remove)
 {
-    for (auto& result : replace_map)
+    for (auto const& node : to_remove)
     {
-        auto& node = *nodes_map.at(result.first);
-        assert(node.is_type<data>() && "Optimized primitive is not a cldnn::data");
-        assert(result.second != nullptr && "Memory which handles result of optimization should not be nullptr");
-        node.as<data>().attach_memory(*result.second, false);
+        if (node->is_input())
+            get_inputs().remove(node);
+        else
+        {
+            for (auto& dep : node->dependencies)
+                dep->users.remove(node);
+        }
+        for (auto& user : node->users)
+        {
+            user->dependencies.erase(std::remove(user->dependencies.begin(),
+                user->dependencies.end(), node),
+                user->dependencies.end());
+        }
+        get_processing_order().erase(node);
+        optimized_out.push_back(node->id());
+        nodes_map.erase(node->id());
     }
 }
 
@@ -2978,17 +1129,17 @@ void program_impl::dump_memory_pool() const
     {
         return;
     }
-
     path += "cldnn_memory_pool.log";
     auto dep = get_memory_dependencies_string();
     get_engine().dump_memory_pool(*this, path, dep);
-    dump_program("14_memory_pool", true);
+    std::string dump_file_name = std::to_string(pm->get_pass_count()+1) + "_memory_pool";
+    dump_program(dump_file_name.c_str(), true);
 }
 
 //TODO: break this function into number of smaller ones + add per-primitive fields (possibly use primitive_inst::to_string?)
 void program_impl::dump_program(const char* stage, bool with_full_info, std::function<bool(program_node const&)> const& filter) const
 {
-    auto path = get_dir_path(options);
+    std::string path = get_dir_path(options);
     if (path.empty())
     {
         return;
@@ -3012,41 +1163,4 @@ void program_impl::dump_program(const char* stage, bool with_full_info, std::fun
     dump_graph_optimized(graph, *this);
 }
 
-//Dumps weights and biasses in serialization process, not working yet, in progress.
-void program_impl::dump_weights_and_biasses(std::vector<unsigned long long>& offsets, std::vector<std::string>& data_names, std::ofstream& file_stream) const
-{
-    for (auto const& n : nodes_map)
-    {
-        auto dependency_count = (unsigned int)n.second.get()->get_dependencies().size();
-        for (unsigned int dp = 0; dp < dependency_count; dp++)
-        {
-            auto& dependency = n.second.get()->get_dependency(dp);
-            if (dependency.is_type<data>())
-            {
-                offsets.push_back(offsets.empty() ? 0ull : offsets.back());
-                auto& mem = dependency.as<data>().get_attached_memory();
-                if (mem.get_layout().data_type == data_types::f32)
-                    dump_data(mem, file_stream, offsets.back(), sizeof(float));
-                else
-                    dump_data(mem, file_stream, offsets.back(), sizeof(short));
-                data_names.push_back(dependency.as<data>().id());
-            }
-        }
-    }
-    file_stream.close();
-}
-
-//Makes serialization with given name.
-//Placeholder, not working yet, in progress.
-void program_impl::serialize(std::string network_name, std::function<bool(program_node const&)> const& filter) const
-{
-    std::vector<unsigned long long> offsets;
-    std::vector<std::string> data_names;
-
-    std::ofstream file_stream(network_name + "_" + "serialization" + ".bin", std::ios::binary);
-    dump_kernels(engine->get_context().get()->get_kernels_cache().get_context().get_binaries(), offsets, data_names, file_stream);
-    dump_weights_and_biasses(offsets, data_names, file_stream);
 
-    std::ofstream graph(network_name + "_" + "serialization" + ".xml");
-    dump_to_xml(graph, *this, filter, offsets, data_names);
-}
diff --git a/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp b/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp
index 7e4c7396d..b82dd0edd 100644
--- a/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program_dump_graph.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2018 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -18,7 +18,13 @@
 
 #include "program_dump_graph.h"
 #include "to_string_utils.h"
-#include "xml_object.h"
+#include "data_inst.h"
+#include "condition_inst.h"
+
+#include "gpu/ocl_toolkit.h"
+
+#include "to_string_utils.h"
+
 #include <algorithm>
 #include <vector>
 
@@ -152,12 +158,12 @@ namespace cldnn
         graph.close();
     }
 
-    std::string get_node_id(program_node* ptr)
+    std::string get_node_id(const program_node* ptr)
     {
         return "node_" + std::to_string(reinterpret_cast<uintptr_t>(ptr));
     }
 
-    void dump_full_node(std::ofstream& out, program_node* node)
+    void dump_full_node(std::ofstream& out, const program_node* node)
     {
         out << node->type()->to_string(*node);
     }
@@ -193,31 +199,7 @@ namespace cldnn
     {
         const auto extr_oformat = [](program_node* ptr)
         {
-            std::string out = "";
-            switch (ptr->get_output_layout().format)
-            {
-            case format::yxfb: out = "yxfb"; break;
-            case format::byxf: out = "byxf"; break;
-            case format::bfyx: out = "bfyx"; break;
-            case format::fyxb: out = "fyxb"; break;
-            case format::os_iyx_osv16: out = "os_iyx_osv16"; break;
-            case format::bs_xs_xsv8_bsv8: out = "bs_xs_xsv8_bsv8"; break;
-            case format::bs_xs_xsv8_bsv16: out = "bs_xs_xsv8_bsv16"; break;
-            case format::bs_x_bsv16: out = "bs_x_bsv16"; break;
-            case format::bf8_xy16: out = "bf8_xy16"; break;
-            case format::image_2d_weights_c1_b_fyx: out = "image_2d_weights_c1_b_fyx"; break;
-            case format::image_2d_weights_c4_fyx_b: out = "image_2d_weights_c4_fyx_b"; break;
-            case format::image_2d_weights_winograd_6x3_s1_fbxyb: out = "image_2d_weights_winograd_6x3_s1_fbxyb"; break;
-            case format::image_2d_weights_winograd_6x3_s1_xfbyb: out = "image_2d_weights_winograd_6x3_s1_xfbyb"; break;
-            case format::os_is_yx_isa8_osv8_isv4: out = "os_is_yx_isa8_osv8_isv4"; break;
-            case format::is_o_yx_isv32: out = "is_o_yx_isv32"; break;
-            case format::byxf_af32: out = "byxf_af32"; break;
-            case format::fs_bs_yx_bsv4_fsv32: out = "fs_bs_yx_bsv4_fsv32"; break;
-            case format::any: out = "any"; break;
-            default:
-                out = "unk format";
-                break;
-            }
+            std::string out = fmt_to_str(ptr->get_output_layout().format);
 
             if (!ptr->is_valid_output_layout())
                 out += " (invalid)";
@@ -225,22 +207,6 @@ namespace cldnn
             return out;
         };
 
-        const auto extr_data_type = [](program_node* ptr)
-        {
-            std::string out = "";
-            switch (ptr->get_output_layout().data_type)
-            {
-            case data_types::i8: out = "i8"; break;
-            case data_types::u8: out = "u8"; break;
-            case data_types::f16: out = "f16"; break;
-            case data_types::f32: out = "f32"; break;
-            default:
-                out = "unknown data_type";
-                break;
-            }
-            return out;
-        };
-
         const auto dump_mem_info = [](program_node* ptr)
         {
             std::string out = "size_info: ";
@@ -262,7 +228,7 @@ namespace cldnn
         };
 
         graph << "digraph cldnn_program {\n";
-        for (auto& node : program.get_nodes())
+        for (auto& node : program.get_processing_order())
         {
             if (filter && !filter(*node))
             {
@@ -272,23 +238,36 @@ namespace cldnn
                 #pragma clang diagnostic push
                 #pragma clang diagnostic ignored "-Wpotentially-evaluated-expression"
             #endif
-            std::string node_type = get_extr_type(typeid(*node).name());
-            graph << "    " << get_node_id(node.get()) << "[label=\"" << node->id() << ":\n" << node_type << "\n out format: " + extr_oformat(node.get())
-                << "\n out data_type: " + extr_data_type(node.get())
-                << "\\nprocessing number: " << node->get_processing_num() << "\\n color:" << (node->is_reusing_memory() ? std::to_string(node->get_reused_memory_color()) : "none")
+            auto& node_type = typeid(*node);
+            std::string node_type_name = get_extr_type(node_type.name());
+            graph << "    " << get_node_id(node) << "[label=\"" << node->id() << ":\n" << node_type_name << "\n out format: " + extr_oformat(node)
+                << "\n out data_type: " + dt_to_str(node->get_output_layout().data_type)
+                << "\\nprocessing number: " << program.get_processing_order().get_processing_number(node) << "\\n color:" << (node->is_reusing_memory() ? std::to_string(node->get_reused_memory_color()) : "none")
                 << (node->can_be_optimized() ? "\\n optimized out" : "");
-            if (node_type != "struct cldnn::data" && node_type != "struct cldnn::input_layout" && !node->can_be_optimized())
+
+            if (node_type_name != "struct cldnn::data" && node_type_name != "struct cldnn::input_layout" && !node->can_be_optimized())
+            {
                 graph << "\\n Selected kernel: " << (node->get_selected_impl() == nullptr ? "none" : node->get_selected_impl().get()->get_kernel_name()
-                    + "\n" + dump_mem_info(node.get()));
+                    + "\n" + dump_mem_info(node));
+            }
             graph << "\"";
             #ifdef __clang__
                 #pragma clang diagnostic pop
             #endif
 
+            if (node->is_type<condition>())
+            {
+                graph << ", shape=diamond";
+            }
             if (node->is_type<data>() || node->is_constant())
+            {
                 graph << ", shape=box";
+            }
             if (node->is_type<internal_primitive>())
+            {
                 graph << ", color=blue";
+            }
+
             if (node->is_reusing_memory())
             {
                 graph << ", fillcolor=\"" << colors[node->get_reused_memory_color() % colors.size()] << "\" ";
@@ -303,9 +282,9 @@ namespace cldnn
                     continue;
                 }
                 bool doubled = true;
-                if (std::find(user->get_dependencies().begin(), user->get_dependencies().end(), node.get()) == user->get_dependencies().end())
+                if (std::find(user->get_dependencies().begin(), user->get_dependencies().end(), node) == user->get_dependencies().end())
                     doubled = false;
-                graph << "    " << get_node_id(node.get()) << " -> " << get_node_id(user);
+                graph << "    " << get_node_id(node) << " -> " << get_node_id(user);
 
                 bool data_flow = node->is_in_data_flow() && user->is_in_data_flow();
                 if (data_flow)
@@ -330,12 +309,12 @@ namespace cldnn
                     continue;
                 }
 
-                if (std::find(dep->get_users().begin(), dep->get_users().end(), node.get()) != dep->get_users().end())
+                if (std::find(dep->get_users().begin(), dep->get_users().end(), node) != dep->get_users().end())
                 {
                     continue;
                 }
 
-                graph << "   " << get_node_id(node.get()) << " -> " << get_node_id(dep) << " [style=dashed, label=\"dep\", constraint=false];\n";
+                graph << "   " << get_node_id(node) << " -> " << get_node_id(dep) << " [style=dashed, label=\"dep\", constraint=false];\n";
             }
         }
         graph << "}\n";
@@ -361,101 +340,16 @@ namespace cldnn
 
     void dump_graph_info(std::ofstream& graph, const program_impl& program, std::function<bool(program_node const&)> const& filter)
     {
-        for (auto& node : program.get_nodes())
+        for (auto& node : program.get_processing_order())
         {
             if (filter && !filter(*node))
                 continue;
 
-            dump_full_node(graph, node.get());
+            dump_full_node(graph, node);
             graph << std::endl << std::endl;
         }
         close_stream(graph);
     }
-
-    //Function used by serialization. Not working yet, in progress.
-    void dump_to_xml(std::ofstream& graph, const program_impl& program, std::function<bool(program_node const&)> const& filter, std::vector<unsigned long long>& offsets, std::vector<std::string>& data_names)
-    {
-        xml_composite data_container, node_container;
-        auto node_number = 1;
-        auto kernels_number = 1;
-        auto postion = 0u;
-        auto offset = 0ull;
-        auto size = offsets.at(0);
-        for (auto& node : program.get_nodes())
-        {
-            if (filter && !filter(*node))
-                continue;
-
-            std::string package_name = "node_" + std::to_string(node_number);
-            auto node_info = node.get()->desc_to_xml();
-            auto id = node->id();
-            for (auto p = postion; p < (unsigned int)data_names.size(); p++)
-            {
-                    if (p != 0)
-                    {
-                        offset = offsets.at(p - 1);
-                        size = offsets.at(p) - offsets.at(p - 1);
-                    }
-                    if (data_names.at(p).find("kernels") != std::string::npos)
-                    {
-                        node_info.reset(new xml_composite());
-                        node_info->add("id", data_names.at(p));
-                        id = "kernels";
-                        package_name = "kernels_" + std::to_string(kernels_number);
-
-                        postion++;
-                        kernels_number++;
-                        node_number--;
-                    }
-                    if (data_names.at(p).find(id) != std::string::npos)
-                    {
-                        node_info->add("data_offset", std::to_string(offset));
-                        node_info->add("data_size", std::to_string(size));
-                        node_number++;
-                        break;
-                    }
-            }
-            node_container.add(package_name, node_info.get()); 
-        }
-        data_container.add("data", node_container);
-        data_container.dump(graph);
-        close_stream(graph);
-    }
-
-    //Function used by serialization. Not working yet, in progress.
-    void dump_kernels(kernels_binaries_container program_binaries, std::vector<unsigned long long>& offsets, std::vector<std::string>& data_names, std::ofstream& file_stream)
-    {
-        auto offset_temp = 0ull;
-        for (unsigned int i = 0; i < (unsigned int)program_binaries.size(); i++)
-        {
-            for (unsigned int j = 0; j < (unsigned int)program_binaries.at(i).size(); j++)
-            {
-                for (unsigned int k = 0; k < (unsigned int)program_binaries.at(i).at(j).size(); k++)
-                {
-                    char* p = (char*)&program_binaries.at(i).at(j).at(k);
-                    file_stream.write(p, sizeof(char));
-                    offset_temp += sizeof(char);
-                }
-            }
-            offsets.push_back(offset_temp);
-            std::string offset_name = "kernels_part_" + std::to_string(i+1);
-            data_names.push_back(offset_name);
-        }
-    }
-
-    //Function used by serialization. Not working yet, in progress.
-    void dump_data(memory_impl& mem, std::ofstream& stream, unsigned long long& total_offset, unsigned long long type)
-    {
-        auto offset = 0ull;
-        char * ptr = (char*)mem.lock();
-        for (unsigned int x = 0; x < (unsigned int)mem.get_layout().count(); x++)
-        {
-            stream.write(ptr + offset, type);
-            offset += type;
-        }
-        mem.unlock();
-        total_offset += offset;
-    }
 }
 
  
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/program_helpers.cpp b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp
new file mode 100644
index 000000000..4565c0bf3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/program_helpers.cpp
@@ -0,0 +1,92 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include "program_helpers.h"
+#include "program_impl.h"
+#include "data_inst.h"
+
+namespace cldnn
+{
+    //helper function for merging the weights/biases buffers on cpu side for depthwise separable convolution optimization
+    void program_helpers::merge_buffers(engine_impl &engine, program_node &node, layout target_layout, size_t begin_offset, size_t end_offset)
+    {
+        memory_impl::ptr data_to_allocate = engine.allocate_memory(target_layout);
+
+        for (size_t i = begin_offset; i < end_offset; i++)
+        {
+            auto& weights = node.get_dependency(i).as<data>();
+            mem_lock<char> src{ weights.get_attached_memory() };
+            mem_lock<char> dst{ data_to_allocate };
+            std::copy(src.begin(), src.end(), dst.begin() + (i - begin_offset)*src.size());
+        }
+
+        for (size_t i = 0; i < end_offset - begin_offset - 1; i++)
+            node.remove_dependency(begin_offset + 1);
+
+        auto& data_node = node.get_dependency(begin_offset).as<data>();
+        data_node.attach_memory(*data_to_allocate, false);
+    }
+
+    //helper function for getting target layout used in depthwise sep optimization
+    layout program_helpers::get_weights_layout(typed_program_node<cldnn::data> &data_node, int32_t split)
+    {
+        auto mem_layout = data_node.get_output_layout();
+
+        return layout(mem_layout.data_type, mem_layout.format, { split * mem_layout.size.batch[0], mem_layout.size.feature[0], mem_layout.size.spatial[0], mem_layout.size.spatial[1] });
+    }
+
+    // pair.first tells whether l1 and l2 are absolutely identical
+    // pair.second tells whether l1 and l2 can be reinterpreted to each other without need of reordering
+    // note: layouts can only be considered identical if data size described by both layouts match (so no data are genereted nor dropped)
+    // note: if layouts describe two buffers with different size, consider them not to be identical even if smaller buffer can be considered to hold subsequence of larger buffer,
+    //       this behavior is required to force buffer allocation for smaller buffer which, currently, should always be performed
+    std::pair<bool, bool> program_helpers::are_layouts_identical(layout const& l1, layout const& l2)
+    {
+        if (l1 == l2)
+            return{ true, true };
+        if (l1.data_type != l2.data_type)
+            return{ false, false };
+        if (l1.size != l2.size)
+            return{ false, false };
+        if (l1.get_linear_size() != l2.get_linear_size())
+            return{ false, false };
+        if ((l1.format == format::bf8_xy16 && l2.format != format::bf8_xy16) ||
+            (l2.format == format::bf8_xy16 && l1.format != format::bf8_xy16) ||
+            (l1.format == format::b_fs_yx_fsv4 && l2.format != format::b_fs_yx_fsv4) ||
+            (l2.format == format::b_fs_yx_fsv4 && l1.format != format::b_fs_yx_fsv4))
+            return{ false, false };
+
+        auto l1_pitch = l1.get_pitches();
+        auto l2_pitch = l2.get_pitches();
+
+        //ignore pitches which will never be used (for dims with size == 1)
+        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
+            if (l1.size.raw[i] == 1)
+                l1_pitch.raw[i] = 0;
+        for (size_t i = 0; i < CLDNN_TENSOR_DIM_MAX; ++i)
+            if (l2.size.raw[i] == 1)
+                l2_pitch.raw[i] = 0;
+
+        auto l1_offset = l1.get_linear_offset();
+        auto l2_offset = l2.get_linear_offset();
+        if (l1_pitch == l2_pitch && l1_offset == l2_offset)
+            return{ false, true };
+
+        return{ false, false };
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/src/program_node.cpp b/inference-engine/thirdparty/clDNN/src/program_node.cpp
index 078c4f554..7ed454698 100644
--- a/inference-engine/thirdparty/clDNN/src/program_node.cpp
+++ b/inference-engine/thirdparty/clDNN/src/program_node.cpp
@@ -18,21 +18,18 @@
 #include "program_impl.h"
 #include "primitive_inst.h"
 #include "to_string_utils.h"
-
 #include "json_object.h"
-#include "xml_object.h"
+
 
 using namespace cldnn;
 
-program_node::program_node(std::shared_ptr<primitive> prim, program_impl & prog) : desc(prim), myprog(prog)
+program_node::program_node(std::shared_ptr<primitive> prim, program_impl & prog) : desc(prim), myprog(prog), org_id(prim->id)
 {
     if (prim)
         output_layout.data_padding = prim->output_padding;
-
-    processing_itr = prog.processing_order.end();
 }
 
-void program_node::replace_dependency(size_t idx, program_node& new_dep, bool detach_whole_branch)
+void program_node::replace_dependency(size_t idx, program_node& new_dep)
 {
     if (idx >= dependencies.size())
         return;
@@ -40,17 +37,17 @@ void program_node::replace_dependency(size_t idx, program_node& new_dep, bool de
         return;
 
     dependencies[idx]->users.remove(this);
-    myprog.remove_if_dangling(*dependencies[idx], detach_whole_branch);
+    myprog.remove_if_dangling(*dependencies[idx]);
 
     dependencies[idx] = &new_dep;
     new_dep.users.push_back(this);
 }
 
-void program_node::replace_dependency(program_node const& old_dep, program_node& new_dep, bool detach_whole_branch)
+void program_node::replace_dependency(program_node const& old_dep, program_node& new_dep)
 {
     for (size_t i = 0; i < dependencies.size(); ++i)
         if (dependencies[i] == &old_dep)
-            return replace_dependency(i, new_dep, detach_whole_branch);
+            return replace_dependency(i, new_dep);
 }
 
 std::vector<primitive_id> program_node::get_dependencies_ids() const
@@ -86,68 +83,6 @@ void program_node::add_memory_dependency(std::vector<primitive_id> prim_list)
     memory_dependencies.insert(prim_list.begin(),prim_list.end());
 }
 
-//Function used by serialization. Not working yet, in progress.
-std::unique_ptr<xml_composite> program_node::desc_to_xml() const
-{
-    std::unique_ptr<xml_composite> node_info = std::unique_ptr<xml_composite>(new xml_composite());
-    node_info->add("id", id());
-    node_info->add("valid_output_layout", bool_to_str(valid_output_layout));
-
-    xml_composite output_layout_info;
-    output_layout_info.add("data_type", dt_to_str(output_layout.data_type));
-    output_layout_info.add("format", fmt_to_str(output_layout.format));
-    output_layout_info.add("size", output_layout.size.to_string());
-
-    xml_composite padding_info;
-    padding_info.add("lower_size", output_layout.data_padding.lower_size().to_string());
-    padding_info.add("upper_size", output_layout.data_padding.upper_size().to_string());
-    output_layout_info.add("padding_info", padding_info);
-
-    node_info->add("output_layout", output_layout_info);
-    node_info->add("processing_number", processing_num);
-    node_info->add("constant", bool_to_str(constant));
-    node_info->add("output", bool_to_str(output));
-
-    std::vector<std::string> deps_ptrs;
-    {
-        bool empty = true;
-        auto itr = dependencies.begin();
-        while (itr != dependencies.end())
-        {
-            if (empty)
-            {
-                empty = false;
-            }
-            deps_ptrs.push_back(std::to_string(reinterpret_cast<uintptr_t>(*itr++)));
-        }
-        if (deps_ptrs.empty())
-        {
-            deps_ptrs.push_back("null");
-        }
-    }
-    node_info->add("dependencies", deps_ptrs);
-
-    std::vector<std::string> users_ptrs;
-    {
-        bool empty = true;
-        auto itr = users.begin();
-        while (itr != users.end())
-        {
-            if (empty)
-            {
-                empty = false;
-            }
-            users_ptrs.push_back(std::to_string(reinterpret_cast<uintptr_t>(*itr++)));
-        }
-        if (users_ptrs.empty())
-        {
-            users_ptrs.push_back("null");
-        }
-    }
-    node_info->add("users", users_ptrs);
-    return node_info;
-    }
-
 std::unique_ptr<json_composite> program_node::desc_to_json() const
 {
     std::unique_ptr<json_composite> node_info = std::unique_ptr<json_composite>(new json_composite());
@@ -169,7 +104,6 @@ std::unique_ptr<json_composite> program_node::desc_to_json() const
 
     node_info->add("output layout", output_layout_info);
 
-    node_info->add("processing number", processing_num);
     node_info->add("in data flow", bool_to_str(data_flow));
     node_info->add("constant", bool_to_str(constant));
     node_info->add("in data flow", bool_to_str(data_flow));
@@ -334,3 +268,4 @@ void details::internal_program_node_base::set_implementation(std::unique_ptr<pri
 {
     selected_impl = std::move(impl);
 }
+
diff --git a/inference-engine/thirdparty/clDNN/src/proposal.cpp b/inference-engine/thirdparty/clDNN/src/proposal.cpp
index c2cd1fb45..7e9810498 100644
--- a/inference-engine/thirdparty/clDNN/src/proposal.cpp
+++ b/inference-engine/thirdparty/clDNN/src/proposal.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2017-2018 Intel Corporation
+// Copyright (c) 2017-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -39,10 +39,12 @@ primitive_type_id proposal_type_id()
 
 layout proposal_inst::calc_output_layout(proposal_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for proposal_node!");
     auto desc = node.get_primitive();
     layout input_layout = node.get_dependency(cls_scores_index).get_output_layout();
 
-    return layout(input_layout.data_type, format::bfyx, { desc->post_nms_topn, CLDNN_ROI_VECTOR_SIZE, 1, 1 });
+    return layout(input_layout.data_type, format::bfyx, { input_layout.size.batch[0] * desc->post_nms_topn, CLDNN_ROI_VECTOR_SIZE, 1, 1 });
 }
 
 static inline std::string stringify_vector(std::vector<float> v)
@@ -81,10 +83,12 @@ std::string proposal_inst::to_string(proposal_node const& node)
 
     std::stringstream primitive_description;
 
-    auto swap_xy = desc->swap_xy ? "true" : "false";
-    auto initial_clip = desc->initial_clip ? "true" : "false";
-    auto round_ratios = desc->round_ratios ? "true" : "false";
-    auto shift_anchors = desc->shift_anchors ? "true" : "false";
+    auto swap_xy         = desc->swap_xy         ? "true" : "false";
+    auto initial_clip    = desc->initial_clip    ? "true" : "false";
+    auto round_ratios    = desc->round_ratios    ? "true" : "false";
+    auto shift_anchors   = desc->shift_anchors   ? "true" : "false";
+    auto clip_before_nms = desc->clip_before_nms ? "true" : "false";
+    auto clip_after_nms  = desc->clip_after_nms  ? "true" : "false";
 
     json_composite proposal_info;
     proposal_info.add("cls score", stringify_port(node.cls_score()));
@@ -107,6 +111,8 @@ std::string proposal_inst::to_string(proposal_node const& node)
     params.add("initial clip", initial_clip);
     params.add("round ratios", round_ratios);
     params.add("shift anchors", shift_anchors);
+    params.add("clip_before_nms", clip_before_nms);
+    params.add("clip_after_nms", clip_after_nms);
     proposal_info.add("params", params);
 
     node_info->add("proposal info", proposal_info);
diff --git a/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp b/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp
new file mode 100644
index 000000000..a9b82c125
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/pyramid_roi_align.cpp
@@ -0,0 +1,63 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "pyramid_roi_align_inst.h"
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn {
+    primitive_type_id pyramid_roi_align_type_id()
+    {
+        static primitive_type_base<pyramid_roi_align> instance;
+        return &instance;
+    }
+
+    layout pyramid_roi_align_inst::calc_output_layout(pyramidROIAlign_node const &node)
+    {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "pyramidROIAlign_node!");
+
+        auto desc = node.get_primitive();
+
+        auto boxes_layout = node.boxes().get_output_layout();
+        auto P2_layout = node.P2().get_output_layout();
+        auto pool_size_layout = node.pool_size().get_output_layout();
+ 
+        int32_t output_b = boxes_layout.size.spatial[1];
+        int32_t output_f = P2_layout.size.feature[0];
+
+        int32_t output_x = pool_size_layout.size.spatial[0];
+        int32_t output_y = pool_size_layout.size.spatial[1];
+       
+        return layout{ P2_layout.data_type, P2_layout.format, { output_b, output_f, output_x, output_y } };
+    }
+
+    std::string pyramid_roi_align_inst::to_string(pyramidROIAlign_node const& node)
+    {
+        auto desc = node.get_primitive();
+        auto node_info = node.desc_to_json();
+        std::stringstream primitive_description;
+        json_composite pyramid_roi_align_info;
+        node_info->add("pyramid_roi_align_info", pyramid_roi_align_info);
+        node_info->dump(primitive_description);
+        return primitive_description.str();
+    }
+    
+    pyramid_roi_align_inst::typed_primitive_inst(network_impl& network, pyramidROIAlign_node const& node)
+        : parent(network, node)
+    { }
+}
diff --git a/inference-engine/thirdparty/clDNN/src/region_yolo.cpp b/inference-engine/thirdparty/clDNN/src/region_yolo.cpp
index 3fe079f4b..4bec7a0f4 100644
--- a/inference-engine/thirdparty/clDNN/src/region_yolo.cpp
+++ b/inference-engine/thirdparty/clDNN/src/region_yolo.cpp
@@ -28,6 +28,9 @@ namespace cldnn
 
     layout region_yolo_inst::calc_output_layout(region_yolo_node const& node)
     {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "region_yolo_node!");
         auto input_layout = node.input().get_output_layout();
         auto desc = node.get_primitive();
 
diff --git a/inference-engine/thirdparty/clDNN/src/reorder.cpp b/inference-engine/thirdparty/clDNN/src/reorder.cpp
index e7aab42d3..c9428b3d5 100644
--- a/inference-engine/thirdparty/clDNN/src/reorder.cpp
+++ b/inference-engine/thirdparty/clDNN/src/reorder.cpp
@@ -36,7 +36,7 @@ layout reorder_inst::calc_output_layout(reorder_node const& node)
     auto input_layout = node.input().get_output_layout();
     auto ifmt = input_layout.format;
 
-    auto odt = node.get_primitive()->output_data_type;
+    auto odt = *node.get_primitive()->output_data_type;
     auto ofmt = node.get_primitive()->output_format;
     auto op = node.get_primitive()->output_padding;
 
diff --git a/inference-engine/thirdparty/clDNN/src/reorg_yolo.cpp b/inference-engine/thirdparty/clDNN/src/reorg_yolo.cpp
index 29ceb9f44..9c1e85cfc 100644
--- a/inference-engine/thirdparty/clDNN/src/reorg_yolo.cpp
+++ b/inference-engine/thirdparty/clDNN/src/reorg_yolo.cpp
@@ -28,6 +28,9 @@ namespace cldnn
 
     layout reorg_yolo_inst::calc_output_layout(reorg_yolo_node const& node)
     {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "reorg_yolo_node!");
         auto input_layout = node.input().get_output_layout();
         auto desc = node.get_primitive();
         auto stride = desc->stride;
diff --git a/inference-engine/thirdparty/clDNN/src/reshape.cpp b/inference-engine/thirdparty/clDNN/src/reshape.cpp
index 182537536..0cc687082 100644
--- a/inference-engine/thirdparty/clDNN/src/reshape.cpp
+++ b/inference-engine/thirdparty/clDNN/src/reshape.cpp
@@ -32,8 +32,31 @@ primitive_type_id reshape_type_id()
 
 layout reshape_inst::calc_output_layout(reshape_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for reshape_node!");
     auto input_layout = node.input().get_non_padded_output_layout();
-    input_layout.size = node.get_primitive()->output_shape;
+    auto sizes = node.get_primitive()->output_shape.sizes();
+    auto input_sizes = input_layout.size.sizes();
+    size_t need_recalc = 0;
+    uint32_t shape_count = 1;
+
+    for (size_t i = 0; i < sizes.size(); i++) {
+        if (sizes[i] == -1) {
+            if (need_recalc) {
+                CLDNN_ERROR_MESSAGE(node.id(), "Only one dimension of the new shape can be -1");
+            }
+            need_recalc = i;
+            continue;
+        }
+        if (sizes[i] == 0) {
+            sizes[i] = input_sizes[i];
+        }
+        shape_count *= sizes[i];
+    }
+    if (need_recalc)
+        sizes[need_recalc] = (int)input_layout.size.count() / shape_count;
+
+    input_layout.size = tensor(sizes);
     return input_layout;
 }
 
@@ -61,7 +84,7 @@ reshape_inst::typed_primitive_inst(network_impl& network, reshape_node const& no
     auto input_layout = node.input().get_output_layout();
     auto output_layout = node.get_output_layout();
     CLDNN_ERROR_DATA_TYPES_MISMATCH(node.id(), "Input layout data typr", input_layout.data_type, "output layout data type", output_layout.data_type, "");
-    CLDNN_ERROR_NOT_EQUAL(node.id(), "Output layout count", output_layout.count(), "input layout count", input_layout.count(), "Output layout of reshape pirmitive changes size of input buffer");
+    CLDNN_ERROR_NOT_EQUAL(node.id(), "Output layout count", output_layout.count(), "input layout count", input_layout.count(), "Output layout of reshape primitive changes size of input buffer");
 
     //if reshape operated in-place, postpone creation of the output until network run,
     //then create new memory object as the reinterpreted output of the previous primitive
@@ -88,4 +111,4 @@ void reshape_inst::reuse_input()
     _output = _network.get_engine().reinterpret_buffer(input_memory(), node.get_output_layout());
 }
 
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp b/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp
new file mode 100644
index 000000000..8673c207d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/reverse_sequence.cpp
@@ -0,0 +1,65 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "reverse_sequence_inst.h"
+
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn
+{
+primitive_type_id reverse_sequence_type_id()
+{
+    static primitive_type_base<reverse_sequence> instance;
+    return &instance;
+}
+
+layout reverse_sequence_inst::calc_output_layout(reverse_sequence_node const& node)
+{
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input(0).get_output_layout();
+    auto input_format = input_layout.format;
+
+    return layout{input_layout.data_type, input_format, input_layout.size};
+}
+
+std::string reverse_sequence_inst::to_string(reverse_sequence_node const& node)
+{
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+
+    std::stringstream primitive_description;
+
+    json_composite reverse_sequence_info;
+    reverse_sequence_info.add("input id", node.input(0).id());
+    reverse_sequence_info.add("sequence lengths id", node.input(1).id());
+    reverse_sequence_info.add("sequence axis", desc->seq_axis);
+    reverse_sequence_info.add("batch axis", desc->batch_axis);
+
+    node_info->add("reverse_sequence info", reverse_sequence_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+reverse_sequence_inst::typed_primitive_inst(network_impl& network, reverse_sequence_node const& node)
+: parent(network, node)
+{
+}
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp b/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp
index 0d45548ea..cbaca7bce 100644
--- a/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/roi_pooling.cpp
@@ -29,44 +29,35 @@ primitive_type_id roi_pooling_type_id()
 
 layout roi_pooling_inst::calc_output_layout(roi_pooling_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for roi_pooling_node!");
     auto desc = node.get_primitive();
     layout data_layout = node.input().get_output_layout();
-    int fm = data_layout.size.feature[0];
-
     layout rois_layout = node.rois().get_output_layout();
     int num_rois = rois_layout.size.batch[0];
+    int out_fm = desc->position_sensitive ? desc->output_dim : data_layout.size.feature[0];
 
-    int gss = desc->group_sz * desc->group_sz;
-
-
-    CLDNN_ERROR_LESS_THAN(node.id(), "Group size", desc->group_sz, "value", 0, "");
-    if (gss && fm % gss != 0)
-    {
-        CLDNN_ERROR_MESSAGE(node.id(), "group_sz must be either 0 (For RoIPooling) or satisfy fm % (group_sz^2) == 0");
-    }
-    
-    if (gss)
-    {
-        fm /= gss;
-    }
-
-    return layout(data_layout.data_type, format::bfyx, { num_rois, fm, desc->pooled_width, desc->pooled_height });
+    return layout(data_layout.data_type, format::bfyx, { num_rois, out_fm, desc->pooled_width, desc->pooled_height });
 }
 
 std::string roi_pooling_inst::to_string(roi_pooling_node const& node)
 {
     auto desc      = node.get_primitive();
     auto mode      = desc->mode == pooling_mode::max ? "max" : desc->mode == pooling_mode::bilinear ? "bilinear" : "average";
+    auto is_ps     = desc->position_sensitive ? "true" : "false";
     auto node_info = node.desc_to_json();
 
     std::stringstream primitive_description;
 
     json_composite roi_info;
     roi_info.add("mode", mode);
+    roi_info.add("position sensitive", is_ps);
     roi_info.add("pooled_w", desc->pooled_width);
     roi_info.add("pooled_h", desc->pooled_height);
     roi_info.add("spatial_scale", desc->spatial_scale);
-    roi_info.add("group_sz", desc->group_sz);
+    roi_info.add("output_dim", desc->output_dim);
+    roi_info.add("spatial_bins_x", desc->spatial_bins_x);
+    roi_info.add("spatial_bins_y", desc->spatial_bins_y);
 
     node_info->add("roi info", roi_info);
     node_info->dump(primitive_description);
diff --git a/inference-engine/thirdparty/clDNN/src/scale.cpp b/inference-engine/thirdparty/clDNN/src/scale.cpp
index 1c71f0d8d..c95fcf751 100644
--- a/inference-engine/thirdparty/clDNN/src/scale.cpp
+++ b/inference-engine/thirdparty/clDNN/src/scale.cpp
@@ -29,6 +29,8 @@ primitive_type_id scale_type_id()
 
 layout scale_inst::calc_output_layout(scale_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for scale_node!");
     auto result = node.input().get_non_padded_output_layout();
 
     auto scale_sizes = node.scale_in().get_non_padded_output_layout().size;
diff --git a/inference-engine/thirdparty/clDNN/src/scale_grad_input.cpp b/inference-engine/thirdparty/clDNN/src/scale_grad_input.cpp
index 8f2716bca..9adcbe7d9 100644
--- a/inference-engine/thirdparty/clDNN/src/scale_grad_input.cpp
+++ b/inference-engine/thirdparty/clDNN/src/scale_grad_input.cpp
@@ -29,6 +29,9 @@ namespace cldnn
 
     layout scale_grad_input_inst::calc_output_layout(scale_grad_input_node const& node)
     {
+        assert((bool)node.get_primitive()->output_data_type == false
+               && "Output data type forcing is not supported for "
+                  "scale_grad_input_node!");
         auto result = node.input().get_non_padded_output_layout();
 
         auto scale_in_sizes = node.scale_in().get_non_padded_output_layout().size;
diff --git a/inference-engine/thirdparty/clDNN/src/scale_grad_weights.cpp b/inference-engine/thirdparty/clDNN/src/scale_grad_weights.cpp
index 3d4a7b2a8..13a0110f0 100644
--- a/inference-engine/thirdparty/clDNN/src/scale_grad_weights.cpp
+++ b/inference-engine/thirdparty/clDNN/src/scale_grad_weights.cpp
@@ -29,6 +29,9 @@ primitive_type_id scale_grad_weights_type_id()
 
 layout scale_grad_weights_inst::calc_output_layout(scale_grad_weights_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "scale_grad_weights_node!");
     //output buffer will not be used in this primitive
     auto input_grad_layout_size = node.input().get_output_layout();
     return{ input_grad_layout_size.data_type, input_grad_layout_size.format,{ 1, 1, 1, 1 } };
diff --git a/inference-engine/thirdparty/clDNN/src/select.cpp b/inference-engine/thirdparty/clDNN/src/select.cpp
index df5aaa831..da799e083 100644
--- a/inference-engine/thirdparty/clDNN/src/select.cpp
+++ b/inference-engine/thirdparty/clDNN/src/select.cpp
@@ -30,6 +30,8 @@ primitive_type_id select_type_id()
 
 layout select_inst::calc_output_layout(select_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for select_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp b/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp
new file mode 100644
index 000000000..e89654a38
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/shuffle_channels.cpp
@@ -0,0 +1,83 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "shuffle_channels_inst.h"
+
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+
+namespace cldnn
+{
+primitive_type_id shuffle_channels_type_id()
+{
+    static primitive_type_base<shuffle_channels> instance;
+    return &instance;
+}
+
+layout shuffle_channels_inst::calc_output_layout(shuffle_channels_node const& node)
+{
+    auto desc = node.get_primitive();
+
+    auto input_layout = node.input(0).get_output_layout();
+    auto input_format = input_layout.format;
+
+    const int32_t number_of_dims = 4;
+    const int32_t group = desc->group;
+    int32_t axis = desc->axis;
+
+    if (axis < 0)
+        axis += number_of_dims;
+
+    if (axis < 0 || axis >= number_of_dims)
+        CLDNN_ERROR_MESSAGE(node.id(), "Incorrect axis value! Actual axis is" + std::to_string(group));
+
+    if (group < 1)
+        CLDNN_ERROR_MESSAGE(node.id(), "Invalid group size value (should equal at least one). Actual block size is" +
+                                       std::to_string(group));
+
+    if (input_layout.size.sizes(format::bfyx)[axis] % group != 0)
+        CLDNN_ERROR_MESSAGE(node.id(), "Group parameter must evenly divide the channel dimension. Actual group size is " +
+                                       std::to_string(group));
+
+    return layout{input_layout.data_type, input_format, input_layout.size};
+}
+
+std::string shuffle_channels_inst::to_string(shuffle_channels_node const& node)
+{
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    auto& input = node.input();
+
+    std::stringstream primitive_description;
+
+    json_composite shuffle_channels_info;
+    shuffle_channels_info.add("input id", input.id());
+    shuffle_channels_info.add("groups number", desc->group);
+    shuffle_channels_info.add("axis", desc->axis);
+
+    node_info->add("shuffle_channels info", shuffle_channels_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+shuffle_channels_inst::typed_primitive_inst(network_impl& network, shuffle_channels_node const& node)
+: parent(network, node)
+{
+}
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/softmax.cpp b/inference-engine/thirdparty/clDNN/src/softmax.cpp
index 1096b8770..70c568804 100644
--- a/inference-engine/thirdparty/clDNN/src/softmax.cpp
+++ b/inference-engine/thirdparty/clDNN/src/softmax.cpp
@@ -28,6 +28,8 @@ primitive_type_id softmax_type_id()
 
 layout softmax_inst::calc_output_layout(softmax_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for softmax_node!");
     return node.input().get_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/softmax_loss_grad.cpp b/inference-engine/thirdparty/clDNN/src/softmax_loss_grad.cpp
index df94b6079..41069f5a2 100644
--- a/inference-engine/thirdparty/clDNN/src/softmax_loss_grad.cpp
+++ b/inference-engine/thirdparty/clDNN/src/softmax_loss_grad.cpp
@@ -28,6 +28,9 @@ primitive_type_id softmax_loss_grad_type_id()
 
 layout softmax_loss_grad_inst::calc_output_layout(softmax_loss_grad_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for "
+              "softmax_loss_grad_node!");
     return node.input().get_non_padded_output_layout();
 }
 
diff --git a/inference-engine/thirdparty/clDNN/src/split.cpp b/inference-engine/thirdparty/clDNN/src/split.cpp
index 01dc4cff3..4b5d366b9 100644
--- a/inference-engine/thirdparty/clDNN/src/split.cpp
+++ b/inference-engine/thirdparty/clDNN/src/split.cpp
@@ -30,6 +30,8 @@ primitive_type_id split_type_id()
 
 layout split_inst::calc_output_layout(split_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for split_node!");
     auto output_ids = node.get_primitive()->output_ids;
     auto output_offsets = node.get_primitive()->output_offsets;
     auto param_num = output_ids.size();
@@ -81,4 +83,4 @@ split_inst::typed_primitive_inst(network_impl& network, split_node const& node)
     CLDNN_ERROR_MESSAGE(node.id(), "Split primitive instance should not be created!");
 }
 
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/src/strided_slice.cpp b/inference-engine/thirdparty/clDNN/src/strided_slice.cpp
new file mode 100644
index 000000000..9a2390aa8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/src/strided_slice.cpp
@@ -0,0 +1,141 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include "strided_slice_inst.h"
+#include "primitive_type_base.h"
+#include "error_handler.h"
+#include "json_object.h"
+#include "data_inst.h"
+
+namespace cldnn
+{
+primitive_type_id strided_slice_type_id()
+{
+    static primitive_type_base<strided_slice> instance;
+    return &instance;
+}
+
+layout strided_slice_inst::calc_output_layout(strided_slice_node const& node) {
+    const size_t numberOfDims = 4;
+    auto desc = node.get_primitive();
+    auto input_layout = node.input(0).get_output_layout();
+    auto input_format = input_layout.format;
+
+    auto completeStridedSliceParams = [&](std::vector<int32_t>& param) {
+        for (size_t i = param.size(); i < numberOfDims; ++i)
+            param.push_back(1);
+    };
+
+    auto completeStridedSliceMasks = [&](std::vector<uint8_t>& mask) {
+        for (size_t i = mask.size(); i < numberOfDims; ++i)
+            mask.push_back(0);
+    };
+
+    auto maskStridedSliceParams = [&](std::vector<int32_t>& param, const std::vector<uint8_t>& mask) {
+        for (size_t i = 0; i < param.size(); ++i)
+            if (mask[i])
+                param[i] = input_layout.size.sizes(format::bfyx)[i];
+    };
+
+    // Getting data from constant inputs. There are 3 args: Begin, End, Stride
+    std::vector<std::vector<int32_t>> stridedSliceArgs;
+    for (size_t i = 1; i < node.get_dependencies().size(); ++i) {
+        auto& input = node.get_dependency(i).as<data>();
+        auto& mem = input.get_attached_memory();
+        int32_t* data = static_cast<int32_t*>(mem.lock());
+        std::vector<int32_t> vData = std::vector<int32_t>(data, data + input.get_output_layout().count());
+        completeStridedSliceParams(vData);
+        stridedSliceArgs.push_back(vData);
+        mem.unlock();
+    }
+
+    std::vector<uint8_t> beginMask(desc->begin_mask);
+    completeStridedSliceMasks(beginMask);
+    std::vector<uint8_t> endMask(desc->end_mask);
+    completeStridedSliceMasks(endMask);
+
+    auto& begin = stridedSliceArgs[0];
+    auto& end = stridedSliceArgs[1];
+    const auto& strides = stridedSliceArgs[2];
+    std::vector<int32_t> outputDimsSizes;
+
+    // If the ith bit of begin_mask is set, begin[i] is ignored and the fullest possible range in that dimension is used instead.
+    maskStridedSliceParams(begin, beginMask);
+    // end_mask works analogously
+    maskStridedSliceParams(end, endMask);
+
+    auto isShiftPossible = [] (std::vector<int32_t>& dims) -> bool {
+        if (dims[dims.size() - 1] == 1)
+            return true;
+        else
+            return false;
+    };
+
+    // If the new_axis_mask is set, then begin, end, and stride are ignored
+    if (std::find(desc->new_axis_mask.begin(), desc->new_axis_mask.end(), 1) == desc->new_axis_mask.end()) {
+        for (size_t i = 0; i < numberOfDims; ++i) {
+            int32_t outputDimSize = (end[i] - begin[i]) / strides[i];
+            if ((end[i] - begin[i]) % strides[i] != 0)
+                outputDimSize++;
+            outputDimsSizes.push_back(outputDimSize);
+        }
+    } else {
+        outputDimsSizes = input_layout.size.sizes(format::bfyx);
+        for (size_t i = 0; i < desc->new_axis_mask.size(); ++i)
+            if (desc->new_axis_mask[desc->new_axis_mask.size() - i - 1] == 1)
+                if (isShiftPossible(outputDimsSizes)) {
+                    for (size_t j = outputDimsSizes.size() - 1; j > i; --j)
+                        outputDimsSizes[j] = outputDimsSizes[j - 1];
+                    outputDimsSizes[i] = 1;
+                }
+    }
+
+    return layout{input_layout.data_type, input_format, tensor(outputDimsSizes[0], outputDimsSizes[1], outputDimsSizes[3], outputDimsSizes[2])};
+}
+
+std::string strided_slice_inst::to_string(strided_slice_node const& node)
+{
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    auto& input = node.input();
+
+    std::stringstream primitive_description;
+
+    json_composite strided_slice_info;
+    strided_slice_info.add("input id", input.id());
+    strided_slice_info.add("begin_param id", node.get_dependency(1).id());
+    strided_slice_info.add("end_param id", node.get_dependency(2).id());
+    strided_slice_info.add("stride_param id", node.get_dependency(3).id());
+    strided_slice_info.add("begin mask", node.get_primitive()->begin_mask);
+    strided_slice_info.add("end mask", node.get_primitive()->end_mask);
+    strided_slice_info.add("new axis mask", node.get_primitive()->new_axis_mask);
+    strided_slice_info.add("shrink axis mask", node.get_primitive()->shrink_axis_mask);
+    strided_slice_info.add("begin_param shape", node.get_dependency(1).get_output_layout().size.to_string());
+    strided_slice_info.add("end_param shape", node.get_dependency(2).get_output_layout().size.to_string());
+    strided_slice_info.add("stride_param shape", node.get_dependency(3).get_output_layout().size.to_string());
+
+    node_info->add("strided_slice info", strided_slice_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+strided_slice_inst::typed_primitive_inst(network_impl& network, strided_slice_node const& node)
+    : parent(network, node)
+{
+}
+
+}
diff --git a/inference-engine/thirdparty/clDNN/src/tile.cpp b/inference-engine/thirdparty/clDNN/src/tile.cpp
index 9e47b709f..c592aa98e 100644
--- a/inference-engine/thirdparty/clDNN/src/tile.cpp
+++ b/inference-engine/thirdparty/clDNN/src/tile.cpp
@@ -30,6 +30,8 @@ primitive_type_id tile_type_id()
 
 layout tile_inst::calc_output_layout(tile_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for tile_node!");
     auto desc = node.get_primitive();
 
     auto input_layout = node.input().get_output_layout();
diff --git a/inference-engine/thirdparty/clDNN/src/upsampling.cpp b/inference-engine/thirdparty/clDNN/src/upsampling.cpp
index 75ca2f9e5..fa57f8855 100644
--- a/inference-engine/thirdparty/clDNN/src/upsampling.cpp
+++ b/inference-engine/thirdparty/clDNN/src/upsampling.cpp
@@ -29,6 +29,8 @@ primitive_type_id upsampling_type_id()
 
 layout upsampling_inst::calc_output_layout(upsampling_node const& node)
 {
+    assert((bool)node.get_primitive()->output_data_type == false
+           && "Output data type forcing is not supported for upsampling_node!");
     auto desc = node.get_primitive();
     auto input_layout = node.input().get_output_layout();
     auto scale = desc->scale;
diff --git a/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt b/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt
index 9ceaa2268..7f906cde4 100644
--- a/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt
+++ b/inference-engine/thirdparty/clDNN/tests/CMakeLists.txt
@@ -15,15 +15,15 @@
 
 # ========================================= Name / Output settings =====================================
 
-set(CLDNN_BUILD__PROJ             "tests")
+set(CLDNN_BUILD__PROJ             "clDNN_unit_tests")
 set(CLDNN_BUILD__PROJ_LABEL       "${CLDNN_BUILD__PROJ}")
 set(CLDNN_BUILD__PROJ_OUTPUT_NAME "${CLDNN_BUILD__PROJ}${CLDNN__OUT_CPU_SUFFIX}")
 
 # =========================================== Compiler options =========================================
-
 intel_config_flag_apply_settings(CompilerOptions CMAKE_CXX_FLAGS ALL_PATTERN ""
     SET
       WarnLevel3
+      StandardCxx11
   )
 if (NOT MSVC)
   intel_config_flag_apply_settings(CompilerOptions CMAKE_CXX_FLAGS ALL_PATTERN ""
diff --git a/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp
new file mode 100644
index 000000000..c7509ff3d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/module_tests/events_pool_test.cpp
@@ -0,0 +1,65 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+
+
+#include <gtest/gtest.h>
+#include "api/CPP/engine.hpp"
+#include "test_utils/test_utils.h"
+#include "api/CPP/input_layout.hpp"
+#include "api/CPP/network.hpp"
+
+using namespace tests;
+using namespace cldnn;
+
+TEST(events_pool, DISABLED_basic_test)
+{
+    /*
+    This tests if the events pool works and there's no memory leak.
+    */
+    auto batch_num = 1;
+    auto feature_num = 4;
+    auto x_size = 1;
+    auto y_size = 1;
+
+    topology topology;
+    topology.add(input_layout("input", { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num))}}));
+    topology.add(activation("relu", "input", activation_relu));
+    topology.add(activation("relu1", "relu", activation_relu));
+    topology.add(activation("relu2", "relu1", activation_relu));
+    topology.add(activation("relu3", "relu2", activation_relu));
+    topology.add(activation("relu4", "relu3", activation_relu));
+    topology.add(activation("relu5", "relu4", activation_relu));
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+
+    for (int i = 0; i < 20; i++)
+    {
+        engine eng;// here we build new engine i times
+        auto input = memory::allocate(eng, { data_types::f32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
+        std::vector<float> input_vec = { -1.f, 2.f, -3.f, 4.f };
+        for (int j = 0; j < 20; j++) //then we build network j times
+        {
+            network network(eng, topology, bo);
+            network.set_input_data("input", input);
+            for(int k = 0; k < 20; k++) //and execute that network k times
+                network.execute();  
+        }
+        EXPECT_EQ(eng.get_max_used_device_memory_size(), (uint64_t)80);
+        eng.~engine();
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp b/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp
index 7f6bbc030..e0e28a821 100644
--- a/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/module_tests/gpu_toolkit_test.cpp
@@ -14,17 +14,123 @@
 // limitations under the License.
 */
 
-
-
 #include <gtest/gtest.h>
 #include "api/CPP/engine.hpp"
+#include "test_utils/test_utils.h"
+#include "api/CPP/network.hpp"
+#include "api/CPP/topology.hpp"
+#include "api/CPP/input_layout.hpp"
+#include "api/CPP/activation.hpp"
+#include "api/C/input_layout.h"
+#include "api/C/activation.h"
+#include "api/C/cldnn.h"
+
+#include "test_utils.h"
+
+#define CL_HPP_ENABLE_EXCEPTIONS
+#define CL_HPP_MINIMUM_OPENCL_VERSION 120
+#define CL_HPP_TARGET_OPENCL_VERSION 120
+
+#if defined __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-braces"
+#elif defined __GNUC__ && __GNUC__ >= 6
+#pragma GCC diagnostic ignored "-Wignored-attributes"
+#endif
+
+#include <cl2_wrapper.h>
 
 using namespace cldnn;
 
+class user_gpu_toolkit
+{
+public:
+    user_gpu_toolkit()
+    {
+        get_platform_and_device(get_plaftorm());
+        create_context_from_one_device();
+    }
+
+    cl_context get_gpu_context() const { return _gpu_context; }
+
+private:
+    cl_platform_id _platform_id;
+    cl_device_id _gpu_device;
+    cl_context _gpu_context;
+
+    void create_context_from_one_device()
+    {
+        cl_int error = 0;
+        _gpu_context = clCreateContext(0, 1, &_gpu_device, 0, 0, &error);
+        if (error != CL_SUCCESS)
+        {
+            throw std::runtime_error("error creating context");
+        }
+    }
+
+    cl_platform_id get_plaftorm()
+    {
+        cl_uint n = 0;
+        cl_int err = clGetPlatformIDs(0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
+        }
+
+        // Get platform list
+        std::vector<cl_platform_id> platform_ids(n);
+        err = clGetPlatformIDs(n, platform_ids.data(), NULL);
+        if (err != CL_SUCCESS) {
+            throw std::runtime_error("clGetPlatformIDs error " + std::to_string(err));
+        }
+        return platform_ids[0];
+    }
+
+    void get_platform_and_device(cl_platform_id platform_id)
+    {
+        _platform_id = platform_id;
+        cl_int err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &_gpu_device, 0);
+        if (err != CL_SUCCESS) {
+            throw std::runtime_error("clGetDeviceIDs error " + std::to_string(err));
+        }
+    }
+};
+
 TEST(gpu_engine, engine_info)
 {
-    engine engine;
+    const auto& engine = tests::get_test_engine();
     auto info = engine.get_info();
     EXPECT_GT(info.cores_count, 0u);
     EXPECT_GT(info.core_frequency, 0u);
+}
+
+TEST(gpu_engine, DISABLED_user_context)
+{
+    user_gpu_toolkit gpu_toolkit;
+    cl_context user_context = gpu_toolkit.get_gpu_context();
+
+    //[0] Check if the user engine config works.
+    auto engine_config = cldnn::engine_configuration(false, false, false, "", "", true, "", "", cldnn::priority_mode_types::disabled, cldnn::throttle_mode_types::disabled, true, &user_context);
+    
+    //[1]Check if the engine creation works.
+    engine engine(engine_config);
+    auto info = engine.get_info();
+    EXPECT_GT(info.cores_count, 0u);
+    EXPECT_GT(info.core_frequency, 0u);
+
+    //[2]Now check if the queues works (run simple network).
+    topology topo;
+    auto inp_lay = cldnn::layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,2,2});
+    auto input_mem = cldnn::memory::allocate(engine, inp_lay);
+    tests::set_values<float>(input_mem, { 1.0f, 2.0f, 3.0f, 4.0f });
+    auto inp = input_layout("input", inp_lay);
+    auto activ = activation("this_needs_queue", "input", cldnn_activation_func::activation_abs);
+    topo.add(inp, activ);
+    network net(engine, topo);
+
+    net.set_input_data("input", input_mem);
+    auto out = net.execute();
+    auto out_ptr = out.at("this_needs_queue").get_memory().pointer<float>();
+    EXPECT_EQ(out.size(), size_t(1));
+    for(uint32_t i = 0;i < 4; i++)
+        EXPECT_EQ(out_ptr[i], float(i+1));
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_grad_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_grad_gpu_test.cpp
index d05135665..7cc7865ea 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_grad_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_grad_gpu_test.cpp
@@ -43,7 +43,7 @@ TEST(activation_grad_f16_fw_gpu, basic_bfyx_all_functions)
     //  a: 0.5, b: 2.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_grad = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 5, 4 } });
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 1, 1, 5, 4 } });
@@ -142,7 +142,7 @@ TEST(activation_grad_f32_fw_gpu, basic_bfyx_all_functions)
     //  a: 0.5, b: 2.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp
index e40de23fb..9ec8de141 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/activation_simple_gpu_test.cpp
@@ -34,6 +34,61 @@ using namespace cldnn;
 using namespace tests;
 
 
+TEST(activation_f32_fw_gpu, not_basic_yxfb) {
+    //  Input:
+    //  1 0 -3  4  5
+    //  0  2  3  4 -6
+    //  3 -3  3  0  1
+    //  1  1  1 -1  0
+    //
+    //  Output:
+    //  0, 1, 0, 0, 0,
+    //  1, 0, 0, 0, 0,
+    //  0, 0, 0, 1, 0,
+    //  0, 0, 0, 0, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
+    set_values(input,
+    { 1.0f, 0.0f, -3.0f, 4.0f, 5.0f,
+      0.0f, 2.0f, 3.0f, 4.0f, -6.0f,
+      3.0f, -3.0f, 3.0f, 0.0f, 1.0f,
+      1.0f, 1.0f, 1.0f, -1.0f, 0.0f });
+    VF<float> output_vec = {
+        0.0f, 1.0f, 0.0f, 0.0f, 0.0f,
+        1.0f, 0.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 1.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f, 1.0f };
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        activation("not", "input", activation_not));
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "not");
+
+    auto output_memory = outputs.at("not").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<float>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 4);
+    EXPECT_EQ(x_size, 5);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (size_t i = 0; i < output_vec.size(); ++i) {
+        EXPECT_FLOAT_EQ(output_vec[i], output_ptr[i]);
+    }
+}
+
 TEST(activation_f32_fw_gpu, relu_basic_yxfb) {
     //  Input:
     //  1 -2 -3  4  5
@@ -49,7 +104,7 @@ TEST(activation_f32_fw_gpu, relu_basic_yxfb) {
     //  3   -1.5  3    5    1
     //  1    1    1   -0.5  1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
     set_values(input,
@@ -102,7 +157,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
     //  a: 0.5, b: 2.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 4 } });
     auto input_params = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
@@ -130,7 +185,8 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
         activation_cos,
         activation_cosh,
         activation_exp,
-		activation_log2,
+        activation_not,
+        activation_log2,
     };
 
     cldnn_activation_additional_params params = { 0.5f, 2.5f };
@@ -229,12 +285,15 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
                 case activation_exp:
                     EXPECT_FLOAT_EQ(std::exp((float)input_ptr[i]), output_ptr[i]);
                     break;
-                case activation_log2:
-                    if (input_ptr[i] > 0) //logarithm exist only for positive real values
+                case activation_not:
+                    EXPECT_FLOAT_EQ((float)(!input_ptr[i]), output_ptr[i]);
+                    break;
+				case activation_log2:
+					if (input_ptr[i] > 0) //logarithm exist only for positive real values
                     {
-                        EXPECT_FLOAT_EQ(std::log2((float)input_ptr[i]), output_ptr[i]);
+						EXPECT_FLOAT_EQ(std::log2((float)input_ptr[i]), output_ptr[i]);
                     }
-                    break;
+					break;
                 default:
                     break;
                 }
@@ -245,7 +304,7 @@ TEST(activation_f32_fw_gpu, basic_yxfb_all_functions)
 
 TEST(activation_f32_fw_gpu, basic_yxfb_asin_acos_log)
 {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 4 } });
     set_values(input, { 0.12f, 0.56f, 0.45f, 0.789f, 0.546f, 0.999f, 0.7899f, 0.6677f});
@@ -328,7 +387,7 @@ TEST(activation_f32_fw_gpu, relu_basic_input_padding_yxfb) {
     //  3   -1.5  3    5    1
     //  1    1    1   -0.5  1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
 
@@ -394,7 +453,7 @@ TEST(activation_f32_fw_gpu, relu_basic_output_padding_yxfb) {
     //  0    0    0    0    0    0    0    0    0    0    0
     //  0    0    0    0    0    0    0    0    0    0    0
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
     set_values(input,
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp
new file mode 100644
index 000000000..952f2c13b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/add_reorders_gpu_test.cpp
@@ -0,0 +1,213 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+#include "api/CPP/memory.hpp"
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/engine.hpp>
+#include "test_utils/test_utils.h"
+#include <api/CPP/reorder.hpp>
+#include <api/CPP/data.hpp>
+#include <api/CPP/activation.hpp>
+#include <api/CPP/mutable_data.hpp>
+#include <api/CPP/layout.hpp>
+#include <api/CPP/tile.hpp>
+#include <api/CPP/reshape.hpp>
+
+#include <api/CPP/batch_norm.hpp>
+#include <api/CPP/concatenation.hpp>
+
+using namespace cldnn;
+using namespace tests;
+
+/*
+These tests are inteded to check if additional reorders are being added  properly during 
+add_reorders optimization pass.
+*/
+
+//Input has incompatible format
+TEST(add_reorders_gpu, basic1) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::fyxb,{ 2, 2, 3, 2 } }); //format unsupported by batch_norm!
+    auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    set_values(input, {
+        1.f, 2.f, -10.f,
+        3.f, 4.f, -14.f,
+        5.f, 6.f, -12.f,
+        7.f, 8.f, -16.f,
+        0.f, 0.f, -11.f,
+        0.5f, -0.5f, -15.f,
+        1.5f, 5.2f, -13.f,
+        12.f, 9.f, -17.f
+    });
+
+    set_values(mean, { 0.1f, 0.2f });
+    set_values(variance, { 0.4f, 0.5f });
+
+    float epsilon = 1e-3f;
+    float expected_out[] = {
+        1.42125f,  3.00042f,
+       -0.28256f, -0.28256f,
+      -15.94960f,  4.57958f,
+      -15.82340f,  0.42384f,
+        6.15875f,-22.26620f,
+       -0.98896f,-21.47460f,
+        7.73791f,  9.31708f,
+        1.83664f,  7.06401f,
+       -19.1079f,  10.8962f,
+       -18.6490f,  16.6711f,
+        12.4754f, -25.4246f,
+        12.4327f, -24.3002f};
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", "mean", "variance", epsilon));
+
+    network network(engine, topology); // without additional reorders we would get an exception here
+    network.set_input_data("input", input);
+
+    EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(5));
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory().pointer<float>();
+    for (int i = 0; i < 2 * 2 * 3 * 2; i++)
+    {
+        EXPECT_NEAR(expected_out[i], output[i], epsilon);
+    }
+}
+
+//concatenation of incompatible convolutions
+TEST(add_reorders_gpu, two_convolutions_and_concatenation) {
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    build_opt.set_option(build_option::optimize_data(false));
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto weights1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
+    auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
+
+    set_values(input, { 1.1f, 1.2f, 1.3f, 1.4f });
+    set_values(weights1, { 2.1f, 3.1f});
+    set_values(weights2, { 1.1f, 0.1f});
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights1", weights1));
+    topology.add(data("weights2", weights2));
+
+    topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }));
+    topology.add(cldnn::reorder("reorder", "input", cldnn::layout(data_types::f32, format::byxf, 4)));
+    topology.add(cldnn::convolution("conv2", { "reorder" }, { "weights2" }));
+
+    topology.add(cldnn::concatenation("concat", { "conv1", "conv2" }, cldnn::concatenation::along_f));
+
+    network network(engine, topology, build_opt);
+    network.set_input_data("input", input);
+
+    //concatenation accepts inputs in different formats, so no reorders should be added here
+    EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(7));
+    auto outputs = network.execute();
+
+    float expected_out[] = { 6.34f, 1.34f, 6.86f, 1.46f };
+    float epsilon = 1e-3f;
+
+    for (auto& it : outputs)
+    {
+        auto output = it.second.get_memory().pointer<float>();
+        for (size_t cntr = 0; cntr < 2 * 2; cntr++)
+        {
+            EXPECT_NEAR(expected_out[cntr], output[cntr], epsilon);
+        }
+    }
+}
+
+template<typename data_t>
+void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num_tiles)
+{
+    auto get_sizes = [](const tensor& size, tile::tile_axis axis) -> std::pair<int, int>
+    {
+        switch (axis)
+        {
+        case tile::along_b: return std::make_pair(1, size.batch[0] * size.feature[0] * size.spatial[1] * size.spatial[0]);
+        case tile::along_f: return std::make_pair(size.batch[0], size.feature[0] * size.spatial[1] * size.spatial[0]);
+        case tile::along_y: return std::make_pair(size.batch[0] * size.feature[0], size.spatial[1] * size.spatial[0]);
+        case tile::along_x: return std::make_pair(size.batch[0] * size.feature[0] * size.spatial[1], size.spatial[0]);
+        default: throw std::invalid_argument("Invalid axis(" + std::to_string(static_cast<int>(axis)) + ") in tile ref version");
+        }
+    };
+
+    const pointer<data_t> src = input.pointer<data_t>();
+    pointer<data_t> dst = output.pointer<data_t>();
+
+    const data_t* psrc = src.data();
+    data_t* pdst = dst.data();
+
+    auto sizes = get_sizes(input.get_layout().size, axis);
+    int outer_dim = sizes.first;
+    int inner_dim = sizes.second;
+
+    for (int i = 0; i < outer_dim; i++)
+    {
+        for (int t = 0; t < num_tiles; t++)
+        {
+            for (int j = 0; j < inner_dim; j++)
+            {
+                pdst[j] = psrc[j];
+            }
+            pdst += inner_dim;
+        }
+        psrc += inner_dim;
+    }
+}
+
+TEST(add_reorders_gpu, basic_reshape_and_tile) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 2, 1 } });
+    auto output_ref = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 4, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(reshape("reshape", "input", tensor(2, 1, 2, 1)));
+    topology.add(tile("tile", "reshape", tile::along_y, 4));
+
+    std::vector<float> input_vec = { 1.f, 0.f, 5.f, 1.5f };
+    set_values(input, input_vec);
+    tile_ref<float>(input, output_ref, tile::along_y, 4);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    //reorder is required as tile accepts only bfyx format
+    EXPECT_EQ(network.get_all_primitive_org_ids().size(), size_t(4));
+    auto outputs = network.execute();
+
+    auto output = outputs.at("tile").get_memory();
+    auto output_ptr = output.pointer<float>();
+    auto output_ref_ptr = output_ref.pointer<float>();
+
+    for (unsigned int i = 0; i < output_ref.count(); ++i) {
+        EXPECT_EQ(output_ptr[i], output_ref_ptr[i]);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/apply_adam_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/apply_adam_gpu_test.cpp
index d0af068da..6d2250cec 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/apply_adam_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/apply_adam_gpu_test.cpp
@@ -34,7 +34,7 @@ using namespace tests;
 TEST(apply_adam_gpu, basic_in2x2x3x2_bfyx) {
     // Test creates topology with two apply adam primitives (t = [0, 1]) with the same output variable which is updated.
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
     auto var = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp
index 42471e117..b66cfaee6 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/arg_max_gpu_test.cpp
@@ -33,7 +33,7 @@ using namespace tests;
 TEST(arg_max_gpu, base) {
 	//  Input  : 2x3x2x2
 	static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
-	engine engine;
+	const auto& engine = get_test_engine();
 
 	auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
 	topology topology;
@@ -85,7 +85,7 @@ TEST(arg_max_gpu, base) {
 TEST(arg_max_gpu_batch_one, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 1, top_k = 8;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     topology topology;
@@ -164,7 +164,7 @@ TEST(arg_max_gpu_batch_one, base) {
 TEST(arg_max_gpu_top_k, base) {
 	//  Input  : 2x3x2x2
 	static const int32_t x_size = 2, y_size = 2, feature_num = 5, batch_num = 2;
-	engine engine;
+	const auto& engine = get_test_engine();
 	const int top_k = 8;
 	auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
 	topology topology;
@@ -249,7 +249,7 @@ TEST(arg_max_gpu_min, base) {
 	//  Input  : 2x3x2x2
 	static const int32_t x_size = 2, y_size = 2, feature_num = 4,
 		batch_num = 2;
-	engine engine;
+	const auto& engine = get_test_engine();
 
 	auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
 	topology topology;
@@ -303,7 +303,7 @@ TEST(arg_max_gpu_min, base) {
 TEST(arg_max_gpu_min_top_k, base) {
 	//  Input  : 2x3x2x2
 	static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2;
-	engine engine;
+	const auto& engine = get_test_engine();
 	const int top_k = 3;
 	auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
 	topology topology;
@@ -385,7 +385,7 @@ TEST(arg_max_gpu_min_top_k, base) {
 TEST(arg_max_gpu_min_axis_batch, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 4, batch_num = 2;
-    engine engine;
+    const auto& engine = get_test_engine();
     const int top_k = 2;
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     topology topology;
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp
index bb30293a0..d537c89bb 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/average_unpooling_gpu_test.cpp
@@ -52,7 +52,7 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1) {
     //  f1: b0:  1.5     2.5    1       b1:   1.75   2.9375   1.1875
     //  f1: b0:  1.5     2.5    1       b1:   1.75   2.9375   1.1875
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
 
@@ -119,7 +119,7 @@ TEST(average_unpooling_gpu, basic_in2x2x3x2_with_average_pooling_unpooling) {
     //  f1: b0:  1.5     1.5    0.5  b1:   1.75   1.75   1
     //  f1: b0:  1.5     1.5    0.5  b1:   1.75   1.75   1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
 
@@ -191,7 +191,7 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_output_padding) {
     //  f0: b0:  0.625   -0.5  -1.125   b1:   0  -1.6875  -1.6875
     //  f1: b0:  1.5     2.5    1       b1:   1.75   2.9375   1.1875
     //  f1: b0:  1.5     2.5    1       b1:   1.75   2.9375   1.1875
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
 
@@ -272,7 +272,7 @@ TEST(average_unpooling_gpu, basic_in2x2x2x1_fp16) {
     //  f1: b0:  1.5     2.5    1       b1:   1.75   2.9375   1.1875
     //  f1: b0:  1.5     2.5    1       b1:   1.75   2.9375   1.1875
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 2, 2, 1 } });
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_gpu_test.cpp
index ddc19ff73..0de6c6689 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_gpu_test.cpp
@@ -25,6 +25,7 @@
 #include "test_utils/test_utils.h"
 #include <api/CPP/reorder.hpp>
 #include <api/CPP/data.hpp>
+#include <api/CPP/mutable_data.hpp>
 
 using namespace cldnn;
 using namespace tests;
@@ -49,7 +50,7 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2) {
     //  f1: 107.0624
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
     auto mean = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
@@ -103,6 +104,102 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2) {
     }
 }
 
+TEST(batch_normalization_gpu, basic_in2x3x2x2_scale_shift) {
+	//  Mean   : 3x2x2
+	//  Input  : 2x3x2x2
+	//  Output : 2x3x2x2
+
+	//  Input:
+	//  f0: b0:  1    2  -10   b1:   0    0     -11
+	//  f0: b0:  3    4  -14   b1:   0.5 -0.5   -15  
+	//  f1: b0:  5    6  -12   b1:   1.5  5.2   -13     
+	//  f1: b0:  7    8  -16   b1:   12   9     -17
+	//
+	//  Mean
+	//  f0: -3.3333
+	//  f1: -0.3583
+	//
+	//  Variance
+	//  f0: 44.9305
+	//  f1: 107.0624
+	//
+	//  Scale
+	//  f0: 2.0
+	//  f1: 1.0
+	//
+	//  Shift
+	//  f0: 0.0
+	//  f1: 5.0
+
+
+
+	const auto& engine = get_test_engine();
+
+	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+	auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+	float epsilon = 0.0001f;
+
+	topology topology;
+	topology.add(input_layout("input", input.get_layout()));
+	topology.add(data("mean", mean));
+	topology.add(data("variance", variance));
+	topology.add(data("scale", scale));
+	topology.add(data("shift", shift));
+	topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift",  epsilon));
+
+	set_values(input, {
+		1.f, 0.f, 5.f, 1.5f,
+		2.f, 0.f, 6.f, 5.2f,
+		-10.f, -11.f, -12.f, -13.f,
+		3.f, 0.5f, 7.f, 12.f,
+		4.f, -0.5f, 8.f, 9.f,
+		-14.f, -15.f, -16.f, -17.f
+	});
+
+	set_values(mean, { -3.3333f, -0.3583f });
+	set_values(variance, { 44.9305f, 107.0624f });
+	set_values(scale, { 2.f, 1.f });
+	set_values(shift, { 0.f, 5.f });
+
+	network network(engine, topology);
+
+	network.set_input_data("input", input);
+
+	auto outputs = network.execute();
+
+	auto output = outputs.at("batch_norm").get_memory();
+	auto output_ptr = output.pointer<float>();
+
+	for (int j = 0; j < 2; ++j) { //F
+		float sum = 0, var = 0;
+
+		auto scalep = scale.pointer<float>();
+		auto shiftp = shift.pointer<float>();
+		float scalef = scalep[j];
+		float shiftf = shiftp[j];
+
+		for (int i = 0; i < 2; ++i) { //B
+			for (int k = 0; k < 2; ++k) { //Y
+				for (int l = 0; l < 3; ++l) { //X
+					float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+					data = (data - shiftf) / scalef;
+					sum += data;
+					var += data * data;
+				}
+			}
+		}
+		sum /= 2 * 3 * 2; 
+		var /= 2 * 3 * 2;
+
+		EXPECT_NEAR(sum, 0, 1e-03F);
+		EXPECT_NEAR(var, 1, 1e-03F);
+	}
+}
+
 TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc) {
     //  Mean   : 3x2x2
     //  Input  : 2x3x2x2
@@ -123,17 +220,16 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc) {
     //  f1: 107.0624
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
-    auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
     auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
 
     float epsilon = 0.0001f;
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
-    topology.add(data("inv_variance", inv_variance));
+    topology.add(mutable_data("inv_variance", inv_variance));
     topology.add(batch_norm("batch_norm", "input", epsilon, "inv_variance"));
 
     set_values(input, {
@@ -173,6 +269,513 @@ TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc) {
     }
 }
 
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_no_inv_var) {
+    //  Mean   : 3x2x2
+    //  Input  : 2x3x2x2
+    //  Output : 2x3x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  -10   b1:   0    0     -11
+    //  f0: b0:  3    4  -14   b1:   0.5 -0.5   -15  
+    //  f1: b0:  5    6  -12   b1:   1.5  5.2   -13     
+    //  f1: b0:  7    8  -16   b1:   12   9     -17
+    //
+    //  Mean
+    //  f0: -3.3333
+    //  f1: -0.3583
+    //
+    //  Variance
+    //  f0: 44.9305
+    //  f1: 107.0624
+
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(batch_norm("batch_norm", "input", epsilon));
+
+    set_values(input, {
+        1.f, 0.f, 5.f, 1.5f,
+        2.f, 0.f, 6.f, 5.2f,
+        -10.f, -11.f, -12.f, -13.f,
+        3.f, 0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f, 9.f,
+        -14.f, -15.f, -16.f, -17.f
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_scale_shift) {
+	//  Mean   : 3x2x2
+	//  Input  : 2x3x2x2
+	//  Output : 2x3x2x2
+
+	//  Input:
+	//  f0: b0:  1    2  -10   b1:   0    0     -11
+	//  f0: b0:  3    4  -14   b1:   0.5 -0.5   -15  
+	//  f1: b0:  5    6  -12   b1:   1.5  5.2   -13     
+	//  f1: b0:  7    8  -16   b1:   12   9     -17
+	//
+	//  Mean
+	//  f0: -3.3333
+	//  f1: -0.3583
+	//
+	//  Variance
+	//  f0: 44.9305
+	//  f1: 107.0624
+	//
+	//  Scale
+	//  f0: 2.0
+	//  f1: 1.0
+	//
+	//  Shift
+	//  f0: 0.0
+	//  f1: 5.0
+
+
+	const auto& engine = get_test_engine();
+
+	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+	auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+	float epsilon = 0.0001f;
+
+	topology topology;
+	topology.add(input_layout("input", input.get_layout()));
+	topology.add(data("scale", scale));
+	topology.add(data("shift", shift));
+	topology.add(mutable_data("inv_variance", inv_variance));
+	topology.add(batch_norm("batch_norm", "input", epsilon, "scale", "shift", "inv_variance"));
+
+	set_values(input, {
+		1.f, 0.f, 5.f, 1.5f,
+		2.f, 0.f, 6.f, 5.2f,
+		-10.f, -11.f, -12.f, -13.f,
+		3.f, 0.5f, 7.f, 12.f,
+		4.f, -0.5f, 8.f, 9.f,
+		-14.f, -15.f, -16.f, -17.f
+	});
+
+	set_values(scale, { 2.f, 1.f });
+	set_values(shift, { 0.f, 5.f });
+
+	network network(engine, topology);
+
+	network.set_input_data("input", input);
+
+	auto outputs = network.execute();
+
+	auto output = outputs.at("batch_norm").get_memory();
+	auto output_ptr = output.pointer<float>();
+
+	for (int j = 0; j < 2; ++j) { //F
+		float sum = 0, var = 0;
+
+		auto scalep = scale.pointer<float>();
+		auto shiftp = shift.pointer<float>();
+		float scalef = scalep[j];
+		float shiftf = shiftp[j];
+
+		for (int i = 0; i < 2; ++i) { //B
+			for (int k = 0; k < 2; ++k) { //Y
+				for (int l = 0; l < 3; ++l) { //X
+					float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+					data = (data - shiftf) / scalef;
+					sum += data;
+					var += data * data;
+				}
+			}
+		}
+		sum /= 2 * 3 * 2;
+		var /= 2 * 3 * 2;
+
+		EXPECT_NEAR(sum, 0, 1e-03F);
+		EXPECT_NEAR(var, 1, 1e-03F);
+	}
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_calc_scale_shift_no_inv_var) {
+    //  Mean   : 3x2x2
+    //  Input  : 2x3x2x2
+    //  Output : 2x3x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  -10   b1:   0    0     -11
+    //  f0: b0:  3    4  -14   b1:   0.5 -0.5   -15  
+    //  f1: b0:  5    6  -12   b1:   1.5  5.2   -13     
+    //  f1: b0:  7    8  -16   b1:   12   9     -17
+    //
+    //  Mean
+    //  f0: -3.3333
+    //  f1: -0.3583
+    //
+    //  Variance
+    //  f0: 44.9305
+    //  f1: 107.0624
+    //
+    //  Scale
+    //  f0: 2.0
+    //  f1: 1.0
+    //
+    //  Shift
+    //  f0: 0.0
+    //  f1: 5.0
+
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "scale", "shift"));
+
+    set_values(input, {
+        1.f, 0.f, 5.f, 1.5f,
+        2.f, 0.f, 6.f, 5.2f,
+        -10.f, -11.f, -12.f, -13.f,
+        3.f, 0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f, 9.f,
+        -14.f, -15.f, -16.f, -17.f
+    });
+
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+                    data = (data - shiftf) / scalef;
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs) {
+	//  Mean   : 3x2x2
+	//  Input  : 2x3x2x2
+	//  Output : 2x3x2x2
+
+	//  Input:
+	//  f0: b0:  1    2  -10   b1:   0    0     -11
+	//  f0: b0:  3    4  -14   b1:   0.5 -0.5   -15  
+	//  f1: b0:  5    6  -12   b1:   1.5  5.2   -13     
+	//  f1: b0:  7    8  -16   b1:   12   9     -17
+	//
+	//  Mean (to be calculated)
+	//  f0: -3.3333
+	//  f1: -0.3583
+	//
+	//  Variance (to be calculated)
+	//  f0: 44.9305
+	//  f1: 107.0624
+	//
+	//  Scale
+	//  f0: 2.0
+	//  f1: 1.0
+	//
+	//  Shift
+	//  f0: 0.0
+	//  f1: 5.0
+
+
+	const auto& engine = get_test_engine();
+
+	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+	auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+	float epsilon = 0.0001f;
+
+	topology topology;
+	topology.add(input_layout("input", input.get_layout()));
+	topology.add(data("scale", scale));
+	topology.add(data("shift", shift));
+	topology.add(mutable_data("mean_out", mean_out));
+	topology.add(mutable_data("variance_out", variance_out));
+	topology.add(mutable_data("inv_variance", inv_variance));
+	topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
+
+	set_values(input, {
+		1.f, 0.f, 5.f, 1.5f,
+		2.f, 0.f, 6.f, 5.2f,
+		-10.f, -11.f, -12.f, -13.f,
+		3.f, 0.5f, 7.f, 12.f,
+		4.f, -0.5f, 8.f, 9.f,
+		-14.f, -15.f, -16.f, -17.f
+	});
+
+	set_values(scale, { 2.f, 1.f });
+	set_values(shift, { 0.f, 5.f });
+
+	network network(engine, topology);
+
+	network.set_input_data("input", input);
+
+	auto outputs = network.execute();
+
+	auto output = outputs.at("batch_norm").get_memory();
+	auto output_ptr = output.pointer<float>();
+
+	std::vector<float> mean_ref = { -3.3333f, -0.3583f };
+	std::vector<float> val_ref = { 44.9305f, 107.0624f };
+
+	for (int j = 0; j < 2; ++j) { //F
+		float sum = 0, var = 0;
+
+		auto scalep = scale.pointer<float>();
+		auto shiftp = shift.pointer<float>();
+		float scalef = scalep[j];
+		float shiftf = shiftp[j];
+
+		auto meanp = mean_out.pointer<float>();
+		auto varp = variance_out.pointer<float>();
+		float meanf = meanp[j];
+		float varf = varp[j];
+
+		for (int i = 0; i < 2; ++i) { //B
+			for (int k = 0; k < 2; ++k) { //Y
+				for (int l = 0; l < 3; ++l) { //X
+					float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+					data = (data - shiftf) / scalef;
+					sum += data;
+					var += data * data;
+				}
+			}
+		}
+		sum /= 2 * 3 * 2;
+		var /= 2 * 3 * 2;
+
+		EXPECT_NEAR(sum, 0, 1e-03F);
+		EXPECT_NEAR(var, 1, 1e-03F);
+
+		EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
+		EXPECT_NEAR(varf, val_ref[j], 1e-03F);
+	}
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_no_inv_var) {
+    //  Mean   : 3x2x2
+    //  Input  : 2x3x2x2
+    //  Output : 2x3x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  -10   b1:   0    0     -11
+    //  f0: b0:  3    4  -14   b1:   0.5 -0.5   -15  
+    //  f1: b0:  5    6  -12   b1:   1.5  5.2   -13     
+    //  f1: b0:  7    8  -16   b1:   12   9     -17
+    //
+    //  Mean (to be calculated)
+    //  f0: -3.3333
+    //  f1: -0.3583
+    //
+    //  Variance (to be calculated)
+    //  f0: 44.9305
+    //  f1: 107.0624
+    //
+    //  Scale
+    //  f0: 2.0
+    //  f1: 1.0
+    //
+    //  Shift
+    //  f0: 0.0
+    //  f1: 5.0
+
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(mutable_data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
+
+    set_values(input, {
+        1.f, 0.f, 5.f, 1.5f,
+        2.f, 0.f, 6.f, 5.2f,
+        -10.f, -11.f, -12.f, -13.f,
+        3.f, 0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f, 9.f,
+        -14.f, -15.f, -16.f, -17.f
+    });
+
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> mean_ref = { -3.3333f, -0.3583f };
+    std::vector<float> val_ref = { 44.9305f, 107.0624f };
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean_out.pointer<float>();
+        auto varp = variance_out.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+                    data = (data - shiftf) / scalef;
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+
+        EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
+        EXPECT_NEAR(varf, val_ref[j], 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_error_out_type) {
+	const auto& engine = get_test_engine();
+
+	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+	auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+	auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+	float epsilon = 0.0001f;
+
+	topology topology;
+	topology.add(input_layout("input", input.get_layout()));
+	topology.add(data("scale", scale));
+	topology.add(data("shift", shift));
+	topology.add(data("mean_out", mean_out));
+	topology.add(data("variance_out", variance_out));
+	topology.add(data("inv_variance", inv_variance));
+	topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
+
+	EXPECT_ANY_THROW(network(engine, topology));
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_with_var_mean_outputs_error_non_equal_types) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+    topology.add(mutable_data("inv_variance", inv_variance));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
+
+    EXPECT_ANY_THROW(network(engine, topology));
+}
+
+
 TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx) {
     //  Mean   : 3x2x2
     //  Input  : 2x3x2x2
@@ -193,7 +796,7 @@ TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx) {
     //  f1: 107.0624
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto mean = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
@@ -274,7 +877,7 @@ TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx_padding) {
     //  f1: 107.0624
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto mean = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 1, 1 } });
@@ -332,4 +935,1772 @@ TEST(batch_normalization_gpu, basic_in2x2x3x2_bfyx_padding) {
         EXPECT_NEAR(sum, 0, 1e-03F);
         EXPECT_NEAR(var, 1, 1e-03F);
     }
-}
-\ No newline at end of file
+}
+
+TEST(batch_normalization_gpu, basic_to_string) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+
+    auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    auto inv_variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+
+    topology.add(mutable_data("inv_variance", inv_variance));
+
+    topology.add(mutable_data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+
+    topology.add(batch_norm("batch_norm0", "input", "mean", "variance", epsilon));
+    topology.add(batch_norm("batch_norm1", "input", "mean", "variance", "scale", "shift", epsilon));
+    topology.add(batch_norm("batch_norm2", "input", epsilon));
+    topology.add(batch_norm("batch_norm3", "input", epsilon, "inv_variance"));
+    topology.add(batch_norm("batch_norm4", "input", epsilon, "scale", "shift"));
+    topology.add(batch_norm("batch_norm5", "input", epsilon, "scale", "shift", "inv_variance"));
+    topology.add(batch_norm("batch_norm6", "input", epsilon, "mean_out", "variance_out", "scale", "shift" ));
+    topology.add(batch_norm("batch_norm7", "input", epsilon, "mean_out", "variance_out", "scale", "shift", "inv_variance"));
+
+    network network(engine, topology);
+
+    size_t zero_length = 0;
+
+    EXPECT_NE(network.get_primitive_info("batch_norm0").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm1").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm2").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm3").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm4").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm5").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm6").length(), zero_length);
+    EXPECT_NE(network.get_primitive_info("batch_norm7").length(), zero_length);
+}                                         
+
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_scale_shift_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+    auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
+
+    set_values(input, {
+        1.f, 0.f, 5.f, 1.5f,
+        2.f, 0.f, 6.f, 5.2f,
+        -10.f, -11.f, -12.f, -13.f,
+        3.f, 0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f, 9.f,
+        -14.f, -15.f, -16.f, -17.f
+    });
+
+    set_values(mean, { -3.3333f, -0.3583f });
+    set_values(variance, { 44.9305f, 107.0624f });
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+                    data = (data - shiftf) / scalef;
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_scale_shift_different_shapes_input_layouts) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+    auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("mean", mean.get_layout()));
+    topology.add(input_layout("variance", variance.get_layout()));
+    topology.add(input_layout("scale", scale.get_layout()));
+    topology.add(input_layout("shift", shift.get_layout()));
+    topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
+
+    set_values(input, {
+        1.f, 0.f, 5.f, 1.5f,
+        2.f, 0.f, 6.f, 5.2f,
+        -10.f, -11.f, -12.f, -13.f,
+        3.f, 0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f, 9.f,
+        -14.f, -15.f, -16.f, -17.f
+    });
+
+    set_values(mean, { -3.3333f, -0.3583f });
+    set_values(variance, { 44.9305f, 107.0624f });
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("mean", mean);
+    network.set_input_data("variance", variance);
+    network.set_input_data("scale", scale);
+    network.set_input_data("shift", shift);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+                    data = (data - shiftf) / scalef;
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x2x2_yxfb_with_var_mean_outputs_no_inv_var_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 2 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(mutable_data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
+
+    set_values(input, {
+        1.f, 0.f, 5.f, 1.5f,
+        2.f, 0.f, 6.f, 5.2f,
+        -10.f, -11.f, -12.f, -13.f,
+        3.f, 0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f, 9.f,
+        -14.f, -15.f, -16.f, -17.f
+    });
+
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> mean_ref = { -3.3333f, -0.3583f };
+    std::vector<float> val_ref = { 44.9305f, 107.0624f };
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean_out.pointer<float>();
+        auto varp = variance_out.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    float data = output_ptr[i + 2 * j + 2 * 2 * l + 2 * 2 * 3 * k];
+                    data = (data - shiftf) / scalef;
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+
+        EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
+        EXPECT_NEAR(varf, val_ref[j], 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x2x3x2_byxf_scale_shift_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 3, 2 } });
+    auto mean = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
+
+    set_values(input, {
+        1.f, 5.f, 2.f, 6.f, -10.f, -12.f, 
+        3.f, 7.f, 4.f, 8.f, -14.f, -16.f, 
+        0.f, 1.5f, 0.f, 5.2f, -11.f, -13.f, 
+        0.5f, 12.f, -0.5f, 9.f, -15.f, -17.f
+    });
+
+    set_values(mean, { -3.3333f, -0.3583f });
+    set_values(variance, { 44.9305f, 107.0624f });
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    std::vector<float> expected_result{
+        0.646469f, 0.517855f, 0.795655f, 0.614501f, -0.99458f, -1.12512f, 
+        0.944842f, 0.711146f, 1.09403f, 0.807792f, -1.59133f, -1.5117f, 
+        0.497283f, 0.179596f, 0.497283f, 0.537184f, -1.14377f, -1.22176f, 
+        0.571876f, 1.19437f, 0.42269f, 0.904437f, -1.74051f, -1.60834f
+    };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    auto index = 12 * i + 6 * k + 2 * l + j;
+                    float data = output_ptr[index];
+                    data = (data - shiftf) / scalef;
+                    EXPECT_NEAR(data, expected_result[index], 1e-3F);
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x2x3x2_byxf_with_var_mean_outputs_no_inv_var_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 3, 2 } });
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 1, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 2, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(mutable_data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
+
+    set_values(input, {
+        1.f, 5.f, 2.f, 6.f, -10.f, -12.f,
+        3.f, 7.f, 4.f, 8.f, -14.f, -16.f,
+        0.f, 1.5f, 0.f, 5.2f, -11.f, -13.f,
+        0.5f, 12.f, -0.5f, 9.f, -15.f, -17.f
+    });
+
+    set_values(scale, { 2.f, 1.f });
+    set_values(shift, { 0.f, 5.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> mean_ref = { -3.3333f, -0.3583f };
+    std::vector<float> val_ref = { 44.9305f, 107.0624f };
+
+    std::vector<float> expected_result{
+        0.646469f, 0.517855f, 0.795655f, 0.614501f, -0.99458f, -1.12512f,
+        0.944842f, 0.711146f, 1.09403f, 0.807792f, -1.59133f, -1.5117f,
+        0.497283f, 0.179596f, 0.497283f, 0.537184f, -1.14377f, -1.22176f,
+        0.571876f, 1.19437f, 0.42269f, 0.904437f, -1.74051f, -1.60834f
+    };
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean_out.pointer<float>();
+        auto varp = variance_out.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    auto index = 12 * i + 6 * k + 2 * l + j;
+                    float data = output_ptr[index];
+                    data = (data - shiftf) / scalef;
+                    EXPECT_NEAR(data, expected_result[index], 1e-3F);
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+
+        EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
+        EXPECT_NEAR(varf, val_ref[j], 1e-03F);
+    }
+}
+
+
+TEST(batch_normalization_gpu, basic_in2x3x5x2_yxfb_scale_shift_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 5, 3, 2 } });
+    auto mean = memory::allocate(engine, { data_types::f32, format::yxfb,{ 5, 1, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 5 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
+
+    set_values(input, {
+        // y0x0
+        1.f, 0.f, // f0
+        5.f, 1.5f, // f1
+        1.f, 0.f, // f2
+        5.f, 1.5f, // f3
+        1.f, 0.f, // f4
+
+        // y0x1
+        2.f, 0.f, 
+        6.f, 5.2f,
+        2.f, 0.f,
+        6.f, 5.2f,
+        2.f, 0.f,
+
+        // y0x2
+        -10.f, -11.f, 
+        -12.f, -13.f,
+        -10.f, -11.f,
+        -12.f, -13.f,
+        -10.f, -11.f,
+
+        // y1x0
+        3.f, 0.5f, 
+        7.f, 12.f,
+        3.f, 0.5f,
+        7.f, 12.f,
+        3.f, 0.5f,
+
+        // y1x1
+        4.f, -0.5f, 
+        8.f, 9.f,
+        4.f, -0.5f,
+        8.f, 9.f,
+        4.f, -0.5f,
+
+        // y1x2
+        -14.f, -15.f,
+        -16.f, -17.f,
+        -14.f, -15.f,
+        -16.f, -17.f,
+        - 14.f, -15.f
+    });
+
+    set_values(mean, { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f });
+    set_values(variance, { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f });
+    set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
+    set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
+
+    std::vector<float> expected_result{
+        0.646469f, 0.497283f, 
+        0.517855f, 0.179596f, 
+        0.646469f, 0.497283f, 
+        0.517855f, 0.179596f, 
+        0.646469f, 0.497283f, 
+        
+        0.795655f, 0.497283f, 
+        0.614501f, 0.537184f, 
+        0.795655f, 0.497283f, 
+        0.614501f, 0.537184f, 
+        0.795655f, 0.497283f, 
+        
+        -0.99458f, -1.14377f, 
+        -1.12512f, -1.22176f, 
+        -0.99458f, -1.14377f, 
+        -1.12512f, -1.22176f, 
+        -0.99458f, -1.14377f, 
+        
+        0.944842f, 0.571876f, 
+        0.711146f, 1.19437f, 
+        0.944842f, 0.571876f, 
+        0.711146f, 1.19437f, 
+        0.944842f, 0.571876f, 
+        
+        1.09403f, 0.42269f, 
+        0.807792f, 0.904437f, 
+        1.09403f, 0.42269f, 
+        0.807792f, 0.904437f, 
+        1.09403f, 0.42269f, 
+        
+        -1.59133f, -1.74051f, 
+        -1.5117f, -1.60834f, 
+        -1.59133f, -1.74051f, 
+        -1.5117f, -1.60834f, 
+        -1.59133f, -1.74051f
+    };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 5; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    int index = 30 * k + 10 * l + 2 * j + i;
+                    float data = output_ptr[index];
+                    data = (data - shiftf) / scalef;
+                    EXPECT_NEAR(data, expected_result[index], 1e-3F);
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x3x5x2_yxfb_with_var_mean_outputs_no_inv_var_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 5, 3, 2 } });
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 5, 1, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 5, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 5 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(mutable_data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
+
+    set_values(input, {
+        // y0x0
+        1.f, 0.f, // f0
+        5.f, 1.5f, // f1
+        1.f, 0.f, // f2
+        5.f, 1.5f, // f3
+        1.f, 0.f, // f4
+
+        // y0x1
+        2.f, 0.f,
+        6.f, 5.2f,
+        2.f, 0.f,
+        6.f, 5.2f,
+        2.f, 0.f,
+
+        // y0x2
+        -10.f, -11.f,
+        -12.f, -13.f,
+        -10.f, -11.f,
+        -12.f, -13.f,
+        -10.f, -11.f,
+
+        // y1x0
+        3.f, 0.5f,
+        7.f, 12.f,
+        3.f, 0.5f,
+        7.f, 12.f,
+        3.f, 0.5f,
+
+        // y1x1
+        4.f, -0.5f,
+        8.f, 9.f,
+        4.f, -0.5f,
+        8.f, 9.f,
+        4.f, -0.5f,
+
+        // y1x2
+        -14.f, -15.f,
+        -16.f, -17.f,
+        -14.f, -15.f,
+        -16.f, -17.f,
+        -14.f, -15.f
+    });
+
+    set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
+    set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
+
+    std::vector<float> expected_result{
+        0.646469f, 0.497283f,
+        0.517855f, 0.179596f,
+        0.646469f, 0.497283f,
+        0.517855f, 0.179596f,
+        0.646469f, 0.497283f,
+
+        0.795655f, 0.497283f,
+        0.614501f, 0.537184f,
+        0.795655f, 0.497283f,
+        0.614501f, 0.537184f,
+        0.795655f, 0.497283f,
+
+        -0.99458f, -1.14377f,
+        -1.12512f, -1.22176f,
+        -0.99458f, -1.14377f,
+        -1.12512f, -1.22176f,
+        -0.99458f, -1.14377f,
+
+        0.944842f, 0.571876f,
+        0.711146f, 1.19437f,
+        0.944842f, 0.571876f,
+        0.711146f, 1.19437f,
+        0.944842f, 0.571876f,
+
+        1.09403f, 0.42269f,
+        0.807792f, 0.904437f,
+        1.09403f, 0.42269f,
+        0.807792f, 0.904437f,
+        1.09403f, 0.42269f,
+
+        -1.59133f, -1.74051f,
+        -1.5117f, -1.60834f,
+        -1.59133f, -1.74051f,
+        -1.5117f, -1.60834f,
+        -1.59133f, -1.74051f
+    };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> mean_ref = { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f };
+    std::vector<float> val_ref = { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f };
+
+    for (int j = 0; j < 5; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean_out.pointer<float>();
+        auto varp = variance_out.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    int index = 30 * k + 10 * l + 2 * j + i;
+                    float data = output_ptr[index];
+                    data = (data - shiftf) / scalef;
+                    EXPECT_NEAR(data, expected_result[index], 1e-3F);
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+
+        EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
+        EXPECT_NEAR(varf, val_ref[j], 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x2x3x5_byxf_scale_shift_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 5, 3, 2 } });
+    auto mean = memory::allocate(engine, { data_types::f32, format::byxf,{ 5, 1, 1, 1 } });
+    auto variance = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 5, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 5, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 5 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(batch_norm("batch_norm", "input", "mean", "variance", "scale", "shift", epsilon));
+
+    set_values(input, {
+        // b0y0
+        1.f, 5.f, 1.f, 5.f, 1.f, // x0
+        2.f, 6.f, 2.f, 6.f, 2.f, // x1
+        -10.f, -12.f, -10.f, -12.f, -10.f, //x2
+
+        // b0y1
+        3.f, 7.f, 3.f, 7.f, 3.f,
+        4.f, 8.f, 4.f, 8.f, 4.f,
+        -14.f, -16.f, -14.f, -16.f, -14.f,
+        
+        // b1y0
+        0.f, 1.5f, 0.f, 1.5f, 0.f,
+        0.f, 5.2f, 0.f, 5.2f, 0.f,
+        -11.f, -13.f, -11.f, -13.f, -11.f,
+        
+        // b1y1
+        0.5f, 12.f, 0.5f, 12.f, 0.5f,
+        -0.5f, 9.f, -0.5f, 9.f, -0.5f,
+        -15.f, -17.f, -15.f, -17.f, -15.f
+    });
+
+    set_values(mean, { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f });
+    set_values(variance, { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f });
+    set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
+    set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
+
+    std::vector<float> expected_result{
+        0.646469f, 0.517855f, 0.646469f, 0.517855f, 0.646469f,
+        0.795655f, 0.614501f, 0.795655f, 0.614501f, 0.795655f,
+        -0.99458f, -1.12512f, -0.99458f, -1.12512f, -0.99458f,
+
+        0.944842f, 0.711146f, 0.944842f, 0.711146f, 0.944842f,
+        1.09403f, 0.807792f, 1.09403f, 0.807792f, 1.09403f,
+        -1.59133f, -1.5117f, -1.59133f, -1.5117f, -1.59133f,
+
+        0.497283f, 0.179596f, 0.497283f, 0.179596f, 0.497283f,
+        0.497283f, 0.537184f, 0.497283f, 0.537184f, 0.497283f,
+        -1.14377f, -1.22176f, -1.14377f, -1.22176f, -1.14377f,
+
+        0.571876f, 1.19437f, 0.571876f, 1.19437f, 0.571876f,
+        0.42269f, 0.904437f, 0.42269f, 0.904437f, 0.42269f,
+        -1.74051f, -1.60834f, -1.74051f, -1.60834f, -1.74051f
+    };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 5; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    auto index = 30 * i + 15 * k + 5 * l + j;
+                    float data = output_ptr[index];
+                    data = (data - shiftf) / scalef;
+                    EXPECT_NEAR(data, expected_result[index], 1e-3F);
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+    }
+}
+
+TEST(batch_normalization_gpu, basic_in2x2x3x5_byxf_with_var_mean_outputs_no_inv_var_different_shapes) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 5, 3, 2 } });
+    auto mean_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 5, 1, 1, 1 } });
+    auto variance_out = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 5, 1, 1 } });
+    auto scale = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 5, 1 } });
+    auto shift = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 5 } });
+
+    float epsilon = 0.0001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("scale", scale));
+    topology.add(data("shift", shift));
+    topology.add(mutable_data("mean_out", mean_out));
+    topology.add(mutable_data("variance_out", variance_out));
+    topology.add(batch_norm("batch_norm", "input", epsilon, "mean_out", "variance_out", "scale", "shift"));
+
+    set_values(input, {
+        // b0y0
+        1.f, 5.f, 1.f, 5.f, 1.f, // x0
+        2.f, 6.f, 2.f, 6.f, 2.f, // x1
+        -10.f, -12.f, -10.f, -12.f, -10.f, //x2
+
+        // b0y1
+        3.f, 7.f, 3.f, 7.f, 3.f,
+        4.f, 8.f, 4.f, 8.f, 4.f,
+        -14.f, -16.f, -14.f, -16.f, -14.f,
+
+        // b1y0
+        0.f, 1.5f, 0.f, 1.5f, 0.f,
+        0.f, 5.2f, 0.f, 5.2f, 0.f,
+        -11.f, -13.f, -11.f, -13.f, -11.f,
+
+        // b1y1
+        0.5f, 12.f, 0.5f, 12.f, 0.5f,
+        -0.5f, 9.f, -0.5f, 9.f, -0.5f,
+        -15.f, -17.f, -15.f, -17.f, -15.f
+    });
+
+    set_values(scale, { 2.f, 1.f, 3.f, 4.f, 5.f });
+    set_values(shift, { 0.f, 5.f, -5.f, -15.f, 0.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> mean_ref = { -3.3333f, -0.3583f, -3.3333f, -0.3583f, -3.3333f };
+    std::vector<float> val_ref = { 44.9305f, 107.0624f, 44.9305f, 107.0624f, 44.9305f };
+
+    std::vector<float> expected_result{
+        0.646469f, 0.517855f, 0.646469f, 0.517855f, 0.646469f,
+        0.795655f, 0.614501f, 0.795655f, 0.614501f, 0.795655f,
+        -0.99458f, -1.12512f, -0.99458f, -1.12512f, -0.99458f,
+
+        0.944842f, 0.711146f, 0.944842f, 0.711146f, 0.944842f,
+        1.09403f, 0.807792f, 1.09403f, 0.807792f, 1.09403f,
+        -1.59133f, -1.5117f, -1.59133f, -1.5117f, -1.59133f,
+
+        0.497283f, 0.179596f, 0.497283f, 0.179596f, 0.497283f,
+        0.497283f, 0.537184f, 0.497283f, 0.537184f, 0.497283f,
+        -1.14377f, -1.22176f, -1.14377f, -1.22176f, -1.14377f,
+
+        0.571876f, 1.19437f, 0.571876f, 1.19437f, 0.571876f,
+        0.42269f, 0.904437f, 0.42269f, 0.904437f, 0.42269f,
+        -1.74051f, -1.60834f, -1.74051f, -1.60834f, -1.74051f
+    };
+
+    for (int j = 0; j < 5; ++j) { //F
+        float sum = 0, var = 0;
+
+        auto scalep = scale.pointer<float>();
+        auto shiftp = shift.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean_out.pointer<float>();
+        auto varp = variance_out.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int i = 0; i < 2; ++i) { //B
+            for (int k = 0; k < 2; ++k) { //Y
+                for (int l = 0; l < 3; ++l) { //X
+                    auto index = 30 * i + 15 * k + 5 * l + j;
+                    float data = output_ptr[index];
+                    data = (data - shiftf) / scalef;
+                    EXPECT_NEAR(data, expected_result[index], 1e-3F);
+                    sum += data;
+                    var += data * data;
+                }
+            }
+        }
+        sum /= 2 * 3 * 2;
+        var /= 2 * 3 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-03F);
+        EXPECT_NEAR(var, 1, 1e-03F);
+
+        EXPECT_NEAR(meanf, mean_ref[j], 1e-03F);
+        EXPECT_NEAR(varf, val_ref[j], 1e-03F);
+    }
+}
+
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b1c2h2w2)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 1, 2, 2, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { feature(2) };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { feature(2) };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { feature(2) };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { feature(2) };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(mutable_data("mean", mean));
+    topology.add(mutable_data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+    set_values<float>(input, {
+        0.54881352f,
+        0.71518934f,
+        0.60276335f,
+        0.54488319f,
+
+        0.42365479f,
+        0.64589411f,
+        0.4375872f,
+        0.89177299f
+    });
+
+    set_values<float>(gamma, { 1.f, 1.f });
+    set_values<float>(beta, { 0.f, 0.f });
+
+    std::vector<float> expected_result { 
+        -0.71498716f,
+        1.48388731f,
+        -0.00196938f,
+        -0.76693159f,
+
+        -0.91316032f,
+        0.23943391f,
+        -0.84090298f,
+        1.51462936f 
+    };
+
+    std::vector<float> expected_mean = { 0.602912f, 0.599727f };
+    std::vector<float> expected_variance = { 0.00472505f, 0.0361782f };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean.pointer<float>();
+        auto varp = variance.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int k = 0; k < 2; ++k) { //Y
+            for (int l = 0; l < 2; ++l) { //X
+                int index = 4 * j + 2 * k + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+
+        EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
+        EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 2, 2, 1, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { feature(2) };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { feature(2) };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { feature(2) };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { feature(2) };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(mutable_data("mean", mean));
+    topology.add(mutable_data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+
+    set_values<float>(input, { 
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f
+    });
+
+    set_values<float>(gamma, { 1.f, 1.f });
+    set_values<float>(beta, { 0.f, 0.f });
+
+    std::vector<float> expected_result{
+        -0.30327f, 
+        1.1561f, 
+
+        -0.0963782f, 
+        -0.434702f, 
+        
+
+
+        -1.4011f, 
+        0.548275f, 
+
+        -1.06187f,
+        1.59295f };
+
+    std::vector<float> expected_mean = { 0.583388f, 0.619252f };
+    std::vector<float> expected_variance = { 0.0119972f, 0.0282681f };
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean.pointer<float>();
+        auto varp = variance.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int k = 0; k < 2; ++k) { //B
+            for (int l = 0; l < 2; ++l) { //Y
+                int index = 4 * k + 2 * j + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+
+        EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
+        EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 2, 2, 1, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { feature(2) };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { feature(2) };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { feature(2) };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { feature(2) };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+
+    set_values<float>(input, { 
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f
+    });
+
+    set_values<float>(gamma, { 1.f, 1.f });
+    set_values<float>(beta, { 0.f, 0.f });
+
+    set_values<float>(mean, { 0.583388f, 0.619252f });
+    set_values<float>(variance, { 0.0119972f, 0.0282681f });
+
+    std::vector<float> expected_result{
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+        
+        
+        -1.4011f,
+        0.548275f,
+        
+        -1.06187f,
+        1.59295f };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int k = 0; k < 2; ++k) { //B
+            for (int l = 0; l < 2; ++l) { //Y
+                int index = 4 * k + 2 * j + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c2h2w1_different_shapes)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 2, 2, 1, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { 2, 1, 1, 1 };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { 1, 2, 1, 1 };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { 1, 1, 2, 1 };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { 1, 1, 1, 2 };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(mutable_data("mean", mean));
+    topology.add(mutable_data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+
+    set_values<float>(input, {
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f
+    });
+
+    set_values<float>(gamma, { 2.f, 3.f });
+    set_values<float>(beta, { 5.f, 10.f });
+
+    std::vector<float> expected_result{
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+
+
+
+        -1.4011f,
+        0.548275f,
+
+        -1.06187f,
+        1.59295f };
+
+    std::vector<float> expected_mean = { 0.583388f, 0.619252f };
+    std::vector<float> expected_variance = { 0.0119972f, 0.0282681f };
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean.pointer<float>();
+        auto varp = variance.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int k = 0; k < 2; ++k) { //B
+            for (int l = 0; l < 2; ++l) { //Y
+                int index = 4 * k + 2 * j + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+
+        EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
+        EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c2h2w1_different_shapes)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 2, 2, 1, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { 2, 1, 1, 1 };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { 1, 1, 2, 1 };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { 1, 1, 2, 1 };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { 1, 1, 1, 2 };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+
+    set_values<float>(input, {
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f
+    });
+
+    set_values<float>(gamma, { 2.f, 3.f });
+    set_values<float>(beta, { 5.f, 10.f });
+
+    set_values<float>(mean, { 0.583388f, 0.619252f });
+    set_values<float>(variance, { 0.0119972f, 0.0282681f });
+
+    std::vector<float> expected_result{
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+
+
+        -1.4011f,
+        0.548275f,
+
+        -1.06187f,
+        1.59295f };
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 2; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int k = 0; k < 2; ++k) { //B
+            for (int l = 0; l < 2; ++l) { //Y
+                int index = 4 * k + 2 * j + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b2c5h2w1_different_shapes)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 2, 5, 1, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { 5, 1, 1, 1 };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { 1, 5, 1, 1 };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { 1, 1, 5, 1 };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { 1, 1, 1, 5 };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(mutable_data("mean", mean));
+    topology.add(mutable_data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+
+    set_values<float>(input, {
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+        0.54881352f,
+        0.71518934f,
+
+
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f,
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f,
+
+        0.42365479f,
+        0.64589411f
+    });
+
+    set_values<float>(gamma, { 2.f, 3.f, 4.f, 5.f, 1.f });
+    set_values<float>(beta, { 5.f, 10.f, -10.f, -15.f, 0.f });
+
+    std::vector<float> expected_result{
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+
+        -0.30327f,
+        1.1561f,
+
+
+
+
+        -1.4011f,
+        0.548275f,
+
+        -1.06187f,
+        1.59295f,
+
+        -1.4011f,
+        0.548275f,
+
+        -1.06187f,
+        1.59295f,
+
+        -1.4011f,
+        0.548275f
+    };
+
+    std::vector<float> expected_mean = { 0.583388f, 0.619252f, 0.583388f, 0.619252f, 0.583388f };
+    std::vector<float> expected_variance = { 0.0119972f, 0.0282681f, 0.0119972f, 0.0282681f, 0.0119972f };
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 5; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        auto meanp = mean.pointer<float>();
+        auto varp = variance.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        for (int k = 0; k < 2; ++k) { //B
+            for (int l = 0; l < 2; ++l) { //Y
+                int index = 10 * k + 2 * j + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+
+        EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
+        EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_inference_b2c5h2w1_different_shapes)
+{
+    const auto& engine = get_test_engine();
+
+    tensor input_shape = { 2, 5, 1, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { 5, 1, 1, 1 };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { 1, 5, 1, 1 };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { 1, 1, 5, 1 };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { 1, 1, 1, 5 };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(data("mean", mean));
+    topology.add(data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+
+    set_values<float>(input, {
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+        0.54881352f,
+        0.71518934f,
+
+        0.60276335f,
+        0.54488319f,
+
+        0.54881352f,
+        0.71518934f,
+
+
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f,
+
+        0.42365479f,
+        0.64589411f,
+
+        0.4375872f,
+        0.89177299f,
+
+        0.42365479f,
+        0.64589411f
+    });
+
+    set_values<float>(gamma, { 2.f, 3.f, 4.f, 5.f, 1.f });
+    set_values<float>(beta, { 5.f, 10.f, -10.f, -15.f, 0.f });
+
+    std::vector<float> expected_result{
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+
+        -0.30327f,
+        1.1561f,
+
+        -0.0963782f,
+        -0.434702f,
+
+        -0.30327f,
+        1.1561f,
+
+
+
+
+        -1.4011f,
+        0.548275f,
+
+        -1.06187f,
+        1.59295f,
+
+        -1.4011f,
+        0.548275f,
+
+        -1.06187f,
+        1.59295f,
+
+        -1.4011f,
+        0.548275f
+    };
+
+    set_values<float>(mean, { 0.583388f, 0.619252f, 0.583388f, 0.619252f, 0.583388f });
+    set_values<float>(variance, { 0.0119972f, 0.0282681f, 0.0119972f, 0.0282681f, 0.0119972f });
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("batch_norm").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    for (int j = 0; j < 5; ++j) { //F
+        float sum = 0;
+
+        auto scalep = gamma.pointer<float>();
+        auto shiftp = beta.pointer<float>();
+        float scalef = scalep[j];
+        float shiftf = shiftp[j];
+
+        for (int k = 0; k < 2; ++k) { //B
+            for (int l = 0; l < 2; ++l) { //Y
+                int index = 10 * k + 2 * j + l;
+                float data = output_ptr[index];
+                data = (data - shiftf) / scalef;
+                EXPECT_NEAR(data, expected_result[index], 1e-5F);
+                sum += data;
+            }
+        }
+
+        sum /= 2 * 2;
+
+        EXPECT_NEAR(sum, 0, 1e-5F);
+    }
+}
+
+TEST(ngraph_batch_normalization_gpu, batchnorm_fprop_b1c2h2w2_no_bn_output)
+{
+    engine engine;
+
+    tensor input_shape = { 1, 2, 2, 2 };
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, input_shape });
+    tensor mean_shape = { feature(2) };
+    auto mean = memory::allocate(engine, { data_types::f32, format::bfyx, mean_shape });
+    tensor var_shape = { feature(2) };
+    auto variance = memory::allocate(engine, { data_types::f32, format::bfyx, var_shape });
+    tensor gamma_shape = { feature(2) };
+    auto gamma = memory::allocate(engine, { data_types::f32, format::bfyx, gamma_shape });
+    tensor beta_shape = { feature(2) };
+    auto beta = memory::allocate(engine, { data_types::f32, format::bfyx, beta_shape });
+
+    float eps = 0.001f;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("gamma", gamma));
+    topology.add(data("beta", beta));
+    topology.add(mutable_data("mean", mean));
+    topology.add(mutable_data("variance", variance));
+    topology.add(batch_norm("batch_norm", "input", eps, "mean", "variance", "gamma", "beta"));
+
+    set_values<float>(input, {
+        0.54881352f,
+        0.71518934f,
+        0.60276335f,
+        0.54488319f,
+
+        0.42365479f,
+        0.64589411f,
+        0.4375872f,
+        0.89177299f
+    });
+
+    set_values<float>(gamma, { 1.f, 1.f });
+    set_values<float>(beta, { 0.f, 0.f });
+
+    std::vector<float> expected_mean = { 0.602912f, 0.599727f };
+    std::vector<float> expected_variance = { 0.00472505f, 0.0361782f };
+
+    build_options bo;
+    bo.set_option(build_option::outputs({ "mean", "variance" }));
+    network network(engine, topology, bo);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    for (int j = 0; j < 2; ++j) { //F
+        auto meanp = mean.pointer<float>();
+        auto varp = variance.pointer<float>();
+        float meanf = meanp[j];
+        float varf = varp[j];
+
+        EXPECT_NEAR(meanf, expected_mean[j], 1e-5F);
+        EXPECT_NEAR(varf, expected_variance[j], 1e-5F);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_grad_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_grad_gpu_test.cpp
index f6c364b0d..f9b820b15 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_grad_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/batch_norm_grad_gpu_test.cpp
@@ -51,7 +51,7 @@ TEST(batch_normalization_backward_gpu, basic_in2x2x2x3) {
     //  f0: 0.1491862
     //  f1: 0.0966454
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto grad_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
index 7a2539913..f1b3da3c3 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/border_gpu_test.cpp
@@ -73,7 +73,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -149,7 +149,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_constant_non_constant) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -225,7 +225,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -301,7 +301,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_mirror_101) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -379,7 +379,7 @@ TEST(border_gpu, basic_yxfb_0x0x1x2_0x0x3x4_border_edge) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -454,7 +454,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_constant) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -524,7 +524,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -590,7 +590,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_mirror_101) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
@@ -656,7 +656,7 @@ TEST(border_gpu, basic_bfyx_2x1x2x3_1x2x3x4_border_edge) {
     constexpr auto out_size_y = in_size_y + blt_size_y + brb_size_y;
     constexpr auto out_size_x = in_size_x + blt_size_x + brb_size_x;
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
 
     topology topology;
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp
index 965a65eac..548ef3d82 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/broadcast_gpu_test.cpp
@@ -31,62 +31,44 @@
 using namespace cldnn;
 using namespace ::tests;
 
-
 template<typename T>
-static std::vector<T> generate_rnd_real_input(
-    const std::size_t b, const std::size_t f, const std::size_t y, const std::size_t x,
-    const T min = static_cast<T>(0), const T max = static_cast<T>(1), const unsigned rnd_bits = 9)
+void start_broadcast_test(data_types cldnn_data_type, std::vector<size_t> output_shape,
+                          std::vector<size_t> input_shape, std::vector<size_t> broadcast_axes,
+                          std::vector<T> golden_data)
 {
-    static std::default_random_engine rnd_gen(random_seed);
-    cldnn::tests::distributions::uniform_quantized_real_distribution<T> rnd_dist(min, max, rnd_bits);
-
-    std::vector<T> data;
-    data.reserve(b * f * y * x);
-    for (size_t i = 0; i < b * f * y * x; ++i)
-        data.push_back(rnd_dist(rnd_gen));
-
-    return data;
-}
-
-
-TEST(broadcast_gpu, basic_yxfb_1x1x2x3_to_1x2x2x9) {
-    //  Input (BF:XY) :  1x1:3x2
-    //  Output (BF:XY):  1x2:9x2
-
-    constexpr auto in_size_b = 1;
-    constexpr auto in_size_f = 1;
-    constexpr auto in_size_y = 2;
-    constexpr auto in_size_x = 3;
+    size_t input_data_size = accumulate(input_shape.rbegin(), input_shape.rend(), (size_t)1, std::multiplies<size_t>());
+    EXPECT_GE(input_data_size, (size_t)1);
+    std::vector<T> input_data = {};
+    for (size_t i = 1; i <= input_data_size; ++i) {
+        input_data.push_back((T)i);
+    }
 
-    constexpr auto bc_scale_b = 1;
-    constexpr auto bc_scale_f = 2;
-    constexpr auto bc_scale_y = 1;
-    constexpr auto bc_scale_x = 3;
+    EXPECT_EQ(golden_data.size(), accumulate(output_shape.rbegin(), output_shape.rend(), (size_t)1, std::multiplies<size_t>()));
 
-    constexpr auto out_size_b = bc_scale_b * in_size_b;
-    constexpr auto out_size_f = bc_scale_f * in_size_f;
-    constexpr auto out_size_y = bc_scale_y * in_size_y;
-    constexpr auto out_size_x = bc_scale_x * in_size_x;
+    std::vector<tensor::value_type> output_4d(4, 1);
+    for(size_t i = 0; i < output_shape.size(); ++i) {
+        output_4d.at(4 - output_shape.size() + i) = (tensor::value_type)output_shape.at(i);
+    }
+    std::vector<tensor::value_type> input_4d(4, 1);
+    for(size_t i = 0; i < input_shape.size(); ++i) {
+        input_4d.at(4 - input_shape.size() + i) = (tensor::value_type)input_shape.at(i);
+    }
+    std::vector<uint16_t> fixed_b_axes;
+    size_t shift = 4 - output_shape.size();
+    for(size_t i = 0; i < shift; ++i) {
+        fixed_b_axes.push_back((uint16_t) i);
+    }
+    for(size_t i = 0; i < broadcast_axes.size(); ++i) {
+        fixed_b_axes.push_back((uint16_t) (broadcast_axes.at(i) + shift));
+    }
 
-    engine engine;
-    auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {cldnn_data_type, format::bfyx, {input_4d.at(0), input_4d.at(1), input_4d.at(3), input_4d.at(2)}});
 
     topology topology;
-    topology.add(
-        input_layout("input", input.get_layout())
-    );
-    topology.add(
-        broadcast("output", "input", {out_size_b, out_size_f, out_size_x, out_size_y})
-    );
-
-    std::vector<float> input_data = {
-         41, -11, 13,
-        107, -66,  0,
-    };
-    std::vector<float> out_data = {
-         41,  41,   -11, -11,   13, 13,    41,  41,   -11, -11,   13, 13,    41,  41,   -11, -11,   13, 13,
-        107, 107,   -66, -66,    0,  0,   107, 107,   -66, -66,    0,  0,   107, 107,   -66, -66,    0,  0,
-    };
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {output_4d.at(0), output_4d.at(1), output_4d.at(3), output_4d.at(2)}, fixed_b_axes));
+
     set_values(input, input_data);
 
     network network(engine, topology);
@@ -94,375 +76,972 @@ TEST(broadcast_gpu, basic_yxfb_1x1x2x3_to_1x2x2x9) {
     auto outputs = network.execute();
 
     auto output = outputs.at("output").get_memory();
-    auto output_ptr = output.pointer<float>();
-
-    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x));
-
-    for (auto b = 0; b < out_size_b; ++b) {             // B
-        for (auto f = 0; f < out_size_f; ++f) {         // F
-            for (auto y = 0; y < out_size_y; ++y) {     // Y
-                for (auto x = 0; x < out_size_x; ++x) { // X
-                    auto output_off = ((y * out_size_x + x) * out_size_f + f) * out_size_b + b; // YXFB
-
-                    EXPECT_EQ(output_ptr[output_off], out_data[output_off]);
+    auto output_ptr = output.pointer<T>();
+
+    for (tensor::value_type b = 0; b < output_4d.at(0); ++b) {
+        for (tensor::value_type f = 0; f < output_4d.at(1); ++f) {
+            for (tensor::value_type y = 0; y < output_4d.at(2); ++y) {
+                for (tensor::value_type x = 0; x < output_4d.at(3); ++x) {
+                    auto output_off = ((b * output_4d.at(1) + f) * output_4d.at(2) + y) * output_4d.at(3) + x;
+                    EXPECT_EQ(output_ptr[output_off], golden_data[output_off]);
                 }
             }
         }
     }
 }
 
-TEST(broadcast_gpu, basic_bfyx_4x2x2x2_to_8x2x6x4) {
-    //  Input (BF:XY) :  4x2:2x2
-    //  Output (BF:XY):  8x2:6x4
+TEST(broadcast_gpu_float, bfyx_1_to_5_w_b_axes_0) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0};
+    start_broadcast_test<float>(data_types::f32, {5}, {1}, {0}, golden_data);
+}
 
-    constexpr auto in_size_b = 4;
-    constexpr auto in_size_f = 2;
-    constexpr auto in_size_y = 2;
-    constexpr auto in_size_x = 2;
+TEST(broadcast_gpu_uint8_t, bfyx_1_to_5_w_b_axes_0) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1};
+    start_broadcast_test<uint8_t>(data_types::u8, {5}, {1}, {0}, golden_data);
+}
 
-    constexpr auto bc_scale_b = 2;
-    constexpr auto bc_scale_f = 1;
-    constexpr auto bc_scale_y = 3;
-    constexpr auto bc_scale_x = 2;
+TEST(broadcast_gpu_float, bfyx_1_to_4x5_w_b_axes_0x1) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    start_broadcast_test<float>(data_types::f32, {4, 5}, {1}, {0, 1}, golden_data);
+}
 
-    constexpr auto out_size_b = bc_scale_b * in_size_b;
-    constexpr auto out_size_f = bc_scale_f * in_size_f;
-    constexpr auto out_size_y = bc_scale_y * in_size_y;
-    constexpr auto out_size_x = bc_scale_x * in_size_x;
+TEST(broadcast_gpu_uint8_t, bfyx_1_to_4x5_w_b_axes_0x1) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    start_broadcast_test<uint8_t>(data_types::u8, {4, 5}, {1}, {0, 1}, golden_data);
+}
 
-    engine engine;
-    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
+TEST(broadcast_gpu_float, bfyx_1_to_3x4x5_w_b_axes_0x1x2) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    start_broadcast_test<float>(data_types::f32, {3, 4, 5}, {1}, {0, 1, 2}, golden_data);
+}
 
-    topology topology;
-    topology.add(
-        input_layout("input", input.get_layout())
-    );
-    topology.add(
-        broadcast("output", "input", {out_size_b, out_size_f, out_size_x, out_size_y})
-    );
-
-    std::vector<float> input_data = {
-         11,  12,
-         21,  22,
-
-        -11, -12,
-        -21, -22,
-
-
-         13,  14,
-         23,  24,
-
-        -13, -14,
-        -23, -24,
-
-
-         15,  16,
-         25,  26,
-
-        -15, -16,
-        -25, -26,
-
-
-         17,  18,
-         27,  28,
-
-        -17, -18,
-        -27, -28,
-    };
-    std::vector<float> out_data = {
-         11,  12,  11,  12,
-         21,  22,  21,  22,
-         11,  12,  11,  12,
-         21,  22,  21,  22,
-         11,  12,  11,  12,
-         21,  22,  21,  22,
-
-        -11, -12, -11, -12,
-        -21, -22, -21, -22,
-        -11, -12, -11, -12,
-        -21, -22, -21, -22,
-        -11, -12, -11, -12,
-        -21, -22, -21, -22,
-
-
-         13,  14,  13,  14,
-         23,  24,  23,  24,
-         13,  14,  13,  14,
-         23,  24,  23,  24,
-         13,  14,  13,  14,
-         23,  24,  23,  24,
-
-        -13, -14, -13, -14,
-        -23, -24, -23, -24,
-        -13, -14, -13, -14,
-        -23, -24, -23, -24,
-        -13, -14, -13, -14,
-        -23, -24, -23, -24,
-
-
-         15,  16,  15,  16,
-         25,  26,  25,  26,
-         15,  16,  15,  16,
-         25,  26,  25,  26,
-         15,  16,  15,  16,
-         25,  26,  25,  26,
-
-        -15, -16, -15, -16,
-        -25, -26, -25, -26,
-        -15, -16, -15, -16,
-        -25, -26, -25, -26,
-        -15, -16, -15, -16,
-        -25, -26, -25, -26,
-
-
-         17,  18,  17,  18,
-         27,  28,  27,  28,
-         17,  18,  17,  18,
-         27,  28,  27,  28,
-         17,  18,  17,  18,
-         27,  28,  27,  28,
-
-        -17, -18, -17, -18,
-        -27, -28, -27, -28,
-        -17, -18, -17, -18,
-        -27, -28, -27, -28,
-        -17, -18, -17, -18,
-        -27, -28, -27, -28,
-
-
-         11,  12,  11,  12,
-         21,  22,  21,  22,
-         11,  12,  11,  12,
-         21,  22,  21,  22,
-         11,  12,  11,  12,
-         21,  22,  21,  22,
-
-        -11, -12, -11, -12,
-        -21, -22, -21, -22,
-        -11, -12, -11, -12,
-        -21, -22, -21, -22,
-        -11, -12, -11, -12,
-        -21, -22, -21, -22,
-
-
-         13,  14,  13,  14,
-         23,  24,  23,  24,
-         13,  14,  13,  14,
-         23,  24,  23,  24,
-         13,  14,  13,  14,
-         23,  24,  23,  24,
-
-        -13, -14, -13, -14,
-        -23, -24, -23, -24,
-        -13, -14, -13, -14,
-        -23, -24, -23, -24,
-        -13, -14, -13, -14,
-        -23, -24, -23, -24,
-
-
-         15,  16,  15,  16,
-         25,  26,  25,  26,
-         15,  16,  15,  16,
-         25,  26,  25,  26,
-         15,  16,  15,  16,
-         25,  26,  25,  26,
-
-        -15, -16, -15, -16,
-        -25, -26, -25, -26,
-        -15, -16, -15, -16,
-        -25, -26, -25, -26,
-        -15, -16, -15, -16,
-        -25, -26, -25, -26,
-
-
-         17,  18,  17,  18,
-         27,  28,  27,  28,
-         17,  18,  17,  18,
-         27,  28,  27,  28,
-         17,  18,  17,  18,
-         27,  28,  27,  28,
-
-        -17, -18, -17, -18,
-        -27, -28, -27, -28,
-        -17, -18, -17, -18,
-        -27, -28, -27, -28,
-        -17, -18, -17, -18,
-        -27, -28, -27, -28,
-    };
-    set_values(input, input_data);
+TEST(broadcast_gpu_uint8_t, bfyx_1_to_3x4x5_w_b_axes_0x1x2) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    start_broadcast_test<uint8_t>(data_types::u8, {3, 4, 5}, {1}, {0, 1, 2}, golden_data);
+}
 
-    network network(engine, topology);
-    network.set_input_data("input", input);
-    auto outputs = network.execute();
+TEST(broadcast_gpu_float, bfyx_1_to_2x3x4x5_w_b_axes_0x1x2x3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {1}, {0, 1, 2, 3}, golden_data);
+}
 
-    auto output = outputs.at("output").get_memory();
-    auto output_ptr = output.pointer<float>();
+TEST(broadcast_gpu_uint8_t, bfyx_1_to_2x3x4x5_w_b_axes_0x1x2x3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {1}, {0, 1, 2, 3}, golden_data);
+}
 
-    ASSERT_EQ(out_data.size(), static_cast<std::size_t>(out_size_b * out_size_f * out_size_y * out_size_x));
+TEST(broadcast_gpu_float, bfyx_1_to_5_w_o_b_axes) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0};
+    start_broadcast_test<float>(data_types::f32, {5}, {1}, {}, golden_data);
+}
 
-    for (auto b = 0; b < out_size_b; ++b) {             // B
-        for (auto f = 0; f < out_size_f; ++f) {         // F
-            for (auto y = 0; y < out_size_y; ++y) {     // Y
-                for (auto x = 0; x < out_size_x; ++x) { // X
-                    auto output_off = ((b * out_size_f + f) * out_size_y + y) * out_size_x + x; // BFYX
+TEST(broadcast_gpu_uint8_t, bfyx_1_to_5_w_o_b_axes) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1};
+    start_broadcast_test<uint8_t>(data_types::u8, {5}, {1}, {}, golden_data);
+}
 
-                    EXPECT_EQ(output_ptr[output_off], out_data[output_off]);
-                }
-            }
-        }
-    }
+TEST(broadcast_gpu_float, bfyx_3_to_12_w_o_b_axes) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0};
+    start_broadcast_test<float>(data_types::f32, {12}, {3}, {}, golden_data);
 }
 
-TEST(broadcast_gpu, basic_byxf_2x3x4x5_to_10x12x12x10) {
-    //  Input (BF:XY) :    2x3:5x4
-    //  Output (BF:XY):  10x12:10x12
+TEST(broadcast_gpu_uint8_t, bfyx_3_to_12_w_o_b_axes) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3};
+    start_broadcast_test<uint8_t>(data_types::u8, {12}, {3}, {}, golden_data);
+}
 
-    constexpr auto in_size_b = 2;
-    constexpr auto in_size_f = 3;
-    constexpr auto in_size_y = 4;
-    constexpr auto in_size_x = 5;
+TEST(broadcast_gpu_float, bfyx_1x1_to_4x5_w_o_b_axes) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
+    start_broadcast_test<float>(data_types::f32, {4, 5}, {1, 1}, {}, golden_data);
+}
 
-    constexpr auto bc_scale_b = 5;
-    constexpr auto bc_scale_f = 4;
-    constexpr auto bc_scale_y = 3;
-    constexpr auto bc_scale_x = 2;
+TEST(broadcast_gpu_uint8_t, bfyx_1x1_to_4x5_w_o_b_axes) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    start_broadcast_test<uint8_t>(data_types::u8, {4, 5}, {1, 1}, {}, golden_data);
+}
 
-    constexpr auto out_size_b = bc_scale_b * in_size_b;
-    constexpr auto out_size_f = bc_scale_f * in_size_f;
-    constexpr auto out_size_y = bc_scale_y * in_size_y;
-    constexpr auto out_size_x = bc_scale_x * in_size_x;
+TEST(broadcast_gpu_float, bfyx_2x3_to_8x6_w_o_b_axes) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4.0, 5.0, 6.0,
+                                      1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4.0, 5.0, 6.0,
+                                      1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4.0, 5.0, 6.0,
+                                      1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4.0, 5.0, 6.0};
+    start_broadcast_test<float>(data_types::f32, {8, 6}, {2, 3}, {}, golden_data);
+}
 
-    engine engine;
-    auto input = memory::allocate(engine, {data_types::f32, format::byxf, {in_size_b, in_size_f, in_size_x, in_size_y}});
+TEST(broadcast_gpu_uint8_t, bfyx_2x3_to_8x6_w_o_b_axes) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6,
+                                        1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6,
+                                        1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6,
+                                        1, 2, 3, 1, 2, 3, 4, 5, 6, 4, 5, 6};
+    start_broadcast_test<uint8_t>(data_types::u8, {8, 6}, {2, 3}, {}, golden_data);
+}
 
-    topology topology;
-    topology.add(
-        input_layout("input", input.get_layout())
-    );
-    topology.add(
-        broadcast("output", "input", {out_size_b, out_size_f, out_size_x, out_size_y})
-    );
-
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
-    set_values(input, input_data);
+TEST(broadcast_gpu_float, bfyx_2x3x4_to_6x6x4_w_o_b_axes) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0};
+    start_broadcast_test<float>(data_types::f32, {6, 6, 4}, {2, 3, 4}, {}, golden_data);
+}
 
-    network network(engine, topology);
-    network.set_input_data("input", input);
-    auto outputs = network.execute();
+TEST(broadcast_gpu_uint8_t, bfyx_2x3x4_to_6x6x4_w_o_b_axes) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
+    start_broadcast_test<uint8_t>(data_types::u8, {6, 6, 4}, {2, 3, 4}, {}, golden_data);
+}
 
-    auto output = outputs.at("output").get_memory();
-    auto output_ptr = output.pointer<float>();
+TEST(broadcast_gpu_float, bfyx_2x3x4x5_to_2x9x8x5_w_o_b_axes) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0,
+                                      51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0,
+                                      51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0,
+                                      51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0,
+                                      51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0,
+                                      51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0,
+                                      51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0,
+                                      71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0,
+                                      61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0,
+                                      71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0,
+                                      81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0,
+                                      91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0,
+                                      81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0,
+                                      91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0,
+                                      101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0,
+                                      111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0,
+                                      101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0,
+                                      111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0,
+                                      61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0,
+                                      71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0,
+                                      61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0,
+                                      71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0,
+                                      81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0,
+                                      91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0,
+                                      81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0,
+                                      91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0,
+                                      101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0,
+                                      111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0,
+                                      101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0,
+                                      111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0,
+                                      61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0,
+                                      71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0,
+                                      61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0,
+                                      71.0, 72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0,
+                                      81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0,
+                                      91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0,
+                                      81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, 88.0, 89.0, 90.0,
+                                      91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0, 100.0,
+                                      101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0,
+                                      111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0,
+                                      101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0,
+                                      111.0, 112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0};
+    start_broadcast_test<float>(data_types::f32, {2, 9, 8, 5}, {2, 3, 4, 5}, {}, golden_data);
+}
 
-    for (auto b = 0; b < out_size_b; ++b) {             // B
-        for (auto f = 0; f < out_size_f; ++f) {         // F
-            for (auto y = 0; y < out_size_y; ++y) {     // Y
-                for (auto x = 0; x < out_size_x; ++x) { // X
-                    auto output_off = ((b * out_size_y + y) * out_size_x + x) * out_size_f + f; // BYXF
+TEST(broadcast_gpu_uint8_t, bfyx_2x3x4x5_to_2x9x8x5_w_o_b_axes) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+                                        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+                                        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+                                        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+                                        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+                                        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+                                        51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+                                        71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                                        61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+                                        71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+                                        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+                                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+                                        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+                                        101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                                        111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                                        101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                                        111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                                        61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+                                        71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                                        61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+                                        71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+                                        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+                                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+                                        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+                                        101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                                        111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                                        101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                                        111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                                        61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+                                        71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                                        61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
+                                        71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
+                                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+                                        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+                                        81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
+                                        91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
+                                        101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                                        111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
+                                        101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
+                                        111, 112, 113, 114, 115, 116, 117, 118, 119, 120};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 9, 8, 5}, {2, 3, 4, 5}, {}, golden_data);
+}
 
-                    auto in_b = b % in_size_b;
-                    auto in_f = f % in_size_f;
-                    auto in_y = y % in_size_y;
-                    auto in_x = x % in_size_x;
+TEST(broadcast_gpu_float, bfyx_3_to_2x3_w_b_axes_0) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3}, {3}, {0}, golden_data);
+}
 
-                    auto input_off  = ((in_b * in_size_y + in_y) * in_size_x + in_x) * in_size_f + in_f; // BYXF
+TEST(broadcast_gpu_uint8_t, bfyx_3_to_2x3_w_b_axes_0) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 1, 2, 3};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3}, {3}, {0}, golden_data);
+}
 
+TEST(broadcast_gpu_float, bfyx_3_to_2x6_w_b_axes_0) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0};
+    start_broadcast_test<float>(data_types::f32, {2, 6}, {3}, {0}, golden_data);
+}
 
-                    EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
-                }
-            }
-        }
-    }
+TEST(broadcast_gpu_uint8_t, bfyx_3_to_2x6_w_b_axes_0) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 6}, {3}, {0}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2_to_2x3_w_b_axes_1) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 2.0, 2.0, 2.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3}, {2}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2_to_2x3_w_b_axes_1) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 2, 2, 2};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3}, {2}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2_to_6x3_w_b_axes_1) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0,
+                                      2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0};
+    start_broadcast_test<float>(data_types::f32, {6, 3}, {2}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2_to_6x3_w_b_axes_1) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 2, 2, 2, 1, 1, 1,
+                                        2, 2, 2, 1, 1, 1, 2, 2, 2};
+    start_broadcast_test<uint8_t>(data_types::u8, {6, 3}, {2}, {1}, golden_data);
 }
 
-TEST(broadcast_gpu, basic_bfyx_2x1x1x5_to_2x13x11x5) {
-    //  Input (BF:XY) :   2x1:5x1
-    //  Output (BF:XY):  2x13:5x11
+TEST(broadcast_gpu_float, bfyx_3x4_to_2x3x4_w_b_axes_0) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4}, {3, 4}, {0}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_3x4_to_2x3x4_w_b_axes_0) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4}, {3, 4}, {0}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x4_to_2x3x4_w_b_axes_1) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
+                                      5.0, 6.0, 7.0, 8.0, 5.0, 6.0, 7.0, 8.0, 5.0, 6.0, 7.0, 8.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4}, {2, 4}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x4_to_2x3x4_w_b_axes_1) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
+                                        5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4}, {2, 4}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x3_to_2x3x4_w_b_axes_2) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
+                                      4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4}, {2, 3}, {2}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x3_to_2x3x4_w_b_axes_2) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+                                        4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4}, {2, 3}, {2}, golden_data);
+}
 
-    constexpr auto in_size_b = 2;
-    constexpr auto in_size_f = 1;
-    constexpr auto in_size_y = 1;
-    constexpr auto in_size_x = 5;
+TEST(broadcast_gpu_float, bfyx_4_to_2x3x4_w_b_axes_0_1) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0,
+                                      1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4}, {4}, {0, 1}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_4_to_2x3x4_w_b_axes_0_1) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
+                                        1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4}, {4}, {0, 1}, golden_data);
+}
 
-    constexpr auto bc_scale_b = 1;
-    constexpr auto bc_scale_f = 13;
-    constexpr auto bc_scale_y = 11;
-    constexpr auto bc_scale_x = 1;
+TEST(broadcast_gpu_float, bfyx_3_to_2x3x4_w_b_axes_0_2) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0,
+                                      1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4}, {3}, {0, 2}, golden_data);
+}
 
-    constexpr auto out_size_b = bc_scale_b * in_size_b;
-    constexpr auto out_size_f = bc_scale_f * in_size_f;
-    constexpr auto out_size_y = bc_scale_y * in_size_y;
-    constexpr auto out_size_x = bc_scale_x * in_size_x;
+TEST(broadcast_gpu_uint8_t, bfyx_3_to_2x3x4_w_b_axes_0_2) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+                                        1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4}, {3}, {0, 2}, golden_data);
+}
 
-    engine engine;
-    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {in_size_b, in_size_f, in_size_x, in_size_y}});
+TEST(broadcast_gpu_float, bfyx_2_to_2x3x4_w_b_axes_1_2) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4}, {2}, {1, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2_to_2x3x4_w_b_axes_1_2) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4}, {2}, {1, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_3x4x5_to_2x3x4x5_w_b_axes_0) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0,
+                                      37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0,
+                                      49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
+                                      13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0,
+                                      25.0, 26.0, 27.0, 28.0, 29.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0,
+                                      37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0, 47.0, 48.0,
+                                      49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0, 58.0, 59.0, 60.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {3, 4, 5}, {0}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_3x4x5_to_2x3x4x5_w_b_axes_0) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+                                        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                                        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+                                        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+                                        25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+                                        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+                                        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {3, 4, 5}, {0}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x4x5_to_2x3x4x5_w_b_axes_1) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2, 4, 5}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x4x5_to_2x3x4x5_w_b_axes_1) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+                                        21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+                                        31, 32, 33, 34, 35, 36, 37, 38, 39, 40};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2, 4, 5}, {1}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x3x5_to_2x3x4x5_w_b_axes_2) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                                      16.0, 17.0, 18.0, 19.0, 20.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      16.0, 17.0, 18.0, 19.0, 20.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                                      21.0, 22.0, 23.0, 24.0, 25.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                                      26.0, 27.0, 28.0, 29.0, 30.0, 26.0, 27.0, 28.0, 29.0, 30.0,
+                                      26.0, 27.0, 28.0, 29.0, 30.0, 26.0, 27.0, 28.0, 29.0, 30.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2, 3, 5}, {2}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x3x5_to_2x3x4x5_w_b_axes_2) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 11, 12, 13, 14, 15,
+                                        11, 12, 13, 14, 15, 11, 12, 13, 14, 15,
+                                        16, 17, 18, 19, 20, 16, 17, 18, 19, 20,
+                                        16, 17, 18, 19, 20, 16, 17, 18, 19, 20,
+                                        21, 22, 23, 24, 25, 21, 22, 23, 24, 25,
+                                        21, 22, 23, 24, 25, 21, 22, 23, 24, 25,
+                                        26, 27, 28, 29, 30, 26, 27, 28, 29, 30,
+                                        26, 27, 28, 29, 30, 26, 27, 28, 29, 30};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2, 3, 5}, {2}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x3x4_to_2x3x4x5_w_b_axes_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0,
+                                      9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0,
+                                      11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0,
+                                      13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0,
+                                      15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0,
+                                      17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0,
+                                      19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0,
+                                      21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0,
+                                      23.0, 23.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2, 3, 4}, {3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x3x4_to_2x3x4x5_w_b_axes_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+                                        7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+                                        9, 9, 9, 9, 9, 10, 10, 10, 10, 10,
+                                        11, 11, 11, 11, 11, 12, 12, 12, 12, 12,
+                                        13, 13, 13, 13, 13, 14, 14, 14, 14, 14,
+                                        15, 15, 15, 15, 15, 16, 16, 16, 16, 16,
+                                        17, 17, 17, 17, 17, 18, 18, 18, 18, 18,
+                                        19, 19, 19, 19, 19, 20, 20, 20, 20, 20,
+                                        21, 21, 21, 21, 21, 22, 22, 22, 22, 22,
+                                        23, 23, 23, 23, 23, 24, 24, 24, 24, 24};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2, 3, 4}, {3}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_4x5_to_2x3x4x5_w_b_axes_0_1) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {4, 5}, {0, 1}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_4x5_to_2x3x4x5_w_b_axes_0_1) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+                                        1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 16, 17, 18, 19, 20};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {4, 5}, {0, 1}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_3x5_to_2x3x4x5_w_b_axes_0_2) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 11.0, 12.0, 13.0, 14.0, 15.0,
+                                      11.0, 12.0, 13.0, 14.0, 15.0, 11.0, 12.0, 13.0, 14.0, 15.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {3, 5}, {0, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_3x5_to_2x3x4x5_w_b_axes_0_2) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 11, 12, 13, 14, 15,
+                                        11, 12, 13, 14, 15, 11, 12, 13, 14, 15,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        11, 12, 13, 14, 15, 11, 12, 13, 14, 15,
+                                        11, 12, 13, 14, 15, 11, 12, 13, 14, 15};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {3, 5}, {0, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_3x4_to_2x3x4x5_w_b_axes_0_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0,
+                                      9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0,
+                                      11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0,
+                                      9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0,
+                                      11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {3, 4}, {0, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_3x4_to_2x3x4x5_w_b_axes_0_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+                                        7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+                                        9, 9, 9, 9, 9, 10, 10, 10, 10, 10,
+                                        11, 11, 11, 11, 11, 12, 12, 12, 12, 12,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+                                        7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+                                        9, 9, 9, 9, 9, 10, 10, 10, 10, 10,
+                                        11, 11, 11, 11, 11, 12, 12, 12, 12, 12};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {3, 4}, {0, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x5_to_2x3x4x5_w_b_axes_1_2) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0,
+                                      6.0, 7.0, 8.0, 9.0, 10.0, 6.0, 7.0, 8.0, 9.0, 10.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2, 5}, {1, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x5_to_2x3x4x5_w_b_axes_1_2) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10,
+                                        6, 7, 8, 9, 10, 6, 7, 8, 9, 10};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2, 5}, {1, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x4_to_2x3x4x5_w_b_axes_1_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2, 4}, {1, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x4_to_2x3x4x5_w_b_axes_1_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+                                        7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+                                        5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+                                        7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
+                                        5, 5, 5, 5, 5, 6, 6, 6, 6, 6,
+                                        7, 7, 7, 7, 7, 8, 8, 8, 8, 8};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2, 4}, {1, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2x3_to_2x3x4x5_w_b_axes_2_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
+                                      4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
+                                      5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
+                                      6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+                                      6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2, 3}, {2, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2x3_to_2x3x4x5_w_b_axes_2_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+                                        3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+                                        4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+                                        4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+                                        5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+                                        5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+                                        6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+                                        6, 6, 6, 6, 6, 6, 6, 6, 6, 6};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2, 3}, {2, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_5_to_2x3x4x5_w_b_axes_0_1_2) {
+    std::vector<float> golden_data = {1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0,
+                                      1.0, 2.0, 3.0, 4.0, 5.0, 1.0, 2.0, 3.0, 4.0, 5.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {5}, {0, 1, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_5_to_2x3x4x5_w_b_axes_0_1_2) {
+    std::vector<uint8_t> golden_data = {1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5,
+                                        1, 2, 3, 4, 5, 1, 2, 3, 4, 5};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {5}, {0, 1, 2}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_4_to_2x3x4x5_w_b_axes_0_1_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {4}, {0, 1, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_4_to_2x3x4x5_w_b_axes_0_1_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
+                                        1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 4, 4, 4, 4, 4};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {4}, {0, 1, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_3_to_2x3x4x5_w_b_axes_0_2_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0,
+                                      3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {3}, {0, 2, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_3_to_2x3x4x5_w_b_axes_0_2_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+                                        3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+                                        3, 3, 3, 3, 3, 3, 3, 3, 3, 3};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {3}, {0, 2, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_float, bfyx_2_to_2x3x4x5_w_b_axes_1_2_3) {
+    std::vector<float> golden_data = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+                                      2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0};
+    start_broadcast_test<float>(data_types::f32, {2, 3, 4, 5}, {2}, {1, 2, 3}, golden_data);
+}
+
+TEST(broadcast_gpu_uint8_t, bfyx_2_to_2x3x4x5_w_b_axes_1_2_3) {
+    std::vector<uint8_t> golden_data = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+                                        2, 2, 2, 2, 2, 2, 2, 2, 2, 2};
+    start_broadcast_test<uint8_t>(data_types::u8, {2, 3, 4, 5}, {2}, {1, 2, 3}, golden_data);
+}
+
+
+TEST(broadcast_gpu, basic_error_wrong_b_axes_size) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}});
 
     topology topology;
-    topology.add(
-        input_layout("input", input.get_layout())
-    );
-    topology.add(
-        broadcast("output", "input", {out_size_b, out_size_f, out_size_x, out_size_y})
-    );
-
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
-    set_values(input, input_data);
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 1, 2, 3, 4}));
 
-    network network(engine, topology);
-    network.set_input_data("input", input);
-    auto outputs = network.execute();
+    std::string msg_to_find = "Incorrect parameters configuration: broadcast_axes size should be less or equal 4.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
 
-    auto output = outputs.at("output").get_memory();
-    auto output_ptr = output.pointer<float>();
+TEST(broadcast_gpu, basic_error_wrong_b_axis_value) {
 
-    for (auto b = 0; b < out_size_b; ++b) {             // B
-        for (auto f = 0; f < out_size_f; ++f) {         // F
-            for (auto y = 0; y < out_size_y; ++y) {     // Y
-                for (auto x = 0; x < out_size_x; ++x) { // X
-                    auto output_off = ((b * out_size_f + f) * out_size_y + y) * out_size_x + x; // BFYX
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}});
 
-                    auto in_b = b % in_size_b;
-                    auto in_f = f % in_size_f;
-                    auto in_y = y % in_size_y;
-                    auto in_x = x % in_size_x;
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 4}));
 
-                    auto input_off  = ((in_b * in_size_f + in_f) * in_size_y + in_y) * in_size_x + in_x; // BFYX
+    std::string msg_to_find = "Incorrect parameters configuration: broadcast_axes index should be within broadcast_sizes range.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
 
+TEST(broadcast_gpu, basic_error_duplicate_b_axis_values) {
 
-                    EXPECT_EQ(output_ptr[output_off], input_data[input_off]);
-                }
-            }
-        }
-    }
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 1, 1}});
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 1, 1}));
+
+    std::string msg_to_find = "Incorrect parameters configuration: Duplicate axes numbers was found in broadcast_axes.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
 }
 
-TEST(broadcast_gpu, basic_error_on_nondiv_bc_size) {
-    //  Input (BF:XY) :   2x1:5x1
-    //  Output (BF:XY):  2x13:5x11
+TEST(broadcast_gpu, basic_error_wrong_input_dimension_0) {
 
-    constexpr auto in_size_b = 2;
-    constexpr auto in_size_f = 1;
-    constexpr auto in_size_y = 1;
-    constexpr auto in_size_x = 5;
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {2, 3, 4, 5}});
 
-    constexpr auto out_size_b = in_size_b;
-    constexpr auto out_size_f = in_size_f;
-    constexpr auto out_size_y = in_size_y;
-    constexpr auto out_size_x = 7;
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {2, 3, 4, 5}, {1}));
 
-    engine engine;
-    auto input = memory::allocate(engine, {data_types::f32, format::yxfb, {in_size_b, in_size_f, in_size_x, in_size_y}});
+    std::string msg_to_find = "Input size on dimension number 0(=2) is not equal to: (=1)";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(broadcast_gpu, basic_error_not_dividable_2x3x4x5_to_3x3x4x5) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {2, 3, 4, 5}});
 
     topology topology;
-    topology.add(
-        input_layout("input", input.get_layout())
-    );
-    topology.add(
-        broadcast("output", "input", {out_size_b, out_size_f, out_size_x, out_size_y})
-    );
-
-    std::vector<float> input_data = generate_rnd_real_input<float>(in_size_b, in_size_f, in_size_y, in_size_x, -8.0f, 8.0f);
-    set_values(input, input_data);
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {3, 3, 4, 5}, {}));
 
-    EXPECT_ANY_THROW(network(engine, topology));
+    std::string msg_to_find = "Invalid broadcast size: not dividable by input size";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
 }
 
+TEST(broadcast_gpu, basic_error_not_dividable_3_to_2x3x4x5_w_b_axes_0x1x3) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, 3, 1}});
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {2, 3, 4, 5}, {0, 1, 3}));
+
+    std::string msg_to_find = "Invalid broadcast size: not dividable by input size";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(broadcast_gpu, basic_error_not_dividable_4x5_to_3x4x5_w_b_axes_1) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 3, 5, 4}});
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(broadcast("output", "input", {2, 3, 4, 5}, {1}));
+
+    std::string msg_to_find = "Invalid broadcast size: not dividable by input size";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp
new file mode 100644
index 000000000..ade14c55b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/command_queue_test.cpp
@@ -0,0 +1,165 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/engine.hpp>
+#include <api/CPP/input_layout.hpp>
+#include "test_utils/test_utils.h"
+#include "api/CPP/arg_max_min.hpp"
+
+using namespace cldnn;
+using namespace tests;
+using namespace std;
+
+#ifdef _WIN32
+#include <windows.h>
+#include <WinBase.h>
+static int                              g_run_once = 1;
+static int                              g_qpc_availible;
+static LARGE_INTEGER                    g_qpc_frec;
+
+// Function for future use to measure performance
+unsigned long GetMilliseconds(void)
+{
+    unsigned long ms;
+    LARGE_INTEGER qpc_ticks;
+
+    if (g_run_once) {
+        g_qpc_availible = QueryPerformanceFrequency(&g_qpc_frec);
+        // QPC returns nonzero value if HW supports high resolution perf counter
+        EXPECT_NE(g_qpc_availible, 0); 
+        g_run_once = 0;
+    }
+
+    if (g_qpc_availible) {
+        QueryPerformanceCounter(&qpc_ticks);
+        ms = (unsigned long)(1000.0 * ((double)(qpc_ticks.QuadPart)) / ((double)(g_qpc_frec.QuadPart)));
+    }
+    // back up if High-Resolution Timer is not available
+    else ms = GetTickCount();
+
+    return ms;
+}
+#endif
+
+
+// Run some topology too see if command queue does work correctly
+// Coppied from arg_max_gpu.base test.
+void exexute_network(cldnn::engine engine)
+{
+    //  Input  : 2x3x2x2
+    static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(arg_max_min("arg_max", "input", arg_max_min::max));
+
+    vector<float> input_vec = {
+        //y0x0 y0x1 y1x0 y1x1
+        /*b0f0*/0.1f, -0.1f, 0.9f,  1.5f,
+        /*b0f1*/0.2f, 0.2f,  -10.f, 5.2f,
+        /*b0f2*/0.2f, 0.2f,  -10.f, 5.2f,
+
+        /*b1f0*/3.f,  0.5f,  7.f,   10.f,
+        /*b1f1*/4.f,  0.5f,  8.f,   8.2f,
+        /*b1f2*/0.2f, 0.2f,  -10.f, 5.2f
+    };
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "arg_max");
+
+    auto output = outputs.at("arg_max").get_memory();
+    auto output_ptr = output.pointer<float>();
+    float out_buffer[batch_num];
+    for (uint32_t i = 0; i < batch_num; i++)
+    {
+        out_buffer[i] = get_value<float>(output_ptr, i);
+    }
+    int size = x_size * y_size * feature_num;
+    int index;
+    float value;
+    for (int i = 0; i < batch_num; i++) {
+        EXPECT_GE(out_buffer[i], 0);
+        EXPECT_LT(out_buffer[i], size);
+        index = (int)out_buffer[i];
+        value = input_vec[i*size + (int)index];
+        for (int j = 0; j < size; j++)
+        {
+            EXPECT_LE(input_vec[i*size + j], value);
+        }
+    }
+}
+
+TEST(command_queue_test, test_priority_hints) {
+    engine_configuration configuration =
+        engine_configuration(
+            false,          // profiling
+            false,          // decorate_kernel_names
+            false,          // dump_custom_program
+            "",             // options
+            "",             // single_kernel
+            true,           // primitives_parallelisation
+            "",             // engine_log
+            "",             // sources_dumps_dir
+            priority_mode_types::low,
+            throttle_mode_types::disabled);
+    cldnn::engine engine(configuration);
+    exexute_network(engine);
+}
+
+TEST(command_queue_test, test_throttle_hints) {
+    engine_configuration configuration =
+        engine_configuration(
+            false,          // profiling
+            false,          // decorate_kernel_names
+            false,          // dump_custom_program
+            "",             // options
+            "",             // single_kernel
+            true,           // primitives_parallelisation
+            "",             // engine_log
+            "",             // sources_dumps_dir
+            priority_mode_types::disabled,
+            throttle_mode_types::high);
+    cldnn::engine engine(configuration);
+    exexute_network(engine);
+}
+
+TEST(command_queue_test, test_priority_and_throttle_hints) {
+    engine_configuration configuration =
+        engine_configuration(
+            false,          // profiling
+            false,          // decorate_kernel_names
+            false,          // dump_custom_program
+            "",             // options
+            "",             // single_kernel
+            true,           // primitives_parallelisation
+            "",             // engine_log
+            "",             // sources_dumps_dir
+            priority_mode_types::high,
+            throttle_mode_types::low);
+    cldnn::engine engine(configuration);
+    exexute_network(engine);
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp
new file mode 100644
index 000000000..09e299e7e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/condition_gpu_test.cpp
@@ -0,0 +1,617 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/engine.hpp>
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/concatenation.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/pooling.hpp>
+#include <api/CPP/condition.hpp>
+#include <api/CPP/softmax.hpp>
+#include <api/CPP/scale.hpp>
+#include <api/CPP/data.hpp>
+#include "test_utils/test_utils.h"
+
+#include <cstddef>
+
+
+using namespace cldnn;
+using namespace ::tests;
+
+
+bool is_output_equal(const cldnn::memory& mem, const std::vector<float>& ref)
+{
+    auto ptr = mem.pointer<float>();
+    for (size_t i = 0; i < mem.get_layout().count(); i++)
+    {
+        if (!are_equal(ptr[i], ref[i])) return false;
+    }
+    return true;
+}
+
+topology generate_simple_branch (bool branch_true_false, const primitive_id& input_id)
+{
+    topology branch;
+    if (branch_true_false)
+    {
+        branch.add(
+            pooling(input_id + "_when_true", input_id, cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+        );
+    }
+    else
+    {
+        branch.add(
+            pooling(input_id + "_when_false", input_id, cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+        );
+    }
+    return branch;
+}
+
+
+TEST(condition_gpu, basic_equal_comp) {
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto scale_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    
+    topology branch_true = generate_simple_branch(true, "condi");
+    topology branch_false = generate_simple_branch(false, "condi");
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );    
+    topology.add(
+        input_layout("scale_data", scale_mem.get_layout())
+    );
+    topology.add(
+        condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL)
+    );  
+    topology.add(
+        scale("output", "condi", "scale_data")
+    );
+
+    network net(engine, topology, bs);
+    set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f });
+    set_values(scale_mem, { 10.0f });
+    net.set_input_data("input", input);
+    net.set_input_data("scale_data", scale_mem);
+
+    decltype(net.execute()) out;
+
+    //WHEN TRUE
+    set_values(compare, { 1.0f });
+    net.set_input_data("compare", compare);
+    out = net.execute();
+    auto out_data_true = out.at("output").get_memory();
+    EXPECT_TRUE(is_output_equal(out_data_true, {20.0f, 40.0f}));
+
+    //WHEN FALSE
+    set_values(compare, { 4.0f });
+    net.set_input_data("compare", compare);
+    out = net.execute();
+    auto out_data_false = out.at("output").get_memory();
+    EXPECT_TRUE(is_output_equal(out_data_false, { 15.0f, 35.0f }));
+
+}
+
+TEST(condition_gpu, basic_range_equal_comp) {
+
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+
+    topology branch_true = generate_simple_branch(true, "condi");
+    topology branch_false = generate_simple_branch(false, "condi");
+
+    topology topology;
+    topology.add(
+        input_layout("input0", input0.get_layout())
+    );
+    topology.add(
+        input_layout("input1", input1.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+    topology.add(
+        concatenation("concat", { "input0", "input1" }, concatenation::along_x)
+    );
+    topology.add( 
+        condition("condi", "concat", branch_true, branch_false, "compare", cond_functions::EQUAL)
+    );
+
+    std::vector<float> input0_data = {
+        1, 2, 3, 4
+    };
+    std::vector<float> input1_data = {
+        5, 6, 7, 8
+    };
+    std::vector<float> compare_data_true = {
+        1, 2, 3
+    };
+    std::vector<float> pooling_when_true_data = {
+        2, 4, 6, 8
+    };
+    std::vector<float> compare_data_false = {
+        1, 2, 10
+    };
+    std::vector<float> pooling_when_false_data = {
+        1.5, 3.5, 5.5, 7.5
+    };
+
+    set_values(input0, input0_data);
+    set_values(input1, input1_data);
+    network net(engine, topology, bs);
+    net.set_input_data("input0", input0);
+    net.set_input_data("input1", input1);
+
+    decltype(net.execute()) outputs;
+
+    //CHECK TRUE
+    set_values(compare, compare_data_true);
+    net.set_input_data("compare", compare);
+    outputs = net.execute();
+
+    auto out_data_true = outputs.at("condi").get_memory();
+    EXPECT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
+
+    //CHECK FALSE
+    set_values(compare, compare_data_false);
+    net.set_input_data("compare", compare);
+    outputs = net.execute();
+
+    auto out_data_false = outputs.at("condi").get_memory();
+    EXPECT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
+}
+
+std::pair<std::vector<float>, std::vector<float>> get_values_to_compare(const cldnn::tensor& offset, const cldnn::tensor& range, const std::vector<float>& values, const cldnn::layout& input_lay, const cond_functions& func)
+{
+    std::vector<float> ret_true;
+    std::vector<float> ret_false;
+    auto mem_desc = generic_test::get_linear_memory_desc(input_lay);
+    for (int32_t b = 0; b < range.batch[0]; b++)
+    {
+        for (int32_t f = 0; f < range.feature[0]; f++)
+        {
+            for (int32_t y = 0; y < range.spatial[1]; y++)
+            {
+                for (int32_t x = 0; x < range.spatial[0]; x++)
+                {
+                    auto linear_idx = generic_test::get_linear_index(
+                        input_lay,
+                        offset.batch[0] + b,
+                        offset.feature[0] + f,
+                        offset.spatial[1] + y,
+                        offset.spatial[0] + x,
+                        mem_desc);
+
+                    switch (func)
+                    {
+                    case cond_functions::EQUAL:
+                        ret_true.push_back(values.at(linear_idx));
+                        ret_false.push_back(-1.0f);
+                        break;
+                    case cond_functions::GREATER: 
+                        ret_true.push_back(values.at(linear_idx) - 1.0f);
+                        ret_false.push_back(99.0f);
+                        break;
+                    case cond_functions::LESS: 
+                        ret_true.push_back(values.at(linear_idx) + 1.0f);
+                        ret_false.push_back(-1.0f);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+    return { ret_true, ret_false };
+}
+
+TEST(DISABLED_condition_gpu, generic_test_true_false) {
+
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 2, 5, 1 } });
+    std::vector<float> input_data(50);
+    std::iota(input_data.begin(), input_data.end(), 0.0f);
+
+    std::vector<cond_functions> functions = {
+        cond_functions::EQUAL,
+        cond_functions::GREATER,
+        cond_functions::LESS,
+    };
+
+    // ranges, with data when condition is true or false
+    std::vector<cldnn::tensor> ranges = {
+        {1, 1, 1, 1},
+        {1, 1, 3, 1},
+        {2, 1, 1, 1},
+        {2, 1, 1, 1}
+    };
+
+    std::vector<cldnn::tensor> offsets = {
+        { 0, 0, 0, 0},
+        { 0, 0, 1, 0},
+        { 0, 0, 2, 0},
+        { 2, 0, 0, 0},
+        { 2, 1, 1, 0}
+    };
+
+    std::vector<float> pooling_when_true_data = {
+        2, 4, 7, 9, 12, 14, 17,
+        19, 22, 24, 27, 29, 32,
+        34, 37, 39, 42, 44, 47, 49
+    };
+
+    std::vector<float> pooling_when_false_data = {
+        1, 3, 6, 8, 11, 13, 16,
+        18, 21, 23, 26, 28, 31,
+        33, 36, 38, 41, 43, 46, 48
+    };
+
+    for (auto const& func : functions)
+    {
+        for (auto const& range : ranges)
+        {
+            for (auto const& offset : offsets)
+            {
+                auto comp_values = get_values_to_compare(offset, range, input_data, input.get_layout(), func);
+                auto comp_values_true = comp_values.first;
+                auto comp_values_false = comp_values.second;
+
+                auto compare = memory::allocate(engine, { data_types::f32, format::bfyx, range });
+
+                topology branch_true;
+                topology branch_false;
+                branch_true.add(
+                    pooling("pooling_when_true", "condi", cldnn::pooling_mode::max, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
+                );
+                branch_false.add(
+                    pooling("pooling_when_false", "condi", cldnn::pooling_mode::average, { 1, 1, 3, 1 }, { 1, 1, 2, 1 })
+                );
+
+                topology topology;
+                topology.add(
+                    input_layout("input", input.get_layout())
+                );
+                topology.add(
+                    input_layout("compare", compare.get_layout())
+                );
+                topology.add(
+                    condition("condi", "input", branch_true, branch_false, "compare", func, offset)
+                );
+
+                set_values(input, input_data);
+                network net(engine, topology, bs);
+                net.set_input_data("input", input);
+
+                decltype(net.execute()) outputs;
+
+                //CHECK TRUE
+                set_values(compare, comp_values_true);
+                net.set_input_data("compare", compare);
+                outputs = net.execute();
+
+                auto out_data_true = outputs.at("condi").get_memory();
+                EXPECT_TRUE(is_output_equal(out_data_true, pooling_when_true_data));
+
+                //CHECK FALSE
+                set_values(compare, comp_values_false);
+                net.set_input_data("compare", compare);
+                outputs = net.execute();
+
+                auto out_data_false = outputs.at("condi").get_memory();
+                EXPECT_TRUE(is_output_equal(out_data_false, pooling_when_false_data));
+
+            }
+        }
+    }
+}
+
+TEST(condition_gpu, basic_stacked_ifs) {
+
+    /*   
+        <prims...>
+        <if>
+        <...>
+        <end_if>
+        <...>
+        <if>
+        <...>
+        <end_if>
+        <prims...>    
+    */
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto compare2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+
+
+    topology condi_1_true = generate_simple_branch(true, "condi");
+    topology condi_1_false = generate_simple_branch(false, "condi");
+    topology condi_2_true;
+    condi_2_true.add(
+        activation("activ_when_true", "condi2", cldnn_activation_func::activation_log2)
+    );
+    topology condi_2_false;
+    condi_2_false.add(
+        activation("activ_when_false", "condi2", cldnn_activation_func::activation_relu)
+    );
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+    topology.add(
+        condition("condi", "input", condi_1_true, condi_1_false, "compare", cond_functions::EQUAL)
+    );
+    topology.add(
+        input_layout("compare2", compare2.get_layout())
+    );
+    topology.add(
+        condition("condi2", "condi", condi_2_true, condi_2_false, "compare2", cond_functions::GREATER)
+    );
+
+    std::vector<float> input_data = {
+        1, 2, 3, 4
+    };
+    std::vector<float> compare_data = {
+        1
+    };
+    std::vector<float> compare_2_data = {
+        0.0f, 0.0f
+    };
+    set_values(input, input_data);
+    set_values(compare, compare_data);
+    set_values(compare2, compare_2_data);
+
+    network net(engine, topology, bs);
+    net.set_input_data("input", input);
+    net.set_input_data("compare", compare);
+    net.set_input_data("compare2", compare2);
+    auto outputs = net.execute();
+
+    auto out_data = outputs.at("condi2").get_memory();
+    EXPECT_TRUE(is_output_equal(out_data, {1.0f, 2.0f}));
+}
+
+TEST(condition_gpu, basic_nested_ifs) {
+
+    /*
+    <prims...>
+    <if 0>
+    <...>
+    <if 1>
+    <...>
+    <end_if 1>
+    <...>
+    <end_if 0>
+    <prims...>
+    */
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto compare2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+    auto scale_5_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    set_values(scale_5_mem, { 5.0f });
+    auto scale_10_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    set_values(scale_10_mem, { 10.0f });
+
+
+    topology nested_true;
+    {
+        nested_true.add(scale("scale_5", "condi_nested", "scale_5_data"),
+            data("scale_5_data", scale_5_mem));
+    }
+    topology nested_false;
+    {
+        nested_false.add(scale("scale_10", "condi_nested", "scale_10_data"),
+            data("scale_10_data", scale_10_mem));
+    }
+
+    topology branch_true;
+    branch_true.add(
+        pooling("pooling_when_true", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+    );
+    branch_true.add(
+        input_layout("compare2", compare2.get_layout())
+    );
+
+    branch_true.add(
+        condition(
+        "condi_nested",
+        "pooling_when_true",
+        nested_true,
+        nested_false,
+        "compare2",
+        cond_functions::EQUAL)
+    );
+
+    topology branch_false;
+    branch_false.add(
+        pooling("pooling_when_false", "condi", cldnn::pooling_mode::average, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+    );
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+
+    topology.add(
+        condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL)
+    );
+
+    std::vector<float> input_data = {
+        1.0f, 2.0f, 3.0f, 4.0f
+    };
+    std::vector<float> compare_data = {
+        1.0f
+    };
+    std::vector<float> compare_2_data = {
+        2.0f, 4.0f
+    };
+    set_values(input, input_data);
+    set_values(compare, compare_data);
+    set_values(compare2, compare_2_data);
+
+    network net(engine, topology, bs);
+    net.set_input_data("input", input);
+    net.set_input_data("compare", compare);
+    net.set_input_data("compare2", compare2);
+    auto outputs = net.execute();
+
+    auto out_data = outputs.at("condi").get_memory();
+    EXPECT_TRUE(is_output_equal(out_data, { 10.0f, 20.0f }));
+}
+
+
+TEST(condition_gpu, negative_compare_wrong_layout) {
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 1 } });
+
+    topology branch_true = generate_simple_branch(true, "condi");
+    topology branch_false = generate_simple_branch(false, "condi");
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+    topology.add(
+        condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL)
+    );
+
+    EXPECT_ANY_THROW(network net(engine, topology, bs););
+}
+
+TEST(condition_gpu, negative_too_big_offset) {
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+
+    topology branch_true = generate_simple_branch(true, "condi");
+    topology branch_false = generate_simple_branch(false, "condi");
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+    topology.add(
+        condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL, {1, 1, 2, 1})
+    );
+
+    EXPECT_ANY_THROW(network net(engine, topology, bs););
+}
+
+TEST(condition_gpu, negative_not_same_layouts) {
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    
+    topology branch_true;
+    branch_true.add(
+        pooling("pooling_when_true", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+    );
+
+    topology branch_false;
+    branch_false.add(
+        pooling("pooling_when_false", "condi", cldnn::pooling_mode::max, { 0, 0, 4, 1 }, { 0, 0, 4, 1 })
+    );
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+    topology.add(
+        condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL)
+    );
+
+    EXPECT_ANY_THROW(network net(engine, topology, bs););
+}
+
+TEST(condition_gpu, negative_same_names_within_different_networks) {
+    const auto& engine = get_test_engine();
+    build_options bs;
+    bs.set_option(build_option::optimize_data(true));
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
+    auto compare = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    topology branch_true;
+    branch_true.add(
+        pooling("pooling_check_name", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+    );
+
+    topology branch_false;
+    branch_false.add(
+        pooling("pooling_when_false", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+    );
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("compare", compare.get_layout())
+    );
+    topology.add(
+        condition("condi", "input", branch_true, branch_false, "compare", cond_functions::EQUAL)
+    );
+    topology.add(
+        pooling("pooling_check_name", "condi", cldnn::pooling_mode::max, { 0, 0, 2, 1 }, { 0, 0, 2, 1 })
+    );
+    
+    EXPECT_ANY_THROW(network net(engine, topology, bs););
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/contract_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/contract_gpu_test.cpp
new file mode 100644
index 000000000..1a2c67153
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/contract_gpu_test.cpp
@@ -0,0 +1,352 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/engine.hpp>
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/contract.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+
+#include "test_utils/test_utils.h"
+#include "test_utils/uniform_quantized_real_distribution.hpp"
+
+#include <cstddef>
+
+using namespace cldnn;
+using namespace ::tests;
+
+template <typename T>
+T reduce_execute(cldnn::contract_mode mode, T x, T y) {
+    switch (mode) {
+    case contract_mode::sum:
+        return x + y;
+    case contract_mode::prod:
+        return x * y;
+    case contract_mode::all:
+        return x && y;
+    case contract_mode::any:
+        return x || y;
+    case contract_mode::max:
+        return x > y ? x : y;
+    default:
+        return (T)0;
+    }
+}
+
+template <typename T>
+VVVVF<T> reduce_dim(VVVVF<T> &input,
+    cldnn::contract_mode mode, uint16_t axis,
+    int input_padding_y = 0, int input_padding_x = 0,
+    int output_padding_y = 0, int output_padding_x = 0) {
+
+    size_t padding_y = input_padding_y + output_padding_y;
+    size_t padding_x = input_padding_x + output_padding_x;
+    size_t out_sizes[4];
+    out_sizes[0] = input.size();
+    out_sizes[1] = input[0].size();
+    out_sizes[2] = input[0][0].size() + 2 * padding_y;
+    out_sizes[3] = input[0][0][0].size() + 2 * padding_x;
+    if (axis == 0)
+        out_sizes[0] = 1;
+    else
+        for (uint16_t i = axis; i > 0; --i)
+        {
+            out_sizes[i] = out_sizes[i - 1];
+            out_sizes[i - 1] = 1;
+        }
+    VVVVF<T> output(out_sizes[0], VVVF<T>(out_sizes[1], VVF<T>(out_sizes[2], VF<T>(out_sizes[3]))));
+
+    switch (axis) {
+    case 0:
+        for (size_t f = 0; f < out_sizes[1]; ++f)
+            for (size_t y = 0; y < out_sizes[2]; ++y)
+                for (size_t x = 0; x < out_sizes[3]; ++x)
+                {
+                    T res = input[0][f][y][x];
+                    size_t orig_b = input.size();
+                    for (size_t b = 1; b < orig_b; ++b)
+                        res = reduce_execute<T>(mode, res, input[b][f][y][x]);
+                    output[0][f][y][x] = res;
+                }
+        break;
+    case 1:
+        for (size_t b = 0; b < out_sizes[1]; ++b)
+            for (size_t y = 0; y < out_sizes[2]; ++y)
+                for (size_t x = 0; x < out_sizes[3]; ++x)
+                {
+                    T res = input[b][0][y][x];
+                    size_t orig_f = input[0].size();
+                    for (size_t f = 1; f < orig_f; ++f)
+                        res = reduce_execute<T>(mode, res, input[b][f][y][x]);
+                    output[0][b][y][x] = res;
+                }
+        break;
+    case 2:
+        for (size_t b = 0; b < out_sizes[1]; ++b)
+            for (size_t f = 0; f < out_sizes[2]; ++f)
+                for (size_t x = 0; x < out_sizes[3]; ++x)
+                {
+                    T res = input[b][f][0][x];
+                    size_t orig_y = input[0][0].size();
+                    for (size_t y = 1; y < orig_y; ++y)
+                        res = reduce_execute<T>(mode, res, input[b][f][y][x]);
+                    output[0][b][f][x] = res;
+                }
+        break;
+    case 3:
+        for (size_t b = 0; b < out_sizes[1]; ++b)
+            for (size_t f = 0; f < out_sizes[2]; ++f)
+                for (size_t y = 0; y < out_sizes[3]; ++y)
+                {
+                    T res = input[b][f][y][0];
+                    size_t orig_x = input[0][0][0].size();
+                    for (size_t x = 1; x < orig_x; ++x)
+                        res = reduce_execute<T>(mode, res, input[b][f][y][x]);
+                    output[0][b][f][y] = res;
+                }
+        break;
+    default: break;
+    }
+    return output;
+}
+
+template <typename T>
+VVVVF<T> reduce_input(VVVVF<T> &input,
+    cldnn::contract_mode mode, std::vector<uint16_t> reduction_axes,
+    int input_padding_y = 0, int input_padding_x = 0,
+    int output_padding_y = 0, int output_padding_x = 0) {
+    VVVVF<T> output(input);
+    for (size_t i = 0; i < reduction_axes.size(); ++i)
+        output = reduce_dim<T>(output, mode, reduction_axes[i], input_padding_y, input_padding_x, output_padding_y, output_padding_x);
+    return output;
+}
+
+std::string print_axes(std::vector<uint16_t> reduction_axes)
+{
+    std::stringstream res;
+    res << "[";
+    for (size_t i = 0; i < reduction_axes.size(); ++i)
+    {
+        if (i != 0)
+            res << ", ";
+        res << reduction_axes[i];
+    }
+    res << "]";
+    return res.str();
+}
+
+template <typename T>
+void generic_contract_test_float(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, cldnn::contract_mode mode,
+    std::vector<uint16_t> reduction_axes, int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, int output_padding_x = 0) {
+
+    int min_random = -2, max_random = 2;
+    VVVVF<T> input_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VF<T> input_rnd_vec = flatten_4d<T>(test_input_fmt, input_rnd);
+
+    const auto& engine = get_test_engine();
+    tensor input_tensor(input_b, input_f, input_x, input_y);
+    auto input = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    set_values(input, input_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(contract("output", "input", mode, reduction_axes));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<T>();
+
+    VVVVF<T> output_cpu = reduce_input<T>(input_rnd, mode, reduction_axes, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
+    EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
+    tensor output_tensor = output_layout.get_buffer_size();
+    int y_size = output_tensor.spatial[1];
+    int x_size = output_tensor.spatial[0];
+    int f_size = output_tensor.feature[0];
+    int b_size = output_tensor.batch[0];
+    EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
+    EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
+    EXPECT_EQ(f_size, (int)output_cpu[0].size());
+    EXPECT_EQ(b_size, (int)output_cpu.size());
+
+
+    bool test_is_correct = true;
+    VF<T> output_cpu_vec = flatten_4d<T>(test_input_fmt, output_cpu);
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        if (!floating_point_equal(output_cpu_vec[i], output_ptr[i]) && !(std::isnan((float)output_cpu_vec[i]) && std::isnan((float)output_ptr[i]))) {
+            test_is_correct = false;
+            break;
+        }
+    }
+    EXPECT_EQ(test_is_correct, true) << std::endl
+        << "failing test parameters:" << std::endl
+        << "input_b = " << input_b << std::endl
+        << "input_f = " << input_f << std::endl
+        << "input_y = " << input_y << std::endl
+        << "input_x = " << input_x << std::endl
+        << "contract_mode = " << (int)mode << std::endl
+        << "axes = " << print_axes(reduction_axes) << std::endl
+        << "input_padding_y = " << input_padding_y << std::endl
+        << "input_padding_x = " << input_padding_x << std::endl
+        << "output_padding_y = " << output_padding_y << std::endl
+        << "output_padding_x = " << output_padding_x << std::endl;
+}
+
+template <typename T>
+void generic_contract_test_int(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, cldnn::contract_mode mode,
+    std::vector<uint16_t> reduction_axes, int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, int output_padding_x = 0) {
+
+    int min_random = -2, max_random = 2;
+    VVVVF<T> input_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VF<T> input_rnd_vec = flatten_4d<T>(test_input_fmt, input_rnd);
+
+    const auto& engine = get_test_engine();
+    tensor input_tensor(input_b, input_f, input_x, input_y);
+    auto input = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    set_values(input, input_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(contract("output", "input", mode, reduction_axes));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<T>();
+
+    VVVVF<T> output_cpu = reduce_input<T>(input_rnd, mode, reduction_axes, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
+    EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
+    tensor output_tensor = output_layout.get_buffer_size();
+    int y_size = output_tensor.spatial[1];
+    int x_size = output_tensor.spatial[0];
+    int f_size = output_tensor.feature[0];
+    int b_size = output_tensor.batch[0];
+    EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
+    EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
+    EXPECT_EQ(f_size, (int)output_cpu[0].size());
+    EXPECT_EQ(b_size, (int)output_cpu.size());
+
+
+    bool test_is_correct = true;
+    VF<T> output_cpu_vec = flatten_4d<T>(test_input_fmt, output_cpu);
+
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        if (output_cpu_vec[i] != output_ptr[i]) {
+            test_is_correct = false;
+            break;
+        }
+    }
+    EXPECT_EQ(test_is_correct, true) << std::endl
+        << "failing test parameters:" << std::endl
+        << "input_b = " << input_b << std::endl
+        << "input_f = " << input_f << std::endl
+        << "input_y = " << input_y << std::endl
+        << "input_x = " << input_x << std::endl
+        << "contract_mode = " << (int)mode << std::endl
+        << "axes = " << print_axes(reduction_axes) << std::endl
+        << "input_padding_y = " << input_padding_y << std::endl
+        << "input_padding_x = " << input_padding_x << std::endl
+        << "output_padding_y = " << output_padding_y << std::endl
+        << "output_padding_x = " << output_padding_x << std::endl;
+}
+
+TEST(contract_gpu_f32, generic_y_sum) {
+    generic_contract_test_float<float>(format::bfyx, 5, 5, 5, 5, contract_mode::sum, { 2 });
+}
+
+TEST(contract_gpu_f32, generic_fx_prod) {
+    generic_contract_test_float<float>(format::bfyx, 5, 5, 5, 5, contract_mode::sum, { 1, 3 });
+}
+
+TEST(contract_gpu_i32, generic_f_all) {
+    generic_contract_test_int<int32_t>(format::bfyx, 5, 5, 5, 5, contract_mode::all, { 1 });
+}
+
+TEST(contract_gpu_i32, generic_bfyx_any) {
+    generic_contract_test_int<int32_t>(format::bfyx, 5, 5, 5, 5, contract_mode::any, { 0, 1, 2, 3 });
+}
+
+TEST(contract_gpu_f32, generic_f_max) {
+    generic_contract_test_float<float>(format::bfyx, 5, 5, 5, 5, contract_mode::max, { 1 });
+}
+
+TEST(contract_gpu_i32, generic_f_max) {
+    generic_contract_test_int<int32_t>(format::bfyx, 5, 5, 5, 5, contract_mode::max, { 1 });
+}
+
+TEST(contract_error, basic_error_empty_r_axes) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(contract("output", "input", contract_mode::sum, { }));
+
+    std::string msg_to_find = "Incorrect parameters configuration: reduction_axes should not be empty.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(contract_error, basic_error_wrong_r_axes_size) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(contract("output", "input", contract_mode::sum, { 0, 1, 2, 3, 4 }));
+
+    std::string msg_to_find = "Incorrect parameters configuration: reduction_axes size should be less or equal 4.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(contract_error, basic_error_wrong_r_axis_value) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(contract("output", "input", contract_mode::sum, { 0, 4 }));
+
+    std::string msg_to_find = "Incorrect parameters configuration: reduction_axes index should be within reduction_axes range.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(contract_error, basic_error_duplicate_r_axis_values) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(contract("output", "input", contract_mode::sum, { 0, 1, 1 }));
+
+    std::string msg_to_find = "Incorrect parameters configuration: Duplicate axes numbers was found in reduction_axes.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
index b65144466..c37493b1e 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_gpu_test.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////
 
 #include <gtest/gtest.h>
+#include <gmock/gmock.h>
 #include "api/CPP/memory.hpp"
 #include <api/CPP/input_layout.hpp>
 #include "api/CPP/convolution.hpp"
@@ -60,7 +61,7 @@ T kahan_summation(std::vector<T> &input) {
 
 template<typename T>
 VVF<T> reference_convolve(VVVF<T> &input, VVVF<T> &filter, int stride_y, int stride_x, float bias, int dilation_y = 1, int dilation_x = 1,
-        int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, 
+        int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0,
         int output_padding_x = 0, size_t f_begin = 0)
 {
     size_t kernel_extent_y = dilation_y * (filter[0].size() - 1) + 1;
@@ -144,7 +145,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_no_bias) {
     // 21  28  39
     // 18  20  20
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32,format::yxfb,{ 1, 1, 5, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 3, 2 } });
@@ -216,7 +217,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) {
     // 21  28  39
     // 18  20  20
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 5, 4 } });
     auto weights = memory::allocate(engine, { data_types::i8,format::bfyx,{ 1, 1, 3, 2 } });
@@ -263,7 +264,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_int8_no_bias) {
 
 TEST(convolution_f32_fw_gpu, with_output_size_same_input) {
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 320, 320 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 64, 4, 7, 7 } });
@@ -275,7 +276,7 @@ TEST(convolution_f32_fw_gpu, with_output_size_same_input) {
         data("weights2", weights2),
         convolution::create_with_output_size("conv1", "input", { "weights" }, {1, 64, 160, 160}, {1, 1, 2, 2}, {0, 0, -3, -3}),
         convolution::create_with_output_size("conv2", "input", { "weights2" }, {1, 64, 320, 320}, {1, 1, 1, 1}, {0, 0, -3, -3})
-    );
+        );
 
     network network(engine, topology);
     network.set_input_data("input", input);
@@ -294,16 +295,16 @@ TEST(convolution_f32_fw_gpu, three_convolutions_same_weights) {
     //  Input:
     //  1  1   1  1
     //  1  1   1  1
-    //  
+    //
     //  Filter:
-    //  1 
-    //  
+    //  1
+    //
     //  Output:
     //  8  8   8  8
     //  8  8   8  8
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {1,2,2,2} });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 2,2,1,1 } });
@@ -373,7 +374,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
     //  Bias:
     //  1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 5, 4 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 2 } });
@@ -421,7 +422,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution) {
 
 TEST(convolution_f32_fw_gpu, basic_convolution_bfyx_weights_as_input_layout) {
     //Same params as convolution_f32_fw_gpu, basic_convolution but with bfyx optimized data and weights set as input_layout
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,
     { 1, 1, 5, 4 }
     });
@@ -518,7 +519,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) {
     //  Bias:
     //  1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
@@ -588,6 +589,422 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_padding) {
     //print_2d(temp_vec);
 }
 
+TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding) {
+    //  Filter : 2x2
+    //  Stride : 1x1
+    //  Input  : 3x4
+    //  Input padding : above 2x1, below 2x1
+    //  Output : 6x5
+    //  Padding: Zero
+    //
+    //  Input:
+    //  z  z  z  z  z  z
+    //  z  z  z  z  z  z
+    //  z  1  2  3  4  z
+    //  z  2  2  3  4  z
+    //  z  3  3  3  5  z
+    //  z  z  z  z  z  z
+    //  z  z  z  z  z  z
+    //
+    //  Filter:
+    //  1  1
+    //  1  1
+    //
+    //  Output:
+    //  1  1  1  1  1
+    //  2  4  6  8  5
+    //  4  8 11 15  9
+    //  6 11 12 16 10
+    //  4  7  7  9  6
+    //  1  1  1  1  1
+    //
+    //  Bias:
+    //  1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
+    set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
+    set_values(biases, { 1.0f });
+    VVF<float> output_vec = {
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 2.0f, 4.0f, 6.0f, 8.0f, 5.0f },
+        { 4.0f, 8.0f, 11.0f, 15.0f, 9.0f },
+        { 6.0f, 11.0f, 12.0f, 16.0f, 10.0f },
+        { 4.0f, 7.0f, 7.0f, 9.0f, 6.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "biases" },
+            { 1,1,1,1 },
+            { 0,0,0,0 },
+            { 1, 1, 1, 1 },
+            { 0,0,1,2 },
+            { 0,0,1,2 },
+            false,
+            0,
+            padding{ { 0,0,0,0 }, 0 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_memory = outputs.at("conv").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<float>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 6);
+    EXPECT_EQ(x_size, 5);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (int y = 0; y < y_size; ++y) {
+        for (int x = 0; x < x_size; ++x) {
+            EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
+        }
+    }
+}
+
+TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding) {
+    //  Filter : 2x2
+    //  Stride : 1x1
+    //  Input  : 3x4
+    //  Input padding : above 2x1, below 3x2
+    //  Output : 7x6
+    //  Padding: Zero
+    //
+    //  Input:
+    //  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z
+    //  z  1  2  3  4  z  z
+    //  z  2  2  3  4  z  z
+    //  z  3  3  3  5  z  z
+    //  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z
+    //
+    //  Filter:
+    //  1  1
+    //  1  1
+    //
+    //  Output:
+    //  1  1  1  1  1  1
+    //  2  4  6  8  5  1
+    //  4  8 11 15  9  1
+    //  6 11 12 16 10  1
+    //  4  7  7  9  6  1
+    //  1  1  1  1  1  1
+    //  1  1  1  1  1  1
+    //
+    //  Bias:
+    //  1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
+    set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
+    set_values(biases, { 1.0f });
+    VVF<float> output_vec = {
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f },
+        { 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f },
+        { 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f },
+        { 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "biases" },
+            { 1,1,1,1 },
+            { 0,0,0,0 },
+            { 1, 1, 1, 1 },
+            { 0,0,1,2 },
+            { 0,0,2,3 },
+            false,
+            0,
+            padding{ { 0,0,0,0 }, 0 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_memory = outputs.at("conv").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<float>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 7);
+    EXPECT_EQ(x_size, 6);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (int y = 0; y < y_size; ++y) {
+        for (int x = 0; x < x_size; ++x) {
+            EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
+        }
+    }
+}
+
+TEST(convolution_f32_fw_gpu, basic_convolution_sym_input_padding_with_input_offset) {
+    //  Filter : 2x2
+    //  Stride : 1x1
+    //  Input  : 3x4
+    //  Input padding : above 2x1, below 2x1
+    //  Input offset: 2x1
+    //  Output : 10x7
+    //  Padding: Zero
+    //
+    //  Input:
+    //  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z
+    //  z  z  1  2  3  4  z  z
+    //  z  z  2  2  3  4  z  z
+    //  z  z  3  3  3  5  z  z
+    //  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z
+    //
+    //  Filter:
+    //  1  1
+    //  1  1
+    //
+    //  Output:
+    //  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1
+    //  1  2  4  6  8  5  1
+    //  1  4  8 11 15  9  1
+    //  1  6 11 12 16 10  1
+    //  1  4  7  7  9  6  1
+    //  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1
+    //
+    //  Bias:
+    //  1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
+    set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
+    set_values(biases, { 1.0f });
+    VVF<float> output_vec = {
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f },
+        { 1.0f, 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f },
+        { 1.0f, 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f },
+        { 1.0f, 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "biases" },
+            { 1,1,1,1 },
+            { 0,0,-1,-2 },
+            { 1, 1, 1, 1 },
+            { 0,0,1,2 },
+            { 0,0,1,2 },
+            false,
+            0,
+            padding{ { 0,0,0,0 }, 0 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_memory = outputs.at("conv").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<float>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 10);
+    EXPECT_EQ(x_size, 7);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (int y = 0; y < y_size; ++y) {
+        for (int x = 0; x < x_size; ++x) {
+            EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
+        }
+    }
+}
+
+TEST(convolution_f32_fw_gpu, basic_convolution_asym_input_padding_with_input_offset) {
+    //  Filter : 2x2
+    //  Stride : 1x1
+    //  Input  : 3x4
+    //  Input padding : above 2x1, below 3x2
+    //  Input offset: 2x1
+    //  Output : 11x8
+    //  Padding: Zero
+    //
+    //  Input:
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  1  2  3  4  z  z  z
+    //  z  z  2  2  3  4  z  z  z
+    //  z  z  3  3  3  5  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //  z  z  z  z  z  z  z  z  z
+    //
+    //  Filter:
+    //  1  1
+    //  1  1
+    //
+    //  Output:
+    //  1  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1  1
+    //  1  2  4  6  8  5  1  1
+    //  1  4  8 11 15  9  1  1
+    //  1  6 11 12 16 10  1  1
+    //  1  4  7  7  9  6  1  1
+    //  1  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1  1
+    //  1  1  1  1  1  1  1  1
+    //
+    //  Bias:
+    //  1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 4, 3 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { 1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 2.0f, 3.0f, 4.0f, 3.0f, 3.0f, 3.0f, 5.0f });
+    set_values(weights, { 1.0f, 1.0f, 1.0f, 1.0f });
+    set_values(biases, { 1.0f });
+    VVF<float> output_vec = {
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 2.0f, 4.0f, 6.0f, 8.0f, 5.0f, 1.0f, 1.0f },
+        { 1.0f, 4.0f, 8.0f, 11.0f, 15.0f, 9.0f, 1.0f, 1.0f },
+        { 1.0f, 6.0f, 11.0f, 12.0f, 16.0f, 10.0f, 1.0f, 1.0f },
+        { 1.0f, 4.0f, 7.0f, 7.0f, 9.0f, 6.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f },
+        { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f } };
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "biases" },
+            { 1,1,1,1 },
+            { 0,0,-1,-2 },
+            { 1, 1, 1, 1 },
+            { 0,0,1,2 },
+            { 0,0,2,3 },
+            false,
+            0,
+            padding{ { 0,0,0,0 }, 0 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_memory = outputs.at("conv").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<float>();
+
+    int y_size = output_layout.size.spatial[1];
+    int x_size = output_layout.size.spatial[0];
+    int f_size = output_layout.size.feature[0];
+    int b_size = output_layout.size.batch[0];
+    EXPECT_EQ(output_layout.format, format::yxfb);
+    EXPECT_EQ(y_size, 11);
+    EXPECT_EQ(x_size, 8);
+    EXPECT_EQ(f_size, 1);
+    EXPECT_EQ(b_size, 1);
+
+    for (int y = 0; y < y_size; ++y) {
+        for (int x = 0; x < x_size; ++x) {
+            EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
+        }
+    }
+}
+
 TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) {
     //  Filter : 2x2
     //  Stride : 1x1
@@ -622,7 +1039,7 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) {
     //  Bias:
     //  1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 3 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
@@ -682,9 +1099,9 @@ TEST(convolution_f32_fw_gpu, basic_convolution_input_and_output_padding) {
     EXPECT_EQ(f_size, 1);
     EXPECT_EQ(b_size, 1);
 
-    for (int y = y_pad; y < y_size - y_pad; ++y) 
+    for (int y = y_pad; y < y_size - y_pad; ++y)
     {
-        for (int x = x_pad; x < x_size - x_pad; ++x) 
+        for (int x = x_pad; x < x_size - x_pad; ++x)
         {
             EXPECT_EQ(output_vec[y][x], output_ptr[y * x_size + x]);
         }
@@ -737,7 +1154,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad_random) {
     }
     VF<float> output_rnd_vec = flatten_4d<float>(format::yxfb, output_rnd);
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32,  format::yxfb, { 1, 1, 4, 4 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } });
@@ -807,7 +1224,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad_random) {
     }
     VF<float> output_rnd_vec = flatten_4d<float>(format::yxfb, output_rnd);
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } });
@@ -865,7 +1282,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x1x1_nopad) {
     //  8  0.5
     //  6  9
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 1 } });
@@ -919,7 +1336,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in2x2x1x2_nopad) {
     //
     //  Output:
     //  3.65 -5.36
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 1, 1 }, 1 } });
@@ -971,7 +1388,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz2x1x2x1_in1x2x1_nopad) {
     //   5.1  f=0
     //  -5.2  f=1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 2 } });
@@ -1030,7 +1447,7 @@ TEST(convolution_f32_fw_gpu, basic_ofm_wsiz3x2x2x1_in2x2x1_nopad) {
     //   64,0  f=1
     //  103.0  f=2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
@@ -1086,7 +1503,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2x1x3_wstr2x2_in2x2x1x1_nopad) {
     //   2.12
     //   3.08
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 1, 1 }, 3 } });
@@ -1142,7 +1559,7 @@ TEST(convolution_f32_fw_gpu, wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
     //
     //  Output:
     //  12.25
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 1 } });
@@ -1199,7 +1616,7 @@ TEST(convolution_f32_fw_gpu, offsets_wsiz3x3_wstr2x2_in2x2x1x1_zeropad) {
     //   Output:
     //   rnd   rnd
     //   rnd   2.0
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
@@ -1276,7 +1693,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2) {
     //   8  3.65 0.5 -5.36
     //   6  3.65 9   -5.36
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 4, 4 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 2, 2 }, 2 } });
@@ -1379,7 +1796,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) {
     //   8  8 3.65 3.65 0.5  0.5 -5.36 -5.36
     //   6  6 3.65 3.65 9    9   -5.36 -5.36
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 4, 4 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 2,{ 2, 2 }, 2 } });
@@ -1444,11 +1861,190 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2) {
     EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 15));
 }
 
+TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2) {
+    //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+
+    set_values(input, {
+        -0.5f,  0.5f,  1.0f,  1.5f,  0.5f,  2.3f,  2.0f, -0.4f,
+        1.5f,  2.0f, -0.5f, -4.0f,  0.0f,  1.0f, -1.0f,  3.0f,
+        0.5f,  0.5f,  0.5f,  1.5f, -1.0f,  2.3f,  1.0f, -0.4f,
+        0.5f,  2.0f,  2.0f, -4.0f,  1.5f,  1.0f, -0.5f,  3.0f
+    });
+    set_values(weights, {
+        -2.0f, 0.5f, 3.5f, 1.5f,
+        -1.2f, 1.5f, 0.5f, -0.5f
+    });
+    set_values(biases, { 2.0f, -1.0f });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "biases" },
+            2, // number of groups
+            { 0,0,2,2 },
+            { 0,0,0,0 },
+            { 1,1,1,1 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 1));
+    EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 2));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 3));
+    EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 4));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 5));
+    EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 6));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
+}
+
+TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_group2_bfyx) {
+    //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x1_nopad_split2
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 2, 4, 4 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+
+    set_values(input, {
+        -0.5f,  0.5f,  1.0f,  1.5f,  0.5f,  2.3f,  2.0f, -0.4f,
+        1.5f,  2.0f, -0.5f, -4.0f,  0.0f,  1.0f, -1.0f,  3.0f,
+        0.5f,  0.5f,  0.5f,  1.5f, -1.0f,  2.3f,  1.0f, -0.4f,
+        0.5f,  2.0f,  2.0f, -4.0f,  1.5f,  1.0f, -0.5f,  3.0f
+    });
+    set_values(weights, {
+        -2.0f, 0.5f, 3.5f, 1.5f,
+        -1.2f, 1.5f, 0.5f, -0.5f
+    });
+    set_values(biases, { 2.0f, -1.0f });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        reorder("input_1", "input", { data_types::f32,format::bfyx,{ 1, 2, 4, 4 } }),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input_1",
+            { "weights" },
+            { "biases" },
+            2, // number of groups
+            { 0,0,2,2 },
+            { 0,0,0,0 },
+            { 1,1,1,1 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
+    EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 1));
+    EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 2));
+    EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 3));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 4));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 5));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 6));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
+}
+
+TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group2) {
+    //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+
+    set_values(input, {
+        -0.5f, -0.5f,  0.5f,  0.5f,  1.0f,  1.0f,  1.5f,  1.5f,  0.5f,  0.5f,  2.3f,  2.3f,  2.0f,  2.0f, -0.4f, -0.4f,
+        1.5f,  1.5f,  2.0f,  2.0f, -0.5f, -0.5f, -4.0f, -4.0f,  0.0f,  0.0f,  1.0f,  1.0f, -1.0f, -1.0f,  3.0f,  3.0f,
+        0.5f,  0.5f,  0.5f,  0.5f,  0.5f,  0.5f,  1.5f,  1.5f, -1.0f, -1.0f,  2.3f,  2.3f,  1.0f,  1.0f, -0.4f, -0.4f,
+        0.5f,  0.5f,  2.0f,  2.0f,  2.0f,  2.0f, -4.0f, -4.0f,  1.5f,  1.5f,  1.0f,  1.0f, -0.5f, -0.5f,  3.0f,  3.0f,
+    });
+    set_values(weights, {
+        -2.0f, 0.5f, 3.5f, 1.5f,
+        -1.2f, 1.5f, 0.5f, -0.5f
+    });
+    set_values(biases, { 2.0f, -1.0f });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "biases" },
+            2, // number of groups
+            { 1,1,2,2 },
+            { 0,0,0,0 },
+            { 1,1,1,1 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 0));
+    EXPECT_FLOAT_EQ(8.0f, get_value<float>(output_ptr, 1));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 2));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 3));
+    EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 4));
+    EXPECT_FLOAT_EQ(0.5f, get_value<float>(output_ptr, 5));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 6));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 7));
+    EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 8));
+    EXPECT_FLOAT_EQ(6.0f, get_value<float>(output_ptr, 9));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 10));
+    EXPECT_FLOAT_EQ(3.65f, get_value<float>(output_ptr, 11));
+    EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 12));
+    EXPECT_FLOAT_EQ(9.0f, get_value<float>(output_ptr, 13));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 14));
+    EXPECT_FLOAT_EQ(-5.36f, get_value<float>(output_ptr, 15));
+}
+
 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt) {
     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
     //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } });
 
@@ -1545,7 +2141,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw
 TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx) {
     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
     //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2 but with batch 1
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } });
 
@@ -1643,6 +2239,200 @@ TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthw
     }
 }
 
+TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16) {
+    //  Test for grouped convolution, there are 16 joined weights and biases (group 16)
+    //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 16, 4, 4 } });
+
+    set_values(input, {
+        -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f, -0.5f, -0.5f,  0.5f,  0.5f,
+        1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f, 1.0f,  1.0f,  1.5f,  1.5f,
+        0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f, 0.5f,  0.5f,  2.3f,  2.3f,
+        2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f, 2.0f,  2.0f, -0.4f, -0.4f,
+        1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f, 1.5f,  1.5f,  2.0f,  2.0f,
+        -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f, -0.5f, -0.5f, -4.0f, -4.0f,
+        0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f, 0.0f,  0.0f,  1.0f,  1.0f,
+        -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f, -1.0f, -1.0f,  3.0f,  3.0f,
+        0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f, 0.5f,  0.5f,  0.5f,  0.5f,
+        0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f, 0.5f,  0.5f,  1.5f,  1.5f,
+        -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f, -1.0f, -1.0f,  2.3f,  2.3f,
+        1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f, 1.0f,  1.0f, -0.4f, -0.4f,
+        0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f, 0.5f,  0.5f,  2.0f,  2.0f,
+        2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f, 2.0f,  2.0f, -4.0f, -4.0f,
+        1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f, 1.5f,  1.5f,  1.0f,  1.0f,
+        -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f, -0.5f, -0.5f,  3.0f,  3.0f,
+    });
+
+    topology topology(input_layout("input", input.get_layout()));
+
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
+
+    set_values(weights,
+        {
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f
+        }
+    );
+    set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f});
+
+    topology.add(
+        data("weights", weights),
+        data("bias", biases)
+    );
+
+    topology.add(
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "bias" },
+            16,
+            { 1,1,2,2 },
+            { 0,0,0,0 },
+            { 1,1,1,1 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    std::vector<float> expected_output_vec = {
+        8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f, 8.0f, 8.0f, 3.65f, 3.65f,
+        0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f, 0.5f, 0.5f, -5.36f, -5.36f,
+        6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f, 6.0f, 6.0f, 3.65f, 3.65f,
+        9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f, 9.0f, 9.0f, -5.36f, -5.36f,
+    };
+
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++)
+    {
+        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
+
+TEST(convolution_f32_fw_gpu, basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_group16_bfyx) {
+    //  Test for grouped convolution, there are 16 joined weights and biases (group 16)
+    //  data is similar as in basic_wsiz2x2_wstr2x2_in4x4x2x2_nopad_split2_depthwise_sep_opt_bfyx
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 16, 4, 4 } });
+
+    set_values(input, {
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+        -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f, 0.5f, -1.0f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f,
+        0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f, 0.5f, 1.5f, 2.3f, -0.4f, 2.0f, -4.0f, 1.0f, 3.0f,
+    });
+
+    topology topology(input_layout("input", input.get_layout()));
+
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
+
+    set_values(weights,
+        {
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f,
+            -2.0f, 0.5f, 3.5f, 1.5f,
+            -1.2f, 1.5f, 0.5f, -0.5f
+        }
+    );
+
+    set_values(biases, { 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f, 2.0f, -1.0f});
+
+    topology.add(
+            data("weights", weights),
+            data("bias", biases)
+    );
+
+    topology.add(
+        convolution(
+            "conv",
+            "input",
+            { "weights" },
+            { "bias" },
+            16,
+            { 1,1,2,2 },
+            { 0,0,0,0 },
+            { 1,1,1,1 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    std::vector<float> expected_output_vec = {
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+        8.0f, 0.5f,  6.0f,  9.0f, 3.65f,-5.36f, 3.65f, -5.36f,
+    };
+
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++)
+    {
+        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
+
 TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_nopad_split2) {
     //  Filter : 1x1
     //  Stride : 2x2
@@ -1659,7 +2449,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_nopad_split2) {
     //
     //  Filter1:
     //  -2 -0.5  ofm=0
-    //   1  2    ofm=1 
+    //   1  2    ofm=1
     //  Bias1:
     //   1  5
     //
@@ -1671,13 +2461,13 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_nopad_split2) {
     //  -1  2.5
     //
     //  Output:
-    //  -2.25  
+    //  -2.25
     //   7.5
     //
     //  -1.75
     //   2.25
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
@@ -1740,7 +2530,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) {
     //
     //  Filter1:
     //  -2  ofm=0
-    //   1  ofm=1 
+    //   1  ofm=1
     //  Bias1:
     //   1  5
     //
@@ -1752,14 +2542,14 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x2x1_nopad_split2) {
     //  -1  2.5
     //
     //  Output:
-    //  -2  
+    //  -2
     //   6.5
     //
     //   1
     //   3.5
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 1, 1 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 4 } });
@@ -1838,7 +2628,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no
     //  -1   2.5 2
     //
     //  Output:
-    //  -1.5  
+    //  -1.5
     //   8
     //   7.75
     //
@@ -1847,7 +2637,7 @@ TEST(convolution_f32_fw_gpu, basic_wsiz1x1_wstr2x2_in1x1x4x1_filter_1x3x2x1x1_no
     //  -2
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 4, 1, 1 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1,{ 1, 1 }, 6 } });
@@ -1924,7 +2714,7 @@ TEST(convolution_gpu, trivial_convolution_relu) {
     //  4  0.0
     //  2  5
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
@@ -1998,7 +2788,7 @@ TEST(convolution_gpu, relu_with_negative_slope) {
     //  4  -0.35
     //  2  5
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
     //auto output = memory::allocate({ memory::format::yxfb_f32,{ 1 ,{ 2, 2 }, 1 } });
@@ -2049,7 +2839,7 @@ TEST(convolution_gpu, relu_with_negative_slope) {
 
 TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) {
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     extern const std::vector<float> conv_1x1_output;
 
@@ -2091,7 +2881,7 @@ TEST(convolution_gpu, DISABLED_two_1x1_kernels_after_each_other) {
 
     auto output_ptr = output_prim.pointer<float>();
     auto output_layout = output_prim.get_layout();
-    
+
     int y_size = output_layout.size.spatial[1];
     int x_size = output_layout.size.spatial[0];
     int f_size = output_layout.size.feature[0];
@@ -2140,7 +2930,7 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp32)
     const int32_t output_x = (input_x - weights_x) / stride_x + 1;
     const int32_t output_y = (input_y - weights_y) / stride_y + 1;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_size = tensor( batch_size, input_feature_count, input_x, input_y );
     auto input = memory::allocate(engine, { data_types::f32, input_format, input_size });
@@ -2311,7 +3101,7 @@ void quantize_weights(cldnn::memory& weights, cldnn::memory& w_qf)
         for (int w = 0; w < batch_pitch; w++)
             if (max < abs(ptr[ofm* batch_pitch + w]))
                 max = abs(ptr[ofm* batch_pitch + w]);
-       
+
         if (max == (T)0)
             max = (T)1; // do not quantize
 
@@ -2429,7 +3219,7 @@ TEST(convolution_f32_fw_gpu, byte_activation) {
     engine_configuration eng_conf(false, false, false, "", "", true, "", "kernels");
     engine engine{ eng_conf };
     auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
-       
+
     VVVF<char> output_vec = {
         {
             { 11, 0, 15 },
@@ -2505,7 +3295,7 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
     //  Bias:
     //  1 -8
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
@@ -2518,11 +3308,11 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
 
     set_values(biases, { 1.0f, -8.0f });
     VVVF<float> output_vec = {
-        { 
+        {
             { 21.0f, 28.0f, 39.0f },
             { 18.0f, 20.0f, 20.0f }
         },
-        { 
+        {
             { 155.0f, 245.0f, 348.0f },
             { 142.0f, 140.0f, 178.0f }
         } };
@@ -2546,7 +3336,7 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_low_prec_single_ofq) {
 
     auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
-    float i_qf = 1.0f; 
+    float i_qf = 1.0f;
     float o_qf = 127.0f / max_abs<float>(output_memory_f);
 
     std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
@@ -2618,7 +3408,7 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_high_prec_calib_per_ofm) {
     //
     //  Bias:
     //  1 -8
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
@@ -2656,10 +3446,10 @@ TEST(convolution_f32_fw_gpu, quantized_convolution_high_prec_calib_per_ofm) {
 
     auto output_memory_f = outputs_f.at("conv_f").get_memory();
     auto output_ptr_f = output_memory_f.pointer<float>();
-    
+
     auto input = memory::allocate(engine, { data_types::i8, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights = memory::allocate(engine, { data_types::i8, format::bfyx,{ 2, 1, 3, 2 } });
-    float i_qf = 1.0f; 
+    float i_qf = 1.0f;
 
     std::vector<char> weights_values = { 1, 2, 1, 2, 1, 2, 19, 17, -1, -10, 32, 23 };
     set_values<char>(input, { 1, 2, 3, 4, 5, 2, 2, 3, 4, 6, 3, 3, 3, 5, 1, 1, 1, 1, 1, 1 });
@@ -2751,7 +3541,7 @@ TEST(convolution_f32_fw_gpu, calibration_advance) {
     //  Bias2:
     //  2  4  0
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 4 } });
     auto weights_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
@@ -2762,10 +3552,10 @@ TEST(convolution_f32_fw_gpu, calibration_advance) {
     auto w_qf_2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
 
     std::vector<float> weights_values_f = { 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.9f, 1.7f, -1.0f, -1.0f, 3.2f, 2.3f };
-    std::vector<float> weights_values_f_2 = { 
+    std::vector<float> weights_values_f_2 = {
         1.5f, 2.3f, -1.0f, 3.0f, 5.6f, -1.0f,
         3.0f, 5.6f, -1.0f, 1.0f, 2.0f, 3.0f,
-        
+
         1.9f, 1.7f, -1.0f, 1.9f, 1.7f, -1.0f,
         -1.0f, 3.2f, 2.3f, -1.0f, 3.2f, 2.3f,
 
@@ -2835,19 +3625,380 @@ TEST(convolution_f32_fw_gpu, calibration_advance) {
     auto o_qf = output_calibrations_2.pointer<float>();
 
     for (int f = 0; f < out_size.feature[0]; f++)
-        for (int y = 0; y < out_size.spatial[1]; ++y) {
-            for (int x = 0; x < out_size.spatial[0]; ++x) {
-                EXPECT_NEAR(ref_ptr[x + out_size.spatial[0] * (y + out_size.spatial[1]*f)], ((float)test_ptr[x + out_size.spatial[0] * (y + out_size.spatial[1] * f)]) / o_qf[f], 3.0f);
+    {
+        for (int y = 0; y < out_size.spatial[1]; ++y)
+        {
+            for (int x = 0; x < out_size.spatial[0]; ++x)
+            {
+                EXPECT_NEAR(ref_ptr[x + out_size.spatial[0]
+                    * (y + out_size.spatial[1] * f)], ((float)test_ptr[x + out_size.spatial[0]
+                        * (y + out_size.spatial[1] * f)]) / o_qf[f], 3.0f);
             }
         }
+    }
+
+}
+TEST(convolution_f32_fw_gpu, local_basic) {
+    //  Filter : 3x3x2x2 - local sizes
+    //  Stride : 1x1
+    //  Input  : 4x4
+    //  Output : 3x3
+    //
+    //  Input:
+    //  1  1  1  1
+    //  1  1  1  1
+    //  2  2  2  2
+    //  2  2  2  2
+    //
+    //
+    //  Filter:
+    //  0 0  1 1  2 2
+    //  0 0  1 1  2 2
+    //
+    //  3 3  4 4  5 5
+    //  3 3  4 4  5 5
+    //
+    //  6 6  7 7  8 8
+    //  6 6  7 7  8 8
+    //
+    //  Output:
+    //  0  4  8
+    // 18 24 30
+    // 48 56 64
+    //
+
+    const auto& engine = get_test_engine();
+    tensor local_size = tensor(1,1,2,2,3,3);
+    auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
+    auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    std::vector<float> weights_values_f = {
+        0.0, 0.0, 0.0, 0.0,
+        1.0, 1.0, 1.0, 1.0,
+        2.0, 2.0, 2.0, 2.0,
 
+        3.0, 3.0, 3.0, 3.0,
+        4.0, 4.0, 4.0, 4.0,
+        5.0, 5.0, 5.0, 5.0,
+
+        6.0, 6.0, 6.0, 6.0,
+        7.0, 7.0, 7.0, 7.0,
+        8.0, 8.0, 8.0, 8.0,
+    };
+    set_values<float>(input_f, { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 });
+    set_values<float>(weights_f, weights_values_f);
+    set_values(biases, { 0.0f });
+    std::vector<float> output_vec =
+    {
+        0.0f, 4.0f, 8.0f,
+        18.0f, 24.0f, 30.0f,
+        48.0f, 56.0f, 64.0f
+    };
+
+    topology topology_f(
+        input_layout("input_f", input_f.get_layout()),
+        data("weights_f", weights_f),
+        data("biases", biases),
+        convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 1 }));
+
+    build_options opts;
+    opts.set_option(build_option::optimize_data(true));
+    network network_f(engine, topology_f, opts);
+    network_f.set_input_data("input_f", input_f);
+
+    auto outputs_f = network_f.execute();
+    EXPECT_EQ(outputs_f.begin()->first, "conv_f");
+
+    auto output_memory_f = outputs_f.at("conv_f").get_memory();
+    auto output_ptr_f = output_memory_f.pointer<float>();
+    unsigned int cntr = 0;
+    for (auto fl : output_ptr_f)
+        EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
 }
 
+
+TEST(convolution_f32_fw_gpu, local_multi_out_features) {
+    //  Filter : 3x1x3x3x2x2 - local sizes
+    //  Stride : 1x1
+    //  Input  : 4x4
+    //  Output : 3x3x3
+    //
+    //  Input:
+    //  1  1  1  1
+    //  1  1  1  1
+    //  2  2  2  2
+    //  2  2  2  2
+    //
+    //
+    //  Filter:
+    //  0 0  1 1  2 2  --- 1 ofm
+    //  0 0  1 1  2 2
+    //
+    //  3 3  4 4  5 5
+    //  3 3  4 4  5 5
+    //
+    //  6 6  7 7  8 8
+    //  6 6  7 7  8 8
+    //
+    //  0 0  0 0  0 0  --- 2 ofm
+    //  0 0  0 0  0 0
+    //
+    //  0 0  0 0  0 0
+    //  0 0  0 0  0 0
+    //
+    //  0 0  0 0  0 0
+    //  0 0  0 0  0 0
+    //
+    //  0 0  2 2  4 4 --- 3 ofm
+    //  0 0  2 2  4 4
+    //
+    //  6 6  8 8  1 1
+    //  6 6  8 8  1 1
+    //
+    //  3 3  5 5  7 7
+    //  3 3  5 5  7 7
+    //
+
+    //
+    //  Output:
+    //  0  4  8
+    // 18 24 30
+    // 48 56 64
+    //
+    //  0  0  0
+    //  0  0  0
+    //  0  0  0
+    //
+    //  0  8 16
+    // 36 48  6
+    // 24 40 56
+    //
+
+    const auto& engine = get_test_engine();
+    tensor local_size = tensor(3,1,2,2,3,3);
+    auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
+    auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
+
+    std::vector<float> weights_values_f = {
+        0.0, 0.0, 0.0, 0.0,
+        1.0, 1.0, 1.0, 1.0,
+        2.0, 2.0, 2.0, 2.0,
+
+        3.0, 3.0, 3.0, 3.0,
+        4.0, 4.0, 4.0, 4.0,
+        5.0, 5.0, 5.0, 5.0,
+
+        6.0, 6.0, 6.0, 6.0,
+        7.0, 7.0, 7.0, 7.0,
+        8.0, 8.0, 8.0, 8.0,
+
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+        0.0, 0.0, 0.0, 0.0,
+
+        0.0, 0.0, 0.0, 0.0,
+        2.0, 2.0, 2.0, 2.0,
+        4.0, 4.0, 4.0, 4.0,
+
+        6.0, 6.0, 6.0, 6.0,
+        8.0, 8.0, 8.0, 8.0,
+        1.0, 1.0, 1.0, 1.0,
+
+        3.0, 3.0, 3.0, 3.0,
+        5.0, 5.0, 5.0, 5.0,
+        7.0, 7.0, 7.0, 7.0,
+    };
+    set_values<float>(input_f, { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 });
+    set_values<float>(weights_f, weights_values_f);
+    set_values(biases, { 0.0f, 0.0f, 0.0f });
+    std::vector<float> output_vec =
+    {
+        0.0f,  4.0f,  8.0f,
+        18.0f, 24.0f, 30.0f,
+        48.0f, 56.0f, 64.0f,
+
+        0.0f,  0.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+        0.0f,  0.0f, 0.0f,
+
+        0.0f,  8.0f, 16.0f,
+        36.0f, 48.0f,  6.0f,
+        24.0f, 40.0f, 56.0f,
+    };
+
+    topology topology_f(
+        input_layout("input_f", input_f.get_layout()),
+        data("weights_f", weights_f),
+        data("biases", biases),
+        convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 1 }));
+
+    build_options opts;
+    opts.set_option(build_option::optimize_data(true));
+    network network_f(engine, topology_f, opts);
+    network_f.set_input_data("input_f", input_f);
+
+    auto outputs_f = network_f.execute();
+    EXPECT_EQ(outputs_f.begin()->first, "conv_f");
+
+    auto output_memory_f = outputs_f.at("conv_f").get_memory();
+    auto output_ptr_f = output_memory_f.pointer<float>();
+    unsigned int cntr = 0;
+    for (auto fl : output_ptr_f)
+    {
+        EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
+    }
+}
+
+TEST(convolution_f32_fw_gpu, local_multi_input_features) {
+    //  Filter : 1x3x3x3x2x2 - local sizes
+    //  Stride : 1x1
+    //  Input  : 3x4x4
+    //  Output : 3x3
+    //
+    //  Input:
+    //  0  0  0  0
+    //  0  0  0  0
+    //  0  0  0  0
+    //  0  0  0  0
+    //
+    //  1  1  1  1
+    //  1  1  1  1
+    //  1  1  1  1
+    //  1  1  1  1
+    //
+    //  2  2  2  2
+    //  2  2  2  2
+    //  2  2  2  2
+    //  2  2  2  2
+    //
+    //
+    //  Filter:
+    //  0 0  1 1  2 2
+    //  0 0  1 1  2 2
+    //
+    //  3 3  4 4  5 5
+    //  3 3  4 4  5 5
+    //
+    //  6 6  7 7  8 8
+    //  6 6  7 7  8 8
+    //
+    //  0 0  1 1  2 2
+    //  0 0  1 1  2 2
+    //
+    //  3 3  4 4  5 5
+    //  3 3  4 4  5 5
+    //
+    //  6 6  7 7  8 8
+    //  6 6  7 7  8 8
+    //
+    //  0 0  1 1  2 2
+    //  0 0  1 1  2 2
+    //
+    //  3 3  4 4  5 5
+    //  3 3  4 4  5 5
+    //
+    //  6 6  7 7  8 8
+    //  6 6  7 7  8 8
+    //
+    //  Output:
+    //  0  4  8
+    // 18 24 30
+    // 48 56 64
+    //
+
+    const auto& engine = get_test_engine();
+    tensor local_size = tensor(1,3,2,2,3,3);
+    auto input_f = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 3, 4, 4 } });
+    auto weights_f = memory::allocate(engine, { data_types::f32, format::bf_lyx_yx, local_size });
+    cldnn::memory biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    std::vector<float> weights_values_f = {
+        0.0, 0.0, 0.0, 0.0,
+        1.0, 1.0, 1.0, 1.0,
+        2.0, 2.0, 2.0, 2.0,
+
+        3.0, 3.0, 3.0, 3.0,
+        4.0, 4.0, 4.0, 4.0,
+        5.0, 5.0, 5.0, 5.0,
+
+        6.0, 6.0, 6.0, 6.0,
+        7.0, 7.0, 7.0, 7.0,
+        8.0, 8.0, 8.0, 8.0,
+
+        0.0, 0.0, 0.0, 0.0,
+        1.0, 1.0, 1.0, 1.0,
+        2.0, 2.0, 2.0, 2.0,
+
+        3.0, 3.0, 3.0, 3.0,
+        4.0, 4.0, 4.0, 4.0,
+        5.0, 5.0, 5.0, 5.0,
+
+        6.0, 6.0, 6.0, 6.0,
+        7.0, 7.0, 7.0, 7.0,
+        8.0, 8.0, 8.0, 8.0,
+
+        0.0, 0.0, 0.0, 0.0,
+        1.0, 1.0, 1.0, 1.0,
+        2.0, 2.0, 2.0, 2.0,
+
+        3.0, 3.0, 3.0, 3.0,
+        4.0, 4.0, 4.0, 4.0,
+        5.0, 5.0, 5.0, 5.0,
+
+        6.0, 6.0, 6.0, 6.0,
+        7.0, 7.0, 7.0, 7.0,
+        8.0, 8.0, 8.0, 8.0,
+    };
+    set_values<float>(input_f, {
+        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+        2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0 });
+    set_values<float>(weights_f, weights_values_f);
+    set_values(biases, { 0.0f });
+    std::vector<float> output_vec =
+    {
+        60.0f, 72.0f, 84.0f,
+        24.0f, 36.0f, 48.0f,
+        24.0f, 36.0f, 48.0f
+    };
+
+    topology topology_f(
+        input_layout("input_f", input_f.get_layout()),
+        data("weights_f", weights_f),
+        data("biases", biases),
+        convolution("conv_f", "input_f", { "weights_f" }, { "biases" }, { 0, 0, 1, 1 }));
+
+    build_options opts;
+    opts.set_option(build_option::optimize_data(true));
+    network network_f(engine, topology_f, opts);
+    network_f.set_input_data("input_f", input_f);
+
+    auto outputs_f = network_f.execute();
+    EXPECT_EQ(outputs_f.begin()->first, "conv_f");
+
+    auto output_memory_f = outputs_f.at("conv_f").get_memory();
+    auto output_ptr_f = output_memory_f.pointer<float>();
+    unsigned int cntr = 0;
+    for (auto fl : output_ptr_f)
+        EXPECT_FLOAT_EQ(fl, output_vec[cntr++]);
+}
+
+
 TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
 {
 #define USE_OLD_WEIGHTS_FORMAT 0
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     if (!engine.get_info().supports_fp16)
     {
@@ -3053,12 +4204,228 @@ TEST(convolution_gpu, basic_yxfb_4_4_yxfb_2_2_b16_if2_of16_st2_2_p0_sp1_fp16)
 #undef USE_OLD_WEIGHTS_FORMAT
 }
 
+using TestParamType_convolution_gpu = ::testing::tuple<int,   // 0 - Filter size
+                                                       int,   // 1 - Input features
+                                                       int,   // 2 - Stride
+                                                       int,   // 3 - Output padding
+                                                       bool>; // 4 - With bias
+
+struct convolution_gpu : public ::testing::TestWithParam<TestParamType_convolution_gpu>
+{
+    static std::string
+    PrintToStringParamName(testing::TestParamInfo<TestParamType_convolution_gpu> param_info)
+    {
+        // construct a readable name
+        return std::to_string(testing::get<0>(param_info.param))
+            + 'x' + std::to_string(testing::get<0>(param_info.param))
+            + "_f" + std::to_string(testing::get<1>(param_info.param))
+            + "_stride" + std::to_string(testing::get<2>(param_info.param))
+            + "_pad" + std::to_string(testing::get<3>(param_info.param))
+            + (testing::get<4>(param_info.param) ? "_bias" : "");
+    }
+};
+
+TEST_P(convolution_gpu, b_fs_yx_fsv4)
+{
+    const int in_B = 2;
+    const int in_X = 56;
+    const int in_Y = 56;
+    const int _OuD = 32;
+    const int W_B = _OuD;
+
+    // Kernel sizes
+    int W_X = testing::get<0>(GetParam());
+    int W_Y = W_X;
+
+    // Convoluiton offset
+    int offSet = -(W_X / 2);
+
+    // Features
+    int in_F = testing::get<1>(GetParam());
+    int W_F = in_F;
+
+    // Stride
+    int stride = testing::get<2>(GetParam());
+
+    // Output padding
+    int output_padding = testing::get<3>(GetParam());
+
+    // Biases
+    bool with_bias = testing::get<4>(GetParam());
+
+    engine engine;
+
+    // Input data init
+    std::vector<char> Data(in_B * in_F * in_X * in_Y);
+    std::iota(Data.begin(), Data.end(), 0);
+    auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {in_B, in_F, in_X, in_Y}});
+    set_values(input, std::move(Data));
+
+    // Create a topology
+    topology topology(input_layout("input", input.get_layout()));
+
+    // Reorder
+    topology.add(reorder("reorder_in",
+                         "input",
+                         layout(data_types::i8, format::b_fs_yx_fsv4, {in_B, in_F, in_X, in_Y})));
+
+    // Weights init
+    std::vector<char> Weights(W_B * W_F * W_X * W_Y);
+    std::iota(Weights.begin(), Weights.end(), 0);
+    auto weights_gold =
+        memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
+    auto weights_imad =
+        memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
+    set_values(weights_gold, Weights);
+    set_values(weights_imad, std::move(Weights));
+    topology.add(data("weights_gold", weights_gold), data("weights_imad", weights_imad));
+
+    if (with_bias)
+    {
+        // Bias, Callibraiton, Quantization
+        std::vector<float> vB(_OuD), vC(_OuD), vQ(_OuD);
+        float x = 0.1f;
+        std::generate(vB.begin(), vB.end(), [x]() mutable {
+            x += 0.01f;
+            if (x >= 0.9f)
+                x = 0.1f;
+            return x;
+        });
+        x = 0.2f;
+        std::generate(vC.begin(), vC.end(), [x]() mutable {
+            x += 0.01f;
+            if (x >= 0.9f)
+                x = 0.2f;
+            return x;
+        });
+        x = 0.3f;
+        std::generate(vQ.begin(), vQ.end(), [x]() mutable {
+            x += 0.01f;
+            if (x >= 0.9f)
+                x = 0.3f;
+            return x;
+        });
+        auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        auto callib_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        auto callib_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        auto quant_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        auto quant_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, _OuD, 1}});
+        set_values(bias_gold, vB);
+        set_values(bias_imad, std::move(vB));
+        set_values(callib_gold, vC);
+        set_values(callib_imad, std::move(vC));
+        set_values(quant_gold, vQ);
+        set_values(quant_imad, std::move(vQ));
+        topology.add(data("bias_gold", bias_gold),
+                     data("callib_gold", callib_gold),
+                     data("quant_gold", quant_gold));
+        topology.add(data("bias_imad", bias_imad),
+                     data("callib_imad", callib_imad),
+                     data("quant_imad", quant_imad));
+
+        // Convolutions
+        convolution conv_gold("conv_gold",
+                              "input",
+                              {"weights_gold"},
+                              {"bias_gold"},
+                              {"quant_gold"},
+                              {"callib_gold"},
+                              1.0f,
+                              {1, 1, stride, stride},
+                              {0, 0, offSet, offSet});
+        convolution conv_imad("conv_imad",
+                              "reorder_in",
+                              {"weights_imad"},
+                              {"bias_imad"},
+                              {"quant_imad"},
+                              {"callib_imad"},
+                              1.0f,
+                              {1, 1, stride, stride},
+                              {0, 0, offSet, offSet});
+        conv_gold.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
+        conv_imad.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
+        topology.add(conv_gold, conv_imad);
+    }
+    else
+    {
+        // Convolutions
+        convolution conv_gold(
+            "conv_gold", "input", {"weights_gold"}, {1, 1, stride, stride}, {0, 0, offSet, offSet});
+        convolution conv_imad(
+            "conv_imad", "reorder_in", {"weights_imad"}, {1, 1, stride, stride}, {0, 0, offSet, offSet});
+        conv_gold.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
+        conv_imad.output_padding = padding({0, 0, output_padding, output_padding}, 0.0f);
+        topology.add(conv_gold, conv_imad);
+    }
+
+    // Reorder
+    topology.add(reorder("reorder_out",
+                         "conv_imad",
+                         layout(data_types::i8,
+                                format::bfyx,
+                                {in_B, W_B, (in_X + stride - 1) / stride, (in_Y + stride - 1) / stride},
+                                padding({0, 0, output_padding, output_padding}, 0.0f))));
+
+    // Network build
+    build_options build_opt;
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+
+    // Network execuiton
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto out_gold = outputs.find("conv_gold");
+    auto out_test = outputs.find("reorder_out");
+    ASSERT_NE(out_gold, outputs.end());
+    ASSERT_NE(out_test, outputs.end());
+
+    auto gold_ptr = out_gold->second.get_memory().pointer<char>();
+    auto test_ptr = out_test->second.get_memory().pointer<char>();
+
+    ASSERT_EQ(gold_ptr.size(), test_ptr.size());
+    for (size_t i = 0; i < gold_ptr.size(); i++)
+    {
+        ASSERT_EQ(gold_ptr[i], test_ptr[i]);
+    }
+}
+
+// Select particular test cases
+INSTANTIATE_TEST_CASE_P(convolution_gpu_imad,
+                        convolution_gpu,
+                        ::testing::Values(
+                            // Filter size, Input features, Stride, Output padding, With bias
+                            TestParamType_convolution_gpu(1, 32, 1, 0, false),
+                            TestParamType_convolution_gpu(3, 32, 1, 0, false),
+                            TestParamType_convolution_gpu(7,  3, 1, 0, false),
+                            TestParamType_convolution_gpu(1, 32, 1, 0, true),
+                            TestParamType_convolution_gpu(3, 32, 1, 0, true),
+                            TestParamType_convolution_gpu(7,  3, 1, 0, true),
+                            TestParamType_convolution_gpu(1, 32, 1, 1, false),
+                            TestParamType_convolution_gpu(3, 32, 1, 1, false),
+                            TestParamType_convolution_gpu(7,  3, 1, 1, false),
+                            TestParamType_convolution_gpu(1, 32, 2, 0, false),
+                            TestParamType_convolution_gpu(3, 32, 2, 0, false),
+                            TestParamType_convolution_gpu(7,  3, 2, 0, false)),
+                        convolution_gpu::PrintToStringParamName);
+//// or test all combinations
+//INSTANTIATE_TEST_CASE_P(convolution_gpu_imad,
+//                        convolution_gpu,
+//                        ::testing::Combine(::testing::Values(1, 3, 7),    // Filter size
+//                                           ::testing::Values(3, 32),      // Input features
+//                                           ::testing::Values(1, 2),       // Stride
+//                                           ::testing::Values(0, 1),       // Output padding
+//                                           ::testing::Values(false, true) // With bias
+//                                           ),
+//                        convolution_gpu::PrintToStringParamName);
+
 class convolution_test : public tests::generic_test
 {
 
 public:
 
-    static void TearDownTestCase() 
+    static void TearDownTestCase()
     {
         for (auto generic_params : all_generic_params)
         {
@@ -3073,9 +4440,9 @@ public:
 
     static std::vector<cldnn::primitive*> generate_specific_test_params()
     {
-        // TODO: check split 
+        // TODO: check split
 
-        // TODO: check convolution without bias 
+        // TODO: check convolution without bias
 
         const std::vector<primitive_id>& weights = { "input1" };
         const std::vector<primitive_id>& bias = { "input2" };
@@ -3120,7 +4487,9 @@ public:
 
         std::vector<tensor> input_tensor_size = { tensor(1, 5, 59, 72), tensor(8, 3, 63, 56), tensor(16, 2, 50, 50), tensor(32, 1, 44, 62) };
 
-        for (cldnn::data_types data_type : test_data_types())
+        auto data_types = test_data_types();
+
+        for (cldnn::data_types data_type : data_types)
         {
             for (cldnn::format input_format : input_formats)
             {
@@ -3232,7 +4601,7 @@ public:
         const cldnn::convolution* convolution = (cldnn::convolution*)layer_params;
 
         data_types dt = inputs[0].get_layout().data_type;
-        
+
         tensor input_size = inputs[0].get_layout().size;
         tensor dilation = convolution->dilation;
         tensor stride = convolution->stride;
@@ -3261,7 +4630,7 @@ public:
 
         // Initialized output with zeros.
         std::fill(output_mem.begin(), output_mem.end(), static_cast<Type>(0));
-    
+
         // Add the bias
         for (int b = 0; b < input_size.batch[0]; b++)
         {
@@ -3377,7 +4746,7 @@ TEST_P(convolution_test, CONVOLUTION)
     run_single_test();
 }
 
-INSTANTIATE_TEST_CASE_P(DISABLED_CONVOLUTION, 
-                        convolution_test, 
+INSTANTIATE_TEST_CASE_P(DISABLED_CONVOLUTION,
+                        convolution_test,
                         ::testing::ValuesIn(convolution_test::generate_all_test_params()),
                         tests::generic_test::custom_param_name_functor());
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_input_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_input_gpu_test.cpp
index a3cbc0a75..6f8b9d4c4 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_input_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_input_gpu_test.cpp
@@ -48,7 +48,7 @@ TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad
     //  -4    3.5    -0.5   21
     //   12  -18      4     -9
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -103,7 +103,7 @@ TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad
     //  -4    3.5    -0.5   21
     //   12  -18      4     -9
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -139,7 +139,7 @@ TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad
     }
 }
 
-TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_fusion) {
+TEST(convolution_grad_input_f32_fw_gpu, DISABLED_basic_wsiz2x2_in2x2x1x2_bfyx_stride2_fusion) {
     //  Filter : 2x2
     //  Input  : 2x2x1x2
     //  Output : 2x2x1x2
@@ -157,7 +157,7 @@ TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_fus
     //  -4    3.5    -0.5   21
     //   12  -18      4     -9
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -198,8 +198,8 @@ TEST(convolution_grad_input_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_fus
     auto output_ptr = output_prim.pointer<float>();
 
     std::vector<float> expected_output_vec = {
-        -3.f, 5.5f, 15.f, -14.f,
-        4.5f, 27.f, 11.f, 0.f
+        -3.f, 5.5f, 14.f, -15.f,
+        4.5f, 27.f, 10.f, -1.f
     };
 
     for (unsigned int i = 0; i < expected_output_vec.size(); i++)
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_weights_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_weights_gpu_test.cpp
index 0857fbaf0..1a7cd2a0d 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_weights_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/convolution_grad_weights_gpu_test.cpp
@@ -33,6 +33,22 @@
 using namespace cldnn;
 using namespace tests;
 
+void validate_output(std::vector<float> expected_weights_vec, std::map<primitive_id, network_output> outputs)
+{
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "conv_grad_weights");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+    auto output_ptr = output_prim.pointer<float>();
+
+    for (unsigned int i = 0; i < expected_weights_vec.size(); i++)
+    {
+        float x = float_round(expected_weights_vec[i]);
+        float y = float_round(output_ptr[i]);
+        EXPECT_FLOAT_EQ(x, y) << "on weights verification" << random_seed << std::endl;
+    }
+}
+
 TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) {
     //  Filter : 2x2
     //  Input grad  : 1x2x2x2
@@ -47,7 +63,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_p
     //  8  0.5
     //  6  9
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
@@ -123,7 +139,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in8x1x2x2_bfyx_stride2_p
     //  8  0.5
     //  6  9
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
@@ -195,7 +211,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_p
     //  8  0.5
     //  6  9
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -257,7 +273,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_p
     //  Bias:
     //  0
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.001f;
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 
@@ -275,10 +291,12 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_p
         mutable_data("biases", biases),
         convolution("conv", "input_reordered", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }),
         convolution_grad_input("conv_grad_input", "conv", { "weights" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 }),
-        convolution_grad_weights("conv_grad_weights", "conv", "input_reordered", { "weights" }, { "biases" }, { 1, 1, 1, 1 }, { 0, 0, -1, -1 })
+        convolution_grad_weights("conv_grad_weights", "conv", "input_reordered", { "weights" }, { "biases" }, { 1, 1, 1, 1 },
+        { 0, 0, -1, -1 }, { 1,1,1,1 }, "conv_grad_input")
     );
-
-    network network(engine, topology);
+    build_options opt;
+    opt.set_option(build_option::outputs({ "conv_grad_input", "conv_grad_weights" }));
+    network network(engine, topology, opt);
     network.set_input_data("input", input);
     network.set_learning_rate(lr);
 
@@ -329,7 +347,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_p
     //  8  0.5    1  2
     //  6  9      3  4
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
@@ -424,7 +442,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz1x1_in1x2x5x5_bfyx_stride2_p
     //  5  6   7  8
     //  9  10 11 11
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 5, 5 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 4 } });
@@ -515,7 +533,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz2x2_in32x1x2x2_yxfb_stride1)
     // y2: x1: 0.5  0.6  0.7  0.9  1    1.1  0.7  0.9  0.1  1.9  0.6  0.5  0.4  0.1  0.1  1.7  0.5  0.4  0.5  0.6  0.7  0.8  0.8  1.7  1.8  1.2  2.1  0.5  0.2  0.9  1.5  1.6
     // y2: x2: 0.5  0.6  0.7  0.9  1    1.1  0.7  0.9  0.1  1.9  0.1  1.7  0.5  0.4  0.4  0.1  0.1  1.7  0.5  0.4  0.5  0.6  1.2  2.1  0.5  0.2  0.9  0.4  0.1  1.2  1.7  1.8
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::yxfb,{ 32, 1, 2, 2 } });
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 32, 1, 3, 3 } });
@@ -597,7 +615,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz3x3_in2x1x3x3_bfyx_stride1_p
     //  0.5  0.6  0.7  0.9  1    1.1  0.7  0.9  0.1
     //  0.7  0.8  0.8  1.7  1.8  1.2  2.1  0.5  0.2
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 3 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
@@ -679,7 +697,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz3x3_in2x1x3x3_bfyx_stride1_p
     //  0.5  0.6  0.7  0.9  1    1.1  0.7  0.9  0.1
     //  0.7  0.8  0.8  1.7  1.8  1.2  2.1  0.5  0.2
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 3 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 3 } });
@@ -781,7 +799,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_p
     // b0:f0: 0.7  0.8  0.8  0.7  0.8  0.2  0.1    b0:f1: 0.4  0.6  0.1  0.2  0.1  0.1  0.7
     // b0:f0: 0.5  0.6  0.7  0.9  0.   0.1  0.7    b0:f1: 0.5  0.3  0.7  0.5  0.4  0.1  0.7
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 7, 7 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
@@ -927,7 +945,7 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_p
     // b0:f0: 0.7  0.8  0.8  0.7  0.8  0.2  0.1    b0:f1: 0.4  0.6  0.1  0.2  0.1  0.1  0.7
     // b0:f0: 0.5  0.6  0.7  0.9  0.   0.1  0.7    b0:f1: 0.5  0.3  0.7  0.5  0.4  0.1  0.7
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 7, 7 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 7, 7 } });
@@ -1044,3 +1062,52 @@ TEST(convolution_grad_weights_f32_fw_gpu, basic_wsiz7x7_in2x1x7x7_bfyx_stride1_p
         EXPECT_FLOAT_EQ(x, -y) << "on biases verification" << random_seed << std::endl;
     }
 }
+
+TEST(convolution_grad_weights_f32_fw_gpu, ngraph_2d_1item_2iterations) {
+    //  Filter : 2x1x2x2
+    //  Input grad  : 1x2x4x2
+    //  Input  : 1x1x5x3
+    //  Stride : 1x1
+
+    const auto& engine = get_test_engine();
+    auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 4, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 5, 3 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
+
+   
+    topology topology(
+        input_layout("input_grad", input_grad.get_layout()),
+        data("input", input),
+        mutable_data("weights", weights),
+        convolution_grad_weights("conv_grad_weights", "input_grad", "input", { "weights" }, { 1,1,1,1 }, { 0,0,0,0 }, { 1,1,1,1 }, true)
+    );
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+    network network(engine, topology, bo);
+    
+
+    // set values for first iteration
+    set_values(input,
+        { 0.671875f, 0.546875f, -0.5625f, -0.359375f, -0.09375f, 0.546875f, -0.546875f, 0.890625f, 0.828125f, -0.546875f, 1.f, -0.078125f, -0.890625f, 0.40625f, -0.359375f });
+    set_values(input_grad,
+        {   1.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 
+            0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f });
+    network.set_input_data("input_grad", input_grad);
+    std::vector<float> expected_weights_vec =
+    {   0.671875f, 0.546875f, 0.546875f, -0.546875f,
+        0.f, 0.f, 0.f, 0.f };
+    auto outputs = network.execute();
+    validate_output(expected_weights_vec, outputs);
+
+    // set values for second iteration
+    set_values(input_grad,
+        {   0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+            0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 1.f });
+    network.set_input_data("input_grad", input_grad);
+    expected_weights_vec = 
+    {   0.f, 0.f, 0.f, 0.f,
+        0.828125f, -0.546875f, 0.40625f, -0.359375f };
+    outputs =  network.execute();
+    validate_output(expected_weights_vec, outputs);
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp
index 8ab62ad68..fb8b6e364 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/crop_gpu_test.cpp
@@ -45,7 +45,7 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) {
     //  Input      : 2x3x4x5
     //  Output     : 1x2x2x3
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 3;
@@ -88,12 +88,60 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all) {
     }
 }
 
+TEST(crop_gpu, basic_int_in2x3x2x2_crop_all) {
+    //  Reference  : 1x2x2x2
+    //  Input      : 2x3x4x5
+    //  Output     : 1x2x2x3
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 3;
+    auto x_size = 4;
+    auto y_size = 5;
+
+    auto crop_batch_num = batch_num - 1;
+    auto crop_feature_num = feature_num - 1;
+    auto crop_x_size = x_size - 2;
+    auto crop_y_size = y_size - 2;
+
+    auto input = memory::allocate(engine, { data_types::i32, format::yxfb,{ batch_num, feature_num, x_size, y_size } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 }));
+
+    std::vector<int32_t> input_vec = generate_random_input<int32_t>(batch_num, feature_num, y_size, x_size, -10, 10);
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop").get_memory();
+    auto output_ptr = output.pointer<int32_t>();
+
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = b + batch_num * (f + feature_num * (x + x_size * y));
+                    int output_linear_id = b + crop_batch_num * (f + crop_feature_num * (x + crop_x_size * y));
+                    EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
+}
+
 TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) {
     //  Reference  : 3x1x2x2
     //  Input      : 6x2x4x3
     //  Output     : 3x1x2x2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 6;
     auto feature_num = 2;
@@ -137,6 +185,149 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_all_bfyx) {
     }
 }
 
+TEST(crop_gpu, basic_int_in2x3x2x2_crop_all_bfyx) {
+    //  Reference  : 3x1x2x2
+    //  Input      : 6x2x4x3
+    //  Output     : 3x1x2x2
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 6;
+    auto feature_num = 2;
+    auto x_size = 4;
+    auto y_size = 3;
+
+    auto crop_batch_num = batch_num - 3;
+    auto crop_feature_num = feature_num - 1;
+    auto crop_x_size = x_size - 2;
+    auto crop_y_size = y_size - 1;
+
+    auto input = memory::allocate(engine, { data_types::i32,format::bfyx,{ batch_num, feature_num, x_size, y_size } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 }));
+
+    std::vector<int32_t> input_vec = generate_random_input<int32_t>(batch_num, feature_num, y_size, x_size, -10, 10);
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop").get_memory();
+    auto output_ptr = output.pointer<int32_t>();
+    std::vector<int32_t> a;
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = x + x_size * (y + y_size * (f + feature_num * b));
+                    int output_linear_id = x + crop_x_size * (y + crop_y_size * (f + crop_feature_num * b));
+                    a.push_back(output_ptr[output_linear_id]);
+                    EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
+}
+
+TEST(crop_gpu, basic_in2x3x2x2_crop_all_fyxb) {
+    //  Reference  : 3x1x2x2
+    //  Input      : 6x2x4x3
+    //  Output     : 3x1x2x2
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 6;
+    auto feature_num = 2;
+    auto x_size = 4;
+    auto y_size = 3;
+
+    auto crop_batch_num = batch_num - 3;
+    auto crop_feature_num = feature_num - 1;
+    auto crop_x_size = x_size - 2;
+    auto crop_y_size = y_size - 1;
+
+    auto input = memory::allocate(engine, { data_types::f32,format::fyxb,{ batch_num, feature_num, x_size, y_size } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, {0, 0, 0, 0} ));
+
+    std::vector<float> input_vec = generate_random_input<float>(batch_num, feature_num, y_size, x_size, -10, 10);
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop").get_memory();
+    auto output_ptr = output.pointer<float>();
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = b + batch_num * (x + x_size * (y + y_size * f));
+                    int output_linear_id = b + crop_batch_num * (x + crop_x_size * (y + crop_y_size * f));
+                    EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
+}
+
+TEST(crop_gpu, basic_int_in2x3x2x2_crop_all_fyxb) {
+    //  Reference  : 3x1x2x2
+    //  Input      : 6x2x4x3
+    //  Output     : 3x1x2x2
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 6;
+    auto feature_num = 2;
+    auto x_size = 4;
+    auto y_size = 3;
+
+    auto crop_batch_num = batch_num - 3;
+    auto crop_feature_num = feature_num - 1;
+    auto crop_x_size = x_size - 2;
+    auto crop_y_size = y_size - 1;
+
+    auto input = memory::allocate(engine, { data_types::i32,format::fyxb,{ batch_num, feature_num, x_size, y_size } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop", "input", { crop_batch_num, crop_feature_num, crop_x_size, crop_y_size }, { 0, 0, 0, 0 }));
+
+    std::vector<int32_t> input_vec = generate_random_input<int32_t>(batch_num, feature_num, y_size, x_size, -10, 10);
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop").get_memory();
+    auto output_ptr = output.pointer<int32_t>();
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = b + batch_num * (x + x_size * (y + y_size * f));
+                    int output_linear_id = b + crop_batch_num * (x + crop_x_size * (y + crop_y_size * f));
+                    EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
+}
+
 TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) {
     //  Reference  : 1x2x2x1
     //  Offsets    : 1x0x1x1
@@ -145,11 +336,11 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) {
 
     //  Input:
     //  f0: b0:  1    2  -10   b1:   0    0    -11
-    //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15  
-    //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13     
+    //  f0: b0:  3    4  -14   b1:   0.5 -0.5  -15
+    //  f1: b0:  5    6  -12   b1:   1.5  5.2  -13
     //  f1: b0:  7    8  -16   b1:   12   8    -17
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -202,11 +393,76 @@ TEST(crop_gpu, basic_in2x3x2x2_crop_offsets) {
     }
 }
 
+TEST(crop_gpu, basic_int_in2x3x2x2_crop_offsets) {
+    //  Reference  : 1x2x2x1
+    //  Offsets    : 1x0x1x1
+    //  Input      : 2x2x3x2
+    //  Output     : 1x2x2x1
+
+    //  Input:
+    //  f0: b0:  1    2  -10   b1:   0    0    -11
+    //  f0: b0:  3    4  -14   b1:   50   -5   -15
+    //  f1: b0:  5    6  -12   b1:   15   52   -13
+    //  f1: b0:  7    8  -16   b1:   12   8    -17
+
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 2;
+    auto feature_num = 2;
+    auto x_size = 3;
+    auto y_size = 2;
+
+    auto crop_batch_num = batch_num - 1;
+    auto crop_feature_num = feature_num;
+    auto crop_x_size = x_size - 1;
+    auto crop_y_size = y_size - 1;
+
+    auto batch_offset = 1;
+    auto feature_offset = 0;
+    auto x_offset = 1;
+    auto y_offset = 1;
+
+    auto input = memory::allocate(engine, { data_types::i32, format::yxfb,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num)), { tensor(feature(0)) }));
+
+    std::vector<int32_t> input_vec = { 1, 0, 5, 15,
+        2, 0, 6, 52,
+        -10, -11, -12, -13,
+        3, 50, 7, 12,
+        4, -5, 8, 8,
+        -14, -15, -16, -17 };
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop").get_memory();
+    auto output_ptr = output.pointer<int32_t>();
+
+    for (int b = 0; b < crop_batch_num; ++b) { //B
+        for (int f = 0; f < crop_feature_num; ++f) { //F
+            for (int y = 0; y < crop_y_size; ++y) { //Y
+                for (int x = 0; x < crop_x_size; ++x) { //X
+                    int linear_id = (b + batch_offset) + batch_num * ((f + feature_offset) + feature_num * ((x + x_offset) + x_size * (y + y_offset)));
+                    int output_linear_id = b + crop_batch_num * (f + crop_feature_num * (x + crop_x_size * y));
+                    EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                }
+            }
+        }
+    }
+}
+
 TEST(crop_gpu, basic_in1x4x1x1_split) {
     // Tests split with crop implementation
     //                 _CROP_1(1x3x1x1,offset(0x0x0x0))
     //                |
-    //  INPUT(1x4x1x1)  
+    //  INPUT(1x4x1x1)
     //                |_
     //                  CROP_2(1x1x1x1,offset(0x3x0x0))
     //
@@ -231,7 +487,7 @@ TEST(crop_gpu, basic_in1x4x1x1_split) {
 
     //  Out2:
     //  f0: 4.0
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 1;
     auto feature_num = 4;
@@ -278,11 +534,87 @@ TEST(crop_gpu, basic_in1x4x1x1_split) {
         EXPECT_EQ(output_ptr_2[i], out2[i]);
 }
 
+TEST(crop_gpu, basic_int_in1x4x1x1_split) {
+    // Tests split with crop implementation
+    //                 _CROP_1(1x3x1x1,offset(0x0x0x0))
+    //                |
+    //  INPUT(1x4x1x1)
+    //                |_
+    //                  CROP_2(1x1x1x1,offset(0x3x0x0))
+    //
+    //  Reference1  : 1x3x1x1
+    //  Offsets1    : 0x0x0x0
+    //  Reference2  : 1x1x1x1
+    //  Offsets2    : 0x3x0x0
+    //  Input       : 1x4x1x1
+    //  Output1     : 1x3x1x1
+    //  Output2     : 1x1x1x1
+
+    //  Input:
+    //  f0: -1
+    //  f1:  2
+    //  f2: -3
+    //  f3:  4
+
+    //  Out1:
+    //  f0: -1
+    //  f1:  2
+    //  f2: -3
+
+    //  Out2:
+    //  f0: 4
+    const auto& engine = get_test_engine();
+
+    auto batch_num = 1;
+    auto feature_num = 4;
+    auto x_size = 1;
+    auto y_size = 1;
+
+    auto crop_batch_num = 1;
+    auto crop_feature_num_1 = 3;
+    auto crop_feature_num_2 = 1;
+    auto crop_x_size = 1;
+    auto crop_y_size = 1;
+    auto feature_offset_1 = 0;
+    auto feature_offset_2 = 3;
+    auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ tensor(spatial(x_size, y_size), feature(feature_num), batch(batch_num)) } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(crop("crop1", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_1)), { tensor(feature(feature_offset_1), spatial(0,0),batch(0)) }));
+    topology.add(crop("crop2", "input", tensor(batch(crop_batch_num), spatial(crop_x_size, crop_y_size), feature(crop_feature_num_2)), { tensor(feature(feature_offset_2), spatial(0,0),batch(0)) }));
+
+    std::vector<int32_t> input_vec = { -1, 2, -3, 4 };
+    std::vector<int32_t> out1 = { -1, 2,-3 };
+    std::vector<int32_t> out2 = { 4, };
+    set_values(input, input_vec);
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+    bo.set_option(build_option::outputs(topology.get_primitive_ids()));
+
+    network network(engine, topology, bo);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("crop1").get_memory();
+    auto output_ptr = output.pointer<int32_t>();
+
+    for (size_t i = 0; i < out1.size(); i++)
+        EXPECT_EQ(output_ptr[i], out1[i]);
+
+    std::cout << std::endl;
+    auto output_2 = outputs.at("crop2").get_memory();
+    auto output_ptr_2 = output_2.pointer<int32_t>();
+
+    for (size_t i = 0; i < out2.size(); i++)
+        EXPECT_EQ(output_ptr_2[i], out2[i]);
+}
+
 TEST(crop_gpu, basic_in1x4x1x1_split_w_relu) {
     // Tests split with crop implementation
     //                        _ CROP_1(1x3x1x1,offset(0x0x0x0)) --> RELU
     //                       |
-    //  INPUT(1x4x1x1)--RELU  
+    //  INPUT(1x4x1x1)--RELU
     //                       |_
     //                          CROP_2(1x1x1x1,offset(0x3x0x0)) --> RELU
     //
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp
index a3ad88faf..f74a5f985 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/custom_gpu_primitive_test.cpp
@@ -60,7 +60,7 @@ TEST(custom_gpu_primitive_f32, add_basic_in2x2x2x2) {
     //  f1: b0:   15  16.5  b1:  22    16.5     
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -153,7 +153,7 @@ void add_basic_in2x2x2x2_with_reorder()
     //  f1: b0:   15  16.5  b1:  22    16.5     
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
@@ -266,7 +266,7 @@ TEST(custom_gpu_primitive_f32, eltwise_add_basic_in2x2x2x2) {
     //  f1: b0:   15  16.5  b1:  22    16.5     
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
@@ -359,7 +359,7 @@ TEST(custom_gpu_primitive_f32, add_eltwise_basic_in2x2x2x2) {
     //  f1: b0:   15  16.5  b1:  22    16.5     
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
@@ -452,7 +452,7 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2
     //  f1: b0:   15  16.5  b1:  22    16.5     
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
 
@@ -523,3 +523,74 @@ TEST(custom_gpu_primitive_f32, two_kernels_with_same_entry_point_basic_in2x2x2x2
         EXPECT_TRUE(are_equal(input_ptr[i] + 7, output_ptr[i]));
     }
 }
+
+TEST(custom_gpu_primitive_u8, add_basic_in2x2x2x2) {
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } });
+
+    std::string kernel_code =
+        R"__krnl(
+            __kernel void add_kernel(const __global uchar* input0, const __global uchar* input1, __global uchar* output)
+            {
+                const unsigned idx = get_global_id(0);
+                output[idx] = input0[idx] + input1[idx];
+            }
+        )__krnl";
+    std::string entry_point = "add_kernel";
+    std::vector<cldnn_arg> parameters = { { arg_input, 0 },{ arg_input, 1 },{ arg_output, 0 } };
+    layout output_layout = { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } };
+    std::vector<size_t> gws = { output_layout.count() };
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(custom_gpu_primitive(
+        "user_kernel",
+        { "input", "input2" },
+        { kernel_code },
+        entry_point,
+        parameters,
+        "-cl-mad-enable",
+        output_layout,
+        gws));
+
+    set_values<unsigned char>(input, {
+          1,   0,   5,    1,
+        200, 100, 160,  150,
+        130,   0, 175,   12,
+          4, 100,   8,  180
+    });
+
+    set_values<unsigned char>(input2, {
+         0,  2,  0,  2,
+        55, 75, 20,  4,
+        15, 17, 80, 10,
+         2, 60,  0, 20 
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "user_kernel");
+
+    auto output = outputs.at("user_kernel").get_memory();
+
+    unsigned char answers[16] = {
+          1,   2,   5,   3,
+        255, 175, 180, 154,
+        145,  17, 255,  22,
+          6, 160,   8, 200
+    };
+
+    auto output_ptr = output.pointer<unsigned char>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp
index 2f3ffe5c7..f546e37cd 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/deconvolution_gpu_test.cpp
@@ -53,7 +53,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad) {
     //   18    0.75  7.25   
     //   23    42.5  15.5   
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
@@ -115,7 +115,7 @@ TEST(deconvolution_f32_fw_gpu, no_bias_basic_wsiz2x2_in2x2x1x1_nopad) {
     //   18    0.75  7.25   
     //   23    42.5  15.5   
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
@@ -173,7 +173,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_nopad_bfyx) {    //  Filt
     //   18    0.75  7.25   
     //   23    42.5  15.5   
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
@@ -233,7 +233,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_pad1) {
     //  Output:
     //  0.75  
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
@@ -284,7 +284,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride2_nopad) {
     //  Output:
     //  0.75  
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
@@ -349,7 +349,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x1_stride4_pad2) {
     //  0    0    0
     //  6    0   -18
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
@@ -411,7 +411,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_stride2_pad1) {
     //  -3    4.5    0.5   22
     //   13  -17     5    -7
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
@@ -476,7 +476,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1) {
     //  f1: 1    8.5
     //  f1: 17 - 13
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 1, 2, 2 } });
@@ -537,7 +537,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1) {
     //  -3    4.5    0.5   22
     //   13  -17     5    -7
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 2, 2 } });
@@ -599,7 +599,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_stride2_pad1_input_p
     //  -3    4.5    0.5   22
     //   13  -17     5    -7
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -666,7 +666,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2x2_in2x2x1x1_stride2_pad1_input_padd
     //  f1: 1    8.5
     //  f1: 17 - 13
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 1, 2, 2 } });
@@ -728,7 +728,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
     //  -3    4.5    0.5   22
     //   13  -17     5    -7
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
@@ -789,7 +789,7 @@ TEST(deconvolution_f16_fw_gpu, basic_wsiz2x2_in2x2x1x2_bfyx_yxfb_stride2_pad1) {
     //  -3    4.5    0.5   22
     //   13  -17     5    -7
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 1, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -861,7 +861,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2)
     //  -3    4.5    -8   -28
     //   13  -17     1    -17
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -906,11 +906,56 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2)
     }
 }
 
+TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group2) {
+    //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 2, 2 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 1 } });
+
+    set_values(input, { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f });
+    set_values(weights, {
+        -2.f, 2.f, 7.f, -0.5f,
+        -4.f, 1.f, -9.f, -7.f
+    });
+    set_values(biases, { 1.0f, -1.0f });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 2, 2 }, { 0, 0, -1, -1 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "deconv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    std::vector<float> expected_output_vec = {
+        -3.f, 4.5f, 13.f, -17.f,
+        -8.f, -28.f, 1.f, -17.f
+    };
+
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++)
+    {
+        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
+
 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt) {
     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 
-    engine engine; 
+    const auto& engine = get_test_engine(); 
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
     set_values(input, 
@@ -989,11 +1034,93 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_
     }
 }
 
+TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16) {
+    //  Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
+    //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
+    set_values(input,
+    { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
+    });
+
+    topology topology(input_layout("input", input.get_layout()));
+
+    std::vector<primitive_id> weights_vec;
+    std::vector<primitive_id> bias_vec;
+    
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 16, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 16, 1 } });
+
+    set_values(weights, 
+        {
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f
+        }
+    );
+    set_values(biases, { 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f });
+    topology.add(
+        data("weights", weights),
+        data("bias", biases)
+    );
+   
+    topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "deconv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    std::vector<float> expected_output_vec = {
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+    };
+
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++)
+    {
+        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
+
 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2) {
     //  Test for depthwise separable optimization, there are 16 weights and biases (split 16)
     //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
     set_values(input,
@@ -1072,6 +1199,96 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_
     }
 }
 
+TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_group16_ofm2) {
+    //  Test for depthwise separable optimization, there are 16 joined weights and biases (group 16)
+    //  data is similar as in basic_wsiz2x2_in1x2x2x2_bfyx_stride2_pad1_split2_depthwise_sep_opt_ofm2
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 16, 2, 2 } });
+    set_values(input,
+    { 8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f,
+        8.f, 0.5f, 6.f, 9.f, 1.f, 3.f, 2.f, 4.f
+    });
+
+    topology topology(input_layout("input", input.get_layout()));
+
+    std::vector<primitive_id> weights_vec;
+    std::vector<primitive_id> bias_vec;
+    
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 32, 1, 2, 2 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 32, 1 } });
+    
+    set_values(weights,
+        {
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+            -2.f, 2.f, 7.f, -0.5f, -2.f, 2.f, 7.f, -0.5f,
+            -4.f, 1.f, -9.f, -7.f, -4.f, 1.f, -9.f, -7.f,
+        }
+    );
+
+    set_values(biases,
+        {
+            1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f,
+            1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f
+        }
+    );
+
+    topology.add(
+        data("weights", weights),
+        data("bias", biases)
+    );
+
+    topology.add(deconvolution("deconv", "input", { "weights" }, { "bias" }, 16, { 1, 1, 2, 2 }, { 0, 0, -1, -1 }));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "deconv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    std::vector<float> expected_output_vec = {
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+        -3.f, 4.5f, 13.f, -17.f,-3.f, 4.5f, 13.f, -17.f, -8.f, -28.f, 1.f, -17.f, -8.f, -28.f, 1.f, -17.f,
+    };
+
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++)
+    {
+        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
+
+
 TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3) {
     //  Filter : 1x1
     //  Stride : 1x1
@@ -1109,7 +1326,7 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_
     //   6
     //  -2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
     auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 2, 1, 1 } });
@@ -1152,4 +1369,52 @@ TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_
     {
         EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
     }
-}
-\ No newline at end of file
+}
+
+TEST(deconvolution_f32_fw_gpu, basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_group2_ofm3) {
+    //  data is similar as in basic_wsiz2x2_in1x6x1x1_bfyx_stride2_pad1_split2_ofm3
+
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 4, 1, 1 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 6, 2, 1, 1 } });
+    auto biases = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 6, 1 } });
+
+    set_values(input, {
+        1.5f, 0.5f, 2.0f, -1.0f
+    });
+    set_values(weights, {
+        -2.0f, 1.0f, 1.0f, 3.0f, 0.5f, 8.0f,
+        4.0f, -4.0f, 2.0f, 0.5f, -0.5f, 3.0f
+    });
+    set_values(biases, {
+        1.0f, 5.0f, 3.0f,
+        -1.0f, 2.5f, 2.0f
+    });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        data("biases", biases),
+        deconvolution("deconv", "input", { "weights" }, { "biases" }, 2, { 1, 1, 1, 1 }, { 0, 0, 0, 0 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "deconv");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    std::vector<float> expected_output_vec = {
+        -1.5f, 8.0f, 7.75f, 11.0f, 6.0f, -2.0f
+    };
+    for (unsigned int i = 0; i < expected_output_vec.size(); i++)
+    {
+        EXPECT_FLOAT_EQ(expected_output_vec[i], output_ptr[i]);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp
index e0f08ab4a..9ffa10c1e 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_concatenate_gpu_test.cpp
@@ -20,6 +20,10 @@
 #include "api/CPP/memory.hpp"
 #include <api/CPP/input_layout.hpp>
 #include "api/CPP/concatenation.hpp"
+#include "api/CPP/convolution.hpp"
+#include "api/CPP/data.hpp"
+#include "api/CPP/pooling.hpp"
+#include "api/CPP/upsampling.hpp"
 #include <api/CPP/topology.hpp>
 #include <api/CPP/network.hpp>
 #include <api/CPP/engine.hpp>
@@ -63,7 +67,7 @@ TEST(depth_concatenate_f32_gpu, test01) {
     //  0   -0.2  :f4
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, {data_types::f32, format::yxfb, { 2,2,1,1 }});
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 }});
 
@@ -123,7 +127,7 @@ void concat_basic_with_reorder()
     //  0    0  :f4
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,2,1,1 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
     auto outs = { 2.0f, 3.0f, 0.0f, 1.0f, 1.0f, 4.0f, -4.0f, -7.0f, 0.0f, 0.0f };
@@ -200,7 +204,7 @@ TEST(depth_concatenate_f32_gpu, test02) {
     //  0   -0.2  :f7
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,2,1,1 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2,3,1,1 } });
     auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2,3,1,1 } });
@@ -246,12 +250,47 @@ TEST(depth_concatenate_f32_gpu, test02) {
     EXPECT_FLOAT_EQ(-0.2f, output_ptr[15]);
 }
 
+TEST(concatenate_f32_gpu, test_concatenation_of_pool_and_unpool)
+{
+    engine engine;
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto weights = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 2, 1 } });
+
+    set_values(input1, { 16.0f, 32.0f, 128.0f, 256.0f });
+    set_values(weights, { .1f, .2f });
+    topology topology;
+    topology.add(input_layout("input1", input1.get_layout()));
+    topology.add(pooling("pool1", "input1",
+        cldnn::pooling_mode::max,
+        { 1,1,2,1 },          /*kernel*/
+        { 1,1,1,1 }           /*stride*/
+    ));
+    topology.add(upsampling("unpool1", "input1", 1, 0, upsampling_sample_type::nearest));
+    topology.add(concatenation("concat1", { "pool1", "unpool1" }, cldnn::concatenation::along_x));
+    topology.add(data("weights", weights)),
+    topology.add(convolution("conv", "concat1", { "weights" }));
+
+    cldnn::build_options options;
+    options.set_option(cldnn::build_option::optimize_data(true));
+    network network(engine, topology, options);
+    network.set_input_data("input1", input1);
+
+    auto outputs = network.execute({});
+    auto output = outputs.at("conv").get_memory();
+    std::vector<float> out_ref = { 6.4f, 8.f, 51.2f, 64.f };
+    auto output_ptr = output.pointer<float>();
+    for (int i=0; i<4; i++)
+    {
+        EXPECT_NEAR(output_ptr[i], out_ref[i], 1e-3);
+    }
+}
+
 TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) {
     //  Test for cascade concatenation optimization.
     //  Despite having concatenations one after another and connected to different non padded activation primitives,
     //  graph should remove all concatenations from execution.
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,2,2,1 } });
 
     set_values(input1, { 16.0f, 32.0f, 128.0f, 256.0f });
@@ -305,7 +344,7 @@ TEST(depth_concatenate_f32_gpu, test03_cascade_concat_opt) {
 TEST(depth_concatenate_f32_gpu, test04_fused_relu) {
     // 2 inputs of size 3x10x10 concatenated on f axis with fused relu
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,10,10 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,10,10 } });
 
@@ -345,11 +384,10 @@ TEST(depth_concatenate_f32_gpu, test04_fused_relu) {
     }
 }
 
-
 TEST(depth_concatenate_f32_gpu, test05_different_formats) {
     // 2 inputs of size 3x10x10 concatenated on f axis 
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1,3,2,2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1,3,2,2 } });
 
@@ -397,6 +435,237 @@ TEST(depth_concatenate_f32_gpu, test05_different_formats) {
 
 }
 
+TEST(depth_concatenate_i32_gpu, optimize_data01) {
+
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,1,1 } });
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(cldnn::concatenation("int1", { "input" }, cldnn::concatenation::along_f));
+    topology.add(cldnn::concatenation("result1", { "int1" }, cldnn::concatenation::along_f));
+    topology.add(cldnn::concatenation("result2", { "int1" }, cldnn::concatenation::along_f));
+
+
+    std::vector<int> input_data = { 4 };
+    std::vector<int> out_data = { 4 };
+    set_values(input, input_data);
+
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    for (auto& it : outputs)
+    {
+        auto output_ptr = it.second.get_memory().pointer<int>();
+        EXPECT_EQ(output_ptr[0], out_data[0]);
+    }
+}
+
+TEST(depth_concatenate_i32_gpu, optimize_data02) {
+
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+    auto input2 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+    auto input3 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+    auto input4 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+
+    topology topology;
+    topology.add(
+        input_layout("input1", input1.get_layout())
+    );
+    topology.add(
+        input_layout("input2", input2.get_layout())
+    );
+    topology.add(
+        input_layout("input3", input3.get_layout())
+    );
+    topology.add(
+        input_layout("input4", input4.get_layout())
+    );
+
+    topology.add(cldnn::concatenation("concat1", { "input1", "input2" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat2", { "input3", "input4" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat3", { "input2", "input4" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat4", { "concat1", "concat2" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat5", { "concat2", "concat3" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat6", { "concat4", "concat5" }, cldnn::concatenation::along_x));
+
+    std::vector<int> input_data1 =
+    { 1, 2,
+      3, 4 };
+
+    std::vector<int> input_data2 =
+    { 5, 6,
+      7, 8 };
+
+    std::vector<int> input_data3 =
+    { 9, 10,
+     11, 12 };
+
+    std::vector<int> input_data4 =
+    { 12, 14,
+      15, 16 };
+
+    std::vector<int> c6_data =
+    { 1, 2, 5, 6,  9, 10, 12, 14,  9, 10, 12, 14, 5, 6, 12, 14,
+      3, 4, 7, 8, 11, 12, 15, 16, 11, 12, 15, 16, 7, 8, 15, 16 };
+
+    set_values(input1, input_data1);
+    set_values(input2, input_data2);
+    set_values(input3, input_data3);
+    set_values(input4, input_data4);
+
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    network.set_input_data("input4", input4);
+    auto outputs = network.execute();
+
+    auto output_concat6 = outputs.at("concat6").get_memory().pointer<int>();
+
+    for (size_t i = 0; i < output_concat6.size(); i++) {
+        EXPECT_EQ(output_concat6[i], c6_data[i]);
+    }
+}
+
+TEST(depth_concatenate_i32_gpu, optimize_data03) {
+
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+
+    topology topology;
+    topology.add(
+        input_layout("input1", input1.get_layout())
+    );
+
+    topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat4", { "concat3" }, cldnn::concatenation::along_x));
+
+    std::vector<int> input_data1 =
+    { 1, 2,
+      3, 4 };
+
+    std::vector<int> output_data =
+    { 1, 2,
+      3, 4 };
+
+    set_values(input1, input_data1);
+
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+    network.set_input_data("input1", input1);
+
+    auto outputs = network.execute();
+
+    for (auto& it : outputs)
+    {
+        auto output_ptr = it.second.get_memory().pointer<int>();
+        for (size_t i = 0; i < output_ptr.size(); i++) {
+            EXPECT_EQ(output_ptr[i], output_data[i]);
+        }
+    }
+}
+
+TEST(depth_concatenate_i32_gpu, optimize_data04) {
+
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+
+    topology topology;
+    topology.add(
+        input_layout("input1", input1.get_layout())
+    );
+
+    topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat4", { "concat2", "concat3" }, cldnn::concatenation::along_x));
+
+    std::vector<int> input_data1 =
+    { 1, 2,
+      3, 4 };
+
+    std::vector<int> output_data =
+    { 1, 2, 1, 2,
+      3, 4, 3, 4 };
+
+    set_values(input1, input_data1);
+
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+    network.set_input_data("input1", input1);
+
+    auto outputs = network.execute();
+
+    for (auto& it : outputs)
+    {
+        auto output_ptr = it.second.get_memory().pointer<int>();
+        for (size_t i = 0; i < output_ptr.size(); i++) {
+            EXPECT_EQ(output_ptr[i], output_data[i]);
+        }
+    }
+}
+
+TEST(depth_concatenate_i32_gpu, optimize_data05) {
+
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    auto input1 = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1,1,2,2 } });
+
+    topology topology;
+    topology.add(
+        input_layout("input1", input1.get_layout())
+    );
+
+    topology.add(cldnn::concatenation("concat1", { "input1" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat2", { "concat1" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat3", { "concat1" }, cldnn::concatenation::along_x));
+
+    topology.add(cldnn::concatenation("concat4", { "concat2", "concat3" }, cldnn::concatenation::along_x));
+    topology.add(cldnn::concatenation("concat5", { "concat1", "concat4" }, cldnn::concatenation::along_x));
+
+    std::vector<int> input_data1 =
+    { 1, 2,
+      3, 4 };
+
+    std::vector<int> c5_data =
+    { 1, 2, 1, 2, 1, 2,
+      3, 4, 3, 4, 3, 4 };
+
+    set_values(input1, input_data1);
+
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+    network.set_input_data("input1", input1);
+
+    auto outputs = network.execute();
+
+    auto output_concat5 = outputs.at("concat5").get_memory().pointer<int>();
+
+    for (size_t i = 0; i < output_concat5.size(); i++) {
+        EXPECT_EQ(output_concat5[i], c5_data[i]);
+    }
+}
+
 //////////////////////////////////////////////////////////////////////////////
 //                                                                          //
 //                      Exhaustive Negative Matrix tests                    //
@@ -409,7 +678,7 @@ static network setup_depth_concatatenate_network(const std::vector<data_types> d
     assert(dts.size() == ts.size());
     const size_t sz = ts.size();
 
-    engine engine;
+    const auto& engine = get_test_engine();
     topology topology;
 
     std::vector<std::string> input_names;
@@ -504,7 +773,9 @@ public:
     {
         std::vector<tests::test_params*> all_generic_params;
 
-        for (cldnn::data_types dt : test_data_types())
+        auto data_types = test_data_types();
+
+        for (cldnn::data_types dt : data_types)
         for (int32_t b : test_batch_sizes)
         for (tensor & t : test_input_sizes)
         {
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp
new file mode 100644
index 000000000..49e8dcbb6
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/depth_to_space_gpu_test.cpp
@@ -0,0 +1,308 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/depth_to_space.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+
+#include <cstddef>
+#include <tests/test_utils/test_utils.h>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(depth_to_space_fp16_gpu, d1411_bs2) {
+    //  Input  : 1x4x1x1
+    //  Block size : 2
+    //  Output : 1x1x2x2
+    //  Input values in fp16
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 4, 1, 1 } });
+    size_t block_size = 2;
+
+    set_values(input1, {
+        FLOAT16(0.0f), FLOAT16(1.0f),
+        FLOAT16(2.0f), FLOAT16(3.0f)
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(
+        depth_to_space("depth_to_space", "Input0", block_size)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input1);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("depth_to_space").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+        0.f, 1.f, 2.f, 3.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(depth_to_space_fp16_gpu, d1421_bs2) {
+    //  Input  : 1x4x2x1
+    //  Block size : 2
+    //  Output : 1x1x4x2
+    //  Input values in fp16
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 4, 1, 2 } });
+    size_t block_size = 2;
+
+    set_values(input1, {
+        FLOAT16(0.0f), FLOAT16(1.0f),
+        FLOAT16(2.0f), FLOAT16(3.0f),
+        FLOAT16(4.0f), FLOAT16(5.0f),
+        FLOAT16(6.0f), FLOAT16(7.0f)
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(
+        depth_to_space("depth_to_space", "Input0", block_size)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input1);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("depth_to_space").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+        0.0f, 2.0f, 4.0f, 6.0f, 1.0f, 3.0f, 5.0f, 7.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(depth_to_space_fp16_gpu, d1933_bs3) {
+    //  Input  : 1x9x3x3
+    //  Block size : 3
+    //  Output : 1x1x9x9
+    //  Input values in fp16
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 1, 9, 3, 3 } });
+    size_t block_size = 3;
+
+    set_values(input1, {
+        FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f),
+        FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
+        FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f),
+        FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
+        FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f),
+        FLOAT16(25.0f), FLOAT16(26.0f), FLOAT16(27.0f), FLOAT16(28.0f), FLOAT16(29.0f),
+        FLOAT16(30.0f), FLOAT16(31.0f), FLOAT16(32.0f), FLOAT16(33.0f), FLOAT16(34.0f),
+        FLOAT16(35.0f), FLOAT16(36.0f), FLOAT16(37.0f), FLOAT16(38.0f), FLOAT16(39.0f),
+        FLOAT16(40.0f), FLOAT16(41.0f), FLOAT16(42.0f), FLOAT16(43.0f), FLOAT16(44.0f),
+        FLOAT16(45.0f), FLOAT16(46.0f), FLOAT16(47.0f), FLOAT16(48.0f), FLOAT16(49.0f),
+        FLOAT16(50.0f), FLOAT16(51.0f), FLOAT16(52.0f), FLOAT16(53.0f), FLOAT16(54.0f),
+        FLOAT16(55.0f), FLOAT16(56.0f), FLOAT16(57.0f), FLOAT16(58.0f), FLOAT16(59.0f),
+        FLOAT16(60.0f), FLOAT16(61.0f), FLOAT16(62.0f), FLOAT16(63.0f), FLOAT16(64.0f),
+        FLOAT16(65.0f), FLOAT16(66.0f), FLOAT16(67.0f), FLOAT16(68.0f), FLOAT16(69.0f),
+        FLOAT16(70.0f), FLOAT16(71.0f), FLOAT16(72.0f), FLOAT16(73.0f), FLOAT16(74.0f),
+        FLOAT16(75.0f), FLOAT16(76.0f), FLOAT16(77.0f), FLOAT16(78.0f), FLOAT16(79.0f),
+        FLOAT16(80.0f)
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(
+            depth_to_space("depth_to_space", "Input0", block_size)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input1);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("depth_to_space").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+        0.0f, 9.0f, 18.0f, 1.0f, 10.0f, 19.0f, 2.0f, 11.0f, 20.0f, 27.0f,
+        36.0f, 45.0f, 28.0f, 37.0f, 46.0f, 29.0f, 38.0f, 47.0f, 54.0f, 63.0f,
+        72.0f, 55.0f, 64.0f, 73.0f, 56.0f, 65.0f, 74.0f, 3.0f, 12.0f, 21.0f,
+        4.0f, 13.0f, 22.0f, 5.0f, 14.0f, 23.0f, 30.0f, 39.0f, 48.0f, 31.0f,
+        40.0f, 49.0f, 32.0f, 41.0f, 50.0f, 57.0f, 66.0f, 75.0f, 58.0f, 67.0f,
+        76.0f, 59.0f, 68.0f, 77.0f, 6.0f, 15.0f, 24.0f, 7.0f, 16.0f, 25.0f,
+        8.0f, 17.0f, 26.0f, 33.0f, 42.0f, 51.0f, 34.0f, 43.0f, 52.0f, 35.0f,
+        44.0f, 53.0f, 60.0f, 69.0f, 78.0f, 61.0f, 70.0f, 79.0f, 62.0f, 71.0f,
+        80.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(depth_to_space_fp32_gpu, d1411_bs2) {
+    //  Input  : 1x4x1x1
+    //  Block size : 2
+    //  Output : 1x1x2x2
+    //  Input values in fp32
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } });
+    size_t block_size = 2;
+
+    set_values(input1, {
+        0.f, 1.f, 2.f, 3.f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(
+        depth_to_space("depth_to_space", "Input0", block_size)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input1);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("depth_to_space").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+        0.f, 1.f, 2.f, 3.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(depth_to_space_fp32_gpu, d1421_bs2) {
+    //  Input  : 1x4x2x1
+    //  Block size : 2
+    //  Output : 1x1x4x2
+    //  Input values in fp32
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 2 } });
+    size_t block_size = 2;
+
+    set_values(input1, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(
+        depth_to_space("depth_to_space", "Input0", block_size)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input1);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("depth_to_space").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+        0.f, 2.f, 4.f, 6.f, 1.f, 3.f, 5.f, 7.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(depth_to_space_fp32_gpu, d1933_bs3) {
+    //  Input  : 1x9x3x3
+    //  Block size : 3
+    //  Output : 1x1x9x9
+    //  Input values in fp32
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 9, 3, 3 } });
+    size_t block_size = 3;
+
+    set_values(input1, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+        20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f,
+        30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+        40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
+        50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f,
+        60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f,
+        70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f,
+        80.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input1.get_layout()));
+    topology.add(
+        depth_to_space("depth_to_space", "Input0", block_size)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input1);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("depth_to_space").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+        0.0f, 9.0f, 18.0f, 1.0f, 10.0f, 19.0f, 2.0f, 11.0f, 20.0f, 27.0f,
+        36.0f, 45.0f, 28.0f, 37.0f, 46.0f, 29.0f, 38.0f, 47.0f, 54.0f, 63.0f,
+        72.0f, 55.0f, 64.0f, 73.0f, 56.0f, 65.0f, 74.0f, 3.0f, 12.0f, 21.0f,
+        4.0f, 13.0f, 22.0f, 5.0f, 14.0f, 23.0f, 30.0f, 39.0f, 48.0f, 31.0f,
+        40.0f, 49.0f, 32.0f, 41.0f, 50.0f, 57.0f, 66.0f, 75.0f, 58.0f, 67.0f,
+        76.0f, 59.0f, 68.0f, 77.0f, 6.0f, 15.0f, 24.0f, 7.0f, 16.0f, 25.0f,
+        8.0f, 17.0f, 26.0f, 33.0f, 42.0f, 51.0f, 34.0f, 43.0f, 52.0f, 35.0f,
+        44.0f, 53.0f, 60.0f, 69.0f, 78.0f, 61.0f, 70.0f, 79.0f, 62.0f, 71.0f,
+        80.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp
index df2799a45..ff920a9c9 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/detection_output_test.cpp
@@ -123,6 +123,33 @@ public:
         }
     }
 
+    void init_buffer_sort(cldnn::memory input_buff)
+    {
+        auto input_data_ptr = input_buff.pointer<T>();
+
+        EXPECT_EQ((int)input_buff.count(), 128);
+
+        T* input_data = input_data_ptr.data();
+        input_data[0] = 8;
+        input_data[1] = 3;
+        input_data[16] = 0; input_data[17] = 0; input_data[18] = 0.6f; input_data[19] = 0.55f; input_data[20] = 0.55f; input_data[21] = 0.85f; input_data[22] = 0.85f;
+        input_data[23] = 0; input_data[24] = 0; input_data[25] = 0.4f; input_data[26] = 0.15f; input_data[27] = 0.55f; input_data[28] = 0.45f; input_data[29] = 0.85f;
+        input_data[30] = 0; input_data[31] = 0; input_data[32] = 0.2f; input_data[33] = 0.55f; input_data[34] = 0.15f; input_data[35] = 0.85f; input_data[36] = 0.45f;
+        input_data[37] = 0; input_data[38] = 0; input_data[39] = 0.0f; input_data[40] = 0.15f; input_data[41] = 0.15f; input_data[42] = 0.45f; input_data[43] = 0.45f;
+        input_data[44] = 0; input_data[45] = 1; input_data[46] = 1.0f; input_data[47] = 0.20f; input_data[48] = 0.20f; input_data[49] = 0.50f; input_data[50] = 0.50f;
+        input_data[51] = 0; input_data[52] = 1; input_data[53] = 0.8f; input_data[54] = 0.50f; input_data[55] = 0.20f; input_data[56] = 0.80f; input_data[57] = 0.50f;
+        input_data[58] = 0; input_data[59] = 1; input_data[60] = 0.6f; input_data[61] = 0.20f; input_data[62] = 0.50f; input_data[63] = 0.50f; input_data[64] = 0.80f;
+        input_data[65] = 0; input_data[66] = 1; input_data[67] = 0.4f; input_data[68] = 0.50f; input_data[69] = 0.50f; input_data[70] = 0.80f; input_data[71] = 0.80f;
+        input_data[72] = 1; input_data[73] = 0; input_data[74] = 1.0f; input_data[75] = 0.25f; input_data[76] = 0.25f; input_data[77] = 0.55f; input_data[78] = 0.55f;
+        input_data[79] = 1; input_data[80] = 0; input_data[81] = 0.4f; input_data[82] = 0.45f; input_data[83] = 0.45f; input_data[84] = 0.75f; input_data[85] = 0.75f;
+        input_data[86] = -1; input_data[87] = 0; input_data[88] = 0; input_data[89] = 0; input_data[90] = 0; input_data[91] = 0; input_data[92] = 0;
+        input_data[93] = -1; input_data[94] = 0; input_data[95] = 0; input_data[96] = 0; input_data[97] = 0; input_data[98] = 0; input_data[99] = 0;
+        input_data[100] = 1; input_data[101] = 1; input_data[102] = 0.6f; input_data[103] = 0.40f; input_data[104] = 0.40f; input_data[105] = 0.70f; input_data[106] = 0.70f;
+        input_data[107] = -1; input_data[108] = 0; input_data[109] = 0; input_data[110] = 0; input_data[111] = 0; input_data[112] = 0; input_data[113] = 0;
+        input_data[114] = -1; input_data[115] = 0; input_data[116] = 0; input_data[117] = 0; input_data[118] = 0; input_data[119] = 0; input_data[120] = 0;
+        input_data[121] = -1; input_data[122] = 0; input_data[123] = 0; input_data[124] = 0; input_data[125] = 0; input_data[126] = 0; input_data[127] = 0;
+    }
+
     void check_results(const memory& output, const int num, const std::string values)
     {
         assert(num < output.get_layout().size.spatial[1]);
@@ -145,526 +172,817 @@ public:
             EXPECT_TRUE(floating_point_equal(data[num * output.get_layout().size.spatial[0] + i], (T)(float)atof(items[i].c_str())));
         }
     }
-    static const int num_of_images = 2;
-    static const int num_classes = 2;
-    static const int num_priors = 4;
-    static const int img_size = 300;
-    const float nms_threshold;
-};
 
-typedef ::testing::Types<float, FLOAT16> detection_output_test_types;
-TYPED_TEST_CASE(detection_output_test, detection_output_test_types);
+    void setup_basic(bool runOnGPU)
+    {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 150;
 
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-TYPED_TEST(detection_output_test, test_setup_basic)
-{
-    const bool share_location = true;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 150;
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4} });
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k));
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
 
-    auto outputs = network.execute();
-    
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
-    
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
-}
+        auto outputs = network.execute();
 
-TYPED_TEST(detection_output_test, test_forward_share_location)
-{
-    const bool share_location = true;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 4;
-    const int background_label_id = 0;
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4} });
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+    }
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+    void setup_two_layers(bool runOnGPU)
+    {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 150;
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    auto outputs = network.execute();
+        topology.add(detection_output("detection_output_1", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k));
+        topology.add(detection_output("detection_output_2", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k));
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
 
-    auto output_prim = outputs.begin()->second.get_memory();
-    
-    this->check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
-    this->check_results(output_prim, 1, "0 1 0.8 0.55 0.15 0.85 0.45");
-    this->check_results(output_prim, 2, "0 1 0.6 0.15 0.55 0.45 0.85");
-    this->check_results(output_prim, 3, "0 1 0.4 0.55 0.55 0.85 0.85");
-    this->check_results(output_prim, 4, "1 1 0.6 0.45 0.45 0.75 0.75");
-    this->check_results(output_prim, 5, "1 1 0.0 0.25 0.25 0.55 0.55");
-    this->check_results(output_prim, 6, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 7, "-1 0 0 0 0 0 0");
-}
+        auto outputs = network.execute();
 
-TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k)
-{
-    const bool share_location = true;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 1;
-    const int background_label_id = 0;
+        EXPECT_EQ(outputs.size(), size_t(2));
+        unsigned i = 1;
+        for (auto it = outputs.begin(); it != outputs.begin(); it++)
+        {
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4} });
+            EXPECT_EQ(it->first, "detection_output_" + std::to_string(i));
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+            EXPECT_EQ(it->second.get_memory().get_layout().size.batch[0], 1);
+            EXPECT_EQ(it->second.get_memory().get_layout().size.feature[0], 1);
+            EXPECT_EQ(it->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+            EXPECT_EQ(it->second.get_memory().get_layout().size.spatial[0], 7);
+            i++;
+        }
+    }
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+    void forward_share_location(bool runOnGPU)
+    {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 4;
+        const int background_label_id = 0;
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    auto outputs = network.execute();
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
 
-    auto output_prim = outputs.begin()->second.get_memory();
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    this->check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
-    this->check_results(output_prim, 1, "1 1 0.6 0.45 0.45 0.75 0.75");
-}
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
 
-TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k)
-{
-    const bool share_location = true;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 6;
-    const int background_label_id = 0;
+        auto outputs = network.execute();
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4} });
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        auto output_prim = outputs.begin()->second.get_memory();
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
+        check_results(output_prim, 1, "0 1 0.8 0.55 0.15 0.85 0.45");
+        check_results(output_prim, 2, "0 1 0.6 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 3, "0 1 0.4 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 4, "1 1 0.6 0.45 0.45 0.75 0.75");
+        check_results(output_prim, 5, "1 1 0.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+    }
 
-    auto outputs = network.execute();
+    void forward_num_detections_greater_than_keep_top_k(bool runOnGPU)
+    {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 1;
+        const int background_label_id = 0;
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
 
-    auto output_prim = outputs.begin()->second.get_memory();
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    this->check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
-    this->check_results(output_prim, 1, "0 1 0.8 0.55 0.15 0.85 0.45");
-    this->check_results(output_prim, 2, "0 1 0.6 0.15 0.55 0.45 0.85");
-    this->check_results(output_prim, 3, "0 1 0.4 0.55 0.55 0.85 0.85");
-    this->check_results(output_prim, 4, "1 1 0.6 0.45 0.45 0.75 0.75");
-    this->check_results(output_prim, 5, "1 1 0.0 0.25 0.25 0.55 0.55");
-    this->check_results(output_prim, 6, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 7, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 8, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 9, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 10, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 11, "-1 0 0 0 0 0 0");
-}
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
 
-TYPED_TEST(detection_output_test, test_forward_share_location_top_k)
-{
-    const bool share_location = true;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 2;
-    const int top_k = 2;
-    const int background_label_id = 0;
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+        auto outputs = network.execute();
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
 
-    auto outputs = network.execute();
+        auto output_prim = outputs.begin()->second.get_memory();
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+        check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
+        check_results(output_prim, 1, "1 1 0.6 0.45 0.45 0.75 0.75");
+    }
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+    void forward_num_detections_smaller_than_keep_top_k(bool runOnGPU)
+    {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 6;
+        const int background_label_id = 0;
 
-    auto output_prim = outputs.begin()->second.get_memory();
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    this->check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
-    this->check_results(output_prim, 1, "0 1 0.8 0.55 0.15 0.85 0.45");
-    this->check_results(output_prim, 2, "1 1 0.6 0.45 0.45 0.75 0.75");
-    this->check_results(output_prim, 3, "-1 0 0 0 0 0 0");
-}
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
 
-TYPED_TEST(detection_output_test, test_forward_no_share_location)
-{
-    const bool share_location = false;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 10;
-    const int background_label_id = -1;
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
+        check_results(output_prim, 1, "0 1 0.8 0.55 0.15 0.85 0.45");
+        check_results(output_prim, 2, "0 1 0.6 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 3, "0 1 0.4 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 4, "1 1 0.6 0.45 0.45 0.75 0.75");
+        check_results(output_prim, 5, "1 1 0.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 8, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 9, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 10, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 11, "-1 0 0 0 0 0 0");
+    }
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+    void test_forward_share_location_top_k(bool runOnGPU)
+    {
+        const bool share_location = true;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 2;
+        const int top_k = 2;
+        const int background_label_id = 0;
 
-    auto outputs = network.execute();
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    auto output_prim = outputs.begin()->second.get_memory();
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
 
-    this->check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
-    this->check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
-    this->check_results(output_prim, 2, "0 0 0.2 0.55 0.15 0.85 0.45");
-    this->check_results(output_prim, 3, "0 0 0.0 0.15 0.15 0.45 0.45");
-    this->check_results(output_prim, 4, "0 1 1.0 0.20 0.20 0.50 0.50");
-    this->check_results(output_prim, 5, "0 1 0.8 0.50 0.20 0.80 0.50");
-    this->check_results(output_prim, 6, "0 1 0.6 0.20 0.50 0.50 0.80");
-    this->check_results(output_prim, 7, "0 1 0.4 0.50 0.50 0.80 0.80");
-    this->check_results(output_prim, 8, "1 0 1.0 0.25 0.25 0.55 0.55");
-    this->check_results(output_prim, 9, "1 0 0.4 0.45 0.45 0.75 0.75");
-    this->check_results(output_prim, 10, "1 1 0.6 0.40 0.40 0.70 0.70");
-    this->check_results(output_prim, 11, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 12, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 13, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 14, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 15, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 16, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 17, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 18, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 19, "-1 0 0 0 0 0 0");
-}
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k)
-{
-    const bool share_location = false;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 4;
-    const int background_label_id = -1;
-    const int top_k = 2;
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+        auto outputs = network.execute();
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        auto output_prim = outputs.begin()->second.get_memory();
 
-    auto outputs = network.execute();
+        check_results(output_prim, 0, "0 1 1.0 0.15 0.15 0.45 0.45");
+        check_results(output_prim, 1, "0 1 0.8 0.55 0.15 0.85 0.45");
+        check_results(output_prim, 2, "1 1 0.6 0.45 0.45 0.75 0.75");
+        check_results(output_prim, 3, "-1 0 0 0 0 0 0");
+    }
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+    void forward_no_share_location(bool runOnGPU)
+    {
+        const bool share_location = false;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 10;
+        const int background_label_id = -1;
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    auto output_prim = outputs.begin()->second.get_memory();
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
 
-    this->check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
-    this->check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
-    this->check_results(output_prim, 2, "0 1 1.0 0.20 0.20 0.50 0.50");
-    this->check_results(output_prim, 3, "0 1 0.8 0.50 0.20 0.80 0.50");
-    this->check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55");
-    this->check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70");
-    this->check_results(output_prim, 6, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 7, "-1 0 0 0 0 0 0");
-}
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0)
-{
-    const bool share_location = false;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 5;
-    const int background_label_id = 0;
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 2, "0 0 0.2 0.55 0.15 0.85 0.45");
+        check_results(output_prim, 3, "0 0 0.0 0.15 0.15 0.45 0.45");
+        check_results(output_prim, 4, "0 1 1.0 0.20 0.20 0.50 0.50");
+        check_results(output_prim, 5, "0 1 0.8 0.50 0.20 0.80 0.50");
+        check_results(output_prim, 6, "0 1 0.6 0.20 0.50 0.50 0.80");
+        check_results(output_prim, 7, "0 1 0.4 0.50 0.50 0.80 0.80");
+        check_results(output_prim, 8, "1 0 1.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 9, "1 0 0.4 0.45 0.45 0.75 0.75");
+        check_results(output_prim, 10, "1 1 0.6 0.40 0.40 0.70 0.70");
+        check_results(output_prim, 11, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 12, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 13, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 14, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 15, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 16, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 17, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 18, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 19, "-1 0 0 0 0 0 0");
+    }
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+    void forward_no_share_location_top_k(bool runOnGPU)
+    {
+        const bool share_location = false;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 4;
+        const int background_label_id = -1;
+        const int top_k = 2;
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
 
-    auto outputs = network.execute();
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
 
-    auto output_prim = outputs.begin()->second.get_memory();
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
 
-    this->check_results(output_prim, 0, "0 1 1.0 0.20 0.20 0.50 0.50");
-    this->check_results(output_prim, 1, "0 1 0.8 0.50 0.20 0.80 0.50");
-    this->check_results(output_prim, 2, "0 1 0.6 0.20 0.50 0.50 0.80");
-    this->check_results(output_prim, 3, "0 1 0.4 0.50 0.50 0.80 0.80");
-    this->check_results(output_prim, 4, "1 1 0.6 0.40 0.40 0.70 0.70");
-    this->check_results(output_prim, 5, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 6, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 7, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 8, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 9, "-1 0 0 0 0 0 0");
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 2, "0 1 1.0 0.20 0.20 0.50 0.50");
+        check_results(output_prim, 3, "0 1 0.8 0.50 0.20 0.80 0.50");
+        check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+    }
+
+    void forward_no_share_location_neg_0(bool runOnGPU)
+    {
+        const bool share_location = false;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 5;
+        const int background_label_id = 0;
+
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold));
+
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
+
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 1 1.0 0.20 0.20 0.50 0.50");
+        check_results(output_prim, 1, "0 1 0.8 0.50 0.20 0.80 0.50");
+        check_results(output_prim, 2, "0 1 0.6 0.20 0.50 0.50 0.80");
+        check_results(output_prim, 3, "0 1 0.4 0.50 0.50 0.80 0.80");
+        check_results(output_prim, 4, "1 1 0.6 0.40 0.40 0.70 0.70");
+        check_results(output_prim, 5, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 8, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 9, "-1 0 0 0 0 0 0");
+    }
+
+    void forward_no_share_location_neg_0_top_k(bool runOnGPU)
+    {
+        const bool share_location = false;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 2;
+        const int background_label_id = 0;
+        const int top_k = 2;
+
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+
+        topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
+
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
+
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 1 1.0 0.20 0.20 0.50 0.50");
+        check_results(output_prim, 1, "0 1 0.8 0.50 0.20 0.80 0.50");
+        check_results(output_prim, 2, "1 1 0.6 0.40 0.40 0.70 0.70");
+        check_results(output_prim, 3, "-1 0 0 0 0 0 0");
+    }
+
+    void forward_no_share_location_top_k_input_padding(bool runOnGPU)
+    {
+        const bool share_location = false;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 4;
+        const int background_label_id = -1;
+        const int top_k = 2;
+
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        topology.add(reorder("input_location_padded", "input_location", input_location.get_layout().with_padding({ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } })));
+        topology.add(reorder("input_confidence_padded", "input_confidence", input_location.get_layout().with_padding({ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
+
+        topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
+
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
+
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 2, "0 1 1.0 0.20 0.20 0.50 0.50");
+        check_results(output_prim, 3, "0 1 0.8 0.50 0.20 0.80 0.50");
+        check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+    }
+
+    void test_forward_no_share_location_top_k_faster_rcnn_case(bool runOnGPU)
+    {
+        const bool share_location = false;
+        const int num_loc_classes = share_location ? 1 : this->num_classes;
+        const int keep_top_k = 4;
+        const int background_label_id = -1;
+        const int top_k = 2;
+        const float eta = 1.0f;
+        const prior_box_code_type code_type = prior_box_code_type::corner;
+        const bool variance_encoded_in_target = true;
+        const float confidence_threshold = -std::numeric_limits<float>::max();
+        const int32_t prior_info_size = 5;
+        const int32_t prior_coordinates_offset = 1;
+        const bool prior_is_normalized = true;
+
+        const auto& engine = get_test_engine();
+        cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
+        cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
+        cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1, 1, 1, this->num_priors * prior_info_size } });
+
+        this->init_buffers(input_prior_box, input_confidence, input_location, share_location, variance_encoded_in_target,
+            prior_info_size, prior_coordinates_offset, prior_is_normalized);
+
+        topology topology;
+        topology.add(input_layout("input_location", input_location.get_layout()));
+        topology.add(input_layout("input_confidence", input_confidence.get_layout()));
+        topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+        topology.add(reorder("input_location_padded", "input_location", input_location.get_layout().with_padding({ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } })));
+        topology.add(reorder("input_confidence_padded", "input_confidence", input_location.get_layout().with_padding({ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
+
+        topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box",
+            this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k,
+            eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size, prior_coordinates_offset,
+            prior_is_normalized, this->img_size, this->img_size
+        ));
+
+        build_options opts;
+        if (runOnGPU)
+        {
+            opts.set_option(build_option::detection_output_gpu(true));
+        }
+
+        network network(engine, topology, opts);
+        network.set_input_data("input_location", input_location);
+        network.set_input_data("input_confidence", input_confidence);
+        network.set_input_data("input_prior_box", input_prior_box);
+
+        auto outputs = network.execute();
+
+        EXPECT_EQ(outputs.size(), size_t(1));
+        EXPECT_EQ(outputs.begin()->first, "detection_output");
+
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
+        EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+
+        auto output_prim = outputs.begin()->second.get_memory();
+
+        check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
+        check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
+        check_results(output_prim, 2, "0 1 1.0 0.20 0.20 0.50 0.50");
+        check_results(output_prim, 3, "0 1 0.8 0.50 0.20 0.80 0.50");
+        check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55");
+        check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70");
+        check_results(output_prim, 6, "-1 0 0 0 0 0 0");
+        check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+    }
+
+    static const int num_of_images = 2;
+    static const int num_classes = 2;
+    static const int num_priors = 4;
+    static const int img_size = 300;
+    const float nms_threshold;
+};
+
+typedef ::testing::Types<float, FLOAT16> detection_output_test_types;
+TYPED_TEST_CASE(detection_output_test, detection_output_test_types);
+
+
+TYPED_TEST(detection_output_test, test_setup_basic)
+{
+    this->setup_basic(false);
 }
 
-TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k)
+TYPED_TEST(detection_output_test, test_setup_basic_gpu)
 {
-    const bool share_location = false;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 2;
-    const int background_label_id = 0;
-    const int top_k = 2;
+    this->setup_basic(true);
+}
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx, { this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx, { this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx, { 1, 2, 1, this->num_priors * 4 } });
+TYPED_TEST(detection_output_test, test_setup_two_layers)
+{
+    this->setup_two_layers(false);
+}
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
+TYPED_TEST(detection_output_test, test_setup_two_layers_gpu)
+{
+    this->setup_two_layers(true);
+}
 
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
+TYPED_TEST(detection_output_test, test_forward_share_location)
+{
+    this->forward_share_location(false);
+}
 
-    topology.add(detection_output("detection_output", "input_location", "input_confidence", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+TYPED_TEST(detection_output_test, test_forward_share_location_gpu)
+{
+    this->forward_share_location(true);
+}
 
-    auto outputs = network.execute();
+TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k)
+{
+    this->forward_num_detections_greater_than_keep_top_k(false);
+}
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+TYPED_TEST(detection_output_test, test_forward_num_detections_greater_than_keep_top_k_gpu)
+{
+    this->forward_num_detections_greater_than_keep_top_k(true);
+}
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k)
+{
+    this->forward_num_detections_smaller_than_keep_top_k(false);
+}
 
-    auto output_prim = outputs.begin()->second.get_memory();
+TYPED_TEST(detection_output_test, test_forward_num_detections_smaller_than_keep_top_k_gpu)
+{
+    this->forward_num_detections_smaller_than_keep_top_k(true);
+}
 
-    this->check_results(output_prim, 0, "0 1 1.0 0.20 0.20 0.50 0.50");
-    this->check_results(output_prim, 1, "0 1 0.8 0.50 0.20 0.80 0.50");
-    this->check_results(output_prim, 2, "1 1 0.6 0.40 0.40 0.70 0.70");
-    this->check_results(output_prim, 3, "-1 0 0 0 0 0 0");
+TYPED_TEST(detection_output_test, test_forward_share_location_top_k)
+{
+    this->test_forward_share_location_top_k(false);
 }
 
-TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding)
+TYPED_TEST(detection_output_test, test_forward_share_location_top_k_gpu)
 {
-    const bool share_location = false;
-    const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 4;
-    const int background_label_id = -1;
-    const int top_k = 2;
+    this->test_forward_share_location_top_k(true);
+}
 
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 2, 1, this->num_priors * 4 } });
+TYPED_TEST(detection_output_test, test_forward_no_share_location)
+{
+    this->forward_no_share_location(false);
+}
 
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location);
-    topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
-    topology.add(reorder("input_location_padded", "input_location", input_location.get_layout().with_padding({ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } })));
-    topology.add(reorder("input_confidence_padded", "input_confidence", input_location.get_layout().with_padding({ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
+TYPED_TEST(detection_output_test, test_forward_no_share_location_gpu)
+{
+    this->forward_no_share_location(true);
+}
 
-    topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box", this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k));
-    network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k)
+{
+    this->forward_no_share_location_top_k(false);
+}
 
-    auto outputs = network.execute();
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_gpu)
+{
+    this->forward_no_share_location_top_k(true);
+}
 
-    EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0)
+{
+    this->forward_no_share_location_neg_0(false);
+}
 
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[1], keep_top_k * this->num_of_images);
-    EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.spatial[0], 7);
+TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_gpu)
+{
+    this->forward_no_share_location_neg_0(true);
+}
 
-    auto output_prim = outputs.begin()->second.get_memory();
+TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k)
+{
+    this->forward_no_share_location_neg_0_top_k(false);
+}
 
-    this->check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
-    this->check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
-    this->check_results(output_prim, 2, "0 1 1.0 0.20 0.20 0.50 0.50");
-    this->check_results(output_prim, 3, "0 1 0.8 0.50 0.20 0.80 0.50");
-    this->check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55");
-    this->check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70");
-    this->check_results(output_prim, 6, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+TYPED_TEST(detection_output_test, test_forward_no_share_location_neg_0_top_k_gpu)
+{
+    this->forward_no_share_location_neg_0_top_k(true);
+}
+
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding)
+{
+    this->forward_no_share_location_top_k_input_padding(false);
+}
+
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_input_padding_gpu)
+{
+    this->forward_no_share_location_top_k_input_padding(true);
 }
 
 TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case)
 {
+    this->test_forward_no_share_location_top_k_faster_rcnn_case(false);
+}
+
+TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rcnn_case_gpu)
+{
+    this->test_forward_no_share_location_top_k_faster_rcnn_case(true);
+}
+
+TYPED_TEST(detection_output_test, test_detection_output_sort_gpu)
+{
     const bool share_location = false;
     const int num_loc_classes = share_location ? 1 : this->num_classes;
-    const int keep_top_k = 4;
+    const int keep_top_k = 10;
     const int background_label_id = -1;
-    const int top_k = 2;
-    const float eta = 1.0f;
-    const prior_box_code_type code_type = prior_box_code_type::corner;
-    const bool variance_encoded_in_target = true;
-    const float confidence_threshold = -std::numeric_limits<float>::max();
-    const int32_t prior_info_size = 5;
-    const int32_t prior_coordinates_offset = 1;
-    const bool prior_is_normalized = true;
-
-    cldnn::engine engine;
-    cldnn::memory input_location = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * num_loc_classes * 4, 1, 1 } });
-    cldnn::memory input_confidence = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ this->num_of_images, this->num_priors * this->num_classes, 1, 1 } });
-    cldnn::memory input_prior_box = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 1, 1, this->num_priors * prior_info_size } });
-
-    this->init_buffers(input_prior_box, input_confidence, input_location, share_location, variance_encoded_in_target,
-                       prior_info_size, prior_coordinates_offset, prior_is_normalized);
+    const int top_k = -1;
+
+    const unsigned out_row_size = 7;
+    const unsigned score_space = ((this->num_of_images + 15) / 16) * 16;
+    int input_size = this->num_of_images * num_loc_classes * this->num_priors * out_row_size + score_space;
+
+    const auto& engine = get_test_engine();
+    cldnn::memory input_buff = memory::allocate(engine, { type_to_data_type<TypeParam>::value, format::bfyx,{ 1, 1, 1, input_size } });
+
+    this->init_buffer_sort(input_buff);
 
     topology topology;
-    topology.add(input_layout("input_location", input_location.get_layout()));
-    topology.add(input_layout("input_confidence", input_confidence.get_layout()));
-    topology.add(input_layout("input_prior_box", input_prior_box.get_layout()));
-    topology.add(reorder("input_location_padded", "input_location", input_location.get_layout().with_padding({ { 0, 0, 12, 3 },{ 0, 0, 5, 11 } })));
-    topology.add(reorder("input_confidence_padded", "input_confidence", input_location.get_layout().with_padding({ { 0, 0, 2, 7 },{ 0, 0, 13, 1 } })));
-
-    topology.add(detection_output("detection_output", "input_location_padded", "input_confidence_padded", "input_prior_box",
-                                  this->num_classes, keep_top_k, share_location, background_label_id, this->nms_threshold, top_k,
-                                  eta, code_type, variance_encoded_in_target, confidence_threshold, prior_info_size, prior_coordinates_offset,
-                                  prior_is_normalized, this->img_size, this->img_size
-                    ));
+    topology.add(input_layout("input_location", input_buff.get_layout()));
+
+    topology.add(detection_output_sort("detection_output_sort", "input_location", this->num_of_images, this->num_classes, keep_top_k, share_location, top_k, background_label_id));
     network network(engine, topology);
-    network.set_input_data("input_location", input_location);
-    network.set_input_data("input_confidence", input_confidence);
-    network.set_input_data("input_prior_box", input_prior_box);
+    network.set_input_data("input_location", input_buff);
 
     auto outputs = network.execute();
 
     EXPECT_EQ(outputs.size(), size_t(1));
-    EXPECT_EQ(outputs.begin()->first, "detection_output");
+    EXPECT_EQ(outputs.begin()->first, "detection_output_sort");
 
     EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.batch[0], 1);
     EXPECT_EQ(outputs.begin()->second.get_memory().get_layout().size.feature[0], 1);
@@ -675,11 +993,23 @@ TYPED_TEST(detection_output_test, test_forward_no_share_location_top_k_faster_rc
 
     this->check_results(output_prim, 0, "0 0 0.6 0.55 0.55 0.85 0.85");
     this->check_results(output_prim, 1, "0 0 0.4 0.15 0.55 0.45 0.85");
-    this->check_results(output_prim, 2, "0 1 1.0 0.20 0.20 0.50 0.50");
-    this->check_results(output_prim, 3, "0 1 0.8 0.50 0.20 0.80 0.50");
-    this->check_results(output_prim, 4, "1 0 1.0 0.25 0.25 0.55 0.55");
-    this->check_results(output_prim, 5, "1 1 0.6 0.40 0.40 0.70 0.70");
-    this->check_results(output_prim, 6, "-1 0 0 0 0 0 0");
-    this->check_results(output_prim, 7, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 2, "0 0 0.2 0.55 0.15 0.85 0.45");
+    this->check_results(output_prim, 3, "0 0 0.0 0.15 0.15 0.45 0.45");
+    this->check_results(output_prim, 4, "0 1 1.0 0.20 0.20 0.50 0.50");
+    this->check_results(output_prim, 5, "0 1 0.8 0.50 0.20 0.80 0.50");
+    this->check_results(output_prim, 6, "0 1 0.6 0.20 0.50 0.50 0.80");
+    this->check_results(output_prim, 7, "0 1 0.4 0.50 0.50 0.80 0.80");
+    this->check_results(output_prim, 8, "1 0 1.0 0.25 0.25 0.55 0.55");
+    this->check_results(output_prim, 9, "1 0 0.4 0.45 0.45 0.75 0.75");
+    this->check_results(output_prim, 10, "1 1 0.6 0.40 0.40 0.70 0.70");
+    this->check_results(output_prim, 11, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 12, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 13, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 14, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 15, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 16, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 17, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 18, "-1 0 0 0 0 0 0");
+    this->check_results(output_prim, 19, "-1 0 0 0 0 0 0");
 }
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
index 750aaa567..417ab0720 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/eltwise_gpu_test.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@
 #include <api/CPP/network.hpp>
 #include <api/CPP/engine.hpp>
 #include <api/CPP/reorder.hpp>
+#include <api/CPP/data.hpp>
 #include "test_utils/test_utils.h"
 
 namespace cldnn
@@ -97,7 +98,7 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f
     VF<T> input1_rnd_vec = flatten_4d<T>(test_input_fmt, input1_rnd);
     VF<T> input2_rnd_vec = flatten_4d<T>(test_input_fmt, input2_rnd);
 
-    engine engine;
+    const auto& engine = get_test_engine();
     tensor input_tensor( input_b, input_f, input_x, input_y );
     auto input1 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
     auto input2 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
@@ -157,31 +158,835 @@ void generic_eltwise_test(cldnn::format test_input_fmt, int input_b, int input_f
         << "type = " << (sizeof(T) == 2 ? "float16" : "float32") << std::endl;
 }
 
+TEST(eltwise_gpu_f32, equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  0, 1, 0, 1,
+    //  0, 0, 1, 0,
+    //  0, 0, 0, 0,
+    //  0, 1, 0, 0
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::eq));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 1, 0, 1,
+                                    0, 0, 1, 0,
+                                    0, 0, 0, 0,
+                                    0, 1, 0, 0 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, not_equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  1, 0, 1, 0,
+    //  1, 1, 0, 1,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ne));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 1, 0, 1, 0,
+                                    1, 1, 0, 1,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, less_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  0, 0, 0, 0,
+    //  1, 1, 0, 0,
+    //  1, 1, 1, 0,
+    //  0, 0, 0, 0
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::lt));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 0, 0, 0,
+                                    1, 1, 0, 0,
+                                    1, 1, 1, 0,
+                                    0, 0, 0, 0 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, less_equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  0, 1, 0, 1,
+    //  1, 1, 1, 0,
+    //  1, 1, 1, 0,
+    //  0, 1, 0, 0
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::le));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 1, 0, 1,
+                                    1, 1, 1, 0,
+                                    1, 1, 1, 0,
+                                    0, 1, 0, 0 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, greater_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  1, 0, 1, 0,
+    //  0, 0, 0, 1,
+    //  0, 0, 0, 1,
+    //  1, 0, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::gt));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 1, 0, 1, 0,
+                                    0, 0, 0, 1,
+                                    0, 0, 0, 1,
+                                    1, 0, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, greater_equal_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  1, 1, 1, 1,
+    //  0, 0, 1, 1,
+    //  0, 0, 0, 1,
+    //  1, 1, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::ge));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    0, 0, 1, 1,
+                                    0, 0, 0, 1,
+                                    1, 1, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, logicalAND_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_and));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    1, 0, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, logicalAND_in3_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input3 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Input3
+    //  0.f, 0.f, 0.f, 0.f,
+    //  0.f, 0.f, 0.f, 0.f,
+    //  1.f, 1.f, 1.f, 1.f,
+    //  1.f, 1.f, 1.f, 1.f
+    //
+    //  Output:
+    //  0, 0, 0, 0,
+    //  0, 0, 0, 0,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    set_values(input3, {
+            0.f, 0.f, 0.f, 0.f,
+            0.f, 0.f, 0.f, 0.f,
+            1.f, 1.f, 1.f, 1.f,
+            1.f, 1.f, 1.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_and));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 0, 0, 0,
+                                    0, 0, 0, 0,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
+ 
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, logicalOR_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 0, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_or));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 0, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, logicalOR_in3_float_out1_int) {
+    //  Input3 : 2x2x2x2
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Input3
+    //  0.f, 1.f, 1.f, 1.f,
+    //  0.f, 1.f, 1.f, 0.f,
+    //  1.f, 1.f, 1.f, 1.f,
+    //  1.f, 1.f, 1.f, 1.f
+    //
+    //  Output:
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1,
+    //  1, 1, 1, 1
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    set_values(input3, {
+            0.f, 1.f, 1.f, 1.f,
+            0.f, 1.f, 1.f, 0.f,
+            1.f, 1.f, 1.f, 1.f,
+            1.f, 1.f, 1.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::logic_or));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 1, 1, 1,
+                                    1, 1, 1, 1 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
+TEST(eltwise_gpu_f32, logicalXOR_in2_float_out1_int) {
+    //  Input2 : 2x2x2x2
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  1.f,   2.5f,    5.f,    1.5f,
+    //  2.f,   0.f,     6.f,    5.2f,
+    //  3.f,   0.5f,    7.f,    12.f,
+    //  4.f,   0.f,     8.f,    8.f
+    //
+    //  Input2
+    //  0.5f,  2.5f,    0.5f,   1.5f,
+    //  5.f,   7.f,     6.f,    4.f,
+    //  15.f,  17.f,    8.f,    10.f,
+    //  -2.f,  0.f,     -0.5f,  -2.5f
+    //
+    //  Output:
+    //  0, 0, 0, 0,
+    //  0, 1, 0, 0,
+    //  0, 0, 0, 0,
+    //  0, 0, 0, 0
+
+    const auto& engine = get_test_engine();
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    set_values(input1, {
+            1.f,   2.5f, 5.f, 1.5f,
+            2.f,   0.f, 6.f, 5.2f,
+            3.f,  0.5f, 7.f, 12.f,
+            4.f, 0.f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+            0.5f,   2.5f,  0.5f,  1.5f,
+            5.f,   7.f,    6.f,   4.f,
+            15.f,  17.f,    8.f,  10.f,
+            -2.f,  0.f,  -0.5f, -2.5f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::logic_xor));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input1);
+    network.set_input_data("input2", input2);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+    auto output_ptr = output.pointer<int8_t>();
+
+    std::vector<int8_t> answers = { 0, 0, 0, 0,
+                                    0, 1, 0, 0,
+                                    0, 0, 0, 0,
+                                    0, 0, 0, 0 };
+
+    for (size_t i = 0; i < answers.size(); ++i) {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
+}
+
 TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) {
     //  Input2   : 2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
+    //  f0: b0: 0.5  5   b1: 2.5  7
     //  f0: b0: 15  -2   b1: 17   6.5
     //  f1: b0: 0.5  2   b1: 2.5  4
     //  f1: b0: 8   -0.5 b1: 10   -2.5
     //
     //  Output:
-    //  f0: b0:   1.5  7    b1:  2.5   7      
-    //  f0: b0:   18   2    b1:  17.5  6     
-    //  f1: b0:   5.5  8    b1:   4    9.2     
-    //  f1: b0:   15  16.5  b1:  22    16.5     
+    //  f0: b0:   1.5  7    b1:  2.5   7
+    //  f0: b0:   18   2    b1:  17.5  6
+    //  f1: b0:   5.5  8    b1:   4    9.2
+    //  f1: b0:   15  16.5  b1:  22    16.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -228,30 +1033,566 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2) {
     }
 }
 
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_channel) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+        1.f,   0.f,
+        2.f,   0.f,
+
+        3.f,  0.5f,
+        4.f, -0.5f,
+    });
+
+    set_values(input2, {
+         0.5f,  2.5f,
+         0.5f,  2.5f,
+
+          5.f,   7.f,
+          2.f,   4.f,
+
+         15.f,  17.f,
+          8.f,  10.f,
+
+         -2.f,  6.5f,
+        -0.5f, -2.5f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,
+                          2.5f, 2.5f,
+
+                          6.f,   7.f,
+                          4.f,   4.f,
+
+                          18.f, 17.5f,
+                          12.f,  9.5f,
+
+                          1.f,   7.f,
+                          3.5f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_x) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f,
+        0.f,
+
+        2.f,
+        0.f,
+
+        3.f,
+        0.5f,
+
+        4.f,
+        -0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 3.5f,
+                          0.5f, 2.5f,
+
+                          7.f,   9.f,
+                          2.f,   4.f,
+
+                          18.f, 20.f,
+                          8.5f,  10.5f,
+
+                          2.f,   10.5f,
+                          -1.f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_y) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+        2.f, 0.f,
+
+        3.f, 0.5f,
+        4.f, -0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,
+                          2.5f, 2.5f,
+
+                          8.f,   7.5f,
+                          6.f,   3.5f,
+
+                          16.f,  17.f,
+                          10.f,  10.f,
+
+                          1.f,   7.f,
+                          3.5f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_batch) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+
+        2.f, 0.f,
+
+        3.f, 0.5f,
+
+        4.f, -0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 2.5f,
+                          1.5f, 2.5f,
+
+                          7.f,   7.f,
+                          4.f,   4.f,
+
+                          18.f,  17.5f,
+                          11.f,  10.5f,
+
+                          2.f,   6.f,
+                          3.5f, -3.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_in2x2x2x2_broadcast_multiple_dims) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+    set_values(input2, {
+            1.f,
+            2.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.5f, 3.5f,
+                          1.5f, 3.5f,
+
+                          7.f,   9.f,
+                          4.f,   6.f,
+
+                          16.f, 18.f,
+                          9.f,  11.f,
+
+                          0.f,   8.5f,
+                          1.5f, -0.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, pow_in2x2x2x2_broadcast_all) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::pow));
+
+    set_values(input, {
+            1.f,  2.f,
+            3.f,  4.f,
+
+            5.f,  6.f,
+            7.f,  8.f,
+
+            9.f,  10.f,
+           11.f,  12.f,
+
+            13.f, 14.f,
+            15.f, 16.f });
+
+
+    set_values(input2, { 2.0f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 1.f, 4.f,
+                          9.f, 16.f,
+
+                          25.f, 36.f,
+                          49.f, 64.f,
+
+                          81.f,  100.f,
+                          121.f, 144.f,
+
+                          169.f, 196.f,
+                          225.f, 256.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_same_dim) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+
+        2.f, 0.f,
+
+        3.f, 0.5f,
+
+        4.f, -0.5f,
+    });
+
+    set_values(input3, {
+        3.f, 2.f,
+
+        1.f, 2.f,
+
+        -2.f, 1.5f,
+
+        -4.f, 0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 4.5f, 4.5f,
+                          4.5f, 4.5f,
+
+                          8.f,   9.f,
+                          5.f,   6.f,
+
+                          16.f,  19.f,
+                          9.f,  12.f,
+
+                          -2.f,  6.5f,
+                          -0.5f, -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, add_basic_in2x2x2x2_broadcast_2_inputs_diff_dim) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("input3", input3.get_layout()));
+    topology.add(eltwise("eltwise", {"input", "input2", "input3"}, eltwise_mode::sum));
+
+    set_values(input, {
+            0.5f,  2.5f,
+            0.5f,  2.5f,
+
+            5.f,   7.f,
+            2.f,   4.f,
+
+            15.f,  17.f,
+            8.f,  10.f,
+
+            -2.f,  6.5f,
+            -0.5f, -2.5f });
+
+
+    set_values(input2, {
+        1.f, 0.f,
+
+        2.f, 0.f,
+
+        3.f, 0.5f,
+
+        4.f, -0.5f,
+    });
+
+    set_values(input3, {
+        3.f, 2.f,
+        1.f, 2.f,
+
+        -2.f, 1.5f,
+        -4.f, 0.5f,
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = { 4.5f, 4.5f,
+                          2.5f, 4.5f,
+
+                          10.f,  9.f,
+                          5.f,   6.f,
+
+                          16.f,  19.f,
+                          7.f,  11.f,
+
+                           0.f,   7.5f,
+                          -0.5f, -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
 TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) {
     //  Input2   : 2x2x2
     //  Input  : 2x2x2x2
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
+    //  f0: b0: 0.5  5   b1: 2.5  7
     //  f0: b0: 15   6   b1: 17   8
     //  f1: b0: 0.5  2   b1: 2.5  4
     //  f1: b0: 8   -0.5 b1: 10   -2.5
     //
     //  Output:
-    //  f0: b0:    1   5    b1:  2.5   7       
-    //  f0: b0:   15   6    b1:  17    8    
-    //  f1: b0:    5   6    b1:  2.5   5.2     
-    //  f1: b0:    8   8    b1:  12    8     
+    //  f0: b0:    1   5    b1:  2.5   7
+    //  f0: b0:   15   6    b1:  17    8
+    //  f1: b0:    5   6    b1:  2.5   5.2
+    //  f1: b0:    8   8    b1:  12    8
     //
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -269,7 +1610,7 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4) {
     });
 
     set_values(input2, {
-        0.5f,  2.5f,  0.5f,  2.5f, 
+        0.5f,  2.5f,  0.5f,  2.5f,
          5.f,   7.f,   2.f,   4.f,
         15.f,  17.f,   8.f,  10.f,
          6.f,   8.f, -0.5f, -2.5f });
@@ -305,25 +1646,25 @@ TEST(eltwise_gpu_f32, sub_basic_in4x4x4x4) {
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
+    //  f0: b0: 0.5  5   b1: 2.5  7
     //  f0: b0: 15   6   b1: 17   8
     //  f1: b0: 0.5  2   b1: -1   2
     //  f1: b0: 8   -0.5 b1: 8.5  10.5
     //
     //  Output:
-    //  f0: b0:   0.5  -3    b1:  -2.5  -7       
-    //  f0: b0:   -12  -2    b1:  -16.5 -8.5    
-    //  f1: b0:   4.5   4    b1:  2.5    3.2     
-    //  f1: b0:   -1    8.5  b1:  3.5   -2.5     
+    //  f0: b0:   0.5  -3    b1:  -2.5  -7
+    //  f0: b0:   -12  -2    b1:  -16.5 -8.5
+    //  f1: b0:   4.5   4    b1:  2.5    3.2
+    //  f1: b0:   -1    8.5  b1:  3.5   -2.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
 
@@ -374,13 +1715,13 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) {
     //  Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types instead
 
     std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
-    std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod };
+    std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod };
 
     for (auto& data_type : data_types_to_test)
     {
         for (auto& mode : eltwise_ops_to_test)
         {
-            engine engine;
+            const auto& engine = get_test_engine();
             auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
             auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
 
@@ -434,6 +1775,84 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) {
                     expected = std::min(input_1_vec[i], input_2_vec[i]);
                 else if (mode == eltwise_mode::max)
                     expected = std::max(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::mod) {
+                    expected = std::fmod(input_1_vec[i], input_2_vec[i]);
+                }
+
+
+                EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
+            }
+        }
+    }
+}
+
+TEST(eltwise_gpu_f32_int, basic_in4x4x4x4) {
+    // Same params as in eltwise_gpu_f32, sub_basic_in4x4x4x4 but using int types for first input.
+    //
+    // Eltwise supports mixed inputs, but only first input can be set as intX.
+
+    std::vector<data_types> data_types_to_test = { data_types::i8, data_types::i32, data_types::i64 };
+    std::vector<eltwise_mode> eltwise_ops_to_test = { eltwise_mode::sum, eltwise_mode::sub, eltwise_mode::div, eltwise_mode::prod, eltwise_mode::min, eltwise_mode::max, eltwise_mode::mod };
+
+    for (auto& data_type : data_types_to_test)
+    {
+        for (auto& mode : eltwise_ops_to_test)
+        {
+            const auto& engine = get_test_engine();
+            auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+            auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+
+            topology topology;
+            topology.add(input_layout("input", input.get_layout()));
+            topology.add(input_layout("input2", input2.get_layout()));
+            topology.add(reorder("input_reorder", "input", { data_type, format::yxfb,{ 2, 2, 2, 2 } }));
+            topology.add(eltwise("eltwise", { "input_reorder", "input2" }, mode));
+            topology.add(reorder("eltwise_reorder", "eltwise", { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } }));
+
+            std::vector<float> input_1_vec = {
+                1.f,   0.f,  5.f,  1.f,
+                2.f,   0.f,  6.f,  5.f,
+                3.f,   0.f, 7.f,  12.f,
+                4.f,   0.f, 8.f,   8.f
+            };
+            set_values(input, input_1_vec);
+
+            std::vector<float> input_2_vec = {
+                0.f,  2.f,  0.f, -1.f,
+                5.f,   7.f,   2.f,   2.f,
+                15.f,  17.f,   8.f,   8.f,
+                6.f,   8.f, 0.f,  10.f };
+            set_values(input2, input_2_vec);
+
+            network network(engine, topology);
+            network.set_input_data("input", input);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            ASSERT_EQ(outputs.size(), size_t(1));
+            EXPECT_EQ(outputs.begin()->first, "eltwise_reorder");
+
+            auto output = outputs.at("eltwise_reorder").get_memory();
+
+            auto output_ptr = output.pointer<float>();
+
+            for (int i = 0; i < 16; i++)
+            {
+                float expected = 0.f;
+                if (mode == eltwise_mode::sum)
+                    expected = input_1_vec[i] + input_2_vec[i];
+                else if (mode == eltwise_mode::sub)
+                    expected = input_1_vec[i] - input_2_vec[i];
+                else if (mode == eltwise_mode::prod)
+                    expected = input_1_vec[i] * input_2_vec[i];
+                else if (mode == eltwise_mode::div)
+                    expected = input_1_vec[i] / input_2_vec[i];
+                else if (mode == eltwise_mode::min)
+                    expected = std::min(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::max)
+                    expected = std::max(input_1_vec[i], input_2_vec[i]);
+                else if (mode == eltwise_mode::mod)
+                    expected = std::fmod(input_1_vec[i], input_2_vec[i]);
 
                 EXPECT_TRUE(are_equal(std::floor(expected), output_ptr[i]));
             }
@@ -447,33 +1866,33 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) {
     //  Output : 2x2x2x2
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1    5.2     
-    //  f1: b0:  7    8  b1:   12   7.5       
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1    5.2
+    //  f1: b0:  7    8  b1:   12   7.5
     //
     //  Input2
-    //  f0: b0: 0.5  0.5   b1: 5  2 
+    //  f0: b0: 0.5  0.5   b1: 5  2
     //  f0: b0: 2.5  2.5   b1: 7  4
     //  f1: b0: 15   8     b1: 6  -0.5
     //  f1: b0: 17   10    b1: 8  -2.5
     //
     //  Output:
-    //  f0: b0:   0.5  1     b1:  0      0       
-    //  f0: b0:   7.5  10    b1:  3.5   -2     
-    //  f1: b0:   75   48    b1:  6     -2.6     
-    //  f1: b0:   119  80    b1:  96   -18.75     
+    //  f0: b0:   0.5  1     b1:  0      0
+    //  f0: b0:   7.5  10    b1:  3.5   -2
+    //  f1: b0:   75   48    b1:  6     -2.6
+    //  f1: b0:   119  80    b1:  96   -18.75
     //
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
     topology.add(input_layout("input2", input2.get_layout()));
     topology.add(eltwise("eltwise", {"input", "input2"}, eltwise_mode::prod));
-    
+
     set_values(input, {
         1.f,   0.f,  5.f,  1.f,
         2.f,   0.f,  6.f,  5.2f,
@@ -503,7 +1922,7 @@ TEST(eltwise_gpu_f32, prod_basic_in4x4x4x4) {
         1.0f,   0.0f,    48.f,   -2.6f,
         7.5f,   3.5f,   119.f,   96.0f,
        10.0f,  -2.0f,    80.f, -18.75f };
-    
+
     auto output_ptr = output.pointer<float>();
 
     for (int i = 0; i < 16; i++)
@@ -519,24 +1938,24 @@ TEST(eltwise_gpu_f32, max_basic_in4x4x4x4_input_padding) {
     //  Input Padding: 2x1 (with reorder)
 
     //  Input:
-    //  f0: b0:  1    2  b1:   0    0       
-    //  f0: b0:  3    4  b1:   0.5 -0.5     
-    //  f1: b0:  5    6  b1:   1.5  5.2     
-    //  f1: b0:  7    8  b1:   12   8       
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
     //
     //  Input2
-    //  f0: b0: 0.5  5   b1: 2.5  7 
+    //  f0: b0: 0.5  5   b1: 2.5  7
     //  f0: b0: 15   6   b1: 17   8
     //  f1: b0: 0.5  2   b1: 2.5  4
     //  f1: b0: 8   -0.5 b1: 10   -2.5
     //
     //  Output:
-    //  f0: b0:    1   5    b1:  2.5   7       
-    //  f0: b0:   15   6    b1:  17    8    
-    //  f1: b0:    5   6    b1:  2.5   5.2     
-    //  f1: b0:    8   8    b1:  12    8     
+    //  f0: b0:    1   5    b1:  2.5   7
+    //  f0: b0:   15   6    b1:  17    8
+    //  f1: b0:    5   6    b1:  2.5   5.2
+    //  f1: b0:    8   8    b1:  12    8
     //
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
@@ -610,7 +2029,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) {
     //  f1: b0:   7.5  8.25  b1:  11    8.25
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -658,7 +2077,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients) {
 }
 
 TEST(eltwise_gpu_f32, coefficients_count_check) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -718,7 +2137,7 @@ TEST(eltwise_gpu_f32, add_basic_in4x4x2x2_with_coefficients_3inputs) {
     //  f1: b0:   8.5   8.75   b1:  11    8.75
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
@@ -805,7 +2224,7 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
     //  f1: b0:    5   6    b1:  2.5   7
     //  f1: b0:    9   8    b1:  12    8
     //
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
@@ -866,12 +2285,332 @@ TEST(eltwise_gpu_f32, max_3inputs_in4x4x4x4_input_padding) {
 }
 
 
+TEST(eltwise_gpu_f32, stride_test_2x2) {
+    //  Input  : 2x2x2x2
+    //  Input2 : 2x2x4x4
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 1  2  3  4   b1: 17 18 19 20
+    //  f0: b0: 5  6  7  8   b1: 21 22 23 24
+    //  f0: b0: 9  10 11 12  b1: 25 26 27 28
+    //  f0: b0: 13 14 15 16  b1: 29 30 31 32
+
+    //  f1: b0: 33 34 35 36  b1: 49 50 51 52
+    //  f1: b0: 37 38 39 40  b1: 53 54 55 56
+    //  f1: b0: 41 42 43 44  b1: 57 58 59 60
+    //  f1: b0: 45 46 47 48  b1: 61 62 63 64
+
+    //
+    //  Output:
+    //  f0: b0: 1  3    b1:  17  19
+    //  f0: b0: 9  11   b1:  25  27
+    //  f1: b0: 33 35   b1:  49  51
+    //  f1: b0: 41 43   b1:  57  59
+    //
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 4, 4 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", "input", "input2", { {0,0,1,1}, {0,0,2,2} }, eltwise_mode::max));
+
+    set_values(input, {
+        1.f,   0.f,  5.f,  1.5f,
+        2.f,   0.f,  6.f,  5.2f,
+        3.f,   0.5f, 7.f, 12.f,
+        4.f,  -0.5f, 8.f,  8.f
+        });
+
+    set_values<float>(input2, {
+        1,  17, 33, 49,
+        2,  18, 33, 50,
+        3,  19, 35, 51,
+        4,  20, 36, 52,
+        5,  21, 37, 53,
+        6,  22, 38, 54,
+        7,  23, 39, 55,
+        8,  24, 40, 56,
+        9,  25, 41, 57,
+        10, 26, 42, 58,
+        11, 27, 43, 59,
+        12, 28, 44, 60,
+        13, 29, 45, 61,
+        14, 30, 46, 62,
+        15, 31, 47, 63,
+        16, 32, 48, 64 });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        1,  17,  33,  49,
+        3,  19,  35,  51,
+        9,  25,  41,  57,
+        11, 27,  43,  59 };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(eltwise_gpu_f32, broadcast_test_in4x4x2x2) {
+    //  Input2   : 2x1x1
+    //  Input  : 2x2x2x2
+    //  Output : 2x2x2x2
+
+    //  Input:
+    //  f0: b0:  1    2  b1:   0    0
+    //  f0: b0:  3    4  b1:   0.5 -0.5
+    //  f1: b0:  5    6  b1:   1.5  5.2
+    //  f1: b0:  7    8  b1:   12   8
+    //
+    //  Input2
+    //  f0: b0: 0.5  b1: 2.5
+    //  f1: b0: 0.5  b1: 2.5
+    //
+    //  Output:
+    //  f0: b0:   1.5  7    b1:  2.5   7
+    //  f0: b0:   18   2    b1:  17.5  6
+    //  f1: b0:   5.5  8    b1:   4    9.2
+    //  f1: b0:   15  16.5  b1:  22    16.5
+    //
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(eltwise("eltwise", { "input", "input2" }, eltwise_mode::sum));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f
+    });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output = outputs.at("eltwise").get_memory();
+
+    float answers[16] = {
+        1.5f,   2.5f,   5.5f,   4.f,
+        2.5f,   2.5f,   6.5f,   7.7f,
+        3.5f,    3.f,   7.5f,  14.5f,
+        4.5f,    2.f,   8.5f,  10.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+
+template <typename T>
+int8_t eltwise_bool_execute(cldnn::eltwise_mode mode, T x, T y) {
+    switch (mode) {
+    case eltwise_mode::eq:
+        return x == y;
+    case eltwise_mode::ne:
+        return x != y;
+    case eltwise_mode::lt:
+        return x < y;
+    case eltwise_mode::le:
+        return x <= y;
+    case eltwise_mode::gt:
+        return x > y;
+    case eltwise_mode::ge:
+        return x >= y;
+    case eltwise_mode::logic_and:
+        return x && y;
+    case eltwise_mode::logic_or:
+        return x || y;
+    default:
+        return (int8_t)0;
+    }
+}
+
+template <typename T>
+VVVVF<int8_t> eltwise_bool_reference(VVVVF<T> &input1, VVVVF<T> &input2,
+    cldnn::eltwise_mode mode, int input_padding_y = 0,
+    int input_padding_x = 0, int output_padding_y = 0,
+    int output_padding_x = 0) {
+
+    size_t padding_y = input_padding_y + output_padding_y;
+    size_t padding_x = input_padding_x + output_padding_x;
+    size_t output_b = input1.size();
+    size_t output_f = input1[0].size();
+    size_t output_y = input1[0][0].size() + 2 * padding_y;
+    size_t output_x = input1[0][0][0].size() + 2 * padding_x;
+    VVVVF<int8_t> output(output_b, VVVF<int8_t>(output_f, VVF<int8_t>(output_y, VF<int8_t>(output_x))));
+
+    T res;
+    for (size_t b = 0; b < output_b; ++b) {
+        for (size_t f = 0; f < output_f; ++f) {
+            for (size_t y = 0; y < input1[0][0].size(); ++y) {
+                for (size_t x = 0; x < input1[0][0][0].size(); ++x) {
+                    res = eltwise_bool_execute<T>(mode, input1[b][f][y][x], input2[b][f][y][x]);
+                    output[b][f][y + padding_y][x + padding_x] = res;
+                }
+            }
+        }
+    }
+    return output;
+}
+
+template <typename T>
+void generic_eltwise_bool_test(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, cldnn::eltwise_mode mode,
+    int input_padding_y, int input_padding_x, int output_padding_y, int output_padding_x) {
+
+    int min_random = -2, max_random = 2;
+    VVVVF<T> input1_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VVVVF<T> input2_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VF<T> input1_rnd_vec = flatten_4d<T>(test_input_fmt, input1_rnd);
+    VF<T> input2_rnd_vec = flatten_4d<T>(test_input_fmt, input2_rnd);
+
+    const auto& engine = get_test_engine();
+    tensor input_tensor( input_b, input_f, input_x, input_y );
+    auto input1 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    auto input2 = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    set_values(input1, input1_rnd_vec);
+    set_values(input2, input2_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input1", input1.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(reorder("reorder1", "input1", input1.get_layout().with_padding({{ 0, 0, input_padding_x, input_padding_y }, 0 })));
+    topology.add(eltwise("eltwise", {"reorder1", "input2"}, mode, false, 0.f, { { 0, 0, output_padding_x, output_padding_y }, 0 }));
+
+    network network(engine, topology);
+    network.set_input_data("input1", input1);
+    network.set_input_data("input2", input2);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "eltwise");
+
+    auto output_memory = outputs.at("eltwise").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<int8_t>();
+
+    VVVVF<int8_t> output_cpu = eltwise_bool_reference<T>(input1_rnd, input2_rnd, mode, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
+    EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
+    tensor output_tensor = output_layout.get_buffer_size();
+    int y_size = output_tensor.spatial[1];
+    int x_size = output_tensor.spatial[0];
+    int f_size = output_tensor.feature[0];
+    int b_size = output_tensor.batch[0];
+    EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
+    EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
+    EXPECT_EQ(f_size, (int)output_cpu[0].size());
+    EXPECT_EQ(b_size, (int)output_cpu.size());
+
+    bool test_is_correct = true;
+    VF<int8_t> output_cpu_vec = flatten_4d<int8_t>(test_input_fmt, output_cpu);
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        if (output_cpu_vec[i] != output_ptr[i]) {
+            test_is_correct = false;
+            break;
+        }
+    }
+    EXPECT_EQ(test_is_correct, true) << std::endl
+        << "failing test parameters:" << std::endl
+        << "input_b = " << input_b << std::endl
+        << "input_f = " << input_f << std::endl
+        << "input_y = " << input_y << std::endl
+        << "input_x = " << input_x << std::endl
+        << "eltwise_mode = " << (int)mode << std::endl
+        << "input_padding_y = " << input_padding_y << std::endl
+        << "input_padding_x = " << input_padding_x << std::endl
+        << "output_padding_y = " << output_padding_y << std::endl
+        << "output_padding_x = " << output_padding_x << std::endl
+        << "type = " << (sizeof(T) == 1 ? "int8" : "int32") << std::endl;
+}
+
+void run_eltwise_bool_generic_test(cldnn::eltwise_mode mode)
+{
+    cldnn::format test_inputs_fmt = cldnn::format::bfyx;
+    std::pair<int, int> input_size = { 227, 227 };
+
+    generic_eltwise_bool_test<int32_t>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, 0, 0, 0, 0);
+    generic_eltwise_bool_test<int8_t>(test_inputs_fmt, 1, 1, input_size.first, input_size.second, mode, 0, 0, 0, 0);
+}
+
+TEST(eltwise_gpu_bool, eltwise_eq) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::eq);
+}
+
+TEST(eltwise_gpu_bool, eltwise_ne) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::ne);
+}
+
+TEST(eltwise_gpu_bool, eltwise_lt) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::lt);
+}
+
+TEST(eltwise_gpu_bool, eltwise_le) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::le);
+}
+
+TEST(eltwise_gpu_bool, eltwise_gt) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::gt);
+}
+
+TEST(eltwise_gpu_bool, eltwise_ge) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::ge);
+}
+
+TEST(eltwise_gpu_bool, eltwise_and) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_and);
+}
+
+TEST(eltwise_gpu_bool, eltwise_or) {
+    run_eltwise_bool_generic_test(cldnn::eltwise_mode::logic_or);
+}
+
+
 void run_eltwise_generic_test(cldnn::eltwise_mode mode)
 {
     cldnn::format test_inputs_fmt = cldnn::format::bfyx;
     std::pair<int, int> input_size = { 227, 227 };
 
-    engine engine;
+    const auto& engine = get_test_engine();
     bool f16_supported = !!engine.get_info().supports_fp16;
     if (!f16_supported) {
         std::cout << "[ SKIPPED  ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
@@ -898,6 +2637,295 @@ TEST(eltwise_gpu, eltwise_mod) {
     run_eltwise_generic_test(cldnn::eltwise_mode::mod);
 }
 
+
+TEST(eltwise_gpu, b_fs_yx_fsv4_w_callib) {
+    int B_array[] = {   1,   4,   16,   32, 0 };  // Batch
+    int F_array[] = { 256, 512, 1024, 2048, 0 };  // Features
+    int I_array[] = {  56,  28,   14,   14, 0 };  // Input MxM data sizes
+
+    for (int j = 0; F_array[j]; j++) {
+        const auto& engine = get_test_engine();
+
+        int in_B = B_array[j];
+        int in_F = F_array[j];
+
+        int in_X = I_array[j],
+            in_Y = in_X;
+
+        // Input data init
+        std::vector<char> Data(in_B * in_F * in_X * in_Y);
+        for (size_t i = 0; i < Data.size(); i++)
+            Data[i] = static_cast<char>(i);
+        std::vector<char> DataGold(Data);
+
+        // Expected "gold" output and IMAD output.
+        std::vector<char>  vGoldOutput;
+        std::vector<char>  vTestOutput;
+
+        // Mem initialization
+        // This is user data, no kernels here
+        auto input1 = memory::allocate(engine,
+                                      { data_types::i8,
+                                          format::bfyx,
+                                          { in_B, in_F, in_X, in_Y } });
+        std::vector<char> data_i1(DataGold);
+        set_values(input1, std::move(data_i1));
+        auto input2 = memory::allocate(engine,
+                                      { data_types::i8,
+                                          format::bfyx,
+                                          { in_B, in_F, in_X, in_Y } });
+        std::vector<char> data_i2(DataGold);
+        set_values(input2, std::move(data_i2));
+
+        auto callib = memory::allocate(engine,
+                                       { data_types::f32,
+                                         format::bfyx,
+                                         { 1, in_F, 1, 1 } });
+        std::vector<float> data_c(in_F);
+        float sign = 1;
+        for (size_t i = 0; i < data_c.size(); i++) {
+            data_c[i] = ((i + 1) % 7) ? sign : -sign;
+            sign *= (float)1.0123;
+        }
+        set_values(callib, std::move(data_c));
+
+        // "Golden" Eltwise
+        {
+            topology topology;
+
+            auto eltw = eltwise("eltw_GOLD",
+                                "input1", "input2",
+                                "callib",
+                                eltwise_mode::sum, true);
+
+            // Create a topology
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         eltw);
+
+            topology.add(data("callib", callib));
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("eltw_GOLD");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vGoldOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vGoldOutput.push_back(output_ptr[i]);
+        }
+
+        // "IMAD" Eltwise
+        {
+            topology topology;
+
+            // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format
+            topology.add(reorder("reorder1_Swizzelled",
+                                 "input1",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })),
+                         reorder("reorder2_Swizzelled",
+                                  "input2",
+                                  layout(data_types::i8,
+                                         format::b_fs_yx_fsv4,
+                                         { in_B, in_F, in_X, in_Y })));
+
+            auto eltw = eltwise("eltw_IMAD",
+                                "reorder1_Swizzelled", "reorder2_Swizzelled",
+                                "callib",
+                                eltwise_mode::sum, true);
+
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         eltw);
+
+            topology.add(data("callib", callib));
+
+            // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
+            topology.add(reorder("reorder_UnSwizzelled",
+                                 "eltw_IMAD",
+                                 layout(data_types::i8,
+                                        format::bfyx,
+                                        { in_B, in_F, in_X, in_Y })));
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("reorder_UnSwizzelled");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vTestOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vTestOutput.push_back(output_ptr[i]);
+        }
+
+        // Result validation
+        ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
+        for (size_t i = 0; i < vGoldOutput.size(); i++)
+            ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
+    } // for (int j = 0; F_array[j]; j++)
+}
+
+TEST(eltwise_gpu, b_fs_yx_fsv4_wo_callib) {
+    //
+    // Input data
+    const int BATCH = 1;
+    const int in_B = BATCH;
+
+    const auto& engine = get_test_engine();
+
+    int in_F = 256;
+
+    int in_X = 56,
+        in_Y = in_X;
+
+    // Input data init
+    std::vector<char> Data(in_B * in_F * in_X * in_Y);
+    for (size_t i = 0; i < Data.size(); i++)
+        Data[i] = static_cast<char>(i);
+    std::vector<char> DataGold(Data);
+
+    // Mem initialization
+    // This is user data, no kernels here
+    auto input1 = memory::allocate(engine,
+                                   { data_types::i8,
+                                       format::bfyx,
+                                       { in_B, in_F, in_X, in_Y } });
+    std::vector<char> data_i1(DataGold);
+    for (size_t i = 0; i < data_i1.size(); i++) data_i1[i] = data_i1[i] + 1;
+    set_values(input1, std::move(data_i1));
+
+    auto input2 = memory::allocate(engine,
+                                  { data_types::i8,
+                                      format::bfyx,
+                                      { in_B, in_F, in_X, in_Y } });
+    std::vector<char> data_i2(DataGold);
+    for (size_t i = 0; i < data_i2.size(); i++) data_i2[i] = data_i2[i] + 2;
+    set_values(input2, std::move(data_i2));
+
+    auto input3 = memory::allocate(engine,
+                                  { data_types::i8,
+                                      format::bfyx,
+                                      { in_B, in_F, in_X, in_Y } });
+    std::vector<char> data_i3(DataGold);
+    for (size_t i = 0; i < data_i3.size(); i++) data_i3[i] = data_i3[i] + 3;
+    set_values(input3, std::move(data_i3));
+
+    cldnn::eltwise_mode mode[] = { cldnn::eltwise_mode::min,
+                                   cldnn::eltwise_mode::max,
+                                   cldnn::eltwise_mode::sum };
+
+    for (int i = 0; i < 3; i++) {
+        // Expected "gold" output and IMAD output.
+        std::vector<char>  vGoldOutput;
+        std::vector<char>  vTestOutput;
+
+        // "Golden" Eltwise
+        {
+            topology topology;
+
+            auto eltw = eltwise("eltw_GOLD",
+                                { "input1", "input2", "input3" },
+                                mode[i], true);
+
+            // Create a topology
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         input_layout("input3", input3.get_layout()),
+                         eltw);
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            network.set_input_data("input3", input3);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("eltw_GOLD");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vGoldOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vGoldOutput.push_back(output_ptr[i]);
+        }
+
+        // "IMAD" Eltwise
+        {
+            topology topology;
+
+            // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format
+            topology.add(reorder("reorder1_Swizzelled",
+                                 "input1",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })),
+                         reorder("reorder2_Swizzelled",
+                                 "input2",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })),
+                         reorder("reorder3_Swizzelled",
+                                 "input3",
+                                 layout(data_types::i8,
+                                        format::b_fs_yx_fsv4,
+                                        { in_B, in_F, in_X, in_Y })));
+
+            auto eltw = eltwise("eltw_IMAD",
+                                { "reorder1_Swizzelled",
+                                  "reorder2_Swizzelled",
+                                  "reorder3_Swizzelled" },
+                                mode[i], true);
+
+            topology.add(input_layout("input1", input1.get_layout()),
+                         input_layout("input2", input2.get_layout()),
+                         input_layout("input3", input3.get_layout()),
+                         eltw);
+
+            // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
+            topology.add(reorder("reorder_UnSwizzelled",
+                                 "eltw_IMAD",
+                                 layout(data_types::i8,
+                                     format::bfyx,
+                                     { in_B, in_F, in_X, in_Y })));
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input1", input1);
+            network.set_input_data("input2", input2);
+            network.set_input_data("input3", input3);
+            auto outputs = network.execute();
+
+            // Validation
+            auto searchC = outputs.find("reorder_UnSwizzelled");
+            EXPECT_NE(searchC, outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vTestOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vTestOutput.push_back(output_ptr[i]);
+        }
+
+        // Result validation
+        ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
+        for (size_t i = 0; i < vGoldOutput.size(); i++)
+            ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
+    }
+}
+
 TEST(DISABLED_eltwise_gpu, generic_random) {
     VF<cldnn::format> test_inputs_fmts = { cldnn::format::bfyx, cldnn::format::yxfb };
     VF<cldnn::eltwise_mode> modes = { cldnn::eltwise_mode::sum, cldnn::eltwise_mode::sub, cldnn::eltwise_mode::max, cldnn::eltwise_mode::prod };
@@ -905,7 +2933,7 @@ TEST(DISABLED_eltwise_gpu, generic_random) {
     VF<float> slopes = { 0.0f, -0.0f, -17.19f, 1028.8f, std::numeric_limits<float>::infinity(), -std::numeric_limits<float>::infinity() };
     std::vector<std::pair<int, int>> input_sizes = { { 100, 100 },{ 227, 227 },{ 400, 600 } };
 
-    engine engine;
+    const auto& engine = get_test_engine();
     bool f16_supported = !!engine.get_info().supports_fp16;
     if (!f16_supported) {
         std::cout << "[ SKIPPED  ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/embed_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/embed_gpu_test.cpp
index be00e84cc..1ed45153d 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/embed_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/embed_gpu_test.cpp
@@ -53,12 +53,12 @@ TEST(embed_gpu, seq3num4) {
     //   0.0    0.0   0.0    0.0
     //   6.0    8.0  -2.0   -2.0
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto batch = 1;
     auto sequence_length = 3;
     auto num_output_size = 4;
     auto vocab_size = 3;
-    auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, 1, 1, sequence_length } });
+    auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, 1, sequence_length, 1 } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ num_output_size, 1, vocab_size, 1 } });
     auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, 1, 1, num_output_size } });
     auto output_ref = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, sequence_length, num_output_size, 1 } });
@@ -119,12 +119,12 @@ TEST(embed_gpu, b2seq2num3) {
     //   -1.0   0.0   1.0   -1.0   4.0   4.0
     //    10.0  18.0  19.0  -1.0   0.0   1.0
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto batch = 2;
     auto sequence_length = 2;
     auto num_output_size = 3;
     auto vocab_size = 3;
-    auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, 1, 1, sequence_length } });
+    auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, 1, sequence_length, 1 } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ num_output_size, 1, vocab_size, 1 } });
     auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 1, 1, num_output_size } });
     auto output_ref = memory::allocate(engine, { data_types::f32,format::bfyx,{ batch, sequence_length, num_output_size, 1 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp
index f07fa00a7..4882d9a4f 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_gpu_test.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -54,11 +54,11 @@ VVVVF<T> fully_connected_reference(VVVVF<T> &input, VVVVF<T> &weights, VF<T> &bi
     size_t input_y = input[0][0].size();
     size_t input_x = input[0][0][0].size();
     size_t output_b = input.size();        // input is assumed to be bfyx
-    size_t output_x = weights.size();    // weights is assumed to be bfyx
-    VVVVF<T> output(output_b, VVVF<T>(1, VVF<T>(1, VF<T>(output_x))));
+    size_t output_f = weights.size();    // weights is assumed to be bfyx
+    VVVVF<T> output(output_b, VVVF<T>(1, VVF<T>(1, VF<T>(output_f))));
     float res;
     for (size_t b = 0; b < output_b; ++b) {
-        for (size_t n = 0; n < output_x; ++n) {
+        for (size_t n = 0; n < output_f; ++n) {
             res = bias[n];
             for (size_t f = 0; f < input_f; ++f) {
                 for (size_t y = 0; y < input_y; ++y) {
@@ -76,20 +76,20 @@ VVVVF<T> fully_connected_reference(VVVVF<T> &input, VVVVF<T> &weights, VF<T> &bi
 }
 
 template <typename T>
-void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format test_weights_fmt, int input_b, int f, int y, int x, int output_x, bool relu, T slope = 0) {
+void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format test_weights_fmt, int input_b, int f, int y, int x, int output_f, bool relu, T slope = 0) {
     int min_random = -2, max_random = 2;
     VVVVF<T> input_rnd = generate_random_4d<T>(input_b, f, y, x, min_random, max_random);
-    VVVVF<T> weights_rnd = generate_random_4d<T>(output_x, f, y, x, min_random, max_random);
-    VF<T> bias_rnd_vec = generate_random_1d<T>(output_x, min_random, max_random);
+    VVVVF<T> weights_rnd = generate_random_4d<T>(output_f, f, y, x, min_random, max_random);
+    VF<T> bias_rnd_vec = generate_random_1d<T>(output_f, min_random, max_random);
     VF<T> input_rnd_vec = flatten_4d<T>(test_input_fmt, input_rnd);
     VF<T> weights_rnd_vec = flatten_4d<T>(test_weights_fmt, weights_rnd);
 
-    engine engine;
+    const auto& engine = get_test_engine();
     tensor input_tensor(input_b, f, x, y);
-    tensor weights_tensor(output_x, f, x, y);
+    tensor weights_tensor(output_f, f, x, y);
     auto input = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
     auto weights = memory::allocate(engine, { type_to_data_type<T>::value, test_weights_fmt, weights_tensor });
-    auto bias = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1,1,output_x,1 } });
+    auto bias = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1,1,output_f,1 } });
     set_values(input, input_rnd_vec);
     set_values(weights, weights_rnd_vec);
     set_values(bias, bias_rnd_vec);
@@ -115,9 +115,9 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te
     //EXPECT_EQ(output_layout.format.value, test_input_fmt);
     tensor output_tensor = output_layout.size;
     int b_size = output_tensor.batch[0];
-    int x_size = output_tensor.spatial[0];
+    int x_size = output_tensor.feature[0];
     EXPECT_EQ(b_size, input_b);
-    EXPECT_EQ(x_size, output_x);
+    EXPECT_EQ(x_size, output_f);
     unsigned num_of_operations = f * x * y * 2;
     float ulp = (1.0f / 1024.0f) * num_of_operations;
     bool test_is_correct = true;
@@ -139,7 +139,7 @@ void generic_fully_connected_test(cldnn::format test_input_fmt, cldnn::format te
         << "f = " << f << std::endl
         << "y = " << y << std::endl
         << "x = " << x << std::endl
-        << "output_x = " << output_x << std::endl
+        << "output_f = " << output_f << std::endl
         << "relu = " << relu << std::endl
         << "slope = " << (float)slope << std::endl
         << "type = " << (sizeof(T) == 2 ? "float16" : "float32") << std::endl;
@@ -154,7 +154,7 @@ TEST(DISABLED_fully_connected_gpu, generic_random_short) {
     std::vector<std::pair<int, int>> input_sizes = { {28, 28}, {64, 64}, {100, 100}, {227, 227}, {1000, 1}, {1, 4096} };
     VF<int> outputs_x = { 5, 16 };
 
-    engine engine;
+    const auto& engine = get_test_engine();
     bool f16_supported = !!engine.get_info().supports_fp16;
     if (!f16_supported) {
         std::cout << "[ SKIPPED  ] float16 combinations are skipped (cl_khr_fp16 is not supported)." << std::endl;
@@ -165,18 +165,18 @@ TEST(DISABLED_fully_connected_gpu, generic_random_short) {
             for (const auto& b : batches) {
                 for(const auto& f : features) {
                     for (const auto& sizes : input_sizes) {
-                        for (int output_x : outputs_x) {
+                        for (int output_f : outputs_x) {
                             for (bool relu_activated : relu) {
-                                    generic_fully_connected_test<float>(test_input_fmt, test_weights_fmt, b, f, sizes.second, sizes.first, output_x, relu_activated);
+                                    generic_fully_connected_test<float>(test_input_fmt, test_weights_fmt, b, f, sizes.second, sizes.first, output_f, relu_activated);
                                     if (!f16_supported) continue;
-                                    generic_fully_connected_test<FLOAT16>(test_input_fmt, test_weights_fmt, b, f, sizes.second, sizes.first, output_x, relu_activated);
+                                    generic_fully_connected_test<FLOAT16>(test_input_fmt, test_weights_fmt, b, f, sizes.second, sizes.first, output_f, relu_activated);
                             }
                         }
                     }
                 }
             }
         }
-    }  
+    }
 }
 
 TEST(fully_connected_gpu, no_biases) {
@@ -203,7 +203,7 @@ TEST(fully_connected_gpu, no_biases) {
     const int32_t input_x = 3, input_b = 1,  // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1} });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
@@ -261,7 +261,7 @@ TEST(fully_connected_gpu, no_biases_int8) {
     const int32_t input_x = 3, input_b = 1,  // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ input_b, 1, input_x, 1 } });
     auto weights_prim = memory::allocate(engine, { data_types::i8,format::bfyx,{ weight_b, 1, weight_x, 1 } });
@@ -319,16 +319,15 @@ TEST(fully_connected_gpu, xb_f32_batch_1) {
     //  Output:
     //   2.5    2.75    0.75   7
 
-    const int32_t output_x = 4,  // size of whole output buffer
+    const int32_t output_f = 4,  // size of whole output buffer
         input_x = 3, input_b = 1,  // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate( engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1 } });
-    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_x } },{ 1 } } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,output_x, 1} });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,output_f, 1} });
 
     set_values(input_prim, { -0.5f, 2.0f, 0.5f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -380,16 +379,15 @@ TEST(fully_connected_gpu, xb_f32_batch_2) {
     //   2.5    2.75     0.75   7
     //   4      1        2.75   5
 
-    const int32_t output_x = 4,  // size of whole output buffer
+    const int32_t output_f = 4,  // size of whole output buffer
         input_x = 3, input_b = 2,  // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b,1,input_x, 1 } });
-    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_x } },{ 1 } } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_x,1 } });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
 
     set_values(input_prim, { -0.5f, 1.0f, 2.0f, 1.5f, 0.5f, 0.0f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -442,16 +440,16 @@ TEST(fully_connected_gpu, x_f32) {
     //  Output:
     //   2.5    2.75    0.75   7
 
-    const int32_t output_x = 4,                 // size of whole output buffer
+    const int32_t output_f = 4,                 // size of whole output buffer
         input_x = 3,                 // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx, { 1,1,input_x,1 } });
-    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_x } },{ 1 } } });
+    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_x,1 } });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
 
     set_values(input_prim, { -0.5f, 2.0f, 0.5f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -492,7 +490,7 @@ TEST(fully_connected_gpu, yxfn_f32) {
     //   3  -4      f1: b0
 
     //  Weights:
-    //   1  -1      n0: fm0  
+    //   1  -1      n0: fm0
     //   2   0      n0: fm1
     //   3   4      n1: fm0
     //   0.5 5      n1: fm1
@@ -503,7 +501,7 @@ TEST(fully_connected_gpu, yxfn_f32) {
     //  Output:
     //   10  -28.5
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 2, 2, 1 } });
     //auto output_prim = memory::allocate({ memory::format::xb_f32,{ 2 ,{ { 1 } }, 1 } });
@@ -557,16 +555,16 @@ TEST(fully_connected_gpu, xb_f32_batch_1_relu) {
     //  Output:
     //   2.5   0      0.75  0
 
-    const int32_t output_x = 4,  // size of whole output buffer
+    const int32_t output_f = 4,  // size of whole output buffer
         input_x = 3, input_b = 1,  // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } });
-    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_x } },{ 1 } } });
+    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_x, 1 } });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f, 1 } });
 
     set_values(input_prim, { -0.5f, 2.0f, 0.5f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -618,16 +616,16 @@ TEST(fully_connected_gpu, xb_f32_batch_2_relu) {
     //   2.5    0   0.75   0
     //   4      0   2.75   0
 
-    const int32_t output_x = 4,  // size of whole output buffer
+    const int32_t output_f = 4,  // size of whole output buffer
         input_x = 3, input_b = 2,  // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::yxfb,{ input_b, 1, input_x, 1 } });
-    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_x } },{ 1 } } });
+    //auto output_prim = memory::allocate({ memory::format::xb_f32,{ output_b,{ { output_f } },{ 1 } } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_x,1 } });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
 
     set_values(input_prim, { -0.5f, 1.0f, 2.0f, 1.5f, 0.5f, 0.0f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -680,16 +678,16 @@ TEST(fully_connected_gpu, x_f32_relu) {
     //  Output:
     //   2.5   0    0.75  0
 
-    const int32_t output_x = 4,                 // size of whole output buffer
+    const int32_t output_f = 4,                 // size of whole output buffer
         input_x = 3,                 // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,input_x,1 } });
-    //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1       ,{ { output_x } }, 1 } });
+    //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1       ,{ { output_f } }, 1 } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_x,1 } });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
 
     set_values(input_prim, { -0.5f, 2.0f, 0.5f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -739,16 +737,16 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) {
     //  Output:
     //   2.5   -0.125    0.75  -0.1
 
-    const int32_t output_x = 4,                 // size of whole output buffer
+    const int32_t output_f = 4,                 // size of whole output buffer
         input_x = 3,                 // size of whole input buffer
         weight_b = 4, weight_x = 3;  // size of whole weights buffer
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,input_x,1 } });
-    //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1       ,{ { output_x } }, 1 } });
+    //auto output_prim = memory::allocate({ memory::format::x_f32,{ 1       ,{ { output_f } }, 1 } });
     auto weights_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ weight_b, 1, weight_x, 1 } });
-    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_x,1 } });
+    auto bias_prim = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1,1,output_f,1 } });
 
     set_values(input_prim, { -0.5f, 2.0f, 0.5f });
     set_values(weights_prim, { 1.5f, 1.0f, 0.5f, -1.0f, 0.0f, 0.5f, 0.5f, -0.5f, -2.0f, -0.5f, 1.0f, 1.5f });
@@ -777,3 +775,126 @@ TEST(fully_connected_gpu, x_f32_relu_with_negative_slope) {
     EXPECT_EQ(0.75f, output_ptr[2]);
     EXPECT_EQ(-0.1f, output_ptr[3]);
 }
+
+TEST(fully_connected_gpu, b_fs_yx_fsv4)
+{
+    const auto& engine = get_test_engine();
+
+    const int in_B = 2;
+    const int in_F = 2048;
+    const int in_Y = 1;
+    const int in_X = 1;
+
+    const int W_B = 1000;
+    const int W_F = in_F;
+    const int W_Y = in_Y;
+    const int W_X = in_X;
+
+    // Input data
+    std::vector<char> Data(in_F * in_B); // in_X=in_Y=1
+    int i = 0;
+    std::generate(Data.begin(), Data.end(), [i]() mutable { return i++ % 9; });
+    auto input = memory::allocate(engine, {data_types::i8, format::bfyx, {in_B, in_F, in_X, in_Y}});
+    set_values(input, std::move(Data));
+
+    // Create a topology
+    topology topology(input_layout("input", input.get_layout()));
+
+    // Reorder
+    topology.add(reorder("reorder_in",
+                         "input",
+                         layout(data_types::i8, format::b_fs_yx_fsv4, {in_B, in_F, in_X, in_Y})));
+
+    // Weights
+    std::vector<char> Weights(W_B * W_F);
+    i = 0;
+    std::generate(Weights.begin(), Weights.end(), [W_F, i]() mutable {
+        return i % 2 ? -(i++) / W_F - 1 : (i++) / W_F + 1;
+    });
+    auto weights_gold =
+        memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
+    auto weights_imad =
+        memory::allocate(engine, {data_types::i8, format::bfyx, {W_B, W_F, W_X, W_Y}});
+    set_values(weights_gold, Weights);
+    set_values(weights_imad, std::move(Weights));
+    topology.add(data("weights_gold", weights_gold), data("weights_imad", weights_imad));
+
+    // Bias, Callibraiton, Quantization
+    std::vector<float> vB(in_F), vC(in_F), vQ(in_F);
+    float x = 0.1f;
+    std::generate(vB.begin(), vB.end(), [x]() mutable {
+        x += 0.01f;
+        if (x >= 0.9f)
+            x = 0.1f;
+        return x;
+    });
+    x = 0.2f;
+    std::generate(vC.begin(), vC.end(), [x]() mutable {
+        x += 0.01f;
+        if (x >= 0.9f)
+            x = 0.2f;
+        return x;
+    });
+    x = 0.3f;
+    std::generate(vQ.begin(), vQ.end(), [x]() mutable {
+        x += 0.01f;
+        if (x >= 0.9f)
+            x = 0.3f;
+        return x;
+    });
+    auto bias_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
+    auto bias_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
+    auto callib_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
+    auto callib_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
+    auto quant_gold = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
+    auto quant_imad = memory::allocate(engine, {data_types::f32, format::bfyx, {1, 1, in_F, 1}});
+    set_values(bias_gold, vB);
+    set_values(bias_imad, std::move(vB));
+    set_values(callib_gold, vC);
+    set_values(callib_imad, std::move(vC));
+    set_values(quant_gold, vQ);
+    set_values(quant_imad, std::move(vQ));
+    topology.add(data("bias_gold", bias_gold),
+                 data("callib_gold", callib_gold),
+                 data("quant_gold", quant_gold));
+    topology.add(data("bias_imad", bias_imad),
+                 data("callib_imad", callib_imad),
+                 data("quant_imad", quant_imad));
+
+    // Fully connected
+    fully_connected fullc_gold(
+        "fullc_gold", "input", "weights_gold", {"bias_gold"}, {"quant_gold"}, {"callib_gold"}, 1.0f);
+    fully_connected fullc_imad(
+        "fullc_imad", "reorder_in", "weights_imad", {"bias_imad"}, {"quant_imad"}, {"callib_imad"}, 1.0f);
+    topology.add(fullc_gold, fullc_imad);
+
+    // Output reorder
+    auto reorder_gold =
+        reorder("reorder_gold", fullc_gold, layout(data_types::i8, format::bfyx, {in_B, W_B, 1, 1}));
+    auto reorder_imad =
+        reorder("reorder_imad", fullc_imad, layout(data_types::i8, format::bfyx, {in_B, W_B, 1, 1}));
+    topology.add(reorder_gold, reorder_imad);
+
+    // Network build
+    build_options build_opt;
+    build_opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, build_opt);
+
+    // Network execuiton
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    auto out_gold = outputs.find("reorder_gold");
+    auto out_test = outputs.find("reorder_imad");
+
+    ASSERT_NE(out_gold, outputs.end());
+    ASSERT_NE(out_test, outputs.end());
+    auto gold_ptr = out_gold->second.get_memory().pointer<char>();
+    auto test_ptr = out_test->second.get_memory().pointer<char>();
+
+    ASSERT_EQ(gold_ptr.size(), test_ptr.size());
+    for (size_t i = 0; i < gold_ptr.size(); i++)
+    {
+        ASSERT_EQ(gold_ptr[i], test_ptr[i]);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_input_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_input_gpu_test.cpp
index 71a107d0c..27375765a 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_input_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_input_gpu_test.cpp
@@ -51,7 +51,7 @@ TEST(fully_connected_grad_input_gpu, basic_bfyx) {
     //  -1.125  5.625   10.125
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_weights_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_weights_gpu_test.cpp
index 7edb6311f..b470bdada 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_weights_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fully_connected_grad_weights_gpu_test.cpp
@@ -44,7 +44,7 @@ TEST(fully_connected_grad_weights_gpu, basic_bfyx) {
     //  Input_grad:
     //   1.5   0.75  -2.25  3
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 1 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 3, 1 } });
@@ -125,7 +125,7 @@ TEST(fully_connected_grad_weights_gpu, basic_bfyx_b8) {
     //   1.5   0.75  -2.25  3
     //   1   1  1  1
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 8, 1, 4, 1 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 8, 1, 3, 1 } });
@@ -206,7 +206,7 @@ TEST(fully_connected_grad_weights_gpu, basic_bfyx_no_bias) {
     //  Input_grad:
     //   1.5   0.75  -2.25  3
 
-    engine engine;
+    const auto& engine = get_test_engine();
     float lr = 0.00001f;
     auto input_grad = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 1 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp
new file mode 100644
index 000000000..a46c6b9a3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/fused_conv_eltwise_gpu_test.cpp
@@ -0,0 +1,112 @@
+/*
+// Copyright (c) 2016 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+#include "api/CPP/memory.hpp"
+#include <api/CPP/input_layout.hpp>
+#include "api/CPP/convolution.hpp"
+#include "api/CPP/eltwise.hpp"
+#include "api/CPP/reorder.hpp"
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/engine.hpp>
+#include "test_utils/test_utils.h"
+#include <api/CPP/data.hpp>
+
+#include <cmath>
+#include <gmock/gmock.h>
+#include <limits>
+
+using namespace cldnn;
+using namespace tests;
+using namespace testing;
+
+TEST(fused_conv_eltwise, basic_0)
+{
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 4, 5 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    set_values(input, {
+        1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
+        -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
+    });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        convolution("conv", "input", { "weights" }),
+        eltwise("eltwise", "input", "conv", eltwise_mode::sum),
+        reorder("out", "eltwise", format::bfyx, data_types::f32));
+
+    build_options opt;
+    opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, opt);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "out");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto&& out_layout = output.get_layout();
+
+    EXPECT_EQ(out_layout.format, format::bfyx);
+    EXPECT_EQ(out_layout.size.batch[0], 1);
+    EXPECT_EQ(out_layout.size.feature[0], 1);
+    EXPECT_EQ(out_layout.size.spatial[0], 4);
+    EXPECT_EQ(out_layout.size.spatial[1], 5);
+}
+
+
+TEST(fused_conv_eltwise, dont_fuse_if_conv_elt_are_outputs)
+{
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 5 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, {
+        1.0f,  2.0f, -15.f,  3.0f, 4.0f, -15.f, 5.0f,  6.0f, -15.f, 7.0f,
+        -15.f, 0.0f,  0.0f, -15.f, 0.5f, -0.5f, -15.f, 8.0f,  1.5f,  5.2f
+        });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        data("weights", weights),
+        convolution("conv", "input", { "weights" }),
+        eltwise("out", "input", "conv", eltwise_mode::sum));
+
+    build_options opt;
+    opt.set_option(build_option::optimize_data(true));
+    network network(engine, topology, opt);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "out");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto&& out_layout = output.get_layout();
+
+    EXPECT_EQ(out_layout.format, format::bfyx);
+    EXPECT_EQ(out_layout.size.batch[0], 1);
+    EXPECT_EQ(out_layout.size.feature[0], 1);
+    EXPECT_EQ(out_layout.size.spatial[0], 4);
+    EXPECT_EQ(out_layout.size.spatial[1], 5);
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp
new file mode 100644
index 000000000..90b4e80d5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/gather_gpu_test.cpp
@@ -0,0 +1,513 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/gather.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+
+#include <cstddef>
+#include <tests/test_utils/test_utils.h>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(gather_gpu_fp16, d14_axisB) {
+    //  Indexes  : 2x2x1x1
+    //  Dictionary : 1x4x1x1
+    //  Axis : 0
+    //  Output : 1x4x2x1
+    //  Input values in fp16
+
+    //  Indexes:
+    //  0.f, 1.f, 1.f, 0.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_b;
+
+    set_values(input1, {
+        FLOAT16(1.0f), FLOAT16(2.0f),
+        FLOAT16(3.0f), FLOAT16(4.0f)
+    });
+
+    set_values(input2, {
+        0.f, 1.f,
+        1.f, 0.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+        gather("gather", "InputDictionary", "InputText", axis, tensor(1, 4, 1, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+        1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(gather_gpu_fp16, d222_axisB) {
+    //  Indexes  : 3x2x2x1
+    //  Dictionary : 2x2x1x1
+    //  Axis : 0
+    //  Output : 2x2x2x2
+    //  Input values in fp16
+
+    //  Indexes:
+    //  0.f, 1.f, 2.f, 1.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
+    //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_b;
+
+    set_values(input1, {
+        FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
+        FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
+
+        FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
+        FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
+    });
+
+    set_values(input2, {
+        0.f, 1.f,
+        2.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+        gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+        1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(gather_gpu_fp16, d22_axisY) {
+    //  Indexes  : 2x2x3x1
+    //  Dictionary : 2x2x1x1
+    //  Axis : 2
+    //  Output : 2x2x2x2
+    //  Input values in fp16
+
+    //  Indexes:
+    //  0.f, 1.f, 2.f, 1.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
+    //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_y;
+
+    set_values(input1, {
+        FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
+        FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
+
+        FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
+        FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
+    });
+
+    set_values(input2, {
+        0.f, 1.f, 2.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+        gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+        1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(gather_gpu_fp16, d22_axisF) {
+    //  Indexes  : 2x3x2x1
+    //  Dictionary : 2x2x1x1
+    //  Axis : 2
+    //  Output : 2x2x2x2
+    //  Input values in fp16
+
+    //  Indexes:
+    //  0.f, 1.f, 2.f, 1.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
+    //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_f;
+
+    set_values(input1, {
+            FLOAT16(1.f), FLOAT16(2.f), FLOAT16(3.f),
+            FLOAT16(4.f), FLOAT16(5.f), FLOAT16(6.f),
+
+            FLOAT16(7.f), FLOAT16(8.f), FLOAT16(9.f),
+            FLOAT16(10.f), FLOAT16(11.f), FLOAT16(12.f)
+    });
+
+    set_values(input2, {
+            0.f, 1.f, 2.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+            gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+            1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(gather_gpu_fp32, d14_axisB) {
+    //  Indexes  : 2x2x1x1
+    //  Dictionary : 1x4x1x1
+    //  Axis : 0
+    //  Output : 1x4x2x1
+    //  Input values in fp32
+
+    //  Indexes:
+    //  0.f, 1.f, 1.f, 0.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_b;
+
+    set_values(input1, {
+        1.0f, 2.0f,
+        3.0f, 4.0f
+    });
+
+    set_values(input2, {
+        0.f, 1.f,
+        1.f, 0.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+        gather("gather", "InputDictionary", "InputText", axis, tensor(1, 4, 1, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+        1.f, 2.f, 3.f, 4.f, 3.f, 4.f, 1.f, 2.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(gather_gpu_fp32, d222_axisB) {
+    //  Indexes  : 3x2x2x1
+    //  Dictionary : 2x2x1x1
+    //  Axis : 0
+    //  Output : 2x2x2x2
+    //  Input values in fp32
+
+    //  Indexes:
+    //  0.f, 1.f, 2.f, 1.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
+    //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 2, 1, 2 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_b;
+
+    set_values(input1, {
+        1.f, 2.f, 3.f,
+        4.f, 5.f, 6.f,
+
+        7.f, 8.f, 9.f,
+        10.f, 11.f, 12.f
+    });
+
+    set_values(input2, {
+        0.f, 1.f, 2.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+        gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+        1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 5.f, 6.f, 7.f, 8.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(gather_gpu_fp32, d22_axisY) {
+    //  Indexes  : 2x2x3x1
+    //  Dictionary : 2x2x1x1
+    //  Axis : 2
+    //  Output : 2x2x2x2
+    //  Input values in fp32
+
+    //  Indexes:
+    //  0.f, 1.f, 2.f, 1.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
+    //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 3 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_y;
+
+    set_values(input1, {
+        1.f, 2.f, 3.f,
+        4.f, 5.f, 6.f,
+
+        7.f, 8.f, 9.f,
+        10.f, 11.f, 12.f
+    });
+
+    set_values(input2, {
+        0.f, 1.f, 2.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+        gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+        1.f, 2.f, 3.f, 2.f, 4.f, 5.f, 6.f, 5.f, 7.f, 8.f, 9.f, 8.f, 10.f, 11.f, 12.f, 11.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(gather_gpu_fp32, d22_axisF) {
+    //  Indexes  : 2x3x2x1
+    //  Dictionary : 2x2x1x1
+    //  Axis : 1
+    //  Output : 2x2x2x2
+    //  Input values in fp32
+
+    //  Indexes:
+    //  0.f, 1.f, 2.f, 1.f
+    //
+    //  Dictionary:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f,
+    //  7.f, 8.f, 9.f, 10.f, 11.f, 12.f
+    //
+    //  Output:
+    //  1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
+
+    engine engine;
+
+    auto input1 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 3, 1, 2 } }); // Dictionary
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } }); // Indexes
+    auto axis = cldnn::gather::gather_axis::along_f;
+
+    set_values(input1, {
+            1.f, 2.f, 3.f,
+            4.f, 5.f, 6.f,
+
+            7.f, 8.f, 9.f,
+            10.f, 11.f, 12.f
+    });
+
+    set_values(input2, {
+            0.f, 1.f, 2.f, 1.f
+    });
+
+    topology topology;
+    topology.add(input_layout("InputDictionary", input1.get_layout()));
+    topology.add(input_layout("InputText", input2.get_layout()));
+    topology.add(
+            gather("gather", "InputDictionary", "InputText", axis, tensor(2, 2, 2, 2))
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("InputDictionary", input1);
+    network.set_input_data("InputText", input2);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("gather").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 3.f, 4.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 9.f, 10.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp
index ad39dcc59..1a77be6dc 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/gemm_gpu_test.cpp
@@ -32,7 +32,7 @@ using namespace cldnn;
 using namespace ::tests;
 
 TEST(gemm_gpu, basic_bfyx_t1) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 4 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 4 } });
 
@@ -83,7 +83,7 @@ TEST(gemm_gpu, basic_bfyx_t1) {
     }
 }
 TEST(gemm_gpu, basic_bfyx_t2) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 3 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 1 } });
 
@@ -131,7 +131,7 @@ TEST(gemm_gpu, basic_bfyx_t2) {
 }
 
 TEST(gemm_gpu, basic_bfyx_t1t2) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 4 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 4, 1 } });
 
@@ -188,7 +188,7 @@ TEST(gemm_gpu, basic_bfyx_t1t2) {
 }
 
 TEST(gemm_gpu, basic_input3) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 3 } });
     auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
@@ -252,10 +252,10 @@ TEST(gemm_gpu, basic_input3) {
 }
 
 TEST(gemm_gpu, basic_input3_t1t2) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 3 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 2 } });
-    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } });
     float alpha = 2.f;
     float beta = 3.f;
 
@@ -272,8 +272,10 @@ TEST(gemm_gpu, basic_input3_t1t2) {
     };
 
     std::vector<float> input3_data = {
-        1.0f, 0.0f, 1.0f, 0.0f,
-        2.0f, 2.0f, 1.0f, 1.0f,
+        1.0f, 0.0f,
+        1.0f, 0.0f,
+        2.0f, 2.0f,
+        1.0f, 1.0f,
     };
 
     set_values(input, input_data);
@@ -281,8 +283,10 @@ TEST(gemm_gpu, basic_input3_t1t2) {
     set_values(input3, input3_data);
 
     std::vector<float> out_data = {
-        15.0f, 12.0f, 27.0f, 24.0f,
-        12.0f, 14.0f, 17.0f, 19.0f,
+        15.0f, 6.0f,
+        15.0f, 8.0f,
+        30.0f, 20.0f,
+        27.0f, 19.0f
     };
 
     topology topology;
@@ -314,8 +318,217 @@ TEST(gemm_gpu, basic_input3_t1t2) {
         EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
     }
 }
+TEST(gemm_gpu, basic_input3_1) {
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 4 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 3 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } });
+    float alpha = 2.f;
+    float beta = 3.f;
+
+    std::vector<float> input_data = {
+        1.0f, 1.0f, 0.0f,
+        2.0f, 0.0f, 0.0f,
+        3.0f, 1.0f, 0.0f,
+        4.0f, 0.0f, 0.0f
+    };
+
+    std::vector<float> input_data2 = {
+        3.0f, 2.0f,
+        3.0f, 1.0f,
+        1.0f, 2.0f,
+    };
+
+    std::vector<float> input3_data = {
+        1.0f, 0.0f,
+        1.0f, 0.0f,
+        2.0f, 2.0f,
+        1.0f, 1.0f,
+    };
+
+    set_values(input, input_data);
+    set_values(input2, input_data2);
+    set_values(input3, input3_data);
+
+    std::vector<float> out_data = {
+        15.0f, 6.0f,
+        15.0f, 8.0f,
+        30.0f, 20.0f,
+        27.0f, 19.0f
+    };
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("input2", input2.get_layout())
+    );
+    topology.add(
+        input_layout("input3", input3.get_layout())
+    );
+    topology.add(
+        gemm("output", "input", "input2", "input3", false, false, alpha, beta)
+
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    EXPECT_EQ(output_ptr.size(), (uint32_t)8);
+
+    for (uint32_t i = 0; i < out_data.size(); ++i) {
+        EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(gemm_gpu, basic_input3_t2) {
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 4 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 2 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } });
+    float alpha = 2.f;
+    float beta = 3.f;
+
+
+    std::vector<float> input_data = {
+        1.0f, 1.0f, 0.0f,
+        2.0f, 0.0f, 0.0f,
+        3.0f, 1.0f, 0.0f,
+        4.0f, 0.0f, 0.0f
+    };
+
+
+    std::vector<float> input_data2 = {
+        3.0f, 3.0f, 1.0f,
+        2.0f, 1.0f, 2.0f,
+    };
+
+    std::vector<float> input3_data = {
+        1.0f, 0.0f,
+        1.0f, 0.0f,
+        2.0f, 2.0f,
+        1.0f, 1.0f,
+    };
+
+    set_values(input, input_data);
+    set_values(input2, input_data2);
+    set_values(input3, input3_data);
+
+    std::vector<float> out_data = {
+        15.0f, 6.0f,
+        15.0f, 8.0f,
+        30.0f, 20.0f,
+        27.0f, 19.0f,
+    };
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("input2", input2.get_layout())
+    );
+    topology.add(
+        input_layout("input3", input3.get_layout())
+    );
+    topology.add(
+        gemm("output", "input", "input2", "input3", false, true, alpha, beta)
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    EXPECT_EQ(output_ptr.size(), (uint32_t)8);
+
+    for (uint32_t i = 0; i < out_data.size(); ++i) {
+        EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(gemm_gpu, basic_input3_t1) {
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 4, 3 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 3 } });
+    auto input3 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 4 } });
+    float alpha = 2.f;
+    float beta = 3.f;
+
+
+    std::vector<float> input_data = {
+        1.0f, 2.0f, 3.0f, 4.0f,
+        1.0f, 0.0f, 1.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 0.0f
+    };
+
+    std::vector<float> input_data2 = {
+        3.0f, 2.0f,
+        3.0f, 1.0f,
+        1.0f, 2.0f
+    };
+
+    std::vector<float> input3_data = {
+        1.0f, 0.0f,
+        1.0f, 0.0f,
+        2.0f, 2.0f,
+        1.0f, 1.0f,
+    };
+
+    set_values(input, input_data);
+    set_values(input2, input_data2);
+    set_values(input3, input3_data);
+
+    std::vector<float> out_data = {
+        15.0f, 6.0f,
+        15.0f, 8.0f,
+        30.0f, 20.0f,
+        27.0f, 19.0f,
+    };
+
+    topology topology;
+    topology.add(
+        input_layout("input", input.get_layout())
+    );
+    topology.add(
+        input_layout("input2", input2.get_layout())
+    );
+    topology.add(
+        input_layout("input3", input3.get_layout())
+    );
+    topology.add(
+        gemm("output", "input", "input2", "input3", true, false, alpha, beta)
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("input3", input3);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    EXPECT_EQ(output_ptr.size(), (uint32_t)8);
+
+    for (uint32_t i = 0; i < out_data.size(); ++i) {
+        EXPECT_FLOAT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
 TEST(gemm_gpu, basic_bfyx) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 4, 3 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 1, 4 } });
 
@@ -373,7 +586,7 @@ TEST(gemm_gpu, basic_bfyx) {
 }
 
 TEST(gemm_gpu, basic3_bfyx) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 1, 500, 9 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 1, 1, 500 } });
 
@@ -2979,7 +3192,7 @@ TEST(gemm_gpu, basic3_bfyx) {
 }
 
 TEST(gemm_gpu, basic_smarcink2) {
-    engine engine;
+    const auto& engine = get_test_engine();
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 2, 3 } });
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/index_select_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/index_select_gpu_test.cpp
index 7a0e38971..218cac08c 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/index_select_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/index_select_gpu_test.cpp
@@ -223,7 +223,7 @@ TEST(index_select_gpu, basic_along_b_3_executes_bfyx)
     indices: {1, 1, 4, 1}
     output: {4, 2, 3, 4}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 5;
     constexpr auto in_size_f = 2;
     constexpr auto in_size_x = 3;
@@ -299,7 +299,7 @@ TEST(index_select_gpu, basic_along_f_3_executes_bfyx)
     indices: {1, 1, 10, 1}
     output: {2, 10, 3, 3}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 2;
     constexpr auto in_size_f = 5;
     constexpr auto in_size_x = 3;
@@ -375,7 +375,7 @@ TEST(index_select_gpu, basic_along_x_3_executes_bfyx)
     indices: {1, 1, 3, 1}
     output: {3, 4, 3, 5}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 3;
     constexpr auto in_size_f = 4;
     constexpr auto in_size_x = 6;
@@ -451,7 +451,7 @@ TEST(index_select_gpu, basic_along_y_3_executes_bfyx)
     indices: {1, 1, 5, 1}
     output: {2, 4, 4, 5}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 2;
     constexpr auto in_size_f = 4;
     constexpr auto in_size_x = 4;
@@ -527,7 +527,7 @@ TEST(index_select_gpu, basic_along_b_3_executes_yxfb)
     indices: {1, 1, 4, 1}
     output: {4, 2, 3, 4}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 5;
     constexpr auto in_size_f = 2;
     constexpr auto in_size_x = 3;
@@ -604,7 +604,7 @@ TEST(index_select_gpu, basic_along_f_3_executes_yxfb)
     indices: {1, 1, 10, 1}
     output: {2, 10, 3, 3}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 2;
     constexpr auto in_size_f = 5;
     constexpr auto in_size_x = 3;
@@ -681,7 +681,7 @@ TEST(index_select_gpu, basic_along_x_3_executes_yxfb)
     indices: {1, 1, 3, 1}
     output: {3, 4, 3, 5}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 3;
     constexpr auto in_size_f = 4;
     constexpr auto in_size_x = 6;
@@ -757,7 +757,7 @@ TEST(index_select_gpu, basic_along_y_3_executes_yxfb)
     indices: {1, 1, 5, 1}
     output: {2, 4, 4, 5}
     */
-    engine engine;
+    const auto& engine = get_test_engine();
     constexpr auto in_size_b = 2;
     constexpr auto in_size_f = 4;
     constexpr auto in_size_x = 4;
@@ -826,3 +826,862 @@ TEST(index_select_gpu, basic_along_y_3_executes_yxfb)
         }
     }
 }
+
+TEST(index_select_gpu, reverse_along_b_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 4, 2 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+
+        8.f,  9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f, 15.f,
+
+
+
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+    };
+
+    std::vector<float> out_data = {
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+
+        
+
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+
+        8.f,  9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f, 15.f,
+    };
+
+    constexpr auto axis = index_select_axis_name::along_b;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+    
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_f_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 3, 4 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f
+    };
+
+    constexpr auto axis = index_select_axis_name::along_f;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_y_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 4, 3 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        8.f,  9.f, 10.f, 11.f,
+        4.f,  5.f,  6.f,  7.f,
+        0.f,  1.f,  2.f,  3.f,
+        
+        20.f, 21.f, 22.f, 23.f,
+        16.f, 17.f, 18.f, 19.f,
+        12.f, 13.f, 14.f, 15.f
+    };
+
+    constexpr auto axis = index_select_axis_name::along_y;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_x_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 4, 3 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        3.f,  2.f,  1.f,  0.f,
+        7.f,  6.f,  5.f,  4.f,
+        11.f,  10.f, 9.f, 8.f,
+
+        15.f, 14.f, 13.f, 12.f,
+        19.f, 18.f, 17.f, 16.f,
+        23.f, 22.f, 21.f, 20.f
+    };
+
+    constexpr auto axis = index_select_axis_name::along_x;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+
+TEST(index_select_gpu, reverse_along_y_yxfb)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 4, 2, 2, 2 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+
+        8.f,  9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f, 15.f,
+
+
+
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+    };
+
+    std::vector<float> out_data = {
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+
+
+
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+
+        8.f,  9.f, 10.f, 11.f,
+        12.f, 13.f, 14.f, 15.f,
+    };
+
+    constexpr auto axis = index_select_axis_name::along_y;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_x_yxfb)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 2, 1 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f
+    };
+
+    constexpr auto axis = index_select_axis_name::along_x;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_f_yxfb)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 4, 3, 2, 1 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        8.f,  9.f, 10.f, 11.f,
+        4.f,  5.f,  6.f,  7.f,
+        0.f,  1.f,  2.f,  3.f,
+
+        20.f, 21.f, 22.f, 23.f,
+        16.f, 17.f, 18.f, 19.f,
+        12.f, 13.f, 14.f, 15.f
+    };
+
+    constexpr auto axis = index_select_axis_name::along_f;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_b_yxfb)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 4, 3, 2, 1 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        3.f,  2.f,  1.f,  0.f,
+        7.f,  6.f,  5.f,  4.f,
+        11.f,  10.f, 9.f, 8.f,
+
+        15.f, 14.f, 13.f, 12.f,
+        19.f, 18.f, 17.f, 16.f,
+        23.f, 22.f, 21.f, 20.f
+    };
+
+    constexpr auto axis = index_select_axis_name::along_b;
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+
+TEST(index_select_gpu, reverse_along_yx_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 4, 3 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        11.f,  10.f, 9.f, 8.f,
+        7.f,  6.f,  5.f,  4.f,
+        3.f,  2.f,  1.f,  0.f,
+
+        23.f, 22.f, 21.f, 20.f,
+        19.f, 18.f, 17.f, 16.f,
+        15.f, 14.f, 13.f, 12.f
+    };
+
+    std::vector<index_select_axis_name> axis = { index_select_axis_name::along_y, index_select_axis_name::along_x };
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_fyx_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 2, 4, 3 } });
+
+    std::vector<float> input_data = {
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f
+    };
+
+    std::vector<float> out_data = {
+        23.f, 22.f, 21.f, 20.f,
+        19.f, 18.f, 17.f, 16.f,
+        15.f, 14.f, 13.f, 12.f,
+
+        11.f,  10.f, 9.f, 8.f,
+        7.f,  6.f,  5.f,  4.f,
+        3.f,  2.f,  1.f,  0.f
+    };
+
+    std::vector<index_select_axis_name> axis = { index_select_axis_name::along_f, index_select_axis_name::along_y, index_select_axis_name::along_x };
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_bfyx_bfyx)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 3, 3, 4, 3 } });
+
+    std::vector<float> input_data = {
+        // b0f0
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+        // f1
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+        // f2
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+        32.f, 33.f, 34.f, 35.f,
+
+        // b1f0
+        36.f, 37.f, 38.f, 39.f,
+        40.f, 41.f, 42.f, 43.f,
+        44.f, 45.f, 46.f, 47.f,
+        // f1
+        48.f, 49.f, 50.f, 51.f,
+        52.f, 53.f, 54.f, 55.f,
+        56.f, 57.f, 58.f, 59.f,
+        // f2
+        60.f, 61.f, 62.f, 63.f,
+        64.f, 65.f, 66.f, 67.f,
+        68.f, 69.f, 70.f, 71.f,
+
+        // b2f0
+        72.f, 73.f, 74.f, 75.f,
+        76.f, 77.f, 78.f, 79.f,
+        80.f, 81.f, 82.f, 83.f,
+        // f1
+        84.f, 85.f, 86.f, 87.f,
+        88.f, 89.f, 90.f, 91.f,
+        92.f, 93.f, 94.f, 95.f,
+        // f2
+        96.f, 97.f, 98.f, 99.f,
+        100.f, 101.f, 102.f, 103.f,
+        104.f, 105.f, 106.f, 107.f
+    };
+
+    std::vector<float> out_data = {
+        107.f, 106.f, 105.f, 104.f,
+        103.f, 102.f, 101.f, 100.f,
+        99.f, 98.f, 97.f, 96.f,
+
+        95.f, 94.f, 93.f, 92.f,
+        91.f, 90.f, 89.f, 88.f,
+        87.f, 86.f, 85.f, 84.f,
+
+        83.f, 82.f, 81.f, 80.f,
+        79.f, 78.f, 77.f, 76.f,
+        75.f, 74.f, 73.f, 72.f,
+
+
+        71.f, 70.f, 69.f, 68.f,
+        67.f, 66.f, 65.f, 64.f,
+        63.f, 62.f, 61.f, 60.f,
+
+        59.f, 58.f, 57.f, 56.f,
+        55.f, 54.f, 53.f, 52.f,
+        51.f, 50.f, 49.f, 48.f,
+
+        47.f, 46.f, 45.f, 44.f,
+        43.f, 42.f, 41.f, 40.f,
+        39.f, 38.f, 37.f, 36.f,
+
+        
+        35.f, 34.f, 33.f, 32.f,
+        31.f, 30.f, 29.f, 28.f,
+        27.f, 26.f, 25.f, 24.f,
+        
+        23.f, 22.f, 21.f, 20.f,
+        19.f, 18.f, 17.f, 16.f,
+        15.f, 14.f, 13.f, 12.f,
+
+        11.f,  10.f, 9.f, 8.f,
+        7.f,  6.f,  5.f,  4.f,
+        3.f,  2.f,  1.f,  0.f
+    };
+
+    std::vector<index_select_axis_name> axis = { index_select_axis_name::along_b, index_select_axis_name::along_f, index_select_axis_name::along_y, index_select_axis_name::along_x };
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_bfx_yxfb)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 4, 3, 3, 3 } });
+
+    std::vector<float> input_data = {
+        // y0x0
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+        // x1
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+        // x2
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+        32.f, 33.f, 34.f, 35.f,
+
+        // y1x0
+        36.f, 37.f, 38.f, 39.f,
+        40.f, 41.f, 42.f, 43.f,
+        44.f, 45.f, 46.f, 47.f,
+        // x1
+        48.f, 49.f, 50.f, 51.f,
+        52.f, 53.f, 54.f, 55.f,
+        56.f, 57.f, 58.f, 59.f,
+        // x2
+        60.f, 61.f, 62.f, 63.f,
+        64.f, 65.f, 66.f, 67.f,
+        68.f, 69.f, 70.f, 71.f,
+
+        // y2x0
+        72.f, 73.f, 74.f, 75.f,
+        76.f, 77.f, 78.f, 79.f,
+        80.f, 81.f, 82.f, 83.f,
+        // x1
+        84.f, 85.f, 86.f, 87.f,
+        88.f, 89.f, 90.f, 91.f,
+        92.f, 93.f, 94.f, 95.f,
+        // x2
+        96.f, 97.f, 98.f, 99.f,
+        100.f, 101.f, 102.f, 103.f,
+        104.f, 105.f, 106.f, 107.f
+    };
+
+    std::vector<float> out_data = {
+        35.f, 34.f, 33.f, 32.f,
+        31.f, 30.f, 29.f, 28.f,
+        27.f, 26.f, 25.f, 24.f,
+
+        23.f, 22.f, 21.f, 20.f,
+        19.f, 18.f, 17.f, 16.f,
+        15.f, 14.f, 13.f, 12.f,
+
+        11.f,  10.f, 9.f, 8.f,
+        7.f,  6.f,  5.f,  4.f,
+        3.f,  2.f,  1.f,  0.f,
+
+
+        71.f, 70.f, 69.f, 68.f,
+        67.f, 66.f, 65.f, 64.f,
+        63.f, 62.f, 61.f, 60.f,
+
+        59.f, 58.f, 57.f, 56.f,
+        55.f, 54.f, 53.f, 52.f,
+        51.f, 50.f, 49.f, 48.f,
+
+        47.f, 46.f, 45.f, 44.f,
+        43.f, 42.f, 41.f, 40.f,
+        39.f, 38.f, 37.f, 36.f,
+
+
+        107.f, 106.f, 105.f, 104.f,
+        103.f, 102.f, 101.f, 100.f,
+        99.f, 98.f, 97.f, 96.f,
+
+        95.f, 94.f, 93.f, 92.f,
+        91.f, 90.f, 89.f, 88.f,
+        87.f, 86.f, 85.f, 84.f,
+
+        83.f, 82.f, 81.f, 80.f,
+        79.f, 78.f, 77.f, 76.f,
+        75.f, 74.f, 73.f, 72.f
+    };
+
+    std::vector<index_select_axis_name> axis = { index_select_axis_name::along_f, index_select_axis_name::along_b, index_select_axis_name::along_x };
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+
+TEST(index_select_gpu, reverse_along_bfyx_yxfb)
+{
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 4, 3, 3, 3 } });
+
+    std::vector<float> input_data = {
+        // y0x0
+        0.f,  1.f,  2.f,  3.f,
+        4.f,  5.f,  6.f,  7.f,
+        8.f,  9.f, 10.f, 11.f,
+        // x1
+        12.f, 13.f, 14.f, 15.f,
+        16.f, 17.f, 18.f, 19.f,
+        20.f, 21.f, 22.f, 23.f,
+        // x2
+        24.f, 25.f, 26.f, 27.f,
+        28.f, 29.f, 30.f, 31.f,
+        32.f, 33.f, 34.f, 35.f,
+
+        // y1x0
+        36.f, 37.f, 38.f, 39.f,
+        40.f, 41.f, 42.f, 43.f,
+        44.f, 45.f, 46.f, 47.f,
+        // x1
+        48.f, 49.f, 50.f, 51.f,
+        52.f, 53.f, 54.f, 55.f,
+        56.f, 57.f, 58.f, 59.f,
+        // x2
+        60.f, 61.f, 62.f, 63.f,
+        64.f, 65.f, 66.f, 67.f,
+        68.f, 69.f, 70.f, 71.f,
+
+        // y2x0
+        72.f, 73.f, 74.f, 75.f,
+        76.f, 77.f, 78.f, 79.f,
+        80.f, 81.f, 82.f, 83.f,
+        // x1
+        84.f, 85.f, 86.f, 87.f,
+        88.f, 89.f, 90.f, 91.f,
+        92.f, 93.f, 94.f, 95.f,
+        // x2
+        96.f, 97.f, 98.f, 99.f,
+        100.f, 101.f, 102.f, 103.f,
+        104.f, 105.f, 106.f, 107.f
+    };
+
+    std::vector<float> out_data = {
+        107.f, 106.f, 105.f, 104.f,
+        103.f, 102.f, 101.f, 100.f,
+        99.f, 98.f, 97.f, 96.f,
+
+        95.f, 94.f, 93.f, 92.f,
+        91.f, 90.f, 89.f, 88.f,
+        87.f, 86.f, 85.f, 84.f,
+
+        83.f, 82.f, 81.f, 80.f,
+        79.f, 78.f, 77.f, 76.f,
+        75.f, 74.f, 73.f, 72.f,
+
+
+        71.f, 70.f, 69.f, 68.f,
+        67.f, 66.f, 65.f, 64.f,
+        63.f, 62.f, 61.f, 60.f,
+
+        59.f, 58.f, 57.f, 56.f,
+        55.f, 54.f, 53.f, 52.f,
+        51.f, 50.f, 49.f, 48.f,
+
+        47.f, 46.f, 45.f, 44.f,
+        43.f, 42.f, 41.f, 40.f,
+        39.f, 38.f, 37.f, 36.f,
+
+
+        35.f, 34.f, 33.f, 32.f,
+        31.f, 30.f, 29.f, 28.f,
+        27.f, 26.f, 25.f, 24.f,
+
+        23.f, 22.f, 21.f, 20.f,
+        19.f, 18.f, 17.f, 16.f,
+        15.f, 14.f, 13.f, 12.f,
+
+        11.f,  10.f, 9.f, 8.f,
+        7.f,  6.f,  5.f,  4.f,
+        3.f,  2.f,  1.f,  0.f
+    };
+
+    std::vector<index_select_axis_name> axis = { index_select_axis_name::along_b, index_select_axis_name::along_f, index_select_axis_name::along_y, index_select_axis_name::along_x };
+
+    topology topo;
+    topo.add(
+        input_layout("input", input.get_layout())
+    );
+    topo.add(
+        index_select("index_select", "input", axis)
+    );
+
+    network net(engine, topo);
+
+    set_values(input, input_data);
+    net.set_input_data("input", input);
+
+    auto outputs = net.execute();
+    auto output_mem = outputs.at("index_select").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    for (size_t i = 0; i < output_ptr.size(); i++)
+    {
+        EXPECT_EQ(output_ptr[i], out_data[i]);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/lookup_table_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/lookup_table_test.cpp
index 72dd2fc83..45f0408c1 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/lookup_table_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/lookup_table_test.cpp
@@ -32,7 +32,7 @@ using namespace tests;
 TEST(lookup_table_base, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { batch_num, feature_num, x_size , y_size } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 1, 1, 1} });
@@ -83,7 +83,7 @@ TEST(lookup_table_base, base) {
 TEST(lookup_table_num, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, number_of_values = 3;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 3, 1 } });
@@ -160,7 +160,7 @@ TEST(lookup_table_num, base) {
 TEST(lookup_table_with_arg_max, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, x_size , y_size } });
     topology topology;
@@ -207,7 +207,7 @@ TEST(lookup_table_with_arg_max, base) {
 TEST(lookup_table_axis, base) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3, batch_num = 2, number_of_values = 2;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 3, 2, 2 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp
index 13c6c93f8..ba109f037 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/lstm_gpu_test.cpp
@@ -21,6 +21,7 @@
 #include "api/CPP/lstm.hpp"
 #include <api/CPP/split.hpp>
 #include <api/CPP/crop.hpp>
+#include <api/CPP/reshape.hpp>
 #include <api/CPP/concatenation.hpp>
 #include <api/CPP/topology.hpp>
 #include <api/CPP/tensor.hpp>
@@ -29,10 +30,14 @@
 #include "test_utils/test_utils.h"
 #include <api/CPP/data.hpp>
 #include "instrumentation.h"
+#include <test_utils/float16.h>
 
 #include <sstream>
 #include <iomanip>
 
+#ifdef WIN32
+#pragma warning(disable: 4503)
+#endif
 
 using namespace cldnn;
 using namespace tests;
@@ -88,7 +93,7 @@ VVVVF<T> lstm_gemm_reference(VVVVF<T>& input, VVVVF<T>& weights, VVVVF<T>& recur
             }
             if (hasHidden) {
                 for (size_t x = 0; x < hidden_size; ++x) {
-                    res += (T)recurrent[0][dir][y][x] * (T)hidden[b][dir][0][x];
+                    res += (T)recurrent[0][dir][y][x] * (T)hidden[b][0][dir][x];
                 }
             }
             if (hasBias) {
@@ -102,7 +107,9 @@ VVVVF<T> lstm_gemm_reference(VVVVF<T>& input, VVVVF<T>& weights, VVVVF<T>& recur
 
 template <typename T>
 VVVVF<T> lstm_elt_reference(VVVVF<T>& tempGEMM, VVVVF<T>& cell,
-                     bool hasCell = true, float clip_threshold = 0, bool input_forget = false, size_t dir = 0) {
+                            bool hasCell = true, float clip_threshold = 0,
+                            bool input_forget = false, size_t dir = 0)
+{
     size_t hidden_size = tempGEMM[0][0][0].size() / 4;
     size_t batch_size = tempGEMM.size();
     VVVVF<T> tempOut(batch_size, VVVF<T>(2, VVF<T>(1, VF<T>(hidden_size))));
@@ -113,16 +120,28 @@ VVVVF<T> lstm_elt_reference(VVVVF<T>& tempGEMM, VVVVF<T>& cell,
         T *ot = &tempGEMM[b][0][0][off.ot];
         T *ft = &tempGEMM[b][0][0][off.ft];
         T *zt = &tempGEMM[b][0][0][off.zt];
+
         for (size_t h = 0; h < hidden_size; ++h) {
-            T val = sigmoid(clip(it[h], clip_threshold)) * std::tanh((float)clip(zt[h], clip_threshold));
+
+            // Convert all inputs to float for all the elementwise operations. This is done to immitate
+            // how lstm kernel is performing the elementwise operations.
+            float fp32_it = (float)it[h];
+            float fp32_ot = (float)ot[h];
+            float fp32_ft = (float)ft[h];
+            float fp32_zt = (float)zt[h];
+            float val = sigmoid(clip(fp32_it, clip_threshold)) * std::tanh(clip(fp32_zt, clip_threshold));
+
             if (input_forget) {
-                val *= (1 - ft[h]);
+                val *= (1 - fp32_ft);
             }
             if (hasCell) {
-                val += cell[b][dir][0][h] * sigmoid(clip(ft[h], clip_threshold));
+                val += (float)cell[b][0][dir][h] * sigmoid(clip(fp32_ft, clip_threshold));
             }
-            tempOut[b][0][0][h] = std::tanh((float)val) * sigmoid(ot[h]);
-            tempOut[b][1][0][h] = val;
+
+            // Convert back to output data type before storing it into the output buffer. Currently, the output
+            // data type may be float or FLOAT16 (half)
+            tempOut[b][0][0][h] = (T)(std::tanh(val) * sigmoid(fp32_ot));
+            tempOut[b][1][0][h] = (T)val;
         }
     }
     return tempOut;
@@ -154,10 +173,14 @@ void print(const std::string& s, VVVVF<T>& input) {
 // tempGEMM  = [    batch,         1,               1, 4 * hidden_size ] temporary output
 // output    = [    batch,  sequence,       direction,     hidden_size ] output
 template <typename T>
-void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell, VVVVF<T>& weights, VVVVF<T>& recurrent, VVVVF<T>& bias,
-    VVVVF<T>& output, VVVVF<T>& last_hidden, VVVVF<T>& last_cell,
-    bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true,
-    float clip_threshold = 0, bool input_forget = false, bool scramble_input = true) {
+void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell,
+                    VVVVF<T>& weights, VVVVF<T>& recurrent, VVVVF<T>& bias,
+                    VVVVF<T>& output, VVVVF<T>& last_hidden,
+                    VVVVF<T>& last_cell, bool hasBias = true,
+                    bool hasInitialHidden = true, bool hasInitialCell = true,
+                    float clip_threshold = 0, bool input_forget = false,
+                    bool scramble_input = true)
+{
     size_t sequence_len = input[0].size();
     size_t dir_len = weights[0].size();
     size_t batch = input.size();
@@ -179,8 +202,8 @@ void lstm_reference(VVVVF<T>& input, VVVVF<T>& hidden, VVVVF<T>& cell, VVVVF<T>&
             // tempOutput[batch][0] = hidden and tempOutput[batch][1] = cell
             for (size_t i = 0; i < batch; i++) {
                 output[i][seq][dir] = tempOutput[i][0][0];
-                hidden[i][dir] = tempOutput[i][0];
-                cell[i][dir] = tempOutput[i][1];
+                hidden[i][0][dir] = tempOutput[i][0][0];
+                cell[i][0][dir] = tempOutput[i][1][0];
             }
             tempHasInitialHidden = true;
             tempHasInitialCell = true;
@@ -210,12 +233,23 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
 
     VVVVF<T> ref_output = lstm_gemm_reference(ref_input, ref_weights, ref_recurrent, ref_bias, ref_hidden, 0, hasBias, hasHidden);
 
-    engine engine;
-    memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ batch_size,   sequence_len,  input_size,      1 } });
-    memory weights = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1,            direction,     input_size,      4 * hidden_size } });
-    memory recurrent = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1,            direction,     hidden_size,     4 * hidden_size } });
-    memory biases = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1,            1,             4 * hidden_size, direction } });
-    memory hidden = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ batch_size,   direction,     hidden_size,     1 } });
+    constexpr auto dt = std::is_same<T, float>::value ? data_types::f32 : data_types::f16;
+    const auto& engine = get_test_engine();
+
+    // If the input is of fp16 type then, the memory will be allocated as such
+    if (!engine.get_info().supports_fp16)
+    {
+        if (dt == data_types::f16)
+        {
+            return;
+        }
+    }
+
+    memory input = memory::allocate(engine, { dt, format::bfyx,     { batch_size,   sequence_len,  input_size,      1 } });
+    memory weights = memory::allocate(engine, { dt, format::bfyx,   { 1,            direction,     input_size,      4 * hidden_size } });
+    memory recurrent = memory::allocate(engine, { dt, format::bfyx, { 1,            direction,     hidden_size,     4 * hidden_size } });
+    memory biases = memory::allocate(engine, { dt, format::bfyx,    { 1,            1,             4 * hidden_size, direction } });
+    memory hidden = memory::allocate(engine, { dt, format::bfyx,    { batch_size,   direction,     hidden_size,     1 } });
 
     set_values(input, ref_input_vec);
     set_values(weights, ref_weights_vec);
@@ -250,13 +284,13 @@ void generic_lstm_gemm_gpu_test(int sequence_len, int direction, int batch_size,
     int i = 0;
     for (int b = 0; b < batch_size; ++b) {
         for (int x = 0; x < 4 * hidden_size; ++x)
-            EXPECT_EQ(ref_output[b][0][0][x], output_ptr[i++]);
+            EXPECT_FLOAT_EQ(ref_output[b][0][0][x], output_ptr[i++]);
     }
 }
 
 template<typename T>
 void generic_lstm_elt_gpu_test(int sequence_len, int direction, int batch_size, int input_size, int hidden_size, bool hasCell = true,
-    float clip_threshold = 0.f, bool input_forget = false) {
+    T clip_threshold = (T)0.f, bool input_forget = false) {
     // tempGEMM  = [        1, direction,           batch, 4 * hidden_size ] input
     // cell      = [        1, direction,           batch,     hidden_size ] optional
     // output    = [        2, direction,           batch,     hidden_size ] output concat[hidden, cell]
@@ -269,9 +303,25 @@ void generic_lstm_elt_gpu_test(int sequence_len, int direction, int batch_size,
 
     VVVVF<T> ref_output = lstm_elt_reference(ref_tempGEMM, ref_cell, hasCell, clip_threshold, input_forget);
 
-    engine engine;
-    memory tempGEMM = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ batch_size,    direction, 4 * hidden_size, 1 } });
-    memory cell = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ batch_size,    direction,     hidden_size, 1 } });
+    // We observe some mismatch in down-converting from fp32 to fp16
+    // between the reference implementation and opencl kernel. This can be
+    // a simple rounding error. Thus, for fp16 we are increasing our tolerance
+    // to error from 1E-4 to 1E-2
+    constexpr float ferror = std::is_same<T, float>::value ? (float)1E-4 : (float)1E-2;
+    constexpr auto dt = std::is_same<T, float>::value ? data_types::f32 : data_types::f16;
+    const auto& engine = get_test_engine();
+
+    // If the input is of fp16 type then, the memory will be allocated as such
+    if (!engine.get_info().supports_fp16)
+    {
+        if (dt == data_types::f16)
+        {
+            return;
+        }
+    }
+
+    memory tempGEMM = memory::allocate(engine, { dt, format::bfyx,{ batch_size,    direction, 4 * hidden_size, 1 } });
+    memory cell = memory::allocate(engine, { dt, format::bfyx,{ batch_size,    direction,     hidden_size, 1 } });
     set_values(tempGEMM, ref_tempGEMM_vec);
     set_values(cell, ref_cell_vec);
 
@@ -298,7 +348,7 @@ void generic_lstm_elt_gpu_test(int sequence_len, int direction, int batch_size,
             for (int x = 0; x < hidden_size; ++x)
             {
                 auto idx = b * 2 * hidden_size + j * hidden_size + x;
-                EXPECT_NEAR(ref_output[b][j][0][x], output_ptr[idx], FERROR);
+                ASSERT_NEAR(ref_output[b][j][0][x], output_ptr[idx] , ferror);
             }
         }
     }
@@ -388,7 +438,7 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
     lstm_reference(ref_input, ref_hidden, ref_cell, ref_weights, ref_recurrent, ref_bias, ref_output, last_hidden, last_cell,
         hasBias, hasInitialHidden, hasInitialCell);
 
-    engine engine;
+    const auto& engine = get_test_engine();
     memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ batch_size, sequence_len,  input_size,       1 } });
     memory weights = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1,          direction,     input_size,       4 * hidden_size } });
     memory recurrent = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx,{ 1,          direction,     hidden_size,      4 * hidden_size } });
@@ -434,7 +484,7 @@ void generic_lstm_custom_gpu_test(int sequence_len, int direction, int batch_siz
 template<typename T>
 void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batch_size, int input_size, int hidden_size,
                             bool hasBias = true, bool hasInitialHidden = true, bool hasInitialCell = true,
-                            float clip_threshold = 0, bool input_forget = false) {
+                            T clip_threshold = 0, bool input_forget = false) {
     std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
             << " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
     int min_random = -2, max_random = 2;
@@ -452,8 +502,8 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
         ref_weights.push_back(generate_random_4d<T>(1, direction, 4 * hidden_size, i==0 ? input_size : hidden_size, min_random, max_random));
         ref_recurrent.push_back(generate_random_4d<T>(1, direction, 4 * hidden_size, hidden_size, min_random, max_random));
         ref_bias.push_back(generate_random_4d<T>(1, 1, direction, 4 * hidden_size, min_random, max_random));
-        ref_hidden.push_back(generate_random_4d<T>(batch_size, direction, 1, hidden_size, min_random, max_random));
-        ref_cell.push_back(generate_random_4d<T>(batch_size, direction, 1, hidden_size, min_random, max_random));
+        ref_hidden.push_back(generate_random_4d<T>(batch_size, 1, direction, hidden_size, min_random, max_random));
+        ref_cell.push_back(generate_random_4d<T>(batch_size, 1, direction, hidden_size, min_random, max_random));
         ref_output.push_back(VVVVF<T>(batch_size, VVVF<T>(sequence_len, VVF<T>(direction, VF<T>(hidden_size)))));
     }
 
@@ -471,8 +521,8 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
         ref_cell_vec.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_cell[i]));
     }
 
-    VVVVF<T> last_hidden(batch_size, VVVF<T>(direction, VVF<T>(1, VF<T>(hidden_size))));
-    VVVVF<T> last_cell(batch_size, VVVF<T>(direction, VVF<T>(1, VF<T>(hidden_size))));
+    VVVVF<T> last_hidden(batch_size, VVVF<T>(1, VVF<T>(direction, VF<T>(hidden_size))));
+    VVVVF<T> last_cell(batch_size, VVVF<T>(1, VVF<T>(direction, VF<T>(hidden_size))));
 
     lstm_reference(ref_input, ref_hidden[0], ref_cell[0], ref_weights[0], ref_recurrent[0], ref_bias[0], ref_output[0],
                    last_hidden, last_cell, hasBias, hasInitialHidden, hasInitialCell,
@@ -485,9 +535,24 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
                         clip_threshold, input_forget, false);
     }
 
-    engine engine;
+    // We observe some mismatch in down-converting from fp32 to fp16
+    // between the reference implementation and opencl kernel. This can be
+    // a simple rounding error. Thus, for fp16 we are increasing our tolerance
+    // to error from 1E-4 to 1E-2
+    constexpr float ferror = std::is_same<T, float>::value ? (float)1E-4 : (float)1E-2;
+    constexpr auto dt = std::is_same<T, float>::value ? data_types::f32 : data_types::f16;
+    const auto& engine = get_test_engine();
 
-    memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {batch_size, sequence_len, input_size, 1} });
+    // If the input is of fp16 type then, the memory will be allocated as such
+    if (!engine.get_info().supports_fp16)
+    {
+        if (dt == data_types::f16)
+        {
+            return;
+        }
+    }
+
+    memory input = memory::allocate(engine, { dt, format::bfyx, {batch_size, sequence_len, input_size, 1} });
     set_values(input, ref_input_vec);
 
     std::vector<memory> weights;
@@ -496,20 +561,20 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
     std::vector<memory> hidden;
     std::vector<memory> cell;
     for(int i = 0; i < layers; ++i) {
-        weights.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, direction, i==0 ? input_size : hidden_size, 4 * hidden_size } }));
+        weights.push_back(memory::allocate(engine, { dt, format::bfyx, { 1, direction, i==0 ? input_size : hidden_size, 4 * hidden_size } }));
         set_values(weights[i], ref_weights_vec[i]);
-        recurrent.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } }));
+        recurrent.push_back(memory::allocate(engine, { dt, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } }));
         set_values(recurrent[i], ref_recurrent_vec[i]);
         if (hasBias) {
-            biases.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, 1, 4 * hidden_size, direction } }));
+            biases.push_back(memory::allocate(engine, { dt, format::bfyx, { 1, 1, 4 * hidden_size, direction } }));
             set_values(biases[i], ref_bias_vec[i]);
         }
         if (hasInitialHidden) {
-            hidden.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size,  direction, hidden_size, 1 } }));
+            hidden.push_back(memory::allocate(engine, { dt, format::bfyx, { batch_size, 1, hidden_size, direction } }));
             set_values(hidden[i], ref_hidden_vec[i]);
         }
         if (hasInitialCell) {
-            cell.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, direction, hidden_size, 1 } }));
+            cell.push_back(memory::allocate(engine, { dt, format::bfyx, { batch_size, 1, hidden_size, direction} }));
             set_values(cell[i], ref_cell_vec[i]);
         }
     }
@@ -543,12 +608,14 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
         if (i == 0) {
             topology.add(lstm(lstm_id, lstm_inputs, weights_id, recurrent_id,
                             hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? cell_id : "", "",
-                            clip_threshold, input_forget, {}, {}, default_offset_type));
+                            clip_threshold, input_forget, {}, {},
+                            cldnn_lstm_output::cldnn_lstm_output_sequence, default_offset_type));
         }
         else {
             topology.add(lstm(lstm_id, { prev_lstm_id }, weights_id, recurrent_id,
                             hasBias ? biases_id : "", hasInitialHidden ? hidden_id : "", hasInitialCell ? cell_id : "", "",
-                            clip_threshold, input_forget, {}, {}, default_offset_type));
+                            clip_threshold, input_forget, {}, {},
+                            cldnn_lstm_output::cldnn_lstm_output_sequence, default_offset_type));
         }
         prev_lstm_id = lstm_id;
     }
@@ -567,17 +634,17 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
         ASSERT_EQ(output_size, size_t(hidden_size * sequence_len * batch_size * direction));
 
         auto output = outputs.begin()->second.get_memory();
-        
+
         // Get the output tensor
         cldnn::layout output_layout = output.get_layout();
-        cldnn::tensor output_tensor = output_layout.size; 
-        
+        cldnn::tensor output_tensor = output_layout.size;
+
         // Compare the output tensor configuration against the reference value
         // Output tensor is configured in bfyx format
         ASSERT_EQ(batch_size, output_tensor.batch[0]);
         ASSERT_EQ(sequence_len, output_tensor.feature[0]);
         ASSERT_EQ(direction, output_tensor.spatial[1]);
-        ASSERT_EQ(hidden_size, output_tensor.spatial[0]); 
+        ASSERT_EQ(hidden_size, output_tensor.spatial[0]);
 
         auto output_ptr = output.pointer<T>();
         int32_t i = 0;
@@ -585,7 +652,998 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
             for (int32_t s = 0; s < sequence_len; ++s) {
                 for (int32_t d = 0; d < direction; ++d) {
                     for (int32_t x = 0; x <  hidden_size; ++x) {
-                        ASSERT_NEAR(ref_output[layers-1][b][s][d][x], output_ptr[i++], FERROR);
+                        ASSERT_NEAR(ref_output[layers - 1][b][s][d][x], output_ptr[i++], ferror);
+                    }
+                }
+            }
+        }
+    }
+}
+
+// -------------------------------------------------------
+template<typename T>
+void lstm_gpu_output_test(const cldnn_lstm_output& output_selection, int directions) {
+    int layers = 1;
+    int sequence_len = 4;
+    int batch_size = 3;
+    int input_size = 3;
+    int hidden_size = 4;
+
+    std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
+            << " Sequence Len = " << sequence_len << " Directions = " << directions << " Batch Size = " << batch_size
+			<< " Output selection: " << output_selection << std::endl;
+    int min_random = -2, max_random = 2;
+
+    VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
+    VVVVF<T> ref_weights = generate_random_4d<T>(1, directions, 4 * hidden_size, input_size, min_random, max_random);
+    VVVVF<T> ref_recurrent = generate_random_4d<T>(1, directions, 4 * hidden_size, hidden_size, min_random, max_random);
+    VVVVF<T> ref_bias = generate_random_4d<T>(1, 1, directions, 4 * hidden_size, min_random, max_random);
+    VVVVF<T> ref_hidden = generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random);
+    VVVVF<T> ref_cell = generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random);
+    VVVVF<T> ref_output = VVVVF<T>(batch_size, VVVF<T>(sequence_len, VVF<T>(directions, VF<T>(hidden_size))));
+
+    VF<T> ref_input_vec = flatten_4d<T>(cldnn::format::bfyx, ref_input);
+    VF<T> ref_weights_vec = flatten_4d<T>(cldnn::format::bfyx, ref_weights);
+    VF<T> ref_recurrent_vec = flatten_4d<T>(cldnn::format::bfyx, ref_recurrent);
+    VF<T> ref_bias_vec = flatten_4d<T>(cldnn::format::bfyx, ref_bias);
+    VF<T> ref_hidden_vec = flatten_4d<T>(cldnn::format::bfyx, ref_hidden);
+    VF<T> ref_cell_vec = flatten_4d<T>(cldnn::format::bfyx, ref_cell);
+
+    VVVVF<T> last_hidden(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))));
+    VVVVF<T> last_cell(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))));
+
+    lstm_reference(ref_input, ref_hidden, ref_cell, ref_weights, ref_recurrent, ref_bias, ref_output,
+                   last_hidden, last_cell, true, true, true,
+                   (T)0, false, true);
+
+    const auto& engine = get_test_engine();
+
+    memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {batch_size, sequence_len, input_size, 1} });
+    memory weights = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } });
+    memory recurrent = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } });
+    memory biases = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } });
+    memory hidden = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, 1, hidden_size, directions } });
+    memory cell = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, 1, hidden_size, directions } });
+
+    set_values(input, ref_input_vec);
+    set_values(weights, ref_weights_vec);
+    set_values(recurrent, ref_recurrent_vec);
+    set_values(biases, ref_bias_vec);
+    set_values(hidden, ref_hidden_vec);
+    set_values(cell, ref_cell_vec);
+
+    bool emit_last_cell = output_selection == cldnn_lstm_output_hidden_cell ||
+                          output_selection == cldnn_lstm_output_sequence_cell;
+    bool emit_last_hidden = output_selection == cldnn_lstm_output_hidden ||
+                            output_selection == cldnn_lstm_output_hidden_cell;
+
+    topology topology;
+    std::vector<std::pair<primitive_id, tensor>> input_ids_offsets;
+    std::vector<primitive_id> lstm_inputs;
+    std::vector<primitive_id> output_ids_offsets;
+
+    topology.add(input_layout("input", input.get_layout()));
+    for (int i = 0; i < sequence_len; ++i)
+    {
+        input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}});
+        lstm_inputs.push_back("inputSplit:"+get_string_id(i));
+    }
+    topology.add(split("inputSplit", "input", input_ids_offsets));
+    topology.add(data("weights", weights));
+    topology.add(data("recurrent", recurrent));
+    topology.add(data("biases", biases));
+    topology.add(input_layout("hidden", hidden.get_layout()));
+    topology.add(input_layout("cell", cell.get_layout()));
+    topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent",
+                      "biases", "hidden", "cell", "", 0, false, {}, {},
+                      output_selection, default_offset_type));
+    if (emit_last_cell)
+    {
+        int32_t concatenation_len = emit_last_hidden ? 2 : sequence_len + 1;
+        tensor hidden_tensor {batch_size, concatenation_len - 1, hidden_size, directions};
+        tensor cell_tensor {batch_size, 1, hidden_size, directions};
+        topology.add(crop(emit_last_hidden ? "crop:last_hidden" : "crop:sequence", "lstm", hidden_tensor, tensor {0, 0, 0, 0}));
+        topology.add(crop("crop:last_cell", "lstm", cell_tensor, tensor {0, concatenation_len - 1, 0, 0}));
+    }
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    network.set_input_data("hidden", hidden);
+    network.set_input_data("cell", cell);
+
+    auto outputs = network.execute();
+	uint32_t ref_num_output_primitives = 1;  // Output will return atleast 1 primitive
+
+	if (emit_last_cell) {
+		// add another primitve to account for cell state if the output selection includes cell state
+		ref_num_output_primitives += 1;
+	}
+
+	// check if the number of returned primitives match the expected number of output primitives
+	ASSERT_EQ(ref_num_output_primitives, outputs.size());
+
+	for (auto itr = outputs.begin(); itr != outputs.end(); itr++)
+	{
+        auto output_tensor = itr->second.get_memory().get_layout().size;
+        primitive_id primitive_name = itr->first;
+
+		cldnn::memory output_memory = itr->second.get_memory();
+        int32_t output_size = (int32_t)(itr->second.get_memory().size() / sizeof(T));
+		cldnn::tensor ref_output_tensor;
+		VVVVF<T> ref_primitive_output;
+
+		int32_t ref_batch_size = batch_size;
+		int32_t ref_hidden_size = hidden_size;
+		int32_t ref_directions = directions;
+
+        int32_t ref_seq_len = 1;
+        // Set the reference output against which the primitive's output will be compared
+		if (primitive_name.find("crop:last_cell") != std::string::npos)
+		{
+			ref_primitive_output = last_cell;
+		}
+		else if (emit_last_hidden || primitive_name.find("crop:last_hidden") != std::string::npos)
+		{
+			ref_primitive_output = last_hidden;
+		}
+		else
+		{
+			ref_seq_len = sequence_len;
+			ref_primitive_output = ref_output;
+		}
+
+		ref_output_tensor = { ref_batch_size, ref_seq_len, ref_hidden_size, ref_directions };
+		int32_t ref_output_size = ref_batch_size * ref_seq_len * ref_hidden_size * ref_directions;
+
+		// The number of elements in reference should match the number of elements in the primitive's output
+		ASSERT_EQ(ref_output_size , output_size);
+
+        // Compare the output tensor configuration against the reference value
+        // Output tensor is configured in bfyx format
+        ASSERT_EQ(ref_batch_size, output_tensor.batch[0]);
+        ASSERT_EQ(ref_seq_len, output_tensor.feature[0]);		// Sequence length should match
+		ASSERT_EQ(ref_directions, output_tensor.spatial[1]);	// directions should match
+        ASSERT_EQ(ref_hidden_size, output_tensor.spatial[0]);	// input size should match
+
+        auto output_ptr = output_memory.pointer<T>();
+
+		int32_t i = 0;
+		for (int32_t b = 0; b < ref_batch_size; ++b) {
+			for (int32_t s = 0; s < ref_seq_len; ++s) {
+				for (int32_t d = 0; d < ref_directions; ++d) {
+					for (int32_t x = 0; x < ref_hidden_size; ++x) {
+                        ASSERT_NEAR(ref_primitive_output[b][s][d][x], output_ptr[i++], FERROR);
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+// -------------------------------------------------------
+template<typename T>
+void lstm_gpu_format_test(const cldnn::format& format, int directions) {
+    int layers = 1;
+    int sequence_len = 6;
+    int batch_size = 3;
+    int input_size = 4;
+    int hidden_size = 5;
+
+    cldnn_lstm_output output_selection = cldnn_lstm_output::cldnn_lstm_output_sequence;
+
+    std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
+            << " Sequence Len = " << sequence_len << " Directions = " << directions << " Batch Size = " << batch_size
+            << " Output selection: " << output_selection << std::endl;
+    int min_random = -2, max_random = 2;
+
+    VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
+    VVVVF<T> ref_weights = generate_random_4d<T>(1, directions, 4 * hidden_size, input_size, min_random, max_random);
+    VVVVF<T> ref_recurrent = generate_random_4d<T>(1, directions, 4 * hidden_size, hidden_size, min_random, max_random);
+    VVVVF<T> ref_bias = generate_random_4d<T>(1, 1, directions, 4 * hidden_size, min_random, max_random);
+    VVVVF<T> ref_hidden = generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random);
+    VVVVF<T> ref_cell = generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random);
+    VVVVF<T> ref_output = VVVVF<T>(batch_size, VVVF<T>(sequence_len, VVF<T>(directions, VF<T>(hidden_size))));
+
+    VF<T> ref_input_vec = flatten_4d<T>(format, ref_input);
+    VF<T> ref_weights_vec = flatten_4d<T>(cldnn::format::bfyx, ref_weights);
+    VF<T> ref_recurrent_vec = flatten_4d<T>(cldnn::format::bfyx, ref_recurrent);
+    VF<T> ref_bias_vec = flatten_4d<T>(cldnn::format::bfyx, ref_bias);
+    VF<T> ref_hidden_vec = flatten_4d<T>(format, ref_hidden);
+    VF<T> ref_cell_vec = flatten_4d<T>(format, ref_cell);
+
+    VVVVF<T> last_hidden(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))));
+    VVVVF<T> last_cell(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))));
+
+    lstm_reference(ref_input, ref_hidden, ref_cell, ref_weights, ref_recurrent, ref_bias, ref_output,
+                   last_hidden, last_cell, true, true, true,
+                   (T)0, false, true);
+
+    const auto& engine = get_test_engine();
+
+    memory input = memory::allocate(engine, { type_to_data_type<T>::value,format, {batch_size, sequence_len, input_size, 1} });
+    memory weights = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } });
+    memory recurrent = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } });
+    memory biases = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } });
+    memory hidden = memory::allocate(engine, { type_to_data_type<T>::value, format, { batch_size, 1, hidden_size, directions } });
+    memory cell = memory::allocate(engine, { type_to_data_type<T>::value, format, { batch_size, 1, hidden_size, directions } });
+
+    set_values(input, ref_input_vec);
+    set_values(weights, ref_weights_vec);
+    set_values(recurrent, ref_recurrent_vec);
+    set_values(biases, ref_bias_vec);
+    set_values(hidden, ref_hidden_vec);
+    set_values(cell, ref_cell_vec);
+
+    bool emit_last_cell = output_selection == cldnn_lstm_output_hidden_cell ||
+                          output_selection == cldnn_lstm_output_sequence_cell;
+    bool emit_last_hidden = output_selection == cldnn_lstm_output_hidden ||
+                            output_selection == cldnn_lstm_output_hidden_cell;
+
+    topology topology;
+    std::vector<std::pair<primitive_id, tensor>> input_ids_offsets;
+    std::vector<primitive_id> lstm_inputs;
+    std::vector<primitive_id> output_ids_offsets;
+
+    topology.add(input_layout("input", input.get_layout()));
+    for (int i = 0; i < sequence_len; ++i)
+    {
+        input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}});
+        lstm_inputs.push_back("inputSplit:"+get_string_id(i));
+    }
+    topology.add(split("inputSplit", "input", input_ids_offsets));
+    topology.add(data("weights", weights));
+    topology.add(data("recurrent", recurrent));
+    topology.add(data("biases", biases));
+    topology.add(input_layout("hidden", hidden.get_layout()));
+    topology.add(input_layout("cell", cell.get_layout()));
+    topology.add(lstm("lstm"+get_string_id(0), lstm_inputs, "weights", "recurrent",
+                      "biases", "hidden", "cell", "", 0, false, {}, {},
+                      output_selection, default_offset_type));
+
+    if (emit_last_cell)
+    {
+        int32_t concatenation_len = emit_last_hidden ? 2 : sequence_len + 1;
+        tensor hidden_tensor {batch_size, concatenation_len - 1, hidden_size, directions};
+        tensor cell_tensor {batch_size, 1, hidden_size, directions};
+        topology.add(crop(emit_last_hidden ? "crop:last_hidden" : "crop:sequence", "lstm", hidden_tensor, tensor {0, 0, 0, 0}));
+        topology.add(crop("crop:last_cell", "lstm", cell_tensor, tensor {0, concatenation_len - 1, 0, 0}));
+    }
+
+    network network(engine, topology);
+    std::map<primitive_id, network_output> outputs;
+
+    network.set_input_data("input", input);
+    network.set_input_data("hidden", hidden);
+    network.set_input_data("cell", cell);
+    outputs = network.execute();
+
+    uint32_t ref_num_output_primitives = 1;  // Output will return atleast 1 primitive
+
+    if (emit_last_cell) {
+        // add another primitve to account for cell state if the output selection includes cell state
+        ref_num_output_primitives += 1;
+    }
+
+    // check if the number of returned primitives match the expected number of output primitives
+    ASSERT_EQ(ref_num_output_primitives, outputs.size());
+
+    for (auto itr = outputs.begin(); itr != outputs.end(); itr++)
+    {
+        auto output_tensor = itr->second.get_memory().get_layout().size;
+        primitive_id primitive_name = itr->first;
+
+        cldnn::memory output_memory = itr->second.get_memory();
+        int32_t output_size = (int32_t)(itr->second.get_memory().size() / sizeof(T));
+        cldnn::tensor ref_output_tensor;
+        VVVVF<T> ref_primitive_output;
+
+        int32_t ref_batch_size = batch_size;
+        int32_t ref_hidden_size = hidden_size;
+        int32_t ref_directions = directions;
+
+        int32_t ref_seq_len = 1;
+        // Set the reference output against which the primitive's output will be compared
+        if (primitive_name.find("crop:last_cell") != std::string::npos)
+        {
+            ref_primitive_output = last_cell;
+        }
+        else if (emit_last_hidden || primitive_name.find("crop:last_hidden") != std::string::npos)
+        {
+            ref_primitive_output = last_hidden;
+        }
+        else
+        {
+            ref_seq_len = sequence_len;
+            ref_primitive_output = ref_output;
+        }
+
+        ref_output_tensor = { ref_batch_size, ref_seq_len, ref_hidden_size, ref_directions };
+        int32_t ref_output_size = ref_batch_size * ref_seq_len * ref_hidden_size * ref_directions;
+
+        // The number of elements in reference should match the number of elements in the primitive's output
+        ASSERT_EQ(ref_output_size , output_size);
+
+        // Compare the output tensor configuration against the reference value
+        // Output tensor is configured in bfyx format
+        ASSERT_EQ(ref_batch_size, output_tensor.batch[0]);
+        ASSERT_EQ(ref_seq_len, output_tensor.feature[0]);       // Sequence length should match
+        ASSERT_EQ(ref_directions, output_tensor.spatial[1]);    // directions should match
+        ASSERT_EQ(ref_hidden_size, output_tensor.spatial[0]);   // input size should match
+
+        auto output_ptr = output_memory.pointer<T>();
+
+        int32_t i = 0;
+        if (format == cldnn::format::bfyx) {
+            for (int32_t b = 0; b < ref_batch_size; ++b) {
+                for (int32_t s = 0; s < ref_seq_len; ++s) {
+                    for (int32_t d = 0; d < ref_directions; ++d) {
+                        for (int32_t x = 0; x < ref_hidden_size; ++x) {
+                            ASSERT_NEAR(ref_primitive_output[b][s][d][x], output_ptr[i++], FERROR);
+                        }
+                    }
+                }
+            }
+        }
+        else if(format == cldnn::format::fyxb)
+        {
+            for (int32_t s = 0; s < ref_seq_len; ++s) {
+                for (int32_t d = 0; d < ref_directions; ++d) {
+                    for (int32_t x = 0; x < ref_hidden_size; ++x) {
+                        for (int32_t b = 0; b < ref_batch_size; ++b) {
+                            ASSERT_NEAR(ref_primitive_output[b][s][d][x], output_ptr[i++], FERROR);
+                        }
+                    }
+                }
+            }
+        }
+
+    }
+}
+
+// -------------------------------------------------------
+template<typename T>
+void lstm_gpu_users_test() {
+    int sequence_len = 2;
+    int batch_size = 1;
+    int input_size = 1;
+    int hidden_size = 1;
+    int directions = 1;
+    int min_random = -2, max_random = 2;
+
+    // The following test is designed to test the user dependencies of an LSTM node when replaced by subcomponents
+    // by the graph compiler.
+    // The output of an LSTM node is set to last_hidden only. Then we concatenate the last_hidden with the initial_hidden tensor:
+    // (input, weights, recurrent, bias, initial_hidden, inital_cell) -> LSTM -> last_hidden
+    // concatenation(last_hidden, initial_hidden)
+    // If the replacing is is done correctly then the initial_hidden tensor should match the output of the concatenation
+    // by an offset along the sequence.
+
+    VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
+    VVVVF<T> ref_weights = generate_random_4d<T>(1, directions, 4 * hidden_size, input_size, min_random, max_random);
+    VVVVF<T> ref_recurrent = generate_random_4d<T>(1, directions, 4 * hidden_size, hidden_size, min_random, max_random);
+    VVVVF<T> ref_bias = generate_random_4d<T>(1, 1, directions, 4 * hidden_size, min_random, max_random);
+    VVVVF<T> ref_hidden = generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random);
+    VVVVF<T> ref_cell = generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random);
+    VVVVF<T> ref_output = VVVVF<T>(batch_size, VVVF<T>(sequence_len, VVF<T>(directions, VF<T>(hidden_size))));
+
+    VF<T> ref_input_vec = flatten_4d<T>(format::bfyx, ref_input);
+    VF<T> ref_weights_vec = flatten_4d<T>(format::bfyx, ref_weights);
+    VF<T> ref_recurrent_vec = flatten_4d<T>(format::bfyx, ref_recurrent);
+    VF<T> ref_bias_vec = flatten_4d<T>(format::bfyx, ref_bias);
+    VF<T> ref_hidden_vec = flatten_4d<T>(format::bfyx, ref_hidden);
+    VF<T> ref_cell_vec = flatten_4d<T>(format::bfyx, ref_cell);
+
+    VVVVF<T> last_hidden(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))));
+    VVVVF<T> last_cell(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))));
+
+    const auto& engine = get_test_engine();
+
+    memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {batch_size, sequence_len, input_size, 1} });
+    memory weights = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, directions, input_size , 4 * hidden_size } });
+    memory recurrent = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, directions, hidden_size, 4 * hidden_size } });
+    memory biases = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, 1, 4 * hidden_size, directions } });
+    memory hidden = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, 1, hidden_size, directions } });
+    memory cell = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, 1, hidden_size, directions } });
+
+    set_values(input, ref_input_vec);
+    set_values(weights, ref_weights_vec);
+    set_values(recurrent, ref_recurrent_vec);
+    set_values(biases, ref_bias_vec);
+    set_values(hidden, ref_hidden_vec);
+    set_values(cell, ref_cell_vec);
+
+    topology topology;
+    std::vector<std::pair<primitive_id, tensor>> input_ids_offsets;
+    std::vector<primitive_id> lstm_inputs;
+
+    topology.add(input_layout("input", input.get_layout()));
+    for (int i = 0; i < sequence_len; ++i)
+    {
+        input_ids_offsets.push_back({get_string_id(i), {0, i, 0, 0}});
+        lstm_inputs.push_back("inputSplit:"+get_string_id(i));
+    }
+    topology.add(split("inputSplit", "input", input_ids_offsets));
+    topology.add(data("weights", weights));
+    topology.add(data("recurrent", recurrent));
+    topology.add(data("biases", biases));
+    topology.add(input_layout("hidden", hidden.get_layout()));
+    topology.add(input_layout("cell", cell.get_layout()));
+    topology.add(lstm("lstm", lstm_inputs, "weights", "recurrent",
+                      "biases", "hidden", "cell", "", 0, false, {}, {},
+                      cldnn_lstm_output::cldnn_lstm_output_hidden, default_offset_type));
+    std::vector<primitive_id> output_ids_offsets {"lstm", "hidden"};
+    topology.add(concatenation("concatenation", output_ids_offsets, concatenation::along_f));
+
+    network network(engine, topology);
+    std::map<primitive_id, network_output> outputs;
+
+    network.set_input_data("input", input);
+    network.set_input_data("hidden", hidden);
+    network.set_input_data("cell", cell);
+    outputs = network.execute();
+
+    // check if the number of returned primitives match the expected number of output primitives
+    ASSERT_EQ(size_t(1), outputs.size());
+    cldnn::memory output_memory = outputs.begin()->second.get_memory();
+    auto output_ptr = output_memory.pointer<T>();
+
+    int32_t i = 0;
+    for (int32_t b = 0; b < batch_size; ++b) {
+        for (int32_t s = 0; s < 1; ++s) {
+            for (int32_t d = 0; d < directions; ++d) {
+                for (int32_t x = 0; x < hidden_size; ++x) {
+                    int32_t idx = x + hidden_size * (d + directions * ((s+1) + sequence_len * b));
+                    ASSERT_NEAR(ref_hidden[b][s][d][x], output_ptr[idx], FERROR);
+                }
+            }
+        }
+    }
+}
+
+// -------------------------------------------------------
+template<typename T>
+void lstm_gpu_concatenated_input_test(int layers, int sequence_len, int direction,
+						              int batch_size, int input_size, int hidden_size,
+						              bool has_bias = true, bool has_initial_hidden = true,
+						              bool has_initial_cell = true, float clip_threshold = 0,
+						              bool input_forget = false)
+{
+	std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
+		<< " Sequence Len = " << sequence_len << " Direction = " << direction << " Batch Size = " << batch_size << std::endl;
+	int min_random = -2, max_random = 2;
+
+	VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
+
+	std::vector<VVVVF<T>> ref_weights;
+	std::vector<VVVVF<T>> ref_recurrent;
+	std::vector<VVVVF<T>> ref_bias;
+	std::vector<VVVVF<T>> ref_hidden;
+	std::vector<VVVVF<T>> ref_cell;
+	std::vector<VVVVF<T>> ref_output;
+
+	for (int i = 0; i < layers; ++i) {
+		ref_weights.push_back(generate_random_4d<T>(1, direction, 4 * hidden_size, i == 0 ? input_size : hidden_size, min_random, max_random));
+		ref_recurrent.push_back(generate_random_4d<T>(1, direction, 4 * hidden_size, hidden_size, min_random, max_random));
+		ref_bias.push_back(generate_random_4d<T>(1, 1, direction, 4 * hidden_size, min_random, max_random));
+		ref_hidden.push_back(generate_random_4d<T>(batch_size, 1, direction, hidden_size, min_random, max_random));
+		ref_cell.push_back(generate_random_4d<T>(batch_size, 1, direction, hidden_size, min_random, max_random));
+		ref_output.push_back(VVVVF<T>(batch_size, VVVF<T>(sequence_len, VVF<T>(direction, VF<T>(hidden_size)))));
+	}
+
+	VF<T> ref_input_vec = flatten_4d<T>(cldnn::format::bfyx, ref_input);
+
+	std::vector<VF<T>> ref_weights_vec;
+	std::vector<VF<T>> ref_recurrent_vec;
+	std::vector<VF<T>> ref_bias_vec;
+	std::vector<VF<T>> ref_hidden_vec;
+	std::vector<VF<T>> ref_cell_vec;
+	for (int i = 0; i < layers; ++i) {
+		ref_weights_vec.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_weights[i]));
+		ref_recurrent_vec.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_recurrent[i]));
+		ref_bias_vec.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_bias[i]));
+		ref_hidden_vec.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_hidden[i]));
+		ref_cell_vec.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_cell[i]));
+	}
+
+	VVVVF<T> last_hidden(batch_size, VVVF<T>(1, VVF<T>(direction, VF<T>(hidden_size))));
+	VVVVF<T> last_cell(batch_size, VVVF<T>(1, VVF<T>(direction, VF<T>(hidden_size))));
+
+	lstm_reference(ref_input, ref_hidden[0], ref_cell[0], ref_weights[0], ref_recurrent[0], ref_bias[0], ref_output[0],
+		last_hidden, last_cell, has_bias, has_initial_hidden, has_initial_cell,
+		clip_threshold, input_forget, true);
+
+	for (int i = 1; i < layers; ++i) {
+		lstm_reference(ref_output[i - 1], ref_hidden[i], ref_cell[i], ref_weights[i], ref_recurrent[i],
+			ref_bias[i], ref_output[i],
+			last_hidden, last_cell, has_bias, has_initial_hidden, has_initial_cell,
+			clip_threshold, input_forget, false);
+	}
+
+	const auto& engine = get_test_engine();
+
+	memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {batch_size, sequence_len, input_size, 1} });
+	set_values(input, ref_input_vec);
+
+	std::vector<memory> weights;
+	std::vector<memory> recurrent;
+	std::vector<memory> biases;
+	std::vector<memory> hidden;
+	std::vector<memory> cell;
+	for (int i = 0; i < layers; ++i) {
+		weights.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, direction, i == 0 ? input_size : hidden_size, 4 * hidden_size } }));
+		set_values(weights[i], ref_weights_vec[i]);
+		recurrent.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, direction, hidden_size, 4 * hidden_size } }));
+		set_values(recurrent[i], ref_recurrent_vec[i]);
+		if (has_bias) {
+			biases.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { 1, 1, 4 * hidden_size, direction } }));
+			set_values(biases[i], ref_bias_vec[i]);
+		}
+		if (has_initial_hidden) {
+			hidden.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, 1, hidden_size, direction } }));
+			set_values(hidden[i], ref_hidden_vec[i]);
+		}
+		if (has_initial_cell) {
+			cell.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, { batch_size, 1, hidden_size, direction} }));
+			set_values(cell[i], ref_cell_vec[i]);
+		}
+	}
+
+	topology topology;
+	std::vector<std::pair<primitive_id, tensor>> input_ids_offsets;
+	std::vector<primitive_id> lstm_inputs;
+	std::vector<primitive_id> output_ids_offsets;
+
+	topology.add(input_layout("input", input.get_layout()));
+	cldnn::primitive_id prev_node_id;
+
+    for (int i = 0; i < layers; ++i) {
+		std::string sid = get_string_id(i);
+		std::string lstm_id = "lstm" + sid;
+		std::string weights_id = "weights" + sid;
+		std::string recurrent_id = "recurrent" + sid;
+		std::string biases_id = "biases" + sid;
+		std::string hidden_id = "hidden" + sid;
+		std::string cell_id = "cell" + sid;
+		std::string output_crop_id = "crop:sequence:" + sid;
+
+		topology.add(data(weights_id, weights[i]));
+		topology.add(data(recurrent_id, recurrent[i]));
+		if (has_bias) topology.add(data(biases_id, biases[i]));
+		if (has_initial_hidden) topology.add(input_layout(hidden_id, hidden[i].get_layout()));
+		if (has_initial_cell) topology.add(input_layout(cell_id, cell[i].get_layout()));
+		if (i == 0) {
+            topology.add(lstm(lstm_id, { "input" }, weights_id, recurrent_id,
+				has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? cell_id : "", "",
+				clip_threshold, input_forget, {}, {},
+				cldnn_lstm_output::cldnn_lstm_output_sequence_cell, default_offset_type));
+		}
+		else {
+			topology.add(lstm(lstm_id, { prev_node_id }, weights_id, recurrent_id,
+				has_bias ? biases_id : "", has_initial_hidden ? hidden_id : "", has_initial_cell ? cell_id : "", "",
+				clip_threshold, input_forget, {}, {},
+				cldnn_lstm_output::cldnn_lstm_output_sequence_cell, default_offset_type));
+		}
+
+        // Crop out the whole output sequence element
+		topology.add(crop(output_crop_id, lstm_id, {batch_size, sequence_len, hidden_size, direction}, {0, 0, 0, 0}));
+
+       // Save the node id to provide it as input to the next lstm layer
+		prev_node_id = output_crop_id;
+	}
+
+	network network(engine, topology);
+	network.set_input_data("input", input);
+	for (int i = 0; i < layers; ++i) {
+		std::string sid = get_string_id(i);
+		if (has_initial_hidden) network.set_input_data("hidden" + sid, hidden[i]);
+		if (has_initial_cell) network.set_input_data("cell" + sid, cell[i]);
+	}
+	auto outputs = network.execute();
+	{
+		ASSERT_EQ(outputs.size(), size_t(1));
+		size_t output_size = outputs.begin()->second.get_memory().size() / sizeof(T);
+		ASSERT_EQ(output_size, size_t(hidden_size * sequence_len * batch_size * direction));
+
+		auto output = outputs.begin()->second.get_memory();
+
+		// Get the output tensor
+		cldnn::layout output_layout = output.get_layout();
+		cldnn::tensor output_tensor = output_layout.size;
+
+		// Compare the output tensor configuration against the reference value
+		// Output tensor is configured in bfyx format
+		ASSERT_EQ(batch_size, output_tensor.batch[0]);
+		ASSERT_EQ(sequence_len, output_tensor.feature[0]);
+		ASSERT_EQ(direction, output_tensor.spatial[1]);
+		ASSERT_EQ(hidden_size, output_tensor.spatial[0]);
+
+		auto output_ptr = output.pointer<T>();
+		int32_t i = 0;
+		for (int32_t b = 0; b < batch_size; ++b) {
+			for (int32_t s = 0; s < sequence_len; ++s) {
+				for (int32_t d = 0; d < direction; ++d) {
+					for (int32_t x = 0; x < hidden_size; ++x) {
+						ASSERT_NEAR(ref_output[layers - 1][b][s][d][x], output_ptr[i++], FERROR);
+					}
+				}
+			}
+		}
+	}
+}
+
+// This test checks chained and stacked LSTM topology. The configuration allows to create
+// LSTM topology with multiple layers and can also be chained together.
+template<typename T>
+void lstm_gpu_chain_test(int batch_size, int input_size, int hidden_size,
+                         int directions, size_t layers, size_t chains, int sequence_len,
+                         const cldnn_lstm_output& output_selection)
+{
+    int min_random = -2, max_random = 2;
+    bool has_bias = false;
+    bool has_initial_hidden = false;
+    bool has_initial_cell = false;
+    float clip_threshold = 0;
+    bool input_forget = false;
+
+    std::cout << "Layers = " << layers << " Input Size = " << input_size << " Hidden Size = " << hidden_size
+        << " Sequence Len = " << sequence_len << " Directions = " << directions << " Batch Size = " << batch_size
+        << " Output selection: " << output_selection << std::endl;
+
+    VVVVF<T> ref_input = generate_random_4d<T>(batch_size, sequence_len, 1, input_size, min_random, max_random);
+    std::vector<std::vector< VVVVF<T>>> ref_weights;
+    std::vector<std::vector< VVVVF<T>>> ref_recurrent;
+    std::vector<std::vector< VVVVF<T>>> ref_bias;
+    std::vector<std::vector< VVVVF<T>>> ref_hidden;
+    std::vector<std::vector< VVVVF<T>>> ref_cell;
+    std::vector<std::vector< VVVVF<T>>> ref_output;
+
+    // Create the 4 dimensional weight, bias, hidden, cell state and output vectors
+    for (size_t chain = 0; chain < chains; chain++) {
+
+        std::vector<VVVVF<T>> per_chain_ref_weights;
+        std::vector<VVVVF<T>> per_chain_ref_recurrent;
+        std::vector<VVVVF<T>> per_chain_ref_bias;
+        std::vector<VVVVF<T>> per_chain_ref_hidden;
+        std::vector<VVVVF<T>> per_chain_ref_cell;
+        std::vector<VVVVF<T>> per_chain_ref_output;
+
+        for (size_t layer = 0; layer < layers; layer++) {
+            per_chain_ref_weights.push_back(generate_random_4d<T>(1, directions, 4 * hidden_size, (layer == 0) ? input_size : hidden_size, min_random, max_random));
+            per_chain_ref_recurrent.push_back(generate_random_4d<T>(1, directions, 4 * hidden_size, hidden_size, min_random, max_random));
+            per_chain_ref_bias.push_back(generate_random_4d<T>(1, 1, directions, 4 * hidden_size, min_random, max_random));
+            per_chain_ref_hidden.push_back(generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random));
+            per_chain_ref_cell.push_back(generate_random_4d<T>(batch_size, 1, directions, hidden_size, min_random, max_random));
+            per_chain_ref_output.push_back(VVVVF<T>(batch_size, VVVF<T>(sequence_len, VVF<T>(directions, VF<T>(hidden_size)))));
+        }
+
+        ref_weights.push_back(per_chain_ref_weights);
+        ref_recurrent.push_back(per_chain_ref_recurrent);
+        ref_bias.push_back(per_chain_ref_bias);
+        ref_hidden.push_back(per_chain_ref_hidden);
+        ref_cell.push_back(per_chain_ref_cell);
+        ref_output.push_back(per_chain_ref_output);
+    }
+
+    VF<T> ref_input_vec;
+    std::vector<std::vector< VF<T>>> ref_weights_vec;
+    std::vector<std::vector< VF<T>>> ref_recurrent_vec;
+    std::vector<std::vector< VF<T>>> ref_bias_vec;
+    std::vector<std::vector< VF<T>>> ref_hidden_vec;
+    std::vector<std::vector< VF<T>>> ref_cell_vec;
+    std::vector<std::vector< VF<T>>> ref_output_vec;
+
+    ref_input_vec = flatten_4d<T>(cldnn::format::bfyx, ref_input);
+
+    // flatten all the 4 dimensional vectors across chains and layers
+    for (size_t chain = 0; chain < chains; chain++) {
+
+        std::vector<VF<T>> per_chain_ref_weights;
+        std::vector<VF<T>> per_chain_ref_recurrent;
+        std::vector<VF<T>> per_chain_ref_bias;
+        std::vector<VF<T>> per_chain_ref_hidden;
+        std::vector<VF<T>> per_chain_ref_cell;
+        std::vector<VF<T>> per_chain_ref_output;
+
+        for (size_t layer = 0; layer < layers; layer++) {
+            per_chain_ref_weights.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_weights[chain][layer]));
+            per_chain_ref_recurrent.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_recurrent[chain][layer]));
+            per_chain_ref_bias.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_bias[chain][layer]));
+            per_chain_ref_hidden.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_hidden[chain][layer]));
+            per_chain_ref_cell.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_cell[chain][layer]));
+            per_chain_ref_output.push_back(flatten_4d<T>(cldnn::format::bfyx, ref_output[chain][layer]));
+        }
+
+        ref_weights_vec.push_back(per_chain_ref_weights);
+        ref_recurrent_vec.push_back(per_chain_ref_recurrent);
+        ref_bias_vec.push_back(per_chain_ref_bias);
+        ref_hidden_vec.push_back(per_chain_ref_hidden);
+        ref_cell_vec.push_back(per_chain_ref_cell);
+        ref_output_vec.push_back(per_chain_ref_output);
+    }
+
+    std::vector<std::vector<VVVVF<T>>> last_hidden(chains, std::vector<VVVVF<T> >(layers, VVVVF<T>(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))))));
+    std::vector<std::vector<VVVVF<T>>> last_cell(chains, std::vector<VVVVF<T> >(layers, VVVVF<T>(batch_size, VVVF<T>(1, VVF<T>(directions, VF<T>(hidden_size))))));
+
+    for (size_t chain = 0; chain < chains; chain++) {
+        lstm_reference(ref_input, ref_hidden[chain][0], ref_cell[chain][0], ref_weights[chain][0],
+                       ref_recurrent[chain][0], ref_bias[chain][0], ref_output[chain][0],
+                       last_hidden[chain][0], last_cell[chain][0], has_bias,
+                       chain == 0 ? has_initial_hidden : true,
+                       chain == 0 ? has_initial_cell : true,
+                       clip_threshold, input_forget, true);
+
+        if (chain < chains - 1)
+        {
+            ref_hidden[chain + 1][0] = last_hidden[chain][0];
+            ref_cell[chain + 1][0] = last_cell[chain][0];
+        }
+    }
+
+    for (size_t layer = 1; layer < layers; ++layer) {
+        for (size_t chain = 0; chain < chains; chain++) {
+            lstm_reference(ref_output[chain][layer - 1], ref_hidden[chain][layer], ref_cell[chain][layer],
+                           ref_weights[chain][layer], ref_recurrent[chain][layer], ref_bias[chain][layer],
+                           ref_output[chain][layer], last_hidden[chain][layer], last_cell[chain][layer], has_bias,
+                           chain == 0 ? has_initial_hidden : true,
+                           chain == 0 ? has_initial_cell : true,
+                           clip_threshold, input_forget,
+                           false);
+
+            if (chain < chains - 1)
+            {
+                ref_hidden[chain + 1][layer] = last_hidden[chain][layer];
+                ref_cell[chain + 1][layer] = last_cell[chain][layer];
+            }
+        }
+    }
+
+    const auto& engine = get_test_engine();
+    tensor input_tensor = { batch_size, sequence_len, input_size, 1 };
+    layout layout = { type_to_data_type<T>::value, cldnn::format::bfyx, input_tensor };
+
+    memory input = memory::allocate(engine, layout);
+    set_values(input, ref_input_vec);
+
+    // 2-dim vectors to support chain and layers
+    std::vector<std::vector<memory>> weights;
+    std::vector<std::vector<memory>> recurrent;
+    std::vector<std::vector<memory>> biases;
+    std::vector<std::vector<memory>> hidden;
+    std::vector<std::vector<memory>> cell;
+
+    for (size_t chain = 0; chain < chains; chain++) {
+        std::vector<memory> per_chain_weights;
+        std::vector<memory> per_chain_recurrent;
+        std::vector<memory> per_chain_biases;
+        std::vector<memory> per_chain_hidden;
+        std::vector<memory> per_chain_cell;
+
+        for (size_t layer = 0; layer < layers; layer++) {
+            per_chain_weights.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {1, directions, layer == 0 ? input_size : hidden_size, 4 * hidden_size} }));
+            set_values(per_chain_weights[layer], ref_weights_vec[chain][layer]);
+
+            per_chain_recurrent.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {1, directions, hidden_size, 4 * hidden_size} }));
+            set_values(per_chain_recurrent[layer], ref_recurrent_vec[chain][layer]);
+
+            if (has_bias)
+            {
+                per_chain_biases.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {1, 1, 4 * hidden_size, directions} }));
+                set_values(per_chain_biases[layer], ref_bias_vec[chain][layer]);
+            }
+
+            if (has_initial_hidden)
+            {
+                per_chain_hidden.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {1, 1, hidden_size, directions} }));
+                set_values(per_chain_hidden[layer], ref_hidden_vec[chain][layer]);
+            }
+
+            if (has_initial_cell)
+            {
+                per_chain_cell.push_back(memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, {1, 1, hidden_size, directions} }));
+                set_values(per_chain_cell[layer], ref_cell_vec[chain][layer]);
+            }
+        }
+
+        weights.push_back(per_chain_weights);
+        recurrent.push_back(per_chain_recurrent);
+        biases.push_back(per_chain_biases);
+        hidden.push_back(per_chain_hidden);
+        cell.push_back(per_chain_cell);
+    }
+
+    // Start creating the topology
+    cldnn::topology topology;
+    std::vector<std::pair<primitive_id, cldnn::tensor>> input_ids_offsets;
+    std::vector<primitive_id> lstm_inputs;
+    std::vector<primitive_id> output_ids_offsets;
+
+    topology.add(input_layout("input", input.get_layout()));
+
+    for (int feature = 0; feature < sequence_len; feature++) {
+        input_ids_offsets.push_back({ get_string_id(feature), {0, feature, 0, 0} });
+        lstm_inputs.push_back("inputSplit:" + get_string_id(feature));
+    }
+    topology.add(split("inputSplit", "input", input_ids_offsets));
+
+    bool emit_last_hidden = output_selection == cldnn_lstm_output_hidden
+        || output_selection == cldnn_lstm_output_hidden_cell;
+
+    std::vector<cldnn::primitive_id> output_sequence_ids;
+    std::vector<cldnn::primitive_id> last_hidden_ids;
+    std::vector<cldnn::primitive_id> last_cell_ids;
+
+    for (size_t chain = 0; chain < chains; chain++) {
+
+        // Add all the primitives to the network
+        std::vector<cldnn::primitive_id> prev_output_sequence_ids(output_sequence_ids);
+        std::vector<cldnn::primitive_id> prev_last_hidden_ids(last_hidden_ids);
+        std::vector<cldnn::primitive_id> prev_last_cell_ids(last_cell_ids);
+
+        // Erase all the temporary primitive id containers
+        output_sequence_ids.clear();
+        last_cell_ids.clear();
+        last_hidden_ids.clear();
+
+        for (size_t layer = 0; layer < layers; layer++) {
+            std::string chain_id = get_string_id(chain);
+            std::string layer_id = get_string_id(layer);
+            std::string lstm_id = "lstm:" + chain_id + ":" + layer_id;
+            std::string weights_id = "weights:" + chain_id + ":" + layer_id;
+            std::string recurrent_id = "recurrent:" + chain_id + ":" + layer_id;
+            std::string biases_id = "biases:" + chain_id + ":" + layer_id;
+            std::string hidden_id = "hidden:" + chain_id + ":" + layer_id;
+            std::string cell_id = "cell:" + chain_id + ":" + layer_id;
+            std::string crop_seq_id = "crop:sequence:" + chain_id + ":" + layer_id;
+            std::string crop_last_cell_id = "crop:last_cell:" + chain_id + ":" + layer_id;
+            std::string crop_last_hidden_id = "crop:last_hidden:" + chain_id + ":" + layer_id;
+
+            primitive_id initial_hidden_id;
+            primitive_id initial_cell_id;
+            cldnn_lstm_output output_selection_per_layer;
+
+            topology.add(data(weights_id, weights[chain][layer]));
+            topology.add(data(recurrent_id, recurrent[chain][layer]));
+            if (has_bias) topology.add(data(biases_id, biases[chain][layer]));
+
+            if (chain == 0 && layer == 0)
+            {
+                if (has_initial_hidden) topology.add(input_layout(hidden_id, hidden[chain][layer].get_layout()));
+                if (has_initial_cell) topology.add(input_layout(cell_id, cell[chain][layer].get_layout()));
+            }
+
+            // Get the initial hidden and initial cell for each layer for each chain link
+            if (chain == 0)
+            {
+                initial_hidden_id = has_initial_hidden ? hidden_id : "";
+                initial_cell_id = has_initial_cell ? cell_id : "";
+            }
+            else
+            {
+                initial_hidden_id = prev_last_hidden_ids[layer];
+                initial_cell_id = prev_last_cell_ids[layer];
+            }
+
+            // Output selection for all the layers except the last layer has to have the sequence,
+            // last hidden and last cell
+            if (layer < layers - 1)
+            {
+                output_selection_per_layer = cldnn_lstm_output::cldnn_lstm_output_sequence_cell;
+            }
+            else
+            {
+                // For the last layer, use the output selection provided by the user
+                output_selection_per_layer = output_selection;
+            }
+
+            if (layer == 0)
+            {
+                topology.add(lstm(lstm_id, lstm_inputs, weights_id, recurrent_id,
+                    has_bias ? biases_id : "",
+                    initial_hidden_id, initial_cell_id,
+                    "", clip_threshold, input_forget, {}, {},
+                    output_selection_per_layer, default_offset_type));
+            }
+            else
+            {
+                topology.add(lstm(lstm_id, { output_sequence_ids[layer - 1] }, weights_id, recurrent_id,
+                    has_bias ? biases_id : "",
+                    initial_hidden_id, initial_cell_id,
+                    "", clip_threshold, input_forget, {}, {},
+                    output_selection_per_layer, default_offset_type));
+            }
+
+            tensor sequence_tensor{ batch_size, sequence_len, hidden_size, directions };
+            tensor cell_tensor{ batch_size, 1, hidden_size, directions };
+            tensor last_hidden_tensor{ batch_size, 1, hidden_size, directions };
+
+            // For all the layers except the last layer, we need to crop output sequence,
+            // last hidden and last cell.
+            // The output sequence goes into the next layer of lstm in a chain link
+            // The last cell state and last hidden go to the lstm node in the same layer
+            // next in chain
+            topology.add(crop(crop_seq_id, lstm_id, sequence_tensor, tensor{ 0, 0, 0, 0 }));  // Add crop to get the sequence
+            topology.add(crop(crop_last_hidden_id, lstm_id, last_hidden_tensor, tensor{ 0, sequence_len - 1, 0, 0 }));  // Add crop to get the last hidden element
+            topology.add(crop(crop_last_cell_id, lstm_id, cell_tensor, tensor{ 0, sequence_len, 0, 0 }));  // Add crop to get the last cell element
+
+            // Keep a copy of the sequence, last hidden and last cell primitve id for each layer
+            output_sequence_ids.push_back(crop_seq_id);
+            last_hidden_ids.push_back(crop_last_hidden_id);
+            last_cell_ids.push_back(crop_last_cell_id);
+        }
+    }
+
+    // Creating network out of the above designed topology
+    cldnn::network network(engine, topology);
+    network.set_input_data("input", input);
+    for (size_t layer = 0; layer < layers; layer++) {
+        std::string sid = get_string_id(layer);
+        if (has_initial_hidden) network.set_input_data("hidden:000:" + sid, hidden[0][layer]); // 0 is the chain link index
+        if (has_initial_cell) network.set_input_data("cell:000:" + sid, cell[0][layer]); // 0 is the chain link index
+    }
+
+    auto outputs = network.execute();
+    for (auto itr = outputs.begin(); itr != outputs.end(); itr++)
+    {
+        auto output_tensor = itr->second.get_memory().get_layout().size;
+        primitive_id primitive_name = itr->first;
+
+        // Split the primitive id to get the chain id
+        // Eg: primitive id: crop:last_cell:XXX:YYY
+        // XXX is the chain id
+        // YYY is the layer id
+        std::string chain_str = primitive_name.substr(primitive_name.find(":", primitive_name.find(":") + 1) + 1, 5);
+        std::string layer_str = primitive_name.substr(primitive_name.find(":", primitive_name.find(":", primitive_name.find(":") + 1) + 1) + 1, 5);
+        size_t chain_id = stoi(chain_str);
+        size_t layer_id = stoi(layer_str);
+
+        cldnn::memory output_memory = itr->second.get_memory();
+        int32_t output_size = (int32_t)(itr->second.get_memory().size() / sizeof(T));
+        cldnn::tensor ref_output_tensor;
+        VVVVF<T> ref_primitive_output;
+
+        int32_t ref_batch_size = batch_size;
+        int32_t ref_hidden_size = hidden_size;
+        int32_t ref_directions = directions;
+
+        int32_t ref_seq_len = 1;
+
+        // Set the reference output against which the primitive's output will be compared
+        if (primitive_name.find("crop:last_cell") != std::string::npos)
+        {
+            ref_primitive_output = last_cell[chain_id][layer_id];
+        }
+        else if (emit_last_hidden || primitive_name.find("crop:last_hidden") != std::string::npos)
+        {
+            ref_primitive_output = last_hidden[chain_id][layer_id];
+        }
+        else
+        {
+            ref_seq_len = sequence_len;
+            ref_primitive_output = ref_output[chain_id][layers - 1];
+        }
+
+        ref_output_tensor = { ref_batch_size, ref_seq_len, ref_hidden_size, ref_directions };
+        int32_t ref_output_size = ref_batch_size * ref_seq_len * ref_hidden_size * ref_directions;
+
+        // The number of elements in reference should match the number of elements in the primitive's output
+        ASSERT_EQ(ref_output_size, output_size);
+
+        // Compare the output tensor configuration against the reference value
+        // Output tensor is configured in bfyx format
+        ASSERT_EQ(ref_batch_size, output_tensor.batch[0]);
+        ASSERT_EQ(ref_seq_len, output_tensor.feature[0]);		// Sequence length should match
+        ASSERT_EQ(ref_directions, output_tensor.spatial[1]);	// directions should match
+        ASSERT_EQ(ref_hidden_size, output_tensor.spatial[0]);	// input size should match
+
+        auto output_ptr = output_memory.pointer<T>();
+
+        int32_t i = 0;
+        for (int32_t b = 0; b < ref_batch_size; ++b) {
+            for (int32_t s = 0; s < ref_seq_len; ++s) {
+                for (int32_t d = 0; d < ref_directions; ++d) {
+                    for (int32_t x = 0; x < ref_hidden_size; ++x) {
+                        ASSERT_NEAR(ref_primitive_output[b][s][d][x], output_ptr[i++], FERROR);
                     }
                 }
             }
@@ -593,6 +1651,7 @@ void generic_lstm_gpu_test(int layers, int sequence_len, int direction, int batc
     }
 }
 
+
 TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f32) {
     generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, true, true);
 }
@@ -609,6 +1668,24 @@ TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f32) {
     generic_lstm_gemm_gpu_test<float>(1, 1, 3, 6, 2, false, false);
 }
 
+// LSTM GEMM tests to test LSTM GEMMV kernel implementation
+TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_test_f32) {
+    generic_lstm_gemm_gpu_test<float>(5, 1, 1, 1024, 1024, true, true);
+}
+
+TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_bias_f32) {
+    generic_lstm_gemm_gpu_test<float>(1, 1, 1, 256, 2, false, true);
+}
+
+TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_f32) {
+    generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, true, false);
+}
+
+TEST(lstm_gemm_gpu, gemv_bfyx_1x64_lstm_gemm_no_hidden_bias_f32) {
+    generic_lstm_gemm_gpu_test<float>(1, 1, 1, 64, 2, false, false);
+}
+
+// LSTM ELT Tests
 TEST(lstm_elt_gpu, generic_lstm_elt_test_clip_f32) {
     generic_lstm_elt_gpu_test<float>(1, 1, 4, 6, 3, true, 0.3f);
 }
@@ -751,9 +1828,234 @@ TEST(lstm_gpu, generic_lstm_stacked_seq_bi_f32) {
     generic_lstm_gpu_test<float>(4, 7, 2, 3, 3, 2, true, true, true);
 }
 
+// optional outputs support
+TEST(lstm_gpu, output_test_sequence_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence, 1);
+}
+
+TEST(lstm_gpu, output_test_hidden_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden, 1);
+}
+
+TEST(lstm_gpu, output_test_hidden_cell_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden_cell, 1);
+}
+
+TEST(lstm_gpu, output_test_sequence_cell_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence_cell, 1);
+}
+
+TEST(lstm_gpu, output_test_sequence_bi_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence, 2);
+}
+
+TEST(lstm_gpu, output_test_hidden_bi_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden, 2);
+}
+
+TEST(lstm_gpu, output_test_hidden_cell_bi_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_hidden_cell, 2);
+}
+
+TEST(lstm_gpu, output_test_sequence_cell_bi_f32) {
+    lstm_gpu_output_test<float>(cldnn_lstm_output::cldnn_lstm_output_sequence_cell, 2);
+}
+
+// format tests
+TEST(lstm_gpu, lstm_gpu_format_bfyx_f32) {
+    lstm_gpu_format_test<float>(cldnn::format::bfyx, 1);
+}
+
+TEST(lstm_gpu, lstm_gpu_format_bfyx_bi_f32) {
+    lstm_gpu_format_test<float>(cldnn::format::bfyx, 2);
+}
+
+TEST(lstm_gpu, lstm_gpu_format_fyxb_f32) {
+    lstm_gpu_format_test<float>(cldnn::format::fyxb, 1);
+}
+
+TEST(lstm_gpu, lstm_gpu_format_fyxb_bi_f32) {
+    lstm_gpu_format_test<float>(cldnn::format::fyxb, 2);
+}
+
+// test for LSTM users' dependencies
+TEST(lstm_gpu, lstm_users_f32) {
+    lstm_gpu_users_test<float>();
+}
+
+// Test for LSTM with concatenated input
+TEST(lstm_gpu, generic_lstm_concatenated_input) {
+    lstm_gpu_concatenated_input_test<float>(1, 2, 2, 1, 1, 1, true, true, true);
+}
+
+TEST(lstm_gpu, generic_lstm_concatenated_input_multi_layer) {
+    lstm_gpu_concatenated_input_test<float>(5, 5, 2, 1, 1, 4, true, true, true);
+}
+
+// test for LSTM with chain and stack (multilayer)
+TEST(lstm_gpu, generic_lstm_chained_unidirectional_f32) {
+    // batch size = 1
+    // input size = 2
+    // hidden size = 4
+    // directions = 1
+    // layers = 1
+    // chains = 1
+    // sequence length = 1
+    // output selection = output sequence and cell
+    lstm_gpu_chain_test<float>(1, 2, 4, 1, 1, 2, 1, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+}
+
+TEST(lstm_gpu, generic_lstm_chained_bidirectional_f32) {
+    // batch size = 1
+    // input size = 2
+    // hidden size = 4
+    // directions = 2
+    // layers = 1
+    // chains = 1
+    // sequence length = 1
+    // output selection = output sequence and cell
+    lstm_gpu_chain_test<float>(1, 2, 4, 2, 1, 1, 1, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+}
+
+TEST(lstm_gpu, generic_lstm_chained_no_stack_bidirectional_f32) {
+    // batch size = 2
+    // input size = 2
+    // hidden size = 4
+    // directions = 2
+    // layers = 1
+    // chains = 2
+    // sequence length = 5
+    // output selection = output sequence and cell
+    lstm_gpu_chain_test<float>(2, 2, 4, 2, 1, 2, 5, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+}
+
+TEST(lstm_gpu, generic_lstm_chained_stacked_bidirectional_f32) {
+    // batch size = 2
+    // input size = 2
+    // hidden size = 4
+    // directions = 2
+    // layers = 4
+    // chains = 2
+    // sequence length = 5
+    // output selection = output sequence and cell
+    lstm_gpu_chain_test<float>(2, 2, 4, 2, 4, 2, 5, cldnn_lstm_output::cldnn_lstm_output_sequence_cell);
+}
+
+// FP16 Half precision tests
+TEST(lstm_gemm_gpu, generic_lstm_gemm_test_f16) {
+    generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, true);
+}
+
+TEST(lstm_gemm_gpu, generic_lstm_gemm_no_bias_f16) {
+    generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, true);
+}
+
+TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_f16) {
+    generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, true, false);
+}
+
+TEST(lstm_gemm_gpu, generic_lstm_gemm_no_hidden_bias_f16) {
+    generic_lstm_gemm_gpu_test<FLOAT16>(1, 1, 3, 6, 2, false, false);
+}
+
+TEST(lstm_elt_gpu, generic_lstm_elt_test_clip_f16) {
+    generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.3f);
+}
+
+TEST(lstm_elt_gpu, generic_lstm_elt_test_input_forget_f16) {
+    generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.f, 1);
+}
+
+TEST(lstm_elt_gpu, generic_lstm_elt_test_clip_input_forget_f16) {
+    generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true, 0.5f, 1);
+}
+
+TEST(lstm_elt_gpu, generic_lstm_elt_test_f16) {
+    generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, true);
+}
+
+TEST(lstm_elt_gpu, generic_lstm_elt_no_cell_f16) {
+    generic_lstm_elt_gpu_test<FLOAT16>(1, 1, 4, 6, 3, false);
+}
+
+TEST(lstm_gpu, generic_lstm_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
+}
+
+TEST(lstm_gpu, generic_lstm_no_bias_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, false, true, true);
+}
+
+TEST(lstm_gpu, generic_lstm_no_hidden_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, true);
+}
+
+TEST(lstm_gpu, generic_lstm_no_bias_hidden_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, true);
+}
+
+TEST(lstm_gpu, generic_lstm_no_cell_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, true, false);
+}
+
+TEST(lstm_gpu, generic_lstm_no_bias_cell_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, true, false);
+}
+
+TEST(lstm_gpu, generic_lstm_no_hidden_cell_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, true, false, false);
+}
+
+TEST(lstm_gpu, generic_lstm_no_bias_hidden_cell_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 5, 4, 3, false, false, false);
+}
+
+TEST(lstm_gpu, generic_lstm_clip_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 0);
+}
+
+TEST(lstm_gpu, generic_lstm_input_forget_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.f, 1);
+}
+
+TEST(lstm_gpu, generic_lstm_clip_input_forget_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true, 0.3f, 1);
+}
+
+TEST(lstm_gpu, generic_lstm_offset_order_ifoz_f16) {
+    default_offset_type = cldnn_lstm_offset_order_ifoz;
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 1, 3, 3, 2, true, true, true);
+    default_offset_type = cldnn_lstm_offset_order_iofz;
+}
+
+TEST(lstm_gpu, generic_lstm_canonical_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 1, 1, 1, 1, 1, true, true, true);
+}
+
+// bidirectional support
+TEST(lstm_gpu, generic_lstm_bi_bias_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, false, false);
+}
+
+TEST(lstm_gpu, generic_lstm_bi_bias_hidden_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, false);
+}
+
+TEST(lstm_gpu, generic_lstm_bi_bias_hidden_cell_f16) {
+    generic_lstm_gpu_test<FLOAT16>(1, 7, 2, 2, 3, 4, true, true, true);
+}
+
+// multi-layer support
+TEST(lstm_gpu, generic_lstm_stacked_seq_f16) {
+    generic_lstm_gpu_test<FLOAT16>(4, 7, 1, 3, 3, 2, true, true, true);
+}
+
+TEST(lstm_gpu, generic_lstm_stacked_bi_f16) {
+    generic_lstm_gpu_test<FLOAT16>(4, 7, 2, 3, 3, 2, true, true, true);
+}
+
 // TODO: Add tests for the following:
-// optional concatenate output
-// optional last hidden
-// optional last cell
+// integration testing using multi-layer and chained LSTMs
+// LSTMs single input
 // optional activation list
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp
index ec78a6c63..afade14f3 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/max_unpooling_gpu_test.cpp
@@ -57,7 +57,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2) {
     //  f1: b0:  0    0  0   b1:   0    0    0
     //  f1: b0:  0    8  16  b1:   12   0    17
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 1 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -139,7 +139,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_padding) {
     //  f1: b0:  0    0  0   b1:   0    0    0
     //  f1: b0:  0    8  16  b1:   12   0    17
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -230,7 +230,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_output_size) {
     //  f1: b0:  0    0  0   b1:   0    0    0
     //  f1: b0:  0    8  16  b1:   12   0    17
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -311,7 +311,7 @@ TEST(max_unpooling_gpu, basic_in2x3x2x2_fp16) {
     //  f1: b0:  0    0  0   b1:   0    0    0
     //  f1: b0:  0    8  16  b1:   12   0    17
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 2, 2, 2, 1 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -392,7 +392,7 @@ TEST(max_unpooling_gpu, basic_in2x2x3x2_max_with_argmax_pooling_unpooling) {
     //  f1: b0:  0    8  16  b1:   12   0    17
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp
index 6bca8f22f..75821bf00 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/memory_test.cpp
@@ -26,6 +26,9 @@
 #include <api/CPP/pooling.hpp>
 #include <api/CPP/concatenation.hpp>
 #include <api/CPP/data.hpp>
+#include <api/CPP/reshape.hpp>
+#include <api/CPP/crop.hpp>
+#include <api/CPP/scale.hpp>
 
 #include "test_utils/test_utils.h"
 
@@ -72,7 +75,7 @@ TEST(memory_tests, DISABLED_network_creation_loop)
 #endif
 TEST(memory_pool, basic_non_padded_relu_pipe) {
     // 5 relu's of size 1x4x1x1
-    engine engine;
+    const cldnn::engine engine;// here we need new engine
     auto batch_num = 1;
     auto feature_num = 4;
     auto x_size = 1;
@@ -106,7 +109,7 @@ TEST(memory_pool, basic_non_padded_relu_and_pooling_pipe) {
     // uncomment this line to disable memory pool
     /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
     engine engine{ cfg };*/
-    engine engine;
+    const cldnn::engine engine;// here we need new engine
     auto batch_num = 1;
     auto feature_num = 4;
     auto x_size = 4;
@@ -144,7 +147,7 @@ TEST(memory_pool, multi_outputs_network) {
     // uncomment this line to disable memory pool
     /*engine_configuration cfg{ false, false, false, std::string(), std::string(), true, std::string(),std::string(), 0, false };
     engine engine{ cfg };*/
-    engine engine;
+    const cldnn::engine engine;// here we need new engine
     auto batch_num = 1;
     auto feature_num = 4;
     auto x_size = 4;
@@ -173,11 +176,8 @@ TEST(memory_pool, multi_outputs_network) {
     EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)2048);
 }
 
-// Disabled since ref values seems to be incorrect.
-// Test passes when Relu4 is fused with concat1 and then concat1 is optimized out,
-// but this optimizations order is invalid.
-// TODO: fix the test
-TEST(memory_pool, DISABLED_oooq) {
+
+TEST(memory_pool, oooq) {
     /*          -- relu1 - concat1- relu4 -- 
         input<  -- relu2 /                   >-- concat2 -- relu6
                 -- relu3 --  relu5 --------- 
@@ -210,14 +210,10 @@ TEST(memory_pool, DISABLED_oooq) {
     network.set_input_data("input", input);
     auto outputs = network.execute();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2304);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
 }
 
-// Disabled since ref values seems to be incorrect.
-// Test passes when Relu4 is fused with concat1 and then concat1 is optimized out,
-// but this optimizations order is invalid.
-// TODO: fix the test
-TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) {
+TEST(memory_pool, shared_mem_pool_same_topology_twice) {
     /*                -- relu1 - concat1- relu4 --
     input<  -- relu2 |                             >-- concat2 -- relu6
                       -- relu3 --  relu5 ---------
@@ -261,7 +257,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) {
     auto output_layout_first = output_memory_first.get_layout();
     auto output_ptr_first = output_memory_first.pointer<float>();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2304);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 2816);
 
     network network_second(engine, topology, bo);
     network_second.set_input_data("input", input);
@@ -271,7 +267,7 @@ TEST(memory_pool, DISABLED_shared_mem_pool_same_topology_twice) {
     auto output_layout_second = output_memory_second.get_layout();
     auto output_ptr_second = output_memory_second.pointer<float>();
 
-    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)3072);
+    EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t) 3584);
     EXPECT_EQ(output_layout_first, output_layout_second);
 
     int y_size = output_layout_first.size.spatial[1];
@@ -461,3 +457,112 @@ TEST(memory_pool, shared_dep_two_output) {
     auto outputs = network.execute();
     EXPECT_EQ(engine.get_max_used_device_memory_size(), (uint64_t)256);
 }
+
+TEST(memory_pool, non_opt_intermidate_opt_after) {
+
+    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
+    engine engine{ cfg };
+    auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
+    auto input_layout2 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 1, 2, 2 });
+
+    auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
+    auto input_memory2 = cldnn::memory::allocate(engine, input_layout2);
+    auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 }));
+    auto data_memory = cldnn::data("scale_mem", scale_memory);
+
+    set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f });
+    set_values(input_memory2, { 5.0f, 6.0f, 7.0f, 8.0f });
+    set_values(scale_memory, { 1.0f});
+
+    auto reshape_tensor = cldnn::tensor(8, 1, 1, 1);
+    auto input = cldnn::input_layout("input1", input_layout1);
+    auto input2 = cldnn::input_layout("input2", input_layout2);
+    auto concat = cldnn::concatenation("concat", { "input1", "input2" }, cldnn::concatenation::along_b);
+    auto reshape = cldnn::reshape("reshape", "concat", reshape_tensor);
+    auto crop1 = cldnn::crop("crop1", "reshape", { 1,1,1,1 }, { 0, 0, 0, 0 });
+    auto crop2 = cldnn::crop("crop2", "reshape", { 1,1,1,1 }, { 1, 0, 0, 0 });
+    auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
+    auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
+
+    auto topology = cldnn::topology(
+        input, input2,
+        concat,
+        reshape,
+        crop1, crop2,
+        eltwise1, eltwise2,
+        data_memory
+    );
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(false));
+    network network(engine, topology, bo);
+    network.set_input_data("input1", input_memory1);
+    network.set_input_data("input2", input_memory2);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), static_cast<size_t>(2));
+
+    auto out1 = outputs.at("elt1");
+    auto out2 = outputs.at("elt2");
+
+    auto out1_ptr = out1.get_memory().pointer<float>();
+    auto out2_ptr = out2.get_memory().pointer<float>();
+    EXPECT_EQ(out1_ptr[0], 1.0f);
+    EXPECT_EQ(out2_ptr[0], 2.0f);
+}
+
+TEST(memory_pool, add_mem_dep_test) {
+
+    engine_configuration cfg{ false, false, false, std::string(), std::string(), true /*oooq*/, std::string(),std::string(), priority_mode_types::disabled, throttle_mode_types::disabled, true /*mem_pool*/ };
+    engine engine{ cfg };
+    auto input_layout1 = layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1, 2, 2, 2 });
+
+    auto input_memory1 = cldnn::memory::allocate(engine, input_layout1);
+    auto scale_memory = cldnn::memory::allocate(engine, layout(cldnn::data_types::f32, cldnn::format::bfyx, { 1,1,1,1 }));
+    auto data_memory = cldnn::data("scale_mem", scale_memory);
+
+    set_values(input_memory1, { 1.0f, 2.0f, 3.0f, 4.0f,
+        5.0f, 6.0f, 7.0f, 8.0f});
+    set_values(scale_memory, { 1.0f });
+
+
+    auto input = cldnn::input_layout("input1", input_layout1);
+    auto actv1 = cldnn::activation("input_activ1", "input1", cldnn_activation_func::activation_abs);
+    auto actv2 = cldnn::activation("input_activ2", "input1", cldnn_activation_func::activation_abs);
+    auto crop1 = cldnn::crop("crop1", "input_activ1", { 1,1,2,2 }, { 0, 0, 0, 0 });
+    auto crop2 = cldnn::crop("crop2", "input_activ2", { 1,1,2,2 }, { 0, 1, 0, 0 });
+    auto eltwise1 = cldnn::scale("elt1", "crop1", "scale_mem");
+    auto eltwise2 = cldnn::scale("elt2", "crop2", "scale_mem");
+    auto actv3 = cldnn::activation("out3", "elt1", cldnn_activation_func::activation_abs);
+    auto actv4 = cldnn::activation("out4", "elt2", cldnn_activation_func::activation_abs);
+
+    auto topology = cldnn::topology(
+        input,
+        crop1, crop2,
+        actv1, actv2,
+        eltwise1, eltwise2,
+        data_memory,
+        actv3, actv4
+    );
+
+    build_options bo;
+    bo.set_option(build_option::optimize_data(true));
+    network network(engine, topology, bo);
+    network.set_input_data("input1", input_memory1);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), static_cast<size_t>(2));
+
+    auto out1 = outputs.at("out3");
+    auto out2 = outputs.at("out4");
+
+    auto out1_ptr = out1.get_memory().pointer<float>();
+    auto out2_ptr = out2.get_memory().pointer<float>();
+    EXPECT_EQ(out1_ptr[0], 1.0f);
+    EXPECT_EQ(out1_ptr[1], 2.0f);
+    EXPECT_EQ(out1_ptr[2], 3.0f);
+    EXPECT_EQ(out1_ptr[3], 4.0f);
+
+    EXPECT_EQ(out2_ptr[0], 5.0f);
+    EXPECT_EQ(out2_ptr[1], 6.0f);
+    EXPECT_EQ(out2_ptr[2], 7.0f);
+    EXPECT_EQ(out2_ptr[3], 8.0f);
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp
index b63bbe6b5..da2cc3807 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/mvn_gpu_test.cpp
@@ -139,7 +139,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -167,7 +167,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_fp16)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -195,7 +195,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -223,7 +223,7 @@ TEST(mvn_gpu_test, mvn_test_across_channels_bfyx_normalize_variance_fp16)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -251,7 +251,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -279,7 +279,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_fp16)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -307,7 +307,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 7, 10, 17, 13 } });
 
@@ -335,7 +335,7 @@ TEST(mvn_gpu_test, mvn_test_within_channels_bfyx_normalize_variance_fp16)
     using namespace cldnn;
     using namespace tests;
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f16, format::bfyx,{ 7, 10, 17, 13 } });
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp
new file mode 100644
index 000000000..8c0271753
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/one_hot_gpu_test.cpp
@@ -0,0 +1,193 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/engine.hpp>
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/one_hot.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+
+#include "test_utils/test_utils.h"
+#include "test_utils/uniform_quantized_real_distribution.hpp"
+
+#include <cstddef>
+
+using namespace cldnn;
+using namespace ::tests;
+
+template <typename T>
+VVVVF<T> one_hot_cpu(VVVVF<T> &input, uint16_t axis,
+    int32_t one_hot_limit, int input_padding_y = 0,
+    int input_padding_x = 0, int output_padding_y = 0,
+    int output_padding_x = 0) {
+
+    size_t padding_y = input_padding_y + output_padding_y;
+    size_t padding_x = input_padding_x + output_padding_x;
+    size_t out_sizes[4];
+    out_sizes[0] = input.size();
+    out_sizes[1] = input[0].size();
+    out_sizes[2] = input[0][0].size() + 2 * padding_y;
+    out_sizes[3] = input[0][0][0].size() + 2 * padding_x;
+    for (uint16_t i = 0; i < axis; ++i)
+        out_sizes[i] = out_sizes[i + 1];
+    out_sizes[axis] = one_hot_limit;
+    VVVVF<T> output(out_sizes[0], VVVF<T>(out_sizes[1], VVF<T>(out_sizes[2], VF<T>(out_sizes[3]))));
+
+    switch (axis) {
+    case 0:
+        for (size_t b = 0; b < out_sizes[0]; ++b)
+            for (size_t f = 0; f < out_sizes[1]; ++f)
+                for (size_t y = 0; y < out_sizes[2]; ++y)
+                    for (size_t x = 0; x < out_sizes[3]; ++x)
+                        output[b][f][y][x] = input[0][f][y][x] == (T)b ? 1 : 0;
+        break;
+    case 1:
+        for (size_t b = 0; b < out_sizes[0]; ++b)
+            for (size_t f = 0; f < out_sizes[1]; ++f)
+                for (size_t y = 0; y < out_sizes[2]; ++y)
+                    for (size_t x = 0; x < out_sizes[3]; ++x)
+                        output[b][f][y][x] = input[0][b][y][x] == (T)f ? 1 : 0;
+        break;
+    case 2:
+        for (size_t b = 0; b < out_sizes[0]; ++b)
+            for (size_t f = 0; f < out_sizes[1]; ++f)
+                for (size_t y = 0; y < out_sizes[2]; ++y)
+                    for (size_t x = 0; x < out_sizes[3]; ++x)
+                        output[b][f][y][x] = input[0][b][f][x] == (T)y ? 1 : 0;
+        break;
+    case 3:
+        for (size_t b = 0; b < out_sizes[0]; ++b)
+            for (size_t f = 0; f < out_sizes[1]; ++f)
+                for (size_t y = 0; y < out_sizes[2]; ++y)
+                    for (size_t x = 0; x < out_sizes[3]; ++x)
+                        output[b][f][y][x] = input[0][b][f][y] == (T)x ? 1 : 0;
+        break;
+    default: break;
+    }
+    return output;
+}
+
+template <typename T>
+void generic_one_hot_test_int(cldnn::format test_input_fmt, int input_b, int input_f, int input_y, int input_x, tensor shape,
+    uint16_t one_hot_axis, int input_padding_y = 0, int input_padding_x = 0, int output_padding_y = 0, int output_padding_x = 0) {
+    std::vector<tensor::value_type> output_dims = { shape.batch[0], shape.feature[0],
+        shape.spatial[1], shape.spatial[0] };
+    int32_t one_hot_limit = output_dims[one_hot_axis];
+
+    int min_random = -2, max_random = one_hot_limit + 2;
+    VVVVF<T> input_rnd = generate_random_4d<T>(input_b, input_f, input_y, input_x, min_random, max_random);
+    VF<T> input_rnd_vec = flatten_4d<T>(test_input_fmt, input_rnd);
+
+    const auto& engine = get_test_engine();
+    tensor input_tensor(input_b, input_f, input_x, input_y);
+    auto input = memory::allocate(engine, { type_to_data_type<T>::value, test_input_fmt, input_tensor });
+    set_values(input, input_rnd_vec);
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(one_hot("output", "input", shape, one_hot_axis));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "output");
+
+    auto output_memory = outputs.at("output").get_memory();
+    auto output_layout = output_memory.get_layout();
+    auto output_ptr = output_memory.pointer<T>();
+
+    VVVVF<T> output_cpu = one_hot_cpu<T>(input_rnd, one_hot_axis, one_hot_limit, input_padding_y, input_padding_x, output_padding_y, output_padding_x);
+    EXPECT_EQ(output_layout.format.value, test_input_fmt.value);
+    tensor output_tensor = output_layout.get_buffer_size();
+    int y_size = output_tensor.spatial[1];
+    int x_size = output_tensor.spatial[0];
+    int f_size = output_tensor.feature[0];
+    int b_size = output_tensor.batch[0];
+    EXPECT_EQ(y_size, (int)output_cpu[0][0].size());
+    EXPECT_EQ(x_size, (int)output_cpu[0][0][0].size());
+    EXPECT_EQ(f_size, (int)output_cpu[0].size());
+    EXPECT_EQ(b_size, (int)output_cpu.size());
+
+
+    bool test_is_correct = true;
+    VF<T> output_cpu_vec = flatten_4d<T>(test_input_fmt, output_cpu);
+
+    for (size_t i = 0; i < output_cpu_vec.size(); ++i) {
+        if (output_cpu_vec[i] != output_ptr[i]) {
+            test_is_correct = false;
+            break;
+        }
+    }
+    EXPECT_EQ(test_is_correct, true) << std::endl
+        << "failing test parameters:" << std::endl
+        << "input_b = " << input_b << std::endl
+        << "input_f = " << input_f << std::endl
+        << "input_y = " << input_y << std::endl
+        << "input_x = " << input_x << std::endl
+        << "one_hot_limit = " << one_hot_limit << std::endl
+        << "one_hot_axis = " << one_hot_axis << std::endl
+        << "input_padding_y = " << input_padding_y << std::endl
+        << "input_padding_x = " << input_padding_x << std::endl
+        << "output_padding_y = " << output_padding_y << std::endl
+        << "output_padding_x = " << output_padding_x << std::endl;
+}
+
+TEST(one_hot_gpu_i32, generic_y_in10_oh5) {
+    generic_one_hot_test_int<int32_t>(format::bfyx, 1, 10, 10, 10, tensor(10, 10, 10, 5), 2);
+}
+
+
+TEST(one_hot_error, basic_error_wrong_batch_size) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::i32, format::bfyx, { 10, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(one_hot("output", "input", tensor(10, 1, 1, 50), 2));
+
+    std::string msg_to_find = "Incorrect parameters configuration: input batch size should be equal to 1.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(one_hot_error, basic_error_wrong_axis) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(one_hot("output", "input", tensor(1, 1, 1, 50), 4));
+
+    std::string msg_to_find = "Incorrect parameters configuration: one_hot_axis should be less or equal to 3.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
+
+TEST(one_hot_error, basic_error_bad_shape) {
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::i32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(one_hot("output", "input", tensor(1, 5, 1, 50), 2));
+
+    std::string msg_to_find = "Incorrect parameters configuration: shape does not fit input size.";
+    EXPECT_ANY_THROW(check_exception_massage(engine, topology, msg_to_find));
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp
index 8f455ae87..80657c16c 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/permute_gpu_test.cpp
@@ -25,7 +25,9 @@
 #include <api/CPP/engine.hpp>
 #include "test_utils/test_utils.h"
 #include <api/CPP/data.hpp>
-
+#include <api/CPP/fully_connected.hpp>
+#include <api/CPP/reshape.hpp>
+#include <api/CPP/crop.hpp>
 #include <cmath>
 #include <gmock/gmock.h>
 #include <limits>
@@ -34,7 +36,66 @@ using namespace cldnn;
 using namespace tests;
 using namespace testing;
 
-TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
+
+TEST(permute_gpu_f32, output_ordering_test)
+{
+    const auto& engine = get_test_engine();
+
+
+    std::vector<std::vector<int32_t>> input_tensors =
+    {
+        { 10, 5, 15, 2 },{ 2, 4, 6, 8 },{ 2, 2, 3, 2 },{ 9, 8, 7, 4 }
+    };
+    std::vector<std::vector<uint16_t>> permutations =
+    {
+        { 0, 1, 2, 3 }, //do nothing
+    { 0, 1, 3, 2 }, //replace x with y
+    { 1, 0, 3, 2 }, //replace b with f
+    { 0, 2, 3, 1 }  //big permutation
+    };
+    std::vector<format> input_formats = { format::bfyx, format::yxfb };
+
+    auto get_permutation = [&](const std::vector<int32_t>& inp1, const std::vector<uint16_t>& order)
+    {
+        EXPECT_EQ(inp1.size(), order.size());
+        std::vector<int32_t> output;
+        for (auto const& o : order)
+        {
+            output.push_back(inp1.at(o));
+        }
+        return output;
+    };
+
+    for (auto const& fr : input_formats)
+    {
+        for (auto const& inp_t : input_tensors)
+        {
+            for (auto const& perm : permutations)
+            {
+
+                auto input = memory::allocate(engine, { data_types::f32, fr, tensor(inp_t) });
+                topology topology(
+                    input_layout("input", input.get_layout()),
+                    permute("permute", "input", perm));
+
+                network network(engine, topology);
+                network.set_input_data("input", input);
+                auto outputs = network.execute();
+                auto output = outputs.at("permute");
+                auto output_mem = output.get_memory();
+                EXPECT_EQ(outputs.size(), size_t(1));
+                auto ref_tensor = get_permutation(inp_t, perm);
+                auto out_tensor = output_mem.get_layout().size;
+                EXPECT_EQ(out_tensor.batch[0], ref_tensor[0]);
+                EXPECT_EQ(out_tensor.feature[0], ref_tensor[1]);
+                EXPECT_EQ(out_tensor.spatial[0], ref_tensor[2]);
+                EXPECT_EQ(out_tensor.spatial[1], ref_tensor[3]);
+            }
+        }
+    }
+}
+
+TEST(permute_gpu_f32, basic_bfyx_permute_0_1_2_3)
 {
     //  Input               : bfyx:2x2x3x2
     //  Permute order       : { 0,1,3,2 }
@@ -45,7 +106,64 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
     //  f1: b0:  5    6   -15  b1:   1.5  5.2   -15
     //  f1: b0:  7    8   -15  b1:   12   8     -15
     //
+    //  Output = input
+
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
+
+    std::vector<float> values =
+    {
+        1.0f,  2.0f, -15.f,
+        3.0f,  4.0f, -15.f,
+
+        5.0f,  6.0f, -15.f,
+        7.0f,  8.0f, -15.f,
+
+        0.0f,  0.0f, -15.f,
+        0.5f, -0.5f, -15.f,
+
+        1.5f,  5.2f, -15.f,
+        12.0f, 8.0f, -15.f
+    };
+
+    set_values(input, values);
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        permute("permute", "input", { 0, 1, 2, 3 }));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "permute");
+
+    auto output = outputs.begin()->second.get_memory();
+
+
+    auto output_ptr = output.pointer<float>();
+    for (int i = 0; i < 24; i++)
+    {
+        EXPECT_FLOAT_EQ(values[i], output_ptr[i]);
+    }
+
+}
+
+TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
+{
+    //  Input               : bfyx:2x2x3x2
+    //  Permute order       : { 0,1,3,2 }
+    //
     //  Input:
+    //  f0: b0:  1    2   -15  b1:   0    0     -15
+    //  f0: b0:  3    4   -15  b1:   0.5 -0.5   -15
+    //  f1: b0:  5    6   -15  b1:   1.5  5.2   -15
+    //  f1: b0:  7    8   -15  b1:   12   8     -15
+    //
+    //  Output
     //  f0: b0:  1    3  b1:   0    0.5
     //  f0: b0:  2    4  b1:   0    -0.5
     //  f0: b0:  -15 -15 b1:   -15  -15
@@ -54,9 +172,9 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
     //  f1: b0:  -15 -15 b1:   -15   -15
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
-    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
 
     set_values(input, {
         1.0f,  2.0f, -15.f,
@@ -70,7 +188,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
 
         1.5f,  5.2f, -15.f,
         12.0f, 8.0f, -15.f,
-    });
+        });
 
     topology topology(
         input_layout("input", input.get_layout()),
@@ -111,56 +229,20 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2)
 
 }
 
-TEST(permute_gpu_f32, basic_yxfb_permute_3_2_0_1)
+TEST(permute_gpu_f32, basic_yxfb_permute_1_0_2_3)
 {
-    //  Input               : yxfb:2x2x2x2
-    //  Permute order       : { 3,2,0,1 }
-    //  Output padding      : 0x1
-    //
-    //  Input:
-    //  f0: b0:  1    2  b1:   0    0
-    //  f0: b0:  3    4  b1:   0.5 -0.5
-    //  f1: b0:  5    6  b1:   1.5  5.2
-    //  f1: b0:  7    8  b1:   12   8
-    //
-    //  Output:
-    //  b0 f0:  1    2
-    //  b0 f0:  3    4
-    //
-    //  b0 f1:  5    6
-    //  b0 f1:  7    8
-    //
-    //  b1 f0:  0    0
-    //  b1 f0: 0.5 -0.5
-    //
-    //  b1 f1: 1.5  5.2
-    //  b1 f1: 12    8
-    //
-
-    engine engine;
+    const auto& engine = get_test_engine();
 
-    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 100, 64, 1 } });
 
-    set_values(input, {
-        1.f, 0.f,
-        5.f, 1.5f,
-
-        2.f, 0.f,
-        6.f, 5.2f,
-
-        3.f, 0.5f,
-        7.f, 12.f,
-
-        4.f, -0.5f,
-        8.f, 8.f
-    });
+    tests::set_random_values<float>(input_mem);
 
     topology topology(
-        input_layout("input", input.get_layout()),
-        permute("permute", "input", { 3, 2, 0, 1 }, { { 0, 0, 1, 0}, 0 }));
+        input_layout("input", input_mem.get_layout()),
+        permute("permute", "input", { 1, 0, 2, 3 }));
 
     network network(engine, topology);
-    network.set_input_data("input", input);
+    network.set_input_data("input", input_mem);
 
     auto outputs = network.execute();
     EXPECT_EQ(outputs.size(), size_t(1));
@@ -168,22 +250,11 @@ TEST(permute_gpu_f32, basic_yxfb_permute_3_2_0_1)
 
     auto output = outputs.begin()->second.get_memory();
 
-    float answers[32] = {
-        0.0f, 0.0f, 0.0f, 0.0f, 
-        1.0f,  2.0f, 3.0f,  4.0f,
-        5.0f,  6.0f, 7.0f,  8.0f, 
-        0.0f, 0.0f, 0.0f, 0.0f,
-
-        0.0f, 0.0f, 0.0f, 0.0f,
-        0.0f, 0.0f, 0.5f, -0.5f,
-        1.5f, 5.2f, 12.0f, 8.0f,
-        0.0f, 0.0f, 0.0f, 0.0f
-    };
-
     auto output_ptr = output.pointer<float>();
-    for (int i = 0; i < 32; i++)
+    auto input_ptr = input_mem.pointer<float>();
+    for (int i = 0; i < 6400; i++)
     {
-        EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
+        EXPECT_FLOAT_EQ(input_ptr[i], output_ptr[i]);
     }
 
 }
@@ -209,7 +280,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding)
     //  f1: b0:  -15 -15 b1:   -15   -15
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
 
@@ -225,7 +296,7 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding)
 
         1.5f,  5.2f, -15.f,
         12.0f, 8.0f, -15.f,
-    });
+        });
 
     topology topology(
         input_layout("input", input.get_layout()),
@@ -267,10 +338,120 @@ TEST(permute_gpu_f32, basic_bfyx_permute_0_1_3_2_input_padding)
 
 }
 
+TEST(permute_gpu_f32, basic_yxfb_permute_batch_with_feature)
+{
+    //  Input               : yxfb:8x2x1x1
+    //  Permute order       : { 1, 0, 2, 3 }
+    //  Output              : yxfb:2x8x1x1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 8, 2, 1, 1 } });
+
+    set_values(input, {
+        //b0 - b7 for f=0
+        1.f, 0.f, 5.f, 1.5f, 2.f, 0.f, 6.f, 5.2f,
+
+        //b0 - b7 for f=1
+        3.f, 0.5f, 7.f, 12.f, 4.f, -0.5f, 8.f, 8.f
+        });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        permute("permute", "input", { 1, 0, 2, 3 }));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "permute");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto out_tensor = output.get_layout().size;
+    EXPECT_EQ(out_tensor.batch[0], 2);
+    EXPECT_EQ(out_tensor.feature[0], 8);
+    EXPECT_EQ(out_tensor.spatial[0], 1);
+    EXPECT_EQ(out_tensor.spatial[1], 1);
+
+    float answers[16] = {
+        1.0f, 3.0f,
+        0.0f, 0.5f,
+        5.f, 7.f,
+        1.5f, 12.f,
+        2.f, 4.f,
+        0.f, -0.5f,
+        6.f, 8.f,
+        5.2f, 8.f
+    };
+
+    auto output_ptr = output.pointer<float>();
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
+    }
+
+}
+
+TEST(permute_gpu_f32, basic_bfyx_permute_batch_with_feature)
+{
+    //  Input               : yxfb:8x2x1x1
+    //  Permute order       : { 1, 0, 2, 3 }
+    //  Output              : yxfb:2x8x1x1
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 8, 1, 1 } });
+
+    set_values(input, {
+        //f0 - f7 for b=0
+        1.f, 0.f, 5.f, 1.5f, 2.f, 0.f, 6.f, 5.2f,
+
+        //f0 - f7 for b=1
+        3.f, 0.5f, 7.f, 12.f, 4.f, -0.5f, 8.f, 8.f
+        });
+
+    topology topology(
+        input_layout("input", input.get_layout()),
+        permute("permute", "input", { 1, 0, 2, 3 }));
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "permute");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto out_tensor = output.get_layout().size;
+    EXPECT_EQ(out_tensor.batch[0], 8);
+    EXPECT_EQ(out_tensor.feature[0], 2);
+    EXPECT_EQ(out_tensor.spatial[0], 1);
+    EXPECT_EQ(out_tensor.spatial[1], 1);
+
+    float answers[16] = {
+        1.0f, 3.0f,
+        0.0f, 0.5f,
+        5.f, 7.f,
+        1.5f, 12.f,
+        2.f, 4.f,
+        0.f, -0.5f,
+        6.f, 8.f,
+        5.2f, 8.f
+    };
+
+    auto output_ptr = output.pointer<float>();
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_FLOAT_EQ(answers[i], output_ptr[i]);
+    }
+
+}
+
 template<data_types DType>
 void permute_test_with_reorder()
 {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
 
@@ -286,7 +467,7 @@ void permute_test_with_reorder()
 
         1.0f,  5.0f, -15.f,
         12.0f, 8.0f, -15.f,
-    });
+        });
 
     topology topology(
         input_layout("input", input.get_layout()),
@@ -338,4 +519,102 @@ TEST(permute_gpu_i32, basic_bfyx_permute_0_1_3_2) {
 
 TEST(permute_gpu_i64, basic_bfyx_permute_0_1_3_2) {
     permute_test_with_reorder<data_types::i64>();
-}
-\ No newline at end of file
+}
+
+TEST(fc_permute_crop_gpu, basic_permute_yxfb)
+{
+    const auto& engine = get_test_engine();
+
+    auto input_mem = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 5, 1, 512 } });
+
+    //Topolgy creates permute which "repalces" the batch with the feature.
+    topology topology(
+        input_layout("input", input_mem.get_layout()),  // yxfb {1, 5, 1, 512 }}
+        permute("permute", "input", { 1, 0, 2, 3 })  // yxfb {5, 1, 1, 512}  --- without permute fix yxfb {1, 5, 512, 1}
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input_mem);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "permute");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto out_tensor = output.get_layout().size;
+    EXPECT_EQ(out_tensor.batch[0], 5);
+    EXPECT_EQ(out_tensor.feature[0], 1);
+    EXPECT_EQ(out_tensor.spatial[0], 1);
+    EXPECT_EQ(out_tensor.spatial[1], 512);
+    EXPECT_EQ(output.get_layout().format, cldnn::format::yxfb);
+}
+
+TEST(fc_permute_crop_gpu, basic_0)
+{
+
+    const auto& engine = get_test_engine();
+
+    auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 5, 11264, 1, 1 } });
+    auto weights_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 512, 11264, 1, 1 } });
+    auto bias_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 512, 1 } });
+
+    topology topology(
+        input_layout("input", input_mem.get_layout()),                   // bfyx {5, 11264, 1, 1}}
+        data("weights", weights_mem),
+        data("bias", bias_mem),
+        fully_connected("fully_connected", "input", "weights", "bias"),  // yxfb {5, 512, 1, 1}
+        reshape("reshape", "fully_connected", { 1, 5, 1, 512 }),           // yxfb {1, 5, 1, 512}
+        permute("permute", "reshape", { 1, 0, 2, 3 }),                     // yxfb {5, 1, 1, 512}        --- without permute fix yxfb {1, 5, 512, 1}
+        crop("crop", "permute", { 1, 1, 1, 512 }, { 4, 0, 0 ,0 })           // without permute fix it will fail "Tensor pitches didn't set correctly"
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input_mem);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "crop");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto out_tensor = output.get_layout().size;
+    EXPECT_EQ(out_tensor.batch[0], 1);
+    EXPECT_EQ(out_tensor.feature[0], 1);
+    EXPECT_EQ(out_tensor.spatial[0], 1);
+    EXPECT_EQ(out_tensor.spatial[1], 512);
+    EXPECT_EQ(output.get_layout().format, cldnn::format::yxfb);
+}
+
+TEST(fc_permute_gpu, basic_permute_bfyx)
+{
+    const auto& engine = get_test_engine();
+
+    auto input_mem = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 5, 1, 256 } });
+
+    tests::set_random_values<float>(input_mem);
+
+    //Topolgy creates permute which "repalces" the batch with the feature.
+    topology topology(
+        input_layout("input", input_mem.get_layout()),
+        permute("permute", "input", { 1, 0, 2, 3 })
+    );
+
+    network network(engine, topology);
+    network.set_input_data("input", input_mem);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "permute");
+
+    auto output = outputs.begin()->second.get_memory();
+    auto out_tensor = output.get_layout().size;
+    EXPECT_EQ(out_tensor.batch[0], 5);
+    EXPECT_EQ(out_tensor.feature[0], 1);
+    EXPECT_EQ(out_tensor.spatial[0], 1);
+    EXPECT_EQ(out_tensor.spatial[1], 256);
+    EXPECT_EQ(output.get_layout().format, cldnn::format::bfyx);
+
+    auto input_ptr = input_mem.pointer<float>();
+    auto output_ptr = output.pointer<float>();
+    for (int i = 0; i < 5 * 256; i++)
+        EXPECT_NEAR(input_ptr[i], output_ptr[i], 1e-3f);
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp
index 711a5ec5a..3bcd27192 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/pooling_gpu_test.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2016 Intel Corporation
+// Copyright (c) 2016-2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -54,7 +54,7 @@ TEST(pooling_forward_gpu, basic_max_byxf_f32_wsiz3x3_wstr1x1_i1x3x3x8_nopad) {
     //  Expected output:
     //  [ 8.0, 0.0, 0.0, 4,0, 0,5, -0.5, -0.5, -0.5 ]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,  format::byxf,{ 1, 8, 3, 3 } });
 
@@ -99,7 +99,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
     //  Expected output:
     //  [ 2.0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32,  format::yxfb, { 1, 1, 3, 3 } });
 
@@ -122,9 +122,47 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz3x3_wstr1x1_i3x3x1x1_nopad) {
     EXPECT_EQ(2.0f, output_ptr[0]);
 }
 
+TEST(pooling_forward_gpu, basic_max_yxfb_f32_global_i3x3x1x1_nopad) {
+    //  Brief test description.
+    //
+    //  Pool mode: max
+    //  Global pooling: true
+    //  Padding: none
+    //
+    //  Input data:
+    //  [-0.5,  1.0,  0.5]
+    //  [ 2.0,  1.5, -0.5]
+    //  [ 0.0, -1.0,  0.5]
+    //
+    //  Expected output:
+    //  [ 2.0]
+
+    const auto& engine = get_test_engine();
+
+    auto input_prim = memory::allocate(engine, { data_types::f32,  format::yxfb,{ 1, 1, 3, 3 } });
+
+    topology topology;
+    topology.add(input_layout("input_prim", input_prim.get_layout()));
+    topology.add(pooling("pool_prim", "input_prim", pooling_mode::max));
+
+    network network(engine, topology);
+    set_values(input_prim, { -0.5f, 1.0f, 0.5f, 2.0f, 1.5f, -0.5f, 0.0f, -1.0f, 0.5f });
+    network.set_input_data("input_prim", input_prim);
+
+    auto outputs = network.execute();
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "pool_prim");
+
+    auto output_prim = outputs.begin()->second.get_memory();
+
+    auto output_ptr = output_prim.pointer<float>();
+
+    EXPECT_EQ(2.0f, output_ptr[0]);
+}
+
 TEST(pooling_forward_gpu, basic_max_pooling_int8) {
 
-    engine engine;
+    const auto& engine = get_test_engine();
     layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
     layout out_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,1,1 } };
     layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
@@ -171,7 +209,7 @@ TEST(pooling_forward_gpu, basic_max_pooling_int8) {
 
 TEST(pooling_forward_gpu, basic_avg_pooling_int8) {
 
-    engine engine;
+    const auto& engine = get_test_engine();
     layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
     layout out_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,1,1 } };
     layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
@@ -235,7 +273,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) {
     //  [ 2.0,  1.5]
     //  [ 2.0,  1.5]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
 
@@ -279,7 +317,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr2x2_i4x4x1x1_nopad) {
     //  [ 2.0,  0.5]
     //  [ 0.5,  0.5]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 4, 4 } });
 
@@ -333,7 +371,7 @@ TEST(pooling_forward_gpu, basic_max_yxfb_f32_wsiz2x2_wstr1x1_i3x3x2x2_nopad) {
     //  [ 0.5,  1.0]         [ 1.0,  0.5]
     //  [-0.5,  1.5]         [ 1.0,  0.0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 3 } });
 
@@ -383,7 +421,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad)
     //  [ 1.5, -0.5]
     //  [   -1, 0.5]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 
@@ -428,7 +466,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad)
     //  [ 1.5,  -0.5]
     //  [   1,  -0.5]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
 
@@ -477,7 +515,7 @@ TEST(pooling_forward_gpu, basic_avg_yxfb_f32_wsiz2x2_wstr1x1_i3x3x1x1_nopad) {
     //  [ 1.0,   0.625]
     //  [ 1.625, 0.875]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
 
@@ -522,7 +560,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i2x2x1x1_zeropad)
     //  [ 0.375, -0.125]
     //  [ -0.25,  0.125]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 2, 2 } });
 
@@ -567,7 +605,7 @@ TEST(pooling_forward_gpu, offsets_avg_bfyx_f32_wsiz3x3_wstr3x3_i1x1x3x3_zeropad)
     //  [ 0.177777, -0.133333]
     //  [ 0.333333,  0.55]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 3, 3 } });
 
@@ -615,7 +653,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_f32_wsiz2x2_wstr2x2_i3x3x1x1_zeropad)
     //  [  0.375,    0.5]
     //  [ -0.125, -1.125]
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input_prim = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 3, 3 } });
 
@@ -664,7 +702,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_out
     //  [0, 0, 0, 0, 0, 0]
     //  [0, 0, 0, 0, 0, 0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -725,7 +763,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_out
     //  [0, 1, -0.5, 0, 0]
     //  [0, 0, 0, 0, 0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -795,7 +833,7 @@ TEST(pooling_forward_gpu, offsets_avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inp
     //  [0, 0, 0, 0, 0, 0]
     //  [0, 0, 0, 0, 0, 0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -858,7 +896,7 @@ TEST(pooling_forward_gpu, offsets_max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inp
     //  [0, 1, -0.5, 0]
     //  [0, 0, 0, 0, 0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -929,7 +967,7 @@ TEST(pooling_forward_gpu, avg_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i2x2x1x1_inpad2x1_ou
     //  [0, 0, 0, 0, 0, 0]
     //  [0, 0, 0, 0, 0, 0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -997,7 +1035,7 @@ TEST(pooling_forward_gpu, max_yxfb_bfyx_f32_wsiz2x2_wstr2x2_i3x3x1x1_inpad2x1_ou
     //  [0, 12, 14, 16, 0]
     //  [0, 0, 0, 0, 0]
 
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -1067,7 +1105,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax) {
     //  f1: b0:  10  11   b1:   21    23
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -1146,7 +1184,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_input_padding) {
     //  f1: b0:  10  11   b1:   21    23
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -1226,7 +1264,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_output_padding) {
     //  f1: b0:  10  11   b1:   21    23
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -1316,7 +1354,7 @@ TEST(pooling_forward_gpu, basic_in2x2x3x2_max_with_argmax_with_output_size) {
     //  f1: b0:  10  11   b1:   21    23
 
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto arg_max = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 1 } });
@@ -1500,6 +1538,133 @@ TEST(pooling_forward_gpu, yxfb_average_without_padding_i1x1_w3x3_s1x1_o1x1_fp16)
     generic_average_wo_padding_test<FLOAT16>(format::yxfb, spatial(1, 1), spatial(1, 1), spatial(3, 3), tensor{ 0,0,1,1 }, tensor{ 0,0,-1,-1 });
 }
 
+TEST(pooling_forward_gpu, b_fs_yx_fsv4)
+{
+    int B_array[] = {  16,    4, 0 };  // Batch
+    int F_array[] = {  64, 2048, 0 };  // Features
+    int I_array[] = { 112,    7, 0 };  // Input MxM data sizes
+    int W_array[] = {   7,    3, 0 };  // Filter (a-ka weights) sizes
+    int S_array[] = {   1,    2, 0 };  // Strides
+    for (int j = 0; F_array[j]; j++) {
+        int in_B = B_array[j];
+
+        int in_F = F_array[j];
+
+        int in_X = I_array[j],
+            in_Y = in_X;
+
+        int W_X = W_array[j],
+            W_Y = W_X;
+
+        int S_X = S_array[j],
+            S_Y = S_X;
+
+        // Input data init
+        std::vector<char> Data(in_B * in_F * in_X * in_Y);
+        for (size_t i = 0; i < Data.size(); i++)
+            Data[i] = static_cast<char>(i);
+        std::vector<char> DataGold(Data);
+
+        // Expected "gold" output and IMAD output.
+        std::vector<char>  vGoldOutput;
+        std::vector<char>  vTestOutput;
+
+        engine   engine;
+
+        // "Golden" Pooling
+        {
+            // Mem initialization
+            // This is user data, no kernels here
+            auto input = memory::allocate(engine,
+                                          { data_types::i8,
+                                              format::bfyx,
+                                              { in_B, in_F, in_X, in_Y } });
+            set_values(input, std::move(DataGold));
+
+            auto pool = pooling("pool_GOLD",
+                                 "input",
+                                 pooling_mode::max,
+                                 { 1, 1, W_X, W_Y },  // kernel_size
+                                 { 1, 1, S_X, S_Y }); // stride
+
+            // Create a topology with a simple Convolution layer
+            topology topology(input_layout("input", input.get_layout()),
+                              pool);
+
+            // Network processing
+            network network(engine, topology);
+            network.set_input_data("input", input);
+            //network_exe(network, vGoldOutput, "pool_GOLD");
+            auto outputs = network.execute();
+            auto searchC = outputs.find("pool_GOLD");
+            ASSERT_FALSE(searchC == outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vGoldOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vGoldOutput.push_back(output_ptr[i]);
+        }
+
+        //
+        // IMAD Pooling
+        //
+        {
+            topology topology;
+
+            // Mem initialization
+            // This is user data, no kernels here
+            auto input = memory::allocate(engine,
+                                          { data_types::i8,
+                                              format::bfyx,
+                                              { in_B, in_F, in_X, in_Y } });
+            set_values(input, std::move(Data));
+
+            // Add input to topology
+            topology.add(
+                input_layout("input", input.get_layout()));
+
+            // Reorder (a-ka swizzelling) input to MMAD/IMAD Pooling format
+            topology.add(reorder("reorder_Swizzelled",
+                         "input",
+                         layout(data_types::i8,
+                                format::b_fs_yx_fsv4,
+                                { in_B, in_F, in_X, in_Y })));
+
+            // Add Convoluiton to topology
+            topology.add(pooling("pool_IMAD",
+                                 "reorder_Swizzelled",
+                                 pooling_mode::max,
+                                 { 1, 1, W_X, W_Y },  // kernel_size
+                                 { 1, 1, S_X, S_Y })); // stride
+
+            // Back reordering (a-ka unswizzelling) output from MMAD/IMAD pooling
+            topology.add(reorder("reorder_UnSwizzelled",
+                                 "pool_IMAD",
+                                 layout(data_types::i8,
+                                        format::bfyx,
+                                        { in_B, in_F, in_X, in_Y })));
+
+            network network(engine, topology);
+            network.set_input_data("input", input);
+            //network_exe(network, vTestOutput, "reorder_UnSwizzelled");
+            auto outputs = network.execute();
+            auto searchC = outputs.find("reorder_UnSwizzelled");
+            ASSERT_FALSE(searchC == outputs.end());
+            auto output = outputs.begin()->second.get_memory();
+            auto output_ptr = output.pointer<char>();
+            vTestOutput.reserve(output_ptr.size());
+            for (size_t i = 0; i < output_ptr.size(); i++)
+                vTestOutput.push_back(output_ptr[i]);
+        }
+
+        // Result validation
+        ASSERT_TRUE(vGoldOutput.size() == vTestOutput.size());
+        for (size_t i = 0; i < vGoldOutput.size(); i++)
+            ASSERT_TRUE(vTestOutput[i] == vGoldOutput[i]);
+
+    } // for (int j = 0; F_array[j]; i++)
+}
+
 
 class pooling_test : public tests::generic_test
 {
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp
new file mode 100644
index 000000000..673e7eaf9
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/propagate_constants_gpu_test.cpp
@@ -0,0 +1,69 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <gtest/gtest.h>
+#include "api/CPP/memory.hpp"
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/engine.hpp>
+#include "test_utils/test_utils.h"
+#include <api/CPP/concatenation.hpp>
+#include <api/CPP/reorder.hpp>
+#include <api/CPP/data.hpp>
+#include <api/CPP/reshape.hpp>
+
+using namespace cldnn;
+using namespace tests;
+
+//We expect additional reorder to be added in between "weights1" and "reshape1".
+//This situation should be handled properly by propagate constants optimization phase
+TEST(propagate_constants, copy_dependecies_from_nodes) {
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    build_opt.set_option(build_option::optimize_data(true));
+
+    auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto weights1 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
+    auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
+
+    set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
+    set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
+    set_values(weights2, { 1.1f, 0.1f });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights1", weights1));
+    topology.add(data("weights2", weights2));
+    topology.add(reshape("reshape1", "weights1", tensor(spatial(1, 2))));
+    topology.add(reorder("reorder2", "input", layout(data_types::f32, format::byxf, 4)));
+    topology.add(reorder("reorder1", "reshape1", layout(data_types::f32, format::byxf, 4)));
+    topology.add(concatenation("concat", { "reorder1", "weights2" }, concatenation::along_x));
+    topology.add(convolution("conv2", { "reorder2" }, { "concat" }));
+    network network(engine, topology, build_opt);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    float epsilon = 1e-2f;
+    for (auto& it : outputs)
+    {
+        auto output = it.second.get_memory().pointer<float>();
+        EXPECT_NEAR(7.8f, output[0], epsilon);
+    }
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp
index 93e36a6e0..c1b818df7 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/proposal_cpu_test.cpp
@@ -94,7 +94,7 @@ template <typename Dtype>
 TestRunnerProposal<Dtype>::TestRunnerProposal() :
                             _cls_scores_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 18, 23, 14 } ),
                             _bbox_pred_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 36, 23, 14 } ),
-                            _image_info_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 1, 3, 1 } ),
+                            _image_info_layout(cldnn::type_to_data_type<Dtype>::value, format::bfyx, { 1, 3, 1, 1 } ),
                             _test_layer(layer_name, 
                                         cls_scores_name, 
                                         bbox_pred_name,
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp
new file mode 100644
index 000000000..db7a9d27a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/pyramid_roi_align_gpu_test.cpp
@@ -0,0 +1,191 @@
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/engine.hpp>
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/pyramid_roi_align.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/mutable_data.hpp>
+
+#include "test_utils/test_utils.h"
+
+
+using namespace cldnn;
+using namespace tests;
+
+enum Test_index { //order the same as test_data table
+    BOXES = 0,
+    IMAGE_META,
+    P5,
+    P4,
+    P3,
+    P2,
+    POOL
+};
+
+struct Test_data
+{
+    const char *parameter_name;
+    int32_t b, f, x, y;
+    std::vector<float> dataTMP;
+};
+
+Test_data test_data [] = //order the same as enum Test_index
+{
+    {
+        "boxes",
+        1, 1, 1, 4,
+        { 0.274695277f, 0.39985016f, 0.751607299f, 0.649529517f }
+    },
+    {   
+        "image_meta",
+        1, 1, 1, 93,
+        { 0, 415, 640, 3, 1024, 1024, 3, 180, 0, 844, 1024, 1.6f }
+    },
+    {
+        "P5",
+        1, 1, 32, 32,
+        {
+            -2.33415818f, -1.46765602f,  -0.998123348f, -0.945146739f, -0.721071541f, -1.19279253f, -1.37023795f,   -1.61545324f,  -2.03868198f,  -1.72659981f,  -1.5598495f,   -1.55309856f,  -1.53211606f, -1.86645496f, -1.84540808f,   -1.68674099f, -1.60733783f, -1.43271363f, -1.37408626f,  -1.35044777f, -1.25868618f, -0.965965867f, -0.881696165f, -0.709434509f, -0.494760394f, -0.482933104f, -1.26238084f, -1.45486391f, -1.00801146f, -0.840218246f, -0.420806766f, 0.635412455f,
+            -5.15252113f, -4.81609535f,  -4.33736563f,  -4.5069356f,   -4.69305611f,  -5.35324192f, -5.4090085f,    -5.18345022f,  -5.57966137f,  -6.08182287f,  -6.4237361f,   -6.63587379f,  -6.60395145f, -6.99704218f, -7.26061678f,   -7.13621283f, -6.92309761f, -6.54043388f, -6.0931859f,   -5.95154953f, -5.92886162f, -5.60794735f,  -5.39521217f,  -5.24937916f,  -4.93126583f,  -5.03314447f,  -6.35518694f, -5.97401428f, -4.61507177f, -3.88595009f,  -3.10539627f, -1.12507141f,
+            -4.58263206f, -4.23551846f,  -3.71995449f,  -3.9303925f,   -4.22284889f,  -4.90389252f, -4.90515423f,   -4.35046101f,  -4.93061686f,  -5.62805653f,  -7.13111687f,  -8.04961014f,  -8.61973f,    -8.91128826f, -9.59987259f,   -9.77626991f, -9.34930134f, -8.41235256f, -7.99330997f,  -7.56377172f, -7.41074753f, -7.68792772f, -7.52159262f, -7.23604727f, -6.43461895f, -6.30558538f, -7.42862511f, -6.57217264f, -4.36673212f, -3.42791319f, -2.78279519f, -1.13899291f,
+            -4.05928659f, -3.63066411f,  -3.08045626f,  -3.49022269f,  -3.33089471f,  -3.98461342f, -3.60927105f,   -3.47735429f,  -4.22189903f,  -5.61483288f,  -6.73310328f,  -7.82119894f,  -7.76680946f, -7.81351185f, -8.53846359f,   -8.85490894f, -8.87630653f, -8.05196667f, -7.37027693f,  -6.48965073f, -6.0011878f, -6.49297428f, -6.87221718f, -6.6889801f, -5.67975998f, -5.48370981f, -6.48479271f, -5.99923038f, -4.15075731f, -3.24771428f, -2.38959575f, -0.802779257f,
+            -3.8221159f,  -3.2125051f,   -2.67735672f,  -3.35456967f,  -2.42953777f,  -1.97508657f, -0.0455740131f, 0.200172856f,  -1.73673642f,  -4.14228773f,  -6.05798674f,  -6.92922974f,  -6.31088972f, -5.24032164f, -5.8104291f,    -6.21769142f, -6.71948385f, -6.34254694f, -5.40050459f,  -3.83635306f, -2.84016895f, -3.47709227f, -4.53029394f, -4.79398346f, -4.15029287f, -4.34026718f, -5.05020094f, -4.96476984f, -3.85935163f, -3.06635952f, -2.21780515f, -0.550920606f,
+            -3.38425207f, -2.47040701f,  -1.75725257f,  -2.67789435f,  -1.93510687f,  -0.023562137f, 3.12235284f,   3.195858f,     -0.502758205f, -3.64130497f,  -4.92483091f,  -5.37235212f,  -4.44142771f, -3.01087427f, -2.56460142f,   -3.36131048f, -4.67883253f, -4.97649288f, -4.15489054f,  -3.05888772f, -2.53061557f, -2.89280939f, -3.89569187f, -3.85883617f, -3.85448074f, -3.72637963f, -4.17853975f, -3.72458243f, -3.2028439f, -2.26282644f, -1.57095635f, -0.0362351872f,
+            -2.86179805f, -1.77212584f,  -1.01908028f,  -2.22856259f,  -2.04378486f,  -0.389851034f, 2.5954473f,    3.546386f,     -0.572356939f, -3.22942686f,  -4.71709538f,  -5.06511068f,  -4.19580078f, -2.62281418f, -1.84743559f,   -1.72474909f, -2.85398459f, -3.05193329f, -2.1715126f,   -1.87324941f, -2.42470956f, -3.27851868f, -4.05942631f, -3.64058971f, -3.65105247f, -3.37935495f, -3.88859773f, -3.24483466f, -2.69226313f, -1.51380038f, -0.803811312f, 0.575846195f,
+            -2.44617772f, -1.21146309f,  -0.406607807f, -1.79564178f,  -2.15529561f,  -1.86219978f, -0.642769337f,  -0.119694829f, -3.55873179f,  -6.07527542f,  -7.34461832f,  -7.5732069f,   -5.2650032f,  -2.78443551f, -2.01951551f,   -2.20919466f, -3.48502755f, -3.39159703f, -2.84414029f,  -3.01556158f, -4.17538118f, -4.6715436f, -4.51803017f, -3.98833418f, -4.03647232f, -3.56217432f, -4.35153055f, -3.35357046f, -2.34758973f, -0.991552889f, -0.410246134f, 0.853578329f,
+            -2.32879257f, -0.983750522f, -0.21862191f,  -1.63332736f,  -2.70467782f,  -3.79070854f, -3.12105083f,   -3.37172794f,  -5.87286377f,  -7.56662798f,  -8.18826008f,  -7.51929522f,  -5.9531951f,  -4.06868601f, -2.65765858f,   -2.80148482f, -4.28907013f, -4.32930136f, -4.3640132f,   -4.59029436f, -5.4193697f, -5.89368916f, -5.6321454f, -5.52998543f, -5.09114599f, -3.59506583f, -3.95068288f, -3.30025363f, -2.04802871f, -0.637728035f, -0.245602071f, 0.879402578f,
+            -2.35637832f, -0.938572884f, -0.137476623f, -1.41782618f,  -2.65590358f,  -4.25014019f, -4.0826478f,    -4.17878771f,  -5.6027894f,   -7.31306791f,  -7.89162493f,  -7.03756762f,  -6.09949017f, -5.60607052f, -4.94666481f,   -4.39400244f, -4.67201567f, -4.2205472f,  -4.38528776f,  -4.6779213f, -4.83282471f, -4.84141684f, -4.65654802f, -4.24497604f, -3.85145688f, -2.74431086f, -3.78755236f, -3.00524449f, -1.81372464f, -0.552992642f, -0.150228053f, 0.944489419f,
+            -2.39807153f, -0.961493254f, -0.207601368f, -1.41579533f,  -2.26456952f,  -3.31752872f, -2.37754416f,   -2.27816534f,  -3.3359437f,   -4.83316755f,  -4.82455635f,  -5.1267004f,   -4.75627851f, -6.18640566f, -7.98392439f,   -9.12876225f, -8.12104893f, -7.43801117f, -5.90858698f,  -3.8132503f, -2.49779272f, -2.64403725f, -2.50610948f, -2.27564049f, -2.08231401f, -2.0385685f, -3.72143364f, -3.04797244f, -1.76300609f, -0.521960258f, -0.0881003886f, 0.961502016f,
+            -2.44038081f, -1.01705039f,  -0.289608359f, -1.37090492f,  -1.93311131f,  -2.47754407f, -1.31518912f,   -0.804416537f, -0.930097163f, -0.780354142f, -0.834263086f, -1.50460267f,  -3.63839531f, -4.60880566f, -6.8964262f,    -8.66131878f, -9.60757637f, -8.79116344f, -6.86388493f,  -4.30527639f, -1.8283174f, -1.4908253f, -1.37629032f, -1.22827542f, -1.60703599f, -2.33176303f, -3.86254454f, -2.99731207f, -1.65976918f, -0.461797535f, 0.0194968097f, 0.998998225f,
+            -2.46240711f, -1.03391945f,  -0.35371244f,  -1.40552509f,  -1.92847848f,  -2.80441093f, -1.44593406f,   -0.652132452f, -0.4637236f,   -0.377687186f, -0.223660469f, -1.29031694f,  -2.68966746f, -3.15799189f, -3.18843555f,   -4.4910984f, -6.69606543f, -8.33802032f,  -8.19927311f,  -6.32680511f, -3.98862648f, -2.22264123f, -1.55090904f, -1.1854068f, -1.3106786f, -1.90384912f, -3.67234707f, -2.88272882f, -1.53641987f, -0.362456888f, 0.0893754214f, 1.02051163f,
+            -2.48206067f, -1.02961993f,  -0.368244141f, -1.42910719f,  -1.93446803f,  -2.968822f,   -1.83339584f,   -1.077631f,    -1.20465982f,  -1.57803464f,  -1.41360343f,  -1.76699162f,  -2.31551576f, -2.05016136f, -0.0285568349f, 1.02111804f, -1.09839404f, -3.57055283f,  -6.42463684f,  -6.38169003f, -6.04913425f, -3.92720795f, -2.87601185f, -2.27725315f, -1.91104662f, -1.94828415f, -3.19035602f, -2.59298229f, -1.44278193f, -0.386298746f, 0.0836858153f, 0.999346912f,
+            -2.48712945f, -1.01729345f,  -0.474304944f, -1.67669559f,  -2.10705042f,  -3.42592764f, -2.34152699f,   -1.83562672f,  -1.90750253f,  -2.23259664f,  -1.80318487f,  -2.05461431f,  -2.2218473f,  -1.68138134f,  1.89481843f,   4.749331f,    4.48664188f,  1.76011801f,  -2.80741739f,  -5.01609373f, -6.86733389f, -4.95238161f, -3.11620855f, -2.35959673f, -2.14903998f, -2.22679043f, -3.25020576f, -2.55579758f, -1.45884585f, -0.450649738f, 0.0580532737f, 0.980433941f,
+            -2.5185082f,  -1.06924045f,  -0.577468932f, -1.7359041f,   -2.2522819f,   -3.44346404f, -2.27338934f,   -1.50737846f,  -1.4048748f,   -1.7626915f,   -1.77618313f,  -2.55145335f,  -2.72144723f, -1.09168231f,  3.47705436f,   7.27473307f,  7.77128983f,  4.76851988f,  -0.231550142f, -4.59473372f, -7.91270256f, -5.9186945f, -3.17887211f, -1.95729899f, -2.12510371f, -2.66853952f, -3.79930806f, -2.93926597f, -1.47657454f, -0.51107496f, 0.0374269597f, 0.9673509f,
+            -2.57245374f, -1.16771162f,  -0.721676588f, -1.80981266f,  -2.38730669f,  -3.6522367f,  -2.01576495f,   -0.8515746f,   -0.121799529f, -1.13752592f,  -1.98465598f,  -3.21510339f,  -3.90218043f, -1.90408611f,  3.62870288f,   9.53359127f,  12.2969809f,  9.25624657f,  3.08819818f,   -3.57391787f, -8.53378582f, -6.41586733f, -3.14953685f, -1.97396016f, -2.7328465f, -3.78186893f, -4.93579912f, -3.55470729f, -1.54245102f, -0.482002735f, 0.0237279348f, 0.970623732f,
+            -2.6402328f,  -1.25508213f,  -0.813264728f, -1.85111022f,  -2.31478047f,  -3.37323236f, -1.72119153f,   -0.622631073f,  0.275214434f, -1.74099112f,  -3.82077885f,  -5.72362041f,  -7.07592487f, -5.2477479f,   1.65343058f,   9.84803104f,  13.9755783f,  12.027339f,   6.53266191f,   0.243630022f, -4.9232049f, -4.36105299f, -1.71283042f, -1.22028506f, -2.47615337f, -3.96648002f, -4.9211669f, -3.52139068f, -1.58175361f, -0.453389883f, 0.0172070079f, 0.974586606f,
+            -2.69985747f, -1.30426204f,  -0.813042939f, -1.84938121f,  -2.33455706f,  -3.75564861f, -2.54689479f,   -2.26757884f,  -1.79824364f,  -2.93493605f,  -4.15734148f,  -4.67264462f,  -5.97829533f, -6.07628202f, -0.634435117f,  7.86048698f,  13.385828f,   13.8827438f,  9.38942051f,   3.89634967f, -1.39140749f, -2.39509726f, -1.62092125f, -1.5939455f, -2.25631547f, -3.52288079f, -4.53593159f, -3.25450349f, -1.60031211f, -0.435814232f, 0.0219062977f, 0.986854315f,
+            -2.74063468f, -1.31302822f,  -0.820956767f, -1.81994605f,  -2.28283525f,  -3.5440836f,  -2.51103139f,   -2.81304479f,  -3.26139283f,  -3.37517047f,  -3.98655128f,  -4.15412378f,  -4.92545223f, -5.78675413f, -3.06408238f,   3.01499391f,  8.77478504f,  10.6144304f,  8.11615849f,   4.45580721f, 0.623039126f, -1.10865057f, -1.95774138f, -2.36074567f, -2.57845926f, -3.33297563f, -3.97079587f, -2.93356919f, -1.50071633f, -0.443875313f, 0.0236797072f, 0.991317093f,
+            -2.77299833f, -1.32691216f,  -0.831916511f, -1.82886219f,  -2.0734787f,   -3.13335371f, -1.50032151f,   -1.46733963f,  -2.72959828f,  -3.5253818f,   -4.29566097f,  -5.57419872f,  -6.24431992f, -6.32591867f, -5.26826477f,   -3.04502487f, 0.449693143f, 3.47979259f,  3.50362659f,   2.58046269f, 0.579684913f, -0.919588447f, -2.08200479f, -2.6678884f, -2.59757757f, -3.0013814f, -3.42182064f, -2.75994992f, -1.48684669f, -0.477065891f, 0.0327885784f, 0.994787097f,
+            -2.7904563f,  -1.33298481f,  -0.825692832f, -1.78411806f,  -1.98032236f,  -2.94529605f, -1.540254f,     -1.03917682f,  -1.87087965f,  -2.15394163f,  -2.24386406f,  -1.56417131f,  -1.79924405f, -2.09344101f, -3.65430427f,   -4.66693974f, -4.27157164f, -1.08878291f, -0.221785039f, -0.0799107477f, -0.684955359f, -1.22172666f, -1.90416121f, -2.04627061f, -2.09932423f, -2.7114203f, -3.33123398f, -2.65206981f, -1.4748162f, -0.431342453f, 0.0863730982f, 1.03362691f,
+            -2.80970526f, -1.32318377f,  -0.788406253f, -1.62803352f,  -1.83336627f,  -2.71299958f, -1.29830825f,   -0.898415565f, -1.27306414f,  -1.4642626f,   -1.53942132f,  -0.524312437f, -1.13679814f, -2.15964532f, -3.81581545f,   -6.19301414f, -6.9342289f,  -4.5518117f,  -4.05187798f,  -3.89661026f, -2.73003149f, -1.90081847f, -1.18712986f, -1.05476069f, -1.45352709f, -2.40461349f, -3.57806826f, -2.67894101f, -1.34701252f, -0.292546421f, 0.223820776f, 1.15115368f,
+            -2.83941913f, -1.31946158f,  -0.752137005f, -1.59541857f,  -1.98224044f,  -3.13006711f, -2.87664342f,   -2.74078941f,  -2.44921613f,  -1.53203559f,  -1.11937928f,  -0.268255889f, -1.06444466f, -2.87781739f, -4.91630268f,   -8.23729324f, -10.6890593f, -10.1742487f, -8.88589478f,  -7.06334209f, -4.42162704f, -2.8048737f, -0.9670524f, -0.169980749f, -0.62598449f, -1.46366549f, -3.44733119f, -2.70727062f, -1.12550855f, 0.0431886837f, 0.491125584f, 1.39527845f,
+            -2.88625073f, -1.36332977f,  -0.782323718f, -1.70872879f,  -2.29862785f,  -3.65832949f, -3.41763759f,   -2.27270484f,  -1.15727568f,  -0.485867918f, -0.534794629f, -0.99851644f,  -1.86469233f, -3.56163645f, -6.06065321f,   -8.93986511f, -11.1936483f, -11.16537f,   -9.42015839f,  -7.1612606f, -4.54605007f, -3.13340139f, -1.05612564f, -0.218226328f, -0.347539067f, -0.917124569f, -3.23879743f, -2.66016054f, -1.1019274f, 0.280594468f, 0.802835882f, 1.70916617f,
+            -2.95432734f, -1.55732679f,  -0.9671579f,   -1.87740719f,  -2.52375722f,  -3.9269383f,  -3.63090515f,   -2.16633034f,  -1.57592404f,  -1.65385628f,  -2.63003421f,  -3.4876802f,   -4.29189682f, -4.7487464f,  -5.76429272f,   -6.65200949f, -7.45039988f, -7.22736359f, -6.15258741f,  -5.31453133f, -3.85754275f, -3.2067554f, -1.73008275f, -1.35701323f, -1.16924942f, -1.25322843f, -3.28507686f, -2.95321226f, -1.38456213f, 0.187379554f, 0.978641272f, 1.96348953f,
+            -3.11177945f, -1.80547488f,  -1.13023674f,  -1.9582721f,   -2.37351155f,  -3.67039227f, -3.1937058f,    -2.27774191f,  -2.11655211f,  -2.92763114f,  -3.51109672f,  -4.43897057f,  -4.60774946f, -5.22836876f, -5.26246691f,   -5.41725492f, -5.64507723f, -5.44532156f, -5.25552511f,  -5.40288162f, -4.75492859f, -4.50234127f, -3.85268068f, -3.71338868f, -3.31360817f, -3.09147811f, -4.54734945f, -3.58751845f, -1.86106849f, -0.0580402128f, 0.987123847f, 2.12943125f,
+            -3.36467028f, -2.15916252f,  -1.39851403f,  -2.21555972f,  -2.6277256f,   -3.89018989f, -3.28536391f,   -2.4179709f,   -2.31355095f,  -3.14865518f,  -3.84860849f,  -4.44453287f,  -4.50857449f, -4.88197565f, -4.95770359f,   -5.04250717f, -4.74955845f, -4.8034606f,  -4.87089396f,  -5.45653677f, -5.71883726f, -5.90324974f, -5.92616558f, -5.50277519f, -5.18182898f, -5.07875252f, -6.3301487f, -4.71556807f, -2.65147376f, -0.510522306f, 0.768599629f, 2.15899801f,
+            -3.76996517f, -2.77193499f,  -2.04029584f,  -2.67725992f,  -3.11456323f,  -4.35716057f, -3.96405196f,   -3.11866283f,  -2.89303422f,  -3.84127808f,  -4.63507318f,  -5.34559536f,  -5.6741724f,  -5.9913516f,  -5.89291143f,   -6.14835787f, -5.75908613f, -5.48700523f, -5.17146826f,  -5.74538183f, -6.23743486f, -6.26235199f, -6.18846273f, -5.73266459f, -5.36256504f, -5.36837292f, -6.48104477f, -4.97722006f, -3.1608839f, -1.36612868f, -0.0857250318f, 1.66240442f,
+            -4.26793671f, -3.59083676f,  -2.84308076f,  -3.14333463f,  -3.37969398f,  -4.55007124f, -4.40458679f,   -3.53423572f,  -2.78584123f,  -3.32700229f,  -3.87822628f,  -5.09642506f,  -6.15807199f, -6.88138437f, -7.01429605f,   -7.22634697f, -7.04120684f, -6.64636993f, -5.79211712f,  -5.76786995f, -5.5597887f, -5.01553154f, -4.96951723f, -4.92054939f, -4.69466639f, -4.54826736f, -5.57798719f, -4.50945187f, -3.42488861f, -2.2323885f, -1.17007399f, 0.706006825f,
+            -4.58093643f, -4.09917927f,  -3.6026299f,   -3.76272631f,  -4.10116673f,  -5.68298769f, -5.67115974f,   -5.20354462f,  -4.87026978f,  -5.25120115f,  -5.51101351f,  -6.41377878f,  -7.30511761f, -8.20695019f, -8.15464306f,   -8.10768127f, -7.72227478f, -7.57483578f, -6.83547497f,  -6.92473555f, -6.26031017f, -5.44693089f, -4.98586988f, -4.71777868f, -4.84076738f, -4.88040304f, -5.76190281f, -4.94208717f, -3.97660065f, -3.09410763f, -2.34518123f, -0.401388973f,
+            -4.02035284f, -4.02879238f,  -4.01832962f,  -4.46334934f,  -5.42945766f,  -7.13510704f, -7.44949913f,   -7.30862284f,  -7.11234093f,  -7.16781139f,  -7.24586773f,  -7.57177401f,  -7.80264711f, -7.91191673f, -7.63455296f,   -7.31139612f, -7.24533272f, -7.21524429f, -7.19505501f,  -7.53508186f, -7.19776154f, -6.55349255f, -6.06127691f, -5.76581764f, -6.03102398f, -6.42573166f, -7.26578999f, -6.23923731f, -5.2162056f, -4.30992317f, -3.86889744f, -2.24626088f
+        }
+    },
+    {
+        "P4",
+        1, 1, 64, 64,
+        {/*Intentionaly 0 elements. Test for this elemnt does not used*/}
+    },
+    {
+        "P3",
+        1, 1, 128, 128,
+        {/*Intentionaly 0 elements. Test for this elemnt does not used*/}
+    },
+    {
+        "P2",
+        1, 1, 128, 128,
+        {/*Intentionaly 0 elements. Test for this elemnt does not used*/}
+    },
+    {
+        "pool",
+        1, 1, 7, 7,
+        {/*Intentionaly 0 elements. Values not important - only layout*/}
+    }
+};
+
+memory allocate_memory(Test_index key, const engine &engine) 
+{
+    auto ret = memory::allocate(engine, { data_types::f32, format::bfyx, { test_data[key].b, test_data[key].f, test_data[key].y, test_data[key].x } });
+    set_values(ret, test_data[key].dataTMP);
+    return ret;
+}
+
+TEST(pyramidROIAlign_gpu, basic_functionality)
+{
+    const auto& engine = get_test_engine();
+
+    std::vector<float> answer =
+    {
+            -5.56710863f, -4.15980053f,  -3.62781334f, -4.4299016f,   -4.32974339f,  -4.59520054f, -5.14869022f,
+            -4.04856586f, -6.20199442f,  -8.62770653f, -9.3613081f,   -7.69766426f,  -4.6893239f,  -1.79761052f,
+            -2.1207974f,  -0.0283275843f, 2.62955427f,  0.693355441f, -3.21296549f,  -5.62806273f, -6.13721943f,
+            -3.01667213f,  1.90189886f,   9.18445969f,  11.0731812f,   5.476161f,    -2.67103052f, -8.19120693f,
+            -5.73783922f, -2.93177485f,   5.87217808f,  11.9360819f,   10.5841255f,   4.8481946f,  -0.81512779f,
+            -2.63171887f, -3.56354189f,  -4.38874054f, -2.65824175f,   0.0660879612f, 0.36207819f, -0.571367621f,
+            -2.00750613f, -4.5745883f,   -8.36942673f, -10.7424393f,  -9.67979145f,  -7.39468241f, -4.24828815f
+    };
+
+    auto boxes = allocate_memory(BOXES, engine);
+    auto image_meta = allocate_memory(IMAGE_META, engine);
+    auto P5_tensor = allocate_memory(P5, engine);
+    auto P4_tensor = allocate_memory(P4, engine);
+    auto P3_tensor = allocate_memory(P3, engine);
+    auto P2_tensor = allocate_memory(P2, engine);
+    auto pool_size = allocate_memory(POOL, engine);
+
+    topology topo;
+    topo.add(input_layout(test_data[BOXES].parameter_name, boxes.get_layout()));
+    topo.add(input_layout(test_data[IMAGE_META].parameter_name, image_meta.get_layout()));
+    topo.add(input_layout(test_data[P2].parameter_name, P2_tensor.get_layout()));
+    topo.add(input_layout(test_data[P3].parameter_name, P3_tensor.get_layout()));
+    topo.add(input_layout(test_data[P4].parameter_name, P4_tensor.get_layout()));
+    topo.add(input_layout(test_data[P5].parameter_name, P5_tensor.get_layout()));
+    topo.add(input_layout(test_data[POOL].parameter_name, pool_size.get_layout()));
+
+    topo.add(pyramid_roi_align("pyramidROIAlign", 
+            test_data[BOXES].parameter_name, 
+            test_data[IMAGE_META].parameter_name,
+            test_data[P2].parameter_name,
+            test_data[P3].parameter_name,
+            test_data[P4].parameter_name,
+            test_data[P5].parameter_name,
+            test_data[POOL].parameter_name));
+
+    network net(engine, topo);
+    net.set_input_data(test_data[BOXES].parameter_name, boxes);
+    net.set_input_data(test_data[IMAGE_META].parameter_name, image_meta);
+    net.set_input_data(test_data[P2].parameter_name, P2_tensor);
+    net.set_input_data(test_data[P3].parameter_name, P3_tensor);
+    net.set_input_data(test_data[P4].parameter_name, P4_tensor);
+    net.set_input_data(test_data[P5].parameter_name, P5_tensor);
+    net.set_input_data(test_data[POOL].parameter_name, pool_size);
+
+    auto outputs = net.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "pyramidROIAlign");
+
+    auto output_mem = outputs.at("pyramidROIAlign").get_memory();
+    auto output_ptr = output_mem.pointer<float>();
+
+    int k = 0;
+    for (float val1 : output_ptr)
+    {
+        EXPECT_NEAR(val1, answer[k++], 1e-5);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp
index 5bb2857ef..03996ba14 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reorder_gpu_test.cpp
@@ -59,7 +59,7 @@ TEST(reorder_gpu_f32, basic)
     //  b1 f1: 12    8
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     layout output_layout(data_types::f32, format::bfyx,{ 2,2,2,2 });
@@ -145,7 +145,7 @@ TEST(reorder_gpu_f32, basic_subtract) {
     //  b1 f1: 10    7
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32,  format::yxfb, { 2, 2, 2, 2 } });
     layout output_layout( data_types::f32, format::bfyx, {2,2,2,2} );
@@ -234,7 +234,7 @@ TEST(reorder_gpu_f32, basic_subtract_value) {
     //  b1 f1:  9.5  5.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     layout output_layout(data_types::f32, format::bfyx,{ 2,2,2,2 });
@@ -318,7 +318,7 @@ TEST(reorder_gpu_f16, basic_subtract_f32_output_f32) {
     //  b1 f1: 10    7
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     if (!engine.get_info().supports_fp16)
     {
@@ -413,7 +413,7 @@ TEST(reorder_gpu_f16, basic_subtract_value) {
     //  b1 f1:  9.5  5.5
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
     if (!engine.get_info().supports_fp16)
     {
         std::cout << "[ SKIPPED ] The test is skipped (cl_khr_fp16 is not supported)." << std::endl;
@@ -482,7 +482,7 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) {
     //  Output is expected to contain the same value as input in range of indices from 0x0000 to 0xF801.
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     if (!engine.get_info().supports_fp16)
     {
@@ -562,7 +562,7 @@ TEST(reorder_gpu, basic_convert_f16_f32_f16) {
 
 TEST(reorder_gpu, basic_convert_int8) {
 
-    engine engine;
+    const auto& engine = get_test_engine();
     layout in_layout = { type_to_data_type<float>::value,format::byxf,{ 1,1,3,3 } };
     layout byte_layout = { type_to_data_type<int8_t>::value, format::bfyx,{ 1,1,3,3 } };
     std::initializer_list<float> input_f = { 1.0f, -2.5f, 3.1f, -4.0f, 5.03f, -6.99f, 7.0f, -8.0f, 9.0f };
@@ -620,7 +620,7 @@ TEST(reorder_gpu, basic_convert_uint8rgbabyxf_to_fp32_bfyx) {
 	//
 	const int kernel_size = 5;
 	const int feature_size = 4;
-	engine engine;
+	const auto& engine = get_test_engine();
 
 	if (!engine.get_info().supports_fp16)
 	{
@@ -751,7 +751,7 @@ TEST(reorder_gpu_f32, basic_yxfb_to_bfyx_input_padding)
     //  f1: b0:  5    6  b1:   1.5  5.2
     //  f1: b0:  7    8  b1:   12   8
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
     layout output_layout(data_types::f32, format::bfyx, { 2,2,2,2 });
@@ -830,7 +830,7 @@ TEST(reorder_gpu_f32, basic_bfyx_to_yxfb_input_padding)
     //  b1 f1: 12    8
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     layout output_layout(data_types::f32, format::yxfb, { 2,2,2,2 });
@@ -910,7 +910,34 @@ TEST(reorder_gpu_opt, basic_remove_redundant)
     EXPECT_TRUE(outputs.at("r2").get_memory().get_layout().format == format::yxfb);
 }
 
-TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders)
+TEST(reorder_gpu_opt, remove_redundant_activation_fuse)
+{
+    engine eng;
+
+    memory in = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 1, 2, 1 } });
+    set_values(in, { -1.0f, -1.0f });
+    memory scale_mem = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{1, 1, 1, 1 } });
+    set_values(scale_mem, { 2.0f });
+    topology tpl{
+        input_layout("in", in.get_layout()),
+        reorder("r1", "in", format::bfyx, data_types::f32),
+        activation("relu", "r1", cldnn_activation_func::activation_relu_negative_slope, {0.01f, 0.0f}),
+        data("scale_data", scale_mem),
+        scale("output", "relu", "scale_data")
+    };
+
+    build_options opts;
+    opts.set_option(build_option::optimize_data(true));
+
+    network net(eng, tpl, opts);
+    net.set_input_data("in", in);
+    auto outputs = net.execute();
+    auto out_ptr = outputs.begin()->second.get_memory().pointer<float>();
+    EXPECT_FLOAT_EQ(out_ptr[0], -0.02f);
+    EXPECT_FLOAT_EQ(out_ptr[1], -0.02f);
+}
+
+TEST(reorder_gpu_opt, basic_do_not_remove_redundant_due_it_is_output)
 {
     engine eng;
 
@@ -920,7 +947,7 @@ TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders)
         input_layout("in", in.get_layout()),
         convolution("conv", "in", { "weights" }),
         data("weights", weights),
-        reorder("r1", "conv", format::bfyx, data_types::f32) //optimize data should add conversion from yxfb to bfyx and 'conv' should output data in bfyx as well (IE case)
+        reorder("r1", "conv", format::bfyx, data_types::f32) //reoder is output - do not optimize
     };
 
     build_options opts;
@@ -931,8 +958,10 @@ TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders)
     auto outputs = net.execute();
     auto executed_primitives = net.get_executed_primitives();
 
-    //remove redundant reorder optimization should replace redundant reorder node with convolution
-    EXPECT_TRUE(executed_primitives.count("conv") == 0);
+    //all pirmitives in this test needs to be executed
+    EXPECT_TRUE(executed_primitives.count("conv") == 1);
+    EXPECT_TRUE(executed_primitives.count("in") == 1);
+    EXPECT_TRUE(executed_primitives.count("r1") == 1);
     ASSERT_TRUE(outputs.count("r1") == 1);
     EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx);
 }
@@ -965,6 +994,35 @@ TEST(reorder_gpu_opt, basic_remove_redundant_output_due_to_implicit_reorders)
     EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx);
 }
 
+TEST(reorder_gpu_opt, basic_remove_redundant_due_to_implicit_reorders)
+{
+    engine eng;
+
+    memory in = memory::allocate(eng, { data_types::f32, format::yxfb, tensor{ 1, 2, 2, 1 } });
+    memory weights = memory::allocate(eng, { data_types::f32, format::bfyx, tensor{ 1, 2, 2, 1 } });
+    topology tpl{
+        input_layout("in", in.get_layout()),
+        convolution("conv", "in",{ "weights" }),
+        data("weights", weights),
+        reorder("r1", "conv", format::bfyx, data_types::f32), //optimize data should add conversion from yxfb to bfyx and 'conv' should output data in bfyx as well (IE case)
+        softmax("output", "r1")
+    };
+
+    build_options opts;
+    opts.set_option(build_option::optimize_data(true));
+
+    network net(eng, tpl, opts);
+    net.set_input_data("in", in);
+    auto outputs = net.execute();
+    auto executed_primitives = net.get_executed_primitives();
+
+    //remove redundant reorder optimization should remove r1 node
+    EXPECT_TRUE(executed_primitives.count("r1") == 0);
+    //all pirmitives in this test needs to be executed
+    ASSERT_TRUE(outputs.count("output") == 1);
+    EXPECT_TRUE(outputs.at("output").get_memory().get_layout().format == format::bfyx);
+}
+
 TEST(reorder_gpu_opt, non_trivial_remove_redundant)
 {
     engine eng;
@@ -987,7 +1045,7 @@ TEST(reorder_gpu_opt, non_trivial_remove_redundant)
 
     ASSERT_TRUE(executed_primitives.count("in") == 1);
     //ASSERT_TRUE(all_primitives.at("r1") == "_optimized_");
-    EXPECT_TRUE(executed_primitives.at("in") == outputs.at("r1").get_event());
+    EXPECT_TRUE(executed_primitives.at("in") != outputs.at("r1").get_event());
     ASSERT_TRUE(outputs.count("r1") == 1);
     EXPECT_TRUE(outputs.at("r1").get_memory().get_layout().format == format::bfyx);
 }
@@ -1129,7 +1187,7 @@ TEST(reorder_gpu_opt, mean_mul_val_float_to_int)
 TEST(reorder_gpu_i32, basic)
 {
     //  Test for converting data types f32->i32
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     layout output_layout(data_types::i32, format::bfyx, { 2,2,2,2 });
@@ -1170,7 +1228,7 @@ TEST(reorder_gpu_i32, basic)
 TEST(reorder_gpu_i64, basic)
 {
     //  Test for converting data types f32->i64
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
     layout output_layout(data_types::i64, format::bfyx, { 2,2,2,2 });
@@ -1232,6 +1290,8 @@ public:
     static std::vector<std::tuple<test_params*, cldnn::primitive*>> generate_specific_test_params()
     {
         generic_test::generate_generic_test_params(all_generic_params);
+
+        const auto data_types = test_data_types();
         
         for (const auto& test_param : all_generic_params)
         {
@@ -1239,7 +1299,7 @@ public:
 
             std::vector<cldnn::layout> output_layouts = {};
 
-            for (const auto& dt : test_data_types())
+            for (const auto& dt : data_types)
             {
                 for (const auto& fmt : generic_test::test_input_formats)
                 {
@@ -1280,7 +1340,7 @@ public:
         assert(mean == "");
         assert(subtract_per_feature.size() == 0);
         
-        auto output = memory::allocate(engine, cldnn::layout(reorder->output_data_type, inputs[0].get_layout().format, inputs[0].get_layout().size));
+        auto output = memory::allocate(engine, cldnn::layout(*reorder->output_data_type, inputs[0].get_layout().format, inputs[0].get_layout().size));
 
         cldnn::pointer<InputType> input_mem = inputs[0].pointer<InputType>();
         cldnn::pointer<OutputType> output_mem = output.pointer<OutputType>();
@@ -1299,7 +1359,7 @@ public:
     {
         if (generic_params->data_type == data_types::f32)
         {
-            if (((cldnn::reorder*)layer_params)->output_data_type == data_types::f32)
+            if (*layer_params->output_data_type == data_types::f32)
             {
                 return generate_reference_typed<float, float>(inputs);
             }
@@ -1310,7 +1370,7 @@ public:
         }
         else
         {
-            if (((cldnn::reorder*)layer_params)->output_data_type == data_types::f32)
+            if (*layer_params->output_data_type == data_types::f32)
             {
                 return generate_reference_typed<FLOAT16, float>(inputs);
             }
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp
index b7a6852b1..d75d9ee17 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reshape_gpu_test.cpp
@@ -43,7 +43,7 @@ void verify_int(const int32_t &output_value, const int32_t &value)
 template <class ElemType>
 void generic_reshape_test(format fmt, tensor const& input_size, tensor const& reshape_size, bool in_place, padding const& input_padd = padding(), padding const& output_padd = padding())
 {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     //allocate input memory
     auto data_type = data_types::f32;
@@ -501,7 +501,7 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) {
     //  b1f0:  0.0
     //  b1f1:  4.0
 
-    engine engine;
+    const auto& engine = get_test_engine();
     auto batch_num = 2;
     auto feature_num = 2;
     auto x_size = 1;
@@ -536,4 +536,49 @@ TEST(reshape_gpu_f32, multiple_users_with_reorder) {
 
     for (size_t i = 0; i < out2.size(); i++)
         EXPECT_EQ(output_ptr_2[i], out2[i]);
-}
-\ No newline at end of file
+}
+
+TEST(reshape_gpu_f32, calc_output_shape) {
+
+    //  INPUT(bfyx,2x2x1x1) -- RESHAPE(1, 1, 0, -1)  
+
+    //  Input:
+    //  b0f0: -1.0
+    //  b0f1:  2.0
+    //  b1f0: -3.0
+    //  b1f1:  4.0
+    //
+    // output_shape (1, 1, 1, 4)
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(reshape("reshape", "input", tensor(1, 1, 0, -1)));
+
+    set_values(input, { -1.f, 2.f, -3.f, 4.f });
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "reshape");
+
+    auto output = outputs.at("reshape").get_memory();
+
+    EXPECT_TRUE(output.get_layout().data_type == input.get_layout().data_type);
+    EXPECT_TRUE(output.get_layout().format == input.get_layout().format);
+
+    ASSERT_TRUE(output.get_layout().size == tensor(1, 1, 1, 4));
+
+    float answers[4] = { -1.f, 2.f, -3.f, 4.f };
+
+    auto output_ptr = output.pointer<float>();
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp
new file mode 100644
index 000000000..441b5588a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/reverse_sequence_gpu_test.cpp
@@ -0,0 +1,580 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/reverse_sequence.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+
+#include <cstddef>
+#include <tests/test_utils/test_utils.h>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(reverese_sequence_gpu_test, fp32_d2_2_ba1_sa0) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
+    size_t batch_axis = 1;
+    size_t seq_axis = 0;
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f
+    });
+
+    set_values(seq_lengths, {
+            1.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.0f, 3.0f, 2.0f, 1.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba0_sa1) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    size_t batch_axis = 0;
+    size_t seq_axis = 1;
+
+    set_values(input, {
+        0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+        20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
+    });
+
+    set_values(seq_lengths, {
+        2.0f, 2.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
+            12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
+            21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp32_d3_3_3_ba2_sa0) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 3, 1, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    size_t batch_axis = 2;
+    size_t seq_axis = 0;
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
+    });
+
+    set_values(seq_lengths, {
+            2.0f, 2.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 
+            18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa3) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
+    size_t batch_axis = 0;
+    size_t seq_axis = 3;
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    set_values(seq_lengths, {
+            1.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 
+            6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 
+            13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f, 
+            19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba0_sa2) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
+    size_t batch_axis = 0;
+    size_t seq_axis = 2;
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    set_values(seq_lengths, {
+            2.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f, 
+            8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f, 
+            14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f, 
+            20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp32_d2_2_3_2ba2_sa0) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    size_t batch_axis = 2;
+    size_t seq_axis = 0;
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    set_values(seq_lengths, {
+            1.0f, 1.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f, 
+            6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f, 
+            12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f, 
+            18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp16_d2_2_ba1_sa0) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 1, 1 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
+    size_t batch_axis = 1;
+    size_t seq_axis = 0;
+
+    set_values(input, {
+            FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f)
+    });
+
+    set_values(seq_lengths, {
+            1.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+            0.0f, 3.0f, 2.0f, 1.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba0_sa1) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    size_t batch_axis = 0;
+    size_t seq_axis = 1;
+
+    set_values(input, {
+            FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
+            FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
+            FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f), FLOAT16(25.0f), FLOAT16(26.0f)
+    });
+
+    set_values(seq_lengths, {
+            2.0f, 2.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<uint16_t >();
+
+    std::vector<float> expected_results = {
+            3.0f, 4.0f, 5.0f, 0.0f, 1.0f, 2.0f, 6.0f, 7.0f, 8.0f,
+            12.0f, 13.0f, 14.0f, 9.0f, 10.0f, 11.0f, 15.0f, 16.0f, 17.0f,
+            21.0f, 22.0f, 23.0f, 18.0f, 19.0f, 20.0f, 24.0f, 25.0f, 26.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp16_d3_3_3_ba2_sa0) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 3, 3, 1, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    size_t batch_axis = 2;
+    size_t seq_axis = 0;
+
+    set_values(input, {
+            FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
+            FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
+            FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f), FLOAT16(24.0f), FLOAT16(25.0f), FLOAT16(26.0f)
+    });
+
+    set_values(seq_lengths, {
+            2.0f, 2.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f,
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa3) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
+    size_t batch_axis = 0;
+    size_t seq_axis = 3;
+
+    set_values(input, {
+            FLOAT16(0.0f), FLOAT16( 1.0f), FLOAT16( 2.0f), FLOAT16( 3.0f), FLOAT16( 4.0f), FLOAT16( 5.0f), FLOAT16( 6.0f), FLOAT16( 7.0f), FLOAT16( 8.0f), FLOAT16( 9.0f),
+            FLOAT16(10.0f), FLOAT16( 11.0f), FLOAT16( 12.0f), FLOAT16( 13.0f), FLOAT16( 14.0f), FLOAT16( 15.0f), FLOAT16( 16.0f), FLOAT16( 17.0f), FLOAT16( 18.0f), FLOAT16( 19.0f),
+            FLOAT16(20.0f), FLOAT16( 21.0f), FLOAT16( 22.0f), FLOAT16( 23.0f)
+    });
+
+    set_values(seq_lengths, {
+            1.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f,
+            6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f,
+            13.0f, 12.0f, 15.0f, 14.0f, 17.0f, 16.0f,
+            19.0f, 18.0f, 21.0f, 20.0f, 23.0f, 22.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba0_sa2) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 1, 1 } });
+    size_t batch_axis = 0;
+    size_t seq_axis = 2;
+
+    set_values(input, {
+            FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
+            FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
+            FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
+    });
+
+    set_values(seq_lengths, {
+            2.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+            2.0f, 3.0f, 0.0f, 1.0f, 4.0f, 5.0f,
+            8.0f, 9.0f, 6.0f, 7.0f, 10.0f, 11.0f,
+            14.0f, 15.0f, 12.0f, 13.0f, 16.0f, 17.0f,
+            20.0f, 21.0f, 18.0f, 19.0f, 22.0f, 23.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
+
+TEST(reverese_sequence_gpu_test, fp16_d2_2_3_2ba2_sa0) {
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f16, format::bfyx, { 2, 2, 2, 3 } });
+    auto seq_lengths = memory::allocate(engine, { data_types::f32, format::bfyx, { 3, 1, 1, 1 } });
+    size_t batch_axis = 2;
+    size_t seq_axis = 0;
+
+    set_values(input, {
+            FLOAT16(0.0f), FLOAT16(1.0f), FLOAT16(2.0f), FLOAT16(3.0f), FLOAT16(4.0f), FLOAT16(5.0f), FLOAT16(6.0f), FLOAT16(7.0f), FLOAT16(8.0f), FLOAT16(9.0f),
+            FLOAT16(10.0f), FLOAT16(11.0f), FLOAT16(12.0f), FLOAT16(13.0f), FLOAT16(14.0f), FLOAT16(15.0f), FLOAT16(16.0f), FLOAT16(17.0f), FLOAT16(18.0f), FLOAT16(19.0f),
+            FLOAT16(20.0f), FLOAT16(21.0f), FLOAT16(22.0f), FLOAT16(23.0f)
+    });
+
+    set_values(seq_lengths, {
+            1.0f, 1.0f, 2.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("seq_lengths", seq_lengths.get_layout()));
+    topology.add(
+            reverse_sequence("reverse_sequence", "input", "seq_lengths", seq_axis, batch_axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("seq_lengths", seq_lengths);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("reverse_sequence").get_memory();
+    auto output_ptr = output.pointer<uint16_t>();
+
+    std::vector<float> expected_results = {
+            0.0f, 1.0f, 2.0f, 3.0f, 16.0f, 17.0f,
+            6.0f, 7.0f, 8.0f, 9.0f, 22.0f, 23.0f,
+            12.0f, 13.0f, 14.0f, 15.0f, 4.0f, 5.0f,
+            18.0f, 19.0f, 20.0f, 21.0f, 10.0f, 11.0f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], float16_to_float32(output_ptr[i]));
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp
index 6aecf6396..f5b9a7a53 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_gpu_test.cpp
@@ -47,7 +47,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_same_size) {
     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5     
     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
@@ -107,7 +107,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bfyx) {
     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5     
     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
@@ -165,7 +165,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_scale_bfyx) {
     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5     
     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -243,7 +243,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_same_size_bias_term) {
     //  f1: b0:  3.1    3.2  3.25   b1:   3.3   3.4   3.5     
     //  f1: b0:  4.6    4.7  4.75   b1:   4.8   4.9   4
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 3, 2 } });
@@ -313,7 +313,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_scalar) {
     //  Scale:
     //  0.1    0.2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -378,7 +378,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_y) {
     //  Scale:
     //  0.1    0.2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -445,7 +445,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_fb) {
     //  f0b0: 0.1   f0b1: 0.2
     //  f1b0: 0.5   f1b1: 2.0
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -511,7 +511,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_f) {
     //  Scale: per feature
     //  f0bx: 0.1   f1bx: 0.2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -578,7 +578,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_x) {
     //  Scale:
     //  0.1    0.2  0.25
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -646,7 +646,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_xy) {
     //  f0:  0.1    0.2  0.25
     //  f0:  0.6    0.7  0.75
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -719,7 +719,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_batch1) {
     //  f1: b0:  1.1    1.2  1.25    
     //  f1: b0:  1.6    1.7  1.75
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 2;
     auto feature_num = 2;
@@ -793,7 +793,7 @@ TEST(scale_gpu, basic_in2x3_scale_same_size_bx) {
     //  b0: -0.1 3.2  7
     //  b1: 0    1   -1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
@@ -857,7 +857,7 @@ TEST(scale_gpu, basic_in2x3_scale_same_size_xb) {
     //  x0: -0.1  3.2   7
     //  x1: 0       1  -1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
@@ -919,7 +919,7 @@ TEST(scale_gpu, basic_in2x3_scale_single_value_bx) {
     //  Bias:
     //  -0.1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
@@ -979,7 +979,7 @@ TEST(scale_gpu, basic_in2x3_scale_single_value_xb) {
     //  Bias:
     //  -0.1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
@@ -1036,7 +1036,7 @@ TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_bx) {
     //  b0: 3.1   0.2   0.17
     //  b1: 10     -3      1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 1, 3, 1 } });
@@ -1086,7 +1086,7 @@ TEST(scale_gpu, basic_in2x3_scale_same_size_no_bias_xb) {
     //  x0: 3.1    0.2   0.17
     //  x1: 10      -3      1
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb, { 3, 1, 2, 1 } });
@@ -1139,7 +1139,7 @@ TEST(scale_gpu, basic_in2x3x2x2_scale_yxfb_bfyx_same_size_padding) {
     //  0.1    0.2
     //  0.6    0.5
      
-    engine engine;
+    const auto& engine = get_test_engine();
     std::vector<format> formats_to_test = { format::yxfb , format::bfyx };
 
     for (std::vector<format>::iterator it = formats_to_test.begin(); it != formats_to_test.end(); ++it)
@@ -1204,7 +1204,7 @@ static network setup_scale_network(
     bool pass_bias          //TODO: a WA for lack of std::optional<tensor> bias
 )
 {
-    engine engine;
+    const auto& engine = get_test_engine();
     topology topology;
 
     auto input_mem = memory::allocate(engine, { dt, f, input_tensor });
@@ -1327,7 +1327,9 @@ public:
 
         std::vector<tests::test_params*> all_generic_params;
 
-        for (cldnn::data_types dt : test_data_types())
+        auto data_types = test_data_types();
+
+        for (cldnn::data_types dt : data_types)
         for (tensor & t : test_input_sizes)
         {
             std::vector<std::vector<int>> attempted_dims;
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_input_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_input_test.cpp
index 8b9a22c3b..c7057d549 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_input_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_input_test.cpp
@@ -46,7 +46,7 @@ TEST(scale_grad_input_gpu, basic_in2x3x2x2_scale_same_size) {
     //  f1: b0:  1.1    1.2  1.25   b1:   1.3   1.4   1.5     
     //  f1: b0:  1.6    1.7  1.75   b1:   1.8   1.9   2
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
     auto scale_input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 3, 2 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_weights_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_weights_test.cpp
index 79d750104..680c68ed9 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_weights_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/scale_grad_weights_test.cpp
@@ -52,7 +52,7 @@ TEST(scale_grad_weights_gpu, basic_in2x3x2x2) {
     //  f0: 0.1
     //  f1: 0.6  
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
     auto grad_input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
@@ -137,7 +137,7 @@ TEST(scale_grad_weights_gpu, basic_in2x3x2x2_bias) {
     //  f0: 1
     //  f1: 0.5
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto grad_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
@@ -237,7 +237,7 @@ TEST(scale_grad_weights_gpu, basic_in2x3x2x2_bias_momentum) {
     //  f0: 1
     //  f1: 0.5
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
     auto grad_input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 3, 2 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp
index 228664b87..abd2cffe4 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/select_gpu_test.cpp
@@ -29,17 +29,17 @@ using namespace tests;
 
 // select_gpu_f32
 TEST(select_gpu_f32, select_basic) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
     auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb, { 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
 
     topology topology;
     topology.add(input_layout("input", input.get_layout()));
     topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
     set_values(input, {
         1.f,   0.f, 5.f, 1.5f,
@@ -54,17 +54,17 @@ TEST(select_gpu_f32, select_basic) {
         15.f,  17.f,    8.f,  10.f,
         -2.f,  6.5f,  -0.5f, -2.5f });
 
-	set_values(mask, {
-		0.f,   0.f,  0.f,  0.f,
-		1.f,   1.f,  1.f,  1.f,
-		0.f,   1.f,  0.f,  1.f,
-		1.f,   0.f,  1.f,  0.f });
+    set_values(mask, {
+        0.f,   0.f,  0.f,  0.f,
+        1.f,   1.f,  1.f,  1.f,
+        0.f,   1.f,  0.f,  1.f,
+        1.f,   0.f,  1.f,  0.f });
 
     network network(engine, topology);
 
     network.set_input_data("input", input);
     network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
+    network.set_input_data("mask", mask);
     auto outputs = network.execute();
 
     auto output = outputs.at("select").get_memory();
@@ -83,1137 +83,1137 @@ TEST(select_gpu_f32, select_basic) {
 }
 
 TEST(select_gpu_f32, select_basic_negative) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-
-	set_values(input, {
-		1.f,   0.f, 5.f, 1.5f,
-		2.f,   0.f, 6.f, 5.2f,
-		3.f,  0.5f, 7.f, 12.f,
-		4.f, -0.5f, 8.f,  8.f
-	});
-
-	set_values(input2, {
-		0.5f,   2.5f,  0.5f,  2.5f,
-		5.f,   7.f,    2.f,   4.f,
-		15.f,  17.f,    8.f,  10.f,
-		-2.f,  6.5f,  -0.5f, -2.5f });
-
-	set_values(mask, {
-		-0.f,   -0.f,  -0.f,  -0.f,
-		-1.f,   -1.f,  -1.f,  -1.f,
-		-0.f,   -1.f,  -0.f,  -1.f,
-		-1.f,   -0.f,  -1.f,  -0.f });
-
-	network network(engine, topology);
-
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
-
-	auto output = outputs.at("select").get_memory();
-
-	float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
-		2.f,   0.f,    6.f,   5.2f,
-		15.f,   0.5f,   8.f,  12.f,
-		4.f,   6.5f,   8.f,  -2.5f };
-
-	auto output_ptr = output.pointer<float>();
-
-	for (int i = 0; i < 16; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+        5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values(mask, {
+        -0.f,   -0.f,  -0.f,  -0.f,
+        -1.f,   -1.f,  -1.f,  -1.f,
+        -0.f,   -1.f,  -0.f,  -1.f,
+        -1.f,   -0.f,  -1.f,  -0.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
+        2.f,   0.f,    6.f,   5.2f,
+        15.f,   0.5f,   8.f,  12.f,
+        4.f,   6.5f,   8.f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_comma) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-
-	set_values(input, {
-		1.f,   0.f, 5.f, 1.5f,
-		2.f,   0.f, 6.f, 5.2f,
-		3.f,  0.5f, 7.f, 12.f,
-		4.f, -0.5f, 8.f,  8.f
-	});
-
-	set_values(input2, {
-		0.5f,   2.5f,  0.5f,  2.5f,
-		5.f,   7.f,    2.f,   4.f,
-		15.f,  17.f,    8.f,  10.f,
-		-2.f,  6.5f,  -0.5f, -2.5f });
-
-	set_values(mask, {
-		0.f,   0.f,  0.f,  0.f,
-		0.1f,   0.3f,  0.5f,  0.7f,
-		-0.f,   -0.1f,  -0.f,  -0.5f,
-		-0.7f,   -0.f,  -1.5f,  -0.f });
-
-	network network(engine, topology);
-
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
-
-	auto output = outputs.at("select").get_memory();
-
-	float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
-		2.f,   0.f,    6.f,   5.2f,
-		15.f,   0.5f,   8.f,  12.f,
-		4.f,   6.5f,   8.f,  -2.5f };
-
-	auto output_ptr = output.pointer<float>();
-
-	for (int i = 0; i < 16; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+        5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values(mask, {
+        0.f,   0.f,  0.f,  0.f,
+        0.1f,   0.3f,  0.5f,  0.7f,
+        -0.f,   -0.1f,  -0.f,  -0.5f,
+        -0.7f,   -0.f,  -1.5f,  -0.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
+        2.f,   0.f,    6.f,   5.2f,
+        15.f,   0.5f,   8.f,  12.f,
+        4.f,   6.5f,   8.f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_error_input_sizes) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 5, 6 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 5, 6 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	EXPECT_ANY_THROW(network(engine, topology));
+    EXPECT_ANY_THROW(network(engine, topology));
 }
 
 TEST(select_gpu_f32, select_basic_error_mask_sizes) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 5, 6 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 3, 4, 5, 6 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	EXPECT_ANY_THROW(network(engine, topology));
+    EXPECT_ANY_THROW(network(engine, topology));
 }
 
 TEST(select_gpu_f32, select_basic_error_input_types) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-	EXPECT_ANY_THROW(network(engine, topology));
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+    EXPECT_ANY_THROW(network(engine, topology));
 }
 
 TEST(select_gpu_f32, select_basic_error_input_formats) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	EXPECT_ANY_THROW(network(engine, topology));
+    EXPECT_ANY_THROW(network(engine, topology));
 }
 
 TEST(select_gpu_f32, select_basic_byxf) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-
-	set_values(input, {
-		1.f,   0.f, 5.f, 1.5f,
-		2.f,   0.f, 6.f, 5.2f,
-		3.f,  0.5f, 7.f, 12.f,
-		4.f, -0.5f, 8.f,  8.f
-	});
-
-	set_values(input2, {
-		0.5f,   2.5f,  0.5f,  2.5f,
-		5.f,   7.f,    2.f,   4.f,
-		15.f,  17.f,    8.f,  10.f,
-		-2.f,  6.5f,  -0.5f, -2.5f });
-
-	set_values(mask, {
-		0.f,   0.f,  0.f,  0.f,
-		1.f,   1.f,  1.f,  1.f,
-		0.f,   1.f,  0.f,  1.f,
-		1.f,   0.f,  1.f,  0.f });
-
-	network network(engine, topology);
-
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
-
-	auto output = outputs.at("select").get_memory();
-
-	float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
-		2.f,   0.f,    6.f,   5.2f,
-		15.f,   0.5f,   8.f,  12.f,
-		4.f,   6.5f,   8.f,  -2.5f };
-
-	auto output_ptr = output.pointer<float>();
-
-	for (int i = 0; i < 16; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::byxf,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+        5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values(mask, {
+        0.f,   0.f,  0.f,  0.f,
+        1.f,   1.f,  1.f,  1.f,
+        0.f,   1.f,  0.f,  1.f,
+        1.f,   0.f,  1.f,  0.f });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
+        2.f,   0.f,    6.f,   5.2f,
+        15.f,   0.5f,   8.f,  12.f,
+        4.f,   6.5f,   8.f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_mask_f16) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-
-	set_values(input, {
-		1.f,   0.f, 5.f, 1.5f,
-		2.f,   0.f, 6.f, 5.2f,
-		3.f,  0.5f, 7.f, 12.f,
-		4.f, -0.5f, 8.f,  8.f
-	});
-
-	set_values(input2, {
-		0.5f,   2.5f,  0.5f,  2.5f,
-		5.f,   7.f,    2.f,   4.f,
-		15.f,  17.f,    8.f,  10.f,
-		-2.f,  6.5f,  -0.5f, -2.5f });
-
-	set_values<uint16_t>(mask, {
-		0,   0,  0,  0,
-		1,   1,  1,  1,
-		0,   1,  0,  1,
-		1,   0,  1,  0 });
-
-	network network(engine, topology);
-
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
-
-	auto output = outputs.at("select").get_memory();
-
-	float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
-		2.f,   0.f,    6.f,   5.2f,
-		15.f,   0.5f,   8.f,  12.f,
-		4.f,   6.5f,   8.f,  -2.5f };
-
-	auto output_ptr = output.pointer<float>();
-
-	for (int i = 0; i < 16; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+        5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values<uint16_t>(mask, {
+        0,   0,  0,  0,
+        1,   1,  1,  1,
+        0,   1,  0,  1,
+        1,   0,  1,  0 });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
+        2.f,   0.f,    6.f,   5.2f,
+        15.f,   0.5f,   8.f,  12.f,
+        4.f,   6.5f,   8.f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_mask_i8) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-
-	set_values(input, {
-		1.f,   0.f, 5.f, 1.5f,
-		2.f,   0.f, 6.f, 5.2f,
-		3.f,  0.5f, 7.f, 12.f,
-		4.f, -0.5f, 8.f,  8.f
-	});
-
-	set_values(input2, {
-		0.5f,   2.5f,  0.5f,  2.5f,
-		5.f,   7.f,    2.f,   4.f,
-		15.f,  17.f,    8.f,  10.f,
-		-2.f,  6.5f,  -0.5f, -2.5f });
-
-	set_values<char>(mask, {
-		0,   0,  0,  0,
-		1,   1,  1,  1,
-		0,   1,  0,  1,
-		1,   0,  1,  0 });
-
-	network network(engine, topology);
-
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
-
-	auto output = outputs.at("select").get_memory();
-
-	float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
-		2.f,   0.f,    6.f,   5.2f,
-		15.f,   0.5f,   8.f,  12.f,
-		4.f,   6.5f,   8.f,  -2.5f };
-
-	auto output_ptr = output.pointer<float>();
-
-	for (int i = 0; i < 16; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+        5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values<char>(mask, {
+        0,   0,  0,  0,
+        1,   1,  1,  1,
+        0,   1,  0,  1,
+        1,   0,  1,  0 });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
+        2.f,   0.f,    6.f,   5.2f,
+        15.f,   0.5f,   8.f,  12.f,
+        4.f,   6.5f,   8.f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_mask_u8) {
-	engine engine;
-
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } });
-
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
-
-	set_values(input, {
-		1.f,   0.f, 5.f, 1.5f,
-		2.f,   0.f, 6.f, 5.2f,
-		3.f,  0.5f, 7.f, 12.f,
-		4.f, -0.5f, 8.f,  8.f
-	});
-
-	set_values(input2, {
-		0.5f,   2.5f,  0.5f,  2.5f,
-		5.f,   7.f,    2.f,   4.f,
-		15.f,  17.f,    8.f,  10.f,
-		-2.f,  6.5f,  -0.5f, -2.5f });
-
-	set_values<char>(mask, {
-		0,   0,  0,  0,
-		1,   1,  1,  1,
-		0,   1,  0,  1,
-		1,   0,  1,  0 });
-
-	network network(engine, topology);
-
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
-
-	auto output = outputs.at("select").get_memory();
-
-	float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
-		2.f,   0.f,    6.f,   5.2f,
-		15.f,   0.5f,   8.f,  12.f,
-		4.f,   6.5f,   8.f,  -2.5f };
-
-	auto output_ptr = output.pointer<float>();
-
-	for (int i = 0; i < 16; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 2, 2, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 2, 2, 2, 2 } });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
+
+    set_values(input, {
+        1.f,   0.f, 5.f, 1.5f,
+        2.f,   0.f, 6.f, 5.2f,
+        3.f,  0.5f, 7.f, 12.f,
+        4.f, -0.5f, 8.f,  8.f
+    });
+
+    set_values(input2, {
+        0.5f,   2.5f,  0.5f,  2.5f,
+        5.f,   7.f,    2.f,   4.f,
+        15.f,  17.f,    8.f,  10.f,
+        -2.f,  6.5f,  -0.5f, -2.5f });
+
+    set_values<unsigned char>(mask, {
+        0,   0,  0,  0,
+        128,   210,  150,  177,
+        0,   211,  0,  255,
+        199,   0,  160,  0 });
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
+
+    auto output = outputs.at("select").get_memory();
+
+    float answers[16] = { 0.5f,  2.5f,   0.5f,  2.5f,
+        2.f,   0.f,    6.f,   5.2f,
+        15.f,   0.5f,   8.f,  12.f,
+        4.f,   6.5f,   8.f,  -2.5f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (int i = 0; i < 16; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values(input, {
-		1.f,	0.f,	2.f,	0.f
-	});
+    set_values(input, {
+        1.f,    0.f,    2.f,    0.f
+    });
 
-	set_values(input2, {
-		0.5f,	2.5f,	5.f,	7.f
-	});
+    set_values(input2, {
+        0.5f,    2.5f,    5.f,    7.f
+    });
 
-	set_values(mask, {
-		0.f,	0.f,	1.f,	1.f
-	});
+    set_values(mask, {
+        0.f,    0.f,    1.f,    1.f
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	float answers[4] = { 
-		0.5f,	2.5f,	2.f,	0.f
-	};
+    float answers[4] = { 
+        0.5f,    2.5f,    2.f,    0.f
+    };
 
-	auto output_ptr = output.pointer<float>();
+    auto output_ptr = output.pointer<float>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_bfyx_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values(input, {
-		1.f,   0.f,
-		2.f,   0.f
-	});
+    set_values(input, {
+        1.f,   0.f,
+        2.f,   0.f
+    });
 
-	set_values(input2, {
-		0.5f,   2.5f,
-		5.f,   7.f
-	});
+    set_values(input2, {
+        0.5f,   2.5f,
+        5.f,   7.f
+    });
 
-	set_values(mask, {
-		0.f,   0.f,
-		1.f,   1.f
-	});
+    set_values(mask, {
+        0.f,   0.f,
+        1.f,   1.f
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	float answers[4] = {
-		0.5f,  2.5f,
-		2.f,   0.f
-	};
+    float answers[4] = {
+        0.5f,  2.5f,
+        2.f,   0.f
+    };
 
-	auto output_ptr = output.pointer<float>();
+    auto output_ptr = output.pointer<float>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f32, select_basic_byxf_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values(input, {
-		1.f,   0.f,
-		2.f,   0.f
-	});
+    set_values(input, {
+        1.f,   0.f,
+        2.f,   0.f
+    });
 
-	set_values(input2, {
-		0.5f,   2.5f,
-		5.f,   7.f
-	});
+    set_values(input2, {
+        0.5f,   2.5f,
+        5.f,   7.f
+    });
 
-	set_values(mask, {
-		0.f,   0.f,
-		1.f,   1.f
-	});
+    set_values(mask, {
+        0.f,   0.f,
+        1.f,   1.f
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	float answers[4] = {
-		0.5f,  2.5f,
-		2.f,   0.f
-	};
+    float answers[4] = {
+        0.5f,  2.5f,
+        2.f,   0.f
+    };
 
-	auto output_ptr = output.pointer<float>();
+    auto output_ptr = output.pointer<float>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 // select_gpu_f16
 TEST(select_gpu_f16, select_basic_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<uint16_t>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<uint16_t>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<uint16_t>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<uint16_t>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<uint16_t>(mask, {
-		0,   0,
-		1,   1
-	});
+    set_values<uint16_t>(mask, {
+        0,   0,
+        1,   1
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	uint16_t answers[4] = {
-		0,  2,
-		2,   0
-	};
+    uint16_t answers[4] = {
+        0,  2,
+        2,   0
+    };
 
-	auto output_ptr = output.pointer<uint16_t>();
+    auto output_ptr = output.pointer<uint16_t>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f16, select_basic_mask_f32_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<uint16_t>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<uint16_t>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<uint16_t>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<uint16_t>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<float>(mask, {
-		0.f,   0.f,
-		1.5f,   0.4f
-	});
+    set_values<float>(mask, {
+        0.f,   0.f,
+        1.5f,   0.4f
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	uint16_t answers[4] = {
-		0,  2,
-		2,   0
-	};
+    uint16_t answers[4] = {
+        0,  2,
+        2,   0
+    };
 
-	auto output_ptr = output.pointer<uint16_t>();
+    auto output_ptr = output.pointer<uint16_t>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f16, select_basic_mask_i8_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<uint16_t>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<uint16_t>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<uint16_t>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<uint16_t>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<char>(mask, {
-		0,   0,
-		1,   1
-	});
+    set_values<char>(mask, {
+        0,   0,
+        1,   1
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	uint16_t answers[4] = {
-		0,  2,
-		2,   0
-	};
+    uint16_t answers[4] = {
+        0,  2,
+        2,   0
+    };
 
-	auto output_ptr = output.pointer<uint16_t>();
+    auto output_ptr = output.pointer<uint16_t>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 TEST(select_gpu_f16, select_basic_mask_u8_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<uint16_t>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<uint16_t>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<uint16_t>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<uint16_t>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<char>(mask, {
-		0,   0,
-		1,   1
-	});
+    set_values<unsigned char>(mask, {
+        0,   0,
+        128,   255
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	uint16_t answers[4] = {
-		0,  2,
-		2,   0
-	};
+    uint16_t answers[4] = {
+        0,  2,
+        2,   0
+    };
 
-	auto output_ptr = output.pointer<uint16_t>();
+    auto output_ptr = output.pointer<uint16_t>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
 }
 
 // select_gpu_i8
 TEST(select_gpu_i8, select_basic_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<char>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<char>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<char>(mask, {
-		0,   0,
-		3,   5
-	});
+    set_values<char>(mask, {
+        0,   0,
+        3,   5
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    int answers[4] = {
+        0,  2,
+        2,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 TEST(select_gpu_i8, select_basic_mask_f32_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<char>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<char>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<float>(mask, {
-		0.f,   0.f,
-		1.5f,  0.4f
-	});
+    set_values<float>(mask, {
+        0.f,   0.f,
+        1.5f,  0.4f
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    int answers[4] = {
+        0,  2,
+        2,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 TEST(select_gpu_i8, select_basic_mask_f16_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<char>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<char>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<uint16_t>(mask, {
-		0,   0,
-		3,   5
-	});
+    set_values<uint16_t>(mask, {
+        0,   0,
+        3,   5
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    int answers[4] = {
+        0,  2,
+        2,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 TEST(select_gpu_i8, select_basic_mask_u8_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<char>(input, {
+        1,   0,
+        2,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<char>(input2, {
+        0,   2,
+        5,   7
+    });
 
-	set_values<char>(mask, {
-		0,   0,
-		3,   5
-	});
+    set_values<unsigned char>(mask, {
+        0,   0,
+        128,   255
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    int answers[4] = {
+        0,  2,
+        2,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 // select_gpu_u8
 TEST(select_gpu_u8, select_basic_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<unsigned char>(input, {
+        128,   0,
+        255,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<unsigned char>(input2, {
+        0,   255,
+        205,   128
+    });
 
-	set_values<char>(mask, {
-		0,   0,
-		1,   1
-	});
+    set_values<unsigned char>(mask, {
+        0,   0,
+        128,   255
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    unsigned char answers[4] = {
+        0,  255,
+        255,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<unsigned char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 TEST(select_gpu_u8, select_basic_mask_f32_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<unsigned char>(input, {
+        128,   0,
+        255,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<unsigned char>(input2, {
+        0,   255,
+        205,   128
+    });
 
-	set_values<float>(mask, {
-		0.f,   0.f,
-		1.5f,  0.4f
-	});
+    set_values<float>(mask, {
+        0.f,   0.f,
+        1.5f,  0.4f
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    int answers[4] = {
+        0,  255,
+        255,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<unsigned char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 TEST(select_gpu_u8, select_basic_mask_f16_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<unsigned char>(input, {
+        128,   0,
+        255,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<unsigned char>(input2, {
+        0,   255,
+        205,   128
+    });
 
-	set_values<uint16_t>(mask, {
-		0,   0,
-		1,   1
-	});
+    set_values<uint16_t>(mask, {
+        0,   0,
+        1,   1
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    unsigned char answers[4] = {
+        0,  255,
+        255,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<unsigned char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
 
 TEST(select_gpu_u8, select_basic_mask_i8_1x1x2x2) {
-	engine engine;
+    const auto& engine = get_test_engine();
 
-	auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
-	auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto input2 = memory::allocate(engine, { data_types::u8, format::yxfb,{ 1, 1, 2, 2 } });
+    auto mask = memory::allocate(engine, { data_types::i8, format::yxfb,{ 1, 1, 2, 2 } });
 
-	topology topology;
-	topology.add(input_layout("input", input.get_layout()));
-	topology.add(input_layout("input2", input2.get_layout()));
-	topology.add(input_layout("mask", mask.get_layout()));
-	topology.add(cldnn::select("select", "input", "input2", "mask"));
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(input_layout("input2", input2.get_layout()));
+    topology.add(input_layout("mask", mask.get_layout()));
+    topology.add(cldnn::select("select", "input", "input2", "mask"));
 
-	set_values<char>(input, {
-		1,   0,
-		2,   0
-	});
+    set_values<unsigned char>(input, {
+        128,   0,
+        255,   0
+    });
 
-	set_values<char>(input2, {
-		0,   2,
-		5,   7
-	});
+    set_values<unsigned char>(input2, {
+        0,   255,
+        205,   128
+    });
 
-	set_values<char>(mask, {
-		0,   0,
-		1,   1
-	});
+    set_values<char>(mask, {
+        0,   0,
+        1,   1
+    });
 
-	network network(engine, topology);
+    network network(engine, topology);
 
-	network.set_input_data("input", input);
-	network.set_input_data("input2", input2);
-	network.set_input_data("mask", mask);
-	auto outputs = network.execute();
+    network.set_input_data("input", input);
+    network.set_input_data("input2", input2);
+    network.set_input_data("mask", mask);
+    auto outputs = network.execute();
 
-	auto output = outputs.at("select").get_memory();
+    auto output = outputs.at("select").get_memory();
 
-	int answers[4] = {
-		0,  2,
-		2,  0
-	};
+    unsigned char answers[4] = {
+        0,  255,
+        255,  0
+    };
 
-	auto output_ptr = output.pointer<char>();
+    auto output_ptr = output.pointer<unsigned char>();
 
-	for (int i = 0; i < 4; i++)
-	{
-		EXPECT_EQ(answers[i], output_ptr[i]);
-	}
+    for (int i = 0; i < 4; i++)
+    {
+        EXPECT_EQ(answers[i], output_ptr[i]);
+    }
 }
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp
new file mode 100644
index 000000000..630b3b8a0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/shuffle_channels_test.cpp
@@ -0,0 +1,386 @@
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+
+#include <api/CPP/input_layout.hpp>
+#include <api/CPP/memory.hpp>
+#include <api/CPP/shuffle_channels.hpp>
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+
+#include <cstddef>
+#include <tests/test_utils/test_utils.h>
+
+using namespace cldnn;
+using namespace ::tests;
+
+TEST(shuffle_channels_fp32_gpu, d1_15_2_2_ax1_g5) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 15, 2, 2 } });
+    int32_t axis = 1;
+    int32_t group = 5;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f,
+            30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
+            50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
+            4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
+            8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+
+TEST(shuffle_channels_fp32_gpu, d1_15_2_2_axm3_g5) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 15, 2, 2 } });
+    int32_t axis = -3;
+    int32_t group = 5;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f,
+            30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
+            50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
+            4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
+            8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d15_2_2_ax0_g5) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 15, 2, 1, 2 } });
+    int32_t axis = 0;
+    int32_t group = 5;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f,
+            30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
+            50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
+            4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
+            8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d15_2_2_axm4_g5) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 15, 2, 1, 2 } });
+    int32_t axis = -4;
+    int32_t group = 5;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f,
+            30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
+            50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 1.f, 2.f, 3.f, 12.f, 13.f, 14.f, 15.f, 24.f, 25.f, 26.f, 27.f, 36.f, 37.f, 38.f, 39.f, 48.f, 49.f, 50.f, 51.f,
+            4.f, 5.f, 6.f, 7.f, 16.f, 17.f, 18.f, 19.f, 28.f, 29.f, 30.f, 31.f, 40.f, 41.f, 42.f, 43.f, 52.f, 53.f, 54.f, 55.f,
+            8.f, 9.f, 10.f, 11.f, 20.f, 21.f, 22.f, 23.f, 32.f, 33.f, 34.f, 35.f, 44.f, 45.f, 46.f, 47.f, 56.f, 57.f, 58.f, 59.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g3) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 6 } });
+    int32_t axis = -2;
+    int32_t group = 3;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 2.f, 4.f, 1.f, 3.f, 5.f, 6.f, 8.f, 10.f, 7.f, 9.f, 11.f,
+            12.f, 14.f, 16.f, 13.f, 15.f, 17.f, 18.f, 20.f, 22.f, 19.f, 21.f, 23.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g3) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 6, 1, 2 } });
+    int32_t axis = -3;
+    int32_t group = 3;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 1.f, 4.f, 5.f, 8.f, 9.f, 2.f, 3.f, 6.f, 7.f, 10.f, 11.f,
+            12.f, 13.f, 16.f, 17.f, 20.f, 21.f, 14.f, 15.f, 18.f, 19.f, 22.f, 23.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d2_2_6_axm2_g2) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 6 } });
+    int32_t axis = -2;
+    int32_t group = 2;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 3.f, 1.f, 4.f, 2.f, 5.f, 6.f, 9.f, 7.f, 10.f, 8.f, 11.f,
+            12.f, 15.f, 13.f, 16.f, 14.f, 17.f, 18.f, 21.f, 19.f, 22.f, 20.f, 23.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d2_6_2_axm3_g2) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 6, 1, 2 } });
+    int32_t axis = -3;
+    int32_t group = 2;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 1.f, 6.f, 7.f, 2.f, 3.f, 8.f, 9.f, 4.f, 5.f, 10.f, 11.f,
+            12.f, 13.f, 18.f, 19.f, 14.f, 15.f, 20.f, 21.f, 16.f, 17.f, 22.f, 23.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
+
+TEST(shuffle_channels_fp32_gpu, d6_axm0_g2) {
+    engine engine;
+
+    auto input0 = memory::allocate(engine, { data_types::f32, format::bfyx, { 6, 1, 1, 1 } });
+    int32_t axis = 0;
+    int32_t group = 2;
+
+    set_values(input0, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
+    });
+
+    topology topology;
+    topology.add(input_layout("Input0", input0.get_layout()));
+    topology.add(
+            shuffle_channels("shuffle_channels", "Input0", group, axis)
+    );
+
+    network network(engine, topology);
+
+    network.set_input_data("Input0", input0);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("shuffle_channels").get_memory();
+    auto output_ptr = output.pointer<float>();
+
+    std::vector<float> expected_results = {
+            0.f, 3.f, 1.f, 4.f, 2.f, 5.f
+    };
+
+    for (size_t i = 0; i < expected_results.size(); ++i) {
+        EXPECT_EQ(expected_results[i], output_ptr[i]);
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp
index 2a1802c4a..a68500862 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_gpu_test.cpp
@@ -41,12 +41,13 @@ public:
     float out_buffer[out_size];
     float expected_buffer[out_size];
 
-    cldnn::engine engine;
+    const cldnn::engine& engine;
     cldnn::memory input;
+
     //neural::primitive output = memory::allocate({ memory::format::xb_f32, {output_b, {{output_x}}, 1}});
 
     softmax_gpu_xb_f32_test_fixture()
-        :engine()
+        : engine(get_test_engine())
         ,input(memory::allocate(engine, { data_types::f32, format::yxfb, { input_b, 1, input_x, 1}}))
     {}
 
@@ -191,7 +192,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_fyx) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3,
         batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     topology topology;
@@ -264,7 +265,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_y) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3,
         batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     topology topology;
@@ -359,7 +360,7 @@ TEST(softmax_gpu_bfyx_f32, normalize_f) {
     //  Input  : 2x3x2x2
     static const int32_t x_size = 2, y_size = 2, feature_num = 3,
         batch_num = 2, buf_size = x_size*y_size * batch_num * feature_num;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ batch_num, feature_num, x_size , y_size } });
     topology topology;
@@ -447,7 +448,7 @@ TEST(softmax_gpu_yxfb_f32, normalize_f) {
 
     static const int32_t x_size = 1, y_size = 2, feature_num = 1,
         batch_num = 12, buf_size = x_size*y_size * batch_num * feature_num;
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ batch_num, feature_num, y_size , x_size } });
     topology topology;
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_loss_grad_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_loss_grad_gpu_test.cpp
index 6c5a5efed..302ca0b74 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_loss_grad_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/softmax_loss_grad_gpu_test.cpp
@@ -31,7 +31,7 @@ using namespace tests;
 
 TEST(softmax_loss_grad_f32_fw_gpu, basic1) {
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 4, 1 } });
     auto labels = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 1, 1, 1 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp
index 2df001879..921c382ce 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/split_gpu_test.cpp
@@ -23,8 +23,12 @@
 #include <api/CPP/topology.hpp>
 #include <api/CPP/network.hpp>
 #include <api/CPP/engine.hpp>
+#include <api/CPP/reorder.hpp>
 #include "test_utils/test_utils.h"
 
+#include <sstream>
+#include <iomanip>
+
 using namespace cldnn;
 using namespace tests;
 
@@ -55,6 +59,300 @@ void check_feature_map(cldnn::pointer<T> output_ptr, std::vector<T> &input_vec,
     }
 }
 
+template<typename T>
+void split_test(int batch_num, int feature_num, int x_size, int y_size, std::vector<cldnn::tensor> split_offsets)
+{
+    const auto& engine = get_test_engine();
+    cldnn::tensor reference_input_size = { batch_num, feature_num, x_size, y_size };
+
+    cldnn::memory input = memory::allocate(engine, { type_to_data_type<T>::value, format::bfyx, reference_input_size });
+    std::vector<std::pair<primitive_id, cldnn::tensor> > input_ids_offsets;
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    
+    // lambda exoression to create the primitive id for the splits
+    auto create_split_id = [](size_t splitNum) {
+        std::stringstream ss;
+        ss << std::setw(5) << std::setfill('0') << splitNum;
+
+        return ss.str();
+    };
+
+    // Create the splits with the split ids for the topology
+    for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++) 
+    {
+        input_ids_offsets.push_back({ create_split_id(splitNum), split_offsets[splitNum]});
+    }
+
+    topology.add(split("split", "input", input_ids_offsets));
+
+    std::vector<T> input_vec = generate_random_input<T>(batch_num, feature_num, y_size, x_size, -10, 10);
+    set_values(input, input_vec);
+
+    network network(engine, topology);
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    // The number of splits should match the expected number of splits
+    EXPECT_EQ(outputs.size(), size_t(split_offsets.size()));
+    
+    std::vector<cldnn::tensor> expected_sizes;
+    for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++)  // Calculate the expected sizes
+    {
+        cldnn::tensor size;
+
+        if (splitNum < (split_offsets.size() - 1))
+        {
+            size = split_offsets[splitNum + 1] - split_offsets[splitNum];
+        }
+        else
+        {
+            size = reference_input_size - split_offsets[splitNum];
+        }
+
+        // For all the other dimensions, copy from the split_input
+        for (int dimension = 0; dimension < CLDNN_TENSOR_DIM_MAX; dimension++)
+        {
+            size.raw[dimension]
+                = (size.raw[dimension] == 0) ? reference_input_size.raw[dimension] : size.raw[dimension];
+        }
+
+        expected_sizes.push_back(size);
+    }
+
+    pointer<T> input_ptr = input.pointer<T>();
+
+    for (size_t splitNum = 0; splitNum < split_offsets.size(); splitNum++)
+    {
+        primitive_id split_id = "split:" + create_split_id(splitNum);
+        cldnn::memory output = outputs.at(split_id).get_memory();
+        auto prim = output.get_layout();
+        EXPECT_EQ(prim.size, expected_sizes[splitNum]);
+        auto output_ptr = output.pointer<T>();
+
+        // Output tensor size
+        auto output_batch = prim.size.batch[0];
+        auto output_feature = prim.size.feature[0];
+        auto output_x = prim.size.spatial[0];
+        auto output_y = prim.size.spatial[1];
+
+        // Input offsets, starting from which we will compare the output
+        auto input_batch_offset = split_offsets[splitNum].batch[0];
+        auto input_feature_offset = split_offsets[splitNum].feature[0];
+        auto input_y_offset = split_offsets[splitNum].spatial[1];
+        auto input_x_offset = split_offsets[splitNum].spatial[0];
+        
+        // iterator to iterate through input buffer
+        auto input_batch_itr = input_batch_offset;
+        auto input_feature_itr = input_feature_offset;
+        auto input_y_itr = input_y_offset;
+        auto input_x_itr = input_x_offset;
+        
+        for (auto b = 0; b < output_batch; ++b) {  // B
+            
+                // reset the input feature iterator
+            input_feature_itr = input_feature_offset; 
+            for (auto f = 0; f < output_feature; f++) {  // F
+                
+                // reset the input y iterator
+                input_y_itr = input_y_offset;  
+                for (auto y = 0; y < output_y; y++) {  // Y
+                    
+                    // reset the input x iterator
+                    input_x_itr = input_x_offset;  
+                    for (auto x = 0; x < output_x; x++) {  // X
+                        auto linear_id = input_x_itr + x_size * (input_y_itr + y_size * (input_feature_itr + feature_num * input_batch_itr)); // index in input
+                        auto output_linear_id = x + output_x * (y + output_y * (f + output_feature * b)); // index in output
+                        EXPECT_EQ(output_ptr[output_linear_id], input_vec[linear_id]);
+                        input_x_itr++;  // update the input x iterator
+                    }
+                    input_y_itr++;  // update the input y iterator
+                }
+                input_feature_itr++;  // update the input feature iterator
+            }
+            input_batch_itr++;  // update the input batch iterator
+        }
+    }
+}
+
+TEST(split_gpu, split_1d_uneven_2_splits) {
+
+    //  Input      : 2x4x3x3
+    //  Output1    : 2x1x3x3
+    //  Output2    : 2x3x3x3
+    //  Split params:
+    //  id: "out0", offsets: { 0, 0, 0, 0 }
+    //  id: "out1", offsets: { 0, 1, 0, 0 }
+
+    auto batch_num = 2;
+    auto feature_num = 4;
+    auto x_size = 3;
+    auto y_size = 3;
+    std::vector<cldnn::tensor> split_offsets = {
+                                                {0, 0, 0, 0},
+                                                {0, 1, 0, 0}                                                
+                                               };
+
+    split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
+}
+
+
+TEST(split_gpu, basic_split_concat_optimization) {
+
+    const auto& engine = get_test_engine();
+
+    auto input = memory::allocate(engine, { data_types::f32,format::bfyx,{ 1, 25, 1, 256 } });
+    tests::set_random_values<float>(input);
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    std::vector<std::pair<primitive_id, tensor>> offsets;
+    std::vector<primitive_id> ids;
+    for (int i = 0; i < 25; i++)
+    {
+        auto id = "crop_" + std::to_string(i);
+        ids.push_back("split:" + id);
+        offsets.push_back({ id, {0, i, 0, 0} });
+    }
+
+    topology.add(split("split", "input", offsets));
+    topology.add(concatenation("concat", ids, concatenation::along_f));
+    topology.add(reorder("output", "concat", format::bfyx, data_types::f32));
+
+    build_options opts;
+    opts.set_option(build_option::optimize_data(true));
+    network network(engine, topology, opts);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    auto output = outputs.at("output").get_memory();
+    auto output_ptr = output.pointer<float>();
+    auto input_ptr = input.pointer<float>();
+
+    for (int i = 0; i < 25*256; ++i)
+    {
+        EXPECT_EQ(output_ptr[i], input_ptr[i]);
+    }
+}
+
+TEST(split_gpu, split_1d_uneven_3_splits) {
+
+    //  Input      : 2x8x3x3
+    //  Output1    : 2x1x3x3
+    //  Output2    : 2x3x3x3
+    //  Output3    : 2x4x3x3
+    //  Split params:
+    //  id: "out0", offsets: { 0, 0, 0, 0 }
+    //  id: "out1", offsets: { 0, 1, 0, 0 }
+    //  id: "out2", offsets: { 0, 4, 0, 0 }
+
+    auto batch_num = 2;
+    auto feature_num = 8;
+    auto x_size = 3;
+    auto y_size = 3;
+    std::vector<cldnn::tensor> split_offsets = {
+                                                {0, 0, 0, 0},
+                                                {0, 1, 0, 0},
+                                                {0, 4, 0, 0},
+                                               };
+
+    split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
+}
+
+TEST(split_gpu, split_2d_uneven_2_splits) {
+
+    //  Input      : 2x8x10x3
+    //  Output1    : 2x1x4x3
+    //  Output2    : 2x3x6x3
+    //  Split params:
+    //  id: "out0", offsets: { 0, 0, 0, 0 }
+    //  id: "out1", offsets: { 0, 1, 4, 0 }
+
+    auto batch_num = 2;
+    auto feature_num = 8;
+    auto x_size = 10;
+    auto y_size = 3;
+    std::vector<cldnn::tensor> split_offsets = {
+                                                {0, 0, 0, 0},
+                                                {0, 1, 4, 0}
+                                               };
+
+    split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
+}
+
+TEST(split_gpu, split_2d_uneven_3_split3) {
+
+    //  Input      : 2x8x10x3
+    //  Output1    : 2x1x4x3
+    //  Output2    : 2x3x3x3
+    //  Output3    : 2x4x3x3
+    //  Split params:
+    //  id: "out0", offsets: { 0, 0, 0, 0 }
+    //  id: "out1", offsets: { 0, 1, 4, 0 }
+    //  id: "out2", offsets: { 0, 4, 7, 0 }
+
+    auto batch_num = 2;
+    auto feature_num = 8;
+    auto x_size = 10;
+    auto y_size = 3;
+    std::vector<cldnn::tensor> split_offsets = {
+                                                {0, 0, 0, 0},
+                                                {0, 1, 4, 0},
+                                                {0, 4, 7, 0},
+                                               };
+
+    split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
+}
+
+TEST(split_gpu, split_3d_uneven_2_splits) {
+
+    //  Input      : 2x8x10x3
+    //  Output1    : 2x1x4x1
+    //  Output2    : 2x7x6x2
+    //  Split params:
+    //  id: "out0", offsets: { 0, 0, 0, 0 }
+    //  id: "out1", offsets: { 0, 1, 4, 1 }
+
+    auto batch_num = 2;
+    auto feature_num = 8;
+    auto x_size = 10;
+    auto y_size = 3;
+    std::vector<cldnn::tensor> split_offsets = {
+                                                {0, 0, 0, 0},
+                                                {0, 1, 4, 1}
+                                               };
+
+    split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
+}
+
+TEST(split_gpu, split_3d_uneven_3_splits) {
+
+    //  Input      : 2x8x10x5
+    //  Output1    : 2x1x4x1
+    //  Output2    : 2x6x4x1
+    //  Output3    : 2x1x2x1
+    //  Split params:
+    //  id: "out0", offsets: { 0, 0, 0, 0 }
+    //  id: "out1", offsets: { 0, 1, 4, 1 }
+    //  id: "out2", offsets: { 0, 7, 8, 2 }
+
+    auto batch_num = 2;
+    auto feature_num = 8;
+    auto x_size = 10;
+    auto y_size = 3;
+    std::vector<cldnn::tensor> split_offsets = {
+                                                {0, 0, 0, 0},
+                                                {0, 1, 4, 1},
+                                                {0, 7, 8, 2}
+                                               };
+
+    split_test<float>(batch_num, feature_num, x_size, y_size, split_offsets);
+}
+
 TEST(split_gpu, basic_in2x3x2x2_split_feature_bfyx) {
     //  Input      : 6x3x4x3
     //  3 x Outputs: 6x1x4x3
@@ -63,7 +361,7 @@ TEST(split_gpu, basic_in2x3x2x2_split_feature_bfyx) {
     //  id: "out1", offsets: { 0, 1, 0, 0 }
     //  id: "out2", offsets: { 0, 2, 0, 0 }
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 6;
     auto feature_num = 3;
@@ -110,7 +408,7 @@ TEST(split_gpu, basic_in2x3x2x2_split_scale_feature_bfyx) {
     //  id: "out2", offsets: { 0, 2, 0, 0 }
     //  Additional scale layer at the end
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto batch_num = 6;
     auto feature_num = 3;
@@ -143,7 +441,7 @@ TEST(split_gpu, basic_in2x3x2x2_split_scale_feature_bfyx) {
     set_values(scale_input1, scale_input_vec1);
     std::vector<float> scale_input_vec2 = { 3.f };
     set_values(scale_input2, scale_input_vec2);
-
+   
     std::vector<float> input_vec = generate_random_input<float>(batch_num, feature_num, y_size, x_size, -10, 10);
     set_values(input, input_vec);
 
@@ -165,4 +463,4 @@ TEST(split_gpu, basic_in2x3x2x2_split_scale_feature_bfyx) {
         auto output_ptr = output.pointer<float>();
         check_feature_map<float>(output_ptr, input_vec, batch_num, feature_num, y_size, x_size, i, i + 1);
     }
-}
-\ No newline at end of file
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp
new file mode 100644
index 000000000..c673071f2
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/strided_slice_gpu_test.cpp
@@ -0,0 +1,375 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+#include <gtest/gtest.h>
+#include <api/CPP/input_layout.hpp>
+#include "api/CPP/strided_slice.hpp"
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/engine.hpp>
+#include "test_utils/test_utils.h"
+#include <api/CPP/data.hpp>
+
+
+using namespace cldnn;
+using namespace tests;
+
+
+TEST(strided_slice_gpu_f32, test_2x2x2x2) {
+    // Input (BFYX): 2x2x2x2
+    // Begin (BFYX): 0x0x0x0
+    // End (BFYX): 2x2x2x2
+    // Stride (BFYX): 1x1x1x1
+    // Output (BFYX): 2x2x2x2
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+    set_values(begin, {
+            0, 0, 0, 0
+    });
+    set_values(end, {
+            2, 2, 2, 2
+    });
+    set_values(strides, {
+            1, 1, 1, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32, test_2x2x2x2_2) {
+    // Input (BFYX): 2x2x2x2
+    // Begin (BFYX): 1x1x1x1
+    // End (BFYX): 2x2x2x2
+    // Stride (BFYX): 1x1x1x1
+    // Output (BFYX): 1x1x1x1
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+            9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+    set_values(begin, {
+            1, 1, 1, 1
+    });
+    set_values(end, {
+            2, 2, 2, 2
+    });
+    set_values(strides, {
+            1, 1, 1, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = { 15.f };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32, test_2x2x4x3) {
+    // Input (BFYX): 2x2x4x3
+    // Begin (BFYX): 0x0x0x0
+    // End (BFYX): 2x2x4x3
+    // Stride (BFYX): 1x1x2x1
+    // Output (BFYX): 2x2x2x3
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 4 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f,
+            9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f, 17.f,
+            18.f, 19.f, 20.f, 21.f, 22.f, 23.f, 24.f, 25.f, 26.f,
+            27.f, 28.f, 29.f, 30.f, 31.f, 32.f, 33.f, 34.f, 35.f,
+            36.f, 37.f, 38.f, 39.f, 40.f, 41.f, 42.f, 43.f, 44.f,
+            45.f, 46.f, 47.f
+    });
+    set_values(begin, {
+            0, 0, 0, 0
+    });
+    set_values(end, {
+            2, 2, 4, 3
+    });
+    set_values(strides, {
+            1, 1, 2, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.f, 1.f, 2.f, 6.f, 7.f, 8.f, 12.f, 13.f, 14.f, 18.f, 19.f, 20.f,
+            24.f, 25.f, 26.f, 30.f, 31.f, 32.f, 36.f, 37.f, 38.f, 42.f, 43.f, 44.f
+    };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32, test_2x2x4x4) {
+    // Input (BFYX): 2x2x1x1
+    // Begin (BFYX): 1x0x0x1
+    // End (BFYX): 2x2x4x4
+    // Stride (BFYX): 1x1x1x2
+    // Output (BFYX): 1x2x2x3
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 4, 4 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+            20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f,
+            30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f,
+            40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
+            50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f,
+            60.0f, 61.0f, 62.0f, 63.0f
+    });
+    set_values(begin, {
+            1, 0, 0, 1
+    });
+    set_values(end, {
+            2, 2, 4, 4
+    });
+    set_values(strides, {
+            1, 1, 2, 1
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, {}, {}, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            33.f, 34.f, 35.f, 41.f, 42.f, 43.f, 49.f, 50.f, 51.f, 57.f, 58.f, 59.f
+    };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32, test_2x2x4x1_new_axis_mask) {
+    // Input (BFYX): 2x2x4x1
+    // New_axis_mask: 1
+    // Output (BFYX): 1x2x2x4
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 4 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    });
+    set_values(begin, {
+            1, 0, 1, 0
+    });
+    set_values(end, {
+            2, 2, 4, 4
+    });
+    set_values(strides, {
+            1, 1, 1, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, { 1 }, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+            10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f
+    };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
+
+TEST(strided_slice_gpu_f32, test_2x2x1x1_new_axis_mask_2) {
+    // Input (BFYX): 2x2x1x1
+    // New_axis_mask: 101
+    // Output (BFYX): 1x2x1x2
+
+    const auto& engine = get_test_engine();
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 1, 1 } });
+    auto begin = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto end = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+    auto strides = memory::allocate(engine, { data_types::i32, format::bfyx, { 4, 1, 1, 1 } });
+
+    set_values(input, {
+            0.0f, 1.0f, 2.0f, 3.0f
+    });
+    set_values(begin, {
+            1, 0, 1, 0
+    });
+    set_values(end, {
+            2, 2, 4, 4
+    });
+    set_values(strides, {
+            1, 1, 1, 2
+    });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("input2", begin));
+    topology.add(data("input3", end));
+    topology.add(data("input4", strides));
+    topology.add(strided_slice("strided_slice", "input", "input2", "input3", "input4", {}, {}, { 1, 0, 1 }, {}));
+
+    network network(engine, topology);
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), size_t(1));
+    EXPECT_EQ(outputs.begin()->first, "strided_slice");
+
+    auto output = outputs.at("strided_slice").get_memory();
+
+    std::vector<float> answers = {
+            0.0f, 1.0f, 2.0f, 3.0f
+    };
+
+    auto output_ptr = output.pointer<float>();
+
+    for (size_t i = 0; i < answers.size(); ++i)
+    {
+        EXPECT_TRUE(are_equal(answers[i], output_ptr[i]));
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp
index 1edf8a986..0d49b04a0 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/tile_gpu_test.cpp
@@ -69,7 +69,7 @@ void tile_ref(const memory& input, memory& output, tile::tile_axis axis, int num
 }
 
 TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
     auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 2, 2 } });
@@ -99,7 +99,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_b) {
 }
 
 TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
     auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 4, 2, 2 } });
@@ -132,7 +132,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_f) {
 }
 
 TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
     auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 4, 2 } });
@@ -165,7 +165,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_y) {
 }
 
 TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 2 } });
     auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
@@ -197,7 +197,7 @@ TEST(tile_gpu, basic_in1x2x2x2_axis_x) {
 }
 
 TEST(tile_gpu, basic_in1x2x2x2_axis_x_dense) {
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 1 } });
     auto output_ref = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 2, 2, 4 } });
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp
index a5c06f208..3491933ce 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/topology_test.cpp
@@ -445,11 +445,11 @@ protected:
     cldnn::layout* output_layout;
     std::vector<unsigned> generator;
 
-    static cldnn::engine engine;
+    static const cldnn::engine& engine;
     static std::vector<cldnn::layout*> all_output_layouts;//just for tear-down
 };
 
-cldnn::engine topology_test::engine;
+const cldnn::engine& topology_test::engine = tests::get_test_engine();
 std::vector<cldnn::layout*> topology_test::all_output_layouts = {};
 
 std::vector<topology_test::topology_generator::topology_layer_type*> topology_test::topology_generator::layer_types = {
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp
new file mode 100644
index 000000000..428881f29
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/trim_to_outputs_gpu_test.cpp
@@ -0,0 +1,200 @@
+/*
+// Copyright (c) 2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+#include <gtest/gtest.h>
+#include "api/CPP/memory.hpp"
+#include <api/CPP/input_layout.hpp>
+#include "api/CPP/concatenation.hpp"
+#include <api/CPP/topology.hpp>
+#include <api/CPP/network.hpp>
+#include <api/CPP/engine.hpp>
+#include <api/CPP/data.hpp>
+#include "test_utils/test_utils.h"
+
+using namespace cldnn;
+using namespace tests;
+
+/*
+    This set of tests has been designed to check the correctness of trim_to_outputs optimization pass
+*/
+
+
+/*
+   In this test we check if the convolution conv2 will be eliminated from the network. This is expected to be done in trim_to_outputs optimization pass
+
+   Network structure:  input  -> conv1 (output)
+                           \
+                            ---> conv2 (to be eliminated)
+*/
+TEST(trim_to_outputs, one_node_to_eliminate_case1) {
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    build_opt.set_option(cldnn::build_option::outputs({ "conv1" }));
+    build_opt.set_option(build_option::optimize_data(false));             // to avoid adding reorders
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb, { 1, 1, 1, 1 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+    auto bias = memory::allocate(engine, { data_types::f32, format::bfyx, { 1, 1, 1, 1 } });
+
+    set_values(input, { 1.1f });
+    set_values(weights, { 2.1f });
+    set_values(bias, { 1.6f });
+
+    std::vector<float> out_data = { 3.91f };
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights", weights));
+    topology.add(data("bias", bias));
+    topology.add(cldnn::convolution("conv1", { "input" }, { "weights" }, { "bias" }));
+    topology.add(cldnn::convolution("conv2", { "input" }, { "weights" }, { "bias" }));
+
+    network network(engine, topology, build_opt);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), (size_t)1); // there is only one output
+    EXPECT_EQ(network.get_executed_primitives().size(), (size_t)2);   // input and conv1 where executed
+    EXPECT_EQ(network.get_all_primitive_ids().size(), (size_t)4);     // also bias and weights still exist
+
+    for (auto& it : outputs)
+    {
+        auto output_ptr = it.second.get_memory().pointer<float>();
+        for (size_t cntr = 0; cntr < out_data.size(); cntr++)
+        {
+            EXPECT_NEAR(output_ptr[cntr], out_data[cntr], 1e-4);
+        }
+        EXPECT_EQ(it.first, "conv1");
+    }
+}
+
+/*
+in this test we check if the convolution conv2 will be eliminated from the network. This is expected to be done in trim_to_outputs optimization pass
+
+Network structure:  input  -> conv1 (output)
+                        \
+                         ---> conv2 (to be eliminated along with its weights and bias)
+*/
+TEST(trim_to_outputs, one_node_to_eliminate_case2) {
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    build_opt.set_option(cldnn::build_option::outputs({ "conv1" }));
+    build_opt.set_option(build_option::optimize_data(false));             // to avoid adding reorders
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 1 } });
+    auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto bias1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto bias2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { 1.1f });
+    set_values(weights1, { 2.1f });
+    set_values(bias1, { 1.6f });
+    set_values(weights2, { 0.3f });
+    set_values(bias2, { 0.2f });
+
+    std::vector<float> out_data = { 3.91f };
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights1", weights1));
+    topology.add(data("bias1", bias1));
+    topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }, { "bias1" }));
+    topology.add(data("weights2", weights2));
+    topology.add(data("bias2", bias2));
+    topology.add(cldnn::convolution("conv2", { "input" }, { "weights2" }, { "bias2" }));
+
+    network network(engine, topology, build_opt);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), (size_t)1); // there is only one output
+    EXPECT_EQ(network.get_executed_primitives().size(), (size_t)2);   // input and conv1 where executed
+    EXPECT_EQ(network.get_all_primitive_ids().size(), (size_t)4);     // also bias1 and weights1 still exist
+
+    for (auto& it : outputs)
+    {
+        auto output_ptr = it.second.get_memory().pointer<float>();
+
+        for (size_t cntr = 0; cntr < out_data.size(); cntr++)
+        {
+            EXPECT_NEAR(output_ptr[cntr], out_data[cntr], 1e-4);
+        }
+        EXPECT_EQ(it.first, "conv1");
+    }
+}
+
+/*
+in this test we check if the convolution conv2 will be eliminated from the network. This is expected to be done in trim_to_outputs optimization pass
+
+Network structure:  input ---> conv1 --- ---> conv4 (output)
+                        \
+                         --->  conv2  ---> conv3
+Convolutions conv2, conv3 should be optimized out along with weights23 shered by conv2 and conv3.
+*/
+TEST(trim_to_outputs, two_nodes_to_eliminate_case1) {
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    build_opt.set_option(cldnn::build_option::outputs({ "conv4" }));
+    build_opt.set_option(build_option::optimize_data(false));             // to avoid adding reorders
+
+    auto input = memory::allocate(engine, { data_types::f32, format::yxfb,{ 1, 1, 1, 1 } });
+    auto weights1 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto weights23 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto weights4 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+    auto bias = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 1, 1 } });
+
+    set_values(input, { 1.1f });
+    set_values(weights1, { 2.1f });
+    set_values(weights23, { 3.0f });
+    set_values(weights4, { 2.0f });
+    set_values(bias, { 1.6f });
+
+    std::vector<float> out_data = { 9.42f };
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights1", weights1));
+    topology.add(data("bias", bias));
+    topology.add(cldnn::convolution("conv1", { "input" }, { "weights1" }, { "bias" }));
+    topology.add(data("weights23", weights23));
+    topology.add(cldnn::convolution("conv2", { "input" }, { "weights23" }, { "bias" }));
+    topology.add(cldnn::convolution("conv3", { "conv2" }, { "weights23" }, { "bias" }));
+    topology.add(data("weights4", weights4));
+    topology.add(cldnn::convolution("conv4", { "conv1" }, { "weights4" }, { "bias" }));
+
+    network network(engine, topology, build_opt);
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    EXPECT_EQ(outputs.size(), (size_t)1); // there is only one output
+    EXPECT_EQ(network.get_executed_primitives().size(), (size_t)3);   // input, conv1 and conv4  where executed
+    EXPECT_EQ(network.get_all_primitive_ids().size(), (size_t)6);     // also bias weights1 and weights4 still exist
+
+    for (auto& it : outputs)
+    {
+        auto output_ptr = it.second.get_memory().pointer<float>();
+
+        for (size_t cntr = 0; cntr < out_data.size(); cntr++)
+        {
+            EXPECT_NEAR(output_ptr[cntr], out_data[cntr], 1e-4);
+        }
+        EXPECT_EQ(it.first, "conv4");
+    }
+}
+
diff --git a/inference-engine/thirdparty/clDNN/tests/test_cases/upsampling_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests/test_cases/upsampling_gpu_test.cpp
index eceece231..d0d76a816 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_cases/upsampling_gpu_test.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_cases/upsampling_gpu_test.cpp
@@ -41,7 +41,7 @@ TEST(upsampling_gpu, basic_in2x3x2x2_nearest) {
     //  f1: b0:  7    8  -16   b1:   12   9     -17
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx, { 2, 2, 3, 2 } });
 
@@ -112,7 +112,7 @@ TEST(upsampling_gpu, basic_in2x3x2x2_bilinear) {
     //  f0: b0:  3    4
     //
 
-    engine engine;
+    const auto& engine = get_test_engine();
 
     auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 1, 1, 2, 2 } });
 
diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp b/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp
index e9c5ba27d..e90744fb7 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_utils/instrumentation.cpp
@@ -22,6 +22,7 @@
 #include <vector>
 #include <iostream>
 
+
 namespace instrumentation {
     // initalize dumping directory for whole run
     const std::string logger::dump_dir = DUMP_DIRECTORY;
@@ -303,7 +304,7 @@ namespace instrumentation {
         auto i_size = mem_arg.size.batch[0]; //batch = input feature map
         auto x_size = mem_arg.size.spatial[0]; // spatial_x = output feature map
         auto weights_size = mem_arg.size.count();
-        int xsv = 8, bsv = 8; 
+        int xsv = 8, bsv = 8;
         unsigned int input_it = 0, input_i_it= 0 , input_o_it = 0;
         for (cldnn::tensor::value_type it = 0; it < weights_size; it++)
         {
@@ -371,9 +372,10 @@ namespace instrumentation {
     }
 
     template <class T>
-    void dump(const cldnn::memory& mem, std::vector<std::vector<std::stringstream>>& streams)
+    void dump(const cldnn::memory& mem, std::vector<std::vector<std::string>>& dump_strings)
     {
         auto mem_ptr = mem.pointer<T>();
+        std::stringstream stream;
 
         auto&& pitches = mem.get_layout().get_pitches();
         auto&& size = mem.get_layout().size;
@@ -386,39 +388,40 @@ namespace instrumentation {
                     for (cldnn::tensor::value_type x = 0; x < size.spatial[0]; ++x)
                     {
                         unsigned int input_it = b*pitches.batch[0] + f*pitches.feature[0] + y*pitches.spatial[1] + x*pitches.spatial[0];
-                        streams[b][f] << convert_element(mem_ptr[input_it]) << " ";
+                        stream << convert_element(mem_ptr[input_it]) << " ";
                         input_it++;
                     }
-                    streams[b][f] << std::endl;
+                    stream << std::endl;
+                    dump_strings[b][f] = stream.str();
                 }
             }
         }
     }
 
     void logger::log_memory_to_file(const cldnn::memory& mem, std::string prefix, bool single_batch, cldnn::tensor::value_type batch_id, bool single_feature, cldnn::tensor::value_type feature_id)
-    {        
+    {
         auto batch = mem.get_layout().size.batch[0];
         auto feature = mem.get_layout().size.feature[0];
         auto eng_type =  "gpu" ;
-        std::vector<std::vector<std::stringstream>> streams(batch);
+        std::vector<std::vector<std::string>> dump_strings(batch);
         for(cldnn::tensor::value_type b = 0; b < batch; b++)
         {
-            streams[b].resize(feature);
+            dump_strings[b].resize(feature);
         }
 
         if (mem.get_layout().data_type == cldnn::data_types::f32)
-            dump<float>(mem, streams);
+            dump<float>(mem, dump_strings);
         else
-            dump<half_t>(mem, streams);
+            dump<half_t>(mem, dump_strings);
 
         for (cldnn::tensor::value_type b = 0; b < batch; b++)
             for (cldnn::tensor::value_type f = 0; f < feature; f++)
             {
-                if ((!single_batch || b == batch_id) && (!single_feature || f == feature_id))
+                if (!single_batch || (b == batch_id && f == feature_id))
                 {
                     std::string filename((dump_dir + "/" + prefix + "_" + eng_type + "_b" + std::to_string(b) + "_f" + std::to_string(f) + ".txt"));
-                    std::ofstream file_stream = std::ofstream(filename, std::ios::out);
-                    file_stream << streams[b][f].str();
+                    std::ofstream file_stream(filename);
+                    file_stream << dump_strings[b][f];
                     file_stream.close();
                 }
             }
diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp
index ddc74677b..b46bedfaa 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp
+++ b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.cpp
@@ -77,7 +77,7 @@ namespace tests
                     {
                         values.push_back(static_cast<float>(multipler + j));
                     }
-                    tests::set_values_per_batch_and_feature<float>(input_mems[i], generic_params->input_layouts[i], values);
+                    tests::set_values_per_batch_and_feature<float>(input_mems[i], values);
                     multipler = values.size();
                 }
                 else
@@ -87,7 +87,7 @@ namespace tests
                     {
                         values.push_back(FLOAT16(static_cast<float>(multipler + j)));
                     }
-                    tests::set_values_per_batch_and_feature<FLOAT16>(input_mems[i], generic_params->input_layouts[i], values);
+                    tests::set_values_per_batch_and_feature<FLOAT16>(input_mems[i], values);
                     multipler = values.size();
                 }        
             }                        
@@ -276,7 +276,7 @@ namespace tests
         return{ p, calc_offfset(layout, p) };
     }
 
-    size_t generic_test::get_linear_index(const layout & layout, size_t b, size_t f, size_t y, size_t x, const memory_desc& desc)
+    size_t generic_test::get_linear_index(const layout&, size_t b, size_t f, size_t y, size_t x, const memory_desc& desc)
     {
         return 
             desc.offset + 
@@ -309,7 +309,9 @@ namespace tests
         //{ format::yx,{ 8,8 } } , { format::yx,{ 9,9 } } , { format::yx,{ 10,10 } } , { format::yx,{ 11,11 } } , { format::yx,{ 12,12 } } , { format::yx,{ 13,13 } } ,
         //{ format::yx,{ 14,14 } } , { format::yx,{ 15,15 } } , { format::yx,{ 16,16 } } };
 
-        for (cldnn::data_types data_type : test_data_types())
+        auto data_types = test_data_types();
+
+        for (cldnn::data_types data_type : data_types)
         {
             for (cldnn::format fmt : test_input_formats)
             {
@@ -329,6 +331,12 @@ namespace tests
         return all_generic_params;
     }
 
+    const cldnn::engine & get_test_engine()
+    {
+        static const cldnn::engine engine;
+        return engine;
+    }
+
     const std::string test_dump::name() const
     {
         std::string temp = name_str;
@@ -377,8 +385,7 @@ namespace tests
         std::vector<cldnn::data_types> result;
         result.push_back(cldnn::data_types::f32);
         
-        cldnn::engine temp;
-        if(temp.get_info().supports_fp16)
+        if(get_test_engine().get_info().supports_fp16)
         {
             result.push_back(cldnn::data_types::f16);
         }
@@ -390,4 +397,4 @@ namespace tests
     std::vector<int32_t> generic_test::test_feature_sizes = { 1, 2 };// , 3, 15};
     std::vector<tensor> generic_test::test_input_sizes = { { 1, 1, 100, 100 } ,{ 1, 1, 277, 277 } ,{ 1, 1, 400, 600 } };
 
-}
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h
index aed214619..62892df2f 100644
--- a/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h
+++ b/inference-engine/thirdparty/clDNN/tests/test_utils/test_utils.h
@@ -21,6 +21,7 @@
 #include "api/CPP/memory.hpp"
 #include "api/CPP/tensor.hpp"
 #include "api/CPP/program.hpp"
+#include "api/CPP/network.hpp"
 #include <iostream>
 #include <limits>
 #include <random>
@@ -40,6 +41,8 @@
 #include "api/CPP/activation.hpp"
 #include "api/CPP/pooling.hpp"
 
+#include <chrono>
+
 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
 
 namespace tests {
@@ -79,7 +82,7 @@ inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
     size_t b = data[0].size();
     size_t c = data[0][0].size();
     size_t d = data[0][0][0].size();
-    VF<T> vec(a * b * c * d, 0.0f);
+    VF<T> vec(a * b * c * d, (T)(0.0f));
     size_t idx = 0;
 
     switch (input_format.value) {
@@ -91,6 +94,14 @@ inline VF<T> flatten_4d(cldnn::format input_format, VVVVF<T> &data) {
                             vec[idx++] = data[bi][fi][yi][xi];
             break;
         
+        case cldnn::format::fyxb:
+            for (size_t fi = 0; fi < b; ++fi)
+                for (size_t yi = 0; yi < c; ++yi)
+                    for (size_t xi = 0; xi < d; ++xi)
+                        for (size_t bi = 0; bi < a; ++bi)
+                            vec[idx++] = data[bi][fi][yi][xi];
+            break;
+
         case cldnn::format::bfyx:
             for (size_t bi = 0; bi < a; ++bi)
                 for (size_t fi = 0; fi < b; ++fi)
@@ -183,7 +194,7 @@ void set_values(const cldnn::memory& mem, std::vector<T> args) {
 }
 
 template<typename T>
-void set_values_per_batch_and_feature(const cldnn::memory& mem, const cldnn::layout& layout, std::vector<T> args)
+void set_values_per_batch_and_feature(const cldnn::memory& mem, std::vector<T> args)
 {
     auto mem_ptr = mem.pointer<T>();
     auto&& pitches = mem.get_layout().get_pitches();
@@ -219,6 +230,24 @@ void set_random_values(const cldnn::memory& mem, bool sign = false, unsigned sig
 }
 
 
+// Tries to construct a network, checking if an expected error appears
+inline void check_exception_massage(const cldnn::engine& engine, cldnn::topology& topology, std::string msg_to_find)
+{
+    try {
+        cldnn::network(engine, topology);
+    }
+    catch (std::exception & exc) {
+        std::string msg(exc.what());
+        if (msg.find(msg_to_find) != std::string::npos) {
+            throw;
+        }
+        else {
+            printf("%s\n", exc.what());
+        }
+    }
+}
+
+
 // Checks equality of floats.
 // For values less than absoulte_error_limit, absolute error will be counted
 // for others, the relatve error will be counted.
@@ -318,6 +347,8 @@ struct memory_desc
     size_t offset;
 };
 
+const cldnn::engine & get_test_engine();
+
 struct test_dump
 {
     const std::string name() const;
@@ -358,7 +389,7 @@ public:
     };
 
 protected:
-    cldnn::engine engine;
+    const cldnn::engine& engine = get_test_engine();
     test_params* generic_params;
     test_dump test_info;
     cldnn::primitive* layer_params;
@@ -422,7 +453,9 @@ inline void PrintTupleTo(const std::tuple<tests::test_params*, cldnn::primitive*
             << " Pooled width: " << p->pooled_width
             << " Pooled height: " << p->pooled_height
             << " Spatial scale: " << p->spatial_scale
-            << " Group size: " << p->group_sz;
+            << " Spatial bins x: " << p->spatial_bins_x
+            << " Spatial bins y: " << p->spatial_bins_y
+            << " Output dim: " << p->output_dim;
     }
     else if(primitive->type == cldnn::scale::type_id())
     {
@@ -437,7 +470,7 @@ inline void PrintTupleTo(const std::tuple<tests::test_params*, cldnn::primitive*
     else if (primitive->type == cldnn::reorder::type_id())
     {
         auto reorder = static_cast<cldnn::reorder*>(primitive);
-        str << "Output data type: " << cldnn::data_type_traits::name(reorder->output_data_type) << " Mean: " << reorder->mean << "Subtract per feature: " << "TODO" /*std::vector<float> subtract_per_feature*/;
+        str << "Output data type: " << cldnn::data_type_traits::name(*reorder->output_data_type) << " Mean: " << reorder->mean << "Subtract per feature: " << "TODO" /*std::vector<float> subtract_per_feature*/;
     }
     else if (primitive->type == cldnn::normalize::type_id())
     {
diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt b/inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt
new file mode 100644
index 000000000..cdc5811f8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests_core_internal/CMakeLists.txt
@@ -0,0 +1,311 @@
+# Copyright (c) 2019 Intel Corporation
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#      http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ====================================== Helper constant variables =====================================
+
+# Order of scan for special capabilities files (.inc files with capabilities description).
+set(CLDNN__CAPS_SCAN_ORDER
+    "private"
+    "internal"
+    "public"
+  )
+
+# ========================================= Name / Output settings =====================================
+
+set(CLDNN_BUILD__PROJ             "tests_core_internal")
+set(CLDNN_BUILD__PROJ_LABEL       "${CLDNN_BUILD__PROJ}")
+set(CLDNN_BUILD__PROJ_OUTPUT_NAME "${CLDNN_BUILD__PROJ}${CLDNN__OUT_CPU_SUFFIX}")
+
+# =========================================== Compiler options =========================================
+intel_config_flag_apply_settings(CompilerOptions CMAKE_CXX_FLAGS ALL_PATTERN ""
+    SET
+	  StandardCxx11
+      RttiEnabled
+  )
+
+if (NOT MSVC)
+  intel_config_flag_apply_settings(CompilerOptions CMAKE_CXX_FLAGS ALL_PATTERN ""
+      SET_RAW
+        "-Wno-error=conversion-null"
+        "-Wno-error=type-limits"
+        "-Wno-error=unused-variable"
+    )
+endif ()
+
+find_package(OpenMP)
+if (OPENMP_FOUND)
+    add_definitions(-DOPENMP_FOUND)
+    set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+endif()
+
+# ================================== Compiler preprocessor definitions =================================
+
+set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS
+    CLDNN_EXPORTS
+    EXPORT_NEURAL_SYMBOLS
+    "CLDNN_VERSION_MAJOR=${CLDNN__VERSION_MAJOR}"
+    "CLDNN_VERSION_MINOR=${CLDNN__VERSION_MINOR}"
+    "CLDNN_VERSION_BUILD=${CLDNN__VERSION_BUILD}"
+    "CLDNN_VERSION_REVISION=${CLDNN__VERSION_REVISION}"
+  )
+
+
+# ========================================= Source/Header files ========================================
+
+set(__CLDNN_Directory__clDNN_copy             "${CMAKE_CURRENT_SOURCE_DIR}/../src")
+set(__CLDNN_Label__clDNN_copy                 "clDNN")
+file(GLOB __CLDNN_Sources__clDNN_copy
+    "${__CLDNN_Directory__clDNN_copy}/*.h"
+    "${__CLDNN_Directory__clDNN_copy}/*.hpp"
+    "${__CLDNN_Directory__clDNN_copy}/*.cpp"
+    "${__CLDNN_Directory__clDNN_copy}/*.inc"
+  )
+
+set(__CLDNN_Label__api                 "${__CLDNN_Label__clDNN_copy}\\api")
+file(GLOB __CLDNN_Headers__api
+    "${CLDNN__API_DIR}/*.h"
+    "${CLDNN__API_DIR}/*.hpp"
+  )
+
+set(__CLDNN_Directory__api__cpp "${CLDNN__API_DIR}/CPP")
+set(__CLDNN_Label__api__cpp     "${__CLDNN_Label__api}\\CPP")
+file(GLOB __CLDNN_Headers__api__cpp
+    "${__CLDNN_Directory__api__cpp}/*.h"
+    "${__CLDNN_Directory__api__cpp}/*.hpp"
+  )
+
+set(__CLDNN_Directory__api__c "${CLDNN__API_DIR}/C")
+set(__CLDNN_Label__api__c     "${__CLDNN_Label__api}\\C")
+file(GLOB __CLDNN_Headers__api__c
+    "${__CLDNN_Directory__api__c}/*.h"
+    "${__CLDNN_Directory__api__c}/*.hpp"
+  )
+
+set(__CLDNN_Label__api_extension       "${__CLDNN_Label__clDNN_copy}\\api_extension")
+file(GLOB __CLDNN_Headers__api_extension
+    "${CLDNN__API_EXTENSION_DIR}/*.h"
+    "${CLDNN__API_EXTENSION_DIR}/*.hpp"
+  )
+
+set(__CLDNN_Directory__api_extension__cpp "${CLDNN__API_EXTENSION_DIR}/CPP")
+set(__CLDNN_Label__api_extension__cpp     "${__CLDNN_Label__api_extension}\\CPP")
+file(GLOB __CLDNN_Headers__api_extension__cpp
+    "${__CLDNN_Directory__api_extension__cpp}/*.h"
+    "${__CLDNN_Directory__api_extension__cpp}/*.hpp"
+  )
+
+set(__CLDNN_Directory__api_extension__c "${CLDNN__API_EXTENSION_DIR}/C")
+set(__CLDNN_Label__api_extension__c     "${__CLDNN_Label__api_extension}\\C")
+file(GLOB __CLDNN_Headers__api_extension__c
+    "${__CLDNN_Directory__api_extension__c}/*.h"
+    "${__CLDNN_Directory__api_extension__c}/*.hpp"
+  )
+
+set(__CLDNN_Label__main                "")
+file(GLOB __CLDNN_Sources__main
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.h"
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.hpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
+  )
+
+set(__CLDNN_Directory__graph_opt       "${CMAKE_CURRENT_SOURCE_DIR}/../src/graph_optimizer")
+set(__CLDNN_Label__graph_opt           "${__CLDNN_Label__clDNN_copy}\\graph_optimizer")
+file(GLOB __CLDNN_Sources__graph_opt
+    "${__CLDNN_Directory__graph_opt}/*.h"
+    "${__CLDNN_Directory__graph_opt}/*.hpp"
+    "${__CLDNN_Directory__graph_opt}/*.cpp"
+  )
+
+set(__CLDNN_Directory__include         "${CMAKE_CURRENT_SOURCE_DIR}/../src/include")
+set(__CLDNN_Label__include             "${__CLDNN_Label__clDNN_copy}\\include")
+file(GLOB __CLDNN_Headers__include
+    "${__CLDNN_Directory__include}/*.h"
+    "${__CLDNN_Directory__include}/*.hpp"
+  )
+
+set(__CLDNN_Directory__test_cases      "${CMAKE_CURRENT_SOURCE_DIR}/test_cases")
+set(__CLDNN_Label__test_cases          "test cases")
+file(GLOB __CLDNN_Sources__test_cases
+    "${__CLDNN_Directory__test_cases}/*.h"
+    "${__CLDNN_Directory__test_cases}/*.hpp"
+    "${__CLDNN_Directory__test_cases}/*.cpp"
+  )
+
+set(__CLDNN_Directory__test_utils      "${CMAKE_CURRENT_SOURCE_DIR}/../tests/test_utils")
+set(__CLDNN_Label__test_utils          "test utils")
+file(GLOB __CLDNN_Sources__test_utils
+    "${__CLDNN_Directory__test_utils}/*.h"
+    "${__CLDNN_Directory__test_utils}/*.hpp"
+    "${__CLDNN_Directory__test_utils}/*.cpp"
+  )
+
+set(__CLDNN_Directory__gtest           "${CLDNN__GTEST_DIR}")
+set(__CLDNN_Label__gtest               "google test framework")
+file(GLOB __CLDNN_Sources__gtest
+    "${__CLDNN_Directory__gtest}/*.cc"
+  )
+
+# Special handling of capabilities files.
+set(__CLDNN_Directory__caps            "${CMAKE_CURRENT_SOURCE_DIR}/../src/caps")
+set(__CLDNN_Label__caps                "${__CLDNN_Label__clDNN_copy}\\caps")
+foreach(__CLDNN_CapsScanDir ${CLDNN__CAPS_SCAN_ORDER})
+  string(REPLACE ";" "\;" __CLDNN_CapsScanDir "${__CLDNN_CapsScanDir}") # [WA#1] Must escape ; again if occurred in item.
+  file(GLOB __CLDNN_Sources__caps "${__CLDNN_Directory__caps}/${__CLDNN_CapsScanDir}/*.inc")
+  list(LENGTH __CLDNN_Sources__caps __CLDNN_CapsScanDirFileCount)
+  if(__CLDNN_CapsScanDirFileCount GREATER 0)
+    set(__CLDNN_IncDirectory__caps "${__CLDNN_Directory__caps}/${__CLDNN_CapsScanDir}")
+    message(STATUS "[clDNN] Selected capabilities: ${__CLDNN_CapsScanDir}")
+    break()
+  endif()
+endforeach()
+if(NOT (__CLDNN_CapsScanDirFileCount GREATER 0))
+  message(FATAL_ERROR "[clDNN] Cannot locate any capabilities files in \"${__CLDNN_Directory__caps}\" subdirectories.")
+endif()
+unset(__CLDNN_CapsScanDir)
+unset(__CLDNN_CapsScanDirFileCount)
+
+set(__CLDNN_Directory__gpu             "${CMAKE_CURRENT_SOURCE_DIR}/../src/gpu")
+set(__CLDNN_Label__gpu                 "${__CLDNN_Label__clDNN_copy}\\gpu")
+file(GLOB __CLDNN_Sources__gpu
+    "${__CLDNN_Directory__gpu}/*.h"
+    "${__CLDNN_Directory__gpu}/*.hpp"
+    "${__CLDNN_Directory__gpu}/*.cpp"
+    "${__CLDNN_Directory__gpu}/*.inc"
+  )
+
+set(__CLDNN_Directory__cache           "${__CLDNN_Directory__gpu}/cache")
+set(__CLDNN_Label__cache               "${__CLDNN_Label__gpu}\\cache")
+file(GLOB __CLDNN_Sources__cache
+    "${__CLDNN_Directory__cache}/*.h"
+    "${__CLDNN_Directory__cache}/*.hpp"
+    "${__CLDNN_Directory__cache}/*.cpp"
+  )
+
+set(__CLDNN_Directory__ch_kernels      "${__CLDNN_Directory__cache}/kernels")
+set(__CLDNN_Label__ch_kernels          "${__CLDNN_Label__cache}\\kernels")
+file(GLOB __CLDNN_Sources__ch_kernels
+    "${__CLDNN_Directory__ch_kernels}/*.cl"
+  )
+
+set(__CLDNN_Directory__cg_cache        "${CLDNN__CODEGEN_INCDIR}")
+set(__CLDNN_CGDirectory__cg_cache      "${CLDNN__CODEGEN_DIR}/cache")
+set(__CLDNN_Label__cg_cache            "${__CLDNN_Label__cache}\\codegen")
+
+set(__CLDNN_Directory__ks_main           "${CLDNN__KERNEL_SELECTOR_DIR}")
+set(__CLDNN_Directory__ks_core           "${CLDNN__KERNEL_SELECTOR_DIR}/core")
+set(__CLDNN_Directory__ks_common         "${CLDNN__KERNEL_SELECTOR_DIR}/common")
+set(__CLDNN_Directory__ks_core_common    "${__CLDNN_Directory__ks_core}/common")
+set(__CLDNN_Directory__ks_actual_kernels "${__CLDNN_Directory__ks_core}/actual_kernels")
+set(__CLDNN_Directory__ks_cache          "${__CLDNN_Directory__ks_core}/cache")
+
+
+set(__CLDNN_AllSources
+    ${__CLDNN_Sources__clDNN_copy}
+    ${__CLDNN_Headers__api}
+    ${__CLDNN_Sources__graph_opt}
+    ${__CLDNN_Headers__include}
+    ${__CLDNN_Sources__caps}
+    ${__CLDNN_Headers__api__cpp}
+    ${__CLDNN_Headers__api__c}
+    ${__CLDNN_Headers__api_extension}
+    ${__CLDNN_Headers__api_extension__c}
+    ${__CLDNN_Headers__api_extension__cpp}
+    ${__CLDNN_Sources__main}
+    ${__CLDNN_Sources__gpu}
+    ${__CLDNN_Sources__cache}
+    ${__CLDNN_Sources__ch_kernels}
+    ${__CLDNN_Sources__cg_cache}
+    ${__CLDNN_Sources__test_cases}
+    ${__CLDNN_Sources__test_utils}
+    ${__CLDNN_Sources__gtest}
+  )
+# Helping with some generators.
+set_property(SOURCE ${__CLDNN_Sources__cg_cache} PROPERTY GENERATED TRUE)
+
+
+# =============================================== Filters ==============================================
+
+source_group("${__CLDNN_Label__api}"                  FILES ${__CLDNN_Headers__api})
+source_group("${__CLDNN_Label__api__cpp}"             FILES ${__CLDNN_Headers__api__cpp})
+source_group("${__CLDNN_Label__api__c}"               FILES ${__CLDNN_Headers__api__c})
+source_group("${__CLDNN_Label__api_extension}"        FILES ${__CLDNN_Headers__api_extension})
+source_group("${__CLDNN_Label__api_extension__cpp}"   FILES ${__CLDNN_Headers__api_extension__cpp})
+source_group("${__CLDNN_Label__api_extension__c}"     FILES ${__CLDNN_Headers__api_extension__c})
+source_group("${__CLDNN_Label__include}"              FILES ${__CLDNN_Headers__include})
+source_group("${__CLDNN_Label__graph_opt}"            FILES ${__CLDNN_Sources__graph_opt})
+source_group("${__CLDNN_Label__caps}"                 FILES ${__CLDNN_Sources__caps})
+source_group("${__CLDNN_Label__main}"                 FILES ${__CLDNN_Sources__main})
+source_group("${__CLDNN_Label__gpu}"                  FILES ${__CLDNN_Sources__gpu})
+source_group("${__CLDNN_Label__cache}"                FILES ${__CLDNN_Sources__cache})
+source_group("${__CLDNN_Label__ch_kernels}"           FILES ${__CLDNN_Sources__ch_kernels})
+source_group("${__CLDNN_Label__cg_cache}"             FILES ${__CLDNN_Sources__cg_cache})
+source_group("${__CLDNN_Label__test_cases}"           FILES ${__CLDNN_Sources__test_cases})
+source_group("${__CLDNN_Label__test_utils}"           FILES ${__CLDNN_Sources__test_utils})
+source_group("${__CLDNN_Label__gtest}"                FILES ${__CLDNN_Sources__gtest})
+
+
+# ===================================== Include/Link directories =======================================
+
+include_directories(
+    "${CLDNN__MAIN_DIR}"
+    "${CLDNN__MAIN_DIR}/src"
+    "${CLDNN__GTEST_DIR}"
+    "${__CLDNN_Directory__test_utils}"
+    "${CMAKE_CURRENT_SOURCE_DIR}"
+    "${__CLDNN_Directory__include}"
+    "${__CLDNN_IncDirectory__caps}"
+    "${__CLDNN_Directory__ks_core}"
+    "${__CLDNN_Directory__ks_core}/common"
+    "${__CLDNN_Directory__ks_actual_kernels}"
+    "${__CLDNN_Directory__ks_common}"
+    "${__CLDNN_Directory__gpu}"
+  )
+
+# =================================== Link targets and dependencies ====================================
+
+# Tests executable.
+add_executable("${CLDNN_BUILD__PROJ}"
+    ${__CLDNN_AllSources}
+  )
+
+set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY PROJECT_LABEL "${CLDNN_BUILD__PROJ_LABEL}")
+set_property(TARGET "${CLDNN_BUILD__PROJ}" PROPERTY OUTPUT_NAME   "${CLDNN_BUILD__PROJ_OUTPUT_NAME}")
+
+
+# Set library dependencies
+target_link_libraries("${CLDNN_BUILD__PROJ}"
+    # "${CLDNN_BUILD__PROJ__clDNN}"
+    OpenCL
+    cldnn_kernel_selector
+  )
+
+if(WIN32)
+  target_link_libraries("${CLDNN_BUILD__PROJ}" setupapi)
+elseif((NOT ANDROID) AND (UNIX))
+  target_link_libraries("${CLDNN_BUILD__PROJ}" pthread)
+endif()
+target_link_libraries("${CLDNN_BUILD__PROJ}" ${CLDNN__SYSTEM_LINK_LIBRARIES})
+
+# =================================== Custom pre- and post-steps =======================================
+
+if(CLDNN__RUN_TESTS)
+  add_custom_command(TARGET "${CLDNN_BUILD__PROJ}" POST_BUILD
+      WORKING_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}"
+      COMMAND "${CLDNN_BUILD__PROJ}"
+      COMMENT "Executing tests..."
+    )
+endif()
+
+# ======================================================================================================
diff --git a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_ICL.cpp b/inference-engine/thirdparty/clDNN/tests_core_internal/main.cpp
index a5e90ad19..02fbc7da6 100644
--- a/inference-engine/thirdparty/clDNN/kernel_selector/core/cache/cache_ICL.cpp
+++ b/inference-engine/thirdparty/clDNN/tests_core_internal/main.cpp
@@ -1,5 +1,5 @@
 /*
-// Copyright (c) 2018 Intel Corporation
+// Copyright (c) 2019 Intel Corporation
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,13 +14,10 @@
 // limitations under the License.
 */
 
-#include "auto_tuner.h"
-#include "auto_tuner_offline.h"
-namespace kernel_selector 
+#include "gtest/gtest.h"
+
+int main(int argc, char* argv[])
 {
-    // ICL_GT2
-    void tuning_cache_8A52(tuning_data& td)
-    {
-        tuning_cache_8A52_B1_B16(td);
-    }
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/program_impl_wrapper.h b/inference-engine/thirdparty/clDNN/tests_core_internal/program_impl_wrapper.h
new file mode 100644
index 000000000..ddb9a1ca9
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests_core_internal/program_impl_wrapper.h
@@ -0,0 +1,32 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+namespace cldnn
+{
+    struct program_node;
+    struct program_impl;
+    // This class is intended to allow using private methods from program_impl within tests_core_internal project.
+    // Once needed, more methods wrapper should be added here.
+    class program_impl_wrapper
+    {
+    public:
+        static void add_connection(program_impl& p, program_node& prev, program_node& next)
+        {
+            p.add_connection(prev, next);
+        }
+    };
+
+}
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp b/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp
new file mode 100644
index 000000000..4a02d5c4e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/tests_core_internal/test_cases/graph_manipulation_gpu_test.cpp
@@ -0,0 +1,203 @@
+/*
+// Copyright (c) 2019 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+*/
+
+#include <memory>
+
+#include <gtest/gtest.h>
+
+#include "program_impl.h"
+#include "api_impl.h"
+#include "topology_impl.h"
+#include "engine_impl.h"
+#include "memory_impl.h"
+#include "data_inst.h"
+#include "activation_inst.h"
+#include "convolution_inst.h"
+#include "crop_inst.h"
+#include "network_impl.h"
+#include "reshape_inst.h"
+#include "pass_manager.h"
+
+#include "test_utils.h"
+#include "program_impl_wrapper.h"
+
+using namespace cldnn;
+using namespace ::tests;
+
+/* Basic test to show how the program can be build and run within internal tests 
+   in similar way as it is done in tests utilizing clDNN API */
+TEST(basic, test1) {
+    const auto& engine = get_test_engine();
+    build_options build_opt;
+    build_opt.set_option(build_option::optimize_data(true));
+
+    auto input = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 2 } });
+    auto weights1 = memory::allocate(engine, { data_types::f16, format::yxfb,{ 1, 1, 2, 1 } });
+    auto weights2 = memory::allocate(engine, { data_types::f32, format::byxf,{ 1, 1, 1, 2 } });
+
+    set_values(input, { FLOAT16(1.1f), FLOAT16(1.2f), FLOAT16(1.3f), FLOAT16(1.4f) });
+    set_values(weights1, { FLOAT16(2.1f), FLOAT16(3.1f) });
+    set_values(weights2, { 1.1f, 0.1f });
+
+    topology topology;
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights1", weights1));
+    topology.add(data("weights2", weights2));
+    topology.add(reshape("reshape1", "weights1", tensor(spatial(1, 2))));
+    topology.add(reorder("reorder2", "input", layout(data_types::f32, format::byxf, 4)));
+    topology.add(reorder("reorder1", "reshape1", layout(data_types::f32, format::byxf, 4)));
+    topology.add(concatenation("concat", { "reorder1", "weights2" }, concatenation::along_x));
+    topology.add(convolution("conv2", { "reorder2" }, { "concat" }));
+
+    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false);
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
+    network network = api_cast(net.get());
+
+    network.set_input_data("input", input);
+
+    auto outputs = network.execute();
+
+    float epsilon = 1e-2f;
+    for (auto& it : outputs)
+    {
+        auto output = it.second.get_memory().pointer<float>();
+        EXPECT_NEAR(7.8f, output[0], epsilon);
+    }
+}
+
+/*
+    This test creates a program without optimization passes, even the compilation is being run manualy. 
+    Thus, a single method from program_impl like add_intermediate might be tested separately.
+*/
+TEST(add_intermediate_gpu, test1)
+{
+    build_options build_opt;
+    topology topology;
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx, {2, 2, 2, 2} });
+    auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
+
+    set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f),
+                        (2.1f), (2.2f), (2.3f), (2.4f),
+                        (3.1f), (3.2f), (3.3f), (3.4f),
+                        (4.1f), (4.2f), (4.3f), (4.4f) });
+    set_values(weights, { (1.5f), (1.6f), (1.7f), (1.8f),
+                          (2.5f), (2.6f), (2.7f), (2.8f),
+                          (3.5f), (3.6f), (3.7f), (3.8f),
+                          (4.5f), (4.6f), (4.7f), (4.8f) });
+
+    set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) });
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights", weights));
+    topology.add(data("weights2", weights2));
+    topology.add(cldnn::convolution("conv1a", { "input" }, { "weights" }));
+    topology.add(cldnn::convolution("conv1b", { "input" }, { "weights" }));
+    topology.add(cldnn::convolution("conv2a", { "conv1a" }, { "weights2" }));
+    auto new_reorder = std::make_shared<reorder>("reorder","nothing", input.get_layout());
+    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
+    prog->add_intermediate(new_reorder, prog->get_node("conv1a"), 0);
+    prog->dump_program("custom_dump", true);
+
+    pass_manager pm;
+    compile_graph compile_graph_pass;
+    pm.run(*prog, compile_graph_pass);
+
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
+    network network = api_cast(net.get());
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    std::vector<float> expected_output_vec = {
+        32.2f, 60.2f, 66.6f, 126.6f,
+        514.22f, 532.7f, 1075.26f, 1113.9f
+    };
+
+    uint32_t output_size = 4;
+    uint32_t output_index = 0;
+    for (auto& it : outputs)
+    {
+        auto output = it.second.get_memory().pointer<float>();
+        for (uint32_t x = 0; x < output_size; x++)
+        {
+            EXPECT_FLOAT_EQ(expected_output_vec[x+output_size*output_index], output[x]);
+        }
+        output_index++;
+    }
+}
+
+/* This test shows how to use private members (here: add_connection) of program_impl using program_impl_wraper */
+TEST(add_intermediate_gpu, test2)
+{
+    build_options build_opt;
+    topology topology;
+    engine engine;
+
+    auto input = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    auto weights = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 2, 2 } });
+    auto weights2 = memory::allocate(engine, { data_types::f32, format::bfyx,{ 2, 2, 1, 1 } });
+
+    set_values(input, { (1.1f), (1.2f), (1.3f), (1.4f),
+        (2.1f), (2.2f), (2.3f), (2.4f),
+        (3.1f), (3.2f), (3.3f), (3.4f),
+        (4.1f), (4.2f), (4.3f), (4.4f) });
+    set_values(weights, { (1.5f), (1.6f), (1.7f), (1.8f),
+        (2.5f), (2.6f), (2.7f), (2.8f),
+        (3.5f), (3.6f), (3.7f), (3.8f),
+        (4.5f), (4.6f), (4.7f), (4.8f) });
+
+    set_values(weights2, { (5.5f), (5.6f), (5.7f), (5.8f) });
+
+    topology.add(input_layout("input", input.get_layout()));
+    topology.add(data("weights2", weights2));
+
+    topology.add(cldnn::convolution("conv2a", { "input" }, { "weights2" }));
+    topology.add(cldnn::convolution("conv2b", { "input" }, { "weights2" }));
+
+    std::vector<primitive_id> w_vec;
+    w_vec.push_back("weights");
+    auto new_conv = std::make_shared<convolution>("conv1a", "input", w_vec);
+    auto weights_node = std::make_shared<data>("weights", weights);
+    program_impl::ptr prog = api_cast(engine.get())->build_program(*api_cast(topology.get()), build_opt, false, true);
+
+    prog->add_intermediate(new_conv, prog->get_node("conv2a"), 0, true, true);
+    program_impl_wrapper::add_connection(*prog, prog->get_or_create(weights_node), prog->get_or_create(new_conv));
+    prog->dump_program("custom_dump", true);
+
+    pass_manager pm;
+    compile_graph compile_graph_pass;
+    pm.run(*prog, compile_graph_pass);
+
+    cldnn::refcounted_obj_ptr<cldnn::network_impl> net = api_cast(engine.get())->allocate_network(*prog);
+    network network = api_cast(net.get());
+    network.set_input_data("input", input);
+    auto outputs = network.execute();
+
+    std::vector<float> expected_output_vec = {
+        514.22f, 532.7f, 1075.26f, 1113.9f
+    };
+
+    uint32_t output_size = 4;
+    for (auto& it : outputs)
+    {
+        auto output = it.second.get_memory().pointer<float>();
+        for (uint32_t x = 0; x < output_size; x++)
+        {
+            EXPECT_FLOAT_EQ(expected_output_vec[x], output[x]);
+        }
+    }
+}
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/allocators.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/allocators.h
new file mode 100644
index 000000000..06b342044
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/allocators.h
@@ -0,0 +1,284 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ALLOCATORS_H_
+#define RAPIDJSON_ALLOCATORS_H_
+
+#include "rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// Allocator
+
+/*! \class rapidjson::Allocator
+    \brief Concept for allocating, resizing and freeing memory block.
+    
+    Note that Malloc() and Realloc() are non-static but Free() is static.
+    
+    So if an allocator need to support Free(), it needs to put its pointer in 
+    the header of memory block.
+
+\code
+concept Allocator {
+    static const bool kNeedFree;    //!< Whether this allocator needs to call Free().
+
+    // Allocate a memory block.
+    // \param size of the memory block in bytes.
+    // \returns pointer to the memory block.
+    void* Malloc(size_t size);
+
+    // Resize a memory block.
+    // \param originalPtr The pointer to current memory block. Null pointer is permitted.
+    // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.)
+    // \param newSize the new size in bytes.
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
+
+    // Free a memory block.
+    // \param pointer to the memory block. Null pointer is permitted.
+    static void Free(void *ptr);
+};
+\endcode
+*/
+
+
+/*! \def RAPIDJSON_ALLOCATOR_DEFUALT_CHUNK_CAPACITY
+    \ingroup RAPIDJSON_CONFIG
+    \brief User-defined kDefaultChunkCapacity definition.
+
+    User can define this as any \c size that is a power of 2.
+*/
+
+#ifndef RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY
+#define RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY (64 * 1024)
+#endif
+
+
+///////////////////////////////////////////////////////////////////////////////
+// CrtAllocator
+
+//! C-runtime library allocator.
+/*! This class is just wrapper for standard C library memory routines.
+    \note implements Allocator concept
+*/
+class CrtAllocator {
+public:
+    static const bool kNeedFree = true;
+    void* Malloc(size_t size) { 
+        if (size) //  behavior of malloc(0) is implementation defined.
+            return std::malloc(size);
+        else
+            return NULL; // standardize to returning NULL.
+    }
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
+        (void)originalSize;
+        if (newSize == 0) {
+            std::free(originalPtr);
+            return NULL;
+        }
+        return std::realloc(originalPtr, newSize);
+    }
+    static void Free(void *ptr) { std::free(ptr); }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// MemoryPoolAllocator
+
+//! Default memory allocator used by the parser and DOM.
+/*! This allocator allocate memory blocks from pre-allocated memory chunks. 
+
+    It does not free memory blocks. And Realloc() only allocate new memory.
+
+    The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default.
+
+    User may also supply a buffer as the first chunk.
+
+    If the user-buffer is full then additional chunks are allocated by BaseAllocator.
+
+    The user-buffer is not deallocated by this allocator.
+
+    \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator.
+    \note implements Allocator concept
+*/
+template <typename BaseAllocator = CrtAllocator>
+class MemoryPoolAllocator {
+public:
+    static const bool kNeedFree = false;    //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
+
+    //! Constructor with chunkSize.
+    /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
+        \param baseAllocator The allocator for allocating memory chunks.
+    */
+    MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) : 
+        chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
+    {
+    }
+
+    //! Constructor with user-supplied buffer.
+    /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size.
+
+        The user buffer will not be deallocated when this allocator is destructed.
+
+        \param buffer User supplied buffer.
+        \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader).
+        \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
+        \param baseAllocator The allocator for allocating memory chunks.
+    */
+    MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
+        chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
+    {
+        RAPIDJSON_ASSERT(buffer != 0);
+        RAPIDJSON_ASSERT(size > sizeof(ChunkHeader));
+        chunkHead_ = reinterpret_cast<ChunkHeader*>(buffer);
+        chunkHead_->capacity = size - sizeof(ChunkHeader);
+        chunkHead_->size = 0;
+        chunkHead_->next = 0;
+    }
+
+    //! Destructor.
+    /*! This deallocates all memory chunks, excluding the user-supplied buffer.
+    */
+    ~MemoryPoolAllocator() {
+        Clear();
+        RAPIDJSON_DELETE(ownBaseAllocator_);
+    }
+
+    //! Deallocates all memory chunks, excluding the user-supplied buffer.
+    void Clear() {
+        while (chunkHead_ && chunkHead_ != userBuffer_) {
+            ChunkHeader* next = chunkHead_->next;
+            baseAllocator_->Free(chunkHead_);
+            chunkHead_ = next;
+        }
+        if (chunkHead_ && chunkHead_ == userBuffer_)
+            chunkHead_->size = 0; // Clear user buffer
+    }
+
+    //! Computes the total capacity of allocated memory chunks.
+    /*! \return total capacity in bytes.
+    */
+    size_t Capacity() const {
+        size_t capacity = 0;
+        for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
+            capacity += c->capacity;
+        return capacity;
+    }
+
+    //! Computes the memory blocks allocated.
+    /*! \return total used bytes.
+    */
+    size_t Size() const {
+        size_t size = 0;
+        for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
+            size += c->size;
+        return size;
+    }
+
+    //! Allocates a memory block. (concept Allocator)
+    void* Malloc(size_t size) {
+        if (!size)
+            return NULL;
+
+        size = RAPIDJSON_ALIGN(size);
+        if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity)
+            if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size))
+                return NULL;
+
+        void *buffer = reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size;
+        chunkHead_->size += size;
+        return buffer;
+    }
+
+    //! Resizes a memory block (concept Allocator)
+    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
+        if (originalPtr == 0)
+            return Malloc(newSize);
+
+        if (newSize == 0)
+            return NULL;
+
+        originalSize = RAPIDJSON_ALIGN(originalSize);
+        newSize = RAPIDJSON_ALIGN(newSize);
+
+        // Do not shrink if new size is smaller than original
+        if (originalSize >= newSize)
+            return originalPtr;
+
+        // Simply expand it if it is the last allocation and there is sufficient space
+        if (originalPtr == reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) {
+            size_t increment = static_cast<size_t>(newSize - originalSize);
+            if (chunkHead_->size + increment <= chunkHead_->capacity) {
+                chunkHead_->size += increment;
+                return originalPtr;
+            }
+        }
+
+        // Realloc process: allocate and copy memory, do not free original buffer.
+        if (void* newBuffer = Malloc(newSize)) {
+            if (originalSize)
+                std::memcpy(newBuffer, originalPtr, originalSize);
+            return newBuffer;
+        }
+        else
+            return NULL;
+    }
+
+    //! Frees a memory block (concept Allocator)
+    static void Free(void *ptr) { (void)ptr; } // Do nothing
+
+private:
+    //! Copy constructor is not permitted.
+    MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */;
+    //! Copy assignment operator is not permitted.
+    MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */;
+
+    //! Creates a new chunk.
+    /*! \param capacity Capacity of the chunk in bytes.
+        \return true if success.
+    */
+    bool AddChunk(size_t capacity) {
+        if (!baseAllocator_)
+            ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
+        if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) {
+            chunk->capacity = capacity;
+            chunk->size = 0;
+            chunk->next = chunkHead_;
+            chunkHead_ =  chunk;
+            return true;
+        }
+        else
+            return false;
+    }
+
+    static const int kDefaultChunkCapacity = RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
+
+    //! Chunk header for perpending to each chunk.
+    /*! Chunks are stored as a singly linked list.
+    */
+    struct ChunkHeader {
+        size_t capacity;    //!< Capacity of the chunk in bytes (excluding the header itself).
+        size_t size;        //!< Current size of allocated memory in bytes.
+        ChunkHeader *next;  //!< Next chunk in the linked list.
+    };
+
+    ChunkHeader *chunkHead_;    //!< Head of the chunk linked-list. Only the head chunk serves allocation.
+    size_t chunk_capacity_;     //!< The minimum capacity of chunk when they are allocated.
+    void *userBuffer_;          //!< User supplied buffer.
+    BaseAllocator* baseAllocator_;  //!< base allocator for allocating memory chunks.
+    BaseAllocator* ownBaseAllocator_;   //!< base allocator created by this object.
+};
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_ENCODINGS_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/cursorstreamwrapper.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/cursorstreamwrapper.h
new file mode 100644
index 000000000..52c11a7c0
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/cursorstreamwrapper.h
@@ -0,0 +1,78 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_CURSORSTREAMWRAPPER_H_
+#define RAPIDJSON_CURSORSTREAMWRAPPER_H_
+
+#include "stream.h"
+
+#if defined(__GNUC__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4702)  // unreachable code
+RAPIDJSON_DIAG_OFF(4512)  // assignment operator could not be generated
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+
+//! Cursor stream wrapper for counting line and column number if error exists.
+/*!
+    \tparam InputStream     Any stream that implements Stream Concept
+*/
+template <typename InputStream, typename Encoding = UTF8<> >
+class CursorStreamWrapper : public GenericStreamWrapper<InputStream, Encoding> {
+public:
+    typedef typename Encoding::Ch Ch;
+
+    CursorStreamWrapper(InputStream& is):
+        GenericStreamWrapper<InputStream, Encoding>(is), line_(1), col_(0) {}
+
+    // counting line and column number
+    Ch Take() {
+        Ch ch = this->is_.Take();
+        if(ch == '\n') {
+            line_ ++;
+            col_ = 0;
+        } else {
+            col_ ++;
+        }
+        return ch;
+    }
+
+    //! Get the error line number, if error exists.
+    size_t GetLine() const { return line_; }
+    //! Get the error column number, if error exists.
+    size_t GetColumn() const { return col_; }
+
+private:
+    size_t line_;   //!< Current Line
+    size_t col_;    //!< Current Column
+};
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+RAPIDJSON_DIAG_POP
+#endif
+
+#if defined(__GNUC__)
+RAPIDJSON_DIAG_POP
+#endif
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_CURSORSTREAMWRAPPER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/document.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/document.h
new file mode 100644
index 000000000..dfa499e7b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/document.h
@@ -0,0 +1,2643 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_DOCUMENT_H_
+#define RAPIDJSON_DOCUMENT_H_
+
+/*! \file document.h */
+
+#include "reader.h"
+#include "internal/meta.h"
+#include "internal/strfunc.h"
+#include "memorystream.h"
+#include "encodedstream.h"
+#include <new>      // placement new
+#include <limits>
+
+RAPIDJSON_DIAG_PUSH
+#ifdef __clang__
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
+RAPIDJSON_DIAG_OFF(4244) // conversion from kXxxFlags to 'uint16_t', possible loss of data
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_OFF(effc++)
+#endif // __GNUC__
+
+#ifndef RAPIDJSON_NOMEMBERITERATORCLASS
+#include <iterator> // std::random_access_iterator_tag
+#endif
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+#include <utility> // std::move
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+// Forward declaration.
+template <typename Encoding, typename Allocator>
+class GenericValue;
+
+template <typename Encoding, typename Allocator, typename StackAllocator>
+class GenericDocument;
+
+//! Name-value pair in a JSON object value.
+/*!
+    This class was internal to GenericValue. It used to be a inner struct.
+    But a compiler (IBM XL C/C++ for AIX) have reported to have problem with that so it moved as a namespace scope struct.
+    https://code.google.com/p/rapidjson/issues/detail?id=64
+*/
+template <typename Encoding, typename Allocator> 
+struct GenericMember { 
+    GenericValue<Encoding, Allocator> name;     //!< name of member (must be a string)
+    GenericValue<Encoding, Allocator> value;    //!< value of member.
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericMemberIterator
+
+#ifndef RAPIDJSON_NOMEMBERITERATORCLASS
+
+//! (Constant) member iterator for a JSON object value
+/*!
+    \tparam Const Is this a constant iterator?
+    \tparam Encoding    Encoding of the value. (Even non-string values need to have the same encoding in a document)
+    \tparam Allocator   Allocator type for allocating memory of object, array and string.
+
+    This class implements a Random Access Iterator for GenericMember elements
+    of a GenericValue, see ISO/IEC 14882:2003(E) C++ standard, 24.1 [lib.iterator.requirements].
+
+    \note This iterator implementation is mainly intended to avoid implicit
+        conversions from iterator values to \c NULL,
+        e.g. from GenericValue::FindMember.
+
+    \note Define \c RAPIDJSON_NOMEMBERITERATORCLASS to fall back to a
+        pointer-based implementation, if your platform doesn't provide
+        the C++ <iterator> header.
+
+    \see GenericMember, GenericValue::MemberIterator, GenericValue::ConstMemberIterator
+ */
+template <bool Const, typename Encoding, typename Allocator>
+class GenericMemberIterator {
+
+    friend class GenericValue<Encoding,Allocator>;
+    template <bool, typename, typename> friend class GenericMemberIterator;
+
+    typedef GenericMember<Encoding,Allocator> PlainType;
+    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+
+public:
+    //! Iterator type itself
+    typedef GenericMemberIterator Iterator;
+    //! Constant iterator type
+    typedef GenericMemberIterator<true,Encoding,Allocator>  ConstIterator;
+    //! Non-constant iterator type
+    typedef GenericMemberIterator<false,Encoding,Allocator> NonConstIterator;
+
+    /** \name std::iterator_traits support */
+    //@{
+    typedef ValueType      value_type;
+    typedef ValueType *    pointer;
+    typedef ValueType &    reference;
+    typedef std::ptrdiff_t difference_type;
+    typedef std::random_access_iterator_tag iterator_category;
+    //@}
+
+    //! Pointer to (const) GenericMember
+    typedef pointer         Pointer;
+    //! Reference to (const) GenericMember
+    typedef reference       Reference;
+    //! Signed integer type (e.g. \c ptrdiff_t)
+    typedef difference_type DifferenceType;
+
+    //! Default constructor (singular value)
+    /*! Creates an iterator pointing to no element.
+        \note All operations, except for comparisons, are undefined on such values.
+     */
+    GenericMemberIterator() : ptr_() {}
+
+    //! Iterator conversions to more const
+    /*!
+        \param it (Non-const) iterator to copy from
+
+        Allows the creation of an iterator from another GenericMemberIterator
+        that is "less const".  Especially, creating a non-constant iterator
+        from a constant iterator are disabled:
+        \li const -> non-const (not ok)
+        \li const -> const (ok)
+        \li non-const -> const (ok)
+        \li non-const -> non-const (ok)
+
+        \note If the \c Const template parameter is already \c false, this
+            constructor effectively defines a regular copy-constructor.
+            Otherwise, the copy constructor is implicitly defined.
+    */
+    GenericMemberIterator(const NonConstIterator & it) : ptr_(it.ptr_) {}
+    Iterator& operator=(const NonConstIterator & it) { ptr_ = it.ptr_; return *this; }
+
+    //! @name stepping
+    //@{
+    Iterator& operator++(){ ++ptr_; return *this; }
+    Iterator& operator--(){ --ptr_; return *this; }
+    Iterator  operator++(int){ Iterator old(*this); ++ptr_; return old; }
+    Iterator  operator--(int){ Iterator old(*this); --ptr_; return old; }
+    //@}
+
+    //! @name increment/decrement
+    //@{
+    Iterator operator+(DifferenceType n) const { return Iterator(ptr_+n); }
+    Iterator operator-(DifferenceType n) const { return Iterator(ptr_-n); }
+
+    Iterator& operator+=(DifferenceType n) { ptr_+=n; return *this; }
+    Iterator& operator-=(DifferenceType n) { ptr_-=n; return *this; }
+    //@}
+
+    //! @name relations
+    //@{
+    bool operator==(ConstIterator that) const { return ptr_ == that.ptr_; }
+    bool operator!=(ConstIterator that) const { return ptr_ != that.ptr_; }
+    bool operator<=(ConstIterator that) const { return ptr_ <= that.ptr_; }
+    bool operator>=(ConstIterator that) const { return ptr_ >= that.ptr_; }
+    bool operator< (ConstIterator that) const { return ptr_ < that.ptr_; }
+    bool operator> (ConstIterator that) const { return ptr_ > that.ptr_; }
+    //@}
+
+    //! @name dereference
+    //@{
+    Reference operator*() const { return *ptr_; }
+    Pointer   operator->() const { return ptr_; }
+    Reference operator[](DifferenceType n) const { return ptr_[n]; }
+    //@}
+
+    //! Distance
+    DifferenceType operator-(ConstIterator that) const { return ptr_-that.ptr_; }
+
+private:
+    //! Internal constructor from plain pointer
+    explicit GenericMemberIterator(Pointer p) : ptr_(p) {}
+
+    Pointer ptr_; //!< raw pointer
+};
+
+#else // RAPIDJSON_NOMEMBERITERATORCLASS
+
+// class-based member iterator implementation disabled, use plain pointers
+
+template <bool Const, typename Encoding, typename Allocator>
+struct GenericMemberIterator;
+
+//! non-const GenericMemberIterator
+template <typename Encoding, typename Allocator>
+struct GenericMemberIterator<false,Encoding,Allocator> {
+    //! use plain pointer as iterator type
+    typedef GenericMember<Encoding,Allocator>* Iterator;
+};
+//! const GenericMemberIterator
+template <typename Encoding, typename Allocator>
+struct GenericMemberIterator<true,Encoding,Allocator> {
+    //! use plain const pointer as iterator type
+    typedef const GenericMember<Encoding,Allocator>* Iterator;
+};
+
+#endif // RAPIDJSON_NOMEMBERITERATORCLASS
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericStringRef
+
+//! Reference to a constant string (not taking a copy)
+/*!
+    \tparam CharType character type of the string
+
+    This helper class is used to automatically infer constant string
+    references for string literals, especially from \c const \b (!)
+    character arrays.
+
+    The main use is for creating JSON string values without copying the
+    source string via an \ref Allocator.  This requires that the referenced
+    string pointers have a sufficient lifetime, which exceeds the lifetime
+    of the associated GenericValue.
+
+    \b Example
+    \code
+    Value v("foo");   // ok, no need to copy & calculate length
+    const char foo[] = "foo";
+    v.SetString(foo); // ok
+
+    const char* bar = foo;
+    // Value x(bar); // not ok, can't rely on bar's lifetime
+    Value x(StringRef(bar)); // lifetime explicitly guaranteed by user
+    Value y(StringRef(bar, 3));  // ok, explicitly pass length
+    \endcode
+
+    \see StringRef, GenericValue::SetString
+*/
+template<typename CharType>
+struct GenericStringRef {
+    typedef CharType Ch; //!< character type of the string
+
+    //! Create string reference from \c const character array
+#ifndef __clang__ // -Wdocumentation
+    /*!
+        This constructor implicitly creates a constant string reference from
+        a \c const character array.  It has better performance than
+        \ref StringRef(const CharType*) by inferring the string \ref length
+        from the array length, and also supports strings containing null
+        characters.
+
+        \tparam N length of the string, automatically inferred
+
+        \param str Constant character array, lifetime assumed to be longer
+            than the use of the string in e.g. a GenericValue
+
+        \post \ref s == str
+
+        \note Constant complexity.
+        \note There is a hidden, private overload to disallow references to
+            non-const character arrays to be created via this constructor.
+            By this, e.g. function-scope arrays used to be filled via
+            \c snprintf are excluded from consideration.
+            In such cases, the referenced string should be \b copied to the
+            GenericValue instead.
+     */
+#endif
+    template<SizeType N>
+    GenericStringRef(const CharType (&str)[N]) RAPIDJSON_NOEXCEPT
+        : s(str), length(N-1) {}
+
+    //! Explicitly create string reference from \c const character pointer
+#ifndef __clang__ // -Wdocumentation
+    /*!
+        This constructor can be used to \b explicitly  create a reference to
+        a constant string pointer.
+
+        \see StringRef(const CharType*)
+
+        \param str Constant character pointer, lifetime assumed to be longer
+            than the use of the string in e.g. a GenericValue
+
+        \post \ref s == str
+
+        \note There is a hidden, private overload to disallow references to
+            non-const character arrays to be created via this constructor.
+            By this, e.g. function-scope arrays used to be filled via
+            \c snprintf are excluded from consideration.
+            In such cases, the referenced string should be \b copied to the
+            GenericValue instead.
+     */
+#endif
+    explicit GenericStringRef(const CharType* str)
+        : s(str), length(NotNullStrLen(str)) {}
+
+    //! Create constant string reference from pointer and length
+#ifndef __clang__ // -Wdocumentation
+    /*! \param str constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+        \param len length of the string, excluding the trailing NULL terminator
+
+        \post \ref s == str && \ref length == len
+        \note Constant complexity.
+     */
+#endif
+    GenericStringRef(const CharType* str, SizeType len)
+        : s(RAPIDJSON_LIKELY(str) ? str : emptyString), length(len) { RAPIDJSON_ASSERT(str != 0 || len == 0u); }
+
+    GenericStringRef(const GenericStringRef& rhs) : s(rhs.s), length(rhs.length) {}
+
+    //! implicit conversion to plain CharType pointer
+    operator const Ch *() const { return s; }
+
+    const Ch* const s; //!< plain CharType pointer
+    const SizeType length; //!< length of the string (excluding the trailing NULL terminator)
+
+private:
+    SizeType NotNullStrLen(const CharType* str) {
+        RAPIDJSON_ASSERT(str != 0);
+        return internal::StrLen(str);
+    }
+
+    /// Empty string - used when passing in a NULL pointer
+    static const Ch emptyString[];
+
+    //! Disallow construction from non-const array
+    template<SizeType N>
+    GenericStringRef(CharType (&str)[N]) /* = delete */;
+    //! Copy assignment operator not permitted - immutable type
+    GenericStringRef& operator=(const GenericStringRef& rhs) /* = delete */;
+};
+
+template<typename CharType>
+const CharType GenericStringRef<CharType>::emptyString[] = { CharType() };
+
+//! Mark a character pointer as constant string
+/*! Mark a plain character pointer as a "string literal".  This function
+    can be used to avoid copying a character string to be referenced as a
+    value in a JSON GenericValue object, if the string's lifetime is known
+    to be valid long enough.
+    \tparam CharType Character type of the string
+    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+    \return GenericStringRef string reference object
+    \relatesalso GenericStringRef
+
+    \see GenericValue::GenericValue(StringRefType), GenericValue::operator=(StringRefType), GenericValue::SetString(StringRefType), GenericValue::PushBack(StringRefType, Allocator&), GenericValue::AddMember
+*/
+template<typename CharType>
+inline GenericStringRef<CharType> StringRef(const CharType* str) {
+    return GenericStringRef<CharType>(str);
+}
+
+//! Mark a character pointer as constant string
+/*! Mark a plain character pointer as a "string literal".  This function
+    can be used to avoid copying a character string to be referenced as a
+    value in a JSON GenericValue object, if the string's lifetime is known
+    to be valid long enough.
+
+    This version has better performance with supplied length, and also
+    supports string containing null characters.
+
+    \tparam CharType character type of the string
+    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+    \param length The length of source string.
+    \return GenericStringRef string reference object
+    \relatesalso GenericStringRef
+*/
+template<typename CharType>
+inline GenericStringRef<CharType> StringRef(const CharType* str, size_t length) {
+    return GenericStringRef<CharType>(str, SizeType(length));
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+//! Mark a string object as constant string
+/*! Mark a string object (e.g. \c std::string) as a "string literal".
+    This function can be used to avoid copying a string to be referenced as a
+    value in a JSON GenericValue object, if the string's lifetime is known
+    to be valid long enough.
+
+    \tparam CharType character type of the string
+    \param str Constant string, lifetime assumed to be longer than the use of the string in e.g. a GenericValue
+    \return GenericStringRef string reference object
+    \relatesalso GenericStringRef
+    \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+*/
+template<typename CharType>
+inline GenericStringRef<CharType> StringRef(const std::basic_string<CharType>& str) {
+    return GenericStringRef<CharType>(str.data(), SizeType(str.size()));
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericValue type traits
+namespace internal {
+
+template <typename T, typename Encoding = void, typename Allocator = void>
+struct IsGenericValueImpl : FalseType {};
+
+// select candidates according to nested encoding and allocator types
+template <typename T> struct IsGenericValueImpl<T, typename Void<typename T::EncodingType>::Type, typename Void<typename T::AllocatorType>::Type>
+    : IsBaseOf<GenericValue<typename T::EncodingType, typename T::AllocatorType>, T>::Type {};
+
+// helper to match arbitrary GenericValue instantiations, including derived classes
+template <typename T> struct IsGenericValue : IsGenericValueImpl<T>::Type {};
+
+} // namespace internal
+
+///////////////////////////////////////////////////////////////////////////////
+// TypeHelper
+
+namespace internal {
+
+template <typename ValueType, typename T>
+struct TypeHelper {};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, bool> {
+    static bool Is(const ValueType& v) { return v.IsBool(); }
+    static bool Get(const ValueType& v) { return v.GetBool(); }
+    static ValueType& Set(ValueType& v, bool data) { return v.SetBool(data); }
+    static ValueType& Set(ValueType& v, bool data, typename ValueType::AllocatorType&) { return v.SetBool(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, int> {
+    static bool Is(const ValueType& v) { return v.IsInt(); }
+    static int Get(const ValueType& v) { return v.GetInt(); }
+    static ValueType& Set(ValueType& v, int data) { return v.SetInt(data); }
+    static ValueType& Set(ValueType& v, int data, typename ValueType::AllocatorType&) { return v.SetInt(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, unsigned> {
+    static bool Is(const ValueType& v) { return v.IsUint(); }
+    static unsigned Get(const ValueType& v) { return v.GetUint(); }
+    static ValueType& Set(ValueType& v, unsigned data) { return v.SetUint(data); }
+    static ValueType& Set(ValueType& v, unsigned data, typename ValueType::AllocatorType&) { return v.SetUint(data); }
+};
+
+#ifdef _MSC_VER
+RAPIDJSON_STATIC_ASSERT(sizeof(long) == sizeof(int));
+template<typename ValueType>
+struct TypeHelper<ValueType, long> {
+    static bool Is(const ValueType& v) { return v.IsInt(); }
+    static long Get(const ValueType& v) { return v.GetInt(); }
+    static ValueType& Set(ValueType& v, long data) { return v.SetInt(data); }
+    static ValueType& Set(ValueType& v, long data, typename ValueType::AllocatorType&) { return v.SetInt(data); }
+};
+
+RAPIDJSON_STATIC_ASSERT(sizeof(unsigned long) == sizeof(unsigned));
+template<typename ValueType>
+struct TypeHelper<ValueType, unsigned long> {
+    static bool Is(const ValueType& v) { return v.IsUint(); }
+    static unsigned long Get(const ValueType& v) { return v.GetUint(); }
+    static ValueType& Set(ValueType& v, unsigned long data) { return v.SetUint(data); }
+    static ValueType& Set(ValueType& v, unsigned long data, typename ValueType::AllocatorType&) { return v.SetUint(data); }
+};
+#endif
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, int64_t> {
+    static bool Is(const ValueType& v) { return v.IsInt64(); }
+    static int64_t Get(const ValueType& v) { return v.GetInt64(); }
+    static ValueType& Set(ValueType& v, int64_t data) { return v.SetInt64(data); }
+    static ValueType& Set(ValueType& v, int64_t data, typename ValueType::AllocatorType&) { return v.SetInt64(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, uint64_t> {
+    static bool Is(const ValueType& v) { return v.IsUint64(); }
+    static uint64_t Get(const ValueType& v) { return v.GetUint64(); }
+    static ValueType& Set(ValueType& v, uint64_t data) { return v.SetUint64(data); }
+    static ValueType& Set(ValueType& v, uint64_t data, typename ValueType::AllocatorType&) { return v.SetUint64(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, double> {
+    static bool Is(const ValueType& v) { return v.IsDouble(); }
+    static double Get(const ValueType& v) { return v.GetDouble(); }
+    static ValueType& Set(ValueType& v, double data) { return v.SetDouble(data); }
+    static ValueType& Set(ValueType& v, double data, typename ValueType::AllocatorType&) { return v.SetDouble(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, float> {
+    static bool Is(const ValueType& v) { return v.IsFloat(); }
+    static float Get(const ValueType& v) { return v.GetFloat(); }
+    static ValueType& Set(ValueType& v, float data) { return v.SetFloat(data); }
+    static ValueType& Set(ValueType& v, float data, typename ValueType::AllocatorType&) { return v.SetFloat(data); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, const typename ValueType::Ch*> {
+    typedef const typename ValueType::Ch* StringType;
+    static bool Is(const ValueType& v) { return v.IsString(); }
+    static StringType Get(const ValueType& v) { return v.GetString(); }
+    static ValueType& Set(ValueType& v, const StringType data) { return v.SetString(typename ValueType::StringRefType(data)); }
+    static ValueType& Set(ValueType& v, const StringType data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+};
+
+#if RAPIDJSON_HAS_STDSTRING
+template<typename ValueType> 
+struct TypeHelper<ValueType, std::basic_string<typename ValueType::Ch> > {
+    typedef std::basic_string<typename ValueType::Ch> StringType;
+    static bool Is(const ValueType& v) { return v.IsString(); }
+    static StringType Get(const ValueType& v) { return StringType(v.GetString(), v.GetStringLength()); }
+    static ValueType& Set(ValueType& v, const StringType& data, typename ValueType::AllocatorType& a) { return v.SetString(data, a); }
+};
+#endif
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::Array> {
+    typedef typename ValueType::Array ArrayType;
+    static bool Is(const ValueType& v) { return v.IsArray(); }
+    static ArrayType Get(ValueType& v) { return v.GetArray(); }
+    static ValueType& Set(ValueType& v, ArrayType data) { return v = data; }
+    static ValueType& Set(ValueType& v, ArrayType data, typename ValueType::AllocatorType&) { return v = data; }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::ConstArray> {
+    typedef typename ValueType::ConstArray ArrayType;
+    static bool Is(const ValueType& v) { return v.IsArray(); }
+    static ArrayType Get(const ValueType& v) { return v.GetArray(); }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::Object> {
+    typedef typename ValueType::Object ObjectType;
+    static bool Is(const ValueType& v) { return v.IsObject(); }
+    static ObjectType Get(ValueType& v) { return v.GetObject(); }
+    static ValueType& Set(ValueType& v, ObjectType data) { return v = data; }
+    static ValueType& Set(ValueType& v, ObjectType data, typename ValueType::AllocatorType&) { return v = data; }
+};
+
+template<typename ValueType> 
+struct TypeHelper<ValueType, typename ValueType::ConstObject> {
+    typedef typename ValueType::ConstObject ObjectType;
+    static bool Is(const ValueType& v) { return v.IsObject(); }
+    static ObjectType Get(const ValueType& v) { return v.GetObject(); }
+};
+
+} // namespace internal
+
+// Forward declarations
+template <bool, typename> class GenericArray;
+template <bool, typename> class GenericObject;
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericValue
+
+//! Represents a JSON value. Use Value for UTF8 encoding and default allocator.
+/*!
+    A JSON value can be one of 7 types. This class is a variant type supporting
+    these types.
+
+    Use the Value if UTF8 and default allocator
+
+    \tparam Encoding    Encoding of the value. (Even non-string values need to have the same encoding in a document)
+    \tparam Allocator   Allocator type for allocating memory of object, array and string.
+*/
+template <typename Encoding, typename Allocator = MemoryPoolAllocator<> > 
+class GenericValue {
+public:
+    //! Name-value pair in an object.
+    typedef GenericMember<Encoding, Allocator> Member;
+    typedef Encoding EncodingType;                  //!< Encoding type from template parameter.
+    typedef Allocator AllocatorType;                //!< Allocator type from template parameter.
+    typedef typename Encoding::Ch Ch;               //!< Character type derived from Encoding.
+    typedef GenericStringRef<Ch> StringRefType;     //!< Reference to a constant string
+    typedef typename GenericMemberIterator<false,Encoding,Allocator>::Iterator MemberIterator;  //!< Member iterator for iterating in object.
+    typedef typename GenericMemberIterator<true,Encoding,Allocator>::Iterator ConstMemberIterator;  //!< Constant member iterator for iterating in object.
+    typedef GenericValue* ValueIterator;            //!< Value iterator for iterating in array.
+    typedef const GenericValue* ConstValueIterator; //!< Constant value iterator for iterating in array.
+    typedef GenericValue<Encoding, Allocator> ValueType;    //!< Value type of itself.
+    typedef GenericArray<false, ValueType> Array;
+    typedef GenericArray<true, ValueType> ConstArray;
+    typedef GenericObject<false, ValueType> Object;
+    typedef GenericObject<true, ValueType> ConstObject;
+
+    //!@name Constructors and destructor.
+    //@{
+
+    //! Default constructor creates a null value.
+    GenericValue() RAPIDJSON_NOEXCEPT : data_() { data_.f.flags = kNullFlag; }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move constructor in C++11
+    GenericValue(GenericValue&& rhs) RAPIDJSON_NOEXCEPT : data_(rhs.data_) {
+        rhs.data_.f.flags = kNullFlag; // give up contents
+    }
+#endif
+
+private:
+    //! Copy constructor is not permitted.
+    GenericValue(const GenericValue& rhs);
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Moving from a GenericDocument is not permitted.
+    template <typename StackAllocator>
+    GenericValue(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+
+    //! Move assignment from a GenericDocument is not permitted.
+    template <typename StackAllocator>
+    GenericValue& operator=(GenericDocument<Encoding,Allocator,StackAllocator>&& rhs);
+#endif
+
+public:
+
+    //! Constructor with JSON value type.
+    /*! This creates a Value of specified type with default content.
+        \param type Type of the value.
+        \note Default content for number is zero.
+    */
+    explicit GenericValue(Type type) RAPIDJSON_NOEXCEPT : data_() {
+        static const uint16_t defaultFlags[] = {
+            kNullFlag, kFalseFlag, kTrueFlag, kObjectFlag, kArrayFlag, kShortStringFlag,
+            kNumberAnyFlag
+        };
+        RAPIDJSON_NOEXCEPT_ASSERT(type >= kNullType && type <= kNumberType);
+        data_.f.flags = defaultFlags[type];
+
+        // Use ShortString to store empty string.
+        if (type == kStringType)
+            data_.ss.SetLength(0);
+    }
+
+    //! Explicit copy constructor (with allocator)
+    /*! Creates a copy of a Value by using the given Allocator
+        \tparam SourceAllocator allocator of \c rhs
+        \param rhs Value to copy from (read-only)
+        \param allocator Allocator for allocating copied elements and buffers. Commonly use GenericDocument::GetAllocator().
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
+        \see CopyFrom()
+    */
+    template <typename SourceAllocator>
+    GenericValue(const GenericValue<Encoding,SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
+        switch (rhs.GetType()) {
+        case kObjectType: {
+                SizeType count = rhs.data_.o.size;
+                Member* lm = reinterpret_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+                const typename GenericValue<Encoding,SourceAllocator>::Member* rm = rhs.GetMembersPointer();
+                for (SizeType i = 0; i < count; i++) {
+                    new (&lm[i].name) GenericValue(rm[i].name, allocator, copyConstStrings);
+                    new (&lm[i].value) GenericValue(rm[i].value, allocator, copyConstStrings);
+                }
+                data_.f.flags = kObjectFlag;
+                data_.o.size = data_.o.capacity = count;
+                SetMembersPointer(lm);
+            }
+            break;
+        case kArrayType: {
+                SizeType count = rhs.data_.a.size;
+                GenericValue* le = reinterpret_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+                const GenericValue<Encoding,SourceAllocator>* re = rhs.GetElementsPointer();
+                for (SizeType i = 0; i < count; i++)
+                    new (&le[i]) GenericValue(re[i], allocator, copyConstStrings);
+                data_.f.flags = kArrayFlag;
+                data_.a.size = data_.a.capacity = count;
+                SetElementsPointer(le);
+            }
+            break;
+        case kStringType:
+            if (rhs.data_.f.flags == kConstStringFlag && !copyConstStrings) {
+                data_.f.flags = rhs.data_.f.flags;
+                data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            }
+            else
+                SetStringRaw(StringRef(rhs.GetString(), rhs.GetStringLength()), allocator);
+            break;
+        default:
+            data_.f.flags = rhs.data_.f.flags;
+            data_  = *reinterpret_cast<const Data*>(&rhs.data_);
+            break;
+        }
+    }
+
+    //! Constructor for boolean value.
+    /*! \param b Boolean value
+        \note This constructor is limited to \em real boolean values and rejects
+            implicitly converted types like arbitrary pointers.  Use an explicit cast
+            to \c bool, if you want to construct a boolean JSON value in such cases.
+     */
+#ifndef RAPIDJSON_DOXYGEN_RUNNING // hide SFINAE from Doxygen
+    template <typename T>
+    explicit GenericValue(T b, RAPIDJSON_ENABLEIF((internal::IsSame<bool, T>))) RAPIDJSON_NOEXCEPT  // See #472
+#else
+    explicit GenericValue(bool b) RAPIDJSON_NOEXCEPT
+#endif
+        : data_() {
+            // safe-guard against failing SFINAE
+            RAPIDJSON_STATIC_ASSERT((internal::IsSame<bool,T>::Value));
+            data_.f.flags = b ? kTrueFlag : kFalseFlag;
+    }
+
+    //! Constructor for int value.
+    explicit GenericValue(int i) RAPIDJSON_NOEXCEPT : data_() {
+        data_.n.i64 = i;
+        data_.f.flags = (i >= 0) ? (kNumberIntFlag | kUintFlag | kUint64Flag) : kNumberIntFlag;
+    }
+
+    //! Constructor for unsigned value.
+    explicit GenericValue(unsigned u) RAPIDJSON_NOEXCEPT : data_() {
+        data_.n.u64 = u; 
+        data_.f.flags = (u & 0x80000000) ? kNumberUintFlag : (kNumberUintFlag | kIntFlag | kInt64Flag);
+    }
+
+    //! Constructor for int64_t value.
+    explicit GenericValue(int64_t i64) RAPIDJSON_NOEXCEPT : data_() {
+        data_.n.i64 = i64;
+        data_.f.flags = kNumberInt64Flag;
+        if (i64 >= 0) {
+            data_.f.flags |= kNumberUint64Flag;
+            if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
+                data_.f.flags |= kUintFlag;
+            if (!(static_cast<uint64_t>(i64) & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+                data_.f.flags |= kIntFlag;
+        }
+        else if (i64 >= static_cast<int64_t>(RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+            data_.f.flags |= kIntFlag;
+    }
+
+    //! Constructor for uint64_t value.
+    explicit GenericValue(uint64_t u64) RAPIDJSON_NOEXCEPT : data_() {
+        data_.n.u64 = u64;
+        data_.f.flags = kNumberUint64Flag;
+        if (!(u64 & RAPIDJSON_UINT64_C2(0x80000000, 0x00000000)))
+            data_.f.flags |= kInt64Flag;
+        if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x00000000)))
+            data_.f.flags |= kUintFlag;
+        if (!(u64 & RAPIDJSON_UINT64_C2(0xFFFFFFFF, 0x80000000)))
+            data_.f.flags |= kIntFlag;
+    }
+
+    //! Constructor for double value.
+    explicit GenericValue(double d) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = d; data_.f.flags = kNumberDoubleFlag; }
+
+    //! Constructor for float value.
+    explicit GenericValue(float f) RAPIDJSON_NOEXCEPT : data_() { data_.n.d = static_cast<double>(f); data_.f.flags = kNumberDoubleFlag; }
+
+    //! Constructor for constant string (i.e. do not make a copy of string)
+    GenericValue(const Ch* s, SizeType length) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(StringRef(s, length)); }
+
+    //! Constructor for constant string (i.e. do not make a copy of string)
+    explicit GenericValue(StringRefType s) RAPIDJSON_NOEXCEPT : data_() { SetStringRaw(s); }
+
+    //! Constructor for copy-string (i.e. do make a copy of string)
+    GenericValue(const Ch* s, SizeType length, Allocator& allocator) : data_() { SetStringRaw(StringRef(s, length), allocator); }
+
+    //! Constructor for copy-string (i.e. do make a copy of string)
+    GenericValue(const Ch*s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Constructor for copy-string from a string object (i.e. do make a copy of string)
+    /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+     */
+    GenericValue(const std::basic_string<Ch>& s, Allocator& allocator) : data_() { SetStringRaw(StringRef(s), allocator); }
+#endif
+
+    //! Constructor for Array.
+    /*!
+        \param a An array obtained by \c GetArray().
+        \note \c Array is always pass-by-value.
+        \note the source array is moved into this value and the sourec array becomes empty.
+    */
+    GenericValue(Array a) RAPIDJSON_NOEXCEPT : data_(a.value_.data_) {
+        a.value_.data_ = Data();
+        a.value_.data_.f.flags = kArrayFlag;
+    }
+
+    //! Constructor for Object.
+    /*!
+        \param o An object obtained by \c GetObject().
+        \note \c Object is always pass-by-value.
+        \note the source object is moved into this value and the sourec object becomes empty.
+    */
+    GenericValue(Object o) RAPIDJSON_NOEXCEPT : data_(o.value_.data_) {
+        o.value_.data_ = Data();
+        o.value_.data_.f.flags = kObjectFlag;
+    }
+
+    //! Destructor.
+    /*! Need to destruct elements of array, members of object, or copy-string.
+    */
+    ~GenericValue() {
+        if (Allocator::kNeedFree) { // Shortcut by Allocator's trait
+            switch(data_.f.flags) {
+            case kArrayFlag:
+                {
+                    GenericValue* e = GetElementsPointer();
+                    for (GenericValue* v = e; v != e + data_.a.size; ++v)
+                        v->~GenericValue();
+                    Allocator::Free(e);
+                }
+                break;
+
+            case kObjectFlag:
+                for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+                    m->~Member();
+                Allocator::Free(GetMembersPointer());
+                break;
+
+            case kCopyStringFlag:
+                Allocator::Free(const_cast<Ch*>(GetStringPointer()));
+                break;
+
+            default:
+                break;  // Do nothing for other types.
+            }
+        }
+    }
+
+    //@}
+
+    //!@name Assignment operators
+    //@{
+
+    //! Assignment with move semantics.
+    /*! \param rhs Source of the assignment. It will become a null value after assignment.
+    */
+    GenericValue& operator=(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
+        if (RAPIDJSON_LIKELY(this != &rhs)) {
+            this->~GenericValue();
+            RawAssign(rhs);
+        }
+        return *this;
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move assignment in C++11
+    GenericValue& operator=(GenericValue&& rhs) RAPIDJSON_NOEXCEPT {
+        return *this = rhs.Move();
+    }
+#endif
+
+    //! Assignment of constant string reference (no copy)
+    /*! \param str Constant string reference to be assigned
+        \note This overload is needed to avoid clashes with the generic primitive type assignment overload below.
+        \see GenericStringRef, operator=(T)
+    */
+    GenericValue& operator=(StringRefType str) RAPIDJSON_NOEXCEPT {
+        GenericValue s(str);
+        return *this = s;
+    }
+
+    //! Assignment with primitive types.
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+        \param value The value to be assigned.
+
+        \note The source type \c T explicitly disallows all pointer types,
+            especially (\c const) \ref Ch*.  This helps avoiding implicitly
+            referencing character strings with insufficient lifetime, use
+            \ref SetString(const Ch*, Allocator&) (for copying) or
+            \ref StringRef() (to explicitly mark the pointer as constant) instead.
+            All other pointer types would implicitly convert to \c bool,
+            use \ref SetBool() instead.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::IsPointer<T>), (GenericValue&))
+    operator=(T value) {
+        GenericValue v(value);
+        return *this = v;
+    }
+
+    //! Deep-copy assignment from Value
+    /*! Assigns a \b copy of the Value to the current Value object
+        \tparam SourceAllocator Allocator type of \c rhs
+        \param rhs Value to copy from (read-only)
+        \param allocator Allocator to use for copying
+        \param copyConstStrings Force copying of constant strings (e.g. referencing an in-situ buffer)
+     */
+    template <typename SourceAllocator>
+    GenericValue& CopyFrom(const GenericValue<Encoding, SourceAllocator>& rhs, Allocator& allocator, bool copyConstStrings = false) {
+        RAPIDJSON_ASSERT(static_cast<void*>(this) != static_cast<void const*>(&rhs));
+        this->~GenericValue();
+        new (this) GenericValue(rhs, allocator, copyConstStrings);
+        return *this;
+    }
+
+    //! Exchange the contents of this value with those of other.
+    /*!
+        \param other Another value.
+        \note Constant complexity.
+    */
+    GenericValue& Swap(GenericValue& other) RAPIDJSON_NOEXCEPT {
+        GenericValue temp;
+        temp.RawAssign(*this);
+        RawAssign(other);
+        other.RawAssign(temp);
+        return *this;
+    }
+
+    //! free-standing swap function helper
+    /*!
+        Helper function to enable support for common swap implementation pattern based on \c std::swap:
+        \code
+        void swap(MyClass& a, MyClass& b) {
+            using std::swap;
+            swap(a.value, b.value);
+            // ...
+        }
+        \endcode
+        \see Swap()
+     */
+    friend inline void swap(GenericValue& a, GenericValue& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
+
+    //! Prepare Value for move semantics
+    /*! \return *this */
+    GenericValue& Move() RAPIDJSON_NOEXCEPT { return *this; }
+    //@}
+
+    //!@name Equal-to and not-equal-to operators
+    //@{
+    //! Equal-to operator
+    /*!
+        \note If an object contains duplicated named member, comparing equality with any object is always \c false.
+        \note Linear time complexity (number of all values in the subtree and total lengths of all strings).
+    */
+    template <typename SourceAllocator>
+    bool operator==(const GenericValue<Encoding, SourceAllocator>& rhs) const {
+        typedef GenericValue<Encoding, SourceAllocator> RhsType;
+        if (GetType() != rhs.GetType())
+            return false;
+
+        switch (GetType()) {
+        case kObjectType: // Warning: O(n^2) inner-loop
+            if (data_.o.size != rhs.data_.o.size)
+                return false;           
+            for (ConstMemberIterator lhsMemberItr = MemberBegin(); lhsMemberItr != MemberEnd(); ++lhsMemberItr) {
+                typename RhsType::ConstMemberIterator rhsMemberItr = rhs.FindMember(lhsMemberItr->name);
+                if (rhsMemberItr == rhs.MemberEnd() || lhsMemberItr->value != rhsMemberItr->value)
+                    return false;
+            }
+            return true;
+            
+        case kArrayType:
+            if (data_.a.size != rhs.data_.a.size)
+                return false;
+            for (SizeType i = 0; i < data_.a.size; i++)
+                if ((*this)[i] != rhs[i])
+                    return false;
+            return true;
+
+        case kStringType:
+            return StringEqual(rhs);
+
+        case kNumberType:
+            if (IsDouble() || rhs.IsDouble()) {
+                double a = GetDouble();     // May convert from integer to double.
+                double b = rhs.GetDouble(); // Ditto
+                return a >= b && a <= b;    // Prevent -Wfloat-equal
+            }
+            else
+                return data_.n.u64 == rhs.data_.n.u64;
+
+        default:
+            return true;
+        }
+    }
+
+    //! Equal-to operator with const C-string pointer
+    bool operator==(const Ch* rhs) const { return *this == GenericValue(StringRef(rhs)); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Equal-to operator with string object
+    /*! \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+     */
+    bool operator==(const std::basic_string<Ch>& rhs) const { return *this == GenericValue(StringRef(rhs)); }
+#endif
+
+    //! Equal-to operator with primitive types
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c true, \c false
+    */
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>,internal::IsGenericValue<T> >), (bool)) operator==(const T& rhs) const { return *this == GenericValue(rhs); }
+
+    //! Not-equal-to operator
+    /*! \return !(*this == rhs)
+     */
+    template <typename SourceAllocator>
+    bool operator!=(const GenericValue<Encoding, SourceAllocator>& rhs) const { return !(*this == rhs); }
+
+    //! Not-equal-to operator with const C-string pointer
+    bool operator!=(const Ch* rhs) const { return !(*this == rhs); }
+
+    //! Not-equal-to operator with arbitrary types
+    /*! \return !(*this == rhs)
+     */
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& rhs) const { return !(*this == rhs); }
+
+    //! Equal-to operator with arbitrary types (symmetric version)
+    /*! \return (rhs == lhs)
+     */
+    template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator==(const T& lhs, const GenericValue& rhs) { return rhs == lhs; }
+
+    //! Not-Equal-to operator with arbitrary types (symmetric version)
+    /*! \return !(rhs == lhs)
+     */
+    template <typename T> friend RAPIDJSON_DISABLEIF_RETURN((internal::IsGenericValue<T>), (bool)) operator!=(const T& lhs, const GenericValue& rhs) { return !(rhs == lhs); }
+    //@}
+
+    //!@name Type
+    //@{
+
+    Type GetType()  const { return static_cast<Type>(data_.f.flags & kTypeMask); }
+    bool IsNull()   const { return data_.f.flags == kNullFlag; }
+    bool IsFalse()  const { return data_.f.flags == kFalseFlag; }
+    bool IsTrue()   const { return data_.f.flags == kTrueFlag; }
+    bool IsBool()   const { return (data_.f.flags & kBoolFlag) != 0; }
+    bool IsObject() const { return data_.f.flags == kObjectFlag; }
+    bool IsArray()  const { return data_.f.flags == kArrayFlag; }
+    bool IsNumber() const { return (data_.f.flags & kNumberFlag) != 0; }
+    bool IsInt()    const { return (data_.f.flags & kIntFlag) != 0; }
+    bool IsUint()   const { return (data_.f.flags & kUintFlag) != 0; }
+    bool IsInt64()  const { return (data_.f.flags & kInt64Flag) != 0; }
+    bool IsUint64() const { return (data_.f.flags & kUint64Flag) != 0; }
+    bool IsDouble() const { return (data_.f.flags & kDoubleFlag) != 0; }
+    bool IsString() const { return (data_.f.flags & kStringFlag) != 0; }
+
+    // Checks whether a number can be losslessly converted to a double.
+    bool IsLosslessDouble() const {
+        if (!IsNumber()) return false;
+        if (IsUint64()) {
+            uint64_t u = GetUint64();
+            volatile double d = static_cast<double>(u);
+            return (d >= 0.0)
+                && (d < static_cast<double>((std::numeric_limits<uint64_t>::max)()))
+                && (u == static_cast<uint64_t>(d));
+        }
+        if (IsInt64()) {
+            int64_t i = GetInt64();
+            volatile double d = static_cast<double>(i);
+            return (d >= static_cast<double>((std::numeric_limits<int64_t>::min)()))
+                && (d < static_cast<double>((std::numeric_limits<int64_t>::max)()))
+                && (i == static_cast<int64_t>(d));
+        }
+        return true; // double, int, uint are always lossless
+    }
+
+    // Checks whether a number is a float (possible lossy).
+    bool IsFloat() const  {
+        if ((data_.f.flags & kDoubleFlag) == 0)
+            return false;
+        double d = GetDouble();
+        return d >= -3.4028234e38 && d <= 3.4028234e38;
+    }
+    // Checks whether a number can be losslessly converted to a float.
+    bool IsLosslessFloat() const {
+        if (!IsNumber()) return false;
+        double a = GetDouble();
+        if (a < static_cast<double>(-(std::numeric_limits<float>::max)())
+                || a > static_cast<double>((std::numeric_limits<float>::max)()))
+            return false;
+        double b = static_cast<double>(static_cast<float>(a));
+        return a >= b && a <= b;    // Prevent -Wfloat-equal
+    }
+
+    //@}
+
+    //!@name Null
+    //@{
+
+    GenericValue& SetNull() { this->~GenericValue(); new (this) GenericValue(); return *this; }
+
+    //@}
+
+    //!@name Bool
+    //@{
+
+    bool GetBool() const { RAPIDJSON_ASSERT(IsBool()); return data_.f.flags == kTrueFlag; }
+    //!< Set boolean value
+    /*! \post IsBool() == true */
+    GenericValue& SetBool(bool b) { this->~GenericValue(); new (this) GenericValue(b); return *this; }
+
+    //@}
+
+    //!@name Object
+    //@{
+
+    //! Set this value as an empty object.
+    /*! \post IsObject() == true */
+    GenericValue& SetObject() { this->~GenericValue(); new (this) GenericValue(kObjectType); return *this; }
+
+    //! Get the number of members in the object.
+    SizeType MemberCount() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size; }
+
+    //! Get the capacity of object.
+    SizeType MemberCapacity() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.capacity; }
+
+    //! Check whether the object is empty.
+    bool ObjectEmpty() const { RAPIDJSON_ASSERT(IsObject()); return data_.o.size == 0; }
+
+    //! Get a value from an object associated with the name.
+    /*! \pre IsObject() == true
+        \tparam T Either \c Ch or \c const \c Ch (template used for disambiguation with \ref operator[](SizeType))
+        \note In version 0.1x, if the member is not found, this function returns a null value. This makes issue 7.
+        Since 0.2, if the name is not correct, it will assert.
+        If user is unsure whether a member exists, user should use HasMember() first.
+        A better approach is to use FindMember().
+        \note Linear time complexity.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(GenericValue&)) operator[](T* name) {
+        GenericValue n(StringRef(name));
+        return (*this)[n];
+    }
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >),(const GenericValue&)) operator[](T* name) const { return const_cast<GenericValue&>(*this)[name]; }
+
+    //! Get a value from an object associated with the name.
+    /*! \pre IsObject() == true
+        \tparam SourceAllocator Allocator of the \c name value
+
+        \note Compared to \ref operator[](T*), this version is faster because it does not need a StrLen().
+        And it can also handle strings with embedded null characters.
+
+        \note Linear time complexity.
+    */
+    template <typename SourceAllocator>
+    GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) {
+        MemberIterator member = FindMember(name);
+        if (member != MemberEnd())
+            return member->value;
+        else {
+            RAPIDJSON_ASSERT(false);    // see above note
+
+            // This will generate -Wexit-time-destructors in clang
+            // static GenericValue NullValue;
+            // return NullValue;
+
+            // Use static buffer and placement-new to prevent destruction
+            static char buffer[sizeof(GenericValue)];
+            return *new (buffer) GenericValue();
+        }
+    }
+    template <typename SourceAllocator>
+    const GenericValue& operator[](const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this)[name]; }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Get a value from an object associated with name (string object).
+    GenericValue& operator[](const std::basic_string<Ch>& name) { return (*this)[GenericValue(StringRef(name))]; }
+    const GenericValue& operator[](const std::basic_string<Ch>& name) const { return (*this)[GenericValue(StringRef(name))]; }
+#endif
+
+    //! Const member iterator
+    /*! \pre IsObject() == true */
+    ConstMemberIterator MemberBegin() const { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer()); }
+    //! Const \em past-the-end member iterator
+    /*! \pre IsObject() == true */
+    ConstMemberIterator MemberEnd() const   { RAPIDJSON_ASSERT(IsObject()); return ConstMemberIterator(GetMembersPointer() + data_.o.size); }
+    //! Member iterator
+    /*! \pre IsObject() == true */
+    MemberIterator MemberBegin()            { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer()); }
+    //! \em Past-the-end member iterator
+    /*! \pre IsObject() == true */
+    MemberIterator MemberEnd()              { RAPIDJSON_ASSERT(IsObject()); return MemberIterator(GetMembersPointer() + data_.o.size); }
+
+    //! Request the object to have enough capacity to store members.
+    /*! \param newCapacity  The capacity that the object at least need to have.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \note Linear time complexity.
+    */
+    GenericValue& MemberReserve(SizeType newCapacity, Allocator &allocator) {
+        RAPIDJSON_ASSERT(IsObject());
+        if (newCapacity > data_.o.capacity) {
+            SetMembersPointer(reinterpret_cast<Member*>(allocator.Realloc(GetMembersPointer(), data_.o.capacity * sizeof(Member), newCapacity * sizeof(Member))));
+            data_.o.capacity = newCapacity;
+        }
+        return *this;
+    }
+
+    //! Check whether a member exists in the object.
+    /*!
+        \param name Member name to be searched.
+        \pre IsObject() == true
+        \return Whether a member with that name exists.
+        \note It is better to use FindMember() directly if you need the obtain the value as well.
+        \note Linear time complexity.
+    */
+    bool HasMember(const Ch* name) const { return FindMember(name) != MemberEnd(); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Check whether a member exists in the object with string object.
+    /*!
+        \param name Member name to be searched.
+        \pre IsObject() == true
+        \return Whether a member with that name exists.
+        \note It is better to use FindMember() directly if you need the obtain the value as well.
+        \note Linear time complexity.
+    */
+    bool HasMember(const std::basic_string<Ch>& name) const { return FindMember(name) != MemberEnd(); }
+#endif
+
+    //! Check whether a member exists in the object with GenericValue name.
+    /*!
+        This version is faster because it does not need a StrLen(). It can also handle string with null character.
+        \param name Member name to be searched.
+        \pre IsObject() == true
+        \return Whether a member with that name exists.
+        \note It is better to use FindMember() directly if you need the obtain the value as well.
+        \note Linear time complexity.
+    */
+    template <typename SourceAllocator>
+    bool HasMember(const GenericValue<Encoding, SourceAllocator>& name) const { return FindMember(name) != MemberEnd(); }
+
+    //! Find member by name.
+    /*!
+        \param name Member name to be searched.
+        \pre IsObject() == true
+        \return Iterator to member, if it exists.
+            Otherwise returns \ref MemberEnd().
+
+        \note Earlier versions of Rapidjson returned a \c NULL pointer, in case
+            the requested member doesn't exist. For consistency with e.g.
+            \c std::map, this has been changed to MemberEnd() now.
+        \note Linear time complexity.
+    */
+    MemberIterator FindMember(const Ch* name) {
+        GenericValue n(StringRef(name));
+        return FindMember(n);
+    }
+
+    ConstMemberIterator FindMember(const Ch* name) const { return const_cast<GenericValue&>(*this).FindMember(name); }
+
+    //! Find member by name.
+    /*!
+        This version is faster because it does not need a StrLen(). It can also handle string with null character.
+        \param name Member name to be searched.
+        \pre IsObject() == true
+        \return Iterator to member, if it exists.
+            Otherwise returns \ref MemberEnd().
+
+        \note Earlier versions of Rapidjson returned a \c NULL pointer, in case
+            the requested member doesn't exist. For consistency with e.g.
+            \c std::map, this has been changed to MemberEnd() now.
+        \note Linear time complexity.
+    */
+    template <typename SourceAllocator>
+    MemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) {
+        RAPIDJSON_ASSERT(IsObject());
+        RAPIDJSON_ASSERT(name.IsString());
+        MemberIterator member = MemberBegin();
+        for ( ; member != MemberEnd(); ++member)
+            if (name.StringEqual(member->name))
+                break;
+        return member;
+    }
+    template <typename SourceAllocator> ConstMemberIterator FindMember(const GenericValue<Encoding, SourceAllocator>& name) const { return const_cast<GenericValue&>(*this).FindMember(name); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Find member by string object name.
+    /*!
+        \param name Member name to be searched.
+        \pre IsObject() == true
+        \return Iterator to member, if it exists.
+            Otherwise returns \ref MemberEnd().
+    */
+    MemberIterator FindMember(const std::basic_string<Ch>& name) { return FindMember(GenericValue(StringRef(name))); }
+    ConstMemberIterator FindMember(const std::basic_string<Ch>& name) const { return FindMember(GenericValue(StringRef(name))); }
+#endif
+
+    //! Add a member (name-value pair) to the object.
+    /*! \param name A string value as name of member.
+        \param value Value of any type.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \note The ownership of \c name and \c value will be transferred to this object on success.
+        \pre  IsObject() && name.IsString()
+        \post name.IsNull() && value.IsNull()
+        \note Amortized Constant time complexity.
+    */
+    GenericValue& AddMember(GenericValue& name, GenericValue& value, Allocator& allocator) {
+        RAPIDJSON_ASSERT(IsObject());
+        RAPIDJSON_ASSERT(name.IsString());
+
+        ObjectData& o = data_.o;
+        if (o.size >= o.capacity)
+            MemberReserve(o.capacity == 0 ? kDefaultObjectCapacity : (o.capacity + (o.capacity + 1) / 2), allocator);
+        Member* members = GetMembersPointer();
+        members[o.size].name.RawAssign(name);
+        members[o.size].value.RawAssign(value);
+        o.size++;
+        return *this;
+    }
+
+    //! Add a constant string value as member (name-value pair) to the object.
+    /*! \param name A string value as name of member.
+        \param value constant string reference as value of member.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \pre  IsObject()
+        \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below.
+        \note Amortized Constant time complexity.
+    */
+    GenericValue& AddMember(GenericValue& name, StringRefType value, Allocator& allocator) {
+        GenericValue v(value);
+        return AddMember(name, v, allocator);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Add a string object as member (name-value pair) to the object.
+    /*! \param name A string value as name of member.
+        \param value constant string reference as value of member.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \pre  IsObject()
+        \note This overload is needed to avoid clashes with the generic primitive type AddMember(GenericValue&,T,Allocator&) overload below.
+        \note Amortized Constant time complexity.
+    */
+    GenericValue& AddMember(GenericValue& name, std::basic_string<Ch>& value, Allocator& allocator) {
+        GenericValue v(value, allocator);
+        return AddMember(name, v, allocator);
+    }
+#endif
+
+    //! Add any primitive value as member (name-value pair) to the object.
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+        \param name A string value as name of member.
+        \param value Value of primitive type \c T as value of member
+        \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \pre  IsObject()
+
+        \note The source type \c T explicitly disallows all pointer types,
+            especially (\c const) \ref Ch*.  This helps avoiding implicitly
+            referencing character strings with insufficient lifetime, use
+            \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref
+            AddMember(StringRefType, StringRefType, Allocator&).
+            All other pointer types would implicitly convert to \c bool,
+            use an explicit cast instead, if needed.
+        \note Amortized Constant time complexity.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+    AddMember(GenericValue& name, T value, Allocator& allocator) {
+        GenericValue v(value);
+        return AddMember(name, v, allocator);
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericValue& AddMember(GenericValue&& name, GenericValue&& value, Allocator& allocator) {
+        return AddMember(name, value, allocator);
+    }
+    GenericValue& AddMember(GenericValue&& name, GenericValue& value, Allocator& allocator) {
+        return AddMember(name, value, allocator);
+    }
+    GenericValue& AddMember(GenericValue& name, GenericValue&& value, Allocator& allocator) {
+        return AddMember(name, value, allocator);
+    }
+    GenericValue& AddMember(StringRefType name, GenericValue&& value, Allocator& allocator) {
+        GenericValue n(name);
+        return AddMember(n, value, allocator);
+    }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+
+    //! Add a member (name-value pair) to the object.
+    /*! \param name A constant string reference as name of member.
+        \param value Value of any type.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \note The ownership of \c value will be transferred to this object on success.
+        \pre  IsObject()
+        \post value.IsNull()
+        \note Amortized Constant time complexity.
+    */
+    GenericValue& AddMember(StringRefType name, GenericValue& value, Allocator& allocator) {
+        GenericValue n(name);
+        return AddMember(n, value, allocator);
+    }
+
+    //! Add a constant string value as member (name-value pair) to the object.
+    /*! \param name A constant string reference as name of member.
+        \param value constant string reference as value of member.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \pre  IsObject()
+        \note This overload is needed to avoid clashes with the generic primitive type AddMember(StringRefType,T,Allocator&) overload below.
+        \note Amortized Constant time complexity.
+    */
+    GenericValue& AddMember(StringRefType name, StringRefType value, Allocator& allocator) {
+        GenericValue v(value);
+        return AddMember(name, v, allocator);
+    }
+
+    //! Add any primitive value as member (name-value pair) to the object.
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+        \param name A constant string reference as name of member.
+        \param value Value of primitive type \c T as value of member
+        \param allocator Allocator for reallocating memory. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \pre  IsObject()
+
+        \note The source type \c T explicitly disallows all pointer types,
+            especially (\c const) \ref Ch*.  This helps avoiding implicitly
+            referencing character strings with insufficient lifetime, use
+            \ref AddMember(StringRefType, GenericValue&, Allocator&) or \ref
+            AddMember(StringRefType, StringRefType, Allocator&).
+            All other pointer types would implicitly convert to \c bool,
+            use an explicit cast instead, if needed.
+        \note Amortized Constant time complexity.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+    AddMember(StringRefType name, T value, Allocator& allocator) {
+        GenericValue n(name);
+        return AddMember(n, value, allocator);
+    }
+
+    //! Remove all members in the object.
+    /*! This function do not deallocate memory in the object, i.e. the capacity is unchanged.
+        \note Linear time complexity.
+    */
+    void RemoveAllMembers() {
+        RAPIDJSON_ASSERT(IsObject()); 
+        for (MemberIterator m = MemberBegin(); m != MemberEnd(); ++m)
+            m->~Member();
+        data_.o.size = 0;
+    }
+
+    //! Remove a member in object by its name.
+    /*! \param name Name of member to be removed.
+        \return Whether the member existed.
+        \note This function may reorder the object members. Use \ref
+            EraseMember(ConstMemberIterator) if you need to preserve the
+            relative order of the remaining members.
+        \note Linear time complexity.
+    */
+    bool RemoveMember(const Ch* name) {
+        GenericValue n(StringRef(name));
+        return RemoveMember(n);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool RemoveMember(const std::basic_string<Ch>& name) { return RemoveMember(GenericValue(StringRef(name))); }
+#endif
+
+    template <typename SourceAllocator>
+    bool RemoveMember(const GenericValue<Encoding, SourceAllocator>& name) {
+        MemberIterator m = FindMember(name);
+        if (m != MemberEnd()) {
+            RemoveMember(m);
+            return true;
+        }
+        else
+            return false;
+    }
+
+    //! Remove a member in object by iterator.
+    /*! \param m member iterator (obtained by FindMember() or MemberBegin()).
+        \return the new iterator after removal.
+        \note This function may reorder the object members. Use \ref
+            EraseMember(ConstMemberIterator) if you need to preserve the
+            relative order of the remaining members.
+        \note Constant time complexity.
+    */
+    MemberIterator RemoveMember(MemberIterator m) {
+        RAPIDJSON_ASSERT(IsObject());
+        RAPIDJSON_ASSERT(data_.o.size > 0);
+        RAPIDJSON_ASSERT(GetMembersPointer() != 0);
+        RAPIDJSON_ASSERT(m >= MemberBegin() && m < MemberEnd());
+
+        MemberIterator last(GetMembersPointer() + (data_.o.size - 1));
+        if (data_.o.size > 1 && m != last)
+            *m = *last; // Move the last one to this place
+        else
+            m->~Member(); // Only one left, just destroy
+        --data_.o.size;
+        return m;
+    }
+
+    //! Remove a member from an object by iterator.
+    /*! \param pos iterator to the member to remove
+        \pre IsObject() == true && \ref MemberBegin() <= \c pos < \ref MemberEnd()
+        \return Iterator following the removed element.
+            If the iterator \c pos refers to the last element, the \ref MemberEnd() iterator is returned.
+        \note This function preserves the relative order of the remaining object
+            members. If you do not need this, use the more efficient \ref RemoveMember(MemberIterator).
+        \note Linear time complexity.
+    */
+    MemberIterator EraseMember(ConstMemberIterator pos) {
+        return EraseMember(pos, pos +1);
+    }
+
+    //! Remove members in the range [first, last) from an object.
+    /*! \param first iterator to the first member to remove
+        \param last  iterator following the last member to remove
+        \pre IsObject() == true && \ref MemberBegin() <= \c first <= \c last <= \ref MemberEnd()
+        \return Iterator following the last removed element.
+        \note This function preserves the relative order of the remaining object
+            members.
+        \note Linear time complexity.
+    */
+    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) {
+        RAPIDJSON_ASSERT(IsObject());
+        RAPIDJSON_ASSERT(data_.o.size > 0);
+        RAPIDJSON_ASSERT(GetMembersPointer() != 0);
+        RAPIDJSON_ASSERT(first >= MemberBegin());
+        RAPIDJSON_ASSERT(first <= last);
+        RAPIDJSON_ASSERT(last <= MemberEnd());
+
+        MemberIterator pos = MemberBegin() + (first - MemberBegin());
+        for (MemberIterator itr = pos; itr != last; ++itr)
+            itr->~Member();
+        std::memmove(static_cast<void*>(&*pos), &*last, static_cast<size_t>(MemberEnd() - last) * sizeof(Member));
+        data_.o.size -= static_cast<SizeType>(last - first);
+        return pos;
+    }
+
+    //! Erase a member in object by its name.
+    /*! \param name Name of member to be removed.
+        \return Whether the member existed.
+        \note Linear time complexity.
+    */
+    bool EraseMember(const Ch* name) {
+        GenericValue n(StringRef(name));
+        return EraseMember(n);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool EraseMember(const std::basic_string<Ch>& name) { return EraseMember(GenericValue(StringRef(name))); }
+#endif
+
+    template <typename SourceAllocator>
+    bool EraseMember(const GenericValue<Encoding, SourceAllocator>& name) {
+        MemberIterator m = FindMember(name);
+        if (m != MemberEnd()) {
+            EraseMember(m);
+            return true;
+        }
+        else
+            return false;
+    }
+
+    Object GetObject() { RAPIDJSON_ASSERT(IsObject()); return Object(*this); }
+    ConstObject GetObject() const { RAPIDJSON_ASSERT(IsObject()); return ConstObject(*this); }
+
+    //@}
+
+    //!@name Array
+    //@{
+
+    //! Set this value as an empty array.
+    /*! \post IsArray == true */
+    GenericValue& SetArray() { this->~GenericValue(); new (this) GenericValue(kArrayType); return *this; }
+
+    //! Get the number of elements in array.
+    SizeType Size() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size; }
+
+    //! Get the capacity of array.
+    SizeType Capacity() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.capacity; }
+
+    //! Check whether the array is empty.
+    bool Empty() const { RAPIDJSON_ASSERT(IsArray()); return data_.a.size == 0; }
+
+    //! Remove all elements in the array.
+    /*! This function do not deallocate memory in the array, i.e. the capacity is unchanged.
+        \note Linear time complexity.
+    */
+    void Clear() {
+        RAPIDJSON_ASSERT(IsArray()); 
+        GenericValue* e = GetElementsPointer();
+        for (GenericValue* v = e; v != e + data_.a.size; ++v)
+            v->~GenericValue();
+        data_.a.size = 0;
+    }
+
+    //! Get an element from array by index.
+    /*! \pre IsArray() == true
+        \param index Zero-based index of element.
+        \see operator[](T*)
+    */
+    GenericValue& operator[](SizeType index) {
+        RAPIDJSON_ASSERT(IsArray());
+        RAPIDJSON_ASSERT(index < data_.a.size);
+        return GetElementsPointer()[index];
+    }
+    const GenericValue& operator[](SizeType index) const { return const_cast<GenericValue&>(*this)[index]; }
+
+    //! Element iterator
+    /*! \pre IsArray() == true */
+    ValueIterator Begin() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer(); }
+    //! \em Past-the-end element iterator
+    /*! \pre IsArray() == true */
+    ValueIterator End() { RAPIDJSON_ASSERT(IsArray()); return GetElementsPointer() + data_.a.size; }
+    //! Constant element iterator
+    /*! \pre IsArray() == true */
+    ConstValueIterator Begin() const { return const_cast<GenericValue&>(*this).Begin(); }
+    //! Constant \em past-the-end element iterator
+    /*! \pre IsArray() == true */
+    ConstValueIterator End() const { return const_cast<GenericValue&>(*this).End(); }
+
+    //! Request the array to have enough capacity to store elements.
+    /*! \param newCapacity  The capacity that the array at least need to have.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \note Linear time complexity.
+    */
+    GenericValue& Reserve(SizeType newCapacity, Allocator &allocator) {
+        RAPIDJSON_ASSERT(IsArray());
+        if (newCapacity > data_.a.capacity) {
+            SetElementsPointer(reinterpret_cast<GenericValue*>(allocator.Realloc(GetElementsPointer(), data_.a.capacity * sizeof(GenericValue), newCapacity * sizeof(GenericValue))));
+            data_.a.capacity = newCapacity;
+        }
+        return *this;
+    }
+
+    //! Append a GenericValue at the end of the array.
+    /*! \param value        Value to be appended.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \pre IsArray() == true
+        \post value.IsNull() == true
+        \return The value itself for fluent API.
+        \note The ownership of \c value will be transferred to this array on success.
+        \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
+        \note Amortized constant time complexity.
+    */
+    GenericValue& PushBack(GenericValue& value, Allocator& allocator) {
+        RAPIDJSON_ASSERT(IsArray());
+        if (data_.a.size >= data_.a.capacity)
+            Reserve(data_.a.capacity == 0 ? kDefaultArrayCapacity : (data_.a.capacity + (data_.a.capacity + 1) / 2), allocator);
+        GetElementsPointer()[data_.a.size++].RawAssign(value);
+        return *this;
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericValue& PushBack(GenericValue&& value, Allocator& allocator) {
+        return PushBack(value, allocator);
+    }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+    //! Append a constant string reference at the end of the array.
+    /*! \param value        Constant string reference to be appended.
+        \param allocator    Allocator for reallocating memory. It must be the same one used previously. Commonly use GenericDocument::GetAllocator().
+        \pre IsArray() == true
+        \return The value itself for fluent API.
+        \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
+        \note Amortized constant time complexity.
+        \see GenericStringRef
+    */
+    GenericValue& PushBack(StringRefType value, Allocator& allocator) {
+        return (*this).template PushBack<StringRefType>(value, allocator);
+    }
+
+    //! Append a primitive value at the end of the array.
+    /*! \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t
+        \param value Value of primitive type T to be appended.
+        \param allocator    Allocator for reallocating memory. It must be the same one as used before. Commonly use GenericDocument::GetAllocator().
+        \pre IsArray() == true
+        \return The value itself for fluent API.
+        \note If the number of elements to be appended is known, calls Reserve() once first may be more efficient.
+
+        \note The source type \c T explicitly disallows all pointer types,
+            especially (\c const) \ref Ch*.  This helps avoiding implicitly
+            referencing character strings with insufficient lifetime, use
+            \ref PushBack(GenericValue&, Allocator&) or \ref
+            PushBack(StringRefType, Allocator&).
+            All other pointer types would implicitly convert to \c bool,
+            use an explicit cast instead, if needed.
+        \note Amortized constant time complexity.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericValue&))
+    PushBack(T value, Allocator& allocator) {
+        GenericValue v(value);
+        return PushBack(v, allocator);
+    }
+
+    //! Remove the last element in the array.
+    /*!
+        \note Constant time complexity.
+    */
+    GenericValue& PopBack() {
+        RAPIDJSON_ASSERT(IsArray());
+        RAPIDJSON_ASSERT(!Empty());
+        GetElementsPointer()[--data_.a.size].~GenericValue();
+        return *this;
+    }
+
+    //! Remove an element of array by iterator.
+    /*!
+        \param pos iterator to the element to remove
+        \pre IsArray() == true && \ref Begin() <= \c pos < \ref End()
+        \return Iterator following the removed element. If the iterator pos refers to the last element, the End() iterator is returned.
+        \note Linear time complexity.
+    */
+    ValueIterator Erase(ConstValueIterator pos) {
+        return Erase(pos, pos + 1);
+    }
+
+    //! Remove elements in the range [first, last) of the array.
+    /*!
+        \param first iterator to the first element to remove
+        \param last  iterator following the last element to remove
+        \pre IsArray() == true && \ref Begin() <= \c first <= \c last <= \ref End()
+        \return Iterator following the last removed element.
+        \note Linear time complexity.
+    */
+    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) {
+        RAPIDJSON_ASSERT(IsArray());
+        RAPIDJSON_ASSERT(data_.a.size > 0);
+        RAPIDJSON_ASSERT(GetElementsPointer() != 0);
+        RAPIDJSON_ASSERT(first >= Begin());
+        RAPIDJSON_ASSERT(first <= last);
+        RAPIDJSON_ASSERT(last <= End());
+        ValueIterator pos = Begin() + (first - Begin());
+        for (ValueIterator itr = pos; itr != last; ++itr)
+            itr->~GenericValue();
+        std::memmove(static_cast<void*>(pos), last, static_cast<size_t>(End() - last) * sizeof(GenericValue));
+        data_.a.size -= static_cast<SizeType>(last - first);
+        return pos;
+    }
+
+    Array GetArray() { RAPIDJSON_ASSERT(IsArray()); return Array(*this); }
+    ConstArray GetArray() const { RAPIDJSON_ASSERT(IsArray()); return ConstArray(*this); }
+
+    //@}
+
+    //!@name Number
+    //@{
+
+    int GetInt() const          { RAPIDJSON_ASSERT(data_.f.flags & kIntFlag);   return data_.n.i.i;   }
+    unsigned GetUint() const    { RAPIDJSON_ASSERT(data_.f.flags & kUintFlag);  return data_.n.u.u;   }
+    int64_t GetInt64() const    { RAPIDJSON_ASSERT(data_.f.flags & kInt64Flag); return data_.n.i64; }
+    uint64_t GetUint64() const  { RAPIDJSON_ASSERT(data_.f.flags & kUint64Flag); return data_.n.u64; }
+
+    //! Get the value as double type.
+    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessDouble() to check whether the converison is lossless.
+    */
+    double GetDouble() const {
+        RAPIDJSON_ASSERT(IsNumber());
+        if ((data_.f.flags & kDoubleFlag) != 0)                return data_.n.d;   // exact type, no conversion.
+        if ((data_.f.flags & kIntFlag) != 0)                   return data_.n.i.i; // int -> double
+        if ((data_.f.flags & kUintFlag) != 0)                  return data_.n.u.u; // unsigned -> double
+        if ((data_.f.flags & kInt64Flag) != 0)                 return static_cast<double>(data_.n.i64); // int64_t -> double (may lose precision)
+        RAPIDJSON_ASSERT((data_.f.flags & kUint64Flag) != 0);  return static_cast<double>(data_.n.u64); // uint64_t -> double (may lose precision)
+    }
+
+    //! Get the value as float type.
+    /*! \note If the value is 64-bit integer type, it may lose precision. Use \c IsLosslessFloat() to check whether the converison is lossless.
+    */
+    float GetFloat() const {
+        return static_cast<float>(GetDouble());
+    }
+
+    GenericValue& SetInt(int i)             { this->~GenericValue(); new (this) GenericValue(i);    return *this; }
+    GenericValue& SetUint(unsigned u)       { this->~GenericValue(); new (this) GenericValue(u);    return *this; }
+    GenericValue& SetInt64(int64_t i64)     { this->~GenericValue(); new (this) GenericValue(i64);  return *this; }
+    GenericValue& SetUint64(uint64_t u64)   { this->~GenericValue(); new (this) GenericValue(u64);  return *this; }
+    GenericValue& SetDouble(double d)       { this->~GenericValue(); new (this) GenericValue(d);    return *this; }
+    GenericValue& SetFloat(float f)         { this->~GenericValue(); new (this) GenericValue(static_cast<double>(f)); return *this; }
+
+    //@}
+
+    //!@name String
+    //@{
+
+    const Ch* GetString() const { RAPIDJSON_ASSERT(IsString()); return (data_.f.flags & kInlineStrFlag) ? data_.ss.str : GetStringPointer(); }
+
+    //! Get the length of string.
+    /*! Since rapidjson permits "\\u0000" in the json string, strlen(v.GetString()) may not equal to v.GetStringLength().
+    */
+    SizeType GetStringLength() const { RAPIDJSON_ASSERT(IsString()); return ((data_.f.flags & kInlineStrFlag) ? (data_.ss.GetLength()) : data_.s.length); }
+
+    //! Set this value as a string without copying source string.
+    /*! This version has better performance with supplied length, and also support string containing null character.
+        \param s source string pointer. 
+        \param length The length of source string, excluding the trailing null terminator.
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() == s && GetStringLength() == length
+        \see SetString(StringRefType)
+    */
+    GenericValue& SetString(const Ch* s, SizeType length) { return SetString(StringRef(s, length)); }
+
+    //! Set this value as a string without copying source string.
+    /*! \param s source string reference
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() == s && GetStringLength() == s.length
+    */
+    GenericValue& SetString(StringRefType s) { this->~GenericValue(); SetStringRaw(s); return *this; }
+
+    //! Set this value as a string by copying from source string.
+    /*! This version has better performance with supplied length, and also support string containing null character.
+        \param s source string. 
+        \param length The length of source string, excluding the trailing null terminator.
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    */
+    GenericValue& SetString(const Ch* s, SizeType length, Allocator& allocator) { return SetString(StringRef(s, length), allocator); }
+
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string. 
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    */
+    GenericValue& SetString(const Ch* s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string reference
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s.s && strcmp(GetString(),s) == 0 && GetStringLength() == length
+    */
+    GenericValue& SetString(StringRefType s, Allocator& allocator) { this->~GenericValue(); SetStringRaw(s, allocator); return *this; }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Set this value as a string by copying from source string.
+    /*! \param s source string.
+        \param allocator Allocator for allocating copied buffer. Commonly use GenericDocument::GetAllocator().
+        \return The value itself for fluent API.
+        \post IsString() == true && GetString() != s.data() && strcmp(GetString(),s.data() == 0 && GetStringLength() == s.size()
+        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+    */
+    GenericValue& SetString(const std::basic_string<Ch>& s, Allocator& allocator) { return SetString(StringRef(s), allocator); }
+#endif
+
+    //@}
+
+    //!@name Array
+    //@{
+
+    //! Templated version for checking whether this value is type T.
+    /*!
+        \tparam T Either \c bool, \c int, \c unsigned, \c int64_t, \c uint64_t, \c double, \c float, \c const \c char*, \c std::basic_string<Ch>
+    */
+    template <typename T>
+    bool Is() const { return internal::TypeHelper<ValueType, T>::Is(*this); }
+
+    template <typename T>
+    T Get() const { return internal::TypeHelper<ValueType, T>::Get(*this); }
+
+    template <typename T>
+    T Get() { return internal::TypeHelper<ValueType, T>::Get(*this); }
+
+    template<typename T>
+    ValueType& Set(const T& data, AllocatorType& allocator) { return internal::TypeHelper<ValueType, T>::Set(*this, data, allocator); }
+
+    //@}
+
+    //! Generate events of this value to a Handler.
+    /*! This function adopts the GoF visitor pattern.
+        Typical usage is to output this JSON value as JSON text via Writer, which is a Handler.
+        It can also be used to deep clone this value via GenericDocument, which is also a Handler.
+        \tparam Handler type of handler.
+        \param handler An object implementing concept Handler.
+    */
+    template <typename Handler>
+    bool Accept(Handler& handler) const {
+        switch(GetType()) {
+        case kNullType:     return handler.Null();
+        case kFalseType:    return handler.Bool(false);
+        case kTrueType:     return handler.Bool(true);
+
+        case kObjectType:
+            if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
+                return false;
+            for (ConstMemberIterator m = MemberBegin(); m != MemberEnd(); ++m) {
+                RAPIDJSON_ASSERT(m->name.IsString()); // User may change the type of name by MemberIterator.
+                if (RAPIDJSON_UNLIKELY(!handler.Key(m->name.GetString(), m->name.GetStringLength(), (m->name.data_.f.flags & kCopyFlag) != 0)))
+                    return false;
+                if (RAPIDJSON_UNLIKELY(!m->value.Accept(handler)))
+                    return false;
+            }
+            return handler.EndObject(data_.o.size);
+
+        case kArrayType:
+            if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
+                return false;
+            for (const GenericValue* v = Begin(); v != End(); ++v)
+                if (RAPIDJSON_UNLIKELY(!v->Accept(handler)))
+                    return false;
+            return handler.EndArray(data_.a.size);
+    
+        case kStringType:
+            return handler.String(GetString(), GetStringLength(), (data_.f.flags & kCopyFlag) != 0);
+    
+        default:
+            RAPIDJSON_ASSERT(GetType() == kNumberType);
+            if (IsDouble())         return handler.Double(data_.n.d);
+            else if (IsInt())       return handler.Int(data_.n.i.i);
+            else if (IsUint())      return handler.Uint(data_.n.u.u);
+            else if (IsInt64())     return handler.Int64(data_.n.i64);
+            else                    return handler.Uint64(data_.n.u64);
+        }
+    }
+
+private:
+    template <typename, typename> friend class GenericValue;
+    template <typename, typename, typename> friend class GenericDocument;
+
+    enum {
+        kBoolFlag       = 0x0008,
+        kNumberFlag     = 0x0010,
+        kIntFlag        = 0x0020,
+        kUintFlag       = 0x0040,
+        kInt64Flag      = 0x0080,
+        kUint64Flag     = 0x0100,
+        kDoubleFlag     = 0x0200,
+        kStringFlag     = 0x0400,
+        kCopyFlag       = 0x0800,
+        kInlineStrFlag  = 0x1000,
+
+        // Initial flags of different types.
+        kNullFlag = kNullType,
+        kTrueFlag = kTrueType | kBoolFlag,
+        kFalseFlag = kFalseType | kBoolFlag,
+        kNumberIntFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag,
+        kNumberUintFlag = kNumberType | kNumberFlag | kUintFlag | kUint64Flag | kInt64Flag,
+        kNumberInt64Flag = kNumberType | kNumberFlag | kInt64Flag,
+        kNumberUint64Flag = kNumberType | kNumberFlag | kUint64Flag,
+        kNumberDoubleFlag = kNumberType | kNumberFlag | kDoubleFlag,
+        kNumberAnyFlag = kNumberType | kNumberFlag | kIntFlag | kInt64Flag | kUintFlag | kUint64Flag | kDoubleFlag,
+        kConstStringFlag = kStringType | kStringFlag,
+        kCopyStringFlag = kStringType | kStringFlag | kCopyFlag,
+        kShortStringFlag = kStringType | kStringFlag | kCopyFlag | kInlineStrFlag,
+        kObjectFlag = kObjectType,
+        kArrayFlag = kArrayType,
+
+        kTypeMask = 0x07
+    };
+
+    static const SizeType kDefaultArrayCapacity = 16;
+    static const SizeType kDefaultObjectCapacity = 16;
+
+    struct Flag {
+#if RAPIDJSON_48BITPOINTER_OPTIMIZATION
+        char payload[sizeof(SizeType) * 2 + 6];     // 2 x SizeType + lower 48-bit pointer
+#elif RAPIDJSON_64BIT
+        char payload[sizeof(SizeType) * 2 + sizeof(void*) + 6]; // 6 padding bytes
+#else
+        char payload[sizeof(SizeType) * 2 + sizeof(void*) + 2]; // 2 padding bytes
+#endif
+        uint16_t flags;
+    };
+
+    struct String {
+        SizeType length;
+        SizeType hashcode;  //!< reserved
+        const Ch* str;
+    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+    // implementation detail: ShortString can represent zero-terminated strings up to MaxSize chars
+    // (excluding the terminating zero) and store a value to determine the length of the contained
+    // string in the last character str[LenPos] by storing "MaxSize - length" there. If the string
+    // to store has the maximal length of MaxSize then str[LenPos] will be 0 and therefore act as
+    // the string terminator as well. For getting the string length back from that value just use
+    // "MaxSize - str[LenPos]".
+    // This allows to store 13-chars strings in 32-bit mode, 21-chars strings in 64-bit mode,
+    // 13-chars strings for RAPIDJSON_48BITPOINTER_OPTIMIZATION=1 inline (for `UTF8`-encoded strings).
+    struct ShortString {
+        enum { MaxChars = sizeof(static_cast<Flag*>(0)->payload) / sizeof(Ch), MaxSize = MaxChars - 1, LenPos = MaxSize };
+        Ch str[MaxChars];
+
+        inline static bool Usable(SizeType len) { return                       (MaxSize >= len); }
+        inline void     SetLength(SizeType len) { str[LenPos] = static_cast<Ch>(MaxSize -  len); }
+        inline SizeType GetLength() const       { return  static_cast<SizeType>(MaxSize -  str[LenPos]); }
+    };  // at most as many bytes as "String" above => 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+    // By using proper binary layout, retrieval of different integer types do not need conversions.
+    union Number {
+#if RAPIDJSON_ENDIAN == RAPIDJSON_LITTLEENDIAN
+        struct I {
+            int i;
+            char padding[4];
+        }i;
+        struct U {
+            unsigned u;
+            char padding2[4];
+        }u;
+#else
+        struct I {
+            char padding[4];
+            int i;
+        }i;
+        struct U {
+            char padding2[4];
+            unsigned u;
+        }u;
+#endif
+        int64_t i64;
+        uint64_t u64;
+        double d;
+    };  // 8 bytes
+
+    struct ObjectData {
+        SizeType size;
+        SizeType capacity;
+        Member* members;
+    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+    struct ArrayData {
+        SizeType size;
+        SizeType capacity;
+        GenericValue* elements;
+    };  // 12 bytes in 32-bit mode, 16 bytes in 64-bit mode
+
+    union Data {
+        String s;
+        ShortString ss;
+        Number n;
+        ObjectData o;
+        ArrayData a;
+        Flag f;
+    };  // 16 bytes in 32-bit mode, 24 bytes in 64-bit mode, 16 bytes in 64-bit with RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+    RAPIDJSON_FORCEINLINE const Ch* GetStringPointer() const { return RAPIDJSON_GETPOINTER(Ch, data_.s.str); }
+    RAPIDJSON_FORCEINLINE const Ch* SetStringPointer(const Ch* str) { return RAPIDJSON_SETPOINTER(Ch, data_.s.str, str); }
+    RAPIDJSON_FORCEINLINE GenericValue* GetElementsPointer() const { return RAPIDJSON_GETPOINTER(GenericValue, data_.a.elements); }
+    RAPIDJSON_FORCEINLINE GenericValue* SetElementsPointer(GenericValue* elements) { return RAPIDJSON_SETPOINTER(GenericValue, data_.a.elements, elements); }
+    RAPIDJSON_FORCEINLINE Member* GetMembersPointer() const { return RAPIDJSON_GETPOINTER(Member, data_.o.members); }
+    RAPIDJSON_FORCEINLINE Member* SetMembersPointer(Member* members) { return RAPIDJSON_SETPOINTER(Member, data_.o.members, members); }
+
+    // Initialize this value as array with initial data, without calling destructor.
+    void SetArrayRaw(GenericValue* values, SizeType count, Allocator& allocator) {
+        data_.f.flags = kArrayFlag;
+        if (count) {
+            GenericValue* e = static_cast<GenericValue*>(allocator.Malloc(count * sizeof(GenericValue)));
+            SetElementsPointer(e);
+            std::memcpy(static_cast<void*>(e), values, count * sizeof(GenericValue));
+        }
+        else
+            SetElementsPointer(0);
+        data_.a.size = data_.a.capacity = count;
+    }
+
+    //! Initialize this value as object with initial data, without calling destructor.
+    void SetObjectRaw(Member* members, SizeType count, Allocator& allocator) {
+        data_.f.flags = kObjectFlag;
+        if (count) {
+            Member* m = static_cast<Member*>(allocator.Malloc(count * sizeof(Member)));
+            SetMembersPointer(m);
+            std::memcpy(static_cast<void*>(m), members, count * sizeof(Member));
+        }
+        else
+            SetMembersPointer(0);
+        data_.o.size = data_.o.capacity = count;
+    }
+
+    //! Initialize this value as constant string, without calling destructor.
+    void SetStringRaw(StringRefType s) RAPIDJSON_NOEXCEPT {
+        data_.f.flags = kConstStringFlag;
+        SetStringPointer(s);
+        data_.s.length = s.length;
+    }
+
+    //! Initialize this value as copy string with initial data, without calling destructor.
+    void SetStringRaw(StringRefType s, Allocator& allocator) {
+        Ch* str = 0;
+        if (ShortString::Usable(s.length)) {
+            data_.f.flags = kShortStringFlag;
+            data_.ss.SetLength(s.length);
+            str = data_.ss.str;
+        } else {
+            data_.f.flags = kCopyStringFlag;
+            data_.s.length = s.length;
+            str = static_cast<Ch *>(allocator.Malloc((s.length + 1) * sizeof(Ch)));
+            SetStringPointer(str);
+        }
+        std::memcpy(str, s, s.length * sizeof(Ch));
+        str[s.length] = '\0';
+    }
+
+    //! Assignment without calling destructor
+    void RawAssign(GenericValue& rhs) RAPIDJSON_NOEXCEPT {
+        data_ = rhs.data_;
+        // data_.f.flags = rhs.data_.f.flags;
+        rhs.data_.f.flags = kNullFlag;
+    }
+
+    template <typename SourceAllocator>
+    bool StringEqual(const GenericValue<Encoding, SourceAllocator>& rhs) const {
+        RAPIDJSON_ASSERT(IsString());
+        RAPIDJSON_ASSERT(rhs.IsString());
+
+        const SizeType len1 = GetStringLength();
+        const SizeType len2 = rhs.GetStringLength();
+        if(len1 != len2) { return false; }
+
+        const Ch* const str1 = GetString();
+        const Ch* const str2 = rhs.GetString();
+        if(str1 == str2) { return true; } // fast path for constant string
+
+        return (std::memcmp(str1, str2, sizeof(Ch) * len1) == 0);
+    }
+
+    Data data_;
+};
+
+//! GenericValue with UTF8 encoding
+typedef GenericValue<UTF8<> > Value;
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericDocument 
+
+//! A document for parsing JSON text as DOM.
+/*!
+    \note implements Handler concept
+    \tparam Encoding Encoding for both parsing and string storage.
+    \tparam Allocator Allocator for allocating memory for the DOM
+    \tparam StackAllocator Allocator for allocating memory for stack during parsing.
+    \warning Although GenericDocument inherits from GenericValue, the API does \b not provide any virtual functions, especially no virtual destructor.  To avoid memory leaks, do not \c delete a GenericDocument object via a pointer to a GenericValue.
+*/
+template <typename Encoding, typename Allocator = MemoryPoolAllocator<>, typename StackAllocator = CrtAllocator>
+class GenericDocument : public GenericValue<Encoding, Allocator> {
+public:
+    typedef typename Encoding::Ch Ch;                       //!< Character type derived from Encoding.
+    typedef GenericValue<Encoding, Allocator> ValueType;    //!< Value type of the document.
+    typedef Allocator AllocatorType;                        //!< Allocator type from template parameter.
+
+    //! Constructor
+    /*! Creates an empty document of specified type.
+        \param type             Mandatory type of object to create.
+        \param allocator        Optional allocator for allocating memory.
+        \param stackCapacity    Optional initial capacity of stack in bytes.
+        \param stackAllocator   Optional allocator for allocating memory for stack.
+    */
+    explicit GenericDocument(Type type, Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) :
+        GenericValue<Encoding, Allocator>(type),  allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+    {
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+    }
+
+    //! Constructor
+    /*! Creates an empty document which type is Null. 
+        \param allocator        Optional allocator for allocating memory.
+        \param stackCapacity    Optional initial capacity of stack in bytes.
+        \param stackAllocator   Optional allocator for allocating memory for stack.
+    */
+    GenericDocument(Allocator* allocator = 0, size_t stackCapacity = kDefaultStackCapacity, StackAllocator* stackAllocator = 0) : 
+        allocator_(allocator), ownAllocator_(0), stack_(stackAllocator, stackCapacity), parseResult_()
+    {
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move constructor in C++11
+    GenericDocument(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT
+        : ValueType(std::forward<ValueType>(rhs)), // explicit cast to avoid prohibited move from Document
+          allocator_(rhs.allocator_),
+          ownAllocator_(rhs.ownAllocator_),
+          stack_(std::move(rhs.stack_)),
+          parseResult_(rhs.parseResult_)
+    {
+        rhs.allocator_ = 0;
+        rhs.ownAllocator_ = 0;
+        rhs.parseResult_ = ParseResult();
+    }
+#endif
+
+    ~GenericDocument() {
+        Destroy();
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move assignment in C++11
+    GenericDocument& operator=(GenericDocument&& rhs) RAPIDJSON_NOEXCEPT
+    {
+        // The cast to ValueType is necessary here, because otherwise it would
+        // attempt to call GenericValue's templated assignment operator.
+        ValueType::operator=(std::forward<ValueType>(rhs));
+
+        // Calling the destructor here would prematurely call stack_'s destructor
+        Destroy();
+
+        allocator_ = rhs.allocator_;
+        ownAllocator_ = rhs.ownAllocator_;
+        stack_ = std::move(rhs.stack_);
+        parseResult_ = rhs.parseResult_;
+
+        rhs.allocator_ = 0;
+        rhs.ownAllocator_ = 0;
+        rhs.parseResult_ = ParseResult();
+
+        return *this;
+    }
+#endif
+
+    //! Exchange the contents of this document with those of another.
+    /*!
+        \param rhs Another document.
+        \note Constant complexity.
+        \see GenericValue::Swap
+    */
+    GenericDocument& Swap(GenericDocument& rhs) RAPIDJSON_NOEXCEPT {
+        ValueType::Swap(rhs);
+        stack_.Swap(rhs.stack_);
+        internal::Swap(allocator_, rhs.allocator_);
+        internal::Swap(ownAllocator_, rhs.ownAllocator_);
+        internal::Swap(parseResult_, rhs.parseResult_);
+        return *this;
+    }
+
+    // Allow Swap with ValueType.
+    // Refer to Effective C++ 3rd Edition/Item 33: Avoid hiding inherited names.
+    using ValueType::Swap;
+
+    //! free-standing swap function helper
+    /*!
+        Helper function to enable support for common swap implementation pattern based on \c std::swap:
+        \code
+        void swap(MyClass& a, MyClass& b) {
+            using std::swap;
+            swap(a.doc, b.doc);
+            // ...
+        }
+        \endcode
+        \see Swap()
+     */
+    friend inline void swap(GenericDocument& a, GenericDocument& b) RAPIDJSON_NOEXCEPT { a.Swap(b); }
+
+    //! Populate this document by a generator which produces SAX events.
+    /*! \tparam Generator A functor with <tt>bool f(Handler)</tt> prototype.
+        \param g Generator functor which sends SAX events to the parameter.
+        \return The document itself for fluent API.
+    */
+    template <typename Generator>
+    GenericDocument& Populate(Generator& g) {
+        ClearStackOnExit scope(*this);
+        if (g(*this)) {
+            RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
+            ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
+        }
+        return *this;
+    }
+
+    //!@name Parse from stream
+    //!@{
+
+    //! Parse JSON text from an input stream (with Encoding conversion)
+    /*! \tparam parseFlags Combination of \ref ParseFlag.
+        \tparam SourceEncoding Encoding of input stream
+        \tparam InputStream Type of input stream, implementing Stream concept
+        \param is Input stream to be parsed.
+        \return The document itself for fluent API.
+    */
+    template <unsigned parseFlags, typename SourceEncoding, typename InputStream>
+    GenericDocument& ParseStream(InputStream& is) {
+        GenericReader<SourceEncoding, Encoding, StackAllocator> reader(
+            stack_.HasAllocator() ? &stack_.GetAllocator() : 0);
+        ClearStackOnExit scope(*this);
+        parseResult_ = reader.template Parse<parseFlags>(is, *this);
+        if (parseResult_) {
+            RAPIDJSON_ASSERT(stack_.GetSize() == sizeof(ValueType)); // Got one and only one root object
+            ValueType::operator=(*stack_.template Pop<ValueType>(1));// Move value from stack to document
+        }
+        return *this;
+    }
+
+    //! Parse JSON text from an input stream
+    /*! \tparam parseFlags Combination of \ref ParseFlag.
+        \tparam InputStream Type of input stream, implementing Stream concept
+        \param is Input stream to be parsed.
+        \return The document itself for fluent API.
+    */
+    template <unsigned parseFlags, typename InputStream>
+    GenericDocument& ParseStream(InputStream& is) {
+        return ParseStream<parseFlags, Encoding, InputStream>(is);
+    }
+
+    //! Parse JSON text from an input stream (with \ref kParseDefaultFlags)
+    /*! \tparam InputStream Type of input stream, implementing Stream concept
+        \param is Input stream to be parsed.
+        \return The document itself for fluent API.
+    */
+    template <typename InputStream>
+    GenericDocument& ParseStream(InputStream& is) {
+        return ParseStream<kParseDefaultFlags, Encoding, InputStream>(is);
+    }
+    //!@}
+
+    //!@name Parse in-place from mutable string
+    //!@{
+
+    //! Parse JSON text from a mutable string
+    /*! \tparam parseFlags Combination of \ref ParseFlag.
+        \param str Mutable zero-terminated string to be parsed.
+        \return The document itself for fluent API.
+    */
+    template <unsigned parseFlags>
+    GenericDocument& ParseInsitu(Ch* str) {
+        GenericInsituStringStream<Encoding> s(str);
+        return ParseStream<parseFlags | kParseInsituFlag>(s);
+    }
+
+    //! Parse JSON text from a mutable string (with \ref kParseDefaultFlags)
+    /*! \param str Mutable zero-terminated string to be parsed.
+        \return The document itself for fluent API.
+    */
+    GenericDocument& ParseInsitu(Ch* str) {
+        return ParseInsitu<kParseDefaultFlags>(str);
+    }
+    //!@}
+
+    //!@name Parse from read-only string
+    //!@{
+
+    //! Parse JSON text from a read-only string (with Encoding conversion)
+    /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
+        \tparam SourceEncoding Transcoding from input Encoding
+        \param str Read-only zero-terminated string to be parsed.
+    */
+    template <unsigned parseFlags, typename SourceEncoding>
+    GenericDocument& Parse(const typename SourceEncoding::Ch* str) {
+        RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
+        GenericStringStream<SourceEncoding> s(str);
+        return ParseStream<parseFlags, SourceEncoding>(s);
+    }
+
+    //! Parse JSON text from a read-only string
+    /*! \tparam parseFlags Combination of \ref ParseFlag (must not contain \ref kParseInsituFlag).
+        \param str Read-only zero-terminated string to be parsed.
+    */
+    template <unsigned parseFlags>
+    GenericDocument& Parse(const Ch* str) {
+        return Parse<parseFlags, Encoding>(str);
+    }
+
+    //! Parse JSON text from a read-only string (with \ref kParseDefaultFlags)
+    /*! \param str Read-only zero-terminated string to be parsed.
+    */
+    GenericDocument& Parse(const Ch* str) {
+        return Parse<kParseDefaultFlags>(str);
+    }
+
+    template <unsigned parseFlags, typename SourceEncoding>
+    GenericDocument& Parse(const typename SourceEncoding::Ch* str, size_t length) {
+        RAPIDJSON_ASSERT(!(parseFlags & kParseInsituFlag));
+        MemoryStream ms(reinterpret_cast<const char*>(str), length * sizeof(typename SourceEncoding::Ch));
+        EncodedInputStream<SourceEncoding, MemoryStream> is(ms);
+        ParseStream<parseFlags, SourceEncoding>(is);
+        return *this;
+    }
+
+    template <unsigned parseFlags>
+    GenericDocument& Parse(const Ch* str, size_t length) {
+        return Parse<parseFlags, Encoding>(str, length);
+    }
+    
+    GenericDocument& Parse(const Ch* str, size_t length) {
+        return Parse<kParseDefaultFlags>(str, length);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    template <unsigned parseFlags, typename SourceEncoding>
+    GenericDocument& Parse(const std::basic_string<typename SourceEncoding::Ch>& str) {
+        // c_str() is constant complexity according to standard. Should be faster than Parse(const char*, size_t)
+        return Parse<parseFlags, SourceEncoding>(str.c_str());
+    }
+
+    template <unsigned parseFlags>
+    GenericDocument& Parse(const std::basic_string<Ch>& str) {
+        return Parse<parseFlags, Encoding>(str.c_str());
+    }
+
+    GenericDocument& Parse(const std::basic_string<Ch>& str) {
+        return Parse<kParseDefaultFlags>(str);
+    }
+#endif // RAPIDJSON_HAS_STDSTRING    
+
+    //!@}
+
+    //!@name Handling parse errors
+    //!@{
+
+    //! Whether a parse error has occurred in the last parsing.
+    bool HasParseError() const { return parseResult_.IsError(); }
+
+    //! Get the \ref ParseErrorCode of last parsing.
+    ParseErrorCode GetParseError() const { return parseResult_.Code(); }
+
+    //! Get the position of last parsing error in input, 0 otherwise.
+    size_t GetErrorOffset() const { return parseResult_.Offset(); }
+
+    //! Implicit conversion to get the last parse result
+#ifndef __clang // -Wdocumentation
+    /*! \return \ref ParseResult of the last parse operation
+
+        \code
+          Document doc;
+          ParseResult ok = doc.Parse(json);
+          if (!ok)
+            printf( "JSON parse error: %s (%u)\n", GetParseError_En(ok.Code()), ok.Offset());
+        \endcode
+     */
+#endif
+    operator ParseResult() const { return parseResult_; }
+    //!@}
+
+    //! Get the allocator of this document.
+    Allocator& GetAllocator() {
+        RAPIDJSON_ASSERT(allocator_);
+        return *allocator_;
+    }
+
+    //! Get the capacity of stack in bytes.
+    size_t GetStackCapacity() const { return stack_.GetCapacity(); }
+
+private:
+    // clear stack on any exit from ParseStream, e.g. due to exception
+    struct ClearStackOnExit {
+        explicit ClearStackOnExit(GenericDocument& d) : d_(d) {}
+        ~ClearStackOnExit() { d_.ClearStack(); }
+    private:
+        ClearStackOnExit(const ClearStackOnExit&);
+        ClearStackOnExit& operator=(const ClearStackOnExit&);
+        GenericDocument& d_;
+    };
+
+    // callers of the following private Handler functions
+    // template <typename,typename,typename> friend class GenericReader; // for parsing
+    template <typename, typename> friend class GenericValue; // for deep copying
+
+public:
+    // Implementation of Handler
+    bool Null() { new (stack_.template Push<ValueType>()) ValueType(); return true; }
+    bool Bool(bool b) { new (stack_.template Push<ValueType>()) ValueType(b); return true; }
+    bool Int(int i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
+    bool Uint(unsigned i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
+    bool Int64(int64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
+    bool Uint64(uint64_t i) { new (stack_.template Push<ValueType>()) ValueType(i); return true; }
+    bool Double(double d) { new (stack_.template Push<ValueType>()) ValueType(d); return true; }
+
+    bool RawNumber(const Ch* str, SizeType length, bool copy) { 
+        if (copy) 
+            new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
+        else
+            new (stack_.template Push<ValueType>()) ValueType(str, length);
+        return true;
+    }
+
+    bool String(const Ch* str, SizeType length, bool copy) { 
+        if (copy) 
+            new (stack_.template Push<ValueType>()) ValueType(str, length, GetAllocator());
+        else
+            new (stack_.template Push<ValueType>()) ValueType(str, length);
+        return true;
+    }
+
+    bool StartObject() { new (stack_.template Push<ValueType>()) ValueType(kObjectType); return true; }
+    
+    bool Key(const Ch* str, SizeType length, bool copy) { return String(str, length, copy); }
+
+    bool EndObject(SizeType memberCount) {
+        typename ValueType::Member* members = stack_.template Pop<typename ValueType::Member>(memberCount);
+        stack_.template Top<ValueType>()->SetObjectRaw(members, memberCount, GetAllocator());
+        return true;
+    }
+
+    bool StartArray() { new (stack_.template Push<ValueType>()) ValueType(kArrayType); return true; }
+    
+    bool EndArray(SizeType elementCount) {
+        ValueType* elements = stack_.template Pop<ValueType>(elementCount);
+        stack_.template Top<ValueType>()->SetArrayRaw(elements, elementCount, GetAllocator());
+        return true;
+    }
+
+private:
+    //! Prohibit copying
+    GenericDocument(const GenericDocument&);
+    //! Prohibit assignment
+    GenericDocument& operator=(const GenericDocument&);
+
+    void ClearStack() {
+        if (Allocator::kNeedFree)
+            while (stack_.GetSize() > 0)    // Here assumes all elements in stack array are GenericValue (Member is actually 2 GenericValue objects)
+                (stack_.template Pop<ValueType>(1))->~ValueType();
+        else
+            stack_.Clear();
+        stack_.ShrinkToFit();
+    }
+
+    void Destroy() {
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    static const size_t kDefaultStackCapacity = 1024;
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    internal::Stack<StackAllocator> stack_;
+    ParseResult parseResult_;
+};
+
+//! GenericDocument with UTF8 encoding
+typedef GenericDocument<UTF8<> > Document;
+
+//! Helper class for accessing Value of array type.
+/*!
+    Instance of this helper class is obtained by \c GenericValue::GetArray().
+    In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+*/
+template <bool Const, typename ValueT>
+class GenericArray {
+public:
+    typedef GenericArray<true, ValueT> ConstArray;
+    typedef GenericArray<false, ValueT> Array;
+    typedef ValueT PlainType;
+    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+    typedef ValueType* ValueIterator;  // This may be const or non-const iterator
+    typedef const ValueT* ConstValueIterator;
+    typedef typename ValueType::AllocatorType AllocatorType;
+    typedef typename ValueType::StringRefType StringRefType;
+
+    template <typename, typename>
+    friend class GenericValue;
+
+    GenericArray(const GenericArray& rhs) : value_(rhs.value_) {}
+    GenericArray& operator=(const GenericArray& rhs) { value_ = rhs.value_; return *this; }
+    ~GenericArray() {}
+
+    SizeType Size() const { return value_.Size(); }
+    SizeType Capacity() const { return value_.Capacity(); }
+    bool Empty() const { return value_.Empty(); }
+    void Clear() const { value_.Clear(); }
+    ValueType& operator[](SizeType index) const {  return value_[index]; }
+    ValueIterator Begin() const { return value_.Begin(); }
+    ValueIterator End() const { return value_.End(); }
+    GenericArray Reserve(SizeType newCapacity, AllocatorType &allocator) const { value_.Reserve(newCapacity, allocator); return *this; }
+    GenericArray PushBack(ValueType& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericArray PushBack(ValueType&& value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericArray PushBack(StringRefType value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (const GenericArray&)) PushBack(T value, AllocatorType& allocator) const { value_.PushBack(value, allocator); return *this; }
+    GenericArray PopBack() const { value_.PopBack(); return *this; }
+    ValueIterator Erase(ConstValueIterator pos) const { return value_.Erase(pos); }
+    ValueIterator Erase(ConstValueIterator first, ConstValueIterator last) const { return value_.Erase(first, last); }
+
+#if RAPIDJSON_HAS_CXX11_RANGE_FOR
+    ValueIterator begin() const { return value_.Begin(); }
+    ValueIterator end() const { return value_.End(); }
+#endif
+
+private:
+    GenericArray();
+    GenericArray(ValueType& value) : value_(value) {}
+    ValueType& value_;
+};
+
+//! Helper class for accessing Value of object type.
+/*!
+    Instance of this helper class is obtained by \c GenericValue::GetObject().
+    In addition to all APIs for array type, it provides range-based for loop if \c RAPIDJSON_HAS_CXX11_RANGE_FOR=1.
+*/
+template <bool Const, typename ValueT>
+class GenericObject {
+public:
+    typedef GenericObject<true, ValueT> ConstObject;
+    typedef GenericObject<false, ValueT> Object;
+    typedef ValueT PlainType;
+    typedef typename internal::MaybeAddConst<Const,PlainType>::Type ValueType;
+    typedef GenericMemberIterator<Const, typename ValueT::EncodingType, typename ValueT::AllocatorType> MemberIterator;  // This may be const or non-const iterator
+    typedef GenericMemberIterator<true, typename ValueT::EncodingType, typename ValueT::AllocatorType> ConstMemberIterator;
+    typedef typename ValueType::AllocatorType AllocatorType;
+    typedef typename ValueType::StringRefType StringRefType;
+    typedef typename ValueType::EncodingType EncodingType;
+    typedef typename ValueType::Ch Ch;
+
+    template <typename, typename>
+    friend class GenericValue;
+
+    GenericObject(const GenericObject& rhs) : value_(rhs.value_) {}
+    GenericObject& operator=(const GenericObject& rhs) { value_ = rhs.value_; return *this; }
+    ~GenericObject() {}
+
+    SizeType MemberCount() const { return value_.MemberCount(); }
+    SizeType MemberCapacity() const { return value_.MemberCapacity(); }
+    bool ObjectEmpty() const { return value_.ObjectEmpty(); }
+    template <typename T> ValueType& operator[](T* name) const { return value_[name]; }
+    template <typename SourceAllocator> ValueType& operator[](const GenericValue<EncodingType, SourceAllocator>& name) const { return value_[name]; }
+#if RAPIDJSON_HAS_STDSTRING
+    ValueType& operator[](const std::basic_string<Ch>& name) const { return value_[name]; }
+#endif
+    MemberIterator MemberBegin() const { return value_.MemberBegin(); }
+    MemberIterator MemberEnd() const { return value_.MemberEnd(); }
+    GenericObject MemberReserve(SizeType newCapacity, AllocatorType &allocator) const { value_.MemberReserve(newCapacity, allocator); return *this; }
+    bool HasMember(const Ch* name) const { return value_.HasMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    bool HasMember(const std::basic_string<Ch>& name) const { return value_.HasMember(name); }
+#endif
+    template <typename SourceAllocator> bool HasMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.HasMember(name); }
+    MemberIterator FindMember(const Ch* name) const { return value_.FindMember(name); }
+    template <typename SourceAllocator> MemberIterator FindMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.FindMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    MemberIterator FindMember(const std::basic_string<Ch>& name) const { return value_.FindMember(name); }
+#endif
+    GenericObject AddMember(ValueType& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType& name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#if RAPIDJSON_HAS_STDSTRING
+    GenericObject AddMember(ValueType& name, std::basic_string<Ch>& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#endif
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&)) AddMember(ValueType& name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericObject AddMember(ValueType&& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType&& name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(ValueType& name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(StringRefType name, ValueType&& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericObject AddMember(StringRefType name, ValueType& value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    GenericObject AddMember(StringRefType name, StringRefType value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    template <typename T> RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (GenericObject)) AddMember(StringRefType name, T value, AllocatorType& allocator) const { value_.AddMember(name, value, allocator); return *this; }
+    void RemoveAllMembers() { value_.RemoveAllMembers(); }
+    bool RemoveMember(const Ch* name) const { return value_.RemoveMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    bool RemoveMember(const std::basic_string<Ch>& name) const { return value_.RemoveMember(name); }
+#endif
+    template <typename SourceAllocator> bool RemoveMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.RemoveMember(name); }
+    MemberIterator RemoveMember(MemberIterator m) const { return value_.RemoveMember(m); }
+    MemberIterator EraseMember(ConstMemberIterator pos) const { return value_.EraseMember(pos); }
+    MemberIterator EraseMember(ConstMemberIterator first, ConstMemberIterator last) const { return value_.EraseMember(first, last); }
+    bool EraseMember(const Ch* name) const { return value_.EraseMember(name); }
+#if RAPIDJSON_HAS_STDSTRING
+    bool EraseMember(const std::basic_string<Ch>& name) const { return EraseMember(ValueType(StringRef(name))); }
+#endif
+    template <typename SourceAllocator> bool EraseMember(const GenericValue<EncodingType, SourceAllocator>& name) const { return value_.EraseMember(name); }
+
+#if RAPIDJSON_HAS_CXX11_RANGE_FOR
+    MemberIterator begin() const { return value_.MemberBegin(); }
+    MemberIterator end() const { return value_.MemberEnd(); }
+#endif
+
+private:
+    GenericObject();
+    GenericObject(ValueType& value) : value_(value) {}
+    ValueType& value_;
+};
+
+RAPIDJSON_NAMESPACE_END
+RAPIDJSON_DIAG_POP
+
+#endif // RAPIDJSON_DOCUMENT_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/encodedstream.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/encodedstream.h
new file mode 100644
index 000000000..223601c05
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/encodedstream.h
@@ -0,0 +1,299 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ENCODEDSTREAM_H_
+#define RAPIDJSON_ENCODEDSTREAM_H_
+
+#include "stream.h"
+#include "memorystream.h"
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Input byte stream wrapper with a statically bound encoding.
+/*!
+    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
+    \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
+*/
+template <typename Encoding, typename InputByteStream>
+class EncodedInputStream {
+    RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+public:
+    typedef typename Encoding::Ch Ch;
+
+    EncodedInputStream(InputByteStream& is) : is_(is) { 
+        current_ = Encoding::TakeBOM(is_);
+    }
+
+    Ch Peek() const { return current_; }
+    Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
+    size_t Tell() const { return is_.Tell(); }
+
+    // Not implemented
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); } 
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    EncodedInputStream(const EncodedInputStream&);
+    EncodedInputStream& operator=(const EncodedInputStream&);
+
+    InputByteStream& is_;
+    Ch current_;
+};
+
+//! Specialized for UTF8 MemoryStream.
+template <>
+class EncodedInputStream<UTF8<>, MemoryStream> {
+public:
+    typedef UTF8<>::Ch Ch;
+
+    EncodedInputStream(MemoryStream& is) : is_(is) {
+        if (static_cast<unsigned char>(is_.Peek()) == 0xEFu) is_.Take();
+        if (static_cast<unsigned char>(is_.Peek()) == 0xBBu) is_.Take();
+        if (static_cast<unsigned char>(is_.Peek()) == 0xBFu) is_.Take();
+    }
+    Ch Peek() const { return is_.Peek(); }
+    Ch Take() { return is_.Take(); }
+    size_t Tell() const { return is_.Tell(); }
+
+    // Not implemented
+    void Put(Ch) {}
+    void Flush() {} 
+    Ch* PutBegin() { return 0; }
+    size_t PutEnd(Ch*) { return 0; }
+
+    MemoryStream& is_;
+
+private:
+    EncodedInputStream(const EncodedInputStream&);
+    EncodedInputStream& operator=(const EncodedInputStream&);
+};
+
+//! Output byte stream wrapper with statically bound encoding.
+/*!
+    \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
+    \tparam OutputByteStream Type of input byte stream. For example, FileWriteStream.
+*/
+template <typename Encoding, typename OutputByteStream>
+class EncodedOutputStream {
+    RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+public:
+    typedef typename Encoding::Ch Ch;
+
+    EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) { 
+        if (putBOM)
+            Encoding::PutBOM(os_);
+    }
+
+    void Put(Ch c) { Encoding::Put(os_, c);  }
+    void Flush() { os_.Flush(); }
+
+    // Not implemented
+    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
+    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
+    size_t Tell() const { RAPIDJSON_ASSERT(false);  return 0; }
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    EncodedOutputStream(const EncodedOutputStream&);
+    EncodedOutputStream& operator=(const EncodedOutputStream&);
+
+    OutputByteStream& os_;
+};
+
+#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
+
+//! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
+/*!
+    \tparam CharType Type of character for reading.
+    \tparam InputByteStream type of input byte stream to be wrapped.
+*/
+template <typename CharType, typename InputByteStream>
+class AutoUTFInputStream {
+    RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+public:
+    typedef CharType Ch;
+
+    //! Constructor.
+    /*!
+        \param is input stream to be wrapped.
+        \param type UTF encoding type if it is not detected from the stream.
+    */
+    AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
+        RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);        
+        DetectType();
+        static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
+        takeFunc_ = f[type_];
+        current_ = takeFunc_(*is_);
+    }
+
+    UTFType GetType() const { return type_; }
+    bool HasBOM() const { return hasBOM_; }
+
+    Ch Peek() const { return current_; }
+    Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
+    size_t Tell() const { return is_->Tell(); }
+
+    // Not implemented
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); } 
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    AutoUTFInputStream(const AutoUTFInputStream&);
+    AutoUTFInputStream& operator=(const AutoUTFInputStream&);
+
+    // Detect encoding type with BOM or RFC 4627
+    void DetectType() {
+        // BOM (Byte Order Mark):
+        // 00 00 FE FF  UTF-32BE
+        // FF FE 00 00  UTF-32LE
+        // FE FF        UTF-16BE
+        // FF FE        UTF-16LE
+        // EF BB BF     UTF-8
+
+        const unsigned char* c = reinterpret_cast<const unsigned char *>(is_->Peek4());
+        if (!c)
+            return;
+
+        unsigned bom = static_cast<unsigned>(c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
+        hasBOM_ = false;
+        if (bom == 0xFFFE0000)                  { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
+        else if (bom == 0x0000FEFF)             { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
+        else if ((bom & 0xFFFF) == 0xFFFE)      { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take();                           }
+        else if ((bom & 0xFFFF) == 0xFEFF)      { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take();                           }
+        else if ((bom & 0xFFFFFF) == 0xBFBBEF)  { type_ = kUTF8;    hasBOM_ = true; is_->Take(); is_->Take(); is_->Take();              }
+
+        // RFC 4627: Section 3
+        // "Since the first two characters of a JSON text will always be ASCII
+        // characters [RFC0020], it is possible to determine whether an octet
+        // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
+        // at the pattern of nulls in the first four octets."
+        // 00 00 00 xx  UTF-32BE
+        // 00 xx 00 xx  UTF-16BE
+        // xx 00 00 00  UTF-32LE
+        // xx 00 xx 00  UTF-16LE
+        // xx xx xx xx  UTF-8
+
+        if (!hasBOM_) {
+            int pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
+            switch (pattern) {
+            case 0x08: type_ = kUTF32BE; break;
+            case 0x0A: type_ = kUTF16BE; break;
+            case 0x01: type_ = kUTF32LE; break;
+            case 0x05: type_ = kUTF16LE; break;
+            case 0x0F: type_ = kUTF8;    break;
+            default: break; // Use type defined by user.
+            }
+        }
+
+        // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
+        if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
+        if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
+    }
+
+    typedef Ch (*TakeFunc)(InputByteStream& is);
+    InputByteStream* is_;
+    UTFType type_;
+    Ch current_;
+    TakeFunc takeFunc_;
+    bool hasBOM_;
+};
+
+//! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
+/*!
+    \tparam CharType Type of character for writing.
+    \tparam OutputByteStream type of output byte stream to be wrapped.
+*/
+template <typename CharType, typename OutputByteStream>
+class AutoUTFOutputStream {
+    RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+public:
+    typedef CharType Ch;
+
+    //! Constructor.
+    /*!
+        \param os output stream to be wrapped.
+        \param type UTF encoding type.
+        \param putBOM Whether to write BOM at the beginning of the stream.
+    */
+    AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
+        RAPIDJSON_ASSERT(type >= kUTF8 && type <= kUTF32BE);
+
+        // Runtime check whether the size of character type is sufficient. It only perform checks with assertion.
+        if (type_ == kUTF16LE || type_ == kUTF16BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
+        if (type_ == kUTF32LE || type_ == kUTF32BE) RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
+
+        static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
+        putFunc_ = f[type_];
+
+        if (putBOM)
+            PutBOM();
+    }
+
+    UTFType GetType() const { return type_; }
+
+    void Put(Ch c) { putFunc_(*os_, c); }
+    void Flush() { os_->Flush(); } 
+
+    // Not implemented
+    Ch Peek() const { RAPIDJSON_ASSERT(false); return 0;}
+    Ch Take() { RAPIDJSON_ASSERT(false); return 0;}
+    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    AutoUTFOutputStream(const AutoUTFOutputStream&);
+    AutoUTFOutputStream& operator=(const AutoUTFOutputStream&);
+
+    void PutBOM() { 
+        typedef void (*PutBOMFunc)(OutputByteStream&);
+        static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
+        f[type_](*os_);
+    }
+
+    typedef void (*PutFunc)(OutputByteStream&, Ch);
+
+    OutputByteStream* os_;
+    UTFType type_;
+    PutFunc putFunc_;
+};
+
+#undef RAPIDJSON_ENCODINGS_FUNC
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_FILESTREAM_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/encodings.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/encodings.h
new file mode 100644
index 000000000..0b2446795
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/encodings.h
@@ -0,0 +1,716 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ENCODINGS_H_
+#define RAPIDJSON_ENCODINGS_H_
+
+#include "rapidjson.h"
+
+#if defined(_MSC_VER) && !defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
+RAPIDJSON_DIAG_OFF(4702)  // unreachable code
+#elif defined(__GNUC__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+RAPIDJSON_DIAG_OFF(overflow)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// Encoding
+
+/*! \class rapidjson::Encoding
+    \brief Concept for encoding of Unicode characters.
+
+\code
+concept Encoding {
+    typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's definition.
+
+    enum { supportUnicode = 1 }; // or 0 if not supporting unicode
+
+    //! \brief Encode a Unicode codepoint to an output stream.
+    //! \param os Output stream.
+    //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
+    template<typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint);
+
+    //! \brief Decode a Unicode codepoint from an input stream.
+    //! \param is Input stream.
+    //! \param codepoint Output of the unicode codepoint.
+    //! \return true if a valid codepoint can be decoded from the stream.
+    template <typename InputStream>
+    static bool Decode(InputStream& is, unsigned* codepoint);
+
+    //! \brief Validate one Unicode codepoint from an encoded stream.
+    //! \param is Input stream to obtain codepoint.
+    //! \param os Output for copying one codepoint.
+    //! \return true if it is valid.
+    //! \note This function just validating and copying the codepoint without actually decode it.
+    template <typename InputStream, typename OutputStream>
+    static bool Validate(InputStream& is, OutputStream& os);
+
+    // The following functions are deal with byte streams.
+
+    //! Take a character from input byte stream, skip BOM if exist.
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is);
+
+    //! Take a character from input byte stream.
+    template <typename InputByteStream>
+    static Ch Take(InputByteStream& is);
+
+    //! Put BOM to output byte stream.
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os);
+
+    //! Put a character to output byte stream.
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, Ch c);
+};
+\endcode
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+// UTF8
+
+//! UTF-8 encoding.
+/*! http://en.wikipedia.org/wiki/UTF-8
+    http://tools.ietf.org/html/rfc3629
+    \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
+    \note implements Encoding concept
+*/
+template<typename CharType = char>
+struct UTF8 {
+    typedef CharType Ch;
+
+    enum { supportUnicode = 1 };
+
+    template<typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint) {
+        if (codepoint <= 0x7F) 
+            os.Put(static_cast<Ch>(codepoint & 0xFF));
+        else if (codepoint <= 0x7FF) {
+            os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
+            os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
+        }
+        else if (codepoint <= 0xFFFF) {
+            os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
+            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
+            os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
+        }
+        else {
+            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+            os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
+            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
+            os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
+            os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
+        }
+    }
+
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+        if (codepoint <= 0x7F) 
+            PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
+        else if (codepoint <= 0x7FF) {
+            PutUnsafe(os, static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
+        }
+        else if (codepoint <= 0xFFFF) {
+            PutUnsafe(os, static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
+        }
+        else {
+            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+            PutUnsafe(os, static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
+            PutUnsafe(os, static_cast<Ch>(0x80 | (codepoint & 0x3F)));
+        }
+    }
+
+    template <typename InputStream>
+    static bool Decode(InputStream& is, unsigned* codepoint) {
+#define RAPIDJSON_COPY() c = is.Take(); *codepoint = (*codepoint << 6) | (static_cast<unsigned char>(c) & 0x3Fu)
+#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
+#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
+        typename InputStream::Ch c = is.Take();
+        if (!(c & 0x80)) {
+            *codepoint = static_cast<unsigned char>(c);
+            return true;
+        }
+
+        unsigned char type = GetRange(static_cast<unsigned char>(c));
+        if (type >= 32) {
+            *codepoint = 0;
+        } else {
+            *codepoint = (0xFFu >> type) & static_cast<unsigned char>(c);
+        }
+        bool result = true;
+        switch (type) {
+        case 2: RAPIDJSON_TAIL(); return result;
+        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
+        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
+        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        default: return false;
+        }
+#undef RAPIDJSON_COPY
+#undef RAPIDJSON_TRANS
+#undef RAPIDJSON_TAIL
+    }
+
+    template <typename InputStream, typename OutputStream>
+    static bool Validate(InputStream& is, OutputStream& os) {
+#define RAPIDJSON_COPY() os.Put(c = is.Take())
+#define RAPIDJSON_TRANS(mask) result &= ((GetRange(static_cast<unsigned char>(c)) & mask) != 0)
+#define RAPIDJSON_TAIL() RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x70)
+        Ch c;
+        RAPIDJSON_COPY();
+        if (!(c & 0x80))
+            return true;
+
+        bool result = true;
+        switch (GetRange(static_cast<unsigned char>(c))) {
+        case 2: RAPIDJSON_TAIL(); return result;
+        case 3: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 4: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x50); RAPIDJSON_TAIL(); return result;
+        case 5: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x10); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 6: RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        case 10: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x20); RAPIDJSON_TAIL(); return result;
+        case 11: RAPIDJSON_COPY(); RAPIDJSON_TRANS(0x60); RAPIDJSON_TAIL(); RAPIDJSON_TAIL(); return result;
+        default: return false;
+        }
+#undef RAPIDJSON_COPY
+#undef RAPIDJSON_TRANS
+#undef RAPIDJSON_TAIL
+    }
+
+    static unsigned char GetRange(unsigned char c) {
+        // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+        // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
+        static const unsigned char type[] = {
+            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+            0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
+            0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
+            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
+            0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
+            8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+            10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+        };
+        return type[c];
+    }
+
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        typename InputByteStream::Ch c = Take(is);
+        if (static_cast<unsigned char>(c) != 0xEFu) return c;
+        c = is.Take();
+        if (static_cast<unsigned char>(c) != 0xBBu) return c;
+        c = is.Take();
+        if (static_cast<unsigned char>(c) != 0xBFu) return c;
+        c = is.Take();
+        return c;
+    }
+
+    template <typename InputByteStream>
+    static Ch Take(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        return static_cast<Ch>(is.Take());
+    }
+
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xEFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xBBu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xBFu));
+    }
+
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, Ch c) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(c));
+    }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// UTF16
+
+//! UTF-16 encoding.
+/*! http://en.wikipedia.org/wiki/UTF-16
+    http://tools.ietf.org/html/rfc2781
+    \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
+    \note implements Encoding concept
+
+    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
+    For streaming, use UTF16LE and UTF16BE, which handle endianness.
+*/
+template<typename CharType = wchar_t>
+struct UTF16 {
+    typedef CharType Ch;
+    RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
+
+    enum { supportUnicode = 1 };
+
+    template<typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
+        if (codepoint <= 0xFFFF) {
+            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
+            os.Put(static_cast<typename OutputStream::Ch>(codepoint));
+        }
+        else {
+            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+            unsigned v = codepoint - 0x10000;
+            os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
+            os.Put(static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
+        }
+    }
+
+
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
+        if (codepoint <= 0xFFFF) {
+            RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>(codepoint));
+        }
+        else {
+            RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+            unsigned v = codepoint - 0x10000;
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
+            PutUnsafe(os, static_cast<typename OutputStream::Ch>((v & 0x3FF) | 0xDC00));
+        }
+    }
+
+    template <typename InputStream>
+    static bool Decode(InputStream& is, unsigned* codepoint) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
+        typename InputStream::Ch c = is.Take();
+        if (c < 0xD800 || c > 0xDFFF) {
+            *codepoint = static_cast<unsigned>(c);
+            return true;
+        }
+        else if (c <= 0xDBFF) {
+            *codepoint = (static_cast<unsigned>(c) & 0x3FF) << 10;
+            c = is.Take();
+            *codepoint |= (static_cast<unsigned>(c) & 0x3FF);
+            *codepoint += 0x10000;
+            return c >= 0xDC00 && c <= 0xDFFF;
+        }
+        return false;
+    }
+
+    template <typename InputStream, typename OutputStream>
+    static bool Validate(InputStream& is, OutputStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
+        typename InputStream::Ch c;
+        os.Put(static_cast<typename OutputStream::Ch>(c = is.Take()));
+        if (c < 0xD800 || c > 0xDFFF)
+            return true;
+        else if (c <= 0xDBFF) {
+            os.Put(c = is.Take());
+            return c >= 0xDC00 && c <= 0xDFFF;
+        }
+        return false;
+    }
+};
+
+//! UTF-16 little endian encoding.
+template<typename CharType = wchar_t>
+struct UTF16LE : UTF16<CharType> {
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        CharType c = Take(is);
+        return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
+    }
+
+    template <typename InputByteStream>
+    static CharType Take(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        unsigned c = static_cast<uint8_t>(is.Take());
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        return static_cast<CharType>(c);
+    }
+
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+    }
+
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, CharType c) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
+    }
+};
+
+//! UTF-16 big endian encoding.
+template<typename CharType = wchar_t>
+struct UTF16BE : UTF16<CharType> {
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        CharType c = Take(is);
+        return static_cast<uint16_t>(c) == 0xFEFFu ? Take(is) : c;
+    }
+
+    template <typename InputByteStream>
+    static CharType Take(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
+        return static_cast<CharType>(c);
+    }
+
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
+    }
+
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, CharType c) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>((static_cast<unsigned>(c) >> 8) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(static_cast<unsigned>(c) & 0xFFu));
+    }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// UTF32
+
+//! UTF-32 encoding. 
+/*! http://en.wikipedia.org/wiki/UTF-32
+    \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
+    \note implements Encoding concept
+
+    \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness.
+    For streaming, use UTF32LE and UTF32BE, which handle endianness.
+*/
+template<typename CharType = unsigned>
+struct UTF32 {
+    typedef CharType Ch;
+    RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
+
+    enum { supportUnicode = 1 };
+
+    template<typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
+        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+        os.Put(codepoint);
+    }
+
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
+        RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+        PutUnsafe(os, codepoint);
+    }
+
+    template <typename InputStream>
+    static bool Decode(InputStream& is, unsigned* codepoint) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
+        Ch c = is.Take();
+        *codepoint = c;
+        return c <= 0x10FFFF;
+    }
+
+    template <typename InputStream, typename OutputStream>
+    static bool Validate(InputStream& is, OutputStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
+        Ch c;
+        os.Put(c = is.Take());
+        return c <= 0x10FFFF;
+    }
+};
+
+//! UTF-32 little endian enocoding.
+template<typename CharType = unsigned>
+struct UTF32LE : UTF32<CharType> {
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        CharType c = Take(is);
+        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c;
+    }
+
+    template <typename InputByteStream>
+    static CharType Take(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        unsigned c = static_cast<uint8_t>(is.Take());
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
+        return static_cast<CharType>(c);
+    }
+
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+    }
+
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, CharType c) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
+    }
+};
+
+//! UTF-32 big endian encoding.
+template<typename CharType = unsigned>
+struct UTF32BE : UTF32<CharType> {
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        CharType c = Take(is);
+        return static_cast<uint32_t>(c) == 0x0000FEFFu ? Take(is) : c; 
+    }
+
+    template <typename InputByteStream>
+    static CharType Take(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        unsigned c = static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 24;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 16;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take())) << 8;
+        c |= static_cast<unsigned>(static_cast<uint8_t>(is.Take()));
+        return static_cast<CharType>(c);
+    }
+
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0x00u));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFEu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(0xFFu));
+    }
+
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, CharType c) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 24) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 16) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>((c >> 8) & 0xFFu));
+        os.Put(static_cast<typename OutputByteStream::Ch>(c & 0xFFu));
+    }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// ASCII
+
+//! ASCII encoding.
+/*! http://en.wikipedia.org/wiki/ASCII
+    \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
+    \note implements Encoding concept
+*/
+template<typename CharType = char>
+struct ASCII {
+    typedef CharType Ch;
+
+    enum { supportUnicode = 0 };
+
+    template<typename OutputStream>
+    static void Encode(OutputStream& os, unsigned codepoint) {
+        RAPIDJSON_ASSERT(codepoint <= 0x7F);
+        os.Put(static_cast<Ch>(codepoint & 0xFF));
+    }
+
+    template<typename OutputStream>
+    static void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+        RAPIDJSON_ASSERT(codepoint <= 0x7F);
+        PutUnsafe(os, static_cast<Ch>(codepoint & 0xFF));
+    }
+
+    template <typename InputStream>
+    static bool Decode(InputStream& is, unsigned* codepoint) {
+        uint8_t c = static_cast<uint8_t>(is.Take());
+        *codepoint = c;
+        return c <= 0X7F;
+    }
+
+    template <typename InputStream, typename OutputStream>
+    static bool Validate(InputStream& is, OutputStream& os) {
+        uint8_t c = static_cast<uint8_t>(is.Take());
+        os.Put(static_cast<typename OutputStream::Ch>(c));
+        return c <= 0x7F;
+    }
+
+    template <typename InputByteStream>
+    static CharType TakeBOM(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        uint8_t c = static_cast<uint8_t>(Take(is));
+        return static_cast<Ch>(c);
+    }
+
+    template <typename InputByteStream>
+    static Ch Take(InputByteStream& is) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
+        return static_cast<Ch>(is.Take());
+    }
+
+    template <typename OutputByteStream>
+    static void PutBOM(OutputByteStream& os) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        (void)os;
+    }
+
+    template <typename OutputByteStream>
+    static void Put(OutputByteStream& os, Ch c) {
+        RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
+        os.Put(static_cast<typename OutputByteStream::Ch>(c));
+    }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// AutoUTF
+
+//! Runtime-specified UTF encoding type of a stream.
+enum UTFType {
+    kUTF8 = 0,      //!< UTF-8.
+    kUTF16LE = 1,   //!< UTF-16 little endian.
+    kUTF16BE = 2,   //!< UTF-16 big endian.
+    kUTF32LE = 3,   //!< UTF-32 little endian.
+    kUTF32BE = 4    //!< UTF-32 big endian.
+};
+
+//! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
+/*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType().
+*/
+template<typename CharType>
+struct AutoUTF {
+    typedef CharType Ch;
+
+    enum { supportUnicode = 1 };
+
+#define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
+
+    template<typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void Encode(OutputStream& os, unsigned codepoint) {
+        typedef void (*EncodeFunc)(OutputStream&, unsigned);
+        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
+        (*f[os.GetType()])(os, codepoint);
+    }
+
+    template<typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void EncodeUnsafe(OutputStream& os, unsigned codepoint) {
+        typedef void (*EncodeFunc)(OutputStream&, unsigned);
+        static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(EncodeUnsafe) };
+        (*f[os.GetType()])(os, codepoint);
+    }
+
+    template <typename InputStream>
+    static RAPIDJSON_FORCEINLINE bool Decode(InputStream& is, unsigned* codepoint) {
+        typedef bool (*DecodeFunc)(InputStream&, unsigned*);
+        static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
+        return (*f[is.GetType()])(is, codepoint);
+    }
+
+    template <typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
+        typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
+        static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
+        return (*f[is.GetType()])(is, os);
+    }
+
+#undef RAPIDJSON_ENCODINGS_FUNC
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Transcoder
+
+//! Encoding conversion.
+template<typename SourceEncoding, typename TargetEncoding>
+struct Transcoder {
+    //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
+        unsigned codepoint;
+        if (!SourceEncoding::Decode(is, &codepoint))
+            return false;
+        TargetEncoding::Encode(os, codepoint);
+        return true;
+    }
+
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
+        unsigned codepoint;
+        if (!SourceEncoding::Decode(is, &codepoint))
+            return false;
+        TargetEncoding::EncodeUnsafe(os, codepoint);
+        return true;
+    }
+
+    //! Validate one Unicode codepoint from an encoded stream.
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
+        return Transcode(is, os);   // Since source/target encoding is different, must transcode.
+    }
+};
+
+// Forward declaration.
+template<typename Stream>
+inline void PutUnsafe(Stream& stream, typename Stream::Ch c);
+
+//! Specialization of Transcoder with same source and target encoding.
+template<typename Encoding>
+struct Transcoder<Encoding, Encoding> {
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Transcode(InputStream& is, OutputStream& os) {
+        os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
+        return true;
+    }
+    
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool TranscodeUnsafe(InputStream& is, OutputStream& os) {
+        PutUnsafe(os, is.Take());  // Just copy one code unit. This semantic is different from primary template class.
+        return true;
+    }
+    
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE bool Validate(InputStream& is, OutputStream& os) {
+        return Encoding::Validate(is, os);  // source/target encoding are the same
+    }
+};
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__GNUC__) || (defined(_MSC_VER) && !defined(__clang__))
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_ENCODINGS_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/error/en.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/error/en.h
new file mode 100644
index 000000000..2db838bff
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/error/en.h
@@ -0,0 +1,74 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ERROR_EN_H_
+#define RAPIDJSON_ERROR_EN_H_
+
+#include "error.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(covered-switch-default)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Maps error code of parsing into error message.
+/*!
+    \ingroup RAPIDJSON_ERRORS
+    \param parseErrorCode Error code obtained in parsing.
+    \return the error message.
+    \note User can make a copy of this function for localization.
+        Using switch-case is safer for future modification of error codes.
+*/
+inline const RAPIDJSON_ERROR_CHARTYPE* GetParseError_En(ParseErrorCode parseErrorCode) {
+    switch (parseErrorCode) {
+        case kParseErrorNone:                           return RAPIDJSON_ERROR_STRING("No error.");
+
+        case kParseErrorDocumentEmpty:                  return RAPIDJSON_ERROR_STRING("The document is empty.");
+        case kParseErrorDocumentRootNotSingular:        return RAPIDJSON_ERROR_STRING("The document root must not be followed by other values.");
+    
+        case kParseErrorValueInvalid:                   return RAPIDJSON_ERROR_STRING("Invalid value.");
+    
+        case kParseErrorObjectMissName:                 return RAPIDJSON_ERROR_STRING("Missing a name for object member.");
+        case kParseErrorObjectMissColon:                return RAPIDJSON_ERROR_STRING("Missing a colon after a name of object member.");
+        case kParseErrorObjectMissCommaOrCurlyBracket:  return RAPIDJSON_ERROR_STRING("Missing a comma or '}' after an object member.");
+    
+        case kParseErrorArrayMissCommaOrSquareBracket:  return RAPIDJSON_ERROR_STRING("Missing a comma or ']' after an array element.");
+
+        case kParseErrorStringUnicodeEscapeInvalidHex:  return RAPIDJSON_ERROR_STRING("Incorrect hex digit after \\u escape in string.");
+        case kParseErrorStringUnicodeSurrogateInvalid:  return RAPIDJSON_ERROR_STRING("The surrogate pair in string is invalid.");
+        case kParseErrorStringEscapeInvalid:            return RAPIDJSON_ERROR_STRING("Invalid escape character in string.");
+        case kParseErrorStringMissQuotationMark:        return RAPIDJSON_ERROR_STRING("Missing a closing quotation mark in string.");
+        case kParseErrorStringInvalidEncoding:          return RAPIDJSON_ERROR_STRING("Invalid encoding in string.");
+
+        case kParseErrorNumberTooBig:                   return RAPIDJSON_ERROR_STRING("Number too big to be stored in double.");
+        case kParseErrorNumberMissFraction:             return RAPIDJSON_ERROR_STRING("Miss fraction part in number.");
+        case kParseErrorNumberMissExponent:             return RAPIDJSON_ERROR_STRING("Miss exponent in number.");
+
+        case kParseErrorTermination:                    return RAPIDJSON_ERROR_STRING("Terminate parsing due to Handler error.");
+        case kParseErrorUnspecificSyntaxError:          return RAPIDJSON_ERROR_STRING("Unspecific syntax error.");
+
+        default:                                        return RAPIDJSON_ERROR_STRING("Unknown error.");
+    }
+}
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_ERROR_EN_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/error/error.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/error/error.h
new file mode 100644
index 000000000..9311d2f03
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/error/error.h
@@ -0,0 +1,161 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ERROR_ERROR_H_
+#define RAPIDJSON_ERROR_ERROR_H_
+
+#include "../rapidjson.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+/*! \file error.h */
+
+/*! \defgroup RAPIDJSON_ERRORS RapidJSON error handling */
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ERROR_CHARTYPE
+
+//! Character type of error messages.
+/*! \ingroup RAPIDJSON_ERRORS
+    The default character type is \c char.
+    On Windows, user can define this macro as \c TCHAR for supporting both
+    unicode/non-unicode settings.
+*/
+#ifndef RAPIDJSON_ERROR_CHARTYPE
+#define RAPIDJSON_ERROR_CHARTYPE char
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ERROR_STRING
+
+//! Macro for converting string literial to \ref RAPIDJSON_ERROR_CHARTYPE[].
+/*! \ingroup RAPIDJSON_ERRORS
+    By default this conversion macro does nothing.
+    On Windows, user can define this macro as \c _T(x) for supporting both
+    unicode/non-unicode settings.
+*/
+#ifndef RAPIDJSON_ERROR_STRING
+#define RAPIDJSON_ERROR_STRING(x) x
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// ParseErrorCode
+
+//! Error code of parsing.
+/*! \ingroup RAPIDJSON_ERRORS
+    \see GenericReader::Parse, GenericReader::GetParseErrorCode
+*/
+enum ParseErrorCode {
+    kParseErrorNone = 0,                        //!< No error.
+
+    kParseErrorDocumentEmpty,                   //!< The document is empty.
+    kParseErrorDocumentRootNotSingular,         //!< The document root must not follow by other values.
+
+    kParseErrorValueInvalid,                    //!< Invalid value.
+
+    kParseErrorObjectMissName,                  //!< Missing a name for object member.
+    kParseErrorObjectMissColon,                 //!< Missing a colon after a name of object member.
+    kParseErrorObjectMissCommaOrCurlyBracket,   //!< Missing a comma or '}' after an object member.
+
+    kParseErrorArrayMissCommaOrSquareBracket,   //!< Missing a comma or ']' after an array element.
+
+    kParseErrorStringUnicodeEscapeInvalidHex,   //!< Incorrect hex digit after \\u escape in string.
+    kParseErrorStringUnicodeSurrogateInvalid,   //!< The surrogate pair in string is invalid.
+    kParseErrorStringEscapeInvalid,             //!< Invalid escape character in string.
+    kParseErrorStringMissQuotationMark,         //!< Missing a closing quotation mark in string.
+    kParseErrorStringInvalidEncoding,           //!< Invalid encoding in string.
+
+    kParseErrorNumberTooBig,                    //!< Number too big to be stored in double.
+    kParseErrorNumberMissFraction,              //!< Miss fraction part in number.
+    kParseErrorNumberMissExponent,              //!< Miss exponent in number.
+
+    kParseErrorTermination,                     //!< Parsing was terminated.
+    kParseErrorUnspecificSyntaxError            //!< Unspecific syntax error.
+};
+
+//! Result of parsing (wraps ParseErrorCode)
+/*!
+    \ingroup RAPIDJSON_ERRORS
+    \code
+        Document doc;
+        ParseResult ok = doc.Parse("[42]");
+        if (!ok) {
+            fprintf(stderr, "JSON parse error: %s (%u)",
+                    GetParseError_En(ok.Code()), ok.Offset());
+            exit(EXIT_FAILURE);
+        }
+    \endcode
+    \see GenericReader::Parse, GenericDocument::Parse
+*/
+struct ParseResult {
+    //!! Unspecified boolean type
+    typedef bool (ParseResult::*BooleanType)() const;
+public:
+    //! Default constructor, no error.
+    ParseResult() : code_(kParseErrorNone), offset_(0) {}
+    //! Constructor to set an error.
+    ParseResult(ParseErrorCode code, size_t offset) : code_(code), offset_(offset) {}
+
+    //! Get the error code.
+    ParseErrorCode Code() const { return code_; }
+    //! Get the error offset, if \ref IsError(), 0 otherwise.
+    size_t Offset() const { return offset_; }
+
+    //! Explicit conversion to \c bool, returns \c true, iff !\ref IsError().
+    operator BooleanType() const { return !IsError() ? &ParseResult::IsError : NULL; }
+    //! Whether the result is an error.
+    bool IsError() const { return code_ != kParseErrorNone; }
+
+    bool operator==(const ParseResult& that) const { return code_ == that.code_; }
+    bool operator==(ParseErrorCode code) const { return code_ == code; }
+    friend bool operator==(ParseErrorCode code, const ParseResult & err) { return code == err.code_; }
+
+    bool operator!=(const ParseResult& that) const { return !(*this == that); }
+    bool operator!=(ParseErrorCode code) const { return !(*this == code); }
+    friend bool operator!=(ParseErrorCode code, const ParseResult & err) { return err != code; }
+
+    //! Reset error code.
+    void Clear() { Set(kParseErrorNone); }
+    //! Update error code and offset.
+    void Set(ParseErrorCode code, size_t offset = 0) { code_ = code; offset_ = offset; }
+
+private:
+    ParseErrorCode code_;
+    size_t offset_;
+};
+
+//! Function pointer type of GetParseError().
+/*! \ingroup RAPIDJSON_ERRORS
+
+    This is the prototype for \c GetParseError_X(), where \c X is a locale.
+    User can dynamically change locale in runtime, e.g.:
+\code
+    GetParseErrorFunc GetParseError = GetParseError_En; // or whatever
+    const RAPIDJSON_ERROR_CHARTYPE* s = GetParseError(document.GetParseErrorCode());
+\endcode
+*/
+typedef const RAPIDJSON_ERROR_CHARTYPE* (*GetParseErrorFunc)(ParseErrorCode);
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_ERROR_ERROR_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/filereadstream.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/filereadstream.h
new file mode 100644
index 000000000..f1bfb7d0b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/filereadstream.h
@@ -0,0 +1,99 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_FILEREADSTREAM_H_
+#define RAPIDJSON_FILEREADSTREAM_H_
+
+#include "stream.h"
+#include <cstdio>
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(missing-noreturn)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! File byte stream for input using fread().
+/*!
+    \note implements Stream concept
+*/
+class FileReadStream {
+public:
+    typedef char Ch;    //!< Character type (byte).
+
+    //! Constructor.
+    /*!
+        \param fp File pointer opened for read.
+        \param buffer user-supplied buffer.
+        \param bufferSize size of buffer in bytes. Must >=4 bytes.
+    */
+    FileReadStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferSize_(bufferSize), bufferLast_(0), current_(buffer_), readCount_(0), count_(0), eof_(false) { 
+        RAPIDJSON_ASSERT(fp_ != 0);
+        RAPIDJSON_ASSERT(bufferSize >= 4);
+        Read();
+    }
+
+    Ch Peek() const { return *current_; }
+    Ch Take() { Ch c = *current_; Read(); return c; }
+    size_t Tell() const { return count_ + static_cast<size_t>(current_ - buffer_); }
+
+    // Not implemented
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); } 
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    // For encoding detection only.
+    const Ch* Peek4() const {
+        return (current_ + 4 <= bufferLast_) ? current_ : 0;
+    }
+
+private:
+    void Read() {
+        if (current_ < bufferLast_)
+            ++current_;
+        else if (!eof_) {
+            count_ += readCount_;
+            readCount_ = std::fread(buffer_, 1, bufferSize_, fp_);
+            bufferLast_ = buffer_ + readCount_ - 1;
+            current_ = buffer_;
+
+            if (readCount_ < bufferSize_) {
+                buffer_[readCount_] = '\0';
+                ++bufferLast_;
+                eof_ = true;
+            }
+        }
+    }
+
+    std::FILE* fp_;
+    Ch *buffer_;
+    size_t bufferSize_;
+    Ch *bufferLast_;
+    Ch *current_;
+    size_t readCount_;
+    size_t count_;  //!< Number of characters read
+    bool eof_;
+};
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_FILESTREAM_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/filewritestream.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/filewritestream.h
new file mode 100644
index 000000000..8b48fee19
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/filewritestream.h
@@ -0,0 +1,104 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_FILEWRITESTREAM_H_
+#define RAPIDJSON_FILEWRITESTREAM_H_
+
+#include "stream.h"
+#include <cstdio>
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(unreachable-code)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Wrapper of C file stream for output using fwrite().
+/*!
+    \note implements Stream concept
+*/
+class FileWriteStream {
+public:
+    typedef char Ch;    //!< Character type. Only support char.
+
+    FileWriteStream(std::FILE* fp, char* buffer, size_t bufferSize) : fp_(fp), buffer_(buffer), bufferEnd_(buffer + bufferSize), current_(buffer_) { 
+        RAPIDJSON_ASSERT(fp_ != 0);
+    }
+
+    void Put(char c) { 
+        if (current_ >= bufferEnd_)
+            Flush();
+
+        *current_++ = c;
+    }
+
+    void PutN(char c, size_t n) {
+        size_t avail = static_cast<size_t>(bufferEnd_ - current_);
+        while (n > avail) {
+            std::memset(current_, c, avail);
+            current_ += avail;
+            Flush();
+            n -= avail;
+            avail = static_cast<size_t>(bufferEnd_ - current_);
+        }
+
+        if (n > 0) {
+            std::memset(current_, c, n);
+            current_ += n;
+        }
+    }
+
+    void Flush() {
+        if (current_ != buffer_) {
+            size_t result = std::fwrite(buffer_, 1, static_cast<size_t>(current_ - buffer_), fp_);
+            if (result < static_cast<size_t>(current_ - buffer_)) {
+                // failure deliberately ignored at this time
+                // added to avoid warn_unused_result build errors
+            }
+            current_ = buffer_;
+        }
+    }
+
+    // Not implemented
+    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
+    char Take() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
+    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    // Prohibit copy constructor & assignment operator.
+    FileWriteStream(const FileWriteStream&);
+    FileWriteStream& operator=(const FileWriteStream&);
+
+    std::FILE* fp_;
+    char *buffer_;
+    char *bufferEnd_;
+    char *current_;
+};
+
+//! Implement specialized version of PutN() with memset() for better performance.
+template<>
+inline void PutN(FileWriteStream& stream, char c, size_t n) {
+    stream.PutN(c, n);
+}
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_FILESTREAM_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/fwd.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/fwd.h
new file mode 100644
index 000000000..e8104e841
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/fwd.h
@@ -0,0 +1,151 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_FWD_H_
+#define RAPIDJSON_FWD_H_
+
+#include "rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+// encodings.h
+
+template<typename CharType> struct UTF8;
+template<typename CharType> struct UTF16;
+template<typename CharType> struct UTF16BE;
+template<typename CharType> struct UTF16LE;
+template<typename CharType> struct UTF32;
+template<typename CharType> struct UTF32BE;
+template<typename CharType> struct UTF32LE;
+template<typename CharType> struct ASCII;
+template<typename CharType> struct AutoUTF;
+
+template<typename SourceEncoding, typename TargetEncoding>
+struct Transcoder;
+
+// allocators.h
+
+class CrtAllocator;
+
+template <typename BaseAllocator>
+class MemoryPoolAllocator;
+
+// stream.h
+
+template <typename Encoding>
+struct GenericStringStream;
+
+typedef GenericStringStream<UTF8<char> > StringStream;
+
+template <typename Encoding>
+struct GenericInsituStringStream;
+
+typedef GenericInsituStringStream<UTF8<char> > InsituStringStream;
+
+// stringbuffer.h
+
+template <typename Encoding, typename Allocator>
+class GenericStringBuffer;
+
+typedef GenericStringBuffer<UTF8<char>, CrtAllocator> StringBuffer;
+
+// filereadstream.h
+
+class FileReadStream;
+
+// filewritestream.h
+
+class FileWriteStream;
+
+// memorybuffer.h
+
+template <typename Allocator>
+struct GenericMemoryBuffer;
+
+typedef GenericMemoryBuffer<CrtAllocator> MemoryBuffer;
+
+// memorystream.h
+
+struct MemoryStream;
+
+// reader.h
+
+template<typename Encoding, typename Derived>
+struct BaseReaderHandler;
+
+template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator>
+class GenericReader;
+
+typedef GenericReader<UTF8<char>, UTF8<char>, CrtAllocator> Reader;
+
+// writer.h
+
+template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags>
+class Writer;
+
+// prettywriter.h
+
+template<typename OutputStream, typename SourceEncoding, typename TargetEncoding, typename StackAllocator, unsigned writeFlags>
+class PrettyWriter;
+
+// document.h
+
+template <typename Encoding, typename Allocator> 
+struct GenericMember;
+
+template <bool Const, typename Encoding, typename Allocator>
+class GenericMemberIterator;
+
+template<typename CharType>
+struct GenericStringRef;
+
+template <typename Encoding, typename Allocator> 
+class GenericValue;
+
+typedef GenericValue<UTF8<char>, MemoryPoolAllocator<CrtAllocator> > Value;
+
+template <typename Encoding, typename Allocator, typename StackAllocator>
+class GenericDocument;
+
+typedef GenericDocument<UTF8<char>, MemoryPoolAllocator<CrtAllocator>, CrtAllocator> Document;
+
+// pointer.h
+
+template <typename ValueType, typename Allocator>
+class GenericPointer;
+
+typedef GenericPointer<Value, CrtAllocator> Pointer;
+
+// schema.h
+
+template <typename SchemaDocumentType>
+class IGenericRemoteSchemaDocumentProvider;
+
+template <typename ValueT, typename Allocator>
+class GenericSchemaDocument;
+
+typedef GenericSchemaDocument<Value, CrtAllocator> SchemaDocument;
+typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider;
+
+template <
+    typename SchemaDocumentType,
+    typename OutputHandler,
+    typename StateAllocator>
+class GenericSchemaValidator;
+
+typedef GenericSchemaValidator<SchemaDocument, BaseReaderHandler<UTF8<char>, void>, CrtAllocator> SchemaValidator;
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_RAPIDJSONFWD_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/biginteger.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/biginteger.h
new file mode 100644
index 000000000..a31c8a88d
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/biginteger.h
@@ -0,0 +1,290 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_BIGINTEGER_H_
+#define RAPIDJSON_BIGINTEGER_H_
+
+#include "../rapidjson.h"
+
+#if defined(_MSC_VER) && !__INTEL_COMPILER && defined(_M_AMD64)
+#include <intrin.h> // for _umul128
+#pragma intrinsic(_umul128)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+class BigInteger {
+public:
+    typedef uint64_t Type;
+
+    BigInteger(const BigInteger& rhs) : count_(rhs.count_) {
+        std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type));
+    }
+
+    explicit BigInteger(uint64_t u) : count_(1) {
+        digits_[0] = u;
+    }
+
+    BigInteger(const char* decimals, size_t length) : count_(1) {
+        RAPIDJSON_ASSERT(length > 0);
+        digits_[0] = 0;
+        size_t i = 0;
+        const size_t kMaxDigitPerIteration = 19;  // 2^64 = 18446744073709551616 > 10^19
+        while (length >= kMaxDigitPerIteration) {
+            AppendDecimal64(decimals + i, decimals + i + kMaxDigitPerIteration);
+            length -= kMaxDigitPerIteration;
+            i += kMaxDigitPerIteration;
+        }
+
+        if (length > 0)
+            AppendDecimal64(decimals + i, decimals + i + length);
+    }
+    
+    BigInteger& operator=(const BigInteger &rhs)
+    {
+        if (this != &rhs) {
+            count_ = rhs.count_;
+            std::memcpy(digits_, rhs.digits_, count_ * sizeof(Type));
+        }
+        return *this;
+    }
+    
+    BigInteger& operator=(uint64_t u) {
+        digits_[0] = u;            
+        count_ = 1;
+        return *this;
+    }
+
+    BigInteger& operator+=(uint64_t u) {
+        Type backup = digits_[0];
+        digits_[0] += u;
+        for (size_t i = 0; i < count_ - 1; i++) {
+            if (digits_[i] >= backup)
+                return *this; // no carry
+            backup = digits_[i + 1];
+            digits_[i + 1] += 1;
+        }
+
+        // Last carry
+        if (digits_[count_ - 1] < backup)
+            PushBack(1);
+
+        return *this;
+    }
+
+    BigInteger& operator*=(uint64_t u) {
+        if (u == 0) return *this = 0;
+        if (u == 1) return *this;
+        if (*this == 1) return *this = u;
+
+        uint64_t k = 0;
+        for (size_t i = 0; i < count_; i++) {
+            uint64_t hi;
+            digits_[i] = MulAdd64(digits_[i], u, k, &hi);
+            k = hi;
+        }
+        
+        if (k > 0)
+            PushBack(k);
+
+        return *this;
+    }
+
+    BigInteger& operator*=(uint32_t u) {
+        if (u == 0) return *this = 0;
+        if (u == 1) return *this;
+        if (*this == 1) return *this = u;
+
+        uint64_t k = 0;
+        for (size_t i = 0; i < count_; i++) {
+            const uint64_t c = digits_[i] >> 32;
+            const uint64_t d = digits_[i] & 0xFFFFFFFF;
+            const uint64_t uc = u * c;
+            const uint64_t ud = u * d;
+            const uint64_t p0 = ud + k;
+            const uint64_t p1 = uc + (p0 >> 32);
+            digits_[i] = (p0 & 0xFFFFFFFF) | (p1 << 32);
+            k = p1 >> 32;
+        }
+        
+        if (k > 0)
+            PushBack(k);
+
+        return *this;
+    }
+
+    BigInteger& operator<<=(size_t shift) {
+        if (IsZero() || shift == 0) return *this;
+
+        size_t offset = shift / kTypeBit;
+        size_t interShift = shift % kTypeBit;
+        RAPIDJSON_ASSERT(count_ + offset <= kCapacity);
+
+        if (interShift == 0) {
+            std::memmove(digits_ + offset, digits_, count_ * sizeof(Type));
+            count_ += offset;
+        }
+        else {
+            digits_[count_] = 0;
+            for (size_t i = count_; i > 0; i--)
+                digits_[i + offset] = (digits_[i] << interShift) | (digits_[i - 1] >> (kTypeBit - interShift));
+            digits_[offset] = digits_[0] << interShift;
+            count_ += offset;
+            if (digits_[count_])
+                count_++;
+        }
+
+        std::memset(digits_, 0, offset * sizeof(Type));
+
+        return *this;
+    }
+
+    bool operator==(const BigInteger& rhs) const {
+        return count_ == rhs.count_ && std::memcmp(digits_, rhs.digits_, count_ * sizeof(Type)) == 0;
+    }
+
+    bool operator==(const Type rhs) const {
+        return count_ == 1 && digits_[0] == rhs;
+    }
+
+    BigInteger& MultiplyPow5(unsigned exp) {
+        static const uint32_t kPow5[12] = {
+            5,
+            5 * 5,
+            5 * 5 * 5,
+            5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+            5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5
+        };
+        if (exp == 0) return *this;
+        for (; exp >= 27; exp -= 27) *this *= RAPIDJSON_UINT64_C2(0X6765C793, 0XFA10079D); // 5^27
+        for (; exp >= 13; exp -= 13) *this *= static_cast<uint32_t>(1220703125u); // 5^13
+        if (exp > 0)                 *this *= kPow5[exp - 1];
+        return *this;
+    }
+
+    // Compute absolute difference of this and rhs.
+    // Assume this != rhs
+    bool Difference(const BigInteger& rhs, BigInteger* out) const {
+        int cmp = Compare(rhs);
+        RAPIDJSON_ASSERT(cmp != 0);
+        const BigInteger *a, *b;  // Makes a > b
+        bool ret;
+        if (cmp < 0) { a = &rhs; b = this; ret = true; }
+        else         { a = this; b = &rhs; ret = false; }
+
+        Type borrow = 0;
+        for (size_t i = 0; i < a->count_; i++) {
+            Type d = a->digits_[i] - borrow;
+            if (i < b->count_)
+                d -= b->digits_[i];
+            borrow = (d > a->digits_[i]) ? 1 : 0;
+            out->digits_[i] = d;
+            if (d != 0)
+                out->count_ = i + 1;
+        }
+
+        return ret;
+    }
+
+    int Compare(const BigInteger& rhs) const {
+        if (count_ != rhs.count_)
+            return count_ < rhs.count_ ? -1 : 1;
+
+        for (size_t i = count_; i-- > 0;)
+            if (digits_[i] != rhs.digits_[i])
+                return digits_[i] < rhs.digits_[i] ? -1 : 1;
+
+        return 0;
+    }
+
+    size_t GetCount() const { return count_; }
+    Type GetDigit(size_t index) const { RAPIDJSON_ASSERT(index < count_); return digits_[index]; }
+    bool IsZero() const { return count_ == 1 && digits_[0] == 0; }
+
+private:
+    void AppendDecimal64(const char* begin, const char* end) {
+        uint64_t u = ParseUint64(begin, end);
+        if (IsZero())
+            *this = u;
+        else {
+            unsigned exp = static_cast<unsigned>(end - begin);
+            (MultiplyPow5(exp) <<= exp) += u;   // *this = *this * 10^exp + u
+        }
+    }
+
+    void PushBack(Type digit) {
+        RAPIDJSON_ASSERT(count_ < kCapacity);
+        digits_[count_++] = digit;
+    }
+
+    static uint64_t ParseUint64(const char* begin, const char* end) {
+        uint64_t r = 0;
+        for (const char* p = begin; p != end; ++p) {
+            RAPIDJSON_ASSERT(*p >= '0' && *p <= '9');
+            r = r * 10u + static_cast<unsigned>(*p - '0');
+        }
+        return r;
+    }
+
+    // Assume a * b + k < 2^128
+    static uint64_t MulAdd64(uint64_t a, uint64_t b, uint64_t k, uint64_t* outHigh) {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+        uint64_t low = _umul128(a, b, outHigh) + k;
+        if (low < k)
+            (*outHigh)++;
+        return low;
+#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+        __extension__ typedef unsigned __int128 uint128;
+        uint128 p = static_cast<uint128>(a) * static_cast<uint128>(b);
+        p += k;
+        *outHigh = static_cast<uint64_t>(p >> 64);
+        return static_cast<uint64_t>(p);
+#else
+        const uint64_t a0 = a & 0xFFFFFFFF, a1 = a >> 32, b0 = b & 0xFFFFFFFF, b1 = b >> 32;
+        uint64_t x0 = a0 * b0, x1 = a0 * b1, x2 = a1 * b0, x3 = a1 * b1;
+        x1 += (x0 >> 32); // can't give carry
+        x1 += x2;
+        if (x1 < x2)
+            x3 += (static_cast<uint64_t>(1) << 32);
+        uint64_t lo = (x1 << 32) + (x0 & 0xFFFFFFFF);
+        uint64_t hi = x3 + (x1 >> 32);
+
+        lo += k;
+        if (lo < k)
+            hi++;
+        *outHigh = hi;
+        return lo;
+#endif
+    }
+
+    static const size_t kBitCount = 3328;  // 64bit * 54 > 10^1000
+    static const size_t kCapacity = kBitCount / sizeof(Type);
+    static const size_t kTypeBit = sizeof(Type) * 8;
+
+    Type digits_[kCapacity];
+    size_t count_;
+};
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_BIGINTEGER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/diyfp.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/diyfp.h
new file mode 100644
index 000000000..b6c2cf561
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/diyfp.h
@@ -0,0 +1,271 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
+// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
+// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
+
+#ifndef RAPIDJSON_DIYFP_H_
+#define RAPIDJSON_DIYFP_H_
+
+#include "../rapidjson.h"
+#include <limits>
+
+#if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse64)
+#pragma intrinsic(_umul128)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+struct DiyFp {
+    DiyFp() : f(), e() {}
+
+    DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
+
+    explicit DiyFp(double d) {
+        union {
+            double d;
+            uint64_t u64;
+        } u = { d };
+
+        int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
+        uint64_t significand = (u.u64 & kDpSignificandMask);
+        if (biased_e != 0) {
+            f = significand + kDpHiddenBit;
+            e = biased_e - kDpExponentBias;
+        }
+        else {
+            f = significand;
+            e = kDpMinExponent + 1;
+        }
+    }
+
+    DiyFp operator-(const DiyFp& rhs) const {
+        return DiyFp(f - rhs.f, e);
+    }
+
+    DiyFp operator*(const DiyFp& rhs) const {
+#if defined(_MSC_VER) && defined(_M_AMD64)
+        uint64_t h;
+        uint64_t l = _umul128(f, rhs.f, &h);
+        if (l & (uint64_t(1) << 63)) // rounding
+            h++;
+        return DiyFp(h, e + rhs.e + 64);
+#elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
+        __extension__ typedef unsigned __int128 uint128;
+        uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
+        uint64_t h = static_cast<uint64_t>(p >> 64);
+        uint64_t l = static_cast<uint64_t>(p);
+        if (l & (uint64_t(1) << 63)) // rounding
+            h++;
+        return DiyFp(h, e + rhs.e + 64);
+#else
+        const uint64_t M32 = 0xFFFFFFFF;
+        const uint64_t a = f >> 32;
+        const uint64_t b = f & M32;
+        const uint64_t c = rhs.f >> 32;
+        const uint64_t d = rhs.f & M32;
+        const uint64_t ac = a * c;
+        const uint64_t bc = b * c;
+        const uint64_t ad = a * d;
+        const uint64_t bd = b * d;
+        uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
+        tmp += 1U << 31;  /// mult_round
+        return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
+#endif
+    }
+
+    DiyFp Normalize() const {
+        RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737
+#if defined(_MSC_VER) && defined(_M_AMD64)
+        unsigned long index;
+        _BitScanReverse64(&index, f);
+        return DiyFp(f << (63 - index), e - (63 - index));
+#elif defined(__GNUC__) && __GNUC__ >= 4
+        int s = __builtin_clzll(f);
+        return DiyFp(f << s, e - s);
+#else
+        DiyFp res = *this;
+        while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
+            res.f <<= 1;
+            res.e--;
+        }
+        return res;
+#endif
+    }
+
+    DiyFp NormalizeBoundary() const {
+        DiyFp res = *this;
+        while (!(res.f & (kDpHiddenBit << 1))) {
+            res.f <<= 1;
+            res.e--;
+        }
+        res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
+        res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
+        return res;
+    }
+
+    void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
+        DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
+        DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
+        mi.f <<= mi.e - pl.e;
+        mi.e = pl.e;
+        *plus = pl;
+        *minus = mi;
+    }
+
+    double ToDouble() const {
+        union {
+            double d;
+            uint64_t u64;
+        }u;
+        RAPIDJSON_ASSERT(f <= kDpHiddenBit + kDpSignificandMask);
+        if (e < kDpDenormalExponent) {
+            // Underflow.
+            return 0.0;
+        }
+        if (e >= kDpMaxExponent) {
+            // Overflow.
+            return std::numeric_limits<double>::infinity();
+        }
+        const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
+            static_cast<uint64_t>(e + kDpExponentBias);
+        u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
+        return u.d;
+    }
+
+    static const int kDiySignificandSize = 64;
+    static const int kDpSignificandSize = 52;
+    static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
+    static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
+    static const int kDpMinExponent = -kDpExponentBias;
+    static const int kDpDenormalExponent = -kDpExponentBias + 1;
+    static const uint64_t kDpExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    static const uint64_t kDpSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
+    static const uint64_t kDpHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+
+    uint64_t f;
+    int e;
+};
+
+inline DiyFp GetCachedPowerByIndex(size_t index) {
+    // 10^-348, 10^-340, ..., 10^340
+    static const uint64_t kCachedPowers_F[] = {
+        RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
+        RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
+        RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
+        RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
+        RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
+        RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
+        RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
+        RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
+        RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
+        RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
+        RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
+        RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
+        RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
+        RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
+        RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
+        RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
+        RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
+        RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
+        RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
+        RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
+        RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
+        RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
+        RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
+        RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
+        RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
+        RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
+        RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
+        RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
+        RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
+        RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
+        RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
+        RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
+        RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
+        RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
+        RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
+        RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
+        RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
+        RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
+        RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
+        RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
+        RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
+        RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
+        RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
+        RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
+    };
+    static const int16_t kCachedPowers_E[] = {
+        -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007,  -980,
+        -954,  -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,
+        -688,  -661,  -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,
+        -422,  -396,  -369,  -343,  -316,  -289,  -263,  -236,  -210,  -183,
+        -157,  -130,  -103,   -77,   -50,   -24,     3,    30,    56,    83,
+        109,   136,   162,   189,   216,   242,   269,   295,   322,   348,
+        375,   402,   428,   455,   481,   508,   534,   561,   588,   614,
+        641,   667,   694,   720,   747,   774,   800,   827,   853,   880,
+        907,   933,   960,   986,  1013,  1039,  1066
+    };
+    RAPIDJSON_ASSERT(index < 87);
+    return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
+}
+
+inline DiyFp GetCachedPower(int e, int* K) {
+
+    //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
+    double dk = (-61 - e) * 0.30102999566398114 + 347;  // dk must be positive, so can do ceiling in positive
+    int k = static_cast<int>(dk);
+    if (dk - k > 0.0)
+        k++;
+
+    unsigned index = static_cast<unsigned>((k >> 3) + 1);
+    *K = -(-348 + static_cast<int>(index << 3));    // decimal exponent no need lookup table
+
+    return GetCachedPowerByIndex(index);
+}
+
+inline DiyFp GetCachedPower10(int exp, int *outExp) {
+    RAPIDJSON_ASSERT(exp >= -348);
+    unsigned index = static_cast<unsigned>(exp + 348) / 8u;
+    *outExp = -348 + static_cast<int>(index) * 8;
+    return GetCachedPowerByIndex(index);
+}
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_DIYFP_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/dtoa.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/dtoa.h
new file mode 100644
index 000000000..bf2e9b2e5
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/dtoa.h
@@ -0,0 +1,245 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+// This is a C++ header-only implementation of Grisu2 algorithm from the publication:
+// Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
+// integers." ACM Sigplan Notices 45.6 (2010): 233-243.
+
+#ifndef RAPIDJSON_DTOA_
+#define RAPIDJSON_DTOA_
+
+#include "itoa.h" // GetDigitsLut()
+#include "diyfp.h"
+#include "ieee754.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+RAPIDJSON_DIAG_OFF(array-bounds) // some gcc versions generate wrong warnings https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59124
+#endif
+
+inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
+    while (rest < wp_w && delta - rest >= ten_kappa &&
+           (rest + ten_kappa < wp_w ||  /// closer
+            wp_w - rest > rest + ten_kappa - wp_w)) {
+        buffer[len - 1]--;
+        rest += ten_kappa;
+    }
+}
+
+inline int CountDecimalDigit32(uint32_t n) {
+    // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
+    if (n < 10) return 1;
+    if (n < 100) return 2;
+    if (n < 1000) return 3;
+    if (n < 10000) return 4;
+    if (n < 100000) return 5;
+    if (n < 1000000) return 6;
+    if (n < 10000000) return 7;
+    if (n < 100000000) return 8;
+    // Will not reach 10 digits in DigitGen()
+    //if (n < 1000000000) return 9;
+    //return 10;
+    return 9;
+}
+
+inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
+    static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };
+    const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
+    const DiyFp wp_w = Mp - W;
+    uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
+    uint64_t p2 = Mp.f & (one.f - 1);
+    int kappa = CountDecimalDigit32(p1); // kappa in [0, 9]
+    *len = 0;
+
+    while (kappa > 0) {
+        uint32_t d = 0;
+        switch (kappa) {
+            case  9: d = p1 /  100000000; p1 %=  100000000; break;
+            case  8: d = p1 /   10000000; p1 %=   10000000; break;
+            case  7: d = p1 /    1000000; p1 %=    1000000; break;
+            case  6: d = p1 /     100000; p1 %=     100000; break;
+            case  5: d = p1 /      10000; p1 %=      10000; break;
+            case  4: d = p1 /       1000; p1 %=       1000; break;
+            case  3: d = p1 /        100; p1 %=        100; break;
+            case  2: d = p1 /         10; p1 %=         10; break;
+            case  1: d = p1;              p1 =           0; break;
+            default:;
+        }
+        if (d || *len)
+            buffer[(*len)++] = static_cast<char>('0' + static_cast<char>(d));
+        kappa--;
+        uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
+        if (tmp <= delta) {
+            *K += kappa;
+            GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
+            return;
+        }
+    }
+
+    // kappa = 0
+    for (;;) {
+        p2 *= 10;
+        delta *= 10;
+        char d = static_cast<char>(p2 >> -one.e);
+        if (d || *len)
+            buffer[(*len)++] = static_cast<char>('0' + d);
+        p2 &= one.f - 1;
+        kappa--;
+        if (p2 < delta) {
+            *K += kappa;
+            int index = -kappa;
+            GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * (index < 9 ? kPow10[index] : 0));
+            return;
+        }
+    }
+}
+
+inline void Grisu2(double value, char* buffer, int* length, int* K) {
+    const DiyFp v(value);
+    DiyFp w_m, w_p;
+    v.NormalizedBoundaries(&w_m, &w_p);
+
+    const DiyFp c_mk = GetCachedPower(w_p.e, K);
+    const DiyFp W = v.Normalize() * c_mk;
+    DiyFp Wp = w_p * c_mk;
+    DiyFp Wm = w_m * c_mk;
+    Wm.f++;
+    Wp.f--;
+    DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
+}
+
+inline char* WriteExponent(int K, char* buffer) {
+    if (K < 0) {
+        *buffer++ = '-';
+        K = -K;
+    }
+
+    if (K >= 100) {
+        *buffer++ = static_cast<char>('0' + static_cast<char>(K / 100));
+        K %= 100;
+        const char* d = GetDigitsLut() + K * 2;
+        *buffer++ = d[0];
+        *buffer++ = d[1];
+    }
+    else if (K >= 10) {
+        const char* d = GetDigitsLut() + K * 2;
+        *buffer++ = d[0];
+        *buffer++ = d[1];
+    }
+    else
+        *buffer++ = static_cast<char>('0' + static_cast<char>(K));
+
+    return buffer;
+}
+
+inline char* Prettify(char* buffer, int length, int k, int maxDecimalPlaces) {
+    const int kk = length + k;  // 10^(kk-1) <= v < 10^kk
+
+    if (0 <= k && kk <= 21) {
+        // 1234e7 -> 12340000000
+        for (int i = length; i < kk; i++)
+            buffer[i] = '0';
+        buffer[kk] = '.';
+        buffer[kk + 1] = '0';
+        return &buffer[kk + 2];
+    }
+    else if (0 < kk && kk <= 21) {
+        // 1234e-2 -> 12.34
+        std::memmove(&buffer[kk + 1], &buffer[kk], static_cast<size_t>(length - kk));
+        buffer[kk] = '.';
+        if (0 > k + maxDecimalPlaces) {
+            // When maxDecimalPlaces = 2, 1.2345 -> 1.23, 1.102 -> 1.1
+            // Remove extra trailing zeros (at least one) after truncation.
+            for (int i = kk + maxDecimalPlaces; i > kk + 1; i--)
+                if (buffer[i] != '0')
+                    return &buffer[i + 1];
+            return &buffer[kk + 2]; // Reserve one zero
+        }
+        else
+            return &buffer[length + 1];
+    }
+    else if (-6 < kk && kk <= 0) {
+        // 1234e-6 -> 0.001234
+        const int offset = 2 - kk;
+        std::memmove(&buffer[offset], &buffer[0], static_cast<size_t>(length));
+        buffer[0] = '0';
+        buffer[1] = '.';
+        for (int i = 2; i < offset; i++)
+            buffer[i] = '0';
+        if (length - kk > maxDecimalPlaces) {
+            // When maxDecimalPlaces = 2, 0.123 -> 0.12, 0.102 -> 0.1
+            // Remove extra trailing zeros (at least one) after truncation.
+            for (int i = maxDecimalPlaces + 1; i > 2; i--)
+                if (buffer[i] != '0')
+                    return &buffer[i + 1];
+            return &buffer[3]; // Reserve one zero
+        }
+        else
+            return &buffer[length + offset];
+    }
+    else if (kk < -maxDecimalPlaces) {
+        // Truncate to zero
+        buffer[0] = '0';
+        buffer[1] = '.';
+        buffer[2] = '0';
+        return &buffer[3];
+    }
+    else if (length == 1) {
+        // 1e30
+        buffer[1] = 'e';
+        return WriteExponent(kk - 1, &buffer[2]);
+    }
+    else {
+        // 1234e30 -> 1.234e33
+        std::memmove(&buffer[2], &buffer[1], static_cast<size_t>(length - 1));
+        buffer[1] = '.';
+        buffer[length + 1] = 'e';
+        return WriteExponent(kk - 1, &buffer[0 + length + 2]);
+    }
+}
+
+inline char* dtoa(double value, char* buffer, int maxDecimalPlaces = 324) {
+    RAPIDJSON_ASSERT(maxDecimalPlaces >= 1);
+    Double d(value);
+    if (d.IsZero()) {
+        if (d.Sign())
+            *buffer++ = '-';     // -0.0, Issue #289
+        buffer[0] = '0';
+        buffer[1] = '.';
+        buffer[2] = '0';
+        return &buffer[3];
+    }
+    else {
+        if (value < 0) {
+            *buffer++ = '-';
+            value = -value;
+        }
+        int length, K;
+        Grisu2(value, buffer, &length, &K);
+        return Prettify(buffer, length, K, maxDecimalPlaces);
+    }
+}
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_DTOA_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/ieee754.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/ieee754.h
new file mode 100644
index 000000000..c2684ba2a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/ieee754.h
@@ -0,0 +1,78 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_IEEE754_
+#define RAPIDJSON_IEEE754_
+
+#include "../rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+class Double {
+public:
+    Double() {}
+    Double(double d) : d_(d) {}
+    Double(uint64_t u) : u_(u) {}
+
+    double Value() const { return d_; }
+    uint64_t Uint64Value() const { return u_; }
+
+    double NextPositiveDouble() const {
+        RAPIDJSON_ASSERT(!Sign());
+        return Double(u_ + 1).Value();
+    }
+
+    bool Sign() const { return (u_ & kSignMask) != 0; }
+    uint64_t Significand() const { return u_ & kSignificandMask; }
+    int Exponent() const { return static_cast<int>(((u_ & kExponentMask) >> kSignificandSize) - kExponentBias); }
+
+    bool IsNan() const { return (u_ & kExponentMask) == kExponentMask && Significand() != 0; }
+    bool IsInf() const { return (u_ & kExponentMask) == kExponentMask && Significand() == 0; }
+    bool IsNanOrInf() const { return (u_ & kExponentMask) == kExponentMask; }
+    bool IsNormal() const { return (u_ & kExponentMask) != 0 || Significand() == 0; }
+    bool IsZero() const { return (u_ & (kExponentMask | kSignificandMask)) == 0; }
+
+    uint64_t IntegerSignificand() const { return IsNormal() ? Significand() | kHiddenBit : Significand(); }
+    int IntegerExponent() const { return (IsNormal() ? Exponent() : kDenormalExponent) - kSignificandSize; }
+    uint64_t ToBias() const { return (u_ & kSignMask) ? ~u_ + 1 : u_ | kSignMask; }
+
+    static int EffectiveSignificandSize(int order) {
+        if (order >= -1021)
+            return 53;
+        else if (order <= -1074)
+            return 0;
+        else
+            return order + 1074;
+    }
+
+private:
+    static const int kSignificandSize = 52;
+    static const int kExponentBias = 0x3FF;
+    static const int kDenormalExponent = 1 - kExponentBias;
+    static const uint64_t kSignMask = RAPIDJSON_UINT64_C2(0x80000000, 0x00000000);
+    static const uint64_t kExponentMask = RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
+    static const uint64_t kSignificandMask = RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
+    static const uint64_t kHiddenBit = RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
+
+    union {
+        double d_;
+        uint64_t u_;
+    };
+};
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_IEEE754_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/itoa.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/itoa.h
new file mode 100644
index 000000000..9b1c45cc1
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/itoa.h
@@ -0,0 +1,308 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ITOA_
+#define RAPIDJSON_ITOA_
+
+#include "../rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+inline const char* GetDigitsLut() {
+    static const char cDigitsLut[200] = {
+        '0','0','0','1','0','2','0','3','0','4','0','5','0','6','0','7','0','8','0','9',
+        '1','0','1','1','1','2','1','3','1','4','1','5','1','6','1','7','1','8','1','9',
+        '2','0','2','1','2','2','2','3','2','4','2','5','2','6','2','7','2','8','2','9',
+        '3','0','3','1','3','2','3','3','3','4','3','5','3','6','3','7','3','8','3','9',
+        '4','0','4','1','4','2','4','3','4','4','4','5','4','6','4','7','4','8','4','9',
+        '5','0','5','1','5','2','5','3','5','4','5','5','5','6','5','7','5','8','5','9',
+        '6','0','6','1','6','2','6','3','6','4','6','5','6','6','6','7','6','8','6','9',
+        '7','0','7','1','7','2','7','3','7','4','7','5','7','6','7','7','7','8','7','9',
+        '8','0','8','1','8','2','8','3','8','4','8','5','8','6','8','7','8','8','8','9',
+        '9','0','9','1','9','2','9','3','9','4','9','5','9','6','9','7','9','8','9','9'
+    };
+    return cDigitsLut;
+}
+
+inline char* u32toa(uint32_t value, char* buffer) {
+    RAPIDJSON_ASSERT(buffer != 0);
+
+    const char* cDigitsLut = GetDigitsLut();
+
+    if (value < 10000) {
+        const uint32_t d1 = (value / 100) << 1;
+        const uint32_t d2 = (value % 100) << 1;
+
+        if (value >= 1000)
+            *buffer++ = cDigitsLut[d1];
+        if (value >= 100)
+            *buffer++ = cDigitsLut[d1 + 1];
+        if (value >= 10)
+            *buffer++ = cDigitsLut[d2];
+        *buffer++ = cDigitsLut[d2 + 1];
+    }
+    else if (value < 100000000) {
+        // value = bbbbcccc
+        const uint32_t b = value / 10000;
+        const uint32_t c = value % 10000;
+
+        const uint32_t d1 = (b / 100) << 1;
+        const uint32_t d2 = (b % 100) << 1;
+
+        const uint32_t d3 = (c / 100) << 1;
+        const uint32_t d4 = (c % 100) << 1;
+
+        if (value >= 10000000)
+            *buffer++ = cDigitsLut[d1];
+        if (value >= 1000000)
+            *buffer++ = cDigitsLut[d1 + 1];
+        if (value >= 100000)
+            *buffer++ = cDigitsLut[d2];
+        *buffer++ = cDigitsLut[d2 + 1];
+
+        *buffer++ = cDigitsLut[d3];
+        *buffer++ = cDigitsLut[d3 + 1];
+        *buffer++ = cDigitsLut[d4];
+        *buffer++ = cDigitsLut[d4 + 1];
+    }
+    else {
+        // value = aabbbbcccc in decimal
+
+        const uint32_t a = value / 100000000; // 1 to 42
+        value %= 100000000;
+
+        if (a >= 10) {
+            const unsigned i = a << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+        }
+        else
+            *buffer++ = static_cast<char>('0' + static_cast<char>(a));
+
+        const uint32_t b = value / 10000; // 0 to 9999
+        const uint32_t c = value % 10000; // 0 to 9999
+
+        const uint32_t d1 = (b / 100) << 1;
+        const uint32_t d2 = (b % 100) << 1;
+
+        const uint32_t d3 = (c / 100) << 1;
+        const uint32_t d4 = (c % 100) << 1;
+
+        *buffer++ = cDigitsLut[d1];
+        *buffer++ = cDigitsLut[d1 + 1];
+        *buffer++ = cDigitsLut[d2];
+        *buffer++ = cDigitsLut[d2 + 1];
+        *buffer++ = cDigitsLut[d3];
+        *buffer++ = cDigitsLut[d3 + 1];
+        *buffer++ = cDigitsLut[d4];
+        *buffer++ = cDigitsLut[d4 + 1];
+    }
+    return buffer;
+}
+
+inline char* i32toa(int32_t value, char* buffer) {
+    RAPIDJSON_ASSERT(buffer != 0);
+    uint32_t u = static_cast<uint32_t>(value);
+    if (value < 0) {
+        *buffer++ = '-';
+        u = ~u + 1;
+    }
+
+    return u32toa(u, buffer);
+}
+
+inline char* u64toa(uint64_t value, char* buffer) {
+    RAPIDJSON_ASSERT(buffer != 0);
+    const char* cDigitsLut = GetDigitsLut();
+    const uint64_t  kTen8 = 100000000;
+    const uint64_t  kTen9 = kTen8 * 10;
+    const uint64_t kTen10 = kTen8 * 100;
+    const uint64_t kTen11 = kTen8 * 1000;
+    const uint64_t kTen12 = kTen8 * 10000;
+    const uint64_t kTen13 = kTen8 * 100000;
+    const uint64_t kTen14 = kTen8 * 1000000;
+    const uint64_t kTen15 = kTen8 * 10000000;
+    const uint64_t kTen16 = kTen8 * kTen8;
+
+    if (value < kTen8) {
+        uint32_t v = static_cast<uint32_t>(value);
+        if (v < 10000) {
+            const uint32_t d1 = (v / 100) << 1;
+            const uint32_t d2 = (v % 100) << 1;
+
+            if (v >= 1000)
+                *buffer++ = cDigitsLut[d1];
+            if (v >= 100)
+                *buffer++ = cDigitsLut[d1 + 1];
+            if (v >= 10)
+                *buffer++ = cDigitsLut[d2];
+            *buffer++ = cDigitsLut[d2 + 1];
+        }
+        else {
+            // value = bbbbcccc
+            const uint32_t b = v / 10000;
+            const uint32_t c = v % 10000;
+
+            const uint32_t d1 = (b / 100) << 1;
+            const uint32_t d2 = (b % 100) << 1;
+
+            const uint32_t d3 = (c / 100) << 1;
+            const uint32_t d4 = (c % 100) << 1;
+
+            if (value >= 10000000)
+                *buffer++ = cDigitsLut[d1];
+            if (value >= 1000000)
+                *buffer++ = cDigitsLut[d1 + 1];
+            if (value >= 100000)
+                *buffer++ = cDigitsLut[d2];
+            *buffer++ = cDigitsLut[d2 + 1];
+
+            *buffer++ = cDigitsLut[d3];
+            *buffer++ = cDigitsLut[d3 + 1];
+            *buffer++ = cDigitsLut[d4];
+            *buffer++ = cDigitsLut[d4 + 1];
+        }
+    }
+    else if (value < kTen16) {
+        const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
+        const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
+
+        const uint32_t b0 = v0 / 10000;
+        const uint32_t c0 = v0 % 10000;
+
+        const uint32_t d1 = (b0 / 100) << 1;
+        const uint32_t d2 = (b0 % 100) << 1;
+
+        const uint32_t d3 = (c0 / 100) << 1;
+        const uint32_t d4 = (c0 % 100) << 1;
+
+        const uint32_t b1 = v1 / 10000;
+        const uint32_t c1 = v1 % 10000;
+
+        const uint32_t d5 = (b1 / 100) << 1;
+        const uint32_t d6 = (b1 % 100) << 1;
+
+        const uint32_t d7 = (c1 / 100) << 1;
+        const uint32_t d8 = (c1 % 100) << 1;
+
+        if (value >= kTen15)
+            *buffer++ = cDigitsLut[d1];
+        if (value >= kTen14)
+            *buffer++ = cDigitsLut[d1 + 1];
+        if (value >= kTen13)
+            *buffer++ = cDigitsLut[d2];
+        if (value >= kTen12)
+            *buffer++ = cDigitsLut[d2 + 1];
+        if (value >= kTen11)
+            *buffer++ = cDigitsLut[d3];
+        if (value >= kTen10)
+            *buffer++ = cDigitsLut[d3 + 1];
+        if (value >= kTen9)
+            *buffer++ = cDigitsLut[d4];
+
+        *buffer++ = cDigitsLut[d4 + 1];
+        *buffer++ = cDigitsLut[d5];
+        *buffer++ = cDigitsLut[d5 + 1];
+        *buffer++ = cDigitsLut[d6];
+        *buffer++ = cDigitsLut[d6 + 1];
+        *buffer++ = cDigitsLut[d7];
+        *buffer++ = cDigitsLut[d7 + 1];
+        *buffer++ = cDigitsLut[d8];
+        *buffer++ = cDigitsLut[d8 + 1];
+    }
+    else {
+        const uint32_t a = static_cast<uint32_t>(value / kTen16); // 1 to 1844
+        value %= kTen16;
+
+        if (a < 10)
+            *buffer++ = static_cast<char>('0' + static_cast<char>(a));
+        else if (a < 100) {
+            const uint32_t i = a << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+        }
+        else if (a < 1000) {
+            *buffer++ = static_cast<char>('0' + static_cast<char>(a / 100));
+
+            const uint32_t i = (a % 100) << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+        }
+        else {
+            const uint32_t i = (a / 100) << 1;
+            const uint32_t j = (a % 100) << 1;
+            *buffer++ = cDigitsLut[i];
+            *buffer++ = cDigitsLut[i + 1];
+            *buffer++ = cDigitsLut[j];
+            *buffer++ = cDigitsLut[j + 1];
+        }
+
+        const uint32_t v0 = static_cast<uint32_t>(value / kTen8);
+        const uint32_t v1 = static_cast<uint32_t>(value % kTen8);
+
+        const uint32_t b0 = v0 / 10000;
+        const uint32_t c0 = v0 % 10000;
+
+        const uint32_t d1 = (b0 / 100) << 1;
+        const uint32_t d2 = (b0 % 100) << 1;
+
+        const uint32_t d3 = (c0 / 100) << 1;
+        const uint32_t d4 = (c0 % 100) << 1;
+
+        const uint32_t b1 = v1 / 10000;
+        const uint32_t c1 = v1 % 10000;
+
+        const uint32_t d5 = (b1 / 100) << 1;
+        const uint32_t d6 = (b1 % 100) << 1;
+
+        const uint32_t d7 = (c1 / 100) << 1;
+        const uint32_t d8 = (c1 % 100) << 1;
+
+        *buffer++ = cDigitsLut[d1];
+        *buffer++ = cDigitsLut[d1 + 1];
+        *buffer++ = cDigitsLut[d2];
+        *buffer++ = cDigitsLut[d2 + 1];
+        *buffer++ = cDigitsLut[d3];
+        *buffer++ = cDigitsLut[d3 + 1];
+        *buffer++ = cDigitsLut[d4];
+        *buffer++ = cDigitsLut[d4 + 1];
+        *buffer++ = cDigitsLut[d5];
+        *buffer++ = cDigitsLut[d5 + 1];
+        *buffer++ = cDigitsLut[d6];
+        *buffer++ = cDigitsLut[d6 + 1];
+        *buffer++ = cDigitsLut[d7];
+        *buffer++ = cDigitsLut[d7 + 1];
+        *buffer++ = cDigitsLut[d8];
+        *buffer++ = cDigitsLut[d8 + 1];
+    }
+
+    return buffer;
+}
+
+inline char* i64toa(int64_t value, char* buffer) {
+    RAPIDJSON_ASSERT(buffer != 0);
+    uint64_t u = static_cast<uint64_t>(value);
+    if (value < 0) {
+        *buffer++ = '-';
+        u = ~u + 1;
+    }
+
+    return u64toa(u, buffer);
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_ITOA_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/meta.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/meta.h
new file mode 100644
index 000000000..d401edf85
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/meta.h
@@ -0,0 +1,186 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_META_H_
+#define RAPIDJSON_INTERNAL_META_H_
+
+#include "../rapidjson.h"
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#if defined(_MSC_VER) && !defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(6334)
+#endif
+
+#if RAPIDJSON_HAS_CXX11_TYPETRAITS
+#include <type_traits>
+#endif
+
+//@cond RAPIDJSON_INTERNAL
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+// Helper to wrap/convert arbitrary types to void, useful for arbitrary type matching
+template <typename T> struct Void { typedef void Type; };
+
+///////////////////////////////////////////////////////////////////////////////
+// BoolType, TrueType, FalseType
+//
+template <bool Cond> struct BoolType {
+    static const bool Value = Cond;
+    typedef BoolType Type;
+};
+typedef BoolType<true> TrueType;
+typedef BoolType<false> FalseType;
+
+
+///////////////////////////////////////////////////////////////////////////////
+// SelectIf, BoolExpr, NotExpr, AndExpr, OrExpr
+//
+
+template <bool C> struct SelectIfImpl { template <typename T1, typename T2> struct Apply { typedef T1 Type; }; };
+template <> struct SelectIfImpl<false> { template <typename T1, typename T2> struct Apply { typedef T2 Type; }; };
+template <bool C, typename T1, typename T2> struct SelectIfCond : SelectIfImpl<C>::template Apply<T1,T2> {};
+template <typename C, typename T1, typename T2> struct SelectIf : SelectIfCond<C::Value, T1, T2> {};
+
+template <bool Cond1, bool Cond2> struct AndExprCond : FalseType {};
+template <> struct AndExprCond<true, true> : TrueType {};
+template <bool Cond1, bool Cond2> struct OrExprCond : TrueType {};
+template <> struct OrExprCond<false, false> : FalseType {};
+
+template <typename C> struct BoolExpr : SelectIf<C,TrueType,FalseType>::Type {};
+template <typename C> struct NotExpr  : SelectIf<C,FalseType,TrueType>::Type {};
+template <typename C1, typename C2> struct AndExpr : AndExprCond<C1::Value, C2::Value>::Type {};
+template <typename C1, typename C2> struct OrExpr  : OrExprCond<C1::Value, C2::Value>::Type {};
+
+
+///////////////////////////////////////////////////////////////////////////////
+// AddConst, MaybeAddConst, RemoveConst
+template <typename T> struct AddConst { typedef const T Type; };
+template <bool Constify, typename T> struct MaybeAddConst : SelectIfCond<Constify, const T, T> {};
+template <typename T> struct RemoveConst { typedef T Type; };
+template <typename T> struct RemoveConst<const T> { typedef T Type; };
+
+
+///////////////////////////////////////////////////////////////////////////////
+// IsSame, IsConst, IsMoreConst, IsPointer
+//
+template <typename T, typename U> struct IsSame : FalseType {};
+template <typename T> struct IsSame<T, T> : TrueType {};
+
+template <typename T> struct IsConst : FalseType {};
+template <typename T> struct IsConst<const T> : TrueType {};
+
+template <typename CT, typename T>
+struct IsMoreConst
+    : AndExpr<IsSame<typename RemoveConst<CT>::Type, typename RemoveConst<T>::Type>,
+              BoolType<IsConst<CT>::Value >= IsConst<T>::Value> >::Type {};
+
+template <typename T> struct IsPointer : FalseType {};
+template <typename T> struct IsPointer<T*> : TrueType {};
+
+///////////////////////////////////////////////////////////////////////////////
+// IsBaseOf
+//
+#if RAPIDJSON_HAS_CXX11_TYPETRAITS
+
+template <typename B, typename D> struct IsBaseOf
+    : BoolType< ::std::is_base_of<B,D>::value> {};
+
+#else // simplified version adopted from Boost
+
+template<typename B, typename D> struct IsBaseOfImpl {
+    RAPIDJSON_STATIC_ASSERT(sizeof(B) != 0);
+    RAPIDJSON_STATIC_ASSERT(sizeof(D) != 0);
+
+    typedef char (&Yes)[1];
+    typedef char (&No) [2];
+
+    template <typename T>
+    static Yes Check(const D*, T);
+    static No  Check(const B*, int);
+
+    struct Host {
+        operator const B*() const;
+        operator const D*();
+    };
+
+    enum { Value = (sizeof(Check(Host(), 0)) == sizeof(Yes)) };
+};
+
+template <typename B, typename D> struct IsBaseOf
+    : OrExpr<IsSame<B, D>, BoolExpr<IsBaseOfImpl<B, D> > >::Type {};
+
+#endif // RAPIDJSON_HAS_CXX11_TYPETRAITS
+
+
+//////////////////////////////////////////////////////////////////////////
+// EnableIf / DisableIf
+//
+template <bool Condition, typename T = void> struct EnableIfCond  { typedef T Type; };
+template <typename T> struct EnableIfCond<false, T> { /* empty */ };
+
+template <bool Condition, typename T = void> struct DisableIfCond { typedef T Type; };
+template <typename T> struct DisableIfCond<true, T> { /* empty */ };
+
+template <typename Condition, typename T = void>
+struct EnableIf : EnableIfCond<Condition::Value, T> {};
+
+template <typename Condition, typename T = void>
+struct DisableIf : DisableIfCond<Condition::Value, T> {};
+
+// SFINAE helpers
+struct SfinaeTag {};
+template <typename T> struct RemoveSfinaeTag;
+template <typename T> struct RemoveSfinaeTag<SfinaeTag&(*)(T)> { typedef T Type; };
+
+#define RAPIDJSON_REMOVEFPTR_(type) \
+    typename ::RAPIDJSON_NAMESPACE::internal::RemoveSfinaeTag \
+        < ::RAPIDJSON_NAMESPACE::internal::SfinaeTag&(*) type>::Type
+
+#define RAPIDJSON_ENABLEIF(cond) \
+    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \
+        <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL
+
+#define RAPIDJSON_DISABLEIF(cond) \
+    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \
+        <RAPIDJSON_REMOVEFPTR_(cond)>::Type * = NULL
+
+#define RAPIDJSON_ENABLEIF_RETURN(cond,returntype) \
+    typename ::RAPIDJSON_NAMESPACE::internal::EnableIf \
+        <RAPIDJSON_REMOVEFPTR_(cond), \
+         RAPIDJSON_REMOVEFPTR_(returntype)>::Type
+
+#define RAPIDJSON_DISABLEIF_RETURN(cond,returntype) \
+    typename ::RAPIDJSON_NAMESPACE::internal::DisableIf \
+        <RAPIDJSON_REMOVEFPTR_(cond), \
+         RAPIDJSON_REMOVEFPTR_(returntype)>::Type
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+//@endcond
+
+#if defined(_MSC_VER) && !defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_META_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/pow10.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/pow10.h
new file mode 100644
index 000000000..02f475d70
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/pow10.h
@@ -0,0 +1,55 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_POW10_
+#define RAPIDJSON_POW10_
+
+#include "../rapidjson.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Computes integer powers of 10 in double (10.0^n).
+/*! This function uses lookup table for fast and accurate results.
+    \param n non-negative exponent. Must <= 308.
+    \return 10.0^n
+*/
+inline double Pow10(int n) {
+    static const double e[] = { // 1e-0...1e308: 309 * 8 bytes = 2472 bytes
+        1e+0,  
+        1e+1,  1e+2,  1e+3,  1e+4,  1e+5,  1e+6,  1e+7,  1e+8,  1e+9,  1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 
+        1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
+        1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
+        1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
+        1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
+        1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120,
+        1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140,
+        1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160,
+        1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180,
+        1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200,
+        1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220,
+        1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240,
+        1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260,
+        1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280,
+        1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300,
+        1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308
+    };
+    RAPIDJSON_ASSERT(n >= 0 && n <= 308);
+    return e[n];
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_POW10_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/regex.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/regex.h
new file mode 100644
index 000000000..377f86ce8
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/regex.h
@@ -0,0 +1,737 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_REGEX_H_
+#define RAPIDJSON_INTERNAL_REGEX_H_
+
+#include "../allocators.h"
+#include "../stream.h"
+#include "stack.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+RAPIDJSON_DIAG_OFF(implicit-fallthrough)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#if __GNUC__ >= 7
+RAPIDJSON_DIAG_OFF(implicit-fallthrough)
+#endif
+#endif
+
+#ifndef RAPIDJSON_REGEX_VERBOSE
+#define RAPIDJSON_REGEX_VERBOSE 0
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////////
+// DecodedStream
+
+template <typename SourceStream, typename Encoding>
+class DecodedStream {
+public:
+    DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); }
+    unsigned Peek() { return codepoint_; }
+    unsigned Take() {
+        unsigned c = codepoint_;
+        if (c) // No further decoding when '\0'
+            Decode();
+        return c;
+    }
+
+private:
+    void Decode() {
+        if (!Encoding::Decode(ss_, &codepoint_))
+            codepoint_ = 0;
+    }
+
+    SourceStream& ss_;
+    unsigned codepoint_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericRegex
+
+static const SizeType kRegexInvalidState = ~SizeType(0);  //!< Represents an invalid index in GenericRegex::State::out, out1
+static const SizeType kRegexInvalidRange = ~SizeType(0);
+
+template <typename Encoding, typename Allocator>
+class GenericRegexSearch;
+
+//! Regular expression engine with subset of ECMAscript grammar.
+/*!
+    Supported regular expression syntax:
+    - \c ab     Concatenation
+    - \c a|b    Alternation
+    - \c a?     Zero or one
+    - \c a*     Zero or more
+    - \c a+     One or more
+    - \c a{3}   Exactly 3 times
+    - \c a{3,}  At least 3 times
+    - \c a{3,5} 3 to 5 times
+    - \c (ab)   Grouping
+    - \c ^a     At the beginning
+    - \c a$     At the end
+    - \c .      Any character
+    - \c [abc]  Character classes
+    - \c [a-c]  Character class range
+    - \c [a-z0-9_] Character class combination
+    - \c [^abc] Negated character classes
+    - \c [^a-c] Negated character class range
+    - \c [\b]   Backspace (U+0008)
+    - \c \\| \\\\ ...  Escape characters
+    - \c \\f Form feed (U+000C)
+    - \c \\n Line feed (U+000A)
+    - \c \\r Carriage return (U+000D)
+    - \c \\t Tab (U+0009)
+    - \c \\v Vertical tab (U+000B)
+
+    \note This is a Thompson NFA engine, implemented with reference to 
+        Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", 
+        https://swtch.com/~rsc/regexp/regexp1.html 
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+class GenericRegex {
+public:
+    typedef Encoding EncodingType;
+    typedef typename Encoding::Ch Ch;
+    template <typename, typename> friend class GenericRegexSearch;
+
+    GenericRegex(const Ch* source, Allocator* allocator = 0) : 
+        ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_), 
+        states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), 
+        anchorBegin_(), anchorEnd_()
+    {
+        GenericStringStream<Encoding> ss(source);
+        DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss);
+        Parse(ds);
+    }
+
+    ~GenericRegex()
+    {
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    bool IsValid() const {
+        return root_ != kRegexInvalidState;
+    }
+
+private:
+    enum Operator {
+        kZeroOrOne,
+        kZeroOrMore,
+        kOneOrMore,
+        kConcatenation,
+        kAlternation,
+        kLeftParenthesis
+    };
+
+    static const unsigned kAnyCharacterClass = 0xFFFFFFFF;   //!< For '.'
+    static const unsigned kRangeCharacterClass = 0xFFFFFFFE;
+    static const unsigned kRangeNegationFlag = 0x80000000;
+
+    struct Range {
+        unsigned start; // 
+        unsigned end;
+        SizeType next;
+    };
+
+    struct State {
+        SizeType out;     //!< Equals to kInvalid for matching state
+        SizeType out1;    //!< Equals to non-kInvalid for split
+        SizeType rangeStart;
+        unsigned codepoint;
+    };
+
+    struct Frag {
+        Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {}
+        SizeType start;
+        SizeType out; //!< link-list of all output states
+        SizeType minIndex;
+    };
+
+    State& GetState(SizeType index) {
+        RAPIDJSON_ASSERT(index < stateCount_);
+        return states_.template Bottom<State>()[index];
+    }
+
+    const State& GetState(SizeType index) const {
+        RAPIDJSON_ASSERT(index < stateCount_);
+        return states_.template Bottom<State>()[index];
+    }
+
+    Range& GetRange(SizeType index) {
+        RAPIDJSON_ASSERT(index < rangeCount_);
+        return ranges_.template Bottom<Range>()[index];
+    }
+
+    const Range& GetRange(SizeType index) const {
+        RAPIDJSON_ASSERT(index < rangeCount_);
+        return ranges_.template Bottom<Range>()[index];
+    }
+
+    template <typename InputStream>
+    void Parse(DecodedStream<InputStream, Encoding>& ds) {
+        Stack<Allocator> operandStack(allocator_, 256);    // Frag
+        Stack<Allocator> operatorStack(allocator_, 256);   // Operator
+        Stack<Allocator> atomCountStack(allocator_, 256);  // unsigned (Atom per parenthesis)
+
+        *atomCountStack.template Push<unsigned>() = 0;
+
+        unsigned codepoint;
+        while (ds.Peek() != 0) {
+            switch (codepoint = ds.Take()) {
+                case '^':
+                    anchorBegin_ = true;
+                    break;
+
+                case '$':
+                    anchorEnd_ = true;
+                    break;
+
+                case '|':
+                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation)
+                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                            return;
+                    *operatorStack.template Push<Operator>() = kAlternation;
+                    *atomCountStack.template Top<unsigned>() = 0;
+                    break;
+
+                case '(':
+                    *operatorStack.template Push<Operator>() = kLeftParenthesis;
+                    *atomCountStack.template Push<unsigned>() = 0;
+                    break;
+
+                case ')':
+                    while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis)
+                        if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                            return;
+                    if (operatorStack.Empty())
+                        return;
+                    operatorStack.template Pop<Operator>(1);
+                    atomCountStack.template Pop<unsigned>(1);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '?':
+                    if (!Eval(operandStack, kZeroOrOne))
+                        return;
+                    break;
+
+                case '*':
+                    if (!Eval(operandStack, kZeroOrMore))
+                        return;
+                    break;
+
+                case '+':
+                    if (!Eval(operandStack, kOneOrMore))
+                        return;
+                    break;
+
+                case '{':
+                    {
+                        unsigned n, m;
+                        if (!ParseUnsigned(ds, &n))
+                            return;
+
+                        if (ds.Peek() == ',') {
+                            ds.Take();
+                            if (ds.Peek() == '}')
+                                m = kInfinityQuantifier;
+                            else if (!ParseUnsigned(ds, &m) || m < n)
+                                return;
+                        }
+                        else
+                            m = n;
+
+                        if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}')
+                            return;
+                        ds.Take();
+                    }
+                    break;
+
+                case '.':
+                    PushOperand(operandStack, kAnyCharacterClass);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '[':
+                    {
+                        SizeType range;
+                        if (!ParseRange(ds, &range))
+                            return;
+                        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass);
+                        GetState(s).rangeStart = range;
+                        *operandStack.template Push<Frag>() = Frag(s, s, s);
+                    }
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+                    break;
+
+                case '\\': // Escape character
+                    if (!CharacterEscape(ds, &codepoint))
+                        return; // Unsupported escape character
+                    // fall through to default
+
+                default: // Pattern character
+                    PushOperand(operandStack, codepoint);
+                    ImplicitConcatenation(atomCountStack, operatorStack);
+            }
+        }
+
+        while (!operatorStack.Empty())
+            if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
+                return;
+
+        // Link the operand to matching state.
+        if (operandStack.GetSize() == sizeof(Frag)) {
+            Frag* e = operandStack.template Pop<Frag>(1);
+            Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0));
+            root_ = e->start;
+
+#if RAPIDJSON_REGEX_VERBOSE
+            printf("root: %d\n", root_);
+            for (SizeType i = 0; i < stateCount_ ; i++) {
+                State& s = GetState(i);
+                printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint);
+            }
+            printf("\n");
+#endif
+        }
+    }
+
+    SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) {
+        State* s = states_.template Push<State>();
+        s->out = out;
+        s->out1 = out1;
+        s->codepoint = codepoint;
+        s->rangeStart = kRegexInvalidRange;
+        return stateCount_++;
+    }
+
+    void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) {
+        SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint);
+        *operandStack.template Push<Frag>() = Frag(s, s, s);
+    }
+
+    void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) {
+        if (*atomCountStack.template Top<unsigned>())
+            *operatorStack.template Push<Operator>() = kConcatenation;
+        (*atomCountStack.template Top<unsigned>())++;
+    }
+
+    SizeType Append(SizeType l1, SizeType l2) {
+        SizeType old = l1;
+        while (GetState(l1).out != kRegexInvalidState)
+            l1 = GetState(l1).out;
+        GetState(l1).out = l2;
+        return old;
+    }
+
+    void Patch(SizeType l, SizeType s) {
+        for (SizeType next; l != kRegexInvalidState; l = next) {
+            next = GetState(l).out;
+            GetState(l).out = s;
+        }
+    }
+
+    bool Eval(Stack<Allocator>& operandStack, Operator op) {
+        switch (op) {
+            case kConcatenation:
+                RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2);
+                {
+                    Frag e2 = *operandStack.template Pop<Frag>(1);
+                    Frag e1 = *operandStack.template Pop<Frag>(1);
+                    Patch(e1.out, e2.start);
+                    *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex));
+                }
+                return true;
+
+            case kAlternation:
+                if (operandStack.GetSize() >= sizeof(Frag) * 2) {
+                    Frag e2 = *operandStack.template Pop<Frag>(1);
+                    Frag e1 = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(e1.start, e2.start, 0);
+                    *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex));
+                    return true;
+                }
+                return false;
+
+            case kZeroOrOne:
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex);
+                    return true;
+                }
+                return false;
+
+            case kZeroOrMore:
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    Patch(e.out, s);
+                    *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex);
+                    return true;
+                }
+                return false;
+
+            default: 
+                RAPIDJSON_ASSERT(op == kOneOrMore);
+                if (operandStack.GetSize() >= sizeof(Frag)) {
+                    Frag e = *operandStack.template Pop<Frag>(1);
+                    SizeType s = NewState(kRegexInvalidState, e.start, 0);
+                    Patch(e.out, s);
+                    *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex);
+                    return true;
+                }
+                return false;
+        }
+    }
+
+    bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) {
+        RAPIDJSON_ASSERT(n <= m);
+        RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag));
+
+        if (n == 0) {
+            if (m == 0)                             // a{0} not support
+                return false;
+            else if (m == kInfinityQuantifier)
+                Eval(operandStack, kZeroOrMore);    // a{0,} -> a*
+            else {
+                Eval(operandStack, kZeroOrOne);         // a{0,5} -> a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    CloneTopOperand(operandStack);      // a{0,5} -> a? a? a? a? a?
+                for (unsigned i = 0; i < m - 1; i++)
+                    Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a?
+            }
+            return true;
+        }
+
+        for (unsigned i = 0; i < n - 1; i++)        // a{3} -> a a a
+            CloneTopOperand(operandStack);
+
+        if (m == kInfinityQuantifier)
+            Eval(operandStack, kOneOrMore);         // a{3,} -> a a a+
+        else if (m > n) {
+            CloneTopOperand(operandStack);          // a{3,5} -> a a a a
+            Eval(operandStack, kZeroOrOne);         // a{3,5} -> a a a a?
+            for (unsigned i = n; i < m - 1; i++)
+                CloneTopOperand(operandStack);      // a{3,5} -> a a a a? a?
+            for (unsigned i = n; i < m; i++)
+                Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a?
+        }
+
+        for (unsigned i = 0; i < n - 1; i++)
+            Eval(operandStack, kConcatenation);     // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a?
+
+        return true;
+    }
+
+    static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; }
+
+    void CloneTopOperand(Stack<Allocator>& operandStack) {
+        const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation
+        SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_)
+        State* s = states_.template Push<State>(count);
+        memcpy(s, &GetState(src.minIndex), count * sizeof(State));
+        for (SizeType j = 0; j < count; j++) {
+            if (s[j].out != kRegexInvalidState)
+                s[j].out += count;
+            if (s[j].out1 != kRegexInvalidState)
+                s[j].out1 += count;
+        }
+        *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count);
+        stateCount_ += count;
+    }
+
+    template <typename InputStream>
+    bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) {
+        unsigned r = 0;
+        if (ds.Peek() < '0' || ds.Peek() > '9')
+            return false;
+        while (ds.Peek() >= '0' && ds.Peek() <= '9') {
+            if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295
+                return false; // overflow
+            r = r * 10 + (ds.Take() - '0');
+        }
+        *u = r;
+        return true;
+    }
+
+    template <typename InputStream>
+    bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) {
+        bool isBegin = true;
+        bool negate = false;
+        int step = 0;
+        SizeType start = kRegexInvalidRange;
+        SizeType current = kRegexInvalidRange;
+        unsigned codepoint;
+        while ((codepoint = ds.Take()) != 0) {
+            if (isBegin) {
+                isBegin = false;
+                if (codepoint == '^') {
+                    negate = true;
+                    continue;
+                }
+            }
+
+            switch (codepoint) {
+            case ']':
+                if (start == kRegexInvalidRange)
+                    return false;   // Error: nothing inside []
+                if (step == 2) { // Add trailing '-'
+                    SizeType r = NewRange('-');
+                    RAPIDJSON_ASSERT(current != kRegexInvalidRange);
+                    GetRange(current).next = r;
+                }
+                if (negate)
+                    GetRange(start).start |= kRangeNegationFlag;
+                *range = start;
+                return true;
+
+            case '\\':
+                if (ds.Peek() == 'b') {
+                    ds.Take();
+                    codepoint = 0x0008; // Escape backspace character
+                }
+                else if (!CharacterEscape(ds, &codepoint))
+                    return false;
+                // fall through to default
+
+            default:
+                switch (step) {
+                case 1:
+                    if (codepoint == '-') {
+                        step++;
+                        break;
+                    }
+                    // fall through to step 0 for other characters
+
+                case 0:
+                    {
+                        SizeType r = NewRange(codepoint);
+                        if (current != kRegexInvalidRange)
+                            GetRange(current).next = r;
+                        if (start == kRegexInvalidRange)
+                            start = r;
+                        current = r;
+                    }
+                    step = 1;
+                    break;
+
+                default:
+                    RAPIDJSON_ASSERT(step == 2);
+                    GetRange(current).end = codepoint;
+                    step = 0;
+                }
+            }
+        }
+        return false;
+    }
+    
+    SizeType NewRange(unsigned codepoint) {
+        Range* r = ranges_.template Push<Range>();
+        r->start = r->end = codepoint;
+        r->next = kRegexInvalidRange;
+        return rangeCount_++;
+    }
+
+    template <typename InputStream>
+    bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) {
+        unsigned codepoint;
+        switch (codepoint = ds.Take()) {
+            case '^':
+            case '$':
+            case '|':
+            case '(':
+            case ')':
+            case '?':
+            case '*':
+            case '+':
+            case '.':
+            case '[':
+            case ']':
+            case '{':
+            case '}':
+            case '\\':
+                *escapedCodepoint = codepoint; return true;
+            case 'f': *escapedCodepoint = 0x000C; return true;
+            case 'n': *escapedCodepoint = 0x000A; return true;
+            case 'r': *escapedCodepoint = 0x000D; return true;
+            case 't': *escapedCodepoint = 0x0009; return true;
+            case 'v': *escapedCodepoint = 0x000B; return true;
+            default:
+                return false; // Unsupported escape character
+        }
+    }
+
+    Allocator* ownAllocator_;
+    Allocator* allocator_;
+    Stack<Allocator> states_;
+    Stack<Allocator> ranges_;
+    SizeType root_;
+    SizeType stateCount_;
+    SizeType rangeCount_;
+
+    static const unsigned kInfinityQuantifier = ~0u;
+
+    // For SearchWithAnchoring()
+    bool anchorBegin_;
+    bool anchorEnd_;
+};
+
+template <typename RegexType, typename Allocator = CrtAllocator>
+class GenericRegexSearch {
+public:
+    typedef typename RegexType::EncodingType Encoding;
+    typedef typename Encoding::Ch Ch;
+
+    GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : 
+        regex_(regex), allocator_(allocator), ownAllocator_(0),
+        state0_(allocator, 0), state1_(allocator, 0), stateSet_()
+    {
+        RAPIDJSON_ASSERT(regex_.IsValid());
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+        stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize()));
+        state0_.template Reserve<SizeType>(regex_.stateCount_);
+        state1_.template Reserve<SizeType>(regex_.stateCount_);
+    }
+
+    ~GenericRegexSearch() {
+        Allocator::Free(stateSet_);
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    template <typename InputStream>
+    bool Match(InputStream& is) {
+        return SearchWithAnchoring(is, true, true);
+    }
+
+    bool Match(const Ch* s) {
+        GenericStringStream<Encoding> is(s);
+        return Match(is);
+    }
+
+    template <typename InputStream>
+    bool Search(InputStream& is) {
+        return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_);
+    }
+
+    bool Search(const Ch* s) {
+        GenericStringStream<Encoding> is(s);
+        return Search(is);
+    }
+
+private:
+    typedef typename RegexType::State State;
+    typedef typename RegexType::Range Range;
+
+    template <typename InputStream>
+    bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) {
+        DecodedStream<InputStream, Encoding> ds(is);
+
+        state0_.Clear();
+        Stack<Allocator> *current = &state0_, *next = &state1_;
+        const size_t stateSetSize = GetStateSetSize();
+        std::memset(stateSet_, 0, stateSetSize);
+
+        bool matched = AddState(*current, regex_.root_);
+        unsigned codepoint;
+        while (!current->Empty() && (codepoint = ds.Take()) != 0) {
+            std::memset(stateSet_, 0, stateSetSize);
+            next->Clear();
+            matched = false;
+            for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
+                const State& sr = regex_.GetState(*s);
+                if (sr.codepoint == codepoint ||
+                    sr.codepoint == RegexType::kAnyCharacterClass || 
+                    (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint)))
+                {
+                    matched = AddState(*next, sr.out) || matched;
+                    if (!anchorEnd && matched)
+                        return true;
+                }
+                if (!anchorBegin)
+                    AddState(*next, regex_.root_);
+            }
+            internal::Swap(current, next);
+        }
+
+        return matched;
+    }
+
+    size_t GetStateSetSize() const {
+        return (regex_.stateCount_ + 31) / 32 * 4;
+    }
+
+    // Return whether the added states is a match state
+    bool AddState(Stack<Allocator>& l, SizeType index) {
+        RAPIDJSON_ASSERT(index != kRegexInvalidState);
+
+        const State& s = regex_.GetState(index);
+        if (s.out1 != kRegexInvalidState) { // Split
+            bool matched = AddState(l, s.out);
+            return AddState(l, s.out1) || matched;
+        }
+        else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) {
+            stateSet_[index >> 5] |= (1u << (index & 31));
+            *l.template PushUnsafe<SizeType>() = index;
+        }
+        return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation.
+    }
+
+    bool MatchRange(SizeType rangeIndex, unsigned codepoint) const {
+        bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
+        while (rangeIndex != kRegexInvalidRange) {
+            const Range& r = regex_.GetRange(rangeIndex);
+            if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
+                return yes;
+            rangeIndex = r.next;
+        }
+        return !yes;
+    }
+
+    const RegexType& regex_;
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    Stack<Allocator> state0_;
+    Stack<Allocator> state1_;
+    uint32_t* stateSet_;
+};
+
+typedef GenericRegex<UTF8<> > Regex;
+typedef GenericRegexSearch<Regex> RegexSearch;
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#if defined(__clang__) || defined(_MSC_VER)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_REGEX_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/stack.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/stack.h
new file mode 100644
index 000000000..89558d0da
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/stack.h
@@ -0,0 +1,231 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_STACK_H_
+#define RAPIDJSON_INTERNAL_STACK_H_
+
+#include "../allocators.h"
+#include "swap.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////////
+// Stack
+
+//! A type-unsafe stack for storing different types of data.
+/*! \tparam Allocator Allocator for allocating stack memory.
+*/
+template <typename Allocator>
+class Stack {
+public:
+    // Optimization note: Do not allocate memory for stack_ in constructor.
+    // Do it lazily when first Push() -> Expand() -> Resize().
+    Stack(Allocator* allocator, size_t stackCapacity) : allocator_(allocator), ownAllocator_(0), stack_(0), stackTop_(0), stackEnd_(0), initialCapacity_(stackCapacity) {
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    Stack(Stack&& rhs)
+        : allocator_(rhs.allocator_),
+          ownAllocator_(rhs.ownAllocator_),
+          stack_(rhs.stack_),
+          stackTop_(rhs.stackTop_),
+          stackEnd_(rhs.stackEnd_),
+          initialCapacity_(rhs.initialCapacity_)
+    {
+        rhs.allocator_ = 0;
+        rhs.ownAllocator_ = 0;
+        rhs.stack_ = 0;
+        rhs.stackTop_ = 0;
+        rhs.stackEnd_ = 0;
+        rhs.initialCapacity_ = 0;
+    }
+#endif
+
+    ~Stack() {
+        Destroy();
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    Stack& operator=(Stack&& rhs) {
+        if (&rhs != this)
+        {
+            Destroy();
+
+            allocator_ = rhs.allocator_;
+            ownAllocator_ = rhs.ownAllocator_;
+            stack_ = rhs.stack_;
+            stackTop_ = rhs.stackTop_;
+            stackEnd_ = rhs.stackEnd_;
+            initialCapacity_ = rhs.initialCapacity_;
+
+            rhs.allocator_ = 0;
+            rhs.ownAllocator_ = 0;
+            rhs.stack_ = 0;
+            rhs.stackTop_ = 0;
+            rhs.stackEnd_ = 0;
+            rhs.initialCapacity_ = 0;
+        }
+        return *this;
+    }
+#endif
+
+    void Swap(Stack& rhs) RAPIDJSON_NOEXCEPT {
+        internal::Swap(allocator_, rhs.allocator_);
+        internal::Swap(ownAllocator_, rhs.ownAllocator_);
+        internal::Swap(stack_, rhs.stack_);
+        internal::Swap(stackTop_, rhs.stackTop_);
+        internal::Swap(stackEnd_, rhs.stackEnd_);
+        internal::Swap(initialCapacity_, rhs.initialCapacity_);
+    }
+
+    void Clear() { stackTop_ = stack_; }
+
+    void ShrinkToFit() { 
+        if (Empty()) {
+            // If the stack is empty, completely deallocate the memory.
+            Allocator::Free(stack_); // NOLINT (+clang-analyzer-unix.Malloc)
+            stack_ = 0;
+            stackTop_ = 0;
+            stackEnd_ = 0;
+        }
+        else
+            Resize(GetSize());
+    }
+
+    // Optimization note: try to minimize the size of this function for force inline.
+    // Expansion is run very infrequently, so it is moved to another (probably non-inline) function.
+    template<typename T>
+    RAPIDJSON_FORCEINLINE void Reserve(size_t count = 1) {
+         // Expand the stack if needed
+        if (RAPIDJSON_UNLIKELY(stackTop_ + sizeof(T) * count > stackEnd_))
+            Expand<T>(count);
+    }
+
+    template<typename T>
+    RAPIDJSON_FORCEINLINE T* Push(size_t count = 1) {
+        Reserve<T>(count);
+        return PushUnsafe<T>(count);
+    }
+
+    template<typename T>
+    RAPIDJSON_FORCEINLINE T* PushUnsafe(size_t count = 1) {
+        RAPIDJSON_ASSERT(stackTop_);
+        RAPIDJSON_ASSERT(stackTop_ + sizeof(T) * count <= stackEnd_);
+        T* ret = reinterpret_cast<T*>(stackTop_);
+        stackTop_ += sizeof(T) * count;
+        return ret;
+    }
+
+    template<typename T>
+    T* Pop(size_t count) {
+        RAPIDJSON_ASSERT(GetSize() >= count * sizeof(T));
+        stackTop_ -= count * sizeof(T);
+        return reinterpret_cast<T*>(stackTop_);
+    }
+
+    template<typename T>
+    T* Top() { 
+        RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
+        return reinterpret_cast<T*>(stackTop_ - sizeof(T));
+    }
+
+    template<typename T>
+    const T* Top() const {
+        RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
+        return reinterpret_cast<T*>(stackTop_ - sizeof(T));
+    }
+
+    template<typename T>
+    T* End() { return reinterpret_cast<T*>(stackTop_); }
+
+    template<typename T>
+    const T* End() const { return reinterpret_cast<T*>(stackTop_); }
+
+    template<typename T>
+    T* Bottom() { return reinterpret_cast<T*>(stack_); }
+
+    template<typename T>
+    const T* Bottom() const { return reinterpret_cast<T*>(stack_); }
+
+    bool HasAllocator() const {
+        return allocator_ != 0;
+    }
+
+    Allocator& GetAllocator() {
+        RAPIDJSON_ASSERT(allocator_);
+        return *allocator_;
+    }
+
+    bool Empty() const { return stackTop_ == stack_; }
+    size_t GetSize() const { return static_cast<size_t>(stackTop_ - stack_); }
+    size_t GetCapacity() const { return static_cast<size_t>(stackEnd_ - stack_); }
+
+private:
+    template<typename T>
+    void Expand(size_t count) {
+        // Only expand the capacity if the current stack exists. Otherwise just create a stack with initial capacity.
+        size_t newCapacity;
+        if (stack_ == 0) {
+            if (!allocator_)
+                ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+            newCapacity = initialCapacity_;
+        } else {
+            newCapacity = GetCapacity();
+            newCapacity += (newCapacity + 1) / 2;
+        }
+        size_t newSize = GetSize() + sizeof(T) * count;
+        if (newCapacity < newSize)
+            newCapacity = newSize;
+
+        Resize(newCapacity);
+    }
+
+    void Resize(size_t newCapacity) {
+        const size_t size = GetSize();  // Backup the current size
+        stack_ = static_cast<char*>(allocator_->Realloc(stack_, GetCapacity(), newCapacity));
+        stackTop_ = stack_ + size;
+        stackEnd_ = stack_ + newCapacity;
+    }
+
+    void Destroy() {
+        Allocator::Free(stack_);
+        RAPIDJSON_DELETE(ownAllocator_); // Only delete if it is owned by the stack
+    }
+
+    // Prohibit copy constructor & assignment operator.
+    Stack(const Stack&);
+    Stack& operator=(const Stack&);
+
+    Allocator* allocator_;
+    Allocator* ownAllocator_;
+    char *stack_;
+    char *stackTop_;
+    char *stackEnd_;
+    size_t initialCapacity_;
+};
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_STACK_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/strfunc.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/strfunc.h
new file mode 100644
index 000000000..226439a76
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/strfunc.h
@@ -0,0 +1,69 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
+#define RAPIDJSON_INTERNAL_STRFUNC_H_
+
+#include "../stream.h"
+#include <cwchar>
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Custom strlen() which works on different character types.
+/*! \tparam Ch Character type (e.g. char, wchar_t, short)
+    \param s Null-terminated input string.
+    \return Number of characters in the string. 
+    \note This has the same semantics as strlen(), the return value is not number of Unicode codepoints.
+*/
+template <typename Ch>
+inline SizeType StrLen(const Ch* s) {
+    RAPIDJSON_ASSERT(s != 0);
+    const Ch* p = s;
+    while (*p) ++p;
+    return SizeType(p - s);
+}
+
+template <>
+inline SizeType StrLen(const char* s) {
+    return SizeType(std::strlen(s));
+}
+
+template <>
+inline SizeType StrLen(const wchar_t* s) {
+    return SizeType(std::wcslen(s));
+}
+
+//! Returns number of code points in a encoded string.
+template<typename Encoding>
+bool CountStringCodePoint(const typename Encoding::Ch* s, SizeType length, SizeType* outCount) {
+    RAPIDJSON_ASSERT(s != 0);
+    RAPIDJSON_ASSERT(outCount != 0);
+    GenericStringStream<Encoding> is(s);
+    const typename Encoding::Ch* end = s + length;
+    SizeType count = 0;
+    while (is.src_ < end) {
+        unsigned codepoint;
+        if (!Encoding::Decode(is, &codepoint))
+            return false;
+        count++;
+    }
+    *outCount = count;
+    return true;
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_INTERNAL_STRFUNC_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/strtod.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/strtod.h
new file mode 100644
index 000000000..dfca22b65
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/strtod.h
@@ -0,0 +1,290 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_STRTOD_
+#define RAPIDJSON_STRTOD_
+
+#include "ieee754.h"
+#include "biginteger.h"
+#include "diyfp.h"
+#include "pow10.h"
+#include <climits>
+#include <limits>
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+inline double FastPath(double significand, int exp) {
+    if (exp < -308)
+        return 0.0;
+    else if (exp >= 0)
+        return significand * internal::Pow10(exp);
+    else
+        return significand / internal::Pow10(-exp);
+}
+
+inline double StrtodNormalPrecision(double d, int p) {
+    if (p < -308) {
+        // Prevent expSum < -308, making Pow10(p) = 0
+        d = FastPath(d, -308);
+        d = FastPath(d, p + 308);
+    }
+    else
+        d = FastPath(d, p);
+    return d;
+}
+
+template <typename T>
+inline T Min3(T a, T b, T c) {
+    T m = a;
+    if (m > b) m = b;
+    if (m > c) m = c;
+    return m;
+}
+
+inline int CheckWithinHalfULP(double b, const BigInteger& d, int dExp) {
+    const Double db(b);
+    const uint64_t bInt = db.IntegerSignificand();
+    const int bExp = db.IntegerExponent();
+    const int hExp = bExp - 1;
+
+    int dS_Exp2 = 0, dS_Exp5 = 0, bS_Exp2 = 0, bS_Exp5 = 0, hS_Exp2 = 0, hS_Exp5 = 0;
+
+    // Adjust for decimal exponent
+    if (dExp >= 0) {
+        dS_Exp2 += dExp;
+        dS_Exp5 += dExp;
+    }
+    else {
+        bS_Exp2 -= dExp;
+        bS_Exp5 -= dExp;
+        hS_Exp2 -= dExp;
+        hS_Exp5 -= dExp;
+    }
+
+    // Adjust for binary exponent
+    if (bExp >= 0)
+        bS_Exp2 += bExp;
+    else {
+        dS_Exp2 -= bExp;
+        hS_Exp2 -= bExp;
+    }
+
+    // Adjust for half ulp exponent
+    if (hExp >= 0)
+        hS_Exp2 += hExp;
+    else {
+        dS_Exp2 -= hExp;
+        bS_Exp2 -= hExp;
+    }
+
+    // Remove common power of two factor from all three scaled values
+    int common_Exp2 = Min3(dS_Exp2, bS_Exp2, hS_Exp2);
+    dS_Exp2 -= common_Exp2;
+    bS_Exp2 -= common_Exp2;
+    hS_Exp2 -= common_Exp2;
+
+    BigInteger dS = d;
+    dS.MultiplyPow5(static_cast<unsigned>(dS_Exp5)) <<= static_cast<unsigned>(dS_Exp2);
+
+    BigInteger bS(bInt);
+    bS.MultiplyPow5(static_cast<unsigned>(bS_Exp5)) <<= static_cast<unsigned>(bS_Exp2);
+
+    BigInteger hS(1);
+    hS.MultiplyPow5(static_cast<unsigned>(hS_Exp5)) <<= static_cast<unsigned>(hS_Exp2);
+
+    BigInteger delta(0);
+    dS.Difference(bS, &delta);
+
+    return delta.Compare(hS);
+}
+
+inline bool StrtodFast(double d, int p, double* result) {
+    // Use fast path for string-to-double conversion if possible
+    // see http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
+    if (p > 22  && p < 22 + 16) {
+        // Fast Path Cases In Disguise
+        d *= internal::Pow10(p - 22);
+        p = 22;
+    }
+
+    if (p >= -22 && p <= 22 && d <= 9007199254740991.0) { // 2^53 - 1
+        *result = FastPath(d, p);
+        return true;
+    }
+    else
+        return false;
+}
+
+// Compute an approximation and see if it is within 1/2 ULP
+inline bool StrtodDiyFp(const char* decimals, int dLen, int dExp, double* result) {
+    uint64_t significand = 0;
+    int i = 0;   // 2^64 - 1 = 18446744073709551615, 1844674407370955161 = 0x1999999999999999    
+    for (; i < dLen; i++) {
+        if (significand  >  RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) ||
+            (significand == RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) && decimals[i] > '5'))
+            break;
+        significand = significand * 10u + static_cast<unsigned>(decimals[i] - '0');
+    }
+    
+    if (i < dLen && decimals[i] >= '5') // Rounding
+        significand++;
+
+    int remaining = dLen - i;
+    const int kUlpShift = 3;
+    const int kUlp = 1 << kUlpShift;
+    int64_t error = (remaining == 0) ? 0 : kUlp / 2;
+
+    DiyFp v(significand, 0);
+    v = v.Normalize();
+    error <<= -v.e;
+
+    dExp += remaining;
+
+    int actualExp;
+    DiyFp cachedPower = GetCachedPower10(dExp, &actualExp);
+    if (actualExp != dExp) {
+        static const DiyFp kPow10[] = {
+            DiyFp(RAPIDJSON_UINT64_C2(0xa0000000, 0x00000000), -60),  // 10^1
+            DiyFp(RAPIDJSON_UINT64_C2(0xc8000000, 0x00000000), -57),  // 10^2
+            DiyFp(RAPIDJSON_UINT64_C2(0xfa000000, 0x00000000), -54),  // 10^3
+            DiyFp(RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), -50),  // 10^4
+            DiyFp(RAPIDJSON_UINT64_C2(0xc3500000, 0x00000000), -47),  // 10^5
+            DiyFp(RAPIDJSON_UINT64_C2(0xf4240000, 0x00000000), -44),  // 10^6
+            DiyFp(RAPIDJSON_UINT64_C2(0x98968000, 0x00000000), -40)   // 10^7
+        };
+        int adjustment = dExp - actualExp;
+        RAPIDJSON_ASSERT(adjustment >= 1 && adjustment < 8);
+        v = v * kPow10[adjustment - 1];
+        if (dLen + adjustment > 19) // has more digits than decimal digits in 64-bit
+            error += kUlp / 2;
+    }
+
+    v = v * cachedPower;
+
+    error += kUlp + (error == 0 ? 0 : 1);
+
+    const int oldExp = v.e;
+    v = v.Normalize();
+    error <<= oldExp - v.e;
+
+    const int effectiveSignificandSize = Double::EffectiveSignificandSize(64 + v.e);
+    int precisionSize = 64 - effectiveSignificandSize;
+    if (precisionSize + kUlpShift >= 64) {
+        int scaleExp = (precisionSize + kUlpShift) - 63;
+        v.f >>= scaleExp;
+        v.e += scaleExp; 
+        error = (error >> scaleExp) + 1 + kUlp;
+        precisionSize -= scaleExp;
+    }
+
+    DiyFp rounded(v.f >> precisionSize, v.e + precisionSize);
+    const uint64_t precisionBits = (v.f & ((uint64_t(1) << precisionSize) - 1)) * kUlp;
+    const uint64_t halfWay = (uint64_t(1) << (precisionSize - 1)) * kUlp;
+    if (precisionBits >= halfWay + static_cast<unsigned>(error)) {
+        rounded.f++;
+        if (rounded.f & (DiyFp::kDpHiddenBit << 1)) { // rounding overflows mantissa (issue #340)
+            rounded.f >>= 1;
+            rounded.e++;
+        }
+    }
+
+    *result = rounded.ToDouble();
+
+    return halfWay - static_cast<unsigned>(error) >= precisionBits || precisionBits >= halfWay + static_cast<unsigned>(error);
+}
+
+inline double StrtodBigInteger(double approx, const char* decimals, int dLen, int dExp) {
+    RAPIDJSON_ASSERT(dLen >= 0);
+    const BigInteger dInt(decimals, static_cast<unsigned>(dLen));
+    Double a(approx);
+    int cmp = CheckWithinHalfULP(a.Value(), dInt, dExp);
+    if (cmp < 0)
+        return a.Value();  // within half ULP
+    else if (cmp == 0) {
+        // Round towards even
+        if (a.Significand() & 1)
+            return a.NextPositiveDouble();
+        else
+            return a.Value();
+    }
+    else // adjustment
+        return a.NextPositiveDouble();
+}
+
+inline double StrtodFullPrecision(double d, int p, const char* decimals, size_t length, size_t decimalPosition, int exp) {
+    RAPIDJSON_ASSERT(d >= 0.0);
+    RAPIDJSON_ASSERT(length >= 1);
+
+    double result = 0.0;
+    if (StrtodFast(d, p, &result))
+        return result;
+
+    RAPIDJSON_ASSERT(length <= INT_MAX);
+    int dLen = static_cast<int>(length);
+
+    RAPIDJSON_ASSERT(length >= decimalPosition);
+    RAPIDJSON_ASSERT(length - decimalPosition <= INT_MAX);
+    int dExpAdjust = static_cast<int>(length - decimalPosition);
+
+    RAPIDJSON_ASSERT(exp >= INT_MIN + dExpAdjust);
+    int dExp = exp - dExpAdjust;
+
+    // Make sure length+dExp does not overflow
+    RAPIDJSON_ASSERT(dExp <= INT_MAX - dLen);
+
+    // Trim leading zeros
+    while (dLen > 0 && *decimals == '0') {
+        dLen--;
+        decimals++;
+    }
+
+    // Trim trailing zeros
+    while (dLen > 0 && decimals[dLen - 1] == '0') {
+        dLen--;
+        dExp++;
+    }
+
+    if (dLen == 0) { // Buffer only contains zeros.
+        return 0.0;
+    }
+
+    // Trim right-most digits
+    const int kMaxDecimalDigit = 767 + 1;
+    if (dLen > kMaxDecimalDigit) {
+        dExp += dLen - kMaxDecimalDigit;
+        dLen = kMaxDecimalDigit;
+    }
+
+    // If too small, underflow to zero.
+    // Any x <= 10^-324 is interpreted as zero.
+    if (dLen + dExp <= -324)
+        return 0.0;
+
+    // If too large, overflow to infinity.
+    // Any x >= 10^309 is interpreted as +infinity.
+    if (dLen + dExp > 309)
+        return std::numeric_limits<double>::infinity();
+
+    if (StrtodDiyFp(decimals, dLen, dExp, &result))
+        return result;
+
+    // Use approximation from StrtodDiyFp and make adjustment with BigInteger comparison
+    return StrtodBigInteger(result, decimals, dLen, dExp);
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_STRTOD_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/swap.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/swap.h
new file mode 100644
index 000000000..666e49f97
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/internal/swap.h
@@ -0,0 +1,46 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_INTERNAL_SWAP_H_
+#define RAPIDJSON_INTERNAL_SWAP_H_
+
+#include "../rapidjson.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+namespace internal {
+
+//! Custom swap() to avoid dependency on C++ <algorithm> header
+/*! \tparam T Type of the arguments to swap, should be instantiated with primitive C++ types only.
+    \note This has the same semantics as std::swap().
+*/
+template <typename T>
+inline void Swap(T& a, T& b) RAPIDJSON_NOEXCEPT {
+    T tmp = a;
+        a = b;
+        b = tmp;
+}
+
+} // namespace internal
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_INTERNAL_SWAP_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/istreamwrapper.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/istreamwrapper.h
new file mode 100644
index 000000000..5f816982e
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/istreamwrapper.h
@@ -0,0 +1,113 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_ISTREAMWRAPPER_H_
+#define RAPIDJSON_ISTREAMWRAPPER_H_
+
+#include "stream.h"
+#include <iosfwd>
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4351) // new behavior: elements of array 'array' will be default initialized
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Wrapper of \c std::basic_istream into RapidJSON's Stream concept.
+/*!
+    The classes can be wrapped including but not limited to:
+
+    - \c std::istringstream
+    - \c std::stringstream
+    - \c std::wistringstream
+    - \c std::wstringstream
+    - \c std::ifstream
+    - \c std::fstream
+    - \c std::wifstream
+    - \c std::wfstream
+
+    \tparam StreamType Class derived from \c std::basic_istream.
+*/
+   
+template <typename StreamType>
+class BasicIStreamWrapper {
+public:
+    typedef typename StreamType::char_type Ch;
+    BasicIStreamWrapper(StreamType& stream) : stream_(stream), count_(), peekBuffer_() {}
+
+    Ch Peek() const { 
+        typename StreamType::int_type c = stream_.peek();
+        return RAPIDJSON_LIKELY(c != StreamType::traits_type::eof()) ? static_cast<Ch>(c) : static_cast<Ch>('\0');
+    }
+
+    Ch Take() { 
+        typename StreamType::int_type c = stream_.get();
+        if (RAPIDJSON_LIKELY(c != StreamType::traits_type::eof())) {
+            count_++;
+            return static_cast<Ch>(c);
+        }
+        else
+            return '\0';
+    }
+
+    // tellg() may return -1 when failed. So we count by ourself.
+    size_t Tell() const { return count_; }
+
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    // For encoding detection only.
+    const Ch* Peek4() const {
+        RAPIDJSON_ASSERT(sizeof(Ch) == 1); // Only usable for byte stream.
+        int i;
+        bool hasError = false;
+        for (i = 0; i < 4; ++i) {
+            typename StreamType::int_type c = stream_.get();
+            if (c == StreamType::traits_type::eof()) {
+                hasError = true;
+                stream_.clear();
+                break;
+            }
+            peekBuffer_[i] = static_cast<Ch>(c);
+        }
+        for (--i; i >= 0; --i)
+            stream_.putback(peekBuffer_[i]);
+        return !hasError ? peekBuffer_ : 0;
+    }
+
+private:
+    BasicIStreamWrapper(const BasicIStreamWrapper&);
+    BasicIStreamWrapper& operator=(const BasicIStreamWrapper&);
+
+    StreamType& stream_;
+    size_t count_;  //!< Number of characters read. Note:
+    mutable Ch peekBuffer_[4];
+};
+
+typedef BasicIStreamWrapper<std::istream> IStreamWrapper;
+typedef BasicIStreamWrapper<std::wistream> WIStreamWrapper;
+
+#if defined(__clang__) || defined(_MSC_VER)
+RAPIDJSON_DIAG_POP
+#endif
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_ISTREAMWRAPPER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/memorybuffer.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/memorybuffer.h
new file mode 100644
index 000000000..39bee1dec
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/memorybuffer.h
@@ -0,0 +1,70 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_MEMORYBUFFER_H_
+#define RAPIDJSON_MEMORYBUFFER_H_
+
+#include "stream.h"
+#include "internal/stack.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Represents an in-memory output byte stream.
+/*!
+    This class is mainly for being wrapped by EncodedOutputStream or AutoUTFOutputStream.
+
+    It is similar to FileWriteBuffer but the destination is an in-memory buffer instead of a file.
+
+    Differences between MemoryBuffer and StringBuffer:
+    1. StringBuffer has Encoding but MemoryBuffer is only a byte buffer. 
+    2. StringBuffer::GetString() returns a null-terminated string. MemoryBuffer::GetBuffer() returns a buffer without terminator.
+
+    \tparam Allocator type for allocating memory buffer.
+    \note implements Stream concept
+*/
+template <typename Allocator = CrtAllocator>
+struct GenericMemoryBuffer {
+    typedef char Ch; // byte
+
+    GenericMemoryBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
+
+    void Put(Ch c) { *stack_.template Push<Ch>() = c; }
+    void Flush() {}
+
+    void Clear() { stack_.Clear(); }
+    void ShrinkToFit() { stack_.ShrinkToFit(); }
+    Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
+    void Pop(size_t count) { stack_.template Pop<Ch>(count); }
+
+    const Ch* GetBuffer() const {
+        return stack_.template Bottom<Ch>();
+    }
+
+    size_t GetSize() const { return stack_.GetSize(); }
+
+    static const size_t kDefaultCapacity = 256;
+    mutable internal::Stack<Allocator> stack_;
+};
+
+typedef GenericMemoryBuffer<> MemoryBuffer;
+
+//! Implement specialized version of PutN() with memset() for better performance.
+template<>
+inline void PutN(MemoryBuffer& memoryBuffer, char c, size_t n) {
+    std::memset(memoryBuffer.stack_.Push<char>(n), c, n * sizeof(c));
+}
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_MEMORYBUFFER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/memorystream.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/memorystream.h
new file mode 100644
index 000000000..1d71d8a4f
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/memorystream.h
@@ -0,0 +1,71 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_MEMORYSTREAM_H_
+#define RAPIDJSON_MEMORYSTREAM_H_
+
+#include "stream.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(missing-noreturn)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Represents an in-memory input byte stream.
+/*!
+    This class is mainly for being wrapped by EncodedInputStream or AutoUTFInputStream.
+
+    It is similar to FileReadBuffer but the source is an in-memory buffer instead of a file.
+
+    Differences between MemoryStream and StringStream:
+    1. StringStream has encoding but MemoryStream is a byte stream.
+    2. MemoryStream needs size of the source buffer and the buffer don't need to be null terminated. StringStream assume null-terminated string as source.
+    3. MemoryStream supports Peek4() for encoding detection. StringStream is specified with an encoding so it should not have Peek4().
+    \note implements Stream concept
+*/
+struct MemoryStream {
+    typedef char Ch; // byte
+
+    MemoryStream(const Ch *src, size_t size) : src_(src), begin_(src), end_(src + size), size_(size) {}
+
+    Ch Peek() const { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_; }
+    Ch Take() { return RAPIDJSON_UNLIKELY(src_ == end_) ? '\0' : *src_++; }
+    size_t Tell() const { return static_cast<size_t>(src_ - begin_); }
+
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    // For encoding detection only.
+    const Ch* Peek4() const {
+        return Tell() + 4 <= size_ ? src_ : 0;
+    }
+
+    const Ch* src_;     //!< Current read position.
+    const Ch* begin_;   //!< Original head of the string.
+    const Ch* end_;     //!< End of stream.
+    size_t size_;       //!< Size of the stream.
+};
+
+RAPIDJSON_NAMESPACE_END
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_MEMORYBUFFER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/msinttypes/inttypes.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/msinttypes/inttypes.h
new file mode 100644
index 000000000..18111286b
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/msinttypes/inttypes.h
@@ -0,0 +1,316 @@
+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2013 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. Neither the name of the product nor the names of its contributors may
+//      be used to endorse or promote products derived from this software
+//      without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+// The above software in this distribution may have been modified by 
+// THL A29 Limited ("Tencent Modifications"). 
+// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited.
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "stdint.h"
+
+// miloyip: VC supports inttypes.h since VC2013
+#if _MSC_VER >= 1800
+#include <inttypes.h>
+#else
+
+// 7.8 Format conversion of integer types
+
+typedef struct {
+   intmax_t quot;
+   intmax_t rem;
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198
+
+// The fprintf macros for signed integers are:
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16       "hd"
+#define PRIi16       "hi"
+#define PRIdLEAST16  "hd"
+#define PRIiLEAST16  "hi"
+#define PRIdFAST16   "hd"
+#define PRIiFAST16   "hi"
+
+#define PRId32       "I32d"
+#define PRIi32       "I32i"
+#define PRIdLEAST32  "I32d"
+#define PRIiLEAST32  "I32i"
+#define PRIdFAST32   "I32d"
+#define PRIiFAST32   "I32i"
+
+#define PRId64       "I64d"
+#define PRIi64       "I64i"
+#define PRIdLEAST64  "I64d"
+#define PRIiLEAST64  "I64i"
+#define PRIdFAST64   "I64d"
+#define PRIiFAST64   "I64i"
+
+#define PRIdMAX     "I64d"
+#define PRIiMAX     "I64i"
+
+#define PRIdPTR     "Id"
+#define PRIiPTR     "Ii"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16       "ho"
+#define PRIu16       "hu"
+#define PRIx16       "hx"
+#define PRIX16       "hX"
+#define PRIoLEAST16  "ho"
+#define PRIuLEAST16  "hu"
+#define PRIxLEAST16  "hx"
+#define PRIXLEAST16  "hX"
+#define PRIoFAST16   "ho"
+#define PRIuFAST16   "hu"
+#define PRIxFAST16   "hx"
+#define PRIXFAST16   "hX"
+
+#define PRIo32       "I32o"
+#define PRIu32       "I32u"
+#define PRIx32       "I32x"
+#define PRIX32       "I32X"
+#define PRIoLEAST32  "I32o"
+#define PRIuLEAST32  "I32u"
+#define PRIxLEAST32  "I32x"
+#define PRIXLEAST32  "I32X"
+#define PRIoFAST32   "I32o"
+#define PRIuFAST32   "I32u"
+#define PRIxFAST32   "I32x"
+#define PRIXFAST32   "I32X"
+
+#define PRIo64       "I64o"
+#define PRIu64       "I64u"
+#define PRIx64       "I64x"
+#define PRIX64       "I64X"
+#define PRIoLEAST64  "I64o"
+#define PRIuLEAST64  "I64u"
+#define PRIxLEAST64  "I64x"
+#define PRIXLEAST64  "I64X"
+#define PRIoFAST64   "I64o"
+#define PRIuFAST64   "I64u"
+#define PRIxFAST64   "I64x"
+#define PRIXFAST64   "I64X"
+
+#define PRIoMAX     "I64o"
+#define PRIuMAX     "I64u"
+#define PRIxMAX     "I64x"
+#define PRIXMAX     "I64X"
+
+#define PRIoPTR     "Io"
+#define PRIuPTR     "Iu"
+#define PRIxPTR     "Ix"
+#define PRIXPTR     "IX"
+
+// The fscanf macros for signed integers are:
+#define SCNd8       "d"
+#define SCNi8       "i"
+#define SCNdLEAST8  "d"
+#define SCNiLEAST8  "i"
+#define SCNdFAST8   "d"
+#define SCNiFAST8   "i"
+
+#define SCNd16       "hd"
+#define SCNi16       "hi"
+#define SCNdLEAST16  "hd"
+#define SCNiLEAST16  "hi"
+#define SCNdFAST16   "hd"
+#define SCNiFAST16   "hi"
+
+#define SCNd32       "ld"
+#define SCNi32       "li"
+#define SCNdLEAST32  "ld"
+#define SCNiLEAST32  "li"
+#define SCNdFAST32   "ld"
+#define SCNiFAST32   "li"
+
+#define SCNd64       "I64d"
+#define SCNi64       "I64i"
+#define SCNdLEAST64  "I64d"
+#define SCNiLEAST64  "I64i"
+#define SCNdFAST64   "I64d"
+#define SCNiFAST64   "I64i"
+
+#define SCNdMAX     "I64d"
+#define SCNiMAX     "I64i"
+
+#ifdef _WIN64 // [
+#  define SCNdPTR     "I64d"
+#  define SCNiPTR     "I64i"
+#else  // _WIN64 ][
+#  define SCNdPTR     "ld"
+#  define SCNiPTR     "li"
+#endif  // _WIN64 ]
+
+// The fscanf macros for unsigned integers are:
+#define SCNo8       "o"
+#define SCNu8       "u"
+#define SCNx8       "x"
+#define SCNX8       "X"
+#define SCNoLEAST8  "o"
+#define SCNuLEAST8  "u"
+#define SCNxLEAST8  "x"
+#define SCNXLEAST8  "X"
+#define SCNoFAST8   "o"
+#define SCNuFAST8   "u"
+#define SCNxFAST8   "x"
+#define SCNXFAST8   "X"
+
+#define SCNo16       "ho"
+#define SCNu16       "hu"
+#define SCNx16       "hx"
+#define SCNX16       "hX"
+#define SCNoLEAST16  "ho"
+#define SCNuLEAST16  "hu"
+#define SCNxLEAST16  "hx"
+#define SCNXLEAST16  "hX"
+#define SCNoFAST16   "ho"
+#define SCNuFAST16   "hu"
+#define SCNxFAST16   "hx"
+#define SCNXFAST16   "hX"
+
+#define SCNo32       "lo"
+#define SCNu32       "lu"
+#define SCNx32       "lx"
+#define SCNX32       "lX"
+#define SCNoLEAST32  "lo"
+#define SCNuLEAST32  "lu"
+#define SCNxLEAST32  "lx"
+#define SCNXLEAST32  "lX"
+#define SCNoFAST32   "lo"
+#define SCNuFAST32   "lu"
+#define SCNxFAST32   "lx"
+#define SCNXFAST32   "lX"
+
+#define SCNo64       "I64o"
+#define SCNu64       "I64u"
+#define SCNx64       "I64x"
+#define SCNX64       "I64X"
+#define SCNoLEAST64  "I64o"
+#define SCNuLEAST64  "I64u"
+#define SCNxLEAST64  "I64x"
+#define SCNXLEAST64  "I64X"
+#define SCNoFAST64   "I64o"
+#define SCNuFAST64   "I64u"
+#define SCNxFAST64   "I64x"
+#define SCNXFAST64   "I64X"
+
+#define SCNoMAX     "I64o"
+#define SCNuMAX     "I64u"
+#define SCNxMAX     "I64x"
+#define SCNXMAX     "I64X"
+
+#ifdef _WIN64 // [
+#  define SCNoPTR     "I64o"
+#  define SCNuPTR     "I64u"
+#  define SCNxPTR     "I64x"
+#  define SCNXPTR     "I64X"
+#else  // _WIN64 ][
+#  define SCNoPTR     "lo"
+#  define SCNuPTR     "lu"
+#  define SCNxPTR     "lx"
+#  define SCNXPTR     "lX"
+#endif  // _WIN64 ]
+
+#endif // __STDC_FORMAT_MACROS ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+
+// This is modified version of div() function from Microsoft's div.c found
+// in %MSVC.NET%\crt\src\div.c
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+   imaxdiv_t result;
+
+   result.quot = numer / denom;
+   result.rem = numer % denom;
+
+   if (numer < 0 && result.rem > 0) {
+      // did division wrong; must fix up
+      ++result.quot;
+      result.rem -= denom;
+   }
+
+   return result;
+}
+
+// 7.8.2.3 The strtoimax and strtoumax functions
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+#endif // _MSC_VER >= 1800
+
+#endif // _MSC_INTTYPES_H_ ]
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/msinttypes/stdint.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/msinttypes/stdint.h
new file mode 100644
index 000000000..3d4477b9a
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/msinttypes/stdint.h
@@ -0,0 +1,300 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2013 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. Neither the name of the product nor the names of its contributors may
+//      be used to endorse or promote products derived from this software
+//      without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+// The above software in this distribution may have been modified by 
+// THL A29 Limited ("Tencent Modifications"). 
+// All Tencent Modifications are Copyright (C) 2015 THL A29 Limited.
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+// miloyip: Originally Visual Studio 2010 uses its own stdint.h. However it generates warning with INT64_C(), so change to use this file for vs2010.
+#if _MSC_VER >= 1600 // [
+#include <stdint.h>
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+#undef INT8_C
+#undef INT16_C
+#undef INT32_C
+#undef INT64_C
+#undef UINT8_C
+#undef UINT16_C
+#undef UINT32_C
+#undef UINT64_C
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
+// Check out Issue 9 for the details.
+#ifndef INTMAX_C //   [
+#  define INTMAX_C   INT64_C
+#endif // INTMAX_C    ]
+#ifndef UINTMAX_C //  [
+#  define UINTMAX_C  UINT64_C
+#endif // UINTMAX_C   ]
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+#else // ] _MSC_VER >= 1700 [
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we have to wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler would give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#if defined(__cplusplus) && !defined(_M_ARM)
+extern "C" {
+#endif
+#  include <wchar.h>
+#if defined(__cplusplus) && !defined(_M_ARM)
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 doesn't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+   typedef signed char       int8_t;
+   typedef signed short      int16_t;
+   typedef signed int        int32_t;
+   typedef unsigned char     uint8_t;
+   typedef unsigned short    uint16_t;
+   typedef unsigned int      uint32_t;
+#else
+   typedef signed __int8     int8_t;
+   typedef signed __int16    int16_t;
+   typedef signed __int32    int32_t;
+   typedef unsigned __int8   uint8_t;
+   typedef unsigned __int16  uint16_t;
+   typedef unsigned __int32  uint32_t;
+#endif
+typedef signed __int64       int64_t;
+typedef unsigned __int64     uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef signed __int64    intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef _W64 signed int   intptr_t;
+   typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Limits of other integer types
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
+// Check out Issue 9 for the details.
+#ifndef INTMAX_C //   [
+#  define INTMAX_C   INT64_C
+#endif // INTMAX_C    ]
+#ifndef UINTMAX_C //  [
+#  define UINTMAX_C  UINT64_C
+#endif // UINTMAX_C   ]
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+#endif // _MSC_VER >= 1600 ]
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/ostreamwrapper.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/ostreamwrapper.h
new file mode 100644
index 000000000..6f4667c08
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/ostreamwrapper.h
@@ -0,0 +1,81 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_OSTREAMWRAPPER_H_
+#define RAPIDJSON_OSTREAMWRAPPER_H_
+
+#include "stream.h"
+#include <iosfwd>
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Wrapper of \c std::basic_ostream into RapidJSON's Stream concept.
+/*!
+    The classes can be wrapped including but not limited to:
+
+    - \c std::ostringstream
+    - \c std::stringstream
+    - \c std::wpstringstream
+    - \c std::wstringstream
+    - \c std::ifstream
+    - \c std::fstream
+    - \c std::wofstream
+    - \c std::wfstream
+
+    \tparam StreamType Class derived from \c std::basic_ostream.
+*/
+   
+template <typename StreamType>
+class BasicOStreamWrapper {
+public:
+    typedef typename StreamType::char_type Ch;
+    BasicOStreamWrapper(StreamType& stream) : stream_(stream) {}
+
+    void Put(Ch c) {
+        stream_.put(c);
+    }
+
+    void Flush() {
+        stream_.flush();
+    }
+
+    // Not implemented
+    char Peek() const { RAPIDJSON_ASSERT(false); return 0; }
+    char Take() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
+    char* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    size_t PutEnd(char*) { RAPIDJSON_ASSERT(false); return 0; }
+
+private:
+    BasicOStreamWrapper(const BasicOStreamWrapper&);
+    BasicOStreamWrapper& operator=(const BasicOStreamWrapper&);
+
+    StreamType& stream_;
+};
+
+typedef BasicOStreamWrapper<std::ostream> OStreamWrapper;
+typedef BasicOStreamWrapper<std::wostream> WOStreamWrapper;
+
+#ifdef __clang__
+RAPIDJSON_DIAG_POP
+#endif
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_OSTREAMWRAPPER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/pointer.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/pointer.h
new file mode 100644
index 000000000..3d339f246
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/pointer.h
@@ -0,0 +1,1357 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_POINTER_H_
+#define RAPIDJSON_POINTER_H_
+
+#include "document.h"
+#include "internal/itoa.h"
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(switch-enum)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+static const SizeType kPointerInvalidIndex = ~SizeType(0);  //!< Represents an invalid index in GenericPointer::Token
+
+//! Error code of parsing.
+/*! \ingroup RAPIDJSON_ERRORS
+    \see GenericPointer::GenericPointer, GenericPointer::GetParseErrorCode
+*/
+enum PointerParseErrorCode {
+    kPointerParseErrorNone = 0,                     //!< The parse is successful
+
+    kPointerParseErrorTokenMustBeginWithSolidus,    //!< A token must begin with a '/'
+    kPointerParseErrorInvalidEscape,                //!< Invalid escape
+    kPointerParseErrorInvalidPercentEncoding,       //!< Invalid percent encoding in URI fragment
+    kPointerParseErrorCharacterMustPercentEncode    //!< A character must percent encoded in URI fragment
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericPointer
+
+//! Represents a JSON Pointer. Use Pointer for UTF8 encoding and default allocator.
+/*!
+    This class implements RFC 6901 "JavaScript Object Notation (JSON) Pointer" 
+    (https://tools.ietf.org/html/rfc6901).
+
+    A JSON pointer is for identifying a specific value in a JSON document
+    (GenericDocument). It can simplify coding of DOM tree manipulation, because it
+    can access multiple-level depth of DOM tree with single API call.
+
+    After it parses a string representation (e.g. "/foo/0" or URI fragment 
+    representation (e.g. "#/foo/0") into its internal representation (tokens),
+    it can be used to resolve a specific value in multiple documents, or sub-tree 
+    of documents.
+
+    Contrary to GenericValue, Pointer can be copy constructed and copy assigned.
+    Apart from assignment, a Pointer cannot be modified after construction.
+
+    Although Pointer is very convenient, please aware that constructing Pointer
+    involves parsing and dynamic memory allocation. A special constructor with user-
+    supplied tokens eliminates these.
+
+    GenericPointer depends on GenericDocument and GenericValue.
+    
+    \tparam ValueType The value type of the DOM tree. E.g. GenericValue<UTF8<> >
+    \tparam Allocator The allocator type for allocating memory for internal representation.
+    
+    \note GenericPointer uses same encoding of ValueType.
+    However, Allocator of GenericPointer is independent of Allocator of Value.
+*/
+template <typename ValueType, typename Allocator = CrtAllocator>
+class GenericPointer {
+public:
+    typedef typename ValueType::EncodingType EncodingType;  //!< Encoding type from Value
+    typedef typename ValueType::Ch Ch;                      //!< Character type from Value
+
+    //! A token is the basic units of internal representation.
+    /*!
+        A JSON pointer string representation "/foo/123" is parsed to two tokens: 
+        "foo" and 123. 123 will be represented in both numeric form and string form.
+        They are resolved according to the actual value type (object or array).
+
+        For token that are not numbers, or the numeric value is out of bound
+        (greater than limits of SizeType), they are only treated as string form
+        (i.e. the token's index will be equal to kPointerInvalidIndex).
+
+        This struct is public so that user can create a Pointer without parsing and 
+        allocation, using a special constructor.
+    */
+    struct Token {
+        const Ch* name;             //!< Name of the token. It has null character at the end but it can contain null character.
+        SizeType length;            //!< Length of the name.
+        SizeType index;             //!< A valid array index, if it is not equal to kPointerInvalidIndex.
+    };
+
+    //!@name Constructors and destructor.
+    //@{
+
+    //! Default constructor.
+    GenericPointer(Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
+
+    //! Constructor that parses a string or URI fragment representation.
+    /*!
+        \param source A null-terminated, string or URI fragment representation of JSON pointer.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+    */
+    explicit GenericPointer(const Ch* source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        Parse(source, internal::StrLen(source));
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Constructor that parses a string or URI fragment representation.
+    /*!
+        \param source A string or URI fragment representation of JSON pointer.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+        \note Requires the definition of the preprocessor symbol \ref RAPIDJSON_HAS_STDSTRING.
+    */
+    explicit GenericPointer(const std::basic_string<Ch>& source, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        Parse(source.c_str(), source.size());
+    }
+#endif
+
+    //! Constructor that parses a string or URI fragment representation, with length of the source string.
+    /*!
+        \param source A string or URI fragment representation of JSON pointer.
+        \param length Length of source.
+        \param allocator User supplied allocator for this pointer. If no allocator is provided, it creates a self-owned one.
+        \note Slightly faster than the overload without length.
+    */
+    GenericPointer(const Ch* source, size_t length, Allocator* allocator = 0) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        Parse(source, length);
+    }
+
+    //! Constructor with user-supplied tokens.
+    /*!
+        This constructor let user supplies const array of tokens.
+        This prevents the parsing process and eliminates allocation.
+        This is preferred for memory constrained environments.
+
+        \param tokens An constant array of tokens representing the JSON pointer.
+        \param tokenCount Number of tokens.
+
+        \b Example
+        \code
+        #define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex }
+        #define INDEX(i) { #i, sizeof(#i) - 1, i }
+
+        static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) };
+        static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0]));
+        // Equivalent to static const Pointer p("/foo/123");
+
+        #undef NAME
+        #undef INDEX
+        \endcode
+    */
+    GenericPointer(const Token* tokens, size_t tokenCount) : allocator_(), ownAllocator_(), nameBuffer_(), tokens_(const_cast<Token*>(tokens)), tokenCount_(tokenCount), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {}
+
+    //! Copy constructor.
+    GenericPointer(const GenericPointer& rhs) : allocator_(rhs.allocator_), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        *this = rhs;
+    }
+
+    //! Copy constructor.
+    GenericPointer(const GenericPointer& rhs, Allocator* allocator) : allocator_(allocator), ownAllocator_(), nameBuffer_(), tokens_(), tokenCount_(), parseErrorOffset_(), parseErrorCode_(kPointerParseErrorNone) {
+        *this = rhs;
+    }
+
+    //! Destructor.
+    ~GenericPointer() {
+        if (nameBuffer_)    // If user-supplied tokens constructor is used, nameBuffer_ is nullptr and tokens_ are not deallocated.
+            Allocator::Free(tokens_);
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    //! Assignment operator.
+    GenericPointer& operator=(const GenericPointer& rhs) {
+        if (this != &rhs) {
+            // Do not delete ownAllcator
+            if (nameBuffer_)
+                Allocator::Free(tokens_);
+
+            tokenCount_ = rhs.tokenCount_;
+            parseErrorOffset_ = rhs.parseErrorOffset_;
+            parseErrorCode_ = rhs.parseErrorCode_;
+
+            if (rhs.nameBuffer_)
+                CopyFromRaw(rhs); // Normally parsed tokens.
+            else {
+                tokens_ = rhs.tokens_; // User supplied const tokens.
+                nameBuffer_ = 0;
+            }
+        }
+        return *this;
+    }
+
+    //@}
+
+    //!@name Append token
+    //@{
+
+    //! Append a token and return a new Pointer
+    /*!
+        \param token Token to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const Token& token, Allocator* allocator = 0) const {
+        GenericPointer r;
+        r.allocator_ = allocator;
+        Ch *p = r.CopyFromRaw(*this, 1, token.length + 1);
+        std::memcpy(p, token.name, (token.length + 1) * sizeof(Ch));
+        r.tokens_[tokenCount_].name = p;
+        r.tokens_[tokenCount_].length = token.length;
+        r.tokens_[tokenCount_].index = token.index;
+        return r;
+    }
+
+    //! Append a name token with length, and return a new Pointer
+    /*!
+        \param name Name to be appended.
+        \param length Length of name.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const Ch* name, SizeType length, Allocator* allocator = 0) const {
+        Token token = { name, length, kPointerInvalidIndex };
+        return Append(token, allocator);
+    }
+
+    //! Append a name token without length, and return a new Pointer
+    /*!
+        \param name Name (const Ch*) to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::NotExpr<internal::IsSame<typename internal::RemoveConst<T>::Type, Ch> >), (GenericPointer))
+    Append(T* name, Allocator* allocator = 0) const {
+        return Append(name, internal::StrLen(name), allocator);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Append a name token, and return a new Pointer
+    /*!
+        \param name Name to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const std::basic_string<Ch>& name, Allocator* allocator = 0) const {
+        return Append(name.c_str(), static_cast<SizeType>(name.size()), allocator);
+    }
+#endif
+
+    //! Append a index token, and return a new Pointer
+    /*!
+        \param index Index to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(SizeType index, Allocator* allocator = 0) const {
+        char buffer[21];
+        char* end = sizeof(SizeType) == 4 ? internal::u32toa(index, buffer) : internal::u64toa(index, buffer);
+        SizeType length = static_cast<SizeType>(end - buffer);
+        buffer[length] = '\0';
+
+        if (sizeof(Ch) == 1) {
+            Token token = { reinterpret_cast<Ch*>(buffer), length, index };
+            return Append(token, allocator);
+        }
+        else {
+            Ch name[21];
+            for (size_t i = 0; i <= length; i++)
+                name[i] = static_cast<Ch>(buffer[i]);
+            Token token = { name, length, index };
+            return Append(token, allocator);
+        }
+    }
+
+    //! Append a token by value, and return a new Pointer
+    /*!
+        \param token token to be appended.
+        \param allocator Allocator for the newly return Pointer.
+        \return A new Pointer with appended token.
+    */
+    GenericPointer Append(const ValueType& token, Allocator* allocator = 0) const {
+        if (token.IsString())
+            return Append(token.GetString(), token.GetStringLength(), allocator);
+        else {
+            RAPIDJSON_ASSERT(token.IsUint64());
+            RAPIDJSON_ASSERT(token.GetUint64() <= SizeType(~0));
+            return Append(static_cast<SizeType>(token.GetUint64()), allocator);
+        }
+    }
+
+    //!@name Handling Parse Error
+    //@{
+
+    //! Check whether this is a valid pointer.
+    bool IsValid() const { return parseErrorCode_ == kPointerParseErrorNone; }
+
+    //! Get the parsing error offset in code unit.
+    size_t GetParseErrorOffset() const { return parseErrorOffset_; }
+
+    //! Get the parsing error code.
+    PointerParseErrorCode GetParseErrorCode() const { return parseErrorCode_; }
+
+    //@}
+
+    //! Get the allocator of this pointer.
+    Allocator& GetAllocator() { return *allocator_; }
+
+    //!@name Tokens
+    //@{
+
+    //! Get the token array (const version only).
+    const Token* GetTokens() const { return tokens_; }
+
+    //! Get the number of tokens.
+    size_t GetTokenCount() const { return tokenCount_; }
+
+    //@}
+
+    //!@name Equality/inequality operators
+    //@{
+
+    //! Equality operator.
+    /*!
+        \note When any pointers are invalid, always returns false.
+    */
+    bool operator==(const GenericPointer& rhs) const {
+        if (!IsValid() || !rhs.IsValid() || tokenCount_ != rhs.tokenCount_)
+            return false;
+
+        for (size_t i = 0; i < tokenCount_; i++) {
+            if (tokens_[i].index != rhs.tokens_[i].index ||
+                tokens_[i].length != rhs.tokens_[i].length || 
+                (tokens_[i].length != 0 && std::memcmp(tokens_[i].name, rhs.tokens_[i].name, sizeof(Ch)* tokens_[i].length) != 0))
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    //! Inequality operator.
+    /*!
+        \note When any pointers are invalid, always returns true.
+    */
+    bool operator!=(const GenericPointer& rhs) const { return !(*this == rhs); }
+
+    //@}
+
+    //!@name Stringify
+    //@{
+
+    //! Stringify the pointer into string representation.
+    /*!
+        \tparam OutputStream Type of output stream.
+        \param os The output stream.
+    */
+    template<typename OutputStream>
+    bool Stringify(OutputStream& os) const {
+        return Stringify<false, OutputStream>(os);
+    }
+
+    //! Stringify the pointer into URI fragment representation.
+    /*!
+        \tparam OutputStream Type of output stream.
+        \param os The output stream.
+    */
+    template<typename OutputStream>
+    bool StringifyUriFragment(OutputStream& os) const {
+        return Stringify<true, OutputStream>(os);
+    }
+
+    //@}
+
+    //!@name Create value
+    //@{
+
+    //! Create a value in a subtree.
+    /*!
+        If the value is not exist, it creates all parent values and a JSON Null value.
+        So it always succeed and return the newly created or existing value.
+
+        Remind that it may change types of parents according to tokens, so it 
+        potentially removes previously stored values. For example, if a document 
+        was an array, and "/foo" is used to create a value, then the document 
+        will be changed to an object, and all existing array elements are lost.
+
+        \param root Root value of a DOM subtree to be resolved. It can be any value other than document root.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
+        \return The resolved newly created (a JSON Null value), or already exists value.
+    */
+    ValueType& Create(ValueType& root, typename ValueType::AllocatorType& allocator, bool* alreadyExist = 0) const {
+        RAPIDJSON_ASSERT(IsValid());
+        ValueType* v = &root;
+        bool exist = true;
+        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+            if (v->IsArray() && t->name[0] == '-' && t->length == 1) {
+                v->PushBack(ValueType().Move(), allocator);
+                v = &((*v)[v->Size() - 1]);
+                exist = false;
+            }
+            else {
+                if (t->index == kPointerInvalidIndex) { // must be object name
+                    if (!v->IsObject())
+                        v->SetObject(); // Change to Object
+                }
+                else { // object name or array index
+                    if (!v->IsArray() && !v->IsObject())
+                        v->SetArray(); // Change to Array
+                }
+
+                if (v->IsArray()) {
+                    if (t->index >= v->Size()) {
+                        v->Reserve(t->index + 1, allocator);
+                        while (t->index >= v->Size())
+                            v->PushBack(ValueType().Move(), allocator);
+                        exist = false;
+                    }
+                    v = &((*v)[t->index]);
+                }
+                else {
+                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
+                    if (m == v->MemberEnd()) {
+                        v->AddMember(ValueType(t->name, t->length, allocator).Move(), ValueType().Move(), allocator);
+                        v = &(--v->MemberEnd())->value; // Assumes AddMember() appends at the end
+                        exist = false;
+                    }
+                    else
+                        v = &m->value;
+                }
+            }
+        }
+
+        if (alreadyExist)
+            *alreadyExist = exist;
+
+        return *v;
+    }
+
+    //! Creates a value in a document.
+    /*!
+        \param document A document to be resolved.
+        \param alreadyExist If non-null, it stores whether the resolved value is already exist.
+        \return The resolved newly created, or already exists value.
+    */
+    template <typename stackAllocator>
+    ValueType& Create(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, bool* alreadyExist = 0) const {
+        return Create(document, document.GetAllocator(), alreadyExist);
+    }
+
+    //@}
+
+    //!@name Query value
+    //@{
+
+    //! Query a value in a subtree.
+    /*!
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param unresolvedTokenIndex If the pointer cannot resolve a token in the pointer, this parameter can obtain the index of unresolved token.
+        \return Pointer to the value if it can be resolved. Otherwise null.
+
+        \note
+        There are only 3 situations when a value cannot be resolved:
+        1. A value in the path is not an array nor object.
+        2. An object value does not contain the token.
+        3. A token is out of range of an array value.
+
+        Use unresolvedTokenIndex to retrieve the token index.
+    */
+    ValueType* Get(ValueType& root, size_t* unresolvedTokenIndex = 0) const {
+        RAPIDJSON_ASSERT(IsValid());
+        ValueType* v = &root;
+        for (const Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+            switch (v->GetType()) {
+            case kObjectType:
+                {
+                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
+                    if (m == v->MemberEnd())
+                        break;
+                    v = &m->value;
+                }
+                continue;
+            case kArrayType:
+                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
+                    break;
+                v = &((*v)[t->index]);
+                continue;
+            default:
+                break;
+            }
+
+            // Error: unresolved token
+            if (unresolvedTokenIndex)
+                *unresolvedTokenIndex = static_cast<size_t>(t - tokens_);
+            return 0;
+        }
+        return v;
+    }
+
+    //! Query a const value in a const subtree.
+    /*!
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \return Pointer to the value if it can be resolved. Otherwise null.
+    */
+    const ValueType* Get(const ValueType& root, size_t* unresolvedTokenIndex = 0) const { 
+        return Get(const_cast<ValueType&>(root), unresolvedTokenIndex);
+    }
+
+    //@}
+
+    //!@name Query a value with default
+    //@{
+
+    //! Query a value in a subtree with default value.
+    /*!
+        Similar to Get(), but if the specified value do not exists, it creates all parents and clone the default value.
+        So that this function always succeed.
+
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param defaultValue Default value to be cloned if the value was not exists.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \see Create()
+    */
+    ValueType& GetWithDefault(ValueType& root, const ValueType& defaultValue, typename ValueType::AllocatorType& allocator) const {
+        bool alreadyExist;
+        ValueType& v = Create(root, allocator, &alreadyExist);
+        return alreadyExist ? v : v.CopyFrom(defaultValue, allocator);
+    }
+
+    //! Query a value in a subtree with default null-terminated string.
+    ValueType& GetWithDefault(ValueType& root, const Ch* defaultValue, typename ValueType::AllocatorType& allocator) const {
+        bool alreadyExist;
+        ValueType& v = Create(root, allocator, &alreadyExist);
+        return alreadyExist ? v : v.SetString(defaultValue, allocator);
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Query a value in a subtree with default std::basic_string.
+    ValueType& GetWithDefault(ValueType& root, const std::basic_string<Ch>& defaultValue, typename ValueType::AllocatorType& allocator) const {
+        bool alreadyExist;
+        ValueType& v = Create(root, allocator, &alreadyExist);
+        return alreadyExist ? v : v.SetString(defaultValue, allocator);
+    }
+#endif
+
+    //! Query a value in a subtree with default primitive value.
+    /*!
+        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+    GetWithDefault(ValueType& root, T defaultValue, typename ValueType::AllocatorType& allocator) const {
+        return GetWithDefault(root, ValueType(defaultValue).Move(), allocator);
+    }
+
+    //! Query a value in a document with default value.
+    template <typename stackAllocator>
+    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator());
+    }
+
+    //! Query a value in a document with default null-terminated string.
+    template <typename stackAllocator>
+    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator());
+    }
+    
+#if RAPIDJSON_HAS_STDSTRING
+    //! Query a value in a document with default std::basic_string.
+    template <typename stackAllocator>
+    ValueType& GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator());
+    }
+#endif
+
+    //! Query a value in a document with default primitive value.
+    /*!
+        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T, typename stackAllocator>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+    GetWithDefault(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T defaultValue) const {
+        return GetWithDefault(document, defaultValue, document.GetAllocator());
+    }
+
+    //@}
+
+    //!@name Set a value
+    //@{
+
+    //! Set a value in a subtree, with move semantics.
+    /*!
+        It creates all parents if they are not exist or types are different to the tokens.
+        So this function always succeeds but potentially remove existing values.
+
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param value Value to be set.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \see Create()
+    */
+    ValueType& Set(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = value;
+    }
+
+    //! Set a value in a subtree, with copy semantics.
+    ValueType& Set(ValueType& root, const ValueType& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator).CopyFrom(value, allocator);
+    }
+
+    //! Set a null-terminated string in a subtree.
+    ValueType& Set(ValueType& root, const Ch* value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = ValueType(value, allocator).Move();
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Set a std::basic_string in a subtree.
+    ValueType& Set(ValueType& root, const std::basic_string<Ch>& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = ValueType(value, allocator).Move();
+    }
+#endif
+
+    //! Set a primitive value in a subtree.
+    /*!
+        \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+    Set(ValueType& root, T value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator) = ValueType(value).Move();
+    }
+
+    //! Set a value in a document, with move semantics.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
+        return Create(document) = value;
+    }
+
+    //! Set a value in a document, with copy semantics.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const ValueType& value) const {
+        return Create(document).CopyFrom(value, document.GetAllocator());
+    }
+
+    //! Set a null-terminated string in a document.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const Ch* value) const {
+        return Create(document) = ValueType(value, document.GetAllocator()).Move();
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    //! Sets a std::basic_string in a document.
+    template <typename stackAllocator>
+    ValueType& Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, const std::basic_string<Ch>& value) const {
+        return Create(document) = ValueType(value, document.GetAllocator()).Move();
+    }
+#endif
+
+    //! Set a primitive value in a document.
+    /*!
+    \tparam T Either \ref Type, \c int, \c unsigned, \c int64_t, \c uint64_t, \c bool
+    */
+    template <typename T, typename stackAllocator>
+    RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T>, internal::IsGenericValue<T> >), (ValueType&))
+        Set(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, T value) const {
+            return Create(document) = value;
+    }
+
+    //@}
+
+    //!@name Swap a value
+    //@{
+
+    //! Swap a value with a value in a subtree.
+    /*!
+        It creates all parents if they are not exist or types are different to the tokens.
+        So this function always succeeds but potentially remove existing values.
+
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \param value Value to be swapped.
+        \param allocator Allocator for creating the values if the specified value or its parents are not exist.
+        \see Create()
+    */
+    ValueType& Swap(ValueType& root, ValueType& value, typename ValueType::AllocatorType& allocator) const {
+        return Create(root, allocator).Swap(value);
+    }
+
+    //! Swap a value with a value in a document.
+    template <typename stackAllocator>
+    ValueType& Swap(GenericDocument<EncodingType, typename ValueType::AllocatorType, stackAllocator>& document, ValueType& value) const {
+        return Create(document).Swap(value);
+    }
+
+    //@}
+
+    //! Erase a value in a subtree.
+    /*!
+        \param root Root value of a DOM sub-tree to be resolved. It can be any value other than document root.
+        \return Whether the resolved value is found and erased.
+
+        \note Erasing with an empty pointer \c Pointer(""), i.e. the root, always fail and return false.
+    */
+    bool Erase(ValueType& root) const {
+        RAPIDJSON_ASSERT(IsValid());
+        if (tokenCount_ == 0) // Cannot erase the root
+            return false;
+
+        ValueType* v = &root;
+        const Token* last = tokens_ + (tokenCount_ - 1);
+        for (const Token *t = tokens_; t != last; ++t) {
+            switch (v->GetType()) {
+            case kObjectType:
+                {
+                    typename ValueType::MemberIterator m = v->FindMember(GenericStringRef<Ch>(t->name, t->length));
+                    if (m == v->MemberEnd())
+                        return false;
+                    v = &m->value;
+                }
+                break;
+            case kArrayType:
+                if (t->index == kPointerInvalidIndex || t->index >= v->Size())
+                    return false;
+                v = &((*v)[t->index]);
+                break;
+            default:
+                return false;
+            }
+        }
+
+        switch (v->GetType()) {
+        case kObjectType:
+            return v->EraseMember(GenericStringRef<Ch>(last->name, last->length));
+        case kArrayType:
+            if (last->index == kPointerInvalidIndex || last->index >= v->Size())
+                return false;
+            v->Erase(v->Begin() + last->index);
+            return true;
+        default:
+            return false;
+        }
+    }
+
+private:
+    //! Clone the content from rhs to this.
+    /*!
+        \param rhs Source pointer.
+        \param extraToken Extra tokens to be allocated.
+        \param extraNameBufferSize Extra name buffer size (in number of Ch) to be allocated.
+        \return Start of non-occupied name buffer, for storing extra names.
+    */
+    Ch* CopyFromRaw(const GenericPointer& rhs, size_t extraToken = 0, size_t extraNameBufferSize = 0) {
+        if (!allocator_) // allocator is independently owned.
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+
+        size_t nameBufferSize = rhs.tokenCount_; // null terminators for tokens
+        for (Token *t = rhs.tokens_; t != rhs.tokens_ + rhs.tokenCount_; ++t)
+            nameBufferSize += t->length;
+
+        tokenCount_ = rhs.tokenCount_ + extraToken;
+        tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + (nameBufferSize + extraNameBufferSize) * sizeof(Ch)));
+        nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
+        if (rhs.tokenCount_ > 0) {
+            std::memcpy(tokens_, rhs.tokens_, rhs.tokenCount_ * sizeof(Token));
+        }
+        if (nameBufferSize > 0) {
+            std::memcpy(nameBuffer_, rhs.nameBuffer_, nameBufferSize * sizeof(Ch));
+        }
+
+        // Adjust pointers to name buffer
+        std::ptrdiff_t diff = nameBuffer_ - rhs.nameBuffer_;
+        for (Token *t = tokens_; t != tokens_ + rhs.tokenCount_; ++t)
+            t->name += diff;
+
+        return nameBuffer_ + nameBufferSize;
+    }
+
+    //! Check whether a character should be percent-encoded.
+    /*!
+        According to RFC 3986 2.3 Unreserved Characters.
+        \param c The character (code unit) to be tested.
+    */
+    bool NeedPercentEncode(Ch c) const {
+        return !((c >= '0' && c <= '9') || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || c == '-' || c == '.' || c == '_' || c =='~');
+    }
+
+    //! Parse a JSON String or its URI fragment representation into tokens.
+#ifndef __clang__ // -Wdocumentation
+    /*!
+        \param source Either a JSON Pointer string, or its URI fragment representation. Not need to be null terminated.
+        \param length Length of the source string.
+        \note Source cannot be JSON String Representation of JSON Pointer, e.g. In "/\u0000", \u0000 will not be unescaped.
+    */
+#endif
+    void Parse(const Ch* source, size_t length) {
+        RAPIDJSON_ASSERT(source != NULL);
+        RAPIDJSON_ASSERT(nameBuffer_ == 0);
+        RAPIDJSON_ASSERT(tokens_ == 0);
+
+        // Create own allocator if user did not supply.
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+
+        // Count number of '/' as tokenCount
+        tokenCount_ = 0;
+        for (const Ch* s = source; s != source + length; s++) 
+            if (*s == '/')
+                tokenCount_++;
+
+        Token* token = tokens_ = static_cast<Token *>(allocator_->Malloc(tokenCount_ * sizeof(Token) + length * sizeof(Ch)));
+        Ch* name = nameBuffer_ = reinterpret_cast<Ch *>(tokens_ + tokenCount_);
+        size_t i = 0;
+
+        // Detect if it is a URI fragment
+        bool uriFragment = false;
+        if (source[i] == '#') {
+            uriFragment = true;
+            i++;
+        }
+
+        if (i != length && source[i] != '/') {
+            parseErrorCode_ = kPointerParseErrorTokenMustBeginWithSolidus;
+            goto error;
+        }
+
+        while (i < length) {
+            RAPIDJSON_ASSERT(source[i] == '/');
+            i++; // consumes '/'
+
+            token->name = name;
+            bool isNumber = true;
+
+            while (i < length && source[i] != '/') {
+                Ch c = source[i];
+                if (uriFragment) {
+                    // Decoding percent-encoding for URI fragment
+                    if (c == '%') {
+                        PercentDecodeStream is(&source[i], source + length);
+                        GenericInsituStringStream<EncodingType> os(name);
+                        Ch* begin = os.PutBegin();
+                        if (!Transcoder<UTF8<>, EncodingType>().Validate(is, os) || !is.IsValid()) {
+                            parseErrorCode_ = kPointerParseErrorInvalidPercentEncoding;
+                            goto error;
+                        }
+                        size_t len = os.PutEnd(begin);
+                        i += is.Tell() - 1;
+                        if (len == 1)
+                            c = *name;
+                        else {
+                            name += len;
+                            isNumber = false;
+                            i++;
+                            continue;
+                        }
+                    }
+                    else if (NeedPercentEncode(c)) {
+                        parseErrorCode_ = kPointerParseErrorCharacterMustPercentEncode;
+                        goto error;
+                    }
+                }
+
+                i++;
+                
+                // Escaping "~0" -> '~', "~1" -> '/'
+                if (c == '~') {
+                    if (i < length) {
+                        c = source[i];
+                        if (c == '0')       c = '~';
+                        else if (c == '1')  c = '/';
+                        else {
+                            parseErrorCode_ = kPointerParseErrorInvalidEscape;
+                            goto error;
+                        }
+                        i++;
+                    }
+                    else {
+                        parseErrorCode_ = kPointerParseErrorInvalidEscape;
+                        goto error;
+                    }
+                }
+
+                // First check for index: all of characters are digit
+                if (c < '0' || c > '9')
+                    isNumber = false;
+
+                *name++ = c;
+            }
+            token->length = static_cast<SizeType>(name - token->name);
+            if (token->length == 0)
+                isNumber = false;
+            *name++ = '\0'; // Null terminator
+
+            // Second check for index: more than one digit cannot have leading zero
+            if (isNumber && token->length > 1 && token->name[0] == '0')
+                isNumber = false;
+
+            // String to SizeType conversion
+            SizeType n = 0;
+            if (isNumber) {
+                for (size_t j = 0; j < token->length; j++) {
+                    SizeType m = n * 10 + static_cast<SizeType>(token->name[j] - '0');
+                    if (m < n) {   // overflow detection
+                        isNumber = false;
+                        break;
+                    }
+                    n = m;
+                }
+            }
+
+            token->index = isNumber ? n : kPointerInvalidIndex;
+            token++;
+        }
+
+        RAPIDJSON_ASSERT(name <= nameBuffer_ + length); // Should not overflow buffer
+        parseErrorCode_ = kPointerParseErrorNone;
+        return;
+
+    error:
+        Allocator::Free(tokens_);
+        nameBuffer_ = 0;
+        tokens_ = 0;
+        tokenCount_ = 0;
+        parseErrorOffset_ = i;
+        return;
+    }
+
+    //! Stringify to string or URI fragment representation.
+    /*!
+        \tparam uriFragment True for stringifying to URI fragment representation. False for string representation.
+        \tparam OutputStream type of output stream.
+        \param os The output stream.
+    */
+    template<bool uriFragment, typename OutputStream>
+    bool Stringify(OutputStream& os) const {
+        RAPIDJSON_ASSERT(IsValid());
+
+        if (uriFragment)
+            os.Put('#');
+
+        for (Token *t = tokens_; t != tokens_ + tokenCount_; ++t) {
+            os.Put('/');
+            for (size_t j = 0; j < t->length; j++) {
+                Ch c = t->name[j];
+                if (c == '~') {
+                    os.Put('~');
+                    os.Put('0');
+                }
+                else if (c == '/') {
+                    os.Put('~');
+                    os.Put('1');
+                }
+                else if (uriFragment && NeedPercentEncode(c)) { 
+                    // Transcode to UTF8 sequence
+                    GenericStringStream<typename ValueType::EncodingType> source(&t->name[j]);
+                    PercentEncodeStream<OutputStream> target(os);
+                    if (!Transcoder<EncodingType, UTF8<> >().Validate(source, target))
+                        return false;
+                    j += source.Tell() - 1;
+                }
+                else
+                    os.Put(c);
+            }
+        }
+        return true;
+    }
+
+    //! A helper stream for decoding a percent-encoded sequence into code unit.
+    /*!
+        This stream decodes %XY triplet into code unit (0-255).
+        If it encounters invalid characters, it sets output code unit as 0 and 
+        mark invalid, and to be checked by IsValid().
+    */
+    class PercentDecodeStream {
+    public:
+        typedef typename ValueType::Ch Ch;
+
+        //! Constructor
+        /*!
+            \param source Start of the stream
+            \param end Past-the-end of the stream.
+        */
+        PercentDecodeStream(const Ch* source, const Ch* end) : src_(source), head_(source), end_(end), valid_(true) {}
+
+        Ch Take() {
+            if (*src_ != '%' || src_ + 3 > end_) { // %XY triplet
+                valid_ = false;
+                return 0;
+            }
+            src_++;
+            Ch c = 0;
+            for (int j = 0; j < 2; j++) {
+                c = static_cast<Ch>(c << 4);
+                Ch h = *src_;
+                if      (h >= '0' && h <= '9') c = static_cast<Ch>(c + h - '0');
+                else if (h >= 'A' && h <= 'F') c = static_cast<Ch>(c + h - 'A' + 10);
+                else if (h >= 'a' && h <= 'f') c = static_cast<Ch>(c + h - 'a' + 10);
+                else {
+                    valid_ = false;
+                    return 0;
+                }
+                src_++;
+            }
+            return c;
+        }
+
+        size_t Tell() const { return static_cast<size_t>(src_ - head_); }
+        bool IsValid() const { return valid_; }
+
+    private:
+        const Ch* src_;     //!< Current read position.
+        const Ch* head_;    //!< Original head of the string.
+        const Ch* end_;     //!< Past-the-end position.
+        bool valid_;        //!< Whether the parsing is valid.
+    };
+
+    //! A helper stream to encode character (UTF-8 code unit) into percent-encoded sequence.
+    template <typename OutputStream>
+    class PercentEncodeStream {
+    public:
+        PercentEncodeStream(OutputStream& os) : os_(os) {}
+        void Put(char c) { // UTF-8 must be byte
+            unsigned char u = static_cast<unsigned char>(c);
+            static const char hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+            os_.Put('%');
+            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u >> 4]));
+            os_.Put(static_cast<typename OutputStream::Ch>(hexDigits[u & 15]));
+        }
+    private:
+        OutputStream& os_;
+    };
+
+    Allocator* allocator_;                  //!< The current allocator. It is either user-supplied or equal to ownAllocator_.
+    Allocator* ownAllocator_;               //!< Allocator owned by this Pointer.
+    Ch* nameBuffer_;                        //!< A buffer containing all names in tokens.
+    Token* tokens_;                         //!< A list of tokens.
+    size_t tokenCount_;                     //!< Number of tokens in tokens_.
+    size_t parseErrorOffset_;               //!< Offset in code unit when parsing fail.
+    PointerParseErrorCode parseErrorCode_;  //!< Parsing error code.
+};
+
+//! GenericPointer for Value (UTF-8, default allocator).
+typedef GenericPointer<Value> Pointer;
+
+//!@name Helper functions for GenericPointer
+//@{
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+typename T::ValueType& CreateValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::AllocatorType& a) {
+    return pointer.Create(root, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& CreateValueByPointer(T& root, const CharType(&source)[N], typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Create(root, a);
+}
+
+// No allocator parameter
+
+template <typename DocumentType>
+typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer) {
+    return pointer.Create(document);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& CreateValueByPointer(DocumentType& document, const CharType(&source)[N]) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Create(document);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+typename T::ValueType* GetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
+    return pointer.Get(root, unresolvedTokenIndex);
+}
+
+template <typename T>
+const typename T::ValueType* GetValueByPointer(const T& root, const GenericPointer<typename T::ValueType>& pointer, size_t* unresolvedTokenIndex = 0) {
+    return pointer.Get(root, unresolvedTokenIndex);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType* GetValueByPointer(T& root, const CharType (&source)[N], size_t* unresolvedTokenIndex = 0) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
+}
+
+template <typename T, typename CharType, size_t N>
+const typename T::ValueType* GetValueByPointer(const T& root, const CharType(&source)[N], size_t* unresolvedTokenIndex = 0) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Get(root, unresolvedTokenIndex);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+
+template <typename T>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+#endif
+
+template <typename T, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+GetValueByPointerWithDefault(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 defaultValue, typename T::AllocatorType& a) {
+    return pointer.GetWithDefault(root, defaultValue, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::ValueType& defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const typename T::Ch* defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& GetValueByPointerWithDefault(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+#endif
+
+template <typename T, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+GetValueByPointerWithDefault(T& root, const CharType(&source)[N], T2 defaultValue, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).GetWithDefault(root, defaultValue, a);
+}
+
+// No allocator parameter
+
+template <typename DocumentType>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+#endif
+
+template <typename DocumentType, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+GetValueByPointerWithDefault(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 defaultValue) {
+    return pointer.GetWithDefault(document, defaultValue);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+#endif
+
+template <typename DocumentType, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+GetValueByPointerWithDefault(DocumentType& document, const CharType(&source)[N], T2 defaultValue) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).GetWithDefault(document, defaultValue);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::ValueType& value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const typename T::Ch* value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T>
+typename T::ValueType& SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+#endif
+
+template <typename T, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+SetValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, T2 value, typename T::AllocatorType& a) {
+    return pointer.Set(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::ValueType& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const typename T::Ch* value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SetValueByPointer(T& root, const CharType(&source)[N], const std::basic_string<typename T::Ch>& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+#endif
+
+template <typename T, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename T::ValueType&))
+SetValueByPointer(T& root, const CharType(&source)[N], T2 value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Set(root, value, a);
+}
+
+// No allocator parameter
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
+    return pointer.Set(document, value);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::ValueType& value) {
+    return pointer.Set(document, value);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const typename DocumentType::Ch* value) {
+    return pointer.Set(document, value);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, const std::basic_string<typename DocumentType::Ch>& value) {
+    return pointer.Set(document, value);
+}
+#endif
+
+template <typename DocumentType, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+SetValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, T2 value) {
+    return pointer.Set(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::ValueType& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const typename DocumentType::Ch* value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+#if RAPIDJSON_HAS_STDSTRING
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SetValueByPointer(DocumentType& document, const CharType(&source)[N], const std::basic_string<typename DocumentType::Ch>& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+#endif
+
+template <typename DocumentType, typename CharType, size_t N, typename T2>
+RAPIDJSON_DISABLEIF_RETURN((internal::OrExpr<internal::IsPointer<T2>, internal::IsGenericValue<T2> >), (typename DocumentType::ValueType&))
+SetValueByPointer(DocumentType& document, const CharType(&source)[N], T2 value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Set(document, value);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+typename T::ValueType& SwapValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer, typename T::ValueType& value, typename T::AllocatorType& a) {
+    return pointer.Swap(root, value, a);
+}
+
+template <typename T, typename CharType, size_t N>
+typename T::ValueType& SwapValueByPointer(T& root, const CharType(&source)[N], typename T::ValueType& value, typename T::AllocatorType& a) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Swap(root, value, a);
+}
+
+template <typename DocumentType>
+typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const GenericPointer<typename DocumentType::ValueType>& pointer, typename DocumentType::ValueType& value) {
+    return pointer.Swap(document, value);
+}
+
+template <typename DocumentType, typename CharType, size_t N>
+typename DocumentType::ValueType& SwapValueByPointer(DocumentType& document, const CharType(&source)[N], typename DocumentType::ValueType& value) {
+    return GenericPointer<typename DocumentType::ValueType>(source, N - 1).Swap(document, value);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+
+template <typename T>
+bool EraseValueByPointer(T& root, const GenericPointer<typename T::ValueType>& pointer) {
+    return pointer.Erase(root);
+}
+
+template <typename T, typename CharType, size_t N>
+bool EraseValueByPointer(T& root, const CharType(&source)[N]) {
+    return GenericPointer<typename T::ValueType>(source, N - 1).Erase(root);
+}
+
+//@}
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__) || defined(_MSC_VER)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_POINTER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/prettywriter.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/prettywriter.h
new file mode 100644
index 000000000..45afb6949
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/prettywriter.h
@@ -0,0 +1,277 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_PRETTYWRITER_H_
+#define RAPIDJSON_PRETTYWRITER_H_
+
+#include "writer.h"
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Combination of PrettyWriter format flags.
+/*! \see PrettyWriter::SetFormatOptions
+ */
+enum PrettyFormatOptions {
+    kFormatDefault = 0,         //!< Default pretty formatting.
+    kFormatSingleLineArray = 1  //!< Format arrays on a single line.
+};
+
+//! Writer with indentation and spacing.
+/*!
+    \tparam OutputStream Type of output os.
+    \tparam SourceEncoding Encoding of source string.
+    \tparam TargetEncoding Encoding of output stream.
+    \tparam StackAllocator Type of allocator for allocating memory of stack.
+*/
+template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
+class PrettyWriter : public Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> {
+public:
+    typedef Writer<OutputStream, SourceEncoding, TargetEncoding, StackAllocator, writeFlags> Base;
+    typedef typename Base::Ch Ch;
+
+    //! Constructor
+    /*! \param os Output stream.
+        \param allocator User supplied allocator. If it is null, it will create a private one.
+        \param levelDepth Initial capacity of stack.
+    */
+    explicit PrettyWriter(OutputStream& os, StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
+        Base(os, allocator, levelDepth), indentChar_(' '), indentCharCount_(4), formatOptions_(kFormatDefault) {}
+
+
+    explicit PrettyWriter(StackAllocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
+        Base(allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    PrettyWriter(PrettyWriter&& rhs) :
+        Base(std::forward<PrettyWriter>(rhs)), indentChar_(rhs.indentChar_), indentCharCount_(rhs.indentCharCount_), formatOptions_(rhs.formatOptions_) {}
+#endif
+
+    //! Set custom indentation.
+    /*! \param indentChar       Character for indentation. Must be whitespace character (' ', '\\t', '\\n', '\\r').
+        \param indentCharCount  Number of indent characters for each indentation level.
+        \note The default indentation is 4 spaces.
+    */
+    PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) {
+        RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r');
+        indentChar_ = indentChar;
+        indentCharCount_ = indentCharCount;
+        return *this;
+    }
+
+    //! Set pretty writer formatting options.
+    /*! \param options Formatting options.
+    */
+    PrettyWriter& SetFormatOptions(PrettyFormatOptions options) {
+        formatOptions_ = options;
+        return *this;
+    }
+
+    /*! @name Implementation of Handler
+        \see Handler
+    */
+    //@{
+
+    bool Null()                 { PrettyPrefix(kNullType);   return Base::EndValue(Base::WriteNull()); }
+    bool Bool(bool b)           { PrettyPrefix(b ? kTrueType : kFalseType); return Base::EndValue(Base::WriteBool(b)); }
+    bool Int(int i)             { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt(i)); }
+    bool Uint(unsigned u)       { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint(u)); }
+    bool Int64(int64_t i64)     { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteInt64(i64)); }
+    bool Uint64(uint64_t u64)   { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteUint64(u64));  }
+    bool Double(double d)       { PrettyPrefix(kNumberType); return Base::EndValue(Base::WriteDouble(d)); }
+
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy;
+        PrettyPrefix(kNumberType);
+        return Base::EndValue(Base::WriteString(str, length));
+    }
+
+    bool String(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy;
+        PrettyPrefix(kStringType);
+        return Base::EndValue(Base::WriteString(str, length));
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool String(const std::basic_string<Ch>& str) {
+        return String(str.data(), SizeType(str.size()));
+    }
+#endif
+
+    bool StartObject() {
+        PrettyPrefix(kObjectType);
+        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false);
+        return Base::WriteStartObject();
+    }
+
+    bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool Key(const std::basic_string<Ch>& str) {
+        return Key(str.data(), SizeType(str.size()));
+    }
+#endif
+	
+    bool EndObject(SizeType memberCount = 0) {
+        (void)memberCount;
+        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == Base::level_stack_.template Top<typename Base::Level>()->valueCount % 2); // Object has a Key without a Value
+       
+        bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
+
+        if (!empty) {
+            Base::os_->Put('\n');
+            WriteIndent();
+        }
+        bool ret = Base::EndValue(Base::WriteEndObject());
+        (void)ret;
+        RAPIDJSON_ASSERT(ret == true);
+        if (Base::level_stack_.Empty()) // end of json text
+            Base::Flush();
+        return true;
+    }
+
+    bool StartArray() {
+        PrettyPrefix(kArrayType);
+        new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(true);
+        return Base::WriteStartArray();
+    }
+
+    bool EndArray(SizeType memberCount = 0) {
+        (void)memberCount;
+        RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
+        RAPIDJSON_ASSERT(Base::level_stack_.template Top<typename Base::Level>()->inArray);
+        bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
+
+        if (!empty && !(formatOptions_ & kFormatSingleLineArray)) {
+            Base::os_->Put('\n');
+            WriteIndent();
+        }
+        bool ret = Base::EndValue(Base::WriteEndArray());
+        (void)ret;
+        RAPIDJSON_ASSERT(ret == true);
+        if (Base::level_stack_.Empty()) // end of json text
+            Base::Flush();
+        return true;
+    }
+
+    //@}
+
+    /*! @name Convenience extensions */
+    //@{
+
+    //! Simpler but slower overload.
+    bool String(const Ch* str) { return String(str, internal::StrLen(str)); }
+    bool Key(const Ch* str) { return Key(str, internal::StrLen(str)); }
+
+    //@}
+
+    //! Write a raw JSON value.
+    /*!
+        For user to write a stringified JSON as a value.
+
+        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
+        \param length Length of the json.
+        \param type Type of the root of json.
+        \note When using PrettyWriter::RawValue(), the result json may not be indented correctly.
+    */
+    bool RawValue(const Ch* json, size_t length, Type type) {
+        RAPIDJSON_ASSERT(json != 0);
+        PrettyPrefix(type);
+        return Base::EndValue(Base::WriteRawValue(json, length));
+    }
+
+protected:
+    void PrettyPrefix(Type type) {
+        (void)type;
+        if (Base::level_stack_.GetSize() != 0) { // this value is not at root
+            typename Base::Level* level = Base::level_stack_.template Top<typename Base::Level>();
+
+            if (level->inArray) {
+                if (level->valueCount > 0) {
+                    Base::os_->Put(','); // add comma if it is not the first element in array
+                    if (formatOptions_ & kFormatSingleLineArray)
+                        Base::os_->Put(' ');
+                }
+
+                if (!(formatOptions_ & kFormatSingleLineArray)) {
+                    Base::os_->Put('\n');
+                    WriteIndent();
+                }
+            }
+            else {  // in object
+                if (level->valueCount > 0) {
+                    if (level->valueCount % 2 == 0) {
+                        Base::os_->Put(',');
+                        Base::os_->Put('\n');
+                    }
+                    else {
+                        Base::os_->Put(':');
+                        Base::os_->Put(' ');
+                    }
+                }
+                else
+                    Base::os_->Put('\n');
+
+                if (level->valueCount % 2 == 0)
+                    WriteIndent();
+            }
+            if (!level->inArray && level->valueCount % 2 == 0)
+                RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+            level->valueCount++;
+        }
+        else {
+            RAPIDJSON_ASSERT(!Base::hasRoot_);  // Should only has one and only one root.
+            Base::hasRoot_ = true;
+        }
+    }
+
+    void WriteIndent()  {
+        size_t count = (Base::level_stack_.GetSize() / sizeof(typename Base::Level)) * indentCharCount_;
+        PutN(*Base::os_, static_cast<typename OutputStream::Ch>(indentChar_), count);
+    }
+
+    Ch indentChar_;
+    unsigned indentCharCount_;
+    PrettyFormatOptions formatOptions_;
+
+private:
+    // Prohibit copy constructor & assignment operator.
+    PrettyWriter(const PrettyWriter&);
+    PrettyWriter& operator=(const PrettyWriter&);
+};
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_RAPIDJSON_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/rapidjson.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/rapidjson.h
new file mode 100644
index 000000000..065c8bb96
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/rapidjson.h
@@ -0,0 +1,654 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_RAPIDJSON_H_
+#define RAPIDJSON_RAPIDJSON_H_
+
+/*!\file rapidjson.h
+    \brief common definitions and configuration
+    
+    \see RAPIDJSON_CONFIG
+ */
+
+/*! \defgroup RAPIDJSON_CONFIG RapidJSON configuration
+    \brief Configuration macros for library features
+
+    Some RapidJSON features are configurable to adapt the library to a wide
+    variety of platforms, environments and usage scenarios.  Most of the
+    features can be configured in terms of overridden or predefined
+    preprocessor macros at compile-time.
+
+    Some additional customization is available in the \ref RAPIDJSON_ERRORS APIs.
+
+    \note These macros should be given on the compiler command-line
+          (where applicable)  to avoid inconsistent values when compiling
+          different translation units of a single application.
+ */
+
+#include <cstdlib>  // malloc(), realloc(), free(), size_t
+#include <cstring>  // memset(), memcpy(), memmove(), memcmp()
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_VERSION_STRING
+//
+// ALWAYS synchronize the following 3 macros with corresponding variables in /CMakeLists.txt.
+//
+
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+// token stringification
+#define RAPIDJSON_STRINGIFY(x) RAPIDJSON_DO_STRINGIFY(x)
+#define RAPIDJSON_DO_STRINGIFY(x) #x
+
+// token concatenation
+#define RAPIDJSON_JOIN(X, Y) RAPIDJSON_DO_JOIN(X, Y)
+#define RAPIDJSON_DO_JOIN(X, Y) RAPIDJSON_DO_JOIN2(X, Y)
+#define RAPIDJSON_DO_JOIN2(X, Y) X##Y
+//!@endcond
+
+/*! \def RAPIDJSON_MAJOR_VERSION
+    \ingroup RAPIDJSON_CONFIG
+    \brief Major version of RapidJSON in integer.
+*/
+/*! \def RAPIDJSON_MINOR_VERSION
+    \ingroup RAPIDJSON_CONFIG
+    \brief Minor version of RapidJSON in integer.
+*/
+/*! \def RAPIDJSON_PATCH_VERSION
+    \ingroup RAPIDJSON_CONFIG
+    \brief Patch version of RapidJSON in integer.
+*/
+/*! \def RAPIDJSON_VERSION_STRING
+    \ingroup RAPIDJSON_CONFIG
+    \brief Version of RapidJSON in "<major>.<minor>.<patch>" string format.
+*/
+#define RAPIDJSON_MAJOR_VERSION 1
+#define RAPIDJSON_MINOR_VERSION 1
+#define RAPIDJSON_PATCH_VERSION 0
+#define RAPIDJSON_VERSION_STRING \
+    RAPIDJSON_STRINGIFY(RAPIDJSON_MAJOR_VERSION.RAPIDJSON_MINOR_VERSION.RAPIDJSON_PATCH_VERSION)
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_NAMESPACE_(BEGIN|END)
+/*! \def RAPIDJSON_NAMESPACE
+    \ingroup RAPIDJSON_CONFIG
+    \brief   provide custom rapidjson namespace
+
+    In order to avoid symbol clashes and/or "One Definition Rule" errors
+    between multiple inclusions of (different versions of) RapidJSON in
+    a single binary, users can customize the name of the main RapidJSON
+    namespace.
+
+    In case of a single nesting level, defining \c RAPIDJSON_NAMESPACE
+    to a custom name (e.g. \c MyRapidJSON) is sufficient.  If multiple
+    levels are needed, both \ref RAPIDJSON_NAMESPACE_BEGIN and \ref
+    RAPIDJSON_NAMESPACE_END need to be defined as well:
+
+    \code
+    // in some .cpp file
+    #define RAPIDJSON_NAMESPACE my::rapidjson
+    #define RAPIDJSON_NAMESPACE_BEGIN namespace my { namespace rapidjson {
+    #define RAPIDJSON_NAMESPACE_END   } }
+    #include "rapidjson/..."
+    \endcode
+
+    \see rapidjson
+ */
+/*! \def RAPIDJSON_NAMESPACE_BEGIN
+    \ingroup RAPIDJSON_CONFIG
+    \brief   provide custom rapidjson namespace (opening expression)
+    \see RAPIDJSON_NAMESPACE
+*/
+/*! \def RAPIDJSON_NAMESPACE_END
+    \ingroup RAPIDJSON_CONFIG
+    \brief   provide custom rapidjson namespace (closing expression)
+    \see RAPIDJSON_NAMESPACE
+*/
+#ifndef RAPIDJSON_NAMESPACE
+#define RAPIDJSON_NAMESPACE rapidjson
+#endif
+#ifndef RAPIDJSON_NAMESPACE_BEGIN
+#define RAPIDJSON_NAMESPACE_BEGIN namespace RAPIDJSON_NAMESPACE {
+#endif
+#ifndef RAPIDJSON_NAMESPACE_END
+#define RAPIDJSON_NAMESPACE_END }
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_HAS_STDSTRING
+
+#ifndef RAPIDJSON_HAS_STDSTRING
+#ifdef RAPIDJSON_DOXYGEN_RUNNING
+#define RAPIDJSON_HAS_STDSTRING 1 // force generation of documentation
+#else
+#define RAPIDJSON_HAS_STDSTRING 0 // no std::string support by default
+#endif
+/*! \def RAPIDJSON_HAS_STDSTRING
+    \ingroup RAPIDJSON_CONFIG
+    \brief Enable RapidJSON support for \c std::string
+
+    By defining this preprocessor symbol to \c 1, several convenience functions for using
+    \ref rapidjson::GenericValue with \c std::string are enabled, especially
+    for construction and comparison.
+
+    \hideinitializer
+*/
+#endif // !defined(RAPIDJSON_HAS_STDSTRING)
+
+#if RAPIDJSON_HAS_STDSTRING
+#include <string>
+#endif // RAPIDJSON_HAS_STDSTRING
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_NO_INT64DEFINE
+
+/*! \def RAPIDJSON_NO_INT64DEFINE
+    \ingroup RAPIDJSON_CONFIG
+    \brief Use external 64-bit integer types.
+
+    RapidJSON requires the 64-bit integer types \c int64_t and  \c uint64_t types
+    to be available at global scope.
+
+    If users have their own definition, define RAPIDJSON_NO_INT64DEFINE to
+    prevent RapidJSON from defining its own types.
+*/
+#ifndef RAPIDJSON_NO_INT64DEFINE
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#if defined(_MSC_VER) && (_MSC_VER < 1800)	// Visual Studio 2013
+#include "msinttypes/stdint.h"
+#include "msinttypes/inttypes.h"
+#else
+// Other compilers should have this.
+#include <stdint.h>
+#include <inttypes.h>
+#endif
+//!@endcond
+#ifdef RAPIDJSON_DOXYGEN_RUNNING
+#define RAPIDJSON_NO_INT64DEFINE
+#endif
+#endif // RAPIDJSON_NO_INT64TYPEDEF
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_FORCEINLINE
+
+#ifndef RAPIDJSON_FORCEINLINE
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#if defined(_MSC_VER) && defined(NDEBUG)
+#define RAPIDJSON_FORCEINLINE __forceinline
+#elif defined(__GNUC__) && __GNUC__ >= 4 && defined(NDEBUG)
+#define RAPIDJSON_FORCEINLINE __attribute__((always_inline))
+#else
+#define RAPIDJSON_FORCEINLINE
+#endif
+//!@endcond
+#endif // RAPIDJSON_FORCEINLINE
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ENDIAN
+#define RAPIDJSON_LITTLEENDIAN  0   //!< Little endian machine
+#define RAPIDJSON_BIGENDIAN     1   //!< Big endian machine
+
+//! Endianness of the machine.
+/*!
+    \def RAPIDJSON_ENDIAN
+    \ingroup RAPIDJSON_CONFIG
+
+    GCC 4.6 provided macro for detecting endianness of the target machine. But other
+    compilers may not have this. User can define RAPIDJSON_ENDIAN to either
+    \ref RAPIDJSON_LITTLEENDIAN or \ref RAPIDJSON_BIGENDIAN.
+
+    Default detection implemented with reference to
+    \li https://gcc.gnu.org/onlinedocs/gcc-4.6.0/cpp/Common-Predefined-Macros.html
+    \li http://www.boost.org/doc/libs/1_42_0/boost/detail/endian.hpp
+*/
+#ifndef RAPIDJSON_ENDIAN
+// Detect with GCC 4.6's macro
+#  ifdef __BYTE_ORDER__
+#    if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#      define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#    elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#      define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#    else
+#      error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
+#    endif // __BYTE_ORDER__
+// Detect with GLIBC's endian.h
+#  elif defined(__GLIBC__)
+#    include <endian.h>
+#    if (__BYTE_ORDER == __LITTLE_ENDIAN)
+#      define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#    elif (__BYTE_ORDER == __BIG_ENDIAN)
+#      define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#    else
+#      error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.
+#   endif // __GLIBC__
+// Detect with _LITTLE_ENDIAN and _BIG_ENDIAN macro
+#  elif defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN)
+#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#  elif defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN)
+#    define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+// Detect with architecture macros
+#  elif defined(__sparc) || defined(__sparc__) || defined(_POWER) || defined(__powerpc__) || defined(__ppc__) || defined(__hpux) || defined(__hppa) || defined(_MIPSEB) || defined(_POWER) || defined(__s390__)
+#    define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#  elif defined(__i386__) || defined(__alpha__) || defined(__ia64) || defined(__ia64__) || defined(_M_IX86) || defined(_M_IA64) || defined(_M_ALPHA) || defined(__amd64) || defined(__amd64__) || defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || defined(__bfin__)
+#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#  elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
+#    define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#  elif defined(RAPIDJSON_DOXYGEN_RUNNING)
+#    define RAPIDJSON_ENDIAN
+#  else
+#    error Unknown machine endianness detected. User needs to define RAPIDJSON_ENDIAN.   
+#  endif
+#endif // RAPIDJSON_ENDIAN
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_64BIT
+
+//! Whether using 64-bit architecture
+#ifndef RAPIDJSON_64BIT
+#if defined(__LP64__) || (defined(__x86_64__) && defined(__ILP32__)) || defined(_WIN64) || defined(__EMSCRIPTEN__)
+#define RAPIDJSON_64BIT 1
+#else
+#define RAPIDJSON_64BIT 0
+#endif
+#endif // RAPIDJSON_64BIT
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ALIGN
+
+//! Data alignment of the machine.
+/*! \ingroup RAPIDJSON_CONFIG
+    \param x pointer to align
+
+    Some machines require strict data alignment. The default is 8 bytes.
+    User can customize by defining the RAPIDJSON_ALIGN function macro.
+*/
+#ifndef RAPIDJSON_ALIGN
+#define RAPIDJSON_ALIGN(x) (((x) + static_cast<size_t>(7u)) & ~static_cast<size_t>(7u))
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_UINT64_C2
+
+//! Construct a 64-bit literal by a pair of 32-bit integer.
+/*!
+    64-bit literal with or without ULL suffix is prone to compiler warnings.
+    UINT64_C() is C macro which cause compilation problems.
+    Use this macro to define 64-bit constants by a pair of 32-bit integer.
+*/
+#ifndef RAPIDJSON_UINT64_C2
+#define RAPIDJSON_UINT64_C2(high32, low32) ((static_cast<uint64_t>(high32) << 32) | static_cast<uint64_t>(low32))
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+//! Use only lower 48-bit address for some pointers.
+/*!
+    \ingroup RAPIDJSON_CONFIG
+
+    This optimization uses the fact that current X86-64 architecture only implement lower 48-bit virtual address.
+    The higher 16-bit can be used for storing other data.
+    \c GenericValue uses this optimization to reduce its size form 24 bytes to 16 bytes in 64-bit architecture.
+*/
+#ifndef RAPIDJSON_48BITPOINTER_OPTIMIZATION
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 1
+#else
+#define RAPIDJSON_48BITPOINTER_OPTIMIZATION 0
+#endif
+#endif // RAPIDJSON_48BITPOINTER_OPTIMIZATION
+
+#if RAPIDJSON_48BITPOINTER_OPTIMIZATION == 1
+#if RAPIDJSON_64BIT != 1
+#error RAPIDJSON_48BITPOINTER_OPTIMIZATION can only be set to 1 when RAPIDJSON_64BIT=1
+#endif
+#define RAPIDJSON_SETPOINTER(type, p, x) (p = reinterpret_cast<type *>((reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0xFFFF0000, 0x00000000))) | reinterpret_cast<uintptr_t>(reinterpret_cast<const void*>(x))))
+#define RAPIDJSON_GETPOINTER(type, p) (reinterpret_cast<type *>(reinterpret_cast<uintptr_t>(p) & static_cast<uintptr_t>(RAPIDJSON_UINT64_C2(0x0000FFFF, 0xFFFFFFFF))))
+#else
+#define RAPIDJSON_SETPOINTER(type, p, x) (p = (x))
+#define RAPIDJSON_GETPOINTER(type, p) (p)
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_NEON/RAPIDJSON_SIMD
+
+/*! \def RAPIDJSON_SIMD
+    \ingroup RAPIDJSON_CONFIG
+    \brief Enable SSE2/SSE4.2/Neon optimization.
+
+    RapidJSON supports optimized implementations for some parsing operations
+    based on the SSE2, SSE4.2 or NEon SIMD extensions on modern Intel
+    or ARM compatible processors.
+
+    To enable these optimizations, three different symbols can be defined;
+    \code
+    // Enable SSE2 optimization.
+    #define RAPIDJSON_SSE2
+
+    // Enable SSE4.2 optimization.
+    #define RAPIDJSON_SSE42
+    \endcode
+
+    // Enable ARM Neon optimization.
+    #define RAPIDJSON_NEON
+    \endcode
+
+    \c RAPIDJSON_SSE42 takes precedence over SSE2, if both are defined.
+
+    If any of these symbols is defined, RapidJSON defines the macro
+    \c RAPIDJSON_SIMD to indicate the availability of the optimized code.
+*/
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) \
+    || defined(RAPIDJSON_NEON) || defined(RAPIDJSON_DOXYGEN_RUNNING)
+#define RAPIDJSON_SIMD
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_NO_SIZETYPEDEFINE
+
+#ifndef RAPIDJSON_NO_SIZETYPEDEFINE
+/*! \def RAPIDJSON_NO_SIZETYPEDEFINE
+    \ingroup RAPIDJSON_CONFIG
+    \brief User-provided \c SizeType definition.
+
+    In order to avoid using 32-bit size types for indexing strings and arrays,
+    define this preprocessor symbol and provide the type rapidjson::SizeType
+    before including RapidJSON:
+    \code
+    #define RAPIDJSON_NO_SIZETYPEDEFINE
+    namespace rapidjson { typedef ::std::size_t SizeType; }
+    #include "rapidjson/..."
+    \endcode
+
+    \see rapidjson::SizeType
+*/
+#ifdef RAPIDJSON_DOXYGEN_RUNNING
+#define RAPIDJSON_NO_SIZETYPEDEFINE
+#endif
+RAPIDJSON_NAMESPACE_BEGIN
+//! Size type (for string lengths, array sizes, etc.)
+/*! RapidJSON uses 32-bit array/string indices even on 64-bit platforms,
+    instead of using \c size_t. Users may override the SizeType by defining
+    \ref RAPIDJSON_NO_SIZETYPEDEFINE.
+*/
+typedef unsigned SizeType;
+RAPIDJSON_NAMESPACE_END
+#endif
+
+// always import std::size_t to rapidjson namespace
+RAPIDJSON_NAMESPACE_BEGIN
+using std::size_t;
+RAPIDJSON_NAMESPACE_END
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ASSERT
+
+//! Assertion.
+/*! \ingroup RAPIDJSON_CONFIG
+    By default, rapidjson uses C \c assert() for internal assertions.
+    User can override it by defining RAPIDJSON_ASSERT(x) macro.
+
+    \note Parsing errors are handled and can be customized by the
+          \ref RAPIDJSON_ERRORS APIs.
+*/
+#ifndef RAPIDJSON_ASSERT
+#include <cassert>
+#define RAPIDJSON_ASSERT(x) assert(x)
+#endif // RAPIDJSON_ASSERT
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_STATIC_ASSERT
+
+// Prefer C++11 static_assert, if available
+#ifndef RAPIDJSON_STATIC_ASSERT
+#if __cplusplus >= 201103L || ( defined(_MSC_VER) && _MSC_VER >= 1800 )
+#define RAPIDJSON_STATIC_ASSERT(x) \
+   static_assert(x, RAPIDJSON_STRINGIFY(x))
+#endif // C++11
+#endif // RAPIDJSON_STATIC_ASSERT
+
+// Adopt C++03 implementation from boost
+#ifndef RAPIDJSON_STATIC_ASSERT
+#ifndef __clang__
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#endif
+RAPIDJSON_NAMESPACE_BEGIN
+template <bool x> struct STATIC_ASSERTION_FAILURE;
+template <> struct STATIC_ASSERTION_FAILURE<true> { enum { value = 1 }; };
+template <size_t x> struct StaticAssertTest {};
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__GNUC__) || defined(__clang__)
+#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused))
+#else
+#define RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE 
+#endif
+#ifndef __clang__
+//!@endcond
+#endif
+
+/*! \def RAPIDJSON_STATIC_ASSERT
+    \brief (Internal) macro to check for conditions at compile-time
+    \param x compile-time condition
+    \hideinitializer
+ */
+#define RAPIDJSON_STATIC_ASSERT(x) \
+    typedef ::RAPIDJSON_NAMESPACE::StaticAssertTest< \
+      sizeof(::RAPIDJSON_NAMESPACE::STATIC_ASSERTION_FAILURE<bool(x) >)> \
+    RAPIDJSON_JOIN(StaticAssertTypedef, __LINE__) RAPIDJSON_STATIC_ASSERT_UNUSED_ATTRIBUTE
+#endif // RAPIDJSON_STATIC_ASSERT
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_LIKELY, RAPIDJSON_UNLIKELY
+
+//! Compiler branching hint for expression with high probability to be true.
+/*!
+    \ingroup RAPIDJSON_CONFIG
+    \param x Boolean expression likely to be true.
+*/
+#ifndef RAPIDJSON_LIKELY
+#if defined(__GNUC__) || defined(__clang__)
+#define RAPIDJSON_LIKELY(x) __builtin_expect(!!(x), 1)
+#else
+#define RAPIDJSON_LIKELY(x) (x)
+#endif
+#endif
+
+//! Compiler branching hint for expression with low probability to be true.
+/*!
+    \ingroup RAPIDJSON_CONFIG
+    \param x Boolean expression unlikely to be true.
+*/
+#ifndef RAPIDJSON_UNLIKELY
+#if defined(__GNUC__) || defined(__clang__)
+#define RAPIDJSON_UNLIKELY(x) __builtin_expect(!!(x), 0)
+#else
+#define RAPIDJSON_UNLIKELY(x) (x)
+#endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// Helpers
+
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+
+#define RAPIDJSON_MULTILINEMACRO_BEGIN do {  
+#define RAPIDJSON_MULTILINEMACRO_END \
+} while((void)0, 0)
+
+// adopted from Boost
+#define RAPIDJSON_VERSION_CODE(x,y,z) \
+  (((x)*100000) + ((y)*100) + (z))
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_DIAG_PUSH/POP, RAPIDJSON_DIAG_OFF
+
+#if defined(__GNUC__)
+#define RAPIDJSON_GNUC \
+    RAPIDJSON_VERSION_CODE(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__)
+#endif
+
+#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,2,0))
+
+#define RAPIDJSON_PRAGMA(x) _Pragma(RAPIDJSON_STRINGIFY(x))
+#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(GCC diagnostic x)
+#define RAPIDJSON_DIAG_OFF(x) \
+    RAPIDJSON_DIAG_PRAGMA(ignored RAPIDJSON_STRINGIFY(RAPIDJSON_JOIN(-W,x)))
+
+// push/pop support in Clang and GCC>=4.6
+#if defined(__clang__) || (defined(RAPIDJSON_GNUC) && RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0))
+#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push)
+#define RAPIDJSON_DIAG_POP  RAPIDJSON_DIAG_PRAGMA(pop)
+#else // GCC >= 4.2, < 4.6
+#define RAPIDJSON_DIAG_PUSH /* ignored */
+#define RAPIDJSON_DIAG_POP /* ignored */
+#endif
+
+#elif defined(_MSC_VER)
+
+// pragma (MSVC specific)
+#define RAPIDJSON_PRAGMA(x) __pragma(x)
+#define RAPIDJSON_DIAG_PRAGMA(x) RAPIDJSON_PRAGMA(warning(x))
+
+#define RAPIDJSON_DIAG_OFF(x) RAPIDJSON_DIAG_PRAGMA(disable: x)
+#define RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_PRAGMA(push)
+#define RAPIDJSON_DIAG_POP  RAPIDJSON_DIAG_PRAGMA(pop)
+
+#else
+
+#define RAPIDJSON_DIAG_OFF(x) /* ignored */
+#define RAPIDJSON_DIAG_PUSH   /* ignored */
+#define RAPIDJSON_DIAG_POP    /* ignored */
+
+#endif // RAPIDJSON_DIAG_*
+
+///////////////////////////////////////////////////////////////////////////////
+// C++11 features
+
+#ifndef RAPIDJSON_HAS_CXX11_RVALUE_REFS
+#if defined(__clang__)
+#if __has_feature(cxx_rvalue_references) && \
+    (defined(_MSC_VER) || defined(_LIBCPP_VERSION) || defined(__GLIBCXX__) && __GLIBCXX__ >= 20080306)
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
+#else
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0
+#endif
+#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,3,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
+      (defined(_MSC_VER) && _MSC_VER >= 1600) || \
+      (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
+
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 1
+#else
+#define RAPIDJSON_HAS_CXX11_RVALUE_REFS 0
+#endif
+#endif // RAPIDJSON_HAS_CXX11_RVALUE_REFS
+
+#ifndef RAPIDJSON_HAS_CXX11_NOEXCEPT
+#if defined(__clang__)
+#define RAPIDJSON_HAS_CXX11_NOEXCEPT __has_feature(cxx_noexcept)
+#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
+    (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+    (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
+#define RAPIDJSON_HAS_CXX11_NOEXCEPT 1
+#else
+#define RAPIDJSON_HAS_CXX11_NOEXCEPT 0
+#endif
+#endif
+#if RAPIDJSON_HAS_CXX11_NOEXCEPT
+#define RAPIDJSON_NOEXCEPT noexcept
+#else
+#define RAPIDJSON_NOEXCEPT /* noexcept */
+#endif // RAPIDJSON_HAS_CXX11_NOEXCEPT
+
+// no automatic detection, yet
+#ifndef RAPIDJSON_HAS_CXX11_TYPETRAITS
+#if (defined(_MSC_VER) && _MSC_VER >= 1700)
+#define RAPIDJSON_HAS_CXX11_TYPETRAITS 1
+#else
+#define RAPIDJSON_HAS_CXX11_TYPETRAITS 0
+#endif
+#endif
+
+#ifndef RAPIDJSON_HAS_CXX11_RANGE_FOR
+#if defined(__clang__)
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR __has_feature(cxx_range_for)
+#elif (defined(RAPIDJSON_GNUC) && (RAPIDJSON_GNUC >= RAPIDJSON_VERSION_CODE(4,6,0)) && defined(__GXX_EXPERIMENTAL_CXX0X__)) || \
+      (defined(_MSC_VER) && _MSC_VER >= 1700) || \
+      (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x5140 && defined(__GXX_EXPERIMENTAL_CXX0X__))
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR 1
+#else
+#define RAPIDJSON_HAS_CXX11_RANGE_FOR 0
+#endif
+#endif // RAPIDJSON_HAS_CXX11_RANGE_FOR
+
+//!@endcond
+
+//! Assertion (in non-throwing contexts).
+ /*! \ingroup RAPIDJSON_CONFIG
+    Some functions provide a \c noexcept guarantee, if the compiler supports it.
+    In these cases, the \ref RAPIDJSON_ASSERT macro cannot be overridden to
+    throw an exception.  This macro adds a separate customization point for
+    such cases.
+
+    Defaults to C \c assert() (as \ref RAPIDJSON_ASSERT), if \c noexcept is
+    supported, and to \ref RAPIDJSON_ASSERT otherwise.
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_NOEXCEPT_ASSERT
+
+#ifdef RAPIDJSON_ASSERT_THROWS
+#if RAPIDJSON_HAS_CXX11_NOEXCEPT
+#define RAPIDJSON_NOEXCEPT_ASSERT(x)
+#else
+#define RAPIDJSON_NOEXCEPT_ASSERT(x) RAPIDJSON_ASSERT(x)
+#endif // RAPIDJSON_HAS_CXX11_NOEXCEPT
+#else
+#define RAPIDJSON_NOEXCEPT_ASSERT(x) RAPIDJSON_ASSERT(x)
+#endif // RAPIDJSON_ASSERT_THROWS
+
+///////////////////////////////////////////////////////////////////////////////
+// new/delete
+
+#ifndef RAPIDJSON_NEW
+///! customization point for global \c new
+#define RAPIDJSON_NEW(TypeName) new TypeName
+#endif
+#ifndef RAPIDJSON_DELETE
+///! customization point for global \c delete
+#define RAPIDJSON_DELETE(x) delete x
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// Type
+
+/*! \namespace rapidjson
+    \brief main RapidJSON namespace
+    \see RAPIDJSON_NAMESPACE
+*/
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Type of JSON value
+enum Type {
+    kNullType = 0,      //!< null
+    kFalseType = 1,     //!< false
+    kTrueType = 2,      //!< true
+    kObjectType = 3,    //!< object
+    kArrayType = 4,     //!< array 
+    kStringType = 5,    //!< string
+    kNumberType = 6     //!< number
+};
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_RAPIDJSON_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/reader.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/reader.h
new file mode 100644
index 000000000..44a6bcd30
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/reader.h
@@ -0,0 +1,2230 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_READER_H_
+#define RAPIDJSON_READER_H_
+
+/*! \file reader.h */
+
+#include "allocators.h"
+#include "stream.h"
+#include "encodedstream.h"
+#include "internal/meta.h"
+#include "internal/stack.h"
+#include "internal/strtod.h"
+#include <limits>
+
+#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+#endif
+#ifdef RAPIDJSON_SSE42
+#include <nmmintrin.h>
+#elif defined(RAPIDJSON_SSE2)
+#include <emmintrin.h>
+#elif defined(RAPIDJSON_NEON)
+#include <arm_neon.h>
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(old-style-cast)
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(switch-enum)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
+RAPIDJSON_DIAG_OFF(4702)  // unreachable code
+#endif
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#define RAPIDJSON_NOTHING /* deliberately empty */
+#ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN
+#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
+    RAPIDJSON_MULTILINEMACRO_BEGIN \
+    if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \
+    RAPIDJSON_MULTILINEMACRO_END
+#endif
+#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
+    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
+//!@endcond
+
+/*! \def RAPIDJSON_PARSE_ERROR_NORETURN
+    \ingroup RAPIDJSON_ERRORS
+    \brief Macro to indicate a parse error.
+    \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
+    \param offset  position of the error in JSON input (\c size_t)
+
+    This macros can be used as a customization point for the internal
+    error handling mechanism of RapidJSON.
+
+    A common usage model is to throw an exception instead of requiring the
+    caller to explicitly check the \ref rapidjson::GenericReader::Parse's
+    return value:
+
+    \code
+    #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
+       throw ParseException(parseErrorCode, #parseErrorCode, offset)
+
+    #include <stdexcept>               // std::runtime_error
+    #include "rapidjson/error/error.h" // rapidjson::ParseResult
+
+    struct ParseException : std::runtime_error, rapidjson::ParseResult {
+      ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
+        : std::runtime_error(msg), ParseResult(code, offset) {}
+    };
+
+    #include "rapidjson/reader.h"
+    \endcode
+
+    \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
+ */
+#ifndef RAPIDJSON_PARSE_ERROR_NORETURN
+#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset) \
+    RAPIDJSON_MULTILINEMACRO_BEGIN \
+    RAPIDJSON_ASSERT(!HasParseError()); /* Error can only be assigned once */ \
+    SetParseError(parseErrorCode, offset); \
+    RAPIDJSON_MULTILINEMACRO_END
+#endif
+
+/*! \def RAPIDJSON_PARSE_ERROR
+    \ingroup RAPIDJSON_ERRORS
+    \brief (Internal) macro to indicate and handle a parse error.
+    \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
+    \param offset  position of the error in JSON input (\c size_t)
+
+    Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
+
+    \see RAPIDJSON_PARSE_ERROR_NORETURN
+    \hideinitializer
+ */
+#ifndef RAPIDJSON_PARSE_ERROR
+#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset) \
+    RAPIDJSON_MULTILINEMACRO_BEGIN \
+    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset); \
+    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; \
+    RAPIDJSON_MULTILINEMACRO_END
+#endif
+
+#include "error/error.h" // ParseErrorCode, ParseResult
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// ParseFlag
+
+/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
+    \ingroup RAPIDJSON_CONFIG
+    \brief User-defined kParseDefaultFlags definition.
+
+    User can define this as any \c ParseFlag combinations.
+*/
+#ifndef RAPIDJSON_PARSE_DEFAULT_FLAGS
+#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
+#endif
+
+//! Combination of parseFlags
+/*! \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
+ */
+enum ParseFlag {
+    kParseNoFlags = 0,              //!< No flags are set.
+    kParseInsituFlag = 1,           //!< In-situ(destructive) parsing.
+    kParseValidateEncodingFlag = 2, //!< Validate encoding of JSON strings.
+    kParseIterativeFlag = 4,        //!< Iterative(constant complexity in terms of function call stack size) parsing.
+    kParseStopWhenDoneFlag = 8,     //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
+    kParseFullPrecisionFlag = 16,   //!< Parse number in full precision (but slower).
+    kParseCommentsFlag = 32,        //!< Allow one-line (//) and multi-line (/**/) comments.
+    kParseNumbersAsStringsFlag = 64,    //!< Parse all numbers (ints/doubles) as strings.
+    kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays.
+    kParseNanAndInfFlag = 256,      //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles.
+    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS  //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Handler
+
+/*! \class rapidjson::Handler
+    \brief Concept for receiving events from GenericReader upon parsing.
+    The functions return true if no error occurs. If they return false,
+    the event publisher should terminate the process.
+\code
+concept Handler {
+    typename Ch;
+
+    bool Null();
+    bool Bool(bool b);
+    bool Int(int i);
+    bool Uint(unsigned i);
+    bool Int64(int64_t i);
+    bool Uint64(uint64_t i);
+    bool Double(double d);
+    /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
+    bool RawNumber(const Ch* str, SizeType length, bool copy);
+    bool String(const Ch* str, SizeType length, bool copy);
+    bool StartObject();
+    bool Key(const Ch* str, SizeType length, bool copy);
+    bool EndObject(SizeType memberCount);
+    bool StartArray();
+    bool EndArray(SizeType elementCount);
+};
+\endcode
+*/
+///////////////////////////////////////////////////////////////////////////////
+// BaseReaderHandler
+
+//! Default implementation of Handler.
+/*! This can be used as base class of any reader handler.
+    \note implements Handler concept
+*/
+template<typename Encoding = UTF8<>, typename Derived = void>
+struct BaseReaderHandler {
+    typedef typename Encoding::Ch Ch;
+
+    typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
+
+    bool Default() { return true; }
+    bool Null() { return static_cast<Override&>(*this).Default(); }
+    bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
+    bool Int(int) { return static_cast<Override&>(*this).Default(); }
+    bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
+    bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
+    bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
+    bool Double(double) { return static_cast<Override&>(*this).Default(); }
+    /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length)
+    bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
+    bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
+    bool StartObject() { return static_cast<Override&>(*this).Default(); }
+    bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
+    bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
+    bool StartArray() { return static_cast<Override&>(*this).Default(); }
+    bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// StreamLocalCopy
+
+namespace internal {
+
+template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
+class StreamLocalCopy;
+
+//! Do copy optimization.
+template<typename Stream>
+class StreamLocalCopy<Stream, 1> {
+public:
+    StreamLocalCopy(Stream& original) : s(original), original_(original) {}
+    ~StreamLocalCopy() { original_ = s; }
+
+    Stream s;
+
+private:
+    StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
+
+    Stream& original_;
+};
+
+//! Keep reference.
+template<typename Stream>
+class StreamLocalCopy<Stream, 0> {
+public:
+    StreamLocalCopy(Stream& original) : s(original) {}
+
+    Stream& s;
+
+private:
+    StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
+};
+
+} // namespace internal
+
+///////////////////////////////////////////////////////////////////////////////
+// SkipWhitespace
+
+//! Skip the JSON white spaces in a stream.
+/*! \param is A input stream for skipping white spaces.
+    \note This function has SSE2/SSE4.2 specialization.
+*/
+template<typename InputStream>
+void SkipWhitespace(InputStream& is) {
+    internal::StreamLocalCopy<InputStream> copy(is);
+    InputStream& s(copy.s);
+
+    typename InputStream::Ch c;
+    while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t')
+        s.Take();
+}
+
+inline const char* SkipWhitespace(const char* p, const char* end) {
+    while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    return p;
+}
+
+#ifdef RAPIDJSON_SSE42
+//! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {
+    // Fast return for single non-whitespace
+    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+        ++p;
+    else
+        return p;
+
+    // 16-byte align to the next boundary
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    while (p != nextAligned)
+        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+            ++p;
+        else
+            return p;
+
+    // The rest of string using SIMD
+    static const char whitespace[16] = " \n\r\t";
+    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
+
+    for (;; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
+        if (r != 16)    // some of characters is non-whitespace
+            return p + r;
+    }
+}
+
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Fast return for single non-whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    // The middle of string using SIMD
+    static const char whitespace[16] = " \n\r\t";
+    const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0]));
+
+    for (; p <= end - 16; p += 16) {
+        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
+        const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY);
+        if (r != 16)    // some of characters is non-whitespace
+            return p + r;
+    }
+
+    return SkipWhitespace(p, end);
+}
+
+#elif defined(RAPIDJSON_SSE2)
+
+//! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {
+    // Fast return for single non-whitespace
+    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+        ++p;
+    else
+        return p;
+
+    // 16-byte align to the next boundary
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    while (p != nextAligned)
+        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+            ++p;
+        else
+            return p;
+
+    // The rest of string
+    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
+    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
+    #undef C16
+
+    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
+    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
+    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
+    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
+
+    for (;; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        __m128i x = _mm_cmpeq_epi8(s, w0);
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
+        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
+        if (r != 0) {   // some of characters may be non-whitespace
+#ifdef _MSC_VER         // Find the index of first non-whitespace
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            return p + offset;
+#else
+            return p + __builtin_ffs(r) - 1;
+#endif
+        }
+    }
+}
+
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Fast return for single non-whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    // The rest of string
+    #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c }
+    static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') };
+    #undef C16
+
+    const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0]));
+    const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0]));
+    const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0]));
+    const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0]));
+
+    for (; p <= end - 16; p += 16) {
+        const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
+        __m128i x = _mm_cmpeq_epi8(s, w0);
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
+        x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
+        unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x));
+        if (r != 0) {   // some of characters may be non-whitespace
+#ifdef _MSC_VER         // Find the index of first non-whitespace
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            return p + offset;
+#else
+            return p + __builtin_ffs(r) - 1;
+#endif
+        }
+    }
+
+    return SkipWhitespace(p, end);
+}
+
+#elif defined(RAPIDJSON_NEON)
+
+//! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at once.
+inline const char *SkipWhitespace_SIMD(const char* p) {
+    // Fast return for single non-whitespace
+    if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+        ++p;
+    else
+        return p;
+
+    // 16-byte align to the next boundary
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    while (p != nextAligned)
+        if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
+            ++p;
+        else
+            return p;
+
+    const uint8x16_t w0 = vmovq_n_u8(' ');
+    const uint8x16_t w1 = vmovq_n_u8('\n');
+    const uint8x16_t w2 = vmovq_n_u8('\r');
+    const uint8x16_t w3 = vmovq_n_u8('\t');
+
+    for (;; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, w0);
+        x = vorrq_u8(x, vceqq_u8(s, w1));
+        x = vorrq_u8(x, vceqq_u8(s, w2));
+        x = vorrq_u8(x, vceqq_u8(s, w3));
+
+        x = vmvnq_u8(x);                       // Negate
+        x = vrev64q_u8(x);                     // Rev in 64
+        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+        if (low == 0) {
+            if (high != 0) {
+                int lz =__builtin_clzll(high);;
+                return p + 8 + (lz >> 3);
+            }
+        } else {
+            int lz = __builtin_clzll(low);;
+            return p + (lz >> 3);
+        }
+    }
+}
+
+inline const char *SkipWhitespace_SIMD(const char* p, const char* end) {
+    // Fast return for single non-whitespace
+    if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t'))
+        ++p;
+    else
+        return p;
+
+    const uint8x16_t w0 = vmovq_n_u8(' ');
+    const uint8x16_t w1 = vmovq_n_u8('\n');
+    const uint8x16_t w2 = vmovq_n_u8('\r');
+    const uint8x16_t w3 = vmovq_n_u8('\t');
+
+    for (; p <= end - 16; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, w0);
+        x = vorrq_u8(x, vceqq_u8(s, w1));
+        x = vorrq_u8(x, vceqq_u8(s, w2));
+        x = vorrq_u8(x, vceqq_u8(s, w3));
+
+        x = vmvnq_u8(x);                       // Negate
+        x = vrev64q_u8(x);                     // Rev in 64
+        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+        if (low == 0) {
+            if (high != 0) {
+                int lz = __builtin_clzll(high);
+                return p + 8 + (lz >> 3);
+            }
+        } else {
+            int lz = __builtin_clzll(low);
+            return p + (lz >> 3);
+        }
+    }
+
+    return SkipWhitespace(p, end);
+}
+
+#endif // RAPIDJSON_NEON
+
+#ifdef RAPIDJSON_SIMD
+//! Template function specialization for InsituStringStream
+template<> inline void SkipWhitespace(InsituStringStream& is) {
+    is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
+}
+
+//! Template function specialization for StringStream
+template<> inline void SkipWhitespace(StringStream& is) {
+    is.src_ = SkipWhitespace_SIMD(is.src_);
+}
+
+template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) {
+    is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_);
+}
+#endif // RAPIDJSON_SIMD
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericReader
+
+//! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
+/*! GenericReader parses JSON text from a stream, and send events synchronously to an
+    object implementing Handler concept.
+
+    It needs to allocate a stack for storing a single decoded string during
+    non-destructive parsing.
+
+    For in-situ parsing, the decoded string is directly written to the source
+    text string, no temporary buffer is required.
+
+    A GenericReader object can be reused for parsing multiple JSON text.
+
+    \tparam SourceEncoding Encoding of the input stream.
+    \tparam TargetEncoding Encoding of the parse output.
+    \tparam StackAllocator Allocator type for stack.
+*/
+template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
+class GenericReader {
+public:
+    typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
+
+    //! Constructor.
+    /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
+        \param stackCapacity stack capacity in bytes for storing a single decoded string.  (Only use for non-destructive parsing)
+    */
+    GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) :
+        stack_(stackAllocator, stackCapacity), parseResult_(), state_(IterativeParsingStartState) {}
+
+    //! Parse JSON text.
+    /*! \tparam parseFlags Combination of \ref ParseFlag.
+        \tparam InputStream Type of input stream, implementing Stream concept.
+        \tparam Handler Type of handler, implementing Handler concept.
+        \param is Input stream to be parsed.
+        \param handler The handler to receive events.
+        \return Whether the parsing is successful.
+    */
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    ParseResult Parse(InputStream& is, Handler& handler) {
+        if (parseFlags & kParseIterativeFlag)
+            return IterativeParse<parseFlags>(is, handler);
+
+        parseResult_.Clear();
+
+        ClearStackOnExit scope(*this);
+
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+
+        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) {
+            RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+        }
+        else {
+            ParseValue<parseFlags>(is, handler);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+
+            if (!(parseFlags & kParseStopWhenDoneFlag)) {
+                SkipWhitespaceAndComments<parseFlags>(is);
+                RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+
+                if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) {
+                    RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
+                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+                }
+            }
+        }
+
+        return parseResult_;
+    }
+
+    //! Parse JSON text (with \ref kParseDefaultFlags)
+    /*! \tparam InputStream Type of input stream, implementing Stream concept
+        \tparam Handler Type of handler, implementing Handler concept.
+        \param is Input stream to be parsed.
+        \param handler The handler to receive events.
+        \return Whether the parsing is successful.
+    */
+    template <typename InputStream, typename Handler>
+    ParseResult Parse(InputStream& is, Handler& handler) {
+        return Parse<kParseDefaultFlags>(is, handler);
+    }
+
+    //! Initialize JSON text token-by-token parsing
+    /*!
+     */
+    void IterativeParseInit() {
+        parseResult_.Clear();
+        state_ = IterativeParsingStartState;
+    }
+
+    //! Parse one token from JSON text
+    /*! \tparam InputStream Type of input stream, implementing Stream concept
+        \tparam Handler Type of handler, implementing Handler concept.
+        \param is Input stream to be parsed.
+        \param handler The handler to receive events.
+        \return Whether the parsing is successful.
+     */
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    bool IterativeParseNext(InputStream& is, Handler& handler) {
+        while (RAPIDJSON_LIKELY(is.Peek() != '\0')) {
+            SkipWhitespaceAndComments<parseFlags>(is);
+
+            Token t = Tokenize(is.Peek());
+            IterativeParsingState n = Predict(state_, t);
+            IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler);
+
+            // If we've finished or hit an error...
+            if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) {
+                // Report errors.
+                if (d == IterativeParsingErrorState) {
+                    HandleError(state_, is);
+                    return false;
+                }
+
+                // Transition to the finish state.
+                RAPIDJSON_ASSERT(d == IterativeParsingFinishState);
+                state_ = d;
+
+                // If StopWhenDone is not set...
+                if (!(parseFlags & kParseStopWhenDoneFlag)) {
+                    // ... and extra non-whitespace data is found...
+                    SkipWhitespaceAndComments<parseFlags>(is);
+                    if (is.Peek() != '\0') {
+                        // ... this is considered an error.
+                        HandleError(state_, is);
+                        return false;
+                    }
+                }
+
+                // Success! We are done!
+                return true;
+            }
+
+            // Transition to the new state.
+            state_ = d;
+
+            // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now.
+            if (!IsIterativeParsingDelimiterState(n))
+                return true;
+        }
+
+        // We reached the end of file.
+        stack_.Clear();
+
+        if (state_ != IterativeParsingFinishState) {
+            HandleError(state_, is);
+            return false;
+        }
+
+        return true;
+    }
+
+    //! Check if token-by-token parsing JSON text is complete
+    /*! \return Whether the JSON has been fully decoded.
+     */
+    RAPIDJSON_FORCEINLINE bool IterativeParseComplete() const {
+        return IsIterativeParsingCompleteState(state_);
+    }
+
+    //! Whether a parse error has occurred in the last parsing.
+    bool HasParseError() const { return parseResult_.IsError(); }
+
+    //! Get the \ref ParseErrorCode of last parsing.
+    ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
+
+    //! Get the position of last parsing error in input, 0 otherwise.
+    size_t GetErrorOffset() const { return parseResult_.Offset(); }
+
+protected:
+    void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
+
+private:
+    // Prohibit copy constructor & assignment operator.
+    GenericReader(const GenericReader&);
+    GenericReader& operator=(const GenericReader&);
+
+    void ClearStack() { stack_.Clear(); }
+
+    // clear stack on any exit from ParseStream, e.g. due to exception
+    struct ClearStackOnExit {
+        explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
+        ~ClearStackOnExit() { r_.ClearStack(); }
+    private:
+        GenericReader& r_;
+        ClearStackOnExit(const ClearStackOnExit&);
+        ClearStackOnExit& operator=(const ClearStackOnExit&);
+    };
+
+    template<unsigned parseFlags, typename InputStream>
+    void SkipWhitespaceAndComments(InputStream& is) {
+        SkipWhitespace(is);
+
+        if (parseFlags & kParseCommentsFlag) {
+            while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) {
+                if (Consume(is, '*')) {
+                    while (true) {
+                        if (RAPIDJSON_UNLIKELY(is.Peek() == '\0'))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
+                        else if (Consume(is, '*')) {
+                            if (Consume(is, '/'))
+                                break;
+                        }
+                        else
+                            is.Take();
+                    }
+                }
+                else if (RAPIDJSON_LIKELY(Consume(is, '/')))
+                    while (is.Peek() != '\0' && is.Take() != '\n') {}
+                else
+                    RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
+
+                SkipWhitespace(is);
+            }
+        }
+    }
+
+    // Parse object: { string : value, ... }
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseObject(InputStream& is, Handler& handler) {
+        RAPIDJSON_ASSERT(is.Peek() == '{');
+        is.Take();  // Skip '{'
+
+        if (RAPIDJSON_UNLIKELY(!handler.StartObject()))
+            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+        if (Consume(is, '}')) {
+            if (RAPIDJSON_UNLIKELY(!handler.EndObject(0)))  // empty object
+                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+            return;
+        }
+
+        for (SizeType memberCount = 0;;) {
+            if (RAPIDJSON_UNLIKELY(is.Peek() != '"'))
+                RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
+
+            ParseString<parseFlags>(is, handler, true);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            if (RAPIDJSON_UNLIKELY(!Consume(is, ':')))
+                RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
+
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            ParseValue<parseFlags>(is, handler);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            ++memberCount;
+
+            switch (is.Peek()) {
+                case ',':
+                    is.Take();
+                    SkipWhitespaceAndComments<parseFlags>(is);
+                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+                    break;
+                case '}':
+                    is.Take();
+                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
+                        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                    return;
+                default:
+                    RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy
+            }
+
+            if (parseFlags & kParseTrailingCommasFlag) {
+                if (is.Peek() == '}') {
+                    if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount)))
+                        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                    is.Take();
+                    return;
+                }
+            }
+        }
+    }
+
+    // Parse array: [ value, ... ]
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseArray(InputStream& is, Handler& handler) {
+        RAPIDJSON_ASSERT(is.Peek() == '[');
+        is.Take();  // Skip '['
+
+        if (RAPIDJSON_UNLIKELY(!handler.StartArray()))
+            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+        if (Consume(is, ']')) {
+            if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array
+                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+            return;
+        }
+
+        for (SizeType elementCount = 0;;) {
+            ParseValue<parseFlags>(is, handler);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            ++elementCount;
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+
+            if (Consume(is, ',')) {
+                SkipWhitespaceAndComments<parseFlags>(is);
+                RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+            }
+            else if (Consume(is, ']')) {
+                if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
+                    RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                return;
+            }
+            else
+                RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
+
+            if (parseFlags & kParseTrailingCommasFlag) {
+                if (is.Peek() == ']') {
+                    if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount)))
+                        RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+                    is.Take();
+                    return;
+                }
+            }
+        }
+    }
+
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseNull(InputStream& is, Handler& handler) {
+        RAPIDJSON_ASSERT(is.Peek() == 'n');
+        is.Take();
+
+        if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) {
+            if (RAPIDJSON_UNLIKELY(!handler.Null()))
+                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+        }
+        else
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
+    }
+
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseTrue(InputStream& is, Handler& handler) {
+        RAPIDJSON_ASSERT(is.Peek() == 't');
+        is.Take();
+
+        if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) {
+            if (RAPIDJSON_UNLIKELY(!handler.Bool(true)))
+                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+        }
+        else
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
+    }
+
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseFalse(InputStream& is, Handler& handler) {
+        RAPIDJSON_ASSERT(is.Peek() == 'f');
+        is.Take();
+
+        if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) {
+            if (RAPIDJSON_UNLIKELY(!handler.Bool(false)))
+                RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
+        }
+        else
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell());
+    }
+
+    template<typename InputStream>
+    RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) {
+        if (RAPIDJSON_LIKELY(is.Peek() == expect)) {
+            is.Take();
+            return true;
+        }
+        else
+            return false;
+    }
+
+    // Helper function to parse four hexadecimal digits in \uXXXX in ParseString().
+    template<typename InputStream>
+    unsigned ParseHex4(InputStream& is, size_t escapeOffset) {
+        unsigned codepoint = 0;
+        for (int i = 0; i < 4; i++) {
+            Ch c = is.Peek();
+            codepoint <<= 4;
+            codepoint += static_cast<unsigned>(c);
+            if (c >= '0' && c <= '9')
+                codepoint -= '0';
+            else if (c >= 'A' && c <= 'F')
+                codepoint -= 'A' - 10;
+            else if (c >= 'a' && c <= 'f')
+                codepoint -= 'a' - 10;
+            else {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset);
+                RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
+            }
+            is.Take();
+        }
+        return codepoint;
+    }
+
+    template <typename CharType>
+    class StackStream {
+    public:
+        typedef CharType Ch;
+
+        StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
+        RAPIDJSON_FORCEINLINE void Put(Ch c) {
+            *stack_.template Push<Ch>() = c;
+            ++length_;
+        }
+
+        RAPIDJSON_FORCEINLINE void* Push(SizeType count) {
+            length_ += count;
+            return stack_.template Push<Ch>(count);
+        }
+
+        size_t Length() const { return length_; }
+
+        Ch* Pop() {
+            return stack_.template Pop<Ch>(length_);
+        }
+
+    private:
+        StackStream(const StackStream&);
+        StackStream& operator=(const StackStream&);
+
+        internal::Stack<StackAllocator>& stack_;
+        SizeType length_;
+    };
+
+    // Parse string and generate String event. Different code paths for kParseInsituFlag.
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
+        internal::StreamLocalCopy<InputStream> copy(is);
+        InputStream& s(copy.s);
+
+        RAPIDJSON_ASSERT(s.Peek() == '\"');
+        s.Take();  // Skip '\"'
+
+        bool success = false;
+        if (parseFlags & kParseInsituFlag) {
+            typename InputStream::Ch *head = s.PutBegin();
+            ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+            size_t length = s.PutEnd(head) - 1;
+            RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
+            const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
+            success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
+        }
+        else {
+            StackStream<typename TargetEncoding::Ch> stackStream(stack_);
+            ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+            SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
+            const typename TargetEncoding::Ch* const str = stackStream.Pop();
+            success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
+        }
+        if (RAPIDJSON_UNLIKELY(!success))
+            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
+    }
+
+    // Parse string to an output is
+    // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
+    template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
+    RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+        static const char escape[256] = {
+            Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
+            Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
+            0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
+            0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
+        };
+#undef Z16
+//!@endcond
+
+        for (;;) {
+            // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation.
+            if (!(parseFlags & kParseValidateEncodingFlag))
+                ScanCopyUnescapedString(is, os);
+
+            Ch c = is.Peek();
+            if (RAPIDJSON_UNLIKELY(c == '\\')) {    // Escape
+                size_t escapeOffset = is.Tell();    // For invalid escaping, report the initial '\\' as error offset
+                is.Take();
+                Ch e = is.Peek();
+                if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) {
+                    is.Take();
+                    os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)]));
+                }
+                else if (RAPIDJSON_LIKELY(e == 'u')) {    // Unicode
+                    is.Take();
+                    unsigned codepoint = ParseHex4(is, escapeOffset);
+                    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+                    if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
+                        // Handle UTF-16 surrogate pair
+                        if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                        unsigned codepoint2 = ParseHex4(is, escapeOffset);
+                        RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
+                        if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
+                        codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
+                    }
+                    TEncoding::Encode(os, codepoint);
+                }
+                else
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset);
+            }
+            else if (RAPIDJSON_UNLIKELY(c == '"')) {    // Closing double quote
+                is.Take();
+                os.Put('\0');   // null-terminate the string
+                return;
+            }
+            else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
+                if (c == '\0')
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
+                else
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
+            }
+            else {
+                size_t offset = is.Tell();
+                if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ?
+                    !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
+                    !Transcoder<SEncoding, TEncoding>::Transcode(is, os))))
+                    RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset);
+            }
+        }
+    }
+
+    template<typename InputStream, typename OutputStream>
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) {
+            // Do nothing for generic version
+    }
+
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+    // StringStream -> StackStream<char>
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+        const char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                return;
+            }
+            else
+                os.Put(*p++);
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+                SizeType length;
+    #ifdef _MSC_VER         // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+    #else
+                length = static_cast<SizeType>(__builtin_ffs(r) - 1);
+    #endif
+                if (length != 0) {
+                    char* q = reinterpret_cast<char*>(os.Push(length));
+                    for (size_t i = 0; i < length; i++)
+                        q[i] = p[i];
+
+                    p += length;
+                }
+                break;
+            }
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s);
+        }
+
+        is.src_ = p;
+    }
+
+    // InsituStringStream -> InsituStringStream
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+        RAPIDJSON_ASSERT(&is == &os);
+        (void)os;
+
+        if (is.src_ == is.dst_) {
+            SkipUnescapedString(is);
+            return;
+        }
+
+        char* p = is.src_;
+        char *q = is.dst_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                is.dst_ = q;
+                return;
+            }
+            else
+                *q++ = *p++;
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16, q += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+                size_t length;
+#ifdef _MSC_VER         // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+#else
+                length = static_cast<size_t>(__builtin_ffs(r) - 1);
+#endif
+                for (const char* pend = p + length; p != pend; )
+                    *q++ = *p++;
+                break;
+            }
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s);
+        }
+
+        is.src_ = p;
+        is.dst_ = q;
+    }
+
+    // When read/write pointers are the same for insitu stream, just skip unescaped characters
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+        RAPIDJSON_ASSERT(is.src_ == is.dst_);
+        char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        for (; p != nextAligned; p++)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = is.dst_ = p;
+                return;
+            }
+
+        // The rest of string using SIMD
+        static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+        static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+        static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+        const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+        const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+        const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+        for (;; p += 16) {
+            const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+            const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+            const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+            const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+            const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+            unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+            if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+                size_t length;
+#ifdef _MSC_VER         // Find the index of first escaped
+                unsigned long offset;
+                _BitScanForward(&offset, r);
+                length = offset;
+#else
+                length = static_cast<size_t>(__builtin_ffs(r) - 1);
+#endif
+                p += length;
+                break;
+            }
+        }
+
+        is.src_ = is.dst_ = p;
+    }
+#elif defined(RAPIDJSON_NEON)
+    // StringStream -> StackStream<char>
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) {
+        const char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                return;
+            }
+            else
+                os.Put(*p++);
+
+        // The rest of string using SIMD
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));
+
+            x = vrev64q_u8(x);                     // Rev in 64
+            uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+            uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+            SizeType length = 0;
+            bool escaped = false;
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = (unsigned)__builtin_clzll(high);;
+                    length = 8 + (lz >> 3);
+                    escaped = true;
+                }
+            } else {
+                unsigned lz = (unsigned)__builtin_clzll(low);;
+                length = lz >> 3;
+                escaped = true;
+            }
+            if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
+                if (length != 0) {
+                    char* q = reinterpret_cast<char*>(os.Push(length));
+                    for (size_t i = 0; i < length; i++)
+                        q[i] = p[i];
+
+                    p += length;
+                }
+                break;
+            }
+            vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s);
+        }
+
+        is.src_ = p;
+    }
+
+    // InsituStringStream -> InsituStringStream
+    static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) {
+        RAPIDJSON_ASSERT(&is == &os);
+        (void)os;
+
+        if (is.src_ == is.dst_) {
+            SkipUnescapedString(is);
+            return;
+        }
+
+        char* p = is.src_;
+        char *q = is.dst_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        while (p != nextAligned)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = p;
+                is.dst_ = q;
+                return;
+            }
+            else
+                *q++ = *p++;
+
+        // The rest of string using SIMD
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16, q += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));
+
+            x = vrev64q_u8(x);                     // Rev in 64
+            uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+            uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+            SizeType length = 0;
+            bool escaped = false;
+            if (low == 0) {
+                if (high != 0) {
+                    unsigned lz = (unsigned)__builtin_clzll(high);
+                    length = 8 + (lz >> 3);
+                    escaped = true;
+                }
+            } else {
+                unsigned lz = (unsigned)__builtin_clzll(low);
+                length = lz >> 3;
+                escaped = true;
+            }
+            if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
+                for (const char* pend = p + length; p != pend; ) {
+                    *q++ = *p++;
+                }
+                break;
+            }
+            vst1q_u8(reinterpret_cast<uint8_t *>(q), s);
+        }
+
+        is.src_ = p;
+        is.dst_ = q;
+    }
+
+    // When read/write pointers are the same for insitu stream, just skip unescaped characters
+    static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) {
+        RAPIDJSON_ASSERT(is.src_ == is.dst_);
+        char* p = is.src_;
+
+        // Scan one by one until alignment (unaligned load may cross page boundary and cause crash)
+        const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+        for (; p != nextAligned; p++)
+            if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) {
+                is.src_ = is.dst_ = p;
+                return;
+            }
+
+        // The rest of string using SIMD
+        const uint8x16_t s0 = vmovq_n_u8('"');
+        const uint8x16_t s1 = vmovq_n_u8('\\');
+        const uint8x16_t s2 = vmovq_n_u8('\b');
+        const uint8x16_t s3 = vmovq_n_u8(32);
+
+        for (;; p += 16) {
+            const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p));
+            uint8x16_t x = vceqq_u8(s, s0);
+            x = vorrq_u8(x, vceqq_u8(s, s1));
+            x = vorrq_u8(x, vceqq_u8(s, s2));
+            x = vorrq_u8(x, vcltq_u8(s, s3));
+
+            x = vrev64q_u8(x);                     // Rev in 64
+            uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+            uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+            if (low == 0) {
+                if (high != 0) {
+                    int lz = __builtin_clzll(high);
+                    p += 8 + (lz >> 3);
+                    break;
+                }
+            } else {
+                int lz = __builtin_clzll(low);
+                p += lz >> 3;
+                break;
+            }
+        }
+
+        is.src_ = is.dst_ = p;
+    }
+#endif // RAPIDJSON_NEON
+
+    template<typename InputStream, bool backup, bool pushOnTake>
+    class NumberStream;
+
+    template<typename InputStream>
+    class NumberStream<InputStream, false, false> {
+    public:
+        typedef typename InputStream::Ch Ch;
+
+        NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
+
+        RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
+        RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
+        RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
+		  RAPIDJSON_FORCEINLINE void Push(char) {}
+
+        size_t Tell() { return is.Tell(); }
+        size_t Length() { return 0; }
+        const char* Pop() { return 0; }
+
+    protected:
+        NumberStream& operator=(const NumberStream&);
+
+        InputStream& is;
+    };
+
+    template<typename InputStream>
+    class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> {
+        typedef NumberStream<InputStream, false, false> Base;
+    public:
+        NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {}
+
+        RAPIDJSON_FORCEINLINE Ch TakePush() {
+            stackStream.Put(static_cast<char>(Base::is.Peek()));
+            return Base::is.Take();
+        }
+
+        RAPIDJSON_FORCEINLINE void Push(char c) {
+            stackStream.Put(c);
+        }
+
+        size_t Length() { return stackStream.Length(); }
+
+        const char* Pop() {
+            stackStream.Put('\0');
+            return stackStream.Pop();
+        }
+
+    private:
+        StackStream<char> stackStream;
+    };
+
+    template<typename InputStream>
+    class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> {
+        typedef NumberStream<InputStream, true, false> Base;
+    public:
+        NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {}
+
+        RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); }
+    };
+
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseNumber(InputStream& is, Handler& handler) {
+        internal::StreamLocalCopy<InputStream> copy(is);
+        NumberStream<InputStream,
+            ((parseFlags & kParseNumbersAsStringsFlag) != 0) ?
+                ((parseFlags & kParseInsituFlag) == 0) :
+                ((parseFlags & kParseFullPrecisionFlag) != 0),
+            (parseFlags & kParseNumbersAsStringsFlag) != 0 &&
+                (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s);
+
+        size_t startOffset = s.Tell();
+        double d = 0.0;
+        bool useNanOrInf = false;
+
+        // Parse minus
+        bool minus = Consume(s, '-');
+
+        // Parse int: zero / ( digit1-9 *DIGIT )
+        unsigned i = 0;
+        uint64_t i64 = 0;
+        bool use64bit = false;
+        int significandDigit = 0;
+        if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) {
+            i = 0;
+            s.TakePush();
+        }
+        else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) {
+            i = static_cast<unsigned>(s.TakePush() - '0');
+
+            if (minus)
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648
+                        if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) {
+                            i64 = i;
+                            use64bit = true;
+                            break;
+                        }
+                    }
+                    i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
+                    significandDigit++;
+                }
+            else
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295
+                        if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) {
+                            i64 = i;
+                            use64bit = true;
+                            break;
+                        }
+                    }
+                    i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
+                    significandDigit++;
+                }
+        }
+        // Parse NaN or Infinity here
+        else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) {
+            if (Consume(s, 'N')) {
+                if (Consume(s, 'a') && Consume(s, 'N')) {
+                    d = std::numeric_limits<double>::quiet_NaN();
+                    useNanOrInf = true;
+                }
+            }
+            else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) {
+                if (Consume(s, 'n') && Consume(s, 'f')) {
+                    d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity());
+                    useNanOrInf = true;
+
+                    if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n')
+                                                                && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) {
+                        RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+                    }
+                }
+            }
+
+            if (RAPIDJSON_UNLIKELY(!useNanOrInf)) {
+                RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+            }
+        }
+        else
+            RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
+
+        // Parse 64bit int
+        bool useDouble = false;
+        if (use64bit) {
+            if (minus)
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                     if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808
+                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) {
+                            d = static_cast<double>(i64);
+                            useDouble = true;
+                            break;
+                        }
+                    i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
+                    significandDigit++;
+                }
+            else
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615
+                        if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) {
+                            d = static_cast<double>(i64);
+                            useDouble = true;
+                            break;
+                        }
+                    i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
+                    significandDigit++;
+                }
+        }
+
+        // Force double for big integer
+        if (useDouble) {
+            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                d = d * 10 + (s.TakePush() - '0');
+            }
+        }
+
+        // Parse frac = decimal-point 1*DIGIT
+        int expFrac = 0;
+        size_t decimalPosition;
+        if (Consume(s, '.')) {
+            decimalPosition = s.Length();
+
+            if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9')))
+                RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
+
+            if (!useDouble) {
+#if RAPIDJSON_64BIT
+                // Use i64 to store significand in 64-bit architecture
+                if (!use64bit)
+                    i64 = i;
+
+                while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                    if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
+                        break;
+                    else {
+                        i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
+                        --expFrac;
+                        if (i64 != 0)
+                            significandDigit++;
+                    }
+                }
+
+                d = static_cast<double>(i64);
+#else
+                // Use double to store significand in 32-bit architecture
+                d = static_cast<double>(use64bit ? i64 : i);
+#endif
+                useDouble = true;
+            }
+
+            while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                if (significandDigit < 17) {
+                    d = d * 10.0 + (s.TakePush() - '0');
+                    --expFrac;
+                    if (RAPIDJSON_LIKELY(d > 0.0))
+                        significandDigit++;
+                }
+                else
+                    s.TakePush();
+            }
+        }
+        else
+            decimalPosition = s.Length(); // decimal position at the end of integer.
+
+        // Parse exp = e [ minus / plus ] 1*DIGIT
+        int exp = 0;
+        if (Consume(s, 'e') || Consume(s, 'E')) {
+            if (!useDouble) {
+                d = static_cast<double>(use64bit ? i64 : i);
+                useDouble = true;
+            }
+
+            bool expMinus = false;
+            if (Consume(s, '+'))
+                ;
+            else if (Consume(s, '-'))
+                expMinus = true;
+
+            if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                exp = static_cast<int>(s.Take() - '0');
+                if (expMinus) {
+                    // (exp + expFrac) must not underflow int => we're detecting when -exp gets
+                    // dangerously close to INT_MIN (a pessimistic next digit 9 would push it into
+                    // underflow territory):
+                    //
+                    //        -(exp * 10 + 9) + expFrac >= INT_MIN
+                    //   <=>  exp <= (expFrac - INT_MIN - 9) / 10
+                    RAPIDJSON_ASSERT(expFrac <= 0);
+                    int maxExp = (expFrac + 2147483639) / 10;
+
+                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                        exp = exp * 10 + static_cast<int>(s.Take() - '0');
+                        if (RAPIDJSON_UNLIKELY(exp > maxExp)) {
+                            while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9'))  // Consume the rest of exponent
+                                s.Take();
+                        }
+                    }
+                }
+                else {  // positive exp
+                    int maxExp = 308 - expFrac;
+                    while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) {
+                        exp = exp * 10 + static_cast<int>(s.Take() - '0');
+                        if (RAPIDJSON_UNLIKELY(exp > maxExp))
+                            RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
+                    }
+                }
+            }
+            else
+                RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
+
+            if (expMinus)
+                exp = -exp;
+        }
+
+        // Finish parsing, call event according to the type of number.
+        bool cont = true;
+
+        if (parseFlags & kParseNumbersAsStringsFlag) {
+            if (parseFlags & kParseInsituFlag) {
+                s.Pop();  // Pop stack no matter if it will be used or not.
+                typename InputStream::Ch* head = is.PutBegin();
+                const size_t length = s.Tell() - startOffset;
+                RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
+                // unable to insert the \0 character here, it will erase the comma after this number
+                const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head);
+                cont = handler.RawNumber(str, SizeType(length), false);
+            }
+            else {
+                SizeType numCharsToCopy = static_cast<SizeType>(s.Length());
+                StringStream srcStream(s.Pop());
+                StackStream<typename TargetEncoding::Ch> dstStream(stack_);
+                while (numCharsToCopy--) {
+                    Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream);
+                }
+                dstStream.Put('\0');
+                const typename TargetEncoding::Ch* str = dstStream.Pop();
+                const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1;
+                cont = handler.RawNumber(str, SizeType(length), true);
+            }
+        }
+        else {
+           size_t length = s.Length();
+           const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
+
+           if (useDouble) {
+               int p = exp + expFrac;
+               if (parseFlags & kParseFullPrecisionFlag)
+                   d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
+               else
+                   d = internal::StrtodNormalPrecision(d, p);
+
+               // Use > max, instead of == inf, to fix bogus warning -Wfloat-equal
+               if (d > (std::numeric_limits<double>::max)()) {
+                   // Overflow
+                   // TODO: internal::StrtodX should report overflow (or underflow)
+                   RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset);
+               }
+
+               cont = handler.Double(minus ? -d : d);
+           }
+           else if (useNanOrInf) {
+               cont = handler.Double(d);
+           }
+           else {
+               if (use64bit) {
+                   if (minus)
+                       cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
+                   else
+                       cont = handler.Uint64(i64);
+               }
+               else {
+                   if (minus)
+                       cont = handler.Int(static_cast<int32_t>(~i + 1));
+                   else
+                       cont = handler.Uint(i);
+               }
+           }
+        }
+        if (RAPIDJSON_UNLIKELY(!cont))
+            RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset);
+    }
+
+    // Parse any JSON value
+    template<unsigned parseFlags, typename InputStream, typename Handler>
+    void ParseValue(InputStream& is, Handler& handler) {
+        switch (is.Peek()) {
+            case 'n': ParseNull  <parseFlags>(is, handler); break;
+            case 't': ParseTrue  <parseFlags>(is, handler); break;
+            case 'f': ParseFalse <parseFlags>(is, handler); break;
+            case '"': ParseString<parseFlags>(is, handler); break;
+            case '{': ParseObject<parseFlags>(is, handler); break;
+            case '[': ParseArray <parseFlags>(is, handler); break;
+            default :
+                      ParseNumber<parseFlags>(is, handler);
+                      break;
+
+        }
+    }
+
+    // Iterative Parsing
+
+    // States
+    enum IterativeParsingState {
+        IterativeParsingFinishState = 0, // sink states at top
+        IterativeParsingErrorState,      // sink states at top
+        IterativeParsingStartState,
+
+        // Object states
+        IterativeParsingObjectInitialState,
+        IterativeParsingMemberKeyState,
+        IterativeParsingMemberValueState,
+        IterativeParsingObjectFinishState,
+
+        // Array states
+        IterativeParsingArrayInitialState,
+        IterativeParsingElementState,
+        IterativeParsingArrayFinishState,
+
+        // Single value state
+        IterativeParsingValueState,
+
+        // Delimiter states (at bottom)
+        IterativeParsingElementDelimiterState,
+        IterativeParsingMemberDelimiterState,
+        IterativeParsingKeyValueDelimiterState,
+
+        cIterativeParsingStateCount
+    };
+
+    // Tokens
+    enum Token {
+        LeftBracketToken = 0,
+        RightBracketToken,
+
+        LeftCurlyBracketToken,
+        RightCurlyBracketToken,
+
+        CommaToken,
+        ColonToken,
+
+        StringToken,
+        FalseToken,
+        TrueToken,
+        NullToken,
+        NumberToken,
+
+        kTokenCount
+    };
+
+    RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) const {
+
+//!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
+#define N NumberToken
+#define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
+        // Maps from ASCII to Token
+        static const unsigned char tokenMap[256] = {
+            N16, // 00~0F
+            N16, // 10~1F
+            N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
+            N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
+            N16, // 40~4F
+            N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
+            N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
+            N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
+            N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
+        };
+#undef N
+#undef N16
+//!@endcond
+
+        if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256)
+            return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]);
+        else
+            return NumberToken;
+    }
+
+    RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) const {
+        // current state x one lookahead token -> new state
+        static const char G[cIterativeParsingStateCount][kTokenCount] = {
+            // Finish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Error(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Start
+            {
+                IterativeParsingArrayInitialState,  // Left bracket
+                IterativeParsingErrorState,         // Right bracket
+                IterativeParsingObjectInitialState, // Left curly bracket
+                IterativeParsingErrorState,         // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingValueState,         // String
+                IterativeParsingValueState,         // False
+                IterativeParsingValueState,         // True
+                IterativeParsingValueState,         // Null
+                IterativeParsingValueState          // Number
+            },
+            // ObjectInitial
+            {
+                IterativeParsingErrorState,         // Left bracket
+                IterativeParsingErrorState,         // Right bracket
+                IterativeParsingErrorState,         // Left curly bracket
+                IterativeParsingObjectFinishState,  // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingMemberKeyState,     // String
+                IterativeParsingErrorState,         // False
+                IterativeParsingErrorState,         // True
+                IterativeParsingErrorState,         // Null
+                IterativeParsingErrorState          // Number
+            },
+            // MemberKey
+            {
+                IterativeParsingErrorState,             // Left bracket
+                IterativeParsingErrorState,             // Right bracket
+                IterativeParsingErrorState,             // Left curly bracket
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingErrorState,             // Comma
+                IterativeParsingKeyValueDelimiterState, // Colon
+                IterativeParsingErrorState,             // String
+                IterativeParsingErrorState,             // False
+                IterativeParsingErrorState,             // True
+                IterativeParsingErrorState,             // Null
+                IterativeParsingErrorState              // Number
+            },
+            // MemberValue
+            {
+                IterativeParsingErrorState,             // Left bracket
+                IterativeParsingErrorState,             // Right bracket
+                IterativeParsingErrorState,             // Left curly bracket
+                IterativeParsingObjectFinishState,      // Right curly bracket
+                IterativeParsingMemberDelimiterState,   // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingErrorState,             // String
+                IterativeParsingErrorState,             // False
+                IterativeParsingErrorState,             // True
+                IterativeParsingErrorState,             // Null
+                IterativeParsingErrorState              // Number
+            },
+            // ObjectFinish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // ArrayInitial
+            {
+                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
+                IterativeParsingArrayFinishState,       // Right bracket
+                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingErrorState,             // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingElementState,           // String
+                IterativeParsingElementState,           // False
+                IterativeParsingElementState,           // True
+                IterativeParsingElementState,           // Null
+                IterativeParsingElementState            // Number
+            },
+            // Element
+            {
+                IterativeParsingErrorState,             // Left bracket
+                IterativeParsingArrayFinishState,       // Right bracket
+                IterativeParsingErrorState,             // Left curly bracket
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingElementDelimiterState,  // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingErrorState,             // String
+                IterativeParsingErrorState,             // False
+                IterativeParsingErrorState,             // True
+                IterativeParsingErrorState,             // Null
+                IterativeParsingErrorState              // Number
+            },
+            // ArrayFinish(sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // Single Value (sink state)
+            {
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
+                IterativeParsingErrorState
+            },
+            // ElementDelimiter
+            {
+                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
+                IterativeParsingArrayFinishState,       // Right bracket
+                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingErrorState,             // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingElementState,           // String
+                IterativeParsingElementState,           // False
+                IterativeParsingElementState,           // True
+                IterativeParsingElementState,           // Null
+                IterativeParsingElementState            // Number
+            },
+            // MemberDelimiter
+            {
+                IterativeParsingErrorState,         // Left bracket
+                IterativeParsingErrorState,         // Right bracket
+                IterativeParsingErrorState,         // Left curly bracket
+                IterativeParsingObjectFinishState,  // Right curly bracket
+                IterativeParsingErrorState,         // Comma
+                IterativeParsingErrorState,         // Colon
+                IterativeParsingMemberKeyState,     // String
+                IterativeParsingErrorState,         // False
+                IterativeParsingErrorState,         // True
+                IterativeParsingErrorState,         // Null
+                IterativeParsingErrorState          // Number
+            },
+            // KeyValueDelimiter
+            {
+                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
+                IterativeParsingErrorState,             // Right bracket
+                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
+                IterativeParsingErrorState,             // Right curly bracket
+                IterativeParsingErrorState,             // Comma
+                IterativeParsingErrorState,             // Colon
+                IterativeParsingMemberValueState,       // String
+                IterativeParsingMemberValueState,       // False
+                IterativeParsingMemberValueState,       // True
+                IterativeParsingMemberValueState,       // Null
+                IterativeParsingMemberValueState        // Number
+            },
+        }; // End of G
+
+        return static_cast<IterativeParsingState>(G[state][token]);
+    }
+
+    // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
+    // May return a new state on state pop.
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
+        (void)token;
+
+        switch (dst) {
+        case IterativeParsingErrorState:
+            return dst;
+
+        case IterativeParsingObjectInitialState:
+        case IterativeParsingArrayInitialState:
+        {
+            // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
+            // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
+            IterativeParsingState n = src;
+            if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
+                n = IterativeParsingElementState;
+            else if (src == IterativeParsingKeyValueDelimiterState)
+                n = IterativeParsingMemberValueState;
+            // Push current state.
+            *stack_.template Push<SizeType>(1) = n;
+            // Initialize and push the member/element count.
+            *stack_.template Push<SizeType>(1) = 0;
+            // Call handler
+            bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
+            // On handler short circuits the parsing.
+            if (!hr) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
+                return IterativeParsingErrorState;
+            }
+            else {
+                is.Take();
+                return dst;
+            }
+        }
+
+        case IterativeParsingMemberKeyState:
+            ParseString<parseFlags>(is, handler, true);
+            if (HasParseError())
+                return IterativeParsingErrorState;
+            else
+                return dst;
+
+        case IterativeParsingKeyValueDelimiterState:
+            RAPIDJSON_ASSERT(token == ColonToken);
+            is.Take();
+            return dst;
+
+        case IterativeParsingMemberValueState:
+            // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
+            ParseValue<parseFlags>(is, handler);
+            if (HasParseError()) {
+                return IterativeParsingErrorState;
+            }
+            return dst;
+
+        case IterativeParsingElementState:
+            // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
+            ParseValue<parseFlags>(is, handler);
+            if (HasParseError()) {
+                return IterativeParsingErrorState;
+            }
+            return dst;
+
+        case IterativeParsingMemberDelimiterState:
+        case IterativeParsingElementDelimiterState:
+            is.Take();
+            // Update member/element count.
+            *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
+            return dst;
+
+        case IterativeParsingObjectFinishState:
+        {
+            // Transit from delimiter is only allowed when trailing commas are enabled
+            if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell());
+                return IterativeParsingErrorState;
+            }
+            // Get member count.
+            SizeType c = *stack_.template Pop<SizeType>(1);
+            // If the object is not empty, count the last member.
+            if (src == IterativeParsingMemberValueState)
+                ++c;
+            // Restore the state.
+            IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
+            // Transit to Finish state if this is the topmost scope.
+            if (n == IterativeParsingStartState)
+                n = IterativeParsingFinishState;
+            // Call handler
+            bool hr = handler.EndObject(c);
+            // On handler short circuits the parsing.
+            if (!hr) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
+                return IterativeParsingErrorState;
+            }
+            else {
+                is.Take();
+                return n;
+            }
+        }
+
+        case IterativeParsingArrayFinishState:
+        {
+            // Transit from delimiter is only allowed when trailing commas are enabled
+            if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell());
+                return IterativeParsingErrorState;
+            }
+            // Get element count.
+            SizeType c = *stack_.template Pop<SizeType>(1);
+            // If the array is not empty, count the last element.
+            if (src == IterativeParsingElementState)
+                ++c;
+            // Restore the state.
+            IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
+            // Transit to Finish state if this is the topmost scope.
+            if (n == IterativeParsingStartState)
+                n = IterativeParsingFinishState;
+            // Call handler
+            bool hr = handler.EndArray(c);
+            // On handler short circuits the parsing.
+            if (!hr) {
+                RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
+                return IterativeParsingErrorState;
+            }
+            else {
+                is.Take();
+                return n;
+            }
+        }
+
+        default:
+            // This branch is for IterativeParsingValueState actually.
+            // Use `default:` rather than
+            // `case IterativeParsingValueState:` is for code coverage.
+
+            // The IterativeParsingStartState is not enumerated in this switch-case.
+            // It is impossible for that case. And it can be caught by following assertion.
+
+            // The IterativeParsingFinishState is not enumerated in this switch-case either.
+            // It is a "derivative" state which cannot triggered from Predict() directly.
+            // Therefore it cannot happen here. And it can be caught by following assertion.
+            RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
+
+            // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
+            ParseValue<parseFlags>(is, handler);
+            if (HasParseError()) {
+                return IterativeParsingErrorState;
+            }
+            return IterativeParsingFinishState;
+        }
+    }
+
+    template <typename InputStream>
+    void HandleError(IterativeParsingState src, InputStream& is) {
+        if (HasParseError()) {
+            // Error flag has been set.
+            return;
+        }
+
+        switch (src) {
+        case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
+        case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
+        case IterativeParsingObjectInitialState:
+        case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
+        case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
+        case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
+        case IterativeParsingKeyValueDelimiterState:
+        case IterativeParsingArrayInitialState:
+        case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return;
+        default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
+        }
+    }
+
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) const {
+        return s >= IterativeParsingElementDelimiterState;
+    }
+
+    RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) const {
+        return s <= IterativeParsingErrorState;
+    }
+
+    template <unsigned parseFlags, typename InputStream, typename Handler>
+    ParseResult IterativeParse(InputStream& is, Handler& handler) {
+        parseResult_.Clear();
+        ClearStackOnExit scope(*this);
+        IterativeParsingState state = IterativeParsingStartState;
+
+        SkipWhitespaceAndComments<parseFlags>(is);
+        RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+        while (is.Peek() != '\0') {
+            Token t = Tokenize(is.Peek());
+            IterativeParsingState n = Predict(state, t);
+            IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
+
+            if (d == IterativeParsingErrorState) {
+                HandleError(state, is);
+                break;
+            }
+
+            state = d;
+
+            // Do not further consume streams if a root JSON has been parsed.
+            if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
+                break;
+
+            SkipWhitespaceAndComments<parseFlags>(is);
+            RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
+        }
+
+        // Handle the end of file.
+        if (state != IterativeParsingFinishState)
+            HandleError(state, is);
+
+        return parseResult_;
+    }
+
+    static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
+    internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
+    ParseResult parseResult_;
+    IterativeParsingState state_;
+}; // class GenericReader
+
+//! Reader with UTF8 encoding and default allocator.
+typedef GenericReader<UTF8<>, UTF8<> > Reader;
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__) || defined(_MSC_VER)
+RAPIDJSON_DIAG_POP
+#endif
+
+
+#ifdef __GNUC__
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_READER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/schema.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/schema.h
new file mode 100644
index 000000000..57ec797ab
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/schema.h
@@ -0,0 +1,2496 @@
+// Tencent is pleased to support the open source community by making RapidJSON available->
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip-> All rights reserved->
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License-> You may obtain a copy of the License at
+//
+// http://opensource->org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied-> See the License for the 
+// specific language governing permissions and limitations under the License->
+
+#ifndef RAPIDJSON_SCHEMA_H_
+#define RAPIDJSON_SCHEMA_H_
+
+#include "document.h"
+#include "pointer.h"
+#include "stringbuffer.h"
+#include <cmath> // abs, floor
+
+#if !defined(RAPIDJSON_SCHEMA_USE_INTERNALREGEX)
+#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 1
+#else
+#define RAPIDJSON_SCHEMA_USE_INTERNALREGEX 0
+#endif
+
+#if !RAPIDJSON_SCHEMA_USE_INTERNALREGEX && defined(RAPIDJSON_SCHEMA_USE_STDREGEX) && (__cplusplus >=201103L || (defined(_MSC_VER) && _MSC_VER >= 1800))
+#define RAPIDJSON_SCHEMA_USE_STDREGEX 1
+#else
+#define RAPIDJSON_SCHEMA_USE_STDREGEX 0
+#endif
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
+#include "internal/regex.h"
+#elif RAPIDJSON_SCHEMA_USE_STDREGEX
+#include <regex>
+#endif
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX || RAPIDJSON_SCHEMA_USE_STDREGEX
+#define RAPIDJSON_SCHEMA_HAS_REGEX 1
+#else
+#define RAPIDJSON_SCHEMA_HAS_REGEX 0
+#endif
+
+#ifndef RAPIDJSON_SCHEMA_VERBOSE
+#define RAPIDJSON_SCHEMA_VERBOSE 0
+#endif
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+#include "stringbuffer.h"
+#endif
+
+RAPIDJSON_DIAG_PUSH
+
+#if defined(__GNUC__)
+RAPIDJSON_DIAG_OFF(effc++)
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_OFF(weak-vtables)
+RAPIDJSON_DIAG_OFF(exit-time-destructors)
+RAPIDJSON_DIAG_OFF(c++98-compat-pedantic)
+RAPIDJSON_DIAG_OFF(variadic-macros)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// Verbose Utilities
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+
+namespace internal {
+
+inline void PrintInvalidKeyword(const char* keyword) {
+    printf("Fail keyword: %s\n", keyword);
+}
+
+inline void PrintInvalidKeyword(const wchar_t* keyword) {
+    wprintf(L"Fail keyword: %ls\n", keyword);
+}
+
+inline void PrintInvalidDocument(const char* document) {
+    printf("Fail document: %s\n\n", document);
+}
+
+inline void PrintInvalidDocument(const wchar_t* document) {
+    wprintf(L"Fail document: %ls\n\n", document);
+}
+
+inline void PrintValidatorPointers(unsigned depth, const char* s, const char* d) {
+    printf("S: %*s%s\nD: %*s%s\n\n", depth * 4, " ", s, depth * 4, " ", d);
+}
+
+inline void PrintValidatorPointers(unsigned depth, const wchar_t* s, const wchar_t* d) {
+    wprintf(L"S: %*ls%ls\nD: %*ls%ls\n\n", depth * 4, L" ", s, depth * 4, L" ", d);
+}
+
+} // namespace internal
+
+#endif // RAPIDJSON_SCHEMA_VERBOSE
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_INVALID_KEYWORD_RETURN
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword) internal::PrintInvalidKeyword(keyword)
+#else
+#define RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword)
+#endif
+
+#define RAPIDJSON_INVALID_KEYWORD_RETURN(keyword)\
+RAPIDJSON_MULTILINEMACRO_BEGIN\
+    context.invalidKeyword = keyword.GetString();\
+    RAPIDJSON_INVALID_KEYWORD_VERBOSE(keyword.GetString());\
+    return false;\
+RAPIDJSON_MULTILINEMACRO_END
+
+///////////////////////////////////////////////////////////////////////////////
+// Forward declarations
+
+template <typename ValueType, typename Allocator>
+class GenericSchemaDocument;
+
+namespace internal {
+
+template <typename SchemaDocumentType>
+class Schema;
+
+///////////////////////////////////////////////////////////////////////////////
+// ISchemaValidator
+
+class ISchemaValidator {
+public:
+    virtual ~ISchemaValidator() {}
+    virtual bool IsValid() const = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// ISchemaStateFactory
+
+template <typename SchemaType>
+class ISchemaStateFactory {
+public:
+    virtual ~ISchemaStateFactory() {}
+    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType&) = 0;
+    virtual void DestroySchemaValidator(ISchemaValidator* validator) = 0;
+    virtual void* CreateHasher() = 0;
+    virtual uint64_t GetHashCode(void* hasher) = 0;
+    virtual void DestroryHasher(void* hasher) = 0;
+    virtual void* MallocState(size_t size) = 0;
+    virtual void FreeState(void* p) = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// IValidationErrorHandler
+
+template <typename SchemaType>
+class IValidationErrorHandler {
+public:
+    typedef typename SchemaType::Ch Ch;
+    typedef typename SchemaType::SValue SValue;
+
+    virtual ~IValidationErrorHandler() {}
+
+    virtual void NotMultipleOf(int64_t actual, const SValue& expected) = 0;
+    virtual void NotMultipleOf(uint64_t actual, const SValue& expected) = 0;
+    virtual void NotMultipleOf(double actual, const SValue& expected) = 0;
+    virtual void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive) = 0;
+    virtual void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive) = 0;
+    virtual void AboveMaximum(double actual, const SValue& expected, bool exclusive) = 0;
+    virtual void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive) = 0;
+    virtual void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive) = 0;
+    virtual void BelowMinimum(double actual, const SValue& expected, bool exclusive) = 0;
+
+    virtual void TooLong(const Ch* str, SizeType length, SizeType expected) = 0;
+    virtual void TooShort(const Ch* str, SizeType length, SizeType expected) = 0;
+    virtual void DoesNotMatch(const Ch* str, SizeType length) = 0;
+
+    virtual void DisallowedItem(SizeType index) = 0;
+    virtual void TooFewItems(SizeType actualCount, SizeType expectedCount) = 0;
+    virtual void TooManyItems(SizeType actualCount, SizeType expectedCount) = 0;
+    virtual void DuplicateItems(SizeType index1, SizeType index2) = 0;
+
+    virtual void TooManyProperties(SizeType actualCount, SizeType expectedCount) = 0;
+    virtual void TooFewProperties(SizeType actualCount, SizeType expectedCount) = 0;
+    virtual void StartMissingProperties() = 0;
+    virtual void AddMissingProperty(const SValue& name) = 0;
+    virtual bool EndMissingProperties() = 0;
+    virtual void PropertyViolations(ISchemaValidator** subvalidators, SizeType count) = 0;
+    virtual void DisallowedProperty(const Ch* name, SizeType length) = 0;
+
+    virtual void StartDependencyErrors() = 0;
+    virtual void StartMissingDependentProperties() = 0;
+    virtual void AddMissingDependentProperty(const SValue& targetName) = 0;
+    virtual void EndMissingDependentProperties(const SValue& sourceName) = 0;
+    virtual void AddDependencySchemaError(const SValue& souceName, ISchemaValidator* subvalidator) = 0;
+    virtual bool EndDependencyErrors() = 0;
+
+    virtual void DisallowedValue() = 0;
+    virtual void StartDisallowedType() = 0;
+    virtual void AddExpectedType(const typename SchemaType::ValueType& expectedType) = 0;
+    virtual void EndDisallowedType(const typename SchemaType::ValueType& actualType) = 0;
+    virtual void NotAllOf(ISchemaValidator** subvalidators, SizeType count) = 0;
+    virtual void NoneOf(ISchemaValidator** subvalidators, SizeType count) = 0;
+    virtual void NotOneOf(ISchemaValidator** subvalidators, SizeType count) = 0;
+    virtual void Disallowed() = 0;
+};
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Hasher
+
+// For comparison of compound value
+template<typename Encoding, typename Allocator>
+class Hasher {
+public:
+    typedef typename Encoding::Ch Ch;
+
+    Hasher(Allocator* allocator = 0, size_t stackCapacity = kDefaultSize) : stack_(allocator, stackCapacity) {}
+
+    bool Null() { return WriteType(kNullType); }
+    bool Bool(bool b) { return WriteType(b ? kTrueType : kFalseType); }
+    bool Int(int i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
+    bool Uint(unsigned u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
+    bool Int64(int64_t i) { Number n; n.u.i = i; n.d = static_cast<double>(i); return WriteNumber(n); }
+    bool Uint64(uint64_t u) { Number n; n.u.u = u; n.d = static_cast<double>(u); return WriteNumber(n); }
+    bool Double(double d) { 
+        Number n; 
+        if (d < 0) n.u.i = static_cast<int64_t>(d);
+        else       n.u.u = static_cast<uint64_t>(d); 
+        n.d = d;
+        return WriteNumber(n);
+    }
+
+    bool RawNumber(const Ch* str, SizeType len, bool) {
+        WriteBuffer(kNumberType, str, len * sizeof(Ch));
+        return true;
+    }
+
+    bool String(const Ch* str, SizeType len, bool) {
+        WriteBuffer(kStringType, str, len * sizeof(Ch));
+        return true;
+    }
+
+    bool StartObject() { return true; }
+    bool Key(const Ch* str, SizeType len, bool copy) { return String(str, len, copy); }
+    bool EndObject(SizeType memberCount) { 
+        uint64_t h = Hash(0, kObjectType);
+        uint64_t* kv = stack_.template Pop<uint64_t>(memberCount * 2);
+        for (SizeType i = 0; i < memberCount; i++)
+            h ^= Hash(kv[i * 2], kv[i * 2 + 1]);  // Use xor to achieve member order insensitive
+        *stack_.template Push<uint64_t>() = h;
+        return true;
+    }
+    
+    bool StartArray() { return true; }
+    bool EndArray(SizeType elementCount) { 
+        uint64_t h = Hash(0, kArrayType);
+        uint64_t* e = stack_.template Pop<uint64_t>(elementCount);
+        for (SizeType i = 0; i < elementCount; i++)
+            h = Hash(h, e[i]); // Use hash to achieve element order sensitive
+        *stack_.template Push<uint64_t>() = h;
+        return true;
+    }
+
+    bool IsValid() const { return stack_.GetSize() == sizeof(uint64_t); }
+
+    uint64_t GetHashCode() const {
+        RAPIDJSON_ASSERT(IsValid());
+        return *stack_.template Top<uint64_t>();
+    }
+
+private:
+    static const size_t kDefaultSize = 256;
+    struct Number {
+        union U {
+            uint64_t u;
+            int64_t i;
+        }u;
+        double d;
+    };
+
+    bool WriteType(Type type) { return WriteBuffer(type, 0, 0); }
+    
+    bool WriteNumber(const Number& n) { return WriteBuffer(kNumberType, &n, sizeof(n)); }
+    
+    bool WriteBuffer(Type type, const void* data, size_t len) {
+        // FNV-1a from http://isthe.com/chongo/tech/comp/fnv/
+        uint64_t h = Hash(RAPIDJSON_UINT64_C2(0x84222325, 0xcbf29ce4), type);
+        const unsigned char* d = static_cast<const unsigned char*>(data);
+        for (size_t i = 0; i < len; i++)
+            h = Hash(h, d[i]);
+        *stack_.template Push<uint64_t>() = h;
+        return true;
+    }
+
+    static uint64_t Hash(uint64_t h, uint64_t d) {
+        static const uint64_t kPrime = RAPIDJSON_UINT64_C2(0x00000100, 0x000001b3);
+        h ^= d;
+        h *= kPrime;
+        return h;
+    }
+
+    Stack<Allocator> stack_;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// SchemaValidationContext
+
+template <typename SchemaDocumentType>
+struct SchemaValidationContext {
+    typedef Schema<SchemaDocumentType> SchemaType;
+    typedef ISchemaStateFactory<SchemaType> SchemaValidatorFactoryType;
+    typedef IValidationErrorHandler<SchemaType> ErrorHandlerType;
+    typedef typename SchemaType::ValueType ValueType;
+    typedef typename ValueType::Ch Ch;
+
+    enum PatternValidatorType {
+        kPatternValidatorOnly,
+        kPatternValidatorWithProperty,
+        kPatternValidatorWithAdditionalProperty
+    };
+
+    SchemaValidationContext(SchemaValidatorFactoryType& f, ErrorHandlerType& eh, const SchemaType* s) :
+        factory(f),
+        error_handler(eh),
+        schema(s),
+        valueSchema(),
+        invalidKeyword(),
+        hasher(),
+        arrayElementHashCodes(),
+        validators(),
+        validatorCount(),
+        patternPropertiesValidators(),
+        patternPropertiesValidatorCount(),
+        patternPropertiesSchemas(),
+        patternPropertiesSchemaCount(),
+        valuePatternValidatorType(kPatternValidatorOnly),
+        propertyExist(),
+        inArray(false),
+        valueUniqueness(false),
+        arrayUniqueness(false)
+    {
+    }
+
+    ~SchemaValidationContext() {
+        if (hasher)
+            factory.DestroryHasher(hasher);
+        if (validators) {
+            for (SizeType i = 0; i < validatorCount; i++)
+                factory.DestroySchemaValidator(validators[i]);
+            factory.FreeState(validators);
+        }
+        if (patternPropertiesValidators) {
+            for (SizeType i = 0; i < patternPropertiesValidatorCount; i++)
+                factory.DestroySchemaValidator(patternPropertiesValidators[i]);
+            factory.FreeState(patternPropertiesValidators);
+        }
+        if (patternPropertiesSchemas)
+            factory.FreeState(patternPropertiesSchemas);
+        if (propertyExist)
+            factory.FreeState(propertyExist);
+    }
+
+    SchemaValidatorFactoryType& factory;
+    ErrorHandlerType& error_handler;
+    const SchemaType* schema;
+    const SchemaType* valueSchema;
+    const Ch* invalidKeyword;
+    void* hasher; // Only validator access
+    void* arrayElementHashCodes; // Only validator access this
+    ISchemaValidator** validators;
+    SizeType validatorCount;
+    ISchemaValidator** patternPropertiesValidators;
+    SizeType patternPropertiesValidatorCount;
+    const SchemaType** patternPropertiesSchemas;
+    SizeType patternPropertiesSchemaCount;
+    PatternValidatorType valuePatternValidatorType;
+    PatternValidatorType objectPatternValidatorType;
+    SizeType arrayElementIndex;
+    bool* propertyExist;
+    bool inArray;
+    bool valueUniqueness;
+    bool arrayUniqueness;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Schema
+
+template <typename SchemaDocumentType>
+class Schema {
+public:
+    typedef typename SchemaDocumentType::ValueType ValueType;
+    typedef typename SchemaDocumentType::AllocatorType AllocatorType;
+    typedef typename SchemaDocumentType::PointerType PointerType;
+    typedef typename ValueType::EncodingType EncodingType;
+    typedef typename EncodingType::Ch Ch;
+    typedef SchemaValidationContext<SchemaDocumentType> Context;
+    typedef Schema<SchemaDocumentType> SchemaType;
+    typedef GenericValue<EncodingType, AllocatorType> SValue;
+    typedef IValidationErrorHandler<Schema> ErrorHandler;
+    friend class GenericSchemaDocument<ValueType, AllocatorType>;
+
+    Schema(SchemaDocumentType* schemaDocument, const PointerType& p, const ValueType& value, const ValueType& document, AllocatorType* allocator) :
+        allocator_(allocator),
+        uri_(schemaDocument->GetURI(), *allocator),
+        pointer_(p),
+        typeless_(schemaDocument->GetTypeless()),
+        enum_(),
+        enumCount_(),
+        not_(),
+        type_((1 << kTotalSchemaType) - 1), // typeless
+        validatorCount_(),
+        notValidatorIndex_(),
+        properties_(),
+        additionalPropertiesSchema_(),
+        patternProperties_(),
+        patternPropertyCount_(),
+        propertyCount_(),
+        minProperties_(),
+        maxProperties_(SizeType(~0)),
+        additionalProperties_(true),
+        hasDependencies_(),
+        hasRequired_(),
+        hasSchemaDependencies_(),
+        additionalItemsSchema_(),
+        itemsList_(),
+        itemsTuple_(),
+        itemsTupleCount_(),
+        minItems_(),
+        maxItems_(SizeType(~0)),
+        additionalItems_(true),
+        uniqueItems_(false),
+        pattern_(),
+        minLength_(0),
+        maxLength_(~SizeType(0)),
+        exclusiveMinimum_(false),
+        exclusiveMaximum_(false),
+        defaultValueLength_(0)
+    {
+        typedef typename SchemaDocumentType::ValueType ValueType;
+        typedef typename ValueType::ConstValueIterator ConstValueIterator;
+        typedef typename ValueType::ConstMemberIterator ConstMemberIterator;
+
+        if (!value.IsObject())
+            return;
+
+        if (const ValueType* v = GetMember(value, GetTypeString())) {
+            type_ = 0;
+            if (v->IsString())
+                AddType(*v);
+            else if (v->IsArray())
+                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr)
+                    AddType(*itr);
+        }
+
+        if (const ValueType* v = GetMember(value, GetEnumString()))
+            if (v->IsArray() && v->Size() > 0) {
+                enum_ = static_cast<uint64_t*>(allocator_->Malloc(sizeof(uint64_t) * v->Size()));
+                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr) {
+                    typedef Hasher<EncodingType, MemoryPoolAllocator<> > EnumHasherType;
+                    char buffer[256u + 24];
+                    MemoryPoolAllocator<> hasherAllocator(buffer, sizeof(buffer));
+                    EnumHasherType h(&hasherAllocator, 256);
+                    itr->Accept(h);
+                    enum_[enumCount_++] = h.GetHashCode();
+                }
+            }
+
+        if (schemaDocument) {
+            AssignIfExist(allOf_, *schemaDocument, p, value, GetAllOfString(), document);
+            AssignIfExist(anyOf_, *schemaDocument, p, value, GetAnyOfString(), document);
+            AssignIfExist(oneOf_, *schemaDocument, p, value, GetOneOfString(), document);
+        }
+
+        if (const ValueType* v = GetMember(value, GetNotString())) {
+            schemaDocument->CreateSchema(&not_, p.Append(GetNotString(), allocator_), *v, document);
+            notValidatorIndex_ = validatorCount_;
+            validatorCount_++;
+        }
+
+        // Object
+
+        const ValueType* properties = GetMember(value, GetPropertiesString());
+        const ValueType* required = GetMember(value, GetRequiredString());
+        const ValueType* dependencies = GetMember(value, GetDependenciesString());
+        {
+            // Gather properties from properties/required/dependencies
+            SValue allProperties(kArrayType);
+
+            if (properties && properties->IsObject())
+                for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr)
+                    AddUniqueElement(allProperties, itr->name);
+            
+            if (required && required->IsArray())
+                for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
+                    if (itr->IsString())
+                        AddUniqueElement(allProperties, *itr);
+
+            if (dependencies && dependencies->IsObject())
+                for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
+                    AddUniqueElement(allProperties, itr->name);
+                    if (itr->value.IsArray())
+                        for (ConstValueIterator i = itr->value.Begin(); i != itr->value.End(); ++i)
+                            if (i->IsString())
+                                AddUniqueElement(allProperties, *i);
+                }
+
+            if (allProperties.Size() > 0) {
+                propertyCount_ = allProperties.Size();
+                properties_ = static_cast<Property*>(allocator_->Malloc(sizeof(Property) * propertyCount_));
+                for (SizeType i = 0; i < propertyCount_; i++) {
+                    new (&properties_[i]) Property();
+                    properties_[i].name = allProperties[i];
+                    properties_[i].schema = typeless_;
+                }
+            }
+        }
+
+        if (properties && properties->IsObject()) {
+            PointerType q = p.Append(GetPropertiesString(), allocator_);
+            for (ConstMemberIterator itr = properties->MemberBegin(); itr != properties->MemberEnd(); ++itr) {
+                SizeType index;
+                if (FindPropertyIndex(itr->name, &index))
+                    schemaDocument->CreateSchema(&properties_[index].schema, q.Append(itr->name, allocator_), itr->value, document);
+            }
+        }
+
+        if (const ValueType* v = GetMember(value, GetPatternPropertiesString())) {
+            PointerType q = p.Append(GetPatternPropertiesString(), allocator_);
+            patternProperties_ = static_cast<PatternProperty*>(allocator_->Malloc(sizeof(PatternProperty) * v->MemberCount()));
+            patternPropertyCount_ = 0;
+
+            for (ConstMemberIterator itr = v->MemberBegin(); itr != v->MemberEnd(); ++itr) {
+                new (&patternProperties_[patternPropertyCount_]) PatternProperty();
+                patternProperties_[patternPropertyCount_].pattern = CreatePattern(itr->name);
+                schemaDocument->CreateSchema(&patternProperties_[patternPropertyCount_].schema, q.Append(itr->name, allocator_), itr->value, document);
+                patternPropertyCount_++;
+            }
+        }
+
+        if (required && required->IsArray())
+            for (ConstValueIterator itr = required->Begin(); itr != required->End(); ++itr)
+                if (itr->IsString()) {
+                    SizeType index;
+                    if (FindPropertyIndex(*itr, &index)) {
+                        properties_[index].required = true;
+                        hasRequired_ = true;
+                    }
+                }
+
+        if (dependencies && dependencies->IsObject()) {
+            PointerType q = p.Append(GetDependenciesString(), allocator_);
+            hasDependencies_ = true;
+            for (ConstMemberIterator itr = dependencies->MemberBegin(); itr != dependencies->MemberEnd(); ++itr) {
+                SizeType sourceIndex;
+                if (FindPropertyIndex(itr->name, &sourceIndex)) {
+                    if (itr->value.IsArray()) {
+                        properties_[sourceIndex].dependencies = static_cast<bool*>(allocator_->Malloc(sizeof(bool) * propertyCount_));
+                        std::memset(properties_[sourceIndex].dependencies, 0, sizeof(bool)* propertyCount_);
+                        for (ConstValueIterator targetItr = itr->value.Begin(); targetItr != itr->value.End(); ++targetItr) {
+                            SizeType targetIndex;
+                            if (FindPropertyIndex(*targetItr, &targetIndex))
+                                properties_[sourceIndex].dependencies[targetIndex] = true;
+                        }
+                    }
+                    else if (itr->value.IsObject()) {
+                        hasSchemaDependencies_ = true;
+                        schemaDocument->CreateSchema(&properties_[sourceIndex].dependenciesSchema, q.Append(itr->name, allocator_), itr->value, document);
+                        properties_[sourceIndex].dependenciesValidatorIndex = validatorCount_;
+                        validatorCount_++;
+                    }
+                }
+            }
+        }
+
+        if (const ValueType* v = GetMember(value, GetAdditionalPropertiesString())) {
+            if (v->IsBool())
+                additionalProperties_ = v->GetBool();
+            else if (v->IsObject())
+                schemaDocument->CreateSchema(&additionalPropertiesSchema_, p.Append(GetAdditionalPropertiesString(), allocator_), *v, document);
+        }
+
+        AssignIfExist(minProperties_, value, GetMinPropertiesString());
+        AssignIfExist(maxProperties_, value, GetMaxPropertiesString());
+
+        // Array
+        if (const ValueType* v = GetMember(value, GetItemsString())) {
+            PointerType q = p.Append(GetItemsString(), allocator_);
+            if (v->IsObject()) // List validation
+                schemaDocument->CreateSchema(&itemsList_, q, *v, document);
+            else if (v->IsArray()) { // Tuple validation
+                itemsTuple_ = static_cast<const Schema**>(allocator_->Malloc(sizeof(const Schema*) * v->Size()));
+                SizeType index = 0;
+                for (ConstValueIterator itr = v->Begin(); itr != v->End(); ++itr, index++)
+                    schemaDocument->CreateSchema(&itemsTuple_[itemsTupleCount_++], q.Append(index, allocator_), *itr, document);
+            }
+        }
+
+        AssignIfExist(minItems_, value, GetMinItemsString());
+        AssignIfExist(maxItems_, value, GetMaxItemsString());
+
+        if (const ValueType* v = GetMember(value, GetAdditionalItemsString())) {
+            if (v->IsBool())
+                additionalItems_ = v->GetBool();
+            else if (v->IsObject())
+                schemaDocument->CreateSchema(&additionalItemsSchema_, p.Append(GetAdditionalItemsString(), allocator_), *v, document);
+        }
+
+        AssignIfExist(uniqueItems_, value, GetUniqueItemsString());
+
+        // String
+        AssignIfExist(minLength_, value, GetMinLengthString());
+        AssignIfExist(maxLength_, value, GetMaxLengthString());
+
+        if (const ValueType* v = GetMember(value, GetPatternString()))
+            pattern_ = CreatePattern(*v);
+
+        // Number
+        if (const ValueType* v = GetMember(value, GetMinimumString()))
+            if (v->IsNumber())
+                minimum_.CopyFrom(*v, *allocator_);
+
+        if (const ValueType* v = GetMember(value, GetMaximumString()))
+            if (v->IsNumber())
+                maximum_.CopyFrom(*v, *allocator_);
+
+        AssignIfExist(exclusiveMinimum_, value, GetExclusiveMinimumString());
+        AssignIfExist(exclusiveMaximum_, value, GetExclusiveMaximumString());
+
+        if (const ValueType* v = GetMember(value, GetMultipleOfString()))
+            if (v->IsNumber() && v->GetDouble() > 0.0)
+                multipleOf_.CopyFrom(*v, *allocator_);
+
+        // Default
+        if (const ValueType* v = GetMember(value, GetDefaultValueString()))
+            if (v->IsString())
+                defaultValueLength_ = v->GetStringLength();
+
+    }
+
+    ~Schema() {
+        AllocatorType::Free(enum_);
+        if (properties_) {
+            for (SizeType i = 0; i < propertyCount_; i++)
+                properties_[i].~Property();
+            AllocatorType::Free(properties_);
+        }
+        if (patternProperties_) {
+            for (SizeType i = 0; i < patternPropertyCount_; i++)
+                patternProperties_[i].~PatternProperty();
+            AllocatorType::Free(patternProperties_);
+        }
+        AllocatorType::Free(itemsTuple_);
+#if RAPIDJSON_SCHEMA_HAS_REGEX
+        if (pattern_) {
+            pattern_->~RegexType();
+            AllocatorType::Free(pattern_);
+        }
+#endif
+    }
+
+    const SValue& GetURI() const {
+        return uri_;
+    }
+
+    const PointerType& GetPointer() const {
+        return pointer_;
+    }
+
+    bool BeginValue(Context& context) const {
+        if (context.inArray) {
+            if (uniqueItems_)
+                context.valueUniqueness = true;
+
+            if (itemsList_)
+                context.valueSchema = itemsList_;
+            else if (itemsTuple_) {
+                if (context.arrayElementIndex < itemsTupleCount_)
+                    context.valueSchema = itemsTuple_[context.arrayElementIndex];
+                else if (additionalItemsSchema_)
+                    context.valueSchema = additionalItemsSchema_;
+                else if (additionalItems_)
+                    context.valueSchema = typeless_;
+                else {
+                    context.error_handler.DisallowedItem(context.arrayElementIndex);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetItemsString());
+                }
+            }
+            else
+                context.valueSchema = typeless_;
+
+            context.arrayElementIndex++;
+        }
+        return true;
+    }
+
+    RAPIDJSON_FORCEINLINE bool EndValue(Context& context) const {
+        if (context.patternPropertiesValidatorCount > 0) {
+            bool otherValid = false;
+            SizeType count = context.patternPropertiesValidatorCount;
+            if (context.objectPatternValidatorType != Context::kPatternValidatorOnly)
+                otherValid = context.patternPropertiesValidators[--count]->IsValid();
+
+            bool patternValid = true;
+            for (SizeType i = 0; i < count; i++)
+                if (!context.patternPropertiesValidators[i]->IsValid()) {
+                    patternValid = false;
+                    break;
+                }
+
+            if (context.objectPatternValidatorType == Context::kPatternValidatorOnly) {
+                if (!patternValid) {
+                    context.error_handler.PropertyViolations(context.patternPropertiesValidators, count);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
+                }
+            }
+            else if (context.objectPatternValidatorType == Context::kPatternValidatorWithProperty) {
+                if (!patternValid || !otherValid) {
+                    context.error_handler.PropertyViolations(context.patternPropertiesValidators, count + 1);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
+                }
+            }
+            else if (!patternValid && !otherValid) { // kPatternValidatorWithAdditionalProperty)
+                context.error_handler.PropertyViolations(context.patternPropertiesValidators, count + 1);
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternPropertiesString());
+            }
+        }
+
+        if (enum_) {
+            const uint64_t h = context.factory.GetHashCode(context.hasher);
+            for (SizeType i = 0; i < enumCount_; i++)
+                if (enum_[i] == h)
+                    goto foundEnum;
+            context.error_handler.DisallowedValue();
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetEnumString());
+            foundEnum:;
+        }
+
+        if (allOf_.schemas)
+            for (SizeType i = allOf_.begin; i < allOf_.begin + allOf_.count; i++)
+                if (!context.validators[i]->IsValid()) {
+                    context.error_handler.NotAllOf(&context.validators[allOf_.begin], allOf_.count);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetAllOfString());
+                }
+        
+        if (anyOf_.schemas) {
+            for (SizeType i = anyOf_.begin; i < anyOf_.begin + anyOf_.count; i++)
+                if (context.validators[i]->IsValid())
+                    goto foundAny;
+            context.error_handler.NoneOf(&context.validators[anyOf_.begin], anyOf_.count);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetAnyOfString());
+            foundAny:;
+        }
+
+        if (oneOf_.schemas) {
+            bool oneValid = false;
+            for (SizeType i = oneOf_.begin; i < oneOf_.begin + oneOf_.count; i++)
+                if (context.validators[i]->IsValid()) {
+                    if (oneValid) {
+                        context.error_handler.NotOneOf(&context.validators[oneOf_.begin], oneOf_.count);
+                        RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString());
+                    } else
+                        oneValid = true;
+                }
+            if (!oneValid) {
+                context.error_handler.NotOneOf(&context.validators[oneOf_.begin], oneOf_.count);
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetOneOfString());
+            }
+        }
+
+        if (not_ && context.validators[notValidatorIndex_]->IsValid()) {
+            context.error_handler.Disallowed();
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetNotString());
+        }
+
+        return true;
+    }
+
+    bool Null(Context& context) const {
+        if (!(type_ & (1 << kNullSchemaType))) {
+            DisallowedType(context, GetNullString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+        return CreateParallelValidator(context);
+    }
+    
+    bool Bool(Context& context, bool) const {
+        if (!(type_ & (1 << kBooleanSchemaType))) {
+            DisallowedType(context, GetBooleanString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+        return CreateParallelValidator(context);
+    }
+
+    bool Int(Context& context, int i) const {
+        if (!CheckInt(context, i))
+            return false;
+        return CreateParallelValidator(context);
+    }
+
+    bool Uint(Context& context, unsigned u) const {
+        if (!CheckUint(context, u))
+            return false;
+        return CreateParallelValidator(context);
+    }
+
+    bool Int64(Context& context, int64_t i) const {
+        if (!CheckInt(context, i))
+            return false;
+        return CreateParallelValidator(context);
+    }
+
+    bool Uint64(Context& context, uint64_t u) const {
+        if (!CheckUint(context, u))
+            return false;
+        return CreateParallelValidator(context);
+    }
+
+    bool Double(Context& context, double d) const {
+        if (!(type_ & (1 << kNumberSchemaType))) {
+            DisallowedType(context, GetNumberString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+
+        if (!minimum_.IsNull() && !CheckDoubleMinimum(context, d))
+            return false;
+
+        if (!maximum_.IsNull() && !CheckDoubleMaximum(context, d))
+            return false;
+        
+        if (!multipleOf_.IsNull() && !CheckDoubleMultipleOf(context, d))
+            return false;
+        
+        return CreateParallelValidator(context);
+    }
+    
+    bool String(Context& context, const Ch* str, SizeType length, bool) const {
+        if (!(type_ & (1 << kStringSchemaType))) {
+            DisallowedType(context, GetStringString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+
+        if (minLength_ != 0 || maxLength_ != SizeType(~0)) {
+            SizeType count;
+            if (internal::CountStringCodePoint<EncodingType>(str, length, &count)) {
+                if (count < minLength_) {
+                    context.error_handler.TooShort(str, length, minLength_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinLengthString());
+                }
+                if (count > maxLength_) {
+                    context.error_handler.TooLong(str, length, maxLength_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxLengthString());
+                }
+            }
+        }
+
+        if (pattern_ && !IsPatternMatch(pattern_, str, length)) {
+            context.error_handler.DoesNotMatch(str, length);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetPatternString());
+        }
+
+        return CreateParallelValidator(context);
+    }
+
+    bool StartObject(Context& context) const {
+        if (!(type_ & (1 << kObjectSchemaType))) {
+            DisallowedType(context, GetObjectString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+
+        if (hasDependencies_ || hasRequired_) {
+            context.propertyExist = static_cast<bool*>(context.factory.MallocState(sizeof(bool) * propertyCount_));
+            std::memset(context.propertyExist, 0, sizeof(bool) * propertyCount_);
+        }
+
+        if (patternProperties_) { // pre-allocate schema array
+            SizeType count = patternPropertyCount_ + 1; // extra for valuePatternValidatorType
+            context.patternPropertiesSchemas = static_cast<const SchemaType**>(context.factory.MallocState(sizeof(const SchemaType*) * count));
+            context.patternPropertiesSchemaCount = 0;
+            std::memset(context.patternPropertiesSchemas, 0, sizeof(SchemaType*) * count);
+        }
+
+        return CreateParallelValidator(context);
+    }
+    
+    bool Key(Context& context, const Ch* str, SizeType len, bool) const {
+        if (patternProperties_) {
+            context.patternPropertiesSchemaCount = 0;
+            for (SizeType i = 0; i < patternPropertyCount_; i++)
+                if (patternProperties_[i].pattern && IsPatternMatch(patternProperties_[i].pattern, str, len)) {
+                    context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = patternProperties_[i].schema;
+                    context.valueSchema = typeless_;
+                }
+        }
+
+        SizeType index;
+        if (FindPropertyIndex(ValueType(str, len).Move(), &index)) {
+            if (context.patternPropertiesSchemaCount > 0) {
+                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = properties_[index].schema;
+                context.valueSchema = typeless_;
+                context.valuePatternValidatorType = Context::kPatternValidatorWithProperty;
+            }
+            else
+                context.valueSchema = properties_[index].schema;
+
+            if (context.propertyExist)
+                context.propertyExist[index] = true;
+
+            return true;
+        }
+
+        if (additionalPropertiesSchema_) {
+            if (additionalPropertiesSchema_ && context.patternPropertiesSchemaCount > 0) {
+                context.patternPropertiesSchemas[context.patternPropertiesSchemaCount++] = additionalPropertiesSchema_;
+                context.valueSchema = typeless_;
+                context.valuePatternValidatorType = Context::kPatternValidatorWithAdditionalProperty;
+            }
+            else
+                context.valueSchema = additionalPropertiesSchema_;
+            return true;
+        }
+        else if (additionalProperties_) {
+            context.valueSchema = typeless_;
+            return true;
+        }
+
+        if (context.patternPropertiesSchemaCount == 0) { // patternProperties are not additional properties
+            context.error_handler.DisallowedProperty(str, len);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetAdditionalPropertiesString());
+        }
+
+        return true;
+    }
+
+    bool EndObject(Context& context, SizeType memberCount) const {
+        if (hasRequired_) {
+            context.error_handler.StartMissingProperties();
+            for (SizeType index = 0; index < propertyCount_; index++)
+                if (properties_[index].required && !context.propertyExist[index])
+                    if (properties_[index].schema->defaultValueLength_ == 0 )
+                        context.error_handler.AddMissingProperty(properties_[index].name);
+            if (context.error_handler.EndMissingProperties())
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetRequiredString());
+        }
+
+        if (memberCount < minProperties_) {
+            context.error_handler.TooFewProperties(memberCount, minProperties_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinPropertiesString());
+        }
+
+        if (memberCount > maxProperties_) {
+            context.error_handler.TooManyProperties(memberCount, maxProperties_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxPropertiesString());
+        }
+
+        if (hasDependencies_) {
+            context.error_handler.StartDependencyErrors();
+            for (SizeType sourceIndex = 0; sourceIndex < propertyCount_; sourceIndex++) {
+                const Property& source = properties_[sourceIndex];
+                if (context.propertyExist[sourceIndex]) {
+                    if (source.dependencies) {
+                        context.error_handler.StartMissingDependentProperties();
+                        for (SizeType targetIndex = 0; targetIndex < propertyCount_; targetIndex++)
+                            if (source.dependencies[targetIndex] && !context.propertyExist[targetIndex])
+                                context.error_handler.AddMissingDependentProperty(properties_[targetIndex].name);
+                        context.error_handler.EndMissingDependentProperties(source.name);
+                    }
+                    else if (source.dependenciesSchema) {
+                        ISchemaValidator* dependenciesValidator = context.validators[source.dependenciesValidatorIndex];
+                        if (!dependenciesValidator->IsValid())
+                            context.error_handler.AddDependencySchemaError(source.name, dependenciesValidator);
+                    }
+                }
+            }
+            if (context.error_handler.EndDependencyErrors())
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetDependenciesString());
+        }
+
+        return true;
+    }
+
+    bool StartArray(Context& context) const {
+        if (!(type_ & (1 << kArraySchemaType))) {
+            DisallowedType(context, GetArrayString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+
+        context.arrayElementIndex = 0;
+        context.inArray = true;
+
+        return CreateParallelValidator(context);
+    }
+
+    bool EndArray(Context& context, SizeType elementCount) const {
+        context.inArray = false;
+        
+        if (elementCount < minItems_) {
+            context.error_handler.TooFewItems(elementCount, minItems_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinItemsString());
+        }
+        
+        if (elementCount > maxItems_) {
+            context.error_handler.TooManyItems(elementCount, maxItems_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaxItemsString());
+        }
+
+        return true;
+    }
+
+    // Generate functions for string literal according to Ch
+#define RAPIDJSON_STRING_(name, ...) \
+    static const ValueType& Get##name##String() {\
+        static const Ch s[] = { __VA_ARGS__, '\0' };\
+        static const ValueType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1));\
+        return v;\
+    }
+
+    RAPIDJSON_STRING_(Null, 'n', 'u', 'l', 'l')
+    RAPIDJSON_STRING_(Boolean, 'b', 'o', 'o', 'l', 'e', 'a', 'n')
+    RAPIDJSON_STRING_(Object, 'o', 'b', 'j', 'e', 'c', 't')
+    RAPIDJSON_STRING_(Array, 'a', 'r', 'r', 'a', 'y')
+    RAPIDJSON_STRING_(String, 's', 't', 'r', 'i', 'n', 'g')
+    RAPIDJSON_STRING_(Number, 'n', 'u', 'm', 'b', 'e', 'r')
+    RAPIDJSON_STRING_(Integer, 'i', 'n', 't', 'e', 'g', 'e', 'r')
+    RAPIDJSON_STRING_(Type, 't', 'y', 'p', 'e')
+    RAPIDJSON_STRING_(Enum, 'e', 'n', 'u', 'm')
+    RAPIDJSON_STRING_(AllOf, 'a', 'l', 'l', 'O', 'f')
+    RAPIDJSON_STRING_(AnyOf, 'a', 'n', 'y', 'O', 'f')
+    RAPIDJSON_STRING_(OneOf, 'o', 'n', 'e', 'O', 'f')
+    RAPIDJSON_STRING_(Not, 'n', 'o', 't')
+    RAPIDJSON_STRING_(Properties, 'p', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(Required, 'r', 'e', 'q', 'u', 'i', 'r', 'e', 'd')
+    RAPIDJSON_STRING_(Dependencies, 'd', 'e', 'p', 'e', 'n', 'd', 'e', 'n', 'c', 'i', 'e', 's')
+    RAPIDJSON_STRING_(PatternProperties, 'p', 'a', 't', 't', 'e', 'r', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(AdditionalProperties, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(MinProperties, 'm', 'i', 'n', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(MaxProperties, 'm', 'a', 'x', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'i', 'e', 's')
+    RAPIDJSON_STRING_(Items, 'i', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(MinItems, 'm', 'i', 'n', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(MaxItems, 'm', 'a', 'x', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(AdditionalItems, 'a', 'd', 'd', 'i', 't', 'i', 'o', 'n', 'a', 'l', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(UniqueItems, 'u', 'n', 'i', 'q', 'u', 'e', 'I', 't', 'e', 'm', 's')
+    RAPIDJSON_STRING_(MinLength, 'm', 'i', 'n', 'L', 'e', 'n', 'g', 't', 'h')
+    RAPIDJSON_STRING_(MaxLength, 'm', 'a', 'x', 'L', 'e', 'n', 'g', 't', 'h')
+    RAPIDJSON_STRING_(Pattern, 'p', 'a', 't', 't', 'e', 'r', 'n')
+    RAPIDJSON_STRING_(Minimum, 'm', 'i', 'n', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(Maximum, 'm', 'a', 'x', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(ExclusiveMinimum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'i', 'n', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(ExclusiveMaximum, 'e', 'x', 'c', 'l', 'u', 's', 'i', 'v', 'e', 'M', 'a', 'x', 'i', 'm', 'u', 'm')
+    RAPIDJSON_STRING_(MultipleOf, 'm', 'u', 'l', 't', 'i', 'p', 'l', 'e', 'O', 'f')
+    RAPIDJSON_STRING_(DefaultValue, 'd', 'e', 'f', 'a', 'u', 'l', 't')
+
+#undef RAPIDJSON_STRING_
+
+private:
+    enum SchemaValueType {
+        kNullSchemaType,
+        kBooleanSchemaType,
+        kObjectSchemaType,
+        kArraySchemaType,
+        kStringSchemaType,
+        kNumberSchemaType,
+        kIntegerSchemaType,
+        kTotalSchemaType
+    };
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
+        typedef internal::GenericRegex<EncodingType, AllocatorType> RegexType;
+#elif RAPIDJSON_SCHEMA_USE_STDREGEX
+        typedef std::basic_regex<Ch> RegexType;
+#else
+        typedef char RegexType;
+#endif
+
+    struct SchemaArray {
+        SchemaArray() : schemas(), count() {}
+        ~SchemaArray() { AllocatorType::Free(schemas); }
+        const SchemaType** schemas;
+        SizeType begin; // begin index of context.validators
+        SizeType count;
+    };
+
+    template <typename V1, typename V2>
+    void AddUniqueElement(V1& a, const V2& v) {
+        for (typename V1::ConstValueIterator itr = a.Begin(); itr != a.End(); ++itr)
+            if (*itr == v)
+                return;
+        V1 c(v, *allocator_);
+        a.PushBack(c, *allocator_);
+    }
+
+    static const ValueType* GetMember(const ValueType& value, const ValueType& name) {
+        typename ValueType::ConstMemberIterator itr = value.FindMember(name);
+        return itr != value.MemberEnd() ? &(itr->value) : 0;
+    }
+
+    static void AssignIfExist(bool& out, const ValueType& value, const ValueType& name) {
+        if (const ValueType* v = GetMember(value, name))
+            if (v->IsBool())
+                out = v->GetBool();
+    }
+
+    static void AssignIfExist(SizeType& out, const ValueType& value, const ValueType& name) {
+        if (const ValueType* v = GetMember(value, name))
+            if (v->IsUint64() && v->GetUint64() <= SizeType(~0))
+                out = static_cast<SizeType>(v->GetUint64());
+    }
+
+    void AssignIfExist(SchemaArray& out, SchemaDocumentType& schemaDocument, const PointerType& p, const ValueType& value, const ValueType& name, const ValueType& document) {
+        if (const ValueType* v = GetMember(value, name)) {
+            if (v->IsArray() && v->Size() > 0) {
+                PointerType q = p.Append(name, allocator_);
+                out.count = v->Size();
+                out.schemas = static_cast<const Schema**>(allocator_->Malloc(out.count * sizeof(const Schema*)));
+                memset(out.schemas, 0, sizeof(Schema*)* out.count);
+                for (SizeType i = 0; i < out.count; i++)
+                    schemaDocument.CreateSchema(&out.schemas[i], q.Append(i, allocator_), (*v)[i], document);
+                out.begin = validatorCount_;
+                validatorCount_ += out.count;
+            }
+        }
+    }
+
+#if RAPIDJSON_SCHEMA_USE_INTERNALREGEX
+    template <typename ValueType>
+    RegexType* CreatePattern(const ValueType& value) {
+        if (value.IsString()) {
+            RegexType* r = new (allocator_->Malloc(sizeof(RegexType))) RegexType(value.GetString(), allocator_);
+            if (!r->IsValid()) {
+                r->~RegexType();
+                AllocatorType::Free(r);
+                r = 0;
+            }
+            return r;
+        }
+        return 0;
+    }
+
+    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType) {
+        GenericRegexSearch<RegexType> rs(*pattern);
+        return rs.Search(str);
+    }
+#elif RAPIDJSON_SCHEMA_USE_STDREGEX
+    template <typename ValueType>
+    RegexType* CreatePattern(const ValueType& value) {
+        if (value.IsString())
+            RegexType *r = static_cast<RegexType*>(allocator_->Malloc(sizeof(RegexType)));
+            try {
+                return new (r) RegexType(value.GetString(), std::size_t(value.GetStringLength()), std::regex_constants::ECMAScript);
+            }
+            catch (const std::regex_error&) {
+                AllocatorType::Free(r);
+            }
+        return 0;
+    }
+
+    static bool IsPatternMatch(const RegexType* pattern, const Ch *str, SizeType length) {
+        std::match_results<const Ch*> r;
+        return std::regex_search(str, str + length, r, *pattern);
+    }
+#else
+    template <typename ValueType>
+    RegexType* CreatePattern(const ValueType&) { return 0; }
+
+    static bool IsPatternMatch(const RegexType*, const Ch *, SizeType) { return true; }
+#endif // RAPIDJSON_SCHEMA_USE_STDREGEX
+
+    void AddType(const ValueType& type) {
+        if      (type == GetNullString()   ) type_ |= 1 << kNullSchemaType;
+        else if (type == GetBooleanString()) type_ |= 1 << kBooleanSchemaType;
+        else if (type == GetObjectString() ) type_ |= 1 << kObjectSchemaType;
+        else if (type == GetArrayString()  ) type_ |= 1 << kArraySchemaType;
+        else if (type == GetStringString() ) type_ |= 1 << kStringSchemaType;
+        else if (type == GetIntegerString()) type_ |= 1 << kIntegerSchemaType;
+        else if (type == GetNumberString() ) type_ |= (1 << kNumberSchemaType) | (1 << kIntegerSchemaType);
+    }
+
+    bool CreateParallelValidator(Context& context) const {
+        if (enum_ || context.arrayUniqueness)
+            context.hasher = context.factory.CreateHasher();
+
+        if (validatorCount_) {
+            RAPIDJSON_ASSERT(context.validators == 0);
+            context.validators = static_cast<ISchemaValidator**>(context.factory.MallocState(sizeof(ISchemaValidator*) * validatorCount_));
+            context.validatorCount = validatorCount_;
+
+            if (allOf_.schemas)
+                CreateSchemaValidators(context, allOf_);
+
+            if (anyOf_.schemas)
+                CreateSchemaValidators(context, anyOf_);
+            
+            if (oneOf_.schemas)
+                CreateSchemaValidators(context, oneOf_);
+            
+            if (not_)
+                context.validators[notValidatorIndex_] = context.factory.CreateSchemaValidator(*not_);
+            
+            if (hasSchemaDependencies_) {
+                for (SizeType i = 0; i < propertyCount_; i++)
+                    if (properties_[i].dependenciesSchema)
+                        context.validators[properties_[i].dependenciesValidatorIndex] = context.factory.CreateSchemaValidator(*properties_[i].dependenciesSchema);
+            }
+        }
+
+        return true;
+    }
+
+    void CreateSchemaValidators(Context& context, const SchemaArray& schemas) const {
+        for (SizeType i = 0; i < schemas.count; i++)
+            context.validators[schemas.begin + i] = context.factory.CreateSchemaValidator(*schemas.schemas[i]);
+    }
+
+    // O(n)
+    bool FindPropertyIndex(const ValueType& name, SizeType* outIndex) const {
+        SizeType len = name.GetStringLength();
+        const Ch* str = name.GetString();
+        for (SizeType index = 0; index < propertyCount_; index++)
+            if (properties_[index].name.GetStringLength() == len && 
+                (std::memcmp(properties_[index].name.GetString(), str, sizeof(Ch) * len) == 0))
+            {
+                *outIndex = index;
+                return true;
+            }
+        return false;
+    }
+
+    bool CheckInt(Context& context, int64_t i) const {
+        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) {
+            DisallowedType(context, GetIntegerString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+
+        if (!minimum_.IsNull()) {
+            if (minimum_.IsInt64()) {
+                if (exclusiveMinimum_ ? i <= minimum_.GetInt64() : i < minimum_.GetInt64()) {
+                    context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
+                }
+            }
+            else if (minimum_.IsUint64()) {
+                context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString()); // i <= max(int64_t) < minimum.GetUint64()
+            }
+            else if (!CheckDoubleMinimum(context, static_cast<double>(i)))
+                return false;
+        }
+
+        if (!maximum_.IsNull()) {
+            if (maximum_.IsInt64()) {
+                if (exclusiveMaximum_ ? i >= maximum_.GetInt64() : i > maximum_.GetInt64()) {
+                    context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
+                }
+            }
+            else if (maximum_.IsUint64()) { }
+                /* do nothing */ // i <= max(int64_t) < maximum_.GetUint64()
+            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
+                return false;
+        }
+
+        if (!multipleOf_.IsNull()) {
+            if (multipleOf_.IsUint64()) {
+                if (static_cast<uint64_t>(i >= 0 ? i : -i) % multipleOf_.GetUint64() != 0) {
+                    context.error_handler.NotMultipleOf(i, multipleOf_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
+                }
+            }
+            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
+                return false;
+        }
+
+        return true;
+    }
+
+    bool CheckUint(Context& context, uint64_t i) const {
+        if (!(type_ & ((1 << kIntegerSchemaType) | (1 << kNumberSchemaType)))) {
+            DisallowedType(context, GetIntegerString());
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetTypeString());
+        }
+
+        if (!minimum_.IsNull()) {
+            if (minimum_.IsUint64()) {
+                if (exclusiveMinimum_ ? i <= minimum_.GetUint64() : i < minimum_.GetUint64()) {
+                    context.error_handler.BelowMinimum(i, minimum_, exclusiveMinimum_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
+                }
+            }
+            else if (minimum_.IsInt64())
+                /* do nothing */; // i >= 0 > minimum.Getint64()
+            else if (!CheckDoubleMinimum(context, static_cast<double>(i)))
+                return false;
+        }
+
+        if (!maximum_.IsNull()) {
+            if (maximum_.IsUint64()) {
+                if (exclusiveMaximum_ ? i >= maximum_.GetUint64() : i > maximum_.GetUint64()) {
+                    context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
+                }
+            }
+            else if (maximum_.IsInt64()) {
+                context.error_handler.AboveMaximum(i, maximum_, exclusiveMaximum_);
+                RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString()); // i >= 0 > maximum_
+            }
+            else if (!CheckDoubleMaximum(context, static_cast<double>(i)))
+                return false;
+        }
+
+        if (!multipleOf_.IsNull()) {
+            if (multipleOf_.IsUint64()) {
+                if (i % multipleOf_.GetUint64() != 0) {
+                    context.error_handler.NotMultipleOf(i, multipleOf_);
+                    RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
+                }
+            }
+            else if (!CheckDoubleMultipleOf(context, static_cast<double>(i)))
+                return false;
+        }
+
+        return true;
+    }
+
+    bool CheckDoubleMinimum(Context& context, double d) const {
+        if (exclusiveMinimum_ ? d <= minimum_.GetDouble() : d < minimum_.GetDouble()) {
+            context.error_handler.BelowMinimum(d, minimum_, exclusiveMinimum_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMinimumString());
+        }
+        return true;
+    }
+
+    bool CheckDoubleMaximum(Context& context, double d) const {
+        if (exclusiveMaximum_ ? d >= maximum_.GetDouble() : d > maximum_.GetDouble()) {
+            context.error_handler.AboveMaximum(d, maximum_, exclusiveMaximum_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMaximumString());
+        }
+        return true;
+    }
+
+    bool CheckDoubleMultipleOf(Context& context, double d) const {
+        double a = std::abs(d), b = std::abs(multipleOf_.GetDouble());
+        double q = std::floor(a / b);
+        double r = a - q * b;
+        if (r > 0.0) {
+            context.error_handler.NotMultipleOf(d, multipleOf_);
+            RAPIDJSON_INVALID_KEYWORD_RETURN(GetMultipleOfString());
+        }
+        return true;
+    }
+
+    void DisallowedType(Context& context, const ValueType& actualType) const {
+        ErrorHandler& eh = context.error_handler;
+        eh.StartDisallowedType();
+
+        if (type_ & (1 << kNullSchemaType)) eh.AddExpectedType(GetNullString());
+        if (type_ & (1 << kBooleanSchemaType)) eh.AddExpectedType(GetBooleanString());
+        if (type_ & (1 << kObjectSchemaType)) eh.AddExpectedType(GetObjectString());
+        if (type_ & (1 << kArraySchemaType)) eh.AddExpectedType(GetArrayString());
+        if (type_ & (1 << kStringSchemaType)) eh.AddExpectedType(GetStringString());
+
+        if (type_ & (1 << kNumberSchemaType)) eh.AddExpectedType(GetNumberString());
+        else if (type_ & (1 << kIntegerSchemaType)) eh.AddExpectedType(GetIntegerString());
+
+        eh.EndDisallowedType(actualType);
+    }
+
+    struct Property {
+        Property() : schema(), dependenciesSchema(), dependenciesValidatorIndex(), dependencies(), required(false) {}
+        ~Property() { AllocatorType::Free(dependencies); }
+        SValue name;
+        const SchemaType* schema;
+        const SchemaType* dependenciesSchema;
+        SizeType dependenciesValidatorIndex;
+        bool* dependencies;
+        bool required;
+    };
+
+    struct PatternProperty {
+        PatternProperty() : schema(), pattern() {}
+        ~PatternProperty() { 
+            if (pattern) {
+                pattern->~RegexType();
+                AllocatorType::Free(pattern);
+            }
+        }
+        const SchemaType* schema;
+        RegexType* pattern;
+    };
+
+    AllocatorType* allocator_;
+    SValue uri_;
+    PointerType pointer_;
+    const SchemaType* typeless_;
+    uint64_t* enum_;
+    SizeType enumCount_;
+    SchemaArray allOf_;
+    SchemaArray anyOf_;
+    SchemaArray oneOf_;
+    const SchemaType* not_;
+    unsigned type_; // bitmask of kSchemaType
+    SizeType validatorCount_;
+    SizeType notValidatorIndex_;
+
+    Property* properties_;
+    const SchemaType* additionalPropertiesSchema_;
+    PatternProperty* patternProperties_;
+    SizeType patternPropertyCount_;
+    SizeType propertyCount_;
+    SizeType minProperties_;
+    SizeType maxProperties_;
+    bool additionalProperties_;
+    bool hasDependencies_;
+    bool hasRequired_;
+    bool hasSchemaDependencies_;
+
+    const SchemaType* additionalItemsSchema_;
+    const SchemaType* itemsList_;
+    const SchemaType** itemsTuple_;
+    SizeType itemsTupleCount_;
+    SizeType minItems_;
+    SizeType maxItems_;
+    bool additionalItems_;
+    bool uniqueItems_;
+
+    RegexType* pattern_;
+    SizeType minLength_;
+    SizeType maxLength_;
+
+    SValue minimum_;
+    SValue maximum_;
+    SValue multipleOf_;
+    bool exclusiveMinimum_;
+    bool exclusiveMaximum_;
+    
+    SizeType defaultValueLength_;
+};
+
+template<typename Stack, typename Ch>
+struct TokenHelper {
+    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
+        *documentStack.template Push<Ch>() = '/';
+        char buffer[21];
+        size_t length = static_cast<size_t>((sizeof(SizeType) == 4 ? u32toa(index, buffer) : u64toa(index, buffer)) - buffer);
+        for (size_t i = 0; i < length; i++)
+            *documentStack.template Push<Ch>() = static_cast<Ch>(buffer[i]);
+    }
+};
+
+// Partial specialized version for char to prevent buffer copying.
+template <typename Stack>
+struct TokenHelper<Stack, char> {
+    RAPIDJSON_FORCEINLINE static void AppendIndexToken(Stack& documentStack, SizeType index) {
+        if (sizeof(SizeType) == 4) {
+            char *buffer = documentStack.template Push<char>(1 + 10); // '/' + uint
+            *buffer++ = '/';
+            const char* end = internal::u32toa(index, buffer);
+             documentStack.template Pop<char>(static_cast<size_t>(10 - (end - buffer)));
+        }
+        else {
+            char *buffer = documentStack.template Push<char>(1 + 20); // '/' + uint64
+            *buffer++ = '/';
+            const char* end = internal::u64toa(index, buffer);
+            documentStack.template Pop<char>(static_cast<size_t>(20 - (end - buffer)));
+        }
+    }
+};
+
+} // namespace internal
+
+///////////////////////////////////////////////////////////////////////////////
+// IGenericRemoteSchemaDocumentProvider
+
+template <typename SchemaDocumentType>
+class IGenericRemoteSchemaDocumentProvider {
+public:
+    typedef typename SchemaDocumentType::Ch Ch;
+
+    virtual ~IGenericRemoteSchemaDocumentProvider() {}
+    virtual const SchemaDocumentType* GetRemoteDocument(const Ch* uri, SizeType length) = 0;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericSchemaDocument
+
+//! JSON schema document.
+/*!
+    A JSON schema document is a compiled version of a JSON schema.
+    It is basically a tree of internal::Schema.
+
+    \note This is an immutable class (i.e. its instance cannot be modified after construction).
+    \tparam ValueT Type of JSON value (e.g. \c Value ), which also determine the encoding.
+    \tparam Allocator Allocator type for allocating memory of this document.
+*/
+template <typename ValueT, typename Allocator = CrtAllocator>
+class GenericSchemaDocument {
+public:
+    typedef ValueT ValueType;
+    typedef IGenericRemoteSchemaDocumentProvider<GenericSchemaDocument> IRemoteSchemaDocumentProviderType;
+    typedef Allocator AllocatorType;
+    typedef typename ValueType::EncodingType EncodingType;
+    typedef typename EncodingType::Ch Ch;
+    typedef internal::Schema<GenericSchemaDocument> SchemaType;
+    typedef GenericPointer<ValueType, Allocator> PointerType;
+    typedef GenericValue<EncodingType, Allocator> URIType;
+    friend class internal::Schema<GenericSchemaDocument>;
+    template <typename, typename, typename>
+    friend class GenericSchemaValidator;
+
+    //! Constructor.
+    /*!
+        Compile a JSON document into schema document.
+
+        \param document A JSON document as source.
+        \param uri The base URI of this schema document for purposes of violation reporting.
+        \param uriLength Length of \c name, in code points.
+        \param remoteProvider An optional remote schema document provider for resolving remote reference. Can be null.
+        \param allocator An optional allocator instance for allocating memory. Can be null.
+    */
+    explicit GenericSchemaDocument(const ValueType& document, const Ch* uri = 0, SizeType uriLength = 0,
+        IRemoteSchemaDocumentProviderType* remoteProvider = 0, Allocator* allocator = 0) :
+        remoteProvider_(remoteProvider),
+        allocator_(allocator),
+        ownAllocator_(),
+        root_(),
+        typeless_(),
+        schemaMap_(allocator, kInitialSchemaMapSize),
+        schemaRef_(allocator, kInitialSchemaRefSize)
+    {
+        if (!allocator_)
+            ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)();
+
+        Ch noUri[1] = {0};
+        uri_.SetString(uri ? uri : noUri, uriLength, *allocator_);
+
+        typeless_ = static_cast<SchemaType*>(allocator_->Malloc(sizeof(SchemaType)));
+        new (typeless_) SchemaType(this, PointerType(), ValueType(kObjectType).Move(), ValueType(kObjectType).Move(), allocator_);
+
+        // Generate root schema, it will call CreateSchema() to create sub-schemas,
+        // And call AddRefSchema() if there are $ref.
+        CreateSchemaRecursive(&root_, PointerType(), document, document);
+
+        // Resolve $ref
+        while (!schemaRef_.Empty()) {
+            SchemaRefEntry* refEntry = schemaRef_.template Pop<SchemaRefEntry>(1);
+            if (const SchemaType* s = GetSchema(refEntry->target)) {
+                if (refEntry->schema)
+                    *refEntry->schema = s;
+
+                // Create entry in map if not exist
+                if (!GetSchema(refEntry->source)) {
+                    new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(refEntry->source, const_cast<SchemaType*>(s), false, allocator_);
+                }
+            }
+            else if (refEntry->schema)
+                *refEntry->schema = typeless_;
+
+            refEntry->~SchemaRefEntry();
+        }
+
+        RAPIDJSON_ASSERT(root_ != 0);
+
+        schemaRef_.ShrinkToFit(); // Deallocate all memory for ref
+    }
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    //! Move constructor in C++11
+    GenericSchemaDocument(GenericSchemaDocument&& rhs) RAPIDJSON_NOEXCEPT :
+        remoteProvider_(rhs.remoteProvider_),
+        allocator_(rhs.allocator_),
+        ownAllocator_(rhs.ownAllocator_),
+        root_(rhs.root_),
+        typeless_(rhs.typeless_),
+        schemaMap_(std::move(rhs.schemaMap_)),
+        schemaRef_(std::move(rhs.schemaRef_)),
+        uri_(std::move(rhs.uri_))
+    {
+        rhs.remoteProvider_ = 0;
+        rhs.allocator_ = 0;
+        rhs.ownAllocator_ = 0;
+        rhs.typeless_ = 0;
+    }
+#endif
+
+    //! Destructor
+    ~GenericSchemaDocument() {
+        while (!schemaMap_.Empty())
+            schemaMap_.template Pop<SchemaEntry>(1)->~SchemaEntry();
+
+        if (typeless_) {
+            typeless_->~SchemaType();
+            Allocator::Free(typeless_);
+        }
+
+        RAPIDJSON_DELETE(ownAllocator_);
+    }
+
+    const URIType& GetURI() const { return uri_; }
+
+    //! Get the root schema.
+    const SchemaType& GetRoot() const { return *root_; }
+
+private:
+    //! Prohibit copying
+    GenericSchemaDocument(const GenericSchemaDocument&);
+    //! Prohibit assignment
+    GenericSchemaDocument& operator=(const GenericSchemaDocument&);
+
+    struct SchemaRefEntry {
+        SchemaRefEntry(const PointerType& s, const PointerType& t, const SchemaType** outSchema, Allocator *allocator) : source(s, allocator), target(t, allocator), schema(outSchema) {}
+        PointerType source;
+        PointerType target;
+        const SchemaType** schema;
+    };
+
+    struct SchemaEntry {
+        SchemaEntry(const PointerType& p, SchemaType* s, bool o, Allocator* allocator) : pointer(p, allocator), schema(s), owned(o) {}
+        ~SchemaEntry() {
+            if (owned) {
+                schema->~SchemaType();
+                Allocator::Free(schema);
+            }
+        }
+        PointerType pointer;
+        SchemaType* schema;
+        bool owned;
+    };
+
+    void CreateSchemaRecursive(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) {
+        if (schema)
+            *schema = typeless_;
+
+        if (v.GetType() == kObjectType) {
+            const SchemaType* s = GetSchema(pointer);
+            if (!s)
+                CreateSchema(schema, pointer, v, document);
+
+            for (typename ValueType::ConstMemberIterator itr = v.MemberBegin(); itr != v.MemberEnd(); ++itr)
+                CreateSchemaRecursive(0, pointer.Append(itr->name, allocator_), itr->value, document);
+        }
+        else if (v.GetType() == kArrayType)
+            for (SizeType i = 0; i < v.Size(); i++)
+                CreateSchemaRecursive(0, pointer.Append(i, allocator_), v[i], document);
+    }
+
+    void CreateSchema(const SchemaType** schema, const PointerType& pointer, const ValueType& v, const ValueType& document) {
+        RAPIDJSON_ASSERT(pointer.IsValid());
+        if (v.IsObject()) {
+            if (!HandleRefSchema(pointer, schema, v, document)) {
+                SchemaType* s = new (allocator_->Malloc(sizeof(SchemaType))) SchemaType(this, pointer, v, document, allocator_);
+                new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(pointer, s, true, allocator_);
+                if (schema)
+                    *schema = s;
+            }
+        }
+    }
+
+    bool HandleRefSchema(const PointerType& source, const SchemaType** schema, const ValueType& v, const ValueType& document) {
+        static const Ch kRefString[] = { '$', 'r', 'e', 'f', '\0' };
+        static const ValueType kRefValue(kRefString, 4);
+
+        typename ValueType::ConstMemberIterator itr = v.FindMember(kRefValue);
+        if (itr == v.MemberEnd())
+            return false;
+
+        if (itr->value.IsString()) {
+            SizeType len = itr->value.GetStringLength();
+            if (len > 0) {
+                const Ch* s = itr->value.GetString();
+                SizeType i = 0;
+                while (i < len && s[i] != '#') // Find the first #
+                    i++;
+
+                if (i > 0) { // Remote reference, resolve immediately
+                    if (remoteProvider_) {
+                        if (const GenericSchemaDocument* remoteDocument = remoteProvider_->GetRemoteDocument(s, i)) {
+                            PointerType pointer(&s[i], len - i, allocator_);
+                            if (pointer.IsValid()) {
+                                if (const SchemaType* sc = remoteDocument->GetSchema(pointer)) {
+                                    if (schema)
+                                        *schema = sc;
+                                    new (schemaMap_.template Push<SchemaEntry>()) SchemaEntry(source, const_cast<SchemaType*>(sc), false, allocator_);
+                                    return true;
+                                }
+                            }
+                        }
+                    }
+                }
+                else if (s[i] == '#') { // Local reference, defer resolution
+                    PointerType pointer(&s[i], len - i, allocator_);
+                    if (pointer.IsValid()) {
+                        if (const ValueType* nv = pointer.Get(document))
+                            if (HandleRefSchema(source, schema, *nv, document))
+                                return true;
+
+                        new (schemaRef_.template Push<SchemaRefEntry>()) SchemaRefEntry(source, pointer, schema, allocator_);
+                        return true;
+                    }
+                }
+            }
+        }
+        return false;
+    }
+
+    const SchemaType* GetSchema(const PointerType& pointer) const {
+        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
+            if (pointer == target->pointer)
+                return target->schema;
+        return 0;
+    }
+
+    PointerType GetPointer(const SchemaType* schema) const {
+        for (const SchemaEntry* target = schemaMap_.template Bottom<SchemaEntry>(); target != schemaMap_.template End<SchemaEntry>(); ++target)
+            if (schema == target->schema)
+                return target->pointer;
+        return PointerType();
+    }
+
+    const SchemaType* GetTypeless() const { return typeless_; }
+
+    static const size_t kInitialSchemaMapSize = 64;
+    static const size_t kInitialSchemaRefSize = 64;
+
+    IRemoteSchemaDocumentProviderType* remoteProvider_;
+    Allocator *allocator_;
+    Allocator *ownAllocator_;
+    const SchemaType* root_;                //!< Root schema.
+    SchemaType* typeless_;
+    internal::Stack<Allocator> schemaMap_;  // Stores created Pointer -> Schemas
+    internal::Stack<Allocator> schemaRef_;  // Stores Pointer from $ref and schema which holds the $ref
+    URIType uri_;
+};
+
+//! GenericSchemaDocument using Value type.
+typedef GenericSchemaDocument<Value> SchemaDocument;
+//! IGenericRemoteSchemaDocumentProvider using SchemaDocument.
+typedef IGenericRemoteSchemaDocumentProvider<SchemaDocument> IRemoteSchemaDocumentProvider;
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericSchemaValidator
+
+//! JSON Schema Validator.
+/*!
+    A SAX style JSON schema validator.
+    It uses a \c GenericSchemaDocument to validate SAX events.
+    It delegates the incoming SAX events to an output handler.
+    The default output handler does nothing.
+    It can be reused multiple times by calling \c Reset().
+
+    \tparam SchemaDocumentType Type of schema document.
+    \tparam OutputHandler Type of output handler. Default handler does nothing.
+    \tparam StateAllocator Allocator for storing the internal validation states.
+*/
+template <
+    typename SchemaDocumentType,
+    typename OutputHandler = BaseReaderHandler<typename SchemaDocumentType::SchemaType::EncodingType>,
+    typename StateAllocator = CrtAllocator>
+class GenericSchemaValidator :
+    public internal::ISchemaStateFactory<typename SchemaDocumentType::SchemaType>, 
+    public internal::ISchemaValidator,
+    public internal::IValidationErrorHandler<typename SchemaDocumentType::SchemaType>
+{
+public:
+    typedef typename SchemaDocumentType::SchemaType SchemaType;
+    typedef typename SchemaDocumentType::PointerType PointerType;
+    typedef typename SchemaType::EncodingType EncodingType;
+    typedef typename SchemaType::SValue SValue;
+    typedef typename EncodingType::Ch Ch;
+    typedef GenericStringRef<Ch> StringRefType;
+    typedef GenericValue<EncodingType, StateAllocator> ValueType;
+
+    //! Constructor without output handler.
+    /*!
+        \param schemaDocument The schema document to conform to.
+        \param allocator Optional allocator for storing internal validation states.
+        \param schemaStackCapacity Optional initial capacity of schema path stack.
+        \param documentStackCapacity Optional initial capacity of document path stack.
+    */
+    GenericSchemaValidator(
+        const SchemaDocumentType& schemaDocument,
+        StateAllocator* allocator = 0, 
+        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
+        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        :
+        schemaDocument_(&schemaDocument),
+        root_(schemaDocument.GetRoot()),
+        stateAllocator_(allocator),
+        ownStateAllocator_(0),
+        schemaStack_(allocator, schemaStackCapacity),
+        documentStack_(allocator, documentStackCapacity),
+        outputHandler_(0),
+        error_(kObjectType),
+        currentError_(),
+        missingDependents_(),
+        valid_(true)
+#if RAPIDJSON_SCHEMA_VERBOSE
+        , depth_(0)
+#endif
+    {
+    }
+
+    //! Constructor with output handler.
+    /*!
+        \param schemaDocument The schema document to conform to.
+        \param allocator Optional allocator for storing internal validation states.
+        \param schemaStackCapacity Optional initial capacity of schema path stack.
+        \param documentStackCapacity Optional initial capacity of document path stack.
+    */
+    GenericSchemaValidator(
+        const SchemaDocumentType& schemaDocument,
+        OutputHandler& outputHandler,
+        StateAllocator* allocator = 0, 
+        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
+        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        :
+        schemaDocument_(&schemaDocument),
+        root_(schemaDocument.GetRoot()),
+        stateAllocator_(allocator),
+        ownStateAllocator_(0),
+        schemaStack_(allocator, schemaStackCapacity),
+        documentStack_(allocator, documentStackCapacity),
+        outputHandler_(&outputHandler),
+        error_(kObjectType),
+        currentError_(),
+        missingDependents_(),
+        valid_(true)
+#if RAPIDJSON_SCHEMA_VERBOSE
+        , depth_(0)
+#endif
+    {
+    }
+
+    //! Destructor.
+    ~GenericSchemaValidator() {
+        Reset();
+        RAPIDJSON_DELETE(ownStateAllocator_);
+    }
+
+    //! Reset the internal states.
+    void Reset() {
+        while (!schemaStack_.Empty())
+            PopSchema();
+        documentStack_.Clear();
+        error_.SetObject();
+        currentError_.SetNull();
+        missingDependents_.SetNull();
+        valid_ = true;
+    }
+
+    //! Checks whether the current state is valid.
+    // Implementation of ISchemaValidator
+    virtual bool IsValid() const { return valid_; }
+
+    //! Gets the error object.
+    ValueType& GetError() { return error_; }
+    const ValueType& GetError() const { return error_; }
+
+    //! Gets the JSON pointer pointed to the invalid schema.
+    PointerType GetInvalidSchemaPointer() const {
+        return schemaStack_.Empty() ? PointerType() : CurrentSchema().GetPointer();
+    }
+
+    //! Gets the keyword of invalid schema.
+    const Ch* GetInvalidSchemaKeyword() const {
+        return schemaStack_.Empty() ? 0 : CurrentContext().invalidKeyword;
+    }
+
+    //! Gets the JSON pointer pointed to the invalid value.
+    PointerType GetInvalidDocumentPointer() const {
+        if (documentStack_.Empty()) {
+            return PointerType();
+        }
+        else {
+            return PointerType(documentStack_.template Bottom<Ch>(), documentStack_.GetSize() / sizeof(Ch));
+        }
+    }
+
+    void NotMultipleOf(int64_t actual, const SValue& expected) {
+        AddNumberError(SchemaType::GetMultipleOfString(), ValueType(actual).Move(), expected);
+    }
+    void NotMultipleOf(uint64_t actual, const SValue& expected) {
+        AddNumberError(SchemaType::GetMultipleOfString(), ValueType(actual).Move(), expected);
+    }
+    void NotMultipleOf(double actual, const SValue& expected) {
+        AddNumberError(SchemaType::GetMultipleOfString(), ValueType(actual).Move(), expected);
+    }
+    void AboveMaximum(int64_t actual, const SValue& expected, bool exclusive) {
+        AddNumberError(SchemaType::GetMaximumString(), ValueType(actual).Move(), expected,
+            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
+    }
+    void AboveMaximum(uint64_t actual, const SValue& expected, bool exclusive) {
+        AddNumberError(SchemaType::GetMaximumString(), ValueType(actual).Move(), expected,
+            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
+    }
+    void AboveMaximum(double actual, const SValue& expected, bool exclusive) {
+        AddNumberError(SchemaType::GetMaximumString(), ValueType(actual).Move(), expected,
+            exclusive ? &SchemaType::GetExclusiveMaximumString : 0);
+    }
+    void BelowMinimum(int64_t actual, const SValue& expected, bool exclusive) {
+        AddNumberError(SchemaType::GetMinimumString(), ValueType(actual).Move(), expected,
+            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
+    }
+    void BelowMinimum(uint64_t actual, const SValue& expected, bool exclusive) {
+        AddNumberError(SchemaType::GetMinimumString(), ValueType(actual).Move(), expected,
+            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
+    }
+    void BelowMinimum(double actual, const SValue& expected, bool exclusive) {
+        AddNumberError(SchemaType::GetMinimumString(), ValueType(actual).Move(), expected,
+            exclusive ? &SchemaType::GetExclusiveMinimumString : 0);
+    }
+
+    void TooLong(const Ch* str, SizeType length, SizeType expected) {
+        AddNumberError(SchemaType::GetMaxLengthString(),
+            ValueType(str, length, GetStateAllocator()).Move(), SValue(expected).Move());
+    }
+    void TooShort(const Ch* str, SizeType length, SizeType expected) {
+        AddNumberError(SchemaType::GetMinLengthString(),
+            ValueType(str, length, GetStateAllocator()).Move(), SValue(expected).Move());
+    }
+    void DoesNotMatch(const Ch* str, SizeType length) {
+        currentError_.SetObject();
+        currentError_.AddMember(GetActualString(), ValueType(str, length, GetStateAllocator()).Move(), GetStateAllocator());
+        AddCurrentError(SchemaType::GetPatternString());
+    }
+
+    void DisallowedItem(SizeType index) {
+        currentError_.SetObject();
+        currentError_.AddMember(GetDisallowedString(), ValueType(index).Move(), GetStateAllocator());
+        AddCurrentError(SchemaType::GetAdditionalItemsString(), true);
+    }
+    void TooFewItems(SizeType actualCount, SizeType expectedCount) {
+        AddNumberError(SchemaType::GetMinItemsString(),
+            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+    }
+    void TooManyItems(SizeType actualCount, SizeType expectedCount) {
+        AddNumberError(SchemaType::GetMaxItemsString(),
+            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+    }
+    void DuplicateItems(SizeType index1, SizeType index2) {
+        ValueType duplicates(kArrayType);
+        duplicates.PushBack(index1, GetStateAllocator());
+        duplicates.PushBack(index2, GetStateAllocator());
+        currentError_.SetObject();
+        currentError_.AddMember(GetDuplicatesString(), duplicates, GetStateAllocator());
+        AddCurrentError(SchemaType::GetUniqueItemsString(), true);
+    }
+
+    void TooManyProperties(SizeType actualCount, SizeType expectedCount) {
+        AddNumberError(SchemaType::GetMaxPropertiesString(),
+            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+    }
+    void TooFewProperties(SizeType actualCount, SizeType expectedCount) {
+        AddNumberError(SchemaType::GetMinPropertiesString(),
+            ValueType(actualCount).Move(), SValue(expectedCount).Move());
+    }
+    void StartMissingProperties() {
+        currentError_.SetArray();
+    }
+    void AddMissingProperty(const SValue& name) {
+        currentError_.PushBack(ValueType(name, GetStateAllocator()).Move(), GetStateAllocator());
+    }
+    bool EndMissingProperties() {
+        if (currentError_.Empty())
+            return false;
+        ValueType error(kObjectType);
+        error.AddMember(GetMissingString(), currentError_, GetStateAllocator());
+        currentError_ = error;
+        AddCurrentError(SchemaType::GetRequiredString());
+        return true;
+    }
+    void PropertyViolations(ISchemaValidator** subvalidators, SizeType count) {
+        for (SizeType i = 0; i < count; ++i)
+            MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
+    }
+    void DisallowedProperty(const Ch* name, SizeType length) {
+        currentError_.SetObject();
+        currentError_.AddMember(GetDisallowedString(), ValueType(name, length, GetStateAllocator()).Move(), GetStateAllocator());
+        AddCurrentError(SchemaType::GetAdditionalPropertiesString(), true);
+    }
+
+    void StartDependencyErrors() {
+        currentError_.SetObject();
+    }
+    void StartMissingDependentProperties() {
+        missingDependents_.SetArray();
+    }
+    void AddMissingDependentProperty(const SValue& targetName) {
+        missingDependents_.PushBack(ValueType(targetName, GetStateAllocator()).Move(), GetStateAllocator());
+    }
+    void EndMissingDependentProperties(const SValue& sourceName) {
+        if (!missingDependents_.Empty())
+            currentError_.AddMember(ValueType(sourceName, GetStateAllocator()).Move(),
+                missingDependents_, GetStateAllocator());
+    }
+    void AddDependencySchemaError(const SValue& sourceName, ISchemaValidator* subvalidator) {
+        currentError_.AddMember(ValueType(sourceName, GetStateAllocator()).Move(),
+            static_cast<GenericSchemaValidator*>(subvalidator)->GetError(), GetStateAllocator());
+    }
+    bool EndDependencyErrors() {
+        if (currentError_.ObjectEmpty())
+            return false;
+        ValueType error(kObjectType);
+        error.AddMember(GetErrorsString(), currentError_, GetStateAllocator());
+        currentError_ = error;
+        AddCurrentError(SchemaType::GetDependenciesString());
+        return true;
+    }
+
+    void DisallowedValue() {
+        currentError_.SetObject();
+        AddCurrentError(SchemaType::GetEnumString());
+    }
+    void StartDisallowedType() {
+        currentError_.SetArray();
+    }
+    void AddExpectedType(const typename SchemaType::ValueType& expectedType) {
+        currentError_.PushBack(ValueType(expectedType, GetStateAllocator()).Move(), GetStateAllocator());
+    }
+    void EndDisallowedType(const typename SchemaType::ValueType& actualType) {
+        ValueType error(kObjectType);
+        error.AddMember(GetExpectedString(), currentError_, GetStateAllocator());
+        error.AddMember(GetActualString(), ValueType(actualType, GetStateAllocator()).Move(), GetStateAllocator());
+        currentError_ = error;
+        AddCurrentError(SchemaType::GetTypeString());
+    }
+    void NotAllOf(ISchemaValidator** subvalidators, SizeType count) {
+        for (SizeType i = 0; i < count; ++i) {
+            MergeError(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError());
+        }
+    }
+    void NoneOf(ISchemaValidator** subvalidators, SizeType count) {
+        AddErrorArray(SchemaType::GetAnyOfString(), subvalidators, count);
+    }
+    void NotOneOf(ISchemaValidator** subvalidators, SizeType count) {
+        AddErrorArray(SchemaType::GetOneOfString(), subvalidators, count);
+    }
+    void Disallowed() {
+        currentError_.SetObject();
+        AddCurrentError(SchemaType::GetNotString());
+    }
+
+#define RAPIDJSON_STRING_(name, ...) \
+    static const StringRefType& Get##name##String() {\
+        static const Ch s[] = { __VA_ARGS__, '\0' };\
+        static const StringRefType v(s, static_cast<SizeType>(sizeof(s) / sizeof(Ch) - 1)); \
+        return v;\
+    }
+
+    RAPIDJSON_STRING_(InstanceRef, 'i', 'n', 's', 't', 'a', 'n', 'c', 'e', 'R', 'e', 'f')
+    RAPIDJSON_STRING_(SchemaRef, 's', 'c', 'h', 'e', 'm', 'a', 'R', 'e', 'f')
+    RAPIDJSON_STRING_(Expected, 'e', 'x', 'p', 'e', 'c', 't', 'e', 'd')
+    RAPIDJSON_STRING_(Actual, 'a', 'c', 't', 'u', 'a', 'l')
+    RAPIDJSON_STRING_(Disallowed, 'd', 'i', 's', 'a', 'l', 'l', 'o', 'w', 'e', 'd')
+    RAPIDJSON_STRING_(Missing, 'm', 'i', 's', 's', 'i', 'n', 'g')
+    RAPIDJSON_STRING_(Errors, 'e', 'r', 'r', 'o', 'r', 's')
+    RAPIDJSON_STRING_(Duplicates, 'd', 'u', 'p', 'l', 'i', 'c', 'a', 't', 'e', 's')
+
+#undef RAPIDJSON_STRING_
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_() \
+RAPIDJSON_MULTILINEMACRO_BEGIN\
+    *documentStack_.template Push<Ch>() = '\0';\
+    documentStack_.template Pop<Ch>(1);\
+    internal::PrintInvalidDocument(documentStack_.template Bottom<Ch>());\
+RAPIDJSON_MULTILINEMACRO_END
+#else
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_()
+#endif
+
+#define RAPIDJSON_SCHEMA_HANDLE_BEGIN_(method, arg1)\
+    if (!valid_) return false; \
+    if (!BeginValue() || !CurrentSchema().method arg1) {\
+        RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_();\
+        return valid_ = false;\
+    }
+
+#define RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2)\
+    for (Context* context = schemaStack_.template Bottom<Context>(); context != schemaStack_.template End<Context>(); context++) {\
+        if (context->hasher)\
+            static_cast<HasherType*>(context->hasher)->method arg2;\
+        if (context->validators)\
+            for (SizeType i_ = 0; i_ < context->validatorCount; i_++)\
+                static_cast<GenericSchemaValidator*>(context->validators[i_])->method arg2;\
+        if (context->patternPropertiesValidators)\
+            for (SizeType i_ = 0; i_ < context->patternPropertiesValidatorCount; i_++)\
+                static_cast<GenericSchemaValidator*>(context->patternPropertiesValidators[i_])->method arg2;\
+    }
+
+#define RAPIDJSON_SCHEMA_HANDLE_END_(method, arg2)\
+    return valid_ = EndValue() && (!outputHandler_ || outputHandler_->method arg2)
+
+#define RAPIDJSON_SCHEMA_HANDLE_VALUE_(method, arg1, arg2) \
+    RAPIDJSON_SCHEMA_HANDLE_BEGIN_   (method, arg1);\
+    RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(method, arg2);\
+    RAPIDJSON_SCHEMA_HANDLE_END_     (method, arg2)
+
+    bool Null()             { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Null,   (CurrentContext()), ( )); }
+    bool Bool(bool b)       { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Bool,   (CurrentContext(), b), (b)); }
+    bool Int(int i)         { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int,    (CurrentContext(), i), (i)); }
+    bool Uint(unsigned u)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint,   (CurrentContext(), u), (u)); }
+    bool Int64(int64_t i)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Int64,  (CurrentContext(), i), (i)); }
+    bool Uint64(uint64_t u) { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Uint64, (CurrentContext(), u), (u)); }
+    bool Double(double d)   { RAPIDJSON_SCHEMA_HANDLE_VALUE_(Double, (CurrentContext(), d), (d)); }
+    bool RawNumber(const Ch* str, SizeType length, bool copy)
+                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
+    bool String(const Ch* str, SizeType length, bool copy)
+                                    { RAPIDJSON_SCHEMA_HANDLE_VALUE_(String, (CurrentContext(), str, length, copy), (str, length, copy)); }
+
+    bool StartObject() {
+        RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartObject, (CurrentContext()));
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartObject, ());
+        return valid_ = !outputHandler_ || outputHandler_->StartObject();
+    }
+    
+    bool Key(const Ch* str, SizeType len, bool copy) {
+        if (!valid_) return false;
+        AppendToken(str, len);
+        if (!CurrentSchema().Key(CurrentContext(), str, len, copy)) return valid_ = false;
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(Key, (str, len, copy));
+        return valid_ = !outputHandler_ || outputHandler_->Key(str, len, copy);
+    }
+    
+    bool EndObject(SizeType memberCount) { 
+        if (!valid_) return false;
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndObject, (memberCount));
+        if (!CurrentSchema().EndObject(CurrentContext(), memberCount)) return valid_ = false;
+        RAPIDJSON_SCHEMA_HANDLE_END_(EndObject, (memberCount));
+    }
+
+    bool StartArray() {
+        RAPIDJSON_SCHEMA_HANDLE_BEGIN_(StartArray, (CurrentContext()));
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(StartArray, ());
+        return valid_ = !outputHandler_ || outputHandler_->StartArray();
+    }
+    
+    bool EndArray(SizeType elementCount) {
+        if (!valid_) return false;
+        RAPIDJSON_SCHEMA_HANDLE_PARALLEL_(EndArray, (elementCount));
+        if (!CurrentSchema().EndArray(CurrentContext(), elementCount)) return valid_ = false;
+        RAPIDJSON_SCHEMA_HANDLE_END_(EndArray, (elementCount));
+    }
+
+#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_VERBOSE_
+#undef RAPIDJSON_SCHEMA_HANDLE_BEGIN_
+#undef RAPIDJSON_SCHEMA_HANDLE_PARALLEL_
+#undef RAPIDJSON_SCHEMA_HANDLE_VALUE_
+
+    // Implementation of ISchemaStateFactory<SchemaType>
+    virtual ISchemaValidator* CreateSchemaValidator(const SchemaType& root) {
+        return new (GetStateAllocator().Malloc(sizeof(GenericSchemaValidator))) GenericSchemaValidator(*schemaDocument_, root, documentStack_.template Bottom<char>(), documentStack_.GetSize(),
+#if RAPIDJSON_SCHEMA_VERBOSE
+        depth_ + 1,
+#endif
+        &GetStateAllocator());
+    }
+
+    virtual void DestroySchemaValidator(ISchemaValidator* validator) {
+        GenericSchemaValidator* v = static_cast<GenericSchemaValidator*>(validator);
+        v->~GenericSchemaValidator();
+        StateAllocator::Free(v);
+    }
+
+    virtual void* CreateHasher() {
+        return new (GetStateAllocator().Malloc(sizeof(HasherType))) HasherType(&GetStateAllocator());
+    }
+
+    virtual uint64_t GetHashCode(void* hasher) {
+        return static_cast<HasherType*>(hasher)->GetHashCode();
+    }
+
+    virtual void DestroryHasher(void* hasher) {
+        HasherType* h = static_cast<HasherType*>(hasher);
+        h->~HasherType();
+        StateAllocator::Free(h);
+    }
+
+    virtual void* MallocState(size_t size) {
+        return GetStateAllocator().Malloc(size);
+    }
+
+    virtual void FreeState(void* p) {
+        StateAllocator::Free(p);
+    }
+
+private:
+    typedef typename SchemaType::Context Context;
+    typedef GenericValue<UTF8<>, StateAllocator> HashCodeArray;
+    typedef internal::Hasher<EncodingType, StateAllocator> HasherType;
+
+    GenericSchemaValidator( 
+        const SchemaDocumentType& schemaDocument,
+        const SchemaType& root,
+        const char* basePath, size_t basePathSize,
+#if RAPIDJSON_SCHEMA_VERBOSE
+        unsigned depth,
+#endif
+        StateAllocator* allocator = 0,
+        size_t schemaStackCapacity = kDefaultSchemaStackCapacity,
+        size_t documentStackCapacity = kDefaultDocumentStackCapacity)
+        :
+        schemaDocument_(&schemaDocument),
+        root_(root),
+        stateAllocator_(allocator),
+        ownStateAllocator_(0),
+        schemaStack_(allocator, schemaStackCapacity),
+        documentStack_(allocator, documentStackCapacity),
+        outputHandler_(0),
+        error_(kObjectType),
+        currentError_(),
+        missingDependents_(),
+        valid_(true)
+#if RAPIDJSON_SCHEMA_VERBOSE
+        , depth_(depth)
+#endif
+    {
+        if (basePath && basePathSize)
+            memcpy(documentStack_.template Push<char>(basePathSize), basePath, basePathSize);
+    }
+
+    StateAllocator& GetStateAllocator() {
+        if (!stateAllocator_)
+            stateAllocator_ = ownStateAllocator_ = RAPIDJSON_NEW(StateAllocator)();
+        return *stateAllocator_;
+    }
+
+    bool BeginValue() {
+        if (schemaStack_.Empty())
+            PushSchema(root_);
+        else {
+            if (CurrentContext().inArray)
+                internal::TokenHelper<internal::Stack<StateAllocator>, Ch>::AppendIndexToken(documentStack_, CurrentContext().arrayElementIndex);
+
+            if (!CurrentSchema().BeginValue(CurrentContext()))
+                return false;
+
+            SizeType count = CurrentContext().patternPropertiesSchemaCount;
+            const SchemaType** sa = CurrentContext().patternPropertiesSchemas;
+            typename Context::PatternValidatorType patternValidatorType = CurrentContext().valuePatternValidatorType;
+            bool valueUniqueness = CurrentContext().valueUniqueness;
+            RAPIDJSON_ASSERT(CurrentContext().valueSchema);
+            PushSchema(*CurrentContext().valueSchema);
+
+            if (count > 0) {
+                CurrentContext().objectPatternValidatorType = patternValidatorType;
+                ISchemaValidator**& va = CurrentContext().patternPropertiesValidators;
+                SizeType& validatorCount = CurrentContext().patternPropertiesValidatorCount;
+                va = static_cast<ISchemaValidator**>(MallocState(sizeof(ISchemaValidator*) * count));
+                for (SizeType i = 0; i < count; i++)
+                    va[validatorCount++] = CreateSchemaValidator(*sa[i]);
+            }
+
+            CurrentContext().arrayUniqueness = valueUniqueness;
+        }
+        return true;
+    }
+
+    bool EndValue() {
+        if (!CurrentSchema().EndValue(CurrentContext()))
+            return false;
+
+#if RAPIDJSON_SCHEMA_VERBOSE
+        GenericStringBuffer<EncodingType> sb;
+        schemaDocument_->GetPointer(&CurrentSchema()).Stringify(sb);
+
+        *documentStack_.template Push<Ch>() = '\0';
+        documentStack_.template Pop<Ch>(1);
+        internal::PrintValidatorPointers(depth_, sb.GetString(), documentStack_.template Bottom<Ch>());
+#endif
+
+        uint64_t h = CurrentContext().arrayUniqueness ? static_cast<HasherType*>(CurrentContext().hasher)->GetHashCode() : 0;
+        
+        PopSchema();
+
+        if (!schemaStack_.Empty()) {
+            Context& context = CurrentContext();
+            if (context.valueUniqueness) {
+                HashCodeArray* a = static_cast<HashCodeArray*>(context.arrayElementHashCodes);
+                if (!a)
+                    CurrentContext().arrayElementHashCodes = a = new (GetStateAllocator().Malloc(sizeof(HashCodeArray))) HashCodeArray(kArrayType);
+                for (typename HashCodeArray::ConstValueIterator itr = a->Begin(); itr != a->End(); ++itr)
+                    if (itr->GetUint64() == h) {
+                        DuplicateItems(static_cast<SizeType>(itr - a->Begin()), a->Size());
+                        RAPIDJSON_INVALID_KEYWORD_RETURN(SchemaType::GetUniqueItemsString());
+                    }
+                a->PushBack(h, GetStateAllocator());
+            }
+        }
+
+        // Remove the last token of document pointer
+        while (!documentStack_.Empty() && *documentStack_.template Pop<Ch>(1) != '/')
+            ;
+
+        return true;
+    }
+
+    void AppendToken(const Ch* str, SizeType len) {
+        documentStack_.template Reserve<Ch>(1 + len * 2); // worst case all characters are escaped as two characters
+        *documentStack_.template PushUnsafe<Ch>() = '/';
+        for (SizeType i = 0; i < len; i++) {
+            if (str[i] == '~') {
+                *documentStack_.template PushUnsafe<Ch>() = '~';
+                *documentStack_.template PushUnsafe<Ch>() = '0';
+            }
+            else if (str[i] == '/') {
+                *documentStack_.template PushUnsafe<Ch>() = '~';
+                *documentStack_.template PushUnsafe<Ch>() = '1';
+            }
+            else
+                *documentStack_.template PushUnsafe<Ch>() = str[i];
+        }
+    }
+
+    RAPIDJSON_FORCEINLINE void PushSchema(const SchemaType& schema) { new (schemaStack_.template Push<Context>()) Context(*this, *this, &schema); }
+    
+    RAPIDJSON_FORCEINLINE void PopSchema() {
+        Context* c = schemaStack_.template Pop<Context>(1);
+        if (HashCodeArray* a = static_cast<HashCodeArray*>(c->arrayElementHashCodes)) {
+            a->~HashCodeArray();
+            StateAllocator::Free(a);
+        }
+        c->~Context();
+    }
+
+    void AddErrorLocation(ValueType& result, bool parent) {
+        GenericStringBuffer<EncodingType> sb;
+        PointerType instancePointer = GetInvalidDocumentPointer();
+        ((parent && instancePointer.GetTokenCount() > 0)
+            ? PointerType(instancePointer.GetTokens(), instancePointer.GetTokenCount() - 1)
+            : instancePointer).StringifyUriFragment(sb);
+        ValueType instanceRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)),
+            GetStateAllocator());
+        result.AddMember(GetInstanceRefString(), instanceRef, GetStateAllocator());
+        sb.Clear();
+        memcpy(sb.Push(CurrentSchema().GetURI().GetStringLength()),
+            CurrentSchema().GetURI().GetString(),
+            CurrentSchema().GetURI().GetStringLength() * sizeof(Ch));
+        GetInvalidSchemaPointer().StringifyUriFragment(sb);
+        ValueType schemaRef(sb.GetString(), static_cast<SizeType>(sb.GetSize() / sizeof(Ch)),
+            GetStateAllocator());
+        result.AddMember(GetSchemaRefString(), schemaRef, GetStateAllocator());
+    }
+
+    void AddError(ValueType& keyword, ValueType& error) {
+        typename ValueType::MemberIterator member = error_.FindMember(keyword);
+        if (member == error_.MemberEnd())
+            error_.AddMember(keyword, error, GetStateAllocator());
+        else {
+            if (member->value.IsObject()) {
+                ValueType errors(kArrayType);
+                errors.PushBack(member->value, GetStateAllocator());
+                member->value = errors;
+            }
+            member->value.PushBack(error, GetStateAllocator());
+        }
+    }
+
+    void AddCurrentError(const typename SchemaType::ValueType& keyword, bool parent = false) {
+        AddErrorLocation(currentError_, parent);
+        AddError(ValueType(keyword, GetStateAllocator(), false).Move(), currentError_);
+    }
+
+    void MergeError(ValueType& other) {
+        for (typename ValueType::MemberIterator it = other.MemberBegin(), end = other.MemberEnd(); it != end; ++it) {
+            AddError(it->name, it->value);
+        }
+    }
+
+    void AddNumberError(const typename SchemaType::ValueType& keyword, ValueType& actual, const SValue& expected,
+        const typename SchemaType::ValueType& (*exclusive)() = 0) {
+        currentError_.SetObject();
+        currentError_.AddMember(GetActualString(), actual, GetStateAllocator());
+        currentError_.AddMember(GetExpectedString(), ValueType(expected, GetStateAllocator()).Move(), GetStateAllocator());
+        if (exclusive)
+            currentError_.AddMember(ValueType(exclusive(), GetStateAllocator()).Move(), true, GetStateAllocator());
+        AddCurrentError(keyword);
+    }
+
+    void AddErrorArray(const typename SchemaType::ValueType& keyword,
+        ISchemaValidator** subvalidators, SizeType count) {
+        ValueType errors(kArrayType);
+        for (SizeType i = 0; i < count; ++i)
+            errors.PushBack(static_cast<GenericSchemaValidator*>(subvalidators[i])->GetError(), GetStateAllocator());
+        currentError_.SetObject();
+        currentError_.AddMember(GetErrorsString(), errors, GetStateAllocator());
+        AddCurrentError(keyword);
+    }
+
+    const SchemaType& CurrentSchema() const { return *schemaStack_.template Top<Context>()->schema; }
+    Context& CurrentContext() { return *schemaStack_.template Top<Context>(); }
+    const Context& CurrentContext() const { return *schemaStack_.template Top<Context>(); }
+
+    static const size_t kDefaultSchemaStackCapacity = 1024;
+    static const size_t kDefaultDocumentStackCapacity = 256;
+    const SchemaDocumentType* schemaDocument_;
+    const SchemaType& root_;
+    StateAllocator* stateAllocator_;
+    StateAllocator* ownStateAllocator_;
+    internal::Stack<StateAllocator> schemaStack_;    //!< stack to store the current path of schema (BaseSchemaType *)
+    internal::Stack<StateAllocator> documentStack_;  //!< stack to store the current path of validating document (Ch)
+    OutputHandler* outputHandler_;
+    ValueType error_;
+    ValueType currentError_;
+    ValueType missingDependents_;
+    bool valid_;
+#if RAPIDJSON_SCHEMA_VERBOSE
+    unsigned depth_;
+#endif
+};
+
+typedef GenericSchemaValidator<SchemaDocument> SchemaValidator;
+
+///////////////////////////////////////////////////////////////////////////////
+// SchemaValidatingReader
+
+//! A helper class for parsing with validation.
+/*!
+    This helper class is a functor, designed as a parameter of \ref GenericDocument::Populate().
+
+    \tparam parseFlags Combination of \ref ParseFlag.
+    \tparam InputStream Type of input stream, implementing Stream concept.
+    \tparam SourceEncoding Encoding of the input stream.
+    \tparam SchemaDocumentType Type of schema document.
+    \tparam StackAllocator Allocator type for stack.
+*/
+template <
+    unsigned parseFlags,
+    typename InputStream,
+    typename SourceEncoding,
+    typename SchemaDocumentType = SchemaDocument,
+    typename StackAllocator = CrtAllocator>
+class SchemaValidatingReader {
+public:
+    typedef typename SchemaDocumentType::PointerType PointerType;
+    typedef typename InputStream::Ch Ch;
+    typedef GenericValue<SourceEncoding, StackAllocator> ValueType;
+
+    //! Constructor
+    /*!
+        \param is Input stream.
+        \param sd Schema document.
+    */
+    SchemaValidatingReader(InputStream& is, const SchemaDocumentType& sd) : is_(is), sd_(sd), invalidSchemaKeyword_(), error_(kObjectType), isValid_(true) {}
+
+    template <typename Handler>
+    bool operator()(Handler& handler) {
+        GenericReader<SourceEncoding, typename SchemaDocumentType::EncodingType, StackAllocator> reader;
+        GenericSchemaValidator<SchemaDocumentType, Handler> validator(sd_, handler);
+        parseResult_ = reader.template Parse<parseFlags>(is_, validator);
+
+        isValid_ = validator.IsValid();
+        if (isValid_) {
+            invalidSchemaPointer_ = PointerType();
+            invalidSchemaKeyword_ = 0;
+            invalidDocumentPointer_ = PointerType();
+            error_.SetObject();
+        }
+        else {
+            invalidSchemaPointer_ = validator.GetInvalidSchemaPointer();
+            invalidSchemaKeyword_ = validator.GetInvalidSchemaKeyword();
+            invalidDocumentPointer_ = validator.GetInvalidDocumentPointer();
+            error_.CopyFrom(validator.GetError(), allocator_);
+        }
+
+        return parseResult_;
+    }
+
+    const ParseResult& GetParseResult() const { return parseResult_; }
+    bool IsValid() const { return isValid_; }
+    const PointerType& GetInvalidSchemaPointer() const { return invalidSchemaPointer_; }
+    const Ch* GetInvalidSchemaKeyword() const { return invalidSchemaKeyword_; }
+    const PointerType& GetInvalidDocumentPointer() const { return invalidDocumentPointer_; }
+    const ValueType& GetError() const { return error_; }
+
+private:
+    InputStream& is_;
+    const SchemaDocumentType& sd_;
+
+    ParseResult parseResult_;
+    PointerType invalidSchemaPointer_;
+    const Ch* invalidSchemaKeyword_;
+    PointerType invalidDocumentPointer_;
+    StackAllocator allocator_;
+    ValueType error_;
+    bool isValid_;
+};
+
+RAPIDJSON_NAMESPACE_END
+RAPIDJSON_DIAG_POP
+
+#endif // RAPIDJSON_SCHEMA_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/stream.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/stream.h
new file mode 100644
index 000000000..7f2643e48
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/stream.h
@@ -0,0 +1,223 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+//
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+
+#include "rapidjson.h"
+
+#ifndef RAPIDJSON_STREAM_H_
+#define RAPIDJSON_STREAM_H_
+
+#include "encodings.h"
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+//  Stream
+
+/*! \class rapidjson::Stream
+    \brief Concept for reading and writing characters.
+
+    For read-only stream, no need to implement PutBegin(), Put(), Flush() and PutEnd().
+
+    For write-only stream, only need to implement Put() and Flush().
+
+\code
+concept Stream {
+    typename Ch;    //!< Character type of the stream.
+
+    //! Read the current character from stream without moving the read cursor.
+    Ch Peek() const;
+
+    //! Read the current character from stream and moving the read cursor to next character.
+    Ch Take();
+
+    //! Get the current read cursor.
+    //! \return Number of characters read from start.
+    size_t Tell();
+
+    //! Begin writing operation at the current read pointer.
+    //! \return The begin writer pointer.
+    Ch* PutBegin();
+
+    //! Write a character.
+    void Put(Ch c);
+
+    //! Flush the buffer.
+    void Flush();
+
+    //! End the writing operation.
+    //! \param begin The begin write pointer returned by PutBegin().
+    //! \return Number of characters written.
+    size_t PutEnd(Ch* begin);
+}
+\endcode
+*/
+
+//! Provides additional information for stream.
+/*!
+    By using traits pattern, this type provides a default configuration for stream.
+    For custom stream, this type can be specialized for other configuration.
+    See TEST(Reader, CustomStringStream) in readertest.cpp for example.
+*/
+template<typename Stream>
+struct StreamTraits {
+    //! Whether to make local copy of stream for optimization during parsing.
+    /*!
+        By default, for safety, streams do not use local copy optimization.
+        Stream that can be copied fast should specialize this, like StreamTraits<StringStream>.
+    */
+    enum { copyOptimization = 0 };
+};
+
+//! Reserve n characters for writing to a stream.
+template<typename Stream>
+inline void PutReserve(Stream& stream, size_t count) {
+    (void)stream;
+    (void)count;
+}
+
+//! Write character to a stream, presuming buffer is reserved.
+template<typename Stream>
+inline void PutUnsafe(Stream& stream, typename Stream::Ch c) {
+    stream.Put(c);
+}
+
+//! Put N copies of a character to a stream.
+template<typename Stream, typename Ch>
+inline void PutN(Stream& stream, Ch c, size_t n) {
+    PutReserve(stream, n);
+    for (size_t i = 0; i < n; i++)
+        PutUnsafe(stream, c);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// GenericStreamWrapper
+
+//! A Stream Wrapper
+/*! \tThis string stream is a wrapper for any stream by just forwarding any
+    \treceived message to the origin stream.
+    \note implements Stream concept
+*/
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4702)  // unreachable code
+RAPIDJSON_DIAG_OFF(4512)  // assignment operator could not be generated
+#endif
+
+template <typename InputStream, typename Encoding = UTF8<> >
+class GenericStreamWrapper {
+public:
+    typedef typename Encoding::Ch Ch;
+    GenericStreamWrapper(InputStream& is): is_(is) {}
+
+    Ch Peek() const { return is_.Peek(); }
+    Ch Take() { return is_.Take(); }
+    size_t Tell() { return is_.Tell(); }
+    Ch* PutBegin() { return is_.PutBegin(); }
+    void Put(Ch ch) { is_.Put(ch); }
+    void Flush() { is_.Flush(); }
+    size_t PutEnd(Ch* ch) { return is_.PutEnd(ch); }
+
+    // wrapper for MemoryStream
+    const Ch* Peek4() const { return is_.Peek4(); }
+
+    // wrapper for AutoUTFInputStream
+    UTFType GetType() const { return is_.GetType(); }
+    bool HasBOM() const { return is_.HasBOM(); }
+
+protected:
+    InputStream& is_;
+};
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800
+RAPIDJSON_DIAG_POP
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// StringStream
+
+//! Read-only string stream.
+/*! \note implements Stream concept
+*/
+template <typename Encoding>
+struct GenericStringStream {
+    typedef typename Encoding::Ch Ch;
+
+    GenericStringStream(const Ch *src) : src_(src), head_(src) {}
+
+    Ch Peek() const { return *src_; }
+    Ch Take() { return *src_++; }
+    size_t Tell() const { return static_cast<size_t>(src_ - head_); }
+
+    Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
+    void Put(Ch) { RAPIDJSON_ASSERT(false); }
+    void Flush() { RAPIDJSON_ASSERT(false); }
+    size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
+
+    const Ch* src_;     //!< Current read position.
+    const Ch* head_;    //!< Original head of the string.
+};
+
+template <typename Encoding>
+struct StreamTraits<GenericStringStream<Encoding> > {
+    enum { copyOptimization = 1 };
+};
+
+//! String stream with UTF8 encoding.
+typedef GenericStringStream<UTF8<> > StringStream;
+
+///////////////////////////////////////////////////////////////////////////////
+// InsituStringStream
+
+//! A read-write string stream.
+/*! This string stream is particularly designed for in-situ parsing.
+    \note implements Stream concept
+*/
+template <typename Encoding>
+struct GenericInsituStringStream {
+    typedef typename Encoding::Ch Ch;
+
+    GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
+
+    // Read
+    Ch Peek() { return *src_; }
+    Ch Take() { return *src_++; }
+    size_t Tell() { return static_cast<size_t>(src_ - head_); }
+
+    // Write
+    void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }
+
+    Ch* PutBegin() { return dst_ = src_; }
+    size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); }
+    void Flush() {}
+
+    Ch* Push(size_t count) { Ch* begin = dst_; dst_ += count; return begin; }
+    void Pop(size_t count) { dst_ -= count; }
+
+    Ch* src_;
+    Ch* dst_;
+    Ch* head_;
+};
+
+template <typename Encoding>
+struct StreamTraits<GenericInsituStringStream<Encoding> > {
+    enum { copyOptimization = 1 };
+};
+
+//! Insitu string stream with UTF8 encoding.
+typedef GenericInsituStringStream<UTF8<> > InsituStringStream;
+
+RAPIDJSON_NAMESPACE_END
+
+#endif // RAPIDJSON_STREAM_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/stringbuffer.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/stringbuffer.h
new file mode 100644
index 000000000..4e38b82c3
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/stringbuffer.h
@@ -0,0 +1,121 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_STRINGBUFFER_H_
+#define RAPIDJSON_STRINGBUFFER_H_
+
+#include "stream.h"
+#include "internal/stack.h"
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+#include <utility> // std::move
+#endif
+
+#include "internal/stack.h"
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+//! Represents an in-memory output stream.
+/*!
+    \tparam Encoding Encoding of the stream.
+    \tparam Allocator type for allocating memory buffer.
+    \note implements Stream concept
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+class GenericStringBuffer {
+public:
+    typedef typename Encoding::Ch Ch;
+
+    GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    GenericStringBuffer(GenericStringBuffer&& rhs) : stack_(std::move(rhs.stack_)) {}
+    GenericStringBuffer& operator=(GenericStringBuffer&& rhs) {
+        if (&rhs != this)
+            stack_ = std::move(rhs.stack_);
+        return *this;
+    }
+#endif
+
+    void Put(Ch c) { *stack_.template Push<Ch>() = c; }
+    void PutUnsafe(Ch c) { *stack_.template PushUnsafe<Ch>() = c; }
+    void Flush() {}
+
+    void Clear() { stack_.Clear(); }
+    void ShrinkToFit() {
+        // Push and pop a null terminator. This is safe.
+        *stack_.template Push<Ch>() = '\0';
+        stack_.ShrinkToFit();
+        stack_.template Pop<Ch>(1);
+    }
+
+    void Reserve(size_t count) { stack_.template Reserve<Ch>(count); }
+    Ch* Push(size_t count) { return stack_.template Push<Ch>(count); }
+    Ch* PushUnsafe(size_t count) { return stack_.template PushUnsafe<Ch>(count); }
+    void Pop(size_t count) { stack_.template Pop<Ch>(count); }
+
+    const Ch* GetString() const {
+        // Push and pop a null terminator. This is safe.
+        *stack_.template Push<Ch>() = '\0';
+        stack_.template Pop<Ch>(1);
+
+        return stack_.template Bottom<Ch>();
+    }
+
+    //! Get the size of string in bytes in the string buffer.
+    size_t GetSize() const { return stack_.GetSize(); }
+
+    //! Get the length of string in Ch in the string buffer.
+    size_t GetLength() const { return stack_.GetSize() / sizeof(Ch); }
+
+    static const size_t kDefaultCapacity = 256;
+    mutable internal::Stack<Allocator> stack_;
+
+private:
+    // Prohibit copy constructor & assignment operator.
+    GenericStringBuffer(const GenericStringBuffer&);
+    GenericStringBuffer& operator=(const GenericStringBuffer&);
+};
+
+//! String buffer with UTF8 encoding
+typedef GenericStringBuffer<UTF8<> > StringBuffer;
+
+template<typename Encoding, typename Allocator>
+inline void PutReserve(GenericStringBuffer<Encoding, Allocator>& stream, size_t count) {
+    stream.Reserve(count);
+}
+
+template<typename Encoding, typename Allocator>
+inline void PutUnsafe(GenericStringBuffer<Encoding, Allocator>& stream, typename Encoding::Ch c) {
+    stream.PutUnsafe(c);
+}
+
+//! Implement specialized version of PutN() with memset() for better performance.
+template<>
+inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
+    std::memset(stream.stack_.Push<char>(n), c, n * sizeof(c));
+}
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_STRINGBUFFER_H_
diff --git a/inference-engine/thirdparty/clDNN/utils/rapidjson/writer.h b/inference-engine/thirdparty/clDNN/utils/rapidjson/writer.h
new file mode 100644
index 000000000..6f5b69034
--- /dev/null
+++ b/inference-engine/thirdparty/clDNN/utils/rapidjson/writer.h
@@ -0,0 +1,709 @@
+// Tencent is pleased to support the open source community by making RapidJSON available.
+// 
+// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
+//
+// Licensed under the MIT License (the "License"); you may not use this file except
+// in compliance with the License. You may obtain a copy of the License at
+//
+// http://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software distributed 
+// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the 
+// specific language governing permissions and limitations under the License.
+
+#ifndef RAPIDJSON_WRITER_H_
+#define RAPIDJSON_WRITER_H_
+
+#include "stream.h"
+#include "internal/meta.h"
+#include "internal/stack.h"
+#include "internal/strfunc.h"
+#include "internal/dtoa.h"
+#include "internal/itoa.h"
+#include "stringbuffer.h"
+#include <new>      // placement new
+
+#if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+#endif
+#ifdef RAPIDJSON_SSE42
+#include <nmmintrin.h>
+#elif defined(RAPIDJSON_SSE2)
+#include <emmintrin.h>
+#elif defined(RAPIDJSON_NEON)
+#include <arm_neon.h>
+#endif
+
+#ifdef __clang__
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(padded)
+RAPIDJSON_DIAG_OFF(unreachable-code)
+RAPIDJSON_DIAG_OFF(c++98-compat)
+#elif defined(_MSC_VER)
+RAPIDJSON_DIAG_PUSH
+RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant
+#endif
+
+RAPIDJSON_NAMESPACE_BEGIN
+
+///////////////////////////////////////////////////////////////////////////////
+// WriteFlag
+
+/*! \def RAPIDJSON_WRITE_DEFAULT_FLAGS 
+    \ingroup RAPIDJSON_CONFIG
+    \brief User-defined kWriteDefaultFlags definition.
+
+    User can define this as any \c WriteFlag combinations.
+*/
+#ifndef RAPIDJSON_WRITE_DEFAULT_FLAGS
+#define RAPIDJSON_WRITE_DEFAULT_FLAGS kWriteNoFlags
+#endif
+
+//! Combination of writeFlags
+enum WriteFlag {
+    kWriteNoFlags = 0,              //!< No flags are set.
+    kWriteValidateEncodingFlag = 1, //!< Validate encoding of JSON strings.
+    kWriteNanAndInfFlag = 2,        //!< Allow writing of Infinity, -Infinity and NaN.
+    kWriteDefaultFlags = RAPIDJSON_WRITE_DEFAULT_FLAGS  //!< Default write flags. Can be customized by defining RAPIDJSON_WRITE_DEFAULT_FLAGS
+};
+
+//! JSON writer
+/*! Writer implements the concept Handler.
+    It generates JSON text by events to an output os.
+
+    User may programmatically calls the functions of a writer to generate JSON text.
+
+    On the other side, a writer can also be passed to objects that generates events, 
+
+    for example Reader::Parse() and Document::Accept().
+
+    \tparam OutputStream Type of output stream.
+    \tparam SourceEncoding Encoding of source string.
+    \tparam TargetEncoding Encoding of output stream.
+    \tparam StackAllocator Type of allocator for allocating memory of stack.
+    \note implements Handler concept
+*/
+template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename StackAllocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags>
+class Writer {
+public:
+    typedef typename SourceEncoding::Ch Ch;
+
+    static const int kDefaultMaxDecimalPlaces = 324;
+
+    //! Constructor
+    /*! \param os Output stream.
+        \param stackAllocator User supplied allocator. If it is null, it will create a private one.
+        \param levelDepth Initial capacity of stack.
+    */
+    explicit
+    Writer(OutputStream& os, StackAllocator* stackAllocator = 0, size_t levelDepth = kDefaultLevelDepth) : 
+        os_(&os), level_stack_(stackAllocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+
+    explicit
+    Writer(StackAllocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) :
+        os_(0), level_stack_(allocator, levelDepth * sizeof(Level)), maxDecimalPlaces_(kDefaultMaxDecimalPlaces), hasRoot_(false) {}
+
+#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
+    Writer(Writer&& rhs) :
+        os_(rhs.os_), level_stack_(std::move(rhs.level_stack_)), maxDecimalPlaces_(rhs.maxDecimalPlaces_), hasRoot_(rhs.hasRoot_) {
+        rhs.os_ = 0;
+    }
+#endif
+
+    //! Reset the writer with a new stream.
+    /*!
+        This function reset the writer with a new stream and default settings,
+        in order to make a Writer object reusable for output multiple JSONs.
+
+        \param os New output stream.
+        \code
+        Writer<OutputStream> writer(os1);
+        writer.StartObject();
+        // ...
+        writer.EndObject();
+
+        writer.Reset(os2);
+        writer.StartObject();
+        // ...
+        writer.EndObject();
+        \endcode
+    */
+    void Reset(OutputStream& os) {
+        os_ = &os;
+        hasRoot_ = false;
+        level_stack_.Clear();
+    }
+
+    //! Checks whether the output is a complete JSON.
+    /*!
+        A complete JSON has a complete root object or array.
+    */
+    bool IsComplete() const {
+        return hasRoot_ && level_stack_.Empty();
+    }
+
+    int GetMaxDecimalPlaces() const {
+        return maxDecimalPlaces_;
+    }
+
+    //! Sets the maximum number of decimal places for double output.
+    /*!
+        This setting truncates the output with specified number of decimal places.
+
+        For example, 
+
+        \code
+        writer.SetMaxDecimalPlaces(3);
+        writer.StartArray();
+        writer.Double(0.12345);                 // "0.123"
+        writer.Double(0.0001);                  // "0.0"
+        writer.Double(1.234567890123456e30);    // "1.234567890123456e30" (do not truncate significand for positive exponent)
+        writer.Double(1.23e-4);                 // "0.0"                  (do truncate significand for negative exponent)
+        writer.EndArray();
+        \endcode
+
+        The default setting does not truncate any decimal places. You can restore to this setting by calling
+        \code
+        writer.SetMaxDecimalPlaces(Writer::kDefaultMaxDecimalPlaces);
+        \endcode
+    */
+    void SetMaxDecimalPlaces(int maxDecimalPlaces) {
+        maxDecimalPlaces_ = maxDecimalPlaces;
+    }
+
+    /*!@name Implementation of Handler
+        \see Handler
+    */
+    //@{
+
+    bool Null()                 { Prefix(kNullType);   return EndValue(WriteNull()); }
+    bool Bool(bool b)           { Prefix(b ? kTrueType : kFalseType); return EndValue(WriteBool(b)); }
+    bool Int(int i)             { Prefix(kNumberType); return EndValue(WriteInt(i)); }
+    bool Uint(unsigned u)       { Prefix(kNumberType); return EndValue(WriteUint(u)); }
+    bool Int64(int64_t i64)     { Prefix(kNumberType); return EndValue(WriteInt64(i64)); }
+    bool Uint64(uint64_t u64)   { Prefix(kNumberType); return EndValue(WriteUint64(u64)); }
+
+    //! Writes the given \c double value to the stream
+    /*!
+        \param d The value to be written.
+        \return Whether it is succeed.
+    */
+    bool Double(double d)       { Prefix(kNumberType); return EndValue(WriteDouble(d)); }
+
+    bool RawNumber(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy;
+        Prefix(kNumberType);
+        return EndValue(WriteString(str, length));
+    }
+
+    bool String(const Ch* str, SizeType length, bool copy = false) {
+        RAPIDJSON_ASSERT(str != 0);
+        (void)copy;
+        Prefix(kStringType);
+        return EndValue(WriteString(str, length));
+    }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool String(const std::basic_string<Ch>& str) {
+        return String(str.data(), SizeType(str.size()));
+    }
+#endif
+
+    bool StartObject() {
+        Prefix(kObjectType);
+        new (level_stack_.template Push<Level>()) Level(false);
+        return WriteStartObject();
+    }
+
+    bool Key(const Ch* str, SizeType length, bool copy = false) { return String(str, length, copy); }
+
+#if RAPIDJSON_HAS_STDSTRING
+    bool Key(const std::basic_string<Ch>& str)
+    {
+      return Key(str.data(), SizeType(str.size()));
+    }
+#endif
+	
+    bool EndObject(SizeType memberCount = 0) {
+        (void)memberCount;
+        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level)); // not inside an Object
+        RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray); // currently inside an Array, not Object
+        RAPIDJSON_ASSERT(0 == level_stack_.template Top<Level>()->valueCount % 2); // Object has a Key without a Value
+        level_stack_.template Pop<Level>(1);
+        return EndValue(WriteEndObject());
+    }
+
+    bool StartArray() {
+        Prefix(kArrayType);
+        new (level_stack_.template Push<Level>()) Level(true);
+        return WriteStartArray();
+    }
+
+    bool EndArray(SizeType elementCount = 0) {
+        (void)elementCount;
+        RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
+        RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray);
+        level_stack_.template Pop<Level>(1);
+        return EndValue(WriteEndArray());
+    }
+    //@}
+
+    /*! @name Convenience extensions */
+    //@{
+
+    //! Simpler but slower overload.
+    bool String(const Ch* const& str) { return String(str, internal::StrLen(str)); }
+    bool Key(const Ch* const& str) { return Key(str, internal::StrLen(str)); }
+    
+    //@}
+
+    //! Write a raw JSON value.
+    /*!
+        For user to write a stringified JSON as a value.
+
+        \param json A well-formed JSON value. It should not contain null character within [0, length - 1] range.
+        \param length Length of the json.
+        \param type Type of the root of json.
+    */
+    bool RawValue(const Ch* json, size_t length, Type type) {
+        RAPIDJSON_ASSERT(json != 0);
+        Prefix(type);
+        return EndValue(WriteRawValue(json, length));
+    }
+
+    //! Flush the output stream.
+    /*!
+        Allows the user to flush the output stream immediately.
+     */
+    void Flush() {
+        os_->Flush();
+    }
+
+protected:
+    //! Information for each nested level
+    struct Level {
+        Level(bool inArray_) : valueCount(0), inArray(inArray_) {}
+        size_t valueCount;  //!< number of values in this level
+        bool inArray;       //!< true if in array, otherwise in object
+    };
+
+    static const size_t kDefaultLevelDepth = 32;
+
+    bool WriteNull()  {
+        PutReserve(*os_, 4);
+        PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 'l'); return true;
+    }
+
+    bool WriteBool(bool b)  {
+        if (b) {
+            PutReserve(*os_, 4);
+            PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'r'); PutUnsafe(*os_, 'u'); PutUnsafe(*os_, 'e');
+        }
+        else {
+            PutReserve(*os_, 5);
+            PutUnsafe(*os_, 'f'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'l'); PutUnsafe(*os_, 's'); PutUnsafe(*os_, 'e');
+        }
+        return true;
+    }
+
+    bool WriteInt(int i) {
+        char buffer[11];
+        const char* end = internal::i32toa(i, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
+        for (const char* p = buffer; p != end; ++p)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
+        return true;
+    }
+
+    bool WriteUint(unsigned u) {
+        char buffer[10];
+        const char* end = internal::u32toa(u, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
+        for (const char* p = buffer; p != end; ++p)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
+        return true;
+    }
+
+    bool WriteInt64(int64_t i64) {
+        char buffer[21];
+        const char* end = internal::i64toa(i64, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
+        for (const char* p = buffer; p != end; ++p)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
+        return true;
+    }
+
+    bool WriteUint64(uint64_t u64) {
+        char buffer[20];
+        char* end = internal::u64toa(u64, buffer);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
+        for (char* p = buffer; p != end; ++p)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
+        return true;
+    }
+
+    bool WriteDouble(double d) {
+        if (internal::Double(d).IsNanOrInf()) {
+            if (!(writeFlags & kWriteNanAndInfFlag))
+                return false;
+            if (internal::Double(d).IsNan()) {
+                PutReserve(*os_, 3);
+                PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+                return true;
+            }
+            if (internal::Double(d).Sign()) {
+                PutReserve(*os_, 9);
+                PutUnsafe(*os_, '-');
+            }
+            else
+                PutReserve(*os_, 8);
+            PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+            PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+            return true;
+        }
+
+        char buffer[25];
+        char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+        PutReserve(*os_, static_cast<size_t>(end - buffer));
+        for (char* p = buffer; p != end; ++p)
+            PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(*p));
+        return true;
+    }
+
+    bool WriteString(const Ch* str, SizeType length)  {
+        static const typename OutputStream::Ch hexDigits[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+        static const char escape[256] = {
+#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+            //0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00
+            'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10
+              0,   0, '"',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 20
+            Z16, Z16,                                                                       // 30~4F
+              0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\\',   0,   0,   0, // 50
+            Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16                                // 60~FF
+#undef Z16
+        };
+
+        if (TargetEncoding::supportUnicode)
+            PutReserve(*os_, 2 + length * 6); // "\uxxxx..."
+        else
+            PutReserve(*os_, 2 + length * 12);  // "\uxxxx\uyyyy..."
+
+        PutUnsafe(*os_, '\"');
+        GenericStringStream<SourceEncoding> is(str);
+        while (ScanWriteUnescapedString(is, length)) {
+            const Ch c = is.Peek();
+            if (!TargetEncoding::supportUnicode && static_cast<unsigned>(c) >= 0x80) {
+                // Unicode escaping
+                unsigned codepoint;
+                if (RAPIDJSON_UNLIKELY(!SourceEncoding::Decode(is, &codepoint)))
+                    return false;
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, 'u');
+                if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) {
+                    PutUnsafe(*os_, hexDigits[(codepoint >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(codepoint      ) & 15]);
+                }
+                else {
+                    RAPIDJSON_ASSERT(codepoint >= 0x010000 && codepoint <= 0x10FFFF);
+                    // Surrogate pair
+                    unsigned s = codepoint - 0x010000;
+                    unsigned lead = (s >> 10) + 0xD800;
+                    unsigned trail = (s & 0x3FF) + 0xDC00;
+                    PutUnsafe(*os_, hexDigits[(lead >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(lead      ) & 15]);
+                    PutUnsafe(*os_, '\\');
+                    PutUnsafe(*os_, 'u');
+                    PutUnsafe(*os_, hexDigits[(trail >> 12) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  8) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail >>  4) & 15]);
+                    PutUnsafe(*os_, hexDigits[(trail      ) & 15]);                    
+                }
+            }
+            else if ((sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) && RAPIDJSON_UNLIKELY(escape[static_cast<unsigned char>(c)]))  {
+                is.Take();
+                PutUnsafe(*os_, '\\');
+                PutUnsafe(*os_, static_cast<typename OutputStream::Ch>(escape[static_cast<unsigned char>(c)]));
+                if (escape[static_cast<unsigned char>(c)] == 'u') {
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, '0');
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) >> 4]);
+                    PutUnsafe(*os_, hexDigits[static_cast<unsigned char>(c) & 0xF]);
+                }
+            }
+            else if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? 
+                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
+                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+                return false;
+        }
+        PutUnsafe(*os_, '\"');
+        return true;
+    }
+
+    bool ScanWriteUnescapedString(GenericStringStream<SourceEncoding>& is, size_t length) {
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+    }
+
+    bool WriteStartObject() { os_->Put('{'); return true; }
+    bool WriteEndObject()   { os_->Put('}'); return true; }
+    bool WriteStartArray()  { os_->Put('['); return true; }
+    bool WriteEndArray()    { os_->Put(']'); return true; }
+
+    bool WriteRawValue(const Ch* json, size_t length) {
+        PutReserve(*os_, length);
+        GenericStringStream<SourceEncoding> is(json);
+        while (RAPIDJSON_LIKELY(is.Tell() < length)) {
+            RAPIDJSON_ASSERT(is.Peek() != '\0');
+            if (RAPIDJSON_UNLIKELY(!(writeFlags & kWriteValidateEncodingFlag ? 
+                Transcoder<SourceEncoding, TargetEncoding>::Validate(is, *os_) :
+                Transcoder<SourceEncoding, TargetEncoding>::TranscodeUnsafe(is, *os_))))
+                return false;
+        }
+        return true;
+    }
+
+    void Prefix(Type type) {
+        (void)type;
+        if (RAPIDJSON_LIKELY(level_stack_.GetSize() != 0)) { // this value is not at root
+            Level* level = level_stack_.template Top<Level>();
+            if (level->valueCount > 0) {
+                if (level->inArray) 
+                    os_->Put(','); // add comma if it is not the first element in array
+                else  // in object
+                    os_->Put((level->valueCount % 2 == 0) ? ',' : ':');
+            }
+            if (!level->inArray && level->valueCount % 2 == 0)
+                RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+            level->valueCount++;
+        }
+        else {
+            RAPIDJSON_ASSERT(!hasRoot_);    // Should only has one and only one root.
+            hasRoot_ = true;
+        }
+    }
+
+    // Flush the value if it is the top level one.
+    bool EndValue(bool ret) {
+        if (RAPIDJSON_UNLIKELY(level_stack_.Empty()))   // end of json text
+            Flush();
+        return ret;
+    }
+
+    OutputStream* os_;
+    internal::Stack<StackAllocator> level_stack_;
+    int maxDecimalPlaces_;
+    bool hasRoot_;
+
+private:
+    // Prohibit copy constructor & assignment operator.
+    Writer(const Writer&);
+    Writer& operator=(const Writer&);
+};
+
+// Full specialization for StringStream to prevent memory copying
+
+template<>
+inline bool Writer<StringBuffer>::WriteInt(int i) {
+    char *buffer = os_->Push(11);
+    const char* end = internal::i32toa(i, buffer);
+    os_->Pop(static_cast<size_t>(11 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteUint(unsigned u) {
+    char *buffer = os_->Push(10);
+    const char* end = internal::u32toa(u, buffer);
+    os_->Pop(static_cast<size_t>(10 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteInt64(int64_t i64) {
+    char *buffer = os_->Push(21);
+    const char* end = internal::i64toa(i64, buffer);
+    os_->Pop(static_cast<size_t>(21 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteUint64(uint64_t u) {
+    char *buffer = os_->Push(20);
+    const char* end = internal::u64toa(u, buffer);
+    os_->Pop(static_cast<size_t>(20 - (end - buffer)));
+    return true;
+}
+
+template<>
+inline bool Writer<StringBuffer>::WriteDouble(double d) {
+    if (internal::Double(d).IsNanOrInf()) {
+        // Note: This code path can only be reached if (RAPIDJSON_WRITE_DEFAULT_FLAGS & kWriteNanAndInfFlag).
+        if (!(kWriteDefaultFlags & kWriteNanAndInfFlag))
+            return false;
+        if (internal::Double(d).IsNan()) {
+            PutReserve(*os_, 3);
+            PutUnsafe(*os_, 'N'); PutUnsafe(*os_, 'a'); PutUnsafe(*os_, 'N');
+            return true;
+        }
+        if (internal::Double(d).Sign()) {
+            PutReserve(*os_, 9);
+            PutUnsafe(*os_, '-');
+        }
+        else
+            PutReserve(*os_, 8);
+        PutUnsafe(*os_, 'I'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'f');
+        PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 'n'); PutUnsafe(*os_, 'i'); PutUnsafe(*os_, 't'); PutUnsafe(*os_, 'y');
+        return true;
+    }
+    
+    char *buffer = os_->Push(25);
+    char* end = internal::dtoa(d, buffer, maxDecimalPlaces_);
+    os_->Pop(static_cast<size_t>(25 - (end - buffer)));
+    return true;
+}
+
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of string using SIMD
+    static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
+    static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
+    static const char space[16]  = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
+    const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
+    const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
+    const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
+
+    for (; p != endAligned; p += 16) {
+        const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
+        const __m128i t1 = _mm_cmpeq_epi8(s, dq);
+        const __m128i t2 = _mm_cmpeq_epi8(s, bs);
+        const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
+        const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
+        unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
+        if (RAPIDJSON_UNLIKELY(r != 0)) {   // some of characters is escaped
+            SizeType len;
+#ifdef _MSC_VER         // Find the index of first escaped
+            unsigned long offset;
+            _BitScanForward(&offset, r);
+            len = offset;
+#else
+            len = static_cast<SizeType>(__builtin_ffs(r) - 1);
+#endif
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        _mm_storeu_si128(reinterpret_cast<__m128i *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#elif defined(RAPIDJSON_NEON)
+template<>
+inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, size_t length) {
+    if (length < 16)
+        return RAPIDJSON_LIKELY(is.Tell() < length);
+
+    if (!RAPIDJSON_LIKELY(is.Tell() < length))
+        return false;
+
+    const char* p = is.src_;
+    const char* end = is.head_ + length;
+    const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15));
+    const char* endAligned = reinterpret_cast<const char*>(reinterpret_cast<size_t>(end) & static_cast<size_t>(~15));
+    if (nextAligned > end)
+        return true;
+
+    while (p != nextAligned)
+        if (*p < 0x20 || *p == '\"' || *p == '\\') {
+            is.src_ = p;
+            return RAPIDJSON_LIKELY(is.Tell() < length);
+        }
+        else
+            os_->PutUnsafe(*p++);
+
+    // The rest of string using SIMD
+    const uint8x16_t s0 = vmovq_n_u8('"');
+    const uint8x16_t s1 = vmovq_n_u8('\\');
+    const uint8x16_t s2 = vmovq_n_u8('\b');
+    const uint8x16_t s3 = vmovq_n_u8(32);
+
+    for (; p != endAligned; p += 16) {
+        const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p));
+        uint8x16_t x = vceqq_u8(s, s0);
+        x = vorrq_u8(x, vceqq_u8(s, s1));
+        x = vorrq_u8(x, vceqq_u8(s, s2));
+        x = vorrq_u8(x, vcltq_u8(s, s3));
+
+        x = vrev64q_u8(x);                     // Rev in 64
+        uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0);   // extract
+        uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1);  // extract
+
+        SizeType len = 0;
+        bool escaped = false;
+        if (low == 0) {
+            if (high != 0) {
+                unsigned lz = (unsigned)__builtin_clzll(high);
+                len = 8 + (lz >> 3);
+                escaped = true;
+            }
+        } else {
+            unsigned lz = (unsigned)__builtin_clzll(low);
+            len = lz >> 3;
+            escaped = true;
+        }
+        if (RAPIDJSON_UNLIKELY(escaped)) {   // some of characters is escaped
+            char* q = reinterpret_cast<char*>(os_->PushUnsafe(len));
+            for (size_t i = 0; i < len; i++)
+                q[i] = p[i];
+
+            p += len;
+            break;
+        }
+        vst1q_u8(reinterpret_cast<uint8_t *>(os_->PushUnsafe(16)), s);
+    }
+
+    is.src_ = p;
+    return RAPIDJSON_LIKELY(is.Tell() < length);
+}
+#endif // RAPIDJSON_NEON
+
+RAPIDJSON_NAMESPACE_END
+
+#if defined(_MSC_VER) || defined(__clang__)
+RAPIDJSON_DIAG_POP
+#endif
+
+#endif // RAPIDJSON_RAPIDJSON_H_
diff --git a/inference-engine/thirdparty/clDNN/version.json b/inference-engine/thirdparty/clDNN/version.json
index a26804e98..9bb5352f3 100644
--- a/inference-engine/thirdparty/clDNN/version.json
+++ b/inference-engine/thirdparty/clDNN/version.json
@@ -3,7 +3,7 @@
 {
     "major": 1,            # clDNN major version (major version of API).
     "minor": 4,            # clDNN minor version (correlated with major API version of Inference Engine).
-    "build": 14,           # clDNN build version (correlated with ordinal numeber of public release of clDNN).
+    "build": 23,           # clDNN build version (correlated with ordinal numeber of public release of clDNN).
     "revision_base":    0, # Offset that will be subtracted from environment variable provided by build system.
     "revision_min":    -1  # Minumum value of revision. Computed value of revision will be clamped from below by this value.
 }
 \ No newline at end of file
diff --git a/inference-engine/thirdparty/fluid/checksum.txt b/inference-engine/thirdparty/fluid/checksum.txt
index d912ec087..ba34e30ff 100644
--- a/inference-engine/thirdparty/fluid/checksum.txt
+++ b/inference-engine/thirdparty/fluid/checksum.txt
@@ -1 +1 @@
-5d28798fbe1b11d9c9d6fcd28c02f07e
+b4a07b700b3cd4537289644b593edbc4
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/CMakeLists.txt b/inference-engine/thirdparty/fluid/modules/gapi/CMakeLists.txt
index ec05b385c..cc4cef700 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/CMakeLists.txt
+++ b/inference-engine/thirdparty/fluid/modules/gapi/CMakeLists.txt
@@ -23,6 +23,7 @@ file(GLOB gapi_ext_hdrs
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/util/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/cpu/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/gpu/*.hpp"
+    "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/ocl/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/fluid/*.hpp"
     "${CMAKE_CURRENT_LIST_DIR}/include/opencv2/${name}/own/*.hpp"
     )
@@ -72,11 +73,11 @@ set(gapi_srcs
     src/backends/fluid/gfluidimgproc_func.dispatch.cpp
     src/backends/fluid/gfluidcore.cpp
 
-    # GPU Backend (currently built-in)
-    src/backends/gpu/ggpubackend.cpp
-    src/backends/gpu/ggpukernel.cpp
-    src/backends/gpu/ggpuimgproc.cpp
-    src/backends/gpu/ggpucore.cpp
+    # OCL Backend (currently built-in)
+    src/backends/ocl/goclbackend.cpp
+    src/backends/ocl/goclkernel.cpp
+    src/backends/ocl/goclimgproc.cpp
+    src/backends/ocl/goclcore.cpp
 
     # Compound
     src/backends/common/gcompoundbackend.cpp
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/cmake/init.cmake b/inference-engine/thirdparty/fluid/modules/gapi/cmake/init.cmake
index 9f6ebeff4..12e22120e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/cmake/init.cmake
+++ b/inference-engine/thirdparty/fluid/modules/gapi/cmake/init.cmake
@@ -1,3 +1,9 @@
+OCV_OPTION(WITH_ADE "Enable ADE framework (required for Graph API module)" ON)
+
+if(NOT WITH_ADE)
+  return()
+endif()
+
 if (ade_DIR)
   # if ade_DIR is set, use ADE-supplied CMake script
   # to set up variables to the prebuilt ADE
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi.hpp
index a043a83fc..b8f31e994 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/core.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/core.hpp
index 9af3620fe..597d251a7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/core.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/core.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/core.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/core.hpp
index ec76fe5d5..6dbe8b084 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/core.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/core.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CPU_CORE_API_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp
index facaab6aa..d44a99577 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/gcpukernel.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCPUKERNEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/imgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/imgproc.hpp
index 0b96db08a..c25ae612f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/imgproc.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/cpu/imgproc.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CPU_IMGPROC_API_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/core.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/core.hpp
index 8c21f5760..d5a49e8a3 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/core.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/core.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_CORE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidbuffer.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidbuffer.hpp
index 8965ec75b..aaf2f4d7d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidbuffer.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidbuffer.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_BUFFER_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp
index c71c5aa2c..d6480e366 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/gfluidkernel.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_KERNEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/imgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/imgproc.hpp
index dedfa9dbe..c83da863f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/imgproc.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/fluid/imgproc.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_IMGPROC_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garg.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garg.hpp
index f8a317006..7867ea35f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garg.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garg.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GARG_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garray.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garray.hpp
index 87d00155b..7a9112734 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garray.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/garray.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GARRAY_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcall.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcall.hpp
index baf4f44e2..777d30d6a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcall.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcall.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCALL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcommon.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcommon.hpp
index 6a3f51f77..3066a3331 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcommon.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcommon.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMMON_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompiled.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompiled.hpp
index ad491b733..227f663d7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompiled.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompiled.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPILED_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompoundkernel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompoundkernel.hpp
index c5ac8a7d2..a3df7135b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompoundkernel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcompoundkernel.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPOUNDKERNEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcomputation.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcomputation.hpp
index e89b9ae39..d4d0ba103 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcomputation.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gcomputation.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPUTATION_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gkernel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gkernel.hpp
index adc7da3c7..956e96db8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gkernel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gkernel.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GKERNEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmat.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmat.hpp
index 0fa53427d..e1ef637ec 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmat.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmat.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GMAT_HPP
@@ -142,7 +142,7 @@ namespace gapi { namespace own {
     GAPI_EXPORTS GMatDesc descr_of(const Mat &mat);
 }}//gapi::own
 
-std::ostream& operator<<(std::ostream& os, const cv::GMatDesc &desc);
+GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const cv::GMatDesc &desc);
 
 } // namespace cv
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmetaarg.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmetaarg.hpp
index 473be342e..75179c32f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmetaarg.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gmetaarg.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GMETAARG_HPP
@@ -37,7 +37,7 @@ using GMetaArg = util::variant
     , GScalarDesc
     , GArrayDesc
     >;
-std::ostream& operator<<(std::ostream& os, const GMetaArg &);
+GAPI_EXPORTS std::ostream& operator<<(std::ostream& os, const GMetaArg &);
 
 using GMetaArgs = std::vector<GMetaArg>;
 
@@ -61,6 +61,15 @@ namespace detail
 
 } // namespace detail
 
+class Mat;
+class UMat;
+GAPI_EXPORTS cv::GMetaArgs descr_of(const std::vector<cv::Mat> &vec);
+GAPI_EXPORTS cv::GMetaArgs descr_of(const std::vector<cv::UMat> &vec);
+namespace gapi { namespace own {
+    class Mat;
+    GAPI_EXPORTS cv::GMetaArgs descr_of(const std::vector<Mat> &vec);
+}} // namespace gapi::own
+
 } // namespace cv
 
 #endif // OPENCV_GAPI_GMETAARG_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gproto.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gproto.hpp
index 8b53d9b64..8c8987960 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gproto.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gproto.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GPROTO_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/core.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/core.hpp
index 98d49b5b8..565102025 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/core.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/core.hpp
@@ -2,22 +2,22 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GPU_CORE_API_HPP
 #define OPENCV_GAPI_GPU_CORE_API_HPP
+/** @file
+* @deprecated Use "opencv2/gapi/ocl/core.hpp" instead.
+*/
 
-#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
-#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+#include "opencv2/gapi/ocl/core.hpp"
 
 namespace cv {
 namespace gapi {
 namespace core {
 namespace gpu {
-
-GAPI_EXPORTS GKernelPackage kernels();
-
+    using namespace ocl;
 } // namespace gpu
 } // namespace core
 } // namespace gapi
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/ggpukernel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/ggpukernel.hpp
index e5a6215e3..34a18b806 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/ggpukernel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/ggpukernel.hpp
@@ -2,243 +2,17 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GGPUKERNEL_HPP
 #define OPENCV_GAPI_GGPUKERNEL_HPP
+/** @file
+* @deprecated Use "opencv2/gapi/ocl/goclkernel.hpp" instead.
+*/
 
-#include <vector>
-#include <functional>
-#include <map>
-#include <unordered_map>
+#include "opencv2/gapi/ocl/goclkernel.hpp"
+#define GAPI_GPU_KERNEL GAPI_OCL_KERNEL
 
-#include <opencv2/core/mat.hpp>
-#include <opencv2/gapi/gcommon.hpp>
-#include <opencv2/gapi/gkernel.hpp>
-#include <opencv2/gapi/garg.hpp>
-
-// FIXME: namespace scheme for backends?
-namespace cv {
-
-namespace gimpl
-{
-    // Forward-declare an internal class
-    class GGPUExecutable;
-} // namespace gimpl
-
-namespace gapi
-{
-namespace gpu
-{
-    /**
-     * \addtogroup gapi_std_backends G-API Standard backends
-     * @{
-     */
-    /**
-     * @brief Get a reference to GPU backend.
-     *
-     * At the moment, the GPU backend is built atop of OpenCV
-     * "Transparent API" (T-API), see cv::UMat for details.
-     *
-     * @sa gapi_std_backends
-     */
-    GAPI_EXPORTS cv::gapi::GBackend backend();
-    /** @} */
-} // namespace gpu
-} // namespace gapi
-
-
-// Represents arguments which are passed to a wrapped GPU function
-// FIXME: put into detail?
-class GAPI_EXPORTS GGPUContext
-{
-public:
-    // Generic accessor API
-    template<typename T>
-    const T& inArg(int input) { return m_args.at(input).get<T>(); }
-
-    // Syntax sugar
-    const cv::UMat&  inMat(int input);
-    cv::UMat&  outMatR(int output); // FIXME: Avoid cv::Mat m = ctx.outMatR()
-
-    const cv::gapi::own::Scalar& inVal(int input);
-    cv::gapi::own::Scalar& outValR(int output); // FIXME: Avoid cv::gapi::own::Scalar s = ctx.outValR()
-    template<typename T> std::vector<T>& outVecR(int output) // FIXME: the same issue
-    {
-        return outVecRef(output).wref<T>();
-    }
-
-protected:
-    detail::VectorRef& outVecRef(int output);
-
-    std::vector<GArg> m_args;
-    std::unordered_map<std::size_t, GRunArgP> m_results;
-
-
-    friend class gimpl::GGPUExecutable;
-};
-
-class GAPI_EXPORTS GGPUKernel
-{
-public:
-    // This function is kernel's execution entry point (does the processing work)
-    using F = std::function<void(GGPUContext &)>;
-
-    GGPUKernel();
-    explicit GGPUKernel(const F& f);
-
-    void apply(GGPUContext &ctx);
-
-protected:
-    F m_f;
-};
-
-// FIXME: This is an ugly ad-hoc imlpementation. TODO: refactor
-
-namespace detail
-{
-template<class T> struct gpu_get_in;
-template<> struct gpu_get_in<cv::GMat>
-{
-    static cv::UMat    get(GGPUContext &ctx, int idx) { return ctx.inMat(idx); }
-};
-template<> struct gpu_get_in<cv::GScalar>
-{
-    static cv::Scalar get(GGPUContext &ctx, int idx) { return to_ocv(ctx.inVal(idx)); }
-};
-template<typename U> struct gpu_get_in<cv::GArray<U> >
-{
-    static const std::vector<U>& get(GGPUContext &ctx, int idx) { return ctx.inArg<VectorRef>(idx).rref<U>(); }
-};
-template<class T> struct gpu_get_in
-{
-    static T get(GGPUContext &ctx, int idx) { return ctx.inArg<T>(idx); }
-};
-
-struct tracked_cv_umat{
-    //TODO Think if T - API could reallocate UMat to a proper size - how do we handle this ?
-    //tracked_cv_umat(cv::UMat& m) : r{(m)}, original_data{m.getMat(ACCESS_RW).data} {}
-    tracked_cv_umat(cv::UMat& m) : r{ (m) }, original_data{ nullptr } {}
-    cv::UMat r;
-    uchar* original_data;
-
-    operator cv::UMat& (){ return r;}
-    void validate() const{
-        //if (r.getMat(ACCESS_RW).data != original_data)
-        //{
-        //    util::throw_error
-        //        (std::logic_error
-        //         ("OpenCV kernel output parameter was reallocated. \n"
-        //          "Incorrect meta data was provided ?"));
-        //}
-
-    }
-};
-
-struct scalar_wrapper_gpu
-{
-    //FIXME reuse CPU (OpenCV) plugin code
-    scalar_wrapper_gpu(cv::gapi::own::Scalar& s) : m_s{cv::gapi::own::to_ocv(s)}, m_org_s(s) {};
-    operator cv::Scalar& () { return m_s; }
-    void writeBack() const  { m_org_s = to_own(m_s); }
-
-    cv::Scalar m_s;
-    cv::gapi::own::Scalar& m_org_s;
-};
-
-template<typename... Outputs>
-void postprocess_gpu(Outputs&... outs)
-{
-    struct
-    {
-        void operator()(tracked_cv_umat* bm) { bm->validate(); }
-        void operator()(scalar_wrapper_gpu* sw) { sw->writeBack(); }
-        void operator()(...) {                  }
-
-    } validate;
-    //dummy array to unfold parameter pack
-    int dummy[] = { 0, (validate(&outs), 0)... };
-    cv::util::suppress_unused_warning(dummy);
-}
-
-template<class T> struct gpu_get_out;
-template<> struct gpu_get_out<cv::GMat>
-{
-    static tracked_cv_umat get(GGPUContext &ctx, int idx)
-    {
-        auto& r = ctx.outMatR(idx);
-        return{ r };
-    }
-};
-template<> struct gpu_get_out<cv::GScalar>
-{
-    static scalar_wrapper_gpu get(GGPUContext &ctx, int idx)
-    {
-        auto& s = ctx.outValR(idx);
-        return{ s };
-    }
-};
-template<typename U> struct gpu_get_out<cv::GArray<U> >
-{
-    static std::vector<U>& get(GGPUContext &ctx, int idx) { return ctx.outVecR<U>(idx);  }
-};
-
-template<typename, typename, typename>
-struct GPUCallHelper;
-
-// FIXME: probably can be simplified with std::apply or analogue.
-template<typename Impl, typename... Ins, typename... Outs>
-struct GPUCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
-{
-    template<typename... Inputs>
-    struct call_and_postprocess
-    {
-        template<typename... Outputs>
-        static void call(Inputs&&... ins, Outputs&&... outs)
-        {
-            //not using a std::forward on outs is deliberate in order to
-            //cause compilation error, by tring to bind rvalue references to lvalue references
-            Impl::run(std::forward<Inputs>(ins)..., outs...);
-
-            postprocess_gpu(outs...);
-        }
-    };
-
-    template<int... IIs, int... OIs>
-    static void call_impl(GGPUContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
-    {
-        //TODO: Make sure that OpenCV kernels do not reallocate memory for output parameters
-        //by comparing it's state (data ptr) before and after the call.
-        //Convert own::Scalar to cv::Scalar before call kernel and run kernel
-        //convert cv::Scalar to own::Scalar after call kernel and write back results
-        call_and_postprocess<decltype(gpu_get_in<Ins>::get(ctx, IIs))...>::call(gpu_get_in<Ins>::get(ctx, IIs)..., gpu_get_out<Outs>::get(ctx, OIs)...);
-    }
-
-    static void call(GGPUContext &ctx)
-    {
-        call_impl(ctx,
-            typename detail::MkSeq<sizeof...(Ins)>::type(),
-            typename detail::MkSeq<sizeof...(Outs)>::type());
-    }
-};
-
-} // namespace detail
-
-template<class Impl, class K>
-class GGPUKernelImpl: public detail::GPUCallHelper<Impl, typename K::InArgs, typename K::OutArgs>
-{
-    using P = detail::GPUCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
-
-public:
-    using API = K;
-
-    static cv::gapi::GBackend backend()  { return cv::gapi::gpu::backend(); }
-    static cv::GGPUKernel     kernel()   { return GGPUKernel(&P::call);     }
-};
-
-#define GAPI_GPU_KERNEL(Name, API) struct Name: public cv::GGPUKernelImpl<Name, API>
-
-} // namespace cv
 
 #endif // OPENCV_GAPI_GGPUKERNEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/imgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/imgproc.hpp
index 6071dda98..d83081d52 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/imgproc.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gpu/imgproc.hpp
@@ -2,22 +2,23 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GPU_IMGPROC_API_HPP
 #define OPENCV_GAPI_GPU_IMGPROC_API_HPP
+/** @file
+* @deprecated Use "opencv2/gapi/ocl/imgproc.hpp" instead.
+*/
+
+#include "opencv2/gapi/ocl/imgproc.hpp"
 
-#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
-#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
 
 namespace cv {
 namespace gapi {
 namespace imgproc {
 namespace gpu {
-
-GAPI_EXPORTS GKernelPackage kernels();
-
+    using namespace ocl;
 } // namespace gpu
 } // namespace imgproc
 } // namespace gapi
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gscalar.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gscalar.hpp
index dd1205b63..ee2237db1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gscalar.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gscalar.hpp
@@ -3,7 +3,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GSCALAR_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtype_traits.hpp
index d05e02e0e..09b49102a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtype_traits.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtype_traits.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GTYPE_TRAITS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtyped.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtyped.hpp
index a966f263f..f32d05018 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtyped.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/gtyped.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GTYPED_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
index aeed9fa6c..73b92d2ed 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/core.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/core.hpp
new file mode 100644
index 000000000..784ee2097
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/core.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OCL_CORE_API_HPP
+#define OPENCV_GAPI_OCL_CORE_API_HPP
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace core {
+namespace ocl {
+
+        GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace ocl
+} // namespace core
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_OCL_CORE_API_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/goclkernel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/goclkernel.hpp
new file mode 100644
index 000000000..8f5c867bd
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/goclkernel.hpp
@@ -0,0 +1,244 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GOCLKERNEL_HPP
+#define OPENCV_GAPI_GOCLKERNEL_HPP
+
+#include <vector>
+#include <functional>
+#include <map>
+#include <unordered_map>
+
+#include <opencv2/core/mat.hpp>
+#include <opencv2/gapi/gcommon.hpp>
+#include <opencv2/gapi/gkernel.hpp>
+#include <opencv2/gapi/garg.hpp>
+
+// FIXME: namespace scheme for backends?
+namespace cv {
+
+namespace gimpl
+{
+    // Forward-declare an internal class
+    class GOCLExecutable;
+} // namespace gimpl
+
+namespace gapi
+{
+namespace ocl
+{
+    /**
+     * \addtogroup gapi_std_backends G-API Standard backends
+     * @{
+     */
+    /**
+     * @brief Get a reference to OCL backend.
+     *
+     * At the moment, the OCL backend is built atop of OpenCV
+     * "Transparent API" (T-API), see cv::UMat for details.
+     *
+     * @sa gapi_std_backends
+     */
+    GAPI_EXPORTS cv::gapi::GBackend backend();
+    /** @} */
+} // namespace ocl
+} // namespace gapi
+
+
+// Represents arguments which are passed to a wrapped OCL function
+// FIXME: put into detail?
+class GAPI_EXPORTS GOCLContext
+{
+public:
+    // Generic accessor API
+    template<typename T>
+    const T& inArg(int input) { return m_args.at(input).get<T>(); }
+
+    // Syntax sugar
+    const cv::UMat&  inMat(int input);
+    cv::UMat&  outMatR(int output); // FIXME: Avoid cv::Mat m = ctx.outMatR()
+
+    const cv::gapi::own::Scalar& inVal(int input);
+    cv::gapi::own::Scalar& outValR(int output); // FIXME: Avoid cv::gapi::own::Scalar s = ctx.outValR()
+    template<typename T> std::vector<T>& outVecR(int output) // FIXME: the same issue
+    {
+        return outVecRef(output).wref<T>();
+    }
+
+protected:
+    detail::VectorRef& outVecRef(int output);
+
+    std::vector<GArg> m_args;
+    std::unordered_map<std::size_t, GRunArgP> m_results;
+
+
+    friend class gimpl::GOCLExecutable;
+};
+
+class GAPI_EXPORTS GOCLKernel
+{
+public:
+    // This function is kernel's execution entry point (does the processing work)
+    using F = std::function<void(GOCLContext &)>;
+
+    GOCLKernel();
+    explicit GOCLKernel(const F& f);
+
+    void apply(GOCLContext &ctx);
+
+protected:
+    F m_f;
+};
+
+// FIXME: This is an ugly ad-hoc imlpementation. TODO: refactor
+
+namespace detail
+{
+template<class T> struct ocl_get_in;
+template<> struct ocl_get_in<cv::GMat>
+{
+    static cv::UMat    get(GOCLContext &ctx, int idx) { return ctx.inMat(idx); }
+};
+template<> struct ocl_get_in<cv::GScalar>
+{
+    static cv::Scalar get(GOCLContext &ctx, int idx) { return to_ocv(ctx.inVal(idx)); }
+};
+template<typename U> struct ocl_get_in<cv::GArray<U> >
+{
+    static const std::vector<U>& get(GOCLContext &ctx, int idx) { return ctx.inArg<VectorRef>(idx).rref<U>(); }
+};
+template<class T> struct ocl_get_in
+{
+    static T get(GOCLContext &ctx, int idx) { return ctx.inArg<T>(idx); }
+};
+
+struct tracked_cv_umat{
+    //TODO Think if T - API could reallocate UMat to a proper size - how do we handle this ?
+    //tracked_cv_umat(cv::UMat& m) : r{(m)}, original_data{m.getMat(ACCESS_RW).data} {}
+    tracked_cv_umat(cv::UMat& m) : r{ (m) }, original_data{ nullptr } {}
+    cv::UMat r;
+    uchar* original_data;
+
+    operator cv::UMat& (){ return r;}
+    void validate() const{
+        //if (r.getMat(ACCESS_RW).data != original_data)
+        //{
+        //    util::throw_error
+        //        (std::logic_error
+        //         ("OpenCV kernel output parameter was reallocated. \n"
+        //          "Incorrect meta data was provided ?"));
+        //}
+
+    }
+};
+
+struct scalar_wrapper_ocl
+{
+    //FIXME reuse CPU (OpenCV) plugin code
+    scalar_wrapper_ocl(cv::gapi::own::Scalar& s) : m_s{cv::gapi::own::to_ocv(s)}, m_org_s(s) {};
+    operator cv::Scalar& () { return m_s; }
+    void writeBack() const  { m_org_s = to_own(m_s); }
+
+    cv::Scalar m_s;
+    cv::gapi::own::Scalar& m_org_s;
+};
+
+template<typename... Outputs>
+void postprocess_ocl(Outputs&... outs)
+{
+    struct
+    {
+        void operator()(tracked_cv_umat* bm) { bm->validate(); }
+        void operator()(scalar_wrapper_ocl* sw) { sw->writeBack(); }
+        void operator()(...) {                  }
+
+    } validate;
+    //dummy array to unfold parameter pack
+    int dummy[] = { 0, (validate(&outs), 0)... };
+    cv::util::suppress_unused_warning(dummy);
+}
+
+template<class T> struct ocl_get_out;
+template<> struct ocl_get_out<cv::GMat>
+{
+    static tracked_cv_umat get(GOCLContext &ctx, int idx)
+    {
+        auto& r = ctx.outMatR(idx);
+        return{ r };
+    }
+};
+template<> struct ocl_get_out<cv::GScalar>
+{
+    static scalar_wrapper_ocl get(GOCLContext &ctx, int idx)
+    {
+        auto& s = ctx.outValR(idx);
+        return{ s };
+    }
+};
+template<typename U> struct ocl_get_out<cv::GArray<U> >
+{
+    static std::vector<U>& get(GOCLContext &ctx, int idx) { return ctx.outVecR<U>(idx);  }
+};
+
+template<typename, typename, typename>
+struct OCLCallHelper;
+
+// FIXME: probably can be simplified with std::apply or analogue.
+template<typename Impl, typename... Ins, typename... Outs>
+struct OCLCallHelper<Impl, std::tuple<Ins...>, std::tuple<Outs...> >
+{
+    template<typename... Inputs>
+    struct call_and_postprocess
+    {
+        template<typename... Outputs>
+        static void call(Inputs&&... ins, Outputs&&... outs)
+        {
+            //not using a std::forward on outs is deliberate in order to
+            //cause compilation error, by tring to bind rvalue references to lvalue references
+            Impl::run(std::forward<Inputs>(ins)..., outs...);
+
+            postprocess_ocl(outs...);
+        }
+    };
+
+    template<int... IIs, int... OIs>
+    static void call_impl(GOCLContext &ctx, detail::Seq<IIs...>, detail::Seq<OIs...>)
+    {
+        //TODO: Make sure that OpenCV kernels do not reallocate memory for output parameters
+        //by comparing it's state (data ptr) before and after the call.
+        //Convert own::Scalar to cv::Scalar before call kernel and run kernel
+        //convert cv::Scalar to own::Scalar after call kernel and write back results
+        call_and_postprocess<decltype(ocl_get_in<Ins>::get(ctx, IIs))...>::call(ocl_get_in<Ins>::get(ctx, IIs)..., ocl_get_out<Outs>::get(ctx, OIs)...);
+    }
+
+    static void call(GOCLContext &ctx)
+    {
+        call_impl(ctx,
+            typename detail::MkSeq<sizeof...(Ins)>::type(),
+            typename detail::MkSeq<sizeof...(Outs)>::type());
+    }
+};
+
+} // namespace detail
+
+template<class Impl, class K>
+class GOCLKernelImpl: public detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>
+{
+    using P = detail::OCLCallHelper<Impl, typename K::InArgs, typename K::OutArgs>;
+
+public:
+    using API = K;
+
+    static cv::gapi::GBackend backend()  { return cv::gapi::ocl::backend(); }
+    static cv::GOCLKernel     kernel()   { return GOCLKernel(&P::call);     }
+};
+
+#define GAPI_OCL_KERNEL(Name, API) struct Name: public cv::GOCLKernelImpl<Name, API>
+
+} // namespace cv
+
+#endif // OPENCV_GAPI_GOCLKERNEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/imgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/imgproc.hpp
new file mode 100644
index 000000000..233034829
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/ocl/imgproc.hpp
@@ -0,0 +1,27 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_OCL_IMGPROC_API_HPP
+#define OPENCV_GAPI_OCL_IMGPROC_API_HPP
+
+#include <opencv2/core/cvdef.h>     // GAPI_EXPORTS
+#include <opencv2/gapi/gkernel.hpp> // GKernelPackage
+
+namespace cv {
+namespace gapi {
+namespace imgproc {
+namespace ocl {
+
+    GAPI_EXPORTS GKernelPackage kernels();
+
+} // namespace ocl
+} // namespace imgproc
+} // namespace gapi
+} // namespace cv
+
+
+#endif // OPENCV_GAPI_OCL_IMGPROC_API_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/opencv_includes.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/opencv_includes.hpp
index 5acf28023..51e131896 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/opencv_includes.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/opencv_includes.hpp
@@ -3,7 +3,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OPENCV_INCLUDES_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/operators.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/operators.hpp
index 27a1d8012..2143b3ab5 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/operators.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/operators.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OPERATORS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/assert.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/assert.hpp
index 8d3feff01..5cdfdf88a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/assert.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/assert.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OWN_ASSERT_HPP
@@ -10,7 +10,8 @@
 
 #if !defined(GAPI_STANDALONE)
 #include <opencv2/core/base.hpp>
-#define GAPI_Assert(expr) CV_Assert(expr)
+#define GAPI_Assert CV_Assert
+#define GAPI_DbgAssert CV_DbgAssert
 
 #else
 #include <stdexcept>
@@ -30,7 +31,6 @@ namespace detail
 #define GAPI_Assert(expr) \
 { if (!(expr)) ::detail::assert_abort(#expr, __LINE__, __FILE__, __func__); }
 
-#endif
 
 #ifdef NDEBUG
 #  define GAPI_DbgAssert(expr)
@@ -38,4 +38,6 @@ namespace detail
 #  define GAPI_DbgAssert(expr) GAPI_Assert(expr)
 #endif
 
+#endif // GAPI_STANDALONE
+
 #endif // OPENCV_GAPI_OWN_ASSERT_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/convert.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/convert.hpp
index 8c1feb408..0fcc78118 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/convert.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/convert.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OWN_CONVERT_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/cvdefs.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/cvdefs.hpp
index e11053692..696a3ed75 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/cvdefs.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/cvdefs.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CV_DEFS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/exports.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/exports.hpp
index 0d955d0e4..3c5c4b59b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/exports.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/exports.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OWN_TYPES_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/mat.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/mat.hpp
index 73f3afcbc..e761a3846 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/mat.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/mat.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OWN_MAT_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/saturate.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/saturate.hpp
index 207dcde25..7b39e61a8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/saturate.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/saturate.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OWN_SATURATE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/scalar.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/scalar.hpp
index bda91c83b..b538ba230 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/scalar.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/scalar.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GAPI_OWN_SCALAR_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/types.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/types.hpp
index 20445ee0f..8763234c6 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/types.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/own/types.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_TYPES_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/any.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/any.hpp
index 3146cb6fd..73087c6a8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/any.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/any.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_UTIL_ANY_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/compiler_hints.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/compiler_hints.hpp
index 575655e8f..3204b006b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/compiler_hints.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/compiler_hints.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 #ifndef OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP
 #define OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP
@@ -16,6 +16,4 @@ namespace util
 } // namespace util
 } // namespace cv
 
-#define UNUSED(x) cv::util::suppress_unused_warning(x)
-
 #endif /* OPENCV_GAPI_UTIL_COMPILER_HINTS_HPP */
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/optional.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/optional.hpp
index 54126d627..254d7eda8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/optional.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/optional.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_UTIL_OPTIONAL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/throw.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/throw.hpp
index 689bf583c..191f66982 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/throw.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/throw.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_UTIL_THROW_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/util.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/util.hpp
index d0378e0e5..0cf81e6e8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/util.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/util.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_UTIL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/variant.hpp b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/variant.hpp
index cb0270a73..4488d84bd 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/variant.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/include/opencv2/gapi/util/variant.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_UTIL_VARIANT_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.cpp
index 2df4d8890..33cbba196 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "perf_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.hpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.hpp
index 8af7b1abf..77fe4270f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_PERF_TESTS_HPP
@@ -50,9 +50,9 @@ namespace opencv_test
     class MaxPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
     class AbsDiffPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
     class AbsDiffCPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
-    class SumPerfTest : public TestPerfParams<tuple<cv::Size, MatType, double, cv::GCompileArgs>> {};
-    class AddWeightedPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, double, cv::GCompileArgs>> {};
-    class NormPerfTest : public TestPerfParams<tuple<NormTypes, cv::Size, MatType, double, cv::GCompileArgs>> {};
+    class SumPerfTest : public TestPerfParams<tuple<compare_scalar_f, cv::Size, MatType, cv::GCompileArgs>> {};
+    class AddWeightedPerfTest : public TestPerfParams<tuple<compare_f, cv::Size, MatType, int, cv::GCompileArgs>> {};
+    class NormPerfTest : public TestPerfParams<tuple<compare_scalar_f, NormTypes, cv::Size, MatType, cv::GCompileArgs>> {};
     class IntegralPerfTest : public TestPerfParams<tuple<cv::Size, MatType, cv::GCompileArgs>> {};
     class ThresholdPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
     class ThresholdOTPerfTest : public TestPerfParams<tuple<cv::Size, MatType, int, cv::GCompileArgs>> {};
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
index f49e06161..cce548aff 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_PERF_TESTS_INL_HPP
@@ -900,13 +900,13 @@ PERF_TEST_P_(AbsDiffCPerfTest, TestPerformance)
 
 PERF_TEST_P_(SumPerfTest, TestPerformance)
 {
-    cv::Size sz_in = get<0>(GetParam());
-    MatType type = get<1>(GetParam());
-    double tolerance = get<2>(GetParam());
+    compare_scalar_f cmpF = get<0>(GetParam());
+    cv::Size sz_in = get<1>(GetParam());
+    MatType type = get<2>(GetParam());
     cv::GCompileArgs compile_args = get<3>(GetParam());
 
 
-    initMatrixRandU(type, sz_in, false);
+    initMatrixRandU(type, sz_in, type, false);
     cv::Scalar out_sum;
     cv::Scalar out_sum_ocv;
 
@@ -928,8 +928,7 @@ PERF_TEST_P_(SumPerfTest, TestPerformance)
 
     // Comparison ////////////////////////////////////////////////////////////
     {
-        EXPECT_LE(std::abs(out_sum[0] - out_sum_ocv[0]) / std::max(1.0, std::abs(out_sum_ocv[0])), tolerance)
-            << "OCV=" << out_sum_ocv[0] << "   GAPI=" << out_sum[0];
+        EXPECT_TRUE(cmpF(out_sum, out_sum_ocv));
     }
 
     SANITY_CHECK_NOTHING();
@@ -939,10 +938,10 @@ PERF_TEST_P_(SumPerfTest, TestPerformance)
 
 PERF_TEST_P_(AddWeightedPerfTest, TestPerformance)
 {
-    cv::Size sz_in = get<0>(GetParam());
-    MatType type = get<1>(GetParam());
-    int dtype = get<2>(GetParam());
-    double tolerance = get<3>(GetParam());
+    compare_f cmpF = get<0>(GetParam());
+    cv::Size sz_in = get<1>(GetParam());
+    MatType type = get<2>(GetParam());
+    int dtype = get<3>(GetParam());
     cv::GCompileArgs compile_args = get<4>(GetParam());
 
     auto& rng = cv::theRNG();
@@ -968,45 +967,9 @@ PERF_TEST_P_(AddWeightedPerfTest, TestPerformance)
     }
 
     // Comparison ////////////////////////////////////////////////////////////
-    // FIXIT unrealiable check
-    if (0)
-    {
-        // Note, that we cannot expect bitwise results for add-weighted:
-        //
-        //    tmp = src1*alpha + src2*beta + gamma;
-        //    dst = saturate<DST>( round(tmp) );
-        //
-        // Because tmp is floating-point, dst depends on compiler optimizations
-        //
-        // However, we must expect good accuracy of tmp, and rounding correctly
-
-        cv::Mat failures;
-
-        if (out_mat_ocv.type() == CV_32FC1)
-        {
-            // result: float - may vary in 7th decimal digit
-            failures = abs(out_mat_gapi - out_mat_ocv) > abs(out_mat_ocv) * 1e-6;
-        }
-        else
-        {
-            // result: integral - rounding may vary if fractional part of tmp
-            //                    is nearly 0.5
-
-            cv::Mat inexact, incorrect, diff, tmp;
-
-            inexact = out_mat_gapi != out_mat_ocv;
-
-            // even if rounded differently, check if still rounded correctly
-            cv::addWeighted(in_mat1, alpha, in_mat2, beta, gamma, tmp, CV_32F);
-            cv::subtract(out_mat_gapi, tmp, diff, cv::noArray(), CV_32F);
-            incorrect = abs(diff) >= tolerance;// 0.5000005f; // relative to 6 digits
-
-            failures = inexact & incorrect;
-        }
-
-        EXPECT_EQ(0, cv::countNonZero(failures));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
-    }
+    EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    EXPECT_EQ(out_mat_gapi.size(), sz_in);
+
 
     SANITY_CHECK_NOTHING();
 }
@@ -1015,10 +978,10 @@ PERF_TEST_P_(AddWeightedPerfTest, TestPerformance)
 
 PERF_TEST_P_(NormPerfTest, TestPerformance)
 {
-    NormTypes opType = get<0>(GetParam());
-    cv::Size sz = get<1>(GetParam());
-    MatType type = get<2>(GetParam());
-    double tolerance = get<3>(GetParam());
+    compare_scalar_f cmpF = get<0>(GetParam());
+    NormTypes opType = get<1>(GetParam());
+    cv::Size sz = get<2>(GetParam());
+    MatType type = get<3>(GetParam());
     cv::GCompileArgs compile_args = get<4>(GetParam());
 
 
@@ -1051,8 +1014,7 @@ PERF_TEST_P_(NormPerfTest, TestPerformance)
 
     // Comparison ////////////////////////////////////////////////////////////
     {
-        EXPECT_LE(std::abs(out_norm[0] - out_norm_ocv[0]) / std::max(1.0, std::abs(out_norm_ocv[0])), tolerance)
-            << "OCV=" << out_norm_ocv[0] << "   GAPI=" << out_norm[0];
+        EXPECT_TRUE(cmpF(out_norm, out_norm_ocv));
     }
 
     SANITY_CHECK_NOTHING();
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.cpp
index 5a2ffb88a..387ffb801 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "perf_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
index 750c0692c..c2e65b920 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_PERF_TESTS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
index 5a13cfeeb..e210bd07e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/common/gapi_imgproc_perf_tests_inl.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_PERF_TESTS_INL_HPP
@@ -52,7 +52,7 @@ PERF_TEST_P_(SepFilterPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-      c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+      c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -100,7 +100,7 @@ PERF_TEST_P_(Filter2DPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -145,7 +145,7 @@ PERF_TEST_P_(BoxFilterPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -188,7 +188,7 @@ PERF_TEST_P_(BlurPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -230,7 +230,7 @@ PERF_TEST_P_(GaussianBlurPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -271,7 +271,7 @@ PERF_TEST_P_(MedianBlurPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -314,7 +314,7 @@ PERF_TEST_P_(ErodePerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -357,7 +357,7 @@ PERF_TEST_P_(Erode3x3PerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -400,7 +400,7 @@ PERF_TEST_P_(DilatePerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -443,7 +443,7 @@ PERF_TEST_P_(Dilate3x3PerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -526,7 +526,7 @@ PERF_TEST_P_(CannyPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -564,7 +564,7 @@ PERF_TEST_P_(EqHistPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
@@ -830,7 +830,7 @@ PERF_TEST_P_(LUV2BGRPerfTest, TestPerformance)
 
     TEST_CYCLE()
     {
-        c.apply(in_mat1, out_mat_gapi, std::move(compile_args));
+        c.apply(in_mat1, out_mat_gapi);
     }
 
     // Comparison //////////////////////////////////////////////////////////////
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
index 6957401ad..4a3a8c7ed 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../perf_precomp.hpp"
@@ -152,24 +152,24 @@ INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestCPU, AbsDiffCPerfTest,
         Values(cv::compile_args(CORE_CPU))));
 
 INSTANTIATE_TEST_CASE_P(SumPerfTestCPU, SumPerfTest,
-    Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
+    Combine(Values(AbsToleranceScalar(0.0).to_compare_f()),
+        Values(szSmall128, szVGA, sz720p, sz1080p),
         Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-        Values(0.0),
+        //Values(0.0),
         Values(cv::compile_args(CORE_CPU))));
 
-// FIXME: Comparison introduced by YL doesn't work with C3
 INSTANTIATE_TEST_CASE_P(AddWeightedPerfTestCPU, AddWeightedPerfTest,
-    Combine(Values(szSmall128, szVGA, sz720p, sz1080p),
-        Values(CV_8UC1, /*CV_8UC3,*/ CV_16UC1, CV_16SC1, CV_32FC1),
+    Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
+        Values(szSmall128, szVGA, sz720p, sz1080p),
+        Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
         Values(-1, CV_8U, CV_16U, CV_32F),
-        Values(0.5000005),
         Values(cv::compile_args(CORE_CPU))));
 
 INSTANTIATE_TEST_CASE_P(NormPerfTestCPU, NormPerfTest,
-    Combine(Values(NORM_INF, NORM_L1, NORM_L2),
+    Combine(Values(AbsToleranceScalar(0.0).to_compare_f()),
+        Values(NORM_INF, NORM_L1, NORM_L2),
         Values(szSmall128, szVGA, sz720p, sz1080p),
         Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
-        Values(0.0),
         Values(cv::compile_args(CORE_CPU))));
 
 INSTANTIATE_TEST_CASE_P(IntegralPerfTestCPU, IntegralPerfTest,
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
index ea3d753f2..4c842100a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_cpu.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../perf_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
index a5d13e661..964a03a9e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/cpu/gapi_imgproc_perf_tests_fluid.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../perf_precomp.hpp"
@@ -13,9 +13,101 @@
 namespace opencv_test
 {
 
-    INSTANTIATE_TEST_CASE_P(SobelPerfTestFluid, SobelPerfTest,
-        Combine(Values(AbsExact().to_compare_f()),
-            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),  // add CV_32FC1 when ready
+INSTANTIATE_TEST_CASE_P(SepFilterPerfTestFluid_8U, SepFilterPerfTest,
+    Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
+            Values(CV_8UC1, CV_8UC3),
+            Values(3),
+            Values(szVGA, sz720p, sz1080p),
+            Values(-1, CV_16S, CV_32F),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(SepFilterPerfTestFluid_other, SepFilterPerfTest,
+    Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
+            Values(CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),
+            Values(szVGA, sz720p, sz1080p),
+            Values(-1, CV_32F),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(Filter2DPerfTestFluid, Filter2DPerfTest,
+    Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add 4, 5, 7 when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::BORDER_DEFAULT),
+            Values(-1, CV_32F),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(BoxFilterPerfTestFluid, BoxFilterPerfTest,
+    Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add size=5, when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::BORDER_DEFAULT),
+            Values(-1, CV_32F),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(BlurPerfTestFluid, BlurPerfTest,
+    Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add size=5, when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::BORDER_DEFAULT),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(GaussianBlurPerfTestFluid, GaussianBlurPerfTest,
+    Combine(Values(ToleranceFilter(1e-3f, 0.01).to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add size=5, when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(MedianBlurPerfTestFluid, MedianBlurPerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add size=5, when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(ErodePerfTestFluid, ErodePerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add size=5, when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::MorphShapes::MORPH_RECT,
+                   cv::MorphShapes::MORPH_CROSS,
+                   cv::MorphShapes::MORPH_ELLIPSE),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+// GAPI/fluid does not support iterations parameter for the Erode kernel
+INSTANTIATE_TEST_CASE_P(DISABLED_Erode3x3PerfTestFluid, Erode3x3PerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(szVGA, sz720p, sz1080p),
+            Values(1, 2, 4),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(DilatePerfTestFluid, DilatePerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(3),                                     // add size=5, when kernel is ready
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::MorphShapes::MORPH_RECT,
+                   cv::MorphShapes::MORPH_CROSS,
+                   cv::MorphShapes::MORPH_ELLIPSE),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+// GAPI/fluid does not support iterations parameter for the Dilate kernel
+INSTANTIATE_TEST_CASE_P(DISABLED_Dilate3x3PerfTestFluid, Dilate3x3PerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
+            Values(szVGA, sz720p, sz1080p),
+            Values(1, 2, 4),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(SobelPerfTestFluid, SobelPerfTest,
+    Combine(Values(AbsExact().to_compare_f()),
+            Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
             Values(3),                                     // add 5x5 once supported
             Values(szVGA, sz720p, sz1080p),
             Values(-1, CV_16S, CV_32F),
@@ -23,8 +115,8 @@ namespace opencv_test
             Values(1, 2),
             Values(cv::compile_args(IMGPROC_FLUID))));
 
-    INSTANTIATE_TEST_CASE_P(SobelPerfTestFluid32F, SobelPerfTest,
-        Combine(Values(ToleranceFilter(1e-3f, 0.0).to_compare_f()),
+INSTANTIATE_TEST_CASE_P(SobelPerfTestFluid32F, SobelPerfTest,
+    Combine(Values(ToleranceFilter(1e-3f, 0.0).to_compare_f()),
             Values(CV_32FC1),
             Values(3),                                     // add 5x5 once supported
             Values(szVGA, sz720p, sz1080p),
@@ -33,44 +125,44 @@ namespace opencv_test
             Values(1, 2),
             Values(cv::compile_args(IMGPROC_FLUID))));
 
-    INSTANTIATE_TEST_CASE_P(RGB2GrayPerfTestFluid, RGB2GrayPerfTest,
-        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(BGR2GrayPerfTestFluid, BGR2GrayPerfTest,
-        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(RGB2YUVPerfTestFluid, RGB2YUVPerfTest,
-        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(YUV2RGBPerfTestFluid, YUV2RGBPerfTest,
-        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(BGR2YUVPerfTestFluid, BGR2YUVPerfTest,
-        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(YUV2BGRPerfTestFluid, YUV2BGRPerfTest,
-        Combine(Values(ToleranceColor(1e-3).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(BGR2LUVPerfTestFluid, BGR2LUVPerfTest,
-        Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
-
-    INSTANTIATE_TEST_CASE_P(RGB2LabPerfTestFluid, RGB2LabPerfTest,
-        Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
-                Values(szVGA, sz720p, sz1080p),
-                Values(cv::compile_args(IMGPROC_FLUID))));
+INSTANTIATE_TEST_CASE_P(RGB2GrayPerfTestFluid, RGB2GrayPerfTest,
+    Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(BGR2GrayPerfTestFluid, BGR2GrayPerfTest,
+    Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(RGB2YUVPerfTestFluid, RGB2YUVPerfTest,
+    Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(YUV2RGBPerfTestFluid, YUV2RGBPerfTest,
+    Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(BGR2YUVPerfTestFluid, BGR2YUVPerfTest,
+    Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(YUV2BGRPerfTestFluid, YUV2BGRPerfTest,
+    Combine(Values(ToleranceColor(1e-3).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(BGR2LUVPerfTestFluid, BGR2LUVPerfTest,
+    Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
+
+INSTANTIATE_TEST_CASE_P(RGB2LabPerfTestFluid, RGB2LabPerfTest,
+    Combine(Values(AbsSimilarPoints(1, 0.05).to_compare_f()),
+            Values(szVGA, sz720p, sz1080p),
+            Values(cv::compile_args(IMGPROC_FLUID))));
 
 }
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
index 652cbae6b..b1ebc5d39 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp
@@ -2,12 +2,11 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../perf_precomp.hpp"
 #include "../common/gapi_core_perf_tests.hpp"
-#include "opencv2/gapi/gpu/core.hpp"
 
 #define CORE_GPU cv::gapi::core::gpu::kernels()
 
@@ -153,24 +152,23 @@ INSTANTIATE_TEST_CASE_P(AbsDiffCPerfTestGPU, AbsDiffCPerfTest,
                                 Values(cv::compile_args(CORE_GPU))));
 
 INSTANTIATE_TEST_CASE_P(SumPerfTestGPU, SumPerfTest,
-                        Combine(Values( szSmall128, szVGA, sz720p, sz1080p ),
+                        Combine(Values(AbsToleranceScalar(1e-5).to_compare_f()),
+                                Values( szSmall128, szVGA, sz720p, sz1080p ),
                                 Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(4.0), //TODO: too relaxed?
                                 Values(cv::compile_args(CORE_GPU))));
 
-// FIXME: Comparison introduced by YL doesn't work with C3
 INSTANTIATE_TEST_CASE_P(AddWeightedPerfTestGPU, AddWeightedPerfTest,
-                        Combine(Values( szSmall128, szVGA, sz720p, sz1080p ),
-                                Values( CV_8UC1, /*CV_8UC3,*/ CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
+                                Values( szSmall128, szVGA, sz720p, sz1080p ),
+                                Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
-                                Values(0.50005),
                                 Values(cv::compile_args(CORE_GPU))));
 
 INSTANTIATE_TEST_CASE_P(NormPerfTestGPU, NormPerfTest,
-                        Combine(Values(NORM_INF, NORM_L1, NORM_L2),
+                        Combine(Values(AbsToleranceScalar(1e-5).to_compare_f()),
+                                Values(NORM_INF, NORM_L1, NORM_L2),
                                 Values( szSmall128, szVGA, sz720p, sz1080p ),
                                 Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
-                                Values(4.0), //TODO: too relaxed?
                                 Values(cv::compile_args(CORE_GPU))));
 
 INSTANTIATE_TEST_CASE_P(IntegralPerfTestGPU, IntegralPerfTest,
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp
index 14ef60606..0976299f3 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/gpu/gapi_imgproc_perf_tests_gpu.cpp
@@ -2,12 +2,11 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../perf_precomp.hpp"
 #include "../common/gapi_imgproc_perf_tests.hpp"
-#include "opencv2/gapi/gpu/imgproc.hpp"
 
 #define IMGPROC_GPU cv::gapi::imgproc::gpu::kernels()
 
@@ -109,10 +108,20 @@ INSTANTIATE_TEST_CASE_P(Dilate3x3PerfTestGPU, Dilate3x3PerfTest,
 
 INSTANTIATE_TEST_CASE_P(SobelPerfTestGPU, SobelPerfTest,
                         Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1/*, CV_32FC1*/), //TODO: CV_32FC1 fails accuracy
+                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(3, 5),
                                 Values(szVGA, sz720p, sz1080p),
-                                Values(-1, CV_32F),
+                                Values(-1, CV_16S, CV_32F),
+                                Values(0, 1),
+                                Values(1, 2),
+                                Values(cv::compile_args(IMGPROC_GPU))));
+
+INSTANTIATE_TEST_CASE_P(SobelPerfTestGPU32F, SobelPerfTest,
+                        Combine(Values(ToleranceFilter(1e-4f, 0.01).to_compare_f()),
+                                Values(CV_32FC1),
+                                Values(3, 5),
+                                Values(szVGA, sz720p, sz1080p),
+                                Values(CV_32F),
                                 Values(0, 1),
                                 Values(1, 2),
                                 Values(cv::compile_args(IMGPROC_GPU))));
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/internal/gapi_compiler_perf_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/internal/gapi_compiler_perf_tests.cpp
index 48786b6a9..5ada23db4 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/internal/gapi_compiler_perf_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/internal/gapi_compiler_perf_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "perf_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_main.cpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_main.cpp
index 8d6d77edc..ff8aba0ca 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_main.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_main.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "perf_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_precomp.hpp b/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_precomp.hpp
index abd7cbe66..f0eba6a5b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_precomp.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/perf/perf_precomp.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef __OPENCV_GAPI_PERF_PRECOMP_HPP__
@@ -17,6 +17,8 @@
 #include "opencv2/gapi/core.hpp"
 #include "opencv2/gapi/cpu/gcpukernel.hpp"
 #include "opencv2/gapi/gpu/ggpukernel.hpp"
+#include "opencv2/gapi/gpu/imgproc.hpp"
+#include "opencv2/gapi/gpu/core.hpp"
 #include "opencv2/gapi/operators.hpp"
 
 #include "opencv2/gapi/fluid/core.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.cpp
index 744db1671..bb865c49d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.hpp
index edab0a08b..fce1da57c 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gapi_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/garray.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/garray.cpp
index 0fd19a7e6..90a5d3da7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/garray.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/garray.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend.cpp
index 8144d21d4..37307ce0a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend_priv.hpp
index 1c6e29715..b7f483b8d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gbackend_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef GAPI_API_GBACKEND_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall.cpp
index 2dd823daa..e03505269 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
@@ -28,9 +28,14 @@ cv::GCall::GCall(const cv::GKernel &k)
 
 cv::GCall::~GCall()
 {
+    // FIXME: current behavior of the destructor can cause troubles in a threaded environment. GCall
+    // is not supposed to be accessed for modification within multiple threads. There should be a
+    // way to ensure somehow that no problem occurs in future. For now, this is a reminder that
+    // GCall is not supposed to be copied inside a code block that is executed in parallel.
+
     // When a GCall object is destroyed (and GCall::Priv is likely still alive,
     // as there might be other references), reset m_node to break cycle.
-   m_priv->m_node = GNode();
+    m_priv->m_node = GNode();
 }
 
 void cv::GCall::setArgs(std::vector<GArg> &&args)
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall_priv.hpp
index ffb122ec8..122303ac1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcall_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GCALL_PRIV_HPP
@@ -19,13 +19,31 @@
 
 namespace cv {
 
+// GCall is used to capture details (arguments) passed to operation when the graph is
+// constructed. It is, in fact, just a "serialization" of a function call (to some extent). The
+// only place where new GCall objects are constructed is KernelName::on(). Note that GCall not
+// only stores its input arguments, but also yields operation's pseudo-results to return
+// "results".
+// GCall arguments are GArgs which can wrap either our special types (like GMat) or other
+// stuff user may pass according to operation's signature (opaque to us).
+// If a dynamic g-object is wrapped in GArg, it has origin - something where that object comes
+// from. It is either another function call (again, a GCall) or nothing (for graph's starting
+// points, for example). By using these links, we understand what the flow is and construct the
+// real graph. Origin is a node in a graph, represented by GNode.
+// When a GCall is created, it instantiates it's appropriate GNode since we need an origin for
+// objects we produce with this call. This is what is stored in m_node and then is used in every
+// yield() call (the framework calls yield() according to template signature which we strip then
+// - aka type erasure).
+// Here comes the recursion - GNode knows it is created for GCall, and GCall stores that node
+// object as origin for yield(). In order to break it, in GNode's object destructor this m_node
+// pointer is reset (note - GCall::Priv remains alive). Now GCall's ownership "moves" to GNode
+// and remains there until the API part is destroyed.
 class GCall::Priv
 {
 public:
     std::vector<GArg> m_args;
     const GKernel     m_k;
 
-    // FIXME: Document that there's no recursion here.
     // TODO: Rename to "constructionNode" or smt to reflect its lifetime
     GNode             m_node;
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation.cpp
index ab761edf9..fe14b905f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation_priv.hpp
index 13d1b9afa..035f56b36 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gcomputation_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPUTATION_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gkernel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gkernel.cpp
index f8c851abf..ca4314d6d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gkernel.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gkernel.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gmat.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gmat.cpp
index e8c528555..0477c9140 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gmat.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gmat.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
@@ -33,15 +33,39 @@ const cv::GOrigin& cv::GMat::priv() const
     return *m_priv;
 }
 
+namespace{
+    template <typename T> cv::GMetaArgs vec_descr_of(const std::vector<T> &vec)
+        {
+        cv::GMetaArgs vec_descr;
+        vec_descr.reserve(vec.size());
+        for(auto& mat : vec){
+            vec_descr.emplace_back(descr_of(mat));
+        }
+        return vec_descr;
+    }
+}
+
+
 #if !defined(GAPI_STANDALONE)
 cv::GMatDesc cv::descr_of(const cv::Mat &mat)
 {
     return GMatDesc{mat.depth(), mat.channels(), {mat.cols, mat.rows}};
 }
+
 cv::GMatDesc cv::descr_of(const cv::UMat &mat)
 {
     return GMatDesc{ mat.depth(), mat.channels(),{ mat.cols, mat.rows } };
 }
+
+cv::GMetaArgs cv::descr_of(const std::vector<cv::Mat> &vec)
+{
+    return vec_descr_of(vec);
+}
+
+cv::GMetaArgs cv::descr_of(const std::vector<cv::UMat> &vec)
+{
+    return vec_descr_of(vec);
+}
 #endif
 
 cv::GMatDesc cv::gapi::own::descr_of(const cv::gapi::own::Mat &mat)
@@ -49,6 +73,11 @@ cv::GMatDesc cv::gapi::own::descr_of(const cv::gapi::own::Mat &mat)
     return GMatDesc{mat.depth(), mat.channels(), {mat.cols, mat.rows}};
 }
 
+cv::GMetaArgs cv::gapi::own::descr_of(const std::vector<cv::gapi::own::Mat> &vec)
+{
+    return vec_descr_of(vec);
+}
+
 namespace cv {
 std::ostream& operator<<(std::ostream& os, const cv::GMatDesc &desc)
 {
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.cpp
index efda5d542..05ee7dcc9 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.hpp
index bd6c7901e..7f0aa4a2f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GNODE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode_priv.hpp
index 5425471f8..d5e305534 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gnode_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GNODE_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto.cpp
index 2482d628b..e24ca8a28 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto_priv.hpp
index 2684924c7..8df4029ce 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gproto_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GPROTO_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gscalar.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gscalar.cpp
index 30f3dc944..8d0b0660e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/gscalar.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/gscalar.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_core.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_core.cpp
index c9fe19ed6..00088d0c6 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_core.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_core.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_imgproc.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_imgproc.cpp
index 7c4b522e9..e1fc4cda4 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_imgproc.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/kernels_imgproc.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/api/operators.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/api/operators.cpp
index 44fc4fa52..647f5bf2d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/api/operators.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/api/operators.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gbackend.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gbackend.hpp
index 613022cb9..1229739c3 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gbackend.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gbackend.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GBACKEND_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundbackend.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundbackend.cpp
index 948898f28..c927cd1d9 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundbackend.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundbackend.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundkernel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundkernel.cpp
index 89abcef59..ce74c168e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundkernel.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/common/gcompoundkernel.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.cpp
index 5cc8bb0b7..c17750c39 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.hpp
index 6ce8c4883..0525e3a14 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpubackend.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCPUBACKEND_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.cpp
index c42f863bf..8a3b3ab34 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.hpp
index 77e9e82a0..b1248a295 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpucore.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCPUCORE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.cpp
index d14584bfa..1f0251f25 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.hpp
index 172871a77..d6ea758a0 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpuimgproc.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCPUIMGPROC_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpukernel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpukernel.cpp
index af13eed6c..cfa5257af 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpukernel.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/cpu/gcpukernel.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.cpp
index e6eaaae8c..2c8b88ec8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
@@ -930,9 +930,13 @@ namespace
                 {
                     // FIXME: ASSERT(DATA), ASSERT(FLUIDDATA)
                     auto &fd     = fg.metadata(out_data_node).get<FluidData>();
-                    fd.latency   = out_latency;
+                    // If fluid node is external, it will be bound to a real image without
+                    // fluid buffer allocation, so set its latency to 0 not to confuse later latency propagation.
+                    // Latency is used in fluid buffer allocation process and is not used by the scheduler
+                    // so latency doesn't affect the execution and setting it to 0 is legal
+                    fd.latency   = fd.internal ? out_latency : 0;
                     fd.lpi_write = fu.k.m_lpi;
-                    GModel::log(g, out_data_node, "Latency: " + std::to_string(out_latency));
+                    GModel::log(g, out_data_node, "Latency: " + std::to_string(fd.latency));
                 }
             }
         }
@@ -1207,35 +1211,41 @@ void GFluidBackendImpl::addBackendPasses(ade::ExecutionEngineSetupContext &ectx)
 
         for (const auto& nh : gim.nodes())
         {
-            if (gim.metadata(nh).get<NodeKind>().k == NodeKind::ISLAND)
+            switch (gim.metadata(nh).get<NodeKind>().k)
+            {
+            case NodeKind::ISLAND:
             {
                 const auto isl = gim.metadata(nh).get<FusedIsland>().object;
                 if (isl->backend() == cv::gapi::fluid::backend())
                 {
-                    // add FluidData to all data nodes inside island
+                    // Add FluidData to all data nodes inside island,
+                    // set internal = true if node is not a slot in terms of higher-level GIslandModel
                     for (const auto node : isl->contents())
                     {
-                        if (g.metadata(node).get<NodeType>().t == NodeType::DATA)
+                        if (g.metadata(node).get<NodeType>().t == NodeType::DATA &&
+                            !fg.metadata(node).contains<FluidData>())
                             setFluidData(node, true);
                     }
-
-                    // add FluidData to slot if it's read/written by fluid
-                    std::vector<ade::NodeHandle> io_handles;
-                    for (const auto &in_op : isl->in_ops())
-                    {
-                        ade::util::copy(in_op->inNodes(), std::back_inserter(io_handles));
-                    }
-                    for (const auto &out_op : isl->out_ops())
-                    {
-                        ade::util::copy(out_op->outNodes(), std::back_inserter(io_handles));
-                    }
-                    for (const auto &io_node : io_handles)
-                    {
-                        if (!fg.metadata(io_node).contains<FluidData>())
-                            setFluidData(io_node, false);
-                    }
                 } // if (fluid backend)
-            } // if (ISLAND)
+            } break; // case::ISLAND
+            case NodeKind::SLOT:
+            {
+                // add FluidData to slot if it's read/written by fluid
+                // regardless if it is one fluid island (both writing to and reading from this object)
+                // or two distinct islands (both fluid)
+                auto isFluidIsland = [&](const ade::NodeHandle& node) {
+                    const auto isl = gim.metadata(node).get<FusedIsland>().object;
+                    return isl->backend() == cv::gapi::fluid::backend();
+                };
+
+                if (ade::util::any_of(ade::util::chain(nh->inNodes(), nh->outNodes()), isFluidIsland))
+                {
+                    auto data_node = gim.metadata(nh).get<DataSlot>().original_data_node;
+                    setFluidData(data_node, false);
+                }
+            } break; // case::SLOT
+            default: GAPI_Assert(false);
+            } // switch
         } // for (gim.nodes())
     });
     // FIXME:
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.hpp
index ba8b9771f..d34020242 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbackend.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_BACKEND_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer.cpp
index 6672ea272..66705f24b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer_priv.hpp
index 1f3eadc11..dd6e51810 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidbuffer_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_FLUID_BUFFER_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidcore.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidcore.cpp
index 16a63e217..61dba0227 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidcore.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidcore.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 #if !defined(GAPI_STANDALONE)
 
@@ -340,7 +340,7 @@ static void run_arithm_s3(uchar out[], const uchar in[], int width, const uchar
         v_store_interleave(&out[3*w], x, y, z);
     }
 #endif
-    UNUSED(v_op);
+    cv::util::suppress_unused_warning(v_op);
     for (; w < width; w++)
     {
         out[3*w    ] = saturate<uchar>( s_op(in[3*w    ], scalar[0]) );
@@ -386,7 +386,7 @@ static void run_arithm_s1(uchar out[], const float in[], int width, const float
         v_store(&out[w], uc);
     }
 #endif
-    UNUSED(v_op);
+    cv::util::suppress_unused_warning(v_op);
     for (; w < width; w++)
     {
         out[w] = saturate<uchar>(s_op(in[w], scalar[0]), std::roundf);
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
index e2e4c4f75..2cdc573cc 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 #if !defined(GAPI_STANDALONE)
 
@@ -344,7 +344,7 @@ static const int maxKernelSize = 9;
 
 template<typename DST, typename SRC>
 static void run_boxfilter(Buffer &dst, const View &src, const cv::Size &kernelSize,
-                          const cv::Point& /* anchor */, bool normalize)
+                          const cv::Point& /* anchor */, bool normalize, float *buf[])
 {
     GAPI_Assert(kernelSize.width <= maxKernelSize);
     GAPI_Assert(kernelSize.width == kernelSize.height);
@@ -365,36 +365,53 @@ static void run_boxfilter(Buffer &dst, const View &src, const cv::Size &kernelSi
     int width = dst.length();
     int chan  = dst.meta().chan;
 
-    GAPI_DbgAssert(chan <= 4);
+    if (kernelSize.width == 3 && kernelSize.height == 3)
+    {
+        int y  = dst.y();
+        int y0 = dst.priv().writeStart();
 
-    for (int w=0; w < width; w++)
+        float  kx[3] = {1, 1, 1};
+        float *ky = kx;
+
+        float scale=1, delta=0;
+        if (normalize)
+            scale = 1/9.f;
+
+        run_sepfilter3x3_impl(out, in, width, chan, kx, ky, border, scale, delta, buf, y, y0);
+    } else
     {
-        float sum[4] = {0, 0, 0, 0};
+        GAPI_DbgAssert(chan <= 4);
 
-        for (int i=0; i < kernel; i++)
+        for (int w=0; w < width; w++)
         {
-            for (int j=0; j < kernel; j++)
+            float sum[4] = {0, 0, 0, 0};
+
+            for (int i=0; i < kernel; i++)
             {
-                for (int c=0; c < chan; c++)
-                    sum[c] += in[i][(w + j - border)*chan + c];
+                for (int j=0; j < kernel; j++)
+                {
+                    for (int c=0; c < chan; c++)
+                        sum[c] += in[i][(w + j - border)*chan + c];
+                }
             }
-        }
 
-        for (int c=0; c < chan; c++)
-        {
-            float result = normalize? sum[c]/(kernel * kernel) : sum[c];
+            for (int c=0; c < chan; c++)
+            {
+                float result = normalize? sum[c]/(kernel * kernel) : sum[c];
 
-            out[w*chan + c] = saturate<DST>(result, rintf);
+                out[w*chan + c] = saturate<DST>(result, rintf);
+            }
         }
     }
 }
 
-GAPI_FLUID_KERNEL(GFluidBlur, cv::gapi::imgproc::GBlur, false)
+GAPI_FLUID_KERNEL(GFluidBlur, cv::gapi::imgproc::GBlur, true)
 {
     static const int Window = 3;
 
     static void run(const View &src, const cv::Size& kernelSize, const cv::Point& anchor,
-                    int /* borderType */, const cv::Scalar& /* borderValue */, Buffer &dst)
+                    int /* borderType */, const cv::Scalar& /* borderValue */, Buffer &dst,
+                    Buffer& scratch)
     {
         // TODO: support sizes 3, 5, 7, 9, ...
         GAPI_Assert(kernelSize.width  == 3 && kernelSize.height == 3);
@@ -404,14 +421,46 @@ GAPI_FLUID_KERNEL(GFluidBlur, cv::gapi::imgproc::GBlur, false)
 
         static const bool normalize = true;
 
+        int width = src.length();
+        int chan  = src.meta().chan;
+        int length = width * chan;
+
+        float *buf[3];
+        buf[0] = scratch.OutLine<float>();
+        buf[1] = buf[0] + length;
+        buf[2] = buf[1] + length;
+
         //     DST     SRC     OP             __VA_ARGS__
-        UNARY_(uchar , uchar , run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_(ushort, ushort, run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_( short,  short, run_boxfilter, dst, src, kernelSize, anchor, normalize);
+        UNARY_(uchar , uchar , run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_(ushort, ushort, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( short,  short, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( float,  float, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
 
+    static void initScratch(const GMatDesc   & in,
+                            const cv::Size   & /* ksize */,
+                            const cv::Point  & /* anchor */,
+                                  int          /* borderType */,
+                            const cv::Scalar & /* borderValue */,
+                                  Buffer     & scratch)
+    {
+        int width = in.size.width;
+        int chan  = in.chan;
+
+        int buflen = width * chan * Window;  // work buffers
+
+        cv::gapi::own::Size bufsize(buflen, 1);
+        GMatDesc bufdesc = {CV_32F, 1, bufsize};
+        Buffer buffer(bufdesc);
+        scratch = std::move(buffer);
+    }
+
+    static void resetScratch(Buffer& /* scratch */)
+    {
+    }
+
     static Border getBorder(const cv::GMatDesc& /* src */,
                             const cv::Size    & /* kernelSize */,
                             const cv::Point   & /* anchor */,
@@ -422,18 +471,19 @@ GAPI_FLUID_KERNEL(GFluidBlur, cv::gapi::imgproc::GBlur, false)
     }
 };
 
-GAPI_FLUID_KERNEL(GFluidBoxFilter, cv::gapi::imgproc::GBoxFilter, false)
+GAPI_FLUID_KERNEL(GFluidBoxFilter, cv::gapi::imgproc::GBoxFilter, true)
 {
     static const int Window = 3;
 
     static void run(const     View  &    src,
                               int     /* ddepth */,
                     const cv::Size  &    kernelSize,
-                    const cv::Point &   anchor,
+                    const cv::Point &    anchor,
                               bool       normalize,
                               int     /* borderType */,
                     const cv::Scalar& /* borderValue */,
-                              Buffer&    dst)
+                              Buffer&    dst,
+                              Buffer&    scratch)
     {
         // TODO: support sizes 3, 5, 7, 9, ...
         GAPI_Assert(kernelSize.width  == 3 && kernelSize.height == 3);
@@ -441,17 +491,51 @@ GAPI_FLUID_KERNEL(GFluidBoxFilter, cv::gapi::imgproc::GBoxFilter, false)
         // TODO: suport non-trivial anchor
         GAPI_Assert(anchor.x == -1 && anchor.y == -1);
 
+        int width = src.length();
+        int chan  = src.meta().chan;
+        int length = width * chan;
+
+        float *buf[3];
+        buf[0] = scratch.OutLine<float>();
+        buf[1] = buf[0] + length;
+        buf[2] = buf[1] + length;
+
         //     DST     SRC     OP             __VA_ARGS__
-        UNARY_(uchar , uchar , run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_(ushort, ushort, run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_( short,  short, run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_( float, uchar , run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_( float, ushort, run_boxfilter, dst, src, kernelSize, anchor, normalize);
-        UNARY_( float,  short, run_boxfilter, dst, src, kernelSize, anchor, normalize);
+        UNARY_(uchar , uchar , run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( float, uchar , run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_(ushort, ushort, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( float, ushort, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( short,  short, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( float,  short, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
+        UNARY_( float,  float, run_boxfilter, dst, src, kernelSize, anchor, normalize, buf);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
 
+    static void initScratch(const GMatDesc  & in,
+                                      int     /* ddepth */,
+                            const cv::Size  & /* kernelSize */,
+                            const cv::Point & /* anchor */,
+                                      bool    /*  normalize */,
+                                      int     /* borderType */,
+                            const cv::Scalar& /* borderValue */,
+                                  Buffer    &  scratch)
+    {
+        int width = in.size.width;
+        int chan  = in.chan;
+
+        int buflen = width * chan * Window;  // work buffers
+
+        cv::gapi::own::Size bufsize(buflen, 1);
+        GMatDesc bufdesc = {CV_32F, 1, bufsize};
+        Buffer buffer(bufdesc);
+        scratch = std::move(buffer);
+    }
+
+    static void resetScratch(Buffer& /* scratch */)
+    {
+    }
+
     static Border getBorder(const cv::GMatDesc& /* src */,
                                       int       /* ddepth */,
                             const cv::Size    & /* kernelSize */,
@@ -510,18 +594,21 @@ static void run_sepfilter(Buffer& dst, const View& src,
                           const float kx[], int kxLen,
                           const float ky[], int kyLen,
                           const cv::Point& /* anchor */,
-                          float delta=0)
+                          float scale, float delta,
+                          float *buf[])
 {
-    static const int maxLines = 9;
-    GAPI_Assert(kyLen <= maxLines);
+    constexpr int kMax = 11;
+    GAPI_Assert(kxLen <= kMax && kyLen <= kMax);
 
-    const SRC *in[ maxLines ];
+    const SRC *in[kMax];
           DST *out;
 
-    int border = (kyLen - 1) / 2;
+    int xborder = (kxLen - 1) / 2;
+    int yborder = (kyLen - 1) / 2;
+
     for (int i=0; i < kyLen; i++)
     {
-        in[i] = src.InLine<SRC>(i - border);
+        in[i] = src.InLine<SRC>(i - yborder);
     }
 
     out = dst.OutLine<DST>();
@@ -529,28 +616,52 @@ static void run_sepfilter(Buffer& dst, const View& src,
     int width = dst.length();
     int chan  = dst.meta().chan;
 
-    for (int w=0; w < width; w++)
+    // optimized 3x3 vs reference
+    if (kxLen == 3 && kyLen == 3)
     {
-        // TODO: make this cycle innermost
-        for (int c=0; c < chan; c++)
+        int y  = dst.y();
+        int y0 = dst.priv().writeStart();
+
+        int border = xborder;
+        run_sepfilter3x3_impl(out, in, width, chan, kx, ky, border, scale, delta, buf, y, y0);
+    }
+    else
+    {
+        int length = chan * width;
+        int xshift = chan * xborder;
+
+        // horizontal pass
+
+        for (int k=0; k < kyLen; k++)
         {
-            float sum=0;
+            const SRC *inp[kMax] = {nullptr};
 
-            for (int i=0; i < kyLen; i++)
+            for (int j=0; j < kxLen; j++)
             {
-                float sumi=0;
+                inp[j] = in[k] + (j - xborder)*xshift;
+            }
 
+            for (int l=0; l < length; l++)
+            {
+                float sum = 0;
                 for (int j=0; j < kxLen; j++)
                 {
-                    sumi += in[i][(w + j - border)*chan + c] * kx[j];
+                    sum += inp[j][l] * kx[j];
                 }
-
-                sum += sumi * ky[i];
+                buf[k][l] = sum;
             }
+        }
 
-            float result = sum + delta;
+        // vertical pass
 
-            out[w*chan + c] = saturate<DST>(result, rintf);
+        for (int l=0; l < length; l++)
+        {
+            float sum = 0;
+            for (int k=0; k < kyLen; k++)
+            {
+                sum += buf[k][l] * ky[k];
+            }
+            out[l] = saturate<DST>(sum*scale + delta, rintf);
         }
     }
 }
@@ -580,21 +691,37 @@ GAPI_FLUID_KERNEL(GFluidSepFilter, cv::gapi::imgproc::GSepFilter, true)
         int kxLen = kernX.rows * kernX.cols;
         int kyLen = kernY.rows * kernY.cols;
 
+        GAPI_Assert(kyLen == 3);
+
         float *kx = scratch.OutLine<float>();
         float *ky = kx + kxLen;
 
+        int width = src.meta().size.width;
+        int chan  = src.meta().chan;
+        int length = width * chan;
+
+        float *buf[3];
+        buf[0] = ky + kyLen;
+        buf[1] = buf[0] + length;
+        buf[2] = buf[1] + length;
+
+        float scale = 1;
         float delta = static_cast<float>(delta_[0]);
 
         //     DST     SRC     OP             __VA_ARGS__
-        UNARY_(uchar , uchar , run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, delta);
-        UNARY_(ushort, ushort, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, delta);
-        UNARY_( short,  short, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, delta);
-        UNARY_( float,  float, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, delta);
+        UNARY_(uchar , uchar , run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_( short, uchar , run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_( float, uchar , run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_(ushort, ushort, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_( float, ushort, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_( short,  short, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_( float,  short, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
+        UNARY_( float,  float, run_sepfilter, dst, src, kx, kxLen, ky, kyLen, anchor, scale, delta, buf);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
 
-    static void initScratch(const GMatDesc& /* in */,
+    static void initScratch(const GMatDesc&    in,
                                   int       /* ddepth */,
                             const Mat     &    kernX,
                             const Mat     &    kernY,
@@ -607,7 +734,13 @@ GAPI_FLUID_KERNEL(GFluidSepFilter, cv::gapi::imgproc::GSepFilter, true)
         int kxLen = kernX.rows * kernX.cols;
         int kyLen = kernY.rows * kernY.cols;
 
-        cv::gapi::own::Size bufsize(kxLen + kyLen, 1);
+        int width = in.size.width;
+        int chan  = in.chan;
+
+        int buflen = kxLen + kyLen +         // x, y kernels
+                     width * chan * Window;  // work buffers
+
+        cv::gapi::own::Size bufsize(buflen, 1);
         GMatDesc bufdesc = {CV_32F, 1, bufsize};
         Buffer buffer(bufdesc);
         scratch = std::move(buffer);
@@ -664,29 +797,47 @@ GAPI_FLUID_KERNEL(GFluidGaussBlur, cv::gapi::imgproc::GGaussBlur, true)
         auto *kx = scratch.OutLine<float>(); // cached kernX data
         auto *ky = kx + kxsize;              // cached kernY data
 
+        int width = src.meta().size.width;
+        int chan  = src.meta().chan;
+        int length = width * chan;
+
+        float *buf[3];
+        buf[0] = ky + kysize;
+        buf[1] = buf[0] + length;
+        buf[2] = buf[1] + length;
+
         auto  anchor = cv::Point(-1, -1);
-        float delta = 0.f;
+
+        float scale = 1;
+        float delta = 0;
 
         //     DST     SRC     OP             __VA_ARGS__
-        UNARY_(uchar , uchar , run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, delta);
-        UNARY_(ushort, ushort, run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, delta);
-        UNARY_( short,  short, run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, delta);
+        UNARY_(uchar , uchar , run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, scale, delta, buf);
+        UNARY_(ushort, ushort, run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, scale, delta, buf);
+        UNARY_( short,  short, run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, scale, delta, buf);
+        UNARY_( float,  float, run_sepfilter, dst, src, kx, kxsize, ky, kysize, anchor, scale, delta, buf);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
 
-    static void initScratch(const GMatDesc& /* in */,
+    static void initScratch(const GMatDesc&    in,
                             const cv::Size &   ksize,
                                   double       sigmaX,
                                   double       sigmaY,
-                                  int       /* borderType */,
-                            const cv::Scalar  & /* borderValue */,
+                                  int          /* borderType */,
+                            const cv::Scalar & /* borderValue */,
                                   Buffer  &    scratch)
     {
         int kxsize = ksize.width;
         int kysize = ksize.height;
 
-        cv::gapi::own::Size bufsize(kxsize + kysize, 1);
+        int width = in.size.width;
+        int chan  = in.chan;
+
+        int buflen = kxsize + kysize +       // x, y kernels
+                     width * chan * Window;  // work buffers
+
+        cv::gapi::own::Size bufsize(buflen, 1);
         GMatDesc bufdesc = {CV_32F, 1, bufsize};
         Buffer buffer(bufdesc);
         scratch = std::move(buffer);
@@ -767,7 +918,7 @@ static void run_sobel(Buffer& dst,
     int y0 = dst.priv().writeStart();
 //  int y1 = dst.priv().writeEnd();
 
-    run_sobel_row(out, in, width, chan, kx, ky, border, scale, delta, buf, y, y0);
+    run_sepfilter3x3_impl(out, in, width, chan, kx, ky, border, scale, delta, buf, y, y0);
 }
 
 GAPI_FLUID_KERNEL(GFluidSobel, cv::gapi::imgproc::GSobel, true)
@@ -901,24 +1052,30 @@ static void run_filter2d(Buffer& dst, const View& src,
 
     int width = dst.length();
     int chan  = dst.meta().chan;
+    int length = width * chan;
 
-    for (int w=0; w < width; w++)
+    // manually optimized for 3x3
+    if (k_rows == 3 && k_cols == 3)
     {
-        // TODO: make this cycle innermost
-        for (int c=0; c < chan; c++)
-        {
-            float sum = 0;
-
-            for (int i=0; i < k_rows; i++)
-            for (int j=0; j < k_cols; j++)
-            {
-                sum += in[i][(w + j - border_x)*chan + c] * k[k_cols*i + j];
-            }
+        float scale = 1;
+        run_filter2d_3x3_impl(out, in, width, chan, k, scale, delta);
+        return;
+    }
 
-            float result = sum + delta;
+    // reference: any kernel size
+    for (int l=0; l < length; l++)
+    {
+        float sum = 0;
 
-            out[w*chan + c] = saturate<DST>(result, rintf);
+        for (int i=0; i < k_rows; i++)
+        for (int j=0; j < k_cols; j++)
+        {
+            sum += in[i][l + (j - border_x)*chan] * k[k_cols*i + j];
         }
+
+        float result = sum + delta;
+
+        out[l] = saturate<DST>(result, rintf);
     }
 }
 
@@ -946,6 +1103,7 @@ GAPI_FLUID_KERNEL(GFluidFilter2D, cv::gapi::imgproc::GFilter2D, true)
 
         int k_rows = kernel.rows;
         int k_cols = kernel.cols;
+
         const float *k = scratch.OutLine<float>(); // copy of kernel.data
 
         //     DST     SRC     OP            __VA_ARGS__
@@ -969,7 +1127,12 @@ GAPI_FLUID_KERNEL(GFluidFilter2D, cv::gapi::imgproc::GFilter2D, true)
                             const cv::Scalar  & /* borderValue */,
                                       Buffer  &    scratch)
     {
-        cv::gapi::own::Size bufsize(kernel.rows * kernel.cols, 1);
+        int krows = kernel.rows;
+        int kcols = kernel.cols;
+
+        int buflen = krows * kcols;  // kernel size
+
+        cv::gapi::own::Size bufsize(buflen, 1);
         GMatDesc bufdesc = {CV_32F, 1, bufsize};
         Buffer buffer(bufdesc);
         scratch = std::move(buffer);
@@ -1001,7 +1164,26 @@ GAPI_FLUID_KERNEL(GFluidFilter2D, cv::gapi::imgproc::GFilter2D, true)
 //
 //-----------------------------
 
-enum Morphology { M_ERODE, M_DILATE };
+static MorphShape detect_morph3x3_shape(const uchar kernel[])
+{
+    const uchar k[3][3] = {
+        { kernel[0], kernel[1], kernel[2]},
+        { kernel[3], kernel[4], kernel[5]},
+        { kernel[6], kernel[7], kernel[8]}
+    };
+
+    if (k[0][0] && k[0][1] && k[0][2] &&
+        k[1][0] && k[1][1] && k[1][2] &&
+        k[2][0] && k[2][1] && k[2][2])
+        return M_FULL;
+
+    if (!k[0][0] && k[0][1] && !k[0][2] &&
+         k[1][0] && k[1][1] &&  k[1][2] &&
+        !k[2][0] && k[2][1] && !k[2][2])
+        return M_CROSS;
+
+    return M_UNDEF;
+}
 
 template<typename DST, typename SRC>
 static void run_morphology(          Buffer&    dst,
@@ -1009,9 +1191,14 @@ static void run_morphology(          Buffer&    dst,
                            const     uchar      k[],
                                      int        k_rows,
                                      int        k_cols,
+                                     MorphShape k_type,
                            const cv::Point & /* anchor */,
                                      Morphology morphology)
 {
+    static_assert(std::is_same<DST, SRC>::value, "unsupported combination of types");
+
+    GAPI_Assert(M_ERODE == morphology || M_DILATE == morphology);
+
     static const int maxLines = 9;
     GAPI_Assert(k_rows <= maxLines);
 
@@ -1031,43 +1218,44 @@ static void run_morphology(          Buffer&    dst,
     int width = dst.length();
     int chan  = dst.meta().chan;
 
-    for (int w=0; w < width; w++)
+    // call optimized code, if 3x3
+    if (3 == k_rows && 3 == k_cols)
     {
-        // TODO: make this cycle innermost
-        for (int c=0; c < chan; c++)
+        run_morphology3x3_impl(out, in, width, chan, k, k_type, morphology);
+        return;
+    }
+
+    // reference: any size of k[]
+    int length = width * chan;
+    for (int l=0; l < length; l++)
+    {
+        SRC result;
+        if (M_ERODE == morphology)
         {
-            SRC result=0;
-            if (M_ERODE == morphology)
-            {
-                result = std::numeric_limits<SRC>::max();
-            }
-            else if (M_DILATE == morphology)
-            {
-                result = std::numeric_limits<SRC>::min();
-            }
-            else
-                CV_Error(cv::Error::StsBadArg, "unsupported morphology operation");
+            result = std::numeric_limits<SRC>::max();
+        }
+        else // if (M_DILATE == morphology)
+        {
+            result = std::numeric_limits<SRC>::min();
+        }
 
-            for (int i=0; i < k_rows; i++)
-            for (int j=0; j < k_cols; j++)
+        for (int i=0; i < k_rows; i++)
+        for (int j=0; j < k_cols; j++)
+        {
+            if ( k[k_cols*i + j] )
             {
-                if ( k[k_cols*i + j] )
+                if (M_ERODE == morphology)
+                {
+                    result = (std::min)(result, in[i][l + (j - border_x)*chan]);
+                }
+                else // if (M_DILATE == morphology)
                 {
-                    if (M_ERODE == morphology)
-                    {
-                        result = std::min(result, in[i][(w + j - border_x)*chan + c]);
-                    }
-                    else if (M_DILATE == morphology)
-                    {
-                        result = std::max(result, in[i][(w + j - border_x)*chan + c]);
-                    }
-                    else
-                        CV_Error(cv::Error::StsBadArg, "unsupported morphology operation");
+                    result = (std::max)(result, in[i][l + (j - border_x)*chan]);
                 }
             }
-
-            out[w*chan + c] = saturate<DST>(result, rintf);
         }
+
+        out[l] = saturate<DST>(result, rintf);
     }
 }
 
@@ -1095,13 +1283,16 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true)
 
         int k_rows = kernel.rows;
         int k_cols = kernel.cols;
+        int k_size = k_rows * k_cols;
 
         auto *k = scratch.OutLine<uchar>(); // copy of kernel.data
+        auto k_type = static_cast<MorphShape>(k[k_size]);
 
         //     DST     SRC     OP              __VA_ARGS__
-        UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE);
-        UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE);
-        UNARY_( short,  short, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_ERODE);
+        UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
+        UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
+        UNARY_( short,  short, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
+        UNARY_( float,  float, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_ERODE);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
@@ -1109,15 +1300,16 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true)
     static void initScratch(const GMatDesc& /* in */,
                             const Mat     &    kernel,
                             const Point   & /* anchor */,
-                              int           /* iterations */,
+                                  int       /* iterations */,
                                   int       /* borderType */,
                             const cv::Scalar  & /* borderValue */,
                                   Buffer  &    scratch)
     {
         int k_rows = kernel.rows;
         int k_cols = kernel.cols;
+        int k_size = k_rows * k_cols;
 
-        cv::gapi::own::Size bufsize(k_rows * k_cols, 1);
+        cv::gapi::own::Size bufsize(k_size + 1, 1);
         GMatDesc bufdesc = {CV_8U, 1, bufsize};
         Buffer buffer(bufdesc);
         scratch = std::move(buffer);
@@ -1125,6 +1317,11 @@ GAPI_FLUID_KERNEL(GFluidErode, cv::gapi::imgproc::GErode, true)
         // FIXME: move to resetScratch stage ?
         auto *k = scratch.OutLine<uchar>();
         getKernel(k, kernel);
+
+        if (3 == k_rows && 3 == k_cols)
+            k[k_size] = static_cast<uchar>(detect_morph3x3_shape(k));
+        else
+            k[k_size] = static_cast<uchar>(M_UNDEF);
     }
 
     static void resetScratch(Buffer& /* scratch */)
@@ -1172,13 +1369,16 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true)
 
         int k_rows = kernel.rows;
         int k_cols = kernel.cols;
+        int k_size = k_rows * k_cols;
 
         auto *k = scratch.OutLine<uchar>(); // copy of kernel.data
+        auto k_type = static_cast<MorphShape>(k[k_size]);
 
         //     DST     SRC     OP              __VA_ARGS__
-        UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE);
-        UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE);
-        UNARY_( short,  short, run_morphology, dst, src, k, k_rows, k_cols, anchor, M_DILATE);
+        UNARY_(uchar , uchar , run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
+        UNARY_(ushort, ushort, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
+        UNARY_( short,  short, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
+        UNARY_( float,  float, run_morphology, dst, src, k, k_rows, k_cols, k_type, anchor, M_DILATE);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
@@ -1193,8 +1393,9 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true)
     {
         int k_rows = kernel.rows;
         int k_cols = kernel.cols;
+        int k_size = k_rows * k_cols;
 
-        cv::gapi::own::Size bufsize(k_rows * k_cols, 1);
+        cv::gapi::own::Size bufsize(k_size + 1, 1);
         GMatDesc bufdesc = {CV_8U, 1, bufsize};
         Buffer buffer(bufdesc);
         scratch = std::move(buffer);
@@ -1202,6 +1403,11 @@ GAPI_FLUID_KERNEL(GFluidDilate, cv::gapi::imgproc::GDilate, true)
         // FIXME: move to resetScratch stage ?
         auto *k = scratch.OutLine<uchar>();
         getKernel(k, kernel);
+
+        if (3 == k_rows && 3 == k_cols)
+            k[k_size] = static_cast<uchar>(detect_morph3x3_shape(k));
+        else
+            k[k_size] = static_cast<uchar>(M_UNDEF);
     }
 
     static void resetScratch(Buffer& /* scratch */)
@@ -1236,7 +1442,9 @@ static void run_medianblur(      Buffer& dst,
                            const View  & src,
                                  int     ksize)
 {
-    static const int kmax = 9;
+    static_assert(std::is_same<DST, SRC>::value, "unsupported combination of types");
+
+    constexpr int kmax = 9;
     GAPI_Assert(ksize <= kmax);
 
     const SRC *in[ kmax ];
@@ -1254,24 +1462,33 @@ static void run_medianblur(      Buffer& dst,
     int width = dst.length();
     int chan  = dst.meta().chan;
 
-    for (int w=0; w < width; w++)
+    // optimized: if 3x3
+
+    if (3 == ksize)
     {
-        // TODO: make this cycle innermost
-        for (int c=0; c < chan; c++)
-        {
-            SRC neighbours[kmax * kmax];
+        run_medblur3x3_impl(out, in, width, chan);
+        return;
+    }
 
-            for (int i=0; i < ksize; i++)
-            for (int j=0; j < ksize; j++)
-            {
-                neighbours[i*ksize + j] = in[i][(w + j - border)*chan + c];
-            }
+    // reference: any ksize
 
-            int length = ksize * ksize;
-            std::nth_element(neighbours, neighbours + length/2, neighbours + length);
+    int length = width * chan;
+    int klength = ksize * ksize;
+    int klenhalf = klength / 2;
 
-            out[w*chan + c] = saturate<DST>(neighbours[length/2], rintf);
+    for (int l=0; l < length; l++)
+    {
+        SRC neighbours[kmax * kmax];
+
+        for (int i=0; i < ksize; i++)
+        for (int j=0; j < ksize; j++)
+        {
+            neighbours[i*ksize + j] = in[i][l + (j - border)*chan];
         }
+
+        std::nth_element(neighbours, neighbours + klenhalf, neighbours + klength);
+
+        out[l] = saturate<DST>(neighbours[klenhalf], rintf);
     }
 }
 
@@ -1290,6 +1507,7 @@ GAPI_FLUID_KERNEL(GFluidMedianBlur, cv::gapi::imgproc::GMedianBlur, false)
         UNARY_(uchar , uchar , run_medianblur, dst, src, ksize);
         UNARY_(ushort, ushort, run_medianblur, dst, src, ksize);
         UNARY_( short,  short, run_medianblur, dst, src, ksize);
+        UNARY_( float,  float, run_medianblur, dst, src, ksize);
 
         CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
     }
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
index 9b217903e..3624de928 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.dispatch.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 #if !defined(GAPI_STANDALONE)
 
@@ -57,34 +57,102 @@ void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef
     CV_CPU_DISPATCH(run_yuv2rgb_impl, (out, in, width, coef), CV_CPU_DISPATCH_MODES_ALL);
 }
 
-//---------------------
+//-------------------------
 //
-// Fluid kernels: Sobel
+// Fluid kernels: sepFilter
 //
-//---------------------
+//-------------------------
+
+#define RUN_SEPFILTER3X3_IMPL(DST, SRC)                                     \
+void run_sepfilter3x3_impl(DST out[], const SRC *in[], int width, int chan, \
+                           const float kx[], const float ky[], int border,  \
+                           float scale, float delta,                        \
+                           float *buf[], int y, int y0)                     \
+{                                                                           \
+    CV_CPU_DISPATCH(run_sepfilter3x3_impl,                                  \
+        (out, in, width, chan, kx, ky, border, scale, delta, buf,y, y0),    \
+        CV_CPU_DISPATCH_MODES_ALL);                                         \
+}
+
+RUN_SEPFILTER3X3_IMPL(uchar , uchar )
+RUN_SEPFILTER3X3_IMPL( short, uchar )
+RUN_SEPFILTER3X3_IMPL( float, uchar )
+RUN_SEPFILTER3X3_IMPL(ushort, ushort)
+RUN_SEPFILTER3X3_IMPL( short, ushort)
+RUN_SEPFILTER3X3_IMPL( float, ushort)
+RUN_SEPFILTER3X3_IMPL( short,  short)
+RUN_SEPFILTER3X3_IMPL( float,  short)
+RUN_SEPFILTER3X3_IMPL( float,  float)
+
+#undef RUN_SEPFILTER3X3_IMPL
+
+//-------------------------
+//
+// Fluid kernels: Filter 2D
+//
+//-------------------------
+
+#define RUN_FILTER2D_3X3_IMPL(DST, SRC)                                     \
+void run_filter2d_3x3_impl(DST out[], const SRC *in[], int width, int chan, \
+                           const float kernel[], float scale, float delta)  \
+{                                                                           \
+    CV_CPU_DISPATCH(run_filter2d_3x3_impl,                                  \
+        (out, in, width, chan, kernel, scale, delta),                       \
+        CV_CPU_DISPATCH_MODES_ALL);                                         \
+}
+
+RUN_FILTER2D_3X3_IMPL(uchar , uchar )
+RUN_FILTER2D_3X3_IMPL(ushort, ushort)
+RUN_FILTER2D_3X3_IMPL( short,  short)
+RUN_FILTER2D_3X3_IMPL( float, uchar )
+RUN_FILTER2D_3X3_IMPL( float, ushort)
+RUN_FILTER2D_3X3_IMPL( float,  short)
+RUN_FILTER2D_3X3_IMPL( float,  float)
+
+#undef RUN_FILTER2D_3X3_IMPL
+
+//-----------------------------
+//
+// Fluid kernels: Erode, Dilate
+//
+//-----------------------------
 
-#define RUN_SOBEL_ROW(DST, SRC)                                          \
-void run_sobel_row(DST out[], const SRC *in[], int width, int chan,      \
-                   const float kx[], const float ky[], int border,       \
-                   float scale, float delta, float *buf[],               \
-                   int y, int y0)                                        \
+#define RUN_MORPHOLOGY3X3_IMPL(T)                                        \
+void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \
+                            const uchar k[], MorphShape k_type,          \
+                            Morphology morphology)                       \
 {                                                                        \
-    CV_CPU_DISPATCH(run_sobel_row,                                       \
-        (out, in, width, chan, kx, ky, border, scale, delta, buf,y, y0), \
+    CV_CPU_DISPATCH(run_morphology3x3_impl,                              \
+        (out, in, width, chan, k, k_type, morphology),                   \
         CV_CPU_DISPATCH_MODES_ALL);                                      \
 }
 
-RUN_SOBEL_ROW(uchar , uchar )
-RUN_SOBEL_ROW(ushort, ushort)
-RUN_SOBEL_ROW( short, uchar )
-RUN_SOBEL_ROW( short, ushort)
-RUN_SOBEL_ROW( short,  short)
-RUN_SOBEL_ROW( float, uchar )
-RUN_SOBEL_ROW( float, ushort)
-RUN_SOBEL_ROW( float,  short)
-RUN_SOBEL_ROW( float,  float)
-
-#undef RUN_SOBEL_ROW
+RUN_MORPHOLOGY3X3_IMPL(uchar )
+RUN_MORPHOLOGY3X3_IMPL(ushort)
+RUN_MORPHOLOGY3X3_IMPL( short)
+RUN_MORPHOLOGY3X3_IMPL( float)
+
+#undef RUN_MORPHOLOGY3X3_IMPL
+
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+#define RUN_MEDBLUR3X3_IMPL(T)                                        \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan) \
+{                                                                     \
+    CV_CPU_DISPATCH(run_medblur3x3_impl, (out, in, width, chan),      \
+        CV_CPU_DISPATCH_MODES_ALL);                                   \
+}
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
 
 } // namespace fliud
 } // namespace gapi
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
index 1b6f1b8c0..1e28dfd63 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 #pragma once
 
@@ -33,29 +33,87 @@ void run_rgb2yuv_impl(uchar out[], const uchar in[], int width, const float coef
 
 void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef[4]);
 
-//---------------------
+//-------------------------
 //
-// Fluid kernels: Sobel
+// Fluid kernels: sepFilter
 //
-//---------------------
-
-#define RUN_SOBEL_ROW(DST, SRC)                                     \
-void run_sobel_row(DST out[], const SRC *in[], int width, int chan, \
-                   const float kx[], const float ky[], int border,  \
-                   float scale, float delta, float *buf[],          \
-                   int y, int y0);
-
-RUN_SOBEL_ROW(uchar , uchar )
-RUN_SOBEL_ROW(ushort, ushort)
-RUN_SOBEL_ROW( short, uchar )
-RUN_SOBEL_ROW( short, ushort)
-RUN_SOBEL_ROW( short,  short)
-RUN_SOBEL_ROW( float, uchar )
-RUN_SOBEL_ROW( float, ushort)
-RUN_SOBEL_ROW( float,  short)
-RUN_SOBEL_ROW( float,  float)
-
-#undef RUN_SOBEL_ROW
+//-------------------------
+
+#define RUN_SEPFILTER3X3_IMPL(DST, SRC)                                     \
+void run_sepfilter3x3_impl(DST out[], const SRC *in[], int width, int chan, \
+                           const float kx[], const float ky[], int border,  \
+                           float scale, float delta,                        \
+                           float *buf[], int y, int y0);
+
+RUN_SEPFILTER3X3_IMPL(uchar , uchar )
+RUN_SEPFILTER3X3_IMPL( short, uchar )
+RUN_SEPFILTER3X3_IMPL( float, uchar )
+RUN_SEPFILTER3X3_IMPL(ushort, ushort)
+RUN_SEPFILTER3X3_IMPL( short, ushort)
+RUN_SEPFILTER3X3_IMPL( float, ushort)
+RUN_SEPFILTER3X3_IMPL( short,  short)
+RUN_SEPFILTER3X3_IMPL( float,  short)
+RUN_SEPFILTER3X3_IMPL( float,  float)
+
+#undef RUN_SEPFILTER3X3_IMPL
+
+//-------------------------
+//
+// Fluid kernels: Filter 2D
+//
+//-------------------------
+
+#define RUN_FILTER2D_3X3_IMPL(DST, SRC)                                     \
+void run_filter2d_3x3_impl(DST out[], const SRC *in[], int width, int chan, \
+                           const float kernel[], float scale, float delta);
+
+RUN_FILTER2D_3X3_IMPL(uchar , uchar )
+RUN_FILTER2D_3X3_IMPL(ushort, ushort)
+RUN_FILTER2D_3X3_IMPL( short,  short)
+RUN_FILTER2D_3X3_IMPL( float, uchar )
+RUN_FILTER2D_3X3_IMPL( float, ushort)
+RUN_FILTER2D_3X3_IMPL( float,  short)
+RUN_FILTER2D_3X3_IMPL( float,  float)
+
+#undef RUN_FILTER2D_3X3_IMPL
+
+//-----------------------------
+//
+// Fluid kernels: Erode, Dilate
+//
+//-----------------------------
+
+enum Morphology { M_ERODE, M_DILATE };
+
+enum MorphShape { M_FULL, M_CROSS, M_UNDEF };
+
+#define RUN_MORPHOLOGY3X3_IMPL(T)                                        \
+void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \
+                            const uchar k[], MorphShape k_type,          \
+                            Morphology morphology);
+
+RUN_MORPHOLOGY3X3_IMPL(uchar )
+RUN_MORPHOLOGY3X3_IMPL(ushort)
+RUN_MORPHOLOGY3X3_IMPL( short)
+RUN_MORPHOLOGY3X3_IMPL( float)
+
+#undef RUN_MORPHOLOGY3X3_IMPL
+
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+#define RUN_MEDBLUR3X3_IMPL(T) \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan);
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
 
 }  // namespace fluid
 }  // namespace gapi
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
index c87be085a..d455ae85d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidimgproc_func.simd.hpp
@@ -2,19 +2,26 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 // NB: allow including this *.hpp several times!
 // #pragma once -- don't: this file is NOT once!
 
 #if !defined(GAPI_STANDALONE)
 
+#include "gfluidimgproc_func.hpp"
+
 #include "opencv2/gapi/own/saturate.hpp"
 
 #include "opencv2/core.hpp"
 #include "opencv2/core/hal/intrin.hpp"
 
 #include <cstdint>
+#include <cstring>
+
+#include <algorithm>
+#include <limits>
+#include <vector>
 
 #ifdef __GNUC__
 #  pragma GCC diagnostic push
@@ -48,34 +55,120 @@ void run_rgb2yuv_impl(uchar out[], const uchar in[], int width, const float coef
 
 void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef[4]);
 
-//---------------------
+//-------------------------
+//
+// Fluid kernels: sepFilter
+//
+//-------------------------
+
+#define RUN_SEPFILTER3X3_IMPL(DST, SRC)                                     \
+void run_sepfilter3x3_impl(DST out[], const SRC *in[], int width, int chan, \
+                           const float kx[], const float ky[], int border,  \
+                           float scale, float delta,                        \
+                           float *buf[], int y, int y0);
+
+RUN_SEPFILTER3X3_IMPL(uchar , uchar )
+RUN_SEPFILTER3X3_IMPL( short, uchar )
+RUN_SEPFILTER3X3_IMPL( float, uchar )
+RUN_SEPFILTER3X3_IMPL(ushort, ushort)
+RUN_SEPFILTER3X3_IMPL( short, ushort)
+RUN_SEPFILTER3X3_IMPL( float, ushort)
+RUN_SEPFILTER3X3_IMPL( short,  short)
+RUN_SEPFILTER3X3_IMPL( float,  short)
+RUN_SEPFILTER3X3_IMPL( float,  float)
+
+#undef RUN_SEPFILTER3X3_IMPL
+
+//-------------------------
+//
+// Fluid kernels: Filter 2D
+//
+//-------------------------
+
+#define RUN_FILTER2D_3X3_IMPL(DST, SRC)                                     \
+void run_filter2d_3x3_impl(DST out[], const SRC *in[], int width, int chan, \
+                           const float kernel[], float scale, float delta);
+
+RUN_FILTER2D_3X3_IMPL(uchar , uchar )
+RUN_FILTER2D_3X3_IMPL(ushort, ushort)
+RUN_FILTER2D_3X3_IMPL( short,  short)
+RUN_FILTER2D_3X3_IMPL( float, uchar )
+RUN_FILTER2D_3X3_IMPL( float, ushort)
+RUN_FILTER2D_3X3_IMPL( float,  short)
+RUN_FILTER2D_3X3_IMPL( float,  float)
+
+#undef RUN_FILTER2D_3X3_IMPL
+
+//-----------------------------
 //
-// Fluid kernels: Sobel
+// Fluid kernels: Erode, Dilate
 //
-//---------------------
-
-#define RUN_SOBEL_ROW(DST, SRC)                                     \
-void run_sobel_row(DST out[], const SRC *in[], int width, int chan, \
-                  const float kx[], const float ky[], int border,   \
-                  float scale, float delta, float *buf[],           \
-                  int y, int y0);
-
-RUN_SOBEL_ROW(uchar , uchar )
-RUN_SOBEL_ROW(ushort, ushort)
-RUN_SOBEL_ROW( short, uchar )
-RUN_SOBEL_ROW( short, ushort)
-RUN_SOBEL_ROW( short,  short)
-RUN_SOBEL_ROW( float, uchar )
-RUN_SOBEL_ROW( float, ushort)
-RUN_SOBEL_ROW( float,  short)
-RUN_SOBEL_ROW( float,  float)
-
-#undef RUN_SOBEL_ROW
+//-----------------------------
+
+#define RUN_MORPHOLOGY3X3_IMPL(T)                                        \
+void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \
+                            const uchar k[], MorphShape k_type,          \
+                            Morphology morphology);
+
+RUN_MORPHOLOGY3X3_IMPL(uchar )
+RUN_MORPHOLOGY3X3_IMPL(ushort)
+RUN_MORPHOLOGY3X3_IMPL( short)
+RUN_MORPHOLOGY3X3_IMPL( float)
+
+#undef RUN_MORPHOLOGY3X3_IMPL
+
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+#define RUN_MEDBLUR3X3_IMPL(T) \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan);
+
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
 
 //----------------------------------------------------------------------
 
 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
+#if CV_SIMD
+template<typename SRC>
+static inline v_float32 vx_load_f32(const SRC* ptr)
+{
+    if (std::is_same<SRC,uchar>::value)
+    {
+        v_uint32 tmp = vx_load_expand_q(reinterpret_cast<const uchar*>(ptr));
+        return v_cvt_f32(v_reinterpret_as_s32(tmp));
+    }
+
+    if (std::is_same<SRC,ushort>::value)
+    {
+        v_uint32 tmp = vx_load_expand(reinterpret_cast<const ushort*>(ptr));
+        return v_cvt_f32(v_reinterpret_as_s32(tmp));
+    }
+
+    if (std::is_same<SRC,short>::value)
+    {
+        v_int32 tmp = vx_load_expand(reinterpret_cast<const short*>(ptr));
+        return v_cvt_f32(tmp);
+    }
+
+    if (std::is_same<SRC,float>::value)
+    {
+        v_float32 tmp = vx_load(reinterpret_cast<const float*>(ptr));
+        return tmp;
+    }
+
+    CV_Error(cv::Error::StsBadArg, "unsupported type");
+}
+#endif  // CV_SIMD
+
 //----------------------------------
 //
 // Fluid kernels: RGB2Gray, BGR2Gray
@@ -309,247 +402,1375 @@ void run_yuv2rgb_impl(uchar out[], const uchar in[], int width, const float coef
     }
 }
 
-//---------------------
+//-------------------------
 //
-// Fluid kernels: Sobel
+// Fluid kernels: sepFilter
 //
-//---------------------
+//-------------------------
 
-// Sobel 3x3: vertical pass
-template<bool noscale, typename DST>
-static void run_sobel3x3_vert(DST out[], int length, const float ky[],
-                float scale, float delta, const int r[], float *buf[])
+#if CV_SIMD
+// this variant not using buf[] appears 15% faster than reference any-2-float code below
+template<bool noscale, typename SRC>
+static void run_sepfilter3x3_any2float(float out[], const SRC *in[], int width, int chan,
+                                       const float kx[], const float ky[], int border,
+                                       float scale, float delta)
 {
-    float ky0 = ky[0],
-          ky1 = ky[1],
-          ky2 = ky[2];
+    const int length = width * chan;
+    const int shift = border * chan;
 
-    int r0 = r[0],
-        r1 = r[1],
-        r2 = r[2];
+    const float kx0 = kx[0], kx1 = kx[1], kx2 = kx[2];
+    const float ky0 = ky[0], ky1 = ky[1], ky2 = ky[2];
 
-#if CV_SIMD
-    // for floating-point output,
-    // manual vectoring may be not better than compiler's optimization
-#define EXPLICIT_SIMD_32F 0  // 1=vectorize 32f case explicitly, 0=don't
-#if     EXPLICIT_SIMD_32F
-    if (std::is_same<DST, float>::value && length >= v_int16::nlanes)
+    for (int l=0; l < length; )
     {
-        constexpr static int nlanes = v_float32::nlanes;
+        static const int nlanes = v_float32::nlanes;
 
-        for (int l=0; l < length; )
+        // main part
+        for ( ; l <= length - nlanes; l += nlanes)
         {
-            for (; l <= length - nlanes; l += nlanes)
+            auto xsum = [l, shift, kx0, kx1, kx2](const SRC i[])
             {
-                v_float32 sum = vx_load(&buf[r0][l]) * vx_setall_f32(ky0);
-                    sum = v_fma(vx_load(&buf[r1][l]),  vx_setall_f32(ky1), sum);
-                    sum = v_fma(vx_load(&buf[r2][l]),  vx_setall_f32(ky2), sum);
+                v_float32 t0 = vx_load_f32(&i[l - shift]);
+                v_float32 t1 = vx_load_f32(&i[l        ]);
+                v_float32 t2 = vx_load_f32(&i[l + shift]);
+                v_float32 t = t0 * vx_setall_f32(kx0);
+                    t = v_fma(t1,  vx_setall_f32(kx1), t);
+                    t = v_fma(t2,  vx_setall_f32(kx2), t);
+                return t;
+            };
+
+            v_float32 s0 = xsum(in[0]);
+            v_float32 s1 = xsum(in[1]);
+            v_float32 s2 = xsum(in[2]);
+            v_float32 s = s0 * vx_setall_f32(ky0);
+                s = v_fma(s1,  vx_setall_f32(ky1), s);
+                s = v_fma(s2,  vx_setall_f32(ky2), s);
+
+            if (!noscale)
+            {
+                s = v_fma(s, vx_setall_f32(scale), vx_setall_f32(delta));
+            }
 
-                if (!noscale)
-                {
-                    sum = v_fma(sum, vx_setall_f32(scale), vx_setall_f32(delta));
-                }
+            v_store(&out[l], s);
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+
+// this variant with manually vectored rounding to short/ushort appears 10-40x faster
+// than reference code below
+template<bool noscale, typename DST, typename SRC>
+static void run_sepfilter3x3_any2short(DST out[], const SRC *in[], int width, int chan,
+                                       const float kx[], const float ky[], int border,
+                                       float scale, float delta,
+                                       float *buf[], int y, int y0)
+{
+    int r[3];
+    r[0] = (y - y0    ) % 3;  // buf[r[0]]: previous
+    r[1] = (y - y0 + 1) % 3;  //            this
+    r[2] = (y - y0 + 2) % 3;  //            next row
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    const float kx0 = kx[0], kx1 = kx[1], kx2 = kx[2];
+    const float ky0 = ky[0], ky1 = ky[1], ky2 = ky[2];
+
+    // horizontal pass
+
+    int k0 = (y == y0)? 0: 2;
+
+    for (int k = k0; k < 3; k++)
+    {
+        //                      previous , this , next pixel
+        const SRC *s[3] = {in[k] - shift , in[k], in[k] + shift};
+
+        // rely on compiler vectoring
+        for (int l=0; l < length; l++)
+        {
+            buf[r[k]][l] = s[0][l]*kx0 + s[1][l]*kx1 + s[2][l]*kx2;
+        }
+    }
+
+    // vertical pass
+
+    const int r0=r[0], r1=r[1], r2=r[2];
+
+    for (int l=0; l < length;)
+    {
+        constexpr int nlanes = v_int16::nlanes;
+
+        // main part of row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            v_float32 sum0 = vx_load(&buf[r0][l])            * vx_setall_f32(ky0);
+                sum0 = v_fma(vx_load(&buf[r1][l]),             vx_setall_f32(ky1), sum0);
+                sum0 = v_fma(vx_load(&buf[r2][l]),             vx_setall_f32(ky2), sum0);
+
+            v_float32 sum1 = vx_load(&buf[r0][l + nlanes/2]) * vx_setall_f32(ky0);
+                sum1 = v_fma(vx_load(&buf[r1][l + nlanes/2]),  vx_setall_f32(ky1), sum1);
+                sum1 = v_fma(vx_load(&buf[r2][l + nlanes/2]),  vx_setall_f32(ky2), sum1);
+
+            if (!noscale)
+            {
+                sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
+            }
+
+            v_int32 isum0 = v_round(sum0),
+                    isum1 = v_round(sum1);
+
+            if (std::is_same<DST, short>::value)
+            {
+                // signed short
+                v_int16 res = v_pack(isum0, isum1);
+                v_store(reinterpret_cast<short*>(&out[l]), res);
+            } else
+            {
+                // unsigned short
+                v_uint16 res = v_pack_u(isum0, isum1);
+                v_store(reinterpret_cast<ushort*>(&out[l]), res);
+            }
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+
+// this code with manually vectored rounding to uchar is 10-40x faster than reference
+template<bool noscale, typename SRC>
+static void run_sepfilter3x3_any2char(uchar out[], const SRC *in[], int width, int chan,
+                                      const float kx[], const float ky[], int border,
+                                      float scale, float delta,
+                                      float *buf[], int y, int y0)
+{
+    int r[3];
+    r[0] = (y - y0    ) % 3;  // buf[r[0]]: previous
+    r[1] = (y - y0 + 1) % 3;  //            this
+    r[2] = (y - y0 + 2) % 3;  //            next row
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    const float kx0 = kx[0], kx1 = kx[1], kx2 = kx[2];
+    const float ky0 = ky[0], ky1 = ky[1], ky2 = ky[2];
+
+    // horizontal pass
+
+    int k0 = (y == y0)? 0: 2;
+
+    for (int k = k0; k < 3; k++)
+    {
+        //                      previous , this , next pixel
+        const SRC *s[3] = {in[k] - shift , in[k], in[k] + shift};
+
+        // rely on compiler vectoring
+        for (int l=0; l < length; l++)
+        {
+            buf[r[k]][l] = s[0][l]*kx0 + s[1][l]*kx1 + s[2][l]*kx2;
+        }
+    }
+
+    // vertical pass
+
+    const int r0=r[0], r1=r[1], r2=r[2];
+
+    for (int l=0; l < length;)
+    {
+        constexpr int nlanes = v_uint8::nlanes;
+
+        // main part of row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            v_float32 sum0 = vx_load(&buf[r0][l])              * vx_setall_f32(ky0);
+                sum0 = v_fma(vx_load(&buf[r1][l]),               vx_setall_f32(ky1), sum0);
+                sum0 = v_fma(vx_load(&buf[r2][l]),               vx_setall_f32(ky2), sum0);
+
+            v_float32 sum1 = vx_load(&buf[r0][l +   nlanes/4]) * vx_setall_f32(ky0);
+                sum1 = v_fma(vx_load(&buf[r1][l +   nlanes/4]),  vx_setall_f32(ky1), sum1);
+                sum1 = v_fma(vx_load(&buf[r2][l +   nlanes/4]),  vx_setall_f32(ky2), sum1);
+
+            v_float32 sum2 = vx_load(&buf[r0][l + 2*nlanes/4]) * vx_setall_f32(ky0);
+                sum2 = v_fma(vx_load(&buf[r1][l + 2*nlanes/4]),  vx_setall_f32(ky1), sum2);
+                sum2 = v_fma(vx_load(&buf[r2][l + 2*nlanes/4]),  vx_setall_f32(ky2), sum2);
+
+            v_float32 sum3 = vx_load(&buf[r0][l + 3*nlanes/4]) * vx_setall_f32(ky0);
+                sum3 = v_fma(vx_load(&buf[r1][l + 3*nlanes/4]),  vx_setall_f32(ky1), sum3);
+                sum3 = v_fma(vx_load(&buf[r2][l + 3*nlanes/4]),  vx_setall_f32(ky2), sum3);
+
+            if (!noscale)
+            {
+                sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum2 = v_fma(sum2, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum3 = v_fma(sum3, vx_setall_f32(scale), vx_setall_f32(delta));
+            }
+
+            v_int32 isum0 = v_round(sum0),
+                    isum1 = v_round(sum1),
+                    isum2 = v_round(sum2),
+                    isum3 = v_round(sum3);
+
+            v_int16 ires0 = v_pack(isum0, isum1),
+                    ires1 = v_pack(isum2, isum3);
+
+            v_uint8 res = v_pack_u(ires0, ires1);
+            v_store(reinterpret_cast<uchar*>(&out[l]), res);
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+
+// this code manually vectored for int16 not much faster than generic any-to-short code above
+#define USE_SEPFILTER3X3_CHAR2SHORT 1
+
+#if USE_SEPFILTER3X3_CHAR2SHORT
+template<bool noscale>
+static void run_sepfilter3x3_char2short(short out[], const uchar *in[], int width, int chan,
+                                        const float kx[], const float ky[], int border,
+                                        float scale, float delta,
+                                        float *buf[], int y, int y0)
+{
+    const schar ikx0 = saturate<schar>(kx[0], rintf);
+    const schar ikx1 = saturate<schar>(kx[1], rintf);
+    const schar ikx2 = saturate<schar>(kx[2], rintf);
+
+    const schar iky0 = saturate<schar>(ky[0], rintf);
+    const schar iky1 = saturate<schar>(ky[1], rintf);
+    const schar iky2 = saturate<schar>(ky[2], rintf);
+
+    const short iscale = saturate<short>(scale * (1 << 15), rintf);
+    const short idelta = saturate<short>(delta            , rintf);
+
+    // check if this code is applicable
+    if (ikx0 != kx[0] || ikx1 != kx[1] || ikx2 != kx[2] ||
+        iky0 != ky[0] || iky1 != ky[1] || iky2 != ky[2] ||
+        idelta != delta ||
+        std::abs(scale) > 1 || std::abs(scale) < 0.01)
+    {
+        run_sepfilter3x3_any2short<noscale>(out, in, width, chan, kx, ky, border, scale, delta,
+                                            buf, y, y0);
+        return;
+    }
 
-                v_store(reinterpret_cast<float*>(&out[l]), sum);
+    short *ibuf[3];
+    ibuf[0] = reinterpret_cast<short*>(buf[0]);
+    ibuf[1] = reinterpret_cast<short*>(buf[1]);
+    ibuf[2] = reinterpret_cast<short*>(buf[2]);
+
+    int r[3];
+    r[0] = (y - y0    ) % 3;  // buf[r[0]]: previous
+    r[1] = (y - y0 + 1) % 3;  //            this
+    r[2] = (y - y0 + 2) % 3;  //            next row
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    // horizontal pass
+
+    int k0 = (y == y0)? 0: 2;
+
+    for (int k = k0; k < 3; k++)
+    {
+        for (int l=0; l < length;)
+        {
+            constexpr int nlanes = v_int16::nlanes;
+
+            // main part of output row
+            for (; l <= length - nlanes; l += nlanes)
+            {
+                v_uint16 t0 = vx_load_expand(&in[k][l - shift]);  // previous
+                v_uint16 t1 = vx_load_expand(&in[k][l        ]);  // current
+                v_uint16 t2 = vx_load_expand(&in[k][l + shift]);  // next pixel
+                v_int16 t = v_reinterpret_as_s16(t0) * vx_setall_s16(ikx0) +
+                            v_reinterpret_as_s16(t1) * vx_setall_s16(ikx1) +
+                            v_reinterpret_as_s16(t2) * vx_setall_s16(ikx2);
+                v_store(&ibuf[r[k]][l], t);
             }
 
+            // tail (if any)
             if (l < length)
             {
-                // tail: recalculate last pixels
                 GAPI_DbgAssert(length >= nlanes);
                 l = length - nlanes;
             }
         }
+    }
+
+    // vertical pass
+
+    for (int l=0; l < length;)
+    {
+        constexpr int nlanes = v_int16::nlanes;
+
+        // main part of output row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            v_int16 s0 = vx_load(&ibuf[r[0]][l]);  // previous
+            v_int16 s1 = vx_load(&ibuf[r[1]][l]);  // current
+            v_int16 s2 = vx_load(&ibuf[r[2]][l]);  // next row
+            v_int16 s = s0 * vx_setall_s16(iky0) +
+                        s1 * vx_setall_s16(iky1) +
+                        s2 * vx_setall_s16(iky2);
+
+            if (!noscale)
+            {
+                s = v_mul_hi(s << 1, vx_setall_s16(iscale)) + vx_setall_s16(idelta);
+            }
+
+            v_store(&out[l], s);
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+#endif
+
+#endif  // CV_SIMD
+
+template<bool noscale, typename DST, typename SRC>
+static void run_sepfilter3x3_reference(DST out[], const SRC *in[], int width, int chan,
+                                       const float kx[], const float ky[], int border,
+                                       float scale, float delta,
+                                       float *buf[], int y, int y0)
+{
+    int r[3];
+    r[0] = (y - y0)     % 3;  // buf[r[0]]: previous
+    r[1] = (y - y0 + 1) % 3;  //            this
+    r[2] = (y - y0 + 2) % 3;  //            next row
+
+    int length = width * chan;
+    int shift = border * chan;
+
+    // horizontal pass
+
+    // full horizontal pass is needed only if very 1st row in ROI;
+    // for 2nd and further rows, it is enough to convolve only the
+    // "next" row - as we can reuse buffers from previous calls to
+    // this kernel (Fluid does rows consequently: y=y0, y0+1, ...)
+
+    int k0 = (y == y0)? 0: 2;
+
+    for (int k = k0; k < 3; k++)
+    {
+        //                      previous , this , next pixel
+        const SRC *s[3] = {in[k] - shift , in[k], in[k] + shift};
+
+        // rely on compiler vectoring
+        for (int l=0; l < length; l++)
+        {
+            buf[r[k]][l] = s[0][l]*kx[0] + s[1][l]*kx[1] + s[2][l]*kx[2];
+        }
+    }
+
+    // vertical pass
+
+    for (int l=0; l < length; l++)
+    {
+        float sum = buf[r[0]][l]*ky[0] + buf[r[1]][l]*ky[1] + buf[r[2]][l]*ky[2];
+
+        if (!noscale)
+        {
+            sum = sum*scale + delta;
+        }
+
+        out[l] = saturate<DST>(sum, rintf);
+    }
+}
+
+template<bool noscale, typename DST, typename SRC>
+static void run_sepfilter3x3_code(DST out[], const SRC *in[], int width, int chan,
+                                  const float kx[], const float ky[], int border,
+                                  float scale, float delta,
+                                  float *buf[], int y, int y0)
+{
+#if CV_SIMD
+    int length = width * chan;
+
+    // length variable may be unused if types do not match at 'if' statements below
+    (void) length;
 
+#if USE_SEPFILTER3X3_CHAR2SHORT
+    if (std::is_same<DST, short>::value && std::is_same<SRC, uchar>::value &&
+        length >= v_int16::nlanes)
+    {
+        // only slightly faster than more generic any-to-short (see below)
+        run_sepfilter3x3_char2short<noscale>(reinterpret_cast<short*>(out),
+                                             reinterpret_cast<const uchar**>(in),
+                                             width, chan, kx, ky, border, scale, delta,
+                                             buf, y, y0);
         return;
     }
 #endif
 
-    if ((std::is_same<DST, short>::value || std::is_same<DST, ushort>::value)
-        && length >= v_int16::nlanes)
+    if (std::is_same<DST, float>::value && std::is_same<SRC, float>::value &&
+        length >= v_float32::nlanes)
     {
-        constexpr static int nlanes = v_int16::nlanes;
+        // appears 15% faster than reference any-to-float code (called below)
+        run_sepfilter3x3_any2float<noscale>(reinterpret_cast<float*>(out), in,
+                                            width, chan, kx, ky, border, scale, delta);
+        return;
+    }
 
-        for (int l=0; l < length; )
+    if (std::is_same<DST, short>::value && length >= v_int16::nlanes)
+    {
+        // appears 10-40x faster than reference due to much faster rounding
+        run_sepfilter3x3_any2short<noscale>(reinterpret_cast<short*>(out), in,
+                                            width, chan, kx, ky, border, scale, delta,
+                                            buf, y, y0);
+        return;
+    }
+
+    if (std::is_same<DST, ushort>::value && length >= v_uint16::nlanes)
+    {
+        // appears 10-40x faster than reference due to much faster rounding
+        run_sepfilter3x3_any2short<noscale>(reinterpret_cast<ushort*>(out), in,
+                                            width, chan, kx, ky, border, scale, delta,
+                                            buf, y, y0);
+        return;
+    }
+
+    if (std::is_same<DST, uchar>::value && length >= v_uint8::nlanes)
+    {
+        // appears 10-40x faster than reference due to much faster rounding
+        run_sepfilter3x3_any2char<noscale>(reinterpret_cast<uchar*>(out), in,
+                                           width, chan, kx, ky, border, scale, delta,
+                                           buf, y, y0);
+        return;
+    }
+#endif  // CV_SIMD
+
+    // reference code is quite fast for any-to-float case,
+    // but not for any-to-integral due to very slow rounding
+    run_sepfilter3x3_reference<noscale>(out, in, width, chan, kx, ky, border,
+                                        scale, delta, buf, y, y0);
+}
+
+#define RUN_SEPFILTER3X3_IMPL(DST, SRC)                                      \
+void run_sepfilter3x3_impl(DST out[], const SRC *in[], int width, int chan,  \
+                           const float kx[], const float ky[], int border,   \
+                           float scale, float delta,                         \
+                           float *buf[], int y, int y0)                      \
+{                                                                            \
+    if (scale == 1 && delta == 0)                                            \
+    {                                                                        \
+        constexpr bool noscale = true;                                       \
+        run_sepfilter3x3_code<noscale>(out, in, width, chan, kx, ky, border, \
+                                       scale, delta, buf, y, y0);            \
+    }                                                                        \
+    else                                                                     \
+    {                                                                        \
+        constexpr bool noscale = false;                                      \
+        run_sepfilter3x3_code<noscale>(out, in, width, chan, kx, ky, border, \
+                                       scale, delta, buf, y, y0);            \
+    }                                                                        \
+}
+
+RUN_SEPFILTER3X3_IMPL(uchar , uchar )
+RUN_SEPFILTER3X3_IMPL( short, uchar )
+RUN_SEPFILTER3X3_IMPL( float, uchar )
+RUN_SEPFILTER3X3_IMPL(ushort, ushort)
+RUN_SEPFILTER3X3_IMPL( short, ushort)
+RUN_SEPFILTER3X3_IMPL( float, ushort)
+RUN_SEPFILTER3X3_IMPL( short,  short)
+RUN_SEPFILTER3X3_IMPL( float,  short)
+RUN_SEPFILTER3X3_IMPL( float,  float)
+
+#undef RUN_SEPFILTER3X3_IMPL
+
+//-------------------------
+//
+// Fluid kernels: Filter 2D
+//
+//-------------------------
+
+template<bool noscale, typename DST, typename SRC>
+static void run_filter2d_3x3_reference(DST out[], const SRC *in[], int width, int chan,
+                                       const float kernel[], float scale, float delta)
+{
+    static constexpr int ksize = 3;
+    static constexpr int border = (ksize - 1) / 2;
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    const float k[3][3] = {{ kernel[0], kernel[1], kernel[2] },
+                           { kernel[3], kernel[4], kernel[5] },
+                           { kernel[6], kernel[7], kernel[8] }};
+
+    for (int l=0; l < length; l++)
+    {
+        float sum = in[0][l - shift] * k[0][0] + in[0][l] * k[0][1] + in[0][l + shift] * k[0][2]
+                  + in[1][l - shift] * k[1][0] + in[1][l] * k[1][1] + in[1][l + shift] * k[1][2]
+                  + in[2][l - shift] * k[2][0] + in[2][l] * k[2][1] + in[2][l + shift] * k[2][2];
+
+        if (!noscale)
         {
-            for (; l <= length - nlanes; l += nlanes)
+            sum = sum*scale + delta;
+        }
+
+        out[l] = saturate<DST>(sum, rintf);
+    }
+}
+
+#if CV_SIMD
+// assume DST is short or ushort
+template<bool noscale, typename DST, typename SRC>
+static void run_filter2d_3x3_any2short(DST out[], const SRC *in[], int width, int chan,
+                                       const float kernel[], float scale, float delta)
+{
+    static constexpr int ksize = 3;
+    static constexpr int border = (ksize - 1) / 2;
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    const float k[3][3] = {
+        { kernel[0], kernel[1], kernel[2] },
+        { kernel[3], kernel[4], kernel[5] },
+        { kernel[6], kernel[7], kernel[8] }
+    };
+
+    for (int l=0; l < length;)
+    {
+        static constexpr int nlanes = v_int16::nlanes;
+
+        // main part of output row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            auto sumx = [in, shift, &k](int i, int j)
             {
-                v_float32 sum0 = vx_load(&buf[r0][l])            * vx_setall_f32(ky0);
-                    sum0 = v_fma(vx_load(&buf[r1][l]),             vx_setall_f32(ky1), sum0);
-                    sum0 = v_fma(vx_load(&buf[r2][l]),             vx_setall_f32(ky2), sum0);
+                v_float32 s = vx_load_f32(&in[i][j - shift]) * vx_setall_f32(k[i][0]);
+                    s = v_fma(vx_load_f32(&in[i][j        ]),  vx_setall_f32(k[i][1]), s);
+                    s = v_fma(vx_load_f32(&in[i][j + shift]),  vx_setall_f32(k[i][2]), s);
+                return s;
+            };
+
+            int l0 = l;
+            int l1 = l + nlanes/2;
+            v_float32 sum0 = sumx(0, l0) + sumx(1, l0) + sumx(2, l0);
+            v_float32 sum1 = sumx(0, l1) + sumx(1, l1) + sumx(2, l1);
+
+            if (!noscale)
+            {
+                sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
+            }
 
-                v_float32 sum1 = vx_load(&buf[r0][l + nlanes/2]) * vx_setall_f32(ky0);
-                    sum1 = v_fma(vx_load(&buf[r1][l + nlanes/2]),  vx_setall_f32(ky1), sum1);
-                    sum1 = v_fma(vx_load(&buf[r2][l + nlanes/2]),  vx_setall_f32(ky2), sum1);
+            v_int32 res0 = v_round(sum0);
+            v_int32 res1 = v_round(sum1);
 
-                if (!noscale)
+            if (std::is_same<DST, ushort>::value)
+            {
+                v_uint16 res = v_pack_u(res0, res1);
+                v_store(reinterpret_cast<ushort*>(&out[l]), res);
+            }
+            else // if DST == short
+            {
+                v_int16 res = v_pack(res0, res1);
+                v_store(reinterpret_cast<short*>(&out[l]), res);
+            }
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+
+template<bool noscale, typename SRC>
+static void run_filter2d_3x3_any2char(uchar out[], const SRC *in[], int width, int chan,
+                                      const float kernel[], float scale, float delta)
+{
+    static constexpr int ksize = 3;
+    static constexpr int border = (ksize - 1) / 2;
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    const float k[3][3] = {
+        { kernel[0], kernel[1], kernel[2] },
+        { kernel[3], kernel[4], kernel[5] },
+        { kernel[6], kernel[7], kernel[8] }
+    };
+
+    for (int l=0; l < length;)
+    {
+        static constexpr int nlanes = v_uint8::nlanes;
+
+        // main part of output row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            auto sumx = [in, shift, &k](int i, int j)
+            {
+                v_float32 s = vx_load_f32(&in[i][j - shift]) * vx_setall_f32(k[i][0]);
+                    s = v_fma(vx_load_f32(&in[i][j        ]),  vx_setall_f32(k[i][1]), s);
+                    s = v_fma(vx_load_f32(&in[i][j + shift]),  vx_setall_f32(k[i][2]), s);
+                return s;
+            };
+
+            int l0 = l;
+            int l1 = l +   nlanes/4;
+            int l2 = l + 2*nlanes/4;
+            int l3 = l + 3*nlanes/4;
+            v_float32 sum0 = sumx(0, l0) + sumx(1, l0) + sumx(2, l0);
+            v_float32 sum1 = sumx(0, l1) + sumx(1, l1) + sumx(2, l1);
+            v_float32 sum2 = sumx(0, l2) + sumx(1, l2) + sumx(2, l2);
+            v_float32 sum3 = sumx(0, l3) + sumx(1, l3) + sumx(2, l3);
+
+            if (!noscale)
+            {
+                sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum2 = v_fma(sum2, vx_setall_f32(scale), vx_setall_f32(delta));
+                sum3 = v_fma(sum3, vx_setall_f32(scale), vx_setall_f32(delta));
+            }
+
+            v_int32 res0 = v_round(sum0);
+            v_int32 res1 = v_round(sum1);
+            v_int32 res2 = v_round(sum2);
+            v_int32 res3 = v_round(sum3);
+
+            v_int16 resl = v_pack(res0, res1);
+            v_int16 resh = v_pack(res2, res3);
+            v_uint8 res = v_pack_u(resl, resh);
+
+            v_store(&out[l], res);
+        }
+
+        // tail (if any)
+        if (l < length)
+        {
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
+        }
+    }
+}
+#endif
+
+template<bool noscale, typename DST, typename SRC>
+static void run_filter2d_3x3_code(DST out[], const SRC *in[], int width, int chan,
+                                  const float kernel[], float scale, float delta)
+{
+#if CV_SIMD
+    int length = width * chan;
+
+    // length variable may be unused if types do not match at 'if' statements below
+    (void) length;
+
+    if (std::is_same<DST, short>::value && length >= v_int16::nlanes)
+    {
+        run_filter2d_3x3_any2short<noscale>(reinterpret_cast<short*>(out), in,
+                                            width, chan, kernel, scale, delta);
+        return;
+    }
+
+    if (std::is_same<DST, ushort>::value && length >= v_uint16::nlanes)
+    {
+        run_filter2d_3x3_any2short<noscale>(reinterpret_cast<ushort*>(out), in,
+                                            width, chan, kernel, scale, delta);
+        return;
+    }
+
+
+    if (std::is_same<DST, uchar>::value && length >= v_uint8::nlanes)
+    {
+        run_filter2d_3x3_any2char<noscale>(reinterpret_cast<uchar*>(out), in,
+                                           width, chan, kernel, scale, delta);
+        return;
+    }
+#endif  // CV_SIMD
+
+    run_filter2d_3x3_reference<noscale>(out, in, width, chan, kernel, scale, delta);
+}
+
+#define RUN_FILTER2D_3X3_IMPL(DST, SRC)                                             \
+void run_filter2d_3x3_impl(DST out[], const SRC *in[], int width, int chan,         \
+                           const float kernel[], float scale, float delta)          \
+{                                                                                   \
+    if (scale == 1 && delta == 0)                                                   \
+    {                                                                               \
+        constexpr bool noscale = true;                                              \
+        run_filter2d_3x3_code<noscale>(out, in, width, chan, kernel, scale, delta); \
+    }                                                                               \
+    else                                                                            \
+    {                                                                               \
+        constexpr bool noscale = false;                                             \
+        run_filter2d_3x3_code<noscale>(out, in, width, chan, kernel, scale, delta); \
+    }                                                                               \
+}
+
+RUN_FILTER2D_3X3_IMPL(uchar , uchar )
+RUN_FILTER2D_3X3_IMPL(ushort, ushort)
+RUN_FILTER2D_3X3_IMPL( short,  short)
+RUN_FILTER2D_3X3_IMPL( float, uchar )
+RUN_FILTER2D_3X3_IMPL( float, ushort)
+RUN_FILTER2D_3X3_IMPL( float,  short)
+RUN_FILTER2D_3X3_IMPL( float,  float)
+
+#undef RUN_FILTER2D_3X3_IMPL
+
+//-----------------------------
+//
+// Fluid kernels: Erode, Dilate
+//
+//-----------------------------
+
+template<typename T>
+static void run_morphology3x3_reference(T out[], const T *in[], int width, int chan,
+                                        const uchar k[], MorphShape k_type,
+                                        Morphology morphology)
+{
+    constexpr int k_size = 3;
+    constexpr int border = (k_size - 1) / 2;
+
+    const uchar kernel[3][3] = {{k[0], k[1], k[2]}, {k[3], k[4], k[5]}, {k[6], k[7], k[8]}};
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    if (M_ERODE == morphology)
+    {
+        if (M_FULL == k_type)
+        {
+            for (int l=0; l < length; l++)
+            {
+                T result = std::numeric_limits<T>::max();
+
+                result = (std::min)(result, in[0][l - shift]);
+                result = (std::min)(result, in[0][l        ]);
+                result = (std::min)(result, in[0][l + shift]);
+
+                result = (std::min)(result, in[1][l - shift]);
+                result = (std::min)(result, in[1][l        ]);
+                result = (std::min)(result, in[1][l + shift]);
+
+                result = (std::min)(result, in[2][l - shift]);
+                result = (std::min)(result, in[2][l        ]);
+                result = (std::min)(result, in[2][l + shift]);
+
+                out[l] = result;
+            }
+            return;
+        }
+
+        if (M_CROSS == k_type)
+        {
+            for (int l=0; l < length; l++)
+            {
+                T result = std::numeric_limits<T>::max();
+
+            //  result = (std::min)(result, in[0][l - shift]);
+                result = (std::min)(result, in[0][l        ]);
+            //  result = (std::min)(result, in[0][l + shift]);
+
+                result = (std::min)(result, in[1][l - shift]);
+                result = (std::min)(result, in[1][l        ]);
+                result = (std::min)(result, in[1][l + shift]);
+
+            //  result = (std::min)(result, in[2][l - shift]);
+                result = (std::min)(result, in[2][l        ]);
+            //  result = (std::min)(result, in[2][l + shift]);
+
+                out[l] = result;
+            }
+            return;
+        }
+
+        for (int l=0; l < length; l++)
+        {
+            T result = std::numeric_limits<T>::max();
+
+            result = kernel[0][0]? (std::min)(result, in[0][l - shift]): result;
+            result = kernel[0][1]? (std::min)(result, in[0][l        ]): result;
+            result = kernel[0][2]? (std::min)(result, in[0][l + shift]): result;
+
+            result = kernel[1][0]? (std::min)(result, in[1][l - shift]): result;
+            result = kernel[1][1]? (std::min)(result, in[1][l        ]): result;
+            result = kernel[1][2]? (std::min)(result, in[1][l + shift]): result;
+
+            result = kernel[2][0]? (std::min)(result, in[2][l - shift]): result;
+            result = kernel[2][1]? (std::min)(result, in[2][l        ]): result;
+            result = kernel[2][2]? (std::min)(result, in[2][l + shift]): result;
+
+            out[l] = result;
+        }
+        return;
+    }
+
+    if (M_DILATE == morphology)
+    {
+        if (M_FULL == k_type)
+        {
+            for (int l=0; l < length; l++)
+            {
+                T result = std::numeric_limits<T>::min();
+
+                result = (std::max)(result, in[0][l - shift]);
+                result = (std::max)(result, in[0][l        ]);
+                result = (std::max)(result, in[0][l + shift]);
+
+                result = (std::max)(result, in[1][l - shift]);
+                result = (std::max)(result, in[1][l        ]);
+                result = (std::max)(result, in[1][l + shift]);
+
+                result = (std::max)(result, in[2][l - shift]);
+                result = (std::max)(result, in[2][l        ]);
+                result = (std::max)(result, in[2][l + shift]);
+
+                out[l] = result;
+            }
+            return;
+        }
+
+        if (M_CROSS == k_type)
+        {
+            for (int l=0; l < length; l++)
+            {
+                T result = std::numeric_limits<T>::min();
+
+            //  result = (std::max)(result, in[0][l - shift]);
+                result = (std::max)(result, in[0][l        ]);
+            //  result = (std::max)(result, in[0][l + shift]);
+
+                result = (std::max)(result, in[1][l - shift]);
+                result = (std::max)(result, in[1][l        ]);
+                result = (std::max)(result, in[1][l + shift]);
+
+            //  result = (std::max)(result, in[2][l - shift]);
+                result = (std::max)(result, in[2][l        ]);
+            //  result = (std::max)(result, in[2][l + shift]);
+
+                out[l] = result;
+            }
+            return;
+        }
+
+        for (int l=0; l < length; l++)
+        {
+            T result = std::numeric_limits<T>::min();
+
+            result = kernel[0][0]? (std::max)(result, in[0][l - shift]): result;
+            result = kernel[0][1]? (std::max)(result, in[0][l        ]): result;
+            result = kernel[0][2]? (std::max)(result, in[0][l + shift]): result;
+
+            result = kernel[1][0]? (std::max)(result, in[1][l - shift]): result;
+            result = kernel[1][1]? (std::max)(result, in[1][l        ]): result;
+            result = kernel[1][2]? (std::max)(result, in[1][l + shift]): result;
+
+            result = kernel[2][0]? (std::max)(result, in[2][l - shift]): result;
+            result = kernel[2][1]? (std::max)(result, in[2][l        ]): result;
+            result = kernel[2][2]? (std::max)(result, in[2][l + shift]): result;
+
+            out[l] = result;
+        }
+        return;
+    }
+
+    CV_Error(cv::Error::StsBadArg, "unsupported morphology");
+}
+
+#if CV_SIMD
+template<typename T, typename VT, typename S>
+static void run_morphology3x3_simd(T out[], const T *in[], int width, int chan,
+                                   const uchar k[], MorphShape k_type,
+                                   Morphology morphology,
+                                   S setall)
+{
+    constexpr int k_size = 3;
+    constexpr int border = (k_size - 1) / 2;
+
+    const uchar kernel[3][3] = {{k[0], k[1], k[2]}, {k[3], k[4], k[5]}, {k[6], k[7], k[8]}};
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
+    if (M_ERODE == morphology)
+    {
+        if (M_FULL == k_type)
+        {
+            for (int l=0; l < length;)
+            {
+                constexpr int nlanes = VT::nlanes;
+
+                // main part of output row
+                for (; l <= length - nlanes; l += nlanes)
                 {
-                    sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
-                    sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
+                    VT r = setall(std::numeric_limits<T>::max());
+
+                    r = v_min(r, vx_load(&in[0][l - shift]));
+                    r = v_min(r, vx_load(&in[0][l        ]));
+                    r = v_min(r, vx_load(&in[0][l + shift]));
+
+                    r = v_min(r, vx_load(&in[1][l - shift]));
+                    r = v_min(r, vx_load(&in[1][l        ]));
+                    r = v_min(r, vx_load(&in[1][l + shift]));
+
+                    r = v_min(r, vx_load(&in[2][l - shift]));
+                    r = v_min(r, vx_load(&in[2][l        ]));
+                    r = v_min(r, vx_load(&in[2][l + shift]));
+
+                    v_store(&out[l], r);
                 }
 
-                v_int32 isum0 = v_round(sum0),
-                        isum1 = v_round(sum1);
+                // tail (if any)
+                if (l < length)
+                {
+                    GAPI_DbgAssert(length >= nlanes);
+                    l = length - nlanes;
+                }
+            }
+            return;
+        }
+
+        if (M_CROSS == k_type)
+        {
+            for (int l=0; l < length;)
+            {
+                constexpr int nlanes = VT::nlanes;
 
-                if (std::is_same<DST, short>::value)
+                // main part of output row
+                for (; l <= length - nlanes; l += nlanes)
                 {
-                    // signed short
-                    v_int16 res = v_pack(isum0, isum1);
-                    v_store(reinterpret_cast<short*>(&out[l]), res);
-                } else
+                    VT r = setall(std::numeric_limits<T>::max());
+
+                //  r = v_min(r, vx_load(&in[0][l - shift]));
+                    r = v_min(r, vx_load(&in[0][l        ]));
+                //  r = v_min(r, vx_load(&in[0][l + shift]));
+
+                    r = v_min(r, vx_load(&in[1][l - shift]));
+                    r = v_min(r, vx_load(&in[1][l        ]));
+                    r = v_min(r, vx_load(&in[1][l + shift]));
+
+                //  r = v_min(r, vx_load(&in[2][l - shift]));
+                    r = v_min(r, vx_load(&in[2][l        ]));
+                //  r = v_min(r, vx_load(&in[2][l + shift]));
+
+                    v_store(&out[l], r);
+                }
+
+                // tail (if any)
+                if (l < length)
                 {
-                    // unsigned short
-                    v_uint16 res = v_pack_u(isum0, isum1);
-                    v_store(reinterpret_cast<ushort*>(&out[l]), res);
+                    GAPI_DbgAssert(length >= nlanes);
+                    l = length - nlanes;
                 }
             }
+            return;
+        }
+
+        for (int l=0; l < length;)
+        {
+            constexpr int nlanes = VT::nlanes;
+
+            // main part of output row
+            for (; l <= length - nlanes; l += nlanes)
+            {
+                VT r = setall(std::numeric_limits<T>::max());
+
+                if (kernel[0][0]) r = v_min(r, vx_load(&in[0][l - shift]));
+                if (kernel[0][1]) r = v_min(r, vx_load(&in[0][l        ]));
+                if (kernel[0][2]) r = v_min(r, vx_load(&in[0][l + shift]));
+
+                if (kernel[1][0]) r = v_min(r, vx_load(&in[1][l - shift]));
+                if (kernel[1][1]) r = v_min(r, vx_load(&in[1][l        ]));
+                if (kernel[1][2]) r = v_min(r, vx_load(&in[1][l + shift]));
+
+                if (kernel[2][0]) r = v_min(r, vx_load(&in[2][l - shift]));
+                if (kernel[2][1]) r = v_min(r, vx_load(&in[2][l        ]));
+                if (kernel[2][2]) r = v_min(r, vx_load(&in[2][l + shift]));
+
+                v_store(&out[l], r);
+            }
 
+            // tail (if any)
             if (l < length)
             {
-                // tail: recalculate last pixels
                 GAPI_DbgAssert(length >= nlanes);
                 l = length - nlanes;
             }
         }
-
         return;
     }
 
-    if (std::is_same<DST, uchar>::value && length >= v_uint8::nlanes)
+    if (M_DILATE == morphology)
     {
-        constexpr static int nlanes = v_uint8::nlanes;
+        if (M_FULL == k_type)
+        {
+            for (int l=0; l < length;)
+            {
+                constexpr int nlanes = VT::nlanes;
 
-        for (int l=0; l < length; )
+                // main part of output row
+                for (; l <= length - nlanes; l += nlanes)
+                {
+                    VT r = setall(std::numeric_limits<T>::min());
+
+                    r = v_max(r, vx_load(&in[0][l - shift]));
+                    r = v_max(r, vx_load(&in[0][l        ]));
+                    r = v_max(r, vx_load(&in[0][l + shift]));
+
+                    r = v_max(r, vx_load(&in[1][l - shift]));
+                    r = v_max(r, vx_load(&in[1][l        ]));
+                    r = v_max(r, vx_load(&in[1][l + shift]));
+
+                    r = v_max(r, vx_load(&in[2][l - shift]));
+                    r = v_max(r, vx_load(&in[2][l        ]));
+                    r = v_max(r, vx_load(&in[2][l + shift]));
+
+                    v_store(&out[l], r);
+                }
+
+                // tail (if any)
+                if (l < length)
+                {
+                    GAPI_DbgAssert(length >= nlanes);
+                    l = length - nlanes;
+                }
+            }
+            return;
+        }
+
+        if (M_CROSS == k_type)
         {
-            for (; l <= length - nlanes; l += nlanes)
+            for (int l=0; l < length;)
             {
-                v_float32 sum0 = vx_load(&buf[r0][l])              * vx_setall_f32(ky0);
-                    sum0 = v_fma(vx_load(&buf[r1][l]),               vx_setall_f32(ky1), sum0);
-                    sum0 = v_fma(vx_load(&buf[r2][l]),               vx_setall_f32(ky2), sum0);
+                constexpr int nlanes = VT::nlanes;
+
+                // main part of output row
+                for (; l <= length - nlanes; l += nlanes)
+                {
+                    VT r = setall(std::numeric_limits<T>::min());
+
+                //  r = v_max(r, vx_load(&in[0][l - shift]));
+                    r = v_max(r, vx_load(&in[0][l        ]));
+                //  r = v_max(r, vx_load(&in[0][l + shift]));
 
-                v_float32 sum1 = vx_load(&buf[r0][l +   nlanes/4]) * vx_setall_f32(ky0);
-                    sum1 = v_fma(vx_load(&buf[r1][l +   nlanes/4]),  vx_setall_f32(ky1), sum1);
-                    sum1 = v_fma(vx_load(&buf[r2][l +   nlanes/4]),  vx_setall_f32(ky2), sum1);
+                    r = v_max(r, vx_load(&in[1][l - shift]));
+                    r = v_max(r, vx_load(&in[1][l        ]));
+                    r = v_max(r, vx_load(&in[1][l + shift]));
 
-                v_float32 sum2 = vx_load(&buf[r0][l + 2*nlanes/4]) * vx_setall_f32(ky0);
-                    sum2 = v_fma(vx_load(&buf[r1][l + 2*nlanes/4]),  vx_setall_f32(ky1), sum2);
-                    sum2 = v_fma(vx_load(&buf[r2][l + 2*nlanes/4]),  vx_setall_f32(ky2), sum2);
+                //  r = v_max(r, vx_load(&in[2][l - shift]));
+                    r = v_max(r, vx_load(&in[2][l        ]));
+                //  r = v_max(r, vx_load(&in[2][l + shift]));
 
-                v_float32 sum3 = vx_load(&buf[r0][l + 3*nlanes/4]) * vx_setall_f32(ky0);
-                    sum3 = v_fma(vx_load(&buf[r1][l + 3*nlanes/4]),  vx_setall_f32(ky1), sum3);
-                    sum3 = v_fma(vx_load(&buf[r2][l + 3*nlanes/4]),  vx_setall_f32(ky2), sum3);
+                    v_store(&out[l], r);
+                }
 
-                if (!noscale)
+                // tail (if any)
+                if (l < length)
                 {
-                    sum0 = v_fma(sum0, vx_setall_f32(scale), vx_setall_f32(delta));
-                    sum1 = v_fma(sum1, vx_setall_f32(scale), vx_setall_f32(delta));
-                    sum2 = v_fma(sum2, vx_setall_f32(scale), vx_setall_f32(delta));
-                    sum3 = v_fma(sum3, vx_setall_f32(scale), vx_setall_f32(delta));
+                    GAPI_DbgAssert(length >= nlanes);
+                    l = length - nlanes;
                 }
+            }
+            return;
+        }
+
+        for (int l=0; l < length;)
+        {
+            constexpr int nlanes = VT::nlanes;
 
-                v_int32 isum0 = v_round(sum0),
-                        isum1 = v_round(sum1),
-                        isum2 = v_round(sum2),
-                        isum3 = v_round(sum3);
+            // main part of output row
+            for (; l <= length - nlanes; l += nlanes)
+            {
+                VT r = setall(std::numeric_limits<T>::min());
 
-                v_int16 ires0 = v_pack(isum0, isum1),
-                        ires1 = v_pack(isum2, isum3);
+                if (kernel[0][0]) r = v_max(r, vx_load(&in[0][l - shift]));
+                if (kernel[0][1]) r = v_max(r, vx_load(&in[0][l        ]));
+                if (kernel[0][2]) r = v_max(r, vx_load(&in[0][l + shift]));
 
-                v_uint8 res = v_pack_u(ires0, ires1);
-                v_store(reinterpret_cast<uchar*>(&out[l]), res);
+                if (kernel[1][0]) r = v_max(r, vx_load(&in[1][l - shift]));
+                if (kernel[1][1]) r = v_max(r, vx_load(&in[1][l        ]));
+                if (kernel[1][2]) r = v_max(r, vx_load(&in[1][l + shift]));
+
+                if (kernel[2][0]) r = v_max(r, vx_load(&in[2][l - shift]));
+                if (kernel[2][1]) r = v_max(r, vx_load(&in[2][l        ]));
+                if (kernel[2][2]) r = v_max(r, vx_load(&in[2][l + shift]));
+
+                v_store(&out[l], r);
             }
 
+            // tail (if any)
             if (l < length)
             {
-                // tail: recalculate last pixels
                 GAPI_DbgAssert(length >= nlanes);
                 l = length - nlanes;
             }
         }
-
         return;
     }
+
+    CV_Error(cv::Error::StsBadArg, "unsupported morphology");
+}
 #endif
 
-    // reference code
+template<typename T>
+static void run_morphology3x3_code(T out[], const T *in[], int width, int chan,
+                                   const uchar k[], MorphShape k_type,
+                                   Morphology morphology)
+{
+#if CV_SIMD
+    int length = width * chan;
+
+    // length variable may be unused if types do not match at 'if' statements below
+    (void) length;
+
+    if (std::is_same<T, float>::value && length >= v_float32::nlanes)
+    {
+        run_morphology3x3_simd<float, v_float32>(reinterpret_cast<float*>(out),
+                                                 reinterpret_cast<const float**>(in),
+                                                 width, chan, k, k_type, morphology,
+                                                 vx_setall_f32);
+        return;
+    }
+
+    if (std::is_same<T, short>::value && length >= v_int16::nlanes)
+    {
+        run_morphology3x3_simd<short, v_int16>(reinterpret_cast<short*>(out),
+                                               reinterpret_cast<const short**>(in),
+                                               width, chan, k, k_type, morphology,
+                                               vx_setall_s16);
+        return;
+    }
+
+    if (std::is_same<T, ushort>::value && length >= v_uint16::nlanes)
+    {
+        run_morphology3x3_simd<ushort, v_uint16>(reinterpret_cast<ushort*>(out),
+                                                 reinterpret_cast<const ushort**>(in),
+                                                 width, chan, k, k_type, morphology,
+                                                 vx_setall_u16);
+        return;
+    }
+
+    if (std::is_same<T, uchar>::value && length >= v_uint8::nlanes)
+    {
+        run_morphology3x3_simd<uchar, v_uint8>(reinterpret_cast<uchar*>(out),
+                                               reinterpret_cast<const uchar**>(in),
+                                               width, chan, k, k_type, morphology,
+                                               vx_setall_u8);
+        return;
+    }
+#endif  // CV_SIMD
+
+    run_morphology3x3_reference(out, in, width, chan, k, k_type, morphology);
+}
+
+#define RUN_MORPHOLOGY3X3_IMPL(T)                                        \
+void run_morphology3x3_impl(T out[], const T *in[], int width, int chan, \
+                            const uchar k[], MorphShape k_type,          \
+                            Morphology morphology)                       \
+{                                                                        \
+    run_morphology3x3_code(out, in, width, chan, k, k_type, morphology); \
+}
+
+RUN_MORPHOLOGY3X3_IMPL(uchar )
+RUN_MORPHOLOGY3X3_IMPL(ushort)
+RUN_MORPHOLOGY3X3_IMPL( short)
+RUN_MORPHOLOGY3X3_IMPL( float)
+
+#undef RUN_MORPHOLOGY3X3_IMPL
+
+//---------------------------
+//
+// Fluid kernels: Median blur
+//
+//---------------------------
+
+template<typename T>
+static void run_medblur3x3_reference(T out[], const T *in[], int width, int chan)
+{
+    constexpr int ksize = 3;
+    constexpr int border = (ksize - 1) / 2;
+
+    const int length = width * chan;
+    const int shift = border * chan;
+
     for (int l=0; l < length; l++)
     {
-        float sum = buf[r0][l]*ky0 + buf[r1][l]*ky1 + buf[r2][l]*ky2;
+        T t[3][3];
 
-        if (!noscale)
+        // neighbourhood 3x3
+        t[0][0] = in[0][l - shift];    t[0][1] = in[0][l];    t[0][2] = in[0][l + shift];
+        t[1][0] = in[1][l - shift];    t[1][1] = in[1][l];    t[1][2] = in[1][l + shift];
+        t[2][0] = in[2][l - shift];    t[2][1] = in[2][l];    t[2][2] = in[2][l + shift];
+
+        // sort 2 values
+        auto sort = [](T& a, T& b)
         {
-            sum = sum*scale + delta;
-        }
+            T u=a, v=b;
+            a = (std::min)(u, v);
+            b = (std::max)(u, v);
+        };
 
-        out[l] = cv::gapi::own::saturate<DST>(sum, rintf);
+        // horizontal: 3-elements bubble-sort per each row
+        sort(t[0][0], t[0][1]);    sort(t[0][1], t[0][2]);    sort(t[0][0], t[0][1]);
+        sort(t[1][0], t[1][1]);    sort(t[1][1], t[1][2]);    sort(t[1][0], t[1][1]);
+        sort(t[2][0], t[2][1]);    sort(t[2][1], t[2][2]);    sort(t[2][0], t[2][1]);
+
+        // vertical: columns bubble-sort (although partial)
+        sort(t[0][0], t[1][0]);    sort(t[0][1], t[1][1]);  /*sort(t[0][2], t[1][2]);*/
+        sort(t[1][0], t[2][0]);    sort(t[1][1], t[2][1]);    sort(t[1][2], t[2][2]);
+      /*sort(t[0][0], t[1][0]);*/  sort(t[0][1], t[1][1]);    sort(t[0][2], t[1][2]);
+
+        // diagonal: bubble-sort (in opposite order!)
+        sort(t[1][1], t[0][2]);    sort(t[2][0], t[1][1]);    sort(t[1][1], t[0][2]);
+
+        out[l] = t[1][1];
     }
 }
 
-template<typename DST, typename SRC>
-static void run_sobel_impl(DST out[], const SRC *in[], int width, int chan,
-                           const float kx[], const float ky[], int border,
-                           float scale, float delta, float *buf[],
-                           int y, int y0)
+#if CV_SIMD
+template<typename VT, typename T>
+static void run_medblur3x3_simd(T out[], const T *in[], int width, int chan)
 {
-    int r[3];
-    r[0] = (y - y0)     % 3;  // buf[r[0]]: previous
-    r[1] = (y - y0 + 1) % 3;  //            this
-    r[2] = (y - y0 + 2) % 3;  //            next row
+    constexpr int ksize = 3;
+    constexpr int border = (ksize - 1) / 2;
 
-    int length = width * chan;
+    const int length = width * chan;
+    const int shift = border * chan;
 
-    // horizontal pass
+    for (int l=0; l < length;)
+    {
+        constexpr int nlanes = VT::nlanes;
 
-    // full horizontal pass is needed only if very 1st row in ROI;
-    // for 2nd and further rows, it is enough to convolve only the
-    // "next" row - as we can reuse buffers from previous calls to
-    // this kernel (note that Fluid processes rows consequently)
-    int k0 = (y == y0)? 0: 2;
+        // main part of output row
+        for (; l <= length - nlanes; l += nlanes)
+        {
+            VT t00, t01, t02, t10, t11, t12, t20, t21, t22;
 
-    for (int k = k0; k < 3; k++)
-    {
-        //                             previous, this , next pixel
-        const SRC *s[3] = {in[k] - border*chan , in[k], in[k] + border*chan};
+            // neighbourhood 3x3
 
-        // rely on compiler vectoring
-        for (int l=0; l < length; l++)
+            t00 = vx_load(&in[0][l - shift]);
+            t01 = vx_load(&in[0][l        ]);
+            t02 = vx_load(&in[0][l + shift]);
+
+            t10 = vx_load(&in[1][l - shift]);
+            t11 = vx_load(&in[1][l        ]);
+            t12 = vx_load(&in[1][l + shift]);
+
+            t20 = vx_load(&in[2][l - shift]);
+            t21 = vx_load(&in[2][l        ]);
+            t22 = vx_load(&in[2][l + shift]);
+
+            // sort 2 values
+            auto sort = [](VT& a, VT& b)
+            {
+                VT u=a, v=b;
+                a = v_min(u, v);
+                b = v_max(u, v);
+            };
+
+            // horizontal: 3-elements bubble-sort per each row
+            sort(t00, t01);    sort(t01, t02);    sort(t00, t01);
+            sort(t10, t11);    sort(t11, t12);    sort(t10, t11);
+            sort(t20, t21);    sort(t21, t22);    sort(t20, t21);
+
+            // vertical: columns bubble-sort (although partial)
+            sort(t00, t10);    sort(t01, t11);  /*sort(t02, t12);*/
+            sort(t10, t20);    sort(t11, t21);    sort(t12, t22);
+          /*sort(t00, t10);*/  sort(t01, t11);    sort(t02, t12);
+
+            // diagonal: bubble-sort (in opposite order!)
+            sort(t11, t02);    sort(t20, t11);    sort(t11, t02);
+
+            v_store(&out[l], t11);
+        }
+
+        // tail (if any)
+        if (l < length)
         {
-            buf[r[k]][l] = s[0][l]*kx[0] + s[1][l]*kx[1] + s[2][l]*kx[2];
+            GAPI_DbgAssert(length >= nlanes);
+            l = length - nlanes;
         }
     }
+}
+#endif
 
-    // vertical pass
-    if (scale == 1 && delta == 0)
+template<typename T>
+static void run_medblur3x3_code(T out[], const T *in[], int width, int chan)
+{
+#if CV_SIMD
+    int length = width * chan;
+
+    // length variable may be unused if types do not match at 'if' statements below
+    (void) length;
+
+    if (std::is_same<T, float>::value && length >= v_float32::nlanes)
     {
-        constexpr static bool noscale = true;  // omit scaling
-        run_sobel3x3_vert<noscale, DST>(out, length, ky, scale, delta, r, buf);
-    } else
+        run_medblur3x3_simd<v_float32>(reinterpret_cast<float*>(out),
+                                       reinterpret_cast<const float**>(in),
+                                       width, chan);
+        return;
+    }
+
+    if (std::is_same<T, short>::value && length >= v_int16::nlanes)
     {
-        constexpr static bool noscale = false;  // do scaling
-        run_sobel3x3_vert<noscale, DST>(out, length, ky, scale, delta, r, buf);
+        run_medblur3x3_simd<v_int16>(reinterpret_cast<short*>(out),
+                                     reinterpret_cast<const short**>(in),
+                                     width, chan);
+        return;
+    }
+
+    if (std::is_same<T, ushort>::value && length >= v_uint16::nlanes)
+    {
+        run_medblur3x3_simd<v_uint16>(reinterpret_cast<ushort*>(out),
+                                      reinterpret_cast<const ushort**>(in),
+                                      width, chan);
+        return;
     }
+
+    if (std::is_same<T, uchar>::value && length >= v_uint8::nlanes)
+    {
+        run_medblur3x3_simd<v_uint8>(reinterpret_cast<uchar*>(out),
+                                     reinterpret_cast<const uchar**>(in),
+                                     width, chan);
+        return;
+    }
+#endif
+
+    run_medblur3x3_reference(out, in, width, chan);
 }
 
-#define RUN_SOBEL_ROW(DST, SRC)                                                    \
-void run_sobel_row(DST out[], const SRC *in[], int width, int chan,                \
-                   const float kx[], const float ky[], int border,                 \
-                   float scale, float delta, float *buf[],                         \
-                   int y, int y0)                                                  \
-{                                                                                  \
-    run_sobel_impl(out, in, width, chan, kx, ky, border, scale, delta, buf,y, y0); \
+#define RUN_MEDBLUR3X3_IMPL(T)                                        \
+void run_medblur3x3_impl(T out[], const T *in[], int width, int chan) \
+{                                                                     \
+    run_medblur3x3_code(out, in, width, chan);                        \
 }
 
-RUN_SOBEL_ROW(uchar , uchar )
-RUN_SOBEL_ROW(ushort, ushort)
-RUN_SOBEL_ROW( short, uchar )
-RUN_SOBEL_ROW( short, ushort)
-RUN_SOBEL_ROW( short,  short)
-RUN_SOBEL_ROW( float, uchar )
-RUN_SOBEL_ROW( float, ushort)
-RUN_SOBEL_ROW( float,  short)
-RUN_SOBEL_ROW( float,  float)
-
-#undef RUN_SOBEL_ROW
+RUN_MEDBLUR3X3_IMPL(uchar )
+RUN_MEDBLUR3X3_IMPL(ushort)
+RUN_MEDBLUR3X3_IMPL( short)
+RUN_MEDBLUR3X3_IMPL( float)
+
+#undef RUN_MEDBLUR3X3_IMPL
+
+//------------------------------------------------------------------------------
 
 #endif  // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidutils.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidutils.hpp
index a38b2f132..0a54f4ee5 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidutils.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/fluid/gfluidutils.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef GFLUIDUTILS_HPP
@@ -10,7 +10,7 @@
 
 #include <limits>
 #include <type_traits>
-#include <opencv2/gapi/util/compiler_hints.hpp> //UNUSED
+#include <opencv2/gapi/util/compiler_hints.hpp> //suppress_unused_warning
 #include <opencv2/gapi/own/saturate.hpp>
 
 namespace cv {
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.cpp
index eda6a5fd7..e2f4cd4b5 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.hpp
index 1fb128d5f..5ba2ff6f7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpubackend.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GGPUBACKEND_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.cpp
index a1ee6a113..60367fee9 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.hpp
index 47cbfa6bd..c38d7f19e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpucore.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GGPUCORE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.cpp
index 9b7aca1a2..c90257be2 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.hpp
index cd2e324e6..29bd3fc9f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpuimgproc.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GGPUIMGPROC_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpukernel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpukernel.cpp
index 87e2aa97e..36f96dec4 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpukernel.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/gpu/ggpukernel.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include <cassert>
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.cpp
new file mode 100644
index 000000000..7fec9d160
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.cpp
@@ -0,0 +1,226 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#include "precomp.hpp"
+
+#include <functional>
+#include <unordered_set>
+
+#include <ade/util/algorithm.hpp>
+
+#include <ade/util/range.hpp>
+#include <ade/util/zip_range.hpp>
+#include <ade/util/chain_range.hpp>
+
+#include <ade/typed_graph.hpp>
+
+#include "opencv2/gapi/gcommon.hpp"
+#include "opencv2/gapi/util/any.hpp"
+#include "opencv2/gapi/gtype_traits.hpp"
+
+#include "compiler/gobjref.hpp"
+#include "compiler/gmodel.hpp"
+
+#include "backends/ocl/goclbackend.hpp"
+#include "backends/ocl/goclimgproc.hpp"
+#include "backends/ocl/goclcore.hpp"
+
+#include "api/gbackend_priv.hpp" // FIXME: Make it part of Backend SDK!
+
+// FIXME: Is there a way to take a typed graph (our GModel),
+// and create a new typed graph _ATOP_ of that (by extending with a couple of
+// new types?).
+// Alternatively, is there a way to compose types graphs?
+//
+// If not, we need to introduce that!
+using GOCLModel = ade::TypedGraph
+    < cv::gimpl::Unit
+    , cv::gimpl::Protocol
+    >;
+
+// FIXME: Same issue with Typed and ConstTyped
+using GConstGOCLModel = ade::ConstTypedGraph
+    < cv::gimpl::Unit
+    , cv::gimpl::Protocol
+    >;
+
+namespace
+{
+    class GOCLBackendImpl final: public cv::gapi::GBackend::Priv
+    {
+        virtual void unpackKernel(ade::Graph            &graph,
+                                  const ade::NodeHandle &op_node,
+                                  const cv::GKernelImpl &impl) override
+        {
+            GOCLModel gm(graph);
+            auto ocl_impl = cv::util::any_cast<cv::GOCLKernel>(impl.opaque);
+            gm.metadata(op_node).set(cv::gimpl::Unit{ocl_impl});
+        }
+
+        virtual EPtr compile(const ade::Graph &graph,
+                             const cv::GCompileArgs &,
+                             const std::vector<ade::NodeHandle> &nodes) const override
+        {
+            return EPtr{new cv::gimpl::GOCLExecutable(graph, nodes)};
+        }
+   };
+}
+
+cv::gapi::GBackend cv::gapi::ocl::backend()
+{
+    static cv::gapi::GBackend this_backend(std::make_shared<GOCLBackendImpl>());
+    return this_backend;
+}
+
+// GOCLExcecutable implementation //////////////////////////////////////////////
+cv::gimpl::GOCLExecutable::GOCLExecutable(const ade::Graph &g,
+                                          const std::vector<ade::NodeHandle> &nodes)
+    : m_g(g), m_gm(m_g)
+{
+    // Convert list of operations (which is topologically sorted already)
+    // into an execution script.
+    for (auto &nh : nodes)
+    {
+        switch (m_gm.metadata(nh).get<NodeType>().t)
+        {
+        case NodeType::OP: m_script.push_back({nh, GModel::collectOutputMeta(m_gm, nh)}); break;
+        case NodeType::DATA:
+        {
+            m_dataNodes.push_back(nh);
+            const auto &desc = m_gm.metadata(nh).get<Data>();
+            if (desc.storage == Data::Storage::CONST)
+            {
+                auto rc = RcDesc{desc.rc, desc.shape, desc.ctor};
+                magazine::bindInArg(m_res, rc, m_gm.metadata(nh).get<ConstValue>().arg);
+            }
+            //preallocate internal Mats in advance
+            if (desc.storage == Data::Storage::INTERNAL && desc.shape == GShape::GMAT)
+            {
+                const auto mat_desc = util::get<cv::GMatDesc>(desc.meta);
+                const auto type = CV_MAKETYPE(mat_desc.depth, mat_desc.chan);
+                m_res.slot<cv::UMat>()[desc.rc].create(mat_desc.size.height, mat_desc.size.width, type);
+            }
+            break;
+        }
+        default: util::throw_error(std::logic_error("Unsupported NodeType type"));
+        }
+    }
+}
+
+// FIXME: Document what it does
+cv::GArg cv::gimpl::GOCLExecutable::packArg(const GArg &arg)
+{
+    // No API placeholders allowed at this point
+    // FIXME: this check has to be done somewhere in compilation stage.
+    GAPI_Assert(   arg.kind != cv::detail::ArgKind::GMAT
+              && arg.kind != cv::detail::ArgKind::GSCALAR
+              && arg.kind != cv::detail::ArgKind::GARRAY);
+
+    if (arg.kind != cv::detail::ArgKind::GOBJREF)
+    {
+        // All other cases - pass as-is, with no transformations to GArg contents.
+        return arg;
+    }
+    GAPI_Assert(arg.kind == cv::detail::ArgKind::GOBJREF);
+
+    // Wrap associated CPU object (either host or an internal one)
+    // FIXME: object can be moved out!!! GExecutor faced that.
+    const cv::gimpl::RcDesc &ref = arg.get<cv::gimpl::RcDesc>();
+    switch (ref.shape)
+    {
+    case GShape::GMAT:    return GArg(m_res.slot<cv::UMat>()[ref.id]);
+    case GShape::GSCALAR: return GArg(m_res.slot<cv::gapi::own::Scalar>()[ref.id]);
+        // Note: .at() is intentional for GArray as object MUST be already there
+    //   (and constructed by either bindIn/Out or resetInternal)
+    case GShape::GARRAY:  return GArg(m_res.slot<cv::detail::VectorRef>().at(ref.id));
+    default:
+        util::throw_error(std::logic_error("Unsupported GShape type"));
+        break;
+    }
+}
+
+void cv::gimpl::GOCLExecutable::run(std::vector<InObj>  &&input_objs,
+                                    std::vector<OutObj> &&output_objs)
+{
+    // Update resources with run-time information - what this Island
+    // has received from user (or from another Island, or mix...)
+    // FIXME: Check input/output objects against GIsland protocol
+
+    for (auto& it : input_objs)   magazine::bindInArg (m_res, it.first, it.second, true);
+    for (auto& it : output_objs)  magazine::bindOutArg(m_res, it.first, it.second, true);
+
+    // Initialize (reset) internal data nodes with user structures
+    // before processing a frame (no need to do it for external data structures)
+    GModel::ConstGraph gm(m_g);
+    for (auto nh : m_dataNodes)
+    {
+        const auto &desc = gm.metadata(nh).get<Data>();
+
+        if (   desc.storage == Data::Storage::INTERNAL
+            && !util::holds_alternative<util::monostate>(desc.ctor))
+        {
+            // FIXME: Note that compile-time constant data objects (like
+            // a value-initialized GArray<T>) also satisfy this condition
+            // and should be excluded, but now we just don't support it
+            magazine::resetInternalData(m_res, desc);
+        }
+    }
+
+    // OpenCV backend execution is not a rocket science at all.
+    // Simply invoke our kernels in the proper order.
+    GConstGOCLModel gcm(m_g);
+    for (auto &op_info : m_script)
+    {
+        const auto &op = m_gm.metadata(op_info.nh).get<Op>();
+
+        // Obtain our real execution unit
+        // TODO: Should kernels be copyable?
+        GOCLKernel k = gcm.metadata(op_info.nh).get<Unit>().k;
+
+        // Initialize kernel's execution context:
+        // - Input parameters
+        GOCLContext context;
+        context.m_args.reserve(op.args.size());
+
+        using namespace std::placeholders;
+        ade::util::transform(op.args,
+                          std::back_inserter(context.m_args),
+                          std::bind(&GOCLExecutable::packArg, this, _1));
+
+        // - Output parameters.
+        // FIXME: pre-allocate internal Mats, etc, according to the known meta
+        for (const auto &out_it : ade::util::indexed(op.outs))
+        {
+            // FIXME: Can the same GArg type resolution mechanism be reused here?
+            const auto out_port  = ade::util::index(out_it);
+            const auto out_desc  = ade::util::value(out_it);
+            context.m_results[out_port] = magazine::getObjPtr(m_res, out_desc, true);
+        }
+
+        // Now trigger the executable unit
+        k.apply(context);
+
+        for (const auto &out_it : ade::util::indexed(op_info.expected_out_metas))
+        {
+            const auto out_index      = ade::util::index(out_it);
+            const auto expected_meta  = ade::util::value(out_it);
+            const auto out_meta       = descr_of(context.m_results[out_index]);
+
+            if (expected_meta != out_meta)
+            {
+                util::throw_error
+                    (std::logic_error
+                     ("Output meta doesn't "
+                      "coincide with the generated meta\n"
+                      "Expected: " + ade::util::to_string(expected_meta) + "\n"
+                      "Actual  : " + ade::util::to_string(out_meta)));
+            }
+        }
+    } // for(m_script)
+
+    for (auto &it : output_objs) magazine::writeBack(m_res, it.first, it.second, true);
+}
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.hpp
new file mode 100644
index 000000000..a86f3e6b4
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclbackend.hpp
@@ -0,0 +1,72 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GOCLBACKEND_HPP
+#define OPENCV_GAPI_GOCLBACKEND_HPP
+
+#include <map>                // map
+#include <unordered_map>      // unordered_map
+#include <tuple>              // tuple
+#include <ade/util/algorithm.hpp> // type_list_index
+
+#include "opencv2/gapi/garg.hpp"
+#include "opencv2/gapi/gproto.hpp"
+#include "opencv2/gapi/ocl/goclkernel.hpp"
+
+
+#include "api/gapi_priv.hpp"
+#include "backends/common/gbackend.hpp"
+#include "compiler/gislandmodel.hpp"
+
+namespace cv { namespace gimpl {
+
+struct Unit
+{
+    static const char *name() { return "OCLKernel"; }
+    GOCLKernel k;
+};
+
+class GOCLExecutable final: public GIslandExecutable
+{
+    const ade::Graph &m_g;
+    GModel::ConstGraph m_gm;
+
+    struct OperationInfo
+    {
+        ade::NodeHandle nh;
+        GMetaArgs expected_out_metas;
+    };
+
+    // Execution script, currently absolutely naive
+    std::vector<OperationInfo> m_script;
+    // List of all resources in graph (both internal and external)
+    std::vector<ade::NodeHandle> m_dataNodes;
+
+    // Actual data of all resources in graph (both internal and external)
+    Mag m_res;
+    GArg packArg(const GArg &arg);
+
+public:
+    GOCLExecutable(const ade::Graph                   &graph,
+                   const std::vector<ade::NodeHandle> &nodes);
+
+    virtual inline bool canReshape() const override { return false; }
+    virtual inline void reshape(ade::Graph&, const GCompileArgs&) override
+    {
+        // FIXME: OCL plugin is in fact reshapeable (as it was initially,
+        // even before outMeta() has been introduced), so this limitation
+        // should be dropped.
+        util::throw_error(std::logic_error("GOCLExecutable::reshape() should never be called"));
+    }
+
+    virtual void run(std::vector<InObj>  &&input_objs,
+                     std::vector<OutObj> &&output_objs) override;
+};
+
+}}
+
+#endif // OPENCV_GAPI_GOCLBACKEND_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.cpp
new file mode 100644
index 000000000..ba80ef325
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.cpp
@@ -0,0 +1,582 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#include "precomp.hpp"
+
+#include "opencv2/gapi/core.hpp"
+#include "opencv2/gapi/ocl/core.hpp"
+#include "backends/ocl/goclcore.hpp"
+
+GAPI_OCL_KERNEL(GOCLAdd, cv::gapi::core::GAdd)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, int dtype, cv::UMat& out)
+    {
+        cv::add(a, b, out, cv::noArray(), dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLAddC, cv::gapi::core::GAddC)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, int dtype, cv::UMat& out)
+    {
+        cv::add(a, b, out, cv::noArray(), dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSub, cv::gapi::core::GSub)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, int dtype, cv::UMat& out)
+    {
+        cv::subtract(a, b, out, cv::noArray(), dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSubC, cv::gapi::core::GSubC)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, int dtype, cv::UMat& out)
+    {
+        cv::subtract(a, b, out, cv::noArray(), dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSubRC, cv::gapi::core::GSubRC)
+{
+    static void run(const cv::Scalar& a, const cv::UMat& b, int dtype, cv::UMat& out)
+    {
+        cv::subtract(a, b, out, cv::noArray(), dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMul, cv::gapi::core::GMul)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, double scale, int dtype, cv::UMat& out)
+    {
+        cv::multiply(a, b, out, scale, dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMulCOld, cv::gapi::core::GMulCOld)
+{
+    static void run(const cv::UMat& a, double b, int dtype, cv::UMat& out)
+    {
+        cv::multiply(a, b, out, 1, dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMulC, cv::gapi::core::GMulC)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, int dtype, cv::UMat& out)
+    {
+        cv::multiply(a, b, out, 1, dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLDiv, cv::gapi::core::GDiv)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, double scale, int dtype, cv::UMat& out)
+    {
+        cv::divide(a, b, out, scale, dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLDivC, cv::gapi::core::GDivC)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, double scale, int dtype, cv::UMat& out)
+    {
+        cv::divide(a, b, out, scale, dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLDivRC, cv::gapi::core::GDivRC)
+{
+    static void run(const cv::Scalar& a, const cv::UMat& b, double scale, int dtype, cv::UMat& out)
+    {
+        cv::divide(a, b, out, scale, dtype);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMask, cv::gapi::core::GMask)
+{
+    static void run(const cv::UMat& in, const cv::UMat& mask, cv::UMat& out)
+    {
+        out = cv::UMat::zeros(in.size(), in.type());
+        in.copyTo(out, mask);
+    }
+};
+
+
+GAPI_OCL_KERNEL(GOCLMean, cv::gapi::core::GMean)
+{
+    static void run(const cv::UMat& in, cv::Scalar& out)
+    {
+        out = cv::mean(in);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLPolarToCart, cv::gapi::core::GPolarToCart)
+{
+    static void run(const cv::UMat& magn, const cv::UMat& angle, bool angleInDegrees, cv::UMat& outx, cv::UMat& outy)
+    {
+        cv::polarToCart(magn, angle, outx, outy, angleInDegrees);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCartToPolar, cv::gapi::core::GCartToPolar)
+{
+    static void run(const cv::UMat& x, const cv::UMat& y, bool angleInDegrees, cv::UMat& outmagn, cv::UMat& outangle)
+    {
+        cv::cartToPolar(x, y, outmagn, outangle, angleInDegrees);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpGT, cv::gapi::core::GCmpGT)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_GT);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpGE, cv::gapi::core::GCmpGE)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_GE);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpLE, cv::gapi::core::GCmpLE)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_LE);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpLT, cv::gapi::core::GCmpLT)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_LT);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpEQ, cv::gapi::core::GCmpEQ)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_EQ);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpNE, cv::gapi::core::GCmpNE)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_NE);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpGTScalar, cv::gapi::core::GCmpGTScalar)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_GT);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpGEScalar, cv::gapi::core::GCmpGEScalar)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_GE);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpLEScalar, cv::gapi::core::GCmpLEScalar)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_LE);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpLTScalar, cv::gapi::core::GCmpLTScalar)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_LT);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpEQScalar, cv::gapi::core::GCmpEQScalar)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_EQ);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCmpNEScalar, cv::gapi::core::GCmpNEScalar)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::compare(a, b, out, cv::CMP_NE);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLAnd, cv::gapi::core::GAnd)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::bitwise_and(a, b, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLAndS, cv::gapi::core::GAndS)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::bitwise_and(a, b, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLOr, cv::gapi::core::GOr)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::bitwise_or(a, b, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLOrS, cv::gapi::core::GOrS)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::bitwise_or(a, b, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLXor, cv::gapi::core::GXor)
+{
+    static void run(const cv::UMat& a, const cv::UMat& b, cv::UMat& out)
+    {
+        cv::bitwise_xor(a, b, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLXorS, cv::gapi::core::GXorS)
+{
+    static void run(const cv::UMat& a, const cv::Scalar& b, cv::UMat& out)
+    {
+        cv::bitwise_xor(a, b, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLNot, cv::gapi::core::GNot)
+{
+    static void run(const cv::UMat& a, cv::UMat& out)
+    {
+        cv::bitwise_not(a, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSelect, cv::gapi::core::GSelect)
+{
+    static void run(const cv::UMat& src1, const cv::UMat& src2, const cv::UMat& mask, cv::UMat& out)
+    {
+        src2.copyTo(out);
+        src1.copyTo(out, mask);
+    }
+};
+
+////TODO: doesn't compiled with UMat
+//GAPI_OCL_KERNEL(GOCLMin, cv::gapi::core::GMin)
+//{
+//    static void run(const cv::UMat& in1, const cv::UMat& in2, cv::UMat& out)
+//    {
+//        out = cv::min(in1, in2);
+//    }
+//};
+//
+////TODO: doesn't compiled with UMat
+//GAPI_OCL_KERNEL(GOCLMax, cv::gapi::core::GMax)
+//{
+//    static void run(const cv::UMat& in1, const cv::UMat& in2, cv::UMat& out)
+//    {
+//        out = cv::max(in1, in2);
+//    }
+//};
+
+
+GAPI_OCL_KERNEL(GOCLAbsDiff, cv::gapi::core::GAbsDiff)
+{
+    static void run(const cv::UMat& in1, const cv::UMat& in2, cv::UMat& out)
+    {
+        cv::absdiff(in1, in2, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLAbsDiffC, cv::gapi::core::GAbsDiffC)
+{
+    static void run(const cv::UMat& in1, const cv::Scalar& in2, cv::UMat& out)
+    {
+        cv::absdiff(in1, in2, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSum, cv::gapi::core::GSum)
+{
+    static void run(const cv::UMat& in, cv::Scalar& out)
+    {
+        out = cv::sum(in);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLAddW, cv::gapi::core::GAddW)
+{
+    static void run(const cv::UMat& in1, double alpha, const cv::UMat& in2, double beta, double gamma, int dtype, cv::UMat& out)
+    {
+        cv::addWeighted(in1, alpha, in2, beta, gamma, out, dtype);
+    }
+};
+
+
+GAPI_OCL_KERNEL(GOCLNormL1, cv::gapi::core::GNormL1)
+{
+    static void run(const cv::UMat& in, cv::Scalar& out)
+    {
+        out = cv::norm(in, cv::NORM_L1);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLNormL2, cv::gapi::core::GNormL2)
+{
+    static void run(const cv::UMat& in, cv::Scalar& out)
+    {
+        out = cv::norm(in, cv::NORM_L2);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLNormInf, cv::gapi::core::GNormInf)
+{
+    static void run(const cv::UMat& in, cv::Scalar& out)
+    {
+        out = cv::norm(in, cv::NORM_INF);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLIntegral, cv::gapi::core::GIntegral)
+{
+    static void run(const cv::UMat& in, int sdepth, int sqdepth, cv::UMat& out, cv::UMat& outSq)
+    {
+        cv::integral(in, out, outSq, sdepth, sqdepth);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLThreshold, cv::gapi::core::GThreshold)
+{
+    static void run(const cv::UMat& in, const cv::Scalar& a, const cv::Scalar& b, int type, cv::UMat& out)
+    {
+        cv::threshold(in, out, a.val[0], b.val[0], type);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLThresholdOT, cv::gapi::core::GThresholdOT)
+{
+    static void run(const cv::UMat& in, const cv::Scalar& b, int type, cv::UMat& out, cv::Scalar& outScalar)
+    {
+        outScalar = cv::threshold(in, out, b.val[0], b.val[0], type);
+    }
+};
+
+
+GAPI_OCL_KERNEL(GOCLInRange, cv::gapi::core::GInRange)
+{
+    static void run(const cv::UMat& in, const cv::Scalar& low, const cv::Scalar& up, cv::UMat& out)
+    {
+        cv::inRange(in, low, up, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSplit3, cv::gapi::core::GSplit3)
+{
+    static void run(const cv::UMat& in, cv::UMat &m1, cv::UMat &m2, cv::UMat &m3)
+    {
+        std::vector<cv::UMat> outMats = {m1, m2, m3};
+        cv::split(in, outMats);
+
+        // Write back FIXME: Write a helper or avoid this nonsence completely!
+        m1 = outMats[0];
+        m2 = outMats[1];
+        m3 = outMats[2];
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSplit4, cv::gapi::core::GSplit4)
+{
+    static void run(const cv::UMat& in, cv::UMat &m1, cv::UMat &m2, cv::UMat &m3, cv::UMat &m4)
+    {
+        std::vector<cv::UMat> outMats = {m1, m2, m3, m4};
+        cv::split(in, outMats);
+
+        // Write back FIXME: Write a helper or avoid this nonsence completely!
+        m1 = outMats[0];
+        m2 = outMats[1];
+        m3 = outMats[2];
+        m4 = outMats[3];
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMerge3, cv::gapi::core::GMerge3)
+{
+    static void run(const cv::UMat& in1, const cv::UMat& in2, const cv::UMat& in3, cv::UMat &out)
+    {
+        std::vector<cv::UMat> inMats = {in1, in2, in3};
+        cv::merge(inMats, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMerge4, cv::gapi::core::GMerge4)
+{
+    static void run(const cv::UMat& in1, const cv::UMat& in2, const cv::UMat& in3, const cv::UMat& in4, cv::UMat &out)
+    {
+        std::vector<cv::UMat> inMats = {in1, in2, in3, in4};
+        cv::merge(inMats, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLResize, cv::gapi::core::GResize)
+{
+    static void run(const cv::UMat& in, cv::Size sz, double fx, double fy, int interp, cv::UMat &out)
+    {
+        cv::resize(in, out, sz, fx, fy, interp);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLRemap, cv::gapi::core::GRemap)
+{
+    static void run(const cv::UMat& in, const cv::Mat& x, const cv::Mat& y, int a, int b, cv::Scalar s, cv::UMat& out)
+    {
+        cv::remap(in, out, x, y, a, b, s);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLFlip, cv::gapi::core::GFlip)
+{
+    static void run(const cv::UMat& in, int code, cv::UMat& out)
+    {
+        cv::flip(in, out, code);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCrop, cv::gapi::core::GCrop)
+{
+    static void run(const cv::UMat& in, cv::Rect rect, cv::UMat& out)
+    {
+        cv::UMat(in, rect).copyTo(out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLConcatHor, cv::gapi::core::GConcatHor)
+{
+    static void run(const cv::UMat& in1, const cv::UMat& in2, cv::UMat& out)
+    {
+        cv::hconcat(in1, in2, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLConcatVert, cv::gapi::core::GConcatVert)
+{
+    static void run(const cv::UMat& in1, const cv::UMat& in2, cv::UMat& out)
+    {
+        cv::vconcat(in1, in2, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLLUT, cv::gapi::core::GLUT)
+{
+    static void run(const cv::UMat& in, const cv::Mat& lut, cv::UMat& out)
+    {
+        cv::LUT(in, lut, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLConvertTo, cv::gapi::core::GConvertTo)
+{
+    static void run(const cv::UMat& in, int rtype, double alpha, double beta, cv::UMat& out)
+    {
+        in.convertTo(out, rtype, alpha, beta);
+    }
+};
+
+cv::gapi::GKernelPackage cv::gapi::core::ocl::kernels()
+{
+    static auto pkg = cv::gapi::kernels
+        <  GOCLAdd
+         , GOCLAddC
+         , GOCLSub
+         , GOCLSubC
+         , GOCLSubRC
+         , GOCLMul
+         , GOCLMulC
+         , GOCLMulCOld
+         , GOCLDiv
+         , GOCLDivC
+         , GOCLDivRC
+         , GOCLMean
+         , GOCLMask
+         , GOCLPolarToCart
+         , GOCLCartToPolar
+         , GOCLCmpGT
+         , GOCLCmpGE
+         , GOCLCmpLE
+         , GOCLCmpLT
+         , GOCLCmpEQ
+         , GOCLCmpNE
+         , GOCLCmpGTScalar
+         , GOCLCmpGEScalar
+         , GOCLCmpLEScalar
+         , GOCLCmpLTScalar
+         , GOCLCmpEQScalar
+         , GOCLCmpNEScalar
+         , GOCLAnd
+         , GOCLAndS
+         , GOCLOr
+         , GOCLOrS
+         , GOCLXor
+         , GOCLXorS
+         , GOCLNot
+         , GOCLSelect
+         //, GOCLMin
+         //, GOCLMax
+         , GOCLAbsDiff
+         , GOCLAbsDiffC
+         , GOCLSum
+         , GOCLAddW
+         , GOCLNormL1
+         , GOCLNormL2
+         , GOCLNormInf
+         , GOCLIntegral
+         , GOCLThreshold
+         , GOCLThresholdOT
+         , GOCLInRange
+         , GOCLSplit3
+         , GOCLSplit4
+         , GOCLResize
+         , GOCLMerge3
+         , GOCLMerge4
+         , GOCLRemap
+         , GOCLFlip
+         , GOCLCrop
+         , GOCLConcatHor
+         , GOCLConcatVert
+         , GOCLLUT
+         , GOCLConvertTo
+         >();
+    return pkg;
+}
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.hpp
new file mode 100644
index 000000000..a36695bd0
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclcore.hpp
@@ -0,0 +1,24 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GOCLCORE_HPP
+#define OPENCV_GAPI_GOCLCORE_HPP
+
+#include <map>
+#include <string>
+
+#include "opencv2/gapi/ocl/goclkernel.hpp"
+
+namespace cv { namespace gimpl {
+
+// NB: This is what a "Kernel Package" from the original Wiki doc should be.
+void loadOCLCore(std::map<std::string, cv::GOCLKernel> &kmap);
+
+}
+}
+
+#endif // OPENCV_GAPI_GOCLCORE_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.cpp
new file mode 100644
index 000000000..860ebf46b
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.cpp
@@ -0,0 +1,277 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#include "precomp.hpp"
+
+#include "opencv2/gapi/imgproc.hpp"
+#include "opencv2/gapi/ocl/imgproc.hpp"
+#include "backends/ocl/goclimgproc.hpp"
+
+
+GAPI_OCL_KERNEL(GOCLSepFilter, cv::gapi::imgproc::GSepFilter)
+{
+    static void run(const cv::UMat& in, int ddepth, const cv::Mat& kernX, const cv::Mat& kernY, const cv::Point& anchor, const cv::Scalar& delta,
+                    int border, const cv::Scalar& bordVal, cv::UMat &out)
+    {
+        if( border == cv::BORDER_CONSTANT )
+        {
+            cv::UMat temp_in;
+            int width_add = (kernY.cols - 1) / 2;
+            int height_add =  (kernX.rows - 1) / 2;
+            cv::copyMakeBorder(in, temp_in, height_add, height_add, width_add, width_add, border, bordVal);
+            cv::Rect rect = cv::Rect(height_add, width_add, in.cols, in.rows);
+            cv::sepFilter2D(temp_in(rect), out, ddepth, kernX, kernY, anchor, delta.val[0], border);
+        }
+        else
+            cv::sepFilter2D(in, out, ddepth, kernX, kernY, anchor, delta.val[0], border);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLBoxFilter, cv::gapi::imgproc::GBoxFilter)
+{
+    static void run(const cv::UMat& in, int ddepth, const cv::Size& ksize, const cv::Point& anchor, bool normalize, int borderType, const cv::Scalar& bordVal, cv::UMat &out)
+    {
+        if( borderType == cv::BORDER_CONSTANT )
+        {
+            cv::UMat temp_in;
+            int width_add = (ksize.width - 1) / 2;
+            int height_add =  (ksize.height - 1) / 2;
+            cv::copyMakeBorder(in, temp_in, height_add, height_add, width_add, width_add, borderType, bordVal);
+            cv::Rect rect = cv::Rect(height_add, width_add, in.cols, in.rows);
+            cv::boxFilter(temp_in(rect), out, ddepth, ksize, anchor, normalize, borderType);
+        }
+        else
+            cv::boxFilter(in, out, ddepth, ksize, anchor, normalize, borderType);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLBlur, cv::gapi::imgproc::GBlur)
+{
+    static void run(const cv::UMat& in, const cv::Size& ksize, const cv::Point& anchor, int borderType, const cv::Scalar& bordVal, cv::UMat &out)
+    {
+        if( borderType == cv::BORDER_CONSTANT )
+        {
+            cv::UMat temp_in;
+            int width_add = (ksize.width - 1) / 2;
+            int height_add =  (ksize.height - 1) / 2;
+            cv::copyMakeBorder(in, temp_in, height_add, height_add, width_add, width_add, borderType, bordVal);
+            cv::Rect rect = cv::Rect(height_add, width_add, in.cols, in.rows);
+            cv::blur(temp_in(rect), out, ksize, anchor, borderType);
+        }
+        else
+            cv::blur(in, out, ksize, anchor, borderType);
+    }
+};
+
+
+GAPI_OCL_KERNEL(GOCLFilter2D, cv::gapi::imgproc::GFilter2D)
+{
+    static void run(const cv::UMat& in, int ddepth, const cv::Mat& k, const cv::Point& anchor, const cv::Scalar& delta, int border,
+                    const cv::Scalar& bordVal, cv::UMat &out)
+    {
+        if( border == cv::BORDER_CONSTANT )
+        {
+            cv::UMat temp_in;
+            int width_add = (k.cols - 1) / 2;
+            int height_add =  (k.rows - 1) / 2;
+            cv::copyMakeBorder(in, temp_in, height_add, height_add, width_add, width_add, border, bordVal );
+            cv::Rect rect = cv::Rect(height_add, width_add, in.cols, in.rows);
+            cv::filter2D(temp_in(rect), out, ddepth, k, anchor, delta.val[0], border);
+        }
+        else
+            cv::filter2D(in, out, ddepth, k, anchor, delta.val[0], border);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLGaussBlur, cv::gapi::imgproc::GGaussBlur)
+{
+    static void run(const cv::UMat& in, const cv::Size& ksize, double sigmaX, double sigmaY, int borderType, const cv::Scalar& bordVal, cv::UMat &out)
+    {
+        if( borderType == cv::BORDER_CONSTANT )
+        {
+            cv::UMat temp_in;
+            int width_add = (ksize.width - 1) / 2;
+            int height_add =  (ksize.height - 1) / 2;
+            cv::copyMakeBorder(in, temp_in, height_add, height_add, width_add, width_add, borderType, bordVal );
+            cv::Rect rect = cv::Rect(height_add, width_add, in.cols, in.rows);
+            cv::GaussianBlur(temp_in(rect), out, ksize, sigmaX, sigmaY, borderType);
+        }
+        else
+            cv::GaussianBlur(in, out, ksize, sigmaX, sigmaY, borderType);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLMedianBlur, cv::gapi::imgproc::GMedianBlur)
+{
+    static void run(const cv::UMat& in, int ksize, cv::UMat &out)
+    {
+        cv::medianBlur(in, out, ksize);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLErode, cv::gapi::imgproc::GErode)
+{
+    static void run(const cv::UMat& in, const cv::Mat& kernel, const cv::Point& anchor, int iterations, int borderType, const cv::Scalar& borderValue, cv::UMat &out)
+    {
+        cv::erode(in, out, kernel, anchor, iterations, borderType, borderValue);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLDilate, cv::gapi::imgproc::GDilate)
+{
+    static void run(const cv::UMat& in, const cv::Mat& kernel, const cv::Point& anchor, int iterations, int borderType, const cv::Scalar& borderValue, cv::UMat &out)
+    {
+        cv::dilate(in, out, kernel, anchor, iterations, borderType, borderValue);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLSobel, cv::gapi::imgproc::GSobel)
+{
+    static void run(const cv::UMat& in, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType,
+                    const cv::Scalar& bordVal, cv::UMat &out)
+    {
+        if( borderType == cv::BORDER_CONSTANT )
+        {
+            cv::UMat temp_in;
+            int add = (ksize - 1) / 2;
+            cv::copyMakeBorder(in, temp_in, add, add, add, add, borderType, bordVal );
+            cv::Rect rect = cv::Rect(add, add, in.cols, in.rows);
+            cv::Sobel(temp_in(rect), out, ddepth, dx, dy, ksize, scale, delta, borderType);
+        }
+        else
+        cv::Sobel(in, out, ddepth, dx, dy, ksize, scale, delta, borderType);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLEqualizeHist, cv::gapi::imgproc::GEqHist)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::equalizeHist(in, out);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLCanny, cv::gapi::imgproc::GCanny)
+{
+    static void run(const cv::UMat& in, double thr1, double thr2, int apSize, bool l2gradient, cv::UMat &out)
+    {
+        cv::Canny(in, out, thr1, thr2, apSize, l2gradient);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLRGB2YUV, cv::gapi::imgproc::GRGB2YUV)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_RGB2YUV);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLYUV2RGB, cv::gapi::imgproc::GYUV2RGB)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_YUV2RGB);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLRGB2Lab, cv::gapi::imgproc::GRGB2Lab)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_RGB2Lab);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLBGR2LUV, cv::gapi::imgproc::GBGR2LUV)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_BGR2Luv);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLBGR2YUV, cv::gapi::imgproc::GBGR2YUV)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_BGR2YUV);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLLUV2BGR, cv::gapi::imgproc::GLUV2BGR)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_Luv2BGR);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLYUV2BGR, cv::gapi::imgproc::GYUV2BGR)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_YUV2BGR);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLRGB2Gray, cv::gapi::imgproc::GRGB2Gray)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_RGB2GRAY);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLBGR2Gray, cv::gapi::imgproc::GBGR2Gray)
+{
+    static void run(const cv::UMat& in, cv::UMat &out)
+    {
+        cv::cvtColor(in, out, cv::COLOR_BGR2GRAY);
+    }
+};
+
+GAPI_OCL_KERNEL(GOCLRGB2GrayCustom, cv::gapi::imgproc::GRGB2GrayCustom)
+{
+    //TODO: avoid copy
+    static void run(const cv::UMat& in, float rY, float bY, float gY, cv::UMat &out)
+    {
+        cv::Mat planes[3];
+        cv::split(in.getMat(cv::ACCESS_READ), planes);
+        cv::Mat tmp_out = (planes[0]*rY + planes[1]*bY + planes[2]*gY);
+        tmp_out.copyTo(out);
+    }
+};
+
+
+cv::gapi::GKernelPackage cv::gapi::imgproc::ocl::kernels()
+{
+    static auto pkg = cv::gapi::kernels
+        < GOCLFilter2D
+        , GOCLSepFilter
+        , GOCLBoxFilter
+        , GOCLBlur
+        , GOCLGaussBlur
+        , GOCLMedianBlur
+        , GOCLErode
+        , GOCLDilate
+        , GOCLSobel
+        , GOCLCanny
+        , GOCLEqualizeHist
+        , GOCLRGB2YUV
+        , GOCLYUV2RGB
+        , GOCLRGB2Lab
+        , GOCLBGR2LUV
+        , GOCLBGR2YUV
+        , GOCLYUV2BGR
+        , GOCLLUV2BGR
+        , GOCLBGR2Gray
+        , GOCLRGB2Gray
+        , GOCLRGB2GrayCustom
+        >();
+    return pkg;
+}
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.hpp
new file mode 100644
index 000000000..fc8bb9b2c
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclimgproc.hpp
@@ -0,0 +1,23 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#ifndef OPENCV_GAPI_GOCLIMGPROC_HPP
+#define OPENCV_GAPI_GOCLIMGPROC_HPP
+
+#include <map>
+#include <string>
+
+#include "opencv2/gapi/ocl/goclkernel.hpp"
+
+namespace cv { namespace gimpl {
+
+// NB: This is what a "Kernel Package" from the origianl Wiki doc should be.
+void loadOCLImgProc(std::map<std::string, cv::GOCLKernel> &kmap);
+
+}}
+
+#endif // OPENCV_GAPI_GOCLIMGPROC_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclkernel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclkernel.cpp
new file mode 100644
index 000000000..2ae2e335f
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/backends/ocl/goclkernel.cpp
@@ -0,0 +1,50 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#include <cassert>
+
+#include "opencv2/gapi/ocl/goclkernel.hpp"
+
+const cv::UMat& cv::GOCLContext::inMat(int input)
+{
+    return (inArg<cv::UMat>(input));
+}
+
+cv::UMat& cv::GOCLContext::outMatR(int output)
+{
+    return (*(util::get<cv::UMat*>(m_results.at(output))));
+}
+
+const cv::gapi::own::Scalar& cv::GOCLContext::inVal(int input)
+{
+    return inArg<cv::gapi::own::Scalar>(input);
+}
+
+cv::gapi::own::Scalar& cv::GOCLContext::outValR(int output)
+{
+    return *util::get<cv::gapi::own::Scalar*>(m_results.at(output));
+}
+
+cv::detail::VectorRef& cv::GOCLContext::outVecRef(int output)
+{
+    return util::get<cv::detail::VectorRef>(m_results.at(output));
+}
+
+cv::GOCLKernel::GOCLKernel()
+{
+}
+
+cv::GOCLKernel::GOCLKernel(const GOCLKernel::F &f)
+    : m_f(f)
+{
+}
+
+void cv::GOCLKernel::apply(GOCLContext &ctx)
+{
+    CV_Assert(m_f);
+    m_f(ctx);
+}
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled.cpp
index 876575d94..e0a6030b8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled_priv.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled_priv.hpp
index e616b2bb7..82258c712 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled_priv.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiled_priv.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPILED_PRIV_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.cpp
index 32ce8e38f..1a4eb9cc4 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
@@ -104,11 +104,13 @@ cv::gimpl::GCompiler::GCompiler(const cv::GComputation &c,
 
     // Remove GCompoundBackend to avoid calling setupBackend() with it in the list
     m_all_kernels.remove(cv::gapi::compound::backend());
-    m_e.addPass("init", "resolve_kernels", std::bind(passes::resolveKernels, _1,
+
+    m_e.addPassStage("kernels");
+    m_e.addPass("kernels", "resolve_kernels", std::bind(passes::resolveKernels, _1,
                                                      std::ref(m_all_kernels), // NB: and not copied here
                                                      lookup_order));
+    m_e.addPass("kernels", "check_islands_content", passes::checkIslandsContent);
 
-    m_e.addPass("init", "check_islands_content", passes::checkIslandsContent);
     m_e.addPassStage("meta");
     m_e.addPass("meta", "initialize",   std::bind(passes::initMeta, _1, std::ref(m_metas)));
     m_e.addPass("meta", "propagate",    std::bind(passes::inferMeta, _1, false));
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.hpp
index b369c14d1..db40284ce 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gcompiler.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GCOMPILER_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.cpp
index 8e20302a3..2d554b1aa 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.hpp
index 03b42ff38..8cb247ddd 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gislandmodel.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GISLANDMODEL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.cpp
index 4b2455219..b21ab9fd6 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
@@ -26,7 +26,7 @@ ade::NodeHandle GModel::mkOpNode(GModel::Graph &g, const GKernel &k, const std::
     ade::NodeHandle op_h = g.createNode();
     g.metadata(op_h).set(NodeType{NodeType::OP});
     //These extra empty {} are to please GCC (-Wmissing-field-initializers)
-    g.metadata(op_h).set(Op{k, args, {}, {}, {}});
+    g.metadata(op_h).set(Op{k, args, {}, {}});
     if (!island.empty())
         g.metadata(op_h).set(Island{island});
     return op_h;
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.hpp
index 003519b82..5d4646108 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodel.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GMODEL_HPP
@@ -61,7 +61,6 @@ struct Op
     std::vector<RcDesc> outs; // TODO: Introduce a new type for resource references
 
     cv::gapi::GBackend  backend;
-    util::any           opaque;
 };
 
 struct Data
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.cpp
index c9b2fbbdf..e90b83127 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -128,7 +128,7 @@ cv::gimpl::Unrolled cv::gimpl::unrollExpr(const GProtoArgs &ins,
                 // then add its operands to stack to continue recursion.
                 ops.visit(&node.priv(), node);
 
-                const cv::GCall         call   = origin.node.call();
+                const cv::GCall&        call   = origin.node.call();
                 const cv::GCall::Priv&  call_p = call.priv();
 
                 // Put the outputs object description of the node
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.hpp
index ce12c7e11..41851084d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gmodelbuilder.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GMODEL_BUILDER_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gobjref.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gobjref.hpp
index be365c90e..9191f7e99 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gobjref.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/gobjref.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GMATREF_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/dump_dot.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/dump_dot.cpp
index 8741089ba..32a30eda7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/dump_dot.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/dump_dot.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/exec.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/exec.cpp
index 7119e3411..f6ca64ea1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/exec.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/exec.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.cpp
index 60bf36afd..160d6c20c 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.hpp
index 3aa18e627..429292392 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/helpers.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_COMPILER_PASSES_HELPERS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/islands.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/islands.cpp
index 942f738bd..8fc0b92f7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/islands.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/islands.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/kernels.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/kernels.cpp
index 2703149e7..1fe2ab32d 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/kernels.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/kernels.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/meta.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/meta.cpp
index 528d84ce8..2a98e6cb1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/meta.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/meta.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/passes.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/passes.hpp
index 14f6acdc0..ef086fcd7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/passes.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/passes/passes.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_COMPILER_PASSES_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/transactions.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/transactions.hpp
index 54af8a6e6..ccb0a3215 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/transactions.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/compiler/transactions.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_COMPILER_TRANSACTIONS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.cpp b/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.cpp
index f117c0633..8b0af2a53 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.hpp
index e4128ba77..6e9be9bd8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/executor/gexecutor.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_GEXECUTOR_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/logger.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/logger.hpp
index ff4c7591b..1a8f24d6e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/logger.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/logger.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef __OPENCV_GAPI_LOGGER_HPP__
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/src/precomp.hpp b/inference-engine/thirdparty/fluid/modules/gapi/src/precomp.hpp
index eebe9d896..df59ed6c7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/src/precomp.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/src/precomp.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef __OPENCV_GAPI_PRECOMP_HPP__
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_compoundkernel_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_compoundkernel_tests.cpp
index 1f5de7a92..731bc87c9 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_compoundkernel_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_compoundkernel_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 // FIXME: move out from Common
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.cpp
index eb7761248..083da7d4b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.hpp
index 77a82dfd2..7268132e6 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_TESTS_HPP
@@ -124,9 +124,9 @@ struct MinTest           : public TestParams<std::tuple<int,cv::Size,bool, cv::G
 struct MaxTest           : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>>{};
 struct AbsDiffTest       : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>>{};
 struct AbsDiffCTest      : public TestParams<std::tuple<int,cv::Size,bool, cv::GCompileArgs>> {};
-struct SumTest           : public TestParams<std::tuple<int, cv::Size,bool,double,cv::GCompileArgs>> {};
-struct AddWeightedTest   : public TestParams<std::tuple<int,cv::Size,int,bool,double,cv::GCompileArgs>>{};
-struct NormTest          : public TestParams<std::tuple<NormTypes,int,cv::Size, double, cv::GCompileArgs>>{};
+struct SumTest           : public TestParams<std::tuple<int, cv::Size,bool, compare_scalar_f, cv::GCompileArgs>> {};
+struct AddWeightedTest   : public TestParams<std::tuple<int,cv::Size,int,bool, compare_f,cv::GCompileArgs>>{};
+struct NormTest          : public TestParams<std::tuple<NormTypes,int,cv::Size, compare_scalar_f, cv::GCompileArgs>>{};
 struct IntegralTest      : public TestWithParam<std::tuple<int,cv::Size, cv::GCompileArgs>> {};
 struct ThresholdTest     : public TestParams<std::tuple<int,cv::Size,int,bool, cv::GCompileArgs>> {};
 struct ThresholdOTTest   : public TestParams<std::tuple<int,cv::Size,int,bool, cv::GCompileArgs>> {};
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests_inl.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests_inl.hpp
index d33b5cc63..ca4190bf6 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests_inl.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_core_tests_inl.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_CORE_TESTS_INL_HPP
@@ -681,11 +681,11 @@ TEST_P(AbsDiffCTest, AccuracyTest)
 TEST_P(SumTest, AccuracyTest)
 {
     auto param = GetParam();
+    compare_scalar_f cmpF = get<3>(GetParam());
+    MatType type = std::get<0>(param);
     cv::Size sz_in = std::get<1>(param);
-    auto tolerance = std::get<3>(param);
     auto compile_args = std::get<4>(param);
-    //initMatrixRandU(std::get<0>(param), sz_in, std::get<2>(param));
-    initMatsRandN(std::get<0>(param), sz_in, std::get<2>(param)); //TODO: workaround trying to fix SumTest failures
+    initMatrixRandU(type, sz_in, type, std::get<2>(param));
 
 
     cv::Scalar out_sum;
@@ -703,8 +703,7 @@ TEST_P(SumTest, AccuracyTest)
     }
     // Comparison //////////////////////////////////////////////////////////////
     {
-        EXPECT_LE(std::abs(out_sum[0] - out_sum_ocv[0]) / std::max(1.0, std::abs(out_sum_ocv[0])), tolerance)
-            << "OCV=" << out_sum_ocv[0] << "   GAPI=" << out_sum[0];
+        EXPECT_TRUE(cmpF(out_sum, out_sum_ocv));
     }
 }
 
@@ -714,8 +713,8 @@ TEST_P(AddWeightedTest, AccuracyTest)
     cv::Size sz_in;
     bool initOut = false;
     cv::GCompileArgs compile_args;
-    double tolerance = 0.0;
-    std::tie(type, sz_in, dtype, initOut, tolerance, compile_args) = GetParam();
+    compare_f cmpF;
+    std::tie(type, sz_in, dtype, initOut, cmpF, compile_args) = GetParam();
 
     auto& rng = cv::theRNG();
     double alpha = rng.uniform(0.0, 1.0);
@@ -735,53 +734,19 @@ TEST_P(AddWeightedTest, AccuracyTest)
         cv::addWeighted(in_mat1, alpha, in_mat2, beta, gamma, out_mat_ocv, dtype);
     }
     // Comparison //////////////////////////////////////////////////////////////
-    {
-        // Note, that we cannot expect bitwise results for add-weighted:
-        //
-        //    tmp = src1*alpha + src2*beta + gamma;
-        //    dst = saturate<DST>( round(tmp) );
-        //
-        // Because tmp is floating-point, dst depends on compiler optimizations
-        //
-        // However, we must expect good accuracy of tmp, and rounding correctly
-
-        cv::Mat failures;
-
-        if (out_mat_ocv.type() == CV_32FC1)
-        {
-            // result: float - may vary in 7th decimal digit
-            failures = abs(out_mat_gapi - out_mat_ocv) > abs(out_mat_ocv) * 1e-6;
-        }
-        else
-        {
-            // result: integral - rounding may vary if fractional part of tmp
-            //                    is nearly 0.5
-
-            cv::Mat inexact, incorrect, diff, tmp;
-
-            inexact = out_mat_gapi != out_mat_ocv;
-
-            // even if rounded differently, check if still rounded correctly
-            cv::addWeighted(in_mat1, alpha, in_mat2, beta, gamma, tmp, CV_32F);
-            cv::subtract(out_mat_gapi, tmp, diff, cv::noArray(), CV_32F);
-            incorrect = abs(diff) >= tolerance;// 0.5000005f; // relative to 6 digits
-
-            failures = inexact & incorrect;
-        }
+    EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+    EXPECT_EQ(out_mat_gapi.size(), sz_in);
 
-        EXPECT_EQ(0, cv::countNonZero(failures));
-        EXPECT_EQ(out_mat_gapi.size(), sz_in);
-    }
 }
 
 TEST_P(NormTest, AccuracyTest)
 {
+    compare_scalar_f cmpF;
     NormTypes opType = NORM_INF;
     int type = 0;
     cv::Size sz;
-    double tolerance = 0.0;
     cv::GCompileArgs compile_args;
-    std::tie(opType, type, sz, tolerance, compile_args) = GetParam();
+    std::tie(opType, type, sz, cmpF, compile_args) = GetParam();
     initMatrixRandU(type, sz, type, false);
 
     cv::Scalar out_norm;
@@ -803,8 +768,7 @@ TEST_P(NormTest, AccuracyTest)
 
     // Comparison //////////////////////////////////////////////////////////////
     {
-        EXPECT_LE(std::abs(out_norm[0] - out_norm_ocv[0]) / std::max(1.0, std::abs(out_norm_ocv[0])), tolerance)
-            << "OCV=" << out_norm_ocv[0] << "   GAPI=" << out_norm[0];
+        EXPECT_TRUE(cmpF(out_norm, out_norm_ocv));
     }
 }
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.cpp
index b7c027908..fcd5882c1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.hpp
index c21b26b68..94860bc0e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_TESTS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp
index 3de428922..f13c2b1a7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_imgproc_tests_inl.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_IMGPROC_TESTS_INL_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.cpp
index 1f6f0ce20..db6dd18bf 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.hpp
index 9f53d3685..1730eab09 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OPERATOR_TESTS_COMMON_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests_inl.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests_inl.hpp
index 7ec702ae9..fa9a269ac 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests_inl.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_operators_tests_inl.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef OPENCV_GAPI_OPERATOR_TESTS_INL_COMMON_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_common.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_common.hpp
index be0fc3c7e..f226fbbb7 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_common.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/common/gapi_tests_common.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include <iostream>
@@ -115,6 +115,9 @@ class TestPerfParams: public TestFunctional, public perf::TestBaseWithParam<T>{}
 
 using compare_f = std::function<bool(const cv::Mat &a, const cv::Mat &b)>;
 
+using compare_scalar_f = std::function<bool(const cv::Scalar &a, const cv::Scalar &b)>;
+
+
 template<typename T>
 struct Wrappable
 {
@@ -128,6 +131,20 @@ struct Wrappable
     }
 };
 
+template<typename T>
+struct WrappableScalar
+{
+    compare_scalar_f to_compare_f()
+    {
+        T t = *static_cast<T*const>(this);
+        return [t](const cv::Scalar &a, const cv::Scalar &b)
+        {
+            return t(a, b);
+        };
+    }
+};
+
+
 class AbsExact : public Wrappable<AbsExact>
 {
 public:
@@ -285,6 +302,28 @@ private:
     double _tol;
     double _inf_tol;
 };
+
+class AbsToleranceScalar : public WrappableScalar<AbsToleranceScalar>
+{
+public:
+    AbsToleranceScalar(double tol) : _tol(tol) {}
+    bool operator() (const cv::Scalar& in1, const cv::Scalar& in2) const
+    {
+        double abs_err = std::abs(in1[0] - in2[0]) / std::max(1.0, std::abs(in2[0]));
+        if (abs_err > _tol)
+        {
+            std::cout << "AbsToleranceScalar error: abs_err=" << abs_err << "  tolerance=" << _tol << " in1[0]" << in1[0] << " in2[0]" << in2[0] << std::endl;;
+            return false;
+        }
+        else
+        {
+            return true;
+        }
+    }
+private:
+    double _tol;
+};
+
 } // namespace opencv_test
 
 namespace
@@ -294,3 +333,11 @@ namespace
         return os << "compare_f";
     }
 }
+
+namespace
+{
+    inline std::ostream& operator<<(std::ostream& os, const opencv_test::compare_scalar_f&)
+    {
+        return os << "compare_scalar_f";
+    }
+}
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp
index 11e78bd99..52289dbc9 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_cpu.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
@@ -203,7 +203,8 @@ INSTANTIATE_TEST_CASE_P(SumTestCPU, SumTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
 /*init output matrices or not*/ testing::Bool(),
-                                Values(1e-5),
+                                //Values(1e-5),
+                                Values(AbsToleranceScalar(1e-5).to_compare_f()),
                                 Values(cv::compile_args(CORE_CPU))));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffTestCPU, AbsDiffTest,
@@ -222,15 +223,14 @@ INSTANTIATE_TEST_CASE_P(AbsDiffCTestCPU, AbsDiffCTest,
 /*init output matrices or not*/ testing::Bool(),
                                 Values(cv::compile_args(CORE_CPU))));
 
-// FIXME: Comparison introduced by YL doesn't work with C3
 INSTANTIATE_TEST_CASE_P(AddWeightedTestCPU, AddWeightedTest,
-                        Combine(Values( CV_8UC1/*, CV_8UC3*/, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
 /*init output matrices or not*/ testing::Bool(),
-                                Values(0.5000005),
+                                Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
                                 Values(cv::compile_args(CORE_CPU))));
 
 INSTANTIATE_TEST_CASE_P(NormTestCPU, NormTest,
@@ -239,7 +239,8 @@ INSTANTIATE_TEST_CASE_P(NormTestCPU, NormTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(1e-5),
+                                //Values(1e-5),
+                                Values(AbsToleranceScalar(1e-5).to_compare_f()),
                                 Values(cv::compile_args(CORE_CPU))),
                         opencv_test::PrintNormCoreParams());
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_fluid.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_fluid.cpp
index c65052b36..ea8b0701e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_fluid.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_core_tests_fluid.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
@@ -121,7 +121,8 @@ INSTANTIATE_TEST_CASE_P(AddWeightedTestFluid, AddWeightedTest,
                                        cv::Size(128, 128)),
                                 Values(-1, CV_8U, CV_32F),
                                 testing::Bool(),
-                                Values(0.5000005),
+                                Values(Tolerance_FloatRel_IntAbs(1e-5, 2).to_compare_f()),
+                                //Values(0.5000005),
                                 Values(cv::compile_args(CORE_FLUID))));
 
 INSTANTIATE_TEST_CASE_P(LUTTestFluid, LUTTest,
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_cpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_cpu.cpp
index beda02240..43d3dc927 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_cpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_cpu.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_fluid.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_fluid.cpp
index 5dca2092a..41e67250e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_fluid.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_imgproc_tests_fluid.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_cpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_cpu.cpp
index 435c798c6..6d5fb66bd 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_cpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_cpu.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_fluid.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_fluid.cpp
index 4179fa53b..a6f807384 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_fluid.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/cpu/gapi_operators_tests_fluid.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_array_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_array_tests.cpp
index e5765624c..1fa858486 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_array_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_array_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_basic_hetero_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_basic_hetero_tests.cpp
index 62069d865..38e9b1e6f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_basic_hetero_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_basic_hetero_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_desc_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_desc_tests.cpp
index 711211da2..3b1584495 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_desc_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_desc_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
@@ -40,6 +40,43 @@ TEST(GAPI_MetaDesc, MatDesc)
     EXPECT_EQ(480,     desc2.size.height);
 }
 
+TEST(GAPI_MetaDesc, VecMatDesc)
+{
+    std::vector<cv::Mat> vec1 = {
+    cv::Mat(240, 320, CV_8U)};
+
+    const auto desc1 = cv::descr_of(vec1);
+    EXPECT_EQ((GMatDesc{CV_8U, 1, {320, 240}}), get<GMatDesc>(desc1[0]));
+
+    std::vector<cv::UMat> vec2 = {
+    cv::UMat(480, 640, CV_8UC3)};
+
+    const auto desc2 = cv::descr_of(vec2);
+    EXPECT_EQ((GMatDesc{CV_8U, 3, {640, 480}}), get<GMatDesc>(desc2[0]));
+}
+
+TEST(GAPI_MetaDesc, VecOwnMatDesc)
+{
+    std::vector<cv::gapi::own::Mat> vec = {
+    cv::gapi::own::Mat(240, 320, CV_8U, nullptr),
+    cv::gapi::own::Mat(480, 640, CV_8UC3, nullptr)};
+
+    const auto desc = cv::gapi::own::descr_of(vec);
+    EXPECT_EQ((GMatDesc{CV_8U, 1, {320, 240}}), get<GMatDesc>(desc[0]));
+    EXPECT_EQ((GMatDesc{CV_8U, 3, {640, 480}}), get<GMatDesc>(desc[1]));
+}
+
+TEST(GAPI_MetaDesc, AdlVecOwnMatDesc)
+{
+    std::vector<cv::gapi::own::Mat> vec = {
+    cv::gapi::own::Mat(240, 320, CV_8U, nullptr),
+    cv::gapi::own::Mat(480, 640, CV_8UC3, nullptr)};
+
+    const auto desc = descr_of(vec);
+    EXPECT_EQ((GMatDesc{CV_8U, 1, {320, 240}}), get<GMatDesc>(desc[0]));
+    EXPECT_EQ((GMatDesc{CV_8U, 3, {640, 480}}), get<GMatDesc>(desc[1]));
+}
+
 TEST(GAPI_MetaDesc, Compare_Equal_MatDesc)
 {
     const auto desc1 = cv::GMatDesc{CV_8U, 1, {64, 64}};
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_resize_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_resize_test.cpp
index bc0b991e6..9640536f9 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_resize_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_resize_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_roi_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_roi_test.cpp
index ee8674ede..74ddd7b1a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_roi_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_roi_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test.cpp
index 5b3501175..f7dac09cb 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.cpp
index 6bd06fe27..cc106ef54 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 #include "test_precomp.hpp"
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.hpp
index f5d83edf5..8082916fa 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_fluid_test_kernels.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #ifndef GAPI_FLUID_TEST_KERNELS_HPP
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcompiled_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcompiled_tests.cpp
index e482e2e36..8cd4b0b1b 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcompiled_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcompiled_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcomputation_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcomputation_tests.cpp
index 070cea692..ffbb05e0a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcomputation_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gcomputation_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gpu_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gpu_test.cpp
new file mode 100644
index 000000000..771786977
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_gpu_test.cpp
@@ -0,0 +1,207 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+
+#include "test_precomp.hpp"
+
+
+#include "logger.hpp"
+#include "common/gapi_tests_common.hpp"
+#include "opencv2/gapi/gpu/ggpukernel.hpp"
+#include "opencl_kernels_test_gapi.hpp"
+
+
+namespace cv
+{
+
+#ifdef HAVE_OPENCL
+
+    static void reference_symm7x7_CPU(const cv::Mat& in, const cv::Mat& kernel_coeff, int shift, cv::Mat &out)
+    {
+        cv::Point anchor = { -1, -1 };
+        double delta = 0;
+
+        const int* ci = kernel_coeff.ptr<int>();
+
+        float c_float[10];
+        float divisor = (float)(1 << shift);
+        for (int i = 0; i < 10; i++)
+        {
+            c_float[i] = ci[i] / divisor;
+        }
+        // J & I & H & G & H & I & J
+        // I & F & E & D & E & F & I
+        // H & E & C & B & C & E & H
+        // G & D & B & A & B & D & G
+        // H & E & C & B & C & E & H
+        // I & F & E & D & E & F & I
+        // J & I & H & G & H & I & J
+
+        // A & B & C & D & E & F & G & H & I & J
+
+        // 9 & 8 & 7 & 6 & 7 & 8 & 9
+        // 8 & 5 & 4 & 3 & 4 & 5 & 8
+        // 7 & 4 & 2 & 1 & 2 & 4 & 7
+        // 6 & 3 & 1 & 0 & 1 & 3 & 6
+        // 7 & 4 & 2 & 1 & 2 & 4 & 7
+        // 8 & 5 & 4 & 3 & 4 & 5 & 8
+        // 9 & 8 & 7 & 6 & 7 & 8 & 9
+
+        float coefficients[49] =
+        {
+            c_float[9], c_float[8], c_float[7], c_float[6], c_float[7], c_float[8], c_float[9],
+            c_float[8], c_float[5], c_float[4], c_float[3], c_float[4], c_float[5], c_float[8],
+            c_float[7], c_float[4], c_float[2], c_float[1], c_float[2], c_float[4], c_float[7],
+            c_float[6], c_float[3], c_float[1], c_float[0], c_float[1], c_float[3], c_float[6],
+            c_float[7], c_float[4], c_float[2], c_float[1], c_float[2], c_float[4], c_float[7],
+            c_float[8], c_float[5], c_float[4], c_float[3], c_float[4], c_float[5], c_float[8],
+            c_float[9], c_float[8], c_float[7], c_float[6], c_float[7], c_float[8], c_float[9]
+        };
+
+        cv::Mat kernel = cv::Mat(7, 7, CV_32FC1);
+        float* cf = kernel.ptr<float>();
+        for (int i = 0; i < 49; i++)
+        {
+            cf[i] = coefficients[i];
+        }
+
+        cv::filter2D(in, out, CV_8UC1, kernel, anchor, delta, cv::BORDER_REPLICATE);
+    }
+
+    namespace gapi_test_kernels
+    {
+        G_TYPED_KERNEL(TSymm7x7_test, <GMat(GMat, Mat, int)>, "org.opencv.imgproc.symm7x7_test") {
+            static GMatDesc outMeta(GMatDesc in, Mat, int) {
+                return in.withType(CV_8U, 1);
+            }
+        };
+
+
+        GAPI_GPU_KERNEL(GGPUSymm7x7_test, TSymm7x7_test)
+        {
+            static void run(const cv::UMat& in, const cv::Mat& kernel_coeff, int shift, cv::UMat &out)
+            {
+                if (cv::ocl::isOpenCLActivated())
+                {
+                    cv::Size size = in.size();
+                    size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
+
+                    const cv::String moduleName = "gapi";
+                    cv::ocl::ProgramSource source(moduleName, "symm7x7", opencl_symm7x7_src, "");
+
+                    static const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_UNDEFINED" };
+                    std::string build_options = " -D BORDER_CONSTANT_VALUE=" + std::to_string(0) +
+                        " -D " + borderMap[1] +
+                        " -D SCALE=1.f/" + std::to_string(1 << shift) + ".f";
+
+                    cv::String errmsg;
+                    cv::ocl::Program program(source, build_options, errmsg);
+                    if (program.ptr() == NULL)
+                    {
+                        CV_Error_(cv::Error::OpenCLInitError, ("symm_7x7_test Can't compile OpenCL program: = %s with build_options = %s\n", errmsg.c_str(), build_options.c_str()));
+                    }
+                    if (!errmsg.empty())
+                    {
+                        std::cout << "OpenCL program build log:" << std::endl << errmsg << std::endl;
+                    }
+
+                    cv::ocl::Kernel kernel("symm_7x7_test", program);
+                    if (kernel.empty())
+                    {
+                        CV_Error(cv::Error::OpenCLInitError, "symm_7x7_test Can't get OpenCL kernel\n");
+                    }
+
+                    cv::UMat gKer;
+                    kernel_coeff.copyTo(gKer);
+
+                    int tile_y = 0;
+
+                    int idxArg = kernel.set(0, cv::ocl::KernelArg::PtrReadOnly(in));
+                    idxArg = kernel.set(idxArg, (int)in.step);
+                    idxArg = kernel.set(idxArg, (int)size.width);
+                    idxArg = kernel.set(idxArg, (int)size.height);
+                    idxArg = kernel.set(idxArg, cv::ocl::KernelArg::PtrWriteOnly(out));
+                    idxArg = kernel.set(idxArg, (int)out.step);
+                    idxArg = kernel.set(idxArg, (int)size.height);
+                    idxArg = kernel.set(idxArg, (int)size.width);
+                    idxArg = kernel.set(idxArg, (int)tile_y);
+                    idxArg = kernel.set(idxArg, cv::ocl::KernelArg::PtrReadOnly(gKer));
+
+                    if (!kernel.run(2, globalsize, NULL, false))
+                    {
+                        CV_Error(cv::Error::OpenCLApiCallError, "symm_7x7_test OpenCL kernel run failed\n");
+                    }
+                }
+                else
+                {
+                    //CPU fallback
+                    cv::Mat in_Mat, out_Mat;
+                    in_Mat = in.getMat(ACCESS_READ);
+                    out_Mat = out.getMat(ACCESS_WRITE);
+                    reference_symm7x7_CPU(in_Mat, kernel_coeff, shift, out_Mat);
+                }
+            }
+        };
+
+        cv::gapi::GKernelPackage gpuTestPackage = cv::gapi::kernels
+            <GGPUSymm7x7_test
+            >();
+
+    } // namespace gapi_test_kernels
+#endif //HAVE_OPENCL
+
+} // namespace cv
+
+
+namespace opencv_test
+{
+
+#ifdef HAVE_OPENCL
+
+using namespace cv::gapi_test_kernels;
+
+TEST(GPU, Symm7x7_test)
+{
+    const auto sz = cv::Size(1280, 720);
+    cv::Mat in_mat = cv::Mat::eye(sz, CV_8UC1);
+    cv::Mat out_mat_gapi(sz, CV_8UC1);
+    cv::Mat out_mat_ocv(sz, CV_8UC1);
+    cv::Scalar mean = cv::Scalar(127.0f);
+    cv::Scalar stddev = cv::Scalar(40.f);
+    cv::randn(in_mat, mean, stddev);
+
+    //Symm7x7 coefficients and shift
+    int coefficients_symm7x7[10] = { 1140, -118, 526, 290, -236, 64, -128, -5, -87, -7 };
+    int shift = 10;
+    cv::Mat kernel_coeff(10, 1, CV_32S);
+    int* ci = kernel_coeff.ptr<int>();
+    for (int i = 0; i < 10; i++)
+    {
+        ci[i] = coefficients_symm7x7[i];
+    }
+
+    // Run G-API
+    cv::GMat in;
+    auto out = TSymm7x7_test::on(in, kernel_coeff, shift);
+    cv::GComputation comp(cv::GIn(in), cv::GOut(out));
+
+    auto cc = comp.compile(cv::descr_of(in_mat), cv::compile_args(gpuTestPackage));
+    cc(cv::gin(in_mat), cv::gout(out_mat_gapi));
+
+    // Run OpenCV
+    reference_symm7x7_CPU(in_mat, kernel_coeff, shift, out_mat_ocv);
+
+    compare_f cmpF = AbsSimilarPoints(1, 0.05).to_compare_f();
+
+    // Comparison //////////////////////////////////////////////////////////////
+    {
+        EXPECT_TRUE(cmpF(out_mat_gapi, out_mat_ocv));
+        EXPECT_EQ(out_mat_gapi.size(), sz);
+    }
+}
+#endif
+
+} // namespace opencv_test
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_kernel_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_kernel_tests.cpp
index aeb47628e..ee0cdfa72 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_kernel_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_kernel_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_mock_kernels.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_mock_kernels.hpp
index cd876efdb..a7b35bceb 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_mock_kernels.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_mock_kernels.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "opencv2/gapi/cpu/gcpukernel.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_sample_pipelines.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_sample_pipelines.cpp
index 815aa0d87..ce87ba431 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_sample_pipelines.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_sample_pipelines.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_scalar_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_scalar_tests.cpp
index 7b4baa01d..705fd14af 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_scalar_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_scalar_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_smoke_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_smoke_test.cpp
index 9ac47f6d7..630a8fc10 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_smoke_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_smoke_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_typed_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_typed_tests.cpp
index 1716b5505..223a54654 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_typed_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_typed_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_util_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_util_tests.cpp
index 574c0ab54..b2d43534c 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_util_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gapi_util_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_core_tests_gpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_core_tests_gpu.cpp
index 6c331c033..34faddfb3 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_core_tests_gpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_core_tests_gpu.cpp
@@ -2,12 +2,11 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_core_tests.hpp"
-#include "opencv2/gapi/gpu/core.hpp"
 
 #define CORE_GPU cv::gapi::core::gpu::kernels()
 
@@ -190,7 +189,7 @@ INSTANTIATE_TEST_CASE_P(SumTestGPU, SumTest,
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
 /*init output matrices or not*/ testing::Bool(),
-                                Values(1e-3), //TODO: too relaxed?
+                                Values(AbsToleranceScalar(1e-3).to_compare_f()),//TODO: too relaxed?
                                 Values(cv::compile_args(CORE_GPU))));
 
 INSTANTIATE_TEST_CASE_P(AbsDiffTestGPU, AbsDiffTest,
@@ -209,15 +208,14 @@ INSTANTIATE_TEST_CASE_P(AbsDiffCTestGPU, AbsDiffCTest,
 /*init output matrices or not*/ testing::Bool(),
                                 Values(cv::compile_args(CORE_GPU))));
 
-// FIXME: Comparison introduced by YL doesn't work with C3
 INSTANTIATE_TEST_CASE_P(AddWeightedTestGPU, AddWeightedTest,
-                        Combine(Values( CV_8UC1/*, CV_8UC3*/, CV_16UC1, CV_16SC1, CV_32FC1 ),
+                        Combine(Values( CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1 ),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
                                 Values( -1, CV_8U, CV_16U, CV_32F ),
 /*init output matrices or not*/ testing::Bool(),
-                                Values(0.50005),
+                                Values(Tolerance_FloatRel_IntAbs(1e-6, 1).to_compare_f()),
                                 Values(cv::compile_args(CORE_GPU))));
 
 INSTANTIATE_TEST_CASE_P(NormTestGPU, NormTest,
@@ -226,7 +224,7 @@ INSTANTIATE_TEST_CASE_P(NormTestGPU, NormTest,
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480),
                                        cv::Size(128, 128)),
-                                Values(1e-3), //TODO: too relaxed?
+                                Values(AbsToleranceScalar(1e-3).to_compare_f()), //TODO: too relaxed?
                                 Values(cv::compile_args(CORE_GPU))),
                         opencv_test::PrintNormCoreParams());
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp
index 65d452c34..18c918c7c 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_imgproc_tests_gpu.cpp
@@ -2,13 +2,12 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 
 #include "../common/gapi_imgproc_tests.hpp"
-#include "opencv2/gapi/gpu/imgproc.hpp"
 
 #define IMGPROC_GPU cv::gapi::imgproc::gpu::kernels()
 
@@ -131,11 +130,23 @@ INSTANTIATE_TEST_CASE_P(Dilate3x3TestGPU, Dilate3x3Test,
 
 INSTANTIATE_TEST_CASE_P(SobelTestGPU, SobelTest,
                         Combine(Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_f()),
-                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1/*, CV_32FC1*/), //TODO: CV_32FC1 fails accuracy
+                                Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1),
                                 Values(3, 5),
                                 Values(cv::Size(1280, 720),
                                        cv::Size(640, 480)),
-                                Values(-1, CV_32F),
+                                Values(-1, CV_16S, CV_32F),
+                                Values(0, 1),
+                                Values(1, 2),
+/*init output matrices or not*/ testing::Bool(),
+                                Values(cv::compile_args(IMGPROC_GPU))));
+
+INSTANTIATE_TEST_CASE_P(SobelTestGPU32F, SobelTest,
+                        Combine(Values(Tolerance_FloatRel_IntAbs(1e-4, 2).to_compare_f()),
+                                Values(CV_32FC1),
+                                Values(3, 5),
+                                Values(cv::Size(1280, 720),
+                                       cv::Size(640, 480)),
+                                Values(CV_32F),
                                 Values(0, 1),
                                 Values(1, 2),
 /*init output matrices or not*/ testing::Bool(),
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_operators_tests_gpu.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_operators_tests_gpu.cpp
index 5a116bd35..62c080cc8 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_operators_tests_gpu.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/gpu/gapi_operators_tests_gpu.cpp
@@ -2,12 +2,11 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "../test_precomp.hpp"
 #include "../common/gapi_operators_tests.hpp"
-#include "opencv2/gapi/gpu/core.hpp"
 
 #define CORE_GPU cv::gapi::core::gpu::kernels()
 
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_backend_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_backend_tests.cpp
index 67b627313..28da490ee 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_backend_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_backend_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_executor_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_executor_tests.cpp
index 20aad89b6..00d13eec1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_executor_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_executor_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_garg_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_garg_test.cpp
index 67696dbb0..602ec0098 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_garg_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_garg_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmetaarg_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmetaarg_test.cpp
index 6dbf7778f..0860a01cc 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmetaarg_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmetaarg_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp
index a815e0d22..6c80a7702 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_gmodel_builder_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_fusion_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_fusion_tests.cpp
index 91e55bed7..c5694c857 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_fusion_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_fusion_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_tests.cpp
index 09f188032..4ca1af84f 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_island_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_recompilation_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_recompilation_test.cpp
index 252af9c1a..d42aab11e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_recompilation_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_recompilation_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_resolve_kernel_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_resolve_kernel_test.cpp
index d4b16f627..09f9ca650 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_resolve_kernel_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_resolve_kernel_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_vectorref_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_vectorref_test.cpp
index 1b14e0670..28702ccd1 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_vectorref_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_int_vectorref_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_transactions_test.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_transactions_test.cpp
index f550340e8..24224bafb 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_transactions_test.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/internal/gapi_transactions_test.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/opencl_kernels_test_gapi.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/opencl_kernels_test_gapi.hpp
new file mode 100644
index 000000000..87fdd7029
--- /dev/null
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/opencl_kernels_test_gapi.hpp
@@ -0,0 +1,260 @@
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html.
+//
+// Copyright (C) 2018-2019 Intel Corporation
+
+#include "opencv2/core/ocl.hpp"
+#include "opencv2/core/ocl_genbase.hpp"
+#include "opencv2/core/opencl/ocl_defs.hpp"
+
+#ifdef HAVE_OPENCL
+const char* opencl_symm7x7_src =
+"#if BORDER_REPLICATE\n"
+"#define GET_BORDER(elem) (elem)\n"
+"#define SET_ALL(i, j) a0[i] = a0[j]; a1[i] = a1[j]; a2[i] = a2[j]; b[i] = b[j]; c0[i] = c0[j]; c1[i] = c1[j]; c2[i] = c2[j];\n"
+"#else\n"
+"#define GET_BORDER(elem) (BORDER_CONSTANT_VALUE)\n"
+"#define SET_ALL(i, j) a0[i] = a1[i] = a2[i] = c0[i] = c1[i] = c2[i] = BORDER_CONSTANT_VALUE; b[i] = BORDER_CONSTANT_VALUE;\n"
+"#endif\n"
+"#define GET_A0(id, x, l_edge, a1) ((x) <= (l_edge + 2) ? GET_BORDER(a1) : (((const __global uchar*)(id))[-3]))\n"
+"#define GET_A1(id, x, l_edge, a2) ((x) <= (l_edge + 1) ? GET_BORDER(a2) : (((const __global uchar*)(id))[-2]))\n"
+"#define GET_A2(id, x, l_edge, b) ((x) <= (l_edge) ? GET_BORDER(b[0]) : (((const __global uchar*)(id))[-1]))\n"
+"#define GET_C0(id, x, r_edge, b) ((x) >= (r_edge) ? GET_BORDER(b[8 - 1]) : (((const __global uchar*)(id))[8]))\n"
+"#define GET_C1(id, x, r_edge, c0) ((x) >= (r_edge - 1) ? GET_BORDER(c0) : (((const __global uchar*)(id))[8 + 1]))\n"
+"#define GET_C2(id, x, r_edge, c1) ((x) >= (r_edge - 2) ? GET_BORDER(c1) : (((const __global uchar*)(id))[8 + 2]))\n"
+"__kernel void symm_7x7_test(\n"
+"__global const uchar * srcptr,\n"
+"int srcStep, int srcEndX, int srcEndY,\n"
+"__global uchar * dstptr, int dstStep,\n"
+"int rows, int cols,\n"
+"int tile_y_coord,\n"
+"__constant int * coeff)\n"
+"{\n"
+"int lEdge = 0, rEdge = cols - 8;\n"
+"int x = (get_global_id(0) < cols/8) ? get_global_id(0) * 8: cols - 8;\n"
+"int y = get_global_id(1);\n"
+"int yd = min(3, tile_y_coord);\n"
+"int dst_id = mad24(y, dstStep, x);\n"
+"y+=yd;\n"
+"int src_id = mad24(y, srcStep, x);\n"
+"int y_limit = y + tile_y_coord;\n"
+"y_limit-=yd;\n"
+"const __global uchar* psrc = (const __global uchar*)(srcptr + src_id);\n"
+"__global uchar* pdst = (__global uchar*)(dstptr + dst_id);\n"
+"#define BSIZE (7)\n"
+"float a0[BSIZE]; float a1[BSIZE]; float a2[BSIZE];\n"
+"float8 b[BSIZE];\n"
+"float c0[BSIZE]; float c1[BSIZE]; float c2[BSIZE];\n"
+"b[3] = convert_float8(vload8(0, (const __global uchar*)psrc));\n"
+"if( (y_limit <=2 ) || (y_limit >= srcEndY - 3) || (x >= rEdge-2) || (x <= lEdge + 2) )\n"
+"{\n"
+"a2[3] = GET_A2(psrc, x, lEdge, b[3]);\n"
+"a1[3] = GET_A1(psrc, x, lEdge, a2[3]);\n"
+"a0[3] = GET_A0(psrc, x, lEdge, a1[3]);\n"
+"c0[3] = GET_C0(psrc, x, rEdge, b[3]);\n"
+"c1[3] = GET_C1(psrc, x, rEdge, c0[3]);\n"
+"c2[3] = GET_C2(psrc, x, rEdge, c1[3]);\n"
+"if(y_limit > 0)\n"
+"{\n"
+"b[2] = convert_float8(vload8(0, (const __global uchar*)(psrc - srcStep)));\n"
+"a2[2] = GET_A2(psrc - srcStep, x, lEdge, b[2]);\n"
+"a1[2] = GET_A1(psrc - srcStep, x, lEdge, a2[2]);\n"
+"a0[2] = GET_A0(psrc - srcStep, x, lEdge, a1[2]);\n"
+"c0[2] = GET_C0(psrc - srcStep, x, rEdge, b[2]);\n"
+"c1[2] = GET_C1(psrc - srcStep, x, rEdge, c0[2]);\n"
+"c2[2] = GET_C2(psrc - srcStep, x, rEdge, c1[2]);\n"
+"}\n"
+"else\n"
+"{\n"
+"SET_ALL(2, 3);\n"
+"}\n"
+"if( y_limit > 1 )\n"
+"{\n"
+"b[1] = convert_float8(vload8(0, (const __global uchar*)(psrc - srcStep*2)));\n"
+"a2[1] = GET_A2(psrc - srcStep*2, x, lEdge, b[1]);\n"
+"a1[1] = GET_A1(psrc - srcStep*2, x, lEdge, a2[1]);\n"
+"a0[1] = GET_A0(psrc - srcStep*2, x, lEdge, a1[1]);\n"
+"c0[1] = GET_C0(psrc - srcStep*2, x, rEdge, b[1]);\n"
+"c1[1] = GET_C1(psrc - srcStep*2, x, rEdge, c0[1]);\n"
+"c2[1] = GET_C2(psrc - srcStep*2, x, rEdge, c1[1]);\n"
+"}\n"
+"else\n"
+"{\n"
+"SET_ALL(1, 2);\n"
+"}\n"
+"if( y_limit > 2 )\n"
+"{\n"
+"b[0] = convert_float8(vload8(0, (const __global uchar*)(psrc - srcStep*3)));\n"
+"a2[0] = GET_A2(psrc - srcStep*3, x, lEdge, b[0]);\n"
+"a1[0] = GET_A1(psrc - srcStep*3, x, lEdge, a2[0]);\n"
+"a0[0] = GET_A0(psrc - srcStep*3, x, lEdge, a1[0]);\n"
+"c0[0] = GET_C0(psrc - srcStep*3, x, rEdge, b[0]);\n"
+"c1[0] = GET_C1(psrc - srcStep*3, x, rEdge, c0[0]);\n"
+"c2[0] = GET_C2(psrc - srcStep*3, x, rEdge, c1[0]);\n"
+"}\n"
+"else\n"
+"{\n"
+"SET_ALL(0, 1);\n"
+"}\n"
+"if( y_limit < srcEndY - 1 )\n"
+"{\n"
+"b[4] = convert_float8(vload8(0, (const __global uchar*)(psrc + srcStep)));\n"
+"a2[4] = GET_A2(psrc + srcStep, x, lEdge, b[4]);\n"
+"a1[4] = GET_A1(psrc + srcStep, x, lEdge, a2[4]);\n"
+"a0[4] = GET_A0(psrc + srcStep, x, lEdge, a1[4]);\n"
+"c0[4] = GET_C0(psrc + srcStep, x, rEdge, b[4]);\n"
+"c1[4] = GET_C1(psrc + srcStep, x, rEdge, c0[4]);\n"
+"c2[4] = GET_C2(psrc + srcStep, x, rEdge, c1[4]);\n"
+"}\n"
+"else\n"
+"{\n"
+"SET_ALL(4, 3);\n"
+"}\n"
+"if( y_limit < srcEndY - 2 )\n"
+"{\n"
+"b[5] = convert_float8(vload8(0, (const __global uchar*)(psrc + srcStep*2)));\n"
+"a2[5] = GET_A2(psrc + srcStep*2, x, lEdge, b[5]);\n"
+"a1[5] = GET_A1(psrc + srcStep*2, x, lEdge, a2[5]);\n"
+"a0[5] = GET_A0(psrc + srcStep*2, x, lEdge, a1[5]);\n"
+"c0[5] = GET_C0(psrc + srcStep*2, x, rEdge, b[5]);\n"
+"c1[5] = GET_C1(psrc + srcStep*2, x, rEdge, c0[5]);\n"
+"c2[5] = GET_C2(psrc + srcStep*2, x, rEdge, c1[5]);\n"
+"}\n"
+"else\n"
+"{\n"
+"SET_ALL(5, 4);\n"
+"}\n"
+"if( y_limit < srcEndY - 3 )\n"
+"{\n"
+"b[6] = convert_float8(vload8(0, (const __global uchar*)(psrc + srcStep*3)));\n"
+"a2[6] = GET_A2(psrc + srcStep*3, x, lEdge, b[6]);\n"
+"a1[6] = GET_A1(psrc + srcStep*3, x, lEdge, a2[6]);\n"
+"a0[6] = GET_A0(psrc + srcStep*3, x, lEdge, a1[6]);\n"
+"c0[6] = GET_C0(psrc + srcStep*3, x, rEdge, b[6]);\n"
+"c1[6] = GET_C1(psrc + srcStep*3, x, rEdge, c0[6]);\n"
+"c2[6] = GET_C2(psrc + srcStep*3, x, rEdge, c1[6]);\n"
+"}\n"
+"else\n"
+"{\n"
+"SET_ALL(6, 5);\n"
+"}\n"
+"}\n"
+"else\n"
+"{\n"
+"a2[3] = (((const __global uchar*)(psrc))[-1]);\n"
+"a1[3] = (((const __global uchar*)(psrc))[-2]);\n"
+"a0[3] = (((const __global uchar*)(psrc))[-3]);\n"
+"c0[3] = (((const __global uchar*)(psrc))[8]);\n"
+"c1[3] = (((const __global uchar*)(psrc))[8 + 1]);\n"
+"c2[3] = (((const __global uchar*)(psrc))[8 + 2]);\n"
+"b[2] = convert_float8(vload8(0, (const __global uchar*)(psrc - srcStep)));\n"
+"a2[2] = (((const __global uchar*)(psrc - srcStep))[-1]);\n"
+"a1[2] = (((const __global uchar*)(psrc - srcStep))[-2]);\n"
+"a0[2] = (((const __global uchar*)(psrc - srcStep))[-3]);\n"
+"c0[2] = (((const __global uchar*)(psrc - srcStep))[8]);\n"
+"c1[2] = (((const __global uchar*)(psrc - srcStep))[8 + 1]);\n"
+"c2[2] = (((const __global uchar*)(psrc - srcStep))[8 + 2]);\n"
+"b[1] = convert_float8(vload8(0, (const __global uchar*)(psrc - srcStep*2)));\n"
+"a2[1] = (((const __global uchar*)(psrc - srcStep*2))[-1]);\n"
+"a1[1] = (((const __global uchar*)(psrc - srcStep*2))[-2]);\n"
+"a0[1] = (((const __global uchar*)(psrc - srcStep*2))[-3]);\n"
+"c0[1] = (((const __global uchar*)(psrc - srcStep*2))[8]);\n"
+"c1[1] = (((const __global uchar*)(psrc - srcStep*2))[8 + 1]);\n"
+"c2[1] = (((const __global uchar*)(psrc - srcStep*2))[8 + 2]);\n"
+"b[0] = convert_float8(vload8(0, (const __global uchar*)(psrc - srcStep*3)));\n"
+"a2[0] = (((const __global uchar*)(psrc - srcStep*3))[-1]);\n"
+"a1[0] = (((const __global uchar*)(psrc - srcStep*3))[-2]);\n"
+"a0[0] = (((const __global uchar*)(psrc - srcStep*3))[-3]);\n"
+"c0[0] = (((const __global uchar*)(psrc - srcStep*3))[8]);\n"
+"c1[0] = (((const __global uchar*)(psrc - srcStep*3))[8 + 1]);\n"
+"c2[0] = (((const __global uchar*)(psrc - srcStep*3))[8 + 2]);\n"
+"b[4] = convert_float8(vload8(0, (const __global uchar*)(psrc + srcStep)));\n"
+"a2[4] = (((const __global uchar*)(psrc + srcStep))[-1]);\n"
+"a1[4] = (((const __global uchar*)(psrc + srcStep))[-2]);\n"
+"a0[4] = (((const __global uchar*)(psrc + srcStep))[-3]);\n"
+"c0[4] = (((const __global uchar*)(psrc + srcStep))[8]);\n"
+"c1[4] = (((const __global uchar*)(psrc + srcStep))[8 + 1]);\n"
+"c2[4] = (((const __global uchar*)(psrc + srcStep))[8 + 2]);\n"
+"b[5] = convert_float8(vload8(0, (const __global uchar*)(psrc + srcStep*2)));\n"
+"a2[5] = (((const __global uchar*)(psrc + srcStep*2))[-1]);\n"
+"a1[5] = (((const __global uchar*)(psrc + srcStep*2))[-2]);\n"
+"a0[5] = (((const __global uchar*)(psrc + srcStep*2))[-3]);\n"
+"c0[5] = (((const __global uchar*)(psrc + srcStep*2))[8]);\n"
+"c1[5] = (((const __global uchar*)(psrc + srcStep*2))[8 + 1]);\n"
+"c2[5] = (((const __global uchar*)(psrc + srcStep*2))[8 + 2]);\n"
+"b[6] = convert_float8(vload8(0, (const __global uchar*)(psrc + srcStep*3)));\n"
+"a2[6] = (((const __global uchar*)(psrc + srcStep*3))[-1]);\n"
+"a1[6] = (((const __global uchar*)(psrc + srcStep*3))[-2]);\n"
+"a0[6] = (((const __global uchar*)(psrc + srcStep*3))[-3]);\n"
+"c0[6] = (((const __global uchar*)(psrc + srcStep*3))[8]);\n"
+"c1[6] = (((const __global uchar*)(psrc + srcStep*3))[8 + 1]);\n"
+"c2[6] = (((const __global uchar*)(psrc + srcStep*3))[8 + 2]);\n"
+"}\n"
+"float a0_sum[3]; float a1_sum[3]; float a2_sum[3];\n"
+"float8 b_sum[3];\n"
+"float c0_sum[3]; float c1_sum[3]; float c2_sum[3];\n"
+"a0_sum[0] = a0[0] + a0[6];\n"
+"a0_sum[1] = a0[1] + a0[5];\n"
+"a0_sum[2] = a0[2] + a0[4];\n"
+"a1_sum[0] = a1[0] + a1[6];\n"
+"a1_sum[1] = a1[1] + a1[5];\n"
+"a1_sum[2] = a1[2] + a1[4];\n"
+"a2_sum[0] = a2[0] + a2[6];\n"
+"a2_sum[1] = a2[1] + a2[5];\n"
+"a2_sum[2] = a2[2] + a2[4];\n"
+"c0_sum[0] = c0[0] + c0[6];\n"
+"c0_sum[1] = c0[1] + c0[5];\n"
+"c0_sum[2] = c0[2] + c0[4];\n"
+"c1_sum[0] = c1[0] + c1[6];\n"
+"c1_sum[1] = c1[1] + c1[5];\n"
+"c1_sum[2] = c1[2] + c1[4];\n"
+"c2_sum[0] = c2[0] + c2[6];\n"
+"c2_sum[1] = c2[1] + c2[5];\n"
+"c2_sum[2] = c2[2] + c2[4];\n"
+"b_sum[0] = b[0] + b[6];\n"
+"b_sum[1] = b[1] + b[5];\n"
+"b_sum[2] = b[2] + b[4];\n"
+"float8 A = b[3];\n"
+"float8 intermediate = A * (float)coeff[0];\n"
+"float8 B = b_sum[2] +\n"
+"(float8)(a2[3], b[3].s0123, b[3].s456) +\n"
+"(float8)(b[3].s123, b[3].s4567, c0[3]);\n"
+"intermediate += B * (float)coeff[1];\n"
+"float8 C = (float8)(a2_sum[2], b_sum[2].s0123, b_sum[2].s456) +\n"
+"(float8)(b_sum[2].s123, b_sum[2].s4567, c0_sum[2]);\n"
+"intermediate += C * (float)coeff[2];\n"
+"float8 D = b_sum[1] +\n"
+"(float8)(a1[3], a2[3], b[3].s0123, b[3].s45) +\n"
+"(float8)(b[3].s23, b[3].s4567, c0[3], c1[3]);\n"
+"intermediate += D * (float)coeff[3];\n"
+"float8 E = (float8)(a2_sum[1], b_sum[1].s0123, b_sum[1].s456) +\n"
+"(float8)( b_sum[1].s123, b_sum[1].s4567, c0_sum[1]) +\n"
+"(float8)( a1_sum[2], a2_sum[2], b_sum[2].s0123, b_sum[2].s45) +\n"
+"(float8)( b_sum[2].s23, b_sum[2].s4567, c0_sum[2], c1_sum[2]);\n"
+"intermediate += E * (float)coeff[4];\n"
+"float8 F = (float8)(a1_sum[1], a2_sum[1], b_sum[1].s0123, b_sum[1].s45) +\n"
+"(float8)(b_sum[1].s23, b_sum[1].s4567, c0_sum[1], c1_sum[1]);\n"
+"intermediate += F * (float)coeff[5];\n"
+"float8 G = b_sum[0] +\n"
+"(float8)(a0[3], a1[3], a2[3], b[3].s0123, b[3].s4) +\n"
+"(float8)(b[3].s3, b[3].s4567, c0[3], c1[3], c2[3]);\n"
+"intermediate += G * (float)coeff[6];\n"
+"float8 H = (float8)(a2_sum[0], b_sum[0].s0123, b_sum[0].s456) +\n"
+"(float8)(b_sum[0].s123, b_sum[0].s4567, c0_sum[0]) +\n"
+"(float8)(a0_sum[2], a1_sum[2], a2_sum[2], b_sum[2].s0123, b_sum[2].s4) +\n"
+"(float8)(b_sum[2].s3, b_sum[2].s4567, c0_sum[2], c1_sum[2], c2_sum[2]);\n"
+"intermediate += H * (float)coeff[7];\n"
+"float8 I = (float8)(a1_sum[0], a2_sum[0], b_sum[0].s0123, b_sum[0].s45) +\n"
+"(float8)(b_sum[0].s23, b_sum[0].s4567, c0_sum[0], c1_sum[0]) +\n"
+"(float8)(a0_sum[1], a1_sum[1], a2_sum[1], b_sum[1].s0123, b_sum[1].s4) +\n"
+"(float8)(b_sum[1].s3, b_sum[1].s4567, c0_sum[1], c1_sum[1], c2_sum[1]);\n"
+"intermediate += I * (float)coeff[8];\n"
+"float8 J = (float8)(a0_sum[0], a1_sum[0], a2_sum[0], b_sum[0].s0123, b_sum[0].s4) +\n"
+"(float8)(b_sum[0].s3, b_sum[0].s4567, c0_sum[0], c1_sum[0], c2_sum[0]);\n"
+"intermediate += J * (float)coeff[9];\n"
+"intermediate *= SCALE;\n"
+"vstore8(convert_uchar8_sat(intermediate), 0, (__global uchar*)(pdst));\n"
+"}\n"
+;
+#endif
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/own/gapi_types_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/own/gapi_types_tests.cpp
index c25435707..0b682299e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/own/gapi_types_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/own/gapi_types_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/own/mat_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/own/mat_tests.cpp
index ba2cd2df4..14db80d72 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/own/mat_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/own/mat_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/own/scalar_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/own/scalar_tests.cpp
index a9c5c0123..34c6a7380 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/own/scalar_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/own/scalar_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/test_main.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/test_main.cpp
index fa5862fa1..2caee865a 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/test_main.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/test_main.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 // FIXME: OpenCV license header
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/test_precomp.hpp b/inference-engine/thirdparty/fluid/modules/gapi/test/test_precomp.hpp
index bcab803ba..5e9adb06c 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/test_precomp.hpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/test_precomp.hpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 // FIXME: OpenCV header
@@ -19,6 +19,8 @@
 #include "opencv2/gapi/core.hpp"
 #include "opencv2/gapi/cpu/gcpukernel.hpp"
 #include "opencv2/gapi/gpu/ggpukernel.hpp"
+#include "opencv2/gapi/gpu/imgproc.hpp"
+#include "opencv2/gapi/gpu/core.hpp"
 #include "opencv2/gapi/gcompoundkernel.hpp"
 #include "opencv2/gapi/operators.hpp"
 #include "opencv2/gapi/fluid/imgproc.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/util/any_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/util/any_tests.cpp
index 60bbcc13b..d562df033 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/util/any_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/util/any_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/util/optional_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/util/optional_tests.cpp
index b7fabd530..df4484997 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/util/optional_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/util/optional_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/modules/gapi/test/util/variant_tests.cpp b/inference-engine/thirdparty/fluid/modules/gapi/test/util/variant_tests.cpp
index a95b6aa80..5c736e40e 100644
--- a/inference-engine/thirdparty/fluid/modules/gapi/test/util/variant_tests.cpp
+++ b/inference-engine/thirdparty/fluid/modules/gapi/test/util/variant_tests.cpp
@@ -2,7 +2,7 @@
 // It is subject to the license terms in the LICENSE file found in the top-level directory
 // of this distribution and at http://opencv.org/license.html.
 //
-// Copyright (C) 2018 Intel Corporation
+// Copyright (C) 2018-2019 Intel Corporation
 
 
 #include "test_precomp.hpp"
diff --git a/inference-engine/thirdparty/fluid/revision.txt b/inference-engine/thirdparty/fluid/revision.txt
index e088afdce..c1186171d 100644
--- a/inference-engine/thirdparty/fluid/revision.txt
+++ b/inference-engine/thirdparty/fluid/revision.txt
@@ -1 +1 @@
-a3df05d93b188d4e86e23ffd1e988dbec0fc9211
+master / 2019-01-28
diff --git a/inference-engine/thirdparty/mkl-dnn/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/CMakeLists.txt
index 939c81f2b..c522e4a61 100644
--- a/inference-engine/thirdparty/mkl-dnn/CMakeLists.txt
+++ b/inference-engine/thirdparty/mkl-dnn/CMakeLists.txt
@@ -16,6 +16,10 @@
 
 cmake_minimum_required(VERSION 2.8)
 
+if(POLICY CMP0022)
+    cmake_policy(SET CMP0022 NEW)
+endif()
+
 if(POLICY CMP0054)
     cmake_policy(SET CMP0054 NEW)
 endif()
@@ -40,7 +44,7 @@ endif()
 
 set(PROJECT_NAME "Intel(R) MKL-DNN")
 set(PROJECT_FULL_NAME "Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)")
-set(PROJECT_VERSION "0.17")
+set(PROJECT_VERSION "0.18.0")
 
 set(LIB_NAME mkldnn)
 
@@ -64,6 +68,9 @@ set(CMAKE_SRC_CCXX_FLAGS)       # SRC specifics
 set(CMAKE_EXAMPLE_CCXX_FLAGS)   # EXAMPLE specifics
 set(CMAKE_TEST_CCXX_FLAGS)      # TESTS specifics
 
+include(GNUInstallDirs)
+include(CMakePackageConfigHelpers)
+
 include("cmake/utils.cmake")
 include("cmake/options.cmake")
 include("cmake/OpenMP.cmake")
@@ -73,6 +80,7 @@ include("cmake/SDL.cmake")
 include("cmake/MKL.cmake")
 include("cmake/Doxygen.cmake")
 include("cmake/profiling.cmake")
+include("cmake/version.cmake")
 
 enable_testing()
 
@@ -82,4 +90,5 @@ add_subdirectory(src)
 add_subdirectory(examples)
 add_subdirectory(tests)
 
-install(FILES LICENSE DESTINATION share/doc/${LIB_NAME})
+# Cannot use CMAKE_INSTALL_DOCDIR since it uses PROJECT_NAME and not LIB_NAME
+install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/doc/${LIB_NAME})
diff --git a/inference-engine/thirdparty/mkl-dnn/LICENSE b/inference-engine/thirdparty/mkl-dnn/LICENSE
index 8dada3eda..fde864dbc 100644
--- a/inference-engine/thirdparty/mkl-dnn/LICENSE
+++ b/inference-engine/thirdparty/mkl-dnn/LICENSE
@@ -199,3 +199,17 @@
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
+
+   ============================================================================
+    
+   Intel MKL-DNN includes components with separate copyright 
+   notices and license terms.
+   
+   XByak, 3-clause BSD license
+   Copyright (c) 2007 MITSUNARI Shigeo
+   See full copyright notice and license text in src/cpu/xbyak/COPYRIGHT
+   
+   gtest, 3-clause BSD license
+   Copyright 2008, Google Inc.
+   See full copyright notice and license text in tests/gtests/gtest/LICENSE
+   
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/mkl-dnn/README.md b/inference-engine/thirdparty/mkl-dnn/README.md
index 2a5b29e2f..3a453c9b8 100644
--- a/inference-engine/thirdparty/mkl-dnn/README.md
+++ b/inference-engine/thirdparty/mkl-dnn/README.md
@@ -1,42 +1,43 @@
 # Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)
-![v0.17 beta](https://img.shields.io/badge/v0.17-beta-orange.svg)
+![v0.18 beta](https://img.shields.io/badge/v0.18-beta-orange.svg)
 
 Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN) is
-an open source performance library for deep learning applications. The library
-accelerates deep learning applications and framework on Intel(R) architecture. 
-Intel(R) MKL-DNN contains vectorized and threaded building blocks which you can
+an open-source performance library for deep-learning applications. The library
+accelerates deep-learning applications and frameworks on Intel architecture.
+Intel MKL-DNN contains vectorized and threaded building blocks that you can
 use to implement deep neural networks (DNN) with C and C++ interfaces.
 
-DNN functionality optimized for Intel architecture is also included in 
-[Intel(R) Math Kernel Library (Intel(R) MKL)](https://software.intel.com/en-us/mkl/features/deep-neural-networks).
-API in this implementation is not compatible with Intel MKL-DNN and does not
+DNN functionality optimized for Intel architecture is also included in
+[Intel Math Kernel Library (Intel MKL)](https://software.intel.com/en-us/mkl/features/deep-neural-networks).
+The API in that implementation is not compatible with Intel MKL-DNN and does not
 include certain new and experimental features.
 
-This release contains performance critical functions that improve performance of
-of the following deep learning topologies and variations of these.
+This release contains performance-critical functions that improve performance of
+the following deep learning topologies and variations of these:
 
 | Application                               | Example topology
 |:---                                       |:---
 | Image recognition                         | AlexNet, VGG, GoogleNet, ResNet, MobileNet
-| Image segmenation                         | FCN, SegNet, MaskRCNN, U-Net
+| Image segmentation                        | FCN, SegNet, MaskRCNN, U-Net
 | Volumetric segmentation                   | 3D-Unet
 | Object detection                          | SSD, Faster R-CNN, Yolo
-| Neural Machine Translation (experimental) | GNMT
-| Speech Recognition (experimental)         | DeepSpeech
-| Adversarial Networks                      | DCGAN, 3DGAN
-| Reinforcement Learning                    | A3C
-| Text-to-Speech                            | WaveNet
+| Neural machine translation                | GNMT
+| Speech recognition                        | DeepSpeech
+| Adversarial networks                      | DCGAN, 3DGAN
+| Reinforcement learning                    | A3C
+| Text-to-speech                            | WaveNet
 
 Intel MKL-DNN is used in the following software products:
 * [Caffe\* Optimized for Intel Architecture](https://github.com/intel/caffe)
 * [Chainer\*](https://chainer.org)
 * [DeepBench](https://github.com/baidu-research/DeepBench)
 * [PaddlePaddle\*](http://www.paddlepaddle.org)
+* [PyTorch\*](https://pytorch.org/)
 * [Tensorflow\*](https://www.tensorflow.org)
 * [Microsoft\* Cognitive Toolkit (CNTK)](https://docs.microsoft.com/en-us/cognitive-toolkit)
 * [Apache\* MXNet](https://mxnet.apache.org)
 * [OpenVINO(TM) toolkit](https://01.org/openvinotoolkit)
-* [Intel(R) Nervana(TM) Graph](https://github.com/NervanaSystems/ngraph)
+* [Intel Nervana Graph](https://github.com/NervanaSystems/ngraph)
 * [Menoh\*](https://github.com/pfnet-research/menoh)
 * [DeepLearning4J\*](https://deeplearning4j.org)
 * [BigDL](https://github.com/intel-analytics/BigDL)
@@ -44,49 +45,47 @@ Intel MKL-DNN is used in the following software products:
 ## License
 Intel MKL-DNN is licensed under
 [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). This
-software includes the following third party components:
+software includes the following third-party components:
 * [Xbyak](https://github.com/herumi/xbyak) distributed under [3-clause BSD licence](src/cpu/xbyak/COPYRIGHT)
 * [gtest](https://github.com/google/googletest) distributed under [3-clause BSD license](tests/gtests/gtest/LICENSE)
 
 ## Documentation
-* [Introduction](https://intel.github.io/mkl-dnn) explains programming model
+* [Introduction](https://intel.github.io/mkl-dnn) explains the programming model
 and basic concepts
 * [Reference manual](https://intel.github.io/mkl-dnn/modules.html) provides
 detailed functionality description
-* [Examples](https://github.com/intel/mkl-dnn/tree/master/examples) 
-demonstrate use of C and C++ APIs in simple topologies
-* [Tutorial](https://software.intel.com/en-us/articles/intel-mkl-dnn-part-1-library-overview-and-installation) 
-provides step by step installation instructions and an example walkthrough
+* [Examples](https://github.com/intel/mkl-dnn/tree/master/examples)
+demonstrates use of C and C++ APIs in simple topologies
+* [Tutorial](https://software.intel.com/en-us/articles/intel-mkl-dnn-part-1-library-overview-and-installation)
+provides step-by-step installation instructions and an example walkthrough
 
 ## Support
-Please submit your questions, feature requests and bug reports on
+Please submit your questions, feature requests, and bug reports on the
 [GitHub issues](https://github.com/intel/mkl-dnn/issues) page.
 
 **WARNING** The following functionality has preview status and might change
 without prior notification in future releases:
 * Convolutions with `s16` data type in source, weights or destination
-* Convolutions and auxiliary primitives for 3D spatial data
-* RNN, LSTM and GRU primitives
-* Intel Threading Building Blocks (Intel TBB\*) support
+* Threading Building Blocks (TBB) support
 
 ## How to Contribute
-We welcome community contributions to Intel MKL-DNN. If you have an idea how to improve the library:
+We welcome community contributions to Intel MKL-DNN. If you have an idea on how to improve the library:
 
 * Share your proposal via
  [GitHub issues](https://github.com/intel/mkl-dnn/issues).
-* Ensure you can build the product and run all the examples with your patch
-* In the case of a larger feature, create a test
-* Submit a [pull request](https://github.com/intel/mkl-dnn/pulls)
+* Ensure you can build the product and run all the examples with your patch.
+* In the case of a larger feature, create a test.
+* Submit a [pull request](https://github.com/intel/mkl-dnn/pulls).
 
 We will review your contribution and, if any additional fixes or modifications
 are necessary, may provide feedback to guide you. When accepted, your pull
-request will be merged the repository.
+request will be merged to the repository.
 
 ## System Requirements
-Intel MKL-DNN supports Intel(R) 64 architecture and compatible architectures.
+Intel MKL-DNN supports Intel 64 architecture and compatible architectures.
 The library is optimized for the systems based on
-* Intel Atom(R) processor with Intel(R) SSE4.1 support
-* 4th, 5th, 6th, 7th and 8th generation Intel(R) Core processor
+* Intel Atom(R) processor with Intel SSE4.1 support
+* 4th, 5th, 6th, 7th, and 8th generation Intel(R) Core(TM) processor
 * Intel(R) Xeon(R) processor E5 v3 family (formerly Haswell)
 * Intel Xeon processor E5 v4 family (formerly Broadwell)
 * Intel Xeon Platinum processor family (formerly Skylake)
@@ -100,24 +99,24 @@ The software dependencies are:
 * [Doxygen](http://www.stack.nl/~dimitri/doxygen/download.html#srcbin) 1.8.5 or later
 * C++ compiler with C++11 standard support
 * Optional dependencies:
-  * GNU OpenMP\*, LLVM OpenMP\*, or Intel OpenMP
-  * Threading Building Blocks (TBB)
-  * Intel MKL or Intel MKL small libraries
+  * GNU\* OpenMP\*, LLVM OpenMP, or Intel OpenMP
+  * Threading Building Blocks (TBB) 2017 or later
+  * Intel MKL 2017 Update 1 or Intel MKL small libraries
 
 > **Note**
-> Building Intel MKL-DNN with optinal dependencies may introduce additional
-> runtime dependencies for the library. Please refer to corresponding 
-> software system requirements for details.
+> Building Intel MKL-DNN with optional dependencies may introduce additional
+> runtime dependencies for the library. For details, refer to the corresponding
+> software system requirements.
 
 The software was validated on RedHat\* Enterprise Linux 7 with
-* GNU\* Compiler Collection 4.8, 5.4, 6.1, 7.2 and 8.1
+* GNU Compiler Collection 4.8, 5.4, 6.1, 7.2, and 8.1
 * Clang\* 3.8.0
-* [Intel(R) C/C++ Compiler](https://software.intel.com/en-us/intel-parallel-studio-xe)
-  17.0, 18.0 and 19.0
+* [Intel C/C++ Compiler](https://software.intel.com/en-us/intel-parallel-studio-xe)
+  17.0, 18.0, and 19.0
 
 on Windows Server\* 2012 R2 with
-* Microsoft\* Visual C++ 14.0 (Visual Studio 2015)
-* [Intel(R) C/C++ Compiler](https://software.intel.com/en-us/intel-parallel-studio-xe)
+* Microsoft Visual C++ 14.0 (Visual Studio 2015 Update 3)
+* [Intel C/C++ Compiler](https://software.intel.com/en-us/intel-parallel-studio-xe)
   17.0 and 19.0
 
 on macOS\* 10.13 (High Sierra) with
@@ -125,196 +124,300 @@ on macOS\* 10.13 (High Sierra) with
 * [Intel C/C++ Compiler](https://software.intel.com/en-us/intel-parallel-studio-xe)
   18.0 and 19.0
 
-The implementation uses OpenMP\* 4.0 SIMD extensions. We recommend using
-Intel(R) Compiler for the best performance results.
+The implementation uses OpenMP 4.0 SIMD extensions. We recommend using the
+Intel C++ Compiler for the best performance results.
 
 ## Installation
 
+### Build from source
+
+#### Download source code
 Download [Intel MKL-DNN source code](https://github.com/intel/mkl-dnn/archive/master.zip)
-or clone the repository to your system
+or clone [the repository](https://github.com/intel/mkl-dnn.git) to your system.
 
 ```
-	git clone https://github.com/intel/mkl-dnn.git
+git clone https://github.com/intel/mkl-dnn.git
 ```
 
-Ensure that all software dependencies are in place and have at least minimal
-supported version.
+#### Configure build
+Intel MKL-DNN uses a CMake-based build system. You can use CMake options to control the build.
+Along with the standard CMake options such as `CMAKE_INSTALL_PREFIX` and `CMAKE_BUILD_TYPE`,
+you can pass Intel MKL-DNN specific options:
+
+|Option                 | Possible Values (defaults in bold)   | Description
+|:---                   |:---                                  | :---
+|MKLDNN_LIBRARY_TYPE    | **SHARED**, STATIC                   | Defines the resulting library type
+|MKLDNN_THREADING       | **OMP**, OMP:INTEL, OMP:COMP, TBB    | Defines the threading type
+|WITH_EXAMPLE           | **ON**, OFF                          | Controls building the examples
+|WITH_TEST              | **ON**, OFF                          | Controls building the tests
+|ARCH_OPT_FLAGS         | *compiler flags*                     | Specifies compiler optimization flags (see warning note below)
+|VTUNEROOT              | *path*                               | Enables integration with Intel(R) VTune(TM) Amplifier
+
+> **WARNING**
+>
+> By default, Intel MKL-DNN is built specifically for the processor type of the
+> compiling machine (for example, `-march=native` in the case of GCC). While this option
+> gives better performance, the resulting library can be run only on systems
+> that are instruction-set compatible with the compiling machine.
+>
+> Therefore, if Intel MKL-DNN is to be shipped to other platforms (for example, built by
+> Linux distribution maintainers), consider setting `ARCH_OPT_FLAGS` to `""`.
+
+For more options and details, check [cmake/options.cmake](cmake/options.cmake).
 
-Intel MKL-DNN can take advantage of optimized
-matrix-matrix multiplication (GEMM) function from Intel MKL. The dynamic
-library with this functionality is included in the repository. If you choose 
-to build Intel MKL-DNN with the binary dependency download Intel MKL small
-libraries using provided script
+##### Using Intel MKL (optional)
+Intel MKL-DNN includes an optimized matrix-matrix multiplication (GEMM) implementation for modern platforms.
+The library can also take advantage of GEMM functions from Intel MKL to improve performance with older
+versions of compilers or on older platforms. This behavior is controlled by the `MKLDNN_USE_MKL` option.
 
-###### Linux/macOS
+|Option                 | Possible Values (defaults in bold)   | Description
+|:---                   |:---                                  | :---
+|MKLDNN_USE_MKL         | **DEF**, NONE, ML, FULL, FULL:STATIC | Defines the binary dependency on Intel MKL
+
+The dynamic library with this functionality is included in the repository.
+If you choose to build Intel MKL-DNN with the binary dependency, download the Intel MKL small
+libraries using the provided script:
+
+*Linux/macOS*
 ```
-	cd scripts && ./prepare_mkl.sh && cd ..
+cd scripts && ./prepare_mkl.sh && cd ..
 ```
 
-###### Windows
+*Windows\**
 ```
-	cd scripts && call prepare_mkl.bat && cd ..
+cd scripts && call prepare_mkl.bat && cd ..
 ```
 
-or manually from [GitHub release section](https://github.com/intel/mkl-dnn/releases)
+or manually from [GitHub release section](https://github.com/intel/mkl-dnn/releases),
 and unpack it to the `external` directory in the repository root. Intel MKL-DNN
-can also be built with full Intel MKL, if the latter is installed on the system.
-You might need to set `MKLROOT` environment variable to the path where full
-Intel MKL is installed to help cmake locate the library.
-
-You can choose to build Intel MKL-DNN without binary dependency. The resulting
-version will be fully functional, however performance of convolutions relying
-on GEMM-based algorithm, inner product, and mkldnn_?gemm functionality may be
-suboptimal.
+can also be built with full Intel MKL if the latter is installed on the system.
+You might need to set the `MKLROOT` environment variable to the path where the full
+Intel MKL is installed to help `cmake` locate the library.
 
 > **Note**
 >
-> Using Intel MKL small libraries currently work for Intel MKL-DNN built with
-> OpenMP\* only. Building with Intel TBB requires either full Intel MKL library
-> or standalone build.
+> Using Intel MKL small libraries currently works only for Intel MKL-DNN built with
+> OpenMP. Building with Intel TBB requires either the full Intel MKL library
+> or a standalone build.
 >
 > Using Intel MKL or Intel MKL small libraries will introduce additional
-> runtime dependencies. Please refer to Intel MKL 
-> [system requirements](https://software.intel.com/en-us/articles/intel-math-kernel-library-intel-mkl-2019-system-requirements)
-> for additional information.
+> runtime dependencies. For additional information, refer to Intel MKL
+> [system requirements](https://software.intel.com/en-us/articles/intel-math-kernel-library-intel-mkl-2019-system-requirements).
 
-Intel MKL-DNN uses a CMake-based build system
-
-```
-	mkdir -p build && cd build && cmake $CMAKE_OPTIONS .. && make
-```
-
-Here `$CMAKE_OPTIONS` are options to control the build. Along with the standard
-cmake options such as `CMAKE_INSTALL_PREFIX` or `CMAKE_BUILD_TYPE`,
-user can also pass Intel MKL-DNN specific ones:
+##### Threading
+Intel MKL-DNN is parallelized and can use the OpenMP or TBB threading runtime. OpenMP threading is the default build mode
+and is recommended for the best performance. TBB support is experimental. This behavior is controlled by the `MKLDNN_THREADING` option.
 
 |Option                 | Possible Values (defaults in bold)   | Description
 |:---                   |:---                                  | :---
-|MKLDNN_LIBRARY_TYPE    | **SHARED**, STATIC                   | Defines resulting library type
-|MKLDNN_THREADING       | **OMP**, OMP:INTEL, OMP:COMP, TBB    | Defines threading type
-|MKLDNN_USE_MKL         | **DEF**, NONE, ML, FULL, FULL:STATIC | Defines binary dependency on Intel MKL
-|WITH_EXAMPLE           | **ON**, OFF                          | Controls building examples
-|WITH_TEST              | **ON**, OFF                          | Controls building tests
-|ARCH_OPT_FLAGS (\*)    | *compiler flags*                     | Specifies compiler optimization flags
-|VTUNEROOT              | *path*                               | Enables integration with Intel(R) Vtune(tm) Amplifier
-
-Please check [cmake/options.cmake](cmake/options.cmake) for more options
-and details.
-
-> (\*) **WARNING**
->
-> By default Intel MKL-DNN is built specifically for the processor type of the
-> compiling machine (e.g. `-march=native` in case of GCC). While this option
-> gives better performance, the resulting library can only be run on systems
-> that are instruction-set compatible with the compiling machine.
+|MKLDNN_THREADING       | **OMP**, OMP:INTEL, OMP:COMP, TBB    | Defines the threading type
+
+##### OpenMP
+Intel MKL-DNN can use Intel, GNU or CLANG OpenMP runtime. Because different OpenMP runtimes may not be binary compatible,
+it's important to ensure that only one OpenMP runtime is used throughout the
+application. Having more than one OpenMP runtime initialized may lead to
+undefined behavior including incorrect results or crashes.
+
+Intel MKL-DNN library built with the binary dependency will link against the Intel OpenMP
+runtime included with the Intel MKL small libraries package. The Intel OpenMP runtime
+is binary compatible with the GNU OpenMP and Clang OpenMP runtimes and is
+recommended for the best performance results.
+
+Intel MKL-DNN library built standalone will use the OpenMP runtime supplied by
+the compiler, so as long as both the library and the application use the
+same compiler, the correct OpenMP runtime will be used.
+
+##### TBB
+TBB support is experimental. Intel MKL-DNN has limited optimizations done for Intel TBB and has some functional
+limitations if built with Intel TBB.
+
+Functional limitations:
+* Convolution with Winograd algorithm is not supported
+
+Performance limitations (mostly less parallelism than in case of OpenMP):
+* Batch normalization
+* Convolution backward by weights
+* mkldnn_sgemm
+
+> **WARNING**
 >
-> Hence if Intel MKL-DNN is to be shipped to other platforms (e.g. built by
-> Linux distribution maintainers) consider setting ARCH_OPT_FLAGS to "".
+> If the library is built with the full Intel MKL, the user is expected to set
+> the `MKL_THREADING_LAYER` environment variable to either `tbb` or `sequential` in order
+> to force Intel MKL to use Intel TBB for parallelization or to be sequential,
+> respectively. Without this setting, Intel MKL (RT library) tries
+> to use OpenMP for parallelization by default.
+
+#### Build on Linux/macOS
+Ensure that all software dependencies are in place and have at least the minimal
+supported version.
 
-Intel MKL-DNN includes unit tests implemented using the googletest framework. To validate your build, run:
+Configure CMake and create a makefile:
 
 ```
-	make test
+mkdir -p build && cd build && cmake $CMAKE_OPTIONS ..
 ```
 
-Documentation is provided inline and can be generated in HTML format with Doxygen:
+Build the application:
 
 ```
-	make doc
+make
 ```
 
-Documentation will reside in `build/reference/html` folder.
+The build can be validated with the unit-test suite:
 
-Finally,
 ```
-	make install
+ctest
 ```
-will place the  header files, libraries and documentation in `/usr/local`. To change
-the installation path, use the option `-DCMAKE_INSTALL_PREFIX=<prefix>` when invoking CMake.
 
-## Linking your application
+The reference manual is provided inline and can also be generated in HTML format with Doxygen:
 
-Intel MKL-DNN includes several header files providing C and C++ APIs for
-the functionality and one or several dynamic libraries depending on how
-Intel MKL-DNN was built. The minimal installation:
+```
+make doc
+```
 
-|File                   | Description
-|:---                   |:---
-|include/mkldnn.h       | C header
-|include/mkldnn.hpp     | C++ header
-|include/mkldnn_types.h | auxiliary C header
-|lib/libmkldnn.so       | Intel MKL-DNN dynamic library
-|lib/libmkldnn.a        | Intel MKL-DNN static library (if built with `MKLDNN_LIBRARY_TYPE=STATIC`)
+Documentation will reside in the `build/reference/html` folder.
 
+Finally:
 
-#### Intel MKL-DNN with OpenMP
+```
+make install
+```
 
-If Intel MKL-DNN is built with Intel MKL small libraries the following extra
-libraries would be installed:
+will place the header files, libraries, and documentation in `/usr/local`. To change
+the installation path, use the option `-DCMAKE_INSTALL_PREFIX=<prefix>` when invoking CMake.
 
-|File                   | Description
-|:---                   |:---
-|lib/libiomp5.so        | Intel OpenMP* runtime library
-|lib/libmklml_gnu.so    | Intel MKL small library for GNU* OpenMP runtime
-|lib/libmklml_intel.so  | Intel MKL small library for Intel(R) OpenMP runtime
+#### Build on Windows
+Ensure that all software dependencies are in place and have at least the minimal
+supported version.
 
-Intel MKL-DNN uses OpenMP\* for parallelism and requires an OpenMP runtime
-library to work. As different OpenMP runtimes may not be binary compatible
-it's important to ensure that only one OpenMP runtime is used throughout the
-application. Having more than one OpenMP runtime initialized may lead to
-undefined behavior resulting in incorrect results or crashes.
+> **NOTE**
+>
+> Building Intel MKL-DNN from a terminal requires using either the Intel Parallel Studio command prompt
+> or the Microsoft\* Visual Studio\* developer command prompt instead of the default Windows command prompt.
+>
+> The Intel(R) Parallel Studio command prompt is an item in the **Start** menu in the **Intel Parallel Studio
+> \<version\>** folder that has a Windows Command Prompt icon and a name like **Compiler 18.0 Update 5…**.
+>
+> The default for building the project for the Intel C++ Compiler is to use the Intel
+> Parallel Studio developer command prompt.
+
+Configure CMake and create a Microsoft Visual Studio solution:
 
-Intel MKL-DNN library built with binary dependency will link against Intel OpenMP
-runtime included with Intel MKL small libraries package. Intel OpenMP runtime
-is binary compatible with GNU OpenMP and CLANG OpenMP runtimes and is 
-recommended for the best performance results. Here are example linklines for 
-GNU C++ compiler and Intel C++ compiler.
-```
-	g++ -std=c++11 -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn -lmklml_intel -liomp5
-```
 ```
-	icpc -std=c++11 -qopenmp -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn -lmklml_intel
+mkdir build & cd build && cmake -G "Visual Studio 15 2017 Win64" ..
 ```
-Using GNU compiler with `-fopenmp` and `-liomp5` options will link the 
-application with both Intel and GNU OpenMP runtime libraries. This will lead
-to undefined behavior of the application.
 
-Intel MKL-DNN library built standalone will use OpenMP runtime supplied by
-the compiler, so as long as both the library and the application use the
-same compiler correct OpenMP runtime will be used. 
+For the solution to use Intel C++ Compiler:
+
 ```
-	g++ -std=c++11 -fopenmp -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn
+cmake -G "Visual Studio 15 2017 Win64" -T "Intel C++ Compiler 18.0" ..
 ```
+
+After you have built the initial project using CMake, you can then open the project with
+Microsoft Visual Studio and build from there. You can also use msbuild command-line tool
+to build from the command line:
+
 ```
-	icpc -std=c++11 -qopenmp -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn
+msbuild "Intel(R) MKL-DNN.sln" /p:Configuration=Release [/t:rebuild] /m
 ```
+where the optional argument `/t:rebuild` rebuilds the project.
 
-#### Intel MKL-DNN with Intel TBB
+The build can be validated with the unit-test suite:
 
-Intel MKL-DNN built with Intel TBB doesn't require special handling:
 ```
-	g++ -std=c++11 -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn -ltbb
+ctest
 ```
 
-Please note that Intel MKL-DNN requires Intel TBB 2017 or above.
-Also, Intel MKL-DNN has limited optimizations done for Intel TBB
-and has some functional limitations if built with Intel TBB.
+## Linking Your Application
 
-Functional limitations:
-* Convolution with Winograd algorithm is not supported
+### Linux/macOS
+Intel MKL-DNN includes several header files providing C and C++ APIs for
+the functionality and one or several dynamic libraries depending on how
+Intel MKL-DNN was built.
 
-Performance limitations (mostly less parallelism than in case of OpenMP):
-* Batch normalization
-* Convolution backward by weights
-* mkldnn_sgemm
+**Linux**
+
+|File                   | Description
+|:---                   |:---
+|include/mkldnn.h       | C header
+|include/mkldnn.hpp     | C++ header
+|include/mkldnn_types.h | Auxiliary C header
+|lib/libmkldnn.so       | Intel MKL-DNN dynamic library
+|lib/libmkldnn.a        | Intel MKL-DNN static library (if built with `MKLDNN_LIBRARY_TYPE=STATIC`)
+|lib/libiomp5.so        | Intel OpenMP\* runtime library (if built with `MKLDNN_USE_MKL=ML`)
+|lib/libmklml_gnu.so    | Intel MKL small library for GNU OpenMP runtime (if built with `MKLDNN_USE_MKL=ML`)
+|lib/libmklml_intel.so  | Intel MKL small library for Intel OpenMP runtime (if built with `MKLDNN_USE_MKL=ML`)
+
+**macOS**
+
+|File                     | Description
+|:---                     |:---
+|include/mkldnn.h         | C header
+|include/mkldnn.hpp       | C++ header
+|include/mkldnn_types.h   | Auxiliary C header
+|lib/libmkldnn.dylib      | Intel MKL-DNN dynamic library
+|lib/libmkldnn.a          | Intel MKL-DNN static library (if built with `MKLDNN_LIBRARY_TYPE=STATIC`)
+|lib/libiomp5.dylib       | Intel OpenMP\* runtime library (if built with `MKLDNN_USE_MKL=ML`)
+|lib/libmklml_gnu.dylib   | Intel MKL small library for GNU OpenMP runtime (if built with `MKLDNN_USE_MKL=ML`)
+|lib/libmklml_intel.dylib | Intel MKL small library for Intel OpenMP runtime (if built with `MKLDNN_USE_MKL=ML`)
+
+Linkline examples below assume that Intel MKL-DNN is installed in the directory
+defined in the MKLDNNROOT environment variable.
+
+```
+g++ -std=c++11 -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn
+clang -std=c++11 -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn
+icpc -std=c++11 -I${MKLDNNROOT}/include -L${MKLDNNROOT}/lib simple_net.cpp -lmkldnn
+```
 
 > **WARNING**
 >
-> If the library is built with full Intel MKL user is expected to set
-> `MKL_THREADING_LAYER` environment variable to either `tbb` or `sequential`
-> to force Intel MKL to use Intel TBB for parallelization or to be sequential
-> respectively. Without this setting Intel MKL (RT library) by default would
-> try to use OpenMP for parallelization.
+> Using the GNU compiler with the `-fopenmp` and `-liomp5` options will link the
+> application with both the Intel and GNU OpenMP runtime libraries. This will lead
+> to undefined behavior in the application.
+
+> **NOTE**
+>
+> Applications linked dynamically will resolve the dependencies at runtime. 
+> Make sure that the dependencies are available in the standard locations
+> defined by the operating system, in the locatons listed in `LD_LIBRARY_PATH` (Linux),
+> `DYLD_LIBRARY_PATH` (macOS) environment variables, or `rpath` mechanism.
+
+### Windows
+Intel MKL-DNN includes several header files providing C and C++ APIs for
+the functionality and one or several dynamic libraries depending on how
+Intel MKL-DNN was built.
+
+|File                   | Description
+|:---                   |:---
+|bin\libmkldnn.dll      | Intel MKL-DNN dynamic library
+|bin\libiomp5.dll       | Intel OpenMP\* runtime library (if built with `MKLDNN_USE_MKL=ML`)
+|bin\libmklml.dll       | Intel MKL small library (if built with `MKLDNN_USE_MKL=ML`)
+|include\mkldnn.h       | C header
+|include\mkldnn.hpp     | C++ header
+|include\mkldnn_types.h | Auxiliary C header
+|lib\libmkldnn.lib      | Intel MKL-DNN import library
+|lib\libiomp5.lib       | Intel OpenMP\* runtime import library (if built with `MKLDNN_USE_MKL=ML`)
+|lib\libmklml.lib       | Intel MKL small library import library (if built with `MKLDNN_USE_MKL=ML`)
+
+To link the application from the command line, set up the `LIB` and `INCLUDE` environment variables to point to the locations of 
+the Intel MKL-DNN headers and libraries. The Linkline examples below assume that Intel MKL-DNN is installed in the directory
+defined in the MKLDNNROOT environment variable. 
+
+```
+set INCLUDE=%MKLDNNROOT%\include;%INCLUDE%
+set LIB=%MKLDNNROOT%\lib;%LIB%
+icl /Qstd=c++11 /qopenmp simple_net.cpp mkldnn.lib
+cl simple_net.cpp mkldnn.lib
+```
+
+Refer to [Microsoft Visual Studio documentation](https://docs.microsoft.com/en-us/cpp/build/walkthrough-creating-and-using-a-dynamic-link-library-cpp?view=vs-2017)
+on linking the application using MSVS solutions.
+
+> **NOTE**
+> Applications linked dynamically will resolve the dependencies at runtime.
+> Make sure that the dependencies are available in the standard locations
+> defined by the operating system or in the locatons listed in the `PATH` environment variable.
 
 --------
 
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/Doxygen.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/Doxygen.cmake
index b6ed79a1c..d23c617f7 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/Doxygen.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/Doxygen.cmake
@@ -35,11 +35,11 @@ if(DOXYGEN_FOUND)
         ${CMAKE_CURRENT_BINARY_DIR}/header.html
         @ONLY)
     file(GLOB_RECURSE HEADERS
-        ${CMAKE_SOURCE_DIR}/include/*.h
-        ${CMAKE_SOURCE_DIR}/include/*.hpp
+        ${PROJECT_SOURCE_DIR}/include/*.h
+        ${PROJECT_SOURCE_DIR}/include/*.hpp
         )
     file(GLOB_RECURSE DOX
-        ${CMAKE_SOURCE_DIR}/doc/*
+        ${PROJECT_SOURCE_DIR}/doc/*
         )
     add_custom_command(
         OUTPUT ${DOXYGEN_STAMP_FILE}
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/MKL.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/MKL.cmake
index bb020595b..554bbd32f 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/MKL.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/MKL.cmake
@@ -22,6 +22,8 @@ if(MKL_cmake_included)
     return()
 endif()
 set(MKL_cmake_included true)
+include("cmake/utils.cmake")
+include("cmake/options.cmake")
 
 # set SKIP_THIS_MKL to true if given configuration is not supported
 function(maybe_skip_this_mkl LIBNAME)
@@ -168,33 +170,18 @@ function(detect_mkl LIBNAME)
     string(FIND "${MKLLIBPATH}" ${CMAKE_CURRENT_SOURCE_DIR}/external __idx)
     if(${__idx} EQUAL 0)
         if(WIN32)
-            if(MINGW)
-                # We need to install *.dll into bin/ instead of lib/.
-                install(PROGRAMS ${MKLDLL} DESTINATION bin)
-            else()
-                install(PROGRAMS ${MKLDLL} DESTINATION lib)
-            endif()
+            install(PROGRAMS ${MKLDLL} ${MKLIOMP5DLL}
+                DESTINATION ${CMAKE_INSTALL_BINDIR})
         else()
-            install(PROGRAMS ${MKLLIB} DESTINATION lib)
-        endif()
-        if(MKLIOMP5LIB)
-            if(WIN32)
-                if(MINGW)
-                    # We need to install *.dll into bin/ instead of lib/.
-                    install(PROGRAMS ${MKLIOMP5DLL} DESTINATION bin)
-                else()
-                    install(PROGRAMS ${MKLIOMP5DLL} DESTINATION lib)
-                endif()
-            else()
-                install(PROGRAMS ${MKLIOMP5LIB} DESTINATION lib)
-            endif()
+            install(PROGRAMS ${MKLLIB} ${MKLIOMP5LIB}
+                DESTINATION ${CMAKE_INSTALL_LIBDIR})
         endif()
     endif()
 
     if(WIN32)
         # Add paths to DLL to %PATH% on Windows
         get_filename_component(MKLDLLPATH "${MKLDLL}" PATH)
-        set(CTESTCONFIG_PATH "${CTESTCONFIG_PATH}\;${MKLDLLPATH}")
+        append_to_windows_path_list(CTESTCONFIG_PATH "${MKLDLLPATH}")
         set(CTESTCONFIG_PATH "${CTESTCONFIG_PATH}" PARENT_SCOPE)
     endif()
 
@@ -203,6 +190,11 @@ function(detect_mkl LIBNAME)
     set(MKLINC ${MKLINC} PARENT_SCOPE)
     set(MKLLIB "${MKLLIB}" PARENT_SCOPE)
     set(MKLDLL "${MKLDLL}" PARENT_SCOPE)
+    if(LIBNAME MATCHES "mklml")
+        set(MKLDNN_USES_MKL "MKLML:SHARED" PARENT_SCOPE)
+    else()
+        set(MKLDNN_USES_MKL "FULL:SHARED" PARENT_SCOPE)
+    endif()
 
     set(MKLIOMP5LIB "${MKLIOMP5LIB}" PARENT_SCOPE)
     set(MKLIOMP5DLL "${MKLIOMP5DLL}" PARENT_SCOPE)
@@ -232,20 +224,25 @@ function(set_static_mkl_libs libpath)
     set(MKLLIB "${MKLLIB}" PARENT_SCOPE)
 endfunction()
 
+set(MKLDNN_USES_MKL "")
 detect_mkl("mklml_intel")
 detect_mkl("mklml_gnu")
 detect_mkl("mklml")
 detect_mkl("mkl_rt")
-if (MKLDNN_USE_MKL STREQUAL "FULL:STATIC" AND HAVE_MKL)
-    set(MKLDLL "")
-    get_filename_component(MKLLIBPATH "${MKLLIB}" PATH)
-    set_static_mkl_libs(${MKLLIBPATH})
-endif ()
 
 if(HAVE_MKL)
+    if (MKLDNN_USE_MKL STREQUAL "FULL:STATIC")
+        set(MKLDLL "")
+        get_filename_component(MKLLIBPATH "${MKLLIB}" PATH)
+        set_static_mkl_libs(${MKLLIBPATH})
+        list(APPEND EXTRA_STATIC_LIBS ${MKLLIB})
+        set(MKLDNN_USES_MKL "FULL:STATIC")
+    else()
+        list(APPEND EXTRA_SHARED_LIBS ${MKLLIB})
+    endif()
+
     add_definitions(-DUSE_MKL -DUSE_CBLAS)
     include_directories(AFTER ${MKLINC})
-    list(APPEND mkldnn_LINKER_LIBS ${MKLLIB})
 
     set(MSG "Intel(R) MKL:")
     message(STATUS "${MSG} include ${MKLINC}")
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/OpenMP.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/OpenMP.cmake
index f9c3620eb..086c9c203 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/OpenMP.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/OpenMP.cmake
@@ -21,10 +21,11 @@ if(OpenMP_cmake_included)
     return()
 endif()
 set(OpenMP_cmake_included true)
-
 include("cmake/Threading.cmake")
 include("cmake/MKL.cmake")
 
+set(MKLDNN_USES_INTEL_OPENMP FALSE)
+
 if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     # OSX Clang doesn't have OpenMP by default.
     # But we still want to build the library.
@@ -33,13 +34,16 @@ else()
     set(_omp_severity "FATAL_ERROR")
 endif()
 
-
 macro(forbid_link_compiler_omp_rt)
     if (NOT WIN32)
-        set_if(OpenMP_C_FOUND CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OpenMP_C_FLAGS})
-        set_if(OpenMP_CXX_FOUND CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS ${OpenMP_CXX_FLAGS})
+        set_if(OpenMP_C_FOUND
+            CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
+            "${OpenMP_C_FLAGS}")
+        set_if(OpenMP_CXX_FOUND
+            CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS
+            "${OpenMP_CXX_FLAGS}")
         if (NOT APPLE)
-            set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--as-needed")
+            append(CMAKE_SHARED_LINKER_FLAGS "-Wl,--as-needed")
         endif()
     endif()
 endmacro()
@@ -47,30 +51,33 @@ endmacro()
 macro(use_intel_omp_rt)
     # fast return
     if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+        set(MKLDNN_USES_INTEL_OPENMP TRUE)
         return()
     endif()
 
     # Do not link with compiler-native OpenMP library if Intel MKL is present.
     # Rationale: Intel MKL comes with Intel OpenMP library which is compatible
     # with all libraries shipped with compilers that Intel MKL-DNN supports.
-    if(HAVE_MKL)
+    get_filename_component(MKLIOMP5LIB "${MKLIOMP5LIB}" PATH)
+    find_library(IOMP5LIB
+                NAMES "iomp5" "iomp5md" "libiomp5" "libiomp5md"
+                HINTS  ${MKLIOMP5LIB} )
+    if(IOMP5LIB)
         forbid_link_compiler_omp_rt()
-        if (UNIX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
-            # For some reasons Clang ignores `-fopenmp=libiomp5` switch and
-            # links against libomp.so anyways.
-            # The workaround is to set the full path to libiomp5.so
-            add_library(libiomp5 SHARED IMPORTED)
-            set_property(TARGET libiomp5 PROPERTY IMPORTED_LOCATION "${MKLIOMP5LIB}")
-            list(APPEND EXTRA_LIBS libiomp5)
-        else()
-            list(APPEND EXTRA_LIBS ${MKLIOMP5LIB})
+        if (WIN32)
+            get_filename_component(MKLIOMP5DLL "${MKLIOMP5DLL}" PATH)
+            find_file(IOMP5DLL
+                NAMES "libiomp5.dll" "libiomp5md.dll"
+                HINTS ${MKLIOMP5DLL})
         endif()
+        list(APPEND EXTRA_SHARED_LIBS ${IOMP5LIB})
     else()
         if (MKLDNN_THREADING STREQUAL "OMP:INTEL")
             message(${_omp_severity} "Intel OpenMP runtime could not be found. "
                 "Please either use OpenMP runtime that comes with the compiler "
                 "(via -DMKLDNN_THREADING={OMP,OMP:COMP}), or "
-                "install Intel MKL / Intel MKL-ML (e.g. scripts/prepare_mkl.sh)")
+                "explicitely provide the path to libiomp with the "
+                "-DCMAKE_LIBRARY_PATH option")
         endif()
     endif()
 endmacro()
@@ -83,7 +90,7 @@ elseif(MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
     append(CMAKE_C_FLAGS "-Xclang -fopenmp")
     append(CMAKE_CXX_FLAGS "-Xclang -fopenmp")
     set(OpenMP_CXX_FOUND true)
-    list(APPEND EXTRA_LIBS ${MKLIOMP5LIB})
+    list(APPEND EXTRA_SHARED_LIBS ${IOMP5LIB})
 else()
     find_package(OpenMP)
     #newer version for findOpenMP (>= v. 3.9)
@@ -96,24 +103,29 @@ else()
         set(OpenMP_C_FOUND true)
         set(OpenMP_CXX_FOUND true)
     endif()
-    append_if(OpenMP_C_FOUND CMAKE_C_FLAGS "${OpenMP_C_FLAGS}")
-    append_if(OpenMP_CXX_FOUND CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS}")
+    append_if(OpenMP_C_FOUND CMAKE_SRC_CCXX_FLAGS "${OpenMP_C_FLAGS}")
 endif()
 
 if (MKLDNN_THREADING MATCHES "OMP")
     if (OpenMP_CXX_FOUND)
         set_threading("OMP")
+        append(CMAKE_TEST_CCXX_FLAGS "${OpenMP_CXX_FLAGS}")
+        append(CMAKE_EXAMPLE_CCXX_FLAGS "${OpenMP_CXX_FLAGS}")
     else()
         message(${_omp_severity} "OpenMP library could not be found. "
             "Proceeding might lead to highly sub-optimal performance.")
     endif()
 
     if (MKLDNN_THREADING STREQUAL "OMP:COMP")
-        set(MKLIOMP5LIB "")
-        set(MKLIOMP5DLL "")
+        set(IOMP5LIB "")
+        set(IOMP5DLL "")
     else()
         use_intel_omp_rt()
     endif()
+
+    if(MKLIOMP5LIB)
+        set(MKLDNN_USES_INTEL_OPENMP TRUE)
+    endif()
 else()
     # Compilation happens with OpenMP to enable `#pragma omp simd`
     # but during linkage OpenMP dependency should be avoided
@@ -121,9 +133,9 @@ else()
     return()
 endif()
 
-set_ternary(_omp_lib_msg MKLIOMP5LIB "${MKLIOMP5LIB}" "provided by compiler")
+set_ternary(_omp_lib_msg IOMP5LIB "${IOMP5LIB}" "provided by compiler")
 message(STATUS "OpenMP lib: ${_omp_lib_msg}")
 if(WIN32)
-    set_ternary(_omp_dll_msg MKLIOMP5DLL "${MKLIOMP5LIB}" "provided by compiler")
+    set_ternary(_omp_dll_msg IOMP5DLL "${IOMP5LIB}" "provided by compiler")
     message(STATUS "OpenMP dll: ${_omp_dll_msg}")
 endif()
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/SDL.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/SDL.cmake
index b494a0fc0..c4e0ab489 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/SDL.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/SDL.cmake
@@ -21,16 +21,17 @@ if(SDL_cmake_included)
     return()
 endif()
 set(SDL_cmake_included true)
+include("cmake/utils.cmake")
 
 if(UNIX)
     set(CMAKE_CCXX_FLAGS "-fPIC -Wformat -Wformat-security")
-    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
-    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -D_FORTIFY_SOURCE=2")
+    append(CMAKE_CXX_FLAGS_RELEASE "-D_FORTIFY_SOURCE=2")
+    append(CMAKE_C_FLAGS_RELEASE "-D_FORTIFY_SOURCE=2")
     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
         if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9)
-            set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fstack-protector-all")
+            append(CMAKE_CCXX_FLAGS "-fstack-protector-all")
         else()
-            set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fstack-protector-strong")
+            append(CMAKE_CCXX_FLAGS "-fstack-protector-strong")
         endif()
 
         # GCC might be very paranoid for partial structure initialization, e.g.
@@ -39,21 +40,21 @@ if(UNIX)
         # only. To prevent warnings on users' side who use the library and turn
         # this warning on, let's use it too. Applicable for the library sources
         # and interfaces only (tests currently rely on that fact heavily)
-        set(CMAKE_SRC_CCXX_FLAGS "${CMAKE_SRC_CCXX_FLAGS} -Wmissing-field-initializers")
-        set(CMAKE_EXAMPLE_CCXX_FLAGS "${CMAKE_EXAMPLE_CCXX_FLAGS} -Wmissing-field-initializers")
+        append(CMAKE_SRC_CCXX_FLAGS "-Wmissing-field-initializers")
+        append(CMAKE_EXAMPLE_CCXX_FLAGS "-Wmissing-field-initializers")
     elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
-        set(CMAKE_CCXX_FLAGS "${CMAKE_CCXX_FLAGS} -fstack-protector-all")
+        append(CMAKE_CCXX_FLAGS "-fstack-protector-all")
     elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstack-protector")
+        append(CMAKE_CXX_FLAGS "-fstack-protector")
     endif()
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_CCXX_FLAGS}")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CCXX_FLAGS}")
+    append(CMAKE_C_FLAGS "${CMAKE_CCXX_FLAGS}")
+    append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_FLAGS}")
     if(APPLE)
-        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-bind_at_load")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-bind_at_load")
+        append(CMAKE_SHARED_LINKER_FLAGS "-Wl,-bind_at_load")
+        append(CMAKE_EXE_LINKER_FLAGS "-Wl,-bind_at_load")
     else()
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
-        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now")
+        append(CMAKE_EXE_LINKER_FLAGS "-pie")
+        append(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now")
+        append(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now")
     endif()
 endif()
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/TBB.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/TBB.cmake
index fb0cdc1e7..fe24e09b7 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/TBB.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/TBB.cmake
@@ -21,6 +21,7 @@ if(TBB_cmake_included)
     return()
 endif()
 set(TBB_cmake_included true)
+include("cmake/Threading.cmake")
 
 if(NOT MKLDNN_THREADING STREQUAL "TBB")
     return()
@@ -43,6 +44,6 @@ elseif(UNIX)
 endif()
 
 set_threading("TBB")
-list(APPEND mkldnn_LINKER_LIBS ${TBB_IMPORTED_TARGETS})
+list(APPEND EXTRA_SHARED_LIBS ${TBB_IMPORTED_TARGETS})
 
 message(STATUS "Intel(R) TBB: ${TBBROOT}")
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/config.cmake.in b/inference-engine/thirdparty/mkl-dnn/cmake/config.cmake.in
new file mode 100644
index 000000000..53b7032f2
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/config.cmake.in
@@ -0,0 +1,6 @@
+@PACKAGE_INIT@
+include("${CMAKE_CURRENT_LIST_DIR}/@LIB_EXPORT_NAME@.cmake")
+set(MKLDNN_THREADING "@MKLDNN_THREADING@")
+set(MKLDNN_USES_INTEL_OPENMP @MKLDNN_USES_INTEL_OPENMP@)
+set(MKLDNN_USES_MKL "@MKLDNN_USES_MKL@")
+check_required_components("@LIB_NAME@")
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/options.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/options.cmake
index e6ff2498e..2f7697028 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/options.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/options.cmake
@@ -128,6 +128,15 @@ set(VTUNEROOT "" CACHE STRING
 # Miscellaneous
 # =============
 
+option(BENCHDNN_USE_RDPMC
+    "enables rdpms counter to report precise cpu frequency in benchdnn.
+     CAUTION: may not work on all cpus (hence disabled by default)"
+    OFF) # disabled by default
+
+# =============
+# Developer flags
+# =============
+
 set(MKLDNN_USE_CLANG_SANITIZER "" CACHE STRING
     "instructs build system to use a Clang sanitizer. Possible values:
     Address: enables MemorySanitizer
@@ -136,8 +145,7 @@ set(MKLDNN_USE_CLANG_SANITIZER "" CACHE STRING
     Undefined: enables UndefinedBehaviourSanitizer
     This feature is experimental and is only available on Linux.")
 
-
-option(BENCHDNN_USE_RDPMC
-    "enables rdpms counter to report precise cpu frequency in benchdnn.
-     CAUTION: may not work on all cpus (hence disabled by default)"
-    OFF) # disabled by default
+option(MKLDNN_PRODUCT_BUILD_MODE
+    "Enables/disables product build mode. For example,
+    setting MKLDNN_PRODUCT_BUILD_MODE=OFF makes warnings non-fatal"
+    ON)
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/platform.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/platform.cmake
index 3597970a2..a5412150d 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/platform.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/platform.cmake
@@ -22,6 +22,8 @@ if(platform_cmake_included)
 endif()
 set(platform_cmake_included true)
 
+include("cmake/utils.cmake")
+
 add_definitions(-DMKLDNN_DLL -DMKLDNN_DLL_EXPORTS)
 
 # UNIT8_MAX-like macros are a part of the C99 standard and not a part of the
@@ -50,6 +52,8 @@ if(MSVC)
         set(DEF_ARCH_OPT_FLAGS "-QxHOST")
         # disable: loop was not vectorized with "simd"
         append(CMAKE_CCXX_NOWARN_FLAGS "-Qdiag-disable:15552")
+        # disable: unknown pragma
+        append(CMAKE_CCXX_NOWARN_FLAGS "-Qdiag-disable:3180")
     endif()
     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
         # Clang cannot vectorize some loops with #pragma omp simd and gets
@@ -58,7 +62,8 @@ if(MSVC)
         append(CMAKE_CCXX_FLAGS "-Wno-pass-failed")
     endif()
 elseif(UNIX OR MINGW)
-    append(CMAKE_CCXX_FLAGS "-Wall -Werror -Wno-unknown-pragmas")
+    append(CMAKE_CCXX_FLAGS "-Wall -Wno-unknown-pragmas")
+    append_if_product(CMAKE_CCXX_FLAGS "-Werror")
     append(CMAKE_CCXX_FLAGS "-fvisibility=internal")
     append(CMAKE_C_FLAGS "-std=c99")
     append(CMAKE_CXX_FLAGS "-std=c++11 -fvisibility-inlines-hidden")
@@ -125,11 +130,6 @@ elseif(UNIX OR MINGW)
     endif()
 endif()
 
-if(WIN32)
-    string(REPLACE ";" "\;" ENV_PATH "$ENV{PATH}")
-    set(CTESTCONFIG_PATH "${CTESTCONFIG_PATH}\;${MKLDLLPATH}\;${ENV_PATH}")
-endif()
-
 if(UNIX OR MINGW)
     if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
         # Link Intel libraries statically (except for iomp5)
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/profiling.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/profiling.cmake
index c531d84c6..846135cf0 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/profiling.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/profiling.cmake
@@ -23,6 +23,6 @@ if("${VTUNEROOT}" STREQUAL "")
     message(STATUS "VTune profiling environment is unset")
 else()
     set_ternary(JITPROFLIB MSVC "jitprofiling.lib" "libjitprofiling.a")
-    list(APPEND EXTRA_LIBS "${VTUNEROOT}/lib64/${JITPROFLIB}")
+    list(APPEND EXTRA_STATIC_LIBS "${VTUNEROOT}/lib64/${JITPROFLIB}")
     message(STATUS "VTune profiling environment is set")
 endif()
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/template.vcxproj.user b/inference-engine/thirdparty/mkl-dnn/cmake/template.vcxproj.user
new file mode 100644
index 000000000..68b6c8615
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/template.vcxproj.user
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup>
+    <LocalDebuggerEnvironment>PATH=@CTESTCONFIG_PATH@;$(PATH)</LocalDebuggerEnvironment>
+    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
+  </PropertyGroup>
+</Project>
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/utils.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/utils.cmake
index d8680b71a..867ec08aa 100644
--- a/inference-engine/thirdparty/mkl-dnn/cmake/utils.cmake
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/utils.cmake
@@ -21,6 +21,17 @@ if(utils_cmake_included)
     return()
 endif()
 set(utils_cmake_included true)
+include("cmake/options.cmake")
+
+# Common configuration for tests / test cases on Windows
+function(maybe_configure_windows_test name kind)
+    if(WIN32 OR MINGW)
+        string(REPLACE  ";" "\;" PATH "${CTESTCONFIG_PATH};$ENV{PATH}")
+        set_property(${kind} ${name} PROPERTY ENVIRONMENT "PATH=${PATH}")
+        configure_file(${PROJECT_SOURCE_DIR}/cmake/template.vcxproj.user
+            ${name}.vcxproj.user @ONLY)
+    endif()
+endfunction()
 
 # Register new executable/test
 #   name -- name of the executable
@@ -29,13 +40,10 @@ set(utils_cmake_included true)
 #   arg4 -- (optional) list of extra library dependencies
 function(register_exe name srcs test)
     add_executable(${name} ${srcs})
-    target_link_libraries(${name} ${LIB_NAME} ${EXTRA_LIBS} ${ARGV3})
+    target_link_libraries(${name} ${LIB_NAME} ${EXTRA_SHARED_LIBS} ${ARGV3})
     if("${test}" STREQUAL "test")
         add_test(${name} ${name})
-        if(WIN32 OR MINGW)
-            set_property(TEST ${name} PROPERTY ENVIRONMENT "PATH=${CTESTCONFIG_PATH};$ENV{PATH}")
-            configure_file(${CMAKE_SOURCE_DIR}/config_template.vcxproj.user ${name}.vcxproj.user @ONLY)
-        endif()
+        maybe_configure_windows_test(${name} TEST)
     endif()
 endfunction()
 
@@ -45,6 +53,20 @@ macro(append var value)
     set(${var} "${${var}} ${value}")
 endmacro()
 
+# Append to a variable if building a product build (as opposed to a developer
+# build that is detected via the MKLDNN_PRODUCT_BUILD_MODE option)
+macro(append_if_product var value)
+    if(MKLDNN_PRODUCT_BUILD_MODE)
+        append(${var} "${value}")
+    endif()
+endmacro()
+
+if(MKLDNN_PRODUCT_BUILD_MODE)
+    message(STATUS "This is a product build")
+else()
+    message(WARNING "This is a developer build")
+endif()
+
 # Set variable depending on condition:
 #   var = cond ? val_if_true : val_if_false
 macro(set_ternary var condition val_if_true val_if_false)
@@ -70,3 +92,32 @@ macro(append_if condition var value)
         append(${var} "${value}")
     endif()
 endmacro()
+
+# Append a path to path_list variable (Windows-only version)
+macro(append_to_windows_path_list path_list path)
+    file(TO_NATIVE_PATH "${path}" append_to_windows_path_list_tmp__)
+    if(${path_list})
+        set(${path_list}
+            "${${path_list}};${append_to_windows_path_list_tmp__}")
+    else()
+        set(${path_list}
+            "${append_to_windows_path_list_tmp__}")
+    endif()
+endmacro()
+
+function(target_link_libraries_private target list)
+    # Foreach is required for compatibility with 2.8.11 ways
+    foreach(lib ${list})
+        target_link_libraries(${target} LINK_PRIVATE
+            "$<BUILD_INTERFACE:${lib}>")
+    endforeach(lib)
+endfunction()
+
+function(target_link_libraries_public target list)
+    # Foreach is required for compatibility with 2.8.11 ways
+    foreach(lib ${list})
+        get_filename_component(base "${lib}" NAME)
+        target_link_libraries(${target} LINK_PUBLIC
+            "$<INSTALL_INTERFACE:${base}>")
+    endforeach(lib)
+endfunction()
diff --git a/inference-engine/thirdparty/mkl-dnn/cmake/version.cmake b/inference-engine/thirdparty/mkl-dnn/cmake/version.cmake
new file mode 100644
index 000000000..4591880ab
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/cmake/version.cmake
@@ -0,0 +1,46 @@
+#===============================================================================
+# Copyright 2019 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Control generating version file
+#===============================================================================
+
+if(version_cmake_included)
+    return()
+endif()
+set(version_cmake_included true)
+
+string(REPLACE "." ";" VERSION_LIST ${PROJECT_VERSION})
+list(GET VERSION_LIST 0 MKLDNN_VERSION_MAJOR)
+list(GET VERSION_LIST 1 MKLDNN_VERSION_MINOR)
+list(GET VERSION_LIST 2 MKLDNN_VERSION_PATCH)
+
+find_package(Git)
+if (GIT_FOUND)
+    execute_process(COMMAND ${GIT_EXECUTABLE} log -1 --format=%H
+        WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+        RESULT_VARIABLE RESULT
+        OUTPUT_VARIABLE MKLDNN_VERSION_HASH
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+
+if(NOT GIT_FOUND OR RESULT)
+    set(MKLDNN_VERSION_HASH "N/A")
+endif()
+
+configure_file(
+    "${PROJECT_SOURCE_DIR}/include/mkldnn_version.h.in"
+    "${PROJECT_BINARY_DIR}/include/mkldnn_version.h"
+)
diff --git a/inference-engine/thirdparty/mkl-dnn/doc/Doxyfile.in b/inference-engine/thirdparty/mkl-dnn/doc/Doxyfile.in
index d1c466ce3..8c38fd97e 100644
--- a/inference-engine/thirdparty/mkl-dnn/doc/Doxyfile.in
+++ b/inference-engine/thirdparty/mkl-dnn/doc/Doxyfile.in
@@ -158,7 +158,7 @@ FULL_PATH_NAMES        = YES
 # will be relative from the directory where doxygen is started.
 # This tag requires that the tag FULL_PATH_NAMES is set to YES.
 
-STRIP_FROM_PATH        = @CMAKE_SOURCE_DIR@
+STRIP_FROM_PATH        = @PROJECT_SOURCE_DIR@
 
 # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
 # path mentioned in the documentation of a class, which tells the reader which
diff --git a/inference-engine/thirdparty/mkl-dnn/doc/ex_simplenet.md b/inference-engine/thirdparty/mkl-dnn/doc/ex_simplenet.md
index ef6077582..ced53d8aa 100644
--- a/inference-engine/thirdparty/mkl-dnn/doc/ex_simplenet.md
+++ b/inference-engine/thirdparty/mkl-dnn/doc/ex_simplenet.md
@@ -59,7 +59,7 @@ auto conv1_src_md = memory::desc({conv1_src_tz},
 /* similarly create conv_weights_md and conv_dst_md in format::any */
 ~~~
 
-6. Create a convolution descriptor by specifying the algorithm, propagation
+6. Create a convolution descriptor by specifying the algorithm([convolution algorithms](@ref winograd_convolution), propagation
    kind, shapes of input, weights, bias, output, convolution strides,
    padding, and kind of padding. Propagation kind is set to *forward_inference*
    -optimized for inference execution and omits computations that are only necessary
diff --git a/inference-engine/thirdparty/mkl-dnn/doc/mainpage.md b/inference-engine/thirdparty/mkl-dnn/doc/mainpage.md
index 2a0c7a834..fdec54923 100644
--- a/inference-engine/thirdparty/mkl-dnn/doc/mainpage.md
+++ b/inference-engine/thirdparty/mkl-dnn/doc/mainpage.md
@@ -26,9 +26,9 @@ The table below summarizes the list of supported functions and their variants.
 |                   | 3D direct deconvolution  | x             | x              |                |
 | Inner Product     | 2D inner product         | x             | x              | x              |
 |                   | 3D inner product         | x             | x              |                |
-| RNN (experimental)| Vanilla RNN cell         | x             | x              |                |
-|                   | LSTM cell                | x             | x              |                |
-|                   | GRU cell                 | x             | x              |                |
+| RNN               | Vanilla RNN              | x             | x              |                |
+|                   | LSTM                     | x             | x              | x              |
+|                   | GRU                      | x             | x              |                |
 | Pooling           | 2D maximum pooling       | x             | x              | x              |
 |                   | 2D average pooling       | x             | x              | x              |
 |                   | 3D maximum pooling       | x             | x              |                |
@@ -36,19 +36,22 @@ The table below summarizes the list of supported functions and their variants.
 | Normalization     | 2D LRN (within channel)  | x             | x              |                |
 |                   | 2D LRN (across channels) | x             | x              |                |
 |                   | 2D batch normalization   | x             | x              |                |
-|                   | 3D Batch Normalization   | x             | x              |                |
-| Activation        | ReLU                     | x             | x              | x              |
-|                   | Tanh                     |               | x              |                |
-|                   | ELU                      |               | x              |                |
-|                   | Bounded ReLU             |               | x              |                |
-|                   | Soft ReLU                |               | x              |                |
-|                   | Logistic regression      |               | x              |                |
+|                   | 3D batch normalization   | x             | x              |                |
+| Activation and    | ReLU                     | x             | x              | x              |
+| elementwise       | Tanh                     | x             | x              |                |
+| functions         | ELU                      | x             | x              |                |
+|                   | Square                   | x             | x              |                |
+|                   | Sqrt                     | x             | x              |                |
+|                   | Abs                      | x             | x              |                |
+|                   | Linear                   | x             | x              |                |
+|                   | Bounded ReLU             | x             | x              |                |
+|                   | Soft ReLU                | x             | x              |                |
+|                   | Logistic                 | x             | x              |                |
 |                   | Softmax                  | x             | x              |                |
 | Data manipulation | Reorder/quantization     | x             | x              | x              |
 |                   | Sum                      | x             | x              | x              |
 |                   | Concat                   | x             | x              | x              |
-|                   | Elementwise operations   |               | x              |                |
-|                   | Channel Shuffle          | x             | x              | x              |
+|                   | Shuffle                  | x             | x              | x              |
 
 ## Programming Model
 
@@ -140,7 +143,7 @@ The following examples are available in the /examples directory and provide more
     - C: simple_training.c
     - C++: simple_training_net.cpp
 
-* Creation of forward propagation of GNMT topology (experimental support)
+* Creation of forward propagation of GNMT topology
     - C++: simple_rnn.cpp
 
 * Training RNN with sequences of variable length
@@ -152,6 +155,7 @@ The following examples are available in the /examples directory and provide more
    format `any` for input or output.
    The memory format chosen is based on different circumstances such as hardware and
    convolutional parameters.
+*  Convolution could be executed using the [Winograd algorithm](@ref winograd_convolution) for a significant performance boost.
 *  Operation primitives (such as ReLU, LRN, or pooling) following convolution or
    inner product, should have input in the same memory format as the
    convolution or inner-product. Reordering can be an expensive
@@ -162,6 +166,7 @@ The following examples are available in the /examples directory and provide more
    might need workspace memory for storing results of intermediate operations
    that help with backward propagation.
 
+
 The following link provides a guide to MKLDNN verbose mode for profiling execution:
 
 * [Performance profiling](@ref perf_profile)
diff --git a/inference-engine/thirdparty/mkl-dnn/doc/perf_profile.md b/inference-engine/thirdparty/mkl-dnn/doc/perf_profile.md
index 7c36ffe48..d0c28bf25 100644
--- a/inference-engine/thirdparty/mkl-dnn/doc/perf_profile.md
+++ b/inference-engine/thirdparty/mkl-dnn/doc/perf_profile.md
@@ -90,39 +90,42 @@ To dump JIT-kernels set MKLDNN_JIT_DUMP environment variable to `1`. For example
 ```
 
 This will produce the following output files:
-    mkldnn_dump_jit_avx2_conv_fwd_kernel_f32.0.bin
-    mkldnn_dump_jit_uni_lrn_fwd_kernel_f32.2.bin
+
+    mkldnn_dump_jit_uni_reorder_kernel_f32.0.bin
+    mkldnn_dump_jit_avx2_conv_fwd_kernel_f32.1.bin
+    mkldnn_dump_jit_uni_relu_kernel_f32.2.bin
     mkldnn_dump_jit_uni_lrn_fwd_kernel_f32.3.bin
     mkldnn_dump_jit_uni_lrn_fwd_kernel_f32.4.bin
-    mkldnn_dump_jit_uni_pool_kernel_f32.5.bin
-    mkldnn_dump_jit_uni_relu_kernel_f32.1.bin
-    
+    mkldnn_dump_jit_uni_lrn_fwd_kernel_f32.5.bin
+    mkldnn_dump_jit_uni_reorder_kernel_f32.6.bin
+    mkldnn_dump_jit_uni_pool_kernel_f32.7.bin
+
 To open these files any disassembler can be used. For example:
 
 ```
-    $ xed -ir mkldnn_dump_jit_avx2_conv_fwd_kernel_f32.0.bin
-    XDIS 0: PUSH      BASE       53                       push ebx
-    XDIS 1: PUSH      BASE       55                       push ebp
-    XDIS 2: BINARY    BASE       41                       inc ecx
-    XDIS 3: PUSH      BASE       54                       push esp
-    XDIS 4: BINARY    BASE       41                       inc ecx
-    XDIS 5: PUSH      BASE       55                       push ebp
-    XDIS 6: BINARY    BASE       41                       inc ecx
-    XDIS 7: PUSH      BASE       56                       push esi
-    XDIS 8: BINARY    BASE       41                       inc ecx
-    XDIS 9: PUSH      BASE       57                       push edi
-    XDIS a: BINARY    BASE       48                       dec eax
-    XDIS b: DATAXFER  BASE       8B07                     mov eax, dword ptr [edi]
-    XDIS d: BINARY    BASE       48                       dec eax
-    XDIS e: DATAXFER  BASE       8B7708                   mov esi, dword ptr [edi+0x8]
-    XDIS 11: BINARY    BASE       48                       dec eax
-    XDIS 12: DATAXFER  BASE       8B5710                   mov edx, dword ptr [edi+0x10]
-    XDIS 15: BINARY    BASE       48                       dec eax
-    XDIS 16: DATAXFER  BASE       8B5F18                   mov ebx, dword ptr [edi+0x18]
-    XDIS 19: BINARY    BASE       48                       dec eax
-    XDIS 1a: DATAXFER  BASE       8B4F40                   mov ecx, dword ptr [edi+0x40]
-    XDIS 1d: BINARY    BASE       44                       inc esp
-    XDIS 1e: DATAXFER  BASE       8B6F70                   mov ebp, dword ptr [edi+0x70]
+    $ xed -64 -ir mkldnn_dump_jit_avx2_conv_fwd_kernel_f32.1.bin
+    XDIS 0: PUSH      BASE       53                       push rbx
+    XDIS 1: PUSH      BASE       55                       push rbp
+    XDIS 2: PUSH      BASE       4154                     push r12
+    XDIS 4: PUSH      BASE       4155                     push r13
+    XDIS 6: PUSH      BASE       4156                     push r14
+    XDIS 8: PUSH      BASE       4157                     push r15
+    XDIS a: DATAXFER  BASE       488B07                   mov rax, qword ptr [rdi]
+    XDIS d: DATAXFER  BASE       488B7708                 mov rsi, qword ptr [rdi+0x8]
+    XDIS 11: DATAXFER  BASE       488B5710                 mov rdx, qword ptr [rdi+0x10]
+    XDIS 15: DATAXFER  BASE       488B5F18                 mov rbx, qword ptr [rdi+0x18]
+    XDIS 19: DATAXFER  BASE       488B8F98000000           mov rcx, qword ptr [rdi+0x98]
+    XDIS 20: DATAXFER  BASE       448BAF00010000           mov r13d, dword ptr [rdi+0x100]
+    XDIS 27: DATAXFER  BASE       4C8BB7D0000000           mov r14, qword ptr [rdi+0xd0]
+    XDIS 2e: BINARY    BASE       4983FE04                 cmp r14, 0x4
+    XDIS 32: COND_BR   BASE       0F85EF030000             jnz 0x427
+    XDIS 38: LOGICAL   BASE       4D31DB                   xor r11, r11
+    XDIS 3b: LOGICAL   BASE       41F7C510000000           test r13d, 0x10
+    XDIS 42: COND_BR   BASE       0F8558000000             jnz 0xa0
+    XDIS 48: DATAXFER  AVX        C5FC1006                 vmovups ymm0, ymmword ptr [rsi]
+    XDIS 4c: DATAXFER  AVX        C5FC104E20               vmovups ymm1, ymmword ptr [rsi+0x20]
+    XDIS 51: DATAXFER  AVX        C5FC105640               vmovups ymm2, ymmword ptr [rsi+0x40]
+    XDIS 56: DATAXFER  AVX        C5FC109E207A0100         vmovups ymm3, ymmword ptr [rsi+0x17a20]
     ...
 ```
 
diff --git a/inference-engine/thirdparty/mkl-dnn/doc/winograd_convolution.md b/inference-engine/thirdparty/mkl-dnn/doc/winograd_convolution.md
new file mode 100644
index 000000000..bbe3ebed0
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/doc/winograd_convolution.md
@@ -0,0 +1,93 @@
+Winograd Convolution {#winograd_convolution}
+==========================================
+## Why use a different convolution algorithm?
+Executing convolution using the **Winograd algorithm** often gives a significant performance boost compared with using the **Direct algorithm**.
+Details about the algorithm can be found in [<b>Fast Algorithms for Convolutional Neural Networks by A. Lavin and S. Gray</b>](https://arxiv.org/abs/1509.09308).
+
+## Winograd in Intel(R) MKL-DNN
+Intel(R) MKL-DNN supports the **Winograd algorithm** for convolutions with the following sizes:
+* 2D convolution (i.e. spatial depth `d=1`)
+* kernel sizes `kh=3,kw=3`.
+* strides `sh=sw=1`.
+
+* **Inference** - Based on convolution sizes, MKLDNN chooses between two different tile sizes F(2x2, 3x3) or F(4x4, 3x3)(refer to [Winograd paper](https://arxiv.org/abs/1509.09308) for more informartion on tile sizes).
+* **Training** - Uses F(4x4, 3x3) winograd.
+
+Create a Winograd convolution by simply creating a convolution descriptor (step 6 in [SimpleNet Example](@ref ex_simplenet)) with right algorithm.
+The rest of the steps for creating convolution are exactly the same as shown in the example.
+~~~cpp
+auto conv1_desc = convolution_forward::desc(
+    prop_kind::forward_inference, algorithm::convolution_winograd,
+    conv1_src_md, conv1_weights_md, conv1_bias_md, conv1_dst_md,
+    conv1_strides, conv1_padding, padding_kind::zero);
+~~~
+
+## Auto dispatching of convolution algorithm
+Instead of choosing a convolution algorithm for each and every convolution in a topology, a user could simply ask MKLDNN to make the choice.
+
+Creating a convolution by using `convolution_auto` allows MKLDNN to dispatch the *best* algorithm. 
+~~~cpp
+auto conv1_desc = convolution_forward::desc(
+    prop_kind::forward_inference, algorithm::convolution_auto,
+    conv1_src_md, conv1_weights_md, conv1_bias_md, conv1_dst_md,
+    conv1_strides, conv1_padding, padding_kind::zero);
+~~~
+
+MKLDNN would choose the algorithm which will potentially give *best performance* based on
+* convolution dimensions
+* number of logical processors available. (For auto-dispatching to work as intended,
+        use the same thread affinity settings when creating the convolution as when executing the convolution.)
+*The relationship between convolution sizes and the best performing algorithm is empirically based on performance observations*
+
+### Example using benchdnn
+The following examples use [<b>benchdnn</b>](https://github.com/intel/mkl-dnn/tree/master/tests/benchdnn) to illustrate the performance benefits of using `convolution_auto`.
+
+On a 2 Socket Intel Xeon 8180 processor with 28 cores/socket and HT off:
+~~~sh
+OMP_NUM_THREADS=56 KMP_AFFINITY=granularity=fine,compact numactl -l tests/benchdnn/benchdnn --mode=p --conv -v5 --alg=auto --dir=BWD_WB mb112ic64ih300oc64oh300kh3ph1n"ssd_300_voc0712:conv1_2"
+
+mkldnn implementation: jit_wino_4x3:avx512_core
+...
+mkldnn_verbose,exec,convolution,jit_wino_4x3:avx512_core,backward_weights,fsrc:nChw16c fwei:gOIhw16i16o fbia:x fdst:nChw16c,alg:convolution_winograd,mb112_g1ic64oc64_ih300oh300kh3sh1dh0ph1_iw300ow300kw3sw1dw0pw1,61.32
+...
+perf,ssd_300_voc0712:conv1_2,--dir=BWD_WB --alg=auto mb112ic64ih300oc64oh300kh3ph1nssd_300_voc0712:conv1_2,739.879,0,61.332,12063.5,62.503,11837.5
+~~~
+
+In the above test-case `convolution_auto` choses winograd convolution (using a heuristic based on the convolution sizes and number of threads), as winograd convolution is faster than direct in this case.
+~~~sh
+OMP_NUM_THREADS=56 KMP_AFFINITY=granularity=fine,compact numactl -l tests/benchdnn/benchdnn --mode=p --conv -v5 --alg=direct --dir=BWD_WB mb112ic64ih300oc64oh300kh3ph1n"ssd_300_voc0712:conv1_2"
+
+mkldnn implementation: jit:avx512_common
+...
+mkldnn_verbose,exec,convolution,jit:avx512_common,backward_weights,fsrc:nchw fwei:gOhwi16o fbia:x fdst:nChw16c,alg:convolution_direct,mb112_g1ic64oc64_ih300oh300kh3sh1dh0ph1_iw300ow300kw3sw1dw0pw1,176.10
+...
+perf,ssd_300_voc0712:conv1_2,--dir=BWD_WB mb112ic64ih300oc64oh300kh3ph1nssd_300_voc0712:conv1_2,739.879,0,175.422,4217.7,180.315,4103.26
+~~~
+
+<br/>
+
+In the following example, `convolution_auto` chooses direct convolution because the winograd implementation is slower than direct in this case.
+~~~sh
+OMP_NUM_THREADS=56 KMP_AFFINITY=granularity=fine,compact tests/benchdnn/benchdnn --mode=p --conv -v5 --alg=auto --dir=BWD_WB mb112ic64ih28oc64oh28kh3ph1n"googlenet_v2:inception_3a/3x3"
+
+mkldnn implementation: jit:avx512_common
+...
+mkldnn_verbose,exec,convolution,jit:avx512_common,backward_weights,fsrc:nChw16c fwei:gOIhw16i16o fbia:x fdst:nChw16c,alg:convolution_direct,mb112_g1ic64oc64_ih28oh28kh3sh1dh0ph1_iw28ow28kw3sw1dw0pw1,1.13
+perf,googlenet_v2:inception_3a/3x3,--dir=BWD_WB --alg=auto mb112ic64ih28oc64oh28kh3ph1ngooglenet_v2:inception_3a/3x3,6.1693,0,1.04272,5916.52,1.13284,5445.88
+~~~
+~~~sh
+OMP_NUM_THREADS=56 KMP_AFFINITY=granularity=fine,compact tests/benchdnn/benchdnn --mode=p --conv -v5 --alg=wino --dir=BWD_WB mb112ic64ih28oc64oh28kh3ph1n"googlenet_v2:inception_3a/3x3"
+
+mkldnn implementation: jit_wino_4x3:avx512_core
+...
+mkldnn_verbose,exec,convolution,jit_wino_4x3:avx512_core,backward_weights,fsrc:nChw16c fwei:gOIhw16i16o fbia:x fdst:nChw16c,alg:convolution_winograd,mb112_g1ic64oc64_ih28oh28kh3sh1dh0ph1_iw28ow28kw3sw1dw0pw1,2.15
+...
+perf,googlenet_v2:inception_3a/3x3,--dir=BWD_WB --alg=wino mb112ic64ih28oc64oh28kh3ph1ngooglenet_v2:inception_3a/3x3,6.1693,0,2.14404,2877.41,2.20445,2798.56
+~~~
+
+## Other considerations when using Winograd
+The following side-effects should be weighed against the performance boost achieved when using Winograd:
+* **Memory** - Transforms are intermmediate results in winograd, which often require significant memory. Currently this memory is allocated internally by MKLDNN as scratchpad memory. As more convolutions using winograd
+are added to the topology, this memory could grow significantly. This growth is mitigated when several convolutions using Winograd are created by the same instance and executed sequentially, because then
+this scratchpad can be shared between convolutions.
+* **Accuracy** - In some cases Winograd can be signficantly less accurate than direct as demontrated in [Winograd paper](https://arxiv.org/abs/1509.09308).
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/examples/CMakeLists.txt
index 3d05855d4..601ce1856 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/CMakeLists.txt
+++ b/inference-engine/thirdparty/mkl-dnn/examples/CMakeLists.txt
@@ -26,7 +26,7 @@ append(CMAKE_CXX_FLAGS "${CMAKE_EXAMPLE_CCXX_FLAGS}")
 append(CMAKE_C_FLAGS "${CMAKE_CCXX_SANITIZER_FLAGS}")
 append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_SANITIZER_FLAGS}")
 
-include_directories(${CMAKE_SOURCE_DIR}/include)
+include_directories(${PROJECT_SOURCE_DIR}/include)
 
 set_if(UNIX LIBM m)
 
@@ -35,8 +35,6 @@ register_exe(simple-net-cpp simple_net.cpp "test")
 register_exe(simple-training-net-c simple_training_net.c "test" ${LIBM})
 register_exe(simple-training-net-cpp simple_training_net.cpp "test" ${LIBM})
 register_exe(simple-net-int8-cpp simple_net_int8.cpp "test")
-
-if(HAVE_MKL)
-    register_exe(simple-rnn-cpp simple_rnn.cpp "test")
-    register_exe(simple-rnn-training-cpp simple_rnn_training.cpp "test")
-endif()
+register_exe(simple-rnn-cpp simple_rnn.cpp "test")
+register_exe(simple-rnn-int8-cpp simple_rnn_int8.cpp "test")
+register_exe(simple-rnn-training-cpp simple_rnn_training.cpp "test")
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_net.c b/inference-engine/thirdparty/mkl-dnn/examples/simple_net.c
index 6a4e78a7d..a88d0a849 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_net.c
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_net.c
@@ -76,13 +76,13 @@ void _free(void *ptr) {
 }
 #endif
 
-static size_t product(int *arr, size_t size) {
+static size_t product(ptrdiff_t *arr, size_t size) {
     size_t prod = 1;
     for (size_t i = 0; i < size; ++i) prod *= arr[i];
     return prod;
 }
 
-static void init_data_memory(uint32_t dim, const int *dims,
+static void init_data_memory(uint32_t dim, const ptrdiff_t *dims,
         mkldnn_memory_format_t user_fmt, mkldnn_data_type_t mkldnn_f32,
         mkldnn_engine_t engine, float *data, mkldnn_primitive_t *memory)
 {
@@ -159,12 +159,12 @@ mkldnn_status_t simple_net() {
      * {BATCH, OC, CONV_OH, CONV_OW}
      * strides: {CONV_STRIDE, CONV_STRIDE}
      */
-    int conv_user_src_sizes[4] = { BATCH, IC, CONV_IH, CONV_IW };
-    int conv_user_weights_sizes[4] = { OC, IC, 11, 11 };
-    int conv_bias_sizes[4] = { OC };
-    int conv_user_dst_sizes[4] = { BATCH, OC, CONV_OH, CONV_OW };
-    int conv_strides[2] = { CONV_STRIDE, CONV_STRIDE };
-    int conv_padding[2] = { CONV_PAD, CONV_PAD };
+    ptrdiff_t conv_user_src_sizes[4] = { BATCH, IC, CONV_IH, CONV_IW };
+    ptrdiff_t conv_user_weights_sizes[4] = { OC, IC, 11, 11 };
+    ptrdiff_t conv_bias_sizes[4] = { OC };
+    ptrdiff_t conv_user_dst_sizes[4] = { BATCH, OC, CONV_OH, CONV_OW };
+    ptrdiff_t conv_strides[2] = { CONV_STRIDE, CONV_STRIDE };
+    ptrdiff_t conv_padding[2] = { CONV_PAD, CONV_PAD };
 
     float *conv_src = net_src;
     float *conv_weights = (float *)aligned_malloc(
@@ -350,10 +350,10 @@ mkldnn_status_t simple_net() {
      * strides: {POOL_STRIDE, POOL_STRIDE}
      */
 
-    int32_t pool_dst_sizes[4] = { BATCH, OC, POOL_OH, POOL_OW };
-    int32_t pool_kernel[2] = { 3, 3 };
-    int32_t pool_strides[2] = { POOL_STRIDE, POOL_STRIDE };
-    int32_t pool_padding[2] = { POOL_PAD, POOL_PAD };
+    ptrdiff_t pool_dst_sizes[4] = { BATCH, OC, POOL_OH, POOL_OW };
+    ptrdiff_t pool_kernel[2] = { 3, 3 };
+    ptrdiff_t pool_strides[2] = { POOL_STRIDE, POOL_STRIDE };
+    ptrdiff_t pool_padding[2] = { POOL_PAD, POOL_PAD };
 
     /* create pooling memory descriptor on dst descriptor
      *  from previous primitive */
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_net.cpp b/inference-engine/thirdparty/mkl-dnn/examples/simple_net.cpp
index 8ebc5c5bc..586b6f630 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_net.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_net.cpp
@@ -45,7 +45,7 @@ void simple_net(int times = 100) {
     memory::dims conv1_bias_tz = { 96 };
     memory::dims conv1_dst_tz = { batch, 96, 55, 55 };
     memory::dims conv1_strides = { 4, 4 };
-    auto conv1_padding = { 0, 0 };
+    memory::dims conv1_padding = { 0, 0 };
 
     /* Allocate input and output buffers for user data */
     std::vector<float> user_src(batch * 3 * 227 * 227);
@@ -165,7 +165,7 @@ void simple_net(int times = 100) {
     memory::dims pool1_dst_tz = { batch, 96, 27, 27 };
     memory::dims pool1_kernel = { 3, 3 };
     memory::dims pool1_strides = { 2, 2 };
-    auto pool_padding = { 0, 0 };
+    memory::dims pool_padding = { 0, 0 };
 
     auto pool1_dst_md = memory::desc(
             { pool1_dst_tz }, memory::data_type::f32, memory::format::any);
@@ -191,7 +191,7 @@ void simple_net(int times = 100) {
     memory::dims conv2_bias_tz = { 256 };
     memory::dims conv2_dst_tz = { batch, 256, 27, 27 };
     memory::dims conv2_strides = { 1, 1 };
-    auto conv2_padding = { 2, 2 };
+    memory::dims conv2_padding = { 2, 2 };
 
     std::vector<float> conv2_weights(std::accumulate(
             conv2_weights_tz.begin(), conv2_weights_tz.end(), 1,
@@ -300,7 +300,7 @@ void simple_net(int times = 100) {
     memory::dims pool2_dst_tz = { batch, 256, 13, 13 };
     memory::dims pool2_kernel = { 3, 3 };
     memory::dims pool2_strides = { 2, 2 };
-    auto pool2_padding = { 0, 0 };
+    memory::dims pool2_padding = { 0, 0 };
 
     auto pool2_dst_md = memory::desc(
             { pool2_dst_tz }, memory::data_type::f32, memory::format::any);
@@ -328,7 +328,7 @@ void simple_net(int times = 100) {
     memory::dims conv3_bias_tz = { 384 };
     memory::dims conv3_dst_tz = { batch, 384, 13, 13 };
     memory::dims conv3_strides = { 1, 1 };
-    auto conv3_padding = { 1, 1 };
+    memory::dims conv3_padding = { 1, 1 };
 
     std::vector<float> conv3_weights(std::accumulate(
             conv3_weights_tz.begin(), conv3_weights_tz.end(), 1,
@@ -415,7 +415,7 @@ void simple_net(int times = 100) {
     memory::dims conv4_bias_tz = { 384 };
     memory::dims conv4_dst_tz = { batch, 384, 13, 13 };
     memory::dims conv4_strides = { 1, 1 };
-    auto conv4_padding = { 1, 1 };
+    memory::dims conv4_padding = { 1, 1 };
 
     std::vector<float> conv4_weights(std::accumulate(
             conv4_weights_tz.begin(), conv4_weights_tz.end(), 1,
@@ -501,7 +501,7 @@ void simple_net(int times = 100) {
     memory::dims conv5_bias_tz = { 256 };
     memory::dims conv5_dst_tz = { batch, 256, 13, 13 };
     memory::dims conv5_strides = { 1, 1 };
-    auto conv5_padding = { 1, 1 };
+    memory::dims conv5_padding = { 1, 1 };
 
     std::vector<float> conv5_weights(std::accumulate(
             conv5_weights_tz.begin(), conv5_weights_tz.end(), 1,
@@ -586,7 +586,7 @@ void simple_net(int times = 100) {
     memory::dims pool5_dst_tz = { batch, 256, 6, 6 };
     memory::dims pool5_kernel = { 3, 3 };
     memory::dims pool5_strides = { 2, 2 };
-    auto pool5_padding = { 0, 0 };
+    memory::dims pool5_padding = { 0, 0 };
 
     std::vector<float> pool5_dst(std::accumulate(pool5_dst_tz.begin(),
             pool5_dst_tz.end(), 1, std::multiplies<uint32_t>()));
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_net_int8.cpp b/inference-engine/thirdparty/mkl-dnn/examples/simple_net_int8.cpp
index ec7879b28..7ec0f4c0e 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_net_int8.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_net_int8.cpp
@@ -38,7 +38,7 @@ void simple_net_int8() {
     memory::dims conv_bias_tz = { 384 };
     memory::dims conv_dst_tz = { batch, 384, 13, 13 };
     memory::dims conv_strides = { 1, 1 };
-    auto conv_padding = { 1, 1 };
+    memory::dims conv_padding = { 1, 1 };
 
     /* Set Scaling mode for int8 quantizing */
     const std::vector<float> src_scales = { 1.8f };
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn.cpp b/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn.cpp
index 105979ad1..029e3c44d 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn.cpp
@@ -20,8 +20,6 @@
 #include <numeric>
 #include <string>
 
-#include "mkl_cblas.h"
-
 #include "mkldnn.hpp"
 
 // MSVC doesn't support collapse clause in omp parallel
@@ -49,6 +47,9 @@ std::vector<float> alignment_model(
 std::vector<float> alignments(src_seq_length_max *batch, 1.0f);
 std::vector<float> exp_sums(batch, 1.0f);
 
+const float onef = 1.0, zerof = 0.0;
+const int onei = 1;
+
 void compute_weighted_annotations(float *weighted_annotations,
         int src_seq_length_max, int batch, int feature_size,
         float *weights_annot, float *annotations) {
@@ -56,10 +57,11 @@ void compute_weighted_annotations(float *weighted_annotations,
     // weights_annot is (2c, c)
 
     // annotation[i] = GEMM(weights_annot, enc_dst_layer[i]);
-    cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, feature_size,
-            src_seq_length_max * batch, feature_size, 1.0f, weights_annot,
-            feature_size, annotations, feature_size, 0.0f, weighted_annotations,
-            feature_size);
+    int num_weighted_annotations = src_seq_length_max * batch;
+    mkldnn_sgemm("N", "N",
+            &feature_size, &num_weighted_annotations, &feature_size,
+            &onef, weights_annot, &feature_size, annotations, &feature_size,
+            &zerof, weighted_annotations, &feature_size);
 }
 
 void compute_attention(float *context_vectors, int src_seq_length_max,
@@ -77,13 +79,16 @@ void compute_attention(float *context_vectors, int src_seq_length_max,
     // p is (n, 1)
 
     // first we precompute the weighted_dec_src_layer
-    cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, feature_size, batch,
-            feature_size, 1.0f, weights_src_layer, feature_size, dec_src_layer,
-            feature_size, 0.0f, weighted_src_layer.data(), feature_size);
+    mkldnn_sgemm("N", "N",
+            &feature_size, &batch, &feature_size, &onef,
+            weights_src_layer, &feature_size, dec_src_layer, &feature_size,
+            &zerof, weighted_src_layer.data(), &feature_size);
 
     // then we compute the alignment model
     float *alignment_model_ptr = alignment_model.data();
+#ifdef _OPENMP
 #pragma omp parallel for collapse(2)
+#endif
     for (int i = 0; i < src_seq_length_max; i++) {
         for (int j = 0; j < batch * feature_size; j++)
             alignment_model_ptr[i * batch * feature_size + j] = tanhf(
@@ -92,15 +97,21 @@ void compute_attention(float *context_vectors, int src_seq_length_max,
     }
 
     // gemv with alignments weights. the resulting alignments are in alignments
-    cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, 1,
-            src_seq_length_max * batch, feature_size, 1.0f, weights_alignments,
-            1, alignment_model_ptr, feature_size, 0.0f, alignments.data(), 1);
-
-// softmax on alignments. the resulting context weights are in alignments
+    int num_weighted_annotations = src_seq_length_max * batch;
+    mkldnn_sgemm("N", "N",
+            &onei, &num_weighted_annotations, &feature_size, &onef,
+            weights_alignments, &onei, alignment_model_ptr, &feature_size,
+            &zerof, alignments.data(), &onei);
+
+    // softmax on alignments. the resulting context weights are in alignments
+#ifdef _OPENMP
 #pragma omp parallel for
+#endif
     for (int i = 0; i < batch; i++)
         exp_sums[i] = 0.0f;
+#ifdef _OPENMP
 #pragma omp parallel for collapse(2)
+#endif
     for (int i = 0; i < src_seq_length_max; i++) {
         for (int j = 0; j < batch; j++) {
             alignments[i * batch + j] = expf(alignments[i * batch + j]);
@@ -108,20 +119,26 @@ void compute_attention(float *context_vectors, int src_seq_length_max,
         }
     }
 
+#ifdef _OPENMP
 #pragma omp parallel for collapse(2)
+#endif
     for (int i = 0; i < src_seq_length_max; i++)
         for (int j = 0; j < batch; j++)
             alignments[i * batch + j] /= exp_sums[j];
 
-// then we compute the context vectors
+    // then we compute the context vectors
+#ifdef _OPENMP
 #pragma omp parallel for collapse(2)
+#endif
     for (int i = 0; i < batch; i++)
         for (int j = 0; j < feature_size; j++)
             context_vectors[i * (feature_size + feature_size) + feature_size
                     + j]
                     = 0.0f;
 
+#ifdef _OPENMP
 #pragma omp parallel for collapse(3)
+#endif
     for (int i = 0; i < batch; i++)
         for (int k = 0; k < src_seq_length_max; k++)
             for (int j = 0; j < feature_size; j++)
@@ -133,8 +150,10 @@ void compute_attention(float *context_vectors, int src_seq_length_max,
 
 void copy_context(float *src_iter, int n_layers, int n_states, int batch,
         int feature_size) {
-// we copy the context from the first layer to all other layers
+    // we copy the context from the first layer to all other layers
+#ifdef _OPENMP
 #pragma omp parallel for collapse(3)
+#endif
     for (int k = 1; k < n_layers; k++)
         for (int j = 0; j < batch; j++)
             for (int i = 0; i < feature_size; i++)
@@ -162,6 +181,7 @@ void simple_net() {
         for the context vectors in MKL-DNN yet
      */
 
+    std::vector<primitive> weights_reorders;
     std::vector<primitive> encoder_net;
     std::vector<primitive> decoder_net;
 
@@ -181,8 +201,7 @@ void simple_net() {
     memory::dims enc_bidir_dst_layer_tz
             = { src_seq_length_max, batch, 2 * feature_size };
 
-    /* GNMT encoder: 1 bidirectional layer and 7 unidirectional layers
-     */
+    /* GNMT encoder: 1 bidirectional layer and 7 unidirectional layers */
 
     std::vector<float> user_enc_bidir_wei_layer(
             enc_bidir_n_layers * 2 * feature_size * lstm_n_gates * feature_size,
@@ -193,7 +212,7 @@ void simple_net() {
     std::vector<float> user_enc_bidir_bias(
             enc_bidir_n_layers * 2 * lstm_n_gates * feature_size, 1.0f);
 
-    // We create the memory descriptors used by the user
+    /* Create the memory for user data */
     auto user_enc_bidir_src_layer_md = mkldnn::memory::desc(
             { enc_bidir_src_layer_tz }, mkldnn::memory::data_type::f32,
             mkldnn::memory::format::tnc);
@@ -209,11 +228,6 @@ void simple_net() {
     auto user_enc_bidir_bias_md = mkldnn::memory::desc({ enc_bidir_bias_tz },
             mkldnn::memory::data_type::f32, mkldnn::memory::format::ldgo);
 
-    auto enc_bidir_dst_layer_md = mkldnn::memory::desc(
-            { enc_bidir_dst_layer_tz }, mkldnn::memory::data_type::f32,
-            mkldnn::memory::format::tnc);
-
-    /* We create memories */
     auto user_enc_bidir_src_layer_memory = mkldnn::memory(
             { user_enc_bidir_src_layer_md, cpu_engine }, net_src.data());
     auto user_enc_bidir_wei_layer_memory
@@ -225,40 +239,57 @@ void simple_net() {
     auto user_enc_bidir_bias_memory = mkldnn::memory(
             { user_enc_bidir_bias_md, cpu_engine }, user_enc_bidir_bias.data());
 
-#if 0
-    /// These will be null memories
-    /// @todo introduce predefined null_memory() ?
-    auto enc_bidir_src_iter_memory = mkldnn::memory({enc_bidir_src_iter_md, cpu_engine});
-    auto enc_bidir_dst_iter_memory = mkldnn::memory({enc_bidir_dst_iter_md, cpu_engine});
-#endif
+    /* Create memory descriptors for RNN data w/o specified layout */
+    auto enc_bidir_wei_layer_md = memory::desc({ enc_bidir_weights_layer_tz },
+            memory::data_type::f32, memory::format::any);
+
+    auto enc_bidir_wei_iter_md = memory::desc({ enc_bidir_weights_iter_tz },
+            memory::data_type::f32, memory::format::any);
 
-    /// @todo fix this once cell desc is merged with rnn_desc
+    auto enc_bidir_dst_layer_md = memory::desc({ enc_bidir_dst_layer_tz },
+            memory::data_type::f32, memory::format::any);
+
+    /* Create bidirectional RNN */
     rnn_cell::desc bi_cell(algorithm::vanilla_lstm);
     rnn_forward::desc bi_layer_desc(prop_kind::forward_inference, bi_cell,
             rnn_direction::bidirectional_concat, user_enc_bidir_src_layer_md,
-            zero_md(), user_enc_bidir_wei_layer_md, user_enc_bidir_wei_iter_md,
+            zero_md(), enc_bidir_wei_layer_md, enc_bidir_wei_iter_md,
             user_enc_bidir_bias_md, enc_bidir_dst_layer_md, zero_md());
 
     auto enc_bidir_prim_desc
             = mkldnn::rnn_forward::primitive_desc(bi_layer_desc, cpu_engine);
 
-    // there are currently no reorders
-    /// @todo add a reorder when they will be available
+    /* Create memory primitives for input data and use reorders to reorder
+     * user data to internal representation
+     */
+    auto enc_bidir_wei_layer_memory
+            = memory(enc_bidir_prim_desc.weights_layer_primitive_desc());
+    auto enc_bidir_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_bidir_wei_layer_memory.get_primitive_desc(),
+            enc_bidir_wei_layer_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(enc_bidir_wei_layer_reorder_pd,
+            user_enc_bidir_wei_layer_memory, enc_bidir_wei_layer_memory));
+
+    auto enc_bidir_wei_iter_memory
+            = memory(enc_bidir_prim_desc.weights_iter_primitive_desc());
+    auto enc_bidir_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_enc_bidir_wei_iter_memory.get_primitive_desc(),
+            enc_bidir_wei_iter_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(enc_bidir_wei_iter_reorder_pd,
+            user_enc_bidir_wei_iter_memory, enc_bidir_wei_iter_memory));
 
     auto enc_bidir_dst_layer_memory
             = mkldnn::memory(enc_bidir_prim_desc.dst_layer_primitive_desc());
 
     encoder_net.push_back(
             rnn_forward(enc_bidir_prim_desc, user_enc_bidir_src_layer_memory,
-                    null_memory_, user_enc_bidir_wei_layer_memory,
-                    user_enc_bidir_wei_iter_memory, user_enc_bidir_bias_memory,
+                    null_memory_, enc_bidir_wei_layer_memory,
+                    enc_bidir_wei_iter_memory, user_enc_bidir_bias_memory,
                     enc_bidir_dst_layer_memory, null_memory_, null_memory_));
 
-    /* GNMT encoder: unidirectional layers
-     */
-    // First unidirectinal layer, the scaling from 2*feature size features
-    // comming from the previous layer come
-    /// memories
+    /* GNMT encoder: unidirectional layers */
+    // First unidirectinal layer scales 2 * feature_size output of bidirectional
+    // layer to feature_size output
     std::vector<float> user_enc_uni_first_wei_layer(
             1 * 1 * 2 * feature_size * lstm_n_gates * feature_size, 1.0f);
     std::vector<float> user_enc_uni_first_wei_iter(
@@ -282,13 +313,9 @@ void simple_net() {
     auto user_enc_uni_first_bias_md = mkldnn::memory::desc(
             { user_enc_uni_first_bias_dims }, mkldnn::memory::data_type::f32,
             mkldnn::memory::format::ldgo);
-    auto enc_uni_first_dst_layer_md = mkldnn::memory::desc(
-            { enc_uni_first_dst_layer_dims }, mkldnn::memory::data_type::f32,
-            mkldnn::memory::format::tnc);
     auto user_enc_uni_first_wei_layer_memory
             = mkldnn::memory({ user_enc_uni_first_wei_layer_md, cpu_engine },
                     user_enc_uni_first_wei_layer.data());
-    ;
     auto user_enc_uni_first_wei_iter_memory
             = mkldnn::memory({ user_enc_uni_first_wei_iter_md, cpu_engine },
                     user_enc_uni_first_wei_iter.data());
@@ -296,29 +323,55 @@ void simple_net() {
             = mkldnn::memory({ user_enc_uni_first_bias_md, cpu_engine },
                     user_enc_uni_first_bias.data());
 
+    auto enc_uni_first_wei_layer_md
+            = memory::desc({ user_enc_uni_first_wei_layer_dims },
+                    memory::data_type::f32, memory::format::any);
+    auto enc_uni_first_wei_iter_md
+            = memory::desc({ user_enc_uni_first_wei_iter_dims },
+                    memory::data_type::f32, memory::format::any);
+    auto enc_uni_first_dst_layer_md
+            = memory::desc({ enc_uni_first_dst_layer_dims },
+                    memory::data_type::f32, memory::format::any);
+
     /// @todo add suport for residual connections
     /// should it be a set residual in op_desc or a field to set manually?
     /// should be an integer to specify at which layer to start
     rnn_cell::desc enc_uni_first_cell(algorithm::vanilla_lstm);
     rnn_forward::desc enc_uni_first_layer_desc(prop_kind::forward_inference,
             enc_uni_first_cell, rnn_direction::unidirectional_left2right,
-            enc_bidir_dst_layer_md, zero_md(), user_enc_uni_first_wei_layer_md,
-            user_enc_uni_first_wei_iter_md, user_enc_uni_first_bias_md,
+            enc_bidir_dst_layer_md, zero_md(), enc_uni_first_wei_layer_md,
+            enc_uni_first_wei_iter_md, user_enc_uni_first_bias_md,
             enc_uni_first_dst_layer_md, zero_md());
     auto enc_uni_first_prim_desc = mkldnn::rnn_forward::primitive_desc(
             enc_uni_first_layer_desc, cpu_engine);
+
+    auto enc_uni_first_wei_layer_memory
+            = memory(enc_uni_first_prim_desc.weights_layer_primitive_desc());
+    auto enc_uni_first_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_first_wei_layer_memory.get_primitive_desc(),
+            enc_uni_first_wei_layer_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(enc_uni_first_wei_layer_reorder_pd,
+            user_enc_uni_first_wei_layer_memory,
+            enc_uni_first_wei_layer_memory));
+
+    auto enc_uni_first_wei_iter_memory
+            = memory(enc_uni_first_prim_desc.weights_iter_primitive_desc());
+    auto enc_uni_first_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_first_wei_iter_memory.get_primitive_desc(),
+            enc_uni_first_wei_iter_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(enc_uni_first_wei_iter_reorder_pd,
+            user_enc_uni_first_wei_iter_memory, enc_uni_first_wei_iter_memory));
+
     auto enc_uni_first_dst_layer_memory = mkldnn::memory(
             enc_uni_first_prim_desc.dst_layer_primitive_desc());
 
-    /// @todo add a reorder when they will be available
     encoder_net.push_back(rnn_forward(enc_uni_first_prim_desc,
             enc_bidir_dst_layer_memory, null_memory_,
-            user_enc_uni_first_wei_layer_memory,
-            user_enc_uni_first_wei_iter_memory, user_enc_uni_first_bias_memory,
+            enc_uni_first_wei_layer_memory,
+            enc_uni_first_wei_iter_memory, user_enc_uni_first_bias_memory,
             enc_uni_first_dst_layer_memory, null_memory_, null_memory_));
 
-    // Remainging Unidirectional layers
-    /// memories
+    /* Remainging unidirectional layers */
     std::vector<float> user_enc_uni_wei_layer((enc_unidir_n_layers - 1) * 1
                     * feature_size * lstm_n_gates * feature_size, 1.0f);
     std::vector<float> user_enc_uni_wei_iter((enc_unidir_n_layers - 1) * 1
@@ -341,43 +394,60 @@ void simple_net() {
             mkldnn::memory::format::ldigo);
     auto user_enc_uni_bias_md = mkldnn::memory::desc({ user_enc_uni_bias_dims },
             mkldnn::memory::data_type::f32, mkldnn::memory::format::ldgo);
-    auto enc_dst_layer_md = mkldnn::memory::desc({ enc_dst_layer_dims },
-            mkldnn::memory::data_type::f32, mkldnn::memory::format::tnc);
     auto user_enc_uni_wei_layer_memory
             = mkldnn::memory({ user_enc_uni_wei_layer_md, cpu_engine },
                     user_enc_uni_wei_layer.data());
-    ;
     auto user_enc_uni_wei_iter_memory
             = mkldnn::memory({ user_enc_uni_wei_iter_md, cpu_engine },
                     user_enc_uni_wei_iter.data());
     auto user_enc_uni_bias_memory = mkldnn::memory(
             { user_enc_uni_bias_md, cpu_engine }, user_enc_uni_bias.data());
 
+    auto enc_uni_wei_layer_md = memory::desc({ user_enc_uni_wei_layer_dims },
+            memory::data_type::f32, memory::format::any);
+    auto enc_uni_wei_iter_md = memory::desc({ user_enc_uni_wei_iter_dims },
+            memory::data_type::f32, memory::format::any);
+    auto enc_dst_layer_md = memory::desc({ enc_dst_layer_dims },
+            memory::data_type::f32, memory::format::any);
+
     /// @todo add suport for residual connections
     /// should it be a set residual in op_desc or a field to set manually?
     /// should be an integer to specify at which layer to start
     rnn_cell::desc enc_uni_cell(algorithm::vanilla_lstm);
     rnn_forward::desc enc_uni_layer_desc(prop_kind::forward_inference,
             enc_uni_cell, rnn_direction::unidirectional_left2right,
-            enc_uni_first_dst_layer_md, zero_md(), user_enc_uni_wei_layer_md,
-            user_enc_uni_wei_iter_md, user_enc_uni_bias_md, enc_dst_layer_md,
+            enc_uni_first_dst_layer_md, zero_md(), enc_uni_wei_layer_md,
+            enc_uni_wei_iter_md, user_enc_uni_bias_md, enc_dst_layer_md,
             zero_md());
     auto enc_uni_prim_desc = mkldnn::rnn_forward::primitive_desc(
             enc_uni_layer_desc, cpu_engine);
+
+    auto enc_uni_wei_layer_memory
+            = memory(enc_uni_prim_desc.weights_layer_primitive_desc());
+    auto enc_uni_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_wei_layer_memory.get_primitive_desc(),
+            enc_uni_wei_layer_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(enc_uni_wei_layer_reorder_pd,
+            user_enc_uni_wei_layer_memory, enc_uni_wei_layer_memory));
+
+    auto enc_uni_wei_iter_memory
+            = memory(enc_uni_prim_desc.weights_iter_primitive_desc());
+    auto enc_uni_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_wei_iter_memory.get_primitive_desc(),
+            enc_uni_wei_iter_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(enc_uni_wei_iter_reorder_pd,
+            user_enc_uni_wei_iter_memory, enc_uni_wei_iter_memory));
+
     auto enc_dst_layer_memory
             = mkldnn::memory(enc_uni_prim_desc.dst_layer_primitive_desc());
 
-    /// @todo add a reorder when they will be available
     encoder_net.push_back(
             rnn_forward(enc_uni_prim_desc, enc_uni_first_dst_layer_memory,
-                    null_memory_, user_enc_uni_wei_layer_memory,
-                    user_enc_uni_wei_iter_memory, user_enc_uni_bias_memory,
+                    null_memory_, enc_uni_wei_layer_memory,
+                    enc_uni_wei_iter_memory, user_enc_uni_bias_memory,
                     enc_dst_layer_memory, null_memory_, null_memory_));
 
-    /*
-     * GNMT: decoder with attention mechanism
-     */
-    // user provided memories
+    /* GNMT: decoder with attention mechanism */
     std::vector<float> user_dec_wei_layer(
             dec_n_layers * 1 * feature_size * lstm_n_gates * feature_size,
             1.0f);
@@ -402,8 +472,7 @@ void simple_net() {
             = { dec_n_layers, 1, lstm_n_gates, feature_size };
 
     memory::dims dec_src_layer_dims = { 1, batch, feature_size };
-    memory::dims dec_dst_layer_dims
-            = { tgt_seq_length_max, batch, feature_size };
+    memory::dims dec_dst_layer_dims = { 1, batch, feature_size };
 
     // We will use the same memory for dec_src_iter and dec_dst_iter
     // However, dec_src_iter has a context vector but not
@@ -434,7 +503,6 @@ void simple_net() {
             mkldnn::memory::data_type::f32, mkldnn::memory::format::ldsnc);
     auto user_dec_wei_layer_memory = mkldnn::memory(
             { user_dec_wei_layer_md, cpu_engine }, user_dec_wei_layer.data());
-    ;
     auto user_dec_wei_iter_memory = mkldnn::memory(
             { user_dec_wei_iter_md, cpu_engine }, user_dec_wei_iter.data());
     auto user_dec_bias_memory = mkldnn::memory(
@@ -444,6 +512,12 @@ void simple_net() {
     auto dec_src_layer_memory
             = mkldnn::memory({ dec_src_layer_md, cpu_engine });
 
+    auto dec_wei_layer_md = mkldnn::memory::desc(
+            { user_dec_wei_layer_dims }, mkldnn::memory::data_type::f32,
+            mkldnn::memory::format::any);
+    auto dec_wei_iter_md = mkldnn::memory::desc({ user_dec_wei_iter_dims },
+            mkldnn::memory::data_type::f32, mkldnn::memory::format::any);
+
     // As mentioned above, we create a view without context out of the
     // memory with context.
     auto dec_dst_iter_memory = mkldnn::memory({ dec_dst_iter_md, cpu_engine });
@@ -457,15 +531,30 @@ void simple_net() {
     rnn_cell::desc dec_cell(algorithm::vanilla_lstm);
     rnn_forward::desc dec_ctx_desc(prop_kind::forward_inference, dec_cell,
             rnn_direction::unidirectional_left2right, dec_src_layer_md,
-            dec_dst_iter_md, user_dec_wei_layer_md, user_dec_wei_iter_md,
+            dec_dst_iter_md, dec_wei_layer_md, dec_wei_iter_md,
             user_dec_bias_md, dec_dst_layer_md, dec_dst_iter_noctx_md);
     auto dec_ctx_prim_desc
             = mkldnn::rnn_forward::primitive_desc(dec_ctx_desc, cpu_engine);
 
-    /// @todo add a reorder when they will be available
+    auto dec_wei_layer_memory
+            = memory(dec_ctx_prim_desc.weights_layer_primitive_desc());
+    auto dec_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_dec_wei_layer_memory.get_primitive_desc(),
+            dec_wei_layer_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(dec_wei_layer_reorder_pd,
+            user_dec_wei_layer_memory, dec_wei_layer_memory));
+
+    auto dec_wei_iter_memory
+            = memory(dec_ctx_prim_desc.weights_iter_primitive_desc());
+    auto dec_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_dec_wei_iter_memory.get_primitive_desc(),
+            dec_wei_iter_memory.get_primitive_desc());
+    weights_reorders.push_back(reorder(dec_wei_iter_reorder_pd,
+            user_dec_wei_iter_memory, dec_wei_iter_memory));
+
     decoder_net.push_back(rnn_forward(dec_ctx_prim_desc, dec_src_layer_memory,
-            dec_dst_iter_memory, user_dec_wei_layer_memory,
-            user_dec_wei_iter_memory, user_dec_bias_memory,
+            dec_dst_iter_memory, dec_wei_layer_memory,
+            dec_wei_iter_memory, user_dec_bias_memory,
             user_dec_dst_layer_memory, dec_dst_iter_memory, null_memory_));
 
     // allocating temporary buffer for attention mechanism
@@ -476,10 +565,8 @@ void simple_net() {
        Execution
      */
     auto execute = [&]() {
-        // We save the original handle on dst_layer as we will modify it at each
-        // iteration
-        void *dst_layer_original_handle
-                = user_dec_dst_layer_memory.get_data_handle();
+        // reorder weights to MKLDNN internal representation
+        stream(stream::kind::eager).submit(weights_reorders).wait();
 
         // run encoder (1 stream)
         stream(stream::kind::eager).submit(encoder_net).wait();
@@ -490,43 +577,40 @@ void simple_net() {
                 user_weights_annotation.data(),
                 (float *)enc_dst_layer_memory.get_data_handle());
 
-        // We initialise dst_layer[0] to the embedding of </s>, which are
-        // assumed to
-        // be 0 here
-        memset(dst_layer_original_handle, 0,
-                batch * feature_size * sizeof(float));
+        // We initialise src_layer to the embedding of </s>, which
+        // are assumed to be 0 here
+        memset(dec_src_layer_memory.get_data_handle(), 0,
+               dec_src_layer_memory.get_primitive_desc().get_size());
+        // From now on, src points to the output of the last iteration
 
         for (int i = 0; i < tgt_seq_length_max; i++) {
-            float *dst_layer_handle
-                    = (float *)user_dec_dst_layer_memory.get_data_handle();
-            float *dst_iter_handle
-                    = (float *)dec_dst_iter_memory.get_data_handle();
+            float *src_att_layer_handle
+                    = (float *) dec_src_layer_memory.get_data_handle();
+            float *src_att_iter_handle
+                    = (float *) dec_dst_iter_memory.get_data_handle();
 
             // Compute attention context vector into the first layer src_iter
-            compute_attention(dst_iter_handle, src_seq_length_max, batch,
+            compute_attention(src_att_iter_handle, src_seq_length_max, batch,
                     feature_size, user_weights_attention_src_layer.data(),
-                    dst_layer_handle,
+                    src_att_layer_handle,
                     (float *)enc_bidir_dst_layer_memory.get_data_handle(),
                     weighted_annotations.data(),
                     user_weights_alignments.data());
 
             // copy the context vectors to all layers of src_iter
-            copy_context(dst_iter_handle, dec_n_layers, lstm_n_states, batch,
+            copy_context(src_att_iter_handle, dec_n_layers, lstm_n_states, batch,
                     feature_size);
 
-            // We set src_layer to be the previously
-            dec_src_layer_memory.set_data_handle(dst_layer_handle);
-
             // run the decoder iteration
             stream(stream::kind::eager).submit(decoder_net).wait();
 
-            // Move the handle on the dst layer to the next iteration
+            // Move the handle on the src/dst layer to the next iteration
+            auto dst_layer_handle = (float *) user_dec_dst_layer_memory.get_data_handle();
+            dec_src_layer_memory.set_data_handle(dst_layer_handle);
             user_dec_dst_layer_memory.set_data_handle(
                     dst_layer_handle + batch * feature_size);
         }
-        // we restore the handle to the begining of the buffer
-        user_dec_dst_layer_memory.set_data_handle(dst_layer_original_handle);
-        /// @todo run the softmax after each iteration or not?
+
     };
 
     execute();
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_int8.cpp b/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_int8.cpp
new file mode 100644
index 000000000..78220287f
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_int8.cpp
@@ -0,0 +1,709 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cstring>
+#include <iostream>
+#include <math.h>
+#include <numeric>
+#include <string>
+
+#include "mkldnn.hpp"
+
+// MSVC doesn't support collapse clause in omp parallel
+#if defined(_MSC_VER) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+#define collapse(x)
+#endif
+
+using namespace mkldnn;
+
+const int batch = 64;
+const int src_seq_length_max = 25;
+const int tgt_seq_length_max = 27;
+
+const int feature_size = 1024;
+
+const int enc_bidir_n_layers = 1;
+const int enc_unidir_n_layers = 7;
+const int dec_n_layers = 8;
+
+const int lstm_n_gates = 4;
+const int lstm_n_states = 2;
+std::vector<int32_t> weighted_src_layer(batch *feature_size, 1);
+std::vector<float> alignment_model(
+        src_seq_length_max *batch *feature_size, 1.0f);
+std::vector<float> alignments(src_seq_length_max *batch, 1.0f);
+std::vector<float> exp_sums(batch, 1.0f);
+
+const float onef = 1.0, zerof = 0.0;
+const int onei = 1;
+
+void compute_weighted_annotations(float *weighted_annotations,
+        int src_seq_length_max, int batch, int feature_size,
+        float *weights_annot, float *annotations) {
+    // annotations(aka enc_dst_layer) is (t, n, 2c)
+    // weights_annot is (2c, c)
+
+    int num_weighted_annotations = src_seq_length_max * batch;
+    // annotation[i] = GEMM(weights_annot, enc_dst_layer[i]);
+    mkldnn_sgemm("N", "N", &feature_size, &num_weighted_annotations,
+            &feature_size, &onef, weights_annot, &feature_size, annotations,
+            &feature_size, &zerof, weighted_annotations, &feature_size);
+}
+
+void compute_sum_of_rows(int8_t *a, int rows, int cols, int32_t *a_reduced) {
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (int i = 0; i < cols; i++) {
+        a_reduced[i] = 0;
+        for (int j = 0; j < rows; j++) {
+            a_reduced[i] += (int32_t)a[i * rows + j];
+        }
+    }
+}
+
+void compute_attention(float *context_vectors, int src_seq_length_max,
+        int batch, int feature_size, int8_t *weights_src_layer,
+        float weights_src_layer_scale, int32_t *compensation,
+        uint8_t *dec_src_layer, float dec_src_layer_scale,
+        float dec_src_layer_shift, uint8_t *annotations,
+        float *weighted_annotations, float *weights_alignments) {
+    // dst_iter : (n, c) matrix
+    // src_layer: (n, c) matrix
+    // weighted_annotations (t, n, c)
+
+    // weights_yi is (c, c)
+    // weights_ai is (c, 1)
+    // tmp[i] is (n, c)
+    // a[i] is (n, 1)
+    // p is (n, 1)
+
+    // first we precompute the weighted_dec_src_layer
+    int8_t ao = 0;
+    int8_t bo = 0;
+    int32_t co = 0;
+    mkldnn_gemm_s8u8s32("N", "N", "F", &feature_size, &batch, &feature_size,
+            &onef, weights_src_layer, &feature_size, &ao, dec_src_layer,
+            &feature_size, &bo, &zerof, weighted_src_layer.data(),
+            &feature_size, &co);
+
+    // then we compute the alignment model
+    float *alignment_model_ptr = alignment_model.data();
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2)
+#endif
+    for (int i = 0; i < src_seq_length_max; i++) {
+        for (int j = 0; j < batch; j++) {
+            for (int k = 0; k < feature_size; k++) {
+                size_t tnc_offset
+                        = i * batch * feature_size + j * feature_size + k;
+                alignment_model_ptr[tnc_offset] = tanhf(
+                        (float)(weighted_src_layer.data()[j * feature_size + k]
+                                - dec_src_layer_shift * compensation[k])
+                                / (dec_src_layer_scale
+                                          * weights_src_layer_scale)
+                        + weighted_annotations[tnc_offset]);
+            }
+        }
+    }
+
+    // gemv with alignments weights. the resulting alignments are in alignments
+    int num_weighted_annotations = src_seq_length_max * batch;
+    mkldnn_sgemm("N", "N", &onei, &num_weighted_annotations, &feature_size,
+            &onef, weights_alignments, &onei, alignment_model_ptr,
+            &feature_size, &zerof, alignments.data(), &onei);
+
+// softmax on alignments. the resulting context weights are in alignments
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+    for (int i = 0; i < batch; i++)
+        exp_sums[i] = 0.0f;
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2)
+#endif
+    for (int i = 0; i < src_seq_length_max; i++) {
+        for (int j = 0; j < batch; j++) {
+            alignments[i * batch + j] = expf(alignments[i * batch + j]);
+            exp_sums[j] += alignments[i * batch + j];
+        }
+    }
+
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2)
+#endif
+    for (int i = 0; i < src_seq_length_max; i++)
+        for (int j = 0; j < batch; j++)
+            alignments[i * batch + j] /= exp_sums[j];
+
+// then we compute the context vectors
+#ifdef _OPENMP
+#pragma omp parallel for collapse(2)
+#endif
+    for (int i = 0; i < batch; i++)
+        for (int j = 0; j < feature_size; j++)
+            context_vectors[i * (feature_size + feature_size) + feature_size
+                    + j]
+                    = 0.0f;
+
+#ifdef _OPENMP
+#pragma omp parallel for collapse(3)
+#endif
+    for (int i = 0; i < batch; i++)
+        for (int k = 0; k < src_seq_length_max; k++)
+            for (int j = 0; j < feature_size; j++)
+                context_vectors[i * (feature_size + feature_size) + feature_size
+                        + j]
+                        += alignments[k * batch + i]
+                        * (((float)annotations[j
+                                   + feature_size * (i + batch * k)]
+                                   - dec_src_layer_shift)
+                        / dec_src_layer_scale);
+}
+
+void copy_context(float *src_iter, int n_layers, int n_states, int batch,
+        int feature_size) {
+// we copy the context from the first layer to all other layers
+#ifdef _OPENMP
+#pragma omp parallel for collapse(3)
+#endif
+    for (int k = 1; k < n_layers; k++)
+        for (int j = 0; j < batch; j++)
+            for (int i = 0; i < feature_size; i++)
+                src_iter[(k * n_states * batch + j)
+                                * (feature_size + feature_size)
+                        + i]
+                        = src_iter[j * (feature_size + feature_size) + i];
+}
+
+void simple_net() {
+    auto cpu_engine = engine(engine::cpu, 0);
+    auto null_memory_ = null_memory(cpu_engine);
+
+    /*
+      GNMT low precicion example.
+      Note, we do not implement connection yet.
+      For the encoder we use:
+      - one primitive for the bidirectional layer of the encoder
+      - one primitive for all remaining unidirectional layers in the encoder
+      For the decoder we use:
+      - one primitive for the first iteration
+      - one primitive for all subsequent iterations in the decoder. Note that
+        in this example, this primitive computes the states in place.
+      - the attention mechanism is implemented separately as there is no support
+        for the context vectors in MKL-DNN yet
+     */
+
+    std::vector<primitive> weights_reorders;
+    std::vector<primitive> encoder_net;
+    std::vector<primitive> decoder_net;
+
+    std::vector<float> net_src(batch * src_seq_length_max * feature_size, 0.1f);
+    std::vector<float> net_dst(batch * tgt_seq_length_max * feature_size, 0.1f);
+
+    /* Quantization factors for fp32 data */
+
+    const float data_shift = 64.;
+    const float data_scale = 63.;
+    const int weights_scale_mask = 3; // 11 for last two dimensions of ldigo
+    std::vector<float> weights_scales(lstm_n_gates * feature_size);
+    /* assign halves of vector with arbitrary values */
+    const int scales_half = lstm_n_gates * feature_size / 2;
+    std::fill(
+            weights_scales.begin(), weights_scales.begin() + scales_half, 30.f);
+    std::fill(weights_scales.begin() + scales_half + 1, weights_scales.end(),
+            65.5f);
+
+    /* Encoder */
+
+    memory::dims enc_bidir_src_layer_tz
+            = { src_seq_length_max, batch, feature_size };
+    memory::dims enc_bidir_weights_layer_tz = { enc_bidir_n_layers, 2,
+        feature_size, lstm_n_gates, feature_size };
+    memory::dims enc_bidir_weights_iter_tz = { enc_bidir_n_layers, 2,
+        feature_size, lstm_n_gates, feature_size };
+    memory::dims enc_bidir_bias_tz
+            = { enc_bidir_n_layers, 2, lstm_n_gates, feature_size };
+    memory::dims enc_bidir_dst_layer_tz
+            = { src_seq_length_max, batch, 2 * feature_size };
+
+    /* GNMT encoder: 1 bidirectional layer and 7 unidirectional layers */
+
+    std::vector<float> user_enc_bidir_wei_layer(
+            enc_bidir_n_layers * 2 * feature_size * lstm_n_gates * feature_size,
+            0.3f);
+    std::vector<float> user_enc_bidir_wei_iter(
+            enc_bidir_n_layers * 2 * feature_size * lstm_n_gates * feature_size,
+            0.2f);
+    std::vector<float> user_enc_bidir_bias(
+            enc_bidir_n_layers * 2 * lstm_n_gates * feature_size, 1.0f);
+
+    /* Create the memory for user data */
+    auto user_enc_bidir_src_layer_md = memory::desc({ enc_bidir_src_layer_tz },
+            memory::data_type::f32, memory::format::tnc);
+
+    auto user_enc_bidir_wei_layer_md
+            = memory::desc({ enc_bidir_weights_layer_tz },
+                    memory::data_type::f32, memory::format::ldigo);
+
+    auto user_enc_bidir_wei_iter_md
+            = memory::desc({ enc_bidir_weights_iter_tz },
+                    memory::data_type::f32, memory::format::ldigo);
+
+    auto user_enc_bidir_bias_md = memory::desc({ enc_bidir_bias_tz },
+            memory::data_type::f32, memory::format::ldgo);
+
+    auto user_enc_bidir_src_layer_memory = memory(
+            { user_enc_bidir_src_layer_md, cpu_engine }, net_src.data());
+    auto user_enc_bidir_wei_layer_memory
+            = memory({ user_enc_bidir_wei_layer_md, cpu_engine },
+                    user_enc_bidir_wei_layer.data());
+    auto user_enc_bidir_wei_iter_memory
+            = memory({ user_enc_bidir_wei_iter_md, cpu_engine },
+                    user_enc_bidir_wei_iter.data());
+    auto user_enc_bidir_bias_memory = memory(
+            { user_enc_bidir_bias_md, cpu_engine }, user_enc_bidir_bias.data());
+
+    /* Create memory descriptors for RNN data w/o specified layout */
+    auto enc_bidir_src_layer_md = memory::desc({ enc_bidir_src_layer_tz },
+            memory::data_type::u8, memory::format::any);
+
+    auto enc_bidir_wei_layer_md = memory::desc({ enc_bidir_weights_layer_tz },
+            memory::data_type::s8, memory::format::any);
+
+    auto enc_bidir_wei_iter_md = memory::desc({ enc_bidir_weights_iter_tz },
+            memory::data_type::s8, memory::format::any);
+
+    auto enc_bidir_dst_layer_md = memory::desc({ enc_bidir_dst_layer_tz },
+            memory::data_type::u8, memory::format::any);
+
+    /* Create bidirectional RNN */
+    rnn_cell::desc bi_cell(algorithm::vanilla_lstm);
+
+    /* Check if int8 RNN is supported */
+    try {
+        rnn_forward::desc bi_layer_desc(prop_kind::forward_inference, bi_cell,
+                rnn_direction::bidirectional_concat, enc_bidir_src_layer_md,
+                zero_md(), enc_bidir_wei_layer_md, enc_bidir_wei_iter_md,
+                user_enc_bidir_bias_md, enc_bidir_dst_layer_md, zero_md());
+    } catch (error &e) {
+        if (e.status == mkldnn_unimplemented) {
+            std::cerr
+                    << "Dependency on Intel(R) MKL version 2019u2 or newer is "
+                       "required for int8 RNN"
+                    << std::endl;
+        }
+        throw;
+    }
+
+    rnn_forward::desc bi_layer_desc(prop_kind::forward_inference, bi_cell,
+            rnn_direction::bidirectional_concat, enc_bidir_src_layer_md,
+            zero_md(), enc_bidir_wei_layer_md, enc_bidir_wei_iter_md,
+            user_enc_bidir_bias_md, enc_bidir_dst_layer_md, zero_md());
+
+    /* Define RNN attributes that store quantization parameters */
+    primitive_attr attr;
+    attr.set_int_output_round_mode(round_mode::round_nearest);
+    attr.set_rnn_data_qparams(data_scale, data_shift);
+    attr.set_rnn_weights_qparams(weights_scale_mask, weights_scales);
+
+    auto enc_bidir_prim_desc
+            = rnn_forward::primitive_desc(bi_layer_desc, attr, cpu_engine);
+
+    /* Create memory primitives for input data and use reorders to quantize
+     * values to int8
+     * NOTE: same attributes are used when creating RNN primitive and reorders
+     */
+    auto enc_bidir_src_layer_memory
+            = memory(enc_bidir_prim_desc.src_layer_primitive_desc());
+    auto enc_bidir_src_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_bidir_src_layer_memory.get_primitive_desc(),
+            enc_bidir_src_layer_memory.get_primitive_desc(), attr);
+    encoder_net.push_back(reorder(enc_bidir_src_layer_reorder_pd,
+            user_enc_bidir_src_layer_memory, enc_bidir_src_layer_memory));
+
+    auto enc_bidir_wei_layer_memory
+            = memory(enc_bidir_prim_desc.weights_layer_primitive_desc());
+    auto enc_bidir_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_bidir_wei_layer_memory.get_primitive_desc(),
+            enc_bidir_wei_layer_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(enc_bidir_wei_layer_reorder_pd,
+            user_enc_bidir_wei_layer_memory, enc_bidir_wei_layer_memory));
+
+    auto enc_bidir_wei_iter_memory
+            = memory(enc_bidir_prim_desc.weights_iter_primitive_desc());
+    auto enc_bidir_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_enc_bidir_wei_iter_memory.get_primitive_desc(),
+            enc_bidir_wei_iter_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(enc_bidir_wei_iter_reorder_pd,
+            user_enc_bidir_wei_iter_memory, enc_bidir_wei_iter_memory));
+
+    auto enc_bidir_dst_layer_memory
+            = memory(enc_bidir_prim_desc.dst_layer_primitive_desc());
+
+    encoder_net.push_back(
+            rnn_forward(enc_bidir_prim_desc, enc_bidir_src_layer_memory,
+                    null_memory_, enc_bidir_wei_layer_memory,
+                    enc_bidir_wei_iter_memory, user_enc_bidir_bias_memory,
+                    enc_bidir_dst_layer_memory, null_memory_, null_memory_));
+
+    /* GNMT encoder: unidirectional layers */
+    // First unidirectinal layer scales 2 * feature_size output of bidirectional
+    // layer to feature_size output
+    std::vector<float> user_enc_uni_first_wei_layer(
+            1 * 1 * 2 * feature_size * lstm_n_gates * feature_size, 0.3f);
+    std::vector<float> user_enc_uni_first_wei_iter(
+            1 * 1 * feature_size * lstm_n_gates * feature_size, 0.2f);
+    std::vector<float> user_enc_uni_first_bias(
+            1 * 1 * lstm_n_gates * feature_size, 1.0f);
+
+    memory::dims user_enc_uni_first_wei_layer_dims
+            = { 1, 1, 2 * feature_size, lstm_n_gates, feature_size };
+    memory::dims user_enc_uni_first_wei_iter_dims
+            = { 1, 1, feature_size, lstm_n_gates, feature_size };
+    memory::dims user_enc_uni_first_bias_dims
+            = { 1, 1, lstm_n_gates, feature_size };
+    memory::dims enc_uni_first_dst_layer_dims
+            = { src_seq_length_max, batch, feature_size };
+
+    auto user_enc_uni_first_wei_layer_md
+            = memory::desc({ user_enc_uni_first_wei_layer_dims },
+                    memory::data_type::f32, memory::format::ldigo);
+    auto user_enc_uni_first_wei_iter_md
+            = memory::desc({ user_enc_uni_first_wei_iter_dims },
+                    memory::data_type::f32, memory::format::ldigo);
+    auto user_enc_uni_first_bias_md
+            = memory::desc({ user_enc_uni_first_bias_dims },
+                    memory::data_type::f32, memory::format::ldgo);
+    auto user_enc_uni_first_wei_layer_memory
+            = memory({ user_enc_uni_first_wei_layer_md, cpu_engine },
+                    user_enc_uni_first_wei_layer.data());
+    auto user_enc_uni_first_wei_iter_memory
+            = memory({ user_enc_uni_first_wei_iter_md, cpu_engine },
+                    user_enc_uni_first_wei_iter.data());
+    auto user_enc_uni_first_bias_memory
+            = memory({ user_enc_uni_first_bias_md, cpu_engine },
+                    user_enc_uni_first_bias.data());
+
+    auto enc_uni_first_wei_layer_md
+            = memory::desc({ user_enc_uni_first_wei_layer_dims },
+                    memory::data_type::s8, memory::format::any);
+    auto enc_uni_first_wei_iter_md
+            = memory::desc({ user_enc_uni_first_wei_iter_dims },
+                    memory::data_type::s8, memory::format::any);
+    auto enc_uni_first_dst_layer_md
+            = memory::desc({ enc_uni_first_dst_layer_dims },
+                    memory::data_type::u8, memory::format::any);
+
+    rnn_cell::desc enc_uni_first_cell(algorithm::vanilla_lstm);
+    rnn_forward::desc enc_uni_first_layer_desc(prop_kind::forward_inference,
+            enc_uni_first_cell, rnn_direction::unidirectional_left2right,
+            enc_bidir_dst_layer_md, zero_md(), enc_uni_first_wei_layer_md,
+            enc_uni_first_wei_iter_md, user_enc_uni_first_bias_md,
+            enc_uni_first_dst_layer_md, zero_md());
+
+    auto enc_uni_first_prim_desc = rnn_forward::primitive_desc(
+            enc_uni_first_layer_desc, attr, cpu_engine);
+
+    auto enc_uni_first_wei_layer_memory
+            = memory(enc_uni_first_prim_desc.weights_layer_primitive_desc());
+    auto enc_uni_first_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_first_wei_layer_memory.get_primitive_desc(),
+            enc_uni_first_wei_layer_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(enc_uni_first_wei_layer_reorder_pd,
+            user_enc_uni_first_wei_layer_memory,
+            enc_uni_first_wei_layer_memory));
+
+    auto enc_uni_first_wei_iter_memory
+            = memory(enc_uni_first_prim_desc.weights_iter_primitive_desc());
+    auto enc_uni_first_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_first_wei_iter_memory.get_primitive_desc(),
+            enc_uni_first_wei_iter_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(enc_uni_first_wei_iter_reorder_pd,
+            user_enc_uni_first_wei_iter_memory, enc_uni_first_wei_iter_memory));
+
+    auto enc_uni_first_dst_layer_memory
+            = memory(enc_uni_first_prim_desc.dst_layer_primitive_desc());
+
+    encoder_net.push_back(rnn_forward(enc_uni_first_prim_desc,
+            enc_bidir_dst_layer_memory, null_memory_,
+            enc_uni_first_wei_layer_memory, enc_uni_first_wei_iter_memory,
+            user_enc_uni_first_bias_memory, enc_uni_first_dst_layer_memory,
+            null_memory_, null_memory_));
+
+    /* Remainging unidirectional layers */
+    std::vector<float> user_enc_uni_wei_layer((enc_unidir_n_layers - 1) * 1
+                    * feature_size * lstm_n_gates * feature_size,
+            0.3f);
+    std::vector<float> user_enc_uni_wei_iter((enc_unidir_n_layers - 1) * 1
+                    * feature_size * lstm_n_gates * feature_size,
+            0.2f);
+    std::vector<float> user_enc_uni_bias(
+            (enc_unidir_n_layers - 1) * 1 * lstm_n_gates * feature_size, 1.0f);
+
+    memory::dims user_enc_uni_wei_layer_dims = { (enc_unidir_n_layers - 1), 1,
+        feature_size, lstm_n_gates, feature_size };
+    memory::dims user_enc_uni_wei_iter_dims = { (enc_unidir_n_layers - 1), 1,
+        feature_size, lstm_n_gates, feature_size };
+    memory::dims user_enc_uni_bias_dims
+            = { (enc_unidir_n_layers - 1), 1, lstm_n_gates, feature_size };
+    memory::dims enc_dst_layer_dims
+            = { src_seq_length_max, batch, feature_size };
+
+    auto user_enc_uni_wei_layer_md
+            = memory::desc({ user_enc_uni_wei_layer_dims },
+                    memory::data_type::f32, memory::format::ldigo);
+    auto user_enc_uni_wei_iter_md = memory::desc({ user_enc_uni_wei_iter_dims },
+            memory::data_type::f32, memory::format::ldigo);
+    auto user_enc_uni_bias_md = memory::desc({ user_enc_uni_bias_dims },
+            memory::data_type::f32, memory::format::ldgo);
+
+    auto user_enc_uni_wei_layer_memory
+            = memory({ user_enc_uni_wei_layer_md, cpu_engine },
+                    user_enc_uni_wei_layer.data());
+    auto user_enc_uni_wei_iter_memory
+            = memory({ user_enc_uni_wei_iter_md, cpu_engine },
+                    user_enc_uni_wei_iter.data());
+    auto user_enc_uni_bias_memory = memory(
+            { user_enc_uni_bias_md, cpu_engine }, user_enc_uni_bias.data());
+
+    auto enc_uni_wei_layer_md = memory::desc({ user_enc_uni_wei_layer_dims },
+            memory::data_type::s8, memory::format::any);
+    auto enc_uni_wei_iter_md = memory::desc({ user_enc_uni_wei_iter_dims },
+            memory::data_type::s8, memory::format::any);
+    auto enc_dst_layer_md = memory::desc({ enc_dst_layer_dims },
+            memory::data_type::f32, memory::format::any);
+
+    rnn_cell::desc enc_uni_cell(algorithm::vanilla_lstm);
+    rnn_forward::desc enc_uni_layer_desc(prop_kind::forward_inference,
+            enc_uni_cell, rnn_direction::unidirectional_left2right,
+            enc_uni_first_dst_layer_md, zero_md(), enc_uni_wei_layer_md,
+            enc_uni_wei_iter_md, user_enc_uni_bias_md, enc_dst_layer_md,
+            zero_md());
+    auto enc_uni_prim_desc
+            = rnn_forward::primitive_desc(enc_uni_layer_desc, attr, cpu_engine);
+
+    auto enc_uni_wei_layer_memory
+            = memory(enc_uni_prim_desc.weights_layer_primitive_desc());
+    auto enc_uni_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_wei_layer_memory.get_primitive_desc(),
+            enc_uni_wei_layer_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(enc_uni_wei_layer_reorder_pd,
+            user_enc_uni_wei_layer_memory, enc_uni_wei_layer_memory));
+
+    auto enc_uni_wei_iter_memory
+            = memory(enc_uni_prim_desc.weights_iter_primitive_desc());
+    auto enc_uni_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_enc_uni_wei_iter_memory.get_primitive_desc(),
+            enc_uni_wei_iter_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(enc_uni_wei_iter_reorder_pd,
+            user_enc_uni_wei_iter_memory, enc_uni_wei_iter_memory));
+
+    auto enc_dst_layer_memory
+            = memory(enc_uni_prim_desc.dst_layer_primitive_desc());
+
+    encoder_net.push_back(
+            rnn_forward(enc_uni_prim_desc, enc_uni_first_dst_layer_memory,
+                    null_memory_, enc_uni_wei_layer_memory,
+                    enc_uni_wei_iter_memory, user_enc_uni_bias_memory,
+                    enc_dst_layer_memory, null_memory_, null_memory_));
+
+    /* Decoder with attention mechanism */
+    std::vector<float> user_dec_wei_layer(
+            dec_n_layers * 1 * feature_size * lstm_n_gates * feature_size,
+            0.2f);
+    std::vector<float> user_dec_wei_iter(dec_n_layers * 1
+                    * (feature_size + feature_size) * lstm_n_gates
+                    * feature_size,
+            0.3f);
+    std::vector<float> user_dec_bias(
+            dec_n_layers * 1 * lstm_n_gates * feature_size, 1.0f);
+    std::vector<int8_t> user_weights_attention_src_layer(
+            feature_size * feature_size, 1);
+    float weights_attention_scale = 127.;
+    std::vector<float> user_weights_annotation(
+            feature_size * feature_size, 1.0f);
+    std::vector<float> user_weights_alignments(feature_size, 1.0f);
+    // Buffer to store decoder output for all iterations
+    std::vector<uint8_t> dec_dst(tgt_seq_length_max * batch * feature_size, 0);
+
+    memory::dims user_dec_wei_layer_dims
+            = { dec_n_layers, 1, feature_size, lstm_n_gates, feature_size };
+    memory::dims user_dec_wei_iter_dims = { dec_n_layers, 1,
+        feature_size + feature_size, lstm_n_gates, feature_size };
+    memory::dims user_dec_bias_dims
+            = { dec_n_layers, 1, lstm_n_gates, feature_size };
+    memory::dims dec_src_layer_dims = { 1, batch, feature_size };
+    memory::dims dec_dst_layer_dims = { 1, batch, feature_size };
+
+    // We will use the same memory for dec_src_iter and dec_dst_iter
+    // However, dec_src_iter has a context vector but not
+    // dec_dst_iter.
+    // To resolve this we will create one memory that holds the
+    // context vector as well as the both the hidden and cell states.
+    // For the dst_iter, we will use a view on this memory.
+    // Note that the cell state will be padded by
+    // feature_size values. However, we do not compute or
+    // access those.
+    memory::dims dec_dst_iter_dims = { dec_n_layers, 1, lstm_n_states, batch,
+        feature_size + feature_size };
+    memory::dims dec_dst_iter_noctx_dims
+            = { dec_n_layers, 1, lstm_n_states, batch, feature_size };
+
+    auto user_dec_wei_layer_md = memory::desc({ user_dec_wei_layer_dims },
+            memory::data_type::f32, memory::format::ldigo);
+    auto user_dec_wei_iter_md = memory::desc({ user_dec_wei_iter_dims },
+            memory::data_type::f32, memory::format::ldigo);
+    auto user_dec_bias_md = memory::desc({ user_dec_bias_dims },
+            memory::data_type::f32, memory::format::ldgo);
+    auto dec_src_layer_md = memory::desc(
+            { dec_src_layer_dims }, memory::data_type::u8, memory::format::tnc);
+    auto dec_dst_layer_md = memory::desc(
+            { dec_dst_layer_dims }, memory::data_type::u8, memory::format::tnc);
+    auto dec_dst_iter_md = memory::desc({ dec_dst_iter_dims },
+            memory::data_type::f32, memory::format::ldsnc);
+
+    auto user_dec_wei_layer_memory = memory(
+            { user_dec_wei_layer_md, cpu_engine }, user_dec_wei_layer.data());
+    auto user_dec_wei_iter_memory = memory(
+            { user_dec_wei_iter_md, cpu_engine }, user_dec_wei_iter.data());
+    auto user_dec_bias_memory
+            = memory({ user_dec_bias_md, cpu_engine }, user_dec_bias.data());
+    auto dec_src_layer_memory = memory({ dec_src_layer_md, cpu_engine });
+    auto dec_dst_layer_memory
+            = memory({ dec_dst_layer_md, cpu_engine }, dec_dst.data());
+
+    /* Create memory descriptors for RNN data w/o specified layout */
+    auto dec_wei_layer_md = memory::desc({ user_dec_wei_layer_dims },
+            memory::data_type::s8, memory::format::any);
+    auto dec_wei_iter_md = memory::desc({ user_dec_wei_iter_dims },
+            memory::data_type::s8, memory::format::any);
+
+    /* As mentioned above, we create a view without context out of the
+     memory with context. */
+    auto dec_dst_iter_memory = memory({ dec_dst_iter_md, cpu_engine });
+    auto dec_dst_iter_noctx_md
+            = view::primitive_desc(dec_dst_iter_memory.get_primitive_desc(),
+                      dec_dst_iter_noctx_dims, { 0, 0, 0, 0, 0 })
+                      .dst_primitive_desc()
+                      .desc();
+
+    rnn_cell::desc dec_cell(algorithm::vanilla_lstm);
+    rnn_forward::desc dec_ctx_desc(prop_kind::forward_inference, dec_cell,
+            rnn_direction::unidirectional_left2right, dec_src_layer_md,
+            dec_dst_iter_md, dec_wei_layer_md, dec_wei_iter_md,
+            user_dec_bias_md, dec_dst_layer_md, dec_dst_iter_noctx_md);
+    auto dec_ctx_prim_desc
+            = rnn_forward::primitive_desc(dec_ctx_desc, attr, cpu_engine);
+
+    /* Create memory primitives for input data and use reorders to quantize
+     * values to int8 */
+    auto dec_wei_layer_memory
+            = memory(dec_ctx_prim_desc.weights_layer_primitive_desc());
+    auto dec_wei_layer_reorder_pd = reorder::primitive_desc(
+            user_dec_wei_layer_memory.get_primitive_desc(),
+            dec_wei_layer_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(dec_wei_layer_reorder_pd,
+            user_dec_wei_layer_memory, dec_wei_layer_memory));
+
+    auto dec_wei_iter_memory
+            = memory(dec_ctx_prim_desc.weights_iter_primitive_desc());
+    auto dec_wei_iter_reorder_pd = reorder::primitive_desc(
+            user_dec_wei_iter_memory.get_primitive_desc(),
+            dec_wei_iter_memory.get_primitive_desc(), attr);
+    weights_reorders.push_back(reorder(dec_wei_iter_reorder_pd,
+            user_dec_wei_iter_memory, dec_wei_iter_memory));
+
+    decoder_net.push_back(rnn_forward(dec_ctx_prim_desc, dec_src_layer_memory,
+            dec_dst_iter_memory, dec_wei_layer_memory, dec_wei_iter_memory,
+            user_dec_bias_memory, dec_dst_layer_memory, dec_dst_iter_memory,
+            null_memory_));
+
+    /* Allocating temporary buffers for attention mechanism */
+    std::vector<float> weighted_annotations(
+            src_seq_length_max * batch * feature_size, 1.0f);
+    std::vector<int32_t> weights_attention_sum_rows(feature_size, 1);
+
+    /*
+       Execution
+     */
+    auto execute = [&]() {
+        // reorder weights to MKLDNN internal representation
+        stream(stream::kind::eager).submit(weights_reorders).wait();
+
+        // run encoder (1 stream)
+        stream(stream::kind::eager).submit(encoder_net).wait();
+
+        // compute the weighted annotations once before the decoder
+        compute_weighted_annotations(weighted_annotations.data(),
+                src_seq_length_max, batch, feature_size,
+                user_weights_annotation.data(),
+                (float *)enc_dst_layer_memory.get_data_handle());
+        // precompute compensation for s8u8s32 gemm in compute attention
+        compute_sum_of_rows(user_weights_attention_src_layer.data(),
+                feature_size, feature_size, weights_attention_sum_rows.data());
+
+        // We initialise src_layer to the embedding of </s>, which
+        // are assumed to be 0 here
+        memset(dec_src_layer_memory.get_data_handle(), 0,
+                dec_src_layer_memory.get_primitive_desc().get_size());
+        // From now on, src points to the output of the last iteration
+
+        for (int i = 0; i < tgt_seq_length_max; i++) {
+            uint8_t *src_att_layer_handle
+                    = (uint8_t *)dec_src_layer_memory.get_data_handle();
+            float *src_att_iter_handle
+                    = (float *)dec_dst_iter_memory.get_data_handle();
+
+            // Compute attention context vector into the first layer src_iter
+            compute_attention(src_att_iter_handle, src_seq_length_max, batch,
+                    feature_size, user_weights_attention_src_layer.data(),
+                    weights_attention_scale, weights_attention_sum_rows.data(),
+                    src_att_layer_handle, data_scale, data_shift,
+                    (uint8_t *)enc_bidir_dst_layer_memory.get_data_handle(),
+                    weighted_annotations.data(),
+                    user_weights_alignments.data());
+
+            // copy the context vectors to all layers of src_iter
+            copy_context(src_att_iter_handle, dec_n_layers, lstm_n_states,
+                    batch, feature_size);
+
+            // run the decoder iteration
+            stream(stream::kind::eager).submit(decoder_net).wait();
+
+            // Move the handle on the src/dst layer to the next iteration
+            auto dst_layer_handle
+                    = (uint8_t *)dec_dst_layer_memory.get_data_handle();
+            dec_src_layer_memory.set_data_handle(dst_layer_handle);
+            dec_dst_layer_memory.set_data_handle(
+                    dst_layer_handle + batch * feature_size);
+        }
+
+    };
+
+    execute();
+}
+
+int main(int argc, char **argv) {
+    try {
+        simple_net();
+        std::cout << "ok\n";
+    } catch (error &e) {
+        std::cerr << "status: " << e.status << std::endl;
+        std::cerr << "message: " << e.message << std::endl;
+    }
+    return 0;
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_training.cpp b/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_training.cpp
index d63e675d4..bde52ce02 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_training.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_rnn_training.cpp
@@ -219,6 +219,14 @@ void simple_net() {
                            memory::format::ldigo), cpu_engine },
                          user_common_weights_layer.data());
 
+    std::vector<float> user_common_weights_iter(
+            tz_volume(common_weights_iter_dims),
+            1.0f);
+    auto user_common_weights_iter_memory
+        = mkldnn::memory({ formatted_md(common_weights_iter_dims,
+                           memory::format::ldigo), cpu_engine },
+                         user_common_weights_layer.data());
+
     std::vector<float> user_common_bias(
             tz_volume(common_bias_dims),
             1.0f);
@@ -325,10 +333,22 @@ void simple_net() {
         reorder_common_weights_layer = true;
     }
 
-    // Assume same memory would work for weights between leftmost and rightmost
-    // Allocate memory here based on the layout suggested by the primitive.
-    auto common_weights_iter_memory
-        = mkldnn::memory(leftmost_prim_desc.weights_iter_primitive_desc());
+    auto common_weights_iter_memory = user_common_weights_iter_memory;
+    primitive common_weights_iter_reorder;
+    auto reorder_common_weights_iter = false;
+    if (memory::primitive_desc(
+            leftmost_prim_desc.weights_iter_primitive_desc())
+        != memory::primitive_desc(
+            common_weights_iter_memory.get_primitive_desc())
+    ) {
+        common_weights_iter_memory
+            = mkldnn::memory(leftmost_prim_desc.weights_iter_primitive_desc());
+        common_weights_iter_reorder
+            = reorder(user_common_weights_iter_memory,
+                        common_weights_iter_memory);
+        reorder_common_weights_iter = true;
+    }
+
 
     auto common_bias_memory = user_common_bias_memory;
     primitive common_bias_reorder;
@@ -426,6 +446,8 @@ void simple_net() {
     // Enqueue primitives for forward execution
     if (reorder_common_weights_layer)
         fwd_net.push_back(common_weights_layer_reorder);
+    if (reorder_common_weights_iter)
+        fwd_net.push_back(common_weights_iter_reorder);
     if (reorder_common_bias)
         fwd_net.push_back(common_bias_reorder);
     if (reorder_leftmost_dst_layer)
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.c b/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.c
index dbe1ac0b0..964308cf3 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.c
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.c
@@ -78,7 +78,7 @@ void _free(void *ptr) {
 }
 #endif
 
-static size_t product(int *arr, size_t size)
+static size_t product(ptrdiff_t *arr, size_t size)
 {
     size_t prod = 1;
     for (size_t i = 0; i < size; ++i)
@@ -86,7 +86,7 @@ static size_t product(int *arr, size_t size)
     return prod;
 }
 
-static void init_net_data(float *data, uint32_t dim, const int *dims)
+static void init_net_data(float *data, uint32_t dim, const ptrdiff_t *dims)
 {
     if (dim == 1) {
         for (int i = 0; i < dims[0]; ++i) {
@@ -107,7 +107,7 @@ static void init_net_data(float *data, uint32_t dim, const int *dims)
     }
 }
 
-static void init_data_memory(uint32_t dim, const int *dims,
+static void init_data_memory(uint32_t dim, const ptrdiff_t *dims,
                              mkldnn_memory_format_t user_fmt,
                              mkldnn_data_type_t data_type,
                              mkldnn_engine_t engine, float *data,
@@ -177,8 +177,8 @@ mkldnn_status_t simple_net()
     mkldnn_engine_t engine;
     CHECK(mkldnn_engine_create(&engine, mkldnn_cpu, 0 /* idx */));
 
-    int net_src_sizes[4] = { BATCH, IC, CONV_IH, CONV_IW };
-    int net_dst_sizes[4] = { BATCH, OC, POOL_OH, POOL_OW };
+    ptrdiff_t net_src_sizes[4] = { BATCH, IC, CONV_IH, CONV_IW };
+    ptrdiff_t net_dst_sizes[4] = { BATCH, OC, POOL_OH, POOL_OW };
 
     float *net_src =
         (float *)aligned_malloc(product(net_src_sizes,4)*sizeof(float), 64);
@@ -195,12 +195,12 @@ mkldnn_status_t simple_net()
      * {BATCH, OC, CONV_OH, CONV_OW}
      * strides: {CONV_STRIDE, CONV_STRIDE}
      */
-    int *conv_user_src_sizes = net_src_sizes;
-    int conv_user_weights_sizes[4] = { OC, IC, 11, 11 };
-    int conv_bias_sizes[4] = { OC };
-    int conv_user_dst_sizes[4] = { BATCH, OC, CONV_OH, CONV_OW };
-    int conv_strides[2] = { CONV_STRIDE, CONV_STRIDE };
-    int conv_padding[2] = { CONV_PAD, CONV_PAD };
+    ptrdiff_t *conv_user_src_sizes = net_src_sizes;
+    ptrdiff_t conv_user_weights_sizes[4] = { OC, IC, 11, 11 };
+    ptrdiff_t conv_bias_sizes[4] = { OC };
+    ptrdiff_t conv_user_dst_sizes[4] = { BATCH, OC, CONV_OH, CONV_OW };
+    ptrdiff_t conv_strides[2] = { CONV_STRIDE, CONV_STRIDE };
+    ptrdiff_t conv_padding[2] = { CONV_PAD, CONV_PAD };
 
     float *conv_src = net_src;
     float *conv_weights = (float *)aligned_malloc(
@@ -394,10 +394,10 @@ mkldnn_status_t simple_net()
      * kernel: {3, 3}
      * strides: {POOL_STRIDE, POOL_STRIDE}
      */
-    int32_t *pool_dst_sizes = net_dst_sizes;
-    int32_t pool_kernel[2] = { 3, 3 };
-    int32_t pool_strides[2] = { POOL_STRIDE, POOL_STRIDE };
-    int32_t pool_padding[2] = { POOL_PAD, POOL_PAD };
+    ptrdiff_t *pool_dst_sizes = net_dst_sizes;
+    ptrdiff_t pool_kernel[2] = { 3, 3 };
+    ptrdiff_t pool_strides[2] = { POOL_STRIDE, POOL_STRIDE };
+    ptrdiff_t pool_padding[2] = { POOL_PAD, POOL_PAD };
 
     /* create pooling src memory descriptor using dst descriptor
      *  from previous primitive */
diff --git a/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.cpp b/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.cpp
index 836a08b51..070d3dd17 100644
--- a/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/examples/simple_training_net.cpp
@@ -44,7 +44,7 @@ void simple_net()
     memory::dims conv_bias_tz = { 96 };
     memory::dims conv_dst_tz = { batch, 96, 55, 55 };
     memory::dims conv_strides = { 4, 4 };
-    auto conv_padding = { 0, 0 };
+    memory::dims conv_padding = { 0, 0 };
 
     std::vector<float> conv_weights(
             std::accumulate(conv_weights_tz.begin(), conv_weights_tz.end(), 1,
@@ -180,7 +180,7 @@ void simple_net()
     memory::dims pool_dst_tz = { batch, 96, 27, 27 };
     memory::dims pool_kernel = { 3, 3 };
     memory::dims pool_strides = { 2, 2 };
-    auto pool_padding = { 0, 0 };
+    memory::dims pool_padding = { 0, 0 };
 
     /* create memory for pool dst data in user format */
     auto pool_user_dst_memory = memory(
diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h
index 73853adde..a0a2d1aa5 100644
--- a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h
+++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.h
@@ -52,6 +52,7 @@
 #endif
 
 #include "mkldnn_types.h"
+#include "mkldnn_version.h"
 #endif /* DOXYGEN_SHOULD_SKIP_THIS */
 
 #ifdef __cplusplus
@@ -88,15 +89,15 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_create_v2(
         const_mkldnn_primitive_desc_t hint_forward_primitive_desc);
 
 /** Iterates over primitive descriptors. Returns #mkldnn_iterator_ends if no
- * more primitive descriptors are available */
+ * more primitive descriptors are available. */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_next(
         mkldnn_primitive_desc_iterator_t iterator);
 
-/** Fetches current primitive descriptor.
+/** Fetches the current primitive descriptor.
  *
  * @note
- *     fetched primitive descriptor should be deleted by user using
- *     mkldnn_primitive_desc_destroy() once becomes unneeded */
+ *     The user should delete the fetched primitive descriptor using
+ *     mkldnn_primitive_desc_destroy() once it is no longer needed. */
 mkldnn_primitive_desc_t MKLDNN_API mkldnn_primitive_desc_iterator_fetch(
         const_mkldnn_primitive_desc_iterator_t iterator);
 
@@ -106,8 +107,8 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_iterator_destroy(
 
 /** Creates a @p primitive_desc using @p op_desc, @p engine, and optionally a
  * hint primitive descriptor from forward propagation. The call is equivalent
- * to create a primitive descriptor iterator, instantly fetch a primitive_desc
- * and destroy the iterator. */
+ * to creating a primitive descriptor iterator, immediately fetching a
+ * primitive descriptor, and then destroying the iterator. */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_create(
         mkldnn_primitive_desc_t *primitive_desc,
         const_mkldnn_op_desc_t op_desc, mkldnn_engine_t engine,
@@ -115,8 +116,8 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_create(
 
 /** Creates a @p primitive_desc using @p op_desc, @p attr, @p engine, and
  * optionally a hint primitive descriptor from forward propagation. The call is
- * equivalent to create a primitive descriptor iterator, instantly fetch a @p
- * primitive_desc and destroy the iterator. */
+ * equivalent to creating a primitive descriptor iterator, immediately fetching
+ * a primitive descriptor, and then destroying the iterator. */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_create_v2(
         mkldnn_primitive_desc_t *primitive_desc,
         const_mkldnn_op_desc_t op_desc, const_mkldnn_primitive_attr_t attr,
@@ -131,11 +132,12 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_clone(
 /** Returns a constant reference to the attribute of a @p primitive_desc.
  *
  * @warning
- *      User should not destroy obtained @p attr
+ *      The user should not destroy the obtained @p attr.
  *
  * @warning
- *      The lifetime of an @p attr is same as @p primitive_desc, so it is
- *      illegal to use the @p attr once @p primitive_desc is destroyed */
+ *      The lifetime of an @p attr is the same as that of a @p primitive_desc,
+ *      so it is illegal to use the @p attr once @p primitive_desc has been
+ *      destroyed. */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_get_attr(
         const_mkldnn_primitive_desc_t primitive_desc,
         const_mkldnn_primitive_attr_t *attr);
@@ -147,7 +149,7 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_destroy(
 /** Queries primitive descriptor
  *
  * One of the most typical use cases is to query a convolution primitive
- * descriptor created with source, weights and destination formats equal
+ * descriptor created with source, weights, and destination formats equal
  * to #mkldnn_any about the corresponding memory primitive descriptors
  * (@p what equals #mkldnn_query_src_pd, #mkldnn_query_weights_pd, and
  * #mkldnn_query_dst_pd respectively) to be able to prepare memory and
@@ -155,15 +157,15 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_destroy(
  *
  * Another quite typical use case is to query an operation primitive
  * descriptor for a workspace (@p what equals #mkldnn_query_workspace_pd).
- * Returned status #mkldnn_not_required indicates that workspace is
+ * The returned status #mkldnn_not_required indicates that a workspace is
  * not required.
  *
- * Few other possibilities:
+ * A few other possibilities:
  *  - query a memory primitive descriptor for the underlying memory
  *    descriptor (#mkldnn_query_memory_d)
  *  - query an operation primitive descriptor for the underlying operation
  *    descriptor (#mkldnn_query_convolution_d, #mkldnn_query_eltwise_d,
- *    #mkldnn_query_rnn_d, etc)
+ *    #mkldnn_query_rnn_d, etc.)
  *  - query an operation primitive descriptor for the implementation
  *    information string (#mkldnn_query_impl_info_str)
  *  - query an operation primitive descriptor for the number of inputs and
@@ -178,7 +180,7 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_desc_query(
 
 /** Queries primitive descriptor for memory descriptor
  *
- * @returns NULL in case of any error (in particular if queried entity is
+ * @returns NULL in case of any error (in particular if the queried entity is
  * not of type mkldnn_memory_desc_t).
  *
  * This is just a specialized version of mkldnn_primitive_desc_query
@@ -189,16 +191,16 @@ const mkldnn_memory_desc_t MKLDNN_API *mkldnn_primitive_desc_query_memory_d(
 
 /** Queries primitive descriptor for primitive descriptor
  *
- * @returns NULL in case of any error (in particular if queried entity is
+ * @returns NULL in case of any error (in particular if the queried entity is
  * not of type const_mkldnn_primitive_desc_t).
  *
  * This is just a specialized version of mkldnn_primitive_desc_query
  * used for convenience.
  *
- * Example: query an operation primitive descriptor for a workspace
+ * Example: Query an operation primitive descriptor for a workspace
  *         (@p what equals #mkldnn_query_workspace_pd). Returned
- *         NULL indicates the primitive does not require a workspace.
- *         Otherwise a user should prepare the workspace and pass it
+ *         NULL indicates that the primitive does not require a workspace.
+ *         Otherwise, a user should prepare the workspace and pass it
  *         to the corresponding primitive.
  */
 const_mkldnn_primitive_desc_t MKLDNN_API mkldnn_primitive_desc_query_pd(
@@ -207,7 +209,7 @@ const_mkldnn_primitive_desc_t MKLDNN_API mkldnn_primitive_desc_query_pd(
 
 /** Queries primitive descriptor for signed 32bit int
  *
- * @returns 0 in case of any error (in particular if queried entity is
+ * @returns 0 in case of any error (in particular if the queried entity is
  * not of type int32_t). Note that 0 might also be the actual returned
  * value.
  *
@@ -230,8 +232,8 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_create(
  * primitive.
  *
  * @warning
- *     Returned object must not be destroyed by user. 'const' qualifier of the
- *     returned object prevents such attempts. */
+ *     The returned object must not be destroyed by the user. The @c const
+ *     qualifier of the returned object prevents such attempts. */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_get_primitive_desc(
         const_mkldnn_primitive_t primitive,
         const_mkldnn_primitive_desc_t *primitive_desc);
@@ -252,7 +254,7 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_destroy(
 
 /** Creates an #mkldnn_primitive_at_t structure from a @p primitive and @p
  * output_index. This function only fills in the data structure
- * and does not check whether parameters are correct. The actual error checking
+ * and does not check whether arguments are correct. The actual error checking
  * is done when the resulting #mkldnn_primitive_at structure is passed to a
  * primitive creation function. */
 mkldnn_primitive_at_t MKLDNN_API mkldnn_primitive_at(
@@ -264,11 +266,11 @@ mkldnn_primitive_at_t MKLDNN_API mkldnn_primitive_at(
  * An extension for controlling primitive behavior.
  * @{ */
 
-/** Creates an empty (default) @p attr attribute. All the parameters set to
+/** Creates an empty (default) @p attr attribute. All the parameters are set to
  * default values.
  *
- * An empty attribute is used in primitive descriptor creating whenever it is
- * not passed explicitly, e.g. in mkldnn_primitive_desc_create.
+ * An empty attribute is used in primitive descriptor creation whenever it
+ * is not passed explicitly, e.g. in mkldnn_primitive_desc_create.
  */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_create(
         mkldnn_primitive_attr_t *attr);
@@ -295,17 +297,17 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_int_output_round_mode(
 mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_int_output_round_mode(
         mkldnn_primitive_attr_t attr, mkldnn_round_mode_t round_mode);
 
-/** Returns @p count, correspondence scale @p mask, and pointer to a constant
+/** Returns @p count, correspondence scale @p mask, and a pointer to a constant
  * floating point array of output @p scales for given @p attr, previously set
  * by mkldnn_primitive_attr_set_output_scales.
  *
  * @warning
- *      @p scales array points to the internal @p attr field, so user should
- *      not modify/destroy @p scales.
+ *      The @p scales array points to the internal @p attr field, so the user
+ *      should not modify or destroy @p scales.
  *
  * @warning
- *      The lifetime of @p scales is same as @p attr it belongs to, so it is
- *      illegal to use the @p scales after @p attr is destroyed
+ *      The lifetime of @p scales is the same as that of the @p attr to which it
+ *      belongs, so it is illegal to use @p scales after @p attr is destroyed.
  */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_output_scales(
         const_mkldnn_primitive_attr_t attr, int *count, int *mask,
@@ -314,10 +316,11 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_output_scales(
 /** Sets output @p scales for primitive operations. The number of elements @p
  * count and correspondence scale @p mask are stored for future use.
  *
- * The @p mask argument defines correspondence between output tensor dimensions
- * and the @p scales array. Set i-th bit of @p mask to 1 to use dedicated
- * scaling factor for each slice of the output tensor over i-th dimension. Set
- * @p mask to 0 to use common scaling factor for the whole output tensor.
+ * The @p mask argument defines the correspondence between the output tensor
+ * dimensions and the @p scales array. Set the i-th bit of @p mask to 1 to use a
+ * dedicated scaling factor for each slice of the output tensor over the i-th
+ * dimension. Set @p mask to 0 to use a common scaling factor for the whole
+ * output tensor.
  *
  * @note
  *      The dimension order is always native and does not depend on the actual
@@ -344,8 +347,8 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_output_scales(
  *
  * @note
  *      There is no way to check that @p count corresponds to @p mask until an
- *      actual primitive descriptor is created, so it is user's responsibility
- *      to set proper values. The following formula must be hold:
+ *      actual primitive descriptor is created, so it is the user's
+ *      responsibility to set proper values. The following formula must hold:
  *
  *      \f[count = \prod\limits_{d \in mask} output.dims[d]\f]
  */
@@ -353,31 +356,31 @@ mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_output_scales(
         mkldnn_primitive_attr_t attr, int count, int mask,
         const float *scales);
 
-/** Returns @p post_ops for given attr.
+/** Returns @p post_ops for given @p attr.
  *
  * @warning
- *      @p post_ops points to the internal @p attr field, so user should not
- *      modify/destroy @p post_ops. Also the lifetime of @p post_ops is the
- *      same as @p attr it belongs to, so it is illegal to use @p post_ops once
- *      @p attr is destroyed.
+ *      @p post_ops points to the internal @p attr field, so the user should not
+ *      modify or destroy @p post_ops. Also, the lifetime of @p post_ops is the
+ *      same as that of the @p attr it belongs to, so it is illegal to use @p
+ *      post_ops after @p attr has been destroyed.
  */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_get_post_ops(
         const_mkldnn_primitive_attr_t attr, const_mkldnn_post_ops_t *post_ops);
 
 /** Sets configured @p post_ops to an attribute @p attr for future use (when
- * primitive descriptor is being created.
+ * primitive descriptor is being created).
  *
  * @note
- *      At this point of time there is no way to check whether primitive
- *      descriptor does or does not support given sequence of post operations.
- *      That means that user should handle an error that might happen at
+ *      At this point in time, there is no way to check whether the primitive
+ *      descriptor does or does not support a given sequence of post operations.
+ *      Therefore the user should handle an error that might occur at the
  *      mkldnn_primitive_desc_create call.
  */
 mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_post_ops(
         mkldnn_primitive_attr_t attr, const_mkldnn_post_ops_t post_ops);
 
 /** @addtogroup c_api_attributes_post_ops Sequence of post operations
- * An extension for performing extra operations after base operation.
+ * An extension for performing extra operations after a base operation.
  * @{ */
 
 /** Creates an empty sequence of post operations @p post_ops. */
@@ -390,19 +393,19 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_destroy(mkldnn_post_ops_t post_ops);
 int MKLDNN_API mkldnn_post_ops_len(const_mkldnn_post_ops_t post_ops);
 
 /** Returns the type of post operation with index @p index in given
- * @p post_ops. In case of error returns #mkldnn_undefined_primitive. */
+ * @p post_ops. In case of error, returns #mkldnn_undefined_primitive. */
 mkldnn_primitive_kind_t MKLDNN_API mkldnn_post_ops_get_kind(
         const_mkldnn_post_ops_t post_ops, int index);
 
 /** Appends accumulation (sum) post operation to the @p post_ops. Prior to
- * accumulating the result the previous value would be multiplied by @p scale.
+ * accumulating the result, the previous value would be multiplied by @p scale.
  *
  * The kind of this post operation is #mkldnn_sum.
  *
- * This feature might improve performance for the cases like residual learning
+ * This feature might improve performance for cases like residual learning
  * blocks, where the result of convolution is accumulated to the previously
- * computed activations. Scale parameter @p scale might be extremely for the
- * integer-based computations, when the result and previous activations have
+ * computed activations. The parameter @p scale might be extreme for the
+ * integer-based computations when the result and previous activations have
  * different logical scaling factors.
  *
  * In the simplest case when the accumulation is the only post operation, the
@@ -410,9 +413,10 @@ mkldnn_primitive_kind_t MKLDNN_API mkldnn_post_ops_get_kind(
  * dst[] <- scale * dst[] + op(...) // instead of dst[] <- op(...)
  *
  * @note
- *      This post op (as well as all the others) disregards the original layout
- *      of dst, i.e. the layout of the original dst is expected to be the same
- *      as the layout of stored dst.
+ *      This post operation (as well as all the others) disregards the original
+ *      layout of the destination; that is, the layout of the original
+ *      destination is expected to be the same as the layout of the stored
+ *      destination.
  */
 mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_sum(
         mkldnn_post_ops_t post_ops, float scale);
@@ -422,13 +426,13 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_sum(
  *
  * @note
  *      If index @p index would not correspond to the accumulation post
- *      operation, the function return #mkldnn_invalid_arguments.
+ *      operation, the function returns #mkldnn_invalid_arguments.
  */
 mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_sum(
         const_mkldnn_post_ops_t post_ops, int index, float *scale);
 
 /** Appends eltwise post operation to the @p post_ops with given parameters
- * @p kind, @p alpha and @p beta (@sa mkldnn_eltwise_forward_desc_init and
+ * @p kind, @p alpha, and @p beta (@sa mkldnn_eltwise_forward_desc_init and
  * mkldnn_eltwise_desc_t).
  *
  * The kind of this post operation is #mkldnn_eltwise.
@@ -436,7 +440,7 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_sum(
  * In the simplest case when the eltwise is the only post operation, the
  * computations would be:
  * dst[] <- scale * eltwise_op ( op(...) ) // instead of dst[] <- op(...)
- * where eltwise_op is configured with given parameters.
+ * where eltwise_op is configured with the given parameters.
  */
 mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_eltwise(
         mkldnn_post_ops_t post_ops, float scale, mkldnn_alg_kind_t alg,
@@ -489,6 +493,27 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_dw_conv(
         int* ker_h, int* ker_w, int* str_h, int* str_w, const float** weights_data,
         const float** biases_data);
 
+/** Appends binarization post operation to the @p post_ops with given parameters
+ * @p kind and @p weights (@sa mkldnn_binarization_forward_desc_init and
+ * mkldnn_binarization_desc_t).
+ *
+ * The kind of this post operation is #mkldnn_binarization.
+ *
+ * In the simplest case when the binarization is the only post operation, the
+ * computations would be:
+ * dst[] <- binarization_op ( op(...) ) // instead of dst[] <- op(...)
+ * where binarization_op is configured with given parameters.
+ */
+mkldnn_status_t MKLDNN_API mkldnn_post_ops_append_binarization(
+        mkldnn_post_ops_t post_ops, mkldnn_alg_kind_t alg, const float* weights_data);
+
+/** Gets the binarization parameters of the post operation with index @p index in
+ * the sequence of @p post_ops.
+ */
+mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_binarization(
+        const_mkldnn_post_ops_t post_ops, int index,
+        mkldnn_alg_kind_t *alg, const float** weights_data);
+
 /** @} */
 
 /** @} */
@@ -499,21 +524,21 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_dw_conv(
  * The library supports various data types and formats. Memory hierarchy
  * consists of three levels of abstraction:
  * 1. **Memory descriptor** -- engine agnostic logical description of data
- *      (number of dimensions, dimensions themselves and data type), and
+ *      (number of dimensions, dimensions themselves, and data type), and
  *      optionally the format/layout that describes the physical representation
- *      of data in memory. If the format/layout is not known yet one can pass
- *      #mkldnn_any. This approach is used to allow compute intensive
- *      primitives to specify the most appropriate layout on their own with
- *      users required to reorder the data if the incoming layout doesn't match
- *      the primitive's selection. Memory descriptor can be created with
+ *      of data in memory. If the format is not known yet, one can pass
+ *      #mkldnn_any. This approach is used to allow compute-intensive
+ *      primitives to specify the most appropriate format on their own with
+ *      users required to reorder the data if the incoming format doesn't match
+ *      the primitive's selection. Memory descriptor can be created with the
  *      mkldnn_memory_desc_init() function or by directly filling the
- *      mkldnn_memory_desc_t structure. The later requires deep knowledge of
+ *      mkldnn_memory_desc_t structure. The latter requires deep knowledge of
  *      how the physical data representation is mapped to the structure. The
  *      @ref understanding_memory_formats topic should shed some light on that.
  * 2. **Memory primitive descriptor** -- logical description of data that is
- *      fully defined, i.e. cannot contain #mkldnn_any as a format. It also
- *      has the engine specified. A memory primitive descriptor is created by
- *      calling mkldnn_memory_primitive_desc_create() with two arguments: an
+ *      fully defined; that is, it cannot contain #mkldnn_any as a format. It
+ *      also has the engine specified. A memory primitive descriptor is created
+ *      by calling mkldnn_memory_primitive_desc_create() with two arguments: an
  *      mkldnn_memory_desc_t and an mkldnn_engine_t. It has the same type as
  *      other primitive descriptors and can be:
  *      - queried to return the underlying memory descriptor using
@@ -521,51 +546,52 @@ mkldnn_status_t MKLDNN_API mkldnn_post_ops_get_params_dw_conv(
  *        mkldnn_primitive_desc_query_memory_d().
  *      - compared with another memory primitive descriptor using
  *        mkldnn_memory_primitive_desc_equal(). This is especially useful when
- *        checking whether a primitive requires reorder from user's data layout
- *        to the primitive's one.
+ *        checking whether a primitive requires reorder from the user's data
+ *        format to the primitive's format.
  *      - queried to return the size of the data using
  *        mkldnn_memory_primitive_desc_get_size(). As described in
- *        @ref understanding_memory_formats the size of data sometimes cannot
- *        be computed as a product of dimensions times the size of data type.
- *        So users are encouraged to use this function to have better code
+ *        @ref understanding_memory_formats, the size of data sometimes cannot
+ *        be computed as the product of dimensions times the size of the data
+ *        type. So users are encouraged to use this function for better code
  *        portability.
  * 3. **Memory primitive** or simply **memory** -- a pseudo-primitive that is
  *      defined by a memory primitive descriptor and a handle to the data
- *      itself (in case of CPU engine the handle is simply a pointer `void*`).
- *      The data handle can be queried using mkldnn_memory_get_data_handle()
- *      and be set using mkldnn_memory_set_data_handle(). The latter function
- *      always sets the memory in the padding region to zero which is the
- *      invariant maintained by all the primitives in Intel MKL-DNN. See
+ *      itself. (In the case of CPU engine, the handle is simply a pointer to
+ *      @c void.) The data handle can be queried using
+ *      mkldnn_memory_get_data_handle() and set using
+ *      mkldnn_memory_set_data_handle(). The latter function always sets the
+ *      memory in the padding region to zero, which is the invariant maintained
+ *      by all the primitives in Intel MKL-DNN. See
  *      @ref understanding_memory_formats for more details.
  *      A memory primitive can be created using mkldnn_primitive_create() with
  *      empty inputs and outputs. In this case, the memory primitive's data
- *      handle needs to be set manually using mkldnn_memory_set_data_handle().
+ *      handle must be set manually using mkldnn_memory_set_data_handle().
  *
  * Along with ordinary memory with all dimensions being positive, Intel
  * MKL-DNN supports *zero-volume* memory with one or more dimensions set to
- * zero. This is to support NumPy\* convention.
- * If a *zero-volume* memory is passed to a primitive, the primitive would
+ * zero. This is to support the NumPy\* convention.
+ * If a *zero-volume* memory is passed to a primitive, the primitive does
  * not perform any computations on this memory. For example:
  *  - Convolution with `(0 batch, 3 input channels, 13 height, 13 width)`
  *    source and `(16 output channels, 3 inputs, channel, 3 height, 3 width)`
  *    weights would produce `(0 batch, 16 ouput channels, 11 height, 11 width)`
  *    destination (assuming strides are `1` and paddings are zero) and perform
  *    zero multiply-add operations.
- *  - Concatenation of 3 memories of shapes `(3, 4, 13, 13)`, `(3, 0, 13, 13)`,
- *    and `(3, 1, 13, 13)` along the second axis would produce the output of
- *    the shape `(3, 5, 13, 13)`, effectively ignoring the second input
- *    (however if user created a concatenation primitive descriptor with 3
- *    inputs they should also provide all 3 memories to the concatenation
- *    primitive, including the one with zero second dimension).
+ *  - Concatenation of three memories of shapes `(3, 4, 13, 13)`,
+ *    `(3, 0, 13, 13)`, and `(3, 1, 13, 13)` along the second axis would produce
+ *    the output of the shape `(3, 5, 13, 13)`, effectively ignoring the second
+ *    input (however, if the user created a concatenation primitive descriptor
+ *    with three inputs they should also provide all three memories to the
+ *    concatenation primitive, including the one with zero second dimension).
  *  - However, Intel MKL-DNN would return an error when attempting to create a
- *    convolution with *zero-volume* memory passed for weights because such
+ *    convolution with *zero-volume* memory passed for weights because such a
  *    convolution is not well-defined:
  *    ~~~
  *    dst(1, 16, 11, 11) <-- src(1, 0, 13, 13) (*) wei(16, 0, 3, 3)
  *    ~~~
  *    Should the values in the destination be zeroes or just not accessed at
- *    all? Moreover, backward pass w.r.t. weights in such cases is not
- *    well-defined as well.
+ *    all? Moreover, backward pass w.r.t. weights in such cases is also not
+ *    well-defined.
  *
  *  Data handle of *zero-volume* memory is never accessed and hence can be
  *  unset (NULL in case of CPU engine).
@@ -581,15 +607,16 @@ mkldnn_status_t MKLDNN_API mkldnn_memory_desc_init(
         mkldnn_data_type_t data_type, mkldnn_memory_format_t format);
 
 /** Creates a @p memory_primitive_desc memory primitive descriptor using @p
- * memory_desc and @p engine. @p memory_desc cannot be uncertain, that is,
- * initialized with #mkldnn_any. */
+ * memory_desc and @p engine. @p memory_desc cannot be uncertain; that is, it
+ * cannot be initialized with #mkldnn_any. */
 mkldnn_status_t MKLDNN_API mkldnn_memory_primitive_desc_create(
         mkldnn_primitive_desc_t *memory_primitive_desc,
         const mkldnn_memory_desc_t *memory_desc, mkldnn_engine_t engine);
 
 /** Creates a @p view_primitive_desc for a given @p memory_primitive_desc, with
- * @p dims sizes and @p offset offsets. May fail if layout used does not allow
- * obtain desired view. In this case consider using extract primitive */
+ * @p dims sizes and @p offsets offsets. May fail if the format used does not
+ * allow obtaining the desired view. In this case, consider using the extract
+ * primitive. */
 mkldnn_status_t MKLDNN_API mkldnn_view_primitive_desc_create(
         mkldnn_primitive_desc_t *view_primitive_desc,
         const_mkldnn_primitive_desc_t memory_primitive_desc,
@@ -660,13 +687,13 @@ mkldnn_status_t MKLDNN_API mkldnn_reorder_primitive_desc_create_v2(
 /** @} */
 
 /** @addtogroup c_api_concat Concat
- * A primitive to concatenate data by arbitrary dimension
+ * A primitive to concatenate data by arbitrary dimension.
  * @{ */
 
 /** Creates out-of-place @p concat_primitive_desc for concatenation of @p n
  * inputs by @p concat_dimension with resulting @p output_desc memory
- * descriptor. @p output_desc can be NULL or be specified with #mkldnn_any
- * format -- in this case appropriate memory format would be chosen
+ * descriptor. @p output_desc can be NULL or specified with the #mkldnn_any
+ * format -- in this case, the appropriate memory format would be chosen
  * automatically.
  *
  * Order of inputs:
@@ -684,28 +711,28 @@ mkldnn_status_t MKLDNN_API mkldnn_concat_primitive_desc_create(
         const_mkldnn_primitive_desc_t *input_pds);
 
 #if 0
-/** Creates in-place @p concat_primitive_desc for given @p n @p inputs memory
- * primitive descriptors along @p concat_dimension. All inputs must have the
- * same memory format. Output memory format would be the same. Likewise
- * view_primitive_desc_create the call may fail, if memory format of inputs do
- * not allow inplace concatenation for given sizes.
+/** Creates in-place @p concat_primitive_desc for given @p n and @p inputs
+ * memory primitive descriptors along @p concat_dimension. All inputs must have
+ * the same memory format. Output memory format would be the same. Likewise, the
+ * view_primitive_desc_create call may fail if the memory format of the inputs
+ * does not allow in-place concatenation for the given sizes.
  *
- * @note this primitive is more like a synchronization stub for concatenation,
- * since concat_inplace does no operation during execution.
+ * @note This primitive is more like a synchronization stub for concatenation,
+ * because concat_inplace performs no operation during execution.
  *
- * @note since not operation happens user must ensure that input */
+ * @note Because no operation occurs, the user must ensure the input. */
 mkldnn_status_t MKLDNN_API mkldnn_concat_inplace_by_input_primitive_desc_create(
         mkldnn_primitive_desc_t *concat_primitive_desc,
         int n, int concat_dimension, const_mkldnn_primitive_desc_t *inputs);
 
 /** Creates in-place @p concat_primitive_desc for given @p output memory
- * descriptor and @n inputs with @p sizes sizes along @p concat_dimension. As
- * opposed to out-of-place concatenation @p output must be fully defined here.
- * Likewise view_primitive_desc_create the call may fail, because given memory
- * format does not allow inplace concatenation for given sizes.
+ * descriptor and @n inputs with @p sizes sizes along @p concat_dimension.
+ * Unlike out-of-place concatenation, @p output must be fully defined here.
+ * Likewise, the view_primitive_desc_create call may fail if the given memory
+ * format does not allow inplace concatenation for the given sizes.
  *
- * @note this primitive is more like a synchronization stub for concatenation,
- * since concat_inplace does no operation during execution. */
+ * @note This primitive is more like a synchronization stub for concatenation,
+ * because concat_inplace performs no operation during execution. */
 mkldnn_status_t MKLDNN_API mkldnn_concat_inplace_by_output_primitive_desc_create(
         mkldnn_primitive_desc_t *concat_primitive_desc,
         const mkldnn_primitive_desc_t output, int n, int concat_dimension,
@@ -715,13 +742,13 @@ mkldnn_status_t MKLDNN_API mkldnn_concat_inplace_by_output_primitive_desc_create
 /** @} */
 
 /** @addtogroup c_api_sum Sum
- * A primitive to sum data
+ * A primitive to sum data.
  * @{ */
 
 /** Creates out-of-place @p sum_primitive_desc for sum of @p n
  * inputs multiplied by scale with resulting @p output_desc memory
- * descriptor. @p output_desc can be NULL or be specified with #mkldnn_any
- * format -- in this case appropriate memory format would be chosen
+ * descriptor. @p output_desc can be NULL or specified with the #mkldnn_any
+ * format -- in this case, the appropriate memory format would be chosen
  * automatically.
  *
  * Order of inputs:
@@ -761,15 +788,15 @@ mkldnn_status_t MKLDNN_API mkldnn_sum_primitive_desc_create(
  * @{ */
 
 /** Initializes a convolution descriptor @p conv_desc for forward propagation
- * using @p prop_kind (possible values are #mkldnn_forward_training or
+ * using @p prop_kind (possible values are #mkldnn_forward_training and
  * #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, @p
  * padding_l, @p padding_r, and @p padding_kind. In order to create a
- * convolution without bias, @p bias_desc should be either @c NULL or point to
- * a descriptor with memory format equals to #mkldnn_format_undef.
+ * convolution without bias, @p bias_desc should either be @c NULL or point to
+ * a descriptor with memory format equal to #mkldnn_format_undef.
  *
- * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -791,15 +818,15 @@ mkldnn_status_t MKLDNN_API mkldnn_convolution_forward_desc_init(
 
 /** Initializes a dilated convolution descriptor @p conv_desc for forward
  * propagation using @p prop_kind (possible values are #mkldnn_forward_training
- * or #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
+ * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
  * @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
  * In order to create a dilated convolution without bias, @p bias_desc
- * should be either @c NULL or point to a descriptor with memory format equals
+ * should either be @c NULL or point to a descriptor with memory format equal
  * to #mkldnn_format_undef.
  *
- * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -823,7 +850,7 @@ mkldnn_status_t MKLDNN_API mkldnn_dilated_convolution_forward_desc_init(
  * with respect to data using @p alg_kind, memory descriptors, @p strides, @p
  * padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -845,7 +872,7 @@ mkldnn_status_t MKLDNN_API mkldnn_convolution_backward_data_desc_init(
  * propagation with respect to data using @p alg_kind, memory descriptors, @p
  * strides, @p dilates @p padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -867,7 +894,7 @@ mkldnn_status_t MKLDNN_API mkldnn_dilated_convolution_backward_data_desc_init(
  * with respect to weights using @p alg_kind, memory descriptors, @p strides,
  * @p padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -891,7 +918,7 @@ mkldnn_status_t MKLDNN_API mkldnn_convolution_backward_weights_desc_init(
  * with respect to weights using @p alg_kind, memory descriptors, @p strides,
  * @p dilates @p padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -920,16 +947,16 @@ mkldnn_dilated_convolution_backward_weights_desc_init(
  * @{ */
 
 
-/** Initializes a deconvolution descriptor @p deconv_desc for forward propagation
- * using @p prop_kind (possible values are #mkldnn_forward_training or
- * #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides, @p
- * padding_l, @p padding_r, and @p padding_kind. In order to create a
- * deconvolution without bias, @p bias_desc should be either @c NULL or point to
- * a descriptor with memory format equals to #mkldnn_format_undef.
+/** Initializes a deconvolution descriptor @p deconv_desc for forward
+ * propagation using @p prop_kind (possible values are #mkldnn_forward_training
+ * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
+ * @p padding_l, @p padding_r, and @p padding_kind. In order to create a
+ * deconvolution without bias, @p bias_desc should either be @c NULL or point to
+ * a descriptor with memory format equal to #mkldnn_format_undef.
  *
- * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -951,15 +978,15 @@ mkldnn_status_t MKLDNN_API mkldnn_deconvolution_forward_desc_init(
 
 /** Initializes a dilated deconvolution descriptor @p deconv_desc for forward
  * propagation using @p prop_kind (possible values are #mkldnn_forward_training
- * or #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
+ * and #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
  * @p dilates, @p padding_l, @p padding_r, and @p padding_kind. In order to
- * create a dilated deconvolution without bias, @p bias_desc should be either
- * @c NULL or point to a descriptor with memory format equals to
+ * create a dilated deconvolution without bias, @p bias_desc should either be
+ * @c NULL or point to a descriptor with memory format equal to
  * #mkldnn_format_undef.
  *
- * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -983,7 +1010,7 @@ mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_forward_desc_init(
  * with respect to data using @p alg_kind, memory descriptors, @p strides, @p
  * padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -1005,7 +1032,7 @@ mkldnn_status_t MKLDNN_API mkldnn_deconvolution_backward_data_desc_init(
  * propagation with respect to data using @p alg_kind, memory descriptors, @p
  * strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -1027,7 +1054,7 @@ mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_backward_data_desc_init(
  * with respect to weights using @p alg_kind, memory descriptors, @p strides,
  * @p padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -1051,7 +1078,7 @@ mkldnn_status_t MKLDNN_API mkldnn_deconvolution_backward_weights_desc_init(
  * propagation with respect to weights using @p alg_kind, memory descriptors,
  * @p strides, @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
  *
- * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * @note Memory descriptors are allowed to be initialized with #mkldnn_any
  * value of @p format_kind.
  *
  * Order of inputs:
@@ -1078,8 +1105,7 @@ mkldnn_status_t MKLDNN_API mkldnn_dilated_deconvolution_backward_weights_desc_in
  * @{ */
 
 /** Initializes a @p shuffle_desc for forward propagation using @p prop_kind,
- * @p memory descriptor @p data_desc, @p axis and @p group
- * number.
+ * memory descriptor @p data_desc, @p axis, and @p group_size.
  *
  * Order of inputs:
  *  - src (#mkldnn_query_src_pd, 0)
@@ -1092,8 +1118,8 @@ mkldnn_status_t MKLDNN_API mkldnn_shuffle_forward_desc_init(
         mkldnn_shuffle_desc_t *shuffle_desc, mkldnn_prop_kind_t prop_kind,
         const mkldnn_memory_desc_t *data_desc, int axis, int group_size);
 
-/** Initializes a @p shuffle_desc for backward propagation using @p memory
- * descriptor @p diff_data_desc, @p axis and @p group number.
+/** Initializes a @p shuffle_desc for backward propagation using memory
+ * descriptor @p diff_data_desc, @p axis, and @p group_size.
  *
  *
  * Order of inputs:
@@ -1110,27 +1136,27 @@ mkldnn_status_t MKLDNN_API mkldnn_shuffle_backward_desc_init(
 /** @} */
 
 /** @addtogroup c_api_eltwise Eltwise
- * A primitive to compute element wise operations like parametric rectifier
+ * A primitive to compute element-wise operations like parametric rectifier
  * linear unit (ReLU).
  *
- * Both forward and backward passes support in-place operation, i.e. src
- * and dst point to the same memory for forward, and diff_dst and diff_src
+ * Both forward and backward passes support in-place operation; that is, src
+ * and dst point to the same memory for forward pass, and diff_dst and diff_src
  * point to the same memory for backward pass.
  *
- * @warning Since for backward pass original src is required, in-place forward
- * pass in general cannot be applied during training. However for some kinds of
- * element wise operations (namely ReLU with alpha parameter equals 0) dst and
- * src can be interchangeable for the backward pass, which allows performing
- * in-place forward even for training.
+ * @warning Because the original src is required for backward pass, in-place
+ * forward pass in general cannot be applied during training. However, for some
+ * kinds of element-wise operations (namely ReLU with alpha parameter equals 0),
+ * dst and src can be interchangeable for the backward pass, which enables
+ * performing in-place forward even for training.
  *
  * @{ */
 
-/** Initializes a @p eltwise_desc for forward propagation using @p prop_kind
- * (possible values are #mkldnn_forward_training or #mkldnn_forward_inference),
- * @p alg_kind algorithm, memory descriptor @p data_desc, and @p alpha,
+/** Initializes an @p eltwise_desc for forward propagation using @p prop_kind
+ * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference),
+ * @p alg_kind algorithm, memory descriptor @p data_desc, @p alpha, and
  * @p beta parameters.
  *
- * @sa mkldnn_eltwise_desc_t for details
+ * @sa mkldnn_eltwise_desc_t for details.
  *
  * Order of inputs:
  *  - src (#mkldnn_query_src_pd, 0)
@@ -1143,11 +1169,11 @@ mkldnn_status_t MKLDNN_API mkldnn_eltwise_forward_desc_init(
         mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *data_desc,
         float alpha, float beta);
 
-/** Initializes a @p eltwise_desc for backward propagation using @p alg_kind
- * algorithm memory descriptors @p diff_data_desc and @p data_desc, and
- * @p alpha, @p beta parameters.
+/** Initializes an @p eltwise_desc for backward propagation using @p alg_kind
+ * algorithm memory descriptors @p diff_data_desc and @p data_desc, and the
+ * @p alpha and @p beta parameters.
  *
- * @sa mkldnn_eltwise_desc_t for details
+ * @sa mkldnn_eltwise_desc_t for details.
  *
  * Order of inputs:
  *  - src (#mkldnn_query_src_pd, 0)
@@ -1163,52 +1189,6 @@ mkldnn_status_t MKLDNN_API mkldnn_eltwise_backward_desc_init(
 
 /** @} */
 
-/** @addtogroup c_api_relu ReLU (deprecated, use Eltwise instead)
- * A primitive to compute a parametric rectifier linear unit (ReLU).
- *
- * \f[dst[n][c][h][w] = \max(src[n][c][h][w], 0) +
- *                      \min(src[n][c][h][w], 0) \cdot negative\_slope\f]
- * @{ */
-
-/** Initializes a @p relu_desc for forward propagation using @p prop_kind
- * (possible values are #mkldnn_forward_training or #mkldnn_forward_inference),
- * @p negative_slope and memory descriptor @p data_desc.
- *
- * @deprecated use mkldnn_eltwise_forward_desc_init() instead, with @p alpha
- * equals @p negative_slope
- *
- * Order of inputs:
- *  - src (#mkldnn_query_src_pd, 0)
- *
- * Order of outputs:
- *  - dst (#mkldnn_query_dst_pd, 0)
- */
-MKLDNN_DEPRECATED
-mkldnn_status_t MKLDNN_API mkldnn_relu_forward_desc_init(
-        mkldnn_relu_desc_t *relu_desc, mkldnn_prop_kind_t prop_kind,
-        const mkldnn_memory_desc_t *data_desc, float negative_slope);
-
-/** Initializes a @p relu_desc for backward propagation using @p negative_slope
- * and memory descriptors @p diff_data_desc and @p data_desc.
- *
- * @deprecated use mkldnn_eltwise_backward_desc_init() instead, with @p alpha
- * equals @p negative_slope
- *
- * Order of inputs:
- *  - src (#mkldnn_query_src_pd, 0)
- *  - diff_dst (#mkldnn_query_diff_dst_pd, 0)
- *
- * Order of outputs:
- *  - diff_src (#mkldnn_query_diff_src_pd, 0)
- */
-MKLDNN_DEPRECATED
-mkldnn_status_t MKLDNN_API mkldnn_relu_backward_desc_init(
-        mkldnn_relu_desc_t *relu_desc,
-        const mkldnn_memory_desc_t *diff_data_desc,
-        const mkldnn_memory_desc_t *data_desc, float negative_slope);
-
-/** @} */
-
 /** @addtogroup c_api_depthwise Depthwise
  * A primitive to compute channel wise operations like scale and shift
  * @{ */
@@ -1237,7 +1217,7 @@ mkldnn_status_t MKLDNN_API mkldnn_depthwise_forward_desc_init(
  * @{ */
 
 /** Initializes a @p softmax_desc for forward propagation using @p prop_kind
- * (possible value are #mkldnn_forward_training or #mkldnn_forward_inference)
+ * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference)
  * and memory descriptor @p data_desc.
  *
  * Order of inputs:
@@ -1280,25 +1260,25 @@ mkldnn_status_t MKLDNN_API mkldnn_softmax_backward_desc_init(
  *     \frac{1}{KW \cdot KH}\sum\limits_{kw,kh}
  *     src[n][ic][oh \cdot s_h - p_l[0] + kh][ow \cdot s_w - p_r[1] + kw],\f]
  *
- * where \f$p_l, p_r\f$ are @p padding_l and @p padding_r
- * respectively and output spatial dimensions are calculated
- * similarly as done in convolution.
+ * where \f$p_l, p_r\f$ are @p padding_l and @p padding_r respectively, and
+ * output spatial dimensions are calculated similarly to how they are done in
+ * convolution.
  *
- * During training max pooling requires workspace on forward
+ * During training, max pooling requires a workspace on forward
  * (#mkldnn_forward_training) and backward (#mkldnn_backward) passes to
- * save indices where maximum was found. Workspace layout is opaque and
- * the indices cannot be restored from it. However one can use backward
+ * save indices where maximum was found. The workspace layout is opaque, and
+ * the indices cannot be restored from it. However, one can use backward
  * pooling to perform up-sampling (used in some detection topologies).
  *
  * @{ */
 
 /** Initializes a pooling descriptor @p pool_desc for forward propagation using
- * @p prop_kind (possible values are #mkldnn_forward_training or
+ * @p prop_kind (possible values are #mkldnn_forward_training and
  * #mkldnn_forward_inference), @p alg_kind, memory descriptors, and pooling
- * parameters in spatial domain: @p strides, @p kernel sizes, @p padding_l, @p
- * padding_r, and @p padding_kind.
+ * parameters in the spatial domain: @p strides, @p kernel sizes, @p padding_l,
+ * @p padding_r, and @p padding_kind.
  *
- * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
  *
  * Order of inputs:
  *  - src (#mkldnn_query_src_pd, 0)
@@ -1317,11 +1297,11 @@ mkldnn_status_t MKLDNN_API mkldnn_pooling_forward_desc_init(
         const mkldnn_dims_t padding_r, mkldnn_padding_kind_t padding_kind);
 
 /** Initializes a pooling descriptor @p pool_desc for backward propagation
- * using @p alg_kind, memory descriptors, and pooling parameters in spatial
+ * using @p alg_kind, memory descriptors, and pooling parameters in the spatial
  * domain: @p strides, @p kernel sizes, @p padding_l, @p padding_r, and @p
  * padding_kind.
  *
- * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ * @note If @p padding_r is @c NULL, the padding is supposed to be symmetric.
  *
  * Order of inputs:
  *  - diff_dst (#mkldnn_query_diff_dst_pd, 0)
@@ -1358,21 +1338,21 @@ mkldnn_status_t MKLDNN_API mkldnn_pooling_backward_desc_init(
  *
  * where \f$n_{l}\f$ is the @p local_size.
  *
- * During training LRN might or might not require workspace on forward
+ * During training, LRN might or might not require a workspace on forward
  * (#mkldnn_forward_training) and backward (#mkldnn_backward) passes. The
  * behavior is implementation specific. Optimized implementations typically
- * require workspace and use it to save some intermediate results from the
+ * require a workspace and use it to save some intermediate results from the
  * forward pass that accelerate computations on the backward pass.
  *
- * To check whether workspace is required one should query the LRN primitive
- * descriptor for the workspace (#mkldnn_query_workspace_pd). Success would
- * indicate the workspace is required and its description would be returned.
+ * To check whether a workspace is required, query the LRN primitive descriptor
+ * for the workspace (#mkldnn_query_workspace_pd). Success indicates that the
+ * workspace is required and its description will be returned.
  * @sa mkldnn_primitive_desc_query and mkldnn_primitive_desc_query_pd
  *
  * @{ */
 
 /** Initializes an @p lrn_desc for forward propagation using @p prop_kind
- * (possible values are #mkldnn_forward_training or #mkldnn_forward_inference),
+ * (possible values are #mkldnn_forward_training and #mkldnn_forward_inference),
  * @p alg_kind, memory descriptor @p data_desc, and regularization
  * parameters @p local_size, @p alpha, @p beta, and @p k.
  *
@@ -1390,7 +1370,7 @@ mkldnn_status_t MKLDNN_API mkldnn_lrn_forward_desc_init(
         int local_size, float alpha, float beta, float k);
 
 /** Initializes an @p lrn_desc for backward propagation using @p alg_kind,
- * memory descriptors @p data_desc, and @p diff_data_desc, and regularization
+ * memory descriptors @p data_desc and @p diff_data_desc, and regularization
  * parameters @p local_size, @p alpha, @p beta, and @p k.
  *
  * Order of inputs:
@@ -1422,26 +1402,26 @@ mkldnn_status_t MKLDNN_API mkldnn_lrn_backward_desc_init(
  * \f$\sigma[c] = \frac{1}{NHW} \sum\limits_{whn}
  *                              (src[n][c][h][w] - \mu[c])^2\f$,
  *
- * and eps is a constant to improve numerical stability.
+ * and @c eps is a constant to improve numerical stability.
  *
- * Both forward and backward passes support in-place operation, i.e. src
- * and dst point to the same memory for forward, and diff_dst and diff_src
+ * Both forward and backward passes support in-place operation; that is, src
+ * and dst point to the same memory for forward pass, and diff_dst and diff_src
  * point to the same memory for backward pass.
  *
  * Batch normalization supports different flavors controlled by
- * mkldnn_batch_normalization_desc_t. For example batch normalization can
- * compute the mean and variance on its own or can take them as inputs.
- * It can either perform scaling and shifting using gamma and beta parameters
- * or not. Optionally it can also perform a fused ReLU, which in case of
- * training would also require a workspace.
+ * mkldnn_batch_normalization_desc_t. For example, batch normalization can
+ * compute the mean and variance on its own or take them as inputs. It can
+ * either perform scaling and shifting using gamma and beta parameters or not.
+ * Optionally it can also perform a fused ReLU, which in case of training would
+ * also require a workspace.
  *
  * @sa mkldnn_batch_normalization_desc_t
  * @{ */
 
 /** Initializes a batch normalization descriptor @p bnrm_desc for forward
- * propagation using @p prop_kind, (possible values are
- * #mkldnn_forward_training or #mkldnn_forward_inference), memory descriptor
- * @p data_desc, normalization parameter @p epsilon and @p flags set using bit
+ * propagation using @p prop_kind (possible values are
+ * #mkldnn_forward_training and #mkldnn_forward_inference), memory descriptor
+ * @p data_desc, normalization parameter @p epsilon, and @p flags set using bit
  * flags of type mkldnn_batch_normalization_desc_t.
  *
  * Order of inputs:
@@ -1465,8 +1445,8 @@ mkldnn_status_t MKLDNN_API mkldnn_lrn_backward_desc_init(
  *      if #mkldnn_fuse_bn_relu bit-flags is set in @p flags
  *      and @p prop_kind = #mkldnn_forward_training
  *
- * @note in-place operation is supported,
- *       i.e. dst points to the same memory as src.
+ * @note In-place operation is supported; that is, dst points to the same memory
+ *       as src.
  *
  * @sa mkldnn_batch_normalization_desc_t
  */
@@ -1477,8 +1457,8 @@ mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_forward_desc_init(
 
 /** Initializes a batch normalization descriptor @p bnrm_desc for backward
  * propagation with respect to data and scale-shift parameters using memory
- * descriptors @p data_desc and @p diff_data_desc, and normalization parameter
- * @p epsilon and @p flags set using bit flags of type
+ * descriptors @p data_desc and @p diff_data_desc, normalization parameter
+ * @p epsilon, and @p flags set using bit flags of type
  * mkldnn_batch_normalization_desc_t.
  *
  * Order of inputs:
@@ -1515,7 +1495,7 @@ mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_backward_desc_init(
  * A primitive to compute an inner product.
  *
  * Inner product layer is also known as fully connected layer.
- * with spatial dimension:
+ * With spatial dimension:
  *
  * \f[dst[n][oc] = \sum\limits_{ic, kh, kw}
  *                 src[n][ic][kh][kw] \cdot weights[oc][ic][kh][kw]
@@ -1523,13 +1503,13 @@ mkldnn_status_t MKLDNN_API mkldnn_batch_normalization_backward_desc_init(
  * @{ */
 
 /** Initializes an inner product descriptor @p ip_desc for forward propagation
- * using @p prop_kind (possible values are #mkldnn_forward_training or
+ * using @p prop_kind (possible values are #mkldnn_forward_training and
  * #mkldnn_forward_inference) and memory descriptors. In order to create an
  * inner product without bias, @p bias_desc should be either @c NULL or a
- * pointer to descriptor with memory format equals to #mkldnn_format_undef.
+ * pointer to a descriptor with memory format equal to #mkldnn_format_undef.
  *
  * @note
- *     memory descriptors are allowed to be initialized with #mkldnn_any value
+ *     Memory descriptors are allowed to be initialized with #mkldnn_any value
  *     of @p format_kind.
  *
  * Order of inputs:
@@ -1551,7 +1531,7 @@ mkldnn_status_t MKLDNN_API mkldnn_inner_product_forward_desc_init(
  * with respect to data using memory descriptors.
  *
  * @note
- *     memory descriptors are allowed to be initialized with #mkldnn_any value
+ *     Memory descriptors are allowed to be initialized with #mkldnn_any value
  *     of @p format_kind.
  *
  * Order of inputs:
@@ -1571,7 +1551,7 @@ mkldnn_status_t MKLDNN_API mkldnn_inner_product_backward_data_desc_init(
  * with respect to weights using memory descriptors.
  *
  * @note
- *     memory descriptors are allowed to be initialized with #mkldnn_any value
+ *     Memory descriptors are allowed to be initialized with #mkldnn_any value
  *     of @p format_kind.
  *
  * Order of inputs:
@@ -1591,43 +1571,17 @@ mkldnn_status_t MKLDNN_API mkldnn_inner_product_backward_weights_desc_init(
 
 /** @} */
 
-/** @addtogroup c_api_convolution_relu Convolution followed by ReLU (deprecated)
- * A merged primitive to compute a convolution followed by relu.
- * @{ */
-
-/** Initializes a merged convolution-relu descriptor @p conv_relu_desc for
- * forward propagation (supported inference mode only) using convolution
- * descriptor @p conv_desc and ReLU parameter @p negative slope.
- *
- * @deprecated use mkldnn_convolution_desc_init with
- * mkldnn_post_ops_append_eltwise to append ReLU
- *
- * Order of inputs:
- *  - src (#mkldnn_query_src_pd, 0)
- *  - weights (#mkldnn_query_weights_pd, 0)
- *  - bias (#mkldnn_query_weights_pd, 1),
- *      if convolution is created with bias
- *
- * Order of outputs:
- *  - dst (#mkldnn_query_dst_pd, 0)
- */
-mkldnn_status_t MKLDNN_API mkldnn_convolution_relu_desc_init(
-        mkldnn_convolution_relu_desc_t *conv_relu_desc,
-        const mkldnn_convolution_desc_t *conv_desc, float negative_slope);
-
-/** @} */
-
 /** @addtogroup c_api_rnn RNN
- * A primitive to compute common recurrent layer.
+ * A primitive to compute the common recurrent layer.
  * @todo add additional description for the group
  * @{ */
 
 /**
  * Initializes a recurrent cell descriptor @p rnn_cell_desc
  * using @p rnn_cell_desc, @p kind (possible values are
- *  #mkldnn_vanilla_rnn, #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru,
+ *  #mkldnn_vanilla_rnn, #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru, and
  *  #mkldnn_gru_linear_before_reset),
- *  @p f (possible values are #mkldnn_eltwise_relu,
+ *  @p f (possible values are #mkldnn_eltwise_relu and
  *   #mkldnn_eltwise_tanh), @p flags, @p alpha, and @p clipping.
  */
 mkldnn_status_t MKLDNN_API mkldnn_rnn_cell_desc_init(
@@ -1643,16 +1597,94 @@ int MKLDNN_API mkldnn_rnn_cell_get_gates_count(
 int MKLDNN_API mkldnn_rnn_cell_get_states_count(
         const mkldnn_rnn_cell_desc_t *rnn_cell_desc);
 
+/** Sets quantization @p scale and @p shift for RNN data tensors.
+ *  For performance reasons, low precision configuration of RNN primitive
+ *  expects input activations to have unsigned int8 data type. Scale and shift
+ *  used to quantize floating point data to unsigned integer must be passed to
+ *  RNN primitive using attributes.
+ *  Example usage:
+ * @code
+ *      // rnn parameters
+ *      int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
+ *      // activations quantization parameters
+ *      float scale = ..., shift = ..;
+ *
+ *      mkldnn_primitive_attr_t rnn_attr;
+ *      // create default attributes
+ *      mkldnn_primitive_attr_create(&rnn_attr);
+ *
+ *      // set scale and shift for int8 quantization of activation
+ *      mkldnn_primitive_attr_set_rnn_data_qparams(rnn_attr, scale, shift);
+ *
+ *      // create & configure rnn op_desc
+ *      mkldnn_rnn_desc_t rnn_d;
+ *      mkldnn_primitive_desc_t rnn_pd;
+ *      mkldnn_primitive_desc_create_v2(&rnn_pd, &rnn_d, attr, NULL);
+ * @endcode
+ * @note
+ *      Quantization scale and shift are common for src_layer, src_iter,
+ *      dst_iter and dst_layer.
+ */
+mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_rnn_data_qparams(
+        mkldnn_primitive_attr_t attr, const float scale, const float shift);
+
+/** Sets quantization scales @p weights_scales for RNN weights tensors.
+ * Low precision configuration of RNN primitive expects input weights to have
+ * signed int8 data type. Scales used to quantize floating point data
+ * to signed integer must be passed to RNN primitive using attributes.
+ * The @p mask argument defines correspondence between output tensor dimensions
+ * and the @p weights_scales array. Set i-th bit of @p mask to 1 to use
+ * dedicated scaling factor for each slice of the output tensor over i-th
+ * dimension. Set @p mask to 0 to use common scaling factor for the whole output
+ * tensor. Example usage:
+ * @code
+ *      // rnn parameters
+ *      int l = 2, t = 2, mb = 32, sic = 32, slc = 32, dic = 32, dlc = 32;
+ *      // unique output scales per output channel
+ *      float weights_scales[dic * n_gates] = { ... };
+ *      // mask that specifies last two dimensions of ldigo format
+ *      int mask = 0x3;
+ *
+ *      mkldnn_primitive_attr_t attr;
+ *      // create default attributes
+ *      mkldnn_primitive_attr_create(&attr);
+ *
+ *      // set output channel-wise weights scales
+ *      mkldnn_primitive_attr_set_rnn_weights_qparams(attr, dic * n_gates, mask,
+ *              weights_scales);
+ *
+ *      // create & configure rnn op_desc
+ *      mkldnn_rnn_desc_t rnn_d;
+ *      mkldnn_primitive_desc_t rnn_pd;
+ *      mkldnn_primitive_desc_create_v2(&rnn_pd, &rnn_d, attr, NULL);
+ * @endcode
+ * @note
+ *      The dimension order is always native and does not depend on the actual
+ *      layout used. For example, 5 dimensional weights always have
+ *      (l, d, i, g, o) logical dimension ordering.
+ * @note
+ *      Quantization sales are common for weights_layer and weights_iteration
+ * @note
+ *      There is no way to check that @p count corresponds to @p mask until an
+ *      actual primitive descriptor is created, so it is user's responsibility
+ *      to set proper values. The following formula must be held:
+ *
+ *      \f[count = \prod\limits_{d \in mask} output.dims[d]\f]
+ */
+mkldnn_status_t MKLDNN_API mkldnn_primitive_attr_set_rnn_weights_qparams (
+        mkldnn_primitive_attr_t attr, int count, int mask,
+                const float *weights_scales);
+
 /** Initializes a rnn descriptor @p rnn_desc for forward propagation
  * using @p prop_kind, @p rnn_cell_desc, @p direction, and memory descriptors.
- * @note if @p prop_kind equals #mkldnn_forward_training, you need to query a
+ * @note If @p prop_kind equals #mkldnn_forward_training, you must query a
  * workspace memory descriptor before creating the primitive.
  *
- * @p src_iter_desc, @p bias_desc, and @p dst_iter_desc are allowed to be
- * either NULL or point to a zero memory descriptor that would indicate
+ * @p src_iter_desc, @p bias_desc, and @p dst_iter_desc are allowed to either be
+ * @c NULL or point to a zero memory descriptor, which would indicate that the
  * RNN primitive should not use them.
  *
- * @note all memory descriptors except @p src_iter_desc are allowed to be
+ * @note All memory descriptors except @p src_iter_desc are allowed to be
  * initialized with #mkldnn_any value of @p format_kind.
  *
  * Order of inputs:
@@ -1682,14 +1714,14 @@ mkldnn_status_t MKLDNN_API mkldnn_rnn_forward_desc_init(
 
 /** Initializes a rnn descriptor @p rnn_desc for backward propagation
  * using @p prop_kind, @p rnn_cell_desc, @p direction, and memory descriptors.
- * @note all memory descriptors are allowed to be initialized with
+ * @note All memory descriptors are allowed to be initialized with
  * #mkldnn_any value of @p format_kind.
  *
  * @p src_iter_desc (simultaneously with @p diff_src_iter_desc),
  * @p bias_desc (simultaneously with @p diff_bias_desc), and
- * @p dst_iter_desc (simultaneously with @p diff_src_iter_desc) are allowed
- * to be either NULL or point to a zero memory descriptor that would indicate
- * RNN primitive should not use them.
+ * @p dst_iter_desc (simultaneously with @p diff_src_iter_desc) are allowed to
+ * either be @c NULL or point to a zero memory descriptor, which would indicate
+ * that the RNN primitive should not use them.
  *
  * Order of inputs:
  *  - src_layer (#mkldnn_query_src_pd, 0)
@@ -1747,6 +1779,50 @@ mkldnn_status_t MKLDNN_API mkldnn_roi_pooling_forward_desc_init(
 
 /** @} */
 
+/** @addtogroup c_api_binary_convolution Binary convolution
+ * A primitive to compute binary convolution using different algorithms.
+ * @{ */
+
+/** Initializes a dilated binary convolution descriptor @p bin_conv_desc for forward
+ * propagation using @p prop_kind (possible values are #mkldnn_forward_training
+ * or #mkldnn_forward_inference), @p alg_kind, memory descriptors, @p strides,
+ * @p dilates, @p padding_l, @p padding_r, and @p padding_kind.
+ *
+ * @note if @p padding_r is @c NULL, the padding is supposed to be symmetric
+ *
+ * @note memory descriptors are allowed to be initialized with #mkldnn_any
+ * value of @p format_kind.
+ *
+ * Order of inputs:
+ *  - src (#mkldnn_query_src_pd, 0)
+ *  - weights (#mkldnn_query_weights_pd, 0)
+ *
+ * Order of outputs:
+ *  - dst (#mkldnn_query_dst_pd, 0)
+ */
+mkldnn_status_t MKLDNN_API mkldnn_dilated_binary_convolution_forward_desc_init(
+        mkldnn_binary_convolution_desc_t *bin_conv_desc, mkldnn_prop_kind_t prop_kind,
+        mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
+        const mkldnn_memory_desc_t *weights_desc,
+        const mkldnn_memory_desc_t *dst_desc, const mkldnn_dims_t strides,
+        const mkldnn_dims_t dilates, const mkldnn_dims_t padding_l,
+        const mkldnn_dims_t padding_r, float pad_value);
+
+/** @} */
+
+/** @addtogroup c_api_binarization Binarization
+ * A primitive to binarize input using different approaches
+ * @{ */
+
+/** Initializes a @p binarization_desc for forward propagation using @p prop_kind
+ * (possible values are #mkldnn_forward_training or #mkldnn_forward_inference),
+ * @p alg_kind algorithm and memory descriptors.
+ * @sa mkldnn_binarization_desc_t for details */
+mkldnn_status_t MKLDNN_API mkldnn_binarization_forward_desc_init(
+        mkldnn_binarization_desc_t *binarization_desc, mkldnn_prop_kind_t prop_kind,
+        mkldnn_alg_kind_t alg_kind, const mkldnn_memory_desc_t *src_desc,
+        const mkldnn_memory_desc_t *dst_desc, const mkldnn_memory_desc_t *weights_desc);
+
 /** @} */
 
 /** @addtogroup c_api_engine Engine operations
@@ -1803,13 +1879,31 @@ mkldnn_status_t MKLDNN_API mkldnn_stream_destroy(mkldnn_stream_t stream);
 
 /** Sets verbosity level (print information to stdout).
  * Possible levels are:
- *  - 0 -- no verbose output
+ *  - 0 -- no verbose output (default)
  *  - 1 -- primitive information at execution
  *  - 2 -- primitive information at creation and execution
  *
  * @note
- *     Dumping information might affect performance */
-mkldnn_status_t MKLDNN_API mkldnn_verbose_set(int level);
+ *     Dumping information might affect performance.
+ *     This setting overrides the MKLDNN_VERBOSE environment variable. */
+mkldnn_status_t MKLDNN_API mkldnn_set_verbose(int level);
+
+/** Sets jit dump control.
+ * dump equals:
+ *  - zero -- turn jit dump off (default)
+ *  - non-zero -- turn jit dump on
+ *
+ * @note
+ *     This setting overrides the MKLDNN_JIT_DUMP environment variable. */
+mkldnn_status_t MKLDNN_API mkldnn_set_jit_dump(int dump);
+
+/** Gets library version information.
+ * Version information includes:
+ *  - major -- major version number
+ *  - minor -- minor version number
+ *  - patch -- patch release number
+ *  - hash -- git commit hash */
+const mkldnn_version_t MKLDNN_API *mkldnn_version();
 
 /** Returns cache size for specified level in bytes.
  * @note
@@ -1820,44 +1914,60 @@ unsigned int MKLDNN_API mkldnn_get_cache_size(int level, int per_core);
 /** @} */
 
 /** @addtogroup c_api_blas BLAS functions
+ * A subset of Basic Linear ALgebra (BLAS) functions to perform
+ * matrix-matrix multiplication.
  * @{ */
 
-/** SGEMM performs matrix-matrix multiplication operation
- * C := alpha*op( A )*op( B ) + beta*C,
- * where  op( X ) is one of
- * op( X ) = X or op( X ) = X**T,
- * alpha and beta are scalars, and A, B and C are matrices, with op( A )
- * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
+/** SGEMM performs a matrix-matrix multiplication operation defined as
+ *
+ * C := alpha*op( A )*op( B ) + beta*C
+ *
+ * where
+ *  - op( X ) is one of op( X ) = X or op( X ) = X**T,
+ *  - alpha and beta are scalars,
+ *  - A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix
+ *    and C an m by n matrix.
+ *
+ * The matrices are assumed to be stored in column-major order (the elements
+ * in a matrix columns are contiguous in memory).
+ *
  * @note
- *      API is different compared to standard BLAS routine
- *      as it returns mkldnn_status_t for error handling.
+ *      The API is different from the standard BLAS routine
+ *      because it returns mkldnn_status_t for error handling.
  *      XERBLA is not supported: no error message will be printed
- *      in case of incorrect parameters */
+ *      in case of incorrect parameters. */
 mkldnn_status_t MKLDNN_API mkldnn_sgemm(const char *transa, const char *transb,
         const int *M, const int *N, const int *K,
         const float *alpha, const float *A, const int *lda,
         const float *B, const int *ldb,
         const float *beta, float *C, const int *ldc);
 
-/** gemm_s8u8s32 and gemm_s8s8s32 perform matrix-matrix multiplication operation
- * and add the result to a scalar-matrix product. To get the final result,
- * a vector is added to each row or column of the output matrix.
+/** gemm_s8u8s32 and gemm_s8s8s32 perform a matrix-matrix multiplication
+ * operation and add the result to a scalar-matrix product. For the final
+ * result, a vector is added to each row or column of the output matrix.
  * The operation is defined as:
+ *
  * C := alpha*(op(A) + A_offset) * (op(B) + B_offset) + beta*C + C_offset
- * where op( X ) = X or op( X ) = X**T,
- * A_offset is an m-by-k matrix with every element equal to the value oa,
- * B_offset is an k-by-n matrix with every element equal to the value ob,
- * C_offset is an m-by-n matrix defined by the oc array, size len:
- * if offsetc = F: len must be at least 1
- * if offsetc = C: len must be at least max(1, m)
- * if offsetc = R: len must be at least max(1, n)
- * alpha and beta are scalars, and A, B and C are matrices, with op( A )
- * an m-by-k matrix, op( B ) a k-by-n matrix and C an m-by-n matrix.
+ *
+ * where
+ *  - op( X ) = X or op( X ) = X**T,
+ *  - A_offset is an m-by-k matrix with every element equal to the value oa,
+ *  - B_offset is an k-by-n matrix with every element equal to the value ob,
+ *  - C_offset is an m-by-n matrix defined by the oc array, size len:
+ *    - if offsetc = F: len must be at least 1
+ *    - if offsetc = C: len must be at least max(1, m)
+ *    - if offsetc = R: len must be at least max(1, n)
+ *  - alpha and beta are scalars, and A, B and C are matrices, with op( A )
+ *    an m-by-k matrix, op( B ) a k-by-n matrix and C an m-by-n matrix.
+ *
+ * The matrices are assumed to be stored in column-major order (the elements
+ * in a matrix columns are contiguous in memory).
+ *
  * @note
- *      API is different compared to standard BLAS routine
- *      as it returns mkldnn_status_t for error handling.
+ *      The API is different compared with the standard BLAS routine
+ *      because it returns mkldnn_status_t for error handling.
  *      XERBLA is not supported: no error message will be printed
- *      in case of incorrect parameters */
+ *      in case of incorrect parameters. */
 mkldnn_status_t MKLDNN_API mkldnn_gemm_s8u8s32(const char *transa,
         const char *transb, const char *offsetc, const int *M, const int *N,
         const int *K, const float *alpha, const int8_t *A, const int *lda,
diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp
index b0869e77a..2ce46c91f 100644
--- a/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn.hpp
@@ -123,14 +123,14 @@ public:
         shuffle = mkldnn_shuffle,
         eltwise = mkldnn_eltwise,
         depthwise = mkldnn_depthwise,
-        relu = mkldnn_relu,
         softmax = mkldnn_softmax,
         pooling = mkldnn_pooling,
         lrn = mkldnn_lrn,
         batch_normalization = mkldnn_batch_normalization,
         inner_product = mkldnn_inner_product,
-        convolution_relu = mkldnn_convolution_relu,
         rnn = mkldnn_rnn,
+        binary_convolution = mkldnn_binary_convolution,
+        binarization = mkldnn_binarization,
     };
 
     /// A wrapper structure to specify a particular output of a primitive.
@@ -149,7 +149,7 @@ public:
         inline operator primitive() const;
     };
 
-    /// Returns the descriptor of the underlying C API primitive
+    /// Returns the descriptor of the underlying C API primitive.
     inline const_mkldnn_primitive_desc_t get_primitive_desc() const;
     // TODO: use the C++ API wrapper structure.
 };
@@ -257,6 +257,7 @@ inline mkldnn_prop_kind_t convert_to_c(prop_kind kind) {
 
 enum algorithm {
     algorithm_undef = mkldnn_alg_kind_undef,
+    convolution_auto = mkldnn_convolution_auto,
     convolution_direct = mkldnn_convolution_direct,
     convolution_winograd = mkldnn_convolution_winograd,
     deconvolution_direct = mkldnn_deconvolution_direct,
@@ -272,6 +273,8 @@ enum algorithm {
     eltwise_soft_relu = mkldnn_eltwise_soft_relu,
     eltwise_logistic = mkldnn_eltwise_logistic,
     eltwise_clamp = mkldnn_eltwise_clamp,
+    eltwise_exp = mkldnn_eltwise_exp,
+    eltwise_not = mkldnn_eltwise_not,
     depthwise_scale_shift = mkldnn_depthwise_scale_shift,
     depthwise_prelu = mkldnn_depthwise_prelu,
     lrn_across_channels = mkldnn_lrn_across_channels,
@@ -285,7 +288,9 @@ enum algorithm {
     vanilla_gru = mkldnn_vanilla_gru,
     gru_linear_before_reset = mkldnn_gru_linear_before_reset,
     roi_pooling_max = mkldnn_roi_pooling_max,
-    roi_pooling_bilinear = mkldnn_roi_pooling_bilinear
+    roi_pooling_bilinear = mkldnn_roi_pooling_bilinear,
+    binary_convolution_direct = mkldnn_binary_convolution_direct,
+    binarization_depthwise = mkldnn_binarization_depthwise
 };
 
 inline mkldnn_alg_kind_t convert_to_c(algorithm aalgorithm) {
@@ -295,7 +300,6 @@ inline mkldnn_alg_kind_t convert_to_c(algorithm aalgorithm) {
 enum batch_normalization_flag {
     use_global_stats = mkldnn_use_global_stats,
     use_scale_shift = mkldnn_use_scaleshift,
-    omit_stats = mkldnn_omit_stats,
     fuse_bn_relu = mkldnn_fuse_bn_relu
 };
 
@@ -337,14 +341,14 @@ enum query {
     shuffle_d = mkldnn_query_shuffle_d,
     eltwise_d = mkldnn_query_eltwise_d,
     depthwise_d = mkldnn_query_depthwise_d,
-    relu_d = mkldnn_query_relu_d,
     softmax_d = mkldnn_query_softmax_d,
     pooling_d = mkldnn_query_pooling_d,
     lrn_d = mkldnn_query_lrn_d,
     batch_normalization_d = mkldnn_query_batch_normalization_d,
     inner_product_d = mkldnn_query_inner_product_d,
-    convolution_relu_d = mkldnn_query_convolution_relu_d,
     rnn_d = mkldnn_query_rnn_d,
+    binary_convolution_d = mkldnn_query_binary_convolution_d,
+    binarization_d = mkldnn_query_binarization_d,
 
     input_pd = mkldnn_query_input_pd,
     output_pd = mkldnn_query_output_pd,
@@ -448,6 +452,18 @@ struct post_ops: public handle<mkldnn_post_ops_t> {
                 &in_h, &in_w, &ker_h, &ker_w, &str_h, &str_w, weights_data, biases_data),
                           "could not get dw conv params");
     }
+
+    void append_binarization(algorithm alg, const float* weights_data) {
+        error::wrap_c_api(mkldnn_post_ops_append_binarization(get(), convert_to_c(alg), weights_data),
+                "could not append binarization");
+    }
+
+    void get_params_binarization(int index, algorithm &alg, const float** weights_data) const {
+        mkldnn_alg_kind_t c_alg;
+        error::wrap_c_api(mkldnn_post_ops_get_params_binarization(get(), index, &c_alg, weights_data),
+                "could not get binarization params");
+        alg = static_cast<algorithm>(c_alg);
+    }
 };
 
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
@@ -511,12 +527,25 @@ struct primitive_attr: public handle<mkldnn_primitive_attr_t> {
         error::wrap_c_api(mkldnn_primitive_attr_set_post_ops(get(), ops.get()),
                 "could not set post operation sequence");
     }
+
+    void set_rnn_data_qparams(const float scale, const float shift)
+    {
+        error::wrap_c_api(mkldnn_primitive_attr_set_rnn_data_qparams(get(),
+                    scale, shift), "could not set rnn data int scale/shift");
+    }
+
+    void set_rnn_weights_qparams(int mask, const std::vector<float> &scales)
+    {
+        error::wrap_c_api(mkldnn_primitive_attr_set_rnn_weights_qparams(get(),
+                    (int)scales.size(), mask, &scales[0]),
+                "could not set rnn weights int scales");
+    }
 };
 
 /// @}
 
 /// @addtogroup cpp_api_engine Engine
-/// Engine operations
+/// Engine operations.
 ///
 /// @sa @ref c_api_engine in @ref c_api
 /// @{
@@ -532,7 +561,7 @@ struct engine: public handle<mkldnn_engine_t> {
     friend class primitive;
     // gcc bug??? using handle::handle;
 
-    /// Kinds of engines
+    /// Kinds of engines.
     enum kind {
         /// An unspecified engine
         any = mkldnn_any_engine,
@@ -600,7 +629,7 @@ private:
 /// @addtogroup cpp_api_memory Memory
 /// A primitive to describe and store data.
 ///
-/// For more information please refer to @ref c_api_memory in @ref c_api
+/// For more information, refer to @ref c_api_memory in @ref c_api.
 /// @{
 
 /// Memory primitive that describes the data.
@@ -626,6 +655,7 @@ struct memory: public primitive  {
         s16 = mkldnn_s16,
         s8 = mkldnn_s8,
         u8 = mkldnn_u8,
+        bin = mkldnn_bin,
     };
 
     /// Memory format specification. See #mkldnn_memory_format_t
@@ -642,22 +672,28 @@ struct memory: public primitive  {
         nchw = mkldnn_nchw,
         nhwc = mkldnn_nhwc,
         chwn = mkldnn_chwn,
+        nCw4c = mkldnn_nCw4c,
         nCw8c = mkldnn_nCw8c,
+        nChw4c = mkldnn_nChw4c,
         nChw8c = mkldnn_nChw8c,
         nChw16c = mkldnn_nChw16c,
         ncdhw = mkldnn_ncdhw,
         ndhwc = mkldnn_ndhwc,
+        nCdhw4c = mkldnn_nCdhw4c,
         nCdhw8c = mkldnn_nCdhw8c,
         nCdhw16c = mkldnn_nCdhw16c,
         oi = mkldnn_oi,
         io = mkldnn_io,
         oiw = mkldnn_oiw,
         wio = mkldnn_wio,
+        Owi4o = mkldnn_Owi4o,
+        OIw4i4o = mkldnn_OIw4i4o,
         Owi8o = mkldnn_Owi8o,
         OIw8o8i = mkldnn_OIw8o8i,
         OIw8i8o = mkldnn_OIw8i8o,
         OIw16i16o = mkldnn_OIw16i16o,
         OIw16o16i = mkldnn_OIw16o16i,
+        Oiw4o = mkldnn_Oiw4o,
         Oiw16o = mkldnn_Oiw16o,
         Owi16o = mkldnn_Owi16o,
         OIw8i16o2i = mkldnn_OIw8i16o2i,
@@ -666,20 +702,25 @@ struct memory: public primitive  {
         oihw = mkldnn_oihw,
         ihwo = mkldnn_ihwo,
         hwio = mkldnn_hwio,
+        iohw = mkldnn_iohw,
         hwio_s8s8 = mkldnn_hwio_s8s8,
         dhwio = mkldnn_dhwio,
         oidhw = mkldnn_oidhw,
+        OIdhw4i4o = mkldnn_OIdhw4i4o,
+        Odhwi4o = mkldnn_Odhwi4o,
         OIdhw8i8o = mkldnn_OIdhw8i8o,
         OIdhw8o8i = mkldnn_OIdhw8o8i,
         Odhwi8o = mkldnn_Odhwi8o,
         OIdhw16i16o = mkldnn_OIdhw16i16o,
         OIdhw16o16i = mkldnn_OIdhw16o16i,
+        Oidhw4o = mkldnn_Oidhw4o,
         Oidhw16o = mkldnn_Oidhw16o,
         Odhwi16o = mkldnn_Odhwi16o,
         oIhw8i = mkldnn_oIhw8i,
         oIhw16i = mkldnn_oIhw16i,
         oIdhw8i = mkldnn_oIdhw8i,
         oIdhw16i = mkldnn_oIdhw16i,
+        OIhw4i4o = mkldnn_OIhw4i4o,
         OIhw8i8o = mkldnn_OIhw8i8o,
         OIhw16i16o = mkldnn_OIhw16i16o,
         OIhw8o8i = mkldnn_OIhw8o8i,
@@ -691,18 +732,25 @@ struct memory: public primitive  {
         OIhw4i16o4i = mkldnn_OIhw4i16o4i,
         OIhw4i16o4i_s8s8 = mkldnn_OIhw4i16o4i_s8s8,
         Oihw8o = mkldnn_Oihw8o,
+        Oihw4o = mkldnn_Oihw4o,
         Oihw16o = mkldnn_Oihw16o,
         Ohwi8o = mkldnn_Ohwi8o,
+        Ohwi4o = mkldnn_Ohwi4o,
         Ohwi16o = mkldnn_Ohwi16o,
         OhIw16o4i = mkldnn_OhIw16o4i,
         OhIw8o4i = mkldnn_OhIw8o4i,
+        OhIw8o32i = mkldnn_OhIw8o32i,
+        OhIw16o32i = mkldnn_OhIw16o32i,
         OhIw8o4i_s8s8 = mkldnn_OhIw8o4i_s8s8,
         goiw = mkldnn_goiw,
+        gOwi4o = mkldnn_gOwi4o,
+        gOIw4i4o = mkldnn_gOIw4i4o,
         gOwi8o = mkldnn_gOwi8o,
         gOIw8o8i = mkldnn_gOIw8o8i,
         gOIw8i8o = mkldnn_gOIw8i8o,
         gOIw16i16o = mkldnn_gOIw16i16o,
         gOIw16o16i = mkldnn_gOIw16o16i,
+        gOiw4o = mkldnn_gOiw4o,
         gOiw16o = mkldnn_gOiw16o,
         gOwi16o = mkldnn_gOwi16o,
         gOIw8i16o2i = mkldnn_gOIw8i16o2i,
@@ -710,10 +758,14 @@ struct memory: public primitive  {
         gOIw8o16i2o = mkldnn_gOIw8o16i2o,
         goihw = mkldnn_goihw,
         hwigo = mkldnn_hwigo,
+        giohw = mkldnn_giohw,
         hwigo_s8s8 = mkldnn_hwigo_s8s8,
+        gOIdhw4i4o = mkldnn_gOIdhw4i4o,
+        gOdhwi4o = mkldnn_gOdhwi4o,
         gOIdhw8i8o = mkldnn_gOIdhw8i8o,
         gOIdhw8o8i = mkldnn_gOIdhw8o8i,
         gOdhwi8o = mkldnn_gOdhwi8o,
+        gOIhw4i4o = mkldnn_gOIhw4i4o,
         gOIhw8i8o = mkldnn_gOIhw8i8o,
         gOIhw16i16o = mkldnn_gOIhw16i16o,
         gOIhw8i16o2i = mkldnn_gOIhw8i16o2i,
@@ -721,12 +773,19 @@ struct memory: public primitive  {
         gOIhw8o16i2o = mkldnn_gOIhw8o16i2o,
         gOIhw4i16o4i = mkldnn_gOIhw4i16o4i,
         gOIhw4i16o4i_s8s8 = mkldnn_gOIhw4i16o4i_s8s8,
+        gOIhw2i8o4i = mkldnn_gOIhw2i8o4i,
+        gOIhw2i8o4i_s8s8 = mkldnn_gOIhw2i8o4i_s8s8,
         gOihw8o = mkldnn_gOihw8o,
+        gOihw4o = mkldnn_gOihw4o,
         gOihw16o = mkldnn_gOihw16o,
+        gOhwi4o = mkldnn_gOhwi4o,
         gOhwi8o = mkldnn_gOhwi8o,
         gOhwi16o = mkldnn_gOhwi16o,
         Goihw8g = mkldnn_Goihw8g,
         Goihw16g = mkldnn_Goihw16g,
+        Goihw16g_s8s8 = mkldnn_Goihw16g_s8s8,
+        gOIhw4o4i = mkldnn_gOIhw4o4i,
+        gOIhw4o4i_s8s8 = mkldnn_gOIhw4o4i_s8s8,
         gOIhw8o8i = mkldnn_gOIhw8o8i,
         gOIhw16o16i = mkldnn_gOIhw16o16i,
         gIOhw16o16i = mkldnn_gIOhw16o16i,
@@ -736,16 +795,16 @@ struct memory: public primitive  {
         goidhw = mkldnn_goidhw,
         gOIdhw16i16o = mkldnn_gOIdhw16i16o,
         gOIdhw16o16i = mkldnn_gOIdhw16o16i,
+        gOidhw4o = mkldnn_gOidhw4o,
         gOidhw16o = mkldnn_gOidhw16o,
         gOdhwi16o = mkldnn_gOdhwi16o,
         ntc = mkldnn_ntc,
         tnc = mkldnn_tnc,
         ldsnc = mkldnn_ldsnc,
         ldigo = mkldnn_ldigo,
-        ldigo_p = mkldnn_ldigo_p,
         ldgoi = mkldnn_ldgoi,
-        ldgoi_p = mkldnn_ldgoi_p,
         ldgo = mkldnn_ldgo,
+        rnn_packed = mkldnn_rnn_packed,
         wino_fmt = mkldnn_wino_fmt,
         format_last = mkldnn_format_last,
     };
@@ -1080,7 +1139,7 @@ struct view : public primitive {
 /// @}
 
 /// @addtogroup cpp_api_concat Concat
-/// A primitive to concatenate data by arbitrary dimension
+/// A primitive to concatenate data by arbitrary dimension.
 ///
 /// @sa @ref c_api_concat in @ref c_api
 /// @{
@@ -1157,7 +1216,7 @@ struct concat : public primitive {
 /// @}
 
 /// @addtogroup cpp_api_sum Sum
-/// A primitive to sum data
+/// A primitive to sum data.
 ///
 /// @sa @ref c_api_sum in @ref c_api
 /// @{
@@ -1211,38 +1270,6 @@ struct sum : public primitive {
             reset(result);
         }
 
-        /** @deprecated: api backwards compatibility for double scales type */
-        MKLDNN_DEPRECATED
-        primitive_desc(const memory::desc &output, std::vector<double> scale,
-                std::vector<memory::primitive_desc> inputs) {
-            mkldnn_primitive_desc_t result;
-
-            auto c_api_inputs = cpp_to_c(inputs);
-            auto scale_f = scale_to_float(scale);
-
-            error::wrap_c_api(mkldnn_sum_primitive_desc_create(
-                    &result, &output.data, (int)c_api_inputs.size(),
-                    &scale_f[0], &c_api_inputs[0]),
-                "could not create a sum primitive descriptor");
-            reset(result);
-        }
-
-        /** @deprecated: api backwards compatibility for double scales type */
-        MKLDNN_DEPRECATED
-        primitive_desc(std::vector<double> scale,
-                std::vector<memory::primitive_desc> inputs) {
-            mkldnn_primitive_desc_t result;
-
-            auto c_api_inputs = cpp_to_c(inputs);
-            auto scale_f = scale_to_float(scale);
-
-            error::wrap_c_api(mkldnn_sum_primitive_desc_create(
-                    &result, nullptr, (int)c_api_inputs.size(), &scale_f[0],
-                    &c_api_inputs[0]),
-                "could not create a sum primitive descriptor");
-            reset(result);
-        }
-
         memory::primitive_desc dst_primitive_desc() const {
             memory::primitive_desc adesc;
             mkldnn_primitive_desc_t cdesc;
@@ -1273,14 +1300,6 @@ struct sum : public primitive {
                 "could not create a sum primitive");
         reset(result);
     }
-
-private:
-    static std::vector<float> scale_to_float(const std::vector<double> &vd) {
-        std::vector<float> vf(vd.size());
-        std::transform(vd.begin(), vd.end(), vf.begin(),
-                [=](double x){return (float)x;});
-        return vf;
-    }
 };
 
 /// @}
@@ -1293,7 +1312,7 @@ private:
 /// @addtogroup cpp_api_primitive_descriptors Primitive descriptors
 /// @{
 
-/// A base class for all primitive descriptors
+/// A base class for all primitive descriptors.
 struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
     primitive_desc(const_mkldnn_op_desc_t desc, const primitive_attr *attr,
             const engine &e, const_mkldnn_primitive_desc_t hint_fwd_pd) {
@@ -1331,7 +1350,7 @@ struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
         return res;
     }
 
-    /// Advances the next implementation for the given op descriptor
+    /// Advances the next implementation for the given op descriptor.
     ///
     /// Returns:
     /// - @c true on success
@@ -1347,7 +1366,7 @@ struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
         return true;
     }
 
-    /// Queries and returns requested memory primitive descriptor
+    /// Queries and returns requested memory primitive descriptor.
     memory::primitive_desc query_mpd(query what, int idx = 0) const {
         std::vector<query> valid_w{input_pd, output_pd, src_pd, diff_src_pd,
             weights_pd, diff_weights_pd, dst_pd, diff_dst_pd, workspace_pd};
@@ -1727,66 +1746,6 @@ struct convolution_backward_weights : public primitive {
     }
 };
 
-/// A merged convolution-relu primitive for inference mode only
-///
-/// @deprecated consider using convolution_forward with post_ops
-/// (e.g. post_ops::append_eltwise(1.f, #eltwise_relu, negative_slope, 0.f)
-struct convolution_relu_forward : public primitive {
-    struct desc {
-        mkldnn_convolution_relu_desc_t data;
-
-        desc(const convolution_forward::desc conv_desc,
-                const float negative_slope) {
-            error::wrap_c_api(mkldnn_convolution_relu_desc_init(&data,
-                        &conv_desc.data, negative_slope),
-                    "could not create a convolution_relu_forward descriptor");
-        }
-    };
-
-    struct primitive_desc : public mkldnn::primitive_desc {
-        primitive_desc(const desc &desc, const engine &e)
-            : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {}
-
-        REG_QUERY_MPD(src, src, 0);
-        REG_QUERY_MPD(weights, weights, 0);
-        REG_QUERY_MPD(bias, weights, 1);
-        REG_QUERY_MPD(dst, dst, 0);
-    };
-
-    /// @deprecated consider using convolution_forward + post_ops
-    MKLDNN_DEPRECATED
-    convolution_relu_forward(const primitive_desc &aprimitive_desc,
-            const primitive::at &src, const primitive::at &weights,
-            const primitive::at &bias, const memory &dst) {
-        mkldnn_primitive_t result;
-        mkldnn_primitive_at_t inputs[] = { src.data, weights.data,
-                bias.data };
-        const_mkldnn_primitive_t outputs[] = { dst.get() };
-        check_num_parameters(aprimitive_desc.get(), 3, 1,
-            "convolution relu forward");
-        error::wrap_c_api(mkldnn_primitive_create(&result,
-                aprimitive_desc.get(), inputs, outputs),
-            "could not create a convolution relu forward primitive");
-        reset(result);
-    }
-
-    /// @deprecated consider using convolution_forward + post_ops
-    MKLDNN_DEPRECATED
-    convolution_relu_forward(const primitive_desc &aprimitive_desc,
-            const primitive::at &src, const primitive::at &weights,
-            const memory &dst) {
-        mkldnn_primitive_t result;
-        mkldnn_primitive_at_t inputs[] = { src.data, weights.data };
-        const_mkldnn_primitive_t outputs[] = { dst.get() };
-        check_num_parameters(aprimitive_desc.get(), 2, 1,
-            "convolution relu forward");
-        error::wrap_c_api(mkldnn_primitive_create(&result,
-                aprimitive_desc.get(), inputs, outputs),
-            "could not create a convolution relu forward primitive");
-        reset(result);
-    }
-};
-
 /// @}
 
 /// @addtogroup cpp_api_deconvolution Deconvolution
@@ -2450,7 +2409,7 @@ struct pooling_backward : public primitive {
 /// @}
 
 /// @addtogroup cpp_api_eltwise Eltwise
-/// A primitive to compute element wise operations like parametric rectifier
+/// A primitive to compute element-wise operations like parametric rectifier
 /// linear unit (ReLU).
 ///
 /// @sa @ref c_api_eltwise in @ref c_api
@@ -2468,13 +2427,6 @@ struct eltwise_forward : public primitive {
                         static_cast<float>(alpha), static_cast<float>(beta)),
                     "could not create a eltwise forward descriptor");
         }
-
-        /** @deprecated: api backward compatibility for relu */
-        template <typename T>
-        MKLDNN_DEPRECATED
-        desc(prop_kind aprop_kind, const memory::desc &src_desc,
-                T negative_slope)
-        : desc(aprop_kind, eltwise_relu, src_desc, negative_slope) {}
     };
 
     struct primitive_desc : public mkldnn::primitive_desc {
@@ -2501,8 +2453,6 @@ struct eltwise_forward : public primitive {
     }
 };
 
-typedef eltwise_forward relu_forward;
-
 struct eltwise_backward : public primitive {
     struct desc {
         mkldnn_eltwise_desc_t data;
@@ -2516,13 +2466,6 @@ struct eltwise_backward : public primitive {
                         static_cast<float>(beta)),
                     "could not create a eltwise backward descriptor");
         }
-
-        /** @deprecated: api backward compatibility for relu */
-        template <typename T>
-        MKLDNN_DEPRECATED
-        desc(const memory::desc &diff_data_desc, const memory::desc &data_desc,
-            T negative_slope): desc(eltwise_relu, diff_data_desc, data_desc,
-                negative_slope) {}
     };
 
     struct primitive_desc : public mkldnn::primitive_desc {
@@ -2553,8 +2496,6 @@ struct eltwise_backward : public primitive {
     }
 };
 
-typedef eltwise_backward relu_backward;
-
 /// @}
 
 /// @addtogroup cpp_api_depthwise Depthwise
@@ -2569,8 +2510,8 @@ struct depthwise_forward : public primitive {
              const memory::desc &bias_desc) {
             error::wrap_c_api(mkldnn_depthwise_forward_desc_init(&data,
                                                                  mkldnn::convert_to_c(aprop_kind),
-                                                                 mkldnn::convert_to_c(alg_kind),
-                                                                 &src_desc.data, &dst_desc.data,
+	                                                             mkldnn::convert_to_c(alg_kind),
+	                                                             &src_desc.data, &dst_desc.data,
                                                                  &weights_desc.data, &bias_desc.data),
                               "could not create a depthwise forward descriptor");
         }
@@ -2586,16 +2527,15 @@ struct depthwise_forward : public primitive {
         }
     };
 
-    struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
-        primitive_desc(const desc &adesc, const engine &aengine) {
-            mkldnn_primitive_desc_t result;
-            error::wrap_c_api(mkldnn_primitive_desc_create(
-                    &result, &adesc.data, aengine.get(), nullptr),
-                              "could not create a depthwise forward primitive descriptor");
-            reset(result);
-        }
+    struct primitive_desc : public mkldnn::primitive_desc {
+        primitive_desc(const desc &desc, const engine &e)
+            : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {}
 
-        engine get_engine() { return engine::query(*this); }
+        primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e)
+            : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {}
+
+        REG_QUERY_MPD(src, src, 0);
+        REG_QUERY_MPD(dst, dst, 0);
     };
 
     depthwise_forward(const primitive_desc &aprimitive_desc,
@@ -2787,12 +2727,12 @@ struct batch_normalization_forward : public primitive {
         reset(result);
     }
 
-    /// @warning batch_normalization_forward has 2 constructors with very
+    /// @warning batch_normalization_forward has two constructors with very
     ///          similar signatures:
     ///           - (pd, src, weights, dst, mean, variance) // 2 in, 3 out
     ///           - (pd, src, dst, mean, variance, workspace) // 1 in, 4 out
-    ///          The only way to distinguish between those is to explicitly
-    ///          cast all input parameters to their type, i.e. to
+    ///          The only way to distinguish between them is to explicitly
+    ///          cast all input parameters to their type; that is, to
     ///          const primitive:at &.
     batch_normalization_forward(const primitive_desc &aprimitive_desc,
             const primitive::at &src, const primitive::at &weights,
@@ -2840,17 +2780,16 @@ struct batch_normalization_forward : public primitive {
         reset(result);
     }
 
-    /// @warning batch_normalization_forward has 2 constructors with very
+    /// @warning batch_normalization_forward has two constructors with very
     ///          similar signatures:
     ///           - (pd, src, weights, dst, mean, variance) // 2 in, 3 out
     ///           - (pd, src, dst, mean, variance, workspace) // 1 in, 4 out
-    ///          The only way to distinguish between those is to explicitly
-    ///          cast all input parameters to their type, i.e. to
+    ///          The only way to distinguish between them is to explicitly
+    ///          cast all input parameters to their type; that is, to
     ///          const primitive:at &.
-    /// @note to make users' experience a little bit better this constructor
-    ///       checks if whether parameters match corresponding primitive
-    ///       descriptor, and if they are not -- call the other (proper)
-    ///       constructor. Yeah, this is still very ugly...
+    /// @note To make users' experience a little better, this constructor
+    ///       checks whether parameters match the corresponding primitive
+    ///       descriptor, and if not, calls the other (proper) constructor.
     batch_normalization_forward(const primitive_desc &aprimitive_desc,
             const primitive::at &src, const memory &dst, const memory &mean,
             const memory &variance, const memory &workspace) {
@@ -3365,10 +3304,6 @@ struct rnn_backward : public primitive {
     };
 
     struct primitive_desc : public mkldnn::primitive_desc {
-        MKLDNN_DEPRECATED
-        primitive_desc(const desc &desc, const engine &e)
-            : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {}
-
         primitive_desc(const desc &desc, const engine &e,
                 const rnn_forward::primitive_desc &hint_fwd_pd)
             : mkldnn::primitive_desc(&desc.data, nullptr, e, hint_fwd_pd.get()) {}
@@ -3520,10 +3455,113 @@ struct shuffle_backward : public primitive {
 
 /// @}
 
+/// @addtogroup cpp_api_binary_convolution Binary convolution
+/// A primitive to compute binary convolution using different algorithms.
+///
+/// @sa @ref c_api_binary_convolution in @ref c_api
+/// @{
+
+struct binary_convolution_forward: public primitive {
+    struct desc {
+        mkldnn_binary_convolution_desc_t data;
+        desc(prop_kind aprop_kind, algorithm aalgorithm,
+                const memory::desc &src_desc,
+                const memory::desc &weights_desc,
+                const memory::desc &dst_desc,
+                const memory::dims strides,
+                const memory::dims dilates,
+                const memory::dims padding_l,
+                const memory::dims padding_r,
+                const float pad_value) {
+            memory::validate_dims(strides);
+            memory::validate_dims(dilates);
+            memory::validate_dims(padding_l);
+            memory::validate_dims(padding_r);
+            error::wrap_c_api(
+                mkldnn_dilated_binary_convolution_forward_desc_init(&data,
+                    mkldnn::convert_to_c(aprop_kind), convert_to_c(aalgorithm),
+                        &src_desc.data, &weights_desc.data, &dst_desc.data,
+                        &strides[0], &dilates[0], &padding_l[0], &padding_r[0],
+                        pad_value),
+                    "could not create a dilated binary convolution forward descriptor");
+        }
+    };
+
+    struct primitive_desc : public mkldnn::primitive_desc {
+        primitive_desc(const desc &desc, const engine &e)
+            : mkldnn::primitive_desc(&desc.data, nullptr, e, nullptr) {}
+
+        primitive_desc(const desc &desc, const primitive_attr &attr, const engine &e)
+            : mkldnn::primitive_desc(&desc.data, &attr, e, nullptr) {}
+
+        REG_QUERY_MPD(src, src, 0);
+        REG_QUERY_MPD(weights, weights, 0);
+        REG_QUERY_MPD(dst, dst, 0);
+    };
+
+    binary_convolution_forward(const primitive_desc &aprimitive_desc,
+            const primitive::at &src, const primitive::at &weights, const memory &dst) {
+        mkldnn_primitive_t result;
+        mkldnn_primitive_at_t inputs[] = { src.data, weights.data };
+        const_mkldnn_primitive_t outputs[] = { dst.get() };
+        check_num_parameters(aprimitive_desc.get(), 2, 1,
+            "binary convolution forward");
+        error::wrap_c_api(mkldnn_primitive_create(&result,
+                    aprimitive_desc.get(), inputs, outputs),
+                "could not create a binary convolution forward primitive");
+        reset(result);
+    }
+};
+
+/// @}
+
+/// @addtogroup cpp_api_binarization Binarization
+/// @{
+
+struct binarization_forward : public primitive {
+    struct desc {
+        mkldnn_binarization_desc_t data;
+
+        desc(prop_kind aprop_kind, algorithm alg_kind,
+             const memory::desc &src_desc, const memory::desc &weights_desc, const memory::desc &dst_desc) {
+            error::wrap_c_api(mkldnn_binarization_forward_desc_init(&data,
+                                                                 mkldnn::convert_to_c(aprop_kind),
+                                                                 mkldnn::convert_to_c(alg_kind),
+                                                                 &src_desc.data, &dst_desc.data,
+                                                                 &weights_desc.data),
+                              "could not create a binarization forward descriptor");
+        }
+    };
+
+    struct primitive_desc : public handle<mkldnn_primitive_desc_t> {
+        primitive_desc(const desc &adesc, const engine &aengine) {
+            mkldnn_primitive_desc_t result;
+            error::wrap_c_api(mkldnn_primitive_desc_create(
+                    &result, &adesc.data, aengine.get(), nullptr),
+                              "could not create a binarization forward primitive descriptor");
+            reset(result);
+        }
+
+        engine get_engine() { return engine::query(*this); }
+    };
+
+    binarization_forward(const primitive_desc &aprimitive_desc,
+                      const primitive::at &src, const primitive::at &weights, const memory &dst) {
+        mkldnn_primitive_t result;
+        mkldnn_primitive_at_t inputs[] = { src.data, weights.data };
+        const_mkldnn_primitive_t outputs[] = { dst.get() };
+        error::wrap_c_api(mkldnn_primitive_create(&result, aprimitive_desc.get(), inputs, outputs),
+                          "could not create a binarization forward primitive");
+        reset(result);
+    }
+};
+
+/// @}
+
 /// @} Primitives
 
 /// @addtogroup cpp_api_stream Stream
-/// Execution stream operations
+/// Execution stream operations.
 ///
 /// @sa @ref c_api_stream in @ref c_api
 /// @{
@@ -3580,8 +3618,8 @@ struct stream: public handle<mkldnn_stream_t> {
 
     /// Waits for all computations submitted to the stream to complete.
     ///
-    /// @param block Specifies whether the operation should wait indefinitely or return
-    ///              immediately.
+    /// @param block Specifies whether the operation should wait indefinitely or
+    ///              return immediately.
     /// @returns @c true if all computations completed.
     /// @returns @c false if not all computations completed.
     bool wait(bool block = true) {
diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn_debug.h b/inference-engine/thirdparty/mkl-dnn/include/mkldnn_debug.h
index 568e91f6a..7ccba0cc1 100644
--- a/inference-engine/thirdparty/mkl-dnn/include/mkldnn_debug.h
+++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn_debug.h
@@ -67,6 +67,7 @@ const char MKLDNN_API *mkldnn_fmt2str(mkldnn_memory_format_t v);
 const char MKLDNN_API *mkldnn_prop_kind2str(mkldnn_prop_kind_t v);
 const char MKLDNN_API *mkldnn_prim_kind2str(mkldnn_primitive_kind_t v);
 const char MKLDNN_API *mkldnn_alg_kind2str(mkldnn_alg_kind_t v);
+const char MKLDNN_API *mkldnn_rnn_direction2str(mkldnn_rnn_direction_t v);
 
 #ifdef __cplusplus
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn_types.h b/inference-engine/thirdparty/mkl-dnn/include/mkldnn_types.h
index b0ea527d7..a86eb666b 100644
--- a/inference-engine/thirdparty/mkl-dnn/include/mkldnn_types.h
+++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn_types.h
@@ -35,6 +35,14 @@ extern "C" {
  *  @addtogroup c_api_types_generic Generic
  *  @{ */
 
+/** Intel(R) MKL-DNN Version type */
+typedef struct {
+    int    major;
+    int    minor;
+    int    patch;
+    const char *hash;
+} mkldnn_version_t;
+
 /** Status values returned by Intel(R) MKL-DNN functions. */
 typedef enum {
     /** The operation was successful */
@@ -72,6 +80,8 @@ typedef enum {
     mkldnn_s8 = 5,
     /** 8-bit unsigned integer. */
     mkldnn_u8 = 6,
+    /** 1-bit integer. */
+    mkldnn_bin = 7,
 } mkldnn_data_type_t;
 
 /** Rounding mode */
@@ -88,12 +98,12 @@ typedef enum {
  * is described as a sequence of the dimensions as they are laid out in the
  * memory (from the outer-most to the inner-most). Note that this order
  * doesn't affect the logical order of the dimensions that is kept in the
- * `dims` field of mkldnn_memory_desc_t structure. The logical order of the
+ * `dims` field of the mkldnn_memory_desc_t structure. The logical order of the
  * dimensions is specified by the type of tensor.
  *
- * For example, CNN 5D tensor always has its logical dimensions in order
- * `(batch, channels, depth, height, width)`, while physical layout might
- * be #mkldnn_ncdhw or #mkldnn_ndhwc:
+ * For example, CNN 5D tensor always has its logical dimensions in the order
+ * `(batch, channels, depth, height, width)`, while the physical layout might be
+ * #mkldnn_ncdhw or #mkldnn_ndhwc:
  *
  * ~~~cpp
  * int batch = 2, channels = 16, depth = 13, height = 13, width = 13;
@@ -109,7 +119,7 @@ typedef enum {
  * mkldnn_memory_desc_init(&data_in_ndhwc, 5, dims, mlkdnn_ndhwc);
  * ~~~
  *
- * The following notation for memory format names:
+ * The following notation applies to memory format names:
  *  - @c 'n' denotes the mini-batch dimension
  *  - @c 'c' denotes a channels dimension
  *  - When there are multiple channel dimensions (for example, in convolution
@@ -119,14 +129,14 @@ typedef enum {
  *    respectively
  *  - Upper-case letters indicate that the data is laid out in blocks
  *    for a particular dimension. In such cases, the format name contains both
- *    upper- and lower-case letters for that dimension with lower-case letter
+ *    upper- and lower-case letters for that dimension with a lower-case letter
  *    preceded by the block size. For example: @c 'mkldnn_nChw8c' describes a
  *    format where the outermost dimension is mini-batch, followed by the
  *    channel block number, followed by the spatial height and width, and
  *    finally followed by 8-element channel blocks.
  *
  * @note
- *    Channel designations can be different. For example: both the @c
+ *    Channel designations can be different. For example, both the @c
  *    'mkldnn_nc' and @c 'mkldnn_io' formats can be used to describe a 2D
  *    tensor.
  *
@@ -188,6 +198,9 @@ typedef enum {
     /** 4D weights tensor with physical layout @c ihwo.
      * Logical dimensions come in the order: (o, i, h, w) */
     mkldnn_ihwo,
+    /** 4D weights tensor with physical layout @c iohw.
+     * Logical dimensions come in the order: (o, i, h, w) */
+    mkldnn_iohw,
     /** 5D weights tensor with physical layout @c iodhw, used in Caffe.
      * Logical dimensions come in the order: (o, i, d, h, w) */
     mkldnn_oidhw,
@@ -205,6 +218,9 @@ typedef enum {
      * used in TensorFlow.
      * Logical dimensions come in the order: (g, o, i, h, w) */
     mkldnn_hwigo,
+    /** 5D grouped weights tensor with the physical layout @c giohw.
+     * Logical dimensions come in the order: (g, o, i, h, w) */
+    mkldnn_giohw,
     /** 6D grouped weights tensor with the physical layout @c goidhw,
      * used in Caffe.
      * Logical dimensions come in the order: (g, o, i, d, h, w) */
@@ -235,25 +251,31 @@ typedef enum {
      *
      *  - For LSTM cells, the gates order is input, forget, candidate
      *    and output gate.
-     * - For GRU cells, the gates order is update, reset and output gate. */
+     *  - For GRU cells, the gates order is update, reset and output gate. */
     mkldnn_ldgo,
 
     /* Opaque data types, are not to be used explicitly */
 
     /* data */
+    mkldnn_nCw4c /** blocked data format */,
     mkldnn_nCw8c /** blocked data format */,
     mkldnn_nCw16c /** blocked data format */,
+    mkldnn_nChw4c /** blocked data format */,
     mkldnn_nChw8c /** blocked data format */,
     mkldnn_nChw16c /** blocked data format */,
+    mkldnn_nCdhw4c /** blocked data format */,
     mkldnn_nCdhw8c /** blocked data format */,
     mkldnn_nCdhw16c /** blocked data format */,
 
     /* weights, 3D */
+    mkldnn_Owi4o /** blocked weights format */,
+    mkldnn_OIw4i4o /** blocked weights format */,
     mkldnn_Owi8o /** blocked weights format */,
     mkldnn_OIw8i8o /** blocked weights format */,
     mkldnn_OIw8o8i /** blocked weights format */,
     mkldnn_OIw16i16o /** blocked weights format */,
     mkldnn_OIw16o16i /** blocked weights format */,
+    mkldnn_Oiw4o /** blocked weights format */,
     mkldnn_Oiw16o /** blocked weights format */,
     mkldnn_Owi16o /** blocked weights format */,
     mkldnn_OIw8i16o2i /** blocked weights format */,
@@ -268,6 +290,7 @@ typedef enum {
     mkldnn_hwio_s8s8,
     mkldnn_oIhw8i /** blocked weights format */,
     mkldnn_oIhw16i /** blocked weights format */,
+    mkldnn_OIhw4i4o /** blocked weights format */,
     mkldnn_OIhw8i8o /** blocked weights format */,
     mkldnn_OIhw16i16o /** blocked weights format */,
     mkldnn_OIhw4i16o4i /** blocked weights format */,
@@ -282,8 +305,10 @@ typedef enum {
     mkldnn_OIhw16o16i /** blocked weights format */,
     mkldnn_IOhw16o16i /** blocked weights format */,
     mkldnn_Oihw8o /** blocked weights format */,
+    mkldnn_Oihw4o /** blocked weights format */,
     mkldnn_Oihw16o /** blocked weights format */,
     mkldnn_Ohwi8o /** blocked weights format */,
+    mkldnn_Ohwi4o /** blocked weights format */,
     mkldnn_Ohwi16o /** blocked weights format */,
     mkldnn_OhIw16o4i /** blocked weights format */,
     mkldnn_OhIw8o4i /** blocked weights format */,
@@ -292,25 +317,33 @@ typedef enum {
      * and containing the values:
      * O[i:0,OC] = -128 * SUM(j:0,IC;h:0,H;w:0,W)(weights(i,j,h,w))*/
     mkldnn_OhIw8o4i_s8s8,
+    mkldnn_OhIw8o32i /** blocked weights format */,
+    mkldnn_OhIw16o32i /** blocked weights format */,
 
     /* weights, 5D */
     mkldnn_oIdhw8i /** blocked weights format */,
     mkldnn_oIdhw16i /** blocked weights format */,
+    mkldnn_OIdhw4i4o /** blocked weights format */,
+    mkldnn_Odhwi4o /** blocked weights format */,
     mkldnn_OIdhw8i8o /** blocked weights format */,
     mkldnn_OIdhw8o8i /** blocked weights format */,
     mkldnn_Odhwi8o /** blocked weights format */,
     mkldnn_OIdhw16i16o /** blocked weights format */,
     mkldnn_OIdhw16o16i /** blocked weights format */,
+    mkldnn_Oidhw4o /** blocked weights format */,
     mkldnn_Oidhw16o /** blocked weights format */,
     mkldnn_Odhwi16o /** blocked weights format */,
     mkldnn_OIdhw8i16o2i /** blocked weights format */,
 
     /* weights w/ groups, 4D */
+    mkldnn_gOwi4o /** blocked weights format */,
+    mkldnn_gOIw4i4o /** blocked weights format */,
     mkldnn_gOwi8o /** blocked weights format */,
     mkldnn_gOIw8o8i /** blocked weights format */,
     mkldnn_gOIw8i8o /** blocked weights format */,
     mkldnn_gOIw16i16o /** blocked weights format */,
     mkldnn_gOIw16o16i /** blocked weights format */,
+    mkldnn_gOiw4o /** blocked weights format */,
     mkldnn_gOiw16o /** blocked weights format */,
     mkldnn_gOwi16o /** blocked weights format */,
     mkldnn_gOIw8i16o2i /** blocked weights format */,
@@ -323,6 +356,7 @@ typedef enum {
      * multiplied by number of groups and containing the values:
      * O[i:0,G*OC] = -128 * SUM(j:0,IC;h:0,H;w:0,W)(weights(i,j,h,w))*/
     mkldnn_hwigo_s8s8,
+    mkldnn_gOIhw4i4o /** blocked weights format */,
     mkldnn_gOIhw8i8o /** blocked weights format */,
     mkldnn_gOIhw16i16o /** blocked weights format */,
     mkldnn_gOIhw4i16o4i /** blocked weights format */,
@@ -331,17 +365,35 @@ typedef enum {
      * multiplied by number of groups and containing the values:
      * O[i:0,G*OC] = -128 * SUM(j:0,IC;h:0,H;w:0,W)(weights(i,j,h,w))*/
     mkldnn_gOIhw4i16o4i_s8s8,
+    mkldnn_gOIhw2i8o4i /** blocked weights format */,
+    /** blocked weights format with additional buffer
+     * with size equal to the number of output channels
+     * multiplied by number of groups and containing the values:
+     * O[i:0,G*OC] = -128 * SUM(j:0,IC;h:0,H;w:0,W)(weights(i,j,h,w))*/
+    mkldnn_gOIhw2i8o4i_s8s8,
     mkldnn_gOIhw8i16o2i /** blocked weights format */,
     mkldnn_gOIhw8o16i2o /** blocked weights format */,
+    mkldnn_gOIhw4o4i /** blocked weights format */,
+    /** blocked weights format with additional buffer
+     * with size equal to the number of output channels
+     * and containing the values:
+     * O[i:0,OC] = -128 * SUM(j:0,IC;h:0,H;w:0,W)(weights(i,j,h,w))*/
+    mkldnn_gOIhw4o4i_s8s8 /** blocked weights format */,
     mkldnn_gOIhw8o8i /** blocked weights format */,
     mkldnn_gOIhw16o16i /** blocked weights format */,
     mkldnn_gIOhw16o16i /** blocked weights format */,
     mkldnn_gOihw8o /** blocked weights format */,
+    mkldnn_gOihw4o /** blocked weights format */,
     mkldnn_gOihw16o /** blocked weights format */,
     mkldnn_gOhwi8o /** blocked weights format */,
+    mkldnn_gOhwi4o /** blocked weights format */,
     mkldnn_gOhwi16o /** blocked weights format */,
     mkldnn_Goihw8g /** blocked weights format */,
     mkldnn_Goihw16g /** blocked weights format */,
+    /** blocked weights format with additional buffer
+     * with size equal to the number of groups and containing the values:
+     * O[i:0,G] = -128 * SUM(h:0,H;w:0,W)(weights(i,i,h,w))*/
+    mkldnn_Goihw16g_s8s8,
     mkldnn_gOhIw16o4i /** blocked weights format */,
     mkldnn_gOhIw8o4i /** blocked weights format */,
     /** blocked weights format with additional buffer
@@ -351,20 +403,21 @@ typedef enum {
     mkldnn_gOhIw8o4i_s8s8,
 
     /* weights w/ groups, 6D */
+    mkldnn_gOIdhw4i4o /** blocked weights format */,
+    mkldnn_gOdhwi4o /** blocked weights format */,
     mkldnn_gOIdhw8i8o /** blocked weights format */,
     mkldnn_gOIdhw8o8i /** blocked weights format */,
     mkldnn_gOdhwi8o /** blocked weights format */,
     mkldnn_gOIdhw8i16o2i /** blocked weights format */,
     mkldnn_gOIdhw16i16o /** blocked weights format */,
     mkldnn_gOIdhw16o16i /** blocked weights format */,
+    mkldnn_gOidhw4o /** blocked weights format */,
     mkldnn_gOidhw16o /** blocked weights format */,
     mkldnn_gOdhwi16o /** blocked weights format */,
 
     mkldnn_wino_fmt /** Weights format used in 8bit Winograd convolution */,
 
-    /* RNN packed weights */
-    mkldnn_ldigo_p /** RNN packed weights (unused) */,
-    mkldnn_ldgoi_p /** RNN packed weights (unused) */,
+    mkldnn_rnn_packed /** Packed weights format used in RNN */,
 
     /** Just a sentinel, not real memory format. Must be changed after new
      * format is added. */
@@ -385,9 +438,9 @@ typedef enum {
     /** Forward data propagation (training mode). In this mode primitives
      * perform computations necessary for subsequent backward propagation. */
     mkldnn_forward_training = 64,
-    /** Forward data propagation (inference mode). In this mode primitives only
-     * perform computations that are necessary for inference and omit
-     * computations that are only necessary for backward propagation. */
+    /** Forward data propagation (inference mode). In this mode primitives
+     * perform only computations that are necessary for inference and omit
+     * computations that are necessary only for backward propagation. */
     mkldnn_forward_inference = 96,
     /** Forward data propagation (alias for @c mkldnn_forward_inference) */
     mkldnn_forward_scoring = mkldnn_forward_inference,
@@ -428,8 +481,6 @@ typedef enum {
     mkldnn_deconvolution,
     /** An element-wise primitive. */
     mkldnn_eltwise,
-    /** A ReLU primitive. @deprecated */
-    mkldnn_relu = mkldnn_eltwise,
     /** A Softmax primitive. */
     mkldnn_softmax,
     /** A pooling primitive. */
@@ -440,83 +491,95 @@ typedef enum {
     mkldnn_batch_normalization,
     /** An inner product primitive. */
     mkldnn_inner_product,
-    /** A convolution primitive merged with ReLU. @deprecated */
-    mkldnn_convolution_relu,
     /** A rnn primitive. */
     mkldnn_rnn,
     /** A ROI pooling primitive. */
     mkldnn_roi_pooling,
     /** An channel-wise primitive. */
     mkldnn_depthwise,
+    /** A binary convolution primitive. */
+    mkldnn_binary_convolution,
+    /** A binarization primitive. */
+    mkldnn_binarization,
 } mkldnn_primitive_kind_t;
 
 /** Kinds of algorithms. */
 typedef enum {
     mkldnn_alg_kind_undef,
     /** Direct convolution */
-    mkldnn_convolution_direct = 1,
+    mkldnn_convolution_direct = 0x1,
     /** Winograd convolution */
-    mkldnn_convolution_winograd = 2,
+    mkldnn_convolution_winograd = 0x2,
+    /** Convolution algorithm(either direct or Winograd) is chosen just in time **/
+    mkldnn_convolution_auto = 0x3,
+    /** Direct deconvolution */
+    mkldnn_deconvolution_direct = 0xa,
+    /** Winograd deconvolution */
+    mkldnn_deconvolution_winograd = 0xb,
     /** Eltwise: ReLU */
-    mkldnn_eltwise_relu = 8,
+    mkldnn_eltwise_relu = 0x1f,
     /** Eltwise: hyperbolic tangent non-linearity (tanh) */
-    mkldnn_eltwise_tanh = 9,
+    mkldnn_eltwise_tanh = 0x2f,
     /** Eltwise: parametric exponential linear unit (elu) */
-    mkldnn_eltwise_elu = 10,
+    mkldnn_eltwise_elu = 0x3f,
     /** Eltwise: square */
-    mkldnn_eltwise_square = 11,
+    mkldnn_eltwise_square = 0x4f,
     /** Eltwise: abs */
-    mkldnn_eltwise_abs = 12,
+    mkldnn_eltwise_abs = 0x5f,
     /** Eltwise: square root */
-    mkldnn_eltwise_sqrt = 13,
+    mkldnn_eltwise_sqrt = 0x6f,
     /** Eltwise: linear */
-    mkldnn_eltwise_linear = 14,
+    mkldnn_eltwise_linear = 0x7f,
     /** Eltwise: bounded_relu */
-    mkldnn_eltwise_bounded_relu = 15,
+    mkldnn_eltwise_bounded_relu = 0x8f,
     /** Eltwise: soft_relu */
-    mkldnn_eltwise_soft_relu = 16,
+    mkldnn_eltwise_soft_relu = 0x9f,
     /** Eltwise: logistic */
-    mkldnn_eltwise_logistic = 17,
+    mkldnn_eltwise_logistic = 0xaf,
     /** Eltwise: clamp */
-    mkldnn_eltwise_clamp = 18,
+    mkldnn_eltwise_clamp = 0xbf,
+    /** Eltwise: exp */
+    mkldnn_eltwise_exp = 0xcf,
+    /** Eltwise: not */
+    mkldnn_eltwise_not = 0xdf,
     /** Max pooling */
-    mkldnn_pooling_max = 34,
+    mkldnn_pooling_max = 0x1ff,
     /** Average pooling include padding */
-    mkldnn_pooling_avg_include_padding = 40,
+    mkldnn_pooling_avg_include_padding = 0x2ff,
     /** Average pooling exclude padding */
-    mkldnn_pooling_avg_exclude_padding = 41,
+    mkldnn_pooling_avg_exclude_padding = 0x3ff,
     mkldnn_pooling_avg = mkldnn_pooling_avg_exclude_padding,
     /** Local response normalization (LRN) across multiple channels */
-    mkldnn_lrn_across_channels = 65,
+    mkldnn_lrn_across_channels = 0xaff,
     /** LRN within a single channel */
-    mkldnn_lrn_within_channel = 66,
-    /** Direct deconvolution */
-    mkldnn_deconvolution_direct = 71,
-    /** Winograd deconvolution */
-    mkldnn_deconvolution_winograd = 72,
+    mkldnn_lrn_within_channel = 0xbff,
     /** RNN cell */
-    mkldnn_vanilla_rnn = 80,
+    mkldnn_vanilla_rnn = 0x1fff,
     /** LSTM cell */
-    mkldnn_vanilla_lstm = 81,
+    mkldnn_vanilla_lstm = 0x2fff,
     /** GRU cell */
-    mkldnn_vanilla_gru = 82,
+    mkldnn_vanilla_gru = 0x3fff,
     /** GRU cell with linear before reset
      *
      * Modification of original GRU cell. Differs from #mkldnn_vanilla_gru
      * in how the new memory gate is calculated:
-     * \f[ c_t = tanh(W_c*x_t + b_{c_h} + r_t*(U_c*h_{t-1}+b_{c_h})) \f]
+     * \f[ c_t = tanh(W_c*x_t + b_{c_x} + r_t*(U_c*h_{t-1}+b_{c_h})) \f]
      * Primitive expects 4 biases on input:
      * \f$[b_{u}, b_{r}, b_{c_x}, b_{c_h}]\f$
      * */
-    mkldnn_gru_linear_before_reset = 83,
-    /** Depthwise: scale_shift */
-    mkldnn_depthwise_scale_shift = 100,
-    /** Depthwise: prelu */
-    mkldnn_depthwise_prelu = 101,
+    mkldnn_gru_linear_before_reset = 0x4fff,
     /** ROI max pooling **/
-    mkldnn_roi_pooling_max = 128,
+    mkldnn_roi_pooling_max = 0xafff,
     /** ROI pooling with bilinear interpolation**/
-    mkldnn_roi_pooling_bilinear = 129
+    mkldnn_roi_pooling_bilinear = 0xbfff,
+    /** Depthwise: scale_shift */
+    mkldnn_depthwise_scale_shift = 0x1ffff,
+    /** Depthwise: prelu */
+    mkldnn_depthwise_prelu = 0x2ffff,
+    /** Direct binary convolution */
+    mkldnn_binary_convolution_direct = 0x1fffff,
+    /** Depthwise binarization */
+    mkldnn_binarization_depthwise = 0xafffff
 } mkldnn_alg_kind_t;
 
 /** Flags for batch-normalization primititve. */
@@ -547,15 +610,6 @@ typedef enum {
      *    same behavior as prop_kind == #mkldnn_backward
      */
     mkldnn_use_scaleshift = 0x2U,
-    /** Omit statistics
-     *
-     * @deprecated use #mkldnn_use_global_stats instead
-     *
-     * For time being had an affect on backward propagation only which allowed
-     * skipping some computations (the same semantics as
-     * #mkldnn_use_global_stats)
-     */
-    mkldnn_omit_stats = mkldnn_use_global_stats,
     /** Fuse with ReLU
      *
      * If specified:
@@ -578,7 +632,7 @@ typedef enum {
 #define TENSOR_MAX_DIMS 12
 
 /** A type to describe tensor dimensions. */
-typedef int mkldnn_dims_t[TENSOR_MAX_DIMS];
+typedef ptrdiff_t mkldnn_dims_t[TENSOR_MAX_DIMS];
 /** A type to describe strides within a tensor. */
 typedef ptrdiff_t mkldnn_strides_t[TENSOR_MAX_DIMS];
 
@@ -627,6 +681,27 @@ typedef struct {
     size_t size;
 } mkldnn_wino_desc_t;
 
+typedef enum {
+    mkldnn_packed_format_undef = 0,
+    mkldnn_ldigo_p,
+    mkldnn_ldgoi_p
+} mkldnn_rnn_packed_memory_format_t;
+
+/* Maximum number of parts of RNN weights tensor that require separate
+ * computation. */
+#define MKLDNN_RNN_MAX_N_PARTS 4
+
+/** Description of tensor of packed weights for rnn. */
+typedef struct {
+    mkldnn_rnn_packed_memory_format_t format;
+    int n_parts;
+    int n;
+    int parts[MKLDNN_RNN_MAX_N_PARTS];
+    size_t part_pack_size[MKLDNN_RNN_MAX_N_PARTS];
+    size_t offset_compensation;
+    size_t size;
+} mkldnn_rnn_packed_desc_t;
+
 /** @addtogroup c_api_types_op_descs Operation descriptors
  *  @{*/
 
@@ -640,7 +715,7 @@ typedef const void *const_mkldnn_op_desc_t;
  * format. Additionally, contains format-specific descriptions of the data
  * layout. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_memory. */
     mkldnn_primitive_kind_t primitive_kind;
     /** Number of dimensions */
@@ -657,8 +732,8 @@ typedef struct {
      *
      * @note
      *    The order of dimensions does not depend on the memory format, so
-     *    no matter whether the data is laid in #mkldnn_nchw or #mkldnn_nhwc
-     *    the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>
+     *    whether the data is laid out in #mkldnn_nchw or #mkldnn_nhwc
+     *    the dims for 4D CN data tensor would be <code>{N, C, H, W}</code>.
      */
     mkldnn_dims_t dims;
     /** Data type of the tensor elements. */
@@ -671,6 +746,8 @@ typedef struct {
         mkldnn_blocking_desc_t blocking;
         /** Tensor of weights for integer 8bit winograd convolution. */
         mkldnn_wino_desc_t wino_desc;
+        /** Tensor of packed weights for RNN. */
+        mkldnn_rnn_packed_desc_t rnn_packed_desc;
         /* ... other descriptions possible */
     } layout_desc;
 } mkldnn_memory_desc_t;
@@ -679,7 +756,7 @@ typedef struct {
 
 /** A descriptor of a convolution operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_convolution. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
@@ -724,13 +801,13 @@ typedef mkldnn_convolution_desc_t mkldnn_deconvolution_desc_t;
 
 /** A descriptor of a shuffle operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_convolution. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
-     * #mkldnn_forward_inference, #mkldnn_backward_data*/
+     * #mkldnn_forward_inference, and #mkldnn_backward_data. */
     mkldnn_prop_kind_t prop_kind;
-    /** Source and destination memory descriptor.
+    /** Source and destination memory descriptor,
      *  and source and destination gradient memory descriptor. */
     mkldnn_memory_desc_t data_desc;
     /** axis for shuffling. */
@@ -741,7 +818,7 @@ typedef struct {
 
 /** A descriptor of a element-wise operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_eltwise. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
@@ -751,7 +828,7 @@ typedef struct {
     /** The kind of eltwise algorithm. Possible values: #mkldnn_eltwise_relu,
      * #mkldnn_eltwise_tanh, #mkldnn_eltwise_elu, #mkldnn_eltwise_square,
      * #mkldnn_eltwise_abs, #mkldnn_eltwise_sqrt, #mkldnn_eltwise_linear,
-     * #mkldnn_eltwise_bounded_relu, #mkldnn_eltwise_soft_relu,
+     * #mkldnn_eltwise_bounded_relu, #mkldnn_eltwise_soft_relu, and
      * #mkldnn_eltwise_logistic. */
     mkldnn_alg_kind_t alg_kind;
     /** Source and destination memory descriptor. */
@@ -772,10 +849,6 @@ typedef struct {
      *  - #mkldnn_eltwise_logistic: @p alpha and @p beta ignored
      */
     float alpha, beta;
-    /** ReLU scaling factor for negative values.
-     * @deprecated: use alpha instead
-     * @warning: read-only value */
-    float negative_slope;
 } mkldnn_eltwise_desc_t;
 
 /** A descriptor of a channel-wise operation. */
@@ -790,25 +863,22 @@ typedef struct {
     /** The kind of depthwise algorithm. Possible values: #mkldnn_depthwise_scale_shift
      * #mkldnn_depthwise_prelu */
     mkldnn_alg_kind_t alg_kind;
-    /** Source memory descriptor. */
-    mkldnn_memory_desc_t src_desc;
-    /** Destination memory descriptor. */
-    mkldnn_memory_desc_t dst_desc;
+	/** Source memory descriptor. */
+	mkldnn_memory_desc_t src_desc;
+	/** Destination memory descriptor. */
+	mkldnn_memory_desc_t dst_desc;
     /** Weights memory descriptor. */
     mkldnn_memory_desc_t weights_desc;
     /** Bias memory descriptor. */
     mkldnn_memory_desc_t bias_desc;
 } mkldnn_depthwise_desc_t;
 
-/* @deprecated: use mkldnn_eltwise_desc_t */
-typedef mkldnn_eltwise_desc_t mkldnn_relu_desc_t;
-
 /** A descriptor of a Softmax operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
     * descriptor. Must be #mkldnn_softmax. */
     mkldnn_primitive_kind_t primitive_kind;
-    /** The kind of propagation. Possible values: #mkldnn_forward_training,
+    /** The kind of propagation. Possible values: #mkldnn_forward_training and
      * #mkldnn_forward_inference. */
     mkldnn_prop_kind_t prop_kind;
     /** Source and destination memory descriptor. */
@@ -821,14 +891,14 @@ typedef struct {
 
 /** A descriptor of a pooling operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_pooling. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
      * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data.
      */
     mkldnn_prop_kind_t prop_kind;
-    /** The kind of pooling algorithm. Possible values: #mkldnn_pooling_max,
+    /** The kind of pooling algorithm. Possible values: #mkldnn_pooling_max and
      * #mkldnn_pooling_avg. */
     mkldnn_alg_kind_t alg_kind;
     /** Source memory descriptor. */
@@ -855,14 +925,14 @@ typedef struct {
 
 /** A descriptor of a Local Response Normalization (LRN) operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_lrn. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
      * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data.
      */
     mkldnn_prop_kind_t prop_kind;
-    /** LRN algorithm. Possible values #mkldnn_lrn_within_channel or
+    /** LRN algorithm. Possible values: #mkldnn_lrn_within_channel and
      * #mkldnn_lrn_across_channels. */
     mkldnn_alg_kind_t alg_kind;
     /** Source and destination memory descriptor. */
@@ -882,7 +952,7 @@ typedef struct {
 
 /** A descriptor of a Batch Normalization operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_batch_normalization. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
@@ -913,7 +983,7 @@ typedef struct {
 
 /** A descriptor of an inner product operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_inner_product. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
@@ -940,18 +1010,6 @@ typedef struct {
     mkldnn_data_type_t accum_data_type;
 } mkldnn_inner_product_desc_t;
 
-/** A descriptor of a convolution followed by relu operation. */
-typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
-     * descriptor. Must be #mkldnn_convolution_relu. */
-    mkldnn_primitive_kind_t primitive_kind;
-    /** A descriptor of a convolution operation. */
-    mkldnn_convolution_desc_t convolution_desc;
-    /** Scaling factor for negative values, stored as float-precision but
-     * interpreted in a way specific to the data type in each implementation */
-    float negative_slope;
-} mkldnn_convolution_relu_desc_t;
-
 /** Flags for RNN cell. */
 typedef enum {
     mkldnn_rnn_cell_with_relu = 0x1U,
@@ -960,23 +1018,23 @@ typedef enum {
 
 typedef struct {
     /** RNN cell kind. Must be one of #mkldnn_vanilla_rnn,
-     * #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru
+     * #mkldnn_vanilla_lstm, #mkldnn_vanilla_gru,
      * or #mkldnn_gru_linear_before_reset. */
     mkldnn_alg_kind_t cell_kind;
-    /** Activation function used. Must be one of #mkldnn_eltwise_relu,
+    /** Activation function used. Must be either #mkldnn_eltwise_relu or
      * #mkldnn_eltwise_tanh. */
     mkldnn_alg_kind_t activation_kind;
     /** RNN cell flags */
     unsigned int flags;
-    /** alpha is a negative slope parameter (used only if
-     * (flags & #mkldnn_rnn_cell_with_relu) != 0) */
+    /** @c alpha is a negative slope parameter (used only if
+     * `(flags & #mkldnn_rnn_cell_with_relu) != 0`) */
     float alpha;
     /** clipping parameter (used only if
-     * (flags & #mkldnn_rnn_cell_with_clipping) != 0) */
+     * `(flags & #mkldnn_rnn_cell_with_clipping) != 0`) */
     float clipping;
 } mkldnn_rnn_cell_desc_t;
 
-/** A direction of RNN primitive execution */
+/** A direction of RNN primitive execution. */
 typedef enum {
     /* Unidirectional execution of RNN primitive from left to right. */
     mkldnn_unidirectional_left2right,
@@ -991,13 +1049,13 @@ typedef enum {
     mkldnn_unidirectional = mkldnn_unidirectional_left2right,
 } mkldnn_rnn_direction_t;
 
-/** A descriptor for an rnn operation */
+/** A descriptor for an RNN operation. */
 typedef struct {
-    /** The kind of primitive. Used for self identifying the primitive
+    /** The kind of primitive. Used for self-identifying the primitive
      * descriptor. Must be #mkldnn_rnn. */
     mkldnn_primitive_kind_t primitive_kind;
     /** The kind of propagation. Possible values: #mkldnn_forward_training,
-     * #mkldnn_forward_inference, #mkldnn_backward. */
+     * #mkldnn_forward_inference, and #mkldnn_backward. */
     mkldnn_prop_kind_t prop_kind;
     /** The RNN cell desc. */
     mkldnn_rnn_cell_desc_t cell_desc;
@@ -1053,6 +1111,56 @@ typedef struct {
     mkldnn_alg_kind_t alg_kind;
 } mkldnn_roi_pooling_desc_t;
 
+/** A descriptor of a binary convolution operation. */
+typedef struct {
+    /** The kind of primitive. Used for self identifying the primitive
+     * descriptor. Must be #mkldnn_binary_convolution. */
+    mkldnn_primitive_kind_t primitive_kind;
+    /** The kind of propagation. Possible values: #mkldnn_forward_training,
+     * #mkldnn_forward_inference */
+    mkldnn_prop_kind_t prop_kind;
+    /** The kind of the binary convolution algorithm. Possible values:
+     * #mkldnn_binary_convolution_direct. */
+    mkldnn_alg_kind_t alg_kind;
+    /** Source memory descriptor. */
+    mkldnn_memory_desc_t src_desc;
+    /** Weights memory descriptor. */
+    mkldnn_memory_desc_t weights_desc;
+    /** Destination memory descriptor. */
+    mkldnn_memory_desc_t dst_desc;
+    /** Convolution strides in each spatial dimension. */
+    mkldnn_dims_t strides;
+    /** Convolution dilates in each spatial dimension. */
+    mkldnn_dims_t dilates;
+    /** Padding in each spatial dimension. padding[0] is a padding in the
+     * beginning (@p padding_l), padding[1] is a padding in the end (@p
+     * padding_r). */
+    mkldnn_dims_t padding[2];
+    /** The accumulator data type. Initialized automatically. */
+    mkldnn_data_type_t accum_data_type;
+    /** Logic value of elements in padding area */
+    float pad_value;
+} mkldnn_binary_convolution_desc_t;
+
+/** A descriptor of a binarization operation. */
+typedef struct {
+    /** The kind of primitive. Used for self identifying the primitive
+     * descriptor. Must be #mkldnn_binarization. */
+    mkldnn_primitive_kind_t primitive_kind;
+    /** The kind of propagation. Possible values: #mkldnn_forward_training,
+     * #mkldnn_forward_inference, #mkldnn_backward, and #mkldnn_backward_data.
+     */
+    mkldnn_prop_kind_t prop_kind;
+    /** The kind of binarization algorithm. Possible values: #mkldnn_binarization_depthwise */
+    mkldnn_alg_kind_t alg_kind;
+    /** Source memory descriptor. */
+    mkldnn_memory_desc_t src_desc;
+    /** Destination memory descriptor. */
+    mkldnn_memory_desc_t dst_desc;
+    /** Weights memory descriptor. */
+    mkldnn_memory_desc_t weights_desc;
+} mkldnn_binarization_desc_t;
+
 /** @} */
 
 /** @addtogroup c_api_engine_types Engine
@@ -1083,7 +1191,7 @@ typedef const struct mkldnn_engine *const_mkldnn_engine_t;
  * @{ */
 
 /** @struct mkldnn_primitive_desc_iterator
- * @brief An opaque structure to describe a primitive descriptor iterator . */
+ * @brief An opaque structure to describe a primitive descriptor iterator. */
 struct mkldnn_primitive_desc_iterator;
 
 /** @brief A primitive descriptor iterator handle. */
@@ -1100,7 +1208,7 @@ typedef const struct mkldnn_primitive_desc_iterator
  * @{ */
 
 /** @struct mkldnn_primitive_desc
- * @brief An opaque structure to describe a primitive descriptor . */
+ * @brief An opaque structure to describe a primitive descriptor. */
 struct mkldnn_primitive_desc;
 
 /** @brief A primitive descriptor handle. */
@@ -1138,12 +1246,12 @@ typedef const struct mkldnn_primitive_attr *const_mkldnn_primitive_attr_t;
  *
  * Post operations might be combined together, making a chain of post
  * operations. For instance one can configure convolution followed by
- * accumulation followed by eltwise (relu). This might be especially beneficial
+ * accumulation followed by eltwise. This might be especially beneficial
  * for residual learning blocks.
  *
  * @warning
- *      Of course not all the combinations are supported, so user should handle
- *      error accordingly.
+ *      Of course not all combinations are supported, so the user should handle
+ *      errors accordingly.
  *
  * Supported post operations:
  *  - accumulation (base primitive: convolution)
@@ -1185,8 +1293,8 @@ typedef struct {
 
 /** Primitive descriptor query specification
  *
- * For generic function mkldnn_primitive_desc_query() the type of result must
- * be agreed with queried argument. The correspondence table:
+ * For generic function mkldnn_primitive_desc_query(), the type of result must
+ * agree with the queried argument. The correspondence table:
  *      Query                        | type of result
  *      --------------------------------------------------------------
  *      #mkldnn_query_engine         | mkldnn_engine_t *
@@ -1205,10 +1313,10 @@ typedef struct {
  *     reference. All numbers are returned by value.
  *
  * @warning
- *     All returned references point to constant objects and valid only during
- *     the lifetime of queried primitive descriptor. Returned objects must not
- *     be destroyed by user. If there is a need to keep the object longer than
- *     a lifetime of queried primitive descriptor use
+ *     All returned references point to constant objects and are valid only
+ *     during the lifetime of the queried primitive descriptor. Returned objects
+ *     must not be destroyed by the user. If you need to keep the object longer
+ *     than the lifetime of the queried primitive descriptor, use
  *     mkldnn_primitive_desc_clone() to make a copy. */
 typedef enum {
     mkldnn_query_undef = 0,  /**< no query */
@@ -1234,16 +1342,16 @@ typedef enum {
     mkldnn_query_deconvolution_d, /**< deconvolution descriptor */
     mkldnn_query_shuffle_d, /**< shuffle descriptor */
     mkldnn_query_eltwise_d, /**< eltwise descriptor */
-    mkldnn_query_relu_d = mkldnn_query_eltwise_d, /**< @deprecated */
     mkldnn_query_softmax_d, /**< softmax descriptor */
     mkldnn_query_pooling_d, /**< pooling descriptor */
     mkldnn_query_lrn_d, /**< lrn descriptor */
     mkldnn_query_batch_normalization_d, /**< batch normalization descriptor */
     mkldnn_query_inner_product_d, /**< inner product descriptor */
-    mkldnn_query_convolution_relu_d, /**< @deprecated */
     mkldnn_query_rnn_d, /**< rnn descriptor */
     mkldnn_query_roi_pooling_d, /**< roi descriptor */
     mkldnn_query_depthwise_d, /**< eltwise descriptor */
+    mkldnn_query_binary_convolution_d, /**< binary convolution descriptor */
+    mkldnn_query_binarization_d, /**< binarization descriptor */
 
     /* (memory) primitive descriptor section */
     mkldnn_query_some_pd = 128, /**< stub */
diff --git a/inference-engine/thirdparty/mkl-dnn/include/mkldnn_version.h.in b/inference-engine/thirdparty/mkl-dnn/include/mkldnn_version.h.in
new file mode 100644
index 000000000..5ee012618
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/include/mkldnn_version.h.in
@@ -0,0 +1,32 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef MKLDNN_VERSION_H
+#define MKLDNN_VERSION_H
+
+/* Major version of MKL-DNN */
+#define MKLDNN_VERSION_MAJOR @MKLDNN_VERSION_MAJOR@
+
+/* Minor version of MKL-DNN */
+#define MKLDNN_VERSION_MINOR @MKLDNN_VERSION_MINOR@
+
+/* Patch version of MKL-DNN */
+#define MKLDNN_VERSION_PATCH @MKLDNN_VERSION_PATCH@
+
+/* Git Commit Hash of MKL-DNN */
+#define MKLDNN_VERSION_HASH  "@MKLDNN_VERSION_HASH@"
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/scripts/generate_mkldnn_debug.py b/inference-engine/thirdparty/mkl-dnn/scripts/generate_mkldnn_debug.py
index 9c5353605..4f6efe20d 100644
--- a/inference-engine/thirdparty/mkl-dnn/scripts/generate_mkldnn_debug.py
+++ b/inference-engine/thirdparty/mkl-dnn/scripts/generate_mkldnn_debug.py
@@ -120,7 +120,6 @@ def maybe_skip(enum):
         'mkldnn_batch_normalization_flag_t',
         'mkldnn_wino_memory_format_t',
         'mkldnn_rnn_cell_flags_t',
-        'mkldnn_rnn_direction_t',
         'mkldnn_engine_kind_t',
         'mkldnn_query_t',
         'mkldnn_stream_kind_t',
@@ -136,6 +135,7 @@ def enum_abbrev(enum):
         'mkldnn_prop_kind_t': 'prop_kind',
         'mkldnn_primitive_kind_t': 'prim_kind',
         'mkldnn_alg_kind_t': 'alg_kind',
+        'mkldnn_rnn_direction_t': 'rnn_direction',
     }.get(enum, enum)
 
 
diff --git a/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.bat b/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.bat
index 48979c3f4..04939a9ca 100644
--- a/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.bat
+++ b/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.bat
@@ -18,8 +18,8 @@ rem ============================================================================
 rem req: PowerShell 3.0+
 powershell.exe -command "if ($PSVersionTable.PSVersion.Major -ge 3) {exit 1} else {Write-Host \"The script requires PowerShell 3.0 or above (current version: $($PSVersionTable.PSVersion.Major).$($PSVersionTable.PSVersion.Minor))\"}" && goto Error_load
 
-set MKLURLROOT=https://github.com/intel/mkl-dnn/releases/download/v0.17-rc/
-set MKLVERSION=2019.0.1.20180928
+set MKLURLROOT=https://github.com/intel/mkl-dnn/releases/download/v0.18-rc/
+set MKLVERSION=2019.0.3.20190125
 
 set MKLPACKAGE=mklml_win_%MKLVERSION%
 
diff --git a/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.sh b/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.sh
index 27115ef20..3e2e39ded 100644
--- a/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.sh
+++ b/inference-engine/thirdparty/mkl-dnn/scripts/prepare_mkl.sh
@@ -15,8 +15,8 @@
 # limitations under the License.
 #===============================================================================
 
-MKLURLROOT="https://github.com/intel/mkl-dnn/releases/download/v0.17-rc/"
-MKLVERSION="2019.0.1.20180928"
+MKLURLROOT="https://github.com/intel/mkl-dnn/releases/download/v0.18-rc/"
+MKLVERSION="2019.0.3.20190125"
 
 os=`uname`
 if [ "$os" = "Linux" ]; then
diff --git a/inference-engine/thirdparty/mkl-dnn/src/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/src/CMakeLists.txt
index 83ed499a4..f10feb20e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/CMakeLists.txt
+++ b/inference-engine/thirdparty/mkl-dnn/src/CMakeLists.txt
@@ -14,9 +14,8 @@
 # limitations under the License.
 #===============================================================================
 
-set(TARGET_NAME ${LIB_NAME})
-
 file(GLOB_RECURSE HEADERS
+    ${PROJECT_BINARY_DIR}/include/*.h
     ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.h
     ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.hpp
     )
@@ -27,8 +26,10 @@ file(GLOB_RECURSE SOURCES
     ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp
     )
 include_directories(
+    ${PROJECT_BINARY_DIR}/include
     ${CMAKE_CURRENT_SOURCE_DIR}
     ${CMAKE_CURRENT_SOURCE_DIR}/common
+    ${CMAKE_CURRENT_SOURCE_DIR}/cpu
     ${CMAKE_CURRENT_SOURCE_DIR}/cpu/xbyak
     )
 
@@ -88,28 +89,68 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
     endif()
 endif()
 
-add_library(${TARGET_NAME} ${MKLDNN_LIBRARY_TYPE} ${HEADERS} ${SOURCES})
+add_library(${LIB_NAME} ${MKLDNN_LIBRARY_TYPE} ${HEADERS} ${SOURCES})
+set_property(TARGET ${LIB_NAME} PROPERTY CXX_STANDARD 11)
+set_property(TARGET ${LIB_NAME} PROPERTY CXX_STANDARD_REQUIRED ON)
+set_property(TARGET ${LIB_NAME} PROPERTY VERSION "${PROJECT_VERSION}.0")
+set_property(TARGET ${LIB_NAME} PROPERTY SOVERSION "0")
+set_property(TARGET ${LIB_NAME} PROPERTY PUBLIC_HEADER ${HEADERS})
+
+target_include_directories(${LIB_NAME} PUBLIC
+    $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/../include>
+    # $<INSTALL_PREFIX> is required for compatibility with cmake 2.8
+    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/${CMAKE_INSTALL_INCLUDEDIR}>
+    )
 
-#Add mkldnn.dll to execution PATH
-if(NOT(MINGW))
-    set(CTESTCONFIG_PATH "${CTESTCONFIG_PATH}\;${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}" PARENT_SCOPE)
-else()
-    # CMake with "MSYS Makefiles" generator seems to build libmkldnn.dll in a directory without build type.
-    set(CTESTCONFIG_PATH "${CTESTCONFIG_PATH}\;${CMAKE_CURRENT_BINARY_DIR}" PARENT_SCOPE)
+target_link_libraries_private(${LIB_NAME}
+    "${EXTRA_SHARED_LIBS};${EXTRA_STATIC_LIBS}")
+target_link_libraries_public(${LIB_NAME} "${EXTRA_SHARED_LIBS}")
+if(MKLDNN_LIBRARY_TYPE STREQUAL "STATIC")
+    target_link_libraries_public(${LIB_NAME} "${EXTRA_STATIC_LIBS}")
 endif()
-target_link_libraries(${TARGET_NAME} ${${TARGET_NAME}_LINKER_LIBS} ${EXTRA_LIBS})
-set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD 11)
-set_property(TARGET ${TARGET_NAME} PROPERTY CXX_STANDARD_REQUIRED ON)
-set_property(TARGET ${TARGET_NAME} PROPERTY VERSION "${PROJECT_VERSION}.0")
-set_property(TARGET ${TARGET_NAME} PROPERTY SOVERSION "0")
 
-if(MINGW)
-    # We need to install *.dll into bin/ and *.a into lib/. 
-    install(TARGETS ${TARGET_NAME}
-        RUNTIME DESTINATION bin
-        ARCHIVE DESTINATION lib${LIB_SUFFIX}
-    )
-else()
-    install(TARGETS ${TARGET_NAME} DESTINATION lib${LIB_SUFFIX})
+set(LIB_EXPORT_NAME "${LIB_NAME}-targets")
+install(TARGETS ${LIB_NAME}
+    EXPORT "${LIB_EXPORT_NAME}"
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+
+# Write version and package config files
+set(LIB_CONFIG_GENERATE_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
+set(LIB_CONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${LIB_NAME}")
+set(LIB_VERSION_FILE
+    "${LIB_CONFIG_GENERATE_DIR}/${LIB_NAME}-config-version.cmake")
+set(LIB_CONFIG_FILE
+    "${LIB_CONFIG_GENERATE_DIR}/${LIB_NAME}-config.cmake")
+write_basic_package_version_file(
+    "${LIB_VERSION_FILE}"
+    VERSION ${PROJECT_VERSION}
+    COMPATIBILITY SameMajorVersion)
+configure_package_config_file(
+    "../cmake/config.cmake.in"
+    "${LIB_CONFIG_FILE}"
+    INSTALL_DESTINATION ${LIB_CONFIG_INSTALL_DIR})
+install(FILES ${LIB_CONFIG_FILE} ${LIB_VERSION_FILE}
+    DESTINATION ${LIB_CONFIG_INSTALL_DIR})
+string(TOUPPER "${LIB_NAME}::" LIB_NAMESPACE)
+install(EXPORT ${LIB_EXPORT_NAME}
+    NAMESPACE ${LIB_NAMESPACE}
+    DESTINATION ${LIB_CONFIG_INSTALL_DIR})
+
+# On Windows we need to add mkldnn.dll path to CTESTCONFIG_PATH which is later
+# passed to ctest and Visual Studio solutions
+if(WIN32)
+    if(NOT(MINGW))
+        foreach(BUILD_TYPE Release Debug RelWithDebInfo MinSizeRel)
+            append_to_windows_path_list(CTESTCONFIG_PATH
+                "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_TYPE}")
+        endforeach()
+    else()
+        append_to_windows_path_list(CTESTCONFIG_PATH
+            "${CMAKE_CURRENT_BINARY_DIR}")
+    endif()
+    set(CTESTCONFIG_PATH "${CTESTCONFIG_PATH}" PARENT_SCOPE)
 endif()
-install(FILES ${HEADERS} DESTINATION include)
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/batch_normalization_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/batch_normalization_pd.hpp
index 96f9cf90f..bd04302f5 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/batch_normalization_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/batch_normalization_pd.hpp
@@ -63,7 +63,7 @@ struct batch_normalization_pd_t: public primitive_desc_t {
     inline bool use_scaleshift() const
     { return desc_.flags & mkldnn_use_scaleshift; }
 
-    inline bool omit_stats() const { return desc_.flags & mkldnn_omit_stats; }
+    inline bool use_global_stats() const { return desc_.flags & mkldnn_use_global_stats; }
 
     inline bool is_training() const
     { return desc_.prop_kind == prop_kind::forward_training; }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/binarization.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/binarization.cpp
new file mode 100644
index 000000000..f6ab0c074
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/binarization.cpp
@@ -0,0 +1,66 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <assert.h>
+#include <mkldnn_types.h>
+#include "mkldnn.h"
+
+#include "c_types_map.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+
+using namespace mkldnn::impl;
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::status;
+using namespace mkldnn::impl::prop_kind;
+using namespace mkldnn::impl::alg_kind;
+using namespace mkldnn::impl::types;
+
+namespace {
+status_t binarization_desc_init(binarization_desc_t *binarization_desc, prop_kind_t prop_kind,
+        alg_kind_t alg_kind, const memory_desc_t *src_desc, const memory_desc_t *dst_desc,
+        const memory_desc_t *weights_desc) {
+    bool args_ok = true
+        && !any_null(binarization_desc, src_desc, dst_desc, weights_desc)
+        && one_of(prop_kind, forward_training, forward_inference)
+        && one_of(alg_kind, binarization_depthwise);
+    if (!args_ok) return invalid_arguments;
+
+    auto bd = binarization_desc_t();
+    bd.primitive_kind = primitive_kind::binarization;
+    bd.prop_kind = prop_kind;
+    bd.alg_kind = alg_kind;
+    bd.src_desc = *src_desc;
+    bd.dst_desc = *dst_desc;
+    bd.weights_desc = *weights_desc;
+
+    bool consistency = true
+        && memory_desc_wrapper(bd.src_desc).nelems()
+        && memory_desc_wrapper(bd.dst_desc).nelems();
+    if (!consistency) return invalid_arguments;
+
+    *binarization_desc = bd;
+    return success;
+}
+}
+
+status_t mkldnn_binarization_forward_desc_init(binarization_desc_t *binarization_desc,
+        prop_kind_t prop_kind, alg_kind_t alg_kind,
+        const memory_desc_t *src_desc, const memory_desc_t *dst_desc, const memory_desc_t *weights_desc) {
+    if (!one_of(prop_kind, forward_training, forward_inference))
+        return invalid_arguments;
+    return binarization_desc_init(binarization_desc, prop_kind, alg_kind, src_desc, dst_desc, weights_desc);
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/binarization_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/binarization_pd.hpp
new file mode 100644
index 000000000..145023015
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/binarization_pd.hpp
@@ -0,0 +1,89 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef BINARIZATION_PD_HPP
+#define BINARIZATION_PD_HPP
+
+#include <mkldnn_types.h>
+#include "mkldnn.h"
+
+#include "c_types_map.hpp"
+#include "primitive_desc.hpp"
+#include "memory_pd.hpp"
+
+namespace mkldnn {
+namespace impl {
+
+struct binarization_fwd_pd_t: public primitive_desc_t {
+    typedef binarization_fwd_pd_t base_class;
+    typedef binarization_fwd_pd_t hint_class;
+    static constexpr auto base_pkind = primitive_kind::binarization;
+
+    binarization_fwd_pd_t(mkldnn::impl::engine_t *engine,
+            const binarization_desc_t *adesc, const primitive_attr_t *attr,
+            const binarization_fwd_pd_t *hint_fwd_pd)
+        : primitive_desc_t(engine, attr, primitive_kind::binarization)
+        , desc_(*adesc), hint_fwd_pd_(hint_fwd_pd) {}
+    virtual ~binarization_fwd_pd_t() {}
+
+    const binarization_desc_t *desc() const { return &desc_; }
+    virtual const op_desc_t *op_desc() const override
+    { return reinterpret_cast<const op_desc_t *>(this->desc()); }
+    virtual void init_info() override { init_info_binarization(this, this->info_); }
+
+    virtual const memory_pd_t *input_pd(int index = 0) const override {
+        switch (index) {
+        case 0: return src_pd();
+        case 1: return weights_pd(index - 1);
+        default: return nullptr;
+        }
+    }
+    virtual const memory_pd_t *output_pd(int index = 0) const override
+    { return index == 0 ? dst_pd() : nullptr; }
+
+    virtual int n_inputs() const override { return 2; }
+    virtual int n_outputs() const override { return 1; }
+
+    virtual status_t query(query_t what, int idx, void *result) const override
+    {
+        switch (what) {
+        case query::binarization_d:
+            *(const binarization_desc_t**)result = desc(); break;
+        default: return primitive_desc_t::query(what, idx, result);
+        }
+        return status::success;
+    }
+
+    /* common binarization aux functions */
+
+    inline int MB() const { return input_pd()->desc()->ndims > 0 ? input_pd()->desc()->dims[0] : 1; }
+    inline int C()  const { return input_pd()->desc()->ndims > 1 ? input_pd()->desc()->dims[1] : 1; }
+    inline int D()  const { return input_pd()->desc()->ndims > 4 ? input_pd()->desc()->dims[2] : 1; }
+    inline int H()  const { return input_pd()->desc()->ndims > 4 ? input_pd()->desc()->dims[3] :
+                                   input_pd()->desc()->ndims > 2 ? input_pd()->desc()->dims[2] : 1; }
+    inline int W()  const { return input_pd()->desc()->ndims > 4 ? input_pd()->desc()->dims[4] :
+                                   input_pd()->desc()->ndims > 3 ? input_pd()->desc()->dims[3] : 1; }
+
+protected:
+    binarization_desc_t desc_;
+    const binarization_fwd_pd_t *hint_fwd_pd_;
+};
+
+}
+}
+
+#endif
+
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/binary_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/binary_convolution.cpp
new file mode 100644
index 000000000..76d5531c5
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/binary_convolution.cpp
@@ -0,0 +1,120 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <assert.h>
+#include "mkldnn.h"
+
+#include "c_types_map.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+
+using namespace mkldnn::impl;
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::status;
+using namespace mkldnn::impl::prop_kind;
+using namespace mkldnn::impl::alg_kind;
+using namespace mkldnn::impl::types;
+
+namespace mkldnn {
+namespace impl {
+status_t bin_conv_desc_init(binary_convolution_desc_t *bin_conv_desc,
+        prop_kind_t prop_kind, alg_kind_t alg_kind,
+        const memory_desc_t *src_desc, const memory_desc_t *weights_desc,
+        const memory_desc_t *dst_desc,
+        const dims_t strides, const dims_t dilates,
+        const dims_t padding_l, const dims_t padding_r,
+        float pad_value) {
+    bool args_ok = true
+        && !any_null(bin_conv_desc, src_desc, weights_desc, dst_desc, strides,
+                padding_l)
+        && one_of(alg_kind, binary_convolution_direct)
+        && one_of(pad_value, -1.f, 0.f, 1.f);
+    if (!args_ok) return invalid_arguments;
+
+    if (padding_r == nullptr) padding_r = padding_l;
+
+    auto bcd = binary_convolution_desc_t();
+    bcd.primitive_kind = primitive_kind::binary_convolution;
+    bcd.prop_kind = prop_kind;
+    bcd.alg_kind = alg_kind;
+
+    bcd.src_desc = zero_md();
+    bcd.dst_desc = zero_md();
+    bcd.weights_desc = zero_md();
+
+    const bool with_groups = weights_desc->ndims == src_desc->ndims + 1;
+
+    bcd.src_desc = *src_desc;
+    bcd.dst_desc = *dst_desc;
+    bcd.weights_desc = *weights_desc;
+
+    int sp_dims = src_desc->ndims - 2;
+    utils::array_copy(bcd.strides, strides, sp_dims);
+    utils::array_copy(bcd.padding[0], padding_l, sp_dims);
+    utils::array_copy(bcd.padding[1], padding_r, sp_dims);
+    if (dilates)
+        utils::array_copy(bcd.dilates, dilates, sp_dims);
+    else
+        utils::array_set(bcd.dilates, 0, sp_dims);
+
+    bcd.pad_value = pad_value;
+    bcd.accum_data_type = types::default_accum_data_type(src_desc->data_type,
+            weights_desc->data_type, dst_desc->data_type, prop_kind);
+
+    bool consistency = true
+        && memory_desc_wrapper(weights_desc).nelems()
+        && src_desc->ndims == dst_desc->ndims
+        && utils::one_of(src_desc->ndims, 3, 4, 5)
+        && utils::one_of(weights_desc->ndims, src_desc->ndims, src_desc->ndims + 1)
+        && src_desc->dims[0] == dst_desc->dims[0];
+    for (int i = 2; i < src_desc->ndims; ++i)
+    {
+        int src = src_desc->dims[i];
+        int ker = weights_desc->dims[with_groups + i];
+        int dil = bcd.dilates[i - 2];
+        int pad_l = padding_l[i - 2];
+        int pad_r = padding_r[i - 2];
+        int str = strides[i - 2];
+        int dst = dst_desc->dims[i];
+        int ker_range = 1 + (ker - 1) * (dil + 1);
+
+        if (str < 1) return invalid_arguments;
+        consistency = consistency
+            && dil >= 0
+            && pad_l >= 0
+//            && pad_r + str > 0 // TODO: [dmitrygo] Commented as WA to support dw conv fusing
+            && (src - ker_range + pad_l + pad_r) / str + 1 == dst;
+    }
+    if (!consistency) return invalid_arguments;
+
+    *bin_conv_desc = bcd;
+    return success;
+}
+}
+}
+
+status_t mkldnn_dilated_binary_convolution_forward_desc_init(
+        binary_convolution_desc_t *bin_conv_desc, prop_kind_t prop_kind,
+        alg_kind_t alg_kind, const memory_desc_t *src_desc,
+        const memory_desc_t *weights_desc, const memory_desc_t *dst_desc, const dims_t strides,
+        const dims_t dilates, const dims_t padding_l,
+        const dims_t padding_r,
+        const float pad_value) {
+    if (!one_of(prop_kind, forward_training, forward_inference))
+        return invalid_arguments;
+    return mkldnn::impl::bin_conv_desc_init(bin_conv_desc, prop_kind, alg_kind, src_desc,
+            weights_desc, dst_desc, strides, dilates, padding_l, padding_r, pad_value);
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/binary_convolution_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/binary_convolution_pd.hpp
new file mode 100644
index 000000000..22fb486ce
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/binary_convolution_pd.hpp
@@ -0,0 +1,153 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef BINARY_CONVOLUTION_PD_HPP
+#define BINARY_CONVOLUTION_PD_HPP
+
+#include "mkldnn.h"
+
+#include "c_types_map.hpp"
+#include "primitive_desc.hpp"
+#include "memory_pd.hpp"
+#include "utils.hpp"
+
+namespace mkldnn {
+namespace impl {
+
+status_t bin_conv_desc_init(binary_convolution_desc_t *bin_conv_desc,
+        prop_kind_t prop_kind, alg_kind_t alg_kind,
+        const memory_desc_t *src_desc, const memory_desc_t *weights_desc,
+        const memory_desc_t *dst_desc,
+        const dims_t strides, const dims_t dilates,
+        const dims_t padding_l, const dims_t padding_r,
+        padding_kind_t padding_kind);
+
+struct _binary_convolution_fwd_pd_t: public primitive_desc_t {
+    typedef _binary_convolution_fwd_pd_t base_class;
+    typedef _binary_convolution_fwd_pd_t hint_class;
+    typedef binary_convolution_desc_t base_desc_t;
+    static constexpr auto base_pkind = primitive_kind::binary_convolution;
+
+    _binary_convolution_fwd_pd_t(mkldnn::impl::engine_t *engine,
+            const base_desc_t *adesc, const primitive_attr_t *attr,
+            const _binary_convolution_fwd_pd_t *hint_fwd_pd)
+        : primitive_desc_t(engine, attr, base_pkind), desc_(*adesc)
+        , hint_fwd_pd_(hint_fwd_pd) {}
+    virtual ~_binary_convolution_fwd_pd_t() {}
+
+    const base_desc_t *desc() const { return &desc_; }
+    inline const binary_convolution_desc_t *cdesc() const { return &cdesc_(); }
+    virtual const op_desc_t *op_desc() const override
+    { return reinterpret_cast<const op_desc_t *>(this->desc()); }
+    virtual void init_info() override { init_info_bin_conv(this, this->info_); }
+
+    virtual const memory_pd_t *input_pd(int index = 0) const override {
+        switch (index) {
+        case 0: return src_pd();
+        case 1: return weights_pd(index - 1);
+        default: return nullptr;
+        }
+    }
+    virtual const memory_pd_t *output_pd(int index = 0) const override
+    { return index == 0 ? dst_pd() : nullptr; }
+
+    virtual int n_inputs() const override { return 2; }
+    virtual int n_outputs() const override { return 1; }
+
+    virtual status_t query(query_t what, int idx, void *result) const override
+    {
+        switch (what) {
+        case pkind_traits<base_pkind>::query_d:
+            *(const base_desc_t**)result = desc(); break;
+        default: return primitive_desc_t::query(what, idx, result);
+        }
+        return status::success;
+    }
+
+    /* common conv aux functions */
+
+    inline int MB() const { return input_pd()->desc()->dims[0]; }
+
+    inline int IC() const { return input_pd()->desc()->dims[1]; }
+    inline int OC() const { return output_pd()->desc()->dims[1]; }
+    inline int G() const
+    { return with_groups() ? cdesc_().weights_desc.dims[0] : 1; }
+
+    inline int ID() const { return (ndims() == 5) ? input_pd()->desc()->dims[2] : 1; }
+    inline int IH() const { return (ndims() == 3) ? 1 : input_pd()->desc()->dims[ndims()-2]; }
+    inline int IW() const { return input_pd()->desc()->dims[ndims()-1]; }
+    inline int OD() const { return (ndims() == 5) ? output_pd()->desc()->dims[2] : 1; }
+    inline int OH() const { return (ndims() == 3) ? 1 : output_pd()->desc()->dims[ndims()-2]; }
+    inline int OW() const { return output_pd()->desc()->dims[ndims()-1]; }
+    inline int KD() const { return (ndims() == 5)
+        ? cdesc_().weights_desc.dims[2 + with_groups()] : 1; }
+    inline int KH() const
+    { return (ndims() == 3)
+        ? 1 : cdesc_().weights_desc.dims[ndims() - (2 - with_groups())]; }
+    inline int KW() const
+    { return cdesc_().weights_desc.dims[ndims() - (1 - with_groups())]; }
+
+    inline int KSD() const { return (ndims() == 5) ? cdesc_().strides[0] : 1; }
+    inline int KSH() const { return (ndims() == 3)
+        ? 1 : cdesc_().strides[ndims()-4]; }
+    inline int KSW() const { return cdesc_().strides[ndims()-3]; }
+
+    inline int KDD() const { return (ndims() == 5) ? cdesc_().dilates[0] : 0; }
+    inline int KDH() const { return (ndims() == 3)
+        ? 0 : cdesc_().dilates[ndims()-4]; }
+    inline int KDW() const { return cdesc_().dilates[ndims()-3]; }
+
+    inline int padFront() const
+        { return (ndims() == 5) ? cdesc_().padding[0][0] : 0; }
+    inline int padBack() const
+        { return (ndims() == 5) ? cdesc_().padding[1][0] : 0; }
+    inline int padT() const { return (ndims() == 3)
+        ? 0 : cdesc_().padding[0][ndims()-4]; }
+    inline int padB() const { return (ndims() == 3)
+        ? 0 : cdesc_().padding[1][ndims()-4]; }
+    inline int padL() const { return cdesc_().padding[0][ndims()-3]; }
+    inline int padR() const { return cdesc_().padding[1][ndims()-3]; }
+
+    inline float pad_value() const { return cdesc_().pad_value; }
+
+    inline bool with_groups() const
+    { return cdesc_().weights_desc.ndims == cdesc_().src_desc.ndims + 1; }
+
+    inline int ndims() const { return cdesc_().src_desc.ndims; }
+
+    bool has_zero_dim_memory() const {
+        return false
+            || memory_desc_wrapper(cdesc_().src_desc).has_zero_dim()
+            || memory_desc_wrapper(cdesc_().dst_desc).has_zero_dim();
+    }
+
+protected:
+    base_desc_t desc_;
+    const _binary_convolution_fwd_pd_t *hint_fwd_pd_;
+
+    inline const binary_convolution_desc_t &cdesc_() const;
+
+    virtual status_t init() = 0;
+};
+
+using binary_convolution_fwd_pd_t = mkldnn::impl::_binary_convolution_fwd_pd_t;
+
+inline const binary_convolution_desc_t &_binary_convolution_fwd_pd_t::cdesc_() const { return desc_; }
+
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/c_types_map.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/c_types_map.hpp
index 5bc02ae59..939289019 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/c_types_map.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/c_types_map.hpp
@@ -28,7 +28,7 @@ using dims_t = mkldnn_dims_t;
 using strides_t = mkldnn_strides_t;
 
 /* FIXME: to inference from correspoding types */
-using dim_t = int;
+using dim_t = ptrdiff_t;
 using stride_t = ptrdiff_t;
 
 using status_t = mkldnn_status_t;
@@ -60,6 +60,7 @@ namespace prop_kind {
 using alg_kind_t = mkldnn_alg_kind_t;
 namespace alg_kind {
     const alg_kind_t undef = mkldnn_alg_kind_undef;
+    const alg_kind_t convolution_auto = mkldnn_convolution_auto;
     const alg_kind_t convolution_direct = mkldnn_convolution_direct;
     const alg_kind_t convolution_winograd = mkldnn_convolution_winograd;
     const alg_kind_t deconvolution_direct = mkldnn_deconvolution_direct;
@@ -75,6 +76,8 @@ namespace alg_kind {
     const alg_kind_t eltwise_soft_relu = mkldnn_eltwise_soft_relu;
     const alg_kind_t eltwise_logistic = mkldnn_eltwise_logistic;
     const alg_kind_t eltwise_clamp = mkldnn_eltwise_clamp;
+    const alg_kind_t eltwise_exp = mkldnn_eltwise_exp;
+    const alg_kind_t eltwise_not = mkldnn_eltwise_not;
     const alg_kind_t depthwise_scale_shift = mkldnn_depthwise_scale_shift;
     const alg_kind_t depthwise_prelu = mkldnn_depthwise_prelu;
     const alg_kind_t pooling_max = mkldnn_pooling_max;
@@ -89,6 +92,8 @@ namespace alg_kind {
     const alg_kind_t gru_linear_before_reset = mkldnn_gru_linear_before_reset;
     const alg_kind_t roi_pooling_max = mkldnn_roi_pooling_max;
     const alg_kind_t roi_pooling_bilinear = mkldnn_roi_pooling_bilinear;
+    const alg_kind_t binary_convolution_direct = mkldnn_binary_convolution_direct;
+    const alg_kind_t binarization_depthwise = mkldnn_binarization_depthwise;
 }
 
 using data_type_t = mkldnn_data_type_t;
@@ -99,6 +104,7 @@ namespace data_type {
     const data_type_t s16 = mkldnn_s16;
     const data_type_t s8 = mkldnn_s8;
     const data_type_t u8 = mkldnn_u8;
+    const data_type_t bin = mkldnn_bin;
 }
 
 using round_mode_t = mkldnn_round_mode_t;
@@ -107,6 +113,13 @@ namespace round_mode {
     const round_mode_t down = mkldnn_round_down;
 }
 
+using rnn_packed_format_t = mkldnn_rnn_packed_memory_format_t;
+namespace rnn_packed_format {
+    const rnn_packed_format_t undef = mkldnn_packed_format_undef;
+    const rnn_packed_format_t ldigo_p = mkldnn_ldigo_p;
+    const rnn_packed_format_t ldgoi_p = mkldnn_ldgoi_p;
+}
+
 using memory_format_t = mkldnn_memory_format_t;
 namespace memory_format {
     const memory_format_t undef = mkldnn_format_undef;
@@ -116,27 +129,33 @@ namespace memory_format {
     const memory_format_t nc = mkldnn_nc;
     const memory_format_t ncw = mkldnn_ncw;
     const memory_format_t nwc = mkldnn_nwc;
+    const memory_format_t nCw4c = mkldnn_nCw4c;
     const memory_format_t nCw8c = mkldnn_nCw8c;
     const memory_format_t nCw16c = mkldnn_nCw16c;
     const memory_format_t nchw = mkldnn_nchw;
     const memory_format_t nhwc = mkldnn_nhwc;
     const memory_format_t chwn = mkldnn_chwn;
+    const memory_format_t nChw4c = mkldnn_nChw4c;
     const memory_format_t nChw8c = mkldnn_nChw8c;
     const memory_format_t nChw16c = mkldnn_nChw16c;
     const memory_format_t ncdhw = mkldnn_ncdhw;
     const memory_format_t ndhwc = mkldnn_ndhwc;
+    const memory_format_t nCdhw4c = mkldnn_nCdhw4c;
     const memory_format_t nCdhw8c = mkldnn_nCdhw8c;
     const memory_format_t nCdhw16c = mkldnn_nCdhw16c;
     const memory_format_t oi = mkldnn_oi;
     const memory_format_t io = mkldnn_io;
     const memory_format_t oiw = mkldnn_oiw;
     const memory_format_t wio = mkldnn_wio;
+    const memory_format_t Owi4o = mkldnn_Owi4o;
+    const memory_format_t OIw4i4o = mkldnn_OIw4i4o;
     const memory_format_t Owi8o = mkldnn_Owi8o;
     const memory_format_t OIw8i8o = mkldnn_OIw8i8o;
     const memory_format_t OIw8o8i = mkldnn_OIw8o8i;
     const memory_format_t OIw16i16o = mkldnn_OIw16i16o;
     const memory_format_t OIw16o16i = mkldnn_OIw16o16i;
     const memory_format_t Oiw16o = mkldnn_Oiw16o;
+    const memory_format_t Oiw4o = mkldnn_Oiw4o;
     const memory_format_t Owi16o = mkldnn_Owi16o;
     const memory_format_t OIw8i16o2i = mkldnn_OIw8i16o2i;
     const memory_format_t IOw16o16i = mkldnn_IOw16o16i;
@@ -144,20 +163,25 @@ namespace memory_format {
     const memory_format_t oihw = mkldnn_oihw;
     const memory_format_t ihwo = mkldnn_ihwo;
     const memory_format_t hwio = mkldnn_hwio;
+    const memory_format_t iohw = mkldnn_iohw;
     const memory_format_t hwio_s8s8 = mkldnn_hwio_s8s8;
     const memory_format_t dhwio = mkldnn_dhwio;
     const memory_format_t oidhw = mkldnn_oidhw;
+    const memory_format_t OIdhw4i4o = mkldnn_OIdhw4i4o;
+    const memory_format_t Odhwi4o = mkldnn_Odhwi4o;
     const memory_format_t OIdhw8i8o = mkldnn_OIdhw8i8o;
     const memory_format_t OIdhw8o8i = mkldnn_OIdhw8o8i;
     const memory_format_t Odhwi8o = mkldnn_Odhwi8o;
     const memory_format_t OIdhw16i16o = mkldnn_OIdhw16i16o;
     const memory_format_t OIdhw16o16i = mkldnn_OIdhw16o16i;
+    const memory_format_t Oidhw4o = mkldnn_Oidhw4o;
     const memory_format_t Oidhw16o = mkldnn_Oidhw16o;
     const memory_format_t Odhwi16o = mkldnn_Odhwi16o;
     const memory_format_t oIhw8i = mkldnn_oIhw8i;
     const memory_format_t oIhw16i = mkldnn_oIhw16i;
     const memory_format_t oIdhw8i = mkldnn_oIdhw8i;
     const memory_format_t oIdhw16i = mkldnn_oIdhw16i;
+    const memory_format_t OIhw4i4o = mkldnn_OIhw4i4o;
     const memory_format_t OIhw8i8o = mkldnn_OIhw8i8o;
     const memory_format_t OIhw16i16o = mkldnn_OIhw16i16o;
     const memory_format_t OIhw4i16o4i = mkldnn_OIhw4i16o4i;
@@ -168,46 +192,65 @@ namespace memory_format {
     const memory_format_t OIhw8o8i = mkldnn_OIhw8o8i;
     const memory_format_t OIhw16o16i = mkldnn_OIhw16o16i;
     const memory_format_t IOhw16o16i = mkldnn_IOhw16o16i;
+    const memory_format_t Oihw4o = mkldnn_Oihw4o;
     const memory_format_t Oihw16o = mkldnn_Oihw16o;
     const memory_format_t Ohwi8o = mkldnn_Ohwi8o;
+    const memory_format_t Ohwi4o = mkldnn_Ohwi4o;
     const memory_format_t Ohwi16o = mkldnn_Ohwi16o;
     const memory_format_t OhIw8o4i = mkldnn_OhIw8o4i;
+    const memory_format_t OhIw8o32i = mkldnn_OhIw8o32i;
+    const memory_format_t OhIw16o32i = mkldnn_OhIw16o32i;
     const memory_format_t OhIw8o4i_s8s8 = mkldnn_OhIw8o4i_s8s8;
     const memory_format_t goiw = mkldnn_goiw;
+    const memory_format_t gOwi4o = mkldnn_gOwi4o;
+    const memory_format_t gOIw4i4o = mkldnn_gOIw4i4o;
     const memory_format_t gOwi8o = mkldnn_gOwi8o;
     const memory_format_t gOIw8i8o = mkldnn_gOIw8i8o;
     const memory_format_t gOIw8o8i = mkldnn_gOIw8o8i;
     const memory_format_t gOIw16i16o = mkldnn_gOIw16i16o;
     const memory_format_t gOIw16o16i = mkldnn_gOIw16o16i;
     const memory_format_t gOiw16o = mkldnn_gOiw16o;
+    const memory_format_t gOiw4o = mkldnn_gOiw4o;
     const memory_format_t gOwi16o = mkldnn_gOwi16o;
     const memory_format_t gOIw8i16o2i = mkldnn_gOIw8i16o2i;
     const memory_format_t gIOw16o16i = mkldnn_gIOw16o16i;
     const memory_format_t gOIw8o16i2o = mkldnn_gOIw8o16i2o;
     const memory_format_t goihw = mkldnn_goihw;
     const memory_format_t hwigo = mkldnn_hwigo;
+    const memory_format_t giohw = mkldnn_giohw;
     const memory_format_t hwigo_s8s8 = mkldnn_hwigo_s8s8;
+    const memory_format_t gOIhw4i4o = mkldnn_gOIhw4i4o;
     const memory_format_t gOIhw8i8o = mkldnn_gOIhw8i8o;
     const memory_format_t gOIhw16i16o = mkldnn_gOIhw16i16o;
     const memory_format_t gOIhw4i16o4i = mkldnn_gOIhw4i16o4i;
     const memory_format_t gOIhw4i16o4i_s8s8 = mkldnn_gOIhw4i16o4i_s8s8;
+    const memory_format_t gOIhw2i8o4i = mkldnn_gOIhw2i8o4i;
+    const memory_format_t gOIhw2i8o4i_s8s8 = mkldnn_gOIhw2i8o4i_s8s8;
     const memory_format_t gOIhw8i16o2i = mkldnn_gOIhw8i16o2i;
     const memory_format_t gOIdhw8i16o2i = mkldnn_gOIdhw8i16o2i;
     const memory_format_t gOIhw8o16i2o = mkldnn_gOIhw8o16i2o;
+    const memory_format_t gOIhw4o4i = mkldnn_gOIhw4o4i;
+    const memory_format_t gOIhw4o4i_s8s8 = mkldnn_gOIhw4o4i_s8s8;
     const memory_format_t gOIhw8o8i = mkldnn_gOIhw8o8i;
     const memory_format_t gOIhw16o16i = mkldnn_gOIhw16o16i;
     const memory_format_t gIOhw16o16i = mkldnn_gIOhw16o16i;
+    const memory_format_t gOihw4o = mkldnn_gOihw4o;
     const memory_format_t gOihw16o = mkldnn_gOihw16o;
     const memory_format_t gOhwi8o = mkldnn_gOhwi8o;
+    const memory_format_t gOhwi4o = mkldnn_gOhwi4o;
     const memory_format_t gOhwi16o = mkldnn_gOhwi16o;
     const memory_format_t Goihw8g = mkldnn_Goihw8g;
     const memory_format_t Goihw16g = mkldnn_Goihw16g;
+    const memory_format_t Goihw16g_s8s8 = mkldnn_Goihw16g_s8s8;
     const memory_format_t goidhw = mkldnn_goidhw;
+    const memory_format_t gOIdhw4i4o = mkldnn_gOIdhw4i4o;
+    const memory_format_t gOdhwi4o = mkldnn_gOdhwi4o;
     const memory_format_t gOIdhw8i8o = mkldnn_gOIdhw8i8o;
     const memory_format_t gOIdhw8o8i = mkldnn_gOIdhw8o8i;
     const memory_format_t gOdhwi8o = mkldnn_gOdhwi8o;
     const memory_format_t gOIdhw16i16o = mkldnn_gOIdhw16i16o;
     const memory_format_t gOIdhw16o16i = mkldnn_gOIdhw16o16i;
+    const memory_format_t gOidhw4o = mkldnn_gOidhw4o;
     const memory_format_t gOidhw16o = mkldnn_gOidhw16o;
     const memory_format_t gOdhwi16o = mkldnn_gOdhwi16o;
     const memory_format_t gOhIw8o4i = mkldnn_gOhIw8o4i;
@@ -216,11 +259,10 @@ namespace memory_format {
     const memory_format_t tnc = mkldnn_tnc;
     const memory_format_t ldsnc = mkldnn_ldsnc;
     const memory_format_t ldigo = mkldnn_ldigo;
-    const memory_format_t ldigo_p = mkldnn_ldigo_p;
     const memory_format_t ldgoi = mkldnn_ldgoi;
-    const memory_format_t ldgoi_p = mkldnn_ldgoi_p;
     const memory_format_t ldgo = mkldnn_ldgo;
     const memory_format_t wino_fmt = mkldnn_wino_fmt;
+    const memory_format_t rnn_packed = mkldnn_rnn_packed;
 }
 
 using padding_kind_t = mkldnn_padding_kind_t;
@@ -253,9 +295,10 @@ namespace primitive_kind {
     const primitive_kind_t lrn = mkldnn_lrn;
     const primitive_kind_t batch_normalization = mkldnn_batch_normalization;
     const primitive_kind_t inner_product = mkldnn_inner_product;
-    const primitive_kind_t convolution_relu = mkldnn_convolution_relu;
     const primitive_kind_t rnn = mkldnn_rnn;
     const primitive_kind_t roi_pooling = mkldnn_roi_pooling;
+    const primitive_kind_t binary_convolution = mkldnn_binary_convolution;
+    const primitive_kind_t binarization = mkldnn_binarization;
 }
 
 using query_t = mkldnn_query_t;
@@ -286,9 +329,10 @@ namespace query {
     const query_t lrn_d = mkldnn_query_lrn_d;
     const query_t batch_normalization_d = mkldnn_query_batch_normalization_d;
     const query_t inner_product_d = mkldnn_query_inner_product_d;
-    const query_t convolution_relu_d = mkldnn_query_convolution_relu_d;
     const query_t rnn_d = mkldnn_query_rnn_d;
     const query_t roi_pooling_d = mkldnn_query_roi_pooling_d;
+    const query_t binary_convolution_d = mkldnn_query_binary_convolution_d;
+    const query_t binarization_d = mkldnn_query_binarization_d;
 
     const query_t some_pd = mkldnn_query_some_pd;
     const query_t input_pd = mkldnn_query_input_pd;
@@ -304,6 +348,7 @@ namespace query {
 }
 
 using blocking_desc_t = mkldnn_blocking_desc_t;
+using rnn_packed_data_t = mkldnn_rnn_packed_desc_t;
 using wino_data_t = mkldnn_wino_desc_t;
 using memory_desc_t = mkldnn_memory_desc_t;
 using convolution_desc_t = mkldnn_convolution_desc_t;
@@ -315,9 +360,10 @@ using softmax_desc_t = mkldnn_softmax_desc_t;
 using lrn_desc_t = mkldnn_lrn_desc_t;
 using batch_normalization_desc_t = mkldnn_batch_normalization_desc_t;
 using inner_product_desc_t = mkldnn_inner_product_desc_t;
-using convolution_relu_desc_t = mkldnn_convolution_relu_desc_t;
 using roi_pooling_desc_t = mkldnn_roi_pooling_desc_t;
 using depthwise_desc_t = mkldnn_depthwise_desc_t;
+using binary_convolution_desc_t = mkldnn_binary_convolution_desc_t;
+using binarization_desc_t = mkldnn_binarization_desc_t;
 
 using rnn_direction_t = mkldnn_rnn_direction_t;
 using rnn_cell_desc_t = mkldnn_rnn_cell_desc_t;
@@ -340,10 +386,11 @@ struct op_desc_t {
         lrn_desc_t lrn;
         batch_normalization_desc_t batch_normalization;
         inner_product_desc_t inner_product;
-        convolution_relu_desc_t convolution_relu;
         rnn_desc_t rnn;
         roi_pooling_desc_t roi_pooling;
         depthwise_desc_t depthwise;
+        binary_convolution_desc_t binary_convolution;
+        binarization_desc_t binarization;
     };
 
     op_desc_t(const primitive_kind_t &_): kind(_) {}
@@ -365,9 +412,10 @@ struct op_desc_t {
     DECL_CTOR_AND_CONVERTERS(lrn_desc_t, lrn);
     DECL_CTOR_AND_CONVERTERS(batch_normalization_desc_t, batch_normalization);
     DECL_CTOR_AND_CONVERTERS(inner_product_desc_t, inner_product);
-    DECL_CTOR_AND_CONVERTERS(convolution_relu_desc_t, convolution_relu);
     DECL_CTOR_AND_CONVERTERS(rnn_desc_t, rnn);
     DECL_CTOR_AND_CONVERTERS(roi_pooling_desc_t, roi_pooling);
+    DECL_CTOR_AND_CONVERTERS(binary_convolution_desc_t, binary_convolution);
+    DECL_CTOR_AND_CONVERTERS(binarization_desc_t, binarization);
 
 #   undef DECL_CTOR_AND_CONVERTERS
 };
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/convolution.cpp
index 8340220cc..12b956902 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/convolution.cpp
@@ -40,7 +40,7 @@ status_t conv_desc_init(convolution_desc_t *conv_desc,
     bool args_ok = true
         && !any_null(conv_desc, src_desc, weights_desc, dst_desc, strides,
                 padding_l)
-        && one_of(alg_kind, convolution_direct, convolution_winograd)
+        && one_of(alg_kind, convolution_auto, convolution_direct, convolution_winograd)
         && one_of(padding_kind, padding_kind::padding_zero);
     if (!args_ok) return invalid_arguments;
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.cpp
new file mode 100644
index 000000000..e9b596523
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.cpp
@@ -0,0 +1,56 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "utils.hpp"
+
+#include "convolution_pd.hpp"
+
+namespace mkldnn {
+namespace impl {
+
+using namespace prop_kind;
+
+memory_desc_t *conv_prop_agnostic_src_d(convolution_desc_t *desc) {
+    return desc->prop_kind == backward_data
+        ? &desc->diff_src_desc : &desc->src_desc;
+}
+
+memory_desc_t *conv_prop_agnostic_wei_d(convolution_desc_t *desc) {
+    return desc->prop_kind == backward_weights
+        ? &desc->diff_weights_desc : &desc->weights_desc;
+}
+
+memory_desc_t *conv_prop_agnostic_bia_d(convolution_desc_t *desc) {
+    return desc->prop_kind == backward_weights
+        ? &desc->diff_bias_desc : &desc->bias_desc;
+}
+
+memory_desc_t *conv_prop_agnostic_dst_d(convolution_desc_t *desc) {
+    return utils::one_of(desc->prop_kind, forward_inference, forward_training)
+        ? &desc->diff_bias_desc : &desc->bias_desc;
+}
+
+const memory_desc_t *conv_prop_agnostic_src_d(const convolution_desc_t *desc)
+{ return conv_prop_agnostic_src_d(const_cast<convolution_desc_t *>(desc)); }
+const memory_desc_t *conv_prop_agnostic_wei_d(const convolution_desc_t *desc)
+{ return conv_prop_agnostic_wei_d(const_cast<convolution_desc_t *>(desc)); }
+const memory_desc_t *conv_prop_agnostic_bia_d(const convolution_desc_t *desc)
+{ return conv_prop_agnostic_bia_d(const_cast<convolution_desc_t *>(desc)); }
+const memory_desc_t *conv_prop_agnostic_dst_d(const convolution_desc_t *desc)
+{ return conv_prop_agnostic_dst_d(const_cast<convolution_desc_t *>(desc)); }
+
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.hpp
index 90b6629af..99e6e3202 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/convolution_pd.hpp
@@ -35,25 +35,28 @@ status_t conv_desc_init(convolution_desc_t *conv_desc,
         const dims_t padding_l, const dims_t padding_r,
         padding_kind_t padding_kind);
 
-template <bool with_relu>
-struct _convolution_fwd_pd_t: public primitive_desc_t {
-    typedef _convolution_fwd_pd_t base_class;
-    typedef _convolution_fwd_pd_t hint_class;
-    typedef typename utils::conditional<with_relu,
-            convolution_relu_desc_t, convolution_desc_t>::type base_desc_t;
-    static constexpr auto base_pkind =
-        utils::conditional_v<with_relu, primitive_kind_t,
-        primitive_kind::convolution_relu, primitive_kind::convolution>::value;
-
-    _convolution_fwd_pd_t(mkldnn::impl::engine_t *engine,
-            const base_desc_t *adesc, const primitive_attr_t *attr,
-            const _convolution_fwd_pd_t *hint_fwd_pd)
+memory_desc_t *conv_prop_agnostic_src_d(convolution_desc_t *desc);
+memory_desc_t *conv_prop_agnostic_wei_d(convolution_desc_t *desc);
+memory_desc_t *conv_prop_agnostic_bia_d(convolution_desc_t *desc);
+memory_desc_t *conv_prop_agnostic_dst_d(convolution_desc_t *desc);
+const memory_desc_t *conv_prop_agnostic_src_d(const convolution_desc_t *desc);
+const memory_desc_t *conv_prop_agnostic_wei_d(const convolution_desc_t *desc);
+const memory_desc_t *conv_prop_agnostic_bia_d(const convolution_desc_t *desc);
+const memory_desc_t *conv_prop_agnostic_dst_d(const convolution_desc_t *desc);
+
+struct convolution_fwd_pd_t: public primitive_desc_t {
+    typedef convolution_fwd_pd_t base_class;
+    typedef convolution_fwd_pd_t hint_class;
+    static constexpr auto base_pkind = primitive_kind::convolution;
+
+    convolution_fwd_pd_t(mkldnn::impl::engine_t *engine,
+            const convolution_desc_t *adesc, const primitive_attr_t *attr,
+            const convolution_fwd_pd_t *hint_fwd_pd)
         : primitive_desc_t(engine, attr, base_pkind), desc_(*adesc)
         , hint_fwd_pd_(hint_fwd_pd) {}
-    virtual ~_convolution_fwd_pd_t() {}
+    virtual ~convolution_fwd_pd_t() {}
 
-    const base_desc_t *desc() const { return &desc_; }
-    inline const convolution_desc_t *cdesc() const { return &cdesc_(); }
+    const convolution_desc_t *desc() const { return &desc_; }
     virtual const op_desc_t *op_desc() const override
     { return reinterpret_cast<const op_desc_t *>(this->desc()); }
     virtual void init_info() override { init_info_conv(this, this->info_); }
@@ -75,7 +78,7 @@ struct _convolution_fwd_pd_t: public primitive_desc_t {
     {
         switch (what) {
         case pkind_traits<base_pkind>::query_d:
-            *(const base_desc_t**)result = desc(); break;
+            *(const convolution_desc_t**)result = desc(); break;
         default: return primitive_desc_t::query(what, idx, result);
         }
         return status::success;
@@ -88,7 +91,7 @@ struct _convolution_fwd_pd_t: public primitive_desc_t {
     inline int IC() const { return input_pd()->desc()->dims[1]; }
     inline int OC() const { return output_pd()->desc()->dims[1]; }
     inline int G() const
-    { return with_groups() ? cdesc_().weights_desc.dims[0] : 1; }
+    { return with_groups() ? desc_.weights_desc.dims[0] : 1; }
 
     inline int ID() const { return (ndims() == 5) ? input_pd()->desc()->dims[2] : 1; }
     inline int IH() const { return (ndims() == 3) ? 1 : input_pd()->desc()->dims[ndims()-2]; }
@@ -97,73 +100,61 @@ struct _convolution_fwd_pd_t: public primitive_desc_t {
     inline int OH() const { return (ndims() == 3) ? 1 : output_pd()->desc()->dims[ndims()-2]; }
     inline int OW() const { return output_pd()->desc()->dims[ndims()-1]; }
     inline int KD() const { return (ndims() == 5)
-        ? cdesc_().weights_desc.dims[2 + with_groups()] : 1; }
+        ? desc_.weights_desc.dims[2 + with_groups()] : 1; }
     inline int KH() const
     { return (ndims() == 3)
-        ? 1 : cdesc_().weights_desc.dims[ndims() - (2 - with_groups())]; }
+        ? 1 : desc_.weights_desc.dims[ndims() - (2 - with_groups())]; }
     inline int KW() const
-    { return cdesc_().weights_desc.dims[ndims() - (1 - with_groups())]; }
+    { return desc_.weights_desc.dims[ndims() - (1 - with_groups())]; }
 
-    inline int KSD() const { return (ndims() == 5) ? cdesc_().strides[0] : 1; }
+    inline int KSD() const { return (ndims() == 5) ? desc_.strides[0] : 1; }
     inline int KSH() const { return (ndims() == 3)
-        ? 1 : cdesc_().strides[ndims()-4]; }
-    inline int KSW() const { return cdesc_().strides[ndims()-3]; }
+        ? 1 : desc_.strides[ndims()-4]; }
+    inline int KSW() const { return desc_.strides[ndims()-3]; }
 
-    inline int KDD() const { return (ndims() == 5) ? cdesc_().dilates[0] : 0; }
+    inline int KDD() const { return (ndims() == 5) ? desc_.dilates[0] : 0; }
     inline int KDH() const { return (ndims() == 3)
-        ? 0 : cdesc_().dilates[ndims()-4]; }
-    inline int KDW() const { return cdesc_().dilates[ndims()-3]; }
+        ? 0 : desc_.dilates[ndims()-4]; }
+    inline int KDW() const { return desc_.dilates[ndims()-3]; }
 
     inline int padFront() const
-        { return (ndims() == 5) ? cdesc_().padding[0][0] : 0; }
+        { return (ndims() == 5) ? desc_.padding[0][0] : 0; }
     inline int padBack() const
-        { return (ndims() == 5) ? cdesc_().padding[1][0] : 0; }
+        { return (ndims() == 5) ? desc_.padding[1][0] : 0; }
     inline int padT() const { return (ndims() == 3)
-        ? 0 : cdesc_().padding[0][ndims()-4]; }
+        ? 0 : desc_.padding[0][ndims()-4]; }
     inline int padB() const { return (ndims() == 3)
-        ? 0 : cdesc_().padding[1][ndims()-4]; }
-    inline int padL() const { return cdesc_().padding[0][ndims()-3]; }
-    inline int padR() const { return cdesc_().padding[1][ndims()-3]; }
-
-    inline float negative_slope() const;
+        ? 0 : desc_.padding[1][ndims()-4]; }
+    inline int padL() const { return desc_.padding[0][ndims()-3]; }
+    inline int padR() const { return desc_.padding[1][ndims()-3]; }
 
     inline bool with_bias() const
-    { return !memory_desc_wrapper(cdesc_().bias_desc).is_zero(); }
+    { return !memory_desc_wrapper(desc_.bias_desc).is_zero(); }
     inline bool with_groups() const
-    { return cdesc_().weights_desc.ndims == cdesc_().src_desc.ndims + 1; }
+    { return desc_.weights_desc.ndims == desc_.src_desc.ndims + 1; }
 
-    inline int ndims() const { return cdesc_().src_desc.ndims; }
+    inline int ndims() const { return desc_.src_desc.ndims; }
+
+    virtual status_t set_alg_kind(alg_kind_t alg) {
+        if (alg == alg_kind::undef) return status::invalid_arguments;
+        desc_.alg_kind = alg;
+        return status::success;
+    }
 
     bool has_zero_dim_memory() const {
         return false
-            || memory_desc_wrapper(cdesc_().src_desc).has_zero_dim()
-            || memory_desc_wrapper(cdesc_().dst_desc).has_zero_dim();
+            || memory_desc_wrapper(desc_.src_desc).has_zero_dim()
+            || memory_desc_wrapper(desc_.dst_desc).has_zero_dim();
     }
 
-protected:
-    base_desc_t desc_;
-    const _convolution_fwd_pd_t *hint_fwd_pd_;
 
-    inline const convolution_desc_t &cdesc_() const;
+protected:
+    convolution_desc_t desc_;
+    const convolution_fwd_pd_t *hint_fwd_pd_;
 
     virtual status_t init() = 0;
 };
 
-using convolution_fwd_pd_t = mkldnn::impl::_convolution_fwd_pd_t<false>;
-using convolution_relu_fwd_pd_t = mkldnn::impl::_convolution_fwd_pd_t<true>;
-
-template<> inline float convolution_fwd_pd_t::negative_slope() const
-{ return 0.; }
-template<> inline float convolution_relu_fwd_pd_t::negative_slope() const
-{ return desc()->negative_slope; }
-
-template<bool with_relu> inline const
-convolution_desc_t &_convolution_fwd_pd_t<with_relu>::cdesc_() const
-{ return desc_; }
-template<>
-inline const convolution_desc_t &convolution_relu_fwd_pd_t::cdesc_() const
-{ return desc_.convolution_desc; }
-
 struct convolution_bwd_data_pd_t: public primitive_desc_t {
     typedef convolution_bwd_data_pd_t base_class;
     typedef convolution_fwd_pd_t hint_class;
@@ -178,7 +169,6 @@ struct convolution_bwd_data_pd_t: public primitive_desc_t {
     virtual ~convolution_bwd_data_pd_t() {}
 
     const convolution_desc_t *desc() const { return &desc_; }
-    const convolution_desc_t *cdesc() const { return desc(); }
     virtual const op_desc_t *op_desc() const override
     { return reinterpret_cast<const op_desc_t *>(this->desc()); }
     virtual void init_info() override { init_info_conv(this, this->info_); }
@@ -257,6 +247,12 @@ struct convolution_bwd_data_pd_t: public primitive_desc_t {
     inline int ndims() const { return desc_.diff_src_desc.ndims; }
     virtual bool support_bias() const { return false; }
 
+    virtual status_t set_alg_kind(alg_kind_t alg) {
+        if (alg == alg_kind::undef) return status::invalid_arguments;
+        desc_.alg_kind = alg;
+        return status::success;
+    }
+
     bool has_zero_dim_memory() const {
         return false
             || memory_desc_wrapper(desc_.diff_src_desc).has_zero_dim()
@@ -284,7 +280,6 @@ struct convolution_bwd_weights_pd_t: public primitive_desc_t {
     virtual ~convolution_bwd_weights_pd_t() {}
 
     const convolution_desc_t *desc() const { return &desc_; }
-    const convolution_desc_t *cdesc() const { return desc(); }
     virtual const op_desc_t *op_desc() const override
     { return reinterpret_cast<const op_desc_t *>(this->desc()); }
     virtual void init_info() override { init_info_conv(this, this->info_); }
@@ -372,6 +367,12 @@ struct convolution_bwd_weights_pd_t: public primitive_desc_t {
 
     inline int ndims() const { return desc_.src_desc.ndims; }
 
+    virtual status_t set_alg_kind(alg_kind_t alg) {
+        if (alg == alg_kind::undef) return status::invalid_arguments;
+        desc_.alg_kind = alg;
+        return status::success;
+    }
+
     bool has_zero_dim_memory() const {
         return false
             || memory_desc_wrapper(desc_.src_desc).has_zero_dim()
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/convolution_relu.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/convolution_relu.cpp
deleted file mode 100644
index 1df198f29..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/common/convolution_relu.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-/*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include <assert.h>
-#include "mkldnn.h"
-
-#include "c_types_map.hpp"
-#include "type_helpers.hpp"
-#include "utils.hpp"
-
-using namespace mkldnn::impl;
-using namespace mkldnn::impl::utils;
-using namespace mkldnn::impl::status;
-using namespace mkldnn::impl::prop_kind;
-using namespace mkldnn::impl::alg_kind;
-
-status_t mkldnn_convolution_relu_desc_init(
-        convolution_relu_desc_t *conv_relu_desc,
-        const convolution_desc_t *conv_desc, float negative_slope) {
-    bool args_ok = !any_null(conv_relu_desc, conv_desc)
-        && utils::one_of(conv_desc->prop_kind, prop_kind::forward_training,
-                prop_kind::forward_inference);
-    if (!args_ok) return invalid_arguments;
-    conv_relu_desc->primitive_kind = primitive_kind::convolution_relu;
-    conv_relu_desc->convolution_desc = *conv_desc;
-    conv_relu_desc->negative_slope = negative_slope;
-    return success;
-}
-
-// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/deconvolution_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/deconvolution_pd.hpp
index ba699c527..a98a749ba 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/deconvolution_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/deconvolution_pd.hpp
@@ -39,7 +39,6 @@ struct deconvolution_fwd_pd_t : public primitive_desc_t {
     virtual ~deconvolution_fwd_pd_t() {}
 
     const deconvolution_desc_t *desc() const { return &desc_; }
-    inline const deconvolution_desc_t *cdesc() const { return &desc_; }
     virtual const op_desc_t *op_desc() const override {
         return reinterpret_cast<const op_desc_t *>(this->desc());
     }
@@ -118,6 +117,12 @@ struct deconvolution_fwd_pd_t : public primitive_desc_t {
     }
     inline int ndims() const { return desc_.src_desc.ndims; }
 
+    bool has_zero_dim_memory() const {
+        return false
+            || memory_desc_wrapper(desc_.src_desc).has_zero_dim()
+            || memory_desc_wrapper(desc_.dst_desc).has_zero_dim();
+    }
+
 protected:
     deconvolution_desc_t desc_;
     const deconvolution_fwd_pd_t *hint_fwd_pd_;
@@ -138,7 +143,6 @@ struct deconvolution_bwd_data_pd_t : public primitive_desc_t {
     virtual ~deconvolution_bwd_data_pd_t() {}
 
     const deconvolution_desc_t *desc() const { return &desc_; }
-    const deconvolution_desc_t *cdesc() const { return desc(); }
     virtual const op_desc_t *op_desc() const override {
         return reinterpret_cast<const op_desc_t *>(this->desc());
     }
@@ -214,7 +218,7 @@ struct deconvolution_bwd_data_pd_t : public primitive_desc_t {
     inline bool with_groups() const {
         return desc_.weights_desc.ndims == desc_.diff_src_desc.ndims + 1;
     }
-    inline int ndims() const { return desc_.src_desc.ndims; }
+    inline int ndims() const { return desc_.diff_src_desc.ndims; }
 
 protected:
     deconvolution_desc_t desc_;
@@ -236,7 +240,6 @@ struct deconvolution_bwd_weights_pd_t : public primitive_desc_t {
     virtual ~deconvolution_bwd_weights_pd_t() {}
 
     const deconvolution_desc_t *desc() const { return &desc_; }
-    const deconvolution_desc_t *cdesc() const { return desc(); }
     virtual const op_desc_t *op_desc() const override {
         return reinterpret_cast<const op_desc_t *>(this->desc());
     }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/depthwise.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/depthwise.cpp
index 1a8220ef4..d206c365c 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/depthwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/depthwise.cpp
@@ -39,7 +39,7 @@ status_t depthwise_desc_init(depthwise_desc_t *depthwise_desc, prop_kind_t prop_
         && one_of(alg_kind, depthwise_scale_shift, depthwise_prelu);
     if (!args_ok) return invalid_arguments;
 
-    depthwise_desc_t dd = {};
+    auto dd = depthwise_desc_t();
     dd.primitive_kind = primitive_kind::depthwise;
     dd.prop_kind = prop_kind;
     dd.alg_kind = alg_kind;
@@ -62,7 +62,7 @@ status_t depthwise_desc_init(depthwise_desc_t *depthwise_desc, prop_kind_t prop_
 
 status_t mkldnn_depthwise_forward_desc_init(depthwise_desc_t *depthwise_desc,
         prop_kind_t prop_kind, alg_kind_t alg_kind,
-        const memory_desc_t *src_desc, const memory_desc_t *dst_desc,  const memory_desc_t *weights_desc,
+        const memory_desc_t *src_desc, const memory_desc_t *dst_desc, const memory_desc_t *weights_desc,
         const memory_desc_t *bias_desc) {
     if (!one_of(prop_kind, forward_training, forward_inference))
         return invalid_arguments;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/eltwise.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/eltwise.cpp
index 815d2d7e7..5d9a6dd1d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/eltwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/eltwise.cpp
@@ -39,7 +39,7 @@ status_t eltwise_desc_init(eltwise_desc_t *eltwise_desc, prop_kind_t prop_kind,
         && one_of(alg_kind, eltwise_relu, eltwise_tanh, eltwise_elu,
                   eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
                   eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic,
-                  eltwise_clamp)
+                  eltwise_clamp, eltwise_exp, eltwise_not)
         && IMPLICATION(prop_kind == backward_data, diff_data_desc != nullptr);
     if (!args_ok) return invalid_arguments;
 
@@ -54,7 +54,6 @@ status_t eltwise_desc_init(eltwise_desc_t *eltwise_desc, prop_kind_t prop_kind,
 
     ed.alpha = alpha;
     ed.beta = beta;
-    ed.negative_slope = ed.alpha;
 
     bool consistency = true
         && IMPLICATION(ed.prop_kind == backward_data,
@@ -83,19 +82,4 @@ status_t mkldnn_eltwise_backward_desc_init(eltwise_desc_t *eltwise_desc,
             diff_data_desc, alpha, beta);
 }
 
-status_t mkldnn_relu_forward_desc_init(eltwise_desc_t *relu_desc,
-        prop_kind_t prop_kind, const memory_desc_t *data_desc,
-        float negative_slope) {
-    return mkldnn_eltwise_forward_desc_init(relu_desc, prop_kind, eltwise_relu,
-            data_desc, negative_slope, 0.);
-}
-
-status_t mkldnn_relu_backward_desc_init(eltwise_desc_t *relu_desc,
-        const memory_desc_t *diff_data_desc, const memory_desc_t *data_desc,
-        float negative_slope) {
-    return mkldnn_eltwise_backward_desc_init(relu_desc, eltwise_relu,
-            diff_data_desc, data_desc, negative_slope, 0.);
-}
-
-
 // vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/eltwise_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/eltwise_pd.hpp
index bf457a998..16120e5c3 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/eltwise_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/eltwise_pd.hpp
@@ -72,10 +72,8 @@ struct eltwise_fwd_pd_t: public primitive_desc_t {
     inline int W() const { return input_pd()->desc()->ndims == 4
         ? input_pd()->desc()->dims[3] : input_pd()->desc()->dims[4]; }
 
-    inline bool is_zero_preserved() const {
-        return !utils::one_of(desc_.alg_kind, alg_kind::eltwise_linear,
-        alg_kind::eltwise_soft_relu, alg_kind::eltwise_logistic, alg_kind::eltwise_clamp);
-    }
+    inline bool is_zero_preserved() const
+    { return math::eltwise_fwd_preserves_zero(desc_.alg_kind); }
 
     bool has_zero_dim_memory() const
     { return memory_desc_wrapper(desc_.data_desc).has_zero_dim(); }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/format_traits.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/format_traits.hpp
index 0a13a3345..7afe129b4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/format_traits.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/format_traits.hpp
@@ -35,12 +35,17 @@ enum class data_kind_t {
 
 enum class block_format_t {
     _,
+    _4c, _4i, _4o,
     _8c, _8g, _8i, _8o,
-    _8i8o, _8o8i, _8o4i, _8o4i_s8s8,
-    _16c, _16g, _16i, _16o,
+    _4i4o, _4o4i, _4o4i_s8s8,
+    _8i8o, _8o8i,
+    _8o4i, _8o4i_s8s8,
+    _8o32i, _16o32i,
+    _16c, _16g, _16g_s8s8, _16i, _16o,
     _16i16o, _16o16i,
     _8i16o2i, _8o16i2o,
     _4i16o4i, _4i16o4i_s8s8,
+    _2i8o4i, _2i8o4i_s8s8
 };
 
 template <block_format_t f> struct block_format_traits {
@@ -48,15 +53,20 @@ template <block_format_t f> struct block_format_traits {
     static constexpr int levels = f == bf::_
         ? 0
         : utils::one_of(f, bf::_8i16o2i, bf::_8o16i2o,
-                           bf::_4i16o4i, bf::_4i16o4i_s8s8) ? 2 : 1;
+                           bf::_4i16o4i, bf::_4i16o4i_s8s8,
+                           bf::_2i8o4i, bf::_2i8o4i_s8s8) ? 2 : 1;
     static constexpr int blk_ndims = f == bf::_
         ? 0
-        : utils::one_of(f, bf::_8c, bf::_8g, bf::_8i, bf::_8o, bf::_16c,
-                bf::_16g, bf::_16i, bf::_16o) ? 1 : 2;
+        : utils::one_of(f, bf::_4c, bf::_4i, bf::_4o, bf::_8c, bf::_8g, bf::_8i, bf::_8o, bf::_16c,
+                bf::_16g, bf::_16g_s8s8, bf::_16i, bf::_16o) ? 1 : 2;
     static constexpr int blk_size = f == bf::_
         ? 1
-        : utils::one_of(f, bf::_8c, bf::_8g, bf::_8i, bf::_8o, bf::_8i8o,
-                bf::_8o8i, bf::_8o4i, bf::_8o4i_s8s8) ? 8 : 16;
+        : (utils::one_of(f, bf::_4c, bf::_4i, bf::_4o, bf::_4i4o, bf::_4o4i, bf::_4o4i_s8s8) ? 4
+                : (utils::one_of(f, bf::_8c, bf::_8g, bf::_8i, bf::_8o,
+                        bf::_8i8o, bf::_8o8i,
+                        bf::_8o4i, bf::_8o4i_s8s8,
+                        bf::_2i8o4i, bf::_2i8o4i_s8s8,
+                        bf::_8o32i) ? 8 : 16));
 };
 
 template <memory_format_t> struct format_traits {
@@ -64,7 +74,7 @@ template <memory_format_t> struct format_traits {
     // block_format_t blk_fmt;  -- the format of blocks (e.g. 8c or 4i16o4i)
     // int ndims;               -- # of dimensions
     // int ndims_sp;            -- # of spatial dimensions
-    // int blk_size;            -- block size (1, 8, or 16)
+    // int blk_size;            -- block size (1, 4, 8, or 16)
 };
 
 #define DECL_TRAITS(_fmt, _data_kind, _blk_fmt, _ndims, _ndims_sp) \
@@ -87,6 +97,7 @@ DECL_TRAITS(nc, data, _, 2, 0);
 /* data: 3D */
 DECL_TRAITS(ncw, data, _, 3, 1);
 DECL_TRAITS(nwc, data, _, 3, 1);
+DECL_TRAITS(nCw4c, data, _4c, 3, 1);
 DECL_TRAITS(nCw8c, data, _8c, 3, 1);
 DECL_TRAITS(nCw16c, data, _16c, 3, 1);
 
@@ -94,12 +105,14 @@ DECL_TRAITS(nCw16c, data, _16c, 3, 1);
 DECL_TRAITS(nchw, data, _, 4, 2);
 DECL_TRAITS(nhwc, data, _, 4, 2);
 DECL_TRAITS(chwn, data, _, 4, 2);
+DECL_TRAITS(nChw4c, data, _4c, 4, 2);
 DECL_TRAITS(nChw8c, data, _8c, 4, 2);
 DECL_TRAITS(nChw16c, data, _16c, 4, 2);
 
 /* data: 5D */
 DECL_TRAITS(ncdhw, data, _, 5, 3);
 DECL_TRAITS(ndhwc, data, _, 5, 3);
+DECL_TRAITS(nCdhw4c, data, _4c, 5, 3);
 DECL_TRAITS(nCdhw8c, data, _8c, 5, 3);
 DECL_TRAITS(nCdhw16c, data, _16c, 5, 3);
 
@@ -110,11 +123,14 @@ DECL_TRAITS(io, wei, _, 2, 0);
 /* wei: 3D */
 DECL_TRAITS(oiw, wei, _, 3, 1);
 DECL_TRAITS(wio, wei, _, 3, 1);
+DECL_TRAITS(Owi4o, wei, _4o, 3, 1);
+DECL_TRAITS(OIw4i4o, wei, _4i4o, 3, 1);
 DECL_TRAITS(Owi8o, wei, _8o, 3, 1);
 DECL_TRAITS(OIw8i8o, wei, _8i8o, 3, 1);
 DECL_TRAITS(OIw8o8i, wei, _8o8i, 3, 1);
 DECL_TRAITS(OIw16i16o, wei, _16i16o, 3, 1);
 DECL_TRAITS(OIw16o16i, wei, _16o16i, 3, 1);
+DECL_TRAITS(Oiw4o, wei, _4o, 3, 1);
 DECL_TRAITS(Oiw16o, wei, _16o, 3, 1);
 DECL_TRAITS(Owi16o, wei, _16o, 3, 1);
 DECL_TRAITS(OIw8i16o2i, wei, _8i16o2i, 3, 1);
@@ -125,10 +141,14 @@ DECL_TRAITS(OIw8o16i2o, wei, _8o16i2o, 3, 1);
 DECL_TRAITS(oihw, wei, _, 4, 2);
 DECL_TRAITS(ihwo, wei, _, 4, 2);
 DECL_TRAITS(hwio, wei, _, 4, 2);
+DECL_TRAITS(iohw, wei, _, 4, 2);
 DECL_TRAITS(hwio_s8s8, wei, _, 4, 2);
 DECL_TRAITS(oIhw8i, wei, _8i, 4, 2);
 DECL_TRAITS(oIhw16i, wei, _16i, 4, 2);
+DECL_TRAITS(OIhw4i4o, wei, _4i4o, 4, 2);
 DECL_TRAITS(OIhw8i8o, wei, _8i8o, 4, 2);
+DECL_TRAITS(OhIw8o32i, wei, _8o32i, 4, 2);
+DECL_TRAITS(OhIw16o32i, wei, _16o32i, 4, 2);
 DECL_TRAITS(OhIw8o4i, wei, _8o4i, 4, 2);
 DECL_TRAITS(OhIw8o4i_s8s8, wei, _8o4i_s8s8, 4, 2);
 DECL_TRAITS(OIhw16i16o, wei, _16i16o, 4, 2);
@@ -139,18 +159,23 @@ DECL_TRAITS(OIhw8o16i2o, wei, _8o16i2o, 4, 2);
 DECL_TRAITS(OIhw8o8i, wei, _8o8i, 4, 2);
 DECL_TRAITS(OIhw16o16i, wei, _16o16i, 4, 2);
 DECL_TRAITS(IOhw16o16i, wei, _16o16i, 4, 2);
+DECL_TRAITS(Oihw4o, wei, _4o, 4, 2);
 DECL_TRAITS(Oihw16o, wei, _16o, 4, 2);
 DECL_TRAITS(Ohwi8o, wei, _8o, 4, 2);
+DECL_TRAITS(Ohwi4o, wei, _4o, 4, 2);
 DECL_TRAITS(Ohwi16o, wei, _16o, 4, 2);
 
 /* wei: 5D */
 DECL_TRAITS(dhwio, wei, _, 5, 3);
 DECL_TRAITS(oidhw, wei, _, 5, 3);
+DECL_TRAITS(OIdhw4i4o, wei, _4i4o, 5, 3);
+DECL_TRAITS(Odhwi4o, wei, _4o, 5, 3);
 DECL_TRAITS(OIdhw8i8o, wei, _8i8o, 5, 3);
 DECL_TRAITS(OIdhw8o8i, wei, _8o8i, 5, 3);
 DECL_TRAITS(Odhwi8o, wei, _8o, 5, 3);
 DECL_TRAITS(OIdhw16i16o, wei, _16i16o, 5, 3);
 DECL_TRAITS(OIdhw16o16i, wei, _16o16i, 5, 3);
+DECL_TRAITS(Oidhw4o, wei, _4o, 5, 3);
 DECL_TRAITS(Oidhw16o, wei, _16o, 5, 3);
 DECL_TRAITS(Odhwi16o, wei, _16o, 5, 3);
 DECL_TRAITS(oIdhw8i, wei, _8i, 5, 3);
@@ -159,11 +184,14 @@ DECL_TRAITS(OIdhw8i16o2i, wei, _8i16o2i, 5, 3);
 
 /* gwei: 4D */
 DECL_TRAITS(goiw, gwei, _, 4, 1);
+DECL_TRAITS(gOwi4o, gwei, _4o, 4, 1);
+DECL_TRAITS(gOIw4i4o, gwei, _4i4o, 4, 1);
 DECL_TRAITS(gOwi8o, gwei, _8o, 4, 1);
 DECL_TRAITS(gOIw8i8o, gwei, _8i8o, 4, 1);
 DECL_TRAITS(gOIw8o8i, gwei, _8o8i, 4, 1);
 DECL_TRAITS(gOIw16i16o, gwei, _16i16o, 4, 1);
 DECL_TRAITS(gOIw16o16i, gwei, _16o16i, 4, 1);
+DECL_TRAITS(gOiw4o, gwei, _4o, 4, 1);
 DECL_TRAITS(gOiw16o, gwei, _16o, 4, 1);
 DECL_TRAITS(gOwi16o, gwei, _16o, 4, 1);
 DECL_TRAITS(gOIw8i16o2i, gwei, _8i16o2i, 4, 1);
@@ -173,32 +201,43 @@ DECL_TRAITS(gOIw8o16i2o, gwei, _8o16i2o, 4, 1);
 /* gwei: 5D */
 DECL_TRAITS(goihw, gwei, _, 5, 2);
 DECL_TRAITS(hwigo, gwei, _, 5, 2);
+DECL_TRAITS(giohw, gwei, _, 5, 2);
 DECL_TRAITS(hwigo_s8s8, gwei, _, 5, 2);
+DECL_TRAITS(gOIhw4i4o, gwei, _4i4o, 5, 2);
 DECL_TRAITS(gOIhw8i8o, gwei, _8i8o, 5, 2);
 DECL_TRAITS(gOhIw8o4i, gwei, _8o4i, 5, 2);
 DECL_TRAITS(gOhIw8o4i_s8s8, gwei, _8o4i_s8s8, 5, 2);
 DECL_TRAITS(gOIhw16i16o, gwei, _16i16o, 5, 2);
 DECL_TRAITS(gOIhw4i16o4i, gwei, _4i16o4i, 5, 2);
 DECL_TRAITS(gOIhw4i16o4i_s8s8, gwei, _4i16o4i_s8s8, 5, 2);
+DECL_TRAITS(gOIhw2i8o4i, gwei, _2i8o4i, 5, 2);
+DECL_TRAITS(gOIhw2i8o4i_s8s8, gwei, _2i8o4i_s8s8, 5, 2);
 DECL_TRAITS(gOIhw8i16o2i, gwei, _8i16o2i, 5, 2);
 DECL_TRAITS(gOIdhw8i16o2i, gwei, _8i16o2i, 5, 2);
 DECL_TRAITS(gOIhw8o16i2o, gwei, _8o16i2o, 5, 2);
 DECL_TRAITS(gOIhw8o8i, gwei, _8o8i, 5, 2);
+DECL_TRAITS(gOIhw4o4i, gwei, _4o4i, 5, 2);
+DECL_TRAITS(gOIhw4o4i_s8s8, gwei, _4o4i_s8s8, 5, 2);
 DECL_TRAITS(gOIhw16o16i, gwei, _16o16i, 5, 2);
 DECL_TRAITS(gIOhw16o16i, gwei, _16o16i, 5, 2);
+DECL_TRAITS(gOihw4o, gwei, _4o, 5, 2);
 DECL_TRAITS(gOihw16o, gwei, _16o, 5, 2);
 DECL_TRAITS(gOhwi8o, gwei, _8o, 5, 2);
+DECL_TRAITS(gOhwi4o, gwei, _4o, 5, 2);
 DECL_TRAITS(gOhwi16o, gwei, _16o, 5, 2);
 DECL_TRAITS(Goihw8g, gwei, _8g, 5, 2);
 DECL_TRAITS(Goihw16g, gwei, _16g, 5, 2);
+DECL_TRAITS(Goihw16g_s8s8, gwei, _16g_s8s8, 5, 2);
 
 /* gwei: 6D */
 DECL_TRAITS(goidhw, gwei, _, 6, 3);
+DECL_TRAITS(gOIdhw4i4o, gwei, _4i4o, 6, 3);
 DECL_TRAITS(gOIdhw8i8o, gwei, _8i8o, 6, 3);
 DECL_TRAITS(gOIdhw8o8i, gwei, _8o8i, 6, 3);
 DECL_TRAITS(gOdhwi8o, gwei, _8o, 6, 3);
 DECL_TRAITS(gOIdhw16i16o, gwei, _16i16o, 6, 3);
 DECL_TRAITS(gOIdhw16o16i, gwei, _16o16i, 6, 3);
+DECL_TRAITS(gOidhw4o, gwei, _4o, 6, 3);
 DECL_TRAITS(gOidhw16o, gwei, _16o, 6, 3);
 DECL_TRAITS(gOdhwi16o, gwei, _16o, 6, 3);
 
@@ -216,21 +255,28 @@ DECL_TRAITS(ldgo, rnn, _, 4, 0);
 template <block_format_t f>
 constexpr int OI_blk_off(int oc, int ic) {
     using bf = block_format_t;
-    static_assert(utils::one_of(f, bf::_8i8o, bf::_8o8i, bf::_8o4i, bf::_8o4i_s8s8,
-                bf::_16i16o, bf::_16o16i, bf::_8i16o2i, bf::_8o16i2o,
-                bf::_4i16o4i, bf::_4i16o4i_s8s8),
+    static_assert(utils::one_of(f, bf::_4i4o, bf::_4o4i, bf::_4o4i_s8s8,
+                bf::_8i8o, bf::_8o8i, bf::_16i16o,
+                bf::_16o16i, bf::_8i16o2i, bf::_8o16i2o,
+                bf::_4i16o4i, bf::_4i16o4i_s8s8,
+                bf::_2i8o4i, bf::_2i8o4i_s8s8,
+                bf::_8o4i, bf::_8o4i_s8s8,
+                bf::_8o32i, bf::_16o32i),
             "unexpected blocked format");
 #   define blksize block_format_traits<f>::blk_size
     return f == bf::_8i16o2i
         ? (ic / 2) * blksize * 2 + 2 * oc + ic % 2
-        : (f == bf::_4i16o4i || f == bf::_4i16o4i_s8s8)
+        : (f == bf::_4i16o4i || f == bf::_4i16o4i_s8s8
+        || f == bf::_2i8o4i || f == bf::_2i8o4i_s8s8)
         ? (ic / 4) * blksize * 4 + oc * 4 + ic % 4
         : f == bf::_8o16i2o
         ? (oc / 2) * blksize * 2 + 2 * ic + oc % 2
-        : utils::one_of(f, bf::_8i8o, bf::_16i16o)
+        : utils::one_of(f, bf::_4i4o, bf::_8i8o, bf::_16i16o)
         ? ic * blksize + oc
         : (f == bf::_8o4i || f == bf::_8o4i_s8s8)
         ? (ic / 4) * blksize * 4 + 4 * oc + ic % 4
+        : (f == bf::_8o32i || f == bf::_16o32i)
+        ? 32 * oc + 32
         : oc * blksize + ic;
 #   undef blksize // if only we program in C++14...
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/math_utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/math_utils.hpp
index 0ae70935c..6e2e285d4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/math_utils.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/math_utils.hpp
@@ -22,6 +22,7 @@
 
 #include "utils.hpp"
 #include "nstl.hpp"
+#include "mkldnn_traits.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -107,118 +108,203 @@ inline int ilog2q(size_t v) {
     return p;
 }
 
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U one_m_square(T x) {
+    return (U)(1 - x) * (1 + x);
+}
+
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U x_m_square(T x) {
+    return (U)(1 - x) * x;
+}
+
 /* activation */
-template <typename T, typename A> inline T relu_fwd(T s, A alpha) {
-    return s > 0 ? s : (T)(s * alpha);
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U relu_fwd(T s, A alpha) {
+    return s > 0 ? s : (U)(s * alpha);
 }
-template <typename T, typename A> inline T relu_bwd(T dd, T s, A alpha) {
-    return s > 0 ? dd : (T)(dd * alpha);
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U relu_bwd(T dd, T s, A alpha) {
+    return s > 0 ? dd : (U)(dd * alpha);
 }
 
-template <typename T> inline T tanh_fwd(T s) {
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U tanh_fwd(T s) {
     const float e = tanhf((float) s);
-    return (T) e;
+    return (U)e;
 }
-template <typename T> inline T tanh_bwd(T dd, T s) {
+
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U tanh_bwd(T dd, T s) {
     const float e = tanh_fwd<float>((float) s);
-    return (T)(dd * (1 - e) * (1 + e));
+    return (U)(dd * (1 - e) * (1 + e));
 }
 
-template <typename T, typename A> inline T elu_fwd(T s, A alpha) {
-    return s > 0 ? s : (T)(alpha * (::expm1f((float)s)));
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U elu_fwd(T s, A alpha) {
+    return s > 0 ? s : (U)(alpha * (::expm1f((float)s)));
 }
-template <typename T, typename A> inline T elu_bwd(T dd, T s, A alpha) {
-    return (T)(dd * (s > 0 ? 1 : alpha * ::expf((float)s)));
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+ inline U elu_bwd(T dd, T s, A alpha) {
+    return (U)(dd * (s > 0 ? 1 : alpha * ::expf((float)s)));
 }
 
-template <typename T>
-inline T square_fwd(T s) {
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U square_fwd(T s) {
     return s * s;
 }
 
-template <typename T>
-inline T square_bwd(T dd, T s) {
-    return dd * 2*s;
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U square_bwd(T dd, T s) {
+    return dd * 2 * s;
 }
 
-template <typename T>
-inline T abs_fwd(T s) {
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U abs_fwd(T s) {
     return s > 0 ? s : -s;
 }
 
-template <typename T>
-inline T abs_bwd(T dd, T s) {
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U abs_bwd(T dd, T s) {
     return s > 0 ? dd : s < 0 ? -dd : 0;
 }
 
-template <typename T>
-inline T sqrt_fwd(T s) {
-    return s > 0 ? (T)(::sqrtf((float)(s))) : 0;
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U sqrt_fwd(T s) {
+    return s > 0 ? (U)(::sqrtf((float)(s))) : 0;
 }
 
-template <typename T>
-inline T sqrt_bwd(T dd, T s) {
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U sqrt_bwd(T dd, T s) {
     return s > 0
-        ? (T)(dd / (2 * ::sqrtf((float)(s))))
+        ? (U)(dd / (2 * ::sqrtf((float)(s))))
         : 0;
 }
 
-template <typename T, typename A>
-inline T linear_fwd(T s, A alpha, A beta) {
-    return (T)(alpha * s + beta);
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U linear_fwd(T s, A alpha, A beta) {
+    return (U)(alpha * s + beta);
 }
 
-template <typename T, typename A>
-inline T linear_bwd(T dd, T s, A alpha, A beta) {
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U linear_bwd(T dd, T s, A alpha, A beta) {
     (void) s;
     (void) beta;
-    return (T)(dd * alpha);
+    return (U)(dd * alpha);
 }
 
-template <typename T, typename A>
-inline T bounded_relu_fwd(T s, A alpha) {
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U bounded_relu_fwd(T s, A alpha) {
     s = s > 0 ? s : 0;
-    return s > alpha ? (T)(alpha) : s;
+    return s > alpha ? (U)(alpha) : s;
 }
 
-template <typename T, typename A>
-inline T bounded_relu_bwd(T dd, T s, A alpha) {
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U bounded_relu_bwd(T dd, T s, A alpha) {
     return dd * (0 < s && s < alpha ? 1 : 0);
 }
 
-template <typename T>
-inline T soft_relu_fwd(T s) {
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U soft_relu_fwd(T s) {
     float max_logf = 8.872284e+01; //::logf(FLT_MAX)
-    return s < max_logf ? (T)(::log1pf(::expf((float)s))) : s;
+    return s < max_logf ? (U)(::log1pf(::expf((float)s))) : s;
 }
 
-template <typename T>
-inline T soft_relu_bwd(T dd, T s) {
-    return (T)(dd / (1 + ::expf((float)(-s))));
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U soft_relu_bwd(T dd, T s) {
+    return (U)(dd / (1 + ::expf((float)(-s))));
 }
 
-template <typename T>
-inline T logistic_fwd(T s) {
-    T v = (T)(::expf((float) -s));
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U logistic_fwd(T s) {
+    U v = (U)(::expf((float) -s));
     return 1 / (1 + v);
 }
 
-template <typename T>
-inline T logistic_bwd(T dd, T s) {
-    T v = logistic_fwd<T>(s);
+template <typename T, typename U = typename utils::remove_reference<T>::type>
+inline U logistic_bwd(T dd, T s) {
+    U v = logistic_fwd<T, U>(s);
     return dd * v * (1 - v);
 }
 
-template <typename T, typename A>
-T clamp_fwd(T s, A alpha, A beta) {
-    return s > alpha ? (T)(alpha) : s < beta ? (T)(beta) : s;
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U clamp_fwd(T s, A alpha, A beta) {
+    return (U)(s > alpha ? alpha : s < beta ? beta : s);
 }
 
-template <typename T, typename A>
-T clamp_bwd(T dd, T s, A alpha, A beta) {
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U clamp_bwd(T dd, T s, A alpha, A beta) {
     return dd * (beta < s && s < alpha ? 1 : 0);
 }
 
+template <typename T,
+         typename U = typename utils::remove_reference<T>::type>
+inline U exp_fwd(T s) {
+    return (U)(::expf((float)s));
+}
+
+template <typename T,
+         typename U = typename utils::remove_reference<T>::type>
+ inline U exp_bwd(T dd, T s) {
+    return (U)(::expf((float)s));
+}
+
+template <typename T,
+        typename U = typename utils::remove_reference<T>::type>
+inline U not_fwd(T s) {
+    return (U)(!s);
+}
+
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U scale_shift_fwd(T s_val, A w_val, A b_val) {
+    return (U)(s_val*w_val + b_val);
+}
+
+template <typename T, typename A,
+         typename U = typename utils::remove_reference<T>::type>
+inline U prelu_fwd(T s_val, A w_val) {
+    return (U)(s_val >= 0 ? s_val : w_val*s_val);
+}
+
+inline bool eltwise_fwd_preserves_zero(alg_kind_t alg, bool jit_impl = false) {
+    using namespace alg_kind;
+    using namespace utils;
+    const bool preserves_zero = true
+        && !one_of(alg, eltwise_linear, eltwise_soft_relu, eltwise_logistic, eltwise_clamp, eltwise_exp, eltwise_not)
+        && IMPLICATION(jit_impl, !one_of(alg, eltwise_elu, eltwise_tanh, eltwise_clamp, eltwise_exp, eltwise_not));
+    return preserves_zero;
+}
+
+inline float get_bias(const char *bias, size_t offset, data_type_t data_type)
+{
+    if (!bias)
+        return 0.0f;
+
+#define CASE(dt) \
+    case dt: return (float)((const prec_traits<dt>::type *)bias)[offset]
+
+    switch (data_type) {
+    CASE(data_type::s8);
+    CASE(data_type::u8);
+    CASE(data_type::s32);
+    CASE(data_type::f32);
+    default: assert(!"unimplemented");
+    }
+    return 0; // never happens (should probably be a NaN)
+#undef CASE
+}
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/memory.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/memory.cpp
index efecc5e97..082901c9f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/memory.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/memory.cpp
@@ -40,7 +40,7 @@ bool memory_desc_sanity_check(int ndims,const dims_t dims,
     bool ok = true
         && dims != nullptr
         && 0 < ndims && ndims <= TENSOR_MAX_DIMS
-        && one_of(data_type, f32, s32, s16, s8, u8)
+        && one_of(data_type, f32, s32, s16, s8, u8, bin)
         && format != memory_format::undef;
     if (!ok) return false;
     for (int d = 0; d < ndims; ++d)
@@ -77,8 +77,7 @@ status_t mkldnn_memory_desc_init(memory_desc_t *memory_desc, int ndims,
     md.format = format;
 
     status_t status = success;
-    if (one_of(format, memory_format::undef, blocked, ldigo_p, ldgoi_p,
-                wino_fmt)) {
+    if (one_of(format, memory_format::undef, blocked, wino_fmt, rnn_packed)) {
         status = invalid_arguments;
     } else if (format == any) {
         // nop
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.cpp
index 3df9295e1..61d1fd5b4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.cpp
@@ -63,7 +63,7 @@ inline void set_default_strides(strides_t strides, const dims_t dims,
 
         strides[curr_idx] = dims[curr_idx] == 0
             ? 1
-            : strides[prev_idx] * nstl::max(1, dims[prev_idx]);
+            : strides[prev_idx] * nstl::max((ptrdiff_t)1, dims[prev_idx]);
     }
 }
 
@@ -72,10 +72,26 @@ status_t fill_nonblocked(memory_desc_t &md, const int perm[]) {
     blocking_desc_t &blk = md.layout_desc.blocking;
     array_set(blk.block_dims, 1, ndims);
     array_set(blk.strides[1], 1, ndims);
-    set_default_strides(blk.strides[0], md.dims, ndims, perm);
-    array_copy(blk.padding_dims, md.dims, ndims);
+
+    if (md.format == mkldnn_nhwc && md.data_type == mkldnn_bin) {
+        dims_t padding_dims;
+
+        const dims_t block_dims = {1, 8, 1, 1};
+        for (int d = 0; d < ndims; ++d) {
+            padding_dims[d] = rnd_up(md.dims[d], block_dims[d]);
+        }
+
+        set_default_strides(blk.strides[0], padding_dims, ndims, perm);
+        array_copy(blk.padding_dims, padding_dims, ndims);
+
+    } else {
+        set_default_strides(blk.strides[0], md.dims, ndims, perm);
+        array_copy(blk.padding_dims, md.dims, ndims);
+    }
+
     array_set(blk.offset_padding_to_data, 0, ndims);
     blk.offset_padding = 0;
+
     return success;
 }
 
@@ -126,6 +142,17 @@ status_t fill_nwc(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
+status_t fill_nCw4c(memory_desc_t &md) {
+    if (md.ndims != 3) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 4, 1 };
+    const int perm[] = {
+        0, 1, 2,
+        3, 4, 5 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+
 status_t fill_nCw8c(memory_desc_t &md) {
     if (md.ndims != 3) return invalid_arguments;
 
@@ -195,6 +222,16 @@ status_t fill_chwn(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
+status_t fill_nChw4c(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 4, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3,
+        4, 5, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_nChw8c(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
@@ -225,6 +262,16 @@ status_t fill_nCdhw16c(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_nCdhw4c(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 4, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        5, 6, 7, 8, 9 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_nCdhw8c(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -263,6 +310,16 @@ status_t fill_wio(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
+status_t fill_Owi4o(memory_desc_t &md) {
+    if (md.ndims != 3) return invalid_arguments;
+
+    const dims_t block_dims = { 4, 1, 1 };
+    const int perm[] = {
+        0, 2, 1,
+        3, 4, 5 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_Owi8o(memory_desc_t &md) {
     if (md.ndims != 3) return invalid_arguments;
 
@@ -283,6 +340,16 @@ status_t fill_OIw8o8i(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_OIw4i4o(memory_desc_t &md) {
+    if (md.ndims != 3) return invalid_arguments;
+
+    const dims_t block_dims = { 4, 4, 1 };
+    const int perm[] = {
+        0, 1, 2,
+        4, 3, 5 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_OIw8i8o(memory_desc_t &md) {
     if (md.ndims != 3) return invalid_arguments;
 
@@ -313,16 +380,26 @@ status_t fill_OIw16o16i(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_Oiw16o(memory_desc_t &md) {
+status_t fill_Oiw4o(memory_desc_t &md) {
     if (md.ndims != 3) return invalid_arguments;
 
-    const dims_t block_dims = {16, 1, 1};
+    const dims_t block_dims = {4, 1, 1};
     const int perm[] = {
         0, 1, 2,
         3, 4, 5};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_Oiw16o(memory_desc_t &md) {
+    if (md.ndims != 3) return invalid_arguments;
+
+    const dims_t block_dims = { 16, 1, 1 };
+    const int perm[] = {
+        0, 1, 2,
+        3, 4, 5 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_Owi16o(memory_desc_t &md) {
     if (md.ndims != 3) return invalid_arguments;
 
@@ -384,6 +461,13 @@ status_t fill_hwio(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
+status_t fill_iohw(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const int perm[4] = {1, 0, 2, 3};
+    return fill_nonblocked(md, perm);
+}
+
 status_t fill_dhwio(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -391,6 +475,16 @@ status_t fill_dhwio(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
+status_t fill_OIhw4i4o(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 4, 4, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3,
+        5, 4, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_OIhw8i8o(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
@@ -421,6 +515,16 @@ status_t fill_OIdhw16i16o(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_OIdhw4i4o(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = { 4, 4, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        6, 5, 7, 8, 9 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_OIdhw8i8o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -451,6 +555,26 @@ status_t fill_OhIw8o4i(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_OhIw8o32i(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = {8, 32, 1, 1};
+    const int perm[] = {
+        0, 2, 1, 3,
+        4, 5, 6, 7};
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+status_t fill_OhIw16o32i(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = {16, 32, 1, 1};
+    const int perm[] = {
+        0, 2, 1, 3,
+        4, 5, 6, 7};
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_OIhw8i16o2i(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
@@ -531,16 +655,36 @@ status_t fill_OIhw8o16i2o(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_Oihw16o(memory_desc_t &md) {
+status_t fill_Oihw4o(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
-    const dims_t block_dims = {16, 1, 1, 1};
+    const dims_t block_dims = {4, 1, 1, 1};
     const int perm[] = {
         0, 1, 2, 3,
         4, 5, 6, 7};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_Oihw16o(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 16, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3,
+        4, 5, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+status_t fill_Oidhw4o(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = { 4, 1, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        5, 6, 7, 8, 9 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_Oidhw16o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -561,16 +705,26 @@ status_t fill_Ohwi8o(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_Ohwi16o(memory_desc_t &md) {
+status_t fill_Ohwi4o(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
-    const dims_t block_dims = {16, 1, 1, 1};
+    const dims_t block_dims = {4, 1, 1, 1};
     const int perm[] = {
         0, 2, 3, 1,
         4, 5, 6, 7};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_Ohwi16o(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 16, 1, 1, 1 };
+    const int perm[] = {
+        0, 2, 3, 1,
+        4, 5, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_Odhwi16o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -598,23 +752,43 @@ status_t fill_goiw(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
-status_t fill_gOwi8o(memory_desc_t &md) {
+status_t fill_gOwi4o(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
-    const dims_t block_dims = {1, 8, 1, 1};
+    const dims_t block_dims = {1, 4, 1, 1};
     const int perm[] = {
         0, 1, 3, 2,
         4, 5, 6, 7};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_gOwi8o(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 8, 1, 1 };
+    const int perm[] = {
+        0, 1, 3, 2,
+        4, 5, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_gOIw8o8i(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
-    const dims_t block_dims = {1, 8, 8, 1};
+    const dims_t block_dims = { 1, 8, 8, 1 };
     const int perm[] = {
         0, 1, 2, 3,
-        4, 5, 6, 7};
+        4, 5, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+status_t fill_gOIw4i4o(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 4, 4, 1 };
+    const int perm[] = {
+        0, 1, 2, 3,
+        4, 6, 5, 7 };
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
@@ -648,6 +822,16 @@ status_t fill_gOIw16o16i(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_gOiw4o(memory_desc_t &md) {
+    if (md.ndims != 4) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 4, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3,
+        4, 5, 6, 7 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_gOiw16o(memory_desc_t &md) {
     if (md.ndims != 4) return invalid_arguments;
 
@@ -712,13 +896,40 @@ status_t fill_hwigo(memory_desc_t &md) {
     return fill_nonblocked(md, perm);
 }
 
+status_t fill_giohw(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const int perm[5] = {0, 2, 1, 3, 4};
+    return fill_nonblocked(md, perm);
+}
+
+status_t fill_gOIhw4o4i(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = {1, 4, 4, 1, 1};
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        5, 6, 7, 8, 9};
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+status_t fill_gOIhw4i4o(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 4, 4, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        5, 7, 6, 8, 9 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_gOIhw8i8o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
-    const dims_t block_dims = {1, 8, 8, 1, 1};
+    const dims_t block_dims = { 1, 8, 8, 1, 1 };
     const int perm[] = {
         0, 1, 2, 3, 4,
-        5, 7, 6, 8, 9};
+        5, 7, 6, 8, 9 };
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
@@ -742,36 +953,66 @@ status_t fill_gOIdhw16i16o(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_gOIdhw8i8o(memory_desc_t &md) {
+status_t fill_gOIdhw4i4o(memory_desc_t &md) {
     if (md.ndims != 6) return invalid_arguments;
 
-    const dims_t block_dims = {1, 8, 8, 1, 1, 1};
+    const dims_t block_dims = {1, 4, 4, 1, 1, 1};
     const int perm[] = {
         0, 1, 2, 3, 4, 5,
         6, 8, 7, 9, 10, 11};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_gOihw16o(memory_desc_t &md) {
+status_t fill_gOIdhw8i8o(memory_desc_t &md) {
+    if (md.ndims != 6) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 8, 8, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4, 5,
+        6, 8, 7, 9, 10, 11 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+status_t fill_gOihw4o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
-    const dims_t block_dims = {1, 16, 1, 1, 1};
+    const dims_t block_dims = {1, 4, 1, 1, 1};
     const int perm[] = {
         0, 1, 2, 3, 4,
         5, 6, 7, 8, 9};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_gOidhw16o(memory_desc_t &md) {
+status_t fill_gOihw16o(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 16, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        5, 6, 7, 8, 9 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
+status_t fill_gOidhw4o(memory_desc_t &md) {
     if (md.ndims != 6) return invalid_arguments;
 
-    const dims_t block_dims = {1, 16, 1, 1, 1, 1};
+    const dims_t block_dims = {1, 4, 1, 1, 1, 1};
     const int perm[] = {
         0, 1, 2, 3, 4, 5,
         6, 7, 8, 9, 10, 11};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_gOidhw16o(memory_desc_t &md) {
+    if (md.ndims != 6) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 16, 1, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 2, 3, 4, 5,
+        6, 7, 8, 9, 10, 11 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_gOhwi8o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -782,16 +1023,26 @@ status_t fill_gOhwi8o(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
-status_t fill_gOhwi16o(memory_desc_t &md) {
+status_t fill_gOhwi4o(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
-    const dims_t block_dims = {1, 16, 1, 1, 1};
+    const dims_t block_dims = {1, 4, 1, 1, 1};
     const int perm[] = {
         0, 1, 3, 4, 2,
         5, 6, 7, 8, 9};
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_gOhwi16o(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = { 1, 16, 1, 1, 1 };
+    const int perm[] = {
+        0, 1, 3, 4, 2,
+        5, 6, 7, 8, 9 };
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_gOdhwi16o(memory_desc_t &md) {
     if (md.ndims != 6) return invalid_arguments;
 
@@ -822,6 +1073,16 @@ status_t fill_gOIhw4i16o4i(memory_desc_t &md) {
     return fill_contiguous_blocked(md, block_dims, perm);
 }
 
+status_t fill_gOIhw2i8o4i(memory_desc_t &md) {
+    if (md.ndims != 5) return invalid_arguments;
+
+    const dims_t block_dims = {1, 8, 8, 1, 1};
+    const int perm[] = {
+        0, 1, 2, 3, 4,
+        5, 7, 6, 8, 9};
+    return fill_contiguous_blocked(md, block_dims, perm);
+}
+
 status_t fill_gOhIw8o4i(memory_desc_t &md) {
     if (md.ndims != 5) return invalid_arguments;
 
@@ -983,22 +1244,27 @@ status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc)
     case nc: return fill_nc(memory_desc);
     case ncw: return fill_ncw(memory_desc);
     case nwc: return fill_nwc(memory_desc);
+    case nCw4c: return fill_nCw4c(memory_desc);
     case nCw8c: return fill_nCw8c(memory_desc);
     case nCw16c: return fill_nCw16c(memory_desc);
     case nchw: return fill_nchw(memory_desc);
     case nhwc: return fill_nhwc(memory_desc);
     case chwn: return fill_chwn(memory_desc);
+    case nChw4c: return fill_nChw4c(memory_desc);
     case nChw8c: case oIhw8i: return fill_nChw8c(memory_desc);
     case nChw16c: case oIhw16i: return fill_nChw16c(memory_desc);
     case oi: return fill_oi(memory_desc);
     case io: return fill_io(memory_desc);
     case oiw: return fill_oiw(memory_desc);
     case wio: return fill_wio(memory_desc);
+    case Owi4o: return fill_Owi4o(memory_desc);
+    case OIw4i4o: return fill_OIw4i4o(memory_desc);
     case Owi8o: return fill_Owi8o(memory_desc);
     case OIw8o8i: return fill_OIw8o8i(memory_desc);
     case OIw8i8o: return fill_OIw8i8o(memory_desc);
     case OIw16i16o: return fill_OIw16i16o(memory_desc);
     case OIw16o16i: return fill_OIw16o16i(memory_desc);
+    case Oiw4o: return fill_Oiw4o(memory_desc);
     case Oiw16o: return fill_Oiw16o(memory_desc);
     case Owi16o: return fill_Owi16o(memory_desc);
     case OIw8i16o2i: return fill_OIw8i16o2i(memory_desc);
@@ -1007,12 +1273,16 @@ status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc)
     case oihw: return fill_oihw(memory_desc);
     case ihwo: return fill_ihwo(memory_desc);
     case hwio: return fill_hwio(memory_desc);
+    case iohw: return fill_iohw(memory_desc);
     case hwio_s8s8: return fill_hwio(memory_desc);
     case dhwio: return fill_dhwio(memory_desc);
+    case OIhw4i4o: return fill_OIhw4i4o(memory_desc);
     case OIhw8i8o: return fill_OIhw8i8o(memory_desc);
     case OIhw16i16o: return fill_OIhw16i16o(memory_desc);
     case OIhw4i16o4i: return fill_OIhw4i16o4i(memory_desc);
     case OhIw8o4i: return fill_OhIw8o4i(memory_desc);
+    case OhIw8o32i: return fill_OhIw8o32i(memory_desc);
+    case OhIw16o32i: return fill_OhIw16o32i(memory_desc);
     case OhIw8o4i_s8s8: return fill_OhIw8o4i(memory_desc);
     case OIhw4i16o4i_s8s8: return fill_OIhw4i16o4i(memory_desc);
     case OIhw8i16o2i: return fill_OIhw8i16o2i(memory_desc);
@@ -1021,15 +1291,20 @@ status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc)
     case OIhw8o8i: return fill_OIhw8o8i(memory_desc);
     case OIhw16o16i: return fill_OIhw16o16i(memory_desc);
     case IOhw16o16i: return fill_IOhw16o16i(memory_desc);
+    case Oihw4o: return fill_Oihw4o(memory_desc);
     case Oihw16o: return fill_Oihw16o(memory_desc);
     case Ohwi8o: return fill_Ohwi8o(memory_desc);
+    case Ohwi4o: return fill_Ohwi4o(memory_desc);
     case Ohwi16o: return fill_Ohwi16o(memory_desc);
     case goiw: return fill_goiw(memory_desc);
+    case gOwi4o: return fill_gOwi4o(memory_desc);
+    case gOIw4i4o: return fill_gOIw4i4o(memory_desc);
     case gOwi8o: return fill_gOwi8o(memory_desc);
     case gOIw8o8i: return fill_gOIw8o8i(memory_desc);
     case gOIw8i8o: return fill_gOIw8i8o(memory_desc);
     case gOIw16i16o: return fill_gOIw16i16o(memory_desc);
     case gOIw16o16i: return fill_gOIw16o16i(memory_desc);
+    case gOiw4o: return fill_gOiw4o(memory_desc);
     case gOiw16o: return fill_gOiw16o(memory_desc);
     case gOwi16o: return fill_gOwi16o(memory_desc);
     case gOIw8i16o2i: return fill_gOIw8i16o2i(memory_desc);
@@ -1037,41 +1312,55 @@ status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc)
     case gIOw16o16i: return fill_gIOw16o16i(memory_desc);
     case goihw: return fill_goihw(memory_desc);
     case hwigo: return fill_hwigo(memory_desc);
+    case giohw: return fill_giohw(memory_desc);
     case hwigo_s8s8: return fill_hwigo(memory_desc);
+    case gOIhw4i4o: return fill_gOIhw4i4o(memory_desc);
     case gOIhw8i8o: return fill_gOIhw8i8o(memory_desc);
     case gOIhw16i16o: return fill_gOIhw16i16o(memory_desc);
     case gOIhw4i16o4i: return fill_gOIhw4i16o4i(memory_desc);
     case gOhIw8o4i: return fill_gOhIw8o4i(memory_desc);
     case gOhIw8o4i_s8s8: return fill_gOhIw8o4i(memory_desc);
     case gOIhw4i16o4i_s8s8: return fill_gOIhw4i16o4i(memory_desc);
+    case gOIhw2i8o4i: return fill_gOIhw2i8o4i(memory_desc);
+    case gOIhw2i8o4i_s8s8: return fill_gOIhw2i8o4i(memory_desc);
     case gOIhw8i16o2i: return fill_gOIhw8i16o2i(memory_desc);
     case gOIdhw8i16o2i: return fill_gOIdhw8i16o2i(memory_desc);
     case gOIhw8o16i2o: return fill_gOIhw8o16i2o(memory_desc);
+    case gOIhw4o4i: return fill_gOIhw4o4i(memory_desc);
+    case gOIhw4o4i_s8s8: return fill_gOIhw4o4i(memory_desc);
     case gOIhw8o8i: return fill_gOIhw8o8i(memory_desc);
     case gOIhw16o16i: return fill_gOIhw16o16i(memory_desc);
     case gIOhw16o16i: return fill_gIOhw16o16i(memory_desc);
+    case gOihw4o: return fill_gOihw4o(memory_desc);
     case gOihw16o: return fill_gOihw16o(memory_desc);
     case gOhwi8o: return fill_gOhwi8o(memory_desc);
+    case gOhwi4o: return fill_gOhwi4o(memory_desc);
     case gOhwi16o: return fill_gOhwi16o(memory_desc);
     case Goihw8g: return fill_Goihw8g(memory_desc);
     case Goihw16g: return fill_Goihw16g(memory_desc);
+    case Goihw16g_s8s8: return fill_Goihw16g(memory_desc);
     case ncdhw: return fill_ncdhw(memory_desc);
     case ndhwc: return fill_ndhwc(memory_desc);
     case oidhw: return fill_oidhw(memory_desc);
     case goidhw: return fill_goidhw(memory_desc);
+    case nCdhw4c: return fill_nCdhw4c(memory_desc);
     case nCdhw8c: case oIdhw8i: return fill_nCdhw8c(memory_desc);
     case nCdhw16c: case oIdhw16i: return fill_nCdhw16c(memory_desc);
     case OIdhw16i16o: return fill_OIdhw16i16o(memory_desc);
     case gOIdhw16i16o: return fill_gOIdhw16i16o(memory_desc);
+    case OIdhw4i4o: return fill_OIdhw4i4o(memory_desc);
+    case gOIdhw4i4o: return fill_gOIdhw4i4o(memory_desc);
     case OIdhw8i8o: return fill_OIdhw8i8o(memory_desc);
     case gOIdhw8i8o: return fill_gOIdhw8i8o(memory_desc);
     case OIdhw16o16i: return fill_OIdhw16o16i(memory_desc);
     case gOIdhw16o16i: return fill_gOIdhw16o16i(memory_desc);
     case OIdhw8o8i: return fill_OIdhw8o8i(memory_desc);
     case gOIdhw8o8i: return fill_gOIdhw8o8i(memory_desc);
+    case Oidhw4o: return fill_Oidhw4o(memory_desc);
     case Oidhw16o: return fill_Oidhw16o(memory_desc);
     case Odhwi16o: return fill_Odhwi16o(memory_desc);
     case Odhwi8o: return fill_Odhwi8o(memory_desc);
+    case gOidhw4o: return fill_gOidhw4o(memory_desc);
     case gOidhw16o: return fill_gOidhw16o(memory_desc);
     case gOdhwi16o: return fill_gOdhwi16o(memory_desc);
     case gOdhwi8o: return fill_gOdhwi8o(memory_desc);
@@ -1081,7 +1370,8 @@ status_t memory_desc_wrapper::compute_blocking(memory_desc_t &memory_desc)
     case ldigo: return fill_ldigo(memory_desc);
     case ldgoi: return fill_ldgoi(memory_desc);
     case ldgo: return fill_ldgo(memory_desc);
-    case wino_fmt: return success;
+    case wino_fmt:
+    case rnn_packed: return success;
     default: break;
     }
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp
index 91e18cf1d..7c2f8ef7c 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/memory_desc_wrapper.hpp
@@ -46,12 +46,16 @@ struct memory_desc_wrapper: public c_compatible {
     memory_format_t format() const { return _md->format; }
     bool is_blocking_desc() const {
         return (format() != memory_format::wino_fmt
+                && format() != memory_format::rnn_packed
                 && format() != memory_format::any
                 && format() != memory_format::undef);
     }
     bool is_wino_desc() const {
         return (format() == memory_format::wino_fmt);
     }
+    bool is_rnn_packed_desc() const {
+        return (format() == memory_format::rnn_packed);
+    }
     const blocking_desc_t &blocking_desc() const {
         assert(is_blocking_desc());
         return _md->layout_desc.blocking;
@@ -60,6 +64,10 @@ struct memory_desc_wrapper: public c_compatible {
         assert(is_wino_desc());
         return _md->layout_desc.wino_desc;
     }
+    const rnn_packed_data_t &rnn_packed_desc() const {
+        assert(is_rnn_packed_desc());
+        return _md->layout_desc.rnn_packed_desc;
+    }
 
     /* some useful function */
 
@@ -67,7 +75,7 @@ struct memory_desc_wrapper: public c_compatible {
      * is true, and the number of data elements otherwise */
     size_t nelems(bool with_padding = false) const {
         if (is_zero()) return 0;
-        return (utils::array_product<int, size_t>(with_padding
+        return (utils::array_product<ptrdiff_t, size_t>(with_padding
                 ? blocking_desc().padding_dims : dims(), ndims()));
     }
 
@@ -85,7 +93,11 @@ struct memory_desc_wrapper: public c_compatible {
     size_t additional_buffer_data_size() const {
         using namespace mkldnn::impl::memory_format;
         return (utils::one_of(format(), hwio_s8s8, hwigo_s8s8,
-                    gOIhw4i16o4i_s8s8, OIhw4i16o4i_s8s8, OhIw8o4i_s8s8, gOhIw8o4i_s8s8))
+                    gOIhw4o4i_s8s8,
+                    gOIhw4i16o4i_s8s8, OIhw4i16o4i_s8s8,
+                    gOIhw2i8o4i_s8s8,
+                    gOhIw8o4i_s8s8, OhIw8o4i_s8s8,
+                    Goihw16g_s8s8))
             ? sizeof(int32_t) : 0;
     }
 
@@ -93,7 +105,11 @@ struct memory_desc_wrapper: public c_compatible {
     bool is_additional_buffer() const {
         using namespace mkldnn::impl::memory_format;
         return (utils::one_of(format(), hwio_s8s8, hwigo_s8s8,
-                    gOIhw4i16o4i_s8s8, OIhw4i16o4i_s8s8, OhIw8o4i_s8s8, gOhIw8o4i_s8s8))
+                    gOIhw4o4i_s8s8,
+                    gOIhw4i16o4i_s8s8, OIhw4i16o4i_s8s8,
+                    gOIhw2i8o4i_s8s8,
+                    gOhIw8o4i_s8s8, OhIw8o4i_s8s8,
+                    Goihw16g_s8s8))
             ? true : false;
     }
 
@@ -103,10 +119,13 @@ struct memory_desc_wrapper: public c_compatible {
         const auto &padding_dims = blocking_desc().padding_dims;
         switch(format()) {
             case hwigo_s8s8:
+            case gOIhw4o4i_s8s8:
+            case gOIhw2i8o4i_s8s8:
             case gOIhw4i16o4i_s8s8:
             case gOhIw8o4i_s8s8:
                 return size_t(padding_dims[0]) * size_t(padding_dims[1])
                     * additional_buffer_data_size();
+            case Goihw16g_s8s8:
             case hwio_s8s8:
             case OIhw4i16o4i_s8s8:
             case OhIw8o4i_s8s8:
@@ -126,11 +145,14 @@ struct memory_desc_wrapper: public c_compatible {
         assert((false
                     || types::format_normalize(format()) == blocked
                     || types::is_format_double_blocked(format())
-                    || format() == wino_fmt)
+                    || format() == wino_fmt
+                    || format() == rnn_packed)
                 && "unknown format");
 
         if (format() == wino_fmt) {
             return wino_desc().size;
+        } else if (format() == rnn_packed) {
+            return rnn_packed_desc().size;
         } else {
             if (blocking_desc().offset_padding != 0) return 0;
 
@@ -147,7 +169,8 @@ struct memory_desc_wrapper: public c_compatible {
                     max_size = nstl::max(max_size,
                             size_t(block * strides[1][d]));
             }
-            return max_size * data_type_size() + additional_buffer_size();;
+
+            return max_size * data_type_size() + additional_buffer_size();
         }
     }
 
@@ -231,6 +254,13 @@ struct memory_desc_wrapper: public c_compatible {
             const int ic_4  = pos[with_groups + 1] % 4;
             phys_offset += 4 * oc_16 + ic_4 - (oc_16 + 16 * ic_4);
         }
+        if (utils::one_of(format(), gOIhw2i8o4i,  gOIhw2i8o4i_s8s8)) {
+            // TODO: Fix temporary workaround for formats with double blocking
+            const bool with_groups = true;
+            const int oc_8 = pos[with_groups + 0] % 8;
+            const int ic_4 = pos[with_groups + 1] % 4;
+            phys_offset += 4 * oc_8 + ic_4 - (oc_8 + 8 * ic_4);
+        }
         if (format() == gOIw8i16o2i || format() == OIw8i16o2i) {
             // TODO: Fix temporary workaround for formats with double blocking
             const bool with_groups = format() == gOIw8i16o2i;
@@ -362,13 +392,18 @@ inline bool memory_desc_wrapper::operator==(const memory_desc_wrapper &rhs)
             && utils::array_cmp(dims(), rhs.dims(), ndims())
             && data_type() == rhs.data_type()
             && ((is_blocking_desc() && rhs.is_blocking_desc())
-                       || (is_wino_desc() && rhs.is_wino_desc()))
+                       || (is_wino_desc() && rhs.is_wino_desc())
+                       || (is_rnn_packed_desc() && rhs.is_rnn_packed_desc()))
             && (is_blocking_desc() ? blocking_desc_is_equal(blocking_desc(),
                                              rhs.blocking_desc(), ndims()) :
                                      true)
             && (is_wino_desc() ? wino_desc_is_equal(
                                          wino_desc(), rhs.wino_desc()) :
-                                 true);
+                                 true)
+            && (is_rnn_packed_desc() ?
+                               rnn_packed_desc_is_equal(rnn_packed_desc(),
+                                       rhs.rnn_packed_desc()) :
+                               true);
 }
 
 inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
@@ -377,7 +412,8 @@ inline bool memory_desc_wrapper::similar_to(const memory_desc_wrapper &rhs,
     using namespace utils;
     if (utils::one_of(format(), memory_format::undef, memory_format::any))
         return false;
-    if (is_wino_desc() || rhs.is_wino_desc())
+    if (is_wino_desc() || rhs.is_wino_desc() || is_rnn_packed_desc()
+            || rhs.is_rnn_packed_desc())
         return false;
 
     const int ds = dim_start;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/memory_tracking.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/memory_tracking.hpp
new file mode 100644
index 000000000..f47536c30
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/memory_tracking.hpp
@@ -0,0 +1,297 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef MEMORY_TRACKING_HPP
+#define MEMORY_TRACKING_HPP
+
+#include <assert.h>
+#include <unordered_map>
+
+#include "nstl.hpp"
+#include "utils.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace memory_tracking {
+
+/* Memory tracking capabilities
+ *
+ * The main purpose of this header file is to provide uniform way to register
+ * required memory for a scratchpad at a primitive descriptor creation time
+ * and then easily access it having only the base address of the scratchpad.
+ *
+ * Primitives might contain multiple disjoint parts that require temporary
+ * buffers (known as scratchpad) during their execution. A primitive descriptor
+ * should summarize all the needs into one single number -- the buffer size
+ * that would be requested from a user. At execution time, the corresponding
+ * primitive will receive a base pointer to a scratchpad. It then needs to
+ * provide each part of algorithm the corresponding piece of memory. Three main
+ * challenges here are:
+ * 1. Track correct offset (from the base scratchpad address) for each piece
+ * 2. Algorithm might require that different memory pieces to be aligned, so
+ *    the scratchpad size is no more just a sum of size of the corresponding
+ *    subparts.
+ * 3. While a primitive is responsible for its scratchpad, the implementation
+ *    might use some other basic blocks (e.g. cpu_reducer) that also require
+ *    scratchpad memory. So there should be a simple way of passing the
+ *    information back and force between the main algorithm (a primitive) and
+ *    auxiliary stuff that lives completely separately from it (e.g. reducer).
+ *
+ * To address these challenges this header file provides 3 structures:
+ * 1. registry_t  -- the class the stores the information about requested
+ *                   memory. The information includes required size and desired
+ *                   alignment for each piece. This class is also responsible
+ *                   for computing the right offset to a given piece using the
+ *                   base pointer.
+ *                   This class is basically a ledger with all entries.
+ *                   Lives in primitive descriptors.
+ *
+ * 2. registrar_t -- the interface to a registry_t to book memory. Used at
+ *                   primitive descriptor creation time only. Contains a
+ *                   reference to the corresponding *mutable* registry.
+ *                   Always modifiable.
+ *                   Allows chaining (using prefixes).
+ *
+ * 3. grantor_t   -- the interface to a registry_t to access memory. Used at
+ *                   primitive execution time only. Contains a reference to
+ *                   the corresponding *constant* registry and base pointer.
+ *                   Always constant.
+ *                   Allows chaining (using prefixes).
+ *
+ * Both registrar_t and grantor_t allow chaining with extra prefix provided.
+ * The feature is useful when a primitive offload a part of computations to
+ * some other primitives which require their own scratchpad space
+ * (e.g. reducer). Prefixes are used to avoid key collision in cases when
+ * multiple sub-primitive (e.g. multiple reducers) are used.
+ *
+ * A short example below demonstrates how to use aforementioned classes. In it
+ * the main primitive is convolution that uses scratchpad for keeping padded
+ * bias. It also needs a reducer, that needs its own space as well.
+ *
+ *  ``` c++
+ *  struct reducer_t {
+ *      static void init(registrar_t &scratchpad) {
+ *          // preserve space for the reduction (one page aligned)
+ *          scratchpad.book(key_space, sizeof(float) * 980 * 1024, 4096);
+ *      }
+ *
+ *      void exec(const grantor_t &scratchpad) {
+ *          // get the pointer to preserved space. scratchpad came from
+ *          // upper primitive (convolution in this example)
+ *          auto space = scratchpad.get<float>(key_reducer_space);
+ *
+ *          space[:] += ...;
+ *      }
+ *  };
+ *
+ *  struct conv_t {
+ *      struct pd_t {
+ *          void init() {
+ *              registrar_t scratchpad(scratchpad_registry_);
+ *
+ *              // preserve a space for padded bias (using default alignment)
+ *              scratchpad.book(key_conv_padded_bias, 128);
+ *
+ *              // create a proxy registrar for the reducer All entries made
+ *              // by reducer would live in convolution's registry, but would
+ *              // have their own `prefix`, so no interference with conv's
+ *              // buffers.
+ *              registrar_t reducer_scratchpad(scratchpad, prefix_reducer);
+ *
+ *              reducer_t::init(reducer_scratchpad);
+ *          }
+ *
+ *          registry_t scratchpad_registry_;
+ *      }
+ *
+ *      void exec() {
+ *          // get the base pointer to a scratchpad memory from a user
+ *          void *scratchpad_ptr = this->input(MKLDNN_MEM_SCRATCHPAD);
+ *
+ *          // create a grantor to the scratchpad (and provide the base
+ *          // pointer).
+ *          grantor_t scratchpad(pd()->scratchpad_registry_, scratchpad_ptr);
+ *
+ *          // access the padded_bias (need only key name and the grantor)
+ *          auto padded_bias = scratchpad.get<float>(key_conv_padded_bias);
+ *
+ *          // to give the `right` grantor to reducer we need to add the
+ *          // corresponding prefix, so that reducer would be able to access
+ *          // its keys. The call is very similar to the one in pd_t::init
+ *          // with only difference in types: grantor_t vs registrar_t.
+ *          grantor_t reducer_scratchpad(scratchpad, prefix_reducer);
+ *          reducer->exec(reducer_scratchpad);
+ *      }
+ *  };
+ *  ```
+ */
+
+
+/* namespace with common keys and prefixes */
+namespace names {
+enum {
+    key_none = 0,
+    key_bnorm_tmp_mean,
+    key_bnorm_tmp_var,
+    key_bnorm_tmp_diff_ss,
+    key_bnorm_tmp_stats,
+    key_bnorm_reduction,
+    key_concat_iptrs,
+    key_concat_istrides,
+    key_concat_nelems,
+    key_concat_optrs,
+    key_conv_adjusted_scales,
+    key_conv_bia_reduction,
+    key_conv_gemm_col,
+    key_conv_int_dat_in_acc_dt,
+    key_conv_padded_bias,
+    key_conv_rtus_space,
+    key_conv_tr_diff_dst,
+    key_conv_tr_diff_dst_bctx,
+    key_conv_tr_src,
+    key_conv_tr_src_bctx,
+    key_conv_wei_reduction,
+    key_conv_wei_bia_reduction,
+    key_conv_wei_bia_reduction_bctx,
+    key_iprod_int_dat_in_acc_dt,
+    key_reducer_space,
+    key_reducer_space_bctx,
+    key_reorder_wino_plain,
+    key_reorder_wino_transform_space,
+    key_reorder_rnn_weights_quantization,
+    key_reorder_rnn_weights_reduction,
+    key_rnn_space,
+    key_rnn_ptrs_bia,
+    key_rnn_ptrs_wei_layer,
+    key_rnn_ptrs_wei_iter,
+    key_softmax_reduction,
+    key_wino_U,
+    key_wino_V,
+    key_wino_M,
+    key_barrier,
+    key_dw_conv_buffer,
+    key_dw_conv_padded_bias,
+    key_conv_padded_compensation,
+};
+
+enum {
+    prefix_none = 0,
+    prefix_reducer_bia,
+    prefix_reducer_wei,
+};
+}
+
+// level 0: 00 00 00 xxx
+// level 1: 00 00 aa xxx
+// level 2: 00 aa bb xxx
+// level 3: aa bb cc xxx
+// max # of levels: 3 + 1 (base_level)
+// here:
+//      xxx        : [1 ..    MAX_KEY) : key
+//      aa, bb, cc : [1 .. MAX_PREFIX) : prefixes for levels 1, 2, and 3
+
+using key_t = uint32_t;
+enum { MAX_KEY = (1u << 10), MAX_PREFIX = (1u << 7), };
+
+/// generates global key based on a prefix and a local key
+inline key_t make_key(key_t prefix, key_t key) { return prefix + key; }
+
+/// generates global prefix based on the global parent and the local ones
+inline key_t make_prefix(key_t parent_prefix, key_t prefix)
+{ return MAX_PREFIX * parent_prefix + MAX_KEY * prefix; }
+
+struct registrar_t;
+struct grantor_t;
+
+struct registry_t {
+    void book(const key_t &key, size_t size, size_t alignment) {
+        if (size == 0) return;
+        assert(offset_map_.count(key) == 0);
+
+        size = utils::rnd_up(size, minimal_alignment);
+        alignment = nstl::max<size_t>(alignment, minimal_alignment);
+        offset_map_[key] = entry_t{size_, size, alignment};
+
+        size_ += size + alignment - minimal_alignment;
+    }
+
+    void *get(const key_t &key, void *base_ptr) const {
+        if (base_ptr == nullptr) { assert(size() == 0); return nullptr; }
+        if (offset_map_.count(key) != 1) return nullptr;
+
+        const auto &e = offset_map_.at(key);
+        base_ptr = utils::align_ptr<void>(base_ptr, minimal_alignment);
+        char *ptr = (char *)base_ptr + e.offset;
+        return utils::align_ptr<void>(ptr, e.alignment);
+    }
+
+    size_t size() const
+    { return size_ > 0 ? size_ + minimal_alignment - 1 : 0; }
+
+    registrar_t registrar();
+    grantor_t grantor(void *base_ptr) const;
+
+protected:
+    enum { minimal_alignment = 64 };
+    struct entry_t { size_t offset, size, alignment; };
+
+    std::unordered_map<key_t, entry_t> offset_map_;
+    size_t size_ = 0;
+};
+
+struct registrar_t {
+    enum { default_alignment = 64 };
+
+    registrar_t(registry_t &registry): registry_(registry), prefix_(0) {}
+    registrar_t(registrar_t &parent, const key_t &prefix)
+        : registry_(parent.registry_)
+        , prefix_(make_prefix(parent.prefix_, prefix)) {}
+
+    void book(const key_t &key, size_t size,
+            size_t alignment = default_alignment)
+    { registry_.book(make_key(prefix_, key), size, alignment); }
+
+protected:
+    registry_t &registry_;
+    const key_t prefix_;
+};
+
+struct grantor_t {
+    grantor_t(const registry_t &registry, void *base_ptr)
+        : registry_(registry), prefix_(0), base_ptr_(base_ptr) {}
+    grantor_t(const grantor_t &parent, const key_t &prefix)
+        : registry_(parent.registry_)
+        , prefix_(make_prefix(parent.prefix_, prefix))
+        , base_ptr_(parent.base_ptr_) {}
+
+    template <typename T = void> T *get(const key_t &key) const
+    { return (T *)registry_.get(make_key(prefix_, key), base_ptr_); }
+
+protected:
+    const registry_t &registry_;
+    const key_t prefix_;
+    void *base_ptr_;
+};
+
+inline registrar_t registry_t::registrar() { return registrar_t(*this); }
+inline grantor_t registry_t::grantor(void *base_ptr) const
+{ return grantor_t(*this, base_ptr); }
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_debug.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_debug.cpp
index b54848f63..07840081a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_debug.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_debug.cpp
@@ -42,6 +42,7 @@ const char *mkldnn_dt2str(mkldnn_data_type_t v) {
     if (v == mkldnn_s16) return "s16";
     if (v == mkldnn_s8) return "s8";
     if (v == mkldnn_u8) return "u8";
+    if (v == mkldnn_bin) return "bin";
     assert(!"unknown dt");
     return "unknown dt";
 }
@@ -72,14 +73,14 @@ const char *mkldnn_fmt2str(mkldnn_memory_format_t v) {
     if (v == mkldnn_wio) return "wio";
     if (v == mkldnn_oihw) return "oihw";
     if (v == mkldnn_hwio) return "hwio";
-    if (v == mkldnn_hwio_s8s8) return "hwio_s8s8";
     if (v == mkldnn_ihwo) return "ihwo";
+    if (v == mkldnn_iohw) return "iohw";
     if (v == mkldnn_oidhw) return "oidhw";
     if (v == mkldnn_dhwio) return "dhwio";
     if (v == mkldnn_goiw) return "goiw";
     if (v == mkldnn_goihw) return "goihw";
     if (v == mkldnn_hwigo) return "hwigo";
-    if (v == mkldnn_hwigo_s8s8) return "hwigo_s8s8";
+    if (v == mkldnn_giohw) return "giohw";
     if (v == mkldnn_goidhw) return "goidhw";
     if (v == mkldnn_ntc) return "ntc";
     if (v == mkldnn_tnc) return "tnc";
@@ -87,24 +88,32 @@ const char *mkldnn_fmt2str(mkldnn_memory_format_t v) {
     if (v == mkldnn_ldigo) return "ldigo";
     if (v == mkldnn_ldgoi) return "ldgoi";
     if (v == mkldnn_ldgo) return "ldgo";
+    if (v == mkldnn_nCw4c) return "nCw4c";
     if (v == mkldnn_nCw8c) return "nCw8c";
     if (v == mkldnn_nCw16c) return "nCw16c";
+    if (v == mkldnn_nChw4c) return "nChw4c";
     if (v == mkldnn_nChw8c) return "nChw8c";
     if (v == mkldnn_nChw16c) return "nChw16c";
+    if (v == mkldnn_nCdhw4c) return "nCdhw4c";
     if (v == mkldnn_nCdhw8c) return "nCdhw8c";
     if (v == mkldnn_nCdhw16c) return "nCdhw16c";
+    if (v == mkldnn_Owi4o) return "Owi4o";
+    if (v == mkldnn_OIw4i4o) return "OIw4i4o";
     if (v == mkldnn_Owi8o) return "Owi8o";
     if (v == mkldnn_OIw8i8o) return "OIw8i8o";
     if (v == mkldnn_OIw8o8i) return "OIw8o8i";
     if (v == mkldnn_OIw16i16o) return "OIw16i16o";
     if (v == mkldnn_OIw16o16i) return "OIw16o16i";
+    if (v == mkldnn_Oiw4o) return "Oiw4o";
     if (v == mkldnn_Oiw16o) return "Oiw16o";
     if (v == mkldnn_Owi16o) return "Owi16o";
     if (v == mkldnn_OIw8i16o2i) return "OIw8i16o2i";
     if (v == mkldnn_OIw8o16i2o) return "OIw8o16i2o";
     if (v == mkldnn_IOw16o16i) return "IOw16o16i";
+    if (v == mkldnn_hwio_s8s8) return "hwio_s8s8";
     if (v == mkldnn_oIhw8i) return "oIhw8i";
     if (v == mkldnn_oIhw16i) return "oIhw16i";
+    if (v == mkldnn_OIhw4i4o) return "OIhw4i4o";
     if (v == mkldnn_OIhw8i8o) return "OIhw8i8o";
     if (v == mkldnn_OIhw16i16o) return "OIhw16i16o";
     if (v == mkldnn_OIhw4i16o4i) return "OIhw4i16o4i";
@@ -115,48 +124,69 @@ const char *mkldnn_fmt2str(mkldnn_memory_format_t v) {
     if (v == mkldnn_OIhw16o16i) return "OIhw16o16i";
     if (v == mkldnn_IOhw16o16i) return "IOhw16o16i";
     if (v == mkldnn_Oihw8o) return "Oihw8o";
+    if (v == mkldnn_Oihw4o) return "Oihw4o";
     if (v == mkldnn_Oihw16o) return "Oihw16o";
     if (v == mkldnn_Ohwi8o) return "Ohwi8o";
+    if (v == mkldnn_Ohwi4o) return "Ohwi4o";
     if (v == mkldnn_Ohwi16o) return "Ohwi16o";
     if (v == mkldnn_OhIw16o4i) return "OhIw16o4i";
     if (v == mkldnn_OhIw8o4i) return "OhIw8o4i";
     if (v == mkldnn_OhIw8o4i_s8s8) return "OhIw8o4i_s8s8";
+    if (v == mkldnn_OhIw8o32i) return "OhIw8o32i";
+    if (v == mkldnn_OhIw16o32i) return "OhIw16o32i";
     if (v == mkldnn_oIdhw8i) return "oIdhw8i";
     if (v == mkldnn_oIdhw16i) return "oIdhw16i";
+    if (v == mkldnn_OIdhw4i4o) return "OIdhw4i4o";
+    if (v == mkldnn_Odhwi4o) return "Odhwi4o";
     if (v == mkldnn_OIdhw8i8o) return "OIdhw8i8o";
     if (v == mkldnn_OIdhw8o8i) return "OIdhw8o8i";
     if (v == mkldnn_Odhwi8o) return "Odhwi8o";
     if (v == mkldnn_OIdhw16i16o) return "OIdhw16i16o";
     if (v == mkldnn_OIdhw16o16i) return "OIdhw16o16i";
+    if (v == mkldnn_Oidhw4o) return "Oidhw4o";
     if (v == mkldnn_Oidhw16o) return "Oidhw16o";
     if (v == mkldnn_Odhwi16o) return "Odhwi16o";
     if (v == mkldnn_OIdhw8i16o2i) return "OIdhw8i16o2i";
+    if (v == mkldnn_gOwi4o) return "gOwi4o";
+    if (v == mkldnn_gOIw4i4o) return "gOIw4i4o";
     if (v == mkldnn_gOwi8o) return "gOwi8o";
     if (v == mkldnn_gOIw8o8i) return "gOIw8o8i";
     if (v == mkldnn_gOIw8i8o) return "gOIw8i8o";
     if (v == mkldnn_gOIw16i16o) return "gOIw16i16o";
     if (v == mkldnn_gOIw16o16i) return "gOIw16o16i";
+    if (v == mkldnn_gOiw4o) return "gOiw4o";
     if (v == mkldnn_gOiw16o) return "gOiw16o";
     if (v == mkldnn_gOwi16o) return "gOwi16o";
     if (v == mkldnn_gOIw8i16o2i) return "gOIw8i16o2i";
     if (v == mkldnn_gOIw8o16i2o) return "gOIw8o16i2o";
     if (v == mkldnn_gIOw16o16i) return "gIOw16o16i";
+    if (v == mkldnn_hwigo_s8s8) return "hwigo_s8s8";
+    if (v == mkldnn_gOIhw4i4o) return "gOIhw4i4o";
     if (v == mkldnn_gOIhw8i8o) return "gOIhw8i8o";
     if (v == mkldnn_gOIhw16i16o) return "gOIhw16i16o";
     if (v == mkldnn_gOIhw4i16o4i) return "gOIhw4i16o4i";
     if (v == mkldnn_gOIhw4i16o4i_s8s8) return "gOIhw4i16o4i_s8s8";
+    if (v == mkldnn_gOIhw2i8o4i) return "gOIhw2i8o4i";
+    if (v == mkldnn_gOIhw2i8o4i_s8s8) return "gOIhw2i8o4i_s8s8";
     if (v == mkldnn_gOIhw8i16o2i) return "gOIhw8i16o2i";
     if (v == mkldnn_gOIhw8o16i2o) return "gOIhw8o16i2o";
+    if (v == mkldnn_gOIhw4o4i) return "gOIhw4o4i";
+    if (v == mkldnn_gOIhw4o4i_s8s8) return "gOIhw4o4i_s8s8";
     if (v == mkldnn_gOIhw8o8i) return "gOIhw8o8i";
     if (v == mkldnn_gOIhw16o16i) return "gOIhw16o16i";
     if (v == mkldnn_gIOhw16o16i) return "gIOhw16o16i";
     if (v == mkldnn_gOihw8o) return "gOihw8o";
+    if (v == mkldnn_gOihw4o) return "gOihw4o";
     if (v == mkldnn_gOihw16o) return "gOihw16o";
     if (v == mkldnn_gOhwi8o) return "gOhwi8o";
+    if (v == mkldnn_gOhwi4o) return "gOhwi4o";
     if (v == mkldnn_gOhwi16o) return "gOhwi16o";
     if (v == mkldnn_Goihw8g) return "Goihw8g";
     if (v == mkldnn_Goihw16g) return "Goihw16g";
+    if (v == mkldnn_Goihw16g_s8s8) return "Goihw16g_s8s8";
     if (v == mkldnn_gOhIw16o4i) return "gOhIw16o4i";
+    if (v == mkldnn_gOIdhw4i4o) return "gOIdhw4i4o";
+    if (v == mkldnn_gOdhwi4o) return "gOdhwi4o";
     if (v == mkldnn_gOhIw8o4i) return "gOhIw8o4i";
     if (v == mkldnn_gOhIw8o4i_s8s8) return "gOhIw8o4i_s8s8";
     if (v == mkldnn_gOIdhw8i8o) return "gOIdhw8i8o";
@@ -165,11 +195,11 @@ const char *mkldnn_fmt2str(mkldnn_memory_format_t v) {
     if (v == mkldnn_gOIdhw8i16o2i) return "gOIdhw8i16o2i";
     if (v == mkldnn_gOIdhw16i16o) return "gOIdhw16i16o";
     if (v == mkldnn_gOIdhw16o16i) return "gOIdhw16o16i";
+    if (v == mkldnn_gOidhw4o) return "gOidhw4o";
     if (v == mkldnn_gOidhw16o) return "gOidhw16o";
     if (v == mkldnn_gOdhwi16o) return "gOdhwi16o";
     if (v == mkldnn_wino_fmt) return "wino_fmt";
-    if (v == mkldnn_ldigo_p) return "ldigo_p";
-    if (v == mkldnn_ldgoi_p) return "ldgoi_p";
+    if (v == mkldnn_rnn_packed) return "rnn_packed";
     if (v == mkldnn_format_last) return "format_last";
     assert(!"unknown fmt");
     return "unknown fmt";
@@ -202,21 +232,22 @@ const char *mkldnn_prim_kind2str(mkldnn_primitive_kind_t v) {
     if (v == mkldnn_deconvolution) return "deconvolution";
     if (v == mkldnn_eltwise) return "eltwise";
     if (v == mkldnn_depthwise) return "depthwise";
-    if (v == mkldnn_relu) return "relu";
     if (v == mkldnn_softmax) return "softmax";
     if (v == mkldnn_pooling) return "pooling";
     if (v == mkldnn_lrn) return "lrn";
     if (v == mkldnn_batch_normalization) return "batch_normalization";
     if (v == mkldnn_inner_product) return "inner_product";
-    if (v == mkldnn_convolution_relu) return "convolution_relu";
     if (v == mkldnn_rnn) return "rnn";
     if (v == mkldnn_roi_pooling) return "roi_pooling";
+    if (v == mkldnn_binary_convolution) return "binary_convolution";
+    if (v == mkldnn_binarization) return "binarization";
     assert(!"unknown prim_kind");
     return "unknown prim_kind";
 }
 
 const char *mkldnn_alg_kind2str(mkldnn_alg_kind_t v) {
     if (v == mkldnn_alg_kind_undef) return "undef";
+    if (v == mkldnn_convolution_auto) return "convolution_auto";
     if (v == mkldnn_convolution_direct) return "convolution_direct";
     if (v == mkldnn_convolution_winograd) return "convolution_winograd";
     if (v == mkldnn_eltwise_relu) return "eltwise_relu";
@@ -230,6 +261,8 @@ const char *mkldnn_alg_kind2str(mkldnn_alg_kind_t v) {
     if (v == mkldnn_eltwise_soft_relu) return "eltwise_soft_relu";
     if (v == mkldnn_eltwise_logistic) return "eltwise_logistic";
     if (v == mkldnn_eltwise_clamp) return "eltwise_clamp";
+    if (v == mkldnn_eltwise_exp) return "eltwise_exp";
+    if (v == mkldnn_eltwise_not) return "eltwise_not";
     if (v == mkldnn_pooling_max) return "pooling_max";
     if (v == mkldnn_pooling_avg_include_padding) return "pooling_avg_include_padding";
     if (v == mkldnn_pooling_avg_exclude_padding) return "pooling_avg_exclude_padding";
@@ -246,8 +279,20 @@ const char *mkldnn_alg_kind2str(mkldnn_alg_kind_t v) {
     if (v == mkldnn_depthwise_prelu) return "depthwise_prelu";
     if (v == mkldnn_roi_pooling_max) return "roi_pooling_max";
     if (v == mkldnn_roi_pooling_bilinear) return "roi_pooling_bilinear";
+    if (v == mkldnn_binary_convolution_direct) return "binary_convolution_direct";
+    if (v == mkldnn_binarization_depthwise) return "binarization_depthwise";
     assert(!"unknown alg_kind");
     return "unknown alg_kind";
 }
 
+const char *mkldnn_rnn_direction2str(mkldnn_rnn_direction_t v) {
+    if (v == mkldnn_unidirectional_left2right) return "unidirectional_left2right";
+    if (v == mkldnn_unidirectional_right2left) return "unidirectional_right2left";
+    if (v == mkldnn_bidirectional_concat) return "bidirectional_concat";
+    if (v == mkldnn_bidirectional_sum) return "bidirectional_sum";
+    if (v == mkldnn_unidirectional) return "unidirectional";
+    assert(!"unknown rnn_direction");
+    return "unknown rnn_direction";
+}
+
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread.hpp
index 9741c21dc..b65ddb1f0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread.hpp
@@ -43,6 +43,8 @@ inline int mkldnn_get_thread_num() { return 0; }
 inline int mkldnn_in_parallel() { return 0; }
 inline void mkldnn_thr_barrier() {}
 
+#define PRAGMA_OMP(...)
+
 #elif MKLDNN_THR == MKLDNN_THR_OMP
 #include <omp.h>
 #define MKLDNN_THR_SYNC 1
@@ -55,6 +57,8 @@ inline void mkldnn_thr_barrier() {
 #   pragma omp barrier
 }
 
+#define PRAGMA_OMP(...) PRAGMA_MACRO(CHAIN2(omp, __VA_ARGS__))
+
 #elif MKLDNN_THR == MKLDNN_THR_TBB
 #include "tbb/task_arena.h"
 #include "tbb/parallel_for.h"
@@ -67,6 +71,9 @@ inline int mkldnn_get_thread_num()
 { return tbb::this_task_arena::current_thread_index(); }
 inline int mkldnn_in_parallel() { return 0; }
 inline void mkldnn_thr_barrier() { assert(!"no barrier in TBB"); }
+
+#define PRAGMA_OMP(...)
+
 #endif
 
 /* MSVC still supports omp 2.0 only */
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread_parallel_nd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread_parallel_nd.hpp
index 77bf53b66..4a1f48767 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread_parallel_nd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_thread_parallel_nd.hpp
@@ -56,9 +56,9 @@ void parallel(int nthr, F f) {
 
 template <typename T0, typename F>
 void for_nd(const int ithr, const int nthr, const T0 &D0, F f) {
-    T0 d0{0}, end{0};
-    balance211(D0, nthr, ithr, d0, end);
-    for (; d0 < end; ++d0) f(d0);
+    T0 start{0}, end{0};
+    balance211(D0, nthr, ithr, start, end);
+    for (T0 d0 = start; d0 < end; ++d0) f(d0);
 }
 
 template <typename T0, typename T1, typename F>
@@ -143,6 +143,13 @@ void for_nd(const int ithr, const int nthr, const T0 &D0, const T1 &D1,
     }
 }
 
+// Skip a lambda function in the parameter pack.
+template <typename T>
+constexpr size_t get_work_amount(const T &v) { return 1; }
+template <typename T, typename ...Args>
+constexpr size_t get_work_amount(const T &v, Args &&...args)
+{ return (size_t)v * get_work_amount(utils::forward<Args>(args)...); }
+
 /* parallel_nd and parallel_nd_in_omp section */
 
 #if MKLDNN_THR != MKLDNN_THR_TBB
@@ -151,9 +158,13 @@ void parallel_nd(Args &&...args) {
 #if MKLDNN_THR == MKLDNN_THR_SEQ
     for_nd(0, 1, utils::forward<Args>(args)...);
 #elif MKLDNN_THR == MKLDNN_THR_OMP
-#   pragma omp parallel
-    for_nd(mkldnn_get_thread_num(), mkldnn_get_num_threads(),
-            utils::forward<Args>(args)...);
+    const bool do_parallel = get_work_amount(utils::forward<Args>(args)...) > 1;
+#   pragma omp parallel if (do_parallel)
+    {
+        const int nthr = !do_parallel ? 1 : mkldnn_get_num_threads();
+        const int ithr = !do_parallel ? 0 : mkldnn_get_thread_num();
+        for_nd(ithr, nthr, utils::forward<Args>(args)...);
+    }
 #endif
 }
 #else // MKLDNN_THR != MKLDNN_THR_TBB
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_traits.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_traits.hpp
index f5512b873..367a02ac4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_traits.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/mkldnn_traits.hpp
@@ -39,6 +39,7 @@ template <> struct prec_traits<data_type::s32> { typedef int32_t type; };
 template <> struct prec_traits<data_type::s16> { typedef int16_t type; };
 template <> struct prec_traits<data_type::s8> { typedef int8_t type; };
 template <> struct prec_traits<data_type::u8> { typedef uint8_t type; };
+template <> struct prec_traits<data_type::bin> { typedef uint8_t type; };
 
 template <> struct data_traits<float>
 { static constexpr data_type_t data_type = data_type::f32; };
@@ -71,9 +72,10 @@ PKIND_TRAITS_INST(pooling);
 PKIND_TRAITS_INST(lrn);
 PKIND_TRAITS_INST(batch_normalization);
 PKIND_TRAITS_INST(inner_product);
-PKIND_TRAITS_INST(convolution_relu);
 PKIND_TRAITS_INST(rnn);
 PKIND_TRAITS_INST(roi_pooling);
+PKIND_TRAITS_INST(binary_convolution);
+PKIND_TRAITS_INST(binarization);
 #undef PKIND_TRAITS_INST
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/nstl.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/nstl.hpp
index d9d03a561..5e42c3fc9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/nstl.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/nstl.hpp
@@ -47,8 +47,8 @@ inline const T& min(const T& a, const T& b) {
 
 template<typename T> void swap(T& t1, T& t2) {
     T tmp(t1);
-    t1=t2;
-    t2=tmp;
+    t1 = t2;
+    t2 = tmp;
 }
 
 // Rationale: MKL-DNN needs numeric limits implementation that does not
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive.hpp
index d1c4742d9..e91a627b7 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive.hpp
@@ -52,11 +52,11 @@ struct mkldnn_primitive: public mkldnn::impl::c_compatible {
 
     mkldnn_primitive(const mkldnn::impl::primitive_desc_t *pd,
             const input_vector &inputs, const output_vector &outputs)
-        : pd_(pd)
+        : pd_(pd->clone())
         , inputs_(inputs)
         , outputs_(outputs)
     {}
-    virtual ~mkldnn_primitive() {}
+    virtual ~mkldnn_primitive() { delete pd_; }
 
     /** returns primitive's engine */
     mkldnn::impl::engine_t *engine() const { return pd_->engine(); }
@@ -79,7 +79,7 @@ struct mkldnn_primitive: public mkldnn::impl::c_compatible {
      *   Suppose engine has a task pool and for some reasons submission failed.
      *   In this case primitive will set @p e's state to event::error
      */
-    virtual void execute(mkldnn::impl::event_t *e) = 0;
+    virtual void execute(mkldnn::impl::event_t *e) const = 0;
 
     /** returns data handle. Applicable for memory primitives only. */
     virtual mkldnn::impl::status_t get_data_handle(void **handle) const {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp
index 866c93444..d48ab95ed 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.cpp
@@ -49,13 +49,6 @@ status_t scales_t::set(int count, int mask, const float *scales) {
     return status::success;
 }
 
-mkldnn::impl::status_t scales_t::scale(float factor) {
-    int cnt = (count_ == 1) ? scales_buf_size : count_;
-    for (int c = 0; c < cnt; ++c)
-        scales_[c] *= factor;
-    return status::success;
-}
-
 }
 }
 
@@ -77,7 +70,7 @@ status_t post_ops_t::append_eltwise(float scale, alg_kind_t alg, float alpha,
     bool known_alg = one_of(alg, eltwise_relu, eltwise_tanh, eltwise_elu,
             eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
             eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic,
-            eltwise_clamp);
+            eltwise_clamp, eltwise_exp, eltwise_not);
     if (!known_alg)
         return invalid_arguments;
 
@@ -136,6 +129,24 @@ status_t post_ops_t::append_dw_conv(int in_h, int in_w, int ker_h, int ker_w, in
     return success;
 }
 
+status_t post_ops_t::append_binarization(alg_kind_t alg, const float* weights_data) {
+    using namespace mkldnn::impl::alg_kind;
+    bool known_alg = one_of(alg, binarization_depthwise);
+    if (!known_alg)
+        return invalid_arguments;
+
+    if (len_ == capacity)
+        return out_of_memory;
+
+    entry_[len_].kind = primitive_kind::binarization;
+    entry_[len_].binarization.alg = alg;
+    entry_[len_].binarization.weights_data = weights_data;
+
+    len_++;
+
+    return success;
+}
+
 status_t primitive_attr_t::set_round_mode(round_mode_t round_mode) {
     using namespace mkldnn::impl::round_mode;
 
@@ -320,6 +331,23 @@ status_t mkldnn_post_ops_get_params_eltwise(const post_ops_t *post_ops,
     return success;
 }
 
+status_t mkldnn_primitive_attr_set_rnn_data_qparams(
+        primitive_attr_t *attr, const float scale, const float shift) {
+    if (attr == nullptr)
+        return invalid_arguments;
+
+    return attr->rnn_data_qparams_.set(scale, shift);
+}
+
+status_t mkldnn_primitive_attr_set_rnn_weights_qparams(
+        primitive_attr_t *attr, int count, int mask, const float *scales) {
+    bool ok = !any_null(attr, scales) && count > 0 && mask >= 0;
+    if (!ok)
+        return invalid_arguments;
+
+    return attr->rnn_weights_qparams_.set(count, mask, scales);
+}
+
 status_t mkldnn_post_ops_append_depthwise(post_ops_t *post_ops,
         alg_kind_t kind, const float* weights_data, const float* biases_data) {
     if (post_ops == nullptr)
@@ -375,4 +403,26 @@ status_t mkldnn_post_ops_get_params_dw_conv(const post_ops_t *post_ops,
     *biases_data = e.biases_data;
 
     return success;
-}
-\ No newline at end of file
+}
+
+status_t mkldnn_post_ops_append_binarization(post_ops_t *post_ops, alg_kind_t kind, const float* weights_data) {
+    if (post_ops == nullptr)
+        return invalid_arguments;
+
+    return post_ops->append_binarization(kind, weights_data);
+}
+
+status_t mkldnn_post_ops_get_params_binarization(const post_ops_t *post_ops, int index, alg_kind_t *alg,
+        const float** weights_data) {
+    bool ok = true
+        && simple_get_params_check(post_ops, index, primitive_kind::binarization)
+        && !any_null(alg, weights_data);
+    if (!ok)
+        return invalid_arguments;
+
+    const auto &e = post_ops->entry_[index].binarization;
+    *alg = e.alg;
+    *weights_data = e.weights_data;
+
+    return success;
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp
index 3f56d9963..949449f4a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_attr.hpp
@@ -27,6 +27,20 @@
 namespace mkldnn {
 namespace impl {
 
+struct rnn_data_qparams_t : public c_compatible {
+    rnn_data_qparams_t() : scale_(1.), shift_(0.) {}
+    bool has_default_values() const { return (scale_ == 1. && shift_ == 0.); }
+
+    status_t set(float scale, float shift) {
+        scale_ = scale;
+        shift_ = shift;
+        return status::success;
+    }
+
+    float scale_;
+    float shift_;
+};
+
 struct scales_t: public c_compatible {
     scales_t(): count_(1), mask_(0), scales_(scales_buf_)
     { set(1.); }
@@ -54,7 +68,6 @@ struct scales_t: public c_compatible {
 
     status_t set(int count, int mask, const float *scales);
     status_t set(float single_scale) { return this->set(1, 0, &single_scale); }
-    status_t scale(float factor);
 
     int count_;
     int mask_;
@@ -79,13 +92,15 @@ private:
 
 struct mkldnn_post_ops: public mkldnn::impl::c_compatible {
     struct entry_t {
+        struct eltwise_t {
+            mkldnn::impl::alg_kind_t alg;
+            float scale, alpha, beta;
+        };
+
         mkldnn::impl::primitive_kind_t kind;
         union {
             struct { float scale; } sum;
-            struct {
-                mkldnn::impl::alg_kind_t alg;
-                float scale, alpha, beta;
-            } eltwise;
+            eltwise_t eltwise;
             struct {
                 mkldnn::impl::alg_kind_t alg;
                 const float* weights_data;
@@ -101,34 +116,45 @@ struct mkldnn_post_ops: public mkldnn::impl::c_compatible {
                 const float* weights_data;
                 const float* biases_data;
             } dw_conv;
+            struct {
+                mkldnn::impl::alg_kind_t alg;
+                const float* weights_data;
+            } binarization;
         };
 
+        bool is_eltwise(bool require_scale_one = true) const {
+            using namespace mkldnn::impl;
+            return kind == primitive_kind::eltwise
+                && IMPLICATION(require_scale_one, eltwise.scale == 1.f);
+        }
+
         bool is_relu(bool require_scale_one = true,
                 bool require_nslope_zero = true) const {
             using namespace mkldnn::impl;
-            return kind == primitive_kind::eltwise
-                && IMPLICATION(require_scale_one, eltwise.scale == 1.f)
+            return is_eltwise(require_scale_one)
                 && eltwise.alg == alg_kind::eltwise_relu
                 && IMPLICATION(require_nslope_zero, eltwise.alpha == 0.f);
         }
+
         bool is_sum(bool require_scale_one = true) const {
             using namespace mkldnn::impl;
             return kind == primitive_kind::sum
                 && IMPLICATION(require_scale_one, sum.scale == 1.f);
         }
-        bool is_eltwise(bool require_scale_one = true) const {
-            using namespace mkldnn::impl;
-            return kind == primitive_kind::eltwise
-                   && IMPLICATION(require_scale_one, eltwise.scale == 1.f);
-        }
+
         bool is_depthwise() const {
             using namespace mkldnn::impl;
             return kind == primitive_kind::depthwise;
         }
+
         bool is_dw_conv() const {
             using namespace mkldnn::impl;
             return kind == primitive_kind::convolution;
         }
+        bool is_binarization() const {
+            using namespace mkldnn::impl;
+            return kind == primitive_kind::binarization;
+        }
     };
 
     mkldnn_post_ops(): len_(0) {}
@@ -141,6 +167,7 @@ struct mkldnn_post_ops: public mkldnn::impl::c_compatible {
     mkldnn::impl::status_t append_dw_conv(int in_h, int in_w, int ker_h, int ker_w, int str_h, int str_w,
                                           const float* weights_data,
                                           const float* biases_data);
+    mkldnn::impl::status_t append_binarization(mkldnn::impl::alg_kind_t alg, const float* weights_data);
 
     int find(mkldnn::impl::primitive_kind_t kind, int start = 0,
             int stop = -1) const {
@@ -173,7 +200,9 @@ struct mkldnn_primitive_attr: public mkldnn::impl::c_compatible {
        return true
             && round_mode_ == mkldnn::impl::round_mode::nearest
             && output_scales_.has_default_values()
-            && post_ops_.has_default_values() ;
+            && post_ops_.has_default_values()
+            && rnn_data_qparams_.has_default_values()
+            && rnn_weights_qparams_.has_default_values();
     }
 
     mkldnn::impl::status_t set_round_mode(
@@ -184,6 +213,8 @@ struct mkldnn_primitive_attr: public mkldnn::impl::c_compatible {
     mkldnn::impl::round_mode_t round_mode_;
     mkldnn::impl::scales_t output_scales_;
     mkldnn::impl::post_ops_t post_ops_;
+    mkldnn::impl::rnn_data_qparams_t rnn_data_qparams_;
+    mkldnn::impl::scales_t rnn_weights_qparams_;
 };
 
 #endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.cpp
index c88aaeb5b..c288aef57 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.cpp
@@ -35,6 +35,9 @@ status_t primitive_desc_t::query(query_t what, int idx, void *result) const {
         case query::engine: *(engine_t**)result = engine(); break;
         case query::primitive_kind: *(primitive_kind_t*)result = kind(); break;
 
+        case query::memory_consumption_s64:
+            *(ptrdiff_t*)result = scratchpad_registry().size(); break;
+
         case query::op_d:
             if (idx != 0 || op_desc() == nullptr) return invalid_arguments;
             *(const_c_op_desc_t *)result
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.hpp
index e13b15631..542d38d82 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/primitive_desc.hpp
@@ -20,6 +20,7 @@
 #include "mkldnn.h"
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
 #include "primitive_attr.hpp"
@@ -47,6 +48,11 @@ struct mkldnn_primitive_desc: public mkldnn::impl::c_compatible {
     virtual void init_info() {}
     const char *info() const { return info_; }
 
+    mkldnn::impl::memory_tracking::registry_t &scratchpad_registry()
+    { return scratchpad_registry_; }
+    const mkldnn::impl::memory_tracking::registry_t &scratchpad_registry() const
+    { return scratchpad_registry_; }
+
     virtual const mkldnn::impl::op_desc_t *op_desc() const = 0;
 
 #   define DECLARE_PD_STUB(stub) \
@@ -101,6 +107,8 @@ protected:
     mkldnn::impl::primitive_kind_t kind_;
 
     char info_[MKLDNN_VERBOSE_BUF_LEN];
+
+    mkldnn::impl::memory_tracking::registry_t scratchpad_registry_;
 };
 
 #define DECLARE_COMMON_PD_t(impl_name, ...) \
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/rnn.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/rnn.cpp
index 432763b07..36967431a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/rnn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/rnn.cpp
@@ -19,6 +19,7 @@
 #include "c_types_map.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
+#include "cpu/gemm/os_blas.hpp"
 
 using namespace mkldnn::impl;
 using namespace mkldnn::impl::status;
@@ -63,7 +64,7 @@ status_t mkldnn_rnn_cell_desc_init(rnn_cell_desc_t *rnn_cell_desc,
             && IMPLICATION(cell_kind == vanilla_rnn,
                     one_of(act_f, eltwise_relu, eltwise_tanh, eltwise_logistic));
     if (!args_ok)
-        return status::invalid_arguments;
+        return invalid_arguments;
 
     auto rcd = mkldnn_rnn_cell_desc_t();
 
@@ -75,7 +76,7 @@ status_t mkldnn_rnn_cell_desc_init(rnn_cell_desc_t *rnn_cell_desc,
 
     *rnn_cell_desc = rcd;
 
-    return status::success;
+    return success;
 }
 
 int mkldnn_rnn_cell_get_gates_count(const rnn_cell_desc_t *rnn_cell_desc) {
@@ -100,6 +101,161 @@ int mkldnn_rnn_cell_get_states_count(const rnn_cell_desc_t *rnn_cell_desc) {
     return 0;
 }
 
+status_t check_data_type_consistency_fwd(const rnn_cell_desc_t *rnn_cell_desc,
+        prop_kind_t prop_kind, const memory_desc_t *src_layer_desc,
+        const memory_desc_t *src_iter_desc,
+        const memory_desc_t *weights_layer_desc,
+        const memory_desc_t *weights_iter_desc, const memory_desc_t *bias_desc,
+        const memory_desc_t *dst_layer_desc,
+        const memory_desc_t *dst_iter_desc) {
+    using namespace data_type;
+    data_type_t src_layer_dt = src_layer_desc->data_type;
+    data_type_t dst_layer_dt = dst_layer_desc->data_type;
+    data_type_t weights_iter_dt = weights_iter_desc->data_type;
+    data_type_t weights_layer_dt = weights_layer_desc->data_type;
+
+    bool is_f32 = everyone_is(f32, src_layer_dt, dst_layer_dt, weights_iter_dt,
+                          weights_layer_dt)
+            && IMPLICATION(!is_zero_md(src_iter_desc),
+                          src_iter_desc->data_type == f32)
+            && IMPLICATION(!is_zero_md(dst_iter_desc),
+                          dst_iter_desc->data_type == f32)
+            && IMPLICATION(!is_zero_md(bias_desc), bias_desc->data_type == f32);
+
+#if USE_MKL_PACKED_GEMM
+    bool is_u8u8u8 = src_layer_dt == u8
+            && IMPLICATION(!is_zero_md(src_iter_desc),
+                             src_iter_desc->data_type == u8)
+            && IMPLICATION(!is_zero_md(dst_iter_desc),
+                             dst_iter_desc->data_type == u8)
+            && one_of(dst_layer_dt, u8, f32)
+            && everyone_is(s8, weights_iter_dt, weights_layer_dt)
+            && IMPLICATION(!is_zero_md(bias_desc), bias_desc->data_type == f32);
+
+    bool is_f32u8f32 = src_layer_dt == u8
+            && IMPLICATION(!is_zero_md(src_iter_desc),
+                               src_iter_desc->data_type == f32)
+            && IMPLICATION(!is_zero_md(dst_iter_desc),
+                               dst_iter_desc->data_type == f32)
+            && one_of(dst_layer_dt, u8, f32)
+            && everyone_is(s8, weights_iter_dt, weights_layer_dt)
+            && IMPLICATION(!is_zero_md(bias_desc), bias_desc->data_type == f32);
+
+    bool is_inference = prop_kind == prop_kind::forward_inference;
+    bool is_lstm = rnn_cell_desc->cell_kind == mkldnn_vanilla_lstm;
+
+    return (is_f32 || ((is_u8u8u8 || is_f32u8f32) && is_lstm && is_inference))
+            ? success
+            : unimplemented;
+#else
+    return is_f32 ? success : unimplemented;
+#endif
+}
+
+status_t check_dim_consistency(const rnn_cell_desc_t *rnn_cell_desc,
+        rnn_direction_t direction, int L, int D, int T, int N, int S, int G,
+        int SLC, int SIC, int DLC, int DIC, const memory_desc_t *src_layer_desc,
+        const memory_desc_t *src_iter_desc,
+        const memory_desc_t *weights_layer_desc,
+        const memory_desc_t *weights_iter_desc, const memory_desc_t *bias_desc,
+        const memory_desc_t *dst_layer_desc,
+        const memory_desc_t *dst_iter_desc) {
+    bool args_ok;
+
+    // * algorithm specific
+    args_ok = true
+        && IMPLICATION(rnn_cell_desc->cell_kind == alg_kind::vanilla_gru,
+                       DIC == SIC);
+    if (!args_ok) return invalid_arguments;
+    int extra_bias =
+            rnn_cell_desc->cell_kind == alg_kind::gru_linear_before_reset;
+
+    // * on num layers
+    args_ok = true
+        && L == weights_layer_desc->dims[0]
+        && L == weights_iter_desc->dims[0]
+        && IMPLICATION(!is_zero_md(bias_desc), L == bias_desc->dims[0])
+        && IMPLICATION(!is_zero_md(src_iter_desc), L == src_iter_desc->dims[0])
+        && IMPLICATION(!is_zero_md(dst_iter_desc), L == dst_iter_desc->dims[0]);
+    if (!args_ok) return invalid_arguments;
+
+    // * on num directions
+    args_ok = true
+        && D == weights_layer_desc->dims[1]
+        && D == weights_iter_desc->dims[1]
+        && IMPLICATION(!is_zero_md(bias_desc), D == bias_desc->dims[1])
+        && IMPLICATION(!is_zero_md(src_iter_desc), D == src_iter_desc->dims[1])
+        && IMPLICATION(!is_zero_md(dst_iter_desc), D == dst_iter_desc->dims[1]);
+    if (!args_ok) return invalid_arguments;
+
+    // * on num iterations
+    args_ok = true
+        && T == src_layer_desc->dims[0]
+        && T == dst_layer_desc->dims[0];
+    if (!args_ok) return invalid_arguments;
+
+    // * on mb
+    args_ok = true
+        && N == src_layer_desc->dims[1]
+        && N == dst_layer_desc->dims[1]
+        && IMPLICATION(!is_zero_md(src_iter_desc), N == src_iter_desc->dims[3])
+        && IMPLICATION(!is_zero_md(dst_iter_desc), N == dst_iter_desc->dims[3]);
+    if (!args_ok) return invalid_arguments;
+
+    // * on num gates
+    args_ok = true
+        && G == mkldnn_rnn_cell_get_gates_count(rnn_cell_desc)
+        && G == weights_layer_desc->dims[3]
+        && G == weights_iter_desc->dims[3]
+        && IMPLICATION(!is_zero_md(bias_desc),
+                G + extra_bias == bias_desc->dims[2]);
+    if (!args_ok) return invalid_arguments;
+
+    // * on num states
+    args_ok = true
+        && S == mkldnn_rnn_cell_get_states_count(rnn_cell_desc)
+        && IMPLICATION(!is_zero_md(src_iter_desc), S == src_iter_desc->dims[2])
+        && IMPLICATION(!is_zero_md(dst_iter_desc), S == dst_iter_desc->dims[2]);
+    if (!args_ok) return invalid_arguments;
+
+    // * on slc
+    args_ok = true
+        && SLC == weights_layer_desc->dims[2]
+        && SLC == src_layer_desc->dims[2];
+    if (!args_ok) return invalid_arguments;
+
+    // * on sic
+    args_ok = true
+        && SIC == weights_iter_desc->dims[2]
+        && IMPLICATION(!is_zero_md(src_iter_desc),
+                SIC == src_iter_desc->dims[4]);
+    if (!args_ok) return invalid_arguments;
+
+    // * on dlc
+    int dlc_multiplier = (direction == mkldnn_bidirectional_concat) ? 2 : 1;
+    args_ok = true
+        && DLC == dlc_multiplier * DIC
+        && DLC == dst_layer_desc->dims[2];
+    if (!args_ok) return invalid_arguments;
+
+    // * on dic
+    args_ok = true
+        && DIC == weights_layer_desc->dims[4]
+        && DIC == weights_iter_desc->dims[4]
+        && IMPLICATION(!is_zero_md(bias_desc), DIC == bias_desc->dims[3])
+        && IMPLICATION(!is_zero_md(dst_iter_desc),
+                DIC == dst_iter_desc->dims[4]);
+    if (!args_ok) return invalid_arguments;
+
+    // * unrolling/fusion conditions
+    args_ok = true
+        && IMPLICATION(L > 1, (dlc_multiplier * SLC) == DLC)
+        && IMPLICATION(T > 1, SIC == DIC);
+    if (!args_ok) return invalid_arguments;
+
+    return success;
+}
+
 status_t MKLDNN_API mkldnn_rnn_forward_desc_init(mkldnn_rnn_desc_t *rnn_desc,
         prop_kind_t prop_kind, const rnn_cell_desc_t *rnn_cell_desc,
         const rnn_direction_t direction, const memory_desc_t *src_layer_desc,
@@ -111,43 +267,33 @@ status_t MKLDNN_API mkldnn_rnn_forward_desc_init(mkldnn_rnn_desc_t *rnn_desc,
     bool args_ok = true && rnn_cell_desc != nullptr
             && !any_null(src_layer_desc, weights_layer_desc, weights_iter_desc,
                        dst_layer_desc);
-    if (!args_ok)
-        return invalid_arguments;
-
-    int DIC = 0, L = 0;
-    if (weights_layer_desc->ndims) {
-        DIC = weights_layer_desc->dims[4];
-        L = weights_layer_desc->dims[0];
-    } else if (weights_iter_desc->ndims) {
-        DIC = weights_iter_desc->dims[4];
-        L = weights_iter_desc->dims[0];
-    } else {
-        assert(!"cannot query cell state size");
-        return unimplemented;
-    }
+    if (!args_ok) return invalid_arguments;
 
+    //check dimensions consistency
+    int L = weights_layer_desc->dims[0];
+    int T = src_layer_desc->dims[0];
+    int N = src_layer_desc->dims[1];
     const int D = one_of(direction, mkldnn_unidirectional_left2right,
                           mkldnn_unidirectional_right2left) ?
             1 :
             2;
-    const int DLC = (direction == mkldnn_bidirectional_concat ? 2 : 1) * DIC;
-
-    args_ok = args_ok && D == weights_layer_desc->dims[1]
-            && D == weights_iter_desc->dims[1]
-            && DIC == weights_layer_desc->dims[4]
-            && DIC == weights_iter_desc->dims[4]
-            && DLC == dst_layer_desc->dims[2] && L == weights_iter_desc->dims[0]
-            && IMPLICATION(!is_zero_md(dst_iter_desc), true
-                               && DIC == dst_iter_desc->dims[4]
-                               && L == dst_iter_desc->dims[0])
-            && IMPLICATION(!is_zero_md(bias_desc), L == bias_desc->dims[0])
-            && IMPLICATION(
-                       !is_zero_md(src_iter_desc), L == src_iter_desc->dims[0])
-            && IMPLICATION(rnn_cell_desc->cell_kind == alg_kind::vanilla_gru,
-                       DIC == weights_iter_desc->dims[2]);
-    if (!args_ok)
-        return invalid_arguments;
-
+    int G = mkldnn_rnn_cell_get_gates_count(rnn_cell_desc);
+    int S = mkldnn_rnn_cell_get_states_count(rnn_cell_desc);
+    int SLC = src_layer_desc->dims[2];
+    int SIC = weights_iter_desc->dims[2];
+    int DLC = dst_layer_desc->dims[2];
+    int DIC = weights_layer_desc->dims[4];
+
+    CHECK(check_dim_consistency(rnn_cell_desc, direction, L, D, T, N, S,
+            G, SLC, SIC, DLC, DIC, src_layer_desc, src_iter_desc,
+            weights_layer_desc, weights_iter_desc, bias_desc, dst_layer_desc,
+            dst_iter_desc));
+
+    CHECK(check_data_type_consistency_fwd(rnn_cell_desc, prop_kind,
+            src_layer_desc, src_iter_desc, weights_layer_desc,
+            weights_iter_desc, bias_desc, dst_layer_desc, dst_iter_desc));
+
+    // Create the descriptor
     mkldnn_rnn_desc_t rd = zero_rnn_desc();
 
     rd.primitive_kind = primitive_kind::rnn;
@@ -179,28 +325,16 @@ status_t MKLDNN_API mkldnn_rnn_backward_desc_init(mkldnn_rnn_desc_t *rnn_desc,
         const memory_desc_t *diff_weights_layer_desc,
         const memory_desc_t *diff_weights_iter_desc,
         const memory_desc_t *diff_bias_desc,
-        const memory_desc_t *diff_dst_layer,
+        const memory_desc_t *diff_dst_layer_desc,
         const memory_desc_t *diff_dst_iter_desc) {
     bool args_ok = true
             && !any_null(src_layer_desc, weights_layer_desc, weights_iter_desc,
                        dst_layer_desc, diff_src_layer_desc,
                        diff_weights_layer_desc, diff_weights_iter_desc,
-                       diff_dst_layer);
+                       diff_dst_layer_desc);
     if (!args_ok)
         return invalid_arguments;
 
-    int DIC = 0, L = 0;
-    if (weights_layer_desc->ndims) {
-        DIC = weights_layer_desc->dims[4];
-        L = weights_layer_desc->dims[0];
-    } else if (weights_iter_desc->ndims) {
-        DIC = weights_iter_desc->dims[4];
-        L = weights_iter_desc->dims[0];
-    } else {
-        assert(!"cannot query cell state size");
-        return unimplemented;
-    }
-
     auto xnor_md = [=](const memory_desc_t *a_md, const memory_desc_t *b_md) {
         return is_zero_md(a_md) == is_zero_md(b_md);
     };
@@ -211,27 +345,32 @@ status_t MKLDNN_API mkldnn_rnn_backward_desc_init(mkldnn_rnn_desc_t *rnn_desc,
     if (!args_ok)
         return invalid_arguments;
 
-    int D = one_of(direction, mkldnn_unidirectional_left2right,
-                    mkldnn_unidirectional_right2left) ?
+    //check dimensions consistency
+    int L = weights_layer_desc->dims[0];
+    int T = src_layer_desc->dims[0];
+    int N = src_layer_desc->dims[1];
+    const int D = one_of(direction, mkldnn_unidirectional_left2right,
+                          mkldnn_unidirectional_right2left) ?
             1 :
             2;
-    int DLC = (direction == mkldnn_bidirectional_concat ? 2 : 1) * DIC;
-
-    args_ok = args_ok && D == weights_layer_desc->dims[1]
-            && D == weights_iter_desc->dims[1]
-            && DIC == weights_layer_desc->dims[4]
-            && DIC == weights_iter_desc->dims[4]
-            && DLC == dst_layer_desc->dims[2] && L == weights_iter_desc->dims[0]
-            && IMPLICATION(!is_zero_md(dst_iter_desc), true
-                               && DIC == dst_iter_desc->dims[4]
-                               && L == dst_iter_desc->dims[0])
-            && IMPLICATION(!is_zero_md(bias_desc), L == bias_desc->dims[0])
-            && IMPLICATION(
-                       !is_zero_md(src_iter_desc), L == src_iter_desc->dims[0])
-            && IMPLICATION(rnn_cell_desc->cell_kind == alg_kind::vanilla_gru,
-                       DIC == weights_iter_desc->dims[2]);
-    if (!args_ok)
-        return invalid_arguments;
+    int G = mkldnn_rnn_cell_get_gates_count(rnn_cell_desc);
+    int S = mkldnn_rnn_cell_get_states_count(rnn_cell_desc);
+    int SLC = src_layer_desc->dims[2];
+    int SIC = weights_iter_desc->dims[2];
+    int DLC = dst_layer_desc->dims[2];
+    int DIC = weights_layer_desc->dims[4];
+
+    status_t st = check_dim_consistency(rnn_cell_desc, direction, L, D, T, N, S,
+            G, SLC, SIC, DLC, DIC, src_layer_desc, src_iter_desc,
+            weights_layer_desc, weights_iter_desc, bias_desc, dst_layer_desc,
+            dst_iter_desc);
+    if (st != success) return st;
+
+    st = check_dim_consistency(rnn_cell_desc, direction, L, D, T, N, S,
+            G, SLC, SIC, DLC, DIC, diff_src_layer_desc, diff_src_iter_desc,
+            diff_weights_layer_desc, diff_weights_iter_desc, diff_bias_desc,
+            diff_dst_layer_desc, diff_dst_iter_desc);
+    if (st != success) return st;
 
     mkldnn_rnn_desc_t rd = zero_rnn_desc();
 
@@ -252,7 +391,7 @@ status_t MKLDNN_API mkldnn_rnn_backward_desc_init(mkldnn_rnn_desc_t *rnn_desc,
     rd.diff_weights_layer_desc = copy_maybe_null(diff_weights_layer_desc);
     rd.diff_weights_iter_desc = copy_maybe_null(diff_weights_iter_desc);
     rd.diff_bias_desc = copy_maybe_null(diff_bias_desc);
-    rd.diff_dst_layer_desc = copy_maybe_null(diff_dst_layer);
+    rd.diff_dst_layer_desc = copy_maybe_null(diff_dst_layer_desc);
     rd.diff_dst_iter_desc = copy_maybe_null(diff_dst_iter_desc);
 
     *rnn_desc = rd;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/rnn_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/rnn_pd.hpp
index 5b11d5ad0..53facc8bb 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/rnn_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/rnn_pd.hpp
@@ -62,153 +62,6 @@ struct rnn_pd_t : public primitive_desc_t {
                 prop_kind::forward_inference);
     }
 
-    inline size_t ws_states_size() {
-        return (size_t)(L() + 1) * D() * (T() + 1) * S() * MB() * S_GLD();
-    }
-
-    inline size_t ws_diff_states_size() {
-        return (size_t)(L() + 1) * D() * (T() + 1) * (S() + 1) * MB() * S_GLD();
-    }
-
-    inline size_t ws_weights_layer_size() {
-        size_t ld = is_fwd() ? G_GLD() : S_GLD();
-        size_t not_ld =  is_fwd() ? SLC() : G() * DIC();
-        return (size_t)(L() * D() * ld * not_ld);
-    }
-
-    inline size_t ws_weights_iter_size() {
-        size_t ld = is_fwd() ? G_GLD() : S_GLD();
-        size_t not_ld =  is_fwd() ? SIC() : G() * DIC();
-        return (size_t)(L() * D() * ld * not_ld);
-    }
-
-    inline size_t ws_diff_weights_layer_size() {
-        return (size_t)(L() * D() * SLC() * GC());
-    }
-
-    inline size_t ws_diff_weights_iter_size() {
-        return (size_t)(L() * D() * SIC() * GC());
-    }
-
-    inline size_t ws_gates_size() {
-        return (size_t) L() * D() * T() * MB() * GC();
-    }
-
-    inline size_t ws_cell_comp_size() {
-        return (size_t)is_lbr() * MB() * GC();
-    }
-
-    inline size_t ws_grid_comp_size() {
-        return (size_t)is_lbr() * is_training() * L() * D() * T() * MB() * DIC();
-    }
-
-    inline int ws_per_cell() {
-        return is_lbr() *  MB() * DIC();
-    }
-
-    // returns the scratchpad size if use_workspace is true
-    // returns the workspace size if use_workspace is false,
-    // and all scratchpad boolean are false
-    inline size_t set_offsets( bool use_workspace,
-        size_t &ws_gates_offset, size_t &ws_states_offset,
-        size_t &ws_diff_states_offset, size_t &ws_grid_comp_offset,
-        bool use_ws_cell_comp, size_t &ws_cell_comp_offset,
-        bool copy_weights_layer_, size_t &ws_weights_layer_offset,
-        bool copy_weights_iter_, size_t &ws_weights_iter_offset,
-        bool copy_diff_weights_layer, size_t &ws_diff_weights_layer_offset,
-        bool copy_diff_weights_iter, size_t &ws_diff_weights_iter_offset) {
-        const size_t page_size = 4096; // 2097152;
-        size_t current_offset;
-
-        /* Mandatory workspaces: go to workspace if use_workspace, scratchpad otherwise */
-        current_offset = 0;  // assumes the workspace base pointer is page aligned
-        ws_gates_offset = current_offset;
-        current_offset += ws_gates_size();
-
-        current_offset = utils::rnd_up(current_offset, page_size);
-        ws_states_offset = current_offset;
-        current_offset += ws_states_size();
-
-        current_offset = utils::rnd_up(current_offset, page_size);
-        ws_diff_states_offset = current_offset;
-        current_offset += ws_diff_states_size();
-
-        current_offset = utils::rnd_up(current_offset, page_size);
-        ws_grid_comp_offset = current_offset;
-        current_offset += ws_grid_comp_size();
-
-        // ws_cell_comp is optional
-        if (use_ws_cell_comp) {
-            current_offset = utils::rnd_up(current_offset, page_size);
-            ws_cell_comp_offset = current_offset;
-            current_offset += ws_cell_comp_size();
-        }
-
-        /* Optional scratchpads */
-        // Assumes the scratchpad base pointer is page aligned.
-        // If use_workspace, the following goes to scratchpad alone,
-        // otherwise, all goes to scratchpad and continue incrementing offset
-        current_offset = use_workspace ? 0 : current_offset;
-
-        if (copy_weights_layer_) {
-            current_offset = utils::rnd_up(current_offset, page_size);
-            ws_weights_layer_offset = current_offset;
-            current_offset += ws_weights_layer_size();
-        }
-
-        if (copy_weights_iter_) {
-            current_offset = utils::rnd_up(current_offset, page_size);
-            ws_weights_iter_offset = current_offset;
-            current_offset += ws_weights_iter_size();
-        }
-
-        if (copy_diff_weights_layer) {
-            current_offset = utils::rnd_up(current_offset, page_size);
-            ws_diff_weights_layer_offset = current_offset;
-            current_offset += ws_diff_weights_layer_size();
-        }
-
-        if (copy_diff_weights_iter) {
-            current_offset = utils::rnd_up(current_offset, page_size);
-            ws_diff_weights_iter_offset = current_offset;
-            current_offset += ws_diff_weights_iter_size();
-        }
-
-        return current_offset;
-    }
-
-    inline size_t get_ws_size() {
-        size_t ws_gates_offset, ws_states_offset,
-            ws_diff_states_offset,ws_grid_comp_offset,
-            ws_cell_comp_offset, ws_weights_layer_offset,
-            ws_weights_iter_offset, ws_diff_weights_layer_offset,
-            ws_diff_weights_iter_offset;
-        return set_offsets( false,
-                     ws_gates_offset, ws_states_offset,
-                     ws_diff_states_offset, ws_grid_comp_offset,
-                     is_lbr(), ws_cell_comp_offset,
-                     false, ws_weights_layer_offset,
-                     false, ws_weights_iter_offset,
-                     false, ws_diff_weights_layer_offset,
-                     false, ws_diff_weights_iter_offset);
-    }
-
-    inline size_t get_scratchpad_size(bool use_workspace) {
-        size_t ws_gates_offset, ws_states_offset,
-            ws_diff_states_offset,ws_grid_comp_offset,
-            ws_cell_comp_offset, ws_weights_layer_offset,
-            ws_weights_iter_offset, ws_diff_weights_layer_offset,
-            ws_diff_weights_iter_offset;
-        return set_offsets(use_workspace,
-                     ws_gates_offset, ws_states_offset,
-                     ws_diff_states_offset, ws_grid_comp_offset,
-                     false, ws_cell_comp_offset,
-                     false, ws_weights_layer_offset,
-                     false, ws_weights_iter_offset,
-                     false, ws_diff_weights_layer_offset,
-                     false, ws_diff_weights_iter_offset);
-    }
-
     int T() const { return desc_.src_layer_desc.dims[0]; }
     int MB() const { return desc_.src_layer_desc.dims[1]; }
 
@@ -223,110 +76,6 @@ struct rnn_pd_t : public primitive_desc_t {
 
     int DLC() const { return desc_.dst_layer_desc.dims[2]; }
 
-    int get_good_ld(int dim){
-        // we want matrices leading dimentions to be 64-byte aligned,
-        // and not divisible by 256 to avoid 4K aliasing effects
-        int ld = utils::rnd_up(dim, (int)(64/sizeof(float)));
-        return (ld % 256 == 0) ? ld + 64/sizeof(float) : ld;
-    }
-
-    int WIC() {
-        // wic will be the leading dimension of our B matrices
-        return get_good_ld(nstl::max(SLC(), nstl::max(SIC(), DIC())));
-    }
-
-    int GC() {
-        // gc will be the leading dimension of our C matrices
-        return get_good_ld(G() * DIC());
-    }
-
-    /* replacement functions for meaningless WIC and GC:
-       - LD stands for leading dimension
-       - GLD stands for good leading dimension
-       - NLD stands for not leading dimension (so the other dim)
-    */
-    int G_GLD() {
-        // good leading dimension for the gates
-        // C matrices for fwd, B matrices for bwd
-        return get_good_ld(G() * DIC());
-    }
-
-    int S_GLD() {
-        // good leading dimension for the states
-        // B matrices for fwd, B matrices for bwd_w, C matrices for bwd_d
-        return get_good_ld(nstl::max(SLC(), nstl::max(SIC(), DIC())));
-    }
-
-    int W_GLD() {
-        // good leading dimension for the weights
-        return is_fwd() ? G_GLD() : S_GLD();
-    }
-
-    int DW_GLD() {
-        // good leading dimension for the diff weights
-        return weights_copy_enabled() ? G_GLD() : G() * DIC();
-    }
-
-    int weights_copy_enabled() { return (T() > 1); }
-
-    int get_weights_ld(int feature_dim) {
-        return is_fwd() ? G() * DIC() : feature_dim;
-    }
-
-    int get_weights_nld(int feature_dim) {
-        return !(is_fwd()) ? G() * DIC() : feature_dim;
-    }
-
-    int WL_LD() {
-        return get_weights_ld(SLC());
-    }
-
-    int WL_GLD() {
-        return weights_copy_enabled() ? get_good_ld(WL_LD()) : WL_LD();
-    }
-
-    int WI_LD() {
-        return get_weights_ld(SIC());
-    }
-
-    int WI_GLD() {
-        return weights_copy_enabled() ? get_good_ld(WI_LD()) : WI_LD();
-    }
-
-    int DWL_LD() {
-        return G() * DIC();
-    }
-
-    int DWL_GLD() {
-        return weights_copy_enabled() ? get_good_ld(DWL_LD()) : DWL_LD();
-    }
-
-    int DWI_LD() {
-        return G() * DIC();
-    }
-
-    int DWI_GLD() {
-        return weights_copy_enabled() ? get_good_ld(DWI_LD()) : DWI_LD();
-    }
-
-    int WL_NLD() {
-        return get_weights_nld(SLC());
-    }
-
-    int WI_NLD() {
-        return get_weights_nld(SIC());
-    }
-
-    int DWL_NLD() {
-        return SLC();
-    }
-
-    int DWI_NLD() {
-        return SIC();
-    }
-
-    int S() const { return mkldnn_rnn_cell_get_states_count(&desc_.cell_desc); }
-
     bool with_bias() const {
         return !memory_desc_wrapper(desc_.bias_desc).is_zero();
     }
@@ -397,7 +146,7 @@ struct rnn_fwd_pd_t : public rnn_pd_t {
 
 struct rnn_bwd_pd_t : public rnn_pd_t {
     typedef rnn_bwd_pd_t base_class;
-    typedef rnn_bwd_pd_t hint_class;
+    typedef rnn_fwd_pd_t hint_class;
 
     using rnn_pd_t::rnn_pd_t;
     virtual ~rnn_bwd_pd_t() {}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/roi_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/roi_pooling.cpp
index ba78dbd46..f1f2334e4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/roi_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/roi_pooling.cpp
@@ -33,7 +33,7 @@ status_t roi_pooling_desc_init(roi_pooling_desc_t *roi_pool_desc,
         memory_desc_t *src_descs, int num_src, const memory_desc_t *dst_desc,
         int pooled_h, int pooled_w, double spatial_scale) {
     
-    roi_pooling_desc_t pd = {};
+    auto pd = roi_pooling_desc_t();
     pd.primitive_kind = primitive_kind::roi_pooling;
     pd.prop_kind = prop_kind;
     pd.pooled_h = pooled_h;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/scratchpad.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/scratchpad.cpp
index 30de4a416..31a56c2de 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/scratchpad.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/scratchpad.cpp
@@ -79,14 +79,14 @@ struct global_scratchpad_t : public scratchpad_t {
     }
 
 private:
-    THREAD_LOCAL static char *scratchpad_;
-    THREAD_LOCAL static size_t size_;
-    THREAD_LOCAL static unsigned int reference_count_;
+    thread_local static char *scratchpad_;
+    thread_local static size_t size_;
+    thread_local static unsigned int reference_count_;
 };
 
-THREAD_LOCAL char *global_scratchpad_t::scratchpad_ = nullptr;
-THREAD_LOCAL size_t global_scratchpad_t::size_ = 0;
-THREAD_LOCAL unsigned int global_scratchpad_t::reference_count_ = 0;
+thread_local char *global_scratchpad_t::scratchpad_ = nullptr;
+thread_local size_t global_scratchpad_t::size_ = 0;
+thread_local unsigned int global_scratchpad_t::reference_count_ = 0;
 
 
 /*
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/softmax_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/softmax_pd.hpp
index cb156e7c8..44032f7a9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/softmax_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/softmax_pd.hpp
@@ -102,9 +102,9 @@ struct softmax_bwd_pd_t: public primitive_desc_t {
     virtual const memory_pd_t *output_pd(int index = 0) const override
     { return index == 0 ? diff_src_pd() : nullptr; }
 
-    virtual int n_inputs() const override { return 2; }
-    virtual int n_outputs() const override
-    { return 1 + (workspace_pd() != nullptr); }
+    virtual int n_inputs() const override
+    { return 2 + (workspace_pd() != nullptr); }
+    virtual int n_outputs() const override { return 1; }
 
     virtual status_t query(query_t what, int idx, void *result) const override
     {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/type_helpers.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/type_helpers.hpp
index a7cf1a160..06a0e2fd1 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/type_helpers.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/type_helpers.hpp
@@ -64,6 +64,7 @@ inline size_t data_type_size(data_type_t data_type) {
     case s16: return sizeof(prec_traits<s16>::type);
     case s8: return sizeof(prec_traits<s8>::type);
     case u8: return sizeof(prec_traits<u8>::type);
+    case bin: return sizeof(prec_traits<u8>::type);
     case data_type::undef:
     default: assert(!"unknown data_type");
     }
@@ -94,26 +95,32 @@ inline memory_format_t format_normalize(const memory_format_t fmt) {
             nc,
             ncw,
             nwc,
+            nCw4c,
             nCw8c,
             nCw16c,
             nchw,
             nhwc,
             chwn,
+            nChw4c,
             nChw8c,
             nChw16c,
             ncdhw,
             ndhwc,
+            nCdhw4c,
             nCdhw8c,
             nCdhw16c,
             oi,
             io,
             oiw,
             wio,
+            Owi4o,
+            OIw4i4o,
             Owi8o,
             OIw8i8o,
             OIw8o8i,
             OIw16i16o,
             OIw16o16i,
+            Oiw4o,
             Oiw16o,
             Owi16o,
             OIw8i16o2i,
@@ -122,20 +129,25 @@ inline memory_format_t format_normalize(const memory_format_t fmt) {
             oihw,
             ihwo,
             hwio,
+            iohw,
             hwio_s8s8,
             dhwio,
             oidhw,
+            OIdhw4i4o,
+            Odhwi4o,
             OIdhw8i8o,
             OIdhw8o8i,
             Odhwi8o,
             OIdhw16i16o,
             OIdhw16o16i,
+            Oidhw4o,
             Oidhw16o,
             Odhwi16o,
             oIhw8i,
             oIhw16i,
             oIdhw8i,
             oIdhw16i,
+            OIhw4i4o,
             OIhw8i8o,
             OIhw16i16o,
             OIhw4i16o4i,
@@ -145,18 +157,25 @@ inline memory_format_t format_normalize(const memory_format_t fmt) {
             OIhw8o16i2o,
             OIhw8o8i,
             OhIw8o4i,
+            OhIw8o32i,
+            OhIw16o32i,
             OhIw8o4i_s8s8,
             OIhw16o16i,
             IOhw16o16i,
+            Oihw4o,
             Oihw16o,
             Ohwi8o,
+            Ohwi4o,
             Ohwi16o,
             goiw,
+            gOwi4o,
+            gOIw4i4o,
             gOwi8o,
             gOIw8i8o,
             gOIw8o8i,
             gOIw16i16o,
             gOIw16o16i,
+            gOiw4o,
             gOiw16o,
             gOwi16o,
             gOIw8i16o2i,
@@ -164,31 +183,43 @@ inline memory_format_t format_normalize(const memory_format_t fmt) {
             gIOw16o16i,
             goihw,
             hwigo,
+            giohw,
             hwigo_s8s8,
+            gOIhw4i4o,
             gOIhw8i8o,
             gOIhw16i16o,
             gOIhw4i16o4i,
             gOIhw4i16o4i_s8s8,
+            gOIhw2i8o4i,
+            gOIhw2i8o4i_s8s8,
             gOIhw8i16o2i,
             gOIdhw8i16o2i,
             gOIhw8o16i2o,
+            gOIhw4o4i,
+            gOIhw4o4i_s8s8,
             gOIhw8o8i,
             gOhIw8o4i,
             gOhIw8o4i_s8s8,
             gOIhw16o16i,
             gIOhw16o16i,
+            gOihw4o,
             gOihw16o,
             gOhwi8o,
+            gOhwi4o,
             gOhwi16o,
             Goihw8g,
             Goihw16g,
+            Goihw16g_s8s8,
             goidhw,
+            gOIdhw4i4o,
+            gOdhwi4o,
             gOIdhw8i8o,
             gOIdhw8o8i,
             gOdhwi8o,
             gOIdhw16i16o,
             gOIdhw16o16i,
             gOidhw16o,
+            gOidhw4o,
             gOdhwi16o,
             ntc,
             tnc,
@@ -202,9 +233,9 @@ inline memory_format_t format_normalize(const memory_format_t fmt) {
 inline bool is_format_double_blocked(memory_format_t fmt) {
     using namespace memory_format;
     return utils::one_of(OIw8o16i2o, OIw8i16o2i, OIhw8i16o2i, OIdhw8i16o2i,
-            OIhw8o16i2o, OIhw4i16o4i, OIhw4i16o4i_s8s8, gOIw8o16i2o, gOIw8i16o2i,
-            gOIhw8i16o2i, gOIdhw8i16o2i, gOIhw8o16i2o, gOIhw4i16o4i,
-            gOIhw4i16o4i_s8s8);
+            OIhw8o16i2o, OIhw4i16o4i, OIhw4i16o4i_s8s8,
+            gOIw8o16i2o, gOIw8i16o2i, gOIhw8i16o2i, gOIdhw8i16o2i, gOIhw8o16i2o,
+            gOIhw4i16o4i, gOIhw4i16o4i_s8s8, gOIhw2i8o4i, gOIhw2i8o4i_s8s8);
 }
 
 inline bool blocking_desc_is_equal(const blocking_desc_t &lhs,
@@ -232,6 +263,22 @@ inline bool wino_desc_is_equal(const wino_data_t &lhs,
         && lhs.r == rhs.r;
 }
 
+inline bool rnn_packed_desc_is_equal(
+        const rnn_packed_data_t &lhs, const rnn_packed_data_t &rhs) {
+    bool ok = lhs.format == rhs.format && lhs.n_parts == rhs.n_parts
+            && lhs.offset_compensation == rhs.offset_compensation
+            && lhs.size == rhs.size
+            && lhs.n == rhs.n;
+    if (!ok)
+        return false;
+
+    for (int i = 0; i < rhs.n_parts; i++)
+        ok = ok && lhs.parts[i] == rhs.parts[i];
+    for (int i = 0; i < rhs.n_parts; i++)
+        ok = ok && lhs.part_pack_size[i] == rhs.part_pack_size[i];
+    return ok;
+}
+
 inline bool operator==(const memory_desc_t &lhs, const memory_desc_t &rhs) {
     assert(lhs.primitive_kind == mkldnn::impl::primitive_kind::memory);
     assert(rhs.primitive_kind == mkldnn::impl::primitive_kind::memory);
@@ -247,6 +294,9 @@ inline bool operator==(const memory_desc_t &lhs, const memory_desc_t &rhs) {
     else if (lhs.format == memory_format::wino_fmt)
         return wino_desc_is_equal(lhs.layout_desc.wino_desc,
             rhs.layout_desc.wino_desc);
+    else if (lhs.format == memory_format::rnn_packed)
+        return rnn_packed_desc_is_equal(lhs.layout_desc.rnn_packed_desc,
+                rhs.layout_desc.rnn_packed_desc);
     return true;
 }
 
@@ -276,6 +326,7 @@ inline data_type_t default_accum_data_type(data_type_t src_dt,
     if (one_of(f32, src_dt, dst_dt)) return f32;
     if (one_of(s32, src_dt, dst_dt)) return s32;
     if (one_of(s16, src_dt, dst_dt)) return s32;
+    if (one_of(bin, src_dt, dst_dt)) return s32;
 
     if (one_of(s8, src_dt, dst_dt) || one_of(u8, src_dt, dst_dt)) return s32;
 
@@ -298,10 +349,13 @@ inline data_type_t default_accum_data_type(data_type_t src_dt,
         if ((src_dt == u8 || src_dt == s8)
             && wei_dt == s8 && one_of(dst_dt, f32, s32, s8, u8))
             return s32;
+        if (src_dt == bin && wei_dt == bin && (dst_dt == f32 || dst_dt == bin))
+            return s32;
     } else if (prop_kind == backward_data) {
         if (src_dt == s32 && wei_dt == s16 && dst_dt == s16)
             return s32;
-        if (one_of(src_dt, f32, s32, s8, u8) && wei_dt == s8 && dst_dt == u8)
+        if (one_of(src_dt, f32, s32, s8, u8) && wei_dt == s8 &&
+                one_of(dst_dt, s8, u8))
             return s32;
     } else if (prop_kind == backward_weights) {
         if (src_dt == s16 && wei_dt == s32 && dst_dt == s16)
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/utils.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/utils.cpp
index 055681f6c..dd3f21aca 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/utils.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/utils.cpp
@@ -25,12 +25,16 @@
 #include <malloc.h>
 #include <windows.h>
 #endif
-#include "xmmintrin.h"
 
+#include "mkldnn.h"
 #include "utils.hpp"
 #include "mkldnn_thread.hpp"
 #include "mkldnn.h"
 
+#if defined(MKLDNN_X86_64)
+#include "xmmintrin.h"
+#endif
+
 namespace mkldnn {
 namespace impl {
 
@@ -66,9 +70,9 @@ int mkldnn_getenv(char *value, const char *name, int length) {
 }
 
 static bool dump_jit_code;
+static bool initialized;
 
 bool mkldnn_jit_dump() {
-    static bool initialized = false;
     if (!initialized) {
         const int len = 2;
         char env_dump[len] = {0};
@@ -89,9 +93,10 @@ FILE *mkldnn_fopen(const char *filename, const char *mode) {
 #endif
 }
 
-THREAD_LOCAL unsigned int mxcsr_save;
+thread_local unsigned int mxcsr_save;
 
 void set_rnd_mode(round_mode_t rnd_mode) {
+#if defined(MKLDNN_X86_64)
     mxcsr_save = _mm_getcsr();
     unsigned int mxcsr = mxcsr_save & ~(3u << 13);
     switch (rnd_mode) {
@@ -100,10 +105,15 @@ void set_rnd_mode(round_mode_t rnd_mode) {
     default: assert(!"unreachable");
     }
     if (mxcsr != mxcsr_save) _mm_setcsr(mxcsr);
+#else
+    UNUSED(rnd_mode);
+#endif
 }
 
 void restore_rnd_mode() {
+#if defined(MKLDNN_X86_64)
     _mm_setcsr(mxcsr_save);
+#endif
 }
 
 void *malloc(size_t size, int alignment) {
@@ -127,13 +137,22 @@ void free(void *p) {
 #endif
 }
 
+// Atomic operations
+int32_t mkldnn_fetch_and_add(int32_t *dst, int32_t val) {
+#ifdef _WIN32
+    return InterlockedExchangeAdd(reinterpret_cast<long*>(dst), val);
+#else
+    return __sync_fetch_and_add(dst, val);
+#endif
+}
+
 static Xbyak::util::Cpu cpu_;
 
 unsigned int get_cache_size(int level, bool per_core) {
     unsigned int l = level - 1;
     // Currently, if XByak is not able to fetch the cache topology
     // we default to 32KB of L1, 512KB of L2 and 1MB of L3 per core.
-    if (cpu_.data_cache_levels == 0){
+    if (cpu_.getDataCacheLevels() == 0){
         const int L1_cache_per_core = 32000;
         const int L2_cache_per_core = 512000;
         const int L3_cache_per_core = 1024000;
@@ -145,9 +164,9 @@ unsigned int get_cache_size(int level, bool per_core) {
             default: return 0;
         }
     }
-    if (l < cpu_.data_cache_levels) {
-        return cpu_.data_cache_size[l]
-               / (per_core ? cpu_.cores_sharing_data_cache[l] : 1);
+    if (l < cpu_.getDataCacheLevels()) {
+        return cpu_.getDataCacheSize(l)
+               / (per_core ? cpu_.getCoresSharingDataCache(l) : 1);
     } else
         return 0;
 }
@@ -155,7 +174,14 @@ unsigned int get_cache_size(int level, bool per_core) {
 }
 }
 
+mkldnn_status_t mkldnn_set_jit_dump(int dump) {
+    using namespace mkldnn::impl::status;
+    if (dump < 0) return invalid_arguments;
+    mkldnn::impl::dump_jit_code = dump;
+    mkldnn::impl::initialized = true;
+    return success;
+}
+
 unsigned int mkldnn_get_cache_size(int level, int per_core) {
     return mkldnn::impl::get_cache_size(level, per_core != 0);
 }
-
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/utils.hpp
index 01fa46783..59b8add4d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/utils.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/utils.hpp
@@ -21,6 +21,11 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
+#include <stdint.h>
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define MKLDNN_X86_64
+#endif
 
 #define MSAN_ENABLED 0
 #if defined(__has_feature)
@@ -50,17 +55,10 @@ static_assert(sizeof(void*) == 8, "Intel(R) MKL-DNN supports 64 bit only");
 
 #define IMPLICATION(cause, effect) (!(cause) || !!(effect))
 
-#ifdef _WIN32
+#if defined(_WIN32) && !defined(__GNUC__)
 #define __PRETTY_FUNCTION__ __FUNCSIG__
 #endif
 
-#ifdef __APPLE__
-// older XCode doesn't support thread_local
-#define THREAD_LOCAL __thread
-#else
-#define THREAD_LOCAL thread_local
-#endif
-
 namespace utils {
 
 /* a bunch of std:: analogues to be compliant with any msvs version
@@ -181,6 +179,9 @@ inline typename remove_reference<T>::type rnd_dn(const T a, const U b) {
     return (a / b) * b;
 }
 
+template <typename T> T *align_ptr(T *ptr, uintptr_t alignment)
+{ return (T *)(((uintptr_t)ptr + alignment - 1) & ~(alignment - 1)); }
+
 template <typename T, typename U, typename V>
 inline U this_block_size(const T offset, const U max, const V block_size) {
     assert(offset < max);
@@ -245,6 +246,24 @@ inline T pick(size_t i, const T &x0, Args &&... args) {
     return i == 0 ? x0 : pick(i - 1, utils::forward<Args>(args)...);
 }
 
+template <typename T>
+T pick_by_prop_kind(prop_kind_t prop_kind, const T &val_fwd_inference,
+        const T &val_fwd_training, const T &val_bwd_d, const T &val_bwd_w) {
+    switch (prop_kind) {
+    case prop_kind::forward_inference: return val_fwd_inference;
+    case prop_kind::forward_training: return val_fwd_training;
+    case prop_kind::backward_data: return val_bwd_d;
+    case prop_kind::backward_weights: return val_bwd_w;
+    default: assert(!"unsupported prop_kind");
+    }
+    return T();
+}
+
+template <typename T>
+T pick_by_prop_kind(prop_kind_t prop_kind,
+        const T &val_fwd, const T &val_bwd_d, const T &val_bwd_w)
+{ return pick_by_prop_kind(prop_kind, val_fwd, val_fwd, val_bwd_d, val_bwd_w); }
+
 template <typename Telem, size_t Tdims>
 struct array_offset_calculator {
     template <typename... Targs>
@@ -287,6 +306,7 @@ private:
 
 void *malloc(size_t size, int alignment);
 void free(void *p);
+int32_t mkldnn_fetch_and_add(int32_t *dst, int32_t val);
 
 struct c_compatible {
     enum { default_alignment = 64 };
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/verbose.cpp b/inference-engine/thirdparty/mkl-dnn/src/common/verbose.cpp
index e1af6584f..f2a0e1724 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/verbose.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/verbose.cpp
@@ -15,31 +15,58 @@
 *******************************************************************************/
 
 #include <stdlib.h>
-#ifdef _WIN32
-#include <windows.h>
-#else
+#ifndef _WIN32
 #include <sys/time.h>
 #endif
 
 #include "mkldnn.h"
+#include "mkldnn_version.h"
 #include "c_types_map.hpp"
 #include "verbose.hpp"
+#include "cpu_isa_traits.hpp"
+
+/* MKL-DNN CPU ISA info */
+#define ISA_ANY "No instruction set specific optimizations"
+#define SSE42 "Intel(R) Streaming SIMD Extensions 4.2 (Intel(R) SSE4.2)"
+#define AVX "Intel(R) Advanced Vector Extensions (Intel(R) AVX)"
+#define AVX2 "Intel(R) Advanced Vector Extensions 2 (Intel(R) AVX2)"
+#define AVX512_COMMON "Intel(R) Advanced Vector Extensions 512 (Intel(R) " \
+                      "AVX-512)"
+#define AVX512_CORE "Intel(R) Advanced Vector Extensions 512 (Intel(R) " \
+                    "AVX-512) with AVX512BW, AVX512VL, and AVX512DQ extensions"
+#define AVX512_CORE_VNNI "Intel(R) AVX512-Deep Learning Boost (Intel(R) " \
+                         "AVX512-DL Boost)"
+#define AVX512_MIC "Intel(R) Advanced Vector Extensions 512 (Intel(R) " \
+                   "AVX-512) with AVX512CD, AVX512ER, and AVX512PF extensions"
+#define AVX512_MIC_4OPS "Intel(R) Advanced Vector Extensions 512 (Intel(R) " \
+                   "AVX-512) with AVX512_4FMAPS and AVX512_4VNNIW extensions"
 
 namespace mkldnn {
 namespace impl {
 
 static verbose_t verbose;
+static bool initialized;
+static bool version_printed = false;
 
 const verbose_t *mkldnn_verbose() {
 #if !defined(DISABLE_VERBOSE)
-    static int initialized = 0;
     if (!initialized) {
         const int len = 2;
         char val[len] = {0};
         if (mkldnn_getenv(val, "MKLDNN_VERBOSE", len) == 1)
             verbose.level = atoi(val);
-        initialized = 1;
+        initialized = true;
     }
+    if (!version_printed && verbose.level > 0) {
+         printf("mkldnn_verbose,info,"
+                 "Intel(R) MKL-DNN v%d.%d.%d (Git Hash %s),%s\n",
+                mkldnn_version()->major, mkldnn_version()->minor,
+                mkldnn_version()->patch, mkldnn_version()->hash,
+                get_isa_info());
+         version_printed = true;
+    }
+#else
+    verbose.level = 0;
 #endif
     return &verbose;
 }
@@ -59,12 +86,36 @@ double get_msec() {
 #endif
 }
 
+const char *get_isa_info() {
+    using namespace mkldnn::impl::cpu;
+    if (mayiuse(avx512_mic_4ops))  return AVX512_MIC_4OPS;
+    if (mayiuse(avx512_mic))       return AVX512_MIC;
+    if (mayiuse(avx512_core_vnni)) return AVX512_CORE_VNNI;
+    if (mayiuse(avx512_core))      return AVX512_CORE;
+    if (mayiuse(avx512_common))    return AVX512_COMMON;
+    if (mayiuse(avx2))             return AVX2;
+    if (mayiuse(avx))              return AVX;
+    if (mayiuse(sse42))            return SSE42;
+    return ISA_ANY;
+}
+
 }
 }
 
-mkldnn_status_t mkldnn_verbose_set(int level) {
+mkldnn_status_t mkldnn_set_verbose(int level) {
     using namespace mkldnn::impl::status;
     if (level < 0 || level > 2) return invalid_arguments;
     mkldnn::impl::verbose.level = level;
+    mkldnn::impl::initialized = true;
     return success;
 }
+
+const mkldnn_version_t *mkldnn_version() {
+    static mkldnn_version_t ver = {
+        MKLDNN_VERSION_MAJOR,
+        MKLDNN_VERSION_MINOR,
+        MKLDNN_VERSION_PATCH,
+        MKLDNN_VERSION_HASH};
+    return &ver;
+}
+
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/verbose.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/verbose.hpp
index e48e94afd..3e4381c53 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/verbose.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/verbose.hpp
@@ -31,13 +31,14 @@ struct verbose_t {
 
 const verbose_t *mkldnn_verbose();
 double get_msec();
+const char *get_isa_info();
 
 #if !defined(DISABLE_VERBOSE)
 #include <stdio.h>
 
 #define MKLDNN_VERBOSE_BUF_LEN 1024
 
-#define MKLDNN_VERBOSE_DAT_LEN 64
+#define MKLDNN_VERBOSE_DAT_LEN 128
 #define MKLDNN_VERBOSE_AUX_LEN 384
 #define MKLDNN_VERBOSE_PRB_LEN 384
 
@@ -55,6 +56,36 @@ inline void verbose_templ(char *buffer, mkldnn_primitive_kind_t prim_kind,
             mkldnn_prop_kind2str(prop_kind), data_str, aux_str, prb_str);
 }
 
+inline void format_mem_desc_str_generic(char *str, int len,
+        const memory_desc_t *md) {
+    auto ndims = md->ndims;
+    auto dims = md->dims;
+    int l = 0;
+    for (int d = 0; d < ndims - 1; ++d)
+        l += snprintf(str + l, len - l, "%tdx", dims[d]);
+    snprintf(str + l, len - l, "%td", dims[ndims - 1]);
+}
+
+// XXX: Outputs strings corresponding to memory formats used for data tensors.
+inline void format_mem_desc_str(char *str, int len, const memory_desc_t *md) {
+    auto ndims = md->ndims;
+    auto dims = md->dims;
+    if (ndims == 1)
+        snprintf(str, len, "x%td", dims[0]);
+    else if (ndims == 2)
+        snprintf(str, len, "mb%tdic%td", dims[0], dims[1]);
+    else if (ndims == 3)
+        snprintf(str, len, "mb%tdic%tdiw%td", dims[0], dims[1], dims[2]);
+    else if (ndims == 4)
+        snprintf(str, len, "mb%tdic%tdih%tdiw%td",
+                dims[0], dims[1], dims[2], dims[3]);
+    else if (ndims == 5)
+        snprintf(str, len, "mb%tdic%tdid%tdih%tdiw%td",
+                dims[0], dims[1], dims[2], dims[3], dims[4]);
+    else
+        format_mem_desc_str_generic(str, len, md);
+}
+
 template <typename pd_t> static void init_info_bnorm(pd_t *s, char *buffer) {
     DECL_DAT_AUX_PRB_STRS();
 
@@ -66,17 +97,7 @@ template <typename pd_t> static void init_info_bnorm(pd_t *s, char *buffer) {
 
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN, "flags:%u", s->desc()->flags);
 
-    if (s->ndims() == 5)
-    {
-        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%dic%did%dih%diw%d", s->MB(), s->C(), s->D(), s->H(), s->W());
-    } else if (s->ndims() == 4) {
-        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%dic%dih%diw%d", s->MB(), s->C(), s->H(), s->W());
-    } else if (s->ndims() == 2) {
-        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%dic%d", s->MB(), s->C());
-    }
+    format_mem_desc_str(prb_str, MKLDNN_VERBOSE_PRB_LEN, s->src_pd()->desc());
 
     verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
@@ -85,16 +106,16 @@ template <typename pd_t> static void init_info_bnorm(pd_t *s, char *buffer) {
 template <typename pd_t> static void init_info_conv(pd_t *s, char *buffer) {
     DECL_DAT_AUX_PRB_STRS();
 
-    auto fmt_src = (s->cdesc()->prop_kind == prop_kind::backward_data
+    auto fmt_src = (s->desc()->prop_kind == prop_kind::backward_data
             ? s->diff_src_pd() : s->src_pd())->desc()->format;
-    auto fmt_wei = (s->cdesc()->prop_kind == prop_kind::backward_weights
+    auto fmt_wei = (s->desc()->prop_kind == prop_kind::backward_weights
             ? s->diff_weights_pd(0) : s->weights_pd(0))->desc()->format;
     auto fmt_bia = s->with_bias()
-        ? (s->cdesc()->prop_kind == prop_kind::backward_weights
+        ? (s->desc()->prop_kind == prop_kind::backward_weights
                 ? s->diff_weights_pd(1) : s->weights_pd(1))->desc()->format
         : memory_format::undef;
-    auto fmt_dst = (s->cdesc()->prop_kind == prop_kind::backward_data
-            || s->cdesc()->prop_kind == prop_kind::backward_weights
+    auto fmt_dst = (s->desc()->prop_kind == prop_kind::backward_data
+            || s->desc()->prop_kind == prop_kind::backward_weights
         ? s->diff_dst_pd() : s->dst_pd())->desc()->format;
     snprintf(dat_str, MKLDNN_VERBOSE_DAT_LEN,
             "fsrc:%s fwei:%s fbia:%s fdst:%s",
@@ -102,29 +123,49 @@ template <typename pd_t> static void init_info_conv(pd_t *s, char *buffer) {
             mkldnn_fmt2str(fmt_bia), mkldnn_fmt2str(fmt_dst));
 
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN,
-            "alg:%s", mkldnn_alg_kind2str(s->cdesc()->alg_kind));
+            "alg:%s", mkldnn_alg_kind2str(s->desc()->alg_kind));
 
     if (s->ndims() == 5) {
-        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%d_g%dic%doc%d"
-            "_id%dod%dkd%dsd%ddd%dpd%d"
-            "_ih%doh%dkh%dsh%ddh%dph%d"
-            "_iw%dow%dkw%dsw%ddw%dpw%d",
-            s->MB(), s->G(), s->IC(), s->OC(),
-            s->ID(), s->OD(), s->KD(), s->KSD(), s->KDD(), s->padFront(),
-            s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
-            s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
+        if (s->with_groups())
+            snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+                "mb%d_g%dic%doc%d"
+                "_id%dod%dkd%dsd%ddd%dpd%d"
+                "_ih%doh%dkh%dsh%ddh%dph%d"
+                "_iw%dow%dkw%dsw%ddw%dpw%d",
+                s->MB(), s->G(), s->IC(), s->OC(),
+                s->ID(), s->OD(), s->KD(), s->KSD(), s->KDD(), s->padFront(),
+                s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
+                s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
+        else
+            snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+                "mb%d_ic%doc%d"
+                "_id%dod%dkd%dsd%ddd%dpd%d"
+                "_ih%doh%dkh%dsh%ddh%dph%d"
+                "_iw%dow%dkw%dsw%ddw%dpw%d",
+                s->MB(), s->IC(), s->OC(),
+                s->ID(), s->OD(), s->KD(), s->KSD(), s->KDD(), s->padFront(),
+                s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
+                s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
     } else {
-        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%d_g%dic%doc%d"
-            "_ih%doh%dkh%dsh%ddh%dph%d"
-            "_iw%dow%dkw%dsw%ddw%dpw%d",
-            s->MB(), s->G(), s->IC(), s->OC(),
-            s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
-            s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
+        if (s->with_groups())
+            snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+                "mb%d_g%dic%doc%d"
+                "_ih%doh%dkh%dsh%ddh%dph%d"
+                "_iw%dow%dkw%dsw%ddw%dpw%d",
+                s->MB(), s->G(), s->IC(), s->OC(),
+                s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
+                s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
+        else
+            snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+                "mb%d_ic%doc%d"
+                "_ih%doh%dkh%dsh%ddh%dph%d"
+                "_iw%dow%dkw%dsw%ddw%dpw%d",
+                s->MB(), s->IC(), s->OC(),
+                s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
+                s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
     }
 
-    verbose_templ(buffer, s->kind(), s->name(), s->cdesc()->prop_kind, dat_str,
+    verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
 }
 
@@ -140,12 +181,7 @@ template <typename pd_t> static void init_info_shuffle(pd_t *s, char *buffer) {
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN, "axis:%d group_size:%d",
             s->axis(), s->group_size());
 
-    int l = 0;
-    for (int d = 0; d < md->ndims - 1; ++d)
-        l += snprintf(prb_str + l, MKLDNN_VERBOSE_PRB_LEN - l,
-                "%dx", md->dims[d]);
-    snprintf(prb_str + l, MKLDNN_VERBOSE_PRB_LEN - l,
-                "%d", md->dims[md->ndims - 1]);
+    format_mem_desc_str_generic(prb_str, MKLDNN_VERBOSE_PRB_LEN, md);
 
     verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
@@ -163,8 +199,7 @@ template <typename pd_t> static void init_info_eltwise(pd_t *s, char *buffer) {
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN,
             "alg:%s", mkldnn_alg_kind2str(s->desc()->alg_kind));
 
-    snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%dic%dih%diw%d", s->MB(), s->C(), s->H(), s->W());
+    format_mem_desc_str(prb_str, MKLDNN_VERBOSE_PRB_LEN, s->src_pd()->desc());
 
     verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
@@ -227,8 +262,7 @@ template <typename pd_t> static void init_info_lrn(pd_t *s, char *buffer) {
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN,
             "alg:%s", mkldnn_alg_kind2str(s->desc()->alg_kind));
 
-    snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%dic%dih%diw%d", s->MB(), s->C(), s->H(), s->W());
+    format_mem_desc_str(prb_str, MKLDNN_VERBOSE_PRB_LEN, s->src_pd()->desc());
 
     verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
@@ -246,12 +280,7 @@ template <typename pd_t> static void init_info_mem(pd_t *s, char *buffer) {
 
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN, "num:%d", s->n_inputs());
 
-    int l = 0;
-    for (int d = 0; d < o_md->ndims - 1; ++d)
-        l += snprintf(prb_str + l, MKLDNN_VERBOSE_PRB_LEN - l,
-                "%dx", o_md->dims[d]);
-    snprintf(prb_str + l, MKLDNN_VERBOSE_PRB_LEN - l,
-            "%d", o_md->dims[o_md->ndims - 1]);
+    format_mem_desc_str_generic(prb_str, MKLDNN_VERBOSE_PRB_LEN, o_md);
 
     verbose_templ(buffer, s->kind(), s->name(), prop_kind::undef, dat_str,
             aux_str, prb_str);
@@ -293,15 +322,15 @@ template <typename pd_t> static void init_info_pool(pd_t *s, char *buffer) {
 template <typename pd_t> static void init_info_softmax(pd_t *s, char *buffer) {
     DECL_DAT_AUX_PRB_STRS();
 
-    auto fmt_data = (s->desc()->prop_kind == prop_kind::backward_data
-            ? s->diff_src_pd() : s->src_pd())->desc()->format;
+    auto md = (s->desc()->prop_kind == prop_kind::backward_data
+        ? s->diff_src_pd() : s->src_pd())->desc();
+    auto fmt_data = md->format;
     auto fmt_diff = s->desc()->prop_kind == prop_kind::backward_data
         ? s->diff_src_pd()->desc()->format : memory_format::undef;
     snprintf(dat_str, MKLDNN_VERBOSE_DAT_LEN, "fdata:%s fdiff:%s",
             mkldnn_fmt2str(fmt_data), mkldnn_fmt2str(fmt_diff));
 
-    snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "mb%dic%dih%diw%d", s->MB(), s->C(), s->H(), s->W());
+    format_mem_desc_str(prb_str, MKLDNN_VERBOSE_PRB_LEN, md);
 
     verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
@@ -311,15 +340,50 @@ template <typename pd_t> static void init_info_softmax(pd_t *s, char *buffer) {
 template <typename pd_t> static void init_info_rnn(pd_t *s, char *buffer) {
     DECL_DAT_AUX_PRB_STRS();
 
-    alg_kind_t alg_kind = s->desc()->cell_desc.cell_kind;
+    const mkldnn::impl::memory_desc_t *src_lay_md, *src_iter_md, *wei_lay_md,
+            *wei_iter_md, *bias_md, *dst_lay_md, *dst_iter_md;
+    if (s->desc()->prop_kind != prop_kind::backward_data) {
+        src_lay_md = s->src_pd(0)->desc();
+        src_iter_md = s->src_pd(1) ? s->src_pd(1)->desc() : nullptr;
+        wei_lay_md = s->weights_pd(0)->desc();
+        wei_iter_md = s->weights_pd(1)->desc();
+        bias_md = s->weights_pd(2)->desc();
+        dst_lay_md = s->dst_pd(0)->desc();
+        dst_iter_md = s->dst_pd(1) ? s->dst_pd(1)->desc() : nullptr;
+    } else {
+        src_lay_md = s->diff_src_pd(0)->desc();
+        src_iter_md = s->diff_src_pd(1) ? s->diff_src_pd(1)->desc() : nullptr;
+        wei_lay_md = s->diff_weights_pd(0)->desc();
+        wei_iter_md = s->diff_weights_pd(1)->desc();
+        bias_md = s->diff_weights_pd(2)->desc();
+        dst_lay_md = s->diff_dst_pd(0)->desc();
+        dst_iter_md = s->diff_dst_pd(1) ? s->diff_dst_pd(1)->desc() : nullptr;
+    }
+
+    alg_kind_t alg_kind = s->cell_kind();
+    rnn_direction_t rnn_dir = s->direction();
     snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN,
-            "alg:%s", mkldnn_alg_kind2str(alg_kind));
+            "alg:%s_%s", mkldnn_alg_kind2str(alg_kind), mkldnn_rnn_direction2str(rnn_dir));
+    snprintf(dat_str, MKLDNN_VERBOSE_DAT_LEN, "fdata:%s-%s-%s-%s fwei:%s-%s-%s ddata:%s%s-%s%s dwei:%s%s%s",
+             mkldnn_fmt2str(src_lay_md->format),
+             mkldnn_fmt2str(src_iter_md ? src_iter_md->format : memory_format::undef),
+             mkldnn_fmt2str(dst_lay_md->format),
+             mkldnn_fmt2str(dst_iter_md ? dst_iter_md->format : memory_format::undef),
+             mkldnn_fmt2str(wei_lay_md->format),
+             mkldnn_fmt2str(wei_iter_md->format),
+             mkldnn_fmt2str(bias_md->format),
+             mkldnn_dt2str(src_lay_md->data_type),
+             mkldnn_dt2str(src_iter_md ? src_iter_md->data_type : data_type::undef),
+             mkldnn_dt2str(dst_lay_md->data_type),
+             mkldnn_dt2str(dst_iter_md ? dst_iter_md->data_type : data_type::undef),
+             mkldnn_dt2str(wei_lay_md->data_type),
+             mkldnn_dt2str(wei_iter_md->data_type),
+             mkldnn_dt2str(bias_md->data_type));
 
     snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
-            "l%dd%dmb%dt%d_ic%dsc%doc%d_wi%dws%d",
-             s->L(), s->D(), s->MB(), s->T(),
-             s->SLC(), s->DIC(), s->DIC(),
-             s->SLC(), s->SIC());
+            "l%dt%dmb%dsic%dslc%ddic%ddlc%d",
+             s->L(), s->T(), s->MB(),
+             s->SIC(), s->SLC(), s->DIC(), s->DLC());
 
     verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
             aux_str, prb_str);
@@ -343,6 +407,63 @@ template <typename pd_t> static void init_info_roi_pooling(pd_t *s, char *buffer
                   aux_str, prb_str);
 }
 
+template <typename pd_t> static void init_info_bin_conv(pd_t *s, char *buffer) {
+    DECL_DAT_AUX_PRB_STRS();
+
+    auto fmt_src = s->src_pd()->desc()->format;
+    auto fmt_wei = s->weights_pd(0)->desc()->format;
+    auto fmt_dst = s->dst_pd()->desc()->format;
+
+    snprintf(dat_str, MKLDNN_VERBOSE_DAT_LEN,
+            "fsrc:%s fwei:%s fdst:%s",
+            mkldnn_fmt2str(fmt_src), mkldnn_fmt2str(fmt_wei), mkldnn_fmt2str(fmt_dst));
+
+    snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN,
+            "alg:%s", mkldnn_alg_kind2str(s->cdesc()->alg_kind));
+
+    if (s->ndims() == 5) {
+        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+            "mb%d_g%dic%doc%d"
+            "_id%dod%dkd%dsd%ddd%dpd%d"
+            "_ih%doh%dkh%dsh%ddh%dph%d"
+            "_iw%dow%dkw%dsw%ddw%dpw%d",
+            s->MB(), s->G(), s->IC(), s->OC(),
+            s->ID(), s->OD(), s->KD(), s->KSD(), s->KDD(), s->padFront(),
+            s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
+            s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
+    } else {
+        snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+            "mb%d_g%dic%doc%d"
+            "_ih%doh%dkh%dsh%ddh%dph%d"
+            "_iw%dow%dkw%dsw%ddw%dpw%d",
+            s->MB(), s->G(), s->IC(), s->OC(),
+            s->IH(), s->OH(), s->KH(), s->KSH(), s->KDH(), s->padT(),
+            s->IW(), s->OW(), s->KW(), s->KSW(), s->KDW(), s->padL());
+    }
+
+    verbose_templ(buffer, s->kind(), s->name(), s->cdesc()->prop_kind, dat_str,
+            aux_str, prb_str);
+}
+
+template <typename pd_t> static void init_info_binarization(pd_t *s, char *buffer) {
+    DECL_DAT_AUX_PRB_STRS();
+
+    auto fmt_data = s->src_pd()->desc()->format;
+    auto fmt_diff = s->desc()->prop_kind == prop_kind::backward_data
+        ? s->diff_src_pd()->desc()->format : memory_format::undef;
+    snprintf(dat_str, MKLDNN_VERBOSE_DAT_LEN, "fdata:%s fdiff:%s",
+            mkldnn_fmt2str(fmt_data), mkldnn_fmt2str(fmt_diff));
+
+    snprintf(aux_str, MKLDNN_VERBOSE_AUX_LEN,
+            "alg:%s", mkldnn_alg_kind2str(s->desc()->alg_kind));
+
+    snprintf(prb_str, MKLDNN_VERBOSE_PRB_LEN,
+            "mb%dic%dih%diw%d", s->MB(), s->C(), s->H(), s->W());
+
+    verbose_templ(buffer, s->kind(), s->name(), s->desc()->prop_kind, dat_str,
+            aux_str, prb_str);
+}
+
 #else /* !defined(DISABLE_VERBOSE) */
 #define MKLDNN_VERBOSE_BUF_LEN 1
 
@@ -361,7 +482,10 @@ DEFINE_STUB(mem);
 DEFINE_STUB(pool);
 DEFINE_STUB(softmax);
 DEFINE_STUB(rnn);
+DEFINE_STUB(shuffle);
 DEFINE_STUB(roi_pooling);
+DEFINE_STUB(bin_conv);
+DEFINE_STUB(binarization);
 #undef DEFINE_STUB
 #endif /* !defined(DISABLE_VERBOSE) */
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/common/z_magic.hpp b/inference-engine/thirdparty/mkl-dnn/src/common/z_magic.hpp
index 0c7d3c525..818a7dc91 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/common/z_magic.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/common/z_magic.hpp
@@ -26,8 +26,13 @@
 #define STRINGIFy(s) #s
 #define STRINGIFY(s) STRINGIFy(s)
 
-#define PRAGMA_MACRo(x) _Pragma(#x)
-#define PRAGMA_MACRO(x) PRAGMA_MACRo(x)
+#ifdef _MSC_VER
+#   define PRAGMA_MACRo(x) __pragma(x)
+#   define PRAGMA_MACRO(x) PRAGMA_MACRo(x)
+#else
+#   define PRAGMA_MACRo(x) _Pragma(#x)
+#   define PRAGMA_MACRO(x) PRAGMA_MACRo(x)
+#endif
 
 #endif
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp
index 81b91e76c..370a92501 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.cpp
@@ -14,130 +14,126 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_types.h"
-
 #include "c_types_map.hpp"
 #include "utils.hpp"
+
 #include "jit_generator.hpp"
-#include "cpu_batch_normalization_pd.hpp"
-#include "utils.hpp"
+#include "cpu_batch_normalization_utils.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
-
 namespace bnorm_utils {
-    void cache_balance(size_t working_set_size, int C_blks, int &C_blks_per_iter,
-            int &iters) {
-        int nthrs = mkldnn_get_max_threads();
-        int l3_size = get_cache_size(3, true) * nthrs / 2;
 
-        C_blks_per_iter = l3_size / working_set_size;
+void cache_balance(size_t working_set_size, int C_blks, int &C_blks_per_iter,
+        int &iters) {
+    int nthrs = mkldnn_get_max_threads();
+    int l3_size = get_cache_size(3, true) * nthrs / 2;
 
-        if (C_blks_per_iter == 0)
-            C_blks_per_iter = 1;
-        if (C_blks_per_iter > C_blks)
-            C_blks_per_iter = C_blks;
+    C_blks_per_iter = l3_size / working_set_size;
 
-        iters = (C_blks + C_blks_per_iter - 1) / C_blks_per_iter;
-    }
+    if (C_blks_per_iter == 0)
+        C_blks_per_iter = 1;
+    if (C_blks_per_iter > C_blks)
+        C_blks_per_iter = C_blks;
 
-    bool thread_balance(bool do_blocking, bool spatial_thr_allowed, int ithr,
-            int nthr, int N, int C_blks, int SP, int &C_ithr, int &C_nthr,
-            int &C_blk_s, int &C_blk_e, int &N_ithr, int &N_nthr, int &N_s,
-            int &N_e, int &S_ithr, int &S_nthr, int &S_s, int &S_e) {
-        if (nthr <= C_blks || !mkldnn_thr_syncable()) {
-            C_ithr = ithr; C_nthr = nthr;
-            N_ithr = 0; N_nthr = 1;
-            S_ithr = 0; S_nthr = 1;
-            N_s = 0; N_e = N; S_s = 0; S_e = SP;
-            balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e);
+    iters = (C_blks + C_blks_per_iter - 1) / C_blks_per_iter;
+}
+
+bool thread_balance(bool do_blocking, bool spatial_thr_allowed, int ithr,
+        int nthr, int N, int C_blks, int SP, int &C_ithr, int &C_nthr,
+        int &C_blk_s, int &C_blk_e, int &N_ithr, int &N_nthr, int &N_s,
+        int &N_e, int &S_ithr, int &S_nthr, int &S_s, int &S_e) {
+    if (nthr <= C_blks || !mkldnn_thr_syncable()) {
+        C_ithr = ithr; C_nthr = nthr;
+        N_ithr = 0; N_nthr = 1;
+        S_ithr = 0; S_nthr = 1;
+        N_s = 0; N_e = N; S_s = 0; S_e = SP;
+        balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e);
+    } else {
+        if (do_blocking) {
+            N_nthr = nstl::min(N, nthr);
+            C_nthr = nstl::min(C_blks, nthr / N_nthr);
+            S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
         } else {
-            if (do_blocking) {
-                N_nthr = nstl::min(N, nthr);
-                C_nthr = nstl::min(C_blks, nthr / N_nthr);
-                S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
-            } else {
-                C_nthr = math::gcd(nthr, C_blks);
-                N_nthr = nstl::min(N, nthr / C_nthr);
-                S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
-            }
-
-            if (!spatial_thr_allowed)
-                S_nthr = 1;
-
-            if (S_nthr < 1) S_nthr = 1;
-            if (ithr < C_nthr * N_nthr * S_nthr) {
-                N_ithr = (ithr / S_nthr) % N_nthr ;
-                C_ithr = ithr / (N_nthr * S_nthr);
-                S_ithr = ithr % S_nthr;
-                balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e);
-                balance211(N, N_nthr, N_ithr, N_s, N_e);
-                balance211(SP, S_nthr, S_ithr, S_s, S_e);
-            } else {
-                S_ithr = N_ithr = C_ithr = -ithr;
-                S_s = S_e = N_s = N_e = C_blk_s = C_blk_e = -1;
-            }
+            C_nthr = math::gcd(nthr, C_blks);
+            N_nthr = nstl::min(N, nthr / C_nthr);
+            S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
         }
 
-        // spatial_thr_allowed is meant to help maintain
-        // consistent decisions about spatial threading
-        // between mutiple invocations of this routine.
-        // It is caller's responsibility to check the
-        // return value and pass it as a flag to the
-        // next call if needed.
-        if (S_nthr == 1)
-            spatial_thr_allowed = false;
+        if (!spatial_thr_allowed)
+            S_nthr = 1;
 
-        return spatial_thr_allowed;
+        if (S_nthr < 1) S_nthr = 1;
+        if (ithr < C_nthr * N_nthr * S_nthr) {
+            N_ithr = (ithr / S_nthr) % N_nthr ;
+            C_ithr = ithr / (N_nthr * S_nthr);
+            S_ithr = ithr % S_nthr;
+            balance211(C_blks, C_nthr, C_ithr, C_blk_s, C_blk_e);
+            balance211(N, N_nthr, N_ithr, N_s, N_e);
+            balance211(SP, S_nthr, S_ithr, S_s, S_e);
+        } else {
+            S_ithr = N_ithr = C_ithr = -ithr;
+            S_s = S_e = N_s = N_e = C_blk_s = C_blk_e = -1;
+        }
     }
 
-    void set_spatial_thr(const batch_normalization_pd_t *bdesc,
-        const int simd_w, const int data_size, int &is_spatial_thr) {
-        if (!mkldnn_thr_syncable()) { is_spatial_thr = 0; return; }
+    // spatial_thr_allowed is meant to help maintain
+    // consistent decisions about spatial threading
+    // between mutiple invocations of this routine.
+    // It is caller's responsibility to check the
+    // return value and pass it as a flag to the
+    // next call if needed.
+    if (S_nthr == 1)
+        spatial_thr_allowed = false;
 
-        int nthr = mkldnn_get_max_threads();
-        int SP = bdesc->W() * bdesc->D() * bdesc->H();
-        int C_PADDED = memory_desc_wrapper(bdesc->src_pd())
-            .blocking_desc().padding_dims[1];
-        assert(C_PADDED % simd_w == 0);
-
-        size_t data = bdesc->MB() * C_PADDED * SP * data_size;
-        size_t l3_size_ = get_cache_size(3, true) * nthr / 2;
-        bool do_blocking = (data >= l3_size_ / 2 && l3_size_ > 0);
-        int C_blks_per_iter{ 1 }, iters{ 1 };
-        int C_blks = C_PADDED / simd_w;
-
-        if (do_blocking) {
-            int num_tensors = bdesc->is_fwd() ? 1 : 2;
-            size_t working_set_size
-                = (bdesc->MB() * SP * simd_w * data_size) * num_tensors;
-            cache_balance(working_set_size, C_blks, C_blks_per_iter, iters);
-        }
+    return spatial_thr_allowed;
+}
 
-        // Spatial threading decision made in this function shall be consistent
-        // with thread_balance() behavior.
-        C_blks = do_blocking ? C_blks_per_iter : C_blks;
+bool is_spatial_thr(const batch_normalization_pd_t *bdesc, int simd_w,
+        int data_size) {
+    if (!mkldnn_thr_syncable()) return false;
+
+    int nthr = mkldnn_get_max_threads();
+    int SP = bdesc->W() * bdesc->D() * bdesc->H();
+    int C_PADDED = memory_desc_wrapper(bdesc->src_pd())
+        .blocking_desc().padding_dims[1];
+    assert(C_PADDED % simd_w == 0);
+
+    size_t data = bdesc->MB() * C_PADDED * SP * data_size;
+    size_t l3_size_ = get_cache_size(3, true) * nthr / 2;
+    bool do_blocking = (data >= l3_size_ / 2 && l3_size_ > 0);
+    int C_blks_per_iter{ 1 }, iters{ 1 };
+    int C_blks = C_PADDED / simd_w;
+
+    if (do_blocking) {
+        int num_tensors = bdesc->is_fwd() ? 1 : 2;
+        size_t working_set_size
+            = (bdesc->MB() * SP * simd_w * data_size) * num_tensors;
+        cache_balance(working_set_size, C_blks, C_blks_per_iter, iters);
+    }
 
-        if (nthr <= C_blks) {
-            is_spatial_thr = 0;
-        } else {
-            int S_nthr = 1;
-            if (do_blocking) {
-                int N_nthr = nstl::min(bdesc->MB(), nthr);
-                int C_nthr = nstl::min(C_blks, nthr / N_nthr);
-                S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
-            } else {
-                int C_nthr = math::gcd(nthr, C_blks);
-                int N_nthr = nstl::min(bdesc->MB(), nthr / C_nthr);
-                S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
-            }
-            if (S_nthr < 1) S_nthr = 1;
-            is_spatial_thr = (S_nthr > 1) ? 1 : 0;
-        }
+    // Spatial threading decision made in this function shall be consistent
+    // with thread_balance() behavior.
+    C_blks = do_blocking ? C_blks_per_iter : C_blks;
+
+    if (nthr <= C_blks) return false;
+
+    int S_nthr = 1;
+    if (do_blocking) {
+        int N_nthr = nstl::min(bdesc->MB(), nthr);
+        int C_nthr = nstl::min(C_blks, nthr / N_nthr);
+        S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
+    } else {
+        int C_nthr = math::gcd(nthr, C_blks);
+        int N_nthr = nstl::min(bdesc->MB(), nthr / C_nthr);
+        S_nthr = nstl::min(SP, nthr / (C_nthr * N_nthr));
     }
 
-};
+    return S_nthr > 1;
+}
+
+}
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.hpp
index 5be96fcbb..4c83515b4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_batch_normalization_utils.hpp
@@ -17,29 +17,27 @@
 #ifndef CPU_BATCH_NORMALIZATION_UTILS_HPP
 #define CPU_BATCH_NORMALIZATION_UTILS_HPP
 
-#include "c_types_map.hpp"
-#include "cpu_batch_normalization_pd.hpp"
+#include "batch_normalization_pd.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
-
 namespace bnorm_utils {
 
-    void cache_balance(size_t working_set_size, int C_blks, int &C_blks_per_iter,
-            int &iters);
+void cache_balance(size_t working_set_size, int C_blks, int &C_blks_per_iter,
+        int &iters);
 
-    bool thread_balance(bool do_blocking, bool spatial_thr_allowed, int ithr,
-            int nthr, int N, int C_blks, int SP, int &C_ithr, int &C_nthr,
-            int &C_blk_s, int &C_blk_e, int &N_ithr, int &N_nthr, int &N_s,
-            int &N_e, int &S_ithr, int &S_nthr, int &S_s, int &S_e);
+bool thread_balance(bool do_blocking, bool spatial_thr_allowed, int ithr,
+        int nthr, int N, int C_blks, int SP, int &C_ithr, int &C_nthr,
+        int &C_blk_s, int &C_blk_e, int &N_ithr, int &N_nthr, int &N_s,
+        int &N_e, int &S_ithr, int &S_nthr, int &S_s, int &S_e);
 
-    void set_spatial_thr(const batch_normalization_pd_t *bdesc,
-        const int simd_w, const int data_size, int &is_spatial_thr);
-
-};
+bool is_spatial_thr(const batch_normalization_pd_t *bdesc, int simd_w,
+        int data_size);
 
 }
 }
 }
+}
 
 #endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_binarization_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_binarization_pd.hpp
new file mode 100644
index 000000000..05d1059f4
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_binarization_pd.hpp
@@ -0,0 +1,86 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_BINARIZATION_PD_HPP
+#define CPU_BINARIZATION_PD_HPP
+
+#include <assert.h>
+
+#include "c_types_map.hpp"
+#include "binarization_pd.hpp"
+#include "cpu_engine.hpp"
+#include "cpu_memory.hpp"
+#include "cpu_primitive.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+struct cpu_binarization_fwd_pd_t: public binarization_fwd_pd_t {
+    using cpu_memory_pd_t = cpu_memory_t::pd_t;
+
+    cpu_binarization_fwd_pd_t(engine_t *engine, const binarization_desc_t *adesc,
+            const primitive_attr_t *attr, const binarization_fwd_pd_t *hint_fwd_pd)
+        : binarization_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+        , src_pd_(engine_, &desc_.src_desc)
+        , dst_pd_(engine_, &desc_.dst_desc)
+        , weights_pd_(engine_, &desc_.weights_desc) {}
+    virtual ~cpu_binarization_fwd_pd_t() {}
+
+    virtual const cpu_memory_pd_t *src_pd(int index = 0) const override
+    { return index == 0 ? &src_pd_ : nullptr; }
+    virtual const cpu_memory_pd_t *dst_pd(int index = 0) const override
+    { return index == 0 ? &dst_pd_ : nullptr; }
+    virtual const cpu_memory_pd_t *weights_pd(int index = 0) const override {
+        if (index == 0) return &weights_pd_;
+        return nullptr;
+    }
+
+protected:
+    cpu_memory_pd_t src_pd_, dst_pd_, weights_pd_;
+
+    inline memory_format_t src_format()
+    {
+        using namespace memory_format;
+        return utils::pick(desc_.src_desc.ndims - 3, ncw, nchw, ncdhw);
+    }
+    inline memory_format_t wei_format()
+    {
+        using namespace memory_format;
+        return x;
+    }
+
+    virtual status_t set_default_params() {
+        using namespace memory_format;
+        if (src_pd_.desc()->format == any)
+            CHECK(src_pd_.set_format(src_format()));
+        if (dst_pd_.desc()->format == any)
+            CHECK(dst_pd_.set_format(src_pd_.desc()->format));
+        if (weights_pd_.desc()->format == any)
+            CHECK(weights_pd_.set_format(wei_format()));
+        return status::success;
+    }
+
+    virtual status_t init() = 0;
+};
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_binary_convolution_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_binary_convolution_pd.hpp
new file mode 100644
index 000000000..a2474ef49
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_binary_convolution_pd.hpp
@@ -0,0 +1,91 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_BINARY_CONVOLUTION_FWD_PD_HPP
+#define CPU_BINARY_CONVOLUTION_FWD_PD_HPP
+
+#include <assert.h>
+
+#include "c_types_map.hpp"
+#include "binary_convolution_pd.hpp"
+#include "cpu_engine.hpp"
+#include "cpu_memory.hpp"
+#include "cpu_primitive.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+struct _cpu_binary_convolution_fwd_pd_t: public _binary_convolution_fwd_pd_t {
+    using cpu_memory_pd_t = cpu_memory_t::pd_t;
+
+    _cpu_binary_convolution_fwd_pd_t(engine_t *engine,
+            const typename _cpu_binary_convolution_fwd_pd_t::base_desc_t *adesc,
+            const primitive_attr_t *attr,
+            const typename _cpu_binary_convolution_fwd_pd_t::base_class *hint_fwd_pd)
+        : _binary_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+        , src_pd_(this->engine_, &this->cdesc_().src_desc)
+        , dst_pd_(this->engine_, &this->cdesc_().dst_desc)
+        , weights_pd_(this->engine_, &this->cdesc_().weights_desc) {}
+    virtual ~_cpu_binary_convolution_fwd_pd_t() {}
+
+    virtual const cpu_memory_pd_t *src_pd(int index = 0) const override
+    { return index == 0 ? &src_pd_ : nullptr; }
+    virtual const cpu_memory_pd_t *dst_pd(int index = 0) const override
+    { return index == 0 ? &dst_pd_ : nullptr; }
+    virtual const cpu_memory_pd_t *weights_pd(int index = 0) const override {
+        if (index == 0) return &weights_pd_;
+        return nullptr;
+    }
+
+protected:
+    cpu_memory_pd_t src_pd_, dst_pd_;
+    cpu_memory_pd_t weights_pd_;
+
+    inline memory_format_t src_format()
+    {
+        using namespace memory_format;
+        return utils::pick(this->cdesc_().src_desc.ndims - 3, ncw, nchw, ncdhw);
+    }
+    inline memory_format_t wei_format()
+    {
+        using namespace memory_format;
+        return this->with_groups()
+            ? utils::pick(this->cdesc_().src_desc.ndims - 3, goiw, goihw, goidhw)
+            : utils::pick(this->cdesc_().src_desc.ndims - 3, oiw, oihw, oidhw);
+    }
+
+    virtual status_t set_default_params() {
+        using namespace memory_format;
+        if (src_pd_.desc()->format == any)
+            CHECK(src_pd_.set_format(src_format()));
+        if (dst_pd_.desc()->format == any)
+            CHECK(dst_pd_.set_format(src_pd_.desc()->format));
+        if (weights_pd_.desc()->format == any)
+            CHECK(weights_pd_.set_format(wei_format()));
+        return status::success;
+    }
+};
+
+using cpu_binary_convolution_fwd_pd_t = _cpu_binary_convolution_fwd_pd_t;
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_concat.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_concat.hpp
index 477566bae..edfb26495 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_concat.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_concat.hpp
@@ -55,7 +55,7 @@ namespace cpu {
         } \
         return ret; \
     } \
-    virtual pd_t *clone() const override { return nullptr; } \
+    virtual pd_t *clone() const override { return new pd_t(*this); } \
     virtual const char *name() const override { return impl_name; }
 #define DECLARE_CPU_CONCAT_PD_T(impl_name, ...) \
     DECLARE_CPU_CONCAT_PD_t(impl_name, __VA_ARGS__)
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_convolution_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_convolution_pd.hpp
index 1db3f4aa8..f50287a93 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_convolution_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_convolution_pd.hpp
@@ -31,20 +31,19 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu>
-struct _cpu_convolution_fwd_pd_t: public _convolution_fwd_pd_t<with_relu> {
+struct cpu_convolution_fwd_pd_t: public convolution_fwd_pd_t {
     using cpu_memory_pd_t = cpu_memory_t::pd_t;
 
-    _cpu_convolution_fwd_pd_t(engine_t *engine,
-            const typename _cpu_convolution_fwd_pd_t::base_desc_t *adesc,
+    cpu_convolution_fwd_pd_t(engine_t *engine,
+            const convolution_desc_t *adesc,
             const primitive_attr_t *attr,
-            const typename _cpu_convolution_fwd_pd_t::base_class *hint_fwd_pd)
-        : _convolution_fwd_pd_t<with_relu>(engine, adesc, attr, hint_fwd_pd)
-        , src_pd_(this->engine_, &this->cdesc_().src_desc)
-        , dst_pd_(this->engine_, &this->cdesc_().dst_desc)
-        , weights_pd_(this->engine_, &this->cdesc_().weights_desc)
-        , bias_pd_(this->engine_, &this->cdesc_().bias_desc) {}
-    virtual ~_cpu_convolution_fwd_pd_t() {}
+            const typename cpu_convolution_fwd_pd_t::base_class *hint_fwd_pd)
+        : convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+        , src_pd_(this->engine_, &this->desc()->src_desc)
+        , dst_pd_(this->engine_, &this->desc()->dst_desc)
+        , weights_pd_(this->engine_, &this->desc()->weights_desc)
+        , bias_pd_(this->engine_, &this->desc()->bias_desc) {}
+    virtual ~cpu_convolution_fwd_pd_t() {}
 
     virtual const cpu_memory_pd_t *src_pd(int index = 0) const override
     { return index == 0 ? &src_pd_ : nullptr; }
@@ -56,13 +55,26 @@ struct _cpu_convolution_fwd_pd_t: public _convolution_fwd_pd_t<with_relu> {
         return nullptr;
     }
 
-    bool want_padded_bias() const {
-        if (!this->with_bias()) return false;
+    bool has_padded_dst() const {
         memory_desc_wrapper dst_d(&dst_pd_);
         if (!dst_d.is_blocking_desc()) return false;
         return this->OC() != dst_d.blocking_desc().padding_dims[1];
     }
 
+    bool wants_padded_bias() const {
+        if (!this->with_bias()) return false;
+        return has_padded_dst();
+    }
+
+    bool wants_zero_pad_dst(bool jit_impl = true) const {
+        if (!has_padded_dst()) return false;
+        const auto &po = this->attr()->post_ops_;
+        int idx;
+        if ((idx = po.find(primitive_kind::eltwise)) == -1) return false;
+        return !math::eltwise_fwd_preserves_zero(po.entry_[idx].eltwise.alg,
+                jit_impl);
+    }
+
 protected:
     cpu_memory_pd_t src_pd_, dst_pd_;
     cpu_memory_pd_t weights_pd_, bias_pd_;
@@ -70,14 +82,14 @@ protected:
     inline memory_format_t src_format()
     {
         using namespace memory_format;
-        return utils::pick(this->cdesc_().src_desc.ndims - 3, ncw, nchw, ncdhw);
+        return utils::pick(this->desc()->src_desc.ndims - 3, ncw, nchw, ncdhw);
     }
     inline memory_format_t wei_format()
     {
         using namespace memory_format;
         return this->with_groups()
-            ? utils::pick(this->cdesc_().src_desc.ndims - 3, goiw, goihw, goidhw)
-            : utils::pick(this->cdesc_().src_desc.ndims - 3, oiw, oihw, oidhw);
+            ? utils::pick(this->desc()->src_desc.ndims - 3, goiw, goihw, goidhw)
+            : utils::pick(this->desc()->src_desc.ndims - 3, oiw, oihw, oidhw);
     }
 
     virtual status_t set_default_params() {
@@ -90,13 +102,12 @@ protected:
             CHECK(weights_pd_.set_format(wei_format()));
         if (bias_pd_.desc()->format == any)
             CHECK(bias_pd_.set_format(x));
+        if (this->desc()->alg_kind == alg_kind::convolution_auto)
+            CHECK(this->set_alg_kind(alg_kind::convolution_direct));
         return status::success;
     }
 };
 
-using cpu_convolution_fwd_pd_t = _cpu_convolution_fwd_pd_t<false>;
-using cpu_convolution_relu_fwd_pd_t = _cpu_convolution_fwd_pd_t<true>;
-
 struct cpu_convolution_bwd_data_pd_t: public convolution_bwd_data_pd_t {
     using cpu_memory_pd_t = cpu_memory_t::pd_t;
 
@@ -148,6 +159,8 @@ protected:
            CHECK(weights_pd_.set_format(wei_format()));
         if (bias_pd_.desc()->format == any)
             CHECK(bias_pd_.set_format(x));
+        if (this->desc()->alg_kind == alg_kind::convolution_auto)
+            CHECK(this->set_alg_kind(alg_kind::convolution_direct));
         return status::success;
     }
 };
@@ -177,7 +190,7 @@ struct cpu_convolution_bwd_weights_pd_t: public convolution_bwd_weights_pd_t {
             return  nullptr;
         }
 
-    bool want_padded_bias() const {
+    bool wants_padded_bias() const {
         if (!this->with_bias()) return false;
         memory_desc_wrapper diff_dst_d(&diff_dst_pd_);
         if (!diff_dst_d.is_blocking_desc()) return false;
@@ -212,6 +225,8 @@ protected:
             CHECK(diff_weights_pd_.set_format(wei_format()));
         if (diff_bias_pd_.desc()->format == any)
             CHECK(diff_bias_pd_.set_format(x));
+        if (this->desc()->alg_kind == alg_kind::convolution_auto)
+            CHECK(this->set_alg_kind(alg_kind::convolution_direct));
         return status::success;
     }
 };
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_deconvolution_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_deconvolution_pd.hpp
index cd9cdfede..d236c23c7 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_deconvolution_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_deconvolution_pd.hpp
@@ -28,6 +28,38 @@
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
+#define DECLARE_DECONVOLUTION_PD_t(...)                                        \
+    virtual pd_t *clone() const override { return new pd_t(*this); }           \
+    virtual status_t create_primitive(primitive_t **primitive,                 \
+            const primitive_at_t *inputs, const primitive_t **outputs)         \
+            const override {                                                   \
+        double ms = get_msec();                                                \
+        using namespace prop_kind;                                             \
+        primitive_t::input_vector ins(inputs, inputs + this->n_inputs());      \
+        primitive_t::output_vector outs(outputs, outputs + this->n_outputs()); \
+        auto ret = safe_ptr_assign<primitive_t>(                               \
+                *primitive, new (__VA_ARGS__)(this, ins, outs));               \
+        primitive_t *conv_primitive;                                           \
+        if (this->desc()->prop_kind == backward_weights) {                     \
+            primitive_at_t conv_inputs[2];                                     \
+            conv_inputs[0] = inputs[1];                                        \
+            conv_inputs[1] = inputs[0];                                        \
+            conv_pd_->create_primitive(                                        \
+                    (&conv_primitive), conv_inputs, outputs);                  \
+        } else                                                                 \
+            conv_pd_->create_primitive((&conv_primitive), inputs, outputs);    \
+        ((__VA_ARGS__ *)(*primitive))->conv_p_ = conv_primitive;               \
+        ms = get_msec() - ms;                                                  \
+        if (mkldnn_verbose()->level >= 2) {                                    \
+            printf("mkldnn_verbose,create,%s,%g\n", this->info(), ms);         \
+            fflush(0);                                                         \
+        }                                                                      \
+        return ret;                                                            \
+    }                                                                          \
+    virtual const char *name() const override { return conv_pd_->name(); }
+
+#define DECLARE_DECONVOLUTION_PD_T(...) DECLARE_DECONVOLUTION_PD_t(__VA_ARGS__)
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_engine.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_engine.cpp
index 104ce8886..738725d7f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_engine.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_engine.cpp
@@ -24,7 +24,7 @@
 #include "cpu_concat.hpp"
 #include "cpu_sum.hpp"
 
-#include "cpu/ref_rnn.hpp"
+#include "cpu/rnn/ref_rnn.hpp"
 
 #include "cpu/jit_avx512_core_x8s8s32x_1x1_convolution.hpp"
 #include "cpu/jit_avx512_common_1x1_convolution.hpp"
@@ -39,14 +39,15 @@
 #include "cpu/gemm_convolution.hpp"
 #include "cpu/gemm_x8s8s32x_convolution.hpp"
 #include "cpu/ref_convolution.hpp"
-#include "cpu/jit_avx512_core_u8s8s32x_deconvolution.hpp"
+#include "cpu/jit_avx512_core_x8s8s32x_deconvolution.hpp"
+#include "cpu/jit_avx512_core_x8s8s32x_1x1_deconvolution.hpp"
 #include "cpu/ref_deconvolution.hpp"
 #include "cpu/ref_shuffle.hpp"
 #include "cpu/jit_uni_eltwise.hpp"
 #include "cpu/ref_eltwise.hpp"
 #include "cpu/ref_softmax.hpp"
 #include "cpu/jit_uni_pooling.hpp"
-#include "cpu/jit_avx512_core_i8i8_pooling.hpp"
+#include "cpu/jit_uni_i8i8_pooling.hpp"
 #include "cpu/ref_pooling.hpp"
 #include "cpu/nchw_pooling.hpp"
 #include "cpu/nhwc_pooling.hpp"
@@ -59,7 +60,7 @@
 #include "cpu/nspc_batch_normalization.hpp"
 #include "cpu/ref_inner_product.hpp"
 #include "cpu/gemm_inner_product.hpp"
-#include "cpu/gemm_u8s8s32x_inner_product.hpp"
+#include "cpu/gemm_x8s8s32x_inner_product.hpp"
 #include "cpu/jit_uni_dw_convolution.hpp"
 #include "cpu/jit_avx512_core_u8s8s32x_wino_convolution.hpp"
 #include "cpu/jit_avx512_core_fp32_wino_conv_2x3.hpp"
@@ -69,9 +70,13 @@
 #include "cpu/jit_uni_depthwise.hpp"
 #include "cpu/ref_depthwise.hpp"
 #include "cpu/jit_uni_x8s8s32x_convolution.hpp"
-#include "cpu/jit_uni_x8s8s32x_1x1_convolution.hpp"
 #include "cpu/jit_uni_x8s8s32x_dw_convolution.hpp"
-#include "cpu/jit_uni_i8i8_pooling.hpp"
+#include "cpu/jit_sse42_i8i8_pooling.hpp"
+#include "cpu/jit_uni_planar_convolution.hpp"
+#include "cpu/jit_uni_binary_convolution.hpp"
+#include "cpu/ref_binary_convolution.hpp"
+#include "cpu/jit_uni_binarization.hpp"
+#include "cpu/ref_binarization.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -105,9 +110,11 @@ using namespace mkldnn::impl::data_type;
 #define INSTANCE(...) &primitive_desc_t::create<__VA_ARGS__::pd_t>
 static const pd_create_f cpu_impl_list[] = {
     /* RNN */
-    INSTANCE(ref_rnn_fwd_t),
-    INSTANCE(ref_rnn_bwd_t),
+    INSTANCE(ref_rnn_fwd_f32_t),
+    INSTANCE(ref_rnn_fwd_u8s8_t),
+    INSTANCE(ref_rnn_bwd_f32_t),
     /* conv */
+    INSTANCE(jit_avx512_common_planar_convolution_fwd_t),
     INSTANCE(jit_avx512_common_dw_convolution_fwd_t),
     INSTANCE(jit_avx512_common_dw_convolution_bwd_data_t),
     INSTANCE(jit_avx512_common_dw_convolution_bwd_weights_t),
@@ -126,6 +133,7 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(jit_avx512_common_convolution_fwd_t<f32>),
     INSTANCE(jit_avx512_common_convolution_bwd_data_t<f32>),
     INSTANCE(jit_avx512_common_convolution_bwd_weights_t<f32>),
+    INSTANCE(jit_avx2_planar_convolution_fwd_t),
     INSTANCE(jit_avx2_dw_convolution_fwd_t),
     INSTANCE(jit_avx2_dw_convolution_bwd_data_t),
     INSTANCE(jit_avx2_dw_convolution_bwd_weights_t),
@@ -194,14 +202,14 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(jit_sse42_x8s8s32x_convolution_fwd_t<s8,s32>),
     INSTANCE(jit_sse42_x8s8s32x_convolution_fwd_t<s8,u8>),
     INSTANCE(jit_sse42_x8s8s32x_convolution_fwd_t<s8,s8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, u8, s32>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, u8, u8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, u8, s8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, u8, f32>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, s8, s32>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, s8, u8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, s8, s8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<false, s8, f32>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<u8, s32>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<u8, u8>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<u8, s8>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<u8, f32>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<s8, s32>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<s8, u8>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<s8, s8>),
+    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<s8, f32>),
     INSTANCE(_gemm_u8s8s32x_convolution_bwd_data_t<s32>),
     INSTANCE(_gemm_u8s8s32x_convolution_bwd_data_t<u8>),
     INSTANCE(_gemm_u8s8s32x_convolution_bwd_data_t<s8>),
@@ -218,10 +226,22 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(ref_convolution_bwd_data_t<u8, s8, u8, s32>),
     INSTANCE(ref_convolution_bwd_weights_t<s16, s32, s16, s32>),
     /* deconv */
-    INSTANCE(_jit_avx512_core_u8s8s32x_deconvolution_fwd_t<s32>),
-    INSTANCE(_jit_avx512_core_u8s8s32x_deconvolution_fwd_t<u8>),
-    INSTANCE(_jit_avx512_core_u8s8s32x_deconvolution_fwd_t<s8>),
-    INSTANCE(_jit_avx512_core_u8s8s32x_deconvolution_fwd_t<f32>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<u8,f32>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<u8,s32>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<u8,u8>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<u8,s8>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<s8,f32>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<s8,s32>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<s8,u8>),
+    INSTANCE(jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<s8,s8>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<u8,s32>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<u8,u8>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<u8,s8>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<u8,f32>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<s8,s32>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<s8,u8>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<s8,s8>),
+    INSTANCE(_jit_avx512_core_x8s8s32x_deconvolution_fwd_t<s8,f32>),
     INSTANCE(ref_deconvolution_bwd_weights_t),
     INSTANCE(ref_deconvolution_bwd_data_t),
     INSTANCE(ref_deconvolution_fwd_t),
@@ -269,9 +289,9 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(ref_pooling_fwd_t<f32>),
     INSTANCE(ref_pooling_bwd_t<f32>),
     /* pool (int) */
-    INSTANCE(jit_avx512_core_i8i8_pooling_fwd_t),
+    INSTANCE(jit_uni_i8i8_pooling_fwd_t<avx512_core>),
     INSTANCE(jit_uni_i8i8_pooling_fwd_t<avx2>),
-    INSTANCE(jit_uni_i8i8_pooling_fwd_t<sse42>),
+    INSTANCE(jit_sse42_i8i8_pooling_fwd_t),
     INSTANCE(ref_pooling_fwd_t<s32>),
     INSTANCE(ref_pooling_fwd_t<s16, s32>),
     INSTANCE(ref_pooling_fwd_t<s8, s32>),
@@ -307,69 +327,35 @@ static const pd_create_f cpu_impl_list[] = {
     INSTANCE(ref_inner_product_bwd_data_t<f32, f32, f32, f32>),
     INSTANCE(ref_inner_product_bwd_weights_t<f32>),
     /* inner product (int) */
-    INSTANCE(gemm_u8s8s32x_inner_product_fwd_t<u8>),
-    INSTANCE(gemm_u8s8s32x_inner_product_fwd_t<s8>),
-    INSTANCE(gemm_u8s8s32x_inner_product_fwd_t<s32>),
-    INSTANCE(gemm_u8s8s32x_inner_product_fwd_t<f32>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<u8, u8>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<u8, s8>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<u8, s32>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<u8, f32>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<s8, u8>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<s8, s8>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<s8, s32>),
+    INSTANCE(gemm_x8s8s32x_inner_product_fwd_t<s8, f32>),
     INSTANCE(ref_inner_product_fwd_t<u8, s8, u8, s32>),
     INSTANCE(ref_inner_product_fwd_t<u8, s8, s8, s32>),
     INSTANCE(ref_inner_product_fwd_t<u8, s8, s32, s32>),
     INSTANCE(ref_inner_product_fwd_t<u8, s8, f32, s32>),
     INSTANCE(ref_inner_product_fwd_t<s16, s16, s32, s32>),
     INSTANCE(ref_inner_product_bwd_data_t<s32, s16, s16, s32>),
-    /* conv_eltwise */
-    INSTANCE(jit_avx512_common_dw_convolution_relu_t),
-    INSTANCE(jit_avx512_common_convolution_winograd_relu_t),
-    INSTANCE(jit_avx512_common_1x1_convolution_relu_f32_t),
-    INSTANCE(jit_avx512_common_convolution_relu_t<f32>),
-    INSTANCE(jit_avx2_dw_convolution_relu_t),
-    INSTANCE(jit_avx2_1x1_convolution_relu_t),
-    INSTANCE(jit_sse42_dw_convolution_relu_t),
-    INSTANCE(jit_sse42_1x1_convolution_relu_t),
-    INSTANCE(jit_avx2_convolution_relu_t),
-    INSTANCE(jit_sse42_convolution_relu_t),
-    INSTANCE(gemm_convolution_relu_t),
-    INSTANCE(ref_convolution_relu_t<f32>),
-    /* conv_eltwise (int) */
-    INSTANCE(jit_avx512_core_u8s8s32x_wino_convolution_relu_t<f32>),
-    INSTANCE(jit_avx512_core_u8s8s32x_wino_convolution_relu_t<s32>),
-    INSTANCE(jit_avx512_core_u8s8s32x_wino_convolution_relu_t<s8>),
-    INSTANCE(jit_avx512_core_u8s8s32x_wino_convolution_relu_t<u8>),
-    INSTANCE(jit_avx512_common_1x1_convolution_relu_s16s16s32_t),
-    INSTANCE(jit_avx512_common_convolution_relu_t<s16, s16, s32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<u8,f32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<u8,s32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<u8,s8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<u8,u8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<s8,f32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<s8,s32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<s8,s8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_1x1_convolution_relu_t<s8,u8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<u8,f32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<u8,s32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<u8,u8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<u8,s8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<s8,f32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<s8,s32>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<s8,u8>),
-    INSTANCE(jit_avx512_core_x8s8s32x_convolution_relu_t<s8,s8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, u8, s32>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, u8, u8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, u8, s8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, u8, f32>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, s8, s32>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, s8, u8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, s8, s8>),
-    INSTANCE(_gemm_x8s8s32x_convolution_fwd_t<true, s8, f32>),
-    INSTANCE(ref_convolution_relu_t<s16, s16, s32, s32>),
-    INSTANCE(ref_convolution_relu_t<u8, s8, s32, s32>),
-    INSTANCE(ref_convolution_relu_t<u8, s8, s8, s32>),
-    INSTANCE(ref_convolution_relu_t<u8, s8, u8, s32>),
     /* roi pooling */
     INSTANCE(jit_uni_roi_pooling_fwd_t<avx512_common>),
     INSTANCE(jit_uni_roi_pooling_fwd_t<avx2>),
     INSTANCE(jit_uni_roi_pooling_fwd_t<sse42>),
     INSTANCE(ref_roi_pooling_fwd_t<data_type::f32>),
+    /* binary convolution */
+//    INSTANCE(jit_uni_binary_convolution_fwd_t<avx512_common>),
+    INSTANCE(jit_uni_binary_convolution_fwd_t<avx2>),
+    INSTANCE(jit_uni_binary_convolution_fwd_t<sse42>),
+    INSTANCE(ref_binary_convolution_fwd_t),
+    /* binarization */
+    INSTANCE(jit_uni_binarization_fwd_t<avx512_common>),
+    INSTANCE(jit_uni_binarization_fwd_t<avx2>),
+    INSTANCE(jit_uni_binarization_fwd_t<sse42>),
+    INSTANCE(ref_binarization_fwd_t<f32>),
     /* eol */
     nullptr,
 };
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.cpp
index 4bbff2223..e1c2dd66b 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.cpp
@@ -63,6 +63,7 @@ typed_zero_pad_data(
 
 template <data_type_t dt, memory_format_t fmt>
 typename utils::enable_if<false
+|| format_traits<fmt>::blk_fmt == bf::_4o
 || format_traits<fmt>::blk_fmt == bf::_8o
 || format_traits<fmt>::blk_fmt == bf::_16o
 >::type typed_zero_pad_weights(const memory_desc_wrapper &m_d,
@@ -234,10 +235,10 @@ void typed_zero_pad_generic_blocked(const memory_desc_wrapper &m_d,
     assert(step_dim >= 0 && "no zero padding is required");
     if (step_dim < 0) return;
 
-    parallel_nd(nelems, [&](ptrdiff_t e) {
+    parallel_nd(nelems / step, [&](ptrdiff_t e1) {
         bool need_zero = false;
 
-        ptrdiff_t idx = e / step;
+        ptrdiff_t idx = e1;
         for (int d = step_dim; d >= 0; --d) {
             if (idx % pdims[d] >= dims[d]) {
                 need_zero = true;
@@ -248,14 +249,14 @@ void typed_zero_pad_generic_blocked(const memory_desc_wrapper &m_d,
 
         if (need_zero) {
             for (ptrdiff_t e0 = 0; e0 < step; ++e0)
-                data[m_d.off_l(e + e0, true)] = 0;
+                data[m_d.off_l(e1 * step + e0, true)] = 0;
         }
     });
 }
 
 template <data_type_t dt>
-status_t cpu_memory_t::typed_zero_pad() {
-    const memory_desc_wrapper mpd(&conf_);
+status_t cpu_memory_t::typed_zero_pad() const {
+    const memory_desc_wrapper mpd(pd());
 
     // FIXME: guard this check for non-blocked layout
     if (mpd.nelems(false) == mpd.nelems(true))
@@ -267,9 +268,12 @@ status_t cpu_memory_t::typed_zero_pad() {
     /* data */
 #   define MAYBE_DATA(f) if (fmt == f) \
     { typed_zero_pad_data<dt, f>(mpd, data); return success; }
+    MAYBE_DATA(nCw4c);
     MAYBE_DATA(nCw8c);
     MAYBE_DATA(nCw16c);
+    MAYBE_DATA(nChw4c);
     MAYBE_DATA(nChw8c);
+    MAYBE_DATA(nCdhw4c);
     MAYBE_DATA(nCdhw8c);
     MAYBE_DATA(nChw16c);
     MAYBE_DATA(nCdhw16c);
@@ -277,10 +281,12 @@ status_t cpu_memory_t::typed_zero_pad() {
     /* weights */
 #   define MAYBE_WEIGHTS(f) if (fmt == f) \
     { typed_zero_pad_weights<dt, f>(mpd, data); return success; }
+    MAYBE_WEIGHTS(OIdhw4i4o);
     MAYBE_WEIGHTS(OIdhw8i8o);
     MAYBE_WEIGHTS(OIdhw8o8i);
     MAYBE_WEIGHTS(OIdhw16i16o);
     MAYBE_WEIGHTS(OIdhw16o16i);
+    MAYBE_WEIGHTS(Oidhw4o);
     MAYBE_WEIGHTS(Oidhw16o);
     MAYBE_WEIGHTS(Odhwi16o);
     MAYBE_WEIGHTS(Odhwi8o);
@@ -288,15 +294,18 @@ status_t cpu_memory_t::typed_zero_pad() {
     MAYBE_WEIGHTS(oIhw16i);
     MAYBE_WEIGHTS(oIdhw8i);
     MAYBE_WEIGHTS(oIdhw16i);
+    MAYBE_WEIGHTS(OIhw4i4o);
     MAYBE_WEIGHTS(OIhw8i8o);
     MAYBE_WEIGHTS(OIhw16i16o);
     MAYBE_WEIGHTS(OIhw4i16o4i);
     MAYBE_WEIGHTS(OIhw4i16o4i_s8s8);
+    MAYBE_WEIGHTS(OIw4i4o);
     MAYBE_WEIGHTS(Owi8o);
     MAYBE_WEIGHTS(OIw8i8o);
     MAYBE_WEIGHTS(OIw8o8i);
     MAYBE_WEIGHTS(OIw16i16o);
     MAYBE_WEIGHTS(OIw16o16i);
+    MAYBE_WEIGHTS(Oiw4o);
     MAYBE_WEIGHTS(Oiw16o);
     MAYBE_WEIGHTS(Owi16o);
     MAYBE_WEIGHTS(OIw8i16o2i);
@@ -308,18 +317,27 @@ status_t cpu_memory_t::typed_zero_pad() {
     MAYBE_WEIGHTS(OIhw8o8i);
     MAYBE_WEIGHTS(OIhw16o16i);
     MAYBE_WEIGHTS(IOhw16o16i);
+    MAYBE_WEIGHTS(Oihw4o);
     MAYBE_WEIGHTS(Oihw16o);
     MAYBE_WEIGHTS(Ohwi8o);
+    MAYBE_WEIGHTS(Ohwi4o);
     MAYBE_WEIGHTS(Ohwi16o);
+    MAYBE_WEIGHTS(gOIhw4o4i_s8s8);
+    MAYBE_WEIGHTS(gOIhw4o4i_s8s8);
+    MAYBE_WEIGHTS(gOIhw4i4o);
     MAYBE_WEIGHTS(gOIhw8i8o);
     MAYBE_WEIGHTS(gOIhw16i16o);
     MAYBE_WEIGHTS(gOIhw4i16o4i);
     MAYBE_WEIGHTS(gOIhw4i16o4i_s8s8);
+    MAYBE_WEIGHTS(gOIhw2i8o4i);
+    MAYBE_WEIGHTS(gOIhw2i8o4i_s8s8);
+    MAYBE_WEIGHTS(gOIw4i4o);
     MAYBE_WEIGHTS(gOwi8o);
     MAYBE_WEIGHTS(gOIw8i8o);
     MAYBE_WEIGHTS(gOIw8o8i);
     MAYBE_WEIGHTS(gOIw16i16o);
     MAYBE_WEIGHTS(gOIw16o16i);
+    MAYBE_WEIGHTS(gOiw4o);
     MAYBE_WEIGHTS(gOiw16o);
     MAYBE_WEIGHTS(gOwi16o);
     MAYBE_WEIGHTS(gOIw8i16o2i);
@@ -331,13 +349,17 @@ status_t cpu_memory_t::typed_zero_pad() {
     MAYBE_WEIGHTS(gOIhw8o8i);
     MAYBE_WEIGHTS(gOIhw16o16i);
     MAYBE_WEIGHTS(gIOhw16o16i);
+    MAYBE_WEIGHTS(gOihw4o);
     MAYBE_WEIGHTS(gOihw16o);
     MAYBE_WEIGHTS(gOhwi8o);
+    MAYBE_WEIGHTS(gOhwi4o);
     MAYBE_WEIGHTS(gOhwi16o);
+    MAYBE_WEIGHTS(gOIdhw4i4o);
     MAYBE_WEIGHTS(gOIdhw8i8o);
     MAYBE_WEIGHTS(gOIdhw8o8i);
     MAYBE_WEIGHTS(gOIdhw16i16o);
     MAYBE_WEIGHTS(gOIdhw16o16i);
+    MAYBE_WEIGHTS(gOidhw4o);
     MAYBE_WEIGHTS(gOidhw16o);
     MAYBE_WEIGHTS(gOdhwi16o);
     MAYBE_WEIGHTS(gOdhwi8o);
@@ -354,8 +376,8 @@ status_t cpu_memory_t::typed_zero_pad() {
     return unimplemented;
 }
 
-status_t cpu_memory_t::zero_pad() {
-    memory_desc_wrapper md(&conf_);
+status_t cpu_memory_t::zero_pad() const {
+    memory_desc_wrapper md(pd());
     const bool skip_zeroing = false
         || data_ == nullptr
         || md.is_zero()
@@ -368,6 +390,7 @@ status_t cpu_memory_t::zero_pad() {
         case s16: return typed_zero_pad<s16>();
         case s8: return typed_zero_pad<s8>();
         case u8: return typed_zero_pad<u8>();
+        case bin: return typed_zero_pad<u8>();
         default: assert(!"memory is undefined"); return unimplemented;
     }
     return unimplemented;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.hpp
index 9932e7b8e..830adcc63 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_memory.hpp
@@ -49,12 +49,12 @@ struct cpu_memory_t: public cpu_primitive_t {
         }
     };
 
-    cpu_memory_t(const pd_t *mpd)
-        : cpu_primitive_t(&conf_, input_vector(), output_vector(1, this))
-        , conf_(*mpd), data_(nullptr) {}
+    cpu_memory_t(const pd_t *apd)
+        : cpu_primitive_t(apd, input_vector(), output_vector(1, this))
+        , data_(nullptr) {}
     virtual ~cpu_memory_t() {}
 
-    virtual void execute(mkldnn::impl::event_t *e)
+    virtual void execute(mkldnn::impl::event_t *e) const
     { e->set_state(event_t::ready); }
 
     virtual status_t get_data_handle(void **handle) const {
@@ -71,13 +71,14 @@ struct cpu_memory_t: public cpu_primitive_t {
     virtual const char* const_memory(size_t output_index = 0) const
     { assert(output_index == 0); return data_; }
 
+    mkldnn::impl::status_t zero_pad() const;
+
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     char *data_;
 
     template <mkldnn::impl::data_type_t>
-    mkldnn::impl::status_t typed_zero_pad();
-    mkldnn::impl::status_t zero_pad();
+    mkldnn::impl::status_t typed_zero_pad() const;
 };
 
 struct cpu_view_t: public cpu_primitive_t {
@@ -168,12 +169,12 @@ struct cpu_view_t: public cpu_primitive_t {
             : view_pd_t(src_pd.engine()), src_pd_(src_pd), dst_pd_(dst_pd) {}
     };
 
-    cpu_view_t(const pd_t *conf, const input_vector &inputs)
-        : cpu_primitive_t(&conf_, inputs, output_vector(1, this)), conf_(*conf)
+    cpu_view_t(const pd_t *apd, const input_vector &inputs)
+        : cpu_primitive_t(apd, inputs, output_vector(1, this))
     {}
     virtual ~cpu_view_t() {}
 
-    virtual void execute(mkldnn::impl::event_t *e)
+    virtual void execute(mkldnn::impl::event_t *e) const
     { e->set_state(event_t::ready); }
 
     virtual char *memory(size_t output_index = 0) const
@@ -182,7 +183,7 @@ struct cpu_view_t: public cpu_primitive_t {
     { assert(output_index == 0); return input_memory(); }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_primitive.cpp
index 92e447c05..80e06e74b 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_primitive.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
+* Copyright 2018 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,27 +14,20 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_test_common.hpp"
-#include "gtest/gtest.h"
+#include "cpu_primitive.hpp"
+#include "cpu_memory.hpp"
 
-#include "mkldnn.hpp"
-#include "test_convolution_relu_forward_common.hpp"
 namespace mkldnn {
+namespace impl {
+namespace cpu {
 
-using convolution_test = convolution_relu_test<float, float, float, float>;
-
-TEST_P(convolution_test, TestConvolution)
+const cpu_memory_t *cpu_primitive_t::output_memory_primitive(size_t index) const
 {
+    return static_cast<const cpu_memory_t *>(outputs()[index]);
 }
 
-#define FP32
-#define DIRECTION_FORWARD
-#include "convolution_common.h"
-
-#undef ELTWISE_ALPHA
-#define ELTWISE_ALPHA 0.2f
 
-#undef ELTWISE_BETA
-#define ELTWISE_BETA 0.0f
-#include "convolution_common.h"
 }
+}
+}
+
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_primitive.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_primitive.hpp
index 136aa26f4..13aa0785b 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_primitive.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_primitive.hpp
@@ -21,18 +21,33 @@
 
 #include "c_types_map.hpp"
 #include "event.hpp"
+#include "memory_tracking.hpp"
 #include "primitive.hpp"
+#include "scratchpad.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
+struct cpu_memory_t;
+
 struct cpu_primitive_t: public primitive_t {
     cpu_primitive_t(const primitive_desc_t *pd, const input_vector &inputs,
-            const output_vector &outputs)
-        : primitive_t(pd, inputs, outputs)
-    {}
-    virtual ~cpu_primitive_t() {}
+            const output_vector &outputs, bool use_global_scratchpad = false)
+        : primitive_t(pd, inputs, outputs), scratchpad_buffer_(nullptr)
+        , global_scratchpad_(nullptr)
+    {
+        size_t scratchpad_size = this->pd()->scratchpad_registry().size();
+        if (use_global_scratchpad)
+            global_scratchpad_ = create_scratchpad(scratchpad_size);
+        else
+            scratchpad_buffer_ = malloc(scratchpad_size, 64);
+    }
+
+    virtual ~cpu_primitive_t() {
+        delete global_scratchpad_;
+        free(scratchpad_buffer_);
+    }
 
     virtual char *memory(size_t output_index = 0) const {
         if (output_index >= this->outputs().size()) return nullptr;
@@ -54,6 +69,19 @@ struct cpu_primitive_t: public primitive_t {
                 this->inputs()[index].primitive);
         return p->const_memory(oi);
     }
+
+    const cpu_memory_t *output_memory_primitive(size_t index = 0) const;
+
+protected:
+    memory_tracking::grantor_t scratchpad() const {
+        return pd()->scratchpad_registry().grantor(global_scratchpad_
+                ? global_scratchpad_->get() : scratchpad_buffer_);
+    }
+
+private:
+    /* quite ugly, but luckily both will get away in v1.0 */
+    void *scratchpad_buffer_;
+    scratchpad_t *global_scratchpad_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.cpp
index 116c4a8ba..1d41ac5ce 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.cpp
@@ -27,6 +27,8 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
+using namespace memory_tracking::names;
+
 void reduce_balancer_t::balance() {
     using namespace nstl;
     using namespace utils;
@@ -277,90 +279,88 @@ inline reducer_2d_driver_t<data_type> *create_reduce_2d_drv(int n_src,
 /* cpu_reducer_t */
 
 template <impl::data_type_t data_type>
-cpu_reducer_t<data_type>::cpu_reducer_t(const reduce_balancer_t &balancer)
-    : balancer_(balancer), workspace_(nullptr)
-    , drv_(nullptr), barriers_(nullptr)
-{
-    allocate_workspace();
-    if (balancer_.nthr_per_group_ > 1) {
-        barriers_ = (simple_barrier::ctx_t *)malloc(
-                balancer_.ngroups_ * sizeof(simple_barrier::ctx_t), 64);
-        for (int i = 0; i < balancer_.ngroups_; ++i)
-            simple_barrier::ctx_init(&barriers_[i]);
-        drv_ = create_reduce_2d_drv<data_type>(balancer_.nthr_per_group_ - 1,
-                ws_per_thread(), 0, 0, false);
-    }
-}
+void cpu_reducer_t<data_type>::conf_t::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad) const {
+    if (balancer_.nthr_per_group_ == 1) return;
 
-template <impl::data_type_t data_type>
-cpu_reducer_t<data_type>::~cpu_reducer_t() {
-    deallocate_workspace();
-    free(barriers_);
-    delete drv_;
+    const size_t space_size = balancer_.ngroups_
+        * (balancer_.nthr_per_group_ - 1)
+        * cpu_reducer_t<data_type>::space_per_thread(balancer_);
+    scratchpad.book(key_reducer_space, sizeof(data_t) * space_size, PAGE_4K);
+    scratchpad.book(key_reducer_space_bctx,
+            sizeof(simple_barrier::ctx_t) * balancer_.ngroups_);
 }
 
 template <impl::data_type_t data_type>
-void cpu_reducer_t<data_type>::allocate_workspace() {
-    if (balancer_.nthr_per_group_ == 1) return;
+cpu_reducer_t<data_type>::cpu_reducer_t(const conf_t &conf)
+    : conf_(conf), drv_(nullptr)
+{
+    if (balancer().nthr_per_group_ == 1) return;
 
-    const size_t ws_size = balancer_.ngroups_ * (balancer_.nthr_per_group_ - 1)
-        * ws_per_thread();
-    workspace_ = (data_t *)malloc(ws_size * sizeof(data_t), PAGE_4K);
+    drv_ = create_reduce_2d_drv<data_type>(balancer().nthr_per_group_ - 1,
+            space_per_thread(balancer()), 0, 0, false);
 }
 
 template <impl::data_type_t data_type>
+cpu_reducer_t<data_type>::~cpu_reducer_t() { delete drv_; }
+
+template <impl::data_type_t data_type>
 typename cpu_reducer_t<data_type>::data_t *
-cpu_reducer_t<data_type>::get_local_ptr(int ithr, data_t *dst) {
-    const int id_in_grp = balancer_.id_in_group(ithr);
+cpu_reducer_t<data_type>::get_local_ptr(int ithr, data_t *dst,
+        const memory_tracking::grantor_t &scratchpad) const {
+    const int id_in_grp = balancer().id_in_group(ithr);
 
     /* threads 0 from each group writes directly to the destination */
     if (id_in_grp == 0)
-        return dst + balancer_.ithr_job_off(ithr) * balancer_.job_size_;
+        return dst + balancer().ithr_job_off(ithr) * balancer().job_size_;
 
-    const int grp_id = balancer_.group_id(ithr);
-    const int offset_factor = grp_id * (balancer_.nthr_per_group_ - 1)
+    const int grp_id = balancer().group_id(ithr);
+    const int offset_factor = grp_id * (balancer().nthr_per_group_ - 1)
         + (id_in_grp - 1);
-    return workspace_ + offset_factor * ws_per_thread();
+
+    auto space = scratchpad.template get<data_t>(key_reducer_space);
+    return space + offset_factor * space_per_thread(balancer());
 }
 
 template <impl::data_type_t data_type>
-void cpu_reducer_t<data_type>::reduce_nolock(int ithr, data_t *dst) {
-    bool redundant_reduction = balancer_.nthr_per_group_ == 1
-        || balancer_.idle(ithr);
+void cpu_reducer_t<data_type>::reduce_nolock(int ithr, data_t *dst,
+        const memory_tracking::grantor_t &scratchpad) const {
+    bool redundant_reduction = balancer().nthr_per_group_ == 1
+        || balancer().idle(ithr);
     if (redundant_reduction) return;
 
 #ifdef SIMPLE_IMPL
-    if (balancer_.id_in_group(ithr) != 0)
+    if (balancer().id_in_group(ithr) != 0)
         return; /* only threads 0 do the reduction */
 
-    const int njobs_in_grp = balancer_.ithr_njobs(ithr);
-    data_t *d = get_local_ptr(ithr, dst);
+    const int njobs_in_grp = balancer().ithr_njobs(ithr);
+    data_t *d = get_local_ptr(ithr, dst, scratchpad);
     for (int id_in_grp = 1; id_in_grp < balancer_.nthr_per_group_; ++id_in_grp)
     {
-        const data_t *wspace = get_local_ptr(ithr + id_in_grp, dst);
-        for (size_t i = 0; i < (size_t)njobs_in_grp * balancer_.job_size_; ++i)
-            d[i] += wspace[i];
+        const data_t *space = get_local_ptr(ithr + id_in_grp, dst, scratchpad);
+        for (size_t i = 0; i < (size_t)njobs_in_grp * balancer().job_size_; ++i)
+            d[i] += space[i];
     }
 #else
     using namespace utils;
 
-    const int id_in_grp = balancer_.id_in_group(ithr);
-    const int njobs_in_grp = balancer_.ithr_njobs(ithr);
+    const int id_in_grp = balancer().id_in_group(ithr);
+    const int njobs_in_grp = balancer().ithr_njobs(ithr);
     const size_t cl = 64 / sizeof(data_t);
 
-    const size_t reduction_size = njobs_in_grp * balancer_.job_size_;
+    const size_t reduction_size = njobs_in_grp * balancer().job_size_;
     size_t start{0}, end{0};
-    balance211(div_up(reduction_size, cl), balancer_.nthr_per_group_,
+    balance211(div_up(reduction_size, cl), balancer().nthr_per_group_,
             id_in_grp, start, end);
 
     if (start == end) return;
 
-    data_t *d = get_local_ptr(ithr - id_in_grp, dst) + start * cl;
-    const data_t *wspace = get_local_ptr(ithr - id_in_grp + 1, dst)
+    data_t *d = get_local_ptr(ithr - id_in_grp, dst, scratchpad) + start * cl;
+    const data_t *space = get_local_ptr(ithr - id_in_grp + 1, dst, scratchpad)
         + start * cl;
     const size_t len = nstl::min(end * cl, reduction_size) - start * cl;
 
-    (*drv_)(d, wspace, 1, len);
+    (*drv_)(d, space, 1, len);
 #endif
 }
 
@@ -370,69 +370,48 @@ template struct cpu_reducer_t<data_type::s32>;
 /* cpu_reducer_2d_t */
 
 template <impl::data_type_t data_type>
-cpu_reducer_2d_t<data_type>::cpu_reducer_2d_t(
-        const reduce_balancer_t &balancer,
-        int job_size_x, int job_size_y, int x_block,
-        int dst_x, int dst_y, bool master_uses_dst)
-    : balancer_(balancer), master_uses_dst_(master_uses_dst)
-    , job_size_x_(job_size_x), job_size_y_(job_size_y), x_block_(x_block)
-    , dst_x_(dst_x), dst_y_(dst_y), workspace_(nullptr), drv_(nullptr)
-    , barriers_(nullptr)
-{
-    allocate_workspace();
-    if (balancer_.nthr_per_group_ > 1) {
-        barriers_ = (simple_barrier::ctx_t *)malloc(
-                balancer_.ngroups_ * sizeof(simple_barrier::ctx_t), 64);
-        for (int i = 0; i < balancer_.ngroups_; ++i)
-            simple_barrier::ctx_init(&barriers_[i]);
-        const int n_src = balancer_.nthr_per_group_ - master_uses_dst_;
-        drv_ = create_reduce_2d_drv<data_type>(n_src, ws_per_thread(),
-                job_size_x_, dst_x_, !master_uses_dst_);
-    }
-}
+void cpu_reducer_2d_t<data_type>::conf_t::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad) const {
+    if (balancer_.nthr_per_group_ == 1) return;
 
-template <impl::data_type_t data_type>
-cpu_reducer_2d_t<data_type>::~cpu_reducer_2d_t() {
-    deallocate_workspace();
-    free(barriers_);
-    delete drv_;
+    const size_t space_size = balancer_.ngroups_ * balancer_.nthr_per_group_
+        * cpu_reducer_2d_t<data_type>::space_per_thread(balancer_);
+    scratchpad.book(key_reducer_space, sizeof(data_t) * space_size);
+    scratchpad.book(key_reducer_space_bctx,
+            sizeof(simple_barrier::ctx_t) * balancer_.ngroups_);
 }
 
 template <impl::data_type_t data_type>
-void cpu_reducer_2d_t<data_type>::allocate_workspace() {
-    if (balancer_.nthr_per_group_ == 1) return;
+cpu_reducer_2d_t<data_type>::cpu_reducer_2d_t(const conf_t &conf)
+    : conf_(conf), drv_(nullptr)
+{
+    if (balancer().nthr_per_group_ == 1) return;
 
-    const size_t ws_size = balancer_.ngroups_
-        * (balancer_.nthr_per_group_ - master_uses_dst_)
-        * ws_per_thread();
-    workspace_ = (data_t *)malloc(ws_size * sizeof(data_t), 64);
+    drv_ = create_reduce_2d_drv<data_type>(balancer().nthr_per_group_,
+            space_per_thread(balancer()), conf_.job_size_x_, conf_.dst_x_,
+            true);
 }
 
 template <impl::data_type_t data_type>
-typename cpu_reducer_2d_t<data_type>::data_t *
-cpu_reducer_2d_t<data_type>::get_local_ptr(int ithr, data_t *dst) {
-    const int id_in_grp = balancer_.id_in_group(ithr);
-
-    /* master threads from each group should write directly to the destination
-     * if they are allowed to use it */
-    if (master_uses_dst_ && id_in_grp == 0) {
-        assert(!"unsupported");
-        return dst + balancer_.ithr_job_off(ithr) * balancer_.job_size_;
-    }
+cpu_reducer_2d_t<data_type>::~cpu_reducer_2d_t() { delete drv_; }
 
-    const int grp_id = balancer_.group_id(ithr);
-    const int offset_factor
-        = grp_id * (balancer_.nthr_per_group_ - master_uses_dst_)
-        + (id_in_grp - master_uses_dst_);
-    return workspace_ + offset_factor * ws_per_thread();
+template <impl::data_type_t data_type>
+typename cpu_reducer_2d_t<data_type>::data_t *cpu_reducer_2d_t<data_type>::
+get_local_ptr(int ithr, const memory_tracking::grantor_t &scratchpad) const {
+    const int id_in_grp = balancer().id_in_group(ithr);
+    const int grp_id = balancer().group_id(ithr);
+    const int offset_factor = grp_id * balancer().nthr_per_group_ + id_in_grp;
+    auto space = scratchpad.template get<data_t>(key_reducer_space);
+    return space + offset_factor * space_per_thread(balancer());
 }
 
 template <impl::data_type_t data_type>
 int cpu_reducer_2d_t<data_type>::choose_x_blocking(int nx, int ny,
-                                                    int nthr_per_grp) {
+        int nthr_per_grp) const {
     // find x_blocking for better balance reducing work between threads
-    assert(x_block_ > 0 && nx > x_block_ && nx % x_block_ == 0);
-    int x_blocking = nx / x_block_;
+    assert(conf_.x_block_ > 0 && nx > conf_.x_block_
+            && nx % conf_.x_block_ == 0);
+    int x_blocking = nx / conf_.x_block_;
     int min_x_blocking =
             utils::div_up(x_blocking, nstl::max(1, nthr_per_grp / ny));
     while (true) {
@@ -444,48 +423,49 @@ int cpu_reducer_2d_t<data_type>::choose_x_blocking(int nx, int ny,
             break;
     }
     if (x_blocking >= min_x_blocking * 4) x_blocking = 1;
-    x_blocking *= x_block_;
+    x_blocking *= conf_.x_block_;
     return x_blocking;
 }
 
 template <impl::data_type_t data_type>
-void cpu_reducer_2d_t<data_type>::reduce_block(const data_t* wspace_base,
-            data_t *dst, int job, int start_y, int start_x,
-            int ny_start, int nx_start, int ny_step, int nx_step) {
-    data_t *d = dst + (start_y + ny_start) * dst_x_
+void cpu_reducer_2d_t<data_type>::reduce_block(const data_t* space_base,
+        data_t *dst, int job, int start_y, int start_x,
+        int ny_start, int nx_start, int ny_step, int nx_step) const {
+    data_t *d = dst + (start_y + ny_start) * conf_.dst_x_
                     + start_x + nx_start;
-    const data_t *wspace = wspace_base + job * balancer_.job_size_
-                            + ny_start * job_size_x_ + nx_start;
+    const data_t *space = space_base + job * balancer().job_size_
+                            + ny_start * conf_.job_size_x_ + nx_start;
 #ifdef SIMPLE_IMPL
-    const int idg_start = master_uses_dst_ ? 1 : 0;
-    for (int idg = idg_start; idg < balancer_.nthr_per_group_; ++idg) {
-        const data_t *w = &wspace[(idg - idg_start) * ws_per_thread()];
+    for (int idg = 0; idg < balancer().nthr_per_group_; ++idg) {
+        const data_t *w = &space[idg * space_per_thread(balancer())];
         for (int y = 0; y < ny_step; ++y)
             for (int x = 0; x < nx_step; ++x) {
-                d[y * dst_x_ + x] = (idg == 0 ? 0 : d[y * dst_x_ + x])
-                    + w[y * job_size_x_ + x];
+                d[y * conf_.dst_x_ + x]
+                    = (idg == 0 ? 0 : d[y * conf_.dst_x_ + x])
+                    + w[y * conf_.job_size_x_ + x];
             }
     }
 #else
-    (*drv_)(d, wspace, ny_step, nx_step);
+    (*drv_)(d, space, ny_step, nx_step);
 #endif
 }
 
 template <impl::data_type_t data_type>
-void cpu_reducer_2d_t<data_type>::reduce_nolock(int ithr, data_t *dst) {
-    bool redundant_reduction = balancer_.nthr_per_group_ == 1
-        || balancer_.idle(ithr);
+void cpu_reducer_2d_t<data_type>::reduce_nolock(int ithr, data_t *dst,
+        const memory_tracking::grantor_t &scratchpad) const {
+    bool redundant_reduction = balancer().nthr_per_group_ == 1
+        || balancer().idle(ithr);
     if (redundant_reduction) return;
 
-    const int id_in_grp = balancer_.id_in_group(ithr);
-    const int njobs_in_grp = balancer_.ithr_njobs(ithr);
-    const int njobs_x = utils::div_up(dst_x_, job_size_x_);
-    const int global_job_start = balancer_.ithr_job_off(ithr);
+    const int id_in_grp = balancer().id_in_group(ithr);
+    const int njobs_in_grp = balancer().ithr_njobs(ithr);
+    const int njobs_x = utils::div_up(conf_.dst_x_, conf_.job_size_x_);
+    const int global_job_start = balancer().ithr_job_off(ithr);
 
-    const data_t *wspace_base = get_local_ptr(ithr - id_in_grp, nullptr);
+    const data_t *space_base = get_local_ptr(ithr - id_in_grp, scratchpad);
 
-    const int pr_grps = nstl::min(njobs_in_grp, balancer_.nthr_per_group_);
-    const int pr_nthr_per_grp = balancer_.nthr_per_group_ / pr_grps;
+    const int pr_grps = nstl::min(njobs_in_grp, balancer().nthr_per_group_);
+    const int pr_nthr_per_grp = balancer().nthr_per_group_ / pr_grps;
 
     if (id_in_grp >= pr_grps * pr_nthr_per_grp)
         return; /* idle */
@@ -500,10 +480,10 @@ void cpu_reducer_2d_t<data_type>::reduce_nolock(int ithr, data_t *dst) {
         const int global_job = global_job_start + j;
         const int j_y = global_job / njobs_x;
         const int j_x = global_job % njobs_x;
-        const int start_y = j_y * job_size_y_;
-        const int start_x = j_x * job_size_x_;
-        const int ny = nstl::min(dst_y_ - start_y, job_size_y_);
-        const int nx = nstl::min(dst_x_ - start_x, job_size_x_);
+        const int start_y = j_y * conf_.job_size_y_;
+        const int start_x = j_x * conf_.job_size_x_;
+        const int ny = nstl::min(conf_.dst_y_ - start_y, conf_.job_size_y_);
+        const int nx = nstl::min(conf_.dst_x_ - start_x, conf_.job_size_x_);
         int x_blocking = choose_x_blocking(nx, ny, pr_nthr_per_grp);
 
         int nxy_start{0}, nxy_end{0};
@@ -516,18 +496,18 @@ void cpu_reducer_2d_t<data_type>::reduce_nolock(int ithr, data_t *dst) {
         int nxy = nxy_start;
         if (nxy % nx != 0) {
             int nx_step = nstl::min(nx - nxy % nx, nxy_end - nxy);
-            reduce_block(wspace_base, dst, j, start_y, start_x,
+            reduce_block(space_base, dst, j, start_y, start_x,
                         nxy / nx, nxy % nx, 1, nx_step);
             nxy += nx_step;
         }
         if ((nxy_end - nxy) > nx) {
             int ny_step = (nxy_end - nxy) / nx;
-            reduce_block(wspace_base, dst, j, start_y, start_x,
+            reduce_block(space_base, dst, j, start_y, start_x,
                         nxy / nx, nxy % nx, ny_step, nx);
             nxy += nx * ny_step;
         }
         if ((nxy_end - nxy) > 0) {
-            reduce_block(wspace_base, dst, j, start_y, start_x,
+            reduce_block(space_base, dst, j, start_y, start_x,
                         nxy / nx, nxy % nx, 1, nxy_end - nxy);
         }
     }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.hpp
index 6c364193e..27f5939cd 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reducer.hpp
@@ -20,6 +20,7 @@
 #include <assert.h>
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "mkldnn_thread.hpp"
 #include "mkldnn_types.h"
 #include "nstl.hpp"
@@ -63,12 +64,23 @@ namespace cpu {
  * Intel(R) TBB) the # of thread per group is enforced to be 1.
  */
 struct reduce_balancer_t {
+    reduce_balancer_t() { init(1, 1, 1, 1, 0); } /* trivial balance */
     reduce_balancer_t(int nthr, int job_size, int njobs, int reduction_size,
             size_t max_buffer_size)
-        : syncable_(mkldnn_thr_syncable()), nthr_(nthr), job_size_(job_size)
-        , njobs_(njobs), reduction_size_(reduction_size)
-        , max_buffer_size_(max_buffer_size)
-    { balance(); }
+    { init(nthr, job_size, njobs, reduction_size, max_buffer_size); }
+
+    reduce_balancer_t &init(int nthr, int job_size, int njobs,
+            int reduction_size, size_t max_buffer_size)
+    {
+        syncable_ = mkldnn_thr_syncable();
+        nthr_ = nthr;
+        job_size_ = job_size;
+        njobs_ = njobs;
+        reduction_size_ = reduction_size;
+        max_buffer_size_ = max_buffer_size;
+        balance();
+        return *this;
+    }
 
     bool syncable_;
     int nthr_;
@@ -154,14 +166,29 @@ template <impl::data_type_t data_type>
 struct cpu_reducer_t {
     typedef typename prec_traits<data_type>::type data_t;
 
-    cpu_reducer_t(const reduce_balancer_t &balancer);
+    struct conf_t {
+        conf_t() = default;
+        conf_t &init(const reduce_balancer_t &balancer)
+        { balancer_ = balancer; return *this; }
+
+        void init_scratchpad(memory_tracking::registrar_t &scratchpad) const;
+
+        reduce_balancer_t balancer_;
+    };
+
+    cpu_reducer_t(const conf_t &conf);
     ~cpu_reducer_t();
 
-    /** allocates internal buffer for partial computations. */
-    void allocate_workspace();
+    /** initializes reducer.
+     * Must be called from a single thread prior to actual usage */
+    void init(const memory_tracking::grantor_t &scratchpad) const {
+        if (balancer().nthr_per_group_ == 1) return;
 
-    /** deallocates internal buffer. */
-    void deallocate_workspace() { if (workspace_) free(workspace_); }
+        auto bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                memory_tracking::names::key_reducer_space_bctx);
+        for (int i = 0; i < balancer().ngroups_; ++i)
+            simple_barrier::ctx_init(&bctx[i]);
+    }
 
     /** for given thread returns the pointer where to put partial results.
      * Reduction destination @p dst must be provided as well (master threads
@@ -172,86 +199,118 @@ struct cpu_reducer_t {
      *        threads should start writing from the very beginning of returned
      *        address.
      */
-    data_t *get_local_ptr(int ithr, data_t *dst);
+    data_t *get_local_ptr(int ithr, data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const;
 
     /** performs the reduction with built-in synchronization. */
-    void reduce(int ithr, data_t *dst) {
-        bool redundant_reduction = balancer_.nthr_per_group_ == 1
-            || balancer_.idle(ithr);
+    void reduce(int ithr, data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const {
+        bool redundant_reduction = balancer().nthr_per_group_ == 1
+            || balancer().idle(ithr);
         if (redundant_reduction) return;
 
-        simple_barrier::barrier(&barriers_[balancer_.group_id(ithr)],
-                balancer_.nthr_per_group_);
-        reduce_nolock(ithr, dst);
+        auto bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                memory_tracking::names::key_reducer_space_bctx);
+        simple_barrier::barrier(&bctx[balancer().group_id(ithr)],
+                balancer().nthr_per_group_);
+
+        reduce_nolock(ithr, dst, scratchpad);
     }
 
-    reduce_balancer_t balancer_;
+    const reduce_balancer_t &balancer() const { return conf_.balancer_; }
 
 private:
-    size_t ws_per_thread() const
-    { return balancer_.njobs_per_group_ub_ * balancer_.job_size_; }
+    static size_t space_per_thread(const reduce_balancer_t &balancer)
+    { return balancer.njobs_per_group_ub_ * balancer.job_size_; }
+
+    /* The scratchpad is organized as follows:
+     *
+     * data_t space[nthr_][njobs_per_group_ub_][jobs_size_];
+     * simple_barrier::ctx_t barriers[groups_]; */
 
-    data_t *workspace_; /** data_t[nthr_][njobs_per_group_ub_][jobs_size_] */
+    const conf_t conf_;
     reducer_2d_driver_t<data_type> *drv_;
-    simple_barrier::ctx_t *barriers_; /** barrier::ctx_t[groups_] */
 
-    void reduce_nolock(int ithr, data_t *dst);
+    void reduce_nolock(int ithr, data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const;
 };
 
 template <impl::data_type_t data_type>
 struct cpu_reducer_2d_t {
     typedef typename prec_traits<data_type>::type data_t;
 
-    cpu_reducer_2d_t(const reduce_balancer_t &balancer, int job_size_x,
-            int job_size_y, int x_block, int dst_x, int dst_y,
-            bool master_uses_dst);
+    struct conf_t {
+        conf_t() = default;
+        conf_t &init(const reduce_balancer_t &balancer, int job_size_x,
+                int job_size_y, int x_block, int dst_x, int dst_y) {
+            balancer_ = balancer;
+            job_size_x_ = job_size_x;
+            job_size_y_ = job_size_y;
+            x_block_ = x_block;
+            dst_x_ = dst_x;
+            dst_y_ = dst_y;
+            return *this;
+        }
+
+        void init_scratchpad(memory_tracking::registrar_t &scratchpad) const;
+
+        reduce_balancer_t balancer_;
+        int job_size_x_, job_size_y_, x_block_, dst_x_, dst_y_;
+    };
+
+    cpu_reducer_2d_t(const conf_t &conf);
     ~cpu_reducer_2d_t();
 
-    /** allocates internal buffer for partial computations. */
-    void allocate_workspace();
+    /** initializes reducer.
+     * Must be called from a single thread prior to actual usage */
+    void init(const memory_tracking::grantor_t &scratchpad) const {
+        if (balancer().nthr_per_group_ == 1) return;
 
-    /** deallocates internal buffer. */
-    void deallocate_workspace() { if (workspace_) free(workspace_); }
+        auto bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                memory_tracking::names::key_reducer_space_bctx);
+        for (int i = 0; i < balancer().ngroups_; ++i)
+            simple_barrier::ctx_init(&bctx[i]);
+    }
 
-    /** for given thread returns the pointer where to put partial results.
-     * Depending on @p master_uses_dst_ returned pointer for master threads
-     * would be either equal to the destination memory or to the workspace (in
-     * contrast, cpu_reducer_t struct always use destination memory for master
-     * threads).
-     *
-     * @note: @p master_uses_dst_ == #false is unimplemented at the moment
-     */
-    data_t *get_local_ptr(int ithr, data_t *dst);
+    /** for given thread returns the pointer where to put partial results */
+    data_t *get_local_ptr(int ithr,
+            const memory_tracking::grantor_t &scratchpad) const;
 
     /** performs the reduction with built-in synchronization. */
-    void reduce(int ithr, data_t *dst) {
-        bool redundant_reduction = balancer_.nthr_per_group_ == 1
-            || balancer_.idle(ithr);
+    void reduce(int ithr, data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const {
+        bool redundant_reduction = balancer().nthr_per_group_ == 1
+            || balancer().idle(ithr);
         if (redundant_reduction) return;
 
-        simple_barrier::barrier(&barriers_[balancer_.group_id(ithr)],
-                balancer_.nthr_per_group_);
-        reduce_nolock(ithr, dst);
+        auto bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                memory_tracking::names::key_reducer_space_bctx);
+        simple_barrier::barrier(&bctx[balancer().group_id(ithr)],
+                balancer().nthr_per_group_);
+
+        reduce_nolock(ithr, dst, scratchpad);
     }
 
-    reduce_balancer_t balancer_;
-    bool master_uses_dst_;
+    const reduce_balancer_t &balancer() const { return conf_.balancer_; }
 
 private:
-    int job_size_x_, job_size_y_, x_block_, dst_x_, dst_y_;
+    static size_t space_per_thread(const reduce_balancer_t &balancer)
+    { return balancer.njobs_per_group_ub_ * balancer.job_size_; }
 
-    size_t ws_per_thread() const
-    { return balancer_.njobs_per_group_ub_ * balancer_.job_size_; }
+    /* The scratchpad is organized as follows:
+     *
+     * data_t space[nthr_][njobs_per_group_ub_][jobs_size_];
+     * simple_barrier::ctx_t barriers[groups_]; */
 
-    data_t *workspace_; /** data_t[nthr_][njobs_per_group_ub_][jobs_size_] */
+    const conf_t conf_;
     reducer_2d_driver_t<data_type> *drv_;
-    simple_barrier::ctx_t *barriers_; /** barrier::ctx_t[groups_] */
 
-    int choose_x_blocking(int nx, int ny, int nthr_per_grp);
-    void reduce_block(const data_t* wspace_base,
-            data_t *dst, int job, int start_y, int start_x,
-            int ny_start, int nx_start, int ny_step, int nx_step);
-    void reduce_nolock(int ithr, data_t *dst);
+    int choose_x_blocking(int nx, int ny, int nthr_per_grp) const;
+    void reduce_block(const data_t* space_base, data_t *dst,
+            int job, int start_y, int start_x,
+            int ny_start, int nx_start, int ny_step, int nx_step) const;
+    void reduce_nolock(int ithr, data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const;
 };
 
 /** simple 1d accumulator: y[:] += x[:] */
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder.cpp
index eee668b79..30208473e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder.cpp
@@ -23,6 +23,7 @@
 #include "cpu/jit_uni_reorder.hpp"
 #include "cpu/simple_reorder.hpp"
 #include "cpu/wino_reorder.hpp"
+#include "cpu/rnn/rnn_reorders.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -50,9 +51,22 @@ static const rpd_create_f cpu_reorder_impl_list[] = {
     wino_reorder_t<f32, f32>::pd_t::create,
     wino_reorder_t<f32, s8>::pd_t::create,
 
+    /* rnn reorders */
+    rnn_data_reorder_t<f32, u8>::pd_t::create,
+    rnn_weights_reorder_t<f32, f32>::pd_t::create,
+    rnn_weights_reorder_t<f32, s8>::pd_t::create,
+
+#if defined(__INTEL_COMPILER) || (defined(__GNUC__) && !defined(__clang__))
+    /* Direct copy for icc which is faster than jitted code;
+     * Direct copy for gcc which might or might not be faster than jitted
+     * code, but still worth it because doesn't require jitting, i.e. much
+     * faster creation time. This is tentative solution and should be removed
+     * later (when we will cache jitted code?...). */
+    REG_SR_DIRECT_COPY(f32, f32),
+#endif
+
 #ifdef __INTEL_COMPILER
     /* direct copy for icc, which is faster than jitted code */
-    REG_SR_DIRECT_COPY(f32, f32),
     REG_SR_DIRECT_COPY(f32, s32),
     REG_SR_DIRECT_COPY(f32, s8),
 //    REG_SR_DIRECT_COPY(f32, u8), FIXME: Disabled due to accuracy failure on int8 network
@@ -73,10 +87,18 @@ static const rpd_create_f cpu_reorder_impl_list[] = {
     /* jit */
     jit_uni_reorder_create,
 
-    /* fp32: flat <-> blocked with< tail */
+    /* fp32: flat <-> blocked with tail */
+    REG_SR_BIDIR(f32, any, f32, nCw4c),
+
+    REG_SR_BIDIR(f32, nchw, bin, nhwc),
+    REG_SR_BIDIR(f32, nhwc, bin, nhwc),
+    REG_SR_DIRECT_COPY(bin, bin),
+
     REG_SR_BIDIR(f32, any, f32, nCw8c),
+    REG_SR_BIDIR(f32, any, f32, OIw4i4o),
     REG_SR_BIDIR(f32, any, f32, OIw8i8o),
     REG_SR_BIDIR(f32, any, f32, OIw8o8i),
+    REG_SR_BIDIR(f32, any, f32, gOIw4i4o),
     REG_SR_BIDIR(f32, any, f32, gOIw8i8o),
     REG_SR_BIDIR(f32, any, f32, gOIw8o8i),
 
@@ -88,46 +110,57 @@ static const rpd_create_f cpu_reorder_impl_list[] = {
     REG_SR_BIDIR(f32, any, f32, gOIw16i16o),
     REG_SR_BIDIR(f32, any, f32, gIOw16o16i),
 
+    REG_SR_BIDIR(f32, any, f32, nChw4c),
     REG_SR_BIDIR(f32, any, f32, nChw8c),
+    REG_SR_BIDIR(f32, any, f32, OIhw4i4o),
     REG_SR_BIDIR(f32, any, f32, Ohwi8o),
     REG_SR_BIDIR(f32, any, f32, OIhw8i8o),
     REG_SR_BIDIR(f32, any, f32, OIhw8o8i),
+    REG_SR_BIDIR(f32, any, f32, gOIhw4i4o),
+    REG_SR_BIDIR(f32, any, f32, gOIhw4o4i),
     REG_SR_BIDIR(f32, any, f32, gOhwi8o),
     REG_SR_BIDIR(f32, any, f32, gOIhw8i8o),
     REG_SR_BIDIR(f32, any, f32, gOIhw8o8i),
 
     REG_SR_BIDIR(f32, any, f32, nChw16c),
+    REG_SR_BIDIR(f32, any, f32, Oihw4o),
     REG_SR_BIDIR(f32, any, f32, Oihw16o),
+    REG_SR_BIDIR(f32, any, f32, Ohwi4o),
     REG_SR_BIDIR(f32, any, f32, Ohwi16o),
     REG_SR_BIDIR(f32, any, f32, OIhw16o16i),
     REG_SR_BIDIR(f32, any, f32, OIhw16i16o),
     REG_SR_BIDIR(f32, any, f32, IOhw16o16i),
+    REG_SR_BIDIR(f32, any, f32, gOihw4o),
     REG_SR_BIDIR(f32, any, f32, gOihw16o),
+    REG_SR_BIDIR(f32, any, f32, gOhwi4o),
     REG_SR_BIDIR(f32, any, f32, gOhwi16o),
     REG_SR_BIDIR(f32, any, f32, gOIhw16o16i),
     REG_SR_BIDIR(f32, any, f32, gOIhw16i16o),
     REG_SR_BIDIR(f32, any, f32, gIOhw16o16i),
 
+    REG_SR_BIDIR(f32, any, f32, nCdhw4c),
     REG_SR_BIDIR(f32, any, f32, nCdhw8c),
+    REG_SR_BIDIR(f32, any, f32, OIdhw4i4o),
     REG_SR_BIDIR(f32, any, f32, Odhwi8o),
     REG_SR_BIDIR(f32, any, f32, OIdhw8i8o),
     REG_SR_BIDIR(f32, any, f32, OIdhw8o8i),
+    REG_SR_BIDIR(f32, any, f32, gOIdhw4i4o),
     REG_SR_BIDIR(f32, any, f32, gOdhwi8o),
     REG_SR_BIDIR(f32, any, f32, gOIdhw8i8o),
     REG_SR_BIDIR(f32, any, f32, gOIdhw8o8i),
 
     REG_SR_BIDIR(f32, any, f32, nCdhw16c),
+    REG_SR_BIDIR(f32, any, f32, Oidhw4o),
     REG_SR_BIDIR(f32, any, f32, Oidhw16o),
     REG_SR_BIDIR(f32, any, f32, Odhwi16o),
     REG_SR_BIDIR(f32, any, f32, OIdhw16o16i),
     REG_SR_BIDIR(f32, any, f32, OIdhw16i16o),
+    REG_SR_BIDIR(f32, any, f32, gOidhw4o),
     REG_SR_BIDIR(f32, any, f32, gOidhw16o),
     REG_SR_BIDIR(f32, any, f32, gOdhwi16o),
     REG_SR_BIDIR(f32, any, f32, gOIdhw16o16i),
     REG_SR_BIDIR(f32, any, f32, gOIdhw16i16o),
 
-    REG_SR_BIDIR(f32, nChw8c, f32, nChw16c),
-
     /* WA to prevent fallback on reference implementations */
     REG_SR_DIRECT_COPY(u8, f32),
     REG_SR_DIRECT_COPY(u8, s8),
@@ -135,6 +168,11 @@ static const rpd_create_f cpu_reorder_impl_list[] = {
     REG_SR_DIRECT_COPY(u8, u8),
     REG_SR_DIRECT_COPY(s8, s8),
 
+ /* fp32: blocked <-> blocked with tail */
+    REG_SR_BIDIR(f32, nCw8c, f32, nCw16c),
+    REG_SR_BIDIR(f32, nChw8c, f32, nChw16c),
+    REG_SR_BIDIR(f32, nCdhw8c, f32, nCdhw16c),
+
     /* int: flat <-> blocked with tail */
     REG_SR(f32, nChw8c, u8, nhwc, fmt_order::keep),
     REG_SR(f32, nChw8c, s8, nhwc, fmt_order::keep),
@@ -207,15 +245,27 @@ static const rpd_create_f cpu_reorder_impl_list[] = {
     REG_SR(f32, goihw, s8, gOhIw8o4i_s8s8, fmt_order::keep),
     REG_SR(s8, goihw, s8, gOhIw8o4i_s8s8, fmt_order::keep),
 
+    REG_SR(bin, any, bin, OhIw8o32i, fmt_order::keep),
+    REG_SR(bin, any, bin, OhIw16o32i, fmt_order::keep),
+
     REG_SR(f32, any, s8, hwio_s8s8, fmt_order::keep),
-    REG_SR(s8, any, s8, hwio_s8s8, fmt_order::keep),
     REG_SR(f32, any, s8, hwigo_s8s8, fmt_order::keep),
+    REG_SR(s8, any, s8, hwio_s8s8, fmt_order::keep),
     REG_SR(s8, any, s8, hwigo_s8s8, fmt_order::keep),
+
+    REG_SR(f32, goihw, s8, gOIhw4o4i_s8s8, fmt_order::keep),
+    REG_SR(s8, goihw, s8, gOIhw4o4i_s8s8, fmt_order::keep),
+
     REG_SR(f32, oihw, s8, OIhw4i16o4i_s8s8, fmt_order::keep),
-    REG_SR(s8, oihw, s8, OIhw4i16o4i_s8s8, fmt_order::keep),
     REG_SR(f32, goihw, s8, gOIhw4i16o4i_s8s8, fmt_order::keep),
+    REG_SR(s8, oihw, s8, OIhw4i16o4i_s8s8, fmt_order::keep),
     REG_SR(s8, goihw, s8, gOIhw4i16o4i_s8s8, fmt_order::keep),
 
+    REG_SR(f32, goihw, s8, gOIhw2i8o4i_s8s8, fmt_order::keep),
+    REG_SR(s8, goihw, s8, gOIhw2i8o4i_s8s8, fmt_order::keep),
+
+    REG_SR(f32, goihw, s8, Goihw16g_s8s8, fmt_order::keep),
+    REG_SR(s8, goihw, s8, Goihw16g_s8s8, fmt_order::keep),
     /* s16 <-> s16 */
     REG_SR_DIRECT_COPY(s16, s16),
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder_pd.hpp
index f929a9ed5..2fac7c712 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_reorder_pd.hpp
@@ -40,7 +40,7 @@ struct cpu_reorder_pd_t: public reorder_pd_t {
         , input_pd_(*input_pd), output_pd_(*output_pd) {}
     virtual ~cpu_reorder_pd_t() {}
 
-    virtual status_t init() const {
+    virtual status_t init() {
         const auto &post_ops = attr()->post_ops_;
         bool args_ok = true
             && IMPLICATION(post_ops.len_ != 0,
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_sum.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_sum.hpp
index 00769ad94..34a0f4fe9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_sum.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_sum.hpp
@@ -53,7 +53,7 @@ namespace cpu {
         } \
         return ret; \
     } \
-    virtual pd_t *clone() const override { return nullptr; } \
+    virtual pd_t *clone() const override { return new pd_t(*this); } \
     virtual const char *name() const override { return impl_name; }
 #define DECLARE_CPU_SUM_PD_T(impl_name, ...) \
     DECLARE_CPU_SUM_PD_t(impl_name, __VA_ARGS__)
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm_utils.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/gemm_utils_f32.cpp
index e3b6cff8a..a9810dec2 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm_utils.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/gemm_utils_f32.cpp
@@ -13,10 +13,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
-#include <math.h>
+#include <cmath>
 
 #include "mkldnn_thread.hpp"
 #include "utils.hpp"
+#include "gemm_utils_f32.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -344,8 +345,9 @@ void partition_unit_diff(
 // Sum the m*n values from p_src into p_dst, assuming the two-dimensional
 // arrays have leading dimensions ld_src and ld_dst, respectively
 template<typename data_t>
-void sum_two_matrices(
-        int m, int n, data_t *p_src, int ld_src, data_t *p_dst, int ld_dst)
+void sum_two_matrices(int m, int n,
+        data_t * __restrict p_src, dim_t ld_src,
+        data_t * __restrict p_dst, dim_t ld_dst)
 {
     int i, j;
     for (j = 0; j < n; j++) {
@@ -355,11 +357,15 @@ void sum_two_matrices(
     }
 }
 
-template void sum_two_matrices<float>(
-        int m, int n, float *p_src, int ld_src, float *p_dst, int ld_dst);
+template
+void sum_two_matrices<float>(int m, int n,
+        float * __restrict p_src, dim_t ld_src,
+        float * __restrict p_dst, dim_t ld_dst);
 
-template void sum_two_matrices<double>(
-        int m, int n, double *p_src, int ld_src, double *p_dst, int ld_dst);
+template
+void sum_two_matrices<double>(int m, int n,
+        double * __restrict p_src, dim_t ld_src,
+        double * __restrict p_dst, dim_t ld_dst);
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm_utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/gemm_utils_f32.hpp
index 0888787b9..3352298b4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm_utils.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/gemm_utils_f32.hpp
@@ -22,6 +22,8 @@ namespace impl {
 namespace cpu {
 
 namespace gemm_utils {
+// Alias for any dimension related variable.
+typedef ptrdiff_t dim_t;
 
 template <typename T, bool isTransA, bool isTransB>
 struct gemm_traits {};
@@ -47,9 +49,10 @@ struct gemm_traits<float, isTransA, isTransB> {
 template <typename T>
 using unroll_factor = gemm_traits<T, false, false>;
 
-template <typename data_type>
-void sum_two_matrices(
-        int m, int n, data_type *p_src, int ld_src, data_type *p_dst, int ld_dst);
+template <typename data_t>
+void sum_two_matrices(int m, int n,
+        data_t * __restrict p_src, dim_t ld_src,
+        data_t * __restrict p_dst, dim_t ld_dst);
 
 void calc_nthr_nocopy_avx512_common(int m,
         int n, int k, int nthrs, int *nthrs_m, int *nthrs_n, int *nthrs_k,
@@ -61,8 +64,6 @@ void calc_nthr_nocopy_avx(int m, int n, int k,
 
 void partition_unit_diff(
         int ithr, int nthr, int n, int *t_offset, int *t_block);
-
-inline double saturate(double value, double min, double max);
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx512_common_gemm_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.cpp
index 8aee85fbd..d7be43e39 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx512_common_gemm_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.cpp
@@ -14,24 +14,24 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <math.h>
+#include <cmath>
+#include <mutex>
 
 #include "mkldnn_thread.hpp"
 #include "utils.hpp"
 
-#include "gemm_utils.hpp"
+#include "ref_gemm_f32.hpp"
+#include "gemm_utils_f32.hpp"
 #include "jit_avx512_common_gemm_f32.hpp"
 
-#define CACHE_LINE_SIZE 64
+#include "jit_generator.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-using namespace mkldnn::impl::memory_format;
-using namespace mkldnn::impl::utils;
+#define CACHE_LINE_SIZE 64
 
-using namespace Xbyak;
 #define STACKSIZE get_size_of_abi_save_regs()
 #ifdef _WIN32
 #define STACK_K_CAPACITY 32
@@ -45,17 +45,22 @@ using namespace Xbyak;
 #define UNROLL_M 48
 #define UNROLL_N 8
 
-struct jit_avx512_common_gemm_f32::xbyak_gemm : public jit_generator {
-    xbyak_gemm(char transa, char transb, float beta, bool hasBias = false,
+namespace avx512_common_gemm_f32 {
+using namespace gemm_utils;
+
+struct xbyak_gemm : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_common_gemm_f32_xbyak_gemm)
+
+    xbyak_gemm(char isTransA, char isTransB, float beta, bool hasBias = false,
             void *code_ptr = nullptr,
             size_t code_size = 80 * Xbyak::DEFAULT_MAX_CODE_SIZE)
         : jit_generator(code_ptr, code_size)
     {
+        using namespace Xbyak;
+
         enum { ver_avx512_core, ver_avx512_mic } ver =
             mayiuse(avx512_core) ? ver_avx512_core : ver_avx512_mic;
 
-        bool isTransA = (transa == 'T' || transa == 't');
-        bool isTransB = (transb == 'T' || transb == 't');
         bool isBeta0 = (beta == 0.0);
         bool isBetaN = (!isBeta0 && beta != 1.0);
 
@@ -1698,34 +1703,55 @@ struct jit_avx512_common_gemm_f32::xbyak_gemm : public jit_generator {
         vzeroupper();
         postamble();
 
-        ker_ = reinterpret_cast<decltype(ker_)>(
-                const_cast<uint8_t *>(this->getCode()));
+        ker_ = this->getCode<ker_t>();
     }
 
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_common_gemm_f32_xbyak_gemm)
+    typedef void (*ker_t)(dim_t m, dim_t n, dim_t k,
+            const float *alpha, const float *a, dim_t lda,
+            const float *b, dim_t ldb, const float *beta, float *c,
+            dim_t ldc, const float *bias, float *ws);
 
-    void operator()(long long int m, long long int n, long long int k,
-            const float *alpha, const float *a, long long int lda,
-            const float *b, long long int ldb, const float *beta, float *c,
-            long long int ldc, const float *bias, float *ws)
+    void operator()(dim_t m, dim_t n, dim_t k,
+            const float *alpha, const float *a, dim_t lda,
+            const float *b, dim_t ldb, const float *beta, float *c,
+            dim_t ldc, const float *bias, float *ws) const
     {
-        (*ker_)(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, bias, ws);
+        ker_(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, bias, ws);
     }
 
 private:
-    void (*ker_)(long long int m, long long int n, long long int k,
-            const float *alpha, const float *a, long long int lda,
-            const float *b, long long int ldb, const float *beta, float *c,
-            long long int ldc, const float *bias, float *ws);
+    ker_t ker_;
 };
 
-typedef void (*ker)(long long int, long long int, long long int, float *,
-        float *, long long int, float *, long long int, float *, float *,
-        long long int, float *, float *);
-void jit_avx512_common_gemm_f32::sgemm_nocopy_driver(const char *transa,
+const xbyak_gemm *get_xbyak_gemm(
+        bool isTransA, bool isTransB, float beta, bool hasBias) {
+    auto beta_idx = [](float beta) {
+        return (beta == 0.0) ? 0 : (beta == 1.0 ? 1 : 2);
+    };
+
+    // Kernel table [isTransA][isTransB][hasBias][beta (0, 1, other)]
+    static xbyak_gemm *kernel_table[2][2][2][3];
+    static std::once_flag initialized;
+    std::call_once(initialized, [=]{
+            for (bool isTransA: {false, true})
+            for (bool isTransB: {false, true})
+            for (bool hasBias: {false, true})
+            for (float beta: {0.0f, 1.0f, 2.0f}) {
+                // nocopy sgemm with bias for beta != 0.0 is not supported
+                if (hasBias && beta != 0.0)
+                    continue;
+                kernel_table[isTransA][isTransB][hasBias][beta_idx(beta)] =
+                    new xbyak_gemm(isTransA, isTransB, beta, hasBias);
+            }
+    });
+
+    return kernel_table[isTransA][isTransB][hasBias][beta_idx(beta)];
+}
+
+void sgemm_nocopy_driver(const char *transa,
         const char *transb, int m, int n, int k, const float *alpha,
-        const float *a, int lda, const float *b, int ldb, const float *beta,
-        float *c, int ldc, const float *bias, float *ws)
+        const float *a, dim_t lda, const float *b, dim_t ldb, const float *beta,
+        float *c, dim_t ldc, const float *bias, float *ws)
 {
     bool isTransA = (*transa == 'T' || *transa == 't');
     bool isTransB = (*transb == 'T' || *transb == 't');
@@ -1752,6 +1778,15 @@ void jit_avx512_common_gemm_f32::sgemm_nocopy_driver(const char *transa,
         return;
     }
 
+    assert(IMPLICATION(bias != nullptr, *beta == 0.0));
+
+    // XXX: this happens on every thread...
+    bool hasBias = (bias != nullptr);
+    auto ker_bn = get_xbyak_gemm(isTransA, isTransB, *beta, hasBias);
+    auto ker_b1 = get_xbyak_gemm(isTransA, isTransB, 1.0, false);
+    auto ker_b0 = get_xbyak_gemm(isTransA, isTransB, 0.0, false);
+    assert(ker_bn && ker_b1 && ker_b0);
+
     int BM = 4032, BN, BK;
     if (mayiuse(avx512_core)) {
         BN = isTransA ? 384 : 64;
@@ -1793,14 +1828,14 @@ void jit_avx512_common_gemm_f32::sgemm_nocopy_driver(const char *transa,
                 }
 
                 if (!isTransA) {
-                    curA = a + Bm + (size_t)Bk * lda;
+                    curA = a + Bm + Bk * lda;
                 } else {
-                    curA = a + Bk + (size_t)Bm * lda;
+                    curA = a + Bk + Bm * lda;
                 }
                 if (!isTransB) {
-                    curB = b + Bk + (size_t)Bn * ldb;
+                    curB = b + Bk + Bn * ldb;
                 } else {
-                    curB = b + Bn + (size_t)Bk * ldb;
+                    curB = b + Bn + Bk * ldb;
                 }
                 curC = c + Bm + (size_t)Bn * ldc;
                 if (bias != nullptr) {
@@ -1812,52 +1847,54 @@ void jit_avx512_common_gemm_f32::sgemm_nocopy_driver(const char *transa,
                 }
                 if (Bk == 0) {
                     if (*beta == 0.0 && bias == nullptr)
-                        (*ker_b0_)((long long int)sizeM, (long long int)sizeN,
-                                (long long int)sizeK, alpha, curA,
-                                (long long int)lda, curB, (long long int)ldb,
-                                beta, curC, (long long int)ldc, curBias, ws);
+                        (*ker_b0)((dim_t)sizeM, (dim_t)sizeN, (dim_t)sizeK,
+                                alpha, curA, lda, curB, ldb, beta, curC, ldc,
+                                curBias, ws);
                     else
-                        (*ker_bn_)((long long int)sizeM, (long long int)sizeN,
-                                (long long int)sizeK, alpha, curA,
-                                (long long int)lda, curB, (long long int)ldb,
-                                beta, curC, (long long int)ldc, curBias, ws);
+                        (*ker_bn)((dim_t)sizeM, (dim_t)sizeN, (dim_t)sizeK,
+                                alpha, curA, lda, curB, ldb, beta, curC, ldc,
+                                curBias, ws);
                 } else {
-                    (*ker_b1_)((long long int)sizeM, (long long int)sizeN,
-                            (long long int)sizeK, alpha, curA,
-                            (long long int)lda, curB, (long long int)ldb, beta,
-                            curC, (long long int)ldc, curBias, ws);
+                    (*ker_b1)((dim_t)sizeM, (dim_t)sizeN, (dim_t)sizeK,
+                            alpha, curA, lda, curB, ldb, beta, curC, ldc,
+                            curBias, ws);
                 }
             }
         }
     }
-    return;
 }
 
-void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
+}
+
+mkldnn_status_t jit_avx512_common_gemm_f32(
+        const char *transa, const char *transb,
         const int *p_m, const int *p_n, const int *p_k, const float *p_alpha,
         const float *A, const int *p_lda, const float *B, const int *p_ldb,
         const float *p_beta, float *C, const int *p_ldc, const float *bias)
 {
-    if (beta_ == 0. || beta_ == 1.)
-        assert(*p_beta == beta_);
-    assert((one_of(*transa, 'T', 't') == one_of(transa_, 'T', 't')));
+    using namespace mkldnn::impl::utils;
+    using namespace avx512_common_gemm_f32;
+    using namespace gemm_utils;
+
+    if (*p_beta != 0 && bias)
+        return ref_gemm(transa, transb, p_m, p_n, p_k,
+                p_alpha, A, p_lda, B, p_lda, p_beta, C, p_ldc, bias);
 
     int nthr = (mkldnn_in_parallel()) ? 1 : mkldnn_get_max_threads();
+
     int m = *p_m;
     int n = *p_n;
     int k = *p_k;
-    int lda = *p_lda;
-    int ldb = *p_ldb;
-    int ldc = *p_ldc;
+    dim_t lda = *p_lda;
+    dim_t ldb = *p_ldb;
+    dim_t ldc = *p_ldc;
     float beta = *p_beta;
     int MB, NB, KB;
 
     int nthr_m, nthr_n, nthr_k, nthr_mn;
 
-    assert(nthr <= nthrs_);
-
     // Determine threading partitioning
-    gemm_utils::calc_nthr_nocopy_avx512_common(
+    calc_nthr_nocopy_avx512_common(
             m, n, k, nthr, &nthr_m, &nthr_n, &nthr_k, &MB, &NB, &KB);
     assert(IMPLICATION(!mkldnn_thr_syncable(), nthr_k == 1));
 
@@ -1879,6 +1916,7 @@ void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
                 CACHE_LINE_SIZE);
         ompstatus = (unsigned char volatile *) ompstatus_;
         assert(ompstatus);
+
         for (int i = 0; i < nthr; i++)
             ompstatus[i * CACHE_LINE_SIZE] = 0;
 
@@ -1886,14 +1924,14 @@ void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
                 * sizeof(float), PAGE_4K);
     }
 
-    const size_t ws_elems_per_thr = k * 48 + 64;
+    const size_t ws_elems_per_thr = (size_t)k * 48 + 64;
     const size_t ws_size_per_thr
-            = utils::rnd_up(ws_elems_per_thr * sizeof(float), PAGE_4K);
+            = rnd_up(ws_elems_per_thr * sizeof(float), PAGE_4K);
     if (k > STACK_K_CAPACITY) {
         ws_buffers = (float *)malloc(nthr * ws_size_per_thr, PAGE_4K);
     }
 
-    parallel(nthr, [&](const int ithr, const int nthr) {
+    parallel_nd(nthr, [&](const int ithr) {
         int ithr_m, ithr_n, ithr_k, ithr_mn;
         int m_from, m_to, myM;
         int n_from, n_to, myN;
@@ -1903,7 +1941,9 @@ void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
         float *myC = C, myBeta;
         float *ws = ws_buffers ?
                 ws_buffers + ithr * ws_size_per_thr / sizeof(float) : 0;
-        int ld = ldc;
+        dim_t ld = ldc;
+
+        int sum_later = (mkldnn_get_num_threads() < nthr_m * nthr_n * nthr_k);
 
         if (ithr < nthr_m * nthr_n * nthr_k) {
 
@@ -1955,10 +1995,10 @@ void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
                     myC = &(C[m_from + n_from * ldc]);
                     myBeta = beta;
                     ld = ldc;
-                    if (hasBias_)
+                    if (bias)
                         myBias = &(bias[m_from]);
                 } else {
-                    myC = c_buffers + MB * NB * (cbase + ithr_k - 1);
+                    myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1);
                     myBeta = 0.0;
                     ld = MB;
                     myBias = nullptr;
@@ -1967,40 +2007,40 @@ void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
                 sgemm_nocopy_driver(transa, transb, myM, myN, myK, p_alpha, myA,
                         lda, myB, ldb, &myBeta, myC, ld, myBias, ws);
 
-                if (nthr_k > 1)
+                if (nthr_k > 1 && !sum_later)
                     ompstatus[(ibase + ithr_k) * CACHE_LINE_SIZE] = 1;
             }
 
-            if (nthr_k > 1) {
+            if (nthr_k > 1 && !sum_later) {
 
                 // sum matrices partitioned along K dimension
                 int n1, n2;
 
-                gemm_utils::partition_unit_diff(ithr_k, nthr_k, myN, &n1, &n2);
+                partition_unit_diff(ithr_k, nthr_k, myN, &n1, &n2);
 
                 if (ithr_k > 0) {
 
-                    myC = c_buffers + MB * NB * (cbase + ithr_k - 1);
-                    myC = myC + n1 * MB;
+                    myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1)
+                        + (dim_t)n1 * MB;
                     /* need to wait until main thread finishes */
                     while (ompstatus[ibase * CACHE_LINE_SIZE] != 1) {
                     };
 
                     /* my cache is hot */
-                    gemm_utils::sum_two_matrices(myM, n2, myC, MB,
+                    sum_two_matrices(myM, n2, myC, MB,
                             &C[m_from + (n_from + n1) * ldc], ldc);
                 }
 
                 for (int ik = 1; ik < nthr_k; ++ik) {
                     if (ik != ithr_k) {
 
-                        myC = c_buffers + MB * NB * (cbase + ik - 1);
-                        myC = myC + n1 * MB;
+                        myC = c_buffers + (dim_t)MB * NB * (cbase + ik - 1)
+                            + (dim_t)n1 * MB;
 
                         while (ompstatus[(ibase + ik) * CACHE_LINE_SIZE] != 1) {
                         };
 
-                        gemm_utils::sum_two_matrices(myM, n2, myC, MB,
+                        sum_two_matrices(myM, n2, myC, MB,
                                 &C[m_from + (n_from + n1) * ldc], ldc);
                     }
                 }
@@ -2008,44 +2048,82 @@ void jit_avx512_common_gemm_f32::sgemm(const char *transa, const char *transb,
         }
     });
 
+
+    // handle C summation later
+    if (nthr_k > 1 && ompstatus[0] == 0) {
+
+        parallel_nd(nthr, [&](const int ithr) {
+            int ithr_m, ithr_n, ithr_k, ithr_mn;
+            int m_from, m_to, myM;
+            int n_from, n_to, myN;
+            int cbase;
+            float *myC = C;
+
+            if (ithr < nthr_m * nthr_n * nthr_k) {
+
+                ithr_mn = ithr % nthr_mn;
+                ithr_m = ithr_mn % nthr_m;
+                ithr_n = ithr_mn / nthr_m;
+                ithr_k = ithr / nthr_mn;
+
+                /* swap ithr_k for performance improvement */
+                if (ithr_k == 0)
+                    ithr_k = nthr_k - 1;
+                else if (ithr_k == nthr_k - 1)
+                    ithr_k = 0;
+
+                m_from = MB * (ithr_m);
+                m_to = MB * (ithr_m + 1);
+                if (m_to > m)
+                    m_to = m;
+                myM = m_to - m_from;
+
+                n_from = NB * (ithr_n);
+                n_to = NB * (ithr_n + 1);
+                if (n_to > n)
+                    n_to = n;
+                myN = n_to - n_from;
+
+                cbase = (ithr_m + nthr_m * ithr_n) * (nthr_k - 1);
+
+                if (nthr_k > 1) {
+                    // sum matrices partitioned along K dimension
+                    int n1, n2;
+
+                    partition_unit_diff(ithr_k, nthr_k, myN, &n1, &n2);
+
+                    if (ithr_k > 0) {
+
+                        myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1)
+                            + (dim_t)n1 * MB;
+
+                        /* my cache is hot */
+                        sum_two_matrices(myM, n2, myC, MB,
+                                         &C[m_from + (n_from + n1) * ldc], ldc);
+                    }
+
+                    for (int ik = 1; ik < nthr_k; ++ik) {
+                        if (ik != ithr_k) {
+
+                            myC = c_buffers + (dim_t)MB * NB * (cbase + ik - 1)
+                                + (dim_t)n1 * MB;
+
+                            sum_two_matrices(myM, n2, myC, MB,
+                                             &C[m_from + (n_from + n1) * ldc], ldc);
+                        }
+                    }
+                }
+            }
+        });
+    }
+
     free(c_buffers);
     free(ompstatus_);
     free(ws_buffers);
-}
 
-jit_avx512_common_gemm_f32::jit_avx512_common_gemm_f32(
-        char transa, char transb, float beta, bool hasBias)
-{
-    transa_ = transa;
-    transb_ = transb;
-    beta_ = beta;
-    hasBias_ = hasBias;
-    if (hasBias) {
-        assert(beta == 0.0);
-    }
-    ker_bn_ = new xbyak_gemm(transa, transb, beta, hasBias);
-    if (beta != 1.0) {
-        ker_b1_ = new xbyak_gemm(transa, transb, 1.0);
-    } else {
-        ker_b1_ = ker_bn_;
-    }
-    if (beta != 0.0 || (beta == 0.0 && hasBias)) {
-        ker_b0_ = new xbyak_gemm(transa, transb, 0.0);
-    } else {
-        ker_b0_ = ker_bn_;
-    }
-
-    nthrs_ = mkldnn_get_max_threads();
+    return mkldnn_success;
 }
 
-jit_avx512_common_gemm_f32::~jit_avx512_common_gemm_f32()
-{
-    delete ker_bn_;
-    if (beta_ != 1.0)
-        delete ker_b1_;
-    if (beta_ != 0.0 || (beta_ == 0.0 && hasBias_))
-        delete ker_b0_;
-}
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.hpp
new file mode 100644
index 000000000..d581b7fd7
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx512_common_gemm_f32.hpp
@@ -0,0 +1,36 @@
+/*******************************************************************************
+* Copyright 2017-2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef JIT_AVX512_COMMON_GEMM_F32_HPP
+#define JIT_AVX512_COMMON_GEMM_F32_HPP
+
+#include "mkldnn_types.h"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+mkldnn_status_t jit_avx512_common_gemm_f32(
+        const char *transa, const char *transb, const int *M,
+        const int *N, const int *K, const float *alpha, const float *A,
+        const int *lda, const float *B, const int *ldb, const float *beta,
+        float *C, const int *ldc, const float *bias = nullptr);
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx_gemm_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.cpp
index 354fa0bc7..60d422083 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx_gemm_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.cpp
@@ -14,23 +14,24 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <math.h>
+#include <cmath>
+#include <mutex>
 
 #include "mkldnn_thread.hpp"
 #include "utils.hpp"
-#include "gemm_utils.hpp"
+
+#include "ref_gemm_f32.hpp"
+#include "gemm_utils_f32.hpp"
 #include "jit_avx_gemm_f32.hpp"
 
-#define CACHE_LINE_SIZE 64
+#include "jit_generator.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-using namespace mkldnn::impl::memory_format;
-using namespace mkldnn::impl::utils;
+#define CACHE_LINE_SIZE 64
 
-using namespace Xbyak;
 #define STACKSIZE get_size_of_abi_save_regs()
 #if _WIN32
 #define STACK_K_CAPACITY 128
@@ -42,22 +43,25 @@ using namespace Xbyak;
 #define BASE_SHIFT 2
 #define SECOND_FETCH 14
 
-struct jit_avx_gemm_f32::xbyak_gemm : public jit_generator {
+namespace avx_gemm_f32 {
+using namespace gemm_utils;
+
+struct xbyak_gemm : public jit_generator {
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx_gemm_f32_xbyak_gemm)
 
-    xbyak_gemm(char transa, char transb, float beta, bool hasBias = false,
+    xbyak_gemm(char isTransA, char isTransB, float beta, bool hasBias = false,
             void *code_ptr = nullptr,
             size_t code_size = 80 * Xbyak::DEFAULT_MAX_CODE_SIZE)
         : jit_generator(code_ptr, code_size)
     {
+        using namespace Xbyak;
+
         const bool is_avx2 = mayiuse(avx2);
         assert(IMPLICATION(!is_avx2, mayiuse(avx)));
 
         const int UNROLL_M = is_avx2 ? 16 : 8;
         const int UNROLL_N = 6;
 
-        bool isTransA = (transa == 'T' || transa == 't');
-        bool isTransB = (transb == 'T' || transb == 't');
         bool isBeta0 = (beta == 0.0);
         bool isBetaN = (!isBeta0 && beta != 1.0);
 
@@ -2275,38 +2279,60 @@ struct jit_avx_gemm_f32::xbyak_gemm : public jit_generator {
 
         L(main999);
         // Restore original stack
-        mov(rax, ORIG_SP);
-        mov(rsp, rax);
+        mov(rsp, ORIG_SP);
 
         vzeroupper();
         postamble();
 
-        ker_ = reinterpret_cast<decltype(ker_)>(
-                const_cast<uint8_t *>(this->getCode()));
+        ker_ = this->getCode<ker_t>();
     }
 
-    void operator()(long long int m, long long int n, long long int k,
-            const float *alpha, const float *a, long long int lda,
-            const float *b, long long int ldb, const float *beta, float *c,
-            long long int ldc, const float *bias, float *ws)
+    typedef void (*ker_t)(dim_t m, dim_t n, dim_t k,
+            const float *alpha, const float *a, dim_t lda,
+            const float *b, dim_t ldb, const float *beta, float *c,
+            dim_t ldc, const float *bias, float *ws);
+
+    void operator()(dim_t  m, dim_t n, dim_t k,
+            const float *alpha, const float *a, dim_t lda,
+            const float *b, dim_t ldb, const float *beta, float *c,
+            dim_t ldc, const float *bias, float *ws) const
     {
-        (*ker_)(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, bias, ws);
+        ker_(m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, bias, ws);
     }
 
 private:
-    void (*ker_)(long long int m, long long int n, long long int k,
-            const float *alpha, const float *a, long long int lda,
-            const float *b, long long int ldb, const float *beta, float *c,
-            long long int ldc, const float *bias, float *ws);
+    ker_t ker_;
 };
 
-typedef void (*ker)(long long int, long long int, long long int, float *,
-        float *, long long int, float *, long long int, float *, float *,
-        long long int, float *);
-void jit_avx_gemm_f32::sgemm_nocopy_driver(const char *transa,
+const xbyak_gemm *get_xbyak_gemm(
+        bool isTransA, bool isTransB, float beta, bool hasBias) {
+    auto beta_idx = [](float beta) {
+        return (beta == 0.0) ? 0 : (beta == 1.0 ? 1 : 2);
+    };
+
+    // Kernel table [isTransA][isTransB][hasBias][beta (0, 1, other)]
+    static xbyak_gemm *kernel_table[2][2][2][3];
+    static std::once_flag initialized;
+    std::call_once(initialized, [=]{
+            for (bool isTransA: {false, true})
+            for (bool isTransB: {false, true})
+            for (bool hasBias: {false, true})
+            for (float beta: {0.0f, 1.0f, 2.0f}) {
+                // nocopy sgemm with bias for beta != 0.0 is not supported
+                if (hasBias && beta != 0.0)
+                    continue;
+                kernel_table[isTransA][isTransB][hasBias][beta_idx(beta)] =
+                    new xbyak_gemm(isTransA, isTransB, beta, hasBias);
+            }
+    });
+
+    return kernel_table[isTransA][isTransB][hasBias][beta_idx(beta)];
+}
+
+void sgemm_nocopy_driver(const char *transa,
         const char *transb, int m, int n, int k, const float *alpha,
-        const float *a, int lda, const float *b, int ldb, const float *beta,
-        float *c, int ldc, const float *bias, float *ws)
+        const float *a, dim_t lda, const float *b, dim_t ldb, const float *beta,
+        float *c, dim_t ldc, const float *bias, float *ws)
 {
     bool isTransA = (*transa == 'T' || *transa == 't');
     bool isTransB = (*transb == 'T' || *transb == 't');
@@ -2333,6 +2359,15 @@ void jit_avx_gemm_f32::sgemm_nocopy_driver(const char *transa,
         return;
     }
 
+    assert(IMPLICATION(bias != nullptr, *beta == 0.0));
+
+    // XXX: this happens on every thread...
+    bool hasBias = (bias != nullptr);
+    auto ker_bn = get_xbyak_gemm(isTransA, isTransB, *beta, hasBias);
+    auto ker_b1 = get_xbyak_gemm(isTransA, isTransB, 1.0, false);
+    auto ker_b0 = get_xbyak_gemm(isTransA, isTransB, 0.0, false);
+    assert(ker_bn && ker_b1 && ker_b0);
+
     int BM = 4032;
     int BN = isTransA ? 96 : 48;
     int BK = isTransB ? 96 : 256;
@@ -2367,14 +2402,14 @@ void jit_avx_gemm_f32::sgemm_nocopy_driver(const char *transa,
                 }
 
                 if (!isTransA) {
-                    curA = a + Bm + (size_t)Bk * lda;
+                    curA = a + Bm + Bk * lda;
                 } else {
-                    curA = a + Bk + (size_t)Bm * lda;
+                    curA = a + Bk + Bm * lda;
                 }
                 if (!isTransB) {
-                    curB = b + Bk + (size_t)Bn * ldb;
+                    curB = b + Bk + Bn * ldb;
                 } else {
-                    curB = b + Bn + (size_t)Bk * ldb;
+                    curB = b + Bn + Bk * ldb;
                 }
                 curC = c + Bm + (size_t)Bn * ldc;
                 if (bias != nullptr) {
@@ -2386,51 +2421,54 @@ void jit_avx_gemm_f32::sgemm_nocopy_driver(const char *transa,
                 }
                 if (Bk == 0) {
                     if (*beta == 0.0 && bias == nullptr)
-                        (*ker_b0_)((long long int)sizeM, (long long int)sizeN,
-                                (long long int)sizeK, alpha, curA,
-                                (long long int)lda, curB, (long long int)ldb,
-                                beta, curC, (long long int)ldc, curBias, ws);
+                        (*ker_b0)((dim_t)sizeM, (dim_t)sizeN, (dim_t)sizeK,
+                                alpha, curA, lda, curB, ldb, beta, curC, ldc,
+                                curBias, ws);
                     else
-                        (*ker_bn_)((long long int)sizeM, (long long int)sizeN,
-                                (long long int)sizeK, alpha, curA,
-                                (long long int)lda, curB, (long long int)ldb,
-                                beta, curC, (long long int)ldc, curBias, ws);
+                        (*ker_bn)((dim_t)sizeM, (dim_t)sizeN, (dim_t)sizeK,
+                                alpha, curA, lda, curB, ldb, beta, curC, ldc,
+                                curBias, ws);
                 } else {
-                    (*ker_b1_)((long long int)sizeM, (long long int)sizeN,
-                            (long long int)sizeK, alpha, curA,
-                            (long long int)lda, curB, (long long int)ldb, beta,
-                            curC, (long long int)ldc, curBias, ws);
+                    (*ker_b1)((dim_t)sizeM, (dim_t)sizeN, (dim_t)sizeK,
+                            alpha, curA, lda, curB, ldb, beta, curC, ldc,
+                            curBias, ws);
                 }
             }
         }
     }
-    return;
 }
-void jit_avx_gemm_f32::sgemm(const char *transa, const char *transb,
+
+}
+
+mkldnn_status_t jit_avx_gemm_f32(
+        const char *transa, const char *transb,
         const int *p_m, const int *p_n, const int *p_k, const float *p_alpha,
         const float *A, const int *p_lda, const float *B, const int *p_ldb,
         const float *p_beta, float *C, const int *p_ldc, const float *bias)
 {
-    if (beta_ == 0. || beta_ == 1.)
-        assert(*p_beta == beta_);
-    assert((one_of(*transa, 'T', 't') == one_of(transa_, 'T', 't')));
+    using namespace mkldnn::impl::utils;
+    using namespace avx_gemm_f32;
+    using namespace gemm_utils;
+
+    if (*p_beta != 0 && bias)
+        return ref_gemm(transa, transb, p_m, p_n, p_k,
+                p_alpha, A, p_lda, B, p_lda, p_beta, C, p_ldc, bias);
+
+    int nthr = (mkldnn_in_parallel()) ? 1 : mkldnn_get_max_threads();
 
-    int nthr = mkldnn_in_parallel() ? 1 : mkldnn_get_max_threads();
     int m = *p_m;
     int n = *p_n;
     int k = *p_k;
-    int lda = *p_lda;
-    int ldb = *p_ldb;
-    int ldc = *p_ldc;
+    dim_t lda = *p_lda;
+    dim_t ldb = *p_ldb;
+    dim_t ldc = *p_ldc;
     float beta = *p_beta;
     int MB, NB, KB;
 
     int nthr_m, nthr_n, nthr_k, nthr_mn;
 
-    assert(nthr <= nthrs_);
-
     // Determine threading partitioning
-    gemm_utils::calc_nthr_nocopy_avx(
+    calc_nthr_nocopy_avx(
             m, n, k, nthr, &nthr_m, &nthr_n, &nthr_k, &MB, &NB, &KB);
     assert(IMPLICATION(!mkldnn_thr_syncable(), nthr_k == 1));
 
@@ -2460,14 +2498,14 @@ void jit_avx_gemm_f32::sgemm(const char *transa, const char *transb,
                 * sizeof(float), PAGE_4K);
     }
 
-    const size_t ws_elems_per_thr = k * 16 + 64;
+    const size_t ws_elems_per_thr = (size_t)k * 16 + 64;
     const size_t ws_size_per_thr
-            = utils::rnd_up(ws_elems_per_thr * sizeof(float), PAGE_4K);
+            = rnd_up(ws_elems_per_thr * sizeof(float), PAGE_4K);
     if (k > STACK_K_CAPACITY) {
         ws_buffers = (float *)malloc(nthr * ws_size_per_thr, PAGE_4K);
     }
 
-    parallel(nthr, [&](const int ithr, const int nthr) {
+    parallel_nd(nthr, [&](const int ithr) {
         int ithr_m, ithr_n, ithr_k, ithr_mn;
         int m_from, m_to, myM;
         int n_from, n_to, myN;
@@ -2477,7 +2515,9 @@ void jit_avx_gemm_f32::sgemm(const char *transa, const char *transb,
         float *myC = C, myBeta;
         float *ws = ws_buffers ?
                 ws_buffers + ithr * ws_size_per_thr / sizeof(float) : 0;
-        int ld = ldc;
+        dim_t ld = ldc;
+
+        int sum_later = (mkldnn_get_num_threads() < nthr_m * nthr_n * nthr_k);
 
         if (ithr < nthr_m * nthr_n * nthr_k) {
 
@@ -2529,10 +2569,10 @@ void jit_avx_gemm_f32::sgemm(const char *transa, const char *transb,
                     myC = &(C[m_from + n_from * ldc]);
                     myBeta = beta;
                     ld = ldc;
-                    if (hasBias_)
+                    if (bias)
                         myBias = &(bias[m_from]);
                 } else {
-                    myC = c_buffers + MB * NB * (cbase + ithr_k - 1);
+                    myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1);
                     myBeta = 0.0;
                     ld = MB;
                     myBias = nullptr;
@@ -2541,40 +2581,40 @@ void jit_avx_gemm_f32::sgemm(const char *transa, const char *transb,
                 sgemm_nocopy_driver(transa, transb, myM, myN, myK, p_alpha, myA,
                         lda, myB, ldb, &myBeta, myC, ld, myBias, ws);
 
-                if (nthr_k > 1)
+                if (nthr_k > 1 && !sum_later)
                     ompstatus[(ibase + ithr_k) * CACHE_LINE_SIZE] = 1;
             }
 
-            if (nthr_k > 1) {
+            if (nthr_k > 1 && !sum_later) {
 
                 // sum matrices partitioned along K dimension
                 int n1, n2;
 
-                gemm_utils::partition_unit_diff(ithr_k, nthr_k, myN, &n1, &n2);
+                partition_unit_diff(ithr_k, nthr_k, myN, &n1, &n2);
 
                 if (ithr_k > 0) {
 
-                    myC = c_buffers + MB * NB * (cbase + ithr_k - 1);
-                    myC = myC + n1 * MB;
+                    myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1)
+                        + (dim_t)n1 * MB;
                     /* need to wait until main thread finishes */
                     while (ompstatus[ibase * CACHE_LINE_SIZE] != 1) {
                     };
 
                     /* my cache is hot */
-                    gemm_utils::sum_two_matrices(myM, n2, myC, MB,
+                    sum_two_matrices(myM, n2, myC, MB,
                             &C[m_from + (n_from + n1) * ldc], ldc);
                 }
 
                 for (int ik = 1; ik < nthr_k; ++ik) {
                     if (ik != ithr_k) {
 
-                        myC = c_buffers + MB * NB * (cbase + ik - 1);
-                        myC = myC + n1 * MB;
+                        myC = c_buffers + (dim_t)MB * NB * (cbase + ik - 1)
+                            + (dim_t)n1 * MB;
 
                         while (ompstatus[(ibase + ik) * CACHE_LINE_SIZE] != 1) {
                         };
 
-                        gemm_utils::sum_two_matrices(myM, n2, myC, MB,
+                        sum_two_matrices(myM, n2, myC, MB,
                                 &C[m_from + (n_from + n1) * ldc], ldc);
                     }
                 }
@@ -2582,42 +2622,80 @@ void jit_avx_gemm_f32::sgemm(const char *transa, const char *transb,
         }
     });
 
+    // handle C summation later
+    if (nthr_k > 1 && ompstatus[0] == 0) {
+
+        parallel_nd(nthr, [&](const int ithr) {
+            int ithr_m, ithr_n, ithr_k, ithr_mn;
+            int m_from, m_to, myM;
+            int n_from, n_to, myN;
+            int cbase;
+            float *myC = C;
+
+            if (ithr < nthr_m * nthr_n * nthr_k) {
+
+                ithr_mn = ithr % nthr_mn;
+                ithr_m = ithr_mn % nthr_m;
+                ithr_n = ithr_mn / nthr_m;
+                ithr_k = ithr / nthr_mn;
+
+                /* swap ithr_k for performance improvement */
+                if (ithr_k == 0)
+                    ithr_k = nthr_k - 1;
+                else if (ithr_k == nthr_k - 1)
+                    ithr_k = 0;
+
+                m_from = MB * (ithr_m);
+                m_to = MB * (ithr_m + 1);
+                if (m_to > m)
+                    m_to = m;
+                myM = m_to - m_from;
+
+                n_from = NB * (ithr_n);
+                n_to = NB * (ithr_n + 1);
+                if (n_to > n)
+                    n_to = n;
+                myN = n_to - n_from;
+
+                cbase = (ithr_m + nthr_m * ithr_n) * (nthr_k - 1);
+
+                if (nthr_k > 1) {
+                    // sum matrices partitioned along K dimension
+                    int n1, n2;
+
+                    partition_unit_diff(ithr_k, nthr_k, myN, &n1, &n2);
+
+                    if (ithr_k > 0) {
+
+                        myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1)
+                            + (dim_t)n1 * MB;
+
+                        /* my cache is hot */
+                        sum_two_matrices(myM, n2, myC, MB,
+                                         &C[m_from + (n_from + n1) * ldc], ldc);
+                    }
+
+                    for (int ik = 1; ik < nthr_k; ++ik) {
+                        if (ik != ithr_k) {
+
+                            myC = c_buffers + (dim_t)MB * NB * (cbase + ik - 1)
+                                + (dim_t)n1 * MB;
+
+                            sum_two_matrices(myM, n2, myC, MB,
+                                             &C[m_from + (n_from + n1) * ldc], ldc);
+                        }
+                    }
+                }
+            }
+        });
+    }
+
+
     free(c_buffers);
     free(ompstatus_);
     free(ws_buffers);
-}
-
-jit_avx_gemm_f32::jit_avx_gemm_f32(
-        char transa, char transb, float beta, bool hasBias)
-{
-    transa_ = transa;
-    transb_ = transb;
-    beta_ = beta;
-    hasBias_ = hasBias;
-    if (hasBias) {
-        assert(beta == 0.0);
-    }
-    ker_bn_ = new xbyak_gemm(transa, transb, beta, hasBias);
-    if (beta != 1.0) {
-        ker_b1_ = new xbyak_gemm(transa, transb, 1.0);
-    } else {
-        ker_b1_ = ker_bn_;
-    }
-    if (beta != 0.0 || (beta == 0.0 && hasBias)) {
-        ker_b0_ = new xbyak_gemm(transa, transb, 0.0);
-    } else {
-        ker_b0_ = ker_bn_;
-    }
-    nthrs_ = mkldnn_get_max_threads();
-}
 
-jit_avx_gemm_f32::~jit_avx_gemm_f32()
-{
-    delete ker_bn_;
-    if (beta_ != 1.0)
-        delete ker_b1_;
-    if (beta_ != 0.0 || (beta_ == 0.0 && hasBias_))
-        delete ker_b0_;
+    return mkldnn_success;
 }
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_s16s16s32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.hpp
index a7c720c40..aabf520a3 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_s16s16s32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/jit_avx_gemm_f32.hpp
@@ -14,23 +14,24 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <stdint.h>
-#include "mkldnn_test_common.hpp"
-#include "gtest/gtest.h"
+#ifndef JIT_AVX_GEMM_F32_HPP
+#define JIT_AVX_GEMM_F32_HPP
+
+#include "mkldnn_types.h"
 
-#include "mkldnn.hpp"
-#include "test_convolution_relu_forward_common.hpp"
 namespace mkldnn {
+namespace impl {
+namespace cpu {
 
-using convolution_test = convolution_relu_test<int16_t, int16_t,
-                                               int32_t, int32_t>;
+mkldnn_status_t jit_avx_gemm_f32(
+        const char *transa, const char *transb, const int *M,
+        const int *N, const int *K, const float *alpha, const float *A,
+        const int *lda, const float *B, const int *ldb, const float *beta,
+        float *C, const int *ldc, const float *bias = nullptr);
 
-TEST_P(convolution_test, TestConvolution)
-{
-}
 
-#define S16S16S32
-#define DIRECTION_FORWARD
-#include "convolution_common.h"
+}
+}
+}
 
-}
-\ No newline at end of file
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/ref_gemm.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.cpp
index e0331e0ef..5147885a8 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/ref_gemm.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.cpp
@@ -14,13 +14,16 @@
 * limitations under the License.
 *******************************************************************************/
 
+#include "mkldnn_types.h"
+
 #include "mkldnn_thread.hpp"
 #include "nstl.hpp"
 #include "utils.hpp"
 
-#include "../jit_generator.hpp"
+#include "jit_generator.hpp"
 
-#include "gemm_utils.hpp"
+#include "gemm_utils_f32.hpp"
+#include "ref_gemm_f32.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -29,13 +32,14 @@ namespace cpu {
 using namespace mkldnn::impl::utils;
 using namespace gemm_utils;
 
+namespace {
 
 template <typename data_t>
-static void copy_A(
-        bool isTransA, int K, const data_t *A, const int lda, data_t *ws) {
+void copy_A(
+        bool isTransA, int K, const data_t *A, const dim_t lda, data_t *ws) {
     for (int k = 0; k < K; k++) {
         PRAGMA_OMP_SIMD()
-        for (int i = 0; i < gemm_utils::unroll_factor<data_t>::m; i++) {
+        for (int i = 0; i < unroll_factor<data_t>::m; i++) {
             ws[i] = isTransA ? A[i * lda + k] : A[i + k * lda];
         }
         ws += unroll_factor<data_t>::m;
@@ -43,8 +47,8 @@ static void copy_A(
 }
 
 template <typename data_t, bool isTransA, bool isTransB>
-static void kernel_mxn(int K, const data_t *A, const int lda,
-        const data_t *B, const int ldb, data_t *C, const int ldc,
+void kernel_mxn(int K, const data_t *A, const dim_t lda,
+        const data_t *B, const dim_t ldb, data_t *C, const dim_t ldc,
         const data_t alpha, const data_t beta) {
     data_t c[unroll_factor<data_t>::m * unroll_factor<data_t>::n] =
         { static_cast<data_t>(0.) };
@@ -70,9 +74,9 @@ static void kernel_mxn(int K, const data_t *A, const int lda,
 }
 
 template <typename data_t, bool isTransA, bool isTransB>
-static void block_ker(const int M, const int N, const int K,
-        const data_t *A, const int lda, const data_t *B, const int ldb,
-        data_t *C, const int ldc, const data_t alpha, const data_t beta,
+void block_ker(const int M, const int N, const int K,
+        const data_t *A, const dim_t lda, const data_t *B, const dim_t ldb,
+        data_t *C, const dim_t ldc, const data_t alpha, const data_t beta,
         data_t *ws, bool do_copy) {
     int Nu = rnd_dn(N, unroll_factor<data_t>::n);
     int Mu = rnd_dn(M, unroll_factor<data_t>::m);
@@ -124,8 +128,9 @@ static void block_ker(const int M, const int N, const int K,
 
 template <typename data_t, bool isTransA, bool isTransB>
 void gemm_ithr(const int M, const int N, const int K, const data_t alpha,
-        const data_t *A, const int lda, const data_t *B, const int ldb,
-        const data_t beta, data_t *C, const int ldc, bool do_copy, data_t *ws) {
+        const data_t *A, const dim_t lda, const data_t *B, const dim_t ldb,
+        const data_t beta, data_t *C, const dim_t ldc, bool do_copy,
+        data_t *ws) {
     constexpr int BM = gemm_traits<data_t, isTransA, isTransB>::BM;
     constexpr int BN = gemm_traits<data_t, isTransA, isTransB>::BN;
     constexpr int BK = gemm_traits<data_t, isTransA, isTransB>::BK;
@@ -138,12 +143,12 @@ void gemm_ithr(const int M, const int N, const int K, const data_t alpha,
         return;
 
     if ((K <= 0) || (alpha == static_cast<data_t>(0))) {
-        ptrdiff_t MN = (ptrdiff_t)N * M;
+        dim_t MN = N * M;
         if (beta == static_cast<data_t>(0.)) {
-            for (ptrdiff_t j = 0; j < MN; j++)
+            for (dim_t j = 0; j < MN; j++)
                 C[j] = static_cast<data_t>(0.);
         } else if (beta != static_cast<data_t>(1.)) {
-            for (ptrdiff_t j = 0; j < MN; j++)
+            for (dim_t j = 0; j < MN; j++)
                 C[j] *= beta;
         }
         return;
@@ -171,21 +176,26 @@ void gemm_ithr(const int M, const int N, const int K, const data_t alpha,
     }
 }
 
+}
+
 template <typename data_t>
-void ref_gemm(const char *transa_, const char *transb_, const int *M_,
+mkldnn_status_t ref_gemm(
+        const char *transa_, const char *transb_, const int *M_,
         const int *N_, const int *K_, const data_t *alpha_, const data_t *A,
         const int *lda_, const data_t *B, const int *ldb_, const data_t *beta_,
         data_t *C, const int *ldc_, const data_t *bias) {
+
     bool isTransA = (*transa_ == 'T' || *transa_ == 't');
     bool isTransB = (*transb_ == 'T' || *transb_ == 't');
-    const int M = *M_, N = *N_, K = *K_, lda = *lda_, ldb = *ldb_, ldc = *ldc_;
+    const int M = *M_, N = *N_, K = *K_;
+    const dim_t lda = *lda_, ldb = *ldb_, ldc = *ldc_;
     const data_t alpha = *alpha_, beta = *beta_;
 
     int max_nthr = mkldnn_in_parallel() ? 1 : mkldnn_get_max_threads();
     int nthr_m, nthr_n, nthr_k;
     int MB, NB, KB;
     // thread balancing over M, N, K & size of blocking dimensions
-    gemm_utils::calc_nthr_nocopy_avx(
+    calc_nthr_nocopy_avx(
             M, N, K, max_nthr, &nthr_m, &nthr_n, &nthr_k, &MB, &NB, &KB);
     assert(IMPLICATION(!mkldnn_thr_syncable(), nthr_k == 1));
 
@@ -205,14 +215,23 @@ void ref_gemm(const char *transa_, const char *transb_, const int *M_,
     const int nthr = nthr_mn * nthr_k;
     const size_t ws_elems_per_thr = K * unroll_factor<data_t>::m;
     const size_t ws_size_per_thr
-            = utils::rnd_up(ws_elems_per_thr * sizeof(data_t), PAGE_4K);
+            = rnd_up(ws_elems_per_thr * sizeof(data_t), PAGE_4K);
     if (do_copy) {
         ws_buffers = (data_t*)malloc(nthr * ws_size_per_thr, PAGE_4K);
         if (!ws_buffers)
             do_copy = false;
     }
 
-    parallel(nthr, [&](const int ithr, const int nthr) {
+    auto get_thr_block = [&](int &from, int &to, int &myN, int NB, int N,
+                             int ithr) {
+        from = NB * (ithr);
+        to = NB * (ithr + 1);
+        if (to > N)
+            to = N;
+        myN = to - from;
+    };
+
+    parallel_nd(nthr, [&](const int ithr) {
         int ithr_mn = ithr % nthr_mn;
         int ithr_m = ithr_mn % nthr_m;
         int ithr_n = ithr_mn / nthr_m;
@@ -226,27 +245,20 @@ void ref_gemm(const char *transa_, const char *transb_, const int *M_,
 
         int m_from = 0, m_to = 0, myM = 0, n_from = 0, n_to = 0, myN = 0,
                 k_from = 0, k_to = 0, myK = 0;
-        auto get_thr_block = [&](int &from, int &to, int &myN, int NB, int N,
-                int ithr) {
-            from = NB * (ithr);
-            to = NB * (ithr + 1);
-            if (to > N)
-                to = N;
-            myN = to - from;
-        };
+
         get_thr_block(m_from, m_to, myM, MB, M, ithr_m);
         get_thr_block(n_from, n_to, myN, NB, N, ithr_n);
         get_thr_block(k_from, k_to, myK, KB, K, ithr_k);
 
         if (myM > 0 && myN > 0) {
             data_t myBeta, *myC;
-            int ld;
+            dim_t ld;
             if (ithr_k == 0) {
                 myC = &(C[m_from + n_from * ldc]);
                 myBeta = beta;
                 ld = ldc;
             } else {
-                myC = c_buffers + MB * NB * (cbase + ithr_k - 1);
+                myC = c_buffers + (dim_t)MB * NB * (cbase + ithr_k - 1);
                 myBeta = 0.0f;
                 ld = MB;
             }
@@ -275,23 +287,36 @@ void ref_gemm(const char *transa_, const char *transb_, const int *M_,
                 }
             }
         }
+    });
 
-        if (nthr_k > 1) {
-            assert(mkldnn_thr_syncable());
-            mkldnn_thr_barrier();
+    if (nthr_k > 1) {
+        parallel_nd(nthr, [&](const int ithr) {
+            int ithr_mn = ithr % nthr_mn;
+            int ithr_m = ithr_mn % nthr_m;
+            int ithr_k = ithr / nthr_mn;
+            int ithr_n = ithr_mn / nthr_m;
+
+            int n_from = 0, n_to = 0, myN = 0;
+            int m_from = 0, m_to = 0, myM = 0;
+
+            int cbase = (ithr_m + nthr_m * ithr_n) * (nthr_k - 1);
+
+            get_thr_block(n_from, n_to, myN, NB, N, ithr_n);
+            get_thr_block(m_from, m_to, myM, MB, M, ithr_m);
 
             // sum matrices partitioned along K dimension
             int offset = 0, block = 0;
             gemm_utils::partition_unit_diff(ithr_k, nthr_k, myN, &offset,
                     &block);
             for (int ik = 1; ik < nthr_k; ++ik) {
-                data_t *myC = c_buffers + MB * (NB * (cbase + ik - 1) + offset);
+                data_t *myC = c_buffers
+                            + MB * ((dim_t)NB * (cbase + ik - 1) + offset);
 
                 gemm_utils::sum_two_matrices(myM, block, myC, MB,
                         &C[m_from + (n_from + offset) * ldc], ldc);
             }
-        }
-    });
+        });
+    }
 
     if (bias) {
         parallel_nd(N, M, [&](int i, int j) {
@@ -301,14 +326,18 @@ void ref_gemm(const char *transa_, const char *transb_, const int *M_,
 
     free(ws_buffers);
     free(c_buffers);
+
+    return mkldnn_success;
 }
 
-template void ref_gemm<float>(const char *transa_, const char *transb_,
+template mkldnn_status_t ref_gemm<float>(
+        const char *transa_, const char *transb_,
         const int *M_, const int *N_, const int *K_, const float *alpha_,
         const float *A, const int *lda_, const float *B, const int *ldb_,
         const float *beta_, float *C, const int *ldc_, const float *bias);
 
-template void ref_gemm<double>(const char *transa_, const char *transb_,
+template mkldnn_status_t ref_gemm<double>(
+        const char *transa_, const char *transb_,
         const int *M_, const int *N_, const int *K_, const double *alpha_,
         const double *A, const int *lda_, const double *B, const int *ldb_,
         const double *beta_, double *C, const int *ldc_, const double *bias);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.hpp
new file mode 100644
index 000000000..7c90ba627
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/f32/ref_gemm_f32.hpp
@@ -0,0 +1,36 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef REF_GEMM_F32_HPP
+#define REF_GEMM_F32_HPP
+
+#include "mkldnn_types.h"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <typename data_t>
+mkldnn_status_t ref_gemm(const char *transa, const char *transb, const int *M,
+        const int *N, const int *K, const data_t *alpha, const data_t *A,
+        const int *lda, const data_t *B, const int *ldb, const data_t *beta,
+        data_t *C, const int *ldc, const data_t *bias);
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.cpp
index 146e68887..ac619b115 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.cpp
@@ -13,20 +13,25 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
-#include <mutex>
 
 #include "mkldnn.h"
 
-#include "verbose.hpp"
+#include "mkldnn_traits.hpp"
+#include "nstl.hpp"
+
+#include "jit_generator.hpp"
 
-#include "jit_avx_gemm_f32.hpp"
-#include "jit_avx512_common_gemm_f32.hpp"
 #include "gemm.hpp"
-#include "../jit_generator.hpp"
-#include "nstl.hpp"
+
+#include "f32/jit_avx512_common_gemm_f32.hpp"
+#include "f32/jit_avx_gemm_f32.hpp"
+#include "f32/ref_gemm_f32.hpp"
+
+#include "s8x8s32/jit_avx512_core_gemm_s8u8s32.hpp"
+#include "s8x8s32/jit_avx512_core_gemm_s8s8s32.hpp"
+#include "s8x8s32/ref_gemm_s8x8s32.hpp"
+
 #include "os_blas.hpp"
-#include "math_utils.hpp"
-#include "mkldnn_traits.hpp"
 
 /* USE_MKL      USE_CBLAS       effect
  * -------      ---------       ------
@@ -39,15 +44,15 @@
 namespace mkldnn {
 namespace impl {
 namespace cpu {
-using namespace mkldnn::impl::status;
+
 mkldnn_status_t check_gemm_input(const char *transa, const char *transb,
         const int *M, const int *N, const int *K, const int *lda,
         const int *ldb, const int *ldc, const float *alpha, const float *beta,
         const bool with_bias) {
     if (utils::any_null(transa, transb, M, N, K, lda, ldb, ldc, alpha, beta))
-        return invalid_arguments;
+        return mkldnn_invalid_arguments;
     if (with_bias && *beta != 0)
-        return unimplemented;
+        return mkldnn_unimplemented;
     bool consistency = true
         && utils::one_of(*transa, 'T', 't', 'N', 'n')
         && utils::one_of(*transb, 'T', 't', 'N', 'n')
@@ -55,7 +60,8 @@ mkldnn_status_t check_gemm_input(const char *transa, const char *transb,
         && *N >= 0
         && *K >= 0;
 
-    if (!consistency) return invalid_arguments;
+    if (!consistency)
+        return mkldnn_invalid_arguments;
     bool isTransA = utils::one_of(*transa, 'T', 't');
     bool isTransB = utils::one_of(*transb, 'T', 't');
     int nrowA = isTransA ? *K : *M;
@@ -64,136 +70,65 @@ mkldnn_status_t check_gemm_input(const char *transa, const char *transb,
         && *lda >= nstl::max(1, nrowA)
         && *ldb >= nstl::max(1, nrowB)
         && *ldc >= nstl::max(1, *M);
-    if (!consistency) return invalid_arguments;
+    if (!consistency)
+        return mkldnn_invalid_arguments;
 
-    return success;
+    return mkldnn_success;
 }
 
 mkldnn_status_t check_gemm_x8x8x32_input(const char *offsetc,
         const char *transa, const char *transb, const int *M, const int *N,
         const int *K, const int *lda, const int *ldb, const int *ldc,
         const float *alpha, const float *beta, const bool with_bias) {
-
-    if (offsetc == nullptr) return invalid_arguments;
+    if (offsetc == nullptr)
+        return mkldnn_invalid_arguments;
     if (!utils::one_of(*offsetc, 'F', 'f', 'C', 'c', 'R', 'r'))
-        return invalid_arguments;
+        return mkldnn_invalid_arguments;
 
     return check_gemm_input(transa, transb, M, N, K, lda, ldb, ldc, alpha,
         beta, with_bias);
 }
 
-struct gemm_impl_t {
-    gemm_impl_t(char transa, char transb, bool zero_beta, bool with_bias) {
-        //jit kernel has three codepaths: beta is 0, 1 or arbitrary
-        //we will generate kernel for 0 and arbitrary beta
-        float zero = 0.0f, arbitrary_float = 2.0f;
-        if (mayiuse(avx512_common)) {
-            isa_ = avx512_common;
-            ker_ = (void *)new jit_avx512_common_gemm_f32(
-                    transa, transb, zero_beta ? zero : arbitrary_float,
-                    with_bias);
-        }
-        else if (mayiuse(avx)) {
-            isa_ = avx;
-            ker_ = (void *)new jit_avx_gemm_f32(
-                    transa, transb, zero_beta ? zero : arbitrary_float,
-                    with_bias);
-        }
-    }
-
-    mkldnn_status_t call(const char *transa, const char *transb, const int *M,
-            const int *N, const int *K, const float *alpha, const float *A,
-            const int *lda, const float *B, const int *ldb, const float *beta,
-            float *C, const int *ldc, const float *bias = nullptr) {
-        switch (isa_) {
-            case avx:
-                ((jit_avx_gemm_f32*)ker_)->sgemm(transa, transb, M, N, K,
-                    alpha, A, lda, B, ldb, beta, C, ldc, bias);
-                break;
-            case avx512_common:
-                ((jit_avx512_common_gemm_f32*)ker_)->sgemm(transa, transb,
-                    M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, bias);
-                break;
-            default:
-                ref_gemm(transa, transb, M, N, K, alpha, A, lda, B, ldb, beta,
-                        C, ldc, bias);
-                break;
-        }
-        return mkldnn_success;
-    }
-
-    void *ker_;
-    cpu_isa_t isa_;
-};
-//Gemm implementations for: zero/nonzero beta, transA, transB
-static gemm_impl_t *gemm_impl[2][2][2];
-//Gemm with bias implementations for: transA, transB
-//Gemm with bias for beta!=0. is not supported
-static gemm_impl_t *gemm_bias_impl[2][2];
-
-void initialize() {
-    for (int i = 0; i < 2; ++i) {
-        gemm_impl[i][0][0] = new gemm_impl_t('n', 'n', (bool)i, false);
-        gemm_impl[i][0][1] = new gemm_impl_t('n', 't', (bool)i, false);
-        gemm_impl[i][1][0] = new gemm_impl_t('t', 'n', (bool)i, false);
-        gemm_impl[i][1][1] = new gemm_impl_t('t', 't', (bool)i, false);
-    }
-    gemm_bias_impl[0][0] = new gemm_impl_t('n', 'n', true, true);
-    gemm_bias_impl[0][1] = new gemm_impl_t('n', 't', true, true);
-    gemm_bias_impl[1][0] = new gemm_impl_t('t', 'n', true, true);
-    gemm_bias_impl[1][1] = new gemm_impl_t('t', 't', true, true);
-}
-
 mkldnn_status_t extended_sgemm(const char *transa, const char *transb,
         const int *M, const int *N, const int *K, const float *alpha,
         const float *A, const int *lda, const float *B, const int *ldb,
         const float *beta, float *C, const int *ldc,
         const float *bias, const bool force_jit_gemm) {
-    //Check input
     mkldnn_status_t status = check_gemm_input(transa, transb, M, N, K,
             lda, ldb, ldc, alpha, beta, bias != nullptr);
     if (status != mkldnn_success)
         return status;
-    if (*M == 0 || *N == 0 || *K == 0)
-        return mkldnn_success;
-    int trA = *transa == 't' || *transa == 'T';
-    int trB = *transb == 't' || *transb == 'T';
+
 #ifdef USE_CBLAS
     if (!force_jit_gemm) {
-        //Call cblas
+        bool trA = *transa == 't' || *transa == 'T';
+        bool trB = *transb == 't' || *transb == 'T';
         CBLAS_TRANSPOSE Cblas_trA = trA ? CblasTrans : CblasNoTrans;
         CBLAS_TRANSPOSE Cblas_trB = trB ? CblasTrans : CblasNoTrans;
         cblas_sgemm(CblasColMajor, Cblas_trA, Cblas_trB,
                 *M, *N, *K, *alpha, A, *lda, B, *ldb, *beta, C, *ldc);
-        //Add bias if necessary (bias is applied to columns of C)
+
         if (bias) {
+            // Add bias if necessary (bias is applied to columns of C)
             cblas_int incx = 1, incy = 1;
             parallel_nd(*N, [&](int n) {
-                cblas_saxpy(*M, 1.0, bias, incx, C + n*(*ldc), incy);
+                ptrdiff_t offset = (ptrdiff_t)n * (*ldc);
+                cblas_saxpy(*M, 1.0, bias, incx, C + offset, incy);
             });
         }
         return mkldnn_success;
     }
 #endif
-    //Generate jit kernel and call sgemm with bias
-    volatile static int initialized = 0;
-    if (!initialized) {
-        static std::mutex mtx;
-        std::lock_guard<std::mutex> lock(mtx);
-        if (!initialized) {
-            mkldnn::impl::cpu::initialize();
-            initialized = 1;
-        }
-    }
-    if (bias)
-        gemm_bias_impl[trA][trB]->call(
-                transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc,
-                bias);
-    else
-        gemm_impl[*beta == 0.f][trA][trB]->call(
-                transa, transb, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
 
-    return mkldnn_success;
+    if (mayiuse(avx512_common))
+        return jit_avx512_common_gemm_f32(transa, transb,
+                M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, bias);
+    else if (mayiuse(avx))
+        return jit_avx_gemm_f32(transa, transb,
+                M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, bias);
+    else
+        return ref_gemm<float>(transa, transb,
+                M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, bias);
 }
 
 template <typename b_dt>
@@ -202,22 +137,20 @@ mkldnn_status_t gemm_s8x8s32(const char *transa, const char *transb,
         const float *alpha, const int8_t *A, const int *LDA, const int8_t *ao,
         const b_dt *B, const int *LDB, const int8_t *bo, const float *beta,
         int32_t *C, const int *LDC, const int32_t *co) {
-
     mkldnn_status_t status = check_gemm_x8x8x32_input(offsetc, transa, transb,
         M, N, K, LDA, LDB, LDC, alpha, beta, false);
-
     if (status != mkldnn_success)
         return status;
 
     if (*M == 0 || *N == 0 || *K == 0)
         return mkldnn_success;
 
-    bool OCisR = (*offsetc == 'R' || *offsetc == 'r');
-    bool OCisC = (*offsetc == 'C' || *offsetc == 'c');
-    bool AisN = (*transa == 'N' || *transa == 'n');
-    bool BisN = (*transb == 'N' || *transb == 'n');
+#if USE_MKL_IGEMM
+        bool OCisR = (*offsetc == 'R' || *offsetc == 'r');
+        bool OCisC = (*offsetc == 'C' || *offsetc == 'c');
+        bool AisN = (*transa == 'N' || *transa == 'n');
+        bool BisN = (*transb == 'N' || *transb == 'n');
 
-#if defined(USE_MKL) && defined(USE_CBLAS)
     if (data_traits<b_dt>::data_type == data_type::u8) {
         CBLAS_TRANSPOSE Cblas_trA = AisN ? CblasNoTrans : CblasTrans;
         CBLAS_TRANSPOSE Cblas_trB = BisN ? CblasNoTrans : CblasTrans;
@@ -228,64 +161,58 @@ mkldnn_status_t gemm_s8x8s32(const char *transa, const char *transb,
             ? CblasColOffset
             : CblasFixOffset;
         cblas_gemm_s8u8s32(CblasColMajor, Cblas_trA, Cblas_trB, Cblas_offsetc,
-            *M, *N, *K, *alpha, A, *LDA, *ao, (b_dt*)B, *LDB, *bo, *beta, C, *LDC, co);
+                *M, *N, *K, *alpha, A, *LDA, *ao, (uint8_t *)B, *LDB, *bo,
+                *beta, C, *LDC, co);
         return mkldnn_success;
+    } else {
+        assert(data_traits<b_dt>::data_type == data_type::s8);
+        // TODO CBLAS implementation of gemm_s8s8s32 goes here.
+        // mkldnn_gemm_s8s8s32 doesn't support non-zero ao and bo
+        if ((mayiuse(avx512_core) || mayiuse(avx512_core_vnni))
+                && *ao == 0 && *bo == 0) {
+            return jit_avx512_core_gemm_s8s8s32(transa, transb, offsetc, M,
+                    N, K, alpha, A, LDA, ao, (int8_t *)B, LDB, bo, beta,
+                    C, LDC, co);
+        } else {
+            return ref_gemm_s8x8s32(transa, transb, offsetc, M, N, K,
+                    alpha, A, LDA, ao, B, LDB, bo, beta, C, LDC, co);
+        }
     }
-#endif
-    int m = *M, n = *N, k = *K, lda = *LDA, ldb = *LDB, ldc = *LDC;
-    size_t sizeA = AisN ? lda * k : lda * m;
-    size_t sizeB = BisN ? ldb * n : ldb * k;
-    size_t sizeC = ldc * n;
-
-    double *dA = (double *)malloc(sizeA * sizeof(double), PAGE_4K);
-    double *dB = (double *)malloc(sizeB * sizeof(double), PAGE_4K);
-    double *dC = (double *)malloc(sizeC * sizeof(double), PAGE_4K);
-
-    if (utils::any_null(dA, dB, dC)) {
-        free(dA);
-        free(dB);
-        free(dC);
-        return mkldnn_out_of_memory;
+#else
+    cpu_isa_t isa = isa_any;
+    if (mayiuse(avx512_core_vnni)) {
+        isa = avx512_core_vnni;
+    } else if (mayiuse(avx512_core)) {
+        isa = avx512_core;
     }
 
-    auto da_setter = [=] (int i, int j, double v) { dA[j * lda + i] = v; };
-    auto db_setter = [=] (int i, int j, double v) { dB[j * ldb + i] = v; };
-
-    auto ia_accessor = [=] (int i, int j) { return A[j * lda + i]; };
-    auto ib_accessor = [=] (int i, int j) { return B[j * ldb + i]; };
-
-    const int a_rows = AisN ? m : k;
-    const int a_cols = AisN ? k : m;
-    mkldnn::impl::parallel_nd(a_cols, a_rows, [&](int j, int i) {
-        da_setter(i, j,
-            static_cast<double>(ia_accessor(i, j)) + static_cast<double>(ao[0]));
-    });
-
-    const int b_rows = BisN ? k : n;
-    const int b_cols = BisN ? n : k;
-    mkldnn::impl::parallel_nd(b_cols, b_rows, [&](int j, int i) {
-        db_setter(i, j,
-            static_cast<double>(ib_accessor(i, j)) + static_cast<double>(bo[0]));
-    });
-    double one = 1.0, zero = 0.0;
-    ref_gemm<double>(transa, transb, M, N, K, &one, dA, LDA, dB, LDB, &zero,
-        dC, LDC, nullptr);
-
-    auto i2d = [=] (int32_t v) { return static_cast<double>(v); };
-    auto f2d = [=] (float v)   { return static_cast<double>(v); };
-
-    mkldnn::impl::parallel_nd(n, m, [&] (int j, int i) {
-        double coffset = OCisR ? i2d(co[j]) : OCisC ? i2d(co[i]) : i2d(co[0]);
-        double val = ((*beta == 0.0f) ? 0.0 : f2d(*beta) * i2d(C[i + j * ldc]))
-            + f2d(*alpha) * dC[i + j * ldc] + coffset;
-        C[i + j * ldc] = math::out_round<int32_t>(math::saturate<int32_t>(val));
-    });
-
-    free(dA);
-    free(dB);
-    free(dC);
-    return mkldnn_success;
+    if (data_traits<b_dt>::data_type == data_type::u8) {
+        switch (isa) {
+        case avx512_core:
+        case avx512_core_vnni:
+            return jit_avx512_core_gemm_s8u8s32(transa, transb, offsetc, M,
+                    N, K, alpha, A, LDA, ao, (uint8_t *)B, LDB, bo, beta,
+                    C, LDC, co);
+        default:
+            return ref_gemm_s8x8s32(transa, transb, offsetc, M, N, K,
+                    alpha, A, LDA, ao, B, LDB, bo, beta, C, LDC, co);
+        }
+    } else {
+        assert(data_traits<b_dt>::data_type == data_type::s8);
+        // mkldnn_gemm_s8s8s32 doesn't support non-zero ao and bo
+        if ((mayiuse(avx512_core) || mayiuse(avx512_core_vnni))
+                && *ao == 0 && *bo == 0) {
+            return jit_avx512_core_gemm_s8s8s32(transa, transb, offsetc, M,
+                    N, K, alpha, A, LDA, ao, (int8_t *)B, LDB, bo, beta,
+                    C, LDC, co);
+        } else {
+            return ref_gemm_s8x8s32(transa, transb, offsetc, M, N, K,
+                    alpha, A, LDA, ao, B, LDB, bo, beta, C, LDC, co);
+        }
+    }
+#endif
 }
+
 }
 }
 }
@@ -305,18 +232,18 @@ mkldnn_status_t mkldnn_gemm_s8u8s32(const char *transa, const char *transb,
         const char *offsetc, const int *M, const int *N, const int *K,
         const float *alpha, const int8_t *A, const int *lda, const int8_t *ao,
         const uint8_t *B, const int *ldb, const int8_t *bo, const float *beta,
-        int32_t *c, const int *ldc, const int32_t *co) {
+        int32_t *C, const int *ldc, const int32_t *co) {
     return gemm_s8x8s32(
         transa, transb, offsetc, M, N, K, alpha, A, lda, ao, B, ldb, bo,
-        beta, c, ldc, co);
+        beta, C, ldc, co);
 }
 
 mkldnn_status_t mkldnn_gemm_s8s8s32(const char *transa, const char *transb,
         const char *offsetc, const int *M, const int *N, const int *K,
         const float *alpha, const int8_t *A, const int *lda, const int8_t *ao,
         const int8_t *B, const int *ldb, const int8_t *bo, const float *beta,
-        int32_t *c, const int *ldc, const int32_t *co) {
+        int32_t *C, const int *ldc, const int32_t *co) {
     return gemm_s8x8s32(
         transa, transb, offsetc, M, N, K, alpha, A, lda, ao, B, ldb, bo,
-        beta, c, ldc, co);
+        beta, C, ldc, co);
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.hpp
index 3f33a3713..dc15ff713 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/gemm.hpp
@@ -13,11 +13,17 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
+
 #ifndef GEMM_HPP
 #define GEMM_HPP
+
+#include "mkldnn_types.h"
+#include "os_blas.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
+
 mkldnn_status_t extended_sgemm(const char *transa, const char *transb,
         const int *M, const int *N, const int *K, const float *alpha,
         const float *A, const int *lda, const float *B, const int *ldb,
@@ -31,17 +37,22 @@ mkldnn_status_t gemm_s8x8s32(const char *transa, const char *transb,
         const b_dt *B, const int *ldb, const int8_t *bo, const float *beta,
         int32_t *c, const int *ldc, const int32_t *co);
 
-template <typename data_t>
-void ref_gemm(const char *transa, const char *transb, const int *M,
-        const int *N, const int *K, const data_t *alpha, const data_t *A,
-        const int *lda, const data_t *B, const int *ldb, const data_t *beta,
-        data_t *C, const int *ldc, const data_t *bias);
 #ifdef USE_CBLAS
 #define GEMM_IMPL_STR "gemm:blas"
 #else
 #define GEMM_IMPL_STR "gemm:jit"
 #endif
+
+#if USE_MKL_IGEMM
+#define IGEMM_S8U8S32_IMPL_STR "igemm_s8u8s32:blas"
+#define IGEMM_S8S8S32_IMPL_STR "igemm_s8s8s32:blas"
+#else
+#define IGEMM_S8U8S32_IMPL_STR "igemm_s8u8s32:jit"
+#define IGEMM_S8S8S32_IMPL_STR "igemm_s8s8s32:jit"
+#endif
+
 }
 }
 }
+
 #endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx512_common_gemm_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx512_common_gemm_f32.hpp
deleted file mode 100644
index c05733581..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx512_common_gemm_f32.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*******************************************************************************
-* Copyright 2017-2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#ifndef JIT_AVX512_COMMON_GEMM_F32_HPP
-#define JIT_AVX512_COMMON_GEMM_F32_HPP
-
-#include "c_types_map.hpp"
-#include "../jit_generator.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-class jit_avx512_common_gemm_f32 {
-public:
-    void sgemm(const char *transa, const char *transb, const int *M,
-            const int *N, const int *K, const float *alpha, const float *A,
-            const int *lda, const float *B, const int *ldb, const float *beta,
-            float *C, const int *ldc, const float *bias = NULL);
-
-    jit_avx512_common_gemm_f32(
-            char transa, char transb, float beta, bool hasBias = false);
-    ~jit_avx512_common_gemm_f32();
-
-private:
-    typedef void (*ker)(long long int, long long int, long long int, float *,
-            float *, long long int, float *, long long int, float *, float *,
-            long long int, float *, float *);
-    void sgemm_nocopy_driver(const char *transa, const char *transb, int m,
-            int n, int k, const float *alpha, const float *a, int lda,
-            const float *b, int ldb, const float *beta, float *c, int ldc,
-            const float *bias, float *ws);
-
-    char transa_, transb_;
-    float beta_;
-    bool hasBias_;
-    struct xbyak_gemm;
-    xbyak_gemm *ker_bn_, *ker_b1_, *ker_b0_;
-    int nthrs_;
-};
-}
-}
-}
-
-#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx_gemm_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx_gemm_f32.hpp
deleted file mode 100644
index dd34e09f0..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/jit_avx_gemm_f32.hpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#ifndef JIT_AVX_GEMM_F32_HPP
-#define JIT_AVX_GEMM_F32_HPP
-
-#include "c_types_map.hpp"
-#include "../jit_generator.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-class jit_avx_gemm_f32 {
-public:
-    void sgemm(const char *transa, const char *transb, const int *M,
-            const int *N, const int *K, const float *alpha, const float *A,
-            const int *lda, const float *B, const int *ldb, const float *beta,
-            float *C, const int *ldc, const float *bias = NULL);
-
-    jit_avx_gemm_f32(
-            char transa, char transb, float beta, bool hasBias = false);
-    ~jit_avx_gemm_f32();
-
-private:
-    typedef void (*ker)(long long int, long long int, long long int, float *,
-            float *, long long int, float *, long long int, float *, float *,
-            long long int, float *);
-    void sgemm_nocopy_driver(const char *transa, const char *transb, int m,
-            int n, int k, const float *alpha, const float *a, int lda,
-            const float *b, int ldb, const float *beta, float *c, int ldc,
-            const float *bias, float *ws);
-
-    char transa_, transb_;
-    float beta_;
-    bool hasBias_;
-    struct xbyak_gemm;
-    xbyak_gemm *ker_bn_, *ker_b1_, *ker_b0_;
-    int nthrs_;
-};
-}
-}
-}
-
-#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/os_blas.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/os_blas.hpp
index 6afe40d29..85acfa158 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/os_blas.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/os_blas.hpp
@@ -32,7 +32,7 @@
 
 #include "mkl_version.h"
 
-#define USE_MKL_PACKED_GEMM (INTEL_MKL_VERSION >= 20170000)
+#define USE_MKL_PACKED_GEMM 0
 #define USE_MKL_IGEMM \
     (INTEL_MKL_VERSION >= 20180000 && __INTEL_MKL_BUILD_DATE >= 20170628)
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/common.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/common.hpp
new file mode 100644
index 000000000..dde72f4a1
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/common.hpp
@@ -0,0 +1,206 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef COMMON_H
+#define COMMON_H
+
+#define GEMM_CODE_SIZE          (4096L * 32)
+
+#define AVX512_UNROLL_M                   48
+#define AVX512_UNROLL_N                    8
+#define AVX512_UNROLL_K                    1
+#define AVX512_BM                       9984
+#define AVX512_BN                        384
+#define AVX512_BK                        768
+#define AVX512_BK_VNNI                  1536
+#define AVX512_BK_TRADITIONAL            384
+#define AVX512_BLOCKING_SMALL_K           48
+#define AVX512_BN_SMALL_K                 24
+
+
+#define PAGESIZE 4096
+
+#define PADD_BYTESIZE_ONPAGE(x, size) (((x) * (size) + PAGESIZE - 1) / PAGESIZE) * PAGESIZE
+#define NEXT_THR_STRIDE(x, size) (PADD_BYTESIZE_ONPAGE(x, size)) / size
+
+#include "jit_generator.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+enum {
+    PARTITION_1D_ROW,
+    PARTITION_1D_COL,
+    PARTITION_2D_COL_MAJOR,
+    PARTITION_2D = PARTITION_2D_COL_MAJOR,
+};
+
+enum {
+    COPY_NONE,
+    COPY_A,
+};
+
+enum {
+    NO_OFFSET,
+    FIX_OFFSET,
+    COL_OFFSET,
+    ROW_OFFSET,
+};
+
+// Alias for any dimension related variable.
+typedef long long int dim_t;
+
+typedef struct {
+    // Interface arguments.
+    int transa, transb, offsetc;
+    dim_t m, n, k;
+    dim_t lda, ldb, ldc;
+    const int8_t *a;
+    const uint8_t *b;
+    int32_t *c;
+    const float *alpha, *beta;
+
+    int8_t ao, bo;
+    const int32_t *co;
+
+    // Kernel parameters.
+    dim_t um, un, uk, bm, bn, bk;
+    dim_t bn_small_k, bk_traditional, blocking_small_k;
+
+    int (*copyA)(const dim_t *m, const dim_t *n, const int8_t *a,
+            const dim_t *lda, const int8_t *alpha, int8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    int (*copyB)(const dim_t *m, const dim_t *n, const uint8_t *a,
+            const dim_t *lda, const uint8_t *alpha, uint8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    int (*kernel)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_b)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_r)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_c)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_b0)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_b0_b)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_b0_r)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    int (*kernel_b0_c)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    // Gemv kernels
+    void (*gemv_s8u8s32_kernel)(const dim_t, const dim_t, const float,
+                                const int8_t*, const dim_t, const uint8_t*,
+                                const float, int32_t*);
+
+    void (*gemv_u8s8s32_kernel)(const dim_t, const dim_t, const float,
+                                const uint8_t*, const dim_t, const int8_t*,
+                                const float, int32_t*);
+
+    // Gemv parameters
+    int swap;
+
+} blas_t;
+
+
+class jit_avx512_core_u8_copy_an_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_an_kern);
+
+    public:
+        jit_avx512_core_u8_copy_an_kern();
+};
+
+class jit_avx512_core_u8_copy_at_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_at_kern);
+
+    public:
+        jit_avx512_core_u8_copy_at_kern();
+};
+
+class jit_avx512_core_u8_copy_bn_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bn_kern);
+
+    public:
+        jit_avx512_core_u8_copy_bn_kern();
+};
+
+class jit_avx512_core_u8_copy_bt_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_bt_kern);
+
+    public:
+        jit_avx512_core_u8_copy_bt_kern();
+};
+
+class jit_avx512_core_u8_copy_sum_an_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_an_kern);
+
+    public:
+        jit_avx512_core_u8_copy_sum_an_kern();
+};
+
+class jit_avx512_core_u8_copy_sum_at_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_at_kern);
+
+    public:
+        jit_avx512_core_u8_copy_sum_at_kern();
+};
+
+class jit_avx512_core_u8_copy_sum_bn_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bn_kern);
+
+    public:
+        jit_avx512_core_u8_copy_sum_bn_kern();
+};
+
+class jit_avx512_core_u8_copy_sum_bt_kern : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8_copy_sum_bt_kern);
+
+    public:
+        jit_avx512_core_u8_copy_sum_bt_kern();
+};
+
+}
+}
+}
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/gemv.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/gemv.hpp
new file mode 100644
index 000000000..db9dd9ef9
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/gemv.hpp
@@ -0,0 +1,28 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+int gemm_s8u8s32_jump_to_gemv_s8u8s32(blas_t *arg);
+int gemv_threading_driver(blas_t *arg);
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8s8s32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8s8s32.cpp
new file mode 100644
index 000000000..07a13961b
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8s8s32.cpp
@@ -0,0 +1,155 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "common.hpp"
+#include "nstl.hpp"
+#include "math_utils.hpp"
+#include "jit_avx512_core_gemm_s8u8s32.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+void compensation_init(const char *offsetC, int32_t *compensation, int len,
+        const int32_t *oc) {
+    bool OCisC = (*offsetC == 'C' || *offsetC == 'c');
+    bool OCisF = (*offsetC == 'F' || *offsetC == 'f');
+
+   if (OCisF && (*oc) != 0) {
+       for (int i = 0; i < len; i++)
+           compensation[i] = *oc;
+   } else if (OCisC) {
+       for (int i = 0; i < len; i++)
+           compensation[i] = oc[i];
+   } else {
+       parallel_nd(len, [=](int i) { compensation[i] = 0; });
+   }
+}
+
+void compensation_compute(bool transa, int m, int k, float alpha,
+        const int8_t *a, int lda, int32_t *compensation) {
+    if (!transa) {
+        const int L2_cache_size = get_cache_size(2, true);
+        const int blocking_factor = nstl::min(k, L2_cache_size / lda + 1);
+        const int npanels = k / blocking_factor;
+        const bool has_tile = k % blocking_factor > 0;
+
+        parallel_nd(npanels, m, [&](int j, int i) {
+            int32_t val = 0;
+            for (int jb = 0; jb < blocking_factor; jb++) {
+                val += a[(i + (ptrdiff_t)j * blocking_factor * lda)
+                    + (ptrdiff_t)jb * lda];
+            }
+            if (alpha != 1.0f) {
+                val = math::out_round<int32_t>(math::saturate<int32_t>(
+                    (double)val * alpha * -128.0));
+            } else {
+                val *= -128;
+            }
+            mkldnn_fetch_and_add(&compensation[i], val);
+        });
+
+        if (has_tile) {
+            parallel_nd(m, [=](int i) {
+                int32_t val = 0;
+                for (int j = npanels * blocking_factor; j < k; j++) {
+                    val += a[i + (ptrdiff_t)j * lda];
+                }
+                if (alpha != 1.0f) {
+                    val = math::out_round<int32_t>(math::saturate<int32_t>(
+                        (double)val * alpha * -128.0));
+                } else {
+                    val *= -128;
+                }
+                mkldnn_fetch_and_add(&compensation[i], val);
+            });
+        }
+    } else {
+        parallel_nd(m, [=](int i) {
+            int32_t val = 0;
+            for (int j = 0; j < k; j++) {
+                val += a[j + (ptrdiff_t)i * lda];
+            }
+            if (alpha != 1.0f) {
+                val = math::out_round<int32_t>(math::saturate<int32_t>(
+                    (double)val * alpha * -128.0));
+            } else {
+                val *= -128;
+            }
+            compensation[i] += val;
+        });
+    }
+}
+
+void copy_and_shift_b(bool transb, int k, int n, uint8_t *b_u8, int ldb_u8,
+        const int8_t *b_s8, int ldb_s8) {
+    const int b_cols = transb ? k : n;
+
+    parallel_nd(b_cols, [=](int j) {
+        const int b_rows = transb ? n : k;
+
+        uint8_t *pb_u8 = b_u8 + j * ldb_u8;
+        const int8_t *pb_s8 = b_s8 + j * ldb_s8;
+
+        for (int i = 0; i < b_rows; i++) {
+            (*pb_u8) = (*pb_s8) + 128;
+            pb_u8++;
+            pb_s8++;
+        }
+    });
+}
+
+mkldnn_status_t jit_avx512_core_gemm_s8s8s32(
+        const char *transA, const char *transB, const char *offsetC,
+        const int *m, const int *n, const int *k,
+        const float *alpha, const int8_t *a, const int *lda, const int8_t *oa,
+        const int8_t *b, const int *ldb, const int8_t *ob,
+        const float *beta, int32_t *c, const int *ldc, const int32_t *oc) {
+    if (*oa != 0 || *ob != 0) return mkldnn_unimplemented;
+
+    int M = *m, N = *n, K = *k;
+    bool transa = (*transA == 'T' || *transA == 't');
+    bool transb = (*transB == 'T' || *transB == 't');
+    int ld = transb ? N : K;
+
+    uint8_t *b_u8 = (uint8_t *)malloc(sizeof(uint8_t) * K * N, 64);
+    int32_t *compensation = (int32_t *)malloc(sizeof(int32_t) * M, 64);
+
+    if (utils::any_null(b_u8, compensation)) {
+        free(b_u8);
+        free(compensation);
+        return mkldnn_out_of_memory;
+    }
+
+    compensation_init(offsetC, compensation, M, oc);
+    compensation_compute(transa, M, K, *alpha, a, *lda, compensation);
+    copy_and_shift_b(transb, K, N, b_u8, ld, b, *ldb);
+
+    mkldnn_gemm_s8u8s32(transA, transB, "C", m, n, k, alpha, a, lda, oa, b_u8,
+        &ld, ob, beta, c, ldc, compensation);
+
+    if ((*offsetC == 'R' || *offsetC == 'r'))
+        parallel_nd(M, N,
+            [=](int i, int j) { c[i + (ptrdiff_t)j * *ldc] += oc[j]; });
+
+    free(b_u8);
+    free(compensation);
+
+    return mkldnn_success;
+}
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8s8s32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8s8s32.hpp
new file mode 100644
index 000000000..dc9d43b17
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8s8s32.hpp
@@ -0,0 +1,37 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef JIT_AVX512_CORE_GEMM_S8S8S32_HPP
+#define JIT_AVX512_CORE_GEMM_S8S8S32_HPP
+
+#include <stdint.h>
+#include "mkldnn_types.h"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+mkldnn_status_t jit_avx512_core_gemm_s8s8s32(
+        const char *transA, const char *transB, const char *offsetC,
+        const int *m, const int *n, const int *k,
+        const float *alpha, const int8_t *a, const int *lda, const int8_t *oa,
+        const int8_t *b, const int *ldb, const int8_t *ob,
+        const float *beta, int32_t *c, const int *ldc, const int32_t *oc);
+}
+}
+}
+
+#endif // JIT_AVX512_CORE_GEMM_S8S8S32_HPP
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32.cpp
new file mode 100644
index 000000000..e4b8e1cde
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32.cpp
@@ -0,0 +1,1409 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cstdint>
+#include <mutex>
+
+#include "common.hpp"
+#include "mkldnn_types.h"
+#include "nstl.hpp"
+#include "utils.hpp"
+
+#include "jit_avx512_core_gemm_s8u8s32.hpp"
+#include "jit_avx512_core_gemm_s8u8s32_kern.hpp"
+#include "jit_avx512_core_kernel_gemv_s8u8s32_kern.hpp"
+#include "gemv.hpp"
+
+#if defined(_MSC_VER)
+#include <malloc.h>
+#endif
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+typedef struct {
+    int nthrs_m, nthrs_n;
+    int partition;
+    int copy_type;
+} blas_thread_t;
+
+static inline void round_to_nearest(int32_t *rounded_val, double fp_val) {
+    if (fp_val >= 0.) {
+        fp_val += 0.5;
+        if (fp_val > INT32_MAX) {
+            fp_val = INT32_MAX;
+        }
+    } else {
+        fp_val -= 0.5;
+        if (fp_val < INT32_MIN) {
+            fp_val = INT32_MIN;
+        }
+    }
+    *rounded_val = (int32_t) fp_val;
+}
+
+static inline void add_results(const dim_t m, const dim_t n, const dim_t k,
+        const float alpha, const float beta, const int32_t *c_partial_sum,
+        const dim_t ldcp, int32_t *c_data, const dim_t ldc,
+        const int32_t *a_row_sum, const int32_t *b_col_sum, const int8_t ao,
+        const int8_t bo, const int32_t *co, const int offsetc)
+{
+    for (dim_t j = 0; j < n; ++j) {
+        for (dim_t i = 0; i < m; ++i) {
+            int32_t ctemp = c_partial_sum[i + j * ldcp];
+
+            if (alpha == 1.0f) {
+                if (beta == 0.0f) {
+                    c_data[i + j * ldc] = ctemp;
+                } else {
+                    double c_float = (double) beta
+                        * (double) c_data[i + j * ldc];
+                    c_float += (double) ctemp;
+                    round_to_nearest(&c_data[i + j * ldc], c_float);
+                }
+            } else if (alpha == -1.0f) {
+                if (beta == 0.0f) {
+                    c_data[i + j * ldc] = -ctemp;
+                } else {
+                    double c_float = (double) beta
+                        * (double) c_data[i + j * ldc];
+                    c_float -= (double) ctemp;
+                    round_to_nearest(&c_data[i + j * ldc], c_float);
+                }
+            } else {
+                if (beta == 0.0f) {
+                    double c_float = alpha * (double) ctemp;
+                    round_to_nearest(&c_data[i + j * ldc], c_float);
+                } else {
+                    double c_float = alpha * (double) ctemp +
+                        beta * (double) c_data[i + j * ldc];
+                    round_to_nearest(&c_data[i + j * ldc], c_float);
+                }
+            }
+
+            if (offsetc == FIX_OFFSET) {
+                c_data[i + j * ldc] += co[0];
+            } else if (offsetc == ROW_OFFSET) {
+                c_data[i + j * ldc] += co[j];
+            } else if (offsetc == COL_OFFSET) {
+                c_data[i + j * ldc] += co[i];
+            }
+        }
+    }
+}
+
+// TODO Find a better place for those functions.
+static inline dim_t ld_padd(const dim_t x)
+{
+    return ((x + ((2048 / sizeof(int32_t)) - 1)) / (2048 / sizeof(int32_t)))
+        * (2048 / sizeof(int32_t)) +  (64 / sizeof(int32_t));
+}
+
+void igemm_inner_kernel(const dim_t m, const dim_t n, const dim_t k,
+        const int8_t *a, const uint8_t *b, float beta, int32_t *c,
+        const dim_t ldc, const int32_t *a_row_sum, const int32_t *b_col_sum,
+        const int32_t *co, const int offsetc, const blas_t *arg)
+{
+    int8_t ao = arg->ao;
+    int8_t bo = arg->bo;
+    int32_t co_0 = (offsetc == NO_OFFSET)? 0 : co[0];
+
+    // Since m and n are limited by blocking, stack overflow may not happen;
+    // it's up to 32kB
+#if !defined(_MSC_VER)
+    int32_t col_offset[m];
+    int32_t row_offset[n];
+#else
+    int32_t *col_offset = (int32_t *) _alloca(sizeof(*col_offset) * m);
+    int32_t *row_offset = (int32_t *) _alloca(sizeof(*row_offset) * n);
+#endif
+
+    int col_req = 0;
+    int row_req = 0;
+
+    if ((bo != 0) || (offsetc == COL_OFFSET))
+        col_req = 1;
+    if ((ao != 0) || (offsetc == ROW_OFFSET))
+        row_req = 1;
+
+    // It needs one of colum or row offsets, but it doesn't need both
+    if (((ao != 0) && (bo != 0)) || ((offsetc == FIX_OFFSET) && (co_0 != 0))) {
+        if ((col_req == 0) && (row_req == 0)) {
+            if (m <= n) {
+                col_req = 1;
+            } else {
+                row_req = 1;
+            }
+        }
+    }
+
+    if (col_req) {
+        for (dim_t i = 0; i < m; i++)
+            col_offset[i] = 0;
+
+        if (offsetc == COL_OFFSET) {
+            for (dim_t i = 0; i < m; i++)
+                col_offset[i] += co[i];
+        }
+
+        if (bo != 0) {
+            for (dim_t i = 0; i < m; i++)
+                col_offset[i] += bo * a_row_sum[i];
+        }
+    }
+
+    if (row_req) {
+        for (dim_t i = 0; i < n; i++)
+            row_offset[i] = 0;
+
+        if (offsetc == ROW_OFFSET) {
+            for (dim_t i = 0; i < n; i++)
+                row_offset[i] += co[i];
+        }
+
+        if (ao != 0) {
+            for (dim_t i = 0; i < n; i++)
+                row_offset[i] += ao * b_col_sum[i];
+        }
+    }
+
+    if ((offsetc == FIX_OFFSET) && (co_0 != 0)) {
+        if (col_req) {
+            for (dim_t i = 0; i < m; i++)
+                col_offset[i] += co_0;
+        } else {
+            for (dim_t i = 0; i < n; i++)
+                row_offset[i] += co_0;
+        }
+    }
+
+    if ((ao != 0) && (bo != 0)) {
+        if (col_req) {
+            for (dim_t i = 0; i < m; i++)
+                col_offset[i] += (int32_t) k * ao * bo;
+        } else {
+            for (dim_t i = 0; i < n; i++)
+                row_offset[i] += (int32_t) k * ao * bo;
+        }
+    }
+
+    if (col_req == 0) {
+        if (row_req == 0) {
+            if (beta == 0.0) {
+                arg->kernel_b0(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            } else {
+                arg->kernel(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            }
+        } else {
+            if (beta == 0.0) {
+                arg->kernel_b0_r(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            } else {
+                arg->kernel_r(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            }
+        }
+    } else {
+        if (row_req == 0) {
+            if (beta == 0.0) {
+                arg->kernel_b0_c(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            } else {
+                arg->kernel_c(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            }
+        } else {
+            if (beta == 0.0) {
+                arg->kernel_b0_b(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            } else {
+                arg->kernel_b(&m, &n, &k, NULL, a, b, c, ldc, col_offset,
+                        row_offset);
+            }
+        }
+    }
+}
+
+static inline void *align(void *ptr, size_t alignment)
+{
+    return (void *) utils::rnd_up((uintptr_t) ptr, alignment);
+}
+
+static int gemm_kernel_driver(const dim_t m, const dim_t n, const dim_t k,
+        const int8_t *a, const uint8_t *b, int32_t *c, const int32_t *co,
+        const blas_t *arg)
+{
+    dim_t   lda   = arg->lda;
+    dim_t   ldb   = arg->ldb;
+    dim_t   ldc   = arg->ldc;
+    int8_t  ao    = arg->ao;
+    int8_t  bo    = arg->bo;
+    float   alpha = *arg->alpha;
+    float   beta  = *arg->beta;
+
+    if (m <= 0 || n <= 0) {
+        return 0;
+    }
+
+    // Padding along K dimension.
+    dim_t k_padd = 0;
+    if (k <= arg->bk_traditional) {
+        k_padd = utils::rnd_up(k, arg->uk);
+        k_padd = nstl::max(128LL, k_padd);
+    } else if (k < 2 * arg->bk) {
+        k_padd = utils::rnd_up(k / 2, arg->uk);
+    } else {
+        k_padd = arg->bk;
+    }
+
+    // Padding along M dimension.
+    dim_t m_padd = utils::rnd_up(nstl::min(nstl::max(m, arg->um), arg->bm),
+            arg->um);
+
+    // Padding along N dimension.
+    dim_t n_padd = 0;
+    if (k < arg->blocking_small_k) {
+        n_padd = utils::rnd_up(nstl::min(nstl::max(n, arg->un),
+                    arg->bn_small_k), arg->un);
+    } else {
+        n_padd = utils::rnd_up(nstl::min(nstl::max(n, arg->un), arg->bn),
+                arg->un);
+    }
+
+    // Padding for temporary buffer for C
+    dim_t ldc_buf = ld_padd(m_padd);
+
+    dim_t strideAm = (arg->transa == 0)? 1 : lda;
+    dim_t strideAn = (arg->transa != 0)? 1 : lda;
+    dim_t strideBm = (arg->transb == 0)? 1 : ldb;
+    dim_t strideBn = (arg->transb != 0)? 1 : ldb;
+
+    size_t a_buf_nelems = m_padd * k_padd;
+    size_t b_buf_nelems = k_padd * n_padd;
+    size_t a_row_sum_nelems = m_padd;
+    size_t b_col_sum_nelems = n_padd;
+
+    size_t mem_size = a_buf_nelems * sizeof(*a) + PAGE_4K
+        + b_buf_nelems * sizeof(*b) + PAGE_4K
+        + a_row_sum_nelems * sizeof(*c) + PAGE_4K
+        + b_col_sum_nelems * sizeof(*c) + PAGE_4K;
+
+    bool need_c_buffer = alpha != 1.0f || (beta != 1 && beta != 0);
+    if (need_c_buffer) {
+        size_t c_buf_nelems = ldc_buf * n_padd;
+        mem_size += c_buf_nelems * sizeof(*c) + PAGE_4K;
+    }
+
+    char *mem = (char *) malloc(mem_size, 128);
+
+    if (!mem) {
+        return -1;
+    }
+
+    int8_t *bufferA = (int8_t *) align(mem, PAGE_4K);
+    uint8_t *bufferB = (uint8_t *) align(bufferA + a_buf_nelems, PAGE_4K);
+    int32_t *a_row_sum = (int32_t *) align(bufferB + b_buf_nelems, PAGE_4K);
+    int32_t *b_col_sum = (int32_t *) align(a_row_sum + a_row_sum_nelems,
+            PAGE_4K);
+
+    int32_t *bufferC = NULL;
+    if (need_c_buffer) {
+        bufferC = (int32_t *) align(b_col_sum + b_col_sum_nelems, PAGE_4K);
+    }
+
+    float beta_saved = beta;
+
+    int a_block_copied = 0;
+    dim_t sizeM = 0;
+    for (dim_t Bm = 0; Bm < m; Bm += sizeM) {
+        sizeM = m - Bm;
+        if (sizeM > m_padd)
+            sizeM = m_padd;
+
+        dim_t sizeK = 0;
+        for (dim_t Bk = 0; Bk < k; Bk += sizeK) {
+            sizeK = k - Bk;
+            if (sizeK > k_padd)
+                sizeK = k_padd;
+
+            // Scale C blocks by beta only for the first time
+            if (Bk == 0)
+                beta = beta_saved;
+            else
+                beta = 1.0f;
+
+            // Apply C offset when to the last k-block of the partial sum.
+            int offsetc = NO_OFFSET;
+            if (Bk + sizeK == k)
+                offsetc = arg->offsetc;
+
+            dim_t sizeN = 0;
+            for (dim_t Bn = 0; Bn < n; Bn += sizeN) {
+                sizeN = n - Bn;
+                if (sizeN > n_padd)
+                    sizeN = n_padd;
+
+                const uint8_t *b_block = b + Bk * strideBm + Bn * strideBn;
+                arg->copyB(&sizeK, &sizeN, b_block, &ldb, NULL, bufferB, NULL,
+                        NULL, b_col_sum);
+
+                dim_t sizeUM = 0;
+                for (dim_t Um = 0; Um < sizeM; Um += sizeUM) {
+                    sizeUM = sizeM - Um;
+                    if (sizeUM > arg->um)
+                        sizeUM = arg->um;
+
+                    /*
+                     * Use the whole A buffer only if we have multiple B blocks
+                     * for k-dimension, otherwise we are wasting cache to store
+                     * B and C blocks.
+                     */
+                    dim_t Um_forA = 0;
+                    if (sizeN < n)
+                        Um_forA = Um;
+
+                    const int8_t *a_block = a + (Bm + Um) * strideAm
+                        + Bk * strideAn;
+                    if (!a_block_copied) {
+                        arg->copyA(&sizeK, &sizeUM, a_block, &lda, NULL,
+                                bufferA + Um_forA * sizeK, NULL, NULL,
+                                a_row_sum + Um_forA);
+                    }
+
+                    int32_t *c_block = c + (Bm + Um) + Bn * ldc;
+                    dim_t co_stride = 0;
+                    if (offsetc == FIX_OFFSET) {
+                        co_stride = 0;
+                    } else if (offsetc == ROW_OFFSET) {
+                        co_stride = Bn;
+                    } else if (offsetc == COL_OFFSET) {
+                        co_stride = Bm + Um;
+                    }
+                    if (need_c_buffer) {
+                        igemm_inner_kernel(sizeUM, sizeN, sizeK,
+                                bufferA + Um_forA * sizeK, bufferB, 0.0f,
+                                bufferC + Um, ldc_buf, a_row_sum + Um_forA,
+                                b_col_sum, NULL, NO_OFFSET, arg);
+
+                        // Finish the block adding the necessary alpha, beta
+                        // and offsets.
+                        add_results(sizeUM, sizeN, sizeK, alpha, beta,
+                                bufferC + Um, ldc_buf, c_block, ldc,
+                                a_row_sum + Um_forA, b_col_sum, ao, bo,
+                                co + co_stride, offsetc);
+                    } else {
+                        igemm_inner_kernel(sizeUM, sizeN, sizeK,
+                                bufferA + Um_forA * sizeK, bufferB, beta,
+                                c_block, ldc, a_row_sum + Um_forA, b_col_sum,
+                                co + co_stride, offsetc, arg);
+                    }
+                }
+                a_block_copied = 1;
+            }
+            a_block_copied = 0;
+        }
+    }
+
+    free(mem);
+
+    return 0;
+}
+
+static int kernel_driver_parallel_acopiedbcopy(const dim_t m, const dim_t n,
+        const dim_t k, const int8_t *bufferA, const uint8_t *b,
+        const float beta, int32_t *c, const int offsetc, const int32_t *co,
+        const int32_t *a_row_sum, const blas_t *arg)
+{
+    dim_t   ldb   = arg->ldb;
+    dim_t   ldc   = arg->ldc;
+    int8_t  ao    = arg->ao;
+    int8_t  bo    = arg->bo;
+    float   alpha = *arg->alpha;
+
+    if (m <= 0 || n <= 0) {
+        return 0;
+    }
+
+    // Padding along N dimension.
+    dim_t n_padd = 0;
+    if (k < arg->blocking_small_k) {
+        n_padd = utils::rnd_up(nstl::min(nstl::max(n, arg->un),
+                    arg->bn_small_k), arg->un);
+    } else {
+        n_padd = utils::rnd_up(nstl::min(nstl::max(n, arg->un), arg->bn),
+                arg->un);
+    }
+
+    // Padding for temporary buffer for C
+    dim_t ldc_buf = ld_padd(m);
+
+    dim_t strideBn = (arg->transb != 0)? 1 : ldb;
+
+    size_t b_buf_nelems = k * n_padd;
+    size_t b_col_sum_nelems = n_padd;
+
+    size_t mem_size = b_buf_nelems * sizeof(*b) + PAGE_4K
+        + b_col_sum_nelems * sizeof(*c) + PAGE_4K;
+
+    bool need_c_buffer = alpha != 1.0f || (beta != 1 && beta != 0);
+    if (need_c_buffer) {
+        size_t c_buf_nelems = ldc_buf * n_padd;
+        mem_size += c_buf_nelems * sizeof(*c) + PAGE_4K;
+    }
+
+    char *mem = (char *) malloc(mem_size, 128);
+
+    if (!mem) {
+        return -1;
+    }
+
+    uint8_t *bufferB = (uint8_t *) align(mem, PAGE_4K);
+    int32_t *b_col_sum = (int32_t *) align(bufferB + b_buf_nelems, PAGE_4K);
+
+    int32_t *bufferC = NULL;
+    if (need_c_buffer) {
+        bufferC = (int32_t *) align(b_col_sum + b_col_sum_nelems, PAGE_4K);
+    }
+
+    dim_t sizeN = 0;
+    for (dim_t Bn = 0; Bn < n; Bn += sizeN) {
+        sizeN = n - Bn;
+        if (sizeN > n_padd)
+            sizeN = n_padd;
+
+        // Implement the kernel here.
+        const uint8_t *b_block = b + Bn * strideBn;
+        arg->copyB(&k, &sizeN, b_block, &ldb, NULL, bufferB, NULL, NULL,
+                b_col_sum);
+
+            dim_t co_stride = 0;
+            if (offsetc == FIX_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == ROW_OFFSET) {
+                co_stride = Bn;
+            } else if (offsetc == COL_OFFSET) {
+                co_stride = 0;
+            }
+        int32_t *c_block = c + Bn * ldc;
+        if (need_c_buffer) {
+            igemm_inner_kernel(m, sizeN, k, bufferA, bufferB, 0.0f, bufferC,
+                    ldc_buf, a_row_sum, b_col_sum, NULL, NO_OFFSET, arg);
+
+            // Finish the block adding the necessary alpha, beta and offsets.
+            add_results(m, sizeN, k, alpha, beta, bufferC, ldc_buf, c_block,
+                    ldc, a_row_sum, b_col_sum, ao, bo, co + co_stride,
+                    offsetc);
+        } else {
+            igemm_inner_kernel(m, sizeN, k, bufferA, bufferB, beta, c_block,
+                    ldc, a_row_sum, b_col_sum, co + co_stride, offsetc, arg);
+        }
+    }
+
+    free(mem);
+
+    return 0;
+
+}
+
+#define N2D_MAX_AVX512 384
+#define M2D_MIN_AVX512 384
+#define VECLEN         16
+#define NCONS          1
+static inline void set_thread_opts_avx512(int *p_nthrs,
+        blas_thread_t *thread_info, const blas_t *arg)
+{
+    int nthrs = *p_nthrs;
+    dim_t m = arg->m;
+    dim_t n = arg->n;
+
+    thread_info->nthrs_m = 0;
+    thread_info->nthrs_n = 0;
+    thread_info->copy_type = COPY_NONE; // By default don't do parallel copy.
+
+    int condition_2D_bsrc = -1;
+    if ((256 * m > nthrs * n) && (nthrs * m < 256 * n)) {
+        condition_2D_bsrc = 1;
+    } else {
+        condition_2D_bsrc = 0;
+    }
+
+    int condition_1D_copya = 0;
+    if ((m >= 1000) && (n >= nthrs * N2D_MAX_AVX512 / 4)) {
+        condition_2D_bsrc  = 0;
+        condition_1D_copya = 1;
+    }
+
+    // If offset is non-zero, we need to keep 1D_copya to reduce update overhead
+    if (arg->ao != 0 || arg->bo != 0 || arg->co[0] != 0
+            || arg->offsetc != FIX_OFFSET) {
+        condition_2D_bsrc  = 0;
+        condition_1D_copya = 1;
+    }
+
+    if (condition_2D_bsrc == 1) {
+        int nthrs_m = 1;
+        int nthrs_n = nthrs;
+
+        while ((nthrs_n % 2 == 0) &&
+                (n / nthrs > N2D_MAX_AVX512 ||
+                 n / nthrs_n <= N2D_MAX_AVX512 / 2) &&
+                (m / nthrs_m >= 2 * M2D_MIN_AVX512) &&
+                (nthrs_m < 4)) {
+            nthrs_m *= 2;
+            nthrs_n /= 2;
+        }
+
+        thread_info->nthrs_m = nthrs_m;
+        thread_info->nthrs_n = nthrs_n;
+        thread_info->partition = PARTITION_2D;
+
+        // Reset the total number of threads that will be used.
+        *p_nthrs = nthrs_m * nthrs_n;
+
+    } else if (condition_1D_copya && mkldnn_thr_syncable()) {
+        // Use parallel copy A algorithm
+        thread_info->copy_type = COPY_A;
+        thread_info->partition = PARTITION_1D_COL;
+    } else {
+        if ((m > n) && (m / nthrs >= VECLEN || n < NCONS * nthrs)) {
+            thread_info->partition = PARTITION_1D_ROW;
+        } else {
+            thread_info->partition = PARTITION_1D_COL;
+        }
+    }
+}
+#undef N2D_MAX_AVX512
+#undef M2D_MIN_AVX512
+#undef VECLEN
+#undef NCONS
+
+static inline void partition_1d(const int ithr, const int nthrs, const dim_t n,
+        dim_t *t_offset, dim_t *t_block)
+{
+    dim_t band = n / nthrs;
+
+    dim_t tail = n - (nthrs - 1) * band;
+    if (tail > (band + 1))
+        band++;
+    tail = n - (nthrs - 1) * band;
+
+    if (ithr < (nthrs - 1))
+        *t_block = band;
+    else
+        *t_block = tail;
+
+    *t_offset = ithr * band;
+
+    if (*t_offset >= n) {
+        *t_block = 0;
+        *t_offset = 0;
+    } else if ((*t_offset + *t_block) > n) {
+        *t_block = n - *t_offset;
+    }
+}
+
+static inline void partition_2d(const int ithr, int *nthrs, const int ithr_i,
+        const int ithr_j, const int nthrs_m, const int nthrs_n, const dim_t m,
+        const dim_t n, dim_t *p_m_disp, dim_t *p_m_band, dim_t *p_n_disp,
+        dim_t *p_n_band)
+{
+    dim_t m_disp = 0, n_disp = 0;
+    dim_t m_band = 0, n_band = 0;
+
+    int mdiv = nthrs_m;
+    int ndiv = nthrs_n;
+
+    dim_t m_bandt = m / mdiv; /* size per thread */
+    dim_t n_bandt = n / ndiv; /* size per thread */
+    int firstmgroup = mdiv - 1;
+    int firstngroup = ndiv - 1;
+    dim_t firstmval = m_bandt;
+    dim_t firstnval = n_bandt;
+
+    int mthr_used = mdiv;
+    if (m - (mdiv - 1) * m_bandt > m_bandt + 1) {
+        if (m - (mdiv - 1) * m_bandt > mdiv)
+            ++m_bandt;
+
+        firstmval = m_bandt + 1;
+        mthr_used = (int) (m / firstmval);
+
+        if (mthr_used * firstmval < m)
+            ++mthr_used;
+
+        firstmgroup = mthr_used - 1;
+    }
+
+    int nthr_used = ndiv;
+    if (n - (ndiv - 1) * n_bandt > n_bandt + 1) {
+        firstnval = n_bandt + 1;
+        nthr_used = (int) (n / firstnval);
+
+        if (nthr_used * firstnval < n)
+            ++nthr_used;
+
+        firstngroup = nthr_used - 1;
+    }
+
+    *nthrs = mthr_used * nthr_used;
+
+    if (ithr < *nthrs) {
+        if (ithr_i < firstmgroup) {
+            m_band = firstmval;
+            m_disp = ithr_i * firstmval;
+        } else if (ithr_i <= mthr_used - 2) {
+            m_band = m_bandt;
+            m_disp = firstmgroup * firstmval + (ithr_i - firstmgroup) * m_bandt;
+        } else {
+            m_disp = firstmgroup * firstmval
+                + (mthr_used - 1 - firstmgroup) * m_bandt;
+            m_band = nstl::max(0LL, m - m_disp);
+        }
+
+        if (ithr_j < firstngroup) {
+            n_band = firstnval;
+            n_disp = ithr_j * firstnval;
+        } else if (ithr_j <= nthr_used - 2) {
+            n_band = n_bandt;
+            n_disp = firstngroup * firstnval + (ithr_j - firstngroup) * n_bandt;
+        } else {
+            n_disp = firstngroup * firstnval
+                + (nthr_used - 1 - firstngroup) * n_bandt;
+            n_band = nstl::max(0LL, n - n_disp);
+        }
+        m_disp = nstl::max(nstl::min(m_disp, m - 1), 0LL);
+        n_disp = nstl::max(nstl::min(n_disp, n - 1), 0LL);
+    }
+
+    if (ithr < *nthrs) {
+        *p_m_disp = m_disp;
+        *p_n_disp = n_disp;
+        *p_m_band = m_band;
+        *p_n_band = n_band;
+    } else {
+        *p_m_disp = 0;
+        *p_n_disp = 0;
+        *p_m_band = 0;
+        *p_n_band = 0;
+    }
+
+    return;
+}
+
+static inline void decompose_matrices(const int ithr, int *nthrs, dim_t *m,
+        dim_t *n, dim_t *k, const int8_t **a, const uint8_t **b, int32_t **c,
+        const int32_t **co, const blas_thread_t *thread_info, const blas_t *arg)
+{
+    dim_t strideAm = (arg->transa == 0)? 1 : arg->lda;
+    dim_t strideBn = (arg->transb != 0)? 1 : arg->ldb;
+    int offsetc = arg->offsetc;
+
+    switch (thread_info->partition) {
+    case PARTITION_1D_ROW:
+        {
+            dim_t offset = 0;
+            dim_t block = 0;
+            partition_1d(ithr, *nthrs, arg->m, &offset, &block);
+
+            *m = block;
+            *n = arg->n;
+            *k = arg->k;
+
+            // Set matrix A.
+            *a = arg->a + offset * strideAm;
+
+            // Set matrix B.
+            *b = arg->b;
+
+            // Set matrix C.
+            *c = arg->c + offset;
+
+            // Set offset vector for C matrix
+            dim_t co_stride = 0;
+            if (offsetc == FIX_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == ROW_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == COL_OFFSET) {
+                co_stride = offset;
+            }
+            *co = arg->co + co_stride;
+            break;
+        }
+
+    case PARTITION_1D_COL:
+        {
+            dim_t offset = 0;
+            dim_t block = 0;
+            partition_1d(ithr, *nthrs, arg->n, &offset, &block);
+
+            *m = arg->m;
+            *n = block;
+            *k = arg->k;
+
+            // Set matrix A.
+            *a = arg->a;
+
+            // Set matrix B.
+            *b = arg->b + offset * strideBn;
+
+            // Set matrix C.
+            *c = arg->c + offset * arg->ldc;
+
+            // Set offset vector for C matrix
+            dim_t co_stride = 0;
+            if (offsetc == FIX_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == ROW_OFFSET) {
+                co_stride = offset;
+            } else if (offsetc == COL_OFFSET) {
+                co_stride = 0;
+            }
+            *co = arg->co + co_stride;
+            break;
+        }
+
+    case PARTITION_2D_COL_MAJOR:
+        {
+            int nthrs_m = thread_info->nthrs_m;
+            int nthrs_n = thread_info->nthrs_n;
+            int ithr_i = ithr % nthrs_m;
+            int ithr_j = ithr / nthrs_m;
+
+            dim_t m_disp = 0;
+            dim_t m_band = 0;
+            dim_t n_disp = 0;
+            dim_t n_band = 0;
+
+            partition_2d(ithr, nthrs, ithr_i, ithr_j, nthrs_m, nthrs_n,
+                    arg->m, arg->n, &m_disp, &m_band, &n_disp, &n_band);
+
+            *m = m_band;
+            *n = n_band;
+            *k = arg->k;
+
+            // Set matrix A.
+            *a = arg->a + m_disp * strideAm;
+
+            // Set matrix B.
+            *b = arg->b + n_disp * strideBn;
+
+            // Set matrix C.
+            *c = arg->c + m_disp + n_disp * arg->ldc;
+
+            // Set offset vector for C matrix
+            dim_t co_stride = 0;
+            if (offsetc == FIX_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == ROW_OFFSET) {
+                co_stride = n_disp;
+            } else if (offsetc == COL_OFFSET) {
+                co_stride = m_disp;
+            }
+            *co = arg->co + co_stride;
+            break;
+        }
+    }
+}
+
+#define MULTIPLIER 10
+static int parallel_a_copy(const int ithr, const int nthrs, const dim_t m,
+        const dim_t n, const dim_t k, const int8_t *a, const uint8_t *b,
+        int32_t *c, const int32_t *co, const blas_t *arg,
+        char **p_shared_mem)
+{
+    const dim_t lda = arg->lda;
+    const dim_t ldb = arg->ldb;
+    const dim_t strideAm = (arg->transa == 0)? 1 : lda;
+    const dim_t strideAn = (arg->transa != 0)? 1 : lda;
+    const dim_t strideBm = (arg->transb == 0)? 1 : ldb;
+
+    // Padding along M dimension.
+    dim_t m_padd = utils::rnd_up(nstl::min(nstl::max(m, arg->um), arg->bm),
+            arg->um);
+
+    // Padding along K dimension.
+    dim_t k_padd = 0;
+    if (k <= arg->bk_traditional) {
+        k_padd = utils::rnd_up(k, arg->uk);
+        k_padd = nstl::max(128LL, k_padd);
+    } else if (k < 2 * arg->bk) {
+        k_padd = utils::rnd_up(k / 2, arg->uk);
+    } else {
+        k_padd = arg->bk;
+    }
+
+    m_padd *= nthrs > MULTIPLIER ? MULTIPLIER : nthrs;
+    if (m_padd > m) {
+        m_padd = utils::rnd_up(m, arg->um);
+    }
+
+    size_t a_buf_nelems = m_padd * k_padd;
+
+    // Allocate shared memory for A and its row sum buffers in master thread.
+    if (ithr == 0) { // If thread master
+        size_t a_row_sum_nelems = m_padd;
+
+        size_t mem_size = (a_buf_nelems * sizeof(*a) + PAGE_4K)
+            + a_row_sum_nelems * sizeof(*c) + PAGE_4K;
+
+        *p_shared_mem = (char *) malloc(mem_size, 128);
+
+    }
+    mkldnn_thr_barrier();
+
+    char *mem = *p_shared_mem;
+    int8_t *bufferA = (int8_t *) align(mem, PAGE_4K);
+    int32_t *a_row_sum = (int32_t *) align(bufferA + a_buf_nelems, PAGE_4K);
+
+    if (!mem) {
+        return -1;
+    }
+
+    int result = 0; // Return status
+
+    dim_t sizeK = 0;
+    for (dim_t Bk = 0; Bk < k; Bk += sizeK) {
+        sizeK = k - Bk;
+        if (sizeK > k_padd)
+            sizeK = k_padd;
+
+        // Scale C blocks by beta only for the first term of partial sum.
+        float beta = 1.0f;
+        if (Bk == 0)
+            beta = *(arg->beta);
+
+        // Apply C offset for the last k-block of the partial sum.
+        int offsetc = NO_OFFSET;
+        if (Bk + sizeK == k)
+            offsetc = arg->offsetc;
+
+        dim_t sizeM = 0;
+        for (dim_t Bm = 0; Bm < m; Bm += sizeM) {
+            sizeM = m - Bm;
+            if (sizeM > m_padd)
+                sizeM = m_padd;
+
+            if (ithr < nthrs) {
+                dim_t band = (sizeM + nthrs - 1) / nthrs;
+                band = utils::rnd_up(band, arg->um);
+
+                dim_t offset = band * ithr;
+
+                // If offset is too large don't use that thread for copying.
+                if (offset >= sizeM) {
+                    offset = 0;
+                    band = 0;
+                }
+
+                // Handle the tail of the copy.
+                if (offset + band > sizeM) {
+                    band = sizeM - offset;
+                }
+
+                if (band > 0) {
+                    const int8_t *a_block = a + (Bm + offset) * strideAm
+                        + Bk * strideAn;
+                    arg->copyA(&sizeK, &band, a_block, &lda, NULL,
+                            bufferA + offset * sizeK, NULL, NULL,
+                            a_row_sum + offset);
+                }
+            }
+            mkldnn_thr_barrier(); // Wait for finishing parallel copy.
+
+            const uint8_t *b_block = b + Bk * strideBm;
+            int32_t *c_block = c + Bm;
+            dim_t co_stride = 0;
+            if (offsetc == FIX_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == ROW_OFFSET) {
+                co_stride = 0;
+            } else if (offsetc == COL_OFFSET) {
+                co_stride = Bm;
+            }
+
+            result = kernel_driver_parallel_acopiedbcopy(sizeM, n, sizeK,
+                    bufferA, b_block, beta, c_block, offsetc, co + co_stride,
+                    a_row_sum, arg);
+
+            mkldnn_thr_barrier(); // Wait for kernel computations to finish.
+        }
+    }
+
+    // Free memory allocated in master thread
+    if (ithr == 0) {
+        free(mem);
+    }
+
+    return result;
+}
+#undef MULTIPLIER
+
+static inline void get_omp_thread_count(dim_t m, dim_t n, dim_t k,
+        double fp_per_cycle, int *nthrs)
+{
+    double omp_overhead_small_core = 3.0e+3;
+    double omp_intercept_big_core = 4.0e+3;
+    double omp_slope_big_core = 5.0e+2;
+
+    double gemm_cycles = 8.0 * m * n * k / fp_per_cycle;
+
+    int i = *nthrs;
+
+    // Use a different model for omp overheads if nthrs is <= 4
+    if (*nthrs <= 4 && omp_overhead_small_core > 0) {
+        double omp_cycles = omp_overhead_small_core;
+        if (gemm_cycles < omp_cycles) {
+            *nthrs = 1;
+            return;
+        } else {
+            while (i > 1) {
+                if (omp_cycles * i < gemm_cycles * (i - 1)) break;
+                --i;
+            }
+        }
+    } else {
+        if (gemm_cycles < (omp_intercept_big_core + 2 * omp_slope_big_core)) {
+            *nthrs = 1;
+            return;
+        }
+
+        // adaptive decrement to march faster·
+        while (i > 1) {
+            double omp_cycles = omp_intercept_big_core + i * omp_slope_big_core;
+            if (omp_cycles * i < gemm_cycles * (i - 1))
+                break;
+
+            if (i < 10)
+                i -= 2;
+            else if (i < 30)
+                i -= 4;
+            else
+                i -= 8;
+        }
+    }
+
+    if (i < 1)
+        i = 1;
+
+    *nthrs = i;
+}
+
+#define CACHE_LINE_SIZE 64
+static int gemm_threading_driver(blas_t *arg)
+{
+    if ((arg->m <= 0) || (arg->n <= 0))
+        return mkldnn_success;
+
+    if (gemm_s8u8s32_jump_to_gemv_s8u8s32(arg)) {
+        return mkldnn_success;
+    }
+
+    int nthr = (mkldnn_in_parallel()) ? 1 : mkldnn_get_max_threads();
+    get_omp_thread_count(arg->m, arg->n, arg->k, 64.0, &nthr);
+
+    if (nthr == 1) {
+        return gemm_kernel_driver(arg->m, arg->n, arg->k, arg->a, arg->b,
+                arg->c, arg->co, arg);
+    }
+
+    int *results = (int *) malloc(sizeof(*results) * nthr * CACHE_LINE_SIZE,
+            PAGE_4K);
+
+    if (!results) {
+        return -1;
+    }
+
+    for (int i = 0; i < nthr; i++) {
+        results[i * CACHE_LINE_SIZE] = 0; // Initialize to success
+    }
+
+    char *shared_mem = NULL;
+
+    parallel(nthr, [&](const int ithr, const int nthr) {
+        int nthrs = nthr;
+        if (nthrs == 1) {
+            results[0] = gemm_kernel_driver(arg->m, arg->n, arg->k, arg->a,
+                arg->b, arg->c, arg->co, arg);
+        } else {
+            blas_thread_t thread_info;
+            set_thread_opts_avx512(&nthrs, &thread_info, arg);
+
+            const int8_t *a = NULL;
+            const uint8_t *b = NULL;
+            int32_t *c = NULL;
+            const int32_t *co = NULL;
+            dim_t m = -1;
+            dim_t n = -1;
+            dim_t k = -1;
+            decompose_matrices(ithr, &nthrs, &m, &n, &k, &a, &b, &c, &co,
+                &thread_info, arg);
+
+            if (ithr < nthrs) {
+                switch (thread_info.copy_type) {
+                case COPY_A:
+                    results[ithr * CACHE_LINE_SIZE] =
+                        parallel_a_copy(ithr, nthrs, m, n, k, a, b, c, co, arg,
+                                &shared_mem);
+                    break;
+
+                default:
+                case COPY_NONE:
+                    results[ithr * CACHE_LINE_SIZE] =
+                        gemm_kernel_driver(m, n, k, a, b, c, co, arg);
+                    break;
+                }
+            }
+        }
+    });
+
+    int result = 0;  // Initialize to success
+    for (int i = 0; i < nthr; i++) {
+        if (results[i] != 0) {
+            result = results[i * CACHE_LINE_SIZE];
+            break;
+        }
+    }
+
+    free(results);
+
+    return result;
+}
+#undef CACHE_LINE_SIZE
+
+static jit_avx512_core_u8_copy_an_kern *copy_an;
+static jit_avx512_core_u8_copy_at_kern *copy_at;
+static jit_avx512_core_u8_copy_bn_kern *copy_bn;
+static jit_avx512_core_u8_copy_bt_kern *copy_bt;
+static jit_avx512_core_u8_copy_sum_an_kern *copy_sum_an;
+static jit_avx512_core_u8_copy_sum_at_kern *copy_sum_at;
+static jit_avx512_core_u8_copy_sum_bn_kern *copy_sum_bn;
+static jit_avx512_core_u8_copy_sum_bt_kern *copy_sum_bt;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_b;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_r;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_c;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_b0;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_b0_b;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_b0_r;
+static jit_avx512_core_gemm_s8u8s32_kern *kernel_b0_c;
+static jit_avx512_core_gemv_s8u8s32_kern *gemv_s8u8s32_kernel;
+static jit_avx512_core_gemv_s8u8s32_kern *gemv_u8s8s32_kernel;
+
+static void jit_init(blas_t *arg)
+{
+    static int (*copyAn)(const dim_t *m, const dim_t *n, const int8_t *a,
+            const dim_t *lda, const int8_t *alpha, int8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copyAt)(const dim_t *m, const dim_t *n, const int8_t  *a,
+            const dim_t *lda, const int8_t  *alpha, int8_t  *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copyBn)(const dim_t *m, const dim_t *n, const uint8_t *a,
+            const dim_t *lda, const uint8_t *alpha, uint8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copyBt)(const dim_t *m, const dim_t *n, const uint8_t *a,
+            const dim_t *lda, const uint8_t *alpha, uint8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copySumAn)(const dim_t *m, const dim_t *n, const int8_t  *a,
+            const dim_t *lda, const int8_t  *alpha, int8_t  *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copySumAt)(const dim_t *m, const dim_t *n, const int8_t  *a,
+            const dim_t *lda, const int8_t  *alpha, int8_t  *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copySumBn)(const dim_t *m, const dim_t *n, const uint8_t *a,
+            const dim_t *lda, const uint8_t *alpha, uint8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*copySumBt)(const dim_t *m, const dim_t *n, const uint8_t *a,
+            const dim_t *lda, const uint8_t *alpha, uint8_t *b,
+            const dim_t *dummy1, const dim_t *dummy2, int32_t *row_col_sum);
+
+    static int (*kern)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_b)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_r)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_c)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_b0)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_b0_b)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_b0_r)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static int (*kern_b0_c)(const dim_t *m, const dim_t *n, const dim_t *k,
+            const float *alpha, const int8_t *a, const uint8_t *b, int32_t *c,
+            const dim_t ldc, const int32_t *col_offset,
+            const int32_t *row_offset);
+
+    static void (*gemv_s8u8s32_kern)(const dim_t, const dim_t, const float,
+                                     const int8_t*, const dim_t, const uint8_t*,
+                                     const float, int32_t*);
+
+    static void (*gemv_u8s8s32_kern)(const dim_t, const dim_t, const float,
+                                     const uint8_t*, const dim_t, const int8_t*,
+                                     const float, int32_t*);
+
+    if (mayiuse(avx512_core_vnni)) {
+            arg->um = AVX512_UNROLL_M;
+            arg->un = AVX512_UNROLL_N;
+            arg->uk = AVX512_UNROLL_K;
+            arg->bm = AVX512_BM;
+            arg->bn = AVX512_BN;
+            arg->bk = AVX512_BK_VNNI;
+
+            arg->bk_traditional   = AVX512_BK_TRADITIONAL;
+            arg->bn_small_k       = AVX512_BN_SMALL_K;
+            arg->blocking_small_k = AVX512_BLOCKING_SMALL_K;
+    } else {
+            arg->um = AVX512_UNROLL_M;
+            arg->un = AVX512_UNROLL_N;
+            arg->uk = AVX512_UNROLL_K;
+            arg->bm = AVX512_BM;
+            arg->bn = AVX512_BN;
+            arg->bk = AVX512_BK;
+
+            arg->bk_traditional   = AVX512_BK_TRADITIONAL;
+            arg->bn_small_k       = AVX512_BN_SMALL_K;
+            arg->blocking_small_k = AVX512_BLOCKING_SMALL_K;
+    }
+
+    static std::once_flag initialized;
+    std::call_once(initialized, []{
+
+        copy_an = new jit_avx512_core_u8_copy_an_kern();
+        copy_at = new jit_avx512_core_u8_copy_at_kern();
+        copy_bn = new jit_avx512_core_u8_copy_bn_kern();
+        copy_bt = new jit_avx512_core_u8_copy_bt_kern();
+
+        copy_sum_an = new jit_avx512_core_u8_copy_sum_an_kern();
+        copy_sum_at = new jit_avx512_core_u8_copy_sum_at_kern();
+        copy_sum_bn = new jit_avx512_core_u8_copy_sum_bn_kern();
+        copy_sum_bt = new jit_avx512_core_u8_copy_sum_bt_kern();
+
+        kernel      = new jit_avx512_core_gemm_s8u8s32_kern(false, false, false);
+        kernel_b    = new jit_avx512_core_gemm_s8u8s32_kern(false, true,  true);
+        kernel_r    = new jit_avx512_core_gemm_s8u8s32_kern(false, false, true);
+        kernel_c    = new jit_avx512_core_gemm_s8u8s32_kern(false, true,  false);
+        kernel_b0   = new jit_avx512_core_gemm_s8u8s32_kern(true,  false, false);
+        kernel_b0_b = new jit_avx512_core_gemm_s8u8s32_kern(true,  true,  true);
+        kernel_b0_r = new jit_avx512_core_gemm_s8u8s32_kern(true,  false, true);
+        kernel_b0_c = new jit_avx512_core_gemm_s8u8s32_kern(true,  true,  false);
+
+        gemv_s8u8s32_kernel = new jit_avx512_core_gemv_s8u8s32_kern();
+        gemv_u8s8s32_kernel = new jit_avx512_core_gemv_s8u8s32_kern();
+
+
+        copyAn = copy_an->getCode<int (*)(const dim_t *, const dim_t *,
+                const int8_t *, const dim_t *, const int8_t *, int8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copyAt = copy_at->getCode<int (*)(const dim_t *, const dim_t *,
+                const int8_t *, const dim_t *, const int8_t *, int8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copyBn = copy_bn->getCode<int (*)(const dim_t *, const dim_t *,
+                const uint8_t *, const dim_t *, const uint8_t *, uint8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copyBt = copy_bt->getCode<int (*)(const dim_t *, const dim_t *,
+                const uint8_t *, const dim_t *, const uint8_t *, uint8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copySumAn = copy_sum_an->getCode<int (*)(const dim_t *, const dim_t *,
+                const int8_t *, const dim_t *, const int8_t *, int8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copySumAt = copy_sum_at->getCode<int (*)(const dim_t *, const dim_t *,
+                const int8_t *, const dim_t *, const int8_t *, int8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copySumBn = copy_sum_bn->getCode<int (*)(const dim_t *, const dim_t *,
+                const uint8_t *, const dim_t *, const uint8_t *, uint8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        copySumBt = copy_sum_bt->getCode<int (*)(const dim_t *, const dim_t *,
+                const uint8_t *, const dim_t *, const uint8_t *, uint8_t *,
+                const dim_t *, const dim_t *, int32_t *)>();
+
+        kern = kernel->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_b = kernel_b->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_r = kernel_r->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_c = kernel_c->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_b0 = kernel_b0->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_b0_b = kernel_b0_b->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_b0_r = kernel_b0_r->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        kern_b0_c = kernel_b0_c->getCode<int (*)(const dim_t *, const dim_t *,
+                const dim_t *, const float *, const int8_t *, const uint8_t *,
+                int32_t *, const dim_t, const int32_t *, const int32_t *)>();
+
+        gemv_s8u8s32_kern =
+            gemv_s8u8s32_kernel -> generate<jit_avx512_core_gemv_s8u8s32_kern::gemv_s8u8s32_kernel_t>
+            (mayiuse(avx512_core_vnni));
+        gemv_u8s8s32_kern =
+            gemv_u8s8s32_kernel -> generate<jit_avx512_core_gemv_s8u8s32_kern::gemv_u8s8s32_kernel_t>
+            (mayiuse(avx512_core_vnni));
+    });
+
+    if (arg->bo == 0) { // No need to compute A row sum if bo is zero
+        if (arg->transa == 0) {
+            arg->copyA = copyAn;
+        } else {
+            arg->copyA = copyAt;
+        }
+    } else {
+        if (arg->transa == 0) {
+            arg->copyA = copySumAn;
+        } else {
+            arg->copyA = copySumAt;
+        }
+    }
+
+    if (arg->ao == 0) { // No need to compute B column sum if ao is zero
+        if (arg->transb == 0) {
+            arg->copyB = copyBn;
+        } else {
+            arg->copyB = copyBt;
+        }
+    } else {
+        if (arg->transb == 0) {
+            arg->copyB = copySumBn;
+        } else {
+            arg->copyB = copySumBt;
+        }
+    }
+
+    arg->kernel      = kern;
+    arg->kernel_b    = kern_b;
+    arg->kernel_r    = kern_r;
+    arg->kernel_c    = kern_c;
+    arg->kernel_b0   = kern_b0;
+    arg->kernel_b0_b = kern_b0_b;
+    arg->kernel_b0_r = kern_b0_r;
+    arg->kernel_b0_c = kern_b0_c;
+    arg -> gemv_s8u8s32_kernel = gemv_s8u8s32_kern;
+    arg -> gemv_u8s8s32_kernel = gemv_u8s8s32_kern;
+}
+
+mkldnn_status_t jit_avx512_core_gemm_s8u8s32(
+        const char *transA, const char *transB, const char *offsetC,
+        const int *m, const int *n, const int *k,
+        const float *alpha, const int8_t *a, const int *lda, const int8_t *oa,
+        const uint8_t *b, const int *ldb, const int8_t *ob,
+        const float *beta, int32_t *c, const int *ldc, const int32_t *oc)
+{
+    char transa  = *transA;
+    char transb  = *transB;
+    char offsetc = *offsetC;
+
+    blas_t args;
+
+    // Initialize blas structure
+    args.m         = *m;
+    args.n         = *n;
+    args.k         = *k;
+    args.alpha     = alpha;
+    args.a         = a;
+    args.lda       = *lda;
+    args.b         = b;
+    args.ldb       = *ldb;
+    args.beta      = beta;
+    args.c         = c;
+    args.ldc       = *ldc;
+    args.transa    = (transa == 'N' || transa == 'n') ? 0 : 1;
+    args.transb    = (transb == 'N' || transb == 'n') ? 0 : 1;
+    args.um        = 0;
+    args.un        = 0;
+    args.bm        = 0;
+    args.bn        = 0;
+    args.bk        = 0;
+    args.copyA     = NULL;
+    args.copyB     = NULL;
+    args.kernel    = NULL;
+    args.kernel_b0 = NULL;
+    args.ao        = *oa;
+    args.bo        = *ob;
+    args.co        = oc;
+
+    if (offsetc == 'F' || offsetc == 'f') {
+        args.offsetc = FIX_OFFSET;
+    } else if (offsetc == 'R' || offsetc == 'r') {
+        args.offsetc = ROW_OFFSET;
+    } else { // offsetc == 'C' || offsetc == 'c'
+        args.offsetc = COL_OFFSET;
+    }
+
+    jit_init(&args);
+    int result = gemm_threading_driver(&args);
+
+    return (result < 0) ? mkldnn_out_of_memory : mkldnn_success;
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32.hpp
new file mode 100644
index 000000000..b2e2902a1
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32.hpp
@@ -0,0 +1,38 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef JIT_AVX512_CORE_GEMM_S8U8S32_HPP
+#define JIT_AVX512_CORE_GEMM_S8U8S32_HPP
+
+#include <cstdint>
+#include "mkldnn_types.h"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+mkldnn_status_t jit_avx512_core_gemm_s8u8s32(
+        const char *transA, const char *transB, const char *offsetC,
+        const int *m, const int *n, const int *k,
+        const float *alpha, const int8_t *a, const int *lda, const int8_t *oa,
+        const uint8_t *b, const int *ldb, const int8_t *ob,
+        const float *beta, int32_t *c, const int *ldc, const int32_t *oc);
+
+}
+}
+}
+
+#endif // JIT_AVX512_CORE_GEMM_S8U8S32_HPP
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.cpp
new file mode 100644
index 000000000..57554a185
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.cpp
@@ -0,0 +1,539 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_avx512_core_gemm_s8u8s32_kern.hpp"
+
+
+#ifdef _WIN32
+static const bool is_windows = 1;
+#else
+static const bool is_windows = 0;
+#endif
+
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace Xbyak;
+
+
+
+
+// Convert between vector register lengths.
+static inline Xmm make_xmm(const Xmm &v) { return Xmm(v.getIdx()); }
+static inline Ymm make_ymm(const Xmm &v) { return Ymm(v.getIdx()); }
+
+// Load from or store to C.
+void jit_avx512_core_gemm_s8u8s32_kern::c_load(const Xbyak::Xmm &dst,
+    const Xbyak::Address &src, int nelems)
+{
+    switch (nelems) {
+    default: vmovups(dst, src); break;
+    case 8:  vmovups(make_ymm(dst), src); break;
+    case 4:  vmovups(make_xmm(dst), src); break;
+    case 2:  vmovlps(make_xmm(dst), src); break;
+    case 1:  vmovss(make_xmm(dst), src); break;
+    }
+}
+void jit_avx512_core_gemm_s8u8s32_kern::c_store(const Xbyak::Address &dst,
+    const Xbyak::Xmm &src, int nelems)
+{
+    switch (nelems) {
+    default: vmovups(dst, src); break;
+    case 8:  vmovups(dst, make_ymm(src)); break;
+    case 4:  vmovups(dst, make_xmm(src)); break;
+    case 2:  vmovsd(dst, make_xmm(src)); break;
+    case 1:  vmovss(dst, make_xmm(src)); break;
+    }
+}
+
+// Perform length-4 dot product accumulations of unsigned and signed bytes
+//  in parallel.
+// Use vpdpbusd if VNNI available, otherwise emulate.
+void jit_avx512_core_gemm_s8u8s32_kern::dot_product(const Xmm &dst,
+    const Xmm &src1, const Xmm &src2)
+{
+    if (vnni)
+        vpdpbusd(dst, src1, src2);
+    else {
+        vpmaddubsw(dp_scratch, src1, src2);
+        vpmaddwd(dp_scratch, ones, dp_scratch);
+        vpaddd(dst, dst, dp_scratch);
+    }
+}
+
+// Inner kernel.
+void jit_avx512_core_gemm_s8u8s32_kern::kernel_loop(int unroll_m, int unroll_n,
+        bool cfetch)
+{
+    int um_vecs = (unroll_m + 15) >> 4;
+    Label label_kernel_loop;
+
+    L_aligned(label_kernel_loop); {
+        for (int h = 0; h < 4; h++) {
+            for (int j = 0; j < unroll_n; j++) {
+                const Zmm b = b_regs[j & 1];
+
+                vpbroadcastd(b, ptr[BO + isize *
+                    (2 * j + 2 * h * unroll_n - offset_b)]);
+                dot_product(c_regs[0][j], b, a_regs[0]);
+
+                if (j == 1 && !(h & 1))
+                    prefetch_b(ptr[BO + isize * (prefetch_size_b
+                        + 2 * h * unroll_n - offset_b)]);
+                else if (j % 3 == 0)
+                    prefetch_a(ptr[AO + isize * (prefetch_size_a
+                        + 32 * (j / 3) + 2 * h * unroll_m - offset_a)]);
+
+                for (int i = 1; i < um_vecs; i++)
+                    dot_product(c_regs[i][j], b, a_regs[i]);
+
+                if (cfetch && (j == std::min(1, unroll_n - 1))) {
+                    if (h == 3)
+                        lea(CO2, ptr[CO2 + LDC]);
+                    else if (h < um_vecs)
+                        prefetch_c(ptr[CO2 + (16 * h * size)]);
+                }
+
+                if (h == 3 && j == std::min(3, unroll_n - 1))
+                    lea(AA, ptr[AA + (32 * isize)]);
+            }
+
+            for (int i = 0; i < um_vecs; i++)
+                vmovups(a_regs[i], ptr[AO + isize *
+                (32 * i + 2 * (h + 1) * unroll_m - offset_a)]);
+
+            if (h == 2)
+                prefetch_x(ptr[AA - (offset_a * isize)]);
+        }
+
+        add(AO, 8 * isize * unroll_m);
+        add(BO, 8 * isize * unroll_n);
+        sub(LoopCount, 1);
+        jg(label_kernel_loop, T_NEAR);
+    }
+}
+
+// k remainder loop for kernel.
+void jit_avx512_core_gemm_s8u8s32_kern::remainder_kernel(int unroll_m,
+        int unroll_n, int unroll_k, int bwidth)
+{
+    if ((unroll_m > IGEMM_UNROLL_M) || (unroll_n > IGEMM_UNROLL_N)
+            || (unroll_m < 0)  || (unroll_n < 0))
+        return;
+
+    int um_vecs = (unroll_m + 15) >> 4;
+
+    for (int h = 0; h < unroll_k; h++) {
+        for (int j = 0; j < unroll_n; j++) {
+            Zmm b = b_regs[j & 1];
+            auto b_src = ptr[BO + (-isize * offset_b
+                + bwidth * (j + h * unroll_n))];
+
+            switch (bwidth) {
+            case 4:
+                vpbroadcastd(b, b_src);
+                break;
+            case 2:
+                vpbroadcastw(b, b_src);
+                break;
+            case 1:
+                vpbroadcastb(b, b_src);
+                break;
+            }
+            for (int i = 0; i < um_vecs; i++)
+                dot_product(c_regs[i][j], b, a_regs[i]);
+        }
+
+        if (unroll_k > 1) {
+            for (int i = 0; i < um_vecs; i++)
+                vmovups(a_regs[i], ptr[AO + isize * (32 * i
+                    + (h + 1) * 2 * unroll_m - offset_a)]);
+        }
+    }
+
+    add(AO, unroll_k * unroll_m * bwidth);
+    add(BO, unroll_k * unroll_n * bwidth);
+}
+
+// Inner loop.
+void jit_avx512_core_gemm_s8u8s32_kern::innerloop(int unroll_m, int unroll_n)
+{
+    if ((unroll_m > IGEMM_UNROLL_M) || (unroll_n > IGEMM_UNROLL_N)
+            || (unroll_m < 0)  || (unroll_n < 0))
+        return;
+
+    int um_vecs = (unroll_m + 15) >> 4;
+    int stage1 = unroll_n, stage2 = unroll_n;
+
+    Label label_kernel_loop_1, label_k_main_loop_2, label_kernel_loop_2;
+    Label label_k_main_loop_3, label_kernel_loop_3;
+    Label label_k_remainder_loop_begin, label_k_rem_4, label_k_rem_2;
+    Label label_k_rem_1, label_update_begin;
+
+    mov(AO, A);
+    for (int i = 0; i < um_vecs; i++)
+        vmovups(a_regs[i], ptr[AO + isize * (32 * i - offset_a)]);
+
+    mov(LoopCount, K);
+    sar(LoopCount, 4);
+    jle(label_k_remainder_loop_begin, T_NEAR);
+
+    // Main k loops, broken into three parts to time C prefetching.
+    sub(LoopCount, stage1 + stage2);
+    jle(label_k_main_loop_2, T_NEAR);
+
+    kernel_loop(unroll_m, unroll_n, false);
+
+    L_aligned(label_k_main_loop_2);
+    lea(CO2, ptr[CO1 + size * (std::min(unroll_m, 16) - 1)]);
+    add(LoopCount, stage1);
+    jle(label_k_main_loop_3, T_NEAR);
+
+    kernel_loop(unroll_m, unroll_n, true);
+
+    L_aligned(label_k_main_loop_3);
+    lea(CO2, ptr[CO1 + size * (std::min(unroll_m, 16) - 1)]);
+    add(LoopCount, stage2);
+    jle(label_k_remainder_loop_begin, T_NEAR);
+
+    kernel_loop(unroll_m, unroll_n, true);
+
+    // k remainder handling
+    L_aligned(label_k_remainder_loop_begin);
+    mov(LoopCount, K);
+    test(LoopCount, 8);
+    je(label_k_rem_4, T_NEAR);
+
+    remainder_kernel(unroll_m, unroll_n, 2, 4);
+
+    L_aligned(label_k_rem_4);
+    mov(LoopCount, K);
+    test(LoopCount, 4);
+    je(label_k_rem_2, T_NEAR);
+
+    remainder_kernel(unroll_m, unroll_n, 1, 4);
+
+    L_aligned(label_k_rem_2);
+    mov(LoopCount, K);
+    test(LoopCount, 2);
+    je(label_k_rem_1, T_NEAR);
+
+    Zmm zero = zmm6;
+    Zmm tmp = zmm5;
+
+    vpxorq(zero, zero, zero);
+    for (int i = 0; i < um_vecs; i++) {
+        Zmm a = a_regs[i];
+        vbroadcasti64x4(a, ptr[AO + isize * (16 * i - offset_a)]);
+        vpunpcklwd(tmp, a, zero);
+        vpunpckhwd(a, a, zero);
+        vshufi32x4(a, tmp, a, 0x44);
+        vshufi32x4(a, a, a, 0xD8);
+    }
+
+    remainder_kernel(unroll_m, unroll_n, 1, 2);
+
+    L_aligned(label_k_rem_1);
+    mov(LoopCount, K);
+    test(LoopCount, 1);
+    je(label_update_begin, T_NEAR);
+
+    vpxorq(zero, zero, zero);
+    for (int i = 0; i < um_vecs; i++) {
+        Zmm a = a_regs[i];
+        vbroadcasti32x4(a, ptr[AO + isize * (8 * i - offset_a)]);
+        vpunpcklbw(tmp, a, zero);
+        vpunpckhbw(a, a, zero);
+        vinsertf128(make_ymm(a), make_ymm(tmp), make_xmm(a), 1);
+        vpunpcklwd(tmp, a, zero);
+        vpunpckhwd(a, a, zero);
+        vshufi32x4(a, tmp, a, 0x44);
+        vshufi32x4(a, a, a, 0xD8);
+    }
+
+    remainder_kernel(unroll_m, unroll_n, 1, 1);
+
+    // Add offsets and update C.
+    L_aligned(label_update_begin);
+
+    if (enable_offset_r) {
+        // Add row offsets.
+        mov(rax, coffset_ry);
+        for (int j = 0; j < unroll_n; j++) {
+            Zmm row_offset = zmm0;
+
+            vbroadcastss(row_offset, ptr[rax + size * j]);
+
+            for (int i = 0; i < um_vecs; i++)
+                vpaddd(c_regs[i][j], c_regs[i][j], row_offset);
+        }
+        add(coffset_ry, size * unroll_n);
+    }
+
+    if (enable_offset_c) {
+        // Add column offsets.
+        mov(rax, coffset_cy);
+        for (int i = 0; i < um_vecs; i++) {
+            Zmm col_offset = zmm0;
+
+            c_load(col_offset, ptr[rax + size * 16 * i], unroll_m);
+
+            for (int j = 0; j < unroll_n; j++)
+                vpaddd(c_regs[i][j], c_regs[i][j], col_offset);
+        }
+    }
+
+    Reg64 LDC3 = rax;
+    lea(LDC3, ptr[LDC + LDC * 2]);
+
+    // C updates.
+    int c_off_j = 0;
+    for (int j = 0; j < unroll_n; j++) {
+        if (j > 0 && (j & 3) == 0) {
+            lea(CO1, ptr[CO1 + LDC * 4]);
+            c_off_j += 4;
+        }
+
+        int jj = j - c_off_j;
+
+        for (int i = 0; i < um_vecs; i++) {
+            Zmm c = c_regs[i][j];
+            Zmm c_old = zmm0;
+            decltype(LDC * jj) ldc_mult = (jj == 3) ? LDC3 : LDC * jj;
+
+            auto c_mem = ptr[CO1 + ldc_mult + size * 16 * i];
+
+            if (beta_zero)
+                c_store(c_mem, c, unroll_m);
+            else {
+                c_load(c_old, c_mem, unroll_m);
+                vpaddd(c_old, c, c_old);
+                c_store(c_mem, c_old, unroll_m);
+            }
+
+            vpxorq(c, c, c);
+        }
+    }
+
+    lea(CO1, ptr[CO1 + LDC * (unroll_n - c_off_j)]);
+}
+
+// Outer loop.
+void jit_avx512_core_gemm_s8u8s32_kern::outerloop(int unroll_x, int unroll_y,
+    Label *&cur_outerloop_label)
+{
+    Label label_m_loop, label_n_loop, label_n_remainder_loops[6];
+
+    L(*cur_outerloop_label);
+    cur_outerloop_label++;
+    if (unroll_x >= IGEMM_UNROLL_M) {
+        mov(J, M);
+        cmp(J, unroll_x);
+        jl(*cur_outerloop_label, T_NEAR);    // Jump to next outerloop label.
+    } else {
+        test(J, unroll_x);
+        jle(*cur_outerloop_label, T_NEAR);
+    }
+
+    L_aligned(label_m_loop); {
+        mov(CO1, C);
+        add(C, unroll_x * size);
+
+        mov(BO, B);
+
+        mov(AA, K);
+        imul(AA, AA, unroll_x * isize);
+        lea(AA, ptr[A + AA + isize * prefetch_size_a]);
+
+        if (enable_offset_c) {
+            mov(rax, coffset_cx);
+            mov(coffset_cy, rax);
+            add(rax, unroll_x * size);
+            mov(coffset_cx, rax);
+        }
+
+        if (enable_offset_r) {
+            mov(rax, coffset_rx);
+            mov(coffset_ry, rax);
+        }
+
+        mov(I, N);
+        cmp(I, unroll_y);
+        jl(label_n_remainder_loops[0], T_NEAR);
+
+        L_aligned(label_n_loop); {
+            innerloop(unroll_x, unroll_y);
+            sub(I, unroll_y);
+            cmp(I, unroll_y);
+            jge(label_n_loop, T_NEAR);
+        }
+
+        align(16);
+
+        int label_idx = 0;
+        for (int uy = 16; uy > 0; uy >>= 1) {
+            L(label_n_remainder_loops[label_idx++]);
+            if (unroll_y > uy) {
+                test(I, uy);
+                jle(label_n_remainder_loops[label_idx], T_NEAR);
+
+                innerloop(unroll_x, uy);
+                align(16);
+            }
+        }
+        L(label_n_remainder_loops[label_idx]);
+
+        mov(A, AO);
+        if (unroll_x >= IGEMM_UNROLL_M) {
+            sub(J, unroll_x);
+            cmp(J, unroll_x);
+            jge(label_m_loop);
+        }
+    }
+
+    align(16);
+}
+
+void jit_avx512_core_gemm_s8u8s32_kern::generate()
+{
+    // Prologue
+    preamble();
+    sub(rsp, stack_alloc_size);
+
+    if (is_windows) {
+        mov(A, arg_a);
+        mov(B, arg_b);
+    }
+
+    mov(C, arg_c);
+    mov(LDC, arg_ldc);
+
+    sub(A, -offset_a * isize);
+    sub(B, -offset_b * isize);
+
+    mov(M, qword[M]);
+    mov(N, qword[N]);
+    mov(K, qword[K]);
+
+    lea(LDC, ptr[LDC * size]);
+
+    if (enable_offset_c) {
+        mov(rax, arg_coffset_c);
+        mov(coffset_cx, rax);
+    }
+    if (enable_offset_r) {
+        mov(rax, arg_coffset_r);
+        mov(coffset_rx, rax);
+    }
+
+    for (int i = 0; i < (max_unroll_m >> 4); i++) {
+        for (int j = 0; j < max_unroll_n; j++) {
+            auto &c = c_regs[i][j];
+            vpxorq(c, c, c);
+        }
+    }
+
+    if (!vnni) {
+        mov(rax, 1);
+        movq(make_xmm(ones), rax);
+        vpbroadcastw(ones, make_xmm(ones));
+    }
+
+    Label outerloop_labels[8];
+    Label *cur_outerloop_label = &outerloop_labels[0];
+
+    // Main m loop.
+    outerloop(IGEMM_UNROLL_M, IGEMM_UNROLL_N, cur_outerloop_label);
+
+    // m remainder loops.
+    for (int um = 32; um > 0; um >>= 1)
+        if (IGEMM_UNROLL_M > um)
+            outerloop(um, IGEMM_UNROLL_N, cur_outerloop_label);
+
+    L(*cur_outerloop_label);
+
+    // Epilogue.
+    add(rsp, stack_alloc_size);
+    postamble();
+}
+
+
+jit_avx512_core_gemm_s8u8s32_kern::jit_avx512_core_gemm_s8u8s32_kern(bool
+        beta_zero_, bool enable_offset_c_, bool enable_offset_r_) :
+    jit_generator(nullptr, 100000), arg_a(0), arg_b(0), arg_c(0), arg_ldc(0),
+    arg_coffset_c(0), arg_coffset_r(0), coffset_cx(0), coffset_cy(0),
+    coffset_rx(0), coffset_ry(0)
+{
+    beta_zero = beta_zero_;
+    enable_offset_c = enable_offset_c_;
+    enable_offset_r = enable_offset_r_;
+    vnni = mayiuse(avx512_core_vnni);
+
+    // Assign integer registers
+    M = is_windows ? rcx : rdi;
+    N = is_windows ? rdx : rsi;
+    K = is_windows ? r8 : rdx;
+    A = is_windows ? rsi : r8;
+    B = r9;
+    C = r10;
+    LDC = r11;
+    I = r12;
+    J = r13;
+    LoopCount = rax;
+    AO = r14;
+    BO = r15;
+    CO1 = rbx;
+    CO2 = rbp;
+    AA = is_windows ? rdi : rcx;
+
+    // Assign vector registers
+    dp_scratch = zmm6;
+    ones = zmm7;
+    for (int i = 0; i < (max_unroll_m >> 4); i++)
+        a_regs[i] = Zmm(i);
+    b_regs[0] = zmm4;
+    b_regs[1] = zmm5;
+
+    int rn = 0;
+    for (int i = 0; i < (max_unroll_m >> 4); i++)
+        for (int j = 0; j < max_unroll_n; j++)
+            c_regs[i][j] = Zmm(8 + rn++);
+
+    // Assign stack variables.
+    stack_alloc_size = 32;
+    auto args_offset = stack_alloc_size + get_size_of_abi_save_regs()
+        + 8 + (is_windows ? 48 : 0);
+
+    arg_a         = ptr[rsp + (args_offset - 16)];
+    arg_b         = ptr[rsp + (args_offset - 8)];
+    arg_c         = ptr[rsp + (args_offset + 0)];
+    arg_ldc       = ptr[rsp + (args_offset + 8)];
+    arg_coffset_c = ptr[rsp + (args_offset + 16)];
+    arg_coffset_r = ptr[rsp + (args_offset + 24)];
+
+    coffset_cx = qword[rsp + 0];
+    coffset_cy = qword[rsp + 8];
+    coffset_rx = qword[rsp + 16];
+    coffset_ry = qword[rsp + 24];
+
+    generate();
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.hpp
new file mode 100644
index 000000000..e8efcc1cc
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemm_s8u8s32_kern.hpp
@@ -0,0 +1,101 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef IGEMM_KERNEL_GENERATOR_HPP
+#define IGEMM_KERNEL_GENERATOR_HPP
+
+#include "jit_generator.hpp"
+
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+class jit_avx512_core_gemm_s8u8s32_kern : public jit_generator {
+public:
+    jit_avx512_core_gemm_s8u8s32_kern(bool beta_zero_, bool enable_offset_c_,
+        bool enable_offset_r_);
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemm_s8u8s32_kern);
+
+protected:
+    bool beta_zero;
+    bool enable_offset_c, enable_offset_r;
+    bool vnni;
+
+    void prefetch_a(const Xbyak::Address &src) {
+        prefetcht0(src);
+    }
+    void prefetch_b(const Xbyak::Address &src) {
+        prefetcht0(src);
+    }
+    void prefetch_c(const Xbyak::Address &src) {
+        prefetchw(src);
+    }
+    void prefetch_x(const Xbyak::Address &src) {
+        prefetcht0(src);
+    }
+
+    void c_load(const Xbyak::Xmm &dst, const Xbyak::Address &src, int nelems);
+    void c_store(const Xbyak::Address &dst, const Xbyak::Xmm &src, int nelems);
+
+    void dot_product(const Xbyak::Xmm &dst, const Xbyak::Xmm &src1,
+        const Xbyak::Xmm &src2);
+    void kernel_loop(int unroll_m, int unroll_n, bool cfetch);
+    void remainder_kernel(int unroll_m, int unroll_n, int unroll_k, int bwidth);
+    void innerloop(int unroll_m, int unroll_n);
+    void outerloop(int unroll_x, int unroll_y, Xbyak::Label *&outerloop_label);
+
+    void generate();
+
+
+private:
+    static const int IGEMM_UNROLL_M = 48;
+    static const int IGEMM_UNROLL_N = 8;
+
+    static const int isize = 2;
+    static const int size = 4;
+
+    // Prefetch configuration
+    static const int prefetch_size_a = 32 * 5;
+    static const int prefetch_size_b = 32 * 4;
+
+    static const int offset_a = 256, offset_b = 256;
+    static const int max_unroll_m = 48, max_unroll_n = 8;
+
+    // Integer register assignments
+    Xbyak::Reg64 M, N, K, A, B, C, LDC, I, J, LoopCount;
+    Xbyak::Reg64 AO, BO, CO1, CO2, AA;
+
+    // Vector register assignments
+    Xbyak::Zmm dp_scratch, ones, a_regs[max_unroll_m >> 4], b_regs[2];
+    Xbyak::Zmm c_regs[max_unroll_m >> 4][max_unroll_n];
+
+    // Stack variable assignments
+    int stack_alloc_size;
+    Xbyak::Address arg_a, arg_b, arg_c, arg_ldc, arg_coffset_c, arg_coffset_r;
+    Xbyak::Address coffset_cx, coffset_cy, coffset_rx, coffset_ry;
+
+    void L_aligned(Xbyak::Label &label, int alignment = 16) {
+        align(alignment);
+        L(label);
+    }
+};
+
+}
+}
+}
+
+#endif /* header guard */
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemv_s8u8s32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemv_s8u8s32.cpp
new file mode 100644
index 000000000..4f0b10dad
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_gemv_s8u8s32.cpp
@@ -0,0 +1,290 @@
+/*******************************************************************************
+ * Copyright 2019 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include "gemv.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+int gemm_s8u8s32_jump_to_gemv_s8u8s32(blas_t *arg) {
+
+    blas_t arg_gemv = *arg;
+
+    if ((arg -> offsetc == FIX_OFFSET) && // Fix offset
+        (arg -> ao == 0) &&
+        (arg -> bo == 0) &&
+        (arg -> co[0] == 0) &&
+        (*(arg -> alpha) == 1.0f) &&
+        ((*(arg -> beta) == 1.0f) || *(arg -> beta) == 0.0f)) {
+
+        if (arg -> n == 1) {
+
+            if (arg -> transa == 1) { // A transpose
+                arg_gemv.n = arg -> k;
+                arg_gemv.ldc = 1;
+                arg_gemv.swap = 0;
+                if (arg -> transb == 0) { // B non transpose
+                    arg_gemv.ldb = 1;
+                }
+                // B transpose arg_gemv.ldb = arg -> ldb
+                gemv_threading_driver(&arg_gemv);
+                return 1;
+            }
+        }
+
+        if (arg -> m == 1) {
+
+            if (arg -> transb == 0) { // B non transpose
+                arg_gemv.transa = 1;
+                arg_gemv.m = arg -> n;
+                arg_gemv.n = arg -> k;
+                arg_gemv.a = (int8_t *) arg -> b;
+                arg_gemv.lda = arg -> ldb;
+                arg_gemv.b = (uint8_t *) arg -> a;
+                arg_gemv.swap = 1;
+                if (arg -> transa == 0) { // A non transpose
+                    arg_gemv.ldb = arg -> lda;
+                }
+                else { // A transpose
+                    arg_gemv.ldb = 1;
+                }
+                gemv_threading_driver(&arg_gemv);
+                return 1;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+int gemv_kernel_driver(blas_t *arg) {
+
+    dim_t m = arg -> m;
+    dim_t n = arg -> n;
+    uint8_t *a = (uint8_t *) arg -> a;
+    dim_t lda = arg -> lda;
+    int8_t *b = (int8_t *) arg -> b;
+    float beta = *(arg -> beta);
+
+    if (arg -> swap) {
+        arg -> gemv_u8s8s32_kernel(m, n, 1.0f, a, lda, b, beta, arg -> c);
+    }
+    else {
+        arg -> gemv_s8u8s32_kernel(arg -> m, arg -> n, 1.0f, arg -> a,
+                                   arg -> lda, arg -> b, *(arg -> beta), arg -> c);
+    }
+
+    return 0;
+}
+
+int gemv_threading_driver(blas_t *arg) {
+
+    dim_t nthr_m, nthr_n = 1;
+    dim_t MB, NB, UM = 16, UN = 64;
+    dim_t BLOCKM = 192, BLOCKN = 3072;
+    int status;
+    dim_t i;
+
+    dim_t nthr = (mkldnn_in_parallel()) ? 1 : mkldnn_get_max_threads();
+
+    uint8_t *new_x = NULL;
+    int32_t *tmp_y = NULL, *new_y = NULL;
+
+    dim_t m = arg -> m, n = arg -> n;
+
+    blas_t arg_seq = *arg;
+    float zero = 0.0f;
+
+    nthr_m = std::min(std::max(m / BLOCKM, (dim_t) 1), nthr);
+    MB = m / nthr_m;
+    MB = (((MB / UM) * UM) == MB) ? MB : (MB / UM) * UM + UM;
+    nthr_m = (((m / MB) * MB) == m) ? m / MB : m / MB + 1;
+    nthr_m = std::min(std::max(nthr_m, (dim_t) 1), nthr);
+
+    while ((nthr_m * (nthr_n + 1) <= nthr) && ((n / (nthr_n + 1)) >= BLOCKN)) {
+        nthr_n++;
+    }
+
+    NB = n / nthr_n;
+    NB = (((NB / UN) * UN) == NB) ? NB : (NB / UN) * UN + UN;
+    nthr_n = (((n / NB) * NB) == n) ? n / NB : n / NB + 1;
+    nthr_n = std::min(std::max(nthr_n, (dim_t) 1), nthr / nthr_m);
+
+    nthr = nthr_m * nthr_n;
+
+    if (arg -> ldb != 1) {
+        new_x = (uint8_t *)malloc(n, 64);
+        if (new_x == NULL)
+            return 1;
+        for (i = 0; i < n; i++) {
+            new_x[i] = (arg -> b)[i * arg -> ldb];
+        }
+        arg_seq.b = new_x;
+        arg_seq.ldb = 1;
+    }
+    else new_x = (uint8_t *) arg -> b;
+
+    if (arg -> ldc != 1) {
+        new_y = (int32_t *) malloc(nthr_m * PADD_BYTESIZE_ONPAGE(MB, sizeof(int32_t)), 64);
+        if (new_y == NULL) {
+            if (arg -> ldb != 1) {
+                free(new_x);
+            }
+            return 1;
+        }
+    }
+
+    // GEMV computation
+    if (nthr == 1) {
+
+        if (arg -> ldc != 1) {
+            if (*(arg -> beta) != 0.0f) {
+                for (i = 0; i < m; i++) {
+                    new_y[i] = arg -> c[i * arg -> ldc];
+                }
+            }
+        }
+
+        status = gemv_kernel_driver(&arg_seq);
+
+        if (arg -> ldc != 1) {
+            for (i = 0; i < m; i++) {
+                arg -> c[i * arg -> ldc] = new_y[i];
+            }
+        }
+
+        if (arg -> ldb != 1) {
+            free(new_x);
+        }
+        if (arg -> ldc != 1) {
+            free(new_y);
+        }
+        return status;
+    }
+
+    if (nthr_n > 1) {
+        tmp_y = (int32_t *) malloc((nthr_n - 1) * PADD_BYTESIZE_ONPAGE(m, sizeof(int32_t)), PAGESIZE);
+        if (tmp_y == NULL) {
+            if (arg -> ldb != 1) {
+                free(new_x);
+            }
+            return 1;
+        }
+    }
+
+    parallel_nd((int) nthr, [&](const dim_t ithr) {
+
+            dim_t m_from, m_to, myM;
+            dim_t n_from, n_to, myN;
+
+            dim_t n_id, m_id;
+            dim_t loc_incy = 1;
+            int32_t *loc_y;
+
+            blas_t arg_loc = arg_seq;
+            int j;
+
+            m_id = ithr / nthr_n;
+            n_id = ithr % nthr_n;
+
+            m_from = MB * m_id;
+            m_to = MB * (m_id + 1);
+            if ((m_to > m) || (m_id == nthr_m - 1))
+                m_to = m;
+
+            myM = m_to - m_from;
+
+            n_from = NB * n_id;
+            n_to = NB * (n_id + 1);
+            if ((n_to > n) || (n_id == nthr_n - 1))
+                n_to = n;
+
+            myN = n_to - n_from;
+
+            if (n_id != 0) {
+                arg_loc.beta = &zero;
+                loc_y = tmp_y + (NEXT_THR_STRIDE(m, sizeof(int32_t))) * (n_id - 1) + m_from;
+            }
+            else {
+                if (arg -> ldc == 1) {
+                    loc_y = arg_seq.c + m_from;
+                }
+                else {
+                    // need to copy the block of c in new_y
+                    loc_y = new_y + m_id * NEXT_THR_STRIDE(MB, sizeof(int32_t));
+                    if (*(arg -> beta) != 0.0f) {
+                        for (j = 0; j < myM; j++) {
+                            loc_y[j] = arg -> c[(m_from + j) * arg -> ldc];
+                        }
+                    }
+                }
+            }
+
+            arg_loc.m = myM;
+            arg_loc.n = myN;
+            arg_loc.a = arg_seq.a + m_from * arg_seq.lda + n_from;
+            arg_loc.b = arg_seq.b + n_from;
+            arg_loc.c = loc_y;
+            arg_loc.ldc = loc_incy;
+
+            gemv_kernel_driver(&arg_loc);
+
+            if ((n_id == 0) && (arg -> ldc != 1)) {
+                for (j = 0; j < myM; j++) {
+                    arg -> c[(m_from + j) * arg -> ldc] = loc_y[j];
+                }
+            }
+
+        });
+
+    if (nthr_n > 1) {
+        parallel_nd((int) nthr_m, [&](const dim_t ithr) {
+
+                dim_t j, j_from, j_to, ii;
+                int32_t acc;
+
+                j_from = MB * ithr;
+                j_to = MB * (ithr + 1);
+                if ((j_to > m) || (ithr == nthr - 1))
+                    j_to = m;
+
+                for (j = j_from; j < j_to; j++) {
+                    acc = 0;
+                    for (ii = 0; ii < nthr_n - 1; ii++) {
+                        acc += tmp_y[ii * NEXT_THR_STRIDE(m, sizeof(int32_t)) + j];
+                    }
+                    (arg -> c)[j * arg -> ldc] += acc;
+                }
+            });
+        free(tmp_y);
+    }
+
+    if (arg -> ldb != 1) {
+        free(new_x);
+    }
+
+    if (arg -> ldc != 1) {
+        free(new_y);
+    }
+
+    return 0;
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8u8s32_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8u8s32_kern.cpp
new file mode 100644
index 000000000..c57a8c1d1
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8u8s32_kern.cpp
@@ -0,0 +1,411 @@
+/*******************************************************************************
+ * Copyright 2019 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include "jit_avx512_core_kernel_gemv_s8u8s32_kern.hpp"
+
+#ifdef _WIN32
+#define is_windows 1
+#else
+#define is_windows 0
+#endif
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+void jit_avx512_core_gemv_s8u8s32_kern::vnni(Xbyak::Zmm acc, Xbyak::Zmm b,
+                                             Xbyak::Zmm a, Xbyak::Zmm tmp,
+                                             Xbyak::Zmm one, bool swap,
+                                             int use_vnni) {
+
+    if (use_vnni) {
+        if (swap)
+            vpdpbusd(acc, a, b);
+        else
+            vpdpbusd(acc, b, a);
+    }
+
+    else {
+        if (swap)
+            vpmaddubsw(tmp, a, b);
+        else
+            vpmaddubsw(tmp, b, a);
+        vpmaddwd(tmp, tmp, one);
+        vpaddd(acc, tmp, acc);
+    }
+
+}
+
+void jit_avx512_core_gemv_s8u8s32_kern::n_loop_body(int start_a_idx, int start_acc_idx,
+                                                    int b_idx, int nreg_acc,
+                                                    Xbyak::Reg64 A, Xbyak::Reg64 lda,
+                                                    Xbyak::Reg64 X, Xbyak::Zmm tmp,
+                                                    Xbyak::Zmm one, bool swap, int use_vnni,
+                                                    int use_mask, Xbyak::Opmask mask_n) {
+
+    int i;
+    int nreg_A = nreg_acc / 2 + (nreg_acc % 2);
+
+    // load X + j
+    if (use_mask)
+        vmovdqu8(Xbyak::Zmm(b_idx) | mask_n | T_z, ptr[X]);
+    else
+        vmovdqu8(Xbyak::Zmm(b_idx), ptr[X]);
+
+    xor_(r14, r14);
+    // load values of A
+    for (i = 0; i < nreg_A; i++) {
+        if (use_mask)
+            vmovdqu8(Xbyak::Zmm(start_a_idx + i) | mask_n | T_z, ptr[A + r14]);
+        else
+            vmovdqu8(Xbyak::Zmm(start_a_idx + i), ptr[A + r14]);
+        add(r14, lda);
+    }
+
+    for (i = 0; i < nreg_A; i++) {
+        // vnni (acc, b, a, tmp, one, swap, use_vnni)
+        vnni(Xbyak::Zmm(start_acc_idx + i), Xbyak::Zmm(b_idx),
+             Xbyak::Zmm(start_a_idx + i), tmp, one, swap, use_vnni);
+    }
+
+    for (i = 0; i < nreg_A - (nreg_acc % 2); i++) {
+        if (use_mask)
+            vmovdqu8(Xbyak::Zmm(start_a_idx + i) | mask_n | T_z, ptr[A + r14]);
+        else
+            vmovdqu8(Xbyak::Zmm(start_a_idx + i), ptr[A + r14]);
+        add(r14, lda);
+    }
+
+    for (i = 0; i < nreg_A - (nreg_acc % 2); i++) {
+        vnni(Xbyak::Zmm(start_acc_idx + i + nreg_A), Xbyak::Zmm(b_idx),
+             Xbyak::Zmm(start_a_idx + i), tmp, one, swap, use_vnni);
+    }
+
+}
+
+void jit_avx512_core_gemv_s8u8s32_kern::shuffle_and_add(Xbyak::Zmm dest, Xbyak::Zmm A,
+                                                        Xbyak::Zmm B, Xbyak::Zmm C,
+                                                        Xbyak::Zmm D) {
+
+    vshufi32x4(dest, A, C, 0x44);
+    vshufi32x4(A, A, C, 0xEE);
+    vpaddd(C, dest, A); // C = A0 + A2|A1 + A3|C0 + C2|C1 + C3
+
+    vshufi32x4(dest, B, D, 0x44);
+    vshufi32x4(B, B, D, 0xEE);
+    vpaddd(D, dest, B); // D = B0 + B2|B1 + B3|D0 + D2|D1 + D3
+
+    vshufi32x4(A, C, D, 0x88);
+    vshufi32x4(B, C, D, 0xDD);
+    vpaddd(dest, A, B); // dest = SAi|SBi|SCi|SDi
+
+}
+
+void jit_avx512_core_gemv_s8u8s32_kern::update_c(int nreg_acc, Xbyak::Reg64 Y,
+                                                 int start_a_idx, int start_acc_idx,
+                                                 Xbyak::Xmm beta, int use_mask,
+                                                 Xbyak::Opmask mask_m) {
+
+    int l, i, k, j, last_it;
+    Xbyak::Label store_label;
+
+    l = 0;
+    for (k = 0; k < nreg_acc; k += 8) {
+        for (i = 0, j = k; i < 8; i += 4, j += 2) {
+            if (j < nreg_acc) {
+                // shuffle per block of 4 registers
+                shuffle_and_add(Xbyak::Zmm(start_a_idx + l), // dest
+                                Xbyak::Zmm(start_acc_idx + j), // A = acc0
+                                Xbyak::Zmm(start_acc_idx + 1 + j), // B = acc1
+                                Xbyak::Zmm(start_acc_idx + 4 + j), // C = acc4
+                                Xbyak::Zmm(start_acc_idx + 5 + j)); // D = acc5
+
+                // extract low and high from dest and hadd
+                vextracti32x8(Xbyak::Ymm(start_a_idx + l + 1), Xbyak::Zmm(start_a_idx + l), 0);
+                vextracti32x8(Xbyak::Ymm(start_a_idx + l + 2), Xbyak::Zmm(start_a_idx + l), 1);
+                vphaddd(Xbyak::Ymm(start_a_idx + l),
+                        Xbyak::Ymm(start_a_idx + l + 1),
+                        Xbyak::Ymm(start_a_idx + l + 2));
+            }
+            l++;
+        }
+
+        vphaddd(Xbyak::Ymm(start_a_idx + l),
+                Xbyak::Ymm(start_a_idx + l - 2),
+                Xbyak::Ymm(start_a_idx + l - 1));
+
+        l++;
+    }
+
+    // eventually add with C and store new value
+    vxorps(Xbyak::Ymm(start_a_idx),
+           Xbyak::Ymm(start_a_idx),
+           Xbyak::Ymm(start_a_idx));
+    vucomiss(beta, Xbyak::Ymm(start_a_idx));
+    je(store_label, T_NEAR);
+
+    // beta = 1
+    for (k = 0, l = 2; k < nreg_acc; k += 8, l += 3) {
+        // load Y and add
+        last_it = (k + 8) > nreg_acc;
+        if (use_mask && last_it)
+            vmovdqu32(Xbyak::Ymm(start_a_idx + k / 8) | mask_m | T_z, ptr[Y + (k / 8) * 32]);
+        else
+            vmovdqu32(Xbyak::Ymm(start_a_idx + k / 8), ptr[Y + (k / 8) * 32]);
+
+        vpaddd(Xbyak::Ymm(start_a_idx + l),
+               Xbyak::Ymm(start_a_idx + l),
+               Xbyak::Ymm(start_a_idx + k / 8));
+    }
+
+    // store
+    aligned_label(store_label);
+    for (k = 0, l = 2; k < nreg_acc; k += 8, l += 3) {
+        last_it = (k + 8) > nreg_acc;
+        if (use_mask && last_it)
+            vmovdqu32(ptr[Y + (k / 8) * 32], Xbyak::Ymm(start_a_idx + l) | mask_m);
+        else
+            vmovdqu32(ptr[Y + (k / 8) * 32], Xbyak::Ymm(start_a_idx + l));
+    }
+
+}
+
+template <typename T>
+T jit_avx512_core_gemv_s8u8s32_kern::generate(int use_vnni) {
+
+    Xbyak::Opmask mask_n = k1, mask_m = k2;
+    Xbyak::Label one_label, m_tail_label, m_loop_label, n_loop_label;
+    Xbyak::Label n_tail_label, update_c_label, end_label;
+    constexpr unsigned int n_labels = (1 << unroll_m) - 1;
+    Xbyak::Label m_tail_label_case[n_labels];
+    Xbyak::Label n_loop_label_case[n_labels];
+    Xbyak::Label n_tail_label_case[n_labels];
+    Xbyak::Label update_c_label_case[n_labels];
+
+    int i, ii;
+
+    Xbyak::Zmm one, tmp;
+    Xbyak::Reg64 n = abi_param2, m = abi_param1;
+    Xbyak::Reg64 A = is_windows ? abi_param4 : abi_param3;
+    Xbyak::Reg64 lda = is_windows ? abi_param3 : abi_param4;
+    Xbyak::Reg64 X = is_windows ? rdi : r8;
+    Xbyak::Xmm beta = xmm1;
+    Xbyak::Reg64 Y = is_windows ? rsi : r9;
+
+    bool swap = !std::is_same<T, gemv_s8u8s32_kernel_t>::value;
+
+    // Windows: read on the stack lda, X, beta, Y
+
+    int zmm_idx = 1;
+    int nreg_acc = 1 << unroll_m;
+    int nreg_A = 1 << (unroll_m - 1);
+    int nreg_A_acc = nreg_acc + nreg_A;
+
+    if (!use_vnni) {
+        // set a zmm register to one
+        tmp = Xbyak::Zmm(0);
+        one = Xbyak::Zmm(zmm_idx + 1);
+        zmm_idx += 2; // one + tmp
+    }
+    else {
+        beta = xmm0;
+    }
+
+    preamble();
+
+    if (is_windows) {
+        mov(lda, ptr[rsp + get_size_of_abi_save_regs() + 40]);
+        mov(X, ptr[rsp + get_size_of_abi_save_regs() + 48]);
+        movss(beta, ptr[rsp + get_size_of_abi_save_regs() + 56]);
+        mov(Y, ptr[rsp + get_size_of_abi_save_regs() + 64]);
+    }
+
+    if (use_vnni && !is_windows) {
+        movaps(beta, xmm1);
+    }
+
+    mov(rax, (1 << unroll_n) - 1);
+    kmovq(k3, rax);
+
+    and_(rax, n); // rax contains n & ((1 << unroll_n) - 1)
+    mov(rbx, 1);
+    shlx(rbx, rbx, rax);
+    sub(rbx, 1);
+    kmovq(mask_n, rbx);
+    // mask_n set (AVX512 only), can use rax and rbx again
+
+    // set mask_m for update of the C matrix
+    // load/store on the C matrix use Ymm so tail according to Ymm size
+    mov(rax, 7); // 8 * 32 = 256 Ymm size
+    and_(rax, m); // rax contains m & 7
+    mov(rbx, 1);
+    shlx(rbx, rbx, rax);
+    sub(rbx, 1);
+    kmovq(mask_m, rbx);
+    // mask_m set (AVX512 only), can use rax and rbx again
+
+    // setup register of ones when VNNI instructions not available
+    if (!use_vnni) {
+        vmovdqu16(one, ptr[rip + one_label]);
+    }
+
+    // M loop
+    // base pointer for A rax contains a + i * lda
+    // Loop stop when rax >= a + (m & mask_um) * lda = rbx
+    // loop increment r10 = um * lda
+    // rbp = Y + i
+    mov(rax, A); // i = 0
+    mov(rbx, m);
+    and_(rbx, mask_um);
+    imul(rbx, lda);
+    add(rbx, A);
+    mov(r10, lda);
+    sal(r10, unroll_m);
+    mov(rbp, Y);
+
+    // N loop
+    // base pointer for X r11 contains x + j
+    // Loop stop when r11 >= x + n & mask_un = r12
+    // loop increment un
+    // r13 = rax + j = A + i * lda + j
+    mov(r12, n);
+    and_(r12, mask_un);
+    add(r12, X);
+
+    // M loop
+    aligned_label(m_loop_label);
+    cmp(rax, rbx);
+    jge(m_tail_label, T_NEAR);
+
+    // enter M loop
+    for(i = 0; i < nreg_acc; i++) {
+        vpxorq(Xbyak::Zmm(i + zmm_idx + nreg_A),
+               Xbyak::Zmm(i + zmm_idx + nreg_A),
+               Xbyak::Zmm(i + zmm_idx + nreg_A));
+    }
+
+    // N loop
+    mov(r11, X); // j = 0
+    mov(r13, rax);
+    aligned_label(n_loop_label);
+    cmp(r11, r12);
+    jge(n_tail_label, T_NEAR);
+
+    // enter N loop
+
+    n_loop_body(zmm_idx, zmm_idx + nreg_A, zmm_idx + nreg_A_acc, nreg_acc,
+                r13, lda, r11, tmp, one, swap, use_vnni, 0, mask_n);
+
+    // increment rax with un
+    add(r11, 1 << unroll_n);
+    add(r13, 1 << unroll_n);
+    jmp(n_loop_label, T_NEAR);
+    // end N loop
+
+    // N tail
+    aligned_label(n_tail_label);
+
+    ktestq(mask_n, k3);
+    je(update_c_label, T_NEAR);
+    n_loop_body(zmm_idx, zmm_idx + nreg_A, zmm_idx + nreg_A_acc, nreg_acc,
+                r13, lda, r11, tmp, one, swap, use_vnni, 1, mask_n);
+
+    // update C matrix
+    aligned_label(update_c_label);
+
+    update_c(nreg_acc, rbp, zmm_idx, zmm_idx + nreg_A, beta, 0, mask_m);
+
+    // increment rax with um * lda
+    add(rax, r10);
+    add(rbp, 1 << (unroll_m + 2));
+    jmp(m_loop_label, T_NEAR);
+    // end M loop
+
+    // M tail
+    aligned_label(m_tail_label);
+
+    // r10 will contain m_tail = m % unroll_m = m & (1 << unroll_m) - 1
+    mov(r10, m);
+    and_(r10, (1 << unroll_m) - 1);
+    for (ii = 1; ii < 1 << unroll_m; ii++) {
+        aligned_label(m_tail_label_case[ii-1]);
+        cmp(r10, ii);
+        if (ii == (1 << unroll_m) - 1)
+            jne(end_label, T_NEAR);
+        else
+            jne(m_tail_label_case[ii], T_NEAR);
+
+        // m_tail = i, use i accumulators
+
+        for(i = 0; i < ii; i++) {
+            vpxorq(Xbyak::Zmm(i + zmm_idx + nreg_A),
+                   Xbyak::Zmm(i + zmm_idx + nreg_A),
+                   Xbyak::Zmm(i + zmm_idx + nreg_A));
+        }
+
+        // N loop
+        mov(r11, X); // j = 0
+        mov(r13, rax);
+        aligned_label(n_loop_label_case[ii - 1]);
+        cmp(r11, r12);
+        jge(n_tail_label_case[ii - 1], T_NEAR);
+
+        n_loop_body(zmm_idx, zmm_idx + nreg_A, zmm_idx + nreg_A_acc, ii, r13,
+                    lda, r11, tmp, one, swap, use_vnni, 0, mask_n);
+
+        // increment rax with un
+        add(r11, 1 << unroll_n);
+        add(r13, 1 << unroll_n);
+        jmp(n_loop_label_case[ii - 1], T_NEAR);
+        // end N loop
+
+        // N tail
+        aligned_label(n_tail_label_case[ii - 1]);
+        ktestq(mask_n, k3);
+        je(update_c_label_case[ii - 1], T_NEAR);
+        n_loop_body(zmm_idx, zmm_idx + nreg_A, zmm_idx + nreg_A_acc, ii, r13,
+                    lda, r11, tmp, one, swap, use_vnni, 1, mask_n);
+
+        // update C matrix
+        aligned_label(update_c_label_case[ii - 1]);
+        update_c(ii, rbp, zmm_idx, zmm_idx + nreg_A, beta, 1, mask_m);
+
+        if (ii < ((1 << unroll_m) - 1))
+            jmp(end_label, T_NEAR);
+    }
+
+    aligned_label(end_label);
+
+    postamble();
+
+    if (!use_vnni) {
+        aligned_label(one_label);
+        for (i = 0; i < size_vec_reg/8; i++)
+            dq(0x0001000100010001);
+    }
+
+    return (T) getCode();
+}
+
+template jit_avx512_core_gemv_s8u8s32_kern::gemv_s8u8s32_kernel_t
+jit_avx512_core_gemv_s8u8s32_kern::generate<jit_avx512_core_gemv_s8u8s32_kern::gemv_s8u8s32_kernel_t>(int);
+
+template jit_avx512_core_gemv_s8u8s32_kern::gemv_u8s8s32_kernel_t
+jit_avx512_core_gemv_s8u8s32_kern::generate<jit_avx512_core_gemv_s8u8s32_kern::gemv_u8s8s32_kernel_t>(int);
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8u8s32_kern.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8u8s32_kern.hpp
new file mode 100644
index 000000000..9ea23a5f5
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemv_s8u8s32_kern.hpp
@@ -0,0 +1,64 @@
+/*******************************************************************************
+ * Copyright 2019 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+class jit_avx512_core_gemv_s8u8s32_kern : jit_generator {
+
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_gemv_s8u8s32_kern);
+
+    // assumes untoll_{m,n} are a power of 2
+    static constexpr unsigned int unroll_m = 4; // real unrolling factor is 2^unroll_m
+    const int mask_um = 0xFFFFFFF0;
+    static constexpr unsigned int unroll_n = 6; // real unrolling factor is 2^unroll_n
+    const int mask_un = 0xFFFFFFC0;
+    const int size_vec_reg = 64; // bytes
+
+    void aligned_label(Xbyak::Label &label, int alignment = 16) {
+        align(alignment);
+        L(label);
+    }
+
+    void vnni(Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, bool, int);
+    void n_loop_body(int, int, int, int, Xbyak::Reg64, Xbyak::Reg64,
+                     Xbyak::Reg64, Xbyak::Zmm, Xbyak::Zmm, bool, int, int, Xbyak::Opmask);
+    void shuffle_and_add(Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm, Xbyak::Zmm);
+    void update_c(int, Xbyak::Reg64, int, int, Xbyak::Xmm, int, Xbyak::Opmask);
+
+public:
+    jit_avx512_core_gemv_s8u8s32_kern() : jit_generator(nullptr, GEMM_CODE_SIZE) {};
+
+    // m, n, alpha, a, lda, x, beta, y
+    typedef void (*gemv_s8u8s32_kernel_t)(const dim_t, const dim_t, const float,
+                                          const int8_t*, const dim_t, const uint8_t*,
+                                          const float, int32_t*);
+    typedef void (*gemv_u8s8s32_kernel_t)(const dim_t, const dim_t, const float,
+                                          const uint8_t*, const dim_t, const int8_t*,
+                                          const float, int32_t*);
+
+    template <typename T>
+    T generate(int use_vnni);
+
+};
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_an_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_an_kern.cpp
new file mode 100644
index 000000000..544cd2ff2
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_an_kern.cpp
@@ -0,0 +1,819 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_an_kern::jit_avx512_core_u8_copy_an_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l170;
+Xbyak::Label l1f0;
+Xbyak::Label l20;
+Xbyak::Label l224;
+Xbyak::Label l234;
+Xbyak::Label l240;
+Xbyak::Label l254;
+Xbyak::Label l32c;
+Xbyak::Label l34;
+Xbyak::Label l388;
+Xbyak::Label l3b0;
+Xbyak::Label l3c0;
+Xbyak::Label l3cc;
+Xbyak::Label l3dc;
+Xbyak::Label l454;
+Xbyak::Label l48c;
+Xbyak::Label l4a8;
+Xbyak::Label l4b8;
+Xbyak::Label l4c4;
+Xbyak::Label l4d8;
+Xbyak::Label l570;
+Xbyak::Label l5c4;
+Xbyak::Label l5f0;
+Xbyak::Label l60c;
+Xbyak::Label l61c;
+Xbyak::Label l628;
+Xbyak::Label l638;
+Xbyak::Label l6b0;
+Xbyak::Label l6f4;
+Xbyak::Label l720;
+Xbyak::Label l73c;
+Xbyak::Label l74c;
+Xbyak::Label l758;
+Xbyak::Label l76c;
+Xbyak::Label l804;
+Xbyak::Label l858;
+Xbyak::Label l88c;
+Xbyak::Label l8a4;
+Xbyak::Label l8b2;
+Xbyak::Label l8bc;
+Xbyak::Label l8cc;
+Xbyak::Label l944;
+Xbyak::Label l98c;
+Xbyak::Label l9b0;
+Xbyak::Label l9c8;
+Xbyak::Label l9d8;
+
+	preamble();
+#ifdef _WIN32
+	auto stacksize = get_size_of_abi_save_regs();
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(M, qword[M]);
+	mov(N, qword[N]);
+	mov(LDA, qword[LDA]);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	sub(A, -128);
+	sub(B, -128);
+	cmp(N, 0x30);
+	jl(l234, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	add(A, 0x30);
+	mov(I, M);
+	sar(I, 0x2);
+	jle(l170, T_NEAR);
+	align(4);
+
+L(l34);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movdqu(xword[B-0x60], xmm4);
+	movdqu(xword[B-0x50], xmm2);
+	movdqu(xmm0, xword[A1-0x70]);
+	movdqu(xmm1, xword[A1+LDA*1-0x70]);
+	movdqu(xmm2, xword[A1+LDA*2-0x70]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x70]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	movdqu(xword[B-0x30], xmm1);
+	movdqu(xword[B-0x20], xmm4);
+	movdqu(xword[B-0x10], xmm2);
+	movdqu(xmm0, xword[A1-0x60]);
+	movdqu(xmm1, xword[A1+LDA*1-0x60]);
+	movdqu(xmm2, xword[A1+LDA*2-0x60]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x60]);
+	lea(A1, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	movdqu(xword[B], xmm0);
+	movdqu(xword[B+0x10], xmm1);
+	movdqu(xword[B+0x20], xmm4);
+	movdqu(xword[B+0x30], xmm2);
+	sub(B, -192);
+	dec(I);
+	jg(l34, T_NEAR);
+	align(4);
+
+L(l170);
+	test(M, 0x2);
+	jle(l1f0, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1-0x70]);
+	movdqu(xmm2, xword[A1-0x60]);
+	add(A1, LDA);
+	movdqu(xmm3, xword[A1-0x80]);
+	movdqu(xmm4, xword[A1-0x70]);
+	movdqu(xmm5, xword[A1-0x60]);
+	add(A1, LDA);
+	movdqa(xmm6, xmm0);
+	punpcklbw(xmm0, xmm3);
+	punpckhbw(xmm6, xmm3);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm6);
+	movdqa(xmm6, xmm1);
+	punpcklbw(xmm1, xmm4);
+	punpckhbw(xmm6, xmm4);
+	movdqu(xword[B-0x60], xmm1);
+	movdqu(xword[B-0x50], xmm6);
+	movdqa(xmm6, xmm2);
+	punpcklbw(xmm2, xmm5);
+	punpckhbw(xmm6, xmm5);
+	movdqu(xword[B-0x40], xmm2);
+	movdqu(xword[B-0x30], xmm6);
+	sub(B, -96);
+	align(4);
+
+L(l1f0);
+	test(M, 0x1);
+	jle(l224, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1-0x70]);
+	movdqu(xmm2, xword[A1-0x60]);
+	add(A1, LDA);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movdqu(xword[B-0x60], xmm2);
+	sub(B, -48);
+	align(4);
+
+L(l224);
+	sub(N, 0x30);
+	cmp(N, 0x30);
+	jge(l20, T_NEAR);
+	align(4);
+
+L(l234);
+	cmp(N, 0x20);
+	jl(l3c0, T_NEAR);
+	align(4);
+
+L(l240);
+	mov(A1, A);
+	add(A, 0x20);
+	mov(I, M);
+	sar(I, 0x2);
+	jle(l32c, T_NEAR);
+	align(4);
+
+L(l254);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movdqu(xword[B-0x60], xmm4);
+	movdqu(xword[B-0x50], xmm2);
+	movdqu(xmm0, xword[A1-0x70]);
+	movdqu(xmm1, xword[A1+LDA*1-0x70]);
+	movdqu(xmm2, xword[A1+LDA*2-0x70]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x70]);
+	lea(A1, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	movdqu(xword[B-0x30], xmm1);
+	movdqu(xword[B-0x20], xmm4);
+	movdqu(xword[B-0x10], xmm2);
+	sub(B, -128);
+	dec(I);
+	jg(l254, T_NEAR);
+	align(4);
+
+L(l32c);
+	test(M, 0x2);
+	jle(l388, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1-0x70]);
+	add(A1, LDA);
+	movdqu(xmm2, xword[A1-0x80]);
+	movdqu(xmm3, xword[A1-0x70]);
+	add(A1, LDA);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm2);
+	punpckhbw(xmm4, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm4);
+	movdqa(xmm4, xmm1);
+	punpcklbw(xmm1, xmm3);
+	punpckhbw(xmm4, xmm3);
+	movdqu(xword[B-0x60], xmm1);
+	movdqu(xword[B-0x50], xmm4);
+	sub(B, -64);
+	align(4);
+
+L(l388);
+	test(M, 0x1);
+	jle(l3b0, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1-0x70]);
+	add(A1, LDA);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l3b0);
+	sub(N, 0x20);
+	cmp(N, 0x20);
+	jge(l240, T_NEAR);
+	align(4);
+
+L(l3c0);
+	cmp(N, 0x10);
+	jl(l4b8, T_NEAR);
+	align(4);
+
+L(l3cc);
+	mov(A1, A);
+	add(A, 0x10);
+	mov(I, M);
+	sar(I, 0x2);
+	jle(l454, T_NEAR);
+	align(4);
+
+L(l3dc);
+	movdqu(xmm0, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm1, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm2, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm3, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm1, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm1, xmm3);
+	movdqa(xmm3, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm3, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm1);
+	punpckhwd(xmm2, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm3);
+	movdqu(xword[B-0x60], xmm4);
+	movdqu(xword[B-0x50], xmm2);
+	sub(B, -64);
+	dec(I);
+	jg(l3dc, T_NEAR);
+	align(4);
+
+L(l454);
+	test(M, 0x2);
+	jle(l48c, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm1, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqa(xmm2, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm2, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm2);
+	sub(B, -32);
+	align(4);
+
+L(l48c);
+	test(M, 0x1);
+	jle(l4a8, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l4a8);
+	sub(N, 0x10);
+	cmp(N, 0x10);
+	jge(l3cc, T_NEAR);
+	align(4);
+
+L(l4b8);
+	cmp(N, 0x8);
+	jl(l61c, T_NEAR);
+	align(4);
+
+L(l4c4);
+	mov(A1, A);
+	add(A, 0x8);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(l570, T_NEAR);
+	align(4);
+
+L(l4d8);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	dec(I);
+	jg(l4d8, T_NEAR);
+	align(4);
+
+L(l570);
+	test(M, 0x4);
+	jle(l5c4, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l5c4);
+	test(M, 0x2);
+	jle(l5f0, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l5f0);
+	test(M, 0x1);
+	jle(l60c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l60c);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l4c4, T_NEAR);
+	align(4);
+
+L(l61c);
+	cmp(N, 0x4);
+	jl(l74c, T_NEAR);
+	align(4);
+
+L(l628);
+	mov(A1, A);
+	add(A, 0x4);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(l6b0, T_NEAR);
+	align(4);
+
+L(l638);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	dec(I);
+	jg(l638, T_NEAR);
+	align(4);
+
+L(l6b0);
+	test(M, 0x4);
+	jle(l6f4, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l6f4);
+	test(M, 0x2);
+	jle(l720, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l720);
+	test(M, 0x1);
+	jle(l73c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l73c);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l628, T_NEAR);
+	align(4);
+
+L(l74c);
+	cmp(N, 0x2);
+	jl(l8b2, T_NEAR);
+	align(4);
+
+L(l758);
+	mov(A1, A);
+	add(A, 0x2);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l804, T_NEAR);
+	align(4);
+
+L(l76c);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm4, eax, 0x0);
+	punpcklbw(xmm1, xmm2);
+	punpcklbw(xmm3, xmm4);
+	punpcklwd(xmm1, xmm3);
+	punpcklqdq(xmm0, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(LDA3);
+	jg(l76c, T_NEAR);
+	align(4);
+
+L(l804);
+	test(M, 0x4);
+	jle(l858, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l858);
+	test(M, 0x2);
+	jle(l88c, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l88c);
+	test(M, 0x1);
+	jle(l8a4, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	mov(word[B-0x80], ax);
+	sub(B, -2);
+	align(4);
+
+L(l8a4);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l758, T_NEAR);
+	align(4);
+
+L(l8b2);
+	cmp(N, 0x1);
+	jl(l9d8, T_NEAR);
+	align(4);
+
+L(l8bc);
+	mov(A1, A);
+	add(A, 0x1);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l944, T_NEAR);
+	align(4);
+
+L(l8cc);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x7);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	dec(LDA3);
+	jg(l8cc, T_NEAR);
+	align(4);
+
+L(l944);
+	test(M, 0x4);
+	jle(l98c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l98c);
+	test(M, 0x2);
+	jle(l9b0, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	align(4);
+
+L(l9b0);
+	test(M, 0x1);
+	jle(l9c8, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l9c8);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l8bc, T_NEAR);
+	align(4);
+
+L(l9d8);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_at_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_at_kern.cpp
new file mode 100644
index 000000000..1c11fc6ce
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_at_kern.cpp
@@ -0,0 +1,2209 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_at_kern::jit_avx512_core_u8_copy_at_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l1014;
+Xbyak::Label l1390;
+Xbyak::Label l159c;
+Xbyak::Label l173c;
+Xbyak::Label l18e4;
+Xbyak::Label l1a7c;
+Xbyak::Label l1a8c;
+Xbyak::Label l1a98;
+Xbyak::Label l1ab4;
+Xbyak::Label l1c64;
+Xbyak::Label l1d74;
+Xbyak::Label l1e50;
+Xbyak::Label l1f2c;
+Xbyak::Label l1ffc;
+Xbyak::Label l20;
+Xbyak::Label l200c;
+Xbyak::Label l2018;
+Xbyak::Label l2034;
+Xbyak::Label l2110;
+Xbyak::Label l21a0;
+Xbyak::Label l2210;
+Xbyak::Label l2284;
+Xbyak::Label l22f0;
+Xbyak::Label l2300;
+Xbyak::Label l230c;
+Xbyak::Label l2324;
+Xbyak::Label l2398;
+Xbyak::Label l23e8;
+Xbyak::Label l242c;
+Xbyak::Label l2474;
+Xbyak::Label l24b4;
+Xbyak::Label l24c4;
+Xbyak::Label l24d0;
+Xbyak::Label l24e8;
+Xbyak::Label l2520;
+Xbyak::Label l254c;
+Xbyak::Label l2578;
+Xbyak::Label l25a8;
+Xbyak::Label l25c8;
+Xbyak::Label l25d6;
+Xbyak::Label l25e0;
+Xbyak::Label l25f0;
+Xbyak::Label l260c;
+Xbyak::Label l262c;
+Xbyak::Label l264c;
+Xbyak::Label l2668;
+Xbyak::Label l2680;
+Xbyak::Label l2690;
+Xbyak::Label l44;
+Xbyak::Label l58c;
+Xbyak::Label l8b0;
+Xbyak::Label lb14;
+Xbyak::Label ld84;
+Xbyak::Label lfdc;
+Xbyak::Label lfec;
+Xbyak::Label lff8;
+
+	preamble();
+#ifdef _WIN32
+	auto stacksize = get_size_of_abi_save_regs();
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(N, qword[N]);
+	mov(M, qword[M]);
+	mov(LDA, qword[LDA]);
+	sub(A, -128);
+	sub(B, -128);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	cmp(N, 0x30);
+	jl(lfec, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	mov(I, LDA);
+	shl(I, 0x5);
+	lea(I, ptr[I+LDA*8]);
+	lea(I, ptr[I+LDA*8]);
+	add(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l58c, T_NEAR);
+	align(4);
+
+L(l44);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B+0x40], xmm1);
+	movdqu(xword[B+0x100], xmm4);
+	movdqu(xword[B+0x1c0], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B+0x50], xmm1);
+	movdqu(xword[B+0x110], xmm4);
+	movdqu(xword[B+0x1d0], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B+0x60], xmm1);
+	movdqu(xword[B+0x120], xmm4);
+	movdqu(xword[B+0x1e0], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	movdqu(xword[B+0x70], xmm1);
+	movdqu(xword[B+0x130], xmm4);
+	movdqu(xword[B+0x1f0], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	movdqu(xword[B+0x80], xmm1);
+	movdqu(xword[B+0x140], xmm4);
+	movdqu(xword[B+0x200], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x30], xmm0);
+	movdqu(xword[B+0x90], xmm1);
+	movdqu(xword[B+0x150], xmm4);
+	movdqu(xword[B+0x210], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x20], xmm0);
+	movdqu(xword[B+0xa0], xmm1);
+	movdqu(xword[B+0x160], xmm4);
+	movdqu(xword[B+0x220], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x10], xmm0);
+	movdqu(xword[B+0xb0], xmm1);
+	movdqu(xword[B+0x170], xmm4);
+	movdqu(xword[B+0x230], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B], xmm0);
+	movdqu(xword[B+0xc0], xmm1);
+	movdqu(xword[B+0x180], xmm4);
+	movdqu(xword[B+0x240], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B+0x10], xmm0);
+	movdqu(xword[B+0xd0], xmm1);
+	movdqu(xword[B+0x190], xmm4);
+	movdqu(xword[B+0x250], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B+0x20], xmm0);
+	movdqu(xword[B+0xe0], xmm1);
+	movdqu(xword[B+0x1a0], xmm4);
+	movdqu(xword[B+0x260], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B+0x30], xmm0);
+	movdqu(xword[B+0xf0], xmm1);
+	movdqu(xword[B+0x1b0], xmm4);
+	movdqu(xword[B+0x270], xmm3);
+	sub(A1, -16);
+	sub(B, -768);
+	dec(I);
+	jg(l44, T_NEAR);
+	align(4);
+
+L(l58c);
+	test(M, 0x8);
+	jle(l8b0, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B+0x40], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B+0x50], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B+0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x50], xmm0);
+	movdqu(xword[B+0x70], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x40], xmm0);
+	movdqu(xword[B+0x80], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x30], xmm0);
+	movdqu(xword[B+0x90], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x20], xmm0);
+	movdqu(xword[B+0xa0], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x10], xmm0);
+	movdqu(xword[B+0xb0], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B], xmm0);
+	movdqu(xword[B+0xc0], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B+0x10], xmm0);
+	movdqu(xword[B+0xd0], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B+0x20], xmm0);
+	movdqu(xword[B+0xe0], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B+0x30], xmm0);
+	movdqu(xword[B+0xf0], xmm1);
+	sub(A1, -8);
+	sub(B, -384);
+	align(4);
+
+L(l8b0);
+	test(M, 0x4);
+	jle(lb14, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x50], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x40], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x30], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x20], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x10], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B+0x10], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B+0x20], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B+0x30], xmm0);
+	sub(A1, -4);
+	sub(B, -192);
+	align(4);
+
+L(lb14);
+	test(M, 0x2);
+	jle(ld84, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x7);
+	movdqu(xword[B-0x80], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x70], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x60], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x50], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x40], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x30], xmm0);
+	sub(A1, -2);
+	sub(B, -96);
+	align(4);
+
+L(ld84);
+	test(M, 0x1);
+	jle(lfdc, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	movdqu(xword[B-0x80], xmm0);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	movdqu(xword[B-0x70], xmm0);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	movdqu(xword[B-0x60], xmm0);
+	sub(B, -48);
+	align(4);
+
+L(lfdc);
+	sub(N, 0x30);
+	cmp(N, 0x30);
+	jge(l20, T_NEAR);
+	align(4);
+
+L(lfec);
+	cmp(N, 0x20);
+	jl(l1a8c, T_NEAR);
+	align(4);
+
+L(lff8);
+	mov(A1, A);
+	mov(I, LDA);
+	shl(I, 0x5);
+	add(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l1390, T_NEAR);
+	align(4);
+
+L(l1014);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B], xmm1);
+	movdqu(xword[B+0x80], xmm4);
+	movdqu(xword[B+0x100], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B+0x10], xmm1);
+	movdqu(xword[B+0x90], xmm4);
+	movdqu(xword[B+0x110], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B+0x20], xmm1);
+	movdqu(xword[B+0xa0], xmm4);
+	movdqu(xword[B+0x120], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	movdqu(xword[B+0x30], xmm1);
+	movdqu(xword[B+0xb0], xmm4);
+	movdqu(xword[B+0x130], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	movdqu(xword[B+0x40], xmm1);
+	movdqu(xword[B+0xc0], xmm4);
+	movdqu(xword[B+0x140], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x30], xmm0);
+	movdqu(xword[B+0x50], xmm1);
+	movdqu(xword[B+0xd0], xmm4);
+	movdqu(xword[B+0x150], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x20], xmm0);
+	movdqu(xword[B+0x60], xmm1);
+	movdqu(xword[B+0xe0], xmm4);
+	movdqu(xword[B+0x160], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x10], xmm0);
+	movdqu(xword[B+0x70], xmm1);
+	movdqu(xword[B+0xf0], xmm4);
+	movdqu(xword[B+0x170], xmm3);
+	sub(A1, -16);
+	sub(B, -512);
+	dec(I);
+	jg(l1014, T_NEAR);
+	align(4);
+
+L(l1390);
+	test(M, 0x8);
+	jle(l159c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B+0x10], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B+0x20], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x50], xmm0);
+	movdqu(xword[B+0x30], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x40], xmm0);
+	movdqu(xword[B+0x40], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x30], xmm0);
+	movdqu(xword[B+0x50], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x20], xmm0);
+	movdqu(xword[B+0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x10], xmm0);
+	movdqu(xword[B+0x70], xmm1);
+	sub(A1, -8);
+	sub(B, -256);
+	align(4);
+
+L(l159c);
+	test(M, 0x4);
+	jle(l173c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x50], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x40], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x30], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x20], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x10], xmm0);
+	sub(A1, -4);
+	sub(B, -128);
+	align(4);
+
+L(l173c);
+	test(M, 0x2);
+	jle(l18e4, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x7);
+	movdqu(xword[B-0x80], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x70], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x60], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqu(xword[B-0x50], xmm0);
+	sub(A1, -2);
+	sub(B, -64);
+	align(4);
+
+L(l18e4);
+	test(M, 0x1);
+	jle(l1a7c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	movdqu(xword[B-0x80], xmm0);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	align(4);
+
+L(l1a7c);
+	sub(N, 0x20);
+	cmp(N, 0x20);
+	jge(lff8, T_NEAR);
+	align(4);
+
+L(l1a8c);
+	cmp(N, 0x10);
+	jl(l200c, T_NEAR);
+	align(4);
+
+L(l1a98);
+	mov(A1, A);
+	mov(I, LDA);
+	shl(I, 0x4);
+	add(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l1c64, T_NEAR);
+	align(4);
+
+L(l1ab4);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x40], xmm1);
+	movdqu(xword[B], xmm4);
+	movdqu(xword[B+0x40], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B-0x30], xmm1);
+	movdqu(xword[B+0x10], xmm4);
+	movdqu(xword[B+0x50], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B-0x20], xmm1);
+	movdqu(xword[B+0x20], xmm4);
+	movdqu(xword[B+0x60], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	movdqu(xword[B-0x10], xmm1);
+	movdqu(xword[B+0x30], xmm4);
+	movdqu(xword[B+0x70], xmm3);
+	sub(A1, -16);
+	sub(B, -256);
+	dec(I);
+	jg(l1ab4, T_NEAR);
+	align(4);
+
+L(l1c64);
+	test(M, 0x8);
+	jle(l1d74, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x40], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B-0x30], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B-0x20], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x50], xmm0);
+	movdqu(xword[B-0x10], xmm1);
+	sub(A1, -8);
+	sub(B, -128);
+	align(4);
+
+L(l1d74);
+	test(M, 0x4);
+	jle(l1e50, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x50], xmm0);
+	sub(A1, -4);
+	sub(B, -64);
+	align(4);
+
+L(l1e50);
+	test(M, 0x2);
+	jle(l1f2c, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x7);
+	movdqu(xword[B-0x80], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	movdqu(xword[B-0x70], xmm0);
+	sub(A1, -2);
+	sub(B, -32);
+	align(4);
+
+L(l1f2c);
+	test(M, 0x1);
+	jle(l1ffc, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0xf);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l1ffc);
+	sub(N, 0x10);
+	cmp(N, 0x10);
+	jge(l1a98, T_NEAR);
+	align(4);
+
+L(l200c);
+	cmp(N, 0x8);
+	jl(l2300, T_NEAR);
+	align(4);
+
+L(l2018);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*4]);
+	lea(I, ptr[A1+LDA*8]);
+	mov(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l2110, T_NEAR);
+	align(4);
+
+L(l2034);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	sub(A1, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x60], xmm1);
+	movdqu(xword[B-0x40], xmm4);
+	movdqu(xword[B-0x20], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	movdqu(xword[B-0x30], xmm4);
+	movdqu(xword[B-0x10], xmm3);
+	sub(B, -128);
+	dec(I);
+	jg(l2034, T_NEAR);
+	align(4);
+
+L(l2110);
+	test(M, 0x8);
+	jle(l21a0, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	sub(A1, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	align(4);
+
+L(l21a0);
+	test(M, 0x4);
+	jle(l2210, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	sub(A1, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	align(4);
+
+L(l2210);
+	test(M, 0x2);
+	jle(l2284, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x7);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l2284);
+	test(M, 0x1);
+	jle(l22f0, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x7);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l22f0);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l2018, T_NEAR);
+	align(4);
+
+L(l2300);
+	cmp(N, 0x4);
+	jl(l24c4, T_NEAR);
+	align(4);
+
+L(l230c);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*2]);
+	lea(I, ptr[A1+LDA*4]);
+	mov(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l2398, T_NEAR);
+	align(4);
+
+L(l2324);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm2, xword[A2-0x80]);
+	movdqu(xmm3, xword[A2+LDA*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movdqu(xword[B-0x60], xmm4);
+	movdqu(xword[B-0x50], xmm3);
+	sub(B, -64);
+	dec(I);
+	jg(l2324, T_NEAR);
+	align(4);
+
+L(l2398);
+	test(M, 0x8);
+	jle(l23e8, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	sub(A1, -8);
+	movq(xmm2, qword[A2-0x80]);
+	movq(xmm3, qword[A2+LDA*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l23e8);
+	test(M, 0x4);
+	jle(l242c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	sub(A1, -4);
+	movd(xmm2, dword[A2-0x80]);
+	movd(xmm3, dword[A2+LDA*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l242c);
+	test(M, 0x2);
+	jle(l2474, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x3);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l2474);
+	test(M, 0x1);
+	jle(l24b4, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l24b4);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l230c, T_NEAR);
+	align(4);
+
+L(l24c4);
+	cmp(N, 0x2);
+	jl(l25d6, T_NEAR);
+	align(4);
+
+L(l24d0);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*1]);
+	lea(I, ptr[A1+LDA*2]);
+	mov(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l2520, T_NEAR);
+	align(4);
+
+L(l24e8);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm1, xword[A2-0x80]);
+	sub(A2, -16);
+	movdqa(xmm2, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm2, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm2);
+	sub(B, -32);
+	dec(I);
+	jg(l24e8, T_NEAR);
+	align(4);
+
+L(l2520);
+	test(M, 0x8);
+	jle(l254c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	movq(xmm1, qword[A2-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l254c);
+	test(M, 0x4);
+	jle(l2578, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	movd(xmm1, dword[A2-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l2578);
+	test(M, 0x2);
+	jle(l25a8, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x1);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l25a8);
+	test(M, 0x1);
+	jle(l25c8, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A2-0x80]);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	align(4);
+
+L(l25c8);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l24d0, T_NEAR);
+	align(4);
+
+L(l25d6);
+	cmp(N, 0x1);
+	jl(l2690, T_NEAR);
+	align(4);
+
+L(l25e0);
+	mov(A1, A);
+	add(A, LDA);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l260c, T_NEAR);
+	align(4);
+
+L(l25f0);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(I);
+	jg(l25f0, T_NEAR);
+	align(4);
+
+L(l260c);
+	test(M, 0x8);
+	jle(l262c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l262c);
+	test(M, 0x4);
+	jle(l264c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l264c);
+	test(M, 0x2);
+	jle(l2668, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	mov(word[B-0x80], ax);
+	sub(A1, -2);
+	sub(B, -2);
+	align(4);
+
+L(l2668);
+	test(M, 0x1);
+	jle(l2680, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l2680);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l25e0, T_NEAR);
+	align(4);
+
+L(l2690);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bn_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bn_kern.cpp
new file mode 100644
index 000000000..56c36ee14
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bn_kern.cpp
@@ -0,0 +1,564 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_bn_kern::jit_avx512_core_u8_copy_bn_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l118;
+Xbyak::Label l1a8;
+Xbyak::Label l20;
+Xbyak::Label l218;
+Xbyak::Label l28c;
+Xbyak::Label l2f8;
+Xbyak::Label l308;
+Xbyak::Label l314;
+Xbyak::Label l32c;
+Xbyak::Label l3a0;
+Xbyak::Label l3c;
+Xbyak::Label l3f0;
+Xbyak::Label l434;
+Xbyak::Label l47c;
+Xbyak::Label l4bc;
+Xbyak::Label l4cc;
+Xbyak::Label l4d8;
+Xbyak::Label l4f0;
+Xbyak::Label l528;
+Xbyak::Label l554;
+Xbyak::Label l580;
+Xbyak::Label l5b0;
+Xbyak::Label l5d0;
+Xbyak::Label l5de;
+Xbyak::Label l5e8;
+Xbyak::Label l5f8;
+Xbyak::Label l614;
+Xbyak::Label l634;
+Xbyak::Label l654;
+Xbyak::Label l670;
+Xbyak::Label l688;
+Xbyak::Label l698;
+
+	preamble();
+#ifdef _WIN32
+	auto stacksize = get_size_of_abi_save_regs();
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(N, qword[N]);
+	mov(M, qword[M]);
+	mov(LDA, qword[LDA]);
+	sub(A, -128);
+	sub(B, -128);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	cmp(N, 0x8);
+	jl(l308, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*4]);
+	lea(I, ptr[A1+LDA*8]);
+	mov(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l118, T_NEAR);
+	align(4);
+
+L(l3c);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	sub(A1, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x60], xmm1);
+	movdqu(xword[B-0x40], xmm4);
+	movdqu(xword[B-0x20], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	movdqu(xword[B-0x30], xmm4);
+	movdqu(xword[B-0x10], xmm3);
+	sub(B, -128);
+	dec(I);
+	jg(l3c, T_NEAR);
+	align(4);
+
+L(l118);
+	test(M, 0x8);
+	jle(l1a8, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	sub(A1, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	align(4);
+
+L(l1a8);
+	test(M, 0x4);
+	jle(l218, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	sub(A1, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	align(4);
+
+L(l218);
+	test(M, 0x2);
+	jle(l28c, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x7);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l28c);
+	test(M, 0x1);
+	jle(l2f8, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x7);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l2f8);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l20, T_NEAR);
+	align(4);
+
+L(l308);
+	cmp(N, 0x4);
+	jl(l4cc, T_NEAR);
+	align(4);
+
+L(l314);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*2]);
+	lea(I, ptr[A1+LDA*4]);
+	mov(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l3a0, T_NEAR);
+	align(4);
+
+L(l32c);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm2, xword[A2-0x80]);
+	movdqu(xmm3, xword[A2+LDA*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movdqu(xword[B-0x60], xmm4);
+	movdqu(xword[B-0x50], xmm3);
+	sub(B, -64);
+	dec(I);
+	jg(l32c, T_NEAR);
+	align(4);
+
+L(l3a0);
+	test(M, 0x8);
+	jle(l3f0, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	sub(A1, -8);
+	movq(xmm2, qword[A2-0x80]);
+	movq(xmm3, qword[A2+LDA*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l3f0);
+	test(M, 0x4);
+	jle(l434, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	sub(A1, -4);
+	movd(xmm2, dword[A2-0x80]);
+	movd(xmm3, dword[A2+LDA*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l434);
+	test(M, 0x2);
+	jle(l47c, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x3);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l47c);
+	test(M, 0x1);
+	jle(l4bc, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l4bc);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l314, T_NEAR);
+	align(4);
+
+L(l4cc);
+	cmp(N, 0x2);
+	jl(l5de, T_NEAR);
+	align(4);
+
+L(l4d8);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*1]);
+	lea(I, ptr[A1+LDA*2]);
+	mov(A, I);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l528, T_NEAR);
+	align(4);
+
+L(l4f0);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm1, xword[A2-0x80]);
+	sub(A2, -16);
+	movdqa(xmm2, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm2, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm2);
+	sub(B, -32);
+	dec(I);
+	jg(l4f0, T_NEAR);
+	align(4);
+
+L(l528);
+	test(M, 0x8);
+	jle(l554, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	movq(xmm1, qword[A2-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l554);
+	test(M, 0x4);
+	jle(l580, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	movd(xmm1, dword[A2-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l580);
+	test(M, 0x2);
+	jle(l5b0, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x1);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l5b0);
+	test(M, 0x1);
+	jle(l5d0, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A2-0x80]);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	align(4);
+
+L(l5d0);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l4d8, T_NEAR);
+	align(4);
+
+L(l5de);
+	cmp(N, 0x1);
+	jl(l698, T_NEAR);
+	align(4);
+
+L(l5e8);
+	mov(A1, A);
+	add(A, LDA);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l614, T_NEAR);
+	align(4);
+
+L(l5f8);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(I);
+	jg(l5f8, T_NEAR);
+	align(4);
+
+L(l614);
+	test(M, 0x8);
+	jle(l634, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l634);
+	test(M, 0x4);
+	jle(l654, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l654);
+	test(M, 0x2);
+	jle(l670, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	mov(word[B-0x80], ax);
+	sub(A1, -2);
+	sub(B, -2);
+	align(4);
+
+L(l670);
+	test(M, 0x1);
+	jle(l688, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l688);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l5e8, T_NEAR);
+	align(4);
+
+L(l698);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bt_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bt_kern.cpp
new file mode 100644
index 000000000..53e99d94d
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bt_kern.cpp
@@ -0,0 +1,501 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_bt_kern::jit_avx512_core_u8_copy_bt_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l120;
+Xbyak::Label l14c;
+Xbyak::Label l168;
+Xbyak::Label l178;
+Xbyak::Label l184;
+Xbyak::Label l194;
+Xbyak::Label l20;
+Xbyak::Label l20c;
+Xbyak::Label l250;
+Xbyak::Label l27c;
+Xbyak::Label l298;
+Xbyak::Label l2a8;
+Xbyak::Label l2b4;
+Xbyak::Label l2c8;
+Xbyak::Label l34;
+Xbyak::Label l360;
+Xbyak::Label l3b4;
+Xbyak::Label l3e8;
+Xbyak::Label l400;
+Xbyak::Label l40e;
+Xbyak::Label l418;
+Xbyak::Label l428;
+Xbyak::Label l4a0;
+Xbyak::Label l4e8;
+Xbyak::Label l50c;
+Xbyak::Label l524;
+Xbyak::Label l534;
+Xbyak::Label lcc;
+
+	preamble();
+#ifdef _WIN32
+	auto stacksize = get_size_of_abi_save_regs();
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(M, qword[M]);
+	mov(N, qword[N]);
+	mov(LDA, qword[LDA]);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	sub(A, -128);
+	sub(B, -128);
+	cmp(N, 0x8);
+	jl(l178, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	add(A, 0x8);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(lcc, T_NEAR);
+	align(4);
+
+L(l34);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	dec(I);
+	jg(l34, T_NEAR);
+	align(4);
+
+L(lcc);
+	test(M, 0x4);
+	jle(l120, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l120);
+	test(M, 0x2);
+	jle(l14c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l14c);
+	test(M, 0x1);
+	jle(l168, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l168);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l20, T_NEAR);
+	align(4);
+
+L(l178);
+	cmp(N, 0x4);
+	jl(l2a8, T_NEAR);
+	align(4);
+
+L(l184);
+	mov(A1, A);
+	add(A, 0x4);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(l20c, T_NEAR);
+	align(4);
+
+L(l194);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	dec(I);
+	jg(l194, T_NEAR);
+	align(4);
+
+L(l20c);
+	test(M, 0x4);
+	jle(l250, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l250);
+	test(M, 0x2);
+	jle(l27c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l27c);
+	test(M, 0x1);
+	jle(l298, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l298);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l184, T_NEAR);
+	align(4);
+
+L(l2a8);
+	cmp(N, 0x2);
+	jl(l40e, T_NEAR);
+	align(4);
+
+L(l2b4);
+	mov(A1, A);
+	add(A, 0x2);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l360, T_NEAR);
+	align(4);
+
+L(l2c8);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm4, eax, 0x0);
+	punpcklbw(xmm1, xmm2);
+	punpcklbw(xmm3, xmm4);
+	punpcklwd(xmm1, xmm3);
+	punpcklqdq(xmm0, xmm1);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(LDA3);
+	jg(l2c8, T_NEAR);
+	align(4);
+
+L(l360);
+	test(M, 0x4);
+	jle(l3b4, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l3b4);
+	test(M, 0x2);
+	jle(l3e8, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l3e8);
+	test(M, 0x1);
+	jle(l400, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	mov(word[B-0x80], ax);
+	sub(B, -2);
+	align(4);
+
+L(l400);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l2b4, T_NEAR);
+	align(4);
+
+L(l40e);
+	cmp(N, 0x1);
+	jl(l534, T_NEAR);
+	align(4);
+
+L(l418);
+	mov(A1, A);
+	add(A, 0x1);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l4a0, T_NEAR);
+	align(4);
+
+L(l428);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x7);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	dec(LDA3);
+	jg(l428, T_NEAR);
+	align(4);
+
+L(l4a0);
+	test(M, 0x4);
+	jle(l4e8, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l4e8);
+	test(M, 0x2);
+	jle(l50c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	align(4);
+
+L(l50c);
+	test(M, 0x1);
+	jle(l524, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l524);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l418, T_NEAR);
+	align(4);
+
+L(l534);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_an_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_an_kern.cpp
new file mode 100644
index 000000000..49a312fc8
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_an_kern.cpp
@@ -0,0 +1,1283 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_sum_an_kern::jit_avx512_core_u8_copy_sum_an_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#define ARG_BIAS	24+stacksize+rsp
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+#define ARG_BIAS	72+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l1024;
+Xbyak::Label l1090;
+Xbyak::Label l10d4;
+Xbyak::Label l10fc;
+Xbyak::Label l111a;
+Xbyak::Label l1124;
+Xbyak::Label l113c;
+Xbyak::Label l11d4;
+Xbyak::Label l1234;
+Xbyak::Label l1278;
+Xbyak::Label l129c;
+Xbyak::Label l12bc;
+Xbyak::Label l20;
+Xbyak::Label l2a0;
+Xbyak::Label l3c0;
+Xbyak::Label l438;
+Xbyak::Label l480;
+Xbyak::Label l48c;
+Xbyak::Label l4c8;
+Xbyak::Label l5c;
+Xbyak::Label l6a8;
+Xbyak::Label l7b4;
+Xbyak::Label l850;
+Xbyak::Label l89c;
+Xbyak::Label l8a8;
+Xbyak::Label l8d0;
+Xbyak::Label l9d0;
+Xbyak::Label la64;
+Xbyak::Label lab8;
+Xbyak::Label lae8;
+Xbyak::Label laf4;
+Xbyak::Label lb14;
+Xbyak::Label lc30;
+Xbyak::Label lcc8;
+Xbyak::Label ld1c;
+Xbyak::Label ld54;
+Xbyak::Label ld78;
+Xbyak::Label ld84;
+Xbyak::Label ld9c;
+Xbyak::Label le58;
+Xbyak::Label lebc;
+Xbyak::Label lef8;
+Xbyak::Label lf1c;
+Xbyak::Label lf3c;
+Xbyak::Label lf48;
+Xbyak::Label lf60;
+
+	preamble();
+	auto stacksize = get_size_of_abi_save_regs();
+#ifdef _WIN32
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(M, qword[M]);
+	mov(N, qword[N]);
+	mov(LDA, qword[LDA]);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	sub(A, -128);
+	sub(B, -128);
+	cmp(N, 0x30);
+	jl(l480, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	add(A, 0x30);
+	vxorps(ymm8, ymm8, ymm8);
+	vxorps(ymm9, ymm9, ymm9);
+	vxorps(ymm10, ymm10, ymm10);
+	vxorps(ymm11, ymm11, ymm11);
+	vxorps(ymm12, ymm12, ymm12);
+	vxorps(ymm13, ymm13, ymm13);
+	vxorps(ymm14, ymm14, ymm14);
+	vxorps(ymm15, ymm15, ymm15);
+	mov(I, M);
+	sar(I, 0x2);
+	jle(l2a0, T_NEAR);
+	align(4);
+
+L(l5c);
+	vmovdqu(xmm0, xword[A1-0x80]);
+	vmovdqu(xmm1, xword[A1+LDA*1-0x80]);
+	vmovdqu(xmm2, xword[A1+LDA*2-0x80]);
+	vmovdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	vpunpcklbw(xmm4, xmm0, xmm1);
+	vpunpckhbw(xmm5, xmm0, xmm1);
+	vpunpcklbw(xmm6, xmm2, xmm3);
+	vpunpckhbw(xmm7, xmm2, xmm3);
+	vpunpcklwd(xmm0, xmm4, xmm6);
+	vpunpckhwd(xmm1, xmm4, xmm6);
+	vpunpcklwd(xmm2, xmm5, xmm7);
+	vpunpckhwd(xmm3, xmm5, xmm7);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm8, ymm8, ymm5);
+	vmovdqu(xword[B-0x80], xmm0);
+	vmovdqu(xword[B-0x70], xmm1);
+	vpmovsxbw(ymm5, xmm2);
+	vmovhlps(xmm6, xmm2, xmm2);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm9, ymm9, ymm5);
+	vmovdqu(xword[B-0x60], xmm2);
+	vmovdqu(xword[B-0x50], xmm3);
+	vmovdqu(xmm0, xword[A1-0x70]);
+	vmovdqu(xmm1, xword[A1+LDA*1-0x70]);
+	vmovdqu(xmm2, xword[A1+LDA*2-0x70]);
+	vmovdqu(xmm3, xword[A1+LDA3*1-0x70]);
+	vpunpcklbw(xmm4, xmm0, xmm1);
+	vpunpckhbw(xmm5, xmm0, xmm1);
+	vpunpcklbw(xmm6, xmm2, xmm3);
+	vpunpckhbw(xmm7, xmm2, xmm3);
+	vpunpcklwd(xmm0, xmm4, xmm6);
+	vpunpckhwd(xmm1, xmm4, xmm6);
+	vpunpcklwd(xmm2, xmm5, xmm7);
+	vpunpckhwd(xmm3, xmm5, xmm7);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm10, ymm10, ymm5);
+	vmovdqu(xword[B-0x40], xmm0);
+	vmovdqu(xword[B-0x30], xmm1);
+	vpmovsxbw(ymm5, xmm2);
+	vmovhlps(xmm6, xmm2, xmm2);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm11, ymm11, ymm5);
+	vmovdqu(xword[B-0x20], xmm2);
+	vmovdqu(xword[B-0x10], xmm3);
+	vmovdqu(xmm0, xword[A1-0x60]);
+	vmovdqu(xmm1, xword[A1+LDA*1-0x60]);
+	vmovdqu(xmm2, xword[A1+LDA*2-0x60]);
+	vmovdqu(xmm3, xword[A1+LDA3*1-0x60]);
+	lea(A1, ptr[A1+LDA*4]);
+	vpunpcklbw(xmm4, xmm0, xmm1);
+	vpunpckhbw(xmm5, xmm0, xmm1);
+	vpunpcklbw(xmm6, xmm2, xmm3);
+	vpunpckhbw(xmm7, xmm2, xmm3);
+	vpunpcklwd(xmm0, xmm4, xmm6);
+	vpunpckhwd(xmm1, xmm4, xmm6);
+	vpunpcklwd(xmm2, xmm5, xmm7);
+	vpunpckhwd(xmm3, xmm5, xmm7);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm12, ymm12, ymm5);
+	vmovdqu(xword[B], xmm0);
+	vmovdqu(xword[B+0x10], xmm1);
+	vpmovsxbw(ymm5, xmm2);
+	vmovhlps(xmm6, xmm2, xmm2);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm13, ymm13, ymm5);
+	vmovdqu(xword[B+0x20], xmm2);
+	vmovdqu(xword[B+0x30], xmm3);
+	sub(B, -192);
+	dec(I);
+	jg(l5c, T_NEAR);
+	align(4);
+
+L(l2a0);
+	test(M, 0x2);
+	jle(l3c0, T_NEAR);
+	vmovdqu(xmm0, xword[A1-0x80]);
+	vmovdqu(xmm1, xword[A1-0x70]);
+	vmovdqu(xmm2, xword[A1-0x60]);
+	add(A1, LDA);
+	vmovdqu(xmm6, xword[A1-0x80]);
+	vmovdqu(xmm4, xword[A1-0x70]);
+	vmovdqu(xmm5, xword[A1-0x60]);
+	add(A1, LDA);
+	vpunpcklbw(xmm3, xmm0, xmm6);
+	vpunpckhbw(xmm0, xmm0, xmm6);
+	vpmovsxbw(ymm7, xmm3);
+	vmovhlps(xmm6, xmm3, xmm3);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm7, ymm7, ymm6);
+	vpmovsxwd(ymm7, xmm7);
+	vpaddd(ymm8, ymm8, ymm7);
+	vmovdqu(xword[B-0x80], xmm3);
+	vpmovsxbw(ymm7, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm7, ymm7, ymm6);
+	vpmovsxwd(ymm7, xmm7);
+	vpaddd(ymm9, ymm9, ymm7);
+	vmovdqu(xword[B-0x70], xmm0);
+	vpunpcklbw(xmm3, xmm1, xmm4);
+	vpunpckhbw(xmm0, xmm1, xmm4);
+	vpmovsxbw(ymm7, xmm3);
+	vmovhlps(xmm6, xmm3, xmm3);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm7, ymm7, ymm6);
+	vpmovsxwd(ymm7, xmm7);
+	vpaddd(ymm10, ymm10, ymm7);
+	vmovdqu(xword[B-0x60], xmm3);
+	vpmovsxbw(ymm7, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm7, ymm7, ymm6);
+	vpmovsxwd(ymm7, xmm7);
+	vpaddd(ymm11, ymm11, ymm7);
+	vmovdqu(xword[B-0x50], xmm0);
+	vpunpcklbw(xmm3, xmm2, xmm5);
+	vpunpckhbw(xmm0, xmm2, xmm5);
+	vpmovsxbw(ymm7, xmm3);
+	vmovhlps(xmm6, xmm3, xmm3);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm7, ymm7, ymm6);
+	vpmovsxwd(ymm7, xmm7);
+	vpaddd(ymm12, ymm12, ymm7);
+	vmovdqu(xword[B-0x40], xmm3);
+	vpmovsxbw(ymm7, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm7, ymm7, ymm6);
+	vpmovsxwd(ymm7, xmm7);
+	vpaddd(ymm13, ymm13, ymm7);
+	vmovdqu(xword[B-0x30], xmm0);
+	sub(B, -96);
+	align(4);
+
+L(l3c0);
+	test(M, 0x1);
+	jle(l438, T_NEAR);
+	vmovdqu(xmm0, xword[A1-0x80]);
+	vmovdqu(xmm1, xword[A1-0x70]);
+	vmovdqu(xmm2, xword[A1-0x60]);
+	add(A1, LDA);
+	vpmovsxbd(ymm7, xmm0);
+	vpaddd(ymm8, ymm8, ymm7);
+	vmovhlps(xmm7, xmm0, xmm0);
+	vpmovsxbd(ymm7, xmm7);
+	vpaddd(ymm9, ymm9, ymm7);
+	vmovdqu(xword[B-0x80], xmm0);
+	vpmovsxbd(ymm7, xmm1);
+	vpaddd(ymm10, ymm10, ymm7);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbd(ymm7, xmm7);
+	vpaddd(ymm11, ymm11, ymm7);
+	vmovdqu(xword[B-0x70], xmm1);
+	vpmovsxbd(ymm7, xmm2);
+	vpaddd(ymm12, ymm12, ymm7);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbd(ymm7, xmm7);
+	vpaddd(ymm13, ymm13, ymm7);
+	vmovdqu(xword[B-0x60], xmm2);
+	sub(B, -48);
+	align(4);
+
+L(l438);
+	mov(A1, qword[ARG_BIAS]);
+	vmovdqu(yword[A1], ymm8);
+	vmovdqu(yword[A1+0x20], ymm9);
+	vmovdqu(yword[A1+0x40], ymm10);
+	vmovdqu(yword[A1+0x60], ymm11);
+	vmovdqu(yword[A1+0x80], ymm12);
+	vmovdqu(yword[A1+0xa0], ymm13);
+	add(qword[ARG_BIAS], 0xc0);
+	sub(N, 0x30);
+	cmp(N, 0x30);
+	jge(l20, T_NEAR);
+	vzeroupper();
+	align(4);
+
+L(l480);
+	cmp(N, 0x20);
+	jl(l89c, T_NEAR);
+	align(4);
+
+L(l48c);
+	mov(A1, A);
+	add(A, 0x20);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	pxor(xmm10, xmm10);
+	pxor(xmm11, xmm11);
+	pxor(xmm12, xmm12);
+	pxor(xmm13, xmm13);
+	pxor(xmm14, xmm14);
+	pxor(xmm15, xmm15);
+	mov(I, M);
+	sar(I, 0x2);
+	jle(l6a8, T_NEAR);
+	align(4);
+
+L(l4c8);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm4);
+	pmovsxbw(xmm5, xmm2);
+	movhlps(xmm6, xmm2);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm2);
+	movdqu(xmm0, xword[A1-0x70]);
+	movdqu(xmm1, xword[A1+LDA*1-0x70]);
+	movdqu(xmm2, xword[A1+LDA*2-0x70]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x70]);
+	lea(A1, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm5);
+	punpckhwd(xmm2, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B-0x30], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B-0x20], xmm4);
+	pmovsxbw(xmm5, xmm2);
+	movhlps(xmm6, xmm2);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B-0x10], xmm2);
+	sub(B, -128);
+	dec(I);
+	jg(l4c8, T_NEAR);
+	align(4);
+
+L(l6a8);
+	test(M, 0x2);
+	jle(l7b4, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1-0x70]);
+	add(A1, LDA);
+	movdqu(xmm2, xword[A1-0x80]);
+	movdqu(xmm3, xword[A1-0x70]);
+	add(A1, LDA);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm2);
+	punpckhbw(xmm4, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm4);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x70], xmm4);
+	movdqa(xmm4, xmm1);
+	punpcklbw(xmm1, xmm3);
+	punpckhbw(xmm4, xmm3);
+	pmovsxbw(xmm5, xmm1);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm13, xmm6);
+	movdqu(xword[B-0x60], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm15, xmm6);
+	movdqu(xword[B-0x50], xmm4);
+	sub(B, -64);
+	align(4);
+
+L(l7b4);
+	test(M, 0x1);
+	jle(l850, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1-0x70]);
+	add(A1, LDA);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm8, xmm5);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	pshufd(xmm5, xmm0, 0xaa);
+	pmovsxbd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	pshufd(xmm6, xmm0, 0xff);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbd(xmm5, xmm1);
+	paddd(xmm12, xmm5);
+	pshufd(xmm6, xmm1, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm13, xmm6);
+	pshufd(xmm5, xmm1, 0xaa);
+	pmovsxbd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	pshufd(xmm6, xmm1, 0xff);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm15, xmm6);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l850);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	movdqu(xword[A1+0x20], xmm10);
+	movdqu(xword[A1+0x30], xmm11);
+	movdqu(xword[A1+0x40], xmm12);
+	movdqu(xword[A1+0x50], xmm13);
+	movdqu(xword[A1+0x60], xmm14);
+	movdqu(xword[A1+0x70], xmm15);
+	add(qword[ARG_BIAS], 0x80);
+	sub(N, 0x20);
+	cmp(N, 0x20);
+	jge(l48c, T_NEAR);
+	align(4);
+
+L(l89c);
+	cmp(N, 0x10);
+	jl(lae8, T_NEAR);
+	align(4);
+
+L(l8a8);
+	mov(A1, A);
+	add(A, 0x10);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	pxor(xmm10, xmm10);
+	pxor(xmm11, xmm11);
+	mov(I, M);
+	sar(I, 0x2);
+	jle(l9d0, T_NEAR);
+	align(4);
+
+L(l8d0);
+	movdqu(xmm0, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm1, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm2, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm3, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqa(xmm4, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm4, xmm1);
+	movdqa(xmm1, xmm2);
+	punpcklbw(xmm2, xmm3);
+	punpckhbw(xmm1, xmm3);
+	movdqa(xmm3, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm3, xmm2);
+	movdqa(xmm2, xmm4);
+	punpcklwd(xmm4, xmm1);
+	punpckhwd(xmm2, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm3);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	pmovsxbw(xmm5, xmm2);
+	movhlps(xmm6, xmm2);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x60], xmm4);
+	movdqu(xword[B-0x50], xmm2);
+	sub(B, -64);
+	dec(I);
+	jg(l8d0, T_NEAR);
+	align(4);
+
+L(l9d0);
+	test(M, 0x2);
+	jle(la64, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqu(xmm1, xword[A1-0x80]);
+	add(A1, LDA);
+	movdqa(xmm2, xmm0);
+	punpcklbw(xmm0, xmm1);
+	punpckhbw(xmm2, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	pmovsxbw(xmm5, xmm2);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movhlps(xmm6, xmm2);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm2);
+	sub(B, -32);
+	align(4);
+
+L(la64);
+	test(M, 0x1);
+	jle(lab8, T_NEAR);
+	movdqu(xmm0, xword[A1-0x80]);
+	add(A1, LDA);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm8, xmm5);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	pshufd(xmm5, xmm0, 0xaa);
+	pmovsxbd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	pshufd(xmm6, xmm0, 0xff);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(lab8);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	movdqu(xword[A1+0x20], xmm10);
+	movdqu(xword[A1+0x30], xmm11);
+	add(qword[ARG_BIAS], 0x40);
+	sub(N, 0x10);
+	cmp(N, 0x10);
+	jge(l8a8, T_NEAR);
+	align(4);
+
+L(lae8);
+	cmp(N, 0x8);
+	jl(ld78, T_NEAR);
+	align(4);
+
+L(laf4);
+	mov(A1, A);
+	add(A, 0x8);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(lc30, T_NEAR);
+	align(4);
+
+L(lb14);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	dec(I);
+	jg(lb14, T_NEAR);
+	align(4);
+
+L(lc30);
+	test(M, 0x4);
+	jle(lcc8, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(lcc8);
+	test(M, 0x2);
+	jle(ld1c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(ld1c);
+	test(M, 0x1);
+	jle(ld54, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	pmovsxbd(xmm5, xmm0);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm8, xmm5);
+	paddd(xmm9, xmm6);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(ld54);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	add(qword[ARG_BIAS], 0x20);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(laf4, T_NEAR);
+	align(4);
+
+L(ld78);
+	cmp(N, 0x4);
+	jl(lf3c, T_NEAR);
+	align(4);
+
+L(ld84);
+	mov(A1, A);
+	add(A, 0x4);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(le58, T_NEAR);
+	align(4);
+
+L(ld9c);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	dec(I);
+	jg(ld9c, T_NEAR);
+	align(4);
+
+L(le58);
+	test(M, 0x4);
+	jle(lebc, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(lebc);
+	test(M, 0x2);
+	jle(lef8, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(lef8);
+	test(M, 0x1);
+	jle(lf1c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(lf1c);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x10);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(ld84, T_NEAR);
+	align(4);
+
+L(lf3c);
+	cmp(N, 0x2);
+	jl(l111a, T_NEAR);
+	align(4);
+
+L(lf48);
+	mov(A1, A);
+	add(A, 0x2);
+	pxor(xmm7, xmm7);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l1024, T_NEAR);
+	align(4);
+
+L(lf60);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm4, eax, 0x0);
+	punpcklbw(xmm1, xmm2);
+	punpcklbw(xmm3, xmm4);
+	punpcklwd(xmm1, xmm3);
+	punpcklqdq(xmm0, xmm1);
+	pshufd(xmm6, xmm0, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(LDA3);
+	jg(lf60, T_NEAR);
+	align(4);
+
+L(l1024);
+	test(M, 0x4);
+	jle(l1090, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l1090);
+	test(M, 0x2);
+	jle(l10d4, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l10d4);
+	test(M, 0x1);
+	jle(l10fc, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	mov(word[B-0x80], ax);
+	sub(B, -2);
+	align(4);
+
+L(l10fc);
+	mov(A1, qword[ARG_BIAS]);
+	movq(qword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x8);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(lf48, T_NEAR);
+	align(4);
+
+L(l111a);
+	cmp(N, 0x1);
+	jl(l12bc, T_NEAR);
+	align(4);
+
+L(l1124);
+	mov(A1, A);
+	add(A, 0x1);
+	pxor(xmm7, xmm7);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l11d4, T_NEAR);
+	align(4);
+
+L(l113c);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	dec(LDA3);
+	jg(l113c, T_NEAR);
+	align(4);
+
+L(l11d4);
+	test(M, 0x4);
+	jle(l1234, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l1234);
+	test(M, 0x2);
+	jle(l1278, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	align(4);
+
+L(l1278);
+	test(M, 0x1);
+	jle(l129c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l129c);
+	mov(A1, qword[ARG_BIAS]);
+	movd(dword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x4);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l1124, T_NEAR);
+	align(4);
+
+L(l12bc);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+#undef ARG_BIAS
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_at_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_at_kern.cpp
new file mode 100644
index 000000000..a4f4ff09c
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_at_kern.cpp
@@ -0,0 +1,3163 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_sum_at_kern::jit_avx512_core_u8_copy_sum_at_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#define ARG_BIAS	24+stacksize+rsp
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+#define ARG_BIAS	72+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l1750;
+Xbyak::Label l1b6c;
+Xbyak::Label l1e14;
+Xbyak::Label l20;
+Xbyak::Label l2068;
+Xbyak::Label l226c;
+Xbyak::Label l22b8;
+Xbyak::Label l22c4;
+Xbyak::Label l22f4;
+Xbyak::Label l26b4;
+Xbyak::Label l28cc;
+Xbyak::Label l2a2c;
+Xbyak::Label l2b5c;
+Xbyak::Label l2c64;
+Xbyak::Label l2c94;
+Xbyak::Label l2ca0;
+Xbyak::Label l2cc8;
+Xbyak::Label l2eac;
+Xbyak::Label l2fc0;
+Xbyak::Label l3078;
+Xbyak::Label l3118;
+Xbyak::Label l319c;
+Xbyak::Label l31c0;
+Xbyak::Label l31cc;
+Xbyak::Label l31ec;
+Xbyak::Label l32e4;
+Xbyak::Label l3378;
+Xbyak::Label l33dc;
+Xbyak::Label l3434;
+Xbyak::Label l347c;
+Xbyak::Label l349c;
+Xbyak::Label l34a8;
+Xbyak::Label l34c8;
+Xbyak::Label l3558;
+Xbyak::Label l35b0;
+Xbyak::Label l35f4;
+Xbyak::Label l3638;
+Xbyak::Label l366c;
+Xbyak::Label l368a;
+Xbyak::Label l3694;
+Xbyak::Label l36a8;
+Xbyak::Label l36ec;
+Xbyak::Label l3728;
+Xbyak::Label l3760;
+Xbyak::Label l3794;
+Xbyak::Label l37b8;
+Xbyak::Label l37d8;
+Xbyak::Label l5cc;
+Xbyak::Label l6c;
+Xbyak::Label l968;
+Xbyak::Label lc80;
+Xbyak::Label lf1c;
+Xbyak::Label lf64;
+Xbyak::Label lf70;
+Xbyak::Label lfb4;
+
+	preamble();
+	auto stacksize = get_size_of_abi_save_regs();
+#ifdef _WIN32
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(N, qword[N]);
+	mov(M, qword[M]);
+	mov(LDA, qword[LDA]);
+	sub(A, -128);
+	sub(B, -128);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	cmp(N, 0x30);
+	jl(lf64, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	mov(I, LDA);
+	shl(I, 0x5);
+	lea(I, ptr[I+LDA*8]);
+	lea(I, ptr[I+LDA*8]);
+	add(A, I);
+	vxorps(ymm8, ymm8, ymm8);
+	vxorps(ymm9, ymm9, ymm9);
+	vxorps(ymm10, ymm10, ymm10);
+	vxorps(ymm11, ymm11, ymm11);
+	vxorps(ymm12, ymm12, ymm12);
+	vxorps(ymm13, ymm13, ymm13);
+	vxorps(ymm14, ymm14, ymm14);
+	vxorps(ymm15, ymm15, ymm15);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(l5cc, T_NEAR);
+	align(4);
+
+L(l6c);
+	vmovq(xmm0, qword[A1-0x80]);
+	vmovq(xmm1, qword[A1+LDA*1-0x80]);
+	vmovq(xmm2, qword[A1+LDA*2-0x80]);
+	vmovq(xmm3, qword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	vpunpckldq(xmm1, xmm0, xmm1);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm1, xmm3);
+	vpunpckhqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x80], xmm0);
+	vmovdqu(xword[B+0x40], xmm1);
+	vmovq(xmm2, qword[A2-0x80]);
+	vmovq(xmm3, qword[A2+LDA*1-0x80]);
+	vmovq(xmm4, qword[A2+LDA*2-0x80]);
+	vmovq(xmm5, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpckldq(xmm5, xmm4, xmm5);
+	vpunpcklqdq(xmm2, xmm3, xmm5);
+	vpunpckhqdq(xmm3, xmm3, xmm5);
+	vmovdqu(xword[B-0x70], xmm2);
+	vmovdqu(xword[B+0x50], xmm3);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm2);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm8, ymm8, ymm5);
+	vpmovsxbw(ymm5, xmm1);
+	vmovhlps(xmm6, xmm1, xmm1);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm8, ymm8, ymm5);
+	vmovq(xmm0, qword[A2-0x80]);
+	vmovq(xmm1, qword[A2+LDA*1-0x80]);
+	vmovq(xmm2, qword[A2+LDA*2-0x80]);
+	vmovq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm0, xmm1);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm1, xmm3);
+	vpunpckhqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x60], xmm0);
+	vmovdqu(xword[B+0x60], xmm1);
+	vmovq(xmm2, qword[A2-0x80]);
+	vmovq(xmm3, qword[A2+LDA*1-0x80]);
+	vmovq(xmm4, qword[A2+LDA*2-0x80]);
+	vmovq(xmm5, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpckldq(xmm5, xmm4, xmm5);
+	vpunpcklqdq(xmm2, xmm3, xmm5);
+	vpunpckhqdq(xmm3, xmm3, xmm5);
+	vmovdqu(xword[B-0x50], xmm2);
+	vmovdqu(xword[B+0x70], xmm3);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm2);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm9, ymm9, ymm5);
+	vpmovsxbw(ymm5, xmm1);
+	vmovhlps(xmm6, xmm1, xmm1);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm9, ymm9, ymm5);
+	vmovq(xmm0, qword[A2-0x80]);
+	vmovq(xmm1, qword[A2+LDA*1-0x80]);
+	vmovq(xmm2, qword[A2+LDA*2-0x80]);
+	vmovq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm0, xmm1);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm1, xmm3);
+	vpunpckhqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x40], xmm0);
+	vmovdqu(xword[B+0x80], xmm1);
+	vmovq(xmm2, qword[A2-0x80]);
+	vmovq(xmm3, qword[A2+LDA*1-0x80]);
+	vmovq(xmm4, qword[A2+LDA*2-0x80]);
+	vmovq(xmm5, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpckldq(xmm5, xmm4, xmm5);
+	vpunpcklqdq(xmm2, xmm3, xmm5);
+	vpunpckhqdq(xmm3, xmm3, xmm5);
+	vmovdqu(xword[B-0x30], xmm2);
+	vmovdqu(xword[B+0x90], xmm3);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm2);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm10, ymm10, ymm5);
+	vpmovsxbw(ymm5, xmm1);
+	vmovhlps(xmm6, xmm1, xmm1);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm10, ymm10, ymm5);
+	vmovq(xmm0, qword[A2-0x80]);
+	vmovq(xmm1, qword[A2+LDA*1-0x80]);
+	vmovq(xmm2, qword[A2+LDA*2-0x80]);
+	vmovq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm0, xmm1);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm1, xmm3);
+	vpunpckhqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x20], xmm0);
+	vmovdqu(xword[B+0xa0], xmm1);
+	vmovq(xmm2, qword[A2-0x80]);
+	vmovq(xmm3, qword[A2+LDA*1-0x80]);
+	vmovq(xmm4, qword[A2+LDA*2-0x80]);
+	vmovq(xmm5, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpckldq(xmm5, xmm4, xmm5);
+	vpunpcklqdq(xmm2, xmm3, xmm5);
+	vpunpckhqdq(xmm3, xmm3, xmm5);
+	vmovdqu(xword[B-0x10], xmm2);
+	vmovdqu(xword[B+0xb0], xmm3);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm2);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm11, ymm11, ymm5);
+	vpmovsxbw(ymm5, xmm1);
+	vmovhlps(xmm6, xmm1, xmm1);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm11, ymm11, ymm5);
+	vmovq(xmm0, qword[A2-0x80]);
+	vmovq(xmm1, qword[A2+LDA*1-0x80]);
+	vmovq(xmm2, qword[A2+LDA*2-0x80]);
+	vmovq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm0, xmm1);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm1, xmm3);
+	vpunpckhqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B], xmm0);
+	vmovdqu(xword[B+0xc0], xmm1);
+	vmovq(xmm2, qword[A2-0x80]);
+	vmovq(xmm3, qword[A2+LDA*1-0x80]);
+	vmovq(xmm4, qword[A2+LDA*2-0x80]);
+	vmovq(xmm5, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpckldq(xmm5, xmm4, xmm5);
+	vpunpcklqdq(xmm2, xmm3, xmm5);
+	vpunpckhqdq(xmm3, xmm3, xmm5);
+	vmovdqu(xword[B+0x10], xmm2);
+	vmovdqu(xword[B+0xd0], xmm3);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm2);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm12, ymm12, ymm5);
+	vpmovsxbw(ymm5, xmm1);
+	vmovhlps(xmm6, xmm1, xmm1);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm12, ymm12, ymm5);
+	vmovq(xmm0, qword[A2-0x80]);
+	vmovq(xmm1, qword[A2+LDA*1-0x80]);
+	vmovq(xmm2, qword[A2+LDA*2-0x80]);
+	vmovq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm0, xmm1);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm1, xmm3);
+	vpunpckhqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B+0x20], xmm0);
+	vmovdqu(xword[B+0xe0], xmm1);
+	vmovq(xmm2, qword[A2-0x80]);
+	vmovq(xmm3, qword[A2+LDA*1-0x80]);
+	vmovq(xmm4, qword[A2+LDA*2-0x80]);
+	vmovq(xmm5, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm3, xmm2, xmm3);
+	vpunpckldq(xmm5, xmm4, xmm5);
+	vpunpcklqdq(xmm2, xmm3, xmm5);
+	vpunpckhqdq(xmm3, xmm3, xmm5);
+	vmovdqu(xword[B+0x30], xmm2);
+	vmovdqu(xword[B+0xf0], xmm3);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm2);
+	vmovhlps(xmm7, xmm2, xmm2);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm13, ymm13, ymm5);
+	vpmovsxbw(ymm5, xmm1);
+	vmovhlps(xmm6, xmm1, xmm1);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm3);
+	vmovhlps(xmm7, xmm3, xmm3);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm13, ymm13, ymm5);
+	sub(A1, -8);
+	sub(B, -384);
+	dec(I);
+	jg(l6c, T_NEAR);
+	align(4);
+
+L(l5cc);
+	test(M, 0x4);
+	jle(l968, T_NEAR);
+	vmovd(xmm0, dword[A1-0x80]);
+	vmovd(xmm1, dword[A1+LDA*1-0x80]);
+	vmovd(xmm2, dword[A1+LDA*2-0x80]);
+	vmovd(xmm3, dword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	vpunpckldq(xmm0, xmm0, xmm1);
+	vpunpckldq(xmm2, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm0, xmm2);
+	vmovdqu(xword[B-0x80], xmm0);
+	vmovd(xmm1, dword[A2-0x80]);
+	vmovd(xmm2, dword[A2+LDA*1-0x80]);
+	vmovd(xmm3, dword[A2+LDA*2-0x80]);
+	vmovd(xmm4, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm1, xmm2);
+	vpunpckldq(xmm3, xmm3, xmm4);
+	vpunpcklqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x70], xmm1);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm8, ymm8, ymm5);
+	vmovd(xmm0, dword[A2-0x80]);
+	vmovd(xmm1, dword[A2+LDA*1-0x80]);
+	vmovd(xmm2, dword[A2+LDA*2-0x80]);
+	vmovd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm0, xmm0, xmm1);
+	vpunpckldq(xmm2, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm0, xmm2);
+	vmovdqu(xword[B-0x60], xmm0);
+	vmovd(xmm1, dword[A2-0x80]);
+	vmovd(xmm2, dword[A2+LDA*1-0x80]);
+	vmovd(xmm3, dword[A2+LDA*2-0x80]);
+	vmovd(xmm4, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm1, xmm2);
+	vpunpckldq(xmm3, xmm3, xmm4);
+	vpunpcklqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x50], xmm1);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm9, ymm9, ymm5);
+	vmovd(xmm0, dword[A2-0x80]);
+	vmovd(xmm1, dword[A2+LDA*1-0x80]);
+	vmovd(xmm2, dword[A2+LDA*2-0x80]);
+	vmovd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm0, xmm0, xmm1);
+	vpunpckldq(xmm2, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm0, xmm2);
+	vmovdqu(xword[B-0x40], xmm0);
+	vmovd(xmm1, dword[A2-0x80]);
+	vmovd(xmm2, dword[A2+LDA*1-0x80]);
+	vmovd(xmm3, dword[A2+LDA*2-0x80]);
+	vmovd(xmm4, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm1, xmm2);
+	vpunpckldq(xmm3, xmm3, xmm4);
+	vpunpcklqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x30], xmm1);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm10, ymm10, ymm5);
+	vmovd(xmm0, dword[A2-0x80]);
+	vmovd(xmm1, dword[A2+LDA*1-0x80]);
+	vmovd(xmm2, dword[A2+LDA*2-0x80]);
+	vmovd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm0, xmm0, xmm1);
+	vpunpckldq(xmm2, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm0, xmm2);
+	vmovdqu(xword[B-0x20], xmm0);
+	vmovd(xmm1, dword[A2-0x80]);
+	vmovd(xmm2, dword[A2+LDA*1-0x80]);
+	vmovd(xmm3, dword[A2+LDA*2-0x80]);
+	vmovd(xmm4, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm1, xmm2);
+	vpunpckldq(xmm3, xmm3, xmm4);
+	vpunpcklqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B-0x10], xmm1);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm11, ymm11, ymm5);
+	vmovd(xmm0, dword[A2-0x80]);
+	vmovd(xmm1, dword[A2+LDA*1-0x80]);
+	vmovd(xmm2, dword[A2+LDA*2-0x80]);
+	vmovd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm0, xmm0, xmm1);
+	vpunpckldq(xmm2, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm0, xmm2);
+	vmovdqu(xword[B], xmm0);
+	vmovd(xmm1, dword[A2-0x80]);
+	vmovd(xmm2, dword[A2+LDA*1-0x80]);
+	vmovd(xmm3, dword[A2+LDA*2-0x80]);
+	vmovd(xmm4, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm1, xmm2);
+	vpunpckldq(xmm3, xmm3, xmm4);
+	vpunpcklqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B+0x10], xmm1);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm12, ymm12, ymm5);
+	vmovd(xmm0, dword[A2-0x80]);
+	vmovd(xmm1, dword[A2+LDA*1-0x80]);
+	vmovd(xmm2, dword[A2+LDA*2-0x80]);
+	vmovd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm0, xmm0, xmm1);
+	vpunpckldq(xmm2, xmm2, xmm3);
+	vpunpcklqdq(xmm0, xmm0, xmm2);
+	vmovdqu(xword[B+0x20], xmm0);
+	vmovd(xmm1, dword[A2-0x80]);
+	vmovd(xmm2, dword[A2+LDA*1-0x80]);
+	vmovd(xmm3, dword[A2+LDA*2-0x80]);
+	vmovd(xmm4, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpunpckldq(xmm1, xmm1, xmm2);
+	vpunpckldq(xmm3, xmm3, xmm4);
+	vpunpcklqdq(xmm1, xmm1, xmm3);
+	vmovdqu(xword[B+0x30], xmm1);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxbw(ymm6, xmm1);
+	vmovhlps(xmm7, xmm1, xmm1);
+	vpmovsxbw(ymm7, xmm7);
+	vphaddw(ymm6, ymm6, ymm7);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm13, ymm13, ymm5);
+	sub(A1, -4);
+	sub(B, -192);
+	align(4);
+
+L(l968);
+	test(M, 0x2);
+	jle(lc80, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x7);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm8, ymm8, ymm5);
+	vmovdqu(xword[B-0x80], xmm0);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm9, ymm9, ymm5);
+	vmovdqu(xword[B-0x70], xmm0);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm10, ymm10, ymm5);
+	vmovdqu(xword[B-0x60], xmm0);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm11, ymm11, ymm5);
+	vmovdqu(xword[B-0x50], xmm0);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm12, ymm12, ymm5);
+	vmovdqu(xword[B-0x40], xmm0);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrw(xmm0, xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	vpinsrw(xmm0, xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	vpmovsxbw(ymm5, xmm0);
+	vmovhlps(xmm6, xmm0, xmm0);
+	vpmovsxbw(ymm6, xmm6);
+	vphaddw(ymm5, ymm5, ymm6);
+	vpmovsxwd(ymm5, xmm5);
+	vpaddd(ymm13, ymm13, ymm5);
+	vmovdqu(xword[B-0x30], xmm0);
+	sub(A1, -2);
+	sub(B, -96);
+	align(4);
+
+L(lc80);
+	test(M, 0x1);
+	jle(lf1c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0xf);
+	vpmovsxbd(ymm7, xmm0);
+	vpaddd(ymm8, ymm8, ymm7);
+	vmovhlps(xmm7, xmm0, xmm0);
+	vpmovsxbd(ymm7, xmm7);
+	vpaddd(ymm9, ymm9, ymm7);
+	vmovdqu(xword[B-0x80], xmm0);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x0);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x1);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0xf);
+	vpmovsxbd(ymm7, xmm0);
+	vpaddd(ymm10, ymm10, ymm7);
+	vmovhlps(xmm7, xmm0, xmm0);
+	vpmovsxbd(ymm7, xmm7);
+	vpaddd(ymm11, ymm11, ymm7);
+	vmovdqu(xword[B-0x70], xmm0);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x0);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x1);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	vpinsrb(xmm0, xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	vpinsrb(xmm0, xmm0, eax, 0xf);
+	vpmovsxbd(ymm7, xmm0);
+	vpaddd(ymm12, ymm12, ymm7);
+	vmovhlps(xmm7, xmm0, xmm0);
+	vpmovsxbd(ymm7, xmm7);
+	vpaddd(ymm13, ymm13, ymm7);
+	vmovdqu(xword[B-0x60], xmm0);
+	sub(B, -48);
+	align(4);
+
+L(lf1c);
+	mov(A1, qword[ARG_BIAS]);
+	vmovdqu(yword[A1], ymm8);
+	vmovdqu(yword[A1+0x20], ymm9);
+	vmovdqu(yword[A1+0x40], ymm10);
+	vmovdqu(yword[A1+0x60], ymm11);
+	vmovdqu(yword[A1+0x80], ymm12);
+	vmovdqu(yword[A1+0xa0], ymm13);
+	add(qword[ARG_BIAS], 0xc0);
+	sub(N, 0x30);
+	cmp(N, 0x30);
+	jge(l20, T_NEAR);
+	vzeroupper();
+	align(4);
+
+L(lf64);
+	cmp(N, 0x20);
+	jl(l22b8, T_NEAR);
+	align(4);
+
+L(lf70);
+	mov(A1, A);
+	mov(I, LDA);
+	shl(I, 0x5);
+	add(A, I);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	pxor(xmm10, xmm10);
+	pxor(xmm11, xmm11);
+	pxor(xmm12, xmm12);
+	pxor(xmm13, xmm13);
+	pxor(xmm14, xmm14);
+	pxor(xmm15, xmm15);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l1750, T_NEAR);
+	align(4);
+
+L(lfb4);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B+0x80], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B+0x100], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B+0x10], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B+0x90], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B+0x110], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B+0x20], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B+0xa0], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B+0x120], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B+0x30], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B+0xb0], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B+0x130], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B+0x40], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B+0xc0], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B+0x140], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B-0x30], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B+0x50], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B+0xd0], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B+0x150], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B-0x20], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B+0x60], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B+0xe0], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B+0x160], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B-0x10], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B+0x70], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B+0xf0], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B+0x170], xmm3);
+	sub(A1, -16);
+	sub(B, -512);
+	dec(I);
+	jg(lfb4, T_NEAR);
+	align(4);
+
+L(l1750);
+	test(M, 0x8);
+	jle(l1b6c, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B+0x10], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B+0x20], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B+0x30], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B+0x40], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B-0x30], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B+0x50], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B-0x20], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B+0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B-0x10], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B+0x70], xmm1);
+	sub(A1, -8);
+	sub(B, -256);
+	align(4);
+
+L(l1b6c);
+	test(M, 0x4);
+	jle(l1e14, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movdqu(xword[B-0x40], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm13, xmm5);
+	movdqu(xword[B-0x30], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movdqu(xword[B-0x20], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm15, xmm5);
+	movdqu(xword[B-0x10], xmm0);
+	sub(A1, -4);
+	sub(B, -128);
+	align(4);
+
+L(l1e14);
+	test(M, 0x2);
+	jle(l2068, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x70], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm12, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm13, xmm6);
+	movdqu(xword[B-0x60], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	lea(A2, ptr[A2+LDA*4]);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm15, xmm6);
+	movdqu(xword[B-0x50], xmm0);
+	sub(A1, -2);
+	sub(B, -64);
+	align(4);
+
+L(l2068);
+	test(M, 0x1);
+	jle(l226c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm8, xmm5);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	pshufd(xmm5, xmm0, 0xaa);
+	pmovsxbd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	pshufd(xmm6, xmm0, 0xff);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xf);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm12, xmm5);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm13, xmm6);
+	pshufd(xmm5, xmm0, 0xaa);
+	pmovsxbd(xmm5, xmm5);
+	paddd(xmm14, xmm5);
+	pshufd(xmm6, xmm0, 0xff);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm15, xmm6);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	align(4);
+
+L(l226c);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	movdqu(xword[A1+0x20], xmm10);
+	movdqu(xword[A1+0x30], xmm11);
+	movdqu(xword[A1+0x40], xmm12);
+	movdqu(xword[A1+0x50], xmm13);
+	movdqu(xword[A1+0x60], xmm14);
+	movdqu(xword[A1+0x70], xmm15);
+	add(qword[ARG_BIAS], 0x80);
+	sub(N, 0x20);
+	cmp(N, 0x20);
+	jge(lf70, T_NEAR);
+	align(4);
+
+L(l22b8);
+	cmp(N, 0x10);
+	jl(l2c94, T_NEAR);
+	align(4);
+
+L(l22c4);
+	mov(A1, A);
+	mov(I, LDA);
+	shl(I, 0x4);
+	add(A, I);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	pxor(xmm10, xmm10);
+	pxor(xmm11, xmm11);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l26b4, T_NEAR);
+	align(4);
+
+L(l22f4);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x40], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B+0x40], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x30], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B+0x10], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B+0x50], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x20], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B+0x20], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B+0x60], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x10], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B+0x30], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B+0x70], xmm3);
+	sub(A1, -16);
+	sub(B, -256);
+	dec(I);
+	jg(l22f4, T_NEAR);
+	align(4);
+
+L(l26b4);
+	test(M, 0x8);
+	jle(l28cc, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x40], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x30], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x20], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x10], xmm1);
+	sub(A1, -8);
+	sub(B, -128);
+	align(4);
+
+L(l28cc);
+	test(M, 0x4);
+	jle(l2a2c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm11, xmm5);
+	movdqu(xword[B-0x50], xmm0);
+	sub(A1, -4);
+	sub(B, -64);
+	align(4);
+
+L(l2a2c);
+	test(M, 0x2);
+	jle(l2b5c, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	pinsrw(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x70], xmm0);
+	sub(A1, -2);
+	sub(B, -32);
+	align(4);
+
+L(l2b5c);
+	test(M, 0x1);
+	jle(l2c64, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	lea(A2, ptr[A1+LDA*4]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0x7);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x8);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x9);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xa);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	lea(A2, ptr[A2+LDA*4]);
+	pinsrb(xmm0, eax, 0xb);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0xc);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0xd);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0xe);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0xf);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm8, xmm5);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	pshufd(xmm5, xmm0, 0xaa);
+	pmovsxbd(xmm5, xmm5);
+	paddd(xmm10, xmm5);
+	pshufd(xmm6, xmm0, 0xff);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm11, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l2c64);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	movdqu(xword[A1+0x20], xmm10);
+	movdqu(xword[A1+0x30], xmm11);
+	add(qword[ARG_BIAS], 0x40);
+	sub(N, 0x10);
+	cmp(N, 0x10);
+	jge(l22c4, T_NEAR);
+	align(4);
+
+L(l2c94);
+	cmp(N, 0x8);
+	jl(l31c0, T_NEAR);
+	align(4);
+
+L(l2ca0);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*4]);
+	lea(I, ptr[A1+LDA*8]);
+	mov(A, I);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l2eac, T_NEAR);
+	align(4);
+
+L(l2cc8);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	sub(A1, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x60], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x40], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x20], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x50], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x30], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x10], xmm3);
+	sub(B, -128);
+	dec(I);
+	jg(l2cc8, T_NEAR);
+	align(4);
+
+L(l2eac);
+	test(M, 0x8);
+	jle(l2fc0, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	sub(A1, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	align(4);
+
+L(l2fc0);
+	test(M, 0x4);
+	jle(l3078, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	sub(A1, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	align(4);
+
+L(l3078);
+	test(M, 0x2);
+	jle(l3118, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l3118);
+	test(M, 0x1);
+	jle(l319c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x7);
+	pmovsxbd(xmm5, xmm0);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm8, xmm5);
+	paddd(xmm9, xmm6);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l319c);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	add(qword[ARG_BIAS], 0x20);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l2ca0, T_NEAR);
+	align(4);
+
+L(l31c0);
+	cmp(N, 0x4);
+	jl(l349c, T_NEAR);
+	align(4);
+
+L(l31cc);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*2]);
+	lea(I, ptr[A1+LDA*4]);
+	mov(A, I);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l32e4, T_NEAR);
+	align(4);
+
+L(l31ec);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm2, xword[A2-0x80]);
+	movdqu(xmm3, xword[A2+LDA*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x60], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x50], xmm3);
+	sub(B, -64);
+	dec(I);
+	jg(l31ec, T_NEAR);
+	align(4);
+
+L(l32e4);
+	test(M, 0x8);
+	jle(l3378, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	sub(A1, -8);
+	movq(xmm2, qword[A2-0x80]);
+	movq(xmm3, qword[A2+LDA*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l3378);
+	test(M, 0x4);
+	jle(l33dc, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	sub(A1, -4);
+	movd(xmm2, dword[A2-0x80]);
+	movd(xmm3, dword[A2+LDA*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l33dc);
+	test(M, 0x2);
+	jle(l3434, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x3);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l3434);
+	test(M, 0x1);
+	jle(l347c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l347c);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x10);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l31cc, T_NEAR);
+	align(4);
+
+L(l349c);
+	cmp(N, 0x2);
+	jl(l368a, T_NEAR);
+	align(4);
+
+L(l34a8);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*1]);
+	lea(I, ptr[A1+LDA*2]);
+	mov(A, I);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l3558, T_NEAR);
+	align(4);
+
+L(l34c8);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm1, xword[A2-0x80]);
+	sub(A2, -16);
+	movdqa(xmm2, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm2, xmm1);
+	pshufd(xmm6, xmm0, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pshufd(xmm6, xmm2, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm2);
+	sub(B, -32);
+	dec(I);
+	jg(l34c8, T_NEAR);
+	align(4);
+
+L(l3558);
+	test(M, 0x8);
+	jle(l35b0, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	movq(xmm1, qword[A2-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	pshufd(xmm6, xmm0, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l35b0);
+	test(M, 0x4);
+	jle(l35f4, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	movd(xmm1, dword[A2-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l35f4);
+	test(M, 0x2);
+	jle(l3638, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l3638);
+	test(M, 0x1);
+	jle(l366c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	align(4);
+
+L(l366c);
+	mov(A1, qword[ARG_BIAS]);
+	movq(qword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x8);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l34a8, T_NEAR);
+	align(4);
+
+L(l368a);
+	cmp(N, 0x1);
+	jl(l37d8, T_NEAR);
+	align(4);
+
+L(l3694);
+	mov(A1, A);
+	add(A, LDA);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l36ec, T_NEAR);
+	align(4);
+
+L(l36a8);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(I);
+	jg(l36a8, T_NEAR);
+	align(4);
+
+L(l36ec);
+	test(M, 0x8);
+	jle(l3728, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l3728);
+	test(M, 0x4);
+	jle(l3760, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l3760);
+	test(M, 0x2);
+	jle(l3794, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	mov(word[B-0x80], ax);
+	sub(A1, -2);
+	sub(B, -2);
+	align(4);
+
+L(l3794);
+	test(M, 0x1);
+	jle(l37b8, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l37b8);
+	mov(A1, qword[ARG_BIAS]);
+	movd(dword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x4);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l3694, T_NEAR);
+	align(4);
+
+L(l37d8);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+#undef ARG_BIAS
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bn_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bn_kern.cpp
new file mode 100644
index 000000000..c7f1393c9
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bn_kern.cpp
@@ -0,0 +1,821 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_sum_bn_kern::jit_avx512_core_u8_copy_sum_bn_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#define ARG_BIAS	24+stacksize+rsp
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+#define ARG_BIAS	72+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l20;
+Xbyak::Label l22c;
+Xbyak::Label l340;
+Xbyak::Label l3f8;
+Xbyak::Label l48;
+Xbyak::Label l498;
+Xbyak::Label l51c;
+Xbyak::Label l540;
+Xbyak::Label l54c;
+Xbyak::Label l56c;
+Xbyak::Label l664;
+Xbyak::Label l6f8;
+Xbyak::Label l75c;
+Xbyak::Label l7b4;
+Xbyak::Label l7fc;
+Xbyak::Label l81c;
+Xbyak::Label l828;
+Xbyak::Label l848;
+Xbyak::Label l8d8;
+Xbyak::Label l930;
+Xbyak::Label l974;
+Xbyak::Label l9b8;
+Xbyak::Label l9ec;
+Xbyak::Label la0a;
+Xbyak::Label la14;
+Xbyak::Label la28;
+Xbyak::Label la6c;
+Xbyak::Label laa8;
+Xbyak::Label lae0;
+Xbyak::Label lb14;
+Xbyak::Label lb38;
+Xbyak::Label lb58;
+
+	preamble();
+	auto stacksize = get_size_of_abi_save_regs();
+#ifdef _WIN32
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(N, qword[N]);
+	mov(M, qword[M]);
+	mov(LDA, qword[LDA]);
+	sub(A, -128);
+	sub(B, -128);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	cmp(N, 0x8);
+	jl(l540, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*4]);
+	lea(I, ptr[A1+LDA*8]);
+	mov(A, I);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l22c, T_NEAR);
+	align(4);
+
+L(l48);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	movdqu(xmm2, xword[A1+LDA*2-0x80]);
+	movdqu(xmm3, xword[A1+LDA3*1-0x80]);
+	sub(A1, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x60], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x40], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x20], xmm3);
+	movdqu(xmm0, xword[A2-0x80]);
+	movdqu(xmm1, xword[A2+LDA*1-0x80]);
+	movdqu(xmm2, xword[A2+LDA*2-0x80]);
+	movdqu(xmm3, xword[A2+LDA3*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x50], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x30], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x10], xmm3);
+	sub(B, -128);
+	dec(I);
+	jg(l48, T_NEAR);
+	align(4);
+
+L(l22c);
+	test(M, 0x8);
+	jle(l340, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	movq(xmm2, qword[A1+LDA*2-0x80]);
+	movq(xmm3, qword[A1+LDA3*1-0x80]);
+	sub(A1, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x60], xmm1);
+	movq(xmm0, qword[A2-0x80]);
+	movq(xmm1, qword[A2+LDA*1-0x80]);
+	movq(xmm2, qword[A2+LDA*2-0x80]);
+	movq(xmm3, qword[A2+LDA3*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	align(4);
+
+L(l340);
+	test(M, 0x4);
+	jle(l3f8, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	movd(xmm2, dword[A1+LDA*2-0x80]);
+	movd(xmm3, dword[A1+LDA3*1-0x80]);
+	sub(A1, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A2-0x80]);
+	movd(xmm1, dword[A2+LDA*1-0x80]);
+	movd(xmm2, dword[A2+LDA*2-0x80]);
+	movd(xmm3, dword[A2+LDA3*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	align(4);
+
+L(l3f8);
+	test(M, 0x2);
+	jle(l498, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A1+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A1+LDA3*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x3);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x4);
+	mov(ax, word[A2+LDA*1-0x80]);
+	pinsrw(xmm0, eax, 0x5);
+	mov(ax, word[A2+LDA*2-0x80]);
+	pinsrw(xmm0, eax, 0x6);
+	mov(ax, word[A2+LDA3*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l498);
+	test(M, 0x1);
+	jle(l51c, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A2+LDA*2-0x80]);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A2+LDA3*1-0x80]);
+	pinsrb(xmm0, eax, 0x7);
+	pmovsxbd(xmm5, xmm0);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm8, xmm5);
+	paddd(xmm9, xmm6);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l51c);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	add(qword[ARG_BIAS], 0x20);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l20, T_NEAR);
+	align(4);
+
+L(l540);
+	cmp(N, 0x4);
+	jl(l81c, T_NEAR);
+	align(4);
+
+L(l54c);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*2]);
+	lea(I, ptr[A1+LDA*4]);
+	mov(A, I);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l664, T_NEAR);
+	align(4);
+
+L(l56c);
+	movdqu(xmm0, xword[A1-0x80]);
+	movdqu(xmm1, xword[A1+LDA*1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm2, xword[A2-0x80]);
+	movdqu(xmm3, xword[A2+LDA*1-0x80]);
+	sub(A2, -16);
+	movdqa(xmm4, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm4, xmm1);
+	movdqa(xmm5, xmm2);
+	punpckldq(xmm2, xmm3);
+	punpckhdq(xmm5, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	movdqa(xmm3, xmm4);
+	punpcklqdq(xmm4, xmm5);
+	punpckhqdq(xmm3, xmm5);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm1);
+	pmovsxbw(xmm5, xmm4);
+	movhlps(xmm6, xmm4);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x60], xmm4);
+	pmovsxbw(xmm5, xmm3);
+	movhlps(xmm6, xmm3);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x50], xmm3);
+	sub(B, -64);
+	dec(I);
+	jg(l56c, T_NEAR);
+	align(4);
+
+L(l664);
+	test(M, 0x8);
+	jle(l6f8, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	movq(xmm1, qword[A1+LDA*1-0x80]);
+	sub(A1, -8);
+	movq(xmm2, qword[A2-0x80]);
+	movq(xmm3, qword[A2+LDA*1-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklqdq(xmm0, xmm2);
+	punpckhqdq(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l6f8);
+	test(M, 0x4);
+	jle(l75c, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	movd(xmm1, dword[A1+LDA*1-0x80]);
+	sub(A1, -4);
+	movd(xmm2, dword[A2-0x80]);
+	movd(xmm3, dword[A2+LDA*1-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	punpckldq(xmm2, xmm3);
+	punpcklqdq(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l75c);
+	test(M, 0x2);
+	jle(l7b4, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1+LDA*1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x1);
+	mov(ax, word[A2-0x80]);
+	pinsrw(xmm0, eax, 0x2);
+	mov(ax, word[A2+LDA*1-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x3);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l7b4);
+	test(M, 0x1);
+	jle(l7fc, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A2+LDA*1-0x80]);
+	pinsrb(xmm0, eax, 0x3);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l7fc);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x10);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l54c, T_NEAR);
+	align(4);
+
+L(l81c);
+	cmp(N, 0x2);
+	jl(la0a, T_NEAR);
+	align(4);
+
+L(l828);
+	mov(A1, A);
+	lea(A2, ptr[A1+LDA*1]);
+	lea(I, ptr[A1+LDA*2]);
+	mov(A, I);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(l8d8, T_NEAR);
+	align(4);
+
+L(l848);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	movdqu(xmm1, xword[A2-0x80]);
+	sub(A2, -16);
+	movdqa(xmm2, xmm0);
+	punpckldq(xmm0, xmm1);
+	punpckhdq(xmm2, xmm1);
+	pshufd(xmm6, xmm0, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	pshufd(xmm6, xmm2, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm2);
+	sub(B, -32);
+	dec(I);
+	jg(l848, T_NEAR);
+	align(4);
+
+L(l8d8);
+	test(M, 0x8);
+	jle(l930, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	movq(xmm1, qword[A2-0x80]);
+	sub(A2, -8);
+	punpckldq(xmm0, xmm1);
+	pshufd(xmm6, xmm0, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l930);
+	test(M, 0x4);
+	jle(l974, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	movd(xmm1, dword[A2-0x80]);
+	sub(A2, -4);
+	punpckldq(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l974);
+	test(M, 0x2);
+	jle(l9b8, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	sub(A1, -2);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A2-0x80]);
+	sub(A2, -2);
+	pinsrw(xmm0, eax, 0x1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l9b8);
+	test(M, 0x1);
+	jle(l9ec, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A2-0x80]);
+	pinsrb(xmm0, eax, 0x1);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	align(4);
+
+L(l9ec);
+	mov(A1, qword[ARG_BIAS]);
+	movq(qword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x8);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l828, T_NEAR);
+	align(4);
+
+L(la0a);
+	cmp(N, 0x1);
+	jl(lb58, T_NEAR);
+	align(4);
+
+L(la14);
+	mov(A1, A);
+	add(A, LDA);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x4);
+	jle(la6c, T_NEAR);
+	align(4);
+
+L(la28);
+	movdqu(xmm0, xword[A1-0x80]);
+	sub(A1, -16);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(I);
+	jg(la28, T_NEAR);
+	align(4);
+
+L(la6c);
+	test(M, 0x8);
+	jle(laa8, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	sub(A1, -8);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(laa8);
+	test(M, 0x4);
+	jle(lae0, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	sub(A1, -4);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(lae0);
+	test(M, 0x2);
+	jle(lb14, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	mov(word[B-0x80], ax);
+	sub(A1, -2);
+	sub(B, -2);
+	align(4);
+
+L(lb14);
+	test(M, 0x1);
+	jle(lb38, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrb(xmm0, eax, 0x0);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(lb38);
+	mov(A1, qword[ARG_BIAS]);
+	movd(dword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x4);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(la14, T_NEAR);
+	align(4);
+
+L(lb58);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+#undef ARG_BIAS
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bt_kern.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bt_kern.cpp
new file mode 100644
index 000000000..afe4f1713
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_sum_bt_kern.cpp
@@ -0,0 +1,647 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_generator.hpp"
+#include "common.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+jit_avx512_core_u8_copy_sum_bt_kern::jit_avx512_core_u8_copy_sum_bt_kern(): jit_generator(nullptr, GEMM_CODE_SIZE)
+{
+
+#ifndef _WIN32
+#define M	rdi
+#define N	rsi
+#define A	rdx
+#define LDA	rcx
+#define ALPHA	r8
+#define B	r9
+
+#define I	rax
+#define A1	r10
+#define A2	r8
+#define LDA3	r11
+
+#define ARG_BIAS	24+stacksize+rsp
+
+#else
+
+#define M	rcx
+#define N	rdx
+#define A	r8
+#define LDA	r9
+#define ALPHA	rax
+#define B	rdi
+
+#define I	rax
+#define A1	rsi
+#define A2	r10
+#define LDA3	r11
+
+#define ARG_ALPHA	40+stacksize+rsp
+#define ARG_B		48+stacksize+rsp
+#define ARG_BIAS	72+stacksize+rsp
+
+#endif
+
+inLocalLabel();
+{
+
+Xbyak::Label l15c;
+Xbyak::Label l1f4;
+Xbyak::Label l20;
+Xbyak::Label l248;
+Xbyak::Label l280;
+Xbyak::Label l2a4;
+Xbyak::Label l2b0;
+Xbyak::Label l2c8;
+Xbyak::Label l384;
+Xbyak::Label l3e8;
+Xbyak::Label l40;
+Xbyak::Label l424;
+Xbyak::Label l448;
+Xbyak::Label l468;
+Xbyak::Label l474;
+Xbyak::Label l48c;
+Xbyak::Label l550;
+Xbyak::Label l5bc;
+Xbyak::Label l600;
+Xbyak::Label l628;
+Xbyak::Label l646;
+Xbyak::Label l650;
+Xbyak::Label l668;
+Xbyak::Label l700;
+Xbyak::Label l760;
+Xbyak::Label l7a4;
+Xbyak::Label l7c8;
+Xbyak::Label l7e8;
+
+	preamble();
+	auto stacksize = get_size_of_abi_save_regs();
+#ifdef _WIN32
+	mov(ALPHA, ptr[ARG_ALPHA]);
+	mov(B, ptr[ARG_B]);
+#endif
+
+	mov(M, qword[M]);
+	mov(N, qword[N]);
+	mov(LDA, qword[LDA]);
+	lea(LDA3, ptr[LDA+LDA*2]);
+	sub(A, -128);
+	sub(B, -128);
+	cmp(N, 0x8);
+	jl(l2a4, T_NEAR);
+	align(4);
+
+L(l20);
+	mov(A1, A);
+	add(A, 0x8);
+	pxor(xmm8, xmm8);
+	pxor(xmm9, xmm9);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(l15c, T_NEAR);
+	align(4);
+
+L(l40);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x60], xmm0);
+	movdqu(xword[B-0x50], xmm1);
+	sub(B, -64);
+	dec(I);
+	jg(l40, T_NEAR);
+	align(4);
+
+L(l15c);
+	test(M, 0x4);
+	jle(l1f4, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm2, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm3, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	movdqa(xmm1, xmm0);
+	punpcklwd(xmm0, xmm2);
+	punpckhwd(xmm1, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	pmovsxbw(xmm5, xmm1);
+	movhlps(xmm6, xmm1);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm9, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movdqu(xword[B-0x70], xmm1);
+	sub(B, -32);
+	align(4);
+
+L(l1f4);
+	test(M, 0x2);
+	jle(l248, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	movq(xmm1, qword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm8, xmm5);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm6, xmm6);
+	pmovsxwd(xmm6, xmm6);
+	paddd(xmm9, xmm6);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l248);
+	test(M, 0x1);
+	jle(l280, T_NEAR);
+	movq(xmm0, qword[A1-0x80]);
+	add(A1, LDA);
+	pmovsxbd(xmm5, xmm0);
+	pshufd(xmm6, xmm0, 0x55);
+	pmovsxbd(xmm6, xmm6);
+	paddd(xmm8, xmm5);
+	paddd(xmm9, xmm6);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l280);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm8);
+	movdqu(xword[A1+0x10], xmm9);
+	add(qword[ARG_BIAS], 0x20);
+	sub(N, 0x8);
+	cmp(N, 0x8);
+	jge(l20, T_NEAR);
+	align(4);
+
+L(l2a4);
+	cmp(N, 0x4);
+	jl(l468, T_NEAR);
+	align(4);
+
+L(l2b0);
+	mov(A1, A);
+	add(A, 0x4);
+	pxor(xmm7, xmm7);
+	mov(I, M);
+	sar(I, 0x3);
+	jle(l384, T_NEAR);
+	align(4);
+
+L(l2c8);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x70], xmm0);
+	sub(B, -32);
+	dec(I);
+	jg(l2c8, T_NEAR);
+	align(4);
+
+L(l384);
+	test(M, 0x4);
+	jle(l3e8, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm2, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm3, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	movhlps(xmm6, xmm0);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	align(4);
+
+L(l3e8);
+	test(M, 0x2);
+	jle(l424, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	add(A1, LDA);
+	movd(xmm1, dword[A1-0x80]);
+	add(A1, LDA);
+	punpcklbw(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l424);
+	test(M, 0x1);
+	jle(l448, T_NEAR);
+	movd(xmm0, dword[A1-0x80]);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l448);
+	mov(A1, qword[ARG_BIAS]);
+	movdqu(xword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x10);
+	sub(N, 0x4);
+	cmp(N, 0x4);
+	jge(l2b0, T_NEAR);
+	align(4);
+
+L(l468);
+	cmp(N, 0x2);
+	jl(l646, T_NEAR);
+	align(4);
+
+L(l474);
+	mov(A1, A);
+	add(A, 0x2);
+	pxor(xmm7, xmm7);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l550, T_NEAR);
+	align(4);
+
+L(l48c);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm4, eax, 0x0);
+	punpcklbw(xmm1, xmm2);
+	punpcklbw(xmm3, xmm4);
+	punpcklwd(xmm1, xmm3);
+	punpcklqdq(xmm0, xmm1);
+	pshufd(xmm6, xmm0, 0xd8);
+	pmovsxbw(xmm5, xmm6);
+	movhlps(xmm6, xmm6);
+	pmovsxbw(xmm6, xmm6);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movdqu(xword[B-0x80], xmm0);
+	sub(B, -16);
+	dec(LDA3);
+	jg(l48c, T_NEAR);
+	align(4);
+
+L(l550);
+	test(M, 0x4);
+	jle(l5bc, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm2, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm3, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	punpcklbw(xmm2, xmm3);
+	punpcklwd(xmm0, xmm2);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	align(4);
+
+L(l5bc);
+	test(M, 0x2);
+	jle(l600, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm0, eax, 0x0);
+	mov(ax, word[A1-0x80]);
+	add(A1, LDA);
+	pinsrw(xmm1, eax, 0x0);
+	punpcklbw(xmm0, xmm1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l600);
+	test(M, 0x1);
+	jle(l628, T_NEAR);
+	mov(ax, word[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	mov(word[B-0x80], ax);
+	sub(B, -2);
+	align(4);
+
+L(l628);
+	mov(A1, qword[ARG_BIAS]);
+	movq(qword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x8);
+	sub(N, 0x2);
+	cmp(N, 0x2);
+	jge(l474, T_NEAR);
+	align(4);
+
+L(l646);
+	cmp(N, 0x1);
+	jl(l7e8, T_NEAR);
+	align(4);
+
+L(l650);
+	mov(A1, A);
+	add(A, 0x1);
+	pxor(xmm7, xmm7);
+	mov(LDA3, M);
+	sar(LDA3, 0x3);
+	jle(l700, T_NEAR);
+	align(4);
+
+L(l668);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x4);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x5);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x6);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x7);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm6);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movq(qword[B-0x80], xmm0);
+	sub(B, -8);
+	dec(LDA3);
+	jg(l668, T_NEAR);
+	align(4);
+
+L(l700);
+	test(M, 0x4);
+	jle(l760, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x2);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x3);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	movd(dword[B-0x80], xmm0);
+	sub(B, -4);
+	align(4);
+
+L(l760);
+	test(M, 0x2);
+	jle(l7a4, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x0);
+	mov(byte[B-0x80], al);
+	mov(al, byte[A1-0x80]);
+	add(A1, LDA);
+	pinsrb(xmm0, eax, 0x1);
+	pmovsxbw(xmm5, xmm0);
+	phaddw(xmm5, xmm5);
+	pmovsxwd(xmm5, xmm5);
+	paddd(xmm7, xmm5);
+	mov(byte[B-0x7f], al);
+	sub(B, -2);
+	align(4);
+
+L(l7a4);
+	test(M, 0x1);
+	jle(l7c8, T_NEAR);
+	mov(al, byte[A1-0x80]);
+	pinsrw(xmm0, eax, 0x0);
+	pmovsxbd(xmm5, xmm0);
+	paddd(xmm7, xmm5);
+	mov(byte[B-0x80], al);
+	sub(B, -1);
+	align(4);
+
+L(l7c8);
+	mov(A1, qword[ARG_BIAS]);
+	movd(dword[A1], xmm7);
+	add(qword[ARG_BIAS], 0x4);
+	sub(N, 0x1);
+	cmp(N, 0x1);
+	jge(l650, T_NEAR);
+	align(4);
+
+L(l7e8);
+
+	postamble();
+}
+outLocalLabel();
+
+#undef M
+#undef N
+#undef A
+#undef LDA
+#undef ALPHA
+#undef B
+#undef I
+#undef A1
+#undef A2
+#undef LDA3
+#ifdef _WIN32
+#undef ARG_ALPHA
+#undef ARG_B
+#endif
+#undef ARG_BIAS
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.cpp
new file mode 100644
index 000000000..4fc11afcb
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.cpp
@@ -0,0 +1,116 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cstdint>
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
+#include "../f32/ref_gemm_f32.hpp"
+#include "jit_generator.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <typename b_dt>
+mkldnn_status_t ref_gemm_s8x8s32(const char *transa, const char *transb,
+        const char *offsetc, const int *M, const int *N, const int *K,
+        const float *alpha, const int8_t *A, const int *LDA, const int8_t *ao,
+        const b_dt *B, const int *LDB, const int8_t *bo, const float *beta,
+        int32_t *C, const int *LDC, const int32_t *co) {
+
+    if (*M == 0 || *N == 0 || *K == 0)
+        return mkldnn_success;
+
+    bool OCisR = (*offsetc == 'R' || *offsetc == 'r');
+    bool OCisC = (*offsetc == 'C' || *offsetc == 'c');
+    bool AisN = (*transa == 'N' || *transa == 'n');
+    bool BisN = (*transb == 'N' || *transb == 'n');
+
+    int m = *M, n = *N, k = *K, lda = *LDA, ldb = *LDB, ldc = *LDC;
+    size_t sizeA = AisN ? lda * k : lda * m;
+    size_t sizeB = BisN ? ldb * n : ldb * k;
+    size_t sizeC = ldc * n;
+
+    double *dA = (double *)malloc(sizeA * sizeof(double), PAGE_4K);
+    double *dB = (double *)malloc(sizeB * sizeof(double), PAGE_4K);
+    double *dC = (double *)malloc(sizeC * sizeof(double), PAGE_4K);
+
+    if (utils::any_null(dA, dB, dC)) {
+        free(dA);
+        free(dB);
+        free(dC);
+        return mkldnn_out_of_memory;
+    }
+
+    auto da_setter = [=] (int i, int j, double v) { dA[j * lda + i] = v; };
+    auto db_setter = [=] (int i, int j, double v) { dB[j * ldb + i] = v; };
+
+    auto ia_accessor = [=] (int i, int j) { return A[j * lda + i]; };
+    auto ib_accessor = [=] (int i, int j) { return B[j * ldb + i]; };
+
+    const int a_rows = AisN ? m : k;
+    const int a_cols = AisN ? k : m;
+    mkldnn::impl::parallel_nd(a_cols, a_rows, [&](int j, int i) {
+        da_setter(i, j,
+            static_cast<double>(ia_accessor(i, j)) + static_cast<double>(ao[0]));
+    });
+
+    const int b_rows = BisN ? k : n;
+    const int b_cols = BisN ? n : k;
+    mkldnn::impl::parallel_nd(b_cols, b_rows, [&](int j, int i) {
+        db_setter(i, j,
+            static_cast<double>(ib_accessor(i, j)) + static_cast<double>(bo[0]));
+    });
+    double one = 1.0, zero = 0.0;
+    ref_gemm<double>(transa, transb, M, N, K, &one, dA, LDA, dB, LDB, &zero,
+        dC, LDC, nullptr);
+
+    auto i2d = [=] (int32_t v) { return static_cast<double>(v); };
+    auto f2d = [=] (float v)   { return static_cast<double>(v); };
+
+    mkldnn::impl::parallel_nd(n, m, [&] (int j, int i) {
+        double coffset = OCisR ? i2d(co[j]) : OCisC ? i2d(co[i]) : i2d(co[0]);
+        double val = ((*beta == 0.0f) ? 0.0 : f2d(*beta) * i2d(C[i + j * ldc]))
+            + f2d(*alpha) * dC[i + j * ldc] + coffset;
+        C[i + j * ldc] = math::out_round<int32_t>(math::saturate<int32_t>(val));
+    });
+
+    free(dA);
+    free(dB);
+    free(dC);
+    return mkldnn_success;
+}
+
+template mkldnn_status_t ref_gemm_s8x8s32<uint8_t>(
+        const char *transa, const char *transb, const char *offsetc,
+        const int *M, const int *N, const int *K,
+        const float *alpha, const int8_t *A, const int *LDA, const int8_t *ao,
+        const uint8_t *B, const int *LDB, const int8_t *bo,
+        const float *beta, int32_t *C, const int *LDC, const int32_t *co);
+
+template mkldnn_status_t ref_gemm_s8x8s32<int8_t>(
+        const char *transa, const char *transb, const char *offsetc,
+        const int *M, const int *N, const int *K,
+        const float *alpha, const int8_t *A, const int *LDA, const int8_t *ao,
+        const int8_t *B, const int *LDB, const int8_t *bo,
+        const float *beta, int32_t *C, const int *LDC, const int32_t *co);
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.hpp
new file mode 100644
index 000000000..67b6c594a
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm/s8x8s32/ref_gemm_s8x8s32.hpp
@@ -0,0 +1,39 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef REF_GEMM_S8X8S32_HPP
+#define REF_GEMM_S8X8S32_HPP
+
+#include <stdint.h>
+
+#include "mkldnn_types.h"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <typename b_dt>
+mkldnn_status_t ref_gemm_s8x8s32(const char *transa, const char *transb,
+        const char *offsetc, const int *M, const int *N, const int *K,
+        const float *alpha, const int8_t *A, const int *LDA, const int8_t *ao,
+        const b_dt *B, const int *LDB, const int8_t *bo, const float *beta,
+        int32_t *C, const int *LDC, const int32_t *co);
+
+}
+}
+}
+#endif
+
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.cpp
index c403e4599..154b5c303 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.cpp
@@ -14,7 +14,6 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <common/primitive_attr.hpp>
 #include "mkldnn_types.h"
 
 #include "c_types_map.hpp"
@@ -22,7 +21,6 @@
 #include "utils.hpp"
 #include "type_helpers.hpp"
 #include "mkldnn_thread.hpp"
-
 #include "ref_eltwise.hpp"
 
 namespace mkldnn {
@@ -31,20 +29,22 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
-template <bool with_relu>
-void _gemm_convolution_fwd_t<with_relu>::execute_forward() {
+void gemm_convolution_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t*>(this->memory());
 
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
-    const int MB = conf_.MB();
+    auto col = scratchpad().get<data_t>(key_conv_gemm_col);
+
+    const auto &jcp = this->pd()->jcp_;
+    const int MB = pd()->MB();
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
 
     const int M = jcp.os * jcp.od;
     const size_t src_step = (src_d.blk_off(1) - src_d.off_l(0)) / jcp.ngroups;
@@ -53,60 +53,68 @@ void _gemm_convolution_fwd_t<with_relu>::execute_forward() {
     src += src_d.off_l(0);
     dst += dst_d.off_l(0);
 
+    assert(IMPLICATION(
+            jcp.id != 1, jcp.oh_block == jcp.oh && jcp.ow_block == jcp.ow));
+    assert(IMPLICATION(jcp.ow_block != jcp.ow, jcp.oh_block == 1));
+
     const int K = jcp.ic * jcp.ks;
     const int N = jcp.oc;
-    const int m = jcp.os;
-    const int LDA = jcp.im2col_sz ? m : M;
-
-    const data_t one = 1.0;
-
-    data_t *col = (jcp.im2col_sz)
-        ? (data_t *)this->scratchpad_->get()
-        : nullptr;
 
-    parallel_nd(jcp.im2col_sz * jcp.nthr,
-            [&](ptrdiff_t i) { col[i] = (data_t)0; });
+    if (jcp.im2col_sz && jcp.id != 1)
+        parallel_nd(jcp.im2col_sz * jcp.nthr,
+                [&](ptrdiff_t i) { col[i] = (data_t)0; });
 
-    const size_t work_amount = jcp.ngroups * MB * jcp.od;
+    const int nb_oh = div_up(jcp.oh, jcp.oh_block);
+    const int nb_ow = div_up(jcp.ow, jcp.ow_block);
+    const size_t work_amount = jcp.ngroups * MB * jcp.od * nb_oh * nb_ow;
     parallel(jcp.nthr, [&](const int ithr, const int nthr) {
         data_t *_col = col + (ptrdiff_t)ithr * jcp.im2col_sz;
 
-        int g{0}, n{0}, od{0};
+        int g{ 0 }, n{ 0 }, od{ 0 }, ohb{ 0 }, owb{ 0 };
         size_t start = 0, end = 0;
 
         balance211(work_amount, nthr, ithr, start, end);
-        nd_iterator_init(start, g, jcp.ngroups, n, MB, od, jcp.od);
-
+        nd_iterator_init(start, g, jcp.ngroups, n, MB, od, jcp.od, ohb,
+                nb_oh, owb, nb_ow);
         for (size_t iwork = start; iwork < end; ++iwork) {
+            int oh = ohb * jcp.oh_block;
+            int ow = owb * jcp.ow_block;
             const data_t *_src = src + (n * jcp.ngroups + g) * src_step;
             const data_t *_weights = weights + g * weights_g_size;
-            data_t *_dst = dst + (n * jcp.ngroups + g) * dst_step;
-
+            data_t *_dst_im = dst + (n * jcp.ngroups + g) * dst_step;
+            const int h_step = nstl::min(jcp.oh_block, jcp.oh - oh);
+            const int w_step = nstl::min(jcp.ow_block, jcp.ow - ow);
             if (jcp.im2col_sz) {
                 if (jcp.id == 1)
-                    jit_gemm_convolution_utils::im2col(jcp, _src, _col);
+                    jit_gemm_convolution_utils::im2col(
+                            jcp, _src, _col, oh, h_step, ow, w_step);
                 else
                     jit_gemm_convolution_utils::im2col_3d(jcp, _src, _col, od);
             }
 
             const data_t one = 1.0;
+
+            const int m = h_step * w_step;
+            const int LDA = jcp.im2col_sz ? m : M;
+            data_t *_dst = _dst_im + od * jcp.os + oh * jcp.ow + ow;
+
             extended_sgemm("N", "N", &m, &N, &K, &one,
                     jcp.im2col_sz ? _col : _src + od * m, &LDA, _weights, &K,
-                    &this->beta_, _dst + od * m, &M);
+                    &this->beta_, _dst, &M);
 
-            const auto &p = conf_.attr()->post_ops_;
+            data_t *d = _dst;
+            const auto &p = pd()->attr()->post_ops_;
             bool need_bias = jcp.with_bias;
             if (use_fast_relu) {
-                data_t *d = _dst + od * m;
-
-                for (int oc = 0; oc < jcp.oc; ++oc) {
+                parallel_nd(jcp.oc, [&](const int oc) {
                     data_t b = need_bias ? bias[g * jcp.oc + oc] : 0;
+                    data_t *d_ = d + oc * M;
+                    PRAGMA_OMP_SIMD()
                     for (int oS = 0; oS < m; ++oS) {
-                        d[oS] += b;
-                        if (d[oS] < 0) d[oS] *= fast_relu_ns;
+                        d_[oS] += b;
+                        if (d_[oS] < 0) d_[oS] *= fast_relu_ns;
                     }
-                    d += M;
-                }
+                });
 
                 need_bias = false;
             } else if (p.len_ > 0) {
@@ -114,17 +122,17 @@ void _gemm_convolution_fwd_t<with_relu>::execute_forward() {
                 int depthwise_inj_idx = 0;
 
                 for (int i = 0; i < p.len_; i++) {
-                    data_t *d = _dst + od * m;
                     auto& post_op = p.entry_[i];
                     if (post_op.is_eltwise()) {
-                        for (int oc = 0; oc < jcp.oc; ++oc) {
+                        parallel_nd(jcp.oc, [&](const int oc) {
                             data_t b = need_bias ? bias[g * jcp.oc + oc] : 0;
+                            data_t *d_ = d + oc * M;
+                            PRAGMA_OMP_SIMD()
                             for (int oS = 0; oS < m; ++oS) {
-                                d[oS] += b;
-                                d[oS] = eltwise_injectors[eltwise_inj_idx]->compute_scalar(d[oS]);
+                                d_[oS] += b;
+                                d_[oS] = eltwise_injectors[eltwise_inj_idx]->compute_scalar(d_[oS]);
                             }
-                            d += M;
-                        }
+                        });
 
                         eltwise_inj_idx++;
                         need_bias = false;
@@ -132,16 +140,17 @@ void _gemm_convolution_fwd_t<with_relu>::execute_forward() {
                         auto depthwise_weights = post_op.depthwise.weights_data;
                         auto depthwise_bias = post_op.depthwise.biases_data;
 
-                        for (int oc = 0; oc < jcp.oc; ++oc) {
+                        parallel_nd(jcp.oc, [&](const int oc) {
                             data_t b = need_bias ? bias[g * jcp.oc + oc] : 0;
+                            data_t *d_ = d + oc * M;
+                            PRAGMA_OMP_SIMD()
                             for (int oS = 0; oS < m; ++oS) {
-                                d[oS] += b;
-                                d[oS] = depthwise_injectors[depthwise_inj_idx]->compute_scalar(d[oS],
+                                d_[oS] += b;
+                                d_[oS] = depthwise_injectors[depthwise_inj_idx]->compute_scalar(d_[oS],
                                                                   depthwise_weights + g * jcp.oc + oc,
                                                                   depthwise_bias + g * jcp.oc + oc);
                             }
-                            d += M;
-                        }
+                        });
 
                         depthwise_inj_idx++;
                         need_bias = false;
@@ -150,46 +159,53 @@ void _gemm_convolution_fwd_t<with_relu>::execute_forward() {
             }
 
             if (need_bias) {
-                data_t *d = _dst + od * m;
-
-                for (int oc = 0; oc < jcp.oc; ++oc) {
+                parallel_nd(jcp.oc, [&](const int oc) {
                     data_t b = bias[g * jcp.oc + oc];
+                    data_t *d_ = d + oc * M;
+                    PRAGMA_OMP_SIMD()
                     for (int oS = 0; oS < m; ++oS) {
-                        d[oS] += b;
+                        d_[oS] += b;
                     }
-                    d += M;
-                }
+                });
             }
 
-            nd_iterator_step(g, jcp.ngroups, n, MB, od, jcp.od);
+            nd_iterator_step(g, jcp.ngroups, n, MB, od, jcp.od, ohb, nb_oh,
+                    owb, nb_ow);
         }
     });
 }
 
-void gemm_convolution_bwd_data_t::execute_backward_data() {
+void gemm_convolution_bwd_data_t::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t*>(this->memory());
 
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
-    const int MB = conf_.MB();
+    auto col = scratchpad().get<data_t>(key_conv_gemm_col);
+
+    const auto &jcp = this->pd()->jcp_;
+    const int MB = pd()->MB();
 
     const int M = jcp.os * jcp.od;
-    const size_t src_step = jcp.ic * jcp.ih * jcp.iw * jcp.id;
-    const size_t dst_step = jcp.oc * M;
+    const size_t src_step_to_clean = jcp.ic * jcp.ih * jcp.iw * jcp.id;
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const size_t src_step = diff_src_d.blk_off(1) / jcp.ngroups;
+    const size_t dst_step = diff_dst_d.blk_off(1) / jcp.ngroups;
     const size_t weights_g_size = jcp.ic * jcp.oc * jcp.ks;
 
     const int m = jcp.os;
     const int K = jcp.oc;
     const int N = jcp.ic * jcp.ks;
     const int LDC = jcp.im2col_sz ? m : M;
-    data_t *col = jcp.im2col_sz ? (data_t *)this->scratchpad_->get() : nullptr;
 
     const size_t work_amount = (size_t)jcp.ngroups * MB;
 
     if (jcp.id > 1) {
-        const ptrdiff_t diff_src_sz = (ptrdiff_t)(work_amount * src_step);
-        parallel_nd(diff_src_sz, [&](ptrdiff_t i) { diff_src[i] = (data_t)0; });
+        for (size_t j = 0; j < work_amount; j++) {
+            int j_step = src_step * j;
+            const ptrdiff_t diff_src_sz = (ptrdiff_t)(src_step_to_clean);
+            parallel_nd(diff_src_sz, [&](ptrdiff_t i) { diff_src[j_step + i] = (data_t)0; });
+        }
     }
 
     parallel(jcp.nthr, [&](const int ithr, const int nthr) {
@@ -201,7 +217,7 @@ void gemm_convolution_bwd_data_t::execute_backward_data() {
         nd_iterator_init(start, g, jcp.ngroups, n, MB);
         for (size_t iwork = start; iwork < end; ++iwork) {
 
-            data_t *_diff_src = diff_src + (n * jcp.ngroups + g)*src_step;
+            data_t *_diff_src = diff_src + (n * jcp.ngroups + g) * src_step;
             const data_t *_weights = weights + g * weights_g_size;
             for (int od = 0; od < jcp.od; ++od) {
                 const data_t *_diff_dst = diff_dst + (n * jcp.ngroups + g)
@@ -226,13 +242,17 @@ void gemm_convolution_bwd_data_t::execute_backward_data() {
     });
 }
 
-void gemm_convolution_bwd_weights_t::execute_backward_weights() {
+void gemm_convolution_bwd_weights_t::execute_backward_weights() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_weights = reinterpret_cast<data_t*>(this->memory(0));
     auto diff_bias = reinterpret_cast<data_t *>(this->memory(1));
 
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
+    auto col = scratchpad().get<data_t>(key_conv_gemm_col);
+    auto wei_reduction = scratchpad().get<data_t>(key_conv_wei_reduction);
+
+    const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
+
     const int K = jcp.os * jcp.od;
     const size_t src_step = jcp.ic * jcp.ih * jcp.iw * jcp.id;
     const size_t dst_step = jcp.oc * K;
@@ -243,15 +263,6 @@ void gemm_convolution_bwd_weights_t::execute_backward_weights() {
     const int M = jcp.ic * jcp.ks;
     const int LDA = jcp.im2col_sz ? k : K;
 
-    data_t *col = nullptr, *wei_reduction = nullptr;
-    ptrdiff_t wei_offset = 0;
-    if (jcp.im2col_sz) {
-        col = (data_t *)this->scratchpad_->get();
-        wei_offset = jcp.im2col_sz * jcp.nthr;
-    }
-    if (jcp.need_wei_reduction)
-        wei_reduction = (data_t *)this->scratchpad_->get() + wei_offset;
-
     parallel_nd(jcp.im2col_sz * jcp.nthr,
             [&](ptrdiff_t i) { col[i] = (data_t)0; });
 
@@ -289,7 +300,8 @@ void gemm_convolution_bwd_weights_t::execute_backward_weights() {
 
                     if (jcp.im2col_sz) {
                         if (jcp.id == 1)
-                            jit_gemm_convolution_utils::im2col(jcp, _src, _col);
+                            jit_gemm_convolution_utils::im2col(
+                                    jcp, _src, _col, 0, jcp.oh, 0, jcp.ow);
                         else
                             jit_gemm_convolution_utils::im2col_3d(jcp, _src,
                                 _col, od);
@@ -331,13 +343,10 @@ void gemm_convolution_bwd_weights_t::execute_backward_weights() {
                 }
             }
             diff_bias[g*jcp.oc+oc] = db;
-            nd_iterator_step(g, jcp.ngroups, oc, jcp.oc);
         });
     }
 }
 
-template struct _gemm_convolution_fwd_t<true>;
-template struct _gemm_convolution_fwd_t<false>;
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.hpp
index d0d65c1e1..2a0da52b5 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution.hpp
@@ -18,11 +18,12 @@
 #define CPU_JIT_GEMM_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "gemm_convolution_utils.hpp"
 #include "gemm/gemm.hpp"
-#include "scratchpad.hpp"
 #include "ref_eltwise.hpp"
 #include "ref_depthwise.hpp"
 
@@ -30,34 +31,15 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu>
-struct _gemm_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
+struct gemm_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
         pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
-                const primitive_attr_t *attr,
+                const convolution_desc_t *adesc, const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_() {}
 
-        DECLARE_COMMON_PD_T(GEMM_IMPL_STR, _gemm_convolution_fwd_t<with_relu>);
-
-        inline memory_format_t src_format()
-        {
-            using namespace memory_format;
-            return (utils::pick(this->cdesc_().src_desc.ndims - 3,
-                ncw, nchw, ncdhw));
-        }
-        inline memory_format_t wei_format()
-        {
-            using namespace memory_format;
-            return (this->with_groups()
-                ? utils::pick(this->cdesc_().src_desc.ndims - 3,
-                    goiw, goihw, goidhw)
-                : utils::pick(this->cdesc_().src_desc.ndims - 3,
-                    oiw, oihw, oidhw));
-        }
+        DECLARE_COMMON_PD_T(GEMM_IMPL_STR, gemm_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
@@ -67,26 +49,47 @@ struct _gemm_convolution_fwd_t: public cpu_primitive_t {
 
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                            forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                        alg_kind::convolution_auto,
+                        alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
-                           this->cdesc_().src_desc.data_type,
-                           this->cdesc_().weights_desc.data_type,
-                           this->cdesc_().dst_desc.data_type)
+                           this->desc()->src_desc.data_type,
+                           this->desc()->weights_desc.data_type,
+                           this->desc()->dst_desc.data_type)
                 && IMPLICATION(this->with_bias(), data_type::f32
-                                   == this->cdesc_().bias_desc.data_type)
+                                   == this->desc()->bias_desc.data_type)
                 && this->src_pd_.desc()->format == src_format()
                 && this->dst_pd_.desc()->format == src_format()
                 && this->weights_pd_.desc()->format == wei_format()
                 && this->is_gemm_conv_format();
-            return ok ? status::success : status::unimplemented;
+            if (!ok) return status::unimplemented;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
+                    *desc(), src_pd(), weights_pd(0), dst_pd(),
+                    mkldnn_get_max_threads());
         }
 
         jit_gemm_conv_conf_t jcp_;
 
     protected:
+        memory_format_t src_format() const {
+            using namespace memory_format;
+            const int ndims_sp = this->desc()->src_desc.ndims - 2;
+            return (utils::pick(ndims_sp - 1, ncw, nchw, ncdhw));
+        }
+
+        memory_format_t wei_format() const {
+            using namespace memory_format;
+            const int ndims_sp = this->desc()->src_desc.ndims - 2;
+            return (this->with_groups()
+                ? utils::pick(ndims_sp - 1, goiw, goihw, goidhw)
+                : utils::pick(ndims_sp - 1, oiw, oihw, oidhw));
+        }
+
         virtual status_t set_default_params() override {
             using namespace memory_format;
             if (this->src_pd_.desc()->format == any)
@@ -97,11 +100,12 @@ struct _gemm_convolution_fwd_t: public cpu_primitive_t {
                 CHECK(this->weights_pd_.set_format(wei_format()));
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
 
         virtual bool is_gemm_conv_format() const {
-            bool ok = true;
             auto const &po = this->attr()->post_ops_;
 
             auto is_eltwise = [&](int idx) { return po.entry_[idx].is_eltwise(); };
@@ -110,48 +114,24 @@ struct _gemm_convolution_fwd_t: public cpu_primitive_t {
             auto is_simple = [&](int idx) { return (is_eltwise(idx) || is_depthwise(idx)); };
 
             switch (po.len_) {
-                using namespace mkldnn::impl::primitive_kind;
-            case 0: // no post_ops
-                break;
-            case 1:
-                ok = ok && // sum OR eltwise/depthwise
-                        (is_simple(0) || is_sum(0));
-                break;
-            case 2:
-                ok = ok && // sum->eltwise/depthwise OR eltwise/depthwise->eltwise/depthwise
-                           ((is_sum(0) && is_simple(1)) || (is_simple(0) && is_simple(1)));
-                break;
-            case 3:
-                ok = ok && // sum->eltwise/depthwise->eltwise/depthwise
-                     (is_sum(0) && is_simple(1) && is_simple(2));
-                break;
-
-            default: ok = false;
+            case 0: return true;
+            case 1: return is_simple(0) || is_sum(0);
+            case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_simple(1));
+            case 3: return is_sum(0) && is_simple(1) && is_simple(2);
+            default: return false;
             }
-            return ok;
+            return false;
         }
     };
 
-    _gemm_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    gemm_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , scratchpad_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs, true)
     {
-        using namespace prop_kind;
-
-        const auto &post_ops = conf_.attr()->post_ops_;
+        const auto &post_ops = pd()->attr()->post_ops_;
         const data_t one = 1.0, zero = 0.0;
         beta_ = post_ops.find(primitive_kind::sum) >= 0 ? one : zero;
 
-        jit_gemm_convolution_utils::init_conf(conf_.jcp_,
-            *(conf_.cdesc()), conf_.src_pd(), conf_.weights_pd(0),
-            conf_.dst_pd(), mkldnn_get_max_threads(), with_relu,
-            conf_.negative_slope());
-
-        size_t size = (size_t)conf_.jcp_.im2col_sz * sizeof(data_t);
-        jit_gemm_convolution_utils::prepare_scratchpad(this->conf_.jcp_,
-                &this->scratchpad_, size, this->conf_.jcp_.nthr);
-
         for (int i = 0; i < post_ops.len_; i++) {
             auto &post_op = post_ops.entry_[i];
             if (post_op.is_eltwise()) {
@@ -168,10 +148,7 @@ struct _gemm_convolution_fwd_t: public cpu_primitive_t {
         }
 
         use_fast_relu = false;
-        if (conf_.jcp_.with_relu && post_ops.len_ == 0) {
-            use_fast_relu = true;
-            fast_relu_ns = conf_.jcp_.relu_negative_slope;
-        } else if (post_ops.len_ == 1 && post_ops.entry_[0].is_relu(true, false)) {
+        if (post_ops.len_ == 1 && post_ops.entry_[0].is_relu(true, false)) {
             use_fast_relu = true;
             fast_relu_ns = post_ops.entry_[0].eltwise.alpha;
         } else if (post_ops.len_ == 2 && post_ops.entry_[0].is_sum() && post_ops.entry_[1].is_relu(true, false)) {
@@ -180,9 +157,7 @@ struct _gemm_convolution_fwd_t: public cpu_primitive_t {
         }
     }
 
-    ~_gemm_convolution_fwd_t() {
-        delete this->scratchpad_;
-
+    ~gemm_convolution_fwd_t() {
         for (auto inj : eltwise_injectors)
             delete inj;
         eltwise_injectors.clear();
@@ -190,19 +165,19 @@ struct _gemm_convolution_fwd_t: public cpu_primitive_t {
         for (auto inj : depthwise_injectors)
             delete inj;
         depthwise_injectors.clear();
-    };
+    }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
-    scratchpad_t *scratchpad_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     data_t beta_;
 
     nstl::vector<ref_eltwise_scalar_fwd_t*> eltwise_injectors;
@@ -212,39 +187,16 @@ private:
     float fast_relu_ns;
 };
 
-using gemm_convolution_fwd_t =
-                         _gemm_convolution_fwd_t<false>;
-using gemm_convolution_relu_t =
-                         _gemm_convolution_fwd_t<true>;
-
 struct gemm_convolution_bwd_data_t: public cpu_primitive_t {
     struct pd_t: public cpu_convolution_bwd_data_pd_t {
         pd_t(engine_t *engine,
-                const convolution_desc_t *adesc,
-                const primitive_attr_t *attr,
+                const convolution_desc_t *adesc, const primitive_attr_t *attr,
                 const convolution_fwd_pd_t *hint_fwd_pd)
             : cpu_convolution_bwd_data_pd_t(engine, adesc, attr, hint_fwd_pd)
-            , jcp_()
-        {}
+            , jcp_() {}
 
         DECLARE_COMMON_PD_T(GEMM_IMPL_STR, gemm_convolution_bwd_data_t);
 
-        inline memory_format_t src_format()
-        {
-            using namespace memory_format;
-            return (utils::pick(this->desc()->diff_src_desc.ndims - 3,
-                ncw, nchw, ncdhw));
-        }
-        inline memory_format_t wei_format()
-        {
-            using namespace memory_format;
-            return (this->with_groups()
-                ? utils::pick(this->desc()->diff_src_desc.ndims - 3,
-                    goiw, goihw, goidhw)
-                : utils::pick(this->desc()->diff_src_desc.ndims - 3,
-                    oiw, oihw, oidhw));
-        }
-
         virtual status_t init() override {
             using namespace prop_kind;
             using namespace memory_format;
@@ -254,7 +206,8 @@ struct gemm_convolution_bwd_data_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_data
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->diff_src_desc.data_type,
@@ -263,12 +216,31 @@ struct gemm_convolution_bwd_data_t: public cpu_primitive_t {
                 && this->diff_src_pd_.desc()->format == src_format()
                 && this->diff_dst_pd_.desc()->format == src_format()
                 && this->weights_pd_.desc()->format == wei_format();
-            return ok ? status::success : status::unimplemented;
+            if (!ok) return status::unimplemented;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
+                    *desc(), diff_src_pd(), weights_pd(0), diff_dst_pd(),
+                    mkldnn_get_max_threads());
         }
 
         jit_gemm_conv_conf_t jcp_;
 
     protected:
+        memory_format_t src_format() const {
+            using namespace memory_format;
+            const int ndims_sp = this->desc()->diff_src_desc.ndims - 2;
+            return (utils::pick(ndims_sp - 1, ncw, nchw, ncdhw));
+        }
+
+        memory_format_t wei_format() const {
+            using namespace memory_format;
+            const int ndims_sp = this->desc()->diff_src_desc.ndims - 2;
+            return (this->with_groups()
+                ? utils::pick(ndims_sp - 1, goiw, goihw, goidhw)
+                : utils::pick(ndims_sp - 1, oiw, oihw, oidhw));
+        }
+
         virtual status_t set_default_params() override {
             using namespace memory_format;
             if (this->diff_src_pd_.desc()->format == any)
@@ -277,34 +249,21 @@ struct gemm_convolution_bwd_data_t: public cpu_primitive_t {
                 CHECK(this->diff_dst_pd_.set_format(src_format()));
             if (this->weights_pd_.desc()->format == any)
                 CHECK(this->weights_pd_.set_format(wei_format()));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    gemm_convolution_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    gemm_convolution_bwd_data_t(const pd_t *apd, const input_vector &inputs,
               const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , scratchpad_(nullptr)
-    {
-        using namespace prop_kind;
-
-        jit_gemm_convolution_utils::init_conf(conf_.jcp_,
-            *(conf_.desc()), conf_.diff_src_pd(), conf_.weights_pd(0),
-            conf_.diff_dst_pd(), mkldnn_get_max_threads());
-
-        size_t size = (size_t)conf_.jcp_.im2col_sz * sizeof(data_t);
-        jit_gemm_convolution_utils::prepare_scratchpad(this->conf_.jcp_,
-                &this->scratchpad_, size, this->conf_.jcp_.nthr);
-    }
-
-    ~gemm_convolution_bwd_data_t() {
-        delete this->scratchpad_;
-    };
+        : cpu_primitive_t(apd, inputs, outputs, true) {}
+    ~gemm_convolution_bwd_data_t() {}
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             execute_backward_data();
             break;
@@ -315,9 +274,8 @@ struct gemm_convolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
-    scratchpad_t *scratchpad_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 struct gemm_convolution_bwd_weights_t: public cpu_primitive_t {
@@ -327,27 +285,10 @@ struct gemm_convolution_bwd_weights_t: public cpu_primitive_t {
                 const primitive_attr_t *attr,
                 const convolution_fwd_pd_t *hint_fwd_pd)
             : cpu_convolution_bwd_weights_pd_t(engine, adesc, attr, hint_fwd_pd)
-            , jcp_()
-        {}
+            , jcp_() {}
 
         DECLARE_COMMON_PD_T(GEMM_IMPL_STR, gemm_convolution_bwd_weights_t);
 
-        inline memory_format_t src_format()
-        {
-            using namespace memory_format;
-            return (utils::pick(this->desc()->src_desc.ndims - 3,
-                ncw, nchw, ncdhw));
-        }
-        inline memory_format_t wei_format()
-        {
-            using namespace memory_format;
-            return (this->with_groups()
-                ? utils::pick(this->desc()->src_desc.ndims - 3,
-                    goiw, goihw, goidhw)
-                : utils::pick(this->desc()->src_desc.ndims - 3,
-                    oiw, oihw, oidhw));
-        }
-
         virtual status_t init() override {
             using namespace prop_kind;
             using namespace memory_format;
@@ -357,7 +298,8 @@ struct gemm_convolution_bwd_weights_t: public cpu_primitive_t {
             bool ok = true
             && this->set_default_params() == status::success
             && this->desc()->prop_kind == backward_weights
-            && this->desc()->alg_kind == alg_kind::convolution_direct
+            && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                       alg_kind::convolution_direct)
             && !this->has_zero_dim_memory()
             && utils::everyone_is(data_type::f32,
                     this->desc()->src_desc.data_type,
@@ -368,12 +310,31 @@ struct gemm_convolution_bwd_weights_t: public cpu_primitive_t {
             && this->src_pd_.desc()->format == src_format()
             && this->diff_dst_pd_.desc()->format == src_format()
             && this->diff_weights_pd_.desc()->format == wei_format();
-            return ok ? status::success : status::unimplemented;
+            if (!ok) return status::unimplemented;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
+                    *desc(), src_pd(), diff_weights_pd(0), diff_dst_pd(),
+                    mkldnn_get_max_threads());
         }
 
         jit_gemm_conv_conf_t jcp_;
 
     protected:
+        memory_format_t src_format() const {
+            using namespace memory_format;
+            const int ndims_sp = this->desc()->src_desc.ndims - 2;
+            return (utils::pick(ndims_sp - 1, ncw, nchw, ncdhw));
+        }
+
+        memory_format_t wei_format() const {
+            using namespace memory_format;
+            const int ndims_sp = this->desc()->src_desc.ndims - 2;
+            return (this->with_groups()
+                ? utils::pick(ndims_sp - 1, goiw, goihw, goidhw)
+                : utils::pick(ndims_sp - 1, oiw, oihw, oidhw));
+        }
+
         virtual status_t set_default_params() override {
             using namespace memory_format;
             if (this->src_pd_.desc()->format == any)
@@ -384,38 +345,21 @@ struct gemm_convolution_bwd_weights_t: public cpu_primitive_t {
                 CHECK(this->diff_weights_pd_.set_format(wei_format()));
             if (this->diff_bias_pd_.desc()->format == any)
                 CHECK(this->diff_bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    gemm_convolution_bwd_weights_t(const pd_t *pd, const input_vector &inputs,
+    gemm_convolution_bwd_weights_t(const pd_t *apd, const input_vector &inputs,
               const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , scratchpad_(nullptr)
-    {
-        using namespace prop_kind;
-
-        jit_gemm_convolution_utils::init_conf(conf_.jcp_,
-            *(conf_.desc()), conf_.src_pd(), conf_.diff_weights_pd(0),
-            conf_.diff_dst_pd(), mkldnn_get_max_threads());
-        const memory_desc_wrapper weights_d(conf_.diff_weights_pd(0));
-
-        size_t size = (size_t)conf_.jcp_.im2col_sz  * sizeof(data_t);
-        if (conf_.jcp_.need_wei_reduction)
-            size += (size_t)conf_.jcp_.ngroups * weights_d.size();
-
-        jit_gemm_convolution_utils::prepare_scratchpad(this->conf_.jcp_,
-                &this->scratchpad_, size, conf_.jcp_.nthr);
-    }
-
-    ~gemm_convolution_bwd_weights_t() {
-        delete this->scratchpad_;
-     };
+        : cpu_primitive_t(apd, inputs, outputs, true) {}
+    ~gemm_convolution_bwd_weights_t() {}
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_weights:
             execute_backward_weights();
             break;
@@ -426,9 +370,8 @@ struct gemm_convolution_bwd_weights_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_weights();
-    pd_t conf_;
-    scratchpad_t *scratchpad_;
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.cpp
index 80dfe9f19..2b7cea217 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.cpp
@@ -23,6 +23,7 @@
 #include "cpu_isa_traits.hpp"
 
 #include "gemm_convolution_utils.hpp"
+#include "jit_generator.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -36,17 +37,19 @@ using namespace data_type;
 
 namespace jit_gemm_convolution_utils {
 
-void im2col_3d(jit_gemm_conv_conf_t &jcp, const float *im, float *col, int od) {
+void im2col_3d(const jit_gemm_conv_conf_t &jcp, const float *im, float *col,
+        int od)
+{
     const size_t OHW = jcp.oh * jcp.ow;
     const size_t im_step = jcp.ih * jcp.iw * jcp.id;
     const size_t col_step = jcp.ks * OHW;
 
     parallel_nd(jcp.ic, [&](int ic) {
-        const float *im_loc = im + ic * im_step;
-        float *col_loc = col + ic * col_step;
+        const float *__restrict im_loc = im + ic * im_step;
+        float *__restrict col_loc = col + ic * col_step;
         int id = od * jcp.stride_d - jcp.f_pad;
         for (int kd = 0; kd < jcp.kd; ++kd) {
-            float *col_ = col_loc + kd * jcp.kh * jcp.kw * OHW;
+            float *__restrict col_ = col_loc + kd * jcp.kh * jcp.kw * OHW;
             if (id < 0 || id >= jcp.id) {
                 int ih_ = -jcp.t_pad;
                 for (int kh = 0; kh < jcp.kh; ++kh) {
@@ -79,7 +82,7 @@ void im2col_3d(jit_gemm_conv_conf_t &jcp, const float *im, float *col, int od) {
                     col_ += jcp.kw * OHW;
                 }
             } else {
-                const float *im_ = im_loc + id * jcp.ih * jcp.iw;
+                const float *__restrict im_ = im_loc + id * jcp.ih * jcp.iw;
                 int ih_ = -jcp.t_pad;
                 for (int kh = 0; kh < jcp.kh; ++kh) {
                     int ih = ih_;
@@ -117,88 +120,226 @@ void im2col_3d(jit_gemm_conv_conf_t &jcp, const float *im, float *col, int od) {
     });
 }
 
-void im2col(jit_gemm_conv_conf_t &jcp, const float *im, float *col) {
-    if (jcp.ic == 1) {
-        parallel_nd(jcp.kh, jcp.oh, [&](int kh, int oh) {
-            const int ih = oh * jcp.stride_h - jcp.t_pad + kh * (1 + jcp.dilate_h);
-            if (ih < 0 || ih >= jcp.ih) return;
-
-            for (int kw = 0; kw < jcp.kw; ++kw) {
-            for (int ow = 0; ow < jcp.ow; ++ow) {
-                const int iw = ow * jcp.stride_w - jcp.l_pad + kw * (1 + jcp.dilate_w);
-                if (iw < 0 || iw >= jcp.iw) continue;
-
-                const size_t col_idx = ((kh*jcp.kw + kw)*jcp.oh+oh)*jcp.ow+ow;
-                const size_t im_idx = ih*jcp.iw + iw;
-                col[col_idx] = im[im_idx];
-            }}
+/* col[ic][kh][kw][oh][ow] <-- im2col(im[ic][ih][iw]) */
+void im2col(const jit_gemm_conv_conf_t &jcp, const float *__restrict im,
+       float *__restrict col, int hs, int hb, int ws, int wb) {
+    const size_t im_step = jcp.is;
+    const size_t col_step = jcp.ks * hb * wb;
+    if (jcp.stride_w == 1) {
+        // Generated code is more optimized for stride_w == 1
+        // because innermost loop is by width
+        auto ker = [&](int ic, int kh, int kw, int oh) {
+            const float *__restrict im_ = im + ic * im_step;
+            float *__restrict col_
+                = col + ic * col_step + ((kh * jcp.kw + kw) * hb + oh) * wb;
+
+            const int ih = (oh + hs) * jcp.stride_h - jcp.t_pad
+                + kh * (1 + jcp.dilate_h);
+            if (ih < 0 || ih >= jcp.ih) {
+                for (int ow = 0; ow < wb; ++ow)
+                    col_[ow] = 0.f;
+            } else {
+                for (int ow = 0; ow < wb; ++ow) {
+                    const int iw = ow + ws - jcp.l_pad + kw * (1 + jcp.dilate_w);
+                    if (iw < 0 || iw >= jcp.iw)
+                        col_[ow] = 0.f;
+                    else {
+                        const size_t im_idx = ih * jcp.iw + iw;
+                        col_[ow] = im_[im_idx];
+                    }
+                }
+            }
+        };
+
+        if (jcp.outer_threading) {
+            for (int ic = 0; ic < jcp.ic; ic++)
+                for (int kh = 0; kh < jcp.kh; kh++)
+                    for (int kw = 0; kw < jcp.kw; kw++)
+                        for (int oh = 0; oh < hb; oh++)
+                            ker(ic, kh, kw, oh);
+        }
+        else {
+            parallel_nd(jcp.ic, jcp.kh, jcp.kw, hb, ker);
+        }
+    } else if (jcp.ic == 1) {
+        parallel_nd(jcp.kh, hb, [&](int kh, int oh) {
+            const int ih = (oh + hs) * jcp.stride_h - jcp.t_pad
+                    + kh * (1 + jcp.dilate_h);
+            if (ih < 0 || ih >= jcp.ih)
+                for (int kw = 0; kw < jcp.kw; ++kw) {
+                    for (int ow = 0; ow < wb; ++ow) {
+                        const size_t col_idx
+                                = ((kh * jcp.kw + kw) * hb + oh) * wb + ow;
+                        col[col_idx] = 0;
+                    }
+                }
+            else
+                for (int kw = 0; kw < jcp.kw; ++kw) {
+                    for (int ow = 0; ow < wb; ++ow) {
+                        const int iw = (ow + ws) * jcp.stride_w - jcp.l_pad
+                                + kw * (1 + jcp.dilate_w);
+                        const size_t col_idx
+                                = ((kh * jcp.kw + kw) * hb + oh) * wb + ow;
+                        const size_t im_idx = ih * jcp.iw + iw;
+                        if (iw < 0 || iw >= jcp.iw)
+                            col[col_idx] = 0;
+                        else
+                            col[col_idx] = im[im_idx];
+                    }
+                }
         });
     } else {
-        const size_t im_step = jcp.ih * jcp.iw;
-        const size_t col_step = jcp.ks * jcp.os;
-
-        parallel_nd(jcp.ic, [&](int ic) {
-            const float *im_ = im + ic * im_step;
-            float *col_ = col + ic * col_step;
-
-            for (int kh = 0; kh < jcp.kh; ++kh) {
-            for (int oh = 0; oh < jcp.oh; ++oh) {
-                const int ih = oh * jcp.stride_h
-                               - jcp.t_pad + kh * (1 + jcp.dilate_h);
-                if (ih < 0 || ih >= jcp.ih) continue;
-
-                for (int kw = 0; kw < jcp.kw; ++kw) {
-                for (int ow = 0; ow < jcp.ow; ++ow) {
-                    const int iw = ow * jcp.stride_w
-                                   - jcp.l_pad + kw * (1 + jcp.dilate_w);
-                    if (iw < 0 || iw >= jcp.iw) continue;
 
-                    const size_t col_idx = ((kh * jcp.kw + kw) * jcp.oh+oh)
-                                           * jcp.ow + ow;
-                    const size_t im_idx = ih*jcp.iw + iw;
-                    col_[col_idx] = im_[im_idx];
-                }}
-            }}
+        parallel_nd(jcp.ic, jcp.kh, jcp.kw, hb,
+            [&](int ic, int kh, int kw, int oh) {
+            const float *__restrict im_ = im + ic * im_step;
+            float *__restrict col_ = col + ic * col_step
+                + ((kh * jcp.kw + kw) * hb + oh) * wb;
+
+            const int ih = (oh + hs) * jcp.stride_h - jcp.t_pad
+                + kh * (1 + jcp.dilate_h);
+            if (ih < 0 || ih >= jcp.ih) {
+                for (int ow = 0; ow < wb; ++ow)
+                    col_[ow] = 0.f;
+            } else {
+                for (int ow = 0; ow < wb; ++ow) {
+                    const int iw = (ow + ws) * jcp.stride_w - jcp.l_pad
+                        + kw * (1 + jcp.dilate_w);
+                    const size_t im_idx = ih * jcp.iw + iw;
+                    if (iw < 0 || iw >= jcp.iw)
+                        col_[ow] = 0.f;
+                    else
+                        col_[ow] = im_[im_idx];
+                }
+            }
         });
     }
 }
 
 /* col[oh][ow][kh][kw][ic] <-- im2col_u8(im[ih][iw][ic]) */
 template <typename T>
-void im2col_u8(jit_gemm_conv_conf_t &jcp, const T *im, uint8_t *col) {
-    parallel_nd(jcp.oh, jcp.ow, [&](int oh, int ow) {
-            for (int kh = 0; kh < jcp.kh; ++kh) {
-                const int ih = oh * jcp.stride_h
-                    - jcp.t_pad + kh * (1 + jcp.dilate_h);
-                if (ih < 0 || ih >= jcp.ih) continue;
+void im2col_u8(const jit_gemm_conv_conf_t &jcp, const T *__restrict im,
+        uint8_t *__restrict col) {
+    uint8_t shift = jcp.signed_input ? 128 : 0;
+    const int dh = 1 + jcp.dilate_h;
+    const int dw = 1 + jcp.dilate_w;
+    const int sh = jcp.stride_h;
+    const int sw = jcp.stride_w;
+    if (sh == 1 && sw == 1 && jcp.oh > 2 * mkldnn_get_max_threads()) {
+        const int ihp = jcp.ih + jcp.t_pad;
+        const int iwp = jcp.iw + jcp.l_pad;
+        const int col_kw_step = jcp.ic;
+        const int col_kh_step = jcp.kw * col_kw_step;
+        const int col_ow_step = jcp.kh * col_kh_step;
+        const int col_oh_step = jcp.ow * col_ow_step;
+        const int im_iw_step = jcp.ngroups * jcp.ic;
+        const int im_ih_step = jcp.iw * im_iw_step;
+
+        const int nb_ic = jcp.ic / 4;
+        const int ic_blocked = nb_ic * 4;
+
+        parallel_nd(jcp.oh, [&](int oh) {
+            const int kh_start = nstl::max(div_up(jcp.t_pad - oh, dh), 0);
+            const int kh_end = nstl::min(div_up(ihp - oh, dh), jcp.kh);
+            const int ih_start = oh - jcp.t_pad + kh_start * dh;
+            const int col_oh_idx = oh * col_oh_step;
+
+            for (int kh = kh_start, ih = ih_start; kh < kh_end; ++kh, ih += dh)
+            {
+                const int col_kh_idx = col_oh_idx + kh * col_kh_step;
+                const int im_kh_idx = ih * im_ih_step;
 
                 for (int kw = 0; kw < jcp.kw; ++kw) {
-                    const int iw = ow * jcp.stride_w
-                        - jcp.l_pad + kw * (1 + jcp.dilate_w);
-                    if (iw < 0 || iw >= jcp.iw) continue;
-
-                    const size_t col_idx = (((oh * jcp.ow + ow) * jcp.kh + kh)
-                            * jcp.kw + kw) * jcp.ic;
-                    const size_t im_idx
-                        = (ih * jcp.iw + iw) * jcp.ngroups * jcp.ic;
-                    PRAGMA_OMP_SIMD()
-                    for (int ic = 0; ic < jcp.ic; ++ic) {
-                        col[col_idx + ic] = jcp.signed_input
-                        ? im[im_idx + ic] + 128
-                        : im[im_idx + ic];
+                    const int ow_start = nstl::max(jcp.l_pad - kw * dw, 0);
+                    const int ow_end = nstl::min(iwp - kw * dw, jcp.ow);
+                    const int iw_start = ow_start - jcp.l_pad + kw * dw;
+                    const int col_kw_idx = col_kh_idx + kw * col_kw_step;
+
+                    const int col_idx_start
+                            = col_kw_idx + ow_start * col_ow_step;
+                    const int im_idx_start = im_kh_idx + iw_start * im_iw_step;
+                    const int col_idx_end = col_kw_idx + ow_end * col_ow_step;
+
+                    // loop by iw and ow
+                    if (nb_ic > 0) {
+                        for (int col_idx = col_idx_start, im_idx = im_idx_start;
+                                col_idx < col_idx_end;
+                                col_idx += col_ow_step, im_idx += im_iw_step) {
+                            for (int icb = 0; icb < 4 * nb_ic; icb += 4) {
+                                PRAGMA_OMP_SIMD()
+                                for (int ic = 0; ic < 4; ++ic) {
+                                    col[col_idx + icb + ic]
+                                            = im[im_idx + icb + ic] + shift;
+                                }
+                            }
+                        }
+                    }
+                    if (ic_blocked != jcp.ic) {
+                        for (int col_idx = col_idx_start, im_idx = im_idx_start;
+                                col_idx < col_idx_end;
+                                col_idx += col_ow_step, im_idx += im_iw_step) {
+                            PRAGMA_OMP_SIMD()
+                            for (int ic = ic_blocked; ic < jcp.ic; ++ic) {
+                                col[col_idx + ic] = im[im_idx + ic] + shift;
+                            }
+                        }
                     }
                 }
             }
-        }
-    );
+        });
+    }
+    else {
+        const size_t col_kh_step = jcp.kw * jcp.ic;
+        const size_t col_ow_step = jcp.kh * col_kh_step;
+        const size_t col_oh_step = jcp.ow * col_ow_step;
+        const size_t im_ih_step = jcp.iw * jcp.ngroups * jcp.ic;
+        const size_t im_iw_step = jcp.ngroups * jcp.ic;
+        const int ih_pad = jcp.ih + jcp.t_pad;
+        const int iw_pad = jcp.iw + jcp.l_pad;
+        parallel_nd(jcp.oh, jcp.ow, [&](int oh, int ow) {
+            const int ihs = oh * sh;
+            const int ihsp = jcp.t_pad - ihs;
+            const int kh_start = nstl::max(div_up(ihsp, dh), 0);
+            const int kh_end = nstl::min(div_up(ih_pad - ihs, dh), jcp.kh);
+            const int ih_start = kh_start * dh - ihsp;
+            const int iws = ow * sw;
+            const int iwsp = jcp.l_pad - iws;
+            const int kw_start = nstl::max(div_up(iwsp, dw), 0);
+            const int kw_end = nstl::min(div_up(iw_pad - iws, dw), jcp.kw);
+            const int iw_start = kw_start * dw - iwsp;
+
+            uint8_t *__restrict col_base
+                    = col + oh * col_oh_step + ow * col_ow_step;
+            for (int kh = kh_start, ih = ih_start; kh < kh_end;
+                    ++kh, ih += dh) {
+                uint8_t *__restrict col_ = col_base + kh * col_kh_step;
+                const T *__restrict im_ = im + ih * im_ih_step;
+
+                for (int kw = kw_start, iw = iw_start; kw < kw_end;
+                    ++kw, iw += dw) {
+
+                    const size_t col_idx = kw * jcp.ic;
+                    const size_t im_idx = iw * im_iw_step;
+                    PRAGMA_OMP_SIMD()
+                        for (int ic = 0; ic < jcp.ic; ++ic) {
+                            col_[col_idx + ic] = im_[im_idx + ic] + shift;
+                        }
+                }
+            }
+        });
+    }
+
 }
-template void im2col_u8<int8_t>(
-        jit_gemm_conv_conf_t &jcp, const int8_t *im, uint8_t *col);
-template void im2col_u8<uint8_t>(
-        jit_gemm_conv_conf_t &jcp, const uint8_t *im, uint8_t *col);
+
+template void im2col_u8<int8_t>(const jit_gemm_conv_conf_t &jcp,
+        const int8_t *__restrict im, uint8_t *__restrict col);
+template void im2col_u8<uint8_t>(const jit_gemm_conv_conf_t &jcp,
+        const uint8_t *__restrict im, uint8_t *__restrict col);
 
 /* im[ih][iw][ic] <-- col2im_s32(col[oh][ow][kh][kw][ic]) */
-void col2im_s32(jit_gemm_conv_conf_t &jcp, const int32_t *col, int32_t *im) {
+void col2im_s32(const jit_gemm_conv_conf_t &jcp, const int32_t *__restrict col,
+        int32_t *__restrict im)
+{
     parallel(0, [&](const int ithr, const int nthr) {
         int h_nthr = nstl::min(jcp.ih, nthr);
         int w_nthr = nstl::min(jcp.iw, nthr / h_nthr);
@@ -250,10 +391,12 @@ void col2im_s32(jit_gemm_conv_conf_t &jcp, const int32_t *col, int32_t *im) {
     });
 }
 
-void col2im_3d(jit_gemm_conv_conf_t &jcp, const float *col, float *im, int od) {
+void col2im_3d(const jit_gemm_conv_conf_t &jcp, const float *col, float *im,
+        int od)
+{
     parallel_nd(jcp.ic, [&](int ic) {
-        const float *col_ = col + (size_t)ic * jcp.ks * jcp.os;
-        float *im_ic = im + (size_t)ic * jcp.ih * jcp.iw * jcp.id;
+        const float *__restrict col_ = col + (size_t)ic * jcp.ks * jcp.os;
+        float *__restrict im_ic = im + (size_t)ic * jcp.ih * jcp.iw * jcp.id;
 
         int id = od * jcp.stride_d - jcp.f_pad;
         for (int kd = 0; kd < jcp.kd; ++kd) {
@@ -263,7 +406,7 @@ void col2im_3d(jit_gemm_conv_conf_t &jcp, const float *col, float *im, int od) {
                 continue;
             }
 
-            float *im_ = im_ic + id * jcp.ih * jcp.iw;
+            float *__restrict im_ = im_ic + id * jcp.ih * jcp.iw;
 
             for (int oh = 0; oh < jcp.oh; ++oh) {
             for (int kh = 0; kh < jcp.kh; ++kh) {
@@ -289,16 +432,14 @@ void col2im_3d(jit_gemm_conv_conf_t &jcp, const float *col, float *im, int od) {
     });
 }
 
-void col2im(
-    jit_gemm_conv_conf_t &jcp, const float *col, float *im) {
-
+void col2im(const jit_gemm_conv_conf_t &jcp, const float *col, float *im) {
     const size_t col_step = jcp.ks * jcp.os;
     const size_t im_step = jcp.ih * jcp.iw;
     const int iS = jcp.ih * jcp.iw;
 
     parallel_nd(jcp.ic, [&](int ic) {
-        float *im_ = im + ic * im_step;
-        const float *col_ = col + ic * col_step;
+        float *__restrict im_ = im + ic * im_step;
+        const float *__restrict col_ = col + ic * col_step;
         PRAGMA_OMP_SIMD()
         for (int is = 0; is < iS; ++is) im_[is] = 0.;
 
@@ -322,18 +463,17 @@ void col2im(
     });
 }
 
-void init_conf(
-    jit_gemm_conv_conf_t &jcp, const convolution_desc_t &cd,
-    const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
-    const memory_desc_wrapper &dst_d, int max_threads,
-    bool with_relu, float relu_negative_slope) {
-
+status_t init_conf(jit_gemm_conv_conf_t &jcp,
+        memory_tracking::registrar_t &scratchpad, const convolution_desc_t &cd,
+        const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
+        const memory_desc_wrapper &dst_d, int max_threads) {
     const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
-    jcp.prop_kind = cd.prop_kind;
     const int ndims = src_d.ndims();
     const int is_1d = ndims == 3;
     const int is_3d = ndims == 5;
 
+    jcp.prop_kind = cd.prop_kind;
+
     jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
     jcp.mb = src_d.dims()[0];
 
@@ -363,59 +503,198 @@ void init_conf(
     jcp.dilate_w = cd.dilates[ndims - 3];
 
     jcp.src_fmt = src_d.format();
-    jcp.with_bias
-        = cd.bias_desc.format != memory_format::undef
+    jcp.with_bias = cd.bias_desc.format != memory_format::undef
         || cd.diff_bias_desc.format != memory_format::undef;
-    jcp.with_relu = with_relu;
-    jcp.relu_negative_slope = relu_negative_slope;
 
     jcp.is = jcp.ih * jcp.iw;
     jcp.os = jcp.oh * jcp.ow;
     jcp.ks = jcp.kh * jcp.kw * jcp.kd;
 
-    jcp.signed_input = (src_d.data_type() == data_type::s8);
-    jcp.wei_adj_scale = (!jcp.signed_input || mayiuse(avx512_core_vnni))
-            ? 1.0f
-            : (1.0f / 2.0f);
+    jcp.signed_input = src_d.data_type() == data_type::s8;
+    jcp.wei_adj_scale =
+        !jcp.signed_input || mayiuse(avx512_core_vnni) ? 1.f : 0.5f;
+
     jcp.im2col_sz = !everyone_is(true,
             jcp.ow == jcp.iw, jcp.oh == jcp.ih, jcp.od == jcp.id,
             jcp.stride_w == 1, jcp.stride_h == 1, jcp.stride_d == 1,
             jcp.ks == 1, !jcp.signed_input)
-        ? (ptrdiff_t)jcp.ic * jcp.ks * jcp.os
-        : 0;
-
-    bool do_outer_threading = false;
-    bool is_int8_conv
-            = (utils::one_of(cd.src_desc.data_type == u8, cd.src_desc.data_type == s8)
-                    && cd.weights_desc.data_type == s8);
+        ? (ptrdiff_t)jcp.ic * jcp.ks * jcp.os : 0;
+
+    jcp.outer_threading = false;
+    jcp.oh_block = jcp.oh;
+    jcp.ow_block = jcp.ow;
+
+    bool is_int8_conv = utils::one_of(src_d.data_type(), s32, s8, u8)
+        && weights_d.data_type() == s8;
+
+    const int vlen = mayiuse(avx512_common)
+        ? cpu_isa_traits<avx512_common>::vlen
+        : mayiuse(avx)
+            ? cpu_isa_traits<avx>::vlen
+            : mayiuse(sse42) ? cpu_isa_traits<sse42>::vlen : 4;
+    const int simd_w = vlen / (is_int8_conv ? 1 : 4);
+
+    const bool is_bwd_d = jcp.prop_kind == backward_data;
+    const bool is_bwd_w = jcp.prop_kind == backward_weights;
+    const bool is_fwd = !is_bwd_d && !is_bwd_w;
+
+    using namespace memory_tracking::names;
+    //  For threading selection we do:
+    //  1. Rough estimation of efficiency for inner and outer threading.
+    //  2. Gemm size estimation in assumption that it does not work
+    //  so effectively for small sizes.
+    //  64K - this is heuristic gemm size per thread threshold.
+    const int gemm_threshold = 64 * 1024;
     if (is_int8_conv) {
-        bool is_depthwise =
-                utils::everyone_is(1, jcp.ic, jcp.oc) && jcp.ngroups != 1;
-        do_outer_threading
-                = (is_depthwise || (jcp.os / max_threads < 64 && jcp.mb != 1));
+        bool is_depthwise = jcp.ic == 1 && jcp.oc == 1 && jcp.ngroups != 1;
+
+        const int bs = is_fwd ? jcp.os : jcp.is;
+        const int ls = is_fwd ? jcp.oc : jcp.ic;
+        const size_t outer_work_amount = jcp.ngroups * jcp.mb;
+        const float outer_thr_eff = (float)outer_work_amount
+                / rnd_up(outer_work_amount, max_threads);
+        const size_t inner_work_amount
+                = div_up(bs, simd_w) * div_up(ls, simd_w);
+        const float inner_thr_eff = (float)inner_work_amount
+                / rnd_up(inner_work_amount, max_threads);
+        jcp.outer_threading = (is_depthwise
+                || (bs  / max_threads < 64 && jcp.mb != 1))
+            && (outer_thr_eff / inner_thr_eff >= 1.f
+                   || (bs * jcp.ic * jcp.oc) / max_threads < gemm_threshold);
+        jcp.nthr = jcp.outer_threading ? max_threads : 1;
+
+        if (is_fwd) {
+            scratchpad.book(key_conv_gemm_col,
+                    sizeof(int8_t) * jcp.nthr * jcp.im2col_sz);
+            scratchpad.book(key_conv_int_dat_in_acc_dt,
+                    sizeof(int32_t) * jcp.nthr * jcp.os * jcp.oc);
+        } else if (is_bwd_d) {
+            scratchpad.book(key_conv_gemm_col,
+                    sizeof(int32_t) * jcp.nthr * jcp.im2col_sz);
+            scratchpad.book(key_conv_int_dat_in_acc_dt,
+                    sizeof(int32_t) * jcp.nthr * jcp.is * jcp.ic);
+        } else if (is_bwd_w) {
+            assert(!"unimplemented prop_kind");
+            return status::unimplemented;
+        }
     } else {
-        if (utils::one_of(jcp.prop_kind, forward_training, forward_inference))
-            do_outer_threading = jcp.os / max_threads < 512
-                && IMPLICATION(jcp.od == 1, (jcp.mb != 1 || jcp.ngroups > 2));
-        else if (jcp.prop_kind == backward_data)
-            do_outer_threading = (jcp.mb != 1 || jcp.ngroups > 2);
-        else //(jcp.prop_kind == backward_weights)
-            do_outer_threading = jcp.os / max_threads < 256
-                       && (jcp.mb != 1 || jcp.ngroups > 2);
-    }
-    jcp.nthr = do_outer_threading ? max_threads : 1;
-    jcp.need_wei_reduction = mkldnn_thr_syncable()
-        ? (jcp.mb != 1 && jcp.nthr != 1) : false;
-}
+        if (is_fwd) {
+            const int L2 = get_cache_size(2, true) / sizeof(float);
+            const int wei_size = jcp.oc * jcp.ic * jcp.kh * jcp.kw;
+
+            // It makes sense to try blocking for some special cases:
+            // when weights size is small and we have to do im2col
+            if (wei_size < L2/2 && jcp.im2col_sz && jcp.id == 1 && jcp.od == 1) {
+                // looking for oh and ow blocking
+                int h_block{ jcp.oh }, w_block{ jcp.ow };
+                // 1. cache requirement
+                // !!! used memory (assuming strides = 1 and dilate = 0 etc):
+                const int row_size = jcp.ic * jcp.kh * jcp.kw * jcp.ow
+                    + 2 * jcp.ic * jcp.iw + 2 * jcp.oc * jcp.ow;
+                h_block = nstl::max(
+                    1, nstl::min(jcp.oh, div_up(L2 - wei_size, row_size)));
+                if (h_block == 1) {
+                    const int col_size = jcp.ic * jcp.kh * jcp.kw + 2 * jcp.ic
+                        + 2 * jcp.oc;
+                    w_block = nstl::max(
+                        1, nstl::min(jcp.ow, div_up(L2 - wei_size, col_size)));
+                }
 
-status_t prepare_scratchpad(jit_gemm_conv_conf_t &jcp,
-                scratchpad_t **scratchpad_, size_t size, const int nthr) {
-    if (size > 0) {
-        *scratchpad_ = create_scratchpad(nthr * size);
-        if (*scratchpad_ == nullptr) return status::out_of_memory;
-    } else {
-        *scratchpad_ = nullptr;
+                // 2. threading requirement
+                if (h_block != jcp.oh)
+                    h_block = nstl::max(1, rnd_dn(h_block, 4));
+                if (w_block != jcp.ow)
+                    w_block = nstl::max(1, rnd_dn(w_block, simd_w));
+
+                float thr_eff = 0.f;
+                float thr_eff_treshold = 0.9f;
+                if (w_block == jcp.ow) {
+                    do {
+                        int nb_oh = div_up(jcp.oh, h_block);
+                        size_t work = jcp.ngroups * jcp.mb * jcp.od * nb_oh;
+                        float disb = (float)jcp.oh / rnd_up(jcp.oh, h_block);
+                        thr_eff = (float)work
+                            / rnd_up(work, max_threads);
+                        thr_eff = (thr_eff + disb) / 2.f;
+                        if (thr_eff >= thr_eff_treshold)
+                            break;
+                        h_block = rnd_dn(h_block - 4, 4);
+                    } while (h_block > 0);
+                }
+                if (thr_eff < thr_eff_treshold) // we didn't find suitable h_block
+                {
+                    h_block = 1;
+                    int nb_oh = jcp.oh;
+                    do {
+                        int nb_ow = div_up(jcp.ow, w_block);
+                        size_t work_amount
+                            = jcp.ngroups * jcp.mb * jcp.od * nb_oh * nb_ow;
+                        float disb = (float)jcp.ow / rnd_up(jcp.ow, w_block);
+                        thr_eff = (float)work_amount
+                            / rnd_up(work_amount, max_threads);
+                        thr_eff = (thr_eff + disb) / 2.f;
+                        if (thr_eff > thr_eff_treshold)
+                            break;
+                        w_block = rnd_dn(w_block - simd_w, simd_w);
+                    } while (w_block > 0);
+                }
+                const size_t inner_work_amount
+                    = div_up(jcp.os, simd_w) * div_up(jcp.oc, simd_w);
+                const float inner_thr_eff = (float)inner_work_amount
+                    / rnd_up(inner_work_amount, max_threads);
+                if (thr_eff >= inner_thr_eff / 2 && h_block > 0 && w_block > 0) {
+                    jcp.oh_block = h_block;
+                    jcp.ow_block = w_block;
+                    jcp.outer_threading = true;
+                }
+                // updating jcp.im2col_sz
+                if (jcp.oh_block != 1)
+                    jcp.ow_block = jcp.ow;
+                jcp.im2col_sz
+                    = (ptrdiff_t)jcp.ic * jcp.ks * jcp.oh_block * jcp.ow_block;
+            } else {
+                const size_t outer_work_amount = jcp.ngroups * jcp.mb * jcp.od;
+                const float outer_thr_eff = (float)outer_work_amount
+                        / rnd_up(outer_work_amount, max_threads);
+                const size_t inner_work_amount
+                        = div_up(jcp.os, simd_w) * div_up(jcp.oc, simd_w);
+                const float inner_thr_eff = (float)inner_work_amount
+                        / rnd_up(inner_work_amount, max_threads);
+                jcp.outer_threading = jcp.os / max_threads < 512
+                    && IMPLICATION(jcp.od == 1, jcp.mb != 1 || jcp.ngroups > 2)
+                    && (outer_thr_eff / inner_thr_eff >= 1.f
+                      || (jcp.os * jcp.ic * jcp.oc) / max_threads < gemm_threshold);
+            }
+        } else if (is_bwd_d) {
+            const size_t outer_work_amount = jcp.ngroups * jcp.mb;
+            const float outer_thr_eff = (float)outer_work_amount
+                / rnd_up(outer_work_amount, max_threads);
+            const size_t inner_work_amount
+                = div_up(jcp.is, simd_w) * div_up(jcp.ic, simd_w);
+            const float inner_thr_eff = (float)inner_work_amount
+                / rnd_up(inner_work_amount, max_threads);
+            jcp.outer_threading = (jcp.os / max_threads < 512 || jcp.ks < 64)
+                && (jcp.mb != 1 || jcp.ngroups > 2)
+                && (outer_thr_eff / inner_thr_eff >= 1.f
+                  || (jcp.os * jcp.ic * jcp.oc) / max_threads < gemm_threshold);
+        } else if (is_bwd_w)
+            jcp.outer_threading = jcp.os / max_threads < 256
+                && (jcp.mb != 1 || jcp.ngroups > 2);
+
+        jcp.nthr = jcp.outer_threading ? max_threads : 1;
+
+        scratchpad.book(key_conv_gemm_col,
+                sizeof(float) * jcp.nthr * jcp.im2col_sz);
+
+        if (is_bwd_w) {
+            jcp.need_wei_reduction = mkldnn_thr_syncable()
+                ? jcp.mb != 1 && jcp.nthr != 1 : false;
+
+            scratchpad.book(key_conv_wei_reduction,
+                    sizeof(float) * jcp.nthr * jcp.ngroups * weights_d.size());
+        }
     }
+
     return status::success;
 }
 
@@ -431,8 +710,9 @@ void bwd_weights_balance(int ithr, int nthr, int ngroups, int mb, int &ithr_g,
     }
 }
 
-void bwd_weights_reduction_par(int ithr, int nthr, const jit_gemm_conv_conf_t &jcp,
-        const float *weights_reduce_ws, float *weights) {
+void bwd_weights_reduction_par(int ithr, int nthr,
+        const jit_gemm_conv_conf_t &jcp, const float *weights_reduce_ws,
+        float *weights) {
     const size_t weights_g_size = jcp.ic * jcp.oc * jcp.ks;
 
     size_t weights_start{0}, weights_end{0};
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.hpp
index c2ebc4550..1bcfcc35f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_convolution_utils.hpp
@@ -18,11 +18,12 @@
 #define CPU_JIT_GEMM_CONVOLUTION_UTILS_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
+
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "jit_primitive_conf.hpp"
-#include "mkldnn_thread.hpp"
-#include "scratchpad.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -30,32 +31,32 @@ namespace cpu {
 
 namespace jit_gemm_convolution_utils {
 
-    void im2col_3d(jit_gemm_conv_conf_t &jcp, const float *im, float *col,
+void im2col_3d(const jit_gemm_conv_conf_t &jcp, const float *im, float *col,
         int od);
-    void im2col(jit_gemm_conv_conf_t &jcp, const float *im, float *col);
-    template <typename T>
-    void im2col_u8(jit_gemm_conv_conf_t &jcp, const T *im, uint8_t *col);
+void im2col(const jit_gemm_conv_conf_t &jcp, const float *__restrict im,
+       float *__restrict col, int hs, int hb, int ws, int wb);
+template <typename T>
+void im2col_u8(const jit_gemm_conv_conf_t &jcp, const T *__restrict im,
+        uint8_t *__restrict col);
 
-    void col2im_s32(jit_gemm_conv_conf_t &jcp, const int32_t *col, int32_t *im);
-    void col2im_3d(jit_gemm_conv_conf_t &jcp, const float *col, float *im,
+void col2im_s32(const jit_gemm_conv_conf_t &jcp, const int32_t *__restrict col,
+        int32_t *__restrict im);
+void col2im_3d(const jit_gemm_conv_conf_t &jcp, const float *col, float *im,
         int od);
-    void col2im(jit_gemm_conv_conf_t &jcp, const float *col, float *im);
+void col2im(const jit_gemm_conv_conf_t &jcp, const float *col, float *im);
 
-    void init_conf(jit_gemm_conv_conf_t &jcp,
-        const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
-        const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        int max_threads, bool with_relu = false, float relu_negative_slope = -1.0);
+status_t init_conf(jit_gemm_conv_conf_t &jcp,
+        memory_tracking::registrar_t &scratchpad, const convolution_desc_t &cd,
+        const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
+        const memory_desc_wrapper &dst_d, int max_threads);
 
-    status_t prepare_scratchpad(jit_gemm_conv_conf_t &jcp,
-                scratchpad_t **col_scratchpad_, size_t size, const int nthr);
-
-    void bwd_weights_balance(int ithr, int nthr,
-        int ngroups, int mb, int &ithr_g, int &nthr_g, int &ithr_mb,
-            int &nthr_mb);
-    void bwd_weights_reduction_par(int ithr, int nthr,
+void bwd_weights_balance(int ithr, int nthr, int ngroups, int mb,
+        int &ithr_g, int &nthr_g, int &ithr_mb, int &nthr_mb);
+void bwd_weights_reduction_par(int ithr, int nthr,
         const jit_gemm_conv_conf_t &jcp, const float *weights_reduce_ws,
-            float *weights);
-};
+        float *weights);
+
+}
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.cpp
index d9a8fe5d3..7f62c6b89 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.cpp
@@ -31,20 +31,20 @@ using namespace mkldnn::impl::memory_format;
 using namespace mkldnn::impl::primitive_kind;
 
 template <impl::data_type_t data_type>
-void gemm_inner_product_fwd_t<data_type>::execute_forward() {
+void gemm_inner_product_fwd_t<data_type>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t*>(this->memory());
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int IC = conf_.IC_total_padded();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int IC = pd()->IC_total_padded();
 
-    bool wei_tr = !utils::one_of(conf_.weights_pd()->desc()->format,
+    bool wei_tr = !utils::one_of(pd()->weights_pd()->desc()->format,
              hwio, dhwio, io);
 
-    const auto &post_ops = conf_.attr()->post_ops_;
+    const auto &post_ops = pd()->attr()->post_ops_;
     const bool do_relu = post_ops.len_ == 1;
 
     float alpha = 1.0, beta = 0.0;
@@ -62,16 +62,16 @@ void gemm_inner_product_fwd_t<data_type>::execute_forward() {
 }
 
 template <impl::data_type_t data_type>
-void gemm_inner_product_bwd_data_t<data_type>::execute_backward_data() {
+void gemm_inner_product_bwd_data_t<data_type>::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t*>(this->memory());
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int IC = conf_.IC_total_padded();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int IC = pd()->IC_total_padded();
 
-    bool wei_tr = utils::one_of(conf_.weights_pd()->desc()->format,
+    bool wei_tr = utils::one_of(pd()->weights_pd()->desc()->format,
              hwio, dhwio, io);
 
     float alpha = 1.0, beta = 0.0;
@@ -80,22 +80,22 @@ void gemm_inner_product_bwd_data_t<data_type>::execute_backward_data() {
 }
 
 template <impl::data_type_t data_type>
-void gemm_inner_product_bwd_weights_t<data_type>::execute_backward_weights() {
+void gemm_inner_product_bwd_weights_t<data_type>::execute_backward_weights() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_weights = reinterpret_cast<data_t *>(this->memory(0));
     auto diff_bias = reinterpret_cast<data_t *>(this->memory(1));
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_bias_d(conf_.diff_weights_pd(1));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1));
 
     diff_dst += diff_dst_d.blocking_desc().offset_padding;
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int IC = conf_.IC_total_padded();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int IC = pd()->IC_total_padded();
 
-    bool wei_tr = utils::one_of(conf_.diff_weights_pd()->desc()->format,
+    bool wei_tr = utils::one_of(pd()->diff_weights_pd()->desc()->format,
              hwio, dhwio, io);
 
     float alpha = 1.0, beta = 0.0;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.hpp
index 6e7806eaa..dcd9041f1 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_inner_product.hpp
@@ -64,19 +64,19 @@ struct gemm_inner_product_fwd_t: public cpu_primitive_t {
         }
     };
 
-    gemm_inner_product_fwd_t(const pd_t *pd, const input_vector &inputs,
+    gemm_inner_product_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -108,19 +108,19 @@ struct gemm_inner_product_bwd_data_t: public cpu_primitive_t {
         }
     };
 
-    gemm_inner_product_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    gemm_inner_product_bwd_data_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward_data();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -152,19 +152,19 @@ struct gemm_inner_product_bwd_weights_t: public cpu_primitive_t {
         }
     };
 
-    gemm_inner_product_bwd_weights_t(const pd_t *pd, const input_vector &inputs,
+    gemm_inner_product_bwd_weights_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward_weights();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_weights();
-    pd_t conf_;
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_u8s8s32x_inner_product.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_u8s8s32x_inner_product.cpp
deleted file mode 100644
index eb902a138..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_u8s8s32x_inner_product.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "mkldnn_types.h"
-#include "mkldnn_thread.hpp"
-#include "simple_q10n.hpp"
-#include "gemm_u8s8s32x_inner_product.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using namespace math;
-using namespace memory_format;
-
-template <data_type_t dst_type>
-void gemm_u8s8s32x_inner_product_fwd_t<dst_type>::execute_forward() {
-#if USE_MKL_IGEMM
-    auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
-    auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
-    auto bias = reinterpret_cast<const char *>(this->input_memory(2));
-    auto dst = reinterpret_cast<dst_data_t *>(this->memory());
-
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-
-    bool wei_tr = utils::one_of(conf_.weights_pd()->desc()->format,
-             oihw, oidhw, oi);
-
-    const int M = OC;
-    const int N = MB;
-    const int K = conf_.IC_total_padded();
-    const int8_t off_a = 0, off_b = 0;
-    const int32_t off_c = 0;
-
-    const int scale_idx_mult = conf_.attr()->output_scales_.mask_ == (1 << 1);
-    const float *scales = conf_.attr()->output_scales_.scales_;
-    const auto rmode = conf_.attr()->round_mode_;
-
-    const auto &post_ops = conf_.attr()->post_ops_;
-    const bool do_relu = post_ops.len_ == 1;
-    const float nslope = do_relu ? post_ops.entry_[0].eltwise.alpha : 0.f;
-
-    acc_data_t *acc = this->dst_is_acc_
-        ? (acc_data_t *)dst
-        : (acc_data_t *)this->scratchpad_->get();
-
-    auto get_bias = [=, &bias](size_t off) -> acc_data_t {
-#       define CASE(dt) case dt: return (acc_data_t)\
-        (*((const prec_traits<dt>::type *)bias + off))
-        switch (conf_.desc()->bias_desc.data_type) {
-        CASE(data_type::s8);
-        CASE(data_type::u8);
-        CASE(data_type::s32);
-        CASE(data_type::f32);
-        default: assert(!"unimplemented");
-        }
-#       undef CASE
-        return 0;
-    };
-
-    cblas_gemm_s8u8s32(CblasColMajor, wei_tr ? CblasTrans : CblasNoTrans,
-            CblasNoTrans, CblasFixOffset, M, N, K, 1., weights,
-            wei_tr ? K : M, off_a, src, K, off_b, 0., acc, M, &off_c);
-
-    parallel_nd(MB, OC, [&](int mb, int oc) {
-        size_t dst_off = mb * OC + oc;
-        float d = (float)acc[dst_off];
-        if (bias)
-            d += get_bias(oc);
-        d *= scales[oc * scale_idx_mult];
-        if (do_relu && d < 0)
-            d *= nslope;
-        dst[dst_off] = qz_a1b0<float, dst_data_t>()(d, rmode);
-    });
-#endif
-}
-
-using namespace data_type;
-
-template struct gemm_u8s8s32x_inner_product_fwd_t<f32>;
-template struct gemm_u8s8s32x_inner_product_fwd_t<s32>;
-template struct gemm_u8s8s32x_inner_product_fwd_t<s8>;
-template struct gemm_u8s8s32x_inner_product_fwd_t<u8>;
-}
-}
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.cpp
index 551262606..d9b820540 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.cpp
@@ -32,99 +32,547 @@ namespace cpu {
 
 using namespace mkldnn::impl::utils;
 using namespace mkldnn::impl::math;
+using namespace mkldnn::impl::memory_tracking::names;
 
-template <bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _gemm_x8s8s32x_convolution_fwd_t<with_relu, src_type,
-        dst_type>::execute_forward() {
+template <data_type_t src_type, data_type_t dst_type>
+void _gemm_x8s8s32x_convolution_fwd_t<src_type, dst_type>::
+execute_forward() const {
     auto src_base = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto wei_base = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bia_base = reinterpret_cast<const char *>(this->input_memory(2));
     auto dst_base = reinterpret_cast<dst_data_t *>(this->memory());
 
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
+    auto scratchpad = this->scratchpad();
 
-    char *scratchpad = (char *)this->scratchpad_->get();
-    uint8_t *col = (uint8_t *)scratchpad;
+    const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
+
+    auto col = scratchpad.template get<uint8_t>(key_conv_gemm_col);
     parallel_nd(jcp.im2col_sz * jcp.nthr, [&](ptrdiff_t i) {
         col[i] = jcp.signed_input ? (uint8_t)128 : (uint8_t)0;
     });
 
     parallel(jcp.nthr, [&](const int ithr, const int nthr) {
-        execute_forward_thr(ithr, nthr, src_base, wei_base, bia_base,
-                dst_base, scratchpad);
+        execute_forward_thr(ithr, nthr, src_base, wei_base, bia_base, dst_base,
+                scratchpad);
     });
 }
 
-template <bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _gemm_x8s8s32x_convolution_fwd_t<with_relu, src_type,
-        dst_type>::execute_forward_thr(const int ithr, const int nthr,
-        const src_data_t *src_base, const wei_data_t *wei_base,
-        const char *bia_base, dst_data_t *dst_base, char *scratchpad) {
-#if USE_MKL_IGEMM
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
+template <data_type_t src_type, data_type_t dst_type>
+_gemm_x8s8s32x_convolution_fwd_t<src_type, dst_type>::pp_ker_t::pp_ker_t(
+    const pd_t *pd)
+    : ker_(nullptr)
+    , jcp_(pd->jcp_)
+    , OC_(pd->jcp_.oc)
+    , OS_(pd->jcp_.os)
+    , bias_data_type_(data_type::undef)
+    , bias_data_type_size_(0)
+    , scale_idx_mult_(0)
+    , rmode_(round_mode::nearest)
+    , do_bias_(false)
+    , do_relu_(false)
+    , do_sum_(false)
+{
+    using namespace types;
 
-    const auto src_md = memory_desc_wrapper(conf_.src_pd());
-    const size_t src_mb_stride = src_md.blk_off(1);
-    const size_t src_g_stride = src_md.blk_off(0, 1) * jcp.ic;
+    const auto dst_md = memory_desc_wrapper(pd->dst_pd());
+    dst_os_stride_ = dst_md.blk_off(0, 0, 0, 1);
 
-    const auto wei_md = memory_desc_wrapper(conf_.weights_pd(0));
-    const size_t wei_g_stride = conf_.with_groups() ? wei_md.blk_off(1) : 0;
+    scale_idx_mult_ = (pd->attr()->output_scales_.mask_ == (1 << 1));
+    rmode_ = pd->attr()->round_mode_;
 
-    const auto dst_md = memory_desc_wrapper(conf_.dst_pd());
-    const size_t dst_mb_stride = dst_md.blk_off(1);
-    const size_t dst_g_stride = dst_md.blk_off(0, 1) * jcp.oc;
-    const size_t dst_os_stride = dst_md.blk_off(0, 0, 0, 1);
-
-    auto get_bias = [=, &bia_base](size_t off) -> acc_data_t {
-#       define CASE(dt) case dt: return (acc_data_t)\
-        (*((const prec_traits<dt>::type *)bia_base + off))
-        switch (conf_.cdesc()->bias_desc.data_type) {
-        CASE(data_type::s8);
-        CASE(data_type::u8);
-        CASE(data_type::s32);
-        CASE(data_type::f32);
+    auto &post_ops = pd->attr()->post_ops_;
+
+    int entry_idx = -1;
+    for (int idx = 0; idx < post_ops.len_; ++idx) {
+        const auto &e = post_ops.entry_[idx];
+        if (e.is_relu(true, false)) {
+            entry_idx = idx;
+            break;
+        }
+    }
+    do_relu_ = entry_idx >= 0;
+
+    do_signed_scaling_ = jcp_.signed_input;
+
+    do_sum_ = post_ops.contain(primitive_kind::sum, 0);
+    do_bias_ = pd->with_bias();
+    bias_data_type_ = pd->desc()->bias_desc.data_type;
+    if (do_bias_) {
+        assert(bias_data_type_ != data_type::undef);
+        bias_data_type_size_ = data_type_size(bias_data_type_);
+    }
+    const size_t vlen_start
+            = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
+
+    for (size_t i = vlen_start; i > 0; i--) {
+        if (OC_ % i == 0) {
+            vlen_ = i;
+            break;
+        }
+    }
+
+    if (!mayiuse(avx512_core))
+        // use fallback code for older CPUs
+        return;
+    else
+        generate();
+}
+
+template <data_type_t src_type, data_type_t dst_type>
+void _gemm_x8s8s32x_convolution_fwd_t<src_type, dst_type>::pp_ker_t::generate()
+{
+    using namespace Xbyak;
+    using namespace utils;
+    using namespace round_mode;
+
+    // TODO: clean-up
+    Reg64 reg_param = abi_param1;
+    Reg64 reg_dst = rdx;
+    Reg64 reg_acc = rax;
+    Reg64 reg_bias = rbx;
+    Reg64 reg_scales = rsi;
+
+    Reg64 reg_len = r8;
+    Reg64 reg_tmp = rcx; // intentional for shifting purposes
+    Reg64 reg_oc_offset = r9;
+    Reg64 reg_rem_mask_short = r10;
+    Reg64 reg_rem_mask_vlen = r11;
+    Opmask kreg_rem_mask_short = k1;
+    Opmask kreg_rem_mask_vlen = k3;
+    Opmask kreg_relu_cmp = k2;
+
+    const size_t vlen = 4;
+
+    Zmm vreg_zero = Zmm(0);
+    Zmm vreg_scale = Zmm(1);
+    Zmm vreg_nslope = Zmm(2);
+    Zmm vreg_sum_scale = Zmm(3);
+    Zmm vreg_signed_scale = Zmm(4);
+
+    size_t def_unroll = 4;
+    size_t max_unroll = 12;
+    size_t zmm_step = 2;
+    if (do_sum_) {
+        max_unroll = 8;
+        zmm_step = 3;
+    }
+
+    auto vreg_dst = [&](int idx) {
+        return Zmm(5 + idx * zmm_step + 0);
+    };
+    auto vreg_bias = [&](int idx) {
+        return Zmm(5 + idx * zmm_step + 1);
+    };
+    auto vreg_prev_dst = [&](int idx) {
+        return Zmm(5 + idx * zmm_step + 2);
+    };
+
+    preamble();
+
+#define PARAM_OFF(x) offsetof(ker_args, x)
+    mov(reg_dst, ptr[reg_param + PARAM_OFF(dst)]);
+    mov(reg_acc, ptr[reg_param + PARAM_OFF(acc)]);
+    mov(reg_bias, ptr[reg_param + PARAM_OFF(bias)]);
+    mov(reg_scales, ptr[reg_param + PARAM_OFF(scales)]);
+    mov(reg_len, ptr[reg_param + PARAM_OFF(len)]);
+    mov(reg_oc_offset, ptr[reg_param + PARAM_OFF(oc_offset)]);
+    vbroadcastss(vreg_nslope, ptr[reg_param + PARAM_OFF(nslope)]);
+    vbroadcastss(vreg_sum_scale, ptr[reg_param + PARAM_OFF(sum_scale)]);
+    vbroadcastss(vreg_signed_scale, ptr[reg_param + PARAM_OFF(signed_scale)]);
+    if (scale_idx_mult_ == 0)
+        vbroadcastss(vreg_scale, dword[reg_scales]);
+
+#undef PARAM_OFF
+
+    mov(reg_rem_mask_vlen, 1);
+    shl(reg_rem_mask_vlen, vlen);
+    sub(reg_rem_mask_vlen, 1);
+    kmovq(kreg_rem_mask_vlen, reg_rem_mask_vlen);
+
+    if (do_relu_ || dst_type == data_type::u8)
+        vxorps(vreg_zero, vreg_zero, vreg_zero);
+
+    // Load accumulated value, convert to float, apply sum (if any),
+    // bias (if any), scaling, and relu (if any);
+    // then convert to destination type and store
+    auto compute = [&](size_t offset, int idx, bool apply_mask) {
+        auto acc_addr = ptr[reg_acc + offset * sizeof(acc_data_t)];
+
+        if (scale_idx_mult_ > 0) {
+            assert(scale_idx_mult_ == 1);
+            auto scale_addr = ptr[reg_scales + offset * sizeof(float)];
+            auto vreg_scale_ = vreg_scale;
+            if (apply_mask)
+                vreg_scale_ = vreg_scale_ | kreg_rem_mask_short;
+            else
+                vreg_scale_ = vreg_scale_ | kreg_rem_mask_vlen;
+            vmovups(vreg_scale_, scale_addr);
+        }
+
+        auto vreg_dst_ = vreg_dst(idx);
+        if (apply_mask)
+            vreg_dst_ = vreg_dst_ | kreg_rem_mask_short;
+        else
+            vreg_dst_ = vreg_dst_ | kreg_rem_mask_vlen;
+        vcvtdq2ps(vreg_dst_, acc_addr);
+
+        if (do_signed_scaling_)
+            vmulps(vreg_dst(idx), vreg_dst(idx), vreg_signed_scale);
+
+        if (do_bias_) {
+            auto bias_addr = ptr[reg_bias + offset * bias_data_type_size_];
+            auto vreg_bias_ = vreg_bias(idx);
+            if (apply_mask)
+                vreg_bias_ = vreg_bias_ | kreg_rem_mask_short;
+            else
+                vreg_bias_ = vreg_bias_ | kreg_rem_mask_vlen;
+
+            switch (bias_data_type_) {
+            case data_type::s8:
+                vpmovsxbd(vreg_bias_, bias_addr);
+                break;
+            case data_type::u8:
+                vpmovzxbd(vreg_bias_, bias_addr);
+                break;
+            case data_type::s32:
+                vcvtdq2ps(vreg_bias_, bias_addr);
+                break;
+            case data_type::f32:
+                vmovups(vreg_bias_, bias_addr);
+                break;
+            default: assert(!"unimplemented");
+            }
+            vaddps(vreg_dst(idx), vreg_dst(idx), vreg_bias(idx));
+        }
+
+        vmulps(vreg_dst(idx), vreg_dst(idx), vreg_scale);
+
+        auto dst_addr = ptr[reg_dst + offset * sizeof(dst_data_t)];
+
+        if (do_sum_)
+        {
+            auto vreg_prev_dst_ = vreg_prev_dst(idx);
+            if (apply_mask)
+                vreg_prev_dst_ = vreg_prev_dst_ | kreg_rem_mask_short;
+            else
+                vreg_prev_dst_ = vreg_prev_dst_ | kreg_rem_mask_vlen;
+
+            switch (dst_type) {
+            case data_type::f32:
+            case data_type::s32: vmovups(vreg_prev_dst_, dst_addr); break;
+            case data_type::s8: vpmovsxbd(vreg_prev_dst_, dst_addr); break;
+            case data_type::u8: vpmovzxbd(vreg_prev_dst_, dst_addr); break;
+            default: assert(!"unsupported data type");
+            }
+            if (dst_type != data_type::f32)
+                vcvtdq2ps(vreg_prev_dst(idx), vreg_prev_dst(idx));
+
+            vfmadd231ps(vreg_dst(idx), vreg_prev_dst(idx), vreg_sum_scale);
+        }
+
+        if (do_relu_) {
+            vcmpps(kreg_relu_cmp, vreg_dst(idx), vreg_zero, _cmp_lt_os);
+            vmulps(vreg_dst(idx) | kreg_relu_cmp, vreg_dst(idx), vreg_nslope);
+        }
+
+        if (dst_type != data_type::f32) {
+            auto rmode_control = (rmode_ == nearest ? T_rn_sae : T_rd_sae);
+            vcvtps2dq(vreg_dst(idx) | rmode_control, vreg_dst(idx));
+        }
+
+        if (dst_type == data_type::u8)
+            vpmaxsd(vreg_dst(idx), vreg_dst(idx), vreg_zero);
+
+        switch (dst_type) {
+        case data_type::s8:
+            vpmovsdb(dst_addr, vreg_dst_);
+            break;
+        case data_type::u8:
+            vpmovusdb(dst_addr, vreg_dst_);
+            break;
+        case data_type::f32:
+        case data_type::s32:
+            vmovups(dst_addr, vreg_dst_);
+            break;
         default: assert(!"unimplemented");
         }
-#       undef CASE
-        return 0;
     };
 
-    /* scale_idx_mult = 1 for per_oc scales and 0, otherwise */
-    const int scale_idx_mult = conf_.attr()->output_scales_.mask_ == (1 << 1);
-    const float *scales = conf_.attr()->output_scales_.scales_;
+    // Advance all pointers by an immediate
+    auto advance_ptrs_imm = [&](size_t offset) {
+        add(reg_dst, offset * sizeof(dst_data_t));
+        add(reg_acc, offset * sizeof(acc_data_t));
+        if (scale_idx_mult_) {
+            assert(scale_idx_mult_ == 1);
+            add(reg_scales, offset * sizeof(float));
+        }
+        if (do_bias_)
+            add(reg_bias, offset * bias_data_type_size_);
+    };
+
+    // Advance all pointers by a value stored in a register
+    auto advance_ptrs_reg = [&](Reg64 offset) {
+        lea(reg_dst, ptr[reg_dst + offset * sizeof(dst_data_t)]);
+        lea(reg_acc, ptr[reg_acc + offset * sizeof(acc_data_t)]);
+        if (scale_idx_mult_) {
+            assert(scale_idx_mult_ == 1);
+            lea(reg_scales, ptr[reg_scales + offset * sizeof(float)]);
+        }
+        if (do_bias_)
+            lea(reg_bias, ptr[reg_bias + offset * bias_data_type_size_]);
+    };
+
+    // Rewind pointers that point to data that is indexed by output channel
+    // (bias or per-oc scaling factors)
+    auto rewind_ptrs = [&]() {
+        if (do_bias_)
+            sub(reg_bias, OC_ * bias_data_type_size_);
+        if (scale_idx_mult_) {
+            assert(scale_idx_mult_ == 1);
+            sub(reg_scales, OC_ * sizeof(float));
+        }
+        add(reg_dst, (dst_os_stride_ - OC_) * sizeof(dst_data_t));
+    };
+
+    //                    <--------- OC --------------->
+    //
+    // ^  ................+..............+-------------+.......................
+    // |  .               : not accessed |Prologue loop|                      .
+    // |  .               +--------------+-------------+                      .
+    //    .               |                            |                      .
+    // O  .               |  Main loop (unrolled)      |                      .
+    // S  .               |                            |                      .
+    //    .               +--------------+-------------+                      .
+    // |  .               | Epilogue loop|not accessed :                      .
+    // v  ................+--------------+.............+.......................
+
+    Label prologue_end;
+    cmp(reg_oc_offset, 0);
+    je(prologue_end, T_NEAR);
+
+    // Prologue loop
+    {
+        mov(reg_tmp, OC_);
+        sub(reg_tmp, reg_oc_offset);
+        cmp(reg_tmp, reg_len);
+        cmovg(reg_tmp, reg_len);
+        sub(reg_len, reg_tmp);
+
+        Label prologue_loop, prologue_loop_tail, prologue_loop_end;
+        cmp(reg_tmp, vlen);
+        jle(prologue_loop_tail, T_NEAR);
+        L(prologue_loop); {
+            compute(0, 0, false);
+            advance_ptrs_imm(vlen);
+            sub(reg_tmp, vlen);
+            cmp(reg_tmp, vlen);
+            jge(prologue_loop, T_NEAR);
+        }
+
+        L(prologue_loop_tail);
+        mov(reg_rem_mask_short, 1);
+        // cl == reg_tmp because reg_tmp <= vlen here
+        shl(reg_rem_mask_short, cl);
+        sub(reg_rem_mask_short, 1);
+        jz(prologue_loop_end, T_NEAR);
+
+        kmovq(kreg_rem_mask_short, reg_rem_mask_short);
+        compute(0, 0, true);
+        advance_ptrs_reg(reg_tmp);
 
-    const auto rmode = conf_.attr()->round_mode_;
+        L(prologue_loop_end);
+        rewind_ptrs();
+    }
+    L(prologue_end);
+
+    // Main loop
+    Label main_loop_end;
+    {
+        cmp(reg_len, OC_);
+        jle(main_loop_end, T_NEAR);
+
+        Label main_loop;
+        L(main_loop); {
+            size_t OC_loop, OC_tail;
+            if (OC_ < max_unroll * vlen) {
+                // Fully unroll small loops
+                OC_loop = 0;
+                OC_tail = OC_;
+            }
+            else {
+                OC_loop = vlen * def_unroll;
+                OC_tail = OC_ % OC_loop;
+            }
+
+            assert(!!OC_loop || !!OC_tail);
+
+            if (OC_tail % vlen) {
+                int vlen_tail = OC_tail % vlen;
+                unsigned tail_mask = (1 << vlen_tail) - 1;
+                mov(reg_tmp, tail_mask);
+                kmovq(kreg_rem_mask_short, reg_tmp);
+            }
+
+            if (OC_loop) {
+                mov(reg_tmp, rnd_dn(OC_, OC_loop));
+                Label oc_loop;
+                L(oc_loop); {
+                    for (size_t offset = 0; offset < OC_loop; offset += vlen)
+                        compute(offset, offset / vlen, false);
+                    advance_ptrs_imm(OC_loop);
+                    sub(reg_tmp, OC_loop);
+                    jnz(oc_loop);
+                }
+            }
+
+            if (OC_tail) {
+                for (size_t offset = 0; offset < OC_tail; offset += vlen) {
+                    bool use_mask = (offset + vlen) > OC_tail;
+                    compute(offset, offset / vlen, use_mask);
+                }
+                advance_ptrs_imm(OC_tail);
+            }
+
+            rewind_ptrs();
+            sub(reg_len, OC_);
+            cmp(reg_len, OC_);
+            jge(main_loop, T_NEAR);
+        }
+    }
+    L(main_loop_end);
+
+    // Epilogue loop
+    Label epilogue_end;
+    {
+        cmp(reg_len, 0);
+        je(epilogue_end, T_NEAR);
+
+        Label epilogue_loop, epilogue_loop_tail;
+        cmp(reg_len, vlen);
+        jle(epilogue_loop_tail, T_NEAR);
+        L(epilogue_loop); {
+            compute(0, 0, false);
+            sub(reg_len, vlen);
+            advance_ptrs_imm(vlen);
+            cmp(reg_len, vlen);
+            jge(epilogue_loop, T_NEAR);
+        }
+
+        L(epilogue_loop_tail);
+        mov(reg_tmp, reg_len); // reg_tmp is rcx, and we need cl for the shift
+        mov(reg_rem_mask_short, 1);
+        shl(reg_rem_mask_short, cl); // reg_tmp == rcx and reg_tail < vlen
+        sub(reg_rem_mask_short, 1);
+        jz(epilogue_end, T_NEAR);
+        kmovq(kreg_rem_mask_short, reg_rem_mask_short);
+        compute(0, 0, true);
+    }
 
-    const bool use_fast_path = true
-        && scale_idx_mult == 0
-        && jcp.ngroups == 1
-        && !jcp.with_bias;
-    const float fast_path_alpha = scales[0] / jcp.wei_adj_scale;
+    L(epilogue_end);
 
-    const auto &post_ops = conf_.attr()->post_ops_;
+    postamble();
+
+    ker_ = getCode<decltype(ker_)>();
+}
+
+template <data_type_t src_type, data_type_t dst_type>
+void _gemm_x8s8s32x_convolution_fwd_t<src_type, dst_type>::pp_ker_t::operator ()
+    (dst_data_t *dst, const acc_data_t *acc, const char *bias,
+        const float *scales, float nslope, float sum_scale, float signed_scale,
+        int g, size_t start, size_t end)
+{
+    using math::get_bias;
+
+    if (end <= start)
+        return;
+
+    if (ker_) {
+        // JIT
+        ker_args args;
+        size_t oc_offset = start % OC_;
+        size_t os_offset = start / OC_;
+        args.acc = acc + start;
+        args.dst = dst + os_offset * dst_os_stride_ + oc_offset;
+        args.bias = bias + (g * jcp_.oc + oc_offset) * bias_data_type_size_;
+        args.scales = scales + scale_idx_mult_ * (g * jcp_.oc + oc_offset);
+        args.nslope = nslope;
+        args.sum_scale = sum_scale;
+        args.signed_scale = signed_scale;
+        args.len = end - start;
+        args.oc_offset = oc_offset;
+        ker_(&args);
+    }
+    else {
+        // Fallback
+        const size_t first_oc = start % OC_;
+        const size_t last_oc = (end - 1) % OC_;
+        const size_t first_os = start / OC_;
+        const size_t last_os = (end - 1) / OC_;
+        for (size_t os = first_os; os <= last_os; os++) {
+            const size_t start_oc = (os == first_os) ? first_oc : 0;
+            const size_t end_oc = (os == last_os) ? last_oc : OC_ - 1;
+            for (size_t oc = start_oc; oc <= end_oc; oc++) {
+                const size_t acc_off = os * jcp_.oc + oc;
+                const size_t dst_off = os * dst_os_stride_ + oc;
+
+                float d = (float)(acc[acc_off]);
+                if (jcp_.signed_input)
+                    d *= signed_scale;
+
+                if (do_bias_)
+                    d += get_bias(bias, g * jcp_.oc + oc,
+                        bias_data_type_);
+
+                d *= scales[(g * jcp_.oc + oc) * scale_idx_mult_];
+                if (do_sum_)
+                    d += sum_scale * dst[dst_off];
+                if (do_relu_ && d < 0)
+                    d *= nslope;
+                dst[dst_off] = qz_a1b0<float, dst_data_t>()(d, rmode_);
+            }
+        }
+    }
+};
+
+template <data_type_t src_type, data_type_t dst_type>
+void _gemm_x8s8s32x_convolution_fwd_t<src_type, dst_type>::
+execute_forward_thr(const int ithr, const int nthr, const src_data_t *src_base,
+        const wei_data_t *wei_base, const char *bia_base, dst_data_t *dst_base,
+        const memory_tracking::grantor_t &scratchpad) const {
+    const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
+
+    const auto src_md = memory_desc_wrapper(pd()->src_pd());
+    const size_t src_mb_stride = src_md.blk_off(1);
+    const size_t src_g_stride = src_md.blk_off(0, 1) * jcp.ic;
+
+    const auto wei_md = memory_desc_wrapper(pd()->weights_pd(0));
+    const size_t wei_g_stride = pd()->with_groups() ? wei_md.blk_off(1) : 0;
+
+    const auto dst_md = memory_desc_wrapper(pd()->dst_pd());
+    const size_t dst_mb_stride = dst_md.blk_off(1);
+    const size_t dst_g_stride = dst_md.blk_off(0, 1) * jcp.oc;
+
+    const float *scales = pd()->attr()->output_scales_.scales_;
+
+    const auto &post_ops = pd()->attr()->post_ops_;
     const bool do_sum = post_ops.contain(primitive_kind::sum, 0);
     const float sum_scale = do_sum ? post_ops.entry_[0].sum.scale : 0;
 
-    float nslope = jcp.with_relu ? jcp.relu_negative_slope : 0;
-    int entry_idx = -1;
+    float nslope = 0;
     for (int idx = 0; idx < post_ops.len_; ++idx) {
         const auto &e = post_ops.entry_[idx];
         if (e.is_relu(true, false)) {
-            entry_idx = idx;
             nslope = e.eltwise.alpha;
             break;
         }
     }
-    const bool do_relu = jcp.with_relu || (entry_idx >= 0);
-
-    uint8_t *_col = (uint8_t *)scratchpad;
-    ptrdiff_t offset = (ptrdiff_t)jcp.im2col_sz * sizeof(uint8_t) * jcp.nthr;
-    acc_data_t *_acc = (acc_data_t *)(scratchpad + offset);
 
-    uint8_t *col = _col + (ptrdiff_t)ithr * jcp.im2col_sz;
-    acc_data_t *acc = _acc + (ptrdiff_t)ithr * jcp.os * jcp.oc;
+    auto col = scratchpad.get<uint8_t>(key_conv_gemm_col)
+        + (ptrdiff_t)ithr * jcp.im2col_sz;
+    auto acc = scratchpad.get<acc_data_t>(key_conv_int_dat_in_acc_dt)
+        + (ptrdiff_t)ithr * jcp.os * jcp.oc;
 
-    offset = (ptrdiff_t)jcp.ngroups * jcp.ks * jcp.ic * jcp.oc;
+    const ptrdiff_t offset = (ptrdiff_t)jcp.ngroups * jcp.ks * jcp.ic * jcp.oc;
     const int32_t *_wei_comp = (const int32_t *)(wei_base + offset);
 
     int n{0}, g{0};
@@ -147,62 +595,40 @@ void _gemm_x8s8s32x_convolution_fwd_t<with_relu, src_type,
         const int M = jcp.oc;
         const int K = jcp.ks * jcp.ic;
         const int N = jcp.os;
-        const CBLAS_OFFSET offsetc
-                = jcp.signed_input ? CblasColOffset : CblasFixOffset;
+        const int LD = M * jcp.ngroups;
         const int8_t off_a = 0, off_b = 0;
         const int32_t off_c = 0;
+        const float onef = 1.0, zerof = 0.0;
+
+        mkldnn_gemm_s8u8s32("N", "N", jcp.signed_input ? "C" : "F",
+                &M, &N, &K, &onef, wei, &LD, &off_a,
+                jcp.im2col_sz ? col : (uint8_t *)src, &K, &off_b,
+                &zerof, acc, &M, jcp.signed_input ? wei_comp : &off_c);
+
+        parallel(0, [&](int ithr, int nthr) {
+            size_t start, end;
+            balance211((size_t)jcp.os * jcp.oc, nthr, ithr, start, end);
+            (*pp_ker_)(dst, acc, bia_base, scales, nslope, sum_scale,
+                    jcp.signed_input ? 1.f / jcp.wei_adj_scale : 1.f,
+                    g, start, end);
+        });
 
-        cblas_gemm_s8u8s32(CblasColMajor, CblasNoTrans, CblasNoTrans, offsetc,
-                M, N, K, 1.0f, wei, M * jcp.ngroups, off_a,
-                jcp.im2col_sz ? col : (uint8_t *)src, K, off_b, 0.0f, acc, M,
-                jcp.signed_input ? wei_comp : &off_c);
-
-        if (use_fast_path) {
-            auto body = [&](int o) {
-                float d = fast_path_alpha * acc[o] + sum_scale * dst[o];
-                if (do_relu && d < 0) d *= nslope;
-                dst[o] = qz_a1b0<float, dst_data_t>()(d, rmode);
-            };
-
-#           if MKLDNN_THR == MKLDNN_THR_OMP && _OPENMP >= 201307
-#           pragma omp parallel for simd
-            for (int o = 0; o < jcp.os * jcp.oc; ++o) body(o);
-#           else
-            parallel_nd(jcp.os * jcp.oc, body);
-#           endif
-        } else {
-            parallel_nd(jcp.os, jcp.oc, [&](const int os, const int oc) {
-                const size_t acc_off = os * jcp.oc + oc;
-                float d = (float)acc[acc_off];
-                if (jcp.signed_input)
-                    d /= jcp.wei_adj_scale;
-
-                if (jcp.with_bias)
-                    d += get_bias(g * jcp.oc + oc);
-
-                d *= scales[(g * jcp.oc + oc) * scale_idx_mult];
-
-                const size_t dst_off = os * dst_os_stride + oc;
-                if (do_sum) d += sum_scale * dst[dst_off];
-                if (do_relu && d < 0) d *= nslope;
-                dst[dst_off] = qz_a1b0<float, dst_data_t>()(d, rmode);
-            });
-        }
         nd_iterator_step(n, jcp.mb, g, jcp.ngroups);
     }
-#endif
 }
 
 template <data_type_t dst_type>
-void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>::execute_backward_data() {
+void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>::
+execute_backward_data() const {
     auto diff_dst_base = reinterpret_cast<const diff_dst_data_t *>
             (this->input_memory(0));
     auto wei_base = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bia_base = reinterpret_cast<const char *>(this->input_memory(2));
     auto diff_src_base = reinterpret_cast<diff_src_data_t *>(this->memory());
 
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
-    char *scratchpad = (char *)this->scratchpad_->get();
+    auto scratchpad = this->scratchpad();
+
+    const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
 
     parallel(jcp.nthr, [&](const int ithr, const int nthr) {
         execute_backward_data_thr(ithr, nthr, diff_dst_base, wei_base,
@@ -211,53 +637,36 @@ void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>::execute_backward_data() {
 }
 
 template <data_type_t dst_type>
-void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>
-::execute_backward_data_thr(const int ithr, const int nthr,
+void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>::
+execute_backward_data_thr(const int ithr, const int nthr,
         const diff_dst_data_t *diff_dst_base, const wei_data_t *wei_base,
-        const char *bia_base, diff_src_data_t *diff_src_base, char *scratchpad)
+        const char *bia_base, diff_src_data_t *diff_src_base,
+        const memory_tracking::grantor_t &scratchpad) const
 {
-#if USE_MKL_IGEMM
-    jit_gemm_conv_conf_t &jcp = this->conf_.jcp_;
+    const jit_gemm_conv_conf_t &jcp = this->pd()->jcp_;
 
-    const auto diff_dst_md = memory_desc_wrapper(conf_.diff_dst_pd());
+    const auto diff_dst_md = memory_desc_wrapper(pd()->diff_dst_pd());
     const size_t diff_dst_mb_stride = diff_dst_md.blk_off(1);
     const size_t diff_dst_g_stride = diff_dst_md.blk_off(0, 1) * jcp.oc;
 
-    const auto wei_md = memory_desc_wrapper(conf_.weights_pd(0));
-    const size_t wei_g_stride = conf_.with_groups() ? wei_md.blk_off(1) : 0;
+    const auto wei_md = memory_desc_wrapper(pd()->weights_pd(0));
+    const size_t wei_g_stride = pd()->with_groups() ? wei_md.blk_off(1) : 0;
 
-    const auto diff_src_md = memory_desc_wrapper(conf_.diff_src_pd());
+    const auto diff_src_md = memory_desc_wrapper(pd()->diff_src_pd());
     const size_t diff_src_mb_stride = diff_src_md.blk_off(1);
     const size_t diff_src_g_stride = diff_src_md.blk_off(0, 1) * jcp.ic;
     const size_t diff_src_os_stride = diff_src_md.blk_off(0, 0, 0, 1);
 
-    auto get_bias = [=, &bia_base](size_t off) -> acc_data_t {
-#       define CASE(dt) case dt: return (acc_data_t)\
-        (*((const prec_traits<dt>::type *)bia_base + off))
-        switch (conf_.desc()->bias_desc.data_type) {
-        CASE(data_type::s8);
-        CASE(data_type::u8);
-        CASE(data_type::s32);
-        CASE(data_type::f32);
-        default: assert(!"unimplemented");
-        }
-#       undef CASE
-        return 0;
-    };
-
     /* scale_idx_mult = 1 for per_oc scales and 0, otherwise */
-    const int scale_idx_mult = conf_.attr()->output_scales_.mask_ == (1 << 1);
-    const float *scales = conf_.attr()->output_scales_.scales_;
-    const auto rmode = conf_.attr()->round_mode_;
+    const int scale_idx_mult = pd()->attr()->output_scales_.mask_ == (1 << 1);
+    const float *scales = pd()->attr()->output_scales_.scales_;
+    const auto rmode = pd()->attr()->round_mode_;
     const size_t work_amount = jcp.ngroups * jcp.mb;
 
-    acc_data_t *_col = (acc_data_t *)scratchpad;
-    ptrdiff_t offset = (ptrdiff_t)jcp.im2col_sz
-                                    * sizeof(acc_data_t) * jcp.nthr;
-    acc_data_t *_acc = (acc_data_t *)(scratchpad + offset);
-
-    acc_data_t *col = _col + (ptrdiff_t)ithr * jcp.im2col_sz;
-    acc_data_t *acc = _acc + (ptrdiff_t)ithr * jcp.is * jcp.ic;
+    auto col = scratchpad.get<acc_data_t>(key_conv_gemm_col)
+        + (ptrdiff_t)ithr * jcp.im2col_sz;
+    auto acc = scratchpad.get<acc_data_t>(key_conv_int_dat_in_acc_dt)
+        + (ptrdiff_t)ithr * jcp.is * jcp.ic;
 
     int n{0}, g{0};
     size_t start = 0, end = 0;
@@ -277,11 +686,12 @@ void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>
         const int K = jcp.oc;
         const int8_t off_a = 0, off_b = 0;
         const int32_t off_c = 0;
+        const float onef = 1.0, zerof = 0.0;
+        const int LD = K * jcp.ngroups;
 
-        cblas_gemm_s8u8s32(CblasColMajor, CblasTrans, CblasNoTrans,
-                CblasFixOffset, M, N, K, 1., wei, K * jcp.ngroups, off_a,
-                diff_dst, K * jcp.ngroups, off_b, 0., jcp.im2col_sz ? col
-                : acc, M, &off_c);
+        mkldnn_gemm_s8u8s32("T", "N", "F", &M, &N, &K, &onef,
+                wei, &LD, &off_a, diff_dst, &LD, &off_b,
+                &zerof, jcp.im2col_sz ? col : acc, &M, &off_c);
 
         if (jcp.im2col_sz)
             jit_gemm_convolution_utils::col2im_s32(jcp, col, acc);
@@ -289,7 +699,8 @@ void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>
         parallel_nd(jcp.is, jcp.ic, [&](int is, int ic) {
             float d = (float)acc[is * jcp.ic + ic];
             if (jcp.with_bias)
-                d += get_bias(g * jcp.ic + ic);
+                d += get_bias(bia_base, g * jcp.ic + ic,
+                        pd()->desc()->bias_desc.data_type);
             d *= scales[(g * jcp.ic + ic) * scale_idx_mult];
             const size_t diff_src_off = is * diff_src_os_stride + ic;
             diff_src[diff_src_off] =
@@ -297,28 +708,19 @@ void _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>
         });
         nd_iterator_step(n, jcp.mb, g, jcp.ngroups);
     }
-#endif
 }
 
 using namespace data_type;
 
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, u8, f32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, u8, s32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, u8, s8>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, u8, u8>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, u8, f32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, u8, s32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, u8, s8>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, u8, u8>;
-
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, s8, f32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, s8, s32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, s8, s8>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<true, s8, u8>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, s8, f32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, s8, s32>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, s8, s8>;
-template struct _gemm_x8s8s32x_convolution_fwd_t<false, s8, u8>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<u8, f32>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<u8, s32>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<u8, s8>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<u8, u8>;
+
+template struct _gemm_x8s8s32x_convolution_fwd_t<s8, f32>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<s8, s32>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<s8, s8>;
+template struct _gemm_x8s8s32x_convolution_fwd_t<s8, u8>;
 
 template struct _gemm_u8s8s32x_convolution_bwd_data_t<f32>;
 template struct _gemm_u8s8s32x_convolution_bwd_data_t<s32>;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.hpp
index 3bc0cc474..e7943ac14 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_convolution.hpp
@@ -18,28 +18,31 @@
 #define GEMM_X8S8S32X_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "jit_primitive_conf.hpp"
+#include "jit_generator.hpp"
 #include "gemm_convolution_utils.hpp"
 
-#include "gemm/os_blas.hpp"
+#include "gemm/gemm.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu, data_type_t src_type, data_type_t dst_type>
+template <data_type_t src_type, data_type_t dst_type>
 struct _gemm_x8s8s32x_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd), jcp_() {}
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_() {}
 
-        DECLARE_COMMON_PD_T("gemm:blas",
-                _gemm_x8s8s32x_convolution_fwd_t<with_relu, src_type, dst_type>);
+        DECLARE_COMMON_PD_T(IGEMM_S8U8S32_IMPL_STR,
+                _gemm_x8s8s32x_convolution_fwd_t<src_type, dst_type>);
 
         virtual status_t init() override {
             using namespace data_type;
@@ -48,30 +51,33 @@ struct _gemm_x8s8s32x_convolution_fwd_t: public cpu_primitive_t {
             assert(this->engine()->kind() == engine_kind::cpu);
 
             bool ok = true
-#if !USE_MKL_IGEMM
-                && false
-#endif
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind,
+                && utils::one_of(this->desc()->prop_kind,
                         prop_kind::forward_training,
                         prop_kind::forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                        alg_kind::convolution_auto,
+                        alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
-                && this->cdesc_().src_desc.data_type == src_type
-                && this->cdesc_().dst_desc.data_type == dst_type
-                && this->cdesc_().weights_desc.data_type == s8
+                && this->desc()->src_desc.data_type == src_type
+                && this->desc()->dst_desc.data_type == dst_type
+                && this->desc()->weights_desc.data_type == s8
                 && IMPLICATION(this->with_bias(), utils::one_of(
-                            this->cdesc_().bias_desc.data_type, f32, s32, s8,
+                            this->desc()->bias_desc.data_type, f32, s32, s8,
                             u8))
-                && this->cdesc_().accum_data_type == data_type::s32
+                && this->desc()->accum_data_type == data_type::s32
                 && utils::everyone_is(nhwc, this->src_pd_.desc()->format,
                         this->dst_pd_.desc()->format)
                 && this->weights_pd_.desc()->format == (this->with_groups()
                         ? ((src_type == data_type::s8) ? hwigo_s8s8 : hwigo)
                         : ((src_type == data_type::s8) ? hwio_s8s8 : hwio))
                 && this->is_gemm_conv_format();
+            if (!ok) return status::unimplemented;
 
-            return ok ? status::success : status::unimplemented;
+            auto scratchpad = scratchpad_registry().registrar();
+            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
+                    *this->desc(), this->src_pd(), this->weights_pd(0),
+                    this->dst_pd(), mkldnn_get_max_threads());
         }
 
         jit_gemm_conv_conf_t jcp_;
@@ -79,94 +85,127 @@ struct _gemm_x8s8s32x_convolution_fwd_t: public cpu_primitive_t {
     protected:
         virtual status_t set_default_params() override {
             using namespace memory_format;
-            bool is_sign_input =
-                    (this->cdesc_().src_desc.data_type == data_type::s8);
+            const bool is_sign_input =
+                this->desc()->src_desc.data_type == data_type::s8;
+
             if (this->src_pd_.desc()->format == any)
                 CHECK(this->src_pd_.set_format(nhwc));
             if (this->dst_pd_.desc()->format == any)
                 CHECK(this->dst_pd_.set_format(nhwc));
             if (this->weights_pd_.desc()->format == any)
                 CHECK(this->weights_pd_.set_format(this->with_groups()
-                            ? ((is_sign_input) ? hwigo_s8s8 : hwigo)
-                            : ((is_sign_input) ? hwio_s8s8 : hwio)));
+                            ? (is_sign_input ? hwigo_s8s8 : hwigo)
+                            : (is_sign_input ? hwio_s8s8 : hwio)));
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
 
         virtual bool is_gemm_conv_format() const {
             using namespace mkldnn::impl::primitive_kind;
-            bool ok = true;
             auto const &po = this->attr()->post_ops_;
+            auto is_relu = [&](int idx) {
+                return po.entry_[idx].is_relu(true, false); };
+
             switch (po.len_) {
-            case 0: break;
-            case 1: ok = ok
-                    && (po.entry_[0].is_relu() || po.contain(sum, 0));
-                break;
-            case 2: ok = ok
-                    && (po.contain(sum, 0) && po.entry_[1].is_relu());
-                break;
-            default: ok = false;
+            case 0: return true;
+            case 1: return is_relu(0) || po.contain(sum, 0);
+            case 2: return po.contain(sum, 0) && is_relu(1);
+            default: return false;
             }
-            return ok;
+            return false;
         }
     };
 
-    _gemm_x8s8s32x_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    _gemm_x8s8s32x_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , scratchpad_(nullptr)
-    {
-        jit_gemm_convolution_utils::init_conf(conf_.jcp_,
-            *conf_.cdesc(), conf_.src_pd(), conf_.weights_pd(0),
-            conf_.dst_pd(), mkldnn_get_max_threads(), with_relu, conf_.negative_slope());
-
-        size_t col_size = (size_t)conf_.jcp_.im2col_sz * sizeof(src_data_t);
-        size_t acc_size = (size_t)conf_.jcp_.os * conf_.jcp_.oc
-                            * sizeof(acc_data_t);
-        size_t size = col_size + acc_size;
-
-        jit_gemm_convolution_utils::prepare_scratchpad(this->conf_.jcp_,
-                &this->scratchpad_, size, this->conf_.jcp_.nthr);
+        : cpu_primitive_t(apd, inputs, outputs, true) {
+        pp_ker_ = new pp_ker_t(apd);
     }
-
     ~_gemm_x8s8s32x_convolution_fwd_t() {
-        delete this->scratchpad_;
-    };
+        delete pp_ker_;
+    }
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
     typedef typename prec_traits<data_type::s32>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+    void execute_forward() const;
+    // XXX: this is throwaway code that will become unnecessary when we have a
+    // sufficiently advanced igemm jit generator that supports quantization,
+    // relu, and whatnot
+    class pp_ker_t : jit_generator {
+    public:
+        DECLARE_CPU_JIT_AUX_FUNCTIONS(
+        _gemm_x8s8s32x_convolution_fwd_t::pp_kernel);
+        pp_ker_t(const pd_t *pd);
+
+        void operator()(dst_data_t *dst, const acc_data_t *acc,
+            const char *bias, const float *scales,
+            float nslope, float sum_scale, float signed_scale,
+            int g, size_t start, size_t end);
+    private:
+        void generate();
+
+        struct ker_args {
+            dst_data_t *dst;
+            const acc_data_t *acc;
+            const char *bias;
+            const float *scales;
+            float nslope;
+            float sum_scale;
+            float signed_scale;
+            size_t len;
+            size_t oc_offset;
+        };
+        void(*ker_)(const ker_args *args);
+
+        const jit_gemm_conv_conf_t jcp_;
+        size_t OC_;
+        size_t OS_;
+        data_type_t bias_data_type_;
+        size_t bias_data_type_size_;
+        size_t scale_idx_mult_;
+        round_mode_t rmode_;
+        bool do_bias_;
+        bool do_relu_;
+        bool do_sum_;
+        bool do_signed_scaling_;
+        size_t dst_os_stride_;
+        size_t vlen_;
+    };
+
+
     void execute_forward_thr(const int ithr, const int nthr,
             const src_data_t *src_base, const wei_data_t *wei_base,
             const char *bia_base, dst_data_t *dst_base,
-            char *scratchpad);
-    pd_t conf_;
-    scratchpad_t *scratchpad_;
+            const memory_tracking::grantor_t &scratchpad) const;
+
     int nthr_;
+    pp_ker_t *pp_ker_;
+
 };
 
 template <data_type_t dst_type>
 struct _gemm_u8s8s32x_convolution_bwd_data_t: public cpu_primitive_t {
     struct pd_t: public cpu_convolution_bwd_data_pd_t{
         pd_t(engine_t *engine,
-                const convolution_desc_t *adesc,
-                const primitive_attr_t *attr,
+                const convolution_desc_t *adesc, const primitive_attr_t *attr,
                 const convolution_fwd_pd_t *hint_fwd_pd)
             : cpu_convolution_bwd_data_pd_t(engine, adesc, attr, hint_fwd_pd)
-            , jcp_()
-        {}
+            , jcp_() {}
 
-        DECLARE_COMMON_PD_T("gemm:blas",
+        DECLARE_COMMON_PD_T(IGEMM_S8U8S32_IMPL_STR,
                 _gemm_u8s8s32x_convolution_bwd_data_t<dst_type>);
 
         virtual status_t init() override {
@@ -176,12 +215,10 @@ struct _gemm_u8s8s32x_convolution_bwd_data_t: public cpu_primitive_t {
             assert(this->engine()->kind() == engine_kind::cpu);
 
             bool ok = true
-#if !USE_MKL_IGEMM
-                && false
-#endif
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == prop_kind::backward_data
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && this->desc()->diff_src_desc.data_type == dst_type
                 && this->desc()->diff_dst_desc.data_type == u8
@@ -195,8 +232,12 @@ struct _gemm_u8s8s32x_convolution_bwd_data_t: public cpu_primitive_t {
                 && this->weights_pd_.desc()->format == (this->with_groups()
                         ? hwigo : hwio)
                 && attr()->post_ops_.has_default_values();
+            if (!ok) return status::unimplemented;
 
-            return ok ? status::success : status::unimplemented;
+            auto scratchpad = scratchpad_registry().registrar();
+            return jit_gemm_convolution_utils::init_conf(jcp_, scratchpad,
+                    *this->desc(), this->diff_src_pd(), this->weights_pd(0),
+                    this->diff_dst_pd(), mkldnn_get_max_threads());
         }
 
         virtual bool support_bias() const override { return true; }
@@ -206,59 +247,44 @@ struct _gemm_u8s8s32x_convolution_bwd_data_t: public cpu_primitive_t {
     protected:
         virtual status_t set_default_params() override {
             using namespace memory_format;
+
             if (this->diff_src_pd_.desc()->format == any)
                 CHECK(this->diff_src_pd_.set_format(nhwc));
             if (this->diff_dst_pd_.desc()->format == any)
                 CHECK(this->diff_dst_pd_.set_format(nhwc));
             if (this->weights_pd_.desc()->format == any)
-                CHECK(this->weights_pd_.set_format(this->with_groups()
-                            ? hwigo : hwio));
+                CHECK(this->weights_pd_.set_format(
+                            this->with_groups() ? hwigo : hwio));
             if (bias_pd_.desc()->format == any)
                 CHECK(bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
              return status::success;
         }
     };
 
-    _gemm_u8s8s32x_convolution_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    _gemm_u8s8s32x_convolution_bwd_data_t(const pd_t *apd, const input_vector &inputs,
            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , scratchpad_(nullptr)
-    {
-        jit_gemm_convolution_utils::init_conf(conf_.jcp_,
-            *conf_.desc(), conf_.diff_src_pd(), conf_.weights_pd(0),
-            conf_.diff_dst_pd(), mkldnn_get_max_threads());
-
-        size_t col_size = (size_t)conf_.jcp_.im2col_sz * sizeof(acc_data_t);
-        size_t acc_size = (size_t)conf_.jcp_.is * conf_.jcp_.ic
-                            * sizeof(acc_data_t);
-        size_t size = col_size + acc_size;
-
-        jit_gemm_convolution_utils::prepare_scratchpad(this->conf_.jcp_,
-                &this->scratchpad_, size, this->conf_.jcp_.nthr);
-    }
-
-    ~_gemm_u8s8s32x_convolution_bwd_data_t() {
-        delete this->scratchpad_;
-    };
+        : cpu_primitive_t(apd, inputs, outputs, true) {}
+    ~_gemm_u8s8s32x_convolution_bwd_data_t() {}
 
     typedef typename prec_traits<data_type::u8>::type diff_dst_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type diff_src_data_t;
     typedef typename prec_traits<data_type::s32>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward_data();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_data();
+    void execute_backward_data() const;
     void execute_backward_data_thr(const int ithr, const int nthr,
             const diff_dst_data_t *diff_dst_base, const wei_data_t *wei_base,
             const char *bia_base, diff_src_data_t *diff_src_base,
-            char *scratchpad);
-    pd_t conf_;
-    scratchpad_t *scratchpad_;
+            const memory_tracking::grantor_t &scratchpad) const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_inner_product.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_inner_product.cpp
new file mode 100644
index 000000000..d49a78180
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_inner_product.cpp
@@ -0,0 +1,461 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+#include "simple_q10n.hpp"
+#include "gemm_x8s8s32x_inner_product.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace math;
+using namespace memory_format;
+using namespace memory_tracking::names;
+
+template<data_type_t src_type, data_type_t dst_type>
+gemm_x8s8s32x_inner_product_fwd_t<src_type, dst_type>::pp_kernel_t::pp_kernel_t(
+        const pd_t *pd, bool dst_is_acc)
+    : ker_(nullptr), OC_(pd->OC())
+    , bias_data_type_(data_type::undef), bias_data_type_size_(0)
+    , scale_idx_mult_(0), rmode_(round_mode::nearest)
+    , do_bias_(false), do_relu_(false)
+{
+    using namespace types;
+
+    scale_idx_mult_ = (pd->attr()->output_scales_.mask_ == (1 << 1));
+    rmode_ = pd->attr()->round_mode_;
+
+    auto &post_ops = pd->attr()->post_ops_;
+    do_relu_ = post_ops.len_ == 1;
+    do_bias_ = pd->with_bias();
+    bias_data_type_ = pd->desc()->bias_desc.data_type;
+    if (do_bias_) {
+        assert(bias_data_type_ != data_type::undef);
+        bias_data_type_size_ = data_type_size(bias_data_type_);
+    }
+
+    if (!mayiuse(avx512_core))
+        // use fallback code for older CPUs since they do not have optimized
+        // x8s8s32 GEMM anyways. The configuration variables above are used by
+        // the fallback code.
+        return;
+    else
+        generate();
+}
+
+template<data_type_t src_type, data_type_t dst_type>
+void gemm_x8s8s32x_inner_product_fwd_t<src_type, dst_type>::pp_kernel_t::generate()
+{
+    using namespace Xbyak;
+    using namespace utils;
+    using namespace round_mode;
+
+    // TODO: clean-up
+    Reg64 reg_param = abi_param1;
+    Reg64 reg_dst = rdx;
+    Reg64 reg_acc = rax;
+    Reg64 reg_bias = rbx;
+    Reg64 reg_scales = rsi;
+
+    Reg64 reg_len = r8;
+    Reg64 reg_tmp = rcx; // intentional for shifting purposes
+    Reg64 reg_oc_offset = r9;
+    Reg64 reg_rem_mask = r10;
+    Opmask kreg_rem_mask = k1;
+    Opmask kreg_relu_cmp = k2;
+
+    const size_t vlen = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
+
+    Zmm vreg_zero = Zmm(0);
+    Zmm vreg_scale = Zmm(1);
+    Zmm vreg_nslope = Zmm(2);
+
+    auto vreg_dst = [&](int idx) { return Zmm(3 + idx * 2 + 0); };
+    auto vreg_bias = [&](int idx) { return Zmm(3 + idx * 2 + 1); };
+
+    preamble();
+
+#define PARAM_OFF(x) offsetof(ker_args, x)
+    mov(reg_dst, ptr[reg_param + PARAM_OFF(dst)]);
+    mov(reg_acc, ptr[reg_param + PARAM_OFF(acc)]);
+    mov(reg_bias, ptr[reg_param + PARAM_OFF(bias)]);
+    mov(reg_scales, ptr[reg_param + PARAM_OFF(scales)]);
+    mov(reg_len, ptr[reg_param + PARAM_OFF(len)]);
+    mov(reg_oc_offset, ptr[reg_param + PARAM_OFF(oc_offset)]);
+    vbroadcastss(vreg_nslope, ptr[reg_param + PARAM_OFF(nslope)]);
+    if (scale_idx_mult_ == 0)
+        vbroadcastss(vreg_scale, dword[reg_scales]);
+#undef PARAM_OFF
+
+    if (do_relu_ || dst_type == data_type::u8)
+        vxorps(vreg_zero, vreg_zero, vreg_zero);
+
+    // Load accumulated value, convert to float, apply bias (if any), scaling,
+    // and relu (if any); then convert to destination type and store
+    auto compute = [&](size_t offset, int idx, bool apply_mask) {
+        auto acc_addr = ptr[reg_acc + offset * sizeof(acc_data_t)];
+
+        if (scale_idx_mult_ > 0) {
+            assert(scale_idx_mult_ == 1);
+            auto scale_addr = ptr[reg_scales + offset * sizeof(float)];
+            auto vreg_scale_ = vreg_scale;
+            if (apply_mask)
+                vreg_scale_ = vreg_scale_ | kreg_rem_mask;
+            vmovups(vreg_scale, scale_addr);
+        }
+
+        auto vreg_dst_ = vreg_dst(idx);
+        if (apply_mask)
+            vreg_dst_ = vreg_dst_ | kreg_rem_mask;
+        vcvtdq2ps(vreg_dst_, acc_addr);
+
+        if (do_bias_) {
+            auto bias_addr = ptr[reg_bias + offset * bias_data_type_size_];
+            auto vreg_bias_ = vreg_bias(idx);
+            if (apply_mask)
+                vreg_bias_ = vreg_bias_ | kreg_rem_mask;
+
+            switch (bias_data_type_) {
+            case data_type::s8:
+                vpmovsxbd(vreg_bias_, bias_addr);
+                break;
+            case data_type::u8:
+                vpmovzxbd(vreg_bias_, bias_addr);
+                break;
+            case data_type::s32:
+            case data_type::f32:
+                vmovups(vreg_bias_, bias_addr);
+                break;
+            default: assert(!"unimplemented");
+            }
+            if (bias_data_type_ != data_type::f32)
+                vcvtdq2ps(vreg_bias(idx), vreg_bias(idx));
+            vaddps(vreg_dst(idx), vreg_dst(idx), vreg_bias(idx));
+        }
+
+        vmulps(vreg_dst(idx), vreg_dst(idx), vreg_scale);
+        if (do_relu_) {
+            vcmpps(kreg_relu_cmp, vreg_dst(idx), vreg_zero, _cmp_lt_os);
+            vmulps(vreg_dst(idx) | kreg_relu_cmp, vreg_dst(idx), vreg_nslope);
+        }
+
+        if (dst_type == data_type::u8)
+            vmaxps(vreg_dst(idx), vreg_dst(idx), vreg_zero);
+
+        if (dst_type != data_type::f32) {
+            auto rmode_control = (rmode_ == nearest ? T_rn_sae : T_rd_sae);
+            vcvtps2dq(vreg_dst(idx) | rmode_control, vreg_dst(idx));
+        }
+
+        auto dst_addr = ptr[reg_dst + offset * sizeof(dst_data_t)];
+        switch (dst_type) {
+        case data_type::s8:
+            vpmovsdb(dst_addr, vreg_dst_);
+            break;
+        case data_type::u8:
+            vpmovusdb(dst_addr, vreg_dst_);
+            break;
+        case data_type::f32:
+        case data_type::s32:
+            vmovups(dst_addr, vreg_dst_);
+            break;
+        default: assert(!"unimplemented");
+        }
+    };
+
+    // Advance all pointers by an immediate
+    auto advance_ptrs_imm = [&](size_t offset) {
+        add(reg_dst, offset * sizeof(dst_data_t));
+        add(reg_acc, offset * sizeof(acc_data_t));
+        if (scale_idx_mult_) {
+            assert(scale_idx_mult_ == 1);
+            add(reg_scales, offset * sizeof(float));
+        }
+        if (do_bias_)
+            add(reg_bias, offset * bias_data_type_size_);
+    };
+
+    // Advance all pointers by a value stored in a register
+    auto advance_ptrs_reg = [&](Reg64 offset) {
+        lea(reg_dst, ptr[reg_dst + offset * sizeof(dst_data_t)]);
+        lea(reg_acc, ptr[reg_acc + offset * sizeof(acc_data_t)]);
+        if (scale_idx_mult_) {
+            assert(scale_idx_mult_ == 1);
+            lea(reg_scales, ptr[reg_scales + offset * sizeof(float)]);
+        }
+        if (do_bias_)
+            lea(reg_bias, ptr[reg_bias + offset * bias_data_type_size_]);
+    };
+
+    // Rewind pointers that point to data that is indixed by output channel
+    // (bias or per-oc scaling factors)
+    auto rewind_ptrs = [&]() {
+        if (do_bias_)
+            sub(reg_bias, OC_ * bias_data_type_size_);
+        if (scale_idx_mult_) {
+            assert(scale_idx_mult_ == 1);
+            sub(reg_scales, OC_ * sizeof(float));
+        }
+    };
+
+    //      <-------------------- OC ------------------------------->
+    //
+    // ^    +....................+----------------------------------+
+    // |    :   not accessed     |          Prologue loop           |
+    // |    +--------------------+----------------------------------+
+    //      |                                                       |
+    // M    |                 Main loop (unrolled)                  |
+    // B    |                                                       |
+    //      +--------------------------------+----------------------+
+    // |    |       Epilogue loop            |      not accessed    :
+    // v    +--------------------------------+......................+
+
+    Label prologue_end;
+    cmp(reg_oc_offset, 0);
+    je(prologue_end, T_NEAR);
+
+    // Prologue loop
+    {
+        mov(reg_tmp, OC_);
+        sub(reg_tmp, reg_oc_offset);
+        cmp(reg_tmp, reg_len);
+        cmovg(reg_tmp, reg_len);
+        sub(reg_len, reg_tmp);
+
+        Label prologue_loop, prologue_loop_tail, prologue_loop_end;
+        cmp(reg_tmp, vlen);
+        jle(prologue_loop_tail, T_NEAR); // Skips for reg_tmp == 16 too (?)
+        L(prologue_loop); {
+            compute(0, 0, false);
+            advance_ptrs_imm(vlen);
+            sub(reg_tmp, vlen);
+            cmp(reg_tmp, vlen);
+            jge(prologue_loop, T_NEAR);
+        }
+
+        L(prologue_loop_tail);
+        mov(reg_rem_mask, 1);
+        shl(reg_rem_mask, cl); // cl == reg_tmp because reg_tmp <= vlen here
+        sub(reg_rem_mask, 1);
+        jz(prologue_loop_end, T_NEAR);
+
+        kmovq(kreg_rem_mask, reg_rem_mask);
+        compute(0, 0, true);
+        advance_ptrs_reg(reg_tmp);
+
+        L(prologue_loop_end);
+        rewind_ptrs();
+    }
+    L(prologue_end);
+
+    // Main loop
+    Label main_loop_end;
+    {
+        cmp(reg_len, OC_);
+        jle(main_loop_end, T_NEAR);
+
+        Label main_loop;
+        L(main_loop); {
+            size_t def_unroll = 4;
+            size_t max_unroll = 13;
+
+            size_t OC_loop, OC_tail;
+            if (OC_ < max_unroll * vlen) {
+                // Fully unroll small loops
+                OC_loop = 0;
+                OC_tail = OC_;
+            } else {
+                OC_loop = vlen * def_unroll;
+                OC_tail = OC_ % OC_loop;
+            }
+
+            assert(!!OC_loop || !!OC_tail);
+
+            if (OC_tail % vlen) {
+                int vlen_tail = OC_tail % vlen;
+                unsigned tail_mask = (1 << vlen_tail) - 1;
+                mov(reg_tmp, tail_mask);
+                kmovq(kreg_rem_mask, reg_tmp);
+            }
+
+            if (OC_loop) {
+                mov(reg_tmp, rnd_dn(OC_, OC_loop));
+                Label oc_loop;
+                L(oc_loop); {
+                    for (size_t offset = 0; offset < OC_loop; offset += vlen)
+                        compute(offset, offset / vlen, false);
+                    advance_ptrs_imm(OC_loop);
+                    sub(reg_tmp, OC_loop);
+                    jnz(oc_loop);
+                }
+            }
+
+            if (OC_tail) {
+                for (size_t offset = 0; offset < OC_tail; offset += vlen) {
+                    bool use_mask = (offset + vlen) > OC_tail;
+                    compute(offset, offset / vlen, use_mask);
+                }
+                advance_ptrs_imm(OC_tail);
+            }
+
+            rewind_ptrs();
+            sub(reg_len, OC_);
+            cmp(reg_len, OC_);
+            jge(main_loop, T_NEAR);
+        }
+    }
+    L(main_loop_end);
+
+    // Epilogue loop
+    Label epilogue_end;
+    {
+        cmp(reg_len, 0);
+        je(epilogue_end, T_NEAR);
+
+        Label epilogue_loop, epilogue_loop_tail;
+        cmp(reg_len, vlen);
+        jle(epilogue_loop_tail, T_NEAR); // Skips for reg_len == 16 (?)
+        L(epilogue_loop); {
+            compute(0, 0, false);
+            sub(reg_len, vlen);
+            advance_ptrs_imm(vlen);
+            cmp(reg_len, vlen);
+            jge(epilogue_loop, T_NEAR);
+        }
+
+        L(epilogue_loop_tail);
+        mov(reg_tmp, reg_len); // reg_tmp is rcx, and we need cl for the shift
+        mov(reg_rem_mask, 1);
+        shl(reg_rem_mask, cl); // reg_tmp == rcx and reg_tail < vlen == 16
+        sub(reg_rem_mask, 1);
+        jz(epilogue_end, T_NEAR);
+        kmovq(kreg_rem_mask, reg_rem_mask);
+        compute(0, 0, true);
+    }
+
+    L(epilogue_end);
+
+    postamble();
+
+    ker_ = getCode<decltype(ker_)>();
+}
+
+template<data_type_t src_type, data_type_t dst_type>
+void gemm_x8s8s32x_inner_product_fwd_t<src_type, dst_type>::pp_kernel_t::operator ()(
+        dst_data_t *dst, const acc_data_t *acc,
+        const char *bias, const float *scales, float nslope,
+        size_t start, size_t end)
+{
+    using math::get_bias;
+
+    if (end <= start)
+        return;
+
+    if (ker_) {
+        // JIT
+        ker_args args;
+        size_t oc_offset = start % OC_;
+        args.dst = dst + start;
+        args.acc = acc + start;
+        args.bias = bias + oc_offset * bias_data_type_size_;
+        args.scales = scales + scale_idx_mult_ * oc_offset;
+        args.nslope = nslope;
+        args.len = end - start;
+        args.oc_offset = oc_offset;
+        ker_(&args);
+    } else {
+        // Fallback
+        size_t oc = start % OC_;
+        for (size_t i = start; i < end; i++) {
+            float d = (float)acc[i];
+            float b = get_bias(bias, oc, bias_data_type_);
+            d = d + b;
+            d *= scales[oc * scale_idx_mult_];
+            if (do_relu_ && d < 0)
+                d *= nslope;
+            dst[i] = qz_a1b0<float, dst_data_t>()(d, rmode_);
+            oc = (oc == OC_ - 1) ? 0 : oc + 1;
+        }
+    }
+};
+
+template <data_type_t src_type, data_type_t dst_type>
+void gemm_x8s8s32x_inner_product_fwd_t<src_type, dst_type
+        >::execute_forward() const {
+    auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
+    auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
+    auto bias = reinterpret_cast<const char *>(this->input_memory(2));
+    auto dst = reinterpret_cast<dst_data_t *>(this->memory());
+
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+
+    bool wei_tr = utils::one_of(pd()->weights_pd()->desc()->format,
+             oihw, oidhw, oi);
+
+    const int M = OC;
+    const int N = MB;
+    const int K = pd()->IC_total_padded();
+    const int8_t off_a = 0, off_b = 0;
+    const int32_t off_c = 0;
+
+    const float *scales = pd()->attr()->output_scales_.scales_;
+
+    const auto &post_ops = pd()->attr()->post_ops_;
+    const bool do_relu = post_ops.len_ == 1;
+    const float nslope = do_relu ? post_ops.entry_[0].eltwise.alpha : 0.f;
+
+    acc_data_t *acc = pd()->dst_is_acc_
+        ? (acc_data_t *)dst
+        : scratchpad().template get<acc_data_t>(key_iprod_int_dat_in_acc_dt);
+
+    const float onef = 1.0, zerof = 0.0;
+
+    if (src_type == data_type::u8) {
+        mkldnn_gemm_s8u8s32(wei_tr ? "T" : "N", "N", "F", &M, &N, &K, &onef,
+                weights, wei_tr ? &K : &M, &off_a, (uint8_t *)src, &K, &off_b, &zerof,
+                acc, &M, &off_c);
+    } else if (src_type == data_type::s8) {
+        mkldnn_gemm_s8s8s32(wei_tr ? "T" : "N", "N", "F", &M, &N, &K, &onef,
+                weights, wei_tr ? &K : &M, &off_a, (int8_t *)src, &K, &off_b, &zerof,
+                acc, &M, &off_c);
+    } else {
+        assert(!"incorrect src type");
+    }
+
+    const bool force_sequential = MB * OC < 2000;
+    parallel(force_sequential ? 1 : 0, [&](int ithr, int nthr) {
+            size_t start, end;
+            balance211((size_t)OC * MB, nthr, ithr, start, end);
+            (*pp_kernel_)(dst, acc, bias, scales, nslope, start, end);
+            });
+}
+
+using namespace data_type;
+
+template struct gemm_x8s8s32x_inner_product_fwd_t<u8, f32>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<u8, s32>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<u8, s8>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<u8, u8>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<s8, f32>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<s8, s32>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<s8, s8>;
+template struct gemm_x8s8s32x_inner_product_fwd_t<s8, u8>;
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_u8s8s32x_inner_product.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_inner_product.hpp
index a4163fe7a..0fadd1748 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_u8s8s32x_inner_product.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/gemm_x8s8s32x_inner_product.hpp
@@ -14,33 +14,37 @@
 * limitations under the License.
 *******************************************************************************/
 
-#ifndef GEMM_U8S8S32X_INNER_PRODUCT_HPP
-#define GEMM_U8S8S32X_INNER_PRODUCT_HPP
+#ifndef GEMM_X8S8S32X_INNER_PRODUCT_HPP
+#define GEMM_X8S8S32X_INNER_PRODUCT_HPP
 
 #include <assert.h>
 
 #include "c_types_map.hpp"
-#include "cpu_inner_product_pd.hpp"
-#include "cpu_engine.hpp"
+#include "memory_tracking.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
-#include "scratchpad.hpp"
 
-#include "gemm/os_blas.hpp"
+#include "gemm/gemm.hpp"
+#include "jit_generator.hpp"
+
+#include "cpu_inner_product_pd.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <impl::data_type_t dst_type>
-struct gemm_u8s8s32x_inner_product_fwd_t: public cpu_primitive_t {
+template <impl::data_type_t src_type, impl::data_type_t dst_type>
+struct gemm_x8s8s32x_inner_product_fwd_t: public cpu_primitive_t {
     struct pd_t: public cpu_inner_product_fwd_pd_t {
         pd_t(engine_t *engine, const inner_product_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const inner_product_fwd_pd_t *hint_fwd_pd)
             : cpu_inner_product_fwd_pd_t(engine, adesc, attr, hint_fwd_pd) {}
 
-        DECLARE_COMMON_PD_T("gemm:blas", gemm_u8s8s32x_inner_product_fwd_t);
+        DECLARE_COMMON_PD_T(src_type == data_type::u8
+                ? IGEMM_S8U8S32_IMPL_STR
+                : IGEMM_S8S8S32_IMPL_STR,
+                gemm_x8s8s32x_inner_product_fwd_t);
 
         virtual status_t init() override {
             using namespace utils;
@@ -49,14 +53,11 @@ struct gemm_u8s8s32x_inner_product_fwd_t: public cpu_primitive_t {
             assert(engine()->kind() == engine_kind::cpu);
 
             bool ok = true
-#if !USE_MKL_IGEMM
-                && false
-#endif
                 && this->set_default_params() == status::success
                 && one_of(desc()->prop_kind, prop_kind::forward_training,
                         prop_kind::forward_inference)
                 && !has_zero_dim_memory()
-                && this->desc()->src_desc.data_type == u8
+                && this->desc()->src_desc.data_type == src_type
                 && this->desc()->dst_desc.data_type == dst_type
                 && this->desc()->weights_desc.data_type == s8
                 && IMPLICATION(this->with_bias(), utils::one_of(
@@ -67,63 +68,108 @@ struct gemm_u8s8s32x_inner_product_fwd_t: public cpu_primitive_t {
                         attr()->post_ops_.entry_[0].is_relu(true, false))
                 && dense_gemm_consitency_check(src_pd(), weights_pd(),
                         dst_pd());
-            return ok ? status::success : status::unimplemented;
+            if (!ok) return status::unimplemented;
+
+            dst_is_acc_ = one_of(dst_type, s32, f32);
+
+            init_scratchpad();
+
+            return status::success;
         }
 
+        bool dst_is_acc_;
+
     protected:
         virtual status_t set_default_params() override {
             using namespace memory_format;
 
-            if (this->src_pd_.desc()->format == any)
-            {
+            if (this->src_pd_.desc()->format == any) {
                 if (ndims() == 4) CHECK(this->src_pd_.set_format(nhwc));
                 else if (ndims() == 5) CHECK(this->src_pd_.set_format(ndhwc));
                 else CHECK(this->src_pd_.set_format(nc));
             }
             if (this->dst_pd_.desc()->format == any)
                 CHECK(this->dst_pd_.set_format(nc));
-            if (this->weights_pd_.desc()->format == any)
-            {
+            if (this->weights_pd_.desc()->format == any) {
                 if (ndims() == 4) CHECK(this->weights_pd_.set_format(hwio));
                 else if (ndims() == 5) CHECK(this->weights_pd_.set_format(dhwio));
                 else CHECK(this->weights_pd_.set_format(io));
             }
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+
             return status::success;
         }
+
+    private:
+        void init_scratchpad() {
+            if (!dst_is_acc_) {
+                auto scratchpad = scratchpad_registry().registrar();
+                scratchpad.book(
+                        memory_tracking::names::key_iprod_int_dat_in_acc_dt,
+                        sizeof(acc_data_t) * MB() * OC());
+            }
+        }
     };
 
-    gemm_u8s8s32x_inner_product_fwd_t(const pd_t *pd, const input_vector &inputs,
+    gemm_x8s8s32x_inner_product_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), dst_is_acc_(false),
-        scratchpad_(nullptr)
-    {
-        dst_is_acc_ = utils::one_of(dst_type, data_type::s32, data_type::f32);
-        if (!dst_is_acc_) {
-            size_t size = conf_.MB() * conf_.OC() * sizeof(acc_data_t);
-            scratchpad_ = create_scratchpad(size);
-        }
-    }
-    ~gemm_u8s8s32x_inner_product_fwd_t() { delete scratchpad_; };
+        : cpu_primitive_t(apd, inputs, outputs, true)
+    { pp_kernel_ = new pp_kernel_t(apd, pd()->dst_is_acc_); }
+    ~gemm_x8s8s32x_inner_product_fwd_t() { delete pp_kernel_; }
 
     typedef typename prec_traits<dst_type>::type data_t;
 
-    typedef typename prec_traits<data_type::u8>::type src_data_t;
+    typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
     typedef typename prec_traits<data_type::s32>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
-    bool dst_is_acc_;
-    scratchpad_t *scratchpad_;
+    // XXX: this is throwaway code that will become unnecessary when we have a
+    // sufficiently advanced igemm jit generator that supports quantization,
+    // relu, and whatnot
+    class pp_kernel_t: jit_generator {
+    public:
+        DECLARE_CPU_JIT_AUX_FUNCTIONS(
+                gemm_x8s8s32x_inner_product_fwd_t::pp_kernel);
+        pp_kernel_t(const pd_t *pd, bool dst_is_acc);
+
+        void operator()(dst_data_t *dst, const acc_data_t *acc,
+                const char *bias, const float *scales, float nslope,
+                size_t start, size_t end);
+    private:
+        void generate();
+
+        struct ker_args {
+            dst_data_t *dst;
+            const acc_data_t *acc;
+            const char *bias;
+            const float *scales;
+            float nslope;
+            size_t len;
+            size_t oc_offset;
+        };
+        void (*ker_)(const ker_args *args);
+
+        size_t OC_;
+        data_type_t bias_data_type_;
+        size_t bias_data_type_size_;
+        size_t scale_idx_mult_;
+        round_mode_t rmode_;
+        bool do_bias_;
+        bool do_relu_;
+    };
+
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
+    pp_kernel_t *pp_kernel_;
 };
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.cpp
index 9ef255807..73f01f50e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.cpp
@@ -15,10 +15,14 @@
 * limitations under the License.
 *******************************************************************************/
 
+#include <assert.h>
+
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
+
 #include "cpu_memory.hpp"
 
 #include "jit_avx2_1x1_conv_kernel_f32.hpp"
@@ -140,7 +144,7 @@ void jit_avx2_1x1_conv_kernel_f32::generate_reduce_loop(
         default:
             if (jcp.with_dw_conv) {
                 return ptr[aux_reg_output_data +
-                           (i * jcp.dw_conv_ker_h * jcp.ow + j) * jcp.oc_block * sizeof(float)];
+                           (i * jcp_dw.kh * jcp.ow + j) * jcp.oc_block * sizeof(float)];
             } else {
                 return ptr[aux_reg_output_data +
                            (i * jcp.os + j) * jcp.oc_block * sizeof(float)];
@@ -176,7 +180,7 @@ void jit_avx2_1x1_conv_kernel_f32::generate_reduce_loop(
     };
 
     auto store = [=]() {
-        Label store_done, store_noadd;
+        Label store_noadd;
 
         if (!jcp.with_sum) {
             test(reg_reduce_pos_flag, FLAG_REDUCE_FIRST);
@@ -198,9 +202,6 @@ void jit_avx2_1x1_conv_kernel_f32::generate_reduce_loop(
         int eltwise_inj_idx = 0;
         int depthwise_inj_idx = 0;
         const auto &p = attr_.post_ops_;
-        if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-            eltwise_injectors[0]->compute_vector_range(0, ur * load_loop_blk);
-        }
 
         int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
         for (int i = 0; i < end_idx; i++) {
@@ -236,8 +237,6 @@ void jit_avx2_1x1_conv_kernel_f32::generate_reduce_loop(
             for (int i = 0; i < load_loop_blk; ++i) {
                 vmovups(output_ptr(i, j), vreg_accum(i, j));
             }
-
-        L(store_done);
     };
 
     auto fma_block = [=](bool last_block) {
@@ -247,9 +246,8 @@ void jit_avx2_1x1_conv_kernel_f32::generate_reduce_loop(
                     if (mayiuse(avx2))
                         vfmadd231ps(vreg_accum(i, j), vreg_load(i), vreg_bcast);
                     else { // Intel(R) Advanced Vector Extensions (Intel(R) AVX) support
-                        auto tmp = vmask;
-                        vmulps(tmp, vreg_bcast, vreg_load(i));
-                        vaddps(vreg_accum(i, j), vreg_accum(i, j), tmp);
+                        vmulps(vtmp, vreg_bcast, vreg_load(i));
+                        vaddps(vreg_accum(i, j), vreg_accum(i, j), vtmp);
                     }
                     if (j == ur - 1 && !(last_block
                                 && u == jcp.reduce_loop_unroll - 1))
@@ -347,12 +345,6 @@ void jit_avx2_1x1_conv_kernel_f32::generate_diff_bias_loop(int load_loop_blk)
 
 void jit_avx2_1x1_conv_kernel_f32::generate()
 {
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx2>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
     const auto &p = attr_.post_ops_;
     int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
     for (int i = 0; i < end_idx; i++) {
@@ -485,24 +477,15 @@ bool jit_avx2_1x1_conv_kernel_f32::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-        case 0: return true; // no post_ops
-        case 1:
-            return true // sum OR eltwise OR dw_conv
-                   && !jcp.with_eltwise && (is_simple(0) || is_sum(0) || is_dw_conv(0));
-        case 2:
-            return true // sum->eltwise OR dw_conv->eltwise OR eltwise->dw_conv OR dw_conv->sum OR sum->depthwise OR
-                   // eltwise->depthwise OR depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
-                                            (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
-                                            (is_simple(0) && is_simple(1)));
-        case 3:
-            return true // eltwise->dw_conv->eltwise OR dw_conv->sum->eltwise OR sum->eltwise->depthwise OR
-                   // sum->depthwise->eltwise OR sum->depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
-                                            (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
-                                            (is_sum(0) && is_simple(1) && is_simple(2)));
-        case 4: return true // eltwise->dw_conv->sum->eltwise
-                       && !jcp.with_eltwise && (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0) || is_dw_conv(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
+                       (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
+                       (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
+                       (is_sum(0) && is_simple(1) && is_simple(2));
+        case 4: return (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
         default: return false;
     }
 
@@ -512,7 +495,7 @@ bool jit_avx2_1x1_conv_kernel_f32::post_ops_ok(
 status_t jit_avx2_1x1_conv_kernel_f32::init_conf(jit_1x1_conv_conf_t &jcp,
         const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope)
+        const primitive_attr_t &attr)
 {
     if (!mayiuse(avx)) return status::unimplemented;
 
@@ -547,51 +530,41 @@ status_t jit_avx2_1x1_conv_kernel_f32::init_conf(jit_1x1_conv_conf_t &jcp,
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
 
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
-
-    if (!post_ops_ok(jcp, attr)) {
+    if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
-    }
 
     const auto &p = attr.post_ops_;
-    jcp.with_dw_conv = false;
+
     int dw_conv_ind = p.find(primitive_kind::convolution);
-    if (dw_conv_ind != -1) {
-        jcp.with_dw_conv = true;
-        jcp.dw_conv_in_h = p.entry_[dw_conv_ind].dw_conv.in_h;
-        jcp.dw_conv_in_w = p.entry_[dw_conv_ind].dw_conv.in_w;
-        jcp.dw_conv_ker_h = p.entry_[dw_conv_ind].dw_conv.ker_h;
-        jcp.dw_conv_ker_w = p.entry_[dw_conv_ind].dw_conv.ker_w;
-        jcp.dw_conv_str_h = p.entry_[dw_conv_ind].dw_conv.str_h;
-        jcp.dw_conv_str_w = p.entry_[dw_conv_ind].dw_conv.str_w;
-        jcp.dw_conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
-        jcp.dw_conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
+    jcp.with_dw_conv = dw_conv_ind != -1;
+    jcp.with_dw_conv = dw_conv_ind != -1;
+    if (jcp.with_dw_conv) {
+        jcp.dw_conv_oh = jcp.oh;
+        jcp.dw_conv_ow = jcp.ow;
+        jcp.oh = p.entry_[dw_conv_ind].dw_conv.in_h;
+        jcp.ow = p.entry_[dw_conv_ind].dw_conv.in_w;
     }
 
     if (jcp.with_dw_conv && !mayiuse(avx2))
         return status::unimplemented;
 
-    if (jcp.with_dw_conv) {
-        int dw_conv_eltwise_ind = p.find(primitive_kind::eltwise, dw_conv_ind);
-        if (dw_conv_eltwise_ind != -1) {
-            jcp.dw_conv_with_eltwise = true;
-            jcp.dw_conv_eltwise_alg = p.entry_[dw_conv_eltwise_ind].eltwise.alg;
-            jcp.dw_conv_eltwise_alpha = p.entry_[dw_conv_eltwise_ind].eltwise.alpha;
-            jcp.dw_conv_eltwise_beta = p.entry_[dw_conv_eltwise_ind].eltwise.beta;
+    if (!mayiuse(avx2)) {
+        for (int i = 0; i < p.len_; i++) {
+            auto &post_op = p.entry_[i];
+            if (post_op.is_eltwise()) {
+                if (post_op.eltwise.alg != alg_kind::eltwise_relu)
+                    return status::unimplemented;
+            } else if (post_op.is_depthwise()) {
+                return status::unimplemented;
+            }
         }
     }
 
     jcp.with_sum = p.find(primitive_kind::sum, 0, dw_conv_ind) != -1;
-    if (jcp.with_dw_conv) {
-        jcp.dw_conv_with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
-    }
 
-    if (jcp.with_dw_conv) {
-        jcp.oh = jcp.dw_conv_in_h;
-        jcp.ow = jcp.dw_conv_in_w;
-    }
+    jcp.src_dt = cd.src_desc.data_type;
+    jcp.bia_dt = jcp.with_bias ? cd.bias_desc.data_type : data_type::undef;
+    jcp.dst_dt = cd.dst_desc.data_type;
 
     jcp.os = jcp.oh * jcp.ow;
     jcp.is = jcp.ih * jcp.iw;
@@ -770,6 +743,24 @@ status_t jit_avx2_1x1_conv_kernel_f32::init_conf(jit_1x1_conv_conf_t &jcp,
     return status::success;
 }
 
+void jit_avx2_1x1_conv_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad,
+        const jit_1x1_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw) {
+    using namespace mkldnn::impl::memory_tracking::names;
+
+    if (jcp.prop_kind != backward_data && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp.oc);
+
+    if (jcp.with_dw_conv) {
+        const int nthreads = mkldnn_get_max_threads();
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * (jcp.oc / jcp.oc_block);
+        scratchpad.book(key_dw_conv_buffer, sizeof(float) * dw_conv_buffer_size_ * nthreads);
+
+        if (jcp.oc != jcp.oc_without_padding)
+            scratchpad.book(key_dw_conv_padded_bias, sizeof(float) * jcp.oc);
+    }
+}
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.hpp
index 2c10b8500..e856140bb 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_conv_kernel_f32.hpp
@@ -18,9 +18,11 @@
 #define JIT_AVX2_1x1_CONV_KERNEL_F32_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
+#include "cpu_memory.hpp"
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
-#include "cpu_memory.hpp"
 #include "jit_uni_eltwise.hpp"
 #include "jit_uni_depthwise.hpp"
 
@@ -31,8 +33,9 @@ namespace cpu {
 struct jit_avx2_1x1_conv_kernel_f32: public jit_generator {
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx2_1x1_conv_kernel_f32)
 
-    jit_avx2_1x1_conv_kernel_f32(jit_1x1_conv_conf_t ajcp,
-           const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
+    jit_avx2_1x1_conv_kernel_f32(jit_1x1_conv_conf_t ajcp, jit_conv_conf_t ajcp_dw,
+           const primitive_attr_t &attr)
+        : jcp(ajcp), jcp_dw(ajcp_dw), attr_(attr)
     {
         this->generate();
         jit_ker = (void (*)(jit_1x1_conv_call_s *))this->getCode();
@@ -56,20 +59,13 @@ struct jit_avx2_1x1_conv_kernel_f32: public jit_generator {
             const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &dst_d,
-            const primitive_attr_t &attr,
-            bool with_relu, float relu_negative_slope);
+            const primitive_attr_t &attr);
 
-    static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-            const convolution_desc_t &cd,
-            const memory_desc_wrapper &src_d,
-            const memory_desc_wrapper &weights_d,
-            const memory_desc_wrapper &dst_d,
-            const primitive_attr_t &attr)
-    {
-        return init_conf(jcp, cd, src_d, weights_d, dst_d, attr, false, 0.0);
-    }
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_1x1_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw = jit_conv_conf_t());
 
     jit_1x1_conv_conf_t jcp;
+    jit_conv_conf_t jcp_dw;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_1x1_conv_call_s *);
 
@@ -104,7 +100,7 @@ private:
     int stack_space_needed = 8;
 
     ymm_t vreg_bcast = ymm_t(15);
-    Xbyak::Ymm vmask = Xbyak::Ymm(14);
+    ymm_t vtmp = ymm_t(14);
 
     void generate_bcast_loop(int load_loop_blk);
     void generate_reduce_loop(int load_loop_blk, int ur);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.cpp
index 7a6e17c58..5f888a292 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.cpp
@@ -14,25 +14,22 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <cstring>
-#include <mkldnn_types.h>
-#include <iostream>
-#include "mkldnn_types.h"
-
 #include "c_types_map.hpp"
-#include "jit_avx2_1x1_convolution.hpp"
-#include "utils.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
-
+#include "utils.hpp"
+#include <cstring>
 #include "jit_generator.hpp"
 
+#include "jit_avx2_1x1_convolution.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 #define data_blk_off(f, n, c, h, w) \
@@ -42,27 +39,28 @@ using namespace mkldnn::impl::utils;
 
 /* convolution forward */
 
-template <bool with_relu>
-void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward() {
+void jit_avx2_1x1_convolution_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     const int work_amount = MB * jcp.ngroups * jcp.nb_bcast;
     const int ndims = dst_d.ndims();
 
-    const int stride_h = (ndims == 3) ? 1 : conf_.cdesc()->strides[0];
-    const int stride_w = conf_.cdesc()->strides[ndims - 3];
-    const int pad_t = (ndims == 3) ? 0 : conf_.cdesc()->padding[0][0];
-    const int pad_l = conf_.cdesc()->padding[0][ndims - 3];
+    const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[ndims - 3];
+    const int pad_t = (ndims == 3) ? 0 : pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][ndims - 3];
 
     auto step = [](int default_step, int remaining, int tail_step) {
         assert(default_step <= tail_step);
@@ -73,8 +71,8 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward() {
         // TODO (Roma): remove this restriction
         assert(jcp.stride_w == 1 && jcp.stride_h == 1);
 
-        jit_1x1_conv_call_s p = {};
-        rtus_driver_t<avx2>::call_params_t rp = {};
+	    auto p = jit_1x1_conv_call_s();
+	    auto rp = rtus_driver_t<avx2>::call_params_t();
 
         const int nb_oc = jcp.nb_load;
         const int nb_ic = jcp.nb_reduce;
@@ -129,13 +127,14 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward() {
                             nb_ic_blocking * jcp.ic_block);
                     rp.icb = p.reduce_dim / jcp.reduce_block;
 
-                    p.load_data = &weights[conf_.with_groups()
+                    p.load_data = &weights[pd()->with_groups()
                         ? weights_d.blk_off(g, ocb, icb)
                         : weights_d.blk_off(ocb, icb)];
 
                     const int _icb = g * nb_ic + icb;
-                    if (conf_.rtus_.reduce_src_) {
-                        rp.ws = scratch_ + ithr * ws_per_thread_
+                    if (pd()->rtus_.reduce_src_) {
+                        rp.ws = rtus_space
+                            + ithr * pd()->rtus_.space_per_thread_
                             + _icb * jcp.is * jcp.ic_block;
 
                         if (ocb == 0) {
@@ -159,29 +158,37 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward() {
         }
     };
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
     }
 
     parallel(0, ker);
+
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
 }
 
-template <bool with_relu>
-void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
+void jit_avx2_1x1_convolution_fwd_t::execute_forward_with_dw_conv() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const auto &jcp_dw = kernel_dw_->jcp;
+    const int MB = pd()->MB();
 
-    auto dw_bias = jcp.dw_conv_biases;
+    auto dw_bias = jcp_dw.conv_biases;
 
     int ocb_work = jcp.with_dw_conv ? utils::div_up(jcp.nb_load, jcp.nb_load_blocking) : 1;
     const int work_amount = MB * jcp.ngroups * ocb_work * jcp.nb_bcast;
@@ -205,8 +212,8 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
 
                 if ((oh + h) < 0 || (oh + h) >= jcp.ih) {
                     for (int chb = ocb; chb < ocb + load_step; chb++) {
-                        memset(ws_p + (((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow * jcp.oc_block +
-                               (chb - ocb) * jcp.dw_conv_ker_h * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
+                        memset(ws_p + (((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block +
+                               (chb - ocb) * jcp_dw.kh * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
                     }
                 } else {
                     const int _ocb = g * jcp.nb_load + ocb;
@@ -217,7 +224,7 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                     rp.os = p.bcast_dim;
                     p.load_dim = this_block_size(ocb * jcp.oc_block, jcp.oc, load_step * jcp.oc_block);
 
-                    p.output_data = &ws_p[(((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow * jcp.oc_block];
+                    p.output_data = &ws_p[(((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block];
 
                     p.bias_data = &bias[_ocb * jcp.oc_block];
 
@@ -231,13 +238,14 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                                                        jcp.nb_reduce_blocking * jcp.ic_block);
                         rp.icb = p.reduce_dim / jcp.reduce_block;
 
-                        p.load_data = &weights[conf_.with_groups()
+                        p.load_data = &weights[pd()->with_groups()
                                                ? weights_d.blk_off(g, ocb, icb)
                                                : weights_d.blk_off(ocb, icb)];
 
                         const int _icb = g * jcp.nb_reduce + icb;
-                        if (conf_.rtus_.reduce_src_) {
-                            rp.ws = scratch_ + ithr * ws_per_thread_
+                        if (pd()->rtus_.reduce_src_) {
+                            rp.ws = rtus_space
+                                    + ithr * pd()->rtus_.space_per_thread_
                                     + _icb * jcp.is * jcp.ic_block;
 
                             if (ocb == 0) {
@@ -259,7 +267,6 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         };
 
         auto compute_row_dw = [&](const float* ws_p, int n, int ocb, int load_step, int dst_idx) {
-            const auto &jcp_dw = kernel_dw_->jcp;
 
             for (int chb = ocb; chb < ocb + load_step; chb++) {
                 auto par_conv_dw = jit_conv_call_s();
@@ -275,9 +282,11 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                                        dst_idx/jcp_dw.stride_h*jcp_dw.ow*jcp_dw.ch_block];
 
                 par_conv_dw.kh_padding = jcp_dw.kh;
-                par_conv_dw.filt = &jcp.dw_conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
+                par_conv_dw.filt = &jcp_dw.conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
                 par_conv_dw.bias = &dw_bias[chb * jcp_dw.ch_block];
                 par_conv_dw.ur_w = (size_t)(jcp_dw.ow);
+                par_conv_dw.oc_work = nstl::min((chb + 1) * jcp_dw.ch_block, (int)jcp_dw.oc) - chb*jcp_dw.ch_block;
+                par_conv_dw.oc_off = chb * jcp_dw.ch_block * sizeof(float);
 
                 kernel_dw_->jit_ker(&par_conv_dw);
             }
@@ -288,7 +297,9 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         int start{0}, end{0};
         balance211(work_amount, nthr, ithr, start, end);
 
-        auto pbuf = dw_conv_buffer_ + ithr * dw_conv_buffer_size_;
+        auto dw_conv_buffer = scratchpad().get<data_t>(key_dw_conv_buffer);
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * (jcp.oc / jcp.oc_block);
+        auto pbuf = dw_conv_buffer + ithr * dw_conv_buffer_size_;
 
         const int os_block = jcp.iw;
 
@@ -319,7 +330,7 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                 compute_block_1x1(pbuf, n, g, oh + 1, ow, ih, iw, os, os_block, bcast_step, ocb, load_step, bcast_step);
             }
 
-            if ((oh % jcp.dw_conv_str_h == 0)) {
+            if ((oh % jcp_dw.stride_h == 0)) {
                 compute_row_dw(pbuf, n, ocb, load_step, oh);
             }
 
@@ -327,44 +338,50 @@ void _jit_avx2_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         }
     };
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            dw_padded_bias_[oc] = dw_bias[oc];
-        dw_bias = dw_padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
+
+        auto dw_padded_bias = scratchpad().get<data_t>(key_dw_conv_padded_bias);
+        utils::array_copy(dw_padded_bias, dw_bias, jcp.oc_without_padding);
+        utils::array_set(dw_padded_bias + jcp.oc_without_padding, 0.f,
+                         jcp.oc - jcp.oc_without_padding);
+        dw_bias = dw_padded_bias;
     }
 
     parallel(0, ker);
-}
 
-template struct _jit_avx2_1x1_convolution_fwd_t<true>;
-template struct _jit_avx2_1x1_convolution_fwd_t<false>;
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
+}
 
 /* convolution backward wtr data */
 
-void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() {
+void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+
+    auto rtus_space = scratchpad().get<data_t>(key_conv_rtus_space);
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     // TODO (Roma): remove this restriction
     assert(jcp.stride_w == 1 && jcp.stride_h == 1);
     const int ndims = diff_dst_d.ndims();
 
-    const int stride_h = (ndims == 3) ? 1 : conf_.desc()->strides[0];
-    const int stride_w = conf_.desc()->strides[ndims - 3];
-    const int pad_t = (ndims == 3) ? 0 : conf_.desc()->padding[0][0];
-    const int pad_l = conf_.desc()->padding[0][ndims - 3];
+    const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[ndims - 3];
+    const int pad_t = (ndims == 3) ? 0 : pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][ndims - 3];
 
     const int nb_ic = jcp.nb_load;
     const int nb_oc = jcp.nb_reduce;
@@ -417,8 +434,9 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() {
 
                 const int _icb = g * nb_ic + icb;
                 rp.src = diff_src + data_blk_off(diff_src_d, n, _icb, ih, iw);
-                if (conf_.rtus_.reduce_src_) {
-                    rp.ws = scratch_ + ithr * ws_per_thread_;
+                if (pd()->rtus_.reduce_src_) {
+                    rp.ws = rtus_space
+                        + ithr * pd()->rtus_.space_per_thread_;
                     p.output_data = rp.ws;
                 } else
                     p.output_data = rp.src;
@@ -430,7 +448,7 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() {
                         ow);
                     p.bcast_data = &diff_dst[diff_dst_off];
 
-                    p.load_data = &weights[conf_.with_groups()
+                    p.load_data = &weights[pd()->with_groups()
                         ? weights_d.blk_off(g, ocb, icb)
                         : weights_d.blk_off(ocb, icb)];
 
@@ -442,7 +460,7 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() {
                     kernel_->jit_ker(&p);
                 }
 
-                if (conf_.rtus_.reduce_src_)
+                if (pd()->rtus_.reduce_src_)
                     rtus_driver_->ker_(&rp);
             }
         }
@@ -454,64 +472,46 @@ void jit_avx2_1x1_convolution_bwd_data_t::execute_backward_data() {
 /* convolution backward wtr weights */
 
 jit_avx2_1x1_convolution_bwd_weights_t::jit_avx2_1x1_convolution_bwd_weights_t(
-        const pd_t *pd, const input_vector &inputs,
+        const pd_t *apd, const input_vector &inputs,
         const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), kernel_(nullptr)
-    , rtus_driver_(nullptr), ws_per_thread_(0), scratch_(nullptr)
-    , padded_bias_(nullptr)
+    : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr)
+    , rtus_driver_(nullptr)
 {
-    kernel_ = new jit_avx2_1x1_conv_kernel_f32(conf_.jcp_, *conf_.attr());
-
-    const auto &jcp = kernel_->jcp;
-
-    const int ic_block = jcp.bcast_block;
-    const int nb_ic = jcp.nb_bcast;
-    const int nb_ic_blocking = jcp.nb_bcast_blocking;
-    const int bcast_work = utils::div_up(nb_ic, nb_ic_blocking);
-
-    const int oc_block = jcp.load_block;
-    const int nb_oc = jcp.nb_load;
-    const int nb_oc_blocking = jcp.nb_load_blocking;
-    const int load_work = utils::div_up(nb_oc, nb_oc_blocking);
-
-    const int job_size
-        = nb_oc_blocking * nb_ic_blocking * ic_block * oc_block;
-    const int njobs_x = bcast_work;
-    const int njobs_y = jcp.ngroups * load_work;
-
-    const int max_threads = mkldnn_get_max_threads();
-    const size_t max_buffer_size = max_threads * job_size * 8;
-
-    reducer_weights_ = new cpu_reducer_2d_t<data_type::f32>(
-            reduce_balancer_t(max_threads, job_size, njobs_y * njobs_x,
-                jcp.mb * jcp.nb_reduce, max_buffer_size),
-            job_size / nb_oc_blocking, nb_oc_blocking, ic_block,
-            nb_ic * ic_block * oc_block, nb_oc, false);
-
-    reducer_bias_ = !conf_.with_bias() ? nullptr
-        : new cpu_reducer_t<data_type::f32>(reduce_balancer_t(max_threads,
-                    oc_block, jcp.ngroups * jcp.oc / oc_block,
-                    jcp.mb, max_buffer_size));
-
-    if (conf_.want_padded_bias())
-        padded_bias_ = (data_t *)malloc(sizeof(data_t) * jcp.oc, 64);
-
+    kernel_ = new jit_avx2_1x1_conv_kernel_f32(pd()->jcp_, jit_conv_conf_t(), *pd()->attr());
+    reducer_weights_ =
+        new cpu_reducer_2d_t<data_type::f32>(pd()->reducer_wei_conf_);
+    reducer_bias_ = new cpu_reducer_t<data_type::f32>(pd()->reducer_bia_conf_);
     init_rtus_driver<avx2>(this);
 }
 
-void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
+void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_weights = reinterpret_cast<data_t *>(this->memory(0));
     auto diff_bias_in = reinterpret_cast<data_t *>(this->memory(1));
-    data_t *diff_bias = conf_.want_padded_bias() ? padded_bias_ : diff_bias_in;
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
-    const memory_desc_wrapper diff_bias_d(conf_.diff_weights_pd(1));
+    auto scratchpad = this->scratchpad();
+
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
+    const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
+    auto rtus_space = scratchpad.get<data_t>(key_conv_rtus_space);
+
+    data_t *diff_bias = pd()->wants_padded_bias()
+        ? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
+
+    auto reducer_bia_scratchpad = memory_tracking::grantor_t(scratchpad,
+            prefix_reducer_bia);
+    auto rb = this->reducer_bias_;
+    rb->init(reducer_bia_scratchpad);
+
+    auto reducer_wei_scratchpad = memory_tracking::grantor_t(scratchpad,
+            prefix_reducer_wei);
+    auto rw = this->reducer_weights_;
+    rw->init(reducer_wei_scratchpad);
 
     const int ndims = diff_dst_d.ndims();
     // TODO (Roma): remove this restriction
@@ -528,10 +528,10 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
     const int sp_dim = jcp.reduce_dim;
     const int mb_sp_work = jcp.mb * sp_dim;
 
-    const int stride_h = (ndims == 3) ? 1 : conf_.desc()->strides[0];
-    const int stride_w = conf_.desc()->strides[ndims - 3];
-    const int pad_t = (ndims == 3) ? 0 : conf_.desc()->padding[0][0];
-    const int pad_l = conf_.desc()->padding[0][ndims - 3];
+    const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[ndims - 3];
+    const int pad_t = (ndims == 3) ? 0 : pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][ndims - 3];
 
     auto step = [](int default_step, int remaining, int tail_step) {
         assert(default_step <= tail_step);
@@ -574,7 +574,7 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
                     p.load_data = diff_dst
                         + (oc_b * jcp.reduce_dim + sp) * jcp.oc_block;
 
-                    if (conf_.rtus_.reduce_src_) {
+                    if (pd()->rtus_.reduce_src_) {
                         const int oh = sp / jcp.ow;
                         const int ow = sp % jcp.ow;
 
@@ -582,7 +582,8 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
                         const int iw = nstl::max(ow * stride_w - pad_l, 0);
                         rp.iw_start = iw;
 
-                        rp.ws = scratch_ + ithr * ws_per_thread_
+                        rp.ws = rtus_space
+                            + ithr * pd()->rtus_.space_per_thread_
                             + (ic_b * jcp.is + sp) * jcp.ic_block;
                         if (ndims == 3)
                             rp.src = src
@@ -607,22 +608,21 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
     };
 
     auto ker = [&](const int ithr, const int nthr) {
-        auto rw = this->reducer_weights_;
-        assert(nthr == rw->balancer_.nthr_);
+        assert(nthr == rw->balancer().nthr_);
 
-        const int w_njobs = rw->balancer_.ithr_njobs(ithr);
+        const int w_njobs = rw->balancer().ithr_njobs(ithr);
         if (w_njobs == 0) return;
 
         /* setup: independent work (oc, ic) */
-        const int w_job_start = rw->balancer_.ithr_job_off(ithr);
+        const int w_job_start = rw->balancer().ithr_job_off(ithr);
         int g{0}, load_i{0}, bcast_i{0};
         nd_iterator_init(w_job_start, g, jcp.ngroups, load_i, load_work,
                 bcast_i, bcast_work);
 
         /* setup: reduction work (mb, sp) */
         int mb_sp_start{0}, mb_sp_end{0};
-        balance211(mb_sp_work, rw->balancer_.nthr_per_group_,
-                rw->balancer_.id_in_group(ithr), mb_sp_start, mb_sp_end);
+        balance211(mb_sp_work, rw->balancer().nthr_per_group_,
+                rw->balancer().id_in_group(ithr), mb_sp_start, mb_sp_end);
         int img_start{0}, sp_start{0};
         nd_iterator_init(mb_sp_start, img_start, jcp.mb, sp_start, sp_dim);
 
@@ -637,16 +637,16 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
             data_t *store_to;
             size_t store_to_ld;
 
-            if (rw->balancer_.nthr_per_group_ == 1 ||
-                    (rw->balancer_.master(ithr) && rw->master_uses_dst_)) {
-                const size_t off = conf_.with_groups()
+            if (rw->balancer().nthr_per_group_ == 1) {
+                const size_t off = pd()->with_groups()
                     ? diff_weights_d.blk_off(g, oc_b, ic_b)
                     : diff_weights_d.blk_off(oc_b, ic_b);
                 store_to = &diff_weights[off];
                 store_to_ld = jcp.ic * jcp.oc_block;
             } else {
-                const size_t off = iwork * rw->balancer_.job_size_;
-                store_to = &rw->get_local_ptr(ithr, nullptr)[off];
+                const size_t off = iwork * rw->balancer().job_size_;
+                store_to =
+                    rw->get_local_ptr(ithr, reducer_wei_scratchpad) + off;
                 store_to_ld = nb_ic_blocking * jcp.ic_block * jcp.oc_block;
             }
 
@@ -670,22 +670,21 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
             nd_iterator_step(g, jcp.ngroups, load_i, load_work, bcast_i,
                              bcast_work);
         }
-        rw->reduce(ithr, diff_weights);
+        rw->reduce(ithr, diff_weights, reducer_wei_scratchpad);
     };
 
     auto ker_bias = [&](int ithr, int nthr) {
-        auto rb = this->reducer_bias_;
-        assert(nthr == rb->balancer_.nthr_);
+        assert(nthr == rb->balancer().nthr_);
 
-        const int b_job_start = rb->balancer_.ithr_job_off(ithr);
-        const int b_njobs = rb->balancer_.ithr_njobs(ithr);
+        const int b_job_start = rb->balancer().ithr_job_off(ithr);
+        const int b_njobs = rb->balancer().ithr_njobs(ithr);
 
         if (b_njobs == 0) return;
 
         /* reduction dimension */
         int img_start{0}, img_end{0};
-        balance211(jcp.mb, rb->balancer_.nthr_per_group_,
-                rb->balancer_.id_in_group(ithr), img_start, img_end);
+        balance211(jcp.mb, rb->balancer().nthr_per_group_,
+                rb->balancer().id_in_group(ithr), img_start, img_end);
 
         /* jobs */
         int g_start{0}, ocb_start{0};
@@ -697,8 +696,9 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
                 const size_t _oc = g * nb_oc + ocb;
 
                 const data_t *d_dst = &diff_dst[diff_dst_d.blk_off(img, _oc)];
-                data_t *d_bias = &rb->get_local_ptr(ithr, diff_bias)[
-                    b_job_loc * rb->balancer_.job_size_];
+                data_t *d_bias =
+                    rb->get_local_ptr(ithr, diff_bias, reducer_bia_scratchpad)
+                    + b_job_loc * rb->balancer().job_size_;
 
                 if (img == img_start)
                     for (int o = 0; o < 8; ++o) d_bias[o] = 0.;
@@ -713,17 +713,17 @@ void jit_avx2_1x1_convolution_bwd_weights_t::execute_backward_weights() {
                 nd_iterator_step(g, jcp.ngroups, ocb, nb_oc);
             }
         }
-        rb->reduce(ithr, diff_bias);
+        rb->reduce(ithr, diff_bias, reducer_bia_scratchpad);
     };
 
     parallel(0, [&](const int ithr, const int nthr) {
         ker(ithr, nthr);
-        if (conf_.with_bias())
+        if (pd()->with_bias())
             ker_bias(ithr, nthr);
     });
 
     /* TODO: put this in ker_bias */
-    if (conf_.want_padded_bias()) {
+    if (pd()->wants_padded_bias()) {
         assert(jcp.ngroups == 1);
         for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
             diff_bias_in[oc] = diff_bias[oc];
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.hpp
index 784625203..ede597855 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_1x1_convolution.hpp
@@ -19,85 +19,81 @@
 
 #include <common/primitive_attr.hpp>
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "cpu_reducer.hpp"
+
 #include "jit_avx2_1x1_conv_kernel_f32.hpp"
 #include "jit_uni_1x1_conv_utils.hpp"
-#include "mkldnn_thread.hpp"
-#include "utils.hpp"
+
 #include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu>
-struct _jit_avx2_1x1_convolution_fwd_t: public cpu_primitive_t {
+struct jit_avx2_1x1_convolution_fwd_t: public cpu_primitive_t {
     // TODO: (Roma) Code duplication duplication! Remove with templates
     //              (maybe...)!
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
-            , jcp_(), jcp_dw(), rtus_() {}
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_(), jcp_dw_(), rtus_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_1x1:", avx2, ""),
-                _jit_avx2_1x1_convolution_fwd_t<with_relu>);
+                jit_avx2_1x1_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                        alg_kind::convolution_auto,
+                        alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
-                        this->cdesc_().src_desc.data_type,
-                        this->cdesc_().weights_desc.data_type,
-                        this->cdesc_().dst_desc.data_type)
+                        this->desc()->src_desc.data_type,
+                        this->desc()->weights_desc.data_type,
+                        this->desc()->dst_desc.data_type)
                 && IMPLICATION(this->with_bias(),
-                        data_type::f32 == this->cdesc_().bias_desc.data_type);
+                        data_type::f32 == this->desc()->bias_desc.data_type);
             if (!ok) return status::unimplemented;
 
-            const convolution_desc_t *conv_d = &this->cdesc_();
+            const convolution_desc_t *conv_d = this->desc();
             const memory_desc_t *src_d = this->src_pd_.desc();
             rtus_prepare(this, conv_d, src_d, this->dst_pd_.desc());
 
             status_t sts_1x1 = jit_avx2_1x1_conv_kernel_f32::init_conf(jcp_,
                     *conv_d, *src_d, *this->weights_pd_.desc(),
-                    *this->dst_pd_.desc(), *this->attr(),
-                    with_relu, this->negative_slope());
+                    *this->dst_pd_.desc(), *this->attr());
             if (sts_1x1 != status::success) return sts_1x1;
 
             if (jcp_.with_dw_conv) {
-                int dw_conv_oh = (jcp_.oh - ((jcp_.dw_conv_ker_h - 1) + 1) + 2) / jcp_.dw_conv_str_h + 1;
-                int dw_conv_ow = (jcp_.ow - ((jcp_.dw_conv_ker_w - 1) + 1) + 2) / jcp_.dw_conv_str_w + 1;
-
-                status_t sts_dw = jit_uni_dw_conv_row_f32<avx2>::init_conf(jcp_dw,
-                                                                           jcp_.oc, jcp_.oh, jcp_.ow, dw_conv_oh, dw_conv_ow,
-                                                                           jcp_.dw_conv_ker_h, jcp_.dw_conv_ker_w,
-                                                                           jcp_.dw_conv_str_h, jcp_.dw_conv_str_w,
-                                                                           jcp_.dw_conv_eltwise_alg, jcp_.dw_conv_eltwise_alpha,
-                                                                           jcp_.dw_conv_eltwise_beta, jcp_.dw_conv_with_sum);
+                status_t sts_dw = jit_uni_dw_conv_row_f32<avx2>::init_conf(jcp_, jcp_dw_, *this->attr());
                 if (sts_dw != status::success) return sts_dw;
             }
 
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx2_1x1_conv_kernel_f32::init_scratchpad(scratchpad, jcp_, jcp_dw_);
+
+            rtus_prepare_space_info(this, scratchpad);
+
             return status::success;
         }
 
         jit_1x1_conv_conf_t jcp_;
-        jit_conv_conf_t jcp_dw;
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
+        jit_conv_conf_t jcp_dw_;
+        reduce_to_unit_stride_t rtus_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -114,6 +110,8 @@ struct _jit_avx2_1x1_convolution_fwd_t: public cpu_primitive_t {
                     : utils::pick(this->ndims() - 3, OIw8i8o, OIhw8i8o)));
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
@@ -121,61 +119,33 @@ struct _jit_avx2_1x1_convolution_fwd_t: public cpu_primitive_t {
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
 
-    _jit_avx2_1x1_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_avx2_1x1_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , kernel_(nullptr), rtus_driver_(nullptr), ws_per_thread_(0)
-        , scratch_(nullptr), padded_bias_(nullptr), dw_conv_buffer_size_(0), dw_conv_buffer_(nullptr), dw_padded_bias_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
+        , kernel_(nullptr), rtus_driver_(nullptr)
     {
-        kernel_ = new jit_avx2_1x1_conv_kernel_f32(conf_.jcp_, *conf_.attr());
-        if (conf_.jcp_.with_dw_conv) {
-            kernel_dw_ = new jit_uni_dw_conv_row_f32<avx2>(conf_.jcp_dw);
-        }
-
+        kernel_ = new jit_avx2_1x1_conv_kernel_f32(pd()->jcp_, pd()->jcp_dw_, *pd()->attr());
         init_rtus_driver<avx2>(this);
 
-        if (conf_.want_padded_bias()) {
-            const auto &j = conf_.jcp_;
-            assert(j.ngroups == 1);
-            padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                padded_bias_[oc] = 0;
-        }
-
-        if (conf_.jcp_.with_dw_conv) {
-            const int nthreads = mkldnn_get_max_threads();
-
-            dw_conv_buffer_size_ = (size_t) conf_.jcp_dw.kh * conf_.jcp_dw.iw * conf_.jcp_dw.ch_block *
-                                   (conf_.jcp_.oc / conf_.jcp_.oc_block);
-            dw_conv_buffer_ = (data_t *) malloc(dw_conv_buffer_size_ * nthreads * sizeof(data_t), 64);
-
-            if (conf_.want_padded_bias()) {
-                const auto &j = conf_.jcp_;
-                assert(j.ngroups == 1);
-                dw_padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-                for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                    dw_padded_bias_[oc] = 0;
-            }
+        if (pd()->jcp_.with_dw_conv) {
+            kernel_dw_ = new jit_uni_dw_conv_row_f32<avx2>(pd()->jcp_dw_, *pd()->attr(), pd()->jcp_dw_.ch_block);
         }
     }
-    ~_jit_avx2_1x1_convolution_fwd_t() {
+
+    ~jit_avx2_1x1_convolution_fwd_t() {
         delete kernel_;
         delete rtus_driver_;
-        free(scratch_);
-        free(padded_bias_);
 
-        if (conf_.jcp_.with_dw_conv) {
+        if (pd()->jcp_.with_dw_conv) {
             delete kernel_dw_;
-            free(dw_conv_buffer_);
-            free(dw_padded_bias_);
         }
     }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.jcp_.with_dw_conv)
-            execute_forward_fusing();
+    virtual void execute(event_t *e) const {
+        if (pd()->jcp_.with_dw_conv)
+            execute_forward_with_dw_conv();
         else
             execute_forward();
 
@@ -183,28 +153,15 @@ struct _jit_avx2_1x1_convolution_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward();
-    void execute_forward_fusing();
+    void execute_forward() const;
+    void execute_forward_with_dw_conv() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     jit_avx2_1x1_conv_kernel_f32 *kernel_;
     jit_uni_dw_conv_row_f32<avx2> *kernel_dw_;
-
-    /* reduction to unit stride */
     rtus_driver_t<avx2> *rtus_driver_;
-    size_t ws_per_thread_;
-    data_t *scratch_;
-    data_t *padded_bias_;
-
-    /* fuse with dw conv */
-    size_t dw_conv_buffer_size_;
-    data_t *dw_conv_buffer_;
-    data_t *dw_padded_bias_;
 };
 
-using jit_avx2_1x1_convolution_fwd_t = _jit_avx2_1x1_convolution_fwd_t<false>;
-using jit_avx2_1x1_convolution_relu_t = _jit_avx2_1x1_convolution_fwd_t<true>;
-
 struct jit_avx2_1x1_convolution_bwd_data_t: public cpu_primitive_t {
     struct pd_t: public cpu_convolution_bwd_data_pd_t {
         pd_t(engine_t *engine,
@@ -224,7 +181,8 @@ struct jit_avx2_1x1_convolution_bwd_data_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_data
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->diff_src_desc.data_type,
@@ -236,17 +194,22 @@ struct jit_avx2_1x1_convolution_bwd_data_t: public cpu_primitive_t {
             const memory_desc_t *diff_src_d = this->diff_src_pd_.desc();
             rtus_prepare(this, conv_d, diff_src_d, this->diff_dst_pd_.desc());
 
-            return jit_avx2_1x1_conv_kernel_f32::init_conf(jcp_, *conv_d,
-                    *diff_src_d, *this->weights_pd_.desc(),
+            status_t status = jit_avx2_1x1_conv_kernel_f32::init_conf(jcp_,
+                    *conv_d, *diff_src_d, *this->weights_pd_.desc(),
                     *this->diff_dst_pd_.desc(), *this->attr());
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx2_1x1_conv_kernel_f32::init_scratchpad(scratchpad, jcp_);
+
+            rtus_prepare_space_info(this, scratchpad);
+
+            return status::success;
         }
 
         // TODO (Roma): structs conf header cleanup
         jit_1x1_conv_conf_t jcp_;
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
+        reduce_to_unit_stride_t rtus_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -262,6 +225,8 @@ struct jit_avx2_1x1_convolution_bwd_data_t: public cpu_primitive_t {
                 CHECK(this->weights_pd_.set_format(this->with_groups()
                     ? utils::pick(this->ndims() - 3, gOIw8o8i, gOIhw8o8i)
                     : utils::pick(this->ndims() - 3, OIw8o8i, OIhw8o8i)));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
@@ -269,25 +234,24 @@ struct jit_avx2_1x1_convolution_bwd_data_t: public cpu_primitive_t {
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
 
-    jit_avx2_1x1_convolution_bwd_data_t(const pd_t *pd,
+    jit_avx2_1x1_convolution_bwd_data_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , kernel_(nullptr), rtus_driver_(nullptr), ws_per_thread_(0)
-        , scratch_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
+        , kernel_(nullptr), rtus_driver_(nullptr)
     {
-        kernel_ = new jit_avx2_1x1_conv_kernel_f32(conf_.jcp_, *conf_.attr());
+        kernel_ = new jit_avx2_1x1_conv_kernel_f32(pd()->jcp_, jit_conv_conf_t(), *pd()->attr());
         init_rtus_driver<avx2>(this);
     }
+
     ~jit_avx2_1x1_convolution_bwd_data_t() {
         delete kernel_;
         delete rtus_driver_;
-        free(scratch_);
     }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             execute_backward_data();
             break;
@@ -298,20 +262,16 @@ struct jit_avx2_1x1_convolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
-    jit_avx2_1x1_conv_kernel_f32 *kernel_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    /* reduction to unit stride */
+    jit_avx2_1x1_conv_kernel_f32 *kernel_;
     rtus_driver_t<avx2> *rtus_driver_;
-    size_t ws_per_thread_;
-    data_t *scratch_;
 };
 
 struct jit_avx2_1x1_convolution_bwd_weights_t: public cpu_primitive_t {
     struct pd_t: public cpu_convolution_bwd_weights_pd_t {
-        pd_t(engine_t *engine,
-                const convolution_desc_t *adesc,
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const convolution_fwd_pd_t *hint_fwd_pd)
             : cpu_convolution_bwd_weights_pd_t(engine, adesc, attr, hint_fwd_pd)
@@ -327,7 +287,8 @@ struct jit_avx2_1x1_convolution_bwd_weights_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_weights
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->src_desc.data_type,
@@ -341,18 +302,33 @@ struct jit_avx2_1x1_convolution_bwd_weights_t: public cpu_primitive_t {
             const memory_desc_t *src_d = this->src_pd_.desc();
             rtus_prepare(this, conv_d, src_d, this->diff_dst_pd_.desc());
 
-            return jit_avx2_1x1_conv_kernel_f32::init_conf(jcp_, *conv_d,
-                    *src_d, *this->diff_weights_pd_.desc(),
+            status_t status = jit_avx2_1x1_conv_kernel_f32::init_conf(jcp_,
+                    *conv_d, *src_d, *this->diff_weights_pd_.desc(),
                     *this->diff_dst_pd_.desc(), *this->attr());
+            if (status != status::success) return status;
+
+            init_balancers();
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx2_1x1_conv_kernel_f32::init_scratchpad(scratchpad, jcp_);
+
+            rtus_prepare_space_info(this, scratchpad);
+
+            auto reducer_bia_scratchpad = memory_tracking::registrar_t(
+                    scratchpad, memory_tracking::names::prefix_reducer_bia);
+            reducer_bia_conf_.init_scratchpad(reducer_bia_scratchpad);
+
+            auto reducer_wei_scratchpad = memory_tracking::registrar_t(
+                    scratchpad, memory_tracking::names::prefix_reducer_wei);
+            reducer_wei_conf_.init_scratchpad(reducer_wei_scratchpad);
+
+            return status::success;
         }
 
-        // TODO (Roma): structs conf header cleanup
         jit_1x1_conv_conf_t jcp_;
-
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
+        cpu_reducer_t<data_type::f32>::conf_t reducer_bia_conf_;
+        cpu_reducer_2d_t<data_type::f32>::conf_t reducer_wei_conf_;
+        reduce_to_unit_stride_t rtus_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -370,28 +346,62 @@ struct jit_avx2_1x1_convolution_bwd_weights_t: public cpu_primitive_t {
                     : utils::pick(this->ndims() - 3, OIw8i8o, OIhw8i8o)));
             if (this->diff_bias_pd_.desc()->format == any)
                 CHECK(this->diff_bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
+
+    private:
+        void init_balancers() {
+            const int ic_block = jcp_.bcast_block;
+            const int nb_ic = jcp_.nb_bcast;
+            const int nb_ic_blocking = jcp_.nb_bcast_blocking;
+            const int bcast_work = utils::div_up(nb_ic, nb_ic_blocking);
+
+            const int oc_block = jcp_.load_block;
+            const int nb_oc = jcp_.nb_load;
+            const int nb_oc_blocking = jcp_.nb_load_blocking;
+            const int load_work = utils::div_up(nb_oc, nb_oc_blocking);
+
+            const int job_size
+                = nb_oc_blocking * nb_ic_blocking * ic_block * oc_block;
+            const int njobs_x = bcast_work;
+            const int njobs_y = jcp_.ngroups * load_work;
+
+            const int max_threads = mkldnn_get_max_threads();
+            const size_t max_buffer_size = max_threads * job_size * 8;
+
+            if (with_bias()) {
+                reducer_bia_conf_.init(reduce_balancer_t(max_threads,
+                            oc_block, jcp_.ngroups * jcp_.oc / oc_block,
+                            jcp_.mb, max_buffer_size));
+            }
+
+            reducer_wei_conf_.init(
+                    reduce_balancer_t(max_threads, job_size, njobs_y * njobs_x,
+                        jcp_.mb * jcp_.nb_reduce, max_buffer_size),
+                    job_size / nb_oc_blocking, nb_oc_blocking, ic_block,
+                    nb_ic * ic_block * oc_block, nb_oc);
+        }
     };
 
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
 
-    jit_avx2_1x1_convolution_bwd_weights_t(const pd_t *pd,
+    jit_avx2_1x1_convolution_bwd_weights_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
+
     ~jit_avx2_1x1_convolution_bwd_weights_t() {
         delete kernel_;
         delete rtus_driver_;
         delete reducer_weights_;
         delete reducer_bias_;
-        free(scratch_);
-        free(padded_bias_);
     }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_weights:
             execute_backward_weights();
             break;
@@ -402,17 +412,13 @@ struct jit_avx2_1x1_convolution_bwd_weights_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_weights();
-    pd_t conf_;
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx2_1x1_conv_kernel_f32 *kernel_;
     cpu_reducer_2d_t<data_type::f32> *reducer_weights_;
     cpu_reducer_t<data_type::f32> *reducer_bias_;
-
-    /* reduction to unit stride */
     rtus_driver_t<avx2> *rtus_driver_;
-    size_t ws_per_thread_;
-    data_t *scratch_;
-    data_t *padded_bias_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.cpp
index 392622a5a..0caa4b440 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.cpp
@@ -15,7 +15,6 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <common/primitive_attr.hpp>
 #include "c_types_map.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
@@ -32,6 +31,7 @@ namespace cpu {
 
 using namespace mkldnn::impl::prop_kind;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 using namespace Xbyak;
@@ -77,9 +77,8 @@ void jit_avx2_conv_fwd_kernel_f32::oh_step_unroll_kw(int ur_w,
                         vfmadd231ps(Ymm(ur_w * ii + jj),
                                 Ymm(oc_blocks * ur_w + jj), ymm15);
                     else { // Intel(R) Advanced Vector Extensions (Intel(R) AVX) support
-                        Ymm tmp = ymask;
-                        vmulps(tmp, ymm15, Ymm(oc_blocks * ur_w + jj));
-                        vaddps(Ymm(ur_w * ii + jj), Ymm(ur_w * ii + jj), tmp);
+                        vmulps(ytmp, ymm15, Ymm(oc_blocks * ur_w + jj));
+                        vaddps(Ymm(ur_w * ii + jj), Ymm(ur_w * ii + jj), ytmp);
                     }
             }
         }
@@ -131,9 +130,8 @@ void jit_avx2_conv_fwd_kernel_f32::oh_step_nopad(int ur_w,
                         vfmadd231ps(Ymm(ur_w * ii + jj),
                                 Ymm(oc_blocks * ur_w + jj), ymm15);
                     else { // Intel AVX support
-                        Ymm tmp = ymask;
-                        vmulps(tmp, ymm15, Ymm(oc_blocks * ur_w + jj));
-                        vaddps(Ymm(ur_w * ii + jj), Ymm(ur_w * ii + jj), tmp);
+                        vmulps(ytmp, ymm15, Ymm(oc_blocks * ur_w + jj));
+                        vaddps(Ymm(ur_w * ii + jj), Ymm(ur_w * ii + jj), ytmp);
                     }
             }
         }
@@ -176,7 +174,7 @@ void jit_avx2_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         for (int jj = 0; jj < ur_w; jj++) {
             size_t offt;
             if (jcp.with_dw_conv)
-                offt = sizeof(float) * ((size_t)ii * od * jcp.dw_conv_ker_h * ow + jj) * oc_blk;
+                offt = sizeof(float) * ((size_t)ii * od * jcp_dw.kh * ow + jj) * oc_blk;
             else
                 offt = sizeof(float) * ((size_t)ii * od * oh * ow + jj) * oc_blk;
             vmovups(Ymm(ur_w * ii + jj),
@@ -224,7 +222,8 @@ void jit_avx2_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         mov(aux_reg_ker_d, ptr[param1 + GET_OFF(filt)]);
         mov(aux_reg_inp_d, reg_input);
 
-        if ((jcp.kd - 1) * (jcp.dilate_d + 1) < jcp.f_pad) {
+        if ((jcp.dilate_d >= jcp.id)
+                || (jcp.kd - 1) * (jcp.dilate_d + 1) < jcp.f_pad) {
             cmp(reg_ki, 0);
             je(skip_kd_loop, T_NEAR);
         }
@@ -239,7 +238,8 @@ void jit_avx2_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         mov(aux_reg_kernel, aux_reg_ker_d);
     }
 
-    if ((jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
+    if ((jcp.dilate_h >= jcp.ih)
+            || (jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
         cmp(kj, 0);
         je(skip_kh_loop, T_NEAR);
     }
@@ -279,8 +279,7 @@ void jit_avx2_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         pop(reg_output);
     }
 
-
-    Label done, regular_store;
+    Label regular_store;
 
     test(reg_ci_flag, FLAG_IC_LAST);
     je(regular_store, T_NEAR);
@@ -289,10 +288,6 @@ void jit_avx2_conv_fwd_kernel_f32::width_blk_step(int ur_w,
     int depthwise_inj_idx = 0;
     const auto &p = attr_.post_ops_;
 
-    if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-        eltwise_injectors[0]->compute_vector_range(0, oc_blocks * ur_w);
-    }
-
     int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
     for (int i = 0; i < end_idx; i++) {
         auto& post_op = p.entry_[i];
@@ -324,14 +319,13 @@ void jit_avx2_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         for (int jj = 0; jj < ur_w; jj++) {
             size_t o_off;
             if (jcp.with_dw_conv)
-                o_off = sizeof(float) * ((size_t)ii * od * jcp.dw_conv_ker_h * ow + jj) * oc_blk;
+                o_off = sizeof(float) * ((size_t)ii * od * jcp_dw.kh * ow + jj) * oc_blk;
             else
                 o_off = sizeof(float) * ((size_t)ii * od * oh * ow + jj) * oc_blk;
             Ymm reg_out = Ymm(ur_w * ii + jj);
             vmovups(make_safe_addr(reg_output, o_off, reg_long_offt), reg_out);
         }
     }
-    L(done);
 }
 
 inline void jit_avx2_conv_fwd_kernel_f32::solve_common(
@@ -397,12 +391,6 @@ inline void jit_avx2_conv_fwd_kernel_f32::solve_common(
 
 void jit_avx2_conv_fwd_kernel_f32::generate()
 {
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx2>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
     const auto &p = attr_.post_ops_;
     int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
     for (int i = 0; i < end_idx; i++) {
@@ -474,25 +462,16 @@ bool jit_avx2_conv_fwd_kernel_f32::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true; // no post_ops
-    case 1:
-        return true // sum OR eltwise OR dw_conv
-                && !jcp.with_eltwise && (is_simple(0) || is_sum(0) || is_dw_conv(0));
-    case 2:
-        return true // sum->eltwise OR dw_conv->eltwise OR eltwise->dw_conv OR dw_conv->sum OR sum->depthwise OR
-                    // eltwise->depthwise OR depthwise->depthwise
-                && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
-                                         (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
-                                         (is_simple(0) && is_simple(1)));
-    case 3:
-        return true // eltwise->dw_conv->eltwise OR dw_conv->sum->eltwise OR sum->eltwise->depthwise OR
-                    // sum->depthwise->eltwise OR sum->depthwise->depthwise
-                && !jcp.with_eltwise && ((is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
-                                         (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
-                                         (is_sum(0) && is_simple(1) && is_simple(2)));
-    case 4: return true // eltwise->dw_conv->sum->eltwise
-            && !jcp.with_eltwise && (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
-    default: return false;
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0) || is_dw_conv(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
+                       (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
+                       (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
+                       (is_sum(0) && is_simple(1) && is_simple(2));
+        case 4: return (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
+        default: return false;
     }
 
     return false;
@@ -501,7 +480,7 @@ bool jit_avx2_conv_fwd_kernel_f32::post_ops_ok(
 status_t jit_avx2_conv_fwd_kernel_f32::init_conf(jit_conv_conf_t &jcp,
         const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope)
+        const primitive_attr_t &attr)
 {
     if (!mayiuse(avx)) return status::unimplemented;
 
@@ -539,63 +518,62 @@ status_t jit_avx2_conv_fwd_kernel_f32::init_conf(jit_conv_conf_t &jcp,
     jcp.dilate_h = (ndims == 3) ? 0 : cd.dilates[ndims-4];
     jcp.dilate_w = cd.dilates[ndims-3];
 
-    jcp.b_pad = (jcp.oh - 1) * jcp.stride_h + (jcp.kh - 1) * (jcp.dilate_h + 1)
-            - (jcp.ih + jcp.t_pad - 1);
-
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
     const auto &p = attr.post_ops_;
-    jcp.with_dw_conv = false;
+
     int dw_conv_ind = p.find(primitive_kind::convolution);
-    if (dw_conv_ind != -1) {
-        jcp.with_dw_conv = true;
-        jcp.dw_conv_in_h = p.entry_[dw_conv_ind].dw_conv.in_h;
-        jcp.dw_conv_in_w = p.entry_[dw_conv_ind].dw_conv.in_w;
-        jcp.dw_conv_ker_h = p.entry_[dw_conv_ind].dw_conv.ker_h;
-        jcp.dw_conv_ker_w = p.entry_[dw_conv_ind].dw_conv.ker_w;
-        jcp.dw_conv_str_h = p.entry_[dw_conv_ind].dw_conv.str_h;
-        jcp.dw_conv_str_w = p.entry_[dw_conv_ind].dw_conv.str_w;
-        jcp.dw_conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
-        jcp.dw_conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
+    jcp.with_dw_conv = dw_conv_ind != -1;
+    if (jcp.with_dw_conv) {
+        jcp.dw_conv_oh = jcp.oh;
+        jcp.dw_conv_ow = jcp.ow;
+        jcp.oh = p.entry_[dw_conv_ind].dw_conv.in_h;
+        jcp.ow = p.entry_[dw_conv_ind].dw_conv.in_w;
     }
 
+    jcp.b_pad = (jcp.oh - 1) * jcp.stride_h + (jcp.kh - 1) * (jcp.dilate_h + 1)
+                - (jcp.ih + jcp.t_pad - 1);
+
     if (jcp.with_dw_conv && !mayiuse(avx2))
         return status::unimplemented;
 
     if (jcp.with_dw_conv && jcp.ndims == 5)
         return status::unimplemented;
 
-    if (jcp.with_dw_conv) {
-        int dw_conv_eltwise_ind = p.find(primitive_kind::eltwise, dw_conv_ind);
-        if (dw_conv_eltwise_ind != -1) {
-            jcp.dw_conv_with_eltwise = true;
-            jcp.dw_conv_eltwise_alg = p.entry_[dw_conv_eltwise_ind].eltwise.alg;
-            jcp.dw_conv_eltwise_alpha = p.entry_[dw_conv_eltwise_ind].eltwise.alpha;
-            jcp.dw_conv_eltwise_beta = p.entry_[dw_conv_eltwise_ind].eltwise.beta;
+    if (!mayiuse(avx2)) {
+        for (int i = 0; i < p.len_; i++) {
+            auto &post_op = p.entry_[i];
+            if (post_op.is_eltwise()) {
+                if (post_op.eltwise.alg != alg_kind::eltwise_relu)
+                    return status::unimplemented;
+            } else if (post_op.is_depthwise()) {
+                return status::unimplemented;
+            }
         }
     }
 
     jcp.with_sum = p.find(primitive_kind::sum, 0, dw_conv_ind) != -1;
-    if (jcp.with_dw_conv) {
-        jcp.dw_conv_with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
-    }
 
-    if (jcp.with_dw_conv) {
-        jcp.oh = jcp.dw_conv_in_h;
-        jcp.ow = jcp.dw_conv_in_w;
-    }
+    jcp.src_dt = cd.src_desc.data_type;
+    jcp.bia_dt = jcp.with_bias ? cd.bias_desc.data_type : data_type::undef;
+    jcp.dst_dt = cd.dst_desc.data_type;
 
     const int simd_w = 8;
     const bool flat = jcp.ic < simd_w;
     const bool mimo = !flat;
 
+
+    /* Grouped channel offset to support 'non-blocked data' format for
+     * convolution sizes with '(input_channel / ngroups) < simd' */
+    jcp.nonblk_group_off
+            = (one_of(src_d.format(), ncw, nchw, ncdhw) && jcp.ngroups > 1) ?
+            jcp.ic :
+            1;
+
     bool ok_to_pad_channels = true
         && jcp.ngroups == 1;
 
@@ -686,8 +664,23 @@ status_t jit_avx2_conv_fwd_kernel_f32::init_conf(jit_conv_conf_t &jcp,
     return status::success;
 }
 
-void jit_avx2_conv_bwd_data_kernel_f32::hsw_iter(int ur_w, int l_overflow,
-        int r_overflow, int start_off)
+void jit_avx2_conv_fwd_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw) {
+    if (jcp.with_bias && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp.oc);
+
+    if (jcp.with_dw_conv) {
+        const int nthreads = mkldnn_get_max_threads();
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * jcp.nb_oc_blocking;
+        scratchpad.book(key_dw_conv_buffer, sizeof(float) * dw_conv_buffer_size_ * nthreads);
+
+        if (jcp.oc != jcp.oc_without_padding)
+            scratchpad.book(key_dw_conv_padded_bias, sizeof(float) * jcp.oc);
+    }
+}
+
+void jit_avx2_conv_bwd_data_kernel_f32::compute_loop(int ur_w, int l_overflow,
+        int r_overflow)
 {
     int kw = jcp.kw;
     int kh = jcp.kh;
@@ -696,29 +689,37 @@ void jit_avx2_conv_bwd_data_kernel_f32::hsw_iter(int ur_w, int l_overflow,
     int ih = jcp.ih;
     int id = jcp.id;
     int ow = jcp.ow;
-    int stride_w = jcp.stride_w;
-    int stride_h = jcp.stride_h;
 
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
     int nb_ic_block = jcp.nb_ic_blocking;
+    int stride_w = jcp.stride_w;
+    int stride_h = jcp.stride_h;
 
     Label kd_loop, skip_kd_loop;
+    Label oc_loop, skip_oc_loop;
 
     for (int ii = 0; ii < nb_ic_block; ii++)
         for (int jj = 0; jj < ur_w; jj++) {
-            size_t offt = sizeof(float) * ((size_t)ii * id * ih * iw + jj)
-                * ic_block;
-            vmovups(Ymm(ur_w * ii + jj),
-                    make_safe_addr(reg_dsrc, offt, reg_long_offt));
+            uni_vpxor(Ymm(ur_w * ii + jj), Ymm(ur_w * ii + jj),
+                      Ymm(ur_w * ii + jj));
         }
 
     if (one_of(jcp.ndims, 3, 4)) {
-        mov(aux_reg_ddst, reg_ddst);
-        mov(aux_reg_kernel, reg_kernel);
+        cmp(reg_channel_work, 0);
+        jle(skip_oc_loop, T_NEAR);
+        xor_(reg_channel, reg_channel);
+
+        mov(aux_reg_ddst_oc_loop, reg_ddst);
+        mov(aux_reg_kernel_oc_loop, reg_kernel);
+
+        L(oc_loop);
+        mov(aux_reg_ddst, aux_reg_ddst_oc_loop);
+        mov(aux_reg_kernel, aux_reg_kernel_oc_loop);
     }
 
     if (jcp.ndims == 5) {
+        assert(jcp.nb_oc_blocking == 1);
         push(oi_iter);
 
         mov(reg_ki, ptr[this->param1 + GET_OFF(kd_padding)]);
@@ -736,42 +737,46 @@ void jit_avx2_conv_bwd_data_kernel_f32::hsw_iter(int ur_w, int l_overflow,
         mov(aux_reg_kernel, aux_reg_ker_d);
     }
 
-    mov(kj, reg_kh);
-
-    Label kh_label;
-
-    L(kh_label); {
+    Label kh_loop, skip_kh_loop;
+    cmp(kj, 0);
+    jle(skip_kh_loop, T_NEAR);
+    L(kh_loop); {
         for (int ki = 0; ki < kw; ki++) {
-            int jj_start = nstl::max(0, l_overflow - (kw - 1) + ki) ; // 0;
-            int jj_end = ur_w - nstl::max(0, r_overflow - ki); // ur_w;
+            int jj_start = get_iw_start(ki, l_overflow); // 0;
+            int jj_end = get_iw_end(ur_w, ki, r_overflow); // ur_w;
             for (int ofm2 = 0; ofm2 < jcp.oc_block; ofm2++) {
 
-                for (int jj = jj_start; jj < jj_end; jj++) {
-                   if ((jj - ki + jcp.l_pad + start_off) % stride_w == 0) {
-                        int aux_output_offset = ((jj - ki + jcp.l_pad + start_off) / stride_w) * jcp.oc_block + ofm2;
-                        vbroadcastss(Ymm(nb_ic_block * ur_w + jj), ptr[aux_reg_ddst + sizeof(float) * aux_output_offset]);
-                   }
+                for (int jj = jj_start ; jj < jj_end; jj += stride_w) {
+                    int aux_output_offset
+                      = (jj + jcp.l_pad - ki) / stride_w * jcp.oc_block + ofm2;
+                    vbroadcastss(Ymm(nb_ic_block * ur_w + jj / stride_w),
+                            ptr[aux_reg_ddst
+                            + sizeof(float) * aux_output_offset]);
                 }
 
-                for (int ii = 0; ii < nb_ic_block; ii++) {
-                    int aux_kernel_offset = ii * kd * kh * kw * jcp.ic_block * jcp.oc_block + ki * jcp.ic_block * jcp.oc_block + ofm2 * jcp.ic_block;
-                    vmovups(ymm15, ptr[aux_reg_kernel + sizeof(float) * aux_kernel_offset]);
-
-                    for (int jj = jj_start; jj < jj_end; jj++) {
-                       if ((jj - ki + jcp.l_pad + start_off) % stride_w == 0) {
-                            vfmadd231ps(Ymm(ur_w * ii + jj), Ymm(nb_ic_block * ur_w + jj), ymm15);
-                       }
-                    }
+                for (int ii = 0; ii  < nb_ic_block; ii++) {
+                    int aux_kernel_offset
+                        = ii * kd * kh * kw * jcp.ic_block * jcp.oc_block
+                        + ki * jcp.ic_block * jcp.oc_block
+                        + ofm2 * jcp.ic_block;
+                    vmovups(ymm15,
+                            ptr[aux_reg_kernel
+                            + sizeof(float) * aux_kernel_offset]);
+                    for (int jj = jj_start; jj  < jj_end; jj += stride_w)
+                        vfmadd231ps(Ymm(ur_w * ii + jj),
+                                Ymm(nb_ic_block * ur_w + jj / stride_w), ymm15);
                 }
             }
         }
-        add(aux_reg_kernel, sizeof(float) * kw  * oc_block * ic_block * stride_h);
+        add(aux_reg_kernel, sizeof(float) * stride_h * kw  * oc_block
+                                          * ic_block);
         sub(aux_reg_ddst, sizeof(float) * ow * oc_block);
 
-        sub(kj, stride_h);
+        dec(kj);
         cmp(kj, 0);
-        jg(kh_label, T_NEAR);
+        jg(kh_loop, T_NEAR);
     }
+    L(skip_kh_loop);
 
     if (jcp.ndims == 5) {
         sub(aux_reg_dst_d,
@@ -787,6 +792,39 @@ void jit_avx2_conv_bwd_data_kernel_f32::hsw_iter(int ur_w, int l_overflow,
         pop(oi_iter);
     }
 
+    if (one_of(jcp.ndims, 3, 4)) {
+        int ddst_oc_shift = sizeof(float) * jcp.od * jcp.oh * jcp.ow
+                          * jcp.oc_block;
+        int kernel_oc_shift = sizeof(float) * jcp.kd * jcp.kh * jcp.kw
+                          * jcp.ic * jcp.oc_block;
+
+        add(aux_reg_ddst_oc_loop, ddst_oc_shift);
+        add(aux_reg_kernel_oc_loop, kernel_oc_shift);
+
+        inc(reg_channel);
+        cmp(reg_channel, reg_channel_work);
+        jl(oc_loop, T_NEAR);
+
+        L(skip_oc_loop);
+        mov(reg_channel, ptr[param1 + GET_OFF(channel)]);
+    }
+
+    Label no_update_label;
+    cmp(reg_channel, 0);
+    je(no_update_label, T_NEAR);
+    for (int ii = 0; ii < nb_ic_block; ii++) {
+        for (int jj = 0; jj < ur_w; jj++) {
+            size_t offt =
+                sizeof(float) * ((size_t)ii * id * ih * iw + jj) * ic_block;
+            vmovups(Ymm(15),
+                    make_safe_addr(reg_dsrc, offt, reg_long_offt));
+            vaddps(Ymm(ur_w * ii + jj), Ymm(ur_w * ii + jj),
+                    Ymm(15));
+
+        }
+    }
+    L(no_update_label);
+
     for (int ii = 0; ii < nb_ic_block; ii++)
         for (int jj = 0; jj < ur_w; jj++) {
             size_t offt =
@@ -799,79 +837,63 @@ void jit_avx2_conv_bwd_data_kernel_f32::hsw_iter(int ur_w, int l_overflow,
 void jit_avx2_conv_bwd_data_kernel_f32::generate() {
     preamble();
 
-    auto hsw_iter_body = [=] (int ur_w, int l_overflow, int r_overflow) {
-        if (jcp.stride_w == 1) {
-            hsw_iter(ur_w, l_overflow, r_overflow, 0);
-            add(reg_dsrc, sizeof(float) * jcp.ur_w * jcp.ic_block);
-            add(reg_ddst, sizeof(float) * jcp.ur_w * jcp.oc_block);
-        } else {
-            Label hsw_iter_off_0;
-            Label hsw_iter_off_1;
-            Label hsw_iter_exit;
-
-            int dst_off = jcp.ur_w / jcp.stride_w;
-
-            and_(start_off_reg, 1);
-
-            L(hsw_iter_off_0); {
-                cmp(start_off_reg, 0);
-                jg(hsw_iter_off_1, T_NEAR);
-
-                hsw_iter(ur_w, l_overflow, r_overflow, 0);
-                add(reg_dsrc, sizeof(float) * jcp.ur_w * jcp.ic_block);
-                add(reg_ddst, sizeof(float) * dst_off * jcp.oc_block);
-
-                jmp(hsw_iter_exit, T_NEAR);
-            }
-
-            L(hsw_iter_off_1); {
-                hsw_iter(ur_w, l_overflow, r_overflow, 1);
-                add(reg_dsrc, sizeof(float) * jcp.ur_w * jcp.ic_block);
-                add(reg_ddst, sizeof(float) * (dst_off + 1) * jcp.oc_block);
-            }
-
-            L(hsw_iter_exit);
-            add(start_off_reg, std::abs(jcp.ur_w - jcp.stride_w));
-        }
-    };
-
     mov(reg_dsrc, ptr[this->param1 + GET_OFF(src)]);
     mov(reg_ddst, ptr[this->param1 + GET_OFF(dst)]);
     mov(reg_kernel, ptr[this->param1 + GET_OFF(filt)]);
     mov(reg_kh, ptr[this->param1 + GET_OFF(kh_padding)]);
+    mov(reg_channel, ptr[param1 + GET_OFF(channel)]);
+    mov(reg_channel_work, ptr[param1 + GET_OFF(ch_blocks)]);
 
-    int n_oi = jcp.iw / jcp.ur_w;
-    xor_(oi_iter, oi_iter);
-    xor_(start_off_reg, start_off_reg);
+    int ddst_shift = sizeof(float) * (jcp.ur_w / jcp.stride_w) * jcp.ic_block;
+    int dsrc_shift = sizeof(float) * jcp.ur_w * jcp.oc_block;
 
-    int l_overflow = nstl::max(0, jcp.kw - 1 - jcp.l_pad);
-    if (l_overflow > 0) {
-        hsw_iter_body(jcp.ur_w, l_overflow, 0);
-        inc(oi_iter);
-    }
+    int l_overflow = nstl::max(0, (jcp.kw - 1 - jcp.l_pad) / jcp.stride_w);
+    int r_overflow = nstl::max(0, (jcp.kw - 1
+                    - nstl::max(0, jcp.r_pad)) / jcp.stride_w);
+    int r_overflow1 = nstl::max(0, (jcp.kw - 1
+                    - nstl::max(0, jcp.r_pad) - jcp.ur_w_tail) / jcp.stride_w);
 
-    int r_pad = jcp.iwp - jcp.iw - jcp.l_pad;
-    int r_overflow1
-        = nstl::max(0, jcp.kw - 1 - (jcp.iw - jcp.ur_w * n_oi) - r_pad);
-    int r_overflow = nstl::max(0, jcp.kw - 1 - r_pad);
+    int n_oi = jcp.iw / jcp.ur_w;
     if (r_overflow1 > 0)
         n_oi--;
 
-    if ((l_overflow <= 0 && n_oi > 0) || (l_overflow >  0 && n_oi > 1)) {
-        Label ow_loop;
-        L(ow_loop); {
-            hsw_iter_body(jcp.ur_w, 0, 0);
+    if (jcp.ur_w == jcp.iw) {
+        compute_loop(jcp.ur_w, l_overflow, r_overflow);
+    } else if (n_oi == 0) {
+        compute_loop(jcp.ur_w, l_overflow, r_overflow1);
+        add(reg_dsrc, dsrc_shift);
+        add(reg_ddst, ddst_shift);
+        if (jcp.ur_w_tail != 0)
+            compute_loop(jcp.ur_w_tail, 0, r_overflow);
+    } else {
+        xor_(oi_iter, oi_iter);
+        if (l_overflow > 0) {
+            compute_loop(jcp.ur_w, l_overflow, 0);
+            add(reg_dsrc, dsrc_shift);
+            add(reg_ddst, ddst_shift);
             inc(oi_iter);
-            cmp(oi_iter, n_oi);
-            jl(ow_loop, T_NEAR);
         }
-    }
 
-    if (r_overflow1 > 0 )
-        hsw_iter_body(jcp.ur_w, 0, r_overflow1);
+        if ((l_overflow <= 0 && n_oi > 0) || (l_overflow >  0 && n_oi > 1)) {
+            Label ow_loop;
+            L(ow_loop); {
+                compute_loop(jcp.ur_w, 0, 0);
+                add(reg_dsrc, dsrc_shift);
+                add(reg_ddst, ddst_shift);
+                inc(oi_iter);
+                cmp(oi_iter, n_oi); jl(ow_loop, T_NEAR);
+            }
+        }
 
-    if (jcp.ur_w_tail != 0)
-        hsw_iter_body(jcp.ur_w_tail, 0, r_overflow);
+        if (r_overflow1 > 0 ) {
+            compute_loop(jcp.ur_w, 0, r_overflow1);
+            add(reg_dsrc, dsrc_shift);
+            add(reg_ddst, ddst_shift);
+        }
+
+        if (jcp.ur_w_tail != 0)
+            compute_loop(jcp.ur_w_tail, 0, r_overflow);
+    }
 
     this->postamble();
 }
@@ -930,6 +952,10 @@ status_t jit_avx2_conv_bwd_data_kernel_f32::init_conf(jit_conv_conf_t &jcp,
     bool ok_to_pad_channels = true
         && jcp.ngroups == 1;
 
+    /* gemm-based convolution performs better in these cases */
+    if (jcp.ic < simd_w && jcp.kw > 3 && jcp.stride_w > 1)
+        return status::unimplemented;
+
     if (ok_to_pad_channels) {
         jcp.oc = rnd_up(jcp.oc, simd_w);
         jcp.ic = rnd_up(jcp.ic, simd_w);
@@ -945,16 +971,19 @@ status_t jit_avx2_conv_bwd_data_kernel_f32::init_conf(jit_conv_conf_t &jcp,
     jcp.ur_h = 1; /* no code-unrolling by h so far */
     jcp.nb_ic_blocking = 1;
     jcp.nb_oc_blocking = 1;
+    jcp.ur_w = 1;
+
+    if(one_of(ndims, 3, 4) && jcp.ow < 40)
+        jcp.nb_oc_blocking = jcp.ow < 15 ? 4 : 2;
 
     jcp.src_fmt = diff_src_d.format();
-    jcp.with_eltwise = false;
 
     bool args_ok = true
         && one_of(diff_src_d.format(), nCw8c, nChw8c, nCdhw8c)
         && one_of(weights_d.format(), gOIw8o8i, OIw8i8o, gOIhw8o8i, OIhw8o8i,
                 gOIdhw8o8i, OIdhw8o8i)
         && one_of(diff_dst_d.format(), nCw8c, nChw8c, nCdhw8c)
-        && (jcp.stride_w == 1 || jcp.stride_w == 2)
+        && jcp.stride_w == jcp.stride_h
         && jcp.stride_d == 1
         && jcp.dilate_d == 0
         && jcp.dilate_h == 0
@@ -965,34 +994,69 @@ status_t jit_avx2_conv_bwd_data_kernel_f32::init_conf(jit_conv_conf_t &jcp,
         && jcp.oh == (jcp.ihp - jcp.kh) / jcp.stride_h + 1
         && jcp.ow == (jcp.iwp - jcp.kw) / jcp.stride_w + 1;
     if (!args_ok) return status::unimplemented;
+    jcp.r_pad = (jcp.ow - 1) * jcp.stride_w + jcp.kw - jcp.iw - jcp.l_pad;
+    jcp.b_pad = (jcp.oh - 1) * jcp.stride_h + jcp.kh - jcp.ih - jcp.t_pad;
+    int l_overflow = nstl::max(0, (jcp.kw - 1 - jcp.l_pad) / jcp.stride_w);
+
+    const int max_regs = 15; /* Maximun number of registers available for
+                                result accumulation and delta dst data.
+                                One additional register is reserved for weights
+                                data. */
+
+    /* Find the best blocking with maximum number of fma instructions
+       per ur_w * nb_ic_blocking compute loops. Number of required registers
+       is num_regs = ur_w * nb_ic_blocking + ur_w / stride_w <= max_regs.
+       ur_w must be divisible by stride_w */
+    if (jcp.stride_w + 1 > max_regs)  /* Minimal possible registers
+                                         distribution exceeds max_regs */
+        return status::unimplemented;
 
-    jcp.ur_w = 3;
-
-    for (int b = 4; b > 1; b--)
+    int best_nfmas = 0;
+    for (int b = 1; b <= 4; b++)
     {
-        if (jcp.nb_ic % b == 0)
+        if (jcp.nb_ic % b != 0)
+            continue;
+
+        for (int u = jcp.stride_w;
+             u * b + u / jcp.stride_w <= max_regs && u < jcp.iw + jcp.stride_w;
+             u += jcp.stride_w)
         {
-            jcp.nb_ic_blocking = b;
-            break;
+            int ur_w = nstl::min(u, jcp.iw);
+            /* maximum 1 step with l_overflow so far */
+            if (l_overflow * jcp.stride_w > ur_w && ur_w != jcp.iw)
+                continue;
+            int nfmas = utils::div_up(ur_w, jcp.stride_w) * b;
+            if (nfmas > best_nfmas
+               || (nfmas == best_nfmas && jcp.ur_w < ur_w)) {
+                jcp.ur_w = ur_w;
+                jcp.nb_ic_blocking = b;
+                best_nfmas = nfmas;
+            }
         }
     }
+    if (best_nfmas == 0) /* can't find appropriate blocking */
+        return status::unimplemented;
 
     jcp.ur_w_tail = jcp.iw % jcp.ur_w;
-    int l_overflow = nstl::max(0, jcp.kw - 1 - jcp.l_pad);
-    if (l_overflow > jcp.ur_w) /* maximum 1 step with l_overflow so far */
-        return status::unimplemented;
-    int r_pad = jcp.iwp - jcp.iw - jcp.l_pad;
-    int r_overflow_step0 = nstl::max(0, jcp.kw - 1 - (jcp.iw - jcp.ur_w) - r_pad);
-    if (l_overflow > 0 && r_overflow_step0 > 0) /* no steps with both left and
-                                                   right overflow so far */
+
+    int r_overflow_no_tail = nstl::max(0, (jcp.kw - 1 - jcp.ur_w_tail
+                    - nstl::max(0, jcp.r_pad) - jcp.ur_w_tail) / jcp.stride_w);
+    /* maximum 1 ur_w block with r_overflow so far */
+    if (r_overflow_no_tail * jcp.stride_w > jcp.ur_w)
         return status::unimplemented;
-    int r_overflow_no_tail = nstl::max(0,jcp.kw - 1 - jcp.ur_w_tail - r_pad);
-    if (r_overflow_no_tail > jcp.ur_w) /* maximum 1 ur_w block with
-                                          r_overflow so far */
+
+    if ((jcp.iw > jcp.ur_w) && (jcp.ur_w % jcp.stride_w != 0))
         return status::unimplemented;
+
     return status::success;
 }
 
+void jit_avx2_conv_bwd_data_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    UNUSED(scratchpad);
+    UNUSED(jcp);
+}
+
 void jit_avx2_conv_bwd_weights_kernel_f32::generate() {
     this->preamble();
 
@@ -1045,8 +1109,6 @@ status_t jit_avx2_conv_bwd_weights_kernel_f32::init_conf(jit_conv_conf_t &jcp,
 
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.diff_bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = false;
-    jcp.eltwise_alpha = 0;
 
     const bool flat = jcp.ic == 3;
     const bool mimo = !flat;
@@ -1097,9 +1159,16 @@ status_t jit_avx2_conv_bwd_weights_kernel_f32::init_conf(jit_conv_conf_t &jcp,
     jcp.oc_block = simd_w;
     jcp.nb_oc = jcp.oc / jcp.oc_block;
     jcp.nb_ic_blocking = jcp.nb_oc_blocking = 1;
+
     return status::success;
 }
 
+void jit_avx2_conv_bwd_weights_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    if (jcp.with_bias && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp.oc);
+}
+
 inline void jit_avx2_conv_bwd_weights_kernel_f32::od_step_comeback_pointers()
 {
     Label kd_comeback_loop;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.hpp
index f37005440..0c4eb319d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_conv_kernel_f32.hpp
@@ -18,9 +18,11 @@
 #define JIT_AVX2_CONV_KERNEL_F32_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
+#include "cpu_memory.hpp"
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
-#include "cpu_memory.hpp"
 #include "jit_uni_eltwise.hpp"
 #include "jit_uni_depthwise.hpp"
 
@@ -29,8 +31,9 @@ namespace impl {
 namespace cpu {
 
 struct jit_avx2_conv_fwd_kernel_f32: public jit_generator {
-    jit_avx2_conv_fwd_kernel_f32(jit_conv_conf_t ajcp,
-            const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
+    jit_avx2_conv_fwd_kernel_f32(jit_conv_conf_t ajcp, jit_conv_conf_t ajcp_dw,
+            const primitive_attr_t &attr)
+        : jcp(ajcp), jcp_dw(ajcp_dw), attr_(attr)
     {
         this->generate();
         jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
@@ -54,11 +57,12 @@ struct jit_avx2_conv_fwd_kernel_f32: public jit_generator {
             const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &dst_d,
-            const primitive_attr_t &attr,
-            bool with_relu = false,
-            float relu_negative_slope = 0.);
+            const primitive_attr_t &attr);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw = jit_conv_conf_t());
 
     jit_conv_conf_t jcp;
+    jit_conv_conf_t jcp_dw;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_conv_call_s *);
 
@@ -84,7 +88,7 @@ private:
     reg64_t reg_long_offt = r15;
     Xbyak::Reg32 reg_ci_flag = r13d;
 
-    Xbyak::Ymm ymask = Xbyak::Ymm(14);
+    Xbyak::Ymm ytmp = Xbyak::Ymm(14);
 
     reg64_t reg_d_weights = imm_addr64;
     reg64_t reg_d_bias = ki_iter;
@@ -116,6 +120,8 @@ struct jit_avx2_conv_bwd_data_kernel_f32: public jit_generator {
             const convolution_desc_t &cd, const memory_desc_wrapper &diff_src_d,
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &diff_dst_d);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
 
     jit_conv_conf_t jcp;
     void (*jit_ker)(jit_conv_call_s *);
@@ -123,33 +129,52 @@ struct jit_avx2_conv_bwd_data_kernel_f32: public jit_generator {
 private:
     using reg64_t = const Xbyak::Reg64;
 
-    reg64_t reg_input      = rax;
     reg64_t reg_ddst       = rax;
-    reg64_t aux_reg_input  = r8;
     reg64_t aux_reg_ddst   = r8;
-    reg64_t aux1_reg_input = r9;
     reg64_t reg_kernel     = rdx;
     reg64_t aux_reg_kernel = r10;
-    reg64_t reg_output     = rsi;
     reg64_t reg_dsrc       = rsi;
-    reg64_t aux_reg_output = rbx;
-    reg64_t aux_reg_dsrc = rbx;
+    reg64_t aux_reg_ddst_oc_loop  = rbx; // used in ndims < 5 case only
+    reg64_t aux_reg_kernel_oc_loop = abi_not_param1; /* used in ndims < 5
+                                                        case only */
 
-    reg64_t aux_reg_dst_d = r12;
-    reg64_t aux_reg_ker_d = r14;
+    reg64_t aux_reg_dst_d = r12; // used in ndims == 5 case only
+    reg64_t aux_reg_ker_d = r14; // used in ndims == 5 case only
 
-    reg64_t reg_ki  = abi_not_param1;
+    reg64_t reg_ki  = abi_not_param1; // used in ndims == 5 case only
     reg64_t kj      = r11;
     reg64_t oi_iter = r12;
     reg64_t reg_kh  = r14;
-    reg64_t ki_iter = r13;
+    reg64_t reg_channel = r13;  // used in ndims < 5 case only
+    reg64_t reg_channel_work = r9;  // used in ndims < 5 case only
     reg64_t reg_long_offt = r15;
-    reg64_t start_off_reg = aux1_reg_input;
 
-    inline void hsw_iter(int ur_w, int l_overflow, int r_overflow,
-            int start_off);
+    inline void compute_loop(int ur_w, int l_overflow, int r_overflow);
 
     void generate();
+
+    inline int get_iw_start(int ki, int l_overflow)
+    {
+        int res = (jcp.iw - 1 + jcp.r_pad) % jcp.stride_w
+                + l_overflow * jcp.stride_w
+                - (jcp.kw - 1 - ki) * (jcp.dilate_w + 1);
+        while (res < 0)
+            res += jcp.stride_w;
+
+        return res;
+    }
+
+    inline int get_iw_end(int ur_w, int ki, int r_overflow)
+    {
+        if (utils::one_of(ur_w, jcp.iw, jcp.ur_w_tail))
+            ur_w += nstl::min(0, jcp.r_pad); // remove negative padding
+        int res = (ur_w - 1 + jcp.l_pad) % jcp.stride_w
+                + r_overflow * jcp.stride_w - ki * (jcp.dilate_w + 1);
+        while (res < 0)
+            res += jcp.stride_w;
+
+        return ur_w - res;
+    }
 };
 
 struct jit_avx2_conv_bwd_weights_kernel_f32: public jit_generator {
@@ -165,6 +190,8 @@ struct jit_avx2_conv_bwd_weights_kernel_f32: public jit_generator {
             const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &diff_weights_d,
             const memory_desc_wrapper &diff_dst_d);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
 
     jit_conv_conf_t jcp;
     void (*jit_ker)(jit_conv_call_s *);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.cpp
index e9ccf6f95..d7ea64b4e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.cpp
@@ -14,14 +14,13 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include <cstring>
-#include "mkldnn_types.h"
-
 #include "c_types_map.hpp"
-#include "jit_avx2_convolution.hpp"
-#include "utils.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
+#include "utils.hpp"
+#include <cstring>
+
+#include "jit_avx2_convolution.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -29,39 +28,38 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
-
 #define src_blk_off(f, n, c, d, h, w) \
-    (conf_.ndims() == 3) \
+    (pd()->ndims() == 3) \
     ? (f).blk_off(n, c, w) \
-    : (conf_.ndims() == 4) \
+    : (pd()->ndims() == 4) \
     ? (f).blk_off(n, c, h, w) \
     : (f).blk_off(n, c, d, h, w)
 
 #define wht_blk_off_(f, g, ...) \
-    conf_.with_groups() ? (f).blk_off(g, __VA_ARGS__) : (f).blk_off(__VA_ARGS__)
+    pd()->with_groups() ? (f).blk_off(g, __VA_ARGS__) : (f).blk_off(__VA_ARGS__)
 #define wht_blk_off(f, g, oc, ic, kd, kh, kw) \
-    (conf_.ndims() == 3) \
+    (pd()->ndims() == 3) \
     ? wht_blk_off_(f, g, oc, ic, kw) \
-    : (conf_.ndims() == 4) \
+    : (pd()->ndims() == 4) \
     ? wht_blk_off_(f, g, oc, ic, kh, kw) \
     : wht_blk_off_(f, g, oc, ic, kd, kh, kw)
 
-template <bool with_relu>
-void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward() {
+void jit_avx2_convolution_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
     const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.od
@@ -86,7 +84,7 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward() {
                 int ocb_num = jcp.nb_oc_blocking;
 
                 for (int icb = icbb; icb < icbb + icb_step; ++icb) {
-                    jit_conv_call_s par_conv = {};
+                    auto par_conv = jit_conv_call_s();
 
                     const int ij = oh * jcp.stride_h;
                     const int i_t_overflow = nstl::max(0, jcp.t_pad - ij);
@@ -99,7 +97,7 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward() {
                         + (jcp.kd-1) * (jcp.dilate_d+1) - jcp.f_pad+1) - jcp.id;
 
                     const size_t _oc = g * jcp.nb_oc + ocb;
-                    const size_t _ic = g * jcp.nb_ic + icb;
+                    const size_t _ic = g * jcp.nb_ic * jcp.nonblk_group_off + icb;
 
                     const int ih = nstl::max(ij - jcp.t_pad
                         + div_up(i_t_overflow,
@@ -155,31 +153,35 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward() {
         }
     };
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
     }
 
     parallel(0, ker);
+
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
 }
 
-template <bool with_relu>
-void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward_fusing() {
+void jit_avx2_convolution_fwd_t::execute_forward_with_dw_conv() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
     const auto &jcp_dw = kernel_dw_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
-    auto dw_bias = jcp.dw_conv_biases;
+    auto dw_bias = jcp_dw.conv_biases;
 
     int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
     const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.oh;
@@ -189,8 +191,8 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward_fusing() {
             for (int h = 0; h < num_rows; h++) {
                 if ((oh + h) < 0 || (oh + h) >= jcp.oh) {
                     for (int chb = ocb; chb < ocb + ocb_num; chb++) {
-                        memset(ws_p + (((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow * jcp.oc_block +
-                               (chb - ocb) * jcp.dw_conv_ker_h * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
+                        memset(ws_p + (((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block +
+                               (chb - ocb) * jcp_dw.kh * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
                     }
                 } else {
                     for (int icb = 0; icb < jcp.nb_ic; ++icb) {
@@ -211,11 +213,11 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                         par_conv.src = &src[src_d.blk_off(n,
                                                           jcp.ic == 3 ? 0 : _ic, ih, 0)];
 
-                        par_conv.dst = &ws_p[(((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow *
+                        par_conv.dst = &ws_p[(((oh + h) + 1) % jcp_dw.kh) * jcp.ow *
                                              jcp.oc_block];
 
                         const int wh = div_up(i_t_overflow, (jcp.dilate_h + 1));
-                        par_conv.filt = &weights[conf_.with_groups()
+                        par_conv.filt = &weights[pd()->with_groups()
                                                  ? weights_d.blk_off(g, ocb,
                                                                      jcp.ic == 3 ? 0 : icb, wh, 0)
                                                  : weights_d.blk_off(ocb,
@@ -264,9 +266,11 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                                        dst_idx/jcp_dw.stride_h*jcp_dw.ow*jcp_dw.ch_block];
 
                 par_conv_dw.kh_padding = jcp_dw.kh;
-                par_conv_dw.filt = &jcp.dw_conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
+                par_conv_dw.filt = &jcp_dw.conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
                 par_conv_dw.bias = &dw_bias[chb * jcp_dw.ch_block];
                 par_conv_dw.ur_w = (size_t)(jcp_dw.ow);
+                par_conv_dw.oc_work = nstl::min((chb + 1) * jcp_dw.ch_block, (int)jcp_dw.oc) - chb*jcp_dw.ch_block;
+                par_conv_dw.oc_off = chb * jcp_dw.ch_block * sizeof(float);
 
                 kernel_dw_->jit_ker(&par_conv_dw);
             }
@@ -275,7 +279,9 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         size_t start{0}, end{0};
         balance211(work_amount, nthr, ithr, start, end);
 
-        auto pbuf = dw_conv_buffer_ + ithr * dw_conv_buffer_size_;
+        auto dw_conv_buffer = scratchpad().get<data_t>(key_dw_conv_buffer);
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * jcp.nb_oc_blocking;
+        auto pbuf = dw_conv_buffer + ithr * dw_conv_buffer_size_;
 
         size_t n{0}, g{0}, ocbb{0}, oh{0};
         nd_iterator_init(start, n, MB, g, jcp.ngroups, ocbb, ocb_work,
@@ -304,138 +310,156 @@ void _jit_avx2_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         }
     };
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            dw_padded_bias_[oc] = dw_bias[oc];
-        dw_bias = dw_padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
+
+        auto dw_padded_bias = scratchpad().get<data_t>(key_dw_conv_padded_bias);
+        utils::array_copy(dw_padded_bias, dw_bias, jcp.oc_without_padding);
+        utils::array_set(dw_padded_bias + jcp.oc_without_padding, 0.f,
+                         jcp.oc - jcp.oc_without_padding);
+        dw_bias = dw_padded_bias;
     }
 
     parallel(0, ker);
-}
 
-template void _jit_avx2_convolution_fwd_t<true>::execute_forward();
-template void _jit_avx2_convolution_fwd_t<false>::execute_forward();
-template void _jit_avx2_convolution_fwd_t<true>::execute_forward_fusing();
-template void _jit_avx2_convolution_fwd_t<false>::execute_forward_fusing();
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
+}
 
-void jit_avx2_convolution_bwd_data_t::execute_backward_data() {
+void jit_avx2_convolution_bwd_data_t::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     int icb_work = jcp.nb_ic / jcp.nb_ic_blocking;
-    const size_t work_amount = MB * jcp.ngroups * icb_work * jcp.ih;
+    int ih_block_size = jcp.ih;
+    int num_ih_blocks = utils::div_up(jcp.ih, ih_block_size);
+    size_t work_amount = MB * jcp.ngroups * icb_work * num_ih_blocks;
+    if (work_amount < (size_t)2 * mkldnn_get_max_threads()) {
+        ih_block_size = 1;
+        num_ih_blocks = utils::div_up(jcp.ih, ih_block_size);
+        work_amount *= num_ih_blocks;
+    }
 
     auto ker = [&](const int ithr, const int nthr) {
         size_t start{0}, end{0};
         balance211(work_amount, nthr, ithr, start, end);
 
-        size_t n{0}, g{0}, icbb{0}, ih{0};
-        nd_iterator_init(start, n, MB, g, jcp.ngroups, icbb, icb_work, ih, jcp.ih);
+        size_t n{0}, g{0}, icbb{0}, ihb{0};
+        nd_iterator_init(start, n, MB, g, jcp.ngroups, icbb, icb_work,
+                         ihb, num_ih_blocks);
+
         for (size_t iwork = start; iwork < end; ++iwork) {
-            for (int oc = 0; oc < jcp.nb_oc; ++oc)
+            for (int oc = 0; oc < jcp.nb_oc; oc += jcp.nb_oc_blocking)
             for (int id = 0; id < jcp.id; ++id) {
                 auto par_conv = jit_conv_call_s();
 
                 const int idp = jcp.id + 2 * jcp.f_pad;
                 const int d_t_overflow = nstl::max(0,
-                                                   jcp.kd - 1 - id - jcp.f_pad);
+                        jcp.kd - 1 - id - jcp.f_pad);
                 const int back_pad = idp - jcp.id - jcp.f_pad;
                 const int d_b_overflow = nstl::max(0,
-                                                   jcp.kd - 1 - (jcp.id - 1 - id) - back_pad);
+                        jcp.kd - 1 - (jcp.id - 1 - id) - back_pad);
                 const int od = id + jcp.f_pad - d_b_overflow;
 
-                const int simd_w = 8;
-
-                const int i_t_overflow = nstl::max(0,
-                                                   jcp.kh - 1 - (int)ih - jcp.t_pad);
-                const int b_pad = jcp.ihp - jcp.ih - jcp.t_pad;
-                const int i_b_overflow = nstl::max(0,
-                                                   jcp.kh - 1 - (jcp.ih - 1 - (int)ih) - b_pad);
-                int oh = ih + jcp.t_pad - i_b_overflow;
-
-                int stride_off_h = oh % jcp.stride_h;
-                oh /= jcp.stride_h;
-
-                par_conv.src = &diff_src[src_blk_off(diff_src_d, n,
-                                         /*jcp.ic == 3 ? 0 :*/
-                                                     g * jcp.nb_ic + jcp.nb_ic_blocking * icbb, id, ih, 0)];
-                par_conv.dst = &diff_dst[src_blk_off(diff_dst_d,
-                                                     n, g * jcp.nb_oc + oc, od, oh, 0)];
-                par_conv.filt = &weights[wht_blk_off(weights_d, g, oc,
-                                                     jcp.ic == 3 ? 0 : jcp.nb_ic_blocking * icbb,
-                                                     d_b_overflow, i_b_overflow + stride_off_h, 0)];
-
-                par_conv.src_prf = nullptr;
-                par_conv.dst_prf = nullptr;
-                par_conv.filt_prf = nullptr;
-                // TODO: move initialization into the kernel
-                if (oc == 0) {
-                    for (int iw = 0; iw < jcp.iw; iw++) {
-                        for (int b = 0; b < jcp.nb_ic_blocking; b++) {
-                            int current_ic =
-                                    (jcp.ic == 3 ? 0 : g * jcp.nb_ic)
-                                    + jcp.nb_ic_blocking * icbb + b;
-                            int current_idx =
-                                    src_blk_off(diff_src_d, n, current_ic,
-                                                id, ih, iw);
-                            for (int v = 0; v < simd_w; v++)
-                                diff_src[current_idx + v] = 0.0;
-                        }
-                    }
-                }
+                int ih_start = ihb * ih_block_size;
+                int ih_end = nstl::min(jcp.ih, ih_start + ih_block_size);
+                for (int ih = ih_start; ih < ih_end; ++ih) {
+
+                    const int i_t_overflow = nstl::max(0, (jcp.kh - 1
+                                        - ih - jcp.t_pad) / jcp.stride_h);
+                    const int i_b_overflow = nstl::max(0, (jcp.kh - jcp.ih
+                                        + ih - jcp.b_pad) / jcp.stride_h);
+                    int overflow_kh_hi = jcp.kh - 1 - abs((jcp.ih - 1
+                                + jcp.b_pad - ih) % jcp.stride_h);
+                    int overflow_kh_lo = (ih + jcp.t_pad) % jcp.stride_h;
+
+                    par_conv.kd_padding = jcp.kd - d_t_overflow - d_b_overflow;
+                    par_conv.kh_padding = (overflow_kh_hi - overflow_kh_lo)
+                              / jcp.stride_h + 1 - i_t_overflow - i_b_overflow;
+                    par_conv.kw_padding = 0;
 
-                par_conv.kd_padding = jcp.kd - d_t_overflow - d_b_overflow;
-                par_conv.kh_padding = nstl::max(0, jcp.kh - i_t_overflow - i_b_overflow - stride_off_h);
-                par_conv.kw_padding = 0;
+                    const int k_lo = overflow_kh_lo
+                                   + i_b_overflow * jcp.stride_h;
+                    const int oh = (ih + jcp.t_pad - k_lo) / jcp.stride_h;
+
+                    par_conv.src = &diff_src[src_blk_off(diff_src_d, n,
+                        /*jcp.ic == 3 ? 0 :*/
+                        g * jcp.nb_ic + jcp.nb_ic_blocking * icbb, id, ih, 0)];
+                    par_conv.dst = &diff_dst[src_blk_off(diff_dst_d,
+                            n, g * jcp.nb_oc + oc, od, oh, 0)];
+                    par_conv.filt = &weights[wht_blk_off(weights_d, g, oc,
+                                jcp.ic == 3 ? 0 : jcp.nb_ic_blocking * icbb,
+                                d_b_overflow, k_lo, 0)];
+
+                    par_conv.src_prf = nullptr;
+                    par_conv.dst_prf = nullptr;
+                    par_conv.filt_prf = nullptr;
+                    par_conv.channel = oc;
+                    par_conv.ch_blocks = nstl::min(jcp.nb_oc - oc,
+                                       jcp.nb_oc_blocking);
 
-                if (par_conv.kh_padding > 0)
                     kernel_->jit_ker(&par_conv);
+                }
             }
-            nd_iterator_step(n, MB, g, jcp.ngroups, icbb, icb_work, ih, jcp.ih);
+            nd_iterator_step(n, MB, g, jcp.ngroups, icbb, icb_work, ihb,
+                             num_ih_blocks);
         }
     };
 
     parallel(0, ker);
 }
 
-void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() {
+void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_weights = reinterpret_cast<data_t *>(this->memory(0));
     auto diff_bias_in = reinterpret_cast<data_t *>(this->memory(1));
-    data_t *diff_bias = conf_.want_padded_bias() ? padded_bias_ : diff_bias_in;
 
-    const memory_desc_wrapper src_d(conf_.src_pd(0));
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
+    auto scratchpad = this->scratchpad();
+
+    data_t *diff_bias = pd()->wants_padded_bias()
+        ? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
+
+    const memory_desc_wrapper src_d(pd()->src_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
 
+    auto reducer_bia_scratchpad = memory_tracking::grantor_t(scratchpad,
+            prefix_reducer_bia);
+    auto rb = this->reducer_bias_;
+    rb->init(reducer_bia_scratchpad);
+
+    auto reducer_wei_scratchpad = memory_tracking::grantor_t(scratchpad,
+            prefix_reducer_wei);
+    auto rw = this->reducer_weights_;
+    rw->init(reducer_wei_scratchpad);
+
     auto ker = [&](int ithr, int nthr) {
-        auto rw = this->reducer_weights_;
-        assert(nthr == rw->balancer_.nthr_);
+        assert(nthr == rw->balancer().nthr_);
 
-        const int w_job_start = rw->balancer_.ithr_job_off(ithr);
-        const int w_njobs = rw->balancer_.ithr_njobs(ithr);
+        const int w_job_start = rw->balancer().ithr_job_off(ithr);
+        const int w_njobs = rw->balancer().ithr_njobs(ithr);
 
         if (w_njobs == 0) return;
 
         /* reduction dimension */
         int img_od_start{0}, img_od_end{0}, img{0}, od_s{0};
-        balance211(jcp.mb * jcp.od, rw->balancer_.nthr_per_group_,
-                rw->balancer_.id_in_group(ithr), img_od_start, img_od_end);
+        balance211(jcp.mb * jcp.od, rw->balancer().nthr_per_group_,
+                rw->balancer().id_in_group(ithr), img_od_start, img_od_end);
 
         int img_start = img_od_start, img_end = img_od_end;
         nd_iterator_init(img_start, img, jcp.mb, od_s, jcp.od);
@@ -461,9 +485,10 @@ void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() {
 
                 /* TODO: put dw <-- 0 in kernel */
                 if (img == img_first)
-                    array_set((data_t *)&rw->get_local_ptr(ithr, diff_weights)[
-                        w_job_loc * rw->balancer_.job_size_], 0,
-                            rw->balancer_.job_size_);
+                    array_set(rw->get_local_ptr(ithr, diff_weights,
+                                reducer_wei_scratchpad) +
+                            w_job_loc * rw->balancer().job_size_, 0,
+                            rw->balancer().job_size_);
 
                 for (int od = od_s; od < od_e; ++od) {
                     const int id = od * jcp.stride_d;
@@ -473,8 +498,9 @@ void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() {
                     par_conv.src = &src[src_blk_off(src_d, img, _ic, id, 0, 0)];
                     par_conv.dst =
                         &diff_dst[src_blk_off(diff_dst_d, img, _oc, od, 0, 0)];
-                    par_conv.filt = &rw->get_local_ptr(ithr, diff_weights)[
-                        w_job_loc * rw->balancer_.job_size_];
+                    par_conv.filt = rw->get_local_ptr(ithr, diff_weights,
+                            reducer_wei_scratchpad) +
+                        w_job_loc * rw->balancer().job_size_;
 
                     kernel_->jit_ker(&par_conv);
                 }
@@ -483,22 +509,21 @@ void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() {
             }
             nd_iterator_jump(img_start, img_end, img, jcp.mb, od_s, jcp.od);
         }
-        rw->reduce(ithr, diff_weights);
+        rw->reduce(ithr, diff_weights, reducer_wei_scratchpad);
     };
 
     auto ker_bias = [&](int ithr, int nthr) {
-        auto rb = this->reducer_bias_;
-        assert(nthr == rb->balancer_.nthr_);
+        assert(nthr == rb->balancer().nthr_);
 
-        const int b_job_start = rb->balancer_.ithr_job_off(ithr);
-        const int b_njobs = rb->balancer_.ithr_njobs(ithr);
+        const int b_job_start = rb->balancer().ithr_job_off(ithr);
+        const int b_njobs = rb->balancer().ithr_njobs(ithr);
 
         if (b_njobs == 0) return;
 
         /* reduction dimension */
         int img_start{0}, img_end{0};
-        balance211(jcp.mb, rb->balancer_.nthr_per_group_,
-                rb->balancer_.id_in_group(ithr), img_start, img_end);
+        balance211(jcp.mb, rb->balancer().nthr_per_group_,
+                rb->balancer().id_in_group(ithr), img_start, img_end);
 
         /* jobs */
         int g_start{0}, ocb_start{0};
@@ -511,8 +536,9 @@ void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() {
                 const size_t _oc = g * jcp.nb_oc + ocb;
 
                 const data_t *d_dst = &diff_dst[diff_dst_d.blk_off(img, _oc)];
-                data_t *d_bias = &rb->get_local_ptr(ithr, diff_bias)[
-                    b_job_loc * rb->balancer_.job_size_];
+                data_t *d_bias = rb->get_local_ptr(ithr, diff_bias,
+                        reducer_bia_scratchpad) +
+                    b_job_loc * rb->balancer().job_size_;
 
                 if (img == img_start)
                     for (int o = 0; o < 8; ++o)
@@ -528,18 +554,17 @@ void jit_avx2_convolution_bwd_weights_t::execute_backward_weights() {
                 nd_iterator_step(g, jcp.ngroups, ocb, jcp.nb_oc);
             }
         }
-        rb->reduce(ithr, diff_bias);
+        rb->reduce(ithr, diff_bias, reducer_bia_scratchpad);
     };
 
-
     parallel(0, [&](const int ithr, const int nthr) {
         ker(ithr, nthr);
-        if (conf_.with_bias())
+        if (pd()->with_bias())
             ker_bias(ithr, nthr);
     });
 
     /* TODO: put this in ker_bias */
-    if (conf_.want_padded_bias()) {
+    if (pd()->wants_padded_bias()) {
         assert(jcp.ngroups == 1);
         for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
             diff_bias_in[oc] = diff_bias[oc];
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.hpp
index bd151dd05..1dff656bf 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx2_convolution.hpp
@@ -18,74 +18,73 @@
 #define CPU_JIT_AVX2_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
 #include "cpu_convolution_pd.hpp"
-#include "cpu_engine.hpp"
 #include "cpu_reducer.hpp"
-#include "jit_primitive_conf.hpp"
+
 #include "jit_avx2_conv_kernel_f32.hpp"
-#include "mkldnn_thread.hpp"
 #include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu>
-struct _jit_avx2_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
+struct jit_avx2_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
         pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+                const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
-            , jcp_(), jcp_dw() {}
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_(), jcp_dw_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit:", avx2, ""),
-                _jit_avx2_convolution_fwd_t<with_relu>);
+                jit_avx2_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                        alg_kind::convolution_auto,
+                        alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
-                        this->cdesc_().src_desc.data_type,
-                        this->cdesc_().weights_desc.data_type,
-                        this->cdesc_().dst_desc.data_type)
+                        this->desc()->src_desc.data_type,
+                        this->desc()->weights_desc.data_type,
+                        this->desc()->dst_desc.data_type)
                 && IMPLICATION(this->with_bias(),
-                        data_type::f32 == this->cdesc_().bias_desc.data_type);
+                        data_type::f32 == this->desc()->bias_desc.data_type);
             if (!ok) return status::unimplemented;
 
-            status_t sts = jit_avx2_conv_fwd_kernel_f32::init_conf(jcp_, this->cdesc_(),
-                    *this->src_pd_.desc(), *this->weights_pd_.desc(),
-                    *this->dst_pd_.desc(), *this->attr(),
-                    with_relu, this->negative_slope());
+
+
+            status_t sts = jit_avx2_conv_fwd_kernel_f32::init_conf(jcp_,
+                    *this->desc(), *this->src_pd_.desc(),
+                    *this->weights_pd_.desc(), *this->dst_pd_.desc(),
+                    *this->attr());
             if (sts != status::success) return sts;
 
             if (jcp_.with_dw_conv) {
-                int dw_conv_oh = (jcp_.oh - ((jcp_.dw_conv_ker_h - 1) + 1) + 2) / jcp_.dw_conv_str_h + 1;
-                int dw_conv_ow = (jcp_.ow - ((jcp_.dw_conv_ker_w - 1) + 1) + 2) / jcp_.dw_conv_str_w + 1;
-
-                status_t sts_dw = jit_uni_dw_conv_row_f32<avx2>::init_conf(jcp_dw,
-                                                                      jcp_.oc, jcp_.oh, jcp_.ow, dw_conv_oh, dw_conv_ow,
-                                                                      jcp_.dw_conv_ker_h, jcp_.dw_conv_ker_w,
-                                                                      jcp_.dw_conv_str_h, jcp_.dw_conv_str_w,
-                                                                      jcp_.dw_conv_eltwise_alg, jcp_.dw_conv_eltwise_alpha,
-                                                                      jcp_.dw_conv_eltwise_beta, jcp_.dw_conv_with_sum);
+                status_t sts_dw = jit_uni_dw_conv_row_f32<avx2>::init_conf(jcp_, jcp_dw_, *this->attr());
                 if (sts_dw != status::success) return sts_dw;
             }
 
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx2_conv_fwd_kernel_f32::init_scratchpad(scratchpad, jcp_, jcp_dw_);
+
             return status::success;
         }
 
         jit_conv_conf_t jcp_;
-        jit_conv_conf_t jcp_dw;
+        jit_conv_conf_t jcp_dw_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -109,62 +108,36 @@ struct _jit_avx2_convolution_fwd_t: public cpu_primitive_t {
 
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    _jit_avx2_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_avx2_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd),
-          padded_bias_(nullptr),
-          dw_conv_buffer_size_(0), dw_conv_buffer_(nullptr), dw_padded_bias_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
     {
-        kernel_ = new jit_avx2_conv_fwd_kernel_f32(conf_.jcp_, *conf_.attr());
-
-        if (conf_.want_padded_bias()) {
-            const auto &j = conf_.jcp_;
-            assert(j.ngroups == 1);
-            padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                padded_bias_[oc] = 0;
-        }
+        kernel_ = new jit_avx2_conv_fwd_kernel_f32(pd()->jcp_, pd()->jcp_dw_, *pd()->attr());
 
-        if (conf_.jcp_.with_dw_conv) {
-            kernel_dw_ = new jit_uni_dw_conv_row_f32<avx2>(conf_.jcp_dw);
-        }
-
-        if (conf_.jcp_.with_dw_conv) {
-            const int nthreads = mkldnn_get_max_threads();
-            dw_conv_buffer_size_ = (size_t)conf_.jcp_dw.kh * conf_.jcp_dw.iw * conf_.jcp_dw.ch_block *
-                                      conf_.jcp_.nb_oc_blocking;
-            dw_conv_buffer_ = (float *)malloc(nthreads * dw_conv_buffer_size_ * sizeof(float), 64);
-
-            if (conf_.want_padded_bias()) {
-                const auto &j = conf_.jcp_;
-                assert(j.ngroups == 1);
-                dw_padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-                for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                    dw_padded_bias_[oc] = 0;
-            }
+        if (pd()->jcp_.with_dw_conv) {
+            kernel_dw_ = new jit_uni_dw_conv_row_f32<avx2>(pd()->jcp_dw_, *pd()->attr(), pd()->jcp_dw_.ch_block);
         }
     }
 
-    ~_jit_avx2_convolution_fwd_t() {
+    ~jit_avx2_convolution_fwd_t() {
         delete kernel_;
-        free(padded_bias_);
 
-        if (conf_.jcp_.with_dw_conv) {
+        if (pd()->jcp_.with_dw_conv) {
             delete kernel_dw_;
-            free(dw_conv_buffer_);
-            free(dw_padded_bias_);
         }
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.jcp_.with_dw_conv)
-            execute_forward_fusing();
+    virtual void execute(event_t *e) const {
+        if (pd()->jcp_.with_dw_conv)
+            execute_forward_with_dw_conv();
         else
             execute_forward();
 
@@ -172,23 +145,14 @@ struct _jit_avx2_convolution_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward();
-    void execute_forward_fusing();
+    void execute_forward() const;
+    void execute_forward_with_dw_conv() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     jit_avx2_conv_fwd_kernel_f32 *kernel_;
-    data_t *padded_bias_;
     jit_uni_dw_conv_row_f32<avx2> *kernel_dw_;
-
-    /* fuse with dw conv */
-    size_t dw_conv_buffer_size_;
-    data_t *dw_conv_buffer_;
-    data_t *dw_padded_bias_;
 };
 
-using jit_avx2_convolution_fwd_t = _jit_avx2_convolution_fwd_t<false>;
-using jit_avx2_convolution_relu_t = _jit_avx2_convolution_fwd_t<true>;
-
 struct jit_avx2_convolution_bwd_data_t: public cpu_primitive_t {
     struct pd_t: public cpu_convolution_bwd_data_pd_t {
         pd_t(engine_t *engine,
@@ -209,7 +173,8 @@ struct jit_avx2_convolution_bwd_data_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && utils::one_of(this->desc()->prop_kind, backward_data)
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->diff_src_desc.data_type,
@@ -217,9 +182,16 @@ struct jit_avx2_convolution_bwd_data_t: public cpu_primitive_t {
                         this->desc()->diff_dst_desc.data_type);
             if (!ok) return status::unimplemented;
 
-            return jit_avx2_conv_bwd_data_kernel_f32::init_conf(jcp_,
-                    *this->desc(), *this->diff_src_pd_.desc(),
+            status_t status = jit_avx2_conv_bwd_data_kernel_f32::init_conf(
+                    jcp_, *this->desc(), *this->diff_src_pd_.desc(),
                     *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx2_conv_bwd_data_kernel_f32::init_scratchpad(scratchpad,
+                    jcp_);
+
+            return status::success;
         }
 
         jit_conv_conf_t jcp_;
@@ -240,20 +212,22 @@ struct jit_avx2_convolution_bwd_data_t: public cpu_primitive_t {
                         gOIdhw8o8i)
                     : utils::pick(this->ndims() - 3, OIw8o8i, OIhw8o8i,
                         OIdhw8o8i)));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    jit_avx2_convolution_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    jit_avx2_convolution_bwd_data_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    { kernel_ = new jit_avx2_conv_bwd_data_kernel_f32(conf_.jcp_); }
-    ~jit_avx2_convolution_bwd_data_t() { delete kernel_; };
+        : cpu_primitive_t(apd, inputs, outputs)
+    { kernel_ = new jit_avx2_conv_bwd_data_kernel_f32(pd()->jcp_); }
+    ~jit_avx2_convolution_bwd_data_t() { delete kernel_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             execute_backward_data();
             break;
@@ -264,8 +238,9 @@ struct jit_avx2_convolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx2_conv_bwd_data_kernel_f32 *kernel_;
 };
 
@@ -286,7 +261,8 @@ struct jit_avx2_convolution_bwd_weights_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == prop_kind::backward_weights
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->src_desc.data_type,
@@ -294,13 +270,32 @@ struct jit_avx2_convolution_bwd_weights_t: public cpu_primitive_t {
                         this->desc()->diff_weights_desc.data_type);
             if (!ok) return status::unimplemented;
 
-            return jit_avx2_conv_bwd_weights_kernel_f32::init_conf(jcp_,
-                    *this->desc(), *this->src_pd_.desc(),
+            status_t status = jit_avx2_conv_bwd_weights_kernel_f32::init_conf(
+                    jcp_, *this->desc(), *this->src_pd_.desc(),
                     *this->diff_weights_pd_.desc(),
                     *this->diff_dst_pd_.desc());
+            if (status != status::success) return status;
+
+            init_balancers();
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx2_conv_bwd_weights_kernel_f32::init_scratchpad(scratchpad,
+                    jcp_);
+
+            auto reducer_bia_scratchpad = memory_tracking::registrar_t(
+                    scratchpad, memory_tracking::names::prefix_reducer_bia);
+            reducer_bia_conf_.init_scratchpad(reducer_bia_scratchpad);
+
+            auto reducer_wei_scratchpad = memory_tracking::registrar_t(
+                    scratchpad, memory_tracking::names::prefix_reducer_wei);
+            reducer_wei_conf_.init_scratchpad(reducer_wei_scratchpad);
+
+            return status::success;
         }
 
         jit_conv_conf_t jcp_;
+        cpu_reducer_t<data_type::f32>::conf_t reducer_bia_conf_;
+        cpu_reducer_t<data_type::f32>::conf_t reducer_wei_conf_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -322,54 +317,61 @@ struct jit_avx2_convolution_bwd_weights_t: public cpu_primitive_t {
                         OIhw8i8o, Ohwi8o, OIdhw8i8o, Odhwi8o)));
             if (this->diff_bias_pd_.desc()->format == any)
                 CHECK(this->diff_bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
+
+    private:
+        void init_balancers() {
+            const int max_threads = mkldnn_get_max_threads();
+            const size_t max_buffer_size = 1<<21; /* just a heuristic */
+
+            if(with_bias()) {
+                reducer_bia_conf_.init(reduce_balancer_t(max_threads,
+                            jcp_.oc_block, jcp_.ngroups * jcp_.nb_oc, jcp_.mb,
+                            max_buffer_size));
+            }
+
+            reducer_wei_conf_.init(reduce_balancer_t(max_threads,
+                        jcp_.kd * jcp_.kh * jcp_.kw
+                        * jcp_.ic_block * jcp_.oc_block,
+                        jcp_.ngroups * jcp_.nb_ic * jcp_.nb_oc,
+                        jcp_.mb * jcp_.od, max_buffer_size));
+        }
     };
 
-    jit_avx2_convolution_bwd_weights_t(const pd_t *pd,
+    jit_avx2_convolution_bwd_weights_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
+        : cpu_primitive_t(apd, inputs, outputs)
         , kernel_(nullptr), reducer_weights_(nullptr), reducer_bias_(nullptr)
-        , padded_bias_(nullptr)
     {
-        kernel_ = new jit_avx2_conv_bwd_weights_kernel_f32(conf_.jcp_);
-
-        const int max_threads = mkldnn_get_max_threads();
-        const size_t max_buffer_size = 1<<21; /* just a heuristic */
-        const auto &j = conf_.jcp_;
-        reducer_weights_ = new cpu_reducer_t<data_type::f32>(reduce_balancer_t(
-                max_threads, j.kd * j.kh * j.kw * j.ic_block * j.oc_block,
-                j.ngroups * j.nb_ic * j.nb_oc, j.mb * j.od, max_buffer_size));
-        if (conf_.with_bias()) {
-            reducer_bias_ = new cpu_reducer_t<data_type::f32>(
-                    reduce_balancer_t(max_threads, j.oc_block,
-                        j.ngroups * j.nb_oc, j.mb, max_buffer_size));
-
-            if (conf_.want_padded_bias())
-                padded_bias_ = (data_t *)
-                    malloc(sizeof(data_t) * j.oc, 64);
-        }
+        kernel_ = new jit_avx2_conv_bwd_weights_kernel_f32(pd()->jcp_);
+        reducer_bias_ =
+            new cpu_reducer_t<data_type::f32>(pd()->reducer_bia_conf_);
+        reducer_weights_ =
+            new cpu_reducer_t<data_type::f32>(pd()->reducer_wei_conf_);
     }
+
     ~jit_avx2_convolution_bwd_weights_t() {
         delete kernel_;
         delete reducer_weights_;
         delete reducer_bias_;
-        free(padded_bias_);
-    };
+    }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward_weights();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_weights();
-    pd_t conf_;
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx2_conv_bwd_weights_kernel_f32 *kernel_;
     cpu_reducer_t<data_type::f32> *reducer_weights_, *reducer_bias_;
-    data_t *padded_bias_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.cpp
index 30f1823d6..bdfee8110 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.cpp
@@ -13,13 +13,19 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
+
+#include <assert.h>
 #include <float.h>
+
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
-#include "mkldnn_thread.hpp"
 #include "utils.hpp"
+
 #include "cpu_memory.hpp"
+#include "cpu_barrier.hpp"
 
 #include "jit_uni_1x1_conv_utils.hpp"
 #include "jit_avx512_common_1x1_conv_kernel.hpp"
@@ -257,14 +263,23 @@ void jit_avx512_common_1x1_conv_kernel::reduce_loop(int load_loop_blk,
         int depthwise_inj_idx = 0;
         const auto &p = attr_.post_ops_;
 
-        if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-            eltwise_injectors[0]->compute_vector_range(0, ur * load_loop_blk);
-        }
-
         for (int i = 0; i < p.len_; i++) {
             auto& post_op = p.entry_[i];
             if (post_op.is_eltwise()) {
-                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur * load_loop_blk);
+                if (jcp.ver == ver_4vnni) {
+                    zmm_t zmm_zero = vreg_bcast;
+                    vpxord(zmm_zero, zmm_zero, zmm_zero);
+
+                    for (int i_ur = 0; i_ur < ur; ++i_ur) {
+                        for (int i_load = 0; i_load < load_loop_blk; ++i_load) {
+                            Zmm zmm = vreg_accum(i_load, i_ur);
+                            vpcmpd(k1, zmm, zmm_zero, _cmp_lt_os);
+                            vpmulld(zmm | k1, zmm, zmm_zero);
+                        }
+                    }
+                } else {
+                    eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur * load_loop_blk);
+                }
                 eltwise_inj_idx++;
             } else if (post_op.is_depthwise()) {
                 mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
@@ -502,12 +517,6 @@ void jit_avx512_common_1x1_conv_kernel::reduce_loop(int load_loop_blk,
 
 void jit_avx512_common_1x1_conv_kernel::generate()
 {
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
     const auto &p = attr_.post_ops_;
     for (int i = 0; i < p.len_; i++) {
         auto &post_op = p.entry_[i];
@@ -542,6 +551,8 @@ void jit_avx512_common_1x1_conv_kernel::generate()
     mov(EVEX_compress_addr(rsp, bcast_loop_work_offt), reg_bcast_loop_work);
     mov(reg_reduce_loop_work, ptr[param1 + GET_OFF(reduce_dim)]);
     mov(reg_reduce_pos_flag, ptr[param1 + GET_OFF(first_last_flag)]);
+    if (one_of(jcp.prop_kind, forward_training, forward_inference))
+        mov(reg_relu_ns, reinterpret_cast<size_t>(&jcp.eltwise.alpha));
     if (jcp.prop_kind == backward_weights)
         mov(reg_output_stride, ptr[param1 + GET_OFF(output_stride)]);
     mov(reg_oc_off, ptr[param1 + GET_OFF(oc_off)]);
@@ -653,30 +664,20 @@ bool jit_avx512_common_1x1_conv_kernel::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true; // no post_ops
-    case 1:
-        return true // sum OR eltwise OR depthwise
-                && !jcp.with_eltwise && (is_simple(0) || is_sum(0));
-    case 2:
-        return true // sum->relu
-                && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) ||
-                                         (is_simple(0) && is_simple(1)));
-    case 3:
-        return true // sum->relu
-                && !jcp.with_eltwise && (is_sum(0) && is_simple(1) && is_simple(2));
+    case 0: return true;
+    case 1: return is_simple(0) || is_sum(0);
+    case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_simple(1));
+    case 3: return is_sum(0) && is_simple(1) && is_simple(2);
     default: return false;
     }
 
     return false;
 }
 
-status_t jit_avx512_common_1x1_conv_kernel::init_conf(
-        jit_1x1_conv_conf_t &jcp, const convolution_desc_t &cd,
-        const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
-        const memory_desc_wrapper &dst_d, const primitive_attr_t &attr,
-        bool with_relu, float relu_negative_slope,
-        int nthreads, bool reduce_src)
-{
+status_t jit_avx512_common_1x1_conv_kernel::init_conf(jit_1x1_conv_conf_t &jcp,
+        const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
+        const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
+        const primitive_attr_t &attr, int nthreads, bool reduce_src) {
     if (!mayiuse(avx512_common)) return status::unimplemented;
 
     const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
@@ -715,11 +716,9 @@ status_t jit_avx512_common_1x1_conv_kernel::init_conf(
     jcp.stride_w = cd.strides[ndims - 3];
 
     jcp.src_fmt = src_d.format();
-    jcp.with_bias = one_of(jcp.prop_kind, forward_training, forward_inference)
-        ? cd.bias_desc.format != memory_format::undef : false;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
+    jcp.with_bias = pick_by_prop_kind(jcp.prop_kind, cd.bias_desc.format,
+            memory_format::undef, cd.diff_bias_desc.format)
+        != memory_format::undef;
 
     jcp.os = jcp.oh * jcp.ow;
     jcp.is = jcp.ih * jcp.iw;
@@ -730,6 +729,12 @@ status_t jit_avx512_common_1x1_conv_kernel::init_conf(
 
     const auto &p = attr.post_ops_;
     jcp.with_sum = p.find(primitive_kind::sum) != -1;
+    const int eltwise_ind = p.find(primitive_kind::eltwise);
+    jcp.with_eltwise = eltwise_ind != -1;
+    if (jcp.with_eltwise) {
+        jcp.eltwise = p.entry_[eltwise_ind].eltwise;
+        if (dst_d.data_type() == data_type::s32) return status::unimplemented;
+    }
 
     bool args_ok = true
         && jcp.ngroups == 1
@@ -894,9 +899,7 @@ status_t jit_avx512_common_1x1_conv_kernel::init_conf(
         } else {
             bool is4ops = (jcp.ver == ver_4fma || jcp.ver == ver_4vnni);
 
-//            max_regs = is4ops ? 28 : 30;
-            // FIXME (ichuraev): it is a fix for densnet-121
-            max_regs = 28;
+            max_regs = is4ops ? 28 : 30;
             min_regs = 9;
             size_treshold = is4ops ? 28 : 14;
             ur_step = is4ops ? 4 : 1;
@@ -1062,6 +1065,48 @@ status_t jit_avx512_common_1x1_conv_kernel::init_conf(
             load_blocking = jcp.load_block;
         }
 
+        if (jcp.ver == ver_4fma && jcp.bcast_dim * jcp.mb < jcp.load_dim
+                && jcp.oh * jcp.ow > 64
+                && IMPLICATION(reduce_src, jcp.load_dim < 1024)) {
+            /* Looking for best loading dimension blocking
+            * to get the best thread and data read/write efficiency
+            * by finding the optimal 'load_chunk' value
+            * Example:
+            * for 72 threads and convolution with mb=1, ih=iw=7, oc = 512
+            * the 'best' load_chunk value should be 1
+            * TODO: remove heuristic constants in above condition
+            * TODO: check this blocking for other ISA
+            */
+            float best_eff = -1.f;
+            int best_lgc = 1;
+
+            for (int load_chunk = 1; load_chunk <= nb_load; load_chunk++) {
+                int lgc = div_up(nb_load, load_chunk);
+                if (lgc > nthreads)
+                    continue;
+                int thr_per_grp = div_up(nthreads, lgc);
+                int bcast_per_thr = div_up(jcp.mb * nb_bcast, thr_per_grp)
+                        * jcp.bcast_block;
+                int load_per_thr = load_chunk * simd_w;
+                float data_norm = (bcast_per_thr + load_per_thr) / 2.f;
+                float data_eff = (bcast_per_thr * load_per_thr)
+                        / (data_norm * data_norm);
+                float thr_eff_over_grp = (float)nstl::max(1, nthreads / lgc)
+                        / div_up(nthreads, lgc);
+                float thr_eff_in_grp = ((float)jcp.mb * nb_bcast)
+                        / rnd_up(jcp.mb * nb_bcast, thr_per_grp);
+                float thr_eff = thr_eff_over_grp * thr_eff_in_grp;
+                float load_eff = (float)nb_load / rnd_up(nb_load, lgc);
+                float overall_eff = data_eff + thr_eff + load_eff;
+                if (overall_eff > best_eff) {
+                    best_eff = overall_eff;
+                    best_lgc = lgc;
+                }
+            }
+            jcp.load_grp_count = best_lgc;
+            load_blocking
+                    = div_up(nb_load, jcp.load_grp_count) * jcp.load_block;
+        }
         bcast_blocking = div_up(jcp.mb * jcp.ngroups * nb_bcast,
                                  div_up(nthreads, jcp.load_grp_count))
                 * jcp.bcast_block;
@@ -1230,6 +1275,30 @@ status_t jit_avx512_common_1x1_conv_kernel::init_conf(
     return status::success;
 }
 
+void jit_avx512_common_1x1_conv_kernel::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad,
+        const jit_1x1_conv_conf_t &jcp) {
+    using namespace mkldnn::impl::memory_tracking::names;
+
+    if (jcp.prop_kind != backward_data && jcp.with_bias
+            && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, jcp.typesize_out * jcp.oc);
+
+    if (jcp.prop_kind == backward_weights) {
+        const size_t wei_size = (size_t)jcp.ngroups * jcp.oc * jcp.ic;
+        scratchpad.book(key_conv_wei_reduction,
+                jcp.typesize_out * wei_size * (jcp.nthr_mb - 1));
+    }
+
+    if (jcp.transpose_src) {
+        const size_t tr_src_size =
+            (size_t)jcp.nthr_mb * jcp.ngroups * jcp.ic * jcp.tr_is;
+        scratchpad.book(key_conv_tr_src, jcp.typesize_out * tr_src_size);
+        scratchpad.book(key_conv_tr_src_bctx,
+                sizeof(simple_barrier::ctx_t) * jcp.nthr);
+    }
+}
+
 void jit_avx512_common_1x1_conv_kernel::balance(jit_1x1_conv_conf_t &jcp,
         int nthreads)
 {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.hpp
index 31d5b62ed..af7ca95a4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_conv_kernel.hpp
@@ -18,6 +18,8 @@
 #define JIT_AVX512_COMMON_1x1_CONV_KERNEL_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
 #include "jit_uni_eltwise.hpp"
@@ -29,7 +31,8 @@ namespace cpu {
 
 struct jit_avx512_common_1x1_conv_kernel : public jit_generator {
     jit_avx512_common_1x1_conv_kernel(jit_1x1_conv_conf_t ajcp,
-            const primitive_attr_t &attr) : jcp(ajcp), attr_(attr)
+            const primitive_attr_t &attr)
+        : jcp(ajcp), attr_(attr)
     {
         this->generate();
         jit_ker = (void (*)(jit_1x1_conv_call_s *)) this->getCode();
@@ -51,25 +54,15 @@ struct jit_avx512_common_1x1_conv_kernel : public jit_generator {
                                 const primitive_attr_t &attr);
 
     static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-                                const convolution_desc_t &cd,
-                                const memory_desc_wrapper &src_d,
-                                const memory_desc_wrapper &weights_d,
-                                const memory_desc_wrapper &dst_d,
-                                const primitive_attr_t &attr,
-                                bool with_relu, float relu_negative_slope,
-                                int nthreads, bool reduce_src);
+            const convolution_desc_t &cd,
+            const memory_desc_wrapper &src_d,
+            const memory_desc_wrapper &weights_d,
+            const memory_desc_wrapper &dst_d,
+            const primitive_attr_t &attr,
+            int nthreads, bool reduce_src);
 
-    static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-                              const convolution_desc_t &cd,
-                              const memory_desc_wrapper &src_d,
-                              const memory_desc_wrapper &weights_d,
-                              const memory_desc_wrapper &dst_d,
-                              const primitive_attr_t &attr,
-                              int nthreads, bool reduce_src)
-    {
-        return init_conf(jcp, cd, src_d, weights_d, dst_d, attr, false, 0.0,
-        nthreads, reduce_src);
-    }
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_1x1_conv_conf_t &jcp);
 
     jit_1x1_conv_conf_t jcp;
     const primitive_attr_t &attr_;
@@ -78,7 +71,6 @@ struct jit_avx512_common_1x1_conv_kernel : public jit_generator {
   private:
     using reg64_t = const Xbyak::Reg64;
     using zmm_t = const Xbyak::Zmm;
-    using mask_t = const Xbyak::Opmask;
 
     reg64_t reg_bcast_data = r8;
     reg64_t reg_load_data = r10;
@@ -95,6 +87,7 @@ struct jit_avx512_common_1x1_conv_kernel : public jit_generator {
     reg64_t reg_reduce_pos_flag = rax;
     reg64_t reg_output_stride = r13;
     reg64_t reg_bias_data = r12;
+    reg64_t reg_relu_ns = r13;
     reg64_t reg_bcast_loop_work = aux1_reg_bcast_data;
 
     Xbyak::Zmm vreg_bcast = Xbyak::Zmm(31);
@@ -115,6 +108,7 @@ struct jit_avx512_common_1x1_conv_kernel : public jit_generator {
     void generate();
     static void balance(jit_1x1_conv_conf_t &jcp, int nthreads);
 };
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.cpp
index da381219f..099f1bda9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.cpp
@@ -14,22 +14,22 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_types.h"
-
 #include "c_types_map.hpp"
-#include "jit_avx512_common_1x1_convolution.hpp"
-#include "utils.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
+#include "utils.hpp"
 
 #include "jit_generator.hpp"
 
+#include "jit_avx512_common_1x1_convolution.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 #define data_blk_off(f, n, c, h, w) \
@@ -37,74 +37,84 @@ using namespace mkldnn::impl::utils;
     ? (f).blk_off(n, c, w) \
     : (f).blk_off(n, c, h, w))
 
+
 namespace {
 template <typename T, typename U>
 void balance2D(U nthr, U ithr, T ny, T &ny_start, T &ny_end,
     T nx, T &nx_start, T &nx_end, T nx_divider)
 {
-    const T grp_size = utils::div_up(nthr, nx_divider);
-    const T grp_count = utils::div_up(nthr, grp_size);
-
-    T grp = ithr / grp_size;
-    T grp_ithr = ithr % grp_size;
-    T grp_nthr = grp_size;
-    T first_grps = nthr % grp_count;
-    if (first_grps > 0 && grp >= first_grps) {
-        ithr -= first_grps * grp_size;
-        grp_nthr--;
-        grp = ithr / grp_nthr + first_grps;
-        grp_ithr = ithr % grp_nthr;
+    const int grp_count = nstl::min(nx_divider, nthr);
+    const int grp_size_big = nthr / grp_count + 1;
+    const int grp_size_small = nthr / grp_count;
+    const int n_grp_big = nthr % grp_count;
+    const int threads_in_big_groups = n_grp_big * grp_size_big;
+
+    const int ithr_bound_distance = ithr - threads_in_big_groups;
+    T grp, grp_ithr, grp_nthr;
+    if (ithr_bound_distance < 0) { // ithr in first groups
+        grp = ithr / grp_size_big;
+        grp_ithr = ithr % grp_size_big;
+        grp_nthr = grp_size_big;
+    } else { // ithr in last groups
+        grp = n_grp_big + ithr_bound_distance / grp_size_small;
+        grp_ithr = ithr_bound_distance % grp_size_small;
+        grp_nthr = grp_size_small;
     }
+
     balance211(nx, grp_count, grp, nx_start, nx_end);
     balance211(ny, grp_nthr, grp_ithr, ny_start, ny_end);
 }
 }
 /* convolution forward */
 
-template <bool with_relu, data_type_t src_type, data_type_t wei_type,
-        data_type_t dst_type>
-void _jit_avx512_common_1x1_convolution_fwd_t
-    <with_relu, src_type, wei_type, dst_type>::execute_forward()
-{
+template <data_type_t src_type, data_type_t wei_type, data_type_t dst_type>
+void jit_avx512_common_1x1_convolution_fwd_t<src_type, wei_type, dst_type>::
+execute_forward() const {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights =
         reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const dst_data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
+    auto scratchpad = this->scratchpad();
+
     auto &jcp = kernel_->jcp;
-    if (conf_.want_padded_bias()) {
-        assert(jcp.ngroups == 1);
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad.template get<dst_data_t>(
+                key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
     }
 
     parallel(0, [&](const int ithr, const int nthr) {
-        execute_forward_thr(ithr, nthr, src, weights, bias, dst);
+        execute_forward_thr(ithr, nthr, src, weights, bias, dst, scratchpad);
     });
+
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
 }
 
-template <bool with_relu, data_type_t src_type, data_type_t wei_type,
-        data_type_t dst_type>
-void _jit_avx512_common_1x1_convolution_fwd_t
-    <with_relu, src_type, wei_type, dst_type>::execute_forward_thr(
-            const int ithr, const int nthr,
-            const src_data_t *src, const wei_data_t *weights,
-            const dst_data_t *bias, dst_data_t *dst)
-{
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+template <data_type_t src_type, data_type_t wei_type, data_type_t dst_type>
+void jit_avx512_common_1x1_convolution_fwd_t<src_type, wei_type, dst_type>::
+execute_forward_thr(const int ithr, const int nthr, const src_data_t *src,
+        const wei_data_t *weights, const dst_data_t *bias, dst_data_t *dst,
+        const memory_tracking::grantor_t &scratchpad) const {
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    auto rtus_space = scratchpad.get<src_data_t>(key_conv_rtus_space);
 
     const int ndims = src_d.ndims();
-    const int stride_h = (ndims == 3) ? 1 : conf_.cdesc()->strides[0];
-    const int stride_w = conf_.cdesc()->strides[ndims - 3];
-    const int pad_t = (ndims == 3) ? 0 : conf_.cdesc()->padding[0][0];
-    const int pad_l = conf_.cdesc()->padding[0][ndims - 3];
+    const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[ndims - 3];
+    const int pad_t = (ndims == 3) ? 0 : pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][ndims - 3];
 
-    auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const auto &jcp = kernel_->jcp;
+    const int MB = pd()->MB();
     const int work_amount = MB * jcp.ngroups * jcp.nb_bcast;
 
     auto step = [](int default_step, int remaining, int tail_step) {
@@ -179,13 +189,13 @@ void _jit_avx512_common_1x1_convolution_fwd_t
 
         p.output_data = &dst[dst_off];
         p.bias_data = &bias[_ocb * jcp.oc_block];
-        p.load_data = &weights[conf_.with_groups()
+        p.load_data = &weights[pd()->with_groups()
             ? weights_d.blk_off(g, ocb, icb)
             : weights_d.blk_off(ocb, icb)];
 
         const int _icb = g * nb_ic + icb;
-        if (conf_.rtus_.reduce_src_) {
-            rp.ws = scratch_ + ithr * ws_per_thread_
+        if (pd()->rtus_.reduce_src_) {
+            rp.ws = rtus_space + ithr * pd()->rtus_.space_per_thread_
                 + _icb * jcp.is * jcp.ic_block;
             if (ocb == ocb_start) {
                 rp.src = src + data_blk_off(src_d, n, _icb, ih, iw);
@@ -274,40 +284,39 @@ void _jit_avx512_common_1x1_convolution_fwd_t
 }
 
 
-template struct _jit_avx512_common_1x1_convolution_fwd_t<true, data_type::f32>;
-template struct _jit_avx512_common_1x1_convolution_fwd_t<false, data_type::f32>;
-template struct _jit_avx512_common_1x1_convolution_fwd_t<false, data_type::s16,
-    data_type::s16, data_type::s32>;
-template struct _jit_avx512_common_1x1_convolution_fwd_t<true, data_type::s16,
+template struct jit_avx512_common_1x1_convolution_fwd_t<data_type::f32>;
+template struct jit_avx512_common_1x1_convolution_fwd_t<data_type::s16,
     data_type::s16, data_type::s32>;
 /* convolution backward wtr data */
 
 template <data_type_t diff_dst_type, data_type_t wei_type,
-    data_type_t diff_src_type>
-void _jit_avx512_common_1x1_convolution_bwd_data_t
-    <diff_dst_type, wei_type, diff_src_type>::execute_backward_data()
-{
+         data_type_t diff_src_type>
+void jit_avx512_common_1x1_convolution_bwd_data_t<diff_dst_type, wei_type,
+     diff_src_type>::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const diff_dst_data_t *>
         (this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>
         (this->input_memory(1));
     auto diff_src = reinterpret_cast<diff_src_data_t *>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+
+    auto rtus_space = scratchpad().template get<diff_src_data_t>(
+            key_conv_rtus_space);
 
     const int ndims = diff_src_d.ndims();
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     // TODO (Roma): remove this restriction
     assert(jcp.stride_w == 1 && jcp.stride_h == 1);
 
-    const int stride_h = (ndims == 3) ? 1 : conf_.desc()->strides[0];
-    const int stride_w = conf_.desc()->strides[ndims - 3];
-    const int pad_t = (ndims == 3) ? 0 : conf_.desc()->padding[0][0];
-    const int pad_l = conf_.desc()->padding[0][ndims - 3];
+    const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[ndims - 3];
+    const int pad_t = (ndims == 3) ? 0 : pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][ndims - 3];
 
     const int nb_ic = jcp.nb_load;
     const int nb_oc = jcp.nb_reduce;
@@ -376,8 +385,9 @@ void _jit_avx512_common_1x1_convolution_bwd_data_t
 
                     const int _icb = g * nb_ic + icb;
                     rp.src = diff_src + data_blk_off(diff_src_d, n, _icb, ih, iw);
-                    if (conf_.rtus_.reduce_src_) {
-                        rp.ws = scratch_ + ithr * ws_per_thread_;
+                    if (pd()->rtus_.reduce_src_) {
+                        rp.ws = rtus_space
+                            + ithr * pd()->rtus_.space_per_thread_;
                         p.output_data = rp.ws;
                     } else
                         p.output_data = rp.src;
@@ -395,7 +405,7 @@ void _jit_avx512_common_1x1_convolution_bwd_data_t
                         size_t diff_dst_off = data_blk_off(diff_dst_d, n, _ocb, oh, ow);
                         p.bcast_data = &diff_dst[diff_dst_off];
 
-                        p.load_data = &weights[conf_.with_groups()
+                        p.load_data = &weights[pd()->with_groups()
                             ? weights_d.blk_off(g, ocb, icb)
                             : weights_d.blk_off(ocb, icb)];
 
@@ -406,7 +416,7 @@ void _jit_avx512_common_1x1_convolution_bwd_data_t
 
                         kernel_->jit_ker(&p);
                     }
-                    if (conf_.rtus_.reduce_src_)
+                    if (pd()->rtus_.reduce_src_)
                         rtus_driver_->ker_(&rp);
                 }
             }
@@ -414,87 +424,81 @@ void _jit_avx512_common_1x1_convolution_bwd_data_t
     });
 }
 
-template struct _jit_avx512_common_1x1_convolution_bwd_data_t<data_type::f32>;
-template struct _jit_avx512_common_1x1_convolution_bwd_data_t<data_type::s16,
+template struct jit_avx512_common_1x1_convolution_bwd_data_t<data_type::f32>;
+template struct jit_avx512_common_1x1_convolution_bwd_data_t<data_type::s16,
     data_type::s16, data_type::s32>;
 
 /* convolution backward wtr weights */
 
 #define wht_blk_off(d, g, ...) \
-        (conf_.with_groups() \
+        (pd()->with_groups() \
          ? (d).blk_off((g), __VA_ARGS__) \
          : (d).blk_off(__VA_ARGS__))
 
 jit_avx512_common_1x1_convolution_bwd_weights_t ::
-        jit_avx512_common_1x1_convolution_bwd_weights_t(const pd_t *pd,
+        jit_avx512_common_1x1_convolution_bwd_weights_t(const pd_t *apd,
                 const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs)
-    , conf_(*pd), kernel_(nullptr), acc_ker_(nullptr), reducer_bias_(nullptr)
-    , trans_kernel_(nullptr), rtus_driver_(nullptr), ws_per_thread_(0)
-    , scratch_(nullptr), padded_bias_(nullptr), bctx_(nullptr)
-    , tr_src_(nullptr), ws_reduction_(nullptr)
+    : cpu_primitive_t(apd, inputs, outputs)
+    , kernel_(nullptr), acc_ker_(nullptr), reducer_bias_(nullptr)
+    , trans_kernel_(nullptr), rtus_driver_(nullptr)
 {
-    kernel_ = new jit_avx512_common_1x1_conv_kernel(conf_.jcp_, *conf_.attr());
-
-    const auto &jcp = kernel_->jcp;
-
-    const int wei_size = jcp.ngroups * jcp.oc * jcp.ic;
-    ws_reduction_ =
-        (data_t *)malloc((jcp.nthr_mb - 1) * wei_size * sizeof(data_t), 64);
+    kernel_ = new jit_avx512_common_1x1_conv_kernel(pd()->jcp_, *pd()->attr());
     acc_ker_ = new cpu_accumulator_1d_t<data_type::f32>();
+    reducer_bias_ = new cpu_reducer_t<data_type::f32>(pd()->reducer_bia_conf_);
+    init_rtus_driver<avx512_common>(this);
 
-    if (conf_.with_bias()) {
-        const size_t max_buffer_size = jcp.nthr * 3 * 5 * 5 * 16 * 16;
-        reducer_bias_ = new cpu_reducer_t<data_type::f32>(
-                reduce_balancer_t(jcp.nthr, jcp.oc_block,
-                        jcp.ngroups * jcp.nb_load, jcp.mb, max_buffer_size));
-
-        if (conf_.want_padded_bias()) {
-            assert(jcp.ngroups == 1);
-            padded_bias_ = (data_t *)malloc(sizeof(data_t) * jcp.oc, 64);
-        }
-    }
+    const auto &jcp = kernel_->jcp;
 
     if (jcp.transpose_src) {
-        const ptrdiff_t tr_src_size = (ptrdiff_t)jcp.nthr_mb
-            * (ptrdiff_t)jcp.ngroups * (ptrdiff_t)jcp.ic * jcp.tr_is;
-        tr_src_ = (data_t *)malloc(tr_src_size * sizeof(data_t), 64);
-        parallel_nd(tr_src_size, [&](ptrdiff_t i) { tr_src_[i] = 0; });
         auto tp = jit_transpose4x16_src_t();
         tp.src_pf0_distance = 4;
         tp.tr_src_pf0_distance = 0;
         tp.src_pf1 = true;
         tp.tr_src_pf1 = false;
         trans_kernel_ = new jit_transpose4x16_src(&jcp, &tp);
-
-        bctx_ = (simple_barrier::ctx_t *)malloc(
-                jcp.nthr * sizeof(simple_barrier::ctx_t), 64);
-        for (int i = 0; i < jcp.nthr; ++i)
-            simple_barrier::ctx_init(&bctx_[i]);
     }
-
-    init_rtus_driver<avx512_common>(this);
 }
 
-void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
+void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights() const
 {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_weights = reinterpret_cast<data_t *>(this->memory(0));
     auto diff_bias_in = reinterpret_cast<data_t *>(this->memory(1));
-    data_t *diff_bias = conf_.want_padded_bias() ? padded_bias_ : diff_bias_in;
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
+
+    const auto scratchpad = this->scratchpad();
+
+    auto rtus_space = scratchpad.get<data_t>(key_conv_rtus_space);
+    data_t *diff_bias = pd()->wants_padded_bias()
+        ? scratchpad.get<data_t>(key_conv_padded_bias) : diff_bias_in;
+    auto wei_reduction = scratchpad.get<data_t>(key_conv_wei_reduction);
+
+    /* prepare src transposition barriers */
+    auto tr_src = scratchpad.get<data_t>(key_conv_tr_src);
+    auto tr_src_bctx = scratchpad.get<simple_barrier::ctx_t>(
+            key_conv_tr_src_bctx);
+    if (jcp.transpose_src) {
+        for (int i = 0; i < jcp.nthr; ++i)
+            simple_barrier::ctx_init(&tr_src_bctx[i]);
+    }
+
     const int ndims = src_d.ndims();
     const int wei_size = jcp.ngroups * jcp.oc * jcp.ic;
 
     simple_barrier::ctx_t reduction_barrier;
     simple_barrier::ctx_init(&reduction_barrier);
 
+    const auto reducer_bia_scratchpad = memory_tracking::grantor_t(scratchpad,
+            prefix_reducer_bia);
+    auto rb = this->reducer_bias_;
+    rb->init(reducer_bia_scratchpad);
+
     // TODO (Roma): remove this restriction
     assert(jcp.stride_w == 1 && jcp.stride_h == 1);
 
@@ -507,10 +511,10 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
     const int sp_nb = jcp.nb_reduce;
     const int mb_sp_work = jcp.mb * sp_nb;
 
-    const int stride_h = (ndims == 3) ? 1 : conf_.desc()->strides[0];
-    const int stride_w = conf_.desc()->strides[ndims - 3];
-    const int pad_t = (ndims == 3) ? 0 : conf_.desc()->padding[0][0];
-    const int pad_l = conf_.desc()->padding[0][ndims - 3];
+    const int stride_h = (ndims == 3) ? 1 : pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[ndims - 3];
+    const int pad_t = (ndims == 3) ? 0 : pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][ndims - 3];
 
     auto step = [](int default_step, int remaining, int tail_step) {
         assert(default_step <= tail_step);
@@ -548,7 +552,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
 
         const int src1_off = data_blk_off(src_d, img, _ic, ih, iw);
         data_t *src1 = (data_t *)&src[src1_off];
-        data_t *tr_src1 = &tr_src_[tr_src_off(ithr_mb, ic_b_tr, is)];
+        data_t *tr_src1 = &tr_src[tr_src_off(ithr_mb, ic_b_tr, is)];
 
         assert(jcp.ic_block == 16);
         const int src_stride = jcp.is * jcp.ic_block;
@@ -611,9 +615,8 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
         const int oc_b_work = oc_b_end - oc_b_start;
         const int ic_b_work = ic_b_end - ic_b_start;
 
-        data_t *diff_wei = ithr_mb == 0 ?
-                diff_weights :
-                ws_reduction_ + (ithr_mb - 1) * wei_size;
+        data_t *diff_wei = ithr_mb == 0
+            ? diff_weights : wei_reduction + (ithr_mb - 1) * wei_size;
 
         int sp_b_step = 0;
         for (int mb_sp_b = mb_sp_b_start; mb_sp_b < mb_sp_b_end;
@@ -634,7 +637,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                     if (jcp.transpose_src) {
                         if (jcp.nthr_oc_b > 1)
                             simple_barrier::barrier(
-                                    &bctx_[ithr_but_oc], jcp.nthr_oc_b);
+                                    &tr_src_bctx[ithr_but_oc], jcp.nthr_oc_b);
                         const int sp_size
                                 = nstl::min(sp_b_step * jcp.reduce_block,
                                         jcp.is - sp_b * jcp.reduce_block);
@@ -642,7 +645,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                             bcast_step, ithr_oc_b, jcp.nthr_oc_b, ic_b_start);
                         if (jcp.nthr_oc_b > 1)
                             simple_barrier::barrier(
-                                    &bctx_[ithr_but_oc], jcp.nthr_oc_b);
+                                    &tr_src_bctx[ithr_but_oc], jcp.nthr_oc_b);
                     }
 
                     for (int oc_b = oc_b_start; oc_b < oc_b_end;
@@ -660,7 +663,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                         store_to = diff_wei + off;
 
                         const data_t *diff_src = jcp.transpose_src ?
-                                &tr_src_[tr_src_off(ithr_mb, _ic_b_tr, 0)] :
+                                &tr_src[tr_src_off(ithr_mb, _ic_b_tr, 0)] :
                                 &src[src_d.blk_off(img, _ic_b)];
 
                         int sp_b_end = sp_b + sp_b_step;
@@ -690,7 +693,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                         int sp = sp_b * jcp.reduce_block;
                         p.load_data = pdiff_dst + sp * jcp.oc_block;
 
-                        if (conf_.rtus_.reduce_src_) {
+                        if (pd()->rtus_.reduce_src_) {
                             const int oh = sp / jcp.ow;
                             const int ow = sp % jcp.ow;
 
@@ -698,8 +701,9 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                             const int iw = nstl::max(ow * stride_w - pad_l, 0);
                             rp.iw_start = iw;
 
-                            rp.ws = scratch_ + ithr * ws_per_thread_
-                                    + sp * jcp.ic_block;
+                            rp.ws = rtus_space
+                                + ithr * pd()->rtus_.space_per_thread_
+                                + sp * jcp.ic_block;
 
                             if (ndims == 3)
                                 rp.src = local_src + iw
@@ -720,7 +724,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
             }
         }
 
-        /* diff_weights[:] += sum(ws_reduction_[thr_mb][:]) */
+        /* diff_weights[:] += sum(wei_reduction[thr_mb][:]) */
         if (jcp.nthr_mb > 1) {
             simple_barrier::barrier(&reduction_barrier, jcp.nthr);
             const int work = g_work * oc_b_work * ic_b_work;
@@ -747,7 +751,7 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                     const size_t off
                             = wht_blk_off(diff_weights_d, g, oc_b, ic_b);
                     data_t *d = diff_weights + off;
-                    data_t *s = ws_reduction_ + (thr_mb - 1) * wei_size + off;
+                    data_t *s = wei_reduction + (thr_mb - 1) * wei_size + off;
 
                     acc_ker_->accumulate(d, s, acc_size);
 
@@ -760,11 +764,10 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
     };
 
     auto ker_bias = [&](int ithr, int nthr) {
-        auto rb = this->reducer_bias_;
-        assert(nthr == rb->balancer_.nthr_);
+        assert(nthr == rb->balancer().nthr_);
 
-        const int b_job_start = rb->balancer_.ithr_job_off(ithr);
-        const int b_njobs = rb->balancer_.ithr_njobs(ithr);
+        const int b_job_start = rb->balancer().ithr_job_off(ithr);
+        const int b_njobs = rb->balancer().ithr_njobs(ithr);
 
         if (b_njobs == 0)
             return;
@@ -772,8 +775,8 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
         /* reduction dimension */
         int img_start{ 0 }, img_end{ 0 };
 
-        balance211(jcp.mb, rb->balancer_.nthr_per_group_,
-                rb->balancer_.id_in_group(ithr), img_start, img_end);
+        balance211(jcp.mb, rb->balancer().nthr_per_group_,
+                rb->balancer().id_in_group(ithr), img_start, img_end);
 
         /* jobs */
         int g_start{ 0 }, ocb_start{ 0 };
@@ -786,8 +789,9 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                 const size_t _oc = g * jcp.nb_load + ocb;
 
                 const data_t *d_dst = &diff_dst[diff_dst_d.blk_off(img, _oc)];
-                data_t *d_bias = &rb->get_local_ptr(
-                        ithr, diff_bias)[b_job_loc * rb->balancer_.job_size_];
+                data_t *d_bias = rb->get_local_ptr(ithr, diff_bias,
+                        reducer_bia_scratchpad)
+                    + b_job_loc * rb->balancer().job_size_;
 
                 if (img == img_start)
                     for (int o = 0; o < 16; ++o)
@@ -803,20 +807,19 @@ void jit_avx512_common_1x1_convolution_bwd_weights_t::execute_backward_weights()
                 nd_iterator_step(g, jcp.ngroups, ocb, jcp.nb_load);
             }
         }
-        rb->reduce(ithr, diff_bias);
+        rb->reduce(ithr, diff_bias, reducer_bia_scratchpad);
     };
 
     parallel(jcp.nthr, [&](const int ithr, const int nthr) {
         ker(ithr, jcp.nthr);
-        if (conf_.with_bias())
+        if (pd()->with_bias())
             ker_bias(ithr, jcp.nthr);
     });
 
     /* TODO: put this in ker_bias */
-    if (conf_.want_padded_bias()) {
+    if (pd()->wants_padded_bias()) {
         assert(jcp.ngroups == 1);
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            diff_bias_in[oc] = diff_bias[oc];
+        utils::array_copy(diff_bias_in, diff_bias, jcp.oc_without_padding);
     }
 }
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.hpp
index 787869774..67e8dabbe 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_1x1_convolution.hpp
@@ -18,37 +18,38 @@
 #define CPU_JIT_AVX512_COMMON_1x1_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "cpu_reducer.hpp"
+
 #include "jit_avx512_common_1x1_conv_kernel.hpp"
 #include "jit_uni_1x1_conv_utils.hpp"
 #include "jit_transpose_src_utils.hpp"
-#include "mkldnn_thread.hpp"
-#include "utils.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu, impl::data_type_t src_type,
+template <impl::data_type_t src_type,
          impl::data_type_t wei_type = src_type,
          impl::data_type_t dst_type = src_type>
-struct _jit_avx512_common_1x1_convolution_fwd_t : public cpu_primitive_t {
+struct jit_avx512_common_1x1_convolution_fwd_t : public cpu_primitive_t {
     // TODO: (Roma) Code duplication duplication! Remove with templates
     //              (maybe...)!
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_(), rtus_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_1x1:", avx512_common, ""),
-                _jit_avx512_common_1x1_convolution_fwd_t);
+                jit_avx512_common_1x1_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
@@ -56,37 +57,42 @@ struct _jit_avx512_common_1x1_convolution_fwd_t : public cpu_primitive_t {
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
-                && this->cdesc_().src_desc.data_type == src_type
-                && this->cdesc_().weights_desc.data_type == wei_type
-                && this->cdesc_().dst_desc.data_type == dst_type
+                && this->desc()->src_desc.data_type == src_type
+                && this->desc()->weights_desc.data_type == wei_type
+                && this->desc()->dst_desc.data_type == dst_type
                 && IMPLICATION(this->with_bias(),
-                    dst_type == this->cdesc_().bias_desc.data_type)
-                && IMPLICATION(with_relu && dst_type == data_type::s32
-                    && everyone_is(data_type::s16, src_type, wei_type),
-                    this->negative_slope() == 0.);
+                    dst_type == this->desc()->bias_desc.data_type);
             if (!ok) return status::unimplemented;
 
-            const convolution_desc_t *conv_d = &this->cdesc_();
+            const convolution_desc_t *conv_d = this->desc();
             const memory_desc_t *src_d = this->src_pd_.desc();
             rtus_prepare(this, conv_d, src_d, this->dst_pd_.desc());
-            return jit_avx512_common_1x1_conv_kernel::init_conf(jcp_,
-                    *conv_d, *src_d, *this->weights_pd_.desc(),
+
+            status_t status = jit_avx512_common_1x1_conv_kernel::init_conf(
+                    jcp_, *conv_d, *src_d, *this->weights_pd_.desc(),
                     *this->dst_pd_.desc(), *this->attr(),
-                    with_relu, this->negative_slope(),
                     mkldnn_get_max_threads(), rtus_.reduce_src_);
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_common_1x1_conv_kernel::init_scratchpad(scratchpad,
+                    jcp_);
+
+            rtus_prepare_space_info(this, scratchpad);
+
+            return status::success;
         }
 
         jit_1x1_conv_conf_t jcp_;
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
+        reduce_to_unit_stride_t rtus_;
 
-      protected:
+    protected:
         virtual status_t set_default_params() override {
             using namespace memory_format;
             if (this->src_pd_.desc()->format == any)
@@ -110,78 +116,61 @@ struct _jit_avx512_common_1x1_convolution_fwd_t : public cpu_primitive_t {
             }
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
-    _jit_avx512_common_1x1_convolution_fwd_t(const pd_t *pd,
-                                          const input_vector &inputs,
-                                          const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , kernel_(nullptr), rtus_driver_(nullptr), ws_per_thread_(0)
-        , scratch_(nullptr), padded_bias_(nullptr)
-    {
-        kernel_ = new jit_avx512_common_1x1_conv_kernel(conf_.jcp_,
-                    *conf_.attr());
 
+    jit_avx512_common_1x1_convolution_fwd_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs)
+        , kernel_(nullptr), rtus_driver_(nullptr)
+    {
+        kernel_ =
+            new jit_avx512_common_1x1_conv_kernel(pd()->jcp_, *pd()->attr());
         init_rtus_driver<avx512_common>(this);
-
-        if (conf_.want_padded_bias()) {
-            const auto &j = conf_.jcp_;
-            assert(j.ngroups == 1);
-            padded_bias_ = (dst_data_t *)malloc(sizeof(dst_data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                padded_bias_[oc] = 0;
-        }
     }
 
-    ~_jit_avx512_common_1x1_convolution_fwd_t() {
+    ~jit_avx512_common_1x1_convolution_fwd_t() {
         delete kernel_;
         delete rtus_driver_;
-        free(scratch_);
-        free(padded_bias_);
     }
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
   private:
-    void execute_forward();
+    void execute_forward() const;
     void execute_forward_thr(const int ithr, const int nthr,
             const src_data_t *src, const wei_data_t *weights,
-            const dst_data_t *bias, dst_data_t *dst);
-    pd_t conf_;
+            const dst_data_t *bias, dst_data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx512_common_1x1_conv_kernel *kernel_;
-    /* reduction to unit stride */
     rtus_driver_t<avx512_common> *rtus_driver_;
-    size_t ws_per_thread_;
-    src_data_t *scratch_;
-    dst_data_t *padded_bias_;
 };
 
 using jit_avx512_common_1x1_convolution_fwd_f32_t
-        = _jit_avx512_common_1x1_convolution_fwd_t<false, data_type::f32>;
-using jit_avx512_common_1x1_convolution_relu_f32_t
-        = _jit_avx512_common_1x1_convolution_fwd_t<true, data_type::f32>;
+        = jit_avx512_common_1x1_convolution_fwd_t<data_type::f32>;
 using jit_avx512_common_1x1_convolution_fwd_s16s16s32_t
-        = _jit_avx512_common_1x1_convolution_fwd_t<false, data_type::s16,
-            data_type::s16, data_type::s32>;
-using jit_avx512_common_1x1_convolution_relu_s16s16s32_t
-        = _jit_avx512_common_1x1_convolution_fwd_t<true, data_type::s16,
+        = jit_avx512_common_1x1_convolution_fwd_t<data_type::s16,
             data_type::s16, data_type::s32>;
 
 template <impl::data_type_t diff_dst_type,
           impl::data_type_t wei_type = diff_dst_type,
           impl::data_type_t diff_src_type = diff_dst_type>
-struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
+struct jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
     struct pd_t : public cpu_convolution_bwd_data_pd_t {
         pd_t(engine_t *engine,
                 const convolution_desc_t *adesc,
@@ -192,7 +181,7 @@ struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_1x1:", avx512_common, ""),
-                _jit_avx512_common_1x1_convolution_bwd_data_t);
+                jit_avx512_common_1x1_convolution_bwd_data_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
@@ -200,7 +189,8 @@ struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_data
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind, alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && this->desc()->diff_dst_desc.data_type == diff_dst_type
                 && this->desc()->weights_desc.data_type == wei_type
@@ -210,18 +200,25 @@ struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
             const convolution_desc_t *conv_d = this->desc();
             const memory_desc_t *diff_src_d = this->diff_src_pd_.desc();
             rtus_prepare(this, conv_d, diff_src_d, this->diff_dst_pd_.desc());
-            return jit_avx512_common_1x1_conv_kernel::init_conf(jcp_,
-                            *conv_d, *diff_src_d, *this->weights_pd_.desc(),
-                            *this->diff_dst_pd_.desc(), *this->attr(),
-                            mkldnn_get_max_threads(), rtus_.reduce_src_);
+
+            status_t status = jit_avx512_common_1x1_conv_kernel::init_conf(
+                    jcp_, *conv_d, *diff_src_d, *this->weights_pd_.desc(),
+                    *this->diff_dst_pd_.desc(), *this->attr(),
+                    mkldnn_get_max_threads(), rtus_.reduce_src_);
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_common_1x1_conv_kernel::init_scratchpad(scratchpad,
+                    jcp_);
+
+            rtus_prepare_space_info(this, scratchpad);
+
+            return status::success;
         }
 
         // TODO (Roma): structs conf header cleanup
         jit_1x1_conv_conf_t jcp_;
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
+        reduce_to_unit_stride_t rtus_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -248,6 +245,8 @@ struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
                             ? pick(this->ndims() - 3, gOIw8o16i2o, gOIhw8o16i2o)
                             : pick(this->ndims() - 3, OIw8o16i2o, OIhw8o16i2o)));
             }
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
 
             return status::success;
         }
@@ -255,30 +254,28 @@ struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
 
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
-    _jit_avx512_common_1x1_convolution_bwd_data_t(const pd_t *pd,
-                                              const input_vector &inputs,
-                                              const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , kernel_(nullptr), rtus_driver_(nullptr), ws_per_thread_(0)
-        , scratch_(nullptr)
+
+    jit_avx512_common_1x1_convolution_bwd_data_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs)
+        , kernel_(nullptr), rtus_driver_(nullptr)
     {
-        kernel_ = new jit_avx512_common_1x1_conv_kernel(conf_.jcp_,
-                    *conf_.attr());
+        kernel_ = new jit_avx512_common_1x1_conv_kernel(pd()->jcp_,
+                    *pd()->attr());
         init_rtus_driver<avx512_common>(this);
     }
-    ~_jit_avx512_common_1x1_convolution_bwd_data_t()
-    {
+
+    ~jit_avx512_common_1x1_convolution_bwd_data_t() {
         delete kernel_;
         delete rtus_driver_;
-        free(scratch_);
     }
 
     typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<diff_src_type>::type diff_src_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             execute_backward_data();
             break;
@@ -289,19 +286,17 @@ struct _jit_avx512_common_1x1_convolution_bwd_data_t : public cpu_primitive_t {
     }
 
   private:
-    void execute_backward_data();
-    pd_t conf_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx512_common_1x1_conv_kernel *kernel_;
-    /* reduction to unit stride */
     rtus_driver_t<avx512_common> *rtus_driver_;
-    size_t ws_per_thread_;
-    diff_src_data_t *scratch_;
 };
 
 using jit_avx512_common_1x1_convolution_bwd_data_f32_t
-        = _jit_avx512_common_1x1_convolution_bwd_data_t<data_type::f32>;
+        = jit_avx512_common_1x1_convolution_bwd_data_t<data_type::f32>;
 using jit_avx512_common_1x1_convolution_bwd_data_s16s16s32_t
-        = _jit_avx512_common_1x1_convolution_bwd_data_t<data_type::s16,
+        = jit_avx512_common_1x1_convolution_bwd_data_t<data_type::s16,
             data_type::s16, data_type::s32>;
 
 struct jit_avx512_common_1x1_convolution_bwd_weights_t : public cpu_primitive_t
@@ -324,7 +319,9 @@ struct jit_avx512_common_1x1_convolution_bwd_weights_t : public cpu_primitive_t
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_weights
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->src_desc.data_type,
@@ -337,19 +334,32 @@ struct jit_avx512_common_1x1_convolution_bwd_weights_t : public cpu_primitive_t
             const convolution_desc_t *conv_d = this->desc();
             const memory_desc_t *src_d = this->src_pd_.desc();
             rtus_prepare(this, conv_d, src_d, this->diff_dst_pd_.desc());
-            return jit_avx512_common_1x1_conv_kernel::init_conf(jcp_,
-                            *conv_d, *src_d, *this->diff_weights_pd_.desc(),
-                            *this->diff_dst_pd_.desc(), *this->attr(),
-                            mkldnn_get_max_threads(), rtus_.reduce_src_);
+
+            status_t status = jit_avx512_common_1x1_conv_kernel::init_conf(
+                    jcp_, *conv_d, *src_d, *this->diff_weights_pd_.desc(),
+                    *this->diff_dst_pd_.desc(), *this->attr(),
+                    mkldnn_get_max_threads(), rtus_.reduce_src_);
+            if (status != status::success) return status;
+
+            init_balancers();
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_common_1x1_conv_kernel::init_scratchpad(scratchpad,
+                    jcp_);
+
+            auto reducer_bia_scratchpad = memory_tracking::registrar_t(
+                    scratchpad, memory_tracking::names::prefix_reducer_bia);
+            reducer_bia_conf_.init_scratchpad(reducer_bia_scratchpad);
+
+            rtus_prepare_space_info(this, scratchpad);
+
+            return status::success;
         }
 
         // TODO (Roma): structs conf header cleanup
         jit_1x1_conv_conf_t jcp_;
-
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
+        cpu_reducer_t<data_type::f32>::conf_t reducer_bia_conf_;
+        reduce_to_unit_stride_t rtus_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -367,32 +377,40 @@ struct jit_avx512_common_1x1_convolution_bwd_weights_t : public cpu_primitive_t
                     : pick(this->ndims() - 3, OIw16i16o, OIhw16i16o)));
             if (this->diff_bias_pd_.desc()->format == any)
                 CHECK(this->diff_bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
+
+    private:
+        void init_balancers() {
+            const size_t max_buffer_size = jcp_.nthr * 3 * 5 * 5 * 16 * 16;
+            if (with_bias()) {
+                reducer_bia_conf_.init(reduce_balancer_t(jcp_.nthr,
+                            jcp_.oc_block, jcp_.ngroups * jcp_.nb_load,
+                            jcp_.mb, max_buffer_size));
+            }
+        }
     };
 
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
-    jit_avx512_common_1x1_convolution_bwd_weights_t(const pd_t *pd,
-                                                 const input_vector &inputs,
-                                                 const output_vector &outputs);
+
+    jit_avx512_common_1x1_convolution_bwd_weights_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs);
+
     ~jit_avx512_common_1x1_convolution_bwd_weights_t() {
         delete kernel_;
         delete acc_ker_;
         delete reducer_bias_;
         delete rtus_driver_;
         delete trans_kernel_;
-        free(bctx_);
-        free(ws_reduction_);
-        free(scratch_);
-        free(tr_src_);
-        free(padded_bias_);
     }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_weights:
             execute_backward_weights();
             break;
@@ -403,23 +421,14 @@ struct jit_avx512_common_1x1_convolution_bwd_weights_t : public cpu_primitive_t
     }
 
   private:
-    void execute_backward_weights();
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     jit_avx512_common_1x1_conv_kernel *kernel_;
     cpu_accumulator_1d_t<data_type::f32> *acc_ker_;
     cpu_reducer_t<data_type::f32> *reducer_bias_;
     jit_transpose4x16_src *trans_kernel_;
-
-    /* reduction to unit stride */
     rtus_driver_t<avx512_common> *rtus_driver_;
-    size_t ws_per_thread_;
-    data_t *scratch_;
-    data_t *padded_bias_;
-
-    simple_barrier::ctx_t *bctx_;
-    data_t *tr_src_;
-    data_t *ws_reduction_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.cpp
index 7f00356c0..320627022 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.cpp
@@ -18,6 +18,8 @@
 #include "nstl.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
+
+#include "cpu_barrier.hpp"
 #include "cpu_memory.hpp"
 
 #include "jit_avx512_common_conv_kernel.hpp"
@@ -30,6 +32,7 @@ namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 using namespace Xbyak;
 
@@ -59,32 +62,29 @@ inline void pick_loop_order(jit_conv_conf_t &jcp) {
 
 inline bool is_1stconv(const jit_conv_conf_t &jcp) {
     if (mayiuse(avx512_core) && !mayiuse(avx512_core_vnni))
-        return jcp.ic < 16;
+        return (jcp.ic < 16 && jcp.ngroups == 1);
     else
         return one_of(jcp.ic, 1, 3);
 }
-inline bool is_1D_conv(const jit_conv_conf_t &jcp) {
-    return (jcp.ih == 1 && jcp.kh == 1);
-}
-inline bool is_ow_threading_available(const jit_conv_conf_t &jcp) {
-    return (is_1D_conv(jcp) && one_of(jcp.ndims, 3, 4)
-        && !(jcp.ver == ver_fma && mayiuse(avx512_mic)));
-}
+
 inline bool is_ow_threading_on(const jit_conv_conf_t &jcp) {
     return (jcp.nb_ow > 1);
 }
-inline bool is_1D_prefetching(const jit_conv_conf_t &jcp) {
-    return (jcp.ver == ver_4fma && is_1D_conv(jcp) && is_ow_threading_on(jcp));
+
+inline bool is_owb_prefetching(const jit_conv_conf_t &jcp) {
+    return (jcp.ver == ver_4fma && is_ow_threading_on(jcp));
 }
+
 }
 
-void jit_avx512_common_conv_fwd_kernel::prepare_output(int ur_w)
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::prepare_output(int ur_w)
 {
     for (int k = 0; k < jcp.nb_oc_blocking; k++)
         for (int j = 0; j < ur_w; j++) {
-            Zmm zmm = zmm_out(j, k);
-            vpxord(zmm, zmm, zmm);
-            if (!is_1D_prefetching(jcp)) {
+            Vmm vmm = vmm_out(j, k);
+            vpxord(vmm, vmm, vmm);
+            if (!is_owb_prefetching(jcp)) {
                 size_t aux_output_offset = get_output_offset(j, k);
                 mic_prefetcht1(EVEX_compress_addr_safe(reg_out_prf,
                             aux_output_offset, reg_out_long_offt));
@@ -92,7 +92,8 @@ void jit_avx512_common_conv_fwd_kernel::prepare_output(int ur_w)
         }
 }
 
-void jit_avx512_common_conv_fwd_kernel::store_output(int ur_w)
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::store_output(int ur_w)
 {
     Label no_update_label, store_label, postproc_label;
 
@@ -108,9 +109,9 @@ void jit_avx512_common_conv_fwd_kernel::store_output(int ur_w)
 
     for (int k = 0; k < jcp.nb_oc_blocking; k++)
         for (int j = 0; j < ur_w; j++) {
-            Zmm zmm = zmm_out(j, k);
+            Vmm vmm = vmm_out(j, k);
             size_t aux_output_offset = get_output_offset(j, k);
-            vadd(zmm,
+            vadd(vmm,
                 make_safe_addr(reg_out, aux_output_offset, reg_out_long_offt));
         }
 
@@ -126,8 +127,8 @@ void jit_avx512_common_conv_fwd_kernel::store_output(int ur_w)
         for (int k = 0; k < jcp.nb_oc_blocking; k++) {
             int bias_offset = jcp.typesize_out * k * jcp.oc_block;
             for (int j = 0; j < ur_w; j++) {
-                Zmm zmm = zmm_out(j, k);
-                vadd(zmm, EVEX_compress_addr(reg_bias, bias_offset));
+                Vmm vmm = vmm_out(j, k);
+                vadd(vmm, EVEX_compress_addr(reg_bias, bias_offset));
             }
             mic_prefetcht1(EVEX_compress_addr(reg_bias, bias_offset + 64));
         }
@@ -142,18 +143,29 @@ void jit_avx512_common_conv_fwd_kernel::store_output(int ur_w)
     int depthwise_inj_idx = 0;
     const auto &p = attr_.post_ops_;
 
-    if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-        for (int k = 0; k < jcp.nb_oc_blocking; k++)
-            eltwise_injectors[0]->compute_vector_range(
-                    k*jcp.ur_w, k*jcp.ur_w + ur_w);
-    }
-
     for (int i = 0; i < p.len_; i++) {
         auto& post_op = p.entry_[i];
         if (post_op.is_eltwise()) {
-            for (int k = 0; k < jcp.nb_oc_blocking; k++)
-                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(
-                        k*jcp.ur_w, k*jcp.ur_w + ur_w);
+            if (jcp.ver == ver_4vnni || jcp.ver == ver_vnni) {
+                Vmm vmm_zero = vmm_wei;
+                vpxord(vmm_zero, vmm_zero, vmm_zero);
+
+                for (int k = 0; k < jcp.nb_oc_blocking; k++)
+                    for (int j = 0; j < ur_w; j++) {
+                        Vmm vmm = vmm_out(j, k);
+                        vpcmpd(k1, vmm, vmm_zero, _cmp_lt_os);
+                        vpmulld(vmm | k1, vmm, vmm_zero);
+                    }
+            } else {
+                if (ur_w == jcp.ur_w) {
+                    eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0,
+                                                            jcp.nb_oc_blocking * jcp.ur_w);
+                } else {
+                    for (int k = 0; k < jcp.nb_oc_blocking; k++)
+                        eltwise_injectors[eltwise_inj_idx]->compute_vector_range(k * jcp.ur_w,
+                                                                                 k * jcp.ur_w + ur_w);
+                }
+            }
 
             eltwise_inj_idx++;
         } else if (post_op.is_depthwise()) {
@@ -178,18 +190,25 @@ void jit_avx512_common_conv_fwd_kernel::store_output(int ur_w)
     L(store_label);
     for (int k = 0; k < jcp.nb_oc_blocking; k++)
         for (int j = 0; j < ur_w; j++) {
-            Zmm zmm = zmm_out(j, k);
+            Vmm vmm = vmm_out(j, k);
             size_t aux_output_offset = (size_t)typesize *
                 ((size_t)k * jcp.od * jcp.oh * jcp.ow + j) * jcp.oc_block;
             vmovups(EVEX_compress_addr_safe(reg_out, aux_output_offset,
-                        reg_out_long_offt), zmm);
-            if (!is_1D_prefetching(jcp))
+                        reg_out_long_offt), vmm);
+            if (!is_owb_prefetching(jcp))
                 mic_prefetcht0(EVEX_compress_addr_safe(reg_out_prf,
                             aux_output_offset, reg_out_long_offt));
         }
 }
 
-void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::compute_loop_4fma_1st(int ur_w,
+    int pad_l, int pad_r)
+{
+}
+
+template<>
+void _jit_avx512_common_conv_fwd_kernel<Zmm>::compute_loop_4fma_1st(int ur_w,
         int pad_l, int pad_r)
 {
     assert(jcp.dilate_d == 0 && jcp.dilate_h == 0 && jcp.dilate_w == 0);
@@ -201,9 +220,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
 
-    Label kh_label, kd_label, skip_kd_loop;
-
-    prepare_output(ur_w);
+    Label kh_label, kd_label;
 
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_inp, reg_inp);
@@ -226,18 +243,9 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
         mov(aux_reg_inp_d, reg_inp);
         mov(aux_reg_inp_d_prf, reg_inp_prf);
 
-        if ((jcp.kd - 1) < nstl::max(jcp.f_pad, jcp.back_pad)) {
-            cmp(reg_ki, 0);
-            je(skip_kd_loop, T_NEAR);
-        }
         L(kd_label);
     }
     mov(reg_kj, reg_kh);
-    Label skip_kh_loop;
-    if ((jcp.kh - 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
-        cmp(reg_kj, 0);
-        je(skip_kh_loop, T_NEAR);
-    }
     if (jcp.ndims == 5) {
         mov(aux_reg_inp, aux_reg_inp_d);
         mov(aux_reg_ker, aux_reg_ker_d);
@@ -253,10 +261,10 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
                         * ((ki + i) * oc_block
                                   + ic * kw * jcp.kh * jcp.kd * oc_block);
                 if (ki + i < kw)
-                    vmovups(zmm_ker(i),
+                    vmovups(vmm_ker(i),
                         EVEX_compress_addr(aux_reg_ker, aux_ker_offset));
                 else
-                    vpxord(zmm_ker(i), zmm_ker(i), zmm_ker(i));
+                    vpxord(vmm_ker(i), vmm_ker(i), vmm_ker(i));
             }
 
             int j_start = get_ow_start(ki, pad_l);
@@ -266,7 +274,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
                 size_t aux_input_offset = (size_t)jcp.typesize_in
                         * ((size_t)(ki + j * stride_w
                             - pad_l) + (size_t)ic * iw * ih * jcp.id);
-                v4fmaddps(zmm_out(j, 0), zmm_ker(0),
+                v4fmaddps(vmm_out(j, 0), vmm_ker(0),
                         EVEX_compress_addr_safe(aux_reg_inp, aux_input_offset,
                         reg_long_offt));
                 if (ki + prf_count < kw && prf_count < 4
@@ -299,8 +307,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
     cmp(reg_kj, 0);
     jg(kh_label, T_NEAR);
 
-    L(skip_kh_loop);
-
     if (jcp.ndims == 5) {
         add(aux_reg_inp_d, typesize * jcp.ih * jcp.iw);
         add(aux_reg_ker_d, typesize * jcp.kw * jcp.kh * oc_block);
@@ -309,23 +315,28 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma_1st(int ur_w,
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_out);
         pop(reg_out_prf);
     }
 
-    store_output(ur_w);
     if (max_input_offset > INT_MAX) pop(reg_inp_prf);
 }
 
-void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::compute_loop_4fma(int ur_w,
+    int pad_l, int pad_r)
+{
+}
+
+template<>
+void _jit_avx512_common_conv_fwd_kernel<Zmm>::compute_loop_4fma(int ur_w,
         int pad_l, int pad_r)
 {
     int stride_w = jcp.stride_w;
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
-    Label kh_label, last_iter_label, loop_end_label, kd_label, skip_kd_loop;
+    Label kh_label, last_iter_label, loop_end_label, kd_label;
     int ker_load_number = 4;
     int shift_kernel_ptr = typesize * jcp.kw * jcp.oc_block * jcp.ic_block;
     int shift_input_ptr = typesize * (jcp.dilate_h + 1) * jcp.iw * jcp.ic_block;
@@ -347,7 +358,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
     auto kernel_loads = [=](int ki, int ic, int kk) {
         for (int ii = 0; ii < ker_load_number; ii++) {
             int aux_kernel_offset = kernel_offset(kk, ic + ii, ki);
-            vmovups(zmm_ker(ii),
+            vmovups(vmm_ker(ii),
                 EVEX_compress_addr(aux_reg_ker, aux_kernel_offset));
         }
     };
@@ -364,8 +375,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
         }
     };
 
-    prepare_output(ur_w);
-
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_inp, reg_inp);
         mov(aux_reg_ker, reg_ker);
@@ -382,21 +391,11 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
         mov(aux_reg_inp_d, reg_inp);
         mov(aux_reg_inp_d_prf, reg_inp_prf);
         mov(aux_reg_ker_d_prf, reg_ker_prf);
-
-        if ((jcp.kd - 1) * (jcp.dilate_d + 1) < nstl::max(jcp.f_pad, jcp.back_pad)) {
-            cmp(reg_ki, 0);
-            je(skip_kd_loop, T_NEAR);
-        }
         L(kd_label);
         mov(reg_kj, ptr[param1 + GET_OFF(kh_padding)]);
     } else {
         mov(reg_kj, reg_kh);
     }
-    Label skip_kh_loop;
-    if ((jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
-        cmp(reg_kj, 0);
-        je(skip_kh_loop, T_NEAR);
-    }
     if (jcp.ndims == 5) {
         mov(aux_reg_inp, aux_reg_inp_d);
         mov(aux_reg_ker, aux_reg_ker_d);
@@ -427,7 +426,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
                                 * ((ki * (jcp.dilate_w + 1) + oi * stride_w
                                            - pad_l) * ic_block
                                                        + ic);
-                        v4fmaddps(zmm_out(oi, kk), zmm_ker(0),
+                        v4fmaddps(vmm_out(oi, kk), vmm_ker(0),
                             EVEX_compress_addr(aux_reg_inp, aux_input_offset));
 
                         if (oi % 2) {
@@ -468,7 +467,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
                                     * ((ki * (jcp.dilate_w + 1) + oi * stride_w
                                                - pad_l) * ic_block
                                                            + ic);
-                            v4fmaddps(zmm_out(oi, kk), zmm_ker(0),
+                            v4fmaddps(vmm_out(oi, kk), vmm_ker(0),
                                 EVEX_compress_addr(aux_reg_inp,
                                     aux_input_offset));
                             if (oi % 2) {
@@ -499,11 +498,11 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
                         int aux_input_offset = typesize
                                 * ((ki * (jcp.dilate_w + 1) + oi * stride_w
                                 - pad_l) * ic_block + ic);
-                        v4fmaddps(zmm_out(oi, kk), zmm_ker(0),
+                        v4fmaddps(vmm_out(oi, kk), vmm_ker(0),
                             EVEX_compress_addr(aux_reg_inp,
                                 aux_input_offset));
 
-                        if (!is_1D_prefetching(jcp)) {
+                        if (!is_owb_prefetching(jcp)) {
                             if ((oi % 2) && (prf_count_t1 < 4)) {
                                 mic_prefetcht1(EVEX_compress_addr(
                                     aux_reg_ker_prf, kernel_offset(kk,
@@ -521,7 +520,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
                                 prf_count_t0++;
                             }
                         }
-                        if (!is_1D_prefetching(jcp)) {
+                        if (!is_owb_prefetching(jcp)) {
                             if (pref_current_inp) {
                                 if (ki == 0 && ic == 0 && kk == 0)
                                     mic_prefetcht0(EVEX_compress_addr(
@@ -560,8 +559,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
     cmp(reg_kj, 0);
     jg(kh_label, T_NEAR);
 
-    L(skip_kh_loop);
-
     if (jcp.ndims == 5) {
         add(aux_reg_inp_d,
                 typesize * (jcp.dilate_d + 1) * jcp.ih * jcp.iw * jcp.ic_block);
@@ -575,16 +572,14 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_4fma(int ur_w,
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_out);
         pop(reg_out_prf);
     }
-
-    store_output(ur_w);
 }
 
-void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::compute_loop_fma(int ur_w,
         int pad_l, int pad_r)
 {
     bool prf_ker = true;
@@ -597,20 +592,19 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
     int nb_oc_block = jcp.nb_oc_blocking;
-    Label kh_label, kd_label, skip_kd_loop;
+    Label kh_label, kd_label;
 
     int ker_pipeline_depth = 4;
     assert(ker_reg_base_idx + ker_pipeline_depth <= 32);
     assert(oc_block >= ker_pipeline_depth);
 
     int num_ker_loads = ic_block * nb_oc_block * kw;
-    const int simd_w = 16;
     int num_ker_prfs = prf_ker ? num_ker_loads : 0;
     int num_inp_prfs = prf_inp ?
             ur_w * nstl::min(kw, stride_w) + nstl::max(0, kw - stride_w) :
             0;
     if (jcp.is_1stconv && prf_inp) {
-        num_inp_prfs = div_up(num_inp_prfs, simd_w) * ic_block;
+        num_inp_prfs = div_up(num_inp_prfs, jcp.simd_w) * ic_block;
     }
     int num_prfs = num_ker_prfs + num_inp_prfs;
     int num_fmas = num_ker_loads * ur_w;
@@ -619,8 +613,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
     int prf_inst_trigger = (num_fmas % prf_inst_spacing) / 2;
     int inp_mul = !jcp.is_1stconv ? ic_block : 1;
 
-    prepare_output(ur_w);
-
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_inp, reg_inp);
         mov(aux_reg_ker, reg_ker);
@@ -643,20 +635,11 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
         mov(aux_reg_inp_d_prf, reg_inp_prf);
         mov(aux_reg_ker_d_prf, reg_ker_prf);
 
-        if ((jcp.kd - 1) * (jcp.dilate_d + 1) < nstl::max(jcp.f_pad, jcp.back_pad)) {
-            cmp(reg_ki, 0);
-            je(skip_kd_loop, T_NEAR);
-        }
         L(kd_label);
         mov(reg_kj, ptr[param1 + GET_OFF(kh_padding)]);
     } else {
         mov(reg_kj, reg_kh);
     }
-    Label skip_kh_loop;
-    if ((jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
-        cmp(reg_kj, 0);
-        je(skip_kh_loop, T_NEAR);
-    }
 
     if (jcp.ndims == 5) {
         mov(aux_reg_inp, aux_reg_inp_d);
@@ -676,7 +659,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
                 if (step == 0) {
                     for (int i = 0; i < ker_pipeline_depth; i++) {
                         aux_kernel_offset = get_kernel_offset(ki, ic, 0, i);
-                        vmovups(zmm_ker(i), EVEX_compress_addr(
+                        vmovups(vmm_ker(i), EVEX_compress_addr(
                                         aux_reg_ker, aux_kernel_offset));
                     }
                 } else if (step < num_ker_loads - ker_pipeline_depth + 1) {
@@ -685,19 +668,19 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
                         = (step + load_offset) % ker_pipeline_depth;
                     aux_kernel_offset
                             = get_kernel_offset(ki, ic, 0, load_offset);
-                    vmovups(zmm_ker(ker_load_reg_idx),
+                    vmovups(vmm_ker(ker_load_reg_idx),
                             EVEX_compress_addr(aux_reg_ker, aux_kernel_offset));
                 }
 
                 bool ker_prf_inserted = false;
-                Zmm zmm_kernel = zmm_ker(step % ker_pipeline_depth);
+                Vmm vmm_kernel = vmm_ker(step % ker_pipeline_depth);
                 int j_start = get_ow_start(ki, pad_l);
                 int j_end = get_ow_end(ur_w, ki, pad_r);
                 for (int j = j_start; j < j_end; j++) {
                     size_t aux_input_offset = get_input_offset(ki, ic, j, pad_l);
                     auto addr = EVEX_compress_addr_safe(aux_reg_inp,
                             aux_input_offset, reg_long_offt, true);
-                    vfmadd231ps(zmm_out(j, 0), zmm_kernel, addr);
+                    vfmadd231ps(vmm_out(j, 0), vmm_kernel, addr);
                     int fma_idx = step * ur_w + j;
                     int prf_slot_idx = fma_idx / prf_inst_spacing;
                     if (fma_idx % prf_inst_spacing == prf_inst_trigger) {
@@ -724,7 +707,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
                                     size_t ic_prf_stride =
                                         (size_t)jcp.typesize_in * iw * ih * id;
                                     size_t iw_prf_stride
-                                            = jcp.typesize_in * simd_w;
+                                            = jcp.typesize_in * jcp.simd_w;
                                     inp_prf_offset = ((inp_prf_idx / ic_block)
                                             * iw_prf_stride
                                             + (inp_prf_idx % ic_block)
@@ -752,7 +735,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
         jg(kh_label, T_NEAR);
     }
 
-    L(skip_kh_loop);
 
     if (jcp.ndims == 5) {
         add(aux_reg_inp_d,
@@ -767,16 +749,15 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma(int ur_w,
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_out);
         pop(reg_out_prf);
     }
     if (max_input_offset > INT_MAX) pop(reg_inp_prf);
-    store_output(ur_w);
 }
 
-void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::compute_loop_fma_core(int ur_w,
     int pad_l, int pad_r)
 {
     int kw = jcp.kw;
@@ -784,7 +765,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
     int nb_oc_block = jcp.nb_oc_blocking;
-    Label kh_label, skip_kh_loop, kd_label, skip_kd_loop;
+    Label kh_label, kd_label;
     int shift_kernel_ptr = jcp.typesize_in * jcp.kw * jcp.oc_block
         * jcp.ic_block;
     int inp_mul = !jcp.is_1stconv ? ic_block : 1;
@@ -799,8 +780,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
                 * (!jcp.is_1stconv ? 1 : (size_t)jcp.iw * jcp.ih * jcp.id));
     };
 
-    prepare_output(ur_w);
-
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_inp, reg_inp);
         mov(aux_reg_ker, reg_ker);
@@ -813,19 +792,11 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
         mov(aux_reg_ker_d, ptr[param1 + GET_OFF(filt)]);
         mov(aux_reg_inp_d, reg_inp);
 
-        if ((jcp.kd - 1) * (jcp.dilate_d + 1) < nstl::max(jcp.f_pad, jcp.back_pad)) {
-            cmp(reg_ki, 0);
-            je(skip_kd_loop, T_NEAR);
-        }
         L(kd_label);
         mov(reg_kj, ptr[param1 + GET_OFF(kh_padding)]);
     } else {
         mov(reg_kj, reg_kh);
     }
-    if ((jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
-        cmp(reg_kj, 0);
-        je(skip_kh_loop, T_NEAR);
-    }
 
     if (jcp.ndims == 5) {
         mov(aux_reg_inp, aux_reg_inp_d);
@@ -841,7 +812,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
                 if (jcp.kernel_kind == expl_bcast) {
                     for (int jj = jj_start; jj < jj_end; jj++) {
                         size_t aux_input_offset = input_offset(jj, ic, ki);
-                        vbroadcastss(zmm_inp(jj, nb_oc_block),
+                        vbroadcastss(vmm_inp(jj, nb_oc_block),
                             EVEX_compress_addr_safe(aux_reg_inp,
                             aux_input_offset, reg_long_offt));
                     }
@@ -851,15 +822,15 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
                         * (ii * jcp.nb_ic * jcp.kh * jcp.kw * jcp.kd * ic_block
                         * oc_block + ki * ic_block * oc_block + ic * oc_block);
                     if (jj_end - jj_start > 0)
-                        vmovups(zmm_wei, EVEX_compress_addr(aux_reg_ker,
+                        vmovups(vmm_wei, EVEX_compress_addr(aux_reg_ker,
                             aux_kernel_offset));
                     for (int jj = jj_start; jj < jj_end; jj++)
                         if (jcp.kernel_kind == expl_bcast)
-                            vfmadd231ps(zmm_out(jj, ii),
-                                zmm_inp(jj, nb_oc_block), zmm_wei);
+                            vfmadd231ps(vmm_out(jj, ii),
+                                vmm_inp(jj, nb_oc_block), vmm_wei);
                         else {
                             size_t aux_input_offset = input_offset(jj, ic, ki);
-                            vfmadd231ps(zmm_out(jj, ii), zmm_wei,
+                            vfmadd231ps(vmm_out(jj, ii), vmm_wei,
                                 EVEX_compress_addr_safe(aux_reg_inp,
                                 aux_input_offset, reg_long_offt, true));
                         }
@@ -872,7 +843,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
         cmp(reg_kj, 0);
         jg(kh_label, T_NEAR);
     }
-    L(skip_kh_loop);
 
     if (jcp.ndims == 5) {
         add(aux_reg_inp_d,
@@ -883,15 +853,19 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_fma_core(int ur_w,
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_out);
     }
+}
 
-    store_output(ur_w);
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::compute_loop_vnni(
+    int ur_w, int pad_l, int pad_r)
+{
 }
 
-void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
+template<>
+void _jit_avx512_common_conv_fwd_kernel<Zmm>::compute_loop_vnni(
         int ur_w, int pad_l, int pad_r)
 {
     Label kh_label, kd_label;
@@ -908,7 +882,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
     assert(reg_inp_prf == reg_long_offt);
     if (max_input_offset > INT_MAX) push(reg_inp_prf);
 
-    prepare_output(ur_w);
 
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_inp, reg_inp);
@@ -917,8 +890,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
         mov(aux_reg_inp_prf, reg_inp_prf);
     }
 
-    Label skip_kh_loop, skip_kd_loop;
-
     if (jcp.ndims == 5) {
         push(reg_out_prf);
         push(reg_out);
@@ -929,19 +900,11 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
         mov(aux_reg_inp_d_prf, reg_inp_prf);
         mov(aux_reg_ker_d_prf, reg_ker_prf);
 
-        if ((jcp.kd - 1) * (jcp.dilate_d + 1) < nstl::max(jcp.f_pad, jcp.back_pad)) {
-            cmp(reg_ki, 0);
-            je(skip_kd_loop, T_NEAR);
-        }
         L(kd_label);
         mov(reg_kj, ptr[param1 + GET_OFF(kh_padding)]);
     } else {
         mov(reg_kj, reg_kh);
     }
-    if ((jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
-        cmp(reg_kj, 0);
-        je(skip_kh_loop, T_NEAR);
-    }
     if (jcp.ndims == 5) {
         mov(aux_reg_inp, aux_reg_inp_d);
         mov(aux_reg_ker, aux_reg_ker_d);
@@ -957,7 +920,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
                 if (jcp.kernel_kind == expl_bcast) {
                     for (int oi = ow_start; oi < ow_end; oi++) {
                         size_t input_offset = get_input_offset(ki, ic, oi, pad_l);
-                        vpbroadcastd(zmm_inp(oi, jcp.nb_oc_blocking),
+                        vpbroadcastd(vmm_inp(oi, jcp.nb_oc_blocking),
                             EVEX_compress_addr_safe(aux_reg_inp, input_offset,
                             reg_long_offt));
                     }
@@ -965,7 +928,7 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
                 for (int kk = 0; kk < jcp.nb_oc_blocking; kk++) {
                     if (jcp.kernel_kind == expl_bcast) {
                         int kernel_offset = get_kernel_offset(ki, ic, kk, 0);
-                        vmovups(zmm_wei,
+                        vmovups(vmm_wei,
                             EVEX_compress_addr(aux_reg_ker, kernel_offset));
                     } else {
                         for (int ii = 0; ii < ker_load_number; ii++) {
@@ -979,12 +942,17 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
                     for (int oi = ow_start, prf_count = 0; oi < ow_end; oi++) {
                         size_t input_offset = get_input_offset(ki, ic, oi, pad_l);
                         if (jcp.kernel_kind == expl_bcast) {
-                            vpdpwssd(zmm_out(oi, kk), zmm_wei,
-                                zmm_inp(oi, jcp.nb_oc_blocking));
+                            vpdpwssd(vmm_out(oi, kk), vmm_wei,
+                                vmm_inp(oi, jcp.nb_oc_blocking));
                         } else {
-                            vpXdpwssd(zmm_out(oi, kk), Zmm(ker_reg_base_idx),
-                            EVEX_compress_addr_safe(aux_reg_inp, input_offset,
-                            reg_long_offt, jcp.ver != ver_4vnni));
+                            if (jcp.ver == ver_4vnni)
+                                vp4dpwssd(vmm_out(oi, kk), Zmm(ker_reg_base_idx),
+                                EVEX_compress_addr_safe(aux_reg_inp,
+                                    input_offset, reg_long_offt, false));
+                            else
+                                vpdpwssd(vmm_out(oi, kk), Zmm(ker_reg_base_idx),
+                                EVEX_compress_addr_safe(aux_reg_inp,
+                                    input_offset, reg_long_offt, true));
                         }
                         if ((oi % 2) && (prf_count < ker_load_number)) {
                             int kernel_offset = get_kernel_offset(
@@ -1014,8 +982,6 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
         jg(kh_label, T_NEAR);
     }
 
-    L(skip_kh_loop);
-
     if (jcp.ndims == 5) {
         add(aux_reg_inp_d, jcp.typesize_in * jcp.ih * jcp.iw * jcp.ic_block);
         add(aux_reg_ker_d, jcp.typesize_in * jcp.kw * jcp.kh * jcp.oc_block
@@ -1027,19 +993,37 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop_vnni(
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_out);
         pop(reg_out_prf);
     }
     if (max_input_offset > INT_MAX) pop(reg_inp_prf);
-    store_output(ur_w);
 }
 
-void jit_avx512_common_conv_fwd_kernel::compute_loop(int ur_w,
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::compute_loop(int ur_w,
         int pad_l, int pad_r)
 {
     if (jcp.ndims == 5) push(reg_oi);
+
+    prepare_output(ur_w);
+
+    Label skip_compute_loop;
+    if (jcp.ndims == 5) {
+        if ((jcp.dilate_d >= jcp.id)
+                || (jcp.kd - 1) * (jcp.dilate_d + 1) < nstl::max(jcp.f_pad, jcp.back_pad)) {
+            mov(reg_kj, ptr[param1 + GET_OFF(kd_padding)]);
+            cmp(reg_kj, 0);
+            je(skip_compute_loop, T_NEAR);
+        }
+    }
+    if ((jcp.dilate_h >= jcp.ih)
+            || (jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
+        mov(reg_kj, ptr[param1 + GET_OFF(kh_padding)]);
+        cmp(reg_kj, 0);
+        je(skip_compute_loop, T_NEAR);
+    }
+
     if (jcp.ver == ver_4vnni || jcp.ver == ver_vnni)
         compute_loop_vnni(ur_w, pad_l, pad_r);
     else if (jcp.ver == ver_4fma)
@@ -1058,17 +1042,15 @@ void jit_avx512_common_conv_fwd_kernel::compute_loop(int ur_w,
                 compute_loop_fma_core(ur_w, pad_l, pad_r);
     else
         assert(!"unknown convolution version");
+
+    L(skip_compute_loop);
+    store_output(ur_w);
     if (jcp.ndims == 5) pop(reg_oi);
 }
 
-void jit_avx512_common_conv_fwd_kernel::generate()
+template<typename Vmm>
+void _jit_avx512_common_conv_fwd_kernel<Vmm>::generate()
 {
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
     const auto &p = attr_.post_ops_;
     for (int i = 0; i < p.len_; i++) {
         auto &post_op = p.entry_[i];
@@ -1318,17 +1300,10 @@ bool jit_avx512_common_conv_fwd_kernel::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true; // no post_ops
-    case 1:
-        return true // sum OR eltwise OR depthwise
-                && !jcp.with_eltwise && (is_simple(0) || is_sum(0));
-    case 2:
-        return true // sum->relu
-                && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) ||
-                                         (is_simple(0) && is_simple(1)));
-    case 3:
-        return true // sum->relu
-                && !jcp.with_eltwise && (is_sum(0) && is_simple(1) && is_simple(2));
+    case 0: return true;
+    case 1: return is_simple(0) || is_sum(0);
+    case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_simple(1));
+    case 3: return is_sum(0) && is_simple(1) && is_simple(2);
     default: return false;
     }
 
@@ -1336,25 +1311,22 @@ bool jit_avx512_common_conv_fwd_kernel::post_ops_ok(
 }
 
 status_t jit_avx512_common_conv_fwd_kernel::init_conf(
-            jit_conv_conf_t &jcp,
-            const convolution_desc_t &cd, cpu_memory_t::pd_t &src_pd,
-            cpu_memory_t::pd_t &weights_pd, cpu_memory_t::pd_t &dst_pd,
-            cpu_memory_t::pd_t &bias_pd, const primitive_attr_t &attr,
-            int nthreads, bool with_relu, float relu_negative_slope)
+            jit_conv_conf_t &jcp, const convolution_desc_t &cd,
+            cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &weights_pd,
+            cpu_memory_t::pd_t &dst_pd, cpu_memory_t::pd_t &bias_pd,
+            const primitive_attr_t &attr, int nthreads)
 {
     using namespace prop_kind;
 
     if (!mayiuse(avx512_common))
         return status::unimplemented;
 
-    const int simd_w = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
-
     const memory_desc_wrapper src_d(&src_pd);
     const memory_desc_wrapper weights_d(&weights_pd);
     const memory_desc_wrapper dst_d(&dst_pd);
     const memory_desc_wrapper bias_d(&bias_pd);
 
-    int regs = 28;
+    const int regs = 28;
     const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
     int ndims = src_d.ndims();
 
@@ -1382,9 +1354,6 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
     jcp.stride_h = (ndims == 3) ? 1 : cd.strides[ndims-4];
     jcp.stride_w = cd.strides[ndims-3];
     jcp.src_fmt = src_d.format();
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     jcp.dilate_d = (ndims == 5) ? cd.dilates[0] : 0;
     jcp.dilate_h = (ndims == 3) ? 0 : cd.dilates[ndims-4];
@@ -1397,14 +1366,26 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
 
     jcp.is_1stconv = is_1stconv(jcp);
 
-    jcp.oc_block = simd_w;
-    jcp.ic_block = jcp.is_1stconv ? jcp.ic : simd_w;
-    jcp.aligned_threads = 0;
-
     bool ok_to_pad_channels = true
         && jcp.ngroups == 1
         && src_d.data_type() == data_type::f32;
 
+    const int full_simd_w = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
+    jcp.simd_w = full_simd_w;
+    bool ok_to_try_xmm = true
+        && mayiuse(avx512_core)
+        && src_d.data_type() == data_type::f32
+        && !jcp.is_1stconv
+        && !ok_to_pad_channels
+        && (jcp.ic % jcp.simd_w != 0 || jcp.oc % jcp.simd_w != 0)
+        && (jcp.ic % 8 != 0 || jcp.oc % 8 != 0);
+    if (ok_to_try_xmm)
+        jcp.simd_w = 4;
+
+    jcp.oc_block = jcp.simd_w;
+    jcp.ic_block = jcp.is_1stconv ? jcp.ic : jcp.simd_w;
+    jcp.aligned_threads = 0;
+
     if (ok_to_pad_channels) {
         jcp.oc = rnd_up(jcp.oc, jcp.oc_block);
         jcp.ic = rnd_up(jcp.ic, jcp.ic_block);
@@ -1420,14 +1401,28 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
 
     const auto &p = attr.post_ops_;
     jcp.with_sum = p.find(primitive_kind::sum) != -1;
+    const int eltwise_ind = p.find(primitive_kind::eltwise);
+    jcp.with_eltwise = eltwise_ind != -1;
+    if (jcp.with_eltwise) {
+        jcp.eltwise = p.entry_[eltwise_ind].eltwise;
+        if (dst_d.data_type() == data_type::s32) return status::unimplemented;
+    }
 
     auto src_format = jcp.is_1stconv
         ? pick(ndims - 3, ncw, nchw, ncdhw)
+        : ((jcp.simd_w == 4)
+            ? pick(ndims - 3, nCw4c, nChw4c, nCdhw4c)
+            : pick(ndims - 3, nCw16c, nChw16c, nCdhw16c));
+    auto dst_format = (jcp.simd_w == 4)
+        ? pick(ndims - 3, nCw4c, nChw4c, nCdhw4c)
         : pick(ndims - 3, nCw16c, nChw16c, nCdhw16c);
-    auto dst_format = pick(ndims - 3, nCw16c, nChw16c, nCdhw16c);
     auto wei_format = with_groups
-        ? pick(ndims - 3, gOIw16i16o, gOIhw16i16o, gOIdhw16i16o)
-        : pick(ndims - 3, OIw16i16o, OIhw16i16o, OIdhw16i16o);
+        ? ((jcp.simd_w == 4)
+            ? pick(ndims - 3, gOIw4i4o, gOIhw4i4o, gOIdhw4i4o)
+            : pick(ndims - 3, gOIw16i16o, gOIhw16i16o, gOIdhw16i16o))
+        : ((jcp.simd_w == 4)
+            ? pick(ndims - 3, OIw4i4o, OIhw4i4o, OIdhw4i4o)
+            : pick(ndims - 3, OIw16i16o, OIhw16i16o, OIdhw16i16o));
 
     if (src_d.format() == any)
         CHECK(src_pd.set_format(src_format));
@@ -1491,16 +1486,24 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
                 jcp.ver = ver_fma;
             if (jcp.ver == ver_4fma) {
                 const auto w_format = with_groups
-                    ? pick(ndims - 3, gOiw16o, gOihw16o, gOidhw16o)
-                    : pick(ndims - 3, Oiw16o, Oihw16o, Oidhw16o);
+                    ? ((jcp.simd_w == 4)
+                        ? pick(ndims - 3, gOiw4o, gOihw4o, gOidhw4o)
+                        : pick(ndims - 3, gOiw16o, gOihw16o, gOidhw16o))
+                    : ((jcp.simd_w == 4)
+                        ? pick(ndims - 3, Oiw4o, Oihw4o, Oidhw4o)
+                        : pick(ndims - 3, Oiw16o, Oihw16o, Oidhw16o));
                 if (weights_d.format() == any)
                     CHECK(weights_pd.set_format(w_format));
                 if (weights_d.format() != w_format)
                     return status::unimplemented;
             } else {
                 const auto w_format = with_groups
-                    ? pick(ndims - 3, gOwi16o, gOhwi16o, gOdhwi16o)
-                    : pick(ndims - 3, Owi16o, Ohwi16o, Odhwi16o);
+                    ? ((jcp.simd_w == 4)
+                        ? pick(ndims - 3, gOwi4o, gOhwi4o, gOdhwi4o)
+                        : pick(ndims - 3, gOwi16o, gOhwi16o, gOdhwi16o))
+                    : ((jcp.simd_w == 4)
+                        ? pick(ndims - 3, Owi4o, Ohwi4o, Odhwi4o)
+                        : pick(ndims - 3, Owi16o, Ohwi16o, Odhwi16o));
                 if (weights_d.format() == any)
                     CHECK(weights_pd.set_format(w_format));
                 if (weights_d.format() != w_format)
@@ -1561,10 +1564,25 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
         }
     }
 
+    /* Grouped channel offset to support 'non-blocked data' format for
+     * convolution sizes with '(input_channel / ngroups) < simd' */
+    jcp.nonblk_group_off
+            = (jcp.ngroups > 1 && one_of(src_d.format(), ncw, nchw, ncdhw)) ?
+            jcp.ic :
+            1;
+
     jcp.nb_ic = jcp.ic / jcp.ic_block;
     jcp.nb_oc = jcp.oc / jcp.oc_block;
     jcp.nb_ic_blocking = jcp.nb_oc_blocking = 1;
 
+    auto is_ow_threading_applicable = [=]() {
+        return (true && !jcp.is_1stconv && one_of(jcp.ndims, 3, 4)
+                && IMPLICATION(mayiuse(avx512_mic),
+                           jcp.ver == ver_4fma
+                                   && IMPLICATION(jcp.mb != 1,
+                                              jcp.ih == 1 && jcp.kh == 1)));
+    };
+
     if (jcp.ver == ver_4vnni) {
         jcp.kernel_kind = embd_bcast;
     }
@@ -1593,9 +1611,13 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
     }
 
     if (one_of(jcp.ver, ver_4vnni, ver_4fma) && !jcp.is_1stconv) {
-        if (jcp.kw == 3 && jcp.kh == 3 && jcp.ow == 7 && jcp.oh == 7) {
-            if (jcp.nb_oc % 2 == 0)
+        if ((jcp.kw <= 5 && jcp.kh <= 5 && jcp.kw == jcp.kh && jcp.ow <= 8
+                    && jcp.oh <= 8 && jcp.ow == jcp.oh)
+                || (jcp.stride_h != 1 && jcp.ur_w < jcp.ow)) {
+            if (jcp.nb_oc % 2 == 0) {
                 jcp.nb_oc_blocking = 2;
+                jcp.ur_w = nstl::min(jcp.ow, regs / jcp.nb_oc_blocking);
+            }
         } else {
             for (int i = jcp.nb_oc; i > 0; i--)
                 if (i * jcp.ur_w <= regs && jcp.nb_oc % i == 0) {
@@ -1603,15 +1625,74 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
                     break;
                 }
         }
-        if (jcp.ver == ver_4fma
-            && is_1D_conv(jcp) && one_of(jcp.ndims, 3, 4)) {
-            if (jcp.nb_oc % 2 == 0) {
+        if (jcp.ver == ver_4fma && is_ow_threading_applicable()) {
+            if (jcp.nb_oc % 2 == 0 && jcp.ur_w < jcp.ow
+                    && jcp.ow != 2 * jcp.ur_w) {
                 jcp.nb_oc_blocking = 2;
                 jcp.ur_w = nstl::min(jcp.ow, regs / jcp.nb_oc_blocking);
             }
         }
     }
 
+    jcp.ow_block = jcp.ow;
+
+    auto get_thr_eff = [=](int nb_oc_blocking, int ow_block) {
+        int nb_ow = div_up(jcp.ow, ow_block);
+        int nb_oc_chunks = div_up(jcp.nb_oc, nb_oc_blocking);
+        int work_amount = jcp.mb * jcp.oh * nb_oc_chunks * nb_ow;
+        float disbalance = (float)jcp.ow / rnd_up(jcp.ow, ow_block);
+        float thr_eff = disbalance * (float)work_amount
+            / rnd_up(work_amount, nthreads);
+        return thr_eff;
+    };
+
+    auto get_ow_block = [=](int nb_oc_blocking, int ur_w, float &eff) {
+        int res_ow_block = jcp.ow;
+        eff = get_thr_eff(nb_oc_blocking, res_ow_block);
+        if (!is_ow_threading_applicable())
+            return res_ow_block;
+
+        int L2_part = (get_cache_size(2) * 7 / 8) / typesize;
+        if (jcp.ver == ver_4fma)
+            L2_part /= 2;
+        int size_src_chunk = jcp.ic_block * ur_w * jcp.kh;
+        int size_dst_chunk = jcp.oc_block * nb_oc_blocking * ur_w;
+        int size_wei_chunk = jcp.oc_block * nb_oc_blocking * jcp.ic_block
+            * jcp.kw * jcp.kh;
+        int nurw_cache = (L2_part - 2 * size_wei_chunk)
+            / (2 * size_dst_chunk + 2 * size_src_chunk);
+        // current design of generate() requires ow_block >= 2 * ur_w
+        int ow_block_cache = ur_w * nstl::max(2, nurw_cache);
+
+        int ow_block_thr = ow_block_cache;
+        eff = get_thr_eff(nb_oc_blocking, ow_block_thr);
+
+        int max_nb_ow = div_up(jcp.ow, 2 * ur_w);
+        int start_nb_ow = div_up(jcp.ow, ow_block_thr);
+        for (int nb_ow = start_nb_ow; nb_ow <= max_nb_ow; nb_ow++) {
+            int ow_block
+                = nstl::min(rnd_up(div_up(jcp.ow, nb_ow), ur_w), jcp.ow);
+            float eff_threshold = (jcp.ver == ver_4fma) ? 0.8f : 0.9f;
+            if (ow_block < nb_oc_blocking * jcp.oc_block && eff > eff_threshold)
+                break;
+            if (div_up(jcp.ow, ow_block) != nb_ow)
+                continue;
+            float thr_eff = get_thr_eff(nb_oc_blocking, ow_block);
+            float eff_step = (jcp.ver == ver_4fma) ? 1.1f : 1.f;
+            if (ow_block >= 2 * ur_w && thr_eff > eff_step * eff) {
+                ow_block_thr = ow_block;
+                eff = thr_eff;
+            }
+            eff_threshold = (jcp.ver == ver_4fma) ? 0.9f : 0.98f;
+            if (eff > eff_threshold)
+                break;
+        }
+        res_ow_block = nstl::min(jcp.ow, nstl::max(2 * ur_w, ow_block_thr));
+        eff = get_thr_eff(nb_oc_blocking, res_ow_block);
+        return res_ow_block;
+    };
+
+
     if (jcp.ver == ver_fma && mayiuse(avx512_core)) {
         int try_nb_oc_blocking = 2;
         unsigned int ker_inp_size = typesize * div_up(jcp.iw, jcp.stride_w)
@@ -1629,7 +1710,6 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
             && !(jcp.kw == 3 && jcp.ow == 28 && jcp.ic >= 512);
 
         if (jcp.mb == 1) {
-            jcp.kernel_kind = embd_bcast;
             unsigned int inp_size = jcp.mb * div_up(jcp.ih, jcp.stride_h)
                     * div_up(jcp.iw, jcp.stride_w) * jcp.ic;
             unsigned int wei_size = jcp.ic * jcp.oc * jcp.kh * jcp.kw;
@@ -1662,59 +1742,52 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
                     }
                 }
             }
-        } else if (jcp.kw > 3
-            || (jcp.stride_w == 1 && jcp.stride_h == 1
-                && embd_bcast_condition)
-            || ((jcp.stride_w != 1 || jcp.stride_h != 1)
-                && ((jcp.mb <= 16 && (jcp.oc <= 192 || jcp.oh <= 10)
-                     && embd_bcast_condition)))
-            ) {
+        }
+
+        if (jcp.kw > 3
+                || (jcp.stride_w == 1 && jcp.stride_h == 1
+                           && embd_bcast_condition)
+                || ((jcp.stride_w != 1 || jcp.stride_h != 1)
+                           && ((jcp.mb <= 16 && (jcp.oc <= 192 || jcp.oh <= 10)
+                                      && embd_bcast_condition)))
+                || (jcp.mb == 1
+                           && (jcp.ur_w >= jcp.ow || jcp.is_1stconv
+                                      || (jcp.ow <= 147 && jcp.oc <= 96)))) {
             jcp.kernel_kind = embd_bcast;
             jcp.ur_w = nstl::min(jcp.ow, regs);
             jcp.nb_ic_blocking = jcp.nb_oc_blocking = 1;
             if (ker_total_size < L1_cache_size && jcp.ow <= 8 && jcp.kh <= 3
-                && jcp.kw <= 3) {
-                if (jcp.nb_oc % try_nb_oc_blocking == 0 && !jcp.is_1stconv) {
-                    jcp.nb_oc_blocking = try_nb_oc_blocking;
-                    jcp.ur_w = 31 / (jcp.nb_oc_blocking + 1);
-                    if (jcp.ow < jcp.ur_w)
-                        jcp.ur_w = jcp.ow;
-                }
+                    && jcp.kw <= 3 && jcp.nb_oc % try_nb_oc_blocking == 0
+                    && IMPLICATION(jcp.is_1stconv, jcp.mb == 1)
+                    && IMPLICATION(jcp.mb == 1, jcp.ur_w < jcp.ow)) {
+                jcp.nb_oc_blocking = try_nb_oc_blocking;
+                jcp.ur_w = nstl::min(jcp.ow, 31 / (jcp.nb_oc_blocking + 1));
             }
         } else {
             jcp.kernel_kind = expl_bcast;
             jcp.nb_ic_blocking = 1;
-            jcp.nb_oc_blocking = 4;
-            if (jcp.nb_oc < jcp.nb_oc_blocking) jcp.nb_oc_blocking = jcp.nb_oc;
-            if (jcp.nb_oc % jcp.nb_oc_blocking != 0)
-                for (int i = jcp.nb_oc_blocking; i > 0; i--)
+            if (IMPLICATION(jcp.is_1stconv, jcp.mb > 1)) {
+                float best_thr_eff = 0.f;
+                int best_nb_oc_blocking = 1;
+                for (int i = nstl::min(jcp.nb_oc, 5); i > 0; i--) {
                     if (jcp.nb_oc % i == 0) {
-                        jcp.nb_oc_blocking = i;
-                        break;
+                        float thr_eff;
+                        int ur_w = nstl::min(jcp.ow, 31 / (i + 1));
+                        get_ow_block(i, ur_w, thr_eff);
+                        if (thr_eff > 1.05f * best_thr_eff) {
+                            best_nb_oc_blocking = i;
+                            best_thr_eff = thr_eff;
+                        }
                     }
-            jcp.ur_w = 31 / (jcp.nb_oc_blocking + 1);
-            if (jcp.ow < jcp.ur_w)
-                jcp.ur_w = jcp.ow;
+                }
+                jcp.nb_oc_blocking = best_nb_oc_blocking;
+                jcp.ur_w = nstl::min(jcp.ow, 31 / (jcp.nb_oc_blocking + 1));
+            }
         }
     }
 
     jcp.ur_w_tail = jcp.ow % jcp.ur_w;
 
-    jcp.ow_block = jcp.ow;
-    if (is_ow_threading_available(jcp)) {
-        const int L1_part = get_cache_size(1) * 5 / 8;
-        int size_src_chunk = typesize * jcp.ic_block * jcp.ur_w;
-        int size_dst_chunk = typesize
-            * jcp.oc_block * jcp.nb_oc_blocking * jcp.ur_w;
-        int size_wei_chunk = typesize
-            * jcp.oc_block * jcp.ic_block * jcp.nb_oc_blocking * jcp.kw;
-        int nurw = (L1_part - size_wei_chunk)
-            / (size_dst_chunk + size_src_chunk);
-        // current design of generate() requires ow_block >= 2 * ur_w
-        jcp.ow_block = jcp.ur_w * nstl::max(2, nurw);
-    }
-    jcp.nb_ow = div_up(jcp.ow, jcp.ow_block);
-
     args_ok = true
         && jcp.l_pad <= jcp.ur_w
         && jcp.ic <= src_d.blocking_desc().padding_dims[1]
@@ -1734,10 +1807,14 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
 
     jcp.nb_ic_L2 = jcp.nb_ic;
 
+    float thr_eff;
+    jcp.ow_block = get_ow_block(jcp.nb_oc_blocking, jcp.ur_w, thr_eff);
+    jcp.nb_ow = div_up(jcp.ow, jcp.ow_block);
+
     const int L2_size = get_cache_size(2, true) / sizeof(float);
     // Source and output data needs to fit in L2,
     // leaving some space for weights and prefetching.
-    int h_L2 = int(((0.6f * L2_size) / simd_w
+    int h_L2 = int(((0.6f * L2_size) / jcp.simd_w
                            - nstl::min(0, jcp.kh - jcp.stride_h) * jcp.iw)
             / (jcp.stride_h * jcp.iw + jcp.ow));
     jcp.h_blocking = nstl::max(1, nstl::min(jcp.oh, h_L2));
@@ -1765,7 +1842,7 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
                     break;
                 }
             }
-        } else {
+        } else if (jcp.ic > 64) {
             jcp.nb_ic_L2 = 2; /* according to performance data*/
         }
     }
@@ -1773,6 +1850,12 @@ status_t jit_avx512_common_conv_fwd_kernel::init_conf(
     return status::success;
 }
 
+void jit_avx512_common_conv_fwd_kernel::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    if (jcp.with_bias && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, jcp.typesize_out * jcp.oc);
+}
+
 void jit_avx512_common_conv_bwd_data_kernel_f32::prepare_output(int ur_w)
 {
     for (int k = 0; k < jcp.nb_ic_blocking; k++) {
@@ -1826,7 +1909,7 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_4fma(
     int kw = jcp.kw;
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
-    Label kh_label, last_iter_label, loop_end_label, kd_label, skip_kd_loop;
+    Label kh_label, last_iter_label, loop_end_label, kd_label;
     int ker_load_number = 4;
     int shift_ker_ptr = typesize * kw * oc_block * ic_block;
     int shift_dst_ptr = typesize * ow * oc_block;
@@ -1857,8 +1940,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_4fma(
         }
     };
 
-    prepare_output(ur_w);
-
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_dst, reg_dst);
         mov(aux_reg_ker, reg_ker);
@@ -2004,13 +2085,10 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_4fma(
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_src);
         pop(reg_src_prf);
     }
-
-    store_output(ur_w);
 }
 
 void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_vnni(
@@ -2031,8 +2109,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_vnni(
         return jcp.typesize_in * (blk_offset + oc_offset);
     };
 
-    prepare_output(ur_w);
-
     mov(aux_reg_dst, reg_dst);
     mov(aux_reg_ker, reg_ker);
     mov(aux_reg_dst_prf, reg_dst_prf);
@@ -2108,15 +2184,12 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_vnni(
         cmp(reg_kj, 0);
         jg(kh_label, T_NEAR);
     }
-
-    store_output(ur_w);
 }
 
 void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma(
         int ur_w, int l_overflow, int r_overflow)
 {
-    Label kh_label, kd_label, skip_kd_loop;
-    Label store_output_label;
+    Label kh_label, kd_label;
     int kw = jcp.kw;
     int ow = jcp.ow;
 
@@ -2139,8 +2212,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma(
     int prf_inst_spacing = nstl::max(1, num_fmas / num_prfs);
     int prf_inst_trigger = (num_fmas % prf_inst_spacing) / 2;
 
-    prepare_output(ur_w);
-
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_dst, reg_dst);
         mov(aux_reg_ker, reg_ker);
@@ -2154,9 +2225,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma(
         push(reg_src);
 
         mov(reg_ki, ptr[param + GET_OFF(kd_padding)]);
-        cmp(reg_ki, 0);
-        je(store_output_label, T_NEAR);
-
         mov(aux_reg_dst_d, reg_dst);
         mov(aux_reg_ker_d, ptr[param + GET_OFF(filt)]);
         mov(aux_reg_dst_d_prf, reg_dst_prf);
@@ -2167,8 +2235,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma(
     } else {
         mov(reg_kj, reg_kh);
     }
-    cmp(reg_kj, 0);
-    je(store_output_label, T_NEAR);
 
     if (jcp.ndims == 5) {
         mov(aux_reg_dst, aux_reg_dst_d);
@@ -2268,16 +2334,12 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma(
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
     }
 
-    L(store_output_label); {
-        if (jcp.ndims == 5)
-        {
-            pop(reg_src);
-            pop(reg_src_prf);
-        }
-        store_output(ur_w);
+    if (jcp.ndims == 5)
+    {
+        pop(reg_src);
+        pop(reg_src_prf);
     }
 }
 
@@ -2291,7 +2353,7 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma_core(
     int ic_block = jcp.ic_block;
     int oc_block = jcp.oc_block;
     int nb_ic_block = jcp.nb_ic_blocking;
-    Label kh_label, skip_kh_loop, kd_label, skip_kd_loop;
+    Label kh_label, kd_label;
 
     int shift_ker_ptr = typesize * kw * oc_block * ic_block;
     int shift_dst_ptr = typesize * (jcp.dilate_h + 1) * ow * oc_block;
@@ -2307,8 +2369,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma_core(
         return typesize * (blk_offset + oc_offset);
     };
 
-    prepare_output(ur_w);
-
     if (one_of(jcp.ndims, 3, 4)) {
         mov(aux_reg_dst, reg_dst);
         mov(aux_reg_ker, reg_ker);
@@ -2327,8 +2387,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma_core(
     } else {
         mov(reg_kj, reg_kh);
     }
-    cmp(reg_kj, 0);
-    je(skip_kh_loop, T_NEAR);
 
     if (jcp.ndims == 5) {
         mov(aux_reg_dst, aux_reg_dst_d);
@@ -2370,7 +2428,6 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma_core(
         cmp(reg_kj, 0);
         jg(kh_label, T_NEAR);
     }
-    L(skip_kh_loop);
 
     if (jcp.ndims == 5) {
         sub(aux_reg_dst_d,
@@ -2380,19 +2437,29 @@ void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop_fma_core(
         dec(reg_ki);
         cmp(reg_ki, 0);
         jg(kd_label, T_NEAR);
-        L(skip_kd_loop);
 
         pop(reg_src);
         pop(reg_src_prf);
     }
-
-    store_output(ur_w);
 }
 
 inline void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop(
         int ur_w, int l_overflow, int r_overflow)
 {
     if (jcp.ndims == 5) push(reg_oi);
+
+    prepare_output(ur_w);
+
+    Label skip_compute_loop;
+    if (jcp.ndims == 5) {
+        mov(reg_kj, ptr[param + GET_OFF(kd_padding)]);
+        cmp(reg_kj, 0);
+        je(skip_compute_loop, T_NEAR);
+    }
+    mov(reg_kj, ptr[param + GET_OFF(kh_padding)]);
+    cmp(reg_kj, 0);
+    je(skip_compute_loop, T_NEAR);
+
     if (jcp.ver == ver_4vnni || jcp.ver == ver_vnni)
         compute_loop_vnni(ur_w, l_overflow, r_overflow);
     else if (jcp.ver == ver_4fma)
@@ -2407,6 +2474,9 @@ inline void jit_avx512_common_conv_bwd_data_kernel_f32::compute_loop(
               compute_loop_fma_core(ur_w, l_overflow, r_overflow);
     else
         assert("!unknown convolution version");
+
+    L(skip_compute_loop);
+    store_output(ur_w);
     if (jcp.ndims == 5) pop(reg_oi);
 }
 
@@ -2504,7 +2574,9 @@ status_t jit_avx512_common_conv_bwd_data_kernel_f32::init_conf(
 {
     if (!mayiuse(avx512_common)) return status::unimplemented;
 
-    const int simd_w = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
+    jcp = zero<decltype(jcp)>();
+
+    jcp.simd_w = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
     const bool with_groups = weights_d.ndims() == diff_src_d.ndims() + 1;
     int ndims = diff_src_d.ndims();
 
@@ -2556,8 +2628,8 @@ status_t jit_avx512_common_conv_bwd_data_kernel_f32::init_conf(
 
     jcp.is_1stconv = false;
 
-    jcp.oc_block = simd_w;
-    jcp.ic_block = jcp.is_1stconv ? jcp.ic : simd_w;
+    jcp.oc_block = jcp.simd_w;
+    jcp.ic_block = jcp.is_1stconv ? jcp.ic : jcp.simd_w;
 
     bool ok_to_pad_channels = true
         && jcp.ngroups == 1
@@ -2777,8 +2849,15 @@ status_t jit_avx512_common_conv_bwd_data_kernel_f32::init_conf(
         && jcp.oc <= diff_dst_d.blocking_desc().padding_dims[1]
         && jcp.ic <= weights_d.blocking_desc().padding_dims[with_groups + 1]
         && jcp.oc <= weights_d.blocking_desc().padding_dims[with_groups + 0];
+    if (!args_ok) return status::unimplemented;
 
-    return args_ok ? status::success : status::unimplemented;
+    return status::success;
+}
+
+void jit_avx512_common_conv_bwd_data_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    UNUSED(scratchpad);
+    UNUSED(jcp);
 }
 
 const int jit_avx512_common_conv_bwd_weights_kernel_f32::max_ur_w = 28;
@@ -4464,13 +4543,10 @@ void jit_avx512_common_conv_bwd_weights_kernel_f32::generate()
 status_t jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
     jit_conv_conf_t &jcp, const convolution_desc_t &cd,
     cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &diff_weights_pd,
-    cpu_memory_t::pd_t &diff_bias_pd, cpu_memory_t::pd_t &diff_dst_pd)
-{
+    cpu_memory_t::pd_t &diff_bias_pd, cpu_memory_t::pd_t &diff_dst_pd) {
     if (!mayiuse(avx512_common))
         return status::unimplemented;
 
-    const int simd_w = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
-
     const memory_desc_wrapper src_d(&src_pd);
     const memory_desc_wrapper diff_weights_d(&diff_weights_pd);
     const memory_desc_wrapper diff_bias_d(&diff_bias_pd);
@@ -4480,6 +4556,8 @@ status_t jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
     int ndims = src_d.ndims();
 
     jcp = zero<decltype(jcp)>();
+
+    jcp.simd_w = cpu_isa_traits<avx512_common>::vlen / sizeof(float);
     jcp.ndims = ndims;
     jcp.prop_kind = cd.prop_kind;
 
@@ -4545,14 +4623,14 @@ status_t jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
     /* check for the 1st convolution */
     jcp.is_1stconv = is_1stconv(jcp);
 
-    jcp.oc_block = simd_w;
+    jcp.oc_block = jcp.simd_w;
 
     bool ok_to_pad_channels = true
         && jcp.ngroups == 1
         && src_d.data_type() == data_type::f32;
 
     if (ok_to_pad_channels)
-        jcp.oc = rnd_up(jcp.oc, simd_w);
+        jcp.oc = rnd_up(jcp.oc, jcp.simd_w);
 
     if (jcp.oc % jcp.oc_block)
         return status::unimplemented;
@@ -4628,7 +4706,7 @@ status_t jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
             && everyone_is(0, jcp.l_pad, jcp.r_pad, jcp.t_pad, jcp.b_pad)
             && jcp.kw <= 28 - jcp.with_bias
             && jcp.stride_w == 4
-            && tr_ld / simd_w <= 4 /* [bwd_w:tr_src:r1] */
+            && tr_ld / jcp.simd_w <= 4 /* [bwd_w:tr_src:r1] */
             && IMPLICATION(jcp.with_bias, kh_step_rem == 1) /* [bwd_w:b:r1] */
             && IMPLICATION(diff_weights_d.format() != any,
                     diff_weights_d.format() == want_4fma_wfmt);
@@ -4667,7 +4745,7 @@ status_t jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
         if (!ok)
             return status::unimplemented;
 
-        jcp.ic_block = simd_w;
+        jcp.ic_block = jcp.simd_w;
         if (ok_to_pad_channels)
             jcp.ic = rnd_up(jcp.ic, jcp.ic_block);
         jcp.nb_ic = jcp.ic / jcp.ic_block;
@@ -4735,10 +4813,209 @@ status_t jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
         && jcp.oc <= diff_dst_d.blocking_desc().padding_dims[1]
         && jcp.ic <= diff_weights_d.blocking_desc().padding_dims[with_groups + 1]
         && jcp.oc <= diff_weights_d.blocking_desc().padding_dims[with_groups + 0];
+    if (!args_ok) return status::unimplemented;
+
+    {   // balancing
+        int nthr, nthr_mb, nthr_g, nthr_oc_b, nthr_ic_b;
+        balance(jcp, nthr, nthr_mb, nthr_g, nthr_oc_b, nthr_ic_b);
+        jcp.nthr = nthr;
+        jcp.nthr_mb = nthr_mb;
+        jcp.nthr_g = nthr_g;
+        jcp.nthr_oc_b = nthr_oc_b;
+        jcp.nthr_ic_b = nthr_ic_b;
+    }
+
+    return status::success;
+}
+
+void jit_avx512_common_conv_bwd_weights_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    if (utils::one_of(jcp.ver, ver_4fma, ver_4vnni, ver_vnni)) {
+        if (jcp.is_1stconv) {
+            const size_t tr_src_size =
+                jcp.nthr / jcp.nthr_oc_b * jcp.ih * jcp.stride_w * jcp.tr_ld;
+            scratchpad.book(key_conv_tr_src, jcp.typesize_in * tr_src_size);
+        } else {
+            // XXX: See the comment about tr_iw and guarding elements in
+            // jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf()
+            const size_t max_nthr = jcp.nthr_mb * jcp.ngroups * jcp.nb_ic;
+            const size_t min_tr_src_size_per_thr
+                = jcp.ih * jcp.ic_block * jcp.tr_iw;
+            const size_t tr_src_size = max_nthr * min_tr_src_size_per_thr
+                + jcp.tr_src_num_guard_elems;
+            scratchpad.book(key_conv_tr_src, jcp.typesize_in * tr_src_size);
+        }
+
+        /* prepare synchronization contexts */
+        if (jcp.nthr_oc_b > 1) {
+            const int tr_src_bctx_size = jcp.nthr / jcp.nthr_oc_b;
+            scratchpad.book(key_conv_tr_src_bctx,
+                    sizeof(simple_barrier::ctx_t) * tr_src_bctx_size);
+        }
+
+        if (utils::one_of(jcp.ver, ver_4vnni, ver_vnni)) {
+            const size_t tr_diff_dst_size = jcp.nthr_mb * jcp.ngroups
+                * jcp.nb_oc * jcp.oc_block * jcp.tr_ow * jcp.oh;
+            scratchpad.book(key_conv_tr_diff_dst,
+                    jcp.typesize_in * tr_diff_dst_size);
+
+            /* prepare synchronization contexts */
+            if (jcp.nthr_ic_b > 1) {
+                const size_t tr_diff_dst_bctx_size = jcp.nthr / jcp.nthr_ic_b;
+                scratchpad.book(key_conv_tr_diff_dst_bctx,
+                        sizeof(simple_barrier::ctx_t) * tr_diff_dst_bctx_size);
+            }
+        }
+    }
+
+    if (jcp.nthr_mb > 1) {
+        const int wei_size = jcp.ngroups * jcp.oc * jcp.ic
+            * jcp.kh * jcp.kw * jcp.kd;
+        const int bia_size = jcp.ngroups * jcp.oc;
+        const size_t wei_bia_reduction_size = wei_size + bia_size;
+
+        scratchpad.book(key_conv_wei_bia_reduction,
+                jcp.typesize_out * wei_bia_reduction_size * (jcp.nthr_mb - 1));
+        scratchpad.book(key_conv_wei_bia_reduction_bctx,
+                sizeof(simple_barrier::ctx_t));
+    }
+
+    if (jcp.with_bias && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, jcp.typesize_out * jcp.oc);
+}
 
-    return args_ok ? status::success : status::unimplemented;
+void jit_avx512_common_conv_bwd_weights_kernel_f32::balance(
+        const jit_conv_conf_t &j, int &nthr_, int &nthr_mb_, int &nthr_g_,
+        int &nthr_oc_b_, int &nthr_ic_b_)
+{
+    nthr_ = nthr_mb_ = nthr_g_ = nthr_oc_b_ = nthr_ic_b_ = 1;
+
+    const int max_threads = mkldnn_get_max_threads();
+
+    if (max_threads < j.ngroups) {
+        /* simplification... fortunately it doesn't hurt much */
+        return;
+    }
+
+    if (!mkldnn_thr_syncable()
+            && utils::one_of(j.ver, ver_4fma, ver_4vnni, ver_vnni)) {
+        // should not happen -- the driver is not ready
+        // for TBB-like non-synchronous threading yet
+        return;
+    }
+
+    if (j.ver == ver_4fma && j.is_1stconv) {
+        nthr_g_ = 1;
+        nthr_oc_b_ = 1;
+        nthr_ic_b_ = nstl::min(j.nb_ic, max_threads);
+        nthr_mb_ = nstl::min(max_threads / nthr_ic_b_, j.mb);
+        nthr_ = nthr_mb_ * nthr_oc_b_ * nthr_ic_b_ * nthr_g_;
+        return;
+    }
+
+    nthr_g_ = j.ngroups;
+    const int nthr = max_threads / nthr_g_;
+
+    auto calc_mem_cost = [=](int nthr_mb, int nthr_oc_b, int nthr_ic_b) {
+        /* calculate per thread memory cost (read/write). high level optimizer
+         * tries to minimize memory consumption. few notes:
+         *  (n1) unclear why, but that essentially helps first convolution...
+         *  (n2) assuming the reduction over minibatch is always there:
+         *    - instead of 8 it should be 5 here (write ~= 2 read):
+         *      kernel: temporal workspace 1 write
+         *      reduction: 1 read from workspace and 1 write to the diff_wei
+         *    - but experiments showed 8 works better than 5 or 6... */
+
+        const int src_coef = j.ver == ver_4fma || j.ver == ver_vnni ? 4 : 1;
+        const int dst_coef = 1;
+        const int wei_coef = j.ver == ver_vnni ? 4 : 8;
+
+        return 0
+            + src_coef
+            * div_up(j.mb, nthr_mb) * div_up(j.ngroups, nthr_g_)
+            * div_up(j.nb_ic, nthr_ic_b) * j.ic_block * j.ih * j.iw * j.id
+            / j.stride_d / j.stride_h / j.stride_w /* (n1) */
+            + dst_coef
+            * div_up(j.mb, nthr_mb) * div_up(j.ngroups, nthr_g_)
+            * div_up(j.nb_oc, nthr_oc_b) * j.oc_block * j.oh * j.ow * j.od
+            + wei_coef /* (n2) */
+            * div_up(j.ngroups, nthr_g_)
+            * div_up(j.nb_oc, nthr_oc_b) * div_up(j.nb_ic, nthr_ic_b)
+            * j.kh * j.kw * j.kd * j.ic_block * j.oc_block;
+    };
+
+    int best_mem_cost = calc_mem_cost(nthr_mb_, nthr_oc_b_, nthr_ic_b_);
+
+    /* step 1: find the best thread distribution with lowest memory cost */
+    const int nthr_mb_max = nstl::min(nthr, j.mb * j.od);
+    for (int nthr_mb = 1; nthr_mb <= nthr_mb_max; ++nthr_mb) {
+        const int nthr_par = nthr / nthr_mb;
+        const int nthr_oc_b_max = nstl::min(nthr_par, j.nb_oc);
+        for (int nthr_oc_b = 1; nthr_oc_b <= nthr_oc_b_max; ++nthr_oc_b) {
+            int nthr_ic_b = nstl::min(nthr_par / nthr_oc_b, j.nb_ic);
+
+            int mem_cost = calc_mem_cost(nthr_mb, nthr_oc_b, nthr_ic_b);
+            if (mem_cost <= best_mem_cost) {
+                best_mem_cost = mem_cost;
+                nthr_mb_ = nthr_mb;
+                nthr_oc_b_ = nthr_oc_b;
+                nthr_ic_b_ = nthr_ic_b;
+            }
+        }
+
+        if (!mkldnn_thr_syncable()) { assert(nthr_mb == 1); break; }
+    }
+
+    if (j.ver != ver_vnni && !mayiuse(avx512_mic)) {
+        auto calc_comp_cost = [=](int nthr_mb, int nthr_oc_b, int nthr_ic_b) {
+            return 1
+                * div_up(j.mb, nthr_mb)
+                * div_up(j.ngroups, nthr_g_)
+                * div_up(j.nb_oc, nthr_oc_b)
+                * div_up(j.nb_ic, nthr_ic_b);
+        };
+
+        /* step 2: search for a thread distribution with lower compute cost.
+         * the constrains:
+         *  - memory cost cannot exceed 110% of the best found in the step 1
+         *  - unless compute cost is 133% lower than the current best case
+         * note: both constants were found empirically */
+        int best_comp_cost = calc_comp_cost(nthr_mb_, nthr_oc_b_, nthr_ic_b_);
+        for (int nthr_mb = 1; nthr_mb <= nthr_mb_max; ++nthr_mb) {
+            const int nthr_par = nthr / nthr_mb;
+            const int nthr_oc_b_max = nstl::min(nthr_par, j.nb_oc);
+            for (int nthr_oc_b = 1; nthr_oc_b <= nthr_oc_b_max; ++nthr_oc_b) {
+                int nthr_ic_b = nstl::min(nthr_par / nthr_oc_b, j.nb_ic);
+                int mem_cost = calc_mem_cost(nthr_mb, nthr_oc_b, nthr_ic_b);
+                int comp_cost = calc_comp_cost(nthr_mb, nthr_oc_b, nthr_ic_b);
+
+                const bool opt1 = comp_cost <= best_comp_cost
+                    && mem_cost < 1.1 * best_mem_cost;
+                const bool opt2 = 4 * comp_cost <= 3 * best_comp_cost;
+
+                if (opt1 || opt2) {
+                    best_comp_cost = comp_cost;
+                    nthr_mb_ = nthr_mb;
+                    nthr_oc_b_ = nthr_oc_b;
+                    nthr_ic_b_ = nthr_ic_b;
+                }
+            }
+
+            if (!mkldnn_thr_syncable()) { assert(nthr_mb == 1); break; }
+        }
+    }
+
+    if (nthr_mb_ > max_threads/2 && nthr_mb_ < max_threads)
+        nthr_mb_ = nstl::min(j.mb * j.od, max_threads);
+    nthr_ = nthr_mb_ * nthr_g_ * nthr_oc_b_ * nthr_ic_b_;
+
+    assert(nthr_ <= max_threads);
+    assert(IMPLICATION(!mkldnn_thr_syncable(), nthr_mb_ == 1));
 }
 
+template struct  _jit_avx512_common_conv_fwd_kernel<Zmm>;
+template struct  _jit_avx512_common_conv_fwd_kernel<Xmm>;
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.hpp
index ec6e18599..4641292d7 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_kernel.hpp
@@ -18,8 +18,9 @@
 #define JIT_AVX512_COMMON_CONV_KERNEL_F32_HPP
 
 #include "c_types_map.hpp"
-#include "cpu_memory.hpp"
+#include "memory_tracking.hpp"
 
+#include "cpu_memory.hpp"
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
 #include "jit_uni_eltwise.hpp"
@@ -29,16 +30,18 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-struct jit_avx512_common_conv_fwd_kernel : public jit_generator {
+template<typename Vmm>
+struct _jit_avx512_common_conv_fwd_kernel : public jit_generator {
 
-    jit_avx512_common_conv_fwd_kernel(jit_conv_conf_t ajcp,
-            const primitive_attr_t &attr) : jcp(ajcp), attr_(attr)
+    _jit_avx512_common_conv_fwd_kernel(jit_conv_conf_t ajcp,
+            const primitive_attr_t &attr)
+        : jcp(ajcp), attr_(attr)
     {
         generate();
-        jit_ker = (void (*)(jit_conv_call_s *))getCode();
+        jit_ker_ = (void (*)(jit_conv_call_s *))getCode();
     }
 
-    ~jit_avx512_common_conv_fwd_kernel() {
+    ~_jit_avx512_common_conv_fwd_kernel() {
         for (auto inj : eltwise_injectors)
             delete inj;
         eltwise_injectors.clear();
@@ -48,24 +51,11 @@ struct jit_avx512_common_conv_fwd_kernel : public jit_generator {
         depthwise_injectors.clear();
     }
 
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_common_conv_fwd_kernel)
-
-    static bool post_ops_ok(jit_conv_conf_t &jcp,
-            const primitive_attr_t &attr);
-    static status_t init_conf(jit_conv_conf_t &jcp,
-            const convolution_desc_t &cd,
-            cpu_memory_t::pd_t &src_pd,
-            cpu_memory_t::pd_t &weights_pd,
-            cpu_memory_t::pd_t &dst_pd,
-            cpu_memory_t::pd_t &bias_pd,
-            const primitive_attr_t &attr,
-            int nthreads,
-            bool with_relu,
-            float relu_negative_slope);
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(_jit_avx512_common_conv_fwd_kernel)
 
     jit_conv_conf_t jcp;
     const primitive_attr_t &attr_;
-    void (*jit_ker)(jit_conv_call_s *);
+    void (*jit_ker_)(jit_conv_call_s *);
 
 private:
     using reg64_t = const Xbyak::Reg64;
@@ -121,25 +111,25 @@ private:
     reg64_t reg_long_offt = r11;
     reg64_t reg_out_long_offt = r14;
 
-    inline Xbyak::Zmm zmm_ker(int i_ic) {
+    inline Vmm vmm_ker(int i_ic) {
         assert(i_ic < 4);
-        return Xbyak::Zmm(ker_reg_base_idx + i_ic);
+        return Vmm(ker_reg_base_idx + i_ic);
     }
 
-    inline Xbyak::Zmm zmm_out(int i_ur, int i_oc) {
+    inline Vmm vmm_out(int i_ur, int i_oc) {
         int idx = i_ur + i_oc * jcp.ur_w;
         assert(idx < ker_reg_base_idx);
-        return Xbyak::Zmm(idx);
+        return Vmm(idx);
     }
 
-    inline Xbyak::Zmm zmm_inp(int i_ic, int nb_x_blocking) {
+    inline Vmm vmm_inp(int i_ic, int nb_x_blocking) {
         int idx = i_ic + nb_x_blocking * jcp.ur_w;
         assert(idx < 31);
-        return Xbyak::Zmm(idx);
+        return Vmm(idx);
     }
 
     Xbyak::Reg64 imm_addr64 = r15;
-    Xbyak::Zmm zmm_wei = Xbyak::Zmm(31);
+    Vmm vmm_wei = Vmm(31);
 
     reg64_t reg_d_weights = imm_addr64;
     reg64_t reg_d_bias = reg_kj;
@@ -158,35 +148,11 @@ private:
 
     void generate();
 
-    inline void vpXdpwssd(Xbyak::Zmm zmm1, Xbyak::Zmm zmm2,
-        const Xbyak::Address& op) {
-        if (jcp.ver == ver_4vnni)
-            vp4dpwssd(zmm1, zmm2, op);
-        else
-            vpdpwssd(zmm1, zmm2, op);
-    }
-
-    inline void vadd(Xbyak::Zmm zmm, const Xbyak::Operand& op) {
+    inline void vadd(Vmm vmm, const Xbyak::Operand& op) {
         if (jcp.ver == ver_4vnni || jcp.ver == ver_vnni)
-            vpaddd(zmm, zmm, op);
+            vpaddd(vmm, vmm, op);
         else
-            vaddps(zmm, zmm, op);
-    }
-
-    inline void vcmp(Xbyak::Opmask kmask,
-        Xbyak::Zmm zmm_src1, Xbyak::Zmm zmm_src2, const unsigned char cmp) {
-        if (jcp.ver == ver_4vnni || jcp.ver == ver_vnni)
-            vpcmpd(kmask, zmm_src1, zmm_src2, cmp);
-        else
-            vcmpps(kmask, zmm_src1, zmm_src2, cmp);
-    }
-
-    inline void vmul(Xbyak::Zmm zmm_dst, Xbyak::Opmask kmask,
-                     Xbyak::Zmm zmm_src1, Xbyak::Zmm zmm_src2) {
-        if (jcp.ver == ver_4vnni || jcp.ver == ver_vnni)
-            vpmulld(zmm_dst | kmask, zmm_src1, zmm_src2);
-        else
-            vmulps(zmm_dst | kmask, zmm_src1, zmm_src2);
+            vaddps(vmm, vmm, op);
     }
 
     inline size_t get_output_offset(int oi, int n_oc_block) {
@@ -224,6 +190,59 @@ private:
     }
 };
 
+struct jit_avx512_common_conv_fwd_kernel {
+
+    jit_avx512_common_conv_fwd_kernel(jit_conv_conf_t ajcp,
+        const primitive_attr_t &attr) :
+        jit_ker(nullptr),
+        zmm_kernel_(nullptr),
+        xmm_kernel_(nullptr) {
+        int ch_block = ajcp.is_depthwise ? ajcp.ch_block : ajcp.oc_block;
+        switch (ch_block) {
+        case 16:
+            zmm_kernel_ =
+                new _jit_avx512_common_conv_fwd_kernel<Xbyak::Zmm>(
+                    ajcp, attr);
+            jit_ker = zmm_kernel_->jit_ker_;
+            return;
+        case 4:
+            xmm_kernel_ =
+                new _jit_avx512_common_conv_fwd_kernel<Xbyak::Xmm>(
+                    ajcp, attr);
+            jit_ker = xmm_kernel_->jit_ker_;
+            return;
+        default:
+            assert(!"invalid channel blocking");
+        }
+    }
+
+    ~jit_avx512_common_conv_fwd_kernel() {
+        delete xmm_kernel_;
+        delete zmm_kernel_;
+    }
+
+    enum {
+        typesize = sizeof(float)
+    };
+
+    static bool post_ops_ok(jit_conv_conf_t &jcp,
+        const primitive_attr_t &attr);
+    static status_t init_conf(jit_conv_conf_t &jcp,
+        const convolution_desc_t &cd,
+        cpu_memory_t::pd_t &src_pd,
+        cpu_memory_t::pd_t &weights_pd,
+        cpu_memory_t::pd_t &dst_pd,
+        cpu_memory_t::pd_t &bias_pd,
+        const primitive_attr_t &attr,
+        int nthreads);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+        const jit_conv_conf_t &jcp);
+
+    void(*jit_ker)(jit_conv_call_s *);
+    _jit_avx512_common_conv_fwd_kernel<Xbyak::Zmm> *zmm_kernel_;
+    _jit_avx512_common_conv_fwd_kernel<Xbyak::Xmm> *xmm_kernel_;
+};
+
 struct jit_avx512_common_conv_bwd_data_kernel_f32: public jit_generator {
 
     jit_avx512_common_conv_bwd_data_kernel_f32(jit_conv_conf_t ajcp): jcp(ajcp)
@@ -239,6 +258,8 @@ struct jit_avx512_common_conv_bwd_data_kernel_f32: public jit_generator {
             const memory_desc_wrapper &diff_src_d,
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &diff_dst_d);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
 
     jit_conv_conf_t jcp;
     void (*jit_ker)(jit_conv_call_s *);
@@ -358,6 +379,8 @@ struct jit_avx512_common_conv_bwd_weights_kernel_f32 : public jit_generator {
             const convolution_desc_t &cd, cpu_memory_t::pd_t &src_pd,
             cpu_memory_t::pd_t &diff_weights_pd,
             cpu_memory_t::pd_t &diff_bias_pd, cpu_memory_t::pd_t &diff_dst_pd);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
 
     jit_conv_conf_t jcp;
     void (*jit_ker)(jit_conv_call_s *);
@@ -423,6 +446,9 @@ private:
     inline void compute_loop();
 
     void generate();
+
+    static void balance(const jit_conv_conf_t &j, int &nthr, int &nthr_mb,
+            int &nthr_g, int &nthr_oc_b, int &nthr_ic_b);
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp
index 0405eee0e..63cd07471 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.cpp
@@ -66,6 +66,15 @@ int get_divisor_satisfying_cond(jit_conv_winograd_conf_t &jcp, int number,
     return best_divisor;
 }
 
+namespace {
+bool is_winograd_faster_than_direct(const jit_conv_winograd_conf_t &jcp) {
+    if (jcp.ver == ver_4fma)
+        return jcp.mb >= 32;
+    else
+        return jcp.mb >= 16;
+}
+}
+
 /* assumes 512 bits registers */
 /* TODO: add support for strides */
 /* TODO: handle the prefetch distance automatically */
@@ -137,29 +146,6 @@ private:
 };
 
 // utilities to support kernel parameter selection
-bool check_L2_block_per_thread(jit_conv_winograd_conf_t &jcp,
-        int dimN_block, float C2_min, float C2_max) {
-    /* V_L2_block + M_L2_block + W */
-    float block_size = (alpha * alpha * (jcp.oc + jcp.ic)
-                     * dimN_block * jcp.dimN_reg_block
-                     + jcp.ic * jcp.oc) * (float)sizeof(float);
-    float L2_lb = C2_min * L2_cache_size;
-    float L2_ub =  C2_max * L2_cache_size;
-    return (block_size > L2_lb && block_size < L2_ub);
-}
-
-bool check_L1_block_gemm(jit_conv_winograd_conf_t &jcp, int dimK_block,
-        int dimM_block, float C1_min, float C1_max) {
-    float gemm_block_size = (dimM_block * jcp.dimM_simd_block * dimK_block
-                             * jcp.dimK_reg_block
-                     + dimK_block * jcp.dimK_reg_block * jcp.dimN_reg_block
-                     + dimM_block * jcp.dimM_simd_block * jcp.dimN_reg_block)
-                     * (float)sizeof(float);
-    float L1_lb = C1_min * L1_cache_size;
-    float L1_ub = C1_max * L1_cache_size;
-    return (gemm_block_size > L1_lb && gemm_block_size < L1_ub);
-}
-
 bool check_cond1(int dimN_reg_block, int dimK_block, int dimK_reg_block,
         int dimM_block, int dimM_simd_block, float C)
 {
@@ -311,10 +297,8 @@ void _jit_avx512_common_conv_winograd_data_kernel_f32::gemm_loop_generate(
             auto store_output = [=](bool output_is_aligned) {
                 for (int tile = 0; tile < jcp.dimN_reg_block; tile++) {
                     Zmm zmm(jcp.zmm_start + tile);
-                    // In W_SGD, output will be reused.
                     if (output_is_aligned
                         && jcp.dimK_nb_block == 1
-                        && jcp.sched_policy == WSCHED_DATA_W_S_G_D
                         && (jcp.dimN * jcp.dimM * alpha * alpha
                             * sizeof(float) > 2 * LLC_data_size))
                         vmovntps(zword[reg_dstC + 64 * tile], zmm);
@@ -359,15 +343,17 @@ status_t _jit_avx512_common_conv_winograd_data_kernel_f32::init_conf_common(
         const memory_desc_wrapper &dst_d)
 {
 
-    if (!mayiuse(avx512_common))
+    if (mayiuse(avx512_core))
+        return status::unimplemented;
+    else if (!mayiuse(avx512_common))
         return status::unimplemented;
-    else if (mayiuse(avx512_core))
-        jcp.ver = ver_avx512_core;
     else if (mayiuse(avx512_mic_4ops))
         jcp.ver = ver_4fma;
     else
         jcp.ver = ver_fma;
 
+    jcp.nthr = mkldnn_get_max_threads();
+
     const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
 
     jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
@@ -402,6 +388,10 @@ status_t _jit_avx512_common_conv_winograd_data_kernel_f32::init_conf_common(
         jcp.ic = rnd_up(jcp.ic, simd_w);
     }
 
+    if (!IMPLICATION(cd.alg_kind == alg_kind::convolution_auto,
+                is_winograd_faster_than_direct(jcp)))
+        return status::unimplemented;
+
     // Checking conditions not supported by these kernels
     if (jcp.ngroups != 1)
         return status::unimplemented;
@@ -431,83 +421,6 @@ status_t _jit_avx512_common_conv_winograd_data_kernel_f32::init_conf_common(
     return status::success;
 }
 
-status_t set_wsched_DATA_W_SGD_avx512_common(jit_conv_winograd_conf_t &jcp) {
-
-    if (jcp.ver != ver_avx512_core)
-        return status::unimplemented;
-
-    /* ----------- dimN reg block ---------------------*/
-    auto test_cond_dimN_reg_block = [](jit_conv_winograd_conf_t &jcp,
-            int dimN_reg_block, int current_best) {
-        return (dimN_reg_block >= MIN_REQUIRED_DIMN_REG_BLOCK)
-            && (dimN_reg_block <= jcp.nb_reg)
-            && (dimN_reg_block < current_best);
-    };
-
-    jcp.dimN_reg_block = get_divisor_satisfying_cond(
-            jcp, jcp.dimN, jcp.dimN, test_cond_dimN_reg_block);
-
-    if (jcp.dimN_reg_block >= jcp.nb_reg) {
-        auto test_cond_dimN_reg_block = [](jit_conv_winograd_conf_t &jcp,
-                int dimN_reg_block, int current_best) {
-            return (dimN_reg_block < jcp.nb_reg)
-                    && (dimN_reg_block > current_best);
-        };
-
-        jcp.dimN_reg_block = get_divisor_satisfying_cond(
-                jcp, jcp.dimN, 1, test_cond_dimN_reg_block);
-    }
-
-    /*-------------- L2 blocking for dimN block ---------*/
-
-    auto test_cond_dimN_block = [](jit_conv_winograd_conf_t &jcp,
-            int dimN_block, int current_best) {
-        return check_L2_block_per_thread(jcp, dimN_block, 0.1, 1.3)
-            && (dimN_block > current_best)
-            && ((jcp.dimN / dimN_block / jcp.dimN_reg_block) > 2 * mkldnn_get_max_threads());
-    };
-
-    jcp.dimN_block = get_divisor_satisfying_cond(
-            jcp, jcp.dimN / jcp.dimN_reg_block, 1, test_cond_dimN_block);
-
-    if (check_L2_block_per_thread(jcp, jcp.dimN_block, 0.1, 1.3)
-        && jcp.dimN/ jcp.dimN_block/ jcp.dimN_reg_block > 2 * mkldnn_get_max_threads()) {
-        jcp.dimN_nb_block = jcp.dimN / jcp.dimN_block / jcp.dimN_reg_block;
-
-        /* ------------------- L1 blocking for GEMM --------------*/
-        /* -------------------- Choose dimK block ----------------*/
-        auto test_cond_dimK_block = [](jit_conv_winograd_conf_t &jcp,
-                int dimK_block, int current_best) {
-            return check_L1_block_gemm(jcp, dimK_block, 1, 0.1, 0.6)
-                && (dimK_block > current_best);
-        };
-
-        jcp.dimK_block = get_divisor_satisfying_cond(
-                jcp, jcp.dimK / jcp.dimK_reg_block, 1, test_cond_dimK_block);
-
-        if (check_L1_block_gemm(jcp, jcp.dimK_block, 1, 0.1, 0.6)) {
-            jcp.dimK_nb_block = jcp.dimK / jcp.dimK_block / jcp.dimK_reg_block;
-
-            /* -------------- Choose dimM block -------------------*/
-            auto test_cond_dimM_block = [](jit_conv_winograd_conf_t &jcp,
-                    int dimM_block, int current_best) {
-                return check_L1_block_gemm(jcp, jcp.dimK_block, dimM_block, 0.1, 0.7)
-                    && (dimM_block > current_best);
-            };
-
-            jcp.dimM_block = get_divisor_satisfying_cond(
-                    jcp, jcp.dimM / jcp.dimM_simd_block, 1, test_cond_dimM_block);
-            jcp.dimM_nb_block = jcp.dimM / jcp.dimM_block / jcp.dimM_simd_block;
-
-            jcp.sched_policy = WSCHED_DATA_W_SGD;
-            return status::success;
-        }
-
-    }
-    return status::unimplemented;
-
-}
-
 
 status_t set_wsched_DATA_W_S_G_D_avx512_common(jit_conv_winograd_conf_t &jcp) {
 
@@ -593,7 +506,6 @@ status_t set_wsched_DATA_W_S_G_D_avx512_common(jit_conv_winograd_conf_t &jcp) {
     jcp.dimN_nb_block = jcp.dimN / (jcp.dimN_reg_block * jcp.dimN_block);
     jcp.sched_policy = WSCHED_DATA_W_S_G_D;
     return status::success;
-    //return status::unimplemented;
 }
 
 status_t _jit_avx512_common_conv_winograd_data_kernel_f32::init_conf_kernel(
@@ -618,10 +530,9 @@ status_t _jit_avx512_common_conv_winograd_data_kernel_f32::init_conf_kernel(
     jcp.dimM = dimM;
 
     jcp.sched_policy = WSCHED_INVALID;
-    if (!(set_wsched_DATA_W_SGD_avx512_common(jcp) == status::success))
-        set_wsched_DATA_W_S_G_D_avx512_common(jcp);
+    set_wsched_DATA_W_S_G_D_avx512_common(jcp);
 
-    assert(jcp.sched_policy != WSCHED_INVALID);
+    assert(jcp.sched_policy == WSCHED_DATA_W_S_G_D);
     return status::success;
 }
 
@@ -629,28 +540,16 @@ bool jit_avx512_common_conv_winograd_fwd_kernel_f32::post_ops_ok(
         jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
     const auto &p = attr.post_ops_;
 
-    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_relu = [&](int idx) { return p.entry_[idx].is_relu(); };
     auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
 
     switch (p.len_) {
-    case 0:
-        return true; // no post_ops
-    case 1:
-        return true // relu or sum
-                && IMPLICATION(jcp.with_eltwise, is_sum(0))
-                && IMPLICATION(!jcp.with_eltwise, is_eltwise(0) || is_sum(0));
-    case 2:
-        return true // sum->relu or relu->sum
-                && IMPLICATION(jcp.with_eltwise, is_sum(0) && is_eltwise(1))
-                && IMPLICATION(!jcp.with_eltwise, false
-                                   || (is_sum(0) && is_eltwise(1))
-                                   || (is_eltwise(0) && is_sum(1)));
-    case 3:
-        return true // relu->sum->relu
-                && jcp.with_eltwise == false
-                && (is_eltwise(0) && is_sum(1) && is_eltwise(2));
-    default:
-        return false;
+    case 0: return true; // no post_ops
+    case 1: return is_relu(0) || is_sum(0); // relu or sum
+    case 2: return (is_sum(0) && is_relu(1)) ||
+                       (is_relu(0) && is_sum(1)); // sum->relu or relu->sum
+    case 3: return is_relu(0) && is_sum(1) && is_relu(2); // relu->sum->relu
+    default: return false;
     }
 
     return false;
@@ -659,8 +558,7 @@ bool jit_avx512_common_conv_winograd_fwd_kernel_f32::post_ops_ok(
 status_t jit_avx512_common_conv_winograd_fwd_kernel_f32::init_conf(
         jit_conv_winograd_conf_t &jcp, const convolution_desc_t &cd,
         const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
-        const memory_desc_wrapper &dst_d, const primitive_attr_t &attr,
-        bool with_relu, float relu_negative_slope) {
+        const memory_desc_wrapper &dst_d, const primitive_attr_t &attr) {
     status_t st = init_conf_common(jcp, cd, src_d, weights_d, dst_d);
 
     if (st != status::success)
@@ -672,18 +570,14 @@ status_t jit_avx512_common_conv_winograd_fwd_kernel_f32::init_conf(
     jcp.ntiles = jcp.mb * jcp.itiles * jcp.jtiles;
 
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
     const auto &p = attr.post_ops_;
-    if (!jcp.with_eltwise) {
-        /* PostOps ReLU before SUM is handled the same as ReLU primitive */
-        jcp.with_eltwise = p.find(primitive_kind::eltwise, 0, 1) != -1;
-        jcp.eltwise_alpha = 0.f;
-    }
+    const int eltwise_ind = p.find(primitive_kind::eltwise, 0, 1);
+    jcp.with_eltwise = eltwise_ind != -1;
+    if (jcp.with_eltwise) jcp.eltwise = p.entry_[eltwise_ind].eltwise;
     jcp.with_sum = p.find(primitive_kind::sum, 0) != -1;
 
     status_t res = init_conf_kernel(jcp, jcp.oc, jcp.ntiles, jcp.ic);
@@ -1014,7 +908,7 @@ bool check_cond2_wu(int dimM_block, int dimM_simdw, int dimK_block,
 }
 } // namespace
 
-bool set_wsched_WEI_S_D_G_W_avx512_common(jit_conv_winograd_conf_t &jcp)
+status_t set_wsched_WEI_S_D_G_W_avx512_common(jit_conv_winograd_conf_t &jcp)
 {
     /*************** Choose dimN_reg_block (ic_simd_block)
      * *******************************/
@@ -1113,245 +1007,7 @@ bool set_wsched_WEI_S_D_G_W_avx512_common(jit_conv_winograd_conf_t &jcp)
     jcp.dimM_nb_block = (jcp.dimM / jcp.dimM_simd_block) / jcp.dimM_block;
 
     jcp.sched_policy = WSCHED_WEI_S_D_G_W;
-    return true;
-}
-
-namespace {
-bool is_in_L1_range(int v, float C1, float C2)
-{
-    return ((v > C1 * L1_cache_size) && (v < C2 * L1_cache_size));
-}
-
-bool is_in_L2_range(int v, float C1, float C2)
-{
-    return ((v > C1 * L2_cache_size) && (v < C2 * L2_cache_size));
-}
-
-void set_jcp_WEI_params(jit_conv_winograd_conf_t &jcp, int tile_block_ur,
-        int tile_block, int nb_ic, int nb_oc)
-{
-    jcp.tile_block_ur = tile_block_ur;
-    jcp.tile_block = tile_block;
-    jcp.nb_ic = nb_ic;
-    jcp.nb_oc = nb_oc;
-
-    jcp.nb_tile_block_ur = jcp.ntiles / jcp.tile_block / jcp.tile_block_ur;
-    jcp.ic_block = jcp.ic / jcp.ic_simd_block / jcp.nb_ic;
-    jcp.oc_block = jcp.oc / jcp.oc_simd_block / jcp.nb_oc;
-
-    jcp.dimK_reg_block = jcp.tile_block_ur;
-    jcp.dimK_block = jcp.nb_tile_block_ur;
-    jcp.dimK_nb_block = jcp.tile_block;
-    jcp.dimN_reg_block = jcp.ic_simd_block;
-    jcp.dimN_block = jcp.ic_block;
-    jcp.dimN_nb_block = jcp.nb_ic;
-    jcp.dimM_simd_block = jcp.oc_simd_block;
-    jcp.dimM_block = jcp.oc_block;
-    jcp.dimM_nb_block = jcp.nb_oc;
-}
-}
-
-bool set_wsched_WEI_SDGt_W_avx512_common(jit_conv_winograd_conf_t &jcp)
-{
-    jcp.ic_simd_block = jcp.oc_simd_block = 16;
-    int nb_ic_simd_block = jcp.ic / jcp.ic_simd_block;
-    int nb_oc_simd_block = jcp.oc / jcp.oc_simd_block;
-
-    int min_tile_block_ur = 8;
-    int max_tile_block_ur = 64;
-    int max_tile_block = jcp.ntiles / min_tile_block_ur;
-
-    // Consider L2 + L3 together on SKX
-    const float C1_min = .1, C1_0 = .4, C1_max = .5;
-    const float C2_0 = .4, C2_max = .5;
-    const float TC2_0 = .7, TC2_max = 1.2;
-    const int T_min = 2, T0 = 20;
-    float C1, C2, TC2;
-    int T, tile_block, tile_block_ur, nb_oc, nb_ic;
-
-    auto blocking_ok = [&]() -> bool {
-        // V:tile_block + M:tile_block + U
-        int thread_size = alpha * alpha * jcp.oc
-                        * (jcp.ntiles / tile_block) * sizeof(float)
-                + alpha * alpha * jcp.ic * (jcp.ntiles / tile_block)
-                        * sizeof(float)
-                + alpha * alpha * jcp.ic * jcp.oc * sizeof(float);
-        // V:tile_block + M:tile_block
-        int L2_reuse = alpha * alpha * jcp.oc
-                        * (jcp.ntiles / tile_block) * sizeof(float)
-                + alpha * alpha * jcp.ic * (jcp.ntiles / tile_block)
-                        * sizeof(float);
-        // V:nb_ic + M:nb_tile_block_ur
-        // Use M:nb_oc + V:nb_ic as an superset estimation
-        int L1_reuse
-                = (jcp.ic / nb_ic) * (jcp.ntiles / tile_block) * sizeof(float)
-                + (jcp.oc / nb_oc) * (jcp.ntiles / tile_block) * sizeof(float);
-
-        return jcp.ntiles % tile_block == 0
-                && (jcp.ntiles / tile_block) % tile_block_ur == 0
-                && is_in_L2_range(thread_size, TC2, TC2_max)
-                && is_in_L2_range(L2_reuse, C2, C2_max)
-                && tile_block > T * mkldnn_get_max_threads()
-                && nb_oc_simd_block % nb_oc == 0
-                && nb_ic_simd_block % nb_ic == 0
-                && is_in_L1_range(L1_reuse, C1, C1_max);
-    };
-
-    for (C1 = C1_0, C2 = C2_0, TC2 = TC2_0; C1 > C1_min;
-            C1 -= .02, C2 -= .02, TC2 -= .04) {
-        for (T = T0; T >= T_min; --T) {
-            for (tile_block = 1; tile_block <= max_tile_block; ++tile_block) {
-                for (tile_block_ur = max_tile_block_ur;
-                        tile_block_ur >= min_tile_block_ur; --tile_block_ur) {
-                    for (nb_oc = 1; nb_oc <= nb_oc_simd_block; ++nb_oc) {
-                        for (nb_ic = nb_ic_simd_block; nb_ic >= 1; --nb_ic) {
-                            if (blocking_ok()) {
-                                set_jcp_WEI_params(jcp, tile_block_ur,
-                                        tile_block, nb_ic, nb_oc);
-                                jcp.sched_policy = WSCHED_WEI_SDGt_W;
-                                return true;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return false;
-}
-
-bool set_wsched_WEI_SDGtWo_avx512_common(jit_conv_winograd_conf_t &jcp)
-{
-    jcp.ic_simd_block = jcp.oc_simd_block = 16;
-    int nb_ic_simd_block = jcp.ic / jcp.ic_simd_block;
-    int nb_oc_simd_block = jcp.oc / jcp.oc_simd_block;
-
-    int min_tile_block_ur = 12;
-    int max_tile_block_ur = 64;
-    int max_tile_block = jcp.ntiles / min_tile_block_ur;
-
-    const float C1_min = .1, C1_0 = .4, C1_max = .5;
-    const float C2_0 = .4, C2_max = .6;
-    const float TC2_0 = .7, TC2_max = 1.6;
-
-    const int max_nb_oc = 2; // Limit the # of sequential execution
-    const int T0 = 12, T_min = 8;
-    float C1, C2, TC2;
-    int T, tile_block, tile_block_ur, nb_oc, nb_ic;
-
-    auto blocking_ok = [&]() -> bool {
-        // M:tile_block:nb_oc + V:tile_block + U:nb_oc
-        int thread_size = alpha * alpha * (jcp.oc / nb_oc)
-                        * (jcp.ntiles / tile_block) * sizeof(float)
-                + alpha * alpha * jcp.ic * (jcp.ntiles / tile_block)
-                        * sizeof(float)
-                + alpha * alpha * jcp.ic * (jcp.oc / nb_oc)
-                        * sizeof(float);
-        // M:tile_block:nb_oc + V:tile_block
-        int L2_reuse = alpha * alpha * (jcp.oc / nb_oc)
-                        * (jcp.ntiles / tile_block) * sizeof(float)
-                + alpha * alpha * jcp.ic * (jcp.ntiles / tile_block)
-                        * sizeof(float);
-        // V:nb_ic + M:nb_tile_block_ur
-        // Use M:nb_oc + V:nb_ic as an superset estimation
-        int L1_reuse
-                = (jcp.ic / nb_ic) * (jcp.ntiles / tile_block) * sizeof(float)
-                + (jcp.oc / nb_oc) * (jcp.ntiles / tile_block) * sizeof(float);
-
-        return jcp.ntiles % tile_block == 0
-                && (jcp.ntiles / tile_block) % tile_block_ur == 0
-                && is_in_L2_range(thread_size, TC2, TC2_max)
-                && is_in_L2_range(L2_reuse, C2, C2_max)
-                && tile_block > T * mkldnn_get_max_threads()
-                && nb_oc_simd_block % nb_oc == 0
-                && nb_ic_simd_block % nb_ic == 0
-                && is_in_L1_range(L1_reuse, C1, C1_max);
-    };
-
-    for (T = T0; T >= T_min; --T) {
-        for (C1 = C1_0, C2 = C2_0, TC2 = TC2_0; C1 > C1_min;
-                C1 -= .02, C2 -= .02, TC2 -= .04) {
-            for (nb_oc = 1; nb_oc <= max_nb_oc; ++nb_oc) {
-                for (tile_block = max_tile_block; tile_block >= 1;
-                        --tile_block) {
-                    for (tile_block_ur = min_tile_block_ur;
-                            tile_block_ur <= max_tile_block_ur;
-                            ++tile_block_ur) {
-                        for (nb_ic = 1; nb_ic <= nb_ic_simd_block; ++nb_ic) {
-                            if (blocking_ok()) {
-                                set_jcp_WEI_params(jcp, tile_block_ur,
-                                        tile_block, nb_ic, nb_oc);
-                                jcp.sched_policy = WSCHED_WEI_SDGtWo;
-                                return true;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    return false;
-}
-
-bool set_wsched_WEI_S_D_Giot_W_avx512_common(jit_conv_winograd_conf_t &jcp)
-{
-    jcp.ic_simd_block = jcp.oc_simd_block = 16;
-    int nb_ic_simd_block = jcp.ic / jcp.ic_simd_block;
-
-    int min_tile_block_ur = 8;
-    int max_tile_block_ur = 64;
-    const float C1_min = .2, C1_0 = .4, C1_max = .9;
-    const float C2_min = .1, C2_0 = .4, C2_max = .5;
-    const int T0 = 16, T_min = 12;
-    float C1, C2;
-    int T, tile_block, tile_block_ur, nb_ic;
-    int nb_oc = 1; // Keep nb_oc small to increase
-                   // oc_block, for better reuse of V in
-                   // L2
-
-    auto blocking_ok = [&]() -> bool {
-        // V[:ic_block][][][]
-        int L2_reuse
-                = (jcp.ic / nb_ic) * (jcp.ntiles / tile_block) * sizeof(float);
-        // M[:nb_tile_block_ur][][] + V[:nb_tile_block_ur][][]
-        int L1_reuse
-                = (jcp.ntiles / tile_block) * jcp.oc_simd_block * sizeof(float);
-
-        int work_amount = tile_block * nb_ic * nb_oc * alpha * alpha;
-
-        return (jcp.ntiles / tile_block_ur) % tile_block == 0
-                && jcp.ntiles % tile_block_ur == 0
-                && nb_ic_simd_block % nb_ic == 0
-                && is_in_L2_range(L2_reuse, C2, C2_max)
-                && is_in_L1_range(L1_reuse, C1, C1_max)
-                && work_amount > T * mkldnn_get_max_threads();
-    };
-
-    for (T = T0; T >= T_min; --T) {
-        for (C1 = C1_0; C1 > C1_min; C1 -= .02) {
-            for (C2 = C2_0; C2 > C2_min; C2 -= .02) {
-                for (nb_ic = 1; nb_ic <= nb_ic_simd_block; ++nb_ic) {
-                    for (tile_block_ur = min_tile_block_ur;
-                            tile_block_ur <= max_tile_block_ur;
-                            ++tile_block_ur) {
-                        for (tile_block = 1;
-                                tile_block <= jcp.ntiles / min_tile_block_ur;
-                                ++tile_block) {
-                            if (blocking_ok()) {
-                                set_jcp_WEI_params(jcp, tile_block_ur,
-                                        tile_block, nb_ic, nb_oc);
-                                jcp.sched_policy = WSCHED_WEI_S_D_Giot_W;
-                                return true;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-    return false;
+    return status::success;
 }
 
 status_t jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::init_conf(
@@ -1359,8 +1015,7 @@ status_t jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::init_conf(
         const memory_desc_wrapper &src_d, const memory_desc_wrapper &diff_dst_d,
         const memory_desc_wrapper &diff_weights_d)
 {
-    if (!mayiuse(avx512_common))
-        return status::unimplemented;
+    jcp.nthr = mkldnn_get_max_threads();
 
     const bool with_groups = diff_weights_d.ndims() == src_d.ndims() + 1;
 
@@ -1397,15 +1052,18 @@ status_t jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::init_conf(
         jcp.ic = rnd_up(jcp.ic, simd_w);
     }
 
+    if (mayiuse(avx512_core))
+        return status::unimplemented;
     if (!mayiuse(avx512_common))
         return status::unimplemented;
-    else if (mayiuse(avx512_core))
-        jcp.ver = ver_avx512_core;
     else if (mayiuse(avx512_mic_4ops))
         jcp.ver = ver_4fma;
     else
         jcp.ver = ver_fma;
 
+    if (!IMPLICATION(cd.alg_kind == alg_kind::convolution_auto,
+                is_winograd_faster_than_direct(jcp)))
+        return status::unimplemented;
     // Winograd specific initialization
     jcp.itiles = (jcp.ow + tile_size - 1) / tile_size;
     jcp.jtiles = (jcp.oh + tile_size - 1) / tile_size;
@@ -1474,16 +1132,9 @@ status_t jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::init_conf(
         jcp.zmm_start = jcp.ver == ver_4fma ? 4 : 1;
     jcp.nb_reg = 32 - jcp.zmm_start;
 
-    status_t res;
     jcp.sched_policy = WSCHED_INVALID;
-    if ((jcp.ver == ver_avx512_core &&
-            (set_wsched_WEI_SDGt_W_avx512_common(jcp)
-            || set_wsched_WEI_SDGtWo_avx512_common(jcp)
-            || set_wsched_WEI_S_D_Giot_W_avx512_common(jcp)))
-        || set_wsched_WEI_S_D_G_W_avx512_common(jcp))
-        res = status::success;
-    else
-        return status::unimplemented;
+    status_t res = set_wsched_WEI_S_D_G_W_avx512_common(jcp);
+    assert(jcp.sched_policy == WSCHED_WEI_S_D_G_W);
 
     jcp.tile_block_ur = jcp.dimK_reg_block;
     jcp.nb_tile_block_ur = jcp.dimK_block;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.hpp
index f6fb2dae4..6c117143f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_conv_winograd_kernel_f32.hpp
@@ -91,8 +91,7 @@ struct jit_avx512_common_conv_winograd_fwd_kernel_f32
     static status_t init_conf(jit_conv_winograd_conf_t &jcp,
             const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &weights_d,
-            const memory_desc_wrapper &dst_d, const primitive_attr_t &attr,
-            bool with_relu = false, float relu_negative_slope = 0.);
+            const memory_desc_wrapper &dst_d, const primitive_attr_t &attr);
 };
 
 struct jit_avx512_common_conv_winograd_bwd_data_kernel_f32
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.cpp
index 876720702..da07a52ed 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.cpp
@@ -14,19 +14,20 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_types.h"
 #include "c_types_map.hpp"
-#include "jit_avx512_common_convolution.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
+#include "jit_avx512_common_convolution.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 using namespace nstl;
@@ -127,25 +128,40 @@ void jit_conv_3d_ker_bwd_w_pipeline(jit_conv_ker_t ker, jit_conv_call_s &p,
         ker(&p);
 }
 #define wht_blk_off(d, g, ...) \
-        (conf_.with_groups() \
+        (pd()->with_groups() \
          ? (d).blk_off((g), __VA_ARGS__) \
          : (d).blk_off(__VA_ARGS__))
 
-template <bool with_relu, data_type_t src_type, data_type_t wei_type,
+template <data_type_t src_type, data_type_t wei_type, data_type_t dst_type>
+void jit_avx512_common_convolution_fwd_t<src_type, wei_type, dst_type>::
+prepare_padded_bias(const dst_data_t *&bias) const {
+    if (!pd()->wants_padded_bias()) return;
+
+    auto padded_bias = scratchpad().template get<dst_data_t>(
+            key_conv_padded_bias);
+    utils::array_copy(padded_bias, bias, pd()->jcp_.oc_without_padding);
+    utils::array_set(padded_bias + pd()->jcp_.oc_without_padding,
+            (dst_data_t)0, pd()->jcp_.oc - pd()->jcp_.oc_without_padding);
+    bias = padded_bias;
+}
+
+template <data_type_t src_type, data_type_t wei_type,
           data_type_t dst_type>
-void _jit_avx512_common_convolution_fwd_t
-    <with_relu, src_type, wei_type, dst_type>::execute_forward_1d()
+void jit_avx512_common_convolution_fwd_t
+    <src_type, wei_type, dst_type>::execute_forward_1d() const
 {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const dst_data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    prepare_padded_bias(bias);
 
-    const auto &jcp = kernel_->jcp;
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    const auto &jcp = pd()->jcp_;
     assert(jcp.nb_oc % jcp.nb_oc_blocking == 0);
 
     int oc_chunks = jcp.nb_oc / jcp.nb_oc_blocking;
@@ -157,11 +173,6 @@ void _jit_avx512_common_convolution_fwd_t
     else
         nthr = mkldnn_get_max_threads();
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-    }
     parallel(nthr, [&](const int ithr, const int nthr) {
         int start{0}, end{0}, start_copy;
         balance211(work_amount, nthr, ithr, start, end);
@@ -191,7 +202,7 @@ void _jit_avx512_common_convolution_fwd_t
                 int ocb = occ * jcp.nb_oc_blocking;
                 int g_ocb = g * jcp.nb_oc + ocb;
                 int g_oc = g_ocb * jcp.oc_block;
-                int g_icb = g * jcp.nb_ic;
+                int g_icb = g * jcp.nb_ic * jcp.nonblk_group_off;
 
                 int ow_s =  owb * jcp.ow_block;
                 int iw_s =  ow_s * jcp.stride_w;
@@ -228,22 +239,24 @@ void _jit_avx512_common_convolution_fwd_t
     });
 }
 
-template <bool with_relu, data_type_t src_type, data_type_t wei_type,
+template <data_type_t src_type, data_type_t wei_type,
           data_type_t dst_type>
-void _jit_avx512_common_convolution_fwd_t
-    <with_relu, src_type, wei_type, dst_type>::execute_forward_2d()
+void jit_avx512_common_convolution_fwd_t
+    <src_type, wei_type, dst_type>::execute_forward_2d() const
 {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const dst_data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    prepare_padded_bias(bias);
 
-    const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    const auto &jcp = pd()->jcp_;
+    const int MB = pd()->MB();
     assert(jcp.nb_oc % jcp.nb_oc_blocking == 0);
 
     int oc_chunks = jcp.nb_oc / jcp.nb_oc_blocking;
@@ -255,12 +268,6 @@ void _jit_avx512_common_convolution_fwd_t
     else
         nthr = mkldnn_get_max_threads();
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-    }
-
     parallel(nthr, [&](const int ithr, const int nthr) {
         int start{0}, end{0}, start_copy;
         balance211(work_amount, nthr, ithr, start, end);
@@ -290,7 +297,7 @@ void _jit_avx512_common_convolution_fwd_t
                 int ocb = occ * jcp.nb_oc_blocking;
                 int g_ocb = g * jcp.nb_oc + ocb;
                 int g_oc = g_ocb * jcp.oc_block;
-                int g_icb = g * jcp.nb_ic;
+                int g_icb = g * jcp.nb_ic * jcp.nonblk_group_off;
 
                 int work_rem = end - start;
 
@@ -357,30 +364,26 @@ void _jit_avx512_common_convolution_fwd_t
     });
 }
 
-template <bool with_relu, data_type_t src_type, data_type_t wei_type,
+template <data_type_t src_type, data_type_t wei_type,
           data_type_t dst_type>
-void _jit_avx512_common_convolution_fwd_t
-    <with_relu, src_type, wei_type, dst_type>::execute_forward_3d()
+void jit_avx512_common_convolution_fwd_t
+    <src_type, wei_type, dst_type>::execute_forward_3d() const
 {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const dst_data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    prepare_padded_bias(bias);
 
-    const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
-    assert(jcp.nb_oc % jcp.nb_oc_blocking == 0);
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-    }
+    const auto &jcp = pd()->jcp_;
+    const int MB = pd()->MB();
+    assert(jcp.nb_oc % jcp.nb_oc_blocking == 0);
 
     parallel(0, [&](const int ithr, const int nthr) {
         int oc_chunks = jcp.nb_oc / jcp.nb_oc_blocking;
@@ -418,7 +421,7 @@ void _jit_avx512_common_convolution_fwd_t
                 int ocb = occ * jcp.nb_oc_blocking;
                 int g_ocb = g * jcp.nb_oc + ocb;
                 int g_oc = g_ocb * jcp.oc_block;
-                int g_icb = g * jcp.nb_ic;
+                int g_icb = g * jcp.nb_ic * jcp.nonblk_group_off;
 
                 int work_rem = end - start;
                 int ih_s = -jcp.t_pad + oh_s * jcp.stride_h;
@@ -491,25 +494,22 @@ void _jit_avx512_common_convolution_fwd_t
     });
 }
 
-template struct _jit_avx512_common_convolution_fwd_t<false, data_type::f32>;
-template struct _jit_avx512_common_convolution_fwd_t<true, data_type::f32>;
-template struct _jit_avx512_common_convolution_fwd_t<false, data_type::s16,
-        data_type::s16, data_type::s32>;
-template struct _jit_avx512_common_convolution_fwd_t<true, data_type::s16,
+template struct jit_avx512_common_convolution_fwd_t<data_type::f32>;
+template struct jit_avx512_common_convolution_fwd_t<data_type::s16,
         data_type::s16, data_type::s32>;
 
 template <data_type_t diff_dst_type, data_type_t wei_type,
           data_type_t diff_src_type>
 void jit_avx512_common_convolution_bwd_data_t<diff_dst_type, wei_type,
-          diff_src_type>::execute_backward_data_1d() {
+          diff_src_type>::execute_backward_data_1d() const {
     auto diff_dst = reinterpret_cast<const diff_dst_data_t *>
                                                        (this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<diff_src_data_t*>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
 
@@ -579,18 +579,18 @@ void jit_avx512_common_convolution_bwd_data_t<diff_dst_type, wei_type,
 template <data_type_t diff_dst_type, data_type_t wei_type,
           data_type_t diff_src_type>
 void jit_avx512_common_convolution_bwd_data_t<diff_dst_type, wei_type,
-          diff_src_type>::execute_backward_data_2d() {
+          diff_src_type>::execute_backward_data_2d() const {
     auto diff_dst = reinterpret_cast<const diff_dst_data_t *>
                                                        (this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<diff_src_data_t*>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     parallel(0, [&](const int ithr, const int nthr) {
         int start{0}, end{0}, start_copy;
@@ -704,18 +704,18 @@ void jit_avx512_common_convolution_bwd_data_t<diff_dst_type, wei_type,
 template <data_type_t diff_dst_type, data_type_t wei_type,
           data_type_t diff_src_type>
 void jit_avx512_common_convolution_bwd_data_t<diff_dst_type, wei_type,
-          diff_src_type>::execute_backward_data_3d() {
+          diff_src_type>::execute_backward_data_3d() const {
     auto diff_dst = reinterpret_cast<const diff_dst_data_t *>
                                                        (this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<diff_src_data_t*>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     parallel(0, [&](const int ithr, const int nthr) {
         int start{0}, end{0}, start_copy;
@@ -881,89 +881,33 @@ template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
           diff_weights_type>::
-jit_avx512_common_convolution_bwd_weights_t(const pd_t *pd,
+jit_avx512_common_convolution_bwd_weights_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), kernel_(nullptr)
+    : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr)
     , trans_kernel_(nullptr), trans_dst_kernel_(nullptr), acc_ker_(nullptr)
-    , reducer_bias_(nullptr), padded_bias_(nullptr), tr_src_(nullptr)
-    , tr_diff_dst_(nullptr), ws_reduction_(nullptr), tr_src_bctx_(nullptr)
-    , tr_diff_dst_bctx_(nullptr)
+    , reducer_bias_(nullptr)
 {
-    const auto &j = conf_.jcp_;
-    kernel_ = new jit_avx512_common_conv_bwd_weights_kernel_f32(j);
+    const auto &j = pd()->jcp_;
 
-    balance();
+    nthr_ = j.nthr;
+    nthr_mb_ = j.nthr_mb;
+    nthr_g_ = j.nthr_g;
+    nthr_oc_b_ = j.nthr_oc_b;
+    nthr_ic_b_ = j.nthr_ic_b;
+
+    kernel_ = new jit_avx512_common_conv_bwd_weights_kernel_f32(j);
 
     if (utils::one_of(j.ver, ver_4fma, ver_4vnni, ver_vnni)) {
         trans_kernel_ = create_trans_src(&j);
         if (utils::one_of(j.ver, ver_4vnni, ver_vnni))
             trans_dst_kernel_ = create_trans_dst(&j);
-        if (j.is_1stconv) {
-            const int tr_src_size =
-                nthr_ / nthr_oc_b_ * j.ih * j.stride_w * j.tr_ld;
-            tr_src_ = (src_data_t *)malloc(tr_src_size * sizeof(src_data_t), 64);
-        } else {
-            // XXX: See the comment about tr_iw and guarding elements in
-            // jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf()
-            const int max_nthr = nthr_mb_ * j.ngroups * j.nb_ic;
-            const int min_tr_src_size_per_thr = j.ih * j.ic_block * j.tr_iw;
-            const int tr_src_size = max_nthr * min_tr_src_size_per_thr
-                + j.tr_src_num_guard_elems;
-            tr_src_ = (src_data_t *)malloc(tr_src_size * sizeof(src_data_t), 64);
-            /* to avoid NaNs in computations we zero tail num_guard_elems for
-             * each possible thread group */
-            for (int ithr = 1; ithr <= max_nthr; ++ithr) {
-                src_data_t *ts = &tr_src_[ithr * min_tr_src_size_per_thr];
-                for (int i = 0; i < j.tr_src_num_guard_elems; ++i)
-                    ts[i] = 0;
-            }
-        }
-
-        /* prepare synchronization contexts */
-        if (nthr_oc_b_ > 1) {
-            const int tr_src_bctx_size = nthr_ / nthr_oc_b_;
-            tr_src_bctx_ = (simple_barrier::ctx_t *)malloc(
-                    tr_src_bctx_size * sizeof(simple_barrier::ctx_t), 64);
-            for (int i = 0; i < tr_src_bctx_size; ++i)
-                simple_barrier::ctx_init(&tr_src_bctx_[i]);
-        }
-
-        if (utils::one_of(j.ver, ver_4vnni, ver_vnni)) {
-            const size_t tr_diff_dst_size =
-                nthr_mb_ * j.ngroups * j.nb_oc * j.oc_block * j.tr_ow * j.oh;
-            tr_diff_dst_ = (diff_dst_data_t *)malloc(
-                    tr_diff_dst_size * sizeof(diff_dst_data_t), 64);
-
-            /* prepare synchronization contexts */
-            if (nthr_ic_b_ > 1) {
-                const size_t tr_diff_dst_bctx_size = nthr_ / nthr_ic_b_;
-                tr_diff_dst_bctx_ = (simple_barrier::ctx_t *)malloc(
-                        tr_diff_dst_bctx_size * sizeof(simple_barrier::ctx_t),
-                        64);
-                for (size_t i = 0; i < tr_diff_dst_bctx_size; ++i)
-                    simple_barrier::ctx_init(&tr_diff_dst_bctx_[i]);
-            }
-        }
     }
 
-    if (nthr_mb_ > 1) {
-        const int wei_size = j.ngroups * j.oc * j.ic * j.kh * j.kw * j.kd;
-        const int bia_size = j.ngroups * j.oc;
-        ws_reduction_ = (diff_weights_data_t *)malloc((nthr_mb_ - 1)
-            * (wei_size + bia_size) * sizeof(diff_weights_data_t), 64);
+    if (nthr_mb_ > 1)
         acc_ker_ = new cpu_accumulator_1d_t<diff_weights_type>();
-        simple_barrier::ctx_init(&reduction_bctx_);
-    }
 
-    if (conf_.with_bias()) {
-        const size_t max_buffer_size = nthr_ * 3 * 5 * 5 * 16 * 16;
-        reducer_bias_ = new cpu_reducer_t<diff_weights_type>(reduce_balancer_t(
-                    nthr_, j.oc_block, j.ngroups * j.nb_oc, j.mb,
-                    max_buffer_size));
-        if (conf_.want_padded_bias())
-            padded_bias_ = (diff_weights_data_t *)
-                malloc(sizeof(diff_weights_data_t) * j.oc, 64);
-    }
+    reducer_bias_ =
+        new cpu_reducer_t<diff_weights_type>(pd()->reducer_bia_conf_);
 }
 
 template <data_type_t src_type, data_type_t diff_dst_type,
@@ -975,6 +919,17 @@ struct jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
     const diff_weights_data_t *diff_weights;
     diff_weights_data_t *diff_bias;
 
+    const memory_tracking::grantor_t scratchpad;
+
+    src_data_t *tr_src;
+    simple_barrier::ctx_t *tr_src_bctx;
+
+    diff_dst_data_t *tr_diff_dst;
+    simple_barrier::ctx_t *tr_diff_dst_bctx;
+
+    diff_weights_data_t *wei_bia_reduction;
+    simple_barrier::ctx_t *wei_bia_reduction_bctx;
+
     int ithr;
     int ithr_ic_b, ithr_oc_b, ithr_g, ithr_mb;
     int ithr_but_oc;
@@ -986,16 +941,30 @@ struct jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
     int ic_b_start = 0, ic_b_end = 0, ic_b_work;
 
     thread_info_t(const jit_avx512_common_convolution_bwd_weights_t *self,
-            int ithr): ithr(ithr) {
-
+            int ithr): scratchpad(self->scratchpad()), ithr(ithr) {
         src = reinterpret_cast<const src_data_t *>(self->input_memory(0));
         diff_dst = reinterpret_cast<const diff_dst_data_t *>(
             self->input_memory(1));
         diff_weights = reinterpret_cast<diff_weights_data_t *>(self->memory(0));
-        diff_bias = self->conf_.want_padded_bias()
-            ? self->padded_bias_
+        diff_bias = self->pd()->wants_padded_bias()
+            ? scratchpad.template get<diff_weights_data_t>(
+                    key_conv_padded_bias)
             : reinterpret_cast<diff_weights_data_t *>(self->memory(1));
 
+        tr_src = scratchpad.template get<src_data_t>(key_conv_tr_src);
+        tr_src_bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                key_conv_tr_src_bctx);
+
+        tr_diff_dst = scratchpad.template get<diff_dst_data_t>(
+                key_conv_tr_diff_dst);
+        tr_diff_dst_bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                key_conv_tr_diff_dst_bctx);
+
+        wei_bia_reduction = scratchpad.template get<diff_weights_data_t>(
+                key_conv_wei_bia_reduction);
+        wei_bia_reduction_bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                key_conv_wei_bia_reduction_bctx);
+
         ithr_ic_b = ithr % self->nthr_ic_b_;
         ithr_oc_b = ithr / self->nthr_ic_b_ % self->nthr_oc_b_;
         ithr_g = ithr / self->nthr_ic_b_ / self->nthr_oc_b_ % self->nthr_g_;
@@ -1030,20 +999,20 @@ struct jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::compute_diff_weights(const thread_info_t *ti) {
-    const memory_desc_wrapper src_d(conf_.src_pd(0));
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
+    diff_weights_type>::compute_diff_weights(const thread_info_t *ti) const {
+    const memory_desc_wrapper src_d(pd()->src_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
     const int wei_size = jcp.ngroups * jcp.oc * jcp.ic * jcp.kh*jcp.kw*jcp.kd;
 
     diff_weights_data_t *diff_wei = ti->ithr_mb == 0
         ? (diff_weights_data_t*)ti->diff_weights
-        : (diff_weights_data_t*)ws_reduction_ + (ti->ithr_mb - 1) * wei_size;
+        : ti->wei_bia_reduction + (ti->ithr_mb - 1) * wei_size;
     diff_weights_data_t *diff_bia = ti->ithr_mb == 0
         ? (diff_weights_data_t*)ti->diff_bias
-        : (diff_weights_data_t*)ws_reduction_ + (nthr_mb_ - 1) * wei_size
+        : ti->wei_bia_reduction + (nthr_mb_ - 1) * wei_size
           + (ti->ithr_mb - 1) * jcp.ngroups * jcp.oc;
 
     // TODO: use memory descriptor with the same fmt as src (or use a macro :))
@@ -1069,7 +1038,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 
         const int _ic = g * jcp.nb_ic + ic_b;
         src_data_t *src1 = (src_data_t*)&ti->src[src_d.blk_off(img, _ic, j)];
-        src_data_t *tr_src1 = &tr_src_[tr_src_off(ti->ithr_mb, _ic, j)];
+        src_data_t *tr_src1 = &ti->tr_src[tr_src_off(ti->ithr_mb, _ic, j)];
 
         assert(jcp.ic_block == 16);
         const int src_stride = jcp.iw * jcp.ic_block;
@@ -1147,7 +1116,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
         const diff_dst_data_t *diff_dst1
             = &ti->diff_dst[diff_dst_d.blk_off(img, oc, j)];
         diff_dst_data_t *tr_diff_dst1
-            = &tr_diff_dst_[tr_diff_dst_off(img, oc, j)];
+            = &ti->tr_diff_dst[tr_diff_dst_off(img, oc, j)];
 
 
         assert(jcp.ic_block == 16);
@@ -1206,7 +1175,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
     if (jcp.is_1stconv && jcp.ver == ver_4fma) {
         /* prepare contexts */
         auto tr_ctx = jit_trans_src_t::ctx_t();
-        tr_ctx.tr_src = tr_src_
+        tr_ctx.tr_src = ti->tr_src
             + ti->ithr_but_oc * jcp.ih * jcp.stride_w * jcp.tr_ld;
 
         assert(IMPLICATION(!mkldnn_thr_syncable(), nthr_oc_b_ == 1));
@@ -1215,7 +1184,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
         balance211(jcp.ih, nthr_oc_b_, ti->ithr_oc_b, ih_start, ih_end);
         tr_ctx.tr_src_ih_start = ih_start;
         tr_ctx.tr_src_ih_end = ih_end;
-        tr_ctx.tr_src_bctx = tr_src_bctx_ + ti->ithr_but_oc;
+        tr_ctx.tr_src_bctx = ti->tr_src_bctx + ti->ithr_but_oc;
 
         auto p = jit_conv_call_s();
         p.src = tr_ctx.tr_src;
@@ -1267,20 +1236,20 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
                 /* tr_src[nb_ic][ih][16][~iw~] <- src[nb_ic][ih][iw][16] */
                 using simple_barrier::barrier;
                 if (nthr_oc_b_ > 1)
-                    barrier(&tr_src_bctx_[ti->ithr_but_oc], nthr_oc_b_);
+                    barrier(&ti->tr_src_bctx[ti->ithr_but_oc], nthr_oc_b_);
                 uker_trans(img);
                 if (nthr_oc_b_ > 1)
-                    barrier(&tr_src_bctx_[ti->ithr_but_oc], nthr_oc_b_);
+                    barrier(&ti->tr_src_bctx[ti->ithr_but_oc], nthr_oc_b_);
             }
 
             if (utils::one_of(jcp.ver, ver_4vnni, ver_vnni)) {
                 /* tr_diff_dst[nb_oc][OW][oh][16c][2ow]
                  *  <- diff_dst[nb_oc][oh][ow][16c] */
                 if (nthr_ic_b_ > 1)
-                    barrier(&tr_diff_dst_bctx_[ti->ithr_but_ic], nthr_ic_b_);
+                    barrier(&ti->tr_diff_dst_bctx[ti->ithr_but_ic], nthr_ic_b_);
                 diff_dst_trans(img);
                 if (nthr_ic_b_ > 1)
-                    barrier(&tr_diff_dst_bctx_[ti->ithr_but_ic], nthr_ic_b_);
+                    barrier(&ti->tr_diff_dst_bctx[ti->ithr_but_ic], nthr_ic_b_);
             }
 
             for (int g = ti->g_start; g < ti->g_end; ++g) {
@@ -1291,10 +1260,10 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 
                 jit_conv_ker_pipeline(kernel_->jit_ker, p,
                          (utils::one_of(jcp.ver, ver_4fma, ver_4vnni, ver_vnni)
-                         ? &tr_src_[tr_src_off(ti->ithr_mb, _ic, 0)]
+                         ? &ti->tr_src[tr_src_off(ti->ithr_mb, _ic, 0)]
                          : &ti->src[src_d.blk_off(img, _ic)]),
                          utils::one_of(jcp.ver, ver_4vnni, ver_vnni)
-                         ? &tr_diff_dst_[tr_diff_dst_off(ti->ithr_mb, _oc, 0)]
+                         ? &ti->tr_diff_dst[tr_diff_dst_off(ti->ithr_mb, _oc, 0)]
                          : &ti->diff_dst[diff_dst_d.blk_off(img, _oc)],
                         diff_wei + wht_blk_off(diff_weights_d, g, oc_b, ic_b),
                         0, (img == ti->img_start), 0, 0);
@@ -1307,10 +1276,10 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
             const int _ic = ti->g_start * jcp.nb_ic + ti->ic_b_start;
             jit_conv_ker_pipeline(kernel_->jit_ker, p,
                     (utils::one_of(jcp.ver, ver_4fma, ver_4vnni, ver_vnni)
-                     ? &tr_src_[tr_src_off(ti->ithr_mb, _ic, 0)]
+                     ? &ti->tr_src[tr_src_off(ti->ithr_mb, _ic, 0)]
                      : &ti->src[src_d.blk_off(img + 1, _ic)]),
                     utils::one_of(jcp.ver, ver_4vnni, ver_vnni)
-                    ? &tr_diff_dst_[tr_diff_dst_off(ti->ithr_mb, _oc, 0)]
+                    ? &ti->tr_diff_dst[tr_diff_dst_off(ti->ithr_mb, _oc, 0)]
                     : &ti->diff_dst[diff_dst_d.blk_off(img + 1, _oc)],
                     diff_wei + wht_blk_off(
                         diff_weights_d, ti->g_start,
@@ -1323,10 +1292,11 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::compute_diff_weights_3d(const thread_info_t *ti) {
-    const memory_desc_wrapper src_d(conf_.src_pd(0));
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
+    diff_weights_type>::compute_diff_weights_3d(const thread_info_t *ti) const
+{
+    const memory_desc_wrapper src_d(pd()->src_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
     const int wei_size
@@ -1334,10 +1304,10 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 
     diff_weights_data_t *diff_wei = ti->ithr_mb == 0
         ? (diff_weights_data_t*)ti->diff_weights
-        : (diff_weights_data_t*)ws_reduction_ + (ti->ithr_mb - 1) * wei_size;
+        : ti->wei_bia_reduction + (ti->ithr_mb - 1) * wei_size;
     diff_weights_data_t *diff_bia = ti->ithr_mb == 0
         ? (diff_weights_data_t*)ti->diff_bias
-        : (diff_weights_data_t*)ws_reduction_ + (nthr_mb_ - 1) * wei_size
+        : ti->wei_bia_reduction + (nthr_mb_ - 1) * wei_size
           + (ti->ithr_mb - 1) * jcp.ngroups * jcp.oc;
 
     const int inp_mult = jcp.is_1stconv ? 1 : jcp.ic_block;
@@ -1397,17 +1367,17 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::reduce_diff_weights(const thread_info_t *ti) {
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
+    diff_weights_type>::reduce_diff_weights(const thread_info_t *ti) const {
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
     const int wei_size = jcp.ngroups * jcp.oc * jcp.ic * jcp.kh * jcp.kw;
     const int bia_size = jcp.ngroups * jcp.oc;
     const diff_weights_data_t *diff_bias_ws
-        = ws_reduction_ + (nthr_mb_ - 1) * wei_size;
+        = ti->wei_bia_reduction + (nthr_mb_ - 1) * wei_size;
 
-    /* diff_weights[:] += sum(ws_reduction_[thr_mb][:]) */
-    simple_barrier::barrier(&reduction_bctx_, nthr_);
+    /* diff_weights[:] += sum(wei_reduction_[thr_mb][:]) */
+    simple_barrier::barrier(ti->wei_bia_reduction_bctx, nthr_);
 
     const int ic_b_kh_work = ti->ic_b_work * jcp.kh;
     const int work = ti->g_work * ti->oc_b_work * ic_b_kh_work;
@@ -1437,7 +1407,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
             diff_weights_data_t *d
                 = (diff_weights_data_t *)ti->diff_weights + off;
             diff_weights_data_t *s
-                = ws_reduction_ + (thr_mb - 1) * wei_size + off;
+                = ti->wei_bia_reduction + (thr_mb - 1) * wei_size + off;
 
             acc_ker_->accumulate(d, s, acc_size);
 
@@ -1457,15 +1427,15 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::reduce_diff_weights_3d(const thread_info_t *ti) {
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
+    diff_weights_type>::reduce_diff_weights_3d(const thread_info_t *ti) const {
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
     const int wei_size = jcp.ngroups * jcp.oc * jcp.ic * jcp.kh * jcp.kw
         * jcp.kd;
 
-    /* diff_weights[:] += sum(ws_reduction_[thr_mb][:]) */
-    simple_barrier::barrier(&reduction_bctx_, nthr_);
+    /* diff_weights[:] += sum(wei_reduction_[thr_mb][:]) */
+    simple_barrier::barrier(ti->wei_bia_reduction_bctx, nthr_);
 
     const int ic_b_kh_work = ti->ic_b_work * jcp.kd;
     const int work = ti->g_work * ti->oc_b_work * ic_b_kh_work;
@@ -1494,7 +1464,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
             diff_weights_data_t *d
                 = (diff_weights_data_t *)ti->diff_weights + off;
             diff_weights_data_t *s
-                = ws_reduction_ + (thr_mb - 1) * wei_size + off;
+                = ti->wei_bia_reduction + (thr_mb - 1) * wei_size + off;
             acc_ker_->accumulate(d, s, acc_size);
 
             nd_iterator_jump(w, end, sub_g_start, ti->g_work, sub_oc_b_start,
@@ -1506,25 +1476,28 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::compute_diff_bias(const thread_info_t *ti) {
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
+    diff_weights_type>::compute_diff_bias(const thread_info_t *ti) const {
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
 
     auto rb = this->reducer_bias_;
-    assert(nthr_ == rb->balancer_.nthr_);
+    assert(nthr_ == rb->balancer().nthr_);
+
+    const auto reducer_bia_scratchpad = memory_tracking::grantor_t(
+            ti->scratchpad, prefix_reducer_bia);
 
     const auto &jcp = kernel_->jcp;
 
     if (jcp.with_bias && jcp.is_1stconv && jcp.ver == ver_4fma) return;
 
-    const int b_job_start = rb->balancer_.ithr_job_off(ti->ithr);
-    const int b_njobs = rb->balancer_.ithr_njobs(ti->ithr);
+    const int b_job_start = rb->balancer().ithr_job_off(ti->ithr);
+    const int b_njobs = rb->balancer().ithr_njobs(ti->ithr);
 
     if (b_njobs == 0) return;
 
     /* reduction dimension */
     int img_start{0}, img_end{0};
-    balance211(jcp.mb, rb->balancer_.nthr_per_group_,
-            rb->balancer_.id_in_group(ti->ithr), img_start, img_end);
+    balance211(jcp.mb, rb->balancer().nthr_per_group_,
+            rb->balancer().id_in_group(ti->ithr), img_start, img_end);
 
     /* jobs */
     int g_start{0}, ocb_start{0};
@@ -1536,9 +1509,9 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 
             const diff_dst_data_t *d_dst
                 = &ti->diff_dst[diff_dst_d.blk_off(img, _oc)];
-            diff_weights_data_t *d_bias = &rb->get_local_ptr(ti->ithr,
-                (diff_weights_data_t *)ti->diff_bias)[
-                b_job_loc * rb->balancer_.job_size_];
+            diff_weights_data_t *d_bias = rb->get_local_ptr(ti->ithr,
+                    ti->diff_bias, reducer_bia_scratchpad)
+                + b_job_loc * rb->balancer().job_size_;
 
             if (img == img_start)
                 for (int o = 0; o < 16; ++o)
@@ -1554,13 +1527,13 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
         }
     }
 
-    rb->reduce(ti->ithr, ti->diff_bias);
+    rb->reduce(ti->ithr, ti->diff_bias, reducer_bia_scratchpad);
 }
 
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::compute_diff_bias_3d(const thread_info_t *ti) {
+    diff_weights_type>::compute_diff_bias_3d(const thread_info_t *ti) const {
 
     const auto &jcp = kernel_->jcp;
 
@@ -1568,7 +1541,7 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
         * jcp.kw * jcp.kd;
     const int bia_size = jcp.ngroups * jcp.oc;
     const diff_weights_data_t *diff_bias_ws
-            = ws_reduction_ + (size_t)(nthr_mb_ - 1) * wei_size;
+            = ti->wei_bia_reduction + (size_t)(nthr_mb_ - 1) * wei_size;
 
     if (nthr_mb_ > 1) mkldnn_thr_barrier();
 
@@ -1584,161 +1557,91 @@ void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_dst_type,
           data_type_t diff_weights_type>
 void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::execute_backward_weights() {
+    diff_weights_type>::prepare_scratchpad_data() const
+{
+    const auto &j = pd()->jcp_;
+    auto scratchpad = this->scratchpad();
+
+    if (utils::one_of(j.ver, ver_4fma, ver_4vnni, ver_vnni)) {
+        if (!j.is_1stconv) {
+            // XXX: See the comment about tr_iw and guarding elements in
+            // jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf()
+            const int max_nthr = j.nthr_mb * j.ngroups * j.nb_ic;
+            const int min_tr_src_size_per_thr = j.ih * j.ic_block * j.tr_iw;
+
+            auto tr_src = scratchpad.template get<src_data_t>(key_conv_tr_src);
+            /* to avoid NaNs in computations we zero tail num_guard_elems for
+             * each possible thread group */
+
+            for (int ithr = 1; ithr <= max_nthr; ++ithr) {
+                src_data_t *ts = &tr_src[ithr * min_tr_src_size_per_thr];
+                for (int i = 0; i < j.tr_src_num_guard_elems; ++i)
+                    ts[i] = 0;
+            }
+        }
+
+        if (j.nthr_oc_b > 1) {
+            const int tr_src_bctx_size = j.nthr / j.nthr_oc_b;
+            auto tr_src_bctx = scratchpad.template get<simple_barrier::ctx_t>(
+                    key_conv_tr_src_bctx);
+            for (int i = 0; i < tr_src_bctx_size; ++i)
+                simple_barrier::ctx_init(&tr_src_bctx[i]);
+        }
+
+        if (utils::one_of(j.ver, ver_4vnni, ver_vnni) && j.nthr_ic_b > 1) {
+            const int tr_diff_dst_bctx_size = j.nthr / j.nthr_ic_b;
+            auto tr_diff_dst_bctx =
+                scratchpad.template get<simple_barrier::ctx_t>(
+                        key_conv_tr_diff_dst_bctx);
+                for (int i = 0; i < tr_diff_dst_bctx_size; ++i)
+                    simple_barrier::ctx_init(&tr_diff_dst_bctx[i]);
+        }
+    }
+
+    if (nthr_mb_ > 1) {
+        simple_barrier::ctx_init(scratchpad.template get<simple_barrier::ctx_t>(
+                    key_conv_wei_bia_reduction_bctx));
+    }
+
+    const auto reducer_bia_scratchpad = memory_tracking::grantor_t(scratchpad,
+            prefix_reducer_bia);
+    auto rb = this->reducer_bias_;
+    rb->init(reducer_bia_scratchpad);
+}
+
+template <data_type_t src_type, data_type_t diff_dst_type,
+          data_type_t diff_weights_type>
+void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
+    diff_weights_type>::execute_backward_weights() const {
+    prepare_scratchpad_data();
+
     parallel(nthr_, [&](const int ithr, const int nthr) {
         assert(nthr_ == nthr);
 
         thread_info_t thread_info(this, ithr);
 
-        if (utils::one_of(conf_.ndims(), 3, 4)) {
+        if (utils::one_of(pd()->ndims(), 3, 4)) {
             compute_diff_weights(&thread_info);
             if (nthr_mb_ > 1) reduce_diff_weights(&thread_info);
-            if (conf_.with_bias()) compute_diff_bias(&thread_info);
-        } else if (conf_.ndims() == 5) {
+            if (pd()->with_bias()) compute_diff_bias(&thread_info);
+        } else if (pd()->ndims() == 5) {
             compute_diff_weights_3d(&thread_info);
             if (nthr_mb_ > 1) reduce_diff_weights_3d(&thread_info);
-            if (conf_.with_bias()) compute_diff_bias_3d(&thread_info);
+            if (pd()->with_bias()) compute_diff_bias_3d(&thread_info);
         } else {
             assert(false);
         }
     });
 
     /* TODO: put that into compute_diff_bias() */
-    if (conf_.want_padded_bias()) {
+    if (pd()->wants_padded_bias()) {
+        auto diff_bias = scratchpad().template get<const diff_weights_data_t>(
+                key_conv_padded_bias);
         auto diff_bias_in
             = reinterpret_cast<diff_weights_data_t *>(this->memory(1));
-        for (int oc = 0; oc < conf_.jcp_.oc_without_padding; ++oc)
-            diff_bias_in[oc] = this->padded_bias_[oc];
-    }
-}
-
-template <data_type_t src_type, data_type_t diff_dst_type,
-          data_type_t diff_weights_type>
-void jit_avx512_common_convolution_bwd_weights_t<src_type, diff_dst_type,
-    diff_weights_type>::balance() {
-    const int max_threads = mkldnn_get_max_threads();
-    const auto &j = conf_.jcp_;
-
-    nthr_ = nthr_mb_ = nthr_g_ = nthr_oc_b_ = nthr_ic_b_ = 1;
-
-    if (max_threads < j.ngroups) {
-        /* simplification... fortunately it doesn't hurt much */
-        return;
-    }
-
-    if (!mkldnn_thr_syncable()
-            && utils::one_of(j.ver, ver_4fma, ver_4vnni, ver_vnni)) {
-        // should not happen -- the driver is not ready
-        // for TBB-like non-synchronous threading yet
-        return;
+        for (int oc = 0; oc < pd()->jcp_.oc_without_padding; ++oc)
+            diff_bias_in[oc] = diff_bias[oc];
     }
-
-    if (j.ver == ver_4fma && j.is_1stconv) {
-        nthr_g_ = 1;
-        nthr_oc_b_ = 1;
-        nthr_ic_b_ = nstl::min(j.nb_ic, max_threads);
-        nthr_mb_ = nstl::min(max_threads / nthr_ic_b_, j.mb);
-        nthr_ = nthr_mb_ * nthr_oc_b_ * nthr_ic_b_ * nthr_g_;
-        return;
-    }
-
-    nthr_g_ = j.ngroups;
-    const int nthr = max_threads / nthr_g_;
-
-    auto calc_mem_cost = [=](int nthr_mb, int nthr_oc_b, int nthr_ic_b) {
-        /* calculate per thread memory cost (read/write). high level optimizer
-         * tries to minimize memory consumption. few notes:
-         *  (n1) unclear why, but that essentially helps first convolution...
-         *  (n2) assuming the reduction over minibatch is always there:
-         *    - instead of 8 it should be 5 here (write ~= 2 read):
-         *      kernel: temporal workspace 1 write
-         *      reduction: 1 read from workspace and 1 write to the diff_wei
-         *    - but experiments showed 8 works better than 5 or 6... */
-
-        const int src_coef = j.ver == ver_4fma || j.ver == ver_vnni ? 4 : 1;
-        const int dst_coef = 1;
-        const int wei_coef = j.ver == ver_vnni ? 4 : 8;
-
-        return 0
-            + src_coef
-            * div_up(j.mb, nthr_mb) * div_up(j.ngroups, nthr_g_)
-            * div_up(j.nb_ic, nthr_ic_b) * j.ic_block * j.ih * j.iw * j.id
-            / j.stride_d / j.stride_h / j.stride_w /* (n1) */
-            + dst_coef
-            * div_up(j.mb, nthr_mb) * div_up(j.ngroups, nthr_g_)
-            * div_up(j.nb_oc, nthr_oc_b) * j.oc_block * j.oh * j.ow * j.od
-            + wei_coef /* (n2) */
-            * div_up(j.ngroups, nthr_g_)
-            * div_up(j.nb_oc, nthr_oc_b) * div_up(j.nb_ic, nthr_ic_b)
-            * j.kh * j.kw * j.kd * j.ic_block * j.oc_block;
-    };
-
-    int best_mem_cost = calc_mem_cost(nthr_mb_, nthr_oc_b_, nthr_ic_b_);
-
-    /* step 1: find the best thread distribution with lowest memory cost */
-    const int nthr_mb_max = nstl::min(nthr, j.mb * j.od);
-    for (int nthr_mb = 1; nthr_mb <= nthr_mb_max; ++nthr_mb) {
-        const int nthr_par = nthr / nthr_mb;
-        const int nthr_oc_b_max = nstl::min(nthr_par, j.nb_oc);
-        for (int nthr_oc_b = 1; nthr_oc_b <= nthr_oc_b_max; ++nthr_oc_b) {
-            int nthr_ic_b = nstl::min(nthr_par / nthr_oc_b, j.nb_ic);
-
-            int mem_cost = calc_mem_cost(nthr_mb, nthr_oc_b, nthr_ic_b);
-            if (mem_cost <= best_mem_cost) {
-                best_mem_cost = mem_cost;
-                nthr_mb_ = nthr_mb;
-                nthr_oc_b_ = nthr_oc_b;
-                nthr_ic_b_ = nthr_ic_b;
-            }
-        }
-
-        if (!mkldnn_thr_syncable()) { assert(nthr_mb == 1); break; }
-    }
-
-    if (j.ver != ver_vnni && !mayiuse(avx512_mic)) {
-        auto calc_comp_cost = [=](int nthr_mb, int nthr_oc_b, int nthr_ic_b) {
-            return 1
-                * div_up(j.mb, nthr_mb)
-                * div_up(j.ngroups, nthr_g_)
-                * div_up(j.nb_oc, nthr_oc_b)
-                * div_up(j.nb_ic, nthr_ic_b);
-        };
-
-        /* step 2: search for a thread distribution with lower compute cost.
-         * the constrains:
-         *  - memory cost cannot exceed 110% of the best found in the step 1
-         *  - unless compute cost is 133% lower than the current best case
-         * note: both constants were found empirically */
-        int best_comp_cost = calc_comp_cost(nthr_mb_, nthr_oc_b_, nthr_ic_b_);
-        for (int nthr_mb = 1; nthr_mb <= nthr_mb_max; ++nthr_mb) {
-            const int nthr_par = nthr / nthr_mb;
-            const int nthr_oc_b_max = nstl::min(nthr_par, j.nb_oc);
-            for (int nthr_oc_b = 1; nthr_oc_b <= nthr_oc_b_max; ++nthr_oc_b) {
-                int nthr_ic_b = nstl::min(nthr_par / nthr_oc_b, j.nb_ic);
-                int mem_cost = calc_mem_cost(nthr_mb, nthr_oc_b, nthr_ic_b);
-                int comp_cost = calc_comp_cost(nthr_mb, nthr_oc_b, nthr_ic_b);
-
-                const bool opt1 = comp_cost <= best_comp_cost
-                    && mem_cost < 1.1 * best_mem_cost;
-                const bool opt2 = 4 * comp_cost <= 3 * best_comp_cost;
-
-                if (opt1 || opt2) {
-                    best_comp_cost = comp_cost;
-                    nthr_mb_ = nthr_mb;
-                    nthr_oc_b_ = nthr_oc_b;
-                    nthr_ic_b_ = nthr_ic_b;
-                }
-            }
-
-            if (!mkldnn_thr_syncable()) { assert(nthr_mb == 1); break; }
-        }
-    }
-
-    if (nthr_mb_ > max_threads/2 && nthr_mb_ < max_threads)
-        nthr_mb_ = min(j.mb * j.od, max_threads);
-    nthr_ = nthr_mb_ * nthr_g_ * nthr_oc_b_ * nthr_ic_b_;
-    assert(nthr_ <= max_threads);
-    assert(IMPLICATION(!mkldnn_thr_syncable(), nthr_mb_ == 1));
 }
 
 template struct jit_avx512_common_convolution_bwd_weights_t<data_type::f32>;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.hpp
index 42080cc58..e50021846 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution.hpp
@@ -18,124 +18,116 @@
 #define CPU_JIT_AVX512_COMMON_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
+#include "cpu_barrier.hpp"
 #include "cpu_convolution_pd.hpp"
-#include "cpu_engine.hpp"
-#include "jit_avx512_common_conv_kernel.hpp"
-#include "jit_transpose_src_utils.hpp"
 #include "cpu_reducer.hpp"
-#include "cpu_barrier.hpp"
+
+#include "jit_transpose_src_utils.hpp"
+#include "jit_avx512_common_conv_kernel.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu, impl::data_type_t src_type,
+template <impl::data_type_t src_type,
          impl::data_type_t wei_type = src_type,
          impl::data_type_t dst_type = src_type>
-struct _jit_avx512_common_convolution_fwd_t : public cpu_primitive_t {
-    struct pd_t : public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+struct jit_avx512_common_convolution_fwd_t : public cpu_primitive_t {
+    struct pd_t : public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_()
         {
         }
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit:", avx512_common, ""),
-                _jit_avx512_common_convolution_fwd_t);
+                jit_avx512_common_convolution_fwd_t);
 
         virtual status_t init() override
         {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
-                    && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                    && utils::one_of(this->desc()->prop_kind, forward_training,
                                forward_inference)
-                    && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_direct)
                     && !this->has_zero_dim_memory()
-                    && this->cdesc_().src_desc.data_type == src_type
-                    && this->cdesc_().weights_desc.data_type == wei_type
-                    && this->cdesc_().dst_desc.data_type == dst_type
+                    && this->desc()->src_desc.data_type == src_type
+                    && this->desc()->weights_desc.data_type == wei_type
+                    && this->desc()->dst_desc.data_type == dst_type
                     && IMPLICATION(this->with_bias(), dst_type
-                                       == this->cdesc_().bias_desc.data_type)
-                    && !(with_relu && this->negative_slope()!= 0.
-                                   && dst_type == data_type::s32
-                                   && src_type == data_type::s16
-                                   && wei_type == data_type::s16);
+                                       == this->desc()->bias_desc.data_type);
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_common_conv_fwd_kernel::init_conf(
-                    jcp_, this->cdesc_(), this->src_pd_, this->weights_pd_,
+            status_t status = jit_avx512_common_conv_fwd_kernel::init_conf(
+                    jcp_, *this->desc(), this->src_pd_, this->weights_pd_,
                     this->dst_pd_,this->bias_pd_, *this->attr(),
-                    mkldnn_get_max_threads(), with_relu, this->negative_slope());
-        }
+                    mkldnn_get_max_threads());
+            if (status != status::success) return status;
 
-        inline int ndims() { return this->cdesc_().src_desc.ndims; }
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_common_conv_fwd_kernel::init_scratchpad(scratchpad,
+                    jcp_);
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
+            return status;
+        }
 
         jit_conv_conf_t jcp_;
     };
 
-    _jit_avx512_common_convolution_fwd_t(const pd_t *pd,
+    jit_avx512_common_convolution_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , padded_bias_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
     {
-        kernel_ = new jit_avx512_common_conv_fwd_kernel(conf_.jcp_,
-                    *conf_.attr());
-
-        if (conf_.want_padded_bias()) {
-            const auto &j = conf_.jcp_;
-            assert(j.ngroups == 1);
-            padded_bias_ = (dst_data_t *)malloc(sizeof(dst_data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                padded_bias_[oc] = 0;
-        }
+        kernel_ = new jit_avx512_common_conv_fwd_kernel(pd()->jcp_,
+                    *pd()->attr());
     }
-    ~_jit_avx512_common_convolution_fwd_t() {
-        delete kernel_;
-        free(padded_bias_);
-    };
+    ~jit_avx512_common_convolution_fwd_t() { delete kernel_; }
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
-        if (conf_.ndims() == 3)
+        if (pd()->ndims() == 3)
             execute_forward_1d();
-        else if (conf_.ndims() == 4)
+        else if (pd()->ndims() == 4)
             execute_forward_2d();
-        else if (conf_.ndims() == 5)
+        else if (pd()->ndims() == 5)
             execute_forward_3d();
         else
             assert(false);
+
+        if (pd()->wants_zero_pad_dst())
+            output_memory_primitive(0)->zero_pad();
+
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward_1d();
-    void execute_forward_2d();
-    void execute_forward_3d();
-    pd_t conf_;
+    void prepare_padded_bias(const dst_data_t *&bias) const;
+    void execute_forward_1d() const;
+    void execute_forward_2d() const;
+    void execute_forward_3d() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx512_common_conv_fwd_kernel *kernel_;
-    dst_data_t *padded_bias_;
 };
 
-template <impl::data_type_t src_type, impl::data_type_t wei_type = src_type,
-         impl::data_type_t dst_type = src_type>
-using jit_avx512_common_convolution_fwd_t =
-    _jit_avx512_common_convolution_fwd_t<false, src_type, wei_type, dst_type>;
-
-template <impl::data_type_t src_type, impl::data_type_t wei_type = src_type,
-         impl::data_type_t dst_type = src_type>
-using jit_avx512_common_convolution_relu_t =
-    _jit_avx512_common_convolution_fwd_t<true, src_type, wei_type, dst_type>;
-
 template <impl::data_type_t diff_dst_type,
           impl::data_type_t wei_type = diff_dst_type,
           impl::data_type_t diff_src_type = diff_dst_type>
@@ -159,19 +151,27 @@ struct jit_avx512_common_convolution_bwd_data_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && utils::one_of(this->desc()->prop_kind, backward_data) // XXX (this->!)
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
-                && this->desc()->alg_kind == alg_kind::convolution_direct
                 && this->desc()->diff_dst_desc.data_type == diff_dst_type
                 && this->desc()->weights_desc.data_type == wei_type
                 && this->desc()->diff_src_desc.data_type == diff_src_type;
             if (!ok) return status::unimplemented;
 
-            return jit_avx512_common_conv_bwd_data_kernel_f32::init_conf(
-                    jcp_,*this->desc(), *this->diff_src_pd_.desc(),
-                    *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
-        }
+            status_t status =
+                jit_avx512_common_conv_bwd_data_kernel_f32::init_conf(jcp_,
+                        *this->desc(), *this->diff_src_pd_.desc(),
+                        *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
+            if (status != status::success) return status;
 
-        inline int ndims() { return this->desc()->diff_src_desc.ndims; }
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_common_conv_bwd_data_kernel_f32::init_scratchpad(
+                    scratchpad, jcp_);
+
+            return status::success;
+        }
 
         inline memory_format_t src_format()
         {
@@ -206,30 +206,30 @@ struct jit_avx512_common_convolution_bwd_data_t: public cpu_primitive_t {
                 CHECK(this->diff_dst_pd_.set_format(src_format()));
             if (this->weights_pd_.desc()->format == any)
                 CHECK(this->weights_pd_.set_format(wei_format()));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    jit_avx512_common_convolution_bwd_data_t(const pd_t *pd,
+    jit_avx512_common_convolution_bwd_data_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    {
-        kernel_ = new jit_avx512_common_conv_bwd_data_kernel_f32(conf_.jcp_);
-    }
+        : cpu_primitive_t(apd, inputs, outputs)
+    { kernel_ = new jit_avx512_common_conv_bwd_data_kernel_f32(pd()->jcp_); }
     ~jit_avx512_common_convolution_bwd_data_t() { delete kernel_; };
 
     typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<diff_src_type>::type diff_src_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
-            if (conf_.ndims() == 3)
+            if (pd()->ndims() == 3)
                 execute_backward_data_1d();
-            else if (conf_.ndims() == 4)
+            else if (pd()->ndims() == 4)
                 execute_backward_data_2d();
-            else if (conf_.ndims() == 5)
+            else if (pd()->ndims() == 5)
                 execute_backward_data_3d();
             else
                 assert(false);
@@ -241,10 +241,11 @@ struct jit_avx512_common_convolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data_1d();
-    void execute_backward_data_2d();
-    void execute_backward_data_3d();
-    pd_t conf_;
+    void execute_backward_data_1d() const;
+    void execute_backward_data_2d() const;
+    void execute_backward_data_3d() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx512_common_conv_bwd_data_kernel_f32 *kernel_;
 };
 
@@ -267,7 +268,9 @@ struct jit_avx512_common_convolution_bwd_weights_t: public cpu_primitive_t {
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->desc()->prop_kind == prop_kind::backward_weights
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && this->desc()->src_desc.data_type == src_type
                 && this->desc()->diff_dst_desc.data_type == diff_dst_type
@@ -275,12 +278,27 @@ struct jit_avx512_common_convolution_bwd_weights_t: public cpu_primitive_t {
                     == diff_weights_type;
             if (!ok) return status::unimplemented;
 
-            return jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(
-                    jcp_, *this->desc(), this->src_pd_, this->diff_weights_pd_,
-                    this->diff_bias_pd_, this->diff_dst_pd_);
-        }
+            status_t status =
+                jit_avx512_common_conv_bwd_weights_kernel_f32::init_conf(jcp_,
+                        *this->desc(), this->src_pd_, this->diff_weights_pd_,
+                        this->diff_bias_pd_, this->diff_dst_pd_);
+            if (status != status::success) return status;
+
+            init_balancers();
 
-        inline int ndims() { return this->desc()->src_desc.ndims; }
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_common_conv_bwd_weights_kernel_f32::init_scratchpad(
+                    scratchpad, jcp_);
+
+            auto reducer_bia_scratchpad = memory_tracking::registrar_t(
+                    scratchpad, memory_tracking::names::prefix_reducer_bia);
+            reducer_bia_conf_.init_scratchpad(reducer_bia_scratchpad);
+
+            if (status == status::success &&
+                    this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
+            return status;
+        }
 
         inline memory_format_t src_format()
         {
@@ -297,29 +315,37 @@ struct jit_avx512_common_convolution_bwd_weights_t: public cpu_primitive_t {
                       OIdhw16o16i);
         }
 
-
         jit_conv_conf_t jcp_;
+        typename cpu_reducer_t<diff_weights_type>::conf_t reducer_bia_conf_;
 
-        protected:
-            virtual status_t set_default_params() override {
-                using namespace memory_format;
+    protected:
+        virtual status_t set_default_params() override {
+            using namespace memory_format;
 
-                if (this->src_pd_.desc()->format == any)
-                    CHECK(this->src_pd_.set_format(src_format()));
-                if (this->diff_weights_pd_.desc()->format == any)
-                    CHECK(this->diff_weights_pd_.set_format(wei_format()));
-                if (this->diff_dst_pd_.desc()->format == any)
-                    CHECK(this->diff_dst_pd_.set_format(src_format()));
+            if (this->src_pd_.desc()->format == any)
+                CHECK(this->src_pd_.set_format(src_format()));
+            if (this->diff_weights_pd_.desc()->format == any)
+                CHECK(this->diff_weights_pd_.set_format(wei_format()));
+            if (this->diff_dst_pd_.desc()->format == any)
+                CHECK(this->diff_dst_pd_.set_format(src_format()));
 
-                return status::success;
-            }
+            return status::success;
+        }
 
+    private:
+        void init_balancers() {
+            const size_t max_buffer_size = jcp_.nthr * 3 * 5 * 5 * 16 * 16;
+            if (with_bias()) {
+                reducer_bia_conf_.init(reduce_balancer_t(jcp_.nthr,
+                            jcp_.oc_block, jcp_.ngroups * jcp_.nb_oc, jcp_.mb,
+                            max_buffer_size));
+            }
+        }
     };
 
-    jit_avx512_common_convolution_bwd_weights_t(const pd_t *pd,
+    jit_avx512_common_convolution_bwd_weights_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
     ~jit_avx512_common_convolution_bwd_weights_t() {
-
         delete kernel_;
         if (trans_kernel_)
             delete trans_kernel_;
@@ -328,53 +354,37 @@ struct jit_avx512_common_convolution_bwd_weights_t: public cpu_primitive_t {
         if (acc_ker_)
             delete acc_ker_;
         delete reducer_bias_;
-        free(padded_bias_);
-
-        free(tr_src_);
-        free(ws_reduction_);
-
-        free(tr_src_bctx_);
-        free(tr_diff_dst_bctx_);
-
-        free(tr_diff_dst_);
     }
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
     typedef typename prec_traits<diff_weights_type>::type diff_weights_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward_weights();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_weights();
-    void balance();
-
+    void execute_backward_weights() const;
+    void prepare_scratchpad_data() const;
     struct thread_info_t;
-    void compute_diff_weights(const thread_info_t *);
-    void compute_diff_weights_3d(const thread_info_t *);
-    void reduce_diff_weights(const thread_info_t *);
-    void reduce_diff_weights_3d(const thread_info_t *);
-    void compute_diff_bias(const thread_info_t *);
-    void compute_diff_bias_3d(const thread_info_t *);
+    void compute_diff_weights(const thread_info_t *) const;
+    void compute_diff_weights_3d(const thread_info_t *) const;
+    void reduce_diff_weights(const thread_info_t *) const;
+    void reduce_diff_weights_3d(const thread_info_t *) const;
+    void compute_diff_bias(const thread_info_t *) const;
+    void compute_diff_bias_3d(const thread_info_t *) const;
 
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
+    int nthr_, nthr_mb_, nthr_g_, nthr_oc_b_, nthr_ic_b_;
 
     jit_avx512_common_conv_bwd_weights_kernel_f32 *kernel_;
     jit_trans_src_t *trans_kernel_;
     jit_trans_dst_t *trans_dst_kernel_;
     cpu_accumulator_1d_t<diff_weights_type> *acc_ker_;
     cpu_reducer_t<diff_weights_type> *reducer_bias_;
-    diff_weights_data_t *padded_bias_;
-
-    src_data_t *tr_src_;
-    diff_dst_data_t *tr_diff_dst_;
-    diff_weights_data_t *ws_reduction_;
-
-    int nthr_, nthr_mb_, nthr_g_, nthr_oc_b_, nthr_ic_b_;
-    simple_barrier::ctx_t *tr_src_bctx_, *tr_diff_dst_bctx_, reduction_bctx_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.cpp
index 93db55ead..eb45ba9d0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.cpp
@@ -37,6 +37,8 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
+using namespace memory_tracking::names;
+
 namespace {
 
 unsigned int LLC_cache_size = get_cache_size(3, false);
@@ -511,80 +513,6 @@ void input_transform_data(int image, const jit_conv_winograd_conf_t &jcp,
 }
 
 template <bool is_fwd>
-void input_transform_tileblock_data(int tile_block,
-        const jit_conv_winograd_conf_t &jcp,
-        float *inp, float *tinp)
-{
-    const int inph = is_fwd ? jcp.ih : jcp.oh;
-    const int inpw = is_fwd ? jcp.iw : jcp.ow;
-    const int t_pad = is_fwd ? jcp.t_pad : jcp.ih + jcp.t_pad - jcp.oh;
-    const int l_pad = is_fwd ? jcp.l_pad : jcp.iw + jcp.r_pad - jcp.ow;
-    const int wp_max = inpw + l_pad;
-    const int hp_max = inph + t_pad;
-    float Iw[alpha][alpha][simd_w];
-    float I[alpha][alpha][simd_w];
-
-    array_offset_calculator<float, 5> input(inp,
-            jcp.mb, jcp.dimK/simd_w, inph, inpw, simd_w);
-    array_offset_calculator<float, 7> output(tinp,
-            alpha, alpha,
-            jcp.dimN_block, jcp.dimK_nb_block, jcp.dimK_block,
-            jcp.dimN_reg_block, jcp.dimK_reg_block);
-
-    int tile_index = tile_block * jcp.nb_tile_block_ur * jcp.tile_block_ur;
-
-    for (int nb_tile_block_ur = 0;
-            nb_tile_block_ur < jcp.nb_tile_block_ur;
-            nb_tile_block_ur++) {
-        for (int tile_block_ur = 0; tile_block_ur < jcp.tile_block_ur;
-                tile_block_ur++) {
-
-            int img = tile_index / (jcp.jtiles * jcp.itiles);
-            int ti = tile_index % jcp.itiles;
-            int tj = (tile_index / jcp.itiles) % jcp.jtiles;
-            float *pinp_b = &(input(img, 0, 0, 0, 0));
-
-            for (int j = 0; j < alpha; j++) {
-                int ydim = tj * tile_size + j;
-                if ((t_pad <= ydim) && (ydim < hp_max)) {
-                    float *pinp_j = pinp_b + (ydim - t_pad) * inpw * simd_w;
-                    for (int i = 0; i < alpha; i++) {
-                        int xdim = ti * tile_size + i;
-                        if ((l_pad <= xdim) && (xdim < wp_max)) {
-                            float *pinp_i = pinp_j + (xdim - l_pad) * simd_w;
-                            load_ps(I[j][i], pinp_i);
-                        } else {
-                            PRAGMA_OMP_SIMD()
-                            for (int v = 0; v < simd_w; v++) {
-                                I[j][i][v] = 0.0f;
-                            }
-                        }
-                    }
-                } else {
-                    for (int i = 0; i < alpha; i++) {
-                        PRAGMA_OMP_SIMD()
-                        for (int v = 0; v < simd_w; v++) {
-                            I[j][i][v] = 0.0f;
-                        }
-                    }
-                }
-            }
-
-            trans_I_4x4_3x3(Iw, I);
-            for (int j = 0; j < alpha; j++) {
-                for (int i = 0; i < alpha; i++) {
-                    store_output(&(output(j, i,
-                                    nb_tile_block_ur, 0, 0,
-                                    tile_block_ur, 0)),
-                                 Iw[j][i], false);
-                }
-            }
-            tile_index++;
-        }
-    }
-}
-
-template <bool is_fwd>
 void weight_transform_data(const jit_conv_winograd_conf_t &jcp,
         float *wp, float *twp)
 {
@@ -691,7 +619,7 @@ void output_transform_data(int image, const jit_conv_winograd_conf_t &jcp,
                                     O[j][i][v] = true
                                         && with_relu_presum && O[j][i][v] < 0.f
                                                 ? O[j][i][v]
-                                                * jcp.eltwise_alpha
+                                                * jcp.eltwise.alpha
                                                 : O[j][i][v];
                                 }
                             }
@@ -717,83 +645,6 @@ void output_transform_data(int image, const jit_conv_winograd_conf_t &jcp,
     }
 }
 
-template <bool is_fwd, bool with_bias, bool with_relu_presum, bool with_sum>
-void output_transform_tileblock_data(int tile_block,
-        const jit_conv_winograd_conf_t &jcp, const post_ops_t &p_ops,
-        float *toutp, float *outp, float *bias, bool streamout) {
-    float Ow[alpha][alpha][simd_w];
-    float O[tile_size][tile_size][simd_w];
-    int outw = is_fwd ? jcp.ow : jcp.iw;
-    int outh = is_fwd ? jcp.oh : jcp.ih;
-
-    /* Prepare for PostOps */
-    bool with_relu_postsum = p_ops.find(primitive_kind::eltwise, 1) != -1;
-
-    array_offset_calculator<float, 6> input(toutp,
-            alpha, alpha,
-            jcp.dimN_block, jcp.dimM_block,
-            jcp.dimN_reg_block, jcp.dimM_simd_block);
-    array_offset_calculator<float, 5> output(outp,
-            jcp.mb, jcp.dimM/jcp.dimM_simd_block, outh, outw,
-            jcp.dimM_simd_block);
-
-    int tile_index = tile_block * jcp.nb_tile_block_ur * jcp.tile_block_ur;
-
-    for (int nb_tile_block_ur = 0;
-            nb_tile_block_ur < jcp.nb_tile_block_ur;
-            nb_tile_block_ur++) {
-
-        for (int tile_block_ur = 0; tile_block_ur < jcp.tile_block_ur;
-                tile_block_ur++) {
-            int img = tile_index / (jcp.jtiles * jcp.itiles);
-            int ti = tile_index % jcp.itiles;
-            int tj = (tile_index / jcp.itiles) % jcp.jtiles;
-
-            for (int j = 0; j < alpha; j++) {
-                for (int i = 0; i < alpha; i++) {
-                    float *pinp_tile = &(input(j, i, nb_tile_block_ur, 0,
-                                tile_block_ur, 0));
-                    load_ps(Ow[j][i], pinp_tile);
-                }
-            }
-
-            trans_O_4x4_3x3(Ow, O);
-
-            float *pout_b = &(output(img, 0, 0, 0, 0));
-            for (int j = 0; j < tile_size; j++) {
-                int ydim = tj * tile_size + j;
-                if (ydim < outh) {
-                    float *pout_j = pout_b + ydim * outw * simd_w;
-                    for (int i = 0; i < tile_size; i++) {
-                        int xdim = ti * tile_size + i;
-                        if (xdim < outw) {
-                            float *pout_i = pout_j + xdim * simd_w;
-                            if (is_fwd) {
-                                PRAGMA_OMP_SIMD()
-                                for (int v = 0; v < simd_w; v++) {
-                                    O[j][i][v] += with_bias ? bias[v] : 0.f;
-                                    O[j][i][v] = true
-                                        && with_relu_presum && O[j][i][v] < 0.f
-                                                ? O[j][i][v]
-                                                * jcp.eltwise_alpha
-                                                : O[j][i][v];
-
-                                }
-                            }
-                            if (with_sum)
-                                accum_output(pout_i, O[j][i], streamout,
-                                        with_relu_postsum);
-                            else
-                                store_output(pout_i, O[j][i], streamout);
-                        }
-                    }
-                }
-            }
-            tile_index++;
-        }
-    }
-}
-
 template <bool ver_4fma>
 void diff_src_transform_bwd_weights(int image, jit_conv_winograd_conf_t conv,
         float *inp, float *tinp, float *Iw_temp,
@@ -1049,7 +900,8 @@ void diff_weights_transform_bwd_weights(jit_conv_winograd_conf_t conv,
 
 template <bool is_fwd>
 void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
-        const int MB, float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr) {
+        const int MB, float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr,
+        const memory_tracking::grantor_t &scratchpad) const{
     const auto &jcp = kernel_->jcp;
     const auto &p_ops = attr_->post_ops_;
 
@@ -1058,7 +910,7 @@ void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
     const int outh = is_fwd ? jcp.oh : jcp.ih;
     const int outw = is_fwd ? jcp.ow : jcp.iw;
 
-    /* Note that jcp.with_relu is true for both fused conv+relu primitive
+    /* Note that jcp.with_eltwise is true for both fused conv+relu primitive
      * and conv primitive with PostOps with relu before sum
      * (PostOps relu after sum is handled later) */
     auto output_transform = jcp.with_bias
@@ -1094,24 +946,23 @@ void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
     array_offset_calculator<float, 2> bias(bias_ptr,
             jcp.dimM/jcp.dimM_simd_block, jcp.dimM_simd_block);
 
-    array_offset_calculator<float, 8> M(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->M_ptr()
-                    : (this->scratchpad_)->V_ptr())),
+    array_offset_calculator<float, 8> M(is_fwd
+            ? scratchpad.template get<float>(key_wino_M)
+            : scratchpad.template get<float>(key_wino_V),
             jcp.dimN_nb_block, jcp.dimM_nb_block,
             alpha, alpha,
             jcp.dimN_block, jcp.dimM_block,
             jcp.dimN_reg_block, jcp.dimM_simd_block);
-    array_offset_calculator<float, 8> U((float *)((this->scratchpad_)->U_ptr()),
+    array_offset_calculator<float, 8> U(
+            scratchpad.template get<float>(key_wino_U),
             jcp.dimM_nb_block,
             alpha, alpha,
             jcp.dimK_nb_block,
             jcp.dimM_block, jcp.dimK_block,
             jcp.dimK_reg_block, jcp.dimM_simd_block);
-    array_offset_calculator<float, 8> V(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->V_ptr()
-                    : (this->scratchpad_)->M_ptr())),
+    array_offset_calculator<float, 8> V(is_fwd
+            ? scratchpad.template get<float>(key_wino_V)
+            : scratchpad.template get<float>(key_wino_M),
             jcp.dimN_nb_block, alpha, alpha,
             jcp.dimN_block, jcp.dimK_nb_block,
             jcp.dimK_block, jcp.dimN_reg_block, jcp.dimK_reg_block);
@@ -1121,15 +972,15 @@ void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
 
     const bool output_is_aligned = ((size_t)out_ptr & (64 - 1)) == 0;
 
-    const bool want_padded_bias = jcp.with_bias
+    const bool wants_padded_bias = jcp.with_bias
         && jcp.oc_without_padding != jcp.oc;
     float last_slice_bias[simd_w] = {0};
-    if (want_padded_bias) {
+    if (wants_padded_bias) {
         for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
             last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
     }
 
-#pragma omp parallel
+PRAGMA_OMP(parallel)
     {
         parallel_nd_in_omp(MB, jcp.dimK_nb_block, jcp.dimK_block,
             [&](int img, int K_blk1, int K_blk2) {
@@ -1148,7 +999,7 @@ void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
                 ifm1 * jcp.ic_block + ifm2, 0, 0, 0, 0)), U_base_ptr);
         });
 
-#pragma omp barrier
+PRAGMA_OMP(barrier)
 
         parallel_nd_in_omp(jcp.dimN_nb_block, alpha, alpha, jcp.dimM_nb_block, jcp.dimN_block,
             [&](int N_blk1, int oj, int oi, int M_blk1, int N_blk2) {
@@ -1174,14 +1025,14 @@ void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
         });
 
 
-#pragma omp barrier
+PRAGMA_OMP(barrier)
 
         parallel_nd_in_omp(MB, jcp.dimM_nb_block, jcp.dimM_block,
                     [&](int img, int M_blk1, int M_blk2) {
 
             const int M_blk = M_blk1 * jcp.dimM_block + M_blk2;
 
-            float *bias_ptr = want_padded_bias
+            float *bias_ptr = wants_padded_bias
                 && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
                 ? last_slice_bias : &bias(M_blk, 0);
 
@@ -1194,180 +1045,25 @@ void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_S_G_D(
     }
 }
 
-template void
-_jit_avx512_common_convolution_winograd_t<true>::_execute_data_W_S_G_D(
-        const int, float *, float *, float *, float *);
-template void
-_jit_avx512_common_convolution_winograd_t<false>::_execute_data_W_S_G_D(
-        const int, float *, float *, float *, float *);
-
-template <bool is_fwd>
-void _jit_avx512_common_convolution_winograd_t<is_fwd>::_execute_data_W_SGD(
-        const int MB, float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr) {
-    const auto &jcp = kernel_->jcp;
-    const auto &p_ops = attr_->post_ops_;
-
-    const int inph = is_fwd ? jcp.ih : jcp.oh;
-    const int inpw = is_fwd ? jcp.iw : jcp.ow;
-    const int outh = is_fwd ? jcp.oh : jcp.ih;
-    const int outw = is_fwd ? jcp.ow : jcp.iw;
-
-    /* Note that jcp.with_relu is true for both fused conv+relu primitive
-     * and conv primitive with PostOps with relu before sum
-     * (PostOps relu after sum is handled later) */
-    auto output_transform_tileblock = jcp.with_bias
-            ? (jcp.with_eltwise
-                ? (jcp.with_sum
-                    ? output_transform_tileblock_data<is_fwd, true, true, true>
-                    : output_transform_tileblock_data<is_fwd, true, true, false>)
-                : (jcp.with_sum
-                    ? output_transform_tileblock_data<is_fwd, true, false, true>
-                    : output_transform_tileblock_data<is_fwd, true, false, false>))
-            : (jcp.with_eltwise
-                ? (jcp.with_sum
-                    ? output_transform_tileblock_data<is_fwd, false, true, true>
-                    : output_transform_tileblock_data<is_fwd, false, true, false>)
-                : (jcp.with_sum
-                    ? output_transform_tileblock_data<is_fwd, false, false, true>
-                    : output_transform_tileblock_data<is_fwd, false, false, false>));
-
-    array_offset_calculator<float, 5> input(inp_ptr,
-            MB, jcp.dimK/jcp.dimK_reg_block, inph, inpw, jcp.dimK_reg_block);
-    array_offset_calculator<float, 5> output(out_ptr,
-            MB, jcp.dimM/jcp.dimM_simd_block, outh, outw, jcp.dimM_simd_block);
-    array_offset_calculator<float, 6> weights(wei_ptr,
-            jcp.oc/jcp.oc_simd_block, jcp.ic/jcp.ic_simd_block, jcp.kh, jcp.kw,
-            jcp.ic_simd_block, jcp.oc_simd_block);
-    array_offset_calculator<float, 2> bias(bias_ptr,
-            jcp.oc/jcp.oc_simd_block, jcp.oc_simd_block);
-
-    array_offset_calculator<float, 8> U((float *)((this->scratchpad_)->U_ptr()),
-            jcp.dimM_nb_block,
-            alpha, alpha,
-            jcp.dimK_nb_block,
-            jcp.dimM_block, jcp.dimK_block,
-            jcp.dimK_reg_block, jcp.dimM_simd_block);
-
-    array_offset_calculator<float, 8> M(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->M_ptr()
-                    : (this->scratchpad_)->V_ptr())),
-            0, jcp.dimM_nb_block, alpha, alpha,
-            jcp.dimN_block, jcp.dimM_block,
-            jcp.dimN_reg_block, jcp.dimM_simd_block);
-
-    array_offset_calculator<float, 8> V(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->V_ptr()
-                    : (this->scratchpad_)->M_ptr())),
-            0, alpha, alpha, jcp.dimN_block,
-            jcp.dimK_nb_block, jcp.dimK_block,
-            jcp.dimN_reg_block, jcp.dimK_reg_block);
-
-    const bool output_is_aligned = ((size_t)out_ptr & (64 - 1)) == 0;
-
-    const bool want_padded_bias = jcp.with_bias
-        && jcp.oc_without_padding != jcp.oc;
-    float last_slice_bias[simd_w] = {0};
-    if (want_padded_bias) {
-        for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
-            last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
-    }
-
-#pragma omp parallel
-    {
-        parallel_nd_in_omp(jcp.nb_oc, jcp.nb_ic, jcp.oc_block, jcp.ic_block,
-            [&](int ofm1, int ifm1, int ofm2, int ifm2) {
-
-            float *U_base_ptr = is_fwd
-                              ? &(U(ofm1, 0, 0, ifm1, ofm2, ifm2, 0, 0))
-                              : &(U(ifm1, 0, 0, ofm1, ifm2, ofm2, 0, 0));
-            weight_transform_data<is_fwd>(jcp,
-                    &(weights(ofm1 * jcp.oc_block + ofm2,
-                            ifm1 * jcp.ic_block + ifm2,
-                            0, 0, 0, 0)),
-                    U_base_ptr);
-        });
-
-#pragma omp barrier
-
-    int ithr = mkldnn_get_thread_num();
-
-#pragma omp for schedule(static)
-    for (int tile_block = 0; tile_block < jcp.tile_block; tile_block++) {
-        for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++) {
-            for (int K_blk2 = 0; K_blk2 < jcp.dimK_block; K_blk2++) {
-                input_transform_tileblock_data<is_fwd>(
-                        tile_block, jcp,
-                        &(input(0, K_blk1 * jcp.dimK_block + K_blk2, 0, 0, 0)),
-                        &(V(ithr, 0, 0, 0, K_blk1, K_blk2, 0, 0)));
-            }
-        }
-
-        for (int oj = 0; oj < alpha; oj++) {
-            for (int oi = 0; oi < alpha; oi++) {
-                for (int M_blk1 = 0; M_blk1 < jcp.dimM_nb_block; M_blk1++) {
-                    for (int N_blk = 0; N_blk < jcp.dimN_block; N_blk++) {
-                        kernel_->gemm_loop_ker_first_iter(
-                                (float *)&(M(ithr, M_blk1, oj, oi,
-                                        N_blk, 0, 0, 0)),
-                                (const float *)&(U(M_blk1, oj, oi, 0,
-                                        0, 0, 0, 0)),
-                                (const float *)&(V(ithr, oj, oi,
-                                        N_blk, 0, 0, 0, 0)));
-                        for (int K_blk1 = 1; K_blk1 < jcp.dimK_nb_block; K_blk1++) {
-                            kernel_->gemm_loop_ker(
-                                    (float *)&(M(ithr, M_blk1, oj, oi,
-                                            N_blk, 0, 0, 0)),
-                                    (const float *)&(U(M_blk1, oj, oi, K_blk1,
-                                            0, 0, 0, 0)),
-                                    (const float *)&(V(ithr, oj, oi,
-                                            N_blk, K_blk1, 0, 0, 0)));
-                        }
-                    }
-                }
-            }
-        }
-
-        for (int M_blk1 = 0; M_blk1 < jcp.dimM_nb_block; M_blk1++) {
-            for (int M_blk2 = 0; M_blk2 < jcp.dimM_block; M_blk2++) {
-                const int M_blk = M_blk1 * jcp.dimM_block + M_blk2;
-
-                float *bias_ptr = want_padded_bias
-                    && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
-                    ? last_slice_bias : &bias(M_blk, 0);
-
-                output_transform_tileblock(tile_block, jcp, p_ops,
-                        &(M(ithr, M_blk1, 0, 0, 0, M_blk2, 0, 0)),
-                        &(output(0, M_blk, 0, 0, 0)),
-                        bias_ptr, output_is_aligned);
-            }
-        }
-    }
-    }
-}
-
-template void
-_jit_avx512_common_convolution_winograd_t<true>::_execute_data_W_SGD(
-        const int, float *, float *, float *, float *);
-template void
-_jit_avx512_common_convolution_winograd_t<false>::_execute_data_W_SGD(
-        const int, float *, float *, float *, float *);
+template struct _jit_avx512_common_convolution_winograd_t<true>;
+template struct _jit_avx512_common_convolution_winograd_t<false>;
 
 void jit_avx512_common_convolution_winograd_bwd_weights_t::
-_maybe_execute_diff_bias_copy() {
-    if (conf_.want_padded_bias()) {
+_maybe_execute_diff_bias_copy(
+        const memory_tracking::grantor_t &scratchpad) const {
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad.get<float>(key_conv_padded_bias);
         float *diff_bias = (float *)this->memory(1);
-        for (int oc = 0; oc < conf_.jcp_.oc_without_padding; ++oc)
-            diff_bias[oc] = this->padded_bias_[oc];
+        for (int oc = 0; oc < pd()->jcp_.oc_without_padding; ++oc)
+            diff_bias[oc] = padded_bias[oc];
     }
 }
 
 void jit_avx512_common_convolution_winograd_bwd_weights_t::
-_execute_backward_weights_S_D_G_W()
-{
+_execute_backward_weights_S_D_G_W(
+        const memory_tracking::grantor_t &scratchpad) const {
     const auto &jcp = kernel_->jcp;
-    const int nthreads = scratchpad_->num_threads();
+    const int nthreads = jcp.nthr;
 
     auto diff_src_transform_bwd_weights_ver = jcp.ver == ver_4fma ?
             diff_src_transform_bwd_weights<true> :
@@ -1382,25 +1078,25 @@ _execute_backward_weights_S_D_G_W()
             jcp.mb, jcp.oc/simd_w, jcp.oh, jcp.ow, simd_w);
     array_offset_calculator<float, 6> diff_weights((float *)this->memory(0),
             jcp.oc/simd_w, jcp.ic/simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
-    array_offset_calculator<float, 2> diff_bias(
-            conf_.want_padded_bias() ? padded_bias_ : (float *)this->memory(1),
-            jcp.oc/simd_w, simd_w);
+    array_offset_calculator<float, 2> diff_bias(pd()->wants_padded_bias()
+            ? scratchpad.get<float>(key_conv_padded_bias)
+            : (float *)this->memory(1), jcp.oc/simd_w, simd_w);
 
     array_offset_calculator<float, 8> U(
-            (float *)(scratchpad_->U_ptr()),
+            scratchpad.get<float>(key_wino_U),
             jcp.nb_ic, jcp.nb_oc,
             alpha, alpha,
             jcp.oc_block, jcp.ic_block,
             jcp.ic_simd_block, jcp.oc_simd_block);
 
     array_offset_calculator<float, 8> M(
-            (float *)(scratchpad_->M_ptr()),
+            scratchpad.get<float>(key_wino_M),
             jcp.nb_oc, alpha, alpha,
             jcp.tile_block, jcp.oc_block,
             jcp.nb_tile_block_ur, jcp.tile_block_ur * jcp.tile_4fma,
             jcp.oc_simd_block);
     array_offset_calculator<float, 8> V(
-            (float *)(scratchpad_->V_ptr()),
+            scratchpad.get<float>(key_wino_V),
             jcp.nb_ic, alpha, alpha,
             jcp.tile_block, jcp.ic_block,
             jcp.nb_tile_block_ur, jcp.tile_block_ur,
@@ -1409,23 +1105,23 @@ _execute_backward_weights_S_D_G_W()
     const int trans_buffer_size = alpha * alpha * jcp.tile_4fma
                                 * jcp.ic_simd_block;
     array_offset_calculator<float, 2> trans_buffer(
-            (float *)(scratchpad_->src_transpose_ptr()),
+            scratchpad.get<float>(key_conv_tr_src),
             nthreads,
             trans_buffer_size);
 
     array_offset_calculator<float, 2> diff_bias_prv(
-            (float *)(scratchpad_->bias_ptr()),
-            mkldnn_get_max_threads(),
+            scratchpad.get<float>(key_conv_bia_reduction),
+            nthreads,
             jcp.oc);
 
-#pragma omp parallel num_threads(nthreads)
+PRAGMA_OMP(parallel num_threads(nthreads))
     {
         if (jcp.with_bias) {
             parallel_nd_in_omp(nthreads, jcp.oc, [&](int ithr, int ofm) {
                 diff_bias_prv(ithr, ofm) = 0.0f;
             });
 
-#pragma omp for nowait
+PRAGMA_OMP(for nowait)
             for (int bofm = 0; bofm < jcp.oc / simd_w; bofm++) {
                 PRAGMA_OMP_SIMD()
                 for (int v = 0; v < simd_w; v++)
@@ -1461,7 +1157,7 @@ _execute_backward_weights_S_D_G_W()
                     dbias);
         });
 
-#pragma omp barrier
+PRAGMA_OMP(barrier)
 
         for (int ifm1 = 0; ifm1 < jcp.nb_ic; ifm1++) {
             parallel_nd_in_omp(alpha, alpha, jcp.nb_oc,
@@ -1486,7 +1182,7 @@ _execute_backward_weights_S_D_G_W()
             });
         }
 
-#pragma omp barrier
+PRAGMA_OMP(barrier)
 
         parallel_nd_in_omp(jcp.nb_ic, jcp.nb_oc, jcp.oc_block, jcp.ic_block,
             [&](int ifm1, int ofm1, int ofm2, int ifm2) {
@@ -1497,7 +1193,7 @@ _execute_backward_weights_S_D_G_W()
         });
 
         if (jcp.with_bias) {
-#pragma omp for
+PRAGMA_OMP(for)
             for (int ofm1 = 0; ofm1 < jcp.oc / simd_w; ofm1++) {
                 for (int ithr = 0; ithr < nthreads; ithr++) {
                     float* base_bias_ptr = &(diff_bias(ofm1, 0));
@@ -1512,806 +1208,9 @@ _execute_backward_weights_S_D_G_W()
         }
     }
 
-    _maybe_execute_diff_bias_copy();
-}
-
-namespace {
-
-const int max_threads_number = 1024;
-
-template <bool ver_4fma>
-void diff_src_transform_bwd_weights_tile(int tile_block,
-    jit_conv_winograd_conf_t conv, float *inp, float *tinp,
-    void(*transpose_4fma_ker)(float *, float *))
-{
-    const int ifwp = conv.iw + conv.l_pad;
-    const int ifhp = conv.ih + conv.t_pad;
-    float I[alpha][alpha][simd_w];
-    float Iw[alpha][alpha][simd_w];
-
-    float *Iw_buffer = nullptr;
-    if (ver_4fma) {
-        Iw_buffer = (float *)malloc(alpha * alpha * conv.tile_4fma
-            * simd_w * sizeof(float), 64);
-    }
-    array_offset_calculator<float, 4> Iw_scratchpad(Iw_buffer,
-        alpha, alpha, conv.tile_4fma, simd_w);
-    array_offset_calculator<float, 5> input(inp,
-        conv.mb, conv.ic / simd_w, conv.ih, conv.iw, simd_w);
-    array_offset_calculator<float, 7> output(tinp,
-        0, alpha, alpha,
-        conv.ic_block,
-        conv.nb_tile_block_ur, conv.tile_block_ur,
-        conv.ic_simd_block * conv.tile_4fma);
-
-    int tile_4fma = 0;
-
-    int n_tiles = tile_block * conv.nb_tile_block_ur * conv.tile_block_ur;
-    for (int nb_tile_block_ur = 0; nb_tile_block_ur < conv.nb_tile_block_ur;
-        nb_tile_block_ur++) {
-        for (int tile_block_ur = 0; tile_block_ur < conv.tile_block_ur;
-            tile_block_ur++) {
-
-            int img = n_tiles / (conv.jtiles * conv.itiles);
-            int no_tile = n_tiles % (conv.jtiles * conv.itiles);
-            int ti = no_tile % conv.itiles;
-            int tj = no_tile / conv.itiles;
-
-            for (int j = 0; j < alpha; j++) {
-                int ydim = tj * tile_size + j;
-                if ((conv.t_pad <= ydim) && ydim < ifhp) {
-                    for (int i = 0; i < alpha; i++) {
-                        int xdim = ti * tile_size + i;
-                        if ((conv.l_pad <= xdim) && xdim < ifwp) {
-                            PRAGMA_OMP_SIMD()
-                            for (int v = 0; v < simd_w; v++) {
-                                I[j][i][v] = input(img, 0,
-                                    ydim - conv.t_pad,
-                                    xdim - conv.l_pad, v);
-                            }
-                        }
-                        else {
-                            PRAGMA_OMP_SIMD()
-                            for (int v = 0; v < simd_w; v++) {
-                                I[j][i][v] = 0.0f;
-                            }
-                        }
-                    }
-                }
-                else {
-                    for (int i = 0; i < alpha; i++) {
-                        PRAGMA_OMP_SIMD()
-                        for (int v = 0; v < simd_w; v++) {
-                            I[j][i][v] = 0.0f;
-                        }
-                    }
-                }
-            }
-
-            trans_I_4x4_3x3(Iw, I);
-
-            if (ver_4fma) {
-                for (int j = 0; j < alpha; j++) {
-                    for (int i = 0; i < alpha; i++) {
-                        PRAGMA_OMP_SIMD()
-                        for (int v = 0; v < simd_w; v++) {
-                            Iw_scratchpad(j, i, tile_4fma, v) = Iw[j][i][v];
-                        }
-                    }
-                }
-                tile_4fma++;
-                if (tile_4fma == conv.tile_4fma) {
-                    float *outp = &(output(0, 0, 0, 0,
-                        nb_tile_block_ur, tile_block_ur, 0));
-                    transpose_4fma_ker(outp, (float *)Iw_buffer);
-                    tile_4fma = 0;
-                }
-            }
-            else {
-                for (int j = 0; j < alpha; j++) {
-                    for (int i = 0; i < alpha; i++) {
-                        store_output(
-                            &(output(0, j, i, 0,
-                                nb_tile_block_ur, tile_block_ur, 0)),
-                            Iw[j][i], false);
-
-                    }
-                }
-            }
-            n_tiles++;
-        }
-    }
-}
-
-template <bool with_bias>
-void diff_dst_transform_bwd_weights_tile(int tile_block,
-    jit_conv_winograd_conf_t conv, float *inp, float *tinp, float *dbias)
-{
-    float I[alpha][alpha][simd_w];
-    float Iw[alpha][alpha][simd_w];
-
-    array_offset_calculator<float, 5> input(inp,
-        conv.mb, conv.oc / simd_w, conv.oh, conv.ow, conv.oc_simd_block);
-    array_offset_calculator<float, 7> output(tinp,
-        conv.nb_oc, alpha, alpha,
-        conv.oc_block,
-        conv.nb_tile_block_ur,
-        conv.tile_block_ur * conv.tile_4fma, conv.oc_simd_block);
-
-    int n_tiles = tile_block * conv.nb_tile_block_ur * conv.tile_block_ur;
-    for (int nb_tile_block_ur = 0; nb_tile_block_ur < conv.nb_tile_block_ur;
-        nb_tile_block_ur++) {
-        for (int tile_block_ur = 0; tile_block_ur < conv.tile_block_ur;
-            tile_block_ur++) {
-
-            int img = n_tiles / (conv.jtiles * conv.itiles);
-            int no_tile = n_tiles % (conv.jtiles * conv.itiles);
-            int ti = no_tile % conv.itiles;
-            int tj = no_tile / conv.itiles;
-
-            for (int j = 0; j < alpha; j++) {
-                int ydim = tj * tile_size + j;
-                if (ydim < conv.oh) {
-                    for (int i = 0; i < alpha; i++) {
-                        int xdim = ti * tile_size + i;
-                        if (xdim < conv.ow) {
-                            float *input_base = &input(img, 0, ydim, xdim, 0);
-
-                            PRAGMA_OMP_SIMD()
-                            for (int v = 0; v < simd_w; v++) {
-                                I[j][i][v] = input_base[v];
-                            }
-                            if (with_bias && j < tile_size && i < tile_size) {
-                                PRAGMA_OMP_SIMD()
-                                for (int v = 0; v < simd_w; v++) {
-                                    dbias[v] += input_base[v];
-                                }
-                            }
-                        }
-                        else {
-                            PRAGMA_OMP_SIMD()
-                            for (int v = 0; v < simd_w; v++) {
-                                I[j][i][v] = 0.0f;
-                            }
-                        }
-                    }
-                }
-                else {
-                    for (int i = 0; i < alpha; i++) {
-                        PRAGMA_OMP_SIMD()
-                        for (int v = 0; v < simd_w; v++) {
-                            I[j][i][v] = 0.0f;
-                        }
-                    }
-                }
-            }
-
-            trans_W_3x3_4x4_wu(Iw, I);
-
-            for (int j = 0; j < alpha; j++) {
-                for (int i = 0; i < alpha; i++) {
-                    /*TODO: Try instrinsic for casting into __m512*/
-                    store_output(&(output(0, j, i, 0,
-                        nb_tile_block_ur, tile_block_ur, 0)),
-                        Iw[j][i], false);
-                }
-            }
-            n_tiles++;
-        }
-    }
-}
-
-// Sum to the first buffer array
-void array_sum(int num_arrs, float *output,
-    size_t nelems, float *input_ptrs[], bool reduce_to_first = true)
-{
-    const size_t block_size = 16 * 1024 / sizeof(float);
-    const size_t blocks_number = nelems / block_size;
-    const size_t tail = nelems % block_size;
-
-#pragma omp parallel
-    {
-        const int ithr = mkldnn_get_thread_num();
-        const int nthr = mkldnn_get_num_threads();
-        size_t start{ 0 }, end{ 0 };
-        balance211(blocks_number, nthr, ithr, start, end);
-
-        for (size_t nb = start; nb < end; ++nb) {
-            size_t start_e = nb * block_size;
-            size_t end_e = start_e + block_size;
-            if (!reduce_to_first) {
-                PRAGMA_OMP_SIMD()
-                for (size_t e = start_e; e < end_e; e++) {
-                    output[e] = input_ptrs[0][e];
-                }
-            }
-            for (int a = 1; a < num_arrs; a++) {
-                PRAGMA_OMP_SIMD()
-                for (size_t e = start_e; e < end_e; e++) {
-                    output[e] += input_ptrs[a][e];
-                }
-            }
-        }
-
-        if (tail != 0 && ithr == nthr - 1) {
-            size_t start_e = nelems - tail;
-            size_t end_e = nelems;
-            if (!reduce_to_first) {
-                PRAGMA_OMP_SIMD()
-                for (size_t e = start_e; e < end_e; e++) {
-                    output[e] = input_ptrs[0][e];
-                }
-            }
-            for (int a = 1; a < num_arrs; a++) {
-                PRAGMA_OMP_SIMD()
-                for (size_t e = start_e; e < end_e; e++) {
-                    output[e] += input_ptrs[a][e];
-                }
-            }
-        }
-    }
+    _maybe_execute_diff_bias_copy(scratchpad);
 }
 
-void subarray_sum(int num_arrs, float *output, size_t nelems,
-        float *input_ptrs[], size_t input_starts[], size_t input_ends[])
-{
-    using namespace nstl;
-    const size_t block_size = 16 * 1024 / sizeof(float);
-    const size_t blocks_number = nelems / block_size;
-    const size_t tail = nelems % block_size;
-
-#pragma omp parallel
-    {
-        const int ithr = mkldnn_get_thread_num();
-        const int nthr = mkldnn_get_num_threads();
-        size_t start{ 0 }, end{ 0 };
-        balance211(blocks_number, nthr, ithr, start, end);
-
-        for (size_t nb = start; nb < end; ++nb) {
-            size_t start_e = nb * block_size;
-            size_t end_e = start_e + block_size;
-            size_t input_start = max(start_e, min(input_starts[0], end_e));
-            size_t input_end = max(start_e, min(input_ends[0], end_e));
-
-            PRAGMA_OMP_SIMD()
-            for (size_t e = start_e; e < input_start; e++) {
-                output[e] = 0.f;
-            }
-
-            PRAGMA_OMP_SIMD()
-            for (size_t e = input_start; e < input_end; e++) {
-                output[e] = input_ptrs[0][e];
-            }
-
-            PRAGMA_OMP_SIMD()
-            for (size_t e = input_end; e < end_e; e++) {
-                output[e] = 0.f;
-            }
-            for (int a = 1; a < num_arrs; a++) {
-                input_start = max(start_e, input_starts[a]);
-                input_end = min(input_ends[a], end_e);
-
-                PRAGMA_OMP_SIMD()
-                for (size_t e = input_start; e < input_end; e++) {
-                    output[e] += input_ptrs[a][e];
-                }
-            }
-        }
-
-        if (tail != 0 && ithr == nthr - 1) {
-            size_t start_e = nelems - tail;
-            size_t end_e = nelems;
-            size_t input_start = max(start_e, min(input_starts[0], end_e));
-            size_t input_end = max(start_e, min(input_ends[0], end_e));
-
-            PRAGMA_OMP_SIMD()
-            for (size_t e = start_e; e < input_start; e++) {
-                output[e] = 0.f;
-            }
-
-            PRAGMA_OMP_SIMD()
-            for (size_t e = input_start; e < input_end; e++) {
-                output[e] = input_ptrs[0][e];
-            }
-
-            PRAGMA_OMP_SIMD()
-            for (size_t e = input_end; e < end_e; e++) {
-                output[e] = 0.f;
-            }
-            for (int a = 1; a < num_arrs; a++) {
-                input_start = max(start_e, input_starts[a]);
-                input_end = min(input_ends[a], end_e);
-
-                PRAGMA_OMP_SIMD()
-                for (size_t e = start_e; e < end_e; e++) {
-                    output[e] += input_ptrs[a][e];
-                }
-            }
-        }
-    }
-}
-} // namespace
-
-void jit_avx512_common_convolution_winograd_bwd_weights_t::
-_execute_backward_weights_S_D_Giot_W()
-{
-    const auto &jcp = kernel_->jcp;
-    const int nthreads = scratchpad_->num_threads();
-    int U_size = jcp.oc * jcp.ic * alpha * alpha * sizeof(float);
-
-    auto diff_src_transform_bwd_weights_ver = jcp.ver == ver_4fma ?
-            diff_src_transform_bwd_weights<true> :
-            diff_src_transform_bwd_weights<false>;
-    auto diff_dst_transform_bwd_weights_ver = jcp.with_bias
-                                        ? diff_dst_transform_bwd_weights<true>
-                                        : diff_dst_transform_bwd_weights<false>;
-
-    array_offset_calculator<float, 5> diff_src((float *)this->input_memory(0),
-            jcp.mb, jcp.ic / simd_w, jcp.ih, jcp.iw, simd_w);
-    array_offset_calculator<float, 5> diff_dst((float *)this->input_memory(1),
-            jcp.mb, jcp.oc / simd_w, jcp.oh, jcp.ow, simd_w);
-    array_offset_calculator<float, 6> diff_weights((float *)this->memory(0),
-            jcp.oc / simd_w, jcp.ic / simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
-    array_offset_calculator<float, 2> diff_bias(
-            conf_.want_padded_bias() ? padded_bias_ : (float *)this->memory(1),
-            jcp.oc / simd_w, simd_w);
-
-    array_offset_calculator<float, 8> U((float *)(scratchpad_->U_ptr()),
-            jcp.nb_ic, jcp.nb_oc,
-            alpha, alpha,
-            jcp.oc_block, jcp.ic_block,
-            jcp.ic_simd_block, jcp.oc_simd_block);
-
-    array_offset_calculator<float, 9> Us(
-            (float *)(scratchpad_->U_ptr() + U_size),
-            0, jcp.nb_ic, jcp.nb_oc,
-            alpha, alpha,
-            jcp.oc_block, jcp.ic_block,
-            jcp.ic_simd_block, jcp.oc_simd_block);
-
-    array_offset_calculator<float, 8> M((float *)(scratchpad_->M_ptr()),
-            jcp.nb_oc, alpha, alpha,
-            jcp.tile_block, jcp.oc_block,
-            jcp.nb_tile_block_ur, jcp.tile_block_ur * jcp.tile_4fma,
-            jcp.oc_simd_block);
-
-    array_offset_calculator<float, 8> V((float *)(scratchpad_->V_ptr()),
-            jcp.nb_ic, alpha, alpha,
-            jcp.tile_block, jcp.ic_block,
-            jcp.nb_tile_block_ur, jcp.tile_block_ur,
-            jcp.ic_simd_block * jcp.tile_4fma);
-
-    const int trans_buffer_size = alpha * alpha * jcp.tile_4fma
-        * jcp.ic_simd_block;
-    array_offset_calculator<float, 2> trans_buffer(
-        (float *)(scratchpad_->src_transpose_ptr()),
-        nthreads,
-        trans_buffer_size);
-
-    array_offset_calculator<float, 2> diff_bias_prv(
-            (float *)(scratchpad_->bias_ptr()), nthreads, jcp.oc);
-
-#pragma omp parallel
-    {
-        if (jcp.with_bias) {
-            parallel_nd_in_omp(nthreads, jcp.oc, [&](int ithr, int ofm) {
-                diff_bias_prv(ithr, ofm) = 0.0f;
-            });
-#pragma omp for nowait
-            for (int bofm = 0; bofm < jcp.oc / simd_w; bofm++) {
-                PRAGMA_OMP_SIMD()
-                for (int v = 0; v < simd_w; v++)
-                    diff_bias(bofm, v) = 0.0f;
-            }
-        }
-    }
-
-#pragma omp parallel
-    {
-        const int ithread = mkldnn_get_thread_num();
-        parallel_nd_in_omp(jcp.mb, jcp.nb_ic, jcp.ic_block,
-            [&](int img, int ifm1, int ifm2) {
-                float *transb = jcp.ver == ver_4fma
-                    ? &(trans_buffer(ithread, 0))
-                    : NULL;
-                diff_src_transform_bwd_weights_ver(img, jcp,
-                    &(diff_src(img, ifm1 * jcp.ic_block + ifm2,
-                            0, 0, 0)),
-                    &(V(ifm1, 0, 0, 0, ifm2, 0, 0, 0)),
-                    transb,
-                    kernel_->transpose_4fma_ker);
-        });
-    }
-
-#pragma omp parallel num_threads(nthreads)
-    {
-        parallel_nd_in_omp(jcp.mb, jcp.nb_oc, jcp.oc_block,
-            [&](int img, int ofm1, int ofm2) {
-                const int ithread = mkldnn_get_thread_num();
-                float *dbias = jcp.with_bias
-                    ? &(diff_bias_prv(ithread,
-                        simd_w * (ofm1 * jcp.oc_block + ofm2)))
-                    : NULL;
-                diff_dst_transform_bwd_weights_ver(img, jcp,
-                    &(diff_dst(img, ofm1 * jcp.oc_block + ofm2, 0, 0, 0)),
-                    &(M(ofm1, 0, 0, 0, ofm2, 0, 0, 0)), dbias);
-        });
-    }
-
-    size_t input_starts[max_threads_number];
-    size_t input_ends[max_threads_number];
-    int th_counter = 0;
-#pragma omp parallel firstprivate(th_counter) num_threads(nthreads)
-    {
-        parallel_nd_in_omp(jcp.nb_ic, jcp.nb_oc, alpha, alpha, jcp.tile_block,
-            [&](int ifm1, int ofm1, int oj, int oi, int tile_block) {
-                int ithr = mkldnn_get_thread_num();
-                if (th_counter == 0) {
-                    input_starts[ithr] = (float *)&(Us(ithr, ifm1, ofm1,
-                            oj, oi, 0, 0, 0, 0)) - (float *)&(Us(ithr, 0, 0,
-                            0, 0, 0, 0, 0, 0));
-                    input_ends[ithr] = input_starts[ithr]
-                        + jcp.oc_block * jcp.ic_block
-                        * jcp.ic_simd_block * jcp.oc_simd_block;
-                }
-                else if (tile_block == 0) {
-                    input_ends[ithr] += jcp.oc_block * jcp.ic_block
-                        * jcp.ic_simd_block * jcp.oc_simd_block;
-                }
-
-                if (th_counter == 0 || tile_block == 0) {
-                    kernel_->gemm_loop_ker_first_iter(
-                        &(Us(ithr, ifm1, ofm1, oj, oi, 0, 0, 0, 0)),
-                        &(M(ofm1, oj, oi, tile_block, 0, 0, 0, 0)),
-                        &(V(ifm1, oj, oi, tile_block, 0, 0, 0, 0)));
-                } else {
-                    kernel_->gemm_loop_ker(
-                        &(Us(ithr, ifm1, ofm1, oj, oi, 0, 0, 0, 0)),
-                        &(M(ofm1, oj, oi, tile_block, 0, 0, 0, 0)),
-                        &(V(ifm1, oj, oi, tile_block, 0, 0, 0, 0)));
-                }
-                th_counter++;
-        });
-    }
-
-
-    // Reduce diff-weights
-    {
-        float *output = &(U(0, 0, 0, 0, 0, 0, 0, 0));
-        size_t nelems = jcp.ic * jcp.oc * alpha * alpha;
-        float *input_ptrs[max_threads_number];
-        for (int i = 0; i < nthreads; i++)
-            input_ptrs[i] = output + nelems * (i + 1);
-        subarray_sum(
-                nthreads, output, nelems, input_ptrs, input_starts, input_ends);
-    }
-
-    parallel_nd(jcp.nb_ic, jcp.nb_oc, jcp.oc_block, jcp.ic_block,
-        [&](int ifm1, int ofm1, int ofm2, int ifm2) {
-            diff_weights_transform_bwd_weights(jcp,
-                    &(diff_weights(ofm1 * jcp.oc_block + ofm2,
-                            ifm1 * jcp.ic_block + ifm2,
-                            0, 0, 0, 0)),
-                    &(U(ifm1, ofm1, 0, 0, ofm2, ifm2, 0, 0)));
-    });
-
-#pragma omp parallel
-    if (jcp.with_bias) {
-#pragma omp for
-        for (int ofm1 = 0; ofm1 < jcp.oc / simd_w; ofm1++) {
-            for (int ithr = 0; ithr < nthreads; ithr++) {
-                float* base_bias_ptr = &(diff_bias(ofm1, 0));
-                float* base_bias_prv_ptr = &(diff_bias_prv(
-                            ithr * jcp.oc + ofm1 * simd_w));
-                PRAGMA_OMP_SIMD()
-                for (int ofm2 = 0; ofm2 < simd_w; ofm2++) {
-                    base_bias_ptr[ofm2] += base_bias_prv_ptr[ofm2];
-                }
-            }
-        }
-    }
-
-    _maybe_execute_diff_bias_copy();
-}
-
-void jit_avx512_common_convolution_winograd_bwd_weights_t::
-_execute_backward_weights_SDGtWo()
-{
-    const auto &jcp = kernel_->jcp;
-    const int nthreads = scratchpad_->num_threads();
-
-    auto diff_src_transform_bwd_weights_ver_tile = jcp.ver == ver_4fma ?
-            diff_src_transform_bwd_weights_tile<true> :
-            diff_src_transform_bwd_weights_tile<false>;
-    auto diff_dst_transform_bwd_weights_ver = jcp.with_bias
-                                  ? diff_dst_transform_bwd_weights_tile<true>
-                                  : diff_dst_transform_bwd_weights_tile<false>;
-
-    array_offset_calculator<float, 5> diff_src((float *)this->input_memory(0),
-            jcp.mb, jcp.ic / simd_w, jcp.ih, jcp.iw, simd_w);
-    array_offset_calculator<float, 5> diff_dst((float *)this->input_memory(1),
-            jcp.mb, jcp.oc / simd_w, jcp.oh, jcp.ow, simd_w);
-    array_offset_calculator<float, 6> diff_weights((float *)this->memory(0),
-            jcp.oc / simd_w, jcp.ic / simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
-    array_offset_calculator<float, 3> diff_bias(
-            conf_.want_padded_bias() ? padded_bias_ : (float *)this->memory(1),
-            jcp.nb_oc, jcp.oc_block, simd_w);
-
-    array_offset_calculator<float, 8> Us((float *)(scratchpad_->U_ptr()),
-            0, jcp.nb_ic, alpha, alpha,
-            jcp.oc_block, jcp.ic_block,
-            jcp.ic_simd_block, jcp.oc_simd_block);
-
-    array_offset_calculator<float, 7> M((float *)(scratchpad_->M_ptr()),
-            0, alpha, alpha,
-            jcp.oc_block,
-            jcp.nb_tile_block_ur, jcp.tile_block_ur * jcp.tile_4fma,
-            jcp.oc_simd_block);
-
-    array_offset_calculator<float, 8> V((float *)(scratchpad_->V_ptr()),
-            0, jcp.nb_ic, alpha, alpha,
-            jcp.ic_block,
-            jcp.nb_tile_block_ur, jcp.tile_block_ur,
-            jcp.ic_simd_block * jcp.tile_4fma);
-
-    array_offset_calculator<float, 2> diff_bias_prv(
-            (float *)(scratchpad_->bias_ptr()),
-            nthreads, jcp.oc / jcp.nb_oc);
-
-    for (int ofm1 = 0; ofm1 < jcp.nb_oc; ++ofm1) {
-        int th_counter = 0;
-
-#pragma omp parallel
-        {
-            if (jcp.with_bias) {
-                parallel_nd_in_omp(nthreads, jcp.oc / jcp.nb_oc,
-                    [&](int ithr, int ofm) {
-                        diff_bias_prv(ithr, ofm) = 0.0f;
-                });
-#pragma omp for nowait
-                for (int bofm = 0; bofm < jcp.oc_block; bofm++) {
-                    PRAGMA_OMP_SIMD()
-                    for (int v = 0; v < simd_w; v++)
-                        diff_bias(ofm1, bofm, v) = 0.0f;
-                }
-            }
-        }
-
-#pragma omp parallel firstprivate(th_counter) num_threads(nthreads)
-#pragma omp for nowait
-        for (int tile_block = 0; tile_block < jcp.tile_block; tile_block++) {
-            int ithr = mkldnn_get_thread_num();
-            for (int ifm1 = 0; ifm1 < jcp.nb_ic; ++ifm1) {
-                for (int ifm2 = 0; ifm2 < jcp.ic_block; ++ifm2) {
-                    diff_src_transform_bwd_weights_ver_tile(tile_block, jcp,
-                            &(diff_src(0, ifm1 * jcp.ic_block + ifm2, 0, 0, 0)),
-                            &(V(ithr, ifm1, 0, 0, ifm2, 0, 0, 0)),
-                            kernel_->transpose_4fma_ker);
-                }
-            }
-
-            for (int ofm2 = 0; ofm2 < jcp.oc_block; ofm2++) {
-                float *dbias = jcp.with_bias
-                    ? &(diff_bias_prv(ithr, simd_w * ofm2))
-                    : NULL;
-                diff_dst_transform_bwd_weights_ver(tile_block, jcp,
-                        &(diff_dst(0, ofm1 * jcp.oc_block + ofm2, 0, 0, 0)),
-                        &(M(ithr, 0, 0, ofm2, 0, 0, 0)),
-                        dbias);
-            }
-
-            for (int ifm1 = 0; ifm1 < jcp.nb_ic; ifm1++) {
-                for (int oj = 0; oj < alpha; oj++) {
-                    for (int oi = 0; oi < alpha; oi++) {
-                        if (th_counter == 0)
-                            kernel_->gemm_loop_ker_first_iter(
-                                    &(Us(ithr, ifm1, oj, oi, 0, 0, 0, 0)),
-                                    &(M(ithr, oj, oi, 0, 0, 0, 0)),
-                                    &(V(ithr, ifm1, oj, oi, 0, 0, 0, 0)));
-                        else
-                            kernel_->gemm_loop_ker(
-                                    &(Us(ithr, ifm1, oj, oi, 0, 0, 0, 0)),
-                                    &(M(ithr, oj, oi, 0, 0, 0, 0)),
-                                    &(V(ithr, ifm1, oj, oi, 0, 0, 0, 0)));
-                    }
-                }
-            }
-            th_counter++;
-        }
-        // Reduce diff-weights
-        {
-            float *output = (float *)(scratchpad_->U_ptr());
-            size_t nelems
-                    = jcp.ic * (jcp.oc / jcp.nb_oc) * alpha * alpha;
-            float *input_ptrs[max_threads_number];
-            for (int i = 0; i < nthreads; i++) {
-                input_ptrs[i] = output + nelems * i;
-            }
-            array_sum(nthreads, output, nelems, input_ptrs);
-        }
-
-        parallel_nd(jcp.nb_ic, jcp.oc_block, jcp.ic_block,
-            [&](int ifm1, int ofm2, int ifm2) {
-            diff_weights_transform_bwd_weights(jcp,
-                    &(diff_weights(ofm1 * jcp.oc_block + ofm2,
-                            ifm1 * jcp.ic_block + ifm2,
-                            0, 0, 0, 0)),
-                    &(Us(0, ifm1, 0, 0, ofm2, ifm2, 0, 0)));
-        });
-
-#pragma omp parallel
-        if (jcp.with_bias) {
-#pragma omp for
-            for (int ofm2 = 0; ofm2 < jcp.oc_block; ofm2++) {
-                for (int ithr = 0; ithr < nthreads; ithr++) {
-                    float* base_bias_ptr = &(diff_bias(ofm1, ofm2, 0));
-                    float* base_bias_prv_ptr = &(diff_bias_prv(
-                                ithr * jcp.oc_block * simd_w + ofm2 * simd_w));
-                    PRAGMA_OMP_SIMD()
-                    for (int ofm3 = 0; ofm3 < simd_w; ofm3++) {
-                        base_bias_ptr[ofm3] += base_bias_prv_ptr[ofm3];
-                    }
-                }
-            }
-        }
-    }
-
-    _maybe_execute_diff_bias_copy();
-}
-
-void jit_avx512_common_convolution_winograd_bwd_weights_t::
-_execute_backward_weights_SDGt_W()
-{
-    const auto &jcp = kernel_->jcp;
-    const int nthreads = scratchpad_->num_threads();
-
-    auto diff_src_transform_bwd_weights_ver_tile = jcp.ver == ver_4fma ?
-            diff_src_transform_bwd_weights_tile<true> :
-            diff_src_transform_bwd_weights_tile<false>;
-    auto diff_dst_transform_bwd_weights_ver = jcp.with_bias
-                                  ? diff_dst_transform_bwd_weights_tile<true>
-                                  : diff_dst_transform_bwd_weights_tile<false>;
-
-    array_offset_calculator<float, 5> diff_src((float *)this->input_memory(0),
-            jcp.mb, jcp.ic / simd_w, jcp.ih, jcp.iw, simd_w);
-    array_offset_calculator<float, 5> diff_dst((float *)this->input_memory(1),
-            jcp.mb, jcp.oc / simd_w, jcp.oh, jcp.ow, simd_w);
-    array_offset_calculator<float, 6> diff_weights((float *)this->memory(0),
-            jcp.oc / simd_w, jcp.ic / simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
-    array_offset_calculator<float, 2> diff_bias(
-            conf_.want_padded_bias() ? padded_bias_ : (float *)this->memory(1),
-            jcp.oc / simd_w, simd_w);
-
-    array_offset_calculator<float, 8> U((float *)(scratchpad_->U_ptr()),
-            jcp.nb_oc, jcp.nb_ic,
-            alpha, alpha,
-            jcp.oc_block, jcp.ic_block,
-            jcp.ic_simd_block, jcp.oc_simd_block);
-
-    array_offset_calculator<float, 9> Us((float *)(scratchpad_->U_ptr()),
-            0, jcp.nb_oc, jcp.nb_ic,
-            alpha, alpha,
-            jcp.oc_block, jcp.ic_block,
-            jcp.ic_simd_block, jcp.oc_simd_block);
-
-    array_offset_calculator<float, 8> M((float *)(scratchpad_->M_ptr()),
-            0, jcp.nb_oc, alpha, alpha, jcp.oc_block,
-            jcp.nb_tile_block_ur, jcp.tile_block_ur * jcp.tile_4fma,
-            jcp.oc_simd_block);
-
-    array_offset_calculator<float, 8> V((float *)(scratchpad_->V_ptr()),
-            0, jcp.nb_ic, alpha, alpha, jcp.ic_block,
-            jcp.nb_tile_block_ur, jcp.tile_block_ur,
-            jcp.ic_simd_block * jcp.tile_4fma);
-
-    array_offset_calculator<float, 2> diff_bias_prv(
-            (float *)(scratchpad_->bias_ptr()),
-            nthreads, jcp.oc);
-
-#pragma omp parallel
-    {
-        if (jcp.with_bias) {
-            parallel_nd_in_omp(nthreads, jcp.oc,
-                [&](int ithr, int ofm) {
-                    diff_bias_prv(ithr, ofm) = 0.0f;
-            });
-#pragma omp for nowait
-            for (int bofm = 0; bofm < jcp.oc / simd_w; bofm++) {
-                PRAGMA_OMP_SIMD()
-                for (int v = 0; v < simd_w; v++)
-                    diff_bias(bofm, v) = 0.0f;
-            }
-        }
-    }
-
-    int th_counter = 0;
-#pragma omp parallel firstprivate(th_counter) num_threads(nthreads)
-#pragma omp for nowait
-    for (int tile_block = 0; tile_block < jcp.tile_block; tile_block++) {
-        int ithr = mkldnn_get_thread_num();
-
-        for (int ifm1 = 0; ifm1 < jcp.nb_ic; ++ifm1) {
-            for (int ifm2 = 0; ifm2 < jcp.ic_block; ++ifm2) {
-                diff_src_transform_bwd_weights_ver_tile(tile_block, jcp,
-                        &(diff_src(0, ifm1 * jcp.ic_block + ifm2,
-                                0, 0, 0)),
-                        &(V(ithr, ifm1, 0, 0, ifm2, 0, 0, 0)),
-                        kernel_->transpose_4fma_ker);
-            }
-        }
-
-        for (int ofm1 = 0; ofm1 < jcp.nb_oc; ofm1++) {
-            for (int ofm2 = 0; ofm2 < jcp.oc_block; ofm2++) {
-                float *dbias = jcp.with_bias
-                    ? &(diff_bias_prv(ithr,
-                                simd_w * (ofm1 * jcp.oc_block + ofm2)))
-                    : NULL;
-                diff_dst_transform_bwd_weights_ver(tile_block, jcp,
-                        &(diff_dst(0, ofm1 * jcp.oc_block + ofm2,
-                                0, 0, 0)),
-                        &(M(ithr, ofm1, 0, 0, ofm2, 0, 0, 0)),
-                        dbias);
-            }
-        }
-
-        for (int ofm1 = 0; ofm1 < jcp.nb_oc; ofm1++) {
-            for (int oj = 0; oj < alpha; oj++) {
-                for (int oi = 0; oi < alpha; oi++) {
-                    for (int ifm1 = 0; ifm1 < jcp.nb_ic; ifm1++) {
-                        if (th_counter == 0)
-                            kernel_->gemm_loop_ker_first_iter(
-                                    &(Us(ithr, ofm1, ifm1, oj, oi, 0, 0, 0, 0)),
-                                    &(M(ithr, ofm1, oj, oi, 0, 0, 0, 0)),
-                                    &(V(ithr, ifm1, oj, oi, 0, 0, 0, 0)));
-                        else
-                            kernel_->gemm_loop_ker(
-                                    &(Us(ithr, ofm1, ifm1, oj, oi, 0, 0, 0, 0)),
-                                    &(M(ithr, ofm1, oj, oi, 0, 0, 0, 0)),
-                                    &(V(ithr, ifm1, oj, oi, 0, 0, 0, 0)));
-                    }
-                }
-            }
-        }
-        th_counter++;
-    }
-
-    // Reduce diff-weights
-    {
-        float *output = (float *)(scratchpad_->U_ptr());
-        size_t nelems = jcp.ic * jcp.oc * alpha * alpha;
-        float *input_ptrs[max_threads_number];
-        for (int i = 0; i < nthreads; i++) {
-            input_ptrs[i] = output + nelems * i;
-        }
-        array_sum(nthreads, output, nelems, input_ptrs);
-    }
-
-    parallel_nd(jcp.nb_oc, jcp.nb_ic, jcp.oc_block, jcp.ic_block,
-        [&](int ofm1, int ifm1, int ofm2, int ifm2) {
-        diff_weights_transform_bwd_weights(jcp,
-                &(diff_weights(ofm1 * jcp.oc_block + ofm2,
-                        ifm1 * jcp.ic_block + ifm2, 0, 0, 0, 0)),
-                &(U(ofm1, ifm1, 0, 0, ofm2, ifm2, 0, 0)));
-    });
-
-#pragma omp parallel
-    if (jcp.with_bias) {
-#pragma omp for
-        for (int ofm1 = 0; ofm1 < jcp.oc / simd_w; ofm1++) {
-            for (int ithr = 0; ithr < nthreads; ithr++) {
-                float* base_bias_ptr = &(diff_bias(ofm1, 0));
-                float* base_bias_prv_ptr = &(diff_bias_prv(
-                            ithr * jcp.oc + ofm1 * simd_w));
-                PRAGMA_OMP_SIMD()
-                for (int ofm2 = 0; ofm2 < simd_w; ofm2++) {
-                    base_bias_ptr[ofm2] += base_bias_prv_ptr[ofm2];
-                }
-            }
-        }
-    }
-
-    _maybe_execute_diff_bias_copy();
-}
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.hpp
index fbdf9ebaf..6f6bb0f73 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_convolution_winograd.hpp
@@ -18,9 +18,9 @@
 #define CPU_JIT_AVX512_COMMON_CONVOLUTION_WINOGRAD_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
-#include "scratchpad.hpp"
 #include "mkldnn_thread.hpp"
 
 #include "jit_avx512_common_conv_winograd_kernel_f32.hpp"
@@ -29,152 +29,36 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-namespace winograd {
+namespace winograd_avx512_common {
+inline void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+        const jit_conv_winograd_conf_t &jcp) {
+    using namespace memory_tracking::names;
 
-struct winograd_scratchpad_t {
-    public:
-        winograd_scratchpad_t(const jit_conv_winograd_conf_t &jcp)
-        {
-            get_scratchpad_size_(jcp);
-            allocate_scratchpad_(jcp);
-        }
-
-        ~winograd_scratchpad_t() {
-            if (scratchpad_ != nullptr)
-                delete scratchpad_;
-        }
-
-        char *U_ptr() {
-            /* buffer for wei transform U*/
-            return scratchpad_->get() + U_offset_;
-        }
+    size_t U_sz = (size_t)alpha * alpha * jcp.ic * jcp.oc;
+    size_t V_sz = (size_t)alpha * alpha * jcp.mb * jcp.ic
+        * (jcp.itiles * jcp.jtiles + jcp.tile_4fma_padding);
+    size_t M_sz = (size_t)alpha * alpha * jcp.mb * jcp.oc
+        * (jcp.itiles * jcp.jtiles + jcp.tile_4fma_padding);
 
-        char *V_ptr() {
-            /* buffer for src transform V*/
-            return scratchpad_->get() + V_offset_;
-        }
-
-        char *M_ptr() {
-            /* buffer for dst transform M*/
-            return scratchpad_->get() + M_offset_;
-        }
+    scratchpad.book(key_wino_U, sizeof(float) * U_sz, PAGE_2M);
+    scratchpad.book(key_wino_V, sizeof(float) * V_sz, PAGE_2M);
+    scratchpad.book(key_wino_M, sizeof(float) * M_sz, PAGE_2M);
 
-        char *bias_ptr() {
-            /* buffer for bias update in bwdw*/
-            return scratchpad_->get() + bias_offset_;
-        }
+    if (jcp.sched_policy == WSCHED_WEI_S_D_G_W) {
+        const int nthr = mkldnn_get_max_threads();
 
-        char *src_transpose_ptr() {
-            /* buffer for src transpose in bwdw using qfma*/
-            return scratchpad_->get() + src_transpose_offset_;
-        }
+        size_t tr_src_sz = jcp.ver != ver_4fma ? 0 : (size_t)nthr
+            * alpha * alpha * jcp.tile_4fma * jcp.ic_simd_block;
+        scratchpad.book(key_conv_tr_src, sizeof(float) * tr_src_sz, PAGE_2M);
 
-        int num_threads(){
-            return nthreads_;
-        }
+        size_t br_sz = jcp.with_bias ? nthr * jcp.oc : 0;
+        scratchpad.book(key_conv_bia_reduction, sizeof(float) * br_sz, PAGE_2M);
 
-    private:
-        inline void get_scratchpad_size_(const jit_conv_winograd_conf_t &jcp) {
-            nthreads_ = mkldnn_get_max_threads();
-
-            U_sz_ = (size_t)alpha * alpha * jcp.ic * jcp.oc * sizeof(float);
-            V_sz_ = (size_t)alpha * alpha * jcp.mb * jcp.ic
-                           * (jcp.itiles * jcp.jtiles + jcp.tile_4fma_padding)
-                           * sizeof(float);
-            M_sz_ = (size_t)alpha * alpha * jcp.mb * jcp.oc
-                           * (jcp.itiles * jcp.jtiles + jcp.tile_4fma_padding)
-                           * sizeof(float);
-
-            switch (jcp.sched_policy) {
-            case WSCHED_DATA_W_SGD:
-                V_sz_ = (size_t)nthreads_ * alpha * alpha
-                    * jcp.nb_tile_block_ur * jcp.tile_block_ur
-                    * jcp.ic * sizeof(float);
-                M_sz_ = (size_t)nthreads_* alpha * alpha
-                    * jcp.nb_tile_block_ur * jcp.tile_block_ur
-                    * jcp.oc * sizeof(float);
-                break;
-            case WSCHED_WEI_SDGt_W:
-                U_sz_ = (size_t)nthreads_ * U_sz_;
-                V_sz_ = (size_t)nthreads_ * alpha * alpha
-                        * (jcp.nb_tile_block_ur * jcp.tile_block_ur
-                                  + jcp.tile_4fma_padding)
-                        * jcp.ic * sizeof(float);
-                M_sz_ = (size_t)nthreads_ * alpha * alpha
-                        * (jcp.nb_tile_block_ur * jcp.tile_block_ur
-                                  + jcp.tile_4fma_padding)
-                        * jcp.oc * sizeof(float);
-                bias_sz_ = nthreads_ * jcp.oc * sizeof(float);
-                break;
-            case WSCHED_WEI_SDGtWo:
-                U_sz_ = (size_t)nthreads_ * alpha * alpha
-                    * jcp.oc_block * jcp.oc_simd_block * jcp.ic * sizeof(float);
-                M_sz_ = (size_t)nthreads_ * alpha * alpha
-                        * (jcp.nb_tile_block_ur * jcp.tile_block_ur
-                                  + jcp.tile_4fma_padding)
-                        * jcp.oc_simd_block * jcp.oc_block * sizeof(float);
-                bias_sz_ = nthreads_ * jcp.oc * sizeof(float);
-                break;
-            case WSCHED_WEI_S_D_Giot_W:
-                U_sz_ = (size_t)(nthreads_ + 1) * alpha * alpha
-                    * jcp.ic * jcp.oc * sizeof(float);
-                V_sz_ = (size_t)alpha * alpha
-                    * (jcp.itiles * jcp.jtiles + jcp.tile_4fma_padding)
-                    * jcp.ic * jcp.mb * sizeof(float);
-                M_sz_ = (size_t)alpha * alpha
-                    * (jcp.itiles * jcp.jtiles + jcp.tile_4fma_padding)
-                    * jcp.oc * jcp.mb * sizeof(float);
-                bias_sz_ = nthreads_ * jcp.oc * sizeof(float);
-                src_transpose_sz_ = jcp.ver == ver_4fma
-                    ? ((size_t)nthreads_ * alpha * alpha
-                        * jcp.tile_4fma
-                        * jcp.ic_simd_block * sizeof(float))
-                    : 0;
-                break;
-            case WSCHED_WEI_S_D_G_W:
-                src_transpose_sz_ = jcp.ver == ver_4fma
-                                  ? ((size_t)nthreads_ * alpha * alpha
-                                     * jcp.tile_4fma
-                                     * jcp.ic_simd_block * sizeof(float))
-                                  : 0;
-                bias_sz_ = jcp.with_bias ? nthreads_ * jcp.oc * sizeof(float) : 0;
-                break;
-            default:
-                break;
-            }
-        }
-
-        inline void allocate_scratchpad_(const jit_conv_winograd_conf_t &jcp) {
-            const size_t page_size = PAGE_2M;
-            U_offset_ = 0;
-            V_offset_ = utils::rnd_up(U_sz_, page_size);
-            M_offset_ = V_offset_ + utils::rnd_up(V_sz_, page_size);
-            scratchpad_sz_ = M_offset_ + M_sz_;
-            if (src_transpose_sz_) {
-                src_transpose_offset_ = M_offset_
-                                      + utils::rnd_up(M_sz_, page_size);
-                scratchpad_sz_ = src_transpose_offset_ + src_transpose_sz_;
-            }
-            if (bias_sz_) {
-                bias_offset_ = src_transpose_sz_
-                             ? src_transpose_offset_
-                                 + utils::rnd_up(src_transpose_sz_, page_size)
-                             : M_offset_ + utils::rnd_up(M_sz_, page_size);
-                scratchpad_sz_ = bias_offset_ + bias_sz_;
-            }
-            scratchpad_ = create_scratchpad(scratchpad_sz_);
-        }
-
-        scratchpad_t *scratchpad_;
-        int nthreads_;
-        size_t scratchpad_sz_ = 0, U_sz_ = 0, V_sz_ = 0, M_sz_ = 0,
-               bias_sz_ = 0, src_transpose_sz_ = 0;
-        size_t U_offset_ = 0;
-        size_t V_offset_ = 0;
-        size_t M_offset_ = 0;
-        size_t bias_offset_ = 0;
-        size_t src_transpose_offset_ = 0; // only relevant for bwdw using qfma
-};
+        size_t padded_bias_sz =
+            jcp.with_bias && jcp.oc_without_padding != jcp.oc ? jcp.oc : 0;
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * padded_bias_sz);
+    }
+}
 }
 
 template <bool is_fwd>
@@ -182,67 +66,72 @@ struct _jit_avx512_common_convolution_winograd_t {
 
     _jit_avx512_common_convolution_winograd_t(
             const jit_conv_winograd_conf_t &jcp, const primitive_attr_t *attr)
-        : kernel_(nullptr), scratchpad_(nullptr), attr_(attr) {
+        : kernel_(nullptr), attr_(attr) {
         kernel_ = new _jit_avx512_common_conv_winograd_data_kernel_f32(jcp);
-        scratchpad_ = new winograd::winograd_scratchpad_t(jcp);
         }
 
-    ~_jit_avx512_common_convolution_winograd_t() {
-        delete kernel_;
-        delete scratchpad_;
-    };
+    ~_jit_avx512_common_convolution_winograd_t() { delete kernel_; }
 
     protected:
         void _execute_data_W_S_G_D(const int MB, float *inp_ptr, float *out_ptr,
-                float *wei_ptr, float *bias_ptr = NULL);
-        void _execute_data_W_SGD(const int MB, float *inp_ptr, float *out_ptr,
-                float *wei_ptr, float *bias_ptr = NULL);
+                float *wei_ptr, float *bias_ptr,
+                const memory_tracking::grantor_t &scratchpad) const;
         _jit_avx512_common_conv_winograd_data_kernel_f32 *kernel_;
-        // Buffer required to store transforms in the frequency domain
-        winograd::winograd_scratchpad_t *scratchpad_;
         const primitive_attr_t *attr_;
 };
 
-template <bool with_relu>
-struct _jit_avx512_common_convolution_winograd_fwd_t
+struct jit_avx512_common_convolution_winograd_fwd_t
      : _jit_avx512_common_convolution_winograd_t<true>
      , public cpu_primitive_t
     {
-    struct pd_t : public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+    struct pd_t : public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_wino:", avx512_common, ""),
-                _jit_avx512_common_convolution_winograd_fwd_t<with_relu>);
+                jit_avx512_common_convolution_winograd_fwd_t);
 
         virtual status_t init() override
         {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
-                    && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                    && utils::one_of(this->desc()->prop_kind, forward_training,
                                forward_inference)
-                    && this->cdesc_().alg_kind == alg_kind::convolution_winograd
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
                     && !this->has_zero_dim_memory()
                     && utils::everyone_is(data_type::f32,
-                               this->cdesc_().src_desc.data_type,
-                               this->cdesc_().weights_desc.data_type,
-                               this->cdesc_().dst_desc.data_type)
+                               this->desc()->src_desc.data_type,
+                               this->desc()->weights_desc.data_type,
+                               this->desc()->dst_desc.data_type)
                     && IMPLICATION(this->with_bias(), data_type::f32
-                                       == this->cdesc_().bias_desc.data_type)
+                                       == this->desc()->bias_desc.data_type)
                     && mkldnn_thr_syncable();
+
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_common_conv_winograd_fwd_kernel_f32::init_conf(
-                    jcp_, this->cdesc_(), *this->src_pd_.desc(),
-                    *this->weights_pd_.desc(), *this->dst_pd_.desc(),
-                    *this->attr(), with_relu, this->negative_slope());
+            status_t status =
+                jit_avx512_common_conv_winograd_fwd_kernel_f32::init_conf(
+                        jcp_, *this->desc(), *this->src_pd_.desc(),
+                        *this->weights_pd_.desc(), *this->dst_pd_.desc(),
+                        *this->attr());
+            if (status != status::success) return status;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+            winograd_avx512_common::init_scratchpad(scratchpad, jcp_);
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return status;
         }
 
         jit_conv_winograd_conf_t jcp_;
@@ -264,45 +153,32 @@ struct _jit_avx512_common_convolution_winograd_fwd_t
         }
     };
 
-    _jit_avx512_common_convolution_winograd_fwd_t(const pd_t *pd,
+    jit_avx512_common_convolution_winograd_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : _jit_avx512_common_convolution_winograd_t<true>(pd->jcp_, pd->attr())
-        , cpu_primitive_t(&conf_, inputs, outputs)
-        , conf_(*pd) {}
+        : _jit_avx512_common_convolution_winograd_t<true>(apd->jcp_, apd->attr())
+        , cpu_primitive_t(apd, inputs, outputs, true) {}
 
-    ~_jit_avx512_common_convolution_winograd_fwd_t(){};
+    ~jit_avx512_common_convolution_winograd_fwd_t(){};
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         float *src = (float *)this->input_memory(0);
         float *dst = (float *)this->memory();
         float *weights = (float *)this->input_memory(1);
         float *bias = (float *)this->input_memory(2);
 
-        switch ((conf_.jcp_).sched_policy) {
-        case WSCHED_DATA_W_S_G_D:
-            this->_execute_data_W_S_G_D(conf_.MB(), src, dst, weights, bias);
-            break;
-        case WSCHED_DATA_W_SGD:
-            this->_execute_data_W_SGD(conf_.MB(), src, dst, weights, bias);
-            break;
-        default:
-            break;
-        }
+        this->_execute_data_W_S_G_D(pd()->MB(), src, dst, weights, bias,
+                this->scratchpad());
+
         e->set_state(event_t::ready);
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
-using jit_avx512_common_convolution_winograd_fwd_t
-        = _jit_avx512_common_convolution_winograd_fwd_t<false>;
-using jit_avx512_common_convolution_winograd_relu_t
-        = _jit_avx512_common_convolution_winograd_fwd_t<true>;
-
 struct jit_avx512_common_convolution_winograd_bwd_data_t
         : _jit_avx512_common_convolution_winograd_t<false>,
         public cpu_primitive_t {
@@ -323,20 +199,33 @@ struct jit_avx512_common_convolution_winograd_bwd_data_t
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
                     && utils::one_of(this->desc()->prop_kind, backward_data)
-                    && this->desc()->alg_kind == alg_kind::convolution_winograd
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
                     && !this->has_zero_dim_memory()
                     && utils::everyone_is(data_type::f32,
                                this->desc()->diff_src_desc.data_type,
                                this->desc()->weights_desc.data_type,
                                this->desc()->diff_dst_desc.data_type)
                     && mkldnn_thr_syncable();
+
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_common_conv_winograd_bwd_data_kernel_f32::
-                    init_conf(jcp_, *this->desc(), *this->diff_src_pd_.desc(),
-                            *this->weights_pd_.desc(),
-                            *this->diff_dst_pd_.desc());
+            status_t status =
+                jit_avx512_common_conv_winograd_bwd_data_kernel_f32::init_conf(
+                        jcp_, *this->desc(), *this->diff_src_pd_.desc(),
+                        *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
+            if (status != status::success) return status;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+            winograd_avx512_common::init_scratchpad(scratchpad, jcp_);
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return status;
         }
 
         jit_conv_winograd_conf_t jcp_;
@@ -357,44 +246,32 @@ struct jit_avx512_common_convolution_winograd_bwd_data_t
         }
     };
 
-    jit_avx512_common_convolution_winograd_bwd_data_t(const pd_t *pd,
+    jit_avx512_common_convolution_winograd_bwd_data_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : _jit_avx512_common_convolution_winograd_t<false>(pd->jcp_, pd->attr())
-        , cpu_primitive_t(&conf_, inputs, outputs)
-        , conf_(*pd) {}
+        : _jit_avx512_common_convolution_winograd_t<false>(apd->jcp_, apd->attr())
+        , cpu_primitive_t(apd, inputs, outputs, true) {}
 
     ~jit_avx512_common_convolution_winograd_bwd_data_t(){};
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
+        assert(pd()->desc()->prop_kind == prop_kind::backward_data
+                && "invalid prop_kind");
+
         float *diff_dst = (float *)this->input_memory(0);
         float *diff_src = (float *)this->memory();
         float *weights = (float *)this->input_memory(1);
 
-        if (conf_.desc()->prop_kind == prop_kind::backward_data) {
-            switch ((conf_.jcp_).sched_policy) {
-            case WSCHED_DATA_W_S_G_D:
-                this->_execute_data_W_S_G_D(conf_.MB(), diff_dst, diff_src, weights, NULL);
-                break;
-
-            case WSCHED_DATA_W_SGD:
-                this->_execute_data_W_SGD(conf_.MB(), diff_dst, diff_src, weights, NULL);
-                break;
-
-            default:
-                break;
-            }
-        } else {
-            assert(!"invalid prop_kind");
-        }
+        this->_execute_data_W_S_G_D(pd()->MB(), diff_dst, diff_src, weights, nullptr,
+                this->scratchpad());
 
         e->set_state(event_t::ready);
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 struct jit_avx512_common_convolution_winograd_bwd_weights_t
@@ -417,7 +294,9 @@ struct jit_avx512_common_convolution_winograd_bwd_weights_t
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
                     && utils::one_of(this->desc()->prop_kind, backward_weights)
-                    && this->desc()->alg_kind == alg_kind::convolution_winograd
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
                     && !this->has_zero_dim_memory()
                     && utils::everyone_is(data_type::f32,
                                this->desc()->src_desc.data_type,
@@ -427,10 +306,21 @@ struct jit_avx512_common_convolution_winograd_bwd_weights_t
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::
-                    init_conf(jcp_, *this->desc(), *this->src_pd_.desc(),
-                            *this->diff_dst_pd_.desc(),
-                            *this->diff_weights_pd_.desc());
+            status_t status =
+                jit_avx512_common_conv_winograd_bwd_weights_kernel_f32::
+                init_conf(jcp_, *this->desc(), *this->src_pd_.desc(),
+                        *this->diff_dst_pd_.desc(),
+                        *this->diff_weights_pd_.desc());
+            if (status != status::success) return status;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+            winograd_avx512_common::init_scratchpad(scratchpad, jcp_);
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return status;
         }
 
         jit_conv_winograd_conf_t jcp_;
@@ -453,72 +343,35 @@ struct jit_avx512_common_convolution_winograd_bwd_weights_t
         }
     };
 
-    jit_avx512_common_convolution_winograd_bwd_weights_t(const pd_t *pd,
+    jit_avx512_common_convolution_winograd_bwd_weights_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs)
-        , conf_(*pd)
-        , kernel_(nullptr)
-        , scratchpad_(nullptr)
-        , padded_bias_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs, true), kernel_(nullptr)
     {
-        auto jcp = conf_.jcp_;
         kernel_ = new jit_avx512_common_conv_winograd_bwd_weights_kernel_f32(
-                jcp);
-        scratchpad_ = new winograd::winograd_scratchpad_t(jcp);
-        if (conf_.want_padded_bias())
-            padded_bias_ = (float *)malloc(sizeof(float) * jcp.oc, 64);
+                pd()->jcp_);
     }
 
     ~jit_avx512_common_convolution_winograd_bwd_weights_t()
-    {
-        delete kernel_;
-        delete scratchpad_;
-        free(padded_bias_);
-    };
+    { delete kernel_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
-        if (conf_.desc()->prop_kind == prop_kind::backward_weights) {
-            const auto &jcp = kernel_->jcp;
-            switch (jcp.sched_policy) {
-            case WSCHED_WEI_S_D_G_W:
-                _execute_backward_weights_S_D_G_W();
-                break;
-            case WSCHED_WEI_S_D_Giot_W:
-                _execute_backward_weights_S_D_Giot_W();
-                break;
-            case WSCHED_WEI_SDGtWo:
-                _execute_backward_weights_SDGtWo();
-                break;
-            case WSCHED_WEI_SDGt_W:
-                _execute_backward_weights_SDGt_W();
-                break;
-            default:
-                assert(!"Unknown Winograd schedule policy!");
-                break;
-            }
-        }
-        else
-            assert(!"invalid prop_kind");
+        assert(pd()->desc()->prop_kind == prop_kind::backward_weights
+                && "invalid prop_kind");
+        _execute_backward_weights_S_D_G_W(scratchpad());
         e->set_state(event_t::ready);
     }
 
 private:
-    void _execute_backward_weights_S_D_G_W();
-    void _execute_backward_weights_S_D_Giot_W();
-    void _execute_backward_weights_SDGtWo();
-    void _execute_backward_weights_SDGt_W();
-    void _maybe_execute_diff_bias_copy();
+    void _execute_backward_weights_S_D_G_W(
+            const memory_tracking::grantor_t &scratchpad) const;
+    void _maybe_execute_diff_bias_copy(
+            const memory_tracking::grantor_t &scratchpad) const;
 
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_avx512_common_conv_winograd_bwd_weights_kernel_f32 *kernel_;
-
-    // Buffer required to store transforms in the frequency domain
-    winograd::winograd_scratchpad_t *scratchpad_;
-
-    float *padded_bias_;
 };
 
 void trans_W_4x4_3x3(float Fw_[6][6][16][16], float F[3][3][16][16]);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.cpp
index 3d1701f0b..1953182a4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.cpp
@@ -349,20 +349,20 @@ status_t jit_avx512_common_lrn_fwd_t::pd_t::init() {
     return args_ok_across ? success : unimplemented;
 }
 
-jit_avx512_common_lrn_fwd_t::jit_avx512_common_lrn_fwd_t(const pd_t *pd,
+jit_avx512_common_lrn_fwd_t::jit_avx512_common_lrn_fwd_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
+    : cpu_primitive_t(apd, inputs, outputs)
     , use_h_parallelism(0), ker_(nullptr), ker_first_(nullptr)
     , ker_last_(nullptr) {
     using namespace alg_kind;
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const int ls = conf_.desc()->local_size;
-    const float alpha = conf_.desc()->lrn_alpha / ls;
-    const float k = conf_.desc()->lrn_k;
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const int ls = pd()->desc()->local_size;
+    const float alpha = pd()->desc()->lrn_alpha / ls;
+    const float k = pd()->desc()->lrn_k;
 
-    auto pk = conf_.desc()->prop_kind;
+    auto pk = pd()->desc()->prop_kind;
 
     use_h_parallelism = H > 28 ? 1 : 0;
 
@@ -382,15 +382,15 @@ jit_avx512_common_lrn_fwd_t::jit_avx512_common_lrn_fwd_t(const pd_t *pd,
 jit_avx512_common_lrn_fwd_t::~jit_avx512_common_lrn_fwd_t()
 { delete ker_; delete ker_first_; delete ker_last_; }
 
-void jit_avx512_common_lrn_fwd_t::execute_forward() {
+void jit_avx512_common_lrn_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
     auto ws = reinterpret_cast<data_t*>(this->memory(1));
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
+    const int N = pd()->MB();
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
 
     parallel(0, [&](const int ithr, const int nthr) {
         size_t start{0}, end{0};
@@ -761,17 +761,17 @@ status_t jit_avx512_common_lrn_bwd_t::pd_t::init() {
     return args_ok_across ? success : unimplemented;
 }
 
-jit_avx512_common_lrn_bwd_t::jit_avx512_common_lrn_bwd_t(const pd_t *pd,
+jit_avx512_common_lrn_bwd_t::jit_avx512_common_lrn_bwd_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
+    : cpu_primitive_t(apd, inputs, outputs)
     , use_h_parallelism(0),  ker_(nullptr), ker_first_(nullptr)
     , ker_last_(nullptr) {
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const int ls = conf_.desc()->local_size;
-    const float alpha = conf_.desc()->lrn_alpha / ls;
-    const float beta = conf_.desc()->lrn_beta;
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const int ls = pd()->desc()->local_size;
+    const float alpha = pd()->desc()->lrn_alpha / ls;
+    const float beta = pd()->desc()->lrn_beta;
 
     use_h_parallelism = H > 28 ? 1 : 0;
 
@@ -791,16 +791,16 @@ jit_avx512_common_lrn_bwd_t::jit_avx512_common_lrn_bwd_t(const pd_t *pd,
 jit_avx512_common_lrn_bwd_t::~jit_avx512_common_lrn_bwd_t()
 { delete ker_; delete ker_first_; delete ker_last_; }
 
-void jit_avx512_common_lrn_bwd_t::execute_backward() {
+void jit_avx512_common_lrn_bwd_t::execute_backward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto ws = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
+    const int N = pd()->MB();
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
 
     parallel(0, [&](const int ithr, const int nthr) {
         size_t start{0}, end{0};
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.hpp
index 10b5bb8f3..8ec624ac9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_common_lrn.hpp
@@ -39,20 +39,20 @@ struct jit_avx512_common_lrn_fwd_t: public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_avx512_common_lrn_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_avx512_common_lrn_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs);
     ~jit_avx512_common_lrn_fwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     int use_h_parallelism;
     struct jit_avx512_common_lrn_kernel_f32;
@@ -73,20 +73,20 @@ struct jit_avx512_common_lrn_bwd_t: public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_avx512_common_lrn_bwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_avx512_common_lrn_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs);
     ~jit_avx512_common_lrn_bwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     int use_h_parallelism;
     struct jit_avx512_common_lrn_kernel_f32;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.cpp
index 1239186b4..82a18b6ac 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.cpp
@@ -32,6 +32,7 @@ namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 using namespace Xbyak;
 
@@ -247,7 +248,6 @@ bool jit_avx512_core_fp32_wino_conv_2x3_dst_trans_t::maybe_relu(int position) {
     if (position == 0) {
         /* relu before sum */
         return false
-            || jcp.with_relu
             || p.contain(eltwise, 0);
     } else if (position == 1) {
         /* relu after sum */
@@ -411,7 +411,6 @@ struct jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t: public jit_generator {
             cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &weights_pd,
             cpu_memory_t::pd_t &dst_pd, cpu_memory_t::pd_t &bias_pd,
             const primitive_attr_t &attr,
-            bool with_relu, float relu_negative_slope,
             memory_desc_t& expect_wei_md);
 
     Zmm vreg_out(int n, int m) {
@@ -448,26 +447,14 @@ bool jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t::post_ops_ok(
     using namespace primitive_kind;
     const auto &p = attr.post_ops_;
 
-    auto is_relu = [&](int idx) {
-        return p.entry_[idx].kind == eltwise
-            && p.entry_[idx].eltwise.scale == 1.
-            && p.entry_[idx].eltwise.alg == alg_kind::eltwise_relu
-            && p.entry_[idx].eltwise.alpha == 0.;
-    };
+    auto is_relu = [&](int idx) { return p.entry_[idx].is_relu(); };
 
-   switch (p.len_) {
+    switch (p.len_) {
     case 0: return true;
-    case 1: return true
-                && IMPLICATION(jcp.with_relu, p.contain(sum, 0))
-                && IMPLICATION(!jcp.with_relu, is_relu(0) || p.contain(sum, 0));
-    case 2: return true
-                && IMPLICATION(jcp.with_relu, p.contain(sum, 0) && is_relu(1))
-                && IMPLICATION(!jcp.with_relu, false
-                        || (p.contain(sum, 0) && is_relu(1))
-                        || (p.contain(sum, 1) && is_relu(0)));
-    case 3: return true
-                && jcp.with_relu == false
-                && (is_relu(0) && p.contain(sum, 1) && is_relu(2));
+    case 1: return is_relu(0) || p.contain(sum, 0);
+    case 2: return (p.contain(sum, 0) && is_relu(1)) ||
+                       (p.contain(sum, 1) && is_relu(0));
+    case 3: return is_relu(0) && p.contain(sum, 1) && is_relu(2);
     default: return false;
     }
 
@@ -577,12 +564,17 @@ void jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t::generate() {
     postamble();
 }
 
+namespace {
+bool is_winograd_faster_than_direct(const jit_conv_conf_2x3_wino_t &jcp) {
+    return jcp.mb >= 4;
+}
+}
+
 status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
         jit_conv_conf_2x3_wino_t &jcp, const convolution_desc_t &cd,
         cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &wei_pd,
         cpu_memory_t::pd_t &dst_pd, cpu_memory_t::pd_t &bias_pd,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope,
-        memory_desc_t &expect_wei_md) {
+        const primitive_attr_t &attr, memory_desc_t &expect_wei_md) {
     const memory_desc_wrapper src_d(&src_pd);
     const memory_desc_wrapper wei_d(&wei_pd);
     const memory_desc_wrapper dst_d(&dst_pd);
@@ -590,6 +582,8 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
 
     const bool with_groups = wei_d.ndims() == src_d.ndims() + 1;
 
+    jcp.nthr = mkldnn_get_max_threads();
+
     jcp.ngroups = with_groups ? wei_d.dims()[0] : 1;
     jcp.mb = src_d.dims()[0];
     jcp.oc = dst_d.dims()[1] / jcp.ngroups;
@@ -616,10 +610,7 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
     int simdw = 16;
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_relu = with_relu;
-    jcp.relu_negative_slope = relu_negative_slope;
-    if (!IMPLICATION(with_relu, relu_negative_slope == 0.))
-        return status::unimplemented;
+
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
@@ -639,6 +630,10 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
     if (!(mayiuse(avx512_core)))
         return status::unimplemented;
 
+    if (!IMPLICATION(cd.alg_kind == alg_kind::convolution_auto,
+               is_winograd_faster_than_direct(jcp)))
+        return status::unimplemented;
+
     if (src_d.data_type() != data_type::f32)
         return status::unimplemented;
     if (wei_d.data_type() != data_type::f32)
@@ -673,7 +668,6 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
     auto wei_sz = (float)aa * ic * oc;
     auto inp_sz = (float)mb * ih * iw * ic;
     auto sp_sz = (float)mb * ih * iw;
-    const int nthr = mkldnn_get_max_threads();
 
     /* Heuristics here. Numbers '28','196' is an observation from data. */
     if (wei_sz / inp_sz > 5)
@@ -681,10 +675,10 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
     else
         jcp.small_mb = false;
 
-    if (mb > nstl::min(nthr, 28)
+    if (mb > nstl::min(jcp.nthr, 28)
         || (!jcp.small_mb
             && (wei_sz >= 0.9f * L2_cap
-                || inp_sz > L2_cap * nthr + L3_capacity))
+                || inp_sz > L2_cap * jcp.nthr + L3_capacity))
         || (jcp.small_mb && sp_sz > 196))
         return unimplemented;
 
@@ -749,7 +743,7 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
 
             /* outer parallelization */
             int nblocks = mb * div_up(ih, iy) * div_up(iw, ix);
-            thr_eff = (float)nblocks / rnd_up(nblocks, nthr);
+            thr_eff = (float)nblocks / rnd_up(nblocks, jcp.nthr);
 
             mem_eff = 1.f;
             req_mem = (((float)ix + 2) * (iy + 2) + aa * M) * Z + aa * Y;
@@ -765,14 +759,15 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
             /* inner parallelization */
             int bsz = iy * ix / a;
             int gemmw = aa * (nb_oc / n2_b);
-            int bsz_r = rnd_up(bsz, nthr);
-            int gemmw_r = rnd_up(gemmw, nthr);
+            int bsz_r = rnd_up(bsz, jcp.nthr);
+            int gemmw_r = rnd_up(gemmw, jcp.nthr);
             thr_eff = ((float)Z * bsz / bsz_r + Y * gemmw / gemmw_r) / (Z + Y);
 
             req_mem = (float)ix * iy * (ic + simdw * n2_b) + simdw * n2_b * ic;
             mem_eff = nstl::min(1.f, L2_cap / req_mem);
-            int M_per_thr = nstl::max(2, div_up(aa, nthr));
-            int oc_per_thr = nstl::min(oc, div_up(aa * (nb_oc / n2_b), nthr));
+            int M_per_thr = nstl::max(2, div_up(aa, jcp.nthr));
+            int oc_per_thr =
+                nstl::min(oc, div_up(aa * (nb_oc / n2_b), jcp.nthr));
             req_mem = (float)aa * oc_per_thr * ic + M_per_thr * M * Z;
             if (req_mem > L2_cap)
                 mem_eff = 0.1f;
@@ -839,63 +834,34 @@ status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::init_conf(
 }
 ////////////////////////////////////////////////////////////////////////////////
 
-template <bool with_relu>
-status_t _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
+status_t jit_avx512_core_fp32_wino_conv_2x3_fwd_t
     ::pd_t::jit_conf(memory_desc_t& expect_wei_md) {
     return jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t::init_conf(
-            jcp_, this->cdesc_(), this->src_pd_, this->weights_pd_,
-            this->dst_pd_,this->bias_pd_, *this->attr(),
-            with_relu, this->negative_slope(), expect_wei_md);
+            jcp_, *this->desc(), this->src_pd_, this->weights_pd_,
+            this->dst_pd_,this->bias_pd_, *this->attr(), expect_wei_md);
 }
 
-template <bool with_relu>
-_jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>::
-        _jit_avx512_core_fp32_wino_conv_2x3_fwd_t(const pd_t *pd,
+jit_avx512_core_fp32_wino_conv_2x3_fwd_t::
+        jit_avx512_core_fp32_wino_conv_2x3_fwd_t(const pd_t *apd,
                 const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs)
-    , conf_(*pd), padded_bias_(nullptr) {
-    const int nthreads = mkldnn_get_max_threads();
+    : cpu_primitive_t(apd, inputs, outputs)
+{
     kernel_ = new jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t(
-            conf_.jcp_, *conf_.attr());
+            pd()->jcp_, *pd()->attr());
     src_trans_ = new jit_avx512_core_fp32_wino_conv_2x3_src_trans_t(
-            conf_.jcp_, *conf_.attr());
+            pd()->jcp_, *pd()->attr());
     dst_trans_ = new jit_avx512_core_fp32_wino_conv_2x3_dst_trans_t(
-            conf_.jcp_, *conf_.attr());
-
-    int wino_size_offset
-            = (conf_.jcp_.yb / 2) * (conf_.jcp_.xb / 2) + (conf_.jcp_.xb);
-
-    size_wino_src = (conf_.jcp_.ic * 16) * (wino_size_offset);
-    size_wino_dst = (conf_.jcp_.oc * 16) * (wino_size_offset);
-
-    wino_src_ = (float *)malloc(sizeof(float) * nthreads * size_wino_src, 4096);
-    wino_dst_ = (float *)malloc(sizeof(float) * nthreads * size_wino_dst, 4096);
-    if (conf_.want_padded_bias()) {
-        const auto &j = conf_.jcp_;
-        assert(j.ngroups == 1);
-        padded_bias_ = (float *)malloc(sizeof(float) * j.oc, 64);
-        for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-            padded_bias_[oc] = 0;
-    }
-
-
+            pd()->jcp_, *pd()->attr());
 }
 
-template <bool with_relu>
-_jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
-    ::~_jit_avx512_core_fp32_wino_conv_2x3_fwd_t() {
+jit_avx512_core_fp32_wino_conv_2x3_fwd_t
+    ::~jit_avx512_core_fp32_wino_conv_2x3_fwd_t() {
     delete kernel_;
     delete src_trans_;
     delete dst_trans_;
-
-    free(wino_src_);
-    free(wino_dst_);
-    free(padded_bias_);
 }
 
-template <bool with_relu>
-void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<
-        with_relu>::execute_forward() {
+void jit_avx512_core_fp32_wino_conv_2x3_fwd_t::execute_forward() const {
     const auto &jcp = kernel_->jcp;
 
     if (jcp.small_mb)
@@ -904,33 +870,41 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<
         execute_forward_mbN();
 }
 
-template <bool with_relu>
-void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
-::execute_forward_mbN() {
+void jit_avx512_core_fp32_wino_conv_2x3_fwd_t::execute_forward_mbN() const {
     auto src = reinterpret_cast<const float *>(input_memory(0));
     auto wei = reinterpret_cast<const float *>(input_memory(1));
     auto bia = reinterpret_cast<const float *>(input_memory(2));
     auto dst = reinterpret_cast<float *>(memory(0));
 
-    const auto &jcp = kernel_->jcp;
-    const auto &oscales = conf_.attr()->output_scales_;
-
-    wino_wei_ = wei;
+    auto scratchpad = this->scratchpad();
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bia[oc];
-        bia = padded_bias_;
+    const auto &jcp = kernel_->jcp;
+    const auto &oscales = pd()->attr()->output_scales_;
+
+    const size_t wino_size_offset =
+        (size_t)(pd()->jcp_.yb / 2) * (pd()->jcp_.xb / 2) + (pd()->jcp_.xb);
+    const size_t size_wino_src = wino_size_offset * pd()->jcp_.ic * 16;
+    const size_t size_wino_dst = wino_size_offset * pd()->jcp_.oc * 16;
+
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad.get<float>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bia, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bia = padded_bias;
     }
 
+    auto ptr_V = scratchpad.get<float>(key_wino_V);
+    auto ptr_M = scratchpad.get<float>(key_wino_M);
+
     parallel_nd(jcp.mb, div_up(jcp.oh,jcp.yb), div_up(jcp.ow, jcp.xb),
         [&](int mb, int tile_y_b, int tile_x_b) {
         int tile_y = tile_y_b * jcp.yb;
         int tile_x = tile_x_b * jcp.xb;
 
         int ithr = mkldnn_get_thread_num();
-        auto wino_src = wino_src_ + size_wino_src * ithr;
-        auto wino_dst = wino_dst_ + size_wino_dst * ithr;
+        auto wino_src = ptr_V + size_wino_src * ithr;
+        auto wino_dst = ptr_M + size_wino_dst * ithr;
 
         auto src_trans_p =
             jit_avx512_core_fp32_wino_conv_2x3_src_trans_t
@@ -985,7 +959,7 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
             int offset = (tile_ij + ithr) % 16;
             gemm_p.src = wino_src + jcp.inp_stride * offset;
             gemm_p.dst = wino_dst + jcp.out_stride * offset;
-            gemm_p.wei = wino_wei_ + jcp.wei_stride * offset;
+            gemm_p.wei = wei + jcp.wei_stride * offset;
 
             kernel_->ker_(&gemm_p);
         }
@@ -1027,25 +1001,29 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
     });
 }
 
-template <bool with_relu>
-void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
-    ::execute_forward_small_mb() {
+void jit_avx512_core_fp32_wino_conv_2x3_fwd_t::execute_forward_small_mb() const
+{
     auto src = reinterpret_cast<const float *>(input_memory(0));
     auto wei = reinterpret_cast<const float *>(input_memory(1));
     auto bia = reinterpret_cast<const float *>(input_memory(2));
     auto dst = reinterpret_cast<float *>(memory(0));
 
-    const auto &jcp = kernel_->jcp;
-    const auto &oscales = conf_.attr()->output_scales_;
-
-    wino_wei_ = wei;
+    auto scratchpad = this->scratchpad();
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bia[oc];
-        bia = padded_bias_;
+    const auto &jcp = kernel_->jcp;
+    const auto &oscales = pd()->attr()->output_scales_;
+
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad.get<float>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bia, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bia = padded_bias;
     }
 
+    auto ptr_V = scratchpad.get<float>(key_wino_V);
+    auto ptr_M = scratchpad.get<float>(key_wino_M);
+
     for (int mb = 0; mb < jcp.mb; mb++) {
     for (int tile_y = 0; tile_y < jcp.oh; tile_y += jcp.yb) {
     for (int tile_x = 0; tile_x < jcp.ow; tile_x += jcp.xb) {
@@ -1080,7 +1058,7 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
             auto local_s = src
                     + mb * jcp.nb_ic * jcp.ih * jcp.iw * jcp.ic_block
                     + y * jcp.iw * jcp.ic_block + x * jcp.ic_block;
-            auto local_w = wino_src_ + m * jcp.ic;
+            auto local_w = ptr_V + m * jcp.ic;
 
             src_trans_p.src = local_s;
             src_trans_p.wino_src = local_w;
@@ -1095,10 +1073,10 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
             auto gemm_p = jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t ::
                     call_params_t();
 
-            gemm_p.src = wino_src_ + jcp.inp_stride * tile_ij;
-            gemm_p.dst = wino_dst_ + jcp.out_stride * tile_ij
+            gemm_p.src = ptr_V + jcp.inp_stride * tile_ij;
+            gemm_p.dst = ptr_M + jcp.out_stride * tile_ij
                     + nnb * jcp.n2_block * jcp.n_block;
-            gemm_p.wei = wino_wei_ + jcp.wei_stride * tile_ij
+            gemm_p.wei = wei + jcp.wei_stride * tile_ij
                     + nnb * jcp.n2_block * jcp.n_block * jcp.K;
 
             kernel_->ker_(&gemm_p);
@@ -1128,7 +1106,7 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
             auto local_d = dst
                     + mb * jcp.nb_oc * jcp.oh * jcp.ow * jcp.oc_block
                     + y * jcp.ow * jcp.oc_block + x * jcp.oc_block;
-            auto local_w = wino_dst_ + m * jcp.oc;
+            auto local_w = ptr_M + m * jcp.oc;
 
             auto scales = oscales.scales_;
             dst_trans_p.dst = local_d;
@@ -1144,9 +1122,6 @@ void _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>
     }}}
 }
 
-template struct _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<true>;
-template struct _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<false>;
-
 } // namespace cpu
 } // namespace impl
 } // namespace mkldnn
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.hpp
index cd4d5daf3..ec7d05b71 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_2x3.hpp
@@ -37,46 +37,52 @@ struct jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t;
 struct jit_avx512_core_fp32_wino_conv_2x3_src_trans_t;
 struct jit_avx512_core_fp32_wino_conv_2x3_dst_trans_t;
 
-template <bool with_relu>
-struct _jit_avx512_core_fp32_wino_conv_2x3_fwd_t : public cpu_primitive_t {
-    struct pd_t : public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+struct jit_avx512_core_fp32_wino_conv_2x3_fwd_t : public cpu_primitive_t {
+    struct pd_t : public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            :  _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-            hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_fp32_wino_2x3:", avx512_core, ""),
-                _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<with_relu>);
+                jit_avx512_core_fp32_wino_conv_2x3_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
             using namespace memory_format;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
-                    && utils::one_of(this->cdesc_().prop_kind, forward_inference)
-                    && this->cdesc_().alg_kind == alg_kind::convolution_winograd
-                    && this->cdesc_().src_desc.data_type == data_type::f32
-                    && this->cdesc_().dst_desc.data_type == data_type::f32
-                    && this->cdesc_().weights_desc.data_type == data_type::f32
+                    && utils::one_of(this->desc()->prop_kind, forward_inference)
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
+                    && this->desc()->src_desc.data_type == data_type::f32
+                    && this->desc()->dst_desc.data_type == data_type::f32
+                    && this->desc()->weights_desc.data_type == data_type::f32
                     && IMPLICATION(this->with_bias(),
-                               utils::one_of(this->cdesc_().bias_desc.data_type,
+                               utils::one_of(this->desc()->bias_desc.data_type,
                                        data_type::f32));
             if (!ok)
                 return status::unimplemented;
 
             memory_desc_t expect_wei_md = *(this->weights_pd_.desc());
             status_t jit_conf_result = jit_conf(expect_wei_md);
-            if (jit_conf_result == success) {
-                cpu_memory_t::pd_t new_weights_pd(this->engine_, &expect_wei_md);
-                if (this->weights_pd_.desc()->format == any)
-                    this->weights_pd_ = new_weights_pd;
-                if (!this->weights_pd_.is_equal(&new_weights_pd))
-                    return status::unimplemented;
-            }
-            return jit_conf_result;
+            if (jit_conf_result != success) return jit_conf_result;
+
+            cpu_memory_t::pd_t new_weights_pd(this->engine_, &expect_wei_md);
+            if (this->weights_pd_.desc()->format == any)
+                this->weights_pd_ = new_weights_pd;
+            if (!this->weights_pd_.is_equal(&new_weights_pd))
+                return unimplemented;
+
+            init_scratchpad();
+
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+               CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return success;
         }
 
         jit_conv_conf_2x3_wino_t jcp_;
@@ -84,6 +90,25 @@ struct _jit_avx512_core_fp32_wino_conv_2x3_fwd_t : public cpu_primitive_t {
     protected:
         status_t jit_conf(memory_desc_t& expect_wei_md);
 
+        void init_scratchpad() {
+            using namespace memory_tracking::names;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+
+            int wino_size_offset = (jcp_.yb / 2) * (jcp_.xb / 2) + jcp_.xb;
+
+            size_t V_sz = (size_t)jcp_.ic * 16 * wino_size_offset * jcp_.nthr;
+            scratchpad.book(key_wino_V, sizeof(float) * V_sz, PAGE_4K);
+
+            size_t M_sz = (size_t)jcp_.oc * 16 * wino_size_offset * jcp_.nthr;
+            scratchpad.book(key_wino_M, sizeof(float) * M_sz, PAGE_4K);
+
+            if (wants_padded_bias()) {
+                assert(jcp_.ngroups == 1);
+                scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp_.oc);
+            }
+        }
+
         virtual status_t set_default_params() override {
             using namespace memory_format;
             if (this->src_pd_.desc()->format == any)
@@ -96,43 +121,27 @@ struct _jit_avx512_core_fp32_wino_conv_2x3_fwd_t : public cpu_primitive_t {
         }
     };
 
-    _jit_avx512_core_fp32_wino_conv_2x3_fwd_t(const pd_t *pd,
+    jit_avx512_core_fp32_wino_conv_2x3_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
-    
-    ~_jit_avx512_core_fp32_wino_conv_2x3_fwd_t();
 
-    virtual void execute(event_t *e) {
+    ~jit_avx512_core_fp32_wino_conv_2x3_fwd_t();
+
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    void execute_forward_small_mb();
-    void execute_forward_mbN();
-    pd_t conf_;
+    void execute_forward() const;
+    void execute_forward_small_mb() const;
+    void execute_forward_mbN() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     jit_avx512_core_fp32_wino_conv_2x3_fwd_ker_t *kernel_;
     jit_avx512_core_fp32_wino_conv_2x3_src_trans_t *src_trans_;
     jit_avx512_core_fp32_wino_conv_2x3_dst_trans_t *dst_trans_;
-
-    size_t size_wino_wei;
-    size_t size_wino_src;
-    size_t size_wino_dst;
-
-    const float *wino_wei_;
-    const float *dst_bias_;
-
-    float *wino_src_;
-    float *wino_dst_;
-    float *padded_bias_;
 };
 
-using jit_avx512_core_fp32_wino_conv_2x3_fwd_t =
-    _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<false>;
-
-using jit_avx512_core_fp32_wino_convolution_relu_t =
-    _jit_avx512_core_fp32_wino_conv_2x3_fwd_t<true>;
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.cpp
index 4b9fbd6d7..60e2a69cd 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.cpp
@@ -25,7 +25,6 @@
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
-#include "jit_avx512_common_convolution_winograd.hpp"
 #include "jit_avx512_core_fp32_wino_conv_4x3.hpp"
 
 #ifndef _MSC_VER
@@ -41,12 +40,13 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 template <bool is_fwd>
 void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
 ::weight_transform_data(const jit_conv_winograd_conf_t &jcp,
-        float *wp, float *twp)
+        float *wp, float *twp) const
 {
     float G[] = {0.26890756302521f, 0.688403361344538f, 0.119514472455649f,
                  1.13777777777778f, 0.430252100840336f, 0.179271708683473f};
@@ -70,7 +70,7 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
 template<bool is_fwd>
 void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::output_transform_data
 (int image, const jit_conv_winograd_conf_t &jcp,
-    const post_ops_t &p_ops, float *toutp, float *pout_b, float *bias) {
+    const post_ops_t &p_ops, float *toutp, float *pout_b, float *bias) const {
 
     float G[] = {0.625f, 1.5f, 0.390625f, 2.25f, 0.244140625f, 3.375f};
     float Ow[alpha][alpha][simd_w];
@@ -121,7 +121,7 @@ template<bool is_fwd>
 void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
 ::output_transform_tileblock_data(int tile_block,
     const jit_conv_winograd_conf_t &jcp, const post_ops_t &p_ops,
-    float *toutp, float *outp, float *bias) {
+    float *toutp, float *outp, float *bias) const {
 
     float G[] = {0.625f, 1.5f, 0.390625f, 2.25f, 0.244140625f, 3.375f};
     float Ow[alpha][alpha][simd_w];
@@ -171,7 +171,7 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
 template<bool is_fwd>
 void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
     ::input_transform_data(int image, const jit_conv_winograd_conf_t &jcp,
-        float *inp, float *tinp)
+        float *inp, float *tinp) const
 {
     float G[] = {-2.25f, -0.390625f, 0.87890625f, -2.640625f,
                  0.625f, -0.625f, 1.5f, -1.5f, -2.640625f};
@@ -224,7 +224,7 @@ template <bool is_fwd>
 void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
     ::input_transform_tileblock_data(int tile_block,
         const jit_conv_winograd_conf_t &jcp,
-        float *inp, float *tinp)
+        float *inp, float *tinp) const
 {
     float G[] = {-2.25f, -0.390625f, 0.87890625f, -2.640625f,
                0.625f, -0.625f, 1.5f, -1.5f, -2.640625f};
@@ -280,7 +280,8 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>
 
 template <bool is_fwd>
 void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
-        const int MB, float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr) {
+        const int MB, float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr,
+        const memory_tracking::grantor_t &scratchpad) const {
     const auto &jcp = kernel_->jcp;
     const auto &p_ops = attr_->post_ops_;
 
@@ -306,10 +307,9 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
     array_offset_calculator<float, 2> bias(bias_ptr,
             jcp.dimM/jcp.dimM_simd_block, jcp.dimM_simd_block);
 
-    array_offset_calculator<float, 8> M(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->M_ptr()
-                    : (this->scratchpad_)->V_ptr())),
+    array_offset_calculator<float, 8> M(is_fwd
+            ? scratchpad.template get<float>(key_wino_M)
+            : scratchpad.template get<float>(key_wino_V),
             jcp.dimN_nb_block, jcp.dimM_nb_block,
             alpha, alpha,
             jcp.dimN_block, jcp.dimM_block * jcp.dimM_reg_block,
@@ -317,7 +317,7 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
 
     auto wino_wei = (jcp.prop_kind == prop_kind::forward_inference)
             ? wei_ptr
-            : (float *)(this->scratchpad_)->U_ptr();
+            : scratchpad.template get<float>(key_wino_U);
 
     array_offset_calculator<float, 8> U(wino_wei,
             jcp.dimM_nb_block,
@@ -325,23 +325,22 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
             jcp.dimK_nb_block,
             jcp.dimM_block * jcp.dimM_reg_block, jcp.dimK_block,
             jcp.dimK_reg_block, jcp.dimM_simd_block);
-    array_offset_calculator<float, 8> V(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->V_ptr()
-                    : (this->scratchpad_)->M_ptr())),
+    array_offset_calculator<float, 8> V(is_fwd
+            ? scratchpad.template get<float>(key_wino_V)
+            : scratchpad.template get<float>(key_wino_M),
             jcp.dimN_nb_block, alpha, alpha,
             jcp.dimN_block, jcp.dimK_nb_block,
             jcp.dimK_block, jcp.dimN_reg_block, jcp.dimK_reg_block);
 
-    const bool want_padded_bias = jcp.with_bias
+    const bool wants_padded_bias = jcp.with_bias
         && jcp.oc_without_padding != jcp.oc;
     float last_slice_bias[simd_w] = {0};
-    if (want_padded_bias) {
+    if (wants_padded_bias) {
         for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
             last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
     }
 
-#pragma omp parallel
+PRAGMA_OMP(parallel)
     {
         parallel_nd_in_omp(MB, jcp.dimK_nb_block, jcp.dimK_block,
                 [&](int img, int K_blk1, int K_blk2) {
@@ -367,7 +366,7 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
             });
         }
 
-#pragma omp barrier
+PRAGMA_OMP(barrier)
 
         parallel_nd_in_omp(jcp.dimN_nb_block, alpha, alpha, jcp.dimM_nb_block,
             [&](int N_blk1, int oj, int oi, int M_blk1) {
@@ -383,14 +382,14 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
                             N_blk2, K_blk1, 0, 0, 0)), K_blk1);
         });
 
-#pragma omp barrier
+PRAGMA_OMP(barrier)
 
         parallel_nd_in_omp(MB, jcp.dimM_nb_block, (jcp.dimM_block * jcp.dimM_reg_block),
                     [&](int img, int M_blk1, int M_blk2) {
             const int M_blk =
                 M_blk1 * jcp.dimM_block  * jcp.dimM_reg_block + M_blk2;
 
-            float *bias_ptr = want_padded_bias
+            float *bias_ptr = wants_padded_bias
                 && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
                 ? last_slice_bias : &bias(M_blk, 0);
             output_transform_data(img, jcp, p_ops,
@@ -400,16 +399,11 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_S_G_D(
     }
 }
 
-template void
-_jit_avx512_core_fp32_wino_conv_4x3_t<true>::_execute_data_W_S_G_D(
-        const int, float *, float *, float *, float *);
-template void
-_jit_avx512_core_fp32_wino_conv_4x3_t<false>::_execute_data_W_S_G_D(
-        const int, float *, float *, float *, float *);
-
 template <bool is_fwd>
-void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
-        const int MB, float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr) {
+void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(const int MB,
+        float *inp_ptr, float *out_ptr, float *wei_ptr, float *bias_ptr,
+        const memory_tracking::grantor_t &scratchpad) const {
+
     const auto &jcp = kernel_->jcp;
     const auto &p_ops = attr_->post_ops_;
 
@@ -430,7 +424,7 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
 
     auto wino_wei = (jcp.prop_kind == prop_kind::forward_inference)
                 ? wei_ptr
-                : (float *)(this->scratchpad_)->U_ptr();
+                : scratchpad.template get<float>(key_wino_U);
 
     array_offset_calculator<float, 8> U(wino_wei,
             jcp.dimM_nb_block,
@@ -439,25 +433,23 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
             jcp.dimM_block  * jcp.dimM_reg_block, jcp.dimK_block,
             jcp.dimK_reg_block, jcp.dimM_simd_block);
 
-    array_offset_calculator<float, 8> M(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->M_ptr()
-                    : (this->scratchpad_)->V_ptr())),
+    array_offset_calculator<float, 8> M(is_fwd
+            ? scratchpad.template get<float>(key_wino_M)
+            : scratchpad.template get<float>(key_wino_V),
             0, jcp.dimM_nb_block, alpha, alpha,
             jcp.dimN_block, jcp.dimM_block * jcp.dimM_reg_block,
             jcp.dimN_reg_block, jcp.dimM_simd_block);
-    array_offset_calculator<float, 8> V(
-            (float *)((is_fwd
-                    ? (this->scratchpad_)->V_ptr()
-                    : (this->scratchpad_)->M_ptr())),
+    array_offset_calculator<float, 8> V(is_fwd
+            ? scratchpad.template get<float>(key_wino_V)
+            : scratchpad.template get<float>(key_wino_M),
             0, alpha, alpha, jcp.dimN_block,
             jcp.dimK_nb_block, jcp.dimK_block,
             jcp.dimN_reg_block, jcp.dimK_reg_block);
 
-    const bool want_padded_bias = jcp.with_bias
+    const bool wants_padded_bias = jcp.with_bias
         && jcp.oc_without_padding != jcp.oc;
     float last_slice_bias[simd_w] = {0};
-    if (want_padded_bias) {
+    if (wants_padded_bias) {
         for (int oc = 0; oc < jcp.oc_without_padding % jcp.oc_simd_block; ++oc)
             last_slice_bias[oc] = bias(jcp.dimM / jcp.dimM_simd_block - 1, oc);
     }
@@ -478,12 +470,12 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
         });
     }
 
-#pragma omp parallel
+PRAGMA_OMP(parallel)
     {
 
     int ithr = mkldnn_get_thread_num();
 
-#pragma omp for schedule(static)
+PRAGMA_OMP(for schedule(static))
     for (int tile_block = 0; tile_block < jcp.tile_block; tile_block++) {
         for (int K_blk1 = 0; K_blk1 < jcp.dimK_nb_block; K_blk1++) {
             for (int K_blk2 = 0; K_blk2 < jcp.dimK_block; K_blk2++) {
@@ -516,7 +508,7 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
                 const int M_blk =
                     M_blk1 * jcp.dimM_block  * jcp.dimM_reg_block + M_blk2;
 
-                float *bias_ptr = want_padded_bias
+                float *bias_ptr = wants_padded_bias
                     && M_blk == jcp.dimM / jcp.dimM_simd_block - 1
                     ? last_slice_bias : &bias(M_blk, 0);
 
@@ -529,12 +521,8 @@ void _jit_avx512_core_fp32_wino_conv_4x3_t<is_fwd>::_execute_data_W_SGD(
     }
 }
 
-template void
-_jit_avx512_core_fp32_wino_conv_4x3_t<true>::_execute_data_W_SGD(
-        const int, float *, float *, float *, float *);
-template void
-_jit_avx512_core_fp32_wino_conv_4x3_t<false>::_execute_data_W_SGD(
-        const int, float *, float *, float *, float *);
+template struct _jit_avx512_core_fp32_wino_conv_4x3_t<true>;
+template struct _jit_avx512_core_fp32_wino_conv_4x3_t<false>;
 
 namespace {
 
@@ -545,7 +533,7 @@ void subarray_sum(size_t num_arrs, float *output, size_t nelems,
     const size_t blocks_number = nelems / block_size;
     const size_t tail = nelems % block_size;
 
-#pragma omp parallel
+PRAGMA_OMP(parallel)
     {
         const int ithr = mkldnn_get_thread_num();
         const int nthr = mkldnn_get_num_threads();
@@ -627,7 +615,7 @@ void array_sum(size_t num_arrs, float *output,
     const size_t blocks_number = nelems / block_size;
     const size_t tail = nelems % block_size;
 
-#pragma omp parallel
+PRAGMA_OMP(parallel)
     {
         const size_t ithr = mkldnn_get_thread_num();
         const size_t nthr = mkldnn_get_num_threads();
@@ -672,9 +660,10 @@ void array_sum(size_t num_arrs, float *output,
 } //bwdw namespace
 
 void jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t::
-_execute_backward_weights_SDGtWo() {
+_execute_backward_weights_SDGtWo(
+        const memory_tracking::grantor_t &scratchpad) const {
     const auto &jcp = kernel_->jcp;
-    const int nthreads = scratchpad_->num_threads();
+    const int nthreads = jcp.nthr;
 
     array_offset_calculator<float, 5> src((float *)this->input_memory(0),
             jcp.mb, jcp.ic / simd_w, jcp.ih, jcp.iw, simd_w);
@@ -683,20 +672,20 @@ _execute_backward_weights_SDGtWo() {
     array_offset_calculator<float, 6> diff_weights((float *)this->memory(0),
             jcp.oc / simd_w, jcp.ic / simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
 
-    array_offset_calculator<float, 8> Us((float *)(scratchpad_->U_ptr()),
+    array_offset_calculator<float, 8> Us(scratchpad.get<float>(key_wino_U),
             0, alpha, alpha,
             jcp.oc_block, jcp.ic_block,
             jcp.ic_simd_block,
             jcp.oc_reg_block,
             jcp.oc_simd_block);
 
-    int U_sz = nthreads * alpha * alpha * jcp.oc / jcp.nb_oc
-        * jcp.ic / jcp.nb_ic * sizeof(float);
+    const int U_sz = nthreads * alpha * alpha * jcp.oc / jcp.nb_oc
+        * jcp.ic / jcp.nb_ic;
     array_offset_calculator<float, 7>diff_weights_prv(
-            (float *)(scratchpad_->U_ptr() + U_sz),
+            scratchpad.get<float>(key_wino_U) + U_sz,
             0, jcp.oc / simd_w, jcp.ic / simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
 
-    array_offset_calculator<float, 8> M((float *)(scratchpad_->M_ptr()),
+    array_offset_calculator<float, 8> M(scratchpad.get<float>(key_wino_M),
             0, alpha, alpha,
             jcp.oc_block,
             jcp.nb_tile_block_ur,
@@ -704,7 +693,7 @@ _execute_backward_weights_SDGtWo() {
             jcp.oc_reg_block,
             jcp.oc_simd_block);
 
-    array_offset_calculator<float, 7> V((float *)(scratchpad_->V_ptr()),
+    array_offset_calculator<float, 7> V(scratchpad.get<float>(key_wino_V),
             0, alpha, alpha,
             jcp.ic_block,
             jcp.nb_tile_block_ur,
@@ -712,7 +701,7 @@ _execute_backward_weights_SDGtWo() {
             jcp.ic_simd_block);
 
     array_offset_calculator<float, 2> diff_bias_prv(
-            (float *)(scratchpad_->bias_ptr()), nthreads, jcp.oc);
+            scratchpad.get<float>(key_conv_bia_reduction), nthreads, jcp.oc);
 
     auto trans_ker_p = jit_wino_transform_call_s();
     float I[alpha][alpha][simd_w];
@@ -724,7 +713,7 @@ _execute_backward_weights_SDGtWo() {
        1.13777777777778f};
     float G_O_3x3_4x4[4] = {2.25f, 0.625f, 1.5f, 0.390625f};
 
-#pragma omp parallel num_threads(nthreads) firstprivate(trans_ker_p, I, T)
+PRAGMA_OMP(parallel num_threads(nthreads) firstprivate(trans_ker_p, I, T))
 {
     if (jcp.with_bias) {
         parallel_nd_in_omp(nthreads, jcp.oc / simd_w,
@@ -740,7 +729,7 @@ _execute_backward_weights_SDGtWo() {
     int ithr = mkldnn_get_thread_num();
     for (int ifm1 = 0; ifm1 < jcp.nb_ic; ++ifm1) {
         int first_tblk = 0;
-#pragma omp for
+PRAGMA_OMP(for)
         for (int tblk1 = 0; tblk1 < jcp.tile_block; ++tblk1) {
             int tile_index = tblk1 * jcp.nb_tile_block_ur * jcp.tile_block_ur;
             int img = tile_index / (jcp.itiles * jcp.jtiles);
@@ -806,7 +795,7 @@ _execute_backward_weights_SDGtWo() {
     // Reduce diff-weights
     {
         float *output = (float *)(this->memory(0));
-        float *input_base = (float *)(scratchpad_->U_ptr() + U_sz);
+        float *input_base = scratchpad.get<float>(key_wino_U) + U_sz;
         int nelems = jcp.oc * jcp.ic * jcp.kh * jcp.kw;
         float *input_ptrs[max_threads_number];
         for (int i = 0; i < nthreads; ++i) {
@@ -816,7 +805,7 @@ _execute_backward_weights_SDGtWo() {
 
         if (jcp.with_bias) {
             output = (float *)(this->memory(1));
-            input_base = (float *)(scratchpad_->bias_ptr());
+            input_base = scratchpad.get<float>(key_conv_bia_reduction);
             for (int i = 0; i < nthreads; ++i) {
                 input_ptrs[i] = input_base + jcp.oc * i;
             }
@@ -827,9 +816,10 @@ _execute_backward_weights_SDGtWo() {
 }
 
 void jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t::
-_execute_backward_weights_S_D_Giot_W() {
+_execute_backward_weights_S_D_Giot_W(
+        const memory_tracking::grantor_t &scratchpad) const {
     const auto &jcp = kernel_->jcp;
-    const int nthreads = scratchpad_->num_threads();
+    const int nthreads = jcp.nthr;
 
     array_offset_calculator<float, 5> src((float *)this->input_memory(0),
             jcp.mb, jcp.ic / simd_w, jcp.ih, jcp.iw, simd_w);
@@ -839,7 +829,7 @@ _execute_backward_weights_S_D_Giot_W() {
             jcp.oc / simd_w, jcp.ic / simd_w, jcp.kh, jcp.kw, simd_w, simd_w);
     array_offset_calculator<float, 1> diff_bias((float *)this->memory(1), jcp.oc);
 
-    array_offset_calculator<float, 9> U((float *)(scratchpad_->U_ptr()),
+    array_offset_calculator<float, 9> U(scratchpad.get<float>(key_wino_U),
             jcp.nb_ic, jcp.nb_oc,
             alpha, alpha,
             jcp.oc_block, jcp.ic_block,
@@ -847,9 +837,9 @@ _execute_backward_weights_S_D_Giot_W() {
             jcp.oc_reg_block,
             jcp.oc_simd_block);
 
-    int U_size = jcp.oc * jcp.ic * alpha * alpha * sizeof(float);
+    const int U_size = jcp.oc * jcp.ic * alpha * alpha;
     array_offset_calculator<float, 10> Us(
-            (float *)(scratchpad_->U_ptr() + U_size),
+            scratchpad.get<float>(key_wino_U) + U_size,
             0, jcp.nb_ic, jcp.nb_oc,
             alpha, alpha,
             jcp.oc_block, jcp.ic_block,
@@ -857,7 +847,7 @@ _execute_backward_weights_S_D_Giot_W() {
             jcp.oc_reg_block,
             jcp.oc_simd_block);
 
-    array_offset_calculator<float, 9> M((float *)(scratchpad_->M_ptr()),
+    array_offset_calculator<float, 9> M(scratchpad.get<float>(key_wino_M),
             jcp.nb_oc,
             jcp.tile_block,
             alpha, alpha,
@@ -867,7 +857,7 @@ _execute_backward_weights_S_D_Giot_W() {
             jcp.oc_reg_block,
             jcp.oc_simd_block);
 
-    array_offset_calculator<float, 8> V((float *)(scratchpad_->V_ptr()),
+    array_offset_calculator<float, 8> V(scratchpad.get<float>(key_wino_V),
             jcp.nb_ic,
             jcp.tile_block,
             alpha, alpha,
@@ -876,7 +866,7 @@ _execute_backward_weights_S_D_Giot_W() {
             jcp.ic_simd_block);
 
     array_offset_calculator<float, 2> diff_bias_prv(
-            (float *)(scratchpad_->bias_ptr()), nthreads, jcp.oc);
+            scratchpad.get<float>(key_conv_bia_reduction), nthreads, jcp.oc);
 
     size_t input_starts[max_threads_number] = {0};
     size_t input_ends[max_threads_number] = {0};
@@ -892,7 +882,7 @@ _execute_backward_weights_S_D_Giot_W() {
     float I[alpha][alpha][simd_w];
     float T[alpha][alpha][simd_w];
 
-#pragma omp parallel firstprivate(first_tblk, trans_ker_p, I, T)
+PRAGMA_OMP(parallel firstprivate(first_tblk, trans_ker_p, I, T))
 {
     if (jcp.with_bias) {
         parallel_nd_in_omp(nthreads, jcp.oc, [&](int ithr, int ofm) {
@@ -941,7 +931,7 @@ _execute_backward_weights_S_D_Giot_W() {
         }
     });
 
-    #pragma omp barrier
+    PRAGMA_OMP(barrier)
 
     parallel_nd_in_omp(jcp.nb_ic, jcp.nb_oc, alpha, alpha, jcp.tile_block,
         [&](int ifm1, int ofm1, int oj, int oi, int tblk1){
@@ -991,7 +981,7 @@ _execute_backward_weights_S_D_Giot_W() {
     }
 
     trans_ker_p.G = G_O_3x3_4x4;
-#pragma omp parallel firstprivate(trans_ker_p)
+PRAGMA_OMP(parallel firstprivate(trans_ker_p))
     {
         parallel_nd_in_omp(jcp.nb_ic, jcp.nb_oc, jcp.oc_block, jcp.ic_block, jcp.oc_reg_block,
             [&](int ifm1, int ofm1, int ofm2, int ifm2, int ofm3){
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.hpp
index e4ef2861a..8f4f7a55d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3.hpp
@@ -18,9 +18,9 @@
 #define CPU_JIT_AVX512_CORE_FP32_WINO_CONV_4x3_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
-#include "scratchpad.hpp"
 
 #include "jit_avx512_core_fp32_wino_conv_4x3_kernel.hpp"
 
@@ -28,116 +28,50 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-namespace winograd {
-
-struct winograd_scratchpad_avx512_core_t {
-    public:
-        winograd_scratchpad_avx512_core_t(const jit_conv_winograd_conf_t &jcp)
-        {
-            get_scratchpad_size_(jcp);
-            allocate_scratchpad_(jcp);
-        }
-
-        ~winograd_scratchpad_avx512_core_t() {
-            if (scratchpad_ != nullptr)
-                delete scratchpad_;
-        }
-
-        char *U_ptr() {
-            /* buffer for wei transform U*/
-            return scratchpad_->get() + U_offset_;
-        }
-
-        char *V_ptr() {
-            /* buffer for src transform V*/
-            return scratchpad_->get() + V_offset_;
-        }
-
-        char *M_ptr() {
-            /* buffer for dst transform M*/
-            return scratchpad_->get() + M_offset_;
-        }
-
-        char *bias_ptr() {
-            /* buffer for bias update in bwdw*/
-            return scratchpad_->get() + bias_offset_;
-        }
-
-        int num_threads(){
-            return nthreads_;
-        }
-
-    private:
-        inline void get_scratchpad_size_(const jit_conv_winograd_conf_t &jcp) {
-            nthreads_ = mkldnn_get_max_threads();
-
-            U_sz_ = size_t(alpha) * alpha * jcp.ic * jcp.oc * sizeof(float);
-            V_sz_ = size_t(alpha) * alpha * jcp.mb * jcp.ic
-                           * jcp.itiles * jcp.jtiles
-                           * sizeof(float);
-            M_sz_ = size_t(alpha) * alpha * jcp.mb * jcp.oc
-                           * jcp.itiles * jcp.jtiles
-                           * sizeof(float);
-
-            switch (jcp.sched_policy) {
-            case WSCHED_DATA_W_SGD:
-                V_sz_ = nthreads_ * alpha * alpha
-                    * jcp.nb_tile_block_ur * jcp.tile_block_ur
-                    * jcp.ic * sizeof(float);
-                M_sz_ = nthreads_* alpha * alpha
-                    * jcp.nb_tile_block_ur * jcp.tile_block_ur
-                    * jcp.oc * sizeof(float);
-                break;
-            case WSCHED_WEI_SDGtWo:
-                nthreads_ = nstl::min(mkldnn_get_max_threads(), jcp.tile_block);
-
-                U_sz_ = nthreads_
-                    * (alpha * alpha * jcp.oc * (jcp.ic / jcp.nb_ic)
-                      + jcp.ic * jcp.oc * jcp.kh * jcp.kw)
-                    * sizeof(float);
-                M_sz_ = nthreads_ * alpha * alpha
-                        * (jcp.ntiles / jcp.tile_block)
-                        * (jcp.oc / jcp.nb_oc) * sizeof(float);
-                V_sz_ = nthreads_ * alpha * alpha
-                        * (jcp.ntiles / jcp.tile_block)
-                        * (jcp.ic / jcp.nb_ic)
-                        * sizeof(float);
-                bias_sz_ = nthreads_ * jcp.oc * sizeof(float);
-                break;
-            case WSCHED_WEI_S_D_Giot_W:
-                U_sz_ = (nthreads_ + 1) * alpha * alpha * jcp.ic * jcp.oc
-                      * sizeof(float);
-                M_sz_ = size_t(alpha) * alpha * jcp.oc * jcp.ntiles * sizeof(float);
-                V_sz_ = size_t(alpha) * alpha * jcp.ic * jcp.ntiles * sizeof(float);
-                bias_sz_ = nthreads_ * jcp.oc * sizeof(float);
-                break;
-            default:
-                break;
-            }
-        }
+namespace winograd_avx512_core {
+inline void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+        const jit_conv_winograd_conf_t &jcp) {
+    using namespace utils;
+    using namespace memory_tracking::names;
+
+    size_t U_sz = (size_t)alpha * alpha * jcp.ic * jcp.oc;
+    size_t V_sz = (size_t)alpha * alpha * jcp.mb * jcp.ic * jcp.itiles
+        * jcp.jtiles;
+    size_t M_sz = (size_t)alpha * alpha * jcp.mb * jcp.oc * jcp.itiles
+        * jcp.jtiles;
+
+    switch (jcp.sched_policy) {
+    case WSCHED_DATA_W_SGD:
+        V_sz = (size_t)jcp.nthr * alpha * alpha * jcp.nb_tile_block_ur
+            * jcp.tile_block_ur * jcp.ic;
+        M_sz = (size_t)jcp.nthr * alpha * alpha * jcp.nb_tile_block_ur
+            * jcp.tile_block_ur * jcp.oc;
+        break;
+    case WSCHED_WEI_SDGtWo:
+        U_sz = (size_t)jcp.nthr * (alpha * alpha * jcp.oc
+                * (jcp.ic / jcp.nb_ic) + jcp.ic * jcp.oc * jcp.kh * jcp.kw);
+        M_sz = (size_t)jcp.nthr * alpha * alpha * (jcp.ntiles / jcp.tile_block)
+            * (jcp.oc / jcp.nb_oc);
+        V_sz = (size_t)jcp.nthr * alpha * alpha * (jcp.ntiles / jcp.tile_block)
+            * (jcp.ic / jcp.nb_ic);
+        break;
+    case WSCHED_WEI_S_D_Giot_W:
+        U_sz = (size_t)(jcp.nthr + 1) * alpha * alpha * jcp.ic * jcp.oc;
+        M_sz = (size_t)alpha * alpha * jcp.oc * jcp.ntiles;
+        V_sz = (size_t)alpha * alpha * jcp.ic * jcp.ntiles;
+        break;
+    default: break;
+    }
 
-        inline void allocate_scratchpad_(const jit_conv_winograd_conf_t &jcp) {
-            const size_t page_size = PAGE_2M;
-            U_offset_ = 0;
-            V_offset_ = utils::rnd_up(U_sz_, page_size);
-            M_offset_ = V_offset_ + utils::rnd_up(V_sz_, page_size);
-            scratchpad_sz_ = M_offset_ + M_sz_;
-            if (bias_sz_) {
-                bias_offset_ = M_offset_ + utils::rnd_up(M_sz_, page_size);
-                scratchpad_sz_ = bias_offset_ + bias_sz_;
-            }
-            scratchpad_ = create_scratchpad(scratchpad_sz_);
-        }
+    scratchpad.book(key_wino_U, sizeof(float) * U_sz, PAGE_2M);
+    scratchpad.book(key_wino_V, sizeof(float) * V_sz, PAGE_2M);
+    scratchpad.book(key_wino_M, sizeof(float) * M_sz, PAGE_2M);
 
-        scratchpad_t *scratchpad_;
-        size_t nthreads_;
-        size_t scratchpad_sz_ = 0, U_sz_ = 0, V_sz_ = 0, M_sz_ = 0,
-               bias_sz_ = 0;
-        size_t U_offset_ = 0;
-        size_t V_offset_ = 0;
-        size_t M_offset_ = 0;
-        size_t bias_offset_ = 0;
-};
+    if (one_of(jcp.sched_policy, WSCHED_WEI_SDGtWo, WSCHED_WEI_S_D_Giot_W)) {
+        size_t br_sz = (size_t)jcp.nthr * jcp.oc;
+        scratchpad.book(key_conv_bia_reduction, sizeof(float) * br_sz, PAGE_2M);
+    }
+}
 }
 
 template <bool is_fwd>
@@ -145,80 +79,86 @@ struct _jit_avx512_core_fp32_wino_conv_4x3_t {
 
     _jit_avx512_core_fp32_wino_conv_4x3_t(
             const jit_conv_winograd_conf_t &jcp, const primitive_attr_t *attr)
-        : kernel_(nullptr), scratchpad_(nullptr), attr_(attr) {
+        : kernel_(nullptr), attr_(attr) {
             kernel_ =  new _jit_avx512_core_fp32_wino_conv_4x3_data_kernel(jcp);
-            scratchpad_ = new winograd::winograd_scratchpad_avx512_core_t(jcp);
         }
 
-    ~_jit_avx512_core_fp32_wino_conv_4x3_t() {
-        delete kernel_;
-        delete scratchpad_;
-    };
+    ~_jit_avx512_core_fp32_wino_conv_4x3_t() { delete kernel_; }
 
     protected:
         void weight_transform_data(const jit_conv_winograd_conf_t &jcp,
-            float *wp, float *twp);
+            float *wp, float *twp) const;
         void input_transform_data(int image,
             const jit_conv_winograd_conf_t &jcp,
-            float *inp, float *tinp);
+            float *inp, float *tinp) const;
         void input_transform_tileblock_data(int tile_block,
             const jit_conv_winograd_conf_t &jcp,
-            float *inp, float *tinp);
+            float *inp, float *tinp) const;
         void output_transform_data(int image,
             const jit_conv_winograd_conf_t &jcp,
-            const post_ops_t &p_ops, float *toutp, float *pout_b, float *bias);
+            const post_ops_t &p_ops, float *toutp, float *pout_b,
+            float *bias) const;
         void output_transform_tileblock_data(int tile_block,
             const jit_conv_winograd_conf_t &jcp, const post_ops_t &p_ops,
-            float *toutp, float *outp, float *bias);
+            float *toutp, float *outp, float *bias) const;
         void _execute_data_W_S_G_D(const int MB, float *inp_ptr, float *out_ptr,
-                float *wei_ptr, float *bias_ptr = NULL);
+                float *wei_ptr, float *bias_ptr,
+                const memory_tracking::grantor_t &scratchpad) const;
         void _execute_data_W_SGD(const int MB, float *inp_ptr, float *out_ptr,
-                float *wei_ptr, float *bias_ptr = NULL);
+                float *wei_ptr, float *bias_ptr,
+                const memory_tracking::grantor_t &scratchpad) const;
         _jit_avx512_core_fp32_wino_conv_4x3_data_kernel *kernel_;
-        // Buffer required to store transforms in the frequency domain
-        winograd::winograd_scratchpad_avx512_core_t *scratchpad_;
         const primitive_attr_t *attr_;
 };
 
-template <bool with_relu>
-struct _jit_avx512_core_fp32_wino_conv_4x3_fwd_t
+struct jit_avx512_core_fp32_wino_conv_4x3_fwd_t
      : _jit_avx512_core_fp32_wino_conv_4x3_t<true>
      , public cpu_primitive_t
     {
-    struct pd_t : public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+    struct pd_t : public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_wino_4x3:", avx512_core, ""),
-                _jit_avx512_core_fp32_wino_conv_4x3_fwd_t<with_relu>);
+                jit_avx512_core_fp32_wino_conv_4x3_fwd_t);
 
         virtual status_t init() override
         {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
-                    && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                    && utils::one_of(this->desc()->prop_kind, forward_training,
                                forward_inference)
-                    && this->cdesc_().alg_kind == alg_kind::convolution_winograd
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
                     && utils::everyone_is(data_type::f32,
-                               this->cdesc_().src_desc.data_type,
-                               this->cdesc_().weights_desc.data_type,
-                               this->cdesc_().dst_desc.data_type)
+                               this->desc()->src_desc.data_type,
+                               this->desc()->weights_desc.data_type,
+                               this->desc()->dst_desc.data_type)
                     && IMPLICATION(this->with_bias(), data_type::f32
-                                       == this->cdesc_().bias_desc.data_type)
+                                       == this->desc()->bias_desc.data_type)
                     && mkldnn_thr_syncable();
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel::init_conf(jcp_,
-                    this->cdesc_(), this->src_pd_,
-                    this->weights_pd_, this->dst_pd_,
-                    *this->attr(), with_relu, this->negative_slope());
+            status_t status =
+                jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel::init_conf(jcp_,
+                        *this->desc(), this->src_pd_, this->weights_pd_,
+                        this->dst_pd_, *this->attr());
+            if (status != status::success) return status;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+            winograd_avx512_core::init_scratchpad(scratchpad, jcp_);
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return status;
         }
 
         jit_conv_winograd_conf_t jcp_;
@@ -232,7 +172,7 @@ struct _jit_avx512_core_fp32_wino_conv_4x3_fwd_t
             if (this->dst_pd_.desc()->format == any)
                 CHECK(this->dst_pd_.set_format(nChw16c));
             if (this->weights_pd_.desc()->format == any
-                    && (this->cdesc_().prop_kind != mkldnn_forward_inference))
+                    && (this->desc()->prop_kind != mkldnn_forward_inference))
                 CHECK(this->weights_pd_.set_format(
                         this->with_groups() ? gOIhw16i16o : OIhw16i16o));
             if (this->bias_pd_.desc()->format == any)
@@ -241,29 +181,30 @@ struct _jit_avx512_core_fp32_wino_conv_4x3_fwd_t
         }
     };
 
-    _jit_avx512_core_fp32_wino_conv_4x3_fwd_t(const pd_t *pd,
+    jit_avx512_core_fp32_wino_conv_4x3_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : _jit_avx512_core_fp32_wino_conv_4x3_t<true>(pd->jcp_, pd->attr())
-        , cpu_primitive_t(&conf_, inputs, outputs)
-        , conf_(*pd) {}
+        : _jit_avx512_core_fp32_wino_conv_4x3_t<true>(apd->jcp_, apd->attr())
+        , cpu_primitive_t(apd, inputs, outputs, true)
+         {}
 
-    ~_jit_avx512_core_fp32_wino_conv_4x3_fwd_t(){};
+    ~jit_avx512_core_fp32_wino_conv_4x3_fwd_t(){};
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         float *src = (float *)this->input_memory(0);
         float *dst = (float *)this->memory();
         float *weights = (float *)this->input_memory(1);
         float *bias = (float *)this->input_memory(2);
+        auto scratchpad = this->scratchpad();
 
-        switch ((conf_.jcp_).sched_policy) {
+        switch ((pd()->jcp_).sched_policy) {
         case WSCHED_DATA_W_S_G_D:
-            this->_execute_data_W_S_G_D(conf_.MB(), src, dst, weights, bias);
+            this->_execute_data_W_S_G_D(pd()->MB(), src, dst, weights, bias, scratchpad);
             break;
         case WSCHED_DATA_W_SGD:
-            this->_execute_data_W_SGD(conf_.MB(), src, dst, weights, bias);
+            this->_execute_data_W_SGD(pd()->MB(), src, dst, weights, bias, scratchpad);
             break;
         default:
             break;
@@ -272,14 +213,9 @@ struct _jit_avx512_core_fp32_wino_conv_4x3_fwd_t
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
-using jit_avx512_core_fp32_wino_conv_4x3_fwd_t
-        = _jit_avx512_core_fp32_wino_conv_4x3_fwd_t<false>;
-using jit_avx512_core_fp32_wino_conv_4x3_relu_t
-        = _jit_avx512_core_fp32_wino_conv_4x3_fwd_t<true>;
-
 struct jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t
         : _jit_avx512_core_fp32_wino_conv_4x3_t<false>,
         public cpu_primitive_t {
@@ -300,7 +236,9 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
                     && utils::one_of(this->desc()->prop_kind, backward_data)
-                    && this->desc()->alg_kind == alg_kind::convolution_winograd
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
                     && utils::everyone_is(data_type::f32,
                                this->desc()->diff_src_desc.data_type,
                                this->desc()->weights_desc.data_type,
@@ -309,10 +247,20 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_core_fp32_wino_conv_4x3_bwd_data_kernel::
-                    init_conf(jcp_, *this->desc(), *this->diff_src_pd_.desc(),
-                            *this->weights_pd_.desc(),
-                            *this->diff_dst_pd_.desc());
+            status_t status =
+                jit_avx512_core_fp32_wino_conv_4x3_bwd_data_kernel::init_conf(
+                        jcp_, *this->desc(), *this->diff_src_pd_.desc(),
+                        *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
+            if (status != status::success) return status;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+            winograd_avx512_core::init_scratchpad(scratchpad, jcp_);
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return status;
         }
 
         jit_conv_winograd_conf_t jcp_;
@@ -333,30 +281,33 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t
         }
     };
 
-    jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t(const pd_t *pd,
+    jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : _jit_avx512_core_fp32_wino_conv_4x3_t<false>(pd->jcp_, pd->attr())
-        , cpu_primitive_t(&conf_, inputs, outputs)
-        , conf_(*pd) {}
+        : _jit_avx512_core_fp32_wino_conv_4x3_t<false>(apd->jcp_, apd->attr())
+        , cpu_primitive_t(apd, inputs, outputs, true)
+         {}
 
     ~jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t(){};
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         float *diff_dst = (float *)this->input_memory(0);
         float *diff_src = (float *)this->memory();
         float *weights = (float *)this->input_memory(1);
+        auto scratchpad = this->scratchpad();
 
-        if (conf_.desc()->prop_kind == prop_kind::backward_data) {
-            switch ((conf_.jcp_).sched_policy) {
+        if (pd()->desc()->prop_kind == prop_kind::backward_data) {
+            switch ((pd()->jcp_).sched_policy) {
             case WSCHED_DATA_W_S_G_D:
-                this->_execute_data_W_S_G_D(conf_.MB(), diff_dst, diff_src, weights, NULL);
+                this->_execute_data_W_S_G_D(pd()->MB(), diff_dst, diff_src, weights, NULL,
+                        scratchpad);
                 break;
 
             case WSCHED_DATA_W_SGD:
-                this->_execute_data_W_SGD(conf_.MB(), diff_dst, diff_src, weights, NULL);
+                this->_execute_data_W_SGD(pd()->MB(), diff_dst, diff_src, weights, NULL,
+                        scratchpad);
                 break;
 
             default:
@@ -370,7 +321,7 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_data_t
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 struct jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t
@@ -393,7 +344,9 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true && this->set_default_params() == status::success
                     && utils::one_of(this->desc()->prop_kind, backward_weights)
-                    && this->desc()->alg_kind == alg_kind::convolution_winograd
+                    && utils::one_of(this->desc()->alg_kind,
+                               alg_kind::convolution_auto,
+                               alg_kind::convolution_winograd)
                     && utils::everyone_is(data_type::f32,
                                this->desc()->src_desc.data_type,
                                this->desc()->diff_dst_desc.data_type,
@@ -402,10 +355,21 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t
             if (!ok)
                 return status::unimplemented;
 
-            return jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel::
-                    init_conf(jcp_, *this->desc(), *this->src_pd_.desc(),
-                            *this->diff_dst_pd_.desc(),
-                            *this->diff_weights_pd_.desc());
+            status_t status =
+                jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel::
+                init_conf(jcp_, *this->desc(), *this->src_pd_.desc(),
+                        *this->diff_dst_pd_.desc(),
+                        *this->diff_weights_pd_.desc());
+            if (status != status::success) return status;
+
+            auto scratchpad = this->scratchpad_registry().registrar();
+            winograd_avx512_core::init_scratchpad(scratchpad, jcp_);
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_winograd));
+
+            return status;
         }
 
         jit_conv_winograd_conf_t jcp_;
@@ -428,37 +392,32 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t
         }
     };
 
-    jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t(const pd_t *pd,
+    jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs)
-        , conf_(*pd)
+        : cpu_primitive_t(apd, inputs, outputs, true)
         , kernel_(nullptr)
-        , scratchpad_(nullptr)
     {
-        auto jcp = conf_.jcp_;
         kernel_ = new jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel(
-                jcp);
-        scratchpad_ = new winograd::winograd_scratchpad_avx512_core_t(jcp);
+                pd()->jcp_);
     }
 
     ~jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t()
     {
         delete kernel_;
-        delete scratchpad_;
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
-        if (conf_.desc()->prop_kind == prop_kind::backward_weights) {
+        if (pd()->desc()->prop_kind == prop_kind::backward_weights) {
             const auto &jcp = kernel_->jcp;
             switch (jcp.sched_policy) {
             case WSCHED_WEI_SDGtWo:
-                _execute_backward_weights_SDGtWo();
+                _execute_backward_weights_SDGtWo(scratchpad());
                 break;
             case WSCHED_WEI_S_D_Giot_W:
-                _execute_backward_weights_S_D_Giot_W();
+                _execute_backward_weights_S_D_Giot_W(scratchpad());
                 break;
             default:
                 assert(jcp.sched_policy != WSCHED_INVALID);
@@ -471,14 +430,13 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_t
     }
 
 private:
-    void _execute_backward_weights_SDGtWo();
-    void _execute_backward_weights_S_D_Giot_W();
+    void _execute_backward_weights_SDGtWo(
+            const memory_tracking::grantor_t &scratchpad) const;
+    void _execute_backward_weights_S_D_Giot_W(
+            const memory_tracking::grantor_t &scratchpad) const;
 
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel *kernel_;
-
-    // Buffer required to store transforms in the frequency domain
-    winograd::winograd_scratchpad_avx512_core_t *scratchpad_;
 };
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.cpp
index 831f182aa..164bbe088 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.cpp
@@ -62,6 +62,41 @@ int get_divisor_satisfying_cond(jit_conv_winograd_conf_t &jcp, int number,
     return best_divisor;
 }
 
+namespace {
+bool is_winograd_faster_than_direct(const jit_conv_winograd_conf_t &jcp) {
+    /* Determines if current winograd implementation is faster than direct.
+       Following conditions are empirical and based on performance data */
+    unsigned int ncores_per_socket =
+        cpu.getNumCores(Xbyak::util::IntelCpuTopologyLevel::CoreLevel);
+    unsigned int nthreads = mkldnn_get_max_threads();
+
+    if (jcp.prop_kind == prop_kind::forward_inference) {
+        return jcp.mb >= 4;
+    } else if (nthreads > ncores_per_socket) {
+        double src_dst_transforms_per_core = alpha * alpha
+            * (jcp.ic + jcp.oc)
+            * jcp.mb * ((jcp.oh + tile_size - 1) / tile_size)
+            * ((jcp.ow + tile_size - 1) / tile_size)
+            * sizeof(float) / 1024. / 1024. / nthreads;
+        double wei_transform = alpha * alpha
+            * jcp.ic * jcp.oc * sizeof(float) /1024. / 1024.;
+
+        if (jcp.prop_kind == prop_kind::backward_weights) {
+            if (src_dst_transforms_per_core < 0.3
+                    || (src_dst_transforms_per_core <= 28 && wei_transform < 4))
+                return false;
+            else
+                return true;
+        } else {
+            if (src_dst_transforms_per_core < 2.0 || wei_transform < 0.02)
+                return false;
+        }
+    }
+
+    return jcp.mb > 8;
+}
+}
+
 /* assumes 512 bits registers */
 /* TODO: add support for strides */
 /* TODO: handle the prefetch distance automatically */
@@ -730,16 +765,16 @@ void _jit_avx512_core_fp32_wino_conv_4x3_data_kernel
                     vaddps(zmm_O, zmm_O, ptr[oreg_bias]);
                 }
                 if (with_relu) {
-                    Opmask kmask = Opmask(7);
-                    if (jcp.eltwise_alpha == 0) {
-                        zmm_relu_ns = zmm_zero;
+                    if (jcp.eltwise.alpha == 0) {
+                        vmaxps(zmm_O, zmm_O, zmm_zero);
                     } else {
-                        mov(imm_addr64, float2int(jcp.eltwise_alpha));
+                        Opmask kmask = Opmask(7);
+                        mov(imm_addr64, float2int(jcp.eltwise.alpha));
                         vmovq(xmm_relu_ns, imm_addr64);
                         vbroadcastss(zmm_relu_ns, xmm_relu_ns);
+                        vcmpps(kmask, zmm_O, zmm_zero, _cmp_lt_os);
+                        vmulps(zmm_O | kmask, zmm_O, zmm_relu_ns);
                     }
-                    vcmpps(kmask, zmm_O, zmm_zero, _cmp_lt_os);
-                    vmulps(zmm_O | kmask, zmm_O, zmm_relu_ns);
                 }
             }
             if (with_sum) {
@@ -1095,6 +1130,9 @@ status_t _jit_avx512_core_fp32_wino_conv_4x3_data_kernel::init_conf_common(
     if (!mayiuse(avx512_core)) {
         return status::unimplemented;
     }
+
+    jcp.nthr = mkldnn_get_max_threads();
+
     jcp.ver = ver_avx512_core;
     jcp.prop_kind = cd.prop_kind;
 
@@ -1133,6 +1171,10 @@ status_t _jit_avx512_core_fp32_wino_conv_4x3_data_kernel::init_conf_common(
     }
 
     // Checking conditions not supported by these kernels
+    if (!IMPLICATION(cd.alg_kind == alg_kind::convolution_auto,
+               is_winograd_faster_than_direct(jcp)))
+        return status::unimplemented;
+
     if (jcp.ngroups != 1)
         return status::unimplemented;
     if ((jcp.kh != 3) || (jcp.kw != 3))
@@ -1366,28 +1408,16 @@ bool jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel::post_ops_ok(
         jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
     const auto &p = attr.post_ops_;
 
-    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_relu = [&](int idx) { return p.entry_[idx].is_relu(); };
     auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
 
     switch (p.len_) {
-    case 0:
-        return true; // no post_ops
-    case 1:
-        return true // relu or sum
-                && IMPLICATION(jcp.with_eltwise, is_sum(0))
-                && IMPLICATION(!jcp.with_eltwise, is_eltwise(0) || is_sum(0));
-    case 2:
-        return true // sum->relu or relu->sum
-                && IMPLICATION(jcp.with_eltwise, is_sum(0) && is_eltwise(1))
-                && IMPLICATION(!jcp.with_eltwise, false
-                                   || (is_sum(0) && is_eltwise(1))
-                                   || (is_eltwise(0) && is_sum(1)));
-    case 3:
-        return true // relu->sum->relu
-                && jcp.with_eltwise == false
-                && (is_eltwise(0) && is_sum(1) && is_eltwise(2));
-    default:
-        return false;
+    case 0: return true; // no post_ops
+    case 1: return is_relu(0) || is_sum(0); // relu or sum
+    case 2: return (is_sum(0) && is_relu(1))
+                      || (is_relu(0) && is_sum(1)); // sum->relu or relu->sum
+    case 3: return is_relu(0) && is_sum(1) && is_relu(2); // relu->sum->relu
+    default: return false;
     }
 
     return false;
@@ -1396,8 +1426,7 @@ bool jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel::post_ops_ok(
 status_t jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel::init_conf(
         jit_conv_winograd_conf_t &jcp, const convolution_desc_t &cd,
         const cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &weights_pd,
-        const cpu_memory_t::pd_t &dst_pd, const primitive_attr_t &attr,
-        bool with_relu, float relu_negative_slope) {
+        const cpu_memory_t::pd_t &dst_pd, const primitive_attr_t &attr) {
 
     status_t st = init_conf_common(jcp, cd,
                         *src_pd.desc(), *weights_pd.desc(), *dst_pd.desc());
@@ -1411,18 +1440,16 @@ status_t jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel::init_conf(
     jcp.ntiles = jcp.mb * jcp.itiles * jcp.jtiles;
 
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
     const auto &p = attr.post_ops_;
-    if (!jcp.with_eltwise) {
-        /* PostOps ReLU before SUM is handled the same as ReLU primitive */
-        jcp.with_eltwise = p.find(primitive_kind::eltwise, 0, 1) != -1;
-        jcp.eltwise_alpha = 0.f;
-    }
+    const int eltwise_ind = p.find(primitive_kind::eltwise, 0, 1);
+    jcp.with_eltwise = eltwise_ind != -1;
+    if (jcp.with_eltwise)
+        jcp.eltwise = p.entry_[eltwise_ind].eltwise;
+
     jcp.with_sum = p.find(primitive_kind::sum, 0) != -1;
     jcp.with_relu_postsum = p.find(primitive_kind::eltwise, 1) != -1;
 
@@ -2376,6 +2403,8 @@ status_t set_wsched_WEI_SDGtWo(jit_conv_winograd_conf_t &jcp) {
                                 jcp.dimM_block = M_blk;
                                 jcp.sched_policy = WSCHED_WEI_SDGtWo;
                                 set_jcp_WEI_params(jcp);
+                                jcp.nthr = nstl::min(mkldnn_get_max_threads(),
+                                        jcp.tile_block);
                                 return status::success;
                             }
                         }
@@ -2467,6 +2496,9 @@ status_t jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel::init_conf(
     else
         jcp.ver = ver_avx512_core;
 
+    jcp.nthr = mkldnn_get_max_threads();
+
+    jcp.prop_kind = cd.prop_kind;
     const bool with_groups = diff_weights_d.ndims() == src_d.ndims() + 1;
     jcp.mb = src_d.dims()[0];
     jcp.ngroups = with_groups ? diff_weights_d.dims()[0] : 1;
@@ -2507,6 +2539,10 @@ status_t jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel::init_conf(
     jcp.ntiles = jcp.mb * jcp.itiles * jcp.jtiles;
 
     // Winograd kernel works only for 3x3 convolution with stride 1
+    if (!IMPLICATION(cd.alg_kind == alg_kind::convolution_auto,
+               is_winograd_faster_than_direct(jcp)))
+        return status::unimplemented;
+
     if (jcp.ngroups != 1)
         return status::unimplemented;
     if ((jcp.kh != 3) || (jcp.kw != 3))
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.hpp
index eb9d7fd2d..c9f155931 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_fp32_wino_conv_4x3_kernel.hpp
@@ -161,8 +161,7 @@ struct jit_avx512_core_fp32_wino_conv_4x3_fwd_kernel
     static status_t init_conf(jit_conv_winograd_conf_t &jcp,
             const convolution_desc_t &cd, const cpu_memory_t::pd_t &src_pd,
             cpu_memory_t::pd_t &weights_pd, const cpu_memory_t::pd_t &dst_pd,
-            const primitive_attr_t &attr, bool with_relu,
-            float relu_negative_slope);
+            const primitive_attr_t &attr);
 };
 
 struct jit_avx512_core_fp32_wino_conv_4x3_bwd_data_kernel
@@ -188,7 +187,7 @@ struct jit_avx512_core_fp32_wino_conv_4x3_bwd_weights_kernel
         //******************* First iter kernel ********************//
         this->gemm_loop_generate(true);
         gemm_loop_ker_first_iter = (decltype(gemm_loop_ker_first_iter))this->getCode();
-        
+
         align();
         const Xbyak::uint8 *addr = getCurr();
         this->src_transform_generate();
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_i8i8_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_i8i8_pooling.cpp
deleted file mode 100644
index f51c956f5..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_i8i8_pooling.cpp
+++ /dev/null
@@ -1,582 +0,0 @@
-/*******************************************************************************
-* Copyright 2017-2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include <math.h>
-
-#include "mkldnn_types.h"
-
-#include "mkldnn_thread.hpp"
-#include "utils.hpp"
-
-#include "jit_generator.hpp"
-
-#include "jit_avx512_core_i8i8_pooling.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using namespace Xbyak;
-
-using namespace mkldnn::impl::utils;
-using namespace mkldnn::impl::memory_format;
-using namespace mkldnn::impl::utils;
-using namespace mkldnn::impl::types;
-using namespace alg_kind;
-
-struct jit_avx512_core_i8i8_pool_fwd_ker_t: public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_i8i8_pool_fwd_ker_t)
-
-    struct call_params_t {
-        const char *src_i8;
-        const char *dst_i8;
-        size_t kw_range;
-        size_t kh_range;
-        float idivider;
-    };
-
-    Reg64 reg_ptr_src_i8 = r8;
-    Reg64 reg_ptr_dst_i8 = r9;
-
-    Reg64 ki = r10;
-    Reg64 kj = r11;
-    Reg64 reg_kw = r12;
-    Reg64 reg_kh = r13;
-    Reg64 c_iter = r14;
-
-    Reg64 aux_reg_src_h = rax;
-    Reg64 aux_reg_src_w = rbx;
-
-    Reg64 reg_tmp = rdx;
-
-    Reg64 reg_mask = r15;
-
-    Opmask k_cmp_mask = Opmask(7);
-
-    Opmask mask(int idx) {
-        return Opmask(6 - idx);
-    }
-
-    Xmm xmm_tmp = Xmm(0);
-    Zmm vreg_tmp = Zmm(30);
-    Zmm vreg_zeros = Zmm(31);
-
-    size_t sizeof_src_dt() const { return data_type_size(jpp.src_dt); }
-    size_t sizeof_dst_dt() const { return data_type_size(jpp.dst_dt); }
-
-    /* max pooling */
-    Zmm vreg_src(int idx) {
-        return Zmm(idx);
-    }
-
-    Zmm vreg_dst(int idx) {
-        return Zmm(jpp.ur_c + idx);
-    }
-
-    /* avg pooling */
-    Zmm vreg_src_s32(int jj, int ll) {
-        return Zmm(12*jj + ll);
-    }
-
-    Zmm vreg_dst_s32(int jj, int ll) {
-        return Zmm(12*jj + ll + 4);
-    }
-
-    Zmm vreg_dst_f32(int jj, int ll) {
-        return Zmm(12*jj + ll + 8);
-    }
-
-    void (*ker_)(const call_params_t *);
-    jit_pool_conf_t jpp;
-
-    void init_tmp_reg();
-    void init_mask();
-
-    void load_src(int jj, int ll, int c_tail);
-    void store_dst(int jj, int ll, int c_tail);
-
-    void compute_avg_step(int ur_c, int c_tail);
-    void compute_max_step(int ur_c, int c_tail);
-    void compute_step(int ur_c, int c_tail);
-
-    void compute_c_block();
-    void generate();
-
-    static status_t init_conf(jit_pool_conf_t &jpp,
-        const pooling_desc_t &pd, const memory_desc_wrapper &src_d,
-        const memory_desc_wrapper &dst_d);
-
-    jit_avx512_core_i8i8_pool_fwd_ker_t(const jit_pool_conf_t &jpp_)
-           : jpp(jpp_) {
-        generate();
-        ker_ = reinterpret_cast<decltype(ker_)>(const_cast<uint8_t*>(
-                       getCode()));
-    }
-};
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::load_src(int jj, int ll, int c_tail) {
-    using namespace data_type;
-
-    int c_block = jpp.c_block;
-    int ur_c = jpp.ur_c;
-
-    switch (jpp.alg) {
-        case pooling_max: {
-            auto offset = jj*c_block*sizeof_src_dt();
-            if (jj == ur_c - 1 && c_tail) {
-                if (jpp.src_dt == data_type::s32) {
-                    vmovups(vreg_src(jj) | mask(0),
-                            ptr[aux_reg_src_w + offset]);
-                } else {
-                    vmovdqu8(vreg_src(jj) | mask(0),
-                            ptr[aux_reg_src_w + offset]);
-                }
-            } else {
-                vmovups(vreg_src(jj), ptr[aux_reg_src_w + offset]);
-            }
-            break;
-        }
-        case pooling_avg_include_padding:
-        case pooling_avg_exclude_padding: {
-            auto offset = (ll*(c_block/4) + jj*c_block)*sizeof_src_dt();
-            if (jj == jpp.ur_c - 1 && c_tail) {
-                if (jpp.tail[ll]) {
-                    switch (jpp.src_dt) {
-                        case s32:
-                            vmovups(vreg_src_s32(jj, ll) | mask(ll),
-                                    ptr[aux_reg_src_w + offset]);
-                            break;
-                        case s8:
-                            vpmovsxbd(vreg_src_s32(jj, ll) | mask(ll),
-                                    ptr[aux_reg_src_w + offset]);
-                            break;
-                        case u8:
-                            vpmovzxbd(vreg_src_s32(jj, ll) | mask(ll),
-                                    ptr[aux_reg_src_w + offset]);
-                            break;
-                        default: assert(!"unsupported src data type");
-                    }
-                }
-            } else {
-                switch (jpp.src_dt) {
-                    case s32:
-                        vmovups(vreg_src_s32(jj, ll),
-                                ptr[aux_reg_src_w + offset]);
-                        break;
-                    case s8:
-                        vpmovsxbd(vreg_src_s32(jj, ll),
-                                ptr[aux_reg_src_w + offset]);
-                        break;
-                    case u8:
-                        vpmovzxbd(vreg_src_s32(jj, ll),
-                                ptr[aux_reg_src_w + offset]);
-                        break;
-                    default: assert(!"unsupported src data type");
-                }
-            }
-            break;
-        }
-        default: assert(!"unsupported algorithm");
-    }
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::store_dst(int jj, int ll,
-        int c_tail) {
-    using namespace data_type;
-
-    int c_block = jpp.c_block;
-    int ur_c = jpp.ur_c;
-
-    switch(jpp.alg) {
-        case pooling_max: {
-            auto offset = jj*c_block*sizeof_dst_dt();
-            if (jj == ur_c - 1 && c_tail) {
-                if (jpp.src_dt == data_type::s32) {
-                    vmovups(ptr[reg_ptr_dst_i8 + offset],
-                           vreg_dst(jj) | mask(0));
-                } else {
-                    vmovdqu8(ptr[reg_ptr_dst_i8 + offset],
-                            vreg_dst(jj) | mask(0));
-                }
-            } else {
-                vmovups(ptr[reg_ptr_dst_i8 + offset], vreg_dst(jj));
-            }
-            break;
-        }
-        case pooling_avg_include_padding:
-        case pooling_avg_exclude_padding: {
-            auto offset = (ll*(c_block/4) + jj*c_block)*sizeof_dst_dt();
-            if (jj == ur_c - 1 && c_tail) {
-                if (jpp.tail[ll]) {
-                    switch (jpp.dst_dt) {
-                        case s32:
-                            vmovups(ptr[reg_ptr_dst_i8 + offset],
-                                vreg_dst_s32(jj, ll) | mask(ll));
-                            break;
-                        case s8:
-                            vpmovdb(ptr[reg_ptr_dst_i8 + offset],
-                                vreg_dst_s32(jj, ll) | mask(ll));
-                            break;
-                        case u8:
-                            vpmovusdb(ptr[reg_ptr_dst_i8 + offset],
-                                vreg_dst_s32(jj, ll) | mask(ll));
-                            break;
-                        default: assert(!"unsupported dst data_type");
-                    }
-                }
-            } else {
-                switch (jpp.dst_dt) {
-                    case s32:
-                        vmovups(ptr[reg_ptr_dst_i8 + offset],
-                            vreg_dst_s32(jj, ll));
-                        break;
-                    case s8:
-                        vpmovdb(ptr[reg_ptr_dst_i8 + offset],
-                            vreg_dst_s32(jj, ll));
-                        break;
-                    case u8:
-                        vpmovusdb(ptr[reg_ptr_dst_i8 + offset],
-                            vreg_dst_s32(jj, ll));
-                        break;
-                    default: assert(!"unsuppotred dst data_type");
-                }
-            }
-            break;
-        }
-        default: assert(!"unsupported pooling algorithm");
-    }
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::compute_max_step(int ur_c, int c_tail)
-{
-    Label l_kw, l_kh;
-
-    int iw = jpp.iw;
-    int c = jpp.c;
-
-    for (int jj = 0; jj < ur_c; jj++)
-        vmovups(vreg_dst(jj), vreg_tmp);
-
-    mov(aux_reg_src_h, reg_ptr_src_i8);
-
-    xor_(kj, kj);
-    L(l_kh);
-    {
-        mov(aux_reg_src_w, aux_reg_src_h);
-        xor_(ki, ki);
-        L(l_kw);
-        {
-            for (int jj = 0; jj < ur_c; jj++) {
-                load_src(jj, 0, c_tail);
-                if (jpp.src_dt == data_type::s32) {
-                    vpcmpd(k_cmp_mask, vreg_dst(jj), vreg_src(jj), _cmp_lt_os);
-                    vpblendmd(vreg_dst(jj) | k_cmp_mask, vreg_dst(jj),
-                            vreg_src(jj));
-                } else {
-                    if (jpp.src_dt == data_type::s8)
-                        vpcmpb(k_cmp_mask, vreg_dst(jj), vreg_src(jj),
-                                _cmp_lt_os);
-                    else
-                        vpcmpub(k_cmp_mask, vreg_dst(jj), vreg_src(jj),
-                                _cmp_lt_os);
-                    vpblendmb(vreg_dst(jj) | k_cmp_mask, vreg_dst(jj),
-                            vreg_src(jj));
-                }
-            }
-            add(aux_reg_src_w, c * sizeof_src_dt());
-            inc(ki);
-            cmp(ki, reg_kw);
-            jl(l_kw, T_NEAR);
-        }
-        add(aux_reg_src_h, iw * c * sizeof_src_dt());
-        inc(kj);
-        cmp(kj, reg_kh);
-        jl(l_kh, T_NEAR);
-    }
-
-    for (int jj = 0; jj < ur_c; jj++)
-        store_dst(jj, 0, c_tail);
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::compute_avg_step(int ur_c, int c_tail)
-{
-    using namespace data_type;
-
-    Label l_kw, l_kh;
-
-    int iw = jpp.iw;
-    int c = jpp.c;
-
-    int num_ll = jpp.src_dt == data_type::s32 ? 1 : 4;
-
-    for (int jj = 0; jj < ur_c; jj++) {
-        for (int ll = 0; ll < 4; ll++) {
-            uni_vpxor(vreg_src_s32(jj, ll),
-                    vreg_src_s32(jj, ll), vreg_src_s32(jj, ll));
-            uni_vpxor(vreg_dst_s32(jj, ll),
-                    vreg_dst_s32(jj, ll), vreg_dst_s32(jj, ll));
-        }
-    }
-
-    mov(aux_reg_src_h, reg_ptr_src_i8);
-
-    xor_(kj, kj);
-    L(l_kh);
-    {
-        mov(aux_reg_src_w, aux_reg_src_h);
-        xor_(ki, ki);
-        L(l_kw);
-        {
-            for (int jj = 0; jj < ur_c; jj++) {
-                for (int ll = 0; ll < num_ll; ll++) {
-                    load_src(jj, ll, c_tail);
-                    vpaddd(vreg_dst_s32(jj, ll),
-                            vreg_dst_s32(jj, ll), vreg_src_s32(jj, ll));
-                }
-            }
-            add(aux_reg_src_w, c * sizeof_src_dt());
-            inc(ki);
-            cmp(ki, reg_kw);
-            jl(l_kw, T_NEAR);
-        }
-        add(aux_reg_src_h, iw * c * sizeof_src_dt());
-        inc(kj);
-        cmp(kj, reg_kh);
-        jl(l_kh, T_NEAR);
-    }
-
-    for (int jj = 0; jj < ur_c; jj++) {
-        for (int ll = 0; ll < num_ll; ll++) {
-            vcvtdq2ps(vreg_dst_f32(jj, ll), vreg_dst_s32(jj, ll));
-            vfmadd132ps(vreg_dst_f32(jj, ll), vreg_zeros, vreg_tmp);
-            vcvtps2dq(vreg_dst_s32(jj, ll) | T_rn_sae, vreg_dst_f32(jj, ll));
-
-            store_dst(jj, ll, c_tail);
-        }
-    }
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::compute_step(int ur_c, int c_tail) {
-    switch (jpp.alg) {
-        case pooling_max:
-            compute_max_step(ur_c, c_tail); break;
-        case pooling_avg_include_padding:
-        case pooling_avg_exclude_padding:
-            compute_avg_step(ur_c, c_tail); break;
-        default: assert(!"unsupported pooling algorithm");
-    }
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::compute_c_block(){
-    Label l_main_loop;
-
-    int nb_c = jpp.nb_c;
-    int c_block = jpp.c_block;
-    int ur_c = jpp.ur_c;
-    int ur_c_tail = jpp.ur_c_tail;
-    int c_steps = nb_c / ur_c;
-    int c_tail = jpp.c_tail;
-
-    xor_(c_iter, c_iter);
-    if (c_steps > 0) {
-        L(l_main_loop); {
-            compute_step(ur_c, 0);
-            add(reg_ptr_src_i8, ur_c*c_block*sizeof_src_dt());
-            add(reg_ptr_dst_i8, ur_c*c_block*sizeof_dst_dt());
-            inc(c_iter);
-            cmp(c_iter, c_steps);
-            jl(l_main_loop, T_NEAR);
-        }
-    }
-
-    if (ur_c_tail != 0) {
-        compute_step(ur_c_tail, c_tail);
-    }
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::init_mask() {
-    for (int i = 0; i < 4; i++) {
-        mov(reg_mask, jpp.tail[i]);
-        kmovq(mask(i), reg_mask);
-    }
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::init_tmp_reg() {
-    using namespace data_type;
-
-    switch (jpp.alg) {
-        case pooling_avg_include_padding:
-        case pooling_avg_exclude_padding:
-            mov(reg_tmp, ptr[abi_param1 + offsetof(call_params_t, idivider)]);
-            movq(xmm_tmp, reg_tmp);
-            vpbroadcastd(vreg_tmp, xmm_tmp);
-            break;
-        case pooling_max:
-            switch (jpp.src_dt) {
-                case s32:
-                    mov(reg_tmp, nstl::numeric_limits<int32_t>::lowest());
-                    break;
-                case s8:
-                    mov(reg_tmp, nstl::numeric_limits<int8_t>::lowest());
-                    break;
-                case u8:
-                    mov(reg_tmp, nstl::numeric_limits<uint8_t>::lowest());
-                    break;
-                default: assert(!"unsupported src data_type");
-            }
-
-            movq(xmm_tmp, reg_tmp);
-            if (jpp.src_dt == s32)
-                vpbroadcastd(vreg_tmp, xmm_tmp);
-            else
-                vpbroadcastb(vreg_tmp, xmm_tmp);
-            break;
-        default: assert(!"unsupported pooling algorithm");
-    }
-
-}
-
-void jit_avx512_core_i8i8_pool_fwd_ker_t::generate() {
-    preamble();
-
-#   define READ_PARAM(reg, field) \
-        mov(reg, ptr[abi_param1 + offsetof(call_params_t, field)])
-    READ_PARAM(reg_ptr_src_i8, src_i8);
-    READ_PARAM(reg_ptr_dst_i8, dst_i8);
-    READ_PARAM(reg_kw, kw_range);
-    READ_PARAM(reg_kh, kh_range);
-
-#   undef READ_PARAM
-
-    init_tmp_reg();
-    init_mask();
-
-    uni_vpxor(vreg_zeros, vreg_zeros, vreg_zeros);
-
-    compute_c_block();
-
-    postamble();
-}
-
-status_t jit_avx512_core_i8i8_pool_fwd_ker_t::init_conf(jit_pool_conf_t &jpp,
-        const pooling_desc_t &pd, const memory_desc_wrapper &src_d,
-        const memory_desc_wrapper &dst_d) {
-    if (!mayiuse(avx512_core)) {
-        return status::unimplemented;
-    }
-
-    jpp.mb = src_d.dims()[0];
-    jpp.c = src_d.dims()[1];
-    jpp.ih = src_d.dims()[2];
-    jpp.iw = src_d.dims()[3];
-    jpp.oh = dst_d.dims()[2];
-    jpp.ow = dst_d.dims()[3];
-
-    jpp.stride_h = pd.strides[0];
-    jpp.stride_w = pd.strides[1];
-    jpp.kh = pd.kernel[0];
-    jpp.kw = pd.kernel[1];
-
-    jpp.t_pad = pd.padding[0][0];
-    jpp.l_pad = pd.padding[0][1];
-
-    jpp.alg = pd.alg_kind;
-
-    jpp.src_dt = pd.src_desc.data_type;
-    jpp.dst_dt = pd.dst_desc.data_type;
-
-    jpp.c_block = 64 / (jpp.src_dt == data_type::s32 ? 4 : 1);
-    jpp.c_tail = jpp.c % jpp.c_block;
-    jpp.nb_c = jpp.c / jpp.c_block;
-    jpp.ur_c = 1;
-    jpp.ur_c_tail = jpp.nb_c - (jpp.nb_c / jpp.ur_c)*jpp.ur_c +
-            (jpp.c_tail != 0);
-
-    size_t tail_mask = (1ULL << jpp.c_tail) - 1;
-
-    switch(jpp.alg) {
-        case pooling_max:
-            jpp.tail[0] = tail_mask;
-            jpp.tail[1] = 0;
-            jpp.tail[2] = 0;
-            jpp.tail[3] = 0;
-            break;
-        case pooling_avg_include_padding:
-        case pooling_avg_exclude_padding:
-            jpp.tail[0] = tail_mask & 0xffff;
-            for (size_t i = 1, m = tail_mask; i < 4; i++) {
-                m = m >> 16;
-                jpp.tail[i] = m & 0xffff;
-            }
-            break;
-        default: return status::unimplemented;
-    }
-
-    return status::success;
-}
-
-status_t jit_avx512_core_i8i8_pooling_fwd_t::pd_t::jit_conf() {
-    return jit_avx512_core_i8i8_pool_fwd_ker_t::init_conf(jpp_,
-       desc_, src_pd_.desc(), dst_pd_.desc());
-}
-
-jit_avx512_core_i8i8_pooling_fwd_t::
-jit_avx512_core_i8i8_pooling_fwd_t(const pd_t *pd,
-          const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), ker_(nullptr)
-{ ker_ = new jit_avx512_core_i8i8_pool_fwd_ker_t(conf_.jpp_); }
-
-jit_avx512_core_i8i8_pooling_fwd_t::
-~jit_avx512_core_i8i8_pooling_fwd_t() { delete ker_; }
-
-void jit_avx512_core_i8i8_pooling_fwd_t::execute_forward() {
-    auto src_i8 = reinterpret_cast<const char *>(input_memory(0));
-    auto dst_i8 = reinterpret_cast<char *>(memory());
-
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-
-    const auto &jpp = conf_.jpp_;
-
-    parallel_nd(jpp.mb, jpp.oh, jpp.ow,
-            [&](int n, int oh, int ow) {
-        const int ih = nstl::max(oh*jpp.stride_h - jpp.t_pad, 0);
-        const int iw = nstl::max(ow*jpp.stride_w - jpp.l_pad, 0);
-
-        const int kh_start = nstl::max(0, jpp.t_pad - oh * jpp.stride_h);
-        const int kh_end = nstl::min(jpp.kh,
-                jpp.ih + jpp.t_pad - oh * jpp.stride_h);
-        const int kw_start = nstl::max(0, jpp.l_pad - ow * jpp.stride_w);
-        const int kw_end = nstl::min(jpp.kw,
-                jpp.iw + jpp.l_pad - ow * jpp.stride_w);
-
-        auto p = jit_avx512_core_i8i8_pool_fwd_ker_t::call_params_t();
-        p.src_i8 = &src_i8[
-            src_d.blk_off(n, 0, ih, iw) * src_d.data_type_size()];
-        p.dst_i8 = &dst_i8[
-            dst_d.blk_off(n, 0, oh, ow) * dst_d.data_type_size()];
-        p.kw_range = (size_t)(kw_end - kw_start);
-        p.kh_range = (size_t)(kh_end - kh_start);
-        p.idivider = 1.0f / ((jpp.alg == pooling_avg_exclude_padding) ?
-            p.kh_range*p.kw_range : jpp.kw*jpp.kh);
-
-        ker_->ker_(&p);
-    });
-}
-
-}
-}
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_deconvolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_deconvolution.cpp
deleted file mode 100644
index 6ea154286..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_deconvolution.cpp
+++ /dev/null
@@ -1,602 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "jit_avx512_core_u8s8s32x_deconvolution.hpp"
-
-#define GET_OFF(field) offsetof(jit_deconv_call_s, field)
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using namespace mkldnn::impl::status;
-using namespace mkldnn::impl::memory_format;
-using namespace mkldnn::impl::utils;
-
-using namespace nstl;
-
-#define wht_blk_off(d, g, ...) \
-        (conf_.with_groups() \
-         ? (d).blk_off((g), __VA_ARGS__) \
-         : (d).blk_off(__VA_ARGS__))
-
-status_t jit_avx512_core_u8s8s32x_deconv_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
-        const deconvolution_desc_t &cd, cpu_memory_t::pd_t &src_pd,
-        cpu_memory_t::pd_t &weights_pd, cpu_memory_t::pd_t &dst_pd,
-        const bool with_bias, cpu_memory_t::pd_t &bias_pd,
-        const primitive_attr_t &attr) {
-    const memory_desc_wrapper src_d(&src_pd);
-    const memory_desc_wrapper dst_d(&dst_pd);
-    const memory_desc_wrapper weights_d(&weights_pd);
-    const memory_desc_wrapper bias_d(&bias_pd);
-
-    if (!(mayiuse(avx512_core) &&
-            src_d.data_type() == data_type::u8
-         && weights_d.data_type() == data_type::s8
-         && one_of(dst_d.data_type(), data_type::f32, data_type::s32,
-            data_type::s8, data_type::u8)))
-        return status::unimplemented;
-
-    jcp = zero<decltype(jcp)>();
-
-    const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
-
-    jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
-    jcp.oc = dst_d.dims()[1] / jcp.ngroups;
-    jcp.ic = src_d.dims()[1] / jcp.ngroups;
-    jcp.oc_without_padding = dst_d.dims()[1] / jcp.ngroups;
-    jcp.ic_without_padding = src_d.dims()[1] / jcp.ngroups;
-    jcp.is_depthwise = true && with_groups && utils::everyone_is(1,
-            jcp.ic_without_padding, jcp.oc_without_padding);
-
-    const auto w_format = with_groups
-        ? (jcp.is_depthwise ? Goihw16g : gOIhw4i16o4i)
-        : OIhw4i16o4i;
-
-    if (dst_d.format() == any)
-        CHECK(dst_pd.set_format(nhwc));
-    if (dst_d.format() != nhwc)
-        return status::unimplemented;
-    if (src_d.format() == any)
-        CHECK(src_pd.set_format(nhwc));
-    if (src_d.format() != nhwc)
-        return status::unimplemented;
-    if (weights_d.format() == any)
-        CHECK(weights_pd.set_format(w_format));
-    if (weights_d.format() != w_format)
-        return status::unimplemented;
-
-    jcp.with_bias = with_bias;
-    if (jcp.with_bias) {
-        if (bias_d.format() == any)
-            CHECK(bias_pd.set_format(x));
-        if (bias_d.format() != x)
-            return status::unimplemented;
-    }
-
-    jcp.ndims = dst_d.ndims();
-    jcp.prop_kind = cd.prop_kind;
-    jcp.mb = src_d.dims()[0];
-    jcp.ih = src_d.dims()[2];
-    jcp.iw = src_d.dims()[3];
-    jcp.oh = dst_d.dims()[2];
-    jcp.ow = dst_d.dims()[3];
-    jcp.kh = weights_d.dims()[with_groups + 2];
-    jcp.kw = weights_d.dims()[with_groups + 3];
-    jcp.t_pad = cd.padding[0][0];
-    jcp.l_pad = cd.padding[0][1];
-    jcp.stride_h = cd.strides[0];
-    jcp.stride_w = cd.strides[1];
-    jcp.src_fmt = src_d.format();
-    jcp.with_eltwise = false;/*TODO: support post-ops*/
-
-    if (jcp.is_depthwise) {
-        jcp.ch_block = 16;
-        jcp.oc_block = 1;
-        jcp.ic_block = 1;
-    } else {
-        jcp.ch_block = 1;
-        jcp.oc_block = 16;
-        jcp.ic_block = 16;
-
-        if (jcp.ngroups == 1) {
-            jcp.oc = utils::rnd_up(jcp.oc_without_padding, jcp.oc_block);
-            jcp.ic = utils::rnd_up(jcp.ic_without_padding, jcp.ic_block);
-        }
-        if (jcp.ic % jcp.ic_block != 0)
-            return status::unimplemented;
-    }
-
-    jcp.dilate_h = cd.dilates[0];
-    jcp.dilate_w = cd.dilates[1];
-
-    if (!IMPLICATION(jcp.dilate_h, jcp.stride_h == 1)
-            || !IMPLICATION(jcp.dilate_w, jcp.stride_w == 1))
-            return status::unimplemented;
-
-    /*bottom and right :padding*/
-    jcp.b_pad = (jcp.ih - 1) * jcp.stride_h + (jcp.kh - 1) * (jcp.dilate_h + 1)
-            - (jcp.oh + jcp.t_pad - 1);
-    jcp.r_pad = (jcp.iw - 1) * jcp.stride_w + (jcp.kw - 1) * (jcp.dilate_w + 1)
-            - (jcp.ow + jcp.l_pad - 1);
-
-    if (!attr.post_ops_.has_default_values())
-        return status::unimplemented;
-
-    jcp.ver = ver_avx512_core;
-    if (mayiuse(avx512_core_vnni))
-        jcp.ver = ver_vnni;
-    const auto &oscales = attr.output_scales_;
-    jcp.is_oc_scale = oscales.mask_ == 1 << 1;
-
-    jcp.dst_dt = dst_d.data_type();
-    jcp.bia_dt = jcp.with_bias ? bias_d.data_type() : data_type::undef;
-    jcp.typesize_bia = jcp.with_bias ? types::data_type_size(bias_d.data_type()) : 0;
-    jcp.typesize_in = types::data_type_size(src_d.data_type());
-    jcp.typesize_out = types::data_type_size(dst_d.data_type());
-
-    jcp.nb_ch = div_up(jcp.ngroups, jcp.ch_block);
-    jcp.nb_oc = jcp.oc / jcp.oc_block;
-    jcp.nb_ic = jcp.ic / jcp.ic_block;
-
-    /*kernel blocking params*/
-    const int regs = jcp.ver == ver_vnni ? 31 : 29;
-    jcp.nb_oc_blocking = nstl::min(4, jcp.nb_oc);
-    for (; jcp.nb_oc_blocking > 1; jcp.nb_oc_blocking--)
-        if (jcp.nb_oc % jcp.nb_oc_blocking == 0
-                && jcp.l_pad <= regs / (jcp.nb_oc_blocking + 1))
-            break;
-
-    jcp.ur_w = regs / (jcp.nb_oc_blocking + 1);
-    int l_overflow = max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1) - jcp.l_pad) / jcp.stride_w);
-    int r_overflow = max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1)
-                     - max(0, jcp.r_pad)) / jcp.stride_w);
-    if (jcp.ow < jcp.ur_w)
-        jcp.ur_w = jcp.ow;
-    for (; jcp.ur_w > 1; jcp.ur_w--)
-        if (jcp.ur_w % jcp.stride_w == 0
-                && max(l_overflow,
-                    r_overflow - (jcp.ow % jcp.ur_w) / jcp.stride_w) * jcp.stride_w <= jcp.ur_w)
-            break;
-    jcp.ur_w_tail = jcp.ow % jcp.ur_w;
-
-    jcp.loop_order = jcp.ngroups > 1 ? loop_ngc : loop_cgn;
-    return status::success;
-}
-
-void jit_avx512_core_u8s8s32x_deconv_fwd_kernel::compute_ker(
-        int ur_w, int l_overflow, int r_overflow, ker_block_t last_block) {
-
-    int ch_block_all = jcp.ch_block * jcp.ic_block * jcp.oc_block;
-    int shift_src_ih = jcp.typesize_in * (jcp.dilate_h + 1)
-        * jcp.iw * jcp.ngroups * jcp.ic_without_padding;
-    int shift_filt_kh = jcp.typesize_in *  jcp.kw * jcp.stride_h * ch_block_all;
-
-    auto src_offset = [=] (int oj, int icb, int ki) {
-         return jcp.typesize_in *
-           (((oj + jcp.l_pad - ki * (jcp.dilate_w + 1)) / jcp.stride_w) * jcp.ngroups * jcp.ic_without_padding + icb * 4);
-    };
-
-    auto kernel_offset = [=] (int ocb, int icb, int ki) {
-        return jcp.typesize_in *
-            (ocb * jcp.nb_ic * jcp.kh * jcp.kw * ch_block_all + icb * jcp.oc_block * jcp.ic_block/4
-             + ki * ch_block_all);
-    };
-
-    auto compute = [=](zmm_t vreg_acc, zmm_t vreg_wei, zmm_t vreg_src) {
-        if (jcp.ver == ver_vnni) {
-            vpdpbusd(vreg_acc, vreg_src, vreg_wei);
-        } else if (jcp.is_depthwise) {
-            vpmulld(zmm_tmp, vreg_src, vreg_wei);
-            vpaddd(vreg_acc, vreg_acc, zmm_tmp);
-        } else {
-            vpmaddubsw(zmm_tmp, vreg_src, vreg_wei);
-            vpmaddwd(zmm_tmp, zmm_tmp, zmm_one);
-            vpaddd(vreg_acc, vreg_acc, zmm_tmp);
-        }
-    };
-
-    mov(aux_reg_src, reg_src);
-    mov(aux_reg_filt, reg_filt);
-    mov(reg_kj, reg_kh);
-    Xbyak::Label kh_loop_label;
-    L(kh_loop_label); {
-       for (int ki = 0; ki < jcp.kw; ki++) {
-           int jj_start = get_ow_start(ki, l_overflow);
-           int jj_end = get_ow_end(ur_w, ki, r_overflow);
-           int tail_size = jcp.ic_without_padding % 4;
-           int n_ic_blocks = jcp.is_depthwise
-                           ? 1
-                           : (last_block &  ~no_last_block
-                                   ? div_up(jcp.ic_without_padding % jcp.ic_block, 4)
-                                   : jcp.ic_block / 4);
-           for (int icb1 = 0; icb1 < n_ic_blocks; icb1++) {
-               for (int jj = jj_start; jj < jj_end; jj += jcp.stride_w) {
-                    assert((jj + jcp.l_pad - ki) % jcp.stride_w == 0);
-
-                   int aux_src_off = src_offset(jj, icb1, ki);
-                   if (jcp.is_depthwise) {
-                       vpmovzxbd(zmm_inp(jj, jcp.nb_oc_blocking),
-                                   EVEX_compress_addr(aux_reg_src, aux_src_off));
-                   } else if ((last_block & last_sp_block)
-                           && tail_size != 0 && icb1 == n_ic_blocks - 1) {
-                       xmm_t xmm_tmp = xmm_t(zmm_inp(jj, jcp.nb_oc_blocking).getIdx());
-                       for (int r = 0; r < tail_size; ++r)
-                           vpinsrb(xmm_tmp, xmm_tmp,
-                                   ptr[aux_reg_src + aux_src_off + r], r);
-                       vpbroadcastd(zmm_inp(jj, jcp.nb_oc_blocking), xmm_tmp);
-                   } else {
-                       vpbroadcastd(zmm_inp(jj, jcp.nb_oc_blocking),
-                               EVEX_compress_addr(aux_reg_src, aux_src_off));
-                   }
-               }
-
-               for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
-                   int aux_filt_off = kernel_offset(ocb, icb1, ki);
-                   if (jj_end - jj_start > 0) {
-                       if (jcp.is_depthwise)
-                           vpmovsxbd(zmm_wei,
-                               EVEX_compress_addr(aux_reg_filt, aux_filt_off));
-                       else
-                           vmovups(zmm_wei,
-                                   EVEX_compress_addr(aux_reg_filt, aux_filt_off));
-                   }
-                   for (int jj = jj_start; jj < jj_end; jj += jcp.stride_w) {
-                       compute(zmm_out(jj, ocb),
-                               zmm_wei, zmm_inp(jj, jcp.nb_oc_blocking));
-                   }
-               }
-           }
-       }
-       sub(aux_reg_src, shift_src_ih);
-       add(aux_reg_filt, shift_filt_kh);
-       dec(reg_kj);
-       cmp(reg_kj, 0);
-       jg(kh_loop_label, T_NEAR);
-    }
-}
-
-void jit_avx512_core_u8s8s32x_deconv_fwd_kernel::prepare_output(int ur_w) {
-    for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
-        for (int ur = 0; ur < ur_w; ur++) {
-                zmm_t zmm = zmm_out(ur, ocb);
-                vpxord(zmm, zmm, zmm);
-        }
-    }
-}
-
-void jit_avx512_core_u8s8s32x_deconv_fwd_kernel::cvt2ps(data_type_t type_in,
-        zmm_t zmm_in, const Xbyak::Operand &op, bool mask_flag) {
-    zmm_t zmm = mask_flag ? zmm_in | ktail_mask | T_z : zmm_in;
-    switch (type_in) {
-    case data_type::f32:
-    case data_type::s32: vmovups(zmm, op); break;
-    case data_type::s8: vpmovsxbd(zmm, op); break;
-    case data_type::u8: vpmovzxbd(zmm, op); break;
-    default: assert(!"unsupported data type");
-    }
-    if (type_in != data_type::f32)
-        vcvtdq2ps(zmm_in, zmm_in);
-}
-
-void jit_avx512_core_u8s8s32x_deconv_fwd_kernel::store_output(int ur_w, bool last_oc_block) {
-    mov(reg_bias, ptr[param1 + GET_OFF(bias)]);
-    mov(reg_ptr_scales, ptr[param1 + GET_OFF(scales)]);
-
-    vpxord(zmm_zero, zmm_zero, zmm_zero);
-    for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
-        const bool mask_flag = last_oc_block && ocb == jcp.nb_oc_blocking - 1;
-        int scale_offset = jcp.is_oc_scale * (sizeof(float) * ocb * jcp.oc_block);
-
-        auto zmm_bias = zmm_tmp;
-        if (jcp.with_bias) {
-            int bias_offset = jcp.typesize_bia * ocb * jcp.oc_block;
-            auto bias_addr = EVEX_compress_addr(reg_bias, bias_offset);
-            cvt2ps(jcp.bia_dt, zmm_bias, bias_addr, mask_flag);
-        }
-
-        for (int ur = 0; ur < ur_w; ur++) {
-            zmm_t zmm = zmm_out(ur, ocb);
-            vcvtdq2ps(zmm, zmm);
-            if (jcp.with_bias) vaddps(zmm, zmm, zmm_bias);
-            zmm_t mask_zmm = mask_flag
-                           ? zmm | ktail_mask | T_z
-                           : zmm;
-            vmulps(mask_zmm, zmm,
-                    EVEX_compress_addr(reg_ptr_scales, scale_offset));
-
-            if (jcp.dst_dt == data_type::u8) vmaxps(zmm, zmm_zero, zmm);
-
-            if (jcp.dst_dt != data_type::f32) {
-                if (attr_.round_mode_ == round_mode::nearest)
-                    vcvtps2dq(zmm | T_rn_sae, zmm);
-                else if (attr_.round_mode_ == round_mode::down)
-                    vcvtps2dq(zmm | T_rd_sae, zmm);
-                else
-                    assert(!"unimplemented");
-            }
-        }
-        for (int ur = 0; ur < ur_w; ur++) {
-            int aux_dst_off = jcp.typesize_out
-                * (ur * jcp.ngroups * jcp.oc_without_padding + ocb * jcp.oc_block);
-            auto addr = EVEX_compress_addr(reg_dst, aux_dst_off);
-
-            zmm_t zmm = zmm_out(ur, ocb);
-            zmm_t r_zmm = mask_flag
-                        ? zmm | ktail_mask
-                        : zmm;
-            switch (jcp.dst_dt) {
-            case data_type::f32:
-            case data_type::s32: vmovups(addr, r_zmm); break;
-            case data_type::s8: vpmovsdb(addr, r_zmm); break;
-            case data_type::u8: vpmovusdb(addr, r_zmm); break;
-            default: assert(!"unknown dst_dt");
-            }
-        }
-    }
-}
-
-void jit_avx512_core_u8s8s32x_deconv_fwd_kernel::compute_loop(
-        int ur_w, int l_overflow, int r_overflow, bool is_last_sp_block) {
-
-    int shift_src_icb = jcp.typesize_in * jcp.ic_block;
-    int shift_filt_icb = jcp.typesize_in * jcp.kh * jcp.kw * jcp.ic_block * jcp.oc_block;
-
-    prepare_output(ur_w);
-
-    Xbyak::Label icb_loop_label;
-    mov(reg_icb, jcp.nb_ic);
-    L(icb_loop_label); {
-
-        if (jcp.ic_without_padding != jcp.ic) {
-            Xbyak::Label common_ker, end_ker;
-            cmp(reg_icb, 1);
-            jg(common_ker, T_NEAR);
-
-            compute_ker(ur_w, l_overflow, r_overflow,
-                    is_last_sp_block ? last_sp_block : last_ic_block);
-            jmp(end_ker, T_NEAR);
-
-            L(common_ker);
-            compute_ker(ur_w, l_overflow, r_overflow, no_last_block);
-
-            L(end_ker);
-        } else {
-            compute_ker(ur_w, l_overflow, r_overflow, no_last_block);
-        }
-
-        add(reg_src, shift_src_icb);
-        add(reg_filt, shift_filt_icb);
-        dec(reg_icb);
-        cmp(reg_icb, 0);
-        jg(icb_loop_label, T_NEAR);
-    }
-    sub(reg_src, jcp.nb_ic * shift_src_icb);
-    sub(reg_filt, jcp.nb_ic * shift_filt_icb);
-
-    if (jcp.ngroups % jcp.ch_block != 0 || jcp.oc_without_padding != jcp.oc) {
-        Xbyak::Label common_store, end_store;
-        mov(reg_oc_blocks, ptr[param1 + GET_OFF(oc_blocks)]);
-        if (jcp.is_depthwise)
-            cmp(reg_oc_blocks, jcp.nb_ch - 1);
-        else
-            cmp(reg_oc_blocks, jcp.nb_oc - jcp.nb_oc_blocking);
-        jne(common_store, T_NEAR);
-
-        store_output(ur_w, true);
-        jmp(end_store, T_NEAR);
-
-        L(common_store);
-        store_output(ur_w, false);
-
-        L(end_store);
-
-    } else {
-        store_output(ur_w, false);
-    }
-}
-
-void jit_avx512_core_u8s8s32x_deconv_fwd_kernel::generate() {
-    preamble();
-
-    Xbyak::Reg16 _t = reg_scratch.cvt16();
-    mov(_t, 0x1);
-    vpbroadcastw(zmm_one, _t);
-
-    if (jcp.ngroups % jcp.ch_block != 0 || jcp.oc_without_padding != jcp.oc) {
-        int tail_size = jcp.is_depthwise
-            ? jcp.ngroups % jcp.ch_block
-            : jcp.oc_without_padding % jcp.oc_block;
-        int mask = (1 << tail_size) - 1;
-        Xbyak::Reg32 regw_tmp = reg_nur_w.cvt32();
-        mov(regw_tmp, mask);
-        kmovw(ktail_mask, regw_tmp);
-    }
-
-    mov(reg_src, ptr[param1 + GET_OFF(src)]);
-    mov(reg_filt, ptr[param1 + GET_OFF(filt)]);
-    mov(reg_dst, ptr[param1 + GET_OFF(dst)]);
-    mov(reg_kh, ptr[param1 + GET_OFF(kh_padding)]);
-
-    int dst_shift = jcp.typesize_out * jcp.ur_w * jcp.ngroups * jcp.oc_without_padding;
-    int src_shift = jcp.typesize_in * (jcp.ur_w / jcp.stride_w) * jcp.ngroups * jcp.ic_without_padding;
-
-    int l_overflow = max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1) - jcp.l_pad) / jcp.stride_w);
-    int r_overflow = max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1)
-                     - max(0, jcp.r_pad)) / jcp.stride_w);
-
-    int r_overflow1 = nstl::max(0, ((jcp.kw -1) * (jcp.dilate_w + 1)
-                - nstl::max(0, jcp.r_pad) - jcp.ur_w_tail) / jcp.stride_w);
-    int nur_w = jcp.ow / jcp.ur_w;
-    if (r_overflow1 > 0) nur_w--;
-
-    if (jcp.ur_w == jcp.ow) {
-        compute_loop(jcp.ur_w, l_overflow, r_overflow, true);
-    } else if (nur_w == 0) {
-        compute_loop(jcp.ur_w, l_overflow, r_overflow1, jcp.ur_w_tail == 0);
-        add(reg_src, src_shift);
-        add(reg_dst, dst_shift);
-        if (jcp.ur_w_tail != 0)
-            compute_loop(jcp.ur_w_tail, 0, r_overflow, true);
-    } else {
-        xor_(reg_nur_w, reg_nur_w);
-        if (l_overflow > 0) {
-            compute_loop(jcp.ur_w, l_overflow, 0, false);
-            add(reg_src, src_shift);
-            add(reg_dst, dst_shift);
-            inc(reg_nur_w);
-        }
-        if ((l_overflow <= 0 && nur_w > 0)
-                || (l_overflow > 0 && nur_w > 1)) {
-            Xbyak::Label ow_loop_label;
-            L(ow_loop_label); {
-                compute_loop(jcp.ur_w, 0, 0, false);
-                add(reg_src, src_shift);
-                add(reg_dst, dst_shift);
-                inc(reg_nur_w);
-                cmp(reg_nur_w, nur_w);
-                jl(ow_loop_label, T_NEAR);
-            }
-        }
-        if (r_overflow1 > 0) {
-            compute_loop(jcp.ur_w, 0, r_overflow1, jcp.ur_w_tail == 0);
-            add(reg_src, src_shift);
-            add(reg_dst, dst_shift);
-        }
-        if (jcp.ur_w_tail != 0) {
-            compute_loop(jcp.ur_w_tail, 0, r_overflow, true);
-        }
-    }
-    postamble();
-}
-
-template <data_type_t dst_type>
-void _jit_avx512_core_u8s8s32x_deconvolution_fwd_t<dst_type>::
-execute_forward()
-{
-    auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
-    auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
-    auto bias = reinterpret_cast<const char *>(this->input_memory(2));
-    auto dst = reinterpret_cast<dst_data_t *>(this->memory());
-
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
-
-    auto &jcp = kernel_->jcp;
-
-    int oc_chunks = jcp.nb_oc / jcp.nb_oc_blocking;
-    int nb_groups = jcp.nb_ch;
-
-    size_t src_h_stride = src_d.blk_off(0, 0, 1);
-    size_t dst_h_stride = dst_d.blk_off(0, 0, 1);
-    size_t wht_kh_stride = wht_blk_off(weights_d, 0, 0, 0, 1);
-
-    const auto &oscales = conf_.attr()->output_scales_;
-
-    parallel(0,
-            [&](const int ithr, const int nthr) {
-            int start{0}, end{0};
-            int work_amount = jcp.mb * nb_groups * oc_chunks * jcp.oh;
-            balance211(work_amount, nthr, ithr, start, end);
-
-            auto p = jit_deconv_call_s();
-
-            /*loop order = cgn*/
-            int n{0}, g{0}, occ{0}, oh_s{0};
-            if (jcp.loop_order == loop_ngc)
-                nd_iterator_init(start, n, jcp.mb, g, nb_groups, occ, oc_chunks,
-                    oh_s, jcp.oh);
-            else if (jcp.loop_order == loop_cgn)
-                nd_iterator_init(start, occ, oc_chunks, g, nb_groups, n, jcp.mb,
-                    oh_s, jcp.oh);
-            else
-                assert(!"unsupported loop order");
-            while (start < end) {
-
-                int ocb = occ * jcp.nb_oc_blocking;
-                int g_oc = (g * jcp.ch_block * jcp.nb_oc + ocb) * jcp.oc_block;
-                int g_ic = g * jcp.ch_block * jcp.ic;
-                int work_rem = end - start;
-                int oh_e = oh_s + work_rem > jcp.oh ? jcp.oh : oh_s + work_rem;
-
-                auto dst_w = dst + dst_d.blk_off(n, g_oc);
-                auto src_w = src + src_d.blk_off(n, g_ic);
-                auto wht_w = weights + wht_blk_off(weights_d, g, ocb, 0);
-                auto bias_w = jcp.with_bias
-                            ? bias + (bias_d.blk_off(g_oc) * jcp.typesize_bia)
-                            : 0;
-
-                auto scales = &oscales.scales_[jcp.is_oc_scale * g_oc];
-                for (int oj = oh_s; oj < oh_e; oj++) {
-                    int ih_max, kh_lo, kh_len;
-                    if (jcp.dilate_h != 0 && jcp.stride_h == 1) {
-                            int dilate_h = jcp.dilate_h + 1;
-                            // Note: use div_up to account for "holes" in filter
-                            int o_t_overflow
-                                = div_up(max(0, (jcp.kh - 1) * dilate_h
-                                        - oj - jcp.t_pad), dilate_h);
-                            int o_b_overflow
-                                = div_up(max(0, (jcp.kh - 1) * dilate_h + 1
-                                        - jcp.ih + oj - jcp.b_pad), dilate_h);
-                            kh_len = jcp.kh - o_t_overflow - o_b_overflow;
-                            kh_lo = o_b_overflow;
-                            ih_max = oj + jcp.t_pad - o_b_overflow * dilate_h;
-                    } else {
-                        int o_t_overflow = max(0,
-                                (jcp.kh - (oj + 1 + jcp.t_pad)) / jcp.stride_h); 
-                        int o_b_overflow = max(0,
-                                ((oj + 1 + jcp.kh - 1)
-                                 - (jcp.oh + jcp.b_pad)) / jcp.stride_h);
-                        int overflow_kh_hi = jcp.kh - 1
-                            - abs(jcp.oh + jcp.b_pad - (oj + 1)) % jcp.stride_h;
-                        int overflow_kh_lo = ((oj + 1 + jcp.t_pad) - 1) % jcp.stride_h;
-
-                        kh_len = (overflow_kh_hi - overflow_kh_lo) / jcp.stride_h
-                            + 1 - o_t_overflow - o_b_overflow;
-                        kh_lo = overflow_kh_lo + o_b_overflow * jcp.stride_h;
-                        ih_max = (oj + jcp.t_pad - kh_lo) / jcp.stride_h;
-                    }
-
-                    p.src = src_w + ih_max * src_h_stride;
-                    p.dst = dst_w + oj * dst_h_stride;
-                    p.filt = wht_w + kh_lo * wht_kh_stride;
-                    p.bias = bias_w;
-                    p.kh_padding = kh_len;
-                    p.scales = scales;
-                    p.oc_blocks = jcp.is_depthwise ? g : ocb;
-                    kernel_->jit_ker(&p);
-                }
-                if (jcp.loop_order == loop_ngc)
-                    nd_iterator_jump(start, end,
-                            n, jcp.mb, g, nb_groups, occ, oc_chunks, oh_s, jcp.oh);
-                else if (jcp.loop_order == loop_cgn)
-                    nd_iterator_jump(start, end,
-                            occ, oc_chunks, g, nb_groups, n, jcp.mb, oh_s, jcp.oh);
-                else
-                    assert(!"unsupported loop order");
-            }
-    });
-}
-
-template struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t<data_type::u8>;
-template struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t<data_type::s8>;
-template struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t<data_type::f32>;
-template struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t<data_type::s32>;
-}
-}
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.cpp
index 45f516c80..13772904d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.cpp
@@ -17,6 +17,7 @@
 #include <assert.h>
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "mkldnn_thread.hpp"
@@ -33,6 +34,7 @@ namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 using namespace Xbyak;
 
@@ -100,7 +102,6 @@ struct jit_avx512_core_u8s8s32x_wino_conv_src_trans_t: public jit_generator {
         return Opmask(3 + id);
     }
 
-    Reg64 reg_ptr_offset = r15;
     Reg64 reg_ptr_src = r14;
     Reg64 reg_ptr_dst = r13;
 
@@ -117,12 +118,49 @@ struct jit_avx512_core_u8s8s32x_wino_conv_src_trans_t: public jit_generator {
     Reg64 reg_scratch_src_alpha = rdx;
     Xmm xmm_src_alpha = Xmm(0);
     Zmm zmm_src_alpha = Zmm(0);
+
+    Reg64 reg_shift = rax;
+    Xmm xmm_shift = Xmm(1);
+    Xmm xmm_zero = Xmm(0);
+
+    Reg64 reg_maskx = rbx;
+    Reg64 reg_masky = rsi;
+    Reg64 reg_nomask = reg_maskx;
 };
 
 void jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::generate() {
     Label ic_block_label;
+    Label end_label;
+    Label mask_label;
+    Label nomask_label;
+
+    auto load_src = [=](bool mask) {
+        for (int y = 0; y < jcp.alpha; y++) {
+            if (mask)
+                kmovw(y_mask, ptr[reg_ptr_v_y_masks + sizeof(uint16_t) * y]);
+            for (int x = 0; x < jcp.alpha; x++) {
+                Zmm zmm_i = zmm_inp(y * jcp.alpha + x);
+                Xmm vreg_i = vreg_inp(y * jcp.alpha + x);
+                int inp_offset = sizeof(uint8_t)
+                        * ((-jcp.t_pad + y) * jcp.iw * jcp.ic
+                                + (-jcp.l_pad + x) * jcp.ic);
+                if (mask) {
+                    kandw(r_mask, y_mask, x_mask(x));
+                    vmovdqu8(vreg_i | r_mask | T_z,
+                            EVEX_compress_addr(reg_aux_ptr_src, inp_offset));
+                } else {
+                    vmovdqu8(vreg_i,
+                            EVEX_compress_addr(reg_aux_ptr_src, inp_offset));
+                }
+                vpmovzxbd(zmm_i, vreg_i); // to int32
+                vcvtdq2ps(zmm_i, zmm_i); // to fp32
+                vmulps(zmm_i, zmm_i, zmm_src_alpha); // *alpha
+                vcvtps2dq(zmm_i | T_rn_sae, zmm_i); // to int32
+                vpmovusdb(vreg_i, zmm_i); // to u8
+            }
+        }
+    };
 
-    int out_offset = 0, inp_offset = 0;
     preamble();
 
 #   define READ_PARAM(reg, field) \
@@ -133,14 +171,24 @@ void jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::generate() {
     READ_PARAM(reg_ptr_v_x_masks, v_x_masks);
 #   undef READ_PARAM
 
-    xor_(eax, eax);
-    mov(ax, (int8_t)-128);
+    mov(reg_maskx, ptr[reg_ptr_v_x_masks]);
+    mov(reg_masky, ptr[reg_ptr_v_y_masks]);
+    test(reg_maskx, reg_maskx);
+    jz(end_label, T_NEAR); // skip kernel if x mask is all 0's
+    test(reg_masky, reg_masky);
+    jz(end_label, T_NEAR); // skip kernel if y mask is all 0's
+    and_(reg_maskx, reg_masky);
+    mov(reg_nomask, reg_maskx);
+    not_(reg_nomask); // zero if x and y masks are all 1's
+
+    xor_(reg_shift, reg_shift);
+    mov(reg_shift.cvt8(), (int8_t)-128);
 
     mov(reg_aux_ptr_src, reg_ptr_src);
     mov(reg_aux_ptr_dst, reg_ptr_dst);
 
     for (int i = 0; i < jcp.alpha; i++) {
-        kmovw(x_mask(i), ptr[reg_ptr_v_x_masks + sizeof(int16_t) * i]);
+        kmovw(x_mask(i), ptr[reg_ptr_v_x_masks + sizeof(uint16_t) * i]);
     }
 
     mov(reg_scratch_src_alpha, float2int(adj_src_scale));
@@ -151,24 +199,14 @@ void jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::generate() {
         vmovq(xmm_src_alpha, reg_scratch_src_alpha);
         vbroadcastss(zmm_src_alpha, xmm_src_alpha);
 
-        for(int y = 0; y < jcp.alpha; y++) {
-            kmovw(y_mask, ptr[reg_ptr_v_y_masks + sizeof(int16_t) * y]);
-            for(int x = 0; x < jcp.alpha; x++) {
-                Zmm zmm_i = zmm_inp(y*jcp.alpha + x);
-                Xmm vreg_i = vreg_inp(y*jcp.alpha + x);
-                vpxord(vreg_i, vreg_i, vreg_i);
-                kandw(r_mask, y_mask, x_mask(x));
-                inp_offset = sizeof(uint8_t) *
-                   ((-jcp.t_pad + y) * jcp.iw * jcp.ic
-                        + (-jcp.l_pad + x) * jcp.ic);
-                vmovdqu8(vreg_i | r_mask, EVEX_compress_addr(reg_aux_ptr_src, inp_offset));
-                vpmovzxbd(zmm_i, vreg_i); // to int32
-                vcvtdq2ps(zmm_i, zmm_i); // to fp32
-                vmulps(zmm_i, zmm_i, zmm_src_alpha); // *alpha
-                vcvtps2dq(zmm_i | T_rn_sae, zmm_i); // to int32
-                vpmovusdb(vreg_i, zmm_i); // to u8
-            }
-        }
+        test(reg_nomask, reg_nomask);
+        jz(nomask_label, T_NEAR);
+        load_src(true);
+        jmp(mask_label, T_NEAR);
+        L(nomask_label);
+        load_src(false);
+        L(mask_label);
+
         for(int y = 0; y < 4; y++) {
             vpsubb(vreg_tmp(y*4+0), vreg_inp(y*4+0), vreg_inp(y*4+2));
             vpaddb(vreg_tmp(y*4+1), vreg_inp(y*4+1), vreg_inp(y*4+2));
@@ -182,12 +220,12 @@ void jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::generate() {
             vpsubb(vreg_out(x+3*4), vreg_tmp(x+4*1), vreg_tmp(x+4*3));
         }
 
-        movd(Xmm(1), eax);
-        pxor(Xmm(0), Xmm(0));
-        pshufb(Xmm(1), Xmm(0));
+        vmovd(xmm_shift, reg_shift.cvt32());
+        vpxor(xmm_zero, xmm_zero, xmm_zero);
+        vpshufb(xmm_shift, xmm_shift, xmm_zero);
 
         for (int i = 0; i < 16; i++) {
-            out_offset = sizeof(uint8_t) * (jcp.inp_stride * i);
+            int out_offset = sizeof(uint8_t) * (jcp.inp_stride * i);
             if (i != unsign_val_in_wino_domain)
                 vpsubb(vreg_out(i), vreg_out(i), Xmm(1));
             vmovups(EVEX_compress_addr(reg_aux_ptr_dst, out_offset), vreg_out(i));
@@ -199,6 +237,7 @@ void jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::generate() {
     dec(reg_ic_block);
     jnz(ic_block_label, T_NEAR);
 
+    L(end_label);
     postamble();
 }
 
@@ -294,7 +333,6 @@ bool jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t::maybe_relu(int position) {
     if (position == 0) {
         /* relu before sum */
         return false
-            || jcp.with_relu
             || p.contain(eltwise, 0)
             || (jcp.dst_dt == data_type::u8 && !p.contain(sum, 0));
     } else if (position == 1) {
@@ -362,7 +400,7 @@ void jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t::generate() {
             vmulps(vreg_bias, vreg_bias, zmm_bias_alpha); // *alpha
         }
         for(int y = 0; y < jcp.m; y++) {
-            kmovw(y_mask, ptr[ reg_ptr_v_y_masks + sizeof(int16_t) * y ]);
+            kmovw(y_mask, ptr[ reg_ptr_v_y_masks + sizeof(uint16_t) * y ]);
             for(int x = 0; x < jcp.m; x++) {
                 kandw(r_mask, y_mask, x_mask(x));
 
@@ -442,11 +480,9 @@ void jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t::generate() {
     mov(reg_aux_ptr_dst, reg_ptr_dst);
 
     vpxord(vreg_zero, vreg_zero, vreg_zero);
-    for (int i = 0; i < jcp.alpha * jcp.alpha; i++)
-        vpxord(vreg_inp(i), vreg_inp(i), vreg_inp(i));
 
-    for (int i = 0; i < jcp.alpha; i++)
-        kmovw(x_mask(i), ptr[reg_ptr_v_x_masks + sizeof(int16_t) * i]);
+    for (int i = 0; i < jcp.m; i++)
+        kmovw(x_mask(i), ptr[reg_ptr_v_x_masks + sizeof(uint16_t) * i]);
 
     int oc_blocks = jcp.oc / load_block;
     mov(reg_oc_block, oc_blocks);
@@ -461,9 +497,6 @@ void jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t::generate() {
     dec(reg_oc_block);
     jnz(oc_block_label, T_NEAR);
 
-    sub(reg_ptr_scales, jcp.is_oc_scale *  sizeof(float) * load_block);
-    sub(reg_ptr_bias, oc_blocks * sizeof(jcp.typesize_bia) * load_block);
-
     postamble();
 
 }
@@ -498,8 +531,7 @@ struct jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t: public jit_generator {
             jit_conv_conf_2x3_wino_t &jcp, const convolution_desc_t &cd,
             cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &weights_pd,
             cpu_memory_t::pd_t &dst_pd, cpu_memory_t::pd_t &bias_pd,
-            const primitive_attr_t &attr,
-            bool with_relu, float relu_negative_slope);
+            const primitive_attr_t &attr);
 
     Zmm vreg_out(int n, int m) {
         const int id_reg_out = n * jcp.m_block + m;
@@ -536,26 +568,14 @@ bool jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t::post_ops_ok(
     using namespace primitive_kind;
     const auto &p = attr.post_ops_;
 
-    auto is_relu = [&](int idx) {
-        return p.entry_[idx].kind == eltwise
-            && p.entry_[idx].eltwise.scale == 1.
-            && p.entry_[idx].eltwise.alg == alg_kind::eltwise_relu
-            && p.entry_[idx].eltwise.alpha == 0.;
-    };
+    auto is_relu = [&](int idx) { return p.entry_[idx].is_relu(); };
 
-   switch (p.len_) {
+    switch (p.len_) {
     case 0: return true;
-    case 1: return true
-                && IMPLICATION(jcp.with_relu, p.contain(sum, 0))
-                && IMPLICATION(!jcp.with_relu, is_relu(0) || p.contain(sum, 0));
-    case 2: return true
-                && IMPLICATION(jcp.with_relu, p.contain(sum, 0) && is_relu(1))
-                && IMPLICATION(!jcp.with_relu, false
-                        || (p.contain(sum, 0) && is_relu(1))
-                        || (p.contain(sum, 1) && is_relu(0)));
-    case 3: return true
-                && jcp.with_relu == false
-                && (is_relu(0) && p.contain(sum, 1) && is_relu(2));
+    case 1: return is_relu(0) || p.contain(sum, 0);
+    case 2: return (p.contain(sum, 0) && is_relu(1)) ||
+                       (p.contain(sum, 1) && is_relu(0));
+    case 3: return is_relu(0) && p.contain(sum, 1) && is_relu(2);
     default: return false;
     }
 
@@ -657,13 +677,24 @@ void jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t::generate() {
 
     postamble();
 }
+namespace {
+bool is_winograd_faster_than_direct(const jit_conv_conf_2x3_wino_t &jcp) {
+    if (jcp.ver == ver_vnni) {
+        return (jcp.mb <= mkldnn_get_max_threads()
+            && (jcp.mb > 4
+                && jcp.ic > 64
+                && !(jcp.oc > 128 && jcp.ih < 14)))
+            || jcp.mb > mkldnn_get_max_threads();
+    }
+    return true;
+}
+}
 
 status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
 ::init_conf(jit_conv_conf_2x3_wino_t &jcp,
             const convolution_desc_t &cd, cpu_memory_t::pd_t &src_pd,
             cpu_memory_t::pd_t &wei_pd, cpu_memory_t::pd_t &dst_pd,
-            cpu_memory_t::pd_t &bias_pd, const primitive_attr_t &attr,
-            bool with_relu, float relu_negative_slope) {
+            cpu_memory_t::pd_t &bias_pd, const primitive_attr_t &attr) {
     const memory_desc_wrapper src_d(&src_pd);
     const memory_desc_wrapper wei_d(&wei_pd);
     const memory_desc_wrapper dst_d(&dst_pd);
@@ -671,6 +702,8 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
 
     const bool with_groups = wei_d.ndims() == src_d.ndims() + 1;
 
+    jcp.nthr = mkldnn_get_max_threads();
+
     jcp.ngroups = with_groups ? wei_d.dims()[0] : 1;
     jcp.mb = src_d.dims()[0];
     jcp.oc = dst_d.dims()[1] / jcp.ngroups;
@@ -700,6 +733,10 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
     if (mayiuse(avx512_core_vnni))
         jcp.ver = ver_vnni;
 
+    if (!IMPLICATION(cd.alg_kind == alg_kind::convolution_auto,
+               is_winograd_faster_than_direct(jcp)))
+        return status::unimplemented;
+
     // block sizes needed for GEMM kernel
     jcp.ic_block = 4;
     jcp.oc_block = 16;
@@ -718,10 +755,7 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
 
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_relu = with_relu;
-    jcp.relu_negative_slope = relu_negative_slope;
-    if (!IMPLICATION(with_relu, relu_negative_slope == 0.))
-        return status::unimplemented;
+
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
@@ -743,7 +777,6 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
     jcp.alpha = jcp.m + jcp.r - 1;
 
     int aa = jcp.alpha * jcp.alpha;
-    int nthr = mkldnn_get_max_threads();
     int L1_cap = get_cache_size(1, true);
     int L2_cap = get_cache_size(2, true);
     // need 1 extra reg for bcast, and 2 tmp regs for non-vnni
@@ -755,12 +788,12 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
         float Y = (float)jcp.ic * jcp.oc;
         if (small_mb == 0) { // outer par
             int nblocks = jcp.mb * div_up(jcp.oh, iy) * div_up(jcp.ow, ix);
-            thr_eff = (float)nblocks / rnd_up(nblocks, nthr);
+            thr_eff = (float)nblocks / rnd_up(nblocks, jcp.nthr);
         } else { // inner par
             int tranw = iy * ix / jcp.alpha;
             int gemmw = aa * (jcp.nb_oc / n2_b);
-            int tranw_r = rnd_up(tranw, nthr);
-            int gemmw_r = rnd_up(gemmw, nthr);
+            int tranw_r = rnd_up(tranw, jcp.nthr);
+            int gemmw_r = rnd_up(gemmw, jcp.nthr);
             thr_eff = (Z * tranw / tranw_r + Y * gemmw / gemmw_r) / (Z + Y);
         }
         return thr_eff;
@@ -779,7 +812,7 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
             req_mem = (float)jcp.ic * (M + N) + jcp.typesize_acc * M * N;
             mem_eff = nstl::min(1.f, L2_cap / req_mem);
             // memory used during wino transforms
-            int M_per_thr = div_up(M, nthr);
+            int M_per_thr = div_up(M, jcp.nthr);
             req_mem = (float)aa * M_per_thr
                     * (jcp.ic + jcp.typesize_acc * jcp.oc);
             if (req_mem > L2_cap)
@@ -868,15 +901,34 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
     assert((jcp.m_block + 1) * jcp.n2_block <= free_regs);
     assert(jcp.xb % 2 == 0 && jcp.yb % 2 == 0);
 
-    jcp.inp_stride = jcp.yb * jcp.xb / 4 * jcp.ic;
-    jcp.out_stride = jcp.yb * jcp.xb / 4 * jcp.oc;
-    jcp.wei_stride = jcp.ic * jcp.oc;
-    jcp.bia_stride = jcp.oc;
+    jcp.mb_block = 1;
+    if (jcp.small_mb) {
+        // For small mb harness, set mb_block as large as possible subject to
+        // the constraint that winograd activations fit into available L3 cache
+        int L3_cap = get_cache_size(3, true);
+        int M = jcp.xb * jcp.yb / 4;
+        int wino_src_size = 16 * M * jcp.ic * jcp.typesize_in;
+        int wino_dst_size = 16 * M * jcp.oc * jcp.typesize_acc;
+        int max_mb_block = nstl::min(
+                jcp.mb, jcp.nthr * L3_cap / (wino_src_size + wino_dst_size));
+        for (int i = max_mb_block; i > 1; i--) {
+            if (jcp.mb % i == 0) {
+                jcp.mb_block = i;
+                break;
+            }
+        }
+    }
+    jcp.nb_mb = jcp.mb / jcp.mb_block;
 
-    jcp.M = jcp.xb * jcp.yb / 4;
+    jcp.M = jcp.mb_block * jcp.xb * jcp.yb / 4;
     jcp.N = jcp.oc;
     jcp.K = jcp.ic;
 
+    jcp.inp_stride = jcp.M * jcp.ic;
+    jcp.out_stride = jcp.M * jcp.oc;
+    jcp.wei_stride = jcp.ic * jcp.oc;
+    jcp.bia_stride = jcp.oc;
+
     jcp.n_block = jcp.oc_block;
     jcp.k_block = jcp.ic_block;
 
@@ -922,69 +974,82 @@ status_t jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t
     if (!wei_pd.is_equal(&new_weights_pd))
         return status::unimplemented;
 
+    const int tilesize = jcp.alpha * jcp.alpha;
+    const int numtiles = jcp.M;
+    const int alltiles = numtiles * tilesize;
+
+    jcp.size_wino_src
+        = utils::rnd_up(jcp.typesize_in * alltiles * jcp.ic, PAGE_4K)
+        / jcp.typesize_in;
+    jcp.size_wino_wei = tilesize * jcp.oc * jcp.ic;
+    jcp.size_wino_dst = alltiles * jcp.oc;
+
     return status::success;
 }
 ////////////////////////////////////////////////////////////////////////////////
 
-template <bool with_relu, data_type_t dst_data_type>
-status_t _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
-        dst_data_type>::pd_t::jit_conf() {
+template <data_type_t dst_data_type>
+status_t jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+        pd_t::jit_conf() {
     return jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t::init_conf(
-            jcp_, this->cdesc_(), this->src_pd_, this->weights_pd_,
-            this->dst_pd_,this->bias_pd_, *this->attr(),
-            with_relu, this->negative_slope());
+            jcp_, *this->desc(), this->src_pd_, this->weights_pd_,
+            this->dst_pd_,this->bias_pd_, *this->attr());
 }
 
-template <bool with_relu, data_type_t dst_data_type>
-_jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu, dst_data_type>::
-        _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t(const pd_t *pd,
+template <data_type_t dst_data_type>
+void jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::pd_t::
+init_scratchpad() {
+    auto scratchpad = this->scratchpad_registry().registrar();
+
+    int nthr_multiplier = jcp_.small_mb ? 1 : jcp_.nthr;
+    scratchpad.book(key_wino_V,
+            sizeof(src_data_t) * jcp_.size_wino_src * nthr_multiplier, PAGE_4K);
+    scratchpad.book(key_wino_M,
+            sizeof(acc_data_t) * jcp_.size_wino_dst * nthr_multiplier, PAGE_4K);
+
+    scratchpad.book(key_conv_adjusted_scales,
+            sizeof(float) * nstl::max(attr()->output_scales_.count_, 16));
+}
+
+template <data_type_t dst_data_type>
+jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+        jit_avx512_core_u8s8s32x_wino_convolution_fwd_t(const pd_t *apd,
                 const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs)
-    , conf_(*pd)
-    , scratchpad_(nullptr) {
-    const int nthreads = mkldnn_get_max_threads();
+    : cpu_primitive_t(apd, inputs, outputs, true)
+{
     kernel_ = new jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t(
-            conf_.jcp_, *conf_.attr());
+            pd()->jcp_, *pd()->attr());
     src_trans_ = new jit_avx512_core_u8s8s32x_wino_conv_src_trans_t(
-            conf_.jcp_, *conf_.attr());
+            pd()->jcp_, *pd()->attr());
     dst_trans_ = new jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t(
-            conf_.jcp_, *conf_.attr());
-
-    const int tilesize = conf_.jcp_.alpha * conf_.jcp_.alpha;
-    const int numtiles = (conf_.jcp_.yb / 2) * (conf_.jcp_.xb / 2);
-    const int alltiles = tilesize * numtiles;
-    size_wino_wei_ = tilesize * conf_.jcp_.oc * conf_.jcp_.ic;
-    size_wino_src_ = sizeof(src_data_t) * alltiles * conf_.jcp_.ic;
-    size_wino_src_ = rnd_up(size_wino_src_, PAGE_4K);
-    size_wino_src_ /= sizeof(src_data_t);
-    size_wino_dst_ = alltiles * conf_.jcp_.oc;
-
-    size_t workspace_size = (conf_.jcp_.small_mb ? 1 : nthreads)
-            * (sizeof(src_data_t) * size_wino_src_
-                                    + sizeof(acc_data_t) * size_wino_dst_);
-
-    scratchpad_ = create_scratchpad(workspace_size);
-    assert(scratchpad_); // TODO: add proper check and raise exception?
-
-    wino_shift_ = (conf_.jcp_.small_mb ? 1 : nthreads) * sizeof(src_data_t)
-            * size_wino_src_;
-
-    updated_output_scales_ = conf_.attr()->output_scales_;
-    updated_output_scales_.scale(1.f / (adj_src_scale * adj_wei_scale));
+            pd()->jcp_, *pd()->attr());
 }
 
-template <bool with_relu, data_type_t dst_data_type>
-_jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
-        dst_data_type>::~_jit_avx512_core_u8s8s32x_wino_convolution_fwd_t() {
+template <data_type_t dst_data_type>
+jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+        ~jit_avx512_core_u8s8s32x_wino_convolution_fwd_t() {
     delete kernel_;
     delete src_trans_;
     delete dst_trans_;
-    delete scratchpad_;
 }
 
-template <bool with_relu, data_type_t dst_data_type>
-void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
-        dst_data_type>::execute_forward() {
+template <data_type_t dst_data_type>
+const float *jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+adjust_oscales(const memory_tracking::grantor_t &scratchpad) const {
+    const float *oscales = pd()->attr()->output_scales_.scales_;
+    auto loc_scales = scratchpad.template get<float>(key_conv_adjusted_scales);
+    size_t count = pd()->attr()->output_scales_.count_;
+    float factor = 1.f / (adj_src_scale * adj_wei_scale);
+    if (count == 1)
+        utils::array_set(loc_scales, oscales[0] * factor, 16);
+    else
+        for (size_t c = 0; c < count; c++) loc_scales[c] = oscales[c] * factor;
+    return loc_scales;
+}
+
+template <data_type_t dst_data_type>
+void jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+execute_forward() const {
     const auto &jcp = kernel_->jcp;
     if (jcp.small_mb)
         execute_forward_small_mb();
@@ -992,21 +1057,22 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
         execute_forward_mbN();
 }
 
-template <bool with_relu, data_type_t dst_data_type>
-void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
-        dst_data_type>::execute_forward_mbN() {
+template <data_type_t dst_data_type>
+void jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+execute_forward_mbN() const {
     auto src = reinterpret_cast<const src_data_t *>(input_memory(0));
     auto wei = reinterpret_cast<const wei_data_t *>(input_memory(1));
     auto bia = reinterpret_cast<const char *>(input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(memory(0));
 
+    auto scratchpad = this->scratchpad();
+
     const auto &jcp = kernel_->jcp;
-    const auto &oscales = updated_output_scales_;
+    const float *oscales = adjust_oscales(scratchpad);
 
-    auto wino_wei = wei;
-    auto dst_bias = (const acc_data_t *)(wei + size_wino_wei_);
-    auto wino_src_base = (src_data_t *)scratchpad_->get();
-    auto wino_dst_base = (acc_data_t *)(scratchpad_->get() + wino_shift_);
+    auto dst_bias = (const acc_data_t *)(wei + jcp.size_wino_wei);
+    auto wino_src_base = scratchpad.template get<src_data_t>(key_wino_V);
+    auto wino_dst_base = scratchpad.template get<acc_data_t>(key_wino_M);
 
     parallel_nd(jcp.mb, div_up(jcp.oh, jcp.yb), div_up(jcp.ow, jcp.xb),
             [&](int mb, int tile_y_b, int tile_x_b) {
@@ -1015,8 +1081,8 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
         int tile_x = tile_x_b * jcp.xb;
 
         int ithr = mkldnn_get_thread_num();
-        auto wino_src = wino_src_base + size_wino_src_ * ithr;
-        auto wino_dst = wino_dst_base + size_wino_dst_ * ithr;
+        auto wino_src = wino_src_base + jcp.size_wino_src * ithr;
+        auto wino_dst = wino_dst_base + jcp.size_wino_dst * ithr;
 
         auto src_trans_p =
             jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::call_params_t();
@@ -1028,7 +1094,7 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
         /* transformation of input tensor to winograd domain */
         for (int y_in_block = 0; y_in_block < jcp.yb; y_in_block += 2) {
             for (int x_in_block = 0; x_in_block < jcp.xb; x_in_block += 2) {
-                unsigned short v_y_masks[4], v_x_masks[4];
+                uint16_t v_y_masks[4], v_x_masks[4];
 
                 int y = y_in_block + tile_y;
                 int x = x_in_block + tile_x;
@@ -1044,8 +1110,8 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
 
 #pragma unroll(4)
                 for (int i = 0; i < jcp.alpha; i++) {
-                    v_y_masks[i] = (i < v_ys || i >= v_ye) ? 0 : 0xffff;
-                    v_x_masks[i] = (i < v_xs || i >= v_xe) ? 0 : 0xffff;
+                    v_y_masks[i] = uint16_t(i < v_ys || i >= v_ye ? 0 : 0xffff);
+                    v_x_masks[i] = uint16_t(i < v_xs || i >= v_xe ? 0 : 0xffff);
                 }
                 auto local_s = src
                         + mb * jcp.ih * jcp.iw * jcp.ic
@@ -1066,7 +1132,7 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
             int offset = (tile_ij + ithr) % 16;
             gemm_p.src = wino_src + jcp.inp_stride * offset;
             gemm_p.dst = wino_dst + jcp.out_stride * offset;
-            gemm_p.wei = wino_wei + jcp.wei_stride * offset;
+            gemm_p.wei = wei + jcp.wei_stride * offset;
             gemm_p.dst_b = dst_bias + jcp.bia_stride * offset;
 
             kernel_->ker_(&gemm_p);
@@ -1075,7 +1141,7 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
         /* transformation from winograd domain to output tensor */
         for (int y_in_block = 0; y_in_block < jcp.yb; y_in_block += 2) {
             for (int x_in_block = 0; x_in_block < jcp.xb; x_in_block += 2) {
-                unsigned short v_y_masks[2], v_x_masks[2];
+                uint16_t v_y_masks[2], v_x_masks[2];
 
                 int y = y_in_block + tile_y;
                 int x = x_in_block + tile_x;
@@ -1083,15 +1149,15 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
 
 #pragma unroll(2)
                 for (int i = 0; i < jcp.m; i++) {
-                    v_x_masks[i] = (x + i < jcp.ow) ? 0xffff : 0;
-                    v_y_masks[i] = (y + i < jcp.oh) ? 0xffff : 0;
+                    v_x_masks[i] = uint16_t(x + i < jcp.ow ? 0xffff : 0);
+                    v_y_masks[i] = uint16_t(y + i < jcp.oh ? 0xffff : 0);
                 }
                 auto local_d = dst
                         + mb * jcp.oh * jcp.ow * jcp.oc
                         + y * jcp.ow * jcp.oc + x * jcp.oc;
                 auto local_w = wino_dst + m * jcp.oc;
 
-                auto scales = oscales.scales_;
+                auto scales = oscales;
                 dst_trans_p.dst = local_d;
                 dst_trans_p.wino_dst = local_w;
                 dst_trans_p.v_y_masks = v_y_masks;
@@ -1106,39 +1172,41 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
     });
 }
 
-template <bool with_relu, data_type_t dst_data_type>
-void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
-        dst_data_type>::execute_forward_small_mb() {
+template <data_type_t dst_data_type>
+void jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>::
+execute_forward_small_mb() const {
     auto src = reinterpret_cast<const src_data_t *>(input_memory(0));
     auto wei = reinterpret_cast<const wei_data_t *>(input_memory(1));
     auto bia = reinterpret_cast<const char *>(input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(memory(0));
 
+    auto scratchpad = this->scratchpad();
+
     const auto &jcp = kernel_->jcp;
-    const auto &oscales = updated_output_scales_;
+    const float *oscales = adjust_oscales(scratchpad);
 
-    auto wino_wei = wei;
-    auto dst_bias = (const acc_data_t *)(wei + size_wino_wei_);
-    auto wino_src = (src_data_t *)scratchpad_->get();
-    auto wino_dst = (acc_data_t *)(scratchpad_->get() + wino_shift_);
+    auto dst_bias = (const acc_data_t *)(wei + jcp.size_wino_wei);
+    auto wino_src = scratchpad.template get<src_data_t>(key_wino_V);
+    auto wino_dst = scratchpad.template get<acc_data_t>(key_wino_M);
 
-    for (int mb = 0; mb < jcp.mb; mb++) {
+    for (int mbb = 0; mbb < jcp.nb_mb; mbb++) {
     for (int tile_y = 0; tile_y < jcp.oh; tile_y += jcp.yb) {
     for (int tile_x = 0; tile_x < jcp.ow; tile_x += jcp.xb) {
         /* transformation of input tensor to winograd domain */
-        parallel_nd(div_up(jcp.yb, 2), div_up(jcp.xb, 2),
-            [&](int y_in_block_b, int x_in_block_b) {
+        parallel_nd(div_up(jcp.yb, 2), div_up(jcp.xb, 2), jcp.mb_block,
+            [&](int y_in_block_b, int x_in_block_b, int mb) {
             int y_in_block = y_in_block_b * 2;
             int x_in_block = x_in_block_b * 2;
 
             auto src_trans_p =
                 jit_avx512_core_u8s8s32x_wino_conv_src_trans_t::call_params_t();
 
-            unsigned short v_y_masks[4], v_x_masks[4];
+            uint16_t v_y_masks[4], v_x_masks[4];
 
             int y = y_in_block + tile_y;
             int x = x_in_block + tile_x;
-            int m = (y_in_block / 2) * (jcp.xb / 2) + (x_in_block / 2);
+            int m = (mb * (jcp.yb / 2) + (y_in_block / 2)) * (jcp.xb / 2)
+                    + (x_in_block / 2);
 
             int v_ys = nstl::max(0, jcp.t_pad - y);
             int v_ye = nstl::min(
@@ -1150,11 +1218,11 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
 
 #pragma unroll(4)
             for (int i = 0; i < jcp.alpha; i++) {
-                v_y_masks[i] = (i < v_ys || i >= v_ye) ? 0 : 0xffff;
-                v_x_masks[i] = (i < v_xs || i >= v_xe) ? 0 : 0xffff;
+                v_y_masks[i] = uint16_t(i < v_ys || i >= v_ye ? 0 : 0xffff);
+                v_x_masks[i] = uint16_t(i < v_xs || i >= v_xe ? 0 : 0xffff);
             }
             auto local_s = src
-                    + mb * jcp.ih * jcp.iw * jcp.ic
+                    + (mbb * jcp.mb_block + mb) * jcp.ih * jcp.iw * jcp.ic
                     + y * jcp.iw * jcp.ic + x * jcp.ic;
             auto local_w = wino_src + m * jcp.ic;
 
@@ -1174,7 +1242,7 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
             gemm_p.src = wino_src + jcp.inp_stride * tile_ij;
             gemm_p.dst = wino_dst + jcp.out_stride * tile_ij
                     + nnb * jcp.n2_block * jcp.n_block;
-            gemm_p.wei = wino_wei + jcp.wei_stride * tile_ij
+            gemm_p.wei = wei + jcp.wei_stride * tile_ij
                     + nnb * jcp.n2_block * jcp.n_block * jcp.K;
             gemm_p.dst_b = dst_bias + jcp.bia_stride * tile_ij
                     + nnb * jcp.n2_block * jcp.n_block;
@@ -1183,31 +1251,32 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
         });
 
         /* transformation from winograd domain to output tensor */
-        parallel_nd(div_up(jcp.yb, 2), div_up(jcp.xb, 2),
-            [&](int y_in_block_b, int x_in_block_b) {
+        parallel_nd(div_up(jcp.yb, 2), div_up(jcp.xb, 2), jcp.mb_block,
+            [&](int y_in_block_b, int x_in_block_b, int mb) {
             int y_in_block = y_in_block_b * 2;
             int x_in_block = x_in_block_b * 2;
 
             auto dst_trans_p =
                 jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t::call_params_t();
 
-            unsigned short v_y_masks[2], v_x_masks[2];
+            uint16_t v_y_masks[2], v_x_masks[2];
 
             int y = y_in_block + tile_y;
             int x = x_in_block + tile_x;
-            int m = (y_in_block / 2) * (jcp.xb / 2) + (x_in_block / 2);
+            int m = (mb * (jcp.yb / 2) + (y_in_block / 2)) * (jcp.xb / 2)
+                    + (x_in_block / 2);
 
 #pragma unroll(2)
             for (int i = 0; i < jcp.m; i++) {
-                v_x_masks[i] = (x + i < jcp.ow) ? 0xffff : 0;
-                v_y_masks[i] = (y + i < jcp.oh) ? 0xffff : 0;
+                v_x_masks[i] = uint16_t(x + i < jcp.ow ? 0xffff : 0);
+                v_y_masks[i] = uint16_t(y + i < jcp.oh ? 0xffff : 0);
             }
             auto local_d = dst
-                    + mb * jcp.oh * jcp.ow * jcp.oc
+                    + (mbb * jcp.mb_block + mb) * jcp.oh * jcp.ow * jcp.oc
                     + y * jcp.ow * jcp.oc + x * jcp.oc;
             auto local_w = wino_dst + m * jcp.oc;
 
-            auto scales = oscales.scales_;
+            auto scales = oscales;
             dst_trans_p.dst = local_d;
             dst_trans_p.wino_dst = local_w;
             dst_trans_p.v_y_masks = v_y_masks;
@@ -1221,22 +1290,10 @@ void _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
     }}}
 }
 
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<true,
-        data_type::s8>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<false,
-        data_type::s8>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<true,
-        data_type::u8>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<false,
-        data_type::u8>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<true,
-        data_type::s32>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<false,
-        data_type::s32>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<true,
-        data_type::f32>;
-template struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<false,
-        data_type::f32>;
+template struct jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<data_type::s8>;
+template struct jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<data_type::u8>;
+template struct jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<data_type::s32>;
+template struct jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<data_type::f32>;
 
 } // namespace cpu
 } // namespace impl
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.hpp
index 83392ab67..5c1c8cbec 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_wino_convolution.hpp
@@ -23,7 +23,6 @@
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
 #include "mkldnn_thread.hpp"
-#include "scratchpad.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
@@ -39,20 +38,18 @@ struct jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t;
 struct jit_avx512_core_u8s8s32x_wino_conv_src_trans_t;
 struct jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t;
 
-template <bool with_relu, data_type_t dst_data_type>
-struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t : public cpu_primitive_t {
-    struct pd_t : public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+template <data_type_t dst_data_type>
+struct jit_avx512_core_u8s8s32x_wino_convolution_fwd_t : public cpu_primitive_t {
+    struct pd_t : public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            :  _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-            hint_fwd_pd)
+            :  cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_()
         {}
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_int8_wino:", avx512_core, ""),
-                _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<with_relu,
-                dst_data_type>);
+                jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<dst_data_type>);
 
         virtual status_t init() override {
             using namespace prop_kind;
@@ -60,28 +57,39 @@ struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t : public cpu_primitive_t
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind,
+                && utils::one_of(this->desc()->prop_kind,
                                     forward_training, forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_winograd
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_winograd)
                 && !this->has_zero_dim_memory()
-                && this->cdesc_().src_desc.data_type == data_type::u8
-                && this->cdesc_().dst_desc.data_type == dst_data_type
-                && this->cdesc_().weights_desc.data_type == data_type::s8
+                && this->desc()->src_desc.data_type == data_type::u8
+                && this->desc()->dst_desc.data_type == dst_data_type
+                && this->desc()->weights_desc.data_type == data_type::s8
                 && IMPLICATION(this->with_bias(),
-                    utils::one_of(this->cdesc_().bias_desc.data_type,
+                    utils::one_of(this->desc()->bias_desc.data_type,
                                                 data_type::f32, data_type::s32,
                                                 data_type::s8, data_type::u8))
-                && this->cdesc_().accum_data_type == data_type::s32;
+                && this->desc()->accum_data_type == data_type::s32;
 
             if (!ok) return status::unimplemented;
 
-            return jit_conf();
+            status_t status = jit_conf();
+            if (status != status::success) return status;
+
+            init_scratchpad();
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                this->set_alg_kind(alg_kind::convolution_winograd);
+            return status;
         }
 
         jit_conv_conf_2x3_wino_t jcp_;
 
     protected:
         status_t jit_conf();
+        void init_scratchpad();
 
         virtual status_t set_default_params() override {
             using namespace memory_format;
@@ -100,42 +108,28 @@ struct _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t : public cpu_primitive_t
     typedef typename prec_traits<data_type::s32>::type acc_data_t;
     typedef typename prec_traits<dst_data_type>::type dst_data_t;
 
-    _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t(const pd_t *pd,
+    jit_avx512_core_u8s8s32x_wino_convolution_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
-    ~_jit_avx512_core_u8s8s32x_wino_convolution_fwd_t();
+    ~jit_avx512_core_u8s8s32x_wino_convolution_fwd_t();
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    void execute_forward_small_mb();
-    void execute_forward_mbN();
-    pd_t conf_;
+    const float *adjust_oscales(const memory_tracking::grantor_t &scratchpad)
+        const;
+    void execute_forward() const;
+    void execute_forward_small_mb() const;
+    void execute_forward_mbN() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     jit_avx512_core_u8s8s32x_wino_conv_fwd_ker_t *kernel_;
     jit_avx512_core_u8s8s32x_wino_conv_src_trans_t *src_trans_;
     jit_avx512_core_u8s8s32x_wino_conv_dst_trans_t *dst_trans_;
-
-    size_t size_wino_wei_;
-    size_t size_wino_src_;
-    size_t size_wino_dst_;
-    size_t wino_shift_;
-
-    scratchpad_t *scratchpad_;
-
-    mkldnn::impl::scales_t updated_output_scales_;
 };
 
-template <impl::data_type_t dst_type>
-using jit_avx512_core_u8s8s32x_wino_convolution_fwd_t =
-    _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<false, dst_type>;
-
-template <impl::data_type_t dst_type>
-using jit_avx512_core_u8s8s32x_wino_convolution_relu_t =
-    _jit_avx512_core_u8s8s32x_wino_convolution_fwd_t<true, dst_type>;
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp
index 40ca5f098..011db2420 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.cpp
@@ -13,12 +13,15 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
-#include <float.h>
+
+#include <assert.h>
+
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
-#include "mkldnn_thread.hpp"
 #include "utils.hpp"
+
 #include "cpu_memory.hpp"
 
 #include "jit_uni_1x1_conv_utils.hpp"
@@ -35,32 +38,6 @@ using namespace mkldnn::impl::utils;
 
 using namespace Xbyak;
 
-bool jit_avx512_core_x8s8s32x_1x1_conv_kernel::maybe_relu(int position)
-{
-    using namespace primitive_kind;
-    const auto &p = attr_.post_ops_;
-
-    if (position == 0) {
-        /* relu before sum */
-        return false
-            || jcp.with_eltwise
-            || p.contain(eltwise, 0)
-            || (jcp.dst_dt == data_type::u8 && !p.contain(sum, 0));
-    } else if (position == 1) {
-        /* relu after sum */
-        const int sum_idx = p.contain(sum, 0)
-            ? 0 : (p.contain(sum, 1) ? 1 : -1);
-        if (sum_idx == -1)
-            return false;
-
-        return false
-            || p.contain(eltwise, sum_idx + 1)
-            || jcp.dst_dt == data_type::u8;
-    }
-
-    return false;
-}
-
 void jit_avx512_core_x8s8s32x_1x1_conv_kernel::bcast_loop(int load_loop_blk)
 {
     mov(aux1_reg_bcast_data, reg_bcast_data);
@@ -131,7 +108,7 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::reduce_loop(int load_loop_blk,
     };
 
     auto vreg_accum = [=](int i_load, int i_ur) {
-        return Zmm(i_ur * load_loop_blk + i_load);
+        return Zmm(i_ur + i_load * ur);
     };
 
     auto zmm_bias_alpha = [=]() {
@@ -242,23 +219,60 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::reduce_loop(int load_loop_blk,
 
                 zmm_t mask_zmm = mask_flag ? r | ktail_mask | T_z : r;
                 vmulps(mask_zmm, r, scale_ptr(i_load));
-                if (maybe_relu(0)) {
-                    vpxord(zmm_zero, zmm_zero, zmm_zero);
-                    vmaxps(r, zmm_zero, r);
-                }
-                if (p_sum_scale) { // post_op: sum
-                    vpxord(zmm_zero, zmm_zero, zmm_zero);
-                    auto zmm_prev_dst = zmm_zero;
+            }
+        }
+
+        int eltwise_inj_idx = 0;
+        int depthwise_inj_idx = 0;
+        for (int i = 0; i < p.len_; i++) {
+            auto& post_op = p.entry_[i];
+            if (post_op.is_eltwise()) {
+                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur * load_loop_blk);
+
+                eltwise_inj_idx++;
+            } else if (post_op.is_depthwise()) {
+                mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+                mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+                add(reg_d_weights, reg_oc_off);
+                add(reg_d_bias, reg_oc_off);
 
-                    cvt2ps(jcp.dst_dt, zmm_prev_dst, output_ptr(i_load, i_ur),
-                        mask_flag);
+                for (int k = 0; k < load_loop_blk; k++) {
+                    depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+                            k * ur, k * ur + ur, reg_d_weights, reg_d_bias);
 
-                    if (*p_sum_scale == 1.f)
-                        vaddps(r, zmm_prev_dst);
-                    else
-                        vfmadd231ps(r, zmm_prev_dst, zword_b[reg_ptr_sum_scale]);
+                    add(reg_d_weights, jcp.oc_block * sizeof(float));
+                    add(reg_d_bias, jcp.oc_block * sizeof(float));
                 }
-                if (maybe_relu(1)) {
+
+                depthwise_inj_idx++;
+            } else if (post_op.is_sum(false)) {
+                for (int i_load = 0; i_load < load_loop_blk; ++i_load) {
+                    const bool mask_flag = mask_flag_in &&
+                                               i_load == load_loop_blk - 1;
+                    for (int i_ur = 0; i_ur < ur; ++i_ur) {
+                        vpxord(zmm_zero, zmm_zero, zmm_zero);
+                        auto zmm_prev_dst = zmm_zero;
+
+                        auto r = vreg_accum(i_load, i_ur);
+                        cvt2ps(jcp.dst_dt, zmm_prev_dst, output_ptr(i_load, i_ur),
+                            mask_flag);
+
+                        if (*p_sum_scale == 1.f)
+                            vaddps(r, zmm_prev_dst);
+                        else
+                            vfmadd231ps(r, zmm_prev_dst, zword_b[reg_ptr_sum_scale]);
+                    }
+                }
+            }
+        }
+
+        for (int i_load = 0; i_load < load_loop_blk; ++i_load) {
+            const bool mask_flag = mask_flag_in &&
+                                       i_load == load_loop_blk - 1;
+            for (int i_ur = 0; i_ur < ur; ++i_ur) {
+                auto r = vreg_accum(i_load, i_ur);
+                if (jcp.dst_dt == data_type::u8) {
                     vpxord(zmm_zero, zmm_zero, zmm_zero);
                     vmaxps(r, zmm_zero, r);
                 }
@@ -274,6 +288,7 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::reduce_loop(int load_loop_blk,
             for (int i_ur = 0; i_ur < ur; ++i_ur) {
                 auto r = vreg_accum(i_load, i_ur);
                 zmm_t r_zmm = mask_flag ? r | ktail_mask : r;
+
                 switch (jcp.dst_dt) {
                 case data_type::f32:
                 case data_type::s32:
@@ -335,6 +350,8 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::reduce_loop(int load_loop_blk,
     Label reduce_loop;
     Label reduce_loop_tail;
 
+    push(reg_oc_off);
+
     mov(aux_reg_load_data, reg_load_data);
 
     mov(aux_reg_bcast_data, aux1_reg_bcast_data);
@@ -359,6 +376,8 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::reduce_loop(int load_loop_blk,
         fma_block(false);
     }
 
+    pop(reg_oc_off);
+
     if (jcp.oc_without_padding != jcp.oc) {
         Label end_store, common_store;
         mov(EVEX_compress_addr(rsp, reg_bcast_data_off), reg_bcast_data);
@@ -388,6 +407,24 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::reduce_loop(int load_loop_blk,
 
 void jit_avx512_core_x8s8s32x_1x1_conv_kernel::generate()
 {
+    const auto &p = attr_.post_ops_;
+    for (int i = 0; i < p.len_; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
     preamble();
 
     xor_(reg_scratch, reg_scratch);
@@ -423,7 +460,7 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::generate()
     mov(EVEX_compress_addr(rsp, bcast_loop_work_off), reg_bcast_loop_work);
     mov(reg_reduce_loop_work, ptr[param1 + GET_OFF(reduce_dim)]);
     mov(reg_reduce_pos_flag, ptr[param1 + GET_OFF(first_last_flag)]);
-
+    mov(reg_oc_off, ptr[param1 + GET_OFF(oc_off)]);
 
     auto load_loop_body = [=](int load_loop_blk) {
         bcast_loop(load_loop_blk);
@@ -451,6 +488,7 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::generate()
         add(reg_output_data,
             load_loop_blk * jcp.load_block * jcp.typesize_out);
         sub(reg_load_loop_work, load_loop_blk * jcp.load_loop_iter_step);
+        add(reg_oc_off, load_loop_blk * jcp.oc_block * sizeof(float));
     };
 
     const int simd_w = 16;
@@ -480,6 +518,12 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::generate()
                     cmp(reg_load_loop_work, 0);
                     je(load_loop_blk[num_ur_cases], T_NEAR);
                 }
+
+                for (int _i = 1; _i <= label_idx + 1; _i++) {
+                    prefetcht0(ptr [ reg_load_data + _i * jcp.ic * jcp.oc_block ]);
+                    prefetcht1(ptr [ reg_output_data + _i * jcp.oc_block ]);
+                }
+
                 load_loop_body(label_idx + 1);
                 if (label_idx - 1 > 0) {
                     cmp(reg_load_loop_work, 2 * label_idx * simd_w);
@@ -503,6 +547,9 @@ void jit_avx512_core_x8s8s32x_1x1_conv_kernel::generate()
     add(rsp, stack_space_needed);
 
     postamble();
+
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
 }
 
 bool jit_avx512_core_x8s8s32x_1x1_conv_kernel::post_ops_ok(
@@ -510,27 +557,18 @@ bool jit_avx512_core_x8s8s32x_1x1_conv_kernel::post_ops_ok(
     using namespace primitive_kind;
     const auto &p = attr.post_ops_;
 
-    auto is_relu = [&](int idx) {
-        return p.entry_[idx].kind == eltwise
-            && p.entry_[idx].eltwise.scale == 1.
-            && p.entry_[idx].eltwise.alg == alg_kind::eltwise_relu
-            && p.entry_[idx].eltwise.alpha == 0.;
-    };
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_depthwise = [&](int idx) { return p.entry_[idx].is_depthwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(false); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true;
-    case 1: return true
-                && IMPLICATION(jcp.with_eltwise, p.contain(sum, 0))
-                && IMPLICATION(!jcp.with_eltwise, is_relu(0) || p.contain(sum, 0));
-    case 2: return true
-                && IMPLICATION(jcp.with_eltwise, p.contain(sum, 0) && is_relu(1))
-                && IMPLICATION(!jcp.with_eltwise, false
-                        || (p.contain(sum, 0) && is_relu(1))
-                        || (p.contain(sum, 1) && is_relu(0)));
-    case 3: return true
-                && jcp.with_eltwise == false
-                && (is_relu(0) && p.contain(sum, 1) && is_relu(2));
-    default: return false;
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_simple(0) && is_sum(1) && is_simple(2));
+        default: return false;
     }
 
     return false;
@@ -540,9 +578,7 @@ status_t jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf(
         jit_1x1_conv_conf_t &jcp, const convolution_desc_t &cd,
         const memory_desc_wrapper &src_d, const memory_desc_wrapper &weights_d,
         const memory_desc_wrapper &dst_d, const memory_desc_wrapper &bias_d,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope,
-        int nthreads, bool reduce_src)
-{
+        const primitive_attr_t &attr, int nthreads, bool reduce_src) {
     if (!mayiuse(avx512_core)) return status::unimplemented;
 
     const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
@@ -577,10 +613,6 @@ status_t jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf(
     jcp.stride_w = cd.strides[1];
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
-    if (!IMPLICATION(with_relu, relu_negative_slope == 0.))
-        return status::unimplemented;
 
     jcp.signed_input = (src_d.data_type() == data_type::s8) ? true : false;
 
@@ -646,25 +678,30 @@ status_t jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf(
         max_regs = 8;
     jcp.expl_bcast = true;
 
-    const int spatial = jcp.oh;
-    jcp.ur = 1;
-    for (int ur_w = max_regs; ur_w >= min_regs; ur_w--) {
-        if ((spatial >= size_treshold && spatial % ur_w == 0)
-                || (spatial < size_treshold && jcp.os % ur_w == 0)) {
-            jcp.ur = ur_w;
-            break;
-        }
-    }
-    if (jcp.ur == 1) {
+    if (jcp.mb == 1 && jcp.ic > 128
+        && (jcp.oh <= size_treshold && jcp.ow <= size_treshold)) {
         jcp.ur = nstl::min(max_regs, jcp.os);
-        int os_tail = jcp.os % max_regs;
-        for (int i = max_regs; i >= min_regs; i--) {
-            int i_tail = jcp.os % i;
-            if (i_tail > os_tail || i_tail == 0) {
-                jcp.ur = i;
-                os_tail = i_tail;
-                if (i_tail == 0)
-                    break;
+    } else {
+        const int spatial = jcp.oh;
+        jcp.ur = 1;
+        for (int ur_w = max_regs; ur_w >= min_regs; ur_w--) {
+            if ((spatial >= size_treshold && spatial % ur_w == 0)
+                    || (spatial < size_treshold && jcp.os % ur_w == 0)) {
+                jcp.ur = ur_w;
+                break;
+            }
+        }
+        if (jcp.ur == 1) {
+            jcp.ur = nstl::min(max_regs, jcp.os);
+            int os_tail = jcp.os % max_regs;
+            for (int i = max_regs; i >= min_regs; i--) {
+                int i_tail = jcp.os % i;
+                if (i_tail > os_tail || i_tail == 0) {
+                    jcp.ur = i;
+                    os_tail = i_tail;
+                    if (i_tail == 0)
+                        break;
+                }
             }
         }
     }
@@ -786,6 +823,17 @@ status_t jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf(
     return status::success;
 }
 
+void jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad,
+        const jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) {
+    using namespace mkldnn::impl::memory_tracking::names;
+
+    if (jcp.signed_input && jcp.ver != ver_vnni) {
+        size_t count = nstl::max(attr.output_scales_.count_, 16);
+        scratchpad.book(key_conv_adjusted_scales, sizeof(float) * count);
+    }
+}
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp
index 9765de92f..4e3ff510e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp
@@ -18,8 +18,12 @@
 #define JIT_AVX512_CORE_X8S8S32X_1X1_CONV_KERNEL_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
+#include "jit_uni_eltwise.hpp"
+#include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -34,38 +38,39 @@ struct jit_avx512_core_x8s8s32x_1x1_conv_kernel: public jit_generator {
         jit_ker = (void (*)(jit_1x1_conv_call_s *)) this->getCode();
     }
 
+    ~jit_avx512_core_x8s8s32x_1x1_conv_kernel() {
+        for (auto inj : eltwise_injectors)
+            delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
+    }
+
     static bool post_ops_ok(jit_1x1_conv_conf_t &jcp,
                                 const primitive_attr_t &attr);
 
     static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-                                const convolution_desc_t &cd,
-                                const memory_desc_wrapper &src_d,
-                                const memory_desc_wrapper &weights_d,
-                                const memory_desc_wrapper &dst_d,
-                                const memory_desc_wrapper &bias_d,
-                                const primitive_attr_t &attr,
-                                bool with_relu, float relu_negative_slope,
-                                int nthreads, bool reduce_src);
+            const convolution_desc_t &cd,
+            const memory_desc_wrapper &src_d,
+            const memory_desc_wrapper &weights_d,
+            const memory_desc_wrapper &dst_d,
+            const memory_desc_wrapper &bias_d,
+            const primitive_attr_t &attr,
+            int nthreads, bool reduce_src);
 
-    static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-                              const convolution_desc_t &cd,
-                              const memory_desc_wrapper &src_d,
-                              const memory_desc_wrapper &weights_d,
-                              const memory_desc_wrapper &dst_d,
-                              const memory_desc_wrapper &bias_d,
-                              const primitive_attr_t &attr,
-                              int nthreads, bool reduce_src)
-    {
-        return init_conf(jcp, cd, src_d, weights_d, dst_d, bias_d, attr, false,
-            0.0, nthreads, reduce_src);
-    }
-    bool maybe_relu(int position);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr);
 
     jit_1x1_conv_conf_t jcp;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_1x1_conv_call_s *);
 
   private:
+    nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
+
     using reg64_t = const Xbyak::Reg64;
     using zmm_t = const Xbyak::Zmm;
     using mask_t = const Xbyak::Opmask;
@@ -90,6 +95,10 @@ struct jit_avx512_core_x8s8s32x_1x1_conv_kernel: public jit_generator {
     reg64_t aux_reg_output_data = abi_not_param1;
     reg64_t reduce_loop_iter = abi_param1;
 
+    const Xbyak::Reg64 reg_d_weights = aux_reg_bcast_data;
+    const Xbyak::Reg64 reg_d_bias = reduce_loop_iter;
+    const Xbyak::Reg64 reg_oc_off = aux_reg_load_data;
+
     reg64_t reg_last_load = r8;
     mask_t ktail_mask = k6;
 
@@ -109,18 +118,17 @@ struct jit_avx512_core_x8s8s32x_1x1_conv_kernel: public jit_generator {
     int reg_bcast_data_off = 16;
     int reg_load_data_off = 24;
     int reg_ptr_sum_scale_off = 32;
-    int reg_last_load_off = 40;
-    int reg_comp_data_off = 48;
-    int stack_space_needed = 56;
+    int reg_comp_data_off = 40;
+    int stack_space_needed = 48;
 
     void bcast_loop(int load_loop_blk);
     void reduce_loop(int load_loop_blk, int ur, int substep, bool wraparound);
 
     void generate();
-    static void balance(jit_1x1_conv_conf_t &jcp, int nthreads);
     void cvt2ps(data_type_t type_in, zmm_t zmm_in, const Xbyak::Operand &op,
         bool mask_flag);
 };
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
index a71f285ed..1bab22e9d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.cpp
@@ -14,12 +14,11 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_types.h"
-
 #include "c_types_map.hpp"
-#include "utils.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
+#include "utils.hpp"
+
 #include "jit_generator.hpp"
 
 #include "jit_avx512_core_x8s8s32x_1x1_convolution.hpp"
@@ -30,6 +29,7 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 namespace {
@@ -56,41 +56,61 @@ void balance2D(U nthr, U ithr, T ny, T &ny_start, T &ny_end,
 }
 
 /* convolution forward */
-template <bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t
-                              <with_relu, src_type, dst_type>::execute_forward()
+template <data_type_t src_type, data_type_t dst_type>
+void jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t
+                              <src_type, dst_type>::execute_forward() const
 {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights =
         reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const char *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
+
+    auto scratchpad = this->scratchpad();
+
+    if (pd()->jcp_.signed_input && pd()->jcp_.ver != ver_vnni) {
+        auto local_scales = scratchpad.template get<float>(
+                key_conv_adjusted_scales);
+        auto scales = pd()->attr()->output_scales_.scales_;
+        size_t count = pd()->attr()->output_scales_.count_;
+        float factor = 1.f / pd()->jcp_.wei_adj_scale;
+        if (count == 1) {
+            utils::array_set(local_scales, scales[0] * factor, 16);
+        } else {
+            for (size_t c = 0; c < count; c++)
+                local_scales[c] = scales[c] * factor;
+        }
+    }
+
     parallel(kernel_->jcp.nthr, [&](const int ithr, const int nthr) {
-        execute_forward_thr(ithr, nthr, src, weights, bias, dst);
+        execute_forward_thr(ithr, nthr, src, weights, bias, dst, scratchpad);
     });
 }
 
-template <bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<with_relu, src_type, dst_type>
+template <data_type_t src_type, data_type_t dst_type>
+void jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<src_type, dst_type>
 ::execute_forward_thr(const int ithr, const int nthr, const src_data_t *src,
-        const wei_data_t *weights, const char *bias, dst_data_t *dst) {
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+        const wei_data_t *weights, const char *bias, dst_data_t *dst,
+        const memory_tracking::grantor_t &scratchpad) const {
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
-    const size_t bia_dt_size = conf_.with_bias()
-        ? types::data_type_size(conf_.cdesc()->bias_desc.data_type) : 0;
+    const size_t bia_dt_size = pd()->with_bias()
+        ? types::data_type_size(pd()->desc()->bias_desc.data_type) : 0;
 
     const auto &jcp = kernel_->jcp;
+    auto rtus_space = scratchpad.get<src_data_t>(key_conv_rtus_space);
+    auto local_scales = scratchpad.get<float>(key_conv_adjusted_scales);
 
     const int work_amount = jcp.mb * jcp.ngroups * jcp.nb_bcast;
 
-    const int stride_h = conf_.cdesc()->strides[0];
-    const int stride_w = conf_.cdesc()->strides[1];
-    const int pad_t = conf_.cdesc()->padding[0][0];
-    const int pad_l = conf_.cdesc()->padding[0][1];
+    const int stride_h = pd()->desc()->strides[0];
+    const int stride_w = pd()->desc()->strides[1];
+    const int pad_t = pd()->desc()->padding[0][0];
+    const int pad_l = pd()->desc()->padding[0][1];
 
-    const auto &oscales = conf_.attr()->output_scales_;
+    const auto &oscales = pd()->attr()->output_scales_;
 
     int offset = jcp.ngroups * (jcp.oc / jcp.oc_block) * (jcp.ic / jcp.ic_block)
         * jcp.oc_block * jcp.ic_block;
@@ -167,17 +187,17 @@ void _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<with_relu, src_type, dst_ty
         const size_t dst_off = dst_d.blk_off(n, _ocb * jcp.oc_block, oh, ow);
 
         p.output_data = &dst[dst_off];
-        p.load_data = &weights[conf_.with_groups()
+        p.load_data = &weights[pd()->with_groups()
             ? weights_d.blk_off(g, ocb, icb)
             : weights_d.blk_off(ocb, icb)];
         p.bias_data = &bias[_ocb * jcp.oc_block * bia_dt_size];
         p.compensation = (jcp.signed_input)
             ? &compensation[_ocb * jcp.oc_block] : 0;
         p.scales = (jcp.signed_input && jcp.ver != ver_vnni)
-            ? &local_scales_[jcp.is_oc_scale * _ocb * jcp.oc_block]
+            ? &local_scales[jcp.is_oc_scale * _ocb * jcp.oc_block]
             : &oscales.scales_[jcp.is_oc_scale * _ocb * jcp.oc_block];
-        if (conf_.rtus_.reduce_src_) {
-            rp.ws = scratch_ + ithr * ws_per_thread_
+        if (pd()->rtus_.reduce_src_) {
+            rp.ws = rtus_space + ithr * pd()->rtus_.space_per_thread_
                 + _icb * jcp.is * jcp.ic_block;
             if (ocb == ocb_start) {
                 rp.src = src + src_d.blk_off(n, _icb * jcp.ic_block, ih, iw);
@@ -187,6 +207,8 @@ void _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<with_relu, src_type, dst_ty
         } else
             p.bcast_data = src + src_d.blk_off(n, _icb * jcp.ic_block, ih, iw);
 
+        p.oc_off = _ocb * jcp.oc_block * sizeof(float);
+
         kernel_->jit_ker(&p);
     };
 
@@ -255,38 +277,16 @@ void _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<with_relu, src_type, dst_ty
     }
 }
 
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                  data_type::u8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                  data_type::u8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                  data_type::s8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                  data_type::s8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                  data_type::u8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                  data_type::u8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                  data_type::s8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                  data_type::s8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                 data_type::u8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                 data_type::u8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                 data_type::s8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                 data_type::s8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                 data_type::u8, data_type::f32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                 data_type::u8, data_type::f32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false,
-                                                 data_type::s8, data_type::f32>;
-template struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true,
-                                                 data_type::s8, data_type::f32>;
+using namespace data_type;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<u8, u8>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<s8, u8>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<u8, s8>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<s8, s8>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<u8, s32>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<s8, s32>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<u8, f32>;
+template struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<s8, f32>;
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.hpp
index 23e0aabc0..850cb9760 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_convolution.hpp
@@ -18,33 +18,32 @@
 #define CPU_JIT_AVX512_CORE_X8S8S32X_1X1_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
-#include "cpu_convolution_pd.hpp"
-#include "cpu_engine.hpp"
-#include "cpu_reducer.hpp"
+#include "memory_tracking.hpp"
 #include "mkldnn_thread.hpp"
 #include "utils.hpp"
 
-#include "jit_uni_1x1_conv_utils.hpp"
+#include "cpu_convolution_pd.hpp"
+#include "cpu_engine.hpp"
+
 #include "jit_avx512_core_x8s8s32x_1x1_conv_kernel.hpp"
+#include "jit_uni_1x1_conv_utils.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template<bool with_relu, impl::data_type_t src_type, impl::data_type_t dst_type>
-struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t : public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+template<impl::data_type_t src_type, impl::data_type_t dst_type>
+struct jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t : public cpu_primitive_t {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_(), rtus_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_int8_1x1:", avx512_core, ""),
-                _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<with_relu,
+                jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<
                 src_type, dst_type>);
 
         virtual status_t init() override {
@@ -53,84 +52,84 @@ struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t : public cpu_primitive_t
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                        alg_kind::convolution_auto,
+                        alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
-                && this->cdesc_().src_desc.data_type == src_type
-                && this->cdesc_().dst_desc.data_type == dst_type
-                && this->cdesc_().weights_desc.data_type == data_type::s8
+                && this->desc()->src_desc.data_type == src_type
+                && this->desc()->dst_desc.data_type == dst_type
+                && this->desc()->weights_desc.data_type == data_type::s8
                 && IMPLICATION(this->with_bias(), utils::one_of(
-                            this->cdesc_().bias_desc.data_type, data_type::f32,
+                            this->desc()->bias_desc.data_type, data_type::f32,
                             data_type::s32, data_type::s8, data_type::u8))
-                && this->cdesc_().accum_data_type == data_type::s32;
-
+                && this->desc()->accum_data_type == data_type::s32;
             if (!ok) return status::unimplemented;
 
-            const convolution_desc_t *conv_d = &this->cdesc_();
+            const convolution_desc_t *conv_d = this->desc();
             const memory_desc_t *src_d = this->src_pd_.desc();
             rtus_prepare(this, conv_d, src_d, this->dst_pd_.desc());
-            return jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf(jcp_,
-                    *conv_d, *src_d, *this->weights_pd_.desc(),
-                    *this->dst_pd_.desc(), *this->bias_pd_.desc(), *this->attr(),
-                    with_relu, this->negative_slope(),
-                    mkldnn_get_max_threads(), rtus_.reduce_src_);
+
+            status_t status =
+                jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_conf(jcp_,
+                        *conv_d, *src_d, *this->weights_pd_.desc(),
+                        *this->dst_pd_.desc(), *this->bias_pd_.desc(),
+                        *this->attr(), mkldnn_get_max_threads(),
+                        rtus_.reduce_src_);
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_core_x8s8s32x_1x1_conv_kernel::init_scratchpad(
+                    scratchpad, jcp_, *this->attr());
+
+            rtus_prepare_space_info(this, scratchpad);
+
+            return status::success;
         }
 
         jit_1x1_conv_conf_t jcp_;
-        struct reduce_to_unit_stride_t {
-            convolution_desc_t conv_d_;
-            bool reduce_src_;
-        } rtus_;
-
-        protected:
-            virtual status_t set_default_params() override {
-                using namespace memory_format;
-                bool is_sign_input =
-                    (this->cdesc_().src_desc.data_type == data_type::s8)
-                        ? true : false;
-                if (this->src_pd_.desc()->format == any)
-                    CHECK(this->src_pd_.set_format(nhwc));
-                if (this->dst_pd_.desc()->format == any)
-                    CHECK(this->dst_pd_.set_format(nhwc));
-                if (this->weights_pd_.desc()->format == any)
-                    CHECK(this->weights_pd_.set_format(this->with_groups()
-                        ? ((is_sign_input) ? gOIhw4i16o4i_s8s8 : gOIhw4i16o4i)
-                        : ((is_sign_input) ? OIhw4i16o4i_s8s8 : OIhw4i16o4i)));
-                if (this->bias_pd_.desc()->format == any)
-                    CHECK(this->bias_pd_.set_format(x));
-                return status::success;
-            }
+        reduce_to_unit_stride_t rtus_;
+
+    protected:
+        virtual status_t set_default_params() override {
+            using namespace memory_format;
+            bool is_sign_input =
+                this->desc()->src_desc.data_type == data_type::s8;
+
+            if (this->src_pd_.desc()->format == any)
+                CHECK(this->src_pd_.set_format(nhwc));
+            if (this->dst_pd_.desc()->format == any)
+                CHECK(this->dst_pd_.set_format(nhwc));
+            if (this->weights_pd_.desc()->format == any)
+                CHECK(this->weights_pd_.set_format(this->with_groups()
+                    ? (is_sign_input ? gOIhw4i16o4i_s8s8 : gOIhw4i16o4i)
+                    : (is_sign_input ? OIhw4i16o4i_s8s8 : OIhw4i16o4i)));
+            if (this->bias_pd_.desc()->format == any)
+                CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
+
+            return status::success;
+        }
     };
 
     template <cpu_isa_t isa, typename conv_t>
     friend void init_rtus_driver(conv_t *self);
-    _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t(const pd_t *pd,
-                                          const input_vector &inputs,
-                                          const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , kernel_(nullptr), rtus_driver_(nullptr), ws_per_thread_(0)
-        , scratch_(nullptr), local_scales_(nullptr)
+
+    jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs)
+        , kernel_(nullptr), rtus_driver_(nullptr)
     {
-        kernel_ = new jit_avx512_core_x8s8s32x_1x1_conv_kernel(conf_.jcp_,
-                    *conf_.attr());
+        kernel_ = new jit_avx512_core_x8s8s32x_1x1_conv_kernel(pd()->jcp_,
+                    *pd()->attr());
         init_rtus_driver<avx512_common>(this);
-        if (conf_.jcp_.signed_input && conf_.jcp_.ver != ver_vnni) {
-            size_t scales_size = ((conf_.attr()->output_scales_.count_ == 1)
-                    ? 16
-                    : conf_.attr()->output_scales_.count_);
-            local_scales_ = (float *)malloc(sizeof(float) * scales_size, 64);
-            for (size_t i = 0; i < scales_size; i++) {
-                local_scales_[i] = conf_.attr()->output_scales_.scales_[i] *
-                                        (1.f / conf_.jcp_.wei_adj_scale);
-            }
-        }
     }
-    ~_jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t() {
+
+    ~jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t() {
         delete kernel_;
         delete rtus_driver_;
-        free(scratch_);
-        if (local_scales_) free(local_scales_);
     }
 
     typedef typename prec_traits<src_type>::type src_data_t;
@@ -138,32 +137,23 @@ struct _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t : public cpu_primitive_t
     typedef typename prec_traits<dst_type>::type dst_data_t;
     typedef typename prec_traits<data_type::s32>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
   private:
-    void execute_forward();
+    void execute_forward() const;
     void execute_forward_thr(const int ithr, const int nthr,
             const src_data_t *src, const wei_data_t *weights,
-            const char *bias, dst_data_t *dst);
-    pd_t conf_;
-    jit_avx512_core_x8s8s32x_1x1_conv_kernel *kernel_;
+            const char *bias, dst_data_t *dst,
+            const memory_tracking::grantor_t &scratchpad) const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
+    jit_avx512_core_x8s8s32x_1x1_conv_kernel *kernel_;
     rtus_driver_t<avx512_common> *rtus_driver_;
-    size_t ws_per_thread_;
-    src_data_t *scratch_;
-    float* local_scales_;
 };
 
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t =
-    _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<false, src_type, dst_type>;
-
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx512_core_x8s8s32x_1x1_convolution_relu_t =
-    _jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<true, src_type, dst_type>;
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_deconvolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_deconvolution.hpp
new file mode 100644
index 000000000..426c13fd0
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_1x1_deconvolution.hpp
@@ -0,0 +1,162 @@
+
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_JIT_AVX512_CORE_X8S8S32X_1X1_DECONVOLUTION_HPP
+#define CPU_JIT_AVX512_CORE_X8S8S32X_1X1_DECONVOLUTION_HPP
+
+#include "c_types_map.hpp"
+#include "cpu_deconvolution_pd.hpp"
+#include "cpu_engine.hpp"
+#include "cpu_reducer.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+#include "cpu_convolution_pd.hpp"
+#include "type_helpers.hpp"
+#include "primitive_iterator.hpp"
+
+#include "jit_uni_1x1_conv_utils.hpp"
+#include "jit_avx512_core_x8s8s32x_1x1_convolution.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <impl::data_type_t src_type, impl::data_type_t dst_type>
+struct jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t
+        : public cpu_primitive_t {
+    struct pd_t : public cpu_deconvolution_fwd_pd_t {
+        pd_t(engine_t *engine, const deconvolution_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const deconvolution_fwd_pd_t *hint_fwd_pd)
+            : cpu_deconvolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , conv_pd_(nullptr) {}
+
+        pd_t(const pd_t &other)
+            : cpu_deconvolution_fwd_pd_t(other)
+            , conv_pd_(other.conv_pd_->clone())
+            , conv_supports_bias_(other.conv_supports_bias_) {}
+
+        ~pd_t() { delete conv_pd_; }
+
+        DECLARE_DECONVOLUTION_PD_T(
+                jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t<src_type,
+                        dst_type>);
+
+        status_t init_convolution() {
+
+            convolution_desc_t cd;
+            status_t status;
+
+            auto dd = this->desc();
+            status = conv_desc_init(&cd, prop_kind::forward_training,
+                    alg_kind::convolution_direct, &(dd->src_desc),
+                    &(dd->weights_desc), &(dd->bias_desc), &(dd->dst_desc),
+                    dd->strides, dd->dilates, dd->padding[0], dd->padding[1],
+                    dd->padding_kind);
+
+            if (status == status::success) {
+                status = mkldnn_primitive_desc::create<
+                        typename mkldnn::impl::cpu::
+                                jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<src_type,
+                                        dst_type>::pd_t>(&conv_pd_,
+                        (op_desc_t *)&cd, &(this->attr_), this->engine_,
+                        nullptr);
+            }
+
+            if (status == status::success) {
+                status = set_default_params();
+            }
+
+            return status;
+        };
+
+        virtual status_t init() override {
+            using namespace prop_kind;
+            status_t status;
+
+            assert(this->engine()->kind() == engine_kind::cpu);
+            bool ok = true && utils::one_of(this->desc()->prop_kind,
+                                      prop_kind::forward_training,
+                                      prop_kind::forward_inference)
+                    && this->desc()->alg_kind == alg_kind::deconvolution_direct
+                    && !this->has_zero_dim_memory()
+                    && this->desc()->src_desc.data_type == src_type
+                    && this->desc()->dst_desc.data_type == dst_type
+                    && this->desc()->weights_desc.data_type == data_type::s8
+                    && IMPLICATION(this->with_bias(),
+                               utils::one_of(this->desc()->bias_desc.data_type,
+                                           data_type::f32, data_type::s32,
+                                           data_type::s8, data_type::u8))
+                    && this->desc()->accum_data_type == data_type::s32;
+
+            if (ok)
+                status = init_convolution();
+            else
+                status = status::unimplemented;
+
+            return status;
+        }
+
+    protected:
+        virtual status_t set_default_params() {
+            using namespace memory_format;
+            auto conv_1x1_pd_ = static_cast<typename mkldnn::impl::cpu::
+                            jit_avx512_core_x8s8s32x_1x1_convolution_fwd_t<src_type,
+                                    dst_type>::pd_t *>(conv_pd_);
+            CHECK(this->src_pd_.set_format(
+                    conv_1x1_pd_->src_pd()->desc()->format));
+            CHECK(this->dst_pd_.set_format(
+                    conv_1x1_pd_->dst_pd()->desc()->format));
+            CHECK(this->weights_pd_.set_format(
+                    conv_1x1_pd_->weights_pd()->desc()->format));
+            if (this->with_bias())
+                CHECK(this->bias_pd_.set_format(
+                        conv_1x1_pd_->weights_pd(1)->desc()->format));
+            return status::success;
+        }
+
+        primitive_desc_t *conv_pd_;
+        bool conv_supports_bias_;
+    };
+
+    jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs), conv_p_(nullptr) {}
+
+    ~jit_avx512_core_x8s8s32x_1x1_deconvolution_fwd_t() {
+        delete this->conv_p_;
+    }
+
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
+        case prop_kind::forward_training:
+        case prop_kind::forward_inference: (conv_p_)->execute(e); break;
+        default: assert(!"invalid prop_kind");
+        }
+        e->set_state(event_t::ready);
+    }
+
+private:
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+    primitive_t *conv_p_;
+};
+
+}
+}
+}
+
+#endif /* CPU_JIT_AVX512_CORE_X8S8S32X_1X1_DECONVOLUTION_HPP */
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.cpp
index 9acad2e69..054fe4e6e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.cpp
@@ -15,9 +15,11 @@
 *******************************************************************************/
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
+
 #include "cpu_memory.hpp"
 
 #include "jit_avx512_core_x8s8s32x_conv_kernel.hpp"
@@ -29,77 +31,85 @@ namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 using namespace Xbyak;
 
 namespace {
-void pick_loop_order(jit_conv_conf_t &jcp)
+void pick_loop_order(jit_conv_conf_t &jcp, int nthr)
 {
     jcp.loop_order = loop_cwgn;
-    if (jcp.ngroups > 1)
+    if (jcp.ngroups > 1) {
         jcp.loop_order = loop_ngcw;
-}
-}
-
-bool jit_avx512_core_x8s8s32x_fwd_kernel::maybe_relu(int position)
-{
-    using namespace primitive_kind;
-    const auto &p = attr_.post_ops_;
-
-    if (position == 0) {
-        /* relu before sum */
-        return false
-            || jcp.with_eltwise
-            || p.contain(eltwise, 0)
-            || (jcp.dst_dt == data_type::u8 && !p.contain(sum, 0));
-    } else if (position == 1) {
-        /* relu after sum */
-        const int sum_idx = p.contain(sum, 0)
-            ? 0 : (p.contain(sum, 1) ? 1 : -1);
-        if (sum_idx == -1)
-            return false;
-
-        return false
-            || p.contain(eltwise, sum_idx + 1)
-            || jcp.dst_dt == data_type::u8;
+        if (jcp.mb < nthr)
+            jcp.loop_order = loop_nhwcg;
     }
-
-    return false;
+}
 }
 
-void jit_avx512_core_x8s8s32x_fwd_kernel::prepare_output(int ur_w)
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::prepare_output(int ur_w)
 {
-    for (int k = 0; k < jcp.nb_oc_blocking; k++)
+    int nb_oc_block
+            = jcp.is_depthwise ? jcp.nb_ch_blocking : jcp.nb_oc_blocking;
+    for (int k = 0; k < nb_oc_block; k++)
         for (int j = 0; j < ur_w; j++) {
-            Zmm zmm = zmm_out(j, k);
-            vpxord(zmm, zmm, zmm);
+            Vmm vmm = vmm_out(j, k);
+            vpxord(vmm, vmm, vmm);
         }
     if (jcp.signed_input) {
         xor_(reg_scratch, reg_scratch);
-        Reg8 _t8 = reg_scratch.cvt8();
-        mov(_t8, (int8_t)-128);
-        vpbroadcastb(zmm_shift, _t8);
+        if (jcp.is_depthwise && !jcp.is_fast_depthwise) {
+            Reg32 _t32 = reg_scratch.cvt32();
+            mov(_t32, (uint32_t)128);
+            vpbroadcastd(vmm_shift, _t32);
+        } else {
+            Reg8 _t8 = reg_scratch.cvt8();
+            mov(_t8, (int8_t)128);
+            vpbroadcastb(vmm_shift, _t8);
+        }
     }
+    if (jcp.is_fast_depthwise) {
+       vpxord(zmm_zero_blend, zmm_zero_blend, zmm_zero_blend);
+    }
+}
+
+template<typename Vmm>
+const Vmm _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::
+    vmm_mask(const Vmm vmm_in, bool mask_flag, bool store) {
+    return vmm_in;
 }
 
-void jit_avx512_core_x8s8s32x_fwd_kernel::cvt2ps(data_type_t type_in,
-        zmm_t zmm_in, const Xbyak::Operand &op, bool mask_flag) {
-    zmm_t zmm = mask_flag ? zmm_in | ktail_mask | T_z : zmm_in;
+template<>
+const Zmm _jit_avx512_core_x8s8s32x_fwd_kernel<Zmm>::
+    vmm_mask(const Zmm zmm_in, bool mask_flag, bool store) {
+    return mask_flag ? (store ? zmm_in | ktail_mask : zmm_in | ktail_mask | T_z)
+                     : zmm_in;
+}
+
+
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::cvt2ps(data_type_t type_in,
+        const Vmm vmm_in, const Operand &op, bool mask_flag) {
+    //const Vmm vmm = mask_flag ? vmm_in | ktail_mask | T_z : vmm_in;
+    const Vmm vmm = vmm_mask(vmm_in, mask_flag);
     switch (type_in) {
     case data_type::f32:
-    case data_type::s32: vmovups(zmm, op); break;
-    case data_type::s8: vpmovsxbd(zmm, op); break;
-    case data_type::u8: vpmovzxbd(zmm, op); break;
+    case data_type::s32: vmovups(vmm, op); break;
+    case data_type::s8: vpmovsxbd(vmm, op); break;
+    case data_type::u8: vpmovzxbd(vmm, op); break;
     default: assert(!"unsupported data type");
     }
     if (type_in != data_type::f32)
-        vcvtdq2ps(zmm_in, zmm_in);
+        vcvtdq2ps(vmm_in, vmm_in);
 }
 
-void jit_avx512_core_x8s8s32x_fwd_kernel::store_output(int ur_w,
-        int last_oc_block_flag)
-{
-    int nb_oc_block = jcp.nb_oc_blocking;
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::store_output(
+        int ur_w, bool last_oc_block_flag) {
+    int nb_oc_block
+            = jcp.is_depthwise ? jcp.nb_ch_blocking : jcp.nb_oc_blocking;
+    int oc_block = jcp.is_depthwise ? jcp.ch_block : jcp.oc_block;
 
     mov(reg_bias, ptr[param1 + GET_OFF(bias)]);
     mov(reg_ptr_scales, ptr[param1 + GET_OFF(scales)]);
@@ -108,71 +118,122 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::store_output(int ur_w,
 
     const auto &p = attr_.post_ops_;
     const int sum_idx = p.find(primitive_kind::sum);
-    const float *p_sum_scale = (sum_idx != -1)
-            ? &p.entry_[sum_idx].sum.scale
-            : nullptr;
+    const float *p_sum_scale = nullptr;
+    if (sum_idx != -1) {
+        const auto &p_entry = p.entry_[sum_idx];
+        p_sum_scale = &p_entry.sum.scale;
+    }
+
     if (p_sum_scale && *p_sum_scale != 1.f)
         mov(reg_ptr_sum_scale, (size_t)p_sum_scale);
 
-    if (jcp. signed_input && jcp.ver != ver_vnni) {
+    if (jcp.signed_input && jcp.ver != ver_vnni) {
+        /* put 'wei_adj_scale = 0.5' for bias calculation */
         mov(reg_bias_alpha, float2int(jcp.wei_adj_scale));
         vmovq(xmm_bias_alpha(), reg_bias_alpha);
-        vbroadcastss(zmm_bias_alpha(), xmm_bias_alpha());
+        vbroadcastss(vmm_bias_alpha(), xmm_bias_alpha());
     }
 
     for (int k = 0; k < nb_oc_block; k++) {
-        const bool mask_flag = last_oc_block_flag == 1 && k == nb_oc_block - 1;
-        int scale_offset = jcp.is_oc_scale * (sizeof(float) * k * jcp.oc_block);
-        auto zmm_bias = zmm_tmp;
-        auto zmm_comp = zmm_shift;
+        const bool mask_flag = last_oc_block_flag && k == nb_oc_block - 1;
+        int scale_offset = jcp.is_oc_scale * (sizeof(float) * k * oc_block);
         if (jcp.with_bias) {
-            int bias_offset = jcp.typesize_bia * k * jcp.oc_block;
+            int bias_offset = jcp.typesize_bia * k * oc_block;
             auto bias_addr = EVEX_compress_addr(reg_bias, bias_offset);
 
-            cvt2ps(jcp.bia_dt, zmm_bias, bias_addr, mask_flag);
-            if (jcp. signed_input && jcp.ver != ver_vnni)
-                vmulps(zmm_bias, zmm_bias, zmm_bias_alpha());
+            cvt2ps(jcp.bia_dt, vmm_bias, bias_addr, mask_flag);
+            if (jcp.signed_input && jcp.ver != ver_vnni)
+                /* bias *= 0.5 */
+                vmulps(vmm_bias, vmm_bias, vmm_bias_alpha());
         }
         if (jcp.signed_input) {
-            int comp_offset = sizeof(int32_t) * k * jcp.oc_block;
+            int comp_offset = sizeof(int32_t) * k * oc_block;
             auto comp_addr = EVEX_compress_addr(reg_compensation, comp_offset);
 
-            cvt2ps(data_type::s32, zmm_comp, comp_addr, mask_flag);
+            cvt2ps(data_type::s32, vmm_comp, comp_addr, mask_flag);
         }
+        /* add to zmm_accum: compensation, bias and permute */
         for (int j = 0; j < ur_w; j++) {
-            int aux_output_offset
-                = jcp.typesize_out * (k * jcp.oc_block
-                        + j * jcp.oc_without_padding * jcp.ngroups);
-            auto addr = EVEX_compress_addr(reg_out, aux_output_offset);
-
-            Zmm zmm = zmm_out(j, k);
-            vcvtdq2ps(zmm, zmm);
+            Vmm vmm = vmm_out(j, k);
+            if (jcp.is_fast_depthwise)
+                vpermd(zmm_out(j, k), zmm_permute, zmm_out(j, k));
+            vcvtdq2ps(vmm, vmm);
             if (jcp.signed_input)
-                vaddps(zmm, zmm, zmm_comp);
+                vaddps(vmm, vmm, vmm_comp);
             if (jcp.with_bias)
-                vaddps(zmm, zmm, zmm_bias);
+                vaddps(vmm, vmm, vmm_bias);
 
-            zmm_t mask_zmm = mask_flag ? zmm | ktail_mask | T_z : zmm;
-            vmulps(mask_zmm, zmm,
+            const Vmm vmm_k = vmm_mask(vmm, mask_flag);
+            vmulps(vmm_k, vmm,
                     EVEX_compress_addr(reg_ptr_scales, scale_offset));
-            if (maybe_relu(0)) {
-                vpxord(zmm_zero, zmm_zero, zmm_zero);
-                vmaxps(zmm, zmm_zero, zmm);
+        }
+    }
+
+    int eltwise_inj_idx = 0;
+    int depthwise_inj_idx = 0;
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            if (ur_w == jcp.ur_w)
+               eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, nb_oc_block * jcp.ur_w);
+            else
+                for (int k = 0; k < nb_oc_block; k++)
+                    eltwise_injectors[eltwise_inj_idx]->compute_vector_range(k * jcp.ur_w, k * jcp.ur_w + ur_w);
+
+            eltwise_inj_idx++;
+        } else if (post_op.is_depthwise()) {
+            mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+            mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+            add(reg_d_weights, ptr[param1 + GET_OFF(oc_off)]);
+            add(reg_d_bias, ptr[param1 + GET_OFF(oc_off)]);
+
+            for (int k = 0; k < nb_oc_block; k++) {
+                depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+                        k * jcp.ur_w, k * jcp.ur_w + ur_w, reg_d_weights, reg_d_bias);
+
+                add(reg_d_weights, oc_block * sizeof(float));
+                add(reg_d_bias, oc_block * sizeof(float));
             }
-            if (p_sum_scale) { // post_op: sum
-                vpxord(zmm_zero, zmm_zero, zmm_zero);
-                auto zmm_prev_dst = zmm_zero;
-                cvt2ps(jcp.dst_dt, zmm_prev_dst, addr, mask_flag);
-                if (*p_sum_scale == 1.f)
-                    vaddps(zmm, zmm_prev_dst);
-                else
-                    vfmadd231ps(zmm, zmm_prev_dst, zword_b[reg_ptr_sum_scale]);
+
+            depthwise_inj_idx++;
+        } else if (post_op.is_sum(false)) {
+            for (int k = 0; k < nb_oc_block; k++) {
+                const bool mask_flag = last_oc_block_flag && k == nb_oc_block - 1;
+                for (int j = 0; j < ur_w; j++) {
+                    int aux_output_offset
+                            = jcp.typesize_out
+                            * (k * oc_block
+                                      + j * jcp.oc_without_padding * jcp.ngroups);
+                    auto addr = EVEX_compress_addr(reg_out, aux_output_offset);
+                    Zmm zmm = zmm_out(j, k);
+                    cvt2ps(jcp.dst_dt, vmm_prev_dst, addr, mask_flag);
+                    if (*p_sum_scale == 1.f)
+                        vaddps(zmm, vmm_prev_dst);
+                    else
+                        vfmadd231ps(zmm, vmm_prev_dst, zword_b[reg_ptr_sum_scale]);
+                }
             }
-            if (maybe_relu(1)) {
-                vpxord(zmm_zero, zmm_zero, zmm_zero);
-                vmaxps(zmm, zmm_zero, zmm);
+        }
+    }
+
+    /* write out register to output_addr */
+    for (int k = 0; k < nb_oc_block; k++) {
+        const bool mask_flag = last_oc_block_flag && k == nb_oc_block - 1;
+        for (int j = 0; j < ur_w; j++) {
+            Vmm vmm = vmm_out(j, k);
+            if (jcp.dst_dt == data_type::u8) {
+                vpxord(vmm_zero, vmm_zero, vmm_zero);
+                vmaxps(vmm, vmm_zero, vmm);
             }
+
             if (jcp.dst_dt != data_type::f32) {
+                /* Note: using Zmm for rounding in Xmm/Ymm kernel
+                   because there is no instruction to do rounding
+                   from Xmm/Ymm -> Xmm/Ymm.
+                   Embedded rounding is not supported for Xmm.
+                   TODO: maybe avoid Zmm if it helps performance.*/
+                Zmm zmm = zmm_out(j, k);
                 if (attr_.round_mode_ == round_mode::nearest)
                     vcvtps2dq(zmm | T_rn_sae, zmm);
                 else if (attr_.round_mode_ == round_mode::down)
@@ -183,26 +244,120 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::store_output(int ur_w,
         }
 
         for (int j = 0; j < ur_w; j++) {
-            int aux_output_offset = jcp.typesize_out * (k * jcp.oc_block
-                + j * jcp.oc_without_padding * jcp.ngroups);
+            int aux_output_offset = jcp.typesize_out
+                    * (k * oc_block + j * jcp.oc_without_padding * jcp.ngroups);
             auto addr = EVEX_compress_addr(reg_out, aux_output_offset);
 
-            Zmm zmm = zmm_out(j, k);
-            zmm_t r_zmm = mask_flag ? zmm | ktail_mask : zmm;
+            Vmm vmm = vmm_out(j, k);
+            const Vmm r_vmm = vmm_mask(vmm, mask_flag, true);
+
             switch (jcp.dst_dt) {
             case data_type::f32:
-            case data_type::s32: vmovups(addr, r_zmm); break;
-            case data_type::s8: vpmovsdb(addr, r_zmm); break;
-            case data_type::u8: vpmovusdb(addr, r_zmm); break;
+            case data_type::s32: vmovups(addr, r_vmm); break;
+            case data_type::s8: vpmovsdb(addr, r_vmm); break;
+            case data_type::u8: vpmovusdb(addr, r_vmm); break;
             default: assert(!"unknown dst_dt");
             }
         }
     }
+
 }
 
-void jit_avx512_core_x8s8s32x_fwd_kernel::compute_ker(int ur_w,
-    int pad_l, int pad_r, int last_ic_block_flag, bool h_padded)
-{
+template <typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::compute_ker_dw(
+        int ur_w, int pad_l, int pad_r, ic_block_t last_ic_block_flag, bool h_padded) {
+    assert(!"invalid group blocking for depthwise convolution");
+}
+
+template <>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Zmm>::compute_ker_dw(
+        int ur_w, int pad_l, int pad_r, ic_block_t last_ic_block_flag, bool h_padded) {
+    auto input_offset = [=](int oi, int ii, int ki) {
+        return jcp.typesize_in
+                * ((ki * (jcp.dilate_w + 1) + oi * jcp.stride_w - pad_l)
+                                  * jcp.ngroups
+                          + ii * jcp.ch_block);
+    };
+
+    auto kernel_offset = [=](int ii, int ki) {
+        return jcp.typesize_in * ((ii * jcp.kh * jcp.kw + ki) * jcp.ch_block);
+    };
+
+    auto compute = [=](Zmm vreg_acc, Zmm vreg_wei,
+            Zmm vreg_src) {
+        // okay for depthwise since src is zero-extended
+        if (jcp.ver == ver_vnni) {
+            vpdpbusd(vreg_acc, vreg_src, vreg_wei);
+        } else {
+            // zmm_src is a tmp register that can be safely overwritten here
+            vpmaddwd(vreg_src, vreg_src, vreg_wei);
+            vpaddd(vreg_acc, vreg_acc, vreg_src);
+        }
+    };
+
+    for (int ki = 0; ki < jcp.kw; ki++) {
+        for (int ii = 0; ii < jcp.nb_ch_blocking; ii++) {
+            int aux_kernel_offset = kernel_offset(ii, ki);
+            if (jcp.is_fast_depthwise) {
+                vbroadcasti32x4(zmm_wei,
+                        EVEX_compress_addr(aux_reg_ker, aux_kernel_offset));
+                vpblendmb(zmm_wei | kblend_mask, zmm_zero_blend, zmm_wei);
+            } else {
+                vpmovsxbd(zmm_wei,
+                         EVEX_compress_addr(aux_reg_ker, aux_kernel_offset));
+            }
+            if (h_padded) {
+                if (jcp.ver == ver_vnni) {
+                    vpxord(zmm_src, zmm_src, zmm_src);
+                    vpaddb(zmm_src, zmm_src, vmm_shift);
+                }
+                for (int jj = 0; jj < ur_w; jj++) {
+                    if (jcp.ver != ver_vnni) {
+                        vpxord(zmm_src, zmm_src, zmm_src);
+                        vpaddb(zmm_src, zmm_src, vmm_shift);
+                    }
+                    compute(zmm_out(jj, ii), zmm_wei, zmm_src);
+                }
+            } else {
+                const bool mask_flag = last_ic_block_flag != no_last_block
+                    && ii == jcp.nb_ch_blocking - 1;
+                const Zmm r_zmm_src = mask_flag ? zmm_src | ktail_mask : zmm_src;
+                int jj_start = get_ow_start(ki, pad_l);
+                int jj_end = get_ow_end(ur_w, ki, pad_r);
+                int start_ = jcp.signed_input ? 0 : jj_start;
+                int end_ = jcp.signed_input ? ur_w : jj_end;
+                for (int jj = start_; jj < end_; jj++) {
+                    if (jj >= jj_start && jj < jj_end) {
+                        int aux_input_offset = input_offset(jj, ii, ki);
+                        if (jcp.is_fast_depthwise) {
+                           vbroadcasti32x4(zmm_src,
+                                    EVEX_compress_addr(aux_reg_inp, aux_input_offset));
+                        } else {
+                            vpmovzxbd(r_zmm_src,
+                                    EVEX_compress_addr(aux_reg_inp, aux_input_offset));
+                        }
+                        if (jcp.signed_input) {
+                            vpaddb(zmm_src, zmm_src, vmm_shift);
+                        }
+                    } else {
+                        if (jcp.signed_input) {
+                            vpxord(zmm_src, zmm_src, zmm_src);
+                            vpaddb(zmm_src, zmm_src, vmm_shift);
+                        }
+                    }
+                    compute(zmm_out(jj, ii), zmm_wei, zmm_src);
+                }
+            }
+        }
+    }
+}
+
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::compute_ker(int ur_w, int pad_l,
+        int pad_r, ic_block_t last_ic_block_flag, bool h_padded) {
+    if (jcp.is_depthwise)
+        return compute_ker_dw(ur_w, pad_l, pad_r, last_ic_block_flag, h_padded);
+
     int kw = jcp.kw;
     int stride_w = jcp.stride_w;
     int ic_block = jcp.ic_block;
@@ -221,17 +376,13 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::compute_ker(int ur_w,
                 * ((ii * jcp.nb_ic * jcp.kh * jcp.kw + ki) * ch_block_all
                     + 4 * ic * oc_block);
     };
-    auto compute = [=](Zmm vreg_acc, Zmm vreg_wei, Zmm vreg_src) {
+    auto compute = [=](Vmm vreg_acc, Vmm vreg_wei, Vmm vreg_src) {
         if (jcp.ver == ver_vnni) {
-            // also okay for depthwise since src is zero-extended
             vpdpbusd(vreg_acc, vreg_src, vreg_wei);
-        } else if (jcp.is_depthwise) {
-            vpmulld(zmm_tmp, vreg_src, vreg_wei);
-            vpaddd(vreg_acc, vreg_acc, zmm_tmp);
         } else {
-            vpmaddubsw(zmm_tmp, vreg_src, vreg_wei);
-            vpmaddwd(zmm_tmp, zmm_tmp, zmm_one);
-            vpaddd(vreg_acc, vreg_acc, zmm_tmp);
+            vpmaddubsw(vmm_tmp, vreg_src, vreg_wei);
+            vpmaddwd(vmm_tmp, vmm_tmp, vmm_one);
+            vpaddd(vreg_acc, vreg_acc, vmm_tmp);
         }
     };
 
@@ -242,69 +393,61 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::compute_ker(int ur_w,
         int _start = (jcp.signed_input) ? 0 : jj_start;
         int _end = (jcp.signed_input) ? ur_w : jj_end;
         /* Skip the last loads of input if (ic%16)/4 < ic_block/4 */
-        int icb = jcp.is_depthwise
-            ? 1
-            : (last_ic_block_flag != no_last_block)
-                ? div_up((jcp.ic_without_padding % ic_block), 4)
-                : ic_block / 4;
+        int icb = (last_ic_block_flag != no_last_block)
+            ? div_up((jcp.ic_without_padding % ic_block), 4)
+            : ic_block / 4;
         for (int ic = 0; ic < icb; ic++) {
             if (h_padded == true) {
-                Zmm inp = zmm_inp(0,nb_oc_block);
+                /* fill padded area with shifted values */
+                Vmm inp = vmm_inp(0,nb_oc_block);
                 vpxord(inp, inp, inp);
-                vpsubb(inp, inp, zmm_shift);
+                vpaddb(inp, inp, vmm_shift);
             } else {
                 for (int jj = _start; jj < _end; jj++) {
                     int aux_input_offset = input_offset(jj, ic, ki);
                     if (jj >= jj_start && jj < jj_end) {
-                        if (jcp.is_depthwise) {
-                            vpmovzxbd(zmm_inp(jj, nb_oc_block),
-                                    EVEX_compress_addr(
-                                              aux_reg_inp, aux_input_offset));
-                        } else if (last_ic_block_flag == last_sp_block
+                        if (last_ic_block_flag == last_sp_block
                                 && tail_size != 0 && ic == icb - 1) {
-                            Xmm xmm_tmp = Xmm(zmm_inp(jj, nb_oc_block).getIdx());
+                            Xmm xmm_tmp = Xmm(vmm_inp(jj, nb_oc_block).getIdx());
                             for (int r = 0; r < tail_size; ++r)
                                 vpinsrb(xmm_tmp, xmm_tmp,
                                     ptr[aux_reg_inp + aux_input_offset + r], r);
-                            vpbroadcastd(zmm_inp(jj, nb_oc_block), xmm_tmp);
+                            vpbroadcastd(vmm_inp(jj, nb_oc_block), xmm_tmp);
                         } else {
-                            vpbroadcastd(zmm_inp(jj, nb_oc_block),
+                            vpbroadcastd(vmm_inp(jj, nb_oc_block),
                                     EVEX_compress_addr(
                                                  aux_reg_inp, aux_input_offset));
                         }
                         if (jcp.signed_input)
-                            vpsubb(zmm_inp(jj, nb_oc_block),
-                                   zmm_inp(jj, nb_oc_block), zmm_shift);
+                            vpaddb(vmm_inp(jj, nb_oc_block),
+                                   vmm_inp(jj, nb_oc_block), vmm_shift);
                     } else {
+                        /* fill padded area with shifted values */
                         if (jcp.signed_input) {
-                            Zmm inp = zmm_inp(jj, nb_oc_block);
+                            Vmm inp = vmm_inp(jj, nb_oc_block);
                             vpxord(inp, inp, inp);
-                            vpsubb(inp, inp, zmm_shift);
+                            vpaddb(inp, inp, vmm_shift);
                         }
                     }
                 }
             }
             for (int ii = 0; ii < nb_oc_block; ii++) {
                 int aux_kernel_offset = kernel_offset(ii, ic, ki);
-                if (jcp.is_depthwise)
-                    vpmovsxbd(
-                            zmm_wei, EVEX_compress_addr(aux_reg_ker,
-                                             aux_kernel_offset));
-                else
-                    vmovups(zmm_wei, EVEX_compress_addr(aux_reg_ker,
-                                             aux_kernel_offset));
+                vmovups(vmm_wei,
+                        EVEX_compress_addr(aux_reg_ker, aux_kernel_offset));
                 for (int jj = _start; jj < _end; jj++)  {
-                    Zmm inp = (h_padded == true)
-                        ? zmm_inp(0,nb_oc_block) : zmm_inp(jj, nb_oc_block);
-                    compute(zmm_out(jj, ii), zmm_wei, inp);
+                    Vmm inp = (h_padded == true)
+                        ? vmm_inp(0,nb_oc_block) : vmm_inp(jj, nb_oc_block);
+                    compute(vmm_out(jj, ii), vmm_wei, inp);
                 }
             }
         }
     }
 }
-void jit_avx512_core_x8s8s32x_fwd_kernel::kh_loop(int ur_w,
-    int pad_l, int pad_r, int last_ic_block_flag)
-{
+
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::kh_loop(
+        int ur_w, int pad_l, int pad_r, ic_block_t last_ic_block_flag) {
     Label kh_label, skip_kh_loop;
     Label t_overflow_label, no_t_overflow_label,
           b_overflow_label, no_b_overflow_label;
@@ -318,7 +461,7 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::kh_loop(int ur_w,
     mov(aux_reg_ker, reg_ker);
 
     if (jcp.signed_input) {
-        mov(reg_overflow,  ptr[param1 + GET_OFF(t_overflow)]);
+        mov(reg_overflow, ptr[param1 + GET_OFF(t_overflow)]);
         cmp(reg_overflow, 0);
         je(no_t_overflow_label, T_NEAR);
         L(t_overflow_label); {
@@ -348,7 +491,7 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::kh_loop(int ur_w,
     }
     L(skip_kh_loop);
     if (jcp.signed_input) {
-        mov(reg_overflow,  ptr[param1 + GET_OFF(b_overflow)]);
+        mov(reg_overflow, ptr[param1 + GET_OFF(b_overflow)]);
         cmp(reg_overflow, 0);
         je(no_b_overflow_label, T_NEAR);
         L(b_overflow_label); {
@@ -363,7 +506,8 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::kh_loop(int ur_w,
     }
 }
 
-void jit_avx512_core_x8s8s32x_fwd_kernel::icb_loop(
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::icb_loop(
         int ur_w, int pad_l, int pad_r, bool is_last_sp_block)
 {
     prepare_output(ur_w);
@@ -372,7 +516,7 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::icb_loop(
     Label icb_label;
     mov(reg_icb, jcp.nb_ic);
     L(icb_label);
-    if (jcp.ic_without_padding != jcp.ic) {
+    if (jcp.ngroups % jcp.ch_block != 0 || jcp.ic_without_padding != jcp.ic) {
         Label common_ker, end_ker;
 
         cmp(reg_icb, 1); // The last IC block
@@ -406,26 +550,46 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::icb_loop(
         Label common_store, end_store;
 
         if (jcp.is_depthwise)
-            cmp(reg_oc_blocks, jcp.nb_ch - 1);
+            cmp(reg_oc_blocks, jcp.nb_ch - jcp.nb_ch_blocking);
         else
             cmp(reg_oc_blocks, jcp.nb_oc - jcp.nb_oc_blocking);
 
         jne(common_store, T_NEAR);
 
-        store_output(ur_w, 1);
+        store_output(ur_w, true); // last oc block
         jmp(end_store, T_NEAR);
 
         L(common_store);
-        store_output(ur_w, 0);
+        store_output(ur_w, false);
 
         L(end_store);
     } else {
-        store_output(ur_w, 0);
+        store_output(ur_w, false);
     }
 }
 
-void jit_avx512_core_x8s8s32x_fwd_kernel::generate()
+template<typename Vmm>
+void _jit_avx512_core_x8s8s32x_fwd_kernel<Vmm>::generate()
 {
+    const auto &p = attr_.post_ops_;
+    for (int i = 0; i < p.len_; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<avx512_common>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
+    Label permute_index_table;
     int inp_shift_pad = jcp.typesize_in * (jcp.ur_w * jcp.stride_w - jcp.l_pad)
         * jcp.ic_without_padding * jcp.ngroups;
     int inp_shift_pad_second_block = -1 * jcp.typesize_in * jcp.l_pad
@@ -437,10 +601,20 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::generate()
                         (jcp.ur_w * jcp.oc_without_padding * jcp.ngroups);
     preamble();
 
-    xor_(reg_scratch, reg_scratch);
-    Reg16 _t16 = reg_scratch.cvt16();
-    mov(_t16, 0x1);
-    vpbroadcastw(zmm_one, _t16);
+    if (jcp.is_depthwise) {
+        zmm_src = Zmm(jcp.max_regs_ur);
+        if (jcp.is_fast_depthwise) {
+            zmm_zero_blend = Zmm(jcp.max_regs_ur + 1);
+            zmm_permute = Zmm(jcp.max_regs_ur + 2);
+        }
+    }
+
+    if (!jcp.is_depthwise && jcp.ver != ver_vnni) {
+        xor_(reg_scratch, reg_scratch);
+        Reg16 _t16 = reg_scratch.cvt16();
+        mov(_t16, 0x1);
+        vpbroadcastw(vmm_one, _t16);
+    }
 
     mov(reg_inp, ptr[param1 + GET_OFF(src)]);
     mov(reg_out, ptr[param1 + GET_OFF(dst)]);
@@ -456,6 +630,14 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::generate()
         mov(regw_tmp, mask);
         kmovw(ktail_mask, regw_tmp);
     }
+    if (jcp.is_fast_depthwise) {
+        // prepare mask register for blending weights
+        mov(reg_scratch, 0x8888444422221111);
+        kmovq(kblend_mask, reg_scratch);
+        // load permute indices from data section
+        mov(reg_scratch, permute_index_table);
+        vmovdqu32(zmm_permute, ptr[reg_scratch]);
+    }
 
     int r_pad = nstl::max(0, (jcp.ow - 1) * jcp.stride_w
                     + (jcp.kw - 1) * (jcp.dilate_w + 1)
@@ -626,6 +808,18 @@ void jit_avx512_core_x8s8s32x_fwd_kernel::generate()
         L(end_label);
     }
     postamble();
+
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
+
+    if (jcp.is_fast_depthwise) {
+        align(64);
+        L(permute_index_table);
+        const uint32_t _idx[]
+                = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
+        for (size_t i = 0; i < sizeof(_idx) / sizeof(_idx[0]); ++i)
+            dd(_idx[i]);
+    }
 }
 
 bool jit_avx512_core_x8s8s32x_fwd_kernel::post_ops_ok(
@@ -634,27 +828,18 @@ bool jit_avx512_core_x8s8s32x_fwd_kernel::post_ops_ok(
     using namespace primitive_kind;
     const auto &p = attr.post_ops_;
 
-    auto is_relu = [&](int idx) {
-        return p.entry_[idx].kind == eltwise
-            && p.entry_[idx].eltwise.scale == 1.
-            && p.entry_[idx].eltwise.alg == alg_kind::eltwise_relu
-            && p.entry_[idx].eltwise.alpha == 0.;
-    };
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_depthwise = [&](int idx) { return p.entry_[idx].is_depthwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(false); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true;
-    case 1: return true
-                && IMPLICATION(jcp.with_eltwise, p.contain(sum, 0))
-                && IMPLICATION(!jcp.with_eltwise, is_relu(0) || p.contain(sum, 0));
-    case 2: return true
-                && IMPLICATION(jcp.with_eltwise, p.contain(sum, 0) && is_relu(1))
-                && IMPLICATION(!jcp.with_eltwise, false
-                        || (p.contain(sum, 0) && is_relu(1))
-                        || (p.contain(sum, 1) && is_relu(0)));
-    case 3: return true
-                && jcp.with_eltwise == false
-                && (is_relu(0) && p.contain(sum, 1) && is_relu(2));
-    default: return false;
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_simple(0) && is_sum(1) && is_simple(2));
+        default: return false;
     }
 
     return false;
@@ -664,7 +849,7 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
             const convolution_desc_t &cd, cpu_memory_t::pd_t &src_pd,
             cpu_memory_t::pd_t &weights_pd, cpu_memory_t::pd_t &dst_pd,
             cpu_memory_t::pd_t &bias_pd, const primitive_attr_t &attr,
-            int nthreads, bool with_relu, float relu_negative_slope)
+            int nthreads)
 {
     using namespace prop_kind;
 
@@ -702,22 +887,15 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
     jcp.stride_w = cd.strides[1];
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
+
     jcp.ur_h = 1;
 
     jcp.dilate_h = cd.dilates[0];
     jcp.dilate_w = cd.dilates[1];
 
-    if (!IMPLICATION(with_relu, relu_negative_slope == 0.))
-        return status::unimplemented;
-
     jcp.signed_input = (src_d.data_type() == data_type::s8) ? true : false;
     jcp.is_depthwise = true && with_groups && everyone_is(1, jcp.ic, jcp.oc);
 
-    if (jcp.is_depthwise && jcp.signed_input)
-        return status::unimplemented;
-
     if (jcp.is_depthwise) {
         jcp.ch_block = 16;
         jcp.ic_block = 1;
@@ -728,11 +906,17 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
         jcp.oc_block = 16;
 
         if (jcp.ngroups == 1) {
+            /* For non grouped convolutions, pad channels by 16 if needed */
             jcp.oc = rnd_up(jcp.oc, jcp.oc_block);
             jcp.ic = rnd_up(jcp.ic, jcp.ic_block);
+        } else if (jcp.ngroups != 1 && jcp.ic % jcp.ic_block != 0) {
+            /* For grouped convolutions, MKL-DNN doesn't support padding.
+               Use Ymm when channels per group is multiple of 8,
+               Xmm when channels per group is multiple of 4 */
+            jcp.ic_block = jcp.ic % 8 == 0 ? 8 : 4;
+            jcp.oc_block = jcp.ic_block;
         }
-
-        if (jcp.ic % jcp.ic_block != 0)
+        if (jcp.ic % jcp.ic_block !=0 || jcp.oc % jcp.oc_block != 0)
             return status::unimplemented;
     }
 
@@ -742,16 +926,30 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
-    jcp.ver = ver_avx512_core;
-    if (mayiuse(avx512_core_vnni))
-        jcp.ver = ver_vnni;
+    jcp.ver = mayiuse(avx512_core_vnni) ? ver_vnni : ver_avx512_core;
+    jcp.is_fast_depthwise = true && jcp.is_depthwise && jcp.ver == ver_vnni
+        && jcp.ngroups % jcp.ch_block == 0; // for groups not multiple of 16 would require byte masking for load from src
+    if (jcp.is_depthwise) {
+        jcp.max_regs_ur = jcp.is_fast_depthwise
+            ? (jcp.signed_input ? 27 : 28)
+            : (jcp.signed_input ? 29 : 30);
+    } else {
+        jcp.max_regs_ur = jcp.ver == ver_vnni ? 31 : 28;
+    }
 
-    const int regs = (jcp.ver == ver_vnni && !jcp.is_depthwise) ? 31 : 28;
+    memory_format_t w_format;
+    if (jcp.ic_block == 16 || jcp.ch_block == 16) {
+        w_format = with_groups
+            ? (jcp.is_depthwise ? (jcp.signed_input ? Goihw16g_s8s8 : Goihw16g)
+                    : (jcp.signed_input) ? gOIhw4i16o4i_s8s8 : gOIhw4i16o4i)
+            : (jcp.signed_input) ? OIhw4i16o4i_s8s8 : OIhw4i16o4i;
+     /* Non-grouped conv will always be padded by 16*/
+    } else if (with_groups && jcp.ic_block == 8) {
+        w_format = jcp.signed_input ? gOIhw2i8o4i_s8s8 : gOIhw2i8o4i;
+    } else {
+        w_format = jcp.signed_input ? gOIhw4o4i_s8s8 : gOIhw4o4i;
+    }
 
-    const auto w_format = with_groups
-        ? (jcp.is_depthwise ? Goihw16g
-                : (jcp.signed_input) ? gOIhw4i16o4i_s8s8 : gOIhw4i16o4i)
-        : (jcp.signed_input) ? OIhw4i16o4i_s8s8 : OIhw4i16o4i;
     if (weights_d.format() == any)
         CHECK(weights_pd.set_format(w_format));
     if (weights_d.format() != w_format)
@@ -785,20 +983,26 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
     jcp.nb_ic = jcp.ic / jcp.ic_block;
     jcp.nb_oc = jcp.oc / jcp.oc_block;
 
+    // Try to use 4 channel-groups at a time to avoid false sharing (depthwise)
+    jcp.nb_ch_blocking = jcp.is_depthwise
+        ? (jcp.nb_ch % 4 == 0 ? 4 : jcp.nb_ch % 2 == 0 ? 2 : 1)
+        : 1;
+
     // If OC blocking is incommensurate with the number of OC blocks (general
     // requirement for all convolutions), or if it results in an unrolling
     // factor smaller than the left padding (special requirement for SSD:fc6),
     // then search for a smaller OC blocking that satisfies both constraints.
     jcp.nb_oc_blocking = nstl::min(4, jcp.nb_oc);
     for (; jcp.nb_oc_blocking > 1; jcp.nb_oc_blocking--) {
-        int ur_w = regs / (jcp.nb_oc_blocking + 1);
+        int ur_w = jcp.max_regs_ur / (jcp.nb_oc_blocking + 1);
         if (jcp.nb_oc % jcp.nb_oc_blocking == 0
                 && (jcp.l_pad <= ur_w
                          && IMPLICATION(jcp.ow != 1, jcp.ow % ur_w != 1)))
             break;
     }
 
-    jcp.ur_w = regs / (jcp.nb_oc_blocking + 1);
+    jcp.ur_w = jcp.max_regs_ur
+            / (jcp.is_depthwise ? jcp.nb_ch_blocking : jcp.nb_oc_blocking + 1);
     if (jcp.ow < jcp.ur_w)
         jcp.ur_w = jcp.ow;
     jcp.ur_w_tail = jcp.ow % jcp.ur_w;
@@ -840,7 +1044,7 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
     if (r_pad_no_tail > jcp.ur_w)
         return status::unimplemented;
 
-    pick_loop_order(jcp);
+    pick_loop_order(jcp, nthreads);
 
     jcp.nb_ic_L2 = jcp.nb_ic;
 
@@ -854,6 +1058,18 @@ status_t jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(jit_conv_conf_t &jcp,
     return status::success;
 }
 
+void jit_avx512_core_x8s8s32x_fwd_kernel::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp,
+        const primitive_attr_t &attr) {
+    if (jcp.signed_input && jcp.ver != ver_vnni) {
+        size_t count = nstl::max(attr.output_scales_.count_, jcp.ic_block);
+        scratchpad.book(key_conv_adjusted_scales, sizeof(float) * count);
+    }
+}
+
+template struct  _jit_avx512_core_x8s8s32x_fwd_kernel<Zmm>;
+template struct  _jit_avx512_core_x8s8s32x_fwd_kernel<Ymm>;
+template struct  _jit_avx512_core_x8s8s32x_fwd_kernel<Xmm>;
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.hpp
index d243004b7..0e8e7ca11 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_conv_kernel.hpp
@@ -18,109 +18,134 @@
 #define CPU_JIT_AVX512_CORE_X8S8S32X_CONV_KERNEL_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
 #include "cpu_memory.hpp"
 
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
+#include "jit_uni_eltwise.hpp"
+#include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-struct jit_avx512_core_x8s8s32x_fwd_kernel : public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_x8s8s32x_conv_fwd_ker_t)
+template<typename Vmm>
+struct _jit_avx512_core_x8s8s32x_fwd_kernel : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(_jit_avx512_core_x8s8s32x_conv_fwd_ker_t)
 
     enum { STATE_FIRST_DST_LOAD = 0x1U };
 
-    jit_avx512_core_x8s8s32x_fwd_kernel(jit_conv_conf_t ajcp,
+    _jit_avx512_core_x8s8s32x_fwd_kernel(jit_conv_conf_t ajcp,
             const primitive_attr_t &attr) : jcp(ajcp), attr_(attr)
     {
         generate();
-        jit_ker = (void (*)(jit_conv_call_s *))getCode();
+        jit_ker_ = (void (*)(jit_conv_call_s *))getCode();
+    }
+
+    ~_jit_avx512_core_x8s8s32x_fwd_kernel() {
+        for (auto inj : eltwise_injectors)
+            delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
     }
-    static bool post_ops_ok(jit_conv_conf_t &jcp,
-            const primitive_attr_t &attr);
-    static status_t init_conf(jit_conv_conf_t &jcp,
-            const convolution_desc_t &cd,
-            cpu_memory_t::pd_t &src_pd,
-            cpu_memory_t::pd_t &weights_pd,
-            cpu_memory_t::pd_t &dst_pd,
-            cpu_memory_t::pd_t &bias_pd,
-            const primitive_attr_t &attr,
-            int nthreads,
-            bool with_relu = false,
-            float relu_negative_slope = 0.);
 
     jit_conv_conf_t jcp;
     const primitive_attr_t &attr_;
-    void (*jit_ker)(jit_conv_call_s *);
+    void (*jit_ker_)(jit_conv_call_s *);
 
 private:
-    using reg64_t = const Xbyak::Reg64;
-    using zmm_t = const Xbyak::Zmm;
-    using xmm_t = const Xbyak::Xmm;
+    nstl::vector<jit_uni_eltwise_injector_f32<avx512_common>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<avx512_common>*> depthwise_injectors;
+
     enum {
         typesize = sizeof(float),
         ker_reg_base_idx = 28,
+        ker_dw_reg_base_idx = 30,
     };
-    enum {
+    typedef enum {
         no_last_block,
         last_ic_block,
         last_sp_block,
-    };
-
-    reg64_t reg_inp = r8;
-    reg64_t reg_ker = r9;
-    reg64_t reg_out = r10;
-    reg64_t aux_reg_inp = r11;
-    reg64_t reg_ptr_sum_scale = r11;
-    reg64_t aux_reg_ker = r12;
-    reg64_t reg_owb = r12;
-
-    reg64_t reg_scratch = r14;
-    reg64_t reg_kj = rax;
-    reg64_t reg_overflow = rax;
-    reg64_t reg_ptr_scales = rax;
-    reg64_t reg_oi = rbx;
-    reg64_t reg_bias = rdx;
-    reg64_t reg_compensation = reg_scratch;
-    reg64_t reg_kh = abi_not_param1;
-    reg64_t param = abi_param1;
-    reg64_t reg_tmp = rbp;
-    reg64_t imm_addr64 = r15;
-    reg64_t reg_oc_blocks = rsi;
-    reg64_t reg_icb = reg_bias;
-    reg64_t reg_bias_alpha = reg_kh;
-
-    Xbyak::Opmask ktail_mask = Xbyak::Opmask(2);
-
-    zmm_t zmm_tmp = zmm_t(28);
-    zmm_t zmm_one = zmm_t(29);
-    zmm_t zmm_scales = zmm_t(30);
-    zmm_t zmm_shift = zmm_t(30);
-    zmm_t zmm_zero = zmm_t(31);
-    zmm_t zmm_wei = zmm_t(31);
-
-    zmm_t zmm_out(int i_ur, int i_oc) {
+    } ic_block_t;
+
+    /* data regs */
+    const Xbyak::Reg64 reg_ptr_scales = rax;
+    const Xbyak::Reg64 reg_inp = r8;
+    const Xbyak::Reg64 reg_ker = r9;
+    const Xbyak::Reg64 reg_out = r10;
+    const Xbyak::Reg64 aux_reg_inp = r11;
+    const Xbyak::Reg64 reg_ptr_sum_scale = r11;
+    const Xbyak::Reg64 aux_reg_ker = r12;
+    const Xbyak::Reg64 reg_compensation = r14;
+    /* counter regs */
+    const Xbyak::Reg64 reg_bias_alpha = abi_not_param1;
+    const Xbyak::Reg64 reg_oi = rbx;
+    const Xbyak::Reg64 reg_bias = rdx;
+    const Xbyak::Reg64 reg_oc_blocks = rsi;
+    const Xbyak::Reg64 reg_owb = aux_reg_ker;
+    const Xbyak::Reg64 reg_scratch = reg_compensation;
+    const Xbyak::Reg64 reg_kj = reg_ptr_scales;
+    const Xbyak::Reg64 reg_overflow = reg_ptr_scales;
+    const Xbyak::Reg64 reg_icb = reg_bias;
+
+    const Xbyak::Reg64 reg_d_weights = r15;
+    const Xbyak::Reg64 reg_d_bias = r13;
+
+    const Xbyak::Opmask ktail_mask = Xbyak::Opmask(2);
+    const Xbyak::Opmask kblend_mask = Xbyak::Opmask(3);
+
+    const Vmm vmm_wei = Vmm(31);
+    /* used during bias section of store_output */
+    const Vmm vmm_comp = Vmm(30); // only for signed input
+    const Vmm vmm_bias = Vmm(31);
+    /* used during post_op sum section of store_output */
+    const Vmm vmm_prev_dst = Vmm(31);
+    /* used during write-out section of store_output */
+    const Vmm vmm_zero = Vmm(31);
+
+    /* used in compute_ker (but set during prepare_output) */
+    const Vmm vmm_shift = vmm_comp; // only for signed input
+    /* used in compute_ker (but only for pre-VNNI machines) */
+    const Vmm vmm_tmp = Vmm(28); // not used for depthwise
+    const Vmm vmm_one = Vmm(29); // set at start of kernel, not used for depthwise.
+
+    /* registers use only for depthwise
+       groups are always blocked by 16(padded if needed),
+       hence use only Zmm registers */
+    const Xbyak::Zmm zmm_wei = Xbyak::Zmm(31);
+    Xbyak::Zmm zmm_src;
+    Xbyak::Zmm zmm_permute;
+    Xbyak::Zmm zmm_zero_blend; // used only for fast depthwise
+
+    Vmm vmm_out(int i_ur, int i_oc) {
         int idx = i_ur + i_oc * jcp.ur_w;
-        assert(idx < ker_reg_base_idx);
-        return zmm_t(idx);
+        assert(idx < (jcp.is_depthwise
+                    ? ker_dw_reg_base_idx : ker_reg_base_idx));
+        return Vmm(idx);
     }
-    xmm_t xmm_out(int i_ur, int i_oc) {
+    Xbyak::Zmm zmm_out(int i_ur, int i_oc) {
         int idx = i_ur + i_oc * jcp.ur_w;
-        assert(idx < ker_reg_base_idx);
-        return xmm_t(idx);
+        assert(idx < (jcp.is_depthwise
+                    ? ker_dw_reg_base_idx : ker_reg_base_idx));
+        return Xbyak::Zmm(idx);
     }
-    zmm_t zmm_inp(int i_ic, int nb_x_blocking) {
+    Vmm vmm_inp(int i_ic, int nb_x_blocking) {
         int idx = i_ic + nb_x_blocking * jcp.ur_w;
         assert(idx < 31);
-        return zmm_t(idx);
+        return Vmm(idx);
     }
-    zmm_t zmm_bias_alpha() {
-        return zmm_t(jcp.nb_oc_blocking * jcp.ur_w);
+    Vmm vmm_bias_alpha() {
+        int nb_c_block = jcp.is_depthwise ? jcp.nb_ch_blocking : jcp.nb_oc_blocking;
+        return Vmm(nb_c_block * jcp.ur_w);
     }
-    xmm_t xmm_bias_alpha() {
-        return xmm_t(jcp.nb_oc_blocking * jcp.ur_w);
+    Xbyak::Xmm xmm_bias_alpha() {
+        int nb_c_block = jcp.is_depthwise ? jcp.nb_ch_blocking : jcp.nb_oc_blocking;
+        return Xbyak::Xmm(nb_c_block * jcp.ur_w);
     }
     int get_ow_start(int ki, int pad_l) {
         return nstl::max(0,
@@ -132,17 +157,79 @@ private:
                                                            * (jcp.dilate_w + 1),
                                            jcp.stride_w));
     }
-    bool maybe_relu(int position);
+
     void prepare_output(int ur_w);
-    void store_output(int ur_w, int last_oc_block_flag);
-    void compute_ker(int ur_w, int pad_l, int pad_r, int last_ic_block_flag,
-                                                        bool h_padded = false);
-    void kh_loop(int ur_w, int pad_l, int pad_r, int last_ic_block_flag);
+    void store_output(int ur_w, bool last_oc_block_flag);
+    void compute_ker_dw(
+            int ur_w, int pad_l, int pad_r, ic_block_t last_ic_block_flag, bool h_padded);
+    void compute_ker(int ur_w, int pad_l, int pad_r,
+            ic_block_t last_ic_block_flag, bool h_padded = false);
+    void kh_loop(int ur_w, int pad_l, int pad_r, ic_block_t last_ic_block_flag);
     void icb_loop(
             int ur_w, int pad_l, int pad_r, bool is_last_spatial_block);
     void generate();
-    void cvt2ps(data_type_t type_in, zmm_t zmm_in, const Xbyak::Operand &op,
+    void cvt2ps(data_type_t type_in, Vmm ymm_in, const Xbyak::Operand &op,
         bool mask_flag);
+    const Vmm vmm_mask(const Vmm vmm_in, bool mask_flag, bool store = false);
+};
+
+struct jit_avx512_core_x8s8s32x_fwd_kernel {
+
+    jit_avx512_core_x8s8s32x_fwd_kernel(jit_conv_conf_t ajcp,
+            const primitive_attr_t &attr) :
+        jit_ker(nullptr),
+        zmm_kernel_(nullptr),
+        ymm_kernel_(nullptr),
+        xmm_kernel_(nullptr) {
+            int ch_block = ajcp.is_depthwise ? ajcp.ch_block : ajcp.ic_block;
+            switch (ch_block) {
+                case 16:
+                    zmm_kernel_ =
+                        new _jit_avx512_core_x8s8s32x_fwd_kernel<Xbyak::Zmm>(
+                                ajcp, attr);
+                    jit_ker = zmm_kernel_->jit_ker_;
+                    return;
+                case 8:
+                    ymm_kernel_ =
+                        new _jit_avx512_core_x8s8s32x_fwd_kernel<Xbyak::Ymm>(
+                                ajcp, attr);
+                    jit_ker = ymm_kernel_->jit_ker_;
+                    return;
+                case 4:
+                    xmm_kernel_ =
+                        new _jit_avx512_core_x8s8s32x_fwd_kernel<Xbyak::Xmm>(
+                                ajcp, attr);
+                    jit_ker = xmm_kernel_->jit_ker_;
+                    return;
+                default:
+                    assert(!"invalid channel blocking");
+            }
+    }
+
+    ~jit_avx512_core_x8s8s32x_fwd_kernel() {
+        delete xmm_kernel_;
+        delete ymm_kernel_;
+        delete zmm_kernel_;
+    }
+
+    static bool post_ops_ok(jit_conv_conf_t &jcp,
+            const primitive_attr_t &attr);
+
+    static status_t init_conf(jit_conv_conf_t &jcp,
+            const convolution_desc_t &cd,
+            cpu_memory_t::pd_t &src_pd,
+            cpu_memory_t::pd_t &weights_pd,
+            cpu_memory_t::pd_t &dst_pd,
+            cpu_memory_t::pd_t &bias_pd,
+            const primitive_attr_t &attr,
+            int nthreads);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp, const primitive_attr_t &attr);
+
+    void (*jit_ker)(jit_conv_call_s *);
+    _jit_avx512_core_x8s8s32x_fwd_kernel<Xbyak::Zmm> *zmm_kernel_;
+    _jit_avx512_core_x8s8s32x_fwd_kernel<Xbyak::Ymm> *ymm_kernel_;
+    _jit_avx512_core_x8s8s32x_fwd_kernel<Xbyak::Xmm> *xmm_kernel_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.cpp
index 8d1297f55..e5cdcb1f9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.cpp
@@ -14,7 +14,6 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_types.h"
 #include "c_types_map.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
@@ -28,6 +27,7 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 using namespace nstl;
@@ -35,37 +35,52 @@ using namespace nstl;
 using jit_conv_ker_t = void (*)(jit_conv_call_s *);
 
 #define wht_blk_off(d, g, ...) \
-        (conf_.with_groups() \
+        (pd()->with_groups() \
          ? (d).blk_off((g), __VA_ARGS__) \
          : (d).blk_off(__VA_ARGS__))
 
-template <bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _jit_avx512_core_x8s8s32x_convolution_fwd_t<with_relu, src_type, dst_type>::
-execute_forward()
+template <data_type_t src_type, data_type_t dst_type>
+void jit_avx512_core_x8s8s32x_convolution_fwd_t<src_type, dst_type>::
+execute_forward() const
 {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const char *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
-    const size_t bia_dt_size = conf_.with_bias()
-        ? types::data_type_size(conf_.cdesc()->bias_desc.data_type) : 0;
+    const size_t bia_dt_size = pd()->with_bias()
+        ? types::data_type_size(pd()->desc()->bias_desc.data_type) : 0;
 
-    const auto &jcp = kernel_->jcp;
+    const auto &jcp = pd()->jcp_;
     assert(jcp.nb_oc % jcp.nb_oc_blocking == 0);
+    assert(jcp.nb_ch % jcp.nb_ch_blocking == 0);
+
+    const float *oscales = pd()->attr()->output_scales_.scales_;
+    if (jcp.signed_input && jcp.ver != ver_vnni) {
+        auto local_scales = scratchpad().template get<float>(
+                key_conv_adjusted_scales);
+        size_t count = pd()->attr()->output_scales_.count_;
+        float factor = 1.f / pd()->jcp_.wei_adj_scale;
+        if (count == 1) {
+            utils::array_set(local_scales, oscales[0] * factor, 16);
+        } else {
+            for (size_t c = 0; c < count; c++)
+                local_scales[c] = oscales[c] * factor;
+        }
+        oscales = local_scales;
+    }
 
-    size_t offset = (size_t)jcp.ngroups * jcp.oc * jcp.ic * jcp.kh * jcp.kw;
+    size_t offset = weights_d.size() - weights_d.additional_buffer_size();
     auto w = const_cast<wei_data_t *>(weights);
     int32_t* compensation = (jcp.signed_input)
                                 ? reinterpret_cast<int32_t *>(&w[offset]) : 0;
-    const auto &oscales = conf_.attr()->output_scales_;
     int oc_chunks = jcp.nb_oc / jcp.nb_oc_blocking;
-    int nb_groups = jcp.nb_ch;
+    int nb_groups = jcp.nb_ch / jcp.nb_ch_blocking;
     int group_block = jcp.ch_block;
     int work_amount = jcp.mb * nb_groups * oc_chunks * jcp.oh * jcp.nb_ow;
 
@@ -80,20 +95,24 @@ execute_forward()
         size_t dst_h_stride = dst_d.blk_off(0, 0, 1);
         size_t wht_h_stride = wht_blk_off(weights_d, 0, 0, 0, 1);
 
-        int n{ 0 }, gb{ 0 }, occ{ 0 }, oh_s{ 0 }, owb{ 0 };
+        int n{ 0 }, gg{ 0 }, occ{ 0 }, oh_s{ 0 }, owb{ 0 };
         if (jcp.loop_order == loop_cwgn)
-            nd_iterator_init(start, occ, oc_chunks, owb, jcp.nb_ow, gb,
+            nd_iterator_init(start, occ, oc_chunks, owb, jcp.nb_ow, gg,
                     nb_groups, n, jcp.mb, oh_s, jcp.oh);
         else if (jcp.loop_order == loop_gncw)
-            nd_iterator_init(start, gb, nb_groups, n, jcp.mb, occ, oc_chunks,
+            nd_iterator_init(start, gg, nb_groups, n, jcp.mb, occ, oc_chunks,
                     owb, jcp.nb_ow, oh_s, jcp.oh);
         else if (jcp.loop_order == loop_ngcw)
-            nd_iterator_init(start, n, jcp.mb, gb, nb_groups, occ, oc_chunks,
+            nd_iterator_init(start, n, jcp.mb, gg, nb_groups, occ, oc_chunks,
                     owb, jcp.nb_ow, oh_s, jcp.oh);
+        else if (jcp.loop_order == loop_nhwcg)
+            nd_iterator_init(start, n, jcp.mb, oh_s, jcp.oh, owb, jcp.nb_ow,
+                    occ, oc_chunks, gg, nb_groups);
         else
             assert(!"unsupported loop order");
         while (start < end) {
             int ocb = occ * jcp.nb_oc_blocking;
+            int gb = gg * jcp.nb_ch_blocking;
             int g = gb * group_block;
             int g_oc = (g * jcp.nb_oc + ocb) * jcp.oc_block;
 
@@ -102,6 +121,7 @@ execute_forward()
             int work_rem = end - start;
             int ih_s = -jcp.t_pad + oh_s * jcp.stride_h;
             int oh_e = oh_s + work_rem > jcp.oh ? jcp.oh : oh_s + work_rem;
+            if (jcp.loop_order == loop_nhwcg) oh_e = oh_s + 1; // step instead
             int ow_s = owb * jcp.ow_block;
             int iw_s = ow_s * jcp.stride_w;
 
@@ -115,9 +135,7 @@ execute_forward()
             auto src_w = src + src_d.blk_off(n, g_ic, ih_s, iw_s);
             auto wht_w = weights + wht_blk_off(weights_d, gb, ocb, 0);
 
-            auto scales = (jcp.signed_input && jcp.ver != ver_vnni)
-                ? &local_scales_[jcp.is_oc_scale * g_oc]
-                : &oscales.scales_[jcp.is_oc_scale * g_oc];
+            auto scales = &oscales[jcp.is_oc_scale * g_oc];
 
             for (int oj = oh_s, ij = ih_s; oj < oh_e;
                 ++oj, ij += jcp.stride_h) {
@@ -144,57 +162,48 @@ execute_forward()
                 p.b_overflow = i_b_overflow;
                 p.owb = owb;
 
+                p.oc_off = g_oc * sizeof(float);
+
                 kernel_->jit_ker(&p);
 
                 src_w += src_h_stride * jcp.stride_h;
                 dst_w += dst_h_stride;
             }
             if (jcp.loop_order == loop_cwgn)
-                nd_iterator_jump(start, end, occ, oc_chunks, owb, jcp.nb_ow, gb,
+                nd_iterator_jump(start, end, occ, oc_chunks, owb, jcp.nb_ow, gg,
                         nb_groups, n, jcp.mb, oh_s, jcp.oh);
             else if (jcp.loop_order == loop_gncw)
-                nd_iterator_jump(start, end, gb, nb_groups, n, jcp.mb, occ,
+                nd_iterator_jump(start, end, gg, nb_groups, n, jcp.mb, occ,
                         oc_chunks, owb, jcp.nb_ow, oh_s, jcp.oh);
             else if (jcp.loop_order == loop_ngcw)
-                nd_iterator_jump(start, end, n, jcp.mb, gb, nb_groups, occ,
+                nd_iterator_jump(start, end, n, jcp.mb, gg, nb_groups, occ,
                         oc_chunks, owb, jcp.nb_ow, oh_s, jcp.oh);
+            else if (jcp.loop_order == loop_nhwcg) {
+                ++start;
+                nd_iterator_step(n, jcp.mb, oh_s, jcp.oh, owb, jcp.nb_ow, occ,
+                        oc_chunks, gg, nb_groups);
+            }
             else
                 assert(!"unsupported loop order");
         }
     });
 }
 
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::s8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
-                                                data_type::s8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
-                                                data_type::u8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::u8, data_type::u8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
-                                                data_type::s8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::s8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::u8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
-                                                data_type::u8, data_type::s8>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
-                                                data_type::s8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::s8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
-                                                data_type::u8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::u8, data_type::s32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::s8, data_type::f32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
-                                                data_type::s8, data_type::f32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<false,
-                                                data_type::u8, data_type::f32>;
-template struct _jit_avx512_core_x8s8s32x_convolution_fwd_t<true,
+template struct jit_avx512_core_x8s8s32x_convolution_fwd_t<
                                                 data_type::u8, data_type::f32>;
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.hpp
index 6ac59f996..1afcda647 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_convolution.hpp
@@ -18,11 +18,11 @@
 #define CPU_JIT_AVX512_CORE_X8S8S32X_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
 #include "cpu_convolution_pd.hpp"
-#include "cpu_engine.hpp"
-#include "jit_transpose_src_utils.hpp"
-#include "cpu_reducer.hpp"
-#include "cpu_barrier.hpp"
 
 #include "jit_avx512_core_x8s8s32x_conv_kernel.hpp"
 
@@ -30,99 +30,85 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu, impl::data_type_t src_type, impl::data_type_t dst_type>
-struct _jit_avx512_core_x8s8s32x_convolution_fwd_t : public cpu_primitive_t {
-    struct pd_t : public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+template <impl::data_type_t src_type, impl::data_type_t dst_type>
+struct jit_avx512_core_x8s8s32x_convolution_fwd_t : public cpu_primitive_t {
+    struct pd_t : public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_()
-        {
-        }
+        {}
+
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_int8:", avx512_core, ""),
-                _jit_avx512_core_x8s8s32x_convolution_fwd_t<with_relu, src_type,
-                dst_type>);
+                jit_avx512_core_x8s8s32x_convolution_fwd_t<src_type, dst_type>);
 
-        virtual status_t init() override
-        {
+        virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
+
             bool ok = true
-                    && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                    && utils::one_of(this->desc()->prop_kind, forward_training,
                                forward_inference)
-                    && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                    && utils::one_of(this->desc()->alg_kind,
+                            alg_kind::convolution_auto,
+                            alg_kind::convolution_direct)
                     && !this->has_zero_dim_memory()
-                    && this->cdesc_().src_desc.data_type == src_type
-                    && this->cdesc_().dst_desc.data_type == dst_type
+                    && this->desc()->src_desc.data_type == src_type
+                    && this->desc()->dst_desc.data_type == dst_type
                     && IMPLICATION(this->with_bias(), utils::one_of(
-                            this->cdesc_().bias_desc.data_type, data_type::f32,
+                            this->desc()->bias_desc.data_type, data_type::f32,
                             data_type::s32, data_type::s8, data_type::u8))
-                    && this->cdesc_().accum_data_type == data_type::s32;
-            if (!ok)
-                return status::unimplemented;
+                    && this->desc()->accum_data_type == data_type::s32;
+            if (!ok) return status::unimplemented;
 
-            return jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(
-                    jcp_, this->cdesc_(), this->src_pd_, this->weights_pd_,
+            status_t status = jit_avx512_core_x8s8s32x_fwd_kernel::init_conf(
+                    jcp_, *this->desc(), this->src_pd_, this->weights_pd_,
                     this->dst_pd_,this->bias_pd_, *this->attr(),
-                    mkldnn_get_max_threads(),
-                    with_relu, this->negative_slope());
+                    mkldnn_get_max_threads());
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_core_x8s8s32x_fwd_kernel::init_scratchpad(scratchpad,
+                    jcp_, *this->attr());
+
+            if (status == status::success
+                    && this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
+            return status;
         }
 
         jit_conv_conf_t jcp_;
     };
 
-    _jit_avx512_core_x8s8s32x_convolution_fwd_t(const pd_t *pd,
+    jit_avx512_core_x8s8s32x_convolution_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , local_scales_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
     {
-        kernel_ = new jit_avx512_core_x8s8s32x_fwd_kernel(conf_.jcp_,
-                    *conf_.attr());
-        if (conf_.jcp_.signed_input && conf_.jcp_.ver != ver_vnni) {
-            size_t scales_size = (conf_.attr()->output_scales_.count_ == 1)
-                ? 16
-                : conf_.attr()->output_scales_.count_;
-            local_scales_ = (float *)malloc(sizeof(float) * scales_size, 64);
-            for (size_t i = 0; i < scales_size; i++) {
-                local_scales_[i] = conf_.attr()->output_scales_.scales_[i] *
-                                        (1.f / conf_.jcp_.wei_adj_scale);
-            }
-        }
+        kernel_ = new jit_avx512_core_x8s8s32x_fwd_kernel(pd()->jcp_,
+                    *pd()->attr());
     }
 
-    ~_jit_avx512_core_x8s8s32x_convolution_fwd_t() {
-        delete kernel_;
-        if (local_scales_) free(local_scales_);
-    };
+    ~jit_avx512_core_x8s8s32x_convolution_fwd_t() { delete kernel_; }
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_avx512_core_x8s8s32x_fwd_kernel *kernel_;
-    float *local_scales_;
 };
 
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx512_core_x8s8s32x_convolution_fwd_t =
-    _jit_avx512_core_x8s8s32x_convolution_fwd_t<false, src_type, dst_type>;
-
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx512_core_x8s8s32x_convolution_relu_t =
-    _jit_avx512_core_x8s8s32x_convolution_fwd_t<true, src_type, dst_type>;
-
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_deconvolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_deconvolution.cpp
new file mode 100644
index 000000000..5c69879e1
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_deconvolution.cpp
@@ -0,0 +1,928 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "jit_avx512_core_x8s8s32x_deconvolution.hpp"
+
+#define GET_OFF(field) offsetof(jit_deconv_call_s, field)
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::status;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
+using namespace mkldnn::impl::utils;
+using namespace Xbyak;
+
+using namespace nstl;
+
+#define wht_blk_off(d, g, ...)                             \
+    (pd()->with_groups() ? (d).blk_off((g), __VA_ARGS__) : \
+                           (d).blk_off(__VA_ARGS__))
+
+status_t jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_conf(
+        jit_conv_conf_t &jcp, const deconvolution_desc_t &cd,
+        cpu_memory_t::pd_t &src_pd, cpu_memory_t::pd_t &weights_pd,
+        cpu_memory_t::pd_t &dst_pd, const bool with_bias,
+        cpu_memory_t::pd_t &bias_pd, const primitive_attr_t &attr) {
+    const memory_desc_wrapper src_d(&src_pd);
+    const memory_desc_wrapper dst_d(&dst_pd);
+    const memory_desc_wrapper weights_d(&weights_pd);
+    const memory_desc_wrapper bias_d(&bias_pd);
+
+    if (!(mayiuse(avx512_core)
+                && one_of(src_d.data_type(), data_type::u8, data_type::s8)
+                && weights_d.data_type() == data_type::s8
+                && one_of(dst_d.data_type(), data_type::f32, data_type::s32,
+                           data_type::s8, data_type::u8)))
+        return status::unimplemented;
+
+    jcp = zero<decltype(jcp)>();
+
+    const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
+    jcp.signed_input = src_d.data_type() == data_type::s8;
+
+    jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
+    jcp.oc = dst_d.dims()[1] / jcp.ngroups;
+    jcp.ic = src_d.dims()[1] / jcp.ngroups;
+    jcp.oc_without_padding = dst_d.dims()[1] / jcp.ngroups;
+    jcp.ic_without_padding = src_d.dims()[1] / jcp.ngroups;
+    jcp.is_depthwise = true && with_groups
+            && utils::everyone_is(1, jcp.ic_without_padding,
+                               jcp.oc_without_padding);
+
+    /* TODO: future work, on hold until depthwise specialized kernel is
+     * implemented. */
+    if (jcp.is_depthwise && jcp.signed_input)
+        return status::unimplemented;
+
+    const auto w_format = jcp.is_depthwise ? Goihw16g : with_groups ?
+            (jcp.signed_input ? gOIhw4i16o4i_s8s8 : gOIhw4i16o4i) :
+            (jcp.signed_input ? OIhw4i16o4i_s8s8 : OIhw4i16o4i);
+
+    if (dst_d.format() == any)
+        CHECK(dst_pd.set_format(nhwc));
+    if (dst_d.format() != nhwc)
+        return status::unimplemented;
+    if (src_d.format() == any)
+        CHECK(src_pd.set_format(nhwc));
+    if (src_d.format() != nhwc)
+        return status::unimplemented;
+    if (weights_d.format() == any)
+        CHECK(weights_pd.set_format(w_format));
+    if (weights_d.format() != w_format)
+        return status::unimplemented;
+
+    jcp.with_bias = with_bias;
+    if (jcp.with_bias) {
+        if (bias_d.format() == any)
+            CHECK(bias_pd.set_format(x));
+        if (bias_d.format() != x)
+            return status::unimplemented;
+    }
+
+    jcp.ndims = dst_d.ndims();
+    jcp.prop_kind = cd.prop_kind;
+    jcp.mb = src_d.dims()[0];
+    jcp.ih = src_d.dims()[2];
+    jcp.iw = src_d.dims()[3];
+    jcp.oh = dst_d.dims()[2];
+    jcp.ow = dst_d.dims()[3];
+    jcp.kh = weights_d.dims()[with_groups + 2];
+    jcp.kw = weights_d.dims()[with_groups + 3];
+    jcp.t_pad = cd.padding[0][0];
+    jcp.l_pad = cd.padding[0][1];
+    jcp.stride_h = cd.strides[0];
+    jcp.stride_w = cd.strides[1];
+    jcp.src_fmt = src_d.format();
+
+    if (jcp.is_depthwise) {
+        jcp.ch_block = 16;
+        jcp.oc_block = 1;
+        jcp.ic_block = 1;
+    } else {
+        jcp.ch_block = 1;
+        jcp.oc_block = 16;
+        jcp.ic_block = 16;
+
+        if (jcp.ngroups == 1) {
+            jcp.oc = utils::rnd_up(jcp.oc_without_padding, jcp.oc_block);
+            jcp.ic = utils::rnd_up(jcp.ic_without_padding, jcp.ic_block);
+        }
+        if (jcp.ic % jcp.ic_block != 0)
+            return status::unimplemented;
+    }
+
+    jcp.dilate_h = cd.dilates[0];
+    jcp.dilate_w = cd.dilates[1];
+
+    if (!IMPLICATION(jcp.dilate_h, jcp.stride_h == 1)
+            || !IMPLICATION(jcp.dilate_w, jcp.stride_w == 1))
+        return status::unimplemented;
+
+    /* padding: bottom and right */
+    jcp.b_pad = (jcp.ih - 1) * jcp.stride_h + (jcp.kh - 1) * (jcp.dilate_h + 1)
+            - (jcp.oh + jcp.t_pad - 1);
+    jcp.r_pad = (jcp.iw - 1) * jcp.stride_w + (jcp.kw - 1) * (jcp.dilate_w + 1)
+            - (jcp.ow + jcp.l_pad - 1);
+
+    if (!post_ops_ok(jcp, attr))
+        return status::unimplemented;
+
+    const auto &p = attr.post_ops_;
+    const int eltwise_ind = p.find(primitive_kind::eltwise);
+    jcp.with_eltwise = eltwise_ind != -1;
+    if (jcp.with_eltwise)
+        jcp.eltwise = p.entry_[eltwise_ind].eltwise;
+
+    jcp.ver = ver_avx512_core;
+    if (mayiuse(avx512_core_vnni))
+        jcp.ver = ver_vnni;
+    const auto &oscales = attr.output_scales_;
+    jcp.is_oc_scale = oscales.mask_ == 1 << 1;
+
+    assert(IMPLICATION(!jcp.is_oc_scale, oscales.mask_ == 0));
+
+    jcp.dst_dt = dst_d.data_type();
+    jcp.bia_dt = jcp.with_bias ? bias_d.data_type() : data_type::undef;
+    jcp.typesize_bia
+            = jcp.with_bias ? types::data_type_size(bias_d.data_type()) : 0;
+    jcp.typesize_in = types::data_type_size(src_d.data_type());
+    jcp.typesize_out = types::data_type_size(dst_d.data_type());
+
+    jcp.nb_ch = div_up(jcp.ngroups, jcp.ch_block);
+    jcp.nb_oc = jcp.oc / jcp.oc_block;
+    jcp.nb_ic = jcp.ic / jcp.ic_block;
+
+    /* kernel blocking params */
+    const int regs = jcp.ver == ver_vnni ? 30 : 28;
+    jcp.nb_oc_blocking = nstl::min(4, jcp.nb_oc);
+    for (; jcp.nb_oc_blocking > 1; jcp.nb_oc_blocking--)
+        if (jcp.nb_oc % jcp.nb_oc_blocking == 0
+                && jcp.l_pad <= regs / (jcp.nb_oc_blocking + 1))
+            break;
+
+    jcp.ur_w = regs / (jcp.nb_oc_blocking + 1);
+    int l_overflow = max(
+            0, ((jcp.kw - 1) * (jcp.dilate_w + 1) - jcp.l_pad) / jcp.stride_w);
+
+    if (jcp.ow < jcp.ur_w) {
+        jcp.ur_w = jcp.ow;
+        jcp.ur_w_tail = 0;
+    } else {
+        for (; jcp.ur_w >= 1; jcp.ur_w--) {
+            /* ur_w should be multiple of stride_w in order
+               to simplify logic for get_ow_start and get_ow_end */
+            bool is_multiple_of_stride = jcp.ur_w % jcp.stride_w == 0;
+
+            /* boundary conditions:
+               These conditions ensure all elements close to boundary
+               are computed in a single call of compute loop */
+            bool left_boundary_covered = jcp.ur_w >= l_overflow * jcp.stride_w;
+            jcp.ur_w_tail = jcp.ow % jcp.ur_w;
+            int r_overflow_no_tail
+                    = max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1)
+                                     - max(0, jcp.r_pad) - jcp.ur_w_tail)
+                                    / jcp.stride_w);
+            bool right_boundary_covered
+                    = jcp.ur_w >= r_overflow_no_tail * jcp.stride_w;
+
+            if (is_multiple_of_stride && left_boundary_covered
+                    && right_boundary_covered)
+                break;
+            else if (jcp.ur_w == 1)
+                /* The boundary conditions above are also important
+                   to maintain simplicity of calls to icb_loop,
+                   if those conditions are not satisfied,
+                   then special cases will need to be added
+                   to use correct l_overflow/r_overflow values
+                   when different iterations of compute loop
+                   work on the locations close to boundary.
+                   So to keep code simple, return unimplemented
+                   for extreme case when a good ur_w cannot be found.
+                 */
+                return status::unimplemented;
+        }
+    }
+
+    jcp.wei_adj_scale
+            = (jcp.signed_input && (jcp.ver != ver_vnni)) ? (1.f / 2.f) : 1.f;
+
+    jcp.loop_order = jcp.ngroups > 1 ? loop_ngc : loop_cgn;
+    return status::success;
+}
+
+bool jit_avx512_core_x8s8s32x_deconv_fwd_kernel::maybe_eltwise(int position) {
+    using namespace primitive_kind;
+    const auto &p = attr_.post_ops_;
+
+    if (position == 0) {
+        /* eltwise before sum */
+        return p.contain(eltwise, 0);
+    } else if (position == 1) {
+        /* eltwise after sum */
+        return p.contain(sum, 0) && p.contain(eltwise, 1);
+    }
+    return false;
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::compute_eltwise(int ur_w) {
+    int nb_oc_block
+            = jcp.is_depthwise ? jcp.nb_ch_blocking : jcp.nb_oc_blocking;
+    if (ur_w == jcp.ur_w)
+        eltwise_injector_->compute_vector_range(0, nb_oc_block * jcp.ur_w);
+    else
+        for (int k = 0; k < nb_oc_block; k++)
+            eltwise_injector_->compute_vector_range(
+                    k * jcp.ur_w, k * jcp.ur_w + ur_w);
+}
+
+bool jit_avx512_core_x8s8s32x_deconv_fwd_kernel::post_ops_ok(
+        jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
+    using namespace primitive_kind;
+    const auto &p = attr.post_ops_;
+
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+
+    switch (p.len_) {
+    case 0: return true;
+    case 1: return is_eltwise(0) || p.contain(sum, 0);
+    case 2:
+        return (p.contain(sum, 0) && is_eltwise(1))
+                || (p.contain(sum, 1) && is_eltwise(0));
+    default: return false;
+    }
+
+    return false;
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp,
+        const primitive_attr_t &attr) {
+    if (jcp.signed_input && jcp.ver != ver_vnni) {
+        size_t count = nstl::max(attr.output_scales_.count_, 16);
+        scratchpad.book(key_conv_adjusted_scales, sizeof(float) * count);
+    }
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::compute_ker(int ur_w,
+        int l_overflow, int r_overflow, ker_block_t last_ic_block_flag,
+        bool h_padded) {
+
+    const int ch_block_all = jcp.ch_block * jcp.ic_block * jcp.oc_block;
+    const int ur_w_stride = jcp.signed_input ? 1 : jcp.stride_w;
+
+    auto src_offset = [=](int oj, int icb, int ki) {
+        return jcp.typesize_in
+                * (((oj + jcp.l_pad - ki * (jcp.dilate_w + 1)) / jcp.stride_w)
+                                  * jcp.ngroups * jcp.ic_without_padding
+                          + icb * 4);
+    };
+
+    auto kernel_offset = [=](int ocb, int icb, int ki) {
+        return jcp.typesize_in
+                * (ocb * jcp.nb_ic * jcp.kh * jcp.kw * ch_block_all
+                          + icb * jcp.oc_block * jcp.ic_block / 4
+                          + ki * ch_block_all);
+    };
+
+    auto compute = [=](zmm_t vreg_acc, zmm_t vreg_wei, zmm_t vreg_src) {
+        if (jcp.ver == ver_vnni) {
+            vpdpbusd(vreg_acc, vreg_src, vreg_wei);
+        } else if (jcp.is_depthwise) {
+            vpmulld(zmm_tmp, vreg_src, vreg_wei);
+            vpaddd(vreg_acc, vreg_acc, zmm_tmp);
+        } else {
+            vpmaddubsw(zmm_tmp, vreg_src, vreg_wei);
+            vpmaddwd(zmm_tmp, zmm_tmp, zmm_one);
+            vpaddd(vreg_acc, vreg_acc, zmm_tmp);
+        }
+    };
+
+    for (int ki = 0; ki < jcp.kw; ki++) {
+
+        int jj_start = get_ow_start(ki, l_overflow);
+        int jj_end = get_ow_end(ur_w, ki, r_overflow);
+
+        int _start = (jcp.signed_input) ? 0 : jj_start;
+        int _end = (jcp.signed_input) ? ur_w : jj_end;
+
+        int tail_size = jcp.ic_without_padding % 4;
+        int n_ic_blocks = jcp.is_depthwise ?
+                1 :
+                (last_ic_block_flag & ~no_last_block ?
+                                div_up(jcp.ic_without_padding % jcp.ic_block,
+                                        4) :
+                                jcp.ic_block / 4);
+
+        for (int icb1 = 0; icb1 < n_ic_blocks; icb1++) {
+            if (h_padded == true) {
+                /* fill padded area with shifted values */
+                Zmm inp = zmm_inp(0, jcp.nb_oc_blocking);
+                vpxord(inp, inp, inp);
+                vpsubb(inp, inp, zmm_shift);
+            } else {
+
+                for (int jj = _start; jj < _end; jj += ur_w_stride) {
+
+                    int aux_src_off = src_offset(jj, icb1, ki);
+
+                    if (jj >= jj_start && jj < jj_end
+                            && ((jj + jcp.l_pad - ki) % jcp.stride_w == 0)) {
+                        if (jcp.is_depthwise) {
+                            vpmovzxbd(zmm_inp(jj, jcp.nb_oc_blocking),
+                                    EVEX_compress_addr(
+                                              aux_reg_src, aux_src_off));
+                        } else if ((last_ic_block_flag & last_sp_block)
+                                && tail_size != 0 && icb1 == n_ic_blocks - 1) {
+                            xmm_t xmm_tmp = xmm_t(
+                                    zmm_inp(jj, jcp.nb_oc_blocking).getIdx());
+                            for (int r = 0; r < tail_size; ++r)
+                                vpinsrb(xmm_tmp, xmm_tmp,
+                                        ptr[aux_reg_src + aux_src_off + r], r);
+                            vpbroadcastd(
+                                    zmm_inp(jj, jcp.nb_oc_blocking), xmm_tmp);
+                        } else {
+                            vpbroadcastd(zmm_inp(jj, jcp.nb_oc_blocking),
+                                    EVEX_compress_addr(
+                                                 aux_reg_src, aux_src_off));
+                        }
+                        if (jcp.signed_input)
+                            vpsubb(zmm_inp(jj, jcp.nb_oc_blocking),
+                                    zmm_inp(jj, jcp.nb_oc_blocking), zmm_shift);
+                    } else {
+                        /* fill padded area with shifted values */
+                        if (jcp.signed_input) {
+                            Zmm inp = zmm_inp(jj, jcp.nb_oc_blocking);
+                            vpxord(inp, inp, inp);
+                            vpsubb(inp, inp, zmm_shift);
+                        }
+                    }
+                }
+            }
+            for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
+                int aux_filt_off = kernel_offset(ocb, icb1, ki);
+
+                if (_end - _start > 0) {
+                    if (jcp.is_depthwise)
+                        vpmovsxbd(zmm_wei,
+                                EVEX_compress_addr(aux_reg_filt, aux_filt_off));
+                    else
+                        vmovups(zmm_wei,
+                                EVEX_compress_addr(aux_reg_filt, aux_filt_off));
+                }
+                for (int jj = _start; jj < _end; jj += ur_w_stride) {
+                    Zmm inp = (h_padded == true) ?
+                            zmm_inp(0, jcp.nb_oc_blocking) :
+                            zmm_inp(jj, jcp.nb_oc_blocking);
+                    compute(zmm_out(jj, ocb), zmm_wei, inp);
+                }
+            }
+        }
+    }
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::kh_loop(int ur_w,
+        int l_overflow, int r_overflow, ker_block_t last_ic_block_flag) {
+
+    int ch_block_all = jcp.ch_block * jcp.ic_block * jcp.oc_block;
+    int shift_src_ih = jcp.typesize_in * (jcp.dilate_h + 1) * jcp.iw
+            * jcp.ngroups * jcp.ic_without_padding;
+    const int stride_h = jcp.signed_input ? 1 : jcp.stride_h;
+    int shift_filt_kh = jcp.typesize_in * jcp.kw * ch_block_all * stride_h;
+
+    Label kh_loop_label, skip_kh_loop;
+    Label t_overflow_label, no_t_overflow_label, b_overflow_label,
+            no_b_overflow_label;
+
+    mov(aux_reg_src, reg_src);
+    mov(aux_reg_filt, reg_filt);
+
+    if (jcp.signed_input) {
+        /* Weights are transposed, so first compute 'bottom' padding. */
+        mov(reg_overflow, ptr[param1 + GET_OFF(b_overflow)]);
+        cmp(reg_overflow, 0);
+        je(no_b_overflow_label, T_NEAR);
+        L(b_overflow_label); {
+            compute_ker(ur_w, 0, 0, last_ic_block_flag, true);
+
+            add(aux_reg_filt, shift_filt_kh);
+            dec(reg_overflow);
+            cmp(reg_overflow, 0);
+            jg(b_overflow_label, T_NEAR);
+        }
+        L(no_b_overflow_label);
+    }
+
+    mov(reg_kh, ptr[param1 + GET_OFF(kh_padding)]);
+
+    if (jcp.signed_input || ((!jcp.signed_input)
+        && ((min(jcp.t_pad, jcp.b_pad) < 0)
+            || ((jcp.kh - 1) * (jcp.dilate_h + 1)
+                < nstl::max(jcp.t_pad, jcp.b_pad))))) {
+        cmp(reg_kh, 0);
+        je(skip_kh_loop, T_NEAR);
+    }
+
+    L(kh_loop_label); {
+        compute_ker(ur_w, l_overflow, r_overflow, last_ic_block_flag, false);
+        sub(aux_reg_src, shift_src_ih);
+        add(aux_reg_filt, shift_filt_kh);
+        dec(reg_kh);
+
+        /* Insert weight compensation in stride 'holes' */
+        if (jcp.signed_input && jcp.stride_h > 1) {
+            Label kh_comp_loop;
+
+            cmp(reg_kh, 0);
+            je(skip_kh_loop, T_NEAR);
+            mov(reg_comp_strides, jcp.stride_h - 1);
+            L(kh_comp_loop);
+            {
+                compute_ker(
+                        ur_w, 0, 0, last_ic_block_flag, true);
+                add(aux_reg_filt, shift_filt_kh);
+                dec(reg_comp_strides);
+                cmp(reg_comp_strides, 0);
+                jg(kh_comp_loop, T_NEAR);
+            }
+        }
+        cmp(reg_kh, 0);
+        jg(kh_loop_label, T_NEAR);
+    }
+    L(skip_kh_loop);
+    if (jcp.signed_input) {
+        mov(reg_overflow, ptr[param1 + GET_OFF(t_overflow)]);
+        cmp(reg_overflow, 0);
+        je(no_t_overflow_label, T_NEAR);
+        L(t_overflow_label); {
+            compute_ker(ur_w, 0, 0, last_ic_block_flag, true);
+
+            add(aux_reg_filt, shift_filt_kh);
+            dec(reg_overflow);
+            cmp(reg_overflow, 0);
+            jg(t_overflow_label, T_NEAR);
+        }
+        L(no_t_overflow_label);
+    }
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::prepare_output(int ur_w) {
+    for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
+        for (int ur = 0; ur < ur_w; ur++) {
+            zmm_t zmm = zmm_out(ur, ocb);
+            vpxord(zmm, zmm, zmm);
+        }
+    }
+    if (jcp.signed_input) {
+        xor_(reg_scratch, reg_scratch);
+        Reg8 _t8 = reg_scratch.cvt8();
+        mov(_t8, (int8_t)-128);
+        vpbroadcastb(zmm_shift, _t8);
+    }
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::cvt2ps(
+        data_type_t type_in, zmm_t zmm_in, const Operand &op, bool mask_flag) {
+    zmm_t zmm = mask_flag ? zmm_in | ktail_mask | T_z : zmm_in;
+    switch (type_in) {
+    case data_type::f32:
+    case data_type::s32: vmovups(zmm, op); break;
+    case data_type::s8: vpmovsxbd(zmm, op); break;
+    case data_type::u8: vpmovzxbd(zmm, op); break;
+    default: assert(!"unsupported data type");
+    }
+    if (type_in != data_type::f32)
+        vcvtdq2ps(zmm_in, zmm_in);
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::store_output(
+        int ur_w, bool last_oc_block) {
+    mov(reg_bias, ptr[param1 + GET_OFF(bias)]);
+    mov(reg_ptr_scales, ptr[param1 + GET_OFF(scales)]);
+
+    if (jcp.signed_input)
+        mov(reg_compensation, ptr[param1 + GET_OFF(compensation)]);
+
+    const auto &p = attr_.post_ops_;
+    const int sum_idx = p.find(primitive_kind::sum);
+    const float *p_sum_scale
+            = (sum_idx != -1) ? &p.entry_[sum_idx].sum.scale : nullptr;
+    if (p_sum_scale && *p_sum_scale != 1.f)
+        mov(reg_ptr_sum_scale, (size_t)p_sum_scale);
+
+    if (jcp.with_bias && jcp.signed_input && jcp.ver != ver_vnni) {
+        mov(reg_bias_alpha, float2int(jcp.wei_adj_scale));
+        vmovq(xmm_bias_alpha(), reg_bias_alpha);
+        vbroadcastss(zmm_bias_alpha(), xmm_bias_alpha());
+    }
+
+    for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
+        const bool mask_flag = last_oc_block && ocb == jcp.nb_oc_blocking - 1;
+        int scale_offset
+                = jcp.is_oc_scale * (sizeof(float) * ocb * jcp.oc_block);
+
+        auto zmm_bias = zmm_tmp;
+        if (jcp.with_bias) {
+            int bias_offset = jcp.typesize_bia * ocb * jcp.oc_block;
+            auto bias_addr = EVEX_compress_addr(reg_bias, bias_offset);
+            cvt2ps(jcp.bia_dt, zmm_bias, bias_addr, mask_flag);
+            if (jcp.signed_input && jcp.ver != ver_vnni)
+                vmulps(zmm_bias, zmm_bias, zmm_bias_alpha());
+        }
+        if (jcp.signed_input) {
+            int comp_offset = sizeof(int32_t) * ocb * jcp.oc_block;
+            auto comp_addr = EVEX_compress_addr(reg_compensation, comp_offset);
+            cvt2ps(data_type::s32, zmm_comp, comp_addr, mask_flag);
+        }
+
+        for (int ur = 0; ur < ur_w; ur++) {
+            zmm_t zmm = zmm_out(ur, ocb);
+            vcvtdq2ps(zmm, zmm);
+            if (jcp.signed_input)
+                vaddps(zmm, zmm, zmm_comp);
+            if (jcp.with_bias)
+                vaddps(zmm, zmm, zmm_bias);
+            zmm_t mask_zmm = mask_flag ? zmm | ktail_mask | T_z : zmm;
+            vmulps(mask_zmm, zmm,
+                    EVEX_compress_addr(reg_ptr_scales, scale_offset));
+        }
+    }
+    if (maybe_eltwise(0))
+        compute_eltwise(ur_w);
+    if (p_sum_scale) { // post_op: sum
+        for (int k = 0; k < jcp.nb_oc_blocking; k++) {
+            const bool mask_flag
+                    = last_oc_block == 1 && k == jcp.nb_oc_blocking - 1;
+            for (int j = 0; j < ur_w; j++) {
+                int aux_output_offset
+                        = jcp.typesize_out
+                        * (k * jcp.oc_block
+                                  + j * jcp.oc_without_padding * jcp.ngroups);
+                auto addr = EVEX_compress_addr(reg_dst, aux_output_offset);
+                Zmm zmm = zmm_out(j, k);
+                cvt2ps(jcp.dst_dt, zmm_prev_dst, addr, mask_flag);
+                if (*p_sum_scale == 1.f)
+                    vaddps(zmm, zmm_prev_dst);
+                else
+                    vfmadd231ps(zmm, zmm_prev_dst, zword_b[reg_ptr_sum_scale]);
+            }
+        }
+    }
+    if (maybe_eltwise(1))
+        compute_eltwise(ur_w);
+
+    for (int ocb = 0; ocb < jcp.nb_oc_blocking; ocb++) {
+        const bool mask_flag = last_oc_block && ocb == jcp.nb_oc_blocking - 1;
+        for (int ur = 0; ur < ur_w; ur++) {
+            zmm_t zmm = zmm_out(ur, ocb);
+            if (jcp.dst_dt == data_type::u8) {
+                vpxord(zmm_zero, zmm_zero, zmm_zero);
+                vmaxps(zmm, zmm_zero, zmm);
+            }
+            if (jcp.dst_dt != data_type::f32) {
+                if (attr_.round_mode_ == round_mode::nearest)
+                    vcvtps2dq(zmm | T_rn_sae, zmm);
+                else if (attr_.round_mode_ == round_mode::down)
+                    vcvtps2dq(zmm | T_rd_sae, zmm);
+                else
+                    assert(!"unimplemented");
+            }
+        }
+        for (int ur = 0; ur < ur_w; ur++) {
+            int aux_dst_off = jcp.typesize_out
+                    * (ur * jcp.ngroups * jcp.oc_without_padding
+                                      + ocb * jcp.oc_block);
+            auto addr = EVEX_compress_addr(reg_dst, aux_dst_off);
+
+            zmm_t zmm = zmm_out(ur, ocb);
+            zmm_t r_zmm = mask_flag ? zmm | ktail_mask : zmm;
+            switch (jcp.dst_dt) {
+            case data_type::f32:
+            case data_type::s32: vmovups(addr, r_zmm); break;
+            case data_type::s8: vpmovsdb(addr, r_zmm); break;
+            case data_type::u8: vpmovusdb(addr, r_zmm); break;
+            default: assert(!"unknown dst_dt");
+            }
+        }
+    }
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::icb_loop(
+        int ur_w, int l_overflow, int r_overflow, bool is_last_sp_block) {
+
+    int shift_src_icb = jcp.typesize_in * jcp.ic_block;
+    int shift_filt_icb
+            = jcp.typesize_in * jcp.kh * jcp.kw * jcp.ic_block * jcp.oc_block;
+
+    prepare_output(ur_w);
+
+    Label skip_icb_loop, icb_loop_label;
+
+    mov(reg_icb, jcp.nb_ic);
+    L(icb_loop_label); {
+
+        if (jcp.ic_without_padding != jcp.ic) {
+            Label common_ker, end_ker;
+            cmp(reg_icb, 1);
+            jg(common_ker, T_NEAR);
+
+            kh_loop(ur_w, l_overflow, r_overflow,
+                    is_last_sp_block ? last_sp_block : last_ic_block);
+            jmp(end_ker, T_NEAR);
+
+            L(common_ker);
+            kh_loop(ur_w, l_overflow, r_overflow, no_last_block);
+
+            L(end_ker);
+        } else {
+            kh_loop(ur_w, l_overflow, r_overflow, no_last_block);
+        }
+
+        add(reg_src, shift_src_icb);
+        add(reg_filt, shift_filt_icb);
+        dec(reg_icb);
+        cmp(reg_icb, 0);
+        jg(icb_loop_label, T_NEAR);
+    }
+
+    /* come-back pointers */
+    sub(reg_src, jcp.nb_ic * shift_src_icb);
+    sub(reg_filt, jcp.nb_ic * shift_filt_icb);
+    L(skip_icb_loop);
+
+    if (jcp.ngroups % jcp.ch_block != 0 || jcp.oc_without_padding != jcp.oc) {
+        Label common_store, end_store;
+        mov(reg_oc_blocks, ptr[param1 + GET_OFF(oc_blocks)]);
+        if (jcp.is_depthwise)
+            cmp(reg_oc_blocks, jcp.nb_ch - 1);
+        else
+            cmp(reg_oc_blocks, jcp.nb_oc - jcp.nb_oc_blocking);
+        jne(common_store, T_NEAR);
+
+        store_output(ur_w, true);
+        jmp(end_store, T_NEAR);
+
+        L(common_store);
+        store_output(ur_w, false);
+
+        L(end_store);
+
+    } else {
+        store_output(ur_w, false);
+    }
+}
+
+void jit_avx512_core_x8s8s32x_deconv_fwd_kernel::generate() {
+    preamble();
+
+    xor_(reg_scratch, reg_scratch);
+    Reg16 _t = reg_scratch.cvt16();
+    mov(_t, 0x1);
+    vpbroadcastw(zmm_one, _t);
+
+    if (jcp.ngroups % jcp.ch_block != 0 || jcp.oc_without_padding != jcp.oc) {
+        int tail_size = jcp.is_depthwise ?
+                jcp.ngroups % jcp.ch_block :
+                jcp.oc_without_padding % jcp.oc_block;
+        int mask = (1 << tail_size) - 1;
+        Reg32 regw_tmp = reg_nur_w.cvt32();
+        mov(regw_tmp, mask);
+        kmovw(ktail_mask, regw_tmp);
+    }
+
+    mov(reg_src, ptr[param1 + GET_OFF(src)]);
+    mov(reg_filt, ptr[param1 + GET_OFF(filt)]);
+    mov(reg_dst, ptr[param1 + GET_OFF(dst)]);
+
+    int dst_shift = jcp.typesize_out * jcp.ur_w * jcp.ngroups
+            * jcp.oc_without_padding;
+    int src_shift = jcp.typesize_in * (jcp.ur_w / jcp.stride_w) * jcp.ngroups
+            * jcp.ic_without_padding;
+
+    int l_overflow = max(
+            0, ((jcp.kw - 1) * (jcp.dilate_w + 1) - jcp.l_pad) / jcp.stride_w);
+    int r_overflow
+            = max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1) - max(0, jcp.r_pad))
+                            / jcp.stride_w);
+
+    int r_overflow1
+            = nstl::max(0, ((jcp.kw - 1) * (jcp.dilate_w + 1)
+                                   - nstl::max(0, jcp.r_pad) - jcp.ur_w_tail)
+                            / jcp.stride_w);
+    int nur_w = jcp.ow / jcp.ur_w;
+    if (r_overflow1 > 0)
+        nur_w--;
+
+    if (jcp.ur_w == jcp.ow) {
+        icb_loop(jcp.ur_w, l_overflow, r_overflow, true);
+    } else if (nur_w == 0) {
+        icb_loop(jcp.ur_w, l_overflow, r_overflow1, jcp.ur_w_tail == 0);
+        add(reg_src, src_shift);
+        add(reg_dst, dst_shift);
+        if (jcp.ur_w_tail != 0)
+            icb_loop(jcp.ur_w_tail, 0, r_overflow, true);
+    } else {
+        xor_(reg_nur_w, reg_nur_w);
+        if (l_overflow > 0) {
+            icb_loop(jcp.ur_w, l_overflow, 0, false);
+            add(reg_src, src_shift);
+            add(reg_dst, dst_shift);
+            inc(reg_nur_w);
+        }
+        if ((l_overflow <= 0 && nur_w > 0) || (l_overflow > 0 && nur_w > 1)) {
+            Label ow_loop_label;
+            L(ow_loop_label);
+            {
+                icb_loop(jcp.ur_w, 0, 0, false);
+                add(reg_src, src_shift);
+                add(reg_dst, dst_shift);
+                inc(reg_nur_w);
+                cmp(reg_nur_w, nur_w);
+                jl(ow_loop_label, T_NEAR);
+            }
+        }
+        if (r_overflow1 > 0) {
+            icb_loop(jcp.ur_w, 0, r_overflow1, jcp.ur_w_tail == 0);
+            add(reg_src, src_shift);
+            add(reg_dst, dst_shift);
+        }
+        if (jcp.ur_w_tail != 0) {
+            icb_loop(jcp.ur_w_tail, 0, r_overflow, true);
+        }
+    }
+    postamble();
+
+    if (jcp.with_eltwise)
+        eltwise_injector_->prepare_table();
+}
+
+template <data_type_t src_type, data_type_t dst_type>
+void _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<src_type,
+        dst_type>::execute_forward() const {
+    auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
+    auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
+    auto bias = reinterpret_cast<const char *>(this->input_memory(2));
+    auto dst = reinterpret_cast<dst_data_t *>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
+
+    auto &jcp = kernel_->jcp;
+
+    int oc_chunks = jcp.nb_oc / jcp.nb_oc_blocking;
+    int nb_groups = jcp.nb_ch;
+
+    size_t src_h_stride = src_d.blk_off(0, 0, 1);
+    size_t dst_h_stride = dst_d.blk_off(0, 0, 1);
+    size_t wht_kh_stride = wht_blk_off(weights_d, 0, 0, 0, 1);
+
+    const float *oscales = pd()->attr()->output_scales_.scales_;
+    if (jcp.signed_input && jcp.ver != ver_vnni) {
+        auto local_scales
+                = scratchpad().template get<float>(key_conv_adjusted_scales);
+        size_t count = pd()->attr()->output_scales_.count_;
+        float factor = 1.f / pd()->jcp_.wei_adj_scale;
+        if (count == 1) {
+            utils::array_set(local_scales, oscales[0] * factor, 16);
+        } else {
+            for (size_t c = 0; c < count; c++)
+                local_scales[c] = oscales[c] * factor;
+        }
+        oscales = local_scales;
+    }
+    size_t offset = (size_t)jcp.ngroups * jcp.oc * jcp.ic * jcp.kh * jcp.kw;
+    auto w = const_cast<wei_data_t *>(weights);
+    int32_t *compensation
+            = (jcp.signed_input) ? reinterpret_cast<int32_t *>(&w[offset]) : 0;
+
+    parallel(0, [&](const int ithr, const int nthr) {
+        int start{ 0 }, end{ 0 };
+        int work_amount = jcp.mb * nb_groups * oc_chunks * jcp.oh;
+        balance211(work_amount, nthr, ithr, start, end);
+
+        auto p = jit_deconv_call_s();
+
+        /*loop order = cgn*/
+        int n{ 0 }, g{ 0 }, occ{ 0 }, oh_s{ 0 };
+        if (jcp.loop_order == loop_ngc)
+            nd_iterator_init(start, n, jcp.mb, g, nb_groups, occ, oc_chunks,
+                    oh_s, jcp.oh);
+        else if (jcp.loop_order == loop_cgn)
+            nd_iterator_init(start, occ, oc_chunks, g, nb_groups, n, jcp.mb,
+                    oh_s, jcp.oh);
+        else
+            assert(!"unsupported loop order");
+        while (start < end) {
+
+            int ocb = occ * jcp.nb_oc_blocking;
+            int g_oc = (g * jcp.ch_block * jcp.nb_oc + ocb) * jcp.oc_block;
+            int g_ic = g * jcp.ch_block * jcp.ic;
+            int work_rem = end - start;
+            int oh_e = oh_s + work_rem > jcp.oh ? jcp.oh : oh_s + work_rem;
+
+            auto dst_w = dst + dst_d.blk_off(n, g_oc);
+            auto src_w = src + src_d.blk_off(n, g_ic);
+            auto wht_w = weights + wht_blk_off(weights_d, g, ocb, 0);
+            auto bias_w = jcp.with_bias ?
+                    bias + (bias_d.blk_off(g_oc) * jcp.typesize_bia) :
+                    0;
+            int32_t *compensation_w
+                    = (jcp.signed_input) ? compensation + g_oc : 0;
+
+            auto scales = &oscales[jcp.is_oc_scale * g_oc];
+            for (int oj = oh_s; oj < oh_e; oj++) {
+                int ih_max = 0, kh_lo = 0, kh_len = 0;
+                if (jcp.dilate_h != 0 && jcp.stride_h == 1) {
+                    /* dilation */
+                    int dilate_h = jcp.dilate_h + 1;
+                    // Note: use div_up to account for "holes" in filter
+                    int o_t_overflow = div_up(
+                            max(0, (jcp.kh - 1) * dilate_h - oj - jcp.t_pad),
+                            dilate_h);
+                    int o_b_overflow
+                            = div_up(max(0, (jcp.kh - 1) * dilate_h + 1 - jcp.oh
+                                                     + oj - jcp.b_pad),
+                                    dilate_h);
+                    kh_len = jcp.kh - o_t_overflow - o_b_overflow;
+                    kh_lo = o_b_overflow;
+                    ih_max = oj + jcp.t_pad - o_b_overflow * dilate_h;
+                } else {
+                    int o_t_overflow = max(
+                            0, (jcp.kh - (oj + 1 + jcp.t_pad)) / jcp.stride_h);
+                    int o_b_overflow
+                            = max(0, ((oj + jcp.kh) - (jcp.oh + jcp.b_pad))
+                                            / jcp.stride_h);
+                    int overflow_kh_hi = jcp.kh - 1
+                            - abs(jcp.oh + jcp.b_pad - (oj + 1)) % jcp.stride_h;
+                    int overflow_kh_lo = (oj + jcp.t_pad) % jcp.stride_h;
+
+                    kh_len = (overflow_kh_hi - overflow_kh_lo) / jcp.stride_h
+                            + 1 - o_t_overflow - o_b_overflow;
+                    kh_lo = overflow_kh_lo + o_b_overflow * jcp.stride_h;
+                    ih_max = (oj + jcp.t_pad - kh_lo) / jcp.stride_h;
+                }
+
+                int wei_stride
+                        = (!jcp.signed_input) ? kh_lo * wht_kh_stride : 0;
+                p.src = src_w + ih_max * src_h_stride;
+                p.dst = dst_w + oj * dst_h_stride;
+                p.filt = wht_w + wei_stride;
+                p.bias = bias_w;
+                p.compensation = compensation_w;
+                p.t_overflow = max(
+                        0, jcp.kh - (kh_lo + max(0, kh_len - 1) * jcp.stride_h
+                                            + 1));
+                p.b_overflow = kh_lo;
+                p.kh_padding = kh_len;
+                p.scales = scales;
+                p.oc_blocks = jcp.is_depthwise ? g : ocb;
+                kernel_->jit_ker(&p);
+            }
+            if (jcp.loop_order == loop_ngc)
+                nd_iterator_jump(start, end, n, jcp.mb, g, nb_groups, occ,
+                        oc_chunks, oh_s, jcp.oh);
+            else if (jcp.loop_order == loop_cgn)
+                nd_iterator_jump(start, end, occ, oc_chunks, g, nb_groups, n,
+                        jcp.mb, oh_s, jcp.oh);
+            else
+                assert(!"unsupported loop order");
+        }
+    });
+}
+
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::u8,
+        data_type::u8>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::u8,
+        data_type::s8>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::u8,
+        data_type::f32>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::u8,
+        data_type::s32>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::s8,
+        data_type::u8>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::s8,
+        data_type::s8>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::s8,
+        data_type::f32>;
+template struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<data_type::s8,
+        data_type::s32>;
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_deconvolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_deconvolution.hpp
index 17f3a52a0..8053db811 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_u8s8s32x_deconvolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_x8s8s32x_deconvolution.hpp
@@ -29,6 +29,7 @@
 #include "cpu_deconvolution_pd.hpp"
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
+#include "jit_uni_eltwise.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -38,18 +39,28 @@ typedef enum {
     no_last_block = 0x1U,
     last_ic_block = 0x2U,
     last_sp_block = 0x4U,
-    last_ic
 } ker_block_t;
 
-struct jit_avx512_core_u8s8s32x_deconv_fwd_kernel : public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_u8s8s32x_deconv_fwd_ker_t);
+struct jit_avx512_core_x8s8s32x_deconv_fwd_kernel : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_avx512_core_x8s8s32x_deconv_fwd_ker_t);
 
-    jit_avx512_core_u8s8s32x_deconv_fwd_kernel(jit_conv_conf_t ajcp,
-            const primitive_attr_t &attr) : jcp(ajcp), attr_(attr) {
+    jit_avx512_core_x8s8s32x_deconv_fwd_kernel(
+            jit_conv_conf_t ajcp, const primitive_attr_t &attr)
+        : jcp(ajcp), attr_(attr), eltwise_injector_(nullptr) {
+        if (jcp.with_eltwise)
+            eltwise_injector_ = new jit_uni_eltwise_injector_f32<avx512_common>(
+                    this, jcp.eltwise);
         generate();
         jit_ker = (void (*)(jit_deconv_call_s *))getCode();
     }
 
+    ~jit_avx512_core_x8s8s32x_deconv_fwd_kernel() {
+            delete eltwise_injector_;
+    }
+
+    static bool post_ops_ok(jit_conv_conf_t &jcp,
+            const primitive_attr_t &attr);
+
     static status_t init_conf(jit_conv_conf_t &jcp,
             const deconvolution_desc_t &cd,
             cpu_memory_t::pd_t &src_pd,
@@ -59,10 +70,14 @@ struct jit_avx512_core_u8s8s32x_deconv_fwd_kernel : public jit_generator {
             cpu_memory_t::pd_t &bias_pd,
             const primitive_attr_t &attr);
 
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp, const primitive_attr_t &attr);
+
     jit_conv_conf_t jcp;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_deconv_call_s *);
 private:
+    jit_uni_eltwise_injector_f32<avx512_common> *eltwise_injector_;
     using reg64_t = const Xbyak::Reg64;
     using zmm_t = const Xbyak::Zmm;
     using xmm_t = const Xbyak::Xmm;
@@ -78,17 +93,29 @@ private:
     reg64_t reg_ptr_scales = rax;
     reg64_t reg_oc_blocks = rsi;
 
-    reg64_t reg_scratch = r14;
     reg64_t aux_reg_src = r11;
     reg64_t aux_reg_filt = r12;
-    reg64_t reg_kj = rax;
+
+    reg64_t reg_compensation = r14;
+    reg64_t reg_scratch = r14;
+    reg64_t reg_ptr_sum_scale = r11;
+    reg64_t reg_bias_alpha = abi_not_param1;
+    reg64_t reg_overflow = rax;
+    reg64_t reg_comp_strides = reg_overflow;
 
     Xbyak::Opmask ktail_mask = Xbyak::Opmask(2);
-    zmm_t zmm_tmp = zmm_t(29);
-    zmm_t zmm_one = zmm_t(30);
+    zmm_t zmm_tmp = zmm_t(28);
+    zmm_t zmm_one = zmm_t(29);
+    /* used during write-out section of store_output */
     zmm_t zmm_zero = zmm_t(31);
     zmm_t zmm_wei = zmm_t(31);
 
+    /* signed input */
+    zmm_t zmm_shift = zmm_t(30);
+    zmm_t zmm_comp = zmm_t(30);
+    zmm_t zmm_bias = zmm_t(31);
+    zmm_t zmm_prev_dst = zmm_t(31);
+
     zmm_t zmm_out(int i_ur, int i_oc) {
         int idx = i_ur * jcp.nb_oc_blocking + i_oc;
         assert(idx < 31);
@@ -99,6 +126,12 @@ private:
         assert(idx < 31);
         return zmm_t(idx);
     }
+    zmm_t zmm_bias_alpha() {
+        return zmm_t(jcp.nb_oc_blocking * jcp.ur_w);
+    }
+    xmm_t xmm_bias_alpha() {
+        return xmm_t(jcp.nb_oc_blocking * jcp.ur_w);
+    }
 
     int get_ow_start(int ki, int l_overflow) {
         int res = (jcp.ow - 1 + jcp.r_pad) % jcp.stride_w
@@ -111,25 +144,28 @@ private:
 
     int get_ow_end(int ur_w, int ki, int r_overflow) {
         if (utils::one_of(ur_w, jcp.ow, jcp.ur_w_tail))
-                ur_w += nstl::min(0, jcp.r_pad);
+                ur_w += nstl::min(0, jcp.r_pad); // remove negative padding
         int res = (ur_w - 1 + jcp.l_pad) % jcp.stride_w
             + r_overflow * jcp.stride_w - ki * (jcp.dilate_w + 1);
         while (res < 0)
             res += jcp.stride_w;
         return ur_w - res;
     }
-
+    bool maybe_eltwise(int position);
+    void compute_eltwise(int ur_w);
     void prepare_output(int ur_w);
     void store_output(int ur_w, bool last_oc_block);
-    void compute_ker(int ur_w, int pad_l, int pad_r, ker_block_t last_ker_block);
-    void compute_loop(int ur_w, int pad_l, int pad_r, bool last_block);
+    void compute_ker(int ur_w, int l_overflow, int r_overflow,
+             ker_block_t last_ic_block_flag, bool h_padded = false);
+    void kh_loop(int ur_w, int pad_l, int pad_r, ker_block_t last_ker_block);
+    void icb_loop(int ur_w, int pad_l, int pad_r, bool last_block);
     void generate();
     void cvt2ps(data_type_t type_in, zmm_t zmm_in, const Xbyak::Operand &op,
         bool mask_flag);
 };
 
-template <impl::data_type_t dst_type>
-struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t : public cpu_primitive_t {
+template <impl::data_type_t src_type, impl::data_type_t dst_type>
+struct _jit_avx512_core_x8s8s32x_deconvolution_fwd_t : public cpu_primitive_t {
     struct pd_t : public cpu_deconvolution_fwd_pd_t {
         pd_t(engine_t *engine,
                 const deconvolution_desc_t *adesc,
@@ -138,7 +174,7 @@ struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t : public cpu_primitive_t {
             : cpu_deconvolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd) {}
 
         DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_deconvolution:", avx512_core, ""),
-                _jit_avx512_core_u8s8s32x_deconvolution_fwd_t<dst_type>);
+                _jit_avx512_core_x8s8s32x_deconvolution_fwd_t<src_type, dst_type>);
 
         virtual status_t init() override {
             assert(this->engine()->kind() == engine_kind::cpu);
@@ -147,6 +183,7 @@ struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t : public cpu_primitive_t {
                 && utils::one_of(this->desc()->prop_kind, prop_kind::forward_training,
                             prop_kind::forward_inference)
                 && this->desc()->alg_kind & alg_kind::deconvolution_direct
+                && this->desc()->src_desc.data_type == src_type
                 && this->desc()->dst_desc.data_type == dst_type
                 && IMPLICATION(this->with_bias(), utils::one_of(
                             this->desc()->bias_desc.data_type, data_type::f32,
@@ -154,41 +191,48 @@ struct _jit_avx512_core_u8s8s32x_deconvolution_fwd_t : public cpu_primitive_t {
                 && this->desc()->accum_data_type == data_type::s32;
             if (!ok) return status::unimplemented;
 
-            /*TODO: support signed input and postops */
-            return jit_avx512_core_u8s8s32x_deconv_fwd_kernel::init_conf(
+            status_t status = jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_conf(
                     jcp_, *this->desc(), this->src_pd_,
                     this->weights_pd_, this->dst_pd_,
                     this->with_bias(), this->bias_pd_,
                     *this->attr());
+
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_avx512_core_x8s8s32x_deconv_fwd_kernel::init_scratchpad(scratchpad,
+                    jcp_, *this->attr());
+
+            return status::success;
         }
         jit_conv_conf_t jcp_;
     };
 
-    _jit_avx512_core_u8s8s32x_deconvolution_fwd_t(const pd_t *pd,
+    _jit_avx512_core_x8s8s32x_deconvolution_fwd_t(const pd_t *apd,
            const input_vector &inputs, const output_vector &outputs)
-       : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-           kernel_ = new jit_avx512_core_u8s8s32x_deconv_fwd_kernel(conf_.jcp_,
-                   *conf_.attr());
+       : cpu_primitive_t(apd, inputs, outputs) {
+           kernel_ = new jit_avx512_core_x8s8s32x_deconv_fwd_kernel(pd()->jcp_,
+                   *pd()->attr());
        }
 
-    ~_jit_avx512_core_u8s8s32x_deconvolution_fwd_t() {
+    ~_jit_avx512_core_x8s8s32x_deconvolution_fwd_t() {
         delete kernel_;
     }
 
-    typedef typename prec_traits<data_type::u8>::type src_data_t;
+    typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
-    jit_avx512_core_u8s8s32x_deconv_fwd_kernel *kernel_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+    jit_avx512_core_x8s8s32x_deconv_fwd_kernel *kernel_;
 };
 
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_generator.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_generator.hpp
index b72ed2d9e..b2477240a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_generator.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_generator.hpp
@@ -102,6 +102,8 @@ static const Xbyak::Reg64 abi_param1(Xbyak::Operand::RDI),
              abi_param2(Xbyak::Operand::RSI),
              abi_param3(Xbyak::Operand::RDX),
              abi_param4(Xbyak::Operand::RCX),
+             abi_param5(Xbyak::Operand::R8),
+             abi_param6(Xbyak::Operand::R9),
              abi_not_param1(Xbyak::Operand::RCX);
 #endif
 #endif
@@ -110,7 +112,7 @@ inline unsigned int get_cache_size(int level, bool per_core = true){
     unsigned int l = level - 1;
     // Currently, if XByak is not able to fetch the cache topology
     // we default to 32KB of L1, 512KB of L2 and 1MB of L3 per core.
-    if (cpu.data_cache_levels == 0){
+    if (cpu.getDataCacheLevels() == 0){
         const int L1_cache_per_core = 32000;
         const int L2_cache_per_core = 512000;
         const int L3_cache_per_core = 1024000;
@@ -122,31 +124,15 @@ inline unsigned int get_cache_size(int level, bool per_core = true){
         default: return 0;
         }
     }
-    if (l < cpu.data_cache_levels) {
-        return cpu.data_cache_size[l]
-            / (per_core ? cpu.cores_sharing_data_cache[l] : 1);
+    if (l < cpu.getDataCacheLevels()) {
+        return cpu.getDataCacheSize(l)
+            / (per_core ? cpu.getCoresSharingDataCache(l) : 1);
     } else
         return 0;
 }
 
 }
 
-// TODO (Roma): move all_same to a more appropriate location
-
-template <typename T, typename U, typename... Us>
-struct all_same : std::false_type {};
-
-template <typename T, typename... Us>
-struct all_same<T, T, Us...> : all_same<T, Us...> { };
-
-template <typename T>
-struct all_same<T, T> : std::true_type {};
-
-struct jit_code_injection {
-    const Xbyak::uint8* code;
-    size_t size;
-};
-
 class jit_generator : public Xbyak::CodeGenerator
 {
 private:
@@ -174,6 +160,8 @@ public:
         _cmp_neq_uq = 4u,
         _cmp_nlt_us = 5u,
         _cmp_nle_us = 6u,
+
+        _op_floor = 1u,
     };
 
     Xbyak::Reg64 param1 = abi_param1;
@@ -302,7 +290,7 @@ public:
 
     // Disallow char-based labels completely
     void L(const char *label) = delete;
-    void L(const Xbyak::Label& label) { Xbyak::CodeGenerator::L(label); }
+    void L(Xbyak::Label& label) { Xbyak::CodeGenerator::L(label); }
 
     void uni_vpxor(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
                    const Xbyak::Operand &op) {
@@ -322,6 +310,32 @@ public:
         vpxord(x1, x2, op);
     }
 
+    void uni_vmovss(const Xbyak::Address& addr, const Xbyak::Xmm &x) {
+        movss(addr, x);
+    }
+    void uni_vmovss(const Xbyak::Address& addr, const Xbyak::Ymm &x) {
+        vmovss(addr, x);
+    }
+    void uni_vmovss(const Xbyak::Xmm &x, const Xbyak::Address& addr) {
+        movss(x, addr);
+    }
+    void uni_vmovss(const Xbyak::Ymm &x, const Xbyak::Address& addr) {
+        vmovss(x, addr);
+    }
+
+    void uni_vmovsd(const Xbyak::Address& addr, const Xbyak::Xmm &x) {
+        movsd(addr, x);
+    }
+    void uni_vmovsd(const Xbyak::Address& addr, const Xbyak::Ymm &x) {
+        vmovsd(addr, x);
+    }
+    void uni_vmovsd(const Xbyak::Xmm &x, const Xbyak::Address& addr) {
+        movsd(x, addr);
+    }
+    void uni_vmovsd(const Xbyak::Ymm &x, const Xbyak::Address& addr) {
+        vmovsd(x, addr);
+    }
+
     void uni_vmovdqu(const Xbyak::Address &addr, const Xbyak::Xmm &x) {
         movdqu(addr, x);
     }
@@ -393,6 +407,29 @@ public:
         }
     }
 
+    void uni_vrcpss(const Xbyak::Xmm &x, const Xbyak::Operand &op) {
+        rcpss(x, op);
+    }
+    void uni_vrcpss(const Xbyak::Ymm &x1, const Xbyak::Xmm &x2) {
+        Xbyak::Xmm x1_(x1.getIdx());
+        Xbyak::Xmm x2_(x2.getIdx());
+        vrcpss(x1_, x1_, x2_);
+    }
+    void uni_vrcpss(const Xbyak::Ymm &x, const Xbyak::Address &op) {
+        Xbyak::Xmm x_(x.getIdx());
+        vrcpss(x_, x_, op);
+    }
+
+    void uni_vrcpps(const Xbyak::Xmm &x, const Xbyak::Operand &op) {
+        rcpps(x, op);
+    }
+    void uni_vrcpps(const Xbyak::Ymm &x, const Xbyak::Operand &op) {
+        vrcpps(x, op);
+    }
+    void uni_vrcpps(const Xbyak::Zmm &x, const Xbyak::Operand &op) {
+        vrcp14ps(x, op);
+    }
+
     void uni_vdivps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
                     const Xbyak::Operand &op2 = Xbyak::Operand()) {
         assert(x.getIdx() == op1.getIdx());
@@ -519,24 +556,30 @@ public:
         vpaddd(x1, x2, op);
     }
 
-    void uni_vandps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
-                    const Xbyak::Operand &op2 = Xbyak::Operand()) {
-        assert(x.getIdx() == op1.getIdx());
-        andps(x, op2);
+    void uni_vandps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
+                    const Xbyak::Operand &op = Xbyak::Operand()) {
+        assert(x1.getIdx() == x2.getIdx());
+        andps(x1, op);
     }
-    void uni_vandps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
-                    const Xbyak::Operand &op2 = Xbyak::Operand()) {
-        vandps(x, op1, op2);
+    void uni_vandps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
+                    const Xbyak::Operand &op = Xbyak::Operand()) {
+        if (!mayiuse(avx512_common) || x1.getBit() < 512)
+            vandps(x1, x2, op);
+        else
+            vpandd(x1, x2, op);
     }
 
-    void uni_vorps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
-                    const Xbyak::Operand &op2 = Xbyak::Operand()) {
-        assert(x.getIdx() == op1.getIdx());
-        orps(x, op2);
+    void uni_vorps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
+                    const Xbyak::Operand &op = Xbyak::Operand()) {
+        assert(x1.getIdx() == x2.getIdx());
+        orps(x1, op);
     }
-    void uni_vorps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
-                    const Xbyak::Operand &op2 = Xbyak::Operand()) {
-        vorps(x, op1, op2);
+    void uni_vorps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
+                    const Xbyak::Operand &op = Xbyak::Operand()) {
+        if (!mayiuse(avx512_common) || x1.getBit() < 512)
+            vorps(x1, x2, op);
+        else
+            vpord(x1, x2, op);
     }
 
     void uni_vpslld(const Xbyak::Xmm &x, const Xbyak::Operand &op,
@@ -582,16 +625,38 @@ public:
     void uni_vcmpgtps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
                       const Xbyak::Operand &op) {
         assert(x1.getIdx() == x2.getIdx());
-        cmpps(x1, op, 0x6);
+        cmpps(x1, op, _cmp_nle_us);
     }
+
     void uni_vcmpgtps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
                       const Xbyak::Operand &op) {
         vcmpgtps(x1, x2, op);
     }
 
+    void uni_vcmpgeps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
+                      const Xbyak::Operand &op) {
+        assert(x1.getIdx() == x2.getIdx());
+        cmpps(x1, op, _cmp_nlt_us);
+    }
+
+    void uni_vcmpgeps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
+                      const Xbyak::Operand &op) {
+        vcmpps(x1, x2, op, _cmp_nlt_us);
+    }
+
+    void uni_vtestps(const Xbyak::Xmm &x1, const Xbyak::Operand &op) {
+        ptest(x1, op);
+    }
+
+    void uni_vtestps(const Xbyak::Ymm &x1, const Xbyak::Operand &op) {
+        assert(!(x1.isZMM() || op.isZMM()));
+        vtestps(x1, op);
+    }
+
     void uni_vblendvps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
                        const Xbyak::Operand &op, const Xbyak::Xmm &msk) {
         assert(x1.getIdx() == x2.getIdx());
+        assert(msk.getIdx() == 0);
         blendvps(x1, op);
     }
     void uni_vblendvps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
@@ -629,6 +694,22 @@ public:
         vmovmskps(x1, x2);
     }
 
+    void uni_vpackssdw(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op){
+        assert(x1.getIdx() == x1.getIdx());
+        packssdw(x1, op);
+    }
+    void uni_vpackssdw(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::Operand &op){
+        vpackssdw(x1, x2, op);
+    }
+
+    void uni_vpackuswb(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op){
+        assert(x1.getIdx() == x1.getIdx());
+        packuswb(x1, op);
+    }
+    void uni_vpackuswb(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::Operand &op){
+        vpackuswb(x1, x2, op);
+    }
+
     void uni_vpmovsxbd(const Xbyak::Xmm &x, const Xbyak::Operand &op) {
         pmovsxbd(x, op);
     }
@@ -643,14 +724,6 @@ public:
         vpmovzxbd(x, op);
     }
 
-    void uni_vpackssdw(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op) {
-        assert(x1.getIdx() == x2.getIdx());
-        packssdw(x1, op);
-    }
-    void uni_vpackssdw(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::Operand &op) {
-        vpackssdw(x1, x2, op);
-    }
-
     void uni_vpackusdw(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op) {
         assert(x1.getIdx() == x2.getIdx());
         packusdw(x1, op);
@@ -667,14 +740,6 @@ public:
         vpacksswb(x1, x2, op);
     }
 
-    void uni_vpackuswb(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op) {
-        assert(x1.getIdx() == x2.getIdx());
-        packuswb(x1, op);
-    }
-    void uni_vpackuswb(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::Operand &op) {
-        vpackuswb(x1, x2, op);
-    }
-
     void uni_vpmaxsd(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::Operand &op) {
         assert(x1.getIdx() == x2.getIdx());
         pmaxsd(x1, op);
@@ -731,6 +796,45 @@ public:
         vpsubb(x1, x2, op);
     }
 
+    void uni_vpslldq(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2, const Xbyak::uint8 &op) {
+        assert(x1.getIdx() == x2.getIdx());
+        pslldq(x1, op);
+    }
+    void uni_vpslldq(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2, const Xbyak::uint8 &op) {
+        vpslldq(x1, x2, op);
+    }
+
+    void uni_vpand(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
+                   const Xbyak::Operand &op = Xbyak::Operand()) {
+        assert(x1.getIdx() == x2.getIdx());
+        pand(x1, op);
+    }
+    void uni_vpand(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
+                    const Xbyak::Operand &op = Xbyak::Operand()) {
+        vpand(x1, x2, op);
+    }
+
+    void uni_vpaddb(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
+                    const Xbyak::Operand &op) {
+        assert(x1.getIdx() == x2.getIdx());
+        paddb(x2, op);
+    }
+    void uni_vpaddb(const Xbyak::Ymm &x1, const Xbyak::Xmm &x2,
+                    const Xbyak::Operand &op) {
+        vpaddb(x1, x2, op);
+    }
+
+    void uni_vpshufb(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
+                     const Xbyak::Operand &op) {
+        assert(x1.getIdx() == x2.getIdx());
+        pshufb(x1, op);
+    }
+
+    void uni_vpshufb(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
+                     const Xbyak::Operand &op) {
+        vpshufb(x1, x2, op);
+    }
+
     void mul_by_const(const Xbyak::Reg &out,
             const Xbyak::Reg64 &tmp, int value) {
         // Generates a shift + add sequence for multiplicating contents of the
@@ -764,10 +868,6 @@ public:
         mov(out, tmp);
     }
 
-    void inject(jit_code_injection&& in) {
-        db(in.code, in.size);
-    }
-
     void dump_code(const Xbyak::uint8 *code) const {
         if (code) {
             static int counter = 0;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_primitive_conf.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_primitive_conf.hpp
index 47c9799c6..9de97fee1 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_primitive_conf.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_primitive_conf.hpp
@@ -19,6 +19,8 @@
 
 #include <stdint.h>
 
+#include "common/primitive_attr.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
@@ -27,7 +29,7 @@ namespace cpu {
 enum conv_version_t {ver_unused, ver_fma, ver_avx512_core, ver_4fma, ver_4vnni,
                      ver_vnni};
 enum conv_loop_order_t {loop_cgn, loop_gnc, loop_ngc, loop_gncw, loop_cwgn,
-                            loop_ngcw};
+                            loop_ngcw, loop_nhwcg};
 enum conv_1x1_loop_order_t {loop_rbl, loop_rlb, loop_lbr, loop_lrb, loop_blr,
                             loop_brl};
 enum conv_kernel_kind_t {embd_bcast, expl_bcast};
@@ -53,6 +55,7 @@ struct jit_conv_conf_t {
     conv_version_t ver;
     conv_loop_order_t loop_order;
 
+    int simd_w;
     int ndims;
     int mb;
     int ngroups, ic, oc, oc_without_padding, ic_without_padding;
@@ -64,32 +67,22 @@ struct jit_conv_conf_t {
     int stride_d, stride_h, stride_w;
     int dilate_d, dilate_h, dilate_w;
     memory_format_t src_fmt;
+    memory_format_t dst_fmt;
     bool with_bias;
     bool with_sum;
     bool with_eltwise;
     bool with_dw_conv;
+    bool with_binarization;
+
+    post_ops_t::entry_t::eltwise_t eltwise;
 
-    alg_kind_t eltwise_alg;
-    float eltwise_alpha;
-    float eltwise_beta;
-    float eltwise_scale;
+    int nthr, nthr_mb, nthr_g, nthr_oc_b, nthr_ic_b;
 
     int idp, ihp, iwp, ohp, owp;
 
-    int dw_conv_in_h;
-    int dw_conv_in_w;
-    int dw_conv_ker_h;
-    int dw_conv_ker_w;
-    int dw_conv_str_h;
-    int dw_conv_str_w;
-    const float* dw_conv_weights;
-    const float* dw_conv_biases;
-
-    bool dw_conv_with_sum;
-    bool dw_conv_with_eltwise;
-    alg_kind_t dw_conv_eltwise_alg;
-    float dw_conv_eltwise_alpha;
-    float dw_conv_eltwise_beta;
+    const float* conv_weights;
+    const float* conv_biases;
+    int dw_conv_oh, dw_conv_ow;
 
     int nb_ic, ic_block;
     int nb_oc, oc_block;
@@ -102,6 +95,7 @@ struct jit_conv_conf_t {
     int ur_h, ur_w;
     int ur_w_tail;
     bool is_1stconv;
+    int nonblk_group_off;
     /* fma avx512_core */
     conv_kernel_kind_t kernel_kind;
     /* 4fma */
@@ -121,6 +115,7 @@ struct jit_conv_conf_t {
     int oc_nb1;
     int ur_ow_max, ur_ow, ur_ow_tail;
     int ur_ow_nsteps;
+    data_type_t src_dt;
     data_type_t bia_dt;
     data_type_t dst_dt;
     /* avx512: max possible value is nregs(32) - aux_regs(4) */
@@ -129,16 +124,22 @@ struct jit_conv_conf_t {
     bool expl_bcast;
     bool large_spatial;
     int is_oc_scale;
+    int max_regs_ur; // maximum accumulation registers
     // dw conv
     int nb_ch, ch_block, nb_ch_blocking;
-    bool is_depthwise;
+    bool is_depthwise, is_fast_depthwise;
     int aligned_threads;
     // large spatial
     int oh_blk_size;
-    int ow_blk_size;
     // s8s8 convolution
     bool signed_input;
     float wei_adj_scale;
+    // planar conv
+    int nb_ow_blocking;
+
+    int oh_block;
+    int nb_oh_blocking;
+    int oh_block_step;
 };
 
 struct jit_conv_conf_2x3_wino_t {
@@ -173,9 +174,7 @@ struct jit_conv_conf_2x3_wino_t {
     int typesize_acc;
 
     memory_format_t src_fmt;
-    bool with_bias, with_relu;
-    float relu_negative_slope;
-    bool with_sum;
+    bool with_bias;
     bool small_mb;
 
     int xb, yb;
@@ -188,6 +187,12 @@ struct jit_conv_conf_2x3_wino_t {
     int m_block, n_block, k_block;
     int n2_block, n_chunks;
     int k2_block, k_chunks;
+
+    int mb_block, nb_mb;
+
+    size_t size_wino_src, size_wino_wei, size_wino_dst;
+
+    int nthr;
 };
 
 /*
@@ -267,6 +272,47 @@ struct jit_conv_winograd_conf_t : public jit_conv_conf_t {
     winograd_sched_t sched_policy;
 };
 
+struct jit_bin_conv_conf_t {
+    prop_kind_t prop_kind;
+    conv_version_t ver;
+    conv_loop_order_t loop_order;
+
+    int ndims;
+    int mb;
+    int ngroups, ic, oc, oc_padded, ic_padded;
+    int id, ih, iw, od, oh, ow;
+    int f_pad, l_pad, t_pad;
+    int back_pad, r_pad, b_pad;
+    int kd, kh, kw;
+    int stride_d, stride_h, stride_w;
+    int dilate_d, dilate_h, dilate_w;
+    memory_format_t src_fmt;
+    bool with_bias;
+    bool with_sum;
+    bool with_eltwise;
+    bool with_dw_conv;
+    bool with_binarization;
+
+    float pad_value;
+    bool exclude_pad;
+
+    int dw_conv_oh;
+    int dw_conv_ow;
+
+    int nb_ic, ic_block;
+    int nb_oc, oc_block;
+    int nb_ic_blocking, nb_oc_blocking; // blocking of nb_ic and nb_ic
+    int ur_h, ur_w;
+    int ur_w_tail;
+    int typesize_in;
+    int typesize_out;
+    int typesize_bia;
+    int typesize_acc;
+    data_type_t src_dt;
+    data_type_t bia_dt;
+    data_type_t dst_dt;
+};
+
 struct jit_conv_call_s {
     const void *src; /* hack, non-const for backward_data */
     const void *dst; /* hack, non-const for forward */
@@ -302,6 +348,7 @@ struct jit_conv_call_s {
     size_t ch_work;
     size_t t_overflow;
     size_t b_overflow;
+    size_t oh_blocks;
     int flags;
 
     const void *src_row0; /* hack, non-const for backward_data */
@@ -318,6 +365,9 @@ struct jit_deconv_call_s {
     const void *filt; /* hack, non-const for backward_weights */
     const void *bias; /* hack, non-const for backward_bias */
     const void *scales;
+    const void *compensation;
+    size_t t_overflow;
+    size_t b_overflow;
     size_t kh_padding;
     size_t oc_blocks;
 };
@@ -327,19 +377,12 @@ struct jit_dw_conv_call_s {
     const void *output;
     const void *filter;
     const void *bias;
-    union {
-        size_t table_flags; /* This allows both bytes to be read simultaneously
-                               */
-        struct {
-            unsigned char
-                    table_idx; /* Indicates the table entry for the
-                                        JIT-generated values that control the
-                                        inner loop execution. The entry is
-                                        determined by the oh_block exectuion. */
-            unsigned char
-                    exec_flag; /* Flags passed by driver execution to inner kernel */
-        };
-    };
+    size_t kh_count;
+    size_t oh_count;
+    size_t oh_index;
+    size_t filter_pad_off;
+    unsigned char
+            exec_flags; /* Flags passed by driver execution to inner kernel */
 };
 
 struct jit_wino_transform_call_s {
@@ -370,30 +413,13 @@ struct jit_1x1_conv_conf_t {
     int kh, kw;
     int stride_h, stride_w;
     memory_format_t src_fmt;
+    memory_format_t dst_fmt;
     bool with_bias;
     bool with_sum;
     bool with_eltwise;
     bool with_dw_conv;
 
-    alg_kind_t eltwise_alg;
-    float eltwise_alpha;
-    float eltwise_beta;
-    float eltwise_scale;
-
-    int dw_conv_in_h;
-    int dw_conv_in_w;
-    int dw_conv_ker_h;
-    int dw_conv_ker_w;
-    int dw_conv_str_h;
-    int dw_conv_str_w;
-    const float* dw_conv_weights;
-    const float* dw_conv_biases;
-
-    bool dw_conv_with_sum;
-    bool dw_conv_with_eltwise;
-    alg_kind_t dw_conv_eltwise_alg;
-    float dw_conv_eltwise_alpha;
-    float dw_conv_eltwise_beta;
+    post_ops_t::entry_t::eltwise_t eltwise;
 
     int is, os;
     int ic_block, oc_block;
@@ -427,10 +453,12 @@ struct jit_1x1_conv_conf_t {
     int tr_is;
     int nthr, nthr_mb, nthr_g, nthr_oc_b, nthr_ic_b;
     int is_oc_scale;
+    data_type_t src_dt;
     data_type_t bia_dt;
     data_type_t dst_dt;
     bool signed_input;
     float wei_adj_scale;
+    int dw_conv_oh, dw_conv_ow;
 
     /* u8s8s32x */
     int ic_dim, nb_ic, nb_ic_blocking, nb_ic_blocking_max;
@@ -454,8 +482,7 @@ struct jit_gemm_conv_conf_t {
     int stride_h, stride_w, stride_d;
     int dilate_h, dilate_w, dilate_d;
     memory_format_t src_fmt;
-    bool with_bias, with_relu;
-    float relu_negative_slope;
+    bool with_bias;
 
     int is, os, ks;
     int ic_block, oc_block;
@@ -465,6 +492,9 @@ struct jit_gemm_conv_conf_t {
     bool need_wei_reduction;
     bool signed_input;
     float wei_adj_scale;
+    int oh_block;
+    int ow_block;
+    bool outer_threading;
 };
 
 struct jit_1x1_conv_call_s {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.cpp
index cbce262f0..3ba4715cc 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.cpp
@@ -139,7 +139,7 @@ void jit_sse42_1x1_conv_kernel_f32::generate_reduce_loop(
         default:
             if (jcp.with_dw_conv)
                 return ptr[aux_reg_output_data +
-                           (i * jcp.dw_conv_ker_h * jcp.ow + j) * jcp.oc_block * sizeof(float) + n*4*sizeof(float)];
+                           (i * jcp_dw.kh * jcp.ow + j) * jcp.oc_block * sizeof(float) + n*4*sizeof(float)];
             else
                 return ptr[aux_reg_output_data +
                     (i * jcp.os + j) * jcp.oc_block * sizeof(float) + n*4*sizeof(float)];
@@ -185,7 +185,6 @@ void jit_sse42_1x1_conv_kernel_f32::generate_reduce_loop(
     }; // init()
 
     auto store = [=]() {
-        Label store_done;
         Label store_noadd;
 
         if (!jcp.with_sum) {
@@ -203,16 +202,13 @@ void jit_sse42_1x1_conv_kernel_f32::generate_reduce_loop(
 
         L(store_noadd);
 
-        Label store_norelu;
+        Label store_no_postops;
         test(reg_reduce_pos_flag, FLAG_REDUCE_LAST);
-        jz(store_norelu, T_NEAR);
+        jz(store_no_postops, T_NEAR);
 
         int eltwise_inj_idx = 0;
         int depthwise_inj_idx = 0;
         const auto &p = attr_.post_ops_;
-        if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-            eltwise_injectors[0]->compute_vector_range(1, 2 * ur * load_loop_blk + 1);
-        }
 
         int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
         for (int i = 0; i < end_idx; i++) {
@@ -244,15 +240,13 @@ void jit_sse42_1x1_conv_kernel_f32::generate_reduce_loop(
             }
         }
 
-        L(store_norelu);
+        L(store_no_postops);
 
         for (int j = 0; j < ur; ++j)
             for (int i = 0; i < load_loop_blk; ++i) {
                 movups(output_ptr(i, j, 0), reg_accum(i, j, 0));
                 movups(output_ptr(i, j, 1), reg_accum(i, j, 1));
             }
-
-        L(store_done);
     };
 
     auto fma_block = [=](bool last_block) {
@@ -375,12 +369,6 @@ void jit_sse42_1x1_conv_kernel_f32::generate_diff_bias_loop(int load_loop_blk)
 
 void jit_sse42_1x1_conv_kernel_f32::generate()
 {
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<sse42>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
     const auto &p = attr_.post_ops_;
     int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
     for (int i = 0; i < end_idx; i++) {
@@ -513,24 +501,15 @@ bool jit_sse42_1x1_conv_kernel_f32::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-        case 0: return true; // no post_ops
-        case 1:
-            return true // sum OR eltwise OR dw_conv
-                   && !jcp.with_eltwise && (is_simple(0) || is_sum(0) || is_dw_conv(0));
-        case 2:
-            return true // sum->eltwise OR dw_conv->eltwise OR eltwise->dw_conv OR dw_conv->sum OR sum->depthwise OR
-                   // eltwise->depthwise OR depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
-                                            (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
-                                            (is_simple(0) && is_simple(1)));
-        case 3:
-            return true // eltwise->dw_conv->eltwise OR dw_conv->sum->eltwise OR sum->eltwise->depthwise OR
-                   // sum->depthwise->eltwise OR sum->depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
-                                            (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
-                                            (is_sum(0) && is_simple(1) && is_simple(2)));
-        case 4: return true // eltwise->dw_conv->sum->eltwise
-                       && !jcp.with_eltwise && (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0) || is_dw_conv(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
+                       (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
+                       (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
+                       (is_sum(0) && is_simple(1) && is_simple(2));
+        case 4: return (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
         default: return false;
     }
 
@@ -540,7 +519,7 @@ bool jit_sse42_1x1_conv_kernel_f32::post_ops_ok(
 status_t jit_sse42_1x1_conv_kernel_f32::init_conf(jit_1x1_conv_conf_t &jcp,
         const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope)
+        const primitive_attr_t &attr)
 {
     if (!mayiuse(sse42))
         return status::unimplemented;
@@ -576,47 +555,25 @@ status_t jit_sse42_1x1_conv_kernel_f32::init_conf(jit_1x1_conv_conf_t &jcp,
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
 
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
-
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
     const auto &p = attr.post_ops_;
-    jcp.with_dw_conv = false;
-    int dw_conv_ind = p.find(primitive_kind::convolution);
-    if (dw_conv_ind != -1) {
-        jcp.with_dw_conv = true;
-        jcp.dw_conv_in_h = p.entry_[dw_conv_ind].dw_conv.in_h;
-        jcp.dw_conv_in_w = p.entry_[dw_conv_ind].dw_conv.in_w;
-        jcp.dw_conv_ker_h = p.entry_[dw_conv_ind].dw_conv.ker_h;
-        jcp.dw_conv_ker_w = p.entry_[dw_conv_ind].dw_conv.ker_w;
-        jcp.dw_conv_str_h = p.entry_[dw_conv_ind].dw_conv.str_h;
-        jcp.dw_conv_str_w = p.entry_[dw_conv_ind].dw_conv.str_w;
-        jcp.dw_conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
-        jcp.dw_conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
-    }
 
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp.with_dw_conv = dw_conv_ind != -1;
     if (jcp.with_dw_conv) {
-        int dw_conv_eltwise_ind = p.find(primitive_kind::eltwise, dw_conv_ind);
-        if (dw_conv_eltwise_ind != -1) {
-            jcp.dw_conv_with_eltwise = true;
-            jcp.dw_conv_eltwise_alg = p.entry_[dw_conv_eltwise_ind].eltwise.alg;
-            jcp.dw_conv_eltwise_alpha = p.entry_[dw_conv_eltwise_ind].eltwise.alpha;
-            jcp.dw_conv_eltwise_beta = p.entry_[dw_conv_eltwise_ind].eltwise.beta;
-        }
+        jcp.dw_conv_oh = jcp.oh;
+        jcp.dw_conv_ow = jcp.ow;
+        jcp.oh = p.entry_[dw_conv_ind].dw_conv.in_h;
+        jcp.ow = p.entry_[dw_conv_ind].dw_conv.in_w;
     }
 
     jcp.with_sum = p.find(primitive_kind::sum, 0, dw_conv_ind) != -1;
-    if (jcp.with_dw_conv) {
-        jcp.dw_conv_with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
-    }
 
-    if (jcp.with_dw_conv) {
-        jcp.oh = jcp.dw_conv_in_h;
-        jcp.ow = jcp.dw_conv_in_w;
-    }
+    jcp.src_dt = cd.src_desc.data_type;
+    jcp.bia_dt = jcp.with_bias ? cd.bias_desc.data_type : data_type::undef;
+    jcp.dst_dt = cd.dst_desc.data_type;
 
     jcp.os = jcp.oh * jcp.ow;
     jcp.is = jcp.ih * jcp.iw;
@@ -791,6 +748,24 @@ status_t jit_sse42_1x1_conv_kernel_f32::init_conf(jit_1x1_conv_conf_t &jcp,
     return status::success;
 }
 
+void jit_sse42_1x1_conv_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad,
+        const jit_1x1_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw) {
+    using namespace mkldnn::impl::memory_tracking::names;
+
+    if (jcp.prop_kind != backward_data && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp.oc);
+
+    if (jcp.with_dw_conv) {
+        const int nthreads = mkldnn_get_max_threads();
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * (jcp.oc / jcp.oc_block);
+        scratchpad.book(key_dw_conv_buffer, sizeof(float) * dw_conv_buffer_size_ * nthreads);
+
+        if (jcp.oc != jcp.oc_without_padding)
+            scratchpad.book(key_dw_conv_padded_bias, sizeof(float) * jcp.oc);
+    }
+}
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.hpp
index f2b7edd87..f41daf1b8 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_conv_kernel_f32.hpp
@@ -18,9 +18,9 @@
 #define JIT_SSE42_1x1_CONV_KERNEL_F32_HPP
 
 #include "c_types_map.hpp"
+#include "cpu_memory.hpp"
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
-#include "cpu_memory.hpp"
 #include "jit_uni_eltwise.hpp"
 #include "jit_uni_depthwise.hpp"
 
@@ -29,8 +29,10 @@ namespace impl {
 namespace cpu {
 
 struct jit_sse42_1x1_conv_kernel_f32: public jit_generator {
-    jit_sse42_1x1_conv_kernel_f32(jit_1x1_conv_conf_t ajcp,
-            const primitive_attr_t &attr): jcp(ajcp), attr_(attr) {
+    jit_sse42_1x1_conv_kernel_f32(jit_1x1_conv_conf_t ajcp, jit_conv_conf_t ajcp_dw,
+            const primitive_attr_t &attr)
+        : jcp(ajcp), jcp_dw(ajcp_dw), attr_(attr)
+    {
         this->generate();
         jit_ker = (void (*)(jit_1x1_conv_call_s *))this->getCode();
     }
@@ -53,22 +55,15 @@ struct jit_sse42_1x1_conv_kernel_f32: public jit_generator {
             const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &dst_d,
-            const primitive_attr_t &attr,
-            bool with_relu, float relu_negative_slope);
+            const primitive_attr_t &attr);
 
-    static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-            const convolution_desc_t &cd,
-            const memory_desc_wrapper &src_d,
-            const memory_desc_wrapper &weights_d,
-            const memory_desc_wrapper &dst_d,
-            const primitive_attr_t &attr)
-    {
-        return init_conf(jcp, cd, src_d, weights_d, dst_d, attr, false, 0.0);
-    }
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_1x1_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw = jit_conv_conf_t());
 
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse42_1x1_conv_kernel_f32)
 
     jit_1x1_conv_conf_t jcp;
+    jit_conv_conf_t jcp_dw;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_1x1_conv_call_s *);
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.cpp
index 3b95a103b..2fe6e8f7e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.cpp
@@ -34,36 +34,38 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
-template <bool with_relu>
-void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward() {
+void jit_sse42_1x1_convolution_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
     const int ndims = src_d.ndims();
     const auto &jcp = kernel_->jcp;
-    int MB = conf_.MB();
+    int MB = pd()->MB();
 
     const int work_amount = MB * jcp.ngroups * jcp.nb_bcast;
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
     }
 
     parallel(0, [&](const int ithr, const int nthr) {
         // TODO (Roma): remove this restriction
         assert(jcp.stride_w == 1 && jcp.stride_h == 1);
 
-        jit_1x1_conv_call_s par_conv = {};
+        auto par_conv = jit_1x1_conv_call_s();
 
         const int nb_oc = jcp.nb_load;
         const int nb_ic = jcp.nb_reduce;
@@ -120,7 +122,7 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward() {
                     const size_t src_off = data_blk_off(src_d, n, _icb, ih, iw);
                     par_conv.bcast_data = &src[src_off];
 
-                    par_conv.load_data = &weights[conf_.with_groups()
+                    par_conv.load_data = &weights[pd()->with_groups()
                         ? weights_d.blk_off(g, ocb, icb)
                         : weights_d.blk_off(ocb, icb)];
 
@@ -135,22 +137,25 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward() {
             iwork += bcast_step;
         }
     });
+
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
 }
 
-template <bool with_relu>
-void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
+void jit_sse42_1x1_convolution_fwd_t::execute_forward_with_dw_conv() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
-    auto &jcp = kernel_->jcp;
-    int MB = conf_.MB();
+    const auto &jcp = kernel_->jcp;
+    const auto &jcp_dw = kernel_dw_->jcp;
+    int MB = pd()->MB();
 
-    auto dw_bias = jcp.dw_conv_biases;
+    auto dw_bias = jcp_dw.conv_biases;
 
     int ocb_work = jcp.with_dw_conv ? utils::div_up(jcp.nb_load, jcp.nb_load_blocking) : 1;
     const int work_amount = MB * jcp.ngroups * ocb_work * jcp.nb_bcast;
@@ -173,8 +178,8 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
 
                 if ((oh + h) < 0 || (oh + h) >= jcp.ih) {
                     for (int chb = ocb; chb < ocb + load_step; chb++) {
-                        memset(ws_p + (((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow * jcp.oc_block +
-                               (chb - ocb) * jcp.dw_conv_ker_h * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
+                        memset(ws_p + (((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block +
+                               (chb - ocb) * jcp_dw.kh * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
                     }
                 } else {
                     const int _ocb = g * jcp.nb_load + ocb;
@@ -182,7 +187,7 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                     p.bcast_dim = this_block_size(os, jcp.os, bcast_step * os_block);
                     p.load_dim = this_block_size(ocb * jcp.oc_block, jcp.oc, load_step * jcp.oc_block);
 
-                    p.output_data = &ws_p[(((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow * jcp.oc_block];
+                    p.output_data = &ws_p[(((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block];
 
                     p.bias_data = &bias[_ocb * jcp.oc_block];
 
@@ -194,7 +199,7 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
 
                         p.reduce_dim = this_block_size(icb * jcp.ic_block, jcp.ic,
                                                        jcp.nb_reduce_blocking * jcp.ic_block);
-                        p.load_data = &weights[conf_.with_groups()
+                        p.load_data = &weights[pd()->with_groups()
                                                ? weights_d.blk_off(g, ocb, icb)
                                                : weights_d.blk_off(ocb, icb)];
 
@@ -210,8 +215,6 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         };
 
         auto compute_row_dw = [&](const float* ws_p, int n, int ocb, int load_step, int dst_idx) {
-            const auto &jcp_dw = kernel_dw_->jcp;
-
             for (int chb = ocb; chb < ocb + load_step; chb++) {
                 auto par_conv_dw = jit_conv_call_s();
 
@@ -226,9 +229,11 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                                        dst_idx/jcp_dw.stride_h*jcp_dw.ow*jcp_dw.ch_block];
 
                 par_conv_dw.kh_padding = jcp_dw.kh;
-                par_conv_dw.filt = &jcp.dw_conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
+                par_conv_dw.filt = &jcp_dw.conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
                 par_conv_dw.bias = &dw_bias[chb * jcp_dw.ch_block];
                 par_conv_dw.ur_w = (size_t)(jcp_dw.ow);
+                par_conv_dw.oc_work = nstl::min((chb + 1) * jcp_dw.ch_block, (int)jcp_dw.oc) - chb*jcp_dw.ch_block;
+                par_conv_dw.oc_off = chb * jcp_dw.ch_block * sizeof(float);
 
                 kernel_dw_->jit_ker(&par_conv_dw);
             }
@@ -239,11 +244,12 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         int start{0}, end{0};
         balance211(work_amount, nthr, ithr, start, end);
 
-        auto pbuf = dw_conv_buffer_ + ithr * dw_conv_buffer_size_;
+        auto dw_conv_buffer = scratchpad().get<data_t>(key_dw_conv_buffer);
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * (jcp.oc / jcp.oc_block);
+        auto pbuf = dw_conv_buffer + ithr * dw_conv_buffer_size_;
 
         const int os_block = jcp.iw;
 
-
         int iwork = start;
         while (iwork < end) {
             int n{0}, g{0}, ocbb{0}, osb{0};
@@ -272,7 +278,7 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                 compute_block_1x1(pbuf, n, g, oh + 1, ow, ih, iw, os, os_block, bcast_step, ocb, load_step, bcast_step);
             }
 
-            if ((oh % jcp.dw_conv_str_h == 0)) {
+            if ((oh % jcp_dw.stride_h == 0)) {
                 compute_row_dw(pbuf, n, ocb, load_step, oh);
             }
 
@@ -280,23 +286,25 @@ void _jit_sse42_1x1_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         }
     };
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            dw_padded_bias_[oc] = dw_bias[oc];
-        dw_bias = dw_padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
+
+        auto dw_padded_bias = scratchpad().get<data_t>(key_dw_conv_padded_bias);
+        utils::array_copy(dw_padded_bias, dw_bias, jcp.oc_without_padding);
+        utils::array_set(dw_padded_bias + jcp.oc_without_padding, 0.f,
+                         jcp.oc - jcp.oc_without_padding);
+        dw_bias = dw_padded_bias;
     }
 
     parallel(0, ker);
-}
 
-template void _jit_sse42_1x1_convolution_fwd_t<true>::execute_forward();
-template void _jit_sse42_1x1_convolution_fwd_t<false>::execute_forward();
-template void _jit_sse42_1x1_convolution_fwd_t<true>::execute_forward_fusing();
-template void _jit_sse42_1x1_convolution_fwd_t<false>::execute_forward_fusing();
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
+}
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.hpp
index a98619d37..59311024a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_1x1_convolution.hpp
@@ -20,7 +20,6 @@
 #include "c_types_map.hpp"
 #include "cpu_convolution_pd.hpp"
 #include "cpu_engine.hpp"
-#include "cpu_reducer.hpp"
 #include "jit_sse42_1x1_conv_kernel_f32.hpp"
 #include "mkldnn_thread.hpp"
 #include "utils.hpp"
@@ -30,65 +29,59 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu>
-struct _jit_sse42_1x1_convolution_fwd_t: public cpu_primitive_t {
+struct jit_sse42_1x1_convolution_fwd_t: public cpu_primitive_t {
     // TODO: (Roma) Code duplication duplication! Remove with templates
     //              (maybe...)!
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
         pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+                const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
-            , jcp_(), jcp_dw() {}
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_(), jcp_dw_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_1x1:", sse42, ""),
-                _jit_sse42_1x1_convolution_fwd_t<with_relu>);
+                jit_sse42_1x1_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
-                        this->cdesc_().src_desc.data_type,
-                        this->cdesc_().weights_desc.data_type,
-                        this->cdesc_().dst_desc.data_type)
+                        this->desc()->src_desc.data_type,
+                        this->desc()->weights_desc.data_type,
+                        this->desc()->dst_desc.data_type)
                 && IMPLICATION(this->with_bias(),
-                        data_type::f32 == this->cdesc_().bias_desc.data_type);
+                        data_type::f32 == this->desc()->bias_desc.data_type);
             if (!ok) return status::unimplemented;
 
             status_t sts_1x1 = jit_sse42_1x1_conv_kernel_f32::init_conf(jcp_,
-                    this->cdesc_(),
+                    *this->desc(),
                     *this->src_pd_.desc(), *this->weights_pd_.desc(),
-                    *this->dst_pd_.desc(), *this->attr(), with_relu,
-                    this->negative_slope());
+                    *this->dst_pd_.desc(), *this->attr());
             if (sts_1x1 != status::success) return sts_1x1;
 
             if (jcp_.with_dw_conv) {
-                int dw_conv_oh = (jcp_.oh - ((jcp_.dw_conv_ker_h - 1) + 1) + 2) / jcp_.dw_conv_str_h + 1;
-                int dw_conv_ow = (jcp_.ow - ((jcp_.dw_conv_ker_w - 1) + 1) + 2) / jcp_.dw_conv_str_w + 1;
-
-                status_t sts_dw = jit_uni_dw_conv_row_f32<sse42>::init_conf(jcp_dw,
-                                                                           jcp_.oc, jcp_.oh, jcp_.ow, dw_conv_oh, dw_conv_ow,
-                                                                           jcp_.dw_conv_ker_h, jcp_.dw_conv_ker_w,
-                                                                           jcp_.dw_conv_str_h, jcp_.dw_conv_str_w,
-                                                                           jcp_.dw_conv_eltwise_alg, jcp_.dw_conv_eltwise_alpha,
-                                                                           jcp_.dw_conv_eltwise_beta, jcp_.dw_conv_with_sum);
+                status_t sts_dw = jit_uni_dw_conv_row_f32<sse42>::init_conf(jcp_, jcp_dw_, *this->attr());
                 if (sts_dw != status::success) return sts_dw;
             }
 
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_sse42_1x1_conv_kernel_f32::init_scratchpad(scratchpad, jcp_, jcp_dw_);
+
             return status::success;
         }
 
         jit_1x1_conv_conf_t jcp_;
-        jit_conv_conf_t jcp_dw;
+        jit_conv_conf_t jcp_dw_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -105,56 +98,36 @@ struct _jit_sse42_1x1_convolution_fwd_t: public cpu_primitive_t {
                     : utils::pick(this->ndims() - 3, OIw8i8o, OIhw8i8o)));
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    _jit_sse42_1x1_convolution_fwd_t(const pd_t *pd,
+    jit_sse42_1x1_convolution_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd),
-        dw_conv_buffer_size_(0), dw_conv_buffer_(nullptr), padded_bias_(nullptr), dw_padded_bias_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
     {
-        kernel_ = new jit_sse42_1x1_conv_kernel_f32(conf_.jcp_, *conf_.attr());
-        if (conf_.jcp_.with_dw_conv) {
-            kernel_dw_ = new jit_uni_dw_conv_row_f32<sse42>(conf_.jcp_dw);
-
-            const int nthreads = mkldnn_get_max_threads();
-            dw_conv_buffer_size_ = (size_t) conf_.jcp_dw.kh * conf_.jcp_dw.iw * conf_.jcp_dw.ch_block *
-                                   (conf_.jcp_.oc / conf_.jcp_.oc_block);
-            dw_conv_buffer_ = (data_t *) malloc(dw_conv_buffer_size_ * nthreads * sizeof(data_t), 64);
-
-        }
-
-        if (conf_.want_padded_bias()) {
-            const auto &j = conf_.jcp_;
-            assert(j.ngroups == 1);
-            padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                padded_bias_[oc] = 0;
+        kernel_ = new jit_sse42_1x1_conv_kernel_f32(pd()->jcp_, pd()->jcp_dw_, *pd()->attr());
 
-            dw_padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                dw_padded_bias_[oc] = 0;
+        if (pd()->jcp_.with_dw_conv) {
+            kernel_dw_ = new jit_uni_dw_conv_row_f32<sse42>(pd()->jcp_dw_, *pd()->attr(), pd()->jcp_dw_.ch_block);
         }
     }
 
-    ~_jit_sse42_1x1_convolution_fwd_t() {
+    ~jit_sse42_1x1_convolution_fwd_t() {
         delete kernel_;
 
-        if (conf_.jcp_.with_dw_conv) {
+        if (pd()->jcp_.with_dw_conv) {
             delete kernel_dw_;
-            free(dw_conv_buffer_);
-            free(dw_padded_bias_);
         }
-
-        free(padded_bias_);
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.jcp_.with_dw_conv)
-            execute_forward_fusing();
+    virtual void execute(event_t *e) const {
+        if (pd()->jcp_.with_dw_conv)
+            execute_forward_with_dw_conv();
         else
             execute_forward();
 
@@ -162,24 +135,14 @@ struct _jit_sse42_1x1_convolution_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward();
-    void execute_forward_fusing();
+    void execute_forward() const;
+    void execute_forward_with_dw_conv() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     jit_sse42_1x1_conv_kernel_f32 *kernel_;
     jit_uni_dw_conv_row_f32<sse42> *kernel_dw_;
-
-    /* fuse with dw conv */
-    size_t dw_conv_buffer_size_;
-    data_t *dw_conv_buffer_;
-
-    data_t *padded_bias_;
-    data_t *dw_padded_bias_;
 };
 
-using jit_sse42_1x1_convolution_fwd_t = _jit_sse42_1x1_convolution_fwd_t<false>;
-using jit_sse42_1x1_convolution_relu_t = _jit_sse42_1x1_convolution_fwd_t<true>;
-
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.cpp
index 32f1903be..c19250433 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.cpp
@@ -29,6 +29,7 @@ namespace cpu {
 
 using namespace mkldnn::impl::prop_kind;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 using namespace Xbyak;
@@ -170,7 +171,7 @@ void jit_sse42_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         for (int jj = 0; jj < ur_w; jj++) {
             int o_off;
             if (jcp.with_dw_conv)
-                o_off = (ii * jcp.dw_conv_ker_h * ow + jj) * oc_blk;
+                o_off = (ii * jcp_dw.kh * ow + jj) * oc_blk;
             else
                 o_off = (ii * oh * ow + jj) * oc_blk;
 
@@ -206,7 +207,8 @@ void jit_sse42_conv_fwd_kernel_f32::width_blk_step(int ur_w,
 
     Label skip_kh_loop;
     mov(kj, reg_kh);
-    if ((jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
+    if ((jcp.dilate_h >= jcp.ih)
+            || (jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad)) {
         cmp(kj, 0);
         je(skip_kh_loop, T_NEAR);
     }
@@ -240,10 +242,6 @@ void jit_sse42_conv_fwd_kernel_f32::width_blk_step(int ur_w,
     int depthwise_inj_idx = 0;
     const auto &p = attr_.post_ops_;
 
-    if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-        eltwise_injectors[0]->compute_vector_range(1, oc_blocks * ur_w + 1);
-    }
-
     int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
     for (int i = 0; i < end_idx; i++) {
         auto& post_op = p.entry_[i];
@@ -275,7 +273,7 @@ void jit_sse42_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         for (int jj = 0; jj < ur_w; jj++) {
             int o_off;
             if (jcp.with_dw_conv)
-                o_off = (ii * jcp.dw_conv_ker_h * ow + jj) * oc_blk;
+                o_off = (ii * jcp_dw.kh * ow + jj) * oc_blk;
             else
                 o_off = (ii * oh * ow + jj) * oc_blk;
 
@@ -284,8 +282,6 @@ void jit_sse42_conv_fwd_kernel_f32::width_blk_step(int ur_w,
         }
     }
 
-    L(done);
-
     mov(aux_reg_kernel, reg_kernel);
     mov(aux_reg_input, reg_input);
     add(aux_reg_kernel, sizeof(float) * 4);
@@ -359,12 +355,6 @@ inline void jit_sse42_conv_fwd_kernel_f32::solve_common(int oc_blocks)
 
 void jit_sse42_conv_fwd_kernel_f32::generate()
 {
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<sse42>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
     const auto &p = attr_.post_ops_;
     int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
     for (int i = 0; i < end_idx; i++) {
@@ -431,24 +421,15 @@ bool jit_sse42_conv_fwd_kernel_f32::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-        case 0: return true; // no post_ops
-        case 1:
-            return true // sum OR eltwise OR dw_conv
-                   && !jcp.with_eltwise && (is_simple(0) || is_sum(0) || is_dw_conv(0));
-        case 2:
-            return true // sum->eltwise OR dw_conv->eltwise OR eltwise->dw_conv OR dw_conv->sum OR sum->depthwise OR
-                   // eltwise->depthwise OR depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
-                                            (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
-                                            (is_simple(0) && is_simple(1)));
-        case 3:
-            return true // eltwise->dw_conv->eltwise OR dw_conv->sum->eltwise OR sum->eltwise->depthwise OR
-                   // sum->depthwise->eltwise OR sum->depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
-                                            (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
-                                            (is_sum(0) && is_simple(1) && is_simple(2)));
-        case 4: return true // eltwise->dw_conv->sum->eltwise
-                       && !jcp.with_eltwise && (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0) || is_dw_conv(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_eltwise(1)) ||
+                       (is_eltwise(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_eltwise(0) && is_dw_conv(1) && is_eltwise(2)) ||
+                       (is_dw_conv(0) && is_sum(1) && is_eltwise(2)) ||
+                       (is_sum(0) && is_simple(1) && is_simple(2));
+        case 4: return (is_eltwise(0) && is_dw_conv(1) && is_sum(2) && is_eltwise(3));
         default: return false;
     }
 
@@ -458,7 +439,7 @@ bool jit_sse42_conv_fwd_kernel_f32::post_ops_ok(
 status_t jit_sse42_conv_fwd_kernel_f32::init_conf(jit_conv_conf_t &jcp,
         const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope)
+        const primitive_attr_t &attr)
 {
     if (!mayiuse(sse42)) return status::unimplemented;
 
@@ -496,47 +477,26 @@ status_t jit_sse42_conv_fwd_kernel_f32::init_conf(jit_conv_conf_t &jcp,
 
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
     const auto &p = attr.post_ops_;
-    jcp.with_dw_conv = false;
-    int dw_conv_ind = p.find(primitive_kind::convolution);
-    if (dw_conv_ind != -1) {
-        jcp.with_dw_conv = true;
-        jcp.dw_conv_in_h = p.entry_[dw_conv_ind].dw_conv.in_h;
-        jcp.dw_conv_in_w = p.entry_[dw_conv_ind].dw_conv.in_w;
-        jcp.dw_conv_ker_h = p.entry_[dw_conv_ind].dw_conv.ker_h;
-        jcp.dw_conv_ker_w = p.entry_[dw_conv_ind].dw_conv.ker_w;
-        jcp.dw_conv_str_h = p.entry_[dw_conv_ind].dw_conv.str_h;
-        jcp.dw_conv_str_w = p.entry_[dw_conv_ind].dw_conv.str_w;
-        jcp.dw_conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
-        jcp.dw_conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
-    }
 
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp.with_dw_conv = dw_conv_ind != -1;
     if (jcp.with_dw_conv) {
-        int dw_conv_eltwise_ind = p.find(primitive_kind::eltwise, dw_conv_ind);
-        if (dw_conv_eltwise_ind != -1) {
-            jcp.dw_conv_with_eltwise = true;
-            jcp.dw_conv_eltwise_alg = p.entry_[dw_conv_eltwise_ind].eltwise.alg;
-            jcp.dw_conv_eltwise_alpha = p.entry_[dw_conv_eltwise_ind].eltwise.alpha;
-            jcp.dw_conv_eltwise_beta = p.entry_[dw_conv_eltwise_ind].eltwise.beta;
-        }
+        jcp.dw_conv_oh = jcp.oh;
+        jcp.dw_conv_ow = jcp.ow;
+        jcp.oh = p.entry_[dw_conv_ind].dw_conv.in_h;
+        jcp.ow = p.entry_[dw_conv_ind].dw_conv.in_w;
     }
 
     jcp.with_sum = p.find(primitive_kind::sum, 0, dw_conv_ind) != -1;
-    if (jcp.with_dw_conv) {
-        jcp.dw_conv_with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
-    }
 
-    if (jcp.with_dw_conv) {
-        jcp.oh = jcp.dw_conv_in_h;
-        jcp.ow = jcp.dw_conv_in_w;
-    }
+    jcp.src_dt = cd.src_desc.data_type;
+    jcp.bia_dt = jcp.with_bias ? cd.bias_desc.data_type : data_type::undef;
+    jcp.dst_dt = cd.dst_desc.data_type;
 
     const bool flat = jcp.ic == 3 || jcp.ic == 1;
     const bool mimo = !flat;
@@ -613,6 +573,21 @@ status_t jit_sse42_conv_fwd_kernel_f32::init_conf(jit_conv_conf_t &jcp,
     return status::success;
 }
 
+void jit_sse42_conv_fwd_kernel_f32::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw) {
+    if (jcp.with_bias && jcp.oc != jcp.oc_without_padding)
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp.oc);
+
+    if (jcp.with_dw_conv) {
+        const int nthreads = mkldnn_get_max_threads();
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * jcp.nb_oc_blocking;
+        scratchpad.book(key_dw_conv_buffer, sizeof(float) * dw_conv_buffer_size_ * nthreads);
+
+        if (jcp.oc != jcp.oc_without_padding)
+            scratchpad.book(key_dw_conv_padded_bias, sizeof(float) * jcp.oc);
+    }
+}
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.hpp
index ea30028a4..f30952f46 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_conv_kernel_f32.hpp
@@ -18,9 +18,9 @@
 #define JIT_SSE42_CONV_KERNEL_F32_HPP
 
 #include "c_types_map.hpp"
+#include "cpu_memory.hpp"
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
-#include "cpu_memory.hpp"
 #include "jit_uni_eltwise.hpp"
 #include "jit_uni_depthwise.hpp"
 
@@ -29,8 +29,9 @@ namespace impl {
 namespace cpu {
 
 struct jit_sse42_conv_fwd_kernel_f32: public jit_generator {
-    jit_sse42_conv_fwd_kernel_f32(jit_conv_conf_t ajcp,
-            const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
+    jit_sse42_conv_fwd_kernel_f32(jit_conv_conf_t ajcp, jit_conv_conf_t ajcp_dw,
+            const primitive_attr_t &attr)
+        : jcp(ajcp), jcp_dw(ajcp_dw), attr_(attr)
     {
         this->generate();
         jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
@@ -52,11 +53,13 @@ struct jit_sse42_conv_fwd_kernel_f32: public jit_generator {
     static status_t init_conf(jit_conv_conf_t &jcp,
             const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &weights_d,
-            const memory_desc_wrapper &dst_d, const primitive_attr_t &attr,
-            bool with_relu = false, float relu_negative_slope = 0.);
+            const memory_desc_wrapper &dst_d, const primitive_attr_t &attr);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw = jit_conv_conf_t());
 
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse42_conv_fwd_kernel_f32)
     jit_conv_conf_t jcp;
+    jit_conv_conf_t jcp_dw;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_conv_call_s *);
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.cpp
index a37c31763..e025af7f0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.cpp
@@ -27,44 +27,46 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 #define src_blk_off(f, n, c, h, w) \
-    (conf_.ndims() == 3) \
+    (pd()->ndims() == 3) \
     ? (f).blk_off(n, c, w) \
     : (f).blk_off(n, c, h, w)
 
 #define wht_blk_off_(f, g, ...) \
-    conf_.with_groups() \
+    pd()->with_groups() \
     ? (f).blk_off(g, __VA_ARGS__) \
     : (f).blk_off(__VA_ARGS__)
 #define wht_blk_off(f, g, oc, ic, kh, kw) \
-        conf_.ndims() == 3 \
+        pd()->ndims() == 3 \
         ? wht_blk_off_(f, g, oc, ic, kw) \
         : wht_blk_off_(f, g, oc, ic, kh, kw)
 
-template <bool with_relu>
-void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward() {
+void jit_sse42_convolution_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
-    int MB = conf_.MB();
+    int MB = pd()->MB();
 
     int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
     const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.oh;
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
     }
 
     parallel(0, [&](const int ithr, const int nthr) {
@@ -86,7 +88,7 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward() {
                 int ocb_num = jcp.nb_oc_blocking;
 
                 for (int icb = icbb; icb < icbb + icb_step; ++icb) {
-                    jit_conv_call_s par_conv = {};
+                    auto par_conv = jit_conv_call_s();
 
                     const int ij = oh * jcp.stride_h;
                     const int i_t_overflow = nstl::max(0, jcp.t_pad - ij);
@@ -138,24 +140,26 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward() {
             icbb += icb_step;
         }
     });
+
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
 }
 
-template <bool with_relu>
-void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward_fusing() {
+void jit_sse42_convolution_fwd_t::execute_forward_with_dw_conv() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
     const auto &jcp_dw = kernel_dw_->jcp;
-    int MB = conf_.MB();
+    int MB = pd()->MB();
 
-    auto dw_bias = jcp.dw_conv_biases;
+    auto dw_bias = jcp_dw.conv_biases;
 
     int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
     const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.oh;
@@ -165,8 +169,8 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward_fusing() {
             for (int h = 0; h < num_rows; h++) {
                 if ((oh + h) < 0 || (oh + h) >= jcp.oh) {
                     for (int chb = ocb; chb < ocb + ocb_num; chb++) {
-                        memset(ws_p + (((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow * jcp.oc_block +
-                               (chb - ocb) * jcp.dw_conv_ker_h * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
+                        memset(ws_p + (((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block +
+                               (chb - ocb) * jcp_dw.kh * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
                     }
                 } else {
                     for (int icb = 0; icb < jcp.nb_ic; ++icb) {
@@ -187,11 +191,11 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                         par_conv.src = &src[src_d.blk_off(n,
                                                           jcp.ic == 3 ? 0 : _ic, ih, 0)];
 
-                        par_conv.dst = &ws_p[(((oh + h) + 1) % jcp.dw_conv_ker_h) * jcp.ow *
+                        par_conv.dst = &ws_p[(((oh + h) + 1) % jcp_dw.kh) * jcp.ow *
                                              jcp.oc_block];
 
                         const int wh = div_up(i_t_overflow, (jcp.dilate_h + 1));
-                        par_conv.filt = &weights[conf_.with_groups()
+                        par_conv.filt = &weights[pd()->with_groups()
                                                  ? weights_d.blk_off(g, ocb,
                                                                      jcp.ic == 3 ? 0 : icb, wh, 0)
                                                  : weights_d.blk_off(ocb,
@@ -241,9 +245,11 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward_fusing() {
                                        dst_idx/jcp_dw.stride_h*jcp_dw.ow*jcp_dw.ch_block];
 
                 par_conv_dw.kh_padding = jcp_dw.kh;
-                par_conv_dw.filt = &jcp.dw_conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
+                par_conv_dw.filt = &jcp_dw.conv_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
                 par_conv_dw.bias = &dw_bias[chb * jcp_dw.ch_block];
                 par_conv_dw.ur_w = (size_t)(jcp_dw.ow);
+                par_conv_dw.oc_work = nstl::min((chb + 1) * jcp_dw.ch_block, (int)jcp_dw.oc) - chb*jcp_dw.ch_block;
+                par_conv_dw.oc_off = chb * jcp_dw.ch_block * sizeof(float);
 
                 kernel_dw_->jit_ker(&par_conv_dw);
             }
@@ -252,7 +258,9 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         size_t start{0}, end{0};
         balance211(work_amount, nthr, ithr, start, end);
 
-        auto pbuf = dw_conv_buffer_ + ithr * dw_conv_buffer_size_;
+        auto dw_conv_buffer = scratchpad().get<data_t>(key_dw_conv_buffer);
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * jcp.nb_oc_blocking;
+        auto pbuf = dw_conv_buffer + ithr * dw_conv_buffer_size_;
 
         size_t n{0}, g{0}, ocbb{0}, oh{0};
         nd_iterator_init(start, n, MB, g, jcp.ngroups, ocbb, ocb_work,
@@ -281,23 +289,25 @@ void _jit_sse42_convolution_fwd_t<with_relu>::execute_forward_fusing() {
         }
     };
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
-
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            dw_padded_bias_[oc] = dw_bias[oc];
-        dw_bias = dw_padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = scratchpad().get<data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
+
+        auto dw_padded_bias = scratchpad().get<data_t>(key_dw_conv_padded_bias);
+        utils::array_copy(dw_padded_bias, dw_bias, jcp.oc_without_padding);
+        utils::array_set(dw_padded_bias + jcp.oc_without_padding, 0.f,
+                         jcp.oc - jcp.oc_without_padding);
+        dw_bias = dw_padded_bias;
     }
 
     parallel(0, ker);
-}
 
-template void _jit_sse42_convolution_fwd_t<true>::execute_forward();
-template void _jit_sse42_convolution_fwd_t<false>::execute_forward();
-template void _jit_sse42_convolution_fwd_t<true>::execute_forward_fusing();
-template void _jit_sse42_convolution_fwd_t<false>::execute_forward_fusing();
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
+}
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.hpp
index 192349588..5eb720c34 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_convolution.hpp
@@ -28,62 +28,56 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu>
-struct _jit_sse42_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
+struct jit_sse42_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
         pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+                const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
-            , jcp_(), jcp_dw() {}
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_(), jcp_dw_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit:", sse42, ""),
-                _jit_sse42_convolution_fwd_t<with_relu>);
+                jit_sse42_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
-                        this->cdesc_().src_desc.data_type,
-                        this->cdesc_().weights_desc.data_type,
-                        this->cdesc_().dst_desc.data_type)
+                        this->desc()->src_desc.data_type,
+                        this->desc()->weights_desc.data_type,
+                        this->desc()->dst_desc.data_type)
                 && IMPLICATION(this->with_bias(),
-                        data_type::f32 == this->cdesc_().bias_desc.data_type);
+                        data_type::f32 == this->desc()->bias_desc.data_type);
             if (!ok) return status::unimplemented;
 
-            status_t sts = jit_sse42_conv_fwd_kernel_f32::init_conf(jcp_, this->cdesc_(),
+            status_t status = jit_sse42_conv_fwd_kernel_f32::init_conf(jcp_, *this->desc(),
                     *this->src_pd_.desc(), *this->weights_pd_.desc(),
-                    *this->dst_pd_.desc(), *this->attr(), with_relu,
-                    this->negative_slope());
-            if (sts != status::success) return sts;
+                    *this->dst_pd_.desc(), *this->attr());
+            if (status != status::success) return status;
 
             if (jcp_.with_dw_conv) {
-                int dw_conv_oh = (jcp_.oh - ((jcp_.dw_conv_ker_h - 1) + 1) + 2) / jcp_.dw_conv_str_h + 1;
-                int dw_conv_ow = (jcp_.ow - ((jcp_.dw_conv_ker_w - 1) + 1) + 2) / jcp_.dw_conv_str_w + 1;
-
-                status_t sts_dw = jit_uni_dw_conv_row_f32<sse42>::init_conf(jcp_dw,
-                                                                           jcp_.oc, jcp_.oh, jcp_.ow, dw_conv_oh, dw_conv_ow,
-                                                                           jcp_.dw_conv_ker_h, jcp_.dw_conv_ker_w,
-                                                                           jcp_.dw_conv_str_h, jcp_.dw_conv_str_w,
-                                                                           jcp_.dw_conv_eltwise_alg, jcp_.dw_conv_eltwise_alpha,
-                                                                           jcp_.dw_conv_eltwise_beta, jcp_.dw_conv_with_sum);
+                status_t sts_dw = jit_uni_dw_conv_row_f32<sse42>::init_conf(jcp_, jcp_dw_, *this->attr());
                 if (sts_dw != status::success) return sts_dw;
             }
 
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_sse42_conv_fwd_kernel_f32::init_scratchpad(scratchpad, jcp_, jcp_dw_);
+
             return status::success;
         }
 
         jit_conv_conf_t jcp_;
-        jit_conv_conf_t jcp_dw;
+        jit_conv_conf_t jcp_dw_;
 
     protected:
         virtual status_t set_default_params() override {
@@ -105,57 +99,36 @@ struct _jit_sse42_convolution_fwd_t: public cpu_primitive_t {
                         OIhw8i8o, Ohwi8o)));
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    _jit_sse42_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_sse42_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd),
-          dw_conv_buffer_size_(0), dw_conv_buffer_(nullptr), padded_bias_(nullptr), dw_padded_bias_(nullptr)
+        : cpu_primitive_t(apd, inputs, outputs)
     {
-        kernel_ = new jit_sse42_conv_fwd_kernel_f32(conf_.jcp_, *conf_.attr());
-        if (conf_.jcp_.with_dw_conv) {
-            kernel_dw_ = new jit_uni_dw_conv_row_f32<sse42>(conf_.jcp_dw);
-        }
-
-        if (conf_.jcp_.with_dw_conv) {
-            const int nthreads = mkldnn_get_max_threads();
-            dw_conv_buffer_size_ = (size_t)conf_.jcp_dw.kh * conf_.jcp_dw.iw * conf_.jcp_dw.ch_block *
-                                   conf_.jcp_.nb_oc_blocking;
-            dw_conv_buffer_ = (float *)malloc(nthreads * dw_conv_buffer_size_ * sizeof(float), 64);
-        }
-
-        if (conf_.want_padded_bias()) {
-            const auto &j = conf_.jcp_;
-            assert(j.ngroups == 1);
-            padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                padded_bias_[oc] = 0;
+        kernel_ = new jit_sse42_conv_fwd_kernel_f32(pd()->jcp_, pd()->jcp_dw_, *pd()->attr());
 
-            dw_padded_bias_ = (data_t *)malloc(sizeof(data_t) * j.oc, 64);
-            for (int oc = j.oc_without_padding; oc < j.oc; ++oc)
-                dw_padded_bias_[oc] = 0;
+        if (pd()->jcp_.with_dw_conv) {
+            kernel_dw_ = new jit_uni_dw_conv_row_f32<sse42>(pd()->jcp_dw_, *pd()->attr(), pd()->jcp_dw_.ch_block);
         }
     }
 
-    ~_jit_sse42_convolution_fwd_t() {
+    ~jit_sse42_convolution_fwd_t() {
         delete kernel_;
 
-        if (conf_.jcp_.with_dw_conv) {
+        if (pd()->jcp_.with_dw_conv) {
             delete kernel_dw_;
-            free(dw_conv_buffer_);
-            free(dw_padded_bias_);
         }
-
-        free(padded_bias_);
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.jcp_.with_dw_conv)
-            execute_forward_fusing();
+    virtual void execute(event_t *e) const {
+        if (pd()->jcp_.with_dw_conv)
+            execute_forward_with_dw_conv();
         else
             execute_forward();
 
@@ -163,24 +136,14 @@ struct _jit_sse42_convolution_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward();
-    void execute_forward_fusing();
+    void execute_forward() const;
+    void execute_forward_with_dw_conv() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     jit_sse42_conv_fwd_kernel_f32 *kernel_;
     jit_uni_dw_conv_row_f32<sse42> *kernel_dw_;
-
-    /* fuse with dw conv */
-    size_t dw_conv_buffer_size_;
-    data_t *dw_conv_buffer_;
-
-    data_t *padded_bias_;
-    data_t *dw_padded_bias_;
 };
 
-using jit_sse42_convolution_fwd_t = _jit_sse42_convolution_fwd_t<false>;
-using jit_sse42_convolution_relu_t = _jit_sse42_convolution_fwd_t<true>;
-
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_i8i8_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_i8i8_pooling.cpp
new file mode 100644
index 000000000..cefecbd9f
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_i8i8_pooling.cpp
@@ -0,0 +1,586 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <math.h>
+
+#include "mkldnn_types.h"
+
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+
+#include "jit_generator.hpp"
+
+#include "jit_sse42_i8i8_pooling.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace Xbyak;
+
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::types;
+using namespace alg_kind;
+
+struct call_params_t {
+    const char *src_i8;
+    const char *dst_i8;
+    size_t kw_range;
+    size_t kh_range;
+    float idivider;
+};
+
+struct jit_sse42_i8i8_pool_fwd_ker_t : public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_sse42_i8i8_pool_fwd_ker_t)
+
+    Reg64 reg_ptr_src_i8 = r8;
+    Reg64 reg_ptr_dst_i8 = r9;
+
+    Reg64 ki = r10;
+    Reg64 kj = r11;
+    Reg64 reg_kw = r12;
+    Reg64 reg_kh = r13;
+    Reg64 c_iter = r14;
+
+    Reg64 aux_reg_src_h = rax;
+    Reg64 aux_reg_src_w = rbx;
+
+    Reg64 reg_tmp = rdx;
+    Reg64 reg_src_64 = r15;
+    Reg32 reg_src_32 = r15d;
+    Reg8 reg_src_8 = r15b;
+
+    size_t sizeof_src_dt() const { return data_type_size(jpp.src_dt); }
+    size_t sizeof_dst_dt() const { return data_type_size(jpp.dst_dt); }
+
+    Xmm xmm_tmp = Xmm(0);
+    Xmm vreg_tmp = Xmm(14);
+    Xmm vreg_zeros = Xmm(15);
+
+    /* max pooling */
+    Xmm vmm_src(int jj, int ii) {
+        return Xmm(2*jj + ii);
+    }
+
+    Xmm xmm_src(int jj) {
+        return Xmm(2*jj);
+    }
+
+    Xmm vmm_dst(int jj, int ii) {
+        return Xmm(2*jj + ii + 2 * jpp.ur_c);
+    }
+
+    Xmm xmm_dst(int jj) {
+        return Xmm(2*jj + 2 * jpp.ur_c);
+    }
+
+    /* avg pooling */
+    Xmm vmm_src_s32(int jj, int ii) {
+        return Xmm(2*jj + ii);
+    }
+
+    Xmm xmm_src_s32(int jj, int ii) {
+        return Xmm(2*jj + ii);
+    }
+
+    Xmm vmm_dst_s32(int jj, int ii) {
+        return Xmm(2*jj + ii + 2 * jpp.ur_c);
+    }
+
+    Ymm ymm_dst_s32(int jj, int ii) {
+        return Ymm(2*jj + ii + 2 * jpp.ur_c);
+    }
+
+    Xmm xmm_dst_s32(int jj, int ii) {
+        return Xmm(2*jj + ii + 2 * jpp.ur_c);
+    }
+
+    Xmm vmm_dst_f32(int jj, int ii) {
+        return Xmm(2*jj + ii + 4 * jpp.ur_c);
+    }
+
+    void (*ker_)(const call_params_t *);
+    jit_pool_conf_t jpp;
+
+    void init_tmp_reg();
+
+    void load_src(int jj, int c_step);
+    void store_dst(int jj, int c_step);
+
+    void compute_avg_step(int ur_c, int c_step);
+    void compute_max_step(int ur_c, int c_step);
+    void compute_step(int ur_c, int c_step);
+
+    void compute_c_block();
+    void generate();
+
+    static status_t init_conf(jit_pool_conf_t &jpp,
+        const pooling_desc_t &pd, const memory_desc_wrapper &src_d,
+        const memory_desc_wrapper &dst_d);
+
+    jit_sse42_i8i8_pool_fwd_ker_t(const jit_pool_conf_t &jpp_)
+           : jpp(jpp_) {
+        generate();
+        ker_ = reinterpret_cast<decltype(ker_)>(const_cast<uint8_t*>(
+                       getCode()));
+    }
+};
+
+void jit_sse42_i8i8_pool_fwd_ker_t::load_src(int jj, int c_step) {
+    using namespace data_type;
+
+    int repeats = c_step != 1 ? 2 : 1;
+    switch (jpp.alg) {
+        case pooling_max: {
+            auto offset = jj*c_step*sizeof_src_dt();
+            if (c_step == jpp.c_block) {
+                for (int ii = 0; ii < repeats; ii++)
+                    uni_vmovups(vmm_src(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
+            } else if (c_step == 1) {
+                if (jpp.src_dt == s32) {
+                    movsd(xmm_src(jj), ptr[aux_reg_src_w + offset]);
+                } else {
+                    mov(reg_src_8, ptr[aux_reg_src_w + offset]);
+                    movq(xmm_src(jj), reg_src_64);
+                }
+            }
+            break;
+        }
+        case pooling_avg_include_padding:
+        case pooling_avg_exclude_padding: {
+            auto offset = jj*c_step*sizeof_src_dt();
+            switch (jpp.src_dt) {
+                case s32:
+                    if (c_step == jpp.c_block) {
+                        for (int ii = 0; ii < repeats; ii++)
+                            uni_vmovups(vmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
+                    } else if (c_step == 1) {
+                        movsd(xmm_src_s32(jj, 0), ptr[aux_reg_src_w + offset]);
+                    }
+                    break;
+                case s8:
+                    if (c_step == jpp.c_block) {
+                        for (int ii = 0; ii < repeats; ii++) {
+                            movd(xmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
+
+                            uni_vpmovsxbd(vmm_src_s32(jj, ii), xmm_src_s32(jj, ii));
+                        }
+                    } else if (c_step == 1) {
+                        movsx(reg_src_32, ptr[aux_reg_src_w + offset]);
+                        movq(xmm_src_s32(jj, 0), reg_src_64);
+                    }
+                    break;
+                case u8:
+                    if (c_step == jpp.c_block) {
+                        for (int ii = 0; ii < repeats; ii++) {
+                            movd(xmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
+
+                            uni_vpmovzxbd(vmm_src_s32(jj, ii), xmm_src_s32(jj, ii));
+                        }
+                    } else if (c_step == 1) {
+                        movzx(reg_src_32, ptr[aux_reg_src_w + offset]);
+                        movq(xmm_src_s32(jj, 0), reg_src_64);
+                    }
+                    break;
+                default: assert(!"unsupported src data type");
+            }
+            break;
+        }
+        default: assert(!"unsupported algorithm");
+    }
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::store_dst(int jj, int c_step) {
+    using namespace data_type;
+
+    int repeats = c_step != 1 ? 2 : 1;
+    switch(jpp.alg) {
+        case pooling_max: {
+            auto offset = jj*c_step*sizeof_dst_dt();
+            if (c_step == jpp.c_block) {
+                for (int ii = 0; ii < repeats; ii++)
+                    uni_vmovups(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], vmm_dst(jj, ii));
+            } else if (c_step == 1) {
+                if (jpp.src_dt == s32) {
+                    movq(reg_src_64, xmm_dst(jj));
+                    mov(ptr[reg_ptr_dst_i8 + offset], reg_src_32);
+                } else {
+                    movq(reg_src_64, xmm_dst(jj));
+                    mov(ptr[reg_ptr_dst_i8 + offset], reg_src_8);
+                }
+            }
+            break;
+        }
+        case pooling_avg_include_padding:
+        case pooling_avg_exclude_padding: {
+            auto offset = jj*c_step*sizeof_dst_dt();
+            switch (jpp.dst_dt) {
+                case s32:
+                    if (c_step == jpp.c_block) {
+                        for (int ii = 0; ii < repeats; ii++)
+                            uni_vmovups(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], vmm_dst_s32(jj, ii));
+                    } else if (c_step == 1) {
+                        movq(reg_src_64, xmm_dst_s32(jj, 0));
+                        mov(ptr[reg_ptr_dst_i8 + offset], reg_src_32);
+                    }
+                    break;
+                case s8:
+                    if (c_step == jpp.c_block) {
+                        for (int ii = 0; ii < repeats; ii++) {
+                            uni_vpackssdw(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii));
+                            uni_vpacksswb(xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii));
+
+                            movd(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], xmm_dst_s32(jj, ii));
+                        }
+                    } else if (c_step == 1) {
+                        vpackssdw(vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0));
+                        vpacksswb(xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0));
+                        movq(reg_src_64, xmm_dst_s32(jj, 0));
+                        mov(ptr[reg_ptr_dst_i8 + offset], reg_src_8);
+                    }
+                    break;
+                case u8:
+                    if (c_step == jpp.c_block) {
+                        for (int ii = 0; ii < repeats; ii++) {
+                            uni_vpackusdw(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii));
+                            uni_vpackuswb(xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii));
+
+                            movd(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], xmm_dst_s32(jj, ii));
+                        }
+                    } else if (c_step == 1) {
+                        vpackusdw(vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0));
+                        vpackuswb(xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0));
+                        movq(reg_src_64, xmm_dst_s32(jj, 0));
+                        mov(ptr[reg_ptr_dst_i8 + offset], reg_src_8);
+                    }
+                    break;
+                default: assert(!"unsuppotred dst data_type");
+            }
+            break;
+        }
+        default: assert(!"unsupported pooling algorithm");
+    }
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::compute_max_step(int ur_c, int c_step)
+{
+    Label l_kw, l_kh;
+
+    int iw = jpp.iw;
+    int c = jpp.c;
+
+    int repeats = c_step != 1 ? 2 : 1;
+
+    for (int jj = 0; jj < ur_c; jj++) {
+        for (int ii = 0; ii < repeats; ii++) {
+            uni_vmovups(vmm_dst(jj, ii), vreg_tmp);
+        }
+    }
+
+    mov(aux_reg_src_h, reg_ptr_src_i8);
+
+    xor_(kj, kj);
+    L(l_kh);
+    {
+        mov(aux_reg_src_w, aux_reg_src_h);
+        xor_(ki, ki);
+        L(l_kw);
+        {
+            for (int jj = 0; jj < ur_c; jj++) {
+                load_src(jj, c_step);
+
+                for (int ii = 0; ii < repeats; ii++) {
+                    if (jpp.src_dt == data_type::s32) {
+                        uni_vpmaxsd(vmm_dst(jj, ii), vmm_dst(jj, ii), vmm_src(jj, ii));
+                    } else {
+                        if (jpp.src_dt == data_type::s8)
+                            uni_vpmaxsb(vmm_dst(jj, ii), vmm_dst(jj, ii), vmm_src(jj, ii));
+                        else
+                            uni_vpmaxub(vmm_dst(jj, ii), vmm_dst(jj, ii), vmm_src(jj, ii));
+                    }
+                }
+            }
+            add(aux_reg_src_w, c * sizeof_src_dt());
+            inc(ki);
+            cmp(ki, reg_kw);
+            jl(l_kw, T_NEAR);
+        }
+        add(aux_reg_src_h, iw * c * sizeof_src_dt());
+        inc(kj);
+        cmp(kj, reg_kh);
+        jl(l_kh, T_NEAR);
+    }
+
+    for (int jj = 0; jj < ur_c; jj++)
+        store_dst(jj, c_step);
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::compute_avg_step(int ur_c, int c_step)
+{
+    using namespace data_type;
+
+    Label l_kw, l_kh;
+
+    int iw = jpp.iw;
+    int c = jpp.c;
+
+    int repeats = c_step != 1 ? 2 : 1;
+
+    for (int jj = 0; jj < ur_c; jj++) {
+        for (int ii = 0; ii < repeats; ii++) {
+            uni_vpxor(vmm_src_s32(jj, ii), vmm_src_s32(jj, ii), vmm_src_s32(jj, ii));
+            uni_vpxor(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii));
+        }
+    }
+
+    mov(aux_reg_src_h, reg_ptr_src_i8);
+
+    xor_(kj, kj);
+    L(l_kh);
+    {
+        mov(aux_reg_src_w, aux_reg_src_h);
+        xor_(ki, ki);
+        L(l_kw);
+        {
+            for (int jj = 0; jj < ur_c; jj++) {
+                load_src(jj, c_step);
+
+                for (int ii = 0; ii < repeats; ii++) {
+                    uni_vpaddd(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_src_s32(jj, ii));
+                }
+            }
+            add(aux_reg_src_w, c * sizeof_src_dt());
+            inc(ki);
+            cmp(ki, reg_kw);
+            jl(l_kw, T_NEAR);
+        }
+        add(aux_reg_src_h, iw * c * sizeof_src_dt());
+        inc(kj);
+        cmp(kj, reg_kh);
+        jl(l_kh, T_NEAR);
+    }
+
+    for (int jj = 0; jj < ur_c; jj++) {
+        for (int ii = 0; ii < repeats; ii++) {
+            uni_vcvtdq2ps(vmm_dst_f32(jj, ii), vmm_dst_s32(jj, ii));
+
+            mulps(vmm_dst_f32(jj, ii), vreg_tmp);
+
+            uni_vcvtps2dq(vmm_dst_s32(jj, ii), vmm_dst_f32(jj, ii));
+        }
+
+        store_dst(jj, c_step);
+    }
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::compute_step(int ur_c, int c_step) {
+    switch (jpp.alg) {
+        case pooling_max:
+            compute_max_step(ur_c, c_step); break;
+        case pooling_avg_include_padding:
+        case pooling_avg_exclude_padding:
+            compute_avg_step(ur_c, c_step); break;
+        default: assert(!"unsupported pooling algorithm");
+    }
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::compute_c_block() {
+    Label l_main_loop;
+    Label l_tail_loop;
+    Label exit;
+
+    int ur_c = jpp.ur_c;
+
+    xor_(c_iter, c_iter);
+
+    L(l_main_loop);
+    {
+        cmp(c_iter, jpp.c - ur_c * jpp.c_block);
+        jg(l_tail_loop, T_NEAR);
+
+        compute_step(ur_c, jpp.c_block);
+
+        add(reg_ptr_src_i8, ur_c * jpp.c_block * sizeof_src_dt());
+        add(reg_ptr_dst_i8, ur_c * jpp.c_block * sizeof_dst_dt());
+        add(c_iter, ur_c * jpp.c_block);
+        jmp(l_main_loop);
+    }
+
+    L(l_tail_loop);
+    {
+        cmp(c_iter, jpp.c - ur_c);
+        jg(exit, T_NEAR);
+
+        compute_step(ur_c, 1);
+
+        add(reg_ptr_src_i8, ur_c * sizeof_src_dt());
+        add(reg_ptr_dst_i8, ur_c * sizeof_dst_dt());
+        add(c_iter, ur_c);
+        jmp(l_tail_loop);
+    }
+
+    L(exit);
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::init_tmp_reg() {
+    using namespace data_type;
+
+    switch (jpp.alg) {
+        case pooling_avg_include_padding:
+        case pooling_avg_exclude_padding:
+            mov(reg_tmp, ptr[abi_param1 + offsetof(call_params_t, idivider)]);
+            movq(xmm_tmp, reg_tmp);
+            uni_vpbroadcastd(vreg_tmp, xmm_tmp);
+            break;
+        case pooling_max:
+            switch (jpp.src_dt) {
+                case s32:
+                    mov(reg_tmp, nstl::numeric_limits<int32_t>::lowest());
+                    break;
+                case s8:
+                    mov(reg_tmp, nstl::numeric_limits<int8_t>::lowest());
+                    break;
+                case u8:
+                    mov(reg_tmp, nstl::numeric_limits<uint8_t>::lowest());
+                    break;
+                default: assert(!"unsupported src data_type");
+            }
+
+            movq(xmm_tmp, reg_tmp);
+            if (jpp.src_dt == s32) {
+                uni_vpbroadcastd(vreg_tmp, xmm_tmp);
+            } else {
+                movups(vreg_tmp, xmm_tmp);
+                uni_vpxor(xmm_tmp, xmm_tmp, xmm_tmp);
+                pshufb(vreg_tmp, xmm_tmp);
+            }
+            break;
+        default: assert(!"unsupported pooling algorithm");
+    }
+
+}
+
+void jit_sse42_i8i8_pool_fwd_ker_t::generate() {
+    preamble();
+
+#   define READ_PARAM(reg, field) \
+        mov(reg, ptr[abi_param1 + offsetof(call_params_t, field)])
+    READ_PARAM(reg_ptr_src_i8, src_i8);
+    READ_PARAM(reg_ptr_dst_i8, dst_i8);
+    READ_PARAM(reg_kw, kw_range);
+    READ_PARAM(reg_kh, kh_range);
+
+#   undef READ_PARAM
+
+    init_tmp_reg();
+
+    uni_vpxor(vreg_zeros, vreg_zeros, vreg_zeros);
+
+    compute_c_block();
+
+    postamble();
+}
+
+status_t jit_sse42_i8i8_pool_fwd_ker_t::init_conf(jit_pool_conf_t &jpp,
+        const pooling_desc_t &pd, const memory_desc_wrapper &src_d,
+        const memory_desc_wrapper &dst_d) {
+    if (!mayiuse(sse42)) {
+        return status::unimplemented;
+    }
+
+    jpp.mb = src_d.dims()[0];
+    jpp.c = src_d.dims()[1];
+    jpp.ih = src_d.dims()[2];
+    jpp.iw = src_d.dims()[3];
+    jpp.oh = dst_d.dims()[2];
+    jpp.ow = dst_d.dims()[3];
+
+    jpp.stride_h = pd.strides[0];
+    jpp.stride_w = pd.strides[1];
+    jpp.kh = pd.kernel[0];
+    jpp.kw = pd.kernel[1];
+
+    jpp.t_pad = pd.padding[0][0];
+    jpp.l_pad = pd.padding[0][1];
+
+    jpp.alg = pd.alg_kind;
+
+    jpp.src_dt = pd.src_desc.data_type;
+    jpp.dst_dt = pd.dst_desc.data_type;
+
+    jpp.c_block = jpp.alg == pooling_max ? 32 / (jpp.src_dt == data_type::s32 ? 4 : 1) : 8;
+    jpp.c_tail = jpp.c % jpp.c_block;
+    jpp.nb_c = jpp.c / jpp.c_block;
+    jpp.ur_c = 1;
+    jpp.ur_c_tail = jpp.nb_c - (jpp.nb_c / jpp.ur_c)*jpp.ur_c + (jpp.c_tail != 0);
+
+    return status::success;
+}
+
+status_t jit_sse42_i8i8_pooling_fwd_t::pd_t::jit_conf() {
+    return jit_sse42_i8i8_pool_fwd_ker_t::init_conf(jpp_,
+       desc_, src_pd_.desc(), dst_pd_.desc());
+}
+
+jit_sse42_i8i8_pooling_fwd_t::jit_sse42_i8i8_pooling_fwd_t(const pd_t *apd,
+          const input_vector &inputs, const output_vector &outputs)
+    : cpu_primitive_t(apd, inputs, outputs), ker_(nullptr)
+{ ker_ = new jit_sse42_i8i8_pool_fwd_ker_t(pd()->jpp_); }
+
+jit_sse42_i8i8_pooling_fwd_t::~jit_sse42_i8i8_pooling_fwd_t() {
+    delete ker_;
+}
+
+void jit_sse42_i8i8_pooling_fwd_t::execute_forward() const {
+    auto src_i8 = reinterpret_cast<const char *>(input_memory(0));
+    auto dst_i8 = reinterpret_cast<char *>(memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+
+    const auto &jpp = pd()->jpp_;
+
+    parallel_nd(jpp.mb, jpp.oh, jpp.ow,
+        [&](int n, int oh, int ow) {
+        const int ih = nstl::max(oh * jpp.stride_h - jpp.t_pad, 0);
+        const int iw = nstl::max(ow * jpp.stride_w - jpp.l_pad, 0);
+
+        const int kh_start = nstl::max(0, jpp.t_pad - oh * jpp.stride_h);
+        const int kh_end = nstl::min(jpp.kh,
+                                     jpp.ih + jpp.t_pad - oh * jpp.stride_h);
+        const int kw_start = nstl::max(0, jpp.l_pad - ow * jpp.stride_w);
+        const int kw_end = nstl::min(jpp.kw,
+                                     jpp.iw + jpp.l_pad - ow * jpp.stride_w);
+
+        auto p = call_params_t();
+        p.src_i8 = &src_i8[
+                src_d.blk_off(n, 0, ih, iw) * src_d.data_type_size()];
+        p.dst_i8 = &dst_i8[
+                dst_d.blk_off(n, 0, oh, ow) * dst_d.data_type_size()];
+        p.kw_range = (size_t) (kw_end - kw_start);
+        p.kh_range = (size_t) (kh_end - kh_start);
+        p.idivider = 1.0f / ((jpp.alg == pooling_avg_exclude_padding) ?
+                             p.kh_range * p.kw_range : jpp.kw * jpp.kh);
+
+        ker_->ker_(&p);
+    });
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_i8i8_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_i8i8_pooling.hpp
index a63984eff..bd4192bf8 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_avx512_core_i8i8_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_sse42_i8i8_pooling.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2017-2018 Intel Corporation
+* Copyright 2018 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,22 +14,22 @@
 * limitations under the License.
 *******************************************************************************/
 
-#ifndef CPU_JIT_AVX512_CORE_I8I8_POOLING_HPP
-#define CPU_JIT_AVX512_CORE_I8I8_POOLING_HPP
+#ifndef CPU_JIT_uni_I8I8_POOLING_HPP
+#define CPU_JIT_uni_I8I8_POOLING_HPP
 
 #include "c_types_map.hpp"
 #include "cpu_pooling_pd.hpp"
 #include "cpu_engine.hpp"
-
+#include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-struct jit_avx512_core_i8i8_pool_fwd_ker_t;
+struct jit_sse42_i8i8_pool_fwd_ker_t;
 
-struct jit_avx512_core_i8i8_pooling_fwd_t : public cpu_primitive_t {
+struct jit_sse42_i8i8_pooling_fwd_t : public cpu_primitive_t {
     struct pd_t : public cpu_pooling_fwd_pd_t {
         pd_t(engine_t *engine, const pooling_desc_t  *adesc,
                 const primitive_attr_t *attr,
@@ -37,8 +37,8 @@ struct jit_avx512_core_i8i8_pooling_fwd_t : public cpu_primitive_t {
         : cpu_pooling_fwd_pd_t(engine, adesc, attr, hint_fwd_pd) {}
 
         DECLARE_COMMON_PD_T(
-                JIT_IMPL_NAME_HELPER("jit:", avx512_core, ""),
-                jit_avx512_core_i8i8_pooling_fwd_t);
+                JIT_IMPL_NAME_HELPER("jit:", sse42, ""),
+                jit_sse42_i8i8_pooling_fwd_t);
 
         virtual status_t init() override {
             assert(this->engine()->kind() == engine_kind::cpu);
@@ -73,20 +73,20 @@ struct jit_avx512_core_i8i8_pooling_fwd_t : public cpu_primitive_t {
         }
     };
 
-    jit_avx512_core_i8i8_pooling_fwd_t(const pd_t *pd,
+    jit_sse42_i8i8_pooling_fwd_t(const pd_t *pd,
             const input_vector &inputs, const output_vector &outputs);
-    ~jit_avx512_core_i8i8_pooling_fwd_t();
+    ~jit_sse42_i8i8_pooling_fwd_t();
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    jit_avx512_core_i8i8_pool_fwd_ker_t *ker_;
+    jit_sse42_i8i8_pool_fwd_ker_t *ker_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_1x1_conv_utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_1x1_conv_utils.hpp
index d360a140b..a3ed769a8 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_1x1_conv_utils.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_1x1_conv_utils.hpp
@@ -17,9 +17,11 @@
 #ifndef JIT_UNI_1x1_CONV_UTILS_HPP
 #define JIT_UNI_1x1_CONV_UTILS_HPP
 
+#include "memory_tracking.hpp"
 #include "mkldnn_thread.hpp"
-#include "utils.hpp"
 #include "nstl.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
 
 #include "jit_generator.hpp"
 
@@ -29,6 +31,12 @@ namespace cpu {
 
 using namespace mkldnn::impl::utils;
 
+struct reduce_to_unit_stride_t {
+    convolution_desc_t conv_d_;
+    bool reduce_src_;
+    size_t space_per_thread_;
+};
+
 /* 1x1-kernel does not support non-unit strides so far, so the idea is:
  *  - for fwd or bwd_weights: to copy src to a scratch memory (with strides
  *    equal to 1) and then call the kernel
@@ -38,7 +46,7 @@ using namespace mkldnn::impl::utils;
 template <typename conv_pd_t>
 inline void rtus_prepare(conv_pd_t *self, const convolution_desc_t *&conv_d,
         const memory_desc_t *&src_d, const memory_desc_t *dst_d) {
-    const bool is_bwd_data = self->cdesc()->prop_kind
+    const bool is_bwd_data = self->desc()->prop_kind
         == prop_kind::backward_data;
 
     const int ndims = src_d->ndims;
@@ -83,6 +91,22 @@ inline void rtus_prepare(conv_pd_t *self, const convolution_desc_t *&conv_d,
     }
 }
 
+template <typename conv_pd_t>
+inline void rtus_prepare_space_info(conv_pd_t *self,
+        memory_tracking::registrar_t &scratchpad) {
+    const auto &jcp = self->jcp_;
+
+    const int max_threads = mkldnn_get_max_threads();
+    const size_t factor = utils::pick_by_prop_kind(self->desc()->prop_kind,
+            jcp.nb_reduce, jcp.nb_load_blocking_max, jcp.nb_bcast_blocking);
+    size_t typesize = types::data_type_size(
+            conv_prop_agnostic_src_d(self->desc())->data_type);
+
+    self->rtus_.space_per_thread_ = factor * jcp.is * jcp.ic_block;
+    scratchpad.book(memory_tracking::names::key_conv_rtus_space,
+            typesize * max_threads * self->rtus_.space_per_thread_);
+}
+
 template <cpu_isa_t isa>
 struct rtus_driver_t: public jit_generator {
 
@@ -246,62 +270,44 @@ struct rtus_driver_t: public jit_generator {
 
 template <cpu_isa_t isa, typename conv_t>
 inline void init_rtus_driver(conv_t *self) {
-    const auto &conf = self->conf_;
-    const auto &cd = *conf.cdesc();
-    const bool is_bwd_data = cd.prop_kind == prop_kind::backward_data;
-    const int ndims = conf.ndims();
-
+    const auto &conf = *self->pd();
     if (!conf.rtus_.reduce_src_) return;
 
-    const int max_threads = mkldnn_get_max_threads();
-    size_t factor = 0;
-    switch (cd.prop_kind) {
-    case prop_kind::forward_training: case prop_kind::forward_inference:
-        factor = conf.jcp_.nb_reduce; break;
-    case prop_kind::backward_data:
-        factor = conf.jcp_.nb_load_blocking_max; break;
-    case prop_kind::backward_weights:
-        factor = conf.jcp_.nb_bcast_blocking; break;
-    default: assert(!"unsupported prop_kind");
-    }
-
-    size_t typesize = sizeof(decltype(*self->scratch_));
-
-    self->ws_per_thread_ = factor * conf.jcp_.is * conf.jcp_.ic_block;
-    self->scratch_ = (decltype(self->scratch_))malloc(
-            max_threads * self->ws_per_thread_ * typesize, 64);
-
+    const auto &cd = *conf.desc();
+    const int ndims = conf.ndims();
     const int stride_h = (conf.ndims() == 3) ? 1 : cd.strides[0];
     const int stride_w = cd.strides[ndims - 3];
 
+    const bool is_bwd_data = cd.prop_kind == prop_kind::backward_data;
     const auto &src_d = is_bwd_data ? *conf.diff_src_pd()->desc()
                                     : *conf.src_pd()->desc();
     assert((isa == avx2 && utils::one_of(src_d.format, memory_format::nCw8c,
         memory_format::nChw8c)) || (isa == avx512_common && utils::one_of(
             src_d.format, memory_format::nCw16c, memory_format::nChw16c)));
 
-    const int ih = (ndims == 3) ? 1 : src_d.dims[2];
+    const int ih = ndims == 3 ? 1 : src_d.dims[2];
     const int iw = src_d.dims[ndims - 1];
 
     const int src_step_h = stride_h * iw;
     const int src_step_icb = ih * iw;
     const int ws_step_icb = conf.jcp_.is;
     const bool src_to_ws = !is_bwd_data;
+    const size_t typesize = types::data_type_size(
+            conv_prop_agnostic_src_d(self->pd()->desc())->data_type);
+
     self->rtus_driver_ = new rtus_driver_t<isa>(iw, stride_w, src_step_h,
             src_step_icb, ws_step_icb, src_to_ws, typesize);
 }
 
-inline float loss_ratio(int amount, int divider)
-{
-    return float(rnd_up(amount, divider) - amount) / rnd_up(amount, divider);
-}
-
 inline int best_divider(int value, int min_divider, int max_divider,
-                        bool find_max, int step = 1)
+        bool find_max, int step = 1)
 {
     max_divider = nstl::max(1, nstl::min(max_divider, value));
     min_divider = nstl::max(1, nstl::min(min_divider, max_divider));
 
+    auto loss_ratio = [](int total, int chunk)
+    { return float(rnd_up(total, chunk) - total) / rnd_up(total, chunk); };
+
     float min_loss = FLT_MAX;
     int x_divider = max_divider;
     for (int divider = max_divider; divider >= min_divider; divider -= step) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.cpp
index 3a667ac83..38e4f4881 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.cpp
@@ -17,17 +17,18 @@
 #include <assert.h>
 
 #include "c_types_map.hpp"
+#include "math_utils.hpp"
+#include "memory_tracking.hpp"
+#include "mkldnn_thread.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
-#include "mkldnn_thread.hpp"
-#include "math_utils.hpp"
 #include "utils.hpp"
 
-#include "jit_generator.hpp"
 #include "cpu_barrier.hpp"
+#include "cpu_batch_normalization_utils.hpp"
+#include "jit_generator.hpp"
 
 #include "jit_uni_batch_normalization.hpp"
-#include "cpu_batch_normalization_utils.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -35,6 +36,8 @@ namespace cpu {
 
 namespace {
 
+using namespace memory_tracking::names;
+
 using namespace Xbyak;
 namespace barrier = simple_barrier;
 
@@ -71,7 +74,7 @@ struct jit_bnorm_t: public jit_generator {
     const int vlen = isa == sse42 ? 32 : cpu_isa_traits<isa>::vlen;
 
     const batch_normalization_pd_t *bdesc_;
-    int is_spatial_thr_;
+    bool is_spatial_thr_;
 
     void (*ker)(const call_params_t *);
     void operator()(const call_params_t *p) { (*ker)(p); }
@@ -846,7 +849,7 @@ struct jit_bnorm_t: public jit_generator {
                                 else
                                     assert(false);
                             }
-                            if (!bdesc_->omit_stats()) {
+                            if (!bdesc_->use_global_stats()) {
                                 uni_vsubps(v, v, vdiff_beta);
                                 uni_vmovups(t, vmmword[reg_src + reg_soff
                                         + offt]);
@@ -1006,11 +1009,15 @@ struct jit_bnorm_t: public jit_generator {
         }
     }
 
-    jit_bnorm_t(const batch_normalization_pd_t *bdesc, int is_spatial_thr):
-        bdesc_(bdesc), is_spatial_thr_(is_spatial_thr) {
+    jit_bnorm_t(const batch_normalization_pd_t *bdesc): bdesc_(bdesc) {
         static_assert(isa == sse42 || isa == avx2 || isa == avx512_common
                 || isa == avx512_mic, "unsupported isa");
 
+        const int simd_w = isa == sse42 ? 8 :
+            cpu_isa_traits<isa>::vlen / sizeof(data_t);
+        is_spatial_thr_ =
+            bnorm_utils::is_spatial_thr(bdesc_, simd_w, sizeof(data_t));
+
         unroll_blocks = isa == avx512_common && !is_spatial_thr_ ? 4 : 1;
         unroll_regs = isa == avx512_common && !is_spatial_thr_ ? 4 : 1;
 
@@ -1044,52 +1051,51 @@ struct jit_bnorm_t: public jit_generator {
 
 template <cpu_isa_t isa>
 struct uni_bnorm_driver_t: public c_compatible {
-    uni_bnorm_driver_t(const batch_normalization_pd_t *bdesc,
-        int is_spatial_thr) : bdesc_(bdesc), ker_(bdesc_,is_spatial_thr),
-        buf_(nullptr), barriers_(nullptr)
+    uni_bnorm_driver_t(const batch_normalization_pd_t *bdesc)
+        : bdesc_(bdesc), ker_(bdesc_)
     {
-        use_tmp_stats_ = !bdesc_->stats_is_src()
-            && bdesc_->desc()->prop_kind == prop_kind::forward_inference;
-        use_tmp_diff_scale_shift_ = false
-            || (bdesc_->is_bwd() && !bdesc_->use_scaleshift())
-            || bdesc_->desc()->prop_kind == prop_kind::backward_data;
-        int num_sbufs = 2 * use_tmp_stats_;
-        int num_pbufs = 2 * use_tmp_diff_scale_shift_;
-        int num_rbufs = bdesc_->is_fwd() ? 1 : 2;
+        const int nthrs = mkldnn_get_max_threads();
+        const int C_PADDED = get_c_padded(bdesc_);
+
+        size_t data_size = sizeof(data_t) * bdesc_->MB() * C_PADDED
+            * bdesc_->D() * bdesc_->H() * bdesc_->W();
+        l3_size_ = get_cache_size(3, true) * nthrs / 2;
+        do_blocking_ = (data_size >= l3_size_ / 2 && l3_size_ > 0);
+    }
+
+    ~uni_bnorm_driver_t() {}
+
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const batch_normalization_pd_t *bdesc) {
         int nthrs = mkldnn_get_max_threads();
-        int C_PADDED = memory_desc_wrapper(bdesc_->src_pd()).blocking_desc()
-            .padding_dims[1];
+        int C_PADDED = get_c_padded(bdesc);
 
-        int buf_size = (num_sbufs + num_pbufs + num_rbufs * nthrs) * C_PADDED;
-        buf_ = (data_t *)malloc(buf_size * sizeof(data_t), 64);
+        int sbuf_sz = use_tmp_stats(bdesc) * 2 * C_PADDED;
+        int pbuf_sz = use_tmp_diff_scale_shift(bdesc) * 2 * C_PADDED;
+        int rbuf_sz = (bdesc->is_fwd() ? 1 : 2) * C_PADDED * nthrs;
 
-        sbuf_ = buf_;
-        pbuf_ = sbuf_ + num_sbufs * C_PADDED;
-        rbuf_ = pbuf_ + num_pbufs * C_PADDED;
+        scratchpad.book(key_bnorm_tmp_stats, sizeof(data_t) * sbuf_sz);
+        scratchpad.book(key_bnorm_tmp_diff_ss, sizeof(data_t) * pbuf_sz);
+        scratchpad.book(key_bnorm_reduction, sizeof(data_t) * rbuf_sz);
 
-        int num_barriers = C_PADDED / simd_w;
         if (mkldnn_thr_syncable()) {
-            barriers_ = (barrier::ctx_t *)malloc(
-                    num_barriers * sizeof(barrier::ctx_t), 64);
-            for (int i = 0; i < num_barriers; ++i)
-                barrier::ctx_init(&barriers_[i]);
+            int n_barriers = C_PADDED / simd_w;
+            scratchpad.book(key_barrier, sizeof(barrier::ctx_t) * n_barriers);
         }
-
-        size_t data_size = bdesc_->MB() * C_PADDED * bdesc_->H()
-                * bdesc_->W() * bdesc_->D() * sizeof(data_t);
-        l3_size_ = get_cache_size(3, true) * nthrs / 2;
-        do_blocking_ = (data_size >= l3_size_ / 2 && l3_size_ > 0);
     }
-    ~uni_bnorm_driver_t() { free(buf_); free(barriers_); }
 
     void exec(int ithr, int nthr, const data_t *src, data_t *diff_src,
             data_t *dst, const data_t *diff_dst, const data_t *scale_shift,
             data_t *diff_scale_shift, const data_t *mean, const data_t *var,
-            const uint8_t *ws) {
+            const uint8_t *ws, const memory_tracking::grantor_t &scratchpad) {
+        auto sbuf = scratchpad.get<data_t>(key_bnorm_tmp_stats);
+        auto pbuf = scratchpad.get<data_t>(key_bnorm_tmp_diff_ss);
+        auto rbuf = scratchpad.get<data_t>(key_bnorm_reduction);
+        auto barriers = scratchpad.get<barrier::ctx_t>(key_barrier);
+
         size_t N = bdesc_->MB();
         size_t C = bdesc_->C();
-        size_t C_PADDED = memory_desc_wrapper(bdesc_->src_pd()).blocking_desc()
-            .padding_dims[1];
+        size_t C_PADDED = get_c_padded(bdesc_);
         size_t D = bdesc_->D();
         size_t H = bdesc_->H();
         size_t W = bdesc_->W();
@@ -1162,12 +1168,11 @@ struct uni_bnorm_driver_t: public c_compatible {
             p.S_s = S_s * vlen;
             p.S_tail = (p.spat_size - S_e) * vlen;
             p.coff_max = C_blks_thr * simd_w;
-            p.mean = (use_tmp_stats_ ? sbuf_ : mean) + coff_base;
-            p.var = (use_tmp_stats_ ? sbuf_ + C_PADDED : var) + coff_base;
+            p.mean = (use_tmp_stats(bdesc_) ? sbuf : mean) + coff_base;
+            p.var = (use_tmp_stats(bdesc_) ? sbuf + C_PADDED : var) + coff_base;
             p.scale_shift = scale_shift + coff_base;
-            p.diff_scale_shift
-                    = (use_tmp_diff_scale_shift_ ? pbuf_ : diff_scale_shift)
-                    + coff_base;
+            p.diff_scale_shift = (use_tmp_diff_scale_shift(bdesc_)
+                    ? pbuf : diff_scale_shift) + coff_base;
 
             p.soff_max = N_thr * img_size;
             p.src = src + soff_base;
@@ -1180,10 +1185,8 @@ struct uni_bnorm_driver_t: public c_compatible {
 
             // use SP_N_nthr which is the same as p.N_nthr except maybe for
             // the last iteration.
-            p.rbuf1 = rbuf_
-                    + ((it * C_blks_per_iter) * SP_N_nthr + C_blk_s * p.N_nthr
-                              + p.N_ithr * C_blks_thr)
-                            * simd_w;
+            p.rbuf1 = rbuf + ((it * C_blks_per_iter) * SP_N_nthr
+                    + C_blk_s * p.N_nthr + p.N_ithr * C_blks_thr) * simd_w;
             // rbuf1 and rbuf2 have to be disjoint
             p.rbuf2 = p.rbuf1 + C_PADDED * nthr;
             p.is_cblk_tail =
@@ -1191,89 +1194,193 @@ struct uni_bnorm_driver_t: public c_compatible {
 
             size_t iter_bariers
                     = do_blocking_ ? it * global_barriers_per_iter : 0;
-            p.barrier = barriers_ + C_ithr + iter_bariers;
+            p.barrier = barriers + C_ithr + iter_bariers;
             if (p.soff_max != 0 && p.coff_max != 0)
                 ker_(&p);
         }
     }
 
+    void init_barriers(const memory_tracking::grantor_t &scratchpad) {
+        auto barriers = scratchpad.get<barrier::ctx_t>(key_barrier);
+        if (barriers) {
+            const int n_barriers = get_c_padded(bdesc_) / simd_w;
+            for (int i = 0; i < n_barriers; ++i)
+                barrier::ctx_init(&barriers[i]);
+        }
+    }
+
 private:
-    const int simd_w = isa == sse42 ? 8 :
-        cpu_isa_traits<isa>::vlen / sizeof(data_t);
+    enum {
+        simd_w = isa == sse42 ? 8 : cpu_isa_traits<isa>::vlen / sizeof(data_t)
+    };
+
+    static bool use_tmp_stats(const batch_normalization_pd_t *bdesc) {
+        return true
+            && !bdesc->stats_is_src()
+            && bdesc->desc()->prop_kind == prop_kind::forward_inference;
+    }
+
+    static bool use_tmp_diff_scale_shift(const batch_normalization_pd_t *bdesc)
+    {
+        return false
+            || (bdesc->is_bwd() && !bdesc->use_scaleshift())
+            || bdesc->desc()->prop_kind == prop_kind::backward_data;
+    }
+
+    static int get_c_padded(const batch_normalization_pd_t *bdesc)
+    { return bdesc->src_pd()->desc()->layout_desc.blocking.padding_dims[1]; }
 
     const batch_normalization_pd_t *bdesc_;
-    jit_bnorm_t<isa> ker_;
-    bool use_tmp_stats_, use_tmp_diff_scale_shift_;
     bool do_blocking_;
     size_t l3_size_;
 
-    data_t *buf_, *sbuf_, *rbuf_, *pbuf_;
-
-    barrier::ctx_t *barriers_;
+    jit_bnorm_t<isa> ker_;
 };
 
 }
 
+using namespace data_type;
+using namespace memory_format;
+using namespace utils;
+
+/* fwd */
+
 template <cpu_isa_t isa>
-jit_uni_batch_normalization_fwd_t<isa>::jit_uni_batch_normalization_fwd_t(
-        const pd_t *pd, const input_vector &inputs,
-        const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-{
-    int is_spatial_thr = 0;
-    const int simd_w = isa == sse42 ? 8 :
-        cpu_isa_traits<isa>::vlen / sizeof(data_t);
+status_t jit_uni_batch_normalization_fwd_t<isa>::pd_t::init() {
+    assert(engine()->kind() == engine_kind::cpu);
+    auto desired_fmt = (ndims() == 4)
+        ? isa == avx512_common ? nChw16c : nChw8c
+        : isa == avx512_common ? nCdhw16c : nCdhw8c;
+
+    bool ok = true
+        && mayiuse(isa)
+        && is_fwd()
+        && !has_zero_dim_memory()
+        && one_of(ndims(), 4, 5)
+        && desc()->data_desc.data_type == f32
+        && IMPLICATION(use_scaleshift(),
+                desc()->data_scaleshift_desc.data_type == f32)
+        && desc()->data_desc.format == desired_fmt
+        && (attr()->has_default_values() || this->with_relu_post_op());
+    if (!ok) return status::unimplemented;
+
+    if (is_training() && fuse_bn_relu()) {
+        if (isa < avx2) return status::unimplemented;
+        bn_init_default_ws(this, this->workspace_pd_, 1);
+    }
 
-    bnorm_utils::set_spatial_thr(&conf_,simd_w,sizeof(data_t),is_spatial_thr);
+    if (memory_desc_wrapper(&data_pd_).blocking_desc().padding_dims[1]
+            != this->C() && isa < avx2)
+        return status::unimplemented;
 
-    bnorm_driver_ = new uni_bnorm_driver_t<isa>(&conf_,is_spatial_thr);
+    if (stats_is_src() || is_training()) {
+        memory_desc_t stats_d;
+        dims_t stats_dims = { C() };
+        mkldnn_memory_desc_init(&stats_d, 1, stats_dims, f32, x);
+        mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
+        variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
+    }
+
+    auto scratchpad = scratchpad_registry().registrar();
+    uni_bnorm_driver_t<isa>::init_scratchpad(scratchpad, this);
+
+    return status::success;
 }
 
 template <cpu_isa_t isa>
-void jit_uni_batch_normalization_fwd_t<isa>::execute(event_t *e) {
+jit_uni_batch_normalization_fwd_t<isa>::jit_uni_batch_normalization_fwd_t(
+        const pd_t *apd, const input_vector &inputs,
+        const output_vector &outputs)
+    : cpu_primitive_t(apd, inputs, outputs)
+{ bnorm_driver_ = new uni_bnorm_driver_t<isa>(pd()); }
+
+template <cpu_isa_t isa>
+void jit_uni_batch_normalization_fwd_t<isa>::execute(event_t *e) const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
-    auto mean = reinterpret_cast<data_t*>(conf_.stats_is_src()
+    auto mean = reinterpret_cast<data_t*>(pd()->stats_is_src()
             ? const_cast<char*>(this->input_memory(1))
             : this->memory(1));
-    auto var = reinterpret_cast<data_t*>(conf_.stats_is_src()
+    auto var = reinterpret_cast<data_t*>(pd()->stats_is_src()
             ? const_cast<char*>(this->input_memory(2))
             : this->memory(2));
 
-    auto idx_scale_shift = 1 + 2*conf_.stats_is_src();
+    auto idx_scale_shift = 1 + 2*pd()->stats_is_src();
     auto scale_shift =
         reinterpret_cast<const data_t *>(this->input_memory(idx_scale_shift));
-    auto ws = reinterpret_cast<uint8_t *>(this->memory(conf_.ws_idx()));
+    auto ws = reinterpret_cast<uint8_t *>(this->memory(pd()->ws_idx()));
+
+    auto scratchpad = this->scratchpad();
+
+    bnorm_driver_->init_barriers(scratchpad);
 
     parallel(0, [&](const int ithr, const int nthr) {
-        bnorm_driver_->exec(ithr, nthr, src,
-                nullptr, dst, nullptr, scale_shift, nullptr, mean, var, ws);
+        bnorm_driver_->exec(ithr, nthr, src, nullptr, dst, nullptr,
+                scale_shift, nullptr, mean, var, ws, scratchpad);
     });
     e->set_state(event_t::ready);
 }
 
 template <cpu_isa_t isa>
-jit_uni_batch_normalization_fwd_t<isa>::~jit_uni_batch_normalization_fwd_t() {
-    delete bnorm_driver_;
-}
+jit_uni_batch_normalization_fwd_t<isa>::~jit_uni_batch_normalization_fwd_t()
+{ delete bnorm_driver_; }
+
+/* bwd */
 
 template <cpu_isa_t isa>
-jit_uni_batch_normalization_bwd_t<isa>::jit_uni_batch_normalization_bwd_t(
-        const pd_t *pd, const input_vector &inputs,
-        const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-{
-    int is_spatial_thr = 0;
-    const int simd_w = isa == sse42 ? 8 :
-        cpu_isa_traits<isa>::vlen / sizeof(data_t);
+status_t jit_uni_batch_normalization_bwd_t<isa>::pd_t::init() {
+    assert(engine()->kind() == engine_kind::cpu);
+    auto desired_fmt = (ndims() == 4)
+        ? one_of(isa, sse42, avx2) ? nChw8c : nChw16c
+        : one_of(isa, sse42, avx2) ? nCdhw8c : nCdhw16c;
+
+    bool ok = true
+        && mayiuse(isa)
+        && is_bwd()
+        && !has_zero_dim_memory()
+        && one_of(ndims(), 4, 5)
+        && everyone_is(f32, desc()->data_desc.data_type,
+                desc()->diff_data_desc.data_type)
+        && IMPLICATION(use_scaleshift(),
+                desc()->data_scaleshift_desc.data_type == f32)
+        && everyone_is(desired_fmt, desc()->diff_data_desc.format,
+                desc()->data_desc.format)
+        && attr()->has_default_values();
+    if (!ok) return status::unimplemented;
+
+    if (memory_desc_wrapper(&data_pd_).blocking_desc()
+            .padding_dims[1] != this->C() && isa < avx2)
+        return status::unimplemented;
+
+    if (fuse_bn_relu()) {
+        if (isa < avx2) return status::unimplemented;
+        bn_init_default_ws(this, this->workspace_pd_, 1);
+        size_t this_ws_sz = memory_desc_wrapper(this->workspace_pd()).size();
+
+        bool ws_ok = true
+            && hint_fwd_pd_->workspace_pd()
+            && memory_desc_wrapper(hint_fwd_pd_->workspace_pd()).size()
+            == this_ws_sz;
+        if (!ws_ok) return status::unimplemented;
+    }
+
+    /* TODO: extra checks required */
 
-    bnorm_utils::set_spatial_thr(&conf_,simd_w,sizeof(data_t),is_spatial_thr);
+    auto scratchpad = scratchpad_registry().registrar();
+    uni_bnorm_driver_t<isa>::init_scratchpad(scratchpad, this);
 
-    bnorm_driver_ = new uni_bnorm_driver_t<isa>(&conf_,is_spatial_thr);
+    return status::success;
 }
 
 template <cpu_isa_t isa>
-void jit_uni_batch_normalization_bwd_t<isa>::execute(event_t *e) {
+jit_uni_batch_normalization_bwd_t<isa>::jit_uni_batch_normalization_bwd_t(
+        const pd_t *apd, const input_vector &inputs,
+        const output_vector &outputs)
+    : cpu_primitive_t(apd, inputs, outputs)
+{ bnorm_driver_ = new uni_bnorm_driver_t<isa>(pd()); }
+
+template <cpu_isa_t isa>
+void jit_uni_batch_normalization_bwd_t<isa>::execute(event_t *e) const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto mean = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto var = reinterpret_cast<const data_t *>(this->input_memory(2));
@@ -1282,20 +1389,22 @@ void jit_uni_batch_normalization_bwd_t<isa>::execute(event_t *e) {
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
     auto diff_scale_shift = reinterpret_cast<data_t *>(this->memory(1));
     auto ws = reinterpret_cast<const uint8_t *>(
-            this->input_memory(conf_.ws_idx()));
+            this->input_memory(pd()->ws_idx()));
+
+    auto scratchpad = this->scratchpad();
+
+    bnorm_driver_->init_barriers(scratchpad);
 
     parallel(0, [&](const int ithr, const int nthr) {
-        bnorm_driver_->exec(ithr, nthr, src,
-                diff_src, nullptr, diff_dst, scale_shift, diff_scale_shift,
-                mean, var, ws);
+        bnorm_driver_->exec(ithr, nthr, src, diff_src, nullptr, diff_dst,
+                scale_shift, diff_scale_shift, mean, var, ws, scratchpad);
     });
     e->set_state(event_t::ready);
 }
 
 template <cpu_isa_t isa>
-jit_uni_batch_normalization_bwd_t<isa>::~jit_uni_batch_normalization_bwd_t() {
-    delete bnorm_driver_;
-}
+jit_uni_batch_normalization_bwd_t<isa>::~jit_uni_batch_normalization_bwd_t()
+{ delete bnorm_driver_; }
 
 /* struct instantiation */
 template struct jit_uni_batch_normalization_fwd_t<sse42>;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.hpp
index 7dbc47a3b..857e3a09a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_batch_normalization.hpp
@@ -20,11 +20,10 @@
 #include <assert.h>
 
 #include "c_types_map.hpp"
-#include "cpu_batch_normalization_pd.hpp"
-#include "cpu_engine.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
+#include "cpu_batch_normalization_pd.hpp"
 #include "jit_generator.hpp"
 
 namespace mkldnn {
@@ -46,58 +45,21 @@ struct jit_uni_batch_normalization_fwd_t: public cpu_primitive_t {
                 JIT_IMPL_NAME_HELPER("jit:", isa, ""),
                 jit_uni_batch_normalization_fwd_t<isa>);
 
-        virtual status_t init() override {
-            using namespace prop_kind;
-            using namespace data_type;
-            using namespace memory_format;
-            assert(engine()->kind() == engine_kind::cpu);
-            auto desired_fmt = (ndims() == 4)
-                ? isa == avx512_common ? nChw16c : nChw8c
-                : isa == avx512_common ? nCdhw16c : nCdhw8c;
-            bool ok = true
-                && mayiuse(isa)
-                && is_fwd()
-                && !has_zero_dim_memory()
-                && utils::one_of(ndims(), 4, 5)
-                && desc()->data_desc.data_type == f32
-                && IMPLICATION(use_scaleshift(),
-                        desc()->data_scaleshift_desc.data_type == f32)
-                && desc()->data_desc.format == desired_fmt
-                && (attr()->has_default_values() || this->with_relu_post_op());
-            if (!ok) return status::unimplemented;
-
-            if (is_training() && fuse_bn_relu()) {
-                if (isa < avx2) return status::unimplemented;
-                bn_init_default_ws(this, this->workspace_pd_, 1);
-            }
-            if (memory_desc_wrapper(&data_pd_).blocking_desc()
-                .padding_dims[1] != this->C() && isa < avx2)
-                return status::unimplemented;
-
-            if (stats_is_src() || is_training()) {
-                memory_desc_t stats_d;
-                dims_t stats_dims = { C() };
-                mkldnn_memory_desc_init(&stats_d, 1, stats_dims,
-                        data_type::f32, x);
-                mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
-                variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
-            }
-
-            return success;
-        }
+        virtual status_t init() override;
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    jit_uni_batch_normalization_fwd_t(const pd_t *pd,
+    jit_uni_batch_normalization_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
     ~jit_uni_batch_normalization_fwd_t();
-    virtual void execute(event_t *e);
+
+    virtual void execute(event_t *e) const;
 
 private:
-    uni_bnorm_driver_t<isa> *bnorm_driver_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
+    uni_bnorm_driver_t<isa> *bnorm_driver_;
 };
 
 template <cpu_isa_t isa>
@@ -113,63 +75,21 @@ struct jit_uni_batch_normalization_bwd_t: public cpu_primitive_t {
                 JIT_IMPL_NAME_HELPER("jit:", isa, ""),
                 jit_uni_batch_normalization_bwd_t<isa>);
 
-        virtual status_t init() override {
-            using namespace prop_kind;
-            using namespace data_type;
-            using namespace utils;
-            using namespace memory_format;
-            assert(engine()->kind() == engine_kind::cpu);
-            auto desired_fmt = (ndims() == 4)
-                ? utils::one_of(isa, sse42, avx2) ? nChw8c : nChw16c
-                : utils::one_of(isa, sse42, avx2) ? nCdhw8c : nCdhw16c;
-            bool ok = true
-                && mayiuse(isa)
-                && is_bwd()
-                && !has_zero_dim_memory()
-                && utils::one_of(ndims(), 4, 5)
-                && everyone_is(f32, desc()->data_desc.data_type,
-                        desc()->diff_data_desc.data_type)
-                && IMPLICATION(use_scaleshift(),
-                        desc()->data_scaleshift_desc.data_type == f32)
-                && everyone_is(desired_fmt, desc()->diff_data_desc.format,
-                        desc()->data_desc.format)
-                && attr()->has_default_values();
-            if (!ok) return status::unimplemented;
-            if (memory_desc_wrapper(&data_pd_).blocking_desc()
-                .padding_dims[1] != this->C() && isa < avx2)
-                return status::unimplemented;
-
-            if (fuse_bn_relu()) {
-                if (isa < avx2) return status::unimplemented;
-                bn_init_default_ws(this, this->workspace_pd_, 1);
-                const size_t this_ws_sz
-                    = memory_desc_wrapper(this->workspace_pd()).size();
-
-                bool ws_ok = true
-                    && hint_fwd_pd_->workspace_pd()
-                    && memory_desc_wrapper(hint_fwd_pd_->workspace_pd()).size()
-                            == this_ws_sz;
-                if (!ws_ok)
-                    return status::unimplemented;
-            }
-
-            /* TODO: extra checks required */
-
-            return success;
-        }
+        virtual status_t init() override;
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    jit_uni_batch_normalization_bwd_t(const pd_t *pd,
+    jit_uni_batch_normalization_bwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
     ~jit_uni_batch_normalization_bwd_t();
-    virtual void execute(event_t *e);
+
+    virtual void execute(event_t *e) const;
 
 private:
-    uni_bnorm_driver_t<isa> *bnorm_driver_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
+    uni_bnorm_driver_t<isa> *bnorm_driver_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.cpp
new file mode 100644
index 000000000..447a017fd
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.cpp
@@ -0,0 +1,925 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <common/primitive_attr.hpp>
+#include "c_types_map.hpp"
+#include "nstl.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+#include "cpu_memory.hpp"
+
+#include "jit_uni_bin_conv_kernel.hpp"
+
+#define GET_OFF(field) offsetof(jit_conv_call_s, field)
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::prop_kind;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
+using namespace mkldnn::impl::utils;
+
+using namespace Xbyak;
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::cvt2ps(data_type_t type_in, Vmm vmm_in, const Operand &op, bool scalar_load) {
+    Xmm xmm_in = Xmm(vmm_in.getIdx());
+
+    switch (type_in) {
+        case data_type::f32:
+        case data_type::s32:
+            if (scalar_load) {
+                mov(reg_tmp_32, op);
+                movq(xmm_in, reg_tmp_64);
+            } else {
+                uni_vmovups(vmm_in, op);
+            }
+            break;
+        case data_type::s8:
+            if (scalar_load) {
+                movsx(reg_tmp_32, op);
+                movq(xmm_in, reg_tmp_64);
+            } else {
+                uni_vpmovsxbd(vmm_in, op);
+            }
+            break;
+        case data_type::u8:
+            if (scalar_load) {
+                movzx(reg_tmp_32, op);
+                movq(xmm_in, reg_tmp_64);
+            } else {
+                uni_vpmovzxbd(vmm_in, op);
+            }
+            break;
+        default: assert(!"unsupported data type");
+    }
+
+    if (type_in != data_type::f32)
+        uni_vcvtdq2ps(vmm_in, vmm_in);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::store_dst(const Xbyak::Address &op, Vmm vmm_dst, bool scalar_store) {
+    Ymm ymm_dst = Ymm(vmm_dst.getIdx());
+    Xmm xmm_dst = Xmm(vmm_dst.getIdx());
+
+    switch (jcp.dst_dt) {
+        case data_type::f32:
+        case data_type::s32:
+            if (scalar_store) {
+                movq(reg_tmp_64, xmm_dst);
+                mov(op, reg_tmp_32);
+            } else {
+                uni_vmovups(op, vmm_dst);
+            }
+            break;
+        case data_type::s8:
+            uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst);
+
+            if (isa != sse42 && !scalar_store)
+                vpermq(ymm_dst, ymm_dst, 0x08);
+
+            uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
+
+            if (scalar_store) {
+                movq(reg_tmp_64, xmm_dst);
+                mov(op, reg_tmp_8);
+            } else {
+                if (isa != sse42)
+                    vmovq(op, xmm_dst);
+                else
+                    movd(op, xmm_dst);
+            }
+            break;
+        case data_type::u8:
+        case data_type::bin:
+            uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst);
+
+            if (isa != sse42 && !scalar_store)
+                vpermq(ymm_dst, ymm_dst, 0x08);
+
+            uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst);
+
+            if (scalar_store) {
+                movq(reg_tmp_64, xmm_dst);
+                mov(op, reg_tmp_8);
+            } else {
+                if (isa != sse42)
+                    vmovq(op, xmm_dst);
+                else
+                    movd(op, xmm_dst);
+            }
+
+            break;
+        default:
+            assert(!"unknown dst_dt");
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::apply_filter(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step,
+        int ic_blocks, bool last_icb, bool h_padded)
+{
+    int kw = jcp.kw;
+    int kh = jcp.kh;
+    int stride_w = jcp.stride_w;
+    int dilate_w = jcp.dilate_w + 1;
+    int ic_blk = jcp.ic_block;
+    int oc_blk = jcp.oc_block;
+
+    int repeats = isa == sse42 && oc_step > (oc_blk / 2) ? 2 : 1;
+    int nbits = 8;
+
+    for (int ki = 0; ki < kw; ki++) {
+        int jj_start = nstl::max(0, div_up(pad_l - ki * dilate_w, stride_w));
+        int jj_end = ur_w  - nstl::max(0, div_up(ki*dilate_w+pad_r-(kw-1)*dilate_w, stride_w));
+
+        int _start = (!jcp.exclude_pad) ? 0 : jj_start;
+        int _end = (!jcp.exclude_pad) ? ur_w : jj_end;
+
+        for (int ifm2 = 0; ifm2 < ic_blocks; ifm2++) {
+            for (int jj = _start; jj < _end; jj++) {
+                int inp_off = ((ki*dilate_w + jj*stride_w - pad_l)*div_up(jcp.ic, nbits) + ifm2 * div_up(ic_blk, nbits)) * jcp.typesize_in;
+
+                if (h_padded || jj < jj_start || jj >= jj_end) {
+                    uni_vmovups(vmm_src, ptr[reg_table + 256]);
+                } else {
+                    uni_vpbroadcastd(vmm_src, ptr[aux1_reg_input + inp_off]);
+                }
+
+                for (int r = 0; r < repeats; r++) {
+                    for (int ii = 0; ii < oc_blocks; ii++) {
+                        int ker_off = (ifm2 * kw * div_up(ic_blk, nbits) * oc_blk
+                                       + ii * jcp.nb_ic * div_up(ic_blk, nbits) * kh * kw * oc_blk
+                                       + ki * div_up(ic_blk, nbits) * oc_blk + r * div_up(ic_blk, nbits) * (oc_blk / 2)) * jcp.typesize_in;
+
+                        uni_vmovups(vmm_tmp, ptr[aux1_reg_kernel + ker_off]);
+
+                        uni_vpxor(vmm_tmp, vmm_tmp, vmm_src);
+                        if (jcp.ic_padded != jcp.ic && last_icb && ifm2 == (ic_blocks - 1))
+                            uni_vandps(vmm_tmp, vmm_tmp, ptr[reg_table + 224]);
+
+                        if (isa == sse42) {
+                            movups(vmm_tmp1, vmm_tmp);
+                            pand(vmm_tmp1, vmm_mask);
+                        } else {
+                            uni_vandps(vmm_tmp1, vmm_mask, vmm_tmp);
+                        }
+
+                        uni_vpsrld(vmm_tmp, vmm_tmp, 4);
+                        uni_vandps(vmm_tmp, vmm_tmp, vmm_mask);
+
+                        if (isa == sse42) {
+                            movups(vmm_tmp2, vmm_lookup);
+                            pshufb(vmm_tmp2, vmm_tmp);
+                            movups(vmm_tmp, vmm_lookup);
+                            pshufb(vmm_tmp, vmm_tmp1);
+                            paddb(vmm_tmp, vmm_tmp2);
+                        } else {
+                            uni_vpshufb(vmm_tmp, vmm_lookup, vmm_tmp);
+                            uni_vpshufb(vmm_tmp1, vmm_lookup, vmm_tmp1);
+                            uni_vpaddb(vmm_tmp, vmm_tmp, vmm_tmp1);
+                        }
+
+                        uni_vpmaddubsw(vmm_tmp, vmm_tmp, vmm_one_u8);
+                        uni_vpmaddwd(vmm_tmp, vmm_tmp, vmm_one_s16);
+                        uni_vpaddd(Vmm(1 + r*jcp.ur_w*jcp.nb_oc_blocking + ur_w * ii + jj),
+                                   Vmm(1 + r*jcp.ur_w*jcp.nb_oc_blocking + ur_w * ii + jj), vmm_tmp);
+                    }
+                }
+            }
+        }
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::oh_step_unroll_kw(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step, bool h_padded) {
+    int kw = jcp.kw;
+
+    int nbits = 8;
+    int inp_mult = div_up(jcp.ic_block, nbits);
+    int out_mult = jcp.oc_block;
+
+    Label icb_main_loop;
+    Label icb_tail;
+
+    mov(aux1_reg_input, aux_reg_input);
+    mov(aux1_reg_kernel, aux_reg_kernel);
+
+    mov(reg_icb_iter, jcp.nb_ic);
+    L(icb_main_loop);
+    {
+        cmp(reg_icb_iter, 1);
+        jle(icb_tail, T_NEAR);
+
+        apply_filter(ur_w, pad_l, pad_r, oc_blocks, oc_step, 1, false, h_padded);
+
+        add(aux1_reg_input, inp_mult * jcp.typesize_in);
+        add(aux1_reg_kernel, kw * inp_mult * out_mult * jcp.typesize_in);
+        sub(reg_icb_iter, 1);
+        jmp(icb_main_loop, T_NEAR);
+    }
+
+    L(icb_tail);
+
+    apply_filter(ur_w, pad_l, pad_r, oc_blocks, oc_step, 1, true, h_padded);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::kh_loop(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step) {
+    int iw = jcp.iw;
+    int kw = jcp.kw;
+    int dilate_h = jcp.dilate_h + 1;
+
+    int nbits = 8;
+    const int inp_mult = dilate_h * div_up(jcp.ic, nbits);
+
+    Label t_overflow_label, no_t_overflow_label,
+          b_overflow_label, no_b_overflow_label;
+
+    mov(aux_reg_input, reg_input);
+    mov(aux_reg_kernel, reg_kernel_base);
+
+    uni_vmovups(vmm_lookup,  ptr[reg_table]);
+    uni_vmovups(vmm_mask,    ptr[reg_table + 32]);
+    uni_vmovups(vmm_one_u8,  ptr[reg_table + 160]);
+    uni_vmovups(vmm_one_s16, ptr[reg_table + 192]);
+
+    if (!jcp.exclude_pad) {
+        mov(reg_overflow,  ptr[param1 + GET_OFF(t_overflow)]);
+        cmp(reg_overflow, 0);
+        je(no_t_overflow_label, T_NEAR);
+        L(t_overflow_label); {
+            oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, true);
+
+            add(aux_reg_kernel, jcp.typesize_in * kw * jcp.oc_block * jcp.nb_ic * div_up(jcp.ic_block, nbits));
+            dec(reg_overflow);
+            cmp(reg_overflow, 0);
+            jg(t_overflow_label, T_NEAR);
+        }
+        L(no_t_overflow_label);
+    }
+
+    Label skip_kh_loop;
+    mov(reg_kh, ptr[this->param1 + GET_OFF(kh_padding)]);
+    if (!jcp.exclude_pad || (jcp.exclude_pad &&
+                               (jcp.kh - 1) * (jcp.dilate_h + 1) < nstl::max(jcp.t_pad, jcp.b_pad))) {
+        cmp(reg_kh, 0);
+        je(skip_kh_loop, T_NEAR);
+    }
+
+    Label kh_label;
+    L(kh_label);
+    {
+        oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, false);
+
+        add(aux_reg_kernel, jcp.typesize_in * kw * jcp.oc_block * jcp.nb_ic * div_up(jcp.ic_block, nbits));
+        add(aux_reg_input, jcp.typesize_in * iw * inp_mult);
+
+        dec(reg_kh);
+        cmp(reg_kh, 0);
+        jg(kh_label, T_NEAR);
+    }
+
+    L(skip_kh_loop);
+
+    if (!jcp.exclude_pad) {
+        mov(reg_overflow,  ptr[param1 + GET_OFF(b_overflow)]);
+        cmp(reg_overflow, 0);
+        je(no_b_overflow_label, T_NEAR);
+        L(b_overflow_label); {
+            oh_step_unroll_kw(ur_w, pad_l, pad_r, oc_blocks, oc_step, true);
+
+            add(aux_reg_kernel, jcp.typesize_in * kw * jcp.oc_block * jcp.nb_ic * div_up(jcp.ic_block, nbits));
+            dec(reg_overflow);
+            cmp(reg_overflow, 0);
+            jg(b_overflow_label, T_NEAR);
+        }
+        L(no_b_overflow_label);
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::width_blk_step(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step)
+{
+    int nbits = 8;
+    int repeats = isa == sse42 && oc_step > (jcp.oc_block / 2) ? 2 : 1;
+
+    for (int r = 0; r < repeats; r++)
+        for (int ii = 0; ii < oc_blocks; ii++)
+            for (int jj = 0; jj < ur_w; jj++)
+                uni_vpxor(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj),
+                          Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj),
+                          Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj));
+
+    kh_loop(ur_w, pad_l, pad_r, oc_blocks, oc_step);
+
+    const auto &p = attr_.post_ops_;
+    for (int r = 0; r < repeats; r++) {
+        int tail_size = isa == sse42 ? nstl::min(jcp.oc_block / 2, oc_step - r * jcp.oc_block / 2) : oc_step;
+        bool is_scalar_store = isa == sse42 ? tail_size < jcp.oc_block / 2 : tail_size < jcp.oc_block;
+
+        int kw_padding[ur_w];
+        if (jcp.exclude_pad) {
+            mov(reg_tmp_32, jcp.ic);
+            imul(reg_tmp_32,  ptr[param1 + GET_OFF(kh_padding)]);
+
+            for (int jj = 0; jj < ur_w; jj++)
+                kw_padding[jj] = 0;
+
+            for (int ki = 0; ki < jcp.kw; ki++) {
+                int jj_start = nstl::max(0, div_up(pad_l - ki * (jcp.dilate_w + 1), jcp.stride_w));
+                int jj_end = ur_w - nstl::max(0, div_up(ki * (jcp.dilate_w + 1) + pad_r -
+                                                        (jcp.kw - 1) * (jcp.dilate_w + 1), jcp.stride_w));
+                for (int jj = jj_start; jj < jj_end; jj++) {
+                    kw_padding[jj]++;
+                }
+            }
+        } else {
+            uni_vmovups(vmm_shift, ptr[reg_table + 128]);
+        }
+        uni_vmovups(vmm_scale, ptr[reg_table + 96]);
+
+        for (int jj = 0; jj < ur_w; jj++) {
+            if (jcp.exclude_pad) {
+                mov(reg_shift, kw_padding[jj]);
+                imul(reg_shift, reg_tmp_32);
+                movq(Xmm(vmm_shift.getIdx()), reg_shift);
+                uni_vbroadcastss(vmm_shift, Xmm(vmm_shift.getIdx()));
+                uni_vcvtdq2ps(vmm_shift, vmm_shift);
+            }
+
+            for (int ii = 0; ii < oc_blocks; ii++) {
+                uni_vcvtdq2ps(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj));
+                uni_vfmadd213ps(Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj), vmm_scale, vmm_shift);
+            }
+        }
+
+        int eltwise_inj_idx = 0;
+        int depthwise_inj_idx = 0;
+        int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
+        for (int i = 0; i < end_idx; i++) {
+            int start_idx = 1 + r * jcp.ur_w * jcp.nb_oc_blocking;
+
+            auto& post_op = p.entry_[i];
+            if (post_op.is_eltwise()) {
+                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(start_idx, start_idx + oc_blocks * ur_w);
+                eltwise_inj_idx++;
+            } else if (post_op.is_depthwise()) {
+                pop(reg_oc_off);
+
+                mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+                mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+                add(reg_d_weights, reg_oc_off);
+                add(reg_d_bias, reg_oc_off);
+
+                if (r == 1) {
+                    add(reg_d_weights, (jcp.oc_block / 2) * sizeof(float));
+                    add(reg_d_bias, (jcp.oc_block / 2) * sizeof(float));
+                }
+
+                for (int ii = 0; ii < oc_blocks; ii++) {
+                    depthwise_injectors[depthwise_inj_idx]->compute_vector_range(start_idx + ur_w * ii,
+                            start_idx + ur_w * ii + ur_w, reg_d_weights, reg_d_bias);
+
+                    add(reg_d_weights, jcp.oc_block * sizeof(float));
+                    add(reg_d_bias, jcp.oc_block * sizeof(float));
+                }
+
+                depthwise_inj_idx++;
+
+                push(reg_oc_off);
+            } else if (post_op.is_sum(false)) {
+                for (int ii = 0; ii < oc_blocks; ii++) {
+                    for (int jj = 0; jj < ur_w; jj++) {
+                        Vmm vmm_dst = Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
+
+                        if (is_scalar_store) {
+                            for (int oc = 0; oc < tail_size; oc++) {
+                                int o_off =  jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2) + oc;
+
+                                uni_vpxor(vmm_sum, vmm_sum, vmm_sum);
+                                cvt2ps(jcp.dst_dt, vmm_sum, ptr[reg_output + o_off * jcp.typesize_out], true);
+
+                                if (oc < jcp.oc_block / 2) {
+                                    uni_vpslldq(vmm_sum, vmm_sum, oc * sizeof(float));
+                                } else {
+                                    Ymm ymm_prev_dst = Ymm(vmm_sum.getIdx());
+                                    vperm2i128(ymm_prev_dst, ymm_prev_dst, ymm_prev_dst, 0x01);
+                                    vpslldq(vmm_sum, vmm_sum, (oc - jcp.oc_block / 2) * sizeof(float));
+                                }
+
+                                uni_vaddps(vmm_dst, vmm_dst, vmm_sum);
+                            }
+                        } else {
+                            size_t o_off = ii * jcp.oc_block + jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2);
+
+                            cvt2ps(jcp.dst_dt, vmm_sum, ptr[reg_output + o_off * jcp.typesize_out], false);
+                            uni_vaddps(vmm_dst, vmm_dst, vmm_sum);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    if (jcp.with_binarization) {
+        int binarization_idx = p.find(primitive_kind::binarization);
+
+        pop(reg_oc_off);
+
+        mov(reg_b_weights, reinterpret_cast<size_t>(p.entry_[binarization_idx].binarization.weights_data));
+        add(reg_b_weights, reg_oc_off);
+
+        push(reg_oc_off);
+
+        for (int ii = 0; ii < oc_blocks; ii++) {
+            for (int jj = 0; jj < ur_w; jj++) {
+                for (int r = 0; r < repeats; r++) {
+                    int tail_size = isa == sse42 ? nstl::min(jcp.oc_block / 2, oc_step - r * jcp.oc_block / 2) : oc_step;
+                    mov(reg_b_mask, (1 << tail_size) - 1);
+                    uni_vmovups(vmm_thr, ptr[reg_b_weights + (ii * jcp.oc_block + r * (jcp.oc_block / 2)) * sizeof(float)]);
+
+                    Vmm vmm_dst = Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
+
+                    uni_vcmpgtps(vmm_dst, vmm_dst, vmm_thr);
+
+                    if (r == 0) {
+                        uni_vmovmskps(reg_tmp_32, vmm_dst);
+                        and_(reg_tmp_64, reg_b_mask);
+                    } else {
+                        uni_vmovmskps(reg_tmp2_32, vmm_dst);
+                        and_(reg_tmp2_64, reg_b_mask);
+                        shl(reg_tmp2_32, 4);
+                        or_(reg_tmp_32, reg_tmp2_32);
+                    }
+
+                    if (r == repeats - 1) {
+                        const size_t o_off = (ii + jj * div_up(jcp.oc, nbits));
+                        mov(ptr[reg_output + o_off * jcp.typesize_out], reg_tmp_8);
+                    }
+                }
+            }
+        }
+    } else {
+        for (int r = 0; r < repeats; r++) {
+            int tail_size = isa == sse42 ? nstl::min(jcp.oc_block / 2, oc_step - r * jcp.oc_block / 2) : oc_step;
+            bool is_scalar_store = isa == sse42 ? tail_size < jcp.oc_block / 2 : tail_size < jcp.oc_block;
+            if (is_scalar_store) {
+                for (int jj = 0; jj < ur_w; jj++) {
+                    Vmm vmm_dst = Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + jj);
+                    Ymm ymm_dst = Ymm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + jj);
+
+                    for (int oc = 0; oc < tail_size; oc++) {
+                        size_t o_off;
+                        if (jcp.with_dw_conv)
+                            o_off = jj * jcp.oc_block + oc + r * (jcp.oc_block / 2);
+                        else
+                            o_off = jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2) + oc;
+
+                        store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, true);
+
+                        if (isa == sse42) {
+                            psrldq(vmm_dst, jcp.typesize_out);
+                        } else {
+                            vperm2i128(ymm_tmp, ymm_dst, ymm_dst, 0x01);
+                            vpalignr(ymm_dst, vmm_tmp, ymm_dst, jcp.typesize_out);
+                        }
+                    }
+                }
+            } else {
+                for (int ii = 0; ii < oc_blocks; ii++) {
+                    for (int jj = 0; jj < ur_w; jj++) {
+                        Vmm vmm_dst = Vmm(1 + r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
+
+                        size_t o_off;
+                        if (jcp.with_dw_conv)
+                            o_off = ((size_t) ii * jcp_dw_conv.kh * jcp.ow + jj) * jcp.oc_block +
+                                    r * (jcp.oc_block / 2);
+                        else
+                            o_off = ii * jcp.oc_block + jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2);
+
+                        store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, false);
+                    }
+                }
+            }
+        }
+    }
+}
+
+template <cpu_isa_t isa>
+inline void jit_uni_bin_conv_fwd_kernel<isa>::solve_common(int oc_blocks, int oc_step)
+{
+    int ur_w = jcp.ur_w;
+    int ur_w_tail = jcp.ur_w_tail;
+    int n_oi = jcp.ow / ur_w;
+    int iw = jcp.iw;
+    int kw = jcp.kw;
+    int dilate_w = jcp.dilate_w + 1;
+    int str_w = jcp.stride_w;
+
+    int nbits = 8;
+    const int inp_mult = div_up(jcp.ic, nbits);
+    const int out_mult = jcp.with_dw_conv ? jcp.oc_block : jcp.with_binarization ? div_up(jcp.oc, nbits) : jcp.oc;
+
+    int l_pad = jcp.l_pad;
+    int r_pad = nstl::max(0, (jcp.ow - 1) * str_w + (kw - 1) * dilate_w
+            - (iw + l_pad - 1));
+    int r_pad1 = (ur_w * n_oi - 1) * str_w + (kw - 1) * dilate_w
+            - (iw + l_pad - 1);
+    if (r_pad1 > 0) n_oi--;
+
+    mov(reg_input, reg_input_base);
+    mov(reg_output, reg_output_base);
+
+    push(reg_input_base);
+    push(reg_output_base);
+    push(reg_oc_work);
+    push(reg_oc_off);
+
+    if (l_pad > 0) {
+        n_oi--;
+        if (n_oi < 0 && r_pad1 > 0)
+            width_blk_step(ur_w, l_pad, r_pad1, oc_blocks, oc_step); // "lrpad"
+        else
+            width_blk_step(ur_w, l_pad, 0, oc_blocks, oc_step); // "lpad"
+        add(reg_input, jcp.typesize_in * (ur_w * str_w - l_pad) * inp_mult);
+        add(reg_output, jcp.typesize_out * ur_w * out_mult);
+    }
+
+    Label ow_loop_label;
+    xor_(oi_iter, oi_iter);
+
+    if (n_oi > 0) {
+        L(ow_loop_label);
+
+        width_blk_step(ur_w, 0, 0, oc_blocks, oc_step); // "middle"
+        add(reg_input, jcp.typesize_in * ur_w * str_w * inp_mult);
+        add(reg_output, jcp.typesize_out * ur_w * out_mult);
+
+        inc(oi_iter);
+        cmp(oi_iter, n_oi);
+        jl(ow_loop_label, T_NEAR);
+    }
+
+    if (r_pad1 > 0 && n_oi >=0) {
+        width_blk_step(ur_w, 0, r_pad1, oc_blocks, oc_step); // "rpad"
+        add(reg_input, jcp.typesize_in * ur_w * str_w * inp_mult);
+        add(reg_output, jcp.typesize_out * ur_w * out_mult);
+    }
+
+    if (ur_w_tail != 0)
+        width_blk_step(ur_w_tail, 0, r_pad, oc_blocks, oc_step); // "tail"
+
+    pop(reg_oc_off);
+    pop(reg_oc_work);
+    pop(reg_output_base);
+    pop(reg_input_base);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::generate()
+{
+    const auto &p = attr_.post_ops_;
+    int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
+    for (int i = 0; i < end_idx; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<isa>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<isa>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
+    this->preamble();
+
+    mov(reg_input_base, ptr[this->param1 + GET_OFF(src)]);
+    mov(reg_output_base, ptr[this->param1 + GET_OFF(dst)]);
+    mov(reg_kernel_base, ptr[this->param1 + GET_OFF(filt)]);
+
+    mov(reg_kh, ptr[this->param1 + GET_OFF(kh_padding)]);
+    mov(reg_oc_work, ptr[this->param1 + GET_OFF(oc_work)]);
+
+    mov(reg_oc_off,  ptr[param1 + GET_OFF(oc_off)]);
+    mov(reg_table, l_table);
+
+    Label main_loop_label;
+    Label tail_label;
+    Label exit_label;
+
+    cmp(reg_oc_work, jcp.nb_oc_blocking * jcp.oc_block);
+    jne(main_loop_label, T_NEAR);
+
+    solve_common(jcp.nb_oc_blocking, jcp.oc_block);
+
+    sub(reg_oc_work, jcp.nb_oc_blocking * jcp.oc_block);
+
+    jmp(exit_label, T_NEAR);
+
+    int nbits = 8;
+
+    L(main_loop_label); {
+        cmp(reg_oc_work, jcp.oc_block);
+        jl(tail_label, T_NEAR);
+
+        solve_common(1, jcp.oc_block);
+
+        sub(reg_oc_work, jcp.oc_block);
+        add(reg_kernel_base, jcp.oc_block * jcp.nb_ic * jcp.kh * jcp.kw * div_up(jcp.ic_block, nbits) * jcp.typesize_in);
+
+        if (jcp.with_dw_conv) {
+            add(reg_output_base, jcp.oc_block * jcp_dw_conv.kh * jcp.ow * jcp.typesize_out);
+        } else {
+            if (jcp.with_binarization)
+                add(reg_output_base, jcp.typesize_out);
+            else
+                add(reg_output_base, jcp.oc_block * jcp.typesize_out);
+        }
+
+        add(reg_oc_off, jcp.oc_block * sizeof(float));
+
+        jmp(main_loop_label, T_NEAR);
+    }
+
+    L(tail_label);
+
+    if (jcp.oc % jcp.oc_block != 0)
+        solve_common(1, jcp.oc % jcp.oc_block);
+
+    L(exit_label);
+
+    this->postamble();
+
+    prepare_table();
+
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::prepare_table() {
+    const unsigned int cvals[] = {
+            0x02010100, // 0 1 1 2
+            0x03020201, // 1 2 2 3
+            0x03020201, // 1 2 2 3
+            0x04030302,  // 2 3 3 4
+            0x02010100, // 0 1 1 2
+            0x03020201, // 1 2 2 3
+            0x03020201, // 1 2 2 3
+            0x04030302,  // 2 3 3 4
+            0x0f0f0f0f,
+            0x000000ff,
+            0xc0000000, // -2.0f
+            0x01010101,
+            0x00010001
+    };
+
+    align(64);
+    L(l_table);
+    // offset = 0
+    for (size_t d = 0; d < 8; ++d) {
+        dd(cvals[d % 8]);
+    }
+    // offset = 32
+    for (size_t d = 0; d < 8; ++d) {
+        dd(cvals[8]);
+    }
+    // offset = 64
+    for (size_t d = 0; d < 8; ++d) {
+        dd(cvals[9]);
+    }
+    // offset = 96
+    for (size_t d = 0; d < 8; ++d) {
+        dd(cvals[10]);
+    }
+
+    // offset = 128
+    for (size_t d = 0; d < 8; ++d) {
+        dd(float2int(jcp.ic * jcp.kw * jcp.kh));
+    }
+
+    // offset = 160
+    for (size_t d = 0; d < 8; ++d) {
+        dd(cvals[11]);
+    }
+    // offset = 192
+    for (size_t d = 0; d < 8; ++d) {
+        dd(cvals[12]);
+    }
+    // offset = 224
+    for (size_t d = 0; d < 8; ++d) {
+        uint32_t mask = 0xffffffff >> (jcp.ic_padded - jcp.ic);
+        dd(mask);
+    }
+    // offset = 256
+    for (size_t d = 0; d < 8; ++d) {
+        uint32_t val = jcp.pad_value == 1.0f ? 0xffffffff : 0x00000000;
+        dd(val);
+    }
+}
+
+template <cpu_isa_t isa>
+bool jit_uni_bin_conv_fwd_kernel<isa>::post_ops_ok(jit_bin_conv_conf_t &jcp, const primitive_attr_t &attr) {
+    const auto &p = attr.post_ops_;
+
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_depthwise = [&](int idx) { return p.entry_[idx].is_depthwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+    auto is_dw_conv = [&](int idx) { return p.entry_[idx].is_dw_conv(); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
+    auto is_binarization = [&](int idx) { return p.entry_[idx].is_binarization(); };
+
+    switch (p.len_) {
+    case 0: return true; // no post_ops
+    case 1:
+        return (is_simple(0) || is_sum(0) || is_dw_conv(0) || is_binarization(0));
+    case 2:
+        return ((is_sum(0) && is_simple(1)) || (is_dw_conv(0) && is_simple(1)) ||
+                (is_simple(0) && is_dw_conv(1)) || (is_dw_conv(0) && is_sum(1)) ||
+                (is_simple(0) && is_simple(1)) || (is_simple(0) && is_binarization(1)) ||
+                (is_dw_conv(0) && is_binarization(1)) || (is_simple(0) && is_sum(1)));
+    case 3:
+        return ((is_simple(0) && is_dw_conv(1) && is_simple(2)) ||
+                (is_dw_conv(0) && is_sum(1) && is_simple(2)) ||
+                (is_sum(0) && is_simple(1) && is_simple(2)) ||
+                (is_simple(0) && is_sum(1) && is_simple(2)) ||
+                (is_simple(0) && is_dw_conv(1) && is_binarization(2)) ||
+                (is_simple(0) && is_simple(1) && is_dw_conv(2)));
+    case 4: return ((is_simple(0) && is_dw_conv(1) && is_sum(2) && is_simple(3)) ||
+                    (is_simple(0) && is_dw_conv(1) && is_simple(2) && is_binarization(3)) ||
+                    (is_simple(0) && is_simple(1) && is_dw_conv(2) && is_binarization(3)) ||
+                    (is_simple(0) && is_simple(1) && is_simple(2) && is_binarization(3)) ||
+                    (is_simple(0) && is_simple(1) && is_dw_conv(2) && is_simple(3)));
+    default: return false;
+    }
+
+    return false;
+}
+
+template <cpu_isa_t isa>
+status_t jit_uni_bin_conv_fwd_kernel<isa>::init_conf(jit_bin_conv_conf_t &jcp,
+        const binary_convolution_desc_t &cd, const memory_desc_wrapper &src_d,
+        const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d, const primitive_attr_t &attr)
+{
+    if (!mayiuse(isa)) return status::unimplemented;
+
+    jcp.prop_kind = cd.prop_kind;
+
+    jcp.dst_dt = cd.dst_desc.data_type;
+
+    const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
+
+    jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
+
+    if (jcp.ngroups != 1)
+        return status::unimplemented;
+
+    jcp.mb = src_d.dims()[0];
+
+    int simd_w = isa == avx512_common ? 16 : 8;
+
+    jcp.ic = src_d.dims()[1] / jcp.ngroups;
+    jcp.oc = dst_d.dims()[1] / jcp.ngroups;
+
+    jcp.oc_padded = rnd_up(jcp.oc, simd_w);
+
+    jcp.ih = src_d.dims()[2];
+    jcp.iw = src_d.dims()[3];
+    jcp.oh = dst_d.dims()[2];
+    jcp.ow = dst_d.dims()[3];
+
+    jcp.kh = weights_d.dims()[with_groups + 2];
+    jcp.kw = weights_d.dims()[with_groups + 3];
+
+    jcp.t_pad = cd.padding[0][0];
+    jcp.l_pad = cd.padding[0][1];
+
+    jcp.stride_h = cd.strides[0];
+    jcp.stride_w = cd.strides[1];
+
+    jcp.dilate_h = cd.dilates[0];
+    jcp.dilate_w = cd.dilates[1];
+
+    jcp.src_fmt = src_d.format();
+
+    if (!post_ops_ok(jcp, attr))
+        return status::unimplemented;
+
+    jcp.pad_value = cd.pad_value;
+    jcp.exclude_pad = jcp.pad_value == 0.0f;
+
+    const auto &p = attr.post_ops_;
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp.with_dw_conv = dw_conv_ind != -1;
+    if (jcp.with_dw_conv) {
+        jcp.dw_conv_oh = jcp.oh;
+        jcp.dw_conv_ow = jcp.ow;
+        jcp.oh = p.entry_[dw_conv_ind].dw_conv.in_h;
+        jcp.ow = p.entry_[dw_conv_ind].dw_conv.in_w;
+    }
+    jcp.with_sum = p.find(primitive_kind::sum, 0, dw_conv_ind) != -1;
+    jcp.with_binarization = p.find(primitive_kind::binarization, 0, dw_conv_ind) != -1;
+
+    if (with_groups)
+        return status::unimplemented;
+
+    auto desired_weights_format = isa == avx512_common ? OhIw16o32i : OhIw8o32i;
+    bool args_ok = true
+        && src_d.format() == nhwc
+        && weights_d.format() == desired_weights_format
+        && dst_d.format() == nhwc;
+    if (!args_ok) return status::unimplemented;
+
+    jcp.ur_h = 1; /* no code-unrolling by h so far */
+    jcp.ur_w = 2;
+    if (jcp.ow < jcp.ur_w) jcp.ur_w = jcp.ow;
+    jcp.ur_w_tail = jcp.ow % jcp.ur_w;
+
+    jcp.nb_oc_blocking = isa == sse42 ? 2 : 4; /* the optimal value for the kernel */
+
+    args_ok = true
+        && jcp.l_pad <= jcp.ur_w
+        && IMPLICATION(jcp.kw > 7, (jcp.t_pad == 0 && jcp.l_pad == 0)
+                || (jcp.stride_w == 1 && jcp.stride_h == 1));
+    if (!args_ok) return status::unimplemented;
+
+    int r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w
+        + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1));
+
+    if (r_pad_no_tail > jcp.ur_w) {
+        /* recalculate ur_w, nb_oc_blocking and ur_w_tail */
+        jcp.ur_w = r_pad_no_tail + 1;
+        jcp.nb_oc_blocking = ((16 - 1)-jcp.ur_w)/jcp.ur_w;
+        jcp.ur_w_tail = jcp.ow % jcp.ur_w;
+        /* check again ... */
+        r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w
+            + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1));
+        if ((r_pad_no_tail > jcp.ur_w) || (jcp.ow < jcp.ur_w))
+            return status::unimplemented;
+    }
+    if (jcp.l_pad > jcp.ur_w) return status::unimplemented;
+
+    jcp.ic_block = 32;
+    jcp.nb_ic = div_up(jcp.ic, jcp.ic_block);
+    jcp.ic_padded = rnd_up(jcp.ic, jcp.ic_block);
+
+    jcp.oc_block = simd_w;
+    jcp.nb_oc = div_up(jcp.oc, jcp.oc_block);
+
+    jcp.nb_ic_blocking = 1;
+
+    jcp.src_dt = cd.src_desc.data_type;
+    jcp.bia_dt = mkldnn_f32;
+    jcp.dst_dt = jcp.with_binarization ? mkldnn_bin : mkldnn_f32;
+
+    jcp.typesize_in = types::data_type_size(jcp.src_dt);
+    jcp.typesize_out = types::data_type_size(jcp.dst_dt);
+    jcp.typesize_acc = sizeof(int32_t);
+
+    return status::success;
+}
+
+template <cpu_isa_t isa>
+void jit_uni_bin_conv_fwd_kernel<isa>::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_bin_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw_conv) {
+    if (jcp.with_dw_conv) {
+        const int nthreads = mkldnn_get_max_threads();
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw_conv.kh * jcp_dw_conv.iw * jcp_dw_conv.ch_block * jcp.nb_oc_blocking;
+        scratchpad.book(key_dw_conv_buffer, sizeof(float) * dw_conv_buffer_size_ * nthreads);
+
+        if (jcp.oc != jcp.oc_padded)
+            scratchpad.book(key_dw_conv_padded_bias, sizeof(float) * jcp.oc_padded);
+    }
+}
+
+template struct jit_uni_bin_conv_fwd_kernel<sse42>;
+template struct jit_uni_bin_conv_fwd_kernel<avx2>;
+template struct jit_uni_bin_conv_fwd_kernel<avx512_common>;
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.hpp
new file mode 100644
index 000000000..83f6f6a6a
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_bin_conv_kernel.hpp
@@ -0,0 +1,140 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef JIT_UNI_BIN_CONV_KERNEL_HPP
+#define JIT_UNI_BIN_CONV_KERNEL_HPP
+
+#include "c_types_map.hpp"
+#include "jit_generator.hpp"
+#include "jit_primitive_conf.hpp"
+#include "cpu_memory.hpp"
+#include "jit_uni_eltwise.hpp"
+#include "jit_uni_depthwise.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <cpu_isa_t isa>
+struct jit_uni_bin_conv_fwd_kernel: public jit_generator {
+    jit_uni_bin_conv_fwd_kernel(jit_bin_conv_conf_t ajcp, jit_conv_conf_t ajcp_dw_conv,
+            const primitive_attr_t &attr): jcp(ajcp), jcp_dw_conv(ajcp_dw_conv), attr_(attr)
+    {
+        this->generate();
+        jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
+    }
+
+    ~jit_uni_bin_conv_fwd_kernel() {
+        for (auto inj : eltwise_injectors)
+           delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
+    }
+
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_bin_conv_fwd_kernel)
+
+    static bool post_ops_ok(jit_bin_conv_conf_t &jcp, const primitive_attr_t &attr);
+    static status_t init_conf(jit_bin_conv_conf_t &jcp,
+            const binary_convolution_desc_t &cd, const memory_desc_wrapper &src_d,
+            const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d, const primitive_attr_t &attr);
+    static void init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_bin_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw_conv);
+
+    jit_bin_conv_conf_t jcp;
+    jit_conv_conf_t jcp_dw_conv;
+    const primitive_attr_t &attr_;
+    void (*jit_ker)(jit_conv_call_s *);
+
+private:
+    using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
+            isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
+    using Ymm = const Xbyak::Ymm;
+    using reg64_t = const Xbyak::Reg64;
+    using reg32_t = const Xbyak::Reg32;
+    using reg8_t = const Xbyak::Reg8;
+
+    reg64_t reg_input = r13;
+    reg64_t reg_output = rbp;
+    reg64_t reg_input_base = rax;
+    reg64_t aux_reg_input = r8;
+    reg64_t reg_kernel_base = rdx;
+    reg64_t aux_reg_kernel = r9;
+    reg64_t reg_output_base = rsi;
+    reg64_t aux1_reg_input = reg_input_base;
+    reg64_t aux1_reg_kernel = reg_output_base;
+
+    reg64_t kj = r10;
+    reg64_t oi_iter = r11;
+    reg64_t reg_kh = abi_not_param1;
+    reg64_t reg_overflow = reg_kh;
+    reg64_t reg_oc_work = r14;
+    reg64_t reg_table = r15;
+    reg64_t reg_icb_iter = reg_oc_work;
+
+    reg32_t reg_tmp_32 = r12d;
+    reg64_t reg_tmp_64 = r12;
+    reg8_t reg_tmp_8 = r12b;
+
+    reg64_t reg_d_weights = aux_reg_input;
+    reg64_t reg_d_bias = aux_reg_kernel;
+    reg64_t reg_oc_off = kj;
+    reg64_t reg_tmp2_64 = reg_oc_off;
+    reg32_t reg_tmp2_32 = reg_oc_off.cvt32();
+
+    reg64_t reg_b_weights = aux_reg_input;
+    reg64_t reg_b_mask = aux_reg_kernel;
+
+    reg64_t reg_shift = aux_reg_input;
+
+    Vmm vmm_scale = Vmm(14);
+    Vmm vmm_shift = Vmm(15);
+    Vmm vmm_sum = Vmm(10);
+    Vmm vmm_lookup = Vmm(12);
+    Vmm vmm_mask = Vmm(13);
+    Vmm vmm_one_u8 = Vmm(14);
+    Vmm vmm_one_s16 = Vmm(15);
+    Ymm ymm_tmp = Ymm(10);
+    Vmm vmm_tmp = Vmm(10);
+    Vmm vmm_tmp1 = Vmm(11);
+    Vmm vmm_src = Vmm(0);
+    Vmm vmm_tmp2 = Vmm(9);
+    Vmm vmm_thr = Vmm(10);
+
+    Xbyak::Label l_table;
+
+    nstl::vector<jit_uni_eltwise_injector_f32<isa>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<isa>*> depthwise_injectors;
+
+    inline void cvt2ps(data_type_t type_in, Vmm vmm_in, const Xbyak::Operand &op, bool scalar_load);
+    inline void store_dst(const Xbyak::Address &op, Vmm vmm_dst, bool scalar_store);
+    inline void apply_filter(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step, int ic_blocks, bool last_icb, bool h_padded);
+    inline void oh_step_unroll_kw(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step, bool h_padded);
+    inline void kh_loop(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step);
+    inline void width_blk_step(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step);
+    inline void solve_common(int oc_blocks, int oc_step);
+    inline void prepare_table();
+
+    void generate();
+};
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binarization.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binarization.cpp
new file mode 100644
index 000000000..be3b284fe
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binarization.cpp
@@ -0,0 +1,276 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_types.h"
+#include "mkldnn_thread.hpp"
+#include "nstl.hpp"
+#include "utils.hpp"
+#include "jit_uni_binarization.hpp"
+
+#define GET_OFF(field) offsetof(jit_args, field)
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace Xbyak;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::utils;
+
+struct jit_args {
+    const float* from;
+    const uint8_t* to;
+    const float* weights;
+    size_t work_amount;
+};
+
+struct jit_uni_binarization_kernel_f32 : public c_compatible {
+    const binarization_desc_t &desc_;
+    void (*ker_)(const jit_args *);
+
+    void operator()(const jit_args *args) { assert(ker_); ker_(args); }
+
+    jit_uni_binarization_kernel_f32(const binarization_desc_t &desc)
+        : desc_(desc), ker_(nullptr) {}
+    virtual ~jit_uni_binarization_kernel_f32() {}
+};
+
+/* jit kernels */
+namespace {
+
+template <cpu_isa_t isa>
+struct jit_uni_bin_depthwise_kernel_f32 : public jit_uni_binarization_kernel_f32,
+    public jit_generator
+{
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_bin_depthwise_kernel_f32)
+    jit_uni_bin_depthwise_kernel_f32(const binarization_desc_t &desc)
+        : jit_uni_binarization_kernel_f32(desc), jit_generator() {
+        assert(desc.alg_kind == alg_kind::binarization_depthwise);
+        assert(isa == sse42 || isa == avx2 || isa == avx512_common);
+
+        this->preamble();
+
+        mov(reg_from, ptr[param + GET_OFF(from)]);
+        mov(reg_to, ptr[param + GET_OFF(to)]);
+        mov(reg_weights, ptr[param + GET_OFF(weights)]);
+        mov(reg_work_amount, ptr[param + GET_OFF(work_amount)]);
+
+        const int nbits = 8;
+	int simd_w = isa == avx512_common ? 16 : 8;
+        const int C = desc.src_desc.dims[1];
+        const int tail_size = C % simd_w;
+
+        Label unrolled_loop_label;
+        Label main_loop_label;
+        Label tail_label;
+        Label exit_label;
+
+        L(unrolled_loop_label); {
+            int step = isa == sse42 ? nbits / 2 : isa == avx2 ? nbits : 2 * nbits;
+            const int ur_ch = isa == sse42 ? nbits : isa == avx2 ? nbits / 2 : nbits / 4;
+            const int unrolled_loop_step = ur_ch * step;
+
+            cmp(reg_work_amount, unrolled_loop_step);
+            jl(main_loop_label, T_NEAR);
+
+            xor_(reg_bin_32, reg_bin_32);
+            for (int ch = 0; ch < ur_ch; ch++) {
+                uni_vmovups(vmm_src(0), ptr[reg_from + ch*step*sizeof(float)]);
+                uni_vmovups(vmm_wei(0), ptr[reg_weights + ch*step*sizeof(float)]);
+		if (isa == avx512_common) {
+		    vcmpps(k_mask, vmm_src(0), vmm_wei(0), _cmp_gt_os);
+		    kmovw(reg_src_32, k_mask);
+		} else {
+                    uni_vcmpgtps(vmm_src(0), vmm_src(0), vmm_wei(0));
+	            uni_vmovmskps(reg_src_32, vmm_src(0));
+		}
+                shl(reg_src_32, ch * step);
+                or_(reg_bin_32, reg_src_32);
+            }
+            mov(ptr[reg_to], reg_bin_32);
+
+            add(reg_from, unrolled_loop_step*sizeof(float));
+            add(reg_weights, unrolled_loop_step*sizeof(float));
+            add(reg_to, sizeof(uint32_t));
+            sub(reg_work_amount, unrolled_loop_step);
+
+            jmp(unrolled_loop_label, T_NEAR);
+        }
+
+        L(main_loop_label); {
+            int repeats = isa == sse42 ? 2 : 1;
+            int step = isa == sse42 ? nbits / 2 : isa == avx2 ? nbits : nbits * 2;
+            const int main_loop_step = step * repeats;
+
+            cmp(reg_work_amount, main_loop_step);
+            jl(tail_label, T_NEAR);
+
+            xor_(reg_bin_32, reg_bin_32);
+            for (int i = 0; i < repeats; i++) {
+                uni_vmovups(vmm_src(0), ptr[reg_from + i*step*sizeof(float)]);
+                uni_vmovups(vmm_wei(0), ptr[reg_weights + i*step*sizeof(float)]);
+                if (isa == avx512_common) {
+                    vcmpps(k_mask, vmm_src(0), vmm_wei(0), _cmp_gt_os);
+		    kmovw(reg_src_32, k_mask);
+                } else {
+                    uni_vcmpgtps(vmm_src(0), vmm_src(0), vmm_wei(0));
+	            uni_vmovmskps(reg_src_32, vmm_src(0));
+                }
+                shl(reg_src_32, i * step);
+                or_(reg_bin_32, reg_src_32);
+            }
+	    if (isa == avx512_common)
+                mov(ptr[reg_to], reg_bin_16);
+	    else 	
+                mov(ptr[reg_to], reg_bin_8);
+
+            add(reg_from, main_loop_step*sizeof(float));
+            add(reg_weights, main_loop_step*sizeof(float));
+            add(reg_to, isa == avx512_common ? sizeof(uint16_t) : sizeof(uint8_t));
+            sub(reg_work_amount, main_loop_step);
+
+            jmp(main_loop_label, T_NEAR);
+        }
+
+        L(tail_label); {
+            if (tail_size != 0) {
+                xor_(reg_bin_32, reg_bin_32);
+                for (int c = 0; c < tail_size; c++) {
+                    uni_vpxor(xmm_src(0), xmm_src(0), xmm_src(0));
+                    uni_vpxor(xmm_wei(0), xmm_wei(0), xmm_wei(0));
+
+                    movss(xmm_src(0), ptr[reg_from + c * sizeof(float)]);
+                    movss(xmm_wei(0), ptr[reg_weights + c * sizeof(float)]);
+                    uni_vcmpgtps(xmm_src(0), xmm_src(0), xmm_wei(0));
+                    uni_vmovmskps(reg_src_32, xmm_src(0));
+
+                    shl(reg_src_32, c);
+                    or_(reg_bin_32, reg_src_32);
+                }
+		if (isa == avx512_common && tail_size > nbits)
+                    mov(ptr[reg_to], reg_bin_16);
+		else
+		    mov(ptr[reg_to], reg_bin_8);
+            }
+        }
+
+        L(exit_label);
+
+        this->postamble();
+
+        ker_ = (decltype(ker_))this->getCode();
+    }
+
+private:
+    using Vmm = typename utils::conditional3<isa == sse42, Xmm,
+                                             isa == avx2, Ymm, Zmm>::type;
+
+    inline Vmm vmm_src(int idx) { return Vmm(idx); }
+    inline Xmm xmm_src(int idx) { return Xmm(idx); }
+    inline Vmm vmm_wei(int idx) { return Vmm(idx + 4); }
+    inline Xmm xmm_wei(int idx) { return Xmm(idx + 4); }
+
+    Reg64 param = abi_param1;
+    Reg64 reg_from = r8;
+    Reg64 reg_to = r9;
+    Reg64 reg_work_amount = r10;
+    Reg64 reg_weights = r11;
+    Reg16 reg_bin_16 = r12w;
+    Reg32 reg_bin_32 = r12d;
+    Reg8 reg_bin_8 = r12b;
+    Reg32 reg_src_32 = r13d;
+    Reg64 reg_src_64 = r13;
+
+    const unsigned char _cmp_gt_os = 6;
+    Xbyak::Opmask k_mask = Xbyak::Opmask(1);
+};
+
+} /* namespace */
+
+template <cpu_isa_t isa>
+status_t jit_uni_binarization_fwd_t<isa>::pd_t::init() {
+    using namespace alg_kind;
+
+    auto desired_fmt = nhwc;
+
+    assert(engine()->kind() == engine_kind::cpu);
+    bool ok = true && mayiuse(isa)
+        && utils::one_of(desc()->prop_kind, prop_kind::forward_training, prop_kind::forward_inference)
+        && utils::everyone_is(data_type::f32, desc()->src_desc.data_type, desc()->weights_desc.data_type)
+        && utils::everyone_is(data_type::bin, desc()->dst_desc.data_type)
+        && desc()->src_desc.format == desc()->dst_desc.format
+        && utils::one_of(desc()->src_desc.format, desired_fmt)
+        && utils::one_of(desc()->dst_desc.format, desired_fmt)
+        && utils::one_of(desc()->weights_desc.format, x)
+        && attr()->has_default_values();
+
+    return ok ? status::success : status::unimplemented;
+}
+
+template <cpu_isa_t isa>
+jit_uni_binarization_fwd_t<isa>::jit_uni_binarization_fwd_t(const pd_t *apd,
+        const input_vector &inputs, const output_vector &outputs)
+    : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr) {
+    const auto &desc = *pd()->desc();
+    switch (desc.alg_kind) {
+        case alg_kind::binarization_depthwise:
+            kernel_ = new jit_uni_bin_depthwise_kernel_f32<isa>(desc); break;
+        default: assert(!"unknown binarization alg_kind");
+    }
+}
+
+template <cpu_isa_t isa>
+jit_uni_binarization_fwd_t<isa>::~jit_uni_binarization_fwd_t() {
+    delete kernel_;
+}
+
+template <cpu_isa_t isa>
+void jit_uni_binarization_fwd_t<isa>::execute_forward() const {
+    auto src = reinterpret_cast<const src_data_t*>(this->input_memory(0));
+    auto weights = reinterpret_cast<const src_data_t*>(this->input_memory(1));
+    auto dst = reinterpret_cast<uint8_t*>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    const int N = src_d.dims()[0];
+    const int C = src_d.dims()[1];
+    const int H = src_d.dims()[2];
+    const int W = src_d.dims()[3];
+
+    int nbits = 8;
+
+    parallel_nd(N, H, W,
+        [&](int n, int h, int w) {
+	auto arg = jit_args();
+
+        arg.from    = &src[src_d.blk_off(n, 0, h, w)];
+        arg.to      = &dst[dst_d.blk_off(n, 0, h, w) / nbits];
+        arg.weights = &weights[weights_d.blk_off(0)];
+        arg.work_amount = (size_t)C;
+
+        (*kernel_)(&arg);
+    });
+}
+
+template struct jit_uni_binarization_fwd_t<sse42>;
+template struct jit_uni_binarization_fwd_t<avx2>;
+template struct jit_uni_binarization_fwd_t<avx512_common>;
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binarization.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binarization.hpp
new file mode 100644
index 000000000..1c29a3e77
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binarization.hpp
@@ -0,0 +1,73 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_JIT_UNI_BINARIZATION_HPP
+#define CPU_JIT_UNI_BINARIZATION_HPP
+
+#include <assert.h>
+
+#include "c_types_map.hpp"
+#include "cpu_binarization_pd.hpp"
+#include "cpu_engine.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+#include "jit_primitive_conf.hpp"
+#include "jit_generator.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+struct jit_uni_binarization_kernel_f32;
+
+template <cpu_isa_t isa>
+struct jit_uni_binarization_fwd_t : public cpu_primitive_t {
+    struct pd_t : public cpu_binarization_fwd_pd_t {
+        pd_t(engine_t *engine, const binarization_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const binarization_fwd_pd_t *hint_fwd_pd)
+            : cpu_binarization_fwd_pd_t(engine, adesc, attr, hint_fwd_pd) {}
+
+        DECLARE_COMMON_PD_T(
+                JIT_IMPL_NAME_HELPER("jit:", isa, ""),
+                jit_uni_binarization_fwd_t<isa>);
+
+        virtual status_t init() override;
+    };
+
+    jit_uni_binarization_fwd_t(const pd_t *apd, const input_vector &inputs,
+                       const output_vector &outputs);
+    ~jit_uni_binarization_fwd_t();
+
+    typedef typename prec_traits<data_type::f32>::type src_data_t;
+
+    virtual void execute(event_t *e) const
+    {
+        execute_forward();
+        e->set_state(event_t::ready);
+    }
+
+private:
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+    jit_uni_binarization_kernel_f32 *kernel_;
+};
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binary_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binary_convolution.cpp
new file mode 100644
index 000000000..fa9f0d927
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binary_convolution.cpp
@@ -0,0 +1,251 @@
+    /*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cstring>
+#include "mkldnn_types.h"
+
+#include "c_types_map.hpp"
+#include "jit_uni_binary_convolution.hpp"
+#include "utils.hpp"
+#include "mkldnn_thread.hpp"
+#include "type_helpers.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::status;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
+using namespace mkldnn::impl::utils;
+
+template <cpu_isa_t isa>
+void jit_uni_binary_convolution_fwd_t<isa>::execute_forward() const {
+    auto src = reinterpret_cast<const uint8_t*>(this->input_memory(0));
+    auto weights = reinterpret_cast<const uint8_t*>(this->input_memory(1));
+    auto dst_u8 = reinterpret_cast<uint8_t*>(this->memory());
+    auto dst_f32 = reinterpret_cast<float*>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    const auto &jcp = kernel_->jcp;
+    const int MB = pd()->MB();
+
+    int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
+    const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.oh;
+
+    int nbits = 8;
+
+    auto ker = [&](const int ithr, const int nthr) {
+        size_t start{0}, end{0};
+        balance211(work_amount, nthr, ithr, start, end);
+
+        size_t n{0}, g{0}, ocbb{0}, oh{0};
+        nd_iterator_init(start, n, MB, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
+        for (size_t iwork = start; iwork < end; ++iwork) {
+            int ocb = ocbb * jcp.nb_oc_blocking;
+            int ocb_num = jcp.nb_oc_blocking;
+
+            auto par_conv = jit_conv_call_s();
+
+            const int ij = oh * jcp.stride_h;
+            const int i_t_overflow = nstl::min(jcp.kh, div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1)));
+            const int i_b_overflow = nstl::min(jcp.kh, div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) -
+                                                                                jcp.t_pad+1) - jcp.ih, (jcp.dilate_h + 1)));
+
+            const size_t _oc = g * jcp.nb_oc + ocb;
+            const size_t _ic = g * jcp.nb_ic;
+
+            const int ih = nstl::max(ij - jcp.t_pad + i_t_overflow * (jcp.dilate_h + 1), 0);
+            par_conv.src = &src[src_d.blk_off(n, _ic*jcp.ic_block, ih, 0) / nbits];
+
+            if (jcp.with_binarization) {
+                par_conv.dst = &dst_u8[dst_d.blk_off(n, _oc*jcp.oc_block, oh, 0) / nbits];
+            } else {
+                par_conv.dst = &dst_f32[dst_d.blk_off(n, _oc*jcp.oc_block, oh, 0)];
+            }
+
+            const int wh = jcp.exclude_pad ? i_t_overflow : 0;
+            int widx = weights_d.blk_off(ocb, 0, wh, 0);
+            par_conv.filt = &weights[widx / nbits];
+
+            par_conv.oc_work = nstl::min((ocb + ocb_num) * jcp.oc_block, jcp.oc) - ocb*jcp.oc_block;
+
+            par_conv.kw_padding = 0;
+            const int kh_padding = jcp.kh - i_t_overflow - i_b_overflow;
+            par_conv.kh_padding = nstl::max(0, kh_padding);
+            par_conv.t_overflow = i_t_overflow;
+            par_conv.b_overflow = i_b_overflow;
+
+            par_conv.oc_off = _oc * jcp.oc_block * sizeof(float);
+
+            kernel_->jit_ker(&par_conv);
+
+            nd_iterator_step(n, MB, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
+        }
+    };
+
+    parallel(0, ker);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_binary_convolution_fwd_t<isa>::execute_forward_with_dw_conv() const {
+    auto src = reinterpret_cast<const uint8_t*>(this->input_memory(0));
+    auto weights = reinterpret_cast<const uint8_t*>(this->input_memory(1));
+    auto dst_u8 = reinterpret_cast<uint8_t*>(this->memory());
+    auto dst_f32 = reinterpret_cast<float*>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    const auto &jcp = kernel_->jcp;
+    const auto &jcp_dw_conv = dw_conv_kernel_->jcp;
+    const int MB = pd()->MB();
+
+    auto dw_conv_bias = jcp_dw_conv.conv_biases;
+    auto dw_conv_weights = reinterpret_cast<const float*>(jcp_dw_conv.conv_weights);
+
+    int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
+    const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.oh;
+
+    int nbits = 8;
+
+    auto ker = [&](const int ithr, const int nthr) {
+        auto compute_row_generic_conv = [&](float* ws_p, int n, int g, int ocb, int ocb_num, int oh, int num_rows) {
+            for (int h = 0; h < num_rows; h++) {
+                if ((oh + h) < 0 || (oh + h) >= jcp.oh) {
+                    for (int chb = ocb; chb < ocb + ocb_num; chb++) {
+                        memset(ws_p + (((oh + h) + 1) % jcp_dw_conv.kh) * jcp.ow * jcp.oc_block +
+                               (chb - ocb) * jcp_dw_conv.kh * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(float));
+                    }
+                } else {
+		    auto par_conv = jit_conv_call_s();
+
+                    const int ij = (oh + h) * jcp.stride_h;
+                    const int i_t_overflow = nstl::min(jcp.kh, div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1)));
+                    const int i_b_overflow = nstl::min(jcp.kh, div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) -
+                                                                                        jcp.t_pad+1) - jcp.ih, (jcp.dilate_h + 1)));
+
+                    const size_t _oc = g * jcp.nb_oc + ocb;
+                    const size_t _ic = g * jcp.nb_ic;
+
+                    const int ih = nstl::max(ij - jcp.t_pad + i_t_overflow * (jcp.dilate_h + 1), 0);
+                    par_conv.src = &src[src_d.blk_off(n, _ic*jcp.ic_block, ih, 0) / nbits];
+
+                    par_conv.dst = &ws_p[(((oh + h) + 1) % jcp_dw_conv.kh) * jcp.ow * jcp.oc_block];
+
+                    const int wh = jcp.exclude_pad ? i_t_overflow : 0;
+                    int widx = weights_d.blk_off(ocb, 0, wh, 0);
+                    par_conv.filt = &weights[widx / nbits];
+
+                    par_conv.oc_work = nstl::min((ocb + ocb_num) * jcp.oc_block, jcp.oc) - ocb*jcp.oc_block;
+
+                    par_conv.kw_padding = 0;
+                    const int kh_padding = jcp.kh - i_t_overflow - i_b_overflow;
+                    par_conv.kh_padding = nstl::max(0, kh_padding);
+                    par_conv.t_overflow = i_t_overflow;
+                    par_conv.b_overflow = i_b_overflow;
+
+                    par_conv.oc_off = _oc * jcp.oc_block * sizeof(float);
+
+                    kernel_->jit_ker(&par_conv);
+                }
+            }
+        };
+
+        auto compute_row_dw_conv = [&](const float* ws_p, int n, int ocb, int ocb_num, int dst_idx) {
+            for (int chb = ocb; chb < nstl::min(ocb + ocb_num, jcp.nb_oc); chb++) {
+                auto par_conv_dw = jit_conv_call_s();
+
+                par_conv_dw.src_row0 = &ws_p[(((dst_idx+1) - 1) % jcp_dw_conv.kh) * jcp_dw_conv.iw * jcp_dw_conv.ch_block +
+                                             (chb - ocb) * jcp_dw_conv.kh * jcp_dw_conv.iw * jcp_dw_conv.ch_block];
+                par_conv_dw.src_row1 = &ws_p[(((dst_idx+1) - 0) % jcp_dw_conv.kh) * jcp_dw_conv.iw * jcp_dw_conv.ch_block +
+                                             (chb - ocb) * jcp_dw_conv.kh * jcp_dw_conv.iw * jcp_dw_conv.ch_block];
+                par_conv_dw.src_row2 = &ws_p[(((dst_idx+1) + 1) % jcp_dw_conv.kh) * jcp_dw_conv.iw * jcp_dw_conv.ch_block +
+                                             (chb - ocb) * jcp_dw_conv.kh * jcp_dw_conv.iw * jcp_dw_conv.ch_block];
+
+                if (jcp_dw_conv.with_binarization) {
+                    int nbits = 8;
+
+                    int didx = n*jcp_dw_conv.oc*jcp_dw_conv.oh*jcp_dw_conv.ow +
+                               dst_idx/jcp_dw_conv.stride_h*jcp_dw_conv.ow*jcp_dw_conv.oc + chb*jcp_dw_conv.ch_block;
+                    par_conv_dw.dst = &dst_u8[didx / nbits];
+                } else {
+                    par_conv_dw.dst = &dst_f32[n*jcp_dw_conv.oc*jcp_dw_conv.oh*jcp_dw_conv.ow +
+                                               dst_idx/jcp_dw_conv.stride_h*jcp_dw_conv.ow*jcp_dw_conv.oc + chb*jcp_dw_conv.ch_block];
+                }
+
+                par_conv_dw.kh_padding = jcp_dw_conv.kh;
+                par_conv_dw.filt = &dw_conv_weights[chb * jcp_dw_conv.kh * jcp_dw_conv.kw * jcp_dw_conv.ch_block];
+                par_conv_dw.bias = &dw_conv_bias[chb * jcp_dw_conv.ch_block];
+                par_conv_dw.ur_w = (size_t)(jcp_dw_conv.ow);
+                par_conv_dw.oc_work = nstl::min((chb + 1) * jcp_dw_conv.ch_block, jcp_dw_conv.oc) - chb*jcp_dw_conv.ch_block;
+                par_conv_dw.oc_off = chb * jcp_dw_conv.ch_block * sizeof(float);
+
+                dw_conv_kernel_->jit_ker(&par_conv_dw);
+            }
+        };
+
+        size_t start{0}, end{0};
+        balance211(work_amount, nthr, ithr, start, end);
+        auto dw_conv_buffer_ = scratchpad().template get<float>(key_dw_conv_buffer);
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw_conv.kh * jcp_dw_conv.iw * jcp_dw_conv.ch_block * jcp.nb_oc_blocking;
+        auto pbuf = dw_conv_buffer_ + ithr * dw_conv_buffer_size_;
+
+        size_t n{0}, g{0}, ocbb{0}, oh{0};
+        nd_iterator_init(start, n, MB, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
+        for (size_t iwork = start; iwork < end; ++iwork) {
+            int ocb = ocbb * jcp.nb_oc_blocking;
+            int ocb_num = jcp.nb_oc_blocking;
+
+            if (iwork == start || oh == 0) {
+                compute_row_generic_conv(pbuf, n, g, ocb, ocb_num, oh - 1, 2);
+            } else {
+                compute_row_generic_conv(pbuf, n, g, ocb, ocb_num, oh, 1);
+            }
+
+            if (iwork > start && ((oh - 1) % jcp_dw_conv.stride_h == 0) && oh > 0) {
+                compute_row_dw_conv(pbuf, n, ocb, ocb_num, oh - 1);
+            }
+
+            if ((iwork == end - 1 || (int) oh == jcp.oh - 1) && ((oh) % jcp_dw_conv.stride_h == 0)) {
+                compute_row_generic_conv(pbuf, n, g, ocb, ocb_num, oh + 1, 1);
+                compute_row_dw_conv(pbuf, n, ocb, ocb_num, oh);
+            }
+
+            nd_iterator_step(n, MB, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
+        }
+    };
+
+    if (jcp.oc != jcp.oc_padded) {
+        auto dw_conv_padded_bias = scratchpad().template get<float>(key_dw_conv_padded_bias);
+        utils::array_copy(dw_conv_padded_bias, dw_conv_bias, jcp.oc);
+        utils::array_set(dw_conv_padded_bias + jcp.oc, 0.f, jcp.oc_padded - jcp.oc);
+        dw_conv_bias = dw_conv_padded_bias;
+    }
+
+    parallel(0, ker);
+}
+
+template struct jit_uni_binary_convolution_fwd_t<avx512_common>;
+template struct jit_uni_binary_convolution_fwd_t<avx2>;
+template struct jit_uni_binary_convolution_fwd_t<sse42>;
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binary_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binary_convolution.hpp
new file mode 100644
index 000000000..c5a188e74
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_binary_convolution.hpp
@@ -0,0 +1,138 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_JIT_UNI_BINARY_CONVOLUTION_HPP
+#define CPU_JIT_UNI_BINARY_CONVOLUTION_HPP
+
+#include "c_types_map.hpp"
+#include "cpu_binary_convolution_pd.hpp"
+#include "cpu_engine.hpp"
+#include "cpu_reducer.hpp"
+#include "jit_primitive_conf.hpp"
+#include "jit_uni_bin_conv_kernel.hpp"
+#include "mkldnn_thread.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <cpu_isa_t isa>
+struct jit_uni_binary_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public _cpu_binary_convolution_fwd_pd_t {
+        pd_t(engine_t *engine,
+                const binary_convolution_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const typename pd_t::base_class *hint_fwd_pd)
+            : _cpu_binary_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_(), jcp_dw_conv() {}
+
+        DECLARE_COMMON_PD_T(
+                JIT_IMPL_NAME_HELPER("jit:", isa, ""),
+                jit_uni_binary_convolution_fwd_t<isa>);
+
+        virtual status_t init() override {
+            using namespace prop_kind;
+            assert(this->engine()->kind() == engine_kind::cpu);
+            bool ok = true
+                && this->set_default_params() == status::success
+                && utils::one_of(this->cdesc_().prop_kind, forward_training, forward_inference)
+                && this->cdesc_().alg_kind == alg_kind::binary_convolution_direct
+                && utils::everyone_is(data_type::bin,
+                        this->cdesc_().src_desc.data_type,
+                        this->cdesc_().weights_desc.data_type)
+                && utils::one_of(this->cdesc_().dst_desc.data_type,
+                        memory::data_type::f32,
+                        memory::data_type::bin);
+            if (!ok) return status::unimplemented;
+
+            status_t sts = jit_uni_bin_conv_fwd_kernel<isa>::init_conf(jcp_, *this->desc(),
+                    *this->src_pd_.desc(), *this->weights_pd_.desc(),
+                    *this->dst_pd_.desc(), *this->attr());
+            if (sts != status::success) return sts;
+
+            if (jcp_.with_dw_conv) {
+                status_t sts_dw = jit_uni_dw_conv_row_f32<isa>::init_conf(jcp_, jcp_dw_conv, *this->attr());
+                if (sts_dw != status::success) return sts_dw;
+            }
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_uni_bin_conv_fwd_kernel<isa>::init_scratchpad(scratchpad, jcp_, jcp_dw_conv);
+
+            return status::success;
+        }
+
+        jit_bin_conv_conf_t jcp_;
+        jit_conv_conf_t jcp_dw_conv;
+
+    protected:
+        virtual status_t set_default_params() override {
+            using namespace memory_format;
+
+            auto desired_weights_format = isa == avx512_common ? OhIw16o32i : OhIw8o32i;
+
+            if (this->src_pd_.desc()->format == any)
+                CHECK(this->src_pd_.set_format(nhwc));
+            if (this->dst_pd_.desc()->format == any)
+                CHECK(this->dst_pd_.set_format(nhwc));
+            if (this->weights_pd_.desc()->format == any)
+                CHECK(this->weights_pd_.set_format(desired_weights_format));
+            return status::success;
+        }
+    };
+
+    jit_uni_binary_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {
+        kernel_ = new jit_uni_bin_conv_fwd_kernel<isa>(pd()->jcp_, pd()->jcp_dw_conv, *pd()->attr());
+
+        if (pd()->jcp_.with_dw_conv) {
+            dw_conv_kernel_ = new jit_uni_dw_conv_row_f32<isa>(pd()->jcp_dw_conv, *pd()->attr(), pd()->jcp_dw_conv.oc);
+        }
+    }
+
+    ~jit_uni_binary_convolution_fwd_t() {
+        delete kernel_;
+
+        if (pd()->jcp_.with_dw_conv) {
+            delete dw_conv_kernel_;
+        }
+    };
+
+    virtual void execute(event_t *e) const {
+        if (pd()->jcp_.with_dw_conv)
+            execute_forward_with_dw_conv();
+        else
+            execute_forward();
+
+        e->set_state(event_t::ready);
+    }
+
+private:
+    void execute_forward() const;
+    void execute_forward_with_dw_conv() const;
+
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
+    jit_uni_bin_conv_fwd_kernel<isa> *kernel_;
+    /* fuse with dw conv */
+    jit_uni_dw_conv_row_f32<isa> *dw_conv_kernel_;
+};
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.cpp
index 634e9f9cf..9aad4f1c0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2018-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -56,7 +56,7 @@ struct jit_uni_depthwise_kernel_f32 : public c_compatible {
 template <cpu_isa_t isa>
 int jit_uni_depthwise_injector_f32<isa>::aux_vecs_count(alg_kind_t depthwise_alg) {
     switch (depthwise_alg) {
-        case alg_kind::depthwise_scale_shift: return 0;
+        case alg_kind::depthwise_scale_shift: return isa == sse42 ? 1 : 0;
         case alg_kind::depthwise_prelu: return 2;
         default: assert(!"unsupported depthwise algorithm");
     }
@@ -132,8 +132,15 @@ void jit_uni_depthwise_injector_f32<isa>::assign_regs() {
 template <cpu_isa_t isa>
 void jit_uni_depthwise_injector_f32<isa>::scale_shift_compute_vector(const Vmm &vmm_src,
         const Xbyak::Reg64& p_weights, const Xbyak::Reg64& p_bias) {
-    h->uni_vmulps(vmm_src, vmm_src, h->ptr[p_weights]);
-    h->uni_vaddps(vmm_src, vmm_src, h->ptr[p_bias]);
+    if (isa == sse42) {
+        h->movups(vmm_mask, h->ptr[p_weights]);
+        h->mulps(vmm_src, vmm_mask);
+        h->movups(vmm_mask, h->ptr[p_bias]);
+        h->addps(vmm_src, vmm_mask);
+    } else {
+        h->uni_vmulps(vmm_src, vmm_src, h->ptr[p_weights]);
+        h->uni_vaddps(vmm_src, vmm_src, h->ptr[p_bias]);
+    };
 }
 
 template <cpu_isa_t isa>
@@ -145,8 +152,8 @@ void jit_uni_depthwise_injector_f32<isa>::prelu_compute_vector(const Vmm &vmm_sr
     if (isa == sse42) {
         h->pxor(vmm_mask, vmm_mask);
         h->cmpps(vmm_mask, vmm_src, _cmp_gt_os);
-        h->movups(vmm_aux0, vmm_src);
-        h->mulps(vmm_aux0, h->ptr[p_weights]);
+        h->movups(vmm_aux0, h->ptr[p_weights]);
+        h->mulps(vmm_aux0, vmm_src);
         h->blendvps(vmm_src, vmm_aux0);
     } else if (isa == avx2) {
         h->vxorps(vmm_mask, vmm_mask, vmm_mask);
@@ -202,7 +209,7 @@ struct jit_uni_scale_shift_kernel_f32 : public jit_uni_depthwise_kernel_f32,
         assert(desc.alg_kind == alg_kind::depthwise_scale_shift);
         assert(isa == sse42 || isa == avx2 || isa == avx512_common);
 
-        bool isFlat = desc.src_desc.format == nchw && desc.dst_desc.format == nchw ;
+        bool isFlat = desc.src_desc.format == nchw && desc.dst_desc.format == nchw;
 
         Reg64 param = abi_param1;
 
@@ -465,30 +472,30 @@ status_t jit_uni_depthwise_fwd_t<isa>::pd_t::init() {
 }
 
 template <cpu_isa_t isa>
-jit_uni_depthwise_fwd_t<isa>::jit_uni_depthwise_fwd_t(const pd_t *pd,
+jit_uni_depthwise_fwd_t<isa>::jit_uni_depthwise_fwd_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), kernel_(nullptr),
+    : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr),
       padded_weights_(nullptr), padded_bias_(nullptr) {
-    const auto &desc = *conf_.desc();
+    const auto &desc = *pd()->desc();
     switch (desc.alg_kind) {
         case alg_kind::depthwise_scale_shift:
-            kernel_ = new jit_uni_scale_shift_kernel_f32<isa>(desc, pd->with_bias()); break;
+            kernel_ = new jit_uni_scale_shift_kernel_f32<isa>(desc, pd()->with_bias()); break;
         case alg_kind::depthwise_prelu:
-            kernel_ = new jit_uni_prelu_kernel_f32<isa>(desc, pd->with_bias()); break;
+            kernel_ = new jit_uni_prelu_kernel_f32<isa>(desc, pd()->with_bias()); break;
         default: assert(!"unknown depthwise alg_kind");
     }
 
     const int simd_w = isa == avx512_common ? 16 : 8;
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
     const int c_without_padding = data_d.dims()[1];
     const int c_padded = rnd_up(c_without_padding, simd_w);
 
-    if (conf_.want_padded_weights()) {
+    if (pd()->want_padded_weights()) {
         padded_weights_ = (data_t *)malloc(sizeof(data_t) * c_padded, 64);
         for (int oc = c_without_padding; oc < c_padded; ++oc)
             padded_weights_[oc] = 0;
 
-        if (conf_.with_bias()) {
+        if (pd()->with_bias()) {
             padded_bias_ = (data_t *)malloc(sizeof(data_t) * c_padded, 64);
             for (int oc = c_without_padding; oc < c_padded; ++oc)
                 padded_bias_[oc] = 0;
@@ -504,15 +511,15 @@ jit_uni_depthwise_fwd_t<isa>::~jit_uni_depthwise_fwd_t() {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_depthwise_fwd_t<isa>::execute_forward() {
+void jit_uni_depthwise_fwd_t<isa>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const int N = data_d.dims()[0];
     const int C = data_d.dims()[1];
@@ -523,12 +530,12 @@ void jit_uni_depthwise_fwd_t<isa>::execute_forward() {
     const int ch_block_size = data_d.format() == nchw ? 1 : simd_w;
     const int CB = div_up(C, ch_block_size);
 
-    if (conf_.want_padded_weights()) {
+    if (pd()->want_padded_weights()) {
         for (int oc = 0; oc < C; ++oc)
             padded_weights_[oc] = weights[oc];
         weights = padded_weights_;
 
-        if (conf_.with_bias()) {
+        if (pd()->with_bias()) {
             for (int oc = 0; oc < C; ++oc)
                 padded_bias_[oc] = bias[oc];
             bias = padded_bias_;
@@ -537,7 +544,7 @@ void jit_uni_depthwise_fwd_t<isa>::execute_forward() {
 
     parallel_nd(N, CB, H,
         [&](int n, int cb, int h) {
-        jit_args arg = {};
+        auto arg = jit_args();
 
         arg.from    = &src[data_d.blk_off(n, cb, h)];
         arg.to      = &dst[data_d.blk_off(n, cb, h)];
@@ -564,21 +571,38 @@ void jit_uni_dw_conv_row_f32<isa>::load_src(int ur_w) {
         for (int ow = 0; ow < ur_w; ow++) {
             Vmm vmm_acc = get_acc_reg(i*ur_w + ow);
 
-            if (this->jcp.with_bias)
-                uni_vmovups(vmm_acc, vmmword[reg_bias + i*4*sizeof(float)]);
-            else
-                uni_vpxor(vmm_acc, vmm_acc, vmm_acc);
-
-            int o_off = ow*jcp.ch_block + i*4;
-            if (this->jcp.with_sum)
-                uni_vaddps(vmm_acc, vmm_acc,
-                           vmmword[reg_output + o_off*sizeof(float)]);
+            uni_vpxor(vmm_acc, vmm_acc, vmm_acc);
         }
     }
 }
 
 template <cpu_isa_t isa>
 void jit_uni_dw_conv_row_f32<isa>::apply_filter(int ur_w, int kw_size) {
+    auto load_src = [=](Vmm vmm_src, const Xbyak::Address &op) {
+        if (jcp.src_dt == data_type::u8) {
+            uni_vpmovzxbd(vmm_src, op);
+        } else {
+            uni_vmovups(vmm_src, op);
+        }
+    };
+
+    auto load_ker = [=](Vmm vmm_ker, const Xbyak::Address &op) {
+        if (jcp.src_dt == data_type::u8) {
+            uni_vpmovsxbd(vmm_ker, op);
+        } else {
+            uni_vmovups(vmm_ker, op);
+        }
+    };
+
+    auto compute = [=](Vmm vmm_acc, Vmm vmm_src, Vmm vmm_ker) {
+        if (jcp.src_dt == data_type::u8) {
+            uni_vpmulld(vmm_src, vmm_src, vmm_ker);
+            uni_vpaddd(vmm_acc, vmm_acc, vmm_src);
+        } else {
+            uni_vfmadd231ps(vmm_acc, vmm_src, vmm_ker);
+        }
+    };
+
     int ch_blk = jcp.ch_block;
     int stride_w = jcp.stride_w;
 
@@ -590,69 +614,63 @@ void jit_uni_dw_conv_row_f32<isa>::apply_filter(int ur_w, int kw_size) {
     jl(exit_label, T_NEAR);
     for (int i = 0; i < repeats; i++) {
         for (int kw = 0; kw < kw_size; kw++) {
-            int ker_off = kw * ch_blk + i*4;
+            int ker_off = kw * ch_blk + i*(jcp.ch_block / 2);
 
             Vmm vmm_ker = get_ker_reg(0);
-            uni_vmovups(vmm_ker, ptr[aux_reg_kernel
-                                     + ker_off * sizeof(float)]);
+            load_ker(vmm_ker, ptr[aux_reg_kernel + ker_off * jcp.typesize_in]);
 
             for (int ow = 0; ow < ur_w; ow++) {
-                int inp_off = ow * stride_w * ch_blk + kw * ch_blk + i*4;
+                int inp_off = ow * stride_w * ch_blk + kw * ch_blk + i*(jcp.ch_block / 2);
 
                 Vmm vmm_src = get_src_reg(0);
-                uni_vmovups(vmm_src, ptr[aux_reg_input0
-                                         + inp_off * sizeof(float)]);
+                load_src(vmm_src, ptr[aux_reg_input0 + inp_off * jcp.typesize_in]);
 
                 Vmm vmm_acc = get_acc_reg(i*ur_w + ow);
-                uni_vfmadd231ps(vmm_acc, vmm_src, vmm_ker);
+                compute(vmm_acc, vmm_src, vmm_ker);
             }
         }
     }
-    add(aux_reg_kernel, jcp.kw*ch_blk*sizeof(float));
+    add(aux_reg_kernel, jcp.kw*ch_blk*jcp.typesize_in);
 
     cmp(reg_kh, 2);
     jl(exit_label, T_NEAR);
     for (int i = 0; i < repeats; i++) {
         for (int kw = 0; kw < kw_size; kw++) {
-            int ker_off = kw * ch_blk + i*4;
+            int ker_off = kw * ch_blk + i*(jcp.ch_block / 2);
 
             Vmm vmm_ker = get_ker_reg(0);
-            uni_vmovups(vmm_ker, ptr[aux_reg_kernel
-                                     + ker_off * sizeof(float)]);
+            load_ker(vmm_ker, ptr[aux_reg_kernel + ker_off * jcp.typesize_in]);
 
             for (int ow = 0; ow < ur_w; ow++) {
-                int inp_off = ow * stride_w * ch_blk + kw * ch_blk + i*4;
+                int inp_off = ow * stride_w * ch_blk + kw * ch_blk + i*(jcp.ch_block / 2);
 
                 Vmm vmm_src = get_src_reg(0);
-                uni_vmovups(vmm_src, ptr[aux_reg_input1
-                                         + inp_off * sizeof(float)]);
+                load_src(vmm_src, ptr[aux_reg_input1 + inp_off * jcp.typesize_in]);
 
                 Vmm vmm_acc = get_acc_reg(i*ur_w + ow);
-                uni_vfmadd231ps(vmm_acc, vmm_src, vmm_ker);
+                compute(vmm_acc, vmm_src, vmm_ker);
             }
         }
     }
-    add(aux_reg_kernel, jcp.kw*ch_blk*sizeof(float));
+    add(aux_reg_kernel, jcp.kw*ch_blk*jcp.typesize_in);
 
     cmp(reg_kh, 3);
     jl(exit_label, T_NEAR);
     for (int i = 0; i < repeats; i++) {
         for (int kw = 0; kw < kw_size; kw++) {
-            int ker_off = kw * ch_blk + i*4;
+            int ker_off = kw * ch_blk + i*(jcp.ch_block / 2);
 
             Vmm vmm_ker = get_ker_reg(0);
-            uni_vmovups(vmm_ker, ptr[aux_reg_kernel
-                                     + ker_off * sizeof(float)]);
+            load_ker(vmm_ker, ptr[aux_reg_kernel + ker_off * jcp.typesize_in]);
 
             for (int ow = 0; ow < ur_w; ow++) {
-                int inp_off = ow * stride_w * ch_blk + kw * ch_blk + i*4;
+                int inp_off = ow * stride_w * ch_blk + kw * ch_blk + i*(jcp.ch_block / 2);
 
                 Vmm vmm_src = get_src_reg(0);
-                uni_vmovups(vmm_src, ptr[aux_reg_input2
-                                         + inp_off * sizeof(float)]);
+                load_src(vmm_src, ptr[aux_reg_input2 + inp_off * jcp.typesize_in]);
 
                 Vmm vmm_acc = get_acc_reg(i*ur_w + ow);
-                uni_vfmadd231ps(vmm_acc, vmm_src, vmm_ker);
+                compute(vmm_acc, vmm_src, vmm_ker);
             }
         }
     }
@@ -661,34 +679,276 @@ void jit_uni_dw_conv_row_f32<isa>::apply_filter(int ur_w, int kw_size) {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_dw_conv_row_f32<isa>::apply_activation(int ur_w) {
-    if (this->jcp.with_eltwise) {
-        int repeats = isa == sse42 ? 2 : 1;
-        eltwise_injector->compute_vector_range(4, repeats * ur_w + 4);
+void jit_uni_dw_conv_row_f32<isa>::cvt2ps(data_type_t type_in, Vmm vmm_in, const Operand &op, bool scalar_load) {
+    Xmm xmm_in = Xmm(vmm_in.getIdx());
+
+    switch (type_in) {
+        case data_type::f32:
+        case data_type::s32:
+            if (scalar_load) {
+                mov(reg_tmp_32, op);
+                movq(xmm_in, reg_tmp_64);
+            } else {
+                uni_vmovups(vmm_in, op);
+            }
+            break;
+        case data_type::s8:
+            if (scalar_load) {
+                movsx(reg_tmp_32, op);
+                movq(xmm_in, reg_tmp_64);
+            } else {
+                uni_vpmovsxbd(vmm_in, op);
+            }
+            break;
+        case data_type::u8:
+            if (scalar_load) {
+                movzx(reg_tmp_32, op);
+                movq(xmm_in, reg_tmp_64);
+            } else {
+                uni_vpmovzxbd(vmm_in, op);
+            }
+            break;
+        default: assert(!"unsupported data type");
     }
+
+    if (type_in != data_type::f32)
+        uni_vcvtdq2ps(vmm_in, vmm_in);
 }
 
 template <cpu_isa_t isa>
-void jit_uni_dw_conv_row_f32<isa>::store_dst(int ur_w) {
+void jit_uni_dw_conv_row_f32<isa>::apply_postprocessing(int ur_w, int oc_step) {
     int repeats = isa == sse42 ? 2 : 1;
+
+    for (int r = 0; r < repeats; r++) {
+        for (int ow = 0; ow < ur_w; ow++) {
+            if (jcp.src_dt == data_type::u8) {
+                uni_vcvtdq2ps(get_acc_reg(r * ur_w + ow), get_acc_reg(r * ur_w + ow));
+            }
+
+            if (jcp.with_bias) {
+                int b_off = r * (jcp.ch_block / 2);
+                cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias + b_off * jcp.typesize_bia], false);
+                uni_vaddps(get_acc_reg(r * ur_w + ow), get_acc_reg(r * ur_w + ow), vmm_bias);
+            }
+        }
+    }
+
+    if (jcp.with_sum) {
+        for (int r = 0; r < repeats; r++) {
+            int tail_size = isa == sse42 ? nstl::min(jcp.ch_block / 2, oc_step - r * jcp.ch_block / 2) : oc_step;
+            bool is_scalar_store = isa == sse42 ? tail_size < jcp.ch_block / 2 : tail_size < jcp.ch_block;
+
+            for (int ow = 0; ow < ur_w; ow++) {
+                if (is_scalar_store) {
+                    for (int oc = 0; oc < tail_size; oc++) {
+                        int o_off = ow * ow_stride_ + r * (jcp.ch_block / 2) + oc;
+
+                        uni_vpxor(vmm_sum, vmm_sum, vmm_sum);
+                        cvt2ps(jcp.dst_dt, vmm_sum, ptr[reg_output + o_off * jcp.typesize_out], true);
+
+                        if (oc >= jcp.ch_block / 2) {
+                            vperm2i128(Ymm(vmm_sum.getIdx()), Ymm(vmm_sum.getIdx()), Ymm(vmm_sum.getIdx()), 0x01);
+                        }
+                        uni_vpslldq(vmm_sum, vmm_sum, jcp.typesize_out * (oc % (jcp.ch_block / 2)));
+
+                        uni_vaddps(get_acc_reg(r * ur_w + ow), get_acc_reg(r * ur_w + ow), vmm_sum);
+                    }
+                } else {
+                    int o_off = ow * ow_stride_ + r * (jcp.ch_block / 2);
+
+                    uni_vpxor(vmm_sum, vmm_sum, vmm_sum);
+                    cvt2ps(jcp.dst_dt, vmm_sum, ptr[reg_output + o_off * jcp.typesize_out], false);
+
+                    uni_vaddps(get_acc_reg(r * ur_w + ow), get_acc_reg(r * ur_w + ow), vmm_sum);
+                }
+            }
+        }
+    }
+
+    const auto &p = attr_.post_ops_;
+    int eltwise_inj_idx = 0;
+    int depthwise_inj_idx = 0;
+    int start_idx = p.find(primitive_kind::convolution) + 1;
+    for (int i = start_idx; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors[eltwise_inj_idx]->compute_vector_range(4, 4 + repeats * ur_w);
+            eltwise_inj_idx++;
+        } else if (post_op.is_depthwise()) {
+            mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+            mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+            add(reg_d_weights, reg_oc_off);
+            add(reg_d_bias, reg_oc_off);
+
+            depthwise_injectors[depthwise_inj_idx]->compute_vector_range(4, 4 + ur_w, reg_d_weights, reg_d_bias);
+
+            if (repeats == 2) {
+                add(reg_d_weights, (jcp.ch_block / 2) * sizeof(float));
+                add(reg_d_bias, (jcp.ch_block / 2) * sizeof(float));
+
+                depthwise_injectors[depthwise_inj_idx]->compute_vector_range(4 + ur_w, 4 + 2 * ur_w, reg_d_weights, reg_d_bias);
+            }
+
+            depthwise_inj_idx++;
+        }
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_dw_conv_row_f32<isa>::store_dst_typed(const Xbyak::Address &op, Vmm vmm_dst, bool scalar_store) {
+    Ymm ymm_dst = Ymm(vmm_dst.getIdx());
+    Xmm xmm_dst = Xmm(vmm_dst.getIdx());
+
+    switch (jcp.dst_dt) {
+        case data_type::f32:
+        case data_type::s32:
+            if (scalar_store) {
+                movq(reg_tmp_64, xmm_dst);
+                mov(op, reg_tmp_32);
+            } else {
+                uni_vmovups(op, vmm_dst);
+            }
+            break;
+        case data_type::s8:
+            uni_vpackssdw(vmm_dst, vmm_dst, vmm_dst);
+
+            if (isa != sse42 && !scalar_store)
+                vpermq(ymm_dst, ymm_dst, 0x08);
+
+            uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst);
+
+            if (scalar_store) {
+                movq(reg_tmp_64, xmm_dst);
+                mov(op, reg_tmp_8);
+            } else {
+                if (isa != sse42)
+                    vmovq(op, xmm_dst);
+                else
+                    movd(op, xmm_dst);
+            }
+            break;
+        case data_type::u8:
+        case data_type::bin:
+            uni_vpackusdw(vmm_dst, vmm_dst, vmm_dst);
+
+            if (isa != sse42 && !scalar_store)
+                vpermq(ymm_dst, ymm_dst, 0x08);
+
+            uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst);
+
+            if (scalar_store) {
+                movq(reg_tmp_64, xmm_dst);
+                mov(op, reg_tmp_8);
+            } else {
+                if (isa != sse42)
+                    vmovq(op, xmm_dst);
+                else
+                    movd(op, xmm_dst);
+            }
+            break;
+        default:
+            assert(!"unknown dst_dt");
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_dw_conv_row_f32<isa>::store_dst(int ur_w, int oc_step) {
+    int repeats = isa == sse42 && oc_step > (jcp.ch_block / 2) ? 2 : 1;
+
     for (int i = 0; i < repeats; i++) {
         for (int ow = 0; ow < ur_w; ow++) {
-            int o_off = ow*jcp.ch_block + i*4;
-            Vmm vmm_dst = get_acc_reg(i*ur_w + ow);
+            Vmm vmm_dst = get_acc_reg(i * ur_w + ow);
+            if (jcp.dst_dt != data_type::f32 && jcp.dst_dt != data_type::bin) {
+                if (attr_.round_mode_ == round_mode::nearest)
+                    uni_vcvtps2dq(vmm_dst, vmm_dst);
+                else if (attr_.round_mode_ == round_mode::down) {
+                    uni_vroundps(vmm_dst, vmm_dst, 1);
+                    uni_vcvtps2dq(vmm_dst, vmm_dst);
+                } else
+                    assert(!"unimplemented");
+            }
+        }
+    }
+
+    if (jcp.with_binarization) {
+        int output_step = div_up(ow_stride_, 8);
+
+        const auto &p = attr_.post_ops_;
+        int binarization_idx = p.find(primitive_kind::binarization);
+
+        mov(reg_b_weights, reinterpret_cast<size_t>(p.entry_[binarization_idx].binarization.weights_data));
+        add(reg_b_weights, reg_oc_off);
+
+        for (int ow = 0; ow < ur_w; ow++) {
+            for (int i = 0; i < repeats; i++) {
+                int tail_size = isa == sse42 ? nstl::min(jcp.ch_block / 2, oc_step - i * jcp.ch_block / 2) : oc_step;
+                mov(reg_b_mask, (1 << tail_size) - 1);
+                uni_vmovups(vmm_thr, ptr[reg_b_weights + i * (jcp.ch_block / 2) * sizeof(float)]);
+
+                Vmm vmm_dst = get_acc_reg(i * ur_w + ow);
+
+                uni_vcmpgtps(vmm_dst, vmm_dst, vmm_thr);
+
+                if (i == 0) {
+                    uni_vmovmskps(reg_tmp_32, vmm_dst);
+                    and_(reg_tmp_64, reg_b_mask);
+                } else {
+                    uni_vmovmskps(reg_tmp2_32, vmm_dst);
+                    and_(reg_tmp2_64, reg_b_mask);
+                    shl(reg_tmp2_32, 4);
+                    or_(reg_tmp_32, reg_tmp2_32);
+                }
+
+                if (i == repeats - 1) {
+                    const size_t o_off = ow * output_step;
+                    mov(ptr[reg_output + o_off * jcp.typesize_out], reg_tmp_8);
+                }
+            }
+        }
+    } else {
+        for (int i = 0; i < repeats; i++) {
+            int tail_size = isa == sse42 ? nstl::min(jcp.ch_block / 2, oc_step - i * jcp.ch_block / 2) : oc_step;
+            bool is_scalar_store = isa == sse42 ? tail_size < jcp.ch_block / 2 : tail_size < jcp.ch_block;
+            if (is_scalar_store) {
+                for (int ow = 0; ow < ur_w; ow++) {
+                    Vmm vmm_dst = get_acc_reg(i * ur_w + ow);
+                    Ymm ymm_dst = Ymm(vmm_dst.getIdx());
+
+                    for (int oc = 0; oc < tail_size; oc++) {
+                        int o_off = ow * ow_stride_ + i * (jcp.ch_block / 2) + oc;
+                        store_dst_typed(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, true);
+
+                        if (isa == sse42) {
+                            psrldq(vmm_dst, jcp.typesize_out);
+                        } else {
+                            vperm2i128(ymm_tmp, ymm_dst, ymm_dst, 0x01);
+                            vpalignr(ymm_dst, vmm_tmp, ymm_dst, jcp.typesize_out);
+                        }
+                    }
+                }
+            } else {
+                for (int ow = 0; ow < ur_w; ow++) {
+                    int o_off = ow * ow_stride_ + i * (jcp.ch_block / 2);
+                    Vmm vmm_dst = get_acc_reg(i * ur_w + ow);
 
-            uni_vmovups(vmmword[reg_output + o_off*sizeof(float)], vmm_dst);
+                    store_dst_typed(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, false);
+                }
+            }
         }
     }
 }
 
 template <cpu_isa_t isa>
-void jit_uni_dw_conv_row_f32<isa>::loop_body() {
+void jit_uni_dw_conv_row_f32<isa>::loop_body(int oc_step) {
     Label left_pad_label;
     Label right_pad_label;
     Label unrolled_w_label;
     Label tail_w_label;
     Label exit_label;
 
+    int output_step = jcp.with_binarization ? div_up(ow_stride_, 8) : ow_stride_;
+
     L(left_pad_label); {
         int ur_w = 1;
         int kw = jcp.iw == 1 ? jcp.kw - 2 : jcp.kw - 1;
@@ -697,18 +957,17 @@ void jit_uni_dw_conv_row_f32<isa>::loop_body() {
         mov(aux_reg_input1, reg_input1);
         mov(aux_reg_input2, reg_input2);
         mov(aux_reg_kernel, reg_kernel);
-        add(aux_reg_kernel, jcp.ch_block*sizeof(float));
+        add(aux_reg_kernel, jcp.ch_block*jcp.typesize_in);
 
         load_src(ur_w);
         apply_filter(ur_w, kw);
-        apply_activation(ur_w);
-        store_dst(ur_w);
+        apply_postprocessing(ur_w, oc_step);
+        store_dst(ur_w, oc_step);
 
-        add(reg_input0, sizeof(float) * ur_w * jcp.ch_block * (jcp.stride_w-1));
-        add(reg_input1, sizeof(float) * ur_w * jcp.ch_block * (jcp.stride_w-1));
-        add(reg_input2, sizeof(float) * ur_w * jcp.ch_block * (jcp.stride_w-1));
-
-        add(reg_output, sizeof(float) * ur_w * jcp.ch_block);
+        add(reg_input0, jcp.typesize_in * ur_w * jcp.ch_block * (jcp.stride_w-1));
+        add(reg_input1, jcp.typesize_in * ur_w * jcp.ch_block * (jcp.stride_w-1));
+        add(reg_input2, jcp.typesize_in * ur_w * jcp.ch_block * (jcp.stride_w-1));
+        add(reg_output, jcp.typesize_out * ur_w * output_step);
 
         sub(reg_ur_w, ur_w);
     }
@@ -727,13 +986,13 @@ void jit_uni_dw_conv_row_f32<isa>::loop_body() {
 
         load_src(ur_w);
         apply_filter(ur_w, kw);
-        apply_activation(ur_w);
-        store_dst(ur_w);
+        apply_postprocessing(ur_w, oc_step);
+        store_dst(ur_w, oc_step);
 
-        add(reg_input0, sizeof(float) * ur_w * jcp.ch_block * jcp.stride_w);
-        add(reg_input1, sizeof(float) * ur_w * jcp.ch_block * jcp.stride_w);
-        add(reg_input2, sizeof(float) * ur_w * jcp.ch_block * jcp.stride_w);
-        add(reg_output, sizeof(float) * ur_w * jcp.ch_block);
+        add(reg_input0, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
+        add(reg_input1, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
+        add(reg_input2, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
+        add(reg_output, jcp.typesize_out * ur_w * output_step);
 
         sub(reg_ur_w, ur_w);
         jmp(unrolled_w_label, T_NEAR);
@@ -756,13 +1015,13 @@ void jit_uni_dw_conv_row_f32<isa>::loop_body() {
 
         load_src(ur_w);
         apply_filter(ur_w, kw);
-        apply_activation(ur_w);
-        store_dst(ur_w);
+        apply_postprocessing(ur_w, oc_step);
+        store_dst(ur_w, oc_step);
 
-        add(reg_input0, sizeof(float) * ur_w * jcp.ch_block * jcp.stride_w);
-        add(reg_input1, sizeof(float) * ur_w * jcp.ch_block * jcp.stride_w);
-        add(reg_input2, sizeof(float) * ur_w * jcp.ch_block * jcp.stride_w);
-        add(reg_output, sizeof(float) * ur_w * jcp.ch_block);
+        add(reg_input0, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
+        add(reg_input1, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
+        add(reg_input2, jcp.typesize_in * ur_w * jcp.ch_block * jcp.stride_w);
+        add(reg_output, jcp.typesize_out * ur_w * output_step);
 
         sub(reg_ur_w, ur_w);
         jmp(tail_w_label, T_NEAR);
@@ -780,8 +1039,8 @@ void jit_uni_dw_conv_row_f32<isa>::loop_body() {
 
             load_src(ur_w);
             apply_filter(ur_w, kw);
-            apply_activation(ur_w);
-            store_dst(ur_w);
+            apply_postprocessing(ur_w, oc_step);
+            store_dst(ur_w, oc_step);
 
             sub(reg_ur_w, ur_w);
         }
@@ -791,8 +1050,26 @@ void jit_uni_dw_conv_row_f32<isa>::loop_body() {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_dw_conv_row_f32<isa>::generate()
-{
+void jit_uni_dw_conv_row_f32<isa>::generate() {
+    const auto &p = attr_.post_ops_;
+    int start_idx = p.find(primitive_kind::convolution) + 1;
+    for (int i = start_idx; i < p.len_; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<isa>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<isa>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
     this->preamble();
 
     mov(reg_input0, ptr[this->param1 + GET_OFF_DW(src_row0)]);
@@ -804,45 +1081,196 @@ void jit_uni_dw_conv_row_f32<isa>::generate()
         mov(reg_bias, ptr[this->param1 + GET_OFF_DW(bias)]);
     mov(reg_kh, ptr[this->param1 + GET_OFF_DW(kh_padding)]);
     mov(reg_ur_w, ptr[this->param1 + GET_OFF_DW(ur_w)]);
+    mov(reg_oc_work, ptr[this->param1 + GET_OFF_DW(oc_work)]);
+    mov(reg_oc_off, ptr[this->param1 + GET_OFF_DW(oc_off)]);
+
+    Label(tail_label);
+    Label(exit_label);
 
-    loop_body();
+    cmp(reg_oc_work, jcp.ch_block);
+    jl(tail_label, T_NEAR);
+
+    loop_body(jcp.ch_block);
+    jmp(exit_label, T_NEAR);
+
+    L(tail_label);
+
+    if (jcp.oc % jcp.ch_block != 0)
+        loop_body(jcp.oc % jcp.ch_block);
+
+    L(exit_label);
 
     this->postamble();
 
-    if (jcp.with_eltwise)
-        eltwise_injector->prepare_table();
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
+}
+
+template <cpu_isa_t isa>
+bool jit_uni_dw_conv_row_f32<isa>::post_ops_ok(jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
+    const auto &p = attr.post_ops_;
+
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_depthwise = [&](int idx) { return p.entry_[idx].is_depthwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
+    auto is_binarization = [&](int idx) { return p.entry_[idx].is_binarization(); };
+
+    int start_idx = p.find(primitive_kind::convolution) + 1;
+
+    switch (p.len_ - start_idx) {
+    case 0: return true; // no post_ops
+    case 1: return is_simple(start_idx) || is_sum(start_idx) || is_binarization(start_idx);
+    case 2: return (is_sum(start_idx) && is_simple(start_idx+1)) || (is_simple(start_idx) && is_simple(start_idx+1)) ||
+                   (is_simple(start_idx) && is_binarization(start_idx+1));
+    case 3: return (is_sum(start_idx) && is_simple(start_idx+1) && is_simple(start_idx+2));
+    default: return false;
+    }
+
+    return false;
+}
+
+template <cpu_isa_t isa>
+status_t jit_uni_dw_conv_row_f32<isa>::init_conf(jit_1x1_conv_conf_t &jcp, jit_conv_conf_t &jcp_dw,
+        const primitive_attr_t &attr) {
+    if (!mayiuse(isa)) return status::unimplemented;
+    const int simd_w = isa == avx512_common ? 16 : 8;
+
+    const auto &p = attr.post_ops_;
+
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp_dw.with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
+
+    jcp_dw.ch_block = simd_w;
+    jcp_dw.with_bias = true;
+
+    jcp_dw.kh = p.entry_[dw_conv_ind].dw_conv.ker_h;
+    jcp_dw.kw = p.entry_[dw_conv_ind].dw_conv.ker_w;
+    jcp_dw.ic = jcp.oc;
+    jcp_dw.oc = jcp.oc;
+    jcp_dw.ih = p.entry_[dw_conv_ind].dw_conv.in_h;
+    jcp_dw.iw = p.entry_[dw_conv_ind].dw_conv.in_w;
+    jcp_dw.oh = jcp.dw_conv_oh;
+    jcp_dw.ow = jcp.dw_conv_ow;
+    jcp_dw.stride_h = p.entry_[dw_conv_ind].dw_conv.str_h;
+    jcp_dw.stride_w = p.entry_[dw_conv_ind].dw_conv.str_w;
+    jcp_dw.conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
+    jcp_dw.conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
+
+    if (jcp_dw.kh != 3 || jcp_dw.kw != 3)
+        return status::unimplemented;
+
+    if (!post_ops_ok(jcp_dw, attr))
+        return status::unimplemented;
+
+    jcp_dw.ur_w = 4;
+
+    jcp_dw.src_dt = jcp.src_dt;
+    jcp_dw.dst_dt = jcp.dst_dt;
+    jcp_dw.bia_dt = jcp.bia_dt;
+    jcp_dw.typesize_in = (int)types::data_type_size(jcp.src_dt);
+    jcp_dw.typesize_bia = (int)types::data_type_size(jcp.bia_dt);
+    jcp_dw.typesize_out = (int)types::data_type_size(jcp.dst_dt);
+
+    if (jcp_dw.src_dt != mkldnn_f32 && jcp_dw.src_dt != mkldnn_u8)
+        return status::unimplemented;
+
+    return status::success;
+}
+
+template <cpu_isa_t isa>
+status_t jit_uni_dw_conv_row_f32<isa>::init_conf(jit_conv_conf_t &jcp, jit_conv_conf_t &jcp_dw,
+        const primitive_attr_t &attr) {
+    if (!mayiuse(isa)) return status::unimplemented;
+    const int simd_w = isa == avx512_common ? 16 : 8;
+
+    const auto &p = attr.post_ops_;
+
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp_dw.with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
+
+    jcp_dw.ch_block = simd_w;
+    jcp_dw.with_bias = true;
+
+    jcp_dw.kh = p.entry_[dw_conv_ind].dw_conv.ker_h;
+    jcp_dw.kw = p.entry_[dw_conv_ind].dw_conv.ker_w;
+    jcp_dw.ic = jcp.oc;
+    jcp_dw.oc = jcp.oc;
+    jcp_dw.ih = p.entry_[dw_conv_ind].dw_conv.in_h;
+    jcp_dw.iw = p.entry_[dw_conv_ind].dw_conv.in_w;
+    jcp_dw.oh = jcp.dw_conv_oh;
+    jcp_dw.ow = jcp.dw_conv_ow;
+    jcp_dw.stride_h = p.entry_[dw_conv_ind].dw_conv.str_h;
+    jcp_dw.stride_w = p.entry_[dw_conv_ind].dw_conv.str_w;
+    jcp_dw.conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
+    jcp_dw.conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
+
+    if (jcp_dw.kh != 3 || jcp_dw.kw != 3)
+        return status::unimplemented;
+
+    if (!post_ops_ok(jcp_dw, attr))
+        return status::unimplemented;
+
+    jcp_dw.ur_w = 4;
+
+    jcp_dw.src_dt = jcp.dst_dt;
+    jcp_dw.dst_dt = jcp.dst_dt;
+    jcp_dw.bia_dt = jcp.bia_dt;
+    jcp_dw.typesize_in = (int)types::data_type_size(jcp.src_dt);
+    jcp_dw.typesize_bia = (int)types::data_type_size(jcp.bia_dt);
+    jcp_dw.typesize_out = (int)types::data_type_size(jcp.dst_dt);
+
+    if (jcp_dw.src_dt != mkldnn_f32 && jcp_dw.src_dt != mkldnn_u8)
+        return status::unimplemented;
+
+    return status::success;
 }
 
 template <cpu_isa_t isa>
-status_t jit_uni_dw_conv_row_f32<isa>::init_conf(jit_conv_conf_t &jcp,
-        int ic, int ih, int iw, int oh, int ow, int ker_h, int ker_w, int str_h, int str_w, alg_kind_t eltwise_alg,
-        float eltwise_alpha, float eltwise_beta, bool with_sum) {
+status_t jit_uni_dw_conv_row_f32<isa>::init_conf(jit_bin_conv_conf_t &jcp, jit_conv_conf_t &jcp_dw,
+        const primitive_attr_t &attr) {
     if (!mayiuse(isa)) return status::unimplemented;
     const int simd_w = isa == avx512_common ? 16 : 8;
 
-    jcp.kh = ker_h;
-    jcp.kw = ker_w;
-    jcp.ch_block = simd_w;
-    jcp.with_bias = true;
-    jcp.ic = ic;
-    jcp.oc = ic;
-    jcp.ih = ih;
-    jcp.iw = iw;
-    jcp.oh = oh;
-    jcp.ow = ow;
-    jcp.stride_h = str_h;
-    jcp.stride_w = str_w;
-
-    if (jcp.kh != 3 || jcp.kw != 3)
-        return  status::unimplemented;
-
-    jcp.ur_w = 4;
-
-    jcp.with_eltwise  = eltwise_alg != mkldnn_alg_kind_undef;
-    jcp.eltwise_alg   = eltwise_alg;
-    jcp.eltwise_alpha = eltwise_alpha;
-    jcp.eltwise_beta  = eltwise_beta;
-    jcp.with_sum = with_sum;
+    const auto &p = attr.post_ops_;
+
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp_dw.with_sum = p.find(primitive_kind::sum, dw_conv_ind) != -1;
+    jcp_dw.with_binarization = p.find(primitive_kind::binarization, dw_conv_ind) != -1;
+
+    jcp_dw.ch_block = simd_w;
+    jcp_dw.with_bias = true;
+
+    jcp_dw.kh = p.entry_[dw_conv_ind].dw_conv.ker_h;
+    jcp_dw.kw = p.entry_[dw_conv_ind].dw_conv.ker_w;
+    jcp_dw.ic = jcp.oc;
+    jcp_dw.oc = jcp.oc;
+    jcp_dw.ih = p.entry_[dw_conv_ind].dw_conv.in_h;
+    jcp_dw.iw = p.entry_[dw_conv_ind].dw_conv.in_w;
+    jcp_dw.oh = jcp.dw_conv_oh;
+    jcp_dw.ow = jcp.dw_conv_ow;
+    jcp_dw.stride_h = p.entry_[dw_conv_ind].dw_conv.str_h;
+    jcp_dw.stride_w = p.entry_[dw_conv_ind].dw_conv.str_w;
+    jcp_dw.conv_weights = p.entry_[dw_conv_ind].dw_conv.weights_data;
+    jcp_dw.conv_biases = p.entry_[dw_conv_ind].dw_conv.biases_data;
+
+    if (jcp_dw.kh != 3 || jcp_dw.kw != 3)
+        return status::unimplemented;
+
+    if (!post_ops_ok(jcp_dw, attr))
+        return status::unimplemented;
+
+    jcp_dw.ur_w = 4;
+
+    jcp_dw.src_dt = mkldnn_f32;
+    jcp_dw.dst_dt = jcp_dw.with_binarization ? mkldnn_bin : mkldnn_f32;
+    jcp_dw.bia_dt = mkldnn_f32;
+    jcp_dw.typesize_in = (int)types::data_type_size(jcp_dw.src_dt);
+    jcp_dw.typesize_bia = (int)types::data_type_size(jcp_dw.bia_dt);
+    jcp_dw.typesize_out = (int)types::data_type_size(jcp_dw.dst_dt);
+
+    if (jcp_dw.src_dt != mkldnn_f32 && jcp_dw.src_dt != mkldnn_u8)
+        return status::unimplemented;
 
     return status::success;
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.hpp
index 111999260..47d93c838 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_depthwise.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2018-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -93,21 +93,21 @@ struct jit_uni_depthwise_fwd_t : public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_uni_depthwise_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_depthwise_fwd_t(const pd_t *apd, const input_vector &inputs,
                        const output_vector &outputs);
     ~jit_uni_depthwise_fwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_depthwise_kernel_f32 *kernel_;
     data_t *padded_weights_;
     data_t *padded_bias_;
@@ -118,37 +118,39 @@ template <cpu_isa_t isa>
 struct jit_uni_dw_conv_row_f32: public jit_generator {
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_ds_dw_conv_kernel_f32)
 
-    jit_uni_dw_conv_row_f32(jit_conv_conf_t ajcp): jcp(ajcp) {
-        if (jcp.with_eltwise) {
-            eltwise_injector = new jit_uni_eltwise_injector_f32<isa>(this,
-                    jcp.eltwise_alg, jcp.eltwise_alpha, jcp.eltwise_beta);
-        }
-
+    jit_uni_dw_conv_row_f32(jit_conv_conf_t ajcp, const primitive_attr_t &attr, int ow_stride)
+        : jcp(ajcp), attr_(attr), ow_stride_(ow_stride) {
         this->generate();
         jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
     }
 
     ~jit_uni_dw_conv_row_f32() {
-        if (jcp.with_eltwise) {
-            delete eltwise_injector;
-        }
+        for (auto inj : eltwise_injectors)
+            delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
     }
 
     static bool post_ops_ok(jit_conv_conf_t &jcp,
             const primitive_attr_t &attr);
-    static status_t init_conf(jit_conv_conf_t &jcp,
-            int ic, int ih, int iw, int oh, int ow,
-            int ker_h, int ker_w, int str_h, int str_w,
-            alg_kind_t eltwise_alg,
-            float eltwise_alpha, float eltwise_beta, bool with_sum);
+    static status_t init_conf(jit_1x1_conv_conf_t &jcp, jit_conv_conf_t &jcp_dw, const primitive_attr_t &attr);
+    static status_t init_conf(jit_conv_conf_t &jcp, jit_conv_conf_t &jcp_dw, const primitive_attr_t &attr);
+    static status_t init_conf(jit_bin_conv_conf_t &jcp, jit_conv_conf_t &jcp_dw, const primitive_attr_t &attr);
 
     jit_conv_conf_t jcp;
+    const primitive_attr_t &attr_;
     void (*jit_ker)(jit_conv_call_s *);
+    int ow_stride_;
 
 private:
     using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
         isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
     using reg64_t = const Xbyak::Reg64;
+    using reg32_t = const Xbyak::Reg32;
+    using reg8_t = const Xbyak::Reg8;
     const Xbyak::AddressFrame &vmmword = (isa == sse42)
         ? xword : (isa == avx2) ? yword : zword;
     const int vlen = cpu_isa_traits<isa>::vlen;
@@ -161,29 +163,50 @@ private:
     reg64_t aux_reg_input1 = r12;
     reg64_t aux_reg_input2 = r13;
 
-
     reg64_t reg_kernel = r14;
     reg64_t aux_reg_kernel = r15;
     reg64_t reg_output = rdx;
     reg64_t reg_bias = rbx;
     reg64_t reg_kh = rax;
     reg64_t reg_ur_w = rbp;
+    reg64_t reg_oc_work = abi_not_param1;
+
+    reg64_t reg_oc_off = rsi;
+    reg64_t reg_d_weights = aux_reg_input0;
+    reg64_t reg_d_bias = aux_reg_input1;
 
-    reg64_t imm_addr64 = aux_reg_input0;
+    reg64_t reg_b_weights = r15;
+    reg64_t reg_b_mask = reg_d_bias;
+
+    reg32_t reg_tmp_32 = r11d;
+    reg64_t reg_tmp_64 = r11;
+    reg8_t reg_tmp_8 = r11b;
+
+    reg32_t reg_tmp2_32 = r13d;
+    reg64_t reg_tmp2_64 = r13;
 
     inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); }
     inline Vmm get_src_reg(int idx) { return Vmm(idx + 1); }
     inline Vmm get_acc_reg(int idx) { return Vmm(idx + 4); }
 
+    Xbyak::Ymm ymm_tmp = Xbyak::Ymm(0);
+    Vmm vmm_tmp = Vmm(0);
+    Vmm vmm_sum = Vmm(0);
+    Vmm vmm_bias = Vmm(0);
+    Vmm vmm_thr = Vmm(0);
+
     inline void load_src(int ur_w);
     inline void apply_filter(int ur_w, int kw_size);
-    inline void apply_activation(int ur_w);
-    inline void store_dst(int ur_w);
-    inline void loop_body();
+    inline void cvt2ps(data_type_t type_in, Vmm vmm_in, const Xbyak::Operand &op, bool scalar_load);
+    inline void apply_postprocessing(int ur_w, int oc_step);
+    inline void store_dst_typed(const Xbyak::Address &op, Vmm vmm_dst, bool scalar_store);
+    inline void store_dst(int ur_w, int oc_step);
+    inline void loop_body(int oc_step);
 
     void generate();
 
-    jit_uni_eltwise_injector_f32<isa>* eltwise_injector;
+    nstl::vector<jit_uni_eltwise_injector_f32<isa>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<isa>*> depthwise_injectors;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.cpp
index 0d97cce11..db6454c34 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.cpp
@@ -30,6 +30,7 @@ namespace cpu {
 
 using namespace mkldnn::impl::prop_kind;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 using namespace Xbyak;
@@ -183,13 +184,6 @@ void jit_uni_dw_conv_fwd_kernel_f32<isa>::apply_postprocess(int ur_ch_blocks, in
     int depthwise_inj_idx = 0;
     const auto &p = attr_.post_ops_;
 
-    if (p.len_ == 0 && eltwise_injectors.size() == 1) {
-        int start_idx = get_acc_reg(0).getIdx();
-        int end_idx = get_acc_reg(repeats * ur_w * ur_ch_blocks).getIdx();
-
-        eltwise_injectors[0]->compute_vector_range(start_idx, end_idx);
-    }
-
     for (int i = 0; i < p.len_; i++) {
         auto& post_op = p.entry_[i];
         if (post_op.is_eltwise()) {
@@ -293,14 +287,7 @@ void jit_uni_dw_conv_fwd_kernel_f32<isa>::loop_body(int ur_ch_blocks) {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_dw_conv_fwd_kernel_f32<isa>::generate()
-{
-    if (jcp.with_eltwise) {
-        eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<isa>(
-                this, jcp.eltwise_alg, jcp.eltwise_alpha, 0
-        ));
-    }
-
+void jit_uni_dw_conv_fwd_kernel_f32<isa>::generate() {
     const auto &p = attr_.post_ops_;
     for (int i = 0; i < p.len_; i++) {
         auto &post_op = p.entry_[i];
@@ -369,14 +356,10 @@ bool jit_uni_dw_conv_fwd_kernel_f32<isa>::post_ops_ok(
     auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true; // no post_ops
-    case 1: return true  // sum OR eltwise OR deptwise
-                    && !jcp.with_eltwise && (is_simple(0) || is_sum(0));
-    case 2: return true // sum->relu OR sum->depthwise OR eltwise->depthwise OR depthwise->depthwise
-                    && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) ||
-                                             (is_simple(0) && is_simple(1)));
-    case 3: return true // sum->eltwise->depthwise OR sum->depthwise->eltwise OR sum->depthwise->depthwise
-                   && !jcp.with_eltwise && ((is_sum(0) && is_simple(1) && is_simple(2)));
+    case 0: return true;
+    case 1: return is_simple(0) || is_sum(0);
+    case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_simple(1));
+    case 3: return is_sum(0) && is_simple(1) && is_simple(2);
     default: return false;
     }
 
@@ -387,7 +370,7 @@ template <cpu_isa_t isa>
 status_t jit_uni_dw_conv_fwd_kernel_f32<isa>::init_conf(jit_conv_conf_t &jcp,
         const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope)
+        const primitive_attr_t &attr)
 {
     if (!mayiuse(isa)) return status::unimplemented;
 
@@ -426,9 +409,6 @@ status_t jit_uni_dw_conv_fwd_kernel_f32<isa>::init_conf(jit_conv_conf_t &jcp,
 
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alg = mkldnn_eltwise_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
@@ -473,6 +453,13 @@ status_t jit_uni_dw_conv_fwd_kernel_f32<isa>::init_conf(jit_conv_conf_t &jcp,
     return status::success;
 }
 
+template <cpu_isa_t isa>
+void jit_uni_dw_conv_fwd_kernel_f32<isa>::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    if (jcp.with_bias && jcp.oc_without_padding != jcp.oc)
+        scratchpad.book(key_conv_padded_bias, sizeof(float) * jcp.oc);
+}
+
 template struct jit_uni_dw_conv_fwd_kernel_f32<avx512_common>;
 template struct jit_uni_dw_conv_fwd_kernel_f32<avx2>;
 template struct jit_uni_dw_conv_fwd_kernel_f32<sse42>;
@@ -754,6 +741,13 @@ status_t jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_conf(
     return status::success;
 }
 
+template <cpu_isa_t isa>
+void jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    UNUSED(scratchpad);
+    UNUSED(jcp);
+}
+
 template struct jit_uni_dw_conv_bwd_data_kernel_f32<avx512_common>;
 template struct jit_uni_dw_conv_bwd_data_kernel_f32<avx2>;
 template struct jit_uni_dw_conv_bwd_data_kernel_f32<sse42>;
@@ -776,7 +770,7 @@ inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::load_filter() {
             int off_filter = (reg_set + i) * simd_w;
             Vmm vmm_acc = get_acc_reg(reg_set + i);
             uni_vmovups(vmm_acc,
-                    vmmword[tmp_reg_filter + off_filter * sizeof(float)]);
+                    vmmword[reg_tmp_filter + off_filter * sizeof(float)]);
         }
     }
 }
@@ -800,58 +794,59 @@ inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::load_bias() {
 
 template <cpu_isa_t isa>
 inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_ow_step_unroll(
-        int l_pad, int r_pad, int pad_offset, int ow_block) {
-    const int pad = nstl::max(jcp.l_pad, jcp.r_pad);
-    const int iw_overlap = jcp.iw + jcp.kw - 1 - jcp.l_pad - jcp.r_pad;
-    const int unroll_w = nstl::min(jcp.ur_w, iw_overlap);
-    const int right_border = iw_overlap - ow_block;
+        int unroll_w, int l_pad, int pad_offset, int ow_block) {
+
+    const int iw_block = ow_block * jcp.stride_w;
+    const int right_border = jcp.iw - iw_block;
+
+    const int cascade_input = nstl::min(jcp.stride_w, jcp.kw);
 
     /* preamble count for number of cascaded LOAD + FMA operation */
-    const int input_preamble_count
-            = nstl::max(jcp.kw - jcp.stride_w - l_pad, 0);
+    const int input_overlap = nstl::max(jcp.kw - l_pad, 0);
 
     /* LOAD initial input registers, then cascade LOADs and FMAs*/
     for (int r = 0; r < reg_repeats; ++r) {
-        for (int i = 0; i < input_preamble_count; i++) {
-            int off_input = ((i - pad_offset) * reg_repeats + r) * simd_w;
-            Vmm vmm_input = get_input_reg((i + l_pad) * reg_repeats + r);
-            uni_vmovups(vmm_input,
-                    ptr[tmp_reg_idx_input + off_input * sizeof(float)]);
-        }
-
-        for (int i = 0; i < unroll_w; ++i) {
-            int off_output = (i * reg_repeats + r) * simd_w;
+        for (int i_ur = 0; i_ur < unroll_w; ++i_ur) {
+            int off_output = (i_ur * reg_repeats + r) * simd_w;
             Vmm vmm_output = get_output_reg(r);
             uni_vmovups(vmm_output,
-                    ptr[tmp_reg_idx_output + off_output * sizeof(float)]);
-
-            int input_load_overlap = i * jcp.stride_w + input_preamble_count;
-
-            /* Cascade 'input' loads for the corresponding FMAs */
-            const int cascade_input = nstl::min(jcp.stride_w, jcp.kw);
-            for (int c = 0; c < cascade_input; ++c) {
-                int off_input
-                        = ((c + input_load_overlap - pad_offset) * reg_repeats
-                                  + r)
-                        * simd_w;
-                Vmm vmm_input = get_input_reg(
-                        ((c + input_load_overlap + l_pad) % jcp.kw)
-                                * reg_repeats
-                        + r);
-                uni_vmovups(vmm_input,
-                        ptr[tmp_reg_idx_input + off_input * sizeof(float)]);
+                    ptr[reg_tmp_output + off_output * sizeof(float)]);
+            if (i_ur == 0) {
+                for (int c = 0; c < input_overlap; ++c) {
+                    int off_input
+                            = ((c - pad_offset) * reg_repeats + r) * simd_w;
+                    Vmm vmm_input
+                            = get_input_reg((c % jcp.kw) * reg_repeats + r);
+                    uni_vmovups(vmm_input,
+                            ptr[reg_tmp_input + off_input * sizeof(float)]);
+                }
+            } else {
+                for (int c = 0; c < cascade_input; ++c) {
+                    int overlap = (i_ur - 1) * jcp.stride_w + input_overlap;
+                    int off_input
+                            = ((overlap + c - pad_offset) * reg_repeats + r)
+                            * simd_w;
+                    Vmm vmm_input = get_input_reg(
+                            ((overlap + c) % jcp.kw) * reg_repeats + r);
+                    uni_vmovups(vmm_input,
+                            ptr[reg_tmp_input + off_input * sizeof(float)]);
+                }
             }
 
-            for (int j = 0; j < jcp.kw; ++j) {
+            for (int i_kw = 0; i_kw < jcp.kw; ++i_kw) {
+                int io_overlap = i_kw + (i_ur * jcp.stride_w);
 
                 /* Don't apply FMAs that fall into the padded region */
-                if (i + j < l_pad || i + j - pad >= right_border)
+                if (io_overlap - l_pad < 0
+                        || io_overlap - jcp.l_pad >= right_border)
                     continue;
+
                 Vmm vmm_input = get_input_reg(
-                        ((i * jcp.stride_w + j) % jcp.kw) * reg_repeats + r);
-                Vmm vmm_acc = get_acc_reg(j * reg_repeats + r);
+                        ((io_overlap - l_pad) % jcp.kw) * reg_repeats + r);
+                Vmm vmm_acc = get_acc_reg(i_kw * reg_repeats + r);
                 Vmm vmm_aux = isa == sse42 ? get_aux_reg() : vmm_input;
-                if( isa == sse42 ) uni_vmovups(vmm_aux, vmm_input);
+                if (isa == sse42)
+                    uni_vmovups(vmm_aux, vmm_input);
                 uni_vfmadd231ps(vmm_acc, vmm_aux, vmm_output);
             }
         }
@@ -866,8 +861,16 @@ jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_bias_step_unroll(
         for (int i = 0; i < unroll_w; ++i) {
             Vmm vmm_bias = get_bias_reg(r);
             int off_output = (i * reg_repeats + r) * simd_w;
-            uni_vaddps(vmm_bias, vmm_bias,
-                    vmmword[tmp_reg_idx_output + off_output * sizeof(float)]);
+            if (isa == sse42) {
+                /* Need to support unaligned address loads for SSE42*/
+                Vmm vmm_output = get_output_reg(1 + r);
+                uni_vmovups(vmm_output,
+                        ptr[reg_tmp_output + off_output * sizeof(float)]);
+                uni_vaddps(vmm_bias, vmm_bias, vmm_output);
+            } else {
+                uni_vaddps(vmm_bias, vmm_bias,
+                        vmmword[reg_tmp_output + off_output * sizeof(float)]);
+            }
         }
     }
 }
@@ -879,7 +882,7 @@ inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::store_filter() {
         for (int i = 0; i < jcp.kw; ++i) {
             int off_filter = (i + reg_set) * simd_w;
             Vmm vmm_acc = get_acc_reg(i + reg_set);
-            uni_vmovups(vmmword[tmp_reg_filter + off_filter * sizeof(float)],
+            uni_vmovups(vmmword[reg_tmp_filter + off_filter * sizeof(float)],
                     vmm_acc);
         }
     }
@@ -895,343 +898,304 @@ inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::store_bias() {
 }
 
 template <cpu_isa_t isa>
-inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::create_h_bounds_table() {
-    /* Bounds are stored on an 8-bit sized element.
-     * XXX: potential issues if bounds exceed 255.
-     */
-    const bool handle_padding = (jcp.t_pad > 0) || (jcp.b_pad > 0);
-    if (handle_padding) {
-
-        /* Calculate how many 'h_start' bounds are needed */
-        const int h_bounds_count = get_loop_bounds_count(
-                nstl::max(jcp.t_pad, jcp.b_pad), jcp.oh, jcp.oh_blk_size);
-
-        align(64);
-        L(bound_start_table);
-        /* Generate starting bounds for 'oh' loop. This value also determines
-         * the overlap (computed as an address offset) between the output over
-         * the input for that loop iteration. */
-        for (int oh_block = 0; oh_block < h_bounds_count; ++oh_block) {
-            for (int kh = 0; kh < jcp.kh; ++kh) {
-                te_size start_bound = nstl::max(
-                        jcp.t_pad - oh_block * jcp.oh_blk_size - kh, 0);
-                write_table(start_bound);
-            }
-        }
-        /* Write offset count for 'input' address calculation. The offset for
-         * the input address is conditioned by the 'h' padding intersection over
-         * the output rows. */
-        for (int kh = 1; kh < jcp.kh; ++kh) {
-            te_size kh_accum_value = nstl::max(nstl::min(kh - jcp.t_pad, 1), 0);
-            write_table(kh_accum_value);
-        }
-        /* Last value is not used for offset calculation, write 'nop'
-         * equivalent*/
-        write_table(0);
-
-        /* Non-padded blocks always increment 'kh' dimension */
-        for (int oh_block = 0; oh_block < h_bounds_count - 1; oh_block++) {
-            for (int kh = 0; kh < jcp.kh; ++kh) {
-                te_size kh_accum_value = 1;
-                write_table(kh_accum_value);
-            }
-        }
-
-        /* number of input elements that overlap over output */
-        int ih_overlap = jcp.oh_blk_size + jcp.kh - 1 - jcp.t_pad - jcp.b_pad;
-
-        /* End Bounds for 'oh' default to 'OH' or OH_BLOCK_SIZE, unless
-         * the 'oh_block' is within the 'bottom_padding' region. */
-        int oh_end_blk = 0;
-        for (; oh_end_blk < h_bounds_count - 1; ++oh_end_blk) {
-            for (int kh = 0; kh < jcp.kh; ++kh) {
-                te_size end_bound = nstl::min((jcp.ih / jcp.stride_h)
-                                - jcp.oh_blk_size - oh_end_blk * jcp.oh_blk_size
-                                + ih_overlap + 1 - kh,
-                        jcp.oh_blk_size);
-                write_table(end_bound);
-            }
-        }
-        /* Write bounds for the special case of when 'oh_block' falls within the
-         * 'bottom_paddin' region - this always executes since at least 1 row of
-         * bounds should exist. */
-        const int pad = nstl::max(jcp.b_pad, jcp.t_pad);
-        ih_overlap
-                = (jcp.ih / jcp.stride_h + jcp.kh - 1 - jcp.t_pad - jcp.b_pad);
-        oh_end_blk = jcp.oh - jcp.oh_blk_size;
-        for (int kh = 0; kh < jcp.kh; ++kh) {
-            te_size end_bound = nstl::min(
-                    jcp.oh_blk_size, ih_overlap - oh_end_blk + pad - kh);
-            write_table(end_bound);
-        }
-    }
-}
-
-template <cpu_isa_t isa>
-inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_bias_loop() {
-
+inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_bias_loop(
+        const int block_size) {
     Label oh_label;
     Label ow_blk_label;
 
-    const int oh_block_size = jcp.oh_blk_size;
-    const int ow_unroll = jcp.ur_w;
-    const int ow_block_count = jcp.ow / ow_unroll;
+    const int unroll_w = nstl::min(block_size, jcp.ow);
+    const int unroll_w_trips = jcp.ow / unroll_w;
+    const int tail_w = jcp.ow > block_size ? jcp.ow % block_size : 0;
+
     const int ch_offset = jcp.ch_block;
 
-    mov(tmp_reg_idx_output, reg_output_baddr);
+    mov(reg_oh, ptr[this->param1 + offsetof(jit_dw_conv_call_s, oh_index)]);
+    mov(reg_oh_worksize,
+            ptr[this->param1 + offsetof(jit_dw_conv_call_s, oh_count)]);
 
-    xor_(iter_oh, iter_oh);
+    mov(reg_tmp_output, reg_output_baddr);
     L(oh_label);
     {
 
-        xor_(iter_ow_blk, iter_ow_blk);
+        mov(iter_ow_blk, unroll_w_trips);
         L(ow_blk_label);
         {
 
-            compute_bias_step_unroll(ow_unroll);
+            compute_bias_step_unroll(unroll_w);
+            add(reg_tmp_output, unroll_w * ch_offset * sizeof(float));
 
-            add(tmp_reg_idx_output, ow_unroll * ch_offset * sizeof(float));
+            dec(iter_ow_blk);
+            cmp(iter_ow_blk, 0);
+            jg(ow_blk_label, T_NEAR);
+        }
 
-            inc(iter_ow_blk);
-            cmp(iter_ow_blk, ow_block_count);
-            jl(ow_blk_label, T_NEAR);
+        if (tail_w > 0) {
+            compute_bias_step_unroll(tail_w);
+            add(reg_tmp_output, tail_w * ch_offset * sizeof(float));
         }
 
-        inc(iter_oh);
-        cmp(iter_oh, oh_block_size);
+        inc(reg_oh);
+        cmp(reg_oh, reg_oh_worksize);
         jl(oh_label, T_NEAR);
     }
 }
 
 template <cpu_isa_t isa>
-inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_kh_loop(
-        int l_pad, int r_pad, int pad_offset, bool first_iteration,
-        int ow_block) {
+inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_zero_filter() {
 
-    Label kh_label;
-    Label oh_label;
-    Label exit_innerloop_label;
-    Label skip_load_acc;
+    const int ch_offset = jcp.ch_block;
 
-    const int table_row_count = get_loop_bounds_count(
-            nstl::max(jcp.t_pad, jcp.b_pad), jcp.oh, jcp.oh_blk_size);
-    const int ih_table_off = 1 * table_row_count * jcp.kh * sizeof(te_size);
-    const int end_bound_table_off
-            = 2 * table_row_count * jcp.kh * sizeof(te_size);
+    Label kh_loop_label, skip_zeroing_label;
+
+    mov(reg_exec_flags,
+            ptr[this->param1 + offsetof(jit_dw_conv_call_s, exec_flags)]);
+    and_(reg_exec_flags, FLAG_ZERO_FILTER);
+    test(reg_exec_flags, reg_exec_flags);
+    je(skip_zeroing_label);
+
+    zero_filter();
+
+    mov(reg_tmp_filter, reg_filter_baddr);
+    mov(reg_kh, jcp.kh);
+    L(kh_loop_label);
+    {
+        store_filter();
+
+        add(reg_tmp_filter, jcp.kw * ch_offset * sizeof(float));
+        dec(reg_kh);
+        cmp(reg_kh, 0);
+        jg(kh_loop_label);
+    }
+
+    /* Comeback pointers */
+    sub(reg_tmp_filter, jcp.kh * jcp.kw * ch_offset * sizeof(float));
+
+    L(skip_zeroing_label);
+}
+
+template <cpu_isa_t isa>
+inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_h_step(
+        int unroll_w, int l_pad, int pad_offset, int ow_block) {
 
     const int ch_offset = jcp.ch_block;
 
-    const bool handle_padding = (jcp.t_pad > 0) || (jcp.b_pad > 0);
+    Label kh_loop_label, skip_loop_label;
 
-    mov(tmp_reg_filter, reg_filter_baddr);
-    mov(tmp_reg_kh_input, reg_input_baddr);
-    xor_(reg_tmp_off, reg_tmp_off);
+    cmp(reg_kh_count, 0);
+    je(skip_loop_label, T_NEAR);
 
-    if (handle_padding) {
-        mov(reg_bound_table_addr, bound_start_table);
+    mov(reg_kh, reg_kh_count);
+    L(kh_loop_label);
+    {
+        load_filter();
+        compute_ow_step_unroll(unroll_w, l_pad, pad_offset, ow_block);
+        store_filter();
 
-        /* move to the row containing the indices for the current 'h' block */
-        mov(reg_tmp_off, reg_table_idx);
-        imul(reg_tmp_off, reg_tmp_off, jcp.kh * sizeof(unsigned char));
-        add(reg_bound_table_addr, reg_tmp_off);
+        add(reg_tmp_filter, jcp.kw * ch_offset * sizeof(float));
+        add(reg_tmp_input, jcp.iw * ch_offset * sizeof(float));
+        dec(reg_kh);
+        cmp(reg_kh, 0);
+        jg(kh_loop_label);
     }
 
-    xor_(iter_kh, iter_kh);
-    L(kh_label);
+    /* Comeback pointers */
+    Label kh_comeback_label;
+    mov(reg_kh, reg_kh_count);
+    L(kh_comeback_label);
     {
+        sub(reg_tmp_input, jcp.iw * ch_offset * sizeof(float));
+        sub(reg_tmp_filter, jcp.kw * ch_offset * sizeof(float));
+        dec(reg_kh);
+        cmp(reg_kh, 0);
+        jg(kh_comeback_label, T_NEAR);
+    }
 
-        mov(tmp_reg_idx_output, reg_output_baddr);
-        mov(tmp_reg_idx_input, tmp_reg_kh_input);
+    L(skip_loop_label);
+}
 
-        if (first_iteration) {
+template <cpu_isa_t isa>
+inline void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_h_loop(
+        int unroll_w, int l_pad, int pad_offset, int ow_block) {
 
-            /* apply zero filter */
-            zero_filter();
+    const size_t io_overlap = jcp.ih / jcp.stride_h < jcp.oh ?
+            jcp.ih / jcp.stride_h - 1 :
+            jcp.oh - jcp.b_pad - 1;
+    const int ch_offset = jcp.ch_block;
+    const int t_overlap_off = jcp.t_pad % jcp.stride_h == 0 ? jcp.stride_h : 1;
+    const int b_overlap_off = jcp.b_pad % jcp.stride_h == 0 ? jcp.stride_h : 1;
 
-            /* if zero_filter_flag is set to '1', load filter memory into
-             * reg_accum */
-            if (jcp.with_bias) {
-                mov(reg_tmp_al, reg_exec_flag);
-                and_(reg_tmp_al, FLAG_ZERO_FILTER);
-                cmp(reg_tmp_al, 0);
-            } else {
-                /* none of the other flags are active, so we can use the
-                 * register directly */
-                cmp(reg_exec_flag, 0);
-            }
-            je(skip_load_acc);
-            load_filter();
-            L(skip_load_acc);
+    Label tpad_loop_label, h_loop_label, skip_tpad_label, skip_bpad_label,
+            end_h_loop_label;
 
-        } else {
-            load_filter();
-        }
+    mov(reg_oh, ptr[this->param1 + offsetof(jit_dw_conv_call_s, oh_index)]);
+    mov(reg_oh_worksize,
+            ptr[this->param1 + offsetof(jit_dw_conv_call_s, oh_count)]);
+    mov(reg_kh_count,
+            ptr[this->param1 + offsetof(jit_dw_conv_call_s, kh_count)]);
 
-        xor_(iter_oh, iter_oh);
+    mov(reg_tmp_output, reg_output_baddr);
+    mov(reg_tmp_input, reg_input_baddr);
+    mov(reg_tmp_filter, reg_filter_baddr);
 
-        if (handle_padding) {
+    L(h_loop_label);
+    {
 
-            /* 'oh loop' initial bounds are stored in bound_table */
-            mov(iter_oh_lb, byte[reg_bound_table_addr]);
+        compute_h_step(unroll_w, l_pad, pad_offset, ow_block);
 
-            /* skip 'oh' row that intersects with top padding */
-            xor_(reg_tmp_off, reg_tmp_off);
-            mov(reg_tmp_off, iter_oh);
-            imul(reg_tmp_off, reg_tmp_off, jcp.ow * ch_offset * sizeof(float));
-            add(tmp_reg_idx_output, reg_tmp_off);
+        add(reg_tmp_output, jcp.ow * ch_offset * sizeof(float));
 
-            /* forward the input address by 'stride_h' */
-            if (jcp.stride_h > 1) {
-                xor_(reg_tmp_off, reg_tmp_off);
-                mov(reg_tmp_off, iter_oh);
-                imul(reg_tmp_off, reg_tmp_off,
-                        (jcp.stride_h - 1) * jcp.iw * ch_offset * sizeof(float));
-                add(tmp_reg_idx_input, reg_tmp_off);
-            }
-        }
-
-        L(oh_label);
-        {
+        /* If within the top_pad region */
+        if (jcp.t_pad > 0) {
+            /* Skip t_pad area if no longer in initial h_block */
+            cmp(reg_oh, jcp.t_pad);
+            jg(skip_tpad_label, T_NEAR);
 
-            compute_ow_step_unroll(l_pad, r_pad, pad_offset, ow_block);
+            cmp(reg_kh_count, jcp.kh);
+            jge(skip_tpad_label, T_NEAR);
 
-            add(tmp_reg_idx_input,
-                    jcp.stride_h * jcp.iw * ch_offset * sizeof(float));
-            add(tmp_reg_idx_output, jcp.ow * ch_offset * sizeof(float));
+            add(reg_kh_count, t_overlap_off);
+            sub(reg_tmp_filter,
+                    t_overlap_off * jcp.kw * ch_offset * sizeof(float));
 
-            inc(iter_oh);
-            if (handle_padding) {
-                /* 'oh loop' end bounds are stored in bound_table (precomputed
-                 * during JIT generation) */
-                cmp(iter_oh_lb,
-                        byte[reg_bound_table_addr + end_bound_table_off]);
-            } else {
-                cmp(iter_oh, jcp.oh_blk_size);
+            /* kernel has moved beyond padding (adjust for stride effects) */
+            if (jcp.t_pad % jcp.stride_h != 0) {
+                int inp_corr = jcp.stride_h - jcp.t_pad % jcp.stride_h;
+                add(reg_tmp_input,
+                        inp_corr * jcp.iw * ch_offset * sizeof(float));
             }
-            jl(oh_label, T_NEAR);
+            jmp(tpad_loop_label, T_NEAR);
         }
 
-        store_filter();
+        L(skip_tpad_label);
 
-        add(tmp_reg_filter, jcp.kw * ch_offset * sizeof(float));
+        cmp(reg_oh, io_overlap);
+        jl(skip_bpad_label, T_NEAR);
+        sub(reg_kh_count, b_overlap_off);
 
-        if (handle_padding) {
-            xor_(kh_offset, kh_offset);
-            mov(kh_offset_lb, byte[reg_bound_table_addr + ih_table_off]);
-            /* increase 'ih' row in regards to 'kh'. */
-            imul(kh_offset, kh_offset, jcp.iw * ch_offset * sizeof(float));
-            add(tmp_reg_kh_input, kh_offset);
+        L(skip_bpad_label);
+        add(reg_tmp_input, jcp.stride_h * jcp.iw * ch_offset * sizeof(float));
 
-            /* increase bound_table idx for the next 'kh' value in table*/
-            add(reg_bound_table_addr, sizeof(te_size));
-        } else {
-            add(tmp_reg_kh_input, jcp.iw * ch_offset * sizeof(float));
-        }
+        L(tpad_loop_label);
+
+        cmp(reg_oh, jcp.ih / jcp.stride_h);
+        jge(end_h_loop_label, T_NEAR);
 
-        inc(iter_kh);
-        cmp(iter_kh, jcp.kh);
-        jl(kh_label, T_NEAR);
+        inc(reg_oh);
+
+        cmp(reg_oh, reg_oh_worksize);
+        jl(h_loop_label, T_NEAR);
     }
+    L(end_h_loop_label);
 }
 
 template <cpu_isa_t isa>
 inline void
 jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::compute_ow_block_unroll() {
 
-    Label skip_load_bias;
-
-    /* Only apply zero_filter (xor'ing accum_reg) on the left edge */
-    bool zero_filter_1st_iter = true;
-
     const int ch_offset = jcp.ch_block;
-
-    const int ow_block_size = jcp.ow_blk_size;
-    const int iw_block_size = jcp.ow_blk_size * jcp.stride_w;
-
-    int w_unrolled_loop_count = jcp.ow / ow_block_size;
-
-    const bool handle_padding = (jcp.l_pad > 0) || (jcp.r_pad > 0);
-
-    int pad_offset = jcp.l_pad;
-
-    int ow_block = 0;
-
+    int ow = jcp.ow;
+    int pad_offset = 0;
+    int l_pad = jcp.l_pad;
+
+    /* Calculate effective padding */
+    int r_pad = nstl::max(0, (ow - 1) * jcp.stride_w
+                    + (jcp.kw - 1) * (jcp.dilate_w + 1)
+                    - (jcp.iw + jcp.l_pad - 1));
+
+    /* Is this strictly defined by:
+     * -code-size (?)
+     * -address size (?) */
+    const int max_unroll_w = 30;
+    const int block_size = 15;
+
+    int unroll_w_tail = 0;
+    int unroll_w = 0;
+    int unroll_w_trips = 0;
+
+    if (jcp.ow > max_unroll_w) {
+        unroll_w = nstl::min(block_size, jcp.ow);
+        unroll_w_trips = ow / unroll_w;
+        /* calculate tail */
+        unroll_w_tail = ow % unroll_w;
+        /* Perform some rebalancing if tail too small*/
+        if ((unroll_w_tail == 0 && r_pad != 0)
+                || (r_pad > 0 && r_pad >= unroll_w_tail)) {
+            if (unroll_w_trips > 1) {
+                unroll_w_tail += unroll_w;
+                unroll_w_trips--;
+            } else {
+                /* Idealy, this case shouldn't happen */
+                unroll_w_tail += (unroll_w - unroll_w / 2);
+                unroll_w = unroll_w / 2;
+            }
+        }
+    } else {
+        unroll_w = jcp.ow;
+        unroll_w_trips = nstl::max(1, ow / unroll_w);
+    }
     if (jcp.with_bias) {
+        Label skip_load_bias;
+        mov(reg_bias_baddr,
+                ptr[this->param1 + offsetof(jit_dw_conv_call_s, bias)]);
 
         zero_bias();
 
-        /* if zero_bias is '1', load bias accumulator from memory. This happens
-         * after the first iteration is executed  */
-        mov(reg_tmp_al, reg_exec_flag);
-        and_(reg_tmp_al, FLAG_ZERO_BIAS);
-        cmp(reg_tmp_al, 0);
-        je(skip_load_bias);
+        mov(reg_exec_flags,
+                ptr[this->param1 + offsetof(jit_dw_conv_call_s, exec_flags)]);
+        and_(reg_exec_flags, FLAG_ZERO_BIAS);
+        test(reg_exec_flags, reg_exec_flags);
+        jne(skip_load_bias);
+
         load_bias();
-        L(skip_load_bias);
 
-        compute_bias_loop();
+        L(skip_load_bias);
+        compute_bias_loop(block_size);
 
         store_bias();
     }
 
-    /* compute left padded block */
-    if (handle_padding) {
-
-        const int r_pad = jcp.iw - ow_block_size > 0 ? 0 : jcp.r_pad;
-
-        compute_kh_loop(jcp.l_pad, r_pad, 0, zero_filter_1st_iter, ow_block);
-        zero_filter_1st_iter = false;
+    /* Pass filter address, then offset for h_padding. */
+    compute_zero_filter();
+    mov(reg_kh_offset,
+            ptr[this->param1 + offsetof(jit_dw_conv_call_s, filter_pad_off)]);
+    add(reg_filter_baddr, reg_kh_offset);
 
-        w_unrolled_loop_count--;
-
-        if (w_unrolled_loop_count >= 1) {
-            add(reg_output_baddr, ow_block_size * ch_offset * sizeof(float));
-            add(reg_input_baddr, iw_block_size * ch_offset * sizeof(float));
-        }
+    /* compute left padded block */
+    if (l_pad) {
+        compute_h_loop(unroll_w, l_pad, 0, 0);
+        add(reg_output_baddr, unroll_w * ch_offset * sizeof(float));
+        add(reg_input_baddr,
+                unroll_w * jcp.stride_w * ch_offset * sizeof(float));
+        unroll_w_trips--;
+        pad_offset = l_pad;
+        l_pad = 0;
     }
 
-    /* This block may execute under 2 different scenarios:
-     * 1) When padding is present, this executes the middle loop (if any).
-     * 2) With no padding, it writes the full loop of the micro-kernel. */
-    int middle_loop_count = handle_padding ? w_unrolled_loop_count - 1 :
-                                             w_unrolled_loop_count;
-    if (middle_loop_count >= 1) {
-        Label ow_blk_label;
-
-        /* Insert loop for 'ow' block when middle block needs to execute more
-         * than once */
-        bool do_ow_blk_loop = middle_loop_count > 1;
-        if (do_ow_blk_loop) {
-            mov(iter_ow_blk, middle_loop_count);
-            L(ow_blk_label);
-        }
-
-        compute_kh_loop(0, 0, pad_offset, zero_filter_1st_iter);
-        /* disable zero_filter for the rest of the iterations i.e. from now on
-         * load contents of 'filter' from memory */
-        mov(reg_exec_flag, FLAG_ZERO_FILTER);
-
-        if (do_ow_blk_loop || handle_padding) {
-            add(reg_output_baddr, ow_block_size * ch_offset * sizeof(float));
-            add(reg_input_baddr, iw_block_size * ch_offset * sizeof(float));
-        }
-
-        if (do_ow_blk_loop) {
-            dec(iter_ow_blk);
-            cmp(iter_ow_blk, 0);
-            jg(ow_blk_label, T_NEAR);
-        }
+    /* compute middle block */
+    Label ow_blk_label;
 
-        w_unrolled_loop_count -= middle_loop_count;
+    /* Insert loop for 'ow' block when middle block needs to execute more
+     * than once */
+    bool do_ow_blk_loop = unroll_w_trips > 1;
+    if (do_ow_blk_loop) {
+        mov(iter_ow_blk, unroll_w_trips);
+        L(ow_blk_label);
+    }
+    if (unroll_w_trips > 0) {
+        compute_h_loop(unroll_w, l_pad, pad_offset, 0);
+        add(reg_output_baddr, unroll_w * ch_offset * sizeof(float));
+        add(reg_input_baddr,
+                unroll_w * jcp.stride_w * ch_offset * sizeof(float));
+    }
+    if (do_ow_blk_loop) {
+        dec(iter_ow_blk);
+        cmp(iter_ow_blk, 0);
+        jg(ow_blk_label, T_NEAR);
     }
 
-    /* compute right padded block: ow_blk = LAST */
-    if (handle_padding && w_unrolled_loop_count >= 1) {
-        ow_block = jcp.ow - ow_block_size;
-        compute_kh_loop(
-                0, jcp.r_pad, pad_offset, zero_filter_1st_iter, ow_block);
-
-        w_unrolled_loop_count--;
+    /* compute right padded block */
+    if (unroll_w_tail) {
+        compute_h_loop(unroll_w_tail, 0, pad_offset, jcp.ow - unroll_w_tail);
     }
 }
 
@@ -1245,17 +1209,10 @@ void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::generate() {
             ptr[this->param1 + offsetof(jit_dw_conv_call_s, output)]);
     mov(reg_filter_baddr,
             ptr[this->param1 + offsetof(jit_dw_conv_call_s, filter)]);
-    if (jcp.with_bias)
-        mov(reg_bias_baddr,
-                ptr[this->param1 + offsetof(jit_dw_conv_call_s, bias)]);
-    mov(reg_table_flags,
-            ptr[this->param1 + offsetof(jit_dw_conv_call_s, table_flags)]);
 
     compute_ow_block_unroll();
 
     this->postamble();
-
-    create_h_bounds_table();
 }
 
 template <cpu_isa_t isa>
@@ -1263,8 +1220,7 @@ status_t jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_conf(
         jit_conv_conf_t &jcp, const convolution_desc_t &cd,
         const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &diff_weights_d,
-        const memory_desc_wrapper &diff_dst_d) {
-
+        const memory_desc_wrapper &diff_dst_d, int nthreads) {
     if (!mayiuse(isa))
         return status::unimplemented;
 
@@ -1295,8 +1251,6 @@ status_t jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_conf(
     jcp.stride_w = cd.strides[1];
 
     jcp.t_pad = cd.padding[0][0];
-    /* bottom padding should equal top padding to generate the proper 'h' loop
-     * bounds. */
     jcp.b_pad = cd.padding[1][0];
 
     jcp.l_pad = cd.padding[0][1];
@@ -1315,53 +1269,71 @@ status_t jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_conf(
     auto desired_act_fmt = isa == avx512_common ? nChw16c : nChw8c;
     auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;
 
-    bool args_ok = true
-                   && src_d.format() == desired_act_fmt
-                   && diff_weights_d.format() == desired_wei_fmt
-                   && diff_dst_d.format() == desired_act_fmt
-                   && one_of(cd.bias_desc.format, memory_format::undef, any, x)
-                   //&& jcp.ngroups % simd_w == 0
-                   && jcp.ngroups % jcp.ch_block == 0
-                   && jcp.dilate_h == 0
-                   && jcp.dilate_w == 0
-                   && jcp.kw <= 3
-                   && jcp.oh == (jcp.ihp - jcp.kh) / jcp.stride_h + 1
-                   && jcp.ow == (jcp.iwp - jcp.kw) / jcp.stride_w + 1;
-    if (!args_ok) return status::unimplemented;
-
-    /* Note: this IMPLICATION-check does not allow 'negative padding' execution
-     */
-    bool ok = true && IMPLICATION(jcp.r_pad > 0, jcp.r_pad == jcp.l_pad)
-            && IMPLICATION(jcp.b_pad > 0, jcp.b_pad == jcp.t_pad);
-    if (!ok)
+    bool args_ok = true && src_d.format() == desired_act_fmt
+            && diff_weights_d.format() == desired_wei_fmt
+            && diff_dst_d.format() == desired_act_fmt
+            && one_of(cd.bias_desc.format, memory_format::undef, any, x)
+            && jcp.ngroups % jcp.ch_block == 0 && jcp.dilate_h == 0
+            && jcp.dilate_w == 0 && jcp.kw <= 3
+            && jcp.oh == (jcp.ihp - jcp.kh) / jcp.stride_h + 1
+            && jcp.ow == (jcp.iwp - jcp.kw) / jcp.stride_w + 1;
+    if (!args_ok)
         return status::unimplemented;
 
     jcp.nb_ch = jcp.ngroups / jcp.ch_block;
 
-    /* Values for block size to try; order gives priority */
-    constexpr int BLOCK_SIZE[] = { 14, 16, 7, 8 };
-
-    int block_size_h = 1;
-    int block_size_w = 1;
+    /* kernel applicability check wrt boundaries
+     * the conditions are quite general across the kernels we have,
+     * but ideally the check should belong to a specific kernel... */
+    const int max_hpad = (jcp.kh - 1 + 1) / 2;
+    const int max_wpad = (jcp.kw - 1 + 1) / 2;
+    const bool boundaries_ok = true && jcp.t_pad <= max_hpad
+            && jcp.b_pad <= max_hpad && jcp.l_pad <= max_wpad
+            && jcp.r_pad <= max_wpad;
+    if (!boundaries_ok)
+        return status::unimplemented;
 
-    /* *Try different block sizes for convolution */
-    for (int block : BLOCK_SIZE) {
+    balance(jcp, nthreads);
 
-        block_size_h = block / jcp.stride_h;
-        block_size_w = block / jcp.stride_w;
+    return status::success;
+}
 
-        if ((jcp.oh % block_size_h == 0) && (jcp.ow % block_size_w == 0))
-            break;
+template <cpu_isa_t isa>
+void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp) {
+    /* Notes: if splitting thread work on 'mb', then a reduction has to take
+     * place. Hence, book a per-thread, local weights-buffer for the
+     * reduction */
+    if (jcp.nthr_mb > 1) {
+        const size_t wei_size = jcp.ngroups * jcp.kh * jcp.kw;
+        scratchpad.book(key_conv_wei_reduction,
+                sizeof(float) * wei_size * (jcp.nthr_mb - 1));
+
+        if (jcp.with_bias)
+            scratchpad.book(key_conv_bia_reduction,
+                    sizeof(float) * jcp.ngroups * (jcp.nthr_mb - 1));
     }
+}
 
-    if (jcp.oh % block_size_h != 0 || jcp.ow % block_size_w != 0)
-        return status::unimplemented;
-
-    jcp.oh_blk_size = block_size_h;
-
-    jcp.ur_w = jcp.ow_blk_size = block_size_w;
+template <cpu_isa_t isa>
+void jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::balance(jit_conv_conf_t &jcp,
+        int nthreads) {
+    jcp.nthr = nthreads;
+    jcp.nthr_g = jcp.nthr_mb = 1;
+
+    /* Basic-Heuristics for parallel strategy:
+     * 1) Tries to parallel on the number of Groups (g) where tasks are
+     * independent. Otherwise,
+     * 2) Tries to split the work across g and MiniBatch (mb).
+     * Parallelizing on mb requires computing a reduction for weights.
+     *
+     * NOTE: because of 'task partitioning' scheme, there will be unbalanced
+     * per-thread load when the number of threads is high (e.g. > 16).
+     */
+    jcp.nthr_g = nstl::min(jcp.nb_ch, jcp.nthr);
+    jcp.nthr_mb = nstl::min(nstl::max(1, jcp.nthr / jcp.nthr_g), jcp.mb);
 
-    return status::success;
+    jcp.nthr = jcp.nthr_g * jcp.nthr_mb;
 }
 
 template struct jit_uni_dw_conv_bwd_weights_kernel_f32<avx512_common>;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.hpp
index 103687b5d..6a6aa27da 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_conv_kernel_f32.hpp
@@ -18,6 +18,8 @@
 #define JIT_UNI_DW_CONV_KERNEL_F32_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
 #include "jit_uni_eltwise.hpp"
@@ -52,8 +54,10 @@ struct jit_uni_dw_conv_fwd_kernel_f32: public jit_generator {
     static status_t init_conf(jit_conv_conf_t &jcp,
             const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &weights_d,
-            const memory_desc_wrapper &dst_d, const primitive_attr_t &attr,
-            bool with_relu = false, float relu_negative_slope = 0.f);
+            const memory_desc_wrapper &dst_d, const primitive_attr_t &attr);
+
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
 
     jit_conv_conf_t jcp;
     const primitive_attr_t &attr_;
@@ -114,10 +118,14 @@ struct jit_uni_dw_conv_bwd_data_kernel_f32: public jit_generator {
     }
 
     static status_t init_conf(jit_conv_conf_t &jcp,
-            const convolution_desc_t &cd, const memory_desc_wrapper &diff_src_d,
+            const convolution_desc_t &cd,
+            const memory_desc_wrapper &diff_src_d,
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &diff_dst_d);
 
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
+
     jit_conv_conf_t jcp;
     void (*jit_ker)(jit_conv_call_s *);
 
@@ -167,23 +175,23 @@ struct jit_uni_dw_conv_bwd_weights_kernel_f32 : public jit_generator {
     static status_t init_conf(jit_conv_conf_t &jcp,
             const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
             const memory_desc_wrapper &diff_weights_d,
-            const memory_desc_wrapper &diff_dst_d);
+            const memory_desc_wrapper &diff_dst_d, int nthreads);
+
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp);
+
+    static void balance(jit_conv_conf_t &jcp, int nthreads);
 
     jit_conv_conf_t jcp;
     void (*jit_ker)(jit_dw_conv_call_s *);
 
 private:
-    //using Vmm = Xbyak::Zmm;
     using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
             isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
     using reg64_t = const Xbyak::Reg64;
-    using te_size
-            = unsigned char; /* set the 'table_entry' data size. For this
-                                implementation, only values > 255 are needed. */
     const int simd_w = cpu_isa_traits<isa>::vlen / sizeof(float);
     const int reg_repeats = (isa == sse42) ? 2 : 1;
-    inline void write_table(te_size data) { db(data); }
-    //const Xbyak::AddressFrame &vmmword = zword;
+
     const Xbyak::AddressFrame &vmmword
             = (isa == sse42) ? xword : (isa == avx2) ? yword : zword;
 
@@ -195,116 +203,51 @@ private:
     inline Vmm get_acc_reg(int idx) { return Vmm(idx + 1 * reg_repeats + 1); }
     inline Vmm get_aux_reg() { return Vmm(0); }
 
-    reg64_t tmp_reg_idx_input = r8;
-    reg64_t tmp_reg_kh_input = r9;
-    reg64_t tmp_reg_idx_output = r10;
-    reg64_t tmp_reg_filter = r11;
+    reg64_t reg_tmp_input = r9;
+    reg64_t reg_tmp_output = r10;
+    reg64_t reg_tmp_filter = r13;
+    reg64_t reg_kh_offset = rax;
 
     /* parameter passed by driver into kernel */
-    reg64_t reg_table_flags = rbx;
-    Xbyak::Reg8 reg_table_idx = bl;
-    Xbyak::Reg8 reg_exec_flag = bh;
-
-    /* holds the address for the 'bounds table' that is generated during JIT */
-    reg64_t reg_bound_table_addr = r13;
+    Xbyak::Reg8 reg_exec_flags = bl;
 
-    reg64_t reg_tmp_off = rax;
-    Xbyak::Reg8 reg_tmp_al = al;
+    reg64_t reg_oh_worksize = r14;
+    reg64_t reg_oh = rax;
 
-    reg64_t iter_oh = rdx;
-    Xbyak::Reg8 iter_oh_lb = dl;
-    reg64_t kh_offset = rdx;
-    Xbyak::Reg8 kh_offset_lb = dl;
+    reg64_t iter_ow_blk = r11;
 
-    reg64_t iter_ow_blk = rbp;
-    reg64_t iter_kh  = rsi;
+    reg64_t reg_kh = rsi;
+    reg64_t reg_kh_count = rdx;
 
     /* Base addresses for convolution parameters. */
     reg64_t reg_input_baddr = r15;
     reg64_t reg_output_baddr = r12;
     reg64_t reg_filter_baddr = abi_not_param1;
-    reg64_t reg_bias_baddr = r14;
-
-    Xbyak::Label bound_start_table;
-
-    /* Return the amount of blocks to execute depending on the convolution
-     * dimensions and block_size e.g.
-     *  {ow = 112, ow_block_size = 14} -> requires:
-     *      1 left block,
-     *      1 middle block,
-     *      1 right block;
-     * {ow = 28, ow_block_size = * 14} -> requires:
-     *      1 left block,
-     *      1 right block. */
-    inline int get_loop_bounds_count(
-            const int padding, const int h_dimension, const int block_size) {
-        const int num_top_padded_blk = utils::div_up(padding, block_size);
-        const int num_tail_blk
-                = (h_dimension - num_top_padded_blk * block_size > 0) ? 1 : 0;
-        const int num_middle_blk
-                = (h_dimension
-                    - (num_top_padded_blk + num_tail_blk) * block_size
-                          > 0) ? 1 : 0;
-        return num_top_padded_blk + num_middle_blk + num_tail_blk;
-    }
-
-    /* Create a table containing the values that define the kernel's loop
-     * behavior. The purpose of using this table is to eliminate the
-     * implementation complexities and performance impact of in-execution
-     * computation of loop bounds in regards to stride and padding.  The table
-     * consists of 3 sections:
-     * 1) Initial Bounds for 'oh' loop.
-     * 2) Input address offset flag: '1' indicates an input address increment,
-     *    '0' results in no increment.
-     * 3) End-bounds for 'oh' loop.
-     *
-     * The table is written into memory as the following format:
-     * Filter_size:    |--- kh ---|
-     * Table:           __________
-     * 1st section:    |          |
-     *                 |- - - - - |
-     * 2nd section:    |          |
-     *                 |- - - - - |
-     * 3rd section:    |__________|
-     *
-     * Example for convolution: ih=112, oh=112, kh=3, ph=1
-     *   __________
-     *  | 1,  0,  0| -> upper 'oh' loop initial bounds
-     *  | 0,  0,  0| -> middle 'oh' loop initial bounds
-     *  | 0,  0,  0| -> bottom loop initial bounds
-     *  |----------|
-     *  | 0,  1,  0| -> *There is no input offset for kh = 0, i.e. the
-     *  | 1,  1,  1|    offset_flag is '0' becase of padding.
-     *  | 1,  1,  1|
-     *  |----------|
-     *  |14, 14, 14| -> lower 'oh' loop end bounds
-     *  |14, 14, 14| -> (etc)
-     *  |14, 14, 13| -> *The last 'kh' loop has an upper bound of 13
-     *  |__________|    because of padding.
-     *    0,  1,  2  -> kh values
-     * */
-    inline void create_h_bounds_table();
+    reg64_t reg_bias_baddr = r13;
 
     /* Micro-kernel JIT'ing, fusing 'kw' and 'ow_block' loops into unrolled FMAs
      */
     inline void compute_ow_step_unroll(
-            int l_pad, int r_pad, int pad_offset, int ow_block);
+            int unroll_w, int l_pad, int pad_offset, int ow_block);
 
     /* JIT'ing the outer loops for the micro-kernel -> {kh, oh_block} */
-    inline void compute_kh_loop(int l_pad, int r_pad, int pad_offset,
-            bool first_iteration, int ow_block = 0);
+    inline void compute_h_step(
+            int unroll_w, int l_pad, int pad_offset, int ow_block);
+    inline void compute_h_loop(
+            int unroll_w, int l_pad, int pad_offset, int ow_block);
 
     /* Write 'width' micro-kernel JITs; depending on the padding and convolution
      * size, write a micro-kernel for the left ow-block, middle ow-block(s), and
      * right ow-block.*/
     inline void compute_ow_block_unroll();
 
+    inline void compute_zero_filter();
     inline void load_filter();
     inline void zero_filter();
     inline void load_bias();
     inline void zero_bias();
     inline void compute_bias_step_unroll(const int unroll_w);
-    inline void compute_bias_loop();
+    inline void compute_bias_loop(const int block_size);
     inline void store_filter();
     inline void store_bias();
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.cpp
index 48c196101..82a7a9dba 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.cpp
@@ -14,38 +14,42 @@
 * limitations under the License.
 *******************************************************************************/
 
-#include "mkldnn_types.h"
-
 #include "c_types_map.hpp"
-#include "jit_uni_dw_convolution.hpp"
+#include "memory_tracking.hpp"
 #include "mkldnn_thread.hpp"
 
+#include "jit_uni_dw_convolution.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
-template <cpu_isa_t isa, bool with_relu>
-void _jit_uni_dw_convolution_fwd_t<isa, with_relu>::execute_forward() {
+template <cpu_isa_t isa>
+void _jit_uni_dw_convolution_fwd_t<isa>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
 
-    if (conf_.want_padded_bias()) {
-        for (int oc = 0; oc < jcp.oc_without_padding; ++oc)
-            padded_bias_[oc] = bias[oc];
-        bias = padded_bias_;
+    if (pd()->wants_padded_bias()) {
+        auto padded_bias = this->scratchpad().template get<data_t>(
+                key_conv_padded_bias);
+        utils::array_copy(padded_bias, bias, jcp.oc_without_padding);
+        utils::array_set(padded_bias + jcp.oc_without_padding, 0.f,
+                jcp.oc - jcp.oc_without_padding);
+        bias = padded_bias;
     }
 
     int dil_h = jcp.dilate_h + 1;
@@ -85,7 +89,7 @@ void _jit_uni_dw_convolution_fwd_t<isa, with_relu>::execute_forward() {
         return par_conv;
     };
 
-    int MB = conf_.MB();
+    int MB = pd()->MB();
     const int chb_work = utils::div_up(jcp.nb_ch, jcp.nb_ch_blocking);
     parallel_nd(MB, chb_work, jcp.oh,
             [&](int n, int chb, int oh) {
@@ -134,31 +138,24 @@ void _jit_uni_dw_convolution_fwd_t<isa, with_relu>::execute_forward() {
             kernel_->jit_ker(&par_conv);
         }
     });
-}
 
-template void _jit_uni_dw_convolution_fwd_t<avx512_common, false>
-    ::execute_forward();
-template void _jit_uni_dw_convolution_fwd_t<avx2, false>
-    ::execute_forward();
-template void _jit_uni_dw_convolution_fwd_t<sse42, false>
-    ::execute_forward();
+    if (pd()->wants_zero_pad_dst())
+        output_memory_primitive(0)->zero_pad();
+}
 
-template void _jit_uni_dw_convolution_fwd_t<avx512_common, true>
-    ::execute_forward();
-template void _jit_uni_dw_convolution_fwd_t<avx2, true>
-    ::execute_forward();
-template void _jit_uni_dw_convolution_fwd_t<sse42, true>
-    ::execute_forward();
+template struct _jit_uni_dw_convolution_fwd_t<avx512_common>;
+template struct _jit_uni_dw_convolution_fwd_t<avx2>;
+template struct _jit_uni_dw_convolution_fwd_t<sse42>;
 
 template <cpu_isa_t isa>
-void _jit_uni_dw_convolution_bwd_data_t<isa>::execute_backward_data() {
+void _jit_uni_dw_convolution_bwd_data_t<isa>::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
 
     const auto &jcp = kernel_->jcp;
 
@@ -192,7 +189,7 @@ void _jit_uni_dw_convolution_bwd_data_t<isa>::execute_backward_data() {
         return par_conv;
     };
 
-    int MB = conf_.MB();
+    int MB = pd()->MB();
     const int chb_work = utils::div_up(jcp.nb_ch, jcp.nb_ch_blocking);
     parallel_nd(MB, chb_work, jcp.ih,
         [&](int n, int chb, int ih) {
@@ -247,264 +244,185 @@ void _jit_uni_dw_convolution_bwd_data_t<isa>::execute_backward_data() {
     });
 }
 
-template void _jit_uni_dw_convolution_bwd_data_t<avx512_common>
-    ::execute_backward_data();
-template void _jit_uni_dw_convolution_bwd_data_t<avx2>
-    ::execute_backward_data();
-template void _jit_uni_dw_convolution_bwd_data_t<sse42>
-    ::execute_backward_data();
+template struct _jit_uni_dw_convolution_bwd_data_t<avx512_common>;
+template struct _jit_uni_dw_convolution_bwd_data_t<avx2>;
+template struct _jit_uni_dw_convolution_bwd_data_t<sse42>;
 
 template <cpu_isa_t isa>
 _jit_uni_dw_convolution_bwd_weights_t<isa>::
-        _jit_uni_dw_convolution_bwd_weights_t(const pd_t *pd,
-                const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-
-    const auto &jcp = conf_.jcp_;
-
-    kernel_ = new jit_uni_dw_conv_bwd_weights_kernel_f32<isa>(jcp);
-
-    const int max_threads
-            = (mkldnn_in_parallel()) ? 1 : mkldnn_get_max_threads();
-    nthr_ = max_threads;
-
-    nthr_g_ = nthr_mb_ = 1;
-
-    /* Basic-Heuristics for parallel strategy:
-     * 1) Tries to parallel on the number of Groups (g) where tasks are
-     * independent. Otherwise,
-     * 2) Tries to split the work across g and MiniBatch (mb).
-     * Parallelizing on mb requires computing a reduction for weights.
-     *
-     * NOTE: because of 'task partitioning' scheme, there will be unbalanced
-     * per-thread load when the number of threads is high (e.g. > 16).
-     */
-    nthr_g_ = nstl::min(jcp.nb_ch, nthr_);
-    nthr_mb_ = nstl::min(nstl::max(1, nthr_ / nthr_g_), jcp.mb);
-
-    nthr_ = nthr_g_ * nthr_mb_;
-
-    /* Notes: if splitting thread work on 'mb', then a reduction has to take
-     * place. Hence, allocate a per-thread, local weights-buffer for the
-     * reduction */
-    if (nthr_mb_ > 1) {
-        const size_t wei_size = jcp.ngroups * jcp.kh * jcp.kw;
-        ws_reduction_ = (data_t *)malloc(
-                (nthr_mb_ - 1) * wei_size * sizeof(data_t), 64);
-
-        if (jcp.with_bias) {
-            const size_t bias_size = jcp.ngroups;
-            bias_reduction_ = (data_t *)malloc(
-                    (nthr_mb_ - 1) * bias_size * sizeof(data_t), 64);
-        }
-
-        /* Used when executing a parallel reduction */
-        if(do_parallel_reduction()){
-            acc_ker_ = new cpu_accumulator_1d_t<data_type::f32>();
-            simple_barrier::ctx_init(&reduction_bctx_);
-        }
-    }
+_jit_uni_dw_convolution_bwd_weights_t(const pd_t *apd,
+        const input_vector &inputs, const output_vector &outputs)
+    : cpu_primitive_t(apd, inputs, outputs)
+    , kernel_(nullptr), acc_ker_(nullptr)
+{
+    kernel_ = new jit_uni_dw_conv_bwd_weights_kernel_f32<isa>(pd()->jcp_);
+    if (pd()->jcp_.nthr_mb > 1 && do_parallel_reduction())
+        acc_ker_ = new cpu_accumulator_1d_t<data_type::f32>();
 }
+
 template <cpu_isa_t isa>
-void _jit_uni_dw_convolution_bwd_weights_t<isa>::execute_backward_weights() {
+void _jit_uni_dw_convolution_bwd_weights_t<isa>::execute_backward_weights() const {
+    auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
+    auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
+    auto diff_weights = reinterpret_cast<data_t *>(this->memory(0));
+    auto diff_bias = reinterpret_cast<data_t *>(this->memory(1));
+
+    auto diff_wei_reduction_buf =
+        scratchpad().template get<data_t>(key_conv_wei_reduction);
+    auto diff_bia_reduction_buf =
+        scratchpad().template get<data_t>(key_conv_bia_reduction);
 
-    auto src
-            = (data_t *)reinterpret_cast<const data_t *>(this->input_memory(0));
-    auto diff_dst
-            = (data_t *)reinterpret_cast<const data_t *>(this->input_memory(1));
     const auto &jcp = kernel_->jcp;
 
-    /* JIT-code skips the unnecessary computations within the padded region. */
-    const int SKIP_TOP_PADDING = 0;
+    /* Used when executing a parallel reduction */
+    simple_barrier::ctx_t reduction_bctx;
+    simple_barrier::ctx_init(&reduction_bctx);
 
     const size_t wei_size = jcp.ngroups * jcp.kh * jcp.kw;
     const size_t bias_size = jcp.with_bias ? jcp.ngroups : 0;
 
-    const int oh_blk_size = jcp.oh_blk_size;
-
-    //const int simd_w = jcp.ch_block;
     const int ch_block = jcp.ch_block;
 
     auto set_kernel_params = [&](jit_dw_conv_call_s *conv_params,
-            const int batch, const int group, const int oh_block,
-            const unsigned char table_idx, const int negative_padding_offset,
-            const unsigned char exec_flag) {
+            const int batch, const int group, const int oh_start,
+            const int work_size, const unsigned char exec_flag,
+            const size_t kh_padding, const size_t filter_off) {
+        const int tpad_underflow_off = jcp.t_pad - filter_off;
+
+        conv_params->exec_flags = exec_flag;
+        conv_params->kh_count = jcp.kh - kh_padding;
 
-        const int ih_block = oh_block * jcp.stride_h;
+        const int oh_s = oh_start;
+        const int oh_e = oh_start + work_size;
+        const int ih_s = oh_s * jcp.stride_h;
 
-        conv_params->table_idx = table_idx;
-        conv_params->exec_flag = exec_flag;
+        conv_params->filter_pad_off
+                = filter_off * jcp.kw * ch_block * sizeof(float);
+        conv_params->oh_index = oh_s;
+        conv_params->oh_count = oh_e;
 
         size_t diff_dst_off
-                = ((batch * (jcp.ngroups / ch_block) + group) * jcp.oh + oh_block)
+                = ((batch * (jcp.ngroups / ch_block) + group) * jcp.oh
+                          + oh_start)
                 * jcp.ow;
 
         size_t src_off = ((batch * (jcp.ngroups / ch_block) + group) * jcp.ih
-                              + ih_block - negative_padding_offset)
-                * jcp.iw;
+                + ih_s - tpad_underflow_off) * jcp.iw;
 
         conv_params->output = &diff_dst[diff_dst_off * ch_block];
         conv_params->input = &src[src_off * ch_block];
     };
 
-    parallel(nthr_, [&](const int ithr, const int nthr_) {
+    parallel(jcp.nthr, [&](const int ithr, const int nthr) {
+        assert(nthr == jcp.nthr);
+
         auto conv_params = jit_dw_conv_call_s();
+        const int h_block_size = 15;
 
         /* assign iteration space to thread */
-        const int ithr_g = ithr % nthr_g_;
-        const int ithr_mb = (ithr / nthr_g_) % nthr_mb_;
+        const int ithr_g = ithr % jcp.nthr_g;
+        const int ithr_mb = (ithr / jcp.nthr_g) % jcp.nthr_mb;
 
         /* split dimensions */
         int g_start{ 0 }, g_end{ 0 };
-        balance211(jcp.nb_ch, nthr_g_, ithr_g, g_start, g_end);
+        balance211(jcp.nb_ch, jcp.nthr_g, ithr_g, g_start, g_end);
 
         int mb_start{ 0 }, mb_end{ 0 };
-        balance211(jcp.mb, nthr_mb_, ithr_mb, mb_start, mb_end);
-
-        auto diff_wei = ithr_mb == 0 ?
-                (data_t *)reinterpret_cast<data_t *>(this->memory(0)) :
-                (data_t *)ws_reduction_ + (ithr_mb - 1) * wei_size;
+        balance211(jcp.mb, jcp.nthr_mb, ithr_mb, mb_start, mb_end);
 
-        auto diff_bias = ithr_mb == 0 ?
-                (data_t *)reinterpret_cast<const data_t *>(this->memory(1)) :
-                (data_t *)bias_reduction_ + (ithr_mb - 1) * bias_size;
+        auto diff_wei = ithr_mb == 0
+            ? diff_weights : diff_wei_reduction_buf + (ithr_mb - 1) * wei_size;
+        auto diff_bia = ithr_mb == 0
+            ? diff_bias : diff_bia_reduction_buf + (ithr_mb - 1) * bias_size;
 
         for (int g = g_start; g < g_end; ++g) {
-
-            /* This flag controls whether the kernel loads weights from memory
-             * or initializes the 'weight accummulator' registers to '0'. The
-             * latter happens at the beginning of each group/16 computation. */
-            unsigned char zero_filter_flag = ~FLAG_ZERO_FILTER;
-            unsigned char zero_bias_flag = jcp.with_bias ? ~FLAG_ZERO_BIAS : 0;
+            unsigned char zero_filter_flag = FLAG_ZERO_FILTER;
+            unsigned char zero_bias_flag = jcp.with_bias ? FLAG_ZERO_BIAS : 0;
 
             size_t diff_wei_off = g * jcp.kh * jcp.kw;
             conv_params.filter = &diff_wei[diff_wei_off * ch_block];
 
             if (jcp.with_bias)
-                conv_params.bias = &diff_bias[g * ch_block];
+                conv_params.bias = &diff_bia[g * ch_block];
 
             for (int mb = mb_start; mb < mb_end; ++mb) {
-
-                /* The 'table index' parameter controls the table entry for the
-                 * inner kernel execution. For more details see
-                 * jit_uni_dw_conv_kernel_f32. */
-                int table_idx = 0;
-
-                /* OH_BLOCK is unrolled to separate the computations according
-                 * to numerous condition-setting 'h' parameter. */
-                int oh_blk = 0;
-
-                /* Top-padding case - this case always executes. */
-                set_kernel_params(&conv_params, mb, g, oh_blk, table_idx,
-                        SKIP_TOP_PADDING, zero_filter_flag & zero_bias_flag);
-                kernel_->jit_ker(&conv_params);
-
-                zero_bias_flag |= FLAG_ZERO_BIAS;
-                zero_filter_flag |= FLAG_ZERO_FILTER;
-                oh_blk += oh_blk_size;
-
-                /* Middle OH_BLOCK cases. */
-                for (; oh_blk < (jcp.oh - oh_blk_size); oh_blk += oh_blk_size) {
-                    table_idx = 1;
-                    set_kernel_params(&conv_params, mb, g, oh_blk, table_idx,
-                            jcp.t_pad, zero_filter_flag & zero_bias_flag);
+                int oh = 0;
+                while (oh < jcp.oh) {
+                    const int h_work = nstl::min(h_block_size, jcp.oh - oh);
+                    auto kh_t_padding = nstl::max(0, jcp.t_pad - oh);
+                    auto kh_b_padding
+                            = (oh * jcp.stride_h + jcp.kh - 1 > jcp.ih) ?
+                            jcp.b_pad - (h_work - 1) :
+                            0;
+
+                    set_kernel_params(&conv_params, mb, g, oh, h_work,
+                            zero_filter_flag | zero_bias_flag,
+                            kh_t_padding + kh_b_padding, kh_t_padding);
                     kernel_->jit_ker(&conv_params);
-                }
-                table_idx++;
 
-                /* Bottom block */
-                if (oh_blk < jcp.oh) {
-                    set_kernel_params(&conv_params, mb, g, oh_blk, table_idx,
-                            jcp.t_pad, zero_filter_flag & zero_bias_flag);
-                    kernel_->jit_ker(&conv_params);
+                    zero_bias_flag &= ~FLAG_ZERO_BIAS;
+                    zero_filter_flag &= ~FLAG_ZERO_FILTER;
+                    oh += h_work;
                 }
             }
         }
-        if (do_parallel_reduction() && nthr_mb_ > 1) {
 
+        if (do_parallel_reduction() && jcp.nthr_mb > 1) {
             size_t reduct_start{ 0 }, reduct_end{ 0 };
-            balance211(wei_size, nthr_, ithr, reduct_start, reduct_end);
-
-            const size_t reduct_off = reduct_start;
-
-            auto *acc_data
-                    = (data_t *)reinterpret_cast<data_t *>(this->memory(0))
-                    + reduct_off;
+            balance211(wei_size, nthr, ithr, reduct_start, reduct_end);
 
             const int acc_size = reduct_end - reduct_start;
+            const size_t reduct_off = reduct_start;
+            auto *acc_data = diff_weights + reduct_off;
 
-            simple_barrier::barrier(&reduction_bctx_, nthr_);
-
-            for (int thr_mb = 1; thr_mb < nthr_mb_; ++thr_mb) {
+            simple_barrier::barrier(&reduction_bctx, nthr);
 
-                auto *src_data = (data_t *)ws_reduction_
+            for (int thr_mb = 1; thr_mb < jcp.nthr_mb; ++thr_mb) {
+                auto *src_data = diff_wei_reduction_buf
                         + (thr_mb - 1) * wei_size + reduct_off;
-
                 acc_ker_->accumulate(acc_data, src_data, acc_size);
             }
         }
     });
 
-    /* Apply single-threaded 'mb' reduction */
-    if (nthr_mb_ > 1) {
-
-        auto diff_weights
-                = (data_t *)reinterpret_cast<data_t *>(this->memory(0));
-        auto diff_bias
-                = (data_t *)reinterpret_cast<const data_t *>(this->memory(1));
-
-        for (int thr_mb = 1; thr_mb < nthr_mb_; ++thr_mb) {
-
-            size_t mb_accum_offset = (thr_mb - 1) * wei_size;
-            size_t b_accum_offset = (thr_mb - 1) * bias_size;
+    if (jcp.nthr_mb <= 1) return;
 
-            for (int g = 0; g < jcp.nb_ch; ++g) {
-
-                /* Reduction on Bias */
-                if (jcp.with_bias) {
-                    PRAGMA_OMP_SIMD()
-                    for (int g_block = 0; g_block < ch_block; ++g_block) {
-                        size_t bias_offset = g * ch_block + g_block;
-                        diff_bias[bias_offset] += bias_reduction_[b_accum_offset
-                                + bias_offset];
-                    }
+    /* Apply single-threaded 'mb' reduction */
+    for (int thr_mb = 1; thr_mb < jcp.nthr_mb; ++thr_mb) {
+        size_t mb_accum_offset = (thr_mb - 1) * wei_size;
+        size_t b_accum_offset = (thr_mb - 1) * bias_size;
+
+        for (int g = 0; g < jcp.nb_ch; ++g) {
+            /* Reduction on Bias */
+            if (jcp.with_bias) {
+                PRAGMA_OMP_SIMD()
+                for (int g_block = 0; g_block < ch_block; ++g_block) {
+                    size_t bias_offset = g * ch_block + g_block;
+                    diff_bias[bias_offset] += diff_bia_reduction_buf[
+                        b_accum_offset + bias_offset];
                 }
-                if (!do_parallel_reduction()) {
-                    for (int kh = 0; kh < jcp.kh; ++kh) {
-                        for (int kw = 0; kw < jcp.kw; ++kw) {
-
-                            size_t wei_offset = (g * jcp.kh + kh) * jcp.kw + kw;
-                            PRAGMA_OMP_SIMD()
-                            for (int g_block = 0; g_block < ch_block; ++g_block) {
-                                diff_weights[wei_offset * ch_block + g_block]
-                                        += ws_reduction_[mb_accum_offset
-                                                + wei_offset * ch_block
-                                                + g_block];
-                            }
-                        }
-                    }
+            }
+
+            if (do_parallel_reduction()) continue;
+
+            for (int kh = 0; kh < jcp.kh; ++kh)
+            for (int kw = 0; kw < jcp.kw; ++kw)
+            {
+                size_t wei_offset = (g * jcp.kh + kh) * jcp.kw + kw;
+                PRAGMA_OMP_SIMD()
+                for (int g_block = 0; g_block < ch_block; ++g_block) {
+                    const size_t off = wei_offset * ch_block + g_block;
+                    diff_weights[off] +=
+                        diff_wei_reduction_buf[mb_accum_offset + off];
                 }
             }
         }
     }
 }
 
-template _jit_uni_dw_convolution_bwd_weights_t<avx512_common>::
-        _jit_uni_dw_convolution_bwd_weights_t(const pd_t *pd,
-                const input_vector &inputs, const output_vector &outputs);
-template _jit_uni_dw_convolution_bwd_weights_t<avx2>::
-        _jit_uni_dw_convolution_bwd_weights_t(const pd_t *pd,
-                const input_vector &inputs, const output_vector &outputs);
-template _jit_uni_dw_convolution_bwd_weights_t<sse42>::
-        _jit_uni_dw_convolution_bwd_weights_t(const pd_t *pd,
-                const input_vector &inputs, const output_vector &outputs);
-
-template void _jit_uni_dw_convolution_bwd_weights_t<avx512_common>::
-        execute_backward_weights();
-template void _jit_uni_dw_convolution_bwd_weights_t<avx2>::
-        execute_backward_weights();
-template void _jit_uni_dw_convolution_bwd_weights_t<sse42>::
-        execute_backward_weights();
+template struct _jit_uni_dw_convolution_bwd_weights_t<avx512_common>;
+template struct _jit_uni_dw_convolution_bwd_weights_t<avx2>;
+template struct _jit_uni_dw_convolution_bwd_weights_t<sse42>;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.hpp
index b723c1c89..2f2cc7a3c 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_dw_convolution.hpp
@@ -18,54 +18,62 @@
 #define CPU_JIT_UNI_DW_CONVOLUTION_HPP
 
 #include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+
+#include "cpu_barrier.hpp"
 #include "cpu_convolution_pd.hpp"
-#include "cpu_engine.hpp"
-#include "jit_primitive_conf.hpp"
-#include "jit_uni_dw_conv_kernel_f32.hpp"
 #include "cpu_reducer.hpp"
-#include "cpu_barrier.hpp"
+
+#include "jit_uni_dw_conv_kernel_f32.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <cpu_isa_t isa, bool with_relu>
+template <cpu_isa_t isa>
 struct _jit_uni_dw_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
             , jcp_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_dw:", isa, ""),
-                _jit_uni_dw_convolution_fwd_t<isa, with_relu>);
+                _jit_uni_dw_convolution_fwd_t<isa>);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
-                        this->cdesc_().src_desc.data_type,
-                        this->cdesc_().weights_desc.data_type,
-                        this->cdesc_().dst_desc.data_type)
+                        this->desc()->src_desc.data_type,
+                        this->desc()->weights_desc.data_type,
+                        this->desc()->dst_desc.data_type)
                 && IMPLICATION(this->with_bias(),
-                        data_type::f32 == this->cdesc_().bias_desc.data_type);
+                        data_type::f32 == this->desc()->bias_desc.data_type);
 
             if (!ok) return status::unimplemented;
 
-            return jit_uni_dw_conv_fwd_kernel_f32<isa>::init_conf(jcp_,
-                        this->cdesc_(),
-                        this->src_pd_.desc(), *this->weights_pd_.desc(),
-                        *this->dst_pd_.desc(), *this->attr(),
-                        with_relu, this->negative_slope());
+            status_t status = jit_uni_dw_conv_fwd_kernel_f32<isa>::init_conf(
+                    jcp_, *this->desc(), this->src_pd_.desc(),
+                    *this->weights_pd_.desc(), *this->dst_pd_.desc(),
+                    *this->attr());
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_uni_dw_conv_fwd_kernel_f32<isa>::init_scratchpad(scratchpad,
+                    jcp_);
+
+            return status::success;
         }
 
         jit_conv_conf_t jcp_;
@@ -84,54 +92,37 @@ struct _jit_uni_dw_convolution_fwd_t: public cpu_primitive_t {
                 CHECK(this->weights_pd_.set_format(desired_wei_fmt));
             if (this->bias_pd_.desc()->format == any)
                 CHECK(this->bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
             return status::success;
         }
     };
 
-    _jit_uni_dw_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    _jit_uni_dw_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , padded_bias_(nullptr) {
-        kernel_ = new jit_uni_dw_conv_fwd_kernel_f32<isa>(conf_.jcp_, *conf_.attr());
-        if (conf_.want_padded_bias()) {
-            padded_bias_ = (float *)malloc(sizeof(float) * conf_.jcp_.oc, 64);
-            for (int c = conf_.jcp_.oc_without_padding; c < conf_.jcp_.oc; ++c)
-                padded_bias_[c] = 0;
-        }
-    }
+        : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr)
+    { kernel_ = new jit_uni_dw_conv_fwd_kernel_f32<isa>(pd()->jcp_, *pd()->attr()); }
 
-    ~_jit_uni_dw_convolution_fwd_t() {
-        delete kernel_;
-        free(padded_bias_);
-    }
+    ~_jit_uni_dw_convolution_fwd_t() { delete kernel_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_uni_dw_conv_fwd_kernel_f32<isa> *kernel_;
-    float *padded_bias_;
 };
 
 using jit_avx512_common_dw_convolution_fwd_t =
-    _jit_uni_dw_convolution_fwd_t<avx512_common, false>;
-using jit_avx2_dw_convolution_fwd_t =
-    _jit_uni_dw_convolution_fwd_t<avx2, false>;
-using jit_sse42_dw_convolution_fwd_t =
-    _jit_uni_dw_convolution_fwd_t<sse42, false>;
-
-using jit_avx512_common_dw_convolution_relu_t =
-    _jit_uni_dw_convolution_fwd_t<avx512_common, true>;
-using jit_avx2_dw_convolution_relu_t =
-    _jit_uni_dw_convolution_fwd_t<avx2, true>;
-using jit_sse42_dw_convolution_relu_t =
-    _jit_uni_dw_convolution_fwd_t<sse42, true>;
+    _jit_uni_dw_convolution_fwd_t<avx512_common>;
+using jit_avx2_dw_convolution_fwd_t = _jit_uni_dw_convolution_fwd_t<avx2>;
+using jit_sse42_dw_convolution_fwd_t = _jit_uni_dw_convolution_fwd_t<sse42>;
 
 template <cpu_isa_t isa>
 struct _jit_uni_dw_convolution_bwd_data_t: public cpu_primitive_t {
@@ -156,7 +147,9 @@ struct _jit_uni_dw_convolution_bwd_data_t: public cpu_primitive_t {
                 && this->set_default_params() == status::success
                 && utils::one_of(this->desc()->prop_kind, backward,
                         backward_data)
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && !this->has_zero_dim_memory()
                 && utils::everyone_is(data_type::f32,
                         this->desc()->diff_src_desc.data_type,
@@ -165,16 +158,23 @@ struct _jit_uni_dw_convolution_bwd_data_t: public cpu_primitive_t {
 
             if (!ok) return status::unimplemented;
 
-            return jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_conf(jcp_,
+            status_t status =
+                jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_conf(jcp_,
                         *this->desc(), *this->diff_src_pd_.desc(),
                         *this->weights_pd_.desc(), *this->diff_dst_pd_.desc());
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_uni_dw_conv_bwd_data_kernel_f32<isa>::init_scratchpad(
+                    scratchpad, jcp_);
+
+            return status::success;
         }
 
         jit_conv_conf_t jcp_;
 
     protected:
         virtual status_t set_default_params() override {
-
             using namespace memory_format;
             auto desired_act_fmt = isa == avx512_common ? nChw16c : nChw8c;
             auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;
@@ -185,21 +185,23 @@ struct _jit_uni_dw_convolution_bwd_data_t: public cpu_primitive_t {
                 CHECK(this->diff_dst_pd_.set_format(desired_act_fmt));
             if (this->weights_pd_.desc()->format == any)
                 CHECK(this->weights_pd_.set_format(desired_wei_fmt));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
 
             return status::success;
         }
     };
 
-    _jit_uni_dw_convolution_bwd_data_t(const pd_t *pd,
+    _jit_uni_dw_convolution_bwd_data_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    { kernel_ = new jit_uni_dw_conv_bwd_data_kernel_f32<isa>(conf_.jcp_); }
+        : cpu_primitive_t(apd, inputs, outputs)
+    { kernel_ = new jit_uni_dw_conv_bwd_data_kernel_f32<isa>(pd()->jcp_); }
     ~_jit_uni_dw_convolution_bwd_data_t() { delete kernel_; };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             execute_backward_data();
             break;
@@ -210,8 +212,9 @@ struct _jit_uni_dw_convolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
     jit_uni_dw_conv_bwd_data_kernel_f32<isa> *kernel_;
 };
 
@@ -243,7 +246,9 @@ struct _jit_uni_dw_convolution_bwd_weights_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == prop_kind::backward_weights
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && utils::everyone_is(data_type::f32,
                         this->desc()->src_desc.data_type,
                         this->desc()->diff_weights_desc.data_type,
@@ -251,16 +256,27 @@ struct _jit_uni_dw_convolution_bwd_weights_t: public cpu_primitive_t {
 
             if (!ok) return status::unimplemented;
 
-            return jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_conf(jcp_,
+            const int max_threads = mkldnn_in_parallel()
+                ? 1 : mkldnn_get_max_threads();
+
+            status_t status =
+                jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_conf(jcp_,
                         *this->desc(), *this->src_pd_.desc(),
-                        *this->diff_weights_pd_.desc(), *this->diff_dst_pd_.desc());
+                        *this->diff_weights_pd_.desc(),
+                        *this->diff_dst_pd_.desc(), max_threads);
+            if (status != status::success) return status;
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_uni_dw_conv_bwd_weights_kernel_f32<isa>::init_scratchpad(
+                    scratchpad, jcp_);
+
+            return status::success;
         }
 
         jit_conv_conf_t jcp_;
 
     protected:
         virtual status_t set_default_params() override {
-
             using namespace memory_format;
             auto desired_act_fmt = isa == avx512_common ? nChw16c : nChw8c;
             auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;
@@ -273,49 +289,35 @@ struct _jit_uni_dw_convolution_bwd_weights_t: public cpu_primitive_t {
                 CHECK(this->diff_weights_pd_.set_format(desired_wei_fmt));
             if (this->diff_bias_pd_.desc()->format == any)
                 CHECK(this->diff_bias_pd_.set_format(x));
+            if (this->desc()->alg_kind == alg_kind::convolution_auto)
+                CHECK(this->set_alg_kind(alg_kind::convolution_direct));
 
             return status::success;
         }
     };
 
-    _jit_uni_dw_convolution_bwd_weights_t(const pd_t *pd,
+    _jit_uni_dw_convolution_bwd_weights_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
+
     ~_jit_uni_dw_convolution_bwd_weights_t() {
         delete kernel_;
-        if (acc_ker_)
-            delete acc_ker_;
-
-        free(ws_reduction_);
-        free(bias_reduction_);
+        delete acc_ker_;
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward_weights();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_weights();
+    void execute_backward_weights() const;
+    bool do_parallel_reduction() const { return false; }
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     jit_uni_dw_conv_bwd_weights_kernel_f32<isa> *kernel_;
-
-    data_t *ws_reduction_ = nullptr;
-    data_t *bias_reduction_ = nullptr;
-
-    /* Used when executing a parallel reduction */
-    cpu_accumulator_1d_t<data_type::f32> *acc_ker_ = nullptr;
-    simple_barrier::ctx_t reduction_bctx_;
-
-    /* For parallel implementation details see '.cpp' file in the
-     * backwards-by-wights section. */
-    int nthr_, nthr_g_, nthr_mb_;
-
-    inline bool do_parallel_reduction(){
-        return false;
-    }
+    cpu_accumulator_1d_t<data_type::f32> *acc_ker_;
 };
 
 using jit_avx512_common_dw_convolution_bwd_weights_t =
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.cpp
index 2896b1b24..f659fdcca 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.cpp
@@ -32,21 +32,10 @@ namespace cpu {
 using namespace Xbyak;
 
 template <cpu_isa_t isa>
-bool jit_uni_eltwise_injector_f32<isa>::is_free_vec(size_t idx) {
-    for (size_t i = 0; i < preserved_vecs_count; i++) {
-        if (preserved_vec_idxs[i] == idx) {
-            return false;
-        }
-    }
-    return true;
-}
-
-template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::injector_preamble(size_t start_idx,
         size_t end_idx) {
     preserved_vecs_count = 0;
-    vecs_to_preserve = (size_t)jit_uni_eltwise_injector_f32<isa>::
-            aux_vecs_count(elt_alg);
+    vecs_to_preserve = (size_t)aux_vecs_count(alg_);
     start_idx_tail = start_idx;
 
     // For sse42 mask register has to be Xmm(0)
@@ -56,78 +45,80 @@ void jit_uni_eltwise_injector_f32<isa>::injector_preamble(size_t start_idx,
         preserved_vec_idxs[preserved_vecs_count++] = idx;
     }
 
-    for (size_t i = 0; i < vecs_count; i++) {
-        if (preserved_vecs_count >= vecs_to_preserve)
-            break;
+    for (size_t idx = preserved_vecs_count; idx < vecs_count; idx++) {
+        if (preserved_vecs_count >= vecs_to_preserve) break;
+        if (start_idx <= idx && idx < end_idx) continue;
 
-        size_t idx = i;
-        if (is_free_vec(idx) && (idx < start_idx || idx >= end_idx)) {
-            preserved_vec_idxs[preserved_vecs_count++] = idx;
-        }
+        preserved_vec_idxs[preserved_vecs_count++] = idx;
     }
 
     size_t preserved_vecs_count_tail = vecs_to_preserve - preserved_vecs_count;
     for (size_t i = 0; i < preserved_vecs_count_tail; i++) {
-        size_t idx = start_idx_tail;
-        if (is_free_vec(idx)) {
-            preserved_vec_idxs[preserved_vecs_count++] = idx;
-            start_idx_tail++;
-        }
+        preserved_vec_idxs[preserved_vecs_count++] = start_idx_tail++;
     }
 
     assert(preserved_vecs_count == vecs_to_preserve);
 
-    if (save_vecs_state) {
+    if (save_state_) {
         h->push(p_table);
 
-        h->sub(h->rsp, preserved_vecs_count * vlen);
+        if (preserved_vecs_count)
+            h->sub(h->rsp, preserved_vecs_count * vlen);
+
         for (size_t i = 0; i < preserved_vecs_count; ++i)
             h->uni_vmovups(h->ptr[h->rsp + i * vlen],
                     Vmm(preserved_vec_idxs[i]));
+
+        load_table_addr();
     }
 
     assign_regs();
 }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::injector_preamble_tail(
-        size_t start_idx) {
+void jit_uni_eltwise_injector_f32<isa>::injector_preamble_tail(size_t start_idx)
+{
     size_t tail_vecs_to_preserve = start_idx_tail - start_idx;
-    int idx_off = (vecs_to_preserve - tail_vecs_to_preserve);
+    if (tail_vecs_to_preserve == 0) return;
+
+    const int idx_off = vecs_to_preserve - tail_vecs_to_preserve;
 
-    if (tail_vecs_to_preserve > 0) {
-        if (save_vecs_state) {
+    if (save_state_) {
+        if (idx_off)
             h->add(h->rsp, idx_off * vlen);
-            for (size_t i = 0; i < tail_vecs_to_preserve; ++i)
-                h->uni_vmovups(Vmm(preserved_vec_idxs[idx_off + i]),
-                        h->ptr[h->rsp + i * vlen]);
-        }
 
-        for (size_t i = 0; i < tail_vecs_to_preserve; ++i) {
-            preserved_vec_idxs[idx_off + i] += tail_vecs_to_preserve;
-        }
+        for (size_t i = 0; i < tail_vecs_to_preserve; ++i)
+            h->uni_vmovups(Vmm(preserved_vec_idxs[idx_off + i]),
+                    h->ptr[h->rsp + i * vlen]);
+    }
 
-        if (save_vecs_state) {
-            for (size_t i = 0; i < tail_vecs_to_preserve; ++i)
-                h->uni_vmovups(h->ptr[h->rsp + i * vlen],
-                        Vmm(preserved_vec_idxs[idx_off + i]));
-            h->sub(h->rsp, idx_off * vlen);
-        }
+    for (size_t i = 0; i < tail_vecs_to_preserve; ++i)
+        preserved_vec_idxs[idx_off + i] += tail_vecs_to_preserve;
+
+    if (save_state_) {
+        for (size_t i = 0; i < tail_vecs_to_preserve; ++i)
+            h->uni_vmovups(h->ptr[h->rsp + i * vlen],
+                    Vmm(preserved_vec_idxs[idx_off + i]));
 
-        assign_regs();
+        if (idx_off)
+            h->sub(h->rsp, idx_off * vlen);
     }
+
+    assign_regs();
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::injector_postamble() {
-    if (save_vecs_state) {
-        for (size_t i = 0; i < preserved_vecs_count; ++i)
-            h->uni_vmovups(Vmm(preserved_vec_idxs[i]),
-                    h->ptr[h->rsp + i * vlen]);
+    if (!save_state_) return;
+
+    for (size_t i = 0; i < preserved_vecs_count; ++i)
+        h->uni_vmovups(Vmm(preserved_vec_idxs[i]),
+                h->ptr[h->rsp + i * vlen]);
+
+    if (preserved_vecs_count)
         h->add(h->rsp, preserved_vecs_count * vlen);
 
-        h->pop(p_table);
-    }
+    h->pop(p_table);
 }
 
 template <cpu_isa_t isa>
@@ -137,33 +128,26 @@ void jit_uni_eltwise_injector_f32<isa>::assign_regs() {
     vmm_aux1 = Vmm(preserved_vec_idxs[1]);
     vmm_aux2 = Vmm(preserved_vec_idxs[2]);
     vmm_aux3 = Vmm(preserved_vec_idxs[3]);
-
-    p_table = Xbyak::Reg64(table_reg_idx);
-    k_mask = Xbyak::Opmask(opmask_idx);
+    vmm_aux4 = Vmm(preserved_vec_idxs[4]);
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::exp_compute_vector(const Vmm &vmm_src) {
-    const unsigned char _op_floor = 1;
-
-    h->uni_vminps(vmm_src, vmm_src, h->ptr[p_table + 10 * vlen]);
-    h->uni_vmaxps(vmm_src, vmm_src, h->ptr[p_table + 11 * vlen]);
+    h->uni_vminps(vmm_src, vmm_src, table_val(10));
+    h->uni_vmaxps(vmm_src, vmm_src, table_val(11));
     h->uni_vmovups(vmm_aux0, vmm_src);
     //calculate exp(x)
     // fx = x * log2ef + 0.5
-    h->uni_vmulps(vmm_src, vmm_src, h->ptr[p_table + 2 * vlen]);
-    h->uni_vaddps(vmm_src, vmm_src, h->ptr[p_table + 1 * vlen]);
+    h->uni_vmulps(vmm_src, vmm_src, table_val(2));
+    h->uni_vaddps(vmm_src, vmm_src, table_val(1));
 
     // tmp = floorf(fx)
     if (isa == avx512_common) {
         h->vcvtps2dq(vmm_aux1 | h->T_rd_sae, vmm_src);
         h->vcvtdq2ps(vmm_aux1, vmm_aux1);
 
-        unsigned char _cmp_gt_os = 14;
-        Xbyak::Opmask k_mask_tmp = Xbyak::Opmask(2);
-        h->vcmpps(k_mask_tmp, vmm_aux1, vmm_src, _cmp_gt_os);
-        h->vmovups(vmm_aux3 | k_mask_tmp | h->T_z,
-                h->zword[p_table + 0 * vlen]);
+        h->vcmpps(k_mask, vmm_aux1, vmm_src, _cmp_nle_us);
+        h->vmovups(vmm_aux3 | k_mask | h->T_z, table_val(0));
 
         h->uni_vsubps(vmm_aux1, vmm_aux1, vmm_aux3);
     } else {
@@ -174,105 +158,213 @@ void jit_uni_eltwise_injector_f32<isa>::exp_compute_vector(const Vmm &vmm_src) {
     h->uni_vmovups(vmm_src, vmm_aux1); //vmm_src = fx
 
     //x = x - fx * ln2
-    h->uni_vfnmadd231ps(vmm_aux0, vmm_aux1, h->ptr[p_table + 3 * vlen]);
+    h->uni_vfnmadd231ps(vmm_aux0, vmm_aux1, table_val(3));
 
     // compute 2^n
     h->uni_vcvtps2dq(vmm_aux1, vmm_src);
-    h->uni_vpaddd(vmm_aux1, vmm_aux1, h->ptr[p_table + 4 * vlen]);
+    h->uni_vpaddd(vmm_aux1, vmm_aux1, table_val(4));
     h->uni_vpslld(vmm_aux1, vmm_aux1, 23); //Vmm(6) = 2^-fx
 
     // y = p5
-    h->uni_vmovups(vmm_src, h->ptr[p_table + 9 * vlen]);
+    h->uni_vmovups(vmm_src, table_val(9));
     // y = y * x + p4
-    h->uni_vfmadd213ps(vmm_src, vmm_aux0, h->ptr[p_table + 8 * vlen]);
+    h->uni_vfmadd213ps(vmm_src, vmm_aux0, table_val(8));
     // y = y * x + p3
-    h->uni_vfmadd213ps(vmm_src, vmm_aux0, h->ptr[p_table + 7 * vlen]);
+    h->uni_vfmadd213ps(vmm_src, vmm_aux0, table_val(7));
     // y = y * x + p2
-    h->uni_vfmadd213ps(vmm_src, vmm_aux0, h->ptr[p_table + 6 * vlen]);
+    h->uni_vfmadd213ps(vmm_src, vmm_aux0, table_val(6));
     // y = y * x + p1
-    h->uni_vfmadd213ps(vmm_src, vmm_aux0, h->ptr[p_table + 0 * vlen]);
+    h->uni_vfmadd213ps(vmm_src, vmm_aux0, table_val(0));
     // y = y * x + p0
-    h->uni_vfmadd213ps(vmm_src, vmm_aux0, h->ptr[p_table + 5 * vlen]);  //exp(q)
+    h->uni_vfmadd213ps(vmm_src, vmm_aux0, table_val(5));  //exp(q)
     // y = y * 2^n
     h->uni_vmulps(vmm_src, vmm_src, vmm_aux1);
 }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::relu_compute_vector(
-        const Vmm &vmm_src) {
-    unsigned char _cmp_gt_os = isa == avx512_common ? 14 : 6;
-
-    int alpha_off = 0 * vlen;
-    int zero_off = 1 * vlen;
+void jit_uni_eltwise_injector_f32<isa>::relu_compute_vector(const Vmm &vmm_src)
+{
+    const int alpha_off = 0, zero_off = 1;
 
     h->uni_vmovups(vmm_aux1, vmm_src);
     if (isa == sse42) {
         h->movups(vmm_mask, vmm_src);
-        h->mulps(vmm_src, h->ptr[p_table + alpha_off]);
-        h->cmpps(vmm_mask, h->ptr[p_table + zero_off], _cmp_gt_os);
+        h->mulps(vmm_src, table_val(alpha_off));
+        h->cmpps(vmm_mask, table_val(zero_off), _cmp_nle_us);
         h->blendvps(vmm_src, vmm_aux1);
     } else if (isa == avx2) {
-        h->vmulps(vmm_src, vmm_src, h->ptr[p_table + alpha_off]);
-        h->vcmpgtps(vmm_mask, vmm_aux1, h->ptr[p_table + zero_off]);
+        h->vmulps(vmm_src, vmm_src, table_val(alpha_off));
+        h->vcmpgtps(vmm_mask, vmm_aux1, table_val(zero_off));
         h->vblendvps(vmm_src, vmm_src, vmm_aux1, vmm_mask);
     } else if (isa == avx512_common) {
-        h->vmulps(vmm_src, vmm_src, h->ptr[p_table + alpha_off]);
-        h->vcmpps(k_mask, vmm_aux1, h->ptr[p_table + zero_off], _cmp_gt_os);
-        h->vblendmps(vmm_src | k_mask, vmm_src,
-                     vmm_aux1);
+        h->vmulps(vmm_src, vmm_src, table_val(alpha_off));
+        h->vcmpps(k_mask, vmm_aux1, table_val(zero_off), _cmp_nle_us);
+        h->vblendmps(vmm_src | k_mask, vmm_src, vmm_aux1);
     }
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::relu_zero_ns_compute_vector(
         const Vmm &vmm_src) {
-    int zero_off = 1 * vlen;
-    h->uni_vmaxps(vmm_src, vmm_src, h->ptr[p_table + zero_off]);
+    const int zero_off = 1;
+    h->uni_vmaxps(vmm_src, vmm_src, table_val(zero_off));
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::elu_compute_vector(const Vmm &vmm_src) {
-    const unsigned char _cmp_gt_os = 6;
-    const unsigned char _cmp_let_os = 2;
-    int alpha_off = 12 * vlen;
-    int zero_off = 13 * vlen;
+    const int alpha_off = 23, zero_off = 24;
 
     // compute exponent
     h->uni_vmovups(vmm_aux2, vmm_src);
     exp_compute_vector(vmm_src);
 
     // alpha * (exp(x) - 1)
-    h->uni_vsubps(vmm_src, vmm_src, h->ptr[p_table + 0 * 32]);
-    h->uni_vmulps(vmm_src, vmm_src, h->ptr[p_table + alpha_off]);
+    h->uni_vsubps(vmm_src, vmm_src, table_val(0));
+    h->uni_vmulps(vmm_src, vmm_src, table_val(alpha_off));
 
     // combine with mask
     if (isa == sse42) {
         h->pxor(vmm_mask, vmm_mask);
-        h->cmpps(vmm_mask,  vmm_aux2, _cmp_let_os);
+        h->cmpps(vmm_mask,  vmm_aux2, _cmp_le_os);
         h->blendvps(vmm_src, vmm_aux2);
     } else if (isa == avx2) {
-        h->uni_vcmpgtps(vmm_mask, vmm_aux2, h->ptr[p_table + zero_off]);
+        h->uni_vcmpgtps(vmm_mask, vmm_aux2, table_val(zero_off));
         h->uni_vblendvps(vmm_src, vmm_src, vmm_aux2, vmm_mask);
     } else if (isa == avx512_common) {
-        h->vcmpps(k_mask, vmm_aux2, h->ptr[p_table + zero_off], _cmp_gt_os);
+        h->vcmpps(k_mask, vmm_aux2, table_val(zero_off), _cmp_nle_us);
         h->vblendmps(vmm_src | k_mask, vmm_src, vmm_aux2);
     }
 }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::tanh_compute_vector(
-        const Vmm &vmm_src) {
-    // compute exp(2x)
-    h->uni_vaddps(vmm_src, vmm_src, vmm_src);
-    exp_compute_vector(vmm_src);
-    // dup exp(2x)
-    h->uni_vmovups(vmm_aux0, vmm_src);
-    // (exp(2x) - 1)
-    h->uni_vsubps(vmm_src, vmm_src, h->ptr[p_table + 0 * vlen]);
-    // (exp(2x) + 1)
-    h->uni_vaddps(vmm_aux0, vmm_aux0, h->ptr[p_table + 0 * vlen]);
-    // y = (exp(2x) - 1) / (exp(2x) + 1)
-    h->uni_vdivps(vmm_src, vmm_src, vmm_aux0);
+void jit_uni_eltwise_injector_f32<isa>::tanh_compute_vector(const Vmm &vmm_src)
+{
+    // # comes from Taylor expansion error bound
+    //  > linear_sat_point = single(sqrt(3) * 1b-12);
+    // # comes from the exp formula cancellation
+    //  > exp_bound_point = (single(log(3)/2));
+    // # comes from rounding accuracy in float
+    //  > one_sat_point = round(atanh(1 - 1b-25), single, RU);
+    //  > P = fpminimax(f, [|1, 3, 5, 7, 9|], [|24... |],
+    //            [linear_sat_point, exp_bound_point], relative, floating);
+    //  > err_bound = D(sup(supnorm(P, tanh(x),
+    //          [linear_sat_point, exp_bound_point], relative, theta)));
+    //    0x1.fffd6f00b9539p-25
+    //  > P;
+    //    x * (0x1.fffffep-1 + x^0x1p1 * (-0x1.55539ep-2 + x^0x1p1 *
+    //        (0x1.10be3ep-3 + x^0x1p1 * (-0x1.ae57b4p-5
+    //        + x^0x1p1 * 0x1.09fa1p-6))))
+
+    // register mapping
+    // vmm_src contains input
+    // vmm_aux0 contains mask of currently valid results.
+    //     1 is need computation, 0 is already computed
+    // vmm_aux1 contains current output
+    // vmm_aux2, vmm_aux3 contains auxiliary values
+    // vmm_aux4 contains the original sign of inputs
+
+    Label end_tanh_label;
+
+    auto test_exit =[&](Xbyak::Address threshold){
+        // is not necessary for >AVX, but should not matter on perf
+        h->uni_vmovups(vmm_aux0, vmm_src);
+        if (isa == avx512_common){
+            h->vcmpps(k_mask, vmm_aux0, threshold, 0x5);
+            h->kortestw(k_mask, k_mask);
+        } else {
+            h->uni_vcmpgeps(vmm_aux0, vmm_aux0, threshold);
+            h->uni_vtestps(vmm_aux0, vmm_aux0);
+        }
+        h->jz(end_tanh_label, Xbyak::CodeGenerator::T_NEAR);
+    };
+
+    auto blend_results=[&](Vmm vmm_partial_res){
+        if (isa == avx512_common)
+            h->vblendmps(vmm_aux1 | k_mask, vmm_aux1, vmm_partial_res);
+        else
+            h->uni_vblendvps(vmm_aux1, vmm_aux1, vmm_partial_res, vmm_aux0);
+    };
+
+    // because tanh(x) = -tanh(-x), we extract sign to make x postive
+    // and reapply sign at the end
+    // mov is not necessary for >AVX, but should not matter for performance
+    h->uni_vmovups(vmm_aux4, vmm_src);
+    h->uni_vandps(vmm_aux4, vmm_aux4, table_val(12));
+    h->uni_vandps(vmm_src, vmm_src, table_val(17));
+
+    // if x < linear_sat_point for all inputs, we just return the input
+    h->uni_vmovups(vmm_aux1, vmm_src);
+    test_exit(table_val(13));
+
+    // if one of the mask is one, we have to compute an better approx
+    h->uni_vmovups(vmm_aux2, vmm_src);
+    h->uni_vmulps(vmm_aux2, vmm_aux2, vmm_aux2);
+    h->uni_vmovups(vmm_aux3, table_val(22));
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux2, table_val(21));
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux2, table_val(20));
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux2, table_val(19));
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux2, table_val(18));
+    h->uni_vmulps(vmm_aux3, vmm_aux3, vmm_src);
+
+    // we blend only the result that need update
+    blend_results(vmm_aux3);
+
+    // if x < exp_bound_point, we go to return point
+    test_exit(table_val(14));
+
+    // if not we use a better approx 1 - 2 / (1 + exp(2x))
+    // compute 2x
+    h->uni_vmovups(vmm_aux3, vmm_src);
+    h->uni_vaddps(vmm_aux3, vmm_aux3, vmm_aux3);
+
+    // Compute exp(2x)
+    // We need to save kmask, vmm_aux0, vmm_aux1 and vmm_src as exp can use them
+    // vmm_src is not more read afterwards, so we do not have to save it
+    auto stack_size = 3 * vlen + (isa == avx512_common) * 4;
+    h->sub(h->rsp, stack_size);
+    h->uni_vmovups(h->ptr[h->rsp + 0 * vlen], vmm_aux0);
+    h->uni_vmovups(h->ptr[h->rsp + 1 * vlen], vmm_aux1);
+    h->uni_vmovups(h->ptr[h->rsp + 2 * vlen], vmm_src);
+    if (isa == avx512_common)
+        h->kmovw(h->ptr[h->rsp + 3 * vlen], k_mask);
+
+    exp_compute_vector(vmm_aux3);
+
+    h->uni_vmovups(vmm_aux0, h->ptr[h->rsp + 0 * vlen]);
+    h->uni_vmovups(vmm_aux1, h->ptr[h->rsp + 1 * vlen]);
+    h->uni_vmovups(vmm_src, h->ptr[h->rsp + 2 * vlen]);
+    if (isa == avx512_common)
+        h->kmovw(k_mask, h->ptr[h->rsp + 3 * vlen]);
+    h->add(h->rsp, stack_size);
+
+    // 1 + exp(2x)
+    h->uni_vaddps(vmm_aux3, vmm_aux3, table_val(0));
+
+    // 1 - 2 / (1 + exp(2x))
+    h->uni_vmovups(vmm_aux2, table_val(16));
+    h->uni_vdivps(vmm_aux2, vmm_aux2, vmm_aux3);
+    h->uni_vaddps(vmm_aux2, vmm_aux2, table_val(0));
+
+    // we blend only the result that need update
+    blend_results(vmm_aux2);
+
+    // finally, we saturate to 1 if needed
+    // TODO: maybe move that up if most inputs saturate in practice
+    if (isa == avx512_common)
+        h->vcmpps(k_mask, vmm_aux0, table_val(15), 0x5);
+    else {
+        h->uni_vmovups(vmm_aux0, vmm_src);
+        h->uni_vcmpgeps(vmm_aux0, vmm_aux0, table_val(15));
+    }
+    h->uni_vmovups(vmm_aux2, table_val(0));
+    blend_results(vmm_aux2);
+
+    h->L(end_tanh_label);
+    {
+        // we apply the sign of x to the result and we are done
+        h->uni_vmovups(vmm_src, vmm_aux1);
+        h->uni_vpxor(vmm_src, vmm_src, vmm_aux4);
+    }
 }
 
 template <cpu_isa_t isa>
@@ -284,24 +376,22 @@ void jit_uni_eltwise_injector_f32<isa>::square_compute_vector(
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::abs_compute_vector(const Vmm &vmm_src) {
     // compute abs(x) = _mm_and_ps(x, 01111..111));
-    h->uni_vandps(vmm_src, vmm_src, h->ptr[p_table + 0*vlen]);
+    h->uni_vandps(vmm_src, vmm_src, table_val(0));
 }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::sqrt_compute_vector(
-        const Vmm &vmm_src) {
+void jit_uni_eltwise_injector_f32<isa>::sqrt_compute_vector(const Vmm &vmm_src)
+{
     if (isa == avx512_common) {
-        unsigned char _cmp_gt_os = 6;
-
-        h->vcmpps(k_mask, vmm_src, h->ptr[p_table + 0 * vlen], _cmp_gt_os);
+        h->vcmpps(k_mask, vmm_src, table_val(0), _cmp_nle_us);
         h->uni_vsqrtps(vmm_aux1, vmm_src);
-        h->uni_vmovups(vmm_src, h->ptr[p_table + 0*vlen]);
+        h->uni_vmovups(vmm_src, table_val(0));
         h->vblendmps(vmm_src | k_mask, vmm_src, vmm_aux1);
     } else {
         h->uni_vmovups(vmm_mask, vmm_src);
-        h->uni_vcmpgtps(vmm_mask, vmm_mask, h->ptr[p_table + 0*vlen]);
+        h->uni_vcmpgtps(vmm_mask, vmm_mask, table_val(0));
         h->uni_vsqrtps(vmm_aux1, vmm_src);
-        h->uni_vmovups(vmm_src, h->ptr[p_table + 0*vlen]);
+        h->uni_vmovups(vmm_src, table_val(0));
         h->uni_vblendvps(vmm_src, vmm_src, vmm_aux1, vmm_mask);
     }
 }
@@ -310,48 +400,39 @@ template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::linear_compute_vector(
         const Vmm &vmm_src) {
     // compute x = alpha * x + beta;
-    h->uni_vmovups(vmm_aux0, h->ptr[p_table + 0*vlen]);
-    h->uni_vfmadd213ps(vmm_src, vmm_aux0, h->ptr[p_table + 1*vlen]);
+    h->uni_vmovups(vmm_aux0, table_val(0));
+    h->uni_vfmadd213ps(vmm_src, vmm_aux0, table_val(1));
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::bounded_relu_compute_vector(
         const Vmm &vmm_src) {
     // compute bounded relu */
-    h->uni_vmaxps(vmm_src, vmm_src, h->ptr[p_table + 1*vlen]);
-    h->uni_vminps(vmm_src, vmm_src, h->ptr[p_table + 0*vlen]);
-}
-
-template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::clamp_compute_vector(
-        const Vmm &vmm_src) {
-    h->uni_vmaxps(vmm_src, vmm_src, h->ptr[p_table + 1*vlen]);
-    h->uni_vminps(vmm_src, vmm_src, h->ptr[p_table + 0*vlen]);
+    h->uni_vmaxps(vmm_src, vmm_src, table_val(1));
+    h->uni_vminps(vmm_src, vmm_src, table_val(0));
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::soft_relu_compute_vector(
         const Vmm &vmm_src) {
-    const unsigned char _op_floor = 1;
     // duplicate src
     h->uni_vmovups(vmm_aux2, vmm_src);
 
-    h->uni_vminps(vmm_src, vmm_src, h->ptr[p_table + 24 * vlen]);
-    h->uni_vmaxps(vmm_src, vmm_src, h->ptr[p_table + 25 * vlen]);
+    h->uni_vminps(vmm_src, vmm_src, table_val(24));
+    h->uni_vmaxps(vmm_src, vmm_src, table_val(25));
     h->uni_vmovups(vmm_aux1, vmm_src);
     // calculate exp(x)
     // fx = x * log2ef + 0.5
-    h->uni_vmulps(vmm_src, vmm_src, h->ptr[p_table + 2 * vlen]);
-    h->uni_vaddps(vmm_src, vmm_src, h->ptr[p_table + 1 * vlen]);
+    h->uni_vmulps(vmm_src, vmm_src, table_val(2));
+    h->uni_vaddps(vmm_src, vmm_src, table_val(1));
 
     // tmp = floorf(fx)
     if (isa == avx512_common) {
         h->vcvtps2dq(vmm_aux0 | h->T_rd_sae, vmm_src);
         h->vcvtdq2ps(vmm_aux0, vmm_aux0);
 
-        unsigned char _cmp_gt_os = 14;
-        h->vcmpps(k_mask, vmm_aux0, vmm_src, _cmp_gt_os);
-        h->vmovups(vmm_aux3 | k_mask | h->T_z, h->ptr[p_table + 0 * vlen]);
+        h->vcmpps(k_mask, vmm_aux0, vmm_src, _cmp_nle_us);
+        h->vmovups(vmm_aux3 | k_mask | h->T_z, table_val(0));
 
         h->vsubps(vmm_aux0, vmm_aux0, vmm_aux3);
     } else {
@@ -361,32 +442,32 @@ void jit_uni_eltwise_injector_f32<isa>::soft_relu_compute_vector(
     // keep fx for further computations
     h->uni_vmovups(vmm_src, vmm_aux0); //vmm_src = fx
     // calculation fx * ln2
-    h->uni_vmulps(vmm_aux0, vmm_aux0, h->ptr[p_table + 3 * vlen]);
+    h->uni_vmulps(vmm_aux0, vmm_aux0, table_val(3));
     // x = x - fx * ln2
     h->uni_vsubps(vmm_aux1, vmm_aux1, vmm_aux0);
     // y = p5
-    h->uni_vmovups(vmm_aux3, h->ptr[p_table + 22 * vlen]);
+    h->uni_vmovups(vmm_aux3, table_val(22));
     // y = y * x + p4
-    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, h->ptr[p_table + 21 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, table_val(21));
     // y = y * x + p3
-    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, h->ptr[p_table + 20 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, table_val(20));
     // y = y * x + p2
-    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, h->ptr[p_table + 19 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, table_val(19));
     // y = y * x + p1
-    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, h->ptr[p_table + 0 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, table_val(0));
     // y = y * x + p0
-    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, h->ptr[p_table + 17 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux3, vmm_aux1, table_val(17));
 
     // compute 2^(-n)
     if (isa == avx512_common) {
-        h->vmulps(vmm_aux1, vmm_src, h->ptr[p_table + 23 * vlen]);
+        h->vmulps(vmm_aux1, vmm_src, table_val(23));
         h->vcvtps2dq(vmm_aux1, vmm_aux1);
     } else {
         h->uni_vcvtps2dq(vmm_aux1, vmm_src);
-        h->uni_vpsignd(vmm_aux1, vmm_aux1, h->ptr[p_table + 23 * vlen]);
+        h->uni_vpsignd(vmm_aux1, vmm_aux1, table_val(23));
     }
 
-    h->uni_vpaddd(vmm_aux1, vmm_aux1, h->ptr[p_table + 4 * vlen]);
+    h->uni_vpaddd(vmm_aux1, vmm_aux1, table_val(4));
     h->uni_vpslld(vmm_aux1, vmm_aux1, 23); //vmm_aux1 = 2^-fx
     // calculate ln(1 + y)
     h->uni_vaddps(vmm_aux3, vmm_aux3, vmm_aux1);
@@ -396,46 +477,45 @@ void jit_uni_eltwise_injector_f32<isa>::soft_relu_compute_vector(
     h->uni_vpsrld(vmm_src, vmm_src, 23);
     h->uni_vcvtdq2ps(vmm_src, vmm_src);
     // got n. where n is x = 2^n * y. y = 0.5 .. 1
-    h->uni_vsubps(vmm_src, vmm_src, h->ptr[p_table + 5 * vlen]);
+    h->uni_vsubps(vmm_src, vmm_src, table_val(5));
 
-    h->uni_vandps(vmm_aux3, vmm_aux3, h->ptr[p_table + 6 * vlen]);
+    h->uni_vandps(vmm_aux3, vmm_aux3, table_val(6));
     // got y. (mantisa)  0.5 < y < 1
-    h->uni_vorps(vmm_aux3, vmm_aux3, h->ptr[p_table + 7 * vlen]);
+    h->uni_vorps(vmm_aux3, vmm_aux3, table_val(7));
     // y  = y - 1
-    h->uni_vsubps(vmm_aux3, vmm_aux3, h->ptr[p_table + 0 * vlen]);
+    h->uni_vsubps(vmm_aux3, vmm_aux3, table_val(0));
     // y = p8
-    h->uni_vmovups(vmm_aux1, h->ptr[p_table + 16 * vlen]);
+    h->uni_vmovups(vmm_aux1, table_val(16));
     // y = y * x + p7
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 15 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(15));
     // y = y * x + p6
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 14 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(14));
     // y = y * x + p5
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 13 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(13));
     // y = y * x + p4
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 12 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(12));
     // y = y * x + p3
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 11 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(11));
     // y = y * x + p2
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 10 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(10));
     // y = y * x + p1
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 9 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(9));
     // y = y * x + p0 ; p0 = 0
-    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, h->ptr[p_table + 8 * vlen]);
+    h->uni_vfmadd213ps(vmm_aux1, vmm_aux3, table_val(8));
     //calculate ln(2) * n
-    h->uni_vmulps(vmm_src, vmm_src, h->ptr[p_table + 3 * vlen]);
+    h->uni_vmulps(vmm_src, vmm_src, table_val(3));
     h->uni_vaddps(vmm_aux1, vmm_aux1, vmm_src);
     h->uni_vaddps(vmm_aux1, vmm_aux1, vmm_aux0);
 
     // get vmm_mask = src > max logf
     h->uni_vmovups(vmm_mask, vmm_aux2);
     if (isa == avx512_common) {
-        unsigned char _cmp_gt_os = 6;
         // y = (x < max log f) ? soft_relu(x) : x
-        h->vcmpps(k_mask, vmm_mask, h->ptr[p_table + 24 * vlen], _cmp_gt_os);
+        h->vcmpps(k_mask, vmm_mask, table_val(24), _cmp_nle_us);
         h->vblendmps(vmm_aux1 | k_mask, vmm_aux1, vmm_aux2);
     } else {
         // y = (x < max log f) ? soft_relu(x) : x
-        h->uni_vcmpgtps(vmm_mask, vmm_mask, h->ptr[p_table + 24 * vlen]);
+        h->uni_vcmpgtps(vmm_mask, vmm_mask, table_val(24));
         h->uni_vblendvps(vmm_aux1, vmm_aux1, vmm_aux2, vmm_mask);
     }
 
@@ -445,23 +525,46 @@ void jit_uni_eltwise_injector_f32<isa>::soft_relu_compute_vector(
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::logistic_compute_vector(
         const Vmm &vmm_src) {
+    // we store the original sign and make x negative
+    // IMPORTANT: we assume vmm_aux0 to be xmm0, as for sse4.2 path it is required
+    // IMPORTANT: we use vmm_aux2 for the mask as exp_compute does not use it.
+    h->uni_vmovups(vmm_aux2, vmm_src);
+    h->uni_vandps(vmm_aux2, vmm_aux2, table_val(12));
+    h->uni_vorps(vmm_src, vmm_src, table_val(12));
+
     exp_compute_vector(vmm_src);
     // dup exp(x)
-    h->uni_vmovups(vmm_aux0, vmm_src);
+    h->uni_vmovups(vmm_aux1, vmm_src);
     // (exp(x) + 1)
-    h->uni_vaddps(vmm_aux0, vmm_aux0, h->ptr[p_table + 0 * vlen]);
+    h->uni_vaddps(vmm_aux1, vmm_aux1, table_val(0));
     // y = exp(x) / (exp(x) + 1)
-    h->uni_vdivps(vmm_src, vmm_src, vmm_aux0);
+    h->uni_vdivps(vmm_src, vmm_src, vmm_aux1);
+
+    // Now we have to apply the "symmetry" based on original sign
+    h->uni_vmovups(vmm_aux3, table_val(0));
+    h->uni_vsubps(vmm_aux3, vmm_aux3, vmm_src);
+    if (isa == avx512_common) {
+        h->vptestmd(k_mask, vmm_aux2, vmm_aux2);
+        h->vblendmps(vmm_aux3 | k_mask, vmm_aux3, vmm_src);
+    } else {
+        h->uni_vmovups(vmm_aux0, vmm_aux2);// The mask should be xmm0 for sse4.2
+        h->uni_vblendvps(vmm_aux3, vmm_aux3, vmm_src, vmm_aux0);
+    }
+    h->uni_vmovups(vmm_src, vmm_aux3);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_eltwise_injector_f32<isa>::clamp_compute_vector(
+        const Vmm &vmm_src) {
+    // compute clamp */
+    h->uni_vmaxps(vmm_src, vmm_src, table_val(1));
+    h->uni_vminps(vmm_src, vmm_src, table_val(0));
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::relu_prepare_table() {
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(alpha));
-    }
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(0);
-    }
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(alpha_));
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(0);
 }
 
 template <cpu_isa_t isa>
@@ -479,20 +582,28 @@ void jit_uni_eltwise_injector_f32<isa>::elu_prepare_table() {
             0x3d2bb1b1, // [8] p4 = 0.041917507f
             0x3c091ec1, // [9] p5 = 0.008369149f
             0x42b0c0a5, //[10] max logf = 88.3762589f
-            0xc1766666  //[11] min logf = -14.5f
+            0xc1766666, //[11] min logf = -14.5f
+            // tanh(x) constants,
+            0x80000000, //[12] mask to extract sign
+            0x39ddb3d7, //[13] arg below which tanh(x) = x
+            0x3f0c9f54, //[14] arg below which pol approx is valid
+            0x41102cb4, //[15] arg after which tanh(x) = 1
+            0xc0000000, //[16] -2.0f
+            0x7fffffff, //[17] mask to make positive
+            // tanh pol approx
+            0x3f7fffff, //[18] p0
+            0xbeaaa9cf, //[19] p1
+            0x3e085f1f, //[20] p2
+            0xbd572bda, //[21] p3
+            0x3c84fd08, //[22] p4
     };
 
     for (size_t i = 0; i < sizeof(cvals) / sizeof(cvals[0]); ++i) {
-        for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-            h->dd(cvals[i]);
-        }
-    }
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(alpha));
-    }
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(0);
+        for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(cvals[i]);
     }
+
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(alpha_));
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(0);
 }
 
 template <cpu_isa_t isa>
@@ -537,63 +648,48 @@ void jit_uni_eltwise_injector_f32<isa>::soft_relu_prepare_table() {
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::abs_prepare_table() {
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(0x7fffffff);
-    }
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(0x7fffffff);
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::sqrt_prepare_table() {
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(0);
-    }
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(0);
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::linear_prepare_table() {
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(alpha));
-    }
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(beta));
-    }
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(alpha_));
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(beta_));
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::bounded_relu_prepare_table() {
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(alpha));
-    }
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(0);
-    }
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(alpha_));
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(0);
 }
 
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::clamp_prepare_table() {
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(alpha));
-    }
-    for (size_t d = 0; d < vlen / sizeof(float); ++d) {
-        h->dd(float2int(beta));
-    }
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(alpha_));
+    for (size_t d = 0; d < vlen / sizeof(float); ++d) h->dd(float2int(beta_));
 }
 
 template <cpu_isa_t isa>
-int jit_uni_eltwise_injector_f32<isa>::aux_vecs_count(alg_kind_t elt_alg) {
-    switch (elt_alg) {
-        case alg_kind::eltwise_relu: return (alpha == 0.f) ? 0 : 2;
-        case alg_kind::eltwise_elu: return 4;
-        case alg_kind::eltwise_tanh: return 4;
-        case alg_kind::eltwise_square: return 0;
-        case alg_kind::eltwise_abs: return 0;
-        case alg_kind::eltwise_sqrt: return 2;
-        case alg_kind::eltwise_linear: return 1;
-        case alg_kind::eltwise_bounded_relu: return 0;
-        case alg_kind::eltwise_soft_relu: return 4;
-        case alg_kind::eltwise_logistic: return 4;
-        case alg_kind::eltwise_clamp: return 0;
-        default: assert(!"unsupported eltwise algorithm");
+int jit_uni_eltwise_injector_f32<isa>::aux_vecs_count(alg_kind_t alg_) {
+    switch (alg_) {
+    case alg_kind::eltwise_relu: return (alpha_ == 0.f) ? 0 : 2;
+    case alg_kind::eltwise_elu: return 4;
+    case alg_kind::eltwise_tanh: return 5;
+    case alg_kind::eltwise_square: return 0;
+    case alg_kind::eltwise_abs: return 0;
+    case alg_kind::eltwise_sqrt: return 2;
+    case alg_kind::eltwise_linear: return 1;
+    case alg_kind::eltwise_bounded_relu: return 0;
+    case alg_kind::eltwise_soft_relu: return 4;
+    case alg_kind::eltwise_logistic: return 4;
+    case alg_kind::eltwise_clamp: return 0;
+    case alg_kind::eltwise_exp: return 4;
+    default: assert(!"unsupported eltwise algorithm");
     }
 
     return 0;
@@ -602,37 +698,25 @@ int jit_uni_eltwise_injector_f32<isa>::aux_vecs_count(alg_kind_t elt_alg) {
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::compute_body(size_t start_idx,
         size_t end_idx) {
-    h->mov(p_table, l_table);
-
+    using namespace alg_kind;
     for (size_t idx = start_idx; idx < end_idx; idx++) {
-        switch (elt_alg) {
-            case alg_kind::eltwise_relu:
-                if (alpha == 0.f)
-                    relu_zero_ns_compute_vector(Vmm(idx));
-                else
-                    relu_compute_vector(Vmm(idx));
-                break;
-            case alg_kind::eltwise_elu:
-                elu_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_tanh:
-                tanh_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_square:
-                square_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_abs:
-                abs_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_sqrt:
-                sqrt_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_linear:
-                linear_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_bounded_relu:
-                bounded_relu_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_soft_relu:
-                soft_relu_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_logistic:
-                logistic_compute_vector(Vmm(idx)); break;
-            case alg_kind::eltwise_clamp:
-                clamp_compute_vector(Vmm(idx)); break;
-            default: assert(!"unsupported eltwise algorithm");
+        switch (alg_) {
+        case eltwise_relu:
+            if (alpha_ == 0.f) relu_zero_ns_compute_vector(Vmm(idx));
+            else relu_compute_vector(Vmm(idx));
+            break;
+        case eltwise_elu: elu_compute_vector(Vmm(idx)); break;
+        case eltwise_tanh: tanh_compute_vector(Vmm(idx)); break;
+        case eltwise_square: square_compute_vector(Vmm(idx)); break;
+        case eltwise_abs: abs_compute_vector(Vmm(idx)); break;
+        case eltwise_sqrt: sqrt_compute_vector(Vmm(idx)); break;
+        case eltwise_linear: linear_compute_vector(Vmm(idx)); break;
+        case eltwise_bounded_relu: bounded_relu_compute_vector(Vmm(idx)); break;
+        case eltwise_soft_relu: soft_relu_compute_vector(Vmm(idx)); break;
+        case eltwise_logistic: logistic_compute_vector(Vmm(idx)); break;
+        case eltwise_clamp: clamp_compute_vector(Vmm(idx)); break;
+        case eltwise_exp: exp_compute_vector(Vmm(idx)); break;
+        default: assert(!"unsupported eltwise algorithm");
         }
     }
 }
@@ -640,9 +724,7 @@ void jit_uni_eltwise_injector_f32<isa>::compute_body(size_t start_idx,
 template <cpu_isa_t isa>
 void jit_uni_eltwise_injector_f32<isa>::compute_vector_range(size_t start_idx,
         size_t end_idx) {
-    assert(start_idx < vecs_count);
-    assert(end_idx <= vecs_count);
-    assert(start_idx < end_idx);
+    assert(start_idx < end_idx && end_idx <= vecs_count);
 
     injector_preamble(start_idx, end_idx);
     compute_body(start_idx_tail, end_idx);
@@ -652,38 +734,30 @@ void jit_uni_eltwise_injector_f32<isa>::compute_vector_range(size_t start_idx,
 }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::compute_vector(size_t idx) {
-    compute_vector_range(idx, idx + 1);
-}
+void jit_uni_eltwise_injector_f32<isa>::prepare_table(bool gen_table) {
+    using namespace alg_kind;
 
-template <cpu_isa_t isa>
-void jit_uni_eltwise_injector_f32<isa>::prepare_table() {
     h->align(64);
     h->L(l_table);
 
-    switch (elt_alg) {
-        case alg_kind::eltwise_relu:
-            relu_prepare_table(); break;
-        case alg_kind::eltwise_elu:
-        case alg_kind::eltwise_tanh:
-        case alg_kind::eltwise_logistic:
+    if (gen_table) {
+        switch (alg_) {
+        case eltwise_relu: relu_prepare_table(); break;
+        case eltwise_elu:
+        case eltwise_tanh:
+        case eltwise_logistic:
+        case eltwise_exp:
             elu_prepare_table(); break;
-        case alg_kind::eltwise_soft_relu:
-            soft_relu_prepare_table(); break;
-        case alg_kind::eltwise_abs:
-            abs_prepare_table(); break;
-        case alg_kind::eltwise_sqrt:
-            sqrt_prepare_table(); break;
-        case alg_kind::eltwise_linear:
-            linear_prepare_table(); break;
-        case alg_kind::eltwise_bounded_relu:
-            bounded_relu_prepare_table(); break;
-        case alg_kind::eltwise_square:
-            break;
-        case alg_kind::eltwise_clamp:
-            clamp_prepare_table(); break;
+        case eltwise_soft_relu: soft_relu_prepare_table(); break;
+        case eltwise_abs: abs_prepare_table(); break;
+        case eltwise_sqrt: sqrt_prepare_table(); break;
+        case eltwise_linear: linear_prepare_table(); break;
+        case eltwise_bounded_relu: bounded_relu_prepare_table(); break;
+        case eltwise_square: break;
+        case eltwise_clamp: clamp_prepare_table(); break;
         default: assert(!"unsupported eltwise algorithm");
     }
+    }
 }
 
 template struct jit_uni_eltwise_injector_f32<avx512_common>;
@@ -861,27 +935,27 @@ struct jit_uni_kernel_fwd_f32: public jit_uni_eltwise_kernel_f32,
     jit_uni_kernel_fwd_f32(const eltwise_desc_t &desc)
         : jit_uni_eltwise_kernel_f32(desc), jit_generator() {
 
-        eltwise_injector = new jit_uni_eltwise_injector_f32<isa>(this,
-                desc.alg_kind, desc.alpha, desc.beta, false, 9, 1);
+        eltwise_injector_ = new jit_uni_eltwise_injector_f32<isa>(this,
+                desc.alg_kind, desc.alpha, desc.beta, false, r9, Opmask(1));
 
         using namespace alg_kind;
 
         assert(is_bwd() == false);
         assert(utils::one_of(desc.alg_kind, eltwise_tanh, eltwise_elu,
                     eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
-                    eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic));
+                    eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic,
+                    eltwise_clamp, eltwise_exp));
 
         preamble();
 
-        Label vectorized_loop_start;
-        Label reminder_loop_start;
-        Label vectorized_loop_end;
-        Label reminder_loop_end;
-
         Reg64 param = abi_param1;
         mov(reg_from, ptr[param + GET_OFF(from)]);
         mov(reg_to, ptr[param + GET_OFF(to)]);
         mov(reg_work_amount, ptr[param + GET_OFF(work_amount)]);
+        eltwise_injector_->load_table_addr();
+
+        Label reminder_loop_start, reminder_loop_end;
+        Label vectorized_loop_start, vectorized_loop_end;
 
         cmp(reg_work_amount, simd_w);
         jl(reminder_loop_start, T_NEAR);
@@ -889,7 +963,7 @@ struct jit_uni_kernel_fwd_f32: public jit_uni_eltwise_kernel_f32,
         L(vectorized_loop_start);
 
         uni_vmovups(vmm_src, ptr[reg_from]);
-        eltwise_injector->compute_vector(vmm_src.getIdx());
+        eltwise_injector_->compute_vector(vmm_src.getIdx());
         uni_vmovups(ptr[reg_to], vmm_src);
 
         add(reg_from, vlen);
@@ -907,7 +981,7 @@ struct jit_uni_kernel_fwd_f32: public jit_uni_eltwise_kernel_f32,
         jle(reminder_loop_end, T_NEAR);
 
         movss(xmm_src, ptr[reg_from]);
-        eltwise_injector->compute_vector(xmm_src.getIdx());
+        eltwise_injector_->compute_vector(xmm_src.getIdx());
         movss(ptr[reg_to], xmm_src);
 
         add(reg_from, sizeof(float));
@@ -920,14 +994,12 @@ struct jit_uni_kernel_fwd_f32: public jit_uni_eltwise_kernel_f32,
 
         postamble();
 
-        eltwise_injector->prepare_table();
+        eltwise_injector_->prepare_table();
 
         ker_ = (decltype(ker_))this->getCode();
     }
 
-    ~jit_uni_kernel_fwd_f32() {
-        delete eltwise_injector;
-    }
+    ~jit_uni_kernel_fwd_f32() { delete eltwise_injector_; }
 
 private:
     using Vmm = typename utils::conditional3<isa == sse42, Xmm,
@@ -944,7 +1016,7 @@ private:
     Xmm xmm_src = Xmm(1);
     Vmm vmm_src = Vmm(1);
 
-    jit_uni_eltwise_injector_f32<isa>* eltwise_injector;
+    jit_uni_eltwise_injector_f32<isa> *eltwise_injector_;
 };
 
 } /* namespace */
@@ -959,23 +1031,23 @@ status_t jit_uni_eltwise_fwd_t<isa>::pd_t::init() {
                 prop_kind::forward_inference)
         && utils::everyone_is(data_type::f32, desc()->data_desc.data_type)
         && !has_zero_dim_memory()
-        && IMPLICATION(isa > avx2, utils::one_of(desc()->alg_kind,
-                eltwise_relu, eltwise_elu))
-        && IMPLICATION(isa == sse42 || isa == avx2, utils::one_of(
-                    desc()->alg_kind, eltwise_relu, eltwise_tanh, eltwise_elu,
-                    eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
-                    eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic))
-        && memory_desc_wrapper(src_pd()).is_dense()
+        && utils::one_of(desc()->alg_kind, eltwise_relu, eltwise_tanh,
+                eltwise_elu, eltwise_square, eltwise_abs, eltwise_sqrt,
+                eltwise_linear, eltwise_bounded_relu, eltwise_soft_relu,
+                eltwise_logistic, eltwise_clamp, eltwise_exp)
+        && memory_desc_wrapper(src_pd()).is_dense(true)
+        && IMPLICATION(!memory_desc_wrapper(src_pd()).is_dense(false),
+                math::eltwise_fwd_preserves_zero(desc()->alg_kind, true))
         && attr()->has_default_values();
 
     return ok ? status::success : status::unimplemented;
 }
 
 template <cpu_isa_t isa>
-jit_uni_eltwise_fwd_t<isa>::jit_uni_eltwise_fwd_t(const pd_t *pd,
+jit_uni_eltwise_fwd_t<isa>::jit_uni_eltwise_fwd_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), kernel_(nullptr) {
-    const auto &desc = *conf_.desc();
+    : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr) {
+    const auto &desc = *pd()->desc();
     switch (desc.alg_kind) {
     case alg_kind::eltwise_relu:
         kernel_ = new jit_uni_relu_kernel_f32<isa>(desc); break;
@@ -989,13 +1061,13 @@ jit_uni_eltwise_fwd_t<isa>::~jit_uni_eltwise_fwd_t()
 { delete kernel_; }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_fwd_t<isa>::execute_forward() {
+void jit_uni_eltwise_fwd_t<isa>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
 
-    const size_t nelems = data_d.nelems();
+    const size_t nelems = data_d.nelems(true);
 
     src += data_d.blocking_desc().offset_padding;
     dst += data_d.blocking_desc().offset_padding;
@@ -1037,10 +1109,10 @@ status_t jit_uni_eltwise_bwd_t<isa>::pd_t::init() {
 }
 
 template <cpu_isa_t isa>
-jit_uni_eltwise_bwd_t<isa>::jit_uni_eltwise_bwd_t(const pd_t *pd,
+jit_uni_eltwise_bwd_t<isa>::jit_uni_eltwise_bwd_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), kernel_(nullptr) {
-    const auto &desc = *conf_.desc();
+    : cpu_primitive_t(apd, inputs, outputs), kernel_(nullptr) {
+    const auto &desc = *pd()->desc();
     switch (desc.alg_kind) {
     case alg_kind::eltwise_relu:
         kernel_ = new jit_uni_relu_kernel_f32<isa>(desc); break;
@@ -1053,13 +1125,13 @@ jit_uni_eltwise_bwd_t<isa>::~jit_uni_eltwise_bwd_t()
 { delete kernel_; }
 
 template <cpu_isa_t isa>
-void jit_uni_eltwise_bwd_t<isa>::execute_backward() {
+void jit_uni_eltwise_bwd_t<isa>::execute_backward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper diff_data_d(conf_.diff_src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper diff_data_d(pd()->diff_src_pd());
 
     const size_t nelems = data_d.nelems();
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.hpp
index 063556dbd..1acc2390d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_eltwise.hpp
@@ -18,7 +18,6 @@
 #define CPU_JIT_UNI_ELTWISE_HPP
 
 #include <assert.h>
-#include <mkldnn.hpp>
 
 #include "c_types_map.hpp"
 #include "cpu_eltwise_pd.hpp"
@@ -33,45 +32,57 @@ namespace cpu {
 
 template <cpu_isa_t isa>
 struct jit_uni_eltwise_injector_f32 {
-    jit_uni_eltwise_injector_f32(jit_generator* host, alg_kind_t elt_alg_,
-            float alpha_, float beta_, bool save_vecs_state_ = true,
-            int table_reg_idx_ = 0, int opmask_idx_ = 1) {
+    using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
+            isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
+
+    jit_uni_eltwise_injector_f32(jit_generator *host, alg_kind_t alg,
+            float alpha, float beta, bool save_state = true,
+            Xbyak::Reg64 p_table = Xbyak::util::rax,
+            Xbyak::Opmask k_mask = Xbyak::Opmask(1))
+        : alg_(alg), alpha_(alpha), beta_(beta), h(host)
+        , save_state_(save_state), p_table(p_table), k_mask(k_mask)
+    {
+        using namespace alg_kind;
         assert(utils::one_of(isa, sse42, avx2, avx512_common));
-        assert(utils::one_of(elt_alg_, alg_kind::eltwise_relu,
-                alg_kind::eltwise_tanh, alg_kind::eltwise_elu,
-                alg_kind::eltwise_square, alg_kind::eltwise_abs,
-                alg_kind::eltwise_sqrt, alg_kind::eltwise_linear,
-                alg_kind::eltwise_bounded_relu, alg_kind::eltwise_soft_relu,
-                alg_kind::eltwise_logistic, alg_kind::eltwise_clamp));
-
-        h = host;
-        elt_alg = elt_alg_;
-        alpha = alpha_;
-        beta = beta_;
-        save_vecs_state = save_vecs_state_;
-        table_reg_idx = table_reg_idx_;
-        opmask_idx = opmask_idx_;
+        assert(utils::one_of(alg_, eltwise_relu, eltwise_tanh, eltwise_elu,
+                    eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
+                    eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic,
+                    eltwise_clamp, eltwise_exp));
     }
 
+    // note that eltwise.scale is ignored
+    jit_uni_eltwise_injector_f32(jit_generator *host,
+            const post_ops_t::entry_t::eltwise_t &eltwise,
+            bool save_state = true, Xbyak::Reg64 p_table = Xbyak::util::rax,
+            Xbyak::Opmask k_mask = Xbyak::Opmask(1))
+        : jit_uni_eltwise_injector_f32(host, eltwise.alg, eltwise.alpha,
+                eltwise.beta, save_state, p_table, k_mask) {}
+
     void compute_vector_range(size_t start_idx, size_t end_idx);
-    void compute_vector(size_t idx);
-    void prepare_table();
+    void compute_vector(size_t idx) { compute_vector_range(idx, idx + 1); }
+    void prepare_table(bool gen_table=true);
+    void load_table_addr() { h->mov(p_table, l_table); }
 
-private:
-    jit_generator* h;
+    const alg_kind_t alg_;
+    const float alpha_;
+    const float beta_;
 
-    using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
-            isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
+    jit_generator * const h;
 
-    size_t vlen = cpu_isa_traits<isa>::vlen;
+    const bool save_state_;
+    const Xbyak::Reg64 p_table;
+    const Xbyak::Opmask k_mask;
+    Xbyak::Label l_table;
 
-    alg_kind_t elt_alg;
-    float alpha;
-    float beta;
+private:
+    // if only the injector was inherited from jit_generator...
+    enum {
+        _cmp_le_os = jit_generator::_cmp_le_os,
+        _cmp_nle_us = jit_generator::_cmp_nle_us,
+        _op_floor = jit_generator::_op_floor,
+    };
 
-    bool save_vecs_state;
-    int table_reg_idx;
-    int opmask_idx;
+    size_t vlen = cpu_isa_traits<isa>::vlen;
 
     const static size_t preserved_vecs_max = 5;
 
@@ -81,20 +92,17 @@ private:
     size_t preserved_vec_idxs[preserved_vecs_max] = {0};
     size_t start_idx_tail = 0;
 
-    Vmm vmm_mask, vmm_aux0, vmm_aux1, vmm_aux2, vmm_aux3;
-
-    Xbyak::Reg64 p_table;
-    Xbyak::Opmask k_mask;
-    Xbyak::Label l_table;
+    Vmm vmm_mask, vmm_aux0, vmm_aux1, vmm_aux2, vmm_aux3, vmm_aux4;
 
-    int aux_vecs_count(alg_kind_t elt_alg);
+    Xbyak::Address table_val(int index)
+    { return h->ptr[p_table + index * vlen]; }
 
+    int aux_vecs_count(alg_kind_t alg);
     void compute_body(size_t start_idx, size_t end_idx);
     void injector_preamble(size_t start_idx, size_t end_idx);
     void injector_preamble_tail(size_t start_idx);
     void injector_postamble();
     void assign_regs();
-    bool is_free_vec(size_t idx);
 
     void exp_compute_vector(const Vmm &vmm_src);
     void relu_compute_vector(const Vmm &vmm_src);
@@ -137,21 +145,21 @@ struct jit_uni_eltwise_fwd_t : public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_uni_eltwise_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_eltwise_fwd_t(const pd_t *apd, const input_vector &inputs,
                        const output_vector &outputs);
     ~jit_uni_eltwise_fwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_eltwise_kernel_f32 *kernel_;
 };
 
@@ -170,21 +178,21 @@ struct jit_uni_eltwise_bwd_t : public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_uni_eltwise_bwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_eltwise_bwd_t(const pd_t *apd, const input_vector &inputs,
                        const output_vector &outputs);
     ~jit_uni_eltwise_bwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e)
+    virtual void execute(event_t *e) const
     {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_eltwise_kernel_f32 *kernel_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.cpp
index ccc1c343d..8f93163ef 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2017-2018 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
 * limitations under the License.
 *******************************************************************************/
 
+#include "jit_uni_i8i8_pooling.hpp"
+
 #include <math.h>
 
 #include "mkldnn_types.h"
@@ -23,7 +25,6 @@
 
 #include "jit_generator.hpp"
 
-#include "jit_uni_i8i8_pooling.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -37,20 +38,34 @@ using namespace mkldnn::impl::utils;
 using namespace mkldnn::impl::types;
 using namespace alg_kind;
 
-struct call_params_t {
-    const char *src_i8;
-    const char *dst_i8;
-    size_t kw_range;
-    size_t kh_range;
-    float idivider;
-};
-
 template <cpu_isa_t isa>
-struct jit_uni_i8i8_pool_fwd_ker_t : public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_i8i8_pool_fwd_ker_t)
-
+struct jit_uni_i8i8_pooling_fwd_ker_t: public jit_generator {
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_i8i8_pooling_fwd_ker_t)
+
+    struct call_params_t {
+        const char *src_i8;
+        const char *dst_i8;
+        size_t kw_range;
+        size_t kh_range;
+        float idivider;
+    };
+
+    using Vmm = typename cpu_isa_traits<isa>::Vmm;
+    Xmm xreg(int idx) const { return Xmm(idx); }
+    Ymm yreg(int idx) const { return Ymm(xreg(idx).getIdx()); }
+    Vmm vreg(int idx) const { return Vmm(xreg(idx).getIdx()); }
+
+    // Rounding modes for axv2
+    enum:uint8_t { rnd_op_nearest = 0x0 };
+
+    // In case of avx2 with data type i8 we need to use
+    // maskmovdqu instruction which has its destination hardcoded in rdi.
+    // Windows ABI: abi_param1 is rcx - nothing to do else
+    // Unix ABI: abi_param1 is rdi - copy it to rcx and use it as abi_param1
+    Reg64 reg_param      = rcx; // Our "unified abi_param1"
     Reg64 reg_ptr_src_i8 = r8;
     Reg64 reg_ptr_dst_i8 = r9;
+    Reg64 reg_ptr_maskmovdqu_dst = rdi; // store destination - must be rdi
 
     Reg64 ki = r10;
     Reg64 kj = r11;
@@ -62,73 +77,70 @@ struct jit_uni_i8i8_pool_fwd_ker_t : public jit_generator {
     Reg64 aux_reg_src_w = rbx;
 
     Reg64 reg_tmp = rdx;
-    Reg64 reg_src_64 = r15;
-    Reg32 reg_src_32 = r15d;
-    Reg8 reg_src_8 = r15b;
 
-    size_t sizeof_src_dt() const { return data_type_size(jpp.src_dt); }
-    size_t sizeof_dst_dt() const { return data_type_size(jpp.dst_dt); }
+    Reg64 reg_mask = r15;
 
-    using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
-            isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
+    Opmask k_cmp_mask = Opmask(7);
 
-    Xmm xmm_tmp = Xmm(0);
-    Vmm vreg_tmp = Vmm(14);
-    Vmm vreg_zeros = Vmm(15);
-
-    /* max pooling */
-    Vmm vmm_src(int jj, int ii) {
-        return Vmm(2*jj + ii);
+    Opmask mask(int idx) {
+        return Opmask(6 - idx);
     }
 
-    Xmm xmm_src(int jj) {
-        return Xmm(2*jj);
-    }
+    // ref to any of XYZ-regs via xreg/yreg/vreg functions
+    Xmm xmm_tmp = xreg(0);     // temp to init vreg_tmp
+    Vmm vreg_tmp = vreg(0);    // max pooling : holds minimum values for data_type
+    Vmm vreg_zeros = vreg(1);
 
-    Vmm vmm_dst(int jj, int ii) {
-        return Vmm(2*jj + ii + 2 * jpp.ur_c);
-    }
+    // only in case of <isa> == avx2
+    Vmm vreg_mask    = vreg(2); // full byte-mask
+    Xmm xreg_mask_lo = xreg(2); // low 128-bits part of byte-mask (alias for xmm part of vreg_mask)
+    Xmm xreg_mask_hi = xreg(3); // "max" - high 128-bits part of byte-mask (stored separately)
+    Xmm xreg_mask_q  = xreg(3); // "avg" - 1/4 part of the mask for s8/u8 operations
+    Vmm vreg_mask_q  = vreg(3); // "avg" - 1/4 part for non-zero tails
 
-    Xmm xmm_dst(int jj) {
-        return Xmm(2*jj + 2 * jpp.ur_c);
-    }
+    enum:int {vidx_base = isa == avx2 ? 4 : 2};
+    Vmm base_vr(int idx) const { return vreg(vidx_base + idx); }
 
-    /* avg pooling */
-    Vmm vmm_src_s32(int jj, int ii) {
-        return Vmm(2*jj + ii);
-    }
-
-    Xmm xmm_src_s32(int jj, int ii) {
-        return Xmm(2*jj + ii);
-    }
-
-    Vmm vmm_dst_s32(int jj, int ii) {
-        return Vmm(2*jj + ii + 2 * jpp.ur_c);
-    }
-
-    Ymm ymm_dst_s32(int jj, int ii) {
-        return Ymm(2*jj + ii + 2 * jpp.ur_c);
-    }
+    size_t sizeof_src_dt() const { return data_type_size(jpp.src_dt); }
+    size_t sizeof_dst_dt() const { return data_type_size(jpp.dst_dt); }
 
-    Xmm xmm_dst_s32(int jj, int ii) {
-        return Xmm(2*jj + ii + 2 * jpp.ur_c);
-    }
+    /* max pooling */
+    Vmm vreg_src(int idx) const { return base_vr(idx); }            // [0    .. ur_c-1]
+    Vmm vreg_dst(int idx) const { return base_vr(jpp.ur_c + idx); } // [ur_c .. 2*ur_c-1]
 
-    Vmm vmm_dst_f32(int jj, int ii) {
-        return Vmm(2*jj + ii + 4 * jpp.ur_c);
-    }
+    /* avg pooling */
+    // s32 used for processing of s8/u8 data
+    // thus we need to take into account ratio of sizes s32/i8 = 4
+    static constexpr data_type_t avg_proc_dt = data_type::s32;
+    enum:int {
+        s32_to_i8_ratio = sizeof(typename prec_traits<avg_proc_dt>::type)
+                / sizeof(typename prec_traits<data_type::u8>::type),
+        max_num_ll =  s32_to_i8_ratio
+    };
+    Vmm vreg_src_s32(int jj, int ll) { return base_vr(3*max_num_ll*jj + ll + 0*max_num_ll); }  // ll: 0..4 [0..3]
+    Vmm vreg_dst_s32(int jj, int ll) { return base_vr(3*max_num_ll*jj + ll + 1*max_num_ll); }  // ll: 0..4 [4..7]
+    Vmm vreg_dst_f32(int jj, int ll) { return base_vr(3*max_num_ll*jj + ll + 2*max_num_ll); }  // ll: 0..4 [8..11]
 
     void (*ker_)(const call_params_t *);
     jit_pool_conf_t jpp;
 
     void init_tmp_reg();
+    void init_mask();
+
+    void load_vreg_mask_q(int ll) {};
+
+    void load_src_max_op(int jj, int ll, size_t offset, bool masked, uint64_t msk);
+    void load_src_avg_op(int jj, int ll, size_t offset, bool masked, uint64_t msk);
+    void load_src(int jj, int ll, int c_tail);
 
-    void load_src(int jj, int c_step);
-    void store_dst(int jj, int c_step);
+    void store_dst_max_op(int jj, int ll, size_t offset, bool masked, uint64_t msk);
+    void store_dst_avg_op(int jj, int ll, size_t offset, bool masked, uint64_t msk);
+    void store_dst(int jj, int ll, int c_tail);
 
-    void compute_avg_step(int ur_c, int c_step);
-    void compute_max_step(int ur_c, int c_step);
-    void compute_step(int ur_c, int c_step);
+    void compute_avg_step(int ur_c, int c_tail);
+    void compute_max_op(const int jj);
+    void compute_max_step(int ur_c, int c_tail);
+    void compute_step(int ur_c, int c_tail);
 
     void compute_c_block();
     void generate();
@@ -137,7 +149,7 @@ struct jit_uni_i8i8_pool_fwd_ker_t : public jit_generator {
         const pooling_desc_t &pd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &dst_d);
 
-    jit_uni_i8i8_pool_fwd_ker_t(const jit_pool_conf_t &jpp_)
+    jit_uni_i8i8_pooling_fwd_ker_t(const jit_pool_conf_t &jpp_)
            : jpp(jpp_) {
         generate();
         ker_ = reinterpret_cast<decltype(ker_)>(const_cast<uint8_t*>(
@@ -145,179 +157,376 @@ struct jit_uni_i8i8_pool_fwd_ker_t : public jit_generator {
     }
 };
 
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::load_vreg_mask_q(int ll) {
+
+    // extract ll-th part of mask (ll-th QWORD)
+    vpblendd(vreg_mask_q, vreg_zeros, vreg_mask, 0x3 << ll); // 0x3 - mask for 2 x DWORD
+
+    // Move mask from ll-th pos to 0-th pos
+    if (ll>0)
+        vpermq(vreg_mask_q, vreg_mask_q, ll);
+};
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::load_src_max_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    if (masked) {
+        if (jpp.src_dt == s32) {
+            vpblendd(vreg_src(jj), vreg_tmp, ptr[aux_reg_src_w + offset], static_cast<uint8_t>(msk));
+        } else {
+            vpblendvb(vreg_src(jj), vreg_tmp, ptr[aux_reg_src_w + offset], vreg_mask);
+        }
+    } else
+        vmovups(vreg_src(jj), ptr[aux_reg_src_w + offset]);
+};
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>::load_src_max_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    if (masked) {
+        if (jpp.src_dt == s32)
+            vmovups(vreg_src(jj) | mask(0), ptr[aux_reg_src_w + offset]);
+        else
+            vmovdqu8(vreg_src(jj) | mask(0), ptr[aux_reg_src_w + offset]);
+    } else
+        vmovups(vreg_src(jj), ptr[aux_reg_src_w + offset]);
+};
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::load_src_avg_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    // Don't generate useless code
+    if (masked && !msk)
+        return;
+
+    auto load_i8 = [&](bool is_signed, const Vmm& vr_src) {
+
+        // Need to use mask of tail?
+        if (masked) {
+
+            // load ll-th part of mask into vreg_mask_q
+            load_vreg_mask_q(ll);
+
+            // Load by mask from mem into register vr_src
+            vpblendvb(vr_src, vreg_zeros, ptr[aux_reg_src_w + offset], vreg_mask_q);
+
+            // Conversion s8/u8 -> s32
+            if (is_signed)
+                vpmovsxbd(vr_src, vr_src);
+            else
+                vpmovzxbd(vr_src, vr_src);
+        } else {
+
+            // Load from mem into vr_src with conversion
+            if (is_signed)
+                vpmovsxbd(vr_src, ptr[aux_reg_src_w + offset]);
+            else
+                vpmovzxbd(vr_src, ptr[aux_reg_src_w + offset]);
+        }
+    };
+
+    switch (jpp.src_dt) {
+        case s32:
+            if (masked)
+                vpblendd(vreg_src_s32(jj, ll), vreg_zeros, ptr[aux_reg_src_w + offset],
+                    static_cast<uint8_t>(msk));
+            else
+                vmovups(vreg_src_s32(jj, ll), ptr[aux_reg_src_w + offset]);
+            break;
+        case s8:
+                load_i8(true, vreg_src_s32(jj, ll));
+            break;
+        case u8:
+                load_i8(false, vreg_src_s32(jj, ll));
+            break;
+        default: assert(!"unsupported src data type");
+    }
+};
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>::load_src_avg_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    // Don't generate useless code
+    if (masked && !msk)
+        return;
+
+    const Vmm& vr_src = masked ?
+            vreg_src_s32(jj, ll) | mask(ll) :
+            vreg_src_s32(jj, ll);
+
+    switch (jpp.src_dt) {
+        case s32:
+            vmovups(vr_src, ptr[aux_reg_src_w + offset]);
+            break;
+        case s8:
+            vpmovsxbd(vr_src, ptr[aux_reg_src_w + offset]);
+            break;
+        case u8:
+            vpmovzxbd(vr_src, ptr[aux_reg_src_w + offset]);
+            break;
+        default: assert(!"unsupported src data type");
+    }
+};
+
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::load_src(int jj, int c_step) {
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::load_src(int jj, int ll, int c_tail) {
     using namespace data_type;
 
-    int repeats = isa == sse42 && c_step != 1 ? 2 : 1;
+    int c_block = jpp.c_block;
+    int ur_c = jpp.ur_c;
+
     switch (jpp.alg) {
         case pooling_max: {
-            auto offset = jj*c_step*sizeof_src_dt();
-            if (c_step == jpp.c_block) {
-                for (int ii = 0; ii < repeats; ii++)
-                    uni_vmovups(vmm_src(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
-            } else if (c_step == 1) {
-                if (jpp.src_dt == s32) {
-                    movsd(xmm_src(jj), ptr[aux_reg_src_w + offset]);
-                } else {
-                    mov(reg_src_8, ptr[aux_reg_src_w + offset]);
-                    movq(xmm_src(jj), reg_src_64);
-                }
-            }
+            auto offset = jj*c_block*sizeof_src_dt();
+            bool masked = jj == ur_c - 1 && c_tail;
+            load_src_max_op(jj, ll, offset, masked, jpp.tail[0]);
             break;
         }
         case pooling_avg_include_padding:
         case pooling_avg_exclude_padding: {
-            auto offset = jj*c_step*sizeof_src_dt();
-            switch (jpp.src_dt) {
-                case s32:
-                    if (c_step == jpp.c_block) {
-                        for (int ii = 0; ii < repeats; ii++)
-                            uni_vmovups(vmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
-                    } else if (c_step == 1) {
-                        movsd(xmm_src_s32(jj, 0), ptr[aux_reg_src_w + offset]);
-                    }
-                    break;
-                case s8:
-                    if (c_step == jpp.c_block) {
-                        for (int ii = 0; ii < repeats; ii++) {
-                            if (isa == sse42)
-                                movd(xmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
-                            else
-                                movq(xmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
-
-                            uni_vpmovsxbd(vmm_src_s32(jj, ii), xmm_src_s32(jj, ii));
-                        }
-                    } else if (c_step == 1) {
-                        movsx(reg_src_32, ptr[aux_reg_src_w + offset]);
-                        movq(xmm_src_s32(jj, 0), reg_src_64);
-                    }
-                    break;
-                case u8:
-                    if (c_step == jpp.c_block) {
-                        for (int ii = 0; ii < repeats; ii++) {
-                            if (isa == sse42)
-                                movd(xmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
-                            else
-                                movq(xmm_src_s32(jj, ii), ptr[aux_reg_src_w + offset + (jpp.c_block / 2) * ii * sizeof_src_dt()]);
-
-                            uni_vpmovzxbd(vmm_src_s32(jj, ii), xmm_src_s32(jj, ii));
-                        }
-                    } else if (c_step == 1) {
-                        movzx(reg_src_32, ptr[aux_reg_src_w + offset]);
-                        movq(xmm_src_s32(jj, 0), reg_src_64);
-                    }
-                    break;
-                default: assert(!"unsupported src data type");
-            }
+            auto offset = (ll*(c_block/max_num_ll) + jj*c_block)*sizeof_src_dt();
+            bool masked = jj == ur_c - 1 && c_tail;
+            load_src_avg_op(jj, ll, offset, masked, jpp.tail[ll]);
             break;
         }
         default: assert(!"unsupported algorithm");
     }
 }
 
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::store_dst_max_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    int c_block = jpp.c_block;
+
+    if (masked) {
+        switch (jpp.src_dt) {
+            case s32:
+                vpmaskmovd(ptr[reg_ptr_dst_i8 + offset], vreg_mask, vreg_dst(jj));
+                break;
+            case s8:
+            case u8: {
+                // Store low half by mask (bytes 0...15)
+                lea(reg_ptr_maskmovdqu_dst, ptr[reg_ptr_dst_i8 + offset]);
+                maskmovdqu(vreg_dst(jj), xreg_mask_lo);
+
+                // Do we need to store high half (bytes 16...31) ?
+                const uint64_t low_mask = (1ULL << (c_block/2))-1;
+                if (msk & ~low_mask) {
+                    vextracti128(Xmm(vreg_dst(jj).getIdx()), vreg_dst(jj), 1);
+                    add(reg_ptr_maskmovdqu_dst, c_block / 2);
+                    maskmovdqu(vreg_dst(jj), xreg_mask_hi);
+                }
+            } break;
+            default: assert(!"unsupported src data type");
+        }
+    } else
+        vmovups(ptr[reg_ptr_dst_i8 + offset], vreg_dst(jj));
+}
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>::store_dst_max_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    if (masked) {
+        switch (jpp.src_dt) {
+            case s32:
+                vmovups(ptr[reg_ptr_dst_i8 + offset], vreg_dst(jj) | mask(0));
+                break;
+            case s8:
+            case u8:
+                vmovdqu8(ptr[reg_ptr_dst_i8 + offset], vreg_dst(jj) | mask(0));
+                break;
+            default: assert(!"unsupported src data type");
+        }
+    } else
+        vmovups(ptr[reg_ptr_dst_i8 + offset], vreg_dst(jj));
+}
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::store_dst_avg_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk){
+    using namespace data_type;
+
+    // Don't generate useless code
+    if (masked && !msk)
+        return;
+
+    auto s32_to_i8 = [&](bool is_signed, const Vmm& vr_dst) {
+
+        // conversion: s32 -> s16/u16 : {8 x s32}{8 x 0} -> {16 x s16/u16}
+        // Result QWORDs (qw0, qw1) permuted: {qw0, 0, qw1, 0}
+        if (is_signed)
+            vpackssdw(vr_dst, vr_dst, vreg_zeros);
+        else
+            vpackusdw(vr_dst, vr_dst, vreg_zeros);
+
+        // Permute qwords to restore original order
+        // {qw0, 0, qw1, 0} -> {qw0, qw1, 0, 0}
+        vpermq(vr_dst, vr_dst, 0x58);
+
+        // conversion: s16/u16 -> s8/u8 : {16 x s16/u16}{16 x 0} -> {32 x s8/u8}
+        // Target QWORD qw = {8 x s8/u8} has proper position: {qw, xx, xx, xx}
+        if (is_signed)
+            vpacksswb(vr_dst, vr_dst, vreg_zeros);
+        else
+            vpackuswb(vr_dst, vr_dst, vreg_zeros);
+
+    };
+
+    auto store_i8 = [&](bool is_signed, bool is_masked, const Vmm& vr_dst) {
+
+        // Conversion s32 -> s8/u8
+        s32_to_i8(is_signed, vr_dst);
+
+        // Need to use mask of tail?
+        if (is_masked) {
+            // load ll-th part of mask into vreg_mask_q
+            load_vreg_mask_q(ll);
+        }
+
+        // store 8 bytes
+        lea(reg_ptr_maskmovdqu_dst, ptr[reg_ptr_dst_i8 + offset]);
+        maskmovdqu(vr_dst, xreg_mask_q);
+    };
+
+    switch (jpp.dst_dt) {
+        case s32:
+            if (masked) {
+                vpmaskmovd(ptr[reg_ptr_dst_i8 + offset], vreg_mask, vreg_dst_s32(jj, ll));
+            } else
+                vmovups(ptr[reg_ptr_dst_i8 + offset], vreg_dst_s32(jj, ll));
+            break;
+        case s8:
+            store_i8(true, masked, vreg_dst_s32(jj, ll));
+            break;
+        case u8:
+            store_i8(false, masked, vreg_dst_s32(jj, ll));
+            break;
+        default: assert(!"unsuppotred dst data_type");
+    }
+}
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>::store_dst_avg_op(int jj, int ll,
+        size_t offset, bool masked, uint64_t msk) {
+    using namespace data_type;
+
+    // Don't generate useless code
+    if (masked && !msk)
+        return;
+
+    const Vmm& vr_dst = masked ?
+            vreg_dst_s32(jj, ll) | mask(ll) :
+            vreg_dst_s32(jj, ll);
+
+    switch (jpp.dst_dt) {
+        case s32:
+            vmovups(ptr[reg_ptr_dst_i8 + offset], vr_dst);
+            break;
+        case s8:
+            vpmovdb(ptr[reg_ptr_dst_i8 + offset], vr_dst);
+            break;
+        case u8:
+            vpmovusdb(ptr[reg_ptr_dst_i8 + offset], vr_dst);
+            break;
+        default: assert(!"unsupported dst data_type");
+    }
+}
+
+
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::store_dst(int jj, int c_step) {
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::store_dst(int jj, int ll,
+        int c_tail) {
     using namespace data_type;
 
-    int repeats = isa == sse42 && c_step != 1 ? 2 : 1;
+    int c_block = jpp.c_block;
+    int ur_c = jpp.ur_c;
+
     switch(jpp.alg) {
         case pooling_max: {
-            auto offset = jj*c_step*sizeof_dst_dt();
-            if (c_step == jpp.c_block) {
-                for (int ii = 0; ii < repeats; ii++)
-                    uni_vmovups(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], vmm_dst(jj, ii));
-            } else if (c_step == 1) {
-                if (jpp.src_dt == s32) {
-                    movq(reg_src_64, xmm_dst(jj));
-                    mov(ptr[reg_ptr_dst_i8 + offset], reg_src_32);
-                } else {
-                    movq(reg_src_64, xmm_dst(jj));
-                    mov(ptr[reg_ptr_dst_i8 + offset], reg_src_8);
-                }
-            }
+            auto offset = jj*c_block*sizeof_dst_dt();
+            bool masked = jj == ur_c - 1 && c_tail;
+            store_dst_max_op(jj, ll, offset, masked, jpp.tail[ll]);
             break;
         }
         case pooling_avg_include_padding:
         case pooling_avg_exclude_padding: {
-            auto offset = jj*c_step*sizeof_dst_dt();
-            switch (jpp.dst_dt) {
-                case s32:
-                    if (c_step == jpp.c_block) {
-                        for (int ii = 0; ii < repeats; ii++)
-                            uni_vmovups(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], vmm_dst_s32(jj, ii));
-                    } else if (c_step == 1) {
-                        movq(reg_src_64, xmm_dst_s32(jj, 0));
-                        mov(ptr[reg_ptr_dst_i8 + offset], reg_src_32);
-                    }
-                    break;
-                case s8:
-                    if (c_step == jpp.c_block) {
-                        for (int ii = 0; ii < repeats; ii++) {
-                            uni_vpackssdw(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii));
-
-                            if (isa != sse42)
-                                vpermq(ymm_dst_s32(jj, ii), ymm_dst_s32(jj, ii), 0x08);
-
-                            uni_vpacksswb(xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii));
-
-                            if (isa != sse42)
-                                movq(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], xmm_dst_s32(jj, ii));
-                            else
-                                movd(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], xmm_dst_s32(jj, ii));
-                        }
-                    } else if (c_step == 1) {
-                        vpackssdw(vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0));
-                        vpacksswb(xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0));
-                        movq(reg_src_64, xmm_dst_s32(jj, 0));
-                        mov(ptr[reg_ptr_dst_i8 + offset], reg_src_8);
-                    }
-                    break;
-                case u8:
-                    if (c_step == jpp.c_block) {
-                        for (int ii = 0; ii < repeats; ii++) {
-                            uni_vpackusdw(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii));
-
-                            if (isa != sse42)
-                                vpermq(ymm_dst_s32(jj, ii), ymm_dst_s32(jj, ii), 0x08);
-
-                            uni_vpackuswb(xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii), xmm_dst_s32(jj, ii));
-
-                            if (isa != sse42)
-                                movq(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], xmm_dst_s32(jj, ii));
-                            else
-                                movd(ptr[reg_ptr_dst_i8 + offset + (jpp.c_block / 2) * ii * sizeof_dst_dt()], xmm_dst_s32(jj, ii));
-                        }
-                    } else if (c_step == 1) {
-                        vpackusdw(vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0), vmm_dst_s32(jj, 0));
-                        vpackuswb(xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0), xmm_dst_s32(jj, 0));
-                        movq(reg_src_64, xmm_dst_s32(jj, 0));
-                        mov(ptr[reg_ptr_dst_i8 + offset], reg_src_8);
-                    }
-                    break;
-                default: assert(!"unsuppotred dst data_type");
-            }
+            auto offset = (ll*(c_block/max_num_ll) + jj*c_block)*sizeof_dst_dt();
+            bool masked = jj == ur_c - 1 && c_tail;
+            store_dst_avg_op(jj, ll, offset, masked, jpp.tail[ll]);
             break;
         }
         default: assert(!"unsupported pooling algorithm");
     }
 }
 
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::compute_max_op(const int jj)
+{
+    using namespace data_type;
+    switch (jpp.src_dt) {
+        case s32:
+            vpmaxsd(vreg_dst(jj), vreg_dst(jj), vreg_src(jj));
+            break;
+        case s8:
+            vpmaxsb(vreg_dst(jj), vreg_dst(jj), vreg_src(jj));
+            break;
+        case u8:
+            vpmaxub(vreg_dst(jj), vreg_dst(jj), vreg_src(jj));
+            break;
+        default: assert(!"unsupported src data type");
+    }
+}
+
+template <>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>::compute_max_op(const int jj)
+{
+    using namespace data_type;
+
+    // Compare
+    switch (jpp.src_dt) {
+        case s32:
+            vpcmpd(k_cmp_mask, vreg_dst(jj), vreg_src(jj), _cmp_lt_os);
+            break;
+        case s8:
+            vpcmpb(k_cmp_mask, vreg_dst(jj), vreg_src(jj), _cmp_lt_os);
+            break;
+        case u8:
+            vpcmpub(k_cmp_mask, vreg_dst(jj), vreg_src(jj), _cmp_lt_os);
+            break;
+        default: assert(!"unsupported src data type");
+    }
+
+    // move max values into vreg_dst
+    if (jpp.src_dt == s32)
+        vpblendmd(vreg_dst(jj) | k_cmp_mask, vreg_dst(jj), vreg_src(jj));
+    else
+        vpblendmb(vreg_dst(jj) | k_cmp_mask, vreg_dst(jj), vreg_src(jj));
+}
+
+
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_max_step(int ur_c, int c_step)
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::compute_max_step(int ur_c, int c_tail)
 {
     Label l_kw, l_kh;
 
     int iw = jpp.iw;
     int c = jpp.c;
 
-    int repeats = isa == sse42 && c_step != 1 ? 2 : 1;
-
-    for (int jj = 0; jj < ur_c; jj++) {
-        for (int ii = 0; ii < repeats; ii++) {
-            uni_vmovups(vmm_dst(jj, ii), vreg_tmp);
-        }
-    }
+    for (int jj = 0; jj < ur_c; jj++)
+        vmovups(vreg_dst(jj), vreg_tmp);
 
     mov(aux_reg_src_h, reg_ptr_src_i8);
 
@@ -329,18 +538,8 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_max_step(int ur_c, int c_step)
         L(l_kw);
         {
             for (int jj = 0; jj < ur_c; jj++) {
-                load_src(jj, c_step);
-
-                for (int ii = 0; ii < repeats; ii++) {
-                    if (jpp.src_dt == data_type::s32) {
-                        uni_vpmaxsd(vmm_dst(jj, ii), vmm_dst(jj, ii), vmm_src(jj, ii));
-                    } else {
-                        if (jpp.src_dt == data_type::s8)
-                            uni_vpmaxsb(vmm_dst(jj, ii), vmm_dst(jj, ii), vmm_src(jj, ii));
-                        else
-                            uni_vpmaxub(vmm_dst(jj, ii), vmm_dst(jj, ii), vmm_src(jj, ii));
-                    }
-                }
+                load_src(jj, 0, c_tail);
+                compute_max_op(jj);
             }
             add(aux_reg_src_w, c * sizeof_src_dt());
             inc(ki);
@@ -354,11 +553,11 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_max_step(int ur_c, int c_step)
     }
 
     for (int jj = 0; jj < ur_c; jj++)
-        store_dst(jj, c_step);
+        store_dst(jj, 0, c_tail);
 }
 
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_avg_step(int ur_c, int c_step)
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::compute_avg_step(int ur_c, int c_tail)
 {
     using namespace data_type;
 
@@ -367,12 +566,16 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_avg_step(int ur_c, int c_step)
     int iw = jpp.iw;
     int c = jpp.c;
 
-    int repeats = isa == sse42 && c_step != 1 ? 2 : 1;
+    const int num_ll = data_type_size(avg_proc_dt)/data_type_size(jpp.src_dt);
 
     for (int jj = 0; jj < ur_c; jj++) {
-        for (int ii = 0; ii < repeats; ii++) {
-            uni_vpxor(vmm_src_s32(jj, ii), vmm_src_s32(jj, ii), vmm_src_s32(jj, ii));
-            uni_vpxor(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii));
+        for (int ll = 0; ll < num_ll; ll++) {
+            bool masked = jj == ur_c - 1 && c_tail;
+            size_t msk = jpp.tail[ll];
+            if (!(masked && !msk)) {
+                uni_vpxor(vreg_src_s32(jj, ll), vreg_src_s32(jj, ll), vreg_src_s32(jj, ll));
+                uni_vpxor(vreg_dst_s32(jj, ll), vreg_dst_s32(jj, ll), vreg_dst_s32(jj, ll));
+            }
         }
     }
 
@@ -386,10 +589,14 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_avg_step(int ur_c, int c_step)
         L(l_kw);
         {
             for (int jj = 0; jj < ur_c; jj++) {
-                load_src(jj, c_step);
-
-                for (int ii = 0; ii < repeats; ii++) {
-                    uni_vpaddd(vmm_dst_s32(jj, ii), vmm_dst_s32(jj, ii), vmm_src_s32(jj, ii));
+                for (int ll = 0; ll < num_ll; ll++) {
+                    bool masked = jj == ur_c - 1 && c_tail;
+                    size_t msk = jpp.tail[ll];
+                    if (!(masked && !msk)) {
+                        load_src(jj, ll, c_tail);
+                        vpaddd(vreg_dst_s32(jj, ll), vreg_dst_s32(jj, ll),
+                                vreg_src_s32(jj, ll));
+                    }
                 }
             }
             add(aux_reg_src_w, c * sizeof_src_dt());
@@ -404,82 +611,171 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_avg_step(int ur_c, int c_step)
     }
 
     for (int jj = 0; jj < ur_c; jj++) {
-        for (int ii = 0; ii < repeats; ii++) {
-            uni_vcvtdq2ps(vmm_dst_f32(jj, ii), vmm_dst_s32(jj, ii));
+        for (int ll = 0; ll < num_ll; ll++) {
+            bool masked = jj == ur_c - 1 && c_tail;
+            size_t msk = jpp.tail[ll];
+            if (!(masked && !msk)) {
 
-            if (isa == sse42)
-                mulps(vmm_dst_f32(jj, ii), vreg_tmp);
-            else
-                vfmadd132ps(vmm_dst_f32(jj, ii), vreg_zeros, vreg_tmp);
+                vcvtdq2ps(vreg_dst_f32(jj, ll), vreg_dst_s32(jj, ll));
+                vfmadd132ps(vreg_dst_f32(jj, ll), vreg_zeros, vreg_tmp);
 
-            uni_vcvtps2dq(vmm_dst_s32(jj, ii), vmm_dst_f32(jj, ii));
-        }
+                if (isa == avx2) {
+                    uni_vroundps(vreg_dst_f32(jj, ll), vreg_dst_f32(jj, ll), rnd_op_nearest);
+                    vcvtps2dq(vreg_dst_s32(jj, ll), vreg_dst_f32(jj, ll));
+                } else if (isa >= avx512_common) {
+                    // AVX512: use of EVEX-embedded static rounding override
+                    vcvtps2dq(vreg_dst_s32(jj, ll) | T_rn_sae, vreg_dst_f32(jj, ll));
+                }
 
-        store_dst(jj, c_step);
+                store_dst(jj, ll, c_tail);
+            }
+        }
     }
 }
 
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_step(int ur_c, int c_step) {
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::compute_step(int ur_c, int c_tail) {
     switch (jpp.alg) {
         case pooling_max:
-            compute_max_step(ur_c, c_step); break;
+            compute_max_step(ur_c, c_tail); break;
         case pooling_avg_include_padding:
         case pooling_avg_exclude_padding:
-            compute_avg_step(ur_c, c_step); break;
+            compute_avg_step(ur_c, c_tail); break;
         default: assert(!"unsupported pooling algorithm");
     }
 }
 
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::compute_c_block() {
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::compute_c_block(){
     Label l_main_loop;
-    Label l_tail_loop;
-    Label exit;
 
+    int nb_c = jpp.nb_c;
+    int c_block = jpp.c_block;
     int ur_c = jpp.ur_c;
+    int ur_c_tail = jpp.ur_c_tail;
+    int c_steps = nb_c / ur_c;
+    int c_tail = jpp.c_tail;
 
     xor_(c_iter, c_iter);
+    if (c_steps > 0) {
+        L(l_main_loop); {
+            compute_step(ur_c, 0);
+            add(reg_ptr_src_i8, ur_c*c_block*sizeof_src_dt());
+            add(reg_ptr_dst_i8, ur_c*c_block*sizeof_dst_dt());
+            inc(c_iter);
+            cmp(c_iter, c_steps);
+            jl(l_main_loop, T_NEAR);
+        }
+    }
 
-    L(l_main_loop);
-    {
-        cmp(c_iter, jpp.c - ur_c * jpp.c_block);
-        jg(l_tail_loop, T_NEAR);
+    if (ur_c_tail != 0) {
+        compute_step(ur_c_tail, c_tail);
+    }
+}
 
-        compute_step(ur_c, jpp.c_block);
+template<>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx2>::init_mask() {
+    using namespace data_type;
+    using cpu_isa = cpu_isa_traits<avx2>;
+
+    // AVX2 mask initialization: mask stored in Ymm-regs
+    auto init = [&](uint64_t bit_mask, bool init_mask_q) {
+        const size_t QW_PER_VREG = cpu_isa::vlen / sizeof(uint64_t);
+
+        uint64_t vmask[QW_PER_VREG];
+        for (size_t i = 0; i < QW_PER_VREG; i++){
+
+            uint64_t qw_vmask=0ULL;
+            const size_t DBITS = 8*sizeof_src_dt();
+            const uint64_t VMSK = 1ULL << (DBITS-1);
+            const size_t D_PER_QW = (8*sizeof(qw_vmask))/DBITS;
+            for (size_t j = 0; j < D_PER_QW; j++) {
+                if (bit_mask & 1)
+                    qw_vmask |= VMSK << DBITS * j;
+                bit_mask >>= 1;
+            }
+            vmask[i] = qw_vmask;
+        }
 
-        add(reg_ptr_src_i8, ur_c * jpp.c_block * sizeof_src_dt());
-        add(reg_ptr_dst_i8, ur_c * jpp.c_block * sizeof_dst_dt());
-        add(c_iter, ur_c * jpp.c_block);
-        jmp(l_main_loop);
-    }
+        // Put QWORDS with target mask into xmm regs
+        const int xdst_i[QW_PER_VREG] = {
+                xreg_mask_lo.getIdx(),
+                xreg_mask_lo.getIdx(),
+                xreg_mask_hi.getIdx(),
+                xreg_mask_hi.getIdx()
+        };
+        const int xsrc_i[QW_PER_VREG] = {
+                vreg_zeros.getIdx(),   // 0-th qword insert in zeros -> {qw0,  0}
+                xreg_mask_lo.getIdx(), // 1-st and 0-th merge        -> {qw0,qw1}
+                vreg_zeros.getIdx(),
+                xreg_mask_hi.getIdx()
+        };
+        const uint8 qw_dst_idx[QW_PER_VREG] = {0, 1, 0, 1}; // qword index in 128-bit xreg
+
+        for (size_t i = 0; i < QW_PER_VREG; i++) {
+            mov(reg_mask, vmask[i]);
+            vpinsrq(Xmm(xdst_i[i]), Xmm(xsrc_i[i]), reg_mask, qw_dst_idx[i]);
+        }
 
-    L(l_tail_loop);
-    {
-        cmp(c_iter, jpp.c - ur_c);
-        jg(exit, T_NEAR);
+        // Merge Low (xreg_mask_lo alias for vreg_mask.xreg)
+        // and High (xreg_mask_hi) into full vreg_mask
+        // vreg_mask -> {xreg_mask_hi, vreg_mask.xreg}
+        vinserti128(vreg_mask, vreg_mask, xreg_mask_hi, 1);
 
-        compute_step(ur_c, 1);
+        // Keep only low qword of mask in xreg_mask_q
+        if (init_mask_q) {
+            mov(reg_mask, vmask[0]);
+            vpinsrq(xreg_mask_q, Xmm(vreg_zeros.getIdx()), reg_mask, 0);
+        }
+    };
 
-        add(reg_ptr_src_i8, ur_c * sizeof_src_dt());
-        add(reg_ptr_dst_i8, ur_c * sizeof_dst_dt());
-        add(c_iter, ur_c);
-        jmp(l_tail_loop);
+    uint64_t tail_mask = (1ULL << jpp.c_tail) - 1;
+    switch (jpp.alg) {
+        case pooling_max:
+            // For "max" we need mask only in case of non-zero tail
+            if (tail_mask)
+                init(tail_mask, false);
+            break;
+        case pooling_avg_include_padding:
+        case pooling_avg_exclude_padding:
+            // For "avg" we need mask:
+            // - s32   - in case of the non-zero tail
+            // - s8/u8 - irrespective of the tail
+            switch (jpp.src_dt) {
+                case s32:
+                    if (tail_mask)
+                        init(tail_mask, false);
+                    break;
+                case s8:
+                case u8:
+                    init(tail_mask ? tail_mask : ~0ULL, tail_mask == 0);
+                    break;
+                default: assert(!"unsupported src data type");
+            }
+            break;
+        default: assert(!"unsupported pooling algorithm");
     }
+}
+
+template<>
+void jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>::init_mask() {
 
-    L(exit);
+    for (int ll = 0; ll < max_num_ll; ll++) {
+        mov(reg_mask, jpp.tail[ll]);
+        kmovq(mask(ll), reg_mask);
+    }
 }
 
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::init_tmp_reg() {
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::init_tmp_reg() {
     using namespace data_type;
 
     switch (jpp.alg) {
         case pooling_avg_include_padding:
         case pooling_avg_exclude_padding:
-            mov(reg_tmp, ptr[abi_param1 + offsetof(call_params_t, idivider)]);
+            mov(reg_tmp, ptr[reg_param + offsetof(call_params_t, idivider)]);
             movq(xmm_tmp, reg_tmp);
-            uni_vpbroadcastd(vreg_tmp, xmm_tmp);
+            vpbroadcastd(vreg_tmp, xmm_tmp);
             break;
         case pooling_max:
             switch (jpp.src_dt) {
@@ -496,17 +792,10 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::init_tmp_reg() {
             }
 
             movq(xmm_tmp, reg_tmp);
-            if (jpp.src_dt == s32) {
-                uni_vpbroadcastd(vreg_tmp, xmm_tmp);
-            } else {
-                if (isa == avx2) {
-                    vpbroadcastb(vreg_tmp, xmm_tmp);
-                } else {
-                    movups(vreg_tmp, xmm_tmp);
-                    uni_vpxor(xmm_tmp, xmm_tmp, xmm_tmp);
-                    pshufb(vreg_tmp, xmm_tmp);
-                }
-            }
+            if (jpp.src_dt == s32)
+                vpbroadcastd(vreg_tmp, xmm_tmp);
+            else
+                vpbroadcastb(vreg_tmp, xmm_tmp);
             break;
         default: assert(!"unsupported pooling algorithm");
     }
@@ -514,11 +803,17 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::init_tmp_reg() {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pool_fwd_ker_t<isa>::generate() {
+void jit_uni_i8i8_pooling_fwd_ker_t<isa>::generate() {
     preamble();
 
+#if !defined(_WIN32)
+    // Always use rcx as abi_param1 -
+    // see the note about maskmovdqu near reg_param.
+    mov(rcx, rdi);
+#endif
+
 #   define READ_PARAM(reg, field) \
-        mov(reg, ptr[abi_param1 + offsetof(call_params_t, field)])
+        mov(reg, ptr[reg_param + offsetof(call_params_t, field)])
     READ_PARAM(reg_ptr_src_i8, src_i8);
     READ_PARAM(reg_ptr_dst_i8, dst_i8);
     READ_PARAM(reg_kw, kw_range);
@@ -526,22 +821,23 @@ void jit_uni_i8i8_pool_fwd_ker_t<isa>::generate() {
 
 #   undef READ_PARAM
 
-    init_tmp_reg();
-
     uni_vpxor(vreg_zeros, vreg_zeros, vreg_zeros);
 
+    init_mask();
+
+    init_tmp_reg();
+
     compute_c_block();
 
     postamble();
 }
 
 template <cpu_isa_t isa>
-status_t jit_uni_i8i8_pool_fwd_ker_t<isa>::init_conf(jit_pool_conf_t &jpp,
+status_t jit_uni_i8i8_pooling_fwd_ker_t<isa>::init_conf(jit_pool_conf_t &jpp,
         const pooling_desc_t &pd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &dst_d) {
-    if (!mayiuse(isa)) {
+    if (!mayiuse(isa))
         return status::unimplemented;
-    }
 
     jpp.mb = src_d.dims()[0];
     jpp.c = src_d.dims()[1];
@@ -563,71 +859,106 @@ status_t jit_uni_i8i8_pool_fwd_ker_t<isa>::init_conf(jit_pool_conf_t &jpp,
     jpp.src_dt = pd.src_desc.data_type;
     jpp.dst_dt = pd.dst_desc.data_type;
 
-    jpp.c_block = jpp.alg == pooling_max ? 32 / (jpp.src_dt == data_type::s32 ? 4 : 1) : 8;
+    // data_type items per one vreg on the <isa>
+    //     isa == avx2    : 32 bytes -> 32 for s8/u8, 8 for s32
+    //     isa == avx512* : 64 bytes -> 64 for s8/u8, 16 for s32
+    int simd_w = cpu_isa_traits<isa>::vlen / data_type_size(jpp.src_dt);
+
+    jpp.c_block = simd_w;
     jpp.c_tail = jpp.c % jpp.c_block;
     jpp.nb_c = jpp.c / jpp.c_block;
     jpp.ur_c = 1;
-    jpp.ur_c_tail = jpp.nb_c - (jpp.nb_c / jpp.ur_c)*jpp.ur_c + (jpp.c_tail != 0);
+    jpp.ur_c_tail = jpp.nb_c - (jpp.nb_c / jpp.ur_c)*jpp.ur_c +
+            (jpp.c_tail != 0);
+
+    size_t tail_mask = (1ULL << jpp.c_tail) - 1;
+
+    switch (jpp.alg) {
+        case pooling_max:
+            jpp.tail[0] = tail_mask;
+            jpp.tail[1] = 0;
+            jpp.tail[2] = 0;
+            jpp.tail[3] = 0;
+            break;
+        case pooling_avg_include_padding:
+        case pooling_avg_exclude_padding: {
+            // avg_proc_dt (s32) defines granularity (because u8/s8 processed as s32)
+            // avx2 : 8, avx512 : 16
+            const size_t msk_gran = cpu_isa_traits<isa>::vlen / data_type_size(avg_proc_dt);
+            const size_t msk_msk = (1ULL << msk_gran) - 1;
+            size_t m = tail_mask;
+            for (size_t ll = 0; ll < max_num_ll; ll++) {
+                jpp.tail[ll] = m & msk_msk;
+                m = m >> msk_gran;
+            }
+            break;
+        }
+        default: return status::unimplemented;
+    }
 
     return status::success;
 }
 
 template <cpu_isa_t isa>
 status_t jit_uni_i8i8_pooling_fwd_t<isa>::pd_t::jit_conf() {
-    return jit_uni_i8i8_pool_fwd_ker_t<isa>::init_conf(jpp_,
+    return jit_uni_i8i8_pooling_fwd_ker_t<isa>::init_conf(jpp_,
        desc_, src_pd_.desc(), dst_pd_.desc());
 }
 
 template <cpu_isa_t isa>
-jit_uni_i8i8_pooling_fwd_t<isa>::jit_uni_i8i8_pooling_fwd_t(const pd_t *pd,
+jit_uni_i8i8_pooling_fwd_t<isa>::
+jit_uni_i8i8_pooling_fwd_t(const pd_t *apd,
           const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), ker_(nullptr) {
-    ker_ = new jit_uni_i8i8_pool_fwd_ker_t<isa>(conf_.jpp_);
-}
+    : cpu_primitive_t(apd, inputs, outputs), ker_(nullptr)
+{ ker_ = new jit_uni_i8i8_pooling_fwd_ker_t<isa>(pd()->jpp_); }
 
 template <cpu_isa_t isa>
-jit_uni_i8i8_pooling_fwd_t<isa>::~jit_uni_i8i8_pooling_fwd_t() {
-    delete ker_;
-}
+jit_uni_i8i8_pooling_fwd_t<isa>::
+~jit_uni_i8i8_pooling_fwd_t() { delete ker_; }
 
 template <cpu_isa_t isa>
-void jit_uni_i8i8_pooling_fwd_t<isa>::execute_forward() {
+void jit_uni_i8i8_pooling_fwd_t<isa>::execute_forward() const {
     auto src_i8 = reinterpret_cast<const char *>(input_memory(0));
     auto dst_i8 = reinterpret_cast<char *>(memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
 
-    const auto &jpp = conf_.jpp_;
+    const auto &jpp = pd()->jpp_;
 
     parallel_nd(jpp.mb, jpp.oh, jpp.ow,
-        [&](int n, int oh, int ow) {
-        const int ih = nstl::max(oh * jpp.stride_h - jpp.t_pad, 0);
-        const int iw = nstl::max(ow * jpp.stride_w - jpp.l_pad, 0);
+            [&](int n, int oh, int ow) {
+        const int ih = nstl::max(oh*jpp.stride_h - jpp.t_pad, 0);
+        const int iw = nstl::max(ow*jpp.stride_w - jpp.l_pad, 0);
 
         const int kh_start = nstl::max(0, jpp.t_pad - oh * jpp.stride_h);
         const int kh_end = nstl::min(jpp.kh,
-                                     jpp.ih + jpp.t_pad - oh * jpp.stride_h);
+                jpp.ih + jpp.t_pad - oh * jpp.stride_h);
         const int kw_start = nstl::max(0, jpp.l_pad - ow * jpp.stride_w);
         const int kw_end = nstl::min(jpp.kw,
-                                     jpp.iw + jpp.l_pad - ow * jpp.stride_w);
+                jpp.iw + jpp.l_pad - ow * jpp.stride_w);
 
-        auto p = call_params_t();
+        auto p = typename jit_uni_i8i8_pooling_fwd_ker_t<isa>::call_params_t();
         p.src_i8 = &src_i8[
-                src_d.blk_off(n, 0, ih, iw) * src_d.data_type_size()];
+            src_d.blk_off(n, 0, ih, iw) * src_d.data_type_size()];
         p.dst_i8 = &dst_i8[
-                dst_d.blk_off(n, 0, oh, ow) * dst_d.data_type_size()];
-        p.kw_range = (size_t) (kw_end - kw_start);
-        p.kh_range = (size_t) (kh_end - kh_start);
+            dst_d.blk_off(n, 0, oh, ow) * dst_d.data_type_size()];
+        p.kw_range = (size_t)(kw_end - kw_start);
+        p.kh_range = (size_t)(kh_end - kh_start);
         p.idivider = 1.0f / ((jpp.alg == pooling_avg_exclude_padding) ?
-                             p.kh_range * p.kw_range : jpp.kw * jpp.kh);
+            p.kh_range*p.kw_range : jpp.kw*jpp.kh);
 
         ker_->ker_(&p);
     });
 }
 
+// Explicit instantiation only for supported <isa> values.
+//
+template struct jit_uni_i8i8_pooling_fwd_ker_t<avx512_core>;
+template struct jit_uni_i8i8_pooling_fwd_t<avx512_core>;
+
+template struct jit_uni_i8i8_pooling_fwd_ker_t<avx2>;
 template struct jit_uni_i8i8_pooling_fwd_t<avx2>;
-template struct jit_uni_i8i8_pooling_fwd_t<sse42>;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.hpp
index 2e274edf1..fe44d5aaa 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_i8i8_pooling.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2017-2018 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,13 +14,14 @@
 * limitations under the License.
 *******************************************************************************/
 
-#ifndef CPU_JIT_uni_I8I8_POOLING_HPP
-#define CPU_JIT_uni_I8I8_POOLING_HPP
+#ifndef CPU_JIT_UNI_I8I8_POOLING_HPP
+#define CPU_JIT_UNI_I8I8_POOLING_HPP
 
 #include "c_types_map.hpp"
+#include "cpu_isa_traits.hpp"
 #include "cpu_pooling_pd.hpp"
 #include "cpu_engine.hpp"
-#include "jit_generator.hpp"
+
 #include "jit_primitive_conf.hpp"
 
 namespace mkldnn {
@@ -28,7 +29,7 @@ namespace impl {
 namespace cpu {
 
 template <cpu_isa_t isa>
-struct jit_uni_i8i8_pool_fwd_ker_t;
+struct jit_uni_i8i8_pooling_fwd_ker_t;
 
 template <cpu_isa_t isa>
 struct jit_uni_i8i8_pooling_fwd_t : public cpu_primitive_t {
@@ -40,11 +41,12 @@ struct jit_uni_i8i8_pooling_fwd_t : public cpu_primitive_t {
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit:", isa, ""),
-                jit_uni_i8i8_pooling_fwd_t);
+                jit_uni_i8i8_pooling_fwd_t<isa>);
 
         virtual status_t init() override {
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
+                && mayiuse(isa)
                 && desc()->src_desc.ndims == 4
                 && set_default_params() == status::success
                 && desc()->prop_kind == prop_kind::forward_inference
@@ -75,20 +77,20 @@ struct jit_uni_i8i8_pooling_fwd_t : public cpu_primitive_t {
         }
     };
 
-    jit_uni_i8i8_pooling_fwd_t(const pd_t *pd,
+    jit_uni_i8i8_pooling_fwd_t(const pd_t *apd,
             const input_vector &inputs, const output_vector &outputs);
     ~jit_uni_i8i8_pooling_fwd_t();
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    jit_uni_i8i8_pool_fwd_ker_t<isa> *ker_;
+    jit_uni_i8i8_pooling_fwd_ker_t<isa> *ker_;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.cpp
index f774d4443..00bea0746 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.cpp
@@ -26,23 +26,23 @@ namespace cpu {
 
 template <cpu_isa_t isa>
 jit_uni_lrn_fwd_t<isa>::jit_uni_lrn_fwd_t(
-    const pd_t *pd,
+    const pd_t *apd,
     const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), ker_(nullptr)
+    : cpu_primitive_t(apd, inputs, outputs), ker_(nullptr)
     , ker_first_(nullptr), ker_last_(nullptr)
 {
     using namespace alg_kind;
 
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const int ls = conf_.desc()->local_size;
-    float A = conf_.desc()->lrn_alpha / ls;
-    float K = conf_.desc()->lrn_k;
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const int ls = pd()->desc()->local_size;
+    float A = pd()->desc()->lrn_alpha / ls;
+    float K = pd()->desc()->lrn_k;
 
-    auto pk = conf_.desc()->prop_kind;
-    auto ak = conf_.desc()->alg_kind;
-    auto dfmt = conf_.src_pd()->desc()->format;
+    auto pk = pd()->desc()->prop_kind;
+    auto ak = pd()->desc()->alg_kind;
+    auto dfmt = pd()->src_pd()->desc()->format;
 
     if (dfmt == nChw8c && ls == 5 && ak == lrn_across_channels) {
         ker_ = new jit_uni_lrn_fwd_kernel_f32<isa>(
@@ -74,20 +74,20 @@ jit_uni_lrn_fwd_t<isa>::~jit_uni_lrn_fwd_t()
 { delete ker_; delete ker_first_; delete ker_last_; }
 
 template <cpu_isa_t isa>
-void jit_uni_lrn_fwd_t<isa>::execute_forward() {
+void jit_uni_lrn_fwd_t<isa>::execute_forward() const {
     using namespace alg_kind;
 
     auto src = reinterpret_cast<const data_t*>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
     auto ws = reinterpret_cast<data_t*>(this->memory(1));
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
-    const int HW = conf_.H() * conf_.W();
-    const int ls = conf_.desc()->local_size;
+    const int N = pd()->MB();
+    const int C = pd()->C();
+    const int HW = pd()->H() * pd()->W();
+    const int ls = pd()->desc()->local_size;
 
-    auto ak = conf_.desc()->alg_kind;
-    auto dfmt = conf_.src_pd()->desc()->format;
+    auto ak = pd()->desc()->alg_kind;
+    auto dfmt = pd()->src_pd()->desc()->format;
 
     if (dfmt == nChw8c && ls == 5 && ak == lrn_across_channels) {
         parallel_nd(N, C / VECTOR_LENGTH, [&](int n, int c8) {
@@ -177,18 +177,18 @@ status_t jit_uni_lrn_fwd_t<isa>::pd_t::init() {
 }
 
 template <cpu_isa_t isa>
-jit_uni_lrn_bwd_t<isa>::jit_uni_lrn_bwd_t(const pd_t *pd,
+jit_uni_lrn_bwd_t<isa>::jit_uni_lrn_bwd_t(const pd_t *apd,
     const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
+    : cpu_primitive_t(apd, inputs, outputs)
     , ker_(nullptr), ker_first_(nullptr), ker_last_(nullptr)
 {
     using namespace alg_kind;
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const int ls = conf_.desc()->local_size;
-    float A = conf_.desc()->lrn_alpha / ls;
-    float B = conf_.desc()->lrn_beta;
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const int ls = pd()->desc()->local_size;
+    float A = pd()->desc()->lrn_alpha / ls;
+    float B = pd()->desc()->lrn_beta;
 
     int use_h_parallelizm = 0;// XXX
     if (C / VECTOR_LENGTH == 1) {
@@ -212,16 +212,16 @@ jit_uni_lrn_bwd_t<isa>::~jit_uni_lrn_bwd_t()
 }
 
 template <cpu_isa_t isa>
-void jit_uni_lrn_bwd_t<isa>::execute_backward() {
+void jit_uni_lrn_bwd_t<isa>::execute_backward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto ws = reinterpret_cast<const data_t*>(this->input_memory(2));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
+    const int N = pd()->MB();
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
 
     int use_h_parallelizm = 0; // XXX
     if (use_h_parallelizm) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.hpp
index c88e7af90..f10fb5215 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_lrn.hpp
@@ -47,20 +47,20 @@ struct jit_uni_lrn_fwd_t: public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_uni_lrn_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_lrn_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs);
     ~jit_uni_lrn_fwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     jit_uni_lrn_fwd_kernel_f32<isa> *ker_, *ker_first_, *ker_last_;
 };
@@ -79,20 +79,20 @@ struct jit_uni_lrn_bwd_t: public cpu_primitive_t {
         virtual status_t init() override;
     };
 
-    jit_uni_lrn_bwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_lrn_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs);
     ~jit_uni_lrn_bwd_t();
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     jit_uni_lrn_bwd_kernel_f32<isa> *ker_, *ker_first_, *ker_last_;
 };
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_conv_kernel_f32.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_conv_kernel_f32.cpp
new file mode 100644
index 000000000..bdba89121
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_conv_kernel_f32.cpp
@@ -0,0 +1,760 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <common/primitive_attr.hpp>
+#include "c_types_map.hpp"
+#include "nstl.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+#include "cpu_memory.hpp"
+
+#include "jit_uni_planar_conv_kernel_f32.hpp"
+#include "cpu_isa_traits.hpp"
+
+#define GET_OFF(field) offsetof(jit_conv_call_s, field)
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::prop_kind;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::utils;
+
+using namespace Xbyak;
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::load_src_scalar(int ur_h) {
+    Label init_done_label;
+    Label init_first_label;
+
+    mov(reg_ci_flag, ptr[this->param1 + GET_OFF(flags)]);
+    if (jcp.with_bias)
+        mov(reg_bias, ptr[this->param1 + GET_OFF(bias)]);
+
+    if (!jcp.with_sum) {
+        test(reg_ci_flag, FLAG_IC_FIRST);
+        jne(init_first_label, T_NEAR);
+    }
+
+    for (int kk = 0; kk < ur_h; kk++) {
+        size_t offt = sizeof(float) * (kk * jcp.ow * jcp.oh_block_step);
+        movss(Xmm(kk), make_safe_addr(reg_output, offt, reg_long_offt));
+    }
+
+    if (jcp.with_sum && jcp.with_bias) {
+        test(reg_ci_flag, FLAG_IC_FIRST);
+        je(init_done_label, T_NEAR);
+
+        movss(xmm_tmp, make_safe_addr(reg_bias, 0, reg_long_offt));
+        for (int kk = 0; kk < ur_h; kk++) {
+            uni_vaddps(Vmm(kk), Vmm(kk), vmm_tmp);
+        }
+    }
+
+    jmp(init_done_label, T_NEAR);
+
+    L(init_first_label);
+    if (this->jcp.with_bias) {
+        movss(xmm_tmp, make_safe_addr(reg_bias, 0, reg_long_offt));
+        for (int kk = 0; kk < ur_h; kk++) {
+            uni_vmovups(Vmm(kk), vmm_tmp);
+        }
+    } else {
+        for (int kk = 0; kk < ur_h; kk++) {
+            uni_vpxor(Vmm(kk), Vmm(kk), Vmm(kk));
+        }
+    }
+
+    L(init_done_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::filter_scalar(int ur_h) {
+    Label iter_exit_label;
+
+    int iw = jcp.iw;
+    int ih = jcp.ih;
+    int id = jcp.id;
+    int dilate_w = jcp.dilate_w + 1;
+    int ic_blk = jcp.ic_block;
+    int kw = jcp.kw;
+    int kh = jcp.kh;
+    int kd = jcp.kd;
+
+    cmp(reg_kw, 0);
+    je(iter_exit_label, T_NEAR);
+
+    mov(aux_reg_input_w, aux_reg_input_h);
+    mov(aux_reg_kernel_w, aux_reg_kernel_h);
+    mov(kw_iter, reg_kw);
+
+    Label kw_label;
+    L(kw_label);
+    {
+        for (size_t ifm2 = 0; ifm2 < (size_t)ic_blk; ifm2++) {
+            for (int kk = 0; kk < ur_h; kk++) {
+                size_t inp_off = sizeof(float) * (ifm2 * id * ih * iw + kk * jcp.iw * jcp.oh_block_step);
+                movss(xmm_src, make_safe_addr(aux_reg_input_w, inp_off, reg_long_offt));
+
+                size_t ker_off = sizeof(float) * (ifm2 * kd * kh * kw);
+                movss(xmm_ker, ptr[aux_reg_kernel_w + ker_off]);
+
+                uni_vfmadd231ps(Vmm(kk), vmm_src, vmm_ker);
+            }
+        }
+
+        add(aux_reg_kernel_w, sizeof(float));
+        add(aux_reg_input_w, dilate_w * sizeof(float));
+
+        dec(kw_iter);
+        cmp(kw_iter, 0);
+        jg(kw_label, T_NEAR);
+    }
+
+    L(iter_exit_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::apply_filter_scalar(int ur_h) {
+    int iw = jcp.iw;
+    int kw = jcp.kw;
+    int dilate_h = jcp.dilate_h + 1;
+    int dilate_d = jcp.dilate_h + 1;
+    const int inp_mult_h = dilate_h;
+    const int inp_mult_d = dilate_d;
+
+    Label skip_kh_loop, skip_kd_loop, kd_label;
+    if (jcp.ndims == 5) {
+        push(reg_kernel);
+        push(reg_output);
+
+        mov(reg_kd, ptr[param1 + GET_OFF(kd_padding)]);
+        mov(aux_reg_ker_d, aux_reg_kernel_h);
+        mov(aux_reg_inp_d, aux_reg_input_h);
+
+        cmp(reg_kd, 0);
+        je(skip_kd_loop, T_NEAR);
+
+        L(kd_label);
+        mov(kh_iter, ptr[param1 + GET_OFF(kh_padding)]);
+    } else {
+        mov(kh_iter, reg_kh);
+    }
+
+    if (jcp.ndims == 5) {
+        mov(aux_reg_input_h, aux_reg_inp_d);
+        mov(aux_reg_kernel_h, aux_reg_ker_d);
+    }
+
+    cmp(kh_iter, 0);
+    je(skip_kh_loop, T_NEAR);
+
+    Label kh_label;
+    L(kh_label);
+    {
+        filter_scalar(ur_h);
+
+        add(aux_reg_kernel_h, sizeof(float) * kw);
+        add(aux_reg_input_h, sizeof(float) * iw * inp_mult_h);
+
+        dec(kh_iter);
+        cmp(kh_iter, 0);
+        jg(kh_label, T_NEAR);
+    }
+
+    L(skip_kh_loop);
+
+    if (jcp.ndims == 5) {
+        add(aux_reg_ker_d, sizeof(float) * jcp.kw * jcp.kh);
+        add(aux_reg_inp_d, sizeof(float) * jcp.ih * jcp.iw * inp_mult_d);
+
+        dec(reg_kd);
+        cmp(reg_kd, 0);
+        jg(kd_label, T_NEAR);
+        L(skip_kd_loop);
+
+        pop(reg_output);
+        pop(reg_kernel);
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::apply_postprocess_scalar(int ur_h) {
+    Label regular_store_label;
+
+    mov(reg_ci_flag, ptr[this->param1 + GET_OFF(flags)]);
+    test(reg_ci_flag, FLAG_IC_LAST);
+    je(regular_store_label, T_NEAR);
+
+    int eltwise_inj_idx = 0;
+    const auto &p = attr_.post_ops_;
+
+
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur_h);
+            eltwise_inj_idx++;
+        }
+    }
+
+    L(regular_store_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::store_dst_scalar(int ur_h) {
+    for (int kk = 0; kk < ur_h; kk++) {
+        size_t o_off = sizeof(float) * (kk * jcp.ow * jcp.oh_block_step);
+        movss(make_safe_addr(reg_output, o_off, reg_long_offt), Xmm(kk));
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::load_src(int ur_h, int ur_w) {
+    Label init_done_label;
+    Label init_first_label;
+
+    mov(reg_ci_flag, ptr[this->param1 + GET_OFF(flags)]);
+    if (jcp.with_bias)
+        mov(reg_bias, ptr[this->param1 + GET_OFF(bias)]);
+
+    if (!jcp.with_sum) {
+        test(reg_ci_flag, FLAG_IC_FIRST);
+        jne(init_first_label, T_NEAR);
+    }
+
+    for (int kk = 0; kk < ur_h; kk++) {
+        for (int jj = 0; jj < ur_w; jj++) {
+            size_t offt = sizeof(float) * (jj * jcp.ow_block + kk * jcp.ow * jcp.oh_block_step);
+            uni_vmovups(Vmm(kk * ur_w + jj), make_safe_addr(reg_output, offt, reg_long_offt));
+        }
+    }
+
+    if (jcp.with_sum && jcp.with_bias) {
+        test(reg_ci_flag, FLAG_IC_FIRST);
+        je(init_done_label, T_NEAR);
+
+        uni_vbroadcastss(vmm_tmp, make_safe_addr(reg_bias, 0, reg_long_offt));
+        for (int kk = 0; kk < ur_h; kk++) {
+            for (int jj = 0; jj < ur_w; jj++) {
+                uni_vaddps(Vmm(kk * ur_w + jj), Vmm(kk * ur_w + jj), vmm_tmp);
+            }
+        }
+    }
+
+    jmp(init_done_label, T_NEAR);
+
+    L(init_first_label);
+    if (this->jcp.with_bias) {
+        uni_vbroadcastss(vmm_tmp, make_safe_addr(reg_bias, 0, reg_long_offt));
+        for (int kk = 0; kk < ur_h; kk++) {
+            for (int jj = 0; jj < ur_w; jj++) {
+                uni_vmovups(Vmm(kk * ur_w + jj), vmm_tmp);
+            }
+        }
+    } else {
+        for (int kk = 0; kk < ur_h; kk++) {
+            for (int jj = 0; jj < ur_w; jj++) {
+                uni_vpxor(Vmm(kk * ur_w + jj), Vmm(kk * ur_w + jj), Vmm(kk * ur_w + jj));
+            }
+        }
+    }
+
+    L(init_done_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::filter_unrolled(int ur_h, int ur_w) {
+    int iw = jcp.iw;
+    int ih = jcp.ih;
+    int id = jcp.id;
+    int stride_w = jcp.stride_w;
+    int dilate_w = jcp.dilate_w + 1;
+    int ic_blk = jcp.ic_block;
+    int kw = jcp.kw;
+    int kh = jcp.kh;
+    int kd = jcp.kd;
+    int ow_blk = jcp.ow_block;
+
+    for (int ki = 0; ki < kw; ki++) {
+        for (int ifm2 = 0; ifm2 < ic_blk; ifm2++) {
+            for (int kk = 0; kk < ur_h; kk++) {
+                for (int jj = 0; jj < ur_w; jj++) {
+                    size_t inp_off = sizeof(float) * ((size_t) ifm2 * id * ih * iw + ki * dilate_w +
+                            jj * stride_w * ow_blk + kk * jcp.ow * jcp.oh_block_step);
+                    uni_vmovups(vmm_src, make_safe_addr(aux_reg_input_h, inp_off, reg_long_offt));
+
+                    int ker_off = sizeof(float) * ((size_t) ifm2 * kd * kh * kw + ki);
+                    uni_vbroadcastss(vmm_ker, ptr[aux_reg_kernel_h + ker_off]);
+
+                    uni_vfmadd231ps(Vmm(kk * ur_w + jj), vmm_src, vmm_ker);
+                }
+            }
+        }
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::filter(int ur_h) {
+    Label iter_exit_label;
+
+    int iw = jcp.iw;
+    int ih = jcp.ih;
+    int id = jcp.id;
+    int dilate_w = jcp.dilate_w + 1;
+    int ic_blk = jcp.ic_block;
+    int kw = jcp.kw;
+    int kh = jcp.kh;
+    int kd = jcp.kd;
+
+    cmp(reg_kw, 0);
+    je(iter_exit_label, T_NEAR);
+
+    mov(aux_reg_input_w, aux_reg_input_h);
+    mov(aux_reg_kernel_w, aux_reg_kernel_h);
+    mov(kw_iter, reg_kw);
+
+    Label kw_label;
+    L(kw_label);
+    {
+        for (int ifm2 = 0; ifm2 < ic_blk; ifm2++) {
+            for (int kk = 0; kk < ur_h; kk++) {
+                size_t inp_off = sizeof(float) * ((size_t) ifm2 * id * ih * iw + kk * jcp.ow * jcp.oh_block_step);
+                uni_vmovups(vmm_src, make_safe_addr(aux_reg_input_w, inp_off, reg_long_offt));
+
+                size_t ker_off = sizeof(float) * ((size_t) ifm2 * kd * kh * kw);
+                uni_vbroadcastss(vmm_ker, ptr[aux_reg_kernel_w + ker_off]);
+
+                uni_vfmadd231ps(Vmm(kk), vmm_src, vmm_ker);
+            }
+        }
+
+        add(aux_reg_kernel_w, sizeof(float));
+        add(aux_reg_input_w, dilate_w * sizeof(float));
+
+        dec(kw_iter);
+        cmp(kw_iter, 0);
+        jg(kw_label, T_NEAR);
+    }
+
+    L(iter_exit_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::apply_filter(int ur_h, int ur_w) {
+    int iw = jcp.iw;
+    int kw = jcp.kw;
+    int dilate_h = jcp.dilate_h + 1;
+    int dilate_d = jcp.dilate_h + 1;
+    const int inp_mult_h = dilate_h;
+    const int inp_mult_d = dilate_d;
+
+    Label skip_kh_loop, skip_kd_loop, kd_label;
+    if (jcp.ndims == 5) {
+        push(reg_kernel);
+        push(reg_output);
+
+        mov(reg_kd, ptr[param1 + GET_OFF(kd_padding)]);
+        mov(aux_reg_ker_d, aux_reg_kernel_h);
+        mov(aux_reg_inp_d, aux_reg_input_h);
+
+        cmp(reg_kd, 0);
+        je(skip_kd_loop, T_NEAR);
+
+        L(kd_label);
+        mov(kh_iter, ptr[param1 + GET_OFF(kh_padding)]);
+    } else {
+        mov(kh_iter, reg_kh);
+    }
+
+    if (jcp.ndims == 5) {
+        mov(aux_reg_input_h, aux_reg_inp_d);
+        mov(aux_reg_kernel_h, aux_reg_ker_d);
+    }
+
+    cmp(kh_iter, 0);
+    je(skip_kh_loop, T_NEAR);
+
+    Label kh_label;
+    L(kh_label);
+    {
+        if (ur_w == jcp.nb_ow_blocking)
+            filter_unrolled(ur_h, ur_w);
+        else
+            filter(ur_h);
+
+        add(aux_reg_kernel_h, sizeof(float) * kw);
+        add(aux_reg_input_h, sizeof(float) * iw * inp_mult_h);
+
+        dec(kh_iter);
+        cmp(kh_iter, 0);
+        jg(kh_label, T_NEAR);
+    }
+
+    L(skip_kh_loop);
+
+    if (jcp.ndims == 5) {
+        add(aux_reg_ker_d, sizeof(float) * jcp.kw * jcp.kh);
+        add(aux_reg_inp_d, sizeof(float) * jcp.ih * jcp.iw * inp_mult_d);
+
+        dec(reg_kd);
+        cmp(reg_kd, 0);
+        jg(kd_label, T_NEAR);
+        L(skip_kd_loop);
+
+        pop(reg_output);
+        pop(reg_kernel);
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::apply_postprocess(int ur_h, int ur_w) {
+    Label regular_store_label;
+
+    mov(reg_ci_flag, ptr[this->param1 + GET_OFF(flags)]);
+    test(reg_ci_flag, FLAG_IC_LAST);
+    je(regular_store_label, T_NEAR);
+
+    int eltwise_inj_idx = 0;
+    const auto &p = attr_.post_ops_;
+
+    for (int i = 0; i < p.len_; i++) {
+        auto& post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors[eltwise_inj_idx]->compute_vector_range(0, ur_w * ur_h);
+            eltwise_inj_idx++;
+        }
+    }
+
+    L(regular_store_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::store_dst(int ur_h, int ur_w) {
+    for (int kk = 0; kk < ur_h; kk++) {
+        for (int jj = 0; jj < ur_w; jj++) {
+            size_t o_off = sizeof(float) * (jj * jcp.ow_block + kk * jcp.ow * jcp.oh_block_step);
+            uni_vmovups(make_safe_addr(reg_output, o_off, reg_long_offt), Vmm(kk * ur_w + jj));
+        }
+    }
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::solve_common(int ur_h) {
+    auto solve_loop = [&](int ur_w, int step_w) {
+        Label loop_label;
+        Label exit_label;
+
+        L(loop_label);
+        {
+            if (step_w == 1) {
+                load_src_scalar(ur_h);
+                apply_filter_scalar(ur_h);
+                apply_postprocess_scalar(ur_h);
+                store_dst_scalar(ur_h);
+            } else {
+                load_src(ur_h, ur_w);
+                apply_filter(ur_h, ur_w);
+                apply_postprocess(ur_h, ur_w);
+                store_dst(ur_h, ur_w);
+            }
+
+            add(reg_input, sizeof(float) * step_w * jcp.stride_w);
+            add(reg_output, sizeof(float) * step_w);
+        }
+
+        L(exit_label);
+    };
+
+    Label left_border_label;
+    Label main_loop_unrolled_label;
+    Label main_loop_label;
+    Label right_border_label;
+    Label exit_label;
+
+    xor_(reg_ow, reg_ow);
+    sub(reg_input, sizeof(float) * jcp.l_pad);
+
+    auto adjust_indexes_left = [&]() {
+        Label border_indexes_label;
+        Label border_indexes_exit_label;
+
+        mov(reg_wj, jcp.l_pad);
+        sub(reg_wj, reg_ow);
+        L(border_indexes_label);
+        {
+            cmp(reg_wj, 0);
+            jle(border_indexes_exit_label, T_NEAR);
+
+            add(aux_reg_kernel_h, sizeof(float));
+            add(aux_reg_input_h, sizeof(float) * (jcp.dilate_w + 1));
+            dec(reg_kw);
+            sub(reg_wj, jcp.dilate_w + 1);
+
+            jmp(border_indexes_label);
+
+            L(border_indexes_exit_label);
+        }
+    };
+
+    auto adjust_indexes_right = [&]() {
+        Label border_indexes_right_label;
+        Label border_indexes_right_exit_label;
+
+        imul(reg_wj, reg_ow, jcp.stride_w);
+        add(reg_wj, (jcp.kw-1) * (jcp.dilate_w+1) - jcp.l_pad+1 - jcp.iw);
+
+        L(border_indexes_right_label);
+        {
+            cmp(reg_wj, 0);
+            jle(border_indexes_right_exit_label, T_NEAR);
+
+            dec(reg_kw);
+            sub(reg_wj, jcp.dilate_w + 1);
+
+            jmp(border_indexes_right_label);
+
+            L(border_indexes_right_exit_label);
+        }
+    };
+
+    int left_border_end = nstl::min(div_up(jcp.l_pad, jcp.stride_w), jcp.ow);
+    L(left_border_label); {
+        cmp(reg_ow, left_border_end);
+        jge(main_loop_unrolled_label, T_NEAR);
+
+        mov(aux_reg_input_h, reg_input);
+        mov(aux_reg_kernel_h, reg_kernel);
+        mov(reg_kw, jcp.kw);
+
+        adjust_indexes_left();
+        adjust_indexes_right();
+
+        solve_loop(1, 1); // scalar
+
+        inc(reg_ow);
+        jmp(left_border_label, T_NEAR);
+    }
+
+    int main_loop_end = (jcp.iw - (jcp.kw - 1)*(jcp.dilate_w + 1) + jcp.l_pad - 1) / jcp.stride_w + 1;
+    L(main_loop_unrolled_label); {
+        cmp(reg_ow, main_loop_end - jcp.nb_ow_blocking * jcp.ow_block);
+        jg(main_loop_label, T_NEAR);
+
+        mov(aux_reg_input_h, reg_input);
+        mov(aux_reg_kernel_h, reg_kernel);
+        mov(reg_kw, jcp.kw);
+
+        solve_loop(jcp.nb_ow_blocking, jcp.nb_ow_blocking * jcp.ow_block);
+
+        add(reg_ow, jcp.nb_ow_blocking * jcp.ow_block);
+        jmp(main_loop_unrolled_label, T_NEAR);
+    }
+
+    L(main_loop_label); {
+        cmp(reg_ow, main_loop_end - jcp.ow_block);
+        jg(right_border_label, T_NEAR);
+
+        mov(aux_reg_input_h, reg_input);
+        mov(aux_reg_kernel_h, reg_kernel);
+        mov(reg_kw, jcp.kw);
+
+        solve_loop(1, jcp.ow_block); // vectorized
+
+        add(reg_ow, jcp.ow_block);
+        jmp(main_loop_label, T_NEAR);
+    }
+
+    int right_border_end = jcp.ow;
+    L(right_border_label); {
+        cmp(reg_ow, right_border_end);
+        jge(exit_label, T_NEAR);
+
+        mov(aux_reg_input_h, reg_input);
+        mov(aux_reg_kernel_h, reg_kernel);
+        mov(reg_kw, jcp.kw);
+
+        adjust_indexes_left();
+        adjust_indexes_right();
+
+        solve_loop(1, 1); // scalar
+
+        inc(reg_ow);
+        jmp(right_border_label, T_NEAR);
+    }
+
+    L(exit_label);
+}
+
+template <cpu_isa_t isa>
+void jit_uni_planar_conv_fwd_kernel_f32<isa>::generate() {
+    const auto &p = attr_.post_ops_;
+    for (int i = 0; i < p.len_; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<isa>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        }
+    }
+
+    this->preamble();
+
+    mov(reg_input, ptr[this->param1 + GET_OFF(src)]);
+    mov(reg_output, ptr[this->param1 + GET_OFF(dst)]);
+    mov(reg_kernel, ptr[this->param1 + GET_OFF(filt)]);
+    mov(reg_kh, ptr[this->param1 + GET_OFF(kh_padding)]);
+    mov(reg_oh_blocks, ptr[this->param1 + GET_OFF(oh_blocks)]);
+
+    Label tail_label;
+    Label exit_label;
+
+    solve_common(1);
+
+    this->postamble();
+
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
+}
+
+template <cpu_isa_t isa>
+bool jit_uni_planar_conv_fwd_kernel_f32<isa>::post_ops_ok(
+        jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
+    const auto &p = attr.post_ops_;
+
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx); };
+
+    switch (p.len_) {
+    case 0: return true; // no post_ops
+    case 1:
+        return true // sum OR eltwise OR depthwise
+                && !jcp.with_eltwise && (is_simple(0) || is_sum(0));
+    case 2:
+        return true // sum->relu
+                && !jcp.with_eltwise && ((is_sum(0) && is_simple(1)) ||
+                                         (is_simple(0) && is_simple(1)));
+    case 3:
+        return true // sum->relu
+                && !jcp.with_eltwise && (is_sum(0) && is_simple(1) && is_simple(2));
+    default: return false;
+    }
+
+    return false;
+}
+
+template <cpu_isa_t isa>
+status_t jit_uni_planar_conv_fwd_kernel_f32<isa>::init_conf(jit_conv_conf_t &jcp,
+        const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
+        const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
+        const primitive_attr_t &attr) {
+    if (!mayiuse(isa)) return status::unimplemented;
+
+    jcp.prop_kind = cd.prop_kind;
+
+    const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
+    int ndims = src_d.ndims();
+    jcp.ndims = ndims;
+
+    jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
+    jcp.mb = src_d.dims()[0];
+
+    jcp.oc = dst_d.dims()[1] / jcp.ngroups;
+    jcp.oc_without_padding = jcp.oc;
+    jcp.ic = src_d.dims()[1] / jcp.ngroups;
+
+    jcp.id = (ndims == 5) ? src_d.dims()[2] : 1;
+    jcp.ih = src_d.dims()[ndims-2];
+    jcp.iw = src_d.dims()[ndims-1];
+    jcp.od = (ndims == 5) ? dst_d.dims()[2] : 1;
+    jcp.oh = dst_d.dims()[ndims-2];
+    jcp.ow = dst_d.dims()[ndims-1];
+    jcp.kd = (ndims == 5) ? weights_d.dims()[with_groups + 2] : 1;
+    jcp.kh = weights_d.dims()[with_groups + ndims-2];
+    jcp.kw = weights_d.dims()[with_groups + ndims-1];
+
+    jcp.f_pad = (ndims == 5) ? cd.padding[0][0] : 0;
+    jcp.t_pad = cd.padding[0][ndims-4];
+    jcp.l_pad = cd.padding[0][ndims-3];
+    jcp.stride_d = (ndims == 5) ? cd.strides[0] : 1;
+    jcp.stride_h = cd.strides[ndims-4];
+    jcp.stride_w = cd.strides[ndims-3];
+
+    jcp.dilate_d = (ndims == 5) ? cd.dilates[0] : 0;
+    jcp.dilate_h = cd.dilates[ndims-4];
+    jcp.dilate_w = cd.dilates[ndims-3];
+
+    jcp.b_pad = (jcp.oh - 1) * jcp.stride_h + (jcp.kh - 1) * (jcp.dilate_h + 1)
+            - (jcp.ih + jcp.t_pad - 1);
+
+    jcp.src_fmt = src_d.format();
+    jcp.with_bias = cd.bias_desc.format != memory_format::undef;
+    jcp.with_eltwise = false;
+
+    if (!post_ops_ok(jcp, attr))
+        return status::unimplemented;
+
+    const auto &p = attr.post_ops_;
+    jcp.with_sum = p.find(primitive_kind::sum) != -1;
+
+    const int simd_w = isa == avx512_common ? 16 : 8;
+
+    bool args_ok = true
+        && one_of(src_d.format(), nchw, ncdhw)
+        && one_of(weights_d.format(), oihw, oidhw)
+        && one_of(cd.bias_desc.format, memory_format::undef, any, x)
+        && one_of(dst_d.format(), nchw, ncdhw);
+    if (!args_ok) return status::unimplemented;
+
+    // This convolution implementation was introduced as workaround to provide competitive performance on MSD topology.
+    // The conditions below are needed to bound applicability scope.
+    args_ok = jcp.ngroups == 1 &&
+              jcp.oc == 1 &&
+              jcp.stride_d == 1 && jcp.stride_h == 1 && jcp.stride_w == 1;
+
+    if (!args_ok) return status::unimplemented;
+
+    jcp.ur_w = 1;
+
+    jcp.ow_block = simd_w;
+    jcp.nb_ow_blocking = isa == avx512_common ? 3 : 3;
+
+    jcp.oh_block = 1;
+    jcp.nb_oh_blocking = 1;
+    jcp.oh_block_step = 1; // (jcp.dilate_h + 1);
+
+    jcp.oc_block = 1;
+    jcp.nb_oc = jcp.oc / jcp.oc_block;
+    jcp.nb_oc_blocking = 1;
+
+    jcp.ic_block = 1;
+    jcp.nb_ic = jcp.ic / jcp.ic_block;
+    jcp.nb_ic_blocking = 1;
+
+    return status::success;
+}
+
+template struct jit_uni_planar_conv_fwd_kernel_f32<avx512_common>;
+template struct jit_uni_planar_conv_fwd_kernel_f32<avx2>;
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_conv_kernel_f32.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_conv_kernel_f32.hpp
new file mode 100644
index 000000000..f5104ec23
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_conv_kernel_f32.hpp
@@ -0,0 +1,135 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef JIT_UNI_PLANAR_CONV_KERNEL_F32_HPP
+#define JIT_UNI_PLANAR_CONV_KERNEL_F32_HPP
+
+#include "c_types_map.hpp"
+#include "jit_generator.hpp"
+#include "jit_primitive_conf.hpp"
+#include "jit_uni_eltwise.hpp"
+#include "jit_uni_depthwise.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <cpu_isa_t isa>
+struct jit_uni_planar_conv_fwd_kernel_f32: public jit_generator {
+    jit_uni_planar_conv_fwd_kernel_f32(jit_conv_conf_t ajcp,
+            const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
+    {
+        this->generate();
+        jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
+    }
+
+    ~jit_uni_planar_conv_fwd_kernel_f32() {
+        for (auto inj : eltwise_injectors)
+           delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
+    }
+
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_planar_conv_fwd_kernel_f32)
+
+    static bool post_ops_ok(jit_conv_conf_t &jcp,
+            const primitive_attr_t &attr);
+    static status_t init_conf(jit_conv_conf_t &jcp,
+            const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
+            const memory_desc_wrapper &weights_d,
+            const memory_desc_wrapper &dst_d,
+            const primitive_attr_t &attr);
+
+    jit_conv_conf_t jcp;
+    const primitive_attr_t &attr_;
+    void (*jit_ker)(jit_conv_call_s *);
+
+private:
+    using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
+            isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
+    using reg64_t = const Xbyak::Reg64;
+    using reg32_t = const Xbyak::Reg32;
+    const Xbyak::AddressFrame &vmmword = (isa == sse42)
+        ? xword : (isa == avx2) ? yword : zword;
+
+    reg64_t reg_input = r8;
+    reg64_t reg_kernel = r9;
+    reg64_t reg_output = r10;
+
+    reg64_t aux_reg_input_h = r11;
+    reg64_t aux_reg_kernel_h = r12;
+
+    reg64_t aux_reg_input_w = r13;
+    reg64_t aux_reg_kernel_w = r14;
+
+    reg64_t aux_reg_inp_d = r9;
+    reg64_t aux_reg_ker_d = r10;
+
+    reg64_t reg_kd = rbx;
+    reg64_t reg_kh = rdx;
+    reg64_t reg_kw = rsi;
+
+    reg64_t kh_iter = rax;
+    reg64_t kw_iter = abi_not_param1;
+
+    reg64_t reg_bias = r13;
+    reg64_t reg_long_offt = r15;
+    reg32_t reg_ci_flag = r15d;
+
+    reg64_t reg_d_weights = r15;
+    reg64_t reg_d_bias = kh_iter;
+
+    reg64_t reg_ow = rbp;
+
+    reg64_t reg_oh_blocks = aux_reg_kernel_w;
+
+    reg64_t reg_wj = aux_reg_input_w;
+
+    Vmm vmm_ker = Vmm(15);
+    Vmm vmm_tmp = Vmm(15);
+    Vmm vmm_src = Vmm(14);
+    Xbyak::Xmm xmm_ker = Xbyak::Xmm(15);
+    Xbyak::Xmm xmm_tmp = Xbyak::Xmm(15);
+    Xbyak::Xmm xmm_src = Xbyak::Xmm(14);
+
+    nstl::vector<jit_uni_eltwise_injector_f32<isa>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<isa>*> depthwise_injectors;
+
+    inline void load_src(int ur_h, int ur_w);
+    inline void filter(int ur_h);
+    inline void filter_unrolled(int ur_h, int ur_w);
+    inline void apply_filter(int ur_h, int ur_w);
+    inline void apply_postprocess(int ur_h, int ur_w);
+    inline void store_dst(int ur_h, int ur_w);
+    inline void solve_common(int ur_h);
+
+    inline void filter_scalar(int ur_h);
+    inline void load_src_scalar(int ur_h);
+    inline void apply_filter_scalar(int ur_h);
+    inline void apply_postprocess_scalar(int ur_h);
+    inline void store_dst_scalar(int ur_h);
+
+    void generate();
+};
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_convolution.cpp
new file mode 100644
index 000000000..5a8f30240
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_convolution.cpp
@@ -0,0 +1,172 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <cstring>
+#include "mkldnn_types.h"
+
+#include "c_types_map.hpp"
+#include "jit_uni_planar_convolution.hpp"
+#include "utils.hpp"
+#include "mkldnn_thread.hpp"
+#include "type_helpers.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::status;
+using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::utils;
+
+#define src_blk_off(f, n, c, d, h, w) \
+    pd()->ndims() == 5 \
+        ? (f).blk_off(n, c, d, h, w) \
+        : (f).blk_off(n, c, h, w)
+
+#define wht_blk_off(f, g, oc, ic, kd, kh, kw) \
+    pd()->ndims() == 5 \
+        ? pd()->with_groups() \
+            ? (f).blk_off(g, oc, ic, kd, kh, kw) \
+            : (f).blk_off(oc, ic, kd, kh, kw) \
+        : pd()->with_groups() \
+            ? (f).blk_off(g, oc, ic, kh, kw) \
+            : (f).blk_off(oc, ic, kh, kw)
+
+template <cpu_isa_t isa>
+void _jit_uni_planar_convolution_fwd_t<isa>::execute_forward() const {
+    auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
+    auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
+    auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
+    auto dst = reinterpret_cast<data_t *>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
+
+    const auto &jcp = kernel_->jcp;
+    const int MB = pd()->MB();
+
+    int od_indexes[jcp.od];
+
+    int idx = 0;
+    for (int i = 0; i < (jcp.dilate_d + 1); i++) {
+        for (int ib = 0; ib < jcp.od; ib += (jcp.dilate_d + 1)) {
+            if (ib + i >= jcp.od)
+                continue;
+
+            od_indexes[idx++] = ib + i;
+            if (idx >= jcp.od)
+                break;
+        }
+        if (idx >= jcp.od)
+            break;
+    }
+
+    int threads_count = mkldnn_get_max_threads();
+    int odb_size = div_up(jcp.od, threads_count);
+
+    auto kernel_params = [&](int n, int g, int icb, int oc, int od, int oh, int oh_blocks, int id, int wd, int kd_padding) {
+        auto par_conv = jit_conv_call_s();
+
+        const int hj = oh * jcp.stride_h;
+        const int i_t_overflow = nstl::max(0, jcp.t_pad - hj);
+        const int i_b_overflow = nstl::max(jcp.ih, hj + (jcp.kh - 1) * (jcp.dilate_h + 1) - jcp.t_pad + 1) - jcp.ih;
+        const int ih = nstl::max(hj - jcp.t_pad + div_up(i_t_overflow, (jcp.dilate_h + 1)) * (jcp.dilate_h + 1), 0);
+        const int wh = div_up(i_t_overflow, (jcp.dilate_h + 1));
+        const int kh_padding = jcp.kh - div_up(i_t_overflow, (jcp.dilate_h + 1)) - div_up(i_b_overflow, (jcp.dilate_h + 1));
+
+        const size_t _oc = oc;
+        const size_t _ic = g * jcp.nb_ic + icb;
+
+        par_conv.src = &src[src_blk_off(src_d, n, _ic, id, ih, 0)];
+        par_conv.dst = &dst[src_blk_off(dst_d, n, _oc, od, oh, 0)];
+        par_conv.filt = &weights[wht_blk_off(weights_d, g, _oc, _ic, wd, wh, 0)];
+
+        if (icb == 0) {
+            if (bias)
+                par_conv.bias = &bias[bias_d.blk_off(_oc)];
+            par_conv.flags |= FLAG_IC_FIRST;
+        }
+
+        if (icb + 1 == jcp.nb_ic) {
+            par_conv.flags |= FLAG_IC_LAST;
+        }
+
+        par_conv.oc_off = _oc * sizeof(float);
+        par_conv.oh_blocks = (size_t)oh_blocks;
+
+        par_conv.kh_padding = (size_t)nstl::max(0, kh_padding);
+        par_conv.kd_padding = (size_t)nstl::max(0, kd_padding);
+
+        return par_conv;
+    };
+
+    auto ker = [&](const int ithr, const int nthr) {
+        int g = 0;
+        int oc = 0;
+
+        for (int n = 0; n < MB; n++) {
+            int icbb = 0;
+            while (icbb < jcp.nb_ic) {
+                int icb_step = jcp.nb_ic_blocking;
+                int icb_step_rem = jcp.nb_ic - icbb;
+                if (icb_step_rem < jcp.nb_ic_blocking_max)
+                    icb_step = icb_step_rem;
+
+                for (int icb = icbb; icb < icbb + icb_step; ++icb) {
+                    for (int ohb = 0; ohb < (jcp.dilate_h + 1); ohb++) {
+                        for (int oh = ohb; oh < jcp.oh; oh += (jcp.dilate_h + 1)) {
+                            int od_idx_off = ithr * odb_size;
+                            for (int od_idx = 0; od_idx < odb_size; od_idx++) {
+                                if ((od_idx_off + od_idx) >= jcp.od || od_indexes[od_idx_off + od_idx] >= jcp.od)
+                                    continue;
+                                int od = od_indexes[od_idx_off + od_idx];
+
+                                const int dj = od * jcp.stride_d;
+                                const int d_t_overflow = nstl::max(0, jcp.f_pad - dj);
+                                const int d_b_overflow =
+                                        nstl::max(jcp.id, dj + (jcp.kd - 1) * (jcp.dilate_d + 1) - jcp.f_pad + 1) -
+                                        jcp.id;
+                                const int id = nstl::max(dj - jcp.f_pad +
+                                                         div_up(d_t_overflow, (jcp.dilate_d + 1)) * (jcp.dilate_d + 1),
+                                                         0);
+                                const int wd = div_up(d_t_overflow, (jcp.dilate_d + 1));
+                                const int kd_padding = jcp.kd - div_up(d_t_overflow, (jcp.dilate_d + 1)) -
+                                                       div_up(d_b_overflow, (jcp.dilate_d + 1));
+
+                                jit_conv_call_s par_conv = kernel_params(n, g, icb, oc, od, oh, 1, id, wd, kd_padding);
+
+                                kernel_->jit_ker(&par_conv);
+                            }
+                        }
+                    }
+                }
+                icbb += icb_step;
+            }
+        }
+    };
+
+    parallel(0, ker);
+}
+
+
+template struct _jit_uni_planar_convolution_fwd_t<avx512_common>;
+template struct _jit_uni_planar_convolution_fwd_t<avx2>;
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_convolution.hpp
new file mode 100644
index 000000000..007ebb88e
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_planar_convolution.hpp
@@ -0,0 +1,119 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_JIT_UNI_PLANAR_CONVOLUTION_HPP
+#define CPU_JIT_UNI_PLANAR_CONVOLUTION_HPP
+
+#include "c_types_map.hpp"
+#include "cpu_convolution_pd.hpp"
+#include "cpu_engine.hpp"
+#include "cpu_reducer.hpp"
+#include "jit_primitive_conf.hpp"
+#include "jit_uni_planar_conv_kernel_f32.hpp"
+#include "mkldnn_thread.hpp"
+#include "jit_uni_depthwise.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <cpu_isa_t isa>
+struct _jit_uni_planar_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const typename pd_t::base_class *hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_() {}
+
+        DECLARE_COMMON_PD_T(
+                JIT_IMPL_NAME_HELPER("jit_planar:", isa, ""),
+                _jit_uni_planar_convolution_fwd_t<isa>);
+
+        virtual status_t init() override {
+            using namespace prop_kind;
+            assert(this->engine()->kind() == engine_kind::cpu);
+            bool ok = true
+                && this->set_default_params() == status::success
+                && utils::one_of(this->desc()->prop_kind, forward_training,
+                        forward_inference)
+                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && !this->has_zero_dim_memory()
+                && utils::everyone_is(data_type::f32,
+                        this->desc()->src_desc.data_type,
+                        this->desc()->weights_desc.data_type,
+                        this->desc()->dst_desc.data_type)
+                && IMPLICATION(this->with_bias(),
+                        data_type::f32 == this->desc()->bias_desc.data_type);
+            if (!ok) return status::unimplemented;
+
+            status_t sts = jit_uni_planar_conv_fwd_kernel_f32<isa>::init_conf(jcp_, *this->desc(),
+                    *this->src_pd_.desc(), *this->weights_pd_.desc(),
+                    *this->dst_pd_.desc(), *this->attr());
+
+            return sts;
+        }
+
+        jit_conv_conf_t jcp_;
+
+    protected:
+        virtual status_t set_default_params() override {
+            using namespace memory_format;
+
+            if (this->src_pd_.desc()->format == any)
+                CHECK(this->src_pd_.set_format(this->ndims() == 4 ? nchw : ncdhw));
+            if (this->dst_pd_.desc()->format == any)
+                CHECK(this->dst_pd_.set_format(this->ndims() == 4 ? nchw : ncdhw));
+            if (this->weights_pd_.desc()->format == any)
+                CHECK(this->weights_pd_.set_format(this->ndims() == 4 ? oihw : oidhw));
+            if (this->bias_pd_.desc()->format == any)
+                CHECK(this->bias_pd_.set_format(x));
+            return status::success;
+        }
+    };
+
+    _jit_uni_planar_convolution_fwd_t(const pd_t *apd,
+                                      const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {
+        kernel_ = new jit_uni_planar_conv_fwd_kernel_f32<isa>(pd()->jcp_, *pd()->attr());
+    }
+
+    ~_jit_uni_planar_convolution_fwd_t() {
+        delete kernel_;
+    };
+
+    typedef typename prec_traits<data_type::f32>::type data_t;
+
+    virtual void execute(event_t *e) const {
+        execute_forward();
+        e->set_state(event_t::ready);
+    }
+
+private:
+    void execute_forward() const;
+
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+    jit_uni_planar_conv_fwd_kernel_f32<isa> *kernel_;
+};
+
+using jit_avx512_common_planar_convolution_fwd_t = _jit_uni_planar_convolution_fwd_t<avx512_common>;
+using jit_avx2_planar_convolution_fwd_t = _jit_uni_planar_convolution_fwd_t<avx2>;
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.cpp
index 8e2a03ef4..d85f338ad 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.cpp
@@ -26,20 +26,20 @@ namespace impl {
 namespace cpu {
 
 template <cpu_isa_t isa>
-void jit_uni_pooling_fwd_t<isa>::execute_forward() {
+void jit_uni_pooling_fwd_t<isa>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
-    auto indices = conf_.desc()->alg_kind == alg_kind::pooling_max ?
+    auto indices = pd()->desc()->alg_kind == alg_kind::pooling_max ?
         reinterpret_cast<unsigned char *>(this->memory(1)) : nullptr;
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper indices_d(conf_.workspace_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper indices_d(pd()->workspace_pd());
     const size_t ind_dt_size = indices
         ? types::data_type_size(indices_d.data_type()) : 0;
 
-    const auto &jpp = conf_.jpp_;
-    int mb = conf_.MB();
+    const auto &jpp = pd()->jpp_;
+    int mb = pd()->MB();
 
     auto ker = [&](int n, int b_c, int oh) {
         auto arg = jit_pool_call_s();
@@ -59,7 +59,7 @@ void jit_uni_pooling_fwd_t<isa>::execute_forward() {
         arg.kh_padding = jpp.kh - i_t_overflow - i_b_overflow;
         arg.kh_padding_shift = i_t_overflow*jpp.kw;
         arg.kw_padding = 0;
-        arg.ker_area_h = conf_.desc()->alg_kind == alg_kind::pooling_avg_exclude_padding
+        arg.ker_area_h = pd()->desc()->alg_kind == alg_kind::pooling_avg_exclude_padding
              ?  (float)(jpp.kh - nstl::max(0, oh*jpp.stride_h - jpp.t_pad + jpp.kh - jpp.ih) -
                 nstl::max(0, jpp.t_pad - oh*jpp.stride_h))
              :  (float)(jpp.kh - nstl::max(0, oh*jpp.stride_h - jpp.t_pad + jpp.kh - jpp.ih - jpp.b_pad));
@@ -74,20 +74,20 @@ void jit_uni_pooling_fwd_t<isa>::execute_forward() {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_pooling_fwd_t<isa>::execute_forward_3d() {
+void jit_uni_pooling_fwd_t<isa>::execute_forward_3d() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
-    auto indices = conf_.desc()->alg_kind == alg_kind::pooling_max ?
+    auto indices = pd()->desc()->alg_kind == alg_kind::pooling_max ?
         reinterpret_cast<unsigned char *>(this->memory(1)) : nullptr;
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper indices_d(conf_.workspace_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper indices_d(pd()->workspace_pd());
     const size_t ind_dt_size = indices
         ? types::data_type_size(indices_d.data_type()) : 0;
 
-    const auto &jpp = conf_.jpp_;
-    int mb = conf_.MB();
+    const auto &jpp = pd()->jpp_;
+    int mb = pd()->MB();
 
     auto ker = [&](int n, int b_c, int od, int oh, int id, int d_t_overflow,
             int d_b_overflow) {
@@ -135,20 +135,20 @@ void jit_uni_pooling_fwd_t<isa>::execute_forward_3d() {
 
 
 template <cpu_isa_t isa>
-void jit_uni_pooling_bwd_t<isa>::execute_backward() {
+void jit_uni_pooling_bwd_t<isa>::execute_backward() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
-    auto indices = conf_.desc()->alg_kind == alg_kind::pooling_max ?
+    auto indices = pd()->desc()->alg_kind == alg_kind::pooling_max ?
         reinterpret_cast<const char*>(this->input_memory(1)) : nullptr;
 
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper indices_d(conf_.workspace_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper indices_d(pd()->workspace_pd());
     const size_t ind_dt_size = indices
         ? types::data_type_size(indices_d.data_type()) : 0;
 
-    const auto &jpp = conf_.jpp_;
-    int mb = conf_.MB();
+    const auto &jpp = pd()->jpp_;
+    int mb = pd()->MB();
 
     auto ker = [&](int n, int b_c, int oh) {
         auto arg = jit_pool_call_s();
@@ -183,20 +183,20 @@ void jit_uni_pooling_bwd_t<isa>::execute_backward() {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_pooling_bwd_t<isa>::execute_backward_3d() {
+void jit_uni_pooling_bwd_t<isa>::execute_backward_3d() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
-    auto indices = conf_.desc()->alg_kind == alg_kind::pooling_max ?
+    auto indices = pd()->desc()->alg_kind == alg_kind::pooling_max ?
         reinterpret_cast<const char*>(this->input_memory(1)) : nullptr;
 
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper indices_d(conf_.workspace_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper indices_d(pd()->workspace_pd());
     const size_t ind_dt_size = indices
         ? types::data_type_size(indices_d.data_type()) : 0;
 
-    const auto &jpp = conf_.jpp_;
-    int mb = conf_.MB();
+    const auto &jpp = pd()->jpp_;
+    int mb = pd()->MB();
 
     auto ker = [&](int n, int b_c, int od, int oh, int id, int d_t_overflow,
             int d_b_overflow, int zero_size, int kd) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.hpp
index 520ab1299..25d3d79fa 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_pooling.hpp
@@ -91,25 +91,25 @@ struct jit_uni_pooling_fwd_t: public cpu_primitive_t {
         }
     };
 
-    jit_uni_pooling_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_pooling_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    { kernel_ = new jit_uni_pool_kernel_f32<isa>(conf_.jpp_); }
+        : cpu_primitive_t(apd, inputs, outputs)
+    { kernel_ = new jit_uni_pool_kernel_f32<isa>(pd()->jpp_); }
 
     ~jit_uni_pooling_fwd_t() { delete kernel_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.jpp_.ndims == 5) execute_forward_3d();
+    virtual void execute(event_t *e) const {
+        if (pd()->jpp_.ndims == 5) execute_forward_3d();
         else execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    void execute_forward_3d();
-    pd_t conf_;
+    void execute_forward() const;
+    void execute_forward_3d() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_pool_kernel_f32<isa> *kernel_;
 };
 
@@ -175,25 +175,25 @@ struct jit_uni_pooling_bwd_t: public cpu_primitive_t {
         }
     };
 
-    jit_uni_pooling_bwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_pooling_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    { kernel_ = new jit_uni_pool_kernel_f32<isa>(conf_.jpp_); }
+        : cpu_primitive_t(apd, inputs, outputs)
+    { kernel_ = new jit_uni_pool_kernel_f32<isa>(pd()->jpp_); }
 
     ~jit_uni_pooling_bwd_t() { delete kernel_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.jpp_.ndims == 5) execute_backward_3d();
+    virtual void execute(event_t *e) const {
+        if (pd()->jpp_.ndims == 5) execute_backward_3d();
         else execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    void execute_backward_3d();
-    pd_t conf_;
+    void execute_backward() const;
+    void execute_backward_3d() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_pool_kernel_f32<isa> *kernel_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder.cpp
index 81677ba68..7afc3fb06 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder.cpp
@@ -116,7 +116,7 @@ struct jit_uni_reorder_kernel_f32: public kernel_t, public jit_generator {
             && simple_impl_desc_init(p, nullptr)
             && mayiuse(sse42)
             && IMPLICATION(!utils::everyone_is(f32, p.itype, p.otype),
-                    mayiuse(avx512_core));
+                    mayiuse(avx));
         if (!ok) return false;
 
         const ptrdiff_t max_stride = (1LL<<31) - 1;
@@ -306,14 +306,26 @@ struct jit_uni_reorder_kernel_f32: public kernel_t, public jit_generator {
                 break;
             case s8:
                 if (idt == f32) vcvtps2dq(xmm, xmm);
-                if (idt == f32 || idt == s32) vpmovsdb(xmm, xmm);
-                if (idt == u8) vpminub(xmm, xmm, xmm_127b);
+                if (idt == f32 || idt == s32) {
+                    if (mayiuse(avx512_core)) {
+                        vpmovsdb(xmm, xmm);
+                    } else {
+                        vpackssdw(xmm, xmm, xmm_zero);
+                        vpacksswb(xmm, xmm, xmm_zero);
+                    }
+                }
+                if (idt == u8) vpminub(xmm, xmm, xmm_4x127b);
                 break;
             case u8:
                 if (idt == f32) vcvtps2dq(xmm, xmm);
                 if (idt == f32 || idt == s32) {
-                    vpmaxsd(xmm, xmm, xmm_zero);
-                    vpmovusdb(xmm, xmm);
+                    if (mayiuse(avx512_core)) {
+                        vpmaxsd(xmm, xmm, xmm_zero);
+                        vpmovusdb(xmm, xmm);
+                    } else {
+                        vpackssdw(xmm, xmm, xmm_zero);
+                        vpackuswb(xmm, xmm, xmm_zero);
+                    }
                 }
                 if (idt == s8) vpmaxsb(xmm, xmm, xmm_zero);
                 break;
@@ -495,7 +507,13 @@ struct jit_uni_reorder_kernel_f32: public kernel_t, public jit_generator {
                     if (prb_.otype == f32) {
                         addss(Xmm(ur), o_addr(o_off[ur]));
                     } else {
-                        vmovss(xmm_tmp, o_addr(o_off[ur]));
+                        if (prb_.otype == s32) {
+                            vmovss(xmm_tmp, o_addr(o_off[ur]));
+                        } else if (utils::one_of(prb_.otype, s8, u8)) {
+                            pinsrb(xmm_tmp, o_addr(o_off[ur]), 0x0);
+                        } else {
+                            assert(!"unsupported o_type");
+                        }
                         cvt2ps(xmm_tmp, xmm_tmp, prb_.otype);
                         addps(Xmm(ur), xmm_tmp);
                     }
@@ -631,13 +649,12 @@ struct jit_uni_reorder_kernel_f32: public kernel_t, public jit_generator {
         mov(reg_ptr_out, PARAM(out));
 #       undef PARAM
 
-        if (mayiuse(avx512_core)) {
+        if (mayiuse(avx)) {
             vxorps(xmm_zero, xmm_zero, xmm_zero);
 
             if (prb_.itype == data_type::u8 && prb_.otype == data_type::s8) {
                 mov(reg_tmp.cvt32(), 0x7f7f7f7f);
-                movd(xmm_127b, reg_tmp.cvt32());
-                vbroadcastss(xmm_127b, xmm_127b);
+                movd(xmm_4x127b, reg_tmp.cvt32());
             }
         }
 
@@ -663,7 +680,7 @@ private:
 
     Xmm xmm_scale = xmm15;
     Xmm xmm_zero = xmm14;
-    Xmm xmm_127b = xmm13; // TODO: unite with xmm_zero
+    Xmm xmm_4x127b = xmm13; // TODO: unite with xmm_zero
     Xmm xmm_tmp = xmm12;
 };
 
@@ -825,6 +842,12 @@ struct jit_uni_reorder_t : public cpu_primitive_t {
 
             auto prb = tr::prb_t();
 
+            if (imd->format == mkldnn_OhIw8o4i || imd->format == mkldnn_gOhIw8o4i ||
+                imd->format == mkldnn_OhIw8o4i_s8s8 || imd->format == mkldnn_gOhIw8o4i_s8s8 ||
+                omd->format == mkldnn_OhIw8o4i || omd->format == mkldnn_gOhIw8o4i ||
+                omd->format == mkldnn_OhIw8o4i_s8s8 || omd->format == mkldnn_gOhIw8o4i_s8s8)
+                return status::unimplemented;
+
             status_t prb_init_status = prb_init(prb, *imd, *omd, attr);
             if (prb_init_status != success) return prb_init_status;
 
@@ -863,97 +886,98 @@ struct jit_uni_reorder_t : public cpu_primitive_t {
         tr::kernel_t::desc_t ker_desc_;
     };
 
-    jit_uni_reorder_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_reorder_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-        kernel_ = tr::kernel_t::create(conf_.ker_desc_);
+        : cpu_primitive_t(apd, inputs, outputs) {
+        kernel_ = tr::kernel_t::create(pd()->ker_desc_);
         assert(kernel_);
     }
     ~jit_uni_reorder_t() { delete kernel_; }
 
-    void omp_driver_0d(int off, const char *in, char *out, const float *scale) {
+    void omp_driver_0d(int off, const char *in, char *out,
+            const float *scale) const {
         tr::call_param_t c{in, out, scale};
         (*kernel_)(&c);
     }
 
     void omp_driver_1d(int ithr, int nthr, int off, const char *in, char *out,
-            const float *scale) {
-        tr::node_t *ns = conf_.prb_.nodes + off;
+            const float *scale) const {
+        const tr::node_t *ns = pd()->prb_.nodes + off;
         for_nd(ithr, nthr, (ptrdiff_t)ns[0].n, [&](ptrdiff_t d0) {
             auto c = tr::call_param_t();
-            c.in = in + d0 * ns[0].is * data_type_size(conf_.prb_.itype);
-            c.out = out + d0 * ns[0].os * data_type_size(conf_.prb_.otype);
+            c.in = in + d0 * ns[0].is * data_type_size(pd()->prb_.itype);
+            c.out = out + d0 * ns[0].os * data_type_size(pd()->prb_.otype);
             c.scale = scale + d0 * ns[0].ss;
             (*kernel_)(&c);
         });
     }
 
     void omp_driver_2d(int ithr, int nthr, int off, const char *in, char *out,
-            const float *scale) {
-        tr::node_t *ns = conf_.prb_.nodes + off;
+            const float *scale) const {
+        const tr::node_t *ns = pd()->prb_.nodes + off;
         for_nd(ithr, nthr, (ptrdiff_t)ns[1].n, (ptrdiff_t)ns[0].n,
                 [&](ptrdiff_t d1, ptrdiff_t d0) {
             auto c = tr::call_param_t();
             c.in = in + (d0 * ns[0].is + d1 * ns[1].is)
-                * data_type_size(conf_.prb_.itype);
+                * data_type_size(pd()->prb_.itype);
             c.out = out + (d0 * ns[0].os + d1 * ns[1].os)
-                * data_type_size(conf_.prb_.otype);
+                * data_type_size(pd()->prb_.otype);
             c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss;
             (*kernel_)(&c);
         });
     }
 
     void omp_driver_3d(int ithr, int nthr, int off, const char *in, char *out,
-            const float *scale) {
-        tr::node_t *ns = conf_.prb_.nodes + off;
+            const float *scale) const {
+        const tr::node_t *ns = pd()->prb_.nodes + off;
         for_nd(ithr, nthr, (ptrdiff_t)ns[2].n, (ptrdiff_t)ns[1].n,
                 (ptrdiff_t)ns[0].n,
                 [&](ptrdiff_t d2, ptrdiff_t d1, ptrdiff_t d0) {
             auto c = tr::call_param_t();
             c.in = in + (d0 * ns[0].is + d1 * ns[1].is + d2 * ns[2].is)
-                * data_type_size(conf_.prb_.itype);
+                * data_type_size(pd()->prb_.itype);
             c.out = out + (d0 * ns[0].os + d1 * ns[1].os + d2 * ns[2].os)
-                * data_type_size(conf_.prb_.otype);
+                * data_type_size(pd()->prb_.otype);
             c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss;
             (*kernel_)(&c);
         });
     }
 
     void omp_driver_4d(int ithr, int nthr, int off, const char *in, char *out,
-            const float *scale) {
-        tr::node_t *ns = conf_.prb_.nodes + off;
+            const float *scale) const {
+        const tr::node_t *ns = pd()->prb_.nodes + off;
         for_nd(ithr, nthr, (ptrdiff_t)ns[3].n, (ptrdiff_t)ns[2].n,
                 (ptrdiff_t)ns[1].n, (ptrdiff_t)ns[0].n,
                 [&](ptrdiff_t d3, ptrdiff_t d2, ptrdiff_t d1, ptrdiff_t d0) {
             auto c = tr::call_param_t();
             c.in = in + (d0 * ns[0].is + d1 * ns[1].is + d2 * ns[2].is
-                    + d3 * ns[3].is) * data_type_size(conf_.prb_.itype);
+                    + d3 * ns[3].is) * data_type_size(pd()->prb_.itype);
             c.out = out + (d0 * ns[0].os + d1 * ns[1].os + d2 * ns[2].os
-                    + d3 * ns[3].os) * data_type_size(conf_.prb_.otype);
+                    + d3 * ns[3].os) * data_type_size(pd()->prb_.otype);
             c.scale = scale + d0 * ns[0].ss + d1 * ns[1].ss + d2 * ns[2].ss
                 + d3 * ns[3].ss;
             (*kernel_)(&c);
         });
     }
 
-    void omp_driver(const char *in, char *out, const float *scale) {
-        in += conf_.prb_.ioff * data_type_size(conf_.prb_.itype);
-        out += conf_.prb_.ooff * data_type_size(conf_.prb_.otype);
+    void omp_driver(const char *in, char *out, const float *scale) const {
+        in += pd()->prb_.ioff * data_type_size(pd()->prb_.itype);
+        out += pd()->prb_.ooff * data_type_size(pd()->prb_.otype);
 
-        DEBUG({ printf("prb : "); tr::prb_dump(conf_.prb_); });
-        DEBUG({ printf("ker : "); tr::prb_dump(conf_.ker_desc_.prb); });
+        DEBUG({ printf("prb : "); tr::prb_dump(pd()->prb_); });
+        DEBUG({ printf("ker : "); tr::prb_dump(pd()->ker_desc_.prb); });
 
-        int ndims = conf_.prb_.ndims;
-        int ndims_ker = conf_.ker_desc_.prb.ndims;
+        int ndims = pd()->prb_.ndims;
+        int ndims_ker = pd()->ker_desc_.prb.ndims;
         assert(ndims - ndims_ker <= ndims_driver_max);
 
         if (ndims - ndims_ker == 0) {
-            set_rnd_mode(conf_.attr()->round_mode_);
+            set_rnd_mode(pd()->attr()->round_mode_);
             omp_driver_0d(ndims_ker, in, out, scale);
             restore_rnd_mode();
         } else {
             parallel(0, [&](const int ithr, const int nthr) {
-                set_rnd_mode(conf_.attr()->round_mode_);
+                set_rnd_mode(pd()->attr()->round_mode_);
                 switch (ndims - ndims_ker) {
                 case 1: omp_driver_1d(ithr, nthr, ndims_ker, in, out, scale); break;
                 case 2: omp_driver_2d(ithr, nthr, ndims_ker, in, out, scale); break;
@@ -966,11 +990,11 @@ struct jit_uni_reorder_t : public cpu_primitive_t {
         }
     }
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         auto in = reinterpret_cast<const char *>(input_memory(0));
         auto out = reinterpret_cast<char *>(memory());
 
-        omp_driver(in, out, conf_.attr()->output_scales_.scales_);
+        omp_driver(in, out, pd()->attr()->output_scales_.scales_);
 
         e->set_state(event_t::ready);
     }
@@ -978,7 +1002,7 @@ struct jit_uni_reorder_t : public cpu_primitive_t {
     enum { ndims_driver_max = 4 };
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     tr::kernel_t *kernel_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder_utils.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder_utils.cpp
index cb9c1d116..cf193c8b1 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder_utils.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_reorder_utils.cpp
@@ -69,8 +69,11 @@ status_t cvt_mem_desc_to_layout_desc(const memory_desc_t &md_,
     case memory_format::any:
     case hwio_s8s8:
     case hwigo_s8s8:
+    case gOIhw4o4i_s8s8:
+    case gOIhw2i8o4i_s8s8:
     case gOIhw4i16o4i_s8s8:
     case OIhw4i16o4i_s8s8:
+    case Goihw16g_s8s8:
     case wino_fmt:
         return invalid_arguments;
     case OIhw4i16o4i:
@@ -107,6 +110,16 @@ status_t cvt_mem_desc_to_layout_desc(const memory_desc_t &md_,
         if (md.format() == OIhw8o16i2o)
             P(3, bd.padding_dims[3], bd.strides[0][3]);
         return success;
+    case gOIhw2i8o4i:
+        P(0, bd.padding_dims[0], bd.strides[0][0]);
+        P(1, bd.padding_dims[1] / 8, bd.strides[0][1]);
+        P(1, 8, 4);
+        P(2, bd.padding_dims[2] / 8, bd.strides[0][2]);
+        P(2, 2, 8*4);
+        P(2, 4, 1);
+        P(3, bd.padding_dims[3], bd.strides[0][3]);
+        P(4, bd.padding_dims[4], bd.strides[0][4]);
+        return success;
     case gOIhw4i16o4i:
         P(0, bd.padding_dims[0], bd.strides[0][0]);
         P(1, bd.padding_dims[1] / 16, bd.strides[0][1]);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.cpp
index 08a129a5b..8ac889b13 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.cpp
@@ -28,16 +28,16 @@ namespace impl {
 namespace cpu {
 
 template <cpu_isa_t isa>
-void jit_uni_roi_pooling_fwd_t<isa>::execute_forward() {
+void jit_uni_roi_pooling_fwd_t<isa>::execute_forward() const {
     auto src_data = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto src_roi = reinterpret_cast<const data_t*>(this->input_memory(1));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
 
-    const memory_desc_wrapper src_d(conf_.src_pd(0));
-    const memory_desc_wrapper src_roi_d(conf_.src_pd(1));
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd(0));
+    const memory_desc_wrapper src_roi_d(pd()->src_pd(1));
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
 
-    const auto &jpp = conf_.jpp_;
+    const auto &jpp = pd()->jpp_;
 
     int cb_work = utils::div_up(jpp.nb_c, jpp.nb_c_blocking);
     int MB = jpp.mb;
@@ -68,7 +68,7 @@ void jit_uni_roi_pooling_fwd_t<isa>::execute_forward() {
         utils::nd_iterator_init(start, n, MB, cbb, cb_work, oh, jpp.oh, ow, jpp.ow);
 
         for (int iwork = start; iwork < end; iwork++) {
-            jit_roi_pool_call_s arg = {};
+            auto arg = jit_roi_pool_call_s();
 
             int cb = cbb * jpp.nb_c_blocking;
             int cb_num = jpp.nb_c_blocking;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.hpp
index ca7dd2ed8..e0325d363 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_roi_pooling.hpp
@@ -82,23 +82,23 @@ struct jit_uni_roi_pooling_fwd_t: public cpu_primitive_t {
         }
     };
 
-    jit_uni_roi_pooling_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_roi_pooling_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    { kernel_ = new jit_uni_roi_pool_kernel_f32<isa>(conf_.jpp_); }
+        : cpu_primitive_t(apd, inputs, outputs)
+    { kernel_ = new jit_uni_roi_pool_kernel_f32<isa>(pd()->jpp_); }
 
     ~jit_uni_roi_pooling_fwd_t() { delete kernel_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_roi_pool_kernel_f32<isa> *kernel_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.cpp
index 8d402695f..32d2139d1 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.cpp
@@ -33,11 +33,11 @@ using namespace mkldnn::impl::memory_format;
 using namespace mkldnn::impl::utils;
 
 template <cpu_isa_t isa>
-jit_uni_softmax_fwd_t<isa>::jit_uni_softmax_fwd_t(const pd_t *pd,
+jit_uni_softmax_fwd_t<isa>::jit_uni_softmax_fwd_t(const pd_t *apd,
         const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
+        : cpu_primitive_t(apd, inputs, outputs)
 {
-    kernel_ = new jit_uni_softmax_kernel_f32<isa>(conf_.jpp_);
+    kernel_ = new jit_uni_softmax_kernel_f32<isa>(pd()->jpp_);
 }
 
 template <cpu_isa_t isa>
@@ -46,16 +46,16 @@ jit_uni_softmax_fwd_t<isa>::~jit_uni_softmax_fwd_t() {
 }
 
 template <cpu_isa_t isa>
-void jit_uni_softmax_fwd_t<isa>::execute_forward()
+void jit_uni_softmax_fwd_t<isa>::execute_forward() const
 {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
 
-    const auto &jpp = conf_.jpp_;
+    const auto &jpp = pd()->jpp_;
 
-    size_t outer_size = utils::array_product(conf_.src_pd()->desc()->dims, conf_.desc()->softmax_axis);
+    size_t outer_size = utils::array_product(pd()->src_pd()->desc()->dims, pd()->desc()->softmax_axis);
 
     size_t dim = jpp.channels * jpp.inner_size;
 
@@ -70,7 +70,7 @@ void jit_uni_softmax_fwd_t<isa>::execute_forward()
             nd_iterator_init(start, ou, outer_size);
 
             for (size_t iwork = start; iwork < end; ++iwork) {
-                jit_softmax_call_s args{};
+                auto args = jit_softmax_call_s();
                 args.channels = jpp.channels;
                 args.work = jpp.inner_size;
                 size_t off = data_d.off_l(ou * dim);
@@ -99,7 +99,7 @@ void jit_uni_softmax_fwd_t<isa>::execute_forward()
             for (size_t iwork = start; iwork < end; ++iwork) {
                 size_t work = nstl::min(jpp.outer_block, outer_size - oub * jpp.outer_block);
 
-                jit_softmax_call_s args{};
+                auto args = jit_softmax_call_s();
                 args.channels = jpp.channels;
                 args.work = work;
                 size_t off = data_d.off_l(oub * jpp.outer_block * dim);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.hpp
index 24f4f4826..19d61ebbf 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_softmax.hpp
@@ -76,20 +76,20 @@ struct jit_uni_softmax_fwd_t : public cpu_primitive_t {
         jit_softmax_conf_t jpp_;
     };
 
-    jit_uni_softmax_fwd_t(const pd_t *pd, const input_vector &inputs,
+    jit_uni_softmax_fwd_t(const pd_t *apd, const input_vector &inputs,
                        const output_vector &outputs);
     ~jit_uni_softmax_fwd_t();
 
     using data_t = prec_traits<data_type::f32>::type;
 
-    virtual void execute(event_t *e) override {
+    virtual void execute(event_t *e) const override {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_softmax_kernel_f32<isa> *kernel_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_conv_kernel.cpp
deleted file mode 100644
index b3917d59c..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_conv_kernel.cpp
+++ /dev/null
@@ -1,507 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "c_types_map.hpp"
-#include "nstl.hpp"
-#include "type_helpers.hpp"
-#include "utils.hpp"
-#include "cpu_memory.hpp"
-
-#include "jit_uni_x8s8s32x_1x1_conv_kernel.hpp"
-
-#define GET_OFF(field) offsetof(jit_1x1_conv_call_s, field)
-
-#include <iostream>
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using namespace mkldnn::impl::prop_kind;
-using namespace mkldnn::impl::memory_format;
-using namespace mkldnn::impl::utils;
-using namespace mkldnn::impl::types;
-
-using namespace Xbyak;
-
-template <cpu_isa_t isa>
-void jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::cvt2ps(data_type_t type_in,
-        Vmm vmm_in, const Xbyak::Operand &op) {
-    switch (type_in) {
-    case data_type::f32:
-    case data_type::s32: vmovups(vmm_in, op); break;
-    case data_type::s8: vpmovsxbd(vmm_in, op); break;
-    case data_type::u8: vpmovzxbd(vmm_in, op); break;
-    default: assert(!"unsupported data type");
-    }
-    if (type_in != data_type::f32)
-        vcvtdq2ps(vmm_in, vmm_in);
-}
-
-template <cpu_isa_t isa>
-void jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::loop_os(int oc_loop_blk)
-{
-    mov(aux_reg_dst_data, reg_dst_data);
-
-    Label loop_os;
-    Label loop_ow_tail;
-
-    mov(reg_ow_loop_work, jcp.ow);
-
-    L(loop_os); {
-        assert(jcp.os_block == jcp.ur);
-        cmp(reg_ow_loop_work, jcp.ow_tail);
-        je(loop_ow_tail, T_NEAR);
-
-        ic_loop(oc_loop_blk, jcp.ur);
-
-        sub(reg_ow_loop_work, jcp.ur);
-
-        add(reg_src_data, jcp.os_loop_src_step);
-        add(aux_reg_dst_data, jcp.os_loop_dst_step);
-
-        sub(reg_loop_os_iter, jcp.os_block);
-        cmp(reg_loop_os_iter, jcp.os_block);
-        jge(loop_os, T_NEAR);
-
-        L(loop_ow_tail); {
-            if (jcp.ow_tail > 0) {
-                ic_loop(oc_loop_blk, jcp.ow_tail);
-            }
-
-            add(reg_src_data, jcp.os_loop_src_tail_step);
-            add(aux_reg_dst_data, jcp.os_loop_dst_tail_step);
-
-            mov(reg_ow_loop_work, jcp.ow);
-
-            sub(reg_loop_os_iter, jcp.ow_tail);
-            cmp(reg_loop_os_iter, 0);
-            jg(loop_os, T_NEAR);
-        }
-    }
-}
-
-template <cpu_isa_t isa>
-void jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::ic_loop(int oc_loop_blk, int ur)
-{
-    auto vreg_wei = [=](int i) {
-        return Vmm(ur * oc_loop_blk + i);
-    };
-
-    auto vreg_accum_vmm = [=](int i, int j) {
-        return Vmm(j * oc_loop_blk + i);
-    };
-
-    auto vreg_accum_xmm = [=](int i, int j) {
-        return Xmm(j * oc_loop_blk + i);
-    };
-
-    auto src_ptr = [=](int u, int j) {
-        size_t offt = j * jcp.ic * jcp.stride_w + u*jcp.ic_block;
-        return ptr[aux_reg_src_data + jcp.typesize_in * offt];
-    };
-
-    auto wei_ptr = [=](int u, int i) {
-        size_t offt = i*jcp.nb_ic*jcp.oc_block*jcp.ic_block + u*jcp.ic_block * jcp.oc_block;
-        return ptr[aux_reg_weight_data + offt * jcp.typesize_in];
-    };
-
-    auto output_ptr = [=](int i, int j) {
-        return ptr[aux_reg_dst_data + (i * jcp.oc_block + j * jcp.oc) *
-                                              jcp.typesize_out];
-    };
-
-    auto init = [&]() {
-        for (int i = 0; i < oc_loop_blk; ++i) {
-            for (int j = 0; j < ur; ++j) {
-                auto vmm_acc = vreg_accum_vmm(i, j);
-                uni_vpxor(vmm_acc, vmm_acc, vmm_acc);
-            }
-        }
-
-        for (int i = 0; i < oc_loop_blk; ++i)
-            uni_vmovdqu(vreg_wei(i), wei_ptr(0, i));
-
-        uni_vpbroadcastd(vreg_src, src_ptr(0, 0));
-    };
-
-    auto store = [=]() {
-        mov(reg_scales, ptr[this->param1 + GET_OFF(scales)]);
-        uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-
-        for (int j = 0; j < ur; ++j)
-            for (int i = 0; i < oc_loop_blk; ++i) {
-                int b_off = i*jcp.oc_block;
-
-                if (jcp.with_bias) {
-                    switch (jcp.bia_dt) {
-                        case data_type::f32:
-                        case data_type::s32: vmovups(vmm_bias, ptr[reg_bias_data + b_off*jcp.typesize_bia]); break;
-                        case data_type::s8: vpmovsxbd(vmm_bias, ptr[reg_bias_data + b_off*jcp.typesize_bia]); break;
-                        case data_type::u8: vpmovzxbd(vmm_bias, ptr[reg_bias_data + b_off*jcp.typesize_bia]); break;
-                        default: assert(!"unsupported dst data type");
-                    }
-                }
-                if (jcp.bia_dt != data_type::f32)
-                    vcvtdq2ps(vmm_bias, vmm_bias);
-
-                Vmm vmm_dst = vreg_accum_vmm(i, j);
-                Xmm xmm_dst = vreg_accum_xmm(i, j);
-
-                vcvtdq2ps(vmm_dst, vmm_dst);
-
-                if (jcp.with_bias)
-                    vaddps(vmm_dst, vmm_dst, vmm_bias);
-
-                int s_off = jcp.is_oc_scale * (sizeof(float) * (i*jcp.oc_block));
-                vmulps(vmm_dst, vmm_dst, ptr[reg_scales + s_off]);
-
-                if (jcp.with_sum) {
-                    Ymm vmm_prev_dst = Ymm(12);
-                    cvt2ps(jcp.dst_dt, vmm_prev_dst, output_ptr(i, j));
-                    vaddps(vmm_dst, vmm_prev_dst);
-                }
-
-                if (maybe_relu(0))
-                    vmaxps(vmm_dst, vmm_zero, vmm_dst);
-
-                if (maybe_relu(1))
-                    vmaxps(vmm_dst, vmm_zero, vmm_dst);
-
-                if (jcp.dst_dt != data_type::f32) {
-                    if (attr_.round_mode_ == round_mode::nearest)
-                        if (isa == avx512_common) {
-                            vcvtps2dq(vmm_dst | T_rn_sae, vmm_dst);
-                        } else {
-                            vcvtps2dq(vmm_dst, vmm_dst);
-                        }
-                    else if (attr_.round_mode_ == round_mode::down) {
-                        if (isa == avx512_common) {
-                            vcvtps2dq(vmm_dst | T_rd_sae, vmm_dst);
-                        } else {
-                            vroundps(vmm_dst, vmm_dst, 1);
-                            vcvtps2dq(vmm_dst, vmm_dst);
-                        }
-                    } else
-                        assert(!"unimplemented");
-                }
-
-                switch (jcp.dst_dt) {
-                    case data_type::f32:
-                    case data_type::s32: vmovups(output_ptr(i, j), vmm_dst); break;
-                    case data_type::s8:
-                        if (isa == avx512_common) {
-                            vpmovsdb(xmm_dst, vmm_dst);
-                            vmovups(output_ptr(i, j), xmm_dst);
-                        } else if (isa == avx2) {
-                            Ymm ymm_dst = Ymm(vmm_dst.getIdx());
-
-                            vpackssdw(ymm_dst, ymm_dst, ymm_dst);
-                            vpermq(ymm_dst, ymm_dst, 0x08);
-                            vpacksswb(xmm_dst, xmm_dst, xmm_dst);
-                            vmovq(output_ptr(i, j), xmm_dst);
-                        }
-                        break;
-                    case data_type::u8:
-                        if (isa == avx512_common) {
-                            vpmovusdb(xmm_dst, vmm_dst);
-                            vmovups(output_ptr(i, j), xmm_dst);
-                        } else if (isa == avx2) {
-                            Ymm ymm_dst = Ymm(vmm_dst.getIdx());
-
-                            vpackusdw(ymm_dst, ymm_dst, ymm_dst);
-                            vpermq(ymm_dst, ymm_dst, 0x08);
-                            vpackuswb(xmm_dst, xmm_dst, xmm_dst);
-                            vmovq(output_ptr(i, j), xmm_dst);
-                        }
-                        break;
-                    default: assert(!"unknown dst_dt");
-                }
-            }
-    };
-
-    auto fma_block = [=]() {
-        for (int j = 0; j < ur; ++j) {
-            for (int i = 0; i < oc_loop_blk; i++) {
-                vpmaddubsw(vreg_sum_0, vreg_src, vreg_wei(i));
-                vpmaddwd(vreg_sum_0, vreg_sum_0, vmm_one);
-                vpaddd(vreg_accum_vmm(i, j), vreg_accum_vmm(i, j), vreg_sum_0);
-
-                if (j == ur - 1) {
-                    uni_vmovdqu(vreg_wei(i), wei_ptr(1, i));
-                }
-            }
-
-            if (j < ur - 1)
-                uni_vpbroadcastd(vreg_src, src_ptr(0, j + 1));
-        }
-
-        uni_vpbroadcastd(vreg_src, src_ptr(1, 0));
-    };
-
-    mov(aux_reg_weight_data, reg_weight_data);
-    mov(aux_reg_src_data, reg_src_data);
-
-    init();
-
-    Label ic_loop;
-    Label exit;
-
-    xor_(reg_loop_ic_iter, reg_loop_ic_iter);
-    L(ic_loop); {
-        cmp(reg_loop_ic_iter, jcp.nb_ic);
-        jge(exit, T_NEAR);
-
-        fma_block();
-
-        add(aux_reg_src_data, jcp.ic_block * jcp.typesize_in);
-        add(aux_reg_weight_data, jcp.ic_block * jcp.oc_block * jcp.typesize_in);
-        inc(reg_loop_ic_iter);
-        jmp(ic_loop, T_NEAR);
-    }
-
-    L(exit);
-
-    store();
-}
-
-template <cpu_isa_t isa>
-void jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::generate()
-{
-    preamble();
-
-    mov(reg_scratch, 0x1);
-    movq(xmm_one, reg_scratch);
-    vpbroadcastw(vmm_one, xmm_one);
-
-    mov(reg_weight_data, ptr[param1 + GET_OFF(oc_data)]);
-    mov(reg_dst_data,    ptr[param1 + GET_OFF(output_data)]);
-    if (jcp.with_bias) {
-        mov(reg_bias_data, ptr[param1 + GET_OFF(bias_data)]);
-    }
-
-    mov(reg_oc_loop_work, ptr[param1 + GET_OFF(oc_dim)]);
-    mov(reg_src_data, ptr[param1 + GET_OFF(is_data)]);
-    mov(reg_loop_os_iter,  ptr[param1 + GET_OFF(os_dim)]);
-
-    Label oc_blocks_tail_label;
-    Label exit_label;
-
-    int oc_blocks_tail = jcp.nb_oc % jcp.nb_oc_blocking;
-
-    cmp(reg_oc_loop_work, jcp.nb_oc_blocking);
-    jne(oc_blocks_tail ? oc_blocks_tail_label : exit_label, T_NEAR);
-
-    loop_os(jcp.nb_oc_blocking); // channel main loop
-    jmp(exit_label, T_NEAR);
-
-    if (oc_blocks_tail) {
-        L(oc_blocks_tail_label);
-
-        cmp(reg_oc_loop_work, oc_blocks_tail);
-        jne(exit_label, T_NEAR);
-
-        loop_os(oc_blocks_tail); // channel tail loop
-    }
-
-    L(exit_label);
-
-    postamble();
-}
-
-template <cpu_isa_t isa>
-bool jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::post_ops_ok(
-        jit_1x1_conv_conf_t &jcp, const primitive_attr_t &attr) {
-    const auto &p = attr.post_ops_;
-
-    auto is_relu = [&](int idx) { return p.entry_[idx].is_relu(); };
-    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
-
-    switch (p.len_) {
-        case 0: return true; // no post_ops
-        case 1: return !jcp.with_eltwise && (is_relu(0) || is_sum(0)); // sum OR relu
-        case 2: return !jcp.with_eltwise && (is_sum(0) && is_relu(1)); // sum->relu
-        default: return false;
-    }
-
-    return false;
-}
-
-template <cpu_isa_t isa>
-bool jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::maybe_relu(int position) {
-    using namespace primitive_kind;
-    const auto &p = attr_.post_ops_;
-
-    if (position == 0) {
-        /* relu before sum */
-        return false
-               || jcp.with_eltwise
-               || p.contain(eltwise, 0)
-               || (jcp.dst_dt == data_type::u8 && !p.contain(sum, 0));
-    } else if (position == 1) {
-        /* relu after sum */
-        const int sum_idx = p.contain(sum, 0)
-                            ? 0 : (p.contain(sum, 1) ? 1 : -1);
-        if (sum_idx == -1)
-            return false;
-
-        return false
-               || p.contain(eltwise, sum_idx + 1)
-               || jcp.dst_dt == data_type::u8;
-    }
-
-    return false;
-}
-
-template <cpu_isa_t isa>
-status_t jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::init_conf(jit_1x1_conv_conf_t &jcp,
-        const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
-        const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const memory_desc_wrapper &bias_pd, const primitive_attr_t &attr,
-        bool with_relu, float relu_negative_slope)
-{
-    if (!mayiuse(isa)) return status::unimplemented;
-
-    const bool with_groups = weights_d.ndims() == src_d.ndims() + 1;
-
-    jcp.prop_kind = cd.prop_kind;
-
-    jcp.ngroups = with_groups ? weights_d.dims()[0] : 1;
-    jcp.mb = src_d.dims()[0];
-
-    jcp.oc = dst_d.dims()[1] / jcp.ngroups;
-    jcp.ic = src_d.dims()[1] / jcp.ngroups;
-
-    jcp.ih = src_d.dims()[2];
-    jcp.iw = src_d.dims()[3];
-    jcp.oh = dst_d.dims()[2];
-    jcp.ow = dst_d.dims()[3];
-
-    jcp.kh = weights_d.dims()[with_groups + 2];
-    jcp.kw = weights_d.dims()[with_groups + 3];
-
-    jcp.t_pad = cd.padding[0][0];
-    jcp.l_pad = cd.padding[0][1];
-
-    jcp.stride_h = cd.strides[0];
-    jcp.stride_w = cd.strides[1];
-
-    jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.bia_dt = jcp.with_bias ? cd.bias_desc.data_type : data_type::undef;
-    jcp.dst_dt = cd.dst_desc.data_type;
-
-    jcp.src_fmt = src_d.format();
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
-
-    jcp.os = jcp.oh * jcp.ow;
-    jcp.is = jcp.ih * jcp.iw;
-
-    auto desired_wei_fmt = OhIw8o4i;
-    auto desired_gr_wei_fmt = gOhIw8o4i;
-
-    int simd_w = isa == avx512_common ? 16 : 8;
-
-    bool args_ok = true
-        && jcp.ngroups == 1
-        && src_d.format() == nhwc
-        && one_of(weights_d.format(), desired_wei_fmt, desired_gr_wei_fmt)
-        && one_of(cd.bias_desc.format, memory_format::undef, any, x)
-        && dst_d.format() == nhwc
-        && jcp.oc % simd_w == 0 && jcp.ic % simd_w == 0
-        && jcp.t_pad == 0 && jcp.l_pad == 0
-        && jcp.kh == 1 && jcp.kw == 1
-        && jcp.stride_h == 1 && jcp.stride_w == 1;
-
-    if (!args_ok) return status::unimplemented;
-
-    jcp.ic_block = 4;
-    jcp.oc_block = simd_w;
-
-    jcp.ur = 2;
-    jcp.ow_tail = jcp.ow % jcp.ur;
-
-    int oc_blocking{ 0 };
-    int oc_blocking_max{ 0 };
-    int os_blocking{ 0 };
-    int os_blocking_max{ 0 };
-    int ic_blocking{ 0 };
-
-    jcp.ic_dim = jcp.ic;
-    jcp.oc_dim = jcp.oc;
-    jcp.is_dim = jcp.is;
-    jcp.os_block = jcp.ur;
-
-    jcp.typesize_in = types::data_type_size(src_d.data_type());
-    jcp.typesize_out = types::data_type_size(dst_d.data_type());
-    jcp.typesize_acc = sizeof(int32_t);
-    jcp.typesize_bia = jcp.with_bias
-                       ? types::data_type_size(bias_pd.data_type())
-                       : 0;
-
-    const auto &oscales = attr.output_scales_;
-    jcp.is_oc_scale = oscales.mask_ == 1 << 1;
-
-    const auto &p = attr.post_ops_;
-    jcp.with_sum = p.find(primitive_kind::sum) != -1;
-
-    assert(IMPLICATION(!jcp.is_oc_scale, oscales.mask_ == 0));
-
-    jcp.ic_loop_src_step = jcp.ic_block * jcp.ic_loop_unroll * jcp.typesize_in;
-    jcp.ic_loop_wei_step = jcp.ic_block * jcp.ic_loop_unroll * jcp.oc_block * jcp.typesize_in;
-
-    jcp.os_loop_dst_step = jcp.ur * jcp.oc * jcp.typesize_out;
-    jcp.os_loop_acc_step = jcp.ur * jcp.oc_block * jcp.typesize_acc;
-    jcp.os_loop_src_step = jcp.stride_w * jcp.ur * jcp.ic * jcp.typesize_in;
-    jcp.os_loop_dst_tail_step = jcp.ow_tail * jcp.oc * jcp.typesize_out;
-    jcp.os_loop_acc_tail_step = jcp.ow_tail * jcp.oc_block * jcp.typesize_acc;
-    jcp.os_loop_src_tail_step = jcp.stride_w * jcp.ow_tail * jcp.ic * jcp.typesize_in
-             + ((jcp.stride_h-1)*jcp.iw*jcp.ic*jcp.typesize_in);
-
-    oc_blocking     = 4 * jcp.oc_block;
-    oc_blocking_max = 4 * jcp.oc_block;
-    os_blocking     = 48; // affects oc balancing across threads
-    os_blocking_max = 320;
-    ic_blocking     = 4*128; // affects L1$ utilization
-
-    assert(oc_blocking);
-    assert(oc_blocking_max);
-    assert(os_blocking);
-    assert(os_blocking_max);
-    assert(ic_blocking);
-
-    assert(jcp.os_block % jcp.ur == 0);
-    jcp.ur_tail = jcp.is_dim % jcp.ur;
-
-    jcp.nb_oh_blocking     = nstl::max(1, os_blocking     / jcp.ow);
-    jcp.nb_oh_blocking_max = nstl::max(1, os_blocking_max / jcp.ow);
-    jcp.nb_oc_blocking     = oc_blocking / jcp.oc_block;
-    jcp.nb_oc_blocking_max = oc_blocking_max / jcp.oc_block;
-    jcp.nb_ic_blocking     = ic_blocking / jcp.ic_block;
-
-    jcp.nb_oc = div_up(jcp.oc_dim, jcp.oc_block);
-
-    jcp.nb_ic = jcp.ic / jcp.ic_block;
-
-    return status::success;
-}
-
-template struct jit_uni_x8s8s32x_1x1_conv_fwd_kernel<avx2>;
-template struct jit_uni_x8s8s32x_1x1_conv_fwd_kernel<sse42>;
-
-}
-}
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_conv_kernel.hpp
deleted file mode 100644
index d082231b7..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_conv_kernel.hpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#ifndef JIT_UNI_X8S8S32X_1x1_CONV_KERNEL_HPP
-#define JIT_UNI_X8S8S32X_1x1_CONV_KERNEL_HPP
-
-#include "c_types_map.hpp"
-#include "type_helpers.hpp"
-#include "jit_generator.hpp"
-#include "jit_primitive_conf.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using Xbyak::Reg64;
-using Xbyak::Ymm;
-using Xbyak::Xmm;
-
-template <cpu_isa_t isa>
-struct jit_uni_x8s8s32x_1x1_conv_fwd_kernel: public jit_generator {
-    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_x8s8s32x_1x1_conv_fwd_kernel)
-
-    jit_uni_x8s8s32x_1x1_conv_fwd_kernel(jit_1x1_conv_conf_t ajcp,
-        const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
-    {
-        this->generate();
-        jit_ker = (void (*)(jit_1x1_conv_call_s *))this->getCode();
-    }
-
-    static bool post_ops_ok(jit_1x1_conv_conf_t &jcp,
-                            const primitive_attr_t &attr);
-    static status_t init_conf(jit_1x1_conv_conf_t &jcp,
-                              const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
-                              const memory_desc_wrapper &weights_d,
-                              const memory_desc_wrapper &dst_d,
-                              const memory_desc_wrapper &bias_pd,
-                              const primitive_attr_t &attr,
-                              bool with_relu = false, float relu_negative_slope = 0.f);
-
-    jit_1x1_conv_conf_t jcp;
-    const primitive_attr_t &attr_;
-    void (*jit_ker)(jit_1x1_conv_call_s *);
-
-private:
-    using Vmm = typename utils::conditional3<isa == sse42, Xbyak::Xmm,
-            isa == avx2, Xbyak::Ymm, Xbyak::Zmm>::type;
-
-    Reg64 reg_weight_data = rsi;
-    Reg64 reg_src_data = abi_not_param1;
-    Reg64 reg_dst_data = rbx;
-    Reg64 reg_bias_data = r12;
-
-    Reg64 reg_scales = rdx;
-    Reg64 aux_reg_src_data = rdx;
-    Reg64 aux_reg_weight_data = rax;
-    Reg64 aux_reg_dst_data = rbp;
-    Reg64 reg_oc_loop_work = r9;
-    Reg64 reg_ow_loop_work = r10;
-    Reg64 reg_loop_os_iter = r14;
-    Reg64 reg_loop_ic_iter = r15;
-
-    Reg64 reg_scratch = r14;
-
-    Vmm vreg_sum_0 = Vmm(15);
-    Vmm vreg_src = Vmm(14);
-    Vmm vmm_bias = Vmm(15);
-    Vmm vmm_zero = Vmm(14);
-    Vmm vmm_one = Vmm(13);
-    Xmm xmm_one = Xmm(13);
-
-    void loop_os(int oc_loop_blk);
-    void ic_loop(int oc_loop_blk, int ur);
-
-    void generate();
-
-    bool maybe_relu(int position);
-    void cvt2ps(data_type_t type_in, Vmm vmm_in, const Xbyak::Operand &op);
-};
-
-}
-}
-}
-
-#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_convolution.cpp
deleted file mode 100644
index 1eddc7990..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_convolution.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "mkldnn_types.h"
-#include "c_types_map.hpp"
-#include "jit_uni_x8s8s32x_1x1_convolution.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using namespace mkldnn::impl::status;
-using namespace mkldnn::impl::memory_format;
-using namespace mkldnn::impl::utils;
-
-template <cpu_isa_t isa, bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<isa, with_relu, src_type, dst_type>::execute_forward() {
-    auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
-    auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
-    auto bias = reinterpret_cast<const char *>(this->input_memory(2));
-    auto dst = reinterpret_cast<dst_data_t *>(this->memory());
-
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
-
-    const auto &jcp = kernel_->jcp;
-
-    int ocb_work = utils::div_up(jcp.nb_oc, jcp.nb_oc_blocking);
-    int ohb_work = utils::div_up(jcp.oh, jcp.nb_oh_blocking);
-    const int work_amount = jcp.mb * jcp.ngroups * ocb_work * ohb_work;
-
-    const int stride_h = conf_.cdesc()->strides[0];
-    const int stride_w = conf_.cdesc()->strides[1];
-    const int pad_t = conf_.cdesc()->padding[0][0];
-    const int pad_l = conf_.cdesc()->padding[0][1];
-
-    const size_t bia_dt_size = conf_.with_bias()
-        ? types::data_type_size(conf_.cdesc()->bias_desc.data_type) : 0;
-
-    const auto &oscales = conf_.attr()->output_scales_;
-
-    auto ker = [&](const int ithr, const int nthr) {
-        jit_1x1_conv_call_s p = {};
-        p.acc_s32 = ws_ + ithr * ws_per_thread_;
-
-        const int oh_block = jcp.ow;
-
-        int start{0}, end{0};
-        balance211(work_amount, nthr, ithr, start, end);
-
-        int n{0}, g{0}, ocb{0}, ohb{0};
-        nd_iterator_init(start, n, jcp.mb, g, jcp.ngroups, ohb,
-                         ohb_work, ocb, ocb_work);
-
-        for (int iwork = start; iwork < end; ++iwork) {
-            int oc_ = ocb * jcp.nb_oc_blocking;
-            int oc_num = jcp.nb_oc_blocking;
-
-            int oh_ = ohb * jcp.nb_oh_blocking;
-            int oh_num = jcp.nb_oh_blocking;
-
-            int oh_step = nstl::min(oh_ + oh_num, jcp.oh) - oh_;
-
-            const int os = oh_ * oh_block;
-            const int oh = os / jcp.ow;
-            const int ow = os % jcp.ow;
-
-            const int ih = nstl::max(oh * stride_h - pad_t, 0);
-            const int iw = nstl::max(ow * stride_w - pad_l, 0);
-
-            p.os_dim = this_block_size(os, jcp.os, oh_step * oh_block);
-            p.oc_dim = nstl::min(oc_ + oc_num, jcp.nb_oc) - oc_;
-
-            const size_t dst_off = dst_d.blk_off(n, oc_*jcp.oc_block, oh, ow);
-            p.output_data = &dst[dst_off];
-
-            if (bias)
-                p.bias_data = &bias[bias_d.blk_off(oc_ * jcp.oc_block * bia_dt_size)];
-
-            p.scales = &oscales.scales_[jcp.is_oc_scale * oc_ * jcp.oc_block];
-            p.oc_data = &weights[conf_.with_groups() ? weights_d.blk_off(g, oc_, 0) : weights_d.blk_off(oc_, 0)];
-            p.is_data = src + src_d.blk_off(n, 0, ih, iw);
-
-            kernel_->jit_ker(&p);
-
-            nd_iterator_step(n, jcp.mb, g, jcp.ngroups, ohb,
-                             ohb_work, ocb, ocb_work);
-        }
-    };
-
-    parallel(0, ker);
-}
-
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::u8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::u8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, data_type::s8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, data_type::s8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::u8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::u8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, data_type::s8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, data_type::s8, data_type::f32>::execute_forward();
-
-}
-}
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_convolution.hpp
deleted file mode 100644
index 5ae3b8fcb..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_1x1_convolution.hpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#ifndef CPU_JIT_UNI_X8S8S32X_1x1_CONVOLUTION_HPP
-#define CPU_JIT_UNI_X8S8S32X_1x1_CONVOLUTION_HPP
-
-#include "c_types_map.hpp"
-#include "cpu_convolution_pd.hpp"
-#include "cpu_engine.hpp"
-#include "cpu_reducer.hpp"
-#include "jit_uni_x8s8s32x_1x1_conv_kernel.hpp"
-#include "mkldnn_thread.hpp"
-#include "utils.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-template <cpu_isa_t isa, bool with_relu, impl::data_type_t src_type, impl::data_type_t dst_type>
-struct _jit_uni_x8s8s32x_1x1_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
-                const primitive_attr_t *attr,
-                const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                hint_fwd_pd)
-            , jcp_({}) {}
-
-        DECLARE_COMMON_PD_T(JIT_IMPL_NAME_HELPER("jit_1x1:", isa, ""),
-            _jit_uni_x8s8s32x_1x1_convolution_fwd_t<isa, with_relu, src_type, dst_type>);
-
-        virtual status_t init() override {
-            using namespace prop_kind;
-            assert(this->engine()->kind() == engine_kind::cpu);
-            bool ok = true
-                && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
-                        forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
-                && this->cdesc_().src_desc.data_type == data_type::u8
-                && this->cdesc_().dst_desc.data_type == dst_type
-                && this->cdesc_().weights_desc.data_type == data_type::s8
-                && IMPLICATION(this->with_bias(), utils::one_of(
-                   this->cdesc_().bias_desc.data_type, data_type::f32,
-                   data_type::s32, data_type::s8, data_type::u8))
-                && this->cdesc_().accum_data_type == data_type::s32;
-            if (!ok) return status::unimplemented;
-
-            return jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>::init_conf(jcp_,
-                        this->cdesc_(),
-                        this->src_pd_.desc(), *this->weights_pd_.desc(),
-                        *this->dst_pd_.desc(), *this->bias_pd_.desc(),
-                        *this->attr(), with_relu, this->negative_slope());
-        }
-
-        jit_1x1_conv_conf_t jcp_;
-
-    protected:
-        virtual status_t set_default_params() override {
-            using namespace memory_format;
-            auto desired_act_fmt = nhwc;
-
-            auto desired_wei_fmt = OhIw8o4i;
-            auto desired_gr_wei_fmt = gOhIw8o4i;
-
-            if (this->src_pd_.desc()->format == any)
-                CHECK(this->src_pd_.set_format(desired_act_fmt));
-            if (this->dst_pd_.desc()->format == any)
-                CHECK(this->dst_pd_.set_format(desired_act_fmt));
-            if (this->weights_pd_.desc()->format == any)
-                CHECK(this->weights_pd_.set_format(this->with_groups() ? desired_gr_wei_fmt : desired_wei_fmt));
-            if (this->bias_pd_.desc()->format == any)
-                CHECK(this->bias_pd_.set_format(x));
-            return status::success;
-        }
-    };
-
-    _jit_uni_x8s8s32x_1x1_convolution_fwd_t(const pd_t *pd, const
-                                            input_vector &inputs,
-                                            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-        , kernel_(nullptr), ws_(nullptr)
-    {
-        kernel_ = new jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa>(conf_.jcp_, *conf_.attr());
-        const int nthreads = mkldnn_get_max_threads();
-        ws_per_thread_ = conf_.jcp_.ow * conf_.jcp_.nb_oh_blocking_max * conf_.jcp_.oc_block;
-        ws_ = (acc_data_t*)malloc(nthreads * ws_per_thread_ * sizeof(acc_data_t), 64);
-    }
-    ~_jit_uni_x8s8s32x_1x1_convolution_fwd_t() {
-        delete kernel_;
-        free(ws_);
-    }
-
-    typedef typename prec_traits<data_type::u8>::type src_data_t;
-    typedef typename prec_traits<data_type::s8>::type wei_data_t;
-    typedef typename prec_traits<dst_type>::type dst_data_t;
-    typedef typename prec_traits<data_type::s32>::type acc_data_t;
-
-    virtual void execute(event_t *e) {
-        execute_forward();
-        e->set_state(event_t::ready);
-    }
-
-private:
-    void execute_forward();
-    pd_t conf_;
-    jit_uni_x8s8s32x_1x1_conv_fwd_kernel<isa> *kernel_;
-
-    /* reduction to unit stride */
-    size_t ws_per_thread_;
-    acc_data_t *ws_;
-};
-
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx2_x8s8s32x_1x1_convolution_fwd_t = _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, false, src_type, dst_type>;
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_sse42_x8s8s32x_1x1_convolution_fwd_t = _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, false, src_type, dst_type>;
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx2_x8s8s32x_1x1_convolution_relu_t = _jit_uni_x8s8s32x_1x1_convolution_fwd_t<avx2, true, src_type, dst_type>;
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_sse42_x8s8s32x_1x1_convolution_relu_t = _jit_uni_x8s8s32x_1x1_convolution_fwd_t<sse42, true, src_type, dst_type>;
-
-}
-}
-}
-
-#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.cpp
index b94295bf8..09c60dc12 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2018-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
 * limitations under the License.
 *******************************************************************************/
 
+#include <common/memory_tracking.hpp>
 #include "c_types_map.hpp"
 #include "nstl.hpp"
 #include "type_helpers.hpp"
@@ -30,37 +31,12 @@ namespace cpu {
 
 using namespace mkldnn::impl::prop_kind;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
 using namespace Xbyak;
 
 template <cpu_isa_t isa>
-bool jit_uni_x8s8s32x_conv_fwd_kernel<isa>::maybe_relu(int position) {
-    using namespace primitive_kind;
-    const auto &p = attr_.post_ops_;
-
-    if (position == 0) {
-        /* relu before sum */
-        return false
-               || jcp.with_eltwise
-               || p.contain(eltwise, 0)
-               || (jcp.dst_dt == data_type::u8 && !p.contain(sum, 0));
-    } else if (position == 1) {
-        /* relu after sum */
-        const int sum_idx = p.contain(sum, 0)
-                            ? 0 : (p.contain(sum, 1) ? 1 : -1);
-        if (sum_idx == -1)
-            return false;
-
-        return false
-               || p.contain(eltwise, sum_idx + 1)
-               || jcp.dst_dt == data_type::u8;
-    }
-
-    return false;
-}
-
-template <cpu_isa_t isa>
 void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::cvt2ps(data_type_t type_in, Vmm vmm_in,
         const Xbyak::Operand &op, bool scalar_load) {
     Xmm xmm_in = Xmm(vmm_in.getIdx());
@@ -118,7 +94,7 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::store_dst(const Xbyak::Address &op,
             if (isa != sse42 && !scalar_store)
                 vpermq(ymm_dst, ymm_dst, 0x08);
 
-            uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
+            uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst);
 
             if (scalar_store) {
                 movq(reg_tmp_64, xmm_dst);
@@ -136,7 +112,7 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::store_dst(const Xbyak::Address &op,
             if (isa != sse42 && !scalar_store)
                 vpermq(ymm_dst, ymm_dst, 0x08);
 
-            uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst);
+            uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst);
 
             if (scalar_store) {
                 movq(reg_tmp_64, xmm_dst);
@@ -177,32 +153,27 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::apply_filter(int ur_w, int pad_l, in
         for (int r = 0; r < repeats; r++) {
             for (int jj = _start; jj < _end; jj++) {
                 int inp_off = (ki * dilate_w + jj * stride_w - pad_l) * jcp.ic * jcp.ngroups;
-                    if (tail_size > 0) {
-                        if (h_padded || jj < jj_start || jj >= jj_end) {
-                            uni_vpxor(get_src_reg(jj), get_src_reg(jj), get_src_reg(jj));
-                            uni_vpsubb(get_src_reg(jj), get_src_reg(jj), vmm_shift);
-                            uni_vandps(get_src_reg(jj), get_src_reg(jj), vmm_mask);
-                            uni_vpbroadcastd(get_src_reg(jj), Xmm(get_src_reg(jj).getIdx()));
-                        } else {
-                            uni_vpbroadcastd(get_src_reg(jj), ptr[aux1_reg_input + jcp.typesize_in * inp_off]);
-
-                            if (jcp.signed_input) {
-                                uni_vpsubb(get_src_reg(jj), get_src_reg(jj), vmm_shift);
-                            }
-
-                            uni_vandps(get_src_reg(jj), get_src_reg(jj), vmm_mask);
-                            uni_vpbroadcastd(get_src_reg(jj), Xmm(get_src_reg(jj).getIdx()));
-                        }
+                if (tail_size > 0) {
+                    if (h_padded || jj < jj_start || jj >= jj_end) {
+                        uni_vpxor(get_src_reg(jj), get_src_reg(jj), get_src_reg(jj));
+                        uni_vpsubb(get_src_reg(jj), get_src_reg(jj), vmm_shift);
                     } else {
-                        if (h_padded || jj < jj_start || jj >= jj_end) {
-                            uni_vpxor(get_src_reg(jj), get_src_reg(jj), get_src_reg(jj));
-                        } else {
-                            uni_vpbroadcastd(get_src_reg(jj), ptr[aux1_reg_input + jcp.typesize_in * inp_off]);
-                        }
+                        uni_vpbroadcastd(get_src_reg(jj), ptr[aux1_reg_input + jcp.typesize_in * inp_off]);
 
-                        if (jcp.signed_input)
+                        if (jcp.signed_input) {
                             uni_vpsubb(get_src_reg(jj), get_src_reg(jj), vmm_shift);
+                        }
+                    }
+                } else {
+                    if (h_padded || jj < jj_start || jj >= jj_end) {
+                        uni_vpxor(get_src_reg(jj), get_src_reg(jj), get_src_reg(jj));
+                    } else {
+                        uni_vpbroadcastd(get_src_reg(jj), ptr[aux1_reg_input + jcp.typesize_in * inp_off]);
                     }
+
+                    if (jcp.signed_input)
+                        uni_vpsubb(get_src_reg(jj), get_src_reg(jj), vmm_shift);
+                }
             }
 
             for (int ii = 0; ii < oc_blocks; ii++) {
@@ -279,7 +250,6 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::kh_loop(int ur_w, int pad_l, int pad
     mov(imm_addr64, l_table);
     uni_vmovups(vmm_one,   ptr[imm_addr64 + 0 * vlen]);
     uni_vmovups(vmm_shift, ptr[imm_addr64 + 1 * vlen]);
-    uni_vmovups(vmm_mask, ptr[imm_addr64 + 4 * vlen]);
 
     if (jcp.signed_input) {
         mov(reg_overflow,  ptr[param1 + GET_OFF(t_overflow)]);
@@ -349,6 +319,7 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::width_blk_step(int ur_w, int pad_l,
 
     kh_loop(ur_w, pad_l, pad_r, oc_blocks, oc_step);
 
+    pop(reg_oc_off);
     pop(reg_scales_base);
 
     mov(imm_addr64, l_table);
@@ -359,140 +330,143 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::width_blk_step(int ur_w, int pad_l,
     const float p_sum_scale = (sum_idx != -1) ? p.entry_[sum_idx].sum.scale : 1.f;
 
     for (int r = 0; r < repeats; r++) {
+        auto get_dst_off = [=](int ii, int jj) {
+            if (jcp.with_dw_conv)
+                return (ii * jcp_dw.kh * jcp.ow + jj) * jcp.oc_block + r * (jcp.oc_block / 2);
+            else
+                return ii * jcp.oc_block + jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2);
+        };
+
         int tail_size = isa == avx2 ? oc_step : nstl::min(jcp.oc_block / 2, oc_step - r * jcp.oc_block / 2);
         bool is_scalar_store = isa == avx2 ? tail_size < jcp.oc_block : tail_size < jcp.oc_block / 2;
 
-        if (is_scalar_store) {
+        for (int ii = 0; ii < oc_blocks; ii++) {
+            if (jcp.with_bias) {
+                int b_off = ii * jcp.oc_block + r * (jcp.oc_block / 2);
+                cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias_base + b_off * jcp.typesize_bia], false);
+
+                if (jcp.signed_input)
+                    uni_vmulps(vmm_bias, vmm_bias, vmm_bias_alpha);
+            }
+
             for (int jj = 0; jj < ur_w; jj++) {
-                Vmm vmm_dst = get_acc_reg(r * jcp.ur_w * jcp.nb_oc_blocking + jj);
+                Vmm vmm_dst = get_acc_reg(r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
                 uni_vcvtdq2ps(vmm_dst, vmm_dst);
-                uni_vmovups(vmm_reminder_dst, vmm_dst);
 
-                for (int oc = 0; oc < tail_size; oc++) {
-                    uni_vmovups(vmm_dst, vmm_reminder_dst);
+                if (jcp.signed_input) {
+                    int c_off = ii * jcp.oc_block + r * (jcp.oc_block / 2);
+                    cvt2ps(data_type::s32, vmm_comp, ptr[reg_compensation_base + c_off * sizeof(int32_t)], false);
+                }
 
-                    if (jcp.with_bias) {
-                        int b_off = r * (jcp.oc_block / 2) + oc;
-                        cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias_base + b_off * jcp.typesize_bia], true);
+                if (jcp.signed_input)
+                    uni_vaddps(vmm_dst, vmm_dst, vmm_comp);
+                if (jcp.with_bias)
+                    uni_vaddps(vmm_dst, vmm_dst, vmm_bias);
 
-                        if (jcp.signed_input)
-                            uni_vmulps(vmm_bias, vmm_bias, vmm_bias_alpha);
-                    }
-                    if (jcp.signed_input) {
-                        int c_off = r * (jcp.oc_block / 2) + oc;
-                        cvt2ps(data_type::s32, vmm_comp, ptr[reg_compensation_base + c_off * sizeof(int32_t)], true);
-                    }
+                int s_off = jcp.is_oc_scale * (ii * jcp.oc_block + r * (jcp.oc_block / 2));
+                cvt2ps(mkldnn_f32, vmm_scale, ptr[reg_scales_base + s_off * sizeof(float)], false);
+                uni_vmulps(vmm_dst, vmm_dst, vmm_scale);
+            }
+        }
 
-                    if (jcp.signed_input)
-                        uni_vaddps(vmm_dst, vmm_dst, vmm_comp);
-                    if (jcp.with_bias)
-                        uni_vaddps(vmm_dst, vmm_dst, vmm_bias);
+        int eltwise_inj_idx = 0;
+        int depthwise_inj_idx = 0;
+        int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
+        for (int i = 0; i < end_idx; i++) {
+            int start_idx = 1 + r * jcp.ur_w * jcp.nb_oc_blocking;
+
+            auto& post_op = p.entry_[i];
+            if (post_op.is_eltwise()) {
+                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(start_idx, start_idx + oc_blocks * ur_w);
+                eltwise_inj_idx++;
+            } else if (post_op.is_depthwise()) {
+                mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+                mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+                add(reg_d_weights, reg_oc_off);
+                add(reg_d_bias, reg_oc_off);
+
+                if (r == 1) {
+                    add(reg_d_weights, (jcp.oc_block / 2) * sizeof(float));
+                    add(reg_d_bias, (jcp.oc_block / 2) * sizeof(float));
+                }
 
-                    int s_off = jcp.is_oc_scale * (r * (jcp.oc_block / 2) + oc);
-                    cvt2ps(mkldnn_f32, vmm_scale, ptr[reg_scales_base + s_off * sizeof(float)], true);
-                    uni_vmulps(vmm_dst, vmm_dst, vmm_scale);
+                for (int ii = 0; ii < oc_blocks; ii++) {
+                    depthwise_injectors[depthwise_inj_idx]->compute_vector_range(start_idx + ur_w * ii,
+                            start_idx + ur_w * ii + ur_w, reg_d_weights, reg_d_bias);
 
-                    int o_off = jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2) + oc;
-                    if (jcp.with_sum) {
-                        uni_vpxor(vmm_prev_dst, vmm_prev_dst, vmm_prev_dst);
-                        cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + o_off * jcp.typesize_out], true);
+                    add(reg_d_weights, jcp.oc_block * sizeof(float));
+                    add(reg_d_bias, jcp.oc_block * sizeof(float));
+                }
 
-                        if (p_sum_scale == 1.f) {
-                            uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
+                depthwise_inj_idx++;
+            } else if (post_op.is_sum(false)) {
+                for (int ii = 0; ii < oc_blocks; ii++) {
+                    for (int jj = 0; jj < ur_w; jj++) {
+                        Vmm vmm_dst = get_acc_reg(r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
+                        int o_off = get_dst_off(ii, jj);
+
+                        if (is_scalar_store) {
+                            for (int oc = 0; oc < tail_size; oc++) {
+                                uni_vpxor(vmm_prev_dst, vmm_prev_dst, vmm_prev_dst);
+                                cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + (o_off + oc) * jcp.typesize_out], true);
+
+                                if (oc < jcp.oc_block / 2) {
+                                    uni_vpslldq(vmm_prev_dst, vmm_prev_dst, oc * sizeof(float));
+                                } else {
+                                    Ymm ymm_prev_dst = Ymm(vmm_prev_dst.getIdx());
+                                    vperm2i128(ymm_prev_dst, ymm_prev_dst, ymm_prev_dst, 0x01);
+                                    vpslldq(vmm_prev_dst, vmm_prev_dst, (oc - jcp.oc_block / 2) * sizeof(float));
+                                }
+
+                                if (p_sum_scale == 1.f) {
+                                    uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
+                                } else {
+                                    uni_vfmadd231ps(vmm_dst, vmm_prev_dst, ptr[imm_addr64 + 3 * vlen]);
+                                }
+                            }
                         } else {
-                            uni_vfmadd231ps(vmm_dst, vmm_prev_dst, ptr[imm_addr64 + 3 * vlen]);
-                        }
-                    }
-
-                    if (maybe_relu(0)) {
-                        uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-                        uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
-                    }
-
-                    if (maybe_relu(1)) {
-                        uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-                        uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
-                    }
-
-                    if (jcp.dst_dt != data_type::f32) {
-                        if (attr_.round_mode_ == round_mode::nearest)
-                            uni_vcvtps2dq(vmm_dst, vmm_dst);
-                        else if (attr_.round_mode_ == round_mode::down) {
-                            uni_vroundps(vmm_dst, vmm_dst, 1);
-                            uni_vcvtps2dq(vmm_dst, vmm_dst);
-                        } else
-                            assert(!"unimplemented");
-                    }
-
-                    store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, true);
+                            cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + o_off * jcp.typesize_out], false);
 
-                    if (isa == avx2) {
-                        vperm2i128(ymm_tmp, ymm_reminder_dst, ymm_reminder_dst, 0x01);
-                        vpalignr(ymm_reminder_dst, ymm_tmp, ymm_reminder_dst, jcp.typesize_out);
-                    } else {
-                        psrldq(vmm_reminder_dst, jcp.typesize_out);
+                            if (p_sum_scale == 1.f) {
+                                uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
+                            } else {
+                                uni_vfmadd231ps(vmm_dst, vmm_prev_dst, ptr[imm_addr64 + 3 * vlen]);
+                            }
+                        }
                     }
                 }
             }
-        } else {
-            for (int ii = 0; ii < oc_blocks; ii++) {
-                if (jcp.with_bias) {
-                    int b_off = ii * jcp.oc_block + r * (jcp.oc_block / 2);
-                    cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias_base + b_off * jcp.typesize_bia], false);
+        }
 
-                    if (jcp.signed_input)
-                        uni_vmulps(vmm_bias, vmm_bias, vmm_bias_alpha);
+        for (int ii = 0; ii < oc_blocks; ii++) {
+            for (int jj = 0; jj < ur_w; jj++) {
+                Vmm vmm_dst = get_acc_reg(r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
+                int o_off = get_dst_off(ii, jj);
+
+                if (jcp.dst_dt != data_type::f32) {
+                    if (attr_.round_mode_ == round_mode::nearest)
+                        uni_vcvtps2dq(vmm_dst, vmm_dst);
+                    else if (attr_.round_mode_ == round_mode::down) {
+                        uni_vroundps(vmm_dst, vmm_dst, 1);
+                        uni_vcvtps2dq(vmm_dst, vmm_dst);
+                    } else
+                        assert(!"unimplemented");
                 }
 
-                for (int jj = 0; jj < ur_w; jj++) {
-                    Vmm vmm_dst = get_acc_reg(r * jcp.ur_w * jcp.nb_oc_blocking + ur_w * ii + jj);
-                    uni_vcvtdq2ps(vmm_dst, vmm_dst);
-
-                    if (jcp.signed_input) {
-                        int c_off = ii * jcp.oc_block + r * (jcp.oc_block / 2);
-                        cvt2ps(data_type::s32, vmm_comp, ptr[reg_compensation_base + c_off * sizeof(int32_t)], false);
-                    }
-
-                    if (jcp.signed_input)
-                        uni_vaddps(vmm_dst, vmm_dst, vmm_comp);
-                    if (jcp.with_bias)
-                        uni_vaddps(vmm_dst, vmm_dst, vmm_bias);
-
-                    int s_off = jcp.is_oc_scale * (ii * jcp.oc_block + r * (jcp.oc_block / 2));
-                    cvt2ps(mkldnn_f32, vmm_scale, ptr[reg_scales_base + s_off * sizeof(float)], false);
-                    uni_vmulps(vmm_dst, vmm_dst, vmm_scale);
-
-                    int o_off = ii * jcp.oc_block + jj * jcp.oc * jcp.ngroups + r * (jcp.oc_block / 2);
-                    if (jcp.with_sum) {
-                        cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + o_off * jcp.typesize_out], false);
+                if (is_scalar_store) {
+                    for (int oc = 0; oc < tail_size; oc++) {
+                        store_dst(ptr[reg_output + (o_off + oc) * jcp.typesize_out], vmm_dst, true);
 
-                        if (p_sum_scale == 1.f) {
-                            uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
+                        if (isa == avx2) {
+                            Ymm ymm_dst = Ymm(vmm_dst.getIdx());
+                            vperm2i128(ymm_tmp, ymm_dst, ymm_dst, 0x01);
+                            vpalignr(ymm_dst, ymm_tmp, ymm_dst, jcp.typesize_out);
                         } else {
-                            uni_vfmadd231ps(vmm_dst, vmm_prev_dst, ptr[imm_addr64 + 3 * vlen]);
+                            psrldq(vmm_dst, jcp.typesize_out);
                         }
                     }
-
-                    if (maybe_relu(0)) {
-                        uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-                        uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
-                    }
-
-                    if (maybe_relu(1)) {
-                        uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
-                        uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
-                    }
-
-                    if (jcp.dst_dt != data_type::f32) {
-                        if (attr_.round_mode_ == round_mode::nearest)
-                            uni_vcvtps2dq(vmm_dst, vmm_dst);
-                        else if (attr_.round_mode_ == round_mode::down) {
-                            uni_vroundps(vmm_dst, vmm_dst, 1);
-                            uni_vcvtps2dq(vmm_dst, vmm_dst);
-                        } else
-                            assert(!"unimplemented");
-                    }
-
+                } else {
                     store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, false);
                 }
             }
@@ -500,6 +474,7 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::width_blk_step(int ur_w, int pad_l,
     }
 
     push(reg_scales_base);
+    push(reg_oc_off);
 }
 
 template <cpu_isa_t isa>
@@ -513,6 +488,7 @@ inline void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::solve_common(int oc_blocks, i
     int dilate_w = jcp.dilate_w + 1;
     int str_w = jcp.stride_w;
     const int inp_mult = jcp.ic * jcp.ngroups;
+    const int out_mult = jcp.with_dw_conv ? jcp.oc_block : jcp.oc * jcp.ngroups;
 
     int l_pad = jcp.l_pad;
     int r_pad = nstl::max(0, (int(jcp.ow) - 1) * str_w + (kw - 1) * dilate_w
@@ -529,6 +505,7 @@ inline void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::solve_common(int oc_blocks, i
     push(reg_output_base);
     push(reg_kernel_base);
     push(reg_scales_base);
+    push(reg_oc_off);
 
     if (l_pad > 0) {
         n_oi--;
@@ -537,7 +514,7 @@ inline void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::solve_common(int oc_blocks, i
         else
             width_blk_step(ur_w, l_pad, 0, oc_blocks, oc_step); // "lpad"
         add(reg_input, jcp.typesize_in * (ur_w * str_w - l_pad) * inp_mult);
-        add(reg_output, jcp.typesize_out * ur_w * jcp.oc * jcp.ngroups);
+        add(reg_output, jcp.typesize_out * ur_w * out_mult);
     }
 
     Label ow_loop_label;
@@ -548,7 +525,7 @@ inline void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::solve_common(int oc_blocks, i
 
         width_blk_step(ur_w, 0, 0, oc_blocks, oc_step); // "middle"
         add(reg_input, jcp.typesize_in * ur_w * str_w * inp_mult);
-        add(reg_output, jcp.typesize_out * ur_w * jcp.oc * jcp.ngroups);
+        add(reg_output, jcp.typesize_out * ur_w * out_mult);
 
         inc(reg_oi_iter);
         cmp(reg_oi_iter, n_oi);
@@ -558,12 +535,13 @@ inline void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::solve_common(int oc_blocks, i
     if (r_pad1 > 0 && n_oi >=0) {
         width_blk_step(ur_w, 0, r_pad1, oc_blocks, oc_step); // "rpad"
         add(reg_input, jcp.typesize_in * ur_w * str_w * inp_mult);
-        add(reg_output, jcp.typesize_out * ur_w * jcp.oc * jcp.ngroups);
+        add(reg_output, jcp.typesize_out * ur_w * out_mult);
     }
 
     if (ur_w_tail != 0)
         width_blk_step(ur_w_tail, 0, r_pad, oc_blocks, oc_step); // "tail"
 
+    pop(reg_oc_off);
     pop(reg_scales_base);
     pop(reg_kernel_base);
     pop(reg_output_base);
@@ -573,56 +551,84 @@ inline void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::solve_common(int oc_blocks, i
 template <cpu_isa_t isa>
 void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::generate()
 {
+    const auto &p = attr_.post_ops_;
+    int end_idx = jcp.with_dw_conv ? p.find(primitive_kind::convolution) : p.len_;
+    for (int i = 0; i < end_idx; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<isa>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<isa>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
     this->preamble();
 
     mov(reg_kernel_base, ptr[this->param1 + GET_OFF(filt)]);
     mov(reg_input_base, ptr[this->param1 + GET_OFF(src)]);
     mov(reg_output_base, ptr[this->param1 + GET_OFF(dst)]);
-    mov(reg_oc, ptr[this->param1 + GET_OFF(oc_work)]);
+    mov(reg_oc_work, ptr[this->param1 + GET_OFF(oc_work)]);
     if (jcp.with_bias)
         mov(reg_bias_base, ptr[this->param1 + GET_OFF(bias)]);
     mov(reg_scales_base, ptr[this->param1 + GET_OFF(scales)]);
     if (jcp.signed_input)
         mov(reg_compensation_base, ptr[param1 + GET_OFF(compensation)]);
+    mov(reg_oc_off, ptr[param1 + GET_OFF(oc_off)]);
 
     Label main_loop_label;
     Label tail_label;
     Label exit_label;
 
-    cmp(reg_oc, jcp.nb_oc_blocking * jcp.oc_block);
+    cmp(reg_oc_work, jcp.nb_oc_blocking * jcp.oc_block);
     jne(main_loop_label, T_NEAR);
 
     solve_common(jcp.nb_oc_blocking, jcp.oc_block);
 
-    sub(reg_oc, jcp.nb_oc_blocking * jcp.oc_block);
+    sub(reg_oc_work, jcp.nb_oc_blocking * jcp.oc_block);
 
     jmp(exit_label, T_NEAR);
 
     L(main_loop_label); {
-        cmp(reg_oc, jcp.oc_block);
+        cmp(reg_oc_work, jcp.oc_block);
         jl(tail_label, T_NEAR);
 
         solve_common(1, jcp.oc_block);
 
-        sub(reg_oc, jcp.oc_block);
+        sub(reg_oc_work, jcp.oc_block);
         add(reg_kernel_base, jcp.oc_block * jcp.nb_ic * jcp.kh * jcp.kw * jcp.ic_block * jcp.typesize_in);
-        add(reg_output_base, jcp.oc_block * jcp.typesize_out);
+        if (jcp.with_dw_conv)
+            add(reg_output_base, jcp.oc_block * jcp_dw.kh * jcp.ow * jcp.typesize_out);
+        else
+            add(reg_output_base, jcp.oc_block * jcp.typesize_out);
         add(reg_bias_base, jcp.oc_block * jcp.typesize_bia);
         add(reg_scales_base, jcp.is_oc_scale * jcp.oc_block * sizeof(float));
         add(reg_compensation_base, jcp.oc_block * sizeof(int32_t));
+        add(reg_oc_off, jcp.oc_block * sizeof(float));
 
         jmp(main_loop_label, T_NEAR);
     }
 
     L(tail_label);
 
-    solve_common(1, jcp.oc % jcp.oc_block);
+    if (jcp.oc % jcp.oc_block != 0)
+        solve_common(1, jcp.oc % jcp.oc_block);
 
     L(exit_label);
 
     this->postamble();
 
     prepare_table();
+
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
 }
 
 template <cpu_isa_t isa>
@@ -672,43 +678,29 @@ void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::prepare_table() {
             dd(cvals_sum_scale[i]);
         }
     }
-
-    for (size_t i = 0; i < sizeof(cvals_shift) / sizeof(cvals_shift[0]); ++i) {
-        for (size_t d = 0; d < vlen / sizeof(int8_t); ++d) {
-            if ((int)d < jcp.ic % jcp.ic_block)
-                db(255);
-            else
-                db(0);
-        }
-    }
 }
 
 template <cpu_isa_t isa>
 bool jit_uni_x8s8s32x_conv_fwd_kernel<isa>::post_ops_ok(
         jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
-    using namespace primitive_kind;
     const auto &p = attr.post_ops_;
 
-    auto is_relu = [&](int idx) {
-        return p.entry_[idx].kind == eltwise
-               && p.entry_[idx].eltwise.scale == 1.
-               && p.entry_[idx].eltwise.alg == alg_kind::eltwise_relu
-               && p.entry_[idx].eltwise.alpha == 0.;
-    };
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_depthwise = [&](int idx) { return p.entry_[idx].is_depthwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(false); };
+    auto is_dw_conv = [&](int idx) { return p.entry_[idx].is_dw_conv(); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
         case 0: return true;
-        case 1: return true
-                       && IMPLICATION(jcp.with_eltwise, p.contain(sum, 0))
-                       && IMPLICATION(!jcp.with_eltwise, is_relu(0) || p.contain(sum, 0));
-        case 2: return true
-                       && IMPLICATION(jcp.with_eltwise, p.contain(sum, 0) && is_relu(1))
-                       && IMPLICATION(!jcp.with_eltwise, false
-                                                         || (p.contain(sum, 0) && is_relu(1))
-                                                         || (p.contain(sum, 1) && is_relu(0)));
-        case 3: return true
-                       && jcp.with_eltwise == false
-                       && (is_relu(0) && p.contain(sum, 1) && is_relu(2));
+        case 1: return is_simple(0) || is_sum(0) || is_dw_conv(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_sum(1)) ||
+                       (is_dw_conv(0) && is_simple(1)) || (is_simple(0) && is_dw_conv(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_simple(0) && is_sum(1) && is_simple(2)) ||
+                       (is_simple(0) && is_dw_conv(1) && is_simple(2)) ||
+                       (is_dw_conv(0) && is_simple(1) && is_simple(2));
+        case 4: return (is_simple(0) && is_dw_conv(1) && is_simple(2) && is_simple(3));
         default: return false;
     }
 
@@ -720,7 +712,7 @@ status_t jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
         const convolution_desc_t &cd, cpu_memory_t::pd_t &src_pd,
         cpu_memory_t::pd_t &weights_pd, cpu_memory_t::pd_t &dst_pd,
         cpu_memory_t::pd_t &bias_pd,
-        const primitive_attr_t &attr, bool with_relu, float relu_negative_slope)
+        const primitive_attr_t &attr)
 {
     if (!mayiuse(isa)) return status::unimplemented;
 
@@ -758,8 +750,6 @@ status_t jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
 
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     jcp.signed_input = src_d.data_type() == data_type::s8;
 
@@ -772,14 +762,23 @@ status_t jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
     jcp.oc_padded = rnd_up(jcp.oc, jcp.oc_block);
     jcp.nb_oc = div_up(jcp.oc, jcp.oc_block);
 
+    if (jcp.ngroups != 1) {
+        if (jcp.ic % jcp.ic_block != 0 || jcp.oc % jcp.oc_block != 0)
+            return status::unimplemented;
+    }
+
     if (!post_ops_ok(jcp, attr))
         return status::unimplemented;
 
     const auto &p = attr.post_ops_;
-    jcp.with_sum = p.find(primitive_kind::sum) != -1;
-    if (!jcp.with_eltwise) {
-        jcp.with_eltwise = p.find(primitive_kind::eltwise) != -1;
-        jcp.eltwise_alpha = 0.f;
+
+    int dw_conv_ind = p.find(primitive_kind::convolution);
+    jcp.with_dw_conv = dw_conv_ind != -1;
+    if (jcp.with_dw_conv) {
+        jcp.dw_conv_oh = jcp.oh;
+        jcp.dw_conv_ow = jcp.ow;
+        jcp.oh = p.entry_[dw_conv_ind].dw_conv.in_h;
+        jcp.ow = p.entry_[dw_conv_ind].dw_conv.in_w;
     }
 
     auto desired_act_fmt = nhwc;
@@ -808,6 +807,7 @@ status_t jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
             return status::unimplemented;
     }
 
+    jcp.src_dt = cd.src_desc.data_type;
     jcp.bia_dt = jcp.with_bias ? cd.bias_desc.data_type : data_type::undef;
     jcp.dst_dt = cd.dst_desc.data_type;
 
@@ -824,9 +824,15 @@ status_t jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
     assert(IMPLICATION(!jcp.is_oc_scale, oscales.mask_ == 0));
 
     jcp.ur_h = 1; /* no code-unrolling by h so far */
-    jcp.ur_w = isa == avx2 ? 3 : 2;
-    jcp.nb_oc_blocking = 2;
-    if (jcp.nb_oc % jcp.nb_oc_blocking != 0) jcp.nb_oc_blocking = 1;
+    jcp.ur_w = isa == avx2 ? 4 : 2;
+    jcp.nb_oc_blocking = nstl::min(2, jcp.nb_oc);
+    jcp.max_regs_ur = 12;
+
+    // WA to prevent fallback on gemm implementation
+    if (isa == sse42 && jcp.ic == 3) {
+        jcp.ur_w = 4;
+        jcp.nb_oc_blocking = 1;
+    }
 
     if (jcp.ow < jcp.ur_w) jcp.ur_w = jcp.ow;
     jcp.ur_w_tail = jcp.ow % jcp.ur_w;
@@ -839,24 +845,42 @@ status_t jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
 
     int r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w
         + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1));
+    if (r_pad_no_tail > jcp.ur_w)
+        return status::unimplemented;
 
-    if (r_pad_no_tail > jcp.ur_w) {
-        /* recalculate ur_w, nb_oc_blocking and ur_w_tail */
-        jcp.ur_w = r_pad_no_tail + 1;
-        jcp.ur_w_tail = jcp.ow % jcp.ur_w;
-        /* check again ... */
-        r_pad_no_tail = nstl::max(0, (jcp.ow - jcp.ur_w_tail - 1) * jcp.stride_w
-            + (jcp.kw - 1) * (jcp.dilate_w + 1) - (jcp.iw + jcp.l_pad - 1));
-        if ((r_pad_no_tail > jcp.ur_w) || (jcp.ow < jcp.ur_w))
-            return status::unimplemented;
-    }
-    if (jcp.l_pad > jcp.ur_w) return status::unimplemented;
+    if (jcp.l_pad > jcp.ur_w)
+        return status::unimplemented;
 
     jcp.wei_adj_scale = (jcp.signed_input) ? (1.0f / 2.0f) : 1.0f;
 
     return status::success;
 }
 
+template <cpu_isa_t isa>
+void jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_scratchpad(
+        memory_tracking::registrar_t &scratchpad, const jit_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw,
+        const primitive_attr_t &attr) {
+    if (jcp.oc != jcp.oc_padded)
+        scratchpad.book(key_conv_padded_bias, (size_t)jcp.typesize_bia * jcp.oc_padded);
+
+    if (jcp.signed_input) {
+        size_t count = nstl::max(attr.output_scales_.count_, 8);
+        scratchpad.book(key_conv_adjusted_scales, sizeof(float) * count);
+
+        if (jcp.oc != jcp.oc_padded)
+            scratchpad.book(key_conv_padded_compensation, sizeof(int32_t) * jcp.oc_padded);
+    }
+
+    if (jcp.with_dw_conv) {
+        const int nthreads = mkldnn_get_max_threads();
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * jcp.nb_oc_blocking;
+        scratchpad.book(key_dw_conv_buffer, jcp_dw.typesize_in * dw_conv_buffer_size_ * nthreads);
+
+        if (jcp.oc != jcp.oc_padded)
+            scratchpad.book(key_dw_conv_padded_bias, (size_t)jcp_dw.typesize_bia * jcp.oc_padded);
+    }
+}
+
 template struct jit_uni_x8s8s32x_conv_fwd_kernel<avx2>;
 template struct jit_uni_x8s8s32x_conv_fwd_kernel<sse42>;
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.hpp
index 110fa3a74..a7af3d348 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_conv_kernel.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2018-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -21,6 +21,8 @@
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
 #include "cpu_memory.hpp"
+#include "jit_uni_eltwise.hpp"
+#include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -28,13 +30,23 @@ namespace cpu {
 
 template <cpu_isa_t isa>
 struct jit_uni_x8s8s32x_conv_fwd_kernel: public jit_generator {
-    jit_uni_x8s8s32x_conv_fwd_kernel(jit_conv_conf_t ajcp,
-            const primitive_attr_t &attr): jcp(ajcp), attr_(attr)
+    jit_uni_x8s8s32x_conv_fwd_kernel(jit_conv_conf_t ajcp, jit_conv_conf_t ajcp_dw,
+            const primitive_attr_t &attr): jcp(ajcp), jcp_dw(ajcp_dw), attr_(attr)
     {
         this->generate();
         jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
     }
 
+    ~jit_uni_x8s8s32x_conv_fwd_kernel() {
+        for (auto inj : eltwise_injectors)
+           delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
+    }
+
     DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_x8s8s32x_conv_fwd_kernel)
 
     static bool post_ops_ok(jit_conv_conf_t &jcp,
@@ -45,11 +57,12 @@ struct jit_uni_x8s8s32x_conv_fwd_kernel: public jit_generator {
             cpu_memory_t::pd_t &weights_pd,
             cpu_memory_t::pd_t &dst_pd,
             cpu_memory_t::pd_t &bias_pd,
-            const primitive_attr_t &attr,
-            bool with_relu = false,
-            float relu_negative_slope = 0.);
+            const primitive_attr_t &attr);
+    static void init_scratchpad(memory_tracking::registrar_t &scratchpad,
+            const jit_conv_conf_t &jcp, const jit_conv_conf_t &jcp_dw, const primitive_attr_t &attr);
 
     jit_conv_conf_t jcp;
+    jit_conv_conf_t jcp_dw;
     const primitive_attr_t &attr_;
     void (*jit_ker)(jit_conv_call_s *);
 
@@ -81,30 +94,30 @@ private:
     reg64_t reg_oi_iter = r11;
     reg64_t reg_ic_iter = r15;
     reg64_t reg_compensation_base = abi_not_param1;
-    reg64_t reg_oc = r12;
+    reg64_t reg_oc_work = r12;
     reg64_t imm_addr64 = rbx;
 
     reg8_t reg_tmp_8 = r14b;
     reg32_t reg_tmp_32 = r14d;
     reg64_t reg_tmp_64 = r14;
 
-    Vmm vmm_zero = Vmm(14);
+    reg64_t reg_oc_off = r10;
+    reg64_t reg_d_weights = aux_reg_kernel;
+    reg64_t reg_d_bias = aux_reg_input;
+
     Vmm vmm_one = Vmm(15);
     Vmm vmm_bias_alpha = Vmm(13);
     Vmm vmm_shift = Vmm(14);
-    Vmm vmm_mask = Vmm(13);
     Vmm vmm_bias = Vmm(15);
-    Vmm vmm_reminder_dst = Vmm(11);
-    Ymm ymm_reminder_dst = Ymm(11);
     Ymm ymm_tmp = Ymm(10);
     Vmm vmm_scale = Vmm(12);
     Vmm vmm_comp = Vmm(12);
     Vmm vmm_prev_dst = Vmm(12);
 
-    inline Vmm get_src_reg(int idx) { return Vmm(idx + 8); }
-    inline Vmm get_ker_reg(int idx) { return Vmm(idx + 11); }
-    inline Vmm get_tmp_reg(int idx) { return Vmm(idx + 12); }
-    inline Vmm get_acc_reg(int idx) { return Vmm(idx + 0); }
+    inline Vmm get_src_reg(int idx) { return Vmm(idx + 9); }
+    inline Vmm get_ker_reg(int idx) { return Vmm(idx + 0); }
+    inline Vmm get_tmp_reg(int idx) { return Vmm(idx + 13); }
+    inline Vmm get_acc_reg(int idx) { return Vmm(idx + 1); }
 
     inline void cvt2ps(data_type_t type_in, Vmm ymm_in, const Xbyak::Operand &op, bool scalar_load);
     inline void store_dst(const Xbyak::Address &op, Vmm vmm_dst, bool scalar_store);
@@ -116,12 +129,13 @@ private:
     inline void width_blk_step(int ur_w, int pad_l, int pad_r, int oc_blocks, int oc_step);
     inline void solve_common(int oc_blocks, int oc_step);
 
-    bool maybe_relu(int position);
-
     void generate();
 
     void prepare_table();
 
+    nstl::vector<jit_uni_eltwise_injector_f32<isa>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<isa>*> depthwise_injectors;
+
     Xbyak::Label l_table;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.cpp
index d574361da..83ca9cedf 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2018-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include "utils.hpp"
 #include "mkldnn_thread.hpp"
 #include "type_helpers.hpp"
+#include <cstring>
 
 namespace mkldnn {
 namespace impl {
@@ -27,19 +28,20 @@ namespace cpu {
 
 using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
+using namespace mkldnn::impl::memory_tracking::names;
 using namespace mkldnn::impl::utils;
 
-template <cpu_isa_t isa, bool with_relu, impl::data_type_t src_type, data_type_t dst_type>
-void _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>::execute_forward() {
+template <cpu_isa_t isa, impl::data_type_t src_type, data_type_t dst_type>
+void _jit_uni_x8s8s32x_convolution_fwd_t<isa, src_type, dst_type>::execute_forward() const {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const char *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
 
@@ -47,8 +49,33 @@ void _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>::ex
     auto w = const_cast<wei_data_t *>(weights);
     int32_t* compensation = (jcp.signed_input) ? reinterpret_cast<int32_t *>(&w[offset]) : 0;
 
-    const size_t bia_dt_size = conf_.with_bias() ? types::data_type_size(conf_.cdesc()->bias_desc.data_type) : 0;
-    float* scales = conf_.attr()->output_scales_.scales_;
+    if (bias && jcp.oc != jcp.oc_padded) {
+        auto padded_bias = this->scratchpad().template get<bia_data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, (bia_data_t*)bias, jcp.oc);
+        utils::array_set(padded_bias + jcp.oc, 0, jcp.oc_padded - jcp.oc);
+        bias = (char *)padded_bias;
+    }
+
+    const float *oscales = pd()->attr()->output_scales_.scales_;
+    if (jcp.signed_input) {
+        auto local_scales = scratchpad().template get<float>(key_conv_adjusted_scales);
+        size_t count = pd()->attr()->output_scales_.count_;
+        float factor = 1.f / jcp.wei_adj_scale;
+        if (count == 1) {
+            utils::array_set(local_scales, oscales[0] * factor, 8);
+        } else {
+            for (size_t c = 0; c < count; c++)
+                local_scales[c] = oscales[c] * factor;
+        }
+        oscales = local_scales;
+
+        if (jcp.oc != jcp.oc_padded) {
+            auto padded_compensation = this->scratchpad().template get<int32_t>(key_conv_padded_compensation);
+            utils::array_copy(padded_compensation, compensation, jcp.oc);
+            utils::array_set(padded_compensation + jcp.oc, 0, jcp.oc_padded - jcp.oc);
+            compensation = padded_compensation;
+        }
+    }
 
     int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
     const size_t work_amount = jcp.mb * jcp.ngroups * ocb_work * jcp.oh;
@@ -64,7 +91,7 @@ void _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>::ex
             int ocb = ocbb * jcp.nb_oc_blocking;
             int ocb_num = jcp.nb_oc_blocking;
 
-            jit_conv_call_s par_conv = {};
+            auto par_conv = jit_conv_call_s();
 
             const int ij = oh * jcp.stride_h;
             const int i_t_overflow = nstl::min(jcp.kh, div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1)));
@@ -81,12 +108,12 @@ void _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>::ex
             par_conv.dst = &dst[dst_off];
 
             const int wh = (!jcp.signed_input) ? i_t_overflow : 0;
-            par_conv.filt = &weights[conf_.with_groups()
+            par_conv.filt = &weights[pd()->with_groups()
                                 ? weights_d.blk_off(g, ocb, 0, wh, 0)
                                 : weights_d.blk_off(ocb, 0, wh, 0)];
 
             if (bias)
-                par_conv.bias = &bias[bias_d.blk_off(_oc * jcp.oc_block*bia_dt_size)];
+                par_conv.bias = &bias[bias_d.blk_off(_oc * jcp.oc_block*jcp.typesize_bia)];
 
             par_conv.oc_work =
                     nstl::min((ocb + ocb_num) * jcp.oc_block, jcp.oc) - ocb*jcp.oc_block;
@@ -95,13 +122,14 @@ void _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>::ex
             const int kh_padding = jcp.kh - i_t_overflow - i_b_overflow;
             par_conv.kh_padding = nstl::max(0, kh_padding);
 
-            par_conv.scales = (jcp.signed_input) ? &local_scales_[jcp.is_oc_scale * _oc * jcp.oc_block]
-                                                 : &scales[jcp.is_oc_scale * _oc * jcp.oc_block];
+            par_conv.scales = &oscales[jcp.is_oc_scale * _oc * jcp.oc_block];
 
             par_conv.compensation = (jcp.signed_input) ? compensation + _oc * jcp.oc_block : 0;
             par_conv.t_overflow = i_t_overflow;
             par_conv.b_overflow = i_b_overflow;
 
+            par_conv.oc_off = _oc * jcp.oc_block * sizeof(float);
+
             kernel_->jit_ker(&par_conv);
             nd_iterator_step(n, jcp.mb, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
         }
@@ -110,41 +138,193 @@ void _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>::ex
     parallel(0, ker);
 }
 
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::u8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::u8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, data_type::s8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, data_type::s8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::u8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::u8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, data_type::s8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::s8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::s8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::s8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, data_type::s8, data_type::f32>::execute_forward();
+template <cpu_isa_t isa, impl::data_type_t src_type, data_type_t dst_type>
+void _jit_uni_x8s8s32x_convolution_fwd_t<isa, src_type, dst_type>::execute_forward_with_dw_conv() const {
+    auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
+    auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
+    auto bias = reinterpret_cast<const char *>(this->input_memory(2));
+    auto dst = reinterpret_cast<dst_data_t *>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
+
+    const auto &jcp = kernel_->jcp;
+    const auto &jcp_dw = kernel_dw_->jcp;
+    const int MB = pd()->MB();
+
+    size_t offset = (size_t)jcp.ngroups * rnd_up(jcp.oc, jcp.oc_block) * rnd_up(jcp.ic, jcp.ic_block) * jcp.kh * jcp.kw;
+    auto w = const_cast<wei_data_t *>(weights);
+    int32_t* compensation = (jcp.signed_input) ? reinterpret_cast<int32_t *>(&w[offset]) : 0;
+
+    auto dw_bias = jcp_dw.conv_biases;
+    auto dw_weights = reinterpret_cast<const wei_data_t *>(jcp_dw.conv_weights);
+
+    if (jcp.oc != jcp.oc_padded) {
+        auto padded_bias = this->scratchpad().template get<bia_data_t>(key_conv_padded_bias);
+        utils::array_copy(padded_bias, (bia_data_t*)bias, jcp.oc);
+        utils::array_set(padded_bias + jcp.oc, 0, jcp.oc_padded - jcp.oc);
+        bias = (char *)padded_bias;
+
+        auto dw_padded_bias = this->scratchpad().template get<bia_data_t>(key_dw_conv_padded_bias);
+        utils::array_copy(dw_padded_bias, dw_bias, jcp.oc);
+        utils::array_set(dw_padded_bias + jcp.oc, 0.f, jcp.oc_padded - jcp.oc);
+        dw_bias = dw_padded_bias;
+    }
+
+    const float *oscales = pd()->attr()->output_scales_.scales_;
+    if (jcp.signed_input) {
+        auto local_scales = scratchpad().template get<float>(key_conv_adjusted_scales);
+        size_t count = pd()->attr()->output_scales_.count_;
+        float factor = 1.f / jcp.wei_adj_scale;
+        if (count == 1) {
+            utils::array_set(local_scales, oscales[0] * factor, 8);
+        } else {
+            for (size_t c = 0; c < count; c++)
+                local_scales[c] = oscales[c] * factor;
+        }
+        oscales = local_scales;
+
+        if (jcp.oc != jcp.oc_padded) {
+            auto padded_compensation = this->scratchpad().template get<int32_t>(key_conv_padded_compensation);
+            utils::array_copy(padded_compensation, compensation, jcp.oc);
+            utils::array_set(padded_compensation + jcp.oc, 0, jcp.oc_padded - jcp.oc);
+            compensation = padded_compensation;
+        }
+    }
+
+    int ocb_work = div_up(jcp.nb_oc, jcp.nb_oc_blocking);
+    const size_t work_amount = MB * jcp.ngroups * ocb_work * jcp.oh;
+
+    auto ker = [&](const int ithr, const int nthr) {
+        auto compute_row_gen = [&](dst_data_t* ws_p, int n, int g, int ocb, int ocb_num, int oh, int num_rows) {
+            for (int h = 0; h < num_rows; h++) {
+                if ((oh + h) < 0 || (oh + h) >= jcp.oh) {
+                    for (int chb = ocb; chb < ocb + ocb_num; chb++) {
+                        memset(ws_p + (((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block +
+                               (chb - ocb) * jcp_dw.kh * jcp.ow * jcp.oc_block, 0, jcp.ow * jcp.oc_block * sizeof(dst_data_t));
+                    }
+                } else {
+                    auto par_conv = jit_conv_call_s();
+
+                    const int ij = (oh + h) * jcp.stride_h;
+                    const int i_t_overflow = nstl::min(jcp.kh, div_up(nstl::max(0, jcp.t_pad - ij), (jcp.dilate_h+1)));
+                    const int i_b_overflow = nstl::min(jcp.kh, div_up(nstl::max(jcp.ih, ij + (jcp.kh-1) * (jcp.dilate_h+1) -
+                                                       jcp.t_pad+1) - jcp.ih, (jcp.dilate_h + 1)));
+
+                    const size_t _oc = g * jcp.nb_oc + ocb;
+                    const size_t _ic = g * jcp.nb_ic;
+
+                    const int ih = nstl::max(ij - jcp.t_pad + i_t_overflow * (jcp.dilate_h + 1), 0);
+                    par_conv.src = &src[src_d.blk_off(n, _ic*jcp.ic_block, ih, 0)];
+
+                    par_conv.dst = &ws_p[(((oh + h) + 1) % jcp_dw.kh) * jcp.ow * jcp.oc_block];
+
+                    const int wh = (!jcp.signed_input) ? i_t_overflow : 0;
+                    par_conv.filt = &weights[pd()->with_groups()
+                                        ? weights_d.blk_off(g, ocb, 0, wh, 0)
+                                        : weights_d.blk_off(ocb, 0, wh, 0)];
+
+                    if (bias)
+                        par_conv.bias = &bias[bias_d.blk_off(_oc * jcp.oc_block*jcp.typesize_bia)];
+
+                    par_conv.oc_work =
+                            nstl::min((ocb + ocb_num) * jcp.oc_block, jcp.oc) - ocb*jcp.oc_block;
+
+                    par_conv.kw_padding = 0;
+                    const int kh_padding = jcp.kh - i_t_overflow - i_b_overflow;
+                    par_conv.kh_padding = nstl::max(0, kh_padding);
+
+                    par_conv.scales = &oscales[jcp.is_oc_scale * _oc * jcp.oc_block];
+                    par_conv.compensation = (jcp.signed_input) ? compensation + _oc * jcp.oc_block : 0;
+                    par_conv.t_overflow = i_t_overflow;
+                    par_conv.b_overflow = i_b_overflow;
+
+                    par_conv.oc_off = _oc * jcp.oc_block * sizeof(float);
+
+                    kernel_->jit_ker(&par_conv);
+                }
+            }
+        };
+
+        auto compute_row_dw = [&](const dst_data_t* ws_p, int n, int ocb, int ocb_num, int dst_idx) {
+            for (int chb = ocb; chb < nstl::min(ocb + ocb_num, jcp.nb_oc); chb++) {
+                auto par_conv_dw = jit_conv_call_s();
+
+                par_conv_dw.src_row0 = &ws_p[(((dst_idx+1) - 1) % jcp_dw.kh) * jcp_dw.iw * jcp_dw.ch_block +
+                                             (chb - ocb) * jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block];
+                par_conv_dw.src_row1 = &ws_p[(((dst_idx+1) - 0) % jcp_dw.kh) * jcp_dw.iw * jcp_dw.ch_block +
+                                             (chb - ocb) * jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block];
+                par_conv_dw.src_row2 = &ws_p[(((dst_idx+1) + 1) % jcp_dw.kh) * jcp_dw.iw * jcp_dw.ch_block +
+                                             (chb - ocb) * jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block];
+
+                par_conv_dw.dst = &dst[n*jcp_dw.oc*jcp_dw.oh*jcp_dw.ow + dst_idx/jcp_dw.stride_h*jcp_dw.ow*jcp_dw.oc + chb*jcp_dw.ch_block];
+
+                par_conv_dw.kh_padding = jcp_dw.kh;
+                par_conv_dw.filt = &dw_weights[chb * jcp_dw.kh * jcp_dw.kw * jcp_dw.ch_block];
+                par_conv_dw.bias = &dw_bias[chb * jcp_dw.ch_block];
+                par_conv_dw.ur_w = (size_t)(jcp_dw.ow);
+                par_conv_dw.oc_work = nstl::min((chb + 1) * jcp_dw.ch_block, (int)jcp_dw.oc) - chb*jcp_dw.ch_block;
+                par_conv_dw.oc_off = chb * jcp_dw.ch_block * sizeof(float);
+
+                kernel_dw_->jit_ker(&par_conv_dw);
+            }
+        };
+
+        size_t start{0}, end{0};
+        balance211(work_amount, nthr, ithr, start, end);
+
+        auto dw_conv_buffer = scratchpad().template get<dst_data_t>(key_dw_conv_buffer);
+        size_t dw_conv_buffer_size_ = (size_t)jcp_dw.kh * jcp_dw.iw * jcp_dw.ch_block * jcp.nb_oc_blocking;
+        auto pbuf = dw_conv_buffer + ithr * dw_conv_buffer_size_;
+
+        size_t n{0}, g{0}, ocbb{0}, oh{0};
+        nd_iterator_init(start, n, MB, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
+        for (size_t iwork = start; iwork < end; ++iwork) {
+            int ocb = ocbb * jcp.nb_oc_blocking;
+            int ocb_num = jcp.nb_oc_blocking;
+
+            if (iwork == start || oh == 0) {
+                compute_row_gen(pbuf, n, g, ocb, ocb_num, oh - 1, 2);
+            } else {
+                compute_row_gen(pbuf, n, g, ocb, ocb_num, oh, 1);
+            }
+
+            if (iwork > start && ((oh - 1) % jcp_dw.stride_h == 0) && oh > 0) {
+                compute_row_dw(pbuf, n, ocb, ocb_num, oh - 1);
+            }
+
+            if ((iwork == end - 1 || (int) oh == jcp.oh - 1) && ((oh) % jcp_dw.stride_h == 0)) {
+                compute_row_gen(pbuf, n, g, ocb, ocb_num, oh + 1, 1);
+                compute_row_dw(pbuf, n, ocb, ocb_num, oh);
+            }
+
+            nd_iterator_step(n, MB, g, jcp.ngroups, ocbb, ocb_work, oh, jcp.oh);
+        }
+    };
+
+    parallel(0, ker);
+}
+
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::u8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::u8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::u8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::u8, data_type::f32>;
+
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::s8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::s8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::s8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<avx2, data_type::s8, data_type::f32>;
+
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::u8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::u8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::u8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::u8, data_type::f32>;
+
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::s8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::s8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::s8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_convolution_fwd_t<sse42, data_type::s8, data_type::f32>;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.hpp
index efd1185fc..7b5d61c43 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_convolution.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2018 Intel Corporation
+* Copyright 2018-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -25,100 +25,105 @@
 #include "jit_uni_x8s8s32x_conv_kernel.hpp"
 #include "jit_generator.hpp"
 #include "mkldnn_thread.hpp"
-
+#include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <cpu_isa_t isa, bool with_relu, impl::data_type_t src_type, impl::data_type_t dst_type>
+template <cpu_isa_t isa, impl::data_type_t src_type, impl::data_type_t dst_type>
 struct _jit_uni_x8s8s32x_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
-            , jcp_({}) {}
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
+            , jcp_(), jcp_dw_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit:", isa, ""),
-                _jit_uni_x8s8s32x_convolution_fwd_t<isa, with_relu, src_type, dst_type>);
+                _jit_uni_x8s8s32x_convolution_fwd_t<isa, src_type, dst_type>);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
+                && this->desc()->alg_kind == alg_kind::convolution_direct
                 && IMPLICATION(this->with_bias(), utils::one_of(
-                    this->cdesc_().bias_desc.data_type, data_type::f32,
+                    this->desc()->bias_desc.data_type, data_type::f32,
                     data_type::s32, data_type::s8, data_type::u8))
-                && this->cdesc_().accum_data_type == data_type::s32
-                && this->cdesc_().src_desc.data_type == src_type
-                && this->cdesc_().dst_desc.data_type == dst_type;
+                && this->desc()->accum_data_type == data_type::s32
+                && this->desc()->src_desc.data_type == src_type
+                && this->desc()->dst_desc.data_type == dst_type;
             if (!ok) return status::unimplemented;
 
-            return jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jcp_, this->cdesc_(),
+            status_t sts = jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_conf(jcp_, *this->desc(),
                     this->src_pd_, this->weights_pd_,
-                    this->dst_pd_, this->bias_pd_, *this->attr(),
-                    with_relu, this->negative_slope());
+                    this->dst_pd_, this->bias_pd_, *this->attr());
+            if (sts != status::success) return sts;
+
+            if (jcp_.with_dw_conv) {
+                status_t sts_dw = jit_uni_dw_conv_row_f32<isa>::init_conf(jcp_, jcp_dw_, *this->attr());
+                if (sts_dw != status::success) return sts_dw;
+            }
+
+            auto scratchpad = scratchpad_registry().registrar();
+            jit_uni_x8s8s32x_conv_fwd_kernel<isa>::init_scratchpad(scratchpad, jcp_, jcp_dw_, *this->attr());
+
+            return status::success;
         }
 
         jit_conv_conf_t jcp_;
+        jit_conv_conf_t jcp_dw_;
     };
 
-    _jit_uni_x8s8s32x_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
-            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), local_scales_(nullptr) {
-        kernel_ = new jit_uni_x8s8s32x_conv_fwd_kernel<isa>(conf_.jcp_, *conf_.attr());
-
-        if (conf_.jcp_.signed_input) {
-            size_t scales_size = (conf_.attr()->output_scales_.count_ == 1)
-                                 ? 8
-                                 : conf_.attr()->output_scales_.count_;
-            local_scales_ = (float *)malloc(sizeof(float) * scales_size, 64);
-            for (size_t i = 0; i < scales_size; i++) {
-                local_scales_[i] = conf_.attr()->output_scales_.scales_[i] *
-                                   (1.0 / conf_.jcp_.wei_adj_scale);
-            }
+    _jit_uni_x8s8s32x_convolution_fwd_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {
+        kernel_ = new jit_uni_x8s8s32x_conv_fwd_kernel<isa>(pd()->jcp_, pd()->jcp_dw_, *pd()->attr());
+
+        if (pd()->jcp_.with_dw_conv) {
+            kernel_dw_ = new jit_uni_dw_conv_row_f32<isa>(pd()->jcp_dw_, *pd()->attr(), pd()->jcp_dw_.oc);
         }
     }
 
     ~_jit_uni_x8s8s32x_convolution_fwd_t() {
         delete kernel_;
-        if (local_scales_) free(local_scales_);
+
+        if (pd()->jcp_.with_dw_conv) {
+            delete kernel_dw_;
+        }
     };
 
     typedef typename prec_traits<data_type::u8>::type src_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
+    typedef typename prec_traits<data_type::f32>::type bia_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
 
-    virtual void execute(event_t *e) {
-        execute_forward();
+    virtual void execute(event_t *e) const {
+        if (pd()->jcp_.with_dw_conv)
+            execute_forward_with_dw_conv();
+        else
+            execute_forward();
+
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    void execute_forward_with_dw_conv() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_x8s8s32x_conv_fwd_kernel<isa> *kernel_;
-    float *local_scales_;
+    jit_uni_dw_conv_row_f32<isa> *kernel_dw_;
 };
 
 template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx2_x8s8s32x_convolution_fwd_t = _jit_uni_x8s8s32x_convolution_fwd_t<avx2, false, src_type, dst_type>;
-
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx2_x8s8s32x_convolution_relu_t = _jit_uni_x8s8s32x_convolution_fwd_t<avx2, true, src_type, dst_type>;
-
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_sse42_x8s8s32x_convolution_fwd_t = _jit_uni_x8s8s32x_convolution_fwd_t<sse42, false, src_type, dst_type>;
+using jit_avx2_x8s8s32x_convolution_fwd_t = _jit_uni_x8s8s32x_convolution_fwd_t<avx2, src_type, dst_type>;
 
 template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_sse42_x8s8s32x_convolution_relu_t = _jit_uni_x8s8s32x_convolution_fwd_t<sse42, true, src_type, dst_type>;
+using jit_sse42_x8s8s32x_convolution_fwd_t = _jit_uni_x8s8s32x_convolution_fwd_t<sse42, src_type, dst_type>;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.cpp
index c02bd80ec..d7b3994be 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.cpp
@@ -183,32 +183,6 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::apply_filter_unrolled(int ur_ch_b
 }
 
 template <cpu_isa_t isa>
-bool jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::maybe_relu(int position) {
-    using namespace primitive_kind;
-    const auto &p = attr_.post_ops_;
-
-    if (position == 0) {
-        /* relu before sum */
-        return false
-               || jcp.with_eltwise
-               || p.contain(eltwise, 0)
-               || (jcp.dst_dt == data_type::u8 && !p.contain(sum, 0));
-    } else if (position == 1) {
-        /* relu after sum */
-        const int sum_idx = p.contain(sum, 0)
-                            ? 0 : (p.contain(sum, 1) ? 1 : -1);
-        if (sum_idx == -1)
-            return false;
-
-        return false
-               || p.contain(eltwise, sum_idx + 1)
-               || jcp.dst_dt == data_type::u8;
-    }
-
-    return false;
-}
-
-template <cpu_isa_t isa>
 void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::store_dst(const Xbyak::Address &op, Vmm vmm_dst, bool scalar_store) {
     Ymm ymm_dst = Ymm(vmm_dst.getIdx());
     Xmm xmm_dst = Xmm(vmm_dst.getIdx());
@@ -229,7 +203,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::store_dst(const Xbyak::Address &o
             if (isa != sse42 && !scalar_store)
                 vpermq(ymm_dst, ymm_dst, 0x08);
 
-            uni_vpacksswb(xmm_dst, xmm_dst, xmm_dst);
+            uni_vpacksswb(vmm_dst, vmm_dst, vmm_dst);
 
             if (scalar_store) {
                 movq(reg_tmp_64, xmm_dst);
@@ -247,7 +221,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::store_dst(const Xbyak::Address &o
             if (isa != sse42 && !scalar_store)
                 vpermq(ymm_dst, ymm_dst, 0x08);
 
-            uni_vpackuswb(xmm_dst, xmm_dst, xmm_dst);
+            uni_vpackuswb(vmm_dst, vmm_dst, vmm_dst);
 
             if (scalar_store) {
                 movq(reg_tmp_64, xmm_dst);
@@ -306,37 +280,89 @@ template <cpu_isa_t isa>
 void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::store_dst(int ur_ch_blocks, int ch_step, int ur_w) {
     int repeats = isa == sse42 && ch_step > (jcp.ch_block / 2) ? 2 : 1;
 
+    pop(reg_oc_off);
     pop(reg_scales_base);
 
-    uni_vpxor(vmm_zero, vmm_zero, vmm_zero);
+    mov(imm_addr64, l_table);
+
+    const auto &p = attr_.post_ops_;
+    const int sum_idx = p.find(primitive_kind::sum);
+    const float p_sum_scale = (sum_idx != -1) ? p.entry_[sum_idx].sum.scale : 1.f;
+
+    bool is_scalar_store = ch_step < jcp.ch_block;
+
     for (int r = 0; r < repeats; r++) {
-        if (ch_step < jcp.ch_block) {
+        for (int ii = 0; ii < ur_ch_blocks; ii++) {
+            if (jcp.with_bias) {
+                int b_off = ii * jcp.ch_block + r * (jcp.ch_block / 2);
+                cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias_base + b_off * jcp.typesize_bia], is_scalar_store);
+            }
+
             for (int jj = 0; jj < ur_w; jj++) {
-                Vmm vmm_dst = get_acc_reg(r * ur_w * ur_ch_blocks + jj);
+                Vmm vmm_dst = get_acc_reg(r * ur_ch_blocks * ur_w + ur_w * ii + jj);
                 uni_vcvtdq2ps(vmm_dst, vmm_dst);
 
-                if (jcp.with_bias) {
-                    int b_off = r * (jcp.ch_block / 2);
-                    cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias_base + b_off * jcp.typesize_bia], true);
+                if (jcp.with_bias)
                     uni_vaddps(vmm_dst, vmm_dst, vmm_bias);
-                }
 
-                int s_off = jcp.is_oc_scale * (r * (jcp.ch_block / 2));
-                cvt2ps(mkldnn_f32, vmm_scale, ptr[reg_scales_base + s_off * sizeof(float)], true);
+                int s_off = jcp.is_oc_scale * (ii * jcp.ch_block + r * (jcp.ch_block / 2));
+                cvt2ps(mkldnn_f32, vmm_scale, ptr[reg_scales_base + s_off * sizeof(float)], is_scalar_store);
                 uni_vmulps(vmm_dst, vmm_dst, vmm_scale);
+            }
+        }
 
-                int o_off = jj * jcp.oc + r * (jcp.ch_block / 2);
-                if (jcp.with_sum) {
-                    uni_vpxor(vmm_prev_dst, vmm_prev_dst, vmm_prev_dst);
-                    cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + o_off * jcp.typesize_out], true);
-                    uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
+        int eltwise_inj_idx = 0;
+        int depthwise_inj_idx = 0;
+        for (int i = 0; i < p.len_; i++) {
+            int start_idx = 4 + r * ur_ch_blocks*ur_w;
+
+            auto& post_op = p.entry_[i];
+            if (post_op.is_eltwise()) {
+                eltwise_injectors[eltwise_inj_idx]->compute_vector_range(start_idx, start_idx + ur_ch_blocks * ur_w);
+                eltwise_inj_idx++;
+            } else if (post_op.is_depthwise()) {
+                mov(reg_d_weights, reinterpret_cast<size_t>(post_op.depthwise.weights_data));
+                mov(reg_d_bias, reinterpret_cast<size_t>(post_op.depthwise.biases_data));
+
+                add(reg_d_weights, reg_oc_off);
+                add(reg_d_bias, reg_oc_off);
+
+                if (r == 1) {
+                    add(reg_d_weights, (jcp.ch_block / 2) * sizeof(float));
+                    add(reg_d_bias, (jcp.ch_block / 2) * sizeof(float));
                 }
 
-                if (maybe_relu(0))
-                    uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
+                for (int ii = 0; ii < ur_ch_blocks; ii++) {
+                    depthwise_injectors[depthwise_inj_idx]->compute_vector_range(
+                            start_idx + ur_w * ii, start_idx + ur_w * ii + ur_w, reg_d_weights, reg_d_bias);
+
+                    add(reg_d_weights, jcp.ch_block * sizeof(float));
+                    add(reg_d_bias, jcp.ch_block * sizeof(float));
+                }
+
+                depthwise_inj_idx++;
+            } else if (post_op.is_sum(false)) {
+                for (int ii = 0; ii < ur_ch_blocks; ii++) {
+                    for (int jj = 0; jj < ur_w; jj++) {
+                        Vmm vmm_dst = get_acc_reg(r * ur_ch_blocks*ur_w + ur_w * ii + jj);
+                        int o_off = ii * jcp.ch_block + jj * jcp.oc + r * (jcp.ch_block / 2);
+
+                        cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + o_off * jcp.typesize_out], is_scalar_store);
+
+                        if (p_sum_scale == 1.f) {
+                            uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
+                        } else {
+                            uni_vfmadd231ps(vmm_dst, vmm_prev_dst, ptr[imm_addr64 + 0 * vlen]);
+                        }
+                    }
+                }
+            }
+        }
 
-                if (maybe_relu(1))
-                    uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
+        for (int ii = 0; ii < ur_ch_blocks; ii++) {
+            for (int jj = 0; jj < ur_w; jj++) {
+                Vmm vmm_dst = get_acc_reg(r * ur_ch_blocks * ur_w + ur_w * ii + jj);
+                int o_off = ii * jcp.ch_block + jj * jcp.oc + r * (jcp.ch_block / 2);
 
                 if (jcp.dst_dt != data_type::f32) {
                     if (attr_.round_mode_ == round_mode::nearest)
@@ -348,55 +374,13 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::store_dst(int ur_ch_blocks, int c
                         assert(!"unimplemented");
                 }
 
-                store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, true);
-            }
-        } else {
-            for (int ii = 0; ii < ur_ch_blocks; ii++) {
-                if (jcp.with_bias) {
-                    int b_off = ii * jcp.ch_block + r * (jcp.ch_block / 2);
-                    cvt2ps(jcp.bia_dt, vmm_bias, ptr[reg_bias_base + b_off * jcp.typesize_bia], false);
-                }
-
-                for (int jj = 0; jj < ur_w; jj++) {
-                    Vmm vmm_dst = get_acc_reg(r * ur_ch_blocks*ur_w + ur_w * ii + jj);
-                    uni_vcvtdq2ps(vmm_dst, vmm_dst);
-
-                    if (jcp.with_bias)
-                        uni_vaddps(vmm_dst, vmm_dst, vmm_bias);
-
-                    int s_off = jcp.is_oc_scale * (ii * jcp.ch_block + r * (jcp.ch_block / 2));
-                    cvt2ps(mkldnn_f32, vmm_scale, ptr[reg_scales_base + s_off * sizeof(float)], false);
-                    uni_vmulps(vmm_dst, vmm_dst, vmm_scale);
-
-                    int o_off = ii * jcp.ch_block + jj * jcp.oc + r * (jcp.ch_block / 2);
-                    if (jcp.with_sum) {
-                        cvt2ps(jcp.dst_dt, vmm_prev_dst, ptr[reg_output + o_off * jcp.typesize_out], false);
-                        uni_vaddps(vmm_dst, vmm_dst, vmm_prev_dst);
-                    }
-
-                    if (maybe_relu(0))
-                        uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
-
-                    if (maybe_relu(1))
-                        uni_vmaxps(vmm_dst, vmm_dst, vmm_zero);
-
-                    if (jcp.dst_dt != data_type::f32) {
-                        if (attr_.round_mode_ == round_mode::nearest)
-                            uni_vcvtps2dq(vmm_dst, vmm_dst);
-                        else if (attr_.round_mode_ == round_mode::down) {
-                            uni_vroundps(vmm_dst, vmm_dst, 1);
-                            uni_vcvtps2dq(vmm_dst, vmm_dst);
-                        } else
-                            assert(!"unimplemented");
-                    }
-
-                    store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, false);
-                }
+                store_dst(ptr[reg_output + o_off * jcp.typesize_out], vmm_dst, is_scalar_store);
             }
         }
     }
 
     push(reg_scales_base);
+    push(reg_oc_off);
 }
 
 template <cpu_isa_t isa>
@@ -415,6 +399,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::loop_body(int ur_ch_blocks, int c
     push(reg_kernel_base);
     push(reg_ch_work);
     push(reg_scales_base);
+    push(reg_oc_off);
 
     L(unrolled_w_label); {
         int ur_w = jcp.ur_w;
@@ -458,6 +443,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::loop_body(int ur_ch_blocks, int c
 
     L(exit_label);
 
+    pop(reg_oc_off);
     pop(reg_scales_base);
     pop(reg_ch_work);
     pop(reg_kernel_base);
@@ -467,6 +453,24 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::loop_body(int ur_ch_blocks, int c
 
 template <cpu_isa_t isa>
 void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::generate() {
+    const auto &p = attr_.post_ops_;
+    for (int i = 0; i < p.len_; i++) {
+        auto &post_op = p.entry_[i];
+        if (post_op.is_eltwise()) {
+            eltwise_injectors.push_back(new jit_uni_eltwise_injector_f32<isa>(
+                    this,
+                    post_op.eltwise.alg,
+                    post_op.eltwise.alpha,
+                    post_op.eltwise.beta
+            ));
+        } else if (post_op.is_depthwise()) {
+            depthwise_injectors.push_back(new jit_uni_depthwise_injector_f32<isa>(
+                    this,
+                    post_op.depthwise.alg
+            ));
+        }
+    }
+
     this->preamble();
 
     mov(reg_input_base, ptr[this->param1 + GET_OFF(src)]);
@@ -478,6 +482,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::generate() {
     mov(reg_kh, ptr[this->param1 + GET_OFF(kh_padding)]);
     mov(reg_kw, ptr[this->param1 + GET_OFF(kw_padding)]);
     mov(reg_ch_work, ptr[this->param1 + GET_OFF(ch_work)]);
+    mov(reg_oc_off, ptr[this->param1 + GET_OFF(oc_off)]);
 
     Label main_loop_label;
     Label tail_loop_label;
@@ -504,6 +509,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::generate() {
         add(reg_kernel_base, jcp.ch_block * jcp.kh * jcp.kw * jcp.typesize_in);
         add(reg_bias_base, jcp.ch_block * jcp.typesize_bia);
         add(reg_scales_base, jcp.is_oc_scale * jcp.ch_block * sizeof(float));
+        add(reg_oc_off, jcp.ch_block * sizeof(float));
 
         jmp(main_loop_label, T_NEAR);
     }
@@ -520,6 +526,7 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::generate() {
         add(reg_kernel_base, 1 * jcp.typesize_in);
         add(reg_bias_base, 1 * jcp.typesize_bia);
         add(reg_scales_base, jcp.is_oc_scale * 1 * sizeof(float));
+        add(reg_oc_off, 1 * sizeof(float));
 
         jmp(tail_loop_label, T_NEAR);
     }
@@ -527,6 +534,30 @@ void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::generate() {
     L(exit_label);
 
     this->postamble();
+
+    prepare_table();
+
+    for (auto& inj : eltwise_injectors)
+        inj->prepare_table();
+}
+
+template <cpu_isa_t isa>
+void jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::prepare_table() {
+    const auto &p = attr_.post_ops_;
+    const int sum_idx = p.find(primitive_kind::sum);
+    const float p_sum_scale = (sum_idx != -1) ? p.entry_[sum_idx].sum.scale : 1.f;
+
+    const int32_t cvals_sum_scale[] = {
+        float2int(p_sum_scale)
+    };
+
+    align(64);
+    L(l_table);
+    for (size_t i = 0; i < sizeof(cvals_sum_scale) / sizeof(cvals_sum_scale[0]); ++i) {
+        for (size_t d = 0; d < vlen / sizeof(int32_t); ++d) {
+            dd(cvals_sum_scale[i]);
+        }
+    }
 }
 
 template <cpu_isa_t isa>
@@ -534,14 +565,18 @@ bool jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::post_ops_ok(
         jit_conv_conf_t &jcp, const primitive_attr_t &attr) {
     const auto &p = attr.post_ops_;
 
-    auto is_relu = [&](int idx) { return p.entry_[idx].is_relu(); };
-    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(); };
+    auto is_eltwise = [&](int idx) { return p.entry_[idx].is_eltwise(); };
+    auto is_depthwise = [&](int idx) { return p.entry_[idx].is_depthwise(); };
+    auto is_sum = [&](int idx) { return p.entry_[idx].is_sum(false); };
+    auto is_simple = [&](int idx) { return is_eltwise(idx) || is_depthwise(idx); };
 
     switch (p.len_) {
-    case 0: return true; // no post_ops
-    case 1: return !jcp.with_eltwise && (is_relu(0) || is_sum(0)); // sum OR relu
-    case 2: return !jcp.with_eltwise && (is_sum(0) && is_relu(1)); // sum->relu
-    default: return false;
+        case 0: return true;
+        case 1: return is_simple(0) || is_sum(0);
+        case 2: return (is_sum(0) && is_simple(1)) || (is_simple(0) && is_sum(1)) ||
+                       (is_simple(0) && is_simple(1));
+        case 3: return (is_simple(0) && is_sum(1) && is_simple(2));
+        default: return false;
     }
 
     return false;
@@ -551,8 +586,7 @@ template <cpu_isa_t isa>
 status_t jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jcp,
         const convolution_desc_t &cd, const memory_desc_wrapper &src_d,
         const memory_desc_wrapper &weights_d, const memory_desc_wrapper &dst_d,
-        const memory_desc_wrapper &bias_pd, const primitive_attr_t &attr,
-        bool with_relu, float relu_negative_slope)
+        const memory_desc_wrapper &bias_pd, const primitive_attr_t &attr)
 {
     if (!mayiuse(isa)) return status::unimplemented;
 
@@ -593,8 +627,6 @@ status_t jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jc
 
     jcp.src_fmt = src_d.format();
     jcp.with_bias = cd.bias_desc.format != memory_format::undef;
-    jcp.with_eltwise = with_relu;
-    jcp.eltwise_alpha = relu_negative_slope;
 
     jcp.signed_input = (src_d.data_type() == data_type::s8) ? true : false;
 
@@ -610,13 +642,10 @@ status_t jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::init_conf(jit_conv_conf_t &jc
 
     const auto &p = attr.post_ops_;
     jcp.with_sum = p.find(primitive_kind::sum) != -1;
-    if (!jcp.with_eltwise) {
-        int eltwise_ind = p.find(primitive_kind::eltwise);
-        if (eltwise_ind != -1) {
-            jcp.with_eltwise  = true;
-            jcp.eltwise_alpha = p.entry_[eltwise_ind].eltwise.alpha;
-        }
-    }
+    const int eltwise_ind = p.find(primitive_kind::eltwise);
+    jcp.with_eltwise = eltwise_ind != -1;
+    if (jcp.with_eltwise)
+        jcp.eltwise = p.entry_[eltwise_ind].eltwise;
 
     auto desired_act_fmt = nhwc;
     auto desired_wei_fmt = isa == avx512_common ? Goihw16g : Goihw8g;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.hpp
index 9c9b41fc2..8bb781105 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_conv_kernel.hpp
@@ -21,6 +21,8 @@
 #include "jit_generator.hpp"
 #include "jit_primitive_conf.hpp"
 #include "type_helpers.hpp"
+#include "jit_uni_eltwise.hpp"
+#include "jit_uni_depthwise.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -36,6 +38,16 @@ struct jit_uni_x8s8s32x_dw_conv_fwd_kernel: public jit_generator {
         jit_ker = (void (*)(jit_conv_call_s *))this->getCode();
     }
 
+    ~jit_uni_x8s8s32x_dw_conv_fwd_kernel() {
+        for (auto inj : eltwise_injectors)
+           delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
+    }
+
     static bool post_ops_ok(jit_conv_conf_t &jcp,
             const primitive_attr_t &attr);
     static status_t init_conf(jit_conv_conf_t &jcp,
@@ -43,8 +55,7 @@ struct jit_uni_x8s8s32x_dw_conv_fwd_kernel: public jit_generator {
             const memory_desc_wrapper &weights_d,
             const memory_desc_wrapper &dst_d,
             const memory_desc_wrapper &bias_pd,
-            const primitive_attr_t &attr,
-            bool with_relu = false, float relu_negative_slope = 0.f);
+            const primitive_attr_t &attr);
 
     jit_conv_conf_t jcp;
     const primitive_attr_t &attr_;
@@ -84,6 +95,12 @@ private:
     reg64_t reg_tmp_64 = r15;
     reg8_t reg_tmp_8 = r15b;
 
+    reg64_t imm_addr64 = r10;
+
+    reg64_t reg_oc_off = iter_kw;
+    reg64_t reg_d_weights = aux1_reg_kernel;
+    reg64_t reg_d_bias = aux_reg_input;
+
     Vmm vmm_zero = Vmm(0);
     Vmm vmm_bias = Vmm(3);
     Vmm vmm_scale = Vmm(2);
@@ -99,11 +116,16 @@ private:
     inline void load_src(int ur_ch_blocks, int ch_step, int ur_w);
     inline void apply_filter(int ur_ch_blocks, int ch_step, int ur_w);
     inline void apply_filter_unrolled(int ur_ch_blocks, int ch_step, int ur_w);
-    inline bool maybe_relu(int position);
     inline void store_dst(int ur_ch_blocks, int ch_step, int ur_w);
     inline void loop_body(int ur_ch_blocks, int ch_step);
 
+    inline void prepare_table();
     void generate();
+
+    nstl::vector<jit_uni_eltwise_injector_f32<isa>*> eltwise_injectors;
+    nstl::vector<jit_uni_depthwise_injector_f32<isa>*> depthwise_injectors;
+
+    Xbyak::Label l_table;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.cpp
index bc31a3847..b102c5363 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.cpp
@@ -26,17 +26,17 @@ using namespace mkldnn::impl::status;
 using namespace mkldnn::impl::memory_format;
 using namespace mkldnn::impl::utils;
 
-template <cpu_isa_t isa, bool with_relu, data_type_t src_type, data_type_t dst_type>
-void _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, with_relu, src_type, dst_type>::execute_forward() {
+template <cpu_isa_t isa, data_type_t src_type, data_type_t dst_type>
+void _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, src_type, dst_type>::execute_forward() const {
     auto src = reinterpret_cast<const src_data_t*>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t*>(this->input_memory(1));
     auto bias = reinterpret_cast<const char*>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t*>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
     const auto &jcp = kernel_->jcp;
 
@@ -45,10 +45,10 @@ void _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, with_relu, src_type, dst_type>:
     int str_h = jcp.stride_h;
     int str_w = jcp.stride_w;
 
-    const size_t bia_dt_size = conf_.with_bias()
-        ? types::data_type_size(conf_.cdesc()->bias_desc.data_type) : 0;
+    const size_t bia_dt_size = pd()->with_bias()
+        ? types::data_type_size(pd()->desc()->bias_desc.data_type) : 0;
 
-    const auto &oscales = conf_.attr()->output_scales_;
+    const auto &oscales = pd()->attr()->output_scales_;
 
     int MB = jcp.mb;
     int chb_work = utils::div_up(jcp.nb_ch, jcp.nb_ch_blocking);
@@ -56,7 +56,7 @@ void _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, with_relu, src_type, dst_type>:
 
     auto kernel_params = [&](int ur_w_step, int ow, int oh, int ih, int kh,
             int kh_padding, int ch, int ch_num, int n) {
-        jit_conv_call_s par_conv = {};
+        auto par_conv = jit_conv_call_s();
 
         const int i_l_overflow = nstl::max(0, (jcp.l_pad - ow * str_w));
         const int i_r_overflow = nstl::max(jcp.iw, (ow * str_w
@@ -86,6 +86,7 @@ void _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, with_relu, src_type, dst_type>:
         par_conv.ch_work = nstl::min((ch + ch_num) * jcp.ch_block, jcp.oc) - ch*jcp.ch_block;
 
         par_conv.scales = &oscales.scales_[jcp.is_oc_scale * ch * jcp.ch_block];
+        par_conv.oc_off = ch * jcp.ch_block * sizeof(float);
 
         return par_conv;
     };
@@ -149,23 +150,25 @@ void _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, with_relu, src_type, dst_type>:
     parallel(0, ker);
 }
 
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, true, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, true, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, true, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, true, data_type::u8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, false, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, false, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, false, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, false, data_type::u8, data_type::f32>::execute_forward();
-
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, true, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, true, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, true, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, true, data_type::u8, data_type::f32>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, false, data_type::u8, data_type::u8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, false, data_type::u8, data_type::s8>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, false, data_type::u8, data_type::s32>::execute_forward();
-template void _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, false, data_type::u8, data_type::f32>::execute_forward();
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::u8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::u8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::u8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::u8, data_type::f32>;
+
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::s8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::s8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::s8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, data_type::s8, data_type::f32>;
+
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::u8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::u8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::u8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::u8, data_type::f32>;
+
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::s8, data_type::u8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::s8, data_type::s8>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::s8, data_type::s32>;
+template struct _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, data_type::s8, data_type::f32>;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.hpp
index 17d70c171..a6c3cf6c0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/jit_uni_x8s8s32x_dw_convolution.hpp
@@ -28,40 +28,40 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <cpu_isa_t isa, bool with_relu, impl::data_type_t src_type, impl::data_type_t dst_type>
+template <cpu_isa_t isa, impl::data_type_t src_type, impl::data_type_t dst_type>
 struct _jit_uni_x8s8s32x_dw_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
-        pd_t(engine_t *engine, const typename pd_t::base_desc_t *adesc,
+    struct pd_t: public cpu_convolution_fwd_pd_t {
+        pd_t(engine_t *engine, const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr,
                 hint_fwd_pd)
-            , jcp_({}) {}
+            , jcp_() {}
 
         DECLARE_COMMON_PD_T(
                 JIT_IMPL_NAME_HELPER("jit_dw:", isa, ""),
-                _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, with_relu, src_type, dst_type>);
+                _jit_uni_x8s8s32x_dw_convolution_fwd_t<isa, src_type, dst_type>);
 
         virtual status_t init() override {
             using namespace prop_kind;
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
-                && this->cdesc_().dst_desc.data_type == dst_type
+                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && this->desc()->dst_desc.data_type == dst_type
                 && IMPLICATION(this->with_bias(), utils::one_of(
-                    this->cdesc_().bias_desc.data_type, data_type::f32,
+                    this->desc()->bias_desc.data_type, data_type::f32,
                     data_type::s32, data_type::s8, data_type::u8))
-                && this->cdesc_().accum_data_type == data_type::s32;
+                && this->desc()->accum_data_type == data_type::s32;
             if (!ok) return status::unimplemented;
 
             return jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>::init_conf(jcp_,
-                        this->cdesc_(),
-                        this->src_pd_.desc(), *this->weights_pd_.desc(),
+                        *this->desc(),
+                        *this->src_pd_.desc(), *this->weights_pd_.desc(),
                         *this->dst_pd_.desc(), *this->bias_pd_.desc(),
-                        *this->attr(), with_relu, this->negative_slope());
+                        *this->attr());
         }
 
         jit_conv_conf_t jcp_;
@@ -84,35 +84,34 @@ struct _jit_uni_x8s8s32x_dw_convolution_fwd_t: public cpu_primitive_t {
         }
     };
 
-    _jit_uni_x8s8s32x_dw_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
-                                    const output_vector &outputs)
-            : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    { kernel_ = new jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>(conf_.jcp_, *conf_.attr()); }
+    _jit_uni_x8s8s32x_dw_convolution_fwd_t(const pd_t *apd,
+            const input_vector &inputs, const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs)
+    {
+        kernel_ = new jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa>(pd()->jcp_, *pd()->attr());
+    }
+
     ~_jit_uni_x8s8s32x_dw_convolution_fwd_t() { delete kernel_; };
 
     typedef typename prec_traits<data_type::u8>::type src_data_t;
     typedef typename prec_traits<data_type::s8>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const ;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     jit_uni_x8s8s32x_dw_conv_fwd_kernel<isa> *kernel_;
 };
 
 template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx2_x8s8s32x_dw_convolution_fwd_t = _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, false, src_type, dst_type>;
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_sse42_x8s8s32x_dw_convolution_fwd_t = _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, false, src_type, dst_type>;
-template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_avx2_x8s8s32x_dw_convolution_relu_t = _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, true, src_type, dst_type>;
+using jit_avx2_x8s8s32x_dw_convolution_fwd_t = _jit_uni_x8s8s32x_dw_convolution_fwd_t<avx2, src_type, dst_type>;
 template <impl::data_type_t src_type, impl::data_type_t dst_type>
-using jit_sse42_x8s8s32x_dw_convolution_relu_t = _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, true, src_type, dst_type>;
+using jit_sse42_x8s8s32x_dw_convolution_fwd_t = _jit_uni_x8s8s32x_dw_convolution_fwd_t<sse42, src_type, dst_type>;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.cpp
index e9da692a9..fa3c51468 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.cpp
@@ -30,44 +30,44 @@ namespace impl {
 namespace cpu {
 
 template <impl::data_type_t data_type>
-void nchw_pooling_fwd_t<data_type>::execute_forward() {
+void nchw_pooling_fwd_t<data_type>::execute_forward() const {
     using namespace alg_kind;
 
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
-    auto ws = conf_.desc()->alg_kind == alg_kind::pooling_max ?
+    auto ws = pd()->desc()->alg_kind == alg_kind::pooling_max ?
         reinterpret_cast<unsigned char *>(this->memory(1)) : nullptr;
 
-    const memory_desc_wrapper ws_d(conf_.workspace_pd());
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper ws_d(pd()->workspace_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
     const data_type_t ws_dt = ws ? ws_d.data_type() : data_type::undef;
 
     src += src_d.off_l(0);
     dst += dst_d.off_l(0);
 
-    const int MB = conf_.MB();
-    const int C = conf_.C();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
-    const int SD = conf_.KSD();
-    const int SH = conf_.KSH();
-    const int SW = conf_.KSW();
-    const int padF = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
-    const int padBack = conf_.padBack();
-    const int padB = conf_.padB();
-    const int padR = conf_.padR();
-
-    auto alg = conf_.desc()->alg_kind;
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+    const int SD = pd()->KSD();
+    const int SH = pd()->KSH();
+    const int SW = pd()->KSW();
+    const int padF = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
+    const int padBack = pd()->padBack();
+    const int padB = pd()->padB();
+    const int padR = pd()->padR();
+
+    auto alg = pd()->desc()->alg_kind;
     
     auto set_ws = [=](int mb, int c, int od, int oh, int ow, int value) {
         if (ws) {
@@ -160,7 +160,7 @@ void nchw_pooling_fwd_t<data_type>::execute_forward() {
     };
 
 
-    if (conf_.desc()->alg_kind == pooling_max) {
+    if (pd()->desc()->alg_kind == pooling_max) {
         parallel_nd(MB, C, OD, OH, OW,
             [&](int mb, int c, int od, int oh, int ow) {
             size_t dst_offset
@@ -191,37 +191,37 @@ void nchw_pooling_fwd_t<data_type>::execute_forward() {
 }
 
 template <impl::data_type_t data_type>
-void nchw_pooling_bwd_t<data_type>::execute_backward() {
+void nchw_pooling_bwd_t<data_type>::execute_backward() const {
     using namespace alg_kind;
 
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
-    auto ws = conf_.desc()->alg_kind != alg_kind::pooling_max ? nullptr :
+    auto ws = pd()->desc()->alg_kind != alg_kind::pooling_max ? nullptr :
         reinterpret_cast<const unsigned char *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
 
-    const memory_desc_wrapper ws_d(conf_.workspace_pd());
-
-    const int MB = conf_.MB();
-    const int C = conf_.C();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
-    const int SD = conf_.KSD();
-    const int SH = conf_.KSH();
-    const int SW = conf_.KSW();
-    const int padF = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
-
-    const bool is_3d = conf_.desc()->diff_src_desc.ndims == 5;
-
-    auto alg = conf_.desc()->alg_kind;
+    const memory_desc_wrapper ws_d(pd()->workspace_pd());
+
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+    const int SD = pd()->KSD();
+    const int SH = pd()->KSH();
+    const int SW = pd()->KSW();
+    const int padF = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
+
+    const bool is_3d = pd()->desc()->diff_src_desc.ndims == 5;
+
+    auto alg = pd()->desc()->alg_kind;
 
     auto apply_offset = [=](int index, int offset) {
         return (index > offset) ? index - offset : 0;
@@ -296,7 +296,7 @@ void nchw_pooling_bwd_t<data_type>::execute_backward() {
         }
     };
 
-    if (conf_.desc()->alg_kind == pooling_max) {
+    if (pd()->desc()->alg_kind == pooling_max) {
         parallel_nd(MB, C, [&](int mb, int c) {
             size_t diff_dst_offset = (size_t)mb*C*OD*OH*OW
                 + (size_t)c*OD*OH*OW;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.hpp
index 951ef502d..0e57565f5 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/nchw_pooling.hpp
@@ -72,19 +72,19 @@ struct nchw_pooling_fwd_t: public cpu_primitive_t {
         }
     };
 
-    nchw_pooling_fwd_t(const pd_t *pd, const input_vector &inputs,
+    nchw_pooling_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -133,19 +133,19 @@ struct nchw_pooling_bwd_t: public cpu_primitive_t {
         }
     };
 
-    nchw_pooling_bwd_t(const pd_t *pd, const input_vector &inputs,
+    nchw_pooling_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.cpp
index d755538c6..66523a644 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.cpp
@@ -17,11 +17,13 @@
 #include <assert.h>
 #include <math.h>
 
-#include "cpu_batch_normalization_utils.hpp"
 #include "c_types_map.hpp"
+#include "type_helpers.hpp"
+
+#include "cpu_batch_normalization_utils.hpp"
 #include "jit_generator.hpp"
+
 #include "ncsp_batch_normalization.hpp"
-#include "type_helpers.hpp"
 
 // clang 6 and 7 generate incorrect code with OMP_SIMD in some particular cases
 #if (defined __clang_major__) && (__clang_major__ >= 6)
@@ -34,38 +36,17 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-typedef float data_t;
-ncsp_batch_normalization_fwd_t::ncsp_batch_normalization_fwd_t(const pd_t *pd,
-        const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), stats_reduction_(nullptr),
-    tmp_mean_(nullptr), tmp_variance_(nullptr), conf_(*pd) {
-    if (!conf_.stats_is_src()) {
-        this->stats_reduction_ = (data_t *)malloc(
-                conf_.C() * mkldnn_get_max_threads() * sizeof(data_t), 64);
-        if (!conf_.is_training()) {
-            this->tmp_mean_ = (data_t *)malloc(conf_.C() * sizeof(data_t), 64);
-            this->tmp_variance_
-                    = (data_t *)malloc(conf_.C() * sizeof(data_t), 64);
-        }
-    }
-}
-ncsp_batch_normalization_fwd_t::~ncsp_batch_normalization_fwd_t() {
-    if (!conf_.stats_is_src()) {
-        free(this->stats_reduction_);
-        if (!conf_.is_training()) {
-            free(this->tmp_mean_);
-            free(this->tmp_variance_);
-        }
-    }
-}
+using namespace memory_tracking::names;
 
-void ncsp_batch_normalization_fwd_t::execute_forward() {
+void ncsp_batch_normalization_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
-    const bool calculate_stats = !conf_.stats_is_src();
-    const bool save_stats = conf_.is_training();
-    const bool is_training = conf_.is_training();
-    const bool fuse_bn_relu = conf_.fuse_bn_relu();
+    auto scratchpad = this->scratchpad();
+
+    const bool calculate_stats = !pd()->stats_is_src();
+    const bool save_stats = pd()->is_training();
+    const bool is_training = pd()->is_training();
+    const bool fuse_bn_relu = pd()->fuse_bn_relu();
 
     data_t *mean, *variance;
     if (!calculate_stats) {
@@ -78,25 +59,25 @@ void ncsp_batch_normalization_fwd_t::execute_forward() {
             mean = reinterpret_cast<data_t *>(this->memory(1));
             variance = reinterpret_cast<data_t *>(this->memory(2));
         } else {
-            mean = this->tmp_mean_;
-            variance = this->tmp_variance_;
+            mean = scratchpad.get<data_t>(key_bnorm_tmp_mean);
+            variance = scratchpad.get<data_t>(key_bnorm_tmp_var);
         }
     }
-    auto idx_scale_shift = 1 + 2 * conf_.stats_is_src();
+    auto idx_scale_shift = 1 + 2 * pd()->stats_is_src();
     auto scaleshift = reinterpret_cast<const data_t *>(
             this->input_memory(idx_scale_shift));
-    auto ws = reinterpret_cast<uint8_t *>(this->memory(conf_.ws_idx()));
-    data_t *ws_reduce = this->stats_reduction_;
+    auto ws = reinterpret_cast<uint8_t *>(this->memory(pd()->ws_idx()));
+    auto *ws_reduce = scratchpad.get<data_t>(key_bnorm_reduction);
 
-    const float eps = conf_.desc()->batch_norm_epsilon;
-    const bool use_scaleshift = conf_.use_scaleshift();
-    const bool with_relu = conf_.with_relu_post_op();
+    const float eps = pd()->desc()->batch_norm_epsilon;
+    const bool use_scaleshift = pd()->use_scaleshift();
+    const bool with_relu = pd()->with_relu_post_op();
     auto maybe_post_op
             = [&](data_t res) { return (with_relu && res < 0) ? 0 : res; };
-    const bool has_spatial = utils::one_of(conf_.ndims(), 4, 5);
-    int SP = (has_spatial) ? conf_.H() * conf_.W() * conf_.D() : 1;
-    size_t N = conf_.MB();
-    size_t C = conf_.C();
+    const bool has_spatial = utils::one_of(pd()->ndims(), 4, 5);
+    int SP = (has_spatial) ? pd()->H() * pd()->W() * pd()->D() : 1;
+    size_t N = pd()->MB();
+    size_t C = pd()->C();
 
     int nthr = mkldnn_get_max_threads();
     size_t l3_size_ = get_cache_size(3, true) * nthr / 2;
@@ -232,44 +213,30 @@ void ncsp_batch_normalization_fwd_t::execute_forward() {
     });
 }
 
-ncsp_batch_normalization_bwd_t::ncsp_batch_normalization_bwd_t(const pd_t *pd,
-        const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
-    , stats_reduction_(nullptr), tmp_diff_scaleshift_(nullptr) {
-    this->stats_reduction_ = (data_t *)malloc(
-            conf_.C() * 2 * mkldnn_get_max_threads() * sizeof(data_t), 64);
-    if (!(conf_.use_scaleshift()
-                && conf_.desc()->prop_kind == prop_kind::backward))
-        this->tmp_diff_scaleshift_
-                = (data_t *)malloc(conf_.C() * 2 * sizeof(data_t), 64);
-}
-
-ncsp_batch_normalization_bwd_t::~ncsp_batch_normalization_bwd_t() {
-    free(this->stats_reduction_);
-    free(this->tmp_diff_scaleshift_);
-}
-
-void ncsp_batch_normalization_bwd_t::execute_backward() {
+void ncsp_batch_normalization_bwd_t::execute_backward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto mean = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto variance = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(3));
     auto scaleshift = reinterpret_cast<const data_t *>(this->input_memory(4));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
-    auto diff_scaleshift = (this->memory(1)) ?
-            reinterpret_cast<data_t *>(this->memory(1)) :
-            this->tmp_diff_scaleshift_;
+
+    auto scratchpad = this->scratchpad();
+
+    auto diff_scaleshift = this->memory(1)
+        ? reinterpret_cast<data_t *>(this->memory(1))
+        : scratchpad.get<data_t>(key_bnorm_tmp_diff_ss);
     auto ws = reinterpret_cast<const uint8_t *>(
-            this->input_memory(conf_.ws_idx()));
-    data_t *ws_reduce = this->stats_reduction_;
-
-    const bool has_spatial = utils::one_of(conf_.ndims(), 4, 5);
-    int SP = (has_spatial) ? conf_.H() * conf_.W() * conf_.D() : 1;
-    size_t C = conf_.C(), N = conf_.MB();
-    const bool use_scaleshift = conf_.use_scaleshift();
-    const float eps = conf_.desc()->batch_norm_epsilon;
-    const bool calculate_diff_stats = !conf_.omit_stats();
-    const bool fuse_bn_relu = conf_.fuse_bn_relu();
+            this->input_memory(pd()->ws_idx()));
+    auto *ws_reduce = scratchpad.get<data_t>(key_bnorm_reduction);
+
+    const bool has_spatial = utils::one_of(pd()->ndims(), 4, 5);
+    int SP = (has_spatial) ? pd()->H() * pd()->W() * pd()->D() : 1;
+    size_t C = pd()->C(), N = pd()->MB();
+    const bool use_scaleshift = pd()->use_scaleshift();
+    const float eps = pd()->desc()->batch_norm_epsilon;
+    const bool calculate_diff_stats = !pd()->use_global_stats();
+    const bool fuse_bn_relu = pd()->fuse_bn_relu();
 
     int nthr = mkldnn_get_max_threads();
     size_t l3_size_ = get_cache_size(3, true) * nthr / 2;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.hpp
index ddf6df6fe..a723e9a77 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ncsp_batch_normalization.hpp
@@ -20,11 +20,12 @@
 #include <assert.h>
 
 #include "c_types_map.hpp"
-#include "cpu_batch_normalization_pd.hpp"
-#include "cpu_engine.hpp"
+#include "memory_tracking.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
+#include "cpu_batch_normalization_pd.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
@@ -40,9 +41,11 @@ struct ncsp_batch_normalization_fwd_t : public cpu_primitive_t {
         DECLARE_COMMON_PD_T("ncsp_bnorm:any", ncsp_batch_normalization_fwd_t);
 
         virtual status_t init() override {
-            using namespace prop_kind;
             using namespace data_type;
+            using namespace prop_kind;
+
             assert(engine()->kind() == engine_kind::cpu);
+
             bool ok = true
                 && is_fwd()
                 && !has_zero_dim_memory()
@@ -52,41 +55,56 @@ struct ncsp_batch_normalization_fwd_t : public cpu_primitive_t {
                 && utils::one_of(data_pd_.desc()->format, memory_format::nchw,
                         memory_format::ncdhw, memory_format::nc)
                 && (attr()->has_default_values() || this->with_relu_post_op());
-            if (!ok)
-                return status::unimplemented;
+            if (!ok) return status::unimplemented;
 
-            if (is_training() && fuse_bn_relu()) {
+            if (is_training() && fuse_bn_relu())
                 bn_init_default_ws(this, this->workspace_pd_, 8);
-            }
 
             if (stats_is_src() || is_training()) {
                 memory_desc_t stats_d;
                 dims_t stats_dims = { C() };
-                mkldnn_memory_desc_init(&stats_d, 1, stats_dims, data_type::f32,
-                        memory_format::x);
+                mkldnn_memory_desc_init(&stats_d, 1, stats_dims,
+                        data_type::f32, memory_format::x);
                 mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
                 variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
             }
 
+            init_scratchpad();
+
             return success;
         }
+
+    private:
+        void init_scratchpad() {
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            if (!stats_is_src()) {
+                scratchpad.book(key_bnorm_reduction,
+                        sizeof(data_t) * C() * mkldnn_get_max_threads());
+
+                if (!is_training()) {
+                    scratchpad.book(key_bnorm_tmp_mean, sizeof(data_t) * C());
+                    scratchpad.book(key_bnorm_tmp_var, sizeof(data_t) * C());
+                }
+            }
+        }
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    ncsp_batch_normalization_fwd_t(const pd_t *pd, const input_vector &inputs,
-            const output_vector &outputs);
-    ~ncsp_batch_normalization_fwd_t();
+    ncsp_batch_normalization_fwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+    ~ncsp_batch_normalization_fwd_t() {}
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    data_t *stats_reduction_, *tmp_mean_, *tmp_variance_;
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 struct ncsp_batch_normalization_bwd_t : public cpu_primitive_t {
@@ -95,14 +113,14 @@ struct ncsp_batch_normalization_bwd_t : public cpu_primitive_t {
                 const primitive_attr_t *attr,
                 const batch_normalization_fwd_pd_t *hint_fwd_pd)
             : cpu_batch_normalization_bwd_pd_t(
-                      engine, adesc, attr, hint_fwd_pd) {}
+                    engine, adesc, attr, hint_fwd_pd) {}
 
         DECLARE_COMMON_PD_T("ncsp_bnorm:any", ncsp_batch_normalization_bwd_t);
 
         virtual status_t init() override {
-            using namespace prop_kind;
             using namespace data_type;
             assert(engine()->kind() == engine_kind::cpu);
+
             bool ok = true
                 && is_bwd()
                 && !has_zero_dim_memory()
@@ -112,42 +130,54 @@ struct ncsp_batch_normalization_bwd_t : public cpu_primitive_t {
                 && utils::one_of(data_pd_.desc()->format, memory_format::nchw,
                         memory_format::ncdhw, memory_format::nc)
                 && attr()->has_default_values();
-            if (!ok)
-                return status::unimplemented;
+            if (!ok) return status::unimplemented;
 
             if (fuse_bn_relu()) {
                 bn_init_default_ws(this, this->workspace_pd_, 8);
                 const size_t this_ws_sz
-                        = memory_desc_wrapper(this->workspace_pd()).size();
-
-                bool ws_ok = true && hint_fwd_pd_->workspace_pd()
-                        && memory_desc_wrapper(hint_fwd_pd_->workspace_pd())
-                                        .size()
-                                == this_ws_sz;
-                if (!ws_ok)
-                    return status::unimplemented;
+                    = memory_desc_wrapper(this->workspace_pd()).size();
+
+                bool ws_ok = true
+                    && hint_fwd_pd_->workspace_pd()
+                    && memory_desc_wrapper(hint_fwd_pd_->workspace_pd()).size()
+                    == this_ws_sz;
+                if (!ws_ok) return status::unimplemented;
             }
 
+            init_scratchpad();
+
             return success;
         }
+
+    private:
+        void init_scratchpad() {
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            scratchpad.book(key_bnorm_reduction,
+                    sizeof(data_t) * 2 * C() * mkldnn_get_max_threads());
+            if (!(use_scaleshift() && desc()->prop_kind == prop_kind::backward))
+                scratchpad.book(key_bnorm_tmp_diff_ss,
+                        sizeof(data_t) * 2 * C());
+        }
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    ncsp_batch_normalization_bwd_t(const pd_t *pd, const input_vector &inputs,
-            const output_vector &outputs);
-    ~ncsp_batch_normalization_bwd_t();
-    virtual void execute(event_t *e) {
+    ncsp_batch_normalization_bwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+    ~ncsp_batch_normalization_bwd_t() {}
+
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
-
-    data_t *stats_reduction_, *tmp_diff_scaleshift_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.cpp
index 1fc47887e..553fddc09 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.cpp
@@ -58,7 +58,7 @@ namespace nhwc_pooling {
 
 template <impl::data_type_t data_type>
 void nhwc_pooling_fwd_t<data_type>::array_div_by_const(const int n,
-                   const data_t *src, const size_t num, data_t *dst)
+        const data_t *src, const size_t num, data_t *dst) const
 {
     for (int i = 0; i < n; ++i)
     {
@@ -69,8 +69,8 @@ void nhwc_pooling_fwd_t<data_type>::array_div_by_const(const int n,
 }
 
 template <impl::data_type_t data_type>
-void nhwc_pooling_fwd_t<data_type>::array_add(const int n,
-                                              const data_t *src, data_t *dst)
+void nhwc_pooling_fwd_t<data_type>::array_add(const int n, const data_t *src,
+        data_t *dst) const
 {
     for (int i = 0;  i < n; ++i)
     {
@@ -79,44 +79,44 @@ void nhwc_pooling_fwd_t<data_type>::array_add(const int n,
 }
 
 template <impl::data_type_t data_type>
-void nhwc_pooling_fwd_t<data_type>::execute_forward() {
+void nhwc_pooling_fwd_t<data_type>::execute_forward() const {
     using namespace alg_kind;
     using namespace prop_kind;
     using namespace nhwc_pooling;
 
-    auto alg = conf_.desc()->alg_kind;
+    auto alg = pd()->desc()->alg_kind;
 
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
     unsigned char * ws = reinterpret_cast<unsigned char *>(
                   alg == pooling_max
-                      && conf_.desc()->prop_kind == forward_training ?
+                      && pd()->desc()->prop_kind == forward_training ?
                   this->memory(1) : nullptr
               );
 
-    const memory_desc_wrapper MEM_D(dst)(conf_.dst_pd());
-    const memory_desc_wrapper MEM_D(ws)(conf_.workspace_pd());
-    const memory_desc_wrapper MEM_D(src)(conf_.src_pd());
-
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
-    const int SD = conf_.KSD();
-    const int SH = conf_.KSH();
-    const int SW = conf_.KSW();
-    const int padF = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
-    const int MB = conf_.MB();
-    const int OC = conf_.C();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-
-    const bool is_3d = conf_.desc()->src_desc.ndims == 5;
+    const memory_desc_wrapper MEM_D(dst)(pd()->dst_pd());
+    const memory_desc_wrapper MEM_D(ws)(pd()->workspace_pd());
+    const memory_desc_wrapper MEM_D(src)(pd()->src_pd());
+
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+    const int SD = pd()->KSD();
+    const int SH = pd()->KSH();
+    const int SW = pd()->KSW();
+    const int padF = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
+    const int MB = pd()->MB();
+    const int OC = pd()->C();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+
+    const bool is_3d = pd()->desc()->src_desc.ndims == 5;
     const data_type_t ws_dt = ws ? ws_d.data_type() : data_type::undef;
 
     DECLARE_READ_STRIDES(src);
@@ -234,38 +234,38 @@ void nhwc_pooling_fwd_t<data_type>::execute_forward() {
 }
 
 template <impl::data_type_t data_type>
-void nhwc_pooling_bwd_t<data_type>::execute_backward() {
+void nhwc_pooling_bwd_t<data_type>::execute_backward() const {
     using namespace alg_kind;
     using namespace nhwc_pooling;
 
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
-    auto ws = conf_.desc()->alg_kind != alg_kind::pooling_max ? nullptr
+    auto ws = pd()->desc()->alg_kind != alg_kind::pooling_max ? nullptr
         : reinterpret_cast<const unsigned char *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
 
-    const memory_desc_wrapper MEM_D(diff_dst)(conf_.diff_dst_pd());
-    const memory_desc_wrapper MEM_D(ws)(conf_.workspace_pd());
-    const memory_desc_wrapper MEM_D(diff_src)(conf_.diff_src_pd());
-
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
-    const int SD = conf_.KSD();
-    const int SH = conf_.KSH();
-    const int SW = conf_.KSW();
-    const int OC = conf_.C();
-    const int padF = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-
-    const bool is_3d = conf_.desc()->diff_src_desc.ndims == 5;
-    auto alg = conf_.desc()->alg_kind;
+    const memory_desc_wrapper MEM_D(diff_dst)(pd()->diff_dst_pd());
+    const memory_desc_wrapper MEM_D(ws)(pd()->workspace_pd());
+    const memory_desc_wrapper MEM_D(diff_src)(pd()->diff_src_pd());
+
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+    const int SD = pd()->KSD();
+    const int SH = pd()->KSH();
+    const int SW = pd()->KSW();
+    const int OC = pd()->C();
+    const int padF = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+
+    const bool is_3d = pd()->desc()->diff_src_desc.ndims == 5;
+    auto alg = pd()->desc()->alg_kind;
 
     DECLARE_READ_STRIDES(diff_src);
     DECLARE_READ_STRIDES(diff_dst);
@@ -274,7 +274,7 @@ void nhwc_pooling_bwd_t<data_type>::execute_backward() {
         return (index > offset) ? index - offset : 0;
     };
 
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
 
     parallel_nd(MB, ID, IH, IW,
         [&](int mb, int id, int ih, int iw) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.hpp
index 91cb2abd5..c510b7748 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/nhwc_pooling.hpp
@@ -90,27 +90,27 @@ struct nhwc_pooling_fwd_t: public cpu_primitive_t {
         }
     };
 
-    nhwc_pooling_fwd_t(const pd_t *pd, const input_vector &inputs,
+    nhwc_pooling_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
+    void execute_forward() const;
     void array_div_by_const(const int n, const data_t *src, const size_t num,
-            data_t *dst);
-    void array_add(const int n, const data_t *src, data_t *dst);
+            data_t *dst) const;
+    void array_add(const int n, const data_t *src, data_t *dst) const;
 
     template <bool use_workspace>
     void array_nhwc_max(const int n, data_t *dst, const data_t *src,
             unsigned char *ws, const size_t ws_offset, const data_type_t ws_dt,
-            const int index) {
+            const int index) const {
         assert(!((use_workspace == false) ^ (!ws))); // ensure ws pointer exists
         PRAGMA_OMP_SIMD()
         for (int oc = 0; oc < n; ++oc) {
@@ -158,7 +158,7 @@ private:
 
     template <bool use_workspace>
     void array_nhwc_initialize(const int n, data_t *dst, unsigned char *ws,
-            const size_t ws_offset, const data_type_t ws_dt) {
+            const size_t ws_offset, const data_type_t ws_dt) const {
         assert(!((use_workspace == false) ^ (!ws))); // ensure ws pointer exists
         for (int oc = 0; oc < n; ++oc) {
             if (use_workspace) {
@@ -172,7 +172,7 @@ private:
         }
     }
 
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -224,19 +224,19 @@ struct nhwc_pooling_bwd_t: public cpu_primitive_t {
         }
     };
 
-    nhwc_pooling_bwd_t(const pd_t *pd, const input_vector &inputs,
+    nhwc_pooling_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }// namespace cpu
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.cpp
index 96eb50baf..f7162ff97 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.cpp
@@ -18,9 +18,12 @@
 #include <math.h>
 
 #include "c_types_map.hpp"
+#include "type_helpers.hpp"
+
+#include "cpu_batch_normalization_utils.hpp"
 #include "jit_generator.hpp"
+
 #include "nspc_batch_normalization.hpp"
-#include "type_helpers.hpp"
 
 // clang 6 and 7 generate incorrect code with OMP_SIMD in some particular cases
 #if (defined __clang_major__) && (__clang_major__ >= 6)
@@ -33,36 +36,21 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-typedef float data_t;
-nspc_batch_normalization_fwd_t::nspc_batch_normalization_fwd_t(const pd_t *pd,
-        const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), stats_reduction_(nullptr),
-    tmp_mean_(nullptr), tmp_variance_(nullptr), conf_(*pd) {
-    if (!conf_.stats_is_src()) {
-        this->stats_reduction_ = (data_t *)malloc(
-                nstl::max(conf_.C(), 16) * mkldnn_get_max_threads() * sizeof(data_t), 64);
-        this->tmp_mean_ = (data_t *)malloc(mkldnn_get_max_threads() *
-                nstl::max(conf_.C(), 16) * sizeof(data_t), 64);
-        this->tmp_variance_
-                = (data_t *)malloc(mkldnn_get_max_threads() *
-                       nstl::max(conf_.C(), 16) * sizeof(data_t), 64);
-    }
-}
-nspc_batch_normalization_fwd_t::~nspc_batch_normalization_fwd_t() {
-    if (!conf_.stats_is_src()) {
-        free(this->stats_reduction_);
-        free(this->tmp_mean_);
-        free(this->tmp_variance_);
-    }
-}
+using namespace memory_tracking::names;
 
-void nspc_batch_normalization_fwd_t::execute_forward() {
+void nspc_batch_normalization_fwd_t::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
-    const bool save_stats = conf_.is_training();
-    const bool is_training = conf_.is_training();
-    const bool fuse_bn_relu = conf_.fuse_bn_relu();
-    const bool calculate_stats = !conf_.stats_is_src();
-    const bool with_relu = conf_.with_relu_post_op();
+
+    const bool save_stats = pd()->is_training();
+    const bool is_training = pd()->is_training();
+    const bool fuse_bn_relu = pd()->fuse_bn_relu();
+    const bool calculate_stats = !pd()->stats_is_src();
+    const bool with_relu = pd()->with_relu_post_op();
+
+    auto scratchpad = this->scratchpad();
+    auto tmp_mean = scratchpad.get<data_t>(key_bnorm_tmp_mean);
+    auto tmp_var = scratchpad.get<data_t>(key_bnorm_tmp_var);
+
     data_t *mean, *variance;
     if (!calculate_stats) {
         mean = reinterpret_cast<data_t *>(
@@ -74,24 +62,24 @@ void nspc_batch_normalization_fwd_t::execute_forward() {
             mean = reinterpret_cast<data_t *>(this->memory(1));
             variance = reinterpret_cast<data_t *>(this->memory(2));
         } else {
-            mean = this->tmp_mean_;
-            variance = this->tmp_variance_;
+            mean = tmp_mean;
+            variance = tmp_var;
         }
     }
-    auto idx_scaleshift = 1 + 2 * conf_.stats_is_src();
+    auto idx_scaleshift = 1 + 2 * pd()->stats_is_src();
     auto scaleshift = reinterpret_cast<const data_t *>(
             this->input_memory(idx_scaleshift));
 
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
-    auto ws = reinterpret_cast<uint8_t *>(this->memory(conf_.ws_idx()));
-    auto ws_reduce = this->stats_reduction_;
+    auto ws = reinterpret_cast<uint8_t *>(this->memory(pd()->ws_idx()));
+    auto *ws_reduce = scratchpad.get<data_t>(key_bnorm_reduction);
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
-    const int SP = conf_.H() * conf_.W() * conf_.D();
+    const int N = pd()->MB();
+    const int C = pd()->C();
+    const int SP = pd()->H() * pd()->W() * pd()->D();
 
-    const float eps = conf_.desc()->batch_norm_epsilon;
-    const bool use_scaleshift = conf_.use_scaleshift();
+    const float eps = pd()->desc()->batch_norm_epsilon;
+    const bool use_scaleshift = pd()->use_scaleshift();
     auto maybe_post_op
             = [&](data_t res) { return (with_relu && res < 0) ? 0 : res; };
 
@@ -100,8 +88,8 @@ void nspc_batch_normalization_fwd_t::execute_forward() {
         int N_s = 0, N_e = 0, C_s = 0, C_e = 0;
         balance211(N, nthr, ithr, N_s, N_e);
         balance211(C, nthr, ithr, C_s, C_e);
-        data_t *mean_loc = this->tmp_mean_ + nstl::max(C, 16)*ithr;
-        data_t *variance_loc = this->tmp_variance_ + nstl::max(C,16)*ithr;
+        data_t *mean_loc = tmp_mean + nstl::max(C, 16) * ithr;
+        data_t *variance_loc = tmp_var + nstl::max(C, 16) * ithr;
 
         if (calculate_stats) {
             for (int c = 0; c < C; c++)
@@ -187,45 +175,32 @@ void nspc_batch_normalization_fwd_t::execute_forward() {
     });
 }
 
-nspc_batch_normalization_bwd_t::nspc_batch_normalization_bwd_t(const pd_t *pd,
-        const input_vector &inputs, const output_vector &outputs)
-    : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-    this->stats_reduction_ = (data_t *)malloc(
-            conf_.C() * 2 * mkldnn_get_max_threads() * sizeof(data_t), 64);
-    this->tmp_diff_scaleshift_
-            = (data_t *)malloc((mkldnn_get_max_threads() + 1) * conf_.C() * 2 *
-                    sizeof(data_t), 64);
-}
-nspc_batch_normalization_bwd_t::~nspc_batch_normalization_bwd_t() {
-    free(this->stats_reduction_);
-    free(this->tmp_diff_scaleshift_);
-}
-
-
-void nspc_batch_normalization_bwd_t::execute_backward() {
+void nspc_batch_normalization_bwd_t::execute_backward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto mean = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto variance = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(3));
     auto scaleshift = reinterpret_cast<const data_t *>(this->input_memory(4));
     auto ws = reinterpret_cast<const uint8_t *>(
-            this->input_memory(conf_.ws_idx()));
+            this->input_memory(pd()->ws_idx()));
+
+    auto scratchpad = this->scratchpad();
+    auto tmp_diff_ss = scratchpad.get<data_t>(key_bnorm_tmp_diff_ss);
 
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
-    auto diff_scaleshift = (this->memory(1)) ?
-            reinterpret_cast<data_t *>(this->memory(1)) :
-            this->tmp_diff_scaleshift_;
+    auto diff_scaleshift = this->memory(1)
+        ? reinterpret_cast<data_t *>(this->memory(1)) : tmp_diff_ss;
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
-    const int SP = conf_.D() * conf_.H() * conf_.W();
+    const int N = pd()->MB();
+    const int C = pd()->C();
+    const int SP = pd()->D() * pd()->H() * pd()->W();
     data_t *diff_gamma = diff_scaleshift, *diff_beta = diff_scaleshift + C;
-    data_t *ws_reduce = this->stats_reduction_;
+    auto *ws_reduce = scratchpad.get<data_t>(key_bnorm_reduction);
 
-    const float eps = conf_.desc()->batch_norm_epsilon;
-    const bool use_scaleshift = conf_.use_scaleshift();
-    const bool calculate_diff_stats = !conf_.omit_stats();
-    const bool fuse_bn_relu = conf_.fuse_bn_relu();
+    const float eps = pd()->desc()->batch_norm_epsilon;
+    const bool use_scaleshift = pd()->use_scaleshift();
+    const bool calculate_diff_stats = !pd()->use_global_stats();
+    const bool fuse_bn_relu = pd()->fuse_bn_relu();
 
     assert(mkldnn_thr_syncable());
     parallel(0, [&](const int ithr, const int nthr) {
@@ -233,9 +208,8 @@ void nspc_batch_normalization_bwd_t::execute_backward() {
         balance211(N, nthr, ithr, N_s, N_e);
         balance211(C, nthr, ithr, C_s, C_e);
 
-        data_t *diff_gamma_loc = this->tmp_diff_scaleshift_ + 2*C + C*ithr;
-        data_t *diff_beta_loc = this->tmp_diff_scaleshift_ + 2*C + C*nthr
-            + C*ithr;
+        data_t *diff_gamma_loc = tmp_diff_ss + 2 * C + C * ithr;
+        data_t *diff_beta_loc = tmp_diff_ss + 2 * C + C * (nthr + ithr);
 
         for (int c = 0; c < C; c++) {
             ws_reduce[C * ithr + c] = 0.;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.hpp
index 168caf97f..6c1ec2532 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/nspc_batch_normalization.hpp
@@ -20,11 +20,12 @@
 #include <assert.h>
 
 #include "c_types_map.hpp"
-#include "cpu_batch_normalization_pd.hpp"
-#include "cpu_engine.hpp"
+#include "memory_tracking.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
+#include "cpu_batch_normalization_pd.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
@@ -40,9 +41,11 @@ struct nspc_batch_normalization_fwd_t : public cpu_primitive_t {
         DECLARE_COMMON_PD_T("nspc_bnorm:any", nspc_batch_normalization_fwd_t);
 
         virtual status_t init() override {
-            using namespace prop_kind;
             using namespace data_type;
+            using namespace prop_kind;
+
             assert(engine()->kind() == engine_kind::cpu);
+
             bool ok = true
                 /* the algorithm requires barriers while switching
                  * between parallelization over N and C dimensions */
@@ -54,8 +57,7 @@ struct nspc_batch_normalization_fwd_t : public cpu_primitive_t {
                         desc()->data_scaleshift_desc.data_type == f32)
                 && utils::one_of(data_pd_.desc()->format, memory_format::nhwc)
                 && (attr()->has_default_values() || this->with_relu_post_op());
-            if (!ok)
-                return status::unimplemented;
+            if (!ok) return status::unimplemented;
 
             if (is_training() && fuse_bn_relu())
                 bn_init_default_ws(this, this->workspace_pd_, 8);
@@ -63,31 +65,45 @@ struct nspc_batch_normalization_fwd_t : public cpu_primitive_t {
             if (stats_is_src() || is_training()) {
                 memory_desc_t stats_d;
                 dims_t stats_dims = { C() };
-                mkldnn_memory_desc_init(&stats_d, 1, stats_dims, data_type::f32,
-                        memory_format::x);
+                mkldnn_memory_desc_init(&stats_d, 1, stats_dims,
+                        data_type::f32, memory_format::x);
                 mean_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
                 variance_pd_ = cpu_memory_t::pd_t(engine_, &stats_d);
             }
 
+            init_scratchpad();
+
             return status::success;
         }
+
+    private:
+        void init_scratchpad() {
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            if (!stats_is_src()) {
+                int sz = nstl::max(C(), 16) * mkldnn_get_max_threads();
+                scratchpad.book(key_bnorm_reduction, sizeof(data_t) * sz);
+                scratchpad.book(key_bnorm_tmp_mean, sizeof(data_t) * sz);
+                scratchpad.book(key_bnorm_tmp_var, sizeof(data_t) * sz);
+            }
+        }
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    nspc_batch_normalization_fwd_t(const pd_t *pd, const input_vector &inputs,
-            const output_vector &outputs);
-    ~nspc_batch_normalization_fwd_t();
-    virtual void execute(event_t *e) {
+    nspc_batch_normalization_fwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+    ~nspc_batch_normalization_fwd_t() {}
+
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    data_t *stats_reduction_;
-    data_t *tmp_mean_, *tmp_variance_;
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 struct nspc_batch_normalization_bwd_t : public cpu_primitive_t {
@@ -101,9 +117,11 @@ struct nspc_batch_normalization_bwd_t : public cpu_primitive_t {
         DECLARE_COMMON_PD_T("nspc_bnorm:any", nspc_batch_normalization_bwd_t);
 
         virtual status_t init() override {
-            using namespace prop_kind;
             using namespace data_type;
+            using namespace prop_kind;
+
             assert(engine()->kind() == engine_kind::cpu);
+
             bool ok = true
                 /* the algorithm requires barriers while switching
                  * between parallelization over N and C dimensions */
@@ -115,42 +133,53 @@ struct nspc_batch_normalization_bwd_t : public cpu_primitive_t {
                         desc()->data_scaleshift_desc.data_type == f32)
                 && utils::one_of(data_pd_.desc()->format, memory_format::nhwc)
                 && (attr()->has_default_values() || this->with_relu_post_op());
-            if (!ok)
-                return status::unimplemented;
+            if (!ok) return status::unimplemented;
 
             if (fuse_bn_relu()) {
                 bn_init_default_ws(this, this->workspace_pd_, 8);
                 const size_t this_ws_sz
-                        = memory_desc_wrapper(this->workspace_pd()).size();
-
-                bool ws_ok = true && hint_fwd_pd_->workspace_pd()
-                        && memory_desc_wrapper(hint_fwd_pd_->workspace_pd())
-                                        .size()
-                                == this_ws_sz;
-                if (!ws_ok)
-                    return status::unimplemented;
+                    = memory_desc_wrapper(this->workspace_pd()).size();
+
+                bool ws_ok = true
+                    && hint_fwd_pd_->workspace_pd()
+                    && memory_desc_wrapper(hint_fwd_pd_->workspace_pd()).size()
+                    == this_ws_sz;
+                if (!ws_ok) return status::unimplemented;
             }
 
+            init_scratchpad();
+
             return status::success;
         }
+
+    private:
+        void init_scratchpad() {
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            scratchpad.book(key_bnorm_reduction,
+                    sizeof(data_t) * 2 * C() * mkldnn_get_max_threads());
+            scratchpad.book(key_bnorm_tmp_diff_ss, sizeof(data_t) * 2 * C()
+                    * (mkldnn_get_max_threads() + 1));
+        }
     };
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    nspc_batch_normalization_bwd_t(const pd_t *pd, const input_vector &inputs,
-            const output_vector &outputs);
-    ~nspc_batch_normalization_bwd_t();
-    virtual void execute(event_t *e) {
+    nspc_batch_normalization_bwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+    ~nspc_batch_normalization_bwd_t() {}
+
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    data_t *stats_reduction_;
-    data_t *tmp_diff_scaleshift_;
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
+
 }
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.cpp
index 65570f160..f009d85c5 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.cpp
@@ -28,51 +28,51 @@ namespace impl {
 namespace cpu {
 
 template <impl::data_type_t data_type>
-void ref_batch_normalization_fwd_t<data_type>::execute_forward() {
+void ref_batch_normalization_fwd_t<data_type>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     /* FIXME: check this */
-    data_t* mean = conf_.stats_is_src() ?
+    data_t* mean = pd()->stats_is_src() ?
         const_cast<data_t*>(reinterpret_cast<const data_t*>(
                this->input_memory(1))) :
         reinterpret_cast<data_t*>(this->memory(1));
 
-    data_t* variance = conf_.stats_is_src() ?
+    data_t* variance = pd()->stats_is_src() ?
         const_cast<data_t*>(reinterpret_cast<const data_t*>(
                 this->input_memory(2))) :
         reinterpret_cast<data_t*>(this->memory(2));
 
-    auto idx_scaleshift = 1 + 2*conf_.stats_is_src();
+    auto idx_scaleshift = 1 + 2*pd()->stats_is_src();
     auto scaleshift =
         reinterpret_cast<const data_t *>(this->input_memory(idx_scaleshift));
 
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
-    auto ws = reinterpret_cast<uint8_t *>(this->memory(conf_.ws_idx()));
+    auto ws = reinterpret_cast<uint8_t *>(this->memory(pd()->ws_idx()));
 
     /* fast return */
-    if (this->conf_.has_zero_dim_memory()) return;
+    if (this->pd()->has_zero_dim_memory()) return;
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper scaleshift_d(conf_.weights_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper scaleshift_d(pd()->weights_pd());
 
-    const int N = conf_.MB();
-    const int C = conf_.C();
+    const int N = pd()->MB();
+    const int C = pd()->C();
     int H = 1, W = 1, D = 1;
     const bool has_spatial = utils::one_of(data_d.ndims(), 4 ,5);
     if (has_spatial)
     {
-        D = conf_.D();
-        H = conf_.H();
-        W = conf_.W();
+        D = pd()->D();
+        H = pd()->H();
+        W = pd()->W();
     }
 
-    const float eps = conf_.desc()->batch_norm_epsilon;
-    const bool use_scaleshift = conf_.use_scaleshift();;
-    const bool save_stats = conf_.is_training();
-    const bool is_training = conf_.is_training();
-    const bool fuse_bn_relu = conf_.fuse_bn_relu();
-    const bool calculate_stats = !conf_.stats_is_src();
+    const float eps = pd()->desc()->batch_norm_epsilon;
+    const bool use_scaleshift = pd()->use_scaleshift();;
+    const bool save_stats = pd()->is_training();
+    const bool is_training = pd()->is_training();
+    const bool fuse_bn_relu = pd()->fuse_bn_relu();
+    const bool calculate_stats = !pd()->stats_is_src();
 
-    const bool with_relu = conf_.with_relu_post_op();
+    const bool with_relu = pd()->with_relu_post_op();
     auto maybe_post_op = [&](data_t res) {
         return (with_relu && res < 0) ? 0 : res;
     };
@@ -146,29 +146,29 @@ void ref_batch_normalization_fwd_t<data_type>::execute_forward() {
 template struct ref_batch_normalization_fwd_t<data_type::f32>;
 
 template <impl::data_type_t data_type>
-void ref_batch_normalization_bwd_t<data_type>::execute_backward() {
+void ref_batch_normalization_bwd_t<data_type>::execute_backward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto mean = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto variance = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(3));
     auto scaleshift = reinterpret_cast<const data_t *>(this->input_memory(4));
     auto ws = reinterpret_cast<const uint8_t *>(
-            this->input_memory(conf_.ws_idx()));
+            this->input_memory(pd()->ws_idx()));
 
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
     auto diff_scaleshift = reinterpret_cast<data_t *>(this->memory(1));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper diff_data_d(conf_.diff_src_pd());
-    const memory_desc_wrapper scaleshift_d(conf_.weights_pd());
-    const memory_desc_wrapper diff_scaleshift_d(conf_.diff_weights_pd());
-    const memory_desc_wrapper mean_d(conf_.mean_pd());
-    const memory_desc_wrapper variance_d(conf_.variance_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper diff_data_d(pd()->diff_src_pd());
+    const memory_desc_wrapper scaleshift_d(pd()->weights_pd());
+    const memory_desc_wrapper diff_scaleshift_d(pd()->diff_weights_pd());
+    const memory_desc_wrapper mean_d(pd()->mean_pd());
+    const memory_desc_wrapper variance_d(pd()->variance_pd());
 
-    const int C = conf_.C();
+    const int C = pd()->C();
 
     /* fast return */
-    if (this->conf_.has_zero_dim_memory()) {
+    if (this->pd()->has_zero_dim_memory()) {
         if (diff_scaleshift) {
             for (int c = 0; c < C; ++c) {
                 diff_scaleshift[diff_scaleshift_d.off(0, c)] = 0;
@@ -178,20 +178,20 @@ void ref_batch_normalization_bwd_t<data_type>::execute_backward() {
         return;
     }
 
-    const int N = conf_.MB();
+    const int N = pd()->MB();
     int H = 1, W = 1, D = 1;
     const bool has_spatial = utils::one_of(data_d.ndims(), 4 ,5);
     if (has_spatial)
     {
-        D = conf_.D();
-        H = conf_.H();
-        W = conf_.W();
+        D = pd()->D();
+        H = pd()->H();
+        W = pd()->W();
     }
 
-    const float eps = conf_.desc()->batch_norm_epsilon;
-    const bool use_scaleshift = conf_.use_scaleshift();
-    const bool calculate_diff_stats = !conf_.omit_stats();
-    const bool fuse_bn_relu = conf_.fuse_bn_relu();
+    const float eps = pd()->desc()->batch_norm_epsilon;
+    const bool use_scaleshift = pd()->use_scaleshift();
+    const bool calculate_diff_stats = !pd()->use_global_stats();
+    const bool fuse_bn_relu = pd()->fuse_bn_relu();
 
     const bool is_3d = data_d.ndims() == 5;
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.hpp
index 95bf3438c..a3e53a08f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_batch_normalization.hpp
@@ -67,19 +67,19 @@ struct ref_batch_normalization_fwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_batch_normalization_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_batch_normalization_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -132,19 +132,19 @@ struct ref_batch_normalization_bwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_batch_normalization_bwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_batch_normalization_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binarization.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binarization.cpp
new file mode 100644
index 000000000..4fa937208
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binarization.cpp
@@ -0,0 +1,86 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <assert.h>
+#include <math.h>
+#include <common/utils.hpp>
+
+#include "c_types_map.hpp"
+#include "type_helpers.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "ref_binarization.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace alg_kind;
+
+template <impl::data_type_t src_type>
+void ref_binarization_fwd_t<src_type>::execute_forward() const {
+    auto src = reinterpret_cast<const src_data_t*>(this->input_memory(0));
+    auto weights = reinterpret_cast<const src_data_t*>(this->input_memory(1));
+    auto dst = reinterpret_cast<uint8_t*>(this->memory());
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    int nbits = 8;
+
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int CB = utils::div_up(C, nbits);
+    const int D = pd()->D();
+    const int H = pd()->H();
+    const int W = pd()->W();
+
+    parallel_nd(MB, CB, D, H, W,
+        [&](int n, int cb, int d, int h, int w) {
+
+        uint8_t bin_val = 0x00;
+        for (int c = cb * nbits, shift = 0; c < std::min(C, (cb + 1) * nbits); c++, shift++) {
+            size_t src_off = src_d.ndims() == 4
+                              ? src_d.off(n, c, h, w)
+                              : src_d.ndims() == 5
+                                ? src_d.off(n, c, d, h, w)
+                                : src_d.off(n, c);
+
+            size_t wei_off = weights_d.off(c);
+
+            float val = src[src_off];
+            float thr = weights[wei_off];
+
+            auto bit = uint8_t((val > thr) ? 0x01 : 0x00);
+            bin_val |= (bit << shift);
+        }
+
+        size_t dst_off = dst_d.ndims() == 4
+                           ? dst_d.off(n, cb*nbits, h, w)
+                           : dst_d.ndims() == 5
+                             ? dst_d.off(n, cb, d, h, w)
+                             : dst_d.off(n, cb);
+
+        dst[dst_off / nbits] = bin_val;
+    });
+}
+
+template struct ref_binarization_fwd_t<data_type::f32>;
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binarization.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binarization.hpp
new file mode 100644
index 000000000..726d70038
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binarization.hpp
@@ -0,0 +1,78 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_REF_BINARIZATION_HPP
+#define CPU_REF_BINARIZATION_HPP
+
+#include <assert.h>
+
+#include "cpu_binarization_pd.hpp"
+#include "cpu_engine.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+#include "c_types_map.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <impl::data_type_t src_type>
+struct ref_binarization_fwd_t: public cpu_primitive_t {
+    struct pd_t: public cpu_binarization_fwd_pd_t {
+        pd_t(engine_t *engine, const binarization_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const binarization_fwd_pd_t *hint_fwd_pd)
+            : cpu_binarization_fwd_pd_t(engine, adesc, attr, hint_fwd_pd) {}
+
+        DECLARE_COMMON_PD_T("ref:any", ref_binarization_fwd_t);
+
+        virtual status_t init() override {
+            using namespace prop_kind;
+            assert(engine()->kind() == engine_kind::cpu);
+
+            bool ok = true
+                && utils::one_of(desc()->prop_kind, forward_training, forward_inference)
+                && utils::everyone_is(src_type, desc()->src_desc.data_type, desc()->weights_desc.data_type)
+                && utils::everyone_is(data_type::bin, desc()->dst_desc.data_type)
+                && utils::one_of(desc()->alg_kind, mkldnn_binarization_depthwise)
+                && attr()->has_default_values();
+            if (!ok) return status::unimplemented;
+
+            return status::success;
+        }
+    };
+
+    ref_binarization_fwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+
+    typedef typename prec_traits<src_type>::type src_data_t;
+
+    virtual void execute(event_t *e) const {
+        execute_forward();
+        e->set_state(event_t::ready);
+    }
+
+private:
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+};
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binary_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binary_convolution.cpp
new file mode 100644
index 000000000..2c9cbdef4
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binary_convolution.cpp
@@ -0,0 +1,284 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <common/utils.hpp>
+#include <common/primitive_attr.hpp>
+#include "c_types_map.hpp"
+#include "type_helpers.hpp"
+#include "mkldnn_thread.hpp"
+#include "mkldnn_traits.hpp"
+#include "math_utils.hpp"
+
+#include "ref_binary_convolution.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using math::saturate;
+
+void _ref_binary_convolution_fwd_t::execute_forward() const {
+    auto src = reinterpret_cast<const uint8_t*>(this->input_memory(0));
+    auto weights = reinterpret_cast<const uint8_t*>(this->input_memory(1));
+
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+
+    const bool with_groups = pd()->with_groups();
+
+    const int G = pd()->G();
+    const int MB = pd()->MB();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+
+    const int OC = pd()->OC() / G;
+    const int IC = pd()->IC() / G;
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+
+    const int KSD = pd()->KSD();
+    const int KSH = pd()->KSH();
+    const int KSW = pd()->KSW();
+
+    const int KDD = pd()->KDD();
+    const int KDH = pd()->KDH();
+    const int KDW = pd()->KDW();
+
+    const int padFront = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
+
+    const float pad_value = pd()->pad_value();
+
+    const int ndims = pd()->cdesc()->src_desc.ndims;
+
+    const int nbits = 8;
+
+    const auto &p = pd()->attr()->post_ops_;
+    bool with_sum = p.find(primitive_kind::sum) != -1;
+    bool with_binarization = p.find(primitive_kind::binarization) != -1;
+
+    auto extract_bit = [](uint8_t val, uint8_t bit) -> uint8_t {
+        return (uint8_t)((val >> bit) & 0x0001);
+    };
+
+    auto ker = [=](int32_t &d, int g, int mb, int oc, int od, int oh, int ow) {
+        for (int ic = 0; ic < IC; ++ic)
+        for (int kd = 0; kd < KD; ++kd)
+        for (int kh = 0; kh < KH; ++kh)
+        for (int kw = 0; kw < KW; ++kw) {
+            const int id = od * KSD - padFront + kd * (1 + KDD);
+            const int ih = oh * KSH - padT + kh * (1 + KDH);
+            const int iw = ow * KSW - padL + kw * (1 + KDW);
+
+            size_t iidx = 0;
+            size_t widx = 0;
+            if (ndims == 5) {
+                iidx = src_d.off(mb, g * IC + ic, id, ih, iw);
+                widx = with_groups ? weights_d.off(g, oc, ic, kd, kh, kw)
+                                   : weights_d.off(oc, ic, kd, kh, kw);
+            } else if (ndims == 4) {
+                iidx = src_d.off(mb, g * IC + ic, ih, iw);
+                widx = with_groups ? weights_d.off(g, oc, ic, kh, kw)
+                                   : weights_d.off(oc, ic, kh, kw);
+            } else if (ndims == 3) {
+                iidx = src_d.off(mb, g * IC + ic, iw);
+                widx = with_groups ? weights_d.off(g, oc, ic, kw)
+                                   : weights_d.off(oc, ic, kw);
+            } else {
+                assert(false);
+            }
+
+
+            uint8_t s;
+            if (id < 0 || id >= ID || ih < 0 || ih >= IH || iw < 0 || iw >= IW) {
+                if (pad_value == 0)
+                    continue;
+                else {
+                    s = pad_value == 1.0f ? (uint8_t)1 : (uint8_t)0;
+                }
+            }  else {
+                s = extract_bit(src[iidx/nbits], (uint8_t)(iidx % nbits));
+            }
+
+            uint8_t w = extract_bit(weights[widx/nbits], (uint8_t)(widx % nbits));
+
+            d += (int32_t)(s ^ w);
+       }
+    };
+
+    if (with_binarization) {
+        auto dst = reinterpret_cast<uint8_t*>(this->memory());
+
+        int binarization_idx = p.find(primitive_kind::binarization);
+        const float* binarization_weights = p.entry_[binarization_idx].binarization.weights_data;
+
+        parallel_nd(G, MB, utils::div_up(OC, nbits), OD, OH, OW,
+            [&](int g, int mb, int ocb, int od, int oh, int ow) {
+
+            uint8_t bin_val = 0x00;
+            for (int oc = ocb * nbits, shift = 0; oc < std::min(OC, (ocb + 1) * nbits); oc++, shift++) {
+                int32_t a = 0;
+                ker(a, g, mb, oc, od, oh, ow);
+
+                float base_value;
+                if (pad_value == 0.0f) {
+                    const int i_left_overflow = nstl::max(0, (padL - ow * KSW));
+                    const int i_right_overflow = nstl::max(IW, (ow * KSW + (KW - 1) * (KDW + 1) - padL + 1)) - IW;
+                    const int kw_padding =
+                            KW - utils::div_up(i_left_overflow, (KDW + 1)) - utils::div_up(i_right_overflow, (KDW + 1));
+
+                    const int i_top_overflow = nstl::max(0, (padT - oh * KSH));
+                    const int i_bottom_overflow = nstl::max(IH, (oh * KSH + (KH - 1) * (KDH + 1) - padT + 1)) - IH;
+                    const int kh_padding =
+                            KH - utils::div_up(i_top_overflow, (KDH + 1)) - utils::div_up(i_bottom_overflow, (KDH + 1));
+
+                    const int i_front_overflow = nstl::max(0, (padFront - od * KSD));
+                    const int i_back_overflow = nstl::max(ID, (od * KSD + (KD - 1) * (KDD + 1) - padFront + 1)) - ID;
+                    const int kd_padding =
+                            KD - utils::div_up(i_front_overflow, (KDD + 1)) - utils::div_up(i_back_overflow, (KDD + 1));
+
+                    base_value = IC * kd_padding * kh_padding * kw_padding;
+                } else {
+                    base_value = IC * KD * KH * KW;
+                }
+
+                float a_fp = base_value - (float)(2 * a);
+
+                if (with_sum) {
+                    if (ndims == 5)
+                        a_fp += dst[dst_d.off(mb, g * OC + oc, od, oh, ow)];
+                    else if (ndims == 4)
+                        a_fp += dst[dst_d.off(mb, g * OC + oc, oh, ow)];
+                    else if (ndims == 3)
+                        a_fp += dst[dst_d.off(mb, g * OC + oc, ow)];
+                    else
+                        assert(false);
+                }
+
+                int eltwise_inj_idx = 0;
+                int depthwise_inj_idx = 0;
+                for (int i = 0; i < p.len_; i++) {
+                    auto &post_op = p.entry_[i];
+                    if (post_op.is_eltwise()) {
+                        a_fp = eltwise_injectors[eltwise_inj_idx]->compute_scalar(a_fp);
+                        eltwise_inj_idx++;
+                    } else if (post_op.is_depthwise()) {
+                        auto depthwise_weights = post_op.depthwise.weights_data;
+                        auto depthwise_bias = post_op.depthwise.biases_data;
+
+                        a_fp = depthwise_injectors[depthwise_inj_idx]->compute_scalar(a_fp,
+                                                                                      depthwise_weights + g * OC + oc,
+                                                                                      depthwise_bias + g * OC + oc);
+                        depthwise_inj_idx++;
+                    }
+                }
+
+                float thr = binarization_weights[g * OC + oc];
+                auto bit = uint8_t((a_fp > thr) ? 0x01 : 0x00);
+                bin_val |= (bit << shift);
+            }
+
+            if (ndims == 5)
+                dst[dst_d.off(mb, g*OC + ocb*nbits, od, oh, ow) / nbits] = bin_val;
+            else if (ndims == 4)
+                dst[dst_d.off(mb, g*OC + ocb*nbits, oh, ow) / nbits] = bin_val;
+            else if (ndims == 3)
+                dst[dst_d.off(mb, g*OC + ocb*nbits, ow) / nbits] = bin_val;
+            else
+                assert(false);
+        });
+    } else {
+        auto dst = reinterpret_cast<float*>(this->memory());
+
+        parallel_nd(G, MB, OC, OD, OH, OW,
+            [&](int g, int mb, int oc, int od, int oh, int ow) {
+            int32_t a = 0;
+            ker(a, g, mb, oc, od, oh, ow);
+
+            float base_value;
+            if (pad_value == 0.0f) {
+                const int i_left_overflow = nstl::max(0, (padL - ow * KSW));
+                const int i_right_overflow = nstl::max(IW, (ow * KSW + (KW - 1) * (KDW + 1) - padL + 1)) - IW;
+                const int kw_padding =
+                        KW - utils::div_up(i_left_overflow, (KDW + 1)) - utils::div_up(i_right_overflow, (KDW + 1));
+
+                const int i_top_overflow = nstl::max(0, (padT - oh * KSH));
+                const int i_bottom_overflow = nstl::max(IH, (oh * KSH + (KH - 1) * (KDH + 1) - padT + 1)) - IH;
+                const int kh_padding =
+                        KH - utils::div_up(i_top_overflow, (KDH + 1)) - utils::div_up(i_bottom_overflow, (KDH + 1));
+
+                const int i_front_overflow = nstl::max(0, (padFront - od * KSD));
+                const int i_back_overflow = nstl::max(ID, (od * KSD + (KD - 1) * (KDD + 1) - padFront + 1)) - ID;
+                const int kd_padding =
+                        KD - utils::div_up(i_front_overflow, (KDD + 1)) - utils::div_up(i_back_overflow, (KDD + 1));
+
+                base_value = IC * kd_padding * kh_padding * kw_padding;
+            } else {
+                base_value = IC * KD * KH * KW;
+            }
+
+            float a_fp = base_value - (float)(2 * a);
+
+            if (with_sum) {
+                if (ndims == 5)
+                    a_fp += dst[dst_d.off(mb, g*OC + oc, od, oh, ow)];
+                else if (ndims == 4)
+                    a_fp += dst[dst_d.off(mb, g*OC + oc, oh, ow)];
+                else if (ndims == 3)
+                    a_fp += dst[dst_d.off(mb, g*OC + oc, ow)];
+                else
+                    assert(false);
+            }
+
+            int eltwise_inj_idx = 0;
+            int depthwise_inj_idx = 0;
+            for (int i = 0; i < p.len_; i++) {
+                auto& post_op = p.entry_[i];
+                if (post_op.is_eltwise()) {
+                    a_fp = eltwise_injectors[eltwise_inj_idx]->compute_scalar(a_fp);
+                    eltwise_inj_idx++;
+                } else if (post_op.is_depthwise()) {
+                    auto depthwise_weights = post_op.depthwise.weights_data;
+                    auto depthwise_bias = post_op.depthwise.biases_data;
+
+                    a_fp = depthwise_injectors[depthwise_inj_idx]->compute_scalar(a_fp, depthwise_weights + g * OC + oc,
+                                                                                        depthwise_bias + g * OC + oc);
+                    depthwise_inj_idx++;
+                }
+            }
+
+            if (ndims == 5)
+                dst[dst_d.off(mb, g*OC + oc, od, oh, ow)] = a_fp;
+            else if (ndims == 4)
+                dst[dst_d.off(mb, g*OC + oc, oh, ow)] = a_fp;
+            else if (ndims == 3)
+                dst[dst_d.off(mb, g*OC + oc, ow)] = a_fp;
+            else
+                assert(false);
+        });
+    }
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binary_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binary_convolution.hpp
new file mode 100644
index 000000000..2160d9bd5
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_binary_convolution.hpp
@@ -0,0 +1,151 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_REF_BINARY_CONVOLUTION_HPP
+#define CPU_REF_BINARY_CONVOLUTION_HPP
+
+#include <assert.h>
+
+#include "c_types_map.hpp"
+#include "cpu_binary_convolution_pd.hpp"
+#include "cpu_engine.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+#include "ref_eltwise.hpp"
+#include "ref_depthwise.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+struct _ref_binary_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public _cpu_binary_convolution_fwd_pd_t {
+        pd_t(engine_t *engine,
+                const typename pd_t::base_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const typename pd_t::base_class *hint_fwd_pd)
+            : _cpu_binary_convolution_fwd_pd_t(engine, adesc, attr,
+                    hint_fwd_pd)
+        {}
+
+        DECLARE_COMMON_PD_T("ref:any", _ref_binary_convolution_fwd_t);
+
+        virtual status_t init() override {
+            using namespace prop_kind;
+            using namespace data_type;
+            assert(this->engine()->kind() == engine_kind::cpu);
+            bool ok = true
+                && this->set_default_params() == status::success
+                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                        forward_inference)
+                && this->cdesc_().alg_kind == alg_kind::binary_convolution_direct
+                && this->cdesc_().src_desc.data_type == bin
+                && this->cdesc_().weights_desc.data_type == bin
+                && this->cdesc_().accum_data_type == s32
+                && utils::one_of(this->cdesc_().dst_desc.data_type, f32, bin)
+                && is_supported_post_ops();
+            return ok ? status::success : status::unimplemented;
+        }
+
+        virtual bool is_supported_post_ops() const {
+            bool ok = true;
+            auto const &po = this->attr()->post_ops_;
+
+            auto is_eltwise = [&](int idx) { return po.entry_[idx].is_eltwise(); };
+            auto is_depthwise = [&](int idx) { return po.entry_[idx].is_depthwise(); };
+            auto is_sum = [&](int idx) { return po.entry_[idx].is_sum(); };
+            auto is_simple = [&](int idx) { return (is_eltwise(idx) || is_depthwise(idx)); };
+            auto is_binarization = [&](int idx) { return po.entry_[idx].is_binarization(); };
+
+            switch (po.len_) {
+            case 0: // no post_ops
+                break;
+            case 1:
+                ok = ok && (is_simple(0) || is_sum(0) || is_binarization(0));
+                break;
+            case 2:
+                ok = ok && ((is_sum(0) && is_simple(1)) || (is_simple(0) && is_simple(1)) ||
+                            (is_simple(0) && is_binarization(1)));
+                break;
+            case 3:
+                ok = ok && ((is_sum(0) && is_simple(1) && is_simple(2)) ||
+                            (is_simple(0) && is_simple(1) && is_binarization(2)));
+                break;
+
+            default: ok = false;
+            }
+            return ok;
+        }
+    };
+
+    _ref_binary_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {
+        const auto &post_ops = pd()->attr()->post_ops_;
+
+        for (int i = 0; i < post_ops.len_; i++) {
+            auto &post_op = post_ops.entry_[i];
+            if (post_op.is_eltwise()) {
+                eltwise_injectors.push_back(new ref_eltwise_scalar_fwd_t(
+                        post_op.eltwise.alg,
+                        post_op.eltwise.alpha,
+                        post_op.eltwise.beta
+                ));
+            } else if (post_op.is_depthwise()) {
+                depthwise_injectors.push_back(new ref_depthwise_scalar_fwd_t(
+                        post_op.depthwise.alg
+                ));
+            }
+        }
+    }
+
+    ~_ref_binary_convolution_fwd_t() {
+        for (auto inj : eltwise_injectors)
+            delete inj;
+        eltwise_injectors.clear();
+
+        for (auto inj : depthwise_injectors)
+            delete inj;
+        depthwise_injectors.clear();
+    }
+
+    virtual void execute(event_t *e) const {
+        switch (pd()->cdesc()->prop_kind) {
+        case prop_kind::forward_training:
+        case prop_kind::forward_inference:
+            execute_forward();
+            break;
+        default:
+            assert(!"invalid prop_kind");
+        }
+        e->set_state(event_t::ready);
+    }
+
+private:
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
+    nstl::vector<ref_eltwise_scalar_fwd_t*> eltwise_injectors;
+    nstl::vector<ref_depthwise_scalar_fwd_t*> depthwise_injectors;
+};
+
+using ref_binary_convolution_fwd_t = _ref_binary_convolution_fwd_t;
+
+}
+}
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_concat.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_concat.hpp
index 923bb613a..5d346df40 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_concat.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_concat.hpp
@@ -77,7 +77,7 @@ struct ref_concat_t: public cpu_primitive_t {
             }
             return ret;
         }
-        virtual pd_t *clone() const override { return nullptr; }
+        virtual pd_t *clone() const override { return  new pd_t(*this); }
         virtual const char *name() const override { return "ref:any"; }
 
         virtual status_t init() override {
@@ -99,15 +99,15 @@ struct ref_concat_t: public cpu_primitive_t {
                     }
                 }
             }
-            return success;
+            return (size_t)n_ == reorder_pds_.size() ? success : unimplemented;
         }
 
         nstl::vector<const reorder_pd_t *> reorder_pds_;
     };
 
-    ref_concat_t(const pd_t *conf, const input_vector &inputs,
+    ref_concat_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs, nstl::vector<primitive_t *> reorders)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*conf),
+        : cpu_primitive_t(apd, inputs, outputs),
         reorders_(reorders) {}
 
     ~ref_concat_t() {
@@ -116,7 +116,7 @@ struct ref_concat_t: public cpu_primitive_t {
             delete reorders_[i];
     }
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         for (size_t i = 0; i < reorders_.size(); ++i) {
             event_t ei;
             reorders_[i]->execute(&ei);
@@ -125,7 +125,7 @@ struct ref_concat_t: public cpu_primitive_t {
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     nstl::vector<primitive_t *> reorders_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.cpp
index 33b5fe0ef..d3e648387 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.cpp
@@ -27,56 +27,59 @@ namespace impl {
 namespace cpu {
 
 using math::saturate;
+using math::get_bias;
 
-template <bool with_relu, data_type_t src_type, data_type_t wei_type,
+template <data_type_t src_type, data_type_t wei_type,
          data_type_t dst_type, data_type_t acc_type>
-void _ref_convolution_fwd_t<with_relu, src_type, wei_type, dst_type, acc_type>
-        ::execute_forward() {
+void ref_convolution_fwd_t<src_type, wei_type, dst_type, acc_type>
+        ::execute_forward() const {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const char *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
-    const bool with_groups = conf_.with_groups();
+    const bool with_groups = pd()->with_groups();
 
-    const int G = conf_.G();
-    const int MB = conf_.MB();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
+    const int G = pd()->G();
+    const int MB = pd()->MB();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
 
-    const int OC = conf_.OC() / G;
-    const int IC = conf_.IC() / G;
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
+    const int OC = pd()->OC() / G;
+    const int IC = pd()->IC() / G;
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
 
-    const int KSD = conf_.KSD();
-    const int KSH = conf_.KSH();
-    const int KSW = conf_.KSW();
+    const int KSD = pd()->KSD();
+    const int KSH = pd()->KSH();
+    const int KSW = pd()->KSW();
 
-    const int KDD = conf_.KDD();
-    const int KDH = conf_.KDH();
-    const int KDW = conf_.KDW();
+    const int KDD = pd()->KDD();
+    const int KDH = pd()->KDH();
+    const int KDW = pd()->KDW();
 
-    const int padFront = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
+    const int padFront = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
 
-    const float nslope = conf_.negative_slope();
+    const bool with_relu = 0; // TODO: change if support post_ops
+    const float nslope = 0.f;
 
-    const int ndims = conf_.cdesc()->src_desc.ndims;
+    const int ndims = pd()->desc()->src_desc.ndims;
 
-    auto ker = [=](acc_data_t &d, int g, int mb, int oc, int od, int oh,
+    auto ker = [=](int g, int mb, int oc, int od, int oh,
             int ow) {
+        acc_data_t d = 0;
         for (int ic = 0; ic < IC; ++ic)
         for (int kd = 0; kd < KD; ++kd)
         for (int kh = 0; kh < KH; ++kh)
@@ -107,36 +110,23 @@ void _ref_convolution_fwd_t<with_relu, src_type, wei_type, dst_type, acc_type>
            else
                assert(false);
 
-       }
-    };
-    auto get_bias = [=, &bias](size_t off) -> float {
-#       define CASE(dt) case dt: \
-            return (float)(*((const prec_traits<dt>::type *)bias + off))
-        switch (conf_.cdesc()->bias_desc.data_type) {
-        CASE(data_type::s8);
-        CASE(data_type::u8);
-        CASE(data_type::s32);
-        CASE(data_type::f32);
-        default: assert(!"unimplemented");
         }
-#       undef CASE
-        return 0;
+        return d;
     };
+
     parallel_nd(G, MB, OC, OD, OH, OW,
         [&](int g, int mb, int oc, int od, int oh, int ow) {
-        acc_data_t a = 0;
-        ker(a, g, mb, oc, od, oh, ow);
-
-        float a_fp = (float)a;
+        float a_fp = ker(g, mb, oc, od, oh, ow);
 
         if (bias)
-            a_fp += get_bias(bias_d.off(g*OC + oc));
+            a_fp += get_bias(bias, bias_d.off(g * OC + oc),
+                             pd()->desc()->bias_desc.data_type);
 
         if (with_relu && a_fp < 0)
             a_fp *= nslope;
 
         if (data_traits<dst_data_t>::data_type != data_type::f32) {
-            switch (conf_.attr()->round_mode_) {
+            switch (pd()->attr()->round_mode_) {
                 case round_mode::down:    a_fp = floorf(a_fp); break;
                 case round_mode::nearest: a_fp = nearbyintf(a_fp); break;
             }
@@ -156,51 +146,52 @@ void _ref_convolution_fwd_t<with_relu, src_type, wei_type, dst_type, acc_type>
 template <data_type_t diff_src_type, data_type_t wei_type,
          data_type_t diff_dst_type, data_type_t acc_type>
 void ref_convolution_bwd_data_t<diff_src_type, wei_type, diff_dst_type,
-     acc_type>::execute_backward_data() {
+     acc_type>::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const diff_dst_data_t*>(
             this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t*>(this->input_memory(1));
     auto bias = reinterpret_cast<const char *>(this->input_memory(2));
     auto diff_src = reinterpret_cast<diff_src_data_t*>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
-    const bool with_groups = conf_.with_groups();
+    const bool with_groups = pd()->with_groups();
 
-    const int G = conf_.G();
-    const int MB = conf_.MB();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
+    const int G = pd()->G();
+    const int MB = pd()->MB();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
 
-    const int OC = conf_.OC() / G;
-    const int IC = conf_.IC() / G;
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
+    const int OC = pd()->OC() / G;
+    const int IC = pd()->IC() / G;
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
 
-    const int KSD = conf_.KSD();
-    const int KSH = conf_.KSH();
-    const int KSW = conf_.KSW();
+    const int KSD = pd()->KSD();
+    const int KSH = pd()->KSH();
+    const int KSW = pd()->KSW();
 
-    const int KDD = conf_.KDD();
-    const int KDH = conf_.KDH();
-    const int KDW = conf_.KDW();
+    const int KDD = pd()->KDD();
+    const int KDH = pd()->KDH();
+    const int KDW = pd()->KDW();
 
-    const int padFront = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
+    const int padFront = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
 
-    const int ndims = conf_.cdesc()->diff_src_desc.ndims;
+    const int ndims = pd()->desc()->diff_src_desc.ndims;
 
-    auto ker = [=](acc_data_t &d, int g, int mb, int ic, int id, int ih,
+    auto ker = [=](int g, int mb, int ic, int id, int ih,
             int iw) {
+        acc_data_t d = 0;
         for (int oc = 0; oc < OC; ++oc)
         for (int kd = 0; kd < KD; ++kd)
         for (int kh = 0; kh < KH; ++kh)
@@ -239,20 +230,9 @@ void ref_convolution_bwd_data_t<diff_src_type, wei_type, diff_dst_type,
                     assert(false);
             }
         }
+        return d;
     };
-    auto get_bias = [=, &bias](size_t off) -> acc_data_t {
-#       define CASE(dt) case dt: \
-            return (acc_data_t)(*((const prec_traits<dt>::type *)bias + off))
-        switch (conf_.desc()->bias_desc.data_type) {
-        CASE(data_type::s8);
-        CASE(data_type::u8);
-        CASE(data_type::s32);
-        CASE(data_type::f32);
-        default: assert(!"unimplemented");
-        }
-#       undef CASE
-        return 0;
-    };
+
     parallel_nd(G, MB, IC, ID, IH, IW,
         [&](int g, int mb, int ic, int id, int ih, int iw) {
         auto ds_idx = (ndims == 5)
@@ -260,10 +240,11 @@ void ref_convolution_bwd_data_t<diff_src_type, wei_type, diff_dst_type,
             : (ndims == 4)
             ? diff_src_d.off(mb, g*IC + ic, ih, iw)
             : diff_src_d.off(mb, g*IC + ic, iw);
-        acc_data_t a = bias
-            ? get_bias(bias_d.off(g*IC + ic))
-            : (acc_data_t)0;
-        ker(a, g, mb, ic, id, ih, iw);
+        float a = bias
+            ? get_bias(bias, bias_d.off(g * IC + ic),
+                    pd()->desc()->bias_desc.data_type)
+            : 0;
+        a += ker(g, mb, ic, id, ih, iw);
         diff_src[ds_idx] = saturate<diff_src_data_t>(a);
     });
 }
@@ -271,48 +252,48 @@ void ref_convolution_bwd_data_t<diff_src_type, wei_type, diff_dst_type,
 template <data_type_t src_type, data_type_t diff_wei_type,
          data_type_t diff_dst_type, data_type_t acc_type>
 void ref_convolution_bwd_weights_t<src_type, diff_wei_type, diff_dst_type,
-     acc_type>::execute_backward_weights() {
+     acc_type>::execute_backward_weights() const {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const diff_dst_data_t *>(
             this->input_memory(1));
     auto diff_weights = reinterpret_cast<diff_wei_data_t*>(this->memory(0));
     auto diff_bias = reinterpret_cast<diff_wei_data_t *>(this->memory(1));
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
-    const memory_desc_wrapper diff_bias_d(conf_.diff_weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
+    const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1));
 
-    const bool with_groups = conf_.with_groups();
+    const bool with_groups = pd()->with_groups();
 
-    const int G = conf_.G();
-    const int MB = conf_.MB();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
+    const int G = pd()->G();
+    const int MB = pd()->MB();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
 
-    const int OC = conf_.OC() / G;
-    const int IC = conf_.IC() / G;
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
+    const int OC = pd()->OC() / G;
+    const int IC = pd()->IC() / G;
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
 
-    const int KSD = conf_.KSD();
-    const int KSH = conf_.KSH();
-    const int KSW = conf_.KSW();
+    const int KSD = pd()->KSD();
+    const int KSH = pd()->KSH();
+    const int KSW = pd()->KSW();
 
-    const int KDD = conf_.KDD();
-    const int KDH = conf_.KDH();
-    const int KDW = conf_.KDW();
+    const int KDD = pd()->KDD();
+    const int KDH = pd()->KDH();
+    const int KDW = pd()->KDW();
 
-    const int padFront = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
+    const int padFront = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
 
-    const int ndims = conf_.cdesc()->src_desc.ndims;
+    const int ndims = pd()->desc()->src_desc.ndims;
 
 auto ker = [=](acc_data_t &d, int g, int oc, int ic, int kd, int kh, int kw) {
         for (int mb = 0; mb < MB; ++mb)
@@ -364,6 +345,7 @@ auto ker = [=](acc_data_t &d, int g, int oc, int ic, int kd, int kh, int kw) {
 
     parallel_nd(G, OC, [&](int g, int oc) {
         if (diff_bias) {
+            // XXX: loss of precision when bias is a float...
             acc_data_t db = 0;
             ker_bias(db, g, oc);
             diff_bias[diff_bias_d.off(g*OC+oc)]
@@ -401,19 +383,13 @@ auto ker = [=](acc_data_t &d, int g, int oc, int ic, int kd, int kh, int kw) {
 
 using namespace data_type;
 
-template struct _ref_convolution_fwd_t<false, f32>;
-template struct _ref_convolution_fwd_t<true, f32>;
-template struct _ref_convolution_fwd_t<false, s16, s16, s32, s32>;
-template struct _ref_convolution_fwd_t<true, s16, s16, s32, s32>;
-
-template struct _ref_convolution_fwd_t<false, u8, s8, f32, s32>;
-template struct _ref_convolution_fwd_t<true, u8, s8, f32, s32>;
-template struct _ref_convolution_fwd_t<false, u8, s8, s32, s32>;
-template struct _ref_convolution_fwd_t<true, u8, s8, s32, s32>;
-template struct _ref_convolution_fwd_t<false, u8, s8, s8, s32>;
-template struct _ref_convolution_fwd_t<true, u8, s8, s8, s32>;
-template struct _ref_convolution_fwd_t<false, u8, s8, u8, s32>;
-template struct _ref_convolution_fwd_t<true, u8, s8, u8, s32>;
+template struct ref_convolution_fwd_t<f32>;
+template struct ref_convolution_fwd_t<s16, s16, s32, s32>;
+
+template struct ref_convolution_fwd_t<u8, s8, f32, s32>;
+template struct ref_convolution_fwd_t<u8, s8, s32, s32>;
+template struct ref_convolution_fwd_t<u8, s8, s8, s32>;
+template struct ref_convolution_fwd_t<u8, s8, u8, s32>;
 
 template struct ref_convolution_bwd_data_t<f32, f32, f32, f32>;
 template struct ref_convolution_bwd_data_t<s32, s16, s16, s32>;
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.hpp
index 3153e4d00..9cb8dc208 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_convolution.hpp
@@ -29,21 +29,20 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
-template <bool with_relu, impl::data_type_t src_type,
+template <impl::data_type_t src_type,
          impl::data_type_t wei_type = src_type,
          impl::data_type_t dst_type = src_type,
          impl::data_type_t acc_type = dst_type>
-struct _ref_convolution_fwd_t: public cpu_primitive_t {
-    struct pd_t: public _cpu_convolution_fwd_pd_t<with_relu> {
+struct ref_convolution_fwd_t: public cpu_primitive_t {
+    struct pd_t: public cpu_convolution_fwd_pd_t {
         pd_t(engine_t *engine,
-                const typename pd_t::base_desc_t *adesc,
+                const convolution_desc_t *adesc,
                 const primitive_attr_t *attr,
                 const typename pd_t::base_class *hint_fwd_pd)
-            : _cpu_convolution_fwd_pd_t<with_relu>(engine, adesc, attr,
-                    hint_fwd_pd)
+            : cpu_convolution_fwd_pd_t(engine, adesc, attr, hint_fwd_pd)
         {}
 
-        DECLARE_COMMON_PD_T("ref:any", _ref_convolution_fwd_t);
+        DECLARE_COMMON_PD_T("ref:any", ref_convolution_fwd_t);
 
         virtual status_t init() override {
             using namespace prop_kind;
@@ -51,35 +50,37 @@ struct _ref_convolution_fwd_t: public cpu_primitive_t {
             assert(this->engine()->kind() == engine_kind::cpu);
             bool ok = true
                 && this->set_default_params() == status::success
-                && utils::one_of(this->cdesc_().prop_kind, forward_training,
+                && utils::one_of(this->desc()->prop_kind, forward_training,
                         forward_inference)
-                && this->cdesc_().alg_kind == alg_kind::convolution_direct
-                && this->cdesc_().src_desc.data_type == src_type
-                && this->cdesc_().weights_desc.data_type == wei_type
-                && this->cdesc_().accum_data_type == acc_type
-                && this->cdesc_().dst_desc.data_type == dst_type
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
+                && this->desc()->src_desc.data_type == src_type
+                && this->desc()->weights_desc.data_type == wei_type
+                && this->desc()->accum_data_type == acc_type
+                && this->desc()->dst_desc.data_type == dst_type
                 && IMPLICATION(this->with_bias(), true
                         && IMPLICATION(src_type == u8,
-                            utils::one_of(this->cdesc_().bias_desc.data_type,
+                            utils::one_of(this->desc()->bias_desc.data_type,
                                 f32, s32, s8, u8))
                         && IMPLICATION(src_type == f32,
-                            this->cdesc_().bias_desc.data_type == f32))
+                            this->desc()->bias_desc.data_type == f32))
                 && this->attr()->has_default_values();
             return ok ? status::success : status::unimplemented;
         }
     };
 
-    _ref_convolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_convolution_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.cdesc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::forward_training:
         case prop_kind::forward_inference:
             execute_forward();
@@ -91,22 +92,10 @@ struct _ref_convolution_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
-template <impl::data_type_t src_type, impl::data_type_t wei_type = src_type,
-         impl::data_type_t dst_type = src_type,
-         impl::data_type_t acc_type = dst_type>
-using ref_convolution_fwd_t = _ref_convolution_fwd_t<false, src_type, wei_type,
-      dst_type, acc_type>;
-
-template <impl::data_type_t src_type, impl::data_type_t wei_type = src_type,
-         impl::data_type_t dst_type = src_type,
-         impl::data_type_t acc_type = dst_type>
-using ref_convolution_relu_t = _ref_convolution_fwd_t<true, src_type, wei_type,
-      dst_type, acc_type>;
-
 template <impl::data_type_t diff_src_type, impl::data_type_t wei_type,
          impl::data_type_t diff_dst_type,
          impl::data_type_t acc_type = diff_src_type>
@@ -127,7 +116,9 @@ struct ref_convolution_bwd_data_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_data
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && this->desc()->diff_dst_desc.data_type == diff_dst_type
                 && this->desc()->weights_desc.data_type == wei_type
                 && this->desc()->accum_data_type == acc_type
@@ -139,17 +130,17 @@ struct ref_convolution_bwd_data_t: public cpu_primitive_t {
         virtual bool support_bias() const override { return true; }
     };
 
-    ref_convolution_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    ref_convolution_bwd_data_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<diff_src_type>::type diff_src_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             execute_backward_data();
             break;
@@ -160,8 +151,8 @@ struct ref_convolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t src_type, impl::data_type_t diff_wei_type,
@@ -184,7 +175,9 @@ struct ref_convolution_bwd_weights_t: public cpu_primitive_t {
             bool ok = true
                 && this->set_default_params() == status::success
                 && this->desc()->prop_kind == backward_weights
-                && this->desc()->alg_kind == alg_kind::convolution_direct
+                && utils::one_of(this->desc()->alg_kind,
+                           alg_kind::convolution_auto,
+                           alg_kind::convolution_direct)
                 && this->desc()->src_desc.data_type == src_type
                 && this->desc()->diff_weights_desc.data_type == diff_wei_type
                 && this->desc()->diff_dst_desc.data_type == diff_dst_type
@@ -197,17 +190,17 @@ struct ref_convolution_bwd_weights_t: public cpu_primitive_t {
         }
     };
 
-    ref_convolution_bwd_weights_t(const pd_t *pd, const input_vector &inputs,
+    ref_convolution_bwd_weights_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<diff_wei_type>::type diff_wei_data_t;
     typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_weights:
             execute_backward_weights();
             break;
@@ -218,8 +211,8 @@ struct ref_convolution_bwd_weights_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_weights();
-    pd_t conf_;
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.cpp
index 0100367d3..d97f3b473 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.cpp
@@ -28,18 +28,18 @@ namespace cpu {
 
 typedef float data_t;
 
-void ref_deconvolution_fwd_t::compute_fwd_bias() {
+void ref_deconvolution_fwd_t::compute_fwd_bias() const {
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
 
-    const int G = conf_.G();
-    const int MB = conf_.MB();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int OD = conf_.OD();
-    const int OC = conf_.OC() / G;
-    const int ndims = conf_.desc()->src_desc.ndims;
+    const int G = pd()->G();
+    const int MB = pd()->MB();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int OD = pd()->OD();
+    const int OC = pd()->OC() / G;
+    const int ndims = pd()->desc()->src_desc.ndims;
 
     parallel_nd(MB, G, OC, OD, OH, OW,
         [&](int mb, int g, int oc, int od, int oh, int ow) {
@@ -51,15 +51,15 @@ void ref_deconvolution_fwd_t::compute_fwd_bias() {
     });
 }
 
-void ref_deconvolution_fwd_t::compute_fwd_bias_ncdhw() {
+void ref_deconvolution_fwd_t::compute_fwd_bias_ncdhw() const {
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int SP = conf_.OW()*conf_.OH()*conf_.OD();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int SP = pd()->OW()*pd()->OH()*pd()->OD();
 
     parallel_nd(MB, OC, [&](int mb, int oc) {
         PRAGMA_OMP_SIMD()
@@ -71,15 +71,15 @@ void ref_deconvolution_fwd_t::compute_fwd_bias_ncdhw() {
 }
 
 template <int blksize>
-void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc() {
+void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc() const {
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int SP = conf_.OW() * conf_.OH() * conf_.OD();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int SP = pd()->OW() * pd()->OH() * pd()->OD();
 
     const ptrdiff_t stride_mb = dst_d.blocking_desc().strides[0][0];
 
@@ -95,18 +95,18 @@ void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc() {
     });
 }
 
-void ref_deconvolution_bwd_weights_t::compute_bwd_bias() {
+void ref_deconvolution_bwd_weights_t::compute_bwd_bias() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_bias = reinterpret_cast<data_t *>(this->memory(1));
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
 
-    const int G = conf_.G();
-    const int MB = conf_.MB();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
-    const int OC = conf_.OC() / G;
-    const int OD = conf_.OD();
-    const int ndims = conf_.desc()->src_desc.ndims;
+    const int G = pd()->G();
+    const int MB = pd()->MB();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
+    const int OC = pd()->OC() / G;
+    const int OD = pd()->OD();
+    const int ndims = pd()->desc()->src_desc.ndims;
 
     parallel_nd(G, OC, [&](int g, int oc) {
         data_t db = 0;
@@ -128,15 +128,15 @@ void ref_deconvolution_bwd_weights_t::compute_bwd_bias() {
     });
 }
 
-void ref_deconvolution_bwd_weights_t::compute_bwd_bias_ncdhw() {
+void ref_deconvolution_bwd_weights_t::compute_bwd_bias_ncdhw() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_bias = reinterpret_cast<data_t *>(this->memory(1));
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
 
-    const int OC = conf_.OC();
-    const int MB = conf_.MB();
-    const int SP = conf_.OH()*conf_.OW()*conf_.OD();
+    const int OC = pd()->OC();
+    const int MB = pd()->MB();
+    const int SP = pd()->OH()*pd()->OW()*pd()->OD();
 
     parallel_nd(OC, [&](int oc) {
         data_t db = 0;
@@ -152,15 +152,15 @@ void ref_deconvolution_bwd_weights_t::compute_bwd_bias_ncdhw() {
 }
 
 template <int blksize>
-void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc() {
+void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc() const {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_bias = reinterpret_cast<data_t *>(this->memory(1));
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
 
-    const int OC = conf_.OC();
-    const int MB = conf_.MB();
-    const int SP = conf_.OH() * conf_.OW() * conf_.OD();
+    const int OC = pd()->OC();
+    const int MB = pd()->MB();
+    const int SP = pd()->OH() * pd()->OW() * pd()->OD();
 
     const ptrdiff_t stride_mb = diff_dst_d.blocking_desc().strides[0][0];
 
@@ -185,10 +185,10 @@ void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc() {
     });
 }
 
-template void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc<8>();
-template void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc<16>();
-template void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc<8>();
-template void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc<16>();
+template void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc<8>() const;
+template void ref_deconvolution_fwd_t::compute_fwd_bias_nCdhwXc<16>() const;
+template void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc<8>() const;
+template void ref_deconvolution_bwd_weights_t::compute_bwd_bias_nCdhwXc<16>() const;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.hpp
index 6890c1c47..e18517258 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_deconvolution.hpp
@@ -28,39 +28,6 @@
 #include "utils.hpp"
 #include "primitive_iterator.hpp"
 
-#define DECLARE_DECONVOLUTION_PD_t(impl_name, ...) \
-    virtual pd_t *clone() const override { return new pd_t(*this); } \
-    virtual status_t create_primitive(primitive_t **primitive, \
-                const primitive_at_t *inputs, \
-                const primitive_t **outputs) const override { \
-        double ms = get_msec(); \
-        using namespace prop_kind;\
-        primitive_t::input_vector ins(inputs, inputs + this->n_inputs()); \
-        primitive_t::output_vector outs(outputs, outputs + this->n_outputs()); \
-        auto ret = safe_ptr_assign<primitive_t>(*primitive, \
-                                new (__VA_ARGS__)(this, ins, outs)); \
-        primitive_t *conv_primitive; \
-        if (this->desc()->prop_kind == backward_weights) {\
-            primitive_at_t conv_inputs[2];\
-            conv_inputs[0] = inputs[1];\
-            conv_inputs[1] = inputs[0];\
-            conv_pd_->create_primitive((&conv_primitive), conv_inputs, outputs);\
-        } \
-        else conv_pd_->create_primitive((&conv_primitive), inputs, outputs);\
-        ((__VA_ARGS__ *)(*primitive))->conv_p_ = conv_primitive;\
-        ms = get_msec() - ms; \
-        if (mkldnn_verbose()->level >= 2) { \
-                        printf("mkldnn_verbose,create,%s,%g\n", this->info(), ms); \
-                        fflush(0); \
-                    } \
-        return ret; \
-    } \
-virtual const char *name() const override { return impl_name; }
-
-#define DECLARE_DECONVOLUTION_PD_T(impl_name, ...) \
-        DECLARE_DECONVOLUTION_PD_t(impl_name,  __VA_ARGS__)
-
-
 namespace mkldnn {
 namespace impl {
 namespace cpu {
@@ -146,7 +113,7 @@ struct ref_deconvolution_fwd_t: public cpu_primitive_t {
 
         ~pd_t() { delete conv_pd_; }
 
-        DECLARE_DECONVOLUTION_PD_T("ref:any", ref_deconvolution_fwd_t);
+        DECLARE_DECONVOLUTION_PD_T(ref_deconvolution_fwd_t);
 
         status_t init_convolution(){
             using namespace memory_format;
@@ -154,7 +121,7 @@ struct ref_deconvolution_fwd_t: public cpu_primitive_t {
             convolution_desc_t cd;
             status_t status;
 
-            status = conv_descr_create(this->cdesc(), &cd);
+            status = conv_descr_create(this->desc(), &cd);
             if (status != status::success) return status;
 
             mkldnn_primitive_desc_iterator it(this->engine_, (op_desc_t *)&cd,
@@ -216,19 +183,19 @@ struct ref_deconvolution_fwd_t: public cpu_primitive_t {
         bool conv_supports_bias_;
     };
 
-    ref_deconvolution_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_deconvolution_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), conv_p_(nullptr) {}
+        : cpu_primitive_t(apd, inputs, outputs), conv_p_(nullptr) {}
 
     ~ref_deconvolution_fwd_t() { delete this->conv_p_; }
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::forward_training:
         case prop_kind::forward_inference:
             (conv_p_)->execute(e);
-            if (conf_.with_bias() && !conf_.conv_supports_bias_) {
-                switch (conf_.dst_pd()->desc()->format) {
+            if (pd()->with_bias() && !pd()->conv_supports_bias_) {
+                switch (pd()->dst_pd()->desc()->format) {
                     case memory_format::nchw :
                     case memory_format::ncdhw :
                         compute_fwd_bias_ncdhw();
@@ -254,10 +221,10 @@ struct ref_deconvolution_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void compute_fwd_bias();
-    void compute_fwd_bias_ncdhw();
-    template <int blksize> void compute_fwd_bias_nCdhwXc();
-    pd_t conf_;
+    void compute_fwd_bias() const;
+    void compute_fwd_bias_ncdhw() const;
+    template <int blksize> void compute_fwd_bias_nCdhwXc() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     primitive_t *conv_p_;
 };
 
@@ -277,7 +244,7 @@ struct ref_deconvolution_bwd_data_t: public cpu_primitive_t {
 
         ~pd_t() { delete conv_pd_; }
 
-        DECLARE_DECONVOLUTION_PD_T("ref:any", ref_deconvolution_bwd_data_t);
+        DECLARE_DECONVOLUTION_PD_T(ref_deconvolution_bwd_data_t);
 
         status_t init_convolution(){
             using namespace memory_format;
@@ -285,7 +252,7 @@ struct ref_deconvolution_bwd_data_t: public cpu_primitive_t {
             convolution_desc_t cd;
             status_t status;
 
-            status = conv_descr_create(this->cdesc(), &cd);
+            status = conv_descr_create(this->desc(), &cd);
             if (status != status::success) return status;
 
              mkldnn_primitive_desc_iterator it(this->engine_, (op_desc_t *)&cd,
@@ -336,13 +303,13 @@ struct ref_deconvolution_bwd_data_t: public cpu_primitive_t {
         }
         primitive_desc_t *conv_pd_;
     };
-    ref_deconvolution_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    ref_deconvolution_bwd_data_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), conv_p_(nullptr) {}
+        : cpu_primitive_t(apd, inputs, outputs), conv_p_(nullptr) {}
     ~ref_deconvolution_bwd_data_t() { delete this->conv_p_; }
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_data:
             (conv_p_)->execute(e);
             break;
@@ -353,7 +320,7 @@ struct ref_deconvolution_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     primitive_t *conv_p_;
 };
 
@@ -373,7 +340,7 @@ struct ref_deconvolution_bwd_weights_t: public cpu_primitive_t {
 
         ~pd_t() { delete conv_pd_; }
 
-        DECLARE_DECONVOLUTION_PD_T("ref:any", ref_deconvolution_bwd_weights_t);
+        DECLARE_DECONVOLUTION_PD_T(ref_deconvolution_bwd_weights_t);
 
         status_t init_convolution(){
             using namespace memory_format;
@@ -381,7 +348,7 @@ struct ref_deconvolution_bwd_weights_t: public cpu_primitive_t {
             convolution_desc_t cd;
             status_t status;
 
-            status = conv_descr_create(this->cdesc(), &cd);
+            status = conv_descr_create(this->desc(), &cd);
             if (status != status::success) return status;
 
              mkldnn_primitive_desc_iterator it(this->engine_, (op_desc_t *)&cd,
@@ -434,20 +401,20 @@ struct ref_deconvolution_bwd_weights_t: public cpu_primitive_t {
         primitive_desc_t *conv_pd_;
     };
 
-    ref_deconvolution_bwd_weights_t(const pd_t *pd, const input_vector &inputs,
+    ref_deconvolution_bwd_weights_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), conv_p_(nullptr) {}
+        : cpu_primitive_t(apd, inputs, outputs), conv_p_(nullptr) {}
 
     ~ref_deconvolution_bwd_weights_t() { delete this->conv_p_; }
 
     typedef typename prec_traits<data_type::f32>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward_weights:
             (conv_p_)->execute(e);
-            if (conf_.with_bias()) {
-                switch (conf_.diff_dst_pd()->desc()->format) {
+            if (pd()->with_bias()) {
+                switch (pd()->diff_dst_pd()->desc()->format) {
                     case memory_format::nchw :
                     case memory_format::ncdhw :
                         compute_bwd_bias_ncdhw();
@@ -472,11 +439,11 @@ struct ref_deconvolution_bwd_weights_t: public cpu_primitive_t {
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     primitive_t *conv_p_;
-    void compute_bwd_bias();
-    void compute_bwd_bias_ncdhw();
-    template <int blksize> void compute_bwd_bias_nCdhwXc();
+    void compute_bwd_bias() const;
+    void compute_bwd_bias_ncdhw() const;
+    template <int blksize> void compute_bwd_bias_nCdhwXc() const;
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.cpp
index b5d334ab5..4e954742e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.cpp
@@ -55,22 +55,22 @@ float ref_depthwise_scalar_fwd_t::compute_scalar(float s, const float* weights,
 }
 
 template <impl::data_type_t data_type>
-void ref_depthwise_fwd_t<data_type>::execute_forward() {
+void ref_depthwise_fwd_t<data_type>::execute_forward() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const data_t *>(this->input_memory(2));
     auto dst = reinterpret_cast<data_t *>(this->memory());
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
-    const int MB = conf_.MB();
-    const int C = conf_.C();
-    const int D = conf_.D();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const auto alg_kind = conf_.desc()->alg_kind;
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int D = pd()->D();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const auto alg_kind = pd()->desc()->alg_kind;
 
     parallel_nd(MB, C, D, H, W,
         [&](int n, int c, int d, int h, int w) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.hpp
index 28c08be48..4ac116cdc 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_depthwise.hpp
@@ -63,19 +63,19 @@ struct ref_depthwise_fwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_depthwise_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_depthwise_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.cpp
index 0d0122b08..e3e703d2e 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.cpp
@@ -30,51 +30,56 @@ namespace cpu {
 using namespace alg_kind;
 using namespace math;
 
-ref_eltwise_scalar_fwd_t::ref_eltwise_scalar_fwd_t(const alg_kind_t alg_, const float alpha_, const float beta_)
-        : alg(alg_), alpha(alpha_), beta(beta_) {
-    using namespace alg_kind;
-
-    assert(utils::one_of(alg, eltwise_relu, eltwise_tanh, eltwise_elu,
-                         eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
-                         eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic, eltwise_clamp));
+ref_eltwise_scalar_fwd_t::ref_eltwise_scalar_fwd_t(alg_kind_t alg, float alpha,
+        float beta): alg_(alg), alpha_(alpha), beta_(beta) {
+    assert(utils::one_of(alg_, eltwise_relu, eltwise_tanh, eltwise_elu,
+                eltwise_square, eltwise_abs, eltwise_sqrt, eltwise_linear,
+                eltwise_bounded_relu, eltwise_soft_relu, eltwise_logistic,
+                eltwise_clamp, eltwise_exp, eltwise_not));
 }
 
+ref_eltwise_scalar_fwd_t::ref_eltwise_scalar_fwd_t(
+        const post_ops_t::entry_t::eltwise_t &eltwise)
+    : ref_eltwise_scalar_fwd_t(eltwise.alg, eltwise.alpha, eltwise.beta) {}
+
 float ref_eltwise_scalar_fwd_t::compute_scalar(float s) {
-    switch (alg) {
-        case eltwise_relu:   return relu_fwd(s, alpha);
-        case eltwise_tanh:   return tanh_fwd(s);
-        case eltwise_elu:    return elu_fwd(s, alpha);
+    switch (alg_) {
+        case eltwise_relu: return relu_fwd(s, alpha_);
+        case eltwise_tanh: return tanh_fwd(s);
+        case eltwise_elu: return elu_fwd(s, alpha_);
         case eltwise_square: return square_fwd(s);
-        case eltwise_abs:    return abs_fwd(s);
-        case eltwise_sqrt:   return sqrt_fwd(s);
-        case eltwise_linear: return linear_fwd(s, alpha, beta);
-        case eltwise_bounded_relu: return bounded_relu_fwd(s, alpha);
+        case eltwise_abs: return abs_fwd(s);
+        case eltwise_sqrt: return sqrt_fwd(s);
+        case eltwise_linear: return linear_fwd(s, alpha_, beta_);
+        case eltwise_bounded_relu: return bounded_relu_fwd(s, alpha_);
         case eltwise_soft_relu: return soft_relu_fwd(s);
         case eltwise_logistic: return logistic_fwd(s);
-        case eltwise_clamp: return clamp_fwd(s, alpha, beta);
+        case eltwise_clamp: return clamp_fwd(s, alpha_, beta_);
+        case eltwise_exp: return exp_fwd(s);
+        case eltwise_not: return not_fwd(s);
         default: assert(!"unknown eltwise alg_kind");
     }
 
-    return 0.0f;
+    return 0.f;
 }
 
 template <impl::data_type_t data_type>
-void ref_eltwise_fwd_t<data_type>::execute_forward_nCspBc_padded() {
+void ref_eltwise_fwd_t<data_type>::execute_forward_nCspBc_padded() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
     const blocking_desc_t &blk = data_d.blocking_desc();
     const int block = blk.block_dims[1];
 
-    const int MB = conf_.MB();
-    const int C = conf_.C() / block;
+    const int MB = pd()->MB();
+    const int C = pd()->C() / block;
     const int C_PADDED = blk.padding_dims[1] / block;
-    const int tail = conf_.C() % block;
-    const int SP = conf_.D() * conf_.H() * conf_.W();
-    const auto alg_kind = conf_.desc()->alg_kind;
-    const float alpha = conf_.desc()->alpha;
-    const float beta = conf_.desc()->beta;
+    const int tail = pd()->C() % block;
+    const int SP = pd()->D() * pd()->H() * pd()->W();
+    const auto alg_kind = pd()->desc()->alg_kind;
+    const float alpha = pd()->desc()->alpha;
+    const float beta = pd()->desc()->beta;
 
     auto ker = [=] (data_t &d, data_t s) {
         switch (alg_kind) {
@@ -84,6 +89,8 @@ void ref_eltwise_fwd_t<data_type>::execute_forward_nCspBc_padded() {
             case eltwise_soft_relu: d = soft_relu_fwd(s); break;
             case eltwise_logistic: d = logistic_fwd(s); break;
             case eltwise_clamp: d = clamp_fwd(s, alpha, beta); break;
+            case eltwise_exp: d = exp_fwd(s); break;
+            case eltwise_not: d = not_fwd(s); break;
             default: assert(!"unknown eltwise alg_kind");
         }
     };
@@ -104,24 +111,24 @@ void ref_eltwise_fwd_t<data_type>::execute_forward_nCspBc_padded() {
 }
 
 template <impl::data_type_t data_type>
-void ref_eltwise_fwd_t<data_type>::execute_forward_generic() {
+void ref_eltwise_fwd_t<data_type>::execute_forward_generic() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
 
     /* fast return */
-    if (conf_.has_zero_dim_memory()) return;
+    if (pd()->has_zero_dim_memory()) return;
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
 
-    const int MB = conf_.MB();
-    const int C = conf_.C();
-    const int D = conf_.D();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const auto alg_kind = conf_.desc()->alg_kind;
-    const float alpha = conf_.desc()->alpha;
-    const float beta = conf_.desc()->beta;
-    const bool is_3d = conf_.desc()->data_desc.ndims == 5;
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int D = pd()->D();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const auto alg_kind = pd()->desc()->alg_kind;
+    const float alpha = pd()->desc()->alpha;
+    const float beta = pd()->desc()->beta;
+    const bool is_3d = pd()->desc()->data_desc.ndims == 5;
 
     parallel_nd(MB, C, D, H, W,
         [&](int n, int c, int id, int h, int w) {
@@ -142,22 +149,24 @@ void ref_eltwise_fwd_t<data_type>::execute_forward_generic() {
             case eltwise_soft_relu: d = soft_relu_fwd(s); break;
             case eltwise_logistic: d = logistic_fwd(s); break;
             case eltwise_clamp: d = clamp_fwd(s, alpha, beta); break;
+            case eltwise_exp: d = exp_fwd(s); break;
+            case eltwise_not: d = not_fwd(s); break;
             default: assert(!"unknown eltwise alg_kind");
         }
     });
 }
 
 template <impl::data_type_t data_type>
-void ref_eltwise_fwd_t<data_type>::execute_forward_dense() {
+void ref_eltwise_fwd_t<data_type>::execute_forward_dense() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
 
     const ptrdiff_t nelems = static_cast<ptrdiff_t>(data_d.nelems(true));
-    const auto alg_kind = conf_.desc()->alg_kind;
-    const float alpha = conf_.desc()->alpha;
-    const float beta  = conf_.desc()->beta;
+    const auto alg_kind = pd()->desc()->alg_kind;
+    const float alpha = pd()->desc()->alpha;
+    const float beta  = pd()->desc()->beta;
 
     src += data_d.blocking_desc().offset_padding;
     dst += data_d.blocking_desc().offset_padding;
@@ -185,32 +194,34 @@ void ref_eltwise_fwd_t<data_type>::execute_forward_dense() {
         case eltwise_soft_relu: d = soft_relu_fwd(s); break;
         case eltwise_logistic: d = logistic_fwd(s); break;
         case eltwise_clamp: d = clamp_fwd(s, alpha, beta); break;
+        case eltwise_exp: d = exp_fwd(s); break;
+        case eltwise_not: d = not_fwd(s); break;
         default: assert(!"unknown eltwise alg_kind");
         }
     });
 }
 
 template <impl::data_type_t data_type>
-void ref_eltwise_bwd_t<data_type>::execute_backward_generic() {
+void ref_eltwise_bwd_t<data_type>::execute_backward_generic() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
 
     /* fast return */
-    if (conf_.has_zero_dim_memory()) return;
+    if (pd()->has_zero_dim_memory()) return;
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper diff_data_d(conf_.diff_src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper diff_data_d(pd()->diff_src_pd());
 
-    const int MB = conf_.MB();
-    const int C = conf_.C();
-    const int D = conf_.D();
-    const int H = conf_.H();
-    const int W = conf_.W();
-    const auto alg_kind = conf_.desc()->alg_kind;
-    const float alpha = conf_.desc()->alpha;
-    const float beta = conf_.desc()->beta;
-    const bool is_3d = conf_.desc()->data_desc.ndims == 5;
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int D = pd()->D();
+    const int H = pd()->H();
+    const int W = pd()->W();
+    const auto alg_kind = pd()->desc()->alg_kind;
+    const float alpha = pd()->desc()->alpha;
+    const float beta = pd()->desc()->beta;
+    const bool is_3d = pd()->desc()->data_desc.ndims == 5;
 
     parallel_nd(MB, C, D, H, W,
         [&](int n, int c, int d, int h, int w) {
@@ -236,24 +247,25 @@ void ref_eltwise_bwd_t<data_type>::execute_backward_generic() {
             case eltwise_soft_relu: ds = soft_relu_bwd(dd, s); break;
             case eltwise_logistic: ds = logistic_bwd(dd, s); break;
             case eltwise_clamp: ds = clamp_bwd(dd, s, alpha, beta); break;
+            case eltwise_exp: ds = exp_bwd(dd, s); break;
             default: assert(!"unknown eltwise alg_kind");
         }
     });
 }
 
 template <impl::data_type_t data_type>
-void ref_eltwise_bwd_t<data_type>::execute_backward_dense() {
+void ref_eltwise_bwd_t<data_type>::execute_backward_dense() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper diff_data_d(conf_.diff_src_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper diff_data_d(pd()->diff_src_pd());
 
     const ptrdiff_t nelems = static_cast<ptrdiff_t>(data_d.nelems(true));
-    const auto alg_kind = conf_.desc()->alg_kind;
-    const float alpha = conf_.desc()->alpha;
-    const float beta = conf_.desc()->beta;
+    const auto alg_kind = pd()->desc()->alg_kind;
+    const float alpha = pd()->desc()->alpha;
+    const float beta = pd()->desc()->beta;
 
     src += data_d.blocking_desc().offset_padding;
     diff_dst += diff_data_d.blocking_desc().offset_padding;
@@ -276,6 +288,7 @@ void ref_eltwise_bwd_t<data_type>::execute_backward_dense() {
         case eltwise_soft_relu: ds = soft_relu_bwd(dd, s); break;
         case eltwise_logistic: ds = logistic_bwd(dd, s); break;
         case eltwise_clamp: ds = clamp_bwd(dd, s, alpha, beta); break;
+        case eltwise_exp: ds = exp_bwd(dd, s); break;
         default: assert(!"unknown eltwise alg_kind");
         }
     });
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.hpp
index bd90dc198..718844ba4 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_eltwise.hpp
@@ -31,13 +31,16 @@ namespace cpu {
 
 struct ref_eltwise_scalar_fwd_t {
 public:
-    ref_eltwise_scalar_fwd_t(const alg_kind_t alg, float alpha, float beta);
+    ref_eltwise_scalar_fwd_t(alg_kind_t alg, float alpha, float beta);
+
+    // note that eltwise.scale is ignored
+    ref_eltwise_scalar_fwd_t(const post_ops_t::entry_t::eltwise_t &eltwise);
+
     float compute_scalar(float s);
 
-private:
-    alg_kind_t alg;
-    float alpha;
-    float beta;
+    const alg_kind_t alg_;
+    const float alpha_;
+    const float beta_;
 };
 
 template <impl::data_type_t data_type>
@@ -87,15 +90,15 @@ struct ref_eltwise_fwd_t: public cpu_primitive_t {
         bool use_dense_, use_nCspBc_padded_;
     };
 
-    ref_eltwise_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_eltwise_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.use_dense_)
+    virtual void execute(event_t *e) const {
+        if (pd()->use_dense_)
             execute_forward_dense();
-        else if (conf_.use_nCspBc_padded_)
+        else if (pd()->use_nCspBc_padded_)
             execute_forward_nCspBc_padded();
         else
             execute_forward_generic();
@@ -103,10 +106,10 @@ struct ref_eltwise_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward_nCspBc_padded();
-    void execute_forward_dense();
-    void execute_forward_generic();
-    pd_t conf_;
+    void execute_forward_nCspBc_padded() const;
+    void execute_forward_dense() const;
+    void execute_forward_generic() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -142,27 +145,30 @@ struct ref_eltwise_bwd_t: public cpu_primitive_t {
             if (use_generic && !one_of(diff_dst_d.ndims(), 4, 5))
                 return status::unimplemented;
 
+            if (desc()->alg_kind == mkldnn_eltwise_not)
+                return status::unimplemented;
+
             return status::success;
         }
 
         bool use_dense_;
     };
 
-    ref_eltwise_bwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_eltwise_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
-        if (conf_.use_dense_) execute_backward_dense();
+    virtual void execute(event_t *e) const {
+        if (pd()->use_dense_) execute_backward_dense();
         else execute_backward_generic();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_dense();
-    void execute_backward_generic();
-    pd_t conf_;
+    void execute_backward_dense() const;
+    void execute_backward_generic() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.cpp
index 6d3edfab9..92616650a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.cpp
@@ -27,37 +27,39 @@ namespace impl {
 namespace cpu {
 
 using math::saturate;
+using math::get_bias;
 
 template <data_type_t src_type, data_type_t wei_type, data_type_t dst_type,
          data_type_t acc_type>
 void ref_inner_product_fwd_t<src_type, wei_type, dst_type, acc_type>
-        ::execute_forward() {
+        ::execute_forward() const {
     auto src = reinterpret_cast<const src_data_t *>(this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto bias = reinterpret_cast<const char *>(this->input_memory(2));
     auto dst = reinterpret_cast<dst_data_t *>(this->memory());
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper bias_d(conf_.weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper bias_d(pd()->weights_pd(1));
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int IC = conf_.IC();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int IC = pd()->IC();
 
     const bool src_has_spatial = utils::one_of(src_d.ndims(), 4, 5);
 
     const bool is_3d = src_d.ndims() == 5;
 
-    const auto &post_ops = conf_.attr()->post_ops_;
+    const auto &post_ops = pd()->attr()->post_ops_;
     const bool do_relu = post_ops.len_ == 1;
     const float nslope = do_relu ? post_ops.entry_[0].eltwise.alpha : 0.f;
 
-    auto ker_has_spatial = [=](acc_data_t &d, int mb, int oc) {
-        const int KD = conf_.KD();
-        const int KH = conf_.KH();
-        const int KW = conf_.KW();
+    auto ker_has_spatial = [=](int mb, int oc) {
+        acc_data_t d = 0;
+        const int KD = pd()->KD();
+        const int KH = pd()->KH();
+        const int KW = pd()->KW();
         for (int ic = 0; ic < IC; ++ic) {
             for (int kd = 0; kd < KD; ++kd) {
                 for (int kh = 0; kh < KH; ++kh) {
@@ -72,42 +74,29 @@ void ref_inner_product_fwd_t<src_type, wei_type, dst_type, acc_type>
                 }
             }
         }
+        return d;
     };
 
-    auto ker_no_spatial = [=](acc_data_t &d, int mb, int oc) {
+    auto ker_no_spatial = [=](int mb, int oc) {
+        acc_data_t d = 0;
         for (int ic = 0; ic < IC; ++ic) {
             d += (acc_data_t)src[src_d.off(mb, ic)]
                 * weights[weights_d.off(oc, ic)];
         }
-    };
-
-    auto get_bias = [=, &bias](size_t off) -> acc_data_t {
-#       define CASE(dt) case dt: \
-            return (acc_data_t)(*((const prec_traits<dt>::type *)bias + off))
-        switch (conf_.desc()->bias_desc.data_type) {
-        CASE(data_type::s8);
-        CASE(data_type::u8);
-        CASE(data_type::s32);
-        CASE(data_type::f32);
-        default: assert(!"unimplemented");
-        }
-#       undef CASE
-        return 0;
+        return d;
     };
 
     parallel_nd(MB, OC, [&](int mb, int oc) {
-        acc_data_t a = bias ? get_bias(bias_d.off(oc)) : (acc_data_t)0;
-        if (src_has_spatial) {
-            ker_has_spatial(a, mb, oc);
-        } else {
-            ker_no_spatial(a, mb, oc);
-        }
-        if (do_relu && a < (acc_data_t)0) {
-            float ds = (float)a * nslope;
-            dst[dst_d.off(mb, oc)] = saturate<dst_data_t>(ds);
-        } else {
-            dst[dst_d.off(mb, oc)] = saturate<dst_data_t>(a);
-        }
+        float a = bias
+            ? get_bias(bias, bias_d.off(oc), pd()->desc()->bias_desc.data_type)
+            : 0;
+        if (src_has_spatial)
+            a += ker_has_spatial(mb, oc);
+        else
+            a += ker_no_spatial(mb, oc);
+        if (do_relu && a < (acc_data_t)0)
+            a *= nslope;
+        dst[dst_d.off(mb, oc)] = saturate<dst_data_t>(a);
     });
 }
 using namespace data_type;
@@ -121,19 +110,19 @@ template struct ref_inner_product_fwd_t<u8, s8, u8, s32>;
 template <data_type_t diff_src_type, data_type_t wei_type,
          data_type_t diff_dst_type, data_type_t acc_type>
 void ref_inner_product_bwd_data_t<diff_src_type, wei_type, diff_dst_type,
-     acc_type>::execute_backward_data() {
+     acc_type>::execute_backward_data() const {
     auto diff_dst = reinterpret_cast<const diff_dst_data_t *>(
             this->input_memory(0));
     auto weights = reinterpret_cast<const wei_data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<diff_src_data_t*>(this->memory());
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper weights_d(conf_.weights_pd(0));
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper weights_d(pd()->weights_pd(0));
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int IC = conf_.IC();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int IC = pd()->IC();
 
     const bool diff_src_has_spatial = utils::one_of(diff_src_d.ndims(), 4, 5);
 
@@ -141,9 +130,9 @@ void ref_inner_product_bwd_data_t<diff_src_type, wei_type, diff_dst_type,
 
     parallel_nd(MB, IC, [&](int mb, int ic) {
         if (diff_src_has_spatial) {
-            const int KD = conf_.KD();
-            const int KH = conf_.KH();
-            const int KW = conf_.KW();
+            const int KD = pd()->KD();
+            const int KH = pd()->KH();
+            const int KW = pd()->KW();
             for (int kd = 0; kd < KD; ++kd)
             for (int kh = 0; kh < KH; ++kh)
             for (int kw = 0; kw < KW; ++kw) {
@@ -176,20 +165,20 @@ template struct ref_inner_product_bwd_data_t<f32, f32, f32, f32>;
 template struct ref_inner_product_bwd_data_t<s32, s16, s16, s32>;
 
 template <impl::data_type_t data_type>
-void ref_inner_product_bwd_weights_t<data_type>::execute_backward_weights() {
+void ref_inner_product_bwd_weights_t<data_type>::execute_backward_weights() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_weights = reinterpret_cast<data_t*>(this->memory(0));
     auto diff_bias = reinterpret_cast<data_t*>(this->memory(1));
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper diff_weights_d(conf_.diff_weights_pd(0));
-    const memory_desc_wrapper diff_bias_d(conf_.diff_weights_pd(1));
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper diff_weights_d(pd()->diff_weights_pd(0));
+    const memory_desc_wrapper diff_bias_d(pd()->diff_weights_pd(1));
 
-    const int MB = conf_.MB();
-    const int OC = conf_.OC();
-    const int IC = conf_.IC();
+    const int MB = pd()->MB();
+    const int OC = pd()->OC();
+    const int IC = pd()->IC();
 
     const bool src_has_spatial = utils::one_of(src_d.ndims(), 4 ,5);
 
@@ -197,9 +186,9 @@ void ref_inner_product_bwd_weights_t<data_type>::execute_backward_weights() {
 
     parallel_nd(OC, IC, [&](int oc, int ic) {
         if (src_has_spatial) {
-            const int KD = conf_.KD();
-            const int KH = conf_.KH();
-            const int KW = conf_.KW();
+            const int KD = pd()->KD();
+            const int KH = pd()->KH();
+            const int KW = pd()->KW();
             for (int kd = 0; kd < KD; ++kd) {
                 for (int kh = 0; kh < KH; ++kh) {
                     for (int kw = 0; kw < KW; ++kw) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.hpp
index afb21a116..e777c6dd2 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_inner_product.hpp
@@ -64,17 +64,17 @@ struct ref_inner_product_fwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_inner_product_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_inner_product_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<src_type>::type src_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<dst_type>::type dst_data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::forward_training:
         case prop_kind::forward_inference:
             execute_forward();
@@ -86,8 +86,8 @@ struct ref_inner_product_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t diff_src_type, impl::data_type_t wei_type,
@@ -119,17 +119,17 @@ struct ref_inner_product_bwd_data_t: public cpu_primitive_t {
         }
     };
 
-    ref_inner_product_bwd_data_t(const pd_t *pd, const input_vector &inputs,
+    ref_inner_product_bwd_data_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<diff_src_type>::type diff_src_data_t;
     typedef typename prec_traits<wei_type>::type wei_data_t;
     typedef typename prec_traits<diff_dst_type>::type diff_dst_data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward:
         case prop_kind::backward_data:
             execute_backward_data();
@@ -141,8 +141,8 @@ struct ref_inner_product_bwd_data_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_data();
-    pd_t conf_;
+    void execute_backward_data() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -174,13 +174,13 @@ struct ref_inner_product_bwd_weights_t: public cpu_primitive_t {
         }
     };
 
-    ref_inner_product_bwd_weights_t(const pd_t *pd, const input_vector &inputs,
+    ref_inner_product_bwd_weights_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
-        switch (conf_.desc()->prop_kind) {
+    virtual void execute(event_t *e) const {
+        switch (pd()->desc()->prop_kind) {
         case prop_kind::backward:
         case prop_kind::backward_weights:
             execute_backward_weights();
@@ -192,8 +192,8 @@ struct ref_inner_product_bwd_weights_t: public cpu_primitive_t {
     }
 
 private:
-    void execute_backward_weights();
-    pd_t conf_;
+    void execute_backward_weights() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.cpp
index 38b81dd72..de9a1d9f9 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.cpp
@@ -47,7 +47,7 @@ static inline float fast_negative_powf(float omega, float beta) {
 
 template <impl::data_type_t data_type>
 template <mkldnn_memory_format_t fmt>
-void ref_lrn_fwd_t<data_type>::execute_forward() {
+void ref_lrn_fwd_t<data_type>::execute_forward() const {
     using namespace alg_kind;
     using namespace memory_format;
 
@@ -55,15 +55,15 @@ void ref_lrn_fwd_t<data_type>::execute_forward() {
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
     auto ws = reinterpret_cast<data_t*>(this->memory(1));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper ws_d(conf_.workspace_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper ws_d(pd()->workspace_pd());
     MAYBE_UNUSED(ws_d);
 
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
     const size_t stride_mb = data_d.blocking_desc().strides[0][0];
-    const bool across_channels = conf_.desc()->alg_kind == lrn_across_channels;
+    const bool across_channels = pd()->desc()->alg_kind == lrn_across_channels;
     constexpr int blksize = fmt == nChw16c ? 16 : 8;
 
     auto data_off = [&](int mb, int c, int h, int w) -> size_t {
@@ -78,11 +78,11 @@ void ref_lrn_fwd_t<data_type>::execute_forward() {
     };
 
     auto ker = [=](data_t *d, int mb, int oc, int oh, int ow) {
-        const float alpha = static_cast<float>(conf_.desc()->lrn_alpha);
-        const float beta = static_cast<float>(conf_.desc()->lrn_beta);
-        const float k = static_cast<float>(conf_.desc()->lrn_k);
+        const float alpha = static_cast<float>(pd()->desc()->lrn_alpha);
+        const float beta = static_cast<float>(pd()->desc()->lrn_beta);
+        const float k = static_cast<float>(pd()->desc()->lrn_k);
 
-        const int size = conf_.desc()->local_size;
+        const int size = pd()->desc()->local_size;
         const int half_size = (size - 1) / 2;
 
         float sum = 0;
@@ -114,7 +114,7 @@ void ref_lrn_fwd_t<data_type>::execute_forward() {
         d[0] = static_cast<data_t>(src[off] * fast_negative_powf(sum, beta));
     };
 
-    const int MB = conf_.MB();
+    const int MB = pd()->MB();
     if (fmt == nChw16c || fmt == nChw8c) {
         parallel_nd(MB, utils::div_up(C, blksize), H, W,
             [&](int mb, int c_blk, int h, int w) {
@@ -142,7 +142,7 @@ void ref_lrn_fwd_t<data_type>::execute_forward() {
 
 template <impl::data_type_t data_type>
 template <mkldnn_memory_format_t fmt>
-void ref_lrn_bwd_t<data_type>::execute_backward() {
+void ref_lrn_bwd_t<data_type>::execute_backward() const {
     using namespace alg_kind;
     using namespace memory_format;
 
@@ -150,21 +150,21 @@ void ref_lrn_bwd_t<data_type>::execute_backward() {
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t*>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
-    const memory_desc_wrapper diff_data_d(conf_.diff_dst_pd());
+    const memory_desc_wrapper data_d(pd()->src_pd());
+    const memory_desc_wrapper diff_data_d(pd()->diff_dst_pd());
     MAYBE_UNUSED(diff_data_d);
 
-    const int MB = conf_.MB();
-    const int C = conf_.C();
-    const int H = conf_.H();
-    const int W = conf_.W();
+    const int MB = pd()->MB();
+    const int C = pd()->C();
+    const int H = pd()->H();
+    const int W = pd()->W();
     const size_t stride_mb = data_d.blocking_desc().strides[0][0];
     constexpr int blksize = fmt == nChw16c ? 16 : 8;
 
-    const float alpha = static_cast<float>(conf_.desc()->lrn_alpha);
-    const float beta = static_cast<float>(conf_.desc()->lrn_beta);
-    const float k = static_cast<float>(conf_.desc()->lrn_k);
-    const int kernel_size = conf_.desc()->local_size;
+    const float alpha = static_cast<float>(pd()->desc()->lrn_alpha);
+    const float beta = static_cast<float>(pd()->desc()->lrn_beta);
+    const float k = static_cast<float>(pd()->desc()->lrn_k);
+    const int kernel_size = pd()->desc()->local_size;
     const int half_ksize = (kernel_size - 1) / 2;
 
     auto data_off = [&](int mb, int c, int h, int w) -> size_t {
@@ -231,16 +231,16 @@ void ref_lrn_bwd_t<data_type>::execute_backward() {
     }
 }
 
-template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nChw16c>();
-template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nChw8c>();
-template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nchw>();
-template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nhwc>();
-template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::any>();
-template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nChw16c>();
-template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nChw8c>();
-template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nchw>();
-template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nhwc>();
-template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::any>();
+template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nChw16c>() const;
+template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nChw8c>() const;
+template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nchw>() const;
+template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::nhwc>() const;
+template void ref_lrn_fwd_t<data_type::f32>::execute_forward<memory_format::any>() const;
+template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nChw16c>() const;
+template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nChw8c>() const;
+template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nchw>() const;
+template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::nhwc>() const;
+template void ref_lrn_bwd_t<data_type::f32>::execute_backward<memory_format::any>() const;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.hpp
index ad89ed713..e2750f9d5 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_lrn.hpp
@@ -57,14 +57,14 @@ struct ref_lrn_fwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_lrn_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_lrn_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         using namespace memory_format;
-        switch (conf_.src_pd()->desc()->format) {
+        switch (pd()->src_pd()->desc()->format) {
         case nChw16c: execute_forward<nChw16c>(); break;
         case nChw8c: execute_forward<nChw8c>(); break;
         case nchw: execute_forward<nchw>(); break;
@@ -77,8 +77,8 @@ struct ref_lrn_fwd_t: public cpu_primitive_t {
     }
 
 private:
-    template<memory_format_t fmt>void execute_forward();
-    pd_t conf_;
+    template<memory_format_t fmt>void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type>
@@ -106,14 +106,14 @@ struct ref_lrn_bwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_lrn_bwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_lrn_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         using namespace memory_format;
-        switch (conf_.src_pd()->desc()->format) {
+        switch (pd()->src_pd()->desc()->format) {
         case nChw16c: execute_backward<nChw16c>(); break;
         case nChw8c: execute_backward<nChw8c>(); break;
         case nchw: execute_backward<nchw>(); break;
@@ -126,8 +126,8 @@ struct ref_lrn_bwd_t: public cpu_primitive_t {
     }
 
 private:
-    template<memory_format_t fmt>void execute_backward();
-    pd_t conf_;
+    template<memory_format_t fmt>void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.cpp
index 4ee010d79..d7ae20888 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.cpp
@@ -30,43 +30,39 @@ namespace impl {
 namespace cpu {
 
 template <data_type_t data_type, data_type_t acc_type>
-void ref_pooling_fwd_t<data_type, acc_type>::execute_forward() {
+void ref_pooling_fwd_t<data_type, acc_type>::execute_forward() const {
     using namespace alg_kind;
     using namespace prop_kind;
 
-    auto alg = conf_.desc()->alg_kind;
+    auto alg = pd()->desc()->alg_kind;
 
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
-    auto ws = alg == pooling_max && conf_.desc()->prop_kind == forward_training
+    auto ws = alg == pooling_max && pd()->desc()->prop_kind == forward_training
         ? reinterpret_cast<unsigned char *>(this->memory(1)) : nullptr;
 
-    const memory_desc_wrapper src_d(conf_.src_pd());
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    const memory_desc_wrapper ws_d(conf_.workspace_pd());
+    const memory_desc_wrapper src_d(pd()->src_pd());
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    const memory_desc_wrapper ws_d(pd()->workspace_pd());
     const data_type_t ws_dt = ws ? ws_d.data_type() : data_type::undef;
 
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
-    const int SD = conf_.KSD();
-    const int SH = conf_.KSH();
-    const int SW = conf_.KSW();
-    const int padF = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
-    const int padBack = conf_.padBack();
-    const int padB = conf_.padB();
-    const int padR = conf_.padR();
-
-    const bool is_3d = conf_.desc()->src_desc.ndims == 5;
-
-//    auto apply_offset = [=](int index, int offset) {
-//        return (index > offset) ? index - offset : 0;
-//    };
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+    const int SD = pd()->KSD();
+    const int SH = pd()->KSH();
+    const int SW = pd()->KSW();
+    const int padF = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
+    const int padBack = pd()->padBack();
+    const int padB = pd()->padB();
+    const int padR = pd()->padR();
+
+    const bool is_3d = pd()->desc()->src_desc.ndims == 5;
 
     auto set_ws = [=](int mb, int oc, int od, int oh, int ow, int value) {
         if (ws) {
@@ -195,11 +191,11 @@ void ref_pooling_fwd_t<data_type, acc_type>::execute_forward() {
         d[0] = math::out_round<data_t>((float)dst / num_summands);
     };
 
-    const int MB = conf_.MB();
-    const int OC = conf_.C();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
+    const int MB = pd()->MB();
+    const int OC = pd()->C();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
 
     if (alg == pooling_max) {
         parallel_nd(MB, OC, OD, OH, OW,
@@ -226,34 +222,34 @@ void ref_pooling_fwd_t<data_type, acc_type>::execute_forward() {
 }
 
 template <data_type_t data_type, data_type_t acc_type>
-void ref_pooling_bwd_t<data_type, acc_type>::execute_backward() {
+void ref_pooling_bwd_t<data_type, acc_type>::execute_backward() const {
     using namespace alg_kind;
 
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(0));
-    auto ws = conf_.desc()->alg_kind != alg_kind::pooling_max ? nullptr
+    auto ws = pd()->desc()->alg_kind != alg_kind::pooling_max ? nullptr
         : reinterpret_cast<const unsigned char *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
 
-    const memory_desc_wrapper diff_dst_d(conf_.diff_dst_pd());
-    const memory_desc_wrapper ws_d(conf_.workspace_pd());
-    const memory_desc_wrapper diff_src_d(conf_.diff_src_pd());
+    const memory_desc_wrapper diff_dst_d(pd()->diff_dst_pd());
+    const memory_desc_wrapper ws_d(pd()->workspace_pd());
+    const memory_desc_wrapper diff_src_d(pd()->diff_src_pd());
 
-    const int ID = conf_.ID();
-    const int IH = conf_.IH();
-    const int IW = conf_.IW();
-    const int KD = conf_.KD();
-    const int KH = conf_.KH();
-    const int KW = conf_.KW();
-    const int SD = conf_.KSD();
-    const int SH = conf_.KSH();
-    const int SW = conf_.KSW();
-    const int padF = conf_.padFront();
-    const int padT = conf_.padT();
-    const int padL = conf_.padL();
+    const int ID = pd()->ID();
+    const int IH = pd()->IH();
+    const int IW = pd()->IW();
+    const int KD = pd()->KD();
+    const int KH = pd()->KH();
+    const int KW = pd()->KW();
+    const int SD = pd()->KSD();
+    const int SH = pd()->KSH();
+    const int SW = pd()->KSW();
+    const int padF = pd()->padFront();
+    const int padT = pd()->padT();
+    const int padL = pd()->padL();
 
-    const bool is_3d = conf_.desc()->diff_src_desc.ndims == 5;
+    const bool is_3d = pd()->desc()->diff_src_desc.ndims == 5;
 
-    auto alg = conf_.desc()->alg_kind;
+    auto alg = pd()->desc()->alg_kind;
 
     auto apply_offset = [=](int index, int offset) {
         return (index > offset) ? index - offset : 0;
@@ -360,13 +356,13 @@ void ref_pooling_bwd_t<data_type, acc_type>::execute_backward() {
         }
     };
 
-    const int MB = conf_.MB();
-    const int OC = conf_.C();
-    const int OD = conf_.OD();
-    const int OH = conf_.OH();
-    const int OW = conf_.OW();
+    const int MB = pd()->MB();
+    const int OC = pd()->C();
+    const int OD = pd()->OD();
+    const int OH = pd()->OH();
+    const int OW = pd()->OW();
 
-    if (conf_.desc()->alg_kind == alg_kind::pooling_max) {
+    if (pd()->desc()->alg_kind == alg_kind::pooling_max) {
         parallel_nd(MB, OC, [&](int mb, int oc) {
             if (is_3d) ker_zero_3d(mb, oc);
             else ker_zero(mb, oc);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.hpp
index b2be03bc2..ef01167a5 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_pooling.hpp
@@ -67,21 +67,21 @@ struct ref_pooling_fwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_pooling_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_pooling_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
     typedef typename prec_traits<data_type>::type data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward();
-    pd_t conf_;
+    void execute_forward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 template <impl::data_type_t data_type, impl::data_type_t acc_type = data_type>
@@ -120,20 +120,20 @@ struct ref_pooling_bwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_pooling_bwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_pooling_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
     typedef typename prec_traits<data_type>::type data_t;
     typedef typename prec_traits<acc_type>::type acc_data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_backward();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward();
-    pd_t conf_;
+    void execute_backward() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_rnn.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_rnn.cpp
deleted file mode 100644
index 122b4248d..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_rnn.cpp
+++ /dev/null
@@ -1,1192 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-/*
-  General architecture
-
-  for diff states, we have n_states + 1 as we have n_states diff
-  to propagate to the previous iteration and 1 states to propagate
-  to the previous layer
-  index 0 is dh for cell(t-1, l) to consume
-  index 1 is dc for cell(t-1, l) to consume
-  index 2 is dh for cell(t, l-1) to consume
-  this indexing enables to have the same indexing for states in elemwise
-  function
-  only the cell execution function should be impacted
-
- */
-
-#include "c_types_map.hpp"
-#include "math_utils.hpp"
-#include "mkldnn_thread.hpp"
-#include "mkldnn_traits.hpp"
-#include "type_helpers.hpp"
-#include "gemm/gemm.hpp"
-
-#include "ref_rnn.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-using namespace mkldnn::impl::utils;
-using namespace mkldnn::impl::math;
-using namespace prop_kind;
-using namespace alg_kind;
-
-#define AOC array_offset_calculator
-
-inline float one_m_square(float x) {
-    return (1.0f - x) * (1.0f + x);
-}
-inline float x_m_square(float x) {
-    return (1.0f - x) * x;
-}
-
-template <>
-float activation<alg_kind::eltwise_relu, prop_kind::forward>(
-        float dd, float s, float alpha, float cliping) {
-    return relu_fwd<float>(s, alpha);
-}
-
-template <>
-float activation<alg_kind::eltwise_relu, prop_kind::backward>(
-        float dd, float s, float alpha, float cliping) {
-    return relu_bwd<float>(dd, s, alpha);
-}
-
-template <>
-float activation<alg_kind::eltwise_tanh, prop_kind::forward>(
-        float dd, float s, float alpha, float cliping) {
-    return tanh_fwd<float>(s);
-}
-
-template <>
-float activation<alg_kind::eltwise_tanh, prop_kind::backward>(
-        float dd, float s, float alpha, float cliping) {
-    return dd * one_m_square(s);
-}
-
-template <>
-float activation<alg_kind::eltwise_logistic, prop_kind::forward>(
-        float dd, float s, float alpha, float cliping) {
-    return logistic_fwd<float>(s);
-}
-
-template <>
-float activation<alg_kind::eltwise_logistic, prop_kind::backward>(
-        float dd, float s, float alpha, float cliping) {
-    return dd * x_m_square(s);
-}
-
-//************************* Cell execution *************************//
-/// @todo shall this be templated on activation function to enable svml calls
-/// particularly?
-template <>
-elemwise_sig(_ref_rnn_common_t<prop_kind::forward>::rnn_elemwise) {
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<const float, 2> bias(bias_, n_gates, dic);
-    AOC<float, 4> states_t_l(states_t_l_, n_states, iter_stride, batch, wic);
-    parallel_nd(batch, [&](int i) {
-        for (int j = 0; j < dic; j++) {
-            const float h =
-                activation_func(0, ws_gates(i, j) + bias(0, j), 0, 0);
-            ws_gates(i, j) = states_t_l(0, 0, i, j) = h;
-        }
-    });
-}
-
-template <>
-elemwise_sig(_ref_rnn_common_t<prop_kind::backward>::rnn_elemwise) {
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<float, 4> diff_states_tp1_l(
-            diff_states_tp1_l_, n_states + 1, iter_stride, batch, wic);
-    AOC<float, 4> diff_states_t_lp1(
-            diff_states_t_lp1_, n_states + 1, iter_stride, batch, wic);
-    parallel_nd(batch, [&](int i) {
-        for (int j = 0; j < dic; ++j) {
-            const float dH = diff_states_t_lp1(n_states, 0, i, j)
-                + diff_states_tp1_l(0, 0, i, j);
-            auto g = ws_gates(i, j);
-            ws_gates(i, j) = activation_func(dH, g, 0, 0);
-        }
-    });
-}
-
-template <>
-elemwise_sig(_ref_rnn_common_t<prop_kind::forward>::lstm_elemwise) {
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<const float, 2> bias(bias_, n_gates, dic);
-    AOC<float, 4> states_t_l(states_t_l_, n_states, iter_stride, batch, wic);
-    AOC<float, 4> states_tm1_l(states_tm1_l_, n_states, iter_stride, batch, wic);
-
-    parallel_nd(batch, [&](int i) {
-// WA. Loss of correctnes in case of simd loop unrolling with icc 18
-#if !defined(__INTEL_COMPILER)
-        PRAGMA_OMP_SIMD()
-#endif
-        for (int j = 0; j < dic; j++) {
-            ws_gates(i, 0 * dic + j) = logistic_fwd(ws_gates(i, 0 * dic + j) + bias(0, j));
-            ws_gates(i, 1 * dic + j) = logistic_fwd(ws_gates(i, 1 * dic + j) + bias(1, j));
-            ws_gates(i, 2 * dic + j) = tanh_fwd(ws_gates(i, 2 * dic + j) + bias(2, j));
-            ws_gates(i, 3 * dic + j) = logistic_fwd(ws_gates(i, 3 * dic + j) + bias(3, j));
-
-            float tmp = ws_gates(i, 1 * dic + j) * states_tm1_l(1, 0, i, j)
-                    + ws_gates(i, 0 * dic + j) * ws_gates(i, 2 * dic + j);
-            states_t_l(0, 0, i, j) = ws_gates(i, 3 * dic + j) * tanh_fwd(tmp);
-            states_t_l(1, 0, i, j) = tmp;
-        }
-    });
-}
-
-template <>
-elemwise_sig(_ref_rnn_common_t<prop_kind::backward>::lstm_elemwise) {
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<const float, 2> bias(bias_, n_gates, dic);
-    AOC<float, 4> states_t_l(states_t_l_, n_states, iter_stride, batch, wic);
-    AOC<float, 4> states_tm1_l(states_tm1_l_, n_states, iter_stride, batch, wic);
-    AOC<float, 4> diff_states_t_l(diff_states_t_l_, n_states + 1, iter_stride, batch, wic);
-    AOC<float, 4> diff_states_tp1_l(
-        diff_states_tp1_l_, n_states + 1, iter_stride, batch, wic);
-    AOC<float, 4> diff_states_t_lp1(
-        diff_states_t_lp1_, n_states + 1, iter_stride, batch, wic);
-
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            float Ct = states_t_l(1, 0, i, j);
-            /// @todo save it in the workspace in fwd pass or recompute it to
-            /// save bw
-            float tanhCt = tanh_fwd(Ct);
-            // we have 2 incoming diffs on Ht
-            float dHt = diff_states_tp1_l(0, 0, i, j)
-            + diff_states_t_lp1(n_states, 0, i, j);
-            float dCt = diff_states_tp1_l(1, 0, i, j)
-                    + one_m_square(tanhCt) * ws_gates(i, 3 * dic + j) * dHt;
-
-            float dG1 = states_tm1_l(1, 0, i, j) * dCt
-                    * x_m_square(ws_gates(i, 1 * dic + j));
-            float dG0 = ws_gates(i, 2 * dic + j) * dCt
-                    * x_m_square(ws_gates(i, 0 * dic + j));
-            float dG3 = tanhCt * dHt * x_m_square(ws_gates(i, 3 * dic + j));
-            float dG2 = ws_gates(i, 0 * dic + j) * dCt
-                    * one_m_square(ws_gates(i, 2 * dic + j));
-
-            diff_states_t_l(1, 0, i, j) = dCt * ws_gates(i, 1 * dic + j);
-
-            ws_gates(i, 0 * dic + j) = dG0;
-            ws_gates(i, 1 * dic + j) = dG1;
-            ws_gates(i, 2 * dic + j) = dG2;
-            ws_gates(i, 3 * dic + j) = dG3;
-        }
-    });
-}
-
-template <prop_kind_t aprop>
-gemm_sig(_ref_rnn_common_t<aprop>::packed_gemm) {
-#if (USE_MKL_PACKED_GEMM)
-    cblas_sgemm_compute(CblasColMajor, CblasPacked,
-            is_B_trans ? CblasTrans : CblasNoTrans, m, n, k, a_, strideA_m, b_,
-            is_B_trans ? strideB_n : strideB_k, beta, c_, strideC_m);
-#else
-    UNUSED(m);
-    UNUSED(n);
-    UNUSED(k);
-    UNUSED(a_);
-    UNUSED(b_);
-    UNUSED(c_);
-    UNUSED(is_B_trans);
-    UNUSED(beta);
-    assert(!"packed gemm is disabled");
-#endif
-}
-
-template <prop_kind_t aprop>
-gemm_sig(_ref_rnn_common_t<aprop>::gemm) {
-    float alpha = 1.f;
-    extended_sgemm("N", is_B_trans ? "T" : "N", &m, &n, &k, &alpha,
-            a_, &strideA_m, b_, is_B_trans ? &strideB_n : &strideB_k, &beta,
-            c_, &strideC_m, nullptr, use_jit_sgemm_);
-}
-
-template <prop_kind_t aprop>
-void _ref_rnn_common_t<aprop>::gates_reduction(int n_gates, int dic, int wic, int batch,
-        const float *ws_gates_, float *diff_bias_) {
-    auto body = [&](int i, int k) {
-        for (int j = 0; j < batch; j++)
-            diff_bias_[i * dic + k]
-                    += ws_gates_[j * conf_.GC() + i * dic + k];
-    };
-
-    // @todo block k on simd-width
-#if MKLDNN_THR == MKLDNN_THR_OMP && _OPENMP >= 201307 \
-    /* icc 17.0 has a problem with simd collapse */ \
-    && !((defined __INTEL_COMPILER) && (__INTEL_COMPILER == 1700))
-#pragma omp parallel for simd collapse(2)
-    for (int i = 0; i < n_gates; i++)
-        for (int k = 0; k < dic; k++)
-            body(i, k);
-#else
-    parallel_nd(n_gates, dic, body);
-#endif
-}
-/// @todo template this function on fwd or bwd, if the overhead
-///  to pass argument for empty function is too big
-template <>
-cell_execution_sig(_ref_rnn_common_t<prop_kind::forward>::cell_execution) {
-    if (!merge_gemm_layer) {
-        (this->*gemm_input_func)(n_gates * dic, batch, slc, conf_.WL_GLD(), slc,
-                batch, wic, conf_.GC(), batch, w_input_[0], states_t_lm1_,
-                ws_gates_, false, 0.0f);
-    }
-    (this->*gemm_state_func)(n_gates * dic, batch, sic, conf_.WI_GLD(), sic,
-            batch, wic, conf_.GC(), batch, w_state_[0], states_tm1_l_,
-            ws_gates_, false, 1.0f);
-    (this->*elemwise_func)(dic, wic, batch, n_states, iter_stride, n_gates, ws_gates_,
-            states_t_l_, states_t_lm1_, states_tm1_l_, diff_states_t_l_,
-            diff_states_t_lp1_, diff_states_tp1_l_, bias_, ws_grid_, ws_cell_);
-}
-
-template <>
-cell_execution_sig(_ref_rnn_common_t<prop_kind::backward>::cell_execution) {
-    (this->*elemwise_func)(dic, wic, batch, n_states, iter_stride, n_gates, ws_gates_,
-            states_t_l_, states_t_lm1_, states_tm1_l_, diff_states_t_l_,
-            diff_states_t_lp1_, diff_states_tp1_l_, bias_, ws_grid_, ws_cell_);
-
-    /// bwd by data on the cell
-    (this->*gemm_state_func)(sic, batch, n_gates * dic, conf_.WI_GLD(),
-            n_gates * dic, batch, conf_.GC(), wic, batch, w_state_[0],
-            ws_gates_, diff_states_t_l_, false, 0.0f);
-
-    if (!merge_gemm_layer) {
-        (this->*gemm_input_func)(slc, batch, n_gates * dic, conf_.WL_GLD(),
-                n_gates * dic, batch, conf_.GC(), wic, batch, w_input_[0],
-                ws_gates_,
-                diff_states_t_l_ + n_states * iter_stride * (batch * wic),
-                false, 0.0f);
-
-        /// bwd by weights on the cell
-        gemm(n_gates * dic, slc, batch, conf_.GC(), batch, wic, batch,
-                conf_.DWL_GLD(), slc, ws_gates_, states_t_lm1_, diff_w_input_,
-                true, 1.0f);
-    }
-
-    if (!merge_gemm_iter)
-        gemm(n_gates * dic, sic, batch, conf_.GC(), batch, wic, batch,
-                conf_.DWI_GLD(), sic, ws_gates_, states_tm1_l_, diff_w_state_,
-                true, 1.0f);
-    /// bwd by bias we just accumulate diffs from the gates
-    gates_reduction(n_gates, dic, wic, batch, ws_gates_, diff_bias_);
-}
-
-template <>
-cell_execution_sig(_ref_rnn_common_t<prop_kind::forward>::cell_execution_gru) {
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<const float, 2> bias(bias_, n_gates, dic);
-    AOC<float, 2> states_t_l(states_t_l_, batch, wic);
-    AOC<float, 2> states_tm1_l(states_tm1_l_, batch, wic);
-
-    // 1. gemm Wx[0-2],x
-    if (!merge_gemm_layer) {
-        (this->*gemm_input_func)(n_gates * dic, batch, slc, conf_.WL_GLD(), slc,
-                batch, wic, conf_.GC(), batch, w_input_[0], states_t_lm1_,
-                ws_gates_, false, 0.0f);
-    }
-
-    // 2. gemm Wh[0-1],h
-    (this->*gemm_state_func)((n_gates - 1) * dic, batch, sic, conf_.WI_GLD(),
-            sic, batch, wic, conf_.GC(), batch, w_state_[0], states_tm1_l_,
-            ws_gates_, false, 1.0f);
-
-    // 3. activation zt and rt + elemwise multiplication rt,ht-1
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            ws_gates(i, 0 * dic + j) = logistic_fwd(ws_gates(i, 0 * dic + j) + bias(0, j));
-            ws_gates(i, 1 * dic + j) = logistic_fwd(ws_gates(i, 1 * dic + j) + bias(1, j));
-            states_t_l(i, j) = states_tm1_l(i, j) * ws_gates(i, 1 * dic + j);
-        }
-    });
-
-    // 4. gemm Wh[2],h~t
-    (this->*gemm_state_func)(dic, batch, sic, conf_.WI_GLD(), sic, batch, wic,
-            conf_.GC(), batch, w_state_[1], states_t_l_,
-            &(ws_gates(0, 2 * dic)), false, 1.0f);
-
-    // 5. activation h~t + calculate ht
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            ws_gates(i, 2 * dic + j) = tanh_fwd(ws_gates(i, 2 * dic + j) + bias(2, j));
-            states_t_l(i, j) = states_tm1_l(i, j) * ws_gates(i, 0 * dic + j) +
-                (1.0f - ws_gates(i, 0 * dic +  j)) * ws_gates(i, 2 * dic + j);
-        }
-    });
-}
-
-template <>
-elemwise_sig(_ref_rnn_common_t<prop_kind::forward>::gru_lbr_elemwise) {
-    bool is_training = conf_.is_training();
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<float, 2> ws_Wh_b(ws_grid_, batch, dic);
-    AOC<const float, 2> bias(bias_, n_gates + 1, dic);
-    AOC<float, 2> states_t_l(states_t_l_, batch, wic);
-    AOC<float, 2> states_tm1_l(states_tm1_l_, batch, wic);
-    AOC<float, 3> ws_gemm_state(ws_cell_, batch, conf_.GC());
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            float Wh_b = ws_gemm_state(i, 2 * dic + j) + bias(3, j);
-            ws_gates(i, 0 * dic + j) = logistic_fwd(ws_gates(i, 0 * dic + j) +
-                ws_gemm_state(i, j) + bias(0, j));
-            ws_gates(i, 1 * dic + j) = logistic_fwd(ws_gates(i, 1 * dic + j) +
-                ws_gemm_state(i, dic + j) + bias(1, j));
-            ws_gates(i, 2 * dic + j) = tanh_fwd(ws_gates(i, 2 * dic + j) +
-                ws_gates(i, 1 * dic + j) * Wh_b + bias(2, j));
-            states_t_l(i, j) = states_tm1_l(i, j) * ws_gates(i, 0 * dic + j) +
-                (1.0f - ws_gates(i, 0 * dic + j)) * ws_gates(i, 2 * dic + j);
-            if (is_training) ws_Wh_b(i, j) = Wh_b;
-        }
-    });
-}
-
-template <>
-cell_execution_sig(_ref_rnn_common_t<prop_kind::forward>::cell_execution_gru_lbr) {
-    if (!merge_gemm_layer) {
-        (this->*gemm_input_func)(n_gates * dic, batch, slc, conf_.WL_GLD(), slc,
-                batch, wic, conf_.GC(), batch, w_input_[0], states_t_lm1_,
-                ws_gates_, false, 0.0f);
-    }
-    (this->*gemm_state_func)(n_gates * dic, batch, sic, conf_.WI_GLD(), sic,
-            batch, wic, conf_.GC(), batch, w_state_[0], states_tm1_l_, ws_cell_,
-            false, 0.0f);
-    (this->*elemwise_func)(dic, wic, batch, n_states, iter_stride, n_gates, ws_gates_,
-            states_t_l_, states_t_lm1_, states_tm1_l_, diff_states_t_l_,
-            diff_states_t_lp1_, diff_states_tp1_l_, bias_, ws_grid_, ws_cell_);
-}
-
-template <>
-elemwise_sig(_ref_rnn_common_t<prop_kind::backward>::gru_lbr_elemwise) {
-    AOC<float, 3> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<const float, 2> states_tm1_l(states_tm1_l_, batch, wic);
-    AOC<float, 4> diff_states_t_l(diff_states_t_l_, n_states + 1, iter_stride, batch, wic);//dht-1 dxt
-    AOC<float, 4> diff_states_tp1_l(
-        diff_states_tp1_l_, n_states + 1, iter_stride, batch, wic);
-    AOC<float, 4> diff_states_t_lp1(
-        diff_states_t_lp1_, n_states + 1, iter_stride, batch, wic);
-    AOC<float, 3> ws_gates_r(ws_cell_, batch, conf_.GC());
-    AOC<float, 2> ws_Wh_b(ws_grid_, batch, dic);
-
-    // 1. calculate dG1 dG2 dG3
-    // dG0 = (dht - G2) * dht * (1 - G0) * G0
-    // dG1 = (W*h + b) * dG2 * (1 - G1) * G1
-    // dG2 = (1 - G0) * dht * (1 - G2*G2)
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            float h = states_tm1_l(i, j);
-            float dHt = diff_states_tp1_l(0, 0, i, j)
-                    + diff_states_t_lp1(n_states, 0, i, j);
-            float dG0 = (h - ws_gates(i, 2 * dic + j)) * dHt
-                    * x_m_square(ws_gates(i, 0 * dic + j));
-            float dG2 = (1.0f - ws_gates(i, 0 * dic + j))
-                    * one_m_square(ws_gates(i, 2 * dic + j)) * dHt;
-            float dG1 = ws_Wh_b(i, j) * dG2
-                    * x_m_square(ws_gates(i, 1 * dic + j));
-
-            diff_states_t_l(0, 0, i, j) = dHt * ws_gates(i, 0 * dic + j);
-            ws_gates(i, 2 * dic + j) = dG2;
-            ws_gates_r(i, 2 * dic + j) = dG2 * ws_gates(i, 1 * dic + j);
-            ws_gates(i, 0 * dic + j) = ws_gates_r(i, 0 * dic + j) = dG0;
-            ws_gates(i, 1 * dic + j) = ws_gates_r(i, 1 * dic + j) = dG1;
-        }
-    });
-}
-
-template <>
-cell_execution_sig(_ref_rnn_common_t<prop_kind::backward>::cell_execution_gru_lbr) {
-    AOC<float, 2> diff_bias(diff_bias_, n_gates + 1, dic);
-    AOC<float, 3> ws_gates_r(ws_cell_, batch, conf_.GC());
-
-    (this->*elemwise_func)(dic, wic, batch, n_states, iter_stride, n_gates, ws_gates_,
-            states_t_l_, states_t_lm1_, states_tm1_l_, diff_states_t_l_,
-            diff_states_t_lp1_, diff_states_tp1_l_, bias_, ws_grid_, ws_cell_);
-
-    if (!merge_gemm_layer) {
-         //  dx = dG * Wx^t
-         (this->*gemm_input_func)(slc, batch, n_gates * dic, conf_.WL_GLD(),
-                 n_gates * dic, batch, conf_.GC(), wic, batch, w_input_[0],
-                 ws_gates_,
-                 diff_states_t_l_ + n_states * iter_stride * (batch * wic),
-                 false, 0.0f);
-         // dWx +=  dG^t * x
-         gemm(n_gates * dic, slc, batch, conf_.GC(), batch, wic, batch,
-                 conf_.DWL_GLD(), slc, ws_gates_, states_t_lm1_, diff_w_input_,
-                 true, 1.0f);
-    }
-    // dh +=  dGr * Wh^t
-    (this->*gemm_state_func)(sic, batch, n_gates * dic, conf_.WI_GLD(),
-            n_gates * dic, batch, conf_.GC(), wic, batch, w_state_[0], ws_cell_,
-            diff_states_t_l_, false, 1.0f);
-
-    // dWh += dGr^t * h
-    gemm(n_gates * dic, sic, batch, conf_.GC(), batch, wic, batch,
-            conf_.DWL_GLD(), sic, ws_cell_, states_tm1_l_, diff_w_state_, true,
-            1.0f);
-
-    // db1-3 += e * dG
-    // db4 += e * (r * dG2)
-    gates_reduction(n_gates, dic, wic, batch, ws_gates_, diff_bias_);
-
-    parallel_nd(dic, [&](int j) {
-        for (int i = 0; i < batch; i++) {
-            diff_bias_[3 * dic + j] += ws_gates_r(i, 2 *dic + j);
-        }
-    });
-}
-
-template <>
-cell_execution_sig(_ref_rnn_common_t<prop_kind::backward>::cell_execution_gru) {
-    AOC<float, 2> ws_gates(ws_gates_, batch, conf_.GC());
-    AOC<const float, 2> states_tm1_l(states_tm1_l_, batch, wic);
-    AOC<float, 4> diff_states_t_l(diff_states_t_l_, n_states + 1, iter_stride, batch, wic);//dht-1 dxt
-    AOC<float, 3> diff_w_state(diff_w_state_, sic, conf_.GC());
-    AOC<float, 4> diff_states_tp1_l(
-        diff_states_tp1_l_, n_states + 1, iter_stride, batch, wic);
-    AOC<float, 4> diff_states_t_lp1(
-        diff_states_t_lp1_, n_states + 1, iter_stride, batch, wic);
-    //use state memory for intermediate computations
-    float *dhG1_ = &(diff_states_t_l(n_states, 0, 0, 0));
-    float *hG1_ = dhG1_;
-    AOC<float, 2> dhG1(dhG1_, batch, wic);
-    AOC<float, 2> hG1(hG1_, batch, wic);
-
-    // 1. calculate dG2, dG1, and part of dht-1
-    // dG2^ = dh * (1 - G0) * (1 - G2^2)
-    // dG0^ = dh * (ht-1 - G2) * u * (1 - G0)
-    // dht-1 (part) = dh * G0
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            float h = states_tm1_l(i, j);
-            float dHt = diff_states_tp1_l(0, 0, i, j)
-                    + diff_states_t_lp1(n_states, 0, i, j);
-            float dG2 = (1.0f - ws_gates(i, 0 * dic + j)) * dHt
-                    * one_m_square(ws_gates(i, 2 * dic + j));
-            float dG0 = (h - ws_gates(i, 2 * dic + j)) * dHt
-                    * x_m_square(ws_gates(i, 0 * dic + j));
-
-            diff_states_t_l(0, 0, i, j) = dHt * ws_gates(i, 0 * dic + j);
-            ws_gates(i, 0 * dic + j) = dG0;
-            ws_gates(i, 2 * dic + j) = dG2;
-        }
-    });
-
-    //2. calculate intermediate d(hG1)
-    //d(hG1) = dG2 * W2h^t
-    (this->*gemm_state_func)(sic, batch, dic, conf_.WI_GLD(), n_gates * dic,
-            batch, conf_.GC(), wic, batch, w_state_[1], &(ws_gates(0, 2 * dic)),
-            dhG1_, false, 0.0f);
-
-    //3. calculate dG1^ and part of dht-1
-    //dG1^ = d(hG1) * h * G1 * (1 - G1)
-    //dht-1 (part) += d(hG1) * G1
-    //h * G1 (required for dWh)
-    parallel_nd(batch, [&](int i) {
-        PRAGMA_OMP_SIMD()
-        for (int j = 0; j < dic; j++) {
-            float h = states_tm1_l(i, j);
-            float G1 =  ws_gates(i, 1 * dic + j);
-            diff_states_t_l(0, 0, i, j) += dhG1(i, j) * G1;
-            ws_gates(i, 1 * dic + j) = dhG1(i, j) * h * x_m_square(G1);
-            hG1(i, j) = G1 * h;
-        }
-    });
-
-    //4. calculate diff weights
-    //dWh1 += dG1 * h, dWh2 += dG2 * h, dWh3 += dG3 * (G1(*)h)
-    gemm((n_gates - 1) * dic, sic, batch, conf_.GC(), batch, wic, batch,
-            conf_.DWI_GLD(), sic, ws_gates_, states_tm1_l_, diff_w_state_, true,
-            1.0f);
-    gemm(dic, sic, batch, conf_.GC(), batch, wic, batch, conf_.DWI_GLD(), sic,
-            &(ws_gates(0, 2 * dic)), hG1_, &(diff_w_state(0, 2 * dic)), true,
-            1.0f);
-
-    //5. calculate diff states
-    //dht-1 += dG1 * W1h + dG0 * W0h
-    (this->*gemm_state_func)(sic, batch, (n_gates - 1) * dic, conf_.WI_GLD(),
-            n_gates * dic, batch, conf_.GC(), wic, batch, w_state_[0],
-            ws_gates_, diff_states_t_l_, false, 1.0f);
-
-    if (!merge_gemm_layer) {
-        //dWx += [dG0 dG1 dG2] * [x]
-        gemm(n_gates * dic, slc, batch, conf_.GC(), batch, wic, batch,
-                conf_.DWL_GLD(), slc, ws_gates_, states_t_lm1_, diff_w_input_,
-                true, 1.0f);
-        //dx = dG2 * W2x + dG1 * W1x + dG0 * W0x
-        (this->*gemm_input_func)(slc, batch, n_gates * dic, conf_.WL_GLD(),
-                n_gates * dic, batch, conf_.GC(), wic, batch, w_input_[0],
-                ws_gates_, &(diff_states_t_l(n_states, 0, 0, 0)), false, 0.0f);
-    }
-
-    //6. calculate diff bias
-    gates_reduction(n_gates, dic, wic, batch, ws_gates_, diff_bias_);
-}
-
-//*************** Grid computations strategy: linear ***************//
-template <prop_kind_t aprop>
-grid_execution_sig(_ref_rnn_common_t<aprop>::linear_execution) {
-    AOC<float, 5> ws_states(ws_states_, n_layer + 1, n_direction, n_states, n_iter + 1,
-            batch * wic);
-    AOC<float, 5> ws_diff_states(ws_diff_states_, n_layer + 1, n_direction, (n_states + 1),
-            n_iter + 1, batch * wic);
-    AOC<float, 4> ws_gates(
-            ws_gates_, n_layer, n_direction, n_iter, batch * conf_.GC());
-    AOC<float *, 3> weights_input(weights_input_, n_layer, n_direction,
-            n_parts_wei_i);
-    AOC<float *, 3> weights_states(weights_states_, n_layer, n_direction,
-            n_parts_wei_st);
-    AOC<const float, 3> bias(bias_, n_layer, n_direction, n_bias * dic);
-    AOC<float, 3> diff_weights_layer(
-            diff_weights_layer_, n_layer, n_direction, slc * conf_.DWL_GLD());
-    AOC<float, 3> diff_weights_iter(
-            diff_weights_iter_, n_layer, n_direction, sic * conf_.DWI_GLD());
-    AOC<float, 3> diff_bias(diff_bias_, n_layer, n_direction, n_bias * dic);
-    AOC<float, 4> ws_grid(ws_grid_, n_layer, n_direction, n_iter, ws_per_cell);
-
-    // We run the grid of computation
-    for (int dir = 0; dir < n_direction; dir++) {
-        for (int j = 0; j < n_layer; j++) {
-            int lay = (aprop == prop_kind::forward) ? j : n_layer - j - 1;
-            if ((aprop == prop_kind::forward) && merge_gemm_layer) {
-                /* Assumption: merge_gemm_layer happens only on forward */
-                (this->*gemm_input_func)(n_gates * dic, batch * n_iter, slc,
-                        conf_.WL_GLD(), slc, batch * n_iter, wic, conf_.GC(),
-                        batch * n_iter, weights_input(lay, dir, 0),
-                        &(ws_states(lay, dir, 0, 1, 0)),
-                        &(ws_gates(lay, dir, 0, 0)), false, 0.0f);
-            }
-            for (int i = 0; i < n_iter; i++) {
-                int iter = (aprop == prop_kind::forward) ? i : n_iter - i - 1;
-                (this->*cell_func)(dic, slc, sic, wic, batch, n_gates, n_states, n_iter + 1,
-                        &(ws_states(lay + 1, dir, 0, iter + 1, 0)),
-                        &(ws_diff_states(lay, dir, 0, iter, 0)),
-                        &(weights_input(lay, dir, 0)),
-                        &(weights_states(lay, dir, 0)),
-                        &(bias(lay, dir, 0)),
-                        &(ws_states(lay, dir, 0, iter + 1, 0)),
-                        &(ws_states(lay + 1, dir, 0, iter, 0)),
-                        &(ws_diff_states(lay + 1, dir, 0, iter, 0)),
-                        &(ws_diff_states(lay, dir, 0, iter + 1, 0)),
-                        &(diff_weights_layer(lay, dir, 0)),
-                        &(diff_weights_iter(lay, dir, 0)),
-                        &(diff_bias(lay, dir, 0)),
-                        &(ws_gates(lay, dir, iter, 0)),
-                        &(ws_grid(lay, dir, iter, 0)),
-                        ws_cell_);
-            }
-            if ((aprop == prop_kind::backward) && merge_gemm_layer) {
-                (this->*gemm_input_func)(slc, batch * n_iter, n_gates * dic,
-                        conf_.WL_GLD(), n_gates * dic, batch * n_iter,
-                        conf_.GC(), wic, batch * n_iter,
-                        weights_input(lay, dir, 0), &(ws_gates(lay, dir, 0, 0)),
-                        &(ws_diff_states(lay, dir, n_states, 0, 0)), false,
-                        0.0f);
-                gemm(n_gates * dic, slc, batch * n_iter, conf_.GC(),
-                        batch * n_iter, wic, batch * n_iter, conf_.DWL_GLD(),
-                        slc, &(ws_gates(lay, dir, 0, 0)),
-                        &(ws_states(lay, dir, 0, 1, 0)),
-                        &(diff_weights_layer(lay, dir, 0)), true, 1.0f);
-            }
-            if ((aprop == prop_kind::backward) && merge_gemm_iter) {
-                gemm(n_gates * dic, sic, batch * n_iter, conf_.GC(),
-                        batch * n_iter, wic, batch * n_iter, conf_.DWI_GLD(),
-                        sic, &(ws_gates(lay, dir, 0, 0)),
-                        &(ws_states(lay + 1, dir, 0, 0, 0)),
-                        &(diff_weights_iter(lay, dir, 0)), true, 1.0f);
-            }
-        }
-    }
-}
-
-//********* GRID computations strategy: utility functions **********//
-
-template <>
-void _ref_rnn_common_t<prop_kind::forward>::copy_init_layer(bool lr, bool rl,
-        int n_layer, int n_direction, int n_iter, int batch, int slc, int dic,
-        int dlc, int wic, int n_states, float *ws_states_,
-        float *ws_diff_states_, const float *xt_,
-        const float *diff_dst_layer_) {
-    AOC<float, 5> ws_states(
-            ws_states_, n_direction, n_states, n_iter + 1, batch, wic);
-    auto xt_d = memory_desc_wrapper(conf_.src_pd(0));
-
-    parallel_nd(n_iter, [&](int it) {
-        auto xxt = xt_ + xt_d.blk_off(it);
-        if (lr)
-            for (int b = 0; b < batch; b++)
-                for (int c = 0; c < slc; c++)
-                    ws_states(0, 0, it + 1, b, c) = *(xxt + b * slc + c);
-        if (rl)
-            for (int b = 0; b < batch; b++)
-                for (int c = 0; c < slc; c++)
-                    ws_states(n_direction - 1, 0, n_iter - it, b, c)
-                            = *(xxt + b * slc + c);
-    });
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::backward>::copy_init_layer(bool lr, bool rl,
-        int n_layer, int n_direction, int n_iter, int batch, int slc, int dic,
-        int dlc, int wic, int n_states, float *ws_states_,
-        float *ws_diff_states_, const float *xt_,
-        const float *diff_dst_layer_) {
-    AOC<float, 6> ws_diff_states(ws_diff_states_, n_layer + 1, n_direction,
-            (n_states + 1), n_iter + 1, batch, wic);
-    auto diff_dst_layer_d = memory_desc_wrapper(conf_.diff_dst_pd(0));
-
-    switch (conf_.direction()) {
-    case mkldnn_bidirectional_concat:
-        parallel_nd(n_iter, batch, [&](int it, int b) {
-            auto diff_dst_layer_x
-            = diff_dst_layer_ + diff_dst_layer_d.blk_off(it, b);
-            for (int s = 0; s < dic; s++) {
-                ws_diff_states(n_layer, 0, n_states, it, b, s)
-                    = diff_dst_layer_x[s];
-                ws_diff_states(n_layer, 1, n_states, n_iter - it - 1, b, s)
-                    = diff_dst_layer_x[dic + s];
-            }
-        });
-        break;
-    case mkldnn_bidirectional_sum:
-        parallel_nd(n_iter, batch, [&](int it, int b) {
-            auto diff_dst_layer_x
-            = diff_dst_layer_ + diff_dst_layer_d.blk_off(it, b);
-            for (int s = 0; s < dic; s++) {
-                ws_diff_states(n_layer, 0, n_states, it, b, s)
-                    = diff_dst_layer_x[s];
-                ws_diff_states(n_layer, 1, n_states, n_iter - it - 1, b, s)
-                    = diff_dst_layer_x[s];
-            }
-        });
-        break;
-    case mkldnn_unidirectional_left2right:
-        parallel_nd(n_iter, batch, [&](int it, int b) {
-            auto diff_dst_layer_x
-                    = diff_dst_layer_ + diff_dst_layer_d.blk_off(it, b);
-            for (int s = 0; s < dic; s++) {
-                ws_diff_states(n_layer, 0, n_states, it, b, s)
-                        = diff_dst_layer_x[s];
-            }
-        });
-        break;
-    case mkldnn_unidirectional_right2left:
-        parallel_nd(n_iter, batch, [&](int it, int b) {
-            auto diff_dst_layer_x
-                    = diff_dst_layer_ + diff_dst_layer_d.blk_off(n_iter - it - 1, b);
-            for (int s = 0; s < dic; s++) {
-                ws_diff_states(n_layer, 0, n_states, it, b, s)
-                        = diff_dst_layer_x[s];
-            }
-        });
-        break;
-    default:
-        assert(!"Unsupported direction");
-        break;
-    }
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::forward>::copy_init_iter(int n_layer,
-        int n_direction, int n_states, int batch, int sic, int dic, int wic,
-        int n_iter, float *ws_states_, float *ws_diff_states_,
-        const float *firstit_states_, const float *diff_dst_iter_) {
-    AOC<float, 6> ws_states(ws_states_, n_layer + 1, n_direction, n_states,
-            n_iter + 1, batch, wic);
-    auto firstit_states_d = memory_desc_wrapper(conf_.src_pd(1));
-    if (firstit_states_) {
-        parallel_nd(n_layer, n_direction, [&](int lay, int dir) {
-            for (int state = 0; state < n_states; state++)
-                for (int b = 0; b < batch; ++b) {
-                    array_copy(&(ws_states(lay + 1, dir, state, 0, b, 0)),
-                        firstit_states_ + firstit_states_d.blk_off(
-                        lay, dir, state, b), sic);
-                }
-        });
-    } else {
-        parallel_nd(n_layer, n_direction, [&](int lay, int dir) {
-            for (int state = 0; state < n_states; state++)
-                for (int i = 0; i < batch; i++)
-                    for (int j = 0; j < sic; j++)
-                        ws_states(lay + 1, dir, state, 0, i, j) = 0.0f;
-        });
-    }
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::backward>::copy_init_iter(int n_layer,
-        int n_direction, int n_states, int batch, int sic, int dic, int wic,
-        int n_iter, float *ws_states_, float *ws_diff_states_,
-        const float *firstit_states_, const float *diff_dst_iter_) {
-    AOC<float, 6> ws_diff_states(ws_diff_states_, n_layer + 1, n_direction,
-            n_states + 1, n_iter + 1, batch, wic);
-    auto diff_dst_iter_d = memory_desc_wrapper(conf_.diff_dst_pd(1));
-    if (diff_dst_iter_) {
-        parallel_nd(n_layer, n_direction, n_states, batch,
-            [&](int lay, int dir, int state, int b) {
-            array_copy(&(ws_diff_states(lay, dir, state, n_iter, b, 0)),
-                diff_dst_iter_ + diff_dst_iter_d.blk_off(lay, dir, state, b),
-                dic);
-        });
-    } else {
-        parallel_nd(n_layer, n_direction, n_states, batch,
-            [&](int lay, int dir, int state, int i) {
-            for (int j = 0; j < dic; j++)
-                ws_diff_states(lay, dir, state, n_iter, i, j) = 0.0f;
-        });
-    }
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::forward>::copy_res_layer(bool lr, bool rl,
-        int n_layer, int n_direction, int n_iter, int batch,
-        int n_output_features, int slc, int dic, int wic, int n_states,
-        mkldnn_rnn_direction_t direction, float *dst_layer_,
-        float *diff_src_layer, const float *ws_states_,
-        const float *ws_diff_states_) {
-    auto dst_layer_d = memory_desc_wrapper(conf_.dst_pd(0));
-    AOC<const float, 6> ws_states(ws_states_, n_layer + 1, n_direction,
-            n_states, n_iter + 1, batch, wic);
-
-    parallel_nd(n_iter, batch, [&](int it, int b) {
-        int dir = 0;
-        if (lr) {
-            for (int s = 0; s < dic; s++)
-                dst_layer_[dst_layer_d.blk_off(it, b, dir * dic + s)]
-                        = ws_states(n_layer, dir, 0, it + 1, b, s);
-            dir = 1;
-        }
-        if (rl) {
-            for (int s = 0; s < dic; s++)
-                switch (direction) {
-                case mkldnn_bidirectional_sum:
-                    dst_layer_[dst_layer_d.blk_off(it, b, s)] += ws_states(
-                            n_layer, dir, 0, n_iter - it, b, s);
-                    break;
-                default:
-                    dst_layer_[dst_layer_d.blk_off(it, b, dir * dic + s)]
-                            = ws_states(n_layer, dir, 0, n_iter - it, b, s);
-                }
-        }
-    });
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::backward>::copy_res_layer(bool lr, bool rl,
-        int n_layer, int n_direction, int n_iter, int batch,
-        int n_output_features, int slc, int dic, int wic, int n_states,
-        mkldnn_rnn_direction_t direction, float *dst_layer_,
-        float *diff_src_layer_, const float *ws_states_,
-        const float *ws_diff_states_) {
-    auto diff_src_layer_d = memory_desc_wrapper(conf_.diff_src_pd(0));
-    AOC<const float, 6> ws_diff_states(ws_diff_states_, n_layer + 1,
-            n_direction, n_states + 1, n_iter + 1, batch, wic);
-
-    parallel_nd(n_iter, batch, [&](int it, int b) {
-        int dir = 0;
-        for (int s = 0; s < slc; s++) {
-            float *dst_addr = diff_src_layer_
-                    + diff_src_layer_d.blk_off(
-                              (direction
-                                      == mkldnn_unidirectional_right2left) ?
-                                      n_iter - 1 - it :
-                                      it,
-                              b, dir * slc + s);
-            float res = ws_diff_states(0, 0, n_states, it, b, s);
-            if (n_direction - 1)
-                res += ws_diff_states(
-                        0, 1, n_states, n_iter - 1 - it, b, s);
-            dst_addr[0] = res;
-        }
-    });
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::forward>::copy_res_iter(int n_layer,
-        int n_direction, int n_states, int batch, int sic, int dic, int wic,
-        int n_iter, float *dst_iter_, float *diff_src_iter_,
-        const float *ws_states_, const float *ws_diff_states_) {
-    auto dst_iter_d = memory_desc_wrapper(conf_.dst_pd(1));
-    AOC<const float, 6> ws_states(ws_states_, n_layer + 1, n_direction,
-            n_states, n_iter + 1, batch, wic);
-    if (dst_iter_) {
-        parallel_nd(n_layer, n_direction, n_states, batch,
-            [&](int lay, int dir, int state, int b) {
-            for (int s = 0; s < dic; s++) {
-                dst_iter_[dst_iter_d.blk_off(lay, dir, state, b, s)]
-                        = ws_states(lay + 1, dir, state, n_iter, b, s);
-            }
-        });
-    }
-}
-
-template <>
-void _ref_rnn_common_t<prop_kind::backward>::copy_res_iter(int n_layer,
-        int n_direction, int n_states, int batch, int sic, int dic, int wic,
-        int n_iter, float *dst_iter_, float *diff_src_iter_,
-        const float *ws_states_, const float *ws_diff_states_) {
-    auto diff_src_iter_d = memory_desc_wrapper(conf_.diff_src_pd(1));
-    AOC<const float, 6> ws_diff_states(ws_diff_states_, n_layer + 1,
-            n_direction, n_states + 1, n_iter + 1, batch, wic);
-    if (diff_src_iter_) {
-        parallel_nd(n_layer, n_direction, n_states, batch,
-            [&](int lay, int dir, int state, int b) {
-            for (int s = 0; s < sic; s++) {
-                diff_src_iter_[diff_src_iter_d.blk_off(
-                        lay, dir, state, b, s)]
-                        = ws_diff_states(lay, dir, state, 0, b, s);
-            }
-        });
-    }
-}
-
-template <prop_kind_t aprop>
-packing_sig(_ref_rnn_common_t<aprop>::pack_weights) {
-#if (USE_MKL_PACKED_GEMM)
-    AOC<const float, 5> w(
-            w_, n_layer, n_direction, IC_size, n_gates, OC_size);
-    AOC<float *, 3> weights(weights_, n_layer, n_direction, n_parts);
-    int m = 0, n = 0, k = 0;
-    auto transA = CblasNoTrans;
-    bool is_fwd = aprop == prop_kind::forward;
-    if (is_fwd) {
-        m = n_gates * OC_size;
-        n = batch;
-        k = IC_size;
-        //todo: do a transposition if ldgoi
-        transA = CblasNoTrans;
-    } else {
-        m = IC_size;
-        n = batch;
-        k = n_gates * OC_size;
-        //TODO: do a transposition if ldigo
-        transA = CblasNoTrans;
-    }
-    for (int i = 0; i < n_layer; i++) {
-        for (int d = 0; d < n_direction; d++) {
-            for (int p = 0; p < n_parts; p++) {
-                int m_p = is_fwd ? (gates_per_part[p] * OC_size) : m;
-                int k_p = is_fwd ? k : (gates_per_part[p] * OC_size);
-                int g = (p > 0) ? gates_per_part[p - 1] : 0;
-                weights(i, d, p) = cblas_sgemm_alloc(CblasAMatrix, m_p, n, k_p);
-                cblas_sgemm_pack(CblasColMajor, CblasAMatrix, transA, m_p, n,
-                        k_p, 1.0f, &(w(i, d, 0, g, 0)), m, weights(i, d, p));
-            }
-        }
-    }
-#else
-    UNUSED(n_layer);
-    UNUSED(n_direction);
-    UNUSED(n_weights);
-    UNUSED(n_gates);
-    UNUSED(n_parts);
-    UNUSED(gates_per_part);
-    UNUSED(batch);
-    UNUSED(OC_size);
-    UNUSED(IC_size);
-    UNUSED(weights_);
-    UNUSED(w_);
-    assert(!"packed gemm is disabled");
-#endif
-}
-
-template <prop_kind_t aprop>
-packing_sig(_ref_rnn_common_t<aprop>::no_pack_weights) {
-    AOC<const float, 3> w(
-            w_, n_layer, n_direction, IC_size * n_gates * OC_size);
-    AOC<float *, 3> weights(weights_, n_layer, n_direction, n_parts);
-    int m = 0, n = 0, ldA = 0;
-
-    bool is_fwd = aprop == prop_kind::forward;
-    if (is_fwd) {
-        m = n_gates * OC_size;
-        n = IC_size;
-        ldA = conf_.GC();
-    } else {
-        m = IC_size;
-        n = n_gates * OC_size;
-        ldA = conf_.WIC();
-    }
-
-    if (!do_copy) {
-        for (int i=0; i < n_layer; i++)
-            for (int d = 0; d < n_direction; d++) {
-                weights(i, d, 0) = (float *) &(w(i, d, 0));
-                for (int p = 1; p < n_parts; p++) {
-                    size_t offset = is_fwd
-                        ? gates_per_part[p - 1] * OC_size
-                        : gates_per_part[p - 1] * OC_size * IC_size;
-                    weights(i, d, p) = (float *) &w(i, d, offset);
-                }
-            }
-        return;
-    }
-
-    /* We always assume
-       - column major
-       - alpha = 1.0f
-    */
-    auto copy_matrix = [](char trans, int nrows, int ncols,
-            const float *src, const int ld_src, float *dst, const int ld_dst){
-        for (int i = 0; i < ncols; i++)
-            for (int j = 0; j < nrows; j++)
-                dst[i * ld_dst + j] = src[i * ld_src + j];
-    };
-
-    AOC<float, 3> tmp(scratch_mem, n_layer, n_direction, ldA * n);
-    mkldnn::impl::parallel_nd(n_layer, n_direction, [&](int i, int d) {
-            auto src_mat = &(w(i, d, 0));
-            auto dst_mat = &(tmp(i, d, 0));
-            copy_matrix('N', m, n, src_mat, m, dst_mat, ldA);
-            weights(i, d, 0) = &tmp(i, d, 0);
-            for (int p = 1; p < n_parts; p++) {
-                size_t offset = is_fwd
-                    ? gates_per_part[p - 1] * OC_size
-                    : gates_per_part[p - 1] * OC_size * conf_.WIC();
-                weights(i, d, p) = &tmp(i, d, offset);
-            }
-        });
-}
-
-
-template <prop_kind_t aprop>
-free_packed_sig(_ref_rnn_common_t<aprop>::free_packed_weights) {
-#if (USE_MKL_PACKED_GEMM)
-    AOC<float *, 3> weights(weights_, n_layer, n_direction, n_parts);
-    for (int i = 0; i < n_layer; i++)
-        for (int j = 0; j < n_direction; j++)
-            for (int k = 0; k < n_parts; k++)
-                cblas_sgemm_free(weights(i, j, k));
-#else
-    UNUSED(n_layer);
-    UNUSED(n_direction);
-    UNUSED(n_parts);
-    UNUSED(weights_);
-    assert(!"packed gemm is disabled");
-#endif
-}
-
-template <prop_kind_t aprop>
-free_packed_sig(_ref_rnn_common_t<aprop>::free_no_packed_weights) {
-    // IN this case, only scratchpad is used, so no free necessary
-}
-
-//********************* Execution function *********************//
-template <prop_kind_t aprop>
-void _ref_rnn_common_t<aprop>::execute_() {
-    int n_layer = conf_.L();
-    int n_direction = conf_.D();
-    int n_iter = conf_.T();
-    int n_gates = conf_.G();
-    int n_bias = n_gates + conf_.is_lbr();
-    int n_states = conf_.S();
-    int n_weights_input = conf_.SLC();
-    int n_weights_state = conf_.SIC();
-    int batch = conf_.MB();
-    int slc = conf_.SLC();
-    int sic = conf_.SIC();
-    int dic = conf_.DIC();
-    int dlc = conf_.DLC();
-    int wic = conf_.WIC();
-
-    bool is_orig_gru = conf_.cell_kind()
-        == alg_kind::vanilla_gru;
-    int n_parts_wei_st = is_orig_gru ? 2 : 1, n_parts_wei_i = 1;
-    int parts_wei_st = n_gates, parts_wei_i = n_gates,
-        parts_wei_st_gru[2] = {2, 1};
-    bool is_fwd = aprop == prop_kind::forward;
-    int ws_per_cell = conf_.ws_per_cell();
-
-    int input_idx = 0;
-    int output_idx = 0;
-    auto input
-            = reinterpret_cast<const float *>(this->input_memory(input_idx++));
-    auto states = conf_.with_src_iter() ?
-            reinterpret_cast<const float *>(this->input_memory(input_idx++)) :
-            nullptr;
-    auto w_input
-            = reinterpret_cast<const float *>(this->input_memory(input_idx++));
-    auto w_state
-            = reinterpret_cast<const float *>(this->input_memory(input_idx++));
-    auto bias = conf_.with_bias() ?
-            reinterpret_cast<const float *>(this->input_memory(input_idx++)) :
-            nullptr;
-
-    auto dst_last_layer = is_fwd ?
-            reinterpret_cast<float *>(this->memory(output_idx++)) :
-            const_cast<float *>(reinterpret_cast<const float *>(
-                    this->input_memory(input_idx++)));
-    auto dst_last_iter = conf_.with_dst_iter() ?
-            (is_fwd ? reinterpret_cast<float *>(this->memory(output_idx++)) :
-                      const_cast<float *>(reinterpret_cast<const float *>(
-                              this->input_memory(input_idx++)))) :
-            nullptr;
-
-    auto diff_dst_layer = is_fwd ?
-            nullptr :
-            reinterpret_cast<const float *>(this->input_memory(input_idx++));
-    auto diff_dst_iter = is_fwd || !conf_.with_dst_iter() ?
-            nullptr :
-            reinterpret_cast<const float *>(this->input_memory(input_idx++));
-
-    // fetchihg buffers from the workspace
-    // if no workspace was provided we use the scratchpad
-    float *scratch_ptr = ((float *)scratchpad_->get());
-    float *ws_ptr = nullptr;
-    if (use_workspace_)
-        ws_ptr = is_fwd ?
-            reinterpret_cast<float *>(this->memory(output_idx++)) :
-            const_cast<float *>(reinterpret_cast<const float *>(
-                    this->input_memory(input_idx++)));
-    float *base_ptr = use_workspace_ ? ws_ptr : scratch_ptr;
-    ws_gates_ = base_ptr + ws_gates_offset_;
-    ws_states_ = base_ptr + ws_states_offset_;
-    ws_diff_states_ = base_ptr + ws_diff_states_offset_;
-    ws_grid_ = base_ptr + ws_grid_comp_offset_;
-    ws_cell_ = base_ptr + ws_cell_comp_offset_;
-
-    auto diff_src_layer = is_fwd ?
-            nullptr :
-            reinterpret_cast<float *>(this->memory(output_idx++));
-    auto diff_src_iter = is_fwd || !conf_.with_src_iter() ?
-            nullptr :
-            reinterpret_cast<float *>(this->memory(output_idx++));
-    auto diff_weights_layer = is_fwd ?
-            nullptr :
-            reinterpret_cast<float *>(this->memory(output_idx++));
-    auto diff_weights_iter = is_fwd ?
-            nullptr :
-            reinterpret_cast<float *>(this->memory(output_idx++));
-    auto diff_bias = is_fwd || !conf_.with_bias() ?
-            nullptr :
-            reinterpret_cast<float *>(this->memory(output_idx++));
-
-    // Fetching extra buffers from scratchpad
-    ws_weights_layer_ = scratch_ptr + ws_weights_layer_offset_;
-    ws_weights_iter_ = scratch_ptr + ws_weights_iter_offset_;
-    ws_diff_weights_layer_ = scratch_ptr + ws_diff_weights_layer_offset_;
-    ws_diff_weights_iter_ = scratch_ptr + ws_diff_weights_iter_offset_;
-
-
-// initialize diff_states to 0
-    if (aprop == prop_kind::backward) {
-        array_set(ws_diff_states_, 0.0f, conf_.ws_diff_states_size());
-        // TODO: add a variable to check if good_ld_copy is necessary
-        if (copy_diff_weights_layer_) {
-            parallel_nd(conf_.ws_diff_weights_layer_size(), [&](size_t i) {
-                ws_diff_weights_layer_[i] = 0.;
-            });
-        } else
-            ws_diff_weights_layer_ = diff_weights_layer;
-        if (copy_diff_weights_iter_) {
-            parallel_nd(conf_.ws_diff_weights_iter_size(), [&](size_t i) {
-                ws_diff_weights_iter_[i] = 0.;
-            });
-        } else
-            ws_diff_weights_iter_ = diff_weights_iter;
-    }
-
-    // TODO: implement without copies
-    bool is_lr = !one_of(exec_dir, b2t_r2l, t2b_r2l);
-    bool is_rl = !one_of(exec_dir, b2t_l2r, t2b_l2r);
-    // we pack the weights if we are using the packed API
-    (this->*weights_state_pack_func)(n_layer, n_direction, n_weights_state,
-            n_gates, batch, dic, sic, ptr_wei_state_, n_parts_wei_st,
-            (is_orig_gru ? parts_wei_st_gru : &parts_wei_st), w_state,
-            ws_weights_iter_, copy_weights_iter_);
-    (this->*weights_input_pack_func)(n_layer, n_direction, n_weights_input,
-            n_gates, batch, dic, slc, ptr_wei_input_, n_parts_wei_i,
-            &parts_wei_i, w_input,
-            ws_weights_layer_, copy_weights_layer_);
-
-    // we first need to copy the initial states and input into ws
-    copy_init_layer(is_lr, is_rl, n_layer, n_direction, n_iter, batch, slc, dic,
-            dlc, wic, n_states, ws_states_, ws_diff_states_, input,
-            diff_dst_layer);
-    copy_init_iter(n_layer, n_direction, n_states, batch, sic, dic, wic, n_iter,
-            ws_states_, ws_diff_states_, states, diff_dst_iter);
-
-    // run the execution on the grid
-    (this->*grid_computation)(dic, slc, sic, wic, batch, n_layer, n_direction,
-            n_iter, n_gates, n_states, n_bias, ptr_wei_input_, n_parts_wei_i,
-            ptr_wei_state_, n_parts_wei_st, (float *)bias, ws_states_,
-            ws_diff_states_, ws_gates_, ws_cell_, ws_grid_, ws_per_cell,
-            ws_diff_weights_layer_, ws_diff_weights_iter_, diff_bias);
-
-    // Finally we copy the results to the result buffers
-    copy_res_layer(is_lr, is_rl, n_layer, n_direction, n_iter, batch,
-            n_output_features, slc, dic, wic, n_states, conf_.direction(),
-            dst_last_layer, diff_src_layer, ws_states_, ws_diff_states_);
-    copy_res_iter(n_layer, n_direction, n_states, batch, sic, dic, wic, n_iter,
-            dst_last_iter, diff_src_iter, ws_states_, ws_diff_states_);
-
-    // copy of the diff weights if bwd
-    if (aprop == prop_kind::backward){
-        // TODO: write an impl of matcopy in MKL-DNN
-        // TODO: support ldgoi using the trans parameters
-        AOC<float, 3> diff_weights_layer_aoc(diff_weights_layer, n_layer, n_direction, slc * n_gates * dic);
-        AOC<float, 3> diff_weights_iter_aoc(diff_weights_iter, n_layer, n_direction, sic * n_gates * dic);
-        AOC<float, 3> ws_diff_weights_layer_aoc(ws_diff_weights_layer_, n_layer, n_direction, slc * conf_.GC());
-        AOC<float, 3> ws_diff_weights_iter_aoc(ws_diff_weights_iter_, n_layer, n_direction, sic * conf_.GC());
-
-        /*
-           - assumes column major and non transposed matrices
-           - computes B = A + B
-        */
-        auto inplace_matadd = [=](const int nrows, const int ncols,
-                const float *A, const int ldA, float *B, const int ldB){
-            for(int i = 0; i < ncols; i++)
-                for(int j = 0; j < nrows; j++)
-                    B[i * ldB + j] += A[i * ldA + j];
-        };
-        mkldnn::impl::parallel_nd(n_layer, n_direction, [&](int i, int d) {
-            auto wei_lay = &(diff_weights_layer_aoc(i, d, 0));
-            auto wei_it = &(diff_weights_iter_aoc(i, d, 0));
-            auto ws_wei_lay = &(ws_diff_weights_layer_aoc(i, d, 0));
-            auto ws_wei_it = &(ws_diff_weights_iter_aoc(i, d, 0));
-            if (copy_diff_weights_layer_)
-                inplace_matadd(n_gates*dic, slc, ws_wei_lay, conf_.GC(),
-                        wei_lay, n_gates*dic);
-            if (copy_diff_weights_iter_)
-                inplace_matadd(n_gates*dic, sic, ws_wei_it, conf_.GC(),
-                        wei_it, n_gates*dic);
-        });
-    }
-
-    // We free the packed weights if they were packed internally
-    (this->*weights_state_free_packed_func)(n_layer, n_direction,
-            n_parts_wei_st, ptr_wei_state_);
-    (this->*weights_input_free_packed_func)(n_layer, n_direction,
-            n_parts_wei_i, ptr_wei_input_);
-};
-
-template struct _ref_rnn_common_t<prop_kind::forward>;
-template struct _ref_rnn_common_t<prop_kind::backward>;
-
-#undef AOC
-}
-}
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_rnn.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_rnn.hpp
deleted file mode 100644
index 703aa1834..000000000
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_rnn.hpp
+++ /dev/null
@@ -1,440 +0,0 @@
-/*******************************************************************************
-* Copyright 2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#ifndef CPU_REF_RNN_HPP
-#define CPU_REF_RNN_HPP
-
-#include <assert.h>
-
-#include "c_types_map.hpp"
-#include "cpu_engine.hpp"
-#include "cpu_rnn_pd.hpp"
-#include "cpu_isa_traits.hpp"
-#include "scratchpad.hpp"
-#include "type_helpers.hpp"
-#include "utils.hpp"
-
-#include "gemm/os_blas.hpp"
-
-namespace mkldnn {
-namespace impl {
-namespace cpu {
-
-#define elemwise_sig(f)                                                 \
-    void f(int dic, int wic, int batch, int n_states, int iter_stride, int n_gates, \
-            float *ws_gates_, float *states_t_l_, float *states_t_lm1_, \
-            float *states_tm1_l_, float *diff_states_t_l_,              \
-            float *diff_states_t_lp1_, float *diff_states_tp1_l_,       \
-            const float *bias_, float *ws_grid_, float *ws_cell_)
-
-#define cell_execution_sig(f)                                                 \
-    void f(int dic, int slc, int sic, int wic, int batch, int n_gates,        \
-            int n_states, int iter_stride, float *states_t_l_, float *diff_states_t_l_, \
-            float **w_input_, float **w_state_, const float *bias_,           \
-            float *states_t_lm1_, float *states_tm1_l_,                       \
-            float *diff_states_t_lp1_, float *diff_states_tp1_l_,             \
-            float *diff_w_input_, float *diff_w_state_, float *diff_bias_,    \
-            float *ws_gates_, float *ws_grid_, float *ws_cell_)
-
-#define grid_execution_sig(f)                                              \
-    void f(int dic, int slc, int sic, int wic, int batch, int n_layer,     \
-            int n_direction, int n_iter, int n_gates, int n_states,        \
-            int n_bias, float **weights_input_, int n_parts_wei_i,         \
-            float **weights_states_, int n_parts_wei_st,                   \
-            const float *bias_, float *ws_states_, float *ws_diff_states_, \
-            float *ws_gates_, float *ws_cell_, float *ws_grid_,            \
-            int ws_per_cell, float *diff_weights_layer_,                   \
-            float *diff_weights_iter_, float *diff_bias_)
-
-#define gemm_sig(f)                                                          \
-    void f(int m, int n, int k, int strideA_m, int strideA_k, int strideB_n, \
-            int strideB_k, int strideC_m, int strideC_n, const float *a_,    \
-            float *b_, float *c_, bool is_B_trans, float beta)
-
-#define packing_sig(f)                                               \
-    void f(int n_layer, int n_direction, int n_weights, int n_gates, \
-            int batch, int OC_size, int IC_size, float **weights_,   \
-            int n_parts, int *gates_per_part, const float *w_,       \
-            float * scratch_mem, bool do_copy)
-
-#define free_packed_sig(f) void f(int n_layer, int n_direction, int n_parts, \
-            float **weights_)
-
-template <alg_kind_t alg_kind, prop_kind_t prop_kind>
-float activation(float s, float alpha, float cliping, float dd);
-
-template <prop_kind_t aprop>
-struct _ref_rnn_common_t : public cpu_primitive_t {
-    using class_name = _ref_rnn_common_t<aprop>;
-    typedef enum execution_direction_ {
-        b2t_l2r,
-        b2t_r2l,
-        b2t_bi_concat,
-        b2t_bi_sum,
-        t2b_l2r,
-        t2b_r2l,
-        t2b_bi_concat,
-        t2b_bi_sum
-    } execution_direction;
-    typedef elemwise_sig((class_name::*elemwise_f));
-    typedef cell_execution_sig((class_name::*cell_execution_f));
-    typedef grid_execution_sig((class_name::*grid_execution_f));
-
-    typedef gemm_sig((class_name::*gemm_t));
-    typedef packing_sig((class_name::*packing_t));
-    typedef free_packed_sig((class_name::*free_packed_t));
-
-    using base_pd_t =
-            typename utils::conditional<false || aprop == prop_kind::forward,
-                    cpu_rnn_fwd_pd_t, cpu_rnn_bwd_pd_t>::type;
-
-    struct pd_t : public base_pd_t {
-        pd_t(engine_t *engine, const rnn_desc_t *adesc,
-                const primitive_attr_t *attr,
-                const typename pd_t::base_class *hint_pd)
-            : base_pd_t(engine, adesc, attr, hint_pd) {}
-
-        DECLARE_COMMON_PD_T("ref:any", class_name);
-
-        status_t init() {
-            using namespace prop_kind;
-            using namespace utils;
-            using namespace memory_format;
-            assert(this->engine()->kind() == engine_kind::cpu);
-            const alg_kind_t cell_kind = this->desc()->cell_desc.cell_kind;
-
-            bool ok = true
-                    && one_of(cell_kind, alg_kind::vanilla_rnn,
-                               alg_kind::vanilla_lstm, alg_kind::vanilla_gru,
-                               alg_kind::gru_linear_before_reset)
-                    && IMPLICATION(aprop == prop_kind::forward,
-                               one_of(this->desc()->prop_kind, forward_training,
-                                       forward_inference))
-                    && IMPLICATION(aprop == backward,
-                               one_of(this->desc()->prop_kind, backward))
-                    && this->set_default_params() == status::success;
-            if (!ok)
-                return status::unimplemented;
-
-            ok = ok && utils::one_of(cell_kind, alg_kind::vanilla_rnn,
-                               alg_kind::vanilla_lstm, alg_kind::vanilla_gru,
-                               alg_kind::gru_linear_before_reset);
-
-            /// @todo check data layouts for all input tensors
-            ok = ok && this->desc()->src_layer_desc.format == tnc
-                    && this->desc()->dst_layer_desc.format == tnc;
-
-            ok = ok && this->with_bias();
-            switch (aprop) {
-            case (prop_kind::forward):
-                ok = ok && utils::one_of(this->desc()->prop_kind,
-                                   forward_training, forward_inference);
-                ok = ok && utils::one_of(
-                                   this->desc()->weights_layer_desc.format, any,
-                                   ldigo, ldigo_p)
-                        && utils::one_of(this->desc()->weights_iter_desc.format,
-                                   any, ldigo, ldigo_p);
-                break;
-            case (prop_kind::backward):
-                ok = ok && utils::one_of(this->desc()->prop_kind, backward);
-                ok = ok && utils::one_of(
-                                   this->desc()->weights_layer_desc.format, any,
-                                   ldgoi, ldgoi_p)
-                        && utils::one_of(this->desc()->weights_iter_desc.format,
-                                   any, ldgoi, ldgoi_p);
-                break;
-            default: ok = false;
-            }
-
-            // Check dimensions consistency
-            int ls_multiplier
-                    = (this->direction() == mkldnn_bidirectional_concat) ? 2 :
-                                                                           1;
-
-            ok = ok && (ls_multiplier * this->DIC() == this->DLC())
-                    && ((ls_multiplier * this->SLC()) == this->DLC()
-                               || (this->L() == 1))
-                    && (this->SIC() == this->DIC() || (this->T() == 1));
-
-            // initialize the workspace_pd if needed
-            if (this->desc()->prop_kind != forward_inference){
-                dims_t ws_dims = { (dim_t)this->get_ws_size() };
-                memory_desc_t ws_d;
-                mkldnn_memory_desc_init(
-                        &ws_d, 1, ws_dims, impl::data_type::f32, memory_format::x);
-                this->ws_pd_ = cpu_memory_t::pd_t(this->engine(), &ws_d);
-            }
-
-            return ok ? status::success : status::unimplemented;
-        }
-    };
-
-    _ref_rnn_common_t(const pd_t *pd, const input_vector &inputs,
-            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-        /// @todo set max_feature_size assuming that we limit the number of
-        /// iterations and layer to one if slc != dic and sic != dic
-        /// respectively
-
-        memory_format_t packed_format;
-        switch (aprop) {
-        case prop_kind::forward_inference:
-        case prop_kind::forward_training:
-            packed_format = memory_format::ldigo_p;
-            break;
-        case prop_kind::backward: packed_format = memory_format::ldgoi_p; break;
-        default: assert(false);
-        }
-
-        merge_gemm_layer = ((aprop == prop_kind::forward) && (conf_.MB() < 128))
-            || (aprop == prop_kind::backward);
-        merge_gemm_iter = (aprop == prop_kind::backward)
-                && (!utils::one_of(conf_.cell_kind(), alg_kind::vanilla_gru,
-                            alg_kind::gru_linear_before_reset));
-        auto set_pack_funcs = [](bool packed_gemm, gemm_t &g, bool pack_w,
-                packing_t &p, free_packed_t &f) {
-            g = packed_gemm ? &class_name::packed_gemm : &class_name::gemm;
-            p = pack_w ? &class_name::pack_weights :
-                             &class_name::no_pack_weights;
-            f = pack_w ? &class_name::free_packed_weights :
-                             &class_name::free_no_packed_weights;
-        };
-#ifdef USE_MKL_PACKED_GEMM
-        const bool weights_pack_cond =
-            (conf_.T() > 1) && (conf_.MB() == 32) &&
-            (conf_.SIC() == 512) &&(conf_.SLC() == 512) && (conf_.DIC() == 512);
-#else
-        const bool weights_pack_cond = false;
-#endif
-
-        const bool is_weights_state_packed = conf_.desc()->weights_iter_desc.format == packed_format;
-        set_pack_funcs(weights_pack_cond || is_weights_state_packed,
-                gemm_state_func, weights_pack_cond && !is_weights_state_packed,
-                weights_state_pack_func, weights_state_free_packed_func);
-
-        const bool is_weights_input_packed = conf_.desc()->weights_layer_desc.format == packed_format;
-        set_pack_funcs(weights_pack_cond || is_weights_input_packed,
-                gemm_input_func, weights_pack_cond && !is_weights_input_packed,
-                weights_input_pack_func, weights_input_free_packed_func);
-
-        switch (conf_.cell_kind()) {
-        case alg_kind::vanilla_lstm:
-            cell_func = &class_name::cell_execution;
-            elemwise_func = &class_name::lstm_elemwise;
-            break;
-        case alg_kind::vanilla_rnn: // @todo switch on cell kind
-            cell_func = &class_name::cell_execution;
-            elemwise_func = &class_name::rnn_elemwise;
-            switch (conf_.activation_kind()) {
-            case alg_kind::eltwise_relu:
-                activation_func = &activation<alg_kind::eltwise_relu, aprop>;
-                break;
-            case alg_kind::eltwise_tanh:
-                activation_func = &activation<alg_kind::eltwise_tanh, aprop>;
-                break;
-            case alg_kind::eltwise_logistic:
-                activation_func = &activation<alg_kind::eltwise_logistic, aprop>;
-                break;
-            default: break;
-            }
-            break;
-        case alg_kind::vanilla_gru:
-            cell_func = &class_name::cell_execution_gru;
-            break;
-        case alg_kind::gru_linear_before_reset:
-            cell_func = &class_name::cell_execution_gru_lbr;
-            elemwise_func = &class_name::gru_lbr_elemwise;
-            break;
-        default: break;
-        }
-
-        n_output_features
-                = (conf_.direction() == mkldnn_bidirectional_concat) ? 2 : 1;
-        switch (conf_.direction()) {
-        case mkldnn_unidirectional_left2right: exec_dir = b2t_l2r; break;
-        case mkldnn_unidirectional_right2left: exec_dir = b2t_r2l; break;
-        case mkldnn_bidirectional_concat: exec_dir = b2t_bi_concat; break;
-        case mkldnn_bidirectional_sum: exec_dir = b2t_bi_sum; break;
-        default: break;
-        }
-
-        /// @todo put a heuristic to choose between linear execution and
-        /// wavefront
-        grid_computation = &class_name::linear_execution;
-
-        // we need to allocate memory for:
-        // - the states to compute a pass.
-        // - the intermediate results from the gates.
-        // - the diff_states to compute the backward pass (training only)
-        // These should be allocated on scratchpad if fwd inference
-        // or on a workspace provided by the user for training.
-        /// @todo shall we require the workspace for training or make it
-        /// optional?
-
-        // if no worskpace is provided on forward, we use a scratchpad
-        // NOTE: here we use a large worskpace for simplicity:
-        // - for states:
-        //   - TODO: allocate only n_iter * dic + dic for linear execution
-        //   (inference)
-        //   - TODO: allocate only n_layer_wav * (2*dic) for wavefront
-        //   execution (inference)
-        // - for gates:
-        //   - TODO: allocate only batch * n_gates * dic for linear execution
-        //   (inference)
-        //   = TODO: allocate only n_layer_wav * batch * n_gates * dic for
-        //   wavefront execution (inference)
-
-        use_jit_sgemm_ = ((aprop == prop_kind::forward_inference)
-            || (conf_.is_training() && conf_.DIC() < 500))
-            && !mayiuse(avx512_mic);
-
-        copy_weights_layer_ = (conf_.WL_LD() != conf_.WL_GLD());
-        copy_weights_iter_ = (conf_.WI_LD() != conf_.WI_GLD());
-
-        copy_diff_weights_layer_ = (aprop == prop_kind::backward)
-                && (conf_.DWL_LD() != conf_.DWL_GLD());
-        copy_diff_weights_iter_ = (aprop == prop_kind::backward)
-                && (conf_.DWI_LD() != conf_.DWI_GLD());
-
-        use_workspace_ = (conf_.desc()->prop_kind != prop_kind::forward_inference);
-
-        size_t scratchpad_size = conf_.set_offsets(use_workspace_,
-            ws_gates_offset_, ws_states_offset_,  ws_diff_states_offset_,
-            ws_grid_comp_offset_,
-            conf_.is_lbr(), ws_cell_comp_offset_,
-            copy_weights_layer_, ws_weights_layer_offset_,
-            copy_weights_iter_, ws_weights_iter_offset_,
-            copy_diff_weights_layer_, ws_diff_weights_layer_offset_,
-            copy_diff_weights_iter_, ws_diff_weights_iter_offset_);
-
-        scratchpad_ =
-            create_scratchpad(scratchpad_size * sizeof(float));
-
-        int max_nparts = (conf_.cell_kind() == alg_kind::vanilla_gru) ? 2 : 1;
-        int ptr_wei_sz = conf_.L() * conf_.D() * max_nparts;
-        ptr_wei_input_ = (float **)malloc(sizeof(float *) * ptr_wei_sz, 64);
-        ptr_wei_state_ = (float **)malloc(sizeof(float *) * ptr_wei_sz, 64);
-    }
-    ~_ref_rnn_common_t() {
-        delete scratchpad_;
-        free(ptr_wei_input_);
-        free(ptr_wei_state_);
-    }
-
-    // typedef typename prec_traits::type data_t;
-
-    virtual void execute(event_t *e) {
-        execute_();
-        e->set_state(event_t::ready);
-    }
-
-private:
-    void execute_();
-    grid_execution_sig(linear_execution);
-    // grid_execution_sig(wavefront_execution);
-    cell_execution_sig(cell_execution);
-    cell_execution_sig(cell_execution_gru);
-    cell_execution_sig(cell_execution_gru_lbr);
-    elemwise_sig(rnn_elemwise);
-    elemwise_sig(lstm_elemwise);
-    elemwise_sig(gru_lbr_elemwise);
-    gemm_sig(gemm);
-    gemm_sig(packed_gemm);
-    packing_sig(pack_weights);
-    packing_sig(no_pack_weights);
-    free_packed_sig(free_packed_weights);
-    free_packed_sig(free_no_packed_weights);
-
-    float (*activation_func)(float dd, float s, float alpha, float cliping);
-
-    void copy_init_layer(bool lr, bool rl, int n_direction, int n_layer,
-            int n_iter, int batch, int slc, int dic, int dlc, int wic,
-            int n_states, float *ws_states_, float *ws_diff_states_,
-            const float *xt_, const float *diff_dst_layer);
-    void copy_init_iter(int n_layer, int n_direction, int n_states, int batch,
-            int sic, int dic, int wic, int n_iter, float *ws_states_,
-            float *ws_diff_states_, const float *firstit_states_,
-            const float *diff_dst_iter);
-    void copy_res_layer(bool lr, bool rl, int n_layer, int n_direction,
-            int n_iter, int batch, int n_output_features, int slc, int dlc,
-            int wic, int n_states, mkldnn_rnn_direction_t direction,
-            float *dst_layer_, float *diff_src_layer, const float *ws_states_,
-            const float *ws_diff_states_);
-    void copy_res_iter(int n_layer, int n_direction, int n_states, int batch,
-            int sic, int dic, int wic, int n_iter, float *dst_iter_,
-            float *diff_src_iter, const float *ws_states_,
-            const float *ws_diff_states_);
-    void gates_reduction(int n_gates, int dic, int wic, int batch,
-            const float *ws_gates_, float *diff_bias_);
-    pd_t conf_;
-    bool use_workspace_;
-    scratchpad_t *scratchpad_;
-
-    size_t ws_gates_offset_;
-    size_t ws_states_offset_;
-    size_t ws_weights_layer_offset_;
-    size_t ws_weights_iter_offset_;
-    size_t ws_diff_states_offset_;
-    size_t ws_diff_weights_layer_offset_;
-    size_t ws_diff_weights_iter_offset_;
-    size_t ws_grid_comp_offset_;
-    size_t ws_cell_comp_offset_;
-
-    float *ws_gates_;
-    float *ws_states_;
-    float *ws_diff_states_;
-    float *ws_cell_;
-    float *ws_grid_;
-    float *ws_weights_layer_;
-    float *ws_weights_iter_;
-    float *ws_diff_weights_layer_;
-    float *ws_diff_weights_iter_;
-    int n_output_features;
-
-    float **ptr_wei_input_;
-    float **ptr_wei_state_;
-
-    execution_direction exec_dir;
-    grid_execution_f grid_computation;
-    cell_execution_f cell_func;
-
-    bool copy_weights_layer_;
-    bool copy_weights_iter_;
-    bool copy_diff_weights_layer_;
-    bool copy_diff_weights_iter_;
-    bool merge_gemm_layer;
-    bool merge_gemm_iter;
-    bool use_jit_sgemm_;
-
-    packing_t weights_input_pack_func;
-    packing_t weights_state_pack_func;
-
-    gemm_t gemm_input_func;
-    gemm_t gemm_state_func;
-    elemwise_f elemwise_func;
-
-    free_packed_t weights_input_free_packed_func;
-    free_packed_t weights_state_free_packed_func;
-};
-
-using ref_rnn_fwd_t = _ref_rnn_common_t<prop_kind::forward>;
-using ref_rnn_bwd_t = _ref_rnn_common_t<prop_kind::backward>;
-}
-}
-}
-#endif
-
-// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.cpp
index 2d8188d47..e8806cbd8 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.cpp
@@ -31,20 +31,20 @@ namespace impl {
 namespace cpu {
 
 template <impl::data_type_t data_type>
-void ref_roi_pooling_fwd_t<data_type>::execute_forward_generic() {
+void ref_roi_pooling_fwd_t<data_type>::execute_forward_generic() const {
     int roi_idx = 1;
     int data_idx = 0;
 
-    const memory_desc_wrapper dst_d(conf_.dst_pd());
-    memory_desc_wrapper src_data_d = conf_.src_pd(data_idx);
-    memory_desc_wrapper src_roi_d = conf_.src_pd(roi_idx);
+    const memory_desc_wrapper dst_d(pd()->dst_pd());
+    memory_desc_wrapper src_data_d = pd()->src_pd(data_idx);
+    memory_desc_wrapper src_roi_d = pd()->src_pd(roi_idx);
 
     if (src_roi_d.dims()[0] < src_data_d.dims()[0]) {
         roi_idx = 0;
         data_idx = 1;
 
-        src_data_d = conf_.src_pd(data_idx);
-        src_roi_d = conf_.src_pd(roi_idx);
+        src_data_d = pd()->src_pd(data_idx);
+        src_roi_d = pd()->src_pd(roi_idx);
     }
 
     auto dst = reinterpret_cast<data_t*>(this->memory(0));
@@ -57,9 +57,9 @@ void ref_roi_pooling_fwd_t<data_type>::execute_forward_generic() {
 
     int ROIS = src_roi_d.dims()[0];
 
-    double spatial_scale = conf_.spatialScale();
-    int pooled_h = conf_.pooledH();
-    int pooled_w = conf_.pooledW();
+    double spatial_scale = pd()->spatialScale();
+    int pooled_h = pd()->pooledH();
+    int pooled_w = pd()->pooledW();
 
     for (size_t i = 0; i < dst_d.size() / sizeof(data_t); i++) {
         dst[i] = -FLT_MAX;
@@ -94,7 +94,7 @@ void ref_roi_pooling_fwd_t<data_type>::execute_forward_generic() {
         const data_t* src_roi_ptr = &src_roi[roi_off];
         int roi_batch_ind = src_roi_ptr[0];
 
-        if (conf_.desc()->alg_kind == mkldnn_roi_pooling_max) {
+        if (pd()->desc()->alg_kind == mkldnn_roi_pooling_max) {
             int roi_start_w = round(src_roi_ptr[1] * spatial_scale);
             int roi_start_h = round(src_roi_ptr[2] * spatial_scale);
             int roi_end_w = round(src_roi_ptr[3] * spatial_scale);
@@ -152,7 +152,7 @@ void ref_roi_pooling_fwd_t<data_type>::execute_forward_generic() {
                     }
                 }
             }
-        } else if (conf_.desc()->alg_kind == mkldnn_roi_pooling_bilinear) {
+        } else if (pd()->desc()->alg_kind == mkldnn_roi_pooling_bilinear) {
             float roi_start_w_ = src_roi_ptr[1];
             float roi_start_h_ = src_roi_ptr[2];
             float roi_end_w_   = src_roi_ptr[3];
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.hpp
index 5bcc56adc..afb66612f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_roi_pooling.hpp
@@ -52,22 +52,22 @@ struct ref_roi_pooling_fwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_roi_pooling_fwd_t(const pd_t *pd, const input_vector &inputs,
-                          const output_vector &outputs)
-            : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) { }
+    ref_roi_pooling_fwd_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) { }
 
     typedef typename prec_traits<data_type>::type data_t;
 
     ~ref_roi_pooling_fwd_t() { }
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute_forward_generic();
         e->set_state(event_t::ready);
     }
     
 private:
-    void execute_forward_generic();
-    pd_t conf_;
+    void execute_forward_generic() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.cpp
index 42234e947..89eb24e4d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.cpp
@@ -31,27 +31,27 @@ using namespace memory_format;
 
 template <int data_type_size>
 template <mkldnn_memory_format_t fmt>
-void ref_shuffle_t<data_type_size>::execute_() {
+void ref_shuffle_t<data_type_size>::execute_() const {
     using namespace prop_kind;
     using namespace utils;
 
-    const memory_desc_wrapper data_d(conf_.data_pd());
+    const memory_desc_wrapper data_d(pd()->data_pd());
 
     auto input = reinterpret_cast<const data_t*>(this->input_memory(0));
     auto output = reinterpret_cast<data_t*>(this->memory(0));
 
-    const int axis = conf_.axis();
-    const int axis_size = conf_.axis_size();
+    const int axis = pd()->axis();
+    const int axis_size = pd()->axis_size();
 
-    const int MB = conf_.MB();
-    const int C = conf_.C();
+    const int MB = pd()->MB();
+    const int C = pd()->C();
     int H = 1, W = 1, D = 1, HW = 1, SP = 1;
     const bool has_spatial = utils::one_of(data_d.ndims(), 3, 4 ,5);
     if (has_spatial)
     {
-        D = conf_.D();
-        H = conf_.H();
-        W = conf_.W();
+        D = pd()->D();
+        H = pd()->H();
+        W = pd()->W();
         HW = H * W;
         SP = D * HW;
     }
@@ -107,8 +107,8 @@ void ref_shuffle_t<data_type_size>::execute_() {
             }
         });
     } else {
-        auto dims = conf_.desc()->data_desc.dims;
-        auto ndims = conf_.desc()->data_desc.ndims;
+        auto dims = pd()->desc()->data_desc.dims;
+        auto ndims = pd()->desc()->data_desc.ndims;
         const size_t outer_size = utils::array_product(dims, axis);
         const size_t inner_size = utils::array_product(dims + axis + 1,
                                          ndims - axis - 1);
@@ -124,25 +124,25 @@ void ref_shuffle_t<data_type_size>::execute_() {
     }
 }
 
-template void ref_shuffle_t<4>::execute_<nCdhw16c>();
-template void ref_shuffle_t<4>::execute_<nChw16c>();
-template void ref_shuffle_t<4>::execute_<nCdhw8c>();
-template void ref_shuffle_t<4>::execute_<nChw8c>();
-template void ref_shuffle_t<4>::execute_<ncdhw>();
-template void ref_shuffle_t<4>::execute_<nchw>();
-template void ref_shuffle_t<4>::execute_<ndhwc>();
-template void ref_shuffle_t<4>::execute_<nhwc>();
-template void ref_shuffle_t<4>::execute_<any>();
-
-template void ref_shuffle_t<1>::execute_<nCdhw16c>();
-template void ref_shuffle_t<1>::execute_<nChw16c>();
-template void ref_shuffle_t<1>::execute_<nCdhw8c>();
-template void ref_shuffle_t<1>::execute_<nChw8c>();
-template void ref_shuffle_t<1>::execute_<ncdhw>();
-template void ref_shuffle_t<1>::execute_<nchw>();
-template void ref_shuffle_t<1>::execute_<ndhwc>();
-template void ref_shuffle_t<1>::execute_<nhwc>();
-template void ref_shuffle_t<1>::execute_<any>();
+template void ref_shuffle_t<4>::execute_<nCdhw16c>() const;
+template void ref_shuffle_t<4>::execute_<nChw16c>() const;
+template void ref_shuffle_t<4>::execute_<nCdhw8c>() const;
+template void ref_shuffle_t<4>::execute_<nChw8c>() const;
+template void ref_shuffle_t<4>::execute_<ncdhw>() const;
+template void ref_shuffle_t<4>::execute_<nchw>() const;
+template void ref_shuffle_t<4>::execute_<ndhwc>() const;
+template void ref_shuffle_t<4>::execute_<nhwc>() const;
+template void ref_shuffle_t<4>::execute_<any>() const;
+
+template void ref_shuffle_t<1>::execute_<nCdhw16c>() const;
+template void ref_shuffle_t<1>::execute_<nChw16c>() const;
+template void ref_shuffle_t<1>::execute_<nCdhw8c>() const;
+template void ref_shuffle_t<1>::execute_<nChw8c>() const;
+template void ref_shuffle_t<1>::execute_<ncdhw>() const;
+template void ref_shuffle_t<1>::execute_<nchw>() const;
+template void ref_shuffle_t<1>::execute_<ndhwc>() const;
+template void ref_shuffle_t<1>::execute_<nhwc>() const;
+template void ref_shuffle_t<1>::execute_<any>() const;
 
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.hpp
index 763bbaab6..cd653dc20 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_shuffle.hpp
@@ -53,15 +53,15 @@ struct ref_shuffle_t : public cpu_primitive_t {
         }
     };
 
-    ref_shuffle_t(const pd_t *pd, const input_vector &inputs,
+    ref_shuffle_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd)
+        : cpu_primitive_t(apd, inputs, outputs)
     {
-        const int axis_size = conf_.axis_size();
-        const int group_size = conf_.group_size();
-        const int transpose_row = conf_.is_fwd() ? group_size
+        const int axis_size = pd()->axis_size();
+        const int group_size = pd()->group_size();
+        const int transpose_row = pd()->is_fwd() ? group_size
                                                  : axis_size / group_size;
-        const int transpose_col = conf_.is_fwd() ? axis_size / group_size
+        const int transpose_col = pd()->is_fwd() ? axis_size / group_size
                                                  : group_size;
         rev_transposed_ = (int *)malloc(axis_size * sizeof(int), 64);
         parallel_nd(transpose_col, transpose_row, [&](int i, int j) {
@@ -73,9 +73,9 @@ struct ref_shuffle_t : public cpu_primitive_t {
 
     typedef typename typesize_traits<data_type_size>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         using namespace memory_format;
-        switch (conf_.data_pd()->desc()->format) {
+        switch (pd()->data_pd()->desc()->format) {
         case nCdhw16c: execute_<nCdhw16c>(); break;
         case nChw16c:  execute_<nChw16c>(); break;
         case nCdhw8c:  execute_<nCdhw8c>(); break;
@@ -91,8 +91,8 @@ struct ref_shuffle_t : public cpu_primitive_t {
     }
 
 private:
-    template<memory_format_t fmt>void execute_();
-    pd_t conf_;
+    template<memory_format_t fmt>void execute_() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     int *rev_transposed_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.cpp
index a65632f06..30b3299c0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.cpp
@@ -23,10 +23,10 @@
 #include "mkldnn_thread.hpp"
 
 #include "ref_softmax.hpp"
+#include "gemm/os_blas.hpp"
 
 #ifdef USE_MKL
 #include "mkl_vml_functions.h"
-#include "mkl_cblas.h"
 #endif
 
 namespace mkldnn {
@@ -34,11 +34,11 @@ namespace impl {
 namespace cpu {
 
 template <impl::data_type_t data_type>
-void ref_softmax_fwd_t<data_type>::execute_forward_dense() {
+void ref_softmax_fwd_t<data_type>::execute_forward_dense() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
 
-    outer_size_ = utils::array_product(conf_.src_pd()->desc()->dims, conf_.desc()->softmax_axis);
+    int outer_size_ = utils::array_product(pd()->src_pd()->desc()->dims, pd()->desc()->softmax_axis);
 
     if (outer_size_ == 1) {
         for (int ou = 0; ou < outer_size_; ou++) {
@@ -68,60 +68,112 @@ void ref_softmax_fwd_t<data_type>::execute_forward_dense() {
 }
 
 template <impl::data_type_t data_type>
-void ref_softmax_fwd_t<data_type>::execute_forward_generic() {
+void ref_softmax_fwd_t<data_type>::execute_forward_generic() const {
     auto src = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto dst = reinterpret_cast<data_t *>(this->memory(0));
 
-    const memory_desc_wrapper data_d(conf_.src_pd());
+    data_t space_max_val = 0, space_denom_val = 0;
+    data_t *space_max = &space_max_val, *space_denom = &space_denom_val;
+    if (inner_size_ > 1) {
+        using namespace memory_tracking::names;
+        space_max = scratchpad().template get<data_t>(key_softmax_reduction);
+        space_denom = space_max + inner_size_;
+    }
+
+    const memory_desc_wrapper data_d(pd()->src_pd());
     const size_t dim = channels_ * inner_size_;
 
-    outer_size_ = utils::array_product(conf_.src_pd()->desc()->dims, conf_.desc()->softmax_axis);
+    int outer_size_ = utils::array_product(pd()->src_pd()->desc()->dims, pd()->desc()->softmax_axis);
 
     for (int ou = 0; ou < outer_size_; ou++) {
-        utils::array_set(max_, -FLT_MAX, inner_size_);
-        utils::array_set(denom_, 0, inner_size_);
+        utils::array_set(space_max, -FLT_MAX, inner_size_);
+        utils::array_set(space_denom, 0, inner_size_);
 
         for (int c = 0; c < channels_; c++) {
             for(int in = 0; in < inner_size_; in++) {
                 size_t off = data_d.off_l(ou * dim + c * inner_size_ + in);
-                max_[in] = nstl::max(max_[in], src[off]);
+                space_max[in] = nstl::max(space_max[in], src[off]);
             }
         }
 
         for (int c = 0; c < channels_; c++) {
             for(int in = 0; in < inner_size_; in++) {
                 size_t off = data_d.off_l(ou * dim + c * inner_size_ + in);
-                denom_[in] += dst[off] = exp(src[off] - max_[in]);
+                space_denom[in] += dst[off] = exp(src[off] - space_max[in]);
             }
         }
 
         for (int c = 0; c < channels_; c++) {
             for (int in = 0; in < inner_size_; in++) {
                 size_t off = data_d.off_l(ou * dim + c * inner_size_ + in);
-                dst[off] /= denom_[in];
+                dst[off] /= space_denom[in];
             }
         }
     }
 }
 
-
 template <impl::data_type_t data_type>
 void ref_softmax_fwd_t<data_type>::_max(int n, const data_t *x,
-        data_t *max_data) {
+        data_t *max_data) const {
+// Intel(R) C++ Compiler generates the maxps + shuffle pattern
+// for the max search which works faster
+#if !defined(__INTEL_COMPILER)
+    // The code below makes a compiler to generate maxps instruction
+    // rather than maxss, which is generated for the 'else' code path
+    auto max_wrapper = [](data_t a, data_t b) { return nstl::max(a, b); };
+    auto min_wrapper = [](int a, int b) { return nstl::min(a, b); };
+
+    constexpr int unroll_factor = 32;
+    data_t max_values[unroll_factor];
+
+    if (n < unroll_factor) {
+        data_t max_val = x[0];
+        for (int i = 1; i < n; i++) {
+            max_val = max_wrapper(max_val, x[i]);
+        }
+        max_data[0] = max_val;
+        return;
+    }
+    for (int i = 0; i < unroll_factor; i++) {
+        max_values[i] = x[i];
+    }
+    for (int i = unroll_factor; i < n; i += unroll_factor) {
+        int offset = min_wrapper(i, n - unroll_factor);
+        for (int j = 0; j < unroll_factor; j++) {
+            max_values[j] = max_wrapper(max_values[j], x[offset + j]);
+        }
+    }
+    data_t max_val = max_values[0];
+    for (int i = 1; i < unroll_factor; i++) {
+        max_val = max_wrapper(max_val, max_values[i]);
+    }
+    max_data[0] = max_val;
+#else
     max_data[0] = x[0];
     for (int c = 1; c < n; ++c)
         max_data[0] = nstl::max(max_data[0], x[c]);
+#endif
 }
 
 template <impl::data_type_t data_type>
 void ref_softmax_fwd_t<data_type>::_sub(int n, data_t alpha, const data_t *x,
-        data_t *y) {
-    for (int c = 0; c < n; ++c)
-        y[c] = x[c] - alpha;
+        data_t *y) const {
+    constexpr int unroll_factor = 32;
+    int tail = n % unroll_factor;
+    for (int i = 0; i < n - tail; i += unroll_factor) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < unroll_factor; j++) {
+            y[i + j] = x[i + j] - alpha;
+        }
+    }
+    PRAGMA_OMP_SIMD()
+    for (int i = n - tail; i < n; i++) {
+        y[i] = x[i] - alpha;
+    }
 }
 
 template <impl::data_type_t data_type>
-void ref_softmax_fwd_t<data_type>::_exp_parallel(int n, const data_t *a, data_t *r) {
+void ref_softmax_fwd_t<data_type>::_exp_parallel(int n, const data_t *a, data_t *r) const {
 #ifdef USE_MKL
     if (data_type == data_type::f32) {
         vsExp(n, a, r);
@@ -132,22 +184,32 @@ void ref_softmax_fwd_t<data_type>::_exp_parallel(int n, const data_t *a, data_t
 }
 
 template <impl::data_type_t data_type>
-void ref_softmax_fwd_t<data_type>::_exp(int n, const data_t *a, data_t *r) {
+void ref_softmax_fwd_t<data_type>::_exp(int n, const data_t *a, data_t *r) const {
     for (int c = 0; c < n; c++)
         r[c] = expf(a[c]);
 }
 
 template <impl::data_type_t data_type>
 void ref_softmax_fwd_t<data_type>::_sum(int n, const data_t *x,
-        data_t *sum_data) {
-    sum_data[0] = 0;
+        data_t *sum_data) const {
+#ifdef USE_CBLAS
+    // Here we are summing x's eg. e^z , which are positives
+    // so we can use BLAS ASUM
+    if (data_type == data_type::f32) {
+        sum_data[0] = cblas_sasum(n, x, 1);
+        return;
+    }
+#endif
+    data_t tsum = static_cast<data_t>(0);
+    PRAGMA_OMP_SIMD(reduction(+ : tsum))
     for (int c = 0; c < n; ++c)
-        sum_data[0] += x[c];
+        tsum += x[c];
+    sum_data[0] = tsum;
 }
 
 template <impl::data_type_t data_type>
-void ref_softmax_fwd_t<data_type>::_scal_parallel(int n, data_t alpha, data_t *x) {
-#ifdef USE_MKL
+void ref_softmax_fwd_t<data_type>::_scal_parallel(int n, data_t alpha, data_t *x) const {
+#ifdef USE_CBLAS
     if (data_type == data_type::f32) {
         cblas_sscal(n, alpha, x, 1);
         return;
@@ -157,7 +219,7 @@ void ref_softmax_fwd_t<data_type>::_scal_parallel(int n, data_t alpha, data_t *x
 }
 
 template <impl::data_type_t data_type>
-void ref_softmax_fwd_t<data_type>::_scal(int n, data_t alpha, data_t *x) {
+void ref_softmax_fwd_t<data_type>::_scal(int n, data_t alpha, data_t *x) const {
     for (int c = 0; c < n; c++)
         x[c] *= alpha;
 }
@@ -167,7 +229,7 @@ template struct ref_softmax_fwd_t<data_type::f32>;
 
 // NC/NCHW softmax for along final axe (1 for NC, 3 for NCHW)
 template <impl::data_type_t data_type>
-void ref_softmax_bwd_t<data_type>::execute_backward_dense() {
+void ref_softmax_bwd_t<data_type>::execute_backward_dense() const {
     auto data = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
@@ -190,13 +252,13 @@ void ref_softmax_bwd_t<data_type>::execute_backward_dense() {
 }
 
 template <impl::data_type_t data_type>
-void ref_softmax_bwd_t<data_type>::execute_backward_generic() {
+void ref_softmax_bwd_t<data_type>::execute_backward_generic() const {
     const size_t dim = channels_ * inner_size_;
     auto data = reinterpret_cast<const data_t *>(this->input_memory(0));
     auto diff_dst = reinterpret_cast<const data_t *>(this->input_memory(1));
     auto diff_src = reinterpret_cast<data_t *>(this->memory(0));
-    const memory_desc_wrapper diff_d(conf_.diff_src_pd());
-    const memory_desc_wrapper data_d(conf_.dst_pd());
+    const memory_desc_wrapper diff_d(pd()->diff_src_pd());
+    const memory_desc_wrapper data_d(pd()->dst_pd());
 
     parallel_nd(outer_size_, [&](int ou) {
         for (int in = 0; in < inner_size_; in++) {
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.hpp
index c82f5b278..80237851a 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_softmax.hpp
@@ -14,17 +14,18 @@
 * limitations under the License.
 *******************************************************************************/
 
-#ifndef CPU_REF_SOFTMAX_FWD_HPP
-#define CPU_REF_SOFTMAX_FWD_HPP
+#ifndef CPU_REF_SOFTMAX_HPP
+#define CPU_REF_SOFTMAX_HPP
 
 #include <assert.h>
 
 #include "c_types_map.hpp"
-#include "cpu_softmax_pd.hpp"
-#include "cpu_engine.hpp"
+#include "memory_tracking.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
 
+#include "cpu_softmax_pd.hpp"
+
 namespace mkldnn {
 namespace impl {
 namespace cpu {
@@ -49,63 +50,68 @@ struct ref_softmax_fwd_t: public cpu_primitive_t {
                 && attr()->has_default_values();
             if (!ok) return status::unimplemented;
 
+            init_scratchpad();
+
             return status::success;
         }
+
+    private:
+        void init_scratchpad() {
+            const int inner_size = utils::array_product(
+                    desc()->data_desc.dims + desc()->softmax_axis + 1,
+                    desc()->data_desc.ndims - desc()->softmax_axis - 1);
+
+            if (inner_size > 1) {
+                auto scratchpad = scratchpad_registry().registrar();
+                scratchpad.book(memory_tracking::names::key_softmax_reduction,
+                        sizeof(data_t) * 2 * inner_size);
+            }
+        }
     };
 
-    ref_softmax_fwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_softmax_fwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd), ws_(nullptr) {
-        auto ndims = conf_.desc()->data_desc.ndims;
-        auto dims = conf_.desc()->data_desc.dims;
-        auto axis = conf_.desc()->softmax_axis;
+        : cpu_primitive_t(apd, inputs, outputs)
+    {
+        auto ndims = pd()->desc()->data_desc.ndims;
+        auto dims = pd()->desc()->data_desc.dims;
+        auto axis = pd()->desc()->softmax_axis;
 
         outer_size_ = utils::array_product(dims, axis);
         channels_ = dims[axis];
         inner_size_ = utils::array_product(dims + axis + 1, ndims - axis - 1);
-        val_max_ = val_denom_ = 0;
-
-        if (inner_size_ > 1) {
-            ws_ = new data_t[2*inner_size_];
-            max_ = &ws_[0];
-            denom_ = &ws_[inner_size_];
-        } else {
-            max_ = &val_max_;
-            denom_ = &val_denom_;
-        }
 
-        const memory_desc_wrapper data_d(conf_.src_pd());
+        const memory_desc_wrapper data_d(pd()->src_pd());
         use_dense_ = inner_size_ == 1 && data_d.is_dense()
             && data_d.blocking_desc().block_dims[axis] == 1
             && data_d.blocking_desc().strides[0][axis] == 1;
     }
-    ~ref_softmax_fwd_t() { if (ws_) delete [] ws_; }
+    ~ref_softmax_fwd_t() {}
+
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         if (use_dense_) execute_forward_dense();
         else execute_forward_generic();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_forward_dense();
-    void execute_forward_generic();
+    void execute_forward_dense() const;
+    void execute_forward_generic() const;
 
-    void _max(int n, const data_t *x, data_t *max_data);
-    void _sub(int n, data_t alpha, const data_t *x, data_t *y);
-    void _exp(int n, const data_t *a, data_t *r);
-    void _exp_parallel(int n, const data_t *a, data_t *r);
-    void _sum(int n, const data_t *x, data_t *sum_data);
-    void _scal(int n, data_t alpha, data_t *x);
-    void _scal_parallel(int n, data_t alpha, data_t *x);
+    void _max(int n, const data_t *x, data_t *max_data) const;
+    void _sub(int n, data_t alpha, const data_t *x, data_t *y) const;
+    void _exp(int n, const data_t *a, data_t *r) const;
+    void _exp_parallel(int n, const data_t *a, data_t *r) const;
+    void _sum(int n, const data_t *x, data_t *sum_data) const;
+    void _scal(int n, data_t alpha, data_t *x) const;
+    void _scal_parallel(int n, data_t alpha, data_t *x) const;
 
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
     bool use_dense_;
     int outer_size_, channels_, inner_size_;
-    data_t val_max_, val_denom_;
-    data_t *ws_, *max_, *denom_;
 };
 
 template <impl::data_type_t data_type>
@@ -132,20 +138,20 @@ struct ref_softmax_bwd_t: public cpu_primitive_t {
         }
     };
 
-    ref_softmax_bwd_t(const pd_t *pd, const input_vector &inputs,
+    ref_softmax_bwd_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-        auto dims = conf_.desc()->diff_desc.dims;
-        auto axis = conf_.desc()->softmax_axis;
-        auto ndims = conf_.desc()->diff_desc.ndims;
+        : cpu_primitive_t(apd, inputs, outputs) {
+        auto dims = pd()->desc()->diff_desc.dims;
+        auto axis = pd()->desc()->softmax_axis;
+        auto ndims = pd()->desc()->diff_desc.ndims;
 
         outer_size_ = utils::array_product(dims, axis);
         channels_ = dims[axis];
         inner_size_ = utils::array_product(dims + axis + 1, ndims - axis - 1);
 
         // Diff desc as well as data desc whould be checked
-        const memory_desc_wrapper data_d(conf_.dst_pd());
-        const memory_desc_wrapper diff_d(conf_.diff_dst_pd());
+        const memory_desc_wrapper data_d(pd()->dst_pd());
+        const memory_desc_wrapper diff_d(pd()->diff_dst_pd());
         use_dense_ = true
             && inner_size_ == 1
             && diff_d == data_d
@@ -154,23 +160,22 @@ struct ref_softmax_bwd_t: public cpu_primitive_t {
             && diff_d.blocking_desc().strides[0][axis] == 1;
     }
     ~ref_softmax_bwd_t() {}
+
     typedef typename prec_traits<data_type>::type data_t;
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         if (use_dense_) execute_backward_dense();
         else execute_backward_generic();
         e->set_state(event_t::ready);
     }
 
 private:
-    void execute_backward_dense();
-    void execute_backward_generic();
+    void execute_backward_dense() const;
+    void execute_backward_generic() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 
-    pd_t conf_;
     bool use_dense_;
     int outer_size_, channels_, inner_size_;
-    data_t val_max_, val_denom_;
-    data_t *max_, *denom_;
 };
 
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_sum.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_sum.hpp
index 4fd9bad83..17e0bde9f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_sum.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/ref_sum.hpp
@@ -78,8 +78,8 @@ struct ref_sum_t: public cpu_primitive_t {
             }
             return ret;
         }
-        virtual pd_t *clone() const override { return nullptr; /* FIXME */ }
-       virtual const char *name() const override { return "ref:any"; }
+        virtual pd_t *clone() const override { return new pd_t(*this); }
+        virtual const char *name() const override { return "ref:any"; }
 
         virtual status_t init() override {
             bool ok = cpu_sum_pd_t::init() == success;
@@ -109,9 +109,9 @@ struct ref_sum_t: public cpu_primitive_t {
         nstl::vector<const reorder_pd_t *> reorder_pds_;
     };
 
-    ref_sum_t(const pd_t *conf, const input_vector &inputs,
+    ref_sum_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs, nstl::vector<primitive_t *> reorders)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*conf),
+        : cpu_primitive_t(apd, inputs, outputs),
         reorders_(reorders) {}
 
     ~ref_sum_t() {
@@ -120,7 +120,7 @@ struct ref_sum_t: public cpu_primitive_t {
             delete reorders_[i];
     }
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         const auto n = reorders_.size();
         for (size_t i = 0; i < n; ++i) {
             event_t ei;
@@ -130,7 +130,7 @@ struct ref_sum_t: public cpu_primitive_t {
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     nstl::vector<primitive_t *> reorders_;
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_common.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_common.cpp
new file mode 100644
index 000000000..537084db9
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_common.cpp
@@ -0,0 +1,90 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+ * Common for RNN and LSTM cell execution
+ */
+#include "ref_rnn.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+using namespace rnn_utils;
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_cell_execution_sig(
+        (_ref_rnn_common_t<aprop, src_type, weights_type>::cell_execution)) {
+    if (!rnn.merge_gemm_layer) {
+        (this->*gemm_layer_func)('N', 'N', rnn.n_gates * rnn.dic, rnn.mb,
+                rnn.slc, 1.0, w_layer_[0], rnn.weights_layer_ld,
+                states_t_lm1_, rnn.states_ws_ld, 0.0, ws_gates_,
+                rnn.gates_ws_ld);
+    }
+    (this->*gemm_iter_func)('N', 'N', rnn.n_gates * rnn.dic, rnn.mb, rnn.sic,
+            1.0, w_iter_[0], rnn.weights_iter_ld, states_tm1_l_,
+            rnn.states_ws_ld, 1.0, ws_gates_, rnn.gates_ws_ld);
+
+    if (rnn_postgemm_ != nullptr)
+        rnn_postgemm_->execute<src_data_t, acc_data_t>(rnn, ws_gates_, states_t_l_, c_states_t_l_,
+            states_tm1_l_, c_states_tm1_l_, diff_states_t_l_,
+            diff_states_t_lp1_, diff_states_tp1_l_, bias_[0], ws_grid_,
+            ws_cell_);
+    else
+        (this->*elemwise_func)(rnn, ws_gates_, states_t_l_, c_states_t_l_,
+                states_tm1_l_, c_states_tm1_l_, diff_states_t_l_,
+                diff_states_t_lp1_, diff_states_tp1_l_, bias_[0], ws_grid_,
+                ws_cell_);
+}
+template rnn_cell_execution_sig(ref_rnn_fwd_f32_t::cell_execution);
+template rnn_cell_execution_sig(ref_rnn_fwd_u8s8_t::cell_execution);
+
+template <>
+rnn_cell_execution_sig(ref_rnn_bwd_f32_t::cell_execution) {
+    ws_diff_states_aoc_t diff_states_t_l(rnn, diff_states_t_l_);
+    (this->*elemwise_func)(rnn, ws_gates_, states_t_l_, c_states_t_l_,
+            states_tm1_l_, c_states_tm1_l_, diff_states_t_l_,
+            diff_states_t_lp1_, diff_states_tp1_l_, bias_[0], ws_grid_,
+            ws_cell_);
+
+    /// bwd by data on the cell
+    (this->*gemm_iter_func)('N', 'N', rnn.sic, rnn.mb, rnn.n_gates * rnn.dic,
+            1.0, w_iter_[0], rnn.weights_iter_ld, ws_gates_, rnn.gates_ws_ld,
+            0.0, diff_states_t_l_, rnn.states_ws_ld);
+
+    if (!rnn.merge_gemm_layer) {
+        (this->*gemm_layer_func)('N', 'N', rnn.slc, rnn.mb,
+                rnn.n_gates * rnn.dic, 1.0, w_layer_[0],
+                rnn.weights_layer_ld, ws_gates_, rnn.gates_ws_ld, 0.0,
+                &diff_states_t_l(rnn.n_states, 0, 0), rnn.states_ws_ld);
+
+        /// bwd by weights on the cell
+        gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.slc, rnn.mb, 1.0, ws_gates_,
+                rnn.gates_ws_ld, states_t_lm1_, rnn.states_ws_ld, 1.0,
+                diff_w_layer_, rnn.diff_weights_layer_ld);
+    }
+
+    if (!rnn.merge_gemm_iter)
+        gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.sic, rnn.mb, 1.0, ws_gates_,
+                rnn.gates_ws_ld, states_tm1_l_, rnn.states_ws_ld, 1.0,
+                diff_w_iter_, rnn.diff_weights_iter_ld);
+
+    /// bwd by bias we just accumulate diffs from the gates
+    gates_reduction(rnn, ws_gates_, diff_bias_);
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_gru.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_gru.cpp
new file mode 100644
index 000000000..e1a61d4c6
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_gru.cpp
@@ -0,0 +1,180 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+ * Cell execution GRU
+ */
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "ref_rnn.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::math;
+using namespace rnn_utils;
+
+#define AOC array_offset_calculator
+template <>
+rnn_cell_execution_sig(ref_rnn_fwd_f32_t::cell_execution_gru) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_[0]);
+    ws_states_aoc_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t states_tm1_l(rnn, states_tm1_l_);
+
+    // 1. gemm Wx[0-2],x
+    if (!rnn.merge_gemm_layer) {
+        (this->*gemm_layer_func)('N', 'N', rnn.n_gates * rnn.dic, rnn.mb,
+                rnn.slc, 1.0, w_layer_[0], rnn.weights_layer_ld,
+                states_t_lm1_, rnn.states_ws_ld, 0.0, ws_gates_,
+                rnn.gates_ws_ld);
+    }
+
+    // 2. gemm Wh[0-1],h
+    (this->*gemm_iter_func)('N', 'N', (rnn.n_gates - 1) * rnn.dic, rnn.mb,
+            rnn.sic, 1.0, w_iter_[0], rnn.weights_iter_ld, states_tm1_l_,
+            rnn.states_ws_ld, 1.0, ws_gates_, rnn.gates_ws_ld);
+
+    // 3. activation zt and rt + elemwise multiplication rt,ht-1
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            ws_gates(i, 0, j) = logistic_fwd(ws_gates(i, 0, j) + bias(0, j));
+            ws_gates(i, 1, j) = logistic_fwd(ws_gates(i, 1, j) + bias(1, j));
+            states_t_l(i, j) = states_tm1_l(i, j) * ws_gates(i, 1, j);
+        }
+    });
+
+    // 4. gemm Wh[2],h~t
+    (this->*gemm_iter_func)('N', 'N', rnn.dic, rnn.mb, rnn.sic, 1.0, w_iter_[1],
+            rnn.weights_iter_ld, states_t_l_, rnn.states_ws_ld, 1.0,
+            &(ws_gates(0, 2, 0)), rnn.gates_ws_ld);
+
+    // 5. activation h~t + calculate ht
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            ws_gates(i, 2, j) = tanh_fwd(ws_gates(i, 2, j) + bias(2, j));
+            states_t_l(i, j) = states_tm1_l(i, j) * ws_gates(i, 0, j)
+                    + (1.0f - ws_gates(i, 0, j)) * ws_gates(i, 2, j);
+        }
+    });
+}
+
+template <>
+rnn_cell_execution_sig(ref_rnn_fwd_u8s8_t::cell_execution_gru) {
+    assert(!"GRU int8 is not supported");
+}
+
+template <>
+rnn_cell_execution_sig(ref_rnn_bwd_f32_t::cell_execution_gru) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    ws_states_aoc_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t states_tm1_l(rnn, states_tm1_l_);
+    ws_diff_w_iter_aoc_t diff_w_iter(rnn, diff_w_iter_);
+    ws_diff_states_aoc_t diff_states_t_l(rnn, diff_states_t_l_);
+    ws_diff_states_aoc_t diff_states_tp1_l(rnn, diff_states_tp1_l_);
+    ws_diff_states_aoc_t diff_states_t_lp1(rnn, diff_states_t_lp1_);
+
+    // use state memory for intermediate computations
+    // TODO: use cell ws for that
+    float *dhG1_ = &(diff_states_t_l(rnn.n_states, 0, 0));
+    float *hG1_ = dhG1_;
+    AOC<float, 2> dhG1(dhG1_, rnn.states_nld, rnn.states_ws_ld);
+    AOC<float, 2> hG1(hG1_, rnn.states_nld, rnn.states_ws_ld);
+
+    // 1. calculate dG2, dG1, and part of dht-1
+    // dG2^ = dh * (1 - G0) * (1 - G2^2)
+    // dG0^ = dh * (ht-1 - G2) * u * (1 - G0)
+    // dht-1 (part) = dh * G0
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            float h = states_tm1_l(i, j);
+            float dHt = diff_states_tp1_l(0, i, j)
+                    + diff_states_t_lp1(rnn.n_states, i, j);
+            float dG2 = (1.0f - ws_gates(i, 0, j)) * dHt
+                    * one_m_square(ws_gates(i, 2, j));
+            float dG0 = (h - ws_gates(i, 2, j)) * dHt
+                    * x_m_square(ws_gates(i, 0, j));
+
+            diff_states_t_l(0, i, j) = dHt * ws_gates(i, 0, j);
+            ws_gates(i, 0, j) = dG0;
+            ws_gates(i, 2, j) = dG2;
+        }
+    });
+
+    // 2. calculate intermediate d(hG1)
+    // d(hG1) = dG2 * W2h^t
+    (this->*gemm_iter_func)('N', 'N', rnn.sic, rnn.mb, rnn.dic, 1.0, w_iter_[1],
+            rnn.weights_iter_ld, &(ws_gates(0, 2, 0)), rnn.gates_ws_ld, 0.0,
+            dhG1_, rnn.states_ws_ld);
+
+    // 3. calculate dG1^ and part of dht-1
+    // dG1^ = d(hG1) * h * G1 * (1 - G1)
+    // dht-1 (part) += d(hG1) * G1
+    // h * G1 (required for dWh)
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            float h = states_tm1_l(i, j);
+            float G1 = ws_gates(i, 1, j);
+            diff_states_t_l(0, i, j) += dhG1(i, j) * G1;
+            ws_gates(i, 1, j) = dhG1(i, j) * h * x_m_square(G1);
+            hG1(i, j) = G1 * h;
+        }
+    });
+
+    // 4. calculate diff weights
+    // dWh1 += dG1 * h, dWh2 += dG2 * h, dWh3 += dG3 * (G1(*)h)
+    gemm('N', 'T', (rnn.n_gates - 1) * rnn.dic, rnn.sic, rnn.mb, 1.0, ws_gates_,
+            rnn.gates_ws_ld, states_tm1_l_, rnn.states_ws_ld, 1.0, diff_w_iter_,
+            rnn.diff_weights_iter_ld);
+    gemm('N', 'T', rnn.dic, rnn.sic, rnn.mb, 1.0, &(ws_gates(0, 2, 0)),
+            rnn.gates_ws_ld, hG1_, rnn.states_ws_ld, 1.0,
+            &(diff_w_iter(0, 2, 0)), rnn.diff_weights_iter_ld);
+
+    // 5. calculate diff states
+    // dht-1 += dG1 * W1h + dG0 * W0h
+    (this->*gemm_iter_func)('N', 'N', rnn.sic, rnn.mb,
+            (rnn.n_gates - 1) * rnn.dic, 1.0, w_iter_[0],
+            rnn.weights_iter_ld, ws_gates_, rnn.gates_ws_ld, 1.0,
+            diff_states_t_l_, rnn.states_ws_ld);
+
+    if (!rnn.merge_gemm_layer) {
+        // dWx += [dG0 dG1 dG2] * [x]
+        gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.slc, rnn.mb, 1.0, ws_gates_,
+                rnn.gates_ws_ld, states_t_lm1_, rnn.states_ws_ld, 1.0,
+                diff_w_layer_, rnn.diff_weights_layer_ld);
+        // dx = dG2 * W2x + dG1 * W1x + dG0 * W0x
+        (this->*gemm_layer_func)('N', 'N', rnn.slc, rnn.mb,
+                rnn.n_gates * rnn.dic, 1.0, w_layer_[0],
+                rnn.weights_layer_ld, ws_gates_, rnn.gates_ws_ld, 0.0,
+                &(diff_states_t_l(rnn.n_states, 0, 0)), rnn.states_ws_ld);
+    }
+
+    // 6. calculate diff bias
+    gates_reduction(rnn, ws_gates_, diff_bias_);
+}
+#undef AOC
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_gru_lbr.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_gru_lbr.cpp
new file mode 100644
index 000000000..8dea8c90a
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_gru_lbr.cpp
@@ -0,0 +1,170 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+ * Cell execution GRU with linear before reset
+ */
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "ref_rnn.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::math;
+using namespace rnn_utils;
+#define AOC array_offset_calculator
+
+template <>
+rnn_elemwise_sig(ref_rnn_fwd_f32_t::gru_lbr_elemwise) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_);
+    ws_states_aoc_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t states_tm1_l(rnn, states_tm1_l_);
+    ws_gates_aoc_t ws_gemm_state(rnn, ws_cell_);
+    AOC<float, 2> ws_Wh_b(ws_grid_, rnn.mb, rnn.dic);
+
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            float Wh_b = ws_gemm_state(i, 2, j) + bias(3, j);
+            ws_gates(i, 0, j) = logistic_fwd(
+                    ws_gates(i, 0, j) + ws_gemm_state(i, 0, j) + bias(0, j));
+            ws_gates(i, 1, j) = logistic_fwd(
+                    ws_gates(i, 1, j) + ws_gemm_state(i, 1, j) + bias(1, j));
+            ws_gates(i, 2, j) = tanh_fwd(
+                    ws_gates(i, 2, j) + ws_gates(i, 1, j) * Wh_b + bias(2, j));
+            states_t_l(i, j) = states_tm1_l(i, j) * ws_gates(i, 0, j)
+                    + (1.0f - ws_gates(i, 0, j)) * ws_gates(i, 2, j);
+            if (rnn.is_training)
+                ws_Wh_b(i, j) = Wh_b;
+        }
+    });
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_fwd_u8s8_t::gru_lbr_elemwise) {
+    assert(!"GRU LBR int8 is not supported");
+}
+
+template <>
+rnn_cell_execution_sig(ref_rnn_fwd_f32_t::cell_execution_gru_lbr) {
+    if (!rnn.merge_gemm_layer) {
+        (this->*gemm_layer_func)('N', 'N', rnn.n_gates * rnn.dic, rnn.mb,
+                rnn.slc, 1.0, w_layer_[0], rnn.weights_layer_ld,
+                states_t_lm1_, rnn.states_ws_ld, 0.0, ws_gates_,
+                rnn.gates_ws_ld);
+    }
+    (this->*gemm_iter_func)('N', 'N', rnn.n_gates * rnn.dic, rnn.mb, rnn.sic,
+            1.0, w_iter_[0], rnn.weights_iter_ld, states_tm1_l_,
+            rnn.states_ws_ld, 0.0, ws_cell_, rnn.gates_ws_ld);
+    (this->*elemwise_func)(rnn, ws_gates_, states_t_l_, c_states_t_l_,
+            states_tm1_l_, c_states_tm1_l_, diff_states_t_l_,
+            diff_states_t_lp1_, diff_states_tp1_l_, bias_[0], ws_grid_,
+            ws_cell_);
+}
+
+template <>
+rnn_cell_execution_sig(ref_rnn_fwd_u8s8_t::cell_execution_gru_lbr) {
+    assert(!"GRU LBR int8 is not supported");
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_bwd_f32_t::gru_lbr_elemwise) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    ws_states_aoc_t states_tm1_l(rnn, states_tm1_l_);
+    ws_diff_states_aoc_t diff_states_t_l(rnn, diff_states_t_l_);
+    ws_diff_states_aoc_t diff_states_tp1_l(rnn, diff_states_tp1_l_);
+    ws_diff_states_aoc_t diff_states_t_lp1(rnn, diff_states_t_lp1_);
+    ws_gates_aoc_t ws_gates_r(rnn, ws_cell_);
+    AOC<float, 2> ws_Wh_b(ws_grid_, rnn.mb, rnn.dic);
+
+    // 1. calculate dG1 dG2 dG3
+    // dG0 = (dht - G2) * dht * (1 - G0) * G0
+    // dG1 = (W*h + b) * dG2 * (1 - G1) * G1
+    // dG2 = (1 - G0) * dht * (1 - G2*G2)
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            float h = states_tm1_l(i, j);
+            float dHt = diff_states_tp1_l(0, i, j)
+                    + diff_states_t_lp1(rnn.n_states, i, j);
+            float dG0 = (h - ws_gates(i, 2, j)) * dHt
+                    * x_m_square(ws_gates(i, 0, j));
+            float dG2 = (1.0f - ws_gates(i, 0, j))
+                    * one_m_square(ws_gates(i, 2, j)) * dHt;
+            float dG1 = ws_Wh_b(i, j) * dG2 * x_m_square(ws_gates(i, 1, j));
+
+            diff_states_t_l(0, i, j) = dHt * ws_gates(i, 0, j);
+            ws_gates(i, 2, j) = dG2;
+            ws_gates_r(i, 2, j) = dG2 * ws_gates(i, 1, j);
+            ws_gates(i, 0, j) = ws_gates_r(i, 0, j) = dG0;
+            ws_gates(i, 1, j) = ws_gates_r(i, 1, j) = dG1;
+        }
+    });
+}
+
+template <>
+rnn_cell_execution_sig(ref_rnn_bwd_f32_t::cell_execution_gru_lbr) {
+    ws_gates_aoc_t ws_gates_r(rnn, ws_cell_);
+    ws_diff_states_aoc_t diff_states_t_l(rnn, diff_states_t_l_);
+
+    (this->*elemwise_func)(rnn, ws_gates_, states_t_l_, c_states_t_l_,
+            states_tm1_l_, c_states_tm1_l_, diff_states_t_l_,
+            diff_states_t_lp1_, diff_states_tp1_l_, bias_[0], ws_grid_,
+            ws_cell_);
+
+    if (!rnn.merge_gemm_layer) {
+        //  dx = dG * Wx^t
+        (this->*gemm_layer_func)('N', 'N', rnn.slc, rnn.mb,
+                rnn.n_gates * rnn.dic, 1.0, w_layer_[0],
+                rnn.weights_layer_ld, ws_gates_, rnn.gates_ws_ld, 0.0,
+                &diff_states_t_l(rnn.n_states, 0, 0), rnn.states_ws_ld);
+        // dWx +=  dG^t * x
+        gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.slc, rnn.mb, 1.0, ws_gates_,
+                rnn.gates_ws_ld, states_t_lm1_, rnn.states_ws_ld, 1.0,
+                diff_w_layer_, rnn.diff_weights_layer_ld);
+    }
+    // dh +=  dGr * Wh^t
+    (this->*gemm_iter_func)('N', 'N', rnn.sic, rnn.mb, rnn.n_gates * rnn.dic,
+            1.0, w_iter_[0], rnn.weights_iter_ld, ws_cell_, rnn.gates_ws_ld,
+            1.0, diff_states_t_l_, rnn.states_ws_ld);
+
+    // dWh += dGr^t * h
+    gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.sic, rnn.mb, 1.0, ws_cell_,
+            rnn.gates_ws_ld, states_tm1_l_, rnn.states_ws_ld, 1.0, diff_w_iter_,
+            rnn.diff_weights_layer_ld);
+
+    // db1-3 += e * dG
+    // db4 += e * (r * dG2)
+    gates_reduction(rnn, ws_gates_, diff_bias_);
+
+    parallel_nd(rnn.dic, [&](int j) {
+        for (int i = 0; i < rnn.mb; i++) {
+            diff_bias_[3 * rnn.dic + j] += ws_gates_r(i, 2, j);
+        }
+    });
+}
+
+#undef AOC
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_lstm.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_lstm.cpp
new file mode 100644
index 000000000..334552198
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_lstm.cpp
@@ -0,0 +1,147 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+ * Cell execution LSTM
+ */
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "../simple_q10n.hpp"
+#include "ref_rnn.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::math;
+using namespace rnn_utils;
+
+template <>
+rnn_elemwise_sig(ref_rnn_fwd_f32_t::lstm_elemwise) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_);
+    ws_states_aoc_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t c_states_t_l(rnn, c_states_t_l_);
+    ws_states_aoc_t c_states_tm1_l(rnn, c_states_tm1_l_);
+
+    parallel_nd(rnn.mb, [&](int i) {
+// WA. Loss of correctnes in case of simd loop unrolling with icc 18
+#if !defined(__INTEL_COMPILER)
+        PRAGMA_OMP_SIMD()
+#endif
+        for (int j = 0; j < rnn.dic; j++) {
+            ws_gates(i, 0, j) = logistic_fwd(ws_gates(i, 0, j) + bias(0, j));
+            ws_gates(i, 1, j) = logistic_fwd(ws_gates(i, 1, j) + bias(1, j));
+            ws_gates(i, 2, j) = tanh_fwd(ws_gates(i, 2, j) + bias(2, j));
+            ws_gates(i, 3, j) = logistic_fwd(ws_gates(i, 3, j) + bias(3, j));
+
+            float tmp = ws_gates(i, 1, j) * c_states_tm1_l(i, j)
+                    + ws_gates(i, 0, j) * ws_gates(i, 2, j);
+            states_t_l(i, j) = ws_gates(i, 3, j) * tanh_fwd(tmp);
+            c_states_t_l(i, j) = tmp;
+        }
+    });
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_fwd_u8s8_t::lstm_elemwise) {
+    ws_gates_aoc_s32_t ws_gates_s32(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_);
+    ws_states_aoc_u8_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t c_states_t_l(rnn, c_states_t_l_);
+    ws_states_aoc_t c_states_tm1_l(rnn, c_states_tm1_l_);
+
+    float *weights_scales = pd()->attr()->rnn_weights_qparams_.scales_;
+    float data_shift = pd()->attr()->rnn_data_qparams_.shift_;
+    float data_scale = pd()->attr()->rnn_data_qparams_.scale_;
+    round_mode_t rmode = pd()->attr()->round_mode_;
+
+    auto q_d = [&](float f) {
+        float qf = f * data_scale + data_shift;
+        return qz_a1b0<float, src_data_t>()(qf, rmode);
+    };
+
+    auto deq_w = [&](acc_data_t s, int gate, int j) {
+        return pd()->attr()->rnn_weights_qparams_.mask_ == 0 ?
+                saturate<float>(s) * (1.f / (weights_scales[0] * data_scale)) :
+                saturate<float>(s) * (1.f / (weights_scales[gate * rnn.dic + j]
+                                                   * data_scale));
+    };
+
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            float G0 = logistic_fwd<float>(
+                    deq_w(ws_gates_s32(i, 0, j), 0, j) + bias(0, j));
+            float G1 = logistic_fwd<float>(
+                    deq_w(ws_gates_s32(i, 1, j), 1, j) + bias(1, j));
+            float G2 = tanh_fwd<float>(
+                    deq_w(ws_gates_s32(i, 2, j), 2, j) + bias(2, j));
+            float G3 = logistic_fwd<float>(
+                    deq_w(ws_gates_s32(i, 3, j), 3, j) + bias(3, j));
+            float tmp = G1 * c_states_tm1_l(i, j) + G0 * G2;
+            states_t_l(i, j) = q_d(G3 * tanh_fwd(tmp));
+            c_states_t_l(i, j) = tmp;
+        }
+    });
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_bwd_f32_t::lstm_elemwise) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_);
+    ws_states_aoc_t c_states_t_l(rnn, c_states_t_l_);
+    ws_states_aoc_t c_states_tm1_l(rnn, c_states_tm1_l_);
+    ws_diff_states_aoc_t diff_states_t_l(rnn, diff_states_t_l_);
+    ws_diff_states_aoc_t diff_states_tp1_l(rnn, diff_states_tp1_l_);
+    ws_diff_states_aoc_t diff_states_t_lp1(rnn, diff_states_t_lp1_);
+
+    parallel_nd(rnn.mb, [&](int i) {
+        PRAGMA_OMP_SIMD()
+        for (int j = 0; j < rnn.dic; j++) {
+            float Ct = c_states_t_l(i, j);
+            /// @todo save it in the workspace in fwd pass or recompute it to
+            /// save bw
+            float tanhCt = tanh_fwd(Ct);
+            // we have 2 incoming diffs on Ht
+            float dHt = diff_states_tp1_l(0, i, j)
+                    + diff_states_t_lp1(rnn.n_states, i, j);
+            float dCt = diff_states_tp1_l(1, i, j)
+                    + one_m_square(tanhCt) * ws_gates(i, 3, j) * dHt;
+
+            float dG1 = c_states_tm1_l(i, j) * dCt
+                    * x_m_square(ws_gates(i, 1, j));
+            float dG0 = ws_gates(i, 2, j) * dCt * x_m_square(ws_gates(i, 0, j));
+            float dG3 = tanhCt * dHt * x_m_square(ws_gates(i, 3, j));
+            float dG2
+                    = ws_gates(i, 0, j) * dCt * one_m_square(ws_gates(i, 2, j));
+
+            diff_states_t_l(1, i, j) = dCt * ws_gates(i, 1, j);
+
+            ws_gates(i, 0, j) = dG0;
+            ws_gates(i, 1, j) = dG1;
+            ws_gates(i, 2, j) = dG2;
+            ws_gates(i, 3, j) = dG3;
+        }
+    });
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_rnn.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_rnn.cpp
new file mode 100644
index 000000000..4536e8dfa
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cell_rnn.cpp
@@ -0,0 +1,113 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+ * Cell execution of Vanilla RNN
+ */
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "ref_rnn.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::math;
+using namespace rnn_utils;
+
+template <>
+float activation<alg_kind::eltwise_relu, prop_kind::forward>(
+        float dd, float s, float alpha, float cliping) {
+    return relu_fwd<float>(s, alpha);
+}
+
+template <>
+float activation<alg_kind::eltwise_relu, prop_kind::backward>(
+        float dd, float s, float alpha, float cliping) {
+    return relu_bwd<float>(dd, s, alpha);
+}
+
+template <>
+float activation<alg_kind::eltwise_tanh, prop_kind::forward>(
+        float dd, float s, float alpha, float cliping) {
+    return tanh_fwd<float>(s);
+}
+
+template <>
+float activation<alg_kind::eltwise_tanh, prop_kind::backward>(
+        float dd, float s, float alpha, float cliping) {
+    return dd * one_m_square<float>(s);
+}
+
+template <>
+float activation<alg_kind::eltwise_logistic, prop_kind::forward>(
+        float dd, float s, float alpha, float cliping) {
+    return logistic_fwd<float>(s);
+}
+
+template <>
+float activation<alg_kind::eltwise_logistic, prop_kind::backward>(
+        float dd, float s, float alpha, float cliping) {
+    return dd * x_m_square<float>(s);
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_fwd_f32_t::rnn_elemwise) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_);
+    ws_states_aoc_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t states_tm1_l(rnn, states_tm1_l_);
+
+    parallel_nd(rnn.mb, [&](int i) {
+        for (int j = 0; j < rnn.dic; j++) {
+            const float h
+                    = activation_func(0, ws_gates(i, 0, j) + bias(0, j), 0, 0);
+            ws_gates(i, 0, j) = states_t_l(i, j) = h;
+        }
+    });
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_fwd_u8s8_t::rnn_elemwise) {
+    assert(!"VANILLA RNN int8 is not supported");
+}
+
+template <>
+rnn_elemwise_sig(ref_rnn_bwd_f32_t::rnn_elemwise) {
+    ws_gates_aoc_t ws_gates(rnn, ws_gates_);
+    bias_aoc_t bias(rnn, bias_);
+    ws_states_aoc_t states_t_l(rnn, states_t_l_);
+    ws_states_aoc_t states_tm1_l(rnn, states_tm1_l_);
+    ws_diff_states_aoc_t diff_states_t_l(rnn, diff_states_t_l_);
+    ws_diff_states_aoc_t diff_states_tp1_l(rnn, diff_states_tp1_l_);
+    ws_diff_states_aoc_t diff_states_t_lp1(rnn, diff_states_t_lp1_);
+
+    parallel_nd(rnn.mb, [&](int i) {
+        for (int j = 0; j < rnn.dic; ++j) {
+            const float dH = diff_states_t_lp1(rnn.n_states, i, j)
+                    + diff_states_tp1_l(0, i, j);
+            auto g = ws_gates(i, 0, j);
+            ws_gates(i, 0, j) = activation_func(dH, g, 0, 0);
+        }
+    });
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_rnn_pd.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cpu_rnn_pd.hpp
index 3b9317ac9..12b95c8dd 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/cpu_rnn_pd.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/cpu_rnn_pd.hpp
@@ -18,13 +18,14 @@
 #define CPU_RNN_PD_HPP
 
 #include "c_types_map.hpp"
-#include "cpu_engine.hpp"
-#include "cpu_memory.hpp"
-#include "cpu_primitive.hpp"
+#include "../cpu_engine.hpp"
+#include "../cpu_memory.hpp"
+#include "../cpu_primitive.hpp"
 #include "nstl.hpp"
 #include "rnn_pd.hpp"
 #include "type_helpers.hpp"
 #include "utils.hpp"
+#include "rnn_utils.hpp"
 
 namespace mkldnn {
 namespace impl {
@@ -87,10 +88,6 @@ protected:
         using namespace memory_format;
         if (src_layer_pd_.desc()->format == any)
             CHECK(src_layer_pd_.set_format(tnc));
-        if (weights_layer_pd_.desc()->format == any)
-            CHECK(weights_layer_pd_.set_format(ldigo));
-        if (weights_iter_pd_.desc()->format == any)
-            CHECK(weights_iter_pd_.set_format(ldigo));
         if (dst_layer_pd_.desc()->format == any)
             CHECK(dst_layer_pd_.set_format(tnc));
 
@@ -104,14 +101,51 @@ protected:
 
         return status::success;
     }
+
+    status_t check_layout_consistency() {
+        using namespace memory_format;
+        using namespace utils;
+        using namespace data_type;
+        bool ok = true;
+        ok = ok && src_layer_pd_.desc()->format == tnc
+                && dst_layer_pd_.desc()->format == tnc;
+        ok = ok && IMPLICATION(!src_iter_pd_.is_zero(),
+                           src_iter_pd_.desc()->format == ldsnc)
+                && IMPLICATION(!dst_iter_pd_.is_zero(),
+                           dst_iter_pd_.desc()->format == ldsnc);
+
+        ok = ok && one_of(weights_layer_pd_.desc()->format, ldigo, rnn_packed)
+                && one_of(weights_iter_pd_.desc()->format, ldigo, rnn_packed);
+        ok = ok && IMPLICATION(weights_iter_pd_.desc()->format == rnn_packed,
+                           weights_iter_pd_.desc()
+                                           ->layout_desc.rnn_packed_desc.format
+                                   == mkldnn_ldigo_p);
+        ok = ok && IMPLICATION(weights_layer_pd_.desc()->format == rnn_packed,
+                           weights_layer_pd_.desc()
+                                           ->layout_desc.rnn_packed_desc.format
+                                   == mkldnn_ldigo_p);
+
+        ok = ok && IMPLICATION(!bias_pd_.is_zero(),
+                           bias_pd_.desc()->format == ldgo);
+
+        /* Int8 is supported only for packed weights */
+        data_type_t weights_iter_dt = weights_iter_pd_.desc()->data_type;
+        data_type_t weights_layer_dt = weights_layer_pd_.desc()->data_type;
+        ok = ok && IMPLICATION(weights_iter_dt == s8,
+                           weights_iter_pd_.desc()->format == rnn_packed);
+        ok = ok && IMPLICATION(weights_layer_dt == s8,
+                           weights_layer_pd_.desc()->format == rnn_packed);
+
+        return ok ? status::success : status::unimplemented;
+    }
 };
 
 struct cpu_rnn_bwd_pd_t : public rnn_bwd_pd_t {
     using cpu_memory_pd_t = cpu_memory_t::pd_t;
 
     cpu_rnn_bwd_pd_t(engine_t *engine, const rnn_desc_t *adesc,
-            const primitive_attr_t *attr, const rnn_bwd_pd_t *hint_bwd_pd)
-        : rnn_bwd_pd_t(engine, adesc, attr, hint_bwd_pd)
+            const primitive_attr_t *attr, const rnn_fwd_pd_t *hint_fwd_pd)
+        : rnn_bwd_pd_t(engine, adesc, attr, hint_fwd_pd)
         , src_layer_pd_(engine, &desc_.src_layer_desc)
         , src_iter_pd_(engine, &desc_.src_iter_desc)
         , weights_layer_pd_(engine, &desc_.weights_layer_desc)
@@ -203,14 +237,22 @@ protected:
             CHECK(src_layer_pd_.set_format(tnc));
         if (diff_src_layer_pd_.desc()->format == any)
             CHECK(diff_src_layer_pd_.set_format(tnc));
-        if (weights_layer_pd_.desc()->format == any)
-            CHECK(weights_layer_pd_.set_format(ldgoi));
-        if (diff_weights_layer_pd_.desc()->format == any)
-            CHECK(diff_weights_layer_pd_.set_format(ldigo));
-        if (weights_iter_pd_.desc()->format == any)
-            CHECK(weights_iter_pd_.set_format(ldgoi));
-        if (diff_weights_iter_pd_.desc()->format == any)
-            CHECK(diff_weights_iter_pd_.set_format(ldigo));
+        if (diff_weights_layer_pd_.desc()->format == any) {
+            memory_desc_t md = *(diff_weights_layer_pd_.desc());
+            md.format = ldigo;
+            CHECK(memory_desc_wrapper::compute_blocking(md));
+            CHECK(rnn_utils::set_good_strides(md));
+            cpu_memory_t::pd_t new_pd(engine_, &md);
+            diff_weights_layer_pd_ = new_pd;
+        }
+        if (diff_weights_iter_pd_.desc()->format == any) {
+            memory_desc_t md = *(diff_weights_iter_pd_.desc());
+            md.format = ldigo;
+            CHECK(memory_desc_wrapper::compute_blocking(md));
+            CHECK(rnn_utils::set_good_strides(md));
+            cpu_memory_t::pd_t new_pd(engine_, &md);
+            diff_weights_iter_pd_ = new_pd;
+        }
         if (dst_layer_pd_.desc()->format == any)
             CHECK(dst_layer_pd_.set_format(tnc));
         if (diff_dst_layer_pd_.desc()->format == any)
@@ -234,6 +276,45 @@ protected:
 
         return status::success;
     }
+
+    status_t check_layout_consistency() {
+        using namespace memory_format;
+        using namespace utils;
+        bool ok = true;
+        ok = ok && src_layer_pd_.desc()->format == tnc
+                && dst_layer_pd_.desc()->format == tnc;
+        ok = ok && IMPLICATION(!src_iter_pd_.is_zero(),
+                           src_iter_pd_.desc()->format == ldsnc)
+                && IMPLICATION(!dst_iter_pd_.is_zero(),
+                           dst_iter_pd_.desc()->format == ldsnc);
+
+        ok = ok && one_of(weights_layer_pd_.desc()->format, ldgoi, rnn_packed)
+                && one_of(weights_iter_pd_.desc()->format, ldgoi, rnn_packed);
+        ok = ok && IMPLICATION(weights_iter_pd_.desc()->format == rnn_packed,
+                           weights_iter_pd_.desc()
+                                           ->layout_desc.rnn_packed_desc.format
+                                   == mkldnn_ldgoi_p);
+        ok = ok && IMPLICATION(weights_layer_pd_.desc()->format == rnn_packed,
+                           weights_layer_pd_.desc()
+                                           ->layout_desc.rnn_packed_desc.format
+                                   == mkldnn_ldgoi_p);
+
+        ok = ok && IMPLICATION(!bias_pd_.is_zero(),
+                           bias_pd_.desc()->format == ldgo);
+
+        ok = ok && diff_src_layer_pd_.desc()->format == tnc
+                && diff_dst_layer_pd_.desc()->format == tnc;
+        ok = ok && IMPLICATION(!diff_states_pd_.is_zero(),
+                           diff_states_pd_.desc()->format == ldsnc)
+                && IMPLICATION(!diff_dst_iter_pd_.is_zero(),
+                           diff_dst_iter_pd_.desc()->format == ldsnc);
+        ok = ok && diff_weights_layer_pd_.desc()->format == ldigo
+                && diff_weights_iter_pd_.desc()->format == ldigo;
+        ok = ok && IMPLICATION(!diff_bias_pd_.is_zero(),
+                           diff_bias_pd_.desc()->format == ldgo);
+
+        return ok ? status::success : status::unimplemented;
+    }
 };
 }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/jit_uni_rnn_postgemm.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/jit_uni_rnn_postgemm.hpp
new file mode 100644
index 000000000..048264c8f
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/jit_uni_rnn_postgemm.hpp
@@ -0,0 +1,424 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+ * Cell execution LSTM
+ */
+
+#include "rnn_utils.hpp"
+#include "../jit_generator.hpp"
+#include "../jit_uni_eltwise.hpp"
+#include "c_types_map.hpp"
+#include "utils.hpp"
+
+#include "mkldnn_thread.hpp"
+
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+struct jit_uni_rnn_postgemm_kernel : public jit_generator {
+
+    typedef void (*kernel_t)(void *gates_, const void *bias, void *states_t_l_,
+                     void *c_states_t_l_, void *c_states_tm1_l_);
+
+    jit_uni_rnn_postgemm_kernel(const rnn_utils::rnn_conf_t &rnn, const primitive_attr_t *attr): rnn_(rnn), attr_(attr){}
+
+    virtual void init() = 0;
+
+template <typename src_data_t, typename acc_data_t>
+    rnn_elemwise_sig(execute) {
+        rnn_utils::ws_gates_aoc<acc_data_t> ws_gates(rnn, ws_gates_);
+        rnn_utils::bias_aoc_t bias(rnn, bias_);
+        rnn_utils::ws_states_aoc<src_data_t> states_t_l(rnn, states_t_l_);
+        rnn_utils::ws_states_aoc_t c_states_t_l(rnn, c_states_t_l_);
+        rnn_utils::ws_states_aoc_t c_states_tm1_l(rnn, c_states_tm1_l_);
+
+        // Todo: add parallelization on dic for the batch 1 case
+        // Assumption: the kernel runs a loop on dic elements
+        parallel_nd(rnn.mb, [&](int i) {
+                auto b_ = &bias(0, 0);
+                auto g_ = &ws_gates(i, 0, 0);
+                auto s_tl_ = &states_t_l(i, 0);
+                auto c_tl_ = &c_states_t_l(i, 0);
+                auto c_tm1l_ = &c_states_tm1_l(i, 0);
+                kernel_(g_, b_, s_tl_, c_tm1l_, c_tl_);
+            });
+    }
+
+protected:
+    kernel_t kernel_;
+    const rnn_utils::rnn_conf_t &rnn_;
+    const primitive_attr_t *attr_;
+};
+
+template <cpu_isa_t isa, impl::data_type_t src_data_t>
+struct jit_uni_lstm_postgemm_kernel_fwd: public jit_uni_rnn_postgemm_kernel
+{
+    DECLARE_CPU_JIT_AUX_FUNCTIONS(jit_uni_lstm_postgemm_kernel_fwd)
+
+    typedef typename utils::conditional<src_data_t == data_type::u8, int32_t,
+            float>::type acc_data_t;
+    typedef typename utils::conditional<isa == avx512_core,
+            jit_uni_eltwise_injector_f32<avx512_common>,
+            jit_uni_eltwise_injector_f32<isa>>::type injector_t;
+
+    jit_uni_lstm_postgemm_kernel_fwd(const rnn_utils::rnn_conf_t &rnn, const primitive_attr_t *attr)
+    : jit_uni_rnn_postgemm_kernel(rnn, attr){}
+
+    void init() override {
+        // we use rax for both constant tables as they use the same table
+        sigmoid_injector_ = new injector_t(this,
+                alg_kind::eltwise_logistic, 0.0f, 0.0f, true, rax);
+        tanh_injector_ = new injector_t(this,
+                alg_kind::eltwise_tanh, 0.0f, 0.0f, true, rax);
+        generate();
+        kernel_ = (kernel_t) this->getCode();
+    }
+
+protected:
+    injector_t *sigmoid_injector_;
+    injector_t *tanh_injector_;
+
+    // register size in bytes
+    using Vmm = typename jit_uni_eltwise_injector_f32<isa>::Vmm;
+    size_t vlen = cpu_isa_traits<isa>::vlen;
+    size_t vlen_dst = (src_data_t == data_type::u8) ? vlen/4 : vlen;
+    size_t cstate_dt_size = sizeof(float);
+    size_t hstate_dt_size = (src_data_t == data_type::u8) ? sizeof(uint8_t) : sizeof(float);
+    size_t gate_dt_size = (src_data_t == data_type::u8) ? sizeof(uint32_t) : sizeof(float);
+    size_t qscale_dt_size = sizeof(float);
+    size_t bias_dt_size = sizeof(float);
+
+    void generate() {
+        using namespace Xbyak;
+
+        int mask = attr_->rnn_weights_qparams_.mask_;
+        float *weights_scales = attr_->rnn_weights_qparams_.scales_;
+        float data_scale = attr_->rnn_data_qparams_.scale_;
+        float data_shift = attr_->rnn_data_qparams_.shift_;
+        round_mode_t rmode = attr_->round_mode_;
+
+        // Labels declaration
+        Label vector_loop_start_label, vector_loop_end_label;
+        Label rem_loop_start_label, rem_loop_end_label;
+        Label table_label;
+
+        // Register map
+        Reg64 loop_cnt(r11);  // loop counter
+        Reg64 table_reg(rbx); // table is used for data scale and shifts
+        Reg64 tmp_reg(r12);   // used as temporary to customize mxcsr
+        Reg64 weights_scales_reg(r13);
+        // We skip vmm0 as it can be used by the injector for masks on sse4.2
+        Vmm G0(1), G1(2), G2(3), G3(4), tmp1_vmm(5), tmp2_vmm(6), zero_vmm(7);
+
+        // stack map
+        Address saved_csr_addr = ptr[rsp];
+        Address modified_csr_addr = ptr[rsp + sizeof(int64_t)];
+        size_t stack_size = 2 * sizeof(int64_t);
+
+        // constant table map
+        Address dscale_off_addr = ptr[table_reg];
+        Address dshift_off_addr = ptr[table_reg + vlen];
+        Address ymm_perm_mask_addr = ptr[table_reg + 2*vlen];
+        Address zmm_perm_mask_addr = ptr[table_reg + 2*vlen + cpu_isa_traits<avx>::vlen];
+
+        // quantize from float to u8
+        auto q_d = [&](Vmm f, Vmm tmp_vmm, Reg64 tmp_reg) {
+            sub(rsp, stack_size);
+            stmxcsr(saved_csr_addr); // save the mxcsr
+
+            // set the rounding mode appropriatly
+            mov(tmp_reg, saved_csr_addr);
+            and_(tmp_reg, 0xffff9fff); // clear rc bits (rc = RNE)
+            if (rmode == round_mode::down)
+                or_(tmp_reg, 0x00002000); // set rc=01 if RD
+            mov(modified_csr_addr, tmp_reg);
+            ldmxcsr(modified_csr_addr);
+
+            uni_vpxor(tmp_vmm, tmp_vmm, tmp_vmm);
+            uni_vmulps(f, f, dscale_off_addr); // apply scale
+            uni_vaddps(f, f, dshift_off_addr); // apply shift
+            uni_vcvtps2dq(f, f); // convert to int32 with mxcsr rounding
+            uni_vpackssdw(f, f, tmp_vmm); // convert from s32 to s16
+            uni_vpackuswb(f, f, tmp_vmm); // convert from s16 to u8 with saturation
+            // Note that the results are interleaved by 128 bit chunks, so we need to merge them together
+            switch (vlen) {
+            case 64:  { //avx512
+                Zmm fz(f.getIdx()), tmpz(tmp_vmm.getIdx());
+                uni_vmovups(tmpz, zmm_perm_mask_addr);
+                vpermd(fz, tmpz, fz);
+                break; }
+            case 32: { //avx
+                Ymm fy(f.getIdx()), tmpy(tmp_vmm.getIdx());
+                uni_vmovups(tmpy, ymm_perm_mask_addr);
+                vpermd(fy, tmpy, fy);
+                break; }
+            case 16: // sse: nothing to do
+                break;
+            default: assert(!"Unsupported case");
+            };
+
+            ldmxcsr(saved_csr_addr); // restore the original mxcsr
+            add(rsp, stack_size);
+        };
+
+        auto fast_recip =[&](Vmm s, Vmm tmp, bool packed) {
+            if (packed)
+                uni_vrcpps(tmp, s);
+            else
+                uni_vrcpss(tmp, s); // prevent divide by zero
+            // we add one Newton iteration
+            uni_vmulps(s, s, tmp);
+            uni_vmulps(s, s, tmp); // s <- s * tmp^2
+            uni_vaddps(tmp, tmp, tmp);
+            uni_vsubps(tmp, tmp, s);
+            uni_vmovups(s, tmp); // s <- 2 * tmp - s * tmp^2
+        };
+
+        // dequantize from s32 to float
+        auto deq_w = [&](Vmm s, Vmm tmp1, Vmm tmp2, int gate, bool packed) {
+            // TODO: if mask is 0 precompute mul and inverse
+            if (mask == 0)
+                uni_vbroadcastss(tmp1, ptr[weights_scales_reg]);
+            else
+                uni_vmovups(tmp1, ptr[weights_scales_reg + gate * rnn_.dic * qscale_dt_size]);
+            uni_vcvtdq2ps(s, s);
+            uni_vmulps(tmp1, tmp1, dscale_off_addr);
+            fast_recip(tmp1, tmp2, packed);
+            uni_vmulps(s, s, tmp1);
+        };
+
+        // We start code generations here
+        preamble();
+
+        // extract addresses passed as parameter
+#ifdef _WIN32
+        auto addr_ws_gates_reg = abi_param1;
+        auto addr_bias_reg = abi_param2;
+        auto addr_states_t_l_reg = abi_param3;
+        auto addr_c_states_tm1_l_reg = abi_param4;
+        auto addr_c_states_t_l_reg = r10;
+        // Here we cannot use rbp to have initial stack pointer so we
+        // use rsp and offset it with the size of pushed registers in
+        // preamble
+        mov(addr_c_states_t_l_reg, ptr[rsp + get_size_of_abi_save_regs() + 40]);
+#else
+        auto addr_ws_gates_reg = abi_param1;
+        auto addr_bias_reg = abi_param2;
+        auto addr_states_t_l_reg = abi_param3;
+        auto addr_c_states_tm1_l_reg = abi_param4;
+        auto addr_c_states_t_l_reg = abi_param5;
+#endif
+
+        // initialize registers with addresses and constants
+        mov(table_reg, table_label);
+        mov(weights_scales_reg, size_t(weights_scales));
+        // both sigmoid and tanh use the same table so load address just once in rax
+        sigmoid_injector_->load_table_addr();
+
+        mov(loop_cnt, rnn_.dic * gate_dt_size);
+        cmp(loop_cnt, vlen);
+        jl(vector_loop_end_label, Xbyak::CodeGenerator::T_NEAR);
+
+        L(vector_loop_start_label);
+        {
+            // load G0 G1 G2 G3
+            uni_vmovups(G0, ptr[addr_ws_gates_reg + 0 * rnn_.dic * gate_dt_size]);
+            uni_vmovups(G1, ptr[addr_ws_gates_reg + 1 * rnn_.dic * gate_dt_size]);
+            uni_vmovups(G2, ptr[addr_ws_gates_reg + 2 * rnn_.dic * gate_dt_size]);
+            uni_vmovups(G3, ptr[addr_ws_gates_reg + 3 * rnn_.dic * gate_dt_size]);
+
+            // dequantize the gates from s32 to f32 if needed
+            if (src_data_t == data_type::u8){
+                deq_w(G0, tmp1_vmm, tmp2_vmm, 0, true);
+                deq_w(G1, tmp1_vmm, tmp2_vmm, 1, true);
+                deq_w(G2, tmp1_vmm, tmp2_vmm, 2, true);
+                deq_w(G3, tmp1_vmm, tmp2_vmm, 3, true);
+            }
+
+            // add biases
+            uni_vaddps(G0, G0, ptr[addr_bias_reg + 0 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G1, G1, ptr[addr_bias_reg + 1 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G2, G2, ptr[addr_bias_reg + 2 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G3, G3, ptr[addr_bias_reg + 3 * rnn_.dic * bias_dt_size]);
+
+            // inject eltwise code
+            sigmoid_injector_->compute_vector(G0.getIdx());
+            sigmoid_injector_->compute_vector(G1.getIdx());
+            tanh_injector_->compute_vector(G2.getIdx());
+            sigmoid_injector_->compute_vector(G3.getIdx());
+
+            // compute c_states_t_l = G1 * c_tm1_l + G0 * G2
+            uni_vmovups(tmp1_vmm, ptr[addr_c_states_tm1_l_reg]);
+            uni_vmulps(tmp1_vmm, tmp1_vmm, G1);
+            uni_vfmadd231ps(tmp1_vmm, G0, G2);
+            uni_vmovups(ptr[addr_c_states_t_l_reg], tmp1_vmm);
+
+            // states_t_l = G3 * tanh(c_states_t_l)
+            tanh_injector_->compute_vector(tmp1_vmm.getIdx());
+            uni_vmulps(tmp1_vmm, tmp1_vmm, G3);
+
+            // if int8, we quantize the resulting state
+            if (src_data_t == data_type::u8) {
+                q_d(tmp1_vmm, tmp2_vmm, tmp_reg);
+            }
+
+            // write back the result
+            if(vlen_dst == vlen)
+                uni_vmovups(ptr[addr_states_t_l_reg], tmp1_vmm);
+            else
+                // we write only 1/4 of the register
+                switch(vlen_dst){
+                case 16: uni_vmovups(ptr[addr_states_t_l_reg], Xmm(tmp1_vmm.getIdx())); break;
+                case 8: uni_vmovsd(ptr[addr_states_t_l_reg], Xmm(tmp1_vmm.getIdx())); break;
+                case 4: uni_vmovss(ptr[addr_states_t_l_reg], Xmm(tmp1_vmm.getIdx())); break;
+                default:
+                    assert(!"Unsuported vector length for quantization");
+                }
+
+            // increment address pointers
+            add(addr_ws_gates_reg, vlen);
+            add(addr_bias_reg, vlen);
+            add(addr_states_t_l_reg, vlen_dst);
+            add(addr_c_states_tm1_l_reg, vlen);
+            add(addr_c_states_t_l_reg, vlen);
+            if (mask != 0)
+                add(weights_scales_reg, vlen);
+
+            // increment loop counter
+            sub(loop_cnt, vlen);
+            cmp(loop_cnt, vlen);
+            jge(vector_loop_start_label);
+        }
+        L(vector_loop_end_label);
+
+        cmp(loop_cnt, 0);
+        je(rem_loop_end_label, Xbyak::CodeGenerator::T_NEAR);
+        // Same code as above, we just use movuss for accessing inputs
+        // TODO: smarter handling of tails with Zmm -> Ymm -> Xmm -> scalar
+        L(rem_loop_start_label);
+        {
+            // remaping registers to Xmms
+            Xmm G0s(G0.getIdx()), G1s(G1.getIdx()), G2s(G2.getIdx()), G3s(G3.getIdx());
+            Xmm tmp1s_vmm(tmp1_vmm.getIdx());
+
+            // load G0 G1 G2 G3
+            uni_vmovss(G0s, ptr[addr_ws_gates_reg + 0 * rnn_.dic * gate_dt_size]);
+            uni_vmovss(G1s, ptr[addr_ws_gates_reg + 1 * rnn_.dic * gate_dt_size]);
+            uni_vmovss(G2s, ptr[addr_ws_gates_reg + 2 * rnn_.dic * gate_dt_size]);
+            uni_vmovss(G3s, ptr[addr_ws_gates_reg + 3 * rnn_.dic * gate_dt_size]);
+
+            // dequantize the gates from s32 to f32 if needed
+            if (src_data_t == data_type::u8){
+                deq_w(G0, tmp1_vmm, tmp2_vmm, 0, false);
+                deq_w(G1, tmp1_vmm, tmp2_vmm, 1, false);
+                deq_w(G2, tmp1_vmm, tmp2_vmm, 2, false);
+                deq_w(G3, tmp1_vmm, tmp2_vmm, 3, false);
+            }
+
+            // add biases
+            uni_vmovss(tmp1s_vmm, ptr[addr_bias_reg + 0 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G0s, G0s, tmp1s_vmm);
+            uni_vmovss(tmp1s_vmm, ptr[addr_bias_reg + 1 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G1s, G1s, tmp1s_vmm);
+            uni_vmovss(tmp1s_vmm, ptr[addr_bias_reg + 2 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G2s, G2s, tmp1s_vmm);
+            uni_vmovss(tmp1s_vmm, ptr[addr_bias_reg + 3 * rnn_.dic * bias_dt_size]);
+            uni_vaddps(G3s, G3s, tmp1s_vmm);
+
+            // inject eltwise code
+            sigmoid_injector_->compute_vector(G0s.getIdx());
+            sigmoid_injector_->compute_vector(G1s.getIdx());
+            tanh_injector_->compute_vector(G2s.getIdx());
+            sigmoid_injector_->compute_vector(G3s.getIdx());
+
+            // compute c_states_t_l = G1 * c_tm1_l + G0s * G2
+            uni_vmovups(tmp1s_vmm, ptr[addr_c_states_tm1_l_reg]);
+            uni_vmulps(tmp1s_vmm, tmp1s_vmm, G1s);
+            uni_vfmadd231ps(tmp1s_vmm, G0s, G2s);
+            uni_vmovss(ptr[addr_c_states_t_l_reg], tmp1s_vmm);
+
+            // states_t_l = G3 * tanh(c_states_t_l)
+            tanh_injector_->compute_vector(tmp1s_vmm.getIdx());
+            uni_vmulps(tmp1s_vmm, tmp1s_vmm, G3s);
+
+            // if int8, we quantize the resulting state
+            if (src_data_t == data_type::u8) {
+                q_d(tmp1_vmm, tmp2_vmm, tmp_reg);
+            }
+
+            // write back the result
+            if(vlen_dst == vlen)
+                uni_vmovups(ptr[addr_states_t_l_reg], tmp1s_vmm);
+            else
+                // we write only 1/4 of the register
+                switch(vlen_dst){
+                case 16: uni_vmovups(ptr[addr_states_t_l_reg], Xmm(tmp1s_vmm.getIdx())); break;
+                case 8: uni_vmovsd(ptr[addr_states_t_l_reg], Xmm(tmp1s_vmm.getIdx())); break;
+                case 4: uni_vmovss(ptr[addr_states_t_l_reg], Xmm(tmp1s_vmm.getIdx())); break;
+                default:
+                    assert(!"Unsuported vector length for quantization");
+                }
+
+            // increment address pointers
+            add(addr_ws_gates_reg, gate_dt_size);
+            add(addr_bias_reg, bias_dt_size);
+            add(addr_states_t_l_reg, hstate_dt_size);
+            add(addr_c_states_tm1_l_reg, cstate_dt_size);
+            add(addr_c_states_t_l_reg, cstate_dt_size);
+            if (mask != 0)
+                add(weights_scales_reg, qscale_dt_size);
+
+            // increment loop counter
+            sub(loop_cnt, gate_dt_size);
+            cmp(loop_cnt, 0);
+            jg(rem_loop_start_label);
+
+        }
+        L(rem_loop_end_label);
+
+        postamble();
+
+        // Again, only one table is needed and shared between sigmoid and tanh
+        sigmoid_injector_->prepare_table(false);
+        tanh_injector_->prepare_table(true);
+
+        L(table_label);
+        {
+            for (size_t i = 0; i < vlen / sizeof(float); i++) dd(float2int(data_scale));
+            for (size_t i = 0; i < vlen / sizeof(float); i++) dd(float2int(data_shift));
+            // perm mask for ymm
+            dd(0); dd(4); dd(2); dd(3); dd(1); dd(5); dd(6); dd(7);
+            // perm mask for zmm
+            dd(0); dd(4); dd(8); dd(12); dd(1); dd(5); dd(6); dd(7);
+            dd(2); dd(9); dd(10); dd(11); dd(3); dd(12); dd(13); dd(14);
+        }
+    }
+
+};
+
+template struct jit_uni_lstm_postgemm_kernel_fwd<sse42, data_type::f32>;
+template struct jit_uni_lstm_postgemm_kernel_fwd<avx2, data_type::f32>;
+template struct jit_uni_lstm_postgemm_kernel_fwd<avx512_core, data_type::f32>;
+
+template struct jit_uni_lstm_postgemm_kernel_fwd<sse42, data_type::u8>;
+template struct jit_uni_lstm_postgemm_kernel_fwd<avx2, data_type::u8>;
+template struct jit_uni_lstm_postgemm_kernel_fwd<avx512_core, data_type::u8>;
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/ref_rnn.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/ref_rnn.cpp
new file mode 100644
index 000000000..1e887137c
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/ref_rnn.cpp
@@ -0,0 +1,807 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+/*
+  General architecture
+
+  for diff states, we have n_states + 1 as we have n_states diff
+  to propagate to the previous iteration and 1 states to propagate
+  to the previous layer
+  index 0 is dh for cell(t-1, l) to consume
+  index 1 is dc for cell(t-1, l) to consume
+  index 2 is dh for cell(t, l-1) to consume
+  this indexing enables to have the same indexing for states in elemwise
+  function
+  only the cell execution function should be impacted
+
+ */
+
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "ref_rnn.hpp"
+#include "../gemm/gemm.hpp"
+#include "../simple_q10n.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::utils;
+using namespace mkldnn::impl::memory_tracking::names;
+using namespace rnn_utils;
+#define AOC array_offset_calculator
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+void _ref_rnn_common_t<aprop, src_type, weights_type>::gates_reduction(
+        const rnn_conf_t &rnn, const acc_data_t *ws_gates_,
+        float *diff_bias_) const {
+    auto body = [&](int i, int k) {
+        for (int j = 0; j < rnn.mb; j++)
+            diff_bias_[i * rnn.dic + k]
+                    += ws_gates_[j * rnn.gates_ws_ld + i * rnn.dic + k];
+    };
+
+    // @todo block k on simd-width
+#if MKLDNN_THR == MKLDNN_THR_OMP && _OPENMP >= 201307 \
+    /* icc 17.0 has a problem with simd collapse */ \
+    && !((defined __INTEL_COMPILER) && (__INTEL_COMPILER == 1700))
+#pragma omp parallel for simd collapse(2)
+    for (int i = 0; i < rnn.n_gates; i++)
+        for (int k = 0; k < rnn.dic; k++)
+            body(i, k);
+#else
+    parallel_nd(rnn.n_gates, rnn.dic, body);
+#endif
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_gemm_sig((_ref_rnn_common_t<aprop, src_type, weights_type>::gemm)) {
+    assert(ldA * ldB * ldC != 0);
+    extended_sgemm(&transA, &transB, &m, &n, &k, &alpha, a_, &ldA, b_, &ldB,
+            &beta, c_, &ldC, nullptr, pd()->rnn_.use_jit_gemm);
+}
+
+template <>
+rnn_gemm_sig((ref_rnn_fwd_u8s8_t::gemm)) {
+    assert(!"non packed gemm is disabled for int8");
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_gemm_sig((_ref_rnn_common_t<aprop, src_type, weights_type>::packed_gemm)) {
+#if (USE_MKL_PACKED_GEMM)
+    assert(transA == 'N');
+    cblas_sgemm_compute(CblasColMajor, CblasPacked,
+            (transB == 'T') ? CblasTrans : CblasNoTrans, m, n, k, a_, ldA, b_,
+            ldB, beta, c_, ldC);
+#else
+    UNUSED(transA);
+    UNUSED(transB);
+    UNUSED(m);
+    UNUSED(n);
+    UNUSED(k);
+    UNUSED(alpha);
+    UNUSED(ldA);
+    UNUSED(b_);
+    UNUSED(ldB);
+    UNUSED(beta);
+    UNUSED(c_);
+    UNUSED(ldC);
+    assert(!"packed gemm is disabled");
+#endif
+}
+
+template <>
+rnn_gemm_sig((ref_rnn_fwd_u8s8_t::packed_gemm)) {
+#if (USE_MKL_PACKED_GEMM)
+    int8_t offseta = 0, offsetb = 0;
+    int32_t offsetc = 0;
+    cblas_gemm_s8u8s32_compute(CblasColMajor, (CBLAS_TRANSPOSE)CblasPacked,
+            CblasNoTrans, CblasFixOffset, m, n, k, alpha, a_, ldA, offseta, b_,
+            ldB, offsetb, beta, c_, ldC, &offsetc);
+#else
+    UNUSED(transA);
+    UNUSED(transB);
+    UNUSED(m);
+    UNUSED(n);
+    UNUSED(k);
+    UNUSED(alpha);
+    UNUSED(ldA);
+    UNUSED(b_);
+    UNUSED(ldB);
+    UNUSED(beta);
+    UNUSED(c_);
+    UNUSED(ldC);
+    assert(!"packed gemm is disabled");
+#endif
+}
+
+//*************** Grid computations strategy: linear ***************//
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_grid_execution_sig(
+        (_ref_rnn_common_t<aprop, src_type, weights_type>::linear_execution)) {
+    AOC<src_data_t, 4> ws_states(ws_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.states_nld * rnn.states_ws_ld);
+    AOC<float, 4> ws_c_states(ws_c_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.states_nld * rnn.states_ws_ld);
+    AOC<float, 5> ws_diff_states(ws_diff_states_, rnn.n_layer + 1, rnn.n_dir,
+            (rnn.n_states + 1), rnn.n_iter + 1,
+            rnn.states_nld * rnn.states_ws_ld);
+    AOC<acc_data_t, 4> ws_gates(ws_gates_, rnn.n_layer, rnn.n_dir, rnn.n_iter,
+            rnn.gates_nld * rnn.gates_ws_ld);
+    AOC<weights_data_t *, 3> weights_input(
+            weights_layer_, rnn.n_layer, rnn.n_dir, rnn.n_parts_weights_layer);
+    AOC<weights_data_t *, 3> weights_states(
+            weights_states_, rnn.n_layer, rnn.n_dir, rnn.n_parts_weights_iter);
+    AOC<float*, 3> bias(
+        bias_, rnn.n_layer, rnn.n_dir, rnn.n_parts_bias);
+    AOC<float, 3> diff_weights_layer(diff_weights_layer_, rnn.n_layer,
+            rnn.n_dir,
+            rnn.diff_weights_layer_nld * rnn.diff_weights_layer_ld);
+    AOC<float, 3> diff_weights_iter(diff_weights_iter_, rnn.n_layer, rnn.n_dir,
+            rnn.diff_weights_iter_nld * rnn.diff_weights_iter_ld);
+    AOC<float, 3> diff_bias(
+            diff_bias_, rnn.n_layer, rnn.n_dir, rnn.n_bias * rnn.dic);
+    AOC<float, 4> ws_grid(
+            ws_grid_, rnn.n_layer, rnn.n_dir, rnn.n_iter, (int)rnn.ws_per_cell);
+
+    // We run the grid of computation
+    for (int dir = 0; dir < rnn.n_dir; dir++) {
+        for (int j = 0; j < rnn.n_layer; j++) {
+            int lay = (aprop == prop_kind::forward) ? j : rnn.n_layer - j - 1;
+
+            if ((aprop == prop_kind::forward) && rnn.merge_gemm_layer) {
+                (this->*gemm_layer_func)('N', 'N', rnn.n_gates * rnn.dic,
+                        rnn.mb * rnn.n_iter, rnn.slc, 1.0,
+                        weights_input(lay, dir, 0), rnn.weights_iter_ld,
+                        &(ws_states(lay, dir, 1, 0)), rnn.states_ws_ld, 0.0,
+                        &(ws_gates(lay, dir, 0, 0)), rnn.gates_ws_ld);
+            }
+
+            for (int i = 0; i < rnn.n_iter; i++) {
+                int iter = (aprop == prop_kind::forward) ? i : rnn.n_iter - i - 1;
+                (this->*cell_func)(rnn,
+                        &(ws_states(lay + 1, dir, iter + 1, 0)),
+                        &(ws_c_states(lay + 1, dir, iter + 1, 0)),
+                        &(ws_diff_states(lay, dir, 0, iter, 0)),
+                        &(weights_input(lay, dir, 0)),
+                        &(weights_states(lay, dir, 0)),
+                        &(bias(lay, dir, 0)),
+                        &(ws_states(lay, dir, iter + 1, 0)),
+                        &(ws_states(lay + 1, dir, iter, 0)),
+                        &(ws_c_states(lay + 1, dir, iter, 0)),
+                        &(ws_diff_states(lay + 1, dir, 0, iter, 0)),
+                        &(ws_diff_states(lay, dir, 0, iter + 1, 0)),
+                        &(diff_weights_layer(lay, dir, 0)),
+                        &(diff_weights_iter(lay, dir, 0)),
+                        &(diff_bias(lay, dir, 0)),
+                        &(ws_gates(lay, dir, iter, 0)),
+                        &(ws_grid(lay, dir, iter, 0)),
+                        ws_cell_);
+            }
+
+            if ((aprop == prop_kind::backward) && rnn.merge_gemm_layer) {
+                (this->*gemm_layer_func)('N', 'N', rnn.slc, rnn.mb * rnn.n_iter,
+                        rnn.n_gates * rnn.dic, 1.0, weights_input(lay, dir, 0),
+                        rnn.weights_layer_ld,
+                        (src_data_t *)(&(ws_gates(lay, dir, 0, 0))),
+                        rnn.gates_ws_ld, 0.0,
+                        (acc_data_t *)(&(ws_diff_states(
+                                lay, dir, rnn.n_states, 0, 0))),
+                        rnn.states_ws_ld);
+                gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.slc,
+                        rnn.mb * rnn.n_iter, 1.0,
+                        (weights_data_t *)(&(ws_gates(lay, dir, 0, 0))),
+                        rnn.gates_ws_ld,
+                        (src_data_t *)(&(ws_states(lay, dir, 1, 0))),
+                        rnn.states_ws_ld, 1.0,
+                        (acc_data_t *)(&(diff_weights_layer(lay, dir, 0))),
+                        rnn.diff_weights_layer_ld);
+            }
+            if ((aprop == prop_kind::backward) && rnn.merge_gemm_iter) {
+                gemm('N', 'T', rnn.n_gates * rnn.dic, rnn.sic,
+                        rnn.mb * rnn.n_iter, 1.0,
+                        (weights_data_t *)(&(ws_gates(lay, dir, 0, 0))),
+                        rnn.gates_ws_ld,
+                        (src_data_t *)(&(ws_states(lay + 1, dir, 0, 0))),
+                        rnn.states_ws_ld, 1.0,
+                        (acc_data_t *)(&(diff_weights_iter(lay, dir, 0))),
+                        rnn.diff_weights_iter_ld);
+            }
+        }
+    }
+}
+
+//********* GRID computations strategy: utility functions **********//
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+void _ref_rnn_common_t<aprop, src_type, weights_type>::copy_init_layer(
+        const rnn_conf_t &rnn, src_data_t *__restrict ws_states_,
+        float *__restrict ws_diff_states_, const src_data_t *__restrict xt_,
+        const float *__restrict diff_dst_layer_) const {
+
+    AOC<src_data_t, 4> ws_states(
+            ws_states_, rnn.n_dir, rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    auto xt_d = memory_desc_wrapper(pd()->src_pd(0));
+
+    parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+        auto xxt = xt_ + xt_d.blk_off(it, b);
+        src_data_t *ws_l2r_ptr = &(ws_states(0, it + 1, b, 0));
+        src_data_t *ws_r2l_ptr = &(ws_states(rnn.n_dir - 1, rnn.n_iter - it, b, 0));
+        if (rnn.exec_dir != r2l)
+            for (int c = 0; c < rnn.slc; c++)
+                ws_l2r_ptr[c] = xxt[c];
+        if (rnn.exec_dir != l2r)
+            for (int c = 0; c < rnn.slc; c++)
+                ws_r2l_ptr[c] = xxt[c];
+    });
+}
+
+template <>
+void ref_rnn_bwd_f32_t::copy_init_layer(const rnn_conf_t &rnn,
+        src_data_t *ws_states_, float *ws_diff_states_, const src_data_t *xt_,
+        const float *diff_dst_layer_) const {
+    AOC<float, 6> ws_diff_states(ws_diff_states_, rnn.n_layer + 1, rnn.n_dir,
+            (rnn.n_states + 1), rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    auto diff_dst_layer_d = memory_desc_wrapper(pd()->diff_dst_pd(0));
+
+    switch (rnn.exec_dir) {
+    case bi_concat:
+        parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+            auto diff_dst_layer_x
+                    = diff_dst_layer_ + diff_dst_layer_d.blk_off(it, b);
+            for (int s = 0; s < rnn.dic; s++) {
+                ws_diff_states(rnn.n_layer, 0, rnn.n_states, it, b, s)
+                        = diff_dst_layer_x[s];
+                ws_diff_states(
+                        rnn.n_layer, 1, rnn.n_states, rnn.n_iter - it - 1, b, s)
+                        = diff_dst_layer_x[rnn.dic + s];
+            }
+        });
+        break;
+    case bi_sum:
+        parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+            auto diff_dst_layer_x
+                    = diff_dst_layer_ + diff_dst_layer_d.blk_off(it, b);
+            for (int s = 0; s < rnn.dic; s++) {
+                ws_diff_states(rnn.n_layer, 0, rnn.n_states, it, b, s)
+                        = diff_dst_layer_x[s];
+                ws_diff_states(
+                        rnn.n_layer, 1, rnn.n_states, rnn.n_iter - it - 1, b, s)
+                        = diff_dst_layer_x[s];
+            }
+        });
+        break;
+    case l2r:
+        parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+            auto diff_dst_layer_x
+                    = diff_dst_layer_ + diff_dst_layer_d.blk_off(it, b);
+            for (int s = 0; s < rnn.dic; s++) {
+                ws_diff_states(rnn.n_layer, 0, rnn.n_states, it, b, s)
+                        = diff_dst_layer_x[s];
+            }
+        });
+        break;
+    case r2l:
+        parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+            auto diff_dst_layer_x = diff_dst_layer_
+                    + diff_dst_layer_d.blk_off(rnn.n_iter - it - 1, b);
+            for (int s = 0; s < rnn.dic; s++) {
+                ws_diff_states(rnn.n_layer, 0, rnn.n_states, it, b, s)
+                        = diff_dst_layer_x[s];
+            }
+        });
+        break;
+    default: assert(!"Unsupported direction"); break;
+    }
+}
+
+/* For int8 configuration, input iteration states may be of types f32 or u8
+ * Internally h_state is always stored in u8 and c_state is always stored in f32
+ * If input states are of type u8 then h state is copied and c state is dequantized
+ * If input states are of type f32 then h state is quantized and c_state is copied
+ * */
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+template <typename input_data_t>
+void _ref_rnn_common_t<aprop, src_type, weights_type>::copy_init_iter(
+        const rnn_conf_t &rnn, src_data_t *__restrict ws_states_,
+        float *__restrict ws_c_states_, float *__restrict ws_diff_states_,
+        const input_data_t *__restrict firstit_states_,
+        const float *__restrict diff_dst_iter_) const {
+    AOC<src_data_t, 5> ws_states(ws_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    AOC<float, 5> ws_c_states(ws_c_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    float data_shift = pd()->attr()->rnn_data_qparams_.shift_;
+    float data_scale = pd()->attr()->rnn_data_qparams_.scale_;
+    round_mode_t rmode = pd()->attr()->round_mode_;
+
+    const bool quantize
+            = pd()->desc()->src_iter_desc.data_type == data_type::f32
+            && rnn.dt_conf != all_f32;
+    auto maybe_q = [&](input_data_t f) {
+        if (quantize) {
+            float qf = f * data_scale + data_shift;
+            return qz_a1b0<float, src_data_t>()(qf, rmode);
+        } else
+            return (src_data_t)f;
+    };
+
+    const bool dequantize
+            = pd()->desc()->src_iter_desc.data_type == data_type::u8;
+    auto maybe_deq = [&](input_data_t s) {
+        if (dequantize)
+            return (((float)s - data_shift) / data_scale);
+        else
+            return (float)s;
+    };
+    auto firstit_states_d = memory_desc_wrapper(pd()->src_pd(1));
+    if (firstit_states_) {
+        parallel_nd(
+                rnn.n_layer, rnn.n_dir, rnn.mb, [&](int lay, int dir, int b) {
+                    for (int s = 0; s < rnn.sic; s++)
+                        ws_states(lay + 1, dir, 0, b, s) = maybe_q(
+                                firstit_states_[firstit_states_d.blk_off(
+                                        lay, dir, 0, b, s)]);
+                    if (pd()->cell_kind() == alg_kind::vanilla_lstm)
+                        for (int s = 0; s < rnn.sic; s++)
+                            ws_c_states(lay + 1, dir, 0, b, s) = maybe_deq(
+                                    firstit_states_[firstit_states_d.blk_off(
+                                            lay, dir, 1, b, s)]);
+                });
+    } else {
+        parallel_nd(
+                rnn.n_layer, rnn.n_dir, rnn.mb, [&](int lay, int dir, int b) {
+                    for (int j = 0; j < rnn.sic; j++) {
+                        ws_states(lay + 1, dir, 0, b, j) = (src_data_t)0;
+                        ws_c_states(lay + 1, dir, 0, b, j) = 0.0f;
+                    }
+        });
+    }
+}
+
+template <>
+template <typename input_data_t>
+void ref_rnn_bwd_f32_t::copy_init_iter(const rnn_conf_t &rnn,
+        src_data_t *ws_states_, float *ws_c_states_, float *ws_diff_states_,
+        const input_data_t *firstit_states_,
+        const float *diff_dst_iter_) const {
+    AOC<float, 6> ws_diff_states(ws_diff_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_states + 1, rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    auto diff_dst_iter_d = memory_desc_wrapper(pd()->diff_dst_pd(1));
+    if (diff_dst_iter_) {
+        parallel_nd(rnn.n_layer, rnn.n_dir, rnn.n_states, rnn.mb,
+                [&](int lay, int dir, int state, int b) {
+                    array_copy(&(ws_diff_states(
+                                       lay, dir, state, rnn.n_iter, b, 0)),
+                            diff_dst_iter_
+                                    + diff_dst_iter_d.blk_off(
+                                              lay, dir, state, b),
+                            rnn.dic);
+                });
+    } else {
+        parallel_nd(rnn.n_layer, rnn.n_dir, rnn.n_states, rnn.mb,
+                [&](int lay, int dir, int state, int i) {
+                    for (int j = 0; j < rnn.dic; j++)
+                        ws_diff_states(lay, dir, state, rnn.n_iter, i, j)
+                                = 0.0f;
+                });
+    }
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+template <typename dst_data_t>
+void _ref_rnn_common_t<aprop, src_type, weights_type>::copy_res_layer(
+        const rnn_conf_t &rnn, dst_data_t *dst_layer_, float *diff_src_layer,
+        const src_data_t *ws_states_, const float *ws_diff_states_) const {
+
+    auto dst_layer_d = memory_desc_wrapper(pd()->dst_pd(0));
+    AOC<const src_data_t, 5> ws_states(ws_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    float shift = (pd()->attr()->rnn_data_qparams_.shift_);
+    float scale = (pd()->attr()->rnn_data_qparams_.scale_);
+
+    const bool dequantize
+            = pd()->desc()->dst_layer_desc.data_type == data_type::f32
+            && rnn.dt_conf != all_f32;
+    auto maybe_deq = [&](src_data_t s) {
+        if (dequantize)
+            return (dst_data_t)(((float)s - shift) / scale);
+        else
+            return (dst_data_t)s;
+    };
+    parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+        int dir = 0;
+        if (rnn.exec_dir != r2l) {
+            for (int s = 0; s < rnn.dic; s++) {
+                dst_layer_[dst_layer_d.blk_off(it, b, dir * rnn.dic + s)]
+                        = maybe_deq(ws_states(rnn.n_layer, dir, it + 1, b, s));
+            }
+            dir = 1;
+        }
+        if (rnn.exec_dir != l2r) {
+            for (int s = 0; s < rnn.dic; s++)
+                switch (rnn.exec_dir) {
+                case bi_sum:
+                    dst_layer_[dst_layer_d.blk_off(it, b, s)]
+                            += maybe_deq(ws_states(
+                                    rnn.n_layer, dir, rnn.n_iter - it, b, s));
+                    break;
+                default:
+                    dst_layer_[dst_layer_d.blk_off(it, b, dir * rnn.dic + s)]
+                            = maybe_deq(ws_states(
+                                    rnn.n_layer, dir, rnn.n_iter - it, b, s));
+                }
+        }
+    });
+}
+
+template <>
+template <typename dst_data_t>
+void ref_rnn_bwd_f32_t::copy_res_layer(
+        const rnn_conf_t &rnn, dst_data_t *dst_layer_, float *diff_src_layer_,
+        const src_data_t *ws_states_, const float *ws_diff_states_) const {
+    auto diff_src_layer_d = memory_desc_wrapper(pd()->diff_src_pd(0));
+    AOC<const float, 6> ws_diff_states(ws_diff_states_, rnn.n_layer + 1,
+            rnn.n_dir, rnn.n_states + 1, rnn.n_iter + 1, rnn.mb,
+            rnn.states_ws_ld);
+
+    parallel_nd(rnn.n_iter, rnn.mb, [&](int it, int b) {
+        int dir = 0;
+        for (int s = 0; s < rnn.slc; s++) {
+            float *dst_addr = diff_src_layer_
+                    + diff_src_layer_d.blk_off(
+                              (rnn.exec_dir == r2l) ? rnn.n_iter - 1 - it : it,
+                              b, dir * rnn.slc + s);
+            float res = ws_diff_states(0, 0, rnn.n_states, it, b, s);
+            if (rnn.n_dir - 1)
+                res += ws_diff_states(
+                        0, 1, rnn.n_states, rnn.n_iter - 1 - it, b, s);
+            dst_addr[0] = res;
+        }
+    });
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+template <typename output_data_t>
+void _ref_rnn_common_t<aprop, src_type, weights_type>::copy_res_iter(
+        const rnn_conf_t &rnn, output_data_t *dst_iter_, float *diff_src_iter_,
+        const src_data_t *ws_states_, float *ws_c_states_,
+        const float *ws_diff_states_) const {
+    auto dst_iter_d = memory_desc_wrapper(pd()->dst_pd(1));
+    AOC<const src_data_t, 5> ws_states(ws_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    AOC<const float, 5> ws_c_states(ws_c_states_, rnn.n_layer + 1, rnn.n_dir,
+            rnn.n_iter + 1, rnn.mb, rnn.states_ws_ld);
+    float data_shift = pd()->attr()->rnn_data_qparams_.shift_;
+    float data_scale = pd()->attr()->rnn_data_qparams_.scale_;
+    round_mode_t rmode = pd()->attr()->round_mode_;
+
+    const bool quantize = pd()->desc()->dst_iter_desc.data_type == data_type::u8
+            && rnn.dt_conf != all_f32;
+    auto maybe_q = [&](float f) {
+        if (quantize) {
+            float qf = f * data_scale + data_shift;
+            return qz_a1b0<float, output_data_t>()(qf, rmode);
+        } else
+            return (output_data_t)f;
+    };
+
+    const bool dequantize
+            = pd()->desc()->dst_iter_desc.data_type == data_type::f32
+            && rnn.dt_conf != all_f32;
+    auto maybe_deq = [&](src_data_t s) {
+        if (dequantize)
+            return (output_data_t)(((float)s - data_shift) / data_scale);
+        else
+            return (output_data_t)s;
+    };
+    if (dst_iter_) {
+        parallel_nd(rnn.n_layer, rnn.n_dir, rnn.mb,
+                [&](int lay, int dir, int b) {
+            for (int s = 0; s < rnn.dic; s++) {
+                dst_iter_[dst_iter_d.blk_off(lay, dir, 0, b, s)]
+                        = maybe_deq(ws_states(lay + 1, dir, rnn.n_iter, b, s));
+            }
+            if (pd()->cell_kind() == alg_kind::vanilla_lstm)
+                    for (int s = 0; s < rnn.dic; s++) {
+                        dst_iter_[dst_iter_d.blk_off(lay, dir, 1, b, s)]
+                                = maybe_q(ws_c_states(
+                                        lay + 1, dir, rnn.n_iter, b, s));
+                    }
+            });
+    }
+}
+
+template <>
+template <typename output_data_t>
+void ref_rnn_bwd_f32_t::copy_res_iter(
+        const rnn_conf_t &rnn, output_data_t *dst_iter_, float *diff_src_iter_,
+        const src_data_t *ws_states_, float *ws_c_states_,
+        const float *ws_diff_states_) const {
+    auto diff_src_iter_d = memory_desc_wrapper(pd()->diff_src_pd(1));
+    AOC<const float, 6> ws_diff_states(ws_diff_states_, rnn.n_layer + 1,
+            rnn.n_dir, rnn.n_states + 1, rnn.n_iter + 1, rnn.mb,
+            rnn.states_ws_ld);
+    if (diff_src_iter_) {
+        parallel_nd(rnn.n_layer, rnn.n_dir, rnn.n_states, rnn.mb,
+                [&](int lay, int dir, int state, int b) {
+                    for (int s = 0; s < rnn.sic; s++) {
+                        diff_src_iter_[diff_src_iter_d.blk_off(
+                                lay, dir, state, b, s)]
+                                = ws_diff_states(lay, dir, state, 0, b, s);
+                    }
+                });
+    }
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_bias_prepare_sig((_ref_rnn_common_t<aprop, src_type, weights_type>::bias_prepare)) {
+    /* Original set of bias provided by the user */
+    AOC<const float, 5> b(
+            b_, rnn.n_layer, rnn.n_dir, rnn.n_bias * rnn.dic);
+    /* Array of pointers initialized in packing */
+    AOC<float *, 3> bias(bias_, rnn.n_layer, rnn.n_dir, rnn.n_parts_bias);
+    AOC<float, 3> scratch_bias(
+            scratch_bias_, rnn.n_layer, rnn.n_dir, rnn.n_bias * rnn.dic);
+
+    if (rnn.copy_bias) {
+        parallel_nd(rnn.n_layer * rnn.n_dir * rnn.n_bias * rnn.dic,
+                [&](size_t i) { scratch_bias_[i] = b_[i]; });
+    }
+
+    for (int i = 0; i < rnn.n_layer; i++) {
+        for (int d = 0; d < rnn.n_dir; d++) {
+            int offset_bias = 0;
+            for (int p = 0; p < rnn.n_parts_bias; p++) {
+                bias(i, d, p) = rnn.copy_bias
+                        ? (float *) &scratch_bias(i, d, offset_bias)
+                        : (float *) &b(i, d, offset_bias);
+                offset_bias += rnn.parts_bias[p] * rnn.dic;
+            }
+        }
+    }
+
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_bias_finalize_sig(
+        (_ref_rnn_common_t<aprop, src_type, weights_type>::bias_finalize)) {
+    if (rnn.dt_conf != all_f32) {
+        float data_shift = pd()->attr()->rnn_data_qparams_.shift_;
+        float data_scale = pd()->attr()->rnn_data_qparams_.scale_;
+        float *weights_scales = pd()->attr()->rnn_weights_qparams_.scales_;
+        bool scale_per_oc = pd()->attr()->rnn_weights_qparams_.mask_ != 0;
+        for (int i = 0; i < rnn.n_layer * rnn.n_dir; i++)
+            for (int j = 0; j < rnn.n_bias * rnn.dic; j++) {
+                size_t off = i * rnn.n_bias * rnn.dic + j;
+                float weights_scale
+                        = scale_per_oc ? weights_scales[j] : weights_scales[0];
+                scratch_bias_[off] -= (w_iter_comp[off] + w_layer_comp[off])
+                        * data_shift / (weights_scale * data_scale);
+            }
+    }
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_weights_assign_sig((_ref_rnn_common_t<aprop, src_type,
+        weights_type>::assign_packed_weights)) {
+    AOC<weights_data_t *, 3> weights(weights_, rnn.n_layer, rnn.n_dir, n_parts);
+
+    size_t offset_packed = 0;
+    for (int l = 0; l < rnn.n_layer; l++)
+        for (int d = 0; d < rnn.n_dir; d++) {
+            for (int p = 0; p < n_parts; p++) {
+                weights(l, d, p) = (weights_data_t *)&w_[offset_packed];
+                offset_packed
+                        += part_weights_pack_size[p] / sizeof(weights_data_t);
+            }
+        }
+}
+
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+rnn_weights_assign_sig(
+        (_ref_rnn_common_t<aprop, src_type, weights_type>::assign_weights)) {
+    assert(nld * ld != 0);
+    /* Original set of weights provided by the user */
+    AOC<const weights_data_t, 3> w(w_, rnn.n_layer, rnn.n_dir, nld * ld);
+    /* Array of pointers for each part of weights */
+    AOC<weights_data_t *, 3> weights(weights_, rnn.n_layer, rnn.n_dir, n_parts);
+
+    for (int i = 0; i < rnn.n_layer; i++)
+        for (int d = 0; d < rnn.n_dir; d++) {
+            size_t offset_weights = 0;
+            for (int p = 0; p < n_parts; p++) {
+                weights(i, d, p) = (weights_data_t *)&w(i, d, offset_weights);
+                offset_weights += fmt == memory_format::ldigo ?
+                        gates_per_part[p] * OC_size :
+                        gates_per_part[p] * OC_size * ld;
+            }
+        }
+}
+
+//********************* Execution function *********************//
+template <prop_kind_t aprop, data_type_t src_type, data_type_t weights_type>
+void _ref_rnn_common_t<aprop, src_type, weights_type>::execute_() const {
+    const rnn_conf_t &rnn = this->pd()->rnn_;
+    int input_idx = 0;
+    int output_idx = 0;
+    auto input = reinterpret_cast<const src_data_t *>(
+            this->input_memory(input_idx++));
+    auto states = pd()->with_src_iter() ? (this->input_memory(input_idx++)) :
+                                          nullptr;
+
+    const char *layer_weights_n_comp = this->input_memory(input_idx++);
+    auto w_layer
+            = reinterpret_cast<const weights_data_t *>(layer_weights_n_comp);
+    auto w_layer_comp = reinterpret_cast<const float *>(layer_weights_n_comp
+            + rnn.weights_layer_comp_offset);
+    const char *iter_weights_n_comp = this->input_memory(input_idx++);
+    auto w_iter
+            = reinterpret_cast<const weights_data_t *>(iter_weights_n_comp);
+    auto w_iter_comp = reinterpret_cast<const float *>(iter_weights_n_comp
+            + rnn.weights_iter_comp_offset);
+    auto bias = pd()->with_bias() ?
+            reinterpret_cast<const float *>(this->input_memory(input_idx++)) :
+            nullptr;
+
+    auto dst_last_layer = rnn.is_fwd ? this->memory(output_idx++) :
+                                       this->input_memory(input_idx++);
+    auto dst_last_iter = pd()->with_dst_iter()
+            ? (rnn.is_fwd
+                ? this->memory(output_idx++)
+                : this->input_memory(input_idx++))
+            : nullptr;
+
+    auto diff_dst_layer = rnn.is_fwd ?
+            nullptr :
+            reinterpret_cast<const float *>(this->input_memory(input_idx++));
+    auto diff_dst_iter = rnn.is_fwd || !pd()->with_dst_iter() ?
+            nullptr :
+            reinterpret_cast<const float *>(this->input_memory(input_idx++));
+
+    auto scratchpad = this->scratchpad();
+
+    auto ptr_wei_layer
+            = scratchpad.template get<weights_data_t *>(key_rnn_ptrs_wei_layer);
+    auto ptr_wei_iter
+            = scratchpad.template get<weights_data_t *>(key_rnn_ptrs_wei_iter);
+    auto ptr_bias =
+        scratchpad.template get<float *>(key_rnn_ptrs_bia);
+
+    // fetchihg buffers from the workspace
+    // if no workspace was provided we use the scratchpad
+    char *scratch_ptr = scratchpad.template get<char>(key_rnn_space);
+    char *ws_ptr = nullptr;
+    if (rnn.use_workspace)
+        ws_ptr = rnn.is_fwd
+            ? this->memory(output_idx++)
+            : const_cast<char *>(this->input_memory(input_idx++));
+    char *base_ptr = rnn.use_workspace ? ws_ptr : scratch_ptr;
+    acc_data_t *ws_gates = (acc_data_t *)(base_ptr + ws_gates_offset_);
+    src_data_t *ws_states = (src_data_t *)(base_ptr + ws_states_offset_);
+    float *ws_c_states = (float *)(base_ptr + ws_c_states_offset_);
+    float *ws_diff_states = (float *)(base_ptr + ws_diff_states_offset_);
+    float *ws_grid = (float *)(base_ptr + ws_grid_comp_offset_);
+    float *ws_cell = (float *)(base_ptr + ws_cell_comp_offset_);
+
+    auto diff_src_layer = rnn.is_fwd ?
+            nullptr :
+            reinterpret_cast<float *>(this->memory(output_idx++));
+    auto diff_src_iter = rnn.is_fwd || !pd()->with_src_iter() ?
+            nullptr :
+            reinterpret_cast<float *>(this->memory(output_idx++));
+    auto diff_weights_layer = rnn.is_fwd ?
+            nullptr :
+            reinterpret_cast<float *>(this->memory(output_idx++));
+    auto diff_weights_iter = rnn.is_fwd ?
+            nullptr :
+            reinterpret_cast<float *>(this->memory(output_idx++));
+    auto diff_bias = rnn.is_fwd || !pd()->with_bias() ?
+            nullptr :
+            reinterpret_cast<float *>(this->memory(output_idx++));
+
+    // Fetching extra buffers from scratchpad
+    float *ws_bias = (float *)(scratch_ptr + ws_bias_offset_);
+
+    // initialize diff_states to 0
+    if (aprop == prop_kind::backward)
+        array_set(ws_diff_states, 0.0f, rnn.ws_diff_states_size / sizeof(float));
+
+    /* Pack(if using packed gemm API) or copy(if input arrays have bad leading
+     * dimension */
+    (this->*bias_preparation_func)(rnn, ptr_bias, bias, ws_bias);
+
+    (this->*weights_iter_assign_func)(rnn, rnn.weights_iter_fmt,
+            rnn.weights_iter_nld, rnn.weights_iter_ld, rnn.dic,
+            rnn.sic, rnn.n_parts_weights_iter, rnn.parts_weights_iter,
+            rnn.part_weights_iter_pack_size, ptr_wei_iter, w_iter,
+            ptr_bias, bias, ws_bias);
+    (this->*weights_layer_assign_func)(rnn, rnn.weights_layer_fmt,
+            rnn.weights_layer_nld, rnn.weights_layer_ld, rnn.dic, rnn.slc,
+            rnn.n_parts_weights_layer, rnn.parts_weights_layer,
+            rnn.part_weights_layer_pack_size, ptr_wei_layer, w_layer, ptr_bias,
+            bias, ws_bias);
+
+    (this->*bias_finalization_func)(rnn, ws_bias, w_iter_comp, w_layer_comp);
+
+    // we first need to copy the initial states and input into ws
+    copy_init_layer(rnn, ws_states, ws_diff_states, input, diff_dst_layer);
+    if (rnn.dt_conf == f32u8f32u8 || rnn.dt_conf == f32u8f32f32
+            || rnn.dt_conf == all_f32)
+        copy_init_iter(rnn, ws_states, ws_c_states, ws_diff_states,
+                (const float *)states, diff_dst_iter);
+    else if (rnn.dt_conf == u8u8u8u8 || rnn.dt_conf == u8u8u8f32)
+        copy_init_iter(rnn, ws_states, ws_c_states, ws_diff_states,
+                (const uint8_t *)states, diff_dst_iter);
+    else
+        assert(!"unimplemented");
+
+    // run the execution on the grid
+    (this->*grid_computation)(rnn, ptr_wei_layer, ptr_wei_iter, ptr_bias,
+            ws_states, ws_c_states, ws_diff_states, ws_gates, ws_cell, ws_grid,
+            diff_weights_layer, diff_weights_iter, diff_bias);
+
+    // Finally we copy the results to the result buffers
+    if (rnn.dt_conf == u8u8u8f32 || rnn.dt_conf == f32u8f32f32
+            || rnn.dt_conf == all_f32)
+        copy_res_layer(rnn, (float *)dst_last_layer, diff_src_layer, ws_states,
+                ws_diff_states);
+    else if (rnn.dt_conf == u8u8u8u8 || rnn.dt_conf == f32u8f32u8)
+        copy_res_layer(rnn, (uint8_t *)dst_last_layer, diff_src_layer,
+                ws_states, ws_diff_states);
+    else
+        assert(!"unimplemented");
+
+    if (rnn.dt_conf == f32u8f32u8 || rnn.dt_conf == f32u8f32f32
+            || rnn.dt_conf == all_f32)
+        copy_res_iter(rnn, (float *)dst_last_iter, diff_src_iter, ws_states,
+                ws_c_states, ws_diff_states);
+    else if (rnn.dt_conf == u8u8u8u8 || rnn.dt_conf == u8u8u8f32)
+        copy_res_iter(rnn, (uint8_t *)dst_last_iter, diff_src_iter, ws_states,
+                ws_c_states, ws_diff_states);
+    else
+        assert(!"unimplemented");
+};
+
+/* Fix for MSVS warning C4661 */
+template<> rnn_cell_execution_sig(ref_rnn_fwd_f32_t::cell_execution);
+template<> rnn_cell_execution_sig(ref_rnn_fwd_u8s8_t::cell_execution);
+template<> rnn_cell_execution_sig(ref_rnn_bwd_f32_t::cell_execution);
+template<> rnn_cell_execution_sig(ref_rnn_fwd_f32_t::cell_execution_gru);
+template<> rnn_cell_execution_sig(ref_rnn_fwd_u8s8_t::cell_execution_gru);
+template<> rnn_cell_execution_sig(ref_rnn_bwd_f32_t::cell_execution_gru);
+template<> rnn_cell_execution_sig(ref_rnn_fwd_f32_t::cell_execution_gru_lbr);
+template<> rnn_cell_execution_sig(ref_rnn_fwd_u8s8_t::cell_execution_gru_lbr);
+template<> rnn_cell_execution_sig(ref_rnn_bwd_f32_t::cell_execution_gru_lbr);
+template<> rnn_elemwise_sig(ref_rnn_fwd_f32_t::rnn_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_fwd_u8s8_t::rnn_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_bwd_f32_t::rnn_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_fwd_f32_t::lstm_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_fwd_u8s8_t::lstm_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_bwd_f32_t::lstm_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_fwd_f32_t::gru_lbr_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_fwd_u8s8_t::gru_lbr_elemwise);
+template<> rnn_elemwise_sig(ref_rnn_bwd_f32_t::gru_lbr_elemwise);
+
+template struct _ref_rnn_common_t<prop_kind::forward, data_type::f32, data_type::f32>;
+template struct _ref_rnn_common_t<prop_kind::forward, data_type::u8, data_type::s8>;
+template struct _ref_rnn_common_t<prop_kind::backward, data_type::f32, data_type::f32>;
+
+#undef AOC
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/ref_rnn.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/ref_rnn.hpp
new file mode 100644
index 000000000..c213b4173
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/ref_rnn.hpp
@@ -0,0 +1,335 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef CPU_REF_RNN_HPP
+#define CPU_REF_RNN_HPP
+
+#include <assert.h>
+
+#include "c_types_map.hpp"
+#include "memory_tracking.hpp"
+#include "type_helpers.hpp"
+#include "utils.hpp"
+
+#include "../cpu_isa_traits.hpp"
+#include "../gemm/os_blas.hpp"
+
+#include "cpu_rnn_pd.hpp"
+#include "rnn_utils.hpp"
+#include "jit_uni_rnn_postgemm.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <alg_kind_t alg_kind, prop_kind_t prop_kind>
+float activation(float s, float alpha, float cliping, float dd);
+
+template <prop_kind_t aprop, impl::data_type_t src_type,
+        impl::data_type_t weights_type>
+struct _ref_rnn_common_t : public cpu_primitive_t {
+    typedef typename prec_traits<src_type>::type src_data_t;
+    typedef typename prec_traits<weights_type>::type weights_data_t;
+    typedef typename utils::conditional<src_type == data_type::u8, int32_t,
+            float>::type acc_data_t;
+
+    using class_name = _ref_rnn_common_t<aprop, src_type, weights_type>;
+
+    typedef rnn_elemwise_sig((class_name::*elemwise_f));
+    typedef rnn_cell_execution_sig((class_name::*cell_execution_f));
+    typedef rnn_grid_execution_sig((class_name::*grid_execution_f));
+
+    typedef rnn_gemm_sig((class_name::*gemm_t));
+    typedef rnn_bias_prepare_sig((class_name::*bias_prepare_t));
+    typedef rnn_bias_finalize_sig((class_name::*bias_finalize_t));
+    typedef rnn_weights_assign_sig((class_name::*weights_assign_t));
+
+    using base_pd_t =
+            typename utils::conditional<false || aprop == prop_kind::forward,
+                    cpu_rnn_fwd_pd_t, cpu_rnn_bwd_pd_t>::type;
+
+    struct pd_t : public base_pd_t {
+        pd_t(engine_t *engine, const rnn_desc_t *adesc,
+                const primitive_attr_t *attr,
+                const typename pd_t::hint_class *hint_pd)
+            : base_pd_t(engine, adesc, attr, hint_pd) {}
+
+        DECLARE_COMMON_PD_T("ref:any", class_name);
+
+        status_t init() {
+            using namespace prop_kind;
+            using namespace utils;
+            using namespace memory_format;
+            using namespace rnn_utils;
+            assert(this->engine()->kind() == engine_kind::cpu);
+            const alg_kind_t cell_kind = this->desc()->cell_desc.cell_kind;
+
+            data_type_t src_layer_dt = this->desc()->src_layer_desc.data_type;
+            data_type_t weights_iter_dt
+                    = this->desc()->weights_iter_desc.data_type;
+            data_type_t weights_layer_dt
+                    = this->desc()->weights_layer_desc.data_type;
+
+            bool ok = true
+                    && one_of(cell_kind, alg_kind::vanilla_rnn,
+                               alg_kind::vanilla_lstm, alg_kind::vanilla_gru,
+                               alg_kind::gru_linear_before_reset)
+                    && IMPLICATION(aprop == prop_kind::forward,
+                               one_of(this->desc()->prop_kind, forward_training,
+                                           forward_inference))
+                    && IMPLICATION(aprop == backward,
+                               one_of(this->desc()->prop_kind, backward))
+                    && src_layer_dt == src_type
+                    && everyone_is(
+                               weights_type, weights_iter_dt, weights_layer_dt)
+                    && this->set_default_params() == status::success
+                    && this->with_bias();
+            if (!ok)
+                return status::unimplemented;
+
+            init_conf(rnn_, *this->desc(), this->src_pd(0), this->src_pd(1),
+                    this->weights_pd(0), this->weights_pd(1), this->dst_pd(0));
+
+            if (rnn_.dt_conf == all_f32)
+                ok = ok && this->attr()->has_default_values();
+
+            // Set weights descriptors to desired format
+            memory_desc_t weights_layer_md = *(this->weights_layer_pd_.desc());
+            CHECK(set_expected_desc(rnn_, weights_layer_md, false));
+            cpu_memory_t::pd_t new_weights_layer_pd(
+                    this->engine_, &weights_layer_md);
+            if (this->weights_layer_pd_.desc()->format == any) {
+                this->weights_layer_pd_ = new_weights_layer_pd;
+            } else if (this->weights_layer_pd_.desc()->format == rnn_packed) {
+                if (!this->weights_layer_pd_.is_equal(&new_weights_layer_pd))
+                    return status::unimplemented;
+            }
+
+            memory_desc_t weights_iter_md = *(this->weights_iter_pd_.desc());
+            CHECK(set_expected_desc(rnn_, weights_iter_md, true));
+            cpu_memory_t::pd_t new_weights_iter_pd(
+                    this->engine_, &weights_iter_md);
+            if (this->weights_iter_pd_.desc()->format == any) {
+                this->weights_iter_pd_ = new_weights_iter_pd;
+            } else if (this->weights_iter_pd_.desc()->format == rnn_packed) {
+                if (!this->weights_iter_pd_.is_equal(&new_weights_iter_pd))
+                    return status::unimplemented;
+            }
+
+            CHECK(this->check_layout_consistency());
+
+            set_conf(rnn_, *this->desc(), this->weights_pd(0),
+                    this->weights_pd(1), this->diff_weights_pd(0),
+                    this->diff_weights_pd(1));
+
+            size_t scratchpad_sz{0}, ws_sz{0};
+            get_scratchpad_and_workspace_sizes(rnn_, scratchpad_sz, ws_sz);
+
+            // initialize the workspace_pd if needed
+            if (rnn_.is_training) {
+                dims_t ws_dims = {(int)ws_sz};
+                memory_desc_t ws_d;
+                mkldnn_memory_desc_init(&ws_d, 1, ws_dims, data_type::u8, x);
+                this->ws_pd_ = cpu_memory_t::pd_t(this->engine(), &ws_d);
+            }
+
+            init_scratchpad(scratchpad_sz);
+
+            return status::success;
+        }
+
+        rnn_utils::rnn_conf_t rnn_;
+
+    private:
+        void init_scratchpad(size_t scratchpad_sz) {
+            using namespace memory_tracking::names;
+            auto scratchpad = this->scratchpad_registry().registrar();
+            scratchpad.book(key_rnn_space, sizeof(float) * scratchpad_sz, 4096);
+
+            int max_nparts = this->cell_kind() == alg_kind::vanilla_gru ? 2 : 1;
+            int ptr_wei_sz = rnn_.n_layer * rnn_.n_dir * max_nparts;
+            scratchpad.book(key_rnn_ptrs_wei_layer,
+                    sizeof(float *) * ptr_wei_sz);
+            scratchpad.book(key_rnn_ptrs_wei_iter,
+                    sizeof(float *) * ptr_wei_sz);
+            scratchpad.book(key_rnn_ptrs_bia,
+                    sizeof(float *) * ptr_wei_sz);
+        }
+    };
+
+    _ref_rnn_common_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs, true), rnn_postgemm_(nullptr) {
+        /// @todo set max_feature_size assuming that we limit the number of
+        /// iterations and layer to one if slc != dic and sic != dic
+        /// respectively
+
+        bias_preparation_func = &class_name::bias_prepare;
+        bias_finalization_func = &class_name::bias_finalize;
+
+        auto set_gemm_funcs
+                = [](bool packed_gemm, gemm_t &g, weights_assign_t &a) {
+                      if (packed_gemm) {
+                          g = &class_name::packed_gemm;
+                          a = &class_name::assign_packed_weights;
+                      } else {
+                          g = &class_name::gemm;
+                          a = &class_name::assign_weights;
+                      }
+                  };
+        set_gemm_funcs(pd()->rnn_.use_iter_packed_gemm, gemm_iter_func,
+                weights_iter_assign_func);
+
+        set_gemm_funcs(pd()->rnn_.use_layer_packed_gemm, gemm_layer_func,
+                weights_layer_assign_func);
+
+        switch (pd()->cell_kind()) {
+        case alg_kind::vanilla_lstm:
+            cell_func = &class_name::cell_execution;
+            if (aprop == prop_kind::forward) {
+                if (mayiuse(avx512_core))
+                    rnn_postgemm_ = new jit_uni_lstm_postgemm_kernel_fwd<avx512_core, src_type>(
+                        pd()->rnn_, pd()->attr());
+                else if (mayiuse(avx2))
+                    rnn_postgemm_ = new jit_uni_lstm_postgemm_kernel_fwd<avx2, src_type>(
+                        pd()->rnn_, pd()->attr());
+                else if (mayiuse(sse42))
+                    rnn_postgemm_ = new jit_uni_lstm_postgemm_kernel_fwd<sse42, src_type>(
+                        pd()->rnn_, pd()->attr());
+                assert(rnn_postgemm_ != nullptr);
+                rnn_postgemm_->init();
+            }
+            elemwise_func = &class_name::lstm_elemwise;
+            break;
+        case alg_kind::vanilla_rnn: // @todo switch on cell kind
+            cell_func = &class_name::cell_execution;
+            elemwise_func = &class_name::rnn_elemwise;
+            switch (pd()->activation_kind()) {
+            case alg_kind::eltwise_relu:
+                activation_func = &activation<alg_kind::eltwise_relu, aprop>;
+                break;
+            case alg_kind::eltwise_tanh:
+                activation_func = &activation<alg_kind::eltwise_tanh, aprop>;
+                break;
+            case alg_kind::eltwise_logistic:
+                activation_func = &activation<alg_kind::eltwise_logistic, aprop>;
+                break;
+            default: break;
+            }
+            break;
+        case alg_kind::vanilla_gru:
+            cell_func = &class_name::cell_execution_gru;
+            break;
+        case alg_kind::gru_linear_before_reset:
+            cell_func = &class_name::cell_execution_gru_lbr;
+            elemwise_func = &class_name::gru_lbr_elemwise;
+            break;
+        default: break;
+        }
+
+        grid_computation = &class_name::linear_execution;
+
+        size_t scratchpad_size, workspace_size;
+        rnn_utils::set_offsets(pd()->rnn_, ws_gates_offset_, ws_states_offset_,
+                ws_c_states_offset_, ws_diff_states_offset_,
+                ws_grid_comp_offset_, ws_cell_comp_offset_,
+                ws_bias_offset_, scratchpad_size, workspace_size);
+    }
+
+    ~_ref_rnn_common_t() {}
+
+    // typedef typename prec_traits::type data_t;
+
+    virtual void execute(event_t *e) const {
+        execute_();
+        e->set_state(event_t::ready);
+    }
+
+private:
+    void execute_() const;
+    rnn_grid_execution_sig(linear_execution);
+    rnn_cell_execution_sig(cell_execution);
+    rnn_cell_execution_sig(cell_execution_gru);
+    rnn_cell_execution_sig(cell_execution_gru_lbr);
+    rnn_elemwise_sig(rnn_elemwise);
+    rnn_elemwise_sig(lstm_elemwise);
+    rnn_elemwise_sig(gru_lbr_elemwise);
+    rnn_gemm_sig(gemm);
+    rnn_gemm_sig(packed_gemm);
+    rnn_bias_prepare_sig(bias_prepare);
+    rnn_bias_finalize_sig(bias_finalize);
+    rnn_weights_assign_sig(assign_weights);
+    rnn_weights_assign_sig(assign_packed_weights);
+
+    float (*activation_func)(float dd, float s, float alpha, float cliping);
+
+    void copy_init_layer(const rnn_utils::rnn_conf_t &rnn,
+            src_data_t *ws_states_, float *ws_diff_states_,
+            const src_data_t *xt_, const float *diff_dst_layer) const;
+
+    template <typename input_data_t>
+    void copy_init_iter(const rnn_utils::rnn_conf_t &rnn,
+            src_data_t *ws_states_, float *ws_c_states, float *ws_diff_states_,
+            const input_data_t *firstit_states_,
+            const float *diff_dst_iter) const;
+
+    template <typename dst_data_t>
+    void copy_res_layer(const rnn_utils::rnn_conf_t &rnn,
+            dst_data_t *dst_layer_, float *diff_src_layer,
+            const src_data_t *ws_states_, const float *ws_diff_states_) const;
+
+    template <typename output_data_t>
+    void copy_res_iter(const rnn_utils::rnn_conf_t &rnn,
+            output_data_t *dst_iter_, float *diff_src_iter,
+            const src_data_t *ws_states_, float *ws_c_states,
+            const float *ws_diff_states_) const;
+
+    void gates_reduction(const rnn_utils::rnn_conf_t &rnn,
+            const acc_data_t *ws_gates_, float *diff_bias_) const;
+
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+
+    size_t ws_gates_offset_;
+    size_t ws_states_offset_;
+    size_t ws_c_states_offset_;
+    size_t ws_bias_offset_;
+    size_t ws_diff_states_offset_;
+    size_t ws_grid_comp_offset_;
+    size_t ws_cell_comp_offset_;
+    jit_uni_rnn_postgemm_kernel *rnn_postgemm_;
+
+    grid_execution_f grid_computation;
+    cell_execution_f cell_func;
+
+    bias_prepare_t bias_preparation_func;
+    bias_finalize_t bias_finalization_func;
+    weights_assign_t weights_layer_assign_func;
+    weights_assign_t weights_iter_assign_func;
+
+    gemm_t gemm_layer_func;
+    gemm_t gemm_iter_func;
+    elemwise_f elemwise_func;
+};
+
+using ref_rnn_fwd_f32_t = _ref_rnn_common_t<prop_kind::forward, data_type::f32, data_type::f32>;
+using ref_rnn_bwd_f32_t = _ref_rnn_common_t<prop_kind::backward, data_type::f32, data_type::f32>;
+using ref_rnn_fwd_u8s8_t = _ref_rnn_common_t<prop_kind::forward, data_type::u8, data_type::s8>;
+}
+}
+}
+#endif
+
+// vim: et ts=4 sw=4 cindent cino^=l0,\:0,N-s
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_reorders.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_reorders.hpp
new file mode 100644
index 000000000..91dd85aff
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_reorders.hpp
@@ -0,0 +1,396 @@
+/*******************************************************************************
+ * Copyright 2018 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *******************************************************************************/
+
+#ifndef CPU_RNN_REORDERS_HPP
+#define CPU_RNN_REORDERS_HPP
+
+#include <assert.h>
+
+#include "type_helpers.hpp"
+#include "mkldnn_thread.hpp"
+#include "utils.hpp"
+#include "simple_q10n.hpp"
+#include "cpu_reorder_pd.hpp"
+#include "../gemm/os_blas.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+template <data_type_t type_i, data_type_t type_o>
+struct rnn_data_reorder_t : public cpu_primitive_t {
+    struct pd_t : public cpu_reorder_pd_t {
+        pd_t(const cpu_memory_pd_t *input_pd, const cpu_memory_pd_t *output_pd,
+                const primitive_attr_t *attr)
+            : cpu_reorder_pd_t(input_pd, output_pd, attr) {}
+
+        DECLARE_COMMON_PD_T("rnn_data_reorder", rnn_data_reorder_t);
+
+        static status_t create(reorder_pd_t **reorder_pd,
+                const memory_pd_t *input_pd, const memory_pd_t *output_pd,
+                const primitive_attr_t *attr) {
+            using namespace memory_format;
+            using namespace data_type;
+            assert(input_pd->engine()->kind() == engine_kind::cpu);
+            assert(output_pd->engine()->kind() == engine_kind::cpu);
+
+            const memory_desc_wrapper id(input_pd), od(output_pd);
+            bool args_ok = true
+                    && id.data_type() == type_i
+                    && od.data_type() == type_o
+                    && utils::one_of(id.format(), tnc, ldsnc)
+                    && od.format() == id.format();
+            if (!args_ok) return status::invalid_arguments;
+
+            auto _pd = new pd_t((const cpu_memory_pd_t *)input_pd,
+                    (const cpu_memory_pd_t *)output_pd, attr);
+            if (_pd == nullptr) return out_of_memory;
+            if (_pd->init() != success) { delete _pd; return unimplemented; }
+            return safe_ptr_assign<reorder_pd_t>(*reorder_pd, _pd);
+        }
+    };
+
+private:
+    typedef typename prec_traits<type_i>::type in_data_t;
+    typedef typename prec_traits<type_o>::type out_data_t;
+
+    rnn_data_reorder_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+
+    virtual void execute(event_t *e) const {
+        auto input = reinterpret_cast<const in_data_t *>(input_memory(0));
+        auto output = reinterpret_cast<out_data_t *>(memory());
+        const memory_desc_wrapper &input_d = pd()->input_pd();
+        const memory_desc_wrapper &output_d = pd()->output_pd();
+        const round_mode_t rmode = pd()->attr()->round_mode_;
+        const size_t nelems = input_d.nelems();
+        const float scale = pd()->attr()->rnn_data_qparams_.scale_;
+        const float shift = pd()->attr()->rnn_data_qparams_.shift_;
+
+        parallel_nd(nelems, [&](size_t i) {
+            float in = (float)input[input_d.off_l(i)] * scale + shift;
+            output[output_d.off_l(i)] = qz_a1b0<float, out_data_t>()(in, rmode);
+        });
+
+        e->set_state(event_t::ready);
+    }
+
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+};
+
+template <data_type_t type_i, data_type_t type_o>
+struct rnn_weights_reorder_t : public cpu_primitive_t {
+    struct pd_t : public cpu_reorder_pd_t {
+        pd_t(const cpu_memory_pd_t *input_pd, const cpu_memory_pd_t *output_pd,
+                const primitive_attr_t *attr)
+            : cpu_reorder_pd_t(input_pd, output_pd, attr) {}
+
+        DECLARE_COMMON_PD_T("rnn_weights_reorder", rnn_weights_reorder_t);
+
+        static status_t create(reorder_pd_t **reorder_pd,
+                const memory_pd_t *input_pd, const memory_pd_t *output_pd,
+                const primitive_attr_t *attr) {
+#if !USE_MKL_PACKED_GEMM
+            return status::unimplemented;
+#endif
+            using namespace memory_format;
+            assert(input_pd->engine()->kind() == engine_kind::cpu);
+            assert(output_pd->engine()->kind() == engine_kind::cpu);
+            const memory_desc_wrapper output_d(output_pd);
+
+            const memory_desc_wrapper id(input_pd), od(output_pd);
+            bool args_ok = true
+                    && id.data_type() == type_i
+                    && od.data_type() == type_o
+                    && utils::one_of(id.format(), ldigo, ldgoi)
+                    && od.format() == rnn_packed
+                    && od.rnn_packed_desc().format
+                            == mkldnn_ldigo_p
+                    && od.rnn_packed_desc().n_parts == 1
+                    && attr != nullptr;
+            if (!args_ok) return status::invalid_arguments;
+
+            const int mask = attr->rnn_weights_qparams_.mask_;
+            if (!utils::one_of(mask, 0, 3)) return status::unimplemented;
+
+            auto _pd = new pd_t((const cpu_memory_pd_t *)input_pd,
+                    (const cpu_memory_pd_t *)output_pd, attr);
+            if (_pd == nullptr) return out_of_memory;
+            if (_pd->init() != success) { delete _pd; return unimplemented; }
+            return safe_ptr_assign<reorder_pd_t>(*reorder_pd, _pd);
+        }
+
+        virtual status_t init() override {
+            status_t status = cpu_reorder_pd_t::init();
+            if (status != status::success) return status;
+
+            init_scratchpad();
+
+            return status::success;
+        }
+
+    private:
+        void init_scratchpad() {
+            const memory_desc_wrapper id(input_pd());
+            const size_t nelems = id.nelems();
+            const auto &dims = id.dims();
+
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            size_t quantization_size = sizeof(int8_t) * nelems;
+            size_t reduction_size = id.format() == ldigo
+                    ? sizeof(int32_t) * mkldnn_get_max_threads() * dims[0]
+                            * dims[1] * dims[3] * dims[4]
+                    : 0;
+            scratchpad.book(
+                    key_reorder_rnn_weights_quantization, quantization_size);
+            scratchpad.book(key_reorder_rnn_weights_reduction, reduction_size);
+        }
+    };
+
+private:
+    typedef typename prec_traits<type_i>::type in_data_t;
+    typedef typename prec_traits<type_o>::type out_data_t;
+
+    rnn_weights_reorder_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+
+    virtual void execute(event_t *e) const {
+#if USE_MKL_PACKED_GEMM
+        auto input = reinterpret_cast<const in_data_t *>(input_memory(0));
+        auto output = reinterpret_cast<char *>(memory());
+        const memory_desc_wrapper &input_d = pd()->input_pd();
+        const memory_desc_wrapper &output_d = pd()->output_pd();
+        const auto &dims = input_d.dims();
+
+        const int L = dims[0];
+        const int D = dims[1];
+        const int I = dims[2];
+        const int G = dims[3];
+        const int O = dims[4];
+
+        const bool is_igo = input_d.format() == memory_format::ldigo;
+
+        /* Quantize input & compute compensation */
+        auto quantized = (int8_t * __restrict)scratchpad().template get<void>(
+                memory_tracking::names::key_reorder_rnn_weights_quantization);
+        auto reduction = (int32_t * __restrict)scratchpad().template get<void>(
+                memory_tracking::names::key_reorder_rnn_weights_reduction);
+        float *comp = reinterpret_cast<float *>(
+                output + output_d.rnn_packed_desc().offset_compensation);
+        const round_mode_t rmode = pd()->attr()->round_mode_;
+        const float *scales = pd()->attr()->rnn_weights_qparams_.scales_;
+        const int mask = pd()->attr()->rnn_weights_qparams_.mask_;
+
+        if (is_igo) {
+            int nthr = mkldnn_get_max_threads();
+            int LD_nthr = nstl::min(L * D, nthr);
+            int I_nthr = nstl::min(I, nthr / LD_nthr);
+            parallel(nthr, [&](const int ithr, const int nthr) {
+                int LD_ithr = -1, LD_s = -1, LD_e = -1;
+                int I_ithr = -1, I_s = -1, I_e = -1;
+                if (ithr < LD_nthr * I_nthr) {
+                    LD_ithr = ithr % LD_nthr;
+                    I_ithr = ithr / LD_nthr;
+                    balance211(L * D, LD_nthr, LD_ithr, LD_s, LD_e);
+                    balance211(I, I_nthr, I_ithr, I_s, I_e);
+                }
+                int32_t *comp_ithr = reduction + I_ithr * L * D * G * O;
+                for (int ld = LD_s; ld < LD_e; ld++) {
+                    for (int go = 0; go < G * O; go++)
+                        comp_ithr[ld * G * O + go] = 0;
+                    for (int i = I_s; i < I_e; i++) {
+                        PRAGMA_OMP_SIMD()
+                        for (int go = 0; go < G * O; go++) {
+                            const float s = scales[(mask == 0) ? 0 : go];
+                            int8_t q = qz_b0<in_data_t, out_data_t>()(
+                                    input[ld * I * G * O + i * G * O + go], s,
+                                    rmode);
+                            quantized[ld * I * G * O + i * G * O + go]
+                                    = (int32_t)q;
+                            comp_ithr[ld * G * O + go] += (int32_t)q;
+                        }
+                    }
+                }
+            });
+            parallel_nd(L * D * G * O,
+                    [&](int s) { comp[s] = saturate<float>(reduction[s]); });
+            for (int i = 1; i < I_nthr; i++) {
+                parallel_nd(L * D * G * O, [&](int s) {
+                    comp[s] += saturate<float>(
+                            reduction[i * L * D * G * O + s]);
+                });
+            }
+        } else {
+            parallel_nd(L * D, G * O, [&](int ld, int go) {
+                int32_t compensation = 0;
+                const float s = scales[(mask == 0) ? 0 : go];
+                PRAGMA_OMP_SIMD()
+                for (int i = 0; i < I; i++) {
+                    int8_t q = qz_b0<in_data_t, out_data_t>()(
+                            input[ld * G * O * I + go * I + i], s, rmode);
+                    compensation += (int32_t)q;
+                    quantized[ld * G * O * I + go * I + i] = q;
+                }
+                comp[ld * G * O + go] = saturate<float>(compensation);
+            });
+        }
+
+        /* Pack */
+        auto off_igo = [&](int l, int d, int i, int g, int o) {
+            return l * D * I * G * O + d * I * G * O + i * G * O + g * O + o;
+        };
+        auto off_goi = [&](int l, int d, int i, int g, int o) {
+            return l * D * G * O * I + d * G * O * I + g * O * I + o * I + i;
+        };
+        int n_parts = output_d.rnn_packed_desc().n_parts;
+        const size_t *size_packed_cell
+                = output_d.rnn_packed_desc().part_pack_size;
+        const int *parts = output_d.rnn_packed_desc().parts;
+        const int n = output_d.rnn_packed_desc().n;
+        char *to_pack = output;
+        for (int l = 0; l < L; l++) {
+            for (int d = 0; d < D; d++) {
+                for (int p = 0; p < n_parts; p++) {
+                    int g = (p > 0) ? parts[p - 1] : 0;
+                    int m_p = parts[p] * O;
+                    int k_p = I;
+                    cblas_gemm_s8u8s32_pack(CblasColMajor, CblasAMatrix,
+                            is_igo ? CblasNoTrans : CblasTrans, m_p, n, k_p,
+                            &quantized[is_igo ? off_igo(l, d, 0, g, 0) :
+                                                off_goi(l, d, g, 0, 0)],
+                            is_igo ? G * O : I, to_pack);
+                    to_pack += size_packed_cell[p];
+                }
+            }
+        }
+#endif
+        e->set_state(event_t::ready);
+    }
+
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+};
+
+template <>
+struct rnn_weights_reorder_t<data_type::f32, data_type::f32>
+        : public cpu_primitive_t {
+    struct pd_t : public cpu_reorder_pd_t {
+        pd_t(const cpu_memory_pd_t *input_pd, const cpu_memory_pd_t *output_pd,
+                const primitive_attr_t *attr)
+            : cpu_reorder_pd_t(input_pd, output_pd, attr) {}
+
+        DECLARE_COMMON_PD_T("rnn_weights_reorder", rnn_weights_reorder_t);
+
+        static status_t create(reorder_pd_t **reorder_pd,
+                const memory_pd_t *input_pd, const memory_pd_t *output_pd,
+                const primitive_attr_t *attr) {
+#if !USE_MKL_PACKED_GEMM
+            return status::unimplemented;
+#endif
+            using namespace memory_format;
+            using namespace data_type;
+            assert(input_pd->engine()->kind() == engine_kind::cpu);
+            assert(output_pd->engine()->kind() == engine_kind::cpu);
+            const memory_desc_wrapper output_d(output_pd);
+
+            const memory_desc_wrapper id(input_pd), od(output_pd);
+            bool args_ok = true
+                    && id.data_type() == f32
+                    && od.data_type() == f32
+                    && utils::one_of(id.format(), ldigo, ldgoi)
+                    && od.format() == rnn_packed
+                    && utils::one_of(od.rnn_packed_desc().format,
+                        mkldnn_ldigo_p, mkldnn_ldgoi_p)
+                    && attr->has_default_values();
+            if (!args_ok) return status::invalid_arguments;
+
+            const int mask = attr->rnn_weights_qparams_.mask_;
+            if (!utils::one_of(mask, 0, 3)) return status::unimplemented;
+
+            auto _pd = new pd_t((const cpu_memory_pd_t *)input_pd,
+                    (const cpu_memory_pd_t *)output_pd, attr);
+            if (_pd == nullptr) return out_of_memory;
+            if (_pd->init() != success) { delete _pd; return unimplemented; }
+            return safe_ptr_assign<reorder_pd_t>(*reorder_pd, _pd);
+        }
+    };
+
+private:
+    rnn_weights_reorder_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+
+    virtual void execute(event_t *e) const {
+#if USE_MKL_PACKED_GEMM
+        auto input = reinterpret_cast<const float *>(input_memory(0));
+        auto output = reinterpret_cast<float *>(memory());
+        const memory_desc_wrapper &input_d = pd()->input_pd();
+        const memory_desc_wrapper &output_d = pd()->output_pd();
+        const auto &dims = input_d.dims();
+        const rnn_packed_data_t &rnn_pdata = output_d.rnn_packed_desc();
+        const int L = dims[0];
+        const int D = dims[1];
+        const int I = dims[2];
+        const int G = dims[3];
+        const int O = dims[4];
+
+        /* Pack */
+        bool cross_case = (input_d.format() == memory_format::ldigo
+                        && rnn_pdata.format == mkldnn_ldgoi_p)
+                || (input_d.format() == memory_format::ldgoi
+                        && rnn_pdata.format == mkldnn_ldigo_p);
+        auto trans = cross_case ? CblasTrans : CblasNoTrans;
+        int n_parts = rnn_pdata.n_parts;
+        const size_t *size_packed_cell = rnn_pdata.part_pack_size;
+        const int *parts = rnn_pdata.parts;
+        const int n = rnn_pdata.n;
+
+        const bool is_igo = input_d.format() == memory_format::ldigo;
+        auto off_igo = [&](int l, int d, int i, int g, int o) {
+            return l * D * I * G * O + d * I * G * O + i * G * O + g * O + o;
+        };
+        auto off_goi = [&](int l, int d, int i, int g, int o) {
+            return l * D * G * O * I + d * G * O * I + g * O * I + o * I + i;
+        };
+        for (int l = 0; l < L; l++) {
+            for (int d = 0; d < D; d++) {
+                for (int p = 0; p < n_parts; p++) {
+                    int g = (p > 0) ? parts[p - 1] : 0;
+                    int m_p = is_igo ? parts[p] * O : I;
+                    int k_p = is_igo ? I : parts[p] * O;
+                    int ld = is_igo ? G * O : I;
+                    cblas_sgemm_pack(CblasColMajor, CblasAMatrix, trans, m_p, n,
+                            k_p, 1.0f, &input[is_igo ? off_igo(l, d, 0, g, 0) :
+                                                       off_goi(l, d, 0, g, 0)],
+                            ld, output);
+                    output += size_packed_cell[p] / sizeof(float);
+                }
+            }
+        }
+        e->set_state(event_t::ready);
+#endif
+    }
+
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
+};
+
+} // namespace cpu
+} // namespace impl
+} // namespace mkldnn
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_utils.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_utils.cpp
new file mode 100644
index 000000000..7a073b86b
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_utils.cpp
@@ -0,0 +1,400 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "c_types_map.hpp"
+#include "math_utils.hpp"
+#include "mkldnn_thread.hpp"
+
+#include "ref_rnn.hpp"
+#include "rnn_utils.hpp"
+#include "type_helpers.hpp"
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+using namespace mkldnn::impl::utils;
+using namespace rnn_utils;
+using namespace memory_format;
+using namespace rnn_packed_format;
+using namespace data_type;
+
+void rnn_utils::init_conf(rnn_conf_t &rnn, const rnn_desc_t &rd,
+        const memory_desc_wrapper &src_layer_d,
+        const memory_desc_wrapper &src_iter_d,
+        const memory_desc_wrapper &weights_layer_d,
+        const memory_desc_wrapper &weights_iter_d,
+        const memory_desc_wrapper &dst_layer_d) {
+    rnn.is_fwd = utils::one_of(rd.prop_kind, prop_kind::forward_training,
+            prop_kind::forward_inference);
+    rnn.is_training = utils::one_of(
+            rd.prop_kind, prop_kind::forward_training, prop_kind::backward);
+    rnn.is_lbr = rd.cell_desc.cell_kind == mkldnn_gru_linear_before_reset;
+
+    switch (rd.direction) {
+    case mkldnn_unidirectional_left2right: rnn.exec_dir = l2r; break;
+    case mkldnn_unidirectional_right2left: rnn.exec_dir = r2l; break;
+    case mkldnn_bidirectional_concat: rnn.exec_dir = bi_concat; break;
+    case mkldnn_bidirectional_sum: rnn.exec_dir = bi_sum; break;
+    default: break;
+    }
+
+    if (everyone_is(f32, src_layer_d.data_type(), dst_layer_d.data_type(),
+                weights_layer_d.data_type()))
+        rnn.dt_conf = all_f32;
+    else if (dst_layer_d.data_type() == u8) {
+        if (IMPLICATION(src_iter_d._md, src_iter_d.data_type() == u8))
+            rnn.dt_conf = u8u8u8u8;
+        else
+            rnn.dt_conf = f32u8f32u8;
+    } else {
+        if (IMPLICATION(src_iter_d._md, src_iter_d.data_type() == u8))
+            rnn.dt_conf = u8u8u8f32;
+        else
+            rnn.dt_conf = f32u8f32f32;
+    }
+
+    rnn.n_layer = weights_layer_d.dims()[0];
+    rnn.n_iter = src_layer_d.dims()[0];
+    rnn.n_dir = weights_layer_d.dims()[1];
+    rnn.n_gates = weights_layer_d.dims()[3];
+    rnn.n_states = mkldnn_rnn_cell_get_states_count(&rd.cell_desc);
+    rnn.n_bias = rnn.n_gates + rnn.is_lbr;
+    rnn.mb = src_layer_d.dims()[1];
+    rnn.sic = weights_iter_d.dims()[2];
+    rnn.slc = weights_layer_d.dims()[2];
+    rnn.dic = weights_layer_d.dims()[4];
+    rnn.dlc = dst_layer_d.dims()[2];
+
+    rnn.gates_ld = rnn.dic * rnn.n_gates;
+    rnn.gates_nld = rnn.mb;
+    rnn.states_nld = rnn.mb;
+
+    /* Set the correct number of weights parts */
+    bool is_orig_gru = rd.cell_desc.cell_kind == alg_kind::vanilla_gru;
+    rnn.n_parts_weights_layer = 1;
+    rnn.parts_weights_layer[0] = rnn.n_gates;
+    rnn.parts_weights_layer[1] = 0;
+
+    rnn.n_parts_weights_iter = is_orig_gru ? 2 : 1;
+    rnn.parts_weights_iter[0] = is_orig_gru ? 2 : rnn.n_gates;
+    rnn.parts_weights_iter[1] = is_orig_gru ? 1 : 0;
+
+    rnn.n_parts_bias = 1;
+    rnn.parts_bias[0] = rnn.n_bias;
+    rnn.parts_bias[1] = 0;
+
+    /* Decide wich gemm implementation to use: packed/nonpacked jit/cblas
+     * and if to mergre gemm across iterations */
+    bool is_int8 = rnn.dt_conf != all_f32;
+    rnn.merge_gemm_layer = ((rnn.is_fwd && rnn.mb < 128) || !rnn.is_fwd)
+            || is_int8;
+    bool is_gru = utils::one_of(rd.cell_desc.cell_kind, alg_kind::vanilla_gru,
+            alg_kind::gru_linear_before_reset);
+    rnn.merge_gemm_iter = !(rnn.is_fwd || is_gru) || is_int8;
+    bool is_inference = !rnn.is_training;
+
+    rnn.use_jit_gemm = !mayiuse(avx512_mic)
+            && ((is_inference && (rnn.n_layer > 1 || rnn.mb < 100))
+                || (rnn.is_training && rnn.dic < 500));
+
+    /* Decide to copy bias */
+    rnn.copy_bias = rnn.dt_conf != all_f32;
+
+#if USE_MKL_PACKED_GEMM
+    rnn.use_layer_packed_gemm
+            = (weights_layer_d.format() == any && rnn.slc > 760 && rnn.dic > 760
+                      && is_inference)
+            || is_int8; // packed gemm is the only supported option for int8
+    rnn.use_iter_packed_gemm = (weights_iter_d.format() == any && rnn.sic > 760
+                                       && rnn.dic > 760 && is_inference)
+            || is_int8;
+#else
+    rnn.use_layer_packed_gemm = false;
+    rnn.use_iter_packed_gemm = false;
+#endif
+
+    /* Set packed gemm sizes */
+    if (rnn.use_layer_packed_gemm) {
+        rnn.weights_layer_pack_size = 0;
+        for (int p = 0; p < rnn.n_parts_weights_layer; p++) {
+            int m_p = rnn.is_fwd
+                ? (rnn.parts_weights_layer[p] * rnn.dic)
+                : rnn.slc;
+            int k_p = rnn.is_fwd
+                ? rnn.slc
+                : (rnn.parts_weights_layer[p] * rnn.dic);
+            int n_p = rnn.merge_gemm_layer ? rnn.mb * rnn.n_iter : rnn.mb;
+
+#if USE_MKL_PACKED_GEMM
+            if (rnn.dt_conf == all_f32)
+                rnn.part_weights_layer_pack_size[p] = cblas_sgemm_pack_get_size(
+                        CblasAMatrix, m_p, n_p, k_p);
+            else
+                rnn.part_weights_layer_pack_size[p]
+                        = cblas_gemm_s8u8s32_pack_get_size(
+                                CblasAMatrix, m_p, n_p, k_p);
+#else
+            UNUSED(m_p);
+            UNUSED(k_p);
+            UNUSED(n_p);
+            rnn.part_weights_layer_pack_size[p] = 0;
+#endif
+            rnn.weights_layer_pack_size += rnn.n_layer * rnn.n_dir
+                    * rnn.part_weights_layer_pack_size[p];
+        }
+        rnn.weights_layer_comp_offset = rnn.weights_layer_pack_size;
+        rnn.weights_layer_pack_size += rnn.dt_conf == all_f32 ? 0 : rnn.n_layer
+                        * rnn.n_dir * rnn.n_gates * rnn.dlc * sizeof(float);
+    }
+
+    if (rnn.use_iter_packed_gemm) {
+        rnn.weights_iter_pack_size = 0;
+        for (int p = 0; p < rnn.n_parts_weights_iter; p++) {
+            int m_p = rnn.is_fwd ? (rnn.parts_weights_iter[p] * rnn.dic) :
+                                   rnn.sic;
+            int k_p = rnn.is_fwd ? rnn.sic :
+                                   (rnn.parts_weights_iter[p] * rnn.dic);
+            int n_p = rnn.merge_gemm_iter ? rnn.mb * rnn.n_iter : rnn.mb;
+
+#if USE_MKL_PACKED_GEMM
+            if (rnn.dt_conf == all_f32)
+                rnn.part_weights_iter_pack_size[p] = cblas_sgemm_pack_get_size(
+                        CblasAMatrix, m_p, n_p, k_p);
+            else
+                rnn.part_weights_iter_pack_size[p]
+                        = cblas_gemm_s8u8s32_pack_get_size(
+                                CblasAMatrix, m_p, n_p, k_p);
+#else
+            UNUSED(m_p);
+            UNUSED(k_p);
+            UNUSED(n_p);
+            rnn.part_weights_iter_pack_size[p] = 0;
+#endif
+            rnn.weights_iter_pack_size += rnn.n_layer * rnn.n_dir
+                    * rnn.part_weights_iter_pack_size[p];
+        }
+        rnn.weights_iter_comp_offset = rnn.weights_iter_pack_size;
+        rnn.weights_iter_pack_size += rnn.dt_conf == all_f32 ? 0 : rnn.n_layer
+                        * rnn.n_dir * rnn.n_gates * rnn.dic * sizeof(float);
+    }
+
+}
+
+void rnn_utils::set_conf(rnn_conf_t &rnn, const rnn_desc_t &rd,
+        const memory_desc_wrapper &weights_layer_d,
+        const memory_desc_wrapper &weights_iter_d,
+        const memory_desc_wrapper &diff_weights_layer_d,
+        const memory_desc_wrapper &diff_weights_iter_d) {
+
+    /* Set leading dimensions for input weights arrays depending on input format
+     */
+    rnn.weights_layer_fmt = weights_layer_d.format();
+    rnn.weights_iter_fmt = weights_iter_d.format();
+    rnn.weights_layer_is_packed = rnn.weights_layer_fmt == rnn_packed;
+    rnn.weights_iter_is_packed = rnn.weights_iter_fmt == rnn_packed;
+
+    auto set_dims = [&](const memory_desc_wrapper &md, int &ld, int &nld) {
+        switch (md.format()) {
+        case ldigo:
+            ld = (int)md.blocking_desc().strides[0][2];
+            nld = md.dims()[2];
+            return;
+        case ldgoi:
+            ld = (int)md.blocking_desc().strides[0][4];
+            nld = md.dims()[3] * md.dims()[4];
+            return;
+        default: ld = 0; nld = 0;
+        }
+    };
+    set_dims(weights_layer_d, rnn.weights_layer_ld, rnn.weights_layer_nld);
+    set_dims(weights_iter_d, rnn.weights_iter_ld, rnn.weights_iter_nld);
+    if (!rnn.is_fwd) {
+        set_dims(diff_weights_layer_d, rnn.diff_weights_layer_ld,
+                rnn.diff_weights_layer_nld);
+        set_dims(diff_weights_iter_d, rnn.diff_weights_iter_ld,
+                rnn.diff_weights_iter_nld);
+    }
+
+    int sizeof_states_dt
+            = rnn.dt_conf == all_f32 ? sizeof(float) : sizeof(uint8_t);
+    rnn.states_ws_ld
+            = get_good_ld(nstl::max(rnn.slc, nstl::max(rnn.sic, rnn.dic)),
+                sizeof_states_dt);
+    rnn.gates_ws_ld = get_good_ld(rnn.gates_ld, sizeof(float));
+
+    /* Set workspace sizes to store:
+     * states to copmute a pass
+     * diff states to copmute bwd pass (training only)
+     * intermediate results from the gates
+     */
+    rnn.use_workspace = rnn.is_training;
+    rnn.ws_states_size = (size_t)(rnn.n_layer + 1) * rnn.n_dir
+            * (rnn.n_iter + 1) * rnn.mb * rnn.states_ws_ld * sizeof_states_dt;
+    bool is_lstm = rd.cell_desc.cell_kind == mkldnn_vanilla_lstm;
+    rnn.ws_c_states_size = is_lstm
+            ? (size_t)(rnn.n_layer + 1) * rnn.n_dir * (rnn.n_iter + 1) * rnn.mb
+                    * rnn.states_ws_ld * sizeof(float)
+            : 0;
+    rnn.ws_diff_states_size = rnn.is_training
+            ? (size_t)(rnn.n_layer + 1) * rnn.n_dir * (rnn.n_iter + 1)
+                    * (rnn.n_states + 1) * rnn.mb * rnn.states_ws_ld
+                    * sizeof(float)
+            : (size_t)0;
+    rnn.ws_gates_size = (size_t)rnn.n_layer * rnn.n_dir * rnn.n_iter * rnn.mb
+            * rnn.gates_ws_ld * sizeof(float);
+
+    /* set other sizes */
+    rnn.ws_per_cell = (size_t)rnn.is_lbr * rnn.mb * rnn.dic * sizeof(float);
+    rnn.ws_cell_comp_size
+            = rnn.is_lbr || rnn.dt_conf != all_f32
+                ? (size_t) rnn.gates_nld * rnn.gates_ws_ld * sizeof(float)
+                : 0;
+    rnn.ws_grid_comp_size = (size_t)rnn.is_lbr * rnn.is_training * rnn.n_layer
+            * rnn.n_dir * rnn.n_iter * rnn.ws_per_cell * sizeof(float);
+    rnn.ws_bias_size = (size_t)rnn.n_layer * rnn.n_dir * rnn.n_bias * rnn.dic
+            * sizeof(float);
+}
+
+int rnn_utils::get_good_ld(int dim, int sizeof_dt) {
+    // we want matrices leading dimentions to be 64-byte aligned,
+    // and not divisible by 256 to avoid 4K aliasing effects
+    int ld = rnd_up(dim, 64 / sizeof_dt);
+    return (ld % 256 == 0) ? ld + 64 / sizeof_dt : ld;
+}
+
+void rnn_utils::set_offsets(const rnn_conf_t &rnn, size_t &ws_gates_offset,
+        size_t &ws_states_offset, size_t &ws_c_states_offset,
+        size_t &ws_diff_states_offset, size_t &ws_grid_comp_offset,
+        size_t &ws_cell_comp_offset, size_t &ws_bias_offset,
+        size_t &scratchpad_size, size_t &workspace_size) {
+
+    const size_t page_size = 4096; // 2097152;
+    size_t current_offset;
+    /* Mandatory workspaces: go to workspace if use_workspace, scratchpad
+     * otherwise */
+    current_offset = 0; // assumes the workspace base pointer is page aligned
+    ws_gates_offset = current_offset;
+    current_offset += rnn.ws_gates_size;
+
+    current_offset = utils::rnd_up(current_offset, page_size);
+    ws_states_offset = current_offset;
+    current_offset += rnn.ws_states_size;
+
+    current_offset = utils::rnd_up(current_offset, page_size);
+    ws_c_states_offset = current_offset;
+    current_offset += rnn.ws_c_states_size;
+
+    current_offset = utils::rnd_up(current_offset, page_size);
+    ws_diff_states_offset = current_offset;
+    current_offset += rnn.ws_diff_states_size;
+
+    current_offset = utils::rnd_up(current_offset, page_size);
+    ws_grid_comp_offset = current_offset;
+    current_offset += rnn.ws_grid_comp_size;
+
+    current_offset = utils::rnd_up(current_offset, page_size);
+    ws_cell_comp_offset = current_offset;
+    current_offset += rnn.ws_cell_comp_size;
+
+    workspace_size = rnn.use_workspace ? current_offset : 0;
+
+    /* Optional scratchpads */
+    // Assumes the scratchpad base pointer is page aligned.
+    // If use_workspace, the following goes to scratchpad alone,
+    // otherwise, all goes to scratchpad and continue incrementing offset
+    current_offset = rnn.use_workspace ? 0 : current_offset;
+
+    if (rnn.copy_bias) {
+        current_offset = utils::rnd_up(current_offset, page_size);
+        ws_bias_offset = current_offset;
+        current_offset += rnn.ws_bias_size;
+    }
+
+    scratchpad_size = current_offset;
+}
+
+void rnn_utils::get_scratchpad_and_workspace_sizes(const rnn_conf_t &rnn,
+        size_t &scratchpad_size, size_t &workspace_size) {
+    size_t ws_gates_offset, ws_states_offset, ws_c_states_offset,
+            ws_diff_states_offset, ws_grid_comp_offset, ws_cell_comp_offset,
+            ws_bias_offset;
+    set_offsets(rnn, ws_gates_offset, ws_states_offset, ws_diff_states_offset,
+            ws_c_states_offset, ws_grid_comp_offset, ws_cell_comp_offset,
+            ws_bias_offset, scratchpad_size, workspace_size);
+}
+
+status_t rnn_utils::set_good_strides(memory_desc_t &weights_md) {
+    auto &strides = weights_md.layout_desc.blocking.strides[0];
+    auto dims = weights_md.dims;
+
+    if (weights_md.format == ldigo) {
+        strides[2] = rnn_utils::get_good_ld((int)strides[2],
+                (int)types::data_type_size(weights_md.data_type));
+        strides[1] = dims[2] * strides[2];
+        strides[0] = dims[1] * strides[1];
+    } else if (weights_md.format == ldgoi) {
+        strides[4] = rnn_utils::get_good_ld((int)strides[4],
+                (int)types::data_type_size(weights_md.data_type));
+        strides[3] = dims[4] * strides[4];
+        strides[1] = dims[3] * strides[3];
+        strides[0] = dims[1] * strides[1];
+    } else
+        return unimplemented;
+
+    return success;
+}
+
+status_t rnn_utils::set_expected_desc(rnn_conf_t &rnn,
+        memory_desc_t &weights_md, bool is_iter) {
+    bool use_packed_gemm = is_iter
+        ? rnn.use_iter_packed_gemm
+        : rnn.use_layer_packed_gemm;
+    if (use_packed_gemm) {
+        weights_md.format = rnn_packed;
+        rnn_packed_data_t &rnn_pdata = weights_md.layout_desc.rnn_packed_desc;
+        rnn_pdata.format = rnn.is_fwd ? mkldnn_ldigo_p : mkldnn_ldgoi_p;
+        if (is_iter) {
+            rnn_pdata.n = rnn.mb;
+            rnn_pdata.n_parts = rnn.n_parts_weights_iter;
+            array_copy(rnn_pdata.parts, rnn.parts_weights_iter,
+                    MKLDNN_RNN_MAX_N_PARTS);
+            array_copy(rnn_pdata.part_pack_size,
+                    rnn.part_weights_iter_pack_size, MKLDNN_RNN_MAX_N_PARTS);
+            rnn_pdata.offset_compensation = rnn.weights_iter_comp_offset;
+            rnn_pdata.size = rnn.weights_iter_pack_size;
+        } else {
+            rnn_pdata.n = rnn.merge_gemm_layer ? rnn.n_iter * rnn.mb : rnn.mb;
+            rnn_pdata.n_parts = rnn.n_parts_weights_layer;
+            array_copy(rnn_pdata.parts, rnn.parts_weights_layer,
+                    MKLDNN_RNN_MAX_N_PARTS);
+            array_copy(rnn_pdata.part_pack_size,
+                    rnn.part_weights_layer_pack_size, MKLDNN_RNN_MAX_N_PARTS);
+            rnn_pdata.offset_compensation = rnn.weights_layer_comp_offset;
+            rnn_pdata.size = rnn.weights_layer_pack_size;
+        }
+    } else {
+        weights_md.format = rnn.is_fwd ? ldigo : ldgoi;
+        CHECK(memory_desc_wrapper::compute_blocking(weights_md));
+        // Adjust strides for good leading dimension in GEMM
+        CHECK(set_good_strides(weights_md));
+    }
+    return success;
+}
+
+}
+}
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_utils.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_utils.hpp
new file mode 100644
index 000000000..88f0b4417
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/rnn/rnn_utils.hpp
@@ -0,0 +1,224 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef RNN_UTILS_HPP
+#define RNN_UTILS_HPP
+
+#include "mkldnn.h"
+
+#include "cpu_rnn_pd.hpp"
+
+
+#define rnn_elemwise_sig(f)                                               \
+    void f(const rnn_utils::rnn_conf_t &rnn, acc_data_t *ws_gates_,   \
+            src_data_t *states_t_l_, float *c_states_t_l_,            \
+            src_data_t *states_tm1_l_, float *c_states_tm1_l_,        \
+            float *diff_states_t_l_, float *diff_states_t_lp1_,       \
+            float *diff_states_tp1_l_, float *bias_, float *ws_grid_, \
+            float *ws_cell_) const
+
+#define rnn_cell_execution_sig(f)                                             \
+    void f(const rnn_utils::rnn_conf_t &rnn, src_data_t *states_t_l_,     \
+            float *c_states_t_l_, float *diff_states_t_l_,                \
+            weights_data_t **w_layer_, weights_data_t **w_iter_,          \
+            float **bias_, src_data_t *states_t_lm1_,                     \
+            src_data_t *states_tm1_l_, float *c_states_tm1_l_,            \
+            float *diff_states_t_lp1_, float *diff_states_tp1_l_,         \
+            float *diff_w_layer_, float *diff_w_iter_, float *diff_bias_, \
+            acc_data_t *ws_gates_, float *ws_grid_, float *ws_cell_) const
+
+#define rnn_grid_execution_sig(f)                                                 \
+    void f(const rnn_utils::rnn_conf_t &rnn, weights_data_t **weights_layer_, \
+            weights_data_t **weights_states_, float **bias_,                  \
+            src_data_t *ws_states_, float *ws_c_states_,                      \
+            float *ws_diff_states_, acc_data_t *ws_gates_, float *ws_cell_,   \
+            float *ws_grid_, float *diff_weights_layer_,                      \
+            float *diff_weights_iter_, float *diff_bias_) const
+
+#define rnn_gemm_sig(f)                                                     \
+    void f(const char transA, const char transB, int m, int n, int k,   \
+            const float alpha, const weights_data_t *a_, const int ldA, \
+            const src_data_t *b_, const int ldB, const float beta,      \
+            acc_data_t *c_, const int ldC) const
+
+#define rnn_bias_prepare_sig(f)                                                  \
+    void f(const rnn_utils::rnn_conf_t &rnn, float **bias_, const float *b_, \
+            float *scratch_bias_) const
+
+#define rnn_bias_finalize_sig(f)                                       \
+    void f(const rnn_utils::rnn_conf_t &rnn, float *scratch_bias_, \
+            const float *w_iter_comp, const float *w_layer_comp) const
+
+#define rnn_weights_assign_sig(f)                                                \
+    void f(const rnn_utils::rnn_conf_t &rnn, memory_format_t fmt, int nld,   \
+            int ld, int OC_size, int IC_size, const int n_parts,             \
+            const int *gates_per_part, const size_t *part_weights_pack_size, \
+            weights_data_t **weights_, const weights_data_t *w_,             \
+            float **bias_, const float *b_, float *scratch_bias_) const
+
+
+namespace mkldnn {
+namespace impl {
+namespace cpu {
+
+namespace rnn_utils {
+
+using namespace mkldnn::impl::utils;
+
+enum execution_direction_t {
+    l2r,
+    r2l,
+    bi_concat,
+    bi_sum,
+};
+
+enum data_type_conf_t {
+    all_f32,
+    u8u8u8f32,
+    f32u8f32f32,
+    u8u8u8u8,
+    f32u8f32u8
+};
+
+struct rnn_conf_t {
+    execution_direction_t exec_dir;
+    data_type_conf_t dt_conf;
+    int n_layer, n_iter, n_dir, n_gates, n_states;
+    int mb;
+    int slc, sic, dic, dlc;
+    int gates_ld, gates_nld, gates_ws_ld;
+    int n_parts_weights_layer, parts_weights_layer[MKLDNN_RNN_MAX_N_PARTS];
+    int n_parts_weights_iter, parts_weights_iter[MKLDNN_RNN_MAX_N_PARTS];
+    int n_bias, n_parts_bias, parts_bias[MKLDNN_RNN_MAX_N_PARTS];
+    size_t part_weights_iter_pack_size[MKLDNN_RNN_MAX_N_PARTS],
+            part_weights_layer_pack_size[MKLDNN_RNN_MAX_N_PARTS];
+    bool weights_layer_is_packed, weights_iter_is_packed;
+    /* Size of packed data in bytes */
+    size_t weights_layer_comp_offset, weights_layer_pack_size,
+        weights_iter_comp_offset, weights_iter_pack_size;
+
+    bool copy_bias;
+    int weights_layer_ld, weights_layer_nld;
+    int diff_weights_layer_ld, diff_weights_layer_nld;
+    int weights_iter_ld, weights_iter_nld;
+    int diff_weights_iter_ld, diff_weights_iter_nld;
+    int states_nld, states_ws_ld;
+    int weights_iter_compensation_size, weights_layer_compensation_size;
+    bool is_fwd, is_training, is_lbr;
+    bool use_workspace;
+
+    /* Size of workspace for each tensor in bytes */
+    size_t ws_gates_size, ws_states_size, ws_c_states_size, ws_diff_states_size,
+            ws_cell_comp_size, ws_grid_comp_size, ws_per_cell, ws_bias_size;
+    bool merge_gemm_iter, merge_gemm_layer, use_jit_gemm, use_layer_packed_gemm,
+        use_iter_packed_gemm;
+    memory_format_t weights_layer_fmt, weights_iter_fmt, diff_weights_layer_fmt,
+            diff_weights_iter_fmt;
+};
+
+int get_good_ld(int dim, int sizeof_dt);
+
+void init_conf(rnn_conf_t &rnn, const rnn_desc_t &rd,
+        const memory_desc_wrapper &src_layer_d,
+        const memory_desc_wrapper &src_iter_d,
+        const memory_desc_wrapper &weights_layer_d,
+        const memory_desc_wrapper &weights_iter_d,
+        const memory_desc_wrapper &dst_layer_d);
+
+void set_conf(rnn_conf_t &rnn, const rnn_desc_t &rd,
+        const memory_desc_wrapper &weights_layer_d,
+        const memory_desc_wrapper &weights_iter_d,
+        const memory_desc_wrapper &diff_weights_layer_d,
+        const memory_desc_wrapper &diff_weights_iter_d);
+
+void set_offsets(const rnn_conf_t &rnn, size_t &ws_gates_offset,
+        size_t &ws_h_state_offset, size_t &ws_c_state_offset,
+        size_t &ws_diff_states_offset, size_t &ws_grid_comp_offset,
+        size_t &ws_cell_comp_offset, size_t &ws_bias_offset,
+        size_t &scratchpad_size, size_t &workspace_size);
+
+void get_scratchpad_and_workspace_sizes(const rnn_conf_t &rnn,
+        size_t &scratchpad_size, size_t &workspace_size);
+status_t set_expected_desc(
+        rnn_conf_t &rnn, memory_desc_t &weights_md, bool is_iter);
+status_t set_good_strides(memory_desc_t &weights_md);
+
+template <typename T>
+struct ws_gates_aoc {
+    ws_gates_aoc(const rnn_conf_t &rnn, T *data)
+        : gates_(data, rnn.gates_nld, rnn.gates_ws_ld), DIC_(rnn.dic) {}
+    T &operator()(int batch, int gate, int dic) {
+        return gates_(batch, gate * DIC_ + dic);
+    }
+
+private:
+    mkldnn::impl::utils::array_offset_calculator<T, 2> gates_;
+    int DIC_;
+};
+using ws_gates_aoc_t = ws_gates_aoc<float>;
+using ws_gates_aoc_s32_t = ws_gates_aoc<int32_t>;
+
+struct bias_aoc_t {
+    bias_aoc_t(const rnn_conf_t &rnn, const float *data)
+        : bias_(data, rnn.n_bias, rnn.dic) {}
+    const float &operator()(int bias_n, int dic) { return bias_(bias_n, dic); }
+
+private:
+    mkldnn::impl::utils::array_offset_calculator<const float, 2> bias_;
+};
+
+template <typename T>
+struct ws_states_aoc {
+    ws_states_aoc(const rnn_conf_t &rnn, T *data)
+        : state_(data, rnn.states_nld, rnn.states_ws_ld) {}
+    T &operator()(int batch, int dic) { return state_(batch, dic); }
+
+private:
+    mkldnn::impl::utils::array_offset_calculator<T, 2> state_;
+};
+using ws_states_aoc_t = ws_states_aoc<float>;
+using ws_states_aoc_u8_t = ws_states_aoc<uint8_t>;
+
+struct ws_diff_states_aoc_t {
+    ws_diff_states_aoc_t(const rnn_conf_t &rnn, float *data)
+        : diff_states_(data, rnn.n_states + 1, rnn.n_iter + 1, rnn.states_nld,
+                  rnn.states_ws_ld) {}
+    float &operator()(int state_n, int batch, int dic) {
+        return diff_states_(state_n, 0, batch, dic);
+    }
+
+private:
+    mkldnn::impl::utils::array_offset_calculator<float, 4> diff_states_;
+};
+
+struct ws_diff_w_iter_aoc_t {
+    ws_diff_w_iter_aoc_t(const rnn_conf_t &rnn, float *data)
+        : diff_weights_iter_(
+                  data, rnn.diff_weights_iter_nld, rnn.diff_weights_iter_ld)
+        , DIC_(rnn.dic) {}
+    float &operator()(int sic, int gate, int dic) {
+        return diff_weights_iter_(sic, gate * DIC_ + dic);
+    }
+
+private:
+    mkldnn::impl::utils::array_offset_calculator<float, 2> diff_weights_iter_;
+    int DIC_;
+};
+}
+}
+}
+}
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.cpp
index eb5723fc5..c64248996 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.cpp
@@ -22,68 +22,95 @@ namespace mkldnn {
 namespace impl {
 namespace cpu {
 
+using namespace memory_tracking::names;
+
 template <data_type_t data_type>
-void simple_concat_t<data_type>::execute() {
-    const int num_arrs = conf_.n_inputs();
-    int *perm = conf_.perm_, *iperm = conf_.iperm_;
-    int concat_dim = conf_.concat_dim();
+void simple_concat_t<data_type>::execute() const {
+    auto scratchpad = this->scratchpad();
+    auto iptrs = scratchpad.template get<const data_t *>(key_concat_iptrs);
+    auto optrs = scratchpad.template get<data_t *>(key_concat_optrs);
+    auto nelems_to_copy = scratchpad.template get<size_t>(key_concat_nelems);
+    auto is = scratchpad.template get<strides_t>(key_concat_istrides);
+
+    const int num_arrs = pd()->n_inputs();
+    const ptrdiff_t *perm = pd()->perm_, *iperm = pd()->iperm_;
+    const int concat_dim = pd()->concat_dim();
     auto o_base_ptr = reinterpret_cast<data_t *>(this->memory());
 
     for (int a = 0; a < num_arrs; ++a) {
-        const memory_desc_wrapper i_d(conf_.src_pd(a));
-        const memory_desc_wrapper o_d(conf_.src_image_pd(a));
+        const memory_desc_wrapper i_d(pd()->src_pd(a));
+        const memory_desc_wrapper o_d(pd()->src_image_pd(a));
 
-        input_ptrs_[a] = reinterpret_cast<const data_t *>(
+        iptrs[a] = reinterpret_cast<const data_t *>(
                 this->input_memory(a)) + i_d.blk_off(0);
-        output_ptrs_[a] = o_base_ptr + o_d.blk_off(0);
-        nelems_to_copy_[a] = nelems_to_concat(concat_dim, perm, iperm, i_d);
+        optrs[a] = o_base_ptr + o_d.blk_off(0);
+        nelems_to_copy[a] = pd()->nelems_to_concat(i_d);
         for (int i = 0; i < TENSOR_MAX_DIMS; i++) {
             if (i < perm[concat_dim])
-                is_[a][i] = size_t(i_d.blocking_desc().strides[0][iperm[i]]);
+                is[a][i] = size_t(i_d.blocking_desc().strides[0][iperm[i]]);
             else
-                is_[a][i] = 0;
+                is[a][i] = 0;
         }
     }
 
-    const memory_desc_wrapper o_d(conf_.src_image_pd());
+    const memory_desc_wrapper o_d(pd()->src_image_pd());
     auto &blk = o_d.blocking_desc();
+
     strides_t os = { 0 };
     for (int i = 0; i < perm[concat_dim]; i++)
         os[i] = o_d.blocking_desc().strides[0][iperm[i]];
+
     dims_t phys_dims;
     for (size_t i = 0; i < sizeof(phys_dims)/sizeof(phys_dims[0]); i++)
-        phys_dims[i] = (i < (size_t)perm[concat_dim]) ?
-                o_d.dims()[iperm[i]] / blk.block_dims[iperm[i]] :
-                1;
+        phys_dims[i] = (i < (size_t)perm[concat_dim])
+            ?  o_d.dims()[iperm[i]] / blk.block_dims[iperm[i]] : 1;
 
-    switch (perm[concat_dim]) {
-    case (0): {
+    if (perm[concat_dim] == 0) {
         for (int a = 0; a < num_arrs; ++a) {
-            const data_t *i = &input_ptrs_[a][0];
-            data_t *o = &output_ptrs_[a][0];
-            parallel_nd((ptrdiff_t)nelems_to_copy_[a],
+            const data_t *i = &iptrs[a][0];
+            data_t *o = &optrs[a][0];
+            parallel_nd((ptrdiff_t)nelems_to_copy[a],
                     [&](ptrdiff_t e) { o[e] = i[e]; });
         }
-        break;
-    }
-    default:
+    } else {
         parallel_nd(phys_dims[0], phys_dims[1], phys_dims[2], phys_dims[3],
             phys_dims[4], num_arrs,
             [&](int n0, int n1, int n2, int n3, int n4, int a) {
-            // XXX: this code may access unitialized values in is_[*][0-4] --
+            // XXX: this code may access uninitialized values in is[*][0-4] --
             // that's why we have to set them to zero although this is
             // probably benign
-            size_t in_off = is_[a][0] * n0 + is_[a][1] * n1
-                    + is_[a][2] * n2 + is_[a][3] * n3
-                    + is_[a][4] * n4;
-            size_t out_off = os[0] * n0 + os[1] * n1
-                    + os[2] * n2 + os[3] * n3 + os[4] * n4;
-            const data_t *i = &input_ptrs_[a][in_off];
-            data_t *o = &output_ptrs_[a][out_off];
+            size_t in_off = is[a][0] * n0 + is[a][1] * n1 + is[a][2] * n2
+                    + is[a][3] * n3 + is[a][4] * n4;
+            size_t out_off = os[0] * n0 + os[1] * n1 + os[2] * n2
+                    + os[3] * n3 + os[4] * n4;
+            const data_t *i = &iptrs[a][in_off];
+            data_t *o = &optrs[a][out_off];
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+            // The code below performs data copying: o[e] = i[e]
+            // and uses a workaround to make GNU compilers optimize it
+            uint8_t *ptro = reinterpret_cast<uint8_t *>(o);
+            const uint8_t *ptri = reinterpret_cast<const uint8_t *>(i);
+            const size_t main_part =
+                nelems_to_copy[a] * sizeof(data_t) / sizeof(uint32_t);
+            const size_t tail_part =
+                nelems_to_copy[a] * sizeof(data_t) % sizeof(uint32_t);
 
             PRAGMA_OMP_SIMD()
-            for (size_t e = 0; e < nelems_to_copy_[a]; ++e)
-                o[e] = i[e];
+            for (size_t e = 0; e < main_part; ++e) {
+                *(reinterpret_cast<uint32_t *>(ptro))
+                    = *(reinterpret_cast<const uint32_t *>(ptri));
+                ptro += sizeof(uint32_t);
+                ptri += sizeof(uint32_t);
+            }
+            for (size_t e = 0; e < tail_part; ++e) {
+                *ptro = *ptri;
+                ++ptro;
+                ++ptri;
+            }
+#else
+            PRAGMA_OMP_SIMD()
+            for (size_t e = 0; e < nelems_to_copy[a]; ++e) o[e] = i[e];
+#endif
         });
     }
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.hpp
index 45193b22f..84946da3d 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_concat.hpp
@@ -17,6 +17,8 @@
 #ifndef SIMPLE_CONCAT_HPP
 #define SIMPLE_CONCAT_HPP
 
+#include "memory_tracking.hpp"
+
 #include "cpu_concat.hpp"
 
 namespace mkldnn {
@@ -28,29 +30,25 @@ struct simple_concat_t: public cpu_primitive_t {
     using cpu_memory_pd_t = cpu_memory_t::pd_t;
 
     struct pd_t: public cpu_concat_pd_t {
-        pd_t(const memory_desc_t *output_d, int n,
-                int concat_dim, const cpu_memory_pd_t **input_pds,
+        pd_t(const memory_desc_t *output_d, int n, int concat_dim,
+                const cpu_memory_pd_t **input_pds,
                 const primitive_attr_t *attr)
-            : cpu_concat_pd_t(output_d, n, concat_dim, input_pds, attr)
-        {}
+            : cpu_concat_pd_t(output_d, n, concat_dim, input_pds, attr) {}
+
         pd_t(const pd_t &rhs) : cpu_concat_pd_t(rhs) {
             for (size_t i = 0; i < sizeof(perm_)/sizeof(perm_[0]); i++) {
                 perm_[i] = rhs.perm_[i];
                 iperm_[i] = rhs.iperm_[i];
             }
         }
+
         DECLARE_CPU_CONCAT_PD_T("simple:any", simple_concat_t);
 
         virtual status_t init() override {
-            auto is_dense = [&](const memory_desc_wrapper &data_d) {
-                return nelems_to_concat(concat_dim_, perm_, iperm_, data_d)
-                        == _size_to_concat(concat_dim_, perm_, iperm_, data_d);
-            };
             const memory_desc_wrapper dst_d(&dst_pd_);
             bool ok = true
                 && cpu_concat_pd_t::init() == success
                 && dst_d.ndims() <= 6;
-
             if (!ok) return unimplemented;
 
             for (size_t i = 0; i < src_pds_.size(); ++i) {
@@ -61,118 +59,110 @@ struct simple_concat_t: public cpu_primitive_t {
                             o_d.data_type())
                     && i_d.format() == o_d.format()
                     && !utils::one_of(i_d.format(), memory_format::blocked,
-                        memory_format::wino_fmt)
+                            memory_format::wino_fmt)
                     && !i_d.is_additional_buffer();
+                if (!ok) return unimplemented;
             }
 
-            if (!ok)
-                return unimplemented;
-
-            format_perm(dst_d.ndims(), dst_d.blocking_desc().strides[0], perm_,
-                    iperm_);
+            format_perm();
 
+            // density check
             for (size_t i = 0; i < src_pds_.size(); ++i) {
                 const memory_desc_wrapper i_d(&src_pds_[i]);
                 const memory_desc_wrapper o_d(&src_image_pds_[i]);
-                ok = ok && is_dense(i_d) && is_dense(o_d);
+                ok = ok
+                    && nelems_to_concat(i_d) == size_to_concat(i_d)
+                    && nelems_to_concat(o_d) == size_to_concat(o_d);
+                if (!ok) return unimplemented;
             }
 
-            return ok ? success : unimplemented;
+            init_scratchpad();
+
+            return success;
         }
+
         dims_t perm_;
         dims_t iperm_;
-    };
 
-    simple_concat_t(const pd_t *conf, const input_vector &inputs,
-            const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*conf)
-    {
-        const int n = conf_.n_inputs();
-        input_ptrs_ = (decltype(input_ptrs_))malloc(
-                sizeof(*input_ptrs_) * n, 64);
-        output_ptrs_ = (decltype(output_ptrs_))malloc(
-                sizeof(*output_ptrs_) * n, 64);
-        nelems_to_copy_ = (decltype(nelems_to_copy_))malloc(
-                sizeof(*nelems_to_copy_) * n, 64);
-        is_ = (decltype(is_))malloc(sizeof(*is_) * n, 64);
-    }
+        size_t nelems_to_concat(const memory_desc_wrapper &data_d) const {
+            const int ndims = data_d.ndims();
+            auto &blk = data_d.blocking_desc();
 
-    ~simple_concat_t() {
-        free(input_ptrs_);
-        free(output_ptrs_);
-        free(nelems_to_copy_);
-        free(is_);
-    }
+            size_t nelems = 1;
+            for (int i = perm_[concat_dim()]; i < ndims; i++)
+                nelems *= data_d.dims()[iperm_[i]] / blk.block_dims[iperm_[i]];
+            for (int i = 0; i < ndims; i++)
+                nelems *= blk.block_dims[i];
 
-    virtual void execute(event_t *e) {
-        execute();
-        e->set_state(event_t::ready);
-    }
+            return nelems;
+        }
 
-    typedef typename prec_traits<data_type>::type data_t;
+    private:
+        void format_perm() {
+            const memory_desc_wrapper dst_d(&dst_pd_);
+            const int ndims = dst_d.ndims();
 
-private:
-    static void format_perm(
-            const int ndims, const stride_t *strides, int *perm, int *iperm) {
-        assert(ndims >= 0);
-        bool swapped;
-        strides_t strides_tmp;
-        utils::array_copy(strides_tmp, strides, ndims);
-        for (int i = 0; i < ndims; i++)
-            iperm[i] = i;
-        for (int i = 0; i < ndims - 1; i++) {
-            swapped = false;
-            for (int j = 0; j < ndims - i - 1; j++) {
-                if (strides_tmp[j] < strides_tmp[j + 1]) {
-                    nstl::swap(strides_tmp[j], strides_tmp[j + 1]);
-                    nstl::swap(iperm[j], iperm[j + 1]);
-                    swapped = true;
+            strides_t strides;
+            utils::array_copy(strides, dst_d.blocking_desc().strides[0], ndims);
+
+            for (int i = 0; i < ndims; i++) iperm_[i] = i;
+
+            for (int i = 0; i < ndims - 1; i++) {
+                bool swapped = false;
+                for (int j = 0; j < ndims - i - 1; j++) {
+                    if (strides[j] < strides[j + 1]) {
+                        nstl::swap(strides[j], strides[j + 1]);
+                        nstl::swap(iperm_[j], iperm_[j + 1]);
+                        swapped = true;
+                    }
                 }
+                if (swapped == false)
+                    break;
             }
-            if (swapped == false)
-                break;
-        }
-        for (int i = 0; i < ndims; i++)
-            perm[iperm[i]] = i;
-    }
 
-    static size_t nelems_to_concat(const int concat_dim, int *perm, int *iperm,
-            const memory_desc_wrapper &data_d) {
-        const int ndims = data_d.ndims();
-        auto &blk = data_d.blocking_desc();
-        int nelems = 1;
-        for (int i = perm[concat_dim]; i < ndims; i++) {
-            nelems *= data_d.dims()[iperm[i]] / blk.block_dims[iperm[i]];
+            for (int i = 0; i < ndims; i++) perm_[iperm_[i]] = i;
         }
-        for (int i = 0; i < ndims; i++) {
-            nelems *= blk.block_dims[i];
-        }
-        return nelems;
-    }
 
-    static size_t _size_to_concat(const int concat_dim, int *perm, int *iperm,
-            const memory_desc_wrapper &data_d) {
-        size_t max_size = 0;
-        auto &blk = data_d.blocking_desc();
-        for (int d = perm[concat_dim]; d < data_d.ndims(); ++d) {
-            auto block = blk.block_dims[iperm[d]];
-            max_size = nstl::max(max_size,
-                    size_t(blk.padding_dims[iperm[d]] / block)
-                            * blk.strides[0][iperm[d]]);
-            if (block > 1)
+        size_t size_to_concat(const memory_desc_wrapper &data_d) const {
+            size_t max_size = 0;
+            auto &blk = data_d.blocking_desc();
+            for (int d = perm_[concat_dim()]; d < data_d.ndims(); ++d) {
+                auto block = blk.block_dims[iperm_[d]];
                 max_size = nstl::max(max_size,
-                        size_t(block * blk.strides[1][iperm[d]]));
+                        size_t(blk.padding_dims[iperm_[d]] / block)
+                        * blk.strides[0][iperm_[d]]);
+                if (block > 1) max_size = nstl::max(max_size,
+                        size_t(block * blk.strides[1][iperm_[d]]));
+            }
+            return max_size;
+        }
+
+        void init_scratchpad() {
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            scratchpad.book(key_concat_iptrs, sizeof(data_t *) * n_inputs());
+            scratchpad.book(key_concat_optrs, sizeof(data_t *) * n_inputs());
+            scratchpad.book(key_concat_nelems, sizeof(size_t) * n_inputs());
+            scratchpad.book(key_concat_istrides,
+                    sizeof(strides_t) * n_inputs());
         }
-        return max_size;
+    };
+
+    simple_concat_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs) {}
+    ~simple_concat_t() {}
+
+    virtual void execute(event_t *e) const {
+        execute();
+        e->set_state(event_t::ready);
     }
 
-    void execute();
-    pd_t conf_;
+    typedef typename prec_traits<data_type>::type data_t;
 
-    const data_t **input_ptrs_ = nullptr;
-    data_t **output_ptrs_ = nullptr;
-    size_t *nelems_to_copy_ = nullptr;
-    strides_t *is_ = nullptr;
+private:
+    void execute() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_reorder.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_reorder.hpp
index e78d6adf8..4e4a7da7c 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_reorder.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_reorder.hpp
@@ -101,75 +101,6 @@ bool simple_attr_check(const primitive_attr_t *attr, bool many_scales_support) {
 /* specific reorders: implementation */
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
-    typename utils::enable_if<fmt_i == nChw8c && fmt_o == nChw16c>::type>
-{
-    static bool is_applicable(const memory_desc_wrapper &input_d,
-            const memory_desc_wrapper &output_d, const primitive_attr_t *attr)
-    {
-        return simple_fmt_check(order_keep, fmt_i, fmt_o, input_d, output_d)
-            && simple_attr_check(attr, false);
-    }
-
-
-    static status_t execute(const cpu_reorder_pd_t *pd,
-        const data_t<type_i> *input, data_t<type_o> *output) {
-        DECLARE_COMMON_PARAMS();
-
-        const auto &dims = input_d.dims();
-
-        constexpr int blksize_16c = 16;
-        constexpr int blksize_8c = 8;
-        constexpr int ic_mult = order_keep ? 2 : 1;
-        constexpr int oc_mult = order_keep ? 1 : 2;
-
-        const auto stride_8c = order_keep ? input_d.blocking_desc().strides[0]
-            : output_d.blocking_desc().strides[0];
-
-        auto ker = [&](const data_t<type_i> *i, data_t<type_o> *o, int blk_proc) {
-            if (alpha == 1.0 && beta == 0.0) {
-                for (int blk = 0; blk < blk_proc; ++blk){
-                    const int i_blk = order_keep ? blk * (int)stride_8c[1]
-                        : blk * blksize_8c;
-                    const int o_blk = order_keep ? blk * blksize_8c
-                        : blk * (int)stride_8c[1];
-                    for (int c = 0; c < blksize_8c; ++c) {
-                        o[o_blk + c] = i[i_blk + c];
-                    }
-                }
-            } else {
-                for (int blk = 0; blk < 2; ++blk) {
-                    const int i_blk = order_keep ? blk * (int)stride_8c[1]
-                        : blk * blksize_8c;
-                    const int o_blk = order_keep ? blk * blksize_8c
-                        : blk * (int)stride_8c[1];
-                    for (int c = 0; c < blk_proc; ++c) {
-                        o[o_blk + c] = data_t<type_o>(
-                            alpha * i[i_blk + c]
-                            + (beta ? beta * o[o_blk + c] : 0));
-                    }
-                }
-            }
-        };
-
-        const int CB = (dims[1] - 1) / blksize_16c + 1;
-        const int blktile_16  = ((dims[1] - 1) % blksize_16c + 1);
-        int blktile  = ((blktile_16 - 1) / blksize_8c + 1);
-
-        parallel_nd(dims[0], CB, dims[2], dims[3],
-            [&](int n, int C, int h, int w) {
-            auto i = &input[input_d.blk_off(n, C * ic_mult, h, w)];
-            auto o = &output[output_d.blk_off(n, C * oc_mult, h, w)];
-            ker(i,o, C < CB-1 ? 2 : blktile );
-
-        });
-
-        return success;
-    }
-};
-
-
-template <SIMPLE_REORDER_TEMPL_DECL>
-struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 typename utils::enable_if<fmt_i == any && (false
     || fmt_o == hwio_s8s8
     || fmt_o == hwigo_s8s8)>::type>
@@ -234,8 +165,10 @@ typename utils::enable_if<fmt_i == any && (false
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
     typename utils::enable_if<
-          (fmt_i == goihw && fmt_o == gOIhw4i16o4i_s8s8)
-       || (fmt_i == oihw && fmt_o == OIhw4i16o4i_s8s8)
+          ((fmt_i == goihw || fmt_i == oihw)
+           && (format_traits<fmt_o>::blk_fmt == bf::_4i16o4i_s8s8
+               || format_traits<fmt_o>::blk_fmt == bf::_2i8o4i_s8s8
+               || format_traits<fmt_o>::blk_fmt == bf::_4o4i_s8s8))
     >::type>
 {
     static bool is_applicable(const memory_desc_wrapper &input_d,
@@ -258,7 +191,7 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
         DECLARE_COMMON_PARAMS();
 
         static constexpr bool w_groups = fmt_i == goihw;
-        const int blksize = 16;
+        const int blksize = format_traits<fmt_o>::blk_size;
         const int sblk = 4;
 
         const auto &_g_oihw_d = order_keep ? input_d : output_d;
@@ -333,6 +266,85 @@ struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
     typename utils::enable_if<true
+    && (fmt_i == goihw && fmt_o == Goihw16g_s8s8)>::type>
+{
+    static bool is_applicable(const memory_desc_wrapper &input_d,
+            const memory_desc_wrapper &output_d, const primitive_attr_t *attr) {
+        const size_t D_mask = utils::array_product(input_d.dims(),
+                            math::ilog2q(attr->output_scales_.mask_ + 1));
+        const int oc = input_d.dims()[1];
+        const int g = input_d.dims()[0];
+
+        return true
+            && order_keep
+            && input_d.format() == fmt_i
+            && output_d.format() == fmt_o
+            && (input_d.data_type() == f32 || input_d.data_type() == s8)
+            && output_d.data_type() == s8
+            && (D_mask == 1 || D_mask == (size_t)g * oc);
+    }
+
+    static status_t execute(const cpu_reorder_pd_t *pd,
+            const data_t<type_i> *input, data_t<type_o> *output) {
+        DECLARE_COMMON_PARAMS();
+
+        const int blksize = 16;
+
+        const auto &dims = input_d.dims();
+        const auto &pdims = output_d.blocking_desc().padding_dims;
+        const int G = dims[0];
+        const int Gp = pdims[0];
+        const int OC = dims[1];
+        const int IC = dims[2];
+        const int H = dims[3];
+        const int W = dims[4];
+
+        const size_t D_mask = utils::array_product(input_d.dims(),
+                            math::ilog2q(pd->attr()->output_scales_.mask_ + 1));
+        const float *scales = pd->attr()->output_scales_.scales_;
+        float adj_scale = (mayiuse(avx512_core_vnni)) ? 1.f : (1.f / 2.f);
+
+
+        auto ker = [&](const data_t<type_i> *inp, data_t<type_o> *out,
+                int32_t *cp, const float *s, const int g_block) {
+            PRAGMA_OMP_SIMD()
+            for (int g = 0; g < g_block; g++) {
+                const auto i_off = g * input_d.blocking_desc().strides[0][0];
+                out[g] = qz_b0<data_t<type_i>, data_t<type_o>>()(
+                        inp[i_off], s[g * OC] * adj_scale, rmode);
+                cp[g * OC] -= 128 * (int32_t)(out[g]);
+            }
+        };
+
+        size_t cp_offset = output_d.size() - output_d.additional_buffer_size();
+        int32_t *cp = reinterpret_cast<int32_t *>(output + cp_offset);
+        parallel_nd((Gp/blksize) * OC, [&](int ib) {
+            PRAGMA_OMP_SIMD()
+            for (int i = 0; i < blksize; i++)
+                cp[ib * blksize + i] = 0;
+        });
+
+        parallel_nd(Gp/blksize, OC, [&](int gb, int O) {
+                for (int I = 0; I < IC; I++) {
+                    for (int h = 0; h < H; h++) {
+                    for (int w = 0; w < W; w++) {
+                        const int g_block = nstl::min(G - gb * blksize, blksize);
+                        const auto inp = &input[input_d.blk_off(gb * blksize, O, I, h, w)];
+                        const auto out = &output[output_d.blk_off(gb, O, I, h, w)];
+                        int offset = gb * blksize + O;
+                        ker(inp, out, &cp[offset],
+                            &scales[(D_mask == 1) ? 0 : offset], g_block);
+                   }
+                   }
+               }
+        });
+        return success;
+    }
+};
+
+template <SIMPLE_REORDER_TEMPL_DECL>
+struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
+    typename utils::enable_if<true
     && format_traits<fmt_i>::blk_fmt == bf::_8i16o2i
     && format_traits<fmt_o>::blk_fmt == bf::_8o16i2o>::type>
 {
@@ -530,7 +542,7 @@ typename utils::enable_if<fmt_i == nhwc && fmt_o == nChw8c>::type>
 
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
-typename utils::enable_if<fmt_i == nhwc && fmt_o == nhwc>::type>
+typename utils::enable_if<fmt_i == nhwc && fmt_o == nhwc && type_o != mkldnn_bin>::type>
 {
     static bool is_applicable(const memory_desc_wrapper &input_d,
         const memory_desc_wrapper &output_d, const primitive_attr_t *attr) {
@@ -570,7 +582,7 @@ typename utils::enable_if<fmt_i == nhwc && fmt_o == nhwc>::type>
 
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
-typename utils::enable_if<fmt_i == nchw && fmt_o == nhwc>::type>
+typename utils::enable_if<fmt_i == nchw && fmt_o == nhwc && type_i != mkldnn_bin && type_o != mkldnn_bin>::type>
 {
     static bool is_applicable(const memory_desc_wrapper &input_d,
         const memory_desc_wrapper &output_d, const primitive_attr_t *attr) {
@@ -621,6 +633,56 @@ typename utils::enable_if<fmt_i == nchw && fmt_o == nhwc>::type>
 
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
+typename utils::enable_if<(fmt_i == nchw || fmt_i == nhwc) && fmt_o == nhwc && (type_i == mkldnn_bin || type_o == mkldnn_bin)>::type>
+{
+    static bool is_applicable(const memory_desc_wrapper &input_d,
+        const memory_desc_wrapper &output_d, const primitive_attr_t *attr) {
+        int smask = attr ? attr->output_scales_.mask_ : 0;
+        return smask == 0 && order_keep && (input_d._md->format == nchw || input_d._md->format == nhwc) && output_d._md->format == nhwc;
+    }
+
+    static status_t execute(const cpu_reorder_pd_t *pd,
+        const data_t<type_i> *input, data_t<type_o> *output) {
+        DECLARE_COMMON_PARAMS();
+
+        const auto &dims = input_d.dims();
+        const int C = dims[1];
+        const int H = dims[2];
+        const int W = dims[3];
+
+        int nbits = 8;
+        const int CB = div_up(C, nbits);
+
+        auto ker = [&](const data_t<type_i> *i, data_t<type_o> *o) {
+            for (int cb = 0; cb < CB; ++cb) {
+                uint8_t bin_val = 0x00;
+                for (int c = cb * nbits, shift = 0; c < std::min(C, (cb + 1) * nbits); c++, shift++) {
+                    const ptrdiff_t flat_off = c * input_d.blocking_desc().strides[0][1];
+
+                    auto bit = uint8_t((i[flat_off] > 0) ? 0x01 : 0x00);
+                    bin_val |= (bit << shift);
+                }
+
+                o[cb] = bin_val;
+            }
+        };
+
+        parallel_nd(dims[0], H, W,
+            [&](int n, int h, int w) {
+                auto iidx = input_d.blk_off(n, 0, h, w);
+                auto oidx = output_d.blk_off(n, 0, h, w);
+
+                auto i = &input[iidx];
+                auto o = &output[oidx / nbits];
+                ker(i, o);
+        });
+
+        return success;
+    }
+};
+
+template <SIMPLE_REORDER_TEMPL_DECL>
+struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 typename utils::enable_if<fmt_i == nhwc && fmt_o == nchw>::type>
 {
     static bool is_applicable(const memory_desc_wrapper &input_d,
@@ -670,6 +732,90 @@ typename utils::enable_if<fmt_i == nhwc && fmt_o == nchw>::type>
     }
 };
 
+template <SIMPLE_REORDER_TEMPL_DECL>
+struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
+typename utils::enable_if<format_traits<fmt_i>::blk_fmt == bf::_8c
+    && format_traits<fmt_o>::blk_fmt == bf::_16c>::type>
+{
+    static bool is_applicable(const memory_desc_wrapper &input_d,
+            const memory_desc_wrapper &output_d, const primitive_attr_t *attr)
+    {
+        return simple_fmt_check(order_keep, fmt_i, fmt_o, input_d, output_d)
+            && simple_attr_check(attr, false);
+    }
+
+    static status_t execute(const cpu_reorder_pd_t *pd,
+        const data_t<type_i> *input, data_t<type_o> *output) {
+        DECLARE_COMMON_PARAMS();
+
+        constexpr int is_1d = format_traits<fmt_o>::ndims_sp == 1;
+        constexpr int is_3d = format_traits<fmt_o>::ndims_sp == 3;
+        constexpr int blksize_16 = format_traits<fmt_o>::blk_size;
+        constexpr int blksize_8 = format_traits<fmt_i>::blk_size;
+        constexpr int ic_mult = order_keep ? 2 : 1;
+        constexpr int oc_mult = order_keep ? 1 : 2;
+
+        const auto &nchw8c_d = order_keep ? input_d : output_d;
+        const auto &dims = input_d.dims();
+        const auto &pdims = order_keep ? output_d.blocking_desc().padding_dims
+                                       : input_d.blocking_desc().padding_dims;
+        const auto stride_8c = nchw8c_d.blocking_desc().strides[0];
+
+        const int C = dims[1];
+        const int D = is_3d ? dims[2] : 1;
+        const int H = is_1d ? 1 : dims[2 + is_3d];
+        const int W = dims[3 + is_3d - is_1d];
+
+        auto ker = [&](const data_t<type_i> *i, data_t<type_o> *o,
+            const int block_16) {
+            const int nb = (block_16 - 1) / blksize_8 + 1;
+            if (alpha == 1.0 && beta == 0.0) {
+                for (int b = 0; b < nb; ++b) {
+                    const ptrdiff_t i_off = order_keep ? b * stride_8c[1]
+                                                       : b * blksize_8;
+                    const ptrdiff_t o_off = order_keep ? b * blksize_8
+                                                       : b * stride_8c[1];
+                    const int block_8 = nstl::min(blksize_8,
+                                                  block_16 - b * blksize_8);
+                    for (int c = 0; c < block_8; ++c) {
+                        o[o_off + c] = _qz_a1b0<type_i, type_o>()(
+                                i[i_off + c], rmode);
+                    }
+                }
+            } else {
+                for (int b = 0; b < nb; ++b) {
+                    const ptrdiff_t i_off = order_keep ? b * stride_8c[1]
+                                                       : b * blksize_8;
+                    const ptrdiff_t o_off = order_keep ? b * blksize_8
+                                                       : b * stride_8c[1];
+                    const int block_8 = nstl::min(blksize_8,
+                                                  block_16 - b * blksize_8);
+                    for (int c = 0; c < block_8; ++c) {
+                        o[o_off + c] = _qz<type_i, type_o>()(i[i_off + c],
+                                o[o_off + c], alpha, beta, rmode);
+                    }
+                }
+            }
+        };
+
+#       define data_blk_off(md, n, c, d, h, w) \
+        ( is_1d ? (md).blk_off(n, c, w) \
+          : is_3d ? (md).blk_off(n, c, d, h, w) : (md).blk_off(n, c, h, w))
+
+        parallel_nd(dims[0], pdims[1] / blksize_16, D, H, W,
+            [&](int n, int nb_c, int d, int h, int w) {
+            auto i = &input[data_blk_off(input_d, n, ic_mult * nb_c, d, h, w)];
+            auto o = &output[data_blk_off(output_d, n, oc_mult * nb_c, d, h, w)];
+            const int block_16 = nstl::min(blksize_16, C - nb_c * blksize_16);
+            ker(i, o, block_16);
+        });
+
+#       undef data_blk_off
+
+        return success;
+    }
+};
+
 #define PLAIN_TO_BLOCKED_IS_APPLICABLE() \
     static bool is_applicable(const memory_desc_wrapper &input_d, \
         const memory_desc_wrapper &output_d, const primitive_attr_t *attr) { \
@@ -681,6 +827,7 @@ typename utils::enable_if<fmt_i == nhwc && fmt_o == nchw>::type>
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 typename utils::enable_if<fmt_i == any && (false
+    || format_traits<fmt_o>::blk_fmt == bf::_4c
     || format_traits<fmt_o>::blk_fmt == bf::_8c
     || format_traits<fmt_o>::blk_fmt == bf::_16c)>::type>
 {
@@ -956,8 +1103,77 @@ typename utils::enable_if<fmt_i == any && (fmt_o == OhIw8o4i || fmt_o == gOhIw8o
 
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
+typename utils::enable_if<fmt_i == any && (fmt_o == OhIw8o32i || fmt_o == OhIw16o32i) && type_i == mkldnn_bin && type_o == mkldnn_bin>::type>
+{
+    PLAIN_TO_BLOCKED_IS_APPLICABLE();
+
+    static status_t execute(const cpu_reorder_pd_t *pd,
+        const data_t<type_i> *input, data_t<type_o> *output) {
+        DECLARE_COMMON_PARAMS();
+
+        static constexpr bool w_groups
+            = format_traits<fmt_o>::data_kind == dk::gwei;
+        constexpr int is_1d = format_traits<fmt_o>::ndims_sp == 1;
+        constexpr int is_3d = format_traits<fmt_o>::ndims_sp == 3;
+        constexpr int blksize_o = fmt_o == OhIw8o32i ? 8 : 16;
+        constexpr int blksize_i = 32;
+
+        const auto &dims = input_d.dims();
+        const auto &pdims = order_keep
+            ? output_d.blocking_desc().padding_dims
+            : input_d.blocking_desc().padding_dims;
+
+        const int G = w_groups ? dims[0] : 1;
+        const int OC = dims[w_groups + 0];
+        const int NB_OC = pdims[w_groups + 0] / blksize_o;
+        const int IC = dims[w_groups + 1];
+        const int NB_IC = pdims[w_groups + 1] / blksize_i;
+        const int H = is_1d ? 1 : dims[w_groups + 2 + is_3d];
+        const int W = dims[w_groups + 3 + is_3d - is_1d];
+
+        constexpr int i_mult_o = blksize_o;
+        constexpr int i_mult_i = blksize_i;
+        constexpr int nbits = 8;
+
+        auto extract_bit = [](uint8_t val, uint8_t bit) -> uint8_t {
+            return (uint8_t) ((val >> bit) & 0x0001);
+        };
+
+        parallel_nd(G, NB_OC, NB_IC, H, W,
+            [&](int g, int nb_oc, int nb_ic, int h, int w) {
+                const int oc_block = nstl::min(blksize_o, OC - nb_oc * blksize_o);
+                const int ic_block = nstl::min(blksize_i, IC - nb_ic * blksize_i);
+
+                for (int oc = 0; oc < oc_block; ++oc) {
+                    for (int icb = 0; icb < div_up(ic_block, nbits); ++icb) {
+
+                        uint8_t bin_val = 0x00;
+                        for (int ic = icb*nbits, shift = 0; ic < std::min(IC, (icb + 1)*nbits); ic++, shift++) {
+                            size_t iidx = (i_mult_o * nb_oc + oc) * input_d.blocking_desc().strides[0][0] +
+                                          (i_mult_i * nb_ic + ic) *input_d.blocking_desc().strides[0][1] +
+                                                                h * input_d.blocking_desc().strides[0][2] +
+                                                                w;
+
+                            uint8_t bit = extract_bit(input[iidx / nbits], (uint8_t)(iidx % nbits));
+                            bin_val |= (bit << shift);
+                        }
+
+                        size_t oidx = wei_blk_off_like_gwei3D<fmt_o>(output_d, g, nb_oc, nb_ic, 0, h, w) + oc * blksize_i + icb * blksize_o;
+                        output[oidx / nbits] = bin_val;
+
+                    }
+                }
+            });
+
+        return success;
+    }
+};
+
+template <SIMPLE_REORDER_TEMPL_DECL>
+struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 typename utils::enable_if<fmt_i == any
-&& block_format_traits<format_traits<fmt_o>::blk_fmt>::blk_ndims == 2 && fmt_o != OhIw8o4i && fmt_o != gOhIw8o4i>::type>
+&& block_format_traits<format_traits<fmt_o>::blk_fmt>::blk_ndims == 2
+&& fmt_o != OhIw8o4i && fmt_o != gOhIw8o4i && fmt_o != OhIw8o32i && fmt_o != OhIw16o32i>::type>
 {
     PLAIN_TO_BLOCKED_IS_APPLICABLE();
 
@@ -1045,6 +1261,7 @@ typename utils::enable_if<fmt_i == any
 template <SIMPLE_REORDER_TEMPL_DECL>
 struct simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL,
 typename utils::enable_if<fmt_i == any && (false
+    || format_traits<fmt_o>::blk_fmt == bf::_4o
     || format_traits<fmt_o>::blk_fmt == bf::_8o
     || format_traits<fmt_o>::blk_fmt == bf::_16o)>::type>
 {
@@ -1392,21 +1609,21 @@ struct simple_reorder_t: public cpu_primitive_t {
         }
     };
 
-    simple_reorder_t(const pd_t *pd, const input_vector &inputs,
+    simple_reorder_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         auto input = reinterpret_cast<const data_t<type_i> *>(
                 this->input_memory(0));
         auto output = reinterpret_cast<data_t<type_o> *>(this->memory());
         simple_reorder_impl<SIMPLE_REORDER_TEMPL_CALL, spec>::execute(
-                &conf_, input, output);
+                pd(), input, output);
         e->set_state(event_t::ready);
     }
 
 private:
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 #undef SIMPLE_REORDER_TEMPL_DECL
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.cpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.cpp
index 4a4906143..fc7f94bca 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.cpp
@@ -22,16 +22,16 @@ namespace impl {
 namespace cpu {
 
 template <data_type_t data_type>
-void simple_sum_t<data_type>::execute() {
+void simple_sum_t<data_type>::execute() const {
     auto output = reinterpret_cast<data_t *>(this->memory());
-    const int num_arrs = conf_.n_inputs();
-    const memory_desc_wrapper o_d(conf_.dst_pd());
+    const int num_arrs = pd()->n_inputs();
+    const memory_desc_wrapper o_d(pd()->dst_pd());
     output += o_d.blk_off(0);
     const size_t nelems = o_d.nelems();
     const data_t *input_ptrs[max_num_arrs];
 
     for (int a = 0; a < num_arrs; ++a) {
-        const memory_desc_wrapper i_d(conf_.src_pd(a));
+        const memory_desc_wrapper i_d(pd()->src_pd(a));
 
         input_ptrs[a] = reinterpret_cast<const data_t *>(
                 this->input_memory(a)) + i_d.blk_off(0);
@@ -41,7 +41,7 @@ void simple_sum_t<data_type>::execute() {
     const size_t blocks_number = nelems / block_size;
     const size_t tail = nelems % block_size;
 
-    const auto &scales = conf_.scales_;
+    const auto &scales = pd()->scales_;
     parallel(0, [&](const int ithr, const int nthr) {
         size_t start{0}, end{0};
         balance211(blocks_number, nthr, ithr, start, end);
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.hpp
index 8704be513..133b25161 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/simple_sum.hpp
@@ -58,11 +58,11 @@ struct simple_sum_t: public cpu_primitive_t {
         }
     };
 
-    simple_sum_t(const pd_t *conf, const input_vector &inputs,
+    simple_sum_t(const pd_t *apd, const input_vector &inputs,
             const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*conf) {}
+        : cpu_primitive_t(apd, inputs, outputs) {}
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         execute();
         e->set_state(event_t::ready);
     }
@@ -71,8 +71,8 @@ struct simple_sum_t: public cpu_primitive_t {
     typedef typename prec_traits<data_type>::type data_t;
 
 private:
-    void execute();
-    pd_t conf_;
+    void execute() const;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
 };
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/wino_reorder.hpp b/inference-engine/thirdparty/mkl-dnn/src/cpu/wino_reorder.hpp
index 78d005e2a..0e2474636 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/wino_reorder.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/wino_reorder.hpp
@@ -35,30 +35,47 @@ struct wino_reorder_t : public cpu_primitive_t {
                 const primitive_attr_t *attr) {
             assert(input_pd->engine()->kind() == engine_kind::cpu);
             assert(output_pd->engine()->kind() == engine_kind::cpu);
-            const memory_desc_wrapper output_d(output_pd);
 
-            bool args_ok = true && input_pd->desc()->data_type == type_i
-                    && output_pd->desc()->data_type == type_o
-                    && one_of(input_pd->desc()->format, goihw, oihw)
-                    && output_pd->desc()->format == wino_fmt
-                    && one_of(output_d.wino_desc().wino_format,
-                               mkldnn_wino_wei_aaOIoi, mkldnn_wino_wei_aaOio,
-                               mkldnn_wino_wei_aaOBiOo,
-                               mkldnn_wino_wei_OBaaIBOIio);
-
-            if (!args_ok)
-                return status::invalid_arguments;
+            const memory_desc_wrapper id(input_pd), od(output_pd);
+            bool args_ok = true
+                && id.data_type() == type_i
+                && od.data_type() == type_o
+                && utils::one_of(id.format(), goihw, oihw)
+                && od.format() == wino_fmt
+                && one_of(od.wino_desc().wino_format,
+                        mkldnn_wino_wei_aaOIoi, mkldnn_wino_wei_aaOio,
+                        mkldnn_wino_wei_aaOBiOo, mkldnn_wino_wei_OBaaIBOIio);
+            if (!args_ok) return status::invalid_arguments;
 
             auto _pd = new pd_t((const cpu_memory_pd_t *)input_pd,
                     (const cpu_memory_pd_t *)output_pd, attr);
-            if (_pd == nullptr)
-                return out_of_memory;
-            if (_pd->init() != success) {
-                delete _pd;
-                return unimplemented;
-            }
+            if (_pd == nullptr) return out_of_memory;
+            if (_pd->init() != success) { delete _pd; return unimplemented; }
             return safe_ptr_assign<reorder_pd_t>(*reorder_pd, _pd);
         }
+
+        virtual status_t init() override {
+            status_t status = cpu_reorder_pd_t::init();
+            if (status != status::success) return status;
+
+            init_scratchpad();
+
+            return status::success;
+        }
+
+    private:
+        void init_scratchpad() {
+            auto &o = memory_desc_wrapper(output_pd()).wino_desc();
+            size_t transform_space_size = (size_t)o.r * o.alpha * o.oc_block;
+            size_t plain_size = (size_t)o.alpha * o.alpha * o.oc * o.ic;
+
+            using namespace memory_tracking::names;
+            auto scratchpad = scratchpad_registry().registrar();
+            scratchpad.book(key_reorder_wino_transform_space,
+                    sizeof(in_data_t) * transform_space_size);
+            scratchpad.book(key_reorder_wino_plain,
+                    sizeof(out_data_t) * plain_size);
+        }
     };
 
 private:
@@ -66,11 +83,12 @@ private:
     typedef typename prec_traits<type_o>::type out_data_t;
     const int unsign_val_in_wino_domain_ = 5;
 
-    wino_reorder_t(const pd_t *pd,
-            const input_vector &inputs, const output_vector &outputs)
-        : cpu_primitive_t(&conf_, inputs, outputs), conf_(*pd) {
-        const memory_desc_wrapper input_d(conf_.input_pd());
-        const memory_desc_wrapper output_d(conf_.output_pd());
+    wino_reorder_t(const pd_t *apd, const input_vector &inputs,
+            const output_vector &outputs)
+        : cpu_primitive_t(apd, inputs, outputs)
+    {
+        const memory_desc_wrapper input_d(pd()->input_pd());
+        const memory_desc_wrapper output_d(pd()->output_pd());
 
         r_ = output_d.wino_desc().r;
         w_alpha_ = output_d.wino_desc().alpha;
@@ -111,25 +129,18 @@ private:
 
         size_wino_wei_ = w_alpha_ * w_alpha_ * oc_ * ic_;
         size_wspace_ = r_ * w_alpha_ * oc_block_;
-
-        wspace_ = (in_data_t *)malloc(sizeof(in_data_t) * size_wspace_, 64);
-        tmp_wei_ =
-                (out_data_t *)malloc(sizeof(out_data_t) * size_wino_wei_, 64);
     }
 
-    ~wino_reorder_t() {
-        free(wspace_);
-        free(tmp_wei_);
-    }
-
-    void transform(const in_data_t *__restrict input) {
-        const memory_desc_wrapper input_d(conf_.input_pd()->desc());
+    void transform(out_data_t *__restrict tmp_wei,
+            const in_data_t *__restrict input,
+            in_data_t *__restrict wspace) const {
+        const memory_desc_wrapper input_d(pd()->input_pd()->desc());
 
-        round_mode_t rmode = conf_.attr()->round_mode_;
-        const int smask = conf_.attr()->output_scales_.mask_;
+        round_mode_t rmode = pd()->attr()->round_mode_;
+        const int smask = pd()->attr()->output_scales_.mask_;
         const int ndims_mask = math::ilog2q(smask + 1);
         const size_t D_mask = utils::array_product(input_d.dims(), ndims_mask);
-        const float *__restrict scales = conf_.attr()->output_scales_.scales_;
+        const float *__restrict scales = pd()->attr()->output_scales_.scales_;
         assert(D_mask == 1 || D_mask == (size_t)oc_);
 
         /* transform weights to winograd domain */
@@ -162,9 +173,9 @@ private:
             const in_data_t *__restrict _inp
                     = input + (ob * oc_block_ * or_ic_ + iic) * kh_ * kw_;
             out_data_t *__restrict _out
-                    = tmp_wei_ + (iic * nb_oc_ + ob) * oc_block_;
+                    = tmp_wei + (iic * nb_oc_ + ob) * oc_block_;
 
-            parallel_nd(size_wspace_, [&](int i) { wspace_[i] = 0.f; });
+            parallel_nd(size_wspace_, [&](int i) { wspace[i] = 0.f; });
 
             parallel_nd(r_, w_alpha_, oc_block_,
                 [&](int ih, int j, int ioc) {
@@ -174,7 +185,7 @@ private:
                     in_data_t inp_v = (inp_ic < or_ic_ && inp_oc < or_oc_)
                         ? _inp[ioc * or_ic_ * kh_ * kw_ + ih * kw_ + iw]
                         : 0.f;
-                    wspace_[(ih * w_alpha_ + j) * oc_block_ + ioc]
+                    wspace[(ih * w_alpha_ + j) * oc_block_ + ioc]
                             += inp_v * g[j * r_ + iw];
                 }
             });
@@ -184,7 +195,7 @@ private:
                 float t = 0;
                 for (int k = 0; k < r_; ++k)
                     t += g[i * r_ + k]
-                            * wspace_[(k * w_alpha_ + j) * oc_block_ + ioc];
+                            * wspace[(k * w_alpha_ + j) * oc_block_ + ioc];
                 if (type_o == s8) {
                     const float scale = (D_mask == 1)
                         ? scales[0]
@@ -199,7 +210,8 @@ private:
         }}
     }
 
-    void reorder_to_aaOIoi(out_data_t *__restrict output) {
+    void reorder_to_aaOIoi(out_data_t *__restrict output,
+            const out_data_t *__restrict tmp_wei) const {
         int32_t *__restrict dst_bias = nullptr;
         if (type_o == s8) {
             const auto bias_shift = sizeof(out_data_t) * size_wino_wei_;
@@ -229,7 +241,7 @@ private:
                     int dst_offset = u_h_shift + u_w_shift + oc_block_shift
                             + ic_block_shift;
 
-                    output[dst_offset] = tmp_wei_[src_offset];
+                    output[dst_offset] = tmp_wei[src_offset];
                     if (type_o == s8) {
                         int bias_offset = u_h_shift_b + u_w_shift_b + oc_shift;
                         if (index != unsign_val_in_wino_domain_)
@@ -244,7 +256,8 @@ private:
         }}
     }
 
-    void reorder_to_aaOio(out_data_t *__restrict output) {
+    void reorder_to_aaOio(out_data_t *__restrict output,
+            const out_data_t *__restrict tmp_wei) const {
         parallel_nd(w_alpha_, w_alpha_, nb_oc_,
             [&](int u_h, int u_w, int ob) {
             for (int ib = 0; ib < nb_ic_; ib++) {
@@ -258,12 +271,13 @@ private:
                     + u_w * nb_oc_ * nb_ic_ * ic_block_ * oc_block_
                     + ob * nb_ic_ * ic_block_ * oc_block_
                     + ib * ic_block_ * oc_block_ + i * oc_block_ + o;
-                output[dst_offset] = tmp_wei_[src_offset];
+                output[dst_offset] = tmp_wei[src_offset];
             }}}
         });
     }
 
-    void reorder_to_aaOBiOo(out_data_t *__restrict output) {
+    void reorder_to_aaOBiOo(out_data_t *__restrict output,
+            const out_data_t *__restrict tmp_wei) const {
         int oc_chunks = nb_oc_ / oc2_block_;
 
         parallel_nd(w_alpha_, w_alpha_, oc_chunks,
@@ -282,7 +296,7 @@ private:
 
                         int src_offset = u_h * w_alpha_ * ic_ * oc_
                             + u_w * ic_ * oc_ + icp * oc_ + ocp;
-                        wei_ptr[wei_offset + o] = tmp_wei_[src_offset];
+                        wei_ptr[wei_offset + o] = tmp_wei[src_offset];
                     }
                     wei_offset += oc_block_;
                 }}
@@ -290,7 +304,8 @@ private:
         });
     }
 
-    void reorder_to_OBaaIBOIio(out_data_t *__restrict output) {
+    void reorder_to_OBaaIBOIio(out_data_t *__restrict output,
+            const out_data_t *__restrict tmp_wei) const {
         int ic_chunks = nb_ic_ / ic2_block_;
         int oc_chunks = nb_oc_ / oc2_block_;
 
@@ -310,39 +325,46 @@ private:
                             * ic_chunks + icc) * oc2_block_ + ob) * ic2_block_
                             + ib) * ic_block_ + i) * oc_block_;
                     for (int o = 0; o < oc_block_; o++)
-                        output[wei_offset + o] = tmp_wei_[src_offset + o];
+                        output[wei_offset + o] = tmp_wei[src_offset + o];
                 }}
             }}
         });
     }
 
-    virtual void execute(event_t *e) {
+    virtual void execute(event_t *e) const {
         auto input = reinterpret_cast<const in_data_t *>(input_memory(0));
         auto output = reinterpret_cast<out_data_t *>(memory());
 
-        transform(input);
+        auto wspace = (in_data_t *__restrict)scratchpad().template get<void>(
+                memory_tracking::names::key_reorder_wino_transform_space);
+        auto tmp_wei = (out_data_t *__restrict)scratchpad().template get<void>(
+                memory_tracking::names::key_reorder_wino_plain);
+
+        transform(tmp_wei, input, wspace);
 
         /* reorder to winograd domain */
         switch (wino_format_) {
-        case mkldnn_wino_wei_aaOIoi: reorder_to_aaOIoi(output); break;
-        case mkldnn_wino_wei_aaOio: reorder_to_aaOio(output); break;
-        case mkldnn_wino_wei_aaOBiOo: reorder_to_aaOBiOo(output); break;
-        case mkldnn_wino_wei_OBaaIBOIio: reorder_to_OBaaIBOIio(output); break;
+        case mkldnn_wino_wei_aaOIoi:
+            reorder_to_aaOIoi(output, tmp_wei); break;
+        case mkldnn_wino_wei_aaOio:
+            reorder_to_aaOio(output, tmp_wei); break;
+        case mkldnn_wino_wei_aaOBiOo:
+            reorder_to_aaOBiOo(output, tmp_wei); break;
+        case mkldnn_wino_wei_OBaaIBOIio:
+            reorder_to_OBaaIBOIio(output, tmp_wei); break;
         default: assert("Unknown wino format"); break;
         }
 
         e->set_state(event_t::ready);
     }
 
-    pd_t conf_;
+    const pd_t *pd() const { return (const pd_t *)primitive_t::pd(); }
     int r_, w_alpha_;
     int ic_, oc_, or_ic_, or_oc_, kh_, kw_;
     int oc_block_, ic_block_, oc2_block_, ic2_block_;
     float adj_scale_;
     int nb_oc_, nb_ic_;
     mkldnn_wino_memory_format_t wino_format_;
-    in_data_t *__restrict wspace_;
-    out_data_t *__restrict tmp_wei_;
     int size_wino_wei_;
     int size_wspace_;
 };
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak.h b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak.h
index 74d91d4c1..5c202f486 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak.h
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
+* Copyright 2016-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -85,6 +85,8 @@
 // This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft.
 #if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\
 	 			 ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__)))
+	#include <unordered_set>
+	#define XBYAK_STD_UNORDERED_SET std::unordered_set
 	#include <unordered_map>
 	#define XBYAK_STD_UNORDERED_MAP std::unordered_map
 	#define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap
@@ -94,16 +96,22 @@
 	libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version).
 */
 #elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__)
+	#include <tr1/unordered_set>
+	#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
 	#include <tr1/unordered_map>
 	#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
 	#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
 
 #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600)
+	#include <unordered_set>
+	#define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set
 	#include <unordered_map>
 	#define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map
 	#define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap
 
 #else
+	#include <set>
+	#define XBYAK_STD_UNORDERED_SET std::set
 	#include <map>
 	#define XBYAK_STD_UNORDERED_MAP std::map
 	#define XBYAK_STD_UNORDERED_MULTIMAP std::multimap
@@ -150,7 +158,7 @@ namespace Xbyak {
 
 enum {
 	DEFAULT_MAX_CODE_SIZE = 4096,
-	VERSION = 0x5631 /* 0xABCD = A.BC(D) */
+	VERSION = 0x5760 /* 0xABCD = A.BC(D) */
 };
 
 #ifndef MIE_INTEGER_TYPE_DEFINED
@@ -223,7 +231,8 @@ enum {
 	ERR_INVALID_ZERO,
 	ERR_INVALID_RIP_IN_AUTO_GROW,
 	ERR_INVALID_MIB_ADDRESS,
-	ERR_INTERNAL
+	ERR_INTERNAL,
+	ERR_X2APIC_IS_NOT_SUPPORTED
 };
 
 class Error : public std::exception {
@@ -285,6 +294,7 @@ public:
 			"invalid rip in AutoGrow",
 			"invalid mib address",
 			"internal error",
+			"x2APIC is not supported"
 		};
 		assert((size_t)err_ < sizeof(errTbl) / sizeof(*errTbl));
 		return errTbl[err_];
@@ -662,6 +672,12 @@ struct RegRip {
 	const Label* label_;
 	bool isAddr_;
 	explicit RegRip(sint64 disp = 0, const Label* label = 0, bool isAddr = false) : disp_(disp), label_(label), isAddr_(isAddr) {}
+	friend const RegRip operator+(const RegRip& r, int disp) {
+		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
+	}
+	friend const RegRip operator-(const RegRip& r, int disp) {
+		return RegRip(r.disp_ - disp, r.label_, r.isAddr_);
+	}
 	friend const RegRip operator+(const RegRip& r, sint64 disp) {
 		return RegRip(r.disp_ + disp, r.label_, r.isAddr_);
 	}
@@ -831,6 +847,7 @@ inline RegExp operator-(const RegExp& e, size_t disp)
 
 // 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc)
 void *const AutoGrow = (void*)1; //-V566
+void *const DontSetProtectRWE = (void*)2; //-V566
 
 class CodeArray {
 	enum Type {
@@ -870,6 +887,7 @@ protected:
 	size_t size_;
 	bool isCalledCalcJmpAddress_;
 
+	bool useProtect() const { return alloc_->useProtect(); }
 	/*
 		allocate new memory and copy old data to the new area
 	*/
@@ -893,12 +911,16 @@ protected:
 			uint64 disp = i->getVal(top_);
 			rewrite(i->codeOffset, disp, i->jmpSize);
 		}
-		if (alloc_->useProtect() && !protect(top_, size_, true)) throw Error(ERR_CANT_PROTECT);
 		isCalledCalcJmpAddress_ = true;
 	}
 public:
+	enum ProtectMode {
+		PROTECT_RW = 0, // read/write
+		PROTECT_RWE = 1, // read/write/exec
+		PROTECT_RE = 2 // read/exec
+	};
 	explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0)
-		: type_(userPtr == AutoGrow ? AUTO_GROW : userPtr ? USER_BUF : ALLOC_BUF)
+		: type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF)
 		, alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_)
 		, maxSize_(maxSize)
 		, top_(type_ == USER_BUF ? reinterpret_cast<uint8*>(userPtr) : alloc_->alloc((std::max<size_t>)(maxSize, 1)))
@@ -906,7 +928,7 @@ public:
 		, isCalledCalcJmpAddress_(false)
 	{
 		if (maxSize_ > 0 && top_ == 0) throw Error(ERR_CANT_ALLOC);
-		if ((type_ == ALLOC_BUF && alloc_->useProtect()) && !protect(top_, maxSize, true)) {
+		if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) {
 			alloc_->free(top_);
 			throw Error(ERR_CANT_PROTECT);
 		}
@@ -914,10 +936,19 @@ public:
 	virtual ~CodeArray()
 	{
 		if (isAllocType()) {
-			if (alloc_->useProtect()) protect(top_, maxSize_, false);
+			if (useProtect()) setProtectModeRW(false);
 			alloc_->free(top_);
 		}
 	}
+	bool setProtectMode(ProtectMode mode, bool throwException = true)
+	{
+		bool isOK = protect(top_, maxSize_, mode);
+		if (isOK) return true;
+		if (throwException) throw Error(ERR_CANT_PROTECT);
+		return false;
+	}
+	bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); }
+	bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); }
 	void resetSize()
 	{
 		size_ = 0;
@@ -949,10 +980,10 @@ public:
 	void dq(uint64 code) { db(code, 8); }
 	const uint8 *getCode() const { return top_; }
 	template<class F>
-	const F getCode() const { return CastTo<F>(top_); }
+	const F getCode() const { return reinterpret_cast<F>(top_); }
 	const uint8 *getCurr() const { return &top_[size_]; }
 	template<class F>
-	const F getCurr() const { return CastTo<F>(&top_[size_]); }
+	const F getCurr() const { return reinterpret_cast<F>(&top_[size_]); }
 	size_t getSize() const { return size_; }
 	void setSize(size_t size)
 	{
@@ -1005,19 +1036,39 @@ public:
 		change exec permission of memory
 		@param addr [in] buffer address
 		@param size [in] buffer size
-		@param canExec [in] true(enable to exec), false(disable to exec)
+		@param protectMode [in] mode(RW/RWE/RE)
 		@return true(success), false(failure)
 	*/
-	static inline bool protect(const void *addr, size_t size, bool canExec)
+	static inline bool protect(const void *addr, size_t size, int protectMode)
 	{
 #if defined(_WIN32)
+		const DWORD c_rw = PAGE_READWRITE;
+		const DWORD c_rwe = PAGE_EXECUTE_READWRITE;
+		const DWORD c_re = PAGE_EXECUTE_READ;
+		DWORD mode;
+#else
+		const int c_rw = PROT_READ | PROT_WRITE;
+		const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC;
+		const int c_re = PROT_READ | PROT_EXEC;
+		int mode;
+#endif
+		switch (protectMode) {
+		case PROTECT_RW: mode = c_rw; break;
+		case PROTECT_RWE: mode = c_rwe; break;
+		case PROTECT_RE: mode = c_re; break;
+		default:
+			return false;
+		}
+#if defined(_WIN32)
 		DWORD oldProtect;
-		return VirtualProtect(const_cast<void*>(addr), size, canExec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldProtect) != 0;
+		return VirtualProtect(const_cast<void*>(addr), size, mode, &oldProtect) != 0;
 #elif defined(__GNUC__)
 		size_t pageSize = sysconf(_SC_PAGESIZE);
 		size_t iaddr = reinterpret_cast<size_t>(addr);
 		size_t roundAddr = iaddr & ~(pageSize - static_cast<size_t>(1));
-		int mode = PROT_READ | PROT_WRITE | (canExec ? PROT_EXEC : 0);
+#ifndef NDEBUG
+		if (pageSize != 4096) fprintf(stderr, "large page(%zd) is used. not tested enough.\n", pageSize);
+#endif
 		return mprotect(reinterpret_cast<void*>(roundAddr), size + (iaddr - roundAddr), mode) == 0;
 #else
 		return true;
@@ -1044,46 +1095,43 @@ public:
 		M_ripAddr
 	};
 	Address(uint32 sizeBit, bool broadcast, const RegExp& e)
-		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), permitVsib_(false), broadcast_(broadcast)
+		: Operand(0, MEM, sizeBit), e_(e), label_(0), mode_(M_ModRM), broadcast_(broadcast)
 	{
 		e_.verify();
 	}
 #ifdef XBYAK64
 	explicit Address(size_t disp)
-		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), permitVsib_(false), broadcast_(false){ }
+		: Operand(0, MEM, 64), e_(disp), label_(0), mode_(M_64bitDisp), broadcast_(false){ }
 	Address(uint32 sizeBit, bool broadcast, const RegRip& addr)
-		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), permitVsib_(false), broadcast_(broadcast) { }
+		: Operand(0, MEM, sizeBit), e_(addr.disp_), label_(addr.label_), mode_(addr.isAddr_ ? M_ripAddr : M_rip), broadcast_(broadcast) { }
 #endif
-	void permitVsib() const { permitVsib_ = true; }
 	RegExp getRegExp(bool optimize = true) const
 	{
 		return optimize ? e_.optimize() : e_;
 	}
 	Mode getMode() const { return mode_; }
-	bool is32bit() const { verify(); return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
-	bool isOnlyDisp() const { verify(); return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
-	size_t getDisp() const { verify(); return e_.getDisp(); }
+	bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; }
+	bool isOnlyDisp() const { return !e_.getBase().getBit() && !e_.getIndex().getBit(); } // for mov eax
+	size_t getDisp() const { return e_.getDisp(); }
 	uint8 getRex() const
 	{
-		verify();
 		if (mode_ != M_ModRM) return 0;
 		return getRegExp().getRex();
 	}
-	bool is64bitDisp() const { verify(); return mode_ == M_64bitDisp; } // for moffset
+	bool is64bitDisp() const { return mode_ == M_64bitDisp; } // for moffset
 	bool isBroadcast() const { return broadcast_; }
 	const Label* getLabel() const { return label_; }
 	bool operator==(const Address& rhs) const
 	{
-		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && permitVsib_ == rhs.permitVsib_ && broadcast_ == rhs.broadcast_;
+		return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && broadcast_ == rhs.broadcast_;
 	}
 	bool operator!=(const Address& rhs) const { return !operator==(rhs); }
+	bool isVsib() const { return e_.isVsib(); }
 private:
 	RegExp e_;
 	const Label* label_;
 	Mode mode_;
-	mutable bool permitVsib_;
 	bool broadcast_;
-	void verify() const { if (e_.isVsib() && !permitVsib_) throw Error(ERR_BAD_VSIB_ADDRESSING); }
 };
 
 inline const Address& Operand::getAddress() const
@@ -1141,6 +1189,7 @@ public:
 	Label(const Label& rhs);
 	Label& operator=(const Label& rhs);
 	~Label();
+	void clear() { mgr = 0; id = 0; }
 	int getId() const { return id; }
 	const uint8 *getAddress() const;
 
@@ -1179,6 +1228,7 @@ class LabelManager {
 	};
 	typedef XBYAK_STD_UNORDERED_MAP<int, ClabelVal> ClabelDefList;
 	typedef XBYAK_STD_UNORDERED_MULTIMAP<int, const JmpLabel> ClabelUndefList;
+	typedef XBYAK_STD_UNORDERED_SET<Label*> LabelPtrList;
 
 	CodeArray *base_;
 	// global : stateList_.front(), local : stateList_.back()
@@ -1186,6 +1236,7 @@ class LabelManager {
 	mutable int labelId_;
 	ClabelDefList clabelDefList_;
 	ClabelUndefList clabelUndefList_;
+	LabelPtrList labelPtrList_;
 
 	int getId(const Label& label) const
 	{
@@ -1234,9 +1285,14 @@ class LabelManager {
 		return true;
 	}
 	friend class Label;
-	void incRefCount(int id) { clabelDefList_[id].refCount++; }
-	void decRefCount(int id)
+	void incRefCount(int id, Label *label)
 	{
+		clabelDefList_[id].refCount++;
+		labelPtrList_.insert(label);
+	}
+	void decRefCount(int id, Label *label)
+	{
+		labelPtrList_.erase(label);
 		ClabelDefList::iterator i = clabelDefList_.find(id);
 		if (i == clabelDefList_.end()) return;
 		if (i->second.refCount == 1) {
@@ -1255,11 +1311,23 @@ class LabelManager {
 #endif
 		return !list.empty();
 	}
+	// detach all labels linked to LabelManager
+	void resetLabelPtrList()
+	{
+		for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) {
+			(*i)->clear();
+		}
+		labelPtrList_.clear();
+	}
 public:
 	LabelManager()
 	{
 		reset();
 	}
+	~LabelManager()
+	{
+		resetLabelPtrList();
+	}
 	void reset()
 	{
 		base_ = 0;
@@ -1269,6 +1337,7 @@ public:
 		stateList_.push_back(SlabelState());
 		clabelDefList_.clear();
 		clabelUndefList_.clear();
+		resetLabelPtrList();
 	}
 	void enterLocal()
 	{
@@ -1301,10 +1370,11 @@ public:
 		SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front();
 		define_inner(st.defList, st.undefList, label, base_->getSize());
 	}
-	void defineClabel(const Label& label)
+	void defineClabel(Label& label)
 	{
 		define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize());
 		label.mgr = this;
+		labelPtrList_.insert(&label);
 	}
 	void assign(Label& dst, const Label& src)
 	{
@@ -1312,6 +1382,7 @@ public:
 		if (i == clabelDefList_.end()) throw Error(ERR_LABEL_ISNOT_SET_BY_L);
 		define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset);
 		dst.mgr = this;
+		labelPtrList_.insert(&dst);
 	}
 	bool getOffset(size_t *offset, std::string& label) const
 	{
@@ -1359,19 +1430,19 @@ inline Label::Label(const Label& rhs)
 {
 	id = rhs.id;
 	mgr = rhs.mgr;
-	if (mgr) mgr->incRefCount(id);
+	if (mgr) mgr->incRefCount(id, this);
 }
 inline Label& Label::operator=(const Label& rhs)
 {
 	if (id) throw Error(ERR_LABEL_IS_ALREADY_SET_BY_L);
 	id = rhs.id;
 	mgr = rhs.mgr;
-	if (mgr) mgr->incRefCount(id);
+	if (mgr) mgr->incRefCount(id, this);
 	return *this;
 }
 inline Label::~Label()
 {
-	if (id && mgr) mgr->decRefCount(id);
+	if (id && mgr) mgr->decRefCount(id, this);
 }
 inline const uint8* Label::getAddress() const
 {
@@ -1488,6 +1559,8 @@ private:
 		T_B32 = 1 << 26, // m32bcst
 		T_B64 = 1 << 27, // m64bcst
 		T_M_K = 1 << 28, // mem{k}
+		T_VSIB = 1 << 29,
+		T_MEM_EVEX = 1 << 30, // use evex if mem
 		T_XXX
 	};
 	void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
@@ -1525,7 +1598,7 @@ private:
 		if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) return Error(err);
 		return v;
 	}
-	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0)
+	int evex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false, bool b = false, int aaa = 0, uint32 VL = 0, bool Hi16Vidx = false)
 	{
 		if (!(type & (T_EVEX | T_MUST_EVEX))) throw Error(ERR_EVEX_IS_INVALID);
 		int w = (type & T_EW1) ? 1 : 0;
@@ -1568,7 +1641,7 @@ private:
 				}
 			}
 		}
-		bool Vp = !(v ? v->isExtIdx2() : 0);
+		bool Vp = !((v ? v->isExtIdx2() : 0) | Hi16Vidx);
 		bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false);
 		if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
 		db(0x62);
@@ -1714,8 +1787,9 @@ private:
 	// reg is reg field of ModRM
 	// immSize is the size for immediate value
 	// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
-	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0)
+	void opAddr(const Address &addr, int reg, int immSize = 0, int disp8N = 0, bool permitVisb = false)
 	{
+		if (!permitVisb && addr.isVsib()) throw Error(ERR_BAD_VSIB_ADDRESSING);
 		if (addr.getMode() == Address::M_ModRM) {
 			setSIB(addr.getRegExp(), reg, disp8N);
 		} else if (addr.getMode() == Address::M_rip || addr.getMode() == Address::M_ripAddr) {
@@ -1857,15 +1931,20 @@ private:
 	}
 	void opPushPop(const Operand& op, int code, int ext, int alt)
 	{
-		if (op.isREG()) {
-			if (op.isBit(16)) db(0x66);
-			if (op.getReg().getIdx() >= 8) db(0x41);
-			db(alt | (op.getIdx() & 7));
-		} else if (op.isMEM()) {
-			opModM(op.getAddress(), Reg(ext, Operand::REG, op.getBit()), code);
-		} else {
-			throw Error(ERR_BAD_COMBINATION);
+		int bit = op.getBit();
+		if (bit == 16 || bit == BIT) {
+			if (bit == 16) db(0x66);
+			if (op.isREG()) {
+				if (op.getReg().getIdx() >= 8) db(0x41);
+				db(alt | (op.getIdx() & 7));
+				return;
+			}
+			if (op.isMEM()) {
+				opModM(op.getAddress(), Reg(ext, Operand::REG, 32), code);
+				return;
+			}
 		}
+		throw Error(ERR_BAD_COMBINATION);
 	}
 	void verifyMemHasSize(const Operand& op) const
 	{
@@ -1954,10 +2033,11 @@ private:
 			const Address& addr = op2.getAddress();
 			const RegExp& regExp = addr.getRegExp();
 			const Reg& base = regExp.getBase();
+			const Reg& index = regExp.getIndex();
 			if (BIT == 64 && addr.is32bit()) db(0x67);
 			int disp8N = 0;
-			bool x = regExp.getIndex().isExtIdx();
-			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
+			bool x = index.isExtIdx();
+			if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx()) {
 				int aaa = addr.getOpmaskIdx();
 				if (aaa && !(type & T_M_K)) throw Error(ERR_INVALID_OPMASK_WITH_MEMORY);
 				bool b = false;
@@ -1965,12 +2045,12 @@ private:
 					if (!(type & (T_B32 | T_B64))) throw Error(ERR_INVALID_BROADCAST);
 					b = true;
 				}
-				int VL = regExp.isVsib() ? regExp.getIndex().getBit() : 0;
-				disp8N = evex(r, base, p1, type, code, x, b, aaa, VL);
+				int VL = regExp.isVsib() ? index.getBit() : 0;
+				disp8N = evex(r, base, p1, type, code, x, b, aaa, VL, index.isExtIdx2());
 			} else {
 				vex(r, base, p1, type, code, x);
 			}
-			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N);
+			opAddr(addr, r.getIdx(), (imm8 != NONE) ? 1 : 0, disp8N, (type & T_VSIB) != 0);
 		} else {
 			const Reg& base = op2.getReg();
 			if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) {
@@ -2071,8 +2151,7 @@ private:
 			}
 			if (!isOK) throw Error(ERR_BAD_VSIB_ADDRESSING);
 		}
-		addr.permitVsib();
-		opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type | T_YMM, code);
+		opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
 	}
 	enum {
 		xx_yy_zz = 0,
@@ -2096,7 +2175,6 @@ private:
 	{
 		if (x.hasZero()) throw Error(ERR_INVALID_ZERO);
 		checkGather2(x, addr.getRegExp().getIndex(), mode);
-		addr.permitVsib();
 		opVex(x, 0, addr, type, code);
 	}
 	/*
@@ -2116,7 +2194,6 @@ private:
 	{
 		if (addr.hasZero()) throw Error(ERR_INVALID_ZERO);
 		if (addr.getRegExp().getIndex().getKind() != kind) throw Error(ERR_BAD_VSIB_ADDRESSING);
-		addr.permitVsib();
 		opVex(x, 0, addr, type, code);
 	}
 public:
@@ -2169,7 +2246,8 @@ public:
 	const Segment es, cs, ss, ds, fs, gs;
 #endif
 	void L(const std::string& label) { labelMgr_.defineSlabel(label); }
-	void L(const Label& label) { labelMgr_.defineClabel(label); }
+	void L(Label& label) { labelMgr_.defineClabel(label); }
+	Label L() { Label label; L(label); return label; }
 	void inLocalLabel() { labelMgr_.enterLocal(); }
 	void outLocalLabel() { labelMgr_.leaveLocal(); }
 	/*
@@ -2200,7 +2278,7 @@ public:
 	// call(function pointer)
 #ifdef XBYAK_VARIADIC_TEMPLATE
 	template<class Ret, class... Params>
-	void call(Ret(*func)(Params...)) { call(CastTo<const void*>(func)); }
+	void call(Ret(*func)(Params...)) { call(reinterpret_cast<const void*>(func)); }
 #endif
 	void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); }
 
@@ -2458,11 +2536,16 @@ public:
 		MUST call ready() to complete generating code if you use AutoGrow mode.
 		It is not necessary for the other mode if hasUndefinedLabel() is true.
 	*/
-	void ready()
+	void ready(ProtectMode mode = PROTECT_RWE)
 	{
 		if (hasUndefinedLabel()) throw Error(ERR_LABEL_IS_NOT_FOUND);
-		if (isAutoGrow()) calcJmpAddress();
+		if (isAutoGrow()) {
+			calcJmpAddress();
+			if (useProtect()) setProtectMode(mode);
+		}
 	}
+	// set read/exec
+	void readyRE() { return ready(PROTECT_RE); }
 #ifdef XBYAK_TEST
 	void dump(bool doClear = true)
 	{
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_bin2hex.h b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_bin2hex.h
index 5b812bdf5..a22e5224c 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_bin2hex.h
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_bin2hex.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
+* Copyright 2016-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_mnemonic.h b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_mnemonic.h
index 9e3c53518..28d2d222f 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_mnemonic.h
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_mnemonic.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
+* Copyright 2016-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@
 * THE POSSIBILITY OF SUCH DAMAGE.
 *******************************************************************************/
 
-const char *getVersionString() const { return "5.631"; }
+const char *getVersionString() const { return "5.76"; }
 void adc(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x10, 2); }
 void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
 void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
@@ -167,8 +167,11 @@ void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); }
 void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); }
 void cmppd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x66, isXMM_XMMorMEM, imm8); }
 void cmpps(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0x100, isXMM_XMMorMEM, imm8); }
+void cmpsb() { db(0xA6); }
+void cmpsd() { db(0xA7); }
 void cmpsd(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF2, isXMM_XMMorMEM, imm8); }
 void cmpss(const Xmm& xmm, const Operand& op, uint8 imm8) { opGen(xmm, op, 0xC2, 0xF3, isXMM_XMMorMEM, imm8); }
+void cmpsw() { db(0x66); db(0xA7); }
 void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); }
 void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); }
 void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); }
@@ -728,6 +731,9 @@ void sar(const Operand& op, int imm) { opShift(op, imm, 7); }
 void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opGpr(r1, op, r2, T_F3 | T_0F38, 0xf7, false); }
 void sbb(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x18, 3); }
 void sbb(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x18); }
+void scasb() { db(0xAE); }
+void scasd() { db(0xAF); }
+void scasw() { db(0x66); db(0xAF); }
 void seta(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 7); }//-V524
 void setae(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 3); }//-V524
 void setb(const Operand& op) { opR_ModM(op, 8, 0, 0x0F, 0x90 | 2); }//-V524
@@ -787,6 +793,9 @@ void stc() { db(0xF9); }
 void std() { db(0xFD); }
 void sti() { db(0xFB); }
 void stmxcsr(const Address& addr) { opModM(addr, Reg32(3), 0x0F, 0xAE); }
+void stosb() { db(0xAA); }
+void stosd() { db(0xAB); }
+void stosw() { db(0x66); db(0xAB); }
 void sub(const Operand& op, uint32 imm) { opRM_I(op, imm, 0x28, 5); }
 void sub(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x28); }
 void subpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0x66, isXMM_XMMorMEM); }
@@ -1046,10 +1055,10 @@ void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_X
 void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0xBE); }
 void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_W1 | T_EW1 | T_EVEX | T_ER_X, 0xBF); }
 void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_W0 | T_EW0 | T_EVEX | T_ER_X, 0xBF); }
-void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x92, 0); }
-void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x92, 1); }
-void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x93, 1); }
-void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x93, 2); }
+void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); }
+void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); }
+void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); }
+void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); }
 void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCF, imm); }
 void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W1 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0xCE, imm); }
 void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_SAE_Z, 0xCF); }
@@ -1059,7 +1068,7 @@ void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand())
 void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0x7D); }
 void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); }
 void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) throw Error(ERR_BAD_COMBINATION); opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); }
-void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
+void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_W0 | T_EW0 | T_EVEX, 0x21, imm); }
 void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); }
 void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); }
 void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); }
@@ -1180,10 +1189,10 @@ void vpextrb(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(8|16|
 void vpextrd(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N4, 0x16, imm); }
 void vpextrq(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); }
 void vpextrw(const Operand& op, const Xmm& x, uint8 imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) throw Error(ERR_BAD_COMBINATION); if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } }
-void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x90, 1); }
-void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x90, 0); }
-void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W0, 0x91, 2); }
-void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_W1, 0x91, 1); }
+void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); }
+void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); }
+void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); }
+void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); }
 void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x02); }
 void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x03); }
 void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x01); }
@@ -1242,28 +1251,28 @@ void vpshuflw(const Xmm& xm, const Operand& op, uint8 imm) { opAVX_X_XM_IMM(xm,
 void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x08); }
 void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x0A); }
 void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_YMM, 0x09); }
-void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
+void vpslld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
 void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xF2); }
-void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
-void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
+void vpslldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
+void vpsllq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
 void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xF3); }
 void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x47); }
 void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x47); }
-void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
+void vpsllw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
 void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xF1); }
-void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
+void vpsrad(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
 void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xE2); }
 void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x46); }
-void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
+void vpsraw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
 void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xE1); }
-void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0x72, imm); }
+void vpsrld(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32 | T_MEM_EVEX, 0x72, imm); }
 void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW0 | T_YMM | T_EVEX, 0xD2); }
-void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x73, imm); }
-void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64, 0x73, imm); }
+void vpsrldq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x73, imm); }
+void vpsrlq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_B64 | T_MEM_EVEX, 0x73, imm); }
 void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0xD3); }
 void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x45); }
 void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_W1 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x45); }
-void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX, 0x71, imm); }
+void vpsrlw(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66 | T_0F | T_YMM | T_EVEX | T_MEM_EVEX, 0x71, imm); }
 void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16 | T_66 | T_0F | T_YMM | T_EVEX, 0xD1); }
 void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM | T_EVEX, 0xF8); }
 void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_B32, 0xFA); }
@@ -1589,7 +1598,10 @@ void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
 void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); }
 void cdqe() { db(0x48); db(0x98); }
 void cqo() { db(0x48); db(0x99); }
+void cmpsq() { db(0x48); db(0xA7); }
 void movsq() { db(0x48); db(0xA5); }
+void scasq() { db(0x48); db(0xAF); }
+void stosq() { db(0x48); db(0xAB); }
 void cmpxchg16b(const Address& addr) { opModM(addr, Reg64(1), 0x0F, 0xC7); }
 void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
 void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
@@ -1762,18 +1774,18 @@ void vfpclasspd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(1
 void vfpclassps(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isBit(128|256|512)) throw Error(ERR_BAD_MEM_SIZE); Reg x = k; x.setBit(op.getBit()); opVex(x, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
 void vfpclasssd(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
 void vfpclassss(const Opmask& k, const Operand& op, uint8 imm) { if (!op.isXMEM()) throw Error(ERR_BAD_MEM_SIZE); opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
-void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x92, 1); }
-void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x92, 0); }
-void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
-void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
-void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
-void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
-void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
-void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
-void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
-void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
-void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x93, 0); }
-void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x93, 2); }
+void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
+void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
+void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
+void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
+void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
+void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
+void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
+void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
 void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
 void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
 void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
@@ -1860,10 +1872,10 @@ void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1 | T
 void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x89); }
 void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x89); }
 void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX, 0x62); }
-void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x90, 0); }
-void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x90, 1); }
-void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x91, 2); }
-void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x91, 0); }
+void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 0); }
+void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x90, 1); }
+void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 2); }
+void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x91, 0); }
 void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x44); }
 void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x44); }
 void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xB5); }
@@ -1914,10 +1926,10 @@ void vprord(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.get
 void vprorq(const Xmm& x, const Operand& op, uint8 imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66 | T_0F | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x72, imm); }
 void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x14); }
 void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x14); }
-void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 0); }
-void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA0, 1); }
-void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 2); }
-void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA1, 0); }
+void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 0); }
+void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA0, 1); }
+void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 2); }
+void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA1, 0); }
 void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71, imm); }
 void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8 imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x71, imm); }
 void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x71); }
@@ -1981,18 +1993,18 @@ void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x
 void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
 void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
 void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
-void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 1); }
-void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA2, 0); }
-void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
-void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
-void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
-void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
-void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC6, Operand::YMM); }
-void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC6, Operand::ZMM); }
-void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_N8 | T_M_K, 0xC7, Operand::ZMM); }
-void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_N4 | T_M_K, 0xC7, Operand::ZMM); }
-void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 0); }
-void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K, 0xA3, 2); }
+void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
+void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
+void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
+void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
+void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::YMM); }
+void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC6, Operand::ZMM); }
+void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8 | T_66 | T_0F38 | T_EW1 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4 | T_66 | T_0F38 | T_EW0 | T_MUST_EVEX | T_M_K | T_VSIB, 0xC7, Operand::ZMM); }
+void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 0); }
+void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA3, 2); }
 void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x23, imm); }
 void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
 void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8 imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
diff --git a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h
index 713c68db2..08f0a30c0 100644
--- a/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h
+++ b/inference-engine/thirdparty/mkl-dnn/src/cpu/xbyak/xbyak_util.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
+* Copyright 2016-2019 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -54,6 +54,11 @@
 */
 #include "xbyak.h"
 
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+	#define XBYAK_INTEL_CPU_SPECIFIC
+#endif
+
+#ifdef XBYAK_INTEL_CPU_SPECIFIC
 #ifdef _MSC_VER
 	#if (_MSC_VER < 1400) && defined(XBYAK32)
 		static inline __declspec(naked) void __cpuid(int[4], int)
@@ -92,14 +97,30 @@
 		#endif
 	#endif
 #endif
+#endif
 
 namespace Xbyak { namespace util {
 
+typedef enum {
+   SmtLevel = 1,
+   CoreLevel = 2
+} IntelCpuTopologyLevel;
+
 /**
 	CPU detection class
 */
 class Cpu {
 	uint64 type_;
+	//system topology
+	bool x2APIC_supported_;
+	static const size_t maxTopologyLevels = 2;
+	unsigned int numCores_[maxTopologyLevels];
+
+	static const unsigned int maxNumberCacheLevels = 10;
+	unsigned int dataCacheSize_[maxNumberCacheLevels];
+	unsigned int coresSharignDataCache_[maxNumberCacheLevels];
+	unsigned int dataCacheLevels_;
+
 	unsigned int get32bitAsBE(const char *x) const
 	{
 		return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
@@ -110,7 +131,7 @@ class Cpu {
 	}
 	void setFamily()
 	{
-		unsigned int data[4];
+		unsigned int data[4] = {};
 		getCpuid(1, data);
 		stepping = data[0] & mask(4);
 		model = (data[0] >> 4) & mask(4);
@@ -133,6 +154,42 @@ class Cpu {
 	{
 		return (val >> base) & ((1u << (end - base)) - 1);
 	}
+	void setNumCores()
+	{
+		if ((type_ & tINTEL) == 0) return;
+
+		unsigned int data[4] = {};
+
+		 /* CAUTION: These numbers are configuration as shipped by Intel. */
+		getCpuidEx(0x0, 0, data);
+		if (data[0] >= 0xB) {
+			 /*
+				if leaf 11 exists(x2APIC is supported),
+				we use it to get the number of smt cores and cores on socket
+
+				leaf 0xB can be zeroed-out by a hypervisor
+			*/
+			x2APIC_supported_ = true;
+			for (unsigned int i = 0; i < maxTopologyLevels; i++) {
+				getCpuidEx(0xB, i, data);
+				IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
+				if (level == SmtLevel || level == CoreLevel) {
+					numCores_[level - 1] = extractBit(data[1], 0, 15);
+				}
+			}
+			if (numCores_[SmtLevel - 1] != 0) {
+				numCores_[CoreLevel - 1] /= numCores_[SmtLevel - 1];
+			}
+		} else {
+			/*
+				Failed to deremine num of cores without x2APIC support.
+				TODO: USE initial APIC ID to determine ncores.
+			*/
+			numCores_[SmtLevel - 1] = 0;
+			numCores_[CoreLevel - 1] = 0;
+		}
+
+	}
 	void setCacheHierarchy()
 	{
 		if ((type_ & tINTEL) == 0) return;
@@ -141,21 +198,12 @@ class Cpu {
 //		const unsigned int INSTRUCTION_CACHE = 2;
 		const unsigned int UNIFIED_CACHE = 3;
 		unsigned int smt_width = 0;
-		unsigned int n_cores = 0;
-		unsigned int data[4];
+		unsigned int logical_cores = 0;
+		unsigned int data[4] = {};
 
-		/*
-			if leaf 11 exists, we use it to get the number of smt cores and cores on socket
-			If x2APIC is supported, these are the only correct numbers.
-
-			leaf 0xB can be zeroed-out by a hypervisor
-		*/
-		getCpuidEx(0x0, 0, data);
-		if (data[0] >= 0xB) {
-			getCpuidEx(0xB, 0, data); // CPUID for SMT Level
-			smt_width = data[1] & 0x7FFF;
-			getCpuidEx(0xB, 1, data); // CPUID for CORE Level
-			n_cores = data[1] & 0x7FFF;
+		if (x2APIC_supported_) {
+			smt_width = numCores_[0];
+			logical_cores = numCores_[1];
 		}
 
 		/*
@@ -163,28 +211,29 @@ class Cpu {
 			the first level of data cache is not shared (which is the
 			case for every existing architecture) and use this to
 			determine the SMT width for arch not supporting leaf 11.
-			when leaf 4 reports a number of core less than n_cores
+			when leaf 4 reports a number of core less than numCores_
 			on socket reported by leaf 11, then it is a correct number
 			of cores not an upperbound.
 		*/
-		for (int i = 0; data_cache_levels < maxNumberCacheLevels; i++) {
+		for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
 			getCpuidEx(0x4, i, data);
 			unsigned int cacheType = extractBit(data[0], 0, 4);
 			if (cacheType == NO_CACHE) break;
 			if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
-				unsigned int nb_logical_cores = extractBit(data[0], 14, 25) + 1;
-				if (n_cores != 0) // true only if leaf 0xB is supported and valid
-					nb_logical_cores = (std::min)(nb_logical_cores, n_cores);
-				assert(nb_logical_cores != 0);
-				data_cache_size[data_cache_levels] =
+				unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
+				if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
+					actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
+				}
+				assert(actual_logical_cores != 0);
+				dataCacheSize_[dataCacheLevels_] =
 					(extractBit(data[1], 22, 31) + 1)
 					* (extractBit(data[1], 12, 21) + 1)
 					* (extractBit(data[1], 0, 11) + 1)
 					* (data[2] + 1);
-				if (cacheType == DATA_CACHE && smt_width == 0) smt_width = nb_logical_cores;
+				if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores;
 				assert(smt_width != 0);
-				cores_sharing_data_cache[data_cache_levels] = nb_logical_cores / smt_width;
-				data_cache_levels++;
+				coresSharignDataCache_[dataCacheLevels_] = (std::max)(actual_logical_cores / smt_width, 1u);
+				dataCacheLevels_++;
 			}
 		}
 	}
@@ -198,22 +247,22 @@ public:
 	int displayFamily; // family + extFamily
 	int displayModel; // model + extModel
 
-	// may I move these members into private?
-	static const unsigned int maxNumberCacheLevels = 10;
-	unsigned int data_cache_size[maxNumberCacheLevels];
-	unsigned int cores_sharing_data_cache[maxNumberCacheLevels];
-	unsigned int data_cache_levels;
+	unsigned int getNumCores(IntelCpuTopologyLevel level) {
+		if (level != SmtLevel && level != CoreLevel) throw Error(ERR_BAD_PARAMETER);
+		if (!x2APIC_supported_) throw Error(ERR_X2APIC_IS_NOT_SUPPORTED);
+		return numCores_[level - 1];
+	}
 
-	unsigned int getDataCacheLevels() const { return data_cache_levels; }
+	unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
 	unsigned int getCoresSharingDataCache(unsigned int i) const
 	{
-		if (i >= data_cache_levels) throw  Error(ERR_BAD_PARAMETER);
-		return cores_sharing_data_cache[i];
+		if (i >= dataCacheLevels_) throw  Error(ERR_BAD_PARAMETER);
+		return coresSharignDataCache_[i];
 	}
 	unsigned int getDataCacheSize(unsigned int i) const
 	{
-		if (i >= data_cache_levels) throw  Error(ERR_BAD_PARAMETER);
-		return data_cache_size[i];
+		if (i >= dataCacheLevels_) throw  Error(ERR_BAD_PARAMETER);
+		return dataCacheSize_[i];
 	}
 
 	/*
@@ -221,30 +270,45 @@ public:
 	*/
 	static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
 	{
-#ifdef _MSC_VER
+#ifdef XBYAK_INTEL_CPU_SPECIFIC
+	#ifdef _MSC_VER
 		__cpuid(reinterpret_cast<int*>(data), eaxIn);
-#else
+	#else
 		__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
+	#endif
+#else
+		(void)eaxIn;
+		(void)data;
 #endif
 	}
 	static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
 	{
-#ifdef _MSC_VER
+#ifdef XBYAK_INTEL_CPU_SPECIFIC
+	#ifdef _MSC_VER
 		__cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);
-#else
+	#else
 		__cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);
+	#endif
+#else
+		(void)eaxIn;
+		(void)ecxIn;
+		(void)data;
 #endif
 	}
 	static inline uint64 getXfeature()
 	{
-#ifdef _MSC_VER
+#ifdef XBYAK_INTEL_CPU_SPECIFIC
+	#ifdef _MSC_VER
 		return _xgetbv(0);
-#else
+	#else
 		unsigned int eax, edx;
 		// xgetvb is not support on gcc 4.2
 //		__asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
 		__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
 		return ((uint64)edx << 32) | eax;
+	#endif
+#else
+		return 0;
 #endif
 	}
 	typedef uint64 Type;
@@ -315,9 +379,13 @@ public:
 
 	Cpu()
 		: type_(NONE)
-		, data_cache_levels(0)
+		, x2APIC_supported_(false)
+		, numCores_()
+		, dataCacheSize_()
+		, coresSharignDataCache_()
+		, dataCacheLevels_(0)
 	{
-		unsigned int data[4];
+		unsigned int data[4] = {};
 		const unsigned int& EAX = data[0];
 		const unsigned int& EBX = data[1];
 		const unsigned int& ECX = data[2];
@@ -407,6 +475,7 @@ public:
 			if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
 		}
 		setFamily();
+		setNumCores();
 		setCacheHierarchy();
 	}
 	void putFamily() const
@@ -425,12 +494,17 @@ class Clock {
 public:
 	static inline uint64 getRdtsc()
 	{
-#ifdef _MSC_VER
+#ifdef XBYAK_INTEL_CPU_SPECIFIC
+	#ifdef _MSC_VER
 		return __rdtsc();
-#else
+	#else
 		unsigned int eax, edx;
 		__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
 		return ((uint64)edx << 32) | eax;
+	#endif
+#else
+		// TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu
+		return 0;
 #endif
 	}
 	Clock()
@@ -460,7 +534,7 @@ const int UseRCX = 1 << 6;
 const int UseRDX = 1 << 7;
 
 class Pack {
-	static const size_t maxTblNum = 10;
+	static const size_t maxTblNum = 15;
 	const Xbyak::Reg64 *tbl_[maxTblNum];
 	size_t n_;
 public:
@@ -520,7 +594,7 @@ public:
 	const Xbyak::Reg64& operator[](size_t n) const
 	{
 		if (n >= n_) {
-			fprintf(stderr, "ERR Pack bad n=%d\n", (int)n);
+			fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
 			throw Error(ERR_BAD_PARAMETER);
 		}
 		return *tbl_[n];
@@ -562,6 +636,7 @@ class StackFrame {
 	static const int rcxPos = 3;
 	static const int rdxPos = 2;
 #endif
+	static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax
 	Xbyak::CodeGenerator *code_;
 	int pNum_;
 	int tNum_;
@@ -571,7 +646,7 @@ class StackFrame {
 	int P_;
 	bool makeEpilog_;
 	Xbyak::Reg64 pTbl_[4];
-	Xbyak::Reg64 tTbl_[10];
+	Xbyak::Reg64 tTbl_[maxRegNum];
 	Pack p_;
 	Pack t_;
 	StackFrame(const StackFrame&);
@@ -583,7 +658,7 @@ public:
 		make stack frame
 		@param sf [in] this
 		@param pNum [in] num of function parameter(0 <= pNum <= 4)
-		@param tNum [in] num of temporary register(0 <= tNum <= 10, with UseRCX, UseRDX)
+		@param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14
 		@param stackSizeByte [in] local stack size
 		@param makeEpilog [in] automatically call close() if true
 
@@ -610,27 +685,17 @@ public:
 		using namespace Xbyak;
 		if (pNum < 0 || pNum > 4) throw Error(ERR_BAD_PNUM);
 		const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0);
-		if (allRegNum < pNum || allRegNum > 14) throw Error(ERR_BAD_TNUM);
+		if (tNum_ < 0 || allRegNum > maxRegNum) throw Error(ERR_BAD_TNUM);
 		const Reg64& _rsp = code->rsp;
-		const AddressFrame& _ptr = code->ptr;
 		saveNum_ = (std::max)(0, allRegNum - noSaveNum);
 		const int *tbl = getOrderTbl() + noSaveNum;
-		P_ = saveNum_ + (stackSizeByte + 7) / 8;
-		if (P_ > 0 && (P_ & 1) == 0) P_++; // here (rsp % 16) == 8, then increment P_ for 16 byte alignment
-		P_ *= 8;
-		if (P_ > 0) code->sub(_rsp, P_);
-#ifdef XBYAK64_WIN
-		for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
-			code->mov(_ptr [_rsp + P_ + (i + 1) * 8], Reg64(tbl[i]));
-		}
-		for (int i = 4; i < saveNum_; i++) {
-			code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
-		}
-#else
 		for (int i = 0; i < saveNum_; i++) {
-			code->mov(_ptr [_rsp + P_ - 8 * (saveNum_ - i)], Reg64(tbl[i]));
+			code->push(Reg64(tbl[i]));
 		}
-#endif
+		P_ = (stackSizeByte + 7) / 8;
+		if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment
+		P_ *= 8;
+		if (P_ > 0) code->sub(_rsp, P_);
 		int pos = 0;
 		for (int i = 0; i < pNum; i++) {
 			pTbl_[i] = Xbyak::Reg64(getRegIdx(pos));
@@ -651,21 +716,11 @@ public:
 	{
 		using namespace Xbyak;
 		const Reg64& _rsp = code_->rsp;
-		const AddressFrame& _ptr = code_->ptr;
 		const int *tbl = getOrderTbl() + noSaveNum;
-#ifdef XBYAK64_WIN
-		for (int i = 0; i < (std::min)(saveNum_, 4); i++) {
-			code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ + (i + 1) * 8]);
-		}
-		for (int i = 4; i < saveNum_; i++) {
-			code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
-		}
-#else
+		if (P_ > 0) code_->add(_rsp, P_);
 		for (int i = 0; i < saveNum_; i++) {
-			code_->mov(Reg64(tbl[i]), _ptr [_rsp + P_ - 8 * (saveNum_ - i)]);
+			code_->pop(Reg64(tbl[saveNum_ - 1 - i]));
 		}
-#endif
-		if (P_ > 0) code_->add(_rsp, P_);
 
 		if (callRet) code_->ret();
 	}
@@ -677,9 +732,6 @@ public:
 		} catch (std::exception& e) {
 			printf("ERR:StackFrame %s\n", e.what());
 			exit(1);
-		} catch (...) {
-			printf("ERR:StackFrame otherwise\n");
-			exit(1);
 		}
 	}
 private:
@@ -698,7 +750,7 @@ private:
 	}
 	int getRegIdx(int& pos) const
 	{
-		assert(pos < 14);
+		assert(pos < maxRegNum);
 		using namespace Xbyak;
 		const int *tbl = getOrderTbl();
 		int r = tbl[pos++];
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/tests/CMakeLists.txt
index 6e9caa630..a4816eef8 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/CMakeLists.txt
+++ b/inference-engine/thirdparty/mkl-dnn/tests/CMakeLists.txt
@@ -37,7 +37,7 @@ append(CMAKE_CXX_FLAGS "${CMAKE_CCXX_SANITIZER_FLAGS}")
 
 # allow tests to include internal header files with, e.g.
 # include "src/common/mkldnn_thread.hpp"
-include_directories(${CMAKE_SOURCE_DIR})
+include_directories(${PROJECT_SOURCE_DIR})
 
 if(UNIX OR MINGW)
     # workaround for Intel Compiler 16.0 that doesn't suppress warning on
@@ -68,7 +68,7 @@ if(UNIX OR MINGW)
     add_custom_command(
         OUTPUT ${test_c_symbols}
         COMMAND /bin/bash ${CMAKE_CURRENT_SOURCE_DIR}/generate_c_symbols_refs.sh
-        ${CMAKE_CURRENT_SOURCE_DIR}/.. ${test_c_symbols}
+        ${CMAKE_CURRENT_SOURCE_DIR}/.. ${PROJECT_BINARY_DIR}/include ${test_c_symbols}
     )
     register_exe(test_c_symbols-c ${test_c_symbols} "test")
 # elseif(WIN32)
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/api.c b/inference-engine/thirdparty/mkl-dnn/tests/api.c
index da91859d8..55581d2d6 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/api.c
+++ b/inference-engine/thirdparty/mkl-dnn/tests/api.c
@@ -37,7 +37,7 @@
     } \
 } while(0)
 
-static size_t product(int *arr, size_t size) {
+static size_t product(ptrdiff_t *arr, size_t size) {
     size_t prod = 1;
     for (size_t i = 0; i < size; ++i) prod *= arr[i];
     return prod;
@@ -92,12 +92,12 @@ void test2() {
 
     const int mb = 2;
     const int groups = 2;
-    int c3_src_sizes[4] = {mb, 256, 13, 13};
-    int c3_weights_sizes[] = {groups, 384/groups, 256/groups, 3, 3};
-    int c3_bias_sizes[1] = {384};
-    int strides[] = {1, 1};
-    int32_t  padding[] = {0, 0}; // set proper values
-    int c3_dst_sizes[4] = {mb, 384,
+    ptrdiff_t c3_src_sizes[4] = {mb, 256, 13, 13};
+    ptrdiff_t c3_weights_sizes[] = {groups, 384/groups, 256/groups, 3, 3};
+    ptrdiff_t c3_bias_sizes[1] = {384};
+    ptrdiff_t strides[] = {1, 1};
+    ptrdiff_t  padding[] = {0, 0}; // set proper values
+    ptrdiff_t c3_dst_sizes[4] = {mb, 384,
         (c3_src_sizes[2] + 2*padding[0] - c3_weights_sizes[3])/strides[0] + 1,
         (c3_src_sizes[3] + 2*padding[1] - c3_weights_sizes[4])/strides[1] + 1
     };
@@ -249,7 +249,7 @@ void test2() {
 
 void test3() {
     const int mb = 2;
-    int l2_data_sizes[4] = {mb, 256, 13, 13};
+    ptrdiff_t l2_data_sizes[4] = {mb, 256, 13, 13};
 
     real_t *src = (real_t*)calloc(product(l2_data_sizes, 4), sizeof(real_t));
     real_t *dst = (real_t*)calloc(product(l2_data_sizes, 4), sizeof(real_t));
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/CMakeLists.txt
index aaaf7f808..ee32b4fc3 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/CMakeLists.txt
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/CMakeLists.txt
@@ -55,16 +55,13 @@ function(register_benchdnn_test name cmd)
         DEPENDS benchdnn
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     )
-    if(WIN32)
-        set_property(TARGET ${name} PROPERTY ENVIRONMENT "PATH=${CTESTCONFIG_PATH}")
-        configure_file(${CMAKE_SOURCE_DIR}/config_template.vcxproj.user ${name}.vcxproj.user @ONLY)
-    endif()
+    maybe_configure_windows_test(${name} TARGET)
 endfunction()
 
 register_benchdnn_test(test_conv "benchdnn -v1 --conv --batch=inputs/test_conv_all")
 register_benchdnn_test(test_benchdnn_conv "benchdnn -v1 --conv --batch=inputs/test_conv_all")
-register_benchdnn_test(test_benchdnn_deconv "benchdnn -v1 --deconv --batch=inputs/test_deconv_all")
-register_benchdnn_test(test_benchdnn_rnn "benchdnn -v1 --rnn")
+register_benchdnn_test(test_benchdnn_deconv "benchdnn -v1 --deconv --batch=inputs/deconv/test_deconv_all")
+register_benchdnn_test(test_benchdnn_rnn "benchdnn -v1 --rnn --batch=inputs/rnn/test_rnn_small")
 register_benchdnn_test(test_benchdnn_reorder "benchdnn --reorder --batch=inputs/reorder/test_default")
 register_benchdnn_test(test_benchdnn_bnorm "benchdnn --bnorm  --batch=inputs/bnorm/test_bnorm_topo")
 register_benchdnn_test(test_benchdnn_ip "benchdnn --ip --batch=inputs/ip/test_ip_all")
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/README.md b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/README.md
index 9d5ba2f68..95253e707 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/README.md
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/README.md
@@ -1,10 +1,10 @@
 # benchdnn
 
 **benchdnn** is a standalone correctness and performance benchmark for
-[Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)](/intel/mkl-dnn) library.
+[Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)](/intel/mkl-dnn).
 The purpose of the benchmark is extended and robust correctness verification of
-the primitives provided by MKL-DNN. So far **benchdnn** supports convolutions
-and inner products of different data types. It also implicitly tests reorders.
+the primitives provided by Intel MKL-DNN. Currently, **benchdnn** supports convolutions
+, inner products, reorder, batch normalization, deconvolution, recurrent neural network, and shuffle of different data types.
 
 
 ## License
@@ -14,40 +14,62 @@ and inner products of different data types. It also implicitly tests reorders.
 
 ## Usage (main driver)
 
-**benchdnn** itself is a driver for different implementation specific
-harnesses. So far it has harness for Intel MKL-DNN convolution, inner product,
-reorder, batch normalization, and harness for testing itself.
-The usage:
+**benchdnn** itself is a driver for different implementation-specific
+harnesses. So far it uses a harness for Intel MKL-DNN [convolution](/tests/benchdnn/README.md#usage-convolution-harness), [inner product](/tests/benchdnn/README.md#usage-ip-harness),
+[reorder](/tests/benchdnn/README.md#usage-reorder-harness), [batch normalization](/tests/benchdnn/README.md#usage-batch-normalization-harness), [deconvolution](/tests/benchdnn/README.md#usage-deconvolution-harness), [shuffle](/tests/benchdnn/README.md#usage-shuffle-harness), and [recurrent neural network](/tests/benchdnn/README.md#usage-rnn-harness) as well as a
+harness for testing [itself](/tests/benchdnn/README.md#usage-self-harness).
+
+Usage:
 ```
-    $ ./benchdnn: [--HARNESS] [--mode=MODE] [-vN|--verbose=N] HARNESS-OPTS
+    $ ./benchdnn: [--HARNESS] [--mode=MODE] [--max-ms-per-prb=MAX-MS-PER-PRB] [-vN|--verbose=N] HARNESS-OPTS
 ```
 where:
 
- - `HARNESS` is either `conv` [default], `ip`, `shuffle`, `reorder`, `bnorm`, `rnn` or `self`
+ - `HARNESS` is either `conv` [default], `ip`, `shuffle`, `reorder`, `bnorm`, `rnn`, or `self`
 
  - `MODE` -- string that contains flags for benchmark mode. Use `C` or `c` for correctness (used by default), and `P` or `p` for performance
 
- - `N` -- verbose level (integer from 0 [default] to ...)
+ - `MAX-MS-PER-PRB`  is passed to assign the maximum time spent per problem in milliseconds, by default `3e3`
+ - `-vN|--verbose=N` -- verbose level, default `0`
+
+ - `HARNESS-OPTS`  are passed to the chosen harness
 
- - `HARNESS-OPTS` are passed to the chosen harness
+Returns `0` on success (all tests passed) or non-zero in case of any error.
 
-Returns `0` on success (all tests passed), and non-zero in case of any error
-happened.
+## Notations / Glossary / Abbreviations
+
+|Abbreviation   | Description
+|:---           |:---
+| src           | Source image (input image for forward convolution)
+| wei           | Weights (aka filter)
+| bia           | Bias
+| dst           | Destination image (output image for forward convolution)
+| acc           | Accumulation (typically in terms of data type)
+| ic, oc        | Input/Output channels (aka feature maps)
+| ih, iw        | Input height and width
+| oh, ow        | Output height and width
+| kh, kw        | Kernel (filter, weights) height and width
+| sh, sw        | Convolution stride over height and width
+| ph, pw        | Convolution top and left padding
+| mb            | Minibatch (amount of images processed at once)
+| g             | Groups (a way to reduce the amount of computations, see Alexnet topology)
+| FWD_{D,B}     | forward w/o and w/ bias
+| BWD_{D,W,WB}  | backward wrt data, weights, and weights and bias
+| DIRECT, WINO  | convolution algorithm: direct or Winograd based
+| AUTO          | convolution algorithm is chosen by MKL-DNN for best performance
 
 
 ## Usage (convolution harness)
 
-The usage:
 ```
     [harness-knobs] [conv-desc] ...
 ```
 
 where *harness-knobs* are:
 
- - `--cfg={f32, u8s8u8s32, ...}` configuration (see below), default `f32`
+ - `--cfg={f32, u8s8u8s32, ...}` configuration (see below [convolution configuration](/tests/benchdnn/README.md#convolution-configurations-also-known-as-precision-specification)), default `f32`
  - `--dir={FWD_D (forward data), FWD_B (forward data + bias),FWD_I (forward data inference), BWD_D (backward data), BWD_W (backward weights), BWD_WB (backward weights + bias)}` direction, default `FWD_B`
- - `--alg={DIRECT, WINO}` convolution algorithm, default DIRECT
- - `--merge={NONE, RELU}` merged primitive, default NONE (nothing merged)
+ - `--alg={DIRECT, WINO, AUTO}` convolution algorithm, default DIRECT
  - `--attr="attr_str"` convolution attributes (see in the section below), default `""` (no attributes set)
  - `--mb=N` override minibatch that is specified in convolution description, default `0` (use mb specified in conv desc)
  - `--match=regex` check only convolutions that match with regex, default is `".*"`. Notice: Windows may only interpret string arguments surrounded by double quotation marks.
@@ -57,20 +79,21 @@ where *harness-knobs* are:
  - `--reset` reset all the parameters set before to default one
  - `-vN|--verbose=N` verbose level, default `0`
  - `--batch=file` use options from the given file (see in subdirectory)
+ - `--mode=` string that contains flags for benchmark mode. Use `C` or `c` for correctness (used by default), and `P` or `p` for performance 
 
-and *conv-desc* is convolution description. The canonical form is:
+and *conv-desc* is the convolution description. The canonical form is:
 ```
     gXmbXicXihXiwXocXohXowXkhXkwXshXswXphXpwXdhXdwXnS
 ```
-Here X is a number and S is string (n stands for name). Some of the parameters
-might be omitted if there is either default one (e.g. if g is not specified
-**benchdnn** uses 1) or if the can be computed automatically (e.g. output shape
-can be derived from the input one and kernel). Also if either width or height
-is not specified than it is assumed height == width. Special symbol `_` is
-ignored, hence maybe used as delimiter. See `str2desc()` in conv/conv_aux.cpp
-for more details and implicit rules :^)
+Here X is a number and S is a string (n stands for name). Some of the parameters
+may be omitted if a default exists (for example, if g is not specified
+**benchdnn** uses 1) or if it can be computed automatically (for example, the output shape
+can be derived from the input one and the kernel). Also, if either width or height
+is not specified, it is assumed that height == width. The special symbol `_` is
+ignored, so it may be used as a delimiter. See `str2desc()` in conv/conv_aux.cpp
+for more details and implicit rules.
 
-The attribute string *attr_str* is defined as (new lines for readability):
+The attribute string *attr_str* is defined as follows (line breaks are for readability):
 ```
     [irmode={nearest,down};]
     [oscale={none,common,per_oc}[:scale];]
@@ -81,8 +104,8 @@ Here `irmode` defines the rounding mode for integer output (default is nearest).
 
 Next, `oscale` stands for output_scales. The first parameter is the policy that
 is defined below. The second optional parameter is a scale that specifies
-either the one common output scale (for `none` and `common` polices) or a
-starting point for `per_oc` policy, which uses many scales. The default scale
+either the one common output scale (for the `none` and `common` polices) or a
+starting point for the `per_oc` policy, which uses many scales. The default scale
 is 1.0. Known policies are:
 
   - `none` (default) means no output scales set (i.e. scale = 1.)
@@ -90,19 +113,19 @@ is 1.0. Known policies are:
   - `per_oc` corresponds to `mask=1<<1` (i.e. output channels) with different scale factors
 
 Next, `post_ops` stands for post operation sequence. Currently supported post
-ops are:
+operations are:
 
   - `relu` with no parameters (i.e. corresponding scale is 1., alg = eltwise_relu, alpha = beta = 0.)
   - `sum` with optional parameter scale (default 1.)
 
-### convolution configurations (aka precision specification)
+### Convolution configurations (also known as precision specification)
 
 `--cfg` option specifies what convolution would be used in terms of data type.
-Also it defines all the magic with data filling inside. For integer type
+Also it defines all the magic with data filling inside. For the integer type,
 saturation is implicitly implied.
 
-Finally configuration defines threshold for computation errors (ideally we
-want keep it 0 and it seems to work for now).
+Finally configuration defines the threshold for computation errors (ideally we
+want to keep it at 0, and it seems to work for now).
 
 The table below shows cases supported by Intel MKL-DNN and corresponding
 configurations for **benchdnn**:
@@ -123,18 +146,18 @@ configurations for **benchdnn**:
 | s8      | s8       | u8       | s32      | s8s8u8s32    | same notes as for u8s8f32s32
 
 
-## Performance measurements
+### Performance measurements (convolution harness)
 
-**benchdnn** supports custom performance report. Template is passed via
+**benchdnn** supports a custom performance report. A template is passed via the
 command line and consists of terminal and nonterminal symbols. Nonterminal
-symbols are printed as is. Description of terminal symbols is given below.
-There is also a notion of modifiers (marked as @) that change meaning of
-terminal symbols, e.g. sign '-' means minimum of (in terms of time). See
-table of modifiers below.
+symbols are printed as-is. A description of terminal symbols is given below.
+There is also a notion of modifiers (marked with @) that change the meaning of
+terminal symbols; for example, the sign '-' means minimum of (in terms of time).
+See the table of modifiers below.
 
-> **caution:** threads have to be pinned in order to get consistent frequency
+> **Caution:** Threads must be pinned in order to get consistent frequency.
 
-| abbreviation  | description
+| Abbreviation  | Description
 |:------------  |:-----------
 | %d            | problem descriptor
 | %D            | expanded problem descriptor (conv parameters in csv format)
@@ -146,7 +169,7 @@ table of modifiers below.
 | %@c           | time in clocks
 | %@p           | ops per second
 
-| modifier  | description
+| Modifier  | Description
 |:--------  |:-----------
 |           | default
 | -         | min (time) -- default
@@ -160,7 +183,7 @@ table of modifiers below.
 The definition of expanded problem descriptor is:
 `g,mb,ic,ih,iw,oc,oh,ow,kh,kw,sh,sw,ph,pw`.
 
-The default template can be found in conv/bench_conv.cpp that is defined as
+The default template can be found in conv/bench_conv.cpp and is defined as
 `perf,%n,%d,%GO,%GF,%-t,%-Gp,%0t,%0Gp`. That will produce the following output
 in CSV format:
 ```
@@ -174,8 +197,13 @@ best gigaops (since it corresponds to mimimum time)
 average time spent in ms
 average gigaops (since it corresponds to average time)
 ```
+Here is an example of the performance output:
+```
+ perf,"yolov2:conv1",mb16ic3ih610oc32oh608kh3n"yolov2:conv1",10.2205,0,43.9827,232.375,58.0146,176.171
+```
+full convolution descriptor is `mb16ic3ih610oc32oh608kh3n"yolov2:conv1"` in the above example.
 
-## Examples
+### Examples (convolution harness)
 
 Run the set of f32 forward convolutions from inputs/conv_all file w/ bias and default minibatch:
 ```
@@ -183,19 +211,19 @@ Run the set of f32 forward convolutions from inputs/conv_all file w/ bias and de
         --cfg=f32 --dir=FWD_B --batch=inputs/conv_all
 ```
 
-Run the same but with merged ReLU:
+Run the same but with post_ops ReLU:
 ```
     $ ./benchdnn --conv \
-        --cfg=f32 --dir=FWD_B --merge=RELU --batch=inputs/conv_all
+        --cfg=f32 --dir=FWD_B --attr="post_ops='relu'" --batch=inputs/conv_all
 ```
 
 Run the same as previous but also measure performance:
 ```
-    $ ./benchdnn --conv --mode=CORRnPERF \
-        --cfg=f32 --dir=FWD_B --merge=RELU --batch=inputs/conv_all
+    $ ./benchdnn --conv  --mode=CORRnPERF \
+        --cfg=f32 --dir=FWD_B --attr="post_ops='relu'" --batch=inputs/conv_all
 ```
 
-> **note**: instead of `CORRnPERF` one can use `CP`, `PC`, `cp`, or `pc`
+> **Note**: Instead of `CORRnPERF`, one can use `CP`, `PC`, `cp`, or `pc`
 
 Run a set of f32 backward convolutions wrt weights with kh=3 and
 verbose level set to 2:
@@ -221,18 +249,19 @@ configurations (`u8s8u8s32` and `f32`):
         --cfg=f32 ic3ih227iw227_oc96oh55ow55_kh11kw11_sh4sw4ph0pw0_n"alexnet:conv1"
 ```
 
-Run batch file for different algorithms (assuming the file only specifies
-convolutions and does not include harness options that would override ones
-passed in the command line). Also ignore mkldnn_unimplemented errors in case of
+Run batch file for different algorithms (assuming the file specifies only
+convolutions and does not include harness options that would override any
+passed on the command line). Also ignore mkldnn_unimplemented errors in case of
 Winograd:
 ```
     $ ./benchdnn --conv \
         --alg=DIRECT --batch=convs.in \
         --allow-unimpl=true \
-        --alg=WINO   --batch=convs.in
+        --alg=WINO   --batch=convs.in \
+        --alg=AUTO   --batch=convs.in 
 ```
 
-Run a set of u8s8u8s32 forward convolutions w/o bias, skipping
+Run a set of u8s8u8s32 forward convolutions without bias, skipping
 reference implementations and not triggering unimplemented as an error, with
 one common output scale set to 0.5 with rounding mode set to down
 (via attributes):
@@ -242,42 +271,10 @@ one common output scale set to 0.5 with rounding mode set to down
         --attr="irmode=down;oscale=common:.5" --batch=inputs/conv_all
 ```
 
-Almost the same as above (with minor changes), but also add post operation
-sequence **(relu, then sum with scale .3, then relu)** using
-attributes/mkldnn_post_ops_t:
-```
-    $ ./benchdnn --conv \
-        --cfg=u8s8s32s32 --dir=FWD_B \
-        --attr="oscale=common:.5;post_ops='relu;sum:.3;relu'" --batch=inputs/conv_all
-```
-
-
-## Notations / Glossary / Abbreviations
-
-|Abbreviation   | Description
-|:---           |:---
-| src           | Source image (input image for forward convolution)
-| wei           | Weights (aka filter)
-| bia           | Bias
-| dst           | Destination image (output image for forward convolution)
-| acc           | Accumulation (typically in terms of data type)
-| ic, oc        | Input/Output channels (aka feature maps)
-| ih, iw        | Input height and width
-| oh, ow        | Output height and width
-| kh, kw        | Kernel (filter, weights) height and width
-| sh, sw        | Convolution stride over height and width
-| ph, pw        | Convolution top and left padding
-| mb            | Minibatch (amount of images processed at once)
-| g             | Groups (a way to reduce the amount of computations, see Alexnet topology)
-| FWD_{D,B}     | forward w/o and w/ bias
-| BWD_{D,W,WB}  | backward wrt data, weights, and weights and bias
-| DIRECT, WINO  | convolution algorithm: direct or Winograd based
-| NONE, RELU    | merged primitives: nothing or ReLU
 
 
 ## Usage (batch normalization harness)
 
-The usage:
 ```
     ./benchdnn --bnorm [harness-knobs] bnorm-desc ...
 ```
@@ -290,7 +287,7 @@ where *harness-knobs* are:
  - `--fmt={nchw, nChw16c, ...}` data layout, default `nchw`
  - `--flags=[|G|S|R]` batch normalization flags, default `none` (G -- global stats, S -- use scale shift, R -- fuse with ReLU)
  - `--attr="attr_str"` attributes (see in the convolution section above), default `""` (no attributes set)
- - `--match=regex` check only convolutions that match with regex, default is `".*"`. Notice: Windows may only interpret string arguments surrounded by double quotation marks.
+ - `--match=regex` check only bnorm that match with regex, default is `".*"`. Notice: Windows may only interpret string arguments surrounded by double quotation marks.
  - `--skip-impl="str1[:str2]..."` skip implementation (see mkldnn_query_impl_info_str), default `""`
  - `--perf-template=template-str` set template for performance report (very similar to the convolution one)
  - `--reset` reset all the parameters set before to default one
@@ -299,10 +296,10 @@ where *harness-knobs* are:
 
 and *bnorm-desc* is a batch normalization description. The canonical form is:
 ```
-    mbXicXihXiwXepsYnS
+    mbXicXidXihXiwXepsYnS
 ```
-Here X is an integer number, Y is a real number, and S is string (n stands for
-name). Special symbol `_` is ignored, hence maybe used as delimiter. There are
+Here X is an integer number, Y is a real number, and S is a string (n stands for
+name). The special symbol `_` is ignored, so it may be used as delimiter. There are
 some implicit rules:
  - if mb is omitted set mb to 2
 
@@ -310,39 +307,516 @@ some implicit rules:
 
  - if eps is omitted set eps to 1./16
 
+### Performance measurements (batch normalization harness)
+
+**benchdnn** supports a custom performance report. A template is passed via the
+command line and consists of terminal and nonterminal symbols. Nonterminal
+symbols are printed as-is. A description of terminal symbols is given below.
+There is also a notion of modifiers (marked with @) that change the meaning of
+terminal symbols; for example, the sign '-' means minimum of (in terms of time). See the
+table of modifiers below.
+
+> **Caution:** Threads must be pinned in order to get consistent frequency.
+
+| abbreviation  | description
+|:------------  |:-----------
+| %d            | problem descriptor
+| %D            | expanded problem descriptor (parameters in csv format)
+| %n            | problem name
+| %z            | direction
+| %f            | flags
+| %q            | data type (precision)
+| %f            | data format (layout)
+| %@t           | time in ms
+
+The definition of expanded problem descriptor is: `mb,ic,id,ih,iw,eps`.
+
+The default template can be found in bnorm/bench_bnorm.cpp and is defined as
+`perf,%n,%z,%f,%q,%f,%D,%-t,%0t`. That will produce the following output
+in CSV format:
+```
+string: perf
+bnorm name
+direction
+batch normalization flags
+base data type
+batch normalization flags
+expanded bnorm problem descriptor
+minimum time spent in ms
+average time spent in ms
+```
+Here is an example of performance output:
+```
+perf,"resnet_50:bn_conv1",FWD_D,,f32,,50,64,1,112,112,0.0625,10.7729,77.1917
+```
+expanded bnorm problem descriptor is `50,64,1,112,112,0.0625` in the above example.
+
+### Examples (batch normalization harness)
+
+Run the set of bnorms from inputs/bnorm/bnorm_resnet_50 file with default minibatch:
+```
+    $ ./benchdnn --bnorm \
+         --batch=inputs/bnorm/bnorm_resnet_50
+```
+
+Run the same as previous but also measure performance:
+```
+    $ ./benchdnn --bnorm --mode=CORRnPERF \
+         --batch=inputs/bnorm/bnorm_resnet_50
+```
+
+
+## Usage (rnn harness)
+
+```
+    ./benchdnn --rnn [harness-knobs] [rnn-desc] ...
+```
+
+where *harness-knobs* are:
+
+ - `--prop={FWD_D (forward data), BWD_DW (backward data + weights)}` direction, default `FWD_D``
+ - `--alg={VANILLA_RNN, VANILLA_LSTM, VANILLA_GRU, LBR_GRU}` algorithm, default `VANILLA_RNN``
+ - `--direction={left2right, right2left, concat, sum}`  direction, default `left2right``
+ - `--activation={RELU, LOGISTIC, TANH}` activation, default `RELU``
+ - `--reset` reset all the parameters set before to default one 
+ - `--batch=file` use options from the given file (see in subdirectory)
+
+and *rnn-desc* is rnn description. The canonical form is:
+```
+ lXtXmbXsicXslcXdicXdlc
+```
+Here X is a number and S is a string. Some implicit rules:
+ - default values: l = 1, t = 1, mb = 2, S="wip"
+
+ - if slc/dlc/dic is undefined => slc/dlc/dic = sic
+
+See `str2desc()` in rnn/rnn_aux.cpp
+for more details and implicit rules :^)
+
+### Performance measurements (rnn harness)
+
+
+Runing rnn with performance measurememt mode will produce the following output
+in CSV format:
+```
+string: perf
+algorithm
+activation function
+direction
+expanded rnn problem descriptor
+name
+time spent in ms
+minimum time spent in ms
+maximum time spent in ms
+average time spent in ms
+```
+Here is an example of performance output:
+```
+perf,VANILLA_RNN,RELU,left2right,l1t1mb128sic512slc512dic512dlc512n""GNMT_enc-training"",time(ms):min=68.0007,max=176.006,avg=91.2686
+```
+expanded rnn problem descriptor is `l1t1mb128sic512slc512dic512dlc512n` in the above example.
+
+### Examples (rnn harness)
+
+Run the set of rnn training from inputs/rnn/rnn_training file with default minibatch:
+```
+    $ ./benchdnn --rnn \
+         --batch=inputs/rnn/rnn_training
+```
+
+Run the same as previous but also measure performance:
+```
+    $ ./benchdnn --rnn --mode=CORRnPERF \
+         --batch=inputs/rnn/rnn_training
+```
+
+
+## Usage (deconvolution harness)
+
+```
+    ./benchdnn --deconv [harness-knobs] [deconv-desc] ...
+```
+
+where *harness-knobs* are:
+
+ - `--cfg={f32, u8s8u8s32, ...}` configuration (ref conv session above  [convolution configuration](/tests/benchdnn/README.md#convolution-configurations-also-known-as-precision-specification)), default `f32`
+ - `--match=regex` check only deconvolutions that match with regex, default is `".*"`. Notice: Windows may only interpret string arguments surrounded by double quotation marks.
+ - `--mb=N` override minibatch that is specified in deconvolution description, default `0` (use mb specified in deconv desc)
+ - `--dir={FWD_D (forward data), FWD_B (forward data + bias),FWD_I (forward data inference), BWD_D (backward data), BWD_W (backward weights), BWD_WB (backward weights + bias)}` direction, default `FWD_B`
+ - `--alg={DIRECT, WINO, AUTO}` deconvolution algorithm, default DIRECT
+ - `--attr="attr_str"` deconvolution attributes (see in the convolution section above), default `""` (no attributes set)
+ - `--skip-impl="str1[:str2]..."` skip implementation (see mkldnn_query_impl_info_str), default `""`
+ - `--allow-unimpl=true|false` do not treat unimplemented configuration as an error, default `false`
+ - `--perf-template=template-str` set template for performance report (see section *Performance measurements*)
+ - `--mode=` string that contains flags for benchmark mode. Use `C` or `c` for correctness (used by default), and `P` or `p` for performance  
+ - `--reset` reset all the parameters set before to default one 
+ - `-vN|--verbose=N` verbose level, default `0`
+ - `--batch=file` use options from the given file (see in subdirectory)
+
+and *deconv-desc* is deconvolution description. The canonical form is:
+```
+    gXmbXicXihXiwXocXohXowXkhXkwXshXswXphXpwXdhXdwXnS   
+```
+Here X is a number and S is string (n stands for name). Some of the parameters
+might be omitted if a default exists (e.g. if g is not specified
+**benchdnn** uses 1) or if the can be computed automatically (e.g. output shape
+can be derived from the input one and kernel). Also if either width or height
+is not specified than it is assumed height == width. Special symbol `_` is
+ignored, hence maybe used as delimiter. See `str2desc()` in conv/conv_aux.cpp
+for more details and implicit rules :^)
+
+
+### Performance measurements (deconvolution harness)
+
+**benchdnn** supports a custom performance report. please refer above Performance measurements convolution harness session for detail, [convolution harness](/tests/benchdnn/README.md#performance-measurements-convolution-harness).
+
+The default template can be found in conv/bench_deconv.cpp and is defined as
+`perf,%n,%d,%GO,%GF,%-t,%-Gp,%0t,%0Gp`. That will produce the following output
+in CSV format:
+```
+string: perf
+deconvolution name
+full deconv-desc
+number of giga ops calculated
+effective cpu frequency in GHz (amb clocks[min] / time[min])
+minimum time spent in ms
+best gigaops (since it corresponds to mimimum time)
+average time spent in ms
+average gigaops (since it corresponds to average time)
+```
+Here is an example of performance output:
+```
+ perf,"alexnet:deconv1",mb256ic96ih55oc3oh227kh11sh4n"alexnet:deconv1",2.9733,0,249.474,11.9183,307.702,9.66291
+```
+full deconvolution descriptor is `mb256ic96ih55oc3oh227kh11sh4n"alexnet:deconv1"` in the above example.
+
+### Examples (deconvolution harness)
+
+Run the set of f32 forward deconvolutions from inputs/deconv_all file w/ bias and default minibatch:
+```
+    $ ./benchdnn --deconv \
+        --cfg=f32 --dir=FWD_B --batch=inputs/deconv_all
+```
+
+Run the same as previous but also measure performance:
+```
+    $ ./benchdnn --deconv  --mode=CORRnPERF \
+        --cfg=f32 --dir=FWD_B  --batch=inputs/deconv_all
+```
+
+## Usage (ip harness)
+
+```
+    ./benchdnn --ip [harness-knobs] [ip-desc] ...
+```
+
+where *harness-knobs* are:
+
+ - `--cfg={f32, u8s8u8s32, ...}` configuration (ref conv session above  [convolution configuration](/tests/benchdnn/README.md#convolution-configurations-also-known-as-precision-specification)), default `f32``
+ - `--mb=N` override minibatch that is specified in ip description, default `0` (use mb specified in ip desc)
+ - `--dir={FWD_D (forward data), FWD_B (forward data + bias),FWD_I (forward data inference), BWD_D (backward data), BWD_W (backward weights), BWD_WB (backward weights + bias)}` direction, default `FWD_B`
+ - `--attr="attr_str"` ip attributes (see in the convolution section above), default `""` (no attributes set)
+ - `--allow-unimpl=true|false` do not treat unimplemented configuration as an error, default `false`
+ - `--perf-template=template-str` set template for performance report (see section *Performance measurements*)
+ - `--mode=` string that contains flags for benchmark mode. Use `C` or `c` for correctness (used by default), and `P` or `p` for performance  
+ - `--reset`  reset all the parameters set before to default one 
+ - `-vN|--verbose=N` verbose level, default `0`
+ - `--batch=file` use options from the given file (see in subdirectory)
+
+and *ip-desc* is ip description. The canonical form is:
+```
+    mbXicXidXihXiwXSocXnS   
+```
+Here X is a number and S is a string (n stands for name). 
+The special symbol `_` is ignored, so it may be used as a delimiter. 
+Some implicit rules:
+ - default values:  mb = 2, id = 1, S="wip"
+
+ - if H is undefined => H = W
+
+ - if W is undefined => W = H
+
+See `str2desc()` in ip/ip_aux.cpp
+for more details and implicit rules :^)
+
+### Performance measurements (ip harness)
+
+**benchdnn** supports a custom performance report. A template is passed via the
+command line and consists of terminal and nonterminal symbols. Nonterminal
+symbols are printed as-is. A description of terminal symbols is given below.
+There is also a notion of modifiers (marked with @) that change the meaning of
+terminal symbols; for example, the sign '-' means minimum of (in terms of time). See the
+table of modifiers below.
+
+> **Caution:** Threads must be pinned in order to get consistent frequency.
+
+| abbreviation  | description
+|:------------  |:-----------
+| %d            | problem descriptor
+| %D            | expanded problem descriptor (parameters in csv format)
+| %n            | problem name
+| %z            | direction
+| %f            | flags
+| %q            | data type (precision)
+| %f            | data format (layout)
+| %@t           | time in ms
+
+The definition of expanded problem descriptor is: `mb,oc,ic,id,ih,iw`.
+
+The default template can be found in bnorm/bench_ip.cpp and is defined as
+`perf,%D,%n,%z,%q,%-t,%-Gp,%0t,%0Gp`. That will produce the following output
+in CSV format:
+```
+string: perf
+expanded ip problem descriptor
+name
+direction
+data type
+minimum time spent in ms
+best gigaops (since it corresponds to mimimum time)
+average time spent in ms
+average gigaops (since it corresponds to average time)
+```
+
+Here is an example of performance output:
+```
+perf,112,1000,2048,1,1,1,"resnet:ip1",FWD_B,f32,3.99976,114.695,19.0323,24.1039
+```
+expanded ip problem descriptor is `112,1000,2048,1,1,1` in the above example.
+
+### Examples (ip harness)
+
+Run the set of ip from inputs/ip/ip_all file with default minibatch:
+```
+    $ ./benchdnn --ip \
+         --batch=inputs/ip/ip_all
+```
+
+Run the same as previous but also measure performance:
+```
+    $ ./benchdnn --ip --mode=CORRnPERF \
+         --batch=inputs/ip/ip_all
+```
+
+## Usage (shuffle harness)
+
+```
+    ./benchdnn --shuffle [harness-knobs]  [dim]...
+```
+
+where *harness-knobs* are:
+
+ - `--match==regex` check only shuffle that match with regex, default is `".*"`. Notice: Windows may only interpret string arguments surrounded by double quotation marks.
+ - `--dir={FWD_D (forward data), FWD_B (forward data + bias),FWD_I (forward data inference), BWD_D (backward data), BWD_W (backward weights), BWD_WB (backward weights + bias)}` direction, default `FWD_B`
+ - `--dt={f32, s32, ...}` base data type, default `f32`
+ - `--fmt={nchw, nChw16c, ...}` data layout, default `nchw`
+ - `--axis=` default `1`
+ - `--group=` default `1`
+ - `--mode=` string that contains flags for benchmark mode. Use `C` or `c` for correctness (used by default), and `P` or `p` for performance 
+ - `-vN|--verbose=N` verbose level, default `0`
+ - `--batch=file` use options from the given file (see in subdirectory)
+
+and *dim* is ip description. The canonical form is:
+```
+    dxdxdxdxd   
+```
+Here d is a number.
+
+See `str2dims()` in shuffle/shuffle_aux.cpp for more details.
+
+### Performance measurements (shuffle harness)
+
+**benchdnn** supports a custom performance report. A template is passed via the
+command line and consists of terminal and nonterminal symbols. Nonterminal
+symbols are printed as-is. A description of terminal symbols is given below.
+There is also a notion of modifiers (marked with @) that change the meaning of
+terminal symbols; for example, the sign '-' means minimum of (in terms of time). See the
+table of modifiers below.
+
+> **Caution:** Threads must be pinned in order to get consistent frequency.
+
+| Abbreviation  | Description
+|:------------  |:-----------
+| %d            | problem descriptor
+| %D            | expanded problem descriptor (parameters in csv format)
+| %z            | direction
+| %q            | data type (precision)
+| %f            | data format (layout)
+| %a            | axis
+| %g            | group size
+| %@t           | time in ms
+
+The definition of expanded problem descriptor is: `dxdxdxdxd`.
+
+The default template can be found in shuffle/bench_shuffle.cpp and is defined as
+`perf,%z,%q,%f,%D,%a,%g,%-t,%0t`. That will produce the following output
+in CSV format:
+```
+string: perf
+direction
+data type
+data format
+expanded shuffle problem descriptor
+axis
+group size
+minimum time spent in ms
+average time spent in ms
+```
+Here is an example of performance output.
+```
+perf,FWD_D,u8,nCdhw16c,1x272x2x56x56,4,4,11.6177,16.509
+```
+expanded shuffle problem descriptor is `1x272x2x56x56` in the above example.
+
+### Examples (shuffle harness)
+
+Run the set of shuffle from inputs/shuffle/test_shuffle_axis file with default minibatch:
+```
+    $ ./benchdnn --shuffle \
+         --batch=inputs/shuffle/test_shuffle_axis
+```
+
+Run the same as previous but also measure performance:
+```
+    $ ./benchdnn --shuffle --mode=CORRnPERF \
+         --batch=inputs/shuffle/test_shuffle_axis
+```
+
+## Usage (reorder harness)
+
+```
+    ./benchdnn --reorder [harness-knobs]  ...
+```
+
+where *harness-knobs* are:
+
+ - `--idt={f32, s32, ...}` base input data type, default `f32`
+ - `--odt={f32, s32, ...}` base output data type, default `f32`
+ - `--dt={f32, s32, ...}` base data type, default `f32`
+ - `--ifmt={nchw, nChw16c, ...}` input data layout, default `nchw`
+ - `--ofmt={nchw, nChw16c, ...}` output data layout, default `nchw`
+ - `--fmt={nchw, nChw16c, ...}` data layout, default `nchw`
+ - `--def-scales={,,}` input defined scales. separate number by ',' ex : 0.125, 0.25, 0.5, 1, 2, 4, 8
+ - `--attr="attr_str"` ip attributes (see in the section below), default `""` (no attributes set)
+ - `--both-dir-dt=true|false` , default `false`
+ - `--both-dir-fmt=true|false` , default `false`
+ - `--allow-unimpl=true|false` do not treat unimplemented configuration as an error, default `false`
+ - `--run` run reorder bench
+ - `--perf-template=template-str` set template for performance report (see section *Performance measurements*)
+ - `--reset` reset all the parameters set before to default one 
+ - `--mode=` string that contains flags for benchmark mode. Use `C` or `c` for correctness (used by default), and `P` or `p` for performance 
+ - `-vN|--verbose=N` verbose level, default `0`
+ - `--batch=file` use options from the given file (see in subdirectory)
+
+### Performance measurements (reorder harness)
+
+**benchdnn** supports a custom performance report. A template is passed via the
+command line and consists of terminal and nonterminal symbols. Nonterminal
+symbols are printed as-is. A description of terminal symbols is given below.
+There is also a notion of modifiers (marked with @) that change the meaning of
+terminal symbols; for example, the sign '-' means minimum of (in terms of time). See the
+table of modifiers below.
+
+> **Caution:** Threads must be pinned in order to get consistent frequency.
+
+| abbreviation  | description
+|:------------  |:-----------
+| %d            | problem descriptor
+| %D            | expanded problem descriptor (reorder parameters in csv format)
+| %n            | dimensionality of the problem
+| %@O           | number of elements being reordered
+| %@t           | time in ms
+| %@p           | elements per second
+
+| modifier  | description
+|:--------  |:-----------
+|           | default
+| -         | min (time) -- default
+| 0         | avg (time)
+| +         | max (time)
+|           |
+| K         | Kilo (1e3)
+| M         | Mega (1e6)
+| G         | Giga (1e9)
+
+The definition of expanded problem descriptor is:
+`idt,odt,ifmt,ofmt,attrs,dims`.
+
+The default template can be found in reorder/bench_reorder.cpp and is defined as
+`perf,%n,%D,%O,%-t,%-Gp,%0t,%0Gp`. That will produce the following output
+in CSV format:
+```
+string: perf
+dimensionality of the problem
+expanded reorder problem descriptor
+number of elements being reordered
+minimum time spent in ms
+best gigaops (since it corresponds to mimimum time)
+average time spent in ms
+average gigaops (since it corresponds to average time)
+```
+Here is an example of performance output:
+```
+ perf,4,f32,f32,nchw,nchw,irmode=nearest;oscale=per_oc:0.125;post_ops='',2x64x3x3,1152,4.00244,0.000287824,24.0279,4.79442e-05
+```
+expanded reorder problem descriptor is `f32,f32,nchw,nchw,irmode=nearest;oscale=per_oc:0.125;post_ops='',2x64x3x3` in the above example.
+
+### Examples (reorder harness)
+
+Run the set of reorder from reorder/test_default file with default minibatch:
+```
+    $ ./benchdnn --reorder \
+        --batch=inputs/reorder/test_default
+```
+
+Run the same as previous but also measure performance:
+```
+    $ ./benchdnn --reorder  --mode=CORRnPERF \
+        --batch=inputs/reorder/test_default
+```
+
+## Usage (self harness)
+
+```
+    ./benchdnn --self ...
+```
+
+Check enumlation type, attributes, flags, and descriptions. 
+
+
 
 ## Installation
 
-**benchdnn** is automatically built with Intel MKL-DNN. For the convenience one
-may build **benchdnn** using cmake or make.
+**benchdnn** is automatically built with Intel MKL-DNN. For convenience, you can
+build **benchdnn** using cmake or make.
 
 
 ## Essence of convolution testing
 
-Intel MKL-DNN supports different data types, such as single precision floating
-point (`mkldnn_f32`), signed/unsigned integer of different length
-(`mkldnn_{s,u}{8,16,32}`). We need to cover all those cases by tests. It is
-essential to test real convolution sizes, since Intel MKL-DNN provides
-different optimizations depending on convolution parameters, so there is no
-one unified approach inside, which means it would not be enough to test only
-few convolutions (aka unit tests).
-
-But even for given convolution the correctness convolution test is not as
-simple as it might seem to be at first sight. One of the biggest problem we
-encountered is numerical instability. For every output point a lot of
-operations may happen. For instance on backward propagation with respect to
-filter each filter point requires `mb * oh * ow` operations (see *Notation*
-section below). That big amount of compute operations may lead to either
+Intel MKL-DNN supports different data types, such as single-precision floating
+point (`mkldnn_f32`) and signed/unsigned integer of different length
+(`mkldnn_{s,u}{8,16,32}`). We need to cover all those cases with tests. It is
+essential to test real convolution sizes, because Intel MKL-DNN provides
+different optimizations depending on convolution parameters. There is no
+single unified approach inside, so it would not be enough to test only a few
+convolutions (also known as unit tests).
+
+But even for a given convolution, the correctness convolution test is not as
+simple as it might seem at first sight. One of the biggest problems we
+encountered is numerical instability. For every output point, a lot of
+operations may occur. For instance, on backward propagation with respect to
+filter, each filter point requires `mb * oh * ow` operations (see the *Notation*
+section below). That large amount of compute operations may lead to either
 integer overflow or accuracy loss if initial data was chosen inadequately.
 
-These two main things complicate testing. **benchdnn** tries to address these
-issues by using integers for initialization with uniform distribution in a
+These two main issues complicate testing. **benchdnn** tries to address these
+by using integers for initialization with uniform distribution in a
 range `[cfg->f_min .. cfg->f_max]`, with the step `cfg->f_step`
 (see `struct dt_conf_t` in conv/conv.hpp). `f_min` and `f_max` are chosen so
-that most of the result would belong `[cfg->min .. cfg->max]` range. Also
-for floating point all integers in both ranges have exact representation (i.e.
+that most of the results would belong in the `[cfg->min .. cfg->max]` range. Also
+for floating point all integers in both ranges have exact representation (that is,
 the absolute numbers are less than `2^size_of_mantissa`). Uniform distribution
-leads to have result uniformly distributed and quite small `f_min/f_max` keep
+leads to results that are uniformly distributed and quite small. `f_min/f_max` keep
 the result in a reasonable range. Yet another trick: not all the points are
 initialized with non-zero values: see `fill_{src,wei,bia,dst}` in
 conv/conv.cpp.
@@ -350,14 +824,14 @@ conv/conv.cpp.
 
 ## Further plans
 
-Please see TODO.md in **benchdnn** root directory for development plans.
+Please see TODO.md in the **benchdnn** root directory for development plans.
 
 
 ## Issues and contributions
 
-We welcome community contributions to **benchdnn** as well as Intel MKL-DNN.
+We welcome community contributions to **benchdnn** as well as to Intel MKL-DNN.
 If you have any ideas or issues please submit an issue or pull request. For
-clarity please include ''benchdnn: '' in the title.
+clarity, please include ''benchdnn: '' in the title.
 
 
 ## Inspiration
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bench_bnorm.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bench_bnorm.cpp
index 36751767a..4c0a83570 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bench_bnorm.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bench_bnorm.cpp
@@ -41,7 +41,7 @@ attr_t attr;
 const char *pattern = NULL;
 const char *skip_impl = "";
 bool allow_unimpl = false;
-const char *perf_template = "perf,%n,%z,%f,%q,%f,%D,%-t,%0t";
+const char *perf_template = "perf,%n,%z,%F,%q,%f,%D,%-t,%0t";
 
 void reset_parameters() {
     check_alg = ALG_AUTO;
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bnorm.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bnorm.cpp
index 7a6c81c0a..0d47b9e86 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bnorm.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/bnorm.cpp
@@ -371,15 +371,15 @@ static int compare(const prb_t *p, data_kind_t kind, const dnn_mem_t &fp_mem,
 int check_fwd_ws(const dnn_mem_t &data_dt, const dnn_mem_t &ws_dt, res_t *r) {
     /* so far we know ws is just bit-mask of whether value was negative or
      * positive */
-    const size_t nelems = data_dt.nelems();
+    const size_t nelems = data_dt.nelems(true);
     const float *d = (const float *)data_dt;
     const uint8_t *ws = (const uint8_t *)ws_dt;
 
     /* some internal knowledge: flags in ws are either stored as bytes (e.g.
      * for the ref implementation) or as bits (e.g. for the jitted one); in
-     * the first case the ws memory has fewer elements than the data memory */
+     * the latter case the ws memory has fewer elements than the data memory */
     enum { ws_byte, ws_bit } ws_type;
-    ws_type = ws_dt.nelems() < nelems ? ws_bit : ws_byte;
+    ws_type = ws_dt.nelems(true) < nelems ? ws_bit : ws_byte;
 
     /* more internal knowledge: data_dt and ws_dt are expected to have exactly
      * the same data layout, and data_dt padded regions are expected to be
@@ -488,8 +488,9 @@ static int cvt_mask_to_ws(const prb_t *p, const dnn_mem_t &mask_fp,
         is_bnorm_3d(p) ? data_dims_3d : data_dims, mkldnn_f32, p->fmt);
     SAFE(data.reorder(mask_fp), WARN);
 
-    dnn_mem_t mean(1, &p->ic, mkldnn_f32, mkldnn_x);
-    dnn_mem_t var(1, &p->ic, mkldnn_f32, mkldnn_x);
+    ptrdiff_t ic = p->ic;
+    dnn_mem_t mean(1, &ic, mkldnn_f32, mkldnn_x);
+    dnn_mem_t var(1, &ic, mkldnn_f32, mkldnn_x);
     for (int c = 0; c < p->ic; ++c) ((float *)mean)[c] = 0.5;
     for (int c = 0; c < p->ic; ++c) ((float *)var)[c] = 1;
 
@@ -603,8 +604,7 @@ int doit(const prb_t *p, res_t *r) {
                 SAFE(compare(p, MEAN, mean_fp, mean_dt, r), WARN);
                 SAFE(compare(p, VAR, var_fp, var_dt, r), WARN);
             }
-            dnn_mem_t data(data_dt.md_, fp, src_format);
-            SAFE(data.reorder(data_dt), WARN);
+            dnn_mem_t data(data_dt, fp, src_format);
             SAFE(compare(p, DATA, data_fp, data, r), WARN);
             if ((p->flags & FUSE_BN_RELU) && !(p->dir & FLAG_INF))
                 SAFE(check_fwd_ws(data_dt, ws_dt, r), WARN);
@@ -652,9 +652,8 @@ int doit(const prb_t *p, res_t *r) {
                     ws_fp, d_data_fp, d_ss_fp);
             if ((p->flags & USE_SCALESHIFT) && (p->dir & FLAG_WEI))
                 SAFE(compare(p, SS, d_ss_fp, d_ss_dt, r), WARN);
-            dnn_mem_t d_data(d_data_dt.md_, fp,
+            dnn_mem_t d_data(d_data_dt, fp,
             is_bnorm_3d(p) ? mkldnn_ncdhw : mkldnn_nchw);
-            SAFE(d_data.reorder(d_data_dt), WARN);
             SAFE(compare(p, DATA, d_data_fp, d_data, r), WARN);
         }
     }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/perf_report.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/perf_report.cpp
index 97399fbac..8373d4465 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/perf_report.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/bnorm/perf_report.cpp
@@ -37,7 +37,7 @@ See modifiers at the same place.
 | %D            | expanded problem descriptor (parameters in csv format)
 | %n            | problem name
 | %z            | direction
-| %f            | flags
+| %F            | flags
 | %q            | data type (precision)
 | %f            | data format (layout)
 | %@t           | time in ms
@@ -100,7 +100,7 @@ void perf_report(const prb_t *p, const res_t *r, const char *pstr) {
             DPRINT("%s", p->name);
         else if (c == 'z')
             DPRINT("%s", dir2str(p->dir));
-        else if (c == 'f')
+        else if (c == 'F')
             DPRINT("%s", flags2str(p->flags));
         else if (c == 'q')
             DPRINT("%s", dt2str(p->dt));
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_conv.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_conv.cpp
index d3de6ed4b..1c3db1709 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_conv.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_conv.cpp
@@ -35,7 +35,6 @@ const char *pattern = NULL;
 dir_t dir = FWD_B;
 int mb = 0;
 alg_t alg = DIRECT;
-merge_t merge = NONE;
 attr_t attr;
 const char *skip_impl = "";
 bool allow_unimpl = false;
@@ -47,14 +46,13 @@ void reset_parameters() {
     dir = FWD_B;
     mb = 0;
     alg = DIRECT;
-    merge = NONE;
     attr = attr_t();
     skip_impl = "";
     allow_unimpl = false;
 }
 
 void check_correctness(const desc_t *c) {
-    const prb_t p(*c, dir, cfg, alg, merge, attr, mb);
+    const prb_t p(*c, dir, cfg, alg, attr, mb);
     char pstr[max_prb_len];
     prb2str(&p, pstr);
 
@@ -90,8 +88,6 @@ int bench(int argc, char **argv, bool main_bench) {
             dir = str2dir(argv[arg] + 6);
         else if (!strncmp("--alg=", argv[arg], 6))
             alg = str2alg(argv[arg] + 6);
-        else if (!strncmp("--merge=", argv[arg], 8))
-            merge = str2merge(argv[arg] + 8);
         else if (!strncmp("--attr=", argv[arg], 7))
             SAFE(str2attr(&attr, argv[arg] + 7), CRIT);
         else if (!strncmp("--skip-impl=", argv[arg], 12))
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_deconv.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_deconv.cpp
index 18792c1eb..937d50e40 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_deconv.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/bench_deconv.cpp
@@ -36,7 +36,6 @@ const char *pattern = NULL;
 dir_t dir = FWD_B;
 int mb = 0;
 alg_t alg = DIRECT;
-merge_t merge = NONE;
 attr_t attr;
 const char *skip_impl = "";
 bool allow_unimpl = false;
@@ -48,14 +47,13 @@ void reset_parameters() {
     dir = FWD_B;
     mb = 0;
     alg = DIRECT;
-    merge = NONE;
     attr = attr_t();
     skip_impl = "";
     allow_unimpl = false;
 }
 
 void check_correctness(const desc_t *c) {
-    const prb_t p(*c, dir, cfg, alg, merge, attr, mb);
+    const prb_t p(*c, dir, cfg, alg, attr, mb, true);
     char pstr[max_prb_len];
     prb2str(&p, pstr);
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/cfg.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/cfg.cpp
index a08e1d1b7..28093fae6 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/cfg.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/cfg.cpp
@@ -42,6 +42,14 @@ const _dt_conf_t conf_f32 = {
     {mkldnn_f32,},
 };
 
+const _dt_conf_t conf_f32_no_limits = {
+    {mkldnn_f32, -FLT_MAX, FLT_MAX,  -32,  32, 0, 1, .25, 0.},
+    {mkldnn_f32, -FLT_MAX, FLT_MAX,  -32,  32, 0, 1, 1.0, 0.},
+    {mkldnn_f32, -FLT_MAX, FLT_MAX, -512, 512, 0, 1, 1.0, 0.},
+    {mkldnn_f32, -FLT_MAX, FLT_MAX,  -32,  32, 0, 1, .25, 0.},
+    {mkldnn_f32,},
+};
+
 const _dt_conf_t conf_f32_full = {
     {mkldnn_f32, -int_max_exact, int_max_exact,  -64,  64, 0, 1, 1.0, 0.},
     {mkldnn_f32, -int_max_exact, int_max_exact,  -32,  32, 0, 1, 1.0, 0.},
@@ -182,6 +190,7 @@ const dt_conf_t *str2cfg(const char *str) {
 #define CASE(cfg) \
     if (!strcasecmp(STRINGIFY(cfg), str)) return CONCAT2(conf_,cfg)
     CASE(f32);
+    CASE(f32_no_limits);
     CASE(f32_full);
     CASE(f32_wino);
     CASE(s16s16s32s32);
@@ -207,6 +216,7 @@ const dt_conf_t *str2cfg(const char *str) {
 const char *cfg2str(const dt_conf_t *cfg) {
 #define CASE(_cfg) if (cfg == CONCAT2(conf_,_cfg)) return STRINGIFY(_cfg)
     CASE(f32);
+    CASE(f32_no_limits);
     CASE(f32_full);
     CASE(f32_wino);
     CASE(s16s16s32s32);
@@ -229,4 +239,17 @@ const char *cfg2str(const dt_conf_t *cfg) {
     return NULL;
 }
 
+const dt_conf_t *auto_cfg(const alg_t alg, const dt_conf_t *cfg) {
+    const char *cfg_s = cfg2str(cfg);
+#define CASE(_cfg_) \
+    if (alg == WINO && !strcmp(cfg_s, STRINGIFY(_cfg_))) return CONCAT2(conf_, CONCAT2(_cfg_, _wino))
+    CASE(f32);
+    CASE(u8s8f32s32);
+    CASE(u8s8s32s32);
+    CASE(u8s8s8s32);
+    CASE(u8s8u8s32);
+#undef CASE
+    return cfg;
+}
+
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv.cpp
index eb1e4caeb..7248c92b5 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv.cpp
@@ -25,38 +25,37 @@
 
 #include "mkldnn_common.hpp"
 #include "mkldnn_memory.hpp"
-
 #include "norm.hpp"
 
 #include "conv/conv_common.hpp"
 
 namespace conv {
 
-inline bool is_conv_3d(const prb_t *p)
-{
-    return (p->id > 1) ? 1 : 0;
+inline bool is_conv_3d(const prb_t *p) {
+    return p->id > 1;
 }
 
-inline bool is_conv_1d(const prb_t *p)
-{
-    return (!is_conv_3d(p) && p->ih == 1 && p->kh == 1
+inline bool is_conv_1d(const prb_t *p) {
+    return !is_conv_3d(p) && p->ih == 1 && p->kh == 1
                    && p->cfg[SRC].dt != mkldnn_s8 // temporary workaround until
-                   && p->cfg[SRC].dt != mkldnn_u8) // int8 jit supports 1d
-            ? 1 : 0;
+                   && p->cfg[SRC].dt != mkldnn_u8; // int8 jit supports 1d
 }
 
-double get_trust_nz_level(const prb_t *p, data_kind_t kind, bool final_compare)
-{
+double get_trust_nz_level(const prb_t *p, data_kind_t kind,
+        bool final_compare) {
     if (!final_compare)
         return p->cfg[kind].f_sparsity;
 
-    auto count_relu = [&]() {
+    auto negative_to_zero = [&]() {
+        using pk = attr_t::post_ops_t::kind_t;
         const auto &po = p->attr.post_ops;
         int count = 0;
-        for (int i = 0; i < po.len; ++i)
-            count += po.entry[i].kind == attr_t::post_ops_t::kind_t::RELU;
-        count = MAX2(count, p->merge == RELU ? 1 : 0);
-        return count;
+        for (int i = 0; i < po.len; ++i) {
+            auto k = po.entry[i].kind;
+            count +=
+                k == pk::RELU || k == pk::ELU || k == pk::SQRT || k == pk::BRELU;
+        }
+        return !!count;
     };
 
     double trust = 0.3; /* why? */
@@ -73,36 +72,70 @@ double get_trust_nz_level(const prb_t *p, data_kind_t kind, bool final_compare)
             trust = 0.8 * p->cfg[DST].f_sparsity; /* why? */
             break;
         case DST:
-            trust /= count_relu() == 0 ? 1 : 2;
+            trust /= negative_to_zero() == 0 ? 1 : 2;
             break;
     }
 
     return trust;
 }
 
+inline bool post_ops_require_integral_check(const prb_t *p) {
+    if (p->attr.post_ops.len == 0) return false;
+
+    using pk = attr_t::post_ops_t::kind_t;
+    const auto &ops = p->attr.post_ops;
+
+    // assumptions: at most 1 eltwise, scale = 1.
+    for (int idx = 0; idx < ops.len; ++idx) {
+        const auto &e = ops.entry[idx];
+        if (e.kind == pk::SUM || e.kind == pk::ABS) continue;
+        if (e.kind == pk::RELU && e.eltwise.alpha == 0.f) continue;
+        return true;
+    }
+
+    return false;
+}
+
 inline double get_eps(const prb_t *p, const data_kind_t kind) {
+    // Winograd specifics
     if (p->alg & WINO && p->dir & FLAG_WEI) {
         /*This is an empirical equation derived by observing growth error
           with increasing 'k' dimension in gemm of winograd*/
         return p->cfg[kind].eps *
             (MAX2(1, pow(10, 0.4 * log10(0.125 * p->mb * p->oh * p->ow))));
     }
+
+    // post-ops specifics
+    if (post_ops_require_integral_check(p))
+        return MAX2(1e-5, p->cfg[kind].eps);
+
     return p->cfg[kind].eps;
 }
 
 inline void get_result(const prb_t *p, const data_kind_t kind, res_t *r,
         const diff_norm_t diff_norm) {
-    bool wino_test = (p->alg & WINO)
-        && (diff_norm.rel_diff(norm_t::L2) <= get_eps(p, kind));
-    /* Ignoring elementwise errors for winograd,
-       since large relative error in few elements(which are anyways close to zero)
-       results in false positive failures*/
+    const float eps = get_eps(p, kind);
+
+    /* Ignoring element-wise errors for Winograd and in some cases of post-ops,
+     * since large relative error in few elements (which are anyways close
+     * to zero) results in false positive failures */
+
+    bool wino_test = (p->alg & WINO) && diff_norm.rel_diff(norm_t::L2) <= eps;
     if (wino_test) r->errors = 0;
-    r->state = r->errors ? FAILED : r->state;
+
+    bool post_ops_test = post_ops_require_integral_check(p)
+        && diff_norm.rel_diff(norm_t::L2) <= eps;
+    if (post_ops_test) r->errors = 0;
+
+    if (r->errors) r->state = FAILED;
 }
 
 inline int compare_dat(const prb_t *p, data_kind_t kind, dnn_mem_t &mem_dt,
         dnn_mem_t &mem_fp, res_t *r, bool final_compare = false) {
+    const bool dont_complain = false
+        || (p->alg & WINO)
+        || post_ops_require_integral_check(p);
+
     size_t nelems = mem_dt.nelems();
 
     const char *skind = data_kind2str(kind);
@@ -153,7 +186,7 @@ inline int compare_dat(const prb_t *p, data_kind_t kind, dnn_mem_t &mem_dt,
         }
         if (!ok) {
             r->errors++;
-            if ((!(p->alg & WINO) && r->errors < 10) || verbose >=10) {
+            if ((!dont_complain && r->errors < 10) || verbose >=10) {
                 int mb_or_g = 0, g_or_oc = 0, c = 0, d = 0, h = 0, w = 0;
                 switch (kind) {
                 case SRC: inv_src_off_f(p, i, mb_or_g, g_or_oc, c, d, h, w); break;
@@ -189,14 +222,15 @@ inline int compare_dat(const prb_t *p, data_kind_t kind, dnn_mem_t &mem_dt,
     }
 
     diff_norm.done();
+    get_result(p, kind, r, diff_norm);
 
     if (final_compare || r->errors) {
         const int vl = r->errors ? 0 : 2;
-        print(vl, "@@@ [%s] %sdiff: l0(``%g``) "
+        print(vl, "@@@ [%s] %sdiff: err:%d, l0(``%g``) "
                 "l1:(%g,%g,%g,``%g``) "
                 "l2:(%g,%g,%g,``%g``) "
                 "l8:(%g,%g,%g,``%g``)\n",
-                skind, final_compare ? "final: " : "",
+                skind, final_compare ? "final: " : "", (int)r->errors,
                 diff_norm.rel_diff(norm_t::L0),
                 diff_norm.a_[norm_t::L1], diff_norm.b_[norm_t::L1],
                 diff_norm.diff_[norm_t::L1], diff_norm.rel_diff(norm_t::L1),
@@ -236,8 +270,6 @@ inline int compare_dat(const prb_t *p, data_kind_t kind, dnn_mem_t &mem_dt,
                 non_zero, (unsigned long)r->total);
     }
 
-    get_result(p, kind, r, diff_norm);
-
     if (final_compare && r->state == UNTESTED)
         r->state = PASSED; /* optimism */
 
@@ -298,7 +330,7 @@ int fill_wei(const prb_t *p, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp,
 
     dnn_mem_t *p_mem_00 = check_reorder
         ? new dnn_mem_t(mem_dt.md_, mkldnn_f32,
-            get_default_format(mem_dt.md_.ndims, GWEI))
+            get_default_format(mem_dt.md_.ndims, p->has_groups ? GWEI : WEI))
         : &mem_fp;
     dnn_mem_t &mem_00 = *p_mem_00;
 
@@ -394,47 +426,59 @@ inline int init_pd(const prb_t *p, mkldnn_convolution_desc_t &cd,
     mkldnn_memory_desc_t src_d, wei_d, bia_d, dst_d;
 
     int ndims = is_conv_3d(p) ? 5 : is_conv_1d(p) ? 3 : 4;
-    mkldnn_dims_t src_dims = {p->mb, p->ic, p->ih, p->iw};
     mkldnn_dims_t src_1d_dims = {p->mb, p->ic, p->iw};
+    mkldnn_dims_t src_2d_dims = {p->mb, p->ic, p->ih, p->iw};
     mkldnn_dims_t src_3d_dims = {p->mb, p->ic, p->id, p->ih, p->iw};
-    mkldnn_dims_t wei_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kh, p->kw};
+
     mkldnn_dims_t wei_1d_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kw};
+    mkldnn_dims_t wei_2d_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kh, p->kw};
     mkldnn_dims_t wei_3d_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kd, p->kh, p->kw};
+
     mkldnn_dims_t bia_dims = {p->oc};
-    mkldnn_dims_t dst_dims = {p->mb, p->oc, p->oh, p->ow};
+
     mkldnn_dims_t dst_1d_dims = {p->mb, p->oc, p->ow};
+    mkldnn_dims_t dst_2d_dims = {p->mb, p->oc, p->oh, p->ow};
     mkldnn_dims_t dst_3d_dims = {p->mb, p->oc, p->od, p->oh, p->ow};
 
     DNN_SAFE(mkldnn_memory_desc_init(&src_d, ndims,
-        is_conv_3d(p) ? src_3d_dims : is_conv_1d(p) ? src_1d_dims : src_dims,
+        is_conv_3d(p) ? src_3d_dims : is_conv_1d(p) ? src_1d_dims : src_2d_dims,
         p->cfg[SRC].dt, mkldnn_any), WARN);
-    DNN_SAFE(mkldnn_memory_desc_init(&wei_d, ndims + 1,
-        is_conv_3d(p) ? wei_3d_dims :  is_conv_1d(p) ? wei_1d_dims : wei_dims,
+
+    DNN_SAFE(mkldnn_memory_desc_init(&wei_d, ndims + p->has_groups,
+        is_conv_3d(p)
+        ? &wei_3d_dims[!p->has_groups]
+        : is_conv_1d(p)
+        ? &wei_1d_dims[!p->has_groups]
+        : &wei_2d_dims[!p->has_groups],
         p->cfg[WEI].dt, mkldnn_any), WARN);
+
     DNN_SAFE(mkldnn_memory_desc_init(&bia_d, 1, bia_dims, p->cfg[BIA].dt,
         mkldnn_any), WARN);
+
     DNN_SAFE(mkldnn_memory_desc_init(&dst_d, ndims,
-        is_conv_3d(p) ? dst_3d_dims : is_conv_1d(p) ? dst_1d_dims : dst_dims,
+        is_conv_3d(p) ? dst_3d_dims : is_conv_1d(p) ? dst_1d_dims : dst_2d_dims,
         p->cfg[DST].dt, mkldnn_any), WARN);
-    int strides_nd[] = {p->sd, p->sh, p->sw};
-    int dilates_nd[] = {p->dd, p->dh, p->dw};
-    int padding_nd[] = {p->pd, p->ph, p->pw};
+
+    ptrdiff_t strides_nd[] = {p->sd, p->sh, p->sw};
+    ptrdiff_t dilates_nd[] = {p->dd, p->dh, p->dw};
+    ptrdiff_t padding_nd[] = {p->pd, p->ph, p->pw};
 
     auto bph = [&](int ih, int oh, int kh, int sh, int ph, int dh) {
         return (oh - 1) * sh - ih + ((kh - 1) * (dh + 1) + 1) - ph;
     };
-    int padding_r_nd[] = {
+    ptrdiff_t padding_r_nd[] = {
         bph(p->id, p->od, p->kd, p->sd, p->pd, p->dd),
         bph(p->ih, p->oh, p->kh, p->sh, p->ph, p->dh),
         bph(p->iw, p->ow, p->kw, p->sw, p->pw, p->dw)};
 
-    int *strides = strides_nd + (5 - ndims);
-    int *dilates = dilates_nd + (5 - ndims);
-    int *padding = padding_nd + (5 - ndims);
-    int *padding_r = padding_r_nd + (5 - ndims);
+    ptrdiff_t *strides = strides_nd + (5 - ndims);
+    ptrdiff_t *dilates = dilates_nd + (5 - ndims);
+    ptrdiff_t *padding = padding_nd + (5 - ndims);
+    ptrdiff_t *padding_r = padding_r_nd + (5 - ndims);
 
     mkldnn_alg_kind_t alg = mkldnn_convolution_direct;
     if (p->alg == WINO) alg = mkldnn_convolution_winograd;
+    if (p->alg == AUTO) alg = mkldnn_convolution_auto;
 
     switch (p->dir) {
     case FWD_D: case FWD_B: case FWD_I:
@@ -467,15 +511,8 @@ inline int init_pd(const prb_t *p, mkldnn_convolution_desc_t &cd,
     auto mkldnn_attr = create_mkldnn_attr(p->attr, p->oc, p->scales);
 
     mkldnn_status_t init_status = mkldnn_success;
-    if (p->merge == RELU) {
-        mkldnn_convolution_relu_desc_t crd;
-        DNN_SAFE(mkldnn_convolution_relu_desc_init(&crd, &cd, 0), WARN);
-        init_status = mkldnn_primitive_desc_create_v2(&cpd, &crd, mkldnn_attr,
+    init_status = mkldnn_primitive_desc_create_v2(&cpd, &cd, mkldnn_attr,
                 engine, NULL);
-    } else {
-        init_status = mkldnn_primitive_desc_create_v2(&cpd, &cd, mkldnn_attr,
-                engine, NULL);
-    }
 
     mkldnn_primitive_attr_destroy(mkldnn_attr);
 
@@ -498,6 +535,13 @@ inline int init_pd(const prb_t *p, mkldnn_convolution_desc_t &cd,
                 mkldnn_primitive_desc_query_pd(cpd, query, index));
     };
 
+    if (p->alg == AUTO) {
+        mkldnn_convolution_desc_t *temp_conv_desc = {0};
+        DNN_SAFE(mkldnn_primitive_desc_query(cpd,
+                mkldnn_query_convolution_d, 0, &temp_conv_desc), CRIT);
+        cd.alg_kind = temp_conv_desc->alg_kind;
+    }
+
     if (p->dir == BWD_D)
         cd.diff_src_desc = q(mkldnn_query_diff_src_pd);
     else
@@ -532,6 +576,17 @@ int doit(const prb_t *p, res_t *r) {
     mkldnn_primitive_t c{};
 
     SAFE(init_pd(p, cd, cpd, r), WARN);
+
+    prb_t *p_temp = nullptr;
+    if (p->alg == AUTO || p->alg == WINO) {
+        p_temp = new prb_t((desc_t)*p, p->dir, p->cfg,
+                    p->alg, p->attr, p->mb);
+        if (p->alg == AUTO) p_temp->alg = alg_kind2alg(cd.alg_kind);
+        p_temp->cfg = auto_cfg(p_temp->alg, p->cfg);
+        p = p_temp;
+    }
+
+
     if (r->state == SKIPPED || r->state == UNIMPLEMENTED)
         return OK;
 
@@ -548,7 +603,8 @@ int doit(const prb_t *p, res_t *r) {
     dnn_mem_t &bia_dt = *p_bia_dt;
 
     auto src_format = get_default_format(src_dt.md_.ndims, DATA);
-    auto wei_format = get_default_format(wei_dt.md_.ndims, GWEI);
+    auto wei_format = get_default_format(wei_dt.md_.ndims,
+        p->has_groups ? GWEI : WEI);
 
     const auto fp = mkldnn_f32;
     dnn_mem_t src_fp(src_dt_d, fp, src_format);
@@ -574,7 +630,6 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_fwd(p, src_fp, wei_fp, bia_fp, dst_fp);
             dnn_mem_t dst(dst_dt, fp, src_format);
-            SAFE(dst.reorder(dst_dt), WARN);
             SAFE(compare_dst(p, dst, dst_fp, r, true), WARN);
         }
     } else if (p->dir == BWD_D) {
@@ -585,7 +640,6 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_bwd_d(p, src_fp, wei_fp, bia_fp, dst_fp);
             dnn_mem_t src(src_dt, fp, src_format);
-            SAFE(src.reorder(src_dt), WARN);
             SAFE(compare_src(p, src, src_fp, r, true), WARN);
         }
     } else if (p->dir & FLAG_BWD && p->dir & FLAG_WEI) {
@@ -598,11 +652,9 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_bwd_w(p, src_fp, wei_fp, bia_fp, dst_fp);
             dnn_mem_t wei(wei_dt, fp, wei_format);
-            SAFE(wei.reorder(wei_dt), WARN);
             SAFE(compare_wei(p, wei, wei_fp, r, true), WARN);
             if (p->dir & FLAG_BIA) {
                 dnn_mem_t bia(bia_dt, fp, mkldnn_x);
-                SAFE(bia.reorder(bia_dt), WARN);
                 SAFE(compare_bia(p, bia, bia_fp, r, true), WARN);
             }
         }
@@ -632,6 +684,7 @@ int doit(const prb_t *p, res_t *r) {
 
     delete p_bia_dt;
     delete p_bia_fp;
+    delete p_temp;
 
     return OK;
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_aux.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_aux.cpp
index 8301e874c..44a504d25 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_aux.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_aux.cpp
@@ -30,6 +30,7 @@ namespace conv {
 
 alg_t str2alg(const char *str) {
 #define CASE(_alg) if (!strcasecmp(STRINGIFY(_alg), str)) return _alg
+    CASE(AUTO);
     CASE(DIRECT);
     CASE(WINO);
 #undef CASE
@@ -38,26 +39,19 @@ alg_t str2alg(const char *str) {
 }
 
 const char *alg2str(alg_t alg) {
+    if (alg == AUTO) return "auto";
     if (alg == DIRECT) return "direct";
     if (alg == WINO) return "wino";
     assert(!"unknown algorithm");
     return "unknown algorithm";
 }
 
-merge_t str2merge(const char *str) {
-#define CASE(_mrg) if (!strcasecmp(STRINGIFY(_mrg), str)) return _mrg
-    CASE(NONE);
-    CASE(RELU);
-#undef CASE
-    assert(!"unknown merge");
-    return NONE;
-}
-
-const char *merge2str(merge_t merge) {
-    if (merge == NONE) return "none";
-    if (merge == RELU) return "relu";
-    assert(!"unknown merge");
-    return "unknown merge";
+alg_t alg_kind2alg(mkldnn_alg_kind_t alg) {
+    if (alg == mkldnn_convolution_auto) return AUTO;
+    if (alg == mkldnn_convolution_direct) return DIRECT;
+    if (alg == mkldnn_convolution_winograd) return WINO;
+    assert(!"unknown algorithm");
+    return DIRECT;
 }
 
 int str2desc(desc_t *desc, const char *str, bool is_deconv) {
@@ -78,7 +72,9 @@ int str2desc(desc_t *desc, const char *str, bool is_deconv) {
      *  - if padding is undefined => compute trivial padding
      */
 
-    d.g = 1; d.mb = 2; d.sd = d.sh = d.sw = 1; d.dd = d.dh = d.dw = 0; d.name = "\"wip\"";
+    d.g = 1; d.mb = 2; d.sd = d.sh = d.sw = 1; d.dd = d.dh = d.dw = 0;
+    d.has_groups = false, d.name = "\"wip\"";
+    d.pw = -1; d.ph = -1; d.pd = -1;
 
     const char *s = str;
     assert(s);
@@ -87,6 +83,7 @@ int str2desc(desc_t *desc, const char *str, bool is_deconv) {
         if (!strncmp(p, s, strlen(p))) { \
             ok = 1; s += strlen(p); \
             char *end_s; d. c = strtol(s, &end_s, 10); s += (end_s - s); \
+            if (!strncmp(p, "g", 1)) d.has_groups = true; \
             /* printf("@@@debug: %s: %d\n", p, d. c); */ \
         } \
     } while (0)
@@ -123,34 +120,35 @@ int str2desc(desc_t *desc, const char *str, bool is_deconv) {
             return ((o - 1) * s - i + ((k - 1) * (d + 1) + 1)) / 2;
     };
 
-    const bool no_d = (d.id | d.kd | d.od | d.pd | d.dd) == 0 && d.sd == 1;
-    const bool no_h = (d.ih | d.kh | d.oh | d.ph | d.dh) == 0 && d.sh == 1;
-    const bool no_w = (d.iw | d.kw | d.ow | d.pw | d.dw) == 0 && d.sw == 1;
-
+    const bool no_d = (d.id | d.kd | d.od | d.dd) == 0 && d.sd == 1 && d.pd < 1;
+    const bool no_h = (d.ih | d.kh | d.oh | d.dh) == 0 && d.sh == 1 && d.ph < 1;
+    const bool no_w = (d.iw | d.kw | d.ow | d.dw) == 0 && d.sw == 1 && d.pw < 1;
     if (!no_h) {
         if (!d.ih || !d.kh) return FAIL;
-
-        if (!d.oh) d.oh = compute_out(is_deconv, d.ih, d.kh, d.sh, d.ph, d.dh);
-        else if (!d.ph && d.oh != compute_out(is_deconv, d.ih, d.kh, d.sh, d.ph, d.dh))
+        if (!d.oh) {
+            d.ph = 0;
+            d.oh = compute_out(is_deconv, d.ih, d.kh, d.sh, d.ph, d.dh);
+        } else if (d.ph < 0)
             d.ph = compute_pad(is_deconv, d.oh, d.ih, d.kh, d.sh, d.dh);
     }
 
     if (!no_w) {
         if (!d.iw || !d.kw) return FAIL;
-
-        if (!d.ow) d.ow = compute_out(is_deconv, d.iw, d.kw, d.sw, d.pw, d.dw);
-        else if (!d.pw && d.ow != compute_out(is_deconv, d.iw, d.kw, d.sw, d.pw, d.dw))
+        if (!d.ow) {
+            d.pw = 0;
+            d.ow = compute_out(is_deconv, d.iw, d.kw, d.sw, d.pw, d.dw);
+        } else if (d.pw < 0)
             d.pw = compute_pad(is_deconv, d.ow, d.iw, d.kw, d.sw, d.dw);
     }
 
     if (!no_d && d.id) {
         if (!d.id || !d.kd) return FAIL;
-
-        if (!d.od) d.od = compute_out(is_deconv, d.id, d.kd, d.sd, d.pd, d.dd);
-        else if (!d.pd && d.od != compute_out(is_deconv, d.id, d.kd, d.sd, d.pd, d.dd))
+        if (!d.od) {
+            d.pd = 0;
+            d.od = compute_out(is_deconv, d.id, d.kd, d.sd, d.pd, d.dd);
+        } else if (d.pd < 0)
             d.pd = compute_pad(is_deconv, d.od, d.id, d.kd, d.sd, d.dd);
     }
-
     if (no_w && no_h && d.id) {
         d.iw = d.ih = d.id;
         d.kw = d.kh = d.kd;
@@ -187,7 +185,7 @@ void desc2str(const desc_t *d, char *buffer, bool canonical) {
         buffer += l; rem_len -= l; \
     } while(0)
 
-    if (canonical || d->g != 1) DPRINT("g%d", d->g);
+    if (canonical || d->has_groups) DPRINT("g%d", d->g);
     if (canonical || d->mb != 2) DPRINT("mb%d", d->mb);
 
     const bool half_form = (d->ih == d->iw && d->kh == d->kw && d->oh == d->ow
@@ -230,19 +228,25 @@ void desc2str(const desc_t *d, char *buffer, bool canonical) {
 void prb_t::count_ops() {
     if (ops > 0) return;
 
+    int od_t = is_deconv ? this->id : this->od;
+    int oh_t = is_deconv ? this->ih : this->oh;
+    int ow_t = is_deconv ? this->iw : this->ow;
+    int id_t = is_deconv ? this->od : this->id;
+    int ih_t = is_deconv ? this->oh : this->ih;
+    int iw_t = is_deconv ? this->ow : this->iw;
     double sp_ops = 0;
-    for (int od = 0; od < this->od; ++od) {
-    for (int oh = 0; oh < this->oh; ++oh) {
-    for (int ow = 0; ow < this->ow; ++ow) {
+    for (int od = 0; od < od_t; ++od) {
+    for (int oh = 0; oh < oh_t; ++oh) {
+    for (int ow = 0; ow < ow_t; ++ow) {
         for (int kd = 0; kd < this->kd; ++kd) {
             const int id = od * this->sd - this->pd + kd * (this->dd + 1);
-            if (id < 0 || id >= this->id) continue;
+            if (id < 0 || id >= id_t) continue;
             for (int kh = 0; kh < this->kh; ++kh) {
                 const int ih = oh * this->sh - this->ph + kh * (this->dh + 1);
-                if (ih < 0 || ih >= this->ih) continue;
+                if (ih < 0 || ih >= ih_t) continue;
                 for (int kw = 0; kw < this->kw; ++kw) {
                     const int iw = ow * this->sw - this->pw + kw * (this->dw + 1);
-                    if (iw < 0 || iw >= this->iw) continue;
+                    if (iw < 0 || iw >= iw_t) continue;
                     sp_ops += 1;
                 }
             }
@@ -278,13 +282,11 @@ void prb_t::generate_oscales() {
 
 void prb2str(const prb_t *p, char *buffer, bool canonical) {
     char desc_buf[max_desc_len], attr_buf[max_attr_len];
-    char dir_str[32] = {0}, cfg_str[32] = {0}, alg_str[32] = {0},
-         merge_str[32] = {0};
+    char dir_str[32] = {0}, cfg_str[32] = {0}, alg_str[32] = {0};
     desc2str(p, desc_buf, canonical);
     snprintf(dir_str, sizeof(dir_str), "--dir=%s ", dir2str(p->dir));
     snprintf(cfg_str, sizeof(cfg_str), "--cfg=%s ", cfg2str(p->cfg));
     snprintf(alg_str, sizeof(alg_str), "--alg=%s ", alg2str(p->alg));
-    snprintf(merge_str, sizeof(merge_str), "--merge=%s ", merge2str(p->merge));
     bool is_attr_def = p->attr.is_def();
     if (!is_attr_def) {
         int len = snprintf(attr_buf, max_attr_len, "--attr=\"");
@@ -293,11 +295,10 @@ void prb2str(const prb_t *p, char *buffer, bool canonical) {
         len = (int)strnlen(attr_buf, max_attr_len);
         snprintf(attr_buf + len, max_attr_len - len, "\" ");
     }
-    snprintf(buffer, max_prb_len, "%s%s%s%s%s%s",
+    snprintf(buffer, max_prb_len, "%s%s%s%s%s",
             p->dir == FWD_B ? "" : dir_str,
             p->cfg == conf_f32 ? "" : cfg_str,
             p->alg == DIRECT ? "" : alg_str,
-            p->merge == NONE ? "" : merge_str,
             is_attr_def ? "" : attr_buf,
             desc_buf);
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_common.hpp
index d3969ec65..624338efc 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/conv_common.hpp
@@ -26,15 +26,19 @@
 #include "mkldnn_common.hpp"
 #include "mkldnn_memory.hpp"
 
+namespace deconv {
+/* some extra control parameters which shouldn't be placed in prb_t */
+extern const char *skip_impl; /* NULL or "" means do not skip anything */
+extern bool allow_unimpl; /* true means do not treat unimplemented as error */
+extern const char *perf_template; /* performance output template */
+}
+
 namespace conv {
 
-enum alg_t { DIRECT, WINO };
+enum alg_t { DIRECT, WINO, AUTO };
 alg_t str2alg(const char *str);
 const char *alg2str(alg_t alg);
-
-enum merge_t { NONE, RELU, };
-merge_t str2merge(const char *str);
-const char *merge2str(merge_t merge);
+alg_t alg_kind2alg(mkldnn_alg_kind_t alg);
 
 struct desc_t {
     int g, mb;
@@ -44,6 +48,7 @@ struct desc_t {
     int sd, sh, sw;
     int pd, ph, pw;
     int dd, dh, dw;
+    bool has_groups;
 
     const char *name;
 };
@@ -95,12 +100,13 @@ extern const _dt_conf_t conf_u8s8u8s32_wino;
 
 const dt_conf_t *str2cfg(const char *str);
 const char *cfg2str(const dt_conf_t *cfg);
+const dt_conf_t *auto_cfg(const alg_t alg, const dt_conf_t *cfg);
 
 struct prb_t: public desc_t {
     prb_t(const desc_t &desc, dir_t dir, const dt_conf_t *cfg, alg_t alg,
-            merge_t merge, const attr_t &attr, int mb = 0)
-        : desc_t(desc), dir(dir), cfg(cfg), alg(alg), merge(merge), attr(attr)
-        , ops(0), scales(NULL) {
+            const attr_t &attr, int mb = 0, bool is_deconv = false)
+        : desc_t(desc), dir(dir), cfg(cfg), alg(alg), attr(attr)
+        , ops(0), scales(NULL), is_deconv(is_deconv) {
         if (mb) this->mb = mb;
         count_ops();
         generate_oscales();
@@ -110,11 +116,11 @@ struct prb_t: public desc_t {
     dir_t dir;
     const dt_conf_t *cfg;
     alg_t alg;
-    merge_t merge;
     attr_t attr;
 
     double ops;
     float *scales;
+    bool is_deconv;
 
     void count_ops();
     void generate_oscales();
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/deconv.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/deconv.cpp
index ec0e0d04d..034acfe9e 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/deconv.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/deconv.cpp
@@ -33,15 +33,15 @@ using namespace conv;
 
 namespace deconv {
 
-inline static void swap(int &a, int &b)
-{
-    int temp = a;
+template <typename T>
+inline static void swap(T &a, T &b) {
+    T temp = a;
     a = b;
     b = temp;
 }
-inline bool is_deconv_3d(const prb_t *p)
-{
-    return (p->id > 1 || p->od > 1) ? 1 : 0;
+
+inline bool is_deconv_3d(const prb_t *p) {
+    return p->id > 1;
 }
 
 inline int transpose_data_wei(const prb_t *p, dnn_mem_t &wei, dnn_mem_t &wei_tr) {
@@ -61,43 +61,42 @@ inline int init_pd(const prb_t *p, mkldnn_deconvolution_desc_t &cd,
     int ndims = is_deconv_3d(p) ? 5 : 4;
 
     mkldnn_memory_desc_t src_d, wei_d, bia_d, dst_d;
-    mkldnn_dims_t src_dims = {p->mb, p->ic, p->ih, p->iw};
+    mkldnn_dims_t src_2d_dims = {p->mb, p->ic, p->ih, p->iw};
     mkldnn_dims_t src_3d_dims = {p->mb, p->ic, p->id, p->ih, p->iw};
-    mkldnn_dims_t wei_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kh, p->kw};
+    mkldnn_dims_t wei_2d_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kh, p->kw};
     mkldnn_dims_t wei_3d_dims = {p->g, p->oc / p->g, p->ic / p->g, p->kd, p->kh, p->kw};
     mkldnn_dims_t bia_dims = {p->oc};
-    mkldnn_dims_t dst_dims = {p->mb, p->oc, p->oh, p->ow};
+    mkldnn_dims_t dst_2d_dims = {p->mb, p->oc, p->oh, p->ow};
     mkldnn_dims_t dst_3d_dims = {p->mb, p->oc, p->od, p->oh, p->ow};
-
     DNN_SAFE(mkldnn_memory_desc_init(&src_d, ndims,
-        is_deconv_3d(p) ? src_3d_dims : src_dims, p->cfg[SRC].dt, mkldnn_any), WARN);
-    DNN_SAFE(mkldnn_memory_desc_init(&wei_d, ndims + 1,
-        is_deconv_3d(p) ? wei_3d_dims : wei_dims, p->cfg[WEI].dt, mkldnn_any), WARN);
+        is_deconv_3d(p) ? src_3d_dims : src_2d_dims, p->cfg[SRC].dt, mkldnn_any), WARN);
+    DNN_SAFE(mkldnn_memory_desc_init(&wei_d, ndims + p->has_groups,
+        is_deconv_3d(p)
+        ? &wei_3d_dims[!p->has_groups]
+        : &wei_2d_dims[!p->has_groups],
+        p->cfg[WEI].dt, mkldnn_any), WARN);
     DNN_SAFE(mkldnn_memory_desc_init(&bia_d, 1, bia_dims, p->cfg[BIA].dt, mkldnn_any), WARN);
     DNN_SAFE(mkldnn_memory_desc_init(&dst_d, ndims,
-        is_deconv_3d(p) ? dst_3d_dims : dst_dims, p->cfg[DST].dt, mkldnn_any), WARN);
-    int strides_2d[] = {p->sh, p->sw};
-    int dilates_2d[] = {p->dh, p->dw};
-    int padding_2d[] = {p->ph, p->pw};
-    int strides_3d[] = {p->sd, p->sh, p->sw};
-    int dilates_3d[] = {p->dd, p->dh, p->dw};
-    int padding_3d[] = {p->pd, p->ph, p->pw};
+        is_deconv_3d(p) ? dst_3d_dims : dst_2d_dims, p->cfg[DST].dt, mkldnn_any), WARN);
+
+    ptrdiff_t strides_nd[] = {p->sd, p->sh, p->sw};
+    ptrdiff_t dilates_nd[] = {p->dd, p->dh, p->dw};
+    ptrdiff_t padding_nd[] = {p->pd, p->ph, p->pw};
 
     auto bph = [&](int ih, int oh, int kh, int sh, int ph, int dh) {
         return (oh - 1) * sh - ih + ((kh - 1) * (dh + 1) + 1) - ph;
     };
-    int padding_r_3d[] = {
+
+    ptrdiff_t padding_r_nd[] = {
         bph(p->od, p->id, p->kd, p->sd, p->pd, p->dd),
         bph(p->oh, p->ih, p->kh, p->sh, p->ph, p->dh),
         bph(p->ow, p->iw, p->kw, p->sw, p->pw, p->dw)};
-    int padding_r_2d[] = {
-        bph(p->oh, p->ih, p->kh, p->sh, p->ph, p->dh),
-        bph(p->ow, p->iw, p->kw, p->sw, p->pw, p->dw)};
 
-    int *strides = is_deconv_3d(p) ? strides_3d : strides_2d;
-    int *dilates = is_deconv_3d(p) ? dilates_3d : dilates_2d;
-    int *padding = is_deconv_3d(p) ? padding_3d : padding_2d;
-    int *padding_r = is_deconv_3d(p) ? padding_r_3d : padding_r_2d;
+    ptrdiff_t *strides = strides_nd + (5 - ndims);
+    ptrdiff_t *dilates = dilates_nd + (5 - ndims);
+    ptrdiff_t *padding = padding_nd + (5 - ndims);
+    ptrdiff_t *padding_r = padding_r_nd + (5 - ndims);
+
     mkldnn_alg_kind_t alg = mkldnn_deconvolution_direct;
     if (p->alg == WINO) alg = mkldnn_deconvolution_winograd;
 
@@ -182,7 +181,7 @@ int doit(const prb_t *p, res_t *r) {
     *r = res_zero;
     bool with_groups = 1;
 
-    prb_t p_tr((desc_t)*p, p->dir, p->cfg, p->alg, p->merge, p->attr, p->mb);
+    prb_t p_tr((desc_t)*p, p->dir, p->cfg, p->alg, p->attr, p->mb, true);
     swap(p_tr.ic,  p_tr.oc);
     swap(p_tr.ih,  p_tr.oh);
     swap(p_tr.id,  p_tr.od);
@@ -210,8 +209,9 @@ int doit(const prb_t *p, res_t *r) {
         ? new dnn_mem_t(bia_dt_d, p->cfg[BIA].dt) : new dnn_mem_t();
     dnn_mem_t &bia_dt = *p_bia_dt;
 
-    auto src_format = is_deconv_3d(p) ? mkldnn_ncdhw : mkldnn_nchw;
-    auto wei_format = is_deconv_3d(p) ? mkldnn_goidhw : mkldnn_goihw;
+    auto src_format = get_default_format(src_dt.md_.ndims, DATA);
+    auto wei_format = get_default_format(wei_dt.md_.ndims,
+        p->has_groups ? GWEI : WEI);
 
     const auto fp = mkldnn_f32;
 
@@ -243,7 +243,6 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_bwd_d(&p_tr, dst_fp, wei_tr_fp, bia_fp, src_fp);
             dnn_mem_t dst(dst_dt, fp, src_format);
-            SAFE(dst.reorder(dst_dt), WARN);
             SAFE(compare_dst(p, dst, dst_fp, r, true), WARN);
         }
     } else if (p->dir == BWD_D) {
@@ -254,7 +253,6 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_fwd(&p_tr, dst_fp, wei_tr_fp, zero_fp, src_fp);
             dnn_mem_t src(src_dt, fp, src_format);
-            SAFE(src.reorder(src_dt), WARN);
             SAFE(compare_src(p, src, src_fp, r, true), WARN);
         }
     } else if (p->dir & FLAG_BWD && p->dir & FLAG_WEI) {
@@ -268,12 +266,10 @@ int doit(const prb_t *p, res_t *r) {
             compute_ref_bwd_weights(&p_tr, dst_fp, wei_tr_fp, src_fp);
             transpose_data_wei(&p_tr, wei_tr_fp, wei_fp);
             dnn_mem_t wei(wei_dt, fp, wei_format);
-            SAFE(wei.reorder(wei_dt), WARN);
             SAFE(compare_wei(&p_tr, wei, wei_fp, r, true), WARN);
             if (p->dir & FLAG_BIA) {
                 compute_ref_bwd_bias(p, bia_fp, dst_fp);
                 dnn_mem_t bia(bia_dt, fp, mkldnn_x);
-                SAFE(bia.reorder(bia_dt), WARN);
                 SAFE(compare_bia(p, bia, bia_fp, r, true), WARN);
             }
         }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_conv.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_conv.cpp
index a471d21df..60b791294 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_conv.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_conv.cpp
@@ -15,6 +15,7 @@
 *******************************************************************************/
 
 #include "src/common/mkldnn_thread.hpp"
+#include "src/common/math_utils.hpp"
 
 #include "conv/conv_common.hpp"
 
@@ -85,17 +86,29 @@ void compute_ref_direct_fwd(const prb_t *p, dnn_mem_t &src_m,
     };
 
     auto maybe_post_ops = [&](float &conv_res, float dst) {
+        using namespace mkldnn::impl::math;
+
         const auto &ops = p->attr.post_ops;
         for (int idx = 0; idx < ops.len; ++idx) {
             using pk = attr_t::post_ops_t::kind_t;
             const auto &e = ops.entry[idx];
+
+            const auto &s = e.eltwise.scale;
+            const auto &a = e.eltwise.alpha;
+            const auto &b = e.eltwise.beta;
+
             switch (e.kind) {
-            case pk::SUM:
-                conv_res += e.sum.scale * dst;
-                break;
-            case pk::RELU:
-                conv_res = e.eltwise.scale * (conv_res < 0 ? 0 : conv_res);
-                break;
+            case pk::SUM: conv_res += e.sum.scale * dst; break;
+            case pk::RELU: conv_res = s*relu_fwd(conv_res, a); break;
+            case pk::TANH: conv_res = s*tanh_fwd(conv_res); break;
+            case pk::ELU: conv_res = s*elu_fwd(conv_res, a); break;
+            case pk::SQUARE: conv_res = s*square_fwd(conv_res); break;
+            case pk::ABS: conv_res = s*abs_fwd(conv_res); break;
+            case pk::SQRT: conv_res = s*sqrt_fwd(conv_res); break;
+            case pk::LINEAR: conv_res = s*linear_fwd(conv_res, a, b); break;
+            case pk::BRELU: conv_res = s*bounded_relu_fwd(conv_res, a); break;
+            case pk::SRELU: conv_res = s*soft_relu_fwd(conv_res); break;
+            case pk::LOGISTIC: conv_res = s*logistic_fwd(conv_res); break;
             default:
                 assert(!"unknown attr::post_ops::kind");
             }
@@ -115,9 +128,6 @@ void compute_ref_direct_fwd(const prb_t *p, dnn_mem_t &src_m,
                 conv_res += ((float*)bia_m)[bia_off];
             }
 
-            if (p->merge == RELU && conv_res < 0)
-                conv_res = 0;
-
             maybe_scale(conv_res, g * p->oc / p->g + oc);
             maybe_post_ops(conv_res, dst);
 
@@ -211,21 +221,55 @@ void compute_ref_direct_bwd_d(const prb_t *p, dnn_mem_t &diff_src_m,
         }
     };
 
+    /* Used for Deconv FWD */
+    auto maybe_post_ops = [&](float &conv_res, float dst) {
+        using namespace mkldnn::impl::math;
+
+        const auto &ops = p->attr.post_ops;
+        for (int idx = 0; idx < ops.len; ++idx) {
+            using pk = attr_t::post_ops_t::kind_t;
+            const auto &e = ops.entry[idx];
+
+            const auto &s = e.eltwise.scale;
+            const auto &a = e.eltwise.alpha;
+            const auto &b = e.eltwise.beta;
+
+            switch (e.kind) {
+            case pk::SUM: conv_res += e.sum.scale * dst; break;
+            case pk::RELU: conv_res = s*relu_fwd(conv_res, a); break;
+            case pk::TANH: conv_res = s*tanh_fwd(conv_res); break;
+            case pk::ELU: conv_res = s*elu_fwd(conv_res, a); break;
+            case pk::SQUARE: conv_res = s*square_fwd(conv_res); break;
+            case pk::ABS: conv_res = s*abs_fwd(conv_res); break;
+            case pk::SQRT: conv_res = s*sqrt_fwd(conv_res); break;
+            case pk::LINEAR: conv_res = s*linear_fwd(conv_res, a, b); break;
+            case pk::BRELU: conv_res = s*bounded_relu_fwd(conv_res, a); break;
+            case pk::SRELU: conv_res = s*soft_relu_fwd(conv_res); break;
+            case pk::LOGISTIC: conv_res = s*logistic_fwd(conv_res); break;
+            default:
+                assert(!"unknown attr::post_ops::kind");
+            }
+        }
+    };
+
     mkldnn::impl::parallel_nd(p->g, p->mb, p->ic / p->g, p->id, p->ih, p->iw,
         [&](int g, int mb, int ic, int id, int ih, int iw) {
             size_t src_off = src_off_f(p, mb, g, ic, id, ih, iw);
             float &ds = ((float*)diff_src_m)[src_off];
-            ds = 0;
+            float conv_res = 0;
             if (fast)
-                ker_fast(ds, g, mb, ic, id, ih, iw);
+                ker_fast(conv_res, g, mb, ic, id, ih, iw);
             else
-                ker(ds, g, mb, ic, id, ih, iw);
+                ker(conv_res, g, mb, ic, id, ih, iw);
 
             if (p->dir & FLAG_BIA) {
                 const size_t bia_off = (size_t)g * p->ic / p->g + ic;
-                ds += ((float*)bia_m)[bia_off];
+                conv_res += ((float*)bia_m)[bia_off];
             }
-            maybe_scale(ds, g * p->ic / p->g + ic);
+            maybe_scale(conv_res, g * p->ic / p->g + ic);
+            maybe_post_ops(conv_res, ds);
+
+            ds = conv_res;
         }
     );
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_wino.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_wino.cpp
index a5c56a3f4..ac31f1fb9 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_wino.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/conv/ref_wino.cpp
@@ -422,10 +422,6 @@ void compute_wino_ref_fwd(const prb_t *p, dnn_mem_t &src_m, dnn_mem_t &wei_m,
                                             ((float *)bia_m)[bia_off] :
                                             0.f;
 
-                                    if (p->merge == RELU && conv_res < 0) {
-                                        conv_res = 0.f;
-                                    }
-
                                     const auto &ops = p->attr.post_ops;
                                     for (int idx = 0; idx < ops.len; ++idx) {
                                         using pk = attr_t::post_ops_t::kind_t;
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.cpp
index 2bb34299c..ca200aebf 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.cpp
@@ -102,6 +102,7 @@ data_kind_t fmt2data_kind(mkldnn_memory_format_t fmt) {
     case mkldnn_gOIw8i16o2i:
     case mkldnn_goihw:
     case mkldnn_hwigo:
+    case mkldnn_giohw:
     case mkldnn_hwigo_s8s8:
     case mkldnn_gOIhw8i8o:
     case mkldnn_gOIhw16i16o:
@@ -119,6 +120,7 @@ data_kind_t fmt2data_kind(mkldnn_memory_format_t fmt) {
     case mkldnn_gOhwi16o:
     case mkldnn_Goihw8g:
     case mkldnn_Goihw16g:
+    case mkldnn_Goihw16g_s8s8:
     case mkldnn_gOhIw16o4i:
     case mkldnn_goidhw:
     case mkldnn_gOIdhw16i16o:
@@ -192,18 +194,56 @@ attr_t::post_ops_t::kind_t attr_t::post_ops_t::str2kind(const char *str) {
 #define CASE(_knd) if (!strcasecmp(STRINGIFY(_knd), str)) return _knd
     CASE(SUM);
     CASE(RELU);
+    CASE(TANH);
+    CASE(ELU);
+    CASE(SQUARE);
+    CASE(ABS);
+    CASE(SQRT);
+    CASE(LINEAR);
+    CASE(BRELU);
+    CASE(SRELU);
+    CASE(LOGISTIC);
 #undef CASE
     assert(!"unknown attr::post_ops::kind");
     return KIND_TOTAL;
 }
 
 const char *attr_t::post_ops_t::kind2str(attr_t::post_ops_t::kind_t kind) {
-    if (kind == SUM) return "sum";
-    if (kind == RELU) return "relu";
+#define CASE(_knd, str) if (kind == _knd) return str
+    CASE(SUM, "sum");
+    CASE(RELU, "relu");
+    CASE(TANH, "tanh");
+    CASE(ELU, "elu");
+    CASE(SQUARE, "square");
+    CASE(ABS, "abs");
+    CASE(SQRT, "sqrt");
+    CASE(LINEAR, "linear");
+    CASE(BRELU, "brelu");
+    CASE(SRELU, "srelu");
+    CASE(LOGISTIC, "logistic");
+#undef CASE
     assert(!"unknown attr::post_ops::kind");
     return "unknown attr::post_ops::kind";
 }
 
+mkldnn_alg_kind_t attr_t::post_ops_t::kind2mkldnn_kind(
+        attr_t::post_ops_t::kind_t kind) {
+#define CASE(_knd, _mknd) if (kind == _knd) return _mknd
+    CASE(RELU, mkldnn_eltwise_relu);
+    CASE(TANH, mkldnn_eltwise_tanh);
+    CASE(ELU, mkldnn_eltwise_elu);
+    CASE(SQUARE, mkldnn_eltwise_square);
+    CASE(ABS, mkldnn_eltwise_abs);
+    CASE(SQRT, mkldnn_eltwise_sqrt);
+    CASE(LINEAR, mkldnn_eltwise_linear);
+    CASE(BRELU, mkldnn_eltwise_bounded_relu);
+    CASE(SRELU, mkldnn_eltwise_soft_relu);
+    CASE(LOGISTIC, mkldnn_eltwise_logistic);
+#undef CASE
+    assert(!"unknown attr::post_ops::kind");
+    return mkldnn_alg_kind_undef;
+}
+
 int attr_t::post_ops_t::from_str(const char *str, const char **end_s) {
     *this = post_ops_t();
 
@@ -236,9 +276,26 @@ int attr_t::post_ops_t::from_str(const char *str, const char **end_s) {
                     } else {
                         e.sum.scale = 1.f;
                     }
-                } else if (k == RELU) {
+                } else {
+                    e.eltwise.alg = kind2mkldnn_kind(k);
                     e.eltwise.scale = 1.f;
                     e.eltwise.alpha = e.eltwise.beta = 0.f;
+
+                    for (int i = 0; i < 3; ++i) {
+                        // :alpha:beta:scale
+                        float &val = i == 0 ? e.eltwise.alpha
+                            : i == 1 ? e.eltwise.beta : e.eltwise.scale;
+                        if (*s == ':') {
+                            char *end;
+                            val = strtof(++s, &end);
+                            if (end == s) return FAIL;
+                            s = end;
+                        } else {
+                            break;
+                        }
+                    }
+
+                    if (e.eltwise.scale <= 0) return FAIL;
                 }
 
                 break;
@@ -265,7 +322,18 @@ void attr_t::post_ops_t::to_str(char *buffer, char **end_b) const {
             buffer += sprintf(buffer, "%s:%g", kind2str(e.kind), e.sum.scale);
             break;
         case RELU:
-            buffer += sprintf(buffer, "%s", kind2str(e.kind));
+        case TANH:
+        case ELU:
+        case SQUARE:
+        case ABS:
+        case SQRT:
+        case LINEAR:
+        case BRELU:
+        case SRELU:
+        case LOGISTIC:
+            buffer += sprintf(buffer, "%s:%g", kind2str(e.kind), e.eltwise.alpha);
+            if (e.eltwise.beta != 0.f || e.eltwise.scale != 1.f)
+                buffer += sprintf(buffer, ":%g:%g", e.eltwise.beta, e.eltwise.scale);
             break;
         default:
             assert(!"unknown kind");
@@ -372,9 +440,17 @@ mkldnn_primitive_attr_t create_mkldnn_attr(const attr_t &attr, int scale_cnt,
                 DNN_SAFE_V(mkldnn_post_ops_append_sum(ops, e.sum.scale));
                 break;
             case attr_t::post_ops_t::RELU:
+            case attr_t::post_ops_t::TANH:
+            case attr_t::post_ops_t::ELU:
+            case attr_t::post_ops_t::SQUARE:
+            case attr_t::post_ops_t::ABS:
+            case attr_t::post_ops_t::SQRT:
+            case attr_t::post_ops_t::LINEAR:
+            case attr_t::post_ops_t::BRELU:
+            case attr_t::post_ops_t::SRELU:
+            case attr_t::post_ops_t::LOGISTIC:
                 DNN_SAFE_V(mkldnn_post_ops_append_eltwise(ops, e.eltwise.scale,
-                            mkldnn_eltwise_relu, e.eltwise.alpha,
-                            e.eltwise.beta));
+                            e.eltwise.alg, e.eltwise.alpha, e.eltwise.beta));
                 break;
             default:
                 assert(!"unknown attr::post_ops::kind");
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.hpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.hpp
index 7010c9814..594ac41aa 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/dnn_types.hpp
@@ -70,17 +70,19 @@ struct attr_t {
     };
 
     struct post_ops_t {
-        enum kind_t { SUM, RELU, KIND_TOTAL };
+        enum kind_t { SUM, RELU, TANH, ELU, SQUARE, ABS, SQRT, LINEAR, BRELU,
+            SRELU, LOGISTIC, KIND_TOTAL };
         static kind_t str2kind(const char *str);
         static const char *kind2str(kind_t kind);
+        static mkldnn_alg_kind_t kind2mkldnn_kind(kind_t kind);
 
         struct entry_t {
             kind_t kind;
             union {
                 struct { float scale; } sum;
                 struct {
-                    // eltwise algorithm in future
-                    float scale, alpha, beta; // unused now
+                    mkldnn_alg_kind_t alg;
+                    float scale, alpha, beta;
                 } eltwise;
             };
         };
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_auto b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_auto
new file mode 100644
index 000000000..aafdbc699
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_auto
@@ -0,0 +1,2 @@
+mb2_ic3oc64_ih300kh3oh300ph1n"ssd_300_voc0712:conv1_1"
+mb32_ic3oc64_ih300kh3oh300ph1n"ssd_300_voc0712:conv1_1"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p1 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p1
index 44b9bf7b7..efbf15919 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p1
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p1
@@ -3,10 +3,10 @@
 
 mb1_g1ic3oc64_ih606oh300kh7sh2dh0ph0_iw756ow375kw7sw2dw0pw0_n"fastrcnn_p1:conv1"
 mb1_g1ic64oc64_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv2"
-mb1_g1ic64oc64_ih150oh150kh3sh1dh0ph1_iw188ow188kw3sw1dw0pw1_n"fastrcnn_p1:conv3"
-mb1_g1ic64oc256_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv4"
+mb1_g1ic64oc64_ih150oh150kh3sh1dh0ph1_iw188ow188kw3sw1dw0pw1_n"fastrcnn_p1:conv3*2"
+mb1_g1ic64oc256_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv4*3"
 # mb1_g1ic64oc256_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv4"
-mb1_g1ic256oc64_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv5"
+mb1_g1ic256oc64_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv5*2"
 # mb1_g1ic64oc64_ih150oh150kh3sh1dh0ph1_iw188ow188kw3sw1dw0pw1_n"fastrcnn_p1:conv3"
 # mb1_g1ic64oc256_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv4"
 # mb1_g1ic256oc64_ih150oh150kh1sh1dh0ph0_iw188ow188kw1sw1dw0pw0_n"fastrcnn_p1:conv5"
@@ -14,9 +14,9 @@ mb1_g1ic64oc64_ih152oh75kh3sh2dh0ph0_iw190ow94kw3sw2dw0pw0_n"fastrcnn_p1:conv6"
 mb1_g1ic64oc256_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv7"
 mb1_g1ic256oc128_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv8"
 mb1_g1ic256oc512_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv9"
-mb1_g1ic128oc128_ih75oh75kh3sh1dh0ph1_iw94ow94kw3sw1dw0pw1_n"fastrcnn_p1:conv10"
-mb1_g1ic128oc512_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv11"
-mb1_g1ic512oc128_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv12"
+mb1_g1ic128oc128_ih75oh75kh3sh1dh0ph1_iw94ow94kw3sw1dw0pw1_n"fastrcnn_p1:conv10*3"
+mb1_g1ic128oc512_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv11*3"
+mb1_g1ic512oc128_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv12*3"
 # mb1_g1ic128oc128_ih75oh75kh3sh1dh0ph1_iw94ow94kw3sw1dw0pw1_n"fastrcnn_p1:conv10"
 # mb1_g1ic128oc512_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv11"
 # mb1_g1ic512oc128_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv12"
@@ -26,10 +26,10 @@ mb1_g1ic512oc128_ih75oh75kh1sh1dh0ph0_iw94ow94kw1sw1dw0pw0_n"fastrcnn_p1:conv12"
 mb1_g1ic128oc128_ih77oh38kh3sh2dh0ph0_iw96ow47kw3sw2dw0pw0_n"fastrcnn_p1:conv13"
 mb1_g1ic128oc512_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv14"
 mb1_g1ic512oc256_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv15"
-mb1_g1ic256oc256_ih38oh38kh3sh1dh0ph1_iw47ow47kw3sw1dw0pw1_n"fastrcnn_p1:conv16"
-mb1_g1ic256oc1024_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv17"
+mb1_g1ic256oc256_ih38oh38kh3sh1dh0ph1_iw47ow47kw3sw1dw0pw1_n"fastrcnn_p1:conv16*6"
+mb1_g1ic256oc1024_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv17*6"
 mb1_g1ic512oc1024_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv18"
-mb1_g1ic1024oc256_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv19"
+mb1_g1ic1024oc256_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv19*5"
 # mb1_g1ic256oc256_ih38oh38kh3sh1dh0ph1_iw47ow47kw3sw1dw0pw1_n"fastrcnn_p1:conv16"
 # mb1_g1ic256oc1024_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv17"
 # mb1_g1ic1024oc256_ih38oh38kh1sh1dh0ph0_iw47ow47kw1sw1dw0pw0_n"fastrcnn_p1:conv19"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p2 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p2
index 758d70ec7..d2cc2ebe6 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p2
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_fastrcnn_p2
@@ -1,10 +1,10 @@
 # FastRCNN part 2
 
-mb64_g1ic1024oc512_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv1"
-mb64_g1ic512oc512_ih7oh7kh3sh1dh0ph1_iw7ow7kw3sw1dw0pw1_n"fastrcnn_p2:conv2"
-mb64_g1ic512oc2048_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv3"
-mb64_g1ic1024oc2048_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv4"
-mb64_g1ic2048oc512_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv5"
+mb64_g1ic1024oc512_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv1*3"
+mb64_g1ic512oc512_ih7oh7kh3sh1dh0ph1_iw7ow7kw3sw1dw0pw1_n"fastrcnn_p2:conv2*9"
+mb64_g1ic512oc2048_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv3*9"
+mb64_g1ic1024oc2048_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv4*3"
+mb64_g1ic2048oc512_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv5*6"
 # mb64_g1ic512oc512_ih7oh7kh3sh1dh0ph1_iw7ow7kw3sw1dw0pw1_n"fastrcnn_p2:conv2"
 # mb64_g1ic512oc2048_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv3"
 # mb64_g1ic2048oc512_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"fastrcnn_p2:conv5"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v1 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v1
index ec08dcc53..248148de3 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v1
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v1
@@ -9,7 +9,7 @@ mb96ic96ih28oc128oh28kh3ph1n"googlenet_v1:inception_3a/3x3"
 mb96ic192ih28oc16oh28kh1ph0n"googlenet_v1:inception_3a/5x5_reduce"
 mb96ic16ih28oc32oh28kh5ph2n"googlenet_v1:inception_3a/5x5"
 mb96ic192ih28oc32oh28kh1ph0n"googlenet_v1:inception_3a/pool_proj"
-mb96ic256ih28oc128oh28kh1ph0n"googlenet_v1:inception_3b/1x1"
+mb96ic256ih28oc128oh28kh1ph0n"googlenet_v1:inception_3b/1x1*2"
 # mb96ic256ih28oc128oh28kh1ph0n"googlenet_v1:inception_3b/3x3_reduce"    # inception_3b/1x1
 mb96ic128ih28oc192oh28kh3ph1n"googlenet_v1:inception_3b/3x3"
 mb96ic256ih28oc32oh28kh1ph0n"googlenet_v1:inception_3b/5x5_reduce"
@@ -23,12 +23,12 @@ mb96ic16ih14oc48oh14kh5ph2n"googlenet_v1:inception_4a/5x5"
 mb96ic480ih14oc64oh14kh1ph0n"googlenet_v1:inception_4a/pool_proj"
 mb96ic512ih4oc128oh4kh1ph0n"googlenet_v1:loss1/conv"
 mb96ic512ih14oc160oh14kh1ph0n"googlenet_v1:inception_4b/1x1"
-mb96ic512ih14oc112oh14kh1ph0n"googlenet_v1:inception_4b/3x3_reduce"
+mb96ic512ih14oc112oh14kh1ph0n"googlenet_v1:inception_4b/3x3_reduce*2"
 mb96ic112ih14oc224oh14kh3ph1n"googlenet_v1:inception_4b/3x3"
-mb96ic512ih14oc24oh14kh1ph0n"googlenet_v1:inception_4b/5x5_reduce"
-mb96ic24ih14oc64oh14kh5ph2n"googlenet_v1:inception_4b/5x5"
-mb96ic512ih14oc64oh14kh1ph0n"googlenet_v1:inception_4b/pool_proj"
-mb96ic512ih14oc128oh14kh1ph0n"googlenet_v1:inception_4c/1x1"
+mb96ic512ih14oc24oh14kh1ph0n"googlenet_v1:inception_4b/5x5_reduce*2"
+mb96ic24ih14oc64oh14kh5ph2n"googlenet_v1:inception_4b/5x5*2"
+mb96ic512ih14oc64oh14kh1ph0n"googlenet_v1:inception_4b/pool_proj*3"
+mb96ic512ih14oc128oh14kh1ph0n"googlenet_v1:inception_4c/1x1*2"
 # mb96ic512ih14oc128oh14kh1ph0n"googlenet_v1:inception_4c/3x3_reduce"   # inception_4c/1x1
 mb96ic128ih14oc256oh14kh3ph1n"googlenet_v1:inception_4c/3x3"
 # mb96ic512ih14oc24oh14kh1ph0n"googlenet_v1:inception_4c/5x5_reduce"    # inception_4b/5x5_reduce
@@ -52,7 +52,7 @@ mb96ic832ih7oc160oh7kh1ph0n"googlenet_v1:inception_5a/3x3_reduce"
 mb96ic160ih7oc320oh7kh3ph1n"googlenet_v1:inception_5a/3x3"
 mb96ic832ih7oc32oh7kh1ph0n"googlenet_v1:inception_5a/5x5_reduce"
 mb96ic32ih7oc128oh7kh5ph2n"googlenet_v1:inception_5a/5x5"
-mb96ic832ih7oc128oh7kh1ph0n"googlenet_v1:inception_5a/pool_proj"
+mb96ic832ih7oc128oh7kh1ph0n"googlenet_v1:inception_5a/pool_proj*2"
 mb96ic832ih7oc384oh7kh1ph0n"googlenet_v1:inception_5b/1x1"
 mb96ic832ih7oc192oh7kh1ph0n"googlenet_v1:inception_5b/3x3_reduce"
 mb96ic192ih7oc384oh7kh3ph1n"googlenet_v1:inception_5b/3x3"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v2 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v2
index 835970ecb..caf100a0a 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v2
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v2
@@ -3,14 +3,14 @@
 g1mb96ic3ih224iw224oc64oh112ow112kh7kw7sh2sw2ph3pw3n"googlenet_v2:conv1/7x7_s2"
 mb96ic64ih56oc64oh56kh1ph0n"googlenet_v2:conv2/3x3_reduce"
 mb96ic64ih56oc192oh56kh3ph1n"googlenet_v2:conv2/3x3"
-mb96ic192ih28oc64oh28kh1ph0n"googlenet_v2:inception_3a/1x1"
+mb96ic192ih28oc64oh28kh1ph0n"googlenet_v2:inception_3a/1x1*3"
 # mb96ic192ih28oc64oh28kh1ph0n"googlenet_v2:inception_3a/3x3_reduce"        # inception_3a/1x1
 mb96ic64ih28oc64oh28kh3ph1n"googlenet_v2:inception_3a/3x3"
 # mb96ic192ih28oc64oh28kh1ph0n"googlenet_v2:inception_3a/double3x3_reduce"  # inception_3a/1x1
-mb96ic64ih28oc96oh28kh3ph1n"googlenet_v2:inception_3a/double3x3a"
-mb96ic96ih28oc96oh28kh3ph1n"googlenet_v2:inception_3a/double3x3b"
+mb96ic64ih28oc96oh28kh3ph1n"googlenet_v2:inception_3a/double3x3a*4"
+mb96ic96ih28oc96oh28kh3ph1n"googlenet_v2:inception_3a/double3x3b*2"
 mb96ic192ih28oc32oh28kh1ph0n"googlenet_v2:inception_3a/pool_proj"
-mb96ic256ih28oc64oh28kh1ph0n"googlenet_v2:inception_3b/1x1"
+mb96ic256ih28oc64oh28kh1ph0n"googlenet_v2:inception_3b/1x1*4"
 # mb96ic256ih28oc64oh28kh1ph0n"googlenet_v2:inception_3b/3x3_reduce"        # inception_3b/1x1
 # mb96ic64ih28oc96oh28kh3ph1n"googlenet_v2:inception_3b/3x3"                # inception_3a/double3x3a
 # mb96ic256ih28oc64oh28kh1ph0n"googlenet_v2:inception_3b/double3x3_reduce"  # inception_3b/1x1
@@ -26,20 +26,20 @@ mb96ic576ih4oc128oh4kh1ph0n"googlenet_v2:loss1/conv"
 mb96ic576ih14oc224oh14kh1ph0n"googlenet_v2:inception_4a/1x1"
 mb96ic576ih14oc64oh14kh1ph0n"googlenet_v2:inception_4a/3x3_reduce"
 mb96ic64ih14oc96oh14kh3ph1n"googlenet_v2:inception_4a/3x3"
-mb96ic576ih14oc96oh14kh1ph0n"googlenet_v2:inception_4a/double3x3_reduce"
-mb96ic96ih14oc128oh14kh3ph1n"googlenet_v2:inception_4a/double3x3a"
-mb96ic128ih14oc128oh14kh3ph1n"googlenet_v2:inception_4a/double3x3b"
-mb96ic576ih14oc128oh14kh1ph0n"googlenet_v2:inception_4a/pool_proj"
-mb96ic576ih14oc192oh14kh1ph0n"googlenet_v2:inception_4b/1x1"
+mb96ic576ih14oc96oh14kh1ph0n"googlenet_v2:inception_4a/double3x3_reduce*6"
+mb96ic96ih14oc128oh14kh3ph1n"googlenet_v2:inception_4a/double3x3a*3"
+mb96ic128ih14oc128oh14kh3ph1n"googlenet_v2:inception_4a/double3x3b*2"
+mb96ic576ih14oc128oh14kh1ph0n"googlenet_v2:inception_4a/pool_proj*6"
+mb96ic576ih14oc192oh14kh1ph0n"googlenet_v2:inception_4b/1x1*2"
 # mb96ic576ih14oc96oh14kh1ph0n"googlenet_v2:inception_4b/3x3_reduce"        # inception_4a/double3x3_reduce
 # mb96ic96ih14oc128oh14kh3ph1n"googlenet_v2:inception_4b/3x3"               # inception_4a/double3x3a
 # mb96ic576ih14oc96oh14kh1ph0n"googlenet_v2:inception_4b/double3x3_reduce"  # inception_4a/double3x3_reduce
 # mb96ic96ih14oc128oh14kh3ph1n"googlenet_v2:inception_4b/double3x3a"        # inception_4a/double3x3a
 # mb96ic128ih14oc128oh14kh3ph1n"googlenet_v2:inception_4b/double3x3b"       # inception_4a/double3x3b
 # mb96ic576ih14oc128oh14kh1ph0n"googlenet_v2:inception_4b/pool_proj"        # inception_4a/pool_proj
-mb96ic576ih14oc160oh14kh1ph0n"googlenet_v2:inception_4c/1x1"
+mb96ic576ih14oc160oh14kh1ph0n"googlenet_v2:inception_4c/1x1*2"
 # mb96ic576ih14oc128oh14kh1ph0n"googlenet_v2:inception_4c/3x3_reduce"       # inception_4a/pool_proj
-mb96ic128ih14oc160oh14kh3ph1n"googlenet_v2:inception_4c/3x3"
+mb96ic128ih14oc160oh14kh3ph1n"googlenet_v2:inception_4c/3x3*2"
 # mb96ic576ih14oc128oh14kh1ph0n"googlenet_v2:inception_4c/double3x3_reduce" # inception_4a/pool_proj
 # mb96ic128ih14oc160oh14kh3ph1n"googlenet_v2:inception_4c/double3x3a"       # inception_4c/3x3
 mb96ic160ih14oc160oh14kh3ph1n"googlenet_v2:inception_4c/double3x3b"
@@ -57,13 +57,13 @@ g1mb96ic128ih14iw14oc192oh7ow7kh3kw3sh2sw2ph1pw1n"googlenet_v2:inception_4e/3x3"
 mb96ic192ih14oc256oh14kh3ph1n"googlenet_v2:inception_4e/double3x3a"
 g1mb96ic256ih14iw14oc256oh7ow7kh3kw3sh2sw2ph1pw1n"googlenet_v2:inception_4e/double3x3b"
 mb96ic1024ih2oc128oh2kh1ph0n"googlenet_v2:loss2/conv"
-mb96ic1024ih7oc352oh7kh1ph0n"googlenet_v2:inception_5a/1x1"
-mb96ic1024ih7oc192oh7kh1ph0n"googlenet_v2:inception_5a/3x3_reduce"
-mb96ic192ih7oc320oh7kh3ph1n"googlenet_v2:inception_5a/3x3"
+mb96ic1024ih7oc352oh7kh1ph0n"googlenet_v2:inception_5a/1x1*2"
+mb96ic1024ih7oc192oh7kh1ph0n"googlenet_v2:inception_5a/3x3_reduce*3"
+mb96ic192ih7oc320oh7kh3ph1n"googlenet_v2:inception_5a/3x3*2"
 mb96ic1024ih7oc160oh7kh1ph0n"googlenet_v2:inception_5a/double3x3_reduce"
 mb96ic160ih7oc224oh7kh3ph1n"googlenet_v2:inception_5a/double3x3a"
-mb96ic224ih7oc224oh7kh3ph1n"googlenet_v2:inception_5a/double3x3b"
-mb96ic1024ih7oc128oh7kh1ph0n"googlenet_v2:inception_5a/pool_proj"
+mb96ic224ih7oc224oh7kh3ph1n"googlenet_v2:inception_5a/double3x3b*2"
+mb96ic1024ih7oc128oh7kh1ph0n"googlenet_v2:inception_5a/pool_proj*2"
 # mb96ic1024ih7oc352oh7kh1ph0n"googlenet_v2:inception_5b/1x1"               # inception_5a/1x1
 # mb96ic1024ih7oc192oh7kh1ph0n"googlenet_v2:inception_5b/3x3_reduce"        # inception_5a/3x3_reduce
 # mb96ic192ih7oc320oh7kh3ph1n"googlenet_v2:inception_5b/3x3"                # inception_5a/3x3
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v3 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v3
index f300f7703..f71086d4b 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v3
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_googlenet_v3
@@ -4,21 +4,21 @@ g1mb22ic32ih149iw149oc32oh147ow147kh3kw3ph0pw0sh1sw1n"googlenet_v3:conv_1_1_conv
 g1mb22ic32ih147iw147oc64oh147ow147kh3kw3ph1pw1sh1sw1n"googlenet_v3:conv_2_2_conv2d"
 g1mb22ic64ih73iw73oc80oh73ow73kh1kw1ph0pw0sh1sw1n"googlenet_v3:conv_3_3_conv2d"
 g1mb22ic80ih73iw73oc192oh71ow71kh3kw3ph0pw0sh1sw1n"googlenet_v3:conv_4_4_conv2d"
-g1mb22ic192ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_conv_conv2d"
+g1mb22ic192ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_conv_conv2d*2"
 g1mb22ic192ih35iw35oc48oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_tower_conv_conv2d"
-g1mb22ic48ih35iw35oc64oh35ow35kh5kw5ph2pw2sh1sw1n"googlenet_v3:mixed_tower_conv_1_conv2d"
+g1mb22ic48ih35iw35oc64oh35ow35kh5kw5ph2pw2sh1sw1n"googlenet_v3:mixed_tower_conv_1_conv2d*3"
 # g1mb22ic192ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_tower_1_conv_conv2d"
-g1mb22ic64ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_tower_1_conv_1_conv2d"
-g1mb22ic96ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_tower_1_conv_2_conv2d"
+g1mb22ic64ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_tower_1_conv_1_conv2d*4"
+g1mb22ic96ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_tower_1_conv_2_conv2d*3"
 g1mb22ic192ih35iw35oc32oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_tower_2_conv_conv2d"
-g1mb22ic256ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_1_conv_conv2d"
+g1mb22ic256ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_1_conv_conv2d*3"
 g1mb22ic256ih35iw35oc48oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_1_tower_conv_conv2d"
 # g1mb22ic48ih35iw35oc64oh35ow35kh5kw5ph2pw2sh1sw1n"googlenet_v3:mixed_1_tower_conv_1_conv2d"
 # g1mb22ic256ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_1_tower_1_conv_conv2d"
 # g1mb22ic64ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_1_tower_1_conv_1_conv2d"
 # g1mb22ic96ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_1_tower_1_conv_2_conv2d"
 # g1mb22ic256ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_1_tower_2_conv_conv2d"
-g1mb22ic288ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_2_conv_conv2d"
+g1mb22ic288ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_2_conv_conv2d*4"
 g1mb22ic288ih35iw35oc48oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_2_tower_conv_conv2d"
 # g1mb22ic48ih35iw35oc64oh35ow35kh5kw5ph2pw2sh1sw1n"googlenet_v3:mixed_2_tower_conv_1_conv2d"
 # g1mb22ic288ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_2_tower_1_conv_conv2d"
@@ -29,25 +29,25 @@ g1mb22ic288ih35iw35oc384oh17ow17kh3kw3ph0pw0sh2sw2n"googlenet_v3:mixed_3_conv_co
 # g1mb22ic288ih35iw35oc64oh35ow35kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_3_tower_conv_conv2d"
 # g1mb22ic64ih35iw35oc96oh35ow35kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_3_tower_conv_1_conv2d"
 g1mb22ic96ih35iw35oc96oh17ow17kh3kw3ph0pw0sh2sw2n"googlenet_v3:mixed_3_tower_conv_2_conv2d"
-g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_4_conv_conv2d"
-g1mb22ic768ih17iw17oc128oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_4_tower_conv_conv2d"
-g1mb22ic128ih17iw17oc128oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_4_tower_conv_1_conv2d"
+g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_4_conv_conv2d*12"
+g1mb22ic768ih17iw17oc128oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_4_tower_conv_conv2d*2"
+g1mb22ic128ih17iw17oc128oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_4_tower_conv_1_conv2d*2"
 g1mb22ic128ih17iw17oc192oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_4_tower_conv_2_conv2d"
 # g1mb22ic768ih17iw17oc128oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_4_tower_1_conv_conv2d"
-g1mb22ic128ih17iw17oc128oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_4_tower_1_conv_1_conv2d"
+g1mb22ic128ih17iw17oc128oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_4_tower_1_conv_1_conv2d*2"
 # g1mb22ic128ih17iw17oc128oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_4_tower_1_conv_2_conv2d"
 # g1mb22ic128ih17iw17oc128oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_4_tower_1_conv_3_conv2d"
 g1mb22ic128ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_4_tower_1_conv_4_conv2d"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_4_tower_2_conv_conv2d"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_5_conv_conv2d"
-g1mb22ic768ih17iw17oc160oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_5_tower_conv_conv2d"
-g1mb22ic160ih17iw17oc160oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_5_tower_conv_1_conv2d"
-g1mb22ic160ih17iw17oc192oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_5_tower_conv_2_conv2d"
+g1mb22ic768ih17iw17oc160oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_5_tower_conv_conv2d*4"
+g1mb22ic160ih17iw17oc160oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_5_tower_conv_1_conv2d*4"
+g1mb22ic160ih17iw17oc192oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_5_tower_conv_2_conv2d*2"
 # g1mb22ic768ih17iw17oc160oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_conv2d"
-g1mb22ic160ih17iw17oc160oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_1_conv2d"
+g1mb22ic160ih17iw17oc160oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_1_conv2d*4"
 # g1mb22ic160ih17iw17oc160oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_2_conv2d"
 # g1mb22ic160ih17iw17oc160oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_3_conv2d"
-g1mb22ic160ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_4_conv2d"
+g1mb22ic160ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_5_tower_1_conv_4_conv2d*2"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_5_tower_2_conv_conv2d"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_6_conv_conv2d"
 # g1mb22ic768ih17iw17oc160oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_6_tower_conv_conv2d"
@@ -61,8 +61,8 @@ g1mb22ic160ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_5_tower_1
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_6_tower_2_conv_conv2d"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_7_conv_conv2d"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_7_tower_conv_conv2d"
-g1mb22ic192ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_7_tower_conv_1_conv2d"
-g1mb22ic192ih17iw17oc192oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_7_tower_conv_2_conv2d"
+g1mb22ic192ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_7_tower_conv_1_conv2d*4"
+g1mb22ic192ih17iw17oc192oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_7_tower_conv_2_conv2d*4"
 # g1mb22ic768ih17iw17oc192oh17ow17kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_7_tower_1_conv_conv2d"
 # g1mb22ic192ih17iw17oc192oh17ow17kh7kw1ph3pw0sh1sw1n"googlenet_v3:mixed_7_tower_1_conv_1_conv2d"
 # g1mb22ic192ih17iw17oc192oh17ow17kh1kw7ph0pw3sh1sw1n"googlenet_v3:mixed_7_tower_1_conv_2_conv2d"
@@ -77,10 +77,10 @@ g1mb22ic192ih17iw17oc320oh8ow8kh3kw3ph0pw0sh2sw2n"googlenet_v3:mixed_8_tower_con
 g1mb22ic192ih17iw17oc192oh8ow8kh3kw3ph0pw0sh2sw2n"googlenet_v3:mixed_8_tower_1_conv_3_conv2d"
 g1mb22ic1280ih8iw8oc320oh8ow8kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_9_conv_conv2d"
 g1mb22ic1280ih8iw8oc384oh8ow8kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_9_tower_conv_conv2d"
-g1mb22ic384ih8iw8oc384oh8ow8kh1kw3ph0pw1sh1sw1n"googlenet_v3:mixed_9_tower_mixed_conv_conv2d"
-g1mb22ic384ih8iw8oc384oh8ow8kh3kw1ph1pw0sh1sw1n"googlenet_v3:mixed_9_tower_mixed_conv_1_conv2d"
+g1mb22ic384ih8iw8oc384oh8ow8kh1kw3ph0pw1sh1sw1n"googlenet_v3:mixed_9_tower_mixed_conv_conv2d*4"
+g1mb22ic384ih8iw8oc384oh8ow8kh3kw1ph1pw0sh1sw1n"googlenet_v3:mixed_9_tower_mixed_conv_1_conv2d*4"
 g1mb22ic1280ih8iw8oc448oh8ow8kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_9_tower_1_conv_conv2d"
-g1mb22ic448ih8iw8oc384oh8ow8kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_9_tower_1_conv_1_conv2d"
+g1mb22ic448ih8iw8oc384oh8ow8kh3kw3ph1pw1sh1sw1n"googlenet_v3:mixed_9_tower_1_conv_1_conv2d*2"
 # g1mb22ic384ih8iw8oc384oh8ow8kh1kw3ph0pw1sh1sw1n"googlenet_v3:mixed_9_tower_1_mixed_conv_conv2d"
 # g1mb22ic384ih8iw8oc384oh8ow8kh3kw1ph1pw0sh1sw1n"googlenet_v3:mixed_9_tower_1_mixed_conv_1_conv2d"
 g1mb22ic1280ih8iw8oc192oh8ow8kh1kw1ph0pw0sh1sw1n"googlenet_v3:mixed_9_tower_2_conv_conv2d"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p1 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p1
index f7e81f75e..5c89307e6 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p1
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p1
@@ -1,11 +1,11 @@
 # MaskRCNN part 1
 
 mb1_g1ic3oc64_ih1030oh512kh7sh2dh0ph0_iw1030ow512kw7sw2dw0pw0_n"masknet_p1:conv1"
-mb1_g1ic64oc256_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv2"
+mb1_g1ic64oc256_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv2*4"
 mb1_g1ic64oc64_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv3"
-mb1_g1ic64oc64_ih256oh256kh3sh1dh0ph1_iw256ow256kw3sw1dw0pw1_n"masknet_p1:conv4"
+mb1_g1ic64oc64_ih256oh256kh3sh1dh0ph1_iw256ow256kw3sw1dw0pw1_n"masknet_p1:conv4*3"
 # mb1_g1ic64oc256_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv2"
-mb1_g1ic256oc64_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv5"
+mb1_g1ic256oc64_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv5*2"
 # mb1_g1ic64oc64_ih256oh256kh3sh1dh0ph1_iw256ow256kw3sw1dw0pw1_n"masknet_p1:conv4"
 # mb1_g1ic64oc256_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv2"
 # mb1_g1ic256oc64_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv5"
@@ -14,9 +14,9 @@ mb1_g1ic256oc64_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv5
 mb1_g1ic256oc128_ih256oh128kh1sh2dh0ph0_iw256ow128kw1sw2dw0pw0_n"masknet_p1:conv6"
 mb1_g1ic256oc256_ih256oh256kh1sh1dh0ph0_iw256ow256kw1sw1dw0pw0_n"masknet_p1:conv7"
 mb1_g1ic256oc512_ih256oh128kh1sh2dh0ph0_iw256ow128kw1sw2dw0pw0_n"masknet_p1:conv8"
-mb1_g1ic128oc128_ih128oh128kh3sh1dh0ph1_iw128ow128kw3sw1dw0pw1_n"masknet_p1:conv9"
-mb1_g1ic128oc512_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv10"
-mb1_g1ic512oc128_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv11"
+mb1_g1ic128oc128_ih128oh128kh3sh1dh0ph1_iw128ow128kw3sw1dw0pw1_n"masknet_p1:conv9*4"
+mb1_g1ic128oc512_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv10*4"
+mb1_g1ic512oc128_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv11*3"
 # mb1_g1ic128oc128_ih128oh128kh3sh1dh0ph1_iw128ow128kw3sw1dw0pw1_n"masknet_p1:conv9"
 # mb1_g1ic128oc512_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv10"
 # mb1_g1ic512oc128_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv11"
@@ -26,11 +26,11 @@ mb1_g1ic512oc128_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv
 # mb1_g1ic128oc128_ih128oh128kh3sh1dh0ph1_iw128ow128kw3sw1dw0pw1_n"masknet_p1:conv9"
 # mb1_g1ic128oc512_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv10"
 mb1_g1ic512oc256_ih128oh64kh1sh2dh0ph0_iw128ow64kw1sw2dw0pw0_n"masknet_p1:conv12"
-mb1_g1ic256oc256_ih64oh64kh3sh1dh0ph1_iw64ow64kw3sw1dw0pw1_n"masknet_p1:conv13"
-mb1_g1ic256oc1024_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv14"
+mb1_g1ic256oc256_ih64oh64kh3sh1dh0ph1_iw64ow64kw3sw1dw0pw1_n"masknet_p1:conv13*24"
+mb1_g1ic256oc1024_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv14*23"
 mb1_g1ic512oc256_ih128oh128kh1sh1dh0ph0_iw128ow128kw1sw1dw0pw0_n"masknet_p1:conv15"
 mb1_g1ic512oc1024_ih128oh64kh1sh2dh0ph0_iw128ow64kw1sw2dw0pw0_n"masknet_p1:conv16"
-mb1_g1ic1024oc256_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv17"
+mb1_g1ic1024oc256_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv17*23"
 # mb1_g1ic256oc256_ih64oh64kh3sh1dh0ph1_iw64ow64kw3sw1dw0pw1_n"masknet_p1:conv13"
 # mb1_g1ic256oc1024_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv14"
 # mb1_g1ic1024oc256_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv17"
@@ -98,9 +98,9 @@ mb1_g1ic1024oc256_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv17"
 # mb1_g1ic256oc1024_ih64oh64kh1sh1dh0ph0_iw64ow64kw1sw1dw0pw0_n"masknet_p1:conv14"
 mb1_g1ic1024oc2048_ih64oh32kh1sh2dh0ph0_iw64ow32kw1sw2dw0pw0_n"masknet_p1:conv18"
 mb1_g1ic1024oc512_ih64oh32kh1sh2dh0ph0_iw64ow32kw1sw2dw0pw0_n"masknet_p1:conv19"
-mb1_g1ic512oc512_ih32oh32kh3sh1dh0ph1_iw32ow32kw3sw1dw0pw1_n"masknet_p1:conv20"
-mb1_g1ic512oc2048_ih32oh32kh1sh1dh0ph0_iw32ow32kw1sw1dw0pw0_n"masknet_p1:conv21"
-mb1_g1ic2048oc512_ih32oh32kh1sh1dh0ph0_iw32ow32kw1sw1dw0pw0_n"masknet_p1:conv22"
+mb1_g1ic512oc512_ih32oh32kh3sh1dh0ph1_iw32ow32kw3sw1dw0pw1_n"masknet_p1:conv20*3"
+mb1_g1ic512oc2048_ih32oh32kh1sh1dh0ph0_iw32ow32kw1sw1dw0pw0_n"masknet_p1:conv21*3"
+mb1_g1ic2048oc512_ih32oh32kh1sh1dh0ph0_iw32ow32kw1sw1dw0pw0_n"masknet_p1:conv22*2"
 # mb1_g1ic512oc512_ih32oh32kh3sh1dh0ph1_iw32ow32kw3sw1dw0pw1_n"masknet_p1:conv20"
 # mb1_g1ic512oc2048_ih32oh32kh1sh1dh0ph0_iw32ow32kw1sw1dw0pw0_n"masknet_p1:conv21"
 # mb1_g1ic2048oc512_ih32oh32kh1sh1dh0ph0_iw32ow32kw1sw1dw0pw0_n"masknet_p1:conv22"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p2 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p2
index 8998eb601..914b1817b 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p2
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_maskrcnn_p2
@@ -2,7 +2,7 @@
 
 mb1000_g1ic256oc1024_ih7oh1kh7sh1dh0ph0_iw7ow1kw7sw1dw0pw0_n"masknet_p2:conv1"
 mb1000_g1ic1024oc1024_ih1oh1kh1sh1dh0ph0_iw1ow1kw1sw1dw0pw0_n"masknet_p2:conv2"
-mb100_g1ic256oc256_ih14oh14kh3sh1dh0ph1_iw14ow14kw3sw1dw0pw1_n"masknet_p2:conv3"
+mb100_g1ic256oc256_ih14oh14kh3sh1dh0ph1_iw14ow14kw3sw1dw0pw1_n"masknet_p2:conv3*4"
 # mb100_g1ic256oc256_ih14oh14kh3sh1dh0ph1_iw14ow14kw3sw1dw0pw1_n"masknet_p2:conv3"
 # mb100_g1ic256oc256_ih14oh14kh3sh1dh0ph1_iw14ow14kw3sw1dw0pw1_n"masknet_p2:conv3"
 # mb100_g1ic256oc256_ih14oh14kh3sh1dh0ph1_iw14ow14kw3sw1dw0pw1_n"masknet_p2:conv3"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet
index 0b425ebdf..f67143f6f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet
@@ -1,21 +1,21 @@
 # MobileNet
 # according to TF log
 
-mb32_g1ic3oc32_ih224oh112kh3sh2dh0ph1_iw224ow112kw3sw2dw0pw1_n"mobilenet:conv1"
-mb32_g1ic32oc64_ih112oh112kh1sh1dh0ph0_iw112ow112kw1sw1dw0pw0_n"mobilenet:conv2"
-mb32_g1ic64oc128_ih56oh56kh1sh1dh0ph0_iw56ow56kw1sw1dw0pw0_n"mobilenet:conv3"
-mb32_g1ic128oc128_ih56oh56kh1sh1dh0ph0_iw56ow56kw1sw1dw0pw0_n"mobilenet:conv4"
-mb32_g1ic128oc256_ih28oh28kh1sh1dh0ph0_iw28ow28kw1sw1dw0pw0_n"mobilenet:conv5"
-mb32_g1ic256oc256_ih28oh28kh1sh1dh0ph0_iw28ow28kw1sw1dw0pw0_n"mobilenet:conv6"
-mb32_g1ic256oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv7"
-mb32_g1ic512oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv8"
+mb32_g1ic3oc32_ih224oh112kh3sh2dh0ph1_iw224ow112kw3sw2dw0pw1_n"mobilenet:conv1*3"
+mb32_g1ic32oc64_ih112oh112kh1sh1dh0ph0_iw112ow112kw1sw1dw0pw0_n"mobilenet:conv2*4"
+mb32_g1ic64oc128_ih56oh56kh1sh1dh0ph0_iw56ow56kw1sw1dw0pw0_n"mobilenet:conv3*4"
+mb32_g1ic128oc128_ih56oh56kh1sh1dh0ph0_iw56ow56kw1sw1dw0pw0_n"mobilenet:conv4*4"
+mb32_g1ic128oc256_ih28oh28kh1sh1dh0ph0_iw28ow28kw1sw1dw0pw0_n"mobilenet:conv5*4"
+mb32_g1ic256oc256_ih28oh28kh1sh1dh0ph0_iw28ow28kw1sw1dw0pw0_n"mobilenet:conv6*4"
+mb32_g1ic256oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv7*4"
+mb32_g1ic512oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv8*20"
 # mb32_g1ic512oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv8"
 # mb32_g1ic512oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv8"
 # mb32_g1ic512oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv8"
 # mb32_g1ic512oc512_ih14oh14kh1sh1dh0ph0_iw14ow14kw1sw1dw0pw0_n"mobilenet:conv8"
-mb32_g1ic512oc1024_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"mobilenet:conv9"
-mb32_g1ic1024oc1024_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"mobilenet:conv10"
-mb32_g1ic1024oc5_ih1oh1kh1sh1dh0ph0_iw1ow1kw1sw1dw0pw0_n"mobilenet:conv11"
+mb32_g1ic512oc1024_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"mobilenet:conv9*4"
+mb32_g1ic1024oc1024_ih7oh7kh1sh1dh0ph0_iw7ow7kw1sw1dw0pw0_n"mobilenet:conv10*4"
+mb32_g1ic1024oc5_ih1oh1kh1sh1dh0ph0_iw1ow1kw1sw1dw0pw0_n"mobilenet:conv11*4"
 # mb32_g1ic3oc32_ih224oh112kh3sh2dh0ph1_iw224ow112kw3sw2dw0pw1_n"mobilenet:conv1"
 # mb32_g1ic32oc64_ih112oh112kh1sh1dh0ph0_iw112ow112kw1sw1dw0pw0_n"mobilenet:conv2"
 # mb32_g1ic64oc128_ih56oh56kh1sh1dh0ph0_iw56ow56kw1sw1dw0pw0_n"mobilenet:conv3"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet_dw b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet_dw
index 418496a11..433896a4f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet_dw
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_mobilenet_dw
@@ -9,4 +9,3 @@ g256mb1ic256ih28iw28oc256oh14ow14kh3kw3sh2sw2ph1pw1n"mobilenet:conv4_2/dw"
 g512mb1ic512ih14iw14oc512oh14ow14kh3kw3sh1sw1ph1pw1n"mobilenet:conv5_1/dw"
 g512mb1ic512ih14iw14oc512oh7ow7kh3kw3sh2sw2ph1pw1n"mobilenet:conv5_6/dw"
 g1024mb1ic1024ih7iw7oc1024oh7ow7kh3kw3sh1sw1ph1pw1n"mobilenet:conv6/dw"
-
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_regression_gemm b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_regression_gemm
new file mode 100644
index 000000000..8863cf3eb
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_regression_gemm
@@ -0,0 +1,6 @@
+# ResNext50
+mb2_g32ic128oc128_ih56oh56kh3sh1dh0ph1_iw56ow56kw3sw1dw0pw1
+
+# Faster RCNN
+mb1_g64ic256oc256_ih240oh240kh3sh1dh0ph1_iw352ow352kw3sw1dw0pw1
+
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_resnet_50 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_resnet_50
index 0432edaa8..f946de32f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_resnet_50
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_resnet_50
@@ -1,11 +1,11 @@
 # resnet_50
 
 g1mb50ic3ih224iw224oc64oh112ow112kh7kw7sh2sw2ph3pw3n"resnet_50:conv1"
-mb50ic64ih56oc256oh56kh1ph0n"resnet_50:res2a_branch1"
+mb50ic64ih56oc256oh56kh1ph0n"resnet_50:res2a_branch1*4"
 mb50ic64ih56oc64oh56kh1ph0n"resnet_50:res2a_branch2a"
-mb50ic64ih56oc64oh56kh3ph1n"resnet_50:res2a_branch2b"
+mb50ic64ih56oc64oh56kh3ph1n"resnet_50:res2a_branch2b*3"
 # mb50ic64ih56oc256oh56kh1ph0n"resnet_50:res2a_branch2c"        # conv1
-mb50ic256ih56oc64oh56kh1ph0n"resnet_50:res2b_branch2a"
+mb50ic256ih56oc64oh56kh1ph0n"resnet_50:res2b_branch2a*2"
 # mb50ic64ih56oc64oh56kh3ph1n"resnet_50:res2b_branch2b"         # res2a_branch2b
 # mb50ic64ih56oc256oh56kh1ph0n"resnet_50:res2b_branch2c"        # conv1
 # mb50ic256ih56oc64oh56kh1ph0n"resnet_50:res2c_branch2a"        # res2b_branch2a
@@ -13,9 +13,9 @@ mb50ic256ih56oc64oh56kh1ph0n"resnet_50:res2b_branch2a"
 # mb50ic64ih56oc256oh56kh1ph0n"resnet_50:res2c_branch2c"        # conv1
 g1mb50ic256ih56iw56oc512oh28ow28kh1kw1sh2sw2ph0pw0n"resnet_50:res3a_branch1"
 g1mb50ic256ih56iw56oc128oh28ow28kh1kw1sh2sw2ph0pw0n"resnet_50:res3a_branch2a"
-mb50ic128ih28oc128oh28kh3ph1n"resnet_50:res3a_branch2b"
-mb50ic128ih28oc512oh28kh1ph0n"resnet_50:res3a_branch2c"
-mb50ic512ih28oc128oh28kh1ph0n"resnet_50:res3b_branch2a"
+mb50ic128ih28oc128oh28kh3ph1n"resnet_50:res3a_branch2b*4"
+mb50ic128ih28oc512oh28kh1ph0n"resnet_50:res3a_branch2c*4"
+mb50ic512ih28oc128oh28kh1ph0n"resnet_50:res3b_branch2a*3"
 # mb50ic128ih28oc128oh28kh3ph1n"resnet_50:res3b_branch2b"       # res3a_branch2b
 # mb50ic128ih28oc512oh28kh1ph0n"resnet_50:res3b_branch2c"       # res3a_branch2c
 # mb50ic512ih28oc128oh28kh1ph0n"resnet_50:res3c_branch2a"       # res3b_branch2a
@@ -26,9 +26,9 @@ mb50ic512ih28oc128oh28kh1ph0n"resnet_50:res3b_branch2a"
 # mb50ic128ih28oc512oh28kh1ph0n"resnet_50:res3d_branch2c"       # res3a_branch2c
 g1mb50ic512ih28iw28oc1024oh14ow14kh1kw1sh2sw2ph0pw0n"resnet_50:res4a_branch1"
 g1mb50ic512ih28iw28oc256oh14ow14kh1kw1sh2sw2ph0pw0n"resnet_50:res4a_branch2a"
-mb50ic256ih14oc256oh14kh3ph1n"resnet_50:res4a_branch2b"
-mb50ic256ih14oc1024oh14kh1ph0n"resnet_50:res4a_branch2c"
-mb50ic1024ih14oc256oh14kh1ph0n"resnet_50:res4b_branch2a"
+mb50ic256ih14oc256oh14kh3ph1n"resnet_50:res4a_branch2b*6"
+mb50ic256ih14oc1024oh14kh1ph0n"resnet_50:res4a_branch2c*6"
+mb50ic1024ih14oc256oh14kh1ph0n"resnet_50:res4b_branch2a*5"
 # mb50ic256ih14oc256oh14kh3ph1n"resnet_50:res4b_branch2b"       # res4a_branch2b
 # mb50ic256ih14oc1024oh14kh1ph0n"resnet_50:res4b_branch2c"      # res4a_branch2c
 # mb50ic1024ih14oc256oh14kh1ph0n"resnet_50:res4c_branch2a"      # res4b_branch2a
@@ -45,9 +45,9 @@ mb50ic1024ih14oc256oh14kh1ph0n"resnet_50:res4b_branch2a"
 # mb50ic256ih14oc1024oh14kh1ph0n"resnet_50:res4f_branch2c"      # res4a_branch2c
 g1mb50ic1024ih14iw14oc2048oh7ow7kh1kw1sh2sw2ph0pw0n"resnet_50:res5a_branch1"
 g1mb50ic1024ih14iw14oc512oh7ow7kh1kw1sh2sw2ph0pw0n"resnet_50:res5a_branch2a"
-mb50ic512ih7oc512oh7kh3ph1n"resnet_50:res5a_branch2b"
-mb50ic512ih7oc2048oh7kh1ph0n"resnet_50:res5a_branch2c"
-mb50ic2048ih7oc512oh7kh1ph0n"resnet_50:res5b_branch2a"
+mb50ic512ih7oc512oh7kh3ph1n"resnet_50:res5a_branch2b*3"
+mb50ic512ih7oc2048oh7kh1ph0n"resnet_50:res5a_branch2c*3"
+mb50ic2048ih7oc512oh7kh1ph0n"resnet_50:res5b_branch2a*2"
 # mb50ic512ih7oc512oh7kh3ph1n"resnet_50:res5b_branch2b"         # res5a_branch2b
 # mb50ic512ih7oc2048oh7kh1ph0n"resnet_50:res5b_branch2c"        # res5a_branch2c
 # mb50ic2048ih7oc512oh7kh1ph0n"resnet_50:res5c_branch2a"        # res5b_branch2a
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_ssd_mobilenet b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_ssd_mobilenet
new file mode 100644
index 000000000..134cf3ac7
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_ssd_mobilenet
@@ -0,0 +1,11 @@
+# ssd_mobilenet
+
+mb12_g1024ic1024oc1024_ih10oh10kh3sh1dh0ph1_iw10ow10kw3sw1dw0pw1n"conv_1:ssd_mobilenet_dw"
+mb12_g512ic512oc512_ih19oh10kh3sh2dh0ph1_iw19ow10kw3sw2dw0pw1n"conv_2:ssd_mobilenet_dw"
+mb12_g512ic512oc512_ih19oh19kh3sh1dh0ph1_iw19ow19kw3sw1dw0pw1n"conv_3:ssd_mobilenet_dw"
+mb12_g256ic256oc256_ih38oh19kh3sh2dh0ph0_iw38ow19kw3sw2dw0pw0n"conv_4:ssd_mobilenet_dw"
+mb12_g256ic256oc256_ih38oh38kh3sh1dh0ph1_iw38ow38kw3sw1dw0pw1n"conv_5:ssd_mobilenet_dw"
+mb12_g128ic128oc128_ih75oh38kh3sh2dh0ph1_iw75ow38kw3sw2dw0pw1n"conv_6:ssd_mobilenet_dw"
+mb12_g128ic128oc128_ih75oh75kh3sh1dh0ph1_iw75ow75kw3sw1dw0pw1n"conv_7:ssd_mobilenet_dw"
+mb12_g64ic64oc64_ih150oh75kh3sh2dh0ph0_iw150ow75kw3sw2dw0pw0n"conv_8:ssd_mobilenet_dw"
+mb12_g32ic32oc32_ih150oh150kh3sh1dh0ph1_iw150ow150kw3sw1dw0pw1n"conv_9:ssd_mobilenet_dw"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_tails b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_tails
index 7d8b0fdd1..d33856253 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_tails
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_tails
@@ -24,6 +24,8 @@ ic25oc24_ih13oh12kh3ph0_n"tails_conv:17"
 ic27oc30_ih13oh13kh3ph1_n"tails_conv:18"
 ic28oc20_ih13oh12kh3ph0_n"tails_conv:19"
 ic29oc65_ih13oh13kh3ph1_n"tails_conv:20"
+g64ic512oc512_ih240oh120kh3sh2dh0ph1_n"tails_conv:21"
+g128ic512oc512_ih240oh120kh3sh2dh0ph1_n"tails_conv:22"
 
 # conv 1x1
 ic32oc13_ih13oh13kh1ph0_n"tails_conv_1x1:1"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_vgg_19 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_vgg_19
index 738f7c573..e65ae894d 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_vgg_19
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_vgg_19
@@ -5,14 +5,14 @@ mb64ic64ih224oc64oh224kh3ph1n"vgg_19:conv1_2"
 mb64ic64ih112oc128oh112kh3ph1n"vgg_19:conv2_1"
 mb64ic128ih112oc128oh112kh3ph1n"vgg_19:conv2_2"
 mb64ic128ih56oc256oh56kh3ph1n"vgg_19:conv3_1"
-mb64ic256ih56oc256oh56kh3ph1n"vgg_19:conv3_2"
+mb64ic256ih56oc256oh56kh3ph1n"vgg_19:conv3_2*3"
 # mb64ic256ih56oc256oh56kh3ph1n"vgg_19:conv3_3"     # conv3_2
 # mb64ic256ih56oc256oh56kh3ph1n"vgg_19:conv3_4"     # conv3_2
 mb64ic256ih28oc512oh28kh3ph1n"vgg_19:conv4_1"
-mb64ic512ih28oc512oh28kh3ph1n"vgg_19:conv4_2"
+mb64ic512ih28oc512oh28kh3ph1n"vgg_19:conv4_2*3"
 # mb64ic512ih28oc512oh28kh3ph1n"vgg_19:conv4_3"     # conv4_2
 # mb64ic512ih28oc512oh28kh3ph1n"vgg_19:conv4_4"     # conv4_2
-mb64ic512ih14oc512oh14kh3ph1n"vgg_19:conv5_1"
+mb64ic512ih14oc512oh14kh3ph1n"vgg_19:conv5_1*4"
 # mb64ic512ih14oc512oh14kh3ph1n"vgg_19:conv5_2"     # conv5_2
 # mb64ic512ih14oc512oh14kh3ph1n"vgg_19:conv5_3"     # conv5_2
 # mb64ic512ih14oc512oh14kh3ph1n"vgg_19:conv5_4"     # conv5_2
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_yolov2 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_yolov2
index 90b027cca..8174d7666 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_yolov2
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/conv_yolov2
@@ -1,28 +1,28 @@
 # Yolo v2
 
-mb16_g1ic3oc32_ih610oh608kh3sh1dh0ph0_iw610ow608kw3sw1dw0pw0_n"yolov2:conv1"
-mb16_g1ic32oc64_ih306oh304kh3sh1dh0ph0_iw306ow304kw3sw1dw0pw0_n"yolov2:conv2"
-mb16_g1ic64oc128_ih154oh152kh3sh1dh0ph0_iw154ow152kw3sw1dw0pw0_n"yolov2:conv3"
-mb16_g1ic128oc64_ih152oh152kh1sh1dh0ph0_iw152ow152kw1sw1dw0pw0_n"yolov2:conv4"
+mb16_g1ic3oc32_ih610oh608kh3sh1dh0ph0_iw610ow608kw3sw1dw0pw0_n"yolov2:conv1*6"
+mb16_g1ic32oc64_ih306oh304kh3sh1dh0ph0_iw306ow304kw3sw1dw0pw0_n"yolov2:conv2*9"
+mb16_g1ic64oc128_ih154oh152kh3sh1dh0ph0_iw154ow152kw3sw1dw0pw0_n"yolov2:conv3*18"
+mb16_g1ic128oc64_ih152oh152kh1sh1dh0ph0_iw152ow152kw1sw1dw0pw0_n"yolov2:conv4*9"
 # mb16_g1ic64oc128_ih154oh152kh3sh1dh0ph0_iw154ow152kw3sw1dw0pw0_n"yolov2:conv3"
-mb16_g1ic128oc256_ih78oh76kh3sh1dh0ph0_iw78ow76kw3sw1dw0pw0_n"yolov2:conv5"
-mb16_g1ic256oc128_ih76oh76kh1sh1dh0ph0_iw76ow76kw1sw1dw0pw0_n"yolov2:conv6"
+mb16_g1ic128oc256_ih78oh76kh3sh1dh0ph0_iw78ow76kw3sw1dw0pw0_n"yolov2:conv5*18"
+mb16_g1ic256oc128_ih76oh76kh1sh1dh0ph0_iw76ow76kw1sw1dw0pw0_n"yolov2:conv6*9"
 # mb16_g1ic128oc256_ih78oh76kh3sh1dh0ph0_iw78ow76kw3sw1dw0pw0_n"yolov2:conv5"
-mb16_g1ic256oc512_ih40oh38kh3sh1dh0ph0_iw40ow38kw3sw1dw0pw0_n"yolov2:conv7"
-mb16_g1ic512oc256_ih38oh38kh1sh1dh0ph0_iw38ow38kw1sw1dw0pw0_n"yolov2:conv8"
+mb16_g1ic256oc512_ih40oh38kh3sh1dh0ph0_iw40ow38kw3sw1dw0pw0_n"yolov2:conv7*27"
+mb16_g1ic512oc256_ih38oh38kh1sh1dh0ph0_iw38ow38kw1sw1dw0pw0_n"yolov2:conv8*18"
 # mb16_g1ic256oc512_ih40oh38kh3sh1dh0ph0_iw40ow38kw3sw1dw0pw0_n"yolov2:conv7"
 # mb16_g1ic512oc256_ih38oh38kh1sh1dh0ph0_iw38ow38kw1sw1dw0pw0_n"yolov2:conv8"
 # mb16_g1ic256oc512_ih40oh38kh3sh1dh0ph0_iw40ow38kw3sw1dw0pw0_n"yolov2:conv7"
-mb16_g1ic512oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv9"
-mb16_g1ic1024oc512_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv10"
+mb16_g1ic512oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv9*27"
+mb16_g1ic1024oc512_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv10*18"
 # mb16_g1ic512oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv9"
 # mb16_g1ic1024oc512_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv10"
 # mb16_g1ic512oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv9"
-mb16_g1ic1024oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv11"
+mb16_g1ic1024oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv11*18"
 # mb16_g1ic1024oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv11"
-mb16_g1ic512oc64_ih38oh38kh1sh1dh0ph0_iw38ow38kw1sw1dw0pw0_n"yolov2:conv12"
-mb16_g1ic1280oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv13"
-mb16_g1ic1024oc425_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv14"
+mb16_g1ic512oc64_ih38oh38kh1sh1dh0ph0_iw38ow38kw1sw1dw0pw0_n"yolov2:conv12*9"
+mb16_g1ic1280oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv13*9"
+mb16_g1ic1024oc425_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv14*9"
 # mb16_g1ic1024oc425_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv14"
 # mb16_g1ic1024oc425_ih19oh19kh1sh1dh0ph0_iw19ow19kw1sw1dw0pw0_n"yolov2:conv14"
 # mb16_g1ic1280oc1024_ih21oh19kh3sh1dh0ph0_iw21ow19kw3sw1dw0pw0_n"yolov2:conv13"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_1x1 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_1x1
new file mode 100644
index 000000000..c07c179b2
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_1x1
@@ -0,0 +1,33 @@
+# 1x1 2d deconv
+mb96ic64ih56oc64oh56kh1ph0n"googlenet_v1:conv2/3x3_reduce"
+mb96ic64ih28oc192oh28kh1ph0n"googlenet_v1:inception_3a/1x1"
+mb96ic96ih28oc192oh28kh1ph0n"googlenet_v1:inception_3a/3x3_reduce"
+mb96ic16ih28oc192oh28kh1ph0n"googlenet_v1:inception_3a/5x5_reduce"
+mb96ic32ih28oc192oh28kh1ph0n"googlenet_v1:inception_3a/pool_proj"
+mb96ic128ih28oc256oh28kh1ph0n"googlenet_v1:inception_3b/1x1"
+mb96ic32ih28oc256oh28kh1ph0n"googlenet_v1:inception_3b/5x5_reduce"
+mb96ic64ih28oc256oh28kh1ph0n"googlenet_v1:inception_3b/pool_proj"
+mb96ic192ih14oc480oh14kh1ph0n"googlenet_v1:inception_4a/1x1"
+mb96ic96ih14oc480oh14kh1ph0n"googlenet_v1:inception_4a/3x3_reduce"
+mb96ic16ih14oc480oh14kh1ph0n"googlenet_v1:inception_4a/5x5_reduce"
+mb96ic64ih14oc480oh14kh1ph0n"googlenet_v1:inception_4a/pool_proj"
+mb96ic128ih4oc512oh4kh1ph0n"googlenet_v1:loss1/conv"
+mb96ic160ih14oc512oh14kh1ph0n"googlenet_v1:inception_4b/1x1"
+mb96ic112ih14oc512oh14kh1ph0n"googlenet_v1:inception_4b/3x3_reduce"
+mb96ic24ih14oc512oh14kh1ph0n"googlenet_v1:inception_4b/5x5_reduce"
+mb96ic64ih14oc512oh14kh1ph0n"googlenet_v1:inception_4b/pool_proj"
+mb96ic128ih14oc512oh14kh1ph0n"googlenet_v1:inception_4c/1x1"
+mb96ic144ih14oc512oh14kh1ph0n"googlenet_v1:inception_4d/3x3_reduce"
+mb96ic32ih14oc512oh14kh1ph0n"googlenet_v1:inception_4d/5x5_reduce"
+mb96ic128ih4oc528oh4kh1ph0n"googlenet_v1:loss2/conv"
+mb96ic256ih14oc528oh14kh1ph0n"googlenet_v1:inception_4e/1x1"
+mb96ic160ih14oc528oh14kh1ph0n"googlenet_v1:inception_4e/3x3_reduce"
+mb96ic32ih14oc528oh14kh1ph0n"googlenet_v1:inception_4e/5x5_reduce"
+mb96ic128ih14oc528oh14kh1ph0n"googlenet_v1:inception_4e/pool_proj"
+mb96ic256ih7oc832oh7kh1ph0n"googlenet_v1:inception_5a/1x1"
+mb96ic160ih7oc832oh7kh1ph0n"googlenet_v1:inception_5a/3x3_reduce"
+mb96ic32ih7oc832oh7kh1ph0n"googlenet_v1:inception_5a/5x5_reduce"
+mb96ic128ih7oc832oh7kh1ph0n"googlenet_v1:inception_5a/pool_proj"
+mb96ic384ih7oc832oh7kh1ph0n"googlenet_v1:inception_5b/1x1"
+mb96ic192ih7oc832oh7kh1ph0n"googlenet_v1:inception_5b/3x3_reduce"
+mb96ic48ih7oc832oh7kh1ph0n"googlenet_v1:inception_5b/5x5_reduce"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv_2d b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_2d
index 5c0435975..bee5de1fd 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv_2d
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_2d
@@ -55,3 +55,9 @@ mb96ic192ih7oc832oh7kh1ph0n"googlenet_v1:inception_5b/3x3_reduce"
 mb96ic384ih7oc192oh7kh3ph1n"googlenet_v1:inception_5b/3x3"
 mb96ic48ih7oc832oh7kh1ph0n"googlenet_v1:inception_5b/5x5_reduce"
 mb96ic128ih7oc48oh7kh5ph2n"googlenet_v1:inception_5b/5x5"
+
+mb1_g1oc3ic64_oh1030ih512kh7sh2dh0ph0_ow1030iw512kw7sw2dw0pw0_n"masknet_p1:deconv1"
+g1mb50_oc512oh56ow56_ic256ih28iw28_kh1kw1sh2sw2ph0pw0n"resnet_50:res3a_branch1"
+
+ic8ih1iw5oc8oh1ow2kh1kw3ph0pw3dh0dw2n"deconv1d:1"
+ic8ih5iw1oc8oh2ow1kh3kw1ph3pw0dh2dw0n"deconv1d:2"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv_3d b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_3d
index f6d726144..f6d726144 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv_3d
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_3d
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv_all b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_all
index f2d02a4e0..e198bdeaa 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv_all
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_all
@@ -1,3 +1,3 @@
 --batch=deconv_3d
 --batch=deconv_2d
---batch=dilated_deconv
-\ No newline at end of file
+--batch=deconv_dilated
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/dilated_deconv b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_dilated
index 80a8ee817..80a8ee817 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/dilated_deconv
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/deconv_dilated
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/test_deconv_1x1 b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/test_deconv_1x1
new file mode 100644
index 000000000..e7687c421
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/test_deconv_1x1
@@ -0,0 +1,24 @@
+--cfg=u8s8u8s32
+--batch=deconv_1x1
+
+--cfg=s8s8u8s32
+--batch=deconv_1x1
+
+--cfg=u8s8s8s32
+--batch=deconv_1x1
+
+--cfg=s8s8s8s32
+--batch=deconv_1x1
+
+--cfg=u8s8s32s32
+--batch=deconv_1x1
+
+--cfg=s8s8s32s32
+--batch=deconv_1x1
+
+--cfg=u8s8f32s32
+--batch=deconv_1x1
+
+--cfg=s8s8f32s32
+--batch=deconv_1x1
+
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/test_deconv_all b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/test_deconv_all
new file mode 100644
index 000000000..b12029508
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deconv/test_deconv_all
@@ -0,0 +1,30 @@
+# f32
+--reset --skip-impl=ref
+--mb=2 --cfg=f32
+
+--dir=FWD_B --batch=deconv_all
+--dir=BWD_D --batch=deconv_all
+--dir=BWD_W --batch=deconv_all
+--dir=BWD_WB --batch=deconv_all
+
+# int8
+--reset --skip-impl=ref --allow-unimpl=true
+--mb=2 --dir=FWD_B
+
+--attr=irmode=down;oscale=per_oc:2.25;
+--cfg=u8s8u8s32 --batch=deconv_2d --batch=deconv_dilated
+--cfg=s8s8u8s32 --batch=deconv_2d --batch=deconv_dilated
+
+--attr=irmode=nearest;oscale=common:2.25;
+--cfg=u8s8s8s32 --batch=deconv_2d
+--cfg=u8s8s32s32 --batch=deconv_2d
+--cfg=s8s8u8s32 --batch=deconv_2d
+
+--attr=irmode=nearest;oscale=none;
+--cfg=s8s8s8s32 --batch=deconv_2d
+--cfg=s8s8s32s32 --batch=deconv_2d
+
+# 1x1 int8
+--reset --mb=2 --dir=FWD_B --allow-unimpl=true
+--attr=irmode=down;oscale=per_oc:2.25;post_ops='sum:1.5;relu' --batch=test_deconv_1x1
+--attr=irmode=nearest;oscale=common:2.25;post_ops='sum:1.5' --batch=test_deconv_1x1
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deepbench b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deepbench
index 5256c7563..afb663c35 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deepbench
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/deepbench
@@ -12,7 +12,7 @@
 --dir=BWD_W --batch=deepbench_inference_server
 --dir=BWD_W --batch=deepbench_training
 
---merge=RELU
+--attr=post_ops='relu'
 
 --dir=FWD_B --batch=deepbench_inference_device
 --dir=FWD_B --batch=deepbench_inference_server
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/ip_all b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/ip_all
index 085fdfef8..19c48ec06 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/ip_all
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/ip_all
@@ -2,11 +2,14 @@ mb112ic2048ih1iw1oc1000n"resnet:ip1"
 mb128ic128ih4oc1024n"googlenet_v1:ip1"
 mb128ic1024ih1oc1000n"googlenet_v1:ip2"
 mb224ic2048ih1oc1000n"inceptionv3:ip1"
-mb64ic2048ih1oc1000n"resnet_sparse:ip2"
+mb64ic2048ih1oc1000n"resnet_sparse:ip1"
 mb64ic512ih7iw7oc4096n"VGG16:ip1"
 mb64ic4096ih1iw1oc4096n"VGG16:ip2"
 mb64ic4096ih1iw1oc81n"VGG16:ip3"
 mb64ic4096ih1iw1oc324n"VGG16:ip4"
-mb32ic64id2ih3iw3oc1000n"wip_3d:1"
-mb32ic512id5ih5iw5oc1000n"wip_3d:2"
-mb256ic128id5ih5iw5oc128n"wip_3d:3"
+mb32ic64id2ih3iw3oc1000n"wip_3d:ip1"
+mb32ic512id5ih5iw5oc1000n"wip_3d:ip2"
+mb256ic128id5ih5iw5oc128n"wip_3d:ip3"
+mb1024ic845iw1ih1oc1024n"WD:ip1"
+mb1024ic1024iw1ih1oc1024n"WD:ip2"
+mb1024ic512iw1ih1oc256n"WD:ip3"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/test_ip_all b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/test_ip_all
index e59a6697a..a99582d3b 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/test_ip_all
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/ip/test_ip_all
@@ -9,8 +9,12 @@
 --cfg=u8s8u8s32  --batch=ip_all
 --cfg=u8s8s8s32  --batch=ip_all
 --cfg=u8s8s32s32 --batch=ip_all
+--cfg=s8s8u8s32  --batch=ip_all
+--cfg=s8s8s8s32  --batch=ip_all
+--cfg=s8s8s32s32 --batch=ip_all
 
 # relu
 --reset --dir=FWD_B --mb=2 --attr=post_ops='relu'
 --batch=ip_all
 --cfg=u8s8s32s32  --batch=ip_all
+--cfg=s8s8s32s32  --batch=ip_all
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/reorder/test_default b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/reorder/test_default
index f7b5fef2d..339ae1ab1 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/reorder/test_default
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/reorder/test_default
@@ -8,6 +8,9 @@
 --fmt=oihw,hwio 2x64x3x3
 --fmt=goihw,gOIhw16i16o 3x32x32x2x2
 
+--both-dir-fmt=true
+--ifmt=nChw8c --ofmt=nChw16c 2x40x3x3  # blocked with tail
+
 --attr=irmode=down;oscale=common:0.
 --fmt=nchw,nhwc,nChw8c,nChw16c 2x64x3x3
 --fmt=oihw,hwio 2x64x3x3
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_gru b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_gru
index 7496c8b1e..986193d1f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_gru
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_gru
@@ -1,4 +1,4 @@
 l2t2mb128sic512n"exp-gru-0"
-l7t1mb128sic512slc1024dic512dlc512n"exp-gru-1"
+l1t7mb128sic512slc1024dic512dlc512n"exp-gru-1"
 l1t10mb32sic128slc512dic128dlc128n"exp-gru-2"
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_inference b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_inference
index be3524761..3d5ddd7e2 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_inference
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_inference
@@ -1,6 +1,7 @@
 l1t30mb1sic512n"GNMT_enc-inference"
 l7t30mb1sic1024n"GNMT_enc-inference"
 l8t1mb1sic2048slc1024dic1024dlc1024n"GNMT_dec-inference"
+l1t1mb1sic2048slc1024dic1024dlc1024n"GNMT_dec-inference"
 l1t50mb1sic1760n"deepspeech2-inference"
 l1t100mb1sic760n"deepspeech2-inference"
 l1t200mb1sic1760n"deepspeech2-inference"
@@ -8,3 +9,5 @@ l1t50mb1sic500n"pytorch_testcase-inference"
 l1t629mb1sic128n"paddlepaddle_testcase-inference"
 l1t10mb1sic128slc512dic128dlc128n"exp-0"
 l10t1mb1sic512slc128dic128dlc128n"exp-1"
+
+l1t1mb640sic2048slc1024dic1024dlc1024n"GNMT_dec-inference"
+\ No newline at end of file
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_training b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_training
index 5d9a0ddea..d60107d79 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_training
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/rnn_training
@@ -1,6 +1,7 @@
 l1t1mb128sic512n"GNMT_enc-training"
 l2t2mb128sic1024n"GNMT_enc-training"
 l8t1mb128sic2048slc1024dic1024dlc1024n"GNMT_dec-training"
+l1t1mb128sic2048slc1024dic1024dlc1024n"GNMT_dec-training"
 l1t50mb32sic1760n"deepspeech2-training"
 l1t100mb32sic1760n"deepspeech2-training"
 l1t200mb32sic1760n"deepspeech2-training"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/test_rnn_small b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/test_rnn_small
index 6ca0cb3fa..9f1ac2e69 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/test_rnn_small
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/rnn/test_rnn_small
@@ -48,7 +48,40 @@
 --direction=left2right
 --activation=TANH
 --prop=FWD_D --batch=rnn_small
---prop=BWD_DW --batch=rnn_small
+# --prop=BWD_DW --batch=rnn_small
+
+# LSTM int8
+--reset --alg=VANILLA_LSTM
+--direction=left2right
+--activation=TANH
+--cfg=u8u8u8u8
+--allow-unimpl=true
+--attr=irmode=nearest --scaling=common
+--prop=FWD_D --batch=rnn_small
+
+--reset --alg=VANILLA_LSTM
+--direction=left2right
+--activation=TANH
+--allow-unimpl=true
+--cfg=u8u8u8f32
+--attr=irmode=down --scaling=common
+--prop=FWD_D --batch=rnn_small
+
+--reset --alg=VANILLA_LSTM
+--direction=left2right
+--activation=TANH
+--allow-unimpl=true
+--cfg=f32u8f32u8
+--attr=irmode=down --scaling=per_oc
+--prop=FWD_D --batch=rnn_small
+
+--reset --alg=VANILLA_LSTM
+--direction=left2right
+--activation=TANH
+--allow-unimpl=true
+--cfg=f32u8f32f32
+--attr=irmode=nearest --scaling=per_oc
+--prop=FWD_D --batch=rnn_small
 
 # GRU
 --reset --alg=VANILLA_GRU
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_all b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_all
index 6f725f9a6..6f705c1cd 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_all
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_all
@@ -8,7 +8,7 @@
 --dir=BWD_WB --batch=conv_resnet_50
 
 --mb=2
---merge=RELU                # +relu
+--attr=post_ops='relu'                # +relu
 --dir=FWD_B --batch=conv_alexnet
 
 # depthwise
@@ -21,12 +21,12 @@
 --cfg=u8s8u8s32  --batch=conv_all
 --cfg=u8s8s8s32  --batch=conv_resnet_50
 --cfg=u8s8s32s32 --batch=conv_googlenet_v3
---merge=RELU
+--attr=post_ops='relu'
 --cfg=u8s8s32s32 --batch=conv_vgg_19
 --cfg=s8s8u8s32  --batch=conv_all
 --cfg=s8s8s8s32  --batch=conv_resnet_50
 --cfg=s8s8s32s32 --batch=conv_googlenet_v3
---merge=RELU
+--attr=post_ops='relu'
 --cfg=s8s8s32s32 --batch=conv_vgg_19
 
 # s16 (knm)
@@ -36,7 +36,8 @@
 --cfg=s32s16s16s32 --dir=BWD_D --batch=conv_all
 --cfg=s16s32s16s32 --dir=BWD_WB --batch=conv_all
 
---merge=RELU                # +relu
+--attr=post_ops='relu'                # +relu
+--allow-unimpl=true                   # TODO: remove if ref_convolution accepts post_ops
 --cfg=s16s16s32s32 --dir=FWD_B --batch=conv_googlenet_v1
 
 # f32 wino
@@ -58,7 +59,7 @@
 --cfg=u8s8u8s32_wino  --batch=conv_all
 --cfg=u8s8s8s32_wino  --batch=conv_resnet_50
 --cfg=u8s8s32s32_wino --batch=conv_googlenet_v3
---merge=RELU
+--attr=post_ops='relu'
 --cfg=u8s8s32s32_wino --batch=conv_googlenet_v2
 
 # dilated
@@ -72,3 +73,12 @@
 
 # 3D conv
 --batch=test_conv_3d
+
+# auto algo
+--reset --cfg=f32 --alg=auto
+--dir=FWD_B --batch=conv_auto
+--dir=BWD_D --batch=conv_auto
+--dir=BWD_WB --batch=conv_auto
+--cfg=u8s8s8s32
+--dir=FWD_B --batch=conv_auto
+
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_attrs b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_attrs
index 00d4cff66..a54a0383a 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_attrs
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_attrs
@@ -7,23 +7,38 @@
 --attr=irmode=down;oscale=per_oc:2.25;post_ops='sum:1.5;relu'
 --cfg=u8s8u8s32  --batch=conv_vgg_19
 --cfg=u8s8f32s32 --batch=conv_googlenet_v2
+--cfg=u8s8s32s32 --batch=conv_tails
 --cfg=s8s8u8s32  --batch=conv_vgg_19
 --cfg=s8s8f32s32 --batch=conv_googlenet_v2
+--cfg=s8s8s32s32 --batch=conv_tails
 --dir=FWD_D
 --attr=irmode=nearest;oscale=common:2.25;post_ops='sum:1.5'
 --cfg=u8s8s8s32  --batch=conv_googlenet_v3
 --cfg=u8s8s32s32 --batch=conv_alexnet
+--cfg=u8s8s32s32 --batch=conv_tails
 --cfg=s8s8s8s32  --batch=conv_googlenet_v3
 --cfg=s8s8s32s32 --batch=conv_alexnet
+--cfg=s8s8s32s32 --batch=conv_tails
 
 # f32
 --reset --cfg=f32
 --mb=2
 --skip-impl="ref:gemm"      # ! test jit version only
 --allow-unimpl=true
---dir=FWD_B --attr=post_ops='sum;relu' --batch=conv_resnet_50
---dir=FWD_B --attr=post_ops='sum;relu' --batch=conv_3d
---dir=FWD_B --attr=post_ops='sum;relu' --batch=conv_1d
+
+--dir=FWD_B
+--attr=post_ops='sum;relu' --batch=conv_resnet_50
+--attr=post_ops='sum;relu:0.5' --batch=conv_tails
+--attr=post_ops='sum;tanh' --batch=conv_tails
+--attr=post_ops='sum;elu:0.5' --batch=conv_tails
+--attr=post_ops='sum;abs' --batch=conv_tails
+--attr=post_ops='sum;sqrt' --batch=conv_tails
+--attr=post_ops='sum;linear:0.5:1.5' --batch=conv_tails
+--attr=post_ops='sum;brelu:0.5' --batch=conv_tails
+--attr=post_ops='sum;logistic' --batch=conv_tails
+--cfg=f32_no_limits # square and srelu might overrun int_max_exact
+--attr=post_ops='sum;square' --batch=conv_tails
+--attr=post_ops='sum;srelu' --batch=conv_tails
 
 # f32_wino
 --reset --alg=wino --cfg=f32_wino
@@ -44,3 +59,17 @@
 --attr=irmode=nearest;oscale=common:2.25;post_ops='sum:1.5'
 --cfg=u8s8s8s32_wino  --batch=conv_googlenet_v3
 --cfg=u8s8s32s32_wino --batch=conv_resnet_50
+
+# i8 conv + f32 leaky relu
+--reset --dir=FWD_B --mb=2
+--skip-impl="ref:gemm"      # ! test jit version only
+--allow-unimpl=true
+--attr=post_ops='relu:0.5'
+--cfg=s8s8f32s32 --batch=conv_yolov2
+--cfg=u8s8f32s32 --batch=conv_yolov2
+--attr=post_ops='relu:0.5;sum'
+--cfg=s8s8f32s32 --batch=conv_yolov2
+--cfg=u8s8f32s32 --batch=conv_yolov2
+--attr=post_ops='sum;relu:0.5'
+--cfg=s8s8f32s32 --batch=conv_yolov2
+--cfg=u8s8f32s32 --batch=conv_yolov2
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_depthwise b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_depthwise
index 0fa59738b..577b2f9d0 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_depthwise
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_depthwise
@@ -3,7 +3,7 @@
 --mb=2
 --dir=FWD_D --batch=conv_mobilenet_dw
 --dir=BWD_D --batch=conv_mobilenet_dw
---merge=RELU
+--attr=post_ops='relu'
 --dir=FWD_D --batch=conv_mobilenet_dw # +relu
 
 #post-ops
@@ -30,8 +30,11 @@
 --dir=FWD_B
 --attr=irmode=down;oscale=per_oc:2.25;post_ops='sum:1.5;relu'
 --cfg=u8s8u8s32 --batch=conv_mobilenet_dw
---cfg=u8s8f32s32 --batch=conv_mobilenet_dw
+--cfg=s8s8f32s32 --batch=conv_mobilenet_dw
 --dir=FWD_D
 --attr=irmode=nearest;oscale=common:2.25;post_ops='sum:1.5'
 --cfg=u8s8s8s32 --batch=conv_mobilenet_dw
---cfg=u8s8s32s32 --batch=conv_mobilenet_dw
+--cfg=s8s8s32s32 --batch=conv_mobilenet_dw
+
+--cfg=u8s8s8s32 g8mb1ic8ih112iw112oc8oh112ow112kh3kw3sh1sw1ph1pw1n"depthwise:conv1"
+--cfg=s8s8u8s32 g8mb1ic8ih112iw112oc8oh112ow112kh3kw3sh1sw1ph1pw1n"depthwise:conv1"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression
index 27c2a9e2f..82c81af7a 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression
@@ -6,7 +6,10 @@
 --dir=FWD_B  --batch=conv_regression_padding
 --dir=BWD_D  --batch=conv_regression_padding
 --dir=BWD_WB --batch=conv_regression_padding
---merge=RELU
+--dir=FWD_B  --batch=conv_regression_gemm
+--dir=BWD_D  --batch=conv_regression_gemm
+--dir=BWD_WB --batch=conv_regression_gemm
+--attr=post_ops='relu'
 --dir=FWD_B --batch=conv_regression_small_spatial
 --dir=FWD_B --batch=conv_regression_padding
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression_general b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression_general
index e2cebb980..d50f74c10 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression_general
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_conv_regression_general
@@ -69,3 +69,13 @@
 # MKLDNN-1074: FPE for mb1 with ih < sh or iw < sw
 --reset --dir=FWD_D mb1_g1ic128oc256_ih1oh1kh3sh2dh0ph1_iw1ow1kw3sw2dw0pw1
 
+#MKLDNN-1184 grouped convolutions with small input-channel and
+# non-blocked src format
+--reset --dir=FWD_D
+#AVX2
+mb1_g2ic4oc16_ih8oh6kh3sh1dh0ph0_iw8ow6kw3sw1dw0pw0
+#AVX512
+mb1_g2ic16oc32_ih8oh8kh3sh1dh0ph1_iw8ow8kw3sw1dw0pw1
+mb1_g2ic8oc32_ih8oh8kh3sh1dh0ph1_iw8ow8kw3sw1dw0pw1
+mb1_g2ic4oc32_ih8oh8kh3sh1dh0ph1_iw8ow8kw3sw1dw0pw1
+mb1_g2ic22oc32_ih8oh6kh3sh1dh0ph0_iw8ow6kw3sw1dw0pw0
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_deconv_all b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_deconv_all
deleted file mode 100644
index 2b71b5002..000000000
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/inputs/test_deconv_all
+++ /dev/null
@@ -1,26 +0,0 @@
-# f32
---reset --cfg=f32
---mb=2
---dir=FWD_B --batch=deconv_all
---dir=BWD_D --batch=deconv_all
---dir=BWD_W --batch=deconv_all
---dir=BWD_WB --batch=deconv_all
-
-#int8
---skip-impl=ref
---reset --allow-unimpl=true --dir=FWD_B --mb=2
---attr=irmode=down;oscale=per_oc:2.25;
---cfg=u8s8u8s32 --batch=deconv_2d
---cfg=u8s8s8s32 --batch=deconv_2d
---cfg=u8s8s32s32 --batch=deconv_2d
---cfg=s8s8u8s32 --batch=deconv_2d
---cfg=s8s8s8s32 --batch=deconv_2d
---cfg=s8s8s32s32 --batch=deconv_2d
---attr=irmode=nearest;oscale=common:2.25;
---attr=irmode=down;oscale=per_oc:2.25;
---cfg=u8s8u8s32 --batch=deconv_2d
---cfg=u8s8s8s32 --batch=deconv_2d
---cfg=u8s8s32s32 --batch=deconv_2d
---cfg=s8s8u8s32 --batch=deconv_2d
---cfg=s8s8s8s32 --batch=deconv_2d
---cfg=s8s8s32s32 --batch=deconv_2d
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/cfg.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/cfg.cpp
index 2f4d1bcc1..15f1540e3 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/cfg.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/cfg.cpp
@@ -80,6 +80,38 @@ const _dt_conf_t conf_u8s8u8s32 = {
     {mkldnn_s32,},
 };
 
+const _dt_conf_t conf_s8s8f32s32 = {
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_f32, -int_max_exact, int_max_exact,   -8,  32, 0, 1, .35, 0.},
+    {mkldnn_f32, -int_max_exact, int_max_exact, -255, 255, 0, 1, .35, 0.},
+    {mkldnn_s32,},
+};
+
+const _dt_conf_t conf_s8s8s32s32 = {
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_f32, -int_max_exact, int_max_exact,   -8,  32, 0, 1, .35, 0.},
+    {mkldnn_s32,      INT32_MIN,     INT32_MAX, -255, 255, 0, 1, .35, 0.},
+    {mkldnn_s32,},
+};
+
+const _dt_conf_t conf_s8s8s8s32 = {
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_f32, -int_max_exact, int_max_exact,   -8,  32, 0, 1, .35, 0.},
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX, -127, 127, 0, 1, .35, 0.},
+    {mkldnn_s32,},
+};
+
+const _dt_conf_t conf_s8s8u8s32 = {
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_s8,        INT8_MIN,      INT8_MAX,   -5,   5, 0, 1, .35, 0.},
+    {mkldnn_f32, -int_max_exact, int_max_exact,   -8,  32, 0, 1, .35, 0.},
+    {mkldnn_u8,               0,     UINT8_MAX,    0, 255, 0, 1, .35, 0.},
+    {mkldnn_s32,},
+};
+
 const dt_conf_t *str2cfg(const char *str) {
 #define CASE(cfg) \
     if (!strcasecmp(STRINGIFY(cfg), str)) return CONCAT2(conf_,cfg)
@@ -89,6 +121,10 @@ const dt_conf_t *str2cfg(const char *str) {
     CASE(u8s8s32s32);
     CASE(u8s8s8s32);
     CASE(u8s8u8s32);
+    CASE(s8s8f32s32);
+    CASE(s8s8s32s32);
+    CASE(s8s8s8s32);
+    CASE(s8s8u8s32);
 #undef CASE
     []() { SAFE(FAIL, CRIT); return 0; }();
     return (const dt_conf_t *)1;
@@ -102,6 +138,10 @@ const char *cfg2str(const dt_conf_t *cfg) {
     CASE(u8s8s32s32);
     CASE(u8s8s8s32);
     CASE(u8s8u8s32);
+    CASE(s8s8f32s32);
+    CASE(s8s8s32s32);
+    CASE(s8s8s8s32);
+    CASE(s8s8u8s32);
 #undef CASE
     []() { SAFE(FAIL, CRIT); return 0; }();
     return NULL;
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/ip.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/ip.cpp
index eba082c3f..4166161be 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/ip.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/ip/ip.cpp
@@ -263,9 +263,6 @@ int fill_dst(const prb_t *p, dnn_mem_t &mem_dt, dnn_mem_t &mem_fp, res_t *r) {
         ((float *)mem_00)[dst_off_f(p, mb, oc)] = value;
     });
 
-    mem_dt.reorder(mem_00);
-    mem_fp.reorder(mem_dt);
-
     SAFE(mem_dt.reorder(mem_00), WARN);
     SAFE(mem_fp.reorder(mem_dt), WARN);
 
@@ -317,7 +314,6 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_fwd(p, src_fp, wei_fp, bia_fp, dst_fp);
             dnn_mem_t dst(dst_dt, fp, mkldnn_nc);
-            SAFE(dst.reorder(dst_dt), WARN);
             SAFE(compare_dat(p, DST, dst, dst_fp, r), WARN);
         }
     } else if (p->dir == BWD_D) {
@@ -328,7 +324,6 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_bwd_d(p, src_fp, wei_fp, dst_fp);
             dnn_mem_t src(src_dt, fp, src_format);
-            SAFE(src.reorder(src_dt), WARN);
             SAFE(compare_dat(p, SRC, src, src_fp, r), WARN);
         }
     } else if (p->dir & FLAG_BWD && p->dir & FLAG_WEI) {
@@ -341,11 +336,9 @@ int doit(const prb_t *p, res_t *r) {
         if (bench_mode & CORR) {
             compute_ref_bwd_w(p, src_fp, wei_fp, bia_fp, dst_fp);
             dnn_mem_t wei(wei_dt, fp, wei_format);
-            SAFE(wei.reorder(wei_dt), WARN);
             if (compare_dat(p, WEI, wei, wei_fp, r) != OK) return FAIL;
             if (p->dir & FLAG_BIA) {
                 dnn_mem_t bia(bia_dt, fp, mkldnn_x);
-                SAFE(bia.reorder(bia_dt), WARN);
                 SAFE(compare_dat(p, BIA, bia, bia_fp, r), WARN);
             }
         }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_debug.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_debug.cpp
index decf41bf3..12a1ffa51 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_debug.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_debug.cpp
@@ -78,6 +78,7 @@ mkldnn_memory_format_t str2fmt(const char *str) {
     CASE(nc);
     CASE(ncw);
     CASE(nwc);
+    CASE(nCw8c);
     CASE(nCw16c);
     CASE(nchw);
     CASE(nhwc);
@@ -96,6 +97,7 @@ mkldnn_memory_format_t str2fmt(const char *str) {
     CASE(oihw);
     CASE(ihwo);
     CASE(hwio);
+    CASE(iohw);
     CASE(hwio_s8s8);
     CASE(dhwio);
     CASE(OIhw8i8o);
@@ -114,6 +116,7 @@ mkldnn_memory_format_t str2fmt(const char *str) {
     CASE(goiw);
     CASE(goihw);
     CASE(hwigo);
+    CASE(giohw);
     CASE(hwigo_s8s8);
     CASE(goiw);
     CASE(gOIw16i16o);
@@ -136,6 +139,7 @@ mkldnn_memory_format_t str2fmt(const char *str) {
     CASE(gOhwi16o);
     CASE(Goihw8g);
     CASE(Goihw16g);
+    CASE(Goihw16g_s8s8);
     CASE(oIhw8i);
     CASE(oIhw16i);
     CASE(ncdhw);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_memory.hpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_memory.hpp
index 8c6a4c1b8..6a1441c3d 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_memory.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/mkldnn_memory.hpp
@@ -77,10 +77,13 @@ struct dnn_mem_t {
 
     size_t size() const { return mkldnn_memory_primitive_desc_get_size(mpd_); }
 
-    size_t nelems() const {
+    size_t nelems(bool with_padding_dims = false) const {
+        auto dims = with_padding_dims
+            ? md_.layout_desc.blocking.padding_dims
+            : md_.dims;
         size_t n = 1;
         for (int i = 0; i < md_.ndims; ++i)
-            n *= md_.dims[i];
+            n *= dims[i];
         return n;
     }
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.cpp
index a19917b43..235e1af2c 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.cpp
@@ -258,7 +258,7 @@ int check_reorder(const prb_t *p, res_t *res) {
 
     const reorder_conf_t &r = p->reorder;
     const int ndims = (int)r.dims.size();
-    const int *dims = &r.dims[0];
+    const ptrdiff_t *dims = &r.dims[0];
 
     mkldnn_memory_format_t fmt_ref;
     const bool is_data = fmt2data_kind(r.fmt_in) == DATA;
@@ -313,18 +313,21 @@ int check_reorder(const prb_t *p, res_t *res) {
     SAFE(init_status, WARN);
 
     SAFE(mem_dt_out_fmt_out.reorder(mem_dt_in_fmt_in, mkldnn_attr), WARN);
-    SAFE(mem_dt_out_fmt_ref.reorder(mem_dt_out_fmt_out), WARN);
 
-    /* Step 5: execute benchdnn reorder */
-    SAFE(reorder(p, mem_test_dt_out_fmt_ref, mem_dt_in_fmt_ref, scales), WARN);
-
-    /* Step 6: compare results */
+    /* Step 5: check corrrectness */
     if (bench_mode & CORR) {
+        /* Step 5a: reorder output from mkldnn to ref format using mkldnn */
+        SAFE(mem_dt_out_fmt_ref.reorder(mem_dt_out_fmt_out), WARN);
+
+        /* Step 5b: execute benchdnn reorder */
+        SAFE(reorder(p, mem_test_dt_out_fmt_ref, mem_dt_in_fmt_ref, scales), WARN);
+
+        /* Step 5c: compare benchdnn and mkldnn output */
         SAFE(compare(p, mem_test_dt_out_fmt_ref, mem_dt_out_fmt_ref,
                     scales, count, res), WARN);
     }
 
-    /* Step 7: performance measurement */
+    /* Step 6: performance measurement */
     if (bench_mode & PERF) {
         mkldnn_primitive_desc_t perf_r_pd;
         mkldnn_primitive_t perf_r;
@@ -353,7 +356,7 @@ int check_reorder(const prb_t *p, res_t *res) {
         DNN_SAFE_V(mkldnn_primitive_destroy(perf_r));
     }
 
-    /* Step 8: clean up */
+    /* Step 7: clean up */
 cleanup:
     mkldnn_primitive_attr_destroy(mkldnn_attr);
     zfree(scales);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.hpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.hpp
index 356420559..d509f4bc1 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder.hpp
@@ -28,7 +28,7 @@
 
 namespace reorder {
 
-using dims_t = std::vector<int>;
+using dims_t = std::vector<ptrdiff_t>;
 
 struct dt_conf_s {
     mkldnn_data_type_t dt;
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder_aux.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder_aux.cpp
index a4137a832..51df6ebd9 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder_aux.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/reorder/reorder_aux.cpp
@@ -40,8 +40,8 @@ dims_t str2dims(const char *str) {
 void dims2str(const dims_t &dims, char *buffer) {
     int rem_len = max_dims_len;
     for (size_t d = 0; d < dims.size() - 1; ++d)
-        DPRINT("%dx", dims[d]);
-    DPRINT("%d", dims[dims.size() - 1]);
+        DPRINT("%tdx", dims[d]);
+    DPRINT("%td", dims[dims.size() - 1]);
 }
 
 void prb2str(const prb_t *p, const res_t *res, char *buffer) {
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/bench_rnn.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/bench_rnn.cpp
index 3d43c77b3..875db4c91 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/bench_rnn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/bench_rnn.cpp
@@ -35,12 +35,23 @@ mkldnn_prop_kind_t prop = mkldnn_forward;
 alg_t alg = VANILLA_RNN;
 mkldnn_rnn_direction_t direction = mkldnn_unidirectional_left2right;
 activation_t activation = RELU;
+const char *perf_template = "perf,%n,%d,,,%-t,,%0t,";
+const dt_conf_t *cfg = conf_f32;
+policy_t scale_policy = NONE;
+attr_t attr;
+bool allow_unimpl = false;
+int mb = 0;
 
 void reset_parameters() {
+    cfg = conf_f32;
+    attr = attr_t();
     prop = mkldnn_forward;
     alg = VANILLA_RNN;
     direction = mkldnn_unidirectional_left2right;
     activation = RELU;
+    scale_policy = NONE;
+    allow_unimpl = false;
+    mb = 0;
 }
 
 int bench(int argc, char **argv, bool main_bench) {
@@ -57,12 +68,28 @@ int bench(int argc, char **argv, bool main_bench) {
                 assert("unknown dir");
         } else if (!strncmp("--alg=", argv[arg], 6))
             alg = str2alg(argv[arg] + 6);
+        else if (!strncmp("--cfg=", argv[arg], 6))
+            cfg = str2cfg(argv[arg] + 6);
+        else if (!strncmp("--attr=", argv[arg], 7))
+            SAFE(str2attr(&attr, argv[arg] + 7), CRIT);
         else if (!strncmp("--direction=", argv[arg], 12))
             direction = str2direction(argv[arg] + 12);
         else if (!strncmp("--activation=", argv[arg], 13))
             activation = str2activation(argv[arg] + 13);
+        else if (!strncmp("--allow-unimpl=", argv[arg], 15))
+            allow_unimpl = str2bool(argv[arg] + 15);
+        else if (!strncmp("--scaling=", argv[arg], 10))
+            scale_policy = str2policy(argv[arg] + 10);
         else if (!strncmp("--reset", argv[arg], 7))
             reset_parameters();
+        else if (!strncmp("--perf-template=", argv[arg], 16))
+            perf_template = argv[arg] + 16;
+        else if (!strncmp("--mb=", argv[arg], 5))
+            mb = atoi(argv[arg] + 5);
+        else if (!strncmp("-v", argv[arg], 2))
+            verbose = atoi(argv[arg] + 2);
+        else if (!strncmp("--verbose=", argv[arg], 10))
+            verbose = atoi(argv[arg] + 10);
         else {
             rnn_desc_t d;
             if (str2desc(&d, argv[arg]) == FAIL) {
@@ -70,6 +97,20 @@ int bench(int argc, char **argv, bool main_bench) {
                         argv[arg]);
                 exit(2);
             }
+            if (cfg != conf_f32 && alg != VANILLA_LSTM) {
+                fprintf(stderr,
+                        "driver: configuration ``%s` is supported for LSTM "
+                        "cell only, exiting...\n",
+                        cfg2str(cfg));
+                exit(2);
+            }
+            if (cfg != conf_f32 && scale_policy == NONE) {
+                fprintf(stderr,
+                        "driver: configuration ``%s` requires scale policy to "
+                        "be COMMON or PER_OC, exiting...\n",
+                        cfg2str(cfg));
+                exit(2);
+            }
             check(&d);
         }
     }
@@ -77,17 +118,17 @@ int bench(int argc, char **argv, bool main_bench) {
 }
 
 void check(rnn_desc_t *d) {
-    const rnn_prb_t p(*d, conf_f32, prop, alg, direction, activation);
+    const rnn_prb_t p(*d, cfg, prop, alg, direction, activation, attr,
+        scale_policy, mb);
     res_t res{};
     char pstr[max_prb_len];
-    prb2str(&p, &res, pstr);
 
     int status = rnn::doit(&p, &res);
 
     prb2str(&p, &res, pstr);
     bool want_perf_report = false;
 
-    parse_result(res, want_perf_report, false, status, pstr);
+    parse_result(res, want_perf_report, allow_unimpl, status, pstr);
 
     if (bench_mode & PERF)
         perf_report(&p, &res, pstr);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/cfg.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/cfg.cpp
index 4680572c5..7d93f0662 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/cfg.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/cfg.cpp
@@ -26,66 +26,70 @@ states,
 weights_input,
 weights_states,
 bias,
-dst_last_layer,
 dst_last_iteration,
+dst_last_layer,
 dst_diff_input,
 dst_diff_states,
 dst_diff_weights_input,
 dst_diff_weights_states,
 dst_diff_bias,
-diff_last_layer,
 diff_last_iteration,
-params: {data_type, min, max, f_min,* f_max, f_base, f_step, f_sparsity, eps}
+diff_last_layer,
+params: {data_type, min, max, f_min, f_max, f_mean, f_var, eps}
 */
 
 const int int_max_exact = 1 << 24;
 const _dt_conf_t conf_f32 = {
-#if 0
-        { mkldnn_f32, -int_max_exact, int_max_exact,  1,  1, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  1,  1, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  1,  1, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  1,  1, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  1,  1, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  777,  777, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  2,  2, 0, 1, .25, 1e-5 },
-        { mkldnn_f32, -int_max_exact, int_max_exact,  2,  2, 0, 1, .25, 1e-5 },
-#elif 0
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -4, 4, 0, 1, .25, 1e-5 },
-#else
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-    { mkldnn_f32, -int_max_exact, int_max_exact, -64, 64, 0, 1, 1., 1e-5 },
-#endif
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //input
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //weights_input
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //weights_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //bias
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_last_iteration
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_last_layer
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_diff_input
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_diff_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_diff_weights_input
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_diff_weights_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_diff_bias
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //diff_last_iteration
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //diff_last_layer
+};
+const _dt_conf_t conf_u8u8u8u8 = {
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 5.f, 0. }, //input
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 5.f, 0. }, //states
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_input
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 0. }, //bias
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 10.f, 0. }, //dst_iter
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 10.f, 0. }, //dst_layer
+};
+const _dt_conf_t conf_u8u8u8f32 = {
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 5.f, 0. }, //input
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 5.f, 0. }, //states
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_input
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 0. }, //bias
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 10.f, 0. }, //dst_iter
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.001f, 1e-5 }, //dst_last_layer
+};
+const _dt_conf_t conf_f32u8f32u8 = {
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 5.f, 0. }, //input
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.05f, 1e-5 }, //states
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_input
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 0. }, //bias
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 1e-5 }, //dst_iter
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 10.f, 0. }, //dst_layer
+};
+const _dt_conf_t conf_f32u8f32f32 = {
+    { mkldnn_u8, 0, UINT8_MAX, 0, 127, 64.f, 5.f, 0. }, //input
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.05f, 1e-5 }, //states
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_input
+    { mkldnn_s8, INT8_MIN, INT8_MAX, -63, 63, 0.f, 10.f, 0. }, //weights_states
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 0. }, //bias
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 1e-5 }, //dst_iter
+    { mkldnn_f32, -int_max_exact, int_max_exact, -1, 1, 0.f, 0.01f, 1e-5 }, //dst_last_layer
 };
 
 const dt_conf_t *str2cfg(const char *str) {
@@ -93,6 +97,10 @@ const dt_conf_t *str2cfg(const char *str) {
     if (!strcasecmp(STRINGIFY(cfg), str)) \
     return CONCAT2(conf_, cfg)
     CASE(f32);
+    CASE(u8u8u8u8);
+    CASE(u8u8u8f32);
+    CASE(f32u8f32u8);
+    CASE(f32u8f32f32);
 #undef CASE
     []() {
         SAFE(FAIL, CRIT);
@@ -106,6 +114,10 @@ const char *cfg2str(const dt_conf_t *cfg) {
     if (cfg == CONCAT2(conf_, _cfg)) \
     return STRINGIFY(_cfg)
     CASE(f32);
+    CASE(u8u8u8u8);
+    CASE(u8u8u8f32);
+    CASE(f32u8f32u8);
+    CASE(f32u8f32f32);
 #undef CASE
     []() {
         SAFE(FAIL, CRIT);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/perf_report.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/perf_report.cpp
index ddecb23ca..334568e2b 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/perf_report.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/perf_report.cpp
@@ -30,20 +30,70 @@ namespace rnn {
 void perf_report(const rnn_prb_t *p, const res_t *r, const char *pstr) {
     const auto &t = r->timer;
     const int max_len = 400;
-    char buffer[max_len], *buf = buffer;
     int rem_len = max_len - 1;
+    char buffer[max_len], *buf = buffer;
 
-    #   define DPRINT(...) do { \
+#   define DPRINT(...) do { \
         int l = snprintf(buf, rem_len, __VA_ARGS__); \
         buf += l; rem_len -= l; \
     } while(0)
 
-    DPRINT("perf,");
-    DPRINT("%s,", pstr);
-    DPRINT("time(ms):");
-    DPRINT("min=%g,", t.ms(benchdnn_timer_t::min));
-    DPRINT("max=%g,", t.ms(benchdnn_timer_t::max));
-    DPRINT("avg=%g", t.ms(benchdnn_timer_t::avg));
+    auto modifier2mode = [](char c) {
+        if (c == '-') return benchdnn_timer_t::min;
+        if (c == '0') return benchdnn_timer_t::avg;
+        if (c == '+') return benchdnn_timer_t::max;
+        return benchdnn_timer_t::min;
+    };
+
+    auto modifier2unit = [](char c) {
+        if (c == 'K') return 1e3;
+        if (c == 'M') return 1e6;
+        if (c == 'G') return 1e9;
+        return 1e0;
+    };
+
+    const char *pt = perf_template;
+    char c;
+
+    while ((c = *pt++) != '\0') {
+        if (c != '%') { *buf++ = c; rem_len--; continue; }
+
+        c = *pt++;
+
+        benchdnn_timer_t::mode_t mode = benchdnn_timer_t::min;
+        double unit = 1e0;
+
+        if (c == '-' || c == '0' || c == '+') {
+            mode = modifier2mode(c);
+            c = *pt++;
+        }
+
+        if (c == 'K' || c == 'M' || c == 'G') {
+            unit = modifier2unit(c);
+            c = *pt++;
+        }
+        // cellkind:activation:direction:l d mb
+        if (c == 'd') DPRINT("%s_%s_%s_l%dd%dt%dmb%d_slc%dsic%ddic%d",
+                             alg2str(p->alg), activation2str(p->activation), direction2str(p->direction),
+                             p->n_layer, p->n_directions(), p->n_iter, p->mb, p->slc, p->sic, p->dic);
+        else if (c == 'D')
+            DPRINT("%s", pstr);
+        else if (c == 'n')
+            DPRINT("%s", p->name);
+        else if (c == 'z')
+            DPRINT("%s", prop2str(p->prop));
+        else if (c == 'F')
+            DPRINT("%g", t.ticks(mode) / t.ms(mode) / unit * 1e3);
+        else if (c == 't')
+            DPRINT("%g", t.ms(mode) / unit);
+        else if (c == 'c')
+            DPRINT("%g", t.ticks(mode) / unit);
+        else
+            []() { SAFE(FAIL, CRIT); return 0; }();
+    }
+
+    *buf = '\0';
+    assert(rem_len >= 0);
 
 #   undef DPRINT
     print(0, "%s\n", buffer);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/ref_rnn.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/ref_rnn.cpp
index ed668c1b6..9bb9a1f99 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/ref_rnn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/ref_rnn.cpp
@@ -52,8 +52,8 @@ float activation(activation_t f, float x, bool is_fwd = true) {
     float result = 0;
     switch (f) {
     case RELU: result = is_fwd ? relu(x) : drelu(x); break;
-    case LOGISTIC: result = is_fwd ? logistic(x) : dlogistic(x); break;
-    case TANH: result = is_fwd ? tanhf(x) : dtanhf(x); break;
+    case LOGISTIC: result = is_fwd ? logistic(x) : x_m_square(x); break;
+    case TANH: result = is_fwd ? tanhf(x) : one_m_square(x); break;
     default: assert(!"unknown activation");
     }
     return result;
@@ -164,8 +164,8 @@ void gru_lbr_fwd(int sic, int slc, int dic, int wc, int batch, int n_gates,
 }
 
 // w = [weights_layer | weights_iter] : with order f, i , o, \bar(c)
-void lstm_fwd(int sic, int slc, int dic, int wc, int batch, int n_gates,
-        float *dst_iter_h_, float *c_dst_, float *gates_,
+void lstm_fwd(const rnn_prb_t *p, int sic, int slc, int dic, int wc, int batch,
+        int n_gates, float *dst_iter_h_, float *c_dst_, float *gates_,
         const float *weights_layer_, const float *weights_iter_h_,
         const float *bias_, const float *src_layer_, const float *src_iter_h_,
         const float *src_iter_c_) {
@@ -182,34 +182,64 @@ void lstm_fwd(int sic, int slc, int dic, int wc, int batch, int n_gates,
 
     gemm("C", "N", "N", batch, n_gates * dic, slc, 1.0, src_layer_, wc,
             weights_layer_, n_gates * dic, 0.0, gates_, n_gates * dic);
-    gemm("C", "N", "N", batch, n_gates * dic, sic,1.0, src_iter_h_, wc,
+    gemm("C", "N", "N", batch, n_gates * dic, sic, 1.0, src_iter_h_, wc,
             weights_iter_h_, n_gates * dic, 1.0, gates_, n_gates * dic);
 
+    auto maybe_deq_w = [&](float g, int oc) {
+        if (p->cfg == conf_f32)
+            return g;
+        float scale = 1.;
+        if (p->scale_policy == PER_OC)
+            scale = p->wei_oc_scales[oc];
+        else if (p->scale_policy == COMMON)
+            scale = p->wei_scale;
+        scale *= p->data_scale;
+        return g / scale;
+    };
+
     // add bias
     for (int i = 0; i < batch; i++)
         for (int j = 0; j < n_gates; j++)
             for (int k = 0; k < dic; k++) {
-                gates(i, j, k) += bias(j, k);
+                gates(i, j, k)
+                        = maybe_deq_w(gates(i, j, k), j * dic + k) + bias(j, k);
             }
 
     // run the eltwise
     lstm_activation(dic, n_gates, batch, gates_);
 
+    auto maybe_q_d = [&](float h) {
+        if (p->cfg == conf_f32)
+            return h;
+        float fp = p->data_scale * h;
+        using R = attr_t::round_mode_t;
+        switch (p->attr.irmode) {
+        case R::DOWN: fp = floorf(fp); break;
+        case R::NEAREST: fp = nearbyintf(fp); break;
+        default: assert(!"unkown round mode");
+        }
+        if (fp + p->data_shift > p->cfg[input].max)
+            fp = p->cfg[input].max - p->data_shift;
+        if (fp + p->data_shift < p->cfg[input].min)
+            fp = p->cfg[input].min - p->data_shift;
+        return fp;
+    };
+
     // compute C_t_l and H_t_l
     for (int i = 0; i < batch; i++)
         for (int j = 0; j < dic; j++) {
             float tmp = gates(i, ohf, j) * src_iter_c(i, j)
                     + gates(i, ohi, j) * gates(i, ohc, j);
             c_dst(i, j) = tmp;
-            h_dst(i, j) = gates(i, oho, j) * tanhf(tmp);
+            h_dst(i, j) = maybe_q_d(gates(i, oho, j) * tanhf(tmp));
         }
 }
 
-void rnn_cell_fwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
-        int batch, int n_gates, float *dst_iter_h, float *dst_iter_c,
-        float *gates, const float *weights_layer, const float *weights_iter,
-        const float *bias, const float *src_layer, const float *src_iter_h,
-        const float *src_iter_c, float *ws_local_) {
+void rnn_cell_fwd(const rnn_prb_t *p, alg_t alg, activation_t f, int sic,
+        int slc, int dic, int wc, int batch, int n_gates, float *dst_iter_h,
+        float *dst_iter_c, float *gates, const float *weights_layer,
+        const float *weights_iter, const float *bias, const float *src_layer,
+        const float *src_iter_h, const float *src_iter_c, float *ws_local_) {
     switch (alg) {
     case VANILLA_GRU:
         gru_fwd(sic, slc, dic, wc, batch, n_gates, dst_iter_h, gates,
@@ -221,7 +251,7 @@ void rnn_cell_fwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
                 ws_local_);
         break;
     case VANILLA_LSTM:
-        lstm_fwd(sic, slc, dic, wc, batch, n_gates, dst_iter_h, dst_iter_c,
+        lstm_fwd(p, sic, slc, dic, wc, batch, n_gates, dst_iter_h, dst_iter_c,
                 gates, weights_layer, weights_iter, bias, src_layer, src_iter_h,
                 src_iter_c);
         break;
@@ -232,6 +262,7 @@ void rnn_cell_fwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
     default: break;
     }
 }
+
 void copy(int dimc, int dimr, int ld_src, int ld_dst, const float *src_,
         float *dst_, rnn_action_t action = action_copy) {
     AOC<const float> src(src_, dimc, ld_src);
@@ -245,86 +276,212 @@ void copy(int dimc, int dimr, int ld_src, int ld_dst, const float *src_,
     });
 }
 
-/* FIXME: separate copy_init ???
- * fwd: ws_states = n_states
- * bwd: ws_states = n_states + 1
- *
- * lstm example:
+void shift(int dimc, int dimr, int ld_src, float *src_, float shift,
+        bool round = false, const rnn_prb_t *p = nullptr) {
+    AOC<float> src(src_, dimc, ld_src);
+    mkldnn::impl::parallel_nd(dimc, [&](int i) {
+        for (int j = 0; j < dimr; j++) {
+            float fp = src(i, j) + shift;
+            if (round) {
+                using R = attr_t::round_mode_t;
+                switch (p->attr.irmode) {
+                case R::DOWN: fp = floorf(fp); break;
+                case R::NEAREST: fp = nearbyintf(fp); break;
+                default: assert(!"unkown round mode");
+                }
+                if (fp > UINT8_MAX)
+                    fp = UINT8_MAX;
+                if (fp < 0)
+                    fp = 0;
+            }
+            src(i, j) = fp;
+        }
+    });
+}
+
+void scale(int dimc, int dimr, int ld_src, float *src_, float scale,
+        bool round = false, const rnn_prb_t *p = nullptr) {
+    AOC<float> src(src_, dimc, ld_src);
+    mkldnn::impl::parallel_nd(dimc, [&](int i) {
+        for (int j = 0; j < dimr; j++) {
+            float fp = src(i, j) * scale;
+            if (round) {
+                using R = attr_t::round_mode_t;
+                switch (p->attr.irmode) {
+                case R::DOWN: fp = floorf(fp); break;
+                case R::NEAREST: fp = nearbyintf(fp); break;
+                default: assert(!"unkown round mode");
+                }
+            }
+            src(i, j) = fp;
+        }
+    });
+}
+
+/* lstm example:
  * fwd: ws keeps {h, c} for every cell
- * bwd: wsb keeps {dh, dc, dx} for every cell
  */
-void copy_init(alg_t alg, int sic, int slc, int dic, int dlc, int wc, int batch,
-        int n_layer, int n_iter, int n_states, float *ws_,
+void copy_init_fwd(const rnn_prb_t *p, alg_t alg, int sic, int slc, int dic,
+        int dlc, int wc, int batch, int n_layer, int n_iter, int n_dir,
+        int n_states, float *ws_, const float *src_layer_,
+        const float *firstit_states_, rnn_iter_direction_t iter_dir,
+        rnn_layer_direction_t lay_dir, int dir_val) {
+    AOC<float> ws(ws_, n_layer + 2, n_dir, n_iter + 2, n_states, batch * wc);
+    AOC<const float> src_layer(src_layer_, n_iter, batch * slc);
+    AOC<const float> firstit_states(
+            firstit_states_, n_layer, n_dir, n_states, batch * sic);
+
+    int lay_dest = (lay_dir == bottom2top) ? 0 : n_layer + 1;
+    int it_dest = (iter_dir == left2right) ? 0 : n_iter + 1;
+    bool is_int8 = p->cfg[input].dt == mkldnn_u8;
+
+    // Copy input
+    for (int it = 0; it < n_iter; it++) {
+        copy(batch, slc, slc, wc, &src_layer(it, 0),
+                &ws(lay_dest, dir_val, it + 1, H, 0));
+        if (p->cfg[input].dt == mkldnn_u8)
+            // shift u8 input to s8 to avoid compensation in gemm
+            shift(batch, slc, wc, &ws(lay_dest, dir_val, it + 1, H, 0),
+                    -1. * p->data_shift);
+    }
+
+    // Copy states
+    for (int lay = 0; lay < n_layer; lay++) {
+        copy(batch, sic, sic, wc, &firstit_states(lay, dir_val, H, 0),
+                &ws(lay + 1, dir_val, it_dest, H, 0));
+        if (p->cfg[states].dt == mkldnn_u8)
+            shift(batch, sic, wc, &ws(lay + 1, dir_val, it_dest, H, 0),
+                    -1. * p->data_shift);
+        else if (p->cfg[states].dt == mkldnn_f32 && is_int8) {
+            // quantize to s8
+            scale(batch, sic, wc, &ws(lay + 1, dir_val, it_dest, H, 0),
+                    p->data_scale, true, p);
+        }
+
+        if (alg == VANILLA_LSTM) {
+            copy(batch, sic, sic, wc, &firstit_states(lay, dir_val, C, 0),
+                    &ws(lay + 1, dir_val, it_dest, C, 0));
+            if (p->cfg[states].dt == mkldnn_u8) {
+                // dequantize to f32
+                shift(batch, sic, wc, &ws(lay + 1, dir_val, it_dest, C, 0),
+                        -1. * p->data_shift);
+                scale(batch, sic, wc, &ws(lay + 1, dir_val, it_dest, C, 0),
+                        1. / p->data_scale);
+            }
+        }
+    }
+}
+
+/* lstm example:
+ * bwd: wsb keeps {dh, dc, dx} for every cell
+*/
+void copy_init_bwd(alg_t alg, int sic, int slc, int dic, int dlc, int wc,
+        int batch, int n_layer, int n_iter, int n_dir, int n_states, float *ws_,
         const float *src_layer_, const float *firstit_states_,
         rnn_iter_direction_t iter_dir, rnn_layer_direction_t lay_dir,
-        int dir_val, int n_dir, bool is_bwd = false, bool is_concat = false) {
+        int dir_val, bool is_concat = false) {
     AOC<float> ws(
-            ws_, n_layer + 2, n_dir, n_iter + 2, n_states + is_bwd, batch, wc);
-    auto c_stride = is_bwd ? (is_concat ? 2 * dlc : dlc) : slc;
+            ws_, n_layer + 2, n_dir, n_iter + 2, n_states + 1, batch * wc);
+    auto c_stride = is_concat ? 2 * dlc : dlc;
     AOC<const float> src_layer(src_layer_, n_iter, batch * c_stride);
-    AOC<const float> firstit_states(firstit_states_, n_layer, n_dir, n_states,
-            batch, is_bwd ? dic : sic);
+    AOC<const float> firstit_states(
+            firstit_states_, n_layer, n_dir, n_states, batch * dic);
 
     int lay_dest = (lay_dir == bottom2top) ? 0 : n_layer + 1;
     int it_dest = (iter_dir == left2right) ? 0 : n_iter + 1;
 
-    if (!is_bwd) {
-        for (int it = 0; it < n_iter; it++)
-            copy(batch, slc, slc, wc, &src_layer(it, 0),
-                    &ws(lay_dest, dir_val, it + 1, H, 0, 0));
-
-        for (int lay = 0; lay < n_layer; lay++) {
-            copy(batch, sic, sic, wc, &firstit_states(lay, dir_val, H, 0, 0),
-                    &ws(lay + 1, dir_val, it_dest, H, 0, 0));
-            if (alg == VANILLA_LSTM) {
-                copy(batch, sic, sic, wc,
-                        &firstit_states(lay, dir_val, C, 0, 0),
-                        &ws(lay + 1, dir_val, it_dest, C, 0, 0));
+    for (int it = 0; it < n_iter; it++)
+        copy(batch, dic, c_stride, wc,
+                &src_layer(it, dir_val * is_concat * dlc),
+                &ws(lay_dest, dir_val, it + 1, n_states, 0));
+
+    for (int lay = 0; lay < n_layer; lay++) {
+        copy(batch, dic, dic, wc, &firstit_states(lay, dir_val, H, 0),
+                &ws(lay + 1, dir_val, it_dest, H, 0));
+        if (alg == VANILLA_LSTM) {
+            copy(batch, dic, dic, wc, &firstit_states(lay, dir_val, C, 0),
+                    &ws(lay + 1, dir_val, it_dest, C, 0));
+        }
+    }
+}
+
+void copy_res_fwd(const rnn_prb_t *p, alg_t alg, int sic, int slc, int dic,
+        int dlc, int wc, int batch, int n_layer, int n_iter, int n_dir,
+        int n_states, float *lastit_states_, float *lastlay_states_,
+        const float *ws_, rnn_iter_direction_t iter_dir,
+        rnn_layer_direction_t lay_dir, int dir_val, rnn_action_t action,
+        bool is_concat = false) {
+    int lastlay_c = is_concat ? 2 * dlc : dlc;
+    AOC<float> lastit_states(
+            lastit_states_, n_layer, n_dir, n_states, batch, dic);
+    AOC<float> lastlay_states(lastlay_states_, n_iter, batch, lastlay_c);
+    AOC<const float> ws(
+            ws_, n_layer + 2, n_dir, n_iter + 2, n_states, batch, wc);
+
+    // Copy states layer
+    for (int it = 0; it < n_iter; it++) {
+        for (int nb = 0; nb < batch; nb++) {
+            auto from = &ws(n_layer, dir_val, it + 1, H, nb, 0);
+            auto to = &lastlay_states(
+                    it, nb, action == action_concat ? dlc : 0);
+            copy(1, dlc, wc, lastlay_c, from, to, action);
+
+            if (p->cfg[dst_last_layer].dt == mkldnn_u8) {
+                // shift s8 internal ws to u8
+                shift(1, dlc, lastlay_c, to, p->data_shift);
+            } else {
+                // dequantize to f32
+                scale(1, dlc, lastlay_c, to, 1. / p->data_scale);
             }
         }
-    } else {
-        for (int it = 0; it < n_iter; it++)
-            copy(batch, dic, c_stride, wc,
-                    &src_layer(it, dir_val * is_concat * dlc),
-                    &ws(lay_dest, dir_val, it + 1, n_states, 0, 0));
-
-        for (int lay = 0; lay < n_layer; lay++) {
-            copy(batch, dic, dic, wc, &firstit_states(lay, dir_val, H, 0, 0),
-                    &ws(lay + 1, dir_val, it_dest, H, 0, 0));
-            if (alg == VANILLA_LSTM) {
-                copy(batch, dic, dic, wc,
-                        &firstit_states(lay, dir_val, C, 0, 0),
-                        &ws(lay + 1, dir_val, it_dest, C, 0, 0));
+    }
+
+    int it_source = (iter_dir == left2right) ? n_iter : 1;
+
+    // Copy states iteration
+    for (int lay = 0; lay < n_layer; lay++) {
+        if (alg == VANILLA_LSTM) {
+            copy(batch, dic, wc, dic, &ws(lay + 1, dir_val, it_source, C, 0, 0),
+                    &lastit_states(lay, dir_val, C, 0, 0));
+            if (p->cfg[dst_last_iteration].dt == mkldnn_u8) {
+                // quantize internal f32 ws to u8
+                scale(batch, dic, dic, &lastit_states(lay, dir_val, C, 0, 0),
+                        p->data_scale);
+                shift(batch, dic, dic, &lastit_states(lay, dir_val, C, 0, 0),
+                        p->data_shift, true, p);
             }
         }
+        copy(batch, dic, wc, dic, &ws(lay + 1, dir_val, it_source, H, 0, 0),
+                &lastit_states(lay, dir_val, H, 0, 0));
+        if (p->cfg[dst_last_iteration].dt == mkldnn_u8) {
+            // shift s8 internal ws to u8
+            shift(batch, dic, dic, &lastit_states(lay, dir_val, H, 0, 0),
+                    p->data_shift);
+        } else {
+            // dequantize to f32
+            scale(batch, dic, dic, &lastit_states(lay, dir_val, H, 0, 0),
+                    1. / p->data_scale);
+        }
     }
 }
 
-void copy_res(alg_t alg, int sic, int slc, int dic, int dlc, int wc, int batch,
-        int n_layer, int n_iter, int n_states, float *lastit_states_,
-        float *lastlay_states_, const float *ws_,
-        mkldnn_rnn_direction_t direction, rnn_iter_direction_t iter_dir,
-        rnn_layer_direction_t lay_dir, int dir_val, int n_dir,
-        rnn_action_t action, bool is_bwd = false) {
-    int lastlay_c = is_bwd ?
-            slc :
-            (direction == mkldnn_bidirectional_concat) * dlc + dlc;
-    int lastiter_c = is_bwd ? sic : dic;
+void copy_res_bwd(alg_t alg, int sic, int slc, int dic, int dlc, int wc,
+        int batch, int n_layer, int n_iter, int n_dir, int n_states,
+        float *lastit_states_, float *lastlay_states_, const float *ws_,
+        rnn_iter_direction_t iter_dir, rnn_layer_direction_t lay_dir,
+        int dir_val, rnn_action_t action) {
     AOC<float> lastit_states(
-            lastit_states_, n_layer, n_dir, n_states, batch, lastiter_c);
-    AOC<float> lastlay_states(lastlay_states_, n_iter, batch, lastlay_c);
+            lastit_states_, n_layer, n_dir, n_states, batch, sic);
+    AOC<float> lastlay_states(lastlay_states_, n_iter, batch, slc);
     AOC<const float> ws(
-            ws_, n_layer + 2, n_dir, n_iter + 2, n_states + is_bwd, batch, wc);
+            ws_, n_layer + 2, n_dir, n_iter + 2, n_states + 1, batch, wc);
     for (int it = 0; it < n_iter; it++) {
         for (int nb = 0; nb < batch; nb++) {
             // copy H to last layer states
-            int lay = is_bwd ? 1 : n_layer;
-            int state = is_bwd ? n_states : H;
-            auto from = &ws(lay, dir_val, it + 1, state, nb, 0);
-            auto to = &lastlay_states(
-                    it, nb, (action == action_concat) && (!is_bwd) ? dlc : 0);
+            auto from = &ws(1, dir_val, it + 1, n_states, nb, 0);
+            auto to = &lastlay_states(it, nb, 0);
 
-            copy(1, is_bwd ?  slc : dlc, wc, lastlay_c, from, to, action);
+            copy(1, slc, wc, slc, from, to, action);
         }
     }
 
@@ -332,12 +489,10 @@ void copy_res(alg_t alg, int sic, int slc, int dic, int dlc, int wc, int batch,
 
     for (int lay = 0; lay < n_layer; lay++) {
         if (alg == VANILLA_LSTM) {
-            copy(batch, lastiter_c, wc, lastiter_c,
-                    &ws(lay + 1, dir_val, it_source, C, 0, 0),
+            copy(batch, sic, wc, sic, &ws(lay + 1, dir_val, it_source, C, 0, 0),
                     &lastit_states(lay, dir_val, C, 0, 0));
         }
-        copy(batch, lastiter_c, wc, lastiter_c,
-                &ws(lay + 1, dir_val, it_source, H, 0, 0),
+        copy(batch, sic, wc, sic, &ws(lay + 1, dir_val, it_source, H, 0, 0),
                 &lastit_states(lay, dir_val, H, 0, 0));
     }
 }
@@ -355,6 +510,7 @@ void rnn_linear_fwd(const rnn_prb_t *p, mkldnn_rnn_direction_t direction,
     const int dlc = p->dlc;
     const int wc = max(sic, max(slc, dic));
     bool is_lbr = p->alg == LBR_GRU;
+    bool is_concat = direction == mkldnn_bidirectional_concat;
 
     const int batch = p->mb;
     const int n_gates = p->n_gates();
@@ -380,8 +536,9 @@ void rnn_linear_fwd(const rnn_prb_t *p, mkldnn_rnn_direction_t direction,
         // we first need to copy the initial states and input into ws
         // it simplifies the logic in the following code
         print(80, "rnn_linear_fwd: call copy_init dir_val = %d\n", dir_val);
-        copy_init(alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter, n_states, ws_,
-                src_layer_, src_iter_, iter_dir, lay_dir, dir_val, n_dir);
+        copy_init_fwd(p, alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter,
+                n_dir, n_states, ws_, src_layer_, src_iter_, iter_dir, lay_dir,
+                dir_val);
 
         // We run the grid of computation
         for (int il = 0; il < n_layer; il++) {
@@ -390,7 +547,7 @@ void rnn_linear_fwd(const rnn_prb_t *p, mkldnn_rnn_direction_t direction,
                 int iter = (iter_dir == left2right) ? it + 1 : n_iter - it;
                 int prev_iter = (iter_dir == left2right) ? iter - 1 : iter + 1;
                 int lay = il + 1;
-                rnn_cell_fwd(alg, f, sic, slc, dic, wc, batch, n_gates,
+                rnn_cell_fwd(p, alg, f, sic, slc, dic, wc, batch, n_gates,
                         &ws(lay, dir_val, iter, H, 0, 0),
                         &ws(lay, dir_val, iter, C, 0, 0),
                         &gates(lay - 1, dir_val, iter - 1, 0, 0, 0),
@@ -399,15 +556,14 @@ void rnn_linear_fwd(const rnn_prb_t *p, mkldnn_rnn_direction_t direction,
                         &bias(lay - 1, dir_val, 0),
                         &ws(lay - 1, dir_val, iter, H, 0, 0),
                         &ws(lay, dir_val, prev_iter, H, 0, 0),
-                        &ws(lay, dir_val, prev_iter, C, 0, 0),
-                        ws_local_);
+                        &ws(lay, dir_val, prev_iter, C, 0, 0), ws_local_);
             }
         }
 
         // Finally we copy the results to the result buffers
-        copy_res(alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter, n_states,
-                dst_iter_, dst_layer_, ws_, direction, iter_dir, lay_dir,
-                dir_val, n_dir, action);
+        copy_res_fwd(p, alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter,
+                n_dir, n_states, dst_iter_, dst_layer_, ws_, iter_dir, lay_dir,
+                dir_val, action, is_concat);
     };
 
     switch (direction) {
@@ -533,7 +689,7 @@ void lstm_bwd(alg_t alg, int sic, int slc, int dic, int wc, int batch,
             float dh = diff_dst_layer(ib, ih) + diff_dst_iter_h(ib, ih);
             float c = dst_iter_c(ib, ih);
             float dho = tanhf(c) * dh;
-            b_gates(ib, oho, ih) = dlogistic(ho) * dho;
+            b_gates(ib, oho, ih) = x_m_square(ho) * dho;
 
             float dc_next = diff_dst_iter_c(ib, ih);
             float dc = ho * dh * dtanhf(c) + dc_next;
@@ -541,13 +697,13 @@ void lstm_bwd(alg_t alg, int sic, int slc, int dic, int wc, int batch,
 
             float c_old = src_iter_c(ib, ih);
             float dhf = c_old * dc;
-            b_gates(ib, ohf, ih) = dlogistic(hf) * dhf;
+            b_gates(ib, ohf, ih) = x_m_square(hf) * dhf;
 
             float dhi = hc * dc;
-            b_gates(ib, ohi, ih) = dlogistic(hi) * dhi;
+            b_gates(ib, ohi, ih) = x_m_square(hi) * dhi;
 
             float dhc = hi * dc;
-            b_gates(ib, ohc, ih) = dtanhf(hc) * dhc;
+            b_gates(ib, ohc, ih) = one_m_square(hc) * dhc;
         }
 
     gemm("C", "T", "N", sic, n_gates * dic, batch, 1.0, src_iter_h_, wc, b_gates_,
@@ -592,10 +748,10 @@ void gru_bwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
     AOC<float> dhr(dhr_, batch, wc);
     AOC<float> hr(hr_, batch, wc);
 
-// dc = (1 - u) * dh; dc^ = dtanhf(c) * dc;
-// du = (h - u) * dh; du^ = dlogistic(u) * du;
+// dc = (1 - u) * dh; dc^ = one_m_square(c) * dc;
+// du = (h - u) * dh; du^ = x_m_square(u) * du;
 // dhr = Wc dc^;
-// dr = h * dhr; dr^ = dlogistic(r) * dr;
+// dr = h * dhr; dr^ = x_m_square(r) * dr;
     const int ohu = 0;
     const int ohr = 1;
     const int ohc = 2;
@@ -607,12 +763,12 @@ void gru_bwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
             float dh = diff_dst_layer(ib, ih) + diff_dst_iter_h(ib, ih);
             float du = (h - c) * dh;
             float dc = (1.0f - u) * dh;
-            b_gates(ib, ohu, ih) = dlogistic(u) * du;
-            b_gates(ib, ohc, ih) = dtanhf(c) * dc;
+            b_gates(ib, ohu, ih) = x_m_square(u) * du;
+            b_gates(ib, ohc, ih) = one_m_square(c) * dc;
             diff_src_iter(ib, ih) = dh * u;
         }
-    gemm("C", "N", "T", batch, slc, dic, 1.0, &(b_gates(0, 2, 0)), n_gates * dic,
-            &(weights_layer(0, 2, 0)), n_gates * dic, 0.0, dhr_, wc);
+    gemm("C", "N", "T", batch, sic, dic, 1.0, &(b_gates(0, 2, 0)), n_gates * dic,
+            &(weights_iter_h(0, 2, 0)), n_gates * dic, 0.0, dhr_, wc);
 
     for (int ib = 0; ib < batch; ib++)
         for (int ih = 0; ih < dic; ih++) {
@@ -621,7 +777,7 @@ void gru_bwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
             float dr = h * dhr(ib, ih);
             hr(ib, ih) = h * r;
             diff_src_iter(ib, ih) += dhr(ib, ih) * r;
-            b_gates(ib, ohr, ih) = dlogistic(r) * dr;
+            b_gates(ib, ohr, ih) = x_m_square(r) * dr;
         }
 
 // dWx += xdu^ | xdr^ | xdc^
@@ -682,9 +838,9 @@ void gru_lbr_bwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
             &weights_iter_h(0, 2, 0), n_gates * dic, 1.0, Wh_b_, dic);
 
 
-// dc = (1 - u) * dh; dc^ = dtanhf(c) * dc;
-// du = (h - u) * dh; du^ = dlogistic(u) * du;
-// dr = (Wh + b) * dc^; dr^ = dlogistic(r) * dr;
+// dc = (1 - u) * dh; dc^ = one_m_square(c) * dc;
+// du = (h - c) * dh; du^ = x_m_square(u) * du;
+// dr = (Wh + b) * dc^; dr^ = x_m_square(r) * dr;
     const int ohu = 0;
     const int ohr = 1;
     const int ohc = 2;
@@ -698,11 +854,11 @@ void gru_lbr_bwd(alg_t alg, activation_t f, int sic, int slc, int dic, int wc,
             float du = (h - c) * dh;
             float dc = (1.0f - u) * dh;
 
-            b_gates(ib, ohu, ih) = dlogistic(u) * du;
-            b_gates(ib, ohc, ih) = dtanhf(c) * dc;
+            b_gates(ib, ohu, ih) = x_m_square(u) * du;
+            b_gates(ib, ohc, ih) = one_m_square(c) * dc;
 
             float dr = Wh_b(ib, ih) * b_gates(ib, ohc, ih);
-            b_gates(ib, ohr, ih) = dlogistic(r) * dr;
+            b_gates(ib, ohr, ih) = x_m_square(r) * dr;
 
             b_gates_r(ib, ohu, ih) = b_gates(ib, ohu, ih);
             b_gates_r(ib, ohr, ih) = b_gates(ib, ohr, ih);
@@ -841,9 +997,10 @@ void rnn_linear_bwd(const rnn_prb_t *p, mkldnn_rnn_direction_t direction,
             rnn_layer_direction_t lay_dir, int dir_val, rnn_action_t action) {
         // we first need to copy the initial states and input into ws
         // it simplifies the logic in the following code
-        copy_init(alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter, n_states,
-                wsb_, diff_dst_layer_, diff_dst_iter_, iter_dir, lay_dir,
-                dir_val, n_dir, true, direction == mkldnn_bidirectional_concat);
+        copy_init_bwd(alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter,
+                n_dir, n_states, wsb_, diff_dst_layer_, diff_dst_iter_,
+                iter_dir, lay_dir, dir_val,
+                direction == mkldnn_bidirectional_concat);
 
         // We run the grid of computation
         for (int j = n_layer - 1; j >= 0; j--) {
@@ -881,9 +1038,9 @@ void rnn_linear_bwd(const rnn_prb_t *p, mkldnn_rnn_direction_t direction,
         }
 
         // Finally we copy the results to the result buffers
-        copy_res(alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter, n_states,
-                diff_src_iter_, diff_src_layer_, wsb_, direction, iter_dir,
-                lay_dir, dir_val, n_dir, action, true);
+        copy_res_bwd(alg, sic, slc, dic, dlc, wc, batch, n_layer, n_iter, n_dir,
+                n_states, diff_src_iter_, diff_src_layer_, wsb_, iter_dir,
+                lay_dir, dir_val, action);
     };
 
     switch (direction) {
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.cpp
index d9408315a..526b2da8d 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.cpp
@@ -35,6 +35,30 @@ namespace rnn {
 
 #define CALL_MKLDNN_RNN 1
 
+mkldnn_primitive_attr_t create_mkldnn_rnn_attr(const rnn_prb_t *p) {
+    mkldnn_primitive_attr_t mkldnn_attr = NULL;
+
+    DNN_SAFE_V(mkldnn_primitive_attr_create(&mkldnn_attr));
+    if (p->attr.irmode != attr_t::round_mode_t::NEAREST)
+        DNN_SAFE_V(mkldnn_primitive_attr_set_int_output_round_mode(
+                mkldnn_attr, (mkldnn_round_mode_t)p->attr.irmode));
+
+    if (p->scale_policy == PER_OC) {
+        DNN_SAFE_V(mkldnn_primitive_attr_set_rnn_weights_qparams(
+                mkldnn_attr, p->dic * p->n_gates(), 0x3, p->wei_oc_scales));
+    } else if (p->scale_policy == COMMON && p->wei_scale != 1.) {
+        DNN_SAFE_V(mkldnn_primitive_attr_set_rnn_weights_qparams(
+                mkldnn_attr, 1, 0, &p->wei_scale));
+    }
+
+    if (p->data_scale != 1.0 || p->data_shift != 0.0) {
+        DNN_SAFE_V(mkldnn_primitive_attr_set_rnn_data_qparams(
+                mkldnn_attr, p->data_scale, p->data_shift));
+    }
+
+    return mkldnn_attr;
+}
+
 int fill_memory(const rnn_prb_t *p, rnn_data_kind_t kind, dnn_mem_t &mem1,
         dnn_mem_t &mem2) {
 #ifdef CALL_MKLDNN_RNN
@@ -43,20 +67,20 @@ int fill_memory(const rnn_prb_t *p, rnn_data_kind_t kind, dnn_mem_t &mem1,
 #else
     const size_t nelems = mem2.nelems();
 #endif
-    size_t nchunks = mkldnn_get_max_threads();
-    size_t chunk_size = (nelems + nchunks - 1) / nchunks;
 
+    dt_conf_t c = p->cfg[kind];
+    float mean = c.f_mean, var = c.f_var, min = c.f_min, max = c.f_max;
     mkldnn::impl::parallel(0, [&](int ithr, int nthr) {
+        size_t chunk_size = (nelems + nthr - 1) / nthr;
         size_t idx_start = ithr * chunk_size;
         size_t idx_end = MIN2(idx_start + chunk_size, nelems);
-
         std::minstd_rand msr;
-        std::normal_distribution<float> gen(.0f, .001f);
+        msr.seed((unsigned long int)kind);
+        std::normal_distribution<float> gen(mean, var);
         msr.discard(idx_start);
-
-        for (size_t idx = idx_start; idx < idx_end; ++idx){
-            auto val = gen(msr);
-            mem2.set_elem(idx, MAX2(MIN2(val, 1.0f), -1.0f));
+        for (size_t idx = idx_start; idx < idx_end; ++idx) {
+            auto val = (c.dt == mkldnn_f32) ? gen(msr) : round(gen(msr));
+            mem2.set_elem(idx, MAX2(MIN2(val, max), min));
         }
     });
 
@@ -88,23 +112,20 @@ inline int init_pd(const rnn_prb_t *p, mkldnn_rnn_desc_t rd[2],
     mkldnn_dims_t bias_dims
             = { p->n_layer, p->n_directions(), p->n_gates() + is_gru_lbr, p->dic };
     // mkldnn_tnc
-    int lastlay_dlc = (p->direction == mkldnn_bidirectional_concat) ?
-            2 * p->dlc :
-            p->dlc;
+    int lastlay_dlc = (p->direction == mkldnn_bidirectional_concat)
+            ? 2 * p->dlc
+            : p->dlc;
     mkldnn_dims_t dst_last_layer_dims = { p->n_iter, p->mb, lastlay_dlc };
 
     DNN_SAFE(mkldnn_memory_desc_init(
-                     &input_d, 3, input_dims, p->cfg[SRC].dt, mkldnn_tnc),
+                     &input_d, 3, input_dims, p->cfg[input].dt, mkldnn_tnc),
             WARN);
     input_d.layout_desc.blocking.strides[0][0] += the_stride;
-    DNN_SAFE(mkldnn_memory_desc_init(
-                     &diff_input_d, 3, input_dims, p->cfg[SRC].dt, mkldnn_any),
-            WARN);
 
     mkldnn_dims_t states_dims
             = { p->n_layer, p->n_directions(), p->n_states(), p->mb, p->sic };
-    DNN_SAFE(mkldnn_memory_desc_init(
-                     &states_d, 5, states_dims, p->cfg[SRC].dt, mkldnn_ldsnc),
+    DNN_SAFE(mkldnn_memory_desc_init(&states_d, 5, states_dims,
+                     p->cfg[states].dt, mkldnn_ldsnc),
             WARN);
 
     states_d.layout_desc.blocking.strides[0][3] = p->sic + the_stride;
@@ -116,43 +137,28 @@ inline int init_pd(const rnn_prb_t *p, mkldnn_rnn_desc_t rd[2],
                 = states_d.layout_desc.blocking.strides[0][d + 1]
                 * states_d.dims[d + 1];
 
-    DNN_SAFE(mkldnn_memory_desc_init(&diff_states_d, 5, states_dims,
-                     p->cfg[SRC].dt, mkldnn_any),
-            WARN);
-
     DNN_SAFE(mkldnn_memory_desc_init(&weights_input_d, 5, weights_input_dims,
-                     p->cfg[SRC].dt, mkldnn_any),
-            WARN);
-    DNN_SAFE(mkldnn_memory_desc_init(&diff_weights_input_d, 5,
-                     weights_input_dims, p->cfg[SRC].dt, mkldnn_any),
+                     p->cfg[weights_input].dt, mkldnn_any),
             WARN);
 
     DNN_SAFE(mkldnn_memory_desc_init(&weights_states_d, 5, weights_states_dims,
-                     p->cfg[SRC].dt, mkldnn_any),
-            WARN);
-    DNN_SAFE(mkldnn_memory_desc_init(&diff_weights_states_d, 5,
-                     weights_states_dims, p->cfg[SRC].dt, mkldnn_any),
+                     p->cfg[weights_states].dt, mkldnn_any),
             WARN);
 
     DNN_SAFE(mkldnn_memory_desc_init(
-                     &bias_d, 4, bias_dims, p->cfg[SRC].dt, mkldnn_any),
-            WARN);
-    DNN_SAFE(mkldnn_memory_desc_init(
-                     &diff_bias_d, 4, bias_dims, p->cfg[SRC].dt, mkldnn_any),
+                     &bias_d, 4, bias_dims, p->cfg[bias].dt, mkldnn_any),
             WARN);
 
     DNN_SAFE(mkldnn_memory_desc_init(&dst_last_layer_d, 3, dst_last_layer_dims,
-                     p->cfg[SRC].dt, mkldnn_tnc),
+                     p->cfg[dst_last_layer].dt, mkldnn_tnc),
             WARN);
     dst_last_layer_d.layout_desc.blocking.strides[0][0] += the_stride;
-    DNN_SAFE(mkldnn_memory_desc_init(&diff_last_layer_d, 3, dst_last_layer_dims,
-                     p->cfg[SRC].dt, mkldnn_any),
-            WARN);
 
     mkldnn_dims_t dst_last_iteration_dims
             = { p->n_layer, p->n_directions(), p->n_states(), p->mb, p->dic };
     DNN_SAFE(mkldnn_memory_desc_init(&dst_last_iteration_d, 5,
-                     dst_last_iteration_dims, p->cfg[SRC].dt, mkldnn_ldsnc),
+                     dst_last_iteration_dims, p->cfg[dst_last_iteration].dt,
+                     mkldnn_ldsnc),
             WARN);
 
     dst_last_iteration_d.layout_desc.blocking.strides[0][3]
@@ -166,10 +172,6 @@ inline int init_pd(const rnn_prb_t *p, mkldnn_rnn_desc_t rd[2],
                 = dst_last_iteration_d.layout_desc.blocking.strides[0][d + 1]
                 * dst_last_iteration_d.dims[d + 1];
 
-    DNN_SAFE(mkldnn_memory_desc_init(&diff_last_iteration_d, 5,
-                     dst_last_iteration_dims, p->cfg[SRC].dt, mkldnn_any),
-            WARN);
-
     mkldnn_alg_kind_t kind = alg2kind(p->alg);
     mkldnn_alg_kind_t f = activation2kind(p->activation);
 
@@ -179,14 +181,43 @@ inline int init_pd(const rnn_prb_t *p, mkldnn_rnn_desc_t rd[2],
     // When inference, we use forward_inference
     // When training, we use forward_training
     {
-        DNN_SAFE(mkldnn_rnn_forward_desc_init(&rd[0], fwd_prop, &rcd,
+        mkldnn_status_t init_status = mkldnn_success;
+        init_status = mkldnn_rnn_forward_desc_init(&rd[0], fwd_prop, &rcd,
                          p->direction, &input_d, &states_d, &weights_input_d,
                          &weights_states_d, &bias_d, &dst_last_layer_d,
-                         &dst_last_iteration_d),
-                WARN);
+                         &dst_last_iteration_d);
+        if (init_status == mkldnn_unimplemented)
+            return r->state = UNIMPLEMENTED, OK;
+        else
+            SAFE(init_status, WARN);
     }
 
     if (is_bwd) {
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_input_d, 3, input_dims,
+                         p->cfg[dst_diff_input].dt, mkldnn_any),
+                WARN);
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_states_d, 5, states_dims,
+                         p->cfg[dst_diff_states].dt, mkldnn_any),
+                WARN);
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_weights_input_d, 5,
+                         weights_input_dims, p->cfg[dst_diff_weights_input].dt,
+                         mkldnn_any),
+                WARN);
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_weights_states_d, 5,
+                         weights_states_dims,
+                         p->cfg[dst_diff_weights_states].dt, mkldnn_any),
+                WARN);
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_bias_d, 4, bias_dims,
+                         p->cfg[dst_diff_bias].dt, mkldnn_any),
+                WARN);
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_last_layer_d, 3,
+                         dst_last_layer_dims, p->cfg[diff_last_layer].dt,
+                         mkldnn_any),
+                WARN);
+        DNN_SAFE(mkldnn_memory_desc_init(&diff_last_iteration_d, 5,
+                         dst_last_iteration_dims,
+                         p->cfg[diff_last_iteration].dt, mkldnn_any),
+                WARN);
         DNN_SAFE(mkldnn_rnn_backward_desc_init(&rd[1], p->prop, &rcd,
                          p->direction, &input_d, &states_d, &weights_input_d,
                          &weights_states_d, &bias_d, &dst_last_layer_d,
@@ -196,17 +227,17 @@ inline int init_pd(const rnn_prb_t *p, mkldnn_rnn_desc_t rd[2],
                          &diff_last_iteration_d),
                 WARN);
     }
+    auto mkldnn_attr = create_mkldnn_rnn_attr(p);
     mkldnn_status_t init_status = mkldnn_success;
     for (int i = 0; i < 1 + (int)is_bwd; i++) {
-        init_status = mkldnn_primitive_desc_create(
-                &(rpd[i]), &(rd[i]), engine, NULL);
+        init_status = mkldnn_primitive_desc_create_v2(
+                &(rpd[i]), &(rd[i]), mkldnn_attr, engine, NULL);
         if (init_status == mkldnn_unimplemented)
             return r->state = UNIMPLEMENTED, OK;
         else
             SAFE(init_status, WARN);
     }
-
-    // const char *impl_str = query_impl_info(rpd);
+    mkldnn_primitive_attr_destroy(mkldnn_attr);
 
     auto q = [=](mkldnn_query_t query, int rpd_idx, int index = 0) {
         return *mkldnn_primitive_desc_query_memory_d(
@@ -311,13 +342,17 @@ int doit(const rnn_prb_t *p, res_t *r) {
     auto &diff_dst_layer_dt_d = rd[1].diff_dst_layer_desc;
     auto &diff_dst_iter_dt_d = rd[1].diff_dst_iter_desc;
 
-    input_dt = new dnn_mem_t(input_dt_d, fp);
-    states_dt = new dnn_mem_t(states_dt_d, fp);
-    weights_input_dt = new dnn_mem_t(weights_input_dt_d, fp);
-    weights_states_dt = new dnn_mem_t(weights_states_dt_d, fp);
-    bias_dt = new dnn_mem_t(bias_dt_d, fp);
-    dst_last_layer_dt = new dnn_mem_t(dst_last_layer_dt_d, fp);
-    dst_last_iteration_dt = new dnn_mem_t(dst_last_iteration_dt_d, fp);
+    input_dt = new dnn_mem_t(input_dt_d, p->cfg[input].dt);
+    states_dt = new dnn_mem_t(states_dt_d, p->cfg[states].dt);
+    weights_input_dt
+            = new dnn_mem_t(weights_input_dt_d, p->cfg[weights_input].dt);
+    weights_states_dt
+            = new dnn_mem_t(weights_states_dt_d, p->cfg[weights_states].dt);
+    bias_dt = new dnn_mem_t(bias_dt_d, p->cfg[bias].dt);
+    dst_last_layer_dt
+            = new dnn_mem_t(dst_last_layer_dt_d, p->cfg[dst_last_layer].dt);
+    dst_last_iteration_dt = new dnn_mem_t(
+            dst_last_iteration_dt_d, p->cfg[dst_last_iteration].dt);
 
     if (is_bwd) {
         bwd_weights_input_dt = new dnn_mem_t(bwd_weights_input_dt_d, fp);
@@ -417,8 +452,6 @@ int doit(const rnn_prb_t *p, res_t *r) {
             dnn_mem_t dst_last_layer(*dst_last_layer_dt, fp, mkldnn_tnc);
             dnn_mem_t dst_last_iteration(
                     *dst_last_iteration_dt, fp, mkldnn_ldsnc);
-            SAFE(dst_last_layer.reorder(*dst_last_layer_dt), WARN);
-            SAFE(dst_last_iteration.reorder(*dst_last_iteration_dt), WARN);
             SAFE(compare_dst_last_layer(
                          p, dst_last_layer, *dst_last_layer_fp, r, true),
                     WARN);
@@ -457,8 +490,6 @@ int doit(const rnn_prb_t *p, res_t *r) {
             dnn_mem_t dst_last_layer(*dst_last_layer_dt, fp, mkldnn_tnc);
             dnn_mem_t dst_last_iteration(
                     *dst_last_iteration_dt, fp, mkldnn_ldsnc);
-            SAFE(dst_last_layer.reorder(*dst_last_layer_dt), WARN);
-            SAFE(dst_last_iteration.reorder(*dst_last_iteration_dt), WARN);
             SAFE(compare_dst_last_layer(
                          p, dst_last_layer, *dst_last_layer_fp, r, true),
                     WARN);
@@ -468,8 +499,6 @@ int doit(const rnn_prb_t *p, res_t *r) {
 
             dnn_mem_t diff_input(*dst_diff_input_dt, fp, mkldnn_tnc);
             dnn_mem_t diff_states(*dst_diff_states_dt, fp, mkldnn_ldsnc);
-            SAFE(diff_input.reorder(*dst_diff_input_dt), WARN);
-            SAFE(diff_states.reorder(*dst_diff_states_dt), WARN);
             SAFE(compare_input(p, diff_input, *dst_diff_input_fp, r, true),
                     WARN);
             SAFE(compare_states(p, diff_states, *dst_diff_states_fp, r, true),
@@ -479,9 +508,6 @@ int doit(const rnn_prb_t *p, res_t *r) {
                     *dst_diff_weights_input_dt, fp, mkldnn_ldigo);
             dnn_mem_t diff_weights_states(
                     *dst_diff_weights_states_dt, fp, mkldnn_ldigo);
-            SAFE(diff_weights_input.reorder(*dst_diff_weights_input_dt), WARN);
-            SAFE(diff_weights_states.reorder(*dst_diff_weights_states_dt),
-                    WARN);
             SAFE(compare_weights_input(p, diff_weights_input,
                          *dst_diff_weights_input_fp, r, true),
                     WARN);
@@ -490,7 +516,6 @@ int doit(const rnn_prb_t *p, res_t *r) {
                     WARN);
 
             dnn_mem_t diff_bias(*dst_diff_bias_dt, fp, mkldnn_ldgo);
-            SAFE(diff_bias.reorder(*dst_diff_bias_dt), WARN);
             SAFE(compare_bias(p, diff_bias, *dst_diff_bias_fp, r, true), WARN);
         }
     }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.hpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.hpp
index 36d6a56df..45ab7fb56 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn.hpp
@@ -29,6 +29,8 @@
 
 namespace rnn {
 
+extern const char *perf_template;
+
 enum alg_t { VANILLA_RNN, VANILLA_LSTM, VANILLA_GRU, LBR_GRU };
 alg_t str2alg(const char *str);
 const char *alg2str(alg_t alg);
@@ -39,6 +41,9 @@ activation_t str2activation(const char *str);
 const char *activation2str(activation_t alg);
 mkldnn_alg_kind_t activation2kind(activation_t alg);
 
+mkldnn_prop_kind_t str2prop(const char *str);
+const char *prop2str(mkldnn_prop_kind_t prop);
+
 mkldnn_rnn_direction_t str2direction(const char *str);
 const char *direction2str(mkldnn_rnn_direction_t direction);
 
@@ -104,15 +109,15 @@ enum rnn_data_kind_t {
     weights_input,
     weights_states,
     bias,
-    dst_last_layer,
     dst_last_iteration,
+    dst_last_layer,
     dst_diff_input,
     dst_diff_states,
     dst_diff_weights_input,
     dst_diff_weights_states,
     dst_diff_bias,
-    diff_last_layer,
     diff_last_iteration,
+    diff_last_layer,
     data_kind_total // should be last to provide the total number of data kinds
 };
 
@@ -149,20 +154,46 @@ typedef struct dt_conf_t {
     mkldnn_data_type_t dt;
     int min, max; /* representative */
     int f_min, f_max; /* fill range */
-    int f_base; /* fill base, use 0 */
-    int f_step; /* fill step, use 1 */
-    double f_sparsity; /* amount of non-zeros, default 0.25 */
+    float f_mean, f_var; /* mean and variance of normally distributed data */
     double eps; /* acceptable error */
 } _dt_conf_t[data_kind_total];
 
 extern const _dt_conf_t conf_f32;
+extern const _dt_conf_t conf_u8u8u8u8;
+extern const _dt_conf_t conf_u8u8u8f32;
+extern const _dt_conf_t conf_f32u8f32f32;
+extern const _dt_conf_t conf_f32u8f32u8;
+
+const dt_conf_t *str2cfg(const char *str);
+const char *cfg2str(const dt_conf_t *cfg);
+
+enum policy_t { NONE = 0, COMMON, PER_OC };
+policy_t str2policy(const char *str);
+const char *policy2str(attr_t::scale_t::policy_t policy);
 
 struct rnn_prb_t : public rnn_desc_t {
     rnn_prb_t(const rnn_desc_t desc, const dt_conf_t *cfg,
             mkldnn_prop_kind_t prop, alg_t alg,
-            mkldnn_rnn_direction_t direction, activation_t activation)
-        : rnn_desc_t(desc), cfg(cfg), prop(prop), alg(alg),
-        direction(direction), activation(activation){
+            mkldnn_rnn_direction_t direction, activation_t activation,
+            const attr_t &attr, policy_t scale_policy, int mb = 0)
+        : rnn_desc_t(desc)
+        , cfg(cfg)
+        , prop(prop)
+        , alg(alg)
+        , direction(direction)
+        , activation(activation)
+        , attr(attr)
+        , scale_policy(scale_policy) {
+        if (mb) this->mb = mb;
+        wei_oc_scales = NULL;
+        if (scale_policy == PER_OC)
+            wei_oc_scales
+                    = (float *)zmalloc(sizeof(float) * dic * n_gates(), 64);
+        set_qparams(-1., 1.);
+    }
+    ~rnn_prb_t() {
+        if (wei_oc_scales)
+            zfree(wei_oc_scales);
     }
 
     int n_directions() const {
@@ -178,14 +209,24 @@ struct rnn_prb_t : public rnn_desc_t {
                 4 :
                 (alg == VANILLA_GRU || alg == LBR_GRU ? 3 : 1);
     }
+    int n_bias() const {
+        return alg == LBR_GRU ? n_gates() + 1 : n_gates();
+    }
 
     const dt_conf_t *cfg;
     mkldnn_prop_kind_t prop;
     alg_t alg;
     mkldnn_rnn_direction_t direction;
     activation_t activation;
+    attr_t attr;
+    policy_t scale_policy;
+
+    float data_scale, data_shift;
+    float wei_scale;
+    float *wei_oc_scales;
 
 private:
+    void set_qparams(float fp_min, float fp_max);
     rnn_prb_t(const rnn_prb_t &) = delete;
     rnn_prb_t &operator=(const rnn_prb_t &) = delete;
 };
@@ -301,7 +342,7 @@ inline void inv_ldwOcIc_off_f(const rnn_prb_t *p, size_t off, int &l, int &d,
 
 // bias: mkldnn_ldgo
 inline size_t ldgo_off_f(const rnn_prb_t *p, int l, int d, int b, int c) {
-    return (((size_t)l * p->n_directions() + d) * p->n_gates() + b) * p->sic
+    return (((size_t)l * p->n_directions() + d) * p->n_bias() + b) * p->sic
             + c;
 }
 
@@ -309,8 +350,8 @@ inline void inv_ldgo_off_f(
         const rnn_prb_t *p, size_t off, int &l, int &d, int &b, int &c) {
     c = off % p->sic;
     off /= p->sic;
-    b = off % p->n_gates();
-    off /= p->n_gates();
+    b = off % p->n_bias();
+    off /= p->n_bias();
     d = off % p->n_directions();
     off /= p->n_directions();
     l = off % p->n_layer;
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.cpp
index 124cbecf5..c6068da50 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.cpp
@@ -39,6 +39,24 @@ alg_t str2alg(const char *str) {
     return VANILLA_RNN;
 }
 
+policy_t str2policy(const char *str) {
+#define CASE(_plc) if (!strcasecmp(STRINGIFY(_plc), str)) return _plc
+    CASE(NONE);
+    CASE(COMMON);
+    CASE(PER_OC);
+#undef CASE
+    assert(!"unknown policy");
+    return NONE;
+}
+
+const char * policy2str(policy_t policy) {
+    if (policy == NONE) return "none";
+    if (policy == COMMON) return "common";
+    if (policy == PER_OC) return "per_oc";
+    assert(!"unknown policy");
+    return "unknown policy";
+}
+
 const char *alg2str(alg_t alg) {
     if (alg == VANILLA_RNN)
         return "VANILLA_RNN";
@@ -99,6 +117,25 @@ mkldnn_alg_kind_t activation2kind(activation_t act) {
     return alg_kind;
 }
 
+mkldnn_prop_kind_t str2prop(const char *str) {
+    if (!strcasecmp("FWD_D", str))
+        return mkldnn_forward;
+    if (!strcasecmp("BWD_D", str))
+        return mkldnn_backward;
+    assert(!"unknown propagation");
+    return mkldnn_forward;
+}
+
+const char *prop2str(mkldnn_prop_kind_t prop) {
+    if (prop == mkldnn_forward)
+        return "FWD_D";
+    if (prop == mkldnn_backward)
+        return "BWD_DW";
+    assert(!"unknown propagation");
+    return "unknown propagation";
+
+}
+
 mkldnn_rnn_direction_t str2direction(const char *str) {
     if (!strcasecmp("left2right", str))
         return mkldnn_unidirectional_left2right;
@@ -185,8 +222,11 @@ int str2desc(rnn_desc_t *desc, const char *str) {
 void prb2str(const rnn_prb_t *p, const res_t *res, char *buffer) {
     int rem_len = max_prb_len;
 
-    DPRINT("%s,%s,%s,", alg2str(p->alg), activation2str(p->activation),
-            direction2str(p->direction));
+    DPRINT("--prop=%s --alg=%s --activation=%s --direction=%s --cfg=%s "
+           "--scaling=%s ",
+            prop2str(p->prop), alg2str(p->alg), activation2str(p->activation),
+            direction2str(p->direction), cfg2str(p->cfg),
+            policy2str(p->scale_policy));
     DPRINT("l%d", p->n_layer);
     DPRINT("t%d", p->n_iter);
     DPRINT("mb%d", p->mb);
@@ -203,10 +243,20 @@ void init_buffer(float *buf, int size, float value) {
 }
 
 float logistic(float x) {
-    return 1.0f / (1.0f + expf(-x));
+    if (x < 0)
+        return (expf(x) / (1 + expf(x)));
+    else
+        return 1.0f - (expf(-x) / (1 + expf(-x)));
 }
 float dlogistic(float x) {
-    return x * (1 - x);
+    float tmp = logistic(x);
+    return tmp * (1 - tmp);
+}
+float dtanhf(float x) {
+    return (1 - tanhf(x)) * (1 + tanhf(x));
+}
+float x_m_square(float x) {
+    return x - x * x;
 }
 float relu(float x) {
     return x > 0 ? x : 0;
@@ -214,8 +264,8 @@ float relu(float x) {
 float drelu(float x) {
     return float(x > 0);
 }
-float dtanhf(float x) {
-    return (1 - x) * (1 + x);
+float one_m_square(float x) {
+    return 1 - x * x;
 }
 
 int compare_dat(const rnn_prb_t *p, rnn_data_kind_t kind, dnn_mem_t &mem_dt,
@@ -414,4 +464,32 @@ int compare_dst_last_iteration(const rnn_prb_t *p, dnn_mem_t &mem_dt,
     return compare_dat(p, dst_last_iteration, mem_dt, mem_fp, r, final_compare);
 }
 
+void rnn_prb_t::set_qparams(float fp_min, float fp_max) {
+    if (cfg == conf_f32) {
+        data_shift = 0.;
+        data_scale = 1.;
+        wei_scale = 1.;
+        return;
+    }
+
+    /* Set parameters for quantization of src and weights from fp32 data
+     * in [-1, 1] to int8 data in a range specified in cfg */
+    float fp_range = fp_max - fp_min;
+    float int8_src_range = cfg[input].f_max - cfg[input].f_min,
+          int8_wei_range = cfg[weights_input].f_max - cfg[weights_input].f_min;
+
+    data_shift = cfg[input].f_mean;
+    data_scale = int8_src_range / fp_range;
+
+    if (scale_policy == COMMON) {
+        wei_scale = int8_wei_range / fp_range;
+    } else if (scale_policy == PER_OC) {
+        float K = int8_wei_range / fp_range;
+        int nelems = dic * n_gates();
+        for (int i = 0; i < nelems; i++) {
+            wei_oc_scales[i] = K * (1. + (float)i / nelems);
+        }
+    }
+}
+
 } // namespace rnn
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.hpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.hpp
index 3ac859843..71d040014 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/rnn/rnn_aux.hpp
@@ -44,6 +44,8 @@ float dlogistic(float x);
 float relu(float x);
 float drelu(float x);
 float dtanhf(float x);
+float one_m_square(float x);
+float x_m_square(float x);
 
 int compare_dat(const rnn_prb_t *p, rnn_data_kind_t kind, dnn_mem_t &mem_dt,
         dnn_mem_t &mem_fp, res_t *r, bool final_compare);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/self/conv.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/self/conv.cpp
index 46662d9d3..b449cbe1d 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/self/conv.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/self/conv.cpp
@@ -26,31 +26,24 @@ namespace self {
 
 static int check_simple_enums() {
     /* alg */
+    CHECK_CASE_STR_EQ(alg2str(alg_t::AUTO), "auto");
+    CHECK_CASE_STR_NE(alg2str(alg_t::AUTO), "autox");
+
     CHECK_CASE_STR_EQ(alg2str(alg_t::DIRECT), "direct");
     CHECK_CASE_STR_NE(alg2str(alg_t::DIRECT), "directx");
 
     CHECK_CASE_STR_EQ(alg2str(alg_t::WINO), "wino");
     CHECK_CASE_STR_NE(alg2str(alg_t::WINO), "winox");
 
+    CHECK_EQ(str2alg("auto"), alg_t::AUTO);
+    CHECK_EQ(str2alg("AUTO"), alg_t::AUTO);
+
     CHECK_EQ(str2alg("direct"), alg_t::DIRECT);
     CHECK_EQ(str2alg("DIRECT"), alg_t::DIRECT);
 
     CHECK_EQ(str2alg("wino"), alg_t::WINO);
     CHECK_EQ(str2alg("WINO"), alg_t::WINO);
 
-    /* merge */
-    CHECK_CASE_STR_EQ(merge2str(merge_t::NONE), "none");
-    CHECK_CASE_STR_NE(merge2str(merge_t::NONE), "nonex");
-
-    CHECK_CASE_STR_EQ(merge2str(merge_t::RELU), "relu");
-    CHECK_CASE_STR_NE(merge2str(merge_t::RELU), "relux");
-
-    CHECK_EQ(str2merge("none"), merge_t::NONE);
-    CHECK_EQ(str2merge("NONE"), merge_t::NONE);
-
-    CHECK_EQ(str2merge("relu"), merge_t::RELU);
-    CHECK_EQ(str2merge("RELU"), merge_t::RELU);
-
     return OK;
 }
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/shuffle/shuffle.cpp b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/shuffle/shuffle.cpp
index f2db80876..cddbb4997 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/shuffle/shuffle.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/benchdnn/shuffle/shuffle.cpp
@@ -144,26 +144,23 @@ int doit(const prb_t *p, res_t *r) {
            ? mkldnn_nc
            : get_default_format(ndims, fmt2data_kind(p->fmt));
 
-    dnn_mem_t data_fp(src_dt_d, fp, src_format),
-              data_dt(src_dt_d);
-    dnn_mem_t d_data_fp(src_dt_d, fp, src_format),
-              d_data_dt(src_dt_d);
+    dnn_mem_t src_fp(src_dt_d, fp, src_format), src_dt(src_dt_d);
+    dnn_mem_t dst_fp(src_dt_d, fp, src_format), dst_dt(src_dt_d);
 
-    SAFE(fill_memory(p, data_fp), WARN);
+    SAFE(fill_memory(p, src_fp), WARN);
 
     mkldnn_primitive_at_t inputs[1];
     const_mkldnn_primitive_t outputs[1];
-    SAFE(data_dt.reorder(data_fp), WARN);
-    inputs[0] = {data_dt.p_, 0};
-    outputs[0] = d_data_dt.p_;
+    SAFE(src_dt.reorder(src_fp), WARN);
+    inputs[0] = {src_dt.p_, 0};
+    outputs[0] = dst_dt.p_;
     DNN_SAFE(mkldnn_primitive_create(&s, spd, inputs, outputs), WARN);
     DNN_SAFE_V(mkldnn_primitive_desc_destroy(spd));
     SAFE(execute(s), WARN);
     if (bench_mode & CORR) {
-        compute_shuffle(p, data_fp, d_data_fp);
-        dnn_mem_t data(d_data_dt.md_, fp, src_format);
-        SAFE(data.reorder(d_data_dt), WARN);
-        SAFE(compare(p, d_data_fp, data, r), WARN);
+        compute_shuffle(p, src_fp, dst_fp);
+        dnn_mem_t data(dst_dt, fp, src_format);
+        SAFE(compare(p, dst_fp, data, r), WARN);
     }
 
     if (bench_mode & PERF) {
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/generate_c_symbols_refs.sh b/inference-engine/thirdparty/mkl-dnn/tests/generate_c_symbols_refs.sh
index 690dc6a5f..45040b79f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/generate_c_symbols_refs.sh
+++ b/inference-engine/thirdparty/mkl-dnn/tests/generate_c_symbols_refs.sh
@@ -16,10 +16,13 @@
 #===============================================================================
 
 mkldnn_root="$1"
-output="$2"
+extra_include_dir="$2"
+output="$3"
 
 echo -e '#include "mkldnn.h"' > "$output"
 echo -e "const void *c_functions[] = {" >> "$output"
-cpp "${mkldnn_root}/include/mkldnn.h" | grep -o 'mkldnn_\w\+(' \
-    | sed 's/\(.*\)(/(void*)\1,/g' | sort -u >> "$output"
+cpp -I"${extra_include_dir}" "${mkldnn_root}/include/mkldnn.h" \
+    | grep -o 'mkldnn_\w\+(' \
+    | sed 's/\(.*\)(/(void*)\1,/g' \
+    | sort -u >> "$output"
 echo -e "NULL};\nint main() { return 0; }" >> "$output"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/tests/gtests/CMakeLists.txt
index 94394234f..48829a551 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/CMakeLists.txt
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/CMakeLists.txt
@@ -45,7 +45,6 @@ file(GLOB PRIM_TEST_CASES_SRC
                               test_softmax_backward.cpp
                               test_eltwise.cpp
                               test_depthwise.cpp
-                              test_relu.cpp
                               test_lrn_forward.cpp
                               test_lrn_backward.cpp
                               test_pooling_forward.cpp
@@ -61,9 +60,8 @@ file(GLOB PRIM_TEST_CASES_SRC
                               test_convolution_forward_s16s16s32.cpp
                               test_convolution_forward_u8s8s32.cpp
                               test_convolution_forward_u8s8fp.cpp
-                              test_convolution_relu_forward_f32.cpp
-                              test_convolution_relu_forward_neg_slope_f32.cpp
-                              test_convolution_relu_forward_s16s16s32.cpp
+                              test_convolution_eltwise_forward_f32.cpp
+                              test_convolution_eltwise_forward_x8s8f32s32.cpp
                               test_convolution_backward_data_f32.cpp
                               test_convolution_backward_data_s16s16s32.cpp
                               test_convolution_backward_weights_f32.cpp
@@ -72,10 +70,23 @@ file(GLOB PRIM_TEST_CASES_SRC
                               test_gemm_f32.cpp
                               test_gemm_s8u8s32.cpp
                               test_gemm_s8s8s32.cpp
+                              test_rnn_forward.cpp
                               test_roi_pooling_forward.cpp
-                              test_convolution_eltwise_forward_f32.cpp
                               test_convolution_depthwise_forward_f32.cpp
+                              test_convolution_depthwise_forward_x8s8f32s32.cpp
                               test_convolution_dw_conv_f32.cpp
+                              test_convolution_dw_conv_u8s8s32.cpp
+                              test_binary_convolution_forward.cpp
+                              test_binary_convolution_eltwise_forward.cpp
+                              test_binary_convolution_depthwise_forward.cpp
+                              test_binary_convolution_sum_forward.cpp
+                              test_binary_convolution_binarization_forward.cpp
+                              test_binarization.cpp
+                              test_binary_convolution_dw_conv_forward.cpp
+                              test_binary_convolution_dw_conv_eltwise_forward.cpp
+                              test_binary_convolution_dw_conv_depthwise_forward.cpp
+                              test_binary_convolution_dw_conv_sum_forward.cpp
+                              test_binary_convolution_dw_conv_binarization_forward.cpp
                               ) #temporary
 
 foreach(TEST_FILE ${PRIM_TEST_CASES_SRC})
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/convolution_common.h b/inference-engine/thirdparty/mkl-dnn/tests/gtests/convolution_common.h
index 6fc6d8515..8306a72cb 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/convolution_common.h
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/convolution_common.h
@@ -18,12 +18,24 @@
 
 #include "mkldnn.hpp"
 
+#if defined(WITH_DW_CONV)
+#define EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::conv1_weights, mkldnn::memory::format::conv1_bias, \
+    mkldnn::memory::format::conv2_weights, mkldnn::memory::format::conv2_bias, mkldnn::memory::format::dst }
+#else
 #define EXPAND_FORMATS(src, weights, bias, dst) \
     { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
     mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+#endif
+
+#define EXPAND_ARGS(args) args
 
 #define ENGINE mkldnn::engine::kind::cpu
+#if defined(BIN)
+#define ALGORITHM mkldnn::binary_convolution_direct
+#else
 #define ALGORITHM mkldnn::convolution_direct
+#endif
 
 #ifdef DIRECTION_FORWARD
 #if defined(FP32)
@@ -47,6 +59,15 @@
 #define FMT_WEIGHTS_BLOCKED_G gOhIw8o4i
 #define FMT_WEIGHTS_BLOCKED16 OIhw4i16o4i
 #define FMT_WEIGHTS_BLOCKED16_G gOIhw4i16o4i
+#elif defined(BIN)
+#define FMT_DATA_BLOCKED nhwc
+#define FMT_DATA_BLOCKED16 nhwc
+#define FMT_WEIGHTS_BLOCKED OhIw8o32i
+#define FMT_WEIGHTS_BLOCKED_G OhIw8o32i
+#define FMT_WEIGHTS_BLOCKED16 OhIw16o32i
+#define FMT_WEIGHTS_BLOCKED16_G OhIw16o32i
+#define FMT_WEIGHTS_DW_BLOCKED Goihw8g
+#define FMT_WEIGHTS_DW_BLOCKED16 Goihw16g
 #endif
 #define FMT_WEIGHTS_BLOCKED16_IOhw16o16i FMT_WEIGHTS_BLOCKED16
 #define TEST_CASE_NAME_PREFIX Forward
@@ -85,42 +106,104 @@
 #define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
 #define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
 
+#if defined(BIN)
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_CASE_P( \
+        str, binary_convolution_test, ::testing::Values(__VA_ARGS__))
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+        CONCAT_WITH_UNDERSCORE(TEST_CASE_NAME_PREFIX, str), __VA_ARGS__)
+#else
 #define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_CASE_P( \
         str, convolution_test, ::testing::Values(__VA_ARGS__))
 #define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
         CONCAT_WITH_UNDERSCORE(TEST_CASE_NAME_PREFIX, str), __VA_ARGS__)
+#endif
 
 #define INST_TEST_CASE_3D_(str, ...) INSTANTIATE_TEST_CASE_P( \
         str, convolution_test_3d, ::testing::Values(__VA_ARGS__))
 #define INST_TEST_CASE_3D(str, ...) INST_TEST_CASE_3D_( \
         CONCAT_WITH_UNDERSCORE(TEST_CASE_NAME_PREFIX, str), __VA_ARGS__)
 
-#ifndef NEGATIVE_SLOPE
-#define NEGATIVE_SLOPE 0.0f
+#if defined(BIN)
+#define PAD_VALUE -1.0f
+#define ELTWISE_ALGORITHM mkldnn::algorithm_undef
+#define DEPTHWISE_ALGORITHM mkldnn::algorithm_undef
+#define BINARIZATION_ALGORITHM mkldnn::algorithm_undef
+#define ELTWISE_ALPHA 0.5f
+#define ELTWISE_BETA 0.1f
+
+#if defined(WITH_SUM)
+#define WITH_SUM_BOOL true
 #else
-#undef INST_TEST_CASE
-#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
-        CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(TEST_CASE_NAME_PREFIX, \
-        str), neg_slope),  __VA_ARGS__)
+#define WITH_SUM_BOOL false
 #endif
 
+#if defined(WITH_ELTWISE)
+#if defined(WITH_DW_CONV)
+#define PARAMS(elt_alg, src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst, ...) \
+    test_binary_convolution_dw_conv_params_t { ENGINE, ALGORITHM, elt_alg, ELTWISE_ALPHA, ELTWISE_BETA, DEPTHWISE_ALGORITHM, WITH_SUM_BOOL, BINARIZATION_ALGORITHM, \
+    EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst), \
+    {__VA_ARGS__} }
+#else
+#define PARAMS(elt_alg, src, weights, bias, dst, ...) \
+    test_binary_convolution_params_t { ENGINE, ALGORITHM, PAD_VALUE, elt_alg, ELTWISE_ALPHA, ELTWISE_BETA, DEPTHWISE_ALGORITHM, WITH_SUM_BOOL, BINARIZATION_ALGORITHM, \
+    EXPAND_FORMATS(src, weights, bias, dst), \
+    {__VA_ARGS__} }
+#endif
+#elif defined(WITH_DEPTHWISE)
+#if defined(WITH_DW_CONV)
+#define PARAMS(dep_alg, src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst, ...) \
+    test_binary_convolution_dw_conv_params_t { ENGINE, ALGORITHM, ELTWISE_ALGORITHM, ELTWISE_ALPHA, ELTWISE_BETA, dep_alg, WITH_SUM_BOOL, BINARIZATION_ALGORITHM, \
+    EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst), \
+    {__VA_ARGS__} }
+#else
+#define PARAMS(dep_alg, src, weights, bias, dst, ...) \
+    test_binary_convolution_params_t { ENGINE, ALGORITHM, PAD_VALUE, ELTWISE_ALGORITHM, ELTWISE_ALPHA, ELTWISE_BETA, dep_alg, WITH_SUM_BOOL, BINARIZATION_ALGORITHM, \
+    EXPAND_FORMATS(src, weights, bias, dst), \
+    {__VA_ARGS__} }
+#endif
+#elif defined(WITH_BINARIZATION)
+#if defined(WITH_DW_CONV)
+#define PARAMS(bin_alg, src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst, ...) \
+    test_binary_convolution_dw_conv_params_t { ENGINE, ALGORITHM, ELTWISE_ALGORITHM, ELTWISE_ALPHA, ELTWISE_BETA, DEPTHWISE_ALGORITHM, WITH_SUM_BOOL, bin_alg, \
+    EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst), \
+    {__VA_ARGS__} }
+#else
+#define PARAMS(bin_alg, src, weights, bias, dst, ...) \
+    test_binary_convolution_params_t { ENGINE, ALGORITHM, PAD_VALUE, ELTWISE_ALGORITHM, ELTWISE_ALPHA, ELTWISE_BETA, DEPTHWISE_ALGORITHM, WITH_SUM_BOOL, bin_alg, \
+    EXPAND_FORMATS(src, weights, bias, dst), \
+    {__VA_ARGS__} }
+#endif
+#else
+#if defined(WITH_DW_CONV)
+#define PARAMS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst, ...) \
+    test_binary_convolution_dw_conv_params_t { ENGINE, ALGORITHM, ELTWISE_ALGORITHM, ELTWISE_ALPHA, ELTWISE_BETA, DEPTHWISE_ALGORITHM, WITH_SUM_BOOL, BINARIZATION_ALGORITHM, \
+    EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst), \
+    {__VA_ARGS__} }
+#else
 #define PARAMS(src, weights, bias, dst, ...) \
-    test_convolution_params_t { ENGINE, ALGORITHM, NEGATIVE_SLOPE, \
+    test_binary_convolution_params_t { ENGINE, ALGORITHM, PAD_VALUE, ELTWISE_ALGORITHM, ELTWISE_ALPHA, ELTWISE_BETA, DEPTHWISE_ALGORITHM, WITH_SUM_BOOL, BINARIZATION_ALGORITHM, \
+    EXPAND_FORMATS(src, weights, bias, dst), \
+    {__VA_ARGS__} }
+#endif
+#endif
+#else
+#define PARAMS(src, weights, bias, dst, ...) \
+    test_convolution_params_t { ENGINE, ALGORITHM, \
     EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
     {__VA_ARGS__} }
+#endif
 
 #define PARAMS_3D(src, weights, bias, dst, ...) \
-    test_convolution_params_t_3d { ENGINE, ALGORITHM, NEGATIVE_SLOPE, \
+    test_convolution_params_t_3d { ENGINE, ALGORITHM, \
     EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
     {__VA_ARGS__} }
-
 #define PARAMS_EXPECT_FAIL(src, weights, bias, dst, code, ...) \
-    test_convolution_params_t { ENGINE, ALGORITHM, NEGATIVE_SLOPE, \
+    test_convolution_params_t { ENGINE, ALGORITHM, \
     EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
     {__VA_ARGS__}, true, code }
 
 #define PARAMS_ATTR(src, weights, bias, dst, round_mode, scale, policy, ...) \
-    test_convolution_params_t { ENGINE, ALGORITHM, NEGATIVE_SLOPE, \
+    test_convolution_params_t { ENGINE, ALGORITHM, \
     EXPAND_FORMATS(src, weights, bias, dst), \
     {mkldnn::round_mode, scale, test_convolution_attr_t::scale_t::policy}, \
     {__VA_ARGS__} }
@@ -128,8 +211,12 @@
 #ifdef TEST_PARAM_ATTR
 #include "convolution_attr.h"
 #else
+
+#if !defined(BIN)
 #include "convolution_simple_small.h"
 #endif
+
+#endif
 //#include "convolution_alexnet.h"
 //#include "convolution_googlenet_v1.h"
 //#include "convolution_googlenet_v2.h"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/convolution_simple_small.h b/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/convolution_simple_small.h
index c9bf46adb..f901e9b16 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/convolution_simple_small.h
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/convolution_simple_small.h
@@ -119,7 +119,7 @@ INST_TEST_CASE(SimpleSmall_Blocked_1x1,
     PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
         2, 4, 16, 10, 10, 32, 10, 10, 1, 1, 0, 0, 1, 1),
     PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
-        2, 8, 32, 10, 10, 256, 10, 10, 1, 1, 0, 0, 1, 1)
+        1, 8, 32, 1, 1, 128, 1, 1, 1, 1, 0, 0, 1, 1)
 );
 
 INST_TEST_CASE(SimpleSmall_Blocked16,
@@ -164,6 +164,12 @@ INST_TEST_CASE(SimpleSmall_Blocked16,
 );
 
 INST_TEST_CASE(SimpleSmall_Regression,
+    /* grouped small input-channel avx512 */
+    PARAMS(nchw, gOhwi16o, FMT_BIAS, FMT_DATA_BLOCKED16,
+        2, 2, 16, 8, 8, 32, 8, 8, 3, 3, 1, 1, 1, 1),
+    /* grouped small input-channel avx2 */
+    PARAMS(nchw, gOhwi8o, FMT_BIAS, nChw8c,
+        2, 2, 4, 2, 2, 16, 8, 8, 3, 3, 1, 1, 1, 1),
     PARAMS(FMT_DATA_BLOCKED16, FMT_WEIGHTS_BLOCKED16, FMT_BIAS, FMT_DATA_BLOCKED16,
         2, 1, 32, 16, 16, 32, 16, 16, 3, 3, 0, 0, 1, 1),
     PARAMS(FMT_DATA_BLOCKED16, FMT_WEIGHTS_BLOCKED16, FMT_BIAS, FMT_DATA_BLOCKED16,
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/gemm_in.h b/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/gemm_in.h
index 5b3c34a1b..7c8b692db 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/gemm_in.h
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/in/gemm_in.h
@@ -1,136 +1,307 @@
-constexpr char unused = 'x';
-
 #if defined(FP32)
 INST_TEST_CASE(TestGEMM,
-    test_params{unused, 'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{unused, 't', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{unused, 'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, true, mkldnn_invalid_arguments},
-    test_params{unused, 'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, true, mkldnn_invalid_arguments},
-
-    test_params{unused, 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{unused, 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{unused, 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{unused, 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{unused, 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, false},
-    test_params{unused, 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{unused, 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{unused, 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{unused, 'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, false},
-
-    test_params{unused, 'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{unused, 'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-    test_params{unused, 't', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{unused, 't', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-    test_params{unused, 'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{unused, 'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-    test_params{unused, 't', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{unused, 't', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false}
+    test_params{'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'t', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, true, mkldnn_invalid_arguments},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, {}, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, {}, false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, {}, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, {}, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, {}, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, {}, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, {}, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, {}, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, {}, false},
+
+    test_params{'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, {}, false},
+    test_params{'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, {}, false},
+    test_params{'t', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, {}, false},
+    test_params{'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, {}, false},
+    test_params{'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, {}, false},
+    test_params{'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, {}, false},
+    test_params{'t', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, {}, false},
+    test_params{'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, {}, false}
 );
 
 #else
+constexpr test_igemm_params fix_use_oc = {'F', true, true, false};
+constexpr test_igemm_params col_use_oc = {'C', true, true, false};
+constexpr test_igemm_params row_use_oc = {'R', true, true, false};
+
+constexpr test_igemm_params fix_use_all_offsets = {'F', false, false, false};
+constexpr test_igemm_params col_use_all_offsets = {'C', false, false, false};
+constexpr test_igemm_params row_use_all_offsets = {'R', false, false, false};
+
+constexpr test_igemm_params fix_no_offsets = {'F', true, true, true};
+constexpr test_igemm_params col_no_offsets = {'C', true, true, true};
+constexpr test_igemm_params row_no_offsets = {'R', true, true, true};
 
 INST_TEST_CASE(TestGEMM_expected_failures,
-    test_params{'f', 'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{'f', 't', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{'f', 'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, true, mkldnn_invalid_arguments},
-    test_params{'f', 'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, true, mkldnn_invalid_arguments},
-
-    test_params{'r', 'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{'R', 't', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{'r', 'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, true, mkldnn_invalid_arguments},
-    test_params{'R', 'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, true, mkldnn_invalid_arguments},
-
-    test_params{'c', 'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{'C', 't', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, true, mkldnn_invalid_arguments},
-    test_params{'c', 'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, true, mkldnn_invalid_arguments},
-    test_params{'C', 'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, true, mkldnn_invalid_arguments}
+    test_params{'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'t', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, true, mkldnn_invalid_arguments},
+
+    test_params{'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'t', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, true, mkldnn_invalid_arguments},
+
+    test_params{'n', 'n', 3, 2, 1, 1.0, 0.0, 2, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'t', 'n', 3, 2, 2, 1.0, 0.0, 1, 5, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 't', 3, 2, 1, 1.0, 0.0, 3, 1, 8, {}, true, mkldnn_invalid_arguments},
+    test_params{'n', 'd', 3, 2, 1, 1.0, 0.0, 3, 3, 3, {}, true, mkldnn_invalid_arguments}
+);
+
+INST_TEST_CASE(TestGEMM_general_cases_fix_offset,
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc, false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_oc, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_use_oc, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_oc, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, fix_use_oc, false},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_all_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_all_offsets,false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_all_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_use_all_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, fix_use_all_offsets, false},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets,false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, fix_no_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, fix_no_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, fix_no_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, fix_no_offsets, false}
+);
+
+INST_TEST_CASE(TestGEMM_general_cases_col_offset,
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc, false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_oc, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_use_oc, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_use_oc, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_oc, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, col_use_oc, false},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_all_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_all_offsets,false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_all_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_use_all_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, col_use_all_offsets, false},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets,false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, col_no_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, col_no_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, col_no_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, col_no_offsets, false}
 );
 
-INST_TEST_CASE(TestGEMM_general_cases,
-    /* offsetc is fixed */
-    test_params{'f', 'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'f', 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'f', 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'f', 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'f', 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'f', 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'f', 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'f', 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'f', 'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, false},
-
-    /* offsetc is row */
-    test_params{'r', 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'R', 'n', 'T', 30, 20, 10, 2.0, 1.0, 120, 120, 120, false},
-    test_params{'r', 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'R', 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'r', 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'r', 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'R', 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'R', 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'R', 'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, false},
-
-    /* offsetc is column */
-    test_params{'C', 'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'c', 'n', 'T', 30, 20, 10, 2.0, 1.0, 120, 120, 120, false},
-    test_params{'c', 'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'c', 't', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, false},
-    test_params{'C', 'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'C', 'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'C', 't', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'c', 't', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, false},
-    test_params{'c', 'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, false}
+INST_TEST_CASE(TestGEMM_general_cases_row_offset,
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc, false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_oc, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_use_oc, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_use_oc, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_oc, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, row_use_oc, false},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_all_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_all_offsets,false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_all_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_use_all_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, row_use_all_offsets, false},
+
+    test_params{'N', 'n', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets,false},
+    test_params{'T', 'N', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.0, 1.0, 60, 50, 80, row_no_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.0, 2.0, 100, 100, 100, row_no_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.0, 2.0, 100, 100, 100, row_no_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.0, 2.0, 2, 10000, 2, row_no_offsets, false}
 );
 
-INST_TEST_CASE(TestGEMM_fractional_scales,
+INST_TEST_CASE(TestGEMM_fractional_scales_fix_offset,
     /* alpha and beta have non-zero fractional part */
-    test_params{'f', 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, false},
-    test_params{'F', 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, false},
-    test_params{'f', 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, false},
-    test_params{'F', 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, false},
-    test_params{'f', 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, false},
-    test_params{'f', 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, false},
-    test_params{'F', 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, false},
-    test_params{'F', 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, false},
-    test_params{'f', 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, false},
-
-    test_params{'r', 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, false},
-    test_params{'R', 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, false},
-    test_params{'r', 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, false},
-    test_params{'R', 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, false},
-    test_params{'r', 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, false},
-    test_params{'r', 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, false},
-    test_params{'R', 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, false},
-    test_params{'R', 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, false},
-    test_params{'r', 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, false},
-
-    test_params{'C', 'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, false},
-    test_params{'c', 'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, false},
-    test_params{'c', 'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, false},
-    test_params{'c', 't', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, false},
-    test_params{'C', 'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, false},
-    test_params{'C', 'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, false},
-    test_params{'C', 't', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, false},
-    test_params{'c', 't', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, false},
-    test_params{'c', 'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, false}
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_use_oc, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_use_oc, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_use_oc, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_use_oc, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_use_oc, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_use_oc, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_use_oc, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_use_oc, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, fix_use_oc, false},
+
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_use_all_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_use_all_offsets, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_use_all_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_use_all_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_use_all_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, fix_use_all_offsets, false},
+
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, fix_no_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, fix_no_offsets, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, fix_no_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, fix_no_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, fix_no_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, fix_no_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, fix_no_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, fix_no_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, fix_no_offsets, false}
 );
 
+INST_TEST_CASE(TestGEMM_fractional_scales_col_offset,
+    /* alpha and beta have non-zero fractional part */
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_use_oc, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_use_oc, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_use_oc, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_use_oc, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_use_oc, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_use_oc, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_use_oc, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_use_oc, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, col_use_oc, false},
+
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_use_all_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_use_all_offsets, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_use_all_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_use_all_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_use_all_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, col_use_all_offsets, false},
+
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, col_no_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, col_no_offsets, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, col_no_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, col_no_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, col_no_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, col_no_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, col_no_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, col_no_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, col_no_offsets, false}
+);
+
+INST_TEST_CASE(TestGEMM_fractional_scales_row_offset,
+    /* alpha and beta have non-zero fractional part */
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_use_oc, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_use_oc, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_use_oc, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_use_oc, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_use_oc, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_use_oc, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_use_oc, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_use_oc, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, row_use_oc, false},
+
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_use_all_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_use_all_offsets, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_use_all_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_use_all_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_use_all_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, row_use_all_offsets, false},
+
+    test_params{'n', 'T', 30, 20, 10, 2.33f, 1.66f, 60, 50, 80, row_no_offsets, false},
+    test_params{'n', 'T', 30, 20, 10, 2.19f, 1.99f, 120, 120, 120, row_no_offsets, false},
+    test_params{'T', 'N', 30, 20, 10, 2.01f, 1.01f, 60, 50, 80, row_no_offsets, false},
+    test_params{'t', 't', 30, 20, 10, 2.99f, 1.19f, 60, 50, 80, row_no_offsets, false},
+    test_params{'n', 'n', 100, 100, 2, 1.33f, 2.33f, 100, 100, 100, row_no_offsets, false},
+    test_params{'n', 't', 100, 2, 100, 1.19f, 2.99f, 100, 100, 100, row_no_offsets, false},
+    test_params{'t', 'n', 2, 100, 100, 1.01f, 2.01f, 100, 100, 100, row_no_offsets, false},
+    test_params{'t', 't', 2, 100, 100, 1.99f, 2.19f, 100, 100, 100, row_no_offsets, false},
+    test_params{'n', 'n', 2, 2, 10000, 1.66f, 2.33f, 2, 10000, 2, row_no_offsets, false}
+);
+
+
+INST_TEST_CASE(TestGEMV,
+    test_params{'n', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 2000, fix_no_offsets, false},
+    test_params{'n', 'n', 1, 3000, 2000, 1.0f, 0.0f, 1, 2000, 1, fix_no_offsets, false},
+    test_params{'t', 'n', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1000, 2000, fix_no_offsets, false},
+    test_params{'t', 'n', 1, 3000, 2000, 1.0f, 0.0f, 2000, 2000, 1, fix_no_offsets, false},
+    test_params{'n', 't', 2000, 1, 1000, 1.0f, 0.0f, 2000, 1, 2000, fix_no_offsets, false},
+    test_params{'n', 't', 1, 3000, 2000, 1.0f, 0.0f, 1, 3000, 1, fix_no_offsets, false},
+    test_params{'t', 't', 2000, 1, 1000, 1.0f, 0.0f, 1000, 1, 2000, fix_no_offsets, false},
+    test_params{'t', 't', 1, 3000, 2000, 1.0f, 0.0f, 2000, 3000, 1, fix_no_offsets, false},
+
+    test_params{'n', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 2000, fix_no_offsets, false},
+    test_params{'n', 'n', 1, 3000, 2000, 1.0f, 1.0f, 1, 2000, 1, fix_no_offsets, false},
+    test_params{'t', 'n', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1000, 2000, fix_no_offsets, false},
+    test_params{'t', 'n', 1, 3000, 2000, 1.0f, 1.0f, 2000, 2000, 1, fix_no_offsets, false},
+    test_params{'n', 't', 2000, 1, 1000, 1.0f, 1.0f, 2000, 1, 2000, fix_no_offsets, false},
+    test_params{'n', 't', 1, 3000, 2000, 1.0f, 1.0f, 1, 3000, 1, fix_no_offsets, false},
+    test_params{'t', 't', 2000, 1, 1000, 1.0f, 1.0f, 1000, 1, 2000, fix_no_offsets, false},
+    test_params{'t', 't', 1, 3000, 2000, 1.0f, 1.0f, 2000, 3000, 1, fix_no_offsets, false}
+);
+
+INST_TEST_CASE(TestGEMV_kblocking,
+    test_params{'t', 'n', 20, 1, 7000, 1.0f, 0.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 't', 50, 1, 7000, 1.0f, 0.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 400, 1, 7000, 1.0f, 0.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 't', 500, 1, 7000, 1.0f, 0.0f, 7000, 1, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 20, 1, 7000, 1.0f, 1.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 't', 50, 1, 7000, 1.0f, 1.0f, 7000, 1, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 500, 1, 7000, 1.0f, 1.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 't', 500, 1, 7000, 1.0f, 1.0f, 7000, 7000, 7000, fix_no_offsets, false},
+
+    test_params{'n', 'n', 1, 40, 7000, 1.0f, 0.0f, 1, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 1, 10, 7000, 1.0f, 0.0f, 7000, 7000, 1, fix_no_offsets, false},
+    test_params{'n', 'n', 1, 400, 7000, 1.0f, 0.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 1, 100, 7000, 1.0f, 0.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'n', 'n', 1, 40, 7000, 1.0f, 1.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 1, 10, 7000, 1.0f, 1.0f, 7000, 7000, 7000, fix_no_offsets, false},
+    test_params{'n', 'n', 1, 400, 7000, 1.0f, 1.0f, 1, 7000, 7000, fix_no_offsets, false},
+    test_params{'t', 'n', 1, 550, 7000, 1.0f, 1.0f, 7000, 7000, 1, fix_no_offsets, false}
+);
+
+
 INST_TEST_CASE(TestGEMM_heavy,
-    test_params{'f', 'n', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{'f', 'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-    test_params{'f', 't', 'n', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{'f', 't', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-    test_params{'f', 'n', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{'f', 'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-    test_params{'f', 't', 't', 2000, 2000, 2000, 1.0, 0.0, 2000, 2000, 2000, false},
-    test_params{'f', 't', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, false},
-
-    test_params{'f', 'n', 'n', 2000, 2000, 2000, 2.33f, 1.66f, 2000, 2000, 2000, false},
-    test_params{'f', 'n', 'n', 3000, 3000, 3000, 2.19f, 1.99f, 3000, 3000, 3000, false},
-    test_params{'f', 't', 'n', 2000, 2000, 2000, 2.01f, 1.01f, 2000, 2000, 2000, false},
-    test_params{'f', 't', 'n', 3000, 3000, 3000, 2.99f, 1.19f, 3000, 3000, 3000, false},
-    test_params{'f', 'n', 't', 2000, 2000, 2000, 1.33f, 2.33f, 2000, 2000, 2000, false},
-    test_params{'f', 'n', 't', 3000, 3000, 3000, 1.19f, 2.99f, 3000, 3000, 3000, false},
-    test_params{'f', 't', 't', 2000, 2000, 2000, 1.01f, 2.01f, 2000, 2000, 2000, false},
-    test_params{'f', 't', 't', 3000, 3000, 3000, 1.99f, 2.19f, 3000, 3000, 3000, false}
+    test_params{'n', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, fix_use_oc, false},
+    test_params{'t', 'n', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, fix_use_oc, false},
+    test_params{'n', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, fix_use_oc, false},
+    test_params{'t', 't', 3000, 3000, 3000, 1.0, 0.0, 3000, 3000, 3000, fix_use_oc, false},
+
+    test_params{'n', 'n', 3000, 3000, 3000, 2.19f, 1.99f, 3000, 3000, 3000, fix_use_oc, false},
+    test_params{'t', 'n', 3000, 3000, 3000, 2.99f, 1.19f, 3000, 3000, 3000, fix_use_oc, false},
+    test_params{'n', 't', 3000, 3000, 3000, 1.19f, 2.99f, 3000, 3000, 3000, fix_use_oc, false},
+    test_params{'t', 't', 3000, 3000, 3000, 1.99f, 2.19f, 3000, 3000, 3000, fix_use_oc, false}
 );
+
 #endif
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/mkldnn_test_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/mkldnn_test_common.hpp
index 317c08656..61efe71b1 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/mkldnn_test_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/mkldnn_test_common.hpp
@@ -112,9 +112,9 @@ inline size_t map_index(const mkldnn::memory::desc &md, size_t index,
                       || (md.data.format == bwd_weights_qvnni);
 
     const int ndims = md.data.ndims;
-    const int *dims = md.data.dims;
-    const int *pdims = md.data.layout_desc.blocking.padding_dims;
-    const int *optd = md.data.layout_desc.blocking.offset_padding_to_data;
+    const ptrdiff_t *dims = md.data.dims;
+    const ptrdiff_t *pdims = md.data.layout_desc.blocking.padding_dims;
+    const ptrdiff_t *optd = md.data.layout_desc.blocking.offset_padding_to_data;
 
     auto *strides_block = md.data.layout_desc.blocking.strides[0];
     auto *strides_within_block = md.data.layout_desc.blocking.strides[1];
@@ -179,8 +179,8 @@ void check_zero_tail(int set_zero_flag, mkldnn::memory &src) {
 
     const mkldnn::memory::desc src_d = src.get_primitive_desc().desc();
     const int ndims = src_d.data.ndims;
-    const int *dims = src_d.data.dims;
-    const int *pdims = src_d.data.layout_desc.blocking.padding_dims;
+    const ptrdiff_t *dims = src_d.data.dims;
+    const ptrdiff_t *pdims = src_d.data.layout_desc.blocking.padding_dims;
 
     size_t idx[MAX_NDIMS] = {}, str[MAX_NDIMS] = {};
     size_t nelems = 1;
@@ -237,6 +237,7 @@ inline mkldnn::memory::desc create_md(mkldnn::memory::dims dims,
     case f::nChw16c:
     case f::oihw:
     case f::hwio:
+    case f::iohw:
     case f::oIhw8i:
     case f::oIhw16i:
     case f::OIhw8i8o:
@@ -250,6 +251,10 @@ inline mkldnn::memory::desc create_md(mkldnn::memory::dims dims,
     case f::Ohwi8o:
     case f::Ohwi16o:
     case f::OhIw8o4i:
+    case f::OIhw4i16o4i_s8s8:
+    case f::OhIw8o4i_s8s8:
+    case f::OhIw8o32i:
+    case f::OhIw16o32i:
         ndims = 4; break;
     case f::ncdhw:
     case f::ndhwc:
@@ -259,6 +264,7 @@ inline mkldnn::memory::desc create_md(mkldnn::memory::dims dims,
     case f::oidhw:
     case f::goihw:
     case f::hwigo:
+    case f::giohw:
     case f::oIdhw8i:
     case f::oIdhw16i:
     case f::OIdhw8i8o:
@@ -268,6 +274,7 @@ inline mkldnn::memory::desc create_md(mkldnn::memory::dims dims,
     case f::gOhwi8o:
     case f::Goihw8g:
     case f::Goihw16g:
+    case f::gOhwi16o:
     case f::gOIhw8i8o:
     case f::gOIhw16i16o:
     case f::gOIhw8i16o2i:
@@ -277,6 +284,7 @@ inline mkldnn::memory::desc create_md(mkldnn::memory::dims dims,
     case f::gOIhw16o16i:
     case f::gIOhw16o16i:
     case f::gOhIw8o4i:
+    case f::Goihw16g_s8s8:
         ndims = 5; break;
     case f::gOIdhw8i8o:
     case f::gOIdhw16i16o:
@@ -340,14 +348,19 @@ static void fill_data(const size_t size, data_t *data, double sparsity = 1.,
     });
 }
 
+int div_up(const int a, const int b) {
+    return (a + b - 1) / b;
+}
+
 template <typename data_t>
 static void compare_data(mkldnn::memory& ref, mkldnn::memory& dst,
-                         data_t threshold = (data_t)1e-4)
+        data_t threshold = (data_t)1e-4, bool isBinary = false)
 {
     using data_type = mkldnn::memory::data_type;
 
     ASSERT_TRUE(data_traits<data_t>::data_type == data_type::f32 ||
-            data_traits<data_t>::data_type == data_type::s32);
+                data_traits<data_t>::data_type == data_type::s32 ||
+                data_traits<data_t>::data_type == data_type::u8);
 
     /* Note: size_t incompatible with MSVC++ */
     auto ref_desc = ref.get_primitive_desc().desc();
@@ -365,21 +378,27 @@ static void compare_data(mkldnn::memory& ref, mkldnn::memory& dst,
 
     ptrdiff_t num = 1;
     for (auto d = 0; d < ndims; ++d) {
-        num *= dims[d];
+        if (isBinary && d == 1) {
+            num *= div_up(dims[d], 8);
+        } else {
+            num *= dims[d];
+        }
     }
 
     data_t *ref_data = (data_t *)ref.get_data_handle();
     data_t *dst_data = (data_t *)dst.get_data_handle();
 
     mkldnn::impl::parallel_nd(num, [&](ptrdiff_t i) {
-        data_t ref = ref_data[map_index(ref_desc, i)];
-        data_t got = dst_data[map_index(dst_desc, i)];
+        int divider = isBinary ? 8 : 1;
+
+        data_t ref = ref_data[map_index(ref_desc, i) / divider];
+        data_t got = dst_data[map_index(dst_desc, i) / divider];
 
         if (data_traits<data_t>::data_type == data_type::f32) {
             data_t diff = got - ref;
             data_t e = (std::abs(ref) > threshold) ? diff / ref : diff;
-            EXPECT_NEAR(e, (data_t)0.0, threshold)
-                << "Index: " << i << " Total: " << num;
+            EXPECT_NEAR(e, (data_t) 0.0, threshold)
+                                << "Index: " << i << " Total: " << num;
         } else {
             EXPECT_EQ(ref, got) << "Index: " << i << " Total: " << num;
         }
@@ -505,7 +524,6 @@ struct test_convolution_formats_t {
 struct test_convolution_params_t {
     const mkldnn::engine::kind engine_kind;
     mkldnn::algorithm aalgorithm;
-    const float relu_negative_slope;
     test_convolution_formats_t formats;
     test_convolution_attr_t attr;
     test_convolution_sizes_t sizes;
@@ -516,7 +534,6 @@ struct test_convolution_params_t {
 struct test_convolution_params_t_3d {
     const mkldnn::engine::kind engine_kind;
     mkldnn::algorithm aalgorithm;
-    const float relu_negative_slope;
     test_convolution_formats_t formats;
     test_convolution_attr_t attr;
     test_convolution_sizes_t_3d sizes;
@@ -621,6 +638,33 @@ struct roi_pool_test_params {
     test_roi_pool_desc_t test_pd;
 };
 
+struct test_binary_convolution_params_t {
+    const mkldnn::engine::kind engine_kind;
+    mkldnn::algorithm aalgorithm;
+    float pad_value;
+    mkldnn::algorithm eltwise_algorithm;
+    const float eltwise_alpha;
+    const float eltwise_beta;
+    mkldnn::algorithm depthwise_algorithm;
+    bool with_sum;
+    mkldnn::algorithm binarization_algorithm;
+    test_convolution_formats_t formats;
+    test_convolution_sizes_t sizes;
+};
+
+struct test_binary_convolution_dw_conv_params_t {
+    const mkldnn::engine::kind engine_kind;
+    mkldnn::algorithm aalgorithm;
+    mkldnn::algorithm eltwise_algorithm;
+    const float eltwise_alpha;
+    const float eltwise_beta;
+    mkldnn::algorithm depthwise_algorithm;
+    bool with_sum;
+    mkldnn::algorithm binarization_algorithm;
+    test_convolution_dw_conv_formats_t formats;
+    test_convolution_dw_conv_sizes_t sizes;
+};
+
 std::ostream &operator<<(std::ostream &stream,
                          const roi_pool_test_params &tp)
 {
@@ -634,7 +678,7 @@ std::ostream &operator<<(std::ostream &stream,
 }
 
 template<typename F> bool catch_expected_failures(const F &f,
-        bool expect_to_fail, mkldnn_status_t expected_status)
+        bool expect_to_fail, mkldnn_status_t expected_status, bool ignore_unimplemented = true)
 {
     try {
         f();
@@ -643,7 +687,7 @@ template<typename F> bool catch_expected_failures(const F &f,
         // not match.
         if (!(expect_to_fail) || e.status != (expected_status)) {
             // Ignore unimplemented
-            if (e.status == mkldnn_unimplemented)
+            if ( ignore_unimplemented && (e.status == mkldnn_unimplemented))
                 return true;
             else
                 throw e;
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_batch_normalization.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_batch_normalization.cpp
index 48d5bfcc1..8b82ddbce 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_batch_normalization.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_batch_normalization.cpp
@@ -152,7 +152,7 @@ void check_bnrm_bwd(const test_bnrm_params_t &p,
 {
     const test_bnrm_sizes_t &bp = p.sizes;
     const bool use_weights = flags & use_scale_shift;
-    const bool calculate_diff_stats = !(flags & omit_stats);
+    const bool calculate_diff_stats = !(flags & use_global_stats);
 
     const data_t *src_data = (const data_t *)src.get_data_handle();
     const data_t *weights_data = use_weights ? (const data_t *)weights.get_data_handle() : nullptr;
@@ -316,11 +316,11 @@ protected:
         Forward(use_scale_shift | use_global_stats, training);
 
         Backward(0u, backward_data);
-        Backward(omit_stats, backward_data);
+        Backward(use_global_stats, backward_data);
         Backward(use_scale_shift, backward);
         Backward(use_scale_shift, backward_data);
-        Backward(use_scale_shift | omit_stats, backward);
-        Backward(use_scale_shift | omit_stats, backward_data);
+        Backward(use_scale_shift | use_global_stats, backward);
+        Backward(use_scale_shift | use_global_stats, backward_data);
 
     }
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binarization.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binarization.cpp
new file mode 100644
index 000000000..e720faf53
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binarization.cpp
@@ -0,0 +1,160 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <mkldnn_types.h>
+#include "gtest/gtest.h"
+#include "mkldnn_test_common.hpp"
+#include "mkldnn.hpp"
+
+namespace mkldnn {
+
+template <typename data_t>
+struct binarization_test_params {
+    engine::kind engine_kind;
+    algorithm alg_kind;
+    memory::format data_format;
+    memory::dims dims;
+};
+
+template <typename src_data_t>
+void check_binarization_fwd(const binarization_test_params<src_data_t> &p,
+        const memory::desc &src_md, const memory &src, const memory &weights, const memory &dst) {
+    auto src_data = (src_data_t*)src.get_data_handle();
+    auto weights_data = (src_data_t*)weights.get_data_handle();
+    auto dst_data = (uint8_t*)dst.get_data_handle();
+
+    const memory::desc src_d = src.get_primitive_desc().desc();
+    const memory::desc weights_d = weights.get_primitive_desc().desc();
+    const memory::desc dst_d = dst.get_primitive_desc().desc();
+
+    int N = src_md.data.ndims > 0 ? src_md.data.dims[0] : 1;
+    int C = src_md.data.ndims > 1 ? src_md.data.dims[1] : 1;
+    int H = src_md.data.ndims > 2 ? src_md.data.dims[2] : 1;
+    int W = src_md.data.ndims > 3 ? src_md.data.dims[3] : 1;
+
+    int nbits = 8;
+    int CB = div_up(C, nbits);
+
+    int padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    int padded_oc = dst_d.data.layout_desc.blocking.padding_dims[1];
+
+    for (int n = 0; n < N; ++n) {
+        for (int cb = 0; cb < CB; ++cb) {
+            for (int h = 0; h < H; ++h) {
+                for (int w = 0; w < W; ++w) {
+
+                    uint8_t bin_val = 0x00;
+                    for (int c = cb * nbits, shift = 0; c < std::min(C, (cb + 1) * nbits); c++, shift++) {
+                        int src_idx = n*padded_ic*H*W + c*H*W + h*W + w;
+                        int wei_idx = c;
+
+                        src_data_t s_val = src_data[map_index(src_d, src_idx)];
+                        src_data_t w_val = weights_data[map_index(weights_d, wei_idx)];
+
+                        auto bit = uint8_t((s_val > w_val) ? 0x01 : 0x00);
+                        bin_val |= (bit << shift);
+                    }
+
+                    int dst_idx = n*padded_oc*H*W + cb*nbits*H*W + h*W + w;
+                    dst_idx = map_index(dst_d, dst_idx);
+
+                    EXPECT_EQ(dst_data[dst_idx / nbits], bin_val);
+                }
+            }
+        }
+    }
+}
+
+template <typename src_data_t>
+class binarization_test : public ::testing::TestWithParam<binarization_test_params<src_data_t>> {
+private:
+
+protected:
+    virtual void SetUp() {
+        auto p = ::testing::TestWithParam<binarization_test_params<src_data_t>>::GetParam();
+
+        auto eng = engine(p.engine_kind, 0);
+        auto src_data_type = data_traits<src_data_t>::data_type;
+
+        memory::dims src_dims = memory::dims({p.dims[0], p.dims[1], p.dims[2], p.dims[3]});
+        memory::dims wei_dims = memory::dims({src_dims[1]});
+        memory::dims dst_dims = memory::dims({p.dims[0], p.dims[1], p.dims[2], p.dims[3]});
+
+        auto src_desc = create_md(src_dims, src_data_type, p.data_format);
+        auto weights_desc = create_md(wei_dims, src_data_type, memory::format::x);
+        auto dst_desc = create_md(dst_dims, memory::data_type::bin, p.data_format);
+
+        auto src = test_memory(src_desc, eng);
+        auto weights = test_memory(weights_desc, eng);
+        auto dst = test_memory(dst_desc, eng);
+
+        fill_data<src_data_t>(src.get_size() / sizeof(src_data_t), (src_data_t *)src.get().get_data_handle(),
+                              src_data_t(0), src_data_t(1));
+        fill_data<src_data_t>(weights.get_size() / sizeof(src_data_t), (src_data_t *)weights.get().get_data_handle(),
+                              src_data_t(0), src_data_t(1));
+        fill_data<uint8_t>(dst.get_size() / sizeof(uint8_t), (uint8_t*)dst.get().get_data_handle());
+
+        std::vector<primitive> pipeline;
+        auto binarization_desc = binarization_forward::desc(prop_kind::forward_training, p.alg_kind, src_desc, weights_desc, dst_desc);
+        auto binarization_prim_desc = binarization_forward::primitive_desc(binarization_desc, eng);
+        auto binarization = binarization_forward(binarization_prim_desc, src.get(), weights.get(), dst.get());
+
+        pipeline.push_back(binarization);
+        auto s = stream(stream::kind::lazy);
+        s.submit(pipeline).wait();
+
+        check_binarization_fwd(p, src_desc, src.get(), weights.get(), dst.get());
+    }
+};
+
+using binarization_test_float = binarization_test<float>;
+using binarization_test_params_float = binarization_test_params<float>;
+
+TEST_P(binarization_test_float, TestsBinarization)
+{
+}
+
+#define EXPAND(args) args
+
+#define EXPAND_FORMATS(data) memory::format::data
+
+#define ENGINE engine::kind::cpu
+
+#define PARAMS(alg, data, mb, c, h, w) \
+    binarization_test_params_float { ENGINE, algorithm::alg, \
+    EXPAND_FORMATS(data), {mb, c, h, w} }
+
+#define PARAMS_ALL_ALG(...) \
+    EXPAND(PARAMS(binarization_depthwise, __VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INSTANTIATE_TEST_CASE_P( \
+        str, binarization_test_float, ::testing::Values(__VA_ARGS__))
+
+INST_TEST_CASE(Simple_NHWC,
+    PARAMS_ALL_ALG(nhwc, 2, 8, 4, 4),
+    PARAMS_ALL_ALG(nhwc, 2, 16, 4, 4),
+    PARAMS_ALL_ALG(nhwc, 2, 16, 8, 8),
+    PARAMS_ALL_ALG(nhwc, 2, 16, 16, 8),
+    PARAMS_ALL_ALG(nhwc, 2, 16, 10, 8),
+    PARAMS_ALL_ALG(nhwc, 10, 10, 10, 10),
+    PARAMS_ALL_ALG(nhwc, 256, 64, 8, 16),
+    PARAMS_ALL_ALG(nhwc, 1, 1, 1, 1),
+    PARAMS_ALL_ALG(nhwc, 3, 5, 7, 11),
+    PARAMS_ALL_ALG(nhwc, 2, 129, 7, 4),
+    PARAMS_ALL_ALG(nhwc, 2, 333, 8, 3)
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_binarization_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_binarization_forward.cpp
new file mode 100644
index 000000000..acdd5552b
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_binarization_forward.cpp
@@ -0,0 +1,74 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionBinarization)
+{
+}
+
+#define BIN
+#define WITH_BINARIZATION
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+#define PARAMS_WITH_BINARIZATION(...) \
+    EXPAND_ARGS(PARAMS(binarization_depthwise, __VA_ARGS__))
+
+INST_TEST_CASE(SimpleSmall_Blocked_Padded_Channels,
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 41, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 47, 10, 10, 137, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+);
+
+INST_TEST_CASE(SimpleSmall_Blocked_1x1_Padded_Channels,
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 32, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 41, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 47, 3, 3, 137, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 256, 3, 3, 256, 3, 3, 1, 1, 0, 0, 1, 1)
+);
+
+//INST_TEST_CASE(SimpleSmall_Depthwise_Blocked_Padded_Channels,
+//    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 32, 32, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 43, 43, 10, 10, 43, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 256, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+//);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_depthwise_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_depthwise_forward.cpp
new file mode 100644
index 000000000..329337115
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_depthwise_forward.cpp
@@ -0,0 +1,75 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionDepthwise)
+{
+}
+
+#define BIN
+#define WITH_DEPTHWISE
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+#define PARAMS_WITH_DEPTHWISE(...) \
+    EXPAND_ARGS(PARAMS(depthwise_scale_shift, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(depthwise_prelu, __VA_ARGS__))
+
+INST_TEST_CASE(SimpleSmall_Blocked_Padded_Channels,
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 41, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 47, 10, 10, 137, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+);
+
+INST_TEST_CASE(SimpleSmall_Blocked_1x1_Padded_Channels,
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 32, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 41, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 47, 3, 3, 137, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 256, 3, 3, 256, 3, 3, 1, 1, 0, 0, 1, 1)
+);
+
+//INST_TEST_CASE(SimpleSmall_Depthwise_Blocked_Padded_Channels,
+//    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 32, 32, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 43, 43, 10, 10, 43, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 256, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+//);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_binarization_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_binarization_forward.cpp
new file mode 100644
index 000000000..8d0019a72
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_binarization_forward.cpp
@@ -0,0 +1,56 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_dw_conv_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionDwConvBinarization)
+{
+}
+
+#define BIN
+#define WITH_DW_CONV
+#define WITH_BINARIZATION
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+#define PARAMS_WITH_BINARIZATION(...) \
+    EXPAND_ARGS(PARAMS(binarization_depthwise, __VA_ARGS__))
+
+INST_TEST_CASE(SimpleSmall_Blocked,
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           1, 19, 5, 5,  77, 1, 1, 0, 0, 1, 1,  77, 3, 3, 1, 1, 1, 1)
+);
+
+INST_TEST_CASE(Mobilenet_Blocked,
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 8, 19, 33,  56, 3, 3, 1, 1, 2, 2,  56, 3, 3, 1, 1, 1, 1), // 1_1
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 56, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 1, 1), // 2_2
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 2, 2), // 3_1
+    PARAMS_WITH_BINARIZATION(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  240, 1, 1, 0, 0, 1, 1,  240, 3, 3, 1, 1, 1, 1)  // 5_3
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_depthwise_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_depthwise_forward.cpp
new file mode 100644
index 000000000..23c7ab1b4
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_depthwise_forward.cpp
@@ -0,0 +1,46 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_dw_conv_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionDwConvDepthwise)
+{
+}
+
+#define BIN
+#define WITH_DW_CONV
+#define WITH_DEPTHWISE
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+#define PARAMS_WITH_DEPTHWISE(...) \
+    EXPAND_ARGS(PARAMS(depthwise_scale_shift, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(depthwise_prelu, __VA_ARGS__))
+
+INST_TEST_CASE(SimpleSmall_Blocked,
+    PARAMS_WITH_DEPTHWISE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           1, 7, 10, 10,  37, 1, 1, 0, 0, 1, 1,  37, 3, 3, 1, 1, 1, 1)
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_eltwise_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_eltwise_forward.cpp
new file mode 100644
index 000000000..acbdb234d
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_eltwise_forward.cpp
@@ -0,0 +1,55 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_dw_conv_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionDwConvEltwise)
+{
+}
+
+#define BIN
+#define WITH_DW_CONV
+#define WITH_ELTWISE
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+#define PARAMS_WITH_ELTIWSE(...) \
+    EXPAND_ARGS(PARAMS(eltwise_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_elu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_tanh, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_square, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_abs, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_sqrt, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_linear, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_bounded_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_soft_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_logistic, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_clamp, __VA_ARGS__))
+
+INST_TEST_CASE(Mobilenet_Blocked,
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           1, 7, 10, 10,  37, 1, 1, 0, 0, 1, 1,  37, 3, 3, 1, 1, 2, 2)
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_forward.cpp
new file mode 100644
index 000000000..c813834fb
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_forward.cpp
@@ -0,0 +1,61 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_dw_conv_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionDwConv)
+{
+}
+
+#define BIN
+#define WITH_DW_CONV
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+INST_TEST_CASE(Mobilenet_Blocked,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 8, 19, 33,  56, 3, 3, 1, 1, 2, 2,  56, 3, 3, 1, 1, 1, 1), // 1_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 32, 19, 33,  56, 1, 1, 0, 0, 1, 1,  56, 3, 3, 1, 1, 2, 2), // 2_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 56, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 1, 1), // 2_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 2, 2), // 3_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 4, 8,  208, 1, 1, 0, 0, 1, 1,  208, 3, 3, 1, 1, 1, 1),  // 3_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 208, 4, 8,  216, 1, 1, 0, 0, 1, 1,  216, 3, 3, 1, 1, 2, 2),  // 4_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 216, 2, 4,  328, 1, 1, 0, 0, 1, 1,  328, 3, 3, 1, 1, 1, 1),  // 4_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 328, 2, 4,  288, 1, 1, 0, 0, 1, 1,  288, 3, 3, 1, 1, 1, 1),  // 5_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  288, 1, 1, 0, 0, 1, 1,  288, 3, 3, 1, 1, 1, 1),  // 5_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  240, 1, 1, 0, 0, 1, 1,  240, 3, 3, 1, 1, 1, 1),  // 5_3
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 240, 2, 4,  264, 1, 1, 0, 0, 1, 1,  264, 3, 3, 1, 1, 1, 1)   // 5_4
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_forward_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_forward_common.hpp
new file mode 100644
index 000000000..b84f71508
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_forward_common.hpp
@@ -0,0 +1,528 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef TEST_BINARY_CONVOLUTION_DW_CONV_FORWARD_COMMON_HPP
+#define TEST_BINARY_CONVOLUTION_DW_CONV_FORWARD_COMMON_HPP
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "math_utils.hpp"
+#include "mkldnn.hpp"
+
+using namespace mkldnn::impl::math;
+
+namespace mkldnn {
+
+void compute_ref_bin_conv_fwd(const test_binary_convolution_dw_conv_params_t &p,
+        const memory::desc &src_d,
+        const memory::desc &weights_d,
+        const memory::desc &dst_d,
+        const memory &src,
+        const memory &weights,
+        const memory &dst,
+        const memory &depthwise_weights,
+        const memory &depthwise_bias)
+{
+    auto src_dims = src_d.data.dims;
+    auto dst_dims = dst_d.data.dims;
+    auto sizes = p.sizes;
+    test_convolution_sizes_t c = {(int)src_dims[0], 1, sizes.ic, (int)src_dims[2], (int)src_dims[3],
+                                  (int)dst_dims[1], (int)dst_dims[2], (int)dst_dims[3],
+                                  sizes.conv1_kh, sizes.conv1_kw, sizes.conv1_padh, sizes.conv1_padw, sizes.conv1_strh, sizes.conv1_strw};
+
+    float pad_value = -1.f;
+
+    uint8_t* src_data = (uint8_t*)src.get_data_handle();
+    uint8_t* weights_data = (uint8_t*)weights.get_data_handle();
+    float* dst_data = (float*)dst.get_data_handle();
+
+    float *d_weights_data = (float *)depthwise_weights.get_data_handle();
+    float *d_bias_data = (float *)depthwise_bias.get_data_handle();
+
+    int nbits = 8;
+
+    size_t padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_ic_w = weights_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_oc_w = weights_d.data.layout_desc.blocking.padding_dims[0];
+
+    auto extract_bit = [](uint8_t val, uint8_t bit) -> uint8_t {
+        return (uint8_t) ((val >> bit) & 0x0001);
+    };
+
+    mkldnn::impl::parallel_nd(c.mb, c.ng, c.oc / c.ng, c.oh, c.ow,
+        [&](int n, int g, int oc, int oh, int ow) {
+            int32_t a = 0;
+            int roi = 0;
+            for (int ic = 0; ic < c.ic; ic++) {
+                for (int kh = 0; kh < c.kh; kh++) {
+                    for (int kw = 0; kw < c.kw; kw++) {
+                        int ih = oh * c.strh - c.padh + kh * (1 + c.dilh);
+                        int iw = ow * c.strw - c.padw + kw * (1 + c.dilw);
+
+                        size_t iidx = n * padded_ic * c.ih * c.iw
+                                      + g * padded_ic / c.ng * c.ih * c.iw
+                                      + ic * c.ih * c.iw + ih * c.iw + iw;
+                        iidx = map_index(src_d, iidx);
+
+                        uint8_t s;
+                        if (ih < 0 || ih >= c.ih || iw < 0 || iw >= c.iw) {
+                            if (pad_value == 0.0f) {
+                                continue;
+                            } else {
+                                s = pad_value == 1.0f ? (uint8_t)1 : (uint8_t)0;
+                            }
+                        } else {
+                             s = extract_bit(src_data[iidx/nbits], (uint8_t)(iidx % nbits));
+                        }
+
+                        size_t widx = g * padded_oc_w / c.ng * padded_ic_w
+                                      / c.ng * c.kh * c.kw
+                                      + oc * padded_ic_w / c.ng * c.kh * c.kw
+                                      + ic * c.kh * c.kw + kh * c.kw + kw;
+                        widx = map_index(weights_d, widx);
+
+                        uint8_t w = extract_bit(weights_data[widx/nbits], (uint8_t)(widx % nbits));
+
+                        a += (int32_t)(s ^ w);
+
+                        roi++;
+                    }
+                }
+            }
+
+            float a_fp = (float)(roi - 2*a);
+
+            size_t oidx = n * c.oc * c.oh * c.ow +
+                          g * c.oc / c.ng * c.oh * c.ow +
+                          oc * c.oh * c.ow +
+                          oh * c.ow +
+                          ow;
+
+            switch (p.eltwise_algorithm) {
+                case algorithm_undef:
+                    break;
+                case eltwise_relu:
+                    a_fp = relu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_tanh:
+                    a_fp = tanh_fwd(a_fp);
+                    break;
+                case eltwise_elu:
+                    a_fp = elu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_square:
+                    a_fp = square_fwd(a_fp);
+                    break;
+                case eltwise_abs:
+                    a_fp = abs_fwd(a_fp);
+                    break;
+                case eltwise_sqrt:
+                    a_fp = sqrt_fwd(a_fp);
+                    break;
+                case eltwise_linear:
+                    a_fp = linear_fwd(a_fp, p.eltwise_alpha, p.eltwise_beta);
+                    break;
+                case eltwise_bounded_relu:
+                    a_fp = bounded_relu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_soft_relu:
+                    a_fp = soft_relu_fwd(a_fp);
+                    break;
+                case eltwise_logistic:
+                    a_fp = logistic_fwd(a_fp);
+                    break;
+                case eltwise_clamp:
+                    a_fp = clamp_fwd(a_fp, p.eltwise_alpha, p.eltwise_beta);
+                    break;
+                default:
+                    assert(!"unknown alg_kind");
+            }
+
+            switch (p.depthwise_algorithm) {
+                case algorithm_undef:
+                    break;
+                case depthwise_scale_shift:
+                    a_fp = scale_shift_fwd(a_fp, d_weights_data[g * c.oc / c.ng + oc], d_bias_data[g * c.oc / c.ng + oc]);
+                    break;
+                case depthwise_prelu:
+                    a_fp = prelu_fwd(a_fp, d_weights_data[g * c.oc / c.ng + oc]);
+                    break;
+                default: assert(!"unknown alg_kind");
+            }
+
+            dst_data[map_index(dst_d, oidx)] = a_fp;
+        }
+    );
+}
+
+void compute_ref_dw_conv_fwd(const test_binary_convolution_dw_conv_params_t &p,
+        const memory &src, const memory &weights, const memory &bias, const memory &dst,
+        const memory &depthwise_weights, const memory &depthwise_bias)
+{
+    const memory::desc src_d = src.get_primitive_desc().desc();
+    const memory::desc weights_d = weights.get_primitive_desc().desc();
+    const memory::desc dst_d = dst.get_primitive_desc().desc();
+
+    auto src_dims = src_d.data.dims;
+    auto dst_dims = dst_d.data.dims;
+
+    int MB = src_dims[0];
+    int G = src_dims[1];
+    int IC = src_dims[1];
+    int IH = src_dims[2];
+    int IW = src_dims[3];
+    int OC = dst_dims[1];
+    int OH = dst_dims[2];
+    int OW = dst_dims[3];
+
+    int KH = p.sizes.conv2_kh;
+    int KW = p.sizes.conv2_kw;
+    int SH = p.sizes.conv2_strh;
+    int SW = p.sizes.conv2_strw;
+    int PH = p.sizes.conv2_padh;
+    int PW = p.sizes.conv2_padw;
+    int DH = 0;
+    int DW = 0;
+
+    float *src_data = (float *)src.get_data_handle();
+    float *weights_data = (float *)weights.get_data_handle();
+    float *bias_data = (float *)bias.get_data_handle();
+    float *dst_data = (float *)dst.get_data_handle();
+
+    float *d_weights_data = (float *)depthwise_weights.get_data_handle();
+    float *d_bias_data = (float *)depthwise_bias.get_data_handle();
+
+    mkldnn::impl::parallel_nd(MB, G, OC / G, OH, OW,
+        [&](int n, int g, int oc, int oh, int ow) {
+            int oidx = n * OC * OH * OW
+                       + g * OC / G * OH * OW
+                       + oc * OH * OW + oh * OW + ow;
+
+            float a = (float)0;
+
+            for (int ic = 0; ic < IC / G; ic++) {
+                for (int kh = 0; kh < KH; kh++) {
+                    for (int kw = 0; kw < KW; kw++) {
+                        int iw = ow * SW
+                                 - PW + kw * (1 + DW);
+                        int ih = oh * SH
+                                 - PH + kh * (1 + DH);
+                        if (iw < 0 || iw >= IW) continue;
+                        if (ih < 0 || ih >= IH) continue;
+                        int iidx = n * IC * IH * IW
+                                   + g * IC / G * IH * IW
+                                   + ic * IH * IW + ih * IW + iw;
+                        int widx = g * OC / G * IC
+                                   / G * KH * KW
+                                   + oc * IC / G * KH * KW
+                                   + ic * KH * KW + kh * KW + kw;
+
+                        iidx = map_index(src_d, iidx);
+
+                        float s = src_data[iidx];
+                        float w = weights_data[map_index(weights_d, widx)];
+
+                        a += s * w;
+
+                    }
+                }
+            }
+
+            float a_fp = (float)a;
+
+            a_fp += bias_data[G > 1 ? g : oc];
+
+            if (p.with_sum)
+                a_fp += dst_data[map_index(dst_d, oidx)];
+
+            switch (p.eltwise_algorithm) {
+                case algorithm_undef:
+                    break;
+                case eltwise_relu:
+                    a_fp = relu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_tanh:
+                    a_fp = tanh_fwd(a_fp);
+                    break;
+                case eltwise_elu:
+                    a_fp = elu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_square:
+                    a_fp = square_fwd(a_fp);
+                    break;
+                case eltwise_abs:
+                    a_fp = abs_fwd(a_fp);
+                    break;
+                case eltwise_sqrt:
+                    a_fp = sqrt_fwd(a_fp);
+                    break;
+                case eltwise_linear:
+                    a_fp = linear_fwd(a_fp, p.eltwise_alpha, p.eltwise_beta);
+                    break;
+                case eltwise_bounded_relu:
+                    a_fp = bounded_relu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_soft_relu:
+                    a_fp = soft_relu_fwd(a_fp);
+                    break;
+                case eltwise_logistic:
+                    a_fp = logistic_fwd(a_fp);
+                    break;
+                case eltwise_clamp:
+                    a_fp = clamp_fwd(a_fp, p.eltwise_alpha, p.eltwise_beta);
+                    break;
+                default:
+                    assert(!"unknown alg_kind");
+            }
+
+            switch (p.depthwise_algorithm) {
+                case algorithm_undef:
+                    break;
+                case depthwise_scale_shift:
+                    a_fp = scale_shift_fwd(a_fp, d_weights_data[g * OC / G + oc], d_bias_data[g * OC / G + oc]);
+                    break;
+                case depthwise_prelu:
+                    a_fp = prelu_fwd(a_fp, d_weights_data[g * OC / G + oc]);
+                    break;
+                default: assert(!"unknown alg_kind");
+            }
+
+            dst_data[map_index(dst_d, oidx)] = (float)a_fp;
+        }
+    );
+}
+
+void compute_ref_binarization_fwd(const test_binary_convolution_dw_conv_params_t &p,
+    const memory::desc &src_md, const memory &src, const memory &weights, const memory &dst) {
+    auto src_data = (float*)src.get_data_handle();
+    auto weights_data = (float*)weights.get_data_handle();
+    auto dst_data = (uint8_t*)dst.get_data_handle();
+
+    const memory::desc src_d = src.get_primitive_desc().desc();
+    const memory::desc weights_d = weights.get_primitive_desc().desc();
+    const memory::desc dst_d = dst.get_primitive_desc().desc();
+
+    int N = src_md.data.ndims > 0 ? src_md.data.dims[0] : 1;
+    int C = src_md.data.ndims > 1 ? src_md.data.dims[1] : 1;
+    int H = src_md.data.ndims > 2 ? src_md.data.dims[2] : 1;
+    int W = src_md.data.ndims > 3 ? src_md.data.dims[3] : 1;
+
+    int nbits = 8;
+    int CB = div_up(C, nbits);
+
+    int padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    int padded_oc = dst_d.data.layout_desc.blocking.padding_dims[1];
+
+    for (int n = 0; n < N; ++n) {
+        for (int cb = 0; cb < CB; ++cb) {
+            for (int h = 0; h < H; ++h) {
+                for (int w = 0; w < W; ++w) {
+
+                    uint8_t bin_val = 0x00;
+                    for (int c = cb * nbits, shift = 0; c < std::min(C, (cb + 1) * nbits); c++, shift++) {
+                        int src_idx = n*padded_ic*H*W + c*H*W + h*W + w;
+                        int wei_idx = c;
+
+                        float s_val = src_data[map_index(src_d, src_idx)];
+                        float w_val = weights_data[map_index(weights_d, wei_idx)];
+
+                        auto bit = uint8_t((s_val > w_val) ? 0x01 : 0x00);
+                        bin_val |= (bit << shift);
+                    }
+
+                    int dst_idx = n*padded_oc*H*W + cb*nbits*H*W + h*W + w;
+                    dst_idx = map_index(dst_d, dst_idx);
+                    dst_data[dst_idx / nbits] = bin_val;
+                }
+            }
+        }
+    }
+}
+
+class binary_convolution_forward_test : public ::testing::TestWithParam<test_binary_convolution_dw_conv_params_t>
+{
+protected:
+    virtual void SetUp()
+    {
+        test_binary_convolution_dw_conv_params_t p = ::testing::TestWithParam<test_binary_convolution_dw_conv_params_t>::GetParam();
+
+        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
+        ASSERT_EQ(p.aalgorithm, algorithm::binary_convolution_direct);
+
+        test_convolution_dw_conv_sizes_t cd = p.sizes;
+
+        auto eng = engine(p.engine_kind, 0);
+        auto aprop_kind = prop_kind::forward;
+        bool with_binarization = p.binarization_algorithm != algorithm_undef;
+//        int nbits = 8;
+
+        memory::data_type data_type_bin_conv_src = memory::data_type::bin;
+        memory::data_type data_type_bin_conv_wei = memory::data_type::bin;
+        memory::data_type data_type_bin_conv_bia = data_traits<float>::data_type;
+        memory::data_type data_type_bin_conv_dst = data_traits<float>::data_type;
+
+        memory::data_type data_type_dw_conv_wei = data_traits<float>::data_type;
+        memory::data_type data_type_dw_conv_bia = data_traits<float>::data_type;
+        memory::data_type data_type_dw_conv_dst = with_binarization ? memory::data_type::bin
+                                                                    : data_traits<float>::data_type;
+
+        int bin_conv_oh = (cd.ih - ((cd.conv1_kh - 1) + 1) + 2 * cd.conv1_padh) / cd.conv1_strh + 1;
+        int bin_conv_ow = (cd.iw - ((cd.conv1_kw - 1) + 1) + 2 * cd.conv1_padw) / cd.conv1_strw + 1;
+
+        int dw_conv_oh = (bin_conv_oh - ((cd.conv2_kh - 1) + 1) + 2 * cd.conv2_padh) / cd.conv2_strh + 1;
+        int dw_conv_ow = (bin_conv_ow - ((cd.conv2_kw - 1) + 1) + 2 * cd.conv2_padw) / cd.conv2_strw + 1;
+
+        std::vector<ptrdiff_t> bin_conv_padR = { cd.conv1_padh, cd.conv1_padw };
+        bin_conv_padR[0] += dw_conv_oh - bin_conv_oh;
+        bin_conv_padR[1] += dw_conv_ow - bin_conv_ow;
+
+        auto bin_conv_src_desc = create_md({ cd.mb, cd.ic, cd.ih, cd.iw }, data_type_bin_conv_src, p.formats.src_format);
+        auto bin_conv_weights_desc = create_md({ cd.conv1_oc, cd.ic, cd.conv1_kh, cd.conv1_kw }, data_type_bin_conv_wei, p.formats.conv1_weights_format);
+        auto bin_conv_dst_desc = create_md({ cd.mb, cd.conv1_oc, dw_conv_oh, dw_conv_ow }, data_type_bin_conv_dst, p.formats.dst_format);
+
+        auto bin_conv_src = test_memory(bin_conv_src_desc, eng);
+        auto bin_conv_weights = test_memory(bin_conv_weights_desc, eng);
+
+        fill_data<uint8_t>(bin_conv_src.get_size() / sizeof(uint8_t), (uint8_t*)bin_conv_src.get().get_data_handle());
+        fill_data<uint8_t>(bin_conv_weights.get_size() / sizeof(uint8_t), (uint8_t*)bin_conv_weights.get().get_data_handle());
+
+        auto dw_conv_weights_desc = create_md({ cd.conv2_oc, 1, 1, cd.conv2_kh, cd.conv2_kw }, data_type_dw_conv_wei, p.formats.conv2_weights_format);
+        auto dw_conv_dst_desc = create_md({ cd.mb, cd.conv2_oc, dw_conv_oh, dw_conv_ow }, data_type_dw_conv_dst, p.formats.dst_format);
+        auto dw_conv_bias_desc = create_md({ cd.conv2_oc }, data_type_dw_conv_bia, p.formats.conv2_bias_format);
+
+        auto dw_conv_weights = test_memory(dw_conv_weights_desc, eng);
+        auto dw_conv_bias = test_memory(dw_conv_bias_desc, eng);
+        auto dw_conv_dst = test_memory(dw_conv_dst_desc, eng);
+
+        if (with_binarization)
+            fill_data<uint8_t>(dw_conv_dst.get_size() / sizeof(uint8_t), (uint8_t*)dw_conv_dst.get().get_data_handle());
+        else
+            fill_data<float>(dw_conv_dst.get_size() / sizeof(float), (float*)dw_conv_dst.get().get_data_handle());
+
+        fill_data<float>(dw_conv_weights.get_size() / sizeof(float), (float*)dw_conv_weights.get().get_data_handle());
+        fill_data<float>(dw_conv_bias.get_size() / sizeof(float), (float*)dw_conv_bias.get().get_data_handle());
+
+        auto bin_conv_desc = binary_convolution_forward::desc(aprop_kind, p.aalgorithm,
+                                                              bin_conv_src_desc, bin_conv_weights_desc, bin_conv_dst_desc,
+                                                              { cd.conv1_strh, cd.conv1_strw }, { 0, 0 },
+                                                              { cd.conv1_padh, cd.conv1_padw }, bin_conv_padR, -1.f);
+
+        mkldnn::post_ops bin_conv_post_ops;
+        if (p.eltwise_algorithm != algorithm_undef)
+            bin_conv_post_ops.append_eltwise(1.0, p.eltwise_algorithm, p.eltwise_alpha, p.eltwise_beta);
+
+        auto bin_conv_depthwise_weights_desc = create_md({ cd.conv1_oc }, data_type_bin_conv_bia, memory::x);
+        auto bin_conv_depthwise_bias_desc = create_md({ cd.conv1_oc }, data_type_bin_conv_bia, memory::x);
+        auto bin_conv_depthwise_weights = memory({bin_conv_depthwise_weights_desc, eng});
+        auto bin_conv_depthwise_bias = memory({bin_conv_depthwise_bias_desc, eng});
+
+        if (p.depthwise_algorithm != algorithm_undef) {
+            fill_data<float>(bin_conv_depthwise_weights.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)bin_conv_depthwise_weights.get_data_handle(), 1., true);
+            fill_data<float>(bin_conv_depthwise_bias.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)bin_conv_depthwise_bias.get_data_handle(), 1., true);
+
+            bin_conv_post_ops.append_depthwise(p.depthwise_algorithm, static_cast<const float*>(bin_conv_depthwise_weights.get_data_handle()),
+                                               static_cast<const float*>(bin_conv_depthwise_bias.get_data_handle()));
+        }
+
+        bin_conv_post_ops.append_dw_conv(bin_conv_oh, bin_conv_ow, cd.conv2_kh, cd.conv2_kw, cd.conv2_strh, cd.conv2_strw,
+                                         static_cast<const float*>(dw_conv_weights.get().get_data_handle()),
+                                         static_cast<const float*>(dw_conv_bias.get().get_data_handle()));
+
+        if (p.with_sum)
+            bin_conv_post_ops.append_sum();
+
+        if (p.eltwise_algorithm != algorithm_undef)
+            bin_conv_post_ops.append_eltwise(1.0, p.eltwise_algorithm, p.eltwise_alpha, p.eltwise_beta);
+
+        auto dw_conv_depthwise_weights_desc = create_md({ cd.conv2_oc }, data_type_bin_conv_bia, memory::x);
+        auto dw_conv_depthwise_bias_desc = create_md({ cd.conv2_oc }, data_type_bin_conv_bia, memory::x);
+        auto dw_conv_depthwise_weights = memory({dw_conv_depthwise_weights_desc, eng});
+        auto dw_conv_depthwise_bias = memory({dw_conv_depthwise_bias_desc, eng});
+
+        if (p.depthwise_algorithm != algorithm_undef) {
+            fill_data<float>(dw_conv_depthwise_weights.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)dw_conv_depthwise_weights.get_data_handle(), 1., true);
+            fill_data<float>(dw_conv_depthwise_bias.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)dw_conv_depthwise_bias.get_data_handle(), 1., true);
+
+            bin_conv_post_ops.append_depthwise(p.depthwise_algorithm, static_cast<const float*>(dw_conv_depthwise_weights.get_data_handle()),
+                                 static_cast<const float*>(dw_conv_depthwise_bias.get_data_handle()));
+        }
+
+        auto dw_conv_binarization_weights_desc = create_md({ cd.conv2_oc }, memory::data_type::f32, memory::x);
+        auto dw_conv_binarization_weights = memory({dw_conv_binarization_weights_desc, eng});
+
+        if (p.binarization_algorithm != algorithm_undef) {
+            fill_data<float>(dw_conv_binarization_weights.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)dw_conv_binarization_weights.get_data_handle(), 0.f, p.sizes.conv2_oc * p.sizes.conv2_kh * p.sizes.conv2_kw);
+
+            bin_conv_post_ops.append_binarization(p.binarization_algorithm, static_cast<const float*>(dw_conv_binarization_weights.get_data_handle()));
+        }
+
+        mkldnn::primitive_attr bin_conv_attr;
+        bin_conv_attr.set_post_ops(bin_conv_post_ops);
+
+        auto bin_conv_primitive_desc = binary_convolution_forward::primitive_desc(bin_conv_desc, bin_conv_attr, eng);
+
+        auto bin_conv = binary_convolution_forward(bin_conv_primitive_desc, bin_conv_src.get(), bin_conv_weights.get(), dw_conv_dst.get());
+
+        auto bin_conv_dst_desc_ref = create_md({ cd.mb, cd.conv1_oc, bin_conv_oh, bin_conv_ow }, data_type_bin_conv_dst, p.formats.dst_format);
+        auto ref_bin_conv_dst = test_memory(bin_conv_dst_desc_ref, eng);
+        compute_ref_bin_conv_fwd(p, bin_conv_src_desc, bin_conv_weights_desc, bin_conv_dst_desc_ref,
+                                 bin_conv_src.get(), bin_conv_weights.get(), ref_bin_conv_dst.get(),
+                                 bin_conv_depthwise_weights, bin_conv_depthwise_bias);
+
+        if (with_binarization) {
+            auto ref_dw_conv_dst_desc = create_md({ cd.mb, cd.conv2_oc, dw_conv_oh, dw_conv_ow }, memory::data_type::f32, p.formats.dst_format);
+            auto ref_dw_conv_dst = test_memory(ref_dw_conv_dst_desc, eng);
+
+            compute_ref_dw_conv_fwd(p, ref_bin_conv_dst.get(), dw_conv_weights.get(), dw_conv_bias.get(),
+                                    ref_dw_conv_dst.get(),
+                                    dw_conv_depthwise_weights, dw_conv_depthwise_bias);
+
+            auto ref_binarization_dst = test_memory(dw_conv_dst_desc, eng);
+
+            compute_ref_binarization_fwd(p, ref_dw_conv_dst_desc, ref_dw_conv_dst.get(), dw_conv_binarization_weights, ref_binarization_dst.get());
+
+            std::vector<primitive> pipeline;
+            pipeline.push_back(bin_conv);
+            auto s = stream(stream::kind::lazy);
+            s.submit(pipeline).wait();
+
+            compare_data<uint8_t>(ref_binarization_dst.get(), dw_conv_dst.get(), 0, true);
+        } else {
+            auto ref_dw_conv_dst = test_memory(dw_conv_dst_desc, eng);
+            memcpy((float *) ref_dw_conv_dst.get().get_data_handle(), (float *) dw_conv_dst.get().get_data_handle(),
+                   ref_dw_conv_dst.get_size());
+            compute_ref_dw_conv_fwd(p, ref_bin_conv_dst.get(), dw_conv_weights.get(), dw_conv_bias.get(),
+                                    ref_dw_conv_dst.get(),
+                                    dw_conv_depthwise_weights, dw_conv_depthwise_bias);
+
+            std::vector<primitive> pipeline;
+            pipeline.push_back(bin_conv);
+            auto s = stream(stream::kind::lazy);
+            s.submit(pipeline).wait();
+
+            compare_data<float>(ref_dw_conv_dst.get(), dw_conv_dst.get(), 1e-3);
+        }
+    }
+};
+
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_sum_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_sum_forward.cpp
new file mode 100644
index 000000000..7e0bcae79
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_dw_conv_sum_forward.cpp
@@ -0,0 +1,67 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_dw_conv_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionDwConvSum)
+{
+}
+
+#define BIN
+#define WITH_DW_CONV
+#define WITH_SUM
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+INST_TEST_CASE(SimpleSmall_Blocked,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           1, 7, 10, 10,  37, 1, 1, 0, 0, 1, 1,  37, 3, 3, 1, 1, 1, 1)
+);
+
+INST_TEST_CASE(Mobilenet_Blocked,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 8, 19, 33,  56, 3, 3, 1, 1, 2, 2,  56, 3, 3, 1, 1, 1, 1), // 1_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 32, 19, 33,  56, 1, 1, 0, 0, 1, 1,  56, 3, 3, 1, 1, 2, 2), // 2_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 56, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 1, 1), // 2_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 2, 2), // 3_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 4, 8,  208, 1, 1, 0, 0, 1, 1,  208, 3, 3, 1, 1, 1, 1), // 3_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 208, 4, 8,  216, 1, 1, 0, 0, 1, 1,  216, 3, 3, 1, 1, 2, 2),  // 4_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 216, 2, 4,  328, 1, 1, 0, 0, 1, 1,  328, 3, 3, 1, 1, 1, 1),  // 4_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 328, 2, 4,  288, 1, 1, 0, 0, 1, 1,  288, 3, 3, 1, 1, 1, 1),  // 5_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  288, 1, 1, 0, 0, 1, 1,  288, 3, 3, 1, 1, 1, 1),  // 5_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  240, 1, 1, 0, 0, 1, 1,  240, 3, 3, 1, 1, 1, 1),  // 5_3
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 240, 2, 4,  264, 1, 1, 0, 0, 1, 1,  264, 3, 3, 1, 1, 1, 1)   // 5_4
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_eltwise_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_eltwise_forward.cpp
new file mode 100644
index 000000000..74dcc0384
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_eltwise_forward.cpp
@@ -0,0 +1,80 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionEltwise)
+{
+}
+
+#define BIN
+#define WITH_ELTWISE
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+#define PARAMS_WITH_ELTIWSE(...) \
+    EXPAND_ARGS(PARAMS(eltwise_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_elu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_tanh, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_square, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_abs, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_sqrt, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_linear, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_bounded_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_soft_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_logistic, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS(eltwise_clamp, __VA_ARGS__))
+
+INST_TEST_CASE(SimpleSmall_Blocked_Padded_Channels,
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 5, 10, 10, 137, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 4, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+);
+
+INST_TEST_CASE(SimpleSmall_Blocked_1x1_Padded_Channels,
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 41, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 15, 3, 3, 137, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 14, 3, 3, 256, 3, 3, 1, 1, 0, 0, 1, 1)
+);
+
+//INST_TEST_CASE(SimpleSmall_Depthwise_Blocked_Padded_Channels,
+//    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 32, 32, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 43, 43, 10, 10, 43, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS_WITH_ELTIWSE(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 256, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+//);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_forward.cpp
new file mode 100644
index 000000000..0dcc32684
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_forward.cpp
@@ -0,0 +1,92 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolution)
+{
+}
+
+#define BIN
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+INST_TEST_CASE(SimpleSmall_Blocked_Padded_Channels,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 7, 3, 3, 5, 3, 3, 1, 1, 0, 0, 1, 1, 0),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 15, 3, 3, 37, 4, 4, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 14, 4, 4, 1, 4, 4, 3, 3, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 7, 3, 3, 33, 3, 3, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 19, 2, 2, 22, 2, 2, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 126, 13, 13, 126, 13, 13, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 77, 13, 13, 99, 11, 11, 3, 3, 0, 0, 1, 1)
+);
+
+INST_TEST_CASE(SimpleSmall_Blocked_1x1_Padded_Channels,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 13, 13, 35, 13, 13, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 7, 3, 3, 11, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 1, 4, 4, 58, 4, 4, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 27, 3, 3, 33, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 81, 2, 2, 81, 2, 2, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 126, 13, 13, 13, 13, 13, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 111, 13, 13, 71, 13, 13, 1, 1, 0, 0, 1, 1)
+);
+
+//INST_TEST_CASE(SimpleSmall_Depthwise_Blocked_Padded_Channels,
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 126, 126, 10, 10, 126, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 77, 77, 9, 9, 77, 2, 2, 5, 5, 0, 0, 3, 3),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 68, 68, 26, 26, 68, 13, 13, 4, 4, 1, 1, 2, 2),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 33, 33, 111, 111, 33, 112, 112, 1, 1, 0, 0, 1, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        1, 111, 111, 1, 2, 111, 1, 1, 3, 3, 1, 1, 1, 2),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        1, 29, 29, 16, 32, 29, 16, 18, 3, 3, 1, 2, 1, 2),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        1, 53, 53, 32, 16, 53, 16, 14, 3, 3, 1, 0, 2, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        1, 13, 13, 32, 16, 13, 18, 16, 3, 3, 2, 1, 2, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        1, 9, 9, 500, 500, 9, 698, 698, 3, 3, 100, 100, 1, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+//        1, 2, 2, 500, 500, 2, 698, 698, 3, 3, 100, 100, 1, 1)
+//);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_forward_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_forward_common.hpp
new file mode 100644
index 000000000..bef6e15c1
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_forward_common.hpp
@@ -0,0 +1,352 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#ifndef TEST_BINARY_CONVOLUTION_FORWARD_COMMON_HPP
+#define TEST_BINARY_CONVOLUTION_FORWARD_COMMON_HPP
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "math_utils.hpp"
+#include "mkldnn.hpp"
+
+using namespace mkldnn::impl::math;
+
+namespace {
+
+}
+
+namespace mkldnn {
+
+void compute_ref_bin_conv_fwd(const test_binary_convolution_params_t &p,
+        const memory::desc &src_d,
+        const memory::desc &weights_d,
+        const memory::desc &dst_d,
+        const memory &src,
+        const memory &weights,
+        const memory &dst,
+        const memory &depthwise_weights,
+        const memory &depthwise_bias)
+{
+    auto c = p.sizes;
+
+    uint8_t* src_data = (uint8_t*)src.get_data_handle();
+    uint8_t* weights_data = (uint8_t*)weights.get_data_handle();
+    float* dst_data = (float*)dst.get_data_handle();
+
+    float *d_weights_data = (float *)depthwise_weights.get_data_handle();
+    float *d_bias_data = (float *)depthwise_bias.get_data_handle();
+
+    int nbits = 8;
+
+    size_t padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_ic_w = weights_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_oc_w = weights_d.data.layout_desc.blocking.padding_dims[0];
+
+    auto extract_bit = [](uint8_t val, uint8_t bit) -> uint8_t {
+        return (uint8_t) ((val >> bit) & 0x0001);
+    };
+
+    mkldnn::impl::parallel_nd(c.mb, c.ng, c.oc / c.ng, c.oh, c.ow,
+        [&](int n, int g, int oc, int oh, int ow) {
+            int32_t a = 0;
+            int roi = 0;
+            for (int ic = 0; ic < c.ic; ic++) {
+                for (int kh = 0; kh < c.kh; kh++) {
+                    for (int kw = 0; kw < c.kw; kw++) {
+                        int ih = oh * c.strh - c.padh + kh * (1 + c.dilh);
+                        int iw = ow * c.strw - c.padw + kw * (1 + c.dilw);
+
+                        size_t iidx = n * padded_ic * c.ih * c.iw
+                                      + g * padded_ic / c.ng * c.ih * c.iw
+                                      + ic * c.ih * c.iw + ih * c.iw + iw;
+                        iidx = map_index(src_d, iidx);
+
+                        uint8_t s;
+                        if (ih < 0 || ih >= c.ih || iw < 0 || iw >= c.iw) {
+                            if (p.pad_value == 0.0f) {
+                                continue;
+                            } else {
+                                s = p.pad_value == 1.0f ? (uint8_t)1 : (uint8_t)0;
+                            }
+                        } else {
+                             s = extract_bit(src_data[iidx/nbits], (uint8_t)(iidx % nbits));
+                        }
+
+                        size_t widx = g * padded_oc_w / c.ng * padded_ic_w
+                                      / c.ng * c.kh * c.kw
+                                      + oc * padded_ic_w / c.ng * c.kh * c.kw
+                                      + ic * c.kh * c.kw + kh * c.kw + kw;
+                        widx = map_index(weights_d, widx);
+
+                        uint8_t w = extract_bit(weights_data[widx/nbits], (uint8_t)(widx % nbits));
+
+                        a += (int32_t)(s ^ w);
+
+                        roi++;
+                    }
+                }
+            }
+
+            float a_fp = (float)(roi - 2*a);
+
+            size_t oidx = n * c.oc * c.oh * c.ow +
+                          g * c.oc / c.ng * c.oh * c.ow +
+                          oc * c.oh * c.ow +
+                          oh * c.ow +
+                          ow;
+
+            if (p.with_sum)
+                a_fp += dst_data[map_index(dst_d, oidx)];
+
+            switch (p.eltwise_algorithm) {
+                case algorithm_undef:
+                    break;
+                case eltwise_relu:
+                    a_fp = relu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_tanh:
+                    a_fp = tanh_fwd(a_fp);
+                    break;
+                case eltwise_elu:
+                    a_fp = elu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_square:
+                    a_fp = square_fwd(a_fp);
+                    break;
+                case eltwise_abs:
+                    a_fp = abs_fwd(a_fp);
+                    break;
+                case eltwise_sqrt:
+                    a_fp = sqrt_fwd(a_fp);
+                    break;
+                case eltwise_linear:
+                    a_fp = linear_fwd(a_fp, p.eltwise_alpha, p.eltwise_beta);
+                    break;
+                case eltwise_bounded_relu:
+                    a_fp = bounded_relu_fwd(a_fp, p.eltwise_alpha);
+                    break;
+                case eltwise_soft_relu:
+                    a_fp = soft_relu_fwd(a_fp);
+                    break;
+                case eltwise_logistic:
+                    a_fp = logistic_fwd(a_fp);
+                    break;
+                case eltwise_clamp:
+                    a_fp = clamp_fwd(a_fp, p.eltwise_alpha, p.eltwise_beta);
+                    break;
+                default:
+                    assert(!"unknown alg_kind");
+            }
+
+            switch (p.depthwise_algorithm) {
+                case algorithm_undef:
+                    break;
+                case depthwise_scale_shift:
+                    a_fp = scale_shift_fwd(a_fp, d_weights_data[g * c.oc / c.ng + oc], d_bias_data[g * c.oc / c.ng + oc]);
+                    break;
+                case depthwise_prelu:
+                    a_fp = prelu_fwd(a_fp, d_weights_data[g * c.oc / c.ng + oc]);
+                    break;
+                default: assert(!"unknown alg_kind");
+            }
+
+            dst_data[map_index(dst_d, oidx)] = a_fp;
+        }
+    );
+}
+
+void compute_ref_binarization_fwd(const test_binary_convolution_params_t &p,
+    const memory::desc &src_md, const memory &src, const memory &weights, const memory &dst) {
+    auto src_data = (float*)src.get_data_handle();
+    auto weights_data = (float*)weights.get_data_handle();
+    auto dst_data = (uint8_t*)dst.get_data_handle();
+
+    const memory::desc src_d = src.get_primitive_desc().desc();
+    const memory::desc weights_d = weights.get_primitive_desc().desc();
+    const memory::desc dst_d = dst.get_primitive_desc().desc();
+
+    int N = src_md.data.ndims > 0 ? src_md.data.dims[0] : 1;
+    int C = src_md.data.ndims > 1 ? src_md.data.dims[1] : 1;
+    int H = src_md.data.ndims > 2 ? src_md.data.dims[2] : 1;
+    int W = src_md.data.ndims > 3 ? src_md.data.dims[3] : 1;
+
+    int nbits = 8;
+    int CB = div_up(C, nbits);
+
+    int padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    int padded_oc = dst_d.data.layout_desc.blocking.padding_dims[1];
+
+    for (int n = 0; n < N; ++n) {
+        for (int cb = 0; cb < CB; ++cb) {
+            for (int h = 0; h < H; ++h) {
+                for (int w = 0; w < W; ++w) {
+
+                    uint8_t bin_val = 0x00;
+                    for (int c = cb * nbits, shift = 0; c < std::min(C, (cb + 1) * nbits); c++, shift++) {
+                        int src_idx = n*padded_ic*H*W + c*H*W + h*W + w;
+                        int wei_idx = c;
+
+                        float s_val = src_data[map_index(src_d, src_idx)];
+                        float w_val = weights_data[map_index(weights_d, wei_idx)];
+
+                        auto bit = uint8_t((s_val > w_val) ? 0x01 : 0x00);
+                        bin_val |= (bit << shift);
+                    }
+
+                    int dst_idx = n*padded_oc*H*W + cb*nbits*H*W + h*W + w;
+                    dst_idx = map_index(dst_d, dst_idx);
+                    dst_data[dst_idx / nbits] = bin_val;
+                }
+            }
+        }
+    }
+}
+
+class binary_convolution_forward_test : public ::testing::TestWithParam<test_binary_convolution_params_t>
+{
+protected:
+    virtual void SetUp()
+    {
+        test_binary_convolution_params_t p = ::testing::TestWithParam<test_binary_convolution_params_t>::GetParam();
+
+        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
+        ASSERT_EQ(p.aalgorithm, algorithm::binary_convolution_direct);
+
+        test_convolution_sizes_t cd = p.sizes;
+
+        auto eng = engine(p.engine_kind, 0);
+        auto aprop_kind = prop_kind::forward;
+        bool with_binarization = p.binarization_algorithm != algorithm_undef;
+
+        memory::data_type data_type_src = memory::data_type::bin;
+        memory::data_type data_type_wei = memory::data_type::bin;
+        memory::data_type data_type_bia = memory::data_type::f32;
+        memory::data_type data_type_dst = with_binarization ? memory::data_type::bin
+                                                            : data_traits<float>::data_type;
+
+        auto c_src_desc = create_md({ cd.mb, cd.ic, cd.ih, cd.iw }, data_type_src, p.formats.src_format);
+        auto c_weights_desc = cd.ng > 1
+                ? create_md({ cd.ng, cd.oc / cd.ng, cd.ic / cd.ng, cd.kh, cd.kw }, data_type_wei, p.formats.weights_format)
+                : create_md({ cd.oc, cd.ic, cd.kh, cd.kw }, data_type_wei, p.formats.weights_format);
+        auto c_dst_desc = create_md({ cd.mb, cd.oc, cd.oh, cd.ow }, data_type_dst, p.formats.dst_format);
+
+        auto c_src = test_memory(c_src_desc, eng);
+        auto c_weights = test_memory(c_weights_desc, eng);
+        auto c_dst = test_memory(c_dst_desc, eng);
+
+        // Only true for dense format
+        if (with_binarization)
+            fill_data<uint8_t>(c_dst.get_size() / sizeof(uint8_t), (uint8_t*)c_dst.get().get_data_handle());
+        else
+            fill_data<float>(c_dst.get_size() / sizeof(float), (float*)c_dst.get().get_data_handle());
+        fill_data<uint8_t>(c_src.get_size() / sizeof(uint8_t), (uint8_t*)c_src.get().get_data_handle());
+        fill_data<uint8_t>(c_weights.get_size() / sizeof(uint8_t), (uint8_t*)c_weights.get().get_data_handle());
+
+        std::vector<ptrdiff_t> padR = {
+            right_padding(cd.ih, cd.oh, cd.kh, cd.padh, cd.strh, cd.dilh),
+            right_padding(cd.iw, cd.ow, cd.kw, cd.padw, cd.strw, cd.dilw)
+        };
+
+        auto bin_conv_desc = binary_convolution_forward::desc(aprop_kind, p.aalgorithm,
+                    c_src_desc, c_weights_desc, c_dst_desc,
+                    { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
+                    { cd.padh, cd.padw }, padR, p.pad_value);
+
+        mkldnn::post_ops ops;
+
+        if (p.with_sum)
+            ops.append_sum();
+
+        if (p.eltwise_algorithm != algorithm_undef)
+            ops.append_eltwise(1.0, p.eltwise_algorithm, p.eltwise_alpha, p.eltwise_beta);
+
+        auto c_depthwise_weights_desc = create_md({ cd.oc }, data_type_bia, memory::x);
+        auto c_depthwise_bias_desc = create_md({ cd.oc }, data_type_bia, memory::x);
+
+        auto c_depthwise_weights = memory({c_depthwise_weights_desc, eng});
+        auto c_depthwise_bias = memory({c_depthwise_bias_desc, eng});
+
+        if (p.depthwise_algorithm != algorithm_undef) {
+            fill_data<float>(c_depthwise_weights.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)c_depthwise_weights.get_data_handle(), 1., true);
+            fill_data<float>(c_depthwise_bias.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)c_depthwise_bias.get_data_handle(), 1., true);
+
+            ops.append_depthwise(p.depthwise_algorithm, static_cast<const float*>(c_depthwise_weights.get_data_handle()),
+                                                        static_cast<const float*>(c_depthwise_bias.get_data_handle()));
+        }
+
+        auto c_binarization_weights_desc = create_md({ cd.oc }, memory::data_type::f32, memory::x);
+        auto c_binarization_weights = memory({c_binarization_weights_desc, eng});
+
+        if (p.binarization_algorithm != algorithm_undef) {
+            fill_data<float>(c_binarization_weights.get_primitive_desc().get_size() / sizeof(float),
+                             (float *)c_binarization_weights.get_data_handle(), 1., true);
+
+            ops.append_binarization(p.binarization_algorithm, static_cast<const float*>(c_binarization_weights.get_data_handle()));
+        }
+
+        mkldnn::primitive_attr attr;
+        attr.set_post_ops(ops);
+
+        auto bin_conv_primitive_desc = binary_convolution_forward::primitive_desc(bin_conv_desc, attr, eng);
+
+        auto bin_conv = binary_convolution_forward(bin_conv_primitive_desc, c_src.get(), c_weights.get(), c_dst.get());
+
+        if (with_binarization) {
+            auto c_dst_desc_ref = create_md({ cd.mb, cd.oc, cd.oh, cd.ow }, memory::data_type::f32, p.formats.dst_format);
+            auto c_dst_ref = test_memory(c_dst_desc_ref, eng);
+
+            std::vector<float> ref_dst_conv_data(c_dst_ref.get_size() / sizeof(float));
+            auto ref_conv_memory = memory(memory::primitive_desc(c_dst_desc_ref, eng), &ref_dst_conv_data[0]);
+
+            std::vector<uint8_t > ref_dst_data(c_dst.get_size() / sizeof(uint8_t));
+            auto ref_memory = memory(memory::primitive_desc(c_dst_desc, eng), &ref_dst_data[0]);
+
+            compute_ref_bin_conv_fwd(p, c_src_desc, c_weights_desc, c_dst_desc_ref,
+                                     c_src.get(), c_weights.get(), ref_conv_memory,
+                                     c_depthwise_weights, c_depthwise_bias);
+
+            compute_ref_binarization_fwd(p, c_dst_desc_ref, ref_conv_memory, c_binarization_weights, ref_memory);
+
+            std::vector<primitive> pipeline;
+            pipeline.push_back(bin_conv);
+            auto s = stream(stream::kind::lazy);
+            s.submit(pipeline).wait();
+
+            compare_data<uint8_t>(ref_memory, c_dst.get(), 0, true);
+        } else {
+            std::vector<float> ref_dst_data(c_dst.get_size() / sizeof(float));
+            memcpy(&ref_dst_data[0], (float*)c_dst.get().get_data_handle(), ref_dst_data.size() * sizeof(float));
+            auto ref_memory = memory(memory::primitive_desc(c_dst_desc, eng), &ref_dst_data[0]);
+
+            compute_ref_bin_conv_fwd(p, c_src_desc, c_weights_desc, c_dst_desc,
+                                     c_src.get(), c_weights.get(), ref_memory,
+                                     c_depthwise_weights, c_depthwise_bias);
+
+            std::vector<primitive> pipeline;
+            pipeline.push_back(bin_conv);
+            auto s = stream(stream::kind::lazy);
+            s.submit(pipeline).wait();
+
+            compare_data<float>(ref_memory, c_dst.get(), 1e-3);
+        }
+    }
+};
+
+}
+
+#endif
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_sum_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_sum_forward.cpp
new file mode 100644
index 000000000..1a9a54805
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_binary_convolution_sum_forward.cpp
@@ -0,0 +1,71 @@
+/*******************************************************************************
+* Copyright 2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_binary_convolution_forward_common.hpp"
+
+namespace mkldnn {
+
+using binary_convolution_test = binary_convolution_forward_test;
+
+TEST_P(binary_convolution_test, TestBinaryConvolutionSum)
+{
+}
+
+#define BIN
+#define WITH_SUM
+#define DIRECTION_FORWARD
+#include "convolution_common.h"
+
+INST_TEST_CASE(SimpleSmall_Blocked_Padded_Channels,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 41, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 47, 10, 10, 137, 10, 10, 3, 3, 1, 1, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+);
+
+INST_TEST_CASE(SimpleSmall_Blocked_1x1_Padded_Channels,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 3, 10, 10, 3, 10, 10, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 32, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 13, 3, 3, 41, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 47, 3, 3, 137, 3, 3, 1, 1, 0, 0, 1, 1),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+        2, 1, 256, 3, 3, 256, 3, 3, 1, 1, 0, 0, 1, 1)
+);
+
+//INST_TEST_CASE(SimpleSmall_Depthwise_Blocked_Padded_Channels,
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 32, 32, 10, 10, 32, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 43, 43, 10, 10, 43, 10, 10, 3, 3, 1, 1, 1, 1),
+//    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED_G, FMT_BIAS, FMT_DATA_BLOCKED,
+//        2, 256, 256, 10, 10, 256, 10, 10, 3, 3, 1, 1, 1, 1)
+//);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_concat.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_concat.cpp
index b47977906..48e2f4f5e 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_concat.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_concat.cpp
@@ -39,7 +39,7 @@ class concat_test: public ::testing::TestWithParam<concat_test_params> {
         const data_t *dst_data = (const data_t *)dst.get_data_handle();
         const auto &dst_d = dst.get_primitive_desc().desc();
         const auto dst_dims = dst_d.data.dims;
-        const int* dst_pdims = dst_d.data.layout_desc.blocking.padding_dims;
+        const ptrdiff_t* dst_pdims = dst_d.data.layout_desc.blocking.padding_dims;
 
         int acc_concat_dim = 0;
         const auto ndims = dst_d.data.ndims;
@@ -47,8 +47,8 @@ class concat_test: public ::testing::TestWithParam<concat_test_params> {
         for (size_t num = 0; num < srcs.size(); num++) {
             const data_t *src_data = (const data_t *)srcs[num].get_data_handle();
             const auto &src_d = srcs[num].get_primitive_desc().desc();
-            const int* src_dims = src_d.data.dims;
-            const int* src_pdims = src_d.data.layout_desc.blocking.padding_dims;
+            const ptrdiff_t* src_dims = src_d.data.dims;
+            const ptrdiff_t* src_pdims = src_d.data.layout_desc.blocking.padding_dims;
 
             auto N = src_dims[0];
             auto C = src_dims[1];
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_data_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_data_common.hpp
index b523c507a..1df9df75c 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_data_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_data_common.hpp
@@ -119,7 +119,7 @@ protected:
         auto c_weights = test_memory(c_weights_desc, eng);
         auto c_diff_dst = test_memory(c_dst_desc, eng);
 
-        std::vector<int> padR = {
+        std::vector<ptrdiff_t> padR = {
             right_padding(cd.ih, cd.oh, cd.kh, cd.padh, cd.strh, cd.dilh),
             right_padding(cd.iw, cd.ow, cd.kw, cd.padw, cd.strw, cd.dilw)
         };
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_weights_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_weights_common.hpp
index 8331c1824..00b896647 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_weights_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_backward_weights_common.hpp
@@ -172,7 +172,7 @@ protected:
         check_zero_tail<data_t_src>(1, c_src.get());
         check_zero_tail<data_t_diff_weights>(1, c_diff_weights.get());
 
-        std::vector<int> padR = {
+        std::vector<ptrdiff_t> padR = {
             right_padding(cd.ih, cd.oh, cd.kh, cd.padh, cd.strh, cd.dilh),
             right_padding(cd.iw, cd.ow, cd.kw, cd.padw, cd.strw, cd.dilw)
         };
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_common.hpp
new file mode 100644
index 000000000..730be03b7
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_common.hpp
@@ -0,0 +1,237 @@
+/*******************************************************************************
+* Copyright 2018-2019 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "math_utils.hpp"
+#include "mkldnn.hpp"
+
+using namespace mkldnn::impl::math;
+
+namespace mkldnn {
+
+template <typename T, typename U>
+inline typename std::remove_reference<T>::type div_up(const T a, const U b) {
+    assert(b);
+    return (a + b - 1) / b;
+}
+
+template <typename T, typename U>
+inline typename std::remove_reference<T>::type rnd_up(const T a, const U b) {
+    return div_up(a, b) * b;
+}
+
+template <typename data_t_src, typename data_t_wei,
+          typename data_t_acc, typename data_t_dst>
+void compute_ref_conv_depthwise_fwd(const test_convolution_sizes_t &c,
+        const memory &src, const memory &weights, const memory &bias,
+        const memory &dst, bool w_bias, algorithm depthwise_alg,
+        const memory &depthwise_weights, const memory &depthwise_bias)
+{
+    data_t_src *src_data = (data_t_src *)src.get_data_handle();
+    data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
+    data_t_dst *bias_data
+            = (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
+    data_t_dst *dst_data = (data_t_dst *)dst.get_data_handle();
+
+    float *d_weights_data = (float *)depthwise_weights.get_data_handle();
+    float *d_bias_data = (float *)depthwise_bias.get_data_handle();
+
+    const memory::desc src_d = src.get_primitive_desc().desc();
+    const memory::desc weights_d = weights.get_primitive_desc().desc();
+    const memory::desc dst_d = dst.get_primitive_desc().desc();
+
+    size_t padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_oc = dst_d.data.layout_desc.blocking.padding_dims[1];
+
+    size_t padded_ic_w = weights_d.data.format == mkldnn_OhIw8o4i ? weights_d.data.layout_desc.blocking.padding_dims[1] :
+                                                                    src_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_oc_w = weights_d.data.format == mkldnn_OhIw8o4i ? weights_d.data.layout_desc.blocking.padding_dims[0] :
+                                                                    dst_d.data.layout_desc.blocking.padding_dims[1];
+
+    mkldnn::impl::parallel_nd(c.mb, c.ng, c.oc / c.ng, c.oh, c.ow,
+        [&](int n, int g, int oc, int oh, int ow) {
+            size_t oidx = n * padded_oc * c.oh * c.ow
+                    + g * padded_oc / c.ng * c.oh * c.ow
+                    + oc * c.oh * c.ow + oh * c.ow + ow;
+
+            size_t didx = map_index(dst_d, oidx);
+            size_t bidx = g * c.oc / c.ng + oc;
+            dst_data[didx] = bias_data
+                    ? bias_data[bidx] : data_t_dst{0};
+
+            for (int ic = 0; ic < c.ic / c.ng; ic++)
+            for (int kh = 0; kh < c.kh; kh++)
+            for (int kw = 0; kw < c.kw; kw++)
+            {
+                int ih = oh * c.strh - c.padh + kh * (1 + c.dilh);
+                if (ih < 0 || ih >= c.ih) continue;
+                int iw = ow * c.strw - c.padw + kw * (1 + c.dilw);
+                if (iw < 0 || iw >= c.iw) continue;
+
+                size_t iidx = n * padded_ic * c.ih * c.iw
+                    + g * padded_ic / c.ng * c.ih * c.iw
+                    + ic * c.ih * c.iw + ih * c.iw + iw;
+                size_t widx = g * padded_oc_w / c.ng * padded_ic_w
+                    / c.ng * c.kh * c.kw
+                    + oc * padded_ic_w / c.ng * c.kh * c.kw
+                    + ic * c.kh * c.kw + kh * c.kw + kw;
+
+                dst_data[didx] += src_data[map_index(src_d, iidx)]
+                        * weights_data[map_index(weights_d, widx)];
+            }
+
+            switch (depthwise_alg) {
+                case depthwise_scale_shift:
+                    dst_data[didx] = scale_shift_fwd(dst_data[didx], d_weights_data[bidx], d_bias_data[bidx]);
+                    break;
+                case depthwise_prelu:
+                    dst_data[didx] = prelu_fwd(dst_data[didx], d_weights_data[bidx]);
+                    break;
+                default: assert(!"unknown alg_kind");
+            }
+        }
+    );
+}
+
+template <typename data_t_src, typename data_t_wei,
+          typename data_t_acc, typename data_t_dst>
+class convolution_depthwise_test
+    : public ::testing::TestWithParam<test_convolution_depthwise_params_t> {
+protected:
+    virtual void SetUp() {
+        test_convolution_depthwise_params_t p
+                = ::testing::TestWithParam<
+                test_convolution_depthwise_params_t>::GetParam();
+
+        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
+        ASSERT_EQ(p.aalgorithm, convolution_direct);
+        auto eng = engine(p.engine_kind, 0);
+
+        memory::data_type data_type_src = data_traits<data_t_src>::data_type;
+        memory::data_type data_type_dst = data_traits<data_t_dst>::data_type;
+        memory::data_type data_type_wei = data_traits<data_t_wei>::data_type;
+
+        test_convolution_sizes_t cd = p.sizes;
+
+        auto c_src_desc = create_md({ cd.mb, cd.ic, cd.ih, cd.iw },
+                data_type_src, p.formats.src_format);
+        auto c_weights_desc = cd.ng > 1 ?
+                create_md({ cd.ng, cd.oc / cd.ng, cd.ic / cd.ng, cd.kh, cd.kw },
+                        data_type_wei, p.formats.weights_format) :
+                create_md({ cd.oc, cd.ic, cd.kh, cd.kw },
+                        data_type_wei, p.formats.weights_format);
+        auto c_dst_desc = create_md({ cd.mb, cd.oc, cd.oh, cd.ow },
+                data_type_dst, p.formats.dst_format);
+
+        auto c_src = memory({c_src_desc, eng});
+        auto c_weights = memory({c_weights_desc, eng});
+        auto c_dst = memory({c_dst_desc, eng});
+
+        auto dst_ref = memory({c_dst_desc, eng});
+
+        fill_data<data_t_src>(c_src.get_primitive_desc().get_size()
+                / sizeof(data_t_src), (data_t_src *)c_src.get_data_handle(),
+                data_t_src(0), data_t_src(1));
+        check_zero_tail<data_t_src>(1, c_src);
+
+        fill_data<data_t_wei>(
+                c_weights.get_primitive_desc().get_size()
+                / sizeof(data_t_wei),(data_t_wei *)c_weights.get_data_handle(),
+                data_t_wei(0), data_t_wei(1));
+        check_zero_tail<data_t_wei>(1, c_weights);
+
+        bool with_bias = p.formats.bias_format != memory::format::format_undef;
+        auto c_bias_desc = with_bias ?
+                create_md({ cd.oc }, data_type_dst, p.formats.bias_format) :
+                create_md({}, data_type_dst, p.formats.bias_format);
+        auto c_bias = memory({c_bias_desc, eng});
+        if (with_bias) {
+            fill_data<data_t_dst>(
+                    c_bias.get_primitive_desc().get_size() / sizeof(data_t_dst),
+                    (data_t_dst *)c_bias.get_data_handle(), 1., true);
+        }
+
+        std::vector<ptrdiff_t> padR = { cd.padh, cd.padw };
+        for (int i = 0; i < 2; ++i) {
+            if ((cd.ih - ((cd.kh - 1) * (cd.dilh + 1) + 1) + cd.padh + padR[0])
+                / cd.strh + 1 != cd.oh)
+                ++padR[0];
+            if ((cd.iw - ((cd.kw - 1) * (cd.dilw + 1) + 1) + cd.padw + padR[1])
+                / cd.strw + 1 != cd.ow)
+                ++padR[1];
+        }
+
+        auto c_depthwise_weights_desc = create_md({ rnd_up(cd.oc, 16) }, data_type_dst, memory::x);
+        auto c_depthwise_bias_desc = create_md({ rnd_up(cd.oc, 16) }, data_type_dst, memory::x);
+
+        auto c_depthwise_weights = memory({c_depthwise_weights_desc, eng});
+        auto c_depthwise_bias = memory({c_depthwise_bias_desc, eng});
+
+        fill_data<data_t_dst>(
+                c_depthwise_weights.get_primitive_desc().get_size() / sizeof(data_t_dst),
+                (data_t_dst *)c_depthwise_weights.get_data_handle(), 1., true);
+        fill_data<data_t_dst>(
+                c_depthwise_bias.get_primitive_desc().get_size() / sizeof(data_t_dst),
+                (data_t_dst *)c_depthwise_bias.get_data_handle(), 1., true);
+
+
+        auto test = [&]() {
+            mkldnn::post_ops ops;
+            ops.append_depthwise(p.alg, static_cast<const float*>(c_depthwise_weights.get_data_handle()),
+                                        static_cast<const float*>(c_depthwise_bias.get_data_handle()));
+
+            mkldnn::primitive_attr attr;
+            attr.set_post_ops(ops);
+
+            auto conv_desc = with_bias
+                ? convolution_forward::desc(prop_kind::forward_scoring,
+                        p.aalgorithm, c_src_desc, c_weights_desc, c_bias_desc,
+                        c_dst_desc, { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
+                        { cd.padh, cd.padw }, padR, padding_kind::zero)
+                : convolution_forward::desc(prop_kind::forward_scoring,
+                        p.aalgorithm, c_src_desc, c_weights_desc, c_dst_desc,
+                        { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
+                        { cd.padh, cd.padw }, padR, padding_kind::zero);
+
+            auto conv_primitive_desc =
+                convolution_forward::primitive_desc(conv_desc, attr, eng);
+
+            auto conv = with_bias
+                ? convolution_forward(conv_primitive_desc,
+                        c_src, c_weights, c_bias, c_dst)
+                : convolution_forward(conv_primitive_desc,
+                        c_src, c_weights, c_dst);
+            std::vector<primitive> pipeline;
+            pipeline.push_back(conv);
+
+            stream(stream::kind::lazy).submit(pipeline).wait();
+        };
+
+        if (catch_expected_failures(test, p.expect_to_fail, p.expected_status))
+            return;
+
+        compute_ref_conv_depthwise_fwd<data_t_src, data_t_wei, data_t_wei,
+            data_t_dst>(cd, c_src, c_weights, c_bias, dst_ref, with_bias,
+                        p.alg, c_depthwise_weights, c_depthwise_bias);
+        check_zero_tail<data_t_dst>(1, dst_ref);
+
+        compare_data<data_t_dst>(dst_ref, c_dst, 1e-2);
+        check_zero_tail<data_t_dst>(0, c_dst);
+    }
+};
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_f32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_f32.cpp
index 3789f8fdf..9008310d9 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_f32.cpp
@@ -16,217 +16,11 @@
 
 #include "mkldnn_test_common.hpp"
 #include "gtest/gtest.h"
-
 #include "mkldnn.hpp"
+#include "test_convolution_depthwise_forward_common.hpp"
 
 namespace mkldnn {
 
-template <typename T> inline T scale_shift_fwd(T s_val, T w_val, T b_val) {
-    return s_val*w_val + b_val;
-}
-
-template <typename T> inline T prelu_fwd(T s_val, T w_val) {
-    return s_val >= 0 ? s_val : w_val*s_val;
-}
-
-template <typename data_t_src, typename data_t_wei,
-          typename data_t_acc, typename data_t_dst>
-void compute_ref_conv_depthwise_fwd(const test_convolution_sizes_t &c,
-        const memory &src, const memory &weights, const memory &bias,
-        const memory &dst, bool w_bias, algorithm depthwise_alg,
-        const memory &depthwise_weights, const memory &depthwise_bias)
-{
-    data_t_src *src_data = (data_t_src *)src.get_data_handle();
-    data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
-    data_t_dst *bias_data
-            = (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
-    data_t_dst *dst_data = (data_t_dst *)dst.get_data_handle();
-
-    data_t_dst *d_weights_data = (data_t_dst *)depthwise_weights.get_data_handle();
-    data_t_dst *d_bias_data = (data_t_dst *)depthwise_bias.get_data_handle();
-
-    const memory::desc src_d = src.get_primitive_desc().desc();
-    const memory::desc weights_d = weights.get_primitive_desc().desc();
-    const memory::desc dst_d = dst.get_primitive_desc().desc();
-
-#pragma omp parallel for collapse(5) schedule(static)
-    for (int n = 0; n < c.mb; n++) {
-        for (int g = 0; g < c.ng; g++) {
-            for (int oc = 0; oc < c.oc / c.ng; oc++) {
-                for (int oh = 0; oh < c.oh; oh++) {
-                    for (int ow = 0; ow < c.ow; ow++) {
-                        int oidx = n * c.oc * c.oh * c.ow
-                                + g * c.oc / c.ng * c.oh * c.ow
-                                + oc * c.oh * c.ow + oh * c.ow + ow;
-
-                        int didx = map_index(dst_d, oidx);
-                        int bidx = g * c.oc / c.ng + oc;
-                        dst_data[didx] = bias_data ?
-                                bias_data[map_index(
-                                        bias.get_primitive_desc().desc(),
-                                        bidx)] :
-                                data_t_dst{0};
-                        for (int ic = 0; ic < c.ic / c.ng; ic++) {
-                            for (int kh = 0; kh < c.kh; kh++) {
-                                for (int kw = 0; kw < c.kw; kw++) {
-                                    int iw = ow * c.strw
-                                          - c.padw + kw * (1 + c.dilw);
-                                    int ih = oh * c.strh
-                                          - c.padh + kh * (1 + c.dilh);
-                                    if (iw < 0 || iw >= c.iw) continue;
-                                    if (ih < 0 || ih >= c.ih) continue;
-                                    int iidx = n * c.ic * c.ih * c.iw
-                                            + g * c.ic / c.ng * c.ih * c.iw
-                                            + ic * c.ih * c.iw + ih * c.iw + iw;
-                                    int widx = g * c.oc / c.ng * c.ic
-                                                    / c.ng * c.kh * c.kw
-                                            + oc * c.ic / c.ng * c.kh * c.kw
-                                            + ic * c.kh * c.kw + kh * c.kw + kw;
-
-                                    dst_data[didx]
-                                            += src_data[map_index(src_d, iidx)]
-                                            * weights_data[map_index(
-                                                      weights_d, widx)];
-                                }
-                            }
-                        }
-
-                        switch (depthwise_alg) {
-                            case depthwise_scale_shift:
-                                dst_data[didx] = scale_shift_fwd(dst_data[didx], d_weights_data[bidx], d_bias_data[bidx]);
-                                break;
-                            case depthwise_prelu:
-                                dst_data[didx] = prelu_fwd(dst_data[didx], d_weights_data[bidx]);
-                                break;
-                            default: assert(!"unknown alg_kind");
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-template <typename data_t_src, typename data_t_wei,
-          typename data_t_acc, typename data_t_dst>
-class convolution_depthwise_test
-    : public ::testing::TestWithParam<test_convolution_depthwise_params_t> {
-protected:
-    virtual void SetUp()
-    {
-        test_convolution_depthwise_params_t p
-                = ::testing::TestWithParam<
-                test_convolution_depthwise_params_t>::GetParam();
-
-        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
-        ASSERT_EQ(p.aalgorithm, convolution_direct);
-        auto eng = engine(p.engine_kind, 0);
-
-        memory::data_type data_type_src = data_traits<data_t_src>::data_type;
-        memory::data_type data_type_dst = data_traits<data_t_dst>::data_type;
-        memory::data_type data_type_wei = data_traits<data_t_wei>::data_type;
-
-        test_convolution_sizes_t cd = p.sizes;
-
-        auto c_src_desc = create_md({ cd.mb, cd.ic, cd.ih, cd.iw },
-                data_type_src, p.formats.src_format);
-        auto c_weights_desc = cd.ng > 1 ?
-                create_md({ cd.ng, cd.oc / cd.ng, cd.ic / cd.ng, cd.kh, cd.kw },
-                        data_type_wei, p.formats.weights_format) :
-                create_md({ cd.oc, cd.ic, cd.kh, cd.kw },
-                        data_type_wei, p.formats.weights_format);
-        auto c_dst_desc = create_md({ cd.mb, cd.oc, cd.oh, cd.ow },
-                data_type_dst, p.formats.dst_format);
-
-        auto c_src = memory({c_src_desc, eng});
-        auto c_weights = memory({c_weights_desc, eng});
-        auto c_dst = memory({c_dst_desc, eng});
-
-        auto dst_ref = memory({c_dst_desc, eng});
-
-        fill_data<data_t_src>(c_src.get_primitive_desc().get_size()
-                / sizeof(data_t_src), (data_t_src *)c_src.get_data_handle(), data_t_src(0), data_t_src(1));
-
-        fill_data<data_t_wei>(
-                c_weights.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)c_weights.get_data_handle(), data_t_wei(0), data_t_wei(1));
-
-        bool with_bias = p.formats.bias_format != memory::format::format_undef;
-        auto c_bias_desc = with_bias ?
-                create_md({ cd.oc }, data_type_dst, p.formats.bias_format) :
-                create_md({}, data_type_dst, p.formats.bias_format);
-        auto c_bias = memory({c_bias_desc, eng});
-        if (with_bias) {
-            fill_data<data_t_dst>(
-                    c_bias.get_primitive_desc().get_size() / sizeof(data_t_dst),
-                    (data_t_dst *)c_bias.get_data_handle(), 1., true);
-        }
-
-        std::vector<int> padR = { cd.padh, cd.padw };
-        for (int i = 0; i < 2; ++i) {
-            if ((cd.ih - ((cd.kh - 1) * (cd.dilh + 1) + 1) + cd.padh + padR[0])
-                / cd.strh + 1 != cd.oh)
-                ++padR[0];
-            if ((cd.iw - ((cd.kw - 1) * (cd.dilw + 1) + 1) + cd.padw + padR[1])
-                / cd.strw + 1 != cd.ow)
-                ++padR[1];
-        }
-
-        auto c_depthwise_weights_desc = create_md({ cd.oc }, data_type_dst, memory::x);
-        auto c_depthwise_bias_desc = create_md({ cd.oc }, data_type_dst, memory::x);
-
-        auto c_depthwise_weights = memory({c_depthwise_weights_desc, eng});
-        auto c_depthwise_bias = memory({c_depthwise_bias_desc, eng});
-
-        fill_data<data_t_dst>(
-                c_depthwise_weights.get_primitive_desc().get_size() / sizeof(data_t_dst),
-                (data_t_dst *)c_depthwise_weights.get_data_handle(), 1., true);
-        fill_data<data_t_dst>(
-                c_depthwise_bias.get_primitive_desc().get_size() / sizeof(data_t_dst),
-                (data_t_dst *)c_depthwise_bias.get_data_handle(), 1., true);
-
-        auto test = [&]() {
-            mkldnn::post_ops ops;
-            ops.append_depthwise(p.alg, static_cast<const float*>(c_depthwise_weights.get_data_handle()),
-                                        static_cast<const float*>(c_depthwise_bias.get_data_handle()));
-
-            mkldnn::primitive_attr attr;
-            attr.set_post_ops(ops);
-
-            auto conv_desc = with_bias
-                ? convolution_forward::desc(prop_kind::forward_scoring,
-                        p.aalgorithm, c_src_desc, c_weights_desc, c_bias_desc,
-                        c_dst_desc, { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
-                        { cd.padh, cd.padw }, padR, padding_kind::zero)
-                : convolution_forward::desc(prop_kind::forward_scoring,
-                        p.aalgorithm, c_src_desc, c_weights_desc, c_dst_desc,
-                        { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
-                        { cd.padh, cd.padw }, padR, padding_kind::zero);
-
-            auto conv_primitive_desc =
-                convolution_forward::primitive_desc(conv_desc, attr, eng);
-
-            auto conv = with_bias
-                ? convolution_forward(conv_primitive_desc,
-                        c_src, c_weights, c_bias, c_dst)
-                : convolution_forward(conv_primitive_desc,
-                        c_src, c_weights, c_dst);
-            std::vector<primitive> pipeline;
-            pipeline.push_back(conv);
-
-            stream(stream::kind::lazy).submit(pipeline).wait();
-        };
-
-        if (catch_expected_failures(test, p.expect_to_fail, p.expected_status))
-            return;
-
-        compute_ref_conv_depthwise_fwd<data_t_src, data_t_wei, data_t_wei,
-            data_t_dst>(cd, c_src, c_weights, c_bias, dst_ref, with_bias,
-                        p.alg, c_depthwise_weights, c_depthwise_bias);
-        compare_data<data_t_dst>(dst_ref, c_dst, 1e-3);
-    }
-};
-
 using convolution_test = convolution_depthwise_test<float, float, float, float>;
 
 TEST_P(convolution_test, TestConvolution)
@@ -237,8 +31,10 @@ TEST_P(convolution_test, TestConvolution)
     { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
     mkldnn::memory::format::bias, mkldnn::memory::format::dst }
 
-#define FMT_WEIGHTS_BLOCKED OIhw8i8o
+#define FMT_WEIGHTS_BLOCKED8 OIhw8i8o
+#define FMT_WEIGHTS_BLOCKED8_DW Goihw8g
 #define FMT_WEIGHTS_BLOCKED16 OIhw16i16o
+#define FMT_WEIGHTS_BLOCKED16_DW Goihw16g
 
 #define ENGINE mkldnn::engine::kind::cpu
 #define ALGORITHM mkldnn::convolution_direct
@@ -259,7 +55,6 @@ TEST_P(convolution_test, TestConvolution)
     EXPAND_ARGS(PARAMS_CONV(depthwise_scale_shift, __VA_ARGS__)), \
     EXPAND_ARGS(PARAMS_CONV(depthwise_prelu, __VA_ARGS__))
 
-
 #define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
     test_convolution_depthwise_params_t {alg,  ENGINE, ALGORITHM, \
     EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
@@ -276,25 +71,25 @@ TEST_P(convolution_test, TestConvolution)
                2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
     );
 
-    INST_TEST_CASE(SimpleSmall_Blocked,
-        PARAMS(nChw8c, OIhw8i8o, x, nChw8c,
+    INST_TEST_CASE(SimpleSmall_Blocked8,
+        PARAMS(nChw8c, FMT_WEIGHTS_BLOCKED8, x, nChw8c,
                2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
-        PARAMS(nChw8c, OIhw8i8o, x, nChw8c,
+        PARAMS(nChw8c, FMT_WEIGHTS_BLOCKED8, x, nChw8c,
                2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
-        PARAMS(nChw8c, Goihw8g, x, nChw8c,
+        PARAMS(nChw8c, FMT_WEIGHTS_BLOCKED8_DW, x, nChw8c,
                2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
-        PARAMS(nChw8c, Goihw8g, x, nChw8c,
+        PARAMS(nChw8c, FMT_WEIGHTS_BLOCKED8_DW, x, nChw8c,
                2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
     );
 
     INST_TEST_CASE(SimpleSmall_Blocked16,
-        PARAMS(nChw16c, OIhw16i16o, x, nChw16c,
+        PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
                2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
-        PARAMS(nChw16c, OIhw16i16o, x, nChw16c,
+        PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16, x, nChw16c,
                2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
-        PARAMS(nChw16c, Goihw16g, x, nChw16c,
+        PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16_DW, x, nChw16c,
                2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
-        PARAMS(nChw16c, Goihw16g, x, nChw16c,
+        PARAMS(nChw16c, FMT_WEIGHTS_BLOCKED16_DW, x, nChw16c,
                2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
     );
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_x8s8f32s32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_x8s8f32s32.cpp
new file mode 100644
index 000000000..79ba40621
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_depthwise_forward_x8s8f32s32.cpp
@@ -0,0 +1,106 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "mkldnn.hpp"
+#include "test_convolution_depthwise_forward_common.hpp"
+
+namespace mkldnn {
+
+using convolution_test = convolution_depthwise_test<uint8_t, int8_t, int32_t, float>;
+
+TEST_P(convolution_test, TestConvolution)
+{
+}
+
+#define EXPAND_FORMATS(src, weights, bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
+    mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+
+#define FMT_WEIGHTS_BLOCKED8 OhIw8o4i
+#define FMT_WEIGHTS_BLOCKED8_DW Goihw8g
+#define FMT_WEIGHTS_BLOCKED16 OIhw4i16o4i
+#define FMT_WEIGHTS_BLOCKED16_DW Goihw16g
+
+#define ENGINE mkldnn::engine::kind::cpu
+#define ALGORITHM mkldnn::convolution_direct
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_CASE_P( \
+        str, convolution_test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+        CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(Convolution, \
+        str), depthwise),  __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(...) \
+    EXPAND_ARGS(PARAMS_CONV(depthwise_scale_shift, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(depthwise_prelu, __VA_ARGS__))
+
+#define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
+    test_convolution_depthwise_params_t {alg,  ENGINE, ALGORITHM, \
+    EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
+    {__VA_ARGS__} }
+
+    INST_TEST_CASE(SimpleSmall,
+        PARAMS(nhwc, oihw, x, nhwc,
+               2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, oihw, x, nhwc,
+               2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+        PARAMS(nhwc, goihw, x, nhwc,
+               2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, goihw, x, nhwc,
+               2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+    );
+
+    INST_TEST_CASE(SimpleSmall_Blocked8,
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8, x, nhwc,
+               2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8, x, nhwc,
+               2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8_DW, x, nhwc,
+               2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8_DW, x, nhwc,
+               2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+    );
+
+    INST_TEST_CASE(SimpleSmall_Blocked_Tail8,
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8, x, nhwc,
+               2, 1, 15, 13, 13, 19, 11, 11, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8, x, nhwc,
+               2, 1, 77, 13, 13, 91, 13, 13, 1, 1, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8_DW, x, nhwc,
+               2, 21, 21, 16, 16, 21, 16, 16, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED8_DW, x, nhwc,
+               2, 77, 77, 9, 9, 77, 9, 9, 1, 1, 0, 0, 1, 1)
+    );
+
+    INST_TEST_CASE(SimpleSmall_Blocked16,
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED16, x, nhwc,
+               2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED16, x, nhwc,
+               2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED16_DW, x, nhwc,
+               2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+        PARAMS(nhwc, FMT_WEIGHTS_BLOCKED16_DW, x, nhwc,
+               2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+    );
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_common.hpp
index 7f3537b30..4c8445b18 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_common.hpp
@@ -25,7 +25,7 @@ template <typename data_t_src, typename data_t_wei,
           typename data_t_acc, typename data_t_dst>
 void compute_ref_conv_fwd(const mkldnn_convolution_desc_t &conv_desc,
         const memory &src, const memory &weights, const memory &bias, const memory &dst,
-        bool with_relu, float eltwise_alpha)
+        bool with_relu, float eltwise_alpha, const float* depthwise_weights)
 {
     int MB = conv_desc.src_desc.dims[0];
     int G = conv_desc.weights_desc.ndims == 5 ? conv_desc.weights_desc.dims[0] : 1;
@@ -47,7 +47,7 @@ void compute_ref_conv_fwd(const mkldnn_convolution_desc_t &conv_desc,
 
     data_t_src *src_data = (data_t_src *)src.get_data_handle();
     data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
-    data_t_dst *bias_data = (data_t_dst *)bias.get_data_handle();
+    float *bias_data = (float *)bias.get_data_handle();
     data_t_dst *dst_data = (data_t_dst *)dst.get_data_handle();
 
     const memory::desc src_d = src.get_primitive_desc().desc();
@@ -82,8 +82,6 @@ void compute_ref_conv_fwd(const mkldnn_convolution_desc_t &conv_desc,
                         a += src_data[map_index(src_d, iidx)]
                              * weights_data[map_index(
                                 weights_d, widx)];
-
-
                     }
                 }
             }
@@ -92,11 +90,19 @@ void compute_ref_conv_fwd(const mkldnn_convolution_desc_t &conv_desc,
 
             a_fp += bias_data[G > 1 ? g : oc];
 
+            if (depthwise_weights)
+                a_fp *= depthwise_weights[G > 1 ? g : oc];
+
             if (with_relu) {
                 a_fp = (a_fp > 0) ? a_fp : eltwise_alpha * a_fp;
             }
 
-            dst_data[map_index(dst_d, oidx)] = (data_t_dst) a_fp;
+            using D = memory::data_type;
+            if (data_traits<data_t_dst>::data_type != D::f32){
+               a_fp = nearbyintf(a_fp);
+            }
+
+            dst_data[map_index(dst_d, oidx)] = (data_t_dst)a_fp;
         }
     );
 }
@@ -115,7 +121,9 @@ protected:
         memory::data_type data_type_src = data_traits<data_t_src>::data_type;
         memory::data_type data_type_dst = data_traits<data_t_dst>::data_type;
         memory::data_type data_type_wei = data_traits<data_t_wei>::data_type;
-        memory::data_type data_type_bia = data_traits<data_t_wei>::data_type;
+        memory::data_type data_type_bia = data_traits<float>::data_type;
+
+        bool is_int8 = data_type_src == mkldnn_u8 || data_type_src == mkldnn_s8;
 
         test_convolution_dw_conv_sizes_t cd = p.sizes;
 
@@ -125,7 +133,7 @@ protected:
         int conv2_oh = (conv1_oh - ((cd.conv2_kh - 1) + 1) + 2 * cd.conv2_padh) / cd.conv2_strh + 1;
         int conv2_ow = (conv1_ow - ((cd.conv2_kw - 1) + 1) + 2 * cd.conv2_padw) / cd.conv2_strw + 1;
 
-        std::vector<int> conv1_padR = { cd.conv1_padh, cd.conv1_padw };
+        std::vector<ptrdiff_t> conv1_padR = { cd.conv1_padh, cd.conv1_padw };
         conv1_padR[0] += conv2_oh - conv1_oh;
         conv1_padR[1] += conv2_ow - conv1_ow;
 
@@ -159,27 +167,62 @@ protected:
         auto conv2_dst = memory({conv2_dst_desc, eng});
 
         fill_data<data_t_src>(conv1_src.get_primitive_desc().get_size()
-                / sizeof(data_t_src), (data_t_src *)conv1_src.get_data_handle(), 1., true);
+                / sizeof(data_t_src), (data_t_src *)conv1_src.get_data_handle(), (data_t_src)1, (data_t_src)1);
         fill_data<data_t_wei>(
                 conv1_weights.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)conv1_weights.get_data_handle(), 1., true);
-        fill_data<data_t_wei>(
+                / sizeof(data_t_wei),(data_t_wei *)conv1_weights.get_data_handle(), (data_t_wei)1, (data_t_wei)1);
+        fill_data<float>(
                 conv1_bias.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)conv1_bias.get_data_handle(), 1., true);
+                / sizeof(float),(float *)conv1_bias.get_data_handle(), 1., true);
         fill_data<data_t_wei>(
                 conv2_weights.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)conv2_weights.get_data_handle(), 1., true);
-        fill_data<data_t_wei>(
+                / sizeof(data_t_wei),(data_t_wei *)conv2_weights.get_data_handle(), (data_t_wei)1, (data_t_wei)1);
+        fill_data<float>(
                 conv2_bias.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)conv2_bias.get_data_handle(), 1., true);
+                / sizeof(float),(float *)conv2_bias.get_data_handle(), 1., true);
+
+//        auto conv1_depthwise_weights_desc = create_md({ cd.conv2_oc }, mkldnn::memory::data_type::f32, memory::x);
+//        auto conv1_depthwise_weights = memory({conv1_depthwise_weights_desc, eng});
+        std::vector<float> conv1_depthwise_weights;
+        conv1_depthwise_weights.resize(cd.conv1_oc);
+        fill_data<float>(conv1_depthwise_weights.size(), &conv1_depthwise_weights[0], 1.f / ((float)cd.ic), 1.f / ((float)cd.ic * cd.conv1_kh * cd.conv1_kw));
+
+        std::vector<float> conv2_depthwise_weights;
+        conv2_depthwise_weights.resize(cd.conv1_oc);
+        fill_data<float>(conv2_depthwise_weights.size(), &conv2_depthwise_weights[0], 1.f / ((float)cd.conv2_oc), 1.f / ((float)cd.conv2_oc * cd.conv2_kh * cd.conv2_kw));
+
+        std::vector<float> conv2_depthwise_bias;
+        conv2_depthwise_bias.resize(cd.conv1_oc);
+//        fill_data<float>(conv2_depthwise_bias.size(), &conv2_depthwise_bias[0], 1., true);
+        memset(&conv2_depthwise_bias[0], 0, conv2_depthwise_bias.size() * sizeof(float));
+
+//        auto conv2_depthwise_weights_desc = create_md({ cd.conv2_oc }, mkldnn::memory::data_type::f32, memory::x);
+//        auto conv2_depthwise_bias_desc = create_md({ cd.conv2_oc }, mkldnn::memory::data_type::f32, memory::x);
+//
+//        auto conv2_depthwise_weights = memory({conv2_depthwise_weights_desc, eng});
+//        auto conv2_depthwise_bias = memory({conv2_depthwise_bias_desc, eng});
+
+//        fill_data<float>(conv2_depthwise_weights.get_primitive_desc().get_size() / sizeof(float),
+//                         (float *)conv2_depthwise_weights.get_data_handle(), 1., true);
+//        memset((float*)conv2_depthwise_bias.get_data_handle(), 0, conv2_depthwise_bias.get_primitive_desc().get_size());
 
         mkldnn::post_ops conv1_post_ops;
         conv1_post_ops.append_eltwise(1.0, mkldnn::algorithm::eltwise_relu, 0.0f, 0.0f);
         conv1_post_ops.append_dw_conv(conv1_oh, conv1_ow, cd.conv2_kh, cd.conv2_kw, cd.conv2_strh, cd.conv2_strw,
                                       static_cast<const float*>(conv2_weights.get_data_handle()),
                                       static_cast<const float*>(conv2_bias.get_data_handle()));
+
+        if (is_int8)
+            conv1_post_ops.append_depthwise(depthwise_scale_shift, &conv2_depthwise_weights[0], &conv2_depthwise_bias[0]);
+
         conv1_post_ops.append_eltwise(1.0, mkldnn::algorithm::eltwise_relu, 0.0f, 0.0f);
         mkldnn::primitive_attr conv1_attr;
+
+        if (is_int8) {
+            conv1_attr.set_int_output_round_mode(mkldnn::round_nearest);
+            conv1_attr.set_output_scales(1 << 1 /*through C dim*/, conv1_depthwise_weights);
+        }
+
         conv1_attr.set_post_ops(conv1_post_ops);
 
         auto conv1_primitive_desc = convolution_forward::primitive_desc(conv1_desc, conv1_attr, eng);
@@ -197,8 +240,14 @@ protected:
 
         auto conv1_dst_ref = memory({conv1_dst_desc_ref, eng});
         auto conv2_dst_ref = memory({conv2_dst_desc, eng});
-        compute_ref_conv_fwd<data_t_src, data_t_wei, data_t_acc, data_t_dst>(conv1_desc_ref.data, conv1_src, conv1_weights, conv1_bias, conv1_dst_ref, true, 0.0f);
-        compute_ref_conv_fwd<data_t_dst, data_t_wei, data_t_acc, data_t_dst>(conv2_desc.data, conv1_dst_ref, conv2_weights, conv2_bias, conv2_dst_ref, true, 0.0f);
+
+        auto conv1_depthwise_weights_data = is_int8 ? &conv1_depthwise_weights[0] : nullptr;
+        auto conv2_depthwise_weights_data = is_int8 ? &conv2_depthwise_weights[0] : nullptr;
+
+        compute_ref_conv_fwd<data_t_src, data_t_wei, data_t_acc, data_t_dst>(conv1_desc_ref.data,
+                conv1_src, conv1_weights, conv1_bias, conv1_dst_ref, true, 0.0f, conv1_depthwise_weights_data);
+        compute_ref_conv_fwd<data_t_dst, data_t_wei, data_t_acc, data_t_dst>(conv2_desc.data,
+                conv1_dst_ref, conv2_weights, conv2_bias, conv2_dst_ref, true, 0.0f, conv2_depthwise_weights_data);
 
         compare_data<data_t_dst>(conv2_dst_ref, conv2_dst);
     }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_f32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_f32.cpp
index 4db7a2eeb..c519533a8 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_f32.cpp
@@ -82,7 +82,12 @@ INST_TEST_CASE(Mobilenet_Blocked,
     PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
            2, 288, 2, 4,  240, 1, 1, 0, 0, 1, 1,  240, 3, 3, 1, 1, 1, 1),  // 5_3
     PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
-           2, 240, 2, 4,  264, 1, 1, 0, 0, 1, 1,  264, 3, 3, 1, 1, 1, 1)   // 5_4
+           2, 240, 2, 4,  264, 1, 1, 0, 0, 1, 1,  264, 3, 3, 1, 1, 1, 1),  // 5_4
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 48, 75, 75,  48, 1, 1, 0, 0, 1, 1,  48, 3, 3, 1, 1, 2, 2),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 48, 75, 75,  48, 3, 3, 1, 1, 1, 1,  48, 3, 3, 1, 1, 2, 2)
+
 );
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_u8s8s32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_u8s8s32.cpp
new file mode 100644
index 000000000..bed193719
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_dw_conv_u8s8s32.cpp
@@ -0,0 +1,89 @@
+/*******************************************************************************
+* Copyright 2016-2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+
+#include "mkldnn.hpp"
+#include "test_convolution_dw_conv_common.hpp"
+namespace mkldnn {
+
+using convolution_test = convolution_dw_conv_test<uint8_t, int8_t, int32_t, uint8_t>;
+
+TEST_P(convolution_test, TestConvolutionDwConv)
+{
+}
+
+#define FMT_BIAS x
+#define FMT_DATA_BLOCKED nhwc
+
+#define EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::conv1_weights, mkldnn::memory::format::conv1_bias, \
+    mkldnn::memory::format::conv2_weights, mkldnn::memory::format::conv2_bias, mkldnn::memory::format::dst }
+
+#define FMT_WEIGHTS_BLOCKED OhIw8o4i
+
+#define FMT_WEIGHTS_DW_BLOCKED Goihw8g
+
+#define ENGINE mkldnn::engine::kind::cpu
+#define ALGORITHM mkldnn::convolution_direct
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+#define INST_TEST_CASE_(str, ...) INSTANTIATE_TEST_CASE_P( \
+        str, convolution_test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, ...) INST_TEST_CASE_( \
+        CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(TEST_CASE_NAME_PREFIX, \
+        str), dw_conv),  __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst, ...) \
+    test_convolution_dw_conv_params_t {ENGINE, ALGORITHM, \
+    EXPAND_FORMATS(src, conv1_weights, conv1_bias, conv2_weights, conv2_bias, dst), {__VA_ARGS__} }
+
+INST_TEST_CASE(Mobilenet_Blocked,
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 8, 19, 33,  56, 3, 3, 1, 1, 2, 2,  56, 3, 3, 1, 1, 1, 1), // 1_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 32, 19, 33,  56, 1, 1, 0, 0, 1, 1,  56, 3, 3, 1, 1, 2, 2), // 2_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 56, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 1, 1), // 2_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 9, 16,  112, 1, 1, 0, 0, 1, 1,  112, 3, 3, 1, 1, 2, 2), // 3_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 112, 4, 8,  208, 1, 1, 0, 0, 1, 1,  208, 3, 3, 1, 1, 1, 1),  // 3_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 208, 4, 8,  216, 1, 1, 0, 0, 1, 1,  216, 3, 3, 1, 1, 2, 2),  // 4_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 216, 2, 4,  328, 1, 1, 0, 0, 1, 1,  328, 3, 3, 1, 1, 1, 1),  // 4_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 328, 2, 4,  288, 1, 1, 0, 0, 1, 1,  288, 3, 3, 1, 1, 1, 1),  // 5_1
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  288, 1, 1, 0, 0, 1, 1,  288, 3, 3, 1, 1, 1, 1),  // 5_2
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 288, 2, 4,  240, 1, 1, 0, 0, 1, 1,  240, 3, 3, 1, 1, 1, 1),  // 5_3
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 240, 2, 4,  264, 1, 1, 0, 0, 1, 1,  264, 3, 3, 1, 1, 1, 1),  // 5_4
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 48, 75, 75,  48, 1, 1, 0, 0, 1, 1,  48, 3, 3, 1, 1, 2, 2),
+    PARAMS(FMT_DATA_BLOCKED, FMT_WEIGHTS_BLOCKED, FMT_BIAS, FMT_WEIGHTS_DW_BLOCKED, FMT_BIAS, FMT_DATA_BLOCKED,
+           2, 48, 75, 75,  48, 3, 3, 1, 1, 1, 1,  48, 3, 3, 1, 1, 2, 2)
+);
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_common.hpp
index 5337807ba..c0b6e2120 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_common.hpp
@@ -16,66 +16,12 @@
 
 #include "mkldnn_test_common.hpp"
 #include "gtest/gtest.h"
-
+#include "math_utils.hpp"
 #include "mkldnn.hpp"
 
-namespace mkldnn {
-
-
-template <typename T, typename A> inline T relu_fwd(T s, A alpha) {
-    return s > 0 ? s : static_cast<T>(s * alpha);
-}
-
-template <typename T> T tanh_fwd(T s) {
-    const float e = ::expf(2*s); /* maybe replace with -2*s? */
-    return static_cast<T>((e - 1.0) / (e + 1.0));
-}
-
-template <typename T, typename A> T elu_fwd(T s, A alpha) {
-    return s > 0 ? s : static_cast<T>(alpha * (::expf(s) - 1));
-}
-
-template <typename T>
-T square_fwd(T s) {
-    return s * s;
-}
-
-template <typename T>
-T abs_fwd(T s) {
-    return s > 0 ? s : -s;;
-}
-
-template <typename T>
-T sqrt_fwd(T s) {
-    return s > 0 ? ::sqrtf(s) : 0;
-}
-
-template <typename T, typename A>
-T linear_fwd(T s, A alpha, A beta) {
-    return alpha * s + beta;
-}
-
-template <typename T, typename A>
-T bounded_relu_fwd(T s, A alpha) {
-    s = s > 0 ? s : 0;
-    return s > alpha ? alpha : s;
-}
-
-template <typename T>
-T soft_relu_fwd(T s) {
-    return logf(1 + ::expf(s));
-}
+using namespace mkldnn::impl::math;
 
-template <typename T>
-T logistic_fwd(T s) {
-    T v = ::expf(s);
-    return v / (v + 1);
-}
-
-template <typename T, typename A>
-T clamp_fwd(T s, A alpha, A beta) {
-    return s > alpha ? (T)(alpha) : s < beta ? (T)(beta) : s;
-}
+namespace mkldnn {
 
 template <typename data_t_src, typename data_t_wei,
           typename data_t_acc, typename data_t_dst>
@@ -94,76 +40,60 @@ void compute_ref_conv_eltwise_fwd(const test_convolution_sizes_t &c,
     const memory::desc weights_d = weights.get_primitive_desc().desc();
     const memory::desc dst_d = dst.get_primitive_desc().desc();
 
+    size_t padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_oc = dst_d.data.layout_desc.blocking.padding_dims[1];
+
+    size_t padded_ic_w = weights_d.data.format == mkldnn_OhIw8o4i ? weights_d.data.layout_desc.blocking.padding_dims[1] :
+                                                                    src_d.data.layout_desc.blocking.padding_dims[1];
+    size_t padded_oc_w = weights_d.data.format == mkldnn_OhIw8o4i ? weights_d.data.layout_desc.blocking.padding_dims[0] :
+                                                                    dst_d.data.layout_desc.blocking.padding_dims[1];
+
     mkldnn::impl::parallel_nd(c.mb, c.ng, c.oc / c.ng, c.oh, c.ow,
         [&](int n, int g, int oc, int oh, int ow) {
-            int oidx = n * c.oc * c.oh * c.ow
-                       + g * c.oc / c.ng * c.oh * c.ow
-                       + oc * c.oh * c.ow + oh * c.ow + ow;
-
-            int didx = map_index(dst_d, oidx);
-            dst_data[didx] = bias_data ?
-                             bias_data[map_index(
-                                     bias.get_primitive_desc().desc(),
-                                     g * c.oc / c.ng + oc)] :
-                             data_t_dst{0};
-            for (int ic = 0; ic < c.ic / c.ng; ic++) {
-                for (int kh = 0; kh < c.kh; kh++) {
-                    for (int kw = 0; kw < c.kw; kw++) {
-                        int iw = ow * c.strw
-                                 - c.padw + kw * (1 + c.dilw);
-                        int ih = oh * c.strh
-                                 - c.padh + kh * (1 + c.dilh);
-                        if (iw < 0 || iw >= c.iw) continue;
-                        if (ih < 0 || ih >= c.ih) continue;
-                        int iidx = n * c.ic * c.ih * c.iw
-                                   + g * c.ic / c.ng * c.ih * c.iw
-                                   + ic * c.ih * c.iw + ih * c.iw + iw;
-                        int widx = g * c.oc / c.ng * c.ic
-                                   / c.ng * c.kh * c.kw
-                                   + oc * c.ic / c.ng * c.kh * c.kw
-                                   + ic * c.kh * c.kw + kh * c.kw + kw;
-
-                        dst_data[didx]
-                                += src_data[map_index(src_d, iidx)]
-                                   * weights_data[map_index(
-                                weights_d, widx)];
-                    }
-                }
+            size_t oidx = n * padded_oc * c.oh * c.ow
+                    + g * padded_oc / c.ng * c.oh * c.ow
+                    + oc * c.oh * c.ow + oh * c.ow + ow;
+
+            size_t didx = map_index(dst_d, oidx);
+            dst_data[didx] = bias_data
+                    ? bias_data[g * c.oc / c.ng + oc] : data_t_dst{0};
+
+            for (int ic = 0; ic < c.ic / c.ng; ic++)
+            for (int kh = 0; kh < c.kh; kh++)
+            for (int kw = 0; kw < c.kw; kw++)
+            {
+                int ih = oh * c.strh - c.padh + kh * (1 + c.dilh);
+                if (ih < 0 || ih >= c.ih) continue;
+                int iw = ow * c.strw - c.padw + kw * (1 + c.dilw);
+                if (iw < 0 || iw >= c.iw) continue;
+
+                size_t iidx = n * padded_ic * c.ih * c.iw
+                    + g * padded_ic / c.ng * c.ih * c.iw
+                    + ic * c.ih * c.iw + ih * c.iw + iw;
+                size_t widx = g * padded_oc_w / c.ng * padded_ic_w
+                    / c.ng * c.kh * c.kw
+                    + oc * padded_ic_w / c.ng * c.kh * c.kw
+                    + ic * c.kh * c.kw + kh * c.kw + kw;
+
+                dst_data[didx] += src_data[map_index(src_d, iidx)]
+                        * weights_data[map_index(weights_d, widx)];
             }
 
+            auto &d = dst_data[didx];
             switch (elt_alg) {
-                case eltwise_relu:
-                    dst_data[didx] = relu_fwd(dst_data[didx], elt_alpha);
-                    break;
-                case eltwise_tanh:
-                    dst_data[didx] = tanh_fwd(dst_data[didx]);
-                    break;
-                case eltwise_elu:
-                    dst_data[didx] = elu_fwd(dst_data[didx], elt_alpha);
-                    break;
-                case eltwise_square:
-                    dst_data[didx] = square_fwd(dst_data[didx]);
-                    break;
-                case eltwise_abs:
-                    dst_data[didx] = abs_fwd(dst_data[didx]);
-                    break;
-                case eltwise_sqrt:
-                    dst_data[didx] = sqrt_fwd(dst_data[didx]);
-                    break;
-                case eltwise_linear:
-                    dst_data[didx] = linear_fwd(dst_data[didx], elt_alpha, elt_beta);
-                    break;
-                case eltwise_bounded_relu:
-                    dst_data[didx] = bounded_relu_fwd(dst_data[didx], elt_alpha);
-                    break;
-                case eltwise_soft_relu:
-                    dst_data[didx] = soft_relu_fwd(dst_data[didx]);
-                    break;
-                case eltwise_logistic:
-                    dst_data[didx] = logistic_fwd(dst_data[didx]);
-                    break;
-                default:
-                    assert(!"unknown alg_kind");
+            case eltwise_relu: d = relu_fwd(d, elt_alpha); break;
+            case eltwise_tanh: d = tanh_fwd(d); break;
+            case eltwise_elu: d = elu_fwd(d, elt_alpha); break;
+            case eltwise_square: d = square_fwd(d); break;
+            case eltwise_abs: d = abs_fwd(d); break;
+            case eltwise_sqrt: d = sqrt_fwd(d); break;
+            case eltwise_linear: d = linear_fwd(d, elt_alpha, elt_beta); break;
+            case eltwise_bounded_relu: d = bounded_relu_fwd(d, elt_alpha); break;
+            case eltwise_soft_relu: d = soft_relu_fwd(d); break;
+            case eltwise_logistic: d = logistic_fwd(d); break;
+            case eltwise_clamp: d = clamp_fwd(d, elt_alpha, elt_beta); break;
+            case eltwise_exp: d = exp_fwd(d); break;
+            default: assert(!"unknown alg_kind");
             }
         }
     );
@@ -174,8 +104,7 @@ template <typename data_t_src, typename data_t_wei,
 class convolution_eltwise_test
     : public ::testing::TestWithParam<test_convolution_eltwise_params_t> {
 protected:
-    virtual void SetUp()
-    {
+    virtual void SetUp() {
         test_convolution_eltwise_params_t p
                 = ::testing::TestWithParam<
                 test_convolution_eltwise_params_t>::GetParam();
@@ -209,11 +138,15 @@ protected:
         auto dst_ref = memory({c_dst_desc, eng});
 
         fill_data<data_t_src>(c_src.get_primitive_desc().get_size()
-                / sizeof(data_t_src), (data_t_src *)c_src.get_data_handle(), data_t_src(0), data_t_src(1));
+                / sizeof(data_t_src), (data_t_src *)c_src.get_data_handle(),
+                data_t_src(0), data_t_src(1));
+        check_zero_tail<data_t_src>(1, c_src);
 
         fill_data<data_t_wei>(
                 c_weights.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)c_weights.get_data_handle(), data_t_wei(0), data_t_wei(1));
+                / sizeof(data_t_wei),(data_t_wei *)c_weights.get_data_handle(),
+                data_t_wei(0), data_t_wei(1));
+        check_zero_tail<data_t_wei>(1, c_weights);
 
         bool with_bias = p.formats.bias_format != memory::format::format_undef;
         auto c_bias_desc = with_bias ?
@@ -226,7 +159,7 @@ protected:
                     (data_t_dst *)c_bias.get_data_handle(), 1., true);
         }
 
-        std::vector<int> padR = { cd.padh, cd.padw };
+        std::vector<ptrdiff_t> padR = { cd.padh, cd.padw };
         for (int i = 0; i < 2; ++i) {
             if ((cd.ih - ((cd.kh - 1) * (cd.dilh + 1) + 1) + cd.padh + padR[0])
                 / cd.strh + 1 != cd.oh)
@@ -273,7 +206,10 @@ protected:
         compute_ref_conv_eltwise_fwd<data_t_src, data_t_wei, data_t_wei,
             data_t_dst>(cd, c_src, c_weights, c_bias, dst_ref, with_bias,
                         p.alg, eltwise_alpha, eltwise_beta);
-        compare_data<data_t_dst>(dst_ref, c_dst);
+        check_zero_tail<data_t_dst>(1, dst_ref);
+
+        compare_data<data_t_dst>(dst_ref, c_dst, 1e-2);
+        check_zero_tail<data_t_dst>(0, c_dst);
     }
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_f32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_f32.cpp
index 19a6def5e..9b751cffd 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_f32.cpp
@@ -18,220 +18,10 @@
 #include "gtest/gtest.h"
 #include "math_utils.hpp"
 #include "mkldnn.hpp"
-
-using namespace mkldnn::impl::math;
+#include "test_convolution_eltwise_forward_common.hpp"
 
 namespace mkldnn {
 
-template <typename data_t_src, typename data_t_wei,
-          typename data_t_acc, typename data_t_dst>
-void compute_ref_conv_eltwise_fwd(const test_convolution_sizes_t &c,
-        const memory &src, const memory &weights, const memory &bias,
-        const memory &dst, bool w_bias, algorithm elt_alg,
-        float elt_alpha, float elt_beta)
-{
-    data_t_src *src_data = (data_t_src *)src.get_data_handle();
-    data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
-    data_t_dst *bias_data
-            = (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
-    data_t_dst *dst_data = (data_t_dst *)dst.get_data_handle();
-
-    const memory::desc src_d = src.get_primitive_desc().desc();
-    const memory::desc weights_d = weights.get_primitive_desc().desc();
-    const memory::desc dst_d = dst.get_primitive_desc().desc();
-
-#pragma omp parallel for collapse(5) schedule(static)
-    for (int n = 0; n < c.mb; n++) {
-        for (int g = 0; g < c.ng; g++) {
-            for (int oc = 0; oc < c.oc / c.ng; oc++) {
-                for (int oh = 0; oh < c.oh; oh++) {
-                    for (int ow = 0; ow < c.ow; ow++) {
-                        int oidx = n * c.oc * c.oh * c.ow
-                                + g * c.oc / c.ng * c.oh * c.ow
-                                + oc * c.oh * c.ow + oh * c.ow + ow;
-
-                        int didx = map_index(dst_d, oidx);
-                        dst_data[didx] = bias_data ?
-                                bias_data[map_index(
-                                        bias.get_primitive_desc().desc(),
-                                        g * c.oc / c.ng + oc)] :
-                                data_t_dst{0};
-                        for (int ic = 0; ic < c.ic / c.ng; ic++) {
-                            for (int kh = 0; kh < c.kh; kh++) {
-                                for (int kw = 0; kw < c.kw; kw++) {
-                                    int iw = ow * c.strw
-                                          - c.padw + kw * (1 + c.dilw);
-                                    int ih = oh * c.strh
-                                          - c.padh + kh * (1 + c.dilh);
-                                    if (iw < 0 || iw >= c.iw) continue;
-                                    if (ih < 0 || ih >= c.ih) continue;
-                                    int iidx = n * c.ic * c.ih * c.iw
-                                            + g * c.ic / c.ng * c.ih * c.iw
-                                            + ic * c.ih * c.iw + ih * c.iw + iw;
-                                    int widx = g * c.oc / c.ng * c.ic
-                                                    / c.ng * c.kh * c.kw
-                                            + oc * c.ic / c.ng * c.kh * c.kw
-                                            + ic * c.kh * c.kw + kh * c.kw + kw;
-
-                                    dst_data[didx]
-                                            += src_data[map_index(src_d, iidx)]
-                                            * weights_data[map_index(
-                                                      weights_d, widx)];
-                                }
-                            }
-                        }
-
-                        switch (elt_alg) {
-                            case eltwise_relu: dst_data[didx] =
-                                    relu_fwd(dst_data[didx], elt_alpha);
-                                break;
-                            case eltwise_tanh: dst_data[didx] =
-                                    tanh_fwd(dst_data[didx]);
-                                break;
-                            case eltwise_elu: dst_data[didx] =
-                                    elu_fwd(dst_data[didx], elt_alpha);
-                                break;
-                            case eltwise_square: dst_data[didx] =
-                                    square_fwd(dst_data[didx]);
-                                break;
-                            case eltwise_abs: dst_data[didx] =
-                                    abs_fwd(dst_data[didx]);
-                                break;
-                            case eltwise_sqrt: dst_data[didx] =
-                                    sqrt_fwd(dst_data[didx]);
-                                break;
-                            case eltwise_linear: dst_data[didx] =
-                                    linear_fwd(dst_data[didx], elt_alpha,
-                                            elt_beta);
-                                break;
-                            case eltwise_bounded_relu: dst_data[didx] =
-                                    bounded_relu_fwd(dst_data[didx], elt_alpha);
-                                break;
-                            case eltwise_soft_relu: dst_data[didx] =
-                                    soft_relu_fwd(dst_data[didx]);
-                                break;
-                            case eltwise_logistic: dst_data[didx] =
-                                    logistic_fwd(dst_data[didx]);
-                                break;
-                            default: assert(!"unknown alg_kind");
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-template <typename data_t_src, typename data_t_wei,
-          typename data_t_acc, typename data_t_dst>
-class convolution_eltwise_test
-    : public ::testing::TestWithParam<test_convolution_eltwise_params_t> {
-protected:
-    virtual void SetUp()
-    {
-        test_convolution_eltwise_params_t p
-                = ::testing::TestWithParam<
-                test_convolution_eltwise_params_t>::GetParam();
-
-        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
-        ASSERT_EQ(p.aalgorithm, convolution_direct);
-        auto eng = engine(p.engine_kind, 0);
-        float eltwise_alpha = p.eltwise_alpha;
-        float eltwise_beta = p.eltwise_beta;
-
-        memory::data_type data_type_src = data_traits<data_t_src>::data_type;
-        memory::data_type data_type_dst = data_traits<data_t_dst>::data_type;
-        memory::data_type data_type_wei = data_traits<data_t_wei>::data_type;
-
-        test_convolution_sizes_t cd = p.sizes;
-
-        auto c_src_desc = create_md({ cd.mb, cd.ic, cd.ih, cd.iw },
-                data_type_src, p.formats.src_format);
-        auto c_weights_desc = cd.ng > 1 ?
-                create_md({ cd.ng, cd.oc / cd.ng, cd.ic / cd.ng, cd.kh, cd.kw },
-                        data_type_wei, p.formats.weights_format) :
-                create_md({ cd.oc, cd.ic, cd.kh, cd.kw },
-                        data_type_wei, p.formats.weights_format);
-        auto c_dst_desc = create_md({ cd.mb, cd.oc, cd.oh, cd.ow },
-                data_type_dst, p.formats.dst_format);
-
-        auto c_src = memory({c_src_desc, eng});
-        auto c_weights = memory({c_weights_desc, eng});
-        auto c_dst = memory({c_dst_desc, eng});
-
-        auto dst_ref = memory({c_dst_desc, eng});
-
-        fill_data<data_t_src>(c_src.get_primitive_desc().get_size()
-                / sizeof(data_t_src), (data_t_src *)c_src.get_data_handle(),
-                data_t_src(0), data_t_src(1));
-
-        fill_data<data_t_wei>(
-                c_weights.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)c_weights.get_data_handle(),
-                data_t_wei(0), data_t_wei(1));
-
-        bool with_bias = p.formats.bias_format != memory::format::format_undef;
-        auto c_bias_desc = with_bias ?
-                create_md({ cd.oc }, data_type_dst, p.formats.bias_format) :
-                create_md({}, data_type_dst, p.formats.bias_format);
-        auto c_bias = memory({c_bias_desc, eng});
-        if (with_bias) {
-            fill_data<data_t_dst>(
-                    c_bias.get_primitive_desc().get_size() / sizeof(data_t_dst),
-                    (data_t_dst *)c_bias.get_data_handle(), 1., true);
-        }
-
-        std::vector<int> padR = { cd.padh, cd.padw };
-        for (int i = 0; i < 2; ++i) {
-            if ((cd.ih - ((cd.kh - 1) * (cd.dilh + 1) + 1) + cd.padh + padR[0])
-                / cd.strh + 1 != cd.oh)
-                ++padR[0];
-            if ((cd.iw - ((cd.kw - 1) * (cd.dilw + 1) + 1) + cd.padw + padR[1])
-                / cd.strw + 1 != cd.ow)
-                ++padR[1];
-        }
-
-        auto test = [&]() {
-            mkldnn::post_ops ops;
-            ops.append_eltwise(1.0, p.alg, p.eltwise_alpha, p.eltwise_beta);
-
-            mkldnn::primitive_attr attr;
-            attr.set_post_ops(ops);
-
-            auto conv_desc = with_bias
-                ? convolution_forward::desc(prop_kind::forward_scoring,
-                        p.aalgorithm, c_src_desc, c_weights_desc, c_bias_desc,
-                        c_dst_desc, { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
-                        { cd.padh, cd.padw }, padR, padding_kind::zero)
-                : convolution_forward::desc(prop_kind::forward_scoring,
-                        p.aalgorithm, c_src_desc, c_weights_desc, c_dst_desc,
-                        { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
-                        { cd.padh, cd.padw }, padR, padding_kind::zero);
-
-            auto conv_primitive_desc =
-                convolution_forward::primitive_desc(conv_desc, attr, eng);
-
-            auto conv = with_bias
-                ? convolution_forward(conv_primitive_desc,
-                        c_src, c_weights, c_bias, c_dst)
-                : convolution_forward(conv_primitive_desc,
-                        c_src, c_weights, c_dst);
-            std::vector<primitive> pipeline;
-            pipeline.push_back(conv);
-
-            stream(stream::kind::lazy).submit(pipeline).wait();
-        };
-
-        if (catch_expected_failures(test, p.expect_to_fail, p.expected_status))
-            return;
-
-        compute_ref_conv_eltwise_fwd<data_t_src, data_t_wei, data_t_wei,
-            data_t_dst>(cd, c_src, c_weights, c_bias, dst_ref, with_bias,
-                        p.alg, eltwise_alpha, eltwise_beta);
-        compare_data<data_t_dst>(dst_ref, c_dst, 1e-2);
-    }
-};
-
 using convolution_test = convolution_eltwise_test<float, float, float, float>;
 
 TEST_P(convolution_test, TestConvolutionEltwise)
@@ -276,33 +66,35 @@ TEST_P(convolution_test, TestConvolutionEltwise)
     {__VA_ARGS__} }
 
     INST_TEST_CASE(SimpleSmall,
-        PARAMS(nchw, oihw, x, nchw,
-               2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
-        PARAMS(nchw, oihw, x, nchw,
-               2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
-        PARAMS(nchw, goihw, x, nchw,
-               2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
-        PARAMS(nchw, goihw, x, nchw,
-               2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
+        PARAMS(nchw, oihw, x, nchw, 2, 1, 32, 13, 13, 48, 11, 11, 3, 3, 0, 0, 1, 1),
+        PARAMS(nchw, oihw, x, nchw, 2, 1, 16, 13, 13, 48, 13, 13, 1, 1, 0, 0, 1, 1),
+        PARAMS(nchw, goihw, x, nchw, 2, 64, 64, 16, 16, 64, 16, 16, 3, 3, 0, 0, 1, 1),
+        PARAMS(nchw, goihw, x, nchw, 2, 32, 32, 9, 9, 32, 9, 9, 1, 1, 0, 0, 1, 1)
     );
 
     INST_TEST_CASE(SimpleSmall_Blocked,
-        PARAMS(nChw8c, Goihw8g, x, nChw8c,
-               1, 48, 48, 20, 20, 48, 20, 20, 3, 3, 1, 1, 1, 1),
-        PARAMS(nChw8c, OIhw8i8o, x, nChw8c,
-               1, 1, 48, 20, 20, 48, 20, 20, 1, 1, 0, 0, 1, 1),
-        PARAMS(nChw8c, OIhw8i8o, x, nChw8c,
-               1, 1, 48, 20, 20, 48, 20, 20, 3, 3, 0, 0, 1, 1)
+        PARAMS(nChw8c, Goihw8g, x, nChw8c, 1, 8, 8, 5, 5, 8, 5, 5, 3, 3, 1, 1, 1, 1),
+        PARAMS(nChw8c, OIhw8i8o, x, nChw8c, 1, 1, 48, 20, 20, 48, 20, 20, 1, 1, 0, 0, 1, 1),
+        PARAMS(nChw8c, OIhw8i8o, x, nChw8c, 1, 1, 48, 20, 20, 48, 20, 20, 3, 3, 0, 0, 1, 1)
+    );
+
+    INST_TEST_CASE(SimpleSmall_Blocked_Tail,
+        PARAMS(nChw8c, Goihw8g, x, nChw8c, 1, 47, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1),
+        PARAMS(nChw8c, OIhw8i8o, x, nChw8c, 1, 1, 47, 20, 20, 47, 20, 20, 1, 1, 0, 0, 1, 1),
+        PARAMS(nChw8c, OIhw8i8o, x, nChw8c, 1, 1, 47, 20, 20, 47, 20, 20, 3, 3, 0, 0, 1, 1)
     );
 
     INST_TEST_CASE(SimpleSmall_Blocked16,
-        PARAMS(nChw16c, Goihw16g, x, nChw16c,
-               1, 48, 48, 20, 20, 48, 20, 20, 3, 3, 1, 1, 1, 1),
-        PARAMS(nChw16c, OIhw16i16o, x, nChw16c,
-               1, 1, 48, 20, 20, 48, 20, 20, 1, 1, 0, 0, 1, 1),
-        PARAMS(nChw16c, OIhw16i16o, x, nChw16c,
-               1, 1, 48, 20, 20, 48, 20, 20, 3, 3, 0, 0, 1, 1),
-        PARAMS(nChw16c, OIhw16i16o, x, nChw16c,
-               2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
+        PARAMS(nChw16c, Goihw16g, x, nChw16c, 1, 48, 48, 20, 20, 48, 20, 20, 3, 3, 1, 1, 1, 1),
+        PARAMS(nChw16c, OIhw16i16o, x, nChw16c, 1, 1, 48, 20, 20, 48, 20, 20, 1, 1, 0, 0, 1, 1),
+        PARAMS(nChw16c, OIhw16i16o, x, nChw16c, 1, 1, 48, 20, 20, 48, 20, 20, 3, 3, 0, 0, 1, 1),
+        PARAMS(nChw16c, OIhw16i16o, x, nChw16c, 2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
+    );
+
+    INST_TEST_CASE(SimpleSmall_Blocked16_Tail,
+        PARAMS(nChw16c, Goihw16g, x, nChw16c, 1, 47, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1),
+        PARAMS(nChw16c, OIhw16i16o, x, nChw16c, 1, 1, 47, 20, 20, 47, 20, 20, 1, 1, 0, 0, 1, 1),
+        PARAMS(nChw16c, OIhw16i16o, x, nChw16c, 1, 1, 47, 20, 20, 47, 20, 20, 3, 3, 0, 0, 1, 1),
+        PARAMS(nChw16c, OIhw16i16o, x, nChw16c, 2, 1, 32, 32, 32, 32, 32, 32, 3, 3, 0, 0, 1, 1)
     );
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_x8s8f32s32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_x8s8f32s32.cpp
new file mode 100644
index 000000000..1e95fc334
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_eltwise_forward_x8s8f32s32.cpp
@@ -0,0 +1,109 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include "mkldnn_test_common.hpp"
+#include "gtest/gtest.h"
+#include "math_utils.hpp"
+#include "mkldnn.hpp"
+#include "test_convolution_eltwise_forward_common.hpp"
+
+namespace mkldnn {
+
+using convolution_test_u8s8s32f32 =
+        convolution_eltwise_test<uint8_t, int8_t, int32_t, float>;
+using convolution_test_s8s8s32f32 =
+        convolution_eltwise_test<int8_t, int8_t, int32_t, float>;
+
+#define EXPAND_FORMATS(src, weights, bias, dst) \
+    { mkldnn::memory::format::src, mkldnn::memory::format::weights, \
+    mkldnn::memory::format::bias, mkldnn::memory::format::dst }
+
+#define CONCAT_WITH_UNDERSCORE_(a,b) a ## _ ## b
+#define CONCAT_WITH_UNDERSCORE(a,b) CONCAT_WITH_UNDERSCORE_(a,b)
+
+#define INST_TEST_CASE_(str, test, ...) INSTANTIATE_TEST_CASE_P( \
+        str, test, ::testing::Values(__VA_ARGS__))
+
+#define INST_TEST_CASE(str, test, ...) INST_TEST_CASE_( \
+        CONCAT_WITH_UNDERSCORE(CONCAT_WITH_UNDERSCORE(Convolution, \
+        str), eltwise), test,  __VA_ARGS__)
+
+#define EXPAND_ARGS(args) args
+
+#define PARAMS(...) \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_elu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_tanh, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_square, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_abs, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_sqrt, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_linear, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_bounded_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_soft_relu, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_logistic, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_clamp, __VA_ARGS__)), \
+    EXPAND_ARGS(PARAMS_CONV(eltwise_exp, __VA_ARGS__))
+
+#define ELTWISE_ALPHA 0.5f
+#define ELTWISE_BETA 0.f
+
+#define PARAMS_CONV(alg, src, weights, bias, dst, ...) \
+    test_convolution_eltwise_params_t {alg,  mkldnn::engine::kind::cpu, \
+        mkldnn::convolution_direct, ELTWISE_ALPHA, ELTWISE_BETA, \
+    EXPAND_FORMATS(src, weights, bias, dst), /* empty attributes */ {}, \
+    {__VA_ARGS__} }
+
+#define INST_TEST_CASE_P_UNSIGNED(test) \
+TEST_P(test, TestConvolutionEltwise) {} \
+INST_TEST_CASE(SimpleSmall_Blocked16, test, \
+PARAMS(nhwc, OIhw4i16o4i, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 1, 1, 0, 0, 1, 1), \
+PARAMS(nhwc, Goihw16g, x, nhwc, 2, 32, 32, 13, 13, 32, 13, 13, 3, 3, 1, 1, 1, 1), \
+PARAMS(nhwc, OIhw4i16o4i, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 3, 3, 1, 1, 1, 1) \
+);\
+\
+INST_TEST_CASE(SimpleSmall_Blocked8, test, \
+PARAMS(nhwc, OhIw8o4i, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 1, 1, 0, 0, 1, 1), \
+PARAMS(nhwc, Goihw8g, x, nhwc, 2, 32, 32, 13, 13, 32, 13, 13, 3, 3, 1, 1, 1, 1), \
+PARAMS(nhwc, OhIw8o4i, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 3, 3, 1, 1, 1, 1) \
+);\
+\
+INST_TEST_CASE(SimpleSmall_Blocked8_Tail, test, \
+PARAMS(nhwc, OhIw8o4i, x, nhwc, 2, 1, 47, 20, 20, 47, 20, 20, 1, 1, 0, 0, 1, 1), \
+PARAMS(nhwc, Goihw8g, x, nhwc, 2, 47, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1), \
+PARAMS(nhwc, OhIw8o4i, x, nhwc, 2, 1, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1) \
+);
+
+#define INST_TEST_CASE_P_SIGNED(test) \
+TEST_P(test, TestConvolutionEltwise) {} \
+INST_TEST_CASE(SimpleSmall_Blocked16, test, \
+PARAMS(nhwc, OIhw4i16o4i_s8s8, x, nhwc, 2, 1, 32, 13, 13, 32, 12, 12, 3, 3, 0, 0, 1, 1), \
+PARAMS(nhwc, Goihw16g_s8s8, x, nhwc, 2, 32, 32, 13, 13, 32, 13, 13, 1, 1, 0, 0, 1, 1), \
+PARAMS(nhwc, OIhw4i16o4i_s8s8, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 3, 3, 1, 1, 1, 1) \
+);\
+\
+INST_TEST_CASE(SimpleSmall_Blocked8, test, \
+PARAMS(nhwc, OhIw8o4i_s8s8, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 1, 1, 0, 0, 1, 1), \
+PARAMS(nhwc, OhIw8o4i_s8s8, x, nhwc, 2, 1, 32, 13, 13, 32, 13, 13, 3, 3, 1, 1, 1, 1) \
+);\
+\
+INST_TEST_CASE(SimpleSmall_Blocked8_Tail, test, \
+PARAMS(nhwc, OhIw8o4i_s8s8, x, nhwc, 2, 1, 47, 20, 20, 47, 20, 20, 1, 1, 0, 0, 1, 1), \
+PARAMS(nhwc, OhIw8o4i_s8s8, x, nhwc, 2, 1, 47, 20, 20, 47, 20, 20, 3, 3, 1, 1, 1, 1) \
+);
+
+//INST_TEST_CASE_P_SIGNED(convolution_test_s8s8s32f32);
+INST_TEST_CASE_P_UNSIGNED(convolution_test_u8s8s32f32);
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common.hpp
index e3f2ac5c3..b87f3543e 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common.hpp
@@ -179,7 +179,7 @@ protected:
         check_zero_tail<data_t_wei>(1, c_weights.get());
         check_zero_tail<data_t_dst>(1, c_dst.get());
 
-        std::vector<int> padR = {
+        std::vector<ptrdiff_t> padR = {
             right_padding(cd.ih, cd.oh, cd.kh, cd.padh, cd.strh, cd.dilh),
             right_padding(cd.iw, cd.ow, cd.kw, cd.padw, cd.strw, cd.dilw)
         };
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common_3d.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common_3d.hpp
index 8291cddc2..9c4691f40 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common_3d.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_common_3d.hpp
@@ -197,7 +197,7 @@ protected:
         check_zero_tail<data_t_wei>(1, c_weights.get());
         check_zero_tail<data_t_dst>(1, c_dst.get());
 
-        std::vector<int> padR = {
+        std::vector<ptrdiff_t> padR = {
             right_padding(cd.id, cd.od, cd.kd, cd.padd, cd.strd, cd.dild),
             right_padding(cd.ih, cd.oh, cd.kh, cd.padh, cd.strh, cd.dilh),
             right_padding(cd.iw, cd.ow, cd.kw, cd.padw, cd.strw, cd.dilw)
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_f32_3d.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_f32_3d.cpp
index 632a55786..f76a0b7c0 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_f32_3d.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_f32_3d.cpp
@@ -82,4 +82,34 @@ INST_TEST_CASE_3D(SimpleSmall_Blocked16,
         2, 1, 32, 13, 13, 13, 48, 11, 11, 11, 3, 3, 3, 0, 0, 0, 1, 1, 1)
 );
 
+INST_TEST_CASE_3D(SimpleSmall_NCDHW_PLANAR,
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 1, 79, 79, 79, 1, 77, 77, 79, 5, 5, 5, 1, 1, 2, 1, 1, 1, 0, 0, 0),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 1, 79, 79, 79, 1, 75, 79, 75, 5, 5, 5, 2, 0, 2, 1, 1, 1, 0, 0, 0),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 9, 68, 68, 68, 1, 50, 50, 50, 5, 5, 5, 18, 18, 18, 1, 1, 1, 8, 8, 8),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 1, 75, 63, 91, 1, 73, 61, 91, 5, 5, 5, 1, 1, 2, 1, 1, 1, 0, 0, 0),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 1, 58, 41, 37, 1, 58, 37, 37, 5, 5, 5, 2, 0, 2, 1, 1, 1, 0, 0, 0),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 9, 68, 34, 48, 1, 50, 16, 30, 5, 5, 5, 18, 18, 18, 1, 1, 1, 8, 8, 8)
+);
+
+INST_TEST_CASE_3D(SimpleSmall_NCDHW_MSD,
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 1, 79, 79, 79, 1, 79, 79, 79, 5, 5, 5, 2, 2, 2, 1, 1, 1, 0, 0, 0),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 2, 77, 77, 77, 1, 77, 77, 77, 5, 5, 5, 4, 4, 4, 1, 1, 1, 1, 1, 1),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 3, 50, 50, 50, 1, 50, 50, 50, 5, 5, 5, 6, 6, 6, 1, 1, 1, 2, 2, 2),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 8, 30, 30, 30, 1, 30, 30, 30, 5, 5, 5, 16, 16, 16, 1, 1, 1, 7, 7, 7),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 40, 15, 15, 15, 1, 15, 15, 15, 5, 5, 5, 20, 20, 20, 1, 1, 1, 9, 9, 9),
+    PARAMS_3D(ncdhw, oidhw, FMT_BIAS, ncdhw,
+        2, 1, 41, 111, 111, 111, 1, 111, 111, 111, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0)
+);
+
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8fp.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8fp.cpp
index 7a1618f84..785c96ec8 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8fp.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8fp.cpp
@@ -28,7 +28,6 @@ TEST_P(convolution_test, TestConvolution)
 {
 }
 
-//#define TEST_PARAM_ATTR
 #define U8S8
 #define DIRECTION_FORWARD
 #include "convolution_common.h"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8s32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8s32.cpp
index bd04f9421..6d1d6f74d 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8s32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8s32.cpp
@@ -28,7 +28,6 @@ TEST_P(convolution_test, TestConvolution)
 {
 }
 
-//#define TEST_PARAM_ATTR
 #define U8S8
 #define DIRECTION_FORWARD
 #include "convolution_common.h"
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_neg_slope_f32.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8u8.cpp
index 1c57c300d..36c12db8a 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_neg_slope_f32.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_forward_u8s8u8.cpp
@@ -18,18 +18,19 @@
 #include "gtest/gtest.h"
 
 #include "mkldnn.hpp"
-#include "test_convolution_relu_forward_common.hpp"
+#include "test_convolution_forward_common.hpp"
 namespace mkldnn {
 
-using convolution_test = convolution_relu_test<float, float, float, float>;
+using convolution_test = convolution_forward_test<uint8_t, int8_t,
+                                                int32_t, int32_t>;
 
 TEST_P(convolution_test, TestConvolution)
 {
 }
 
-#define FP32
+#define U8S8
 #define DIRECTION_FORWARD
-#define NEGATIVE_SLOPE 0.2f
 #include "convolution_common.h"
+#undef TEST_PARAM_ATTR
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_common.hpp
deleted file mode 100644
index c5c1ab16f..000000000
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_convolution_relu_forward_common.hpp
+++ /dev/null
@@ -1,201 +0,0 @@
-/*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "mkldnn_test_common.hpp"
-#include "gtest/gtest.h"
-
-#include "mkldnn.hpp"
-
-namespace mkldnn {
-
-template <typename data_t_src, typename data_t_wei,
-          typename data_t_acc, typename data_t_dst>
-void compute_ref_conv_relu_fwd(const test_convolution_sizes_t &c,
-        const memory &src, const memory &weights, const memory &bias,
-        const memory &dst, bool w_bias, float negative_slope)
-{
-    data_t_src *src_data = (data_t_src *)src.get_data_handle();
-    data_t_wei *weights_data = (data_t_wei *)weights.get_data_handle();
-    data_t_dst *bias_data
-            = (data_t_dst *)(w_bias ? bias.get_data_handle() : nullptr);
-    data_t_dst *dst_data = (data_t_dst *)dst.get_data_handle();
-
-    const memory::desc src_d = src.get_primitive_desc().desc();
-    const memory::desc weights_d = weights.get_primitive_desc().desc();
-    const memory::desc dst_d = dst.get_primitive_desc().desc();
-
-    size_t padded_ic = src_d.data.layout_desc.blocking.padding_dims[1];
-    size_t padded_oc = dst_d.data.layout_desc.blocking.padding_dims[1];
-
-    mkldnn::impl::parallel_nd(c.mb, c.ng, c.oc / c.ng, c.oh, c.ow,
-        [&](int n, int g, int oc, int oh, int ow) {
-            size_t oidx = n * padded_oc * c.oh * c.ow
-                    + g * padded_oc / c.ng * c.oh * c.ow
-                    + oc * c.oh * c.ow + oh * c.ow + ow;
-            dst_data[map_index(dst_d, oidx)] = bias_data ?
-                    bias_data[map_index(
-                            bias.get_primitive_desc().desc(),
-                            g * padded_oc / c.ng + oc)] :
-                    data_t_dst{0};
-            for (int ic = 0; ic < c.ic / c.ng; ic++) {
-                for (int kh = 0; kh < c.kh; kh++) {
-                    for (int kw = 0; kw < c.kw; kw++) {
-                        int iw = ow * c.strw
-                              - c.padw + kw * (1 + c.dilw);
-                        int ih = oh * c.strh
-                              - c.padh + kh * (1 + c.dilh);
-                        if (iw < 0 || iw >= c.iw) continue;
-                        if (ih < 0 || ih >= c.ih) continue;
-                        size_t iidx = n * padded_ic * c.ih * c.iw
-                                + g * padded_ic / c.ng * c.ih * c.iw
-                                + ic * c.ih * c.iw + ih * c.iw + iw;
-                        size_t widx = g * padded_oc / c.ng * padded_ic
-                                        / c.ng * c.kh * c.kw
-                            + oc * padded_ic / c.ng * c.kh * c.kw
-                            + ic * c.kh * c.kw + kh * c.kw + kw;
-
-                        dst_data[map_index(dst_d, oidx)]
-                                += src_data[map_index(src_d, iidx)]
-                                * weights_data[map_index(
-                                          weights_d, widx)];
-                    }
-                }
-            }
-
-            if (dst_data[map_index(dst_d, oidx)] < 0) {
-                dst_data[map_index(dst_d, oidx)] =
-                    static_cast<data_t_dst>( negative_slope
-                    * dst_data[map_index(dst_d, oidx)] );
-            }
-        }
-    );
-}
-
-template <typename data_t_src, typename data_t_wei,
-          typename data_t_acc, typename data_t_dst>
-class convolution_relu_test
-    : public ::testing::TestWithParam<test_convolution_params_t> {
-protected:
-    virtual void SetUp() {
-        auto p = ::testing::TestWithParam<test_convolution_params_t>::GetParam();
-        catch_expected_failures([=](){Test();}, p.expect_to_fail,
-                    p.expected_status);
-    }
-
-    void Test() {
-        auto p = ::testing::TestWithParam<test_convolution_params_t>::GetParam();
-        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
-        ASSERT_EQ(p.aalgorithm, convolution_direct);
-        auto eng = engine(p.engine_kind, 0);
-        float negative_slope = p.relu_negative_slope;
-
-        memory::data_type data_type_src = data_traits<data_t_src>::data_type;
-        memory::data_type data_type_dst = data_traits<data_t_dst>::data_type;
-        memory::data_type data_type_wei = data_traits<data_t_wei>::data_type;
-
-        test_convolution_sizes_t cd = p.sizes;
-
-        auto c_src_desc = create_md({ cd.mb, cd.ic, cd.ih, cd.iw },
-                data_type_src, p.formats.src_format);
-        auto c_weights_desc = cd.ng > 1 ?
-                create_md({ cd.ng, cd.oc / cd.ng, cd.ic / cd.ng, cd.kh, cd.kw },
-                        data_type_wei, p.formats.weights_format) :
-                create_md({ cd.oc, cd.ic, cd.kh, cd.kw },
-                        data_type_wei, p.formats.weights_format);
-        auto c_dst_desc = create_md({ cd.mb, cd.oc, cd.oh, cd.ow },
-                data_type_dst, p.formats.dst_format);
-
-        auto c_src = memory({c_src_desc, eng});
-        auto c_weights = memory({c_weights_desc, eng});
-        auto c_dst = memory({c_dst_desc, eng});
-
-        auto dst_ref = memory({c_dst_desc, eng});
-
-        fill_data<data_t_src>(c_src.get_primitive_desc().get_size()
-                / sizeof(data_t_src), (data_t_src *)c_src.get_data_handle());
-        // TODO: Temporary workaround for testing of convolution + relu
-        if (cd.mb) {
-            data_t_src *src_data = (data_t_src *)c_src.get_data_handle();
-            const int mb_chunk = static_cast<int>(
-                (c_src.get_primitive_desc().get_size() / sizeof(data_t_src))
-                / cd.mb );
-            for (int i = 0; i < cd.mb * mb_chunk; ++i) {
-                if ((i / mb_chunk) % 2) src_data[i] *= (data_t_src)-1.;
-            }
-        }
-
-        fill_data<data_t_wei>(
-                c_weights.get_primitive_desc().get_size()
-                / sizeof(data_t_wei),(data_t_wei *)c_weights.get_data_handle());
-        fill_data<data_t_dst>(
-                c_dst.get_primitive_desc().get_size()
-                / sizeof(data_t_dst),(data_t_dst *)c_dst.get_data_handle());
-
-        bool with_bias = p.formats.bias_format != memory::format::format_undef;
-        auto c_bias_desc = with_bias ?
-                create_md({ cd.oc }, data_type_dst, p.formats.bias_format) :
-                create_md({}, data_type_dst, p.formats.bias_format);
-        auto c_bias = memory({c_bias_desc, eng});
-        if (with_bias) {
-            fill_data<data_t_dst>(
-                    c_bias.get_primitive_desc().get_size() / sizeof(data_t_dst),
-                    (data_t_dst *)c_bias.get_data_handle(), 1., true);
-        }
-        check_zero_tail<data_t_src>(1, c_src);
-        check_zero_tail<data_t_wei>(1, c_weights);
-        check_zero_tail<data_t_dst>(1, c_dst);
-
-        std::vector<int> padR = {
-            right_padding(cd.ih, cd.oh, cd.kh, cd.padh, cd.strh, cd.dilh),
-            right_padding(cd.iw, cd.ow, cd.kw, cd.padw, cd.strw, cd.dilw)
-        };
-
-        auto conv_desc = with_bias
-            ? convolution_forward::desc(prop_kind::forward_scoring,
-                    p.aalgorithm, c_src_desc, c_weights_desc, c_bias_desc,
-                    c_dst_desc, { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
-                    { cd.padh, cd.padw }, padR, padding_kind::zero)
-        : convolution_forward::desc(prop_kind::forward_scoring,
-                p.aalgorithm, c_src_desc, c_weights_desc, c_dst_desc,
-                { cd.strh, cd.strw }, { cd.dilh, cd.dilw },
-                { cd.padh, cd.padw }, padR, padding_kind::zero);
-
-        auto conv_relu_desc =
-            convolution_relu_forward::desc(conv_desc, negative_slope);
-        auto conv_primitive_desc =
-            convolution_relu_forward::primitive_desc(conv_relu_desc, eng);
-
-        auto conv = with_bias
-            ? convolution_relu_forward(conv_primitive_desc,
-                    c_src, c_weights, c_bias, c_dst)
-            : convolution_relu_forward(conv_primitive_desc,
-                    c_src, c_weights, c_dst);
-        std::vector<primitive> pipeline;
-        pipeline.push_back(conv);
-
-        stream(stream::kind::lazy).submit(pipeline).wait();
-
-        compute_ref_conv_relu_fwd<data_t_src, data_t_wei, data_t_wei,
-            data_t_dst>(cd, c_src, c_weights, c_bias, dst_ref, with_bias,
-                    negative_slope);
-        check_zero_tail<data_t_dst>(1, dst_ref);
-        compare_data<data_t_dst>(dst_ref, c_dst);
-        check_zero_tail<data_t_dst>(0, c_dst);
-
-    }
-};
-
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_deconvolution.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_deconvolution.cpp
index 1c2bac88b..71e0675a1 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_deconvolution.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_deconvolution.cpp
@@ -120,7 +120,7 @@ private:
 
    std::shared_ptr<engine> eng;
    bool with_bias;
-   std::vector<int> padR;
+   std::vector<ptrdiff_t> padR;
 protected:
     virtual void SetUp() {
         auto p = ::testing::TestWithParam<deconvolution_test_params>::GetParam();
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_depthwise.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_depthwise.cpp
index 13253980b..932ec73ab 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_depthwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_depthwise.cpp
@@ -95,7 +95,8 @@ private:
     std::shared_ptr<memory> bias;
     std::shared_ptr<memory> dst;
     std::shared_ptr<memory> workspace;
-    std::shared_ptr<memory::desc> data_desc;
+    std::shared_ptr<memory::desc> src_desc;
+    std::shared_ptr<memory::desc> dst_desc;
     std::shared_ptr<memory::desc> weights_desc;
     std::shared_ptr<memory::desc> bias_desc;
     std::shared_ptr<depthwise_forward::primitive_desc> depthwise_prim_desc;
@@ -126,9 +127,10 @@ protected:
 
         memory::dims dims = p.data_format == mkldnn_nc ? memory::dims({p.dims[0], p.dims[1]}) : p.dims;
 
-        data_desc.reset(new memory::desc(dims, data_type, p.data_format));
-        src.reset(new memory({*data_desc, *eng}));
-        dst.reset(new memory({*data_desc, *eng}));
+        src_desc.reset(new memory::desc(dims, data_type, p.data_format));
+        dst_desc.reset(new memory::desc(dims, data_type, p.data_format));
+        src.reset(new memory({*src_desc, *eng}));
+        dst.reset(new memory({*dst_desc, *eng}));
         fill_data<data_t>(data_size, (data_t *)src->get_data_handle(),
                           data_t(0), data_t(1));
 
@@ -146,8 +148,8 @@ protected:
 
         std::vector<primitive> pipeline;
         auto depthwise_desc = with_bias
-                              ? depthwise_forward::desc(prop_kind::forward_training, p.alg_kind, *data_desc, *data_desc, *weights_desc, *bias_desc)
-                              : depthwise_forward::desc(prop_kind::forward_training, p.alg_kind, *data_desc, *data_desc, *weights_desc);
+                              ? depthwise_forward::desc(prop_kind::forward_training, p.alg_kind, *src_desc, *dst_desc, *weights_desc, *bias_desc)
+                              : depthwise_forward::desc(prop_kind::forward_training, p.alg_kind, *src_desc, *dst_desc, *weights_desc);
         depthwise_prim_desc.reset(new depthwise_forward::primitive_desc(depthwise_desc, *eng));
 
         auto depthwise = with_bias
@@ -158,7 +160,7 @@ protected:
         auto s = stream(stream::kind::lazy);
         s.submit(pipeline).wait();
 
-        check_depthwise_fwd(p, *data_desc, *src, *weights, *bias, with_bias, *dst);
+        check_depthwise_fwd(p, *src_desc, *src, *weights, *bias, with_bias, *dst);
     }
 };
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_eltwise.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_eltwise.cpp
index e75e37720..b1e13811f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_eltwise.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_eltwise.cpp
@@ -16,116 +16,12 @@
 
 #include "gtest/gtest.h"
 #include "mkldnn_test_common.hpp"
-
+#include "math_utils.hpp"
 #include "mkldnn.hpp"
 
-namespace mkldnn {
-
-template <typename T, typename A> inline T relu_fwd(T s, A alpha) {
-    return s > 0 ? s : static_cast<T>(s * alpha);
-}
-template <typename T, typename A> inline T relu_bwd(T dd, T s, A alpha) {
-    return s > 0 ? dd : static_cast<T>(dd * alpha);
-}
-template <typename T> T tanh_fwd(T s) {
-    return static_cast<T>(::tanhf((float)s));
-}
-template <typename T> T tanh_bwd(T dd, T s) {
-    const float th = ::tanhf((float)s);
-    return static_cast<T>(dd * (1 - th) * (1 + th));
-}
-
-template <typename T, typename A> T elu_fwd(T s, A alpha) {
-    return s > 0 ? s : static_cast<T>(alpha * (::expf(s) - 1));
-}
-template <typename T, typename A> T elu_bwd(T dd, T s, A alpha) {
-    return static_cast<T>(dd * (s > 0 ? 1 : alpha * ::expf(s)));
-}
-
-template <typename T>
-T square_fwd(T s) {
-    return s * s;
-}
+using namespace mkldnn::impl::math;
 
-template <typename T>
-T square_bwd(T dd, T s) {
-    return dd * 2*s;
-}
-
-template <typename T>
-T abs_fwd(T s) {
-    return s > 0 ? s : -s;;
-}
-
-template <typename T>
-T abs_bwd(T dd, T s) {
-    return dd * (s > 0 ? 1 : s < 0 ? -1 : 0);
-}
-
-template <typename T>
-T sqrt_fwd(T s) {
-    return s > 0 ? ::sqrtf(s) : 0;
-}
-
-template <typename T>
-T sqrt_bwd(T dd, T s) {
-    return s > 0 ? dd / (2 * ::sqrtf(s)) : 0;
-}
-
-template <typename T, typename A>
-T linear_fwd(T s, A alpha, A beta) {
-    return alpha * s + beta;
-}
-
-template <typename T, typename A>
-T linear_bwd(T dd, T s, A alpha, A beta) {
-    (void) s;
-    (void) beta;
-    return dd * alpha;
-}
-
-template <typename T, typename A>
-T bounded_relu_fwd(T s, A alpha) {
-    s = s > 0 ? s : 0;
-    return s > alpha ? alpha : s;
-}
-
-template <typename T, typename A>
-T bounded_relu_bwd(T dd, T s, A alpha) {
-    return dd * ((0 < s && s < alpha) ? 1 : 0);
-}
-
-template <typename T>
-T soft_relu_fwd(T s) {
-    return s < (T)logf(FLT_MAX) ? log1pf(::expf(s)) : s;
-}
-
-template <typename T>
-T soft_relu_bwd(T dd, T s) {
-    return dd / (1 + ::expf(-s));
-}
-
-template <typename T>
-T logistic_fwd(T s) {
-    T v = (T)(::expf(- (float)s));
-    return 1 / (1 + v);
-}
-
-template <typename T>
-T logistic_bwd(T dd, T s) {
-    T v = logistic_fwd<T>(s);
-    return dd * v * (1 - v);
-}
-
-template <typename T, typename A>
-T clamp_fwd(T s, A alpha, A beta) {
-    return s > alpha ? (T)(alpha) : s < beta ? (T)(beta) : s;
-}
-
-template <typename T, typename A>
-T clamp_bwd(T dd, T s, A alpha, A beta) {
-    return dd * ((beta < s && s < alpha) ? 1 : 0);
-}
+namespace mkldnn {
 
 template <typename data_t>
 struct eltwise_test_params {
@@ -141,7 +37,7 @@ struct eltwise_test_params {
 
 size_t n_elems(const memory::desc &md) {
     size_t p = 1;
-    const int *pdims = md.data.layout_desc.blocking.padding_dims;
+    const ptrdiff_t *pdims = md.data.layout_desc.blocking.padding_dims;
     for (int i = 0; i < md.data.ndims; ++i)
         p *= (size_t)(pdims[i]);
     return p;
@@ -172,6 +68,8 @@ void check_eltwise_fwd(const eltwise_test_params<data_t> &p,
         case eltwise_soft_relu:   ref_d = soft_relu_fwd(s);               break;
         case eltwise_logistic:    ref_d = logistic_fwd(s);                break;
         case eltwise_clamp:       ref_d = clamp_fwd(s, p.alpha, p.beta);  break;
+        case eltwise_exp:         ref_d = exp_fwd(s);                     break;
+        case eltwise_not:         ref_d = not_fwd(s);                     break;
         default: assert(!"unknown alg_kind");
         }
         dst_data[i] = ref_d;
@@ -236,6 +134,7 @@ void check_eltwise_bwd(const eltwise_test_params<data_t> &p,
             break;
         case eltwise_logistic: ref_ds = logistic_bwd(ref_dd, ref_s); break;
         case eltwise_clamp: ref_ds = clamp_bwd(ref_dd, ref_s, p.alpha, p.beta); break;
+        case eltwise_exp: ref_ds = exp_bwd(ref_dd, ref_s); break;
         default: assert(!"unknown alg_kind");
         }
         EXPECT_NEAR(diff_src_data[map_index(diff_data_d, i)], ref_ds, 1.e-6);
@@ -289,7 +188,7 @@ protected:
 
         data_t data_median = data_t(0);
         data_t data_deviation
-                = p.alg_kind == eltwise_elu ? data_t(1) : data_t(200);
+                = p.alg_kind == eltwise_elu || p.alg_kind == eltwise_exp ? data_t(1) : data_t(200);
         fill_data<data_t>(n_elems(*data_desc), (data_t *)src->get_data_handle(),
                 data_median, data_deviation);
         check_zero_tail<data_t>(1, *src);
@@ -366,13 +265,16 @@ TEST_P(eltwise_test_float, TestsEltwise)
     EXPAND(PARAMS(eltwise_square, __VA_ARGS__)), \
     EXPAND(PARAMS(eltwise_abs, __VA_ARGS__))
 
+
 #define PARAMS_ALL_ALG_SDPART(...) \
     EXPAND(PARAMS(eltwise_sqrt, __VA_ARGS__)), \
     EXPAND(PARAMS(eltwise_linear, __VA_ARGS__)), \
     EXPAND(PARAMS(eltwise_soft_relu, __VA_ARGS__)), \
     EXPAND(PARAMS(eltwise_bounded_relu, __VA_ARGS__)), \
     EXPAND(PARAMS(eltwise_logistic, __VA_ARGS__)), \
-    EXPAND(PARAMS(eltwise_clamp, __VA_ARGS__))
+    EXPAND(PARAMS(eltwise_clamp, __VA_ARGS__)), \
+    EXPAND(PARAMS(eltwise_exp, __VA_ARGS__))
+
 
 #define INST_TEST_CASE(str, ...) INSTANTIATE_TEST_CASE_P( \
         str, eltwise_test_float, ::testing::Values(__VA_ARGS__))
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_gemm_common.hpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_gemm_common.hpp
index fa8e68367..f468d3e63 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_gemm_common.hpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_gemm_common.hpp
@@ -33,8 +33,14 @@
 
 namespace mkldnn {
 
-struct test_params {
+struct test_igemm_params {
     char offsetc;
+    bool zero_oa;
+    bool zero_ob;
+    bool zero_oc;
+};
+
+struct test_params {
     char transA;
     char transB;
     int M;
@@ -46,6 +52,7 @@ struct test_params {
     int ldb;
     int ldc;
 
+    test_igemm_params igemm_params;
     bool expect_to_fail;
     mkldnn_status_t expected_status;
 };
@@ -77,9 +84,9 @@ void ref_gemm(const char *transa, const char *transb, int m, int n, int k,
 template <typename b_dt>
 void ref_gemm_s8x8s32(const char *transa, const char *transb,
         const char *offsetc, int m, int n, int k, const float alpha,
-        int8_t *A, int lda, const int8_t *ao, b_dt *B, int ldb,
-        const int8_t *bo, const float beta, int32_t *C, int ldc,
-        const int32_t *co) {
+        int8_t *A, int lda, const int8_t *oa, b_dt *B, int ldb,
+        const int8_t *ob, const float beta, int32_t *C, int ldc,
+        const int32_t *oc) {
 
     bool OCisR = (*offsetc == 'R' || *offsetc == 'r');
     bool OCisC = (*offsetc == 'C' || *offsetc == 'c');
@@ -104,14 +111,14 @@ void ref_gemm_s8x8s32(const char *transa, const char *transb,
     const int a_cols = AisN ? k : m;
     mkldnn::impl::parallel_nd(a_cols, a_rows, [&](int j, int i) {
         da_setter(i, j,
-            static_cast<double>(ia_accessor(i, j)) + static_cast<double>(ao[0]));
+            static_cast<double>(ia_accessor(i, j)) + static_cast<double>(oa[0]));
     });
 
     const int b_rows = BisN ? k : n;
     const int b_cols = BisN ? n : k;
     mkldnn::impl::parallel_nd(b_cols, b_rows, [&](int j, int i) {
         db_setter(i, j,
-            static_cast<double>(ib_accessor(i, j)) + static_cast<double>(bo[0]));
+            static_cast<double>(ib_accessor(i, j)) + static_cast<double>(ob[0]));
     });
 
     ref_gemm(transa, transb, m, n, k, 1.0, dA, lda, dB, ldb, 0.0, dC, ldc);
@@ -120,7 +127,7 @@ void ref_gemm_s8x8s32(const char *transa, const char *transb,
     auto f2d = [=] (float v) { return static_cast<double>(v); };
 
     mkldnn::impl::parallel_nd(n, m, [&] (int j, int i) {
-        double coffset = OCisR ? i2d(co[j]) : OCisC ? i2d(co[i]) : i2d(co[0]);
+        double coffset = OCisR ? i2d(oc[j]) : OCisC ? i2d(oc[i]) : i2d(oc[0]);
         double val = ((beta == 0.0f) ? 0.0 : f2d(beta) * i2d(C[i + j * ldc]))
             + f2d(alpha) * dC[i + j * ldc] + coffset;
         C[i + j * ldc] =
@@ -132,20 +139,31 @@ void ref_gemm_s8x8s32(const char *transa, const char *transb,
     test_free((char *)dC);
 }
 
-template <typename T>
-void compare(int M, int N, int ldc, T *C, T *C_ref, int K = 1) {
-    mkldnn::impl::parallel_nd(N, ldc, [&](int i, int j) {
-        T ref = C_ref[i*ldc + j];
-        T got = C[i*ldc + j];
-        T diff = got - ref;
-        if (data_traits<T>::data_type == memory::data_type::f32) {
-            T e = (std::abs(ref) > 1e-4) ? diff / ref : diff;
-            EXPECT_NEAR(e, 0.0, 1e-4)
-                << "Row: " << j << " Column: " << i;
+template <typename b_dt, typename c_dt>
+void compare(int m, int n, const c_dt *c, const c_dt *c_ref, int ldc,
+        float alpha = 1.0f, float beta = 0.0f, int k = 1) {
+    using data_type = memory::data_type;
+    mkldnn::impl::parallel_nd(n, ldc, [&](int i, int j) {
+        c_dt ref = c_ref[i*ldc + j];
+        c_dt got = c[i*ldc + j];
+        c_dt diff = got - ref;
+
+        if (data_traits<b_dt>::data_type == data_type::f32) {
+            c_dt e = (std::abs(ref) > 1e-4) ? diff / ref : diff;
+            EXPECT_NEAR(e, 0.0, 1e-4) << "Row: " << j << " Col: " << i;
         } else {
-            T eps = K / 1000 + 1;
-            EXPECT_NEAR(diff, 0, eps)
-                << "Row: " << j << " Column: " << i;
+            // igemm
+            if (alpha == 1.0f) {
+                EXPECT_NEAR(diff, 0, 1) << "Row: " << j << " Col: " << i;
+            } else {
+                if (data_traits<b_dt>::data_type == data_type::u8) {
+                    c_dt eps = k / 1000 + 1;
+                    EXPECT_NEAR(diff, 0, eps) << "Row: " << j << " Col: " << i;
+                } else if (data_traits<b_dt>::data_type == data_type::s8) {
+                    c_dt eps = k / 500 + 1;
+                    EXPECT_NEAR(diff, 0, eps) << "Row: " << j << " Col: " << i;
+                }
+            }
         }
     });
 }
@@ -165,15 +183,23 @@ inline T* get_matrix_buffer(size_t n) {
 }
 
 template <typename a_dt, typename b_dt, typename c_dt>
-inline void fill_matrix(size_t sizeA, size_t sizeB, size_t sizeC, size_t sizeco,
-        a_dt *A, b_dt *B, c_dt *C, a_dt *ao, a_dt *bo, c_dt *co) {
+inline void fill_matrix(const test_params &p, size_t sizeA, size_t sizeB,
+        size_t sizeC, size_t sizeco, a_dt *A, b_dt *B, c_dt *C, a_dt *oa,
+        a_dt *ob, c_dt *oc) {
     fill_data<a_dt>(sizeA, A);
     fill_data<b_dt>(sizeB, B);
     fill_data<c_dt>(sizeC, C);
-    if (ao != nullptr && bo != nullptr && co != nullptr) {
-        fill_data<a_dt>(1, ao);
-        fill_data<a_dt>(1, bo);
-        fill_data<c_dt>(sizeco, co);
+    if (oa != nullptr && ob != nullptr && oc != nullptr) {
+        if (p.igemm_params.zero_oa) (*oa) = 0;
+        else fill_data<a_dt>(1, oa);
+
+        if (p.igemm_params.zero_ob) (*ob) = 0;
+        else fill_data<a_dt>(1, ob);
+
+        if (p.igemm_params.zero_oc) {
+            for (size_t i = 0; i < sizeco; i++)
+                oc[i] = 0;
+        } else fill_data<c_dt>(sizeco, oc);
     }
 }
 
@@ -190,37 +216,37 @@ void run_test_gemm<int8_t, uint8_t, int32_t>(const test_params &p) {
     int32_t *C = get_matrix_buffer<int32_t>(sizeC);
     int32_t *C_ref = get_matrix_buffer<int32_t>(sizeC);
 
-    bool OCisR = (p.offsetc == 'R' || p.offsetc == 'r');
-    bool OCisC = (p.offsetc == 'C' || p.offsetc == 'c');
+    bool OCisR = (p.igemm_params.offsetc == 'R' || p.igemm_params.offsetc == 'r');
+    bool OCisC = (p.igemm_params.offsetc == 'C' || p.igemm_params.offsetc == 'c');
     size_t sizeco = OCisR ? p.N : OCisC ? p.M : 1;
 
-    int8_t ao, bo;
-    int32_t *co = get_matrix_buffer<int32_t>(sizeco);
+    int8_t oa, ob;
+    int32_t *oc = get_matrix_buffer<int32_t>(sizeco);
 
-    fill_matrix<int8_t, uint8_t, int32_t>(sizeA, sizeB, sizeC, sizeco, A, B, C,
-        &ao, &bo, co);
+    fill_matrix<int8_t, uint8_t, int32_t>(p, sizeA, sizeB, sizeC, sizeco,
+        A, B, C, &oa, &ob, oc);
 
     mkldnn::impl::parallel_nd(p.ldc * p.N,
         [&](int i) { C_ref[i] = static_cast<int32_t>(C[i]); });
 
-    auto status = mkldnn_gemm_s8u8s32(&p.transA, &p.transB, &p.offsetc,
-        &p.M, &p.N, &p.K, &p.alpha, A, &p.lda, &ao, B, &p.ldb, &bo,
-        &p.beta, C, &p.ldc, co);
+    auto status = mkldnn_gemm_s8u8s32(&p.transA, &p.transB, &p.igemm_params.offsetc,
+        &p.M, &p.N, &p.K, &p.alpha, A, &p.lda, &oa, B, &p.ldb, &ob,
+        &p.beta, C, &p.ldc, oc);
 
     if (status != mkldnn_success)
         throw error(status, "mkldnn_gemm_s8u8s32 returned error");
 
-    ref_gemm_s8x8s32<uint8_t>(&p.transA, &p.transB, &p.offsetc, p.M, p.N,
-        p.K, p.alpha, A, p.lda, &ao, B, p.ldb, &bo, p.beta, C_ref,
-        p.ldc, co);
+    ref_gemm_s8x8s32<uint8_t>(&p.transA, &p.transB, &p.igemm_params.offsetc, p.M, p.N,
+        p.K, p.alpha, A, p.lda, &oa, B, p.ldb, &ob, p.beta, C_ref,
+        p.ldc, oc);
 
-    compare(p.M, p.N, p.ldc, C, C_ref, p.K);
+    compare<uint8_t, int32_t>(p.M, p.N, C, C_ref, p.ldc, p.alpha, p.beta, p.K);
 
     test_free((char *)A);
     test_free((char *)B);
     test_free((char *)C);
     test_free((char *)C_ref);
-    test_free((char *)co);
+    test_free((char *)oc);
 }
 
 template <>
@@ -233,37 +259,37 @@ void run_test_gemm<int8_t, int8_t, int32_t>(const test_params &p) {
     int32_t *C = get_matrix_buffer<int32_t>(sizeC);
     int32_t *C_ref = get_matrix_buffer<int32_t>(sizeC);
 
-    bool OCisR = (p.offsetc == 'R' || p.offsetc == 'r');
-    bool OCisC = (p.offsetc == 'C' || p.offsetc == 'c');
+    bool OCisR = (p.igemm_params.offsetc == 'R' || p.igemm_params.offsetc == 'r');
+    bool OCisC = (p.igemm_params.offsetc == 'C' || p.igemm_params.offsetc == 'c');
     size_t sizeco = OCisR ? p.N : OCisC ? p.M : 1;
 
-    int8_t ao, bo;
-    int32_t* co = get_matrix_buffer<int32_t>(sizeco);
+    int8_t oa, ob;
+    int32_t* oc = get_matrix_buffer<int32_t>(sizeco);
 
-    fill_matrix<int8_t, int8_t, int32_t>(sizeA, sizeB, sizeC, sizeco, A, B, C,
-        &ao, &bo, co);
+    fill_matrix<int8_t, int8_t, int32_t>(p, sizeA, sizeB, sizeC, sizeco, A, B, C,
+        &oa, &ob, oc);
 
     mkldnn::impl::parallel_nd(p.ldc * p.N,
         [&](int i) { C_ref[i] = static_cast<int32_t>(C[i]); });
 
-    auto status = mkldnn_gemm_s8s8s32(&p.transA, &p.transB, &p.offsetc,
-        &p.M, &p.N, &p.K, &p.alpha, A, &p.lda, &ao, B, &p.ldb, &bo,
-        &p.beta, C, &p.ldc, co);
+    auto status = mkldnn_gemm_s8s8s32(&p.transA, &p.transB, &p.igemm_params.offsetc,
+        &p.M, &p.N, &p.K, &p.alpha, A, &p.lda, &oa, B, &p.ldb, &ob,
+        &p.beta, C, &p.ldc, oc);
 
     if (status != mkldnn_success)
         throw error(status, "mkldnn_gemm_s8s8s32 returned error");
 
-    ref_gemm_s8x8s32<int8_t>(&p.transA, &p.transB, &p.offsetc, p.M, p.N,
-        p.K, p.alpha, A, p.lda, &ao, B, p.ldb, &bo, p.beta, C_ref,
-        p.ldc, co);
+    ref_gemm_s8x8s32<int8_t>(&p.transA, &p.transB, &p.igemm_params.offsetc, p.M, p.N,
+        p.K, p.alpha, A, p.lda, &oa, B, p.ldb, &ob, p.beta, C_ref,
+        p.ldc, oc);
 
-    compare(p.M, p.N, p.ldc, C, C_ref, p.K);
+    compare<int8_t, int32_t>(p.M, p.N, C, C_ref, p.ldc, p.alpha, p.beta, p.K);
 
     test_free((char *)A);
     test_free((char *)B);
     test_free((char *)C);
     test_free((char *)C_ref);
-    test_free((char *)co);
+    test_free((char *)oc);
 }
 
 template <>
@@ -276,7 +302,7 @@ void run_test_gemm<float, float, float>(const test_params &p) {
     float *C = get_matrix_buffer<float>(sizeC);
     float *C_ref = get_matrix_buffer<float>(sizeC);
 
-    fill_matrix<float, float, float>(sizeA, sizeB, sizeC, 0, A, B, C,
+    fill_matrix<float, float, float>(p, sizeA, sizeB, sizeC, 0, A, B, C,
         nullptr, nullptr, nullptr);
 
     mkldnn::impl::parallel_nd(p.N * p.ldc, [&](int i) { C_ref[i] = C[i]; });
@@ -286,7 +312,7 @@ void run_test_gemm<float, float, float>(const test_params &p) {
     if (status == mkldnn_success) {
         ref_gemm(&p.transA, &p.transB, p.M, p.N, p.K, p.alpha, A, p.lda, B, p.ldb,
             p.beta, C_ref, p.ldc);
-        compare(p.M, p.N, p.ldc, C, C_ref);
+        compare<float, float>(p.M, p.N, C, C_ref, p.ldc);
     }
 
     test_free((char *)A);
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_memory.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_memory.cpp
index 7de906704..de7b2375e 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_memory.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_memory.cpp
@@ -73,18 +73,29 @@ TEST_F(memory_test, WeightPaddingTest) {
     data_t *mem0_ptr = (data_t *)mem0.get_data_handle();
     fill_data<data_t>(O_16*I_16*H*W, mem0_ptr);
 
+    /* mem1 is OIhw16i16o with fmt = OIhw16i16o */
     std::vector<data_t> mem1_vec(phys_sz);
     mem1_vec.assign(mem0_ptr,
             mem0_ptr + mem0.get_primitive_desc().get_size() / sizeof(data_t));
-
     mkldnn::memory mem1({{{O, I, H, W}, memory::data_type::f32,
             memory::format::OIhw16i16o}, e}, &mem1_vec[0]);
-
     check_zero_tail<data_t>(0, mem1);
+
+    /* mem2 is OIhw16i16o with fmt = blocked */
+    std::vector<data_t> mem2_vec(phys_sz);
+    mem2_vec.assign(mem0_ptr,
+            mem0_ptr + mem0.get_primitive_desc().get_size() / sizeof(data_t));
+    mkldnn::memory::desc mem2_d = mem1.get_primitive_desc().desc();
+    mem2_d.data.format = mkldnn_blocked;
+    mkldnn::memory mem2({mem2_d, e}, &mem2_vec[0]);
+    check_zero_tail<data_t>(0, mem2);
+
     check_zero_tail<data_t>(1, mem0);
+    for (size_t i = 0; i < phys_sz; ++i)
+        EXPECT_NEAR(mem0_ptr[i], mem1_vec[i], 1e-7) << i << " :mem1";
 
     for (size_t i = 0; i < phys_sz; ++i)
-        EXPECT_NEAR(mem0_ptr[i], mem1_vec[i], 1e-7) << i;
+        EXPECT_NEAR(mem0_ptr[i], mem2_vec[i], 1e-7) << i << " :mem2";
 }
 
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_pooling_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_pooling_forward.cpp
index aa1a191f6..d855301a4 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_pooling_forward.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_pooling_forward.cpp
@@ -190,11 +190,11 @@ protected:
         check_zero_tail<data_t>(1, p_dst);
 
         // calculate right padding exactly
-        std::vector<int> padR_2d = {
+        std::vector<ptrdiff_t> padR_2d = {
             right_padding(pd.ih, pd.oh, pd.kh, pd.padt, pd.strh),
             right_padding(pd.iw, pd.ow, pd.kw, pd.padl, pd.strw)
         };
-        std::vector<int> padR_3d = {
+        std::vector<ptrdiff_t> padR_3d = {
             right_padding(pd.id, pd.od, pd.kd, pd.padf, pd.strd),
             right_padding(pd.ih, pd.oh, pd.kh, pd.padt, pd.strh),
             right_padding(pd.iw, pd.ow, pd.kw, pd.padl, pd.strw)
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_relu.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_relu.cpp
deleted file mode 100644
index a837e3c28..000000000
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_relu.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-/*******************************************************************************
-* Copyright 2016-2018 Intel Corporation
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*******************************************************************************/
-
-#include "gtest/gtest.h"
-#include "mkldnn_test_common.hpp"
-
-#include "mkldnn.hpp"
-
-namespace mkldnn {
-
-template <typename data_t>
-struct relu_test_params {
-    engine::kind engine_kind;
-    memory::format data_format;
-    memory::format diff_format;
-    data_t negative_slope;
-    memory::dims dims;
-    bool expect_to_fail;
-    mkldnn_status_t expected_status;
-};
-
-template <typename data_t>
-void check_relu_fwd(data_t negative_slope, const memory::desc &md,
-        const memory &src, const memory &dst)
-{
-    data_t *src_data = (data_t *)src.get_data_handle();
-    data_t *dst_data = (data_t *)dst.get_data_handle();
-
-    ASSERT_EQ(md.data.ndims, 4);
-    ASSERT_EQ(md.data.data_type, memory::data_type::f32); // TODO: type assert
-
-    size_t N = md.data.dims[0];
-    size_t C = md.data.dims[1];
-    size_t H = md.data.dims[2];
-    size_t W = md.data.dims[3];
-    for (size_t i = 0; i < N * C * H * W; ++i) {
-        data_t s = src_data[i];
-        EXPECT_NEAR(dst_data[i], s > 0 ? s : s * negative_slope, 1.e-7);
-    }
-}
-
-template <typename data_t>
-void check_relu_bwd(data_t negative_slope, const memory::desc &md,
-        const memory &src, const memory &diff_dst, const memory &diff_src)
-{
-    data_t *src_data = (data_t *)src.get_data_handle();
-    data_t *diff_dst_data = (data_t *)diff_dst.get_data_handle();
-    data_t *diff_src_data = (data_t *)diff_src.get_data_handle();
-
-    const memory::desc data_d = src.get_primitive_desc().desc();
-    const memory::desc diff_data_d = diff_src.get_primitive_desc().desc();
-
-    ASSERT_EQ(md.data.ndims, 4);
-    ASSERT_EQ(md.data.data_type, memory::data_type::f32); // TODO: type assert
-
-    size_t N = md.data.dims[0];
-    size_t C = md.data.dims[1];
-    size_t H = md.data.dims[2];
-    size_t W = md.data.dims[3];
-    for (size_t i = 0; i < N * C * H * W; ++i) {
-        data_t ref_s = src_data[map_index(data_d, i)];
-        data_t ref_dd = diff_dst_data[map_index(diff_data_d, i)];
-        data_t ref_ds = ref_dd * ((ref_s > 0) ? data_t{1} : negative_slope);
-        EXPECT_NEAR(diff_src_data[map_index(diff_data_d, i)], ref_ds, 1.e-7);
-    }
-}
-
-template <typename data_t>
-class relu_test : public ::testing::TestWithParam<relu_test_params<data_t>> {
-private:
-    std::shared_ptr<memory> src;
-    std::shared_ptr<memory> diff_src;
-    std::shared_ptr<memory> dst;
-    std::shared_ptr<memory> diff_dst;
-    std::shared_ptr<memory> workspace;
-    std::shared_ptr<memory::desc> data_desc;
-    std::shared_ptr<memory::desc> diff_data_desc;
-    std::shared_ptr<relu_forward::primitive_desc> relu_prim_desc;
-    relu_test_params<data_t> p;
-    std::shared_ptr<engine> eng;
-    memory::data_type data_type;
-    int size;
-
-protected:
-    virtual void SetUp() {
-        p = ::testing::TestWithParam<decltype(p)>::GetParam();
-        catch_expected_failures([=](){Test();}, p.expect_to_fail,
-                p.expected_status);
-    }
-
-    void Test() {
-        p = ::testing::TestWithParam<decltype(p)>::GetParam();
-
-        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
-        eng.reset(new engine(p.engine_kind, 0));
-
-        ASSERT_EQ(p.dims.size(), 4U);
-
-        data_type = data_traits<data_t>::data_type;
-        ASSERT_EQ(data_type, mkldnn::memory::data_type::f32);
-
-        size = p.dims[0] * p.dims[1] * p.dims[2] * p.dims[3];
-
-        Forward();
-        Backward();
-    }
-
-    void Forward() {
-        data_desc.reset(new memory::desc(p.dims, data_type,
-            p.data_format));
-        diff_data_desc.reset(new memory::desc(p.dims, data_type,
-            p.diff_format));
-        src.reset(new memory({*data_desc, *eng}));
-        dst.reset(new memory({*data_desc, *eng}));
-
-        fill_data<data_t>(size, (data_t *)src->get_data_handle(),
-                data_t(0), data_t(1));
-
-        auto relu_desc = relu_forward::desc(prop_kind::forward_training,
-                algorithm::eltwise_relu, *data_desc, p.negative_slope);
-        relu_prim_desc.reset(
-                new relu_forward::primitive_desc(relu_desc, *eng));
-        auto relu = relu_forward(*relu_prim_desc, *src, *dst);
-
-        std::vector<primitive> pipeline;
-        pipeline.push_back(relu);
-        auto s = stream(stream::kind::lazy);
-        s.submit(pipeline).wait();
-
-        check_relu_fwd(p.negative_slope, *data_desc,
-            *src, *dst);
-    }
-
-    void Backward() {
-        diff_src.reset(new memory({*diff_data_desc, *eng}));
-        diff_dst.reset(new memory({*diff_data_desc, *eng}));
-
-        fill_data<data_t>(size, (data_t *)diff_dst->get_data_handle(),
-                data_t(0), data_t(1));
-
-        auto relu_bwd_desc = relu_backward::desc(algorithm::eltwise_relu,
-                *diff_data_desc, *data_desc, p.negative_slope);
-        auto relu_bwd_prim_desc = relu_backward::primitive_desc(
-                relu_bwd_desc, *eng, *relu_prim_desc);
-        auto relu_bwd = relu_backward(relu_bwd_prim_desc, *src, *diff_dst,
-                *diff_src);
-
-        std::vector<primitive> pipeline;
-        pipeline.push_back(relu_bwd);
-        auto s = stream(stream::kind::lazy);
-        s.submit(pipeline).wait();
-
-        check_relu_bwd(p.negative_slope, *data_desc,
-            *src, *diff_dst, *diff_src);
-    }
-};
-
-using relu_test_float = relu_test<float>;
-using relu_test_params_float = relu_test_params<float>;
-
-TEST_P(relu_test_float, TestsReLU)
-{
-}
-
-#define EXPAND_SIZES(mb, c, h, w) { mb, c, h, w }
-#define EXPAND_FORMATS(data) memory::format::data
-
-#define ENGINE engine::kind::cpu
-
-#define PARAMS_EF(data, diff_data, ns, mb, c, h, w, ef, es) \
-    relu_test_params_float { ENGINE, \
-    EXPAND_FORMATS(data), EXPAND_FORMATS(diff_data), \
-    ns, EXPAND_SIZES(mb, c, h, w), ef, es}
-
-#define PARAMS(data, diff_data, ns, mb, c, h, w) \
-    PARAMS_EF(data, diff_data, ns, mb, c, h, w, false, mkldnn_success)
-
-#define INST_TEST_CASE(str, ...) INSTANTIATE_TEST_CASE_P( \
-        str, relu_test_float, ::testing::Values(__VA_ARGS__))
-
-INST_TEST_CASE(SimpleZeroDim,
-    PARAMS(nchw, nchw, 0.f, 0, 8, 4, 4),
-    PARAMS(nchw, nchw, 0.f, 2, 0, 4, 4),
-    PARAMS(nchw, nchw, 0.f, 2, 8, 0, 4),
-    PARAMS(nchw, nchw, 0.f, 2, 8, 4, 0)
-);
-
-INST_TEST_CASE(SimpleEF,
-    PARAMS_EF(nchw, nchw, 0.f, -1, 8, 4, 4, true, mkldnn_invalid_arguments),
-    PARAMS_EF(nchw, nchw, 0.f, 2, -1, 4, 4, true, mkldnn_invalid_arguments),
-    PARAMS_EF(nchw, nchw, 0.f, 2, 8, -1, 4, true, mkldnn_invalid_arguments),
-    PARAMS_EF(nchw, nchw, 0.f, 2, 8, 4, -1, true, mkldnn_invalid_arguments)
-);
-
-INST_TEST_CASE(SimpleZeroNegativeSlope_NCHW,
-    //PARAMS(nchw, nchw, 0.f, 1, 8, 10000, 10000),  // is a tensor of 3 Gb data ok? YES (330 s runtime, slow)
-    //PARAMS(nchw, nchw, 0.f, 1, 12, 10000, 10000), // is a tensor of >4 Gb data ok? worked once (release mode)
-    PARAMS(nchw, nchw, 0.f, 2, 8, 4, 4),
-    PARAMS(nchw, nchw, 0.f, 2, 16, 4, 4),
-    PARAMS(nchw, nchw, 0.f, 2, 16, 8, 8),
-    PARAMS(nchw, nchw, 0.f, 2, 16, 16, 8),
-    PARAMS(nchw, nchw, 0.f, 2, 16, 10, 8),
-    PARAMS(nchw, nchw, 0.f, 10, 10, 10, 10),
-    PARAMS(nchw, nchw, 0.f, 256, 64, 8, 16),
-    PARAMS(nchw, nchw, 0.f, 1, 1, 1, 1),
-    PARAMS(nchw, nchw, 0.f, 3, 5, 7, 11)
-);
-
-INST_TEST_CASE(Simple_NCHW,
-    PARAMS(nchw, nchw, 0.1f, 2, 8, 4, 4),
-    PARAMS(nchw, nchw, 0.1f, 2, 16, 4, 4),
-    PARAMS(nchw, nchw, 0.1f, 2, 16, 8, 8),
-    PARAMS(nchw, nchw, 0.1f, 2, 16, 16, 8),
-    PARAMS(nchw, nchw, 0.1f, 2, 16, 10, 8),
-    PARAMS(nchw, nchw, 0.1f, 10, 10, 10, 10),
-    PARAMS(nchw, nchw, 0.1f, 256, 64, 8, 16),
-    PARAMS(nchw, nchw, 0.1f, 1, 1, 1, 1),
-    PARAMS(nchw, nchw, 0.1f, 3, 5, 7, 11)
-);
-
-INST_TEST_CASE(Simple,
-    PARAMS(nchw, nChw8c, 0.1f, 2, 8, 4, 4),
-    PARAMS(nChw8c, nchw, 0.1f, 2, 16, 4, 4),
-    PARAMS(nchw, nchw, 0.1f, 2, 16, 8, 8),
-    PARAMS(nChw8c, nChw8c, 0.1f, 2, 16, 16, 8),
-    PARAMS(nhwc, nchw, 0.1f, 2, 16, 10, 8),
-    PARAMS(nchw, nhwc, 0.1f, 10, 10, 10, 10)
-);
-
-INST_TEST_CASE(AlexNet_NCHW,
-    PARAMS(nchw, nchw, 0.f, 2, 96, 55, 55),
-    PARAMS(nchw, nchw, 0.f, 2, 256, 27, 27),
-    PARAMS(nchw, nchw, 0.f, 2, 384, 13, 13)
-);
-
-}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_reorder.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_reorder.cpp
index e182e91c8..d4b5fbe8f 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_reorder.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_reorder.cpp
@@ -29,7 +29,7 @@ inline void check_reorder(const memory::desc &md_i, const memory::desc &md_o,
         const data_i_t *src, const data_o_t *dst)
 {
     const int ndims = md_i.data.ndims;
-    const int *dims = md_i.data.dims;
+    const ptrdiff_t *dims = md_i.data.dims;
     const size_t nelems = std::accumulate(
             dims, dims + ndims, size_t(1), std::multiplies<size_t>());
 
@@ -333,7 +333,11 @@ TEST_P(reorder_simple_test_weights_f32_f32_1, TestsReorder) { }
 INSTANTIATE_TEST_CASE_P(TestReorder, reorder_simple_test_weights_f32_f32_1,
         ::testing::Values(
             cfg_f32{eng::cpu, fmt::goihw, fmt::Goihw16g, {32, 32, 32, 3, 3}},
-            cfg_f32{eng::cpu, fmt::Goihw16g, fmt::goihw, {32, 32, 32, 3, 3}}
+            cfg_f32{eng::cpu, fmt::Goihw16g, fmt::goihw, {32, 32, 32, 3, 3}},
+            cfg_f32{eng::cpu, fmt::oihw, fmt::iohw, {32, 32, 3, 3}},
+            cfg_f32{eng::cpu, fmt::iohw, fmt::oihw, {32, 32, 3, 3}},
+            cfg_f32{eng::cpu, fmt::goihw, fmt::giohw, {2, 32, 32, 3, 3}},
+            cfg_f32{eng::cpu, fmt::giohw, fmt::goihw, {2, 32, 32, 3, 3}}
             )
         );
 
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_rnn_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_rnn_forward.cpp
new file mode 100644
index 000000000..a0614c32d
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_rnn_forward.cpp
@@ -0,0 +1,243 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <utility>
+#include <numeric>
+
+#include "gtest/gtest.h"
+#include "mkldnn_test_common.hpp"
+
+#include "mkldnn.hpp"
+
+namespace mkldnn {
+
+struct test_rnn_sizes_t {
+    test_rnn_sizes_t(
+        int l, int d, int t, int mb,
+        int slc, int sic, int dlc, int dic) :
+        l(l), d(d), t(t), mb(mb),
+        slc(slc), sic(sic), dlc(dlc), dic(dic) {}
+    int l, d;
+    int t;
+    int mb;
+    int slc, sic, dlc, dic;
+};
+
+struct test_rnn_formats_t {
+    mkldnn::memory::format src_layer_fmt;
+    mkldnn::memory::format src_iter_fmt;
+    mkldnn::memory::format weights_layer_fmt;
+    mkldnn::memory::format weights_iter_fmt;
+    mkldnn::memory::format bias_fmt;
+    mkldnn::memory::format dst_layer_fmt;
+    mkldnn::memory::format dst_iter_fmt;
+};
+
+struct test_rnn_params_t {
+    const mkldnn::engine::kind engine_kind;
+    mkldnn::algorithm aalgorithm;
+    mkldnn::algorithm activation;
+    mkldnn::rnn_direction direction;
+    test_rnn_formats_t fmts;
+    test_rnn_sizes_t sizes;
+    bool expect_to_fail;
+    mkldnn_status_t expected_status;
+};
+
+// We assume uniform data type accross tensors for now
+template <typename data_t>
+class rnn_forward_test
+    : public ::testing::TestWithParam<test_rnn_params_t> {
+protected:
+    virtual void SetUp() {
+        auto p = ::testing::TestWithParam<test_rnn_params_t>::GetParam();
+        catch_expected_failures([=](){Test();}, p.expect_to_fail,
+                p.expected_status, false);
+    }
+
+    void Test() {
+        auto p = ::testing::TestWithParam<test_rnn_params_t>::GetParam();
+        ASSERT_TRUE(p.engine_kind == engine::kind::cpu);
+        auto eng = engine(p.engine_kind, 0);
+        //@todo check algorithm is one of the supported by RNN
+        //ASSERT_EQ(p.aalgorithm, algorithm::vanilla_lstm);
+
+        // Initialize the data
+        memory::data_type prec = data_traits<data_t>::data_type;
+        auto dims = p.sizes;
+        auto t = dims.t, mb = dims.mb, l = dims.l, d = dims.d;
+        auto slc = dims.slc, sic = dims.sic, dlc = dims.dlc, dic = dims.dic;
+        int s, g;
+
+        switch (p.aalgorithm) {
+        case vanilla_lstm:
+            g = 4; s = 2; break;
+        case vanilla_gru:
+        case gru_linear_before_reset:
+            g = 3; s = 1; break;
+        default:
+            g = 1; s = 1; break;
+        };
+
+        mkldnn::memory::dims weights_layer_dims = {l, d, slc, g, dic};
+        mkldnn::memory::dims weights_iter_dims = {l, d, sic, g, dic};
+        mkldnn::memory::dims bias_dims = {l, d, g, dic};
+        mkldnn::memory::dims src_layer_dims = {t, mb, slc};
+        mkldnn::memory::dims src_iter_dims = {l, d, s, mb, sic};
+        mkldnn::memory::dims dst_layer_dims = {t, mb, dlc};
+        mkldnn::memory::dims dst_iter_dims = {l, d, s, mb, dic};
+
+        auto weights_layer_md_any = memory::desc({weights_layer_dims}, prec, memory::format::any);
+        auto weights_iter_md_any = memory::desc({weights_iter_dims}, prec, memory::format::any);
+        auto bias_md_any = memory::desc({bias_dims}, prec, memory::format::any);
+        auto src_layer_md_any = memory::desc({src_layer_dims}, prec, memory::format::any);
+        auto src_iter_md_any = memory::desc({src_iter_dims}, prec, memory::format::any);
+        auto dst_layer_md_any = memory::desc({dst_layer_dims}, prec, memory::format::any);
+        auto dst_iter_md_any = memory::desc({dst_iter_dims}, prec, memory::format::any);
+
+        auto weights_layer_md_tgt = memory::desc({weights_layer_dims}, prec, p.fmts.weights_layer_fmt);
+        auto weights_iter_md_tgt = memory::desc({weights_iter_dims}, prec, p.fmts.weights_iter_fmt);
+        auto bias_md_tgt = memory::desc({bias_dims}, prec, p.fmts.bias_fmt);
+        auto src_layer_md_tgt = memory::desc({src_layer_dims}, prec, p.fmts.src_layer_fmt);
+        auto src_iter_md_tgt = memory::desc({src_iter_dims}, prec, p.fmts.src_iter_fmt);
+        auto dst_layer_md_tgt = memory::desc({dst_layer_dims}, prec, p.fmts.dst_layer_fmt);
+        auto dst_iter_md_tgt = memory::desc({dst_iter_dims}, prec, p.fmts.dst_iter_fmt);
+
+
+        // Create the reference descriptor
+        rnn_cell::desc cell(p.aalgorithm, p.activation);
+        auto direction = p.direction;
+
+        rnn_forward::desc ref_desc(prop_kind::forward_inference, cell,
+                direction, src_layer_md_any, src_iter_md_any,
+                weights_layer_md_any, weights_iter_md_any, bias_md_any,
+                dst_layer_md_any, dst_iter_md_any);
+        auto ref_prim_desc = rnn_forward::primitive_desc(ref_desc, eng);
+
+        // Query the descriptor for memory descriptors
+        auto weights_layer_md_ref = ref_prim_desc.weights_layer_primitive_desc().desc();
+        auto weights_iter_md_ref = ref_prim_desc.weights_iter_primitive_desc().desc();
+        auto bias_md_ref = ref_prim_desc.bias_primitive_desc().desc();
+        auto src_layer_md_ref = ref_prim_desc.src_layer_primitive_desc().desc();
+        auto src_iter_md_ref = ref_prim_desc.src_iter_primitive_desc().desc();
+        auto dst_layer_md_ref = ref_prim_desc.dst_layer_primitive_desc().desc();
+        auto dst_iter_md_ref = ref_prim_desc.dst_iter_primitive_desc().desc();
+
+        auto are_equal_md = [](memory::desc a, memory::desc b, engine eng){
+            return memory::primitive_desc(a, eng)
+                    == memory::primitive_desc(b, eng);
+        };
+
+        bool skip_test =
+            are_equal_md(weights_layer_md_ref, weights_layer_md_tgt, eng)
+            && are_equal_md(weights_iter_md_ref, weights_iter_md_tgt, eng)
+            && are_equal_md(bias_md_ref, bias_md_tgt, eng)
+            && are_equal_md(src_layer_md_ref, src_layer_md_tgt, eng)
+            && are_equal_md(src_iter_md_ref, src_iter_md_tgt, eng)
+            && are_equal_md(dst_layer_md_ref, dst_layer_md_tgt, eng)
+            && are_equal_md(dst_iter_md_ref, dst_iter_md_tgt, eng);
+
+        if (skip_test) return;
+
+        /* initialize data */
+        auto weights_layer_ref = memory({weights_layer_md_ref, eng});
+        auto weights_iter_ref = memory({weights_iter_md_ref, eng});
+        auto bias_ref = memory({bias_md_ref, eng});
+        auto src_layer_ref = memory({src_layer_md_ref, eng});
+        auto src_iter_ref = memory({src_iter_md_ref, eng});
+        auto dst_layer_ref = memory({dst_layer_md_ref, eng});
+        auto dst_iter_ref = memory({dst_iter_md_ref, eng});
+
+        auto weights_layer_tgt = memory({weights_layer_md_tgt, eng});
+        auto weights_iter_tgt = memory({weights_iter_md_tgt, eng});
+        auto bias_tgt = memory({bias_md_tgt, eng});
+        auto src_layer_tgt = memory({src_layer_md_tgt, eng});
+        auto src_iter_tgt = memory({src_iter_md_tgt, eng});
+        auto dst_layer_tgt = memory({dst_layer_md_tgt, eng});
+        auto dst_iter_tgt = memory({dst_iter_md_tgt, eng});
+
+        auto init_tensor = [&](memory a, memory b) {
+            auto a_ptr = static_cast<float *>(a.get_data_handle());
+            auto desc = a.get_primitive_desc().desc();
+            auto a_dims = desc.data.dims;
+            auto a_ndims = desc.data.ndims;
+            auto n_elems = std::accumulate(a_dims, a_dims + a_ndims, size_t(1),
+                    std::multiplies<float>());
+            for(size_t i = 0; i < n_elems; i++)
+                a_ptr[map_index(desc, i, false)] = i;
+            stream(stream::kind::eager).submit({reorder(a, b)}).wait();
+        };
+
+        init_tensor(weights_layer_ref, weights_layer_tgt);
+        init_tensor(weights_iter_ref, weights_iter_tgt);
+        init_tensor(bias_ref, bias_tgt);
+        init_tensor(src_layer_ref, src_layer_tgt);
+        init_tensor(src_iter_ref, src_iter_tgt);
+
+        // run the non packed version
+        auto prim_ref = rnn_forward(ref_prim_desc, src_layer_ref, src_iter_ref,
+                weights_layer_ref, weights_iter_ref, bias_ref,
+                dst_layer_ref, dst_iter_ref, null_memory(eng));
+        stream(stream::kind::eager).submit({prim_ref}).wait();
+
+        // run the packed version
+        rnn_forward::desc tgt_desc(prop_kind::forward_inference, cell,
+                direction, src_layer_md_tgt, src_iter_md_tgt,
+                weights_layer_md_tgt, weights_iter_md_tgt, bias_md_tgt,
+                dst_layer_md_tgt, dst_iter_md_tgt);
+        auto tgt_prim_desc = rnn_forward::primitive_desc(tgt_desc, eng);
+        auto prim_tgt = rnn_forward(tgt_prim_desc, src_layer_tgt, src_iter_tgt,
+                weights_layer_tgt, weights_iter_tgt, bias_tgt,
+                dst_layer_tgt, dst_iter_tgt, null_memory(eng));
+        stream(stream::kind::eager).submit({prim_tgt}).wait();
+
+        // compare dst_layer and dst_iter
+        compare_data<data_t>(dst_layer_ref, dst_layer_tgt, 1e-5);
+        compare_data<data_t>(dst_iter_ref, dst_iter_tgt, 1e-5);
+    }
+};
+
+    using eng = engine::kind;
+    using fmt = memory::format;
+    using alg = algorithm;
+    using dir = rnn_direction;
+    using rnn_forward_test_f32 = rnn_forward_test<float>;
+    using cfg_f32 = test_rnn_params_t;
+
+TEST_P(rnn_forward_test_f32, TestsRnn) { }
+INSTANTIATE_TEST_CASE_P(TestRnn, rnn_forward_test_f32,
+        ::testing::Values(
+            cfg_f32{eng::cpu, alg::vanilla_rnn, alg::eltwise_tanh, dir::unidirectional_left2right,
+                {fmt::tnc, fmt::ldsnc, fmt::ldigo, fmt::ldigo, fmt::ldgo, fmt::tnc, fmt::ldsnc},
+                    test_rnn_sizes_t(1, 1, 10, 16, 100, 100, 100, 100)},
+            cfg_f32{eng::cpu, alg::vanilla_lstm, alg::eltwise_tanh, dir::unidirectional_left2right,
+                {fmt::tnc, fmt::ldsnc, fmt::ldigo, fmt::ldigo, fmt::ldgo, fmt::tnc, fmt::ldsnc},
+                    test_rnn_sizes_t(1, 1, 10, 16, 100, 100, 100, 100)},
+            /* Check for invalid parameters: unsupported unrolling */
+            cfg_f32{eng::cpu, alg::vanilla_rnn, alg::eltwise_tanh, dir::unidirectional_left2right,
+                {fmt::tnc, fmt::ldsnc, fmt::ldigo, fmt::ldigo, fmt::ldgo, fmt::tnc, fmt::ldsnc},
+                    test_rnn_sizes_t(2, 1, 10, 16, 200, 100, 100, 100), true, mkldnn_invalid_arguments},
+            cfg_f32{eng::cpu, alg::vanilla_rnn, alg::eltwise_tanh, dir::unidirectional_left2right,
+                {fmt::tnc, fmt::ldsnc, fmt::ldigo, fmt::ldigo, fmt::ldgo, fmt::tnc, fmt::ldsnc},
+                    test_rnn_sizes_t(2, 1, 10, 16, 100, 200, 100, 100), true, mkldnn_invalid_arguments},
+            /* Check for invalid parameters: inconsistent dimensions */
+            cfg_f32{eng::cpu, alg::vanilla_rnn, alg::eltwise_tanh, dir::unidirectional_left2right,
+                {fmt::tnc, fmt::ldsnc, fmt::ldigo, fmt::ldigo, fmt::ldgo, fmt::tnc, fmt::ldsnc},
+                    test_rnn_sizes_t(2, 1, 10, 16, 100, 100, 50, 100), true, mkldnn_invalid_arguments}
+            )
+    );
+
+}
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_softmax_forward.cpp b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_softmax_forward.cpp
index e938da699..d9f6d68e7 100644
--- a/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_softmax_forward.cpp
+++ b/inference-engine/thirdparty/mkl-dnn/tests/gtests/test_softmax_forward.cpp
@@ -181,6 +181,7 @@ protected:
             check_softmax_fwd<data_t>(p.aprop_kind, src, dst, p.axis);
         };
 
+        test_with_given_fill(-50, 50);
         test_with_given_fill(-200, 1);
         test_with_given_fill(   0, 1);
         test_with_given_fill( 200, 1);
@@ -216,5 +217,9 @@ INSTANTIATE_TEST_CASE_P(TestSoftmaxForward, softmax_forward_test_float,
             softmax_fwd_test_params_float{prop_kind::forward_scoring,
             engine::kind::cpu, memory::format::nc, {2, 1000}, 0},
             softmax_fwd_test_params_float{prop_kind::forward_scoring,
-            engine::kind::cpu, memory::format::nc, {2, 1000}, 1}));
+            engine::kind::cpu, memory::format::nc, {2, 1000}, 1},
+            softmax_fwd_test_params_float{prop_kind::forward_scoring,
+            engine::kind::cpu, memory::format::nc, {1, 256}, 1},
+            softmax_fwd_test_params_float{prop_kind::forward_scoring,
+            engine::kind::cpu, memory::format::nc, {1, 13}, 1}));
 }
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/other/subproject/CMakeLists.txt b/inference-engine/thirdparty/mkl-dnn/tests/other/subproject/CMakeLists.txt
new file mode 100644
index 000000000..392a8b3c5
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/other/subproject/CMakeLists.txt
@@ -0,0 +1,33 @@
+#===============================================================================
+# Copyright 2018 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
+# Test Intel MKL-DNN for embeddability
+# by imitating a project that includes the library
+#
+# To test run:
+# mkdir -p build && cd build && cmake .. && make -j && ./project_app
+
+cmake_minimum_required(VERSION 2.8)
+
+set(PROJECT_NAME "Project")
+
+# include Intel MKL-DNN
+set(MKLDNN_DIR "../../..")
+add_subdirectory(${MKLDNN_DIR} mkl-dnn)
+include_directories(${MKLDNN_DIR}/include)
+
+add_executable(project_app main.c)
+target_link_libraries(project_app mkldnn)
diff --git a/inference-engine/thirdparty/mkl-dnn/tests/other/subproject/main.c b/inference-engine/thirdparty/mkl-dnn/tests/other/subproject/main.c
new file mode 100644
index 000000000..5d23650bf
--- /dev/null
+++ b/inference-engine/thirdparty/mkl-dnn/tests/other/subproject/main.c
@@ -0,0 +1,26 @@
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+
+#include <stdio.h>
+#include "mkldnn.h"
+
+int main() {
+    printf("mkldnn_version: %d.%d.%d\n",
+            MKLDNN_VERSION_MAJOR, MKLDNN_VERSION_MINOR, MKLDNN_VERSION_PATCH);
+    printf("mkldnn_memory_desc_init = %p, sizeof(mkldnn_memory_desc_t) = %d\n",
+            mkldnn_memory_desc_init, (int)sizeof(mkldnn_memory_desc_t));
+    return 0;
+}
diff --git a/inference-engine/thirdparty/mkldnn.cmake b/inference-engine/thirdparty/mkldnn.cmake
index 0cf504560..d90717cc3 100644
--- a/inference-engine/thirdparty/mkldnn.cmake
+++ b/inference-engine/thirdparty/mkldnn.cmake
@@ -1,5 +1,5 @@
 #===============================================================================
-# Copyright (c) 2016 Intel Corporation
+# Copyright (C) 2018-2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,6 +22,34 @@
 set (CMAKE_CXX_STANDARD 11)
 set (CMAKE_CXX_STANDARD_REQUIRED ON)
 
+set(version_cmake_included true)
+
+set(TARGET mkldnn)
+set(MKLDNN_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/mkl-dnn)
+
+string(REPLACE "." ";" VERSION_LIST "0.18.0")
+list(GET VERSION_LIST 0 MKLDNN_VERSION_MAJOR)
+list(GET VERSION_LIST 1 MKLDNN_VERSION_MINOR)
+list(GET VERSION_LIST 2 MKLDNN_VERSION_PATCH)
+
+find_package(Git)
+if (GIT_FOUND)
+    execute_process(COMMAND ${GIT_EXECUTABLE} log -1 --format=%H
+            WORKING_DIRECTORY ${MKLDNN_ROOT}
+            RESULT_VARIABLE RESULT
+            OUTPUT_VARIABLE MKLDNN_VERSION_HASH
+            OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
+
+if(NOT GIT_FOUND OR RESULT)
+    set(MKLDNN_VERSION_HASH "N/A")
+endif()
+
+configure_file(
+    "${MKLDNN_ROOT}/include/mkldnn_version.h.in"
+    "${CMAKE_BINARY_DIR}/include/mkldnn_version.h"
+)
+
 function(detect_mkl LIBNAME)
     message(STATUS "Detecting Intel(R) MKL: trying ${LIBNAME}")
     find_path(MKLINC mkl_cblas.h ${MKL}/include)
@@ -51,9 +79,6 @@ function(detect_mkl LIBNAME)
     endif()
 endfunction()
 
-set(TARGET mkldnn)
-set(MKLDNN_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/mkl-dnn)
-
 if (THREADING STREQUAL "TBB")
     add_definitions(-DMKLDNN_THR=MKLDNN_THR_TBB)
 elseif (THREADING STREQUAL "OMP")
@@ -76,7 +101,9 @@ include_directories(
         ${MKLDNN_ROOT}/include
         ${MKLDNN_ROOT}/src
         ${MKLDNN_ROOT}/src/common
+        ${MKLDNN_ROOT}/src/cpu/
         ${MKLDNN_ROOT}/src/cpu/xbyak
+        ${CMAKE_BINARY_DIR}/include/
 )
 
 if(WIN32)
@@ -88,6 +115,23 @@ if(WIN32)
     endif()
 endif()
 
+# to make build time reasonable, don't use optimizations for s8u8s32 Xbyak
+# kernels
+file(GLOB FILES_WITHNO_OPT
+    ${MKLDNN_ROOT}/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_b0_gemm_s8u8s32_kern.cpp
+    ${MKLDNN_ROOT}/src/cpu/gemm/s8x8s32/jit_avx512_core_kernel_gemm_s8u8s32_kern.cpp
+    ${MKLDNN_ROOT}/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_an_kern.cpp
+    ${MKLDNN_ROOT}/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_at_kern.cpp
+    ${MKLDNN_ROOT}/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bn_kern.cpp
+    ${MKLDNN_ROOT}/src/cpu/gemm/s8x8s32/jit_avx512_core_u8_copy_bt_kern.cpp)
+if(WIN32 AND NOT MINGW)
+    set_source_files_properties(${FILES_WITHNO_OPT}
+        PROPERTIES COMPILE_FLAGS "/Od")
+else()
+    set_source_files_properties(${FILES_WITHNO_OPT}
+        PROPERTIES COMPILE_FLAGS "-O0 -U_FORTIFY_SOURCE")
+endif()
+
 add_library(${TARGET} STATIC ${HDR} ${SRC})
 set_ie_threading_interface_for(${TARGET})
 
@@ -98,7 +142,7 @@ if(GEMM STREQUAL "OPENBLAS")
     list(APPEND ${TARGET}_LINKER_LIBS ${BLAS_LIBRARIES})
 elseif (GEMM STREQUAL "MKL")
     ## enable cblas_gemm from mlkml package
-if(WIN32)
+if(WIN32 OR APPLE)
     detect_mkl("mklml")
 else()
     if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
@@ -113,4 +157,4 @@ endif()
 endif()
 ## enable jit_gemm from mlk-dnn
 
-target_link_libraries(${TARGET} PRIVATE ${${TARGET}_LINKER_LIBS})
-\ No newline at end of file
+target_link_libraries(${TARGET} PRIVATE ${${TARGET}_LINKER_LIBS})
diff --git a/inference-engine/tools/accuracy_checker_tool/README.md b/inference-engine/tools/accuracy_checker_tool/README.md
new file mode 100644
index 000000000..8dc651145
--- /dev/null
+++ b/inference-engine/tools/accuracy_checker_tool/README.md
@@ -0,0 +1,163 @@
+# Deep Learning accuracy validation framework
+
+#### Usage
+
+You may test your installation and get familiar with accuracy checker by running [sample][sample-readme].
+
+Once you installed accuracy checker you can evaluate your configurations with:
+
+```python
+python3 accuracy_check.py -c path/to/configuration_file -m /path/to/models -s /path/to/source/data -a /path/to/annotation
+```
+
+All relative paths in config files will be prefixed with values specified in command line:
+
+- `-c, --config` path to configuration file.
+- `-m, --models` specifies directory in which models and weights declared in config file will be searched.
+- `-s, --source` specifies directory in which input images will be searched.
+- `-a, --annotations` specifies directory in which annotation and meta files will be searched.
+
+You may refer to `-h, --help` to full list of command line options. Some optional arguments are:
+
+- `-e, --extensions` directory with InferenceEngine extensions.
+- `-b, --bitstreams` directory with bitstream (for Inference Engine with fpga plugin).
+- `-C, '--converted_models` directory to store Model Optimizer converted models (used for DLSDK launcher only).
+- `-tf, --target_framework` framework for infer.
+- `-td, --target_devices` devices for infer. You can specify several devices using space as a delimiter.
+
+#### Configuration
+
+There is config file which declares validation process.
+Every validated model has to have its entry in `models` list
+with distinct `name` and other properties described below.
+
+There is also definitions file, which declares global options shared across all models.
+Config file has priority over definitions file.
+
+example:
+
+```yaml
+models:
+- name: model_name
+  launchers:
+    - framework: caffe
+      model:   public/alexnet/caffe/bvlc_alexnet.prototxt
+      weights: public/alexnet/caffe/bvlc_alexnet.caffemodel
+      adapter: classification
+      batch: 128
+  datasets:
+    - name: dataset_name
+```
+
+### Launchers
+
+Launcher is a description of how your model should be executed.
+Each launcher configuration starts with setting `framework` name. Currently *caffe* and *dlsdk* supported. Launcher description can have differences.
+
+Please view:
+
+- [how to configure Caffe launcher][caffe-launcher-configuration].
+- [how to configure DLSDK launcher][dlsdk-launcher-configuration].
+
+### Datasets
+
+Dataset entry describes data on which model should be evaluated,
+all required preprocessing and postprocessing/filtering steps,
+and metrics that will be used for evaluation.
+
+If your dataset data is a well-known competition problem (COCO, Pascal VOC, ...) and/or can be potentially reused for other models
+it is reasonable to declare it in some global configuration file (*definition* file). This way in your local configuration file you can provide only
+`name` and all required steps will be picked from global one. To pass path to this global configuration use `--definition` argument of CLI.
+
+Each dataset must have:
+
+- `name` - unique identifier of your model/topology.
+- `data_source`: path to directory where input data is stored.
+- `metrics`: list of metrics that should be computed.
+
+And optionally:
+- `preprocessing`: list of preprocessing steps applied to input data. If you want calculated metrics to match reported, you must reproduce preprocessing from canonical paper of your topology or ask topology author about required steps if it is ICV topology.
+- `postprocessing`: list of postprocessing steps.
+- `reader`: approach for data reading. You can specify: `opencv_imread` or `pillow_imread` for reading images and `opencv_capture` for reading frames from video. Default reader is `opencv_imread`.
+
+Also it must contain data related to annotation.
+You can convert annotation inplace using:
+- `annotation_conversion`: parameters for annotation conversion
+
+
+or use existing annotation file and dataset meta: 
+- `annotation` - path to annotation file, you must **convert annotation to representation of dataset problem first**, you may choose one of the converters from *annotation-converters* if there is already converter for your dataset or write your own.
+- `dataset_meta`: path to metadata file (generated by converter).
+More detailed information about annotation conversion you can find [here][converters]
+
+example of dataset definition:
+
+```yaml
+- name: dataset_name
+  annotation: annotation.pickle
+  data_source: images_folder
+
+  preprocessing:
+    - type: resize
+      dst_width: 256
+      dst_height: 256
+
+    - type: normalization
+      mean: imagenet
+
+    - type: crop
+      dst_width: 227
+      dst_height: 227
+
+  metrics:
+    - type: accuracy
+```
+
+### Preprocessing, Metrics, Postprocessing
+
+Each entry of preprocessing, metrics, postprocessing must have `type` field,
+other options are specific to type. If you do not provide any other option, then it
+will be picked from *definitions* file.
+
+You can find useful following instructions:
+
+- [how to use preprocessings][preprocessors].
+- [how to use postprocessings][postprocessors].
+- [how to use metrics][metrics].
+
+You may optionally provide `reference` field for metric, if you want calculated metric
+tested against specific value (i.e. reported in canonical paper).
+
+Some metrics support providing vector results ( e. g. mAP is able to return average precision for each detection class).  You can change view mode for metric results using `presenter` (e.g. `print_vector`, `print_scalar`).
+
+example:
+
+```yaml
+metrics:
+- type: accuracy
+  top_k: 5
+  reference: 86.43
+  threshold: 0.005
+```
+
+### Testing new models
+
+Typical workflow for testing new model include:
+
+1. Convert annotation of your dataset. Use one of the converters from annotation-converters, or write your own if there is no converter for your dataset. You can find detailed instruction how to use converters [here][converters].
+
+```bash
+python3 convert_annotation.py converter --converter_specific_parameter --output_dir data/annotations
+```
+
+1. Choose one of *adapters* or write your own. Adapter converts raw output produced by framework to high level problem specific representation (e.g. *ClassificationPrediction*, *DetectionPrediction*, etc).
+1. Reproduce preprocessing, metrics and postprocessing from canonical paper.
+1. Create entry in config file and execute.
+
+[sample-readme]: ./tools/accuracy_checker/sample/README.md
+[preprocessors]: ./tools/accuracy_checker/accuracy_checker/preprocessor/README.md
+[postprocessors]: ./tools/accuracy_checker/accuracy_checker/postprocessor/README.md
+[metrics]: ./tools/accuracy_checker/accuracy_checker/metrics/README.md
+[converters]: ./tools/accuracy_checker/accuracy_checker/annotation_converters/README.md
+[caffe-launcher-configuration]: ./tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md
+[dlsdk-launcher-configuration]: ./tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md
diff --git a/inference-engine/tools/accuracy_checker_tool/accuracy_check.py b/inference-engine/tools/accuracy_checker_tool/accuracy_check.py
new file mode 100644
index 000000000..3d4fc2bfb
--- /dev/null
+++ b/inference-engine/tools/accuracy_checker_tool/accuracy_check.py
@@ -0,0 +1,19 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from openvino.tools.accuracy_checker.accuracy_checker.main import main
+
+main()
diff --git a/inference-engine/tools/accuracy_checker_tool/convert_annotation.py b/inference-engine/tools/accuracy_checker_tool/convert_annotation.py
new file mode 100644
index 000000000..5313d71c0
--- /dev/null
+++ b/inference-engine/tools/accuracy_checker_tool/convert_annotation.py
@@ -0,0 +1,20 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from openvino.tools.accuracy_checker.accuracy_checker.annotation_converters.convert import main
+
+if __name__ == '__main__':
+    main()
diff --git a/inference-engine/tools/benchmark_tool/README.md b/inference-engine/tools/benchmark_tool/README.md
new file mode 100644
index 000000000..bf11be24c
--- /dev/null
+++ b/inference-engine/tools/benchmark_tool/README.md
@@ -0,0 +1,16 @@
+# OpenVINO™ Benchmark Tool
+Inference Engine Benchmark Tool is a Python\* command-line tool, which measures latency for synchronous mode.
+
+Please, refer to https://docs.openvinotoolkit.org for details.
+
+## Usage
+
+In general, the Benchmark Tool is configured in the same way as the Accuracy Checker. You can also use additional command line arguments to define benchmark-specific parameters:
+
+| Argument                                     | Type   | Description                                              |
+| -------------------------------------------- | ------ | -------------------------------------------------------- |
+| -c, --config                                 | string | Required. Path to the YML file with local configuration  |
+| -ic, --benchmark_iterations_count            | string | Optional. Benchmark itertations count. (1000 is default) |
+
+## Hardware requirements
+Hardware requirements depend on a model. Typically for public models RAM memory size has to be not less then 16Gb independently on operation system.
+\ No newline at end of file
diff --git a/inference-engine/tools/benchmark_tool/benchmark.py b/inference-engine/tools/benchmark_tool/benchmark.py
new file mode 100644
index 000000000..0e5280f21
--- /dev/null
+++ b/inference-engine/tools/benchmark_tool/benchmark.py
@@ -0,0 +1,22 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import openvino.tools.benchmark as benchmark
+
+if __name__ == '__main__':
+    config = benchmark.CommandLineReader.read()
+    result = benchmark.Benchmark(config).run()
+    print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
+\ No newline at end of file
diff --git a/inference-engine/tools/calibration_tool/README.md b/inference-engine/tools/calibration_tool/README.md
new file mode 100644
index 000000000..6402705f0
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/README.md
@@ -0,0 +1,149 @@
+# OpenVINO™ Calibration Tool
+Inference Engine Calibration Tool calibrates a given FP32 model so that you can run calibrated model in low-precision 8-bit integer mode while keeping the input data of this model in the original precision.
+Inference Engine Calibration Tool is a Python\* command-line tool, which imports Python types from the `openvino.tools.calibration` package.
+
+Please, refer to https://docs.openvinotoolkit.org for details.
+
+## Hardware requirements
+Hardware requirements depend on a model. Typically for public models RAM memory size has to be not less then 16Gb, drive has to have not less then 30 Gb free space independently on operation system. Temporary directory is used to cache layers output during calibration.
+
+## Usage
+The Calibration Tool is configured in the same way as the Accuracy Checker. You can also use additional command-line arguments to define calibration-specific parameters.
+
+### Command-Line Arguments for the Accuracy Checker Tool reused in Calibration Tool
+| Argument                                     | Type   | Description                                             |
+| -------------------------------------------- | ------ | ------------------------------------------------------- |
+| -c, --config                                 | string | Required. Path to the YML file with local configuration |
+| -d, --definitions                            | string | Optional. Path to the YML file with definitions         |
+| -m, --models                                 | string | Optional. Prefix path to the models and weights         |
+| -s, --source                                 | string | Optional. Prefix path to the data source                |
+| -a, --annotations                            | string | Optional. Pefix path to the converted annotations and datasets meta data |
+| -e, --extensions                             | string | Optional. Prefix path to extensions folder              |
+| --cpu_extensions_mode, --cpu-extensions-mode | string | Optional. specified preferable set of processor instruction for automatic searching cpu extension lib: `avx2` or `sse4` |
+| -C, --converted_models, --converted-models   | string | Optional. Directory to store Model Optimizer converted models. Used for DLSDK launcher only |
+| -M, --model_optimizer, --model-optimizer     | string | Optional. Path to model optimizer caffe directory       |
+| --tf_custom_op_config_dir, --tf-custom-op-config-dir | string | Optional. Path to directory with tensorflow custom operation configuration files for model optimizer |
+| --tf_obj_detection_api_pipeline_config_path, --tf-obj-detection-api-pipeline-config-path | string | Optional. Path to directory with tensorflow object detection api pipeline configuration files for model optimizer |
+| --progress                                   | string | Optional. Progress reporter: `bar`, `print` or `None`   |
+| -td, --target_devices, --target-devices      | string | Optional. Space-separated list of devices for infer     |
+| -tt, --target_tags, --target-tags | string   | Optional. Space-separated list of launcher tags for infer        |
+
+### Specific Command Line Arguments for Calibration Tool
+| Argument                          | Type   | Description                                               |
+| --------------------------------- | ------ | --------------------------------------------------------- |
+| -p, --precision                   | string | Optional. Precision to calibrate. Default value is INT8   |
+| --ignore_layer_types, --ignore-layer-types | string | Optional. Layer types list which will be skipped during quantization |
+| --ignore_layer_types_path, --ignore-layer-types-path | string | Optional. Ignore layer types file path |
+| --ignore_layer_names, --ignore-layer-names | string | Optional. Layer names list which will be skipped during quantization |
+| --ignore_layer_names_path, --ignore-layer-names-path | string | Optional. Ignore layer names file path |
+| --batch_size, --batch-size        | integer| Optional. Batch size value. If not specified, the batch size value is determined from IR |
+| -th, --threshold                  | float | Optional. Accuracy drop of quantized model should not exceed this threshold. Should be pointer in percents without percent sign. (1% is default) |
+| -ic, --benchmark_iterations_count, --benchmark-iterations-count | integer | Optional. Benchmark itertations count. (1000 is default) |
+| -mn, --metric_name, --metric-name | string | Optional. Metric name used during calibration |
+| -mt, --metric_type, --metric-type | string | Optional. Metric type used during calibration |
+| -o, --output_dir, --output-dir    | string | Optional. Directory to store converted models. Original model directory is used if not defined |
+
+## Model calibration flow
+
+### Introduction
+Calibration tool read original FP32 model, calibration dataset and create low precision model. Low precision model has two differences from original model:
+1. Per channel statistics are defined. Statistics have minimum and maximum values for each layer and each channel. Model statistics are stored in Inference Engine intermediate representation file (IR) in XML format.
+2. `quantization_level` layer attribute is defined. The attribute defines precision which is used during inference.
+
+### Prerequisites
+* Model: Tensorflow\* Inception v1. You can download the model from here: https://github.com/tensorflow/models/tree/master/research/slim
+* Dataset: ImageNet. You can download ImageNet from here: http://www.image-net.org/download.php
+* YML configuration files: you can find YML configuration files and YML definition file which are used below in `configs` directory:
+  - `definitions.yml` - definition file
+  - `inception_v1.yml` - configuration file for Tensorflow\* Inception v1 model
+  - `ncf_config.yml` - configuration file for NCF model in OpenVINO\* Inference Engine Intermediate Representation format
+  - `ssd_mobilenet_v1_coco.yml` - configuration file for Tensorflow\* SSD Mobilenet v1 model
+  - `unet2d.yml` - configuration file for Unet2D mode in in OpenVINO\* Inference Engine Intermediate Representation format
+
+If you have custom topology with not supported accuracy metric or not suported custom dataset then you should add some components implementation in `openvino.tools.accuracy_checker` Python\* package yourself. Refer to `openvino.tools.accuracy_checker` documentation how to implement metric and dataset support. 
+
+There are steps to calibrate and evaluate result model:
+- Step #1. Convert data annotation files
+- Optional step for low precision model performance estimation.
+- Step #2. Calibration
+- Step #3. Result model evaluation
+
+Additional optional step before calibration is available to rough estimate possible INT8 performance.
+
+### Step #1. Convert data annotation files
+Calibration dataset is subset of training dataset. Use Convert Annotation Tool to convert ImageNet\* dataset to Calibration Tool readable data annotation files. Data annotation files describe subset of images which are used during calibration. Command line:
+```sh
+python convert_annotation.py imagenet --annotation_file /datasets/ImageNet/val.txt --labels_file /datasets/ImageNet/synset_words.txt -ss 2000 -o ~/annotations -a imagenet.pickle -m imagenet.json
+```
+
+> **NOTE:** For simplicity all command line tools in below steps use the same command line arguments. In practice [Collect Statistics Tool](./inference-engine/tools/collect_statistics_tool/README.md) uses calibration dataset, but [Accuracy Checker Tool](./inference-engine/tools/accuracy_checker_tool/README.md) has to use whole validation dataset.
+
+
+| Argument           | Type   | Description                                                                       |
+| -------------------| ------ | --------------------------------------------------------------------------------- |
+| --config           | string | Path to the YML file with local configuration                                     |
+| -d                 | string | Path to the YML file with definitions                                             |
+| -M                 | string | Path to model optimizer directory                                                 |
+| --models           | string | Prefix path to the models and weights                                             |
+| --source           | string | Prefix path to the data source                                                    |
+| --annotations      | string | Pefix path to the converted annotations and datasets meta data                    |
+| --converted_models | string | Directory to store Model Optimizer converted models. Used for DLSDK launcher only |
+
+
+### Optional step for low precision model performance estimation.
+Before calibration you can roughly estimate low presition performance with [Collect Statistics Tool](./inference-engine/tools/collect_statistics_tool/README.md).
+
+[Collect Statistics Tool](./inference-engine/tools/collect_statistics_tool/README.md) ignores metric in YML configuration file but you can use the same command line arguments.
+
+Command line:
+
+```sh
+python collect_statistics.py --config ~/inception_v1.yml -d ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations --converted_models ~/models
+```
+
+Result model has statistics which allow to infer this model in INT8 precision. To measure performance you can use [Benchmark Tool](./inference-engine/tools/benchmark_tool/README.md).
+
+### Step #2. Calibration
+During calibration process, the model is ajusted for efficient quantization and minimization of accuracy drop on calibration dataset. Calibration tool produces calibrated model which will be executed in low precision 8 bit quantzed mode after loading into CPU plugin.
+
+[Calibration Tool](./inference-engine/tools/calibration_tool/README.md) has flexible and extensible mechanism of enabling new data set and metrics. Each network has its own dedicated network metric and dataset where network was trained. Dataset description and network metrics can be reused for different network.
+
+To plug new dataset you need to develop YML file. To develop new metric you need to develop Python\* module implementing metric and describe in YML. Please, refer to [Accuracy Checker Tool](./inference-engine/tools/accuracy_checker_tool/README.md) for details.
+
+
+Command line example:
+```sh
+python calibrate.py --config ~/inception_v1.yml --definition ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --tf_custom_op_config_dir ~/tf_custom_op_configs --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations
+```
+
+### Step #3. Result model evaluation
+After calibration of the model it worse to evaluate network accuracy on whole validation set using [Accuracy Checker Tool](./inference-engine/tools/accuracy_checker_tool/README.md).
+
+#### Step #3.1 Check accuracy
+Command line:
+```sh
+python accuracy_check.py --config ~/inception_v1.yml -d ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --tf_custom_op_config_dir ~/tf_custom_op_configs --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations -tf dlsdk -td CPU
+```
+
+#### Step #3.2 Check performance
+Use `benchmark_app` command line tool to measure latency and throughput for synchronous and asynchronous modes. Note, please, `benchmark_app` command line tool uses converted OpenVINO\* Intermediate Representation model.
+
+Command line for synchronous mode:
+
+```sh
+./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/inception_v1.xml -d CPU -api sync
+```
+
+Command line for the asynchronous mode:
+```sh
+./benchmark_app -i <path_to_image>/inputImage.bmp -m <path_to_model>/inception_v1.xml -d CPU -api async
+```
+
+#### Optional step to check performance
+You can use Python\* [Benchmark Tool](./inference-engine/tools/benchmark_tool/README.md) command line tool to quickly check performance with the same command line arguments and configuration YML files as for [Calibration Tool](./inference-engine/tools/calibration_tool/README.md).
+
+Command line:
+```sh
+python benchmark.py --config ~/inception_v1.yml -d ~/defenitions.yml -M /home/user/intel/openvino/deployment_tools/model_optimizer --tf_custom_op_config_dir ~/tf_custom_op_configs --models ~/models --source /media/user/calibration/datasets --annotations ~/annotations --converted_models ~/models
+```
+
diff --git a/inference-engine/tools/calibration_tool/calibrate.py b/inference-engine/tools/calibration_tool/calibrate.py
new file mode 100644
index 000000000..c3034bd2f
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/calibrate.py
@@ -0,0 +1,23 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import openvino.tools.calibration as calibration
+
+if __name__ == '__main__':
+    with calibration.CommandLineProcessor.process() as config:
+        network = calibration.Calibrator(config).run()
+        if network:
+            network.serialize(config.output_model)
diff --git a/inference-engine/tools/calibration_tool/configs/definitions.yml b/inference-engine/tools/calibration_tool/configs/definitions.yml
new file mode 100644
index 000000000..a14d66095
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/configs/definitions.yml
@@ -0,0 +1,202 @@
+launchers:
+  - framework: dlsdk
+    device: CPU
+  - framework: caffe
+    device: CPU
+
+datasets:
+  - name: classification_dataset
+    data_source: ImageNet/original
+    annotation: ImageNet/accuracy_checker_annotations/2012/full/imagenet.pickle
+    dataset_meta: ImageNet/accuracy_checker_annotations/2012/full/imagenet.json
+    preprocessing:
+      - type: resize
+        size: 256
+      - type: crop
+        size: 224
+      - type: normalization
+        mean: IMAGENET
+    metrics:
+      - name: accuracy @ top1
+        type: accuracy
+        top_k: 1
+      - name: accuracy @ top5
+        type: accuracy
+        top_k: 5
+
+  - name: classification_dataset_1001classes
+    data_source: ImageNet/original
+    annotation: ImageNet/accuracy_checker_annotations/2012_1001classes/full/imagenet.pickle
+    dataset_meta: ImageNet/accuracy_checker_annotations/2012_1001classes/full/imagenet.json
+    preprocessing:
+      - type: bgr_to_rgb
+      - type: resize
+        size: 256
+      - type: crop
+        size: 224
+      - type: normalization
+        mean: 127.5
+        std: 127.5
+    metrics:
+      - name: accuracy @ top1
+        type: accuracy
+        top_k: 1
+      - name: accuracy @ top5
+        type: accuracy
+        top_k: 5
+
+  - name: classification_dataset_2015
+    data_source: ImageNet/original
+    annotation: ImageNet/accuracy_checker_annotations/2015/full/imagenet.pickle
+    dataset_meta: ImageNet/accuracy_checker_annotations/2015/full/imagenet.json
+    preprocessing:
+      - type: resize
+        size: 256
+      - type: crop
+        size: 224
+      - type: normalization
+        mean: 104, 117, 123
+    metrics:
+      - name: accuracy @ top1
+        type: accuracy
+        top_k: 1
+      - name: accuracy @ top5
+        type: accuracy
+        top_k: 5
+
+  - name: VOC2007
+    data_source: VOC/VOCdevkit/VOC2007/JPEGImages
+    annotation: VOC/accuracy_checker_annotations/VOC2007/full/voc07.pickle
+    dataset_meta: VOC/accuracy_checker_annotations/VOC2007/full/voc07.json
+    preprocessing:
+      - type: resize
+        size: 300
+      - type: normalization
+        mean: 104, 117, 123
+    postprocessing:
+      - type: resize_prediction_boxes
+    metrics:
+      - type: map
+        integral: 11point
+        ignore_difficult: True
+        presenter: print_scalar
+
+  - name: VOC2007_20classes
+    data_source: VOC/VOCdevkit/VOC2007/JPEGImages
+    annotation: VOC/accuracy_checker_annotations/VOC2007_20classes/full/voc07.pickle
+    dataset_meta: VOC/accuracy_checker_annotations/VOC2007_20classes/full/voc07.json
+    preprocessing:
+      - type: resize
+        size: 300
+      - type: normalization
+        mean: 104, 117, 123
+    postprocessing:
+      - type: resize_prediction_boxes
+    metrics:
+      - type: map
+        integral: 11point
+        ignore_difficult: True
+        presenter: print_scalar
+
+  - name: VOC2007_Segmentation
+    data_source: VOC/VOCdevkit/VOC2007
+    annotation: VOC/accuracy_checker_annotations/VOC2007/full/voc07_segmentation.pickle
+    dataset_meta: VOC/accuracy_checker_annotations/VOC2007/full/voc07_segmentation.json
+    postprocessing:
+      - type: resize_segmentation_mask
+        apply_to: prediction
+      - type: encode_segmentation_mask
+    metrics:
+      - type: segmentation_accuracy
+      - type: mean_iou
+      - type: mean_accuracy
+      - type: frequency_weighted_accuracy
+
+  - name: VOC2012_Segmentation
+    data_source: VOC/VOCdevkit/VOC2012
+    annotation: VOC/accuracy_checker_annotations/VOC2012/full/voc12_segmentation.pickle
+    dataset_meta: VOC/accuracy_checker_annotations/VOC2012/full/voc12_segmentation.json
+    postprocessing:
+      - type: resize_segmentation_mask
+        apply_to: prediction
+      - type: encode_segmentation_mask
+    metrics:
+      - type: segmentation_accuracy
+      - type: mean_iou
+      - type: mean_accuracy
+      - type: frequency_weighted_accuracy
+
+  - name: COCO2014_80cl
+    data_source: COCO/2014/val2014
+    annotation: COCO/accuracy_checker_annotations/2014/full/mscoco_detection_80cl.pickle
+    dataset_meta: COCO/accuracy_checker_annotations/2014/full/mscoco_detection_80cl.json
+    preprocessing:
+      - type: bgr_to_rgb
+      - type: resize
+        size: 300
+    postprocessing:
+      - type: resize_prediction_boxes
+    metrics:
+      - type: map
+        integral: 11point
+        ignore_difficult: True
+        presenter: print_scalar
+
+  - name: COCO2017_80cl
+    data_source: COCO/2017/val2017
+    annotation: COCO/accuracy_checker_annotations/2017/full/mscoco_detection_80cl.pickle
+    dataset_meta: COCO/accuracy_checker_annotations/2017/full/mscoco_detection_80cl.json
+    preprocessing:
+      - type: bgr_to_rgb
+      - type: resize
+        size: 300
+    postprocessing:
+      - type: resize_prediction_boxes
+    metrics:
+      - type: map
+        integral: 11point
+        ignore_difficult: True
+        presenter: print_scalar
+
+  - name: COCO2017_80cl_bkgr
+    data_source: COCO/2017/val2017
+    annotation: COCO/accuracy_checker_annotations/2017/full/mscoco_detection_80cl_bkgr.pickle
+    dataset_meta: COCO/accuracy_checker_annotations/2017/full/mscoco_detection_80cl_bkgr.json
+    preprocessing:
+      - type: bgr_to_rgb
+      - type: resize
+        size: 300
+    postprocessing:
+      - type: resize_prediction_boxes
+    metrics:
+      - type: map
+        integral: 11point
+        ignore_difficult: True
+        presenter: print_scalar
+
+  - name: COCO2017_90cl_bkgr
+    data_source: COCO/2017/val2017
+    annotation: COCO/accuracy_checker_annotations/2017/full/mscoco_detection_90cl_bkgr.pickle
+    dataset_meta: COCO/accuracy_checker_annotations/2017/full/mscoco_detection_90cl_bkgr.json
+    preprocessing:
+      - type: bgr_to_rgb
+      - type: resize
+        size: 300
+    postprocessing:
+      - type: resize_prediction_boxes
+    metrics:
+      - type: map
+        integral: 11point
+        ignore_difficult: True
+        presenter: print_scalar
+
+  - name: lfw
+    data_source: LFW/lfw
+    annotation: LFW/accuracy_checker_annotations/full/lfw.pickle
+    preprocessing:
+      - type: point_alignment
+        size: 400
+      - type: resize
+        size: 160
+    metrics:
+      - type: pairwise_accuracy_subsets
diff --git a/inference-engine/tools/calibration_tool/configs/inception_v1.yml b/inference-engine/tools/calibration_tool/configs/inception_v1.yml
new file mode 100644
index 000000000..86c832cd0
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/configs/inception_v1.yml
@@ -0,0 +1,29 @@
+models:
+  - name: GoogleNet_v1
+
+    # list of launchers for your topology.
+    launchers:
+        # launcher framework (e.g. caffe, dlsdk)
+      - framework: dlsdk
+        # device for infer (e.g. for dlsdk cpu, gpu, hetero:cpu, gpu ...)
+        device: CPU
+        # topology IR (*.prototxt for caffe, *.xml for InferenceEngine, etc)
+        # path to topology is prefixed with directory, specified in "-m/--models" option
+        tf_model: inception_v1.pb
+        # launcher returns raw result, so it should be converted
+        # to an appropriate representation with adapter
+        adapter: classification
+        mo_params:
+            data_type: FP32
+            input_shape: "(1, 224, 224, 3)"
+
+    # metrics, preprocessing and postprocessing are typically dataset specific, so dataset field
+    # specifies data and all other steps required to validate topology
+    # there is typically definitions file, which contains options for common datasets and which is merged
+    # during evaluation, but since "sample_dataset" is not used anywhere else, this config contains full definition
+    datasets:
+      # uniquely distinguishable name for dataset
+      # note that all other steps are specific for this dataset only
+      # if you need to test topology on multiple datasets, you need to specify
+      # every step explicitly for each dataset
+      - name: classification_dataset_1001classes
diff --git a/inference-engine/tools/calibration_tool/configs/ncf_config.yml b/inference-engine/tools/calibration_tool/configs/ncf_config.yml
new file mode 100644
index 000000000..3ba3d1a8d
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/configs/ncf_config.yml
@@ -0,0 +1,56 @@
+models:
+  - name: NCF_example
+
+    # list of launchers for your topology.
+    launchers:
+        # launcher framework (e.g. caffe, dlsdk)
+      - framework: dlsdk
+        # device for infer (e.g. for dlsdk cpu, gpu, hetero:cpu, gpu ...)
+        device: CPU
+        cpu_extensions: libcpu_extension.so
+        # topology IR (*.prototxt for caffe, *.xml for InferenceEngine, etc)
+        # path to topology is prefixed with directory, specified in "-m/--models" option
+        model:   graph_frozen.xml
+        # topology weights binary (*.caffemodel for caffe, *.bin for InferenceEngine)
+        weights: graph_frozen.bin
+        # launcher returns raw result, so it should be converted
+        # to an appropriate representation with adapter
+        adapter: hit_ratio_adapter
+
+        inputs:
+            - type: INPUT
+              value: "u"
+              name: embedding/embedding_lookup/placeholder_port_1
+            - type: INPUT
+              value: "i"
+              name: embedding_1/embedding_lookup/placeholder_port_1
+            - type: INPUT
+              value: "u"
+              name: embedding_2/embedding_lookup/placeholder_port_1
+            - type: INPUT
+              value: "i"
+              name: embedding_3/embedding_lookup/placeholder_port_1
+
+    # metrics, preprocessing and postprocessing are typically dataset specific, so dataset field
+    # specifies data and all other steps required to validate topology
+    # there is typically definitions file, which contains options for common datasets and which is merged
+    # during evaluation, but since "sample_dataset" is not used anywhere else, this config contains full definition
+    datasets:
+      # uniquely distinguishable name for dataset
+      # note that all other steps are specific for this dataset only
+      # if you need to test topology on multiple datasets, you need to specify
+      # every step explicitly for each dataset
+      - name: ncf_validation_dataset.npy
+        # directory where input images are searched.
+        # prefixed with directory specified in "-s/--source" option
+        # name of converted annotation file (specified in -a option during annotation conversion)
+        # prefixed with directory specified in "-a/--annotations" option
+        annotation: ncf_converter.pickle
+        dataset_meta: ncf_converter.json
+
+        reader: ncf_data_reader
+
+        # list of metrics, calculated on dataset
+        metrics:
+          - type: hit_ratio
+          - type: ndcg
diff --git a/inference-engine/tools/calibration_tool/configs/ssd_mobilenet_v1_coco.yml b/inference-engine/tools/calibration_tool/configs/ssd_mobilenet_v1_coco.yml
new file mode 100644
index 000000000..778621384
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/configs/ssd_mobilenet_v1_coco.yml
@@ -0,0 +1,40 @@
+models:
+  - name: ssd_mobilenet_v1_coco
+
+    # list of launchers for your topology.
+    launchers:
+        # launcher framework (e.g. caffe, dlsdk)
+      - framework: dlsdk
+        # device for infer (e.g. for dlsdk cpu, gpu, hetero:cpu, gpu ...)
+        device: CPU
+        # topology IR (*.prototxt for caffe, *.xml for InferenceEngine, etc)
+        # path to topology is prefixed with directory, specified in "-m/--models" option
+        tf_model:   ssd_mobilenet_v1_coco.pb
+        # launcher returns raw result, so it should be converted
+        # to an appropriate representation with adapter
+        adapter: ssd
+        cpu_extensions: AUTO
+        mo_params:
+            data_type: FP32
+            tensorflow_use_custom_operations_config: ssd_v2_support.json
+            tensorflow_object_detection_api_pipeline_config: ssd_mobilenet_v1_coco.config
+
+    # metrics, preprocessing and postprocessing are typically dataset specific, so dataset field
+    # specifies data and all other steps required to validate topology
+    # there is typically definitions file, which contains options for common datasets and which is merged
+    # during evaluation, but since "sample_dataset" is not used anywhere else, this config contains full definition
+    datasets:
+      # uniquely distinguishable name for dataset
+      # note that all other steps are specific for this dataset only
+      # if you need to test topology on multiple datasets, you need to specify
+      # every step explicitly for each dataset
+      - name: COCO2017_90cl_bkgr
+
+        # list of metrics, calculated on dataset
+        metrics:
+          - type: map
+            integral: 11point
+            ignore_difficult: True
+            presenter: print_scalar
+
+          - type: coco_precision
diff --git a/inference-engine/tools/calibration_tool/configs/unet2d.yml b/inference-engine/tools/calibration_tool/configs/unet2d.yml
new file mode 100644
index 000000000..49ed489d6
--- /dev/null
+++ b/inference-engine/tools/calibration_tool/configs/unet2d.yml
@@ -0,0 +1,54 @@
+models:
+  - name: UNet_2D
+
+    # list of launchers for your topology.
+    launchers:
+        # launcher framework (e.g. caffe, dlsdk)
+      - framework: dlsdk
+        # device for infer (e.g. for dlsdk cpu, gpu, hetero:cpu, gpu ...)
+        device: CPU
+        # topology IR (*.prototxt for caffe, *.xml for InferenceEngine, etc)
+        # path to topology is prefixed with directory, specified in "-m/--models" option
+        model: model.ckpt.xml
+        # topology weights binary (*.caffemodel for caffe, *.bin for InferenceEngine)
+        weights: model.ckpt.bin
+        # launcher returns raw result, so it should be converted
+        # to an appropriate representation with adapter
+        adapter: brain_tumor_segmentation
+        cpu_extensions: AUTO
+
+    # metrics, preprocessing and postprocessing are typically dataset specific, so dataset field
+    # specifies data and all other steps required to validate topology
+    # there is typically definitions file, which contains options for common datasets and which is merged
+    # during evaluation, but since "sample_dataset" is not used anywhere else, this config contains full definition
+    datasets:
+      # uniquely distinguishable name for dataset
+      # note that all other steps are specific for this dataset only
+      # if you need to test topology on multiple datasets, you need to specify
+      # every step explicitly for each dataset
+      - name: brats
+        data_source: Task01_BrainTumour
+        # directory where input images are searched.
+        # prefixed with directory specified in "-s/--source" option
+        # name of converted annotation file (specified in -a option during annotation conversion)
+        # prefixed with directory specified in "-a/--annotations" option
+        annotation: annotations/unet/calibration/brats.pickle
+
+        reader: nifti_reader
+        preprocessing:
+          - type: crop3d
+            size: 128
+          - type: normalize3d
+
+        postprocessing:
+          - type: crop_segmentation_mask
+            apply_to: annotation
+            size: 128
+          - type: clip_segmentation_mask
+            apply_to: annotation
+            max_value: 1
+
+        # list of metrics, calculated on dataset
+        metrics:
+          - type: dice
+            presenter: return_value
diff --git a/inference-engine/tools/collect_statistics_tool/README.md b/inference-engine/tools/collect_statistics_tool/README.md
new file mode 100644
index 000000000..e5a73ef6a
--- /dev/null
+++ b/inference-engine/tools/collect_statistics_tool/README.md
@@ -0,0 +1,7 @@
+# OpenVINO™ Collect Statistics Tool
+Inference Engine Collect Statistics Tool collects statistics for a given model.
+
+Please, refer to https://docs.openvinotoolkit.org for details.
+
+## Hardware requirements
+Hardware requirements depend on a model. Typically for public models RAM memory size has to be not less then 16Gb independently on operation system.
+\ No newline at end of file
diff --git a/inference-engine/tools/collect_statistics_tool/collect_statistics.py b/inference-engine/tools/collect_statistics_tool/collect_statistics.py
new file mode 100644
index 000000000..95b83646b
--- /dev/null
+++ b/inference-engine/tools/collect_statistics_tool/collect_statistics.py
@@ -0,0 +1,39 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+import os
+from openvino.tools.calibration import CalibratorConfiguration, CalibrationConfigurationHelper, CalibratorFactory, CommandLineProcessor
+from openvino.tools.utils import Path
+
+def collect_statistics():
+    with CommandLineProcessor.process() as configuration:
+        calibrator = CalibratorFactory.create(configuration.precision, CalibratorConfiguration(configuration))
+
+        print("Collecting FP32 statistics for {}...".format(configuration.model))
+        fp32_result = calibrator.infer(add_outputs=True, collect_aggregated_statistics=True)
+        print("FP32 accuracy: {0:.4f}%".format(100.0 * fp32_result.metrics.accuracy))
+
+        output_model_file_path = Path.get_model(configuration.output_model, "_statistics")
+        output_weights_file_path = Path.get_weights(configuration.output_weights, "_statistics")
+
+        quantization_levels = calibrator.get_quantization_levels(CalibrationConfigurationHelper.read_ignore_layer_names(configuration))
+        statistics = fp32_result.aggregated_statistics.get_node_statistics()
+        calibrator.save(output_model_file_path, output_weights_file_path, quantization_levels, statistics)
+        print("Network with statistics was written to {}.(xml|bin) IR file".format(os.path.splitext(output_model_file_path)[0]))
+
+if __name__ == '__main__':
+    collect_statistics()
diff --git a/model-optimizer/extensions/back/ConvolutionReshaper.py b/model-optimizer/extensions/back/ConvolutionReshaper.py
index 155d1eb98..9cbbb10ee 100644
--- a/model-optimizer/extensions/back/ConvolutionReshaper.py
+++ b/model-optimizer/extensions/back/ConvolutionReshaper.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,11 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from extensions.back.ReshapeMutation import ReshapeMutation
 from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
 from mo.ops.reshape import Reshape
 
 
@@ -30,6 +31,9 @@ class ConvolutionReshaper(BackReplacementPattern):
     """
     enabled = True
 
+    def run_before(self):
+        return [ReshapeMutation]
+
     @staticmethod
     def pattern():
         return dict(
@@ -39,7 +43,7 @@ class ConvolutionReshaper(BackReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         conv = match['conv']
 
         assert len(conv.out_nodes()) == 1, "Convolution operation {} should have 1 output data node".format(conv.id)
diff --git a/model-optimizer/extensions/back/CreateConstNodes.py b/model-optimizer/extensions/back/CreateConstNodes.py
new file mode 100644
index 000000000..8dce9e79f
--- /dev/null
+++ b/model-optimizer/extensions/back/CreateConstNodes.py
@@ -0,0 +1,84 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.back.replacement import BackReplacementPattern
+from mo.front.extractor import update_ie_fields
+from mo.graph.graph import *
+
+
+class CreateConstNodesReplacement(BackReplacementPattern):
+    enabled = False
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[
+                ('data', dict(kind='data'))
+            ],
+            edges=[]
+        )
+
+    @staticmethod
+    def _check_bin_attrs(node):
+        """Check that at least one output edge from node without 'bin' attribute."""
+        out_edges = node.out_edges()
+        bin_in_out_ports = ['bin' in edge for edge in out_edges]
+        out_node = [node.has('op') and node.op == 'OpOutput' for node in node.out_nodes()]
+        return np.any(out_node) or not np.all(bin_in_out_ports)
+
+    @staticmethod
+    def _check_that_node_from_body(node):
+        """Check that all output edges from node have 'internal_port_id'
+        (that shows that this node is from TI body)"""
+        n_ports = len(node.out_edges())
+        internal_port_in_out_ports = ['internal_port_id' in edge for edge in node.out_edges()]
+        return np.all(internal_port_in_out_ports) and n_ports
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        """
+            Adds layers with type 'Const' that produce blob from 'bin' file. The pass finds data nodes with one output which
+            doesn't have edge with 'bin' attribute (or with two outputs and at least one output havent 'bin' attr)
+            and generate Const op node before the node and data node before the Const node. The data node before 'Const'
+            node is needed because the op node dumps input tensors to bin file.
+        """
+        node = match['data']
+        if len(node.in_nodes()) > 0:
+            return
+
+        if self._check_bin_attrs(node):
+            if node.has_valid('value'):
+                const_node_name = graph.unique_id(node.id + '_const')
+                log.debug("Added Const node '{}'".format(const_node_name))
+                graph.add_node(const_node_name, name=const_node_name, type='Const', kind='op', op='Const',
+                               precision="FP32")
+                update_ie_fields(node.graph.node[const_node_name])
+                graph.add_edges_from([(const_node_name, node.id, {'out': 0})])
+
+                copy_data_node_name = graph.unique_id(node.id + '_copy_')
+                graph.add_node(copy_data_node_name, kind='data', precision="FP32", shape=np.array(node.shape),
+                               value=np.array(node.value))
+
+                if node.has_valid('force_precision'):
+                    Node(graph, copy_data_node_name)['force_precision'] = node.force_precision
+                    Node(graph, const_node_name)['force_precision'] = node.force_precision
+                graph.add_edges_from([(copy_data_node_name, const_node_name, {'in': 0, 'bin': 'custom'})])
+            elif not self._check_that_node_from_body(node):
+                log.debug('node = {}'.format(node.graph.node[node.id]))
+                raise Error(
+                    'Discovered data node without inputs and value, node.name = {}, consumer.name = {}. ' +
+                    refer_to_faq_msg(23),
+                    node.soft_get('name'),
+                    node.out_node().soft_get('name') if len(node.out_nodes()) else "<no consumer>"
+                )
diff --git a/model-optimizer/extensions/back/CreateConstNodes_test.py b/model-optimizer/extensions/back/CreateConstNodes_test.py
new file mode 100644
index 000000000..a0a0aec03
--- /dev/null
+++ b/model-optimizer/extensions/back/CreateConstNodes_test.py
@@ -0,0 +1,138 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+import numpy as np
+from extensions.back.CreateConstNodes import CreateConstNodesReplacement
+from mo.utils.unittest.graph import build_graph_with_attrs, compare_graphs
+
+
+class CreateConstNodesReplacementTest(unittest.TestCase):
+    nodes = [
+        ('data_node', {'kind': 'data', 'shape': None, 'value': None}),
+        ('next_node', {'kind': 'op'}),
+    ]
+    edges = [
+        ('data_node', 'next_node')
+    ]
+
+    new_nodes = [
+        ('const', {'kind': 'op', 'op': 'Const'}),
+        ('const_data', {'kind': 'data'})
+    ]
+    new_edges = [
+        ('const', 'data_node'),
+        ('const_data', 'const')
+    ]
+
+    def test_one_node(self):
+        """We should add Const node and data node."""
+        shape = np.array([2, 3, 4])
+        data = np.zeros(shape)
+        graph = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes,
+            edges_with_attrs=self.edges,
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data})]
+        )
+        graph_ref = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes + self.new_nodes,
+            edges_with_attrs=self.edges + self.new_edges,
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data}),
+                                     ('const_data', {'shape': shape, 'value': data})]
+        )
+        tested_pattern = CreateConstNodesReplacement()
+        tested_pattern.find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph_ref, last_node='next_node')
+        self.assertTrue(flag, resp)
+
+    def test_one_bin_node(self):
+        """Nothing should happen."""
+        shape = np.array([2, 3, 4])
+        data = np.zeros(shape)
+        graph = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes,
+            edges_with_attrs=self.edges,
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data})],
+            update_edge_attrs={('data_node', 'next_node', 0): {'bin': 0}},
+        )
+        tested_pattern = CreateConstNodesReplacement()
+        tested_pattern.find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph, last_node='next_node')
+        self.assertTrue(flag, resp)
+
+    def test_force_precision_parameter(self):
+        precision = 'FP16'
+        shape = np.array([2, 3, 4])
+        data = np.zeros(shape)
+        graph = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes,
+            edges_with_attrs=self.edges,
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data, 'force_precision': precision})]
+        )
+        graph_ref = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes + self.new_nodes,
+            edges_with_attrs=self.edges + self.new_edges,
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data}),
+                                     ('const_data', {'shape': shape, 'value': data, 'force_precision': precision}),
+                                     ('const', {'force_precision': precision})]
+        )
+        tested_pattern = CreateConstNodesReplacement()
+        tested_pattern.find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph_ref, last_node='next_node')
+        self.assertTrue(flag, resp)
+
+        #check that force precision was added to data and Const nodes
+        force_precision_const_node = graph.nodes['data_node_const']['force_precision']
+        force_precision_new_data = graph.nodes['data_node_copy_']['force_precision']
+        self.assertEqual(force_precision_const_node, precision)
+        self.assertEqual(force_precision_new_data, precision)
+
+    def test_two_nodes_with_bin(self):
+        """Test case for data node with 2 consumers with bin edge attr.
+        Nothing should happened."""
+        shape = np.array([2, 3, 4])
+        data = np.zeros(shape)
+        graph = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes + [('next_node_2', {'kind': 'op'})],
+            edges_with_attrs=self.edges + [('data_node', 'next_node_2')],
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data})],
+            update_edge_attrs={('data_node', 'next_node', 0): {'bin': 0}, ('data_node', 'next_node_2', 0): {'bin': 0}},
+        )
+        tested_pattern = CreateConstNodesReplacement()
+        tested_pattern.find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph, last_node='next_node')
+        self.assertTrue(flag, resp)
+
+    def test_two_nodes_one_bin(self):
+        """Test case for two output nodes, one with 'bin' parameter, other without."""
+        shape = np.array([2, 3, 4])
+        data = np.zeros(shape)
+        graph = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes + [('next_node_2', {'kind': 'op'})],
+            edges_with_attrs=self.edges + [('data_node', 'next_node_2')],
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data})],
+            update_edge_attrs={('data_node', 'next_node', 0): {'bin': 0}},
+        )
+        graph_ref = build_graph_with_attrs(
+            nodes_with_attrs=self.nodes + self.new_nodes + [('next_node_2', {'kind': 'op'})],
+            edges_with_attrs=self.edges + self.new_edges + [('data_node', 'next_node_2')],
+            update_nodes_attributes=[('data_node', {'shape': shape, 'value': data}),
+                                     ('const_data', {'shape': shape, 'value': data})]
+        )
+        tested_pattern = CreateConstNodesReplacement()
+        tested_pattern.find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph_ref, last_node='next_node')
+        self.assertTrue(flag, resp)
+
diff --git a/model-optimizer/extensions/back/DumpFakeQuantStat.py b/model-optimizer/extensions/back/DumpFakeQuantStat.py
new file mode 100644
index 000000000..b161ceb28
--- /dev/null
+++ b/model-optimizer/extensions/back/DumpFakeQuantStat.py
@@ -0,0 +1,57 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import networkx as nx
+import numpy as np
+
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Node
+from mo.middle.passes.eliminate import remove_op_nodes
+from mo.utils.graph import pseudo_topological_sort
+
+
+class DumpFakeQuantStat(BackReplacementPattern):
+    enabled = True
+
+    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+        intervals = {}
+        for n in pseudo_topological_sort(graph):
+            node = Node(graph, n)
+            if not node.has('op') or (node.op != 'FakeQuantWithMinMaxVars' and node.op != 'Quantize'):
+                continue
+            if node.op == 'Quantize':
+                # check if input range matches output range
+                low_match = np.all(node.in_node(1).value == node.in_node(3).value)
+                high_match = np.all(node.in_node(2).value == node.in_node(4).value)
+                if not low_match or not high_match:
+                    continue
+
+            prev_node = node.in_node().in_node()
+            prev_node_id = prev_node.id
+            prev_node_out_shape = prev_node.out_node()['shape']
+            C = prev_node_out_shape[1]
+            assert node.in_node(1).value.size == 1
+            assert node.in_node(2).value.size == 1
+            min = ', '.join([str(node.in_node(1).value.flatten()[0])] * C)
+            max = ', '.join([str(node.in_node(2).value.flatten()[0])] * C)
+            intervals[prev_node_id] = {'min': min, 'max': max}
+        if intervals:
+            if 'statistics' not in graph.graph:
+                graph.graph['statistics'] = intervals
+            else:
+                graph.graph['statistics'].update(intervals)
+            remove_op_nodes(graph, {'op': 'FakeQuantWithMinMaxVars'})
+            remove_op_nodes(graph, {'op': 'Quantize'})
diff --git a/model-optimizer/extensions/back/EltwiseBroadcast.py b/model-optimizer/extensions/back/EltwiseBroadcast.py
index a75974a0e..fce51d68c 100644
--- a/model-optimizer/extensions/back/EltwiseBroadcast.py
+++ b/model-optimizer/extensions/back/EltwiseBroadcast.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ import networkx as nx
 import numpy as np
 
 from mo.back.replacement import BackReplacementPattern
-from mo.graph.graph import unique_id, Node
+from mo.graph.graph import Node, Graph
 from mo.ops.tile import Tile
 
 
@@ -36,7 +36,7 @@ class EltwiseBroadcast(BackReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         node = match['op']
         shapes = [in_node.shape for _, in_node in node.in_nodes().items()]
         out_shape = node.out_node().shape
@@ -69,7 +69,7 @@ class EltwiseBroadcast(BackReplacementPattern):
                 if shapes[input_idx][i] == 1 and out_shape[i] > 1:
                     new_op = tile.create_node([input], dict(axis=i, tiles=out_shape[i]))
                     # add a data node following a new operation node
-                    data_id = unique_id(graph, node.name)
+                    data_id = graph.unique_id(node.name)
                     graph.add_node(data_id, kind='data', shape=None, value=None)
                     new_data = Node(graph, data_id)
                     graph.add_edge(new_op.id, new_data.id, **{'out': 0})
diff --git a/model-optimizer/extensions/back/EnableConstantStridedSlice.py b/model-optimizer/extensions/back/EnableConstantStridedSlice.py
new file mode 100644
index 000000000..2090d2d11
--- /dev/null
+++ b/model-optimizer/extensions/back/EnableConstantStridedSlice.py
@@ -0,0 +1,36 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
+
+
+class EnableConstantStridedSlice(BackReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].keep_shape_ops]
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[('const_strided_slice', {'op': 'StridedSlice', 'type': lambda type: type != 'StridedSlice'}),
+                   ('data', {'kind': 'data', 'value': lambda value: value is not None})
+                   ],
+            edges=[('const_strided_slice', 'data')],
+        )
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        graph.node[match['const_strided_slice'].id]['type'] = 'StridedSlice'
diff --git a/model-optimizer/extensions/back/PackBinaryWeights.py b/model-optimizer/extensions/back/PackBinaryWeights.py
new file mode 100644
index 000000000..c1b8f63a9
--- /dev/null
+++ b/model-optimizer/extensions/back/PackBinaryWeights.py
@@ -0,0 +1,58 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import networkx as nx
+import numpy as np
+
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Node, Graph
+from mo.ops.tile import Tile
+
+
+class PackBinaryWeights(BackReplacementPattern):
+    enabled = True
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[
+                ('op', dict(kind='op', type='BinaryConvolution'))],
+            edges=[]
+        )
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        conv = match['op']
+        assert len(conv.in_nodes()) == 2
+        weights = conv.in_port(1).data.get_value().flatten()
+        weights_rounded = np.round(weights)
+        assert np.all(np.isclose(weights, weights_rounded))
+        assert len(conv.in_node(1).out_nodes()) == 1
+        weights_rounded = np.array(weights_rounded, dtype=np.int32) + 1  # -1 --> 0
+        # Reversing element in chunks by 8 elements to pack bits correctly
+        # First need to pad data with necessary number of element to make the length dividable by 8
+        pad = (-len(weights_rounded))%8
+        weights_rounded = np.array(np.concatenate((weights_rounded, np.zeros([pad]))), dtype=np.int32)
+        assert len(weights_rounded) % 8 == 0
+        weights_rounded = weights_rounded.reshape([len(weights_rounded)//8, 8])
+        weights_rounded = np.flip(weights_rounded, axis=1)
+        weights_rounded = weights_rounded.flatten()
+        packed = np.packbits(weights_rounded)
+        conv.in_port(1).data.set_value(packed)
+        conv.in_node(1)['force_precision'] = 'uint8'
+        conv['packed_weights'] = 1
+\ No newline at end of file
diff --git a/model-optimizer/extensions/back/PermuteForReshape.py b/model-optimizer/extensions/back/PermuteForReshape.py
index f0f14c430..015dddea9 100644
--- a/model-optimizer/extensions/back/PermuteForReshape.py
+++ b/model-optimizer/extensions/back/PermuteForReshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@ from extensions.back.ConvolutionReshaper import ConvolutionReshaper
 from extensions.back.TileReshaper import TileReshaper
 from mo.back.replacement import BackReplacementPattern
 from mo.front.common.layout import get_width_dim, get_height_dim, get_features_dim, indices_mapping
+from mo.graph.graph import Graph
 from mo.ops.op import PermuteAttrs
 from mo.ops.permute import Permute
 
@@ -46,7 +47,7 @@ class PermuteForReshape(BackReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         reshape = match['reshape']
         assert len(reshape.in_nodes()) > 0
         if graph.graph['layout'] == 'NCHW' or reshape.has_and_set('nchw_layout') or\
diff --git a/model-optimizer/extensions/back/PermuteForReshape_test.py b/model-optimizer/extensions/back/PermuteForReshape_test.py
index 6efc48293..dc33d37bd 100644
--- a/model-optimizer/extensions/back/PermuteForReshape_test.py
+++ b/model-optimizer/extensions/back/PermuteForReshape_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/back/RNNSequenceTypeRename.py b/model-optimizer/extensions/back/RNNSequenceTypeRename.py
new file mode 100644
index 000000000..dda359909
--- /dev/null
+++ b/model-optimizer/extensions/back/RNNSequenceTypeRename.py
@@ -0,0 +1,40 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
+
+
+class RNNSequence(BackReplacementPattern):
+    """
+    This transform change type RNNSequence (internal MO type for all recurrent layers)
+    to correct operation name.
+    """
+    enabled = True
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('rnn_layer', {'type': 'RNNSequence'})
+            ],
+            edges=[]
+        )
+
+    _supported_ops = ['RNN', 'LSTM', 'GRU']
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        rnn_layer = match['rnn_layer']
+        assert rnn_layer['op'] in self._supported_ops
+        rnn_layer['type'] = rnn_layer['op'] + 'Sequence'
diff --git a/model-optimizer/extensions/back/ReshapeMutation.py b/model-optimizer/extensions/back/ReshapeMutation.py
new file mode 100644
index 000000000..e8365abab
--- /dev/null
+++ b/model-optimizer/extensions/back/ReshapeMutation.py
@@ -0,0 +1,89 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph, Node
+from mo.middle.pattern_match import for_each_sub_graph_recursively
+
+
+class ReshapeMutation(BackReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[('reshape', {'kind': 'op', 'type': 'Reshape'})],
+            edges=[],
+        )
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        reshape = match['reshape']
+        if hasattr(reshape, 'dim') and reshape.dim is not None:
+            reshape_inputs = reshape.in_nodes()
+            value = np.array(reshape.dim)
+            shape = np.array(value.shape)
+            del reshape.graph.node[reshape.id]['dim']
+
+            if 1 in reshape_inputs:
+                reshape_inputs[1].value = value
+                reshape_inputs[1].shape = shape
+            else:
+                const_id = graph.unique_id(reshape.id + '/DimData')
+                graph.add_node(const_id,
+                               **{'kind': 'data', 'value': value, 'shape': shape, 'name': reshape.id + '/DimData'})
+                graph.add_edge(const_id, reshape.id, **{'in': 1})
+
+
+class DisableReshapeMutationInTensorIterator(BackReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        return [ReshapeMutation]
+
+    @staticmethod
+    def add_supported_attrs_to_node(node: Node, params: list):
+        node.graph.node[node.id].update({
+            'IE': [(
+                'layer',
+                [('id', lambda node: node.node), 'name', 'precision', 'type'],
+                [
+                    ('data', params, []),
+                    '@ports',
+                    '@consts'])]
+        })
+
+    def reshapes_with_two_inputs_to_reshape_with_dim(self, graph: Graph):
+        reshapes = graph.get_op_nodes(op='Reshape')
+
+        for reshape in reshapes:
+            in_nodes = reshape.in_nodes()
+
+            if len(in_nodes) == 1:
+                continue
+            assert len(in_nodes) == 2, "Reshape operation should have 2 inputs or 1 input and `dim` attribute"
+
+            reshape['dim'] = reshape.in_port(1).get_connection().data.get_value()
+            reshape.in_port(1).disconnect()
+
+            params = [('dim', lambda node: ','.join(map(str, node['dim'])))]
+            self.add_supported_attrs_to_node(reshape, params)
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for_each_sub_graph_recursively(graph, self.reshapes_with_two_inputs_to_reshape_with_dim)
diff --git a/model-optimizer/extensions/back/ShufflenetReLUReorder.py b/model-optimizer/extensions/back/ShufflenetReLUReorder.py
index 234c78c62..2323d64a9 100644
--- a/model-optimizer/extensions/back/ShufflenetReLUReorder.py
+++ b/model-optimizer/extensions/back/ShufflenetReLUReorder.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
 
 
 class ShufflenetReLUReorder(BackReplacementPattern):
@@ -50,7 +50,7 @@ class ShufflenetReLUReorder(BackReplacementPattern):
                    ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         relu = match['relu']
         reshape1 = match['reshape1']
         reshape2_data = match['reshape2_data']
diff --git a/model-optimizer/extensions/back/ShufflenetReLUReorder_test.py b/model-optimizer/extensions/back/ShufflenetReLUReorder_test.py
index 27c0f34cc..fd15e9b54 100644
--- a/model-optimizer/extensions/back/ShufflenetReLUReorder_test.py
+++ b/model-optimizer/extensions/back/ShufflenetReLUReorder_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/back/TileReshaper.py b/model-optimizer/extensions/back/TileReshaper.py
index 7c6e2d612..f1123c8f2 100644
--- a/model-optimizer/extensions/back/TileReshaper.py
+++ b/model-optimizer/extensions/back/TileReshaper.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,11 +13,12 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import networkx as nx
+
 import numpy as np
 
 from extensions.back.EltwiseBroadcast import EltwiseBroadcast
 from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
 from mo.ops.reshape import Reshape
 
 
@@ -37,7 +38,7 @@ class TileReshaper(BackReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         """
         Workarounds not supported type of Tile in Inference Engine (Tiles are supported for 2-D or 4-D tensors):
         Searches for Tiles with 3D shapes and covers it with Reshapes.
diff --git a/model-optimizer/extensions/back/TileReshaper_test.py b/model-optimizer/extensions/back/TileReshaper_test.py
index 5c432190e..2fac84f9f 100644
--- a/model-optimizer/extensions/back/TileReshaper_test.py
+++ b/model-optimizer/extensions/back/TileReshaper_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/back/disable_unsupported_ND_operations.py b/model-optimizer/extensions/back/disable_unsupported_ND_operations.py
index f657bc7e4..2b6283052 100644
--- a/model-optimizer/extensions/back/disable_unsupported_ND_operations.py
+++ b/model-optimizer/extensions/back/disable_unsupported_ND_operations.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.back.replacement import BackReplacementPattern
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 
 
@@ -29,7 +29,7 @@ class DisableUnsupportedNDOperations(BackReplacementPattern):
 
     unsupported_operations = ['Convolution', 'Deconvolution', 'Pooling']
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         unsupported_nodes = []
         for node in graph.nodes():
             node = Node(graph, node)
diff --git a/model-optimizer/extensions/back/insert_compatibility_l2normalization.py b/model-optimizer/extensions/back/insert_compatibility_l2normalization.py
index 4f4dfe9fc..994b5af5a 100644
--- a/model-optimizer/extensions/back/insert_compatibility_l2normalization.py
+++ b/model-optimizer/extensions/back/insert_compatibility_l2normalization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import numpy as np
 import networkx as nx
 from mo.ops.op import Op
-from mo.graph.graph import create_edge
+from mo.graph.graph import Graph
 from mo.back.replacement import BackReplacementPattern
 
 
@@ -32,7 +32,7 @@ class CompatibilityL2NormalizationPattern(BackReplacementPattern):
             ],
             edges=[])
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         """
         Adds Normalize layer weights, which are required by Inference Engine, 
         but do not always exist in MXNet model. 
@@ -42,7 +42,7 @@ class CompatibilityL2NormalizationPattern(BackReplacementPattern):
         
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
          match : dict
            Patterns which were found in graph structure.
@@ -51,4 +51,4 @@ class CompatibilityL2NormalizationPattern(BackReplacementPattern):
         if len(l2_normalization_node.in_nodes()) < 2:
             value = np.full([l2_normalization_node.in_node(0).shape[1]], 1.0, dtype=np.float32)
             weights_node = Op.create_input_data_node(graph, name=l2_normalization_node['name'] + '_weights', value=value)
-            create_edge(weights_node, l2_normalization_node, out_port=0, in_port=1, edge_attrs={'bin': 'weights'})
+            graph.create_edge(weights_node, l2_normalization_node, out_port=0, in_port=1, edge_attrs={'bin': 'weights'})
diff --git a/model-optimizer/extensions/back/insert_compatibility_l2normalization_test.py b/model-optimizer/extensions/back/insert_compatibility_l2normalization_test.py
index a1296ac09..2179339e5 100644
--- a/model-optimizer/extensions/back/insert_compatibility_l2normalization_test.py
+++ b/model-optimizer/extensions/back/insert_compatibility_l2normalization_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/back/kaldi_remove_memory_output.py b/model-optimizer/extensions/back/kaldi_remove_memory_output.py
index 72e4cb4f2..3892635ad 100644
--- a/model-optimizer/extensions/back/kaldi_remove_memory_output.py
+++ b/model-optimizer/extensions/back/kaldi_remove_memory_output.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
 
 
 class KaldiRemoveMemoryOutputBackReplacementPattern(BackReplacementPattern):
@@ -26,16 +25,18 @@ class KaldiRemoveMemoryOutputBackReplacementPattern(BackReplacementPattern):
     def pattern():
         return dict(
             nodes=[
-                ('memory_node', dict(kind='op', op='Memory')),
-                ('data_node', dict(kind='data'))
+                ('memory_node', dict(op='Memory')),
+                ('data_node', dict(kind='data')),
+                ('op_output', dict(op='OpOutput'))
             ],
             edges=[
-                ('memory_node', 'data_node', {'out': 0})
+                ('memory_node', 'data_node'),
+                ('data_node', 'op_output')
             ]
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         """
         Need to find the pattern: Memory -> Data -> OpOutput
 
@@ -47,7 +48,7 @@ class KaldiRemoveMemoryOutputBackReplacementPattern(BackReplacementPattern):
 
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
         match : dict
            Patterns which were found in graph structure.
@@ -55,8 +56,5 @@ class KaldiRemoveMemoryOutputBackReplacementPattern(BackReplacementPattern):
         memory = match['memory_node']
         data = match['data_node']
 
-        # Those Memory nodes that are not output ones, should not be replaced
-        if not data.has_and_set('is_output'):
-            return
         graph.remove_edge(memory.id, data.id)
         graph.remove_node(data.id)
diff --git a/model-optimizer/extensions/back/kaldi_remove_memory_output_test.py b/model-optimizer/extensions/back/kaldi_remove_memory_output_test.py
index c72351c23..12269c686 100644
--- a/model-optimizer/extensions/back/kaldi_remove_memory_output_test.py
+++ b/model-optimizer/extensions/back/kaldi_remove_memory_output_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -31,21 +31,28 @@ class KaldiRemoveMemoryOutputTest(unittest.TestCase):
         },
         'output_node': {
             'kind': 'data'
+        },
+        'op_output': {
+            'kind': 'data',
+            'op': 'OpOutput',
         }
     }
 
     def test_remove_out_data_for_memory(self):
-        graph = build_graph(self.nodes, [('input_node', 'memory_node')])
-        # Need for matching in pattern. The edge memory_node->out_node must contain only the attribute 'out' = 0
-        # build_graph creates edge  memory_node->out_node with attributes 'in' and 'out'
-        graph.add_node('output_node', is_output=True, **self.nodes['output_node'])
-        graph.add_edge('memory_node', 'output_node', out=0)
+        graph = build_graph(self.nodes,
+                            [
+                                ('input_node', 'memory_node'),
+                                ('memory_node', 'output_node'),
+                                ('output_node', 'op_output')
+                            ])
         KaldiRemoveMemoryOutputBackReplacementPattern().find_and_replace_pattern(graph)
         self.assertNotIn('output_node', graph.node)
 
     def test_do_not_remove_out_data_for_memory(self):
-        graph = build_graph(self.nodes, [('input_node', 'memory_node')])
-        graph.add_node('output_node', **self.nodes['output_node'])
-        graph.add_edge('memory_node', 'output_node', out=0)
+        graph = build_graph(self.nodes,
+                            [
+                                ('input_node', 'memory_node'),
+                                ('memory_node', 'output_node'),
+                            ])
         KaldiRemoveMemoryOutputBackReplacementPattern().find_and_replace_pattern(graph)
         self.assertIn('output_node', graph.node)
diff --git a/model-optimizer/extensions/back/remove_last_softmax_pattern.py b/model-optimizer/extensions/back/remove_last_softmax_pattern.py
index 488e16143..243274c61 100644
--- a/model-optimizer/extensions/back/remove_last_softmax_pattern.py
+++ b/model-optimizer/extensions/back/remove_last_softmax_pattern.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 import networkx as nx
 
 from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 
 
@@ -27,27 +28,22 @@ class RemoveLastSoftMaxPattern(BackReplacementPattern):
     def pattern():
         return dict(
             nodes=[
-                ('softmax_node', dict(kind='op', op='SoftMax'))
+                ('softmax_node', dict(op='SoftMax')),
+                ('softmax_data', dict(kind='data')),
+                ('op_output', dict(op='OpOutput'))
             ],
-            edges=[]
+            edges=[
+                ('softmax_node', 'softmax_data'),
+                ('softmax_data', 'op_output')
+            ]
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         """
-        Need to find the pattern: Parent (any type) -> SoftMAx -> OpOutput
-
-        It is needed to remove output SoftMAx layer
-
-        Parameters
-        ----------
-        graph : nx.MultiDiGraph
-           Graph with loaded model.
-        match : dict
-           Patterns which were found in graph structure.
+        Removes output SoftMax layer
+        :param graph: graph to operate on
+        :param match: dictionary with matched nodes
         """
-        softmax = match['softmax_node']
-        child = softmax.out_node()
-        if not child.has_and_set('is_output'):
-            return
-        remove_op_node_with_data_node(graph, softmax)
+        if len(match['softmax_data'].out_nodes()) == 1:
+            remove_op_node_with_data_node(graph, match['softmax_node'])
diff --git a/model-optimizer/extensions/back/remove_last_softmax_test.py b/model-optimizer/extensions/back/remove_last_softmax_test.py
index 29a01737a..dd73f13a9 100644
--- a/model-optimizer/extensions/back/remove_last_softmax_test.py
+++ b/model-optimizer/extensions/back/remove_last_softmax_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -31,14 +31,19 @@ class KaldiRemoveLastSoftMaxTest(unittest.TestCase):
         },
         'output_node': {
             'kind': 'data'
+        },
+        'op_output': {
+            'kind': 'op',
+            'op': 'OpOutput'
         }
     }
 
     def test_remove_last_SoftMax(self):
         graph = build_graph(self.nodes, [
             ('input_node', 'softmax_node'),
-            ('softmax_node', 'output_node')
-        ], {'output_node': {'is_output': True}})
+            ('softmax_node', 'output_node'),
+            ('output_node', 'op_output')
+        ])
         RemoveLastSoftMaxPattern().find_and_replace_pattern(graph)
         self.assertNotIn('softmax_node', graph.node)
 
diff --git a/model-optimizer/extensions/front/LRNReplacer.py b/model-optimizer/extensions/front/LRNReplacer.py
index 111b5983a..b844a8780 100644
--- a/model-optimizer/extensions/front/LRNReplacer.py
+++ b/model-optimizer/extensions/front/LRNReplacer.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@ import numpy as np
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Graph
 from mo.ops.lin_op import Mul
 from mo.ops.const import Const
 
@@ -26,7 +27,7 @@ class LRNReplacer(FrontReplacementOp):
     op = 'LRN'
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         node = match['op']
 
         if not node.has_valid('bias') or (node.has_valid('bias') and node.bias == 1):
diff --git a/model-optimizer/extensions/front/Pack.py b/model-optimizer/extensions/front/Pack.py
index a7defbaf7..160539e5c 100644
--- a/model-optimizer/extensions/front/Pack.py
+++ b/model-optimizer/extensions/front/Pack.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,11 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.concat import Concat
 from mo.ops.const import Const
 from mo.ops.expand_dims import ExpandDims
@@ -27,14 +24,16 @@ class Pack(FrontReplacementOp):
     op = "Pack"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         expand_dims_nodes = list()
         expand_axis_node = Const(graph, dict(value=node.axis)).create_node([])
         for ind, edge_attrs in node.in_edges().items():
             expand_dims_nodes.append(ExpandDims(graph, dict(name=node.name + '/ExpandDims_')).
                                      create_node([(node.in_node(ind), edge_attrs['out']), expand_axis_node]))
 
-        out_node = Concat(graph, dict(name=node.name + '/Concat_', axis=node.axis)).create_node(expand_dims_nodes)
+        out_node = Concat(graph, dict(name=node.name + '/Concat_',
+                                      axis=node.axis,
+                                      in_ports_count=len(expand_dims_nodes))).create_node(expand_dims_nodes)
         # Replace edge from out port 0 of the matched node with a edge from node out_node.id with port 0.
         # The "explicit" version of the return value is: [(out_node.id, 0)])
         return [out_node.id]
diff --git a/model-optimizer/extensions/front/caffe/accum_ext.py b/model-optimizer/extensions/front/caffe/accum_ext.py
index 9c185cd16..1dc74c55b 100644
--- a/model-optimizer/extensions/front/caffe/accum_ext.py
+++ b/model-optimizer/extensions/front/caffe/accum_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/accum_ext_test.py b/model-optimizer/extensions/front/caffe/accum_ext_test.py
index f67e745c9..ac65ad2f2 100644
--- a/model-optimizer/extensions/front/caffe/accum_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/accum_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/argmax_ext.py b/model-optimizer/extensions/front/caffe/argmax_ext.py
index dc5f927af..69946ea4e 100644
--- a/model-optimizer/extensions/front/caffe/argmax_ext.py
+++ b/model-optimizer/extensions/front/caffe/argmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/argmax_ext_test.py b/model-optimizer/extensions/front/caffe/argmax_ext_test.py
index 39547d1c8..723084400 100644
--- a/model-optimizer/extensions/front/caffe/argmax_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/argmax_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/axpy.py b/model-optimizer/extensions/front/caffe/axpy.py
index e5f575982..88ef5c07e 100644
--- a/model-optimizer/extensions/front/caffe/axpy.py
+++ b/model-optimizer/extensions/front/caffe/axpy.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.lin_op import Add
 from mo.ops.scale_shift import ScaleShiftOp
 
@@ -29,7 +27,7 @@ class AxpyToEltwise(FrontReplacementOp):
     op = "Axpy"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         in_node_0 = node.in_node(0)
         in_node_1 = node.in_node(1)
         in_node_2 = node.in_node(2)
diff --git a/model-optimizer/extensions/front/caffe/axpy_test.py b/model-optimizer/extensions/front/caffe/axpy_test.py
index 01e535cdf..6cd0bf2fe 100644
--- a/model-optimizer/extensions/front/caffe/axpy_test.py
+++ b/model-optimizer/extensions/front/caffe/axpy_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/bias_ext.py b/model-optimizer/extensions/front/caffe/bias_ext.py
new file mode 100644
index 000000000..8cce76b01
--- /dev/null
+++ b/model-optimizer/extensions/front/caffe/bias_ext.py
@@ -0,0 +1,37 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.caffe.extractors.utils import embed_input
+from mo.front.extractor import FrontExtractorOp
+from mo.graph.graph import Node
+from mo.ops.lin_op import Add
+
+
+class BiasToAdd(FrontExtractorOp):
+    """
+    Replaces Bias layer with Eltwise.
+    """
+    op = "Bias"
+    enabled = True
+
+    @staticmethod
+    def extract(node: Node):
+        attrs = {'axis': node.pb.bias_param.axis}
+        embed_input(attrs, 1, 'bias', node.model_pb.blobs[0].data, 'biases')
+
+        Add.update_node_stat(node, attrs)
+
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/caffe/bias_ext_test.py b/model-optimizer/extensions/front/caffe/bias_ext_test.py
new file mode 100644
index 000000000..869aae89f
--- /dev/null
+++ b/model-optimizer/extensions/front/caffe/bias_ext_test.py
@@ -0,0 +1,46 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+from unittest.mock import patch
+
+from extensions.front.caffe.bias_ext import BiasToAdd
+from mo.utils.unittest.extractors import FakeModelLayer, FakeMultiParam
+from mo.utils.unittest.graph import FakeNode
+
+
+class FakeBiasProtoLayer:
+    def __init__(self, val):
+        self.bias_param = val
+
+
+class TestBias(unittest.TestCase):
+
+    @patch('extensions.front.caffe.bias_ext.embed_input')
+    def test_bias(self, embed_input_mock):
+        embed_input_mock.return_value = {}
+        params = {'axis': 1}
+        add_node = FakeNode(FakeBiasProtoLayer(FakeMultiParam(params)),
+                            FakeModelLayer([1, 2, 3, 4, 5]))
+        BiasToAdd.extract(add_node)
+
+        exp_res = {
+            'type': "Eltwise",
+            'operation': 'sum',
+            'axis': 1
+        }
+
+        for key in exp_res.keys():
+            self.assertEqual(add_node[key], exp_res[key])
diff --git a/model-optimizer/extensions/front/caffe/binarization.py b/model-optimizer/extensions/front/caffe/binarization.py
new file mode 100644
index 000000000..ba6957342
--- /dev/null
+++ b/model-optimizer/extensions/front/caffe/binarization.py
@@ -0,0 +1,43 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.quantize import QuantizeOp
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Node, Graph
+from mo.ops.const import Const
+
+
+class BinarizationToQuantize(FrontReplacementOp):
+    """
+    Replaces Binarization layer with Quantize.
+    """
+    op = "Binarization"
+    enabled = True
+
+    def replace_op(self, graph: Graph, node: Node):
+        in_node_0 = node.in_node(0)
+
+        broadcast = lambda x: np.array([x], dtype=np.float32)
+        threshold = Const(graph, {'name': node.id + "/Input_1", "value": broadcast(0)}).create_node()
+        in_1 = threshold
+        in_2 = threshold
+        in_3 = Const(graph, {'name': node.id + "/Input_3", "value": broadcast(-1)}).create_node()
+        in_4 = Const(graph, {'name': node.id + "/Input_4", "value": broadcast(+1)}).create_node()
+        quant = QuantizeOp(graph, {'name': node.id + "/Quantize_", "levels": 2}).create_node(
+            inputs=[in_node_0, in_1, in_2, in_3, in_4])
+
+        return [quant.id]
diff --git a/model-optimizer/extensions/front/caffe/binary_conv_ext.py b/model-optimizer/extensions/front/caffe/binary_conv_ext.py
new file mode 100644
index 000000000..4ba74b5a3
--- /dev/null
+++ b/model-optimizer/extensions/front/caffe/binary_conv_ext.py
@@ -0,0 +1,55 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.front.caffe.conv_ext import conv_create_attrs, conv_set_params
+from mo.front.caffe.extractors.utils import weights_biases
+from mo.front.common.extractors.utils import layout_attrs
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.convolution import Convolution
+from mo.utils.error import Error
+
+
+class ConvFrontExtractor(FrontExtractorOp):
+    op = 'ConvolutionBinary'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        proto_layer, model_layer = node.pb, node.model_pb
+
+        if not proto_layer:
+            raise Error('Protobuf layer can not be empty')
+
+        conv_param = proto_layer.convolution_param
+        conv_type = 'ConvND' if len(proto_layer.bottom) > 1 else 'Conv2D'
+
+        params = conv_set_params(conv_param, conv_type)
+        attrs = conv_create_attrs(params)
+        attrs.update({'op': __class__.op,
+                      'get_group': lambda node: node.group,
+                      'get_output_feature_dim': lambda node: node.output
+                      })
+
+        # Embed weights and biases as attributes
+        # It will be moved to a separate nodes in special pass
+        attrs.update(
+            weights_biases(conv_param.bias_term, model_layer, start_index=len(proto_layer.bottom), proto=conv_param))
+        attrs.update(layout_attrs())
+
+        # update the attributes of the node
+        Convolution.update_node_stat(node, attrs)
+        return __class__.enabled
+
diff --git a/model-optimizer/extensions/front/caffe/bn.py b/model-optimizer/extensions/front/caffe/bn.py
index 06ad48629..01e52f4d3 100644
--- a/model-optimizer/extensions/front/caffe/bn.py
+++ b/model-optimizer/extensions/front/caffe/bn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,12 +14,11 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.caffe.extractors.utils import embed_input
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.scale_shift import ScaleShiftOp
 from mo.utils.error import Error
 
@@ -31,7 +30,7 @@ class BNToScaleShift(FrontReplacementOp):
     op = "BN"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         attrs = {'name': node.id + "/ScaleShift_"}
 
         param = graph.node[node.id]['pb'].bn_param
diff --git a/model-optimizer/extensions/front/caffe/bn_test.py b/model-optimizer/extensions/front/caffe/bn_test.py
index f075e5033..ac4ecace1 100644
--- a/model-optimizer/extensions/front/caffe/bn_test.py
+++ b/model-optimizer/extensions/front/caffe/bn_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/conv_ext.py b/model-optimizer/extensions/front/caffe/conv_ext.py
index 8146917ed..dfd9ed691 100644
--- a/model-optimizer/extensions/front/caffe/conv_ext.py
+++ b/model-optimizer/extensions/front/caffe/conv_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/conv_ext_test.py b/model-optimizer/extensions/front/caffe/conv_ext_test.py
index 49c8b0b4a..22d7d00c6 100644
--- a/model-optimizer/extensions/front/caffe/conv_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/conv_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/correlation_ext.py b/model-optimizer/extensions/front/caffe/correlation_ext.py
index c05e04c9a..066e973a2 100644
--- a/model-optimizer/extensions/front/caffe/correlation_ext.py
+++ b/model-optimizer/extensions/front/caffe/correlation_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/correlation_ext_test.py b/model-optimizer/extensions/front/caffe/correlation_ext_test.py
index de4b74ca7..3ee600604 100644
--- a/model-optimizer/extensions/front/caffe/correlation_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/correlation_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext.py b/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext.py
index 3073128a9..37c1f2e48 100644
--- a/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext.py
+++ b/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext_test.py b/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext_test.py
index 07b724ed0..a01f40548 100644
--- a/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/ctcgreedydecoder_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/data_augmentation_ext.py b/model-optimizer/extensions/front/caffe/data_augmentation_ext.py
index f7769e729..12f8ad711 100644
--- a/model-optimizer/extensions/front/caffe/data_augmentation_ext.py
+++ b/model-optimizer/extensions/front/caffe/data_augmentation_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/data_augmentation_ext_test.py b/model-optimizer/extensions/front/caffe/data_augmentation_ext_test.py
index 0dd0abadf..4524ff842 100644
--- a/model-optimizer/extensions/front/caffe/data_augmentation_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/data_augmentation_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/detection_output.py b/model-optimizer/extensions/front/caffe/detection_output.py
index 296fcf316..57f336a51 100644
--- a/model-optimizer/extensions/front/caffe/detection_output.py
+++ b/model-optimizer/extensions/front/caffe/detection_output.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -140,6 +140,8 @@ class DetectionOutputFrontExtractor(FrontExtractorOp):
             attrs['input_height'] = param.input_height
         if 'normalized' in fields:
             attrs['normalized'] = int(param.normalized)
+        if 'objectness_score' in fields:
+            attrs['objectness_score'] = param.objectness_score
 
         mapping_rule = merge_attrs(param, attrs)
 
diff --git a/model-optimizer/extensions/front/caffe/flatten_ext.py b/model-optimizer/extensions/front/caffe/flatten_ext.py
index a68d81c40..19d4aef09 100644
--- a/model-optimizer/extensions/front/caffe/flatten_ext.py
+++ b/model-optimizer/extensions/front/caffe/flatten_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/grn_ext.py b/model-optimizer/extensions/front/caffe/grn_ext.py
index 4b4cd97f3..57bf40560 100644
--- a/model-optimizer/extensions/front/caffe/grn_ext.py
+++ b/model-optimizer/extensions/front/caffe/grn_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/grn_ext_test.py b/model-optimizer/extensions/front/caffe/grn_ext_test.py
index e284a8ad7..9eeba17dc 100644
--- a/model-optimizer/extensions/front/caffe/grn_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/grn_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/interp_ext.py b/model-optimizer/extensions/front/caffe/interp_ext.py
index ae8a8da55..9bfb33c06 100644
--- a/model-optimizer/extensions/front/caffe/interp_ext.py
+++ b/model-optimizer/extensions/front/caffe/interp_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/interp_ext_test.py b/model-optimizer/extensions/front/caffe/interp_ext_test.py
index ecbf11401..be17dcbeb 100644
--- a/model-optimizer/extensions/front/caffe/interp_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/interp_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/mvn_ext.py b/model-optimizer/extensions/front/caffe/mvn_ext.py
index a34e007be..cc4fb2658 100644
--- a/model-optimizer/extensions/front/caffe/mvn_ext.py
+++ b/model-optimizer/extensions/front/caffe/mvn_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/normalize_ext.py b/model-optimizer/extensions/front/caffe/normalize_ext.py
index bc411b743..1202f3b55 100644
--- a/model-optimizer/extensions/front/caffe/normalize_ext.py
+++ b/model-optimizer/extensions/front/caffe/normalize_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/normalize_ext_test.py b/model-optimizer/extensions/front/caffe/normalize_ext_test.py
index 4b2c42fd9..01d6f085c 100644
--- a/model-optimizer/extensions/front/caffe/normalize_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/normalize_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/pooling_ext.py b/model-optimizer/extensions/front/caffe/pooling_ext.py
index 96540a171..b48324eca 100644
--- a/model-optimizer/extensions/front/caffe/pooling_ext.py
+++ b/model-optimizer/extensions/front/caffe/pooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/pooling_ext_test.py b/model-optimizer/extensions/front/caffe/pooling_ext_test.py
index f391d93bd..ec3e74ff8 100644
--- a/model-optimizer/extensions/front/caffe/pooling_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/pooling_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/power_file_ext.py b/model-optimizer/extensions/front/caffe/power_file_ext.py
index cba120be0..62843695c 100644
--- a/model-optimizer/extensions/front/caffe/power_file_ext.py
+++ b/model-optimizer/extensions/front/caffe/power_file_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/power_file_ext_test.py b/model-optimizer/extensions/front/caffe/power_file_ext_test.py
index da06fc66e..37b04ced7 100644
--- a/model-optimizer/extensions/front/caffe/power_file_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/power_file_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/prelu_ext.py b/model-optimizer/extensions/front/caffe/prelu_ext.py
index 40cff272c..039ace3e1 100644
--- a/model-optimizer/extensions/front/caffe/prelu_ext.py
+++ b/model-optimizer/extensions/front/caffe/prelu_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/prelu_ext_test.py b/model-optimizer/extensions/front/caffe/prelu_ext_test.py
index 2ed4370c5..fb0a167c0 100644
--- a/model-optimizer/extensions/front/caffe/prelu_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/prelu_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/priorbox_clustered_ext.py b/model-optimizer/extensions/front/caffe/priorbox_clustered_ext.py
index 68e98a49f..959bdd1a0 100644
--- a/model-optimizer/extensions/front/caffe/priorbox_clustered_ext.py
+++ b/model-optimizer/extensions/front/caffe/priorbox_clustered_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/priorbox_clustered_ext_test.py b/model-optimizer/extensions/front/caffe/priorbox_clustered_ext_test.py
index 4ce3e3233..8b0261759 100644
--- a/model-optimizer/extensions/front/caffe/priorbox_clustered_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/priorbox_clustered_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/priorbox_ext.py b/model-optimizer/extensions/front/caffe/priorbox_ext.py
index ae87dc444..c13e8281f 100644
--- a/model-optimizer/extensions/front/caffe/priorbox_ext.py
+++ b/model-optimizer/extensions/front/caffe/priorbox_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/priorbox_ext_test.py b/model-optimizer/extensions/front/caffe/priorbox_ext_test.py
index b93a8831f..23f46d998 100644
--- a/model-optimizer/extensions/front/caffe/priorbox_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/priorbox_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/proposal_ext.py b/model-optimizer/extensions/front/caffe/proposal_ext.py
index 059e84377..5ecfde55e 100644
--- a/model-optimizer/extensions/front/caffe/proposal_ext.py
+++ b/model-optimizer/extensions/front/caffe/proposal_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/proposal_ext_test.py b/model-optimizer/extensions/front/caffe/proposal_ext_test.py
index ff41fb0ab..edb9f31d4 100644
--- a/model-optimizer/extensions/front/caffe/proposal_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/proposal_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/proposal_python_ext.py b/model-optimizer/extensions/front/caffe/proposal_python_ext.py
index 364611b3a..3db451fcb 100644
--- a/model-optimizer/extensions/front/caffe/proposal_python_ext.py
+++ b/model-optimizer/extensions/front/caffe/proposal_python_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/proposal_python_ext_test.py b/model-optimizer/extensions/front/caffe/proposal_python_ext_test.py
index d47f2b76a..4c3ac4d48 100644
--- a/model-optimizer/extensions/front/caffe/proposal_python_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/proposal_python_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/psroipooling_ext.py b/model-optimizer/extensions/front/caffe/psroipooling_ext.py
index 9dead6dcd..ffaef798a 100644
--- a/model-optimizer/extensions/front/caffe/psroipooling_ext.py
+++ b/model-optimizer/extensions/front/caffe/psroipooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/psroipooling_ext_test.py b/model-optimizer/extensions/front/caffe/psroipooling_ext_test.py
index f1752788b..5da3c7d0f 100644
--- a/model-optimizer/extensions/front/caffe/psroipooling_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/psroipooling_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/regionyolo_ext.py b/model-optimizer/extensions/front/caffe/regionyolo_ext.py
index ca28c1453..22bde0805 100644
--- a/model-optimizer/extensions/front/caffe/regionyolo_ext.py
+++ b/model-optimizer/extensions/front/caffe/regionyolo_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/regionyolo_ext_test.py b/model-optimizer/extensions/front/caffe/regionyolo_ext_test.py
index 8c37989a3..56e451f75 100644
--- a/model-optimizer/extensions/front/caffe/regionyolo_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/regionyolo_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/reorgyolo_ext.py b/model-optimizer/extensions/front/caffe/reorgyolo_ext.py
index d6ee37429..57bc30be8 100644
--- a/model-optimizer/extensions/front/caffe/reorgyolo_ext.py
+++ b/model-optimizer/extensions/front/caffe/reorgyolo_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/reorgyolo_ext_test.py b/model-optimizer/extensions/front/caffe/reorgyolo_ext_test.py
index 502c5ad99..f5939def4 100644
--- a/model-optimizer/extensions/front/caffe/reorgyolo_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/reorgyolo_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/resample_ext.py b/model-optimizer/extensions/front/caffe/resample_ext.py
index 8e8bcb5c2..84f72e5d7 100644
--- a/model-optimizer/extensions/front/caffe/resample_ext.py
+++ b/model-optimizer/extensions/front/caffe/resample_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/resample_ext_test.py b/model-optimizer/extensions/front/caffe/resample_ext_test.py
index c1fc3d6aa..3e56de70f 100644
--- a/model-optimizer/extensions/front/caffe/resample_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/resample_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/shufflechannel_ext.py b/model-optimizer/extensions/front/caffe/shufflechannel_ext.py
index 37b72219f..81ffcf8ee 100644
--- a/model-optimizer/extensions/front/caffe/shufflechannel_ext.py
+++ b/model-optimizer/extensions/front/caffe/shufflechannel_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/simplernms_ext.py b/model-optimizer/extensions/front/caffe/simplernms_ext.py
index 2d9cbaf8f..5ad99796a 100644
--- a/model-optimizer/extensions/front/caffe/simplernms_ext.py
+++ b/model-optimizer/extensions/front/caffe/simplernms_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/simplernms_ext_test.py b/model-optimizer/extensions/front/caffe/simplernms_ext_test.py
index 06b298bfc..8ce238c7f 100644
--- a/model-optimizer/extensions/front/caffe/simplernms_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/simplernms_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/softmax_ext.py b/model-optimizer/extensions/front/caffe/softmax_ext.py
index 6bb8d74ff..972c11324 100644
--- a/model-optimizer/extensions/front/caffe/softmax_ext.py
+++ b/model-optimizer/extensions/front/caffe/softmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/spatial_transformer_ext.py b/model-optimizer/extensions/front/caffe/spatial_transformer_ext.py
index fc27ded27..842b2a147 100644
--- a/model-optimizer/extensions/front/caffe/spatial_transformer_ext.py
+++ b/model-optimizer/extensions/front/caffe/spatial_transformer_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/spatial_transformer_ext_test.py b/model-optimizer/extensions/front/caffe/spatial_transformer_ext_test.py
index 9039cda91..8747867e4 100644
--- a/model-optimizer/extensions/front/caffe/spatial_transformer_ext_test.py
+++ b/model-optimizer/extensions/front/caffe/spatial_transformer_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/caffe/split_to_identity.py b/model-optimizer/extensions/front/caffe/split_to_identity.py
index d46c1c31d..189139b11 100644
--- a/model-optimizer/extensions/front/caffe/split_to_identity.py
+++ b/model-optimizer/extensions/front/caffe/split_to_identity.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Graph
 
 
 class SplitToIdentity(FrontReplacementOp):
@@ -31,7 +30,7 @@ class SplitToIdentity(FrontReplacementOp):
     op = "Split"
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         split_node = match['op']
         split_node.op = 'Identity'
         for u, v, edge_attrs in split_node.graph.out_edges(split_node.id, data=True):
diff --git a/model-optimizer/extensions/front/create_tensor_nodes.py b/model-optimizer/extensions/front/create_tensor_nodes.py
new file mode 100644
index 000000000..2417e9153
--- /dev/null
+++ b/model-optimizer/extensions/front/create_tensor_nodes.py
@@ -0,0 +1,34 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.front.extractor import create_tensor_nodes
+from mo.graph.graph import Graph
+
+
+class CreateTensorNodes(FrontReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_before(self):
+        return []
+
+    def run_after(self):
+        from extensions.front.pass_separator import FrontFinish
+        return [FrontFinish]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        create_tensor_nodes(graph)
+        graph.stage = 'middle'
diff --git a/model-optimizer/mo/ops/div.py b/model-optimizer/extensions/front/div.py
index 4f39e4c5c..9509d79e1 100644
--- a/model-optimizer/mo/ops/div.py
+++ b/model-optimizer/extensions/front/div.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,10 +15,9 @@
 """
 
 import numpy as np
-import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.eltwise import Eltwise
 from mo.ops.power import Power
 
@@ -27,13 +26,15 @@ class Div(FrontReplacementOp):
     op = "Div"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
-        reciprocal = Power(graph, dict(scale=1, power=np.float64(-1), shift=0, name=node.name + '/reciprocal_'))
-        mul = Eltwise(graph, dict(operation='mul', name=node.name + '/mul_'))
+    def replace_op(self, graph: Graph, node: Node):
+        reciprocal = Power(graph, {'scale': 1, 'power': np.float64(-1), 'shift': 0,
+                                   'name': node.name + '/reciprocal_'}).create_node()
+        mul = Eltwise(graph, {'operation': 'mul', 'name': node.name + '/mul_'}).create_node()
+
+        # Connect nodes
+        node.in_port(1).get_connection().set_destination(reciprocal.in_port(0))
+        node.in_port(0).get_connection().set_destination(mul.in_port(1))
+        reciprocal.out_port(0).connect(mul.in_port(0))
 
-        out_node = mul.create_node([(node.in_node(0), node.in_edge(0)['out']),
-                                    reciprocal.create_node([(node.in_node(1), node.in_edge(1)['out'])])
-                                   ])
-        # Replace edge from out port 0 of the matched node with a edge from node out_node.id with port 0.
         # The "explicit" version of the return value is: [(out_node.id, 0)])
-        return [out_node.id]
+        return [mul.id]
diff --git a/model-optimizer/extensions/front/div_test.py b/model-optimizer/extensions/front/div_test.py
new file mode 100644
index 000000000..50ec3e8b2
--- /dev/null
+++ b/model-optimizer/extensions/front/div_test.py
@@ -0,0 +1,98 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.front.div import Div
+from mo.utils.unittest.graph import build_graph, compare_graphs
+
+nodes_attributes = {
+    'placeholder_1': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+    'placeholder_2': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+    # Div operation
+    'Div': {'kind': 'op', 'op': 'Div'},
+    # Test operation
+    'last': {'type': None, 'value': None, 'kind': 'op', 'op': None},
+    # Add and Power operations
+    'power_1': {'scale': None, 'power': None, 'shift': None, 'type': 'Power', 'kind': 'op', 'op': 'Power'},
+    'mul_1': {'value': None, 'type': 'Eltwise', 'kind': 'op', 'op': 'Mul'},
+}
+
+
+class TestDiv(unittest.TestCase):
+    def test_div_test_1(self):
+        # Test with two different inputs from two placeholders
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'Div'),
+                             ('placeholder_2', 'Div'),
+                             ('Div', 'last')
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                             'placeholder_2': {'shape': np.array([1, 227, 227, 3])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_2', 'power_1'),
+                                 ('power_1', 'mul_1'),
+                                 ('placeholder_1', 'mul_1'),
+                                 ('mul_1', 'last'),
+                                 ],
+                                {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                                 'placeholder_2': {'shape': np.array([1, 227, 227, 3])},
+                                 'power_1': {'scale': np.array(1), 'power': np.array(-1), 'shift': np.array(0),
+                                             'type': 'Power'},
+                                 'mul_1': {'type': 'Eltwise', 'op': 'Mul'},
+                                 }, nodes_with_edges_only=True)
+
+        graph.stage = 'front'
+
+        tested_class = Div()
+        tested_class.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_div_test_2(self):
+        # Test with two same inputs from one placeholder
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'Div'),
+                             ('placeholder_1', 'Div'),
+                             ('Div', 'last')
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('power_1', 'mul_1'),
+                                 ('placeholder_1', 'mul_1'),
+                                 ('placeholder_1', 'power_1'),
+                                 ('mul_1', 'last'),
+                                 ],
+                                {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                                 'power_1': {'scale': np.array(1), 'power': np.array(-1), 'shift': np.array(0),
+                                             'type': 'Power'},
+                                 'mul_1': {'type': 'Eltwise', 'op': 'Mul'},
+                                 }, nodes_with_edges_only=True)
+
+        graph.stage = 'front'
+
+        tested_class = Div()
+        tested_class.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last', check_op_attrs=True)
+        self.assertTrue(flag, resp)
diff --git a/model-optimizer/extensions/front/eltwise_n.py b/model-optimizer/extensions/front/eltwise_n.py
index f1a42cbc1..7501e2622 100644
--- a/model-optimizer/extensions/front/eltwise_n.py
+++ b/model-optimizer/extensions/front/eltwise_n.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.eltwise import Eltwise
 
 
@@ -29,7 +29,7 @@ class EltwiseNReplacement(FrontReplacementOp):
     op = 'EltwiseN'
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         out_node = node.in_node(0)
         operation = node.operation
         for ind in range(1, len(node.in_nodes())):
diff --git a/model-optimizer/extensions/front/eltwise_n_test.py b/model-optimizer/extensions/front/eltwise_n_test.py
index 33cedbd03..c0e1ad1b7 100644
--- a/model-optimizer/extensions/front/eltwise_n_test.py
+++ b/model-optimizer/extensions/front/eltwise_n_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/freeze_placeholder_value.py b/model-optimizer/extensions/front/freeze_placeholder_value.py
index 2775738e8..cda5a95a9 100644
--- a/model-optimizer/extensions/front/freeze_placeholder_value.py
+++ b/model-optimizer/extensions/front/freeze_placeholder_value.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,10 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import erase_node
+from mo.graph.graph import Graph
 from mo.middle.passes.convert_data_type import SUPPORTED_DATA_TYPES
 from mo.ops.const import Const
 from mo.utils.error import Error
@@ -28,13 +27,19 @@ from mo.utils.error import Error
 
 class FreezePlaceholderValue(FrontReplacementSubgraph):
     """
-    Replaces existing placeholder to Constant node with provided value. It takes value from raplacement_dict as string
-    and casts it to actual node data type
-    :param replacement_dict: dictionary with node names as keys and strings as values
+    Replaces existing placeholder to Constant node with provided value. It takes value from freeze_placeholder as
+    a string and casts it to actual node data type
     """
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['freeze_placeholder'] is not None]
 
-    enabled = False
-    replacement_dict = dict()
+    def run_after(self):
+        from extensions.front.restore_ports import RestorePorts
+        return [RestorePorts]
+
+    def run_before(self):
+        from extensions.front.pass_separator import FrontStart
+        return [FrontStart]
 
     @staticmethod
     def pattern():
@@ -43,15 +48,15 @@ class FreezePlaceholderValue(FrontReplacementSubgraph):
             edges=[]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         ph = match['placeholder']
-        if ph.name in self.replacement_dict:
+        if ph.name in graph.graph['freeze_placeholder']:
             name = ph.name
             if ph.has_and_set('data_type'):
                 data_type = ph.data_type
             else:
                 data_type = SUPPORTED_DATA_TYPES[graph.graph['cmd_params'].data_type][0]
-            string_value = self.replacement_dict[name]
+            string_value = graph.graph['freeze_placeholder'][name]
             try:
                 if data_type != np.bool:
                     value = np.array(string_value, dtype=data_type)
@@ -76,7 +81,7 @@ class FreezePlaceholderValue(FrontReplacementSubgraph):
             new_node = Const(graph).create_node(
                 attrs={'value': value, 'data_type': type(value), 'name': name + '/const_placeholder',
                        'shape': ph.shape})
-            erase_node(ph)
+            graph.erase_node(ph)
             graph.add_edges_from([(new_node.id, v, attrs) for u, v, attrs in out_edges])
             log.info("Placeholder node \"{}\" was replaced with Const node \"{}\" with value \"{}\"".format(
                 name, new_node.name, value))
diff --git a/model-optimizer/extensions/front/freeze_placeholder_value_test.py b/model-optimizer/extensions/front/freeze_placeholder_value_test.py
index 5c2329148..1eeb5357d 100644
--- a/model-optimizer/extensions/front/freeze_placeholder_value_test.py
+++ b/model-optimizer/extensions/front/freeze_placeholder_value_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -28,8 +28,8 @@ nodes_bool = {
     '3': {'name': 'node_2', 'kind': 'op', 'op': 'NotPlaceholder'},
     '4': {'name': 'node_3', 'kind': 'op', 'op': 'NotPlaceholder'},
     '5': {'name': 'node_4', 'kind': 'op', 'op': 'NotPlaceholder'},
-    '6': {'name': 'output1', 'kind': 'op', 'op': 'OpOutput', 'is_output': True},
-    '7': {'name': 'output2', 'kind': 'op', 'op': 'OpOutput', 'is_output': True}
+    '6': {'name': 'output1', 'kind': 'op', 'op': 'OpOutput', 'type': 'OpOutput'},
+    '7': {'name': 'output2', 'kind': 'op', 'op': 'OpOutput', 'type': 'OpOutput'}
 
 }
 edges = {
@@ -46,7 +46,7 @@ class TestFreezePlaceholderValue(unittest.TestCase):
         graph = build_graph(nodes_bool, edges)
         graph.graph['fw'] = 'tf'
         tested_class = FreezePlaceholderValue()
-        tested_class.replacement_dict = {'input1': 'True'}
+        graph.graph['freeze_placeholder'] = {'input1': 'True'}
         before_pattern = graph.nodes()
         tested_class.find_and_replace_pattern(graph=graph)
         after_pattern = graph.nodes()
@@ -65,7 +65,7 @@ class TestFreezePlaceholderValue(unittest.TestCase):
         graph = build_graph(nodes_bool, edges)
         graph.graph['fw'] = 'tf'
         tested_class = FreezePlaceholderValue()
-        tested_class.replacement_dict = {'input1': 'False'}
+        graph.graph['freeze_placeholder'] = {'input1': 'False'}
         before_pattern = graph.nodes()
         tested_class.find_and_replace_pattern(graph=graph)
         after_pattern = graph.nodes()
@@ -84,7 +84,7 @@ class TestFreezePlaceholderValue(unittest.TestCase):
         graph = build_graph(nodes_bool, edges)
         graph.graph['fw'] = 'tf'
         tested_class = FreezePlaceholderValue()
-        tested_class.replacement_dict = {'input1': 'False', 'input2': 'True'}
+        graph.graph['freeze_placeholder'] = {'input1': 'False', 'input2': 'True'}
         before_pattern = graph.nodes()
         tested_class.find_and_replace_pattern(graph=graph)
         after_pattern = graph.nodes()
diff --git a/model-optimizer/extensions/front/image_scaler.py b/model-optimizer/extensions/front/image_scaler.py
index c0342563f..8ec13c646 100644
--- a/model-optimizer/extensions/front/image_scaler.py
+++ b/model-optimizer/extensions/front/image_scaler.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Graph
 from mo.ops.const import Const
 from mo.ops.lin_op import Mul, Add
 
@@ -26,7 +26,7 @@ class ImageScaler(FrontReplacementOp):
     op = "ImageScaler"
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         # This replacer replace ImageScalar operation to Mul->Add sequence
         # Also it check that weights and biases are good
         op = match['op']
@@ -38,28 +38,24 @@ class ImageScaler(FrontReplacementOp):
         if all([x == 0 for x in np.nditer(op.bias)]):
             has_bias = False
 
-        # Get all outputs for op node
-        out_nodes = [node for node in op.out_nodes().values()]
+        assert len(op.in_ports()) == 1
 
-        assert len(op.in_nodes()) == 1
+        last_port = op.in_port(0).get_source()
 
-        last_node = op.in_node()
         # Create Mul & Add nodes
         if has_weights:
-            mul_weights = Const(graph, dict(value=op.scale, shape=op.scale.shape))
-            mul_op = Mul(graph, dict(name=op.id + '/mul_'))
-            last_node = mul_op.create_node(inputs=[last_node, mul_weights.create_node()])
+            mul_weights = Const(graph, dict(value=op.scale, shape=op.scale.shape)).create_node()
+            mul_op = Mul(graph, dict(name=op.id + '/mul_')).create_node()
+            op.in_port(0).get_connection().set_destination(mul_op.in_port(0))
+            mul_weights.out_port(0).connect(mul_op.in_port(1))
+            last_port = mul_op.out_port(0)
 
         if has_bias:
-            add_bias = Const(graph, dict(value=op.bias, shape=op.bias.shape))
-            add_op = Add(graph, dict(name=op.id + '/add_'))
-            last_node = add_op.create_node(inputs=[last_node, add_bias.create_node()])
-
-        # Move edges from ImageScaler to last_node (Mul or Add)
-        for out_node in out_nodes:
-            edge_attrs = graph.get_edge_data(op.id, out_node.id)[0]
-            graph.remove_edge(op.id, out_node.id)
-            graph.add_edges_from([(last_node.id, out_node.id, edge_attrs)])
-
-        # Disconnect ImageScalar node
-        graph.remove_edge(op.in_node().id, op.id)
+            add_bias = Const(graph, dict(value=op.bias, shape=op.bias.shape)).create_node()
+            add_op = Add(graph, dict(name=op.id + '/add_')).create_node()
+            last_port.get_connection().set_destination(add_op.in_port(0))
+            add_bias.out_port(0).connect(add_op.in_port(1))
+            last_port = add_op.out_port(0)
+
+        op.in_port(0).disconnect()
+        op.out_port(0).get_connection().set_source(last_port)
diff --git a/model-optimizer/extensions/front/image_scaler_test.py b/model-optimizer/extensions/front/image_scaler_test.py
index 2c4ec9085..40d7aeac8 100644
--- a/model-optimizer/extensions/front/image_scaler_test.py
+++ b/model-optimizer/extensions/front/image_scaler_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -32,16 +32,59 @@ nodes_attributes = {
     'last_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Mul and Add operations
     'mul_1': {'type': None, 'value': None, 'kind': 'op', 'op': 'Mul'},
-    'mul_1_w': {'value': None, 'shape': None, 'kind': 'op', 'op': 'Const'},
+    'const_mul_1_w': {'type': None, 'value': None, 'kind': 'op', 'op': 'Const'},
+    'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     'add_1': {'type': None, 'value': None, 'kind': 'op', 'op': 'Add'},
-    'add_1_w': {'value': None, 'shape': None, 'kind': 'op', 'op': 'Const'},
+    'const_add_1_w': {'type': None, 'value': None, 'kind': 'op', 'op': 'Const'},
+    'add_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'add_1_data': {'value': None, 'shape': None, 'kind': 'data'},
 }
 
 
 class ImageScalerTest(unittest.TestCase):
-    def test_image_scaler_test1(self):
+    # Tests for MIDDLE stage
+    # Graph with Mul and Add operations
+    def test_image_scaler_test_1(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'im_scaler'),
+                             ('im_scaler', 'im_scaler_data'),
+                             ('im_scaler_data', 'last'),
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler': {'scale': np.array(2.0), 'bias': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
+                                 ('mul_1_w', 'mul_1'),
+                                 ('mul_1', 'mul_1_data'),
+                                 ('mul_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
+                                 ('add_1_w', 'add_1'),
+                                 ('add_1', 'add_1_data'),
+                                 ('add_1_data', 'last')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array(2.0).shape, 'value': np.array(2.0)},
+                                 'const_add_1_w': {'shape': np.array([3, 1, 1]),
+                                                   'value': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'middle'
+
+        replacer = ImageScaler()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
+
+    # Graph with Add operation
+    def test_image_scaler_test_2(self):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'im_scaler'),
@@ -55,16 +98,18 @@ class ImageScalerTest(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'add_1_data'),
                                  ('add_1_data', 'last')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
-                                 'add_1_w': {'shape': np.array([3, 1, 1]),
-                                             'value': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                                 'const_add_1_w': {'shape': np.array([3, 1, 1]),
+                                                   'value': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
                                  }, nodes_with_edges_only=True)
 
         graph.graph['layout'] = 'NCHW'
+        graph.stage = 'middle'
 
         replacer = ImageScaler()
         replacer.find_and_replace_pattern(graph)
@@ -72,7 +117,8 @@ class ImageScalerTest(unittest.TestCase):
         (flag, resp) = compare_graphs(graph, graph_ref, 'last')
         self.assertTrue(flag, resp)
 
-    def test_image_scaler_test2(self):
+    # Graph with Mul operation
+    def test_image_scaler_test_3(self):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'im_scaler'),
@@ -86,15 +132,161 @@ class ImageScalerTest(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'last')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
-                                 'mul_1_w': {'shape': np.array(2.0).shape, 'value': np.array(2.0)},
+                                 'const_mul_1_w': {'shape': np.array(2.0).shape, 'value': np.array(2.0)},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'middle'
+
+        replacer = ImageScaler()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
+
+    # Graph without Mul and Add operations
+    def test_image_scaler_test_4(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'im_scaler'),
+                             ('im_scaler', 'im_scaler_data'),
+                             ('im_scaler_data', 'last'),
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler_data': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler': {'scale': np.array(1.0), 'bias': np.reshape(np.array([0, 0, 0]), [3, 1, 1])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'last')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'middle'
+
+        replacer = ImageScaler()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
+
+    # Tests for FRONT stage
+    # Graph with Mul and Add operations
+    def test_image_scaler_test_5(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'im_scaler'),
+                             ('im_scaler', 'last'),
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler': {'scale': np.array(2.0), 'bias': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1'),
+                                 ('mul_1', 'add_1'),
+                                 ('const_add_1_w', 'add_1'),
+                                 ('add_1', 'last')
+                                 ],
+                                {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array(2.0).shape, 'value': np.array(2.0)},
+                                 'const_add_1_w': {'shape': np.array([3, 1, 1]),
+                                                   'value': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'front'
+
+        replacer = ImageScaler()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
+
+    # Graph with Add operation
+    def test_image_scaler_test_6(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'im_scaler'),
+                             ('im_scaler', 'last'),
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler': {'scale': np.array(1.0), 'bias': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'add_1'),
+                                 ('const_add_1_w', 'add_1'),
+                                 ('add_1', 'last')
+                                 ],
+                                {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_add_1_w': {'shape': np.array([3, 1, 1]),
+                                                   'value': np.reshape(np.array([1, 2, 3]), [3, 1, 1])},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'front'
+
+        replacer = ImageScaler()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
+
+    # Graph with Mul operation
+    def test_image_scaler_test_7(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'im_scaler'),
+                             ('im_scaler', 'last'),
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler': {'scale': np.array(2.0), 'bias': np.reshape(np.array([0, 0, 0]), [3, 1, 1])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1'),
+                                 ('mul_1', 'last')
+                                 ],
+                                {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array(2.0).shape, 'value': np.array(2.0)},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'front'
+
+        replacer = ImageScaler()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
+
+    # Graph without Mul and Add operations
+    def test_image_scaler_test_8(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'im_scaler'),
+                             ('im_scaler', 'last'),
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
+                             'im_scaler': {'scale': np.array(1.0), 'bias': np.reshape(np.array([0, 0, 0]), [3, 1, 1])},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'last')
+                                 ],
+                                {'placeholder_1': {'shape': np.array([1, 227, 227, 3])},
                                  }, nodes_with_edges_only=True)
 
         graph.graph['layout'] = 'NCHW'
+        graph.stage = 'front'
 
         replacer = ImageScaler()
         replacer.find_and_replace_pattern(graph)
diff --git a/model-optimizer/extensions/front/input_cut.py b/model-optimizer/extensions/front/input_cut.py
new file mode 100644
index 000000000..66e48290e
--- /dev/null
+++ b/model-optimizer/extensions/front/input_cut.py
@@ -0,0 +1,33 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.front.extractor import add_input_ops
+from mo.graph.graph import Graph
+
+
+class InputCut(FrontReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.front.output_cut import OutputCut
+        return [OutputCut]
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        add_input_ops(graph, graph.graph['user_shapes'], True)
diff --git a/model-optimizer/extensions/front/instance_normalization.py b/model-optimizer/extensions/front/instance_normalization.py
index abcc1e98c..c80c65bc8 100644
--- a/model-optimizer/extensions/front/instance_normalization.py
+++ b/model-optimizer/extensions/front/instance_normalization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.lin_op import Add, Mul
 from extensions.ops.mvn import MVN
 
@@ -30,7 +30,7 @@ class InstanceNormalization(FrontReplacementOp):
     op = "InstanceNormalization"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         prefix = node.name + '/InstanceNormalization'
         mvn = MVN(graph, dict(
             name=prefix + '/MVN',
diff --git a/model-optimizer/extensions/front/instance_normalization_test.py b/model-optimizer/extensions/front/instance_normalization_test.py
index 90dbe1bff..bdfcd55f0 100644
--- a/model-optimizer/extensions/front/instance_normalization_test.py
+++ b/model-optimizer/extensions/front/instance_normalization_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/kaldi/__init__.py b/model-optimizer/extensions/front/kaldi/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/model-optimizer/extensions/front/kaldi/__init__.py
diff --git a/model-optimizer/extensions/front/kaldi/add_permute_after_convolution.py b/model-optimizer/extensions/front/kaldi/add_permute_after_convolution.py
new file mode 100644
index 000000000..72b1f0ced
--- /dev/null
+++ b/model-optimizer/extensions/front/kaldi/add_permute_after_convolution.py
@@ -0,0 +1,111 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from collections import deque
+
+import numpy as np
+
+from extensions.front.kaldi.add_reshape_around_convolution import ReplaceConvolutionReshape
+from extensions.middle.TensorIteratorMerge import op_type
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Node, Graph
+from mo.ops.permute import Permute
+
+
+class ReplaceConvolutionPermute(FrontReplacementSubgraph):
+    """
+    This pass adds Permute around a Convolution layer if after there is sequence Pooling or Activation afterConvolution
+    **IMPORTANT**: This pass must run after inserting Reshapes around Poolings and Convolutions
+       For example:
+           Let's suppose we have next graph:
+
+           Convolution -> [Pooling | Activation -> Pooling | Pooling -> Activation | Activation]* -> ... -> (ScaleShift | FullyConnected)
+
+           **NOTE**: Please, remember about Reshapes around Poolings and Convolutions.
+                     In this example we do not print them for simplicity.
+           **NOTE**: After Convolution, it is not necessary to have a sequence [Pooling | Activation -> Pooling | Pooling -> Activation | Activation]*
+
+           So this pass will convert this graph to the next one:
+
+           Convolution -> * -> Permute (order 0, 3, 2, 1 )-> Next_Layer -> ... -> (ScaleShift|FullyConnected)
+
+    """
+    enabled = True
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('target_node', dict(op=lambda x: x in ['ScaleShift', 'FullyConnected']))
+            ],
+            edges=[]
+        )
+
+    def replace_sub_graph(self, graph: Graph, match: dict):
+        target_node = match['target_node']
+        nodes_with_weights = self.dfs(graph, target_node.name, ('Convolution', 'FullyConnected', 'ScaleShift'), True)
+        convolution_nodes = [node for node in nodes_with_weights if Node(graph, node).op == 'Convolution']
+        for convolution_node in convolution_nodes:
+            target_node = self.search_target_node(Node(graph, convolution_node))
+            permute_op = Permute(graph, {'order': np.array([0, 3, 2, 1])})
+            permute_node = permute_op.add_node({'name': '{}/Permute'.format(target_node.name)})
+            target_node.insert_node_after( permute_node, 0)
+
+    def run_after(self):
+        from extensions.front.kaldi.add_reshape_around_pooling import ReplacePoolingReshape
+        return [ReplaceConvolutionReshape, ReplacePoolingReshape]
+
+    @staticmethod
+    def search_target_node(node: Node):
+        target_node = ReplaceConvolutionPermute.skip_reshapes(node)
+        sequence_layers = ['Pooling', 'Activation']
+        if target_node.op not in sequence_layers:
+            return node
+        if target_node.op == 'Activation':
+            sequence_layers.reverse()
+        if target_node.op == sequence_layers[0]:
+            next_node = ReplaceConvolutionPermute.skip_reshapes(target_node)
+            if next_node.op == sequence_layers[1]:
+                target_node = next_node
+
+        return target_node
+
+    @staticmethod
+    def skip_reshapes(node: Node):
+        next_node = node.out_node()
+        while next_node.op == 'Reshape':
+            next_node = next_node.out_node()
+        return next_node
+
+    @staticmethod
+    def dfs(graph: Graph, node_name: str, stop_nodes: tuple, reverse: bool = False) -> list:
+        d = deque()
+        res = []
+        visited = set()
+        visited.add(node_name)
+        d.appendleft(node_name)
+        while len(d) != 0:
+            cur_node = d.popleft()
+            if reverse:
+                nodes = graph.in_edges(cur_node)
+            else:
+                nodes = graph.out_edges(cur_node)
+            for in_node_name, _ in nodes:
+                if in_node_name not in visited:
+                    if op_type(graph, in_node_name) not in stop_nodes:
+                        visited.add(in_node_name)
+                        d.append(in_node_name)
+                    else:
+                        res.append(in_node_name)
+        return res
diff --git a/model-optimizer/extensions/front/kaldi/add_permute_after_convolution_test.py b/model-optimizer/extensions/front/kaldi/add_permute_after_convolution_test.py
new file mode 100644
index 000000000..c166fb986
--- /dev/null
+++ b/model-optimizer/extensions/front/kaldi/add_permute_after_convolution_test.py
@@ -0,0 +1,75 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+
+from extensions.front.kaldi.add_permute_after_convolution import ReplaceConvolutionPermute
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+
+class ReplaceConvolutionPermuteTests(unittest.TestCase):
+    nodes_attributes = {
+        'conv': {'kind': 'op', 'op': 'Convolution'},
+        'reshape_conv': {'kind': 'op', 'op': 'Reshape'},
+        'reshape_pool': {'kind': 'op', 'op': 'Reshape'},
+        'pool': {'kind': 'op', 'op': 'Pooling'},
+        'reshape_after_pool': {'kind': 'op', 'op': 'Reshape'},
+        'act': {'kind': 'op', 'op': 'Activation'},
+        'fc': {'kind': 'op', 'op': 'FullyConnected'},
+        'scale_shift': {'kind': 'op', 'op': 'ScaleShift'}
+    }
+
+    def test_simple_convolution(self):
+        graph = build_graph(self.nodes_attributes, [
+            ('conv', 'reshape_conv'),
+            ('reshape_conv', 'scale_shift'),
+        ])
+        ReplaceConvolutionPermute().find_and_replace_pattern(graph)
+        conv_node = Node(graph, graph.nodes['conv']['name'])
+        permute = conv_node.out_node()
+        self.assertEqual(permute.op, 'Permute')
+        self.assertTrue(np.array_equal(permute.order, np.array([0, 3, 2, 1])))
+
+    def test_conv_pool(self):
+        graph = build_graph(self.nodes_attributes, [
+            ('conv', 'reshape_conv'),
+            ('reshape_conv', 'reshape_pool'),
+            ('reshape_pool', 'pool'),
+            ('pool', 'reshape_after_pool'),
+            ('reshape_after_pool', 'fc'),
+        ])
+        ReplaceConvolutionPermute().find_and_replace_pattern(graph)
+        pool_node = Node(graph, graph.nodes['pool']['name'])
+        permute = pool_node.out_node()
+        self.assertEqual(permute.op, 'Permute')
+        self.assertTrue(np.array_equal(permute.order, np.array([0, 3, 2, 1])))
+
+    def test_conv_act_pool(self):
+        graph = build_graph(self.nodes_attributes, [
+            ('conv', 'reshape_conv'),
+            ('reshape_conv', 'act'),
+            ('act', 'reshape_pool'),
+            ('reshape_pool', 'pool'),
+            ('pool', 'reshape_after_pool'),
+            ('reshape_after_pool', 'fc'),
+        ])
+        ReplaceConvolutionPermute().find_and_replace_pattern(graph)
+        pool_node = Node(graph, graph.nodes['pool']['name'])
+        permute = pool_node.out_node()
+        self.assertEqual(permute.op, 'Permute')
+        self.assertTrue(np.array_equal(permute.order, np.array([0, 3, 2, 1])))
diff --git a/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py b/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py
index 02c0e0fa1..2900da4bf 100644
--- a/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py
+++ b/model-optimizer/extensions/front/kaldi/add_reshape_around_convolution.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,10 +13,9 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.convolution import Convolution
 from mo.ops.reshape import Reshape
 
@@ -38,7 +37,7 @@ class ReplaceConvolutionReshape(FrontReplacementOp):
     op = "Convolution"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         input_node = node.in_node(0)
         port = graph.get_edge_data(input_node.id, node.id)[0]['out']
         input_reshape_node = Reshape(graph,
diff --git a/model-optimizer/extensions/front/kaldi/add_reshape_around_pooling.py b/model-optimizer/extensions/front/kaldi/add_reshape_around_pooling.py
index b7326ad3b..f17a8ae2b 100644
--- a/model-optimizer/extensions/front/kaldi/add_reshape_around_pooling.py
+++ b/model-optimizer/extensions/front/kaldi/add_reshape_around_pooling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.pooling import Pooling
 from mo.ops.reshape import Reshape
 
@@ -39,7 +37,7 @@ class ReplacePoolingReshape(FrontReplacementOp):
     op = "Pooling"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node) -> list:
+    def replace_op(self, graph: Graph, node: Node) -> list:
         input_node = node.in_node(0)
 
         input_reshape_node = Reshape(graph,
@@ -48,7 +46,7 @@ class ReplacePoolingReshape(FrontReplacementOp):
                                          'infer': Reshape.kaldi_infer
                                      }).create_node([input_node])
 
-        pooling_node = Pooling(graph, graph.nodes[node.id]).create_node([input_reshape_node])
+        pooling_node = Pooling(graph, graph.node[node.id]).create_node([input_reshape_node])
 
         output_reshape_node = Reshape(graph,
                                       {
diff --git a/model-optimizer/extensions/front/kaldi/eliminate_redundant_reshape.py b/model-optimizer/extensions/front/kaldi/eliminate_redundant_reshape.py
index a5c9a8c75..64ae6949b 100644
--- a/model-optimizer/extensions/front/kaldi/eliminate_redundant_reshape.py
+++ b/model-optimizer/extensions/front/kaldi/eliminate_redundant_reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from extensions.front.kaldi.fuse_repeated_reshape import FuseRepeatedReshapes
 from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 
 
@@ -40,7 +40,7 @@ class EliminateRedundantReshape(FrontReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         reshape_node = match['reshape']
         in_node = reshape_node.in_node()
         out_node = reshape_node.out_node()
diff --git a/model-optimizer/extensions/front/kaldi/fuse_repeated_reshape.py b/model-optimizer/extensions/front/kaldi/fuse_repeated_reshape.py
index 9a8a98407..639240eea 100644
--- a/model-optimizer/extensions/front/kaldi/fuse_repeated_reshape.py
+++ b/model-optimizer/extensions/front/kaldi/fuse_repeated_reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 
 
@@ -38,7 +37,7 @@ class FuseRepeatedReshapes(FrontReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         node = match['reshape_1']
         if (node.has_valid('type') and node.type == 'Reshape' and
                 len(node.out_nodes()) == 1 and node.out_node().has_valid('kind') and node.out_node().kind == 'data' and
diff --git a/model-optimizer/extensions/front/kaldi/replace_lstm_node_pattern.py b/model-optimizer/extensions/front/kaldi/replace_lstm_node_pattern.py
index bfba4c46d..b846a4c86 100644
--- a/model-optimizer/extensions/front/kaldi/replace_lstm_node_pattern.py
+++ b/model-optimizer/extensions/front/kaldi/replace_lstm_node_pattern.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,11 +15,9 @@
 """
 import numpy as np
 
-import networkx as nx
-
 from mo.front.caffe.extractors.utils import embed_input
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.activation import Activation
 from mo.ops.clamp import Clamp
 from mo.ops.eltwise import Eltwise
@@ -50,7 +48,15 @@ class ReplaceLSTMNodePattern(FrontReplacementOp):
     op = "LSTMCell"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    # we need to rewrite this transform to fit unified pipeline (it should be a part of traditional FRONT phase)
+    def run_before(self):
+        from extensions.front.output_cut import OutputCut
+        return [OutputCut]
+
+    def run_after(self):
+        return []
+
+    def replace_op(self, graph: Graph, node: Node):
         input_node = node.in_node()
 
         memory_pair_input = unique_id('id')
@@ -102,7 +108,8 @@ class ReplaceLSTMNodePattern(FrontReplacementOp):
         #     |____(4)Eltwise(sum)
         split_joined_input = Split(graph, {'name': 'join_input_split',
                                            'axis': 1,
-                                           'num_split': 4
+                                           'num_split': 4,
+                                           'out_ports_count': 4,
                                            }).create_node([join_input_prev_state_sum])
 
         prev_lstm_state = Memory(graph, {'name': 'prev_memory_state',
diff --git a/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern.py b/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern.py
index 360a225e5..9c14e2cbf 100644
--- a/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern.py
+++ b/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,12 +15,10 @@
 """
 import numpy as np
 
-import networkx as nx
-
 from extensions.front.kaldi.replace_lstm_node_pattern import unique_id
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.concat import Concat
 from mo.ops.crop import Crop
 from mo.ops.memory import Memory
@@ -49,7 +47,7 @@ class ReplaceSpliceNodePattern(FrontReplacementOp):
     op = "Splice"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         input_node = node.in_nodes()[0]
         memory_pair_id = unique_id('id')
         # Memory(in)
@@ -72,6 +70,7 @@ class ReplaceSpliceNodePattern(FrontReplacementOp):
         #         Concat
         # Input  /
         concat_node = Concat(graph, {'name': 'Splice_Concat',
+                                     'in_ports_count': 2,
                                      'axis': 1}).create_node([crop, input_node])
 
         # Concat -> Memory(out)
diff --git a/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern_test.py b/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern_test.py
index f967f4b0d..88e630c34 100644
--- a/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern_test.py
+++ b/model-optimizer/extensions/front/kaldi/replace_splice_node_pattern_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/RNN_ext.py b/model-optimizer/extensions/front/mxnet/RNN_ext.py
index 1ae8e31f7..984283827 100644
--- a/model-optimizer/extensions/front/mxnet/RNN_ext.py
+++ b/model-optimizer/extensions/front/mxnet/RNN_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,10 +13,11 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
+from extensions.ops.GRU import GRU
+from extensions.ops.LSTM import LSTM
+from extensions.ops.RNN import RNN
 from mo.front.extractor import FrontExtractorOp
-from extensions.ops.lstm_sequence import LSTMSequence
+from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
@@ -32,31 +33,40 @@ class RNNFrontExtractor(FrontExtractorOp):
         state_size = attrs.int('state_size', None)
         bidirectional = attrs.bool('bidirectional', False)
         num_layers = attrs.int('num_layers', 1)
+        layout = attrs.str('layout', 'TNC')  # in MXNet RNN by default take data in
+                                             # format [seq_len, batch_size, inp_size]
 
         node_attrs = {
-            'batch_dim': 1,
-            'sequence_dim': 0,
+            'batch_dim': layout.index('N'),
+            'sequence_dim': layout.index('T'),
             'blobs_wrb': False,
             'hidden_size': state_size,
             'has_num_directions': bidirectional,
+            'direction': 'bidirectional' if bidirectional else 'forward',
+            'num_layers': num_layers,
             'format': 'mxnet',
+            'multilayers': num_layers != 1,
+            'gate_order':  None,
         }
 
-        if bidirectional:
-            raise Error(
-                "Operation RNN with bidirectional not supported. num_directions = 1 is supported only " +
-                refer_to_faq_msg(86))
-
-        if num_layers > 1:
-            raise Error(
-                "Operation RNN with num_layers more then one not supported. num_layers = 1 is supported only " +
-                refer_to_faq_msg(86))
-
-        if mode == 'lstm':
-            LSTMSequence.update_node_stat(node, node_attrs)
+        if mode == 'rnn_tanh':
+            node_attrs['gate_order'] = [0]
+            node_attrs['activations'] = ['tanh']
+            RNN.update_node_stat(node, node_attrs)
+        elif mode == 'rnn_relu':
+            node_attrs['gate_order'] = [0]
+            node_attrs['activations'] = ['relu']
+            RNN.update_node_stat(node, node_attrs)
+        elif mode == 'gru':
+            node_attrs['gate_order'] = [1, 0, 2]
+            node_attrs['linear_before_reset'] = 1
+            GRU.update_node_stat(node, node_attrs)
+        elif mode == 'lstm':
+            node_attrs['gate_order'] = [1, 0, 2, 3]
+            LSTM.update_node_stat(node, node_attrs)
         else:
             raise Error(
-                "Operation RNN with mode '{}' not supported. Please register RNN as custom op. " +
+                "Operation RNN with mode '{}' not supported." +
                 refer_to_faq_msg(86),
                 mode)
         return __class__.enabled
diff --git a/model-optimizer/extensions/front/mxnet/RNN_ext_test.py b/model-optimizer/extensions/front/mxnet/RNN_ext_test.py
new file mode 100644
index 000000000..41ee5b31c
--- /dev/null
+++ b/model-optimizer/extensions/front/mxnet/RNN_ext_test.py
@@ -0,0 +1,99 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+
+from extensions.front.mxnet.RNN_ext import RNNFrontExtractor
+from mo.utils.error import Error
+from mo.utils.unittest.extractors import PB
+
+
+class RNNFrontExtractorTest(unittest.TestCase):
+    @staticmethod
+    def _create_node(**attrs):
+        params = {'attrs': {
+            **attrs
+        }}
+        node = PB({'symbol_dict': params})
+        return node
+
+    base_attrs = {
+        'batch_dim': 1,
+        'sequence_dim': 0,
+        'blobs_wrb': False,
+        'format': 'mxnet',
+        'gate_order': [1, 0, 2, 3],
+    }
+
+    def test_base_attrs(self):
+        attrs = {
+            'state_size': 128,
+            'mode': 'lstm',
+        }
+
+        additional_attrs = {
+            'multilayers': False,
+            'hidden_size': 128,
+            'has_num_directions': False,
+            'direction': 'forward',
+            'num_layers': 1,
+        }
+
+        node = self._create_node(**attrs)
+        RNNFrontExtractor.extract(node)
+
+        expect_attrs = {**self.base_attrs, **additional_attrs}
+
+        for key in expect_attrs.keys():
+            equal = np.all(np.equal(node[key], expect_attrs[key], dtype=object))
+            self.assertTrue(equal, 'Values for attr {} are not equal'.format(key))
+
+        self.assertTrue(node.op == 'LSTM')
+
+    def test_unsupported_mode(self):
+        attrs = {
+            'state_size': 128,
+            'mode': 'abracadabra',
+        }
+        node = self._create_node(**attrs)
+        with self.assertRaises(Error):
+            RNNFrontExtractor.extract(node)
+
+    def test_additional_attrs(self):
+        attrs = {
+            'state_size': 128,
+            'mode': 'lstm',
+            'bidirectional': True,
+            'num_layers': 2,
+        }
+
+        additional_attrs = {
+            'multilayers': True,
+            'hidden_size': 128,
+            'has_num_directions': True,
+            'direction': 'bidirectional',
+            'num_layers': 2,
+        }
+
+        node = self._create_node(**attrs)
+        RNNFrontExtractor.extract(node)
+
+        expect_attrs = {**self.base_attrs, **additional_attrs}
+
+        for key in expect_attrs.keys():
+            equal = np.all(np.equal(node[key], expect_attrs[key], dtype=object))
+            self.assertTrue(equal, 'Values for attr {} are not equal'.format(key))
+\ No newline at end of file
diff --git a/model-optimizer/extensions/front/mxnet/add_input_data_to_prior_boxes.py b/model-optimizer/extensions/front/mxnet/add_input_data_to_prior_boxes.py
new file mode 100644
index 000000000..eb6f9e5c2
--- /dev/null
+++ b/model-optimizer/extensions/front/mxnet/add_input_data_to_prior_boxes.py
@@ -0,0 +1,62 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph, Node
+
+
+class AddInputDataToPriorBoxes(FrontReplacementPattern):
+    enabled = True
+
+    def run_before(self):
+        from extensions.front.create_tensor_nodes import CreateTensorNodes
+        return [CreateTensorNodes]
+
+    def run_after(self):
+        from extensions.front.pass_separator import FrontFinish
+        return [FrontFinish]
+
+    @staticmethod
+    def add_input_data_to_prior_boxes(graph: Graph, input_names: str = ''):
+        """
+        PriorBox layer has data input unlike mxnet.
+        Need to add data input to _contrib_MultiBoxPrior for
+        for correct conversion to PriorBox layer.
+
+        Parameters
+        ----------
+        graph : Graph
+           Graph with loaded model.
+        """
+        if not input_names:
+            input_names = ('data',)
+        else:
+            input_names = input_names.split(',')
+
+        input_nodes = {}
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.name in input_names:
+                input_nodes.update({node.id: node})
+
+        if len(input_nodes) > 0:
+            for node in graph.nodes():
+                node = Node(graph, node)
+                if node.has_valid('op') and node.op == '_contrib_MultiBoxPrior':
+                    node.add_input_port(idx=1)
+                    graph.create_edge(list(input_nodes.values())[0], node, out_port=0, in_port=1)
+
+    def find_and_replace_pattern(self, graph: Graph):
+        self.add_input_data_to_prior_boxes(graph, graph.graph['cmd_params'].input)
diff --git a/model-optimizer/mo/pipeline/mx_test.py b/model-optimizer/extensions/front/mxnet/add_input_data_to_prior_boxes_test.py
index 66e49bb32..9f0c9cf12 100644
--- a/model-optimizer/mo/pipeline/mx_test.py
+++ b/model-optimizer/extensions/front/mxnet/add_input_data_to_prior_boxes_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,9 +17,10 @@
 import unittest
 
 import numpy as np
+from argparse import Namespace
 
 from mo.graph.graph import Node
-from mo.pipeline.mx import add_input_data_to_prior_boxes
+from extensions.front.mxnet.add_input_data_to_prior_boxes import AddInputDataToPriorBoxes
 from mo.utils.unittest.graph import build_graph
 
 
@@ -37,7 +38,8 @@ class TestMxnetPipeline(unittest.TestCase):
                 'node_2': {'shape': np.array([1, 3, 10, 10])},
             })
 
-        add_input_data_to_prior_boxes(graph)
+        graph.graph['cmd_params'] = Namespace(input=None)
+        AddInputDataToPriorBoxes().find_and_replace_pattern(graph)
         node_multi_box = Node(graph, 'node_multi_box')
 
         node_input1 = node_multi_box.in_node(0)
@@ -58,7 +60,8 @@ class TestMxnetPipeline(unittest.TestCase):
                 'node_2': {'shape': np.array([1, 3, 10, 10])},
             })
 
-        add_input_data_to_prior_boxes(graph, 'node_1')
+        graph.graph['cmd_params'] = Namespace(input='node_1')
+        AddInputDataToPriorBoxes().find_and_replace_pattern(graph)
         node_multi_box = Node(graph, 'node_multi_box')
 
         node_input1 = node_multi_box.in_node(0)
diff --git a/model-optimizer/extensions/front/mxnet/add_n_ext.py b/model-optimizer/extensions/front/mxnet/add_n_ext.py
index 5577983ee..083e3c4f7 100644
--- a/model-optimizer/extensions/front/mxnet/add_n_ext.py
+++ b/model-optimizer/extensions/front/mxnet/add_n_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/block_grad_ext.py b/model-optimizer/extensions/front/mxnet/block_grad_ext.py
index 0d5946e19..1cdda0f13 100644
--- a/model-optimizer/extensions/front/mxnet/block_grad_ext.py
+++ b/model-optimizer/extensions/front/mxnet/block_grad_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/broadcast_mul.py b/model-optimizer/extensions/front/mxnet/broadcast_mul.py
index 8f1e064c9..9b861cba2 100644
--- a/model-optimizer/extensions/front/mxnet/broadcast_mul.py
+++ b/model-optimizer/extensions/front/mxnet/broadcast_mul.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,8 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import replace_node
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.lin_op import Mul
 
 
@@ -26,8 +25,8 @@ class BroadcastMulFrontReplacer(FrontReplacementOp):
     op = 'broadcast_mul'
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         mul_op = Mul(graph, dict(name=node.id + '/mul_', symbol_dict={'name': node.id + '/mul_'}))
         mul_node = mul_op.create_node(inputs=[node.in_node(0), node.in_node(1)])
-        replace_node(node, mul_node)
+        node.replace_node(mul_node)
         return [mul_node.id]
diff --git a/model-optimizer/extensions/front/mxnet/broadcast_mul_ext.py b/model-optimizer/extensions/front/mxnet/broadcast_mul_ext.py
index 7fd99eebc..a37b5beb8 100644
--- a/model-optimizer/extensions/front/mxnet/broadcast_mul_ext.py
+++ b/model-optimizer/extensions/front/mxnet/broadcast_mul_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs.py b/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs.py
index 1e740ad06..a8b6fa08a 100644
--- a/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs.py
+++ b/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,34 +14,39 @@
  limitations under the License.
 """
 
-import networkx as nx
 from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
 
 
 class CheckSoftmaxNodeInputs(FrontReplacementPattern):
-
     enabled = True
 
+    def run_before(self):
+        from extensions.front.user_data_repack import UserDataRepack
+        return [UserDataRepack]
+
+    def run_after(self):
+        return []
+
     @staticmethod
     def pattern():
         return dict(
             nodes=[
-                ('softmax', dict(op='SoftmaxOutput'))
+                ('softmax', dict(op=lambda op: op in ['SoftMax', 'SoftmaxActivation', 'SoftmaxOutput']))
             ],
             edges=[])
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         """
         Need to remove from softmax layer all unused inputs
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
          match : dict
            Patterns which were found in graph structure.
         """
-
         softmax_node = match['softmax']
         softmax_nodes_len = len(softmax_node.in_nodes())
         for i in reversed(range(1, softmax_nodes_len)):
diff --git a/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs_test.py b/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs_test.py
index ea7da2a4b..2e2dc20e4 100644
--- a/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs_test.py
+++ b/model-optimizer/extensions/front/mxnet/check_softmax_node_inputs_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/conv_ext.py b/model-optimizer/extensions/front/mxnet/conv_ext.py
index 6463bfb1d..1792ff8d1 100644
--- a/model-optimizer/extensions/front/mxnet/conv_ext.py
+++ b/model-optimizer/extensions/front/mxnet/conv_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/conv_ext_test.py b/model-optimizer/extensions/front/mxnet/conv_ext_test.py
index ee686889f..2a75fcea7 100644
--- a/model-optimizer/extensions/front/mxnet/conv_ext_test.py
+++ b/model-optimizer/extensions/front/mxnet/conv_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/copy_ext.py b/model-optimizer/extensions/front/mxnet/copy_ext.py
index cc06a5467..0a1fa31fe 100644
--- a/model-optimizer/extensions/front/mxnet/copy_ext.py
+++ b/model-optimizer/extensions/front/mxnet/copy_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/custom.py b/model-optimizer/extensions/front/mxnet/custom.py
index 33436c5ee..f08407550 100644
--- a/model-optimizer/extensions/front/mxnet/custom.py
+++ b/model-optimizer/extensions/front/mxnet/custom.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/custom_test.py b/model-optimizer/extensions/front/mxnet/custom_test.py
index 3d698cf9a..36bd32a6b 100644
--- a/model-optimizer/extensions/front/mxnet/custom_test.py
+++ b/model-optimizer/extensions/front/mxnet/custom_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/dropout_ext.py b/model-optimizer/extensions/front/mxnet/dropout_ext.py
index ee169736b..421049860 100644
--- a/model-optimizer/extensions/front/mxnet/dropout_ext.py
+++ b/model-optimizer/extensions/front/mxnet/dropout_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/element_wise_sum_ext.py b/model-optimizer/extensions/front/mxnet/element_wise_sum_ext.py
index 8ad2e207d..ac826d3b6 100644
--- a/model-optimizer/extensions/front/mxnet/element_wise_sum_ext.py
+++ b/model-optimizer/extensions/front/mxnet/element_wise_sum_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/exp_ext.py b/model-optimizer/extensions/front/mxnet/exp_ext.py
new file mode 100644
index 000000000..05e84a252
--- /dev/null
+++ b/model-optimizer/extensions/front/mxnet/exp_ext.py
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.activation import Activation
+
+
+class ExpExtractor(FrontExtractorOp):
+    op = 'exp'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Activation.update_node_stat(node, {'operation': 'exp'})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/mxnet/flatten_ext.py b/model-optimizer/extensions/front/mxnet/flatten_ext.py
index f0c34691a..6b02cad59 100644
--- a/model-optimizer/extensions/front/mxnet/flatten_ext.py
+++ b/model-optimizer/extensions/front/mxnet/flatten_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/gather.py b/model-optimizer/extensions/front/mxnet/gather.py
new file mode 100644
index 000000000..c94c33297
--- /dev/null
+++ b/model-optimizer/extensions/front/mxnet/gather.py
@@ -0,0 +1,33 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Node, Graph
+from extensions.ops.gather import Gather
+
+
+class GatherFrontReplacer(FrontReplacementOp):
+    op = 'Embedding'
+    enabled = True
+
+    def replace_sub_graph(self, graph: Graph, match: dict):
+        node = match['op']
+        gather_node = Gather(graph, dict(name=node.id + '/embedding_',
+                                         axis=0,
+                                         symbol_dict={'name': node.id + '/embedding_'})).create_node()
+        node.in_port(0).get_connection().set_destination(gather_node.in_port(1))
+        node.in_port(1).get_connection().set_destination(gather_node.in_port(0))
+        node.out_port(0).get_connection().set_source(gather_node.out_port(0))
diff --git a/model-optimizer/mo/front/tf/extractors/shape.py b/model-optimizer/extensions/front/mxnet/gather_ext.py
index e174bc9f3..62e2f9601 100644
--- a/model-optimizer/mo/front/tf/extractors/shape.py
+++ b/model-optimizer/extensions/front/mxnet/gather_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,14 +13,14 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import numpy as np
 
-from mo.front.tf.extractors.utils import tf_dtype_extractor
-from mo.ops.shape import Shape
+from mo.front.extractor import FrontExtractorOp
 
 
-def tf_shape_ext(pb):
-    return {
-        'infer': Shape.infer,
-        'data_type': tf_dtype_extractor(pb.attr['out_type'].type, np.int32)
-    }
+class GatherFrontExtractor(FrontExtractorOp):
+    op = 'Embedding'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/mxnet/gather_test.py b/model-optimizer/extensions/front/mxnet/gather_test.py
new file mode 100644
index 000000000..005695010
--- /dev/null
+++ b/model-optimizer/extensions/front/mxnet/gather_test.py
@@ -0,0 +1,64 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.front.mxnet.gather import GatherFrontReplacer
+from mo.utils.unittest.graph import build_graph, compare_graphs
+from mo.graph.graph import Node
+
+
+class GatherTest(unittest.TestCase):
+    def test_embedding_replace1(self):
+        graph = build_graph({'placeholder_1': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                             'embedding_const': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None, 'type': 'Const', 'op': 'Const'},
+                             'embedding': {'type': None, 'kind': 'op', 'op': 'Embedding'},
+                             'last': {'type': None, 'kind': 'op', 'op': None},
+                            },
+                            [('placeholder_1', 'embedding', {'out': 0, 'in': 0}),
+                             ('embedding_const', 'embedding', {'out': 0, 'in': 1}),
+                             ('embedding', 'last')
+                             ],
+                            {'placeholder_1': {'shape': np.array([32,35])},
+                             'embedding_const': {'shape': np.array([2000, 650]),
+                                                'bias': np.array(np.random.random_integers(0, 225, (2000, 650)))},
+                             }, nodes_with_edges_only=True)
+
+        graph_ref = build_graph({'placeholder_1': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                                 'embedding_const': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None, 'type': 'Const', 'op': 'Const'},
+                                 'embedding': {'type': None, 'kind': 'op', 'op': 'Gather'},
+                                 'last': {'type': None, 'kind': 'op', 'op': None},
+                                },
+                                [
+                                 ('embedding_const', 'embedding'),
+                                 ('placeholder_1', 'embedding'),
+                                 ('embedding', 'last')
+                                 ],
+                                {'placeholder_1': {'shape': np.array([32,35])},
+                                 'embedding_const': {'shape': np.array([2000, 650]),
+                                                'bias': np.array(np.random.random_integers(0, 225, (2000, 650)))},
+                                 }, nodes_with_edges_only=True)
+
+        graph.graph['layout'] = 'NCHW'
+        graph.stage = 'front'
+
+        replacer = GatherFrontReplacer()
+        replacer.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'last')
+        self.assertTrue(flag, resp)
diff --git a/model-optimizer/extensions/front/mxnet/instance_norm_ext.py b/model-optimizer/extensions/front/mxnet/instance_norm_ext.py
index 26fe6746f..3a8a1d16b 100644
--- a/model-optimizer/extensions/front/mxnet/instance_norm_ext.py
+++ b/model-optimizer/extensions/front/mxnet/instance_norm_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/max_ext.py b/model-optimizer/extensions/front/mxnet/max_ext.py
index 3db428c45..4af1468eb 100644
--- a/model-optimizer/extensions/front/mxnet/max_ext.py
+++ b/model-optimizer/extensions/front/mxnet/max_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
  limitations under the License.
 """
 
+import numpy as np
+
 from mo.front.extractor import FrontExtractorOp
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
 from mo.ops.reduce import Reduce
@@ -27,7 +29,7 @@ class MaxFrontExtractor(FrontExtractorOp):
     def extract(node):
         attrs = get_mxnet_layer_attrs(node.symbol_dict)
         data = {
-            'axis': [attrs.int('axis', 0)],
+            'axis': np.array([attrs.int('axis', 0)], dtype=np.int64),
             'reduce_type': 'max',
             'keep_dims': False
         }
diff --git a/model-optimizer/extensions/front/mxnet/maximum_ext.py b/model-optimizer/extensions/front/mxnet/maximum_ext.py
index 573a2dd9e..913e9b80d 100644
--- a/model-optimizer/extensions/front/mxnet/maximum_ext.py
+++ b/model-optimizer/extensions/front/mxnet/maximum_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/minimum_ext.py b/model-optimizer/extensions/front/mxnet/minimum_ext.py
index fb3d094fd..c13fe6037 100644
--- a/model-optimizer/extensions/front/mxnet/minimum_ext.py
+++ b/model-optimizer/extensions/front/mxnet/minimum_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/minus_scalar.py b/model-optimizer/extensions/front/mxnet/minus_scalar.py
index b190ebc8a..116de1994 100644
--- a/model-optimizer/extensions/front/mxnet/minus_scalar.py
+++ b/model-optimizer/extensions/front/mxnet/minus_scalar.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.lin_op import Add
 from mo.ops.const import Const
 
@@ -26,7 +26,7 @@ class MinusScalarFrontReplacer(FrontReplacementOp):
     op = '_minus_scalar'
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         in_node = node.in_node()
         out_nodes = [node for node in node.out_nodes().values()]
         graph.remove_edge(node.in_node().id, node.id)
diff --git a/model-optimizer/extensions/front/mxnet/minus_scalar_ext.py b/model-optimizer/extensions/front/mxnet/minus_scalar_ext.py
index 43146fbef..d748dbc90 100644
--- a/model-optimizer/extensions/front/mxnet/minus_scalar_ext.py
+++ b/model-optimizer/extensions/front/mxnet/minus_scalar_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/mul_scalar.py b/model-optimizer/extensions/front/mxnet/mul_scalar.py
index 24dd307a5..7d9d863b0 100644
--- a/model-optimizer/extensions/front/mxnet/mul_scalar.py
+++ b/model-optimizer/extensions/front/mxnet/mul_scalar.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.lin_op import Mul
 from mo.ops.const import Const
 
@@ -26,7 +24,7 @@ class MulScalarFrontReplacer(FrontReplacementOp):
     op = '_mul_scalar'
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         in_node = node.in_node()
         out_nodes = [node for node in node.out_nodes().values()]
         graph.remove_edge(node.in_node().id, node.id)
diff --git a/model-optimizer/extensions/front/mxnet/mul_scalar_ext.py b/model-optimizer/extensions/front/mxnet/mul_scalar_ext.py
index fdee6e113..5c0b457df 100644
--- a/model-optimizer/extensions/front/mxnet/mul_scalar_ext.py
+++ b/model-optimizer/extensions/front/mxnet/mul_scalar_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/pad_ext.py b/model-optimizer/extensions/front/mxnet/pad_ext.py
index a3b3c0ecb..cd1dad192 100644
--- a/model-optimizer/extensions/front/mxnet/pad_ext.py
+++ b/model-optimizer/extensions/front/mxnet/pad_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/pooling_ext.py b/model-optimizer/extensions/front/mxnet/pooling_ext.py
index 6a2452f1b..9710cc5f3 100644
--- a/model-optimizer/extensions/front/mxnet/pooling_ext.py
+++ b/model-optimizer/extensions/front/mxnet/pooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/pooling_ext_test.py b/model-optimizer/extensions/front/mxnet/pooling_ext_test.py
index 43450a818..9edd5834f 100644
--- a/model-optimizer/extensions/front/mxnet/pooling_ext_test.py
+++ b/model-optimizer/extensions/front/mxnet/pooling_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/proposal_ext.py b/model-optimizer/extensions/front/mxnet/proposal_ext.py
index 5e2fa80e3..32fe32c80 100644
--- a/model-optimizer/extensions/front/mxnet/proposal_ext.py
+++ b/model-optimizer/extensions/front/mxnet/proposal_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/reshape_ext.py b/model-optimizer/extensions/front/mxnet/reshape_ext.py
index 32251fe60..0ed3c0fd6 100644
--- a/model-optimizer/extensions/front/mxnet/reshape_ext.py
+++ b/model-optimizer/extensions/front/mxnet/reshape_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/rnn_param_concat.py b/model-optimizer/extensions/front/mxnet/rnn_param_concat.py
index 8b21e7e43..fb487a45a 100644
--- a/model-optimizer/extensions/front/mxnet/rnn_param_concat.py
+++ b/model-optimizer/extensions/front/mxnet/rnn_param_concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/roi_pooling_ext.py b/model-optimizer/extensions/front/mxnet/roi_pooling_ext.py
index f274c4176..e17a4dfff 100644
--- a/model-optimizer/extensions/front/mxnet/roi_pooling_ext.py
+++ b/model-optimizer/extensions/front/mxnet/roi_pooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/slice_channel_ext.py b/model-optimizer/extensions/front/mxnet/slice_channel_ext.py
index 95b1cd8a4..17243329c 100644
--- a/model-optimizer/extensions/front/mxnet/slice_channel_ext.py
+++ b/model-optimizer/extensions/front/mxnet/slice_channel_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/slice_channel_ext_test.py b/model-optimizer/extensions/front/mxnet/slice_channel_ext_test.py
index 080e87121..a6e619411 100644
--- a/model-optimizer/extensions/front/mxnet/slice_channel_ext_test.py
+++ b/model-optimizer/extensions/front/mxnet/slice_channel_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/softmax.py b/model-optimizer/extensions/front/mxnet/softmax.py
index 10991ea33..d60c48dc3 100644
--- a/model-optimizer/extensions/front/mxnet/softmax.py
+++ b/model-optimizer/extensions/front/mxnet/softmax.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import numpy as np
 import networkx as nx
 
+from mo.graph.graph import Graph
 from mo.ops.lin_op import Mul
 from mo.ops.const import Const
 from mo.front.common.replacement import FrontReplacementSubgraph
@@ -33,7 +34,7 @@ class SoftmaxFrontReplacementSubgraph(FrontReplacementSubgraph):
             edges=[]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         node = match['softmax']
         if 'temperature' in node and node['temperature'] != 1.0:
             in_node = node.in_node()
diff --git a/model-optimizer/extensions/front/mxnet/softmax_activation_ext.py b/model-optimizer/extensions/front/mxnet/softmax_activation_ext.py
index 2dbb1145c..93438a8e6 100644
--- a/model-optimizer/extensions/front/mxnet/softmax_activation_ext.py
+++ b/model-optimizer/extensions/front/mxnet/softmax_activation_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/softmax_ext.py b/model-optimizer/extensions/front/mxnet/softmax_ext.py
index c2071daae..30768fbed 100644
--- a/model-optimizer/extensions/front/mxnet/softmax_ext.py
+++ b/model-optimizer/extensions/front/mxnet/softmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/softmax_output_ext.py b/model-optimizer/extensions/front/mxnet/softmax_output_ext.py
index 60a3423bf..728c3090d 100644
--- a/model-optimizer/extensions/front/mxnet/softmax_output_ext.py
+++ b/model-optimizer/extensions/front/mxnet/softmax_output_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation.py
index d26b54449..7da51745d 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ import networkx as nx
 from extensions.front.mxnet.ssd_pattern_remove_flatten import SsdPatternRemoveFlatten
 from extensions.front.mxnet.ssd_pattern_remove_reshape import SsdPatternRemoveReshape
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import create_edge
+from mo.graph.graph import Graph
 from mo.ops.reshape import Reshape
 
 
@@ -40,7 +40,7 @@ class SsdPatternFlattenSoftmaxActivation(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         """
         Need to find the pattern: SoftmaxActivation -> DetectionOutput
         DetectionOutput in IE expects flattened input from SoftMax, that is why there is the need to add
@@ -48,7 +48,7 @@ class SsdPatternFlattenSoftmaxActivation(FrontReplacementSubgraph):
 
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
          match : dict
            Patterns which were found in graph structure.
@@ -70,4 +70,4 @@ class SsdPatternFlattenSoftmaxActivation(FrontReplacementSubgraph):
         new_reshape_op = Reshape(graph, {'symbol_dict': symbol_node})
         new_reshape_node = new_reshape_op.create_node([softmax_activation])
         new_reshape_node['dim'] = [0, -1]
-        create_edge(new_reshape_node, multi_box_detection, in_port=in_port, out_port=out_port)
+        graph.create_edge(new_reshape_node, multi_box_detection, in_port=in_port, out_port=out_port)
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation_test.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation_test.py
index 7c9bc9ef5..fe78beb40 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation_test.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_flatten_softmax_activation_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -42,4 +42,4 @@ class TestSsdPatternFlattenSoftmaxActivation(unittest.TestCase):
         pattern.find_and_replace_pattern(graph)
         flatten_name = list(graph.nodes())[-1]
         self.assertTrue(graph.has_node(flatten_name))
-        self.assertFalse(graph.has_edge(Node(graph, 'softmax_activation').id, Node(graph, 'multi_box_detection').id))
+        self.assertFalse(graph.has_edge(Node(graph, 'node_softmax_activation').id, Node(graph, 'node_multi_box_detection').id))
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten.py
index 5686dc274..6ff1a7114 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,8 +17,8 @@
 import networkx as nx
 
 from extensions.front.mxnet.ssd_pattern_remove_reshape import SsdPatternRemoveReshape
+from mo.graph.graph import Graph
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import erase_node
 
 
 class SsdPatternRemoveFlatten(FrontReplacementSubgraph):
@@ -38,16 +38,16 @@ class SsdPatternRemoveFlatten(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         """
         Need to find each occurrence of pattern: _contrib_MultiBoxPrior -> Flatten
         remove Flatten layer - IE does not expect outputs to be flattened
 
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
          match : dict
            Patterns which were found in graph structure.
         """
-        erase_node(match['flatten'])
+        graph.erase_node(match['flatten'])
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten_test.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten_test.py
index dfd5708ff..c9cef9812 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten_test.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_flatten_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape.py
index cf12e192e..6c8e746f4 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@
 
 import networkx as nx
 
+from mo.graph.graph import Graph
 from mo.front.common.replacement import FrontReplacementSubgraph
 from mo.front.mxnet.extractors.utils import get_json_layer_attrs
-from mo.graph.graph import erase_node
 
 
 class SsdPatternRemoveReshape(FrontReplacementSubgraph):
@@ -37,19 +37,19 @@ class SsdPatternRemoveReshape(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         """
         Need to find each occurrence of pattern: _contrib_MultiBoxPrior(s) -> Concat -> Reshape
         remove Reshape layer - IE does not expect outputs from concatenation of _contrib_MultiBoxPrior to be reshaped
 
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
          match : dict
            Patterns which were found in graph structure.
         """
-        erase_node(match['reshape'])
+        graph.erase_node(match['reshape'])
 
         # concat should be performed for the third axis
         concat_node = match['concat']
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape_test.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape_test.py
index 40a76491c..a72620a47 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape_test.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_reshape_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose.py
index a3af10c24..70627c1d2 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ from extensions.front.mxnet.ssd_pattern_flatten_softmax_activation import SsdPat
 from extensions.front.mxnet.ssd_pattern_remove_flatten import SsdPatternRemoveFlatten
 from extensions.front.mxnet.ssd_pattern_remove_reshape import SsdPatternRemoveReshape
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import create_edge
+from mo.graph.graph import Graph
 
 
 class SsdPatternRemoveTranspose(FrontReplacementSubgraph):
@@ -42,7 +42,7 @@ class SsdPatternRemoveTranspose(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         """
         Need to find each occurrence of pattern:
         transpose -> SoftmaxActivation -> _contrib_MultiBoxDetection
@@ -52,7 +52,7 @@ class SsdPatternRemoveTranspose(FrontReplacementSubgraph):
 
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
          match : dict
            Patterns which were found in graph structure.
@@ -64,4 +64,4 @@ class SsdPatternRemoveTranspose(FrontReplacementSubgraph):
         graph.remove_edge(transpose_in_node.id, transpose_node.id)
         graph.remove_edge(transpose_node.id, softmax_activation.id)
         graph.remove_node(transpose_node.id)
-        create_edge(transpose_in_node, softmax_activation)
+        graph.create_edge(transpose_in_node, softmax_activation)
diff --git a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose_test.py b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose_test.py
index 576e2f91d..38bcd1501 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose_test.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_pattern_remove_transpose_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs.py b/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs.py
index ce9f2cf92..533a06cb9 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 
 import networkx as nx
 
-from mo.graph.graph import create_edge
+from mo.graph.graph import Graph
 from mo.front.common.replacement import FrontReplacementPattern
 from extensions.front.mxnet.ssd_pattern_remove_transpose import SsdPatternRemoveTranspose
 from extensions.front.mxnet.ssd_pattern_flatten_softmax_activation import SsdPatternFlattenSoftmaxActivation
@@ -38,7 +38,7 @@ class SsdReorderDetectionOutInputs(FrontReplacementPattern):
             edges=[])
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         """
         DetectionOutput layer has another order of inputs unlike mxnet.
         Need to reorder _contrib_MultiBoxDetection inputs
@@ -46,7 +46,7 @@ class SsdReorderDetectionOutInputs(FrontReplacementPattern):
 
         Parameters
         ----------
-        graph : nx.MultiDiGraph
+        graph : Graph
            Graph with loaded model.
         """
         multi_box_detection_node = match['multi_box_detection']
@@ -64,5 +64,5 @@ class SsdReorderDetectionOutInputs(FrontReplacementPattern):
         graph.remove_edge(conf_node.id, multi_box_detection_node.id)
         graph.remove_edge(loc_node.id, multi_box_detection_node.id)
 
-        create_edge(loc_node, multi_box_detection_node, in_port=conf_in_port, out_port=conf_out_port)
-        create_edge(conf_node, multi_box_detection_node, in_port=loc_in_port, out_port=loc_out_port)
+        graph.create_edge(loc_node, multi_box_detection_node, in_port=conf_in_port, out_port=conf_out_port)
+        graph.create_edge(conf_node, multi_box_detection_node, in_port=loc_in_port, out_port=loc_out_port)
diff --git a/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs_test.py b/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs_test.py
index 6ddde4cfa..d2beaaf95 100644
--- a/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs_test.py
+++ b/model-optimizer/extensions/front/mxnet/ssd_reorder_detection_out_inputs_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/stack_ext.py b/model-optimizer/extensions/front/mxnet/stack_ext.py
index 6b5b79b49..5c1d5d04c 100644
--- a/model-optimizer/extensions/front/mxnet/stack_ext.py
+++ b/model-optimizer/extensions/front/mxnet/stack_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/swapaxes_ext.py b/model-optimizer/extensions/front/mxnet/swapaxes_ext.py
index 1b34f0988..2741f7bdf 100644
--- a/model-optimizer/extensions/front/mxnet/swapaxes_ext.py
+++ b/model-optimizer/extensions/front/mxnet/swapaxes_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/up_sampling_ext.py b/model-optimizer/extensions/front/mxnet/up_sampling_ext.py
index a4284b1a0..cc8d87c48 100644
--- a/model-optimizer/extensions/front/mxnet/up_sampling_ext.py
+++ b/model-optimizer/extensions/front/mxnet/up_sampling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/mxnet/zeros_ext.py b/model-optimizer/extensions/front/mxnet/zeros_ext.py
index 00923d239..5fec92904 100644
--- a/model-optimizer/extensions/front/mxnet/zeros_ext.py
+++ b/model-optimizer/extensions/front/mxnet/zeros_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
  limitations under the License.
 """
 
+import ast
 import numpy as np
 
 from mo.front.mxnet.extractors.utils import get_mxnet_layer_attrs
@@ -29,13 +30,16 @@ class ZerosFrontExtractor(FrontExtractorOp):
     def extract(node):
         attrs = get_mxnet_layer_attrs(node.symbol_dict)
         shape = list(attrs.tuple('shape', int, None))
+        zero_shapes = []
         for i, s in enumerate(shape):
             if s == 0:
                 shape[i] = 1
+                zero_shapes.append(i)
 
         update_attrs = {
             'shape': np.ndarray(shape),
             'value': np.zeros(shape),
+            'zero_shapes': zero_shapes
         }
 
         # update the attributes of the node
diff --git a/model-optimizer/extensions/front/no_op_eraser.py b/model-optimizer/extensions/front/no_op_eraser.py
index 7d0b5c038..2c5f4e9b1 100644
--- a/model-optimizer/extensions/front/no_op_eraser.py
+++ b/model-optimizer/extensions/front/no_op_eraser.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ import logging as log
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import erase_node
+from mo.graph.graph import Graph
 
 
 class NoOpEraser(FrontReplacementSubgraph):
@@ -35,7 +35,7 @@ class NoOpEraser(FrontReplacementSubgraph):
         )
 
     @staticmethod
-    def replace_sub_graph(graph: nx.MultiDiGraph, match: dict):
-        erase_node(match['output'])
-        erase_node(match['noop'])
+    def replace_sub_graph(graph: Graph, match: dict):
+        graph.erase_node(match['output'])
+        graph.erase_node(match['noop'])
         log.info("NoOp node \"{}\" was removed from the graph".format(match['noop'].id))
diff --git a/model-optimizer/extensions/front/onnx/add_ext.py b/model-optimizer/extensions/front/onnx/add_ext.py
index 42e64d06a..efe59b5bf 100644
--- a/model-optimizer/extensions/front/onnx/add_ext.py
+++ b/model-optimizer/extensions/front/onnx/add_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/affine_ext.py b/model-optimizer/extensions/front/onnx/affine_ext.py
index 4067f95d1..237e1d8f0 100644
--- a/model-optimizer/extensions/front/onnx/affine_ext.py
+++ b/model-optimizer/extensions/front/onnx/affine_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/affine_ext_test.py b/model-optimizer/extensions/front/onnx/affine_ext_test.py
index 799e6433f..ea0ad60f6 100644
--- a/model-optimizer/extensions/front/onnx/affine_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/affine_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/argmax.py b/model-optimizer/extensions/front/onnx/argmax.py
new file mode 100644
index 000000000..2f3070459
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/argmax.py
@@ -0,0 +1,46 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import networkx as nx
+
+from extensions.ops.argmax import ArgMaxOp
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.front.onnx.extractors.utils import onnx_attr
+from mo.graph.graph import Graph
+from mo.ops.squeeze import Squeeze
+
+class Argmax(FrontReplacementSubgraph):
+    enabled = True
+
+    def pattern(self):
+        return dict(
+            nodes=[('argmax', dict(op='ArgMax', keepdims=0))],
+            edges=[]
+        )
+
+    def replace_sub_graph(self, graph: Graph, match: dict):
+        """
+        In ONNX ArgMax operation has keepdims attribute that indicates
+        whether to stay a dimension along which maximum is computed or not.
+        In case of keepdims=0 this dimension should be removed but ArgMax operation in IR format
+        is not designed to cover this case. So we should additionally add Squeeze operation 
+        right after ArgMax for this case.
+        """
+        argmax_node = match['argmax']
+        axis = argmax_node.axis
+        squeeze_node = Squeeze(graph, {'squeeze_dims': [axis]}).create_node()
+        argmax_node.out_port(0).get_connection().set_source(squeeze_node.out_port(0))
+        squeeze_node.in_port(0).connect(argmax_node.out_port(0))
diff --git a/model-optimizer/extensions/front/onnx/argmax_ext.py b/model-optimizer/extensions/front/onnx/argmax_ext.py
new file mode 100644
index 000000000..162ee8120
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/argmax_ext.py
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.argmax import ArgMaxOp
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+class ArgMaxFrontExtractor(FrontExtractorOp):
+    op = 'ArgMax'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        keepdims = onnx_attr(node, 'keepdims', 'i', default=1)
+        axis = onnx_attr(node, 'axis', 'i', default=0)
+
+        attrs = {
+            'axis': axis,
+
+            # ONNX ArgMax always computes an index of one maximum value
+            'top_k' : 1,
+            'out_max_val' : 0,
+
+            # Set attribute to trigger ArgMax replacer in case do not keep the dimension
+            'keepdims': keepdims
+        }
+
+        ArgMaxOp.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/cast_ext.py b/model-optimizer/extensions/front/onnx/cast_ext.py
new file mode 100644
index 000000000..b19fb339a
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/cast_ext.py
@@ -0,0 +1,30 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.Cast import Cast
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import get_onnx_datatype_as_numpy, onnx_attr
+
+
+class CastFrontExtractor(FrontExtractorOp):
+    op = 'Cast'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        to = onnx_attr(node, 'to', 'i', default=None)
+        Cast.update_node_stat(node, {'dst_type': get_onnx_datatype_as_numpy(to)})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/clip_ext.py b/model-optimizer/extensions/front/onnx/clip_ext.py
new file mode 100644
index 000000000..4940afd8b
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/clip_ext.py
@@ -0,0 +1,33 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+from mo.ops.clamp import Clamp
+
+
+class ClipFrontExtractor(FrontExtractorOp):
+    op = 'Clip'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = {
+            'min': onnx_attr(node, 'min', 'f', -3.4028234663852886e+38),
+            'max': onnx_attr(node, 'max', 'f', 3.4028234663852886e+38),
+        }
+        Clamp.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/constant_fill_ext.py b/model-optimizer/extensions/front/onnx/constant_fill_ext.py
index e80027663..92d05607d 100644
--- a/model-optimizer/extensions/front/onnx/constant_fill_ext.py
+++ b/model-optimizer/extensions/front/onnx/constant_fill_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/conv_ext.py b/model-optimizer/extensions/front/onnx/conv_ext.py
index 262a46922..5562f581e 100644
--- a/model-optimizer/extensions/front/onnx/conv_ext.py
+++ b/model-optimizer/extensions/front/onnx/conv_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -110,27 +110,28 @@ class ConvTransposeFrontExtractor(FrontExtractorOp):
 
     @staticmethod
     def extract(node):
+        pads = onnx_attr(node, 'pads', 'ints', dst_type=int64_array)
+        auto_pad = onnx_attr(node, 'auto_pad', 's', default=None, dst_type=get_onnx_autopad)
 
-        int64array = lambda x: np.array(x, dtype=np.int64)
+        if pads is not None:
+            if len(pads) % 2 != 0:
+                raise Error(
+                    'ConvTranspose node {} specifies pads = {} which has odd number of elements. The model is not correct.',
+                    node.soft_get('name'),
+                    pads
+                )
+            pads = pads.reshape([2, -1])
+            pads = np.transpose(pads)
 
-        pads = onnx_attr(node, 'pads', 'ints', dst_type=int64array)
-        auto_pad = onnx_attr(node, 'auto_pad', 's', default=None, dst_type=get_onnx_autopad)
+        final_pads = int64_array([[0, 0], [0, 0], *pads]) if pads is not None else None
 
-        if pads is None:
-            pads = np.array([0, 0, 0, 0], dtype=np.int64)
+        dilations = onnx_attr(node, 'dilations', 'ints', default=None)
+        final_dilations = int64_array([1, 1, *dilations]) if dilations is not None else None
 
-        if len(pads) % 2 != 0:
-            raise Error(
-                'ConvTranspose node {} specifies pads = {} which has odd number of elements. The model is not correct.',
-                node.soft_get('name'),
-                pads
-            )
+        strides = onnx_attr(node, 'strides', 'ints', default=None)
+        final_strides = int64_array([1, 1, *strides]) if strides is not None else None
 
-        pads = pads.reshape([2, -1])
-        pads = np.transpose(pads)
-        dilations = int64array(onnx_attr(node, 'dilations', 'ints', default=[1, 1]))
-        strides = int64array(onnx_attr(node, 'strides', 'ints', default=[1, 1]))
-        kernel_shape = onnx_attr(node, 'kernel_shape', 'ints', dst_type=int64array)
+        kernel_shape = onnx_attr(node, 'kernel_shape', 'ints', dst_type=int64_array)
 
         if kernel_shape is None:
             raise Error(
@@ -138,9 +139,10 @@ class ConvTransposeFrontExtractor(FrontExtractorOp):
                 node.soft_get('name')
             )
 
-        output_padding = onnx_attr(node, 'output_padding', 'ints', default=[0, 0])
+        output_padding = onnx_attr(node, 'output_padding', 'ints', default=None)
+        final_output_padding = int64_array([0, 0, *output_padding]) if output_padding is not None else None
 
-        output_shape = onnx_attr(node, 'output_shape', 'ints', default=None, dst_type=int64array)
+        output_shape = onnx_attr(node, 'output_shape', 'ints', default=None, dst_type=int64_array)
 
         attrs = {
             'type': 'Deconvolution',
@@ -148,26 +150,24 @@ class ConvTransposeFrontExtractor(FrontExtractorOp):
             'auto_pad': auto_pad,
             'bias_addable': True,
             'bias_term': None,  # will be deduced later; not really needed
-            'pad': int64array([[0, 0], [0, 0], pads[0], pads[1]]),
-            'pad_spatial_shape': int64array([pads[0], pads[1]]),
-            'dilation': int64array([1, 1, dilations[0], dilations[1]]),
+            'pad': final_pads,
+            'dilation': final_dilations,
             'output_spatial_shape': output_shape,
             'output_shape': None,
-            'output_padding': int64array([0, 0, output_padding[0], output_padding[1]]),
-            'stride': int64array([1, 1, strides[0], strides[1]]),
+            'output_padding': final_output_padding,
+            'stride': final_strides,
             'group': onnx_attr(node, 'group', 'i', default=1),
             'output': None,
-            'spatial_dims': int64array([2, 3]),
-            'channel_dims': int64array([1]),
-            'batch_dims': int64array([0]),
-            'kernel_spatial': int64array([kernel_shape[0], kernel_shape[1]]),  # TODO WARNING Don't misuse X/Y
+
+            'spatial_dims': None,  # Will be calculated in infer function
+            'channel_dims': int64_array([1]),
+            'batch_dims': int64_array([0]),
+            'layout': 'NCHW',
 
             'input_feature_channel': 0,
             'output_feature_channel': 1,
-            'kernel_spatial_idx': np.array([2, 3]),
             'get_pad': ConvTransposeFrontExtractor.get_pad
         }
-        attrs.update(layout_attrs())
 
         # update the attributes of the node
         Convolution.update_node_stat(node, attrs)
diff --git a/model-optimizer/extensions/front/onnx/conv_ext_test.py b/model-optimizer/extensions/front/onnx/conv_ext_test.py
index 937542a39..e853c8045 100644
--- a/model-optimizer/extensions/front/onnx/conv_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/conv_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -46,13 +46,11 @@ class ConvTransposeONNXExtractorTest(unittest.TestCase):
             dict(
                 type='Deconvolution',
                 pad=[[0, 0], [0, 0], [1, 3], [2, 4]],
-                pad_spatial_shape=[[1, 3], [2, 4]],
-                kernel_spatial=[5, 6],
                 bias_term=None,
                 output_shape=None,
-                output_padding=[0, 0, 0, 0],
-                dilation=[1, 1, 1, 1],
-                stride=[1, 1, 1, 1],
+                output_padding=None,
+                dilation=None,
+                stride=None,
                 output_spatial_shape=None,
                 group=1
             )
@@ -74,8 +72,7 @@ class ConvTransposeONNXExtractorTest(unittest.TestCase):
     def test_all_valid_default(self):
         inp, ref = self._base_attrs()
         del inp['pads']
-        ref['pad'] = [[0, 0], [0, 0], [0, 0], [0, 0]]
-        ref['pad_spatial_shape'] = [[0, 0], [0, 0]]
+        del ref['pad']
         out = self._extract(inp)
         self._match(out, ref)
 
@@ -111,8 +108,7 @@ class ConvTransposeONNXExtractorTest(unittest.TestCase):
         inp['auto_pad'] = 'SAME_UPPER'
 
         ref['auto_pad'] = 'same_upper'
-        ref['pad'] = [[0, 0], [0, 0], [0, 0], [0, 0]]
-        ref['pad_spatial_shape'] = [[0, 0], [0, 0]]
+        del ref['pad']
 
         out = self._extract(inp)
         self._match(out, ref)
diff --git a/model-optimizer/extensions/front/onnx/crop_ext.py b/model-optimizer/extensions/front/onnx/crop_ext.py
index d11f79d9c..1ef2e949d 100644
--- a/model-optimizer/extensions/front/onnx/crop_ext.py
+++ b/model-optimizer/extensions/front/onnx/crop_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/crop_ext_test.py b/model-optimizer/extensions/front/onnx/crop_ext_test.py
index 1696b69a5..1b0646621 100644
--- a/model-optimizer/extensions/front/onnx/crop_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/crop_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/detection_output.py b/model-optimizer/extensions/front/onnx/detection_output.py
new file mode 100644
index 000000000..8e23cb492
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/detection_output.py
@@ -0,0 +1,112 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+from mo.ops.op import Op
+from mo.utils.error import Error
+
+
+class DetectionOutputFrontExtractor(FrontExtractorOp):
+    op = 'DetectionOutput'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        nms_threshold = onnx_attr(node, 'nms_threshold', 'f', default=0.0)
+        eta = onnx_attr(node, 'eta', 'f', default=0.0)
+        top_k = onnx_attr(node, 'top_k', 'i', default=-1)
+
+        code_type_values = {
+            b"CORNER": "caffe.PriorBoxParameter.CORNER",
+            b"CENTER_SIZE": "caffe.PriorBoxParameter.CENTER_SIZE",
+        }
+
+        code_type = onnx_attr(node, 'code_type', 's', default=code_type_values[b"CORNER"])
+        try:
+            code_type = code_type_values[code_type]
+        except KeyError:
+            raise Error("Incorrect value of code_type parameter {}".format(code_type))
+
+        resize_mode_values = {
+            b"": "",
+            b"WARP": "caffe.ResizeParameter.WARP",
+            b"FIT_SMALL_SIZE": "caffe.ResizeParameter.FIT_SMALL_SIZE",
+            b"FIT_LARGE_SIZE_AND_PAD": "caffe.ResizeParameter.FIT_LARGE_SIZE_AND_PAD",
+        }
+        resize_mode = onnx_attr(node, 'resize_mode', 's', default=b"")
+        try:
+            resize_mode = resize_mode_values[resize_mode]
+        except KeyError:
+            raise Error("Incorrect value of resize_mode parameter {}".format(resize_mode))
+
+        pad_mode_values = {
+            b"": "",
+            b"CONSTANT": "caffe.ResizeParameter.CONSTANT",
+            b"MIRRORED": "caffe.ResizeParameter.MIRRORED",
+            b"REPEAT_NEAREST": "caffe.ResizeParameter.REPEAT_NEAREST"
+        }
+        pad_mode = onnx_attr(node, 'pad_mode', 's', default=b"")
+        try:
+            pad_mode = pad_mode_values[pad_mode]
+        except KeyError:
+            raise Error("Incorrect value of pad_mode parameter {}".format(pad_mode))
+
+        interp_mode_values = {
+            b"": "",
+            b"LINEAR": "caffe.ResizeParameter.LINEAR",
+            b"AREA": "caffe.ResizeParameter.AREA",
+            b"NEAREST": "caffe.ResizeParameter.NEAREST",
+            b"CUBIC": "caffe.ResizeParameter.CUBIC",
+            b"LANCZOS4": "caffe.ResizeParameter.LANCZOS4"
+        }
+        interp_mode = onnx_attr(node, 'interp_mode', 's', default=b"")
+        try:
+            interp_mode = interp_mode_values[interp_mode]
+        except KeyError:
+            raise Error("Incorrect value of interp_mode parameter {}".format(interp_mode))
+
+        attrs = {
+            'num_classes': onnx_attr(node, 'num_classes', 'i', default=0),
+            'share_location': onnx_attr(node, 'share_location', 'i', default=0),
+            'background_label_id': onnx_attr(node, 'background_label_id', 'i', default=0),
+            'code_type': code_type,
+            'variance_encoded_in_target': onnx_attr(node, 'variance_encoded_in_target', 'i', default=0),
+            'keep_top_k': onnx_attr(node, 'keep_top_k', 'i', default=0),
+            'confidence_threshold':  onnx_attr(node, 'confidence_threshold', 'f', default=0),
+            'visualize_threshold': onnx_attr(node, 'visualize_threshold', 'f', default=0.6),
+            # nms_param
+            'nms_threshold': nms_threshold,
+            'top_k': top_k,
+            'eta': eta,
+            # save_output_param.resize_param
+            'prob': onnx_attr(node, 'prob', 'f', default=0),
+            'resize_mode': resize_mode,
+            'height': onnx_attr(node, 'height', 'i', default=0),
+            'width': onnx_attr(node, 'width', 'i', default=0),
+            'height_scale': onnx_attr(node, 'height_scale', 'i', default=0),
+            'width_scale': onnx_attr(node, 'width_scale', 'i', default=0),
+            'pad_mode': pad_mode,
+            'pad_value': onnx_attr(node, 'pad_value', 's', default=""),
+            'interp_mode': interp_mode,
+            'input_width': onnx_attr(node, 'input_width', 'i', default=1),
+            'input_height': onnx_attr(node, 'input_height', 'i', default=1),
+            'normalized': onnx_attr(node, 'normalized', 'i', default=1),
+        }
+
+        # update the attributes of the node
+        Op.get_op_class_by_name(__class__.op).update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/detection_output_test.py b/model-optimizer/extensions/front/onnx/detection_output_test.py
new file mode 100644
index 000000000..f055f0086
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/detection_output_test.py
@@ -0,0 +1,102 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import onnx
+import unittest
+
+import numpy as np
+
+from extensions.front.onnx.detection_output import DetectionOutputFrontExtractor
+from extensions.ops.DetectionOutput import DetectionOutput
+from mo.ops.op import Op
+from mo.utils.unittest.extractors import PB
+
+
+class TestDetectionOutputExt(unittest.TestCase):
+    @staticmethod
+    def _create_do_node(num_classes=0, share_location=0, background_label_id=0,
+                        code_type="", variance_encoded_in_target=0, keep_top_k=0,
+                        confidence_threshold=0, nms_threshold=0, top_k=0, eta=0):
+        pb = onnx.helper.make_node(
+            'DetectionOutput',
+            inputs=['x'],
+            outputs=['y'],
+            num_classes=num_classes,
+            share_location=share_location,
+            background_label_id=background_label_id,
+            code_type=code_type,
+            variance_encoded_in_target=variance_encoded_in_target,
+            keep_top_k=keep_top_k,
+            confidence_threshold=confidence_threshold,
+            # nms_param
+            nms_threshold=nms_threshold,
+            top_k=top_k,
+            eta=eta,
+        )
+        
+        node = PB({'pb': pb})
+        return node
+
+    @classmethod
+    def setUpClass(cls):
+        Op.registered_ops['DetectionOutput'] = DetectionOutput
+
+    def test_do_no_pb_no_ml(self):
+        self.assertRaises(AttributeError, DetectionOutputFrontExtractor.extract, None)
+
+    def test_do_ext_ideal_numbers(self):
+        node = self._create_do_node(num_classes=21, share_location=1,
+                                    code_type="CENTER_SIZE", keep_top_k=200,
+                                    confidence_threshold=0.01, nms_threshold=0.45, top_k=400, eta=1.0)
+        
+        DetectionOutputFrontExtractor.extract(node)
+        
+        exp_res = {
+            'op': 'DetectionOutput',
+            'type': 'DetectionOutput',
+            'num_classes': 21,
+            'share_location': 1,
+            'background_label_id': 0,
+            'code_type': "caffe.PriorBoxParameter.CENTER_SIZE",
+            'variance_encoded_in_target': 0,
+            'keep_top_k': 200,
+            'confidence_threshold': 0.01,
+            'visualize_threshold': 0.6,
+            # nms_param
+            'nms_threshold': 0.45,
+            'top_k': 400,
+            'eta': 1.0,
+            # ONNX have not such parameters
+            # save_output_param.resize_param
+            'prob': 0,
+            'resize_mode': "",
+            'height': 0,
+            'width': 0,
+            'height_scale': 0,
+            'width_scale': 0,
+            'pad_mode': "",
+            'pad_value': "",
+            'interp_mode': "",
+            'input_width': 1,
+            'input_height': 1,
+            'normalized': 1,            
+        }
+
+        for key in exp_res.keys():
+            if key in ['confidence_threshold', 'visualise_threshold', 'nms_threshold', 'eta']:
+                np.testing.assert_almost_equal(node[key], exp_res[key])
+            else:
+                self.assertEqual(node[key], exp_res[key])
diff --git a/model-optimizer/extensions/front/onnx/detectionoutput_ext.py b/model-optimizer/extensions/front/onnx/detectionoutput_ext.py
new file mode 100644
index 000000000..3d00fc168
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/detectionoutput_ext.py
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from math import log
+import numpy as np
+
+from extensions.ops.detectionoutput_onnx import ExperimentalDetectronDetectionOutput
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ExperimentalDetectronDetectionOutputFrontExtractor(FrontExtractorOp):
+    op = 'ExperimentalDetectronDetectionOutput'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = dict(class_agnostic_box_regression=onnx_attr(node, 'class_agnostic_box_regression', 'i', 0),
+                     max_detections_per_image=onnx_attr(node, 'max_detections_per_image', 'i', 100),
+                     nms_threshold=onnx_attr(node, 'nms_threshold', 'f', 0.5),
+                     num_classes=onnx_attr(node, 'num_classes', 'i', 81),
+                     post_nms_count=onnx_attr(node, 'post_nms_count', 'i', 2000),
+                     score_threshold=onnx_attr(node, 'score_threshold', 'f', 0.05),
+                     max_delta_log_wh=onnx_attr(node, 'max_delta_log_wh', 'f', log(1000. / 16.)),
+                     deltas_weights=np.array(onnx_attr(node, 'deltas_weights', 'floats', [10., 10., 5., 5.]),
+                                             dtype=np.float32)
+                     )
+        ExperimentalDetectronDetectionOutput.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/dropout_ext.py b/model-optimizer/extensions/front/onnx/dropout_ext.py
new file mode 100644
index 000000000..21292bd04
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/dropout_ext.py
@@ -0,0 +1,36 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+from extensions.ops.identity import IdentityOp
+from mo.utils.error import Error
+
+
+class DropoutFrontExtractor(FrontExtractorOp):
+    op = 'Dropout'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        # some Dropout flavors doesn't have is_test attribute; when it is missing, interpret it as 1
+        is_test = onnx_attr(node, 'is_test', 'i', 1)
+        if len(node.out_nodes()) > 1:
+            raise Error('Dropout node {} has more than one consumer. Unsupported.', node.name)
+        if not is_test:
+            raise Error('Dropout node {} has is_test: 0. This means training mode which is not supported.', node.name)
+        IdentityOp.update_node_stat(node)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/elu_ext.py b/model-optimizer/extensions/front/onnx/elu_ext.py
index 36d66acb3..5c1dfd4ab 100644
--- a/model-optimizer/extensions/front/onnx/elu_ext.py
+++ b/model-optimizer/extensions/front/onnx/elu_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/elu_ext_test.py b/model-optimizer/extensions/front/onnx/elu_ext_test.py
index e509e4e78..1ca029b82 100644
--- a/model-optimizer/extensions/front/onnx/elu_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/elu_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/exp_ext.py b/model-optimizer/extensions/front/onnx/exp_ext.py
new file mode 100644
index 000000000..77165799d
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/exp_ext.py
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.activation import Activation
+
+
+class ExpExtractor(FrontExtractorOp):
+    op = 'Exp'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Activation.update_node_stat(node, {'operation': 'exp'})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/flatten_ext.py b/model-optimizer/extensions/front/onnx/flatten_ext.py
index 11aaa1b9d..945b59dfb 100644
--- a/model-optimizer/extensions/front/onnx/flatten_ext.py
+++ b/model-optimizer/extensions/front/onnx/flatten_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/flatten_ext_test.py b/model-optimizer/extensions/front/onnx/flatten_ext_test.py
index 5498343a1..de9e9f294 100644
--- a/model-optimizer/extensions/front/onnx/flatten_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/flatten_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/gather_ext.py b/model-optimizer/extensions/front/onnx/gather_ext.py
index 1484bc858..ad639d76b 100644
--- a/model-optimizer/extensions/front/onnx/gather_ext.py
+++ b/model-optimizer/extensions/front/onnx/gather_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/gather_ext_test.py b/model-optimizer/extensions/front/onnx/gather_ext_test.py
index d91c7932c..5d48ea402 100644
--- a/model-optimizer/extensions/front/onnx/gather_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/gather_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/gru_ext.py b/model-optimizer/extensions/front/onnx/gru_ext.py
new file mode 100644
index 000000000..a1e260537
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/gru_ext.py
@@ -0,0 +1,59 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.GRU import GRU
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class GRUFrontExtractor(FrontExtractorOp):
+    op = 'GRU'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        activation_alpha = onnx_attr(node, 'activation_alpha', 'floats',
+                                     default=None, dst_type=lambda x: np.array(x, dtype=np.float32))
+        activation_beta = onnx_attr(node, 'activation_beta', 'floats',
+                                    default=None, dst_type=lambda x: np.array(x, dtype=np.float32))
+        activations = onnx_attr(node, 'activations', 'strings', default=None,
+                                dst_type=lambda x: list(map(lambda s: s.decode(encoding="utf-8").lower(), list(x))))
+        clip = onnx_attr(node, 'clip', 'f', default=None)
+        linear_before_reset = onnx_attr(node, 'linear_before_reset', 'i', default=0)
+
+        attrs = {
+            'batch_dim': 1,
+            'sequence_dim': 0,
+            'blobs_wrb': True,
+            'has_num_directions': True,
+            'num_layers': 1,
+            'format': 'onnx',
+            'multilayers': False,
+            'gate_order': [0, 1, 2],
+
+            # ONNX - specific attrs
+            'activation_alpha': activation_alpha,
+            'activation_beta': activation_beta,
+            'activations': activations,
+            'clip': clip,
+            'direction': onnx_attr(node, 'direction', 's', b'forward').decode().lower(),
+            'hidden_size': np.array(onnx_attr(node, 'hidden_size', 'i'), dtype=np.int64),
+            'linear_before_reset': linear_before_reset,
+        }
+
+        GRU.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/gru_ext_test.py b/model-optimizer/extensions/front/onnx/gru_ext_test.py
new file mode 100644
index 000000000..44e29519b
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/gru_ext_test.py
@@ -0,0 +1,79 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+import onnx
+
+from extensions.front.onnx.gru_ext import GRUFrontExtractor
+from mo.utils.unittest.extractors import PB
+
+
+class GRUExtractorTest(unittest.TestCase):
+    @staticmethod
+    def _create_node(**attrs):
+        pb = onnx.helper.make_node(
+            'GRU',
+            inputs=['X', 'W', 'R', 'B',],
+            outputs=['Y', 'Y_h', 'Y_c'],
+            hidden_size=128,
+            **attrs,
+        )
+        node = PB({'pb': pb})
+        return node
+
+    base_attrs = {
+        'type': 'RNNSequence',
+        'op': 'GRU',
+        'batch_dim': 1,
+        'sequence_dim': 0,
+        'blobs_wrb': True,
+        'has_num_directions': True,
+        'num_layers': 1,
+        'format': 'onnx',
+        'multilayers': False,
+        'gate_order': np.array([0, 1, 2]),
+        'direction': 'forward',
+        'linear_before_reset': 0,
+    }
+
+    def test_base_attrs(self):
+        node = self._create_node()
+        GRUFrontExtractor.extract(node)
+
+        exp_res = self.base_attrs
+
+        for key in exp_res.keys():
+            equal = np.all(np.equal(node[key], exp_res[key], dtype=object))
+            self.assertTrue(equal, 'Values for attr {} are not equal'.format(key))
+
+    def test_additional_attributes(self):
+        additional_attrs = {
+            'activation_alpha': [1.0, 0.0, 2.0],
+            'activations': [b'relu', b'tanh', b'sigmoid'],
+            'clip': 10.0,
+            'linear_before_reset': 1,
+        }
+
+        node = self._create_node(**additional_attrs)
+        GRUFrontExtractor.extract(node)
+
+        exp_res = {**self.base_attrs, **additional_attrs}
+        exp_res['activations'] = ['relu', 'tanh', 'sigmoid']
+
+        for key in exp_res.keys():
+            equal = np.all(np.equal(node[key], exp_res[key], dtype=object))
+            self.assertTrue(equal, 'Values for attr {} are not equal'.format(key))
diff --git a/model-optimizer/extensions/front/onnx/image_scaler_ext.py b/model-optimizer/extensions/front/onnx/image_scaler_ext.py
index 5d46fc667..2bfb181a5 100644
--- a/model-optimizer/extensions/front/onnx/image_scaler_ext.py
+++ b/model-optimizer/extensions/front/onnx/image_scaler_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/image_scaler_ext_test.py b/model-optimizer/extensions/front/onnx/image_scaler_ext_test.py
index 8f5fb04dd..8a1b6ef5f 100644
--- a/model-optimizer/extensions/front/onnx/image_scaler_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/image_scaler_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/instance_normalization_ext.py b/model-optimizer/extensions/front/onnx/instance_normalization_ext.py
index 44737b462..2a30ff42e 100644
--- a/model-optimizer/extensions/front/onnx/instance_normalization_ext.py
+++ b/model-optimizer/extensions/front/onnx/instance_normalization_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/instance_normalization_ext_test.py b/model-optimizer/extensions/front/onnx/instance_normalization_ext_test.py
index c38a30f4a..60878cfed 100644
--- a/model-optimizer/extensions/front/onnx/instance_normalization_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/instance_normalization_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/leaky_relu_ext.py b/model-optimizer/extensions/front/onnx/leaky_relu_ext.py
index e6694e94f..ef8c6263c 100644
--- a/model-optimizer/extensions/front/onnx/leaky_relu_ext.py
+++ b/model-optimizer/extensions/front/onnx/leaky_relu_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/lrn_ext.py b/model-optimizer/extensions/front/onnx/lrn_ext.py
index d402a6e08..9d89d6002 100644
--- a/model-optimizer/extensions/front/onnx/lrn_ext.py
+++ b/model-optimizer/extensions/front/onnx/lrn_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/lstm_ext.py b/model-optimizer/extensions/front/onnx/lstm_ext.py
index 20bc8ba6d..6673932da 100644
--- a/model-optimizer/extensions/front/onnx/lstm_ext.py
+++ b/model-optimizer/extensions/front/onnx/lstm_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,14 +13,11 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-
 import numpy as np
 
-from extensions.ops.lstm_sequence import LSTMSequence
+from extensions.ops.LSTM import LSTM
 from mo.front.extractor import FrontExtractorOp
 from mo.front.onnx.extractors.utils import onnx_attr
-from mo.ops.op import Op
 
 
 class LSTMFrontExtractor(FrontExtractorOp):
@@ -29,27 +26,34 @@ class LSTMFrontExtractor(FrontExtractorOp):
 
     @staticmethod
     def extract(node):
-
-        def split_helper(node, index: int, direction: str):
-            return Op._create_data_node(
-                node.graph,
-                name=node.name + '/SplittedBiLSTM/{}/'.format(direction),
-                attrs={'value': node.value[index], 'shape': np.array(node.value[index].shape, dtype=np.int64)}
-            )
+        activation_alpha = onnx_attr(node, 'activation_alpha', 'floats',
+                                     default=None, dst_type=lambda x: np.array(x, dtype=np.float32))
+        activation_beta = onnx_attr(node, 'activation_beta', 'floats',
+                                     default=None, dst_type=lambda x: np.array(x, dtype=np.float32))
+        activations = onnx_attr(node, 'activations', 'strings', default=None,
+                                dst_type=lambda x: list(map(lambda s: s.decode(encoding="utf-8").lower(), list(x))))
+        clip = onnx_attr(node, 'clip', 'f', default=None)
+        input_forget = onnx_attr(node, 'input_forget', 'i', default=0)
 
         attrs = {
-            'hidden_size': np.array(onnx_attr(node, 'hidden_size', 'i'), dtype=np.int64),
             'batch_dim': 1,
             'sequence_dim': 0,
             'blobs_wrb': True,
             'has_num_directions': True,
-            'direction': onnx_attr(node, 'direction', 's', b'forward').decode().lower(),
+            'num_layers': 1,
             'format': 'onnx',
-            'blob_bidirectional_split': lambda node: (
-                split_helper(node, 0, 'forward'),
-                split_helper(node, 1, 'reverse')
-            )
+            'multilayers': False,
+            'gate_order': [2, 0, 3, 1],  # iofc --> fico
+
+            # ONNX attrs
+            'activation_alpha': activation_alpha,
+            'activation_beta': activation_beta,
+            'activations': activations,
+            'clip': clip,
+            'direction': onnx_attr(node, 'direction', 's', b'forward').decode().lower(),
+            'hidden_size': np.array(onnx_attr(node, 'hidden_size', 'i'), dtype=np.int64),
+            'input_forget': input_forget,
         }
 
-        LSTMSequence.update_node_stat(node, attrs)
+        LSTM.update_node_stat(node, attrs)
         return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/lstm_ext_test.py b/model-optimizer/extensions/front/onnx/lstm_ext_test.py
new file mode 100644
index 000000000..ea66dfaa7
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/lstm_ext_test.py
@@ -0,0 +1,77 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+import onnx
+
+from extensions.front.onnx.lstm_ext import LSTMFrontExtractor
+from mo.utils.unittest.extractors import PB
+
+
+class LSTMExtractorTest(unittest.TestCase):
+    @staticmethod
+    def _create_node(**attrs):
+        pb = onnx.helper.make_node(
+            'LSTM',
+            inputs=['X', 'W', 'R', 'B',],
+            outputs=['Y', 'Y_h', 'Y_c'],
+            hidden_size=128,
+            **attrs,
+        )
+        node = PB({'pb': pb})
+        return node
+
+    base_attrs = {
+        'type': 'RNNSequence',
+        'op': 'LSTM',
+        'batch_dim': 1,
+        'sequence_dim': 0,
+        'blobs_wrb': True,
+        'has_num_directions': True,
+        'num_layers': 1,
+        'format': 'onnx',
+        'multilayers': False,
+        'gate_order': np.array([2, 0, 3, 1]),
+        'direction': 'forward',
+    }
+
+    def test_base_attrs(self):
+        node = self._create_node()
+        LSTMFrontExtractor.extract(node)
+
+        exp_res = self.base_attrs
+
+        for key in exp_res.keys():
+            equal = np.all(np.equal(node[key], exp_res[key], dtype=object))
+            self.assertTrue(equal)
+
+    def test_additional_attributes(self):
+        additional_attrs = {
+            'activation_alpha': [1.0, 0.0, 2.0],
+            'activations': [b'relu', b'tanh', b'sigmoid'],
+            'clip': 10.0,
+        }
+
+        node = self._create_node(**additional_attrs)
+        LSTMFrontExtractor.extract(node)
+
+        exp_res = dict(**self.base_attrs, **additional_attrs)
+        exp_res['activations'] = ['relu', 'tanh', 'sigmoid']
+
+        for key in exp_res.keys():
+            equal = np.all(np.equal(node[key], exp_res[key], dtype=object))
+            self.assertTrue(equal, 'Values for attr {} are not equal'.format(key))
diff --git a/model-optimizer/extensions/front/onnx/matmul_ext.py b/model-optimizer/extensions/front/onnx/matmul_ext.py
index 38b318976..33e8f47c4 100644
--- a/model-optimizer/extensions/front/onnx/matmul_ext.py
+++ b/model-optimizer/extensions/front/onnx/matmul_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/mul_ext.py b/model-optimizer/extensions/front/onnx/mul_ext.py
index f1de1226d..14af8c880 100644
--- a/model-optimizer/extensions/front/onnx/mul_ext.py
+++ b/model-optimizer/extensions/front/onnx/mul_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/neg_ext.py b/model-optimizer/extensions/front/onnx/neg_ext.py
index 939c16752..33103cacf 100644
--- a/model-optimizer/extensions/front/onnx/neg_ext.py
+++ b/model-optimizer/extensions/front/onnx/neg_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/pad_ext.py b/model-optimizer/extensions/front/onnx/pad_ext.py
index 449949f4d..f87f72628 100644
--- a/model-optimizer/extensions/front/onnx/pad_ext.py
+++ b/model-optimizer/extensions/front/onnx/pad_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/pad_ext_test.py b/model-optimizer/extensions/front/onnx/pad_ext_test.py
index 1f4f25d6c..46de62776 100644
--- a/model-optimizer/extensions/front/onnx/pad_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/pad_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/pooling_ext.py b/model-optimizer/extensions/front/onnx/pooling_ext.py
index 17c894cb7..5916bbd68 100644
--- a/model-optimizer/extensions/front/onnx/pooling_ext.py
+++ b/model-optimizer/extensions/front/onnx/pooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/pow_ext.py b/model-optimizer/extensions/front/onnx/pow_ext.py
index ab8330ffe..327725f1c 100644
--- a/model-optimizer/extensions/front/onnx/pow_ext.py
+++ b/model-optimizer/extensions/front/onnx/pow_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/priorbox_ext.py b/model-optimizer/extensions/front/onnx/priorbox_ext.py
new file mode 100644
index 000000000..6a45003b6
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/priorbox_ext.py
@@ -0,0 +1,51 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.op import Op
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class PriorBoxFrontExtractor(FrontExtractorOp):
+    op = 'PriorBox'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        variance = onnx_attr(node, 'variance', 'floats', default=[], dst_type=lambda x: np.array(x, dtype=np.float32))
+        if len(variance) == 0:
+            variance = [0.1]
+
+        update_attrs = {
+            'aspect_ratio': onnx_attr(node, 'aspect_ratio', 'floats', dst_type=lambda x: np.array(x, dtype=np.float32)),
+            'min_size': onnx_attr(node, 'min_size', 'floats', dst_type=lambda x: np.array(x, dtype=np.float32)),
+            'max_size': onnx_attr(node, 'max_size', 'floats', dst_type=lambda x: np.array(x, dtype=np.float32)),
+            'flip': onnx_attr(node, 'flip', 'i', default=0),
+            'clip': onnx_attr(node, 'clip', 'i', default=0),
+            'variance': list(variance),
+            'img_size': onnx_attr(node, 'img_size', 'i', default=0),
+            'img_h': onnx_attr(node, 'img_h', 'i', default=0),
+            'img_w': onnx_attr(node, 'img_w', 'i', default=0),
+            'step': onnx_attr(node, 'step', 'f', default=0.0),
+            'step_h': onnx_attr(node, 'step_h', 'f', default=0.0),
+            'step_w': onnx_attr(node, 'step_w', 'f', default=0.0),
+            'offset': onnx_attr(node, 'offset', 'f', default=0.0),
+        }
+
+        # update the attributes of the node
+        Op.get_op_class_by_name(__class__.op).update_node_stat(node, update_attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/priorbox_ext_test.py b/model-optimizer/extensions/front/onnx/priorbox_ext_test.py
new file mode 100644
index 000000000..8608fdd46
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/priorbox_ext_test.py
@@ -0,0 +1,89 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import onnx
+import unittest
+
+import numpy as np
+
+from extensions.front.onnx.priorbox_ext import PriorBoxFrontExtractor
+from extensions.ops.priorbox import PriorBoxOp
+from mo.ops.op import Op
+from mo.utils.unittest.extractors import PB
+
+
+class TestPriorBoxExt(unittest.TestCase):
+    @staticmethod
+    def _create_priorbox_node(aspect_ratio=[], min_size=np.array([]), max_size=np.array([]),
+                              flip=False, clip=False, variance=None, img_size=0, img_h=0,
+                              img_w=0, step=0, step_h=0, step_w=0, offset=0):
+        pb = onnx.helper.make_node(
+            'PriorBox',
+            inputs=['x'],
+            outputs=['y'],
+            aspect_ratio=aspect_ratio,
+            min_size=min_size,
+            max_size=max_size,
+            flip=flip,
+            clip=clip,
+            variance=variance,
+            img_size=img_size,
+            img_h=img_h,
+            img_w=img_w,
+            step=step,
+            step_h=step_h,
+            step_w=step_w,
+            offset=offset,
+        )
+        
+        node = PB({'pb': pb})
+        return node
+
+    @classmethod
+    def setUpClass(cls):
+        Op.registered_ops['PriorBox'] = PriorBoxOp
+
+    def test_priorbox_no_pb_no_ml(self):
+        self.assertRaises(AttributeError, PriorBoxFrontExtractor.extract, None)
+
+    def test_priorbox_ext_ideal_numbers(self):
+        node = self._create_priorbox_node(aspect_ratio=np.array([2, 3], dtype=np.float),
+                                          variance=np.array([0.2, 0.3, 0.2, 0.3]),
+                                          img_size=300, step=5.0, offset=0.6, flip=True)
+
+        PriorBoxFrontExtractor.extract(node)
+
+        exp_res = {
+            'op': 'PriorBox',
+            'type': 'PriorBox',
+            'clip': 0,
+            'flip': 1,
+            'aspect_ratio': np.array([2, 3], dtype=np.float),
+            'variance': [0.2, 0.3, 0.2, 0.3],
+            'img_size': 300,
+            'img_h': 0,
+            'img_w': 0,
+            'step': 5,
+            'step_h': 0,
+            'step_w': 0,
+            'offset': 0.6
+        }
+
+        for key in exp_res.keys():
+            if key in ['variance', 'aspect_ratio', 'step_h', 'step_w', 'offset']:
+                np.testing.assert_almost_equal(node[key], exp_res[key])
+            else:
+                self.assertEqual(node[key], exp_res[key])
diff --git a/model-optimizer/extensions/front/onnx/priorgridgenerator_ext.py b/model-optimizer/extensions/front/onnx/priorgridgenerator_ext.py
new file mode 100644
index 000000000..f8db64b65
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/priorgridgenerator_ext.py
@@ -0,0 +1,35 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.priorgridgenerator_onnx import ExperimentalDetectronPriorGridGenerator
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ExperimentalDetectronPriorGridGeneratorFrontExtractor(FrontExtractorOp):
+    op = 'ExperimentalDetectronPriorGridGenerator'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = dict(h=onnx_attr(node, 'h', 'i', 0),
+                     w=onnx_attr(node, 'w', 'i', 0),
+                     stride_x=onnx_attr(node, 'stride_x', 'f', 0),
+                     stride_y=onnx_attr(node, 'stride_y', 'f', 0),
+                     flatten=onnx_attr(node, 'flatten', 'i', 1)
+                     )
+        ExperimentalDetectronPriorGridGenerator.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/proposal_ext.py b/model-optimizer/extensions/front/onnx/proposal_ext.py
new file mode 100644
index 000000000..b82f080a4
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/proposal_ext.py
@@ -0,0 +1,34 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.proposal_onnx import ExperimentalDetectronGenerateProposalsSingleImage
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ExperimentalDetectronGenerateProposalsSingleImageFrontExtractor(FrontExtractorOp):
+    op = 'ExperimentalDetectronGenerateProposalsSingleImage'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = dict(min_size=onnx_attr(node, 'min_size', 'f', 0.0),
+                     nms_threshold=onnx_attr(node, 'nms_threshold', 'f', 0.7),
+                     post_nms_count=onnx_attr(node, 'post_nms_count', 'i', 1000),
+                     pre_nms_count=onnx_attr(node, 'pre_nms_count', 'i', 1000)
+                     )
+        ExperimentalDetectronGenerateProposalsSingleImage.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/quantize_ext.py b/model-optimizer/extensions/front/onnx/quantize_ext.py
new file mode 100644
index 000000000..bcead30f2
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/quantize_ext.py
@@ -0,0 +1,30 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+from extensions.ops.quantize import QuantizeOp
+
+
+class QuantizeFrontExtractor(FrontExtractorOp):
+    op = 'Quantize'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        levels = onnx_attr(node, 'levels', 'i')
+        QuantizeOp.update_node_stat(node, {'levels' : levels})
+        return QuantizeFrontExtractor.enabled
diff --git a/model-optimizer/extensions/front/onnx/reduce_mean_ext.py b/model-optimizer/extensions/front/onnx/reduce_mean_ext.py
index 174cff17a..555ffad1f 100644
--- a/model-optimizer/extensions/front/onnx/reduce_mean_ext.py
+++ b/model-optimizer/extensions/front/onnx/reduce_mean_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/reduce_sum_ext.py b/model-optimizer/extensions/front/onnx/reduce_sum_ext.py
index 8886eabb1..1c04349d6 100644
--- a/model-optimizer/extensions/front/onnx/reduce_sum_ext.py
+++ b/model-optimizer/extensions/front/onnx/reduce_sum_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/rnn_ext.py b/model-optimizer/extensions/front/onnx/rnn_ext.py
new file mode 100644
index 000000000..aa8f44134
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/rnn_ext.py
@@ -0,0 +1,57 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.RNN import RNN
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class RNNFrontExtractor(FrontExtractorOp):
+    op = 'RNN'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        activation_alpha = onnx_attr(node, 'activation_alpha', 'floats',
+                                     default=None, dst_type=lambda x: np.array(x, dtype=np.float32))
+        activation_beta = onnx_attr(node, 'activation_beta', 'floats',
+                                    default=None, dst_type=lambda x: np.array(x, dtype=np.float32))
+        activations = onnx_attr(node, 'activations', 'strings', default=None,
+                                dst_type=lambda x: list(map(lambda s: s.decode(encoding="utf-8").lower(), list(x))))
+        clip = onnx_attr(node, 'clip', 'f', default=None)
+
+        attrs = {
+            'batch_dim': 1,
+            'sequence_dim': 0,
+            'blobs_wrb': True,
+            'has_num_directions': True,
+            'num_layers': 1,
+            'format': 'onnx',
+            'multilayers': False,
+            'gate_order': [0],
+
+            # ONNX attrs
+            'activation_alpha': activation_alpha,
+            'activation_beta': activation_beta,
+            'activations': activations,
+            'clip': clip,
+            'direction': onnx_attr(node, 'direction', 's', b'forward').decode().lower(),
+            'hidden_size': np.array(onnx_attr(node, 'hidden_size', 'i'), dtype=np.int64),
+        }
+
+        RNN.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/rnn_ext_test.py b/model-optimizer/extensions/front/onnx/rnn_ext_test.py
new file mode 100644
index 000000000..83f702582
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/rnn_ext_test.py
@@ -0,0 +1,77 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+import onnx
+
+from extensions.front.onnx.rnn_ext import RNNFrontExtractor
+from mo.utils.unittest.extractors import PB
+
+
+class RNNExtractorTest(unittest.TestCase):
+    @staticmethod
+    def _create_node(**attrs):
+        pb = onnx.helper.make_node(
+            'RNN',
+            inputs=['X', 'W', 'R', 'B',],
+            outputs=['Y', 'Y_h', 'Y_c'],
+            hidden_size=128,
+            **attrs,
+        )
+        node = PB({'pb': pb})
+        return node
+
+    base_attrs = {
+        'type': 'RNNSequence',
+        'op': 'RNN',
+        'batch_dim': 1,
+        'sequence_dim': 0,
+        'blobs_wrb': True,
+        'has_num_directions': True,
+        'num_layers': 1,
+        'format': 'onnx',
+        'multilayers': False,
+        'gate_order': np.array([0]),
+        'direction': 'forward',
+    }
+
+    def test_base_attrs(self):
+        node = self._create_node()
+        RNNFrontExtractor.extract(node)
+
+        exp_res = self.base_attrs
+
+        for key in exp_res.keys():
+            equal = np.all(np.equal(node[key], exp_res[key], dtype=object))
+            self.assertTrue(equal)
+
+    def test_additional_attributes(self):
+        additional_attrs = {
+            'activation_alpha': [1.0, 0.0, 2.0],
+            'activations': [b'relu', b'tanh', b'sigmoid'],
+            'clip': 10.0,
+        }
+
+        node = self._create_node(**additional_attrs)
+        RNNFrontExtractor.extract(node)
+
+        exp_res = {**self.base_attrs, **additional_attrs}
+        exp_res['activations'] = ['relu', 'tanh', 'sigmoid']
+
+        for key in exp_res.keys():
+            equal = np.all(np.equal(node[key], exp_res[key], dtype=object))
+            self.assertTrue(equal, 'Values for attr {} are not equal'.format(key))
diff --git a/model-optimizer/extensions/front/onnx/roifeatureextractor_ext.py b/model-optimizer/extensions/front/onnx/roifeatureextractor_ext.py
new file mode 100644
index 000000000..99dae31bf
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/roifeatureextractor_ext.py
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from extensions.ops.roifeatureextractor_onnx import ExperimentalDetectronROIFeatureExtractor
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ExperimentalDetectronROIFeatureExtractorFrontExtractor(FrontExtractorOp):
+    op = 'ExperimentalDetectronROIFeatureExtractor'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = dict(output_size=onnx_attr(node, 'output_size', 'i', 7),
+                     sampling_ratio=onnx_attr(node, 'sampling_ratio', 'i', 2),
+                     distribute_rois_between_levels=onnx_attr(node, 'distribute_rois_between_levels', 'i', 1),
+                     preserve_rois_order=onnx_attr(node, 'preserve_rois_order', 'i', 1),
+                     num_classes=onnx_attr(node, 'num_classes', 'i', 81),
+                     post_nms_count=onnx_attr(node, 'post_nms_count', 'i', 2000),
+                     score_threshold=onnx_attr(node, 'score_threshold', 'f', 0.05),
+                     pyramid_scales=np.array(onnx_attr(node, 'pyramid_scales', 'ints', [4, 8, 16, 32, 64]),
+                                             dtype=np.int64),
+                     )
+
+        ExperimentalDetectronROIFeatureExtractor.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/scale_ext.py b/model-optimizer/extensions/front/onnx/scale_ext.py
new file mode 100644
index 000000000..7793ea96a
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/scale_ext.py
@@ -0,0 +1,35 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ScaleFrontExtractor(FrontExtractorOp):
+    op = 'Scale'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        scale = onnx_attr(node, 'scale', 'f', default=np.array(1.0), dst_type=lambda x: np.array(x))
+
+        node['scale'] = scale
+        node['bias'] = np.array(0)
+        node['op'] = 'ImageScaler'
+
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/sigmoid_ext.py b/model-optimizer/extensions/front/onnx/sigmoid_ext.py
index 052c9a475..4c4c28c0b 100644
--- a/model-optimizer/extensions/front/onnx/sigmoid_ext.py
+++ b/model-optimizer/extensions/front/onnx/sigmoid_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/sigmoid_ext_test.py b/model-optimizer/extensions/front/onnx/sigmoid_ext_test.py
index 3d25ea164..776af04cb 100644
--- a/model-optimizer/extensions/front/onnx/sigmoid_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/sigmoid_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/slice_ext.py b/model-optimizer/extensions/front/onnx/slice_ext.py
index 2cc4b367f..93affa047 100644
--- a/model-optimizer/extensions/front/onnx/slice_ext.py
+++ b/model-optimizer/extensions/front/onnx/slice_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/slice_ext_test.py b/model-optimizer/extensions/front/onnx/slice_ext_test.py
index 74ab96a83..7a4de9262 100644
--- a/model-optimizer/extensions/front/onnx/slice_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/slice_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/softmax_ext.py b/model-optimizer/extensions/front/onnx/softmax_ext.py
index 543fd4aad..2d09ece9a 100644
--- a/model-optimizer/extensions/front/onnx/softmax_ext.py
+++ b/model-optimizer/extensions/front/onnx/softmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/split_ext.py b/model-optimizer/extensions/front/onnx/split_ext.py
index 4e9e5ad3d..0e5db4b5c 100644
--- a/model-optimizer/extensions/front/onnx/split_ext.py
+++ b/model-optimizer/extensions/front/onnx/split_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/squeeze_ext.py b/model-optimizer/extensions/front/onnx/squeeze_ext.py
index 8472b8723..2478be11d 100644
--- a/model-optimizer/extensions/front/onnx/squeeze_ext.py
+++ b/model-optimizer/extensions/front/onnx/squeeze_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/squeeze_ext_test.py b/model-optimizer/extensions/front/onnx/squeeze_ext_test.py
index 5c697289e..209edf858 100644
--- a/model-optimizer/extensions/front/onnx/squeeze_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/squeeze_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/tanh_ext.py b/model-optimizer/extensions/front/onnx/tanh_ext.py
index 6b88ce2a7..61999319e 100644
--- a/model-optimizer/extensions/front/onnx/tanh_ext.py
+++ b/model-optimizer/extensions/front/onnx/tanh_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/tanh_ext_test.py b/model-optimizer/extensions/front/onnx/tanh_ext_test.py
index 25b858628..f5a49e542 100644
--- a/model-optimizer/extensions/front/onnx/tanh_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/tanh_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/topkrois_ext.py b/model-optimizer/extensions/front/onnx/topkrois_ext.py
new file mode 100644
index 000000000..ab8c9f10e
--- /dev/null
+++ b/model-optimizer/extensions/front/onnx/topkrois_ext.py
@@ -0,0 +1,30 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.topkrois_onnx import ExperimentalDetectronTopKROIs
+from mo.front.extractor import FrontExtractorOp
+from mo.front.onnx.extractors.utils import onnx_attr
+
+
+class ExperimentalDetectronTopKROIsFrontExtractor(FrontExtractorOp):
+    op = 'ExperimentalDetectronTopKROIs'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        attrs = dict(max_rois=onnx_attr(node, 'max_rois', 'i', 1000))
+        ExperimentalDetectronTopKROIs.update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/onnx/transpose_ext.py b/model-optimizer/extensions/front/onnx/transpose_ext.py
index c2ff50150..b6b6941aa 100644
--- a/model-optimizer/extensions/front/onnx/transpose_ext.py
+++ b/model-optimizer/extensions/front/onnx/transpose_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/transpose_ext_test.py b/model-optimizer/extensions/front/onnx/transpose_ext_test.py
index 2880c2d75..d94a3391d 100644
--- a/model-optimizer/extensions/front/onnx/transpose_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/transpose_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/unsqueeze_ext.py b/model-optimizer/extensions/front/onnx/unsqueeze_ext.py
index 93488898f..92ea63ca6 100644
--- a/model-optimizer/extensions/front/onnx/unsqueeze_ext.py
+++ b/model-optimizer/extensions/front/onnx/unsqueeze_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/unsqueeze_ext_test.py b/model-optimizer/extensions/front/onnx/unsqueeze_ext_test.py
index 7cdcdaeea..3d55103f5 100644
--- a/model-optimizer/extensions/front/onnx/unsqueeze_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/unsqueeze_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/upsample_ext.py b/model-optimizer/extensions/front/onnx/upsample_ext.py
index 867e504ea..9e8578a85 100644
--- a/model-optimizer/extensions/front/onnx/upsample_ext.py
+++ b/model-optimizer/extensions/front/onnx/upsample_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/onnx/upsample_ext_test.py b/model-optimizer/extensions/front/onnx/upsample_ext_test.py
index e3634174e..f86f47dd9 100644
--- a/model-optimizer/extensions/front/onnx/upsample_ext_test.py
+++ b/model-optimizer/extensions/front/onnx/upsample_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/output_cut.py b/model-optimizer/extensions/front/output_cut.py
new file mode 100644
index 000000000..e55b42124
--- /dev/null
+++ b/model-optimizer/extensions/front/output_cut.py
@@ -0,0 +1,32 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.front.extractor import add_output_ops
+from mo.graph.graph import Graph
+
+
+class OutputCut(FrontReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.front.user_data_repack import UserDataRepack
+        return [UserDataRepack]
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        add_output_ops(graph, graph.graph['packed_outputs'], inputs=graph.graph['user_shapes'])
diff --git a/model-optimizer/extensions/front/override_batch.py b/model-optimizer/extensions/front/override_batch.py
new file mode 100644
index 000000000..678c83cf5
--- /dev/null
+++ b/model-optimizer/extensions/front/override_batch.py
@@ -0,0 +1,25 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
+from mo.middle.passes.infer import override_batch
+
+
+class OverrideBatch(FrontReplacementPattern):
+    enabled = True
+
+    def find_and_replace_pattern(self, graph: Graph):
+        override_batch(graph, graph.graph['cmd_params'].batch)
diff --git a/model-optimizer/extensions/front/pass_separator.py b/model-optimizer/extensions/front/pass_separator.py
new file mode 100644
index 000000000..3dcac16ca
--- /dev/null
+++ b/model-optimizer/extensions/front/pass_separator.py
@@ -0,0 +1,43 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
+
+
+class FrontStart(FrontReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        pass
+
+
+class FrontFinish(FrontReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        pass
diff --git a/model-optimizer/extensions/front/reciprocal.py b/model-optimizer/extensions/front/reciprocal.py
index 3c656eafe..74fe9336b 100644
--- a/model-optimizer/extensions/front/reciprocal.py
+++ b/model-optimizer/extensions/front/reciprocal.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.power import Power
 
 
@@ -25,7 +25,7 @@ class ReciprocalReplacer(FrontReplacementOp):
     op = "Reciprocal"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         reciprocal = Power(graph, dict(scale=1, power=-1, shift=0, name=node.name + '/power_'))
         out_node = reciprocal.create_node([node.in_node(0)])
 
diff --git a/model-optimizer/extensions/front/reciprocal_test.py b/model-optimizer/extensions/front/reciprocal_test.py
index 527cb7e3f..1a8df9e5b 100644
--- a/model-optimizer/extensions/front/reciprocal_test.py
+++ b/model-optimizer/extensions/front/reciprocal_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/restore_ports.py b/model-optimizer/extensions/front/restore_ports.py
new file mode 100644
index 000000000..7f8fbc857
--- /dev/null
+++ b/model-optimizer/extensions/front/restore_ports.py
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
+
+
+class RestorePorts(FrontReplacementSubgraph):
+    enabled = True
+
+    def run_after(self):
+        from extensions.front.input_cut import InputCut
+        return [InputCut]
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for node_id, attrs in graph.nodes(data=True):
+            attrs['_in_ports'] = set()
+            attrs['_out_ports'] = set()
+
+        for u, v, k, d in graph.edges(data=True, keys=True):
+            from_node_attrs = graph.node[u]
+            to_node_attrs = graph.node[v]
+            from_node_attrs['_out_ports'].add(d['out'])
+            to_node_attrs['_in_ports'].add(d['in'])
+
+        graph.stage = 'front'
diff --git a/model-optimizer/extensions/front/squared_difference.py b/model-optimizer/extensions/front/squared_difference.py
index e5c94a691..a53e2ae7f 100644
--- a/model-optimizer/extensions/front/squared_difference.py
+++ b/model-optimizer/extensions/front/squared_difference.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.eltwise import Eltwise
 from mo.ops.power import Power
 
@@ -31,7 +31,7 @@ class SquaredDifference(FrontReplacementOp):
     op = "SquaredDifference"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         negate = Power(graph, dict(scale=-1, name=node.name + '/negate_'))
         add = Eltwise(graph, dict(operation='sum', name=node.name + '/add_'))
         squared = Power(graph, dict(power=2, name=node.name + '/squared_'))
diff --git a/model-optimizer/extensions/front/standalone_const_eraser.py b/model-optimizer/extensions/front/standalone_const_eraser.py
index 98ea814d3..295f9a3a4 100644
--- a/model-optimizer/extensions/front/standalone_const_eraser.py
+++ b/model-optimizer/extensions/front/standalone_const_eraser.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ import logging as log
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import erase_node
+from mo.graph.graph import Graph
 
 
 class StandaloneConstEraser(FrontReplacementSubgraph):
@@ -35,8 +35,8 @@ class StandaloneConstEraser(FrontReplacementSubgraph):
         )
 
     @staticmethod
-    def replace_sub_graph(graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(graph: Graph, match: dict):
         if not len(match['const'].in_edges()) and len(match['const'].out_edges()) == 1:
-            erase_node(match['const'])
-            erase_node(match['output'])
+            graph.erase_node(match['const'])
+            graph.erase_node(match['output'])
             log.info("Standalone Const node \"{}\" was removed from the graph".format(match['const'].id))
diff --git a/model-optimizer/extensions/front/sub.py b/model-optimizer/extensions/front/sub.py
index a24407872..2097ed04b 100644
--- a/model-optimizer/extensions/front/sub.py
+++ b/model-optimizer/extensions/front/sub.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.eltwise import Eltwise
 from mo.ops.power import Power
 
@@ -26,7 +26,7 @@ class Sub(FrontReplacementOp):
     op = "Sub"
     enabled = True
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         negate = Power(graph, dict(scale=-1, name=node.name + '/negate_'))
         add = Eltwise(graph, dict(operation='sum', name=node.name + '/add_'))
         out_node = add.create_node([(node.in_node(0), node.in_edge(0)['out']),
diff --git a/model-optimizer/extensions/front/tf/ArgMaxReshape.py b/model-optimizer/extensions/front/tf/ArgMaxReshape.py
index b01768483..ed77c2d7c 100644
--- a/model-optimizer/extensions/front/tf/ArgMaxReshape.py
+++ b/model-optimizer/extensions/front/tf/ArgMaxReshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,13 +15,11 @@
 """
 
 import logging as log
-import networkx as nx
 import numpy as np
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.squeeze import Squeeze
-from mo.graph.graph import insert_node_after
 
 
 class ArgMaxReshape(FrontReplacementOp):
@@ -32,17 +30,17 @@ class ArgMaxReshape(FrontReplacementOp):
     op = "ArgMax"
     enabled = True
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: dict):
+    def nodes_to_remove(self, graph: Graph, match: dict):
         # do not remove matched node
         return []
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         squeeze_op = Squeeze(graph, dict())
         squeeze_op.attrs['old_infer'] = squeeze_op.attrs['infer']
         squeeze_op.attrs['infer'] = __class__.do_infer
 
         squeeze_node = squeeze_op.create_node([], dict(name=node.name + '/Squeeze'))
-        insert_node_after(node, squeeze_node)
+        node.insert_node_after(squeeze_node)
         return []
 
     @staticmethod
diff --git a/model-optimizer/extensions/front/tf/BlockLSTM.py b/model-optimizer/extensions/front/tf/BlockLSTM.py
index 3e1bed424..cd0247fec 100644
--- a/model-optimizer/extensions/front/tf/BlockLSTM.py
+++ b/model-optimizer/extensions/front/tf/BlockLSTM.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ import logging as log
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 
 
@@ -61,11 +61,19 @@ class BlockLSTM(FrontReplacementOp):
     op = "BlockLSTM"
     enabled = True
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: dict):
+    def nodes_to_remove(self, graph: Graph, match: dict):
         # do not remove matched node
         return []
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    @staticmethod
+    def find_key_by_input_port(u: Node, v: Node, p: int):
+        key = None
+        for k, edge_info in u.graph.get_edge_data(u.id, v.id).items():
+            if p == edge_info['in']:
+                return k
+        return key
+
+    def replace_op(self, graph: Graph, node: Node):
         if node.use_peephole:
             raise Error("BlockLSTM operation is not supported with `use_peephole`==True. Node: {}"
                         "".format(node.soft_get('name')))
@@ -81,7 +89,12 @@ class BlockLSTM(FrontReplacementOp):
                                                    {p: o.id for p, o in node.out_nodes().items()}))
 
         log.debug("Cutting all inputs for peephole connection (5, 6, 7 input ports) off, as `use_peephole`=False")
-        [graph.remove_edge(node.in_node(p).id, node.id) for p, input_data in node.in_nodes().items() if p in [5, 6, 7]]
+
+        for p, input_data in node.in_nodes().items():
+            if p in [5, 6, 7]:
+                key = self.find_key_by_input_port(node.in_node(p), node, p)
+                assert key is not None
+                graph.remove_edge(node.in_node(p).id, node.id, key=key)
 
         log.debug("Cutting seq_len_max input off")
         graph.remove_edge(node.in_node(0).id, node.id)
diff --git a/model-optimizer/extensions/front/tf/BlockLSTM_ext.py b/model-optimizer/extensions/front/tf/BlockLSTM_ext.py
index feddc170a..cdf46f85a 100644
--- a/model-optimizer/extensions/front/tf/BlockLSTM_ext.py
+++ b/model-optimizer/extensions/front/tf/BlockLSTM_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/CTCGreedyDecoder.py b/model-optimizer/extensions/front/tf/CTCGreedyDecoder.py
index e36bf507e..c424bf8b8 100644
--- a/model-optimizer/extensions/front/tf/CTCGreedyDecoder.py
+++ b/model-optimizer/extensions/front/tf/CTCGreedyDecoder.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import replace_node, Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 
 
@@ -52,14 +52,14 @@ class CTCGreedyDecoderReplacement(FrontReplacementSubgraph):
             ]
         )
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: dict):
+    def nodes_to_remove(self, graph: Graph, match: dict):
         return [match['cast'].id, match['sparse_to_dense']]
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         decoder_node = match['decoder']
         graph.remove_edge(decoder_node.id, match['sparse_to_dense'].id)
         graph.remove_edge(decoder_node.id, match['cast'].id)
-        replace_node(match['sparse_to_dense'], decoder_node)
+        match['sparse_to_dense'].replace_node(decoder_node)
 
         # update the TensorFlow infer function for the CTCGreedyDecoder to make necessary changes with the second input
         decoder_node['old_infer'] = decoder_node.infer
@@ -77,6 +77,6 @@ class CTCGreedyDecoderReplacement(FrontReplacementSubgraph):
         new_value[:, 0] = 0
         new_value = np.transpose(new_value)
         sequence_length_node.value = new_value
-        sequence_length_node.shape = sequence_length_node.value.shape
+        sequence_length_node.shape = int64_array(sequence_length_node.value.shape)
 
         node.old_infer(node)
diff --git a/model-optimizer/extensions/front/tf/CTCGreedyDecoder_ext.py b/model-optimizer/extensions/front/tf/CTCGreedyDecoder_ext.py
index 89986e4fd..ed5a40575 100644
--- a/model-optimizer/extensions/front/tf/CTCGreedyDecoder_ext.py
+++ b/model-optimizer/extensions/front/tf/CTCGreedyDecoder_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/Cast_ext.py b/model-optimizer/extensions/front/tf/Cast_ext.py
new file mode 100644
index 000000000..2c29f78c3
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/Cast_ext.py
@@ -0,0 +1,30 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from extensions.ops.Cast import Cast
+from mo.front.extractor import FrontExtractorOp
+from mo.front.tf.common import tf_data_type_decode
+
+
+class CastFrontExtractor(FrontExtractorOp):
+    op = 'Cast'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        cast_dst_type = tf_data_type_decode[node.pb.attr['DstT'].type][0]
+        Cast.update_node_stat(node, {'dst_type': cast_dst_type})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/ConvFlatten.py b/model-optimizer/extensions/front/tf/ConvFlatten.py
index 27282d301..2fd80f22e 100644
--- a/model-optimizer/extensions/front/tf/ConvFlatten.py
+++ b/model-optimizer/extensions/front/tf/ConvFlatten.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,29 +14,28 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.subgraph_matcher import SubgraphMatch
 from mo.front.tf.replacement import FrontReplacementFromConfigFileSubGraph
-from mo.graph.graph import insert_node_after
+from mo.graph.graph import Graph
 from mo.ops.permute import Permute
 
 
 class ConvFlattenReplacement(FrontReplacementFromConfigFileSubGraph):
     replacement_id = 'ConvFlatten'
 
-    def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         return {}
 
-    def input_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def input_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         return {}
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         # no need to remove any of matched nodes. We just insert 'Permute' node before the matched sub-graph.
         return []
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         permute_op = Permute(graph, {'order': np.array([0, 2, 3, 1])})
         permute_node = permute_op.add_node({'name': match.scope + '_permute_'})
 
@@ -44,5 +43,5 @@ class ConvFlattenReplacement(FrontReplacementFromConfigFileSubGraph):
 
         # reshape_in_node is the node after which we should insert Permute
         reshape_in_node = reshape_node.in_nodes()[0]
-        insert_node_after(reshape_in_node, permute_node, 0)
+        reshape_in_node.insert_node_after(permute_node, 0)
         return {}
diff --git a/model-optimizer/extensions/front/tf/CropAndResizeReplacement.py b/model-optimizer/extensions/front/tf/CropAndResizeReplacement.py
index d02f109ce..15c110324 100644
--- a/model-optimizer/extensions/front/tf/CropAndResizeReplacement.py
+++ b/model-optimizer/extensions/front/tf/CropAndResizeReplacement.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
  limitations under the License.
 """
 
-import networkx as nx
+import logging as log
+
 import numpy as np
 
-from mo.front.tf.graph_utils import add_convolution_to_swap_xy_coordinates
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node, create_edge
+from mo.front.tf.graph_utils import add_convolution_to_swap_xy_coordinates
+from mo.graph.graph import Node, Graph
 from mo.ops.concat import Concat
 from mo.ops.reshape import Reshape
 from mo.ops.unsqueeze import Unsqueeze
@@ -34,16 +35,19 @@ class CropAndResizeReplacement(FrontReplacementOp):
     op = "CropAndResize"
     enabled = True
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: dict):
+    def nodes_to_remove(self, graph: Graph, match: dict):
         # do not remove matched node
         return []
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
+        if node.has_and_set('inputs_preprocessed'):
+            log.debug('Node "{}" has already been preprocessed'.format(node.soft_get('name')))
+            return []
         # reshape tensor with batch indices to 2d
         unsqueeze_op = Unsqueeze(graph, {'unsqueeze_dims': np.array([1], dtype=np.int64)})
         unsqueeze_node = unsqueeze_op.create_node([node.in_node(2)])
 
-        concat_op = Concat(graph, {'axis': 1, 'name': node.name + '/concat_batch_indices_and_boxes'})
+        concat_op = Concat(graph, {'axis': 1, 'name': node.name + '/concat_batch_indices_and_boxes', 'in_ports_count': 2})
         concat_node = concat_op.create_node([unsqueeze_node, node.in_node(1)])
 
         # do not remove edge with crop_size because it is needed in the partial infer
@@ -55,9 +59,11 @@ class CropAndResizeReplacement(FrontReplacementOp):
 
         # reshape locations tensor to 2D so it could be passed to Eltwise which will be converted to ScaleShift
         reshape_2d_op = Reshape(graph, dict(dim=np.array([-1, 5])))
-        reshape_2d_node = reshape_2d_op.create_node([swapped_box_coordinates_node], dict(name='reshape_2d_'))
-        create_edge(reshape_2d_node, node, 0, 1)
+
+        reshape_2d_node = reshape_2d_op.create_node([swapped_box_coordinates_node],
+                                                    dict(name=swapped_box_coordinates_node.id + '/reshape_2d_',
+                                                         nchw_layout=True))
+        graph.create_edge(reshape_2d_node, node, 0, 1)
 
         # do not replace any output edge
         return []
-
diff --git a/model-optimizer/extensions/front/tf/FlattenToReshape.py b/model-optimizer/extensions/front/tf/FlattenToReshape.py
new file mode 100644
index 000000000..7198f5ff1
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/FlattenToReshape.py
@@ -0,0 +1,91 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from extensions.front.Pack import Pack
+from extensions.front.tf.nearest_neighbor_upsampling import NearestNeighborUpsampling
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
+
+
+def is_value_is_constant(val: np.ndarray, const: [int, float]):
+    if val.ndim > 1:
+        return False
+    if val.ndim == 1 and len(val) > 1:
+        return False
+    return val.item() == const
+
+
+class FlattenToReshapeableReshape(FrontReplacementSubgraph):
+    """
+    The TensorFlow implementation of the Flatten operation is not reshape-able because the batch size is hardcoded
+    during te constant propagation. This transform sets the 'dim' attribute for the Reshape to [0, -1].
+    """
+    enabled = True
+
+    def run_after(self):
+        return [NearestNeighborUpsampling]
+
+    def run_before(self):
+        return [Pack]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('shape', dict(op='Shape')),
+                ('strided_slice', dict(op='StridedSlice')),
+                ('pack', dict(op='Pack')),
+                ('const', dict(op='Const')),
+                ('reshape', dict(op='Reshape')),
+            ],
+            edges=[
+                ('shape', 'strided_slice', {'in': 0}),
+                ('strided_slice', 'pack', {'in': 0}),
+                ('const', 'pack', {'in': 1}),
+                ('pack', 'reshape', {'in': 1}),
+            ])
+
+    @staticmethod
+    def replace_sub_graph(graph: Graph, match: dict):
+        strided_slice_node = match['strided_slice']
+        const_node = match['const']
+        reshape_node = match['reshape']
+        pack_node = match['pack']
+
+        if not const_node.has_valid('value') or not is_value_is_constant(const_node.value, -1):
+            log.debug('The pattern does not correspond to flatten. The second reshape dimension is not -1. It is {}'.
+                      format(const_node.soft_get('value')))
+            return
+        if len(pack_node.in_nodes()) != 2:
+            log.debug('The pattern does not correspond to flatten. The "Pack" operation produces tensor with 3 items '
+                      'but should produce just 2.')
+            return
+
+        expected_values = [0, 1, 1]  # expected values to a StridedSlice to get the batch size
+        for ind in range(3):
+            if not strided_slice_node.in_node(ind + 1).has_valid('value') or \
+                    not is_value_is_constant(strided_slice_node.in_node(ind + 1).value, expected_values[ind]):
+                log.debug('The pattern does not correspond to flatten because of the input with index {}. The value is '
+                          '"{}".'.format(ind, strided_slice_node.soft_get('value')))
+                return
+
+        graph.remove_edge(pack_node.id, reshape_node.id)
+        reshape_node['dim'] = int64_array([0, -1])
+        log.debug('The node "{}" is actually a Flatten node'.format(reshape_node.soft_get('name')))
diff --git a/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py b/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py
index c62f9f657..c729051ea 100644
--- a/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py
+++ b/model-optimizer/extensions/front/tf/ObjectDetectionAPI.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,39 +17,43 @@
 import logging as log
 from math import sqrt
 
-import networkx as nx
 import numpy as np
 
+from extensions.front.Pack import Pack
+from extensions.front.div import Div
 from extensions.front.standalone_const_eraser import StandaloneConstEraser
 from extensions.front.sub import Sub
 from extensions.front.tf.CropAndResizeReplacement import CropAndResizeReplacement
-from extensions.front.Pack import Pack
 from extensions.front.tf.Unpack import Unpack
 from extensions.ops.DetectionOutput import DetectionOutput
 from extensions.ops.priorbox_clustered import PriorBoxClusteredOp
 from extensions.ops.proposal import ProposalOp
+from extensions.ops.psroipooling import PSROIPoolingOp
 from mo.front.common.layout import get_batch_dim, get_height_dim, get_width_dim
+from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.weights import swap_weights_xy
-from mo.front.extractor import output_user_data_repack, add_output_ops
+from mo.front.extractor import output_user_data_repack, add_output_ops, update_attrs
 from mo.front.subgraph_matcher import SubgraphMatch
 from mo.front.tf.graph_utils import add_activation_function_after_node, add_convolution_to_swap_xy_coordinates, \
-    squeeze_reshape_and_concat
+    squeeze_reshape_and_concat, add_fake_background_loc
 from mo.front.tf.replacement import FrontReplacementFromConfigFileSubGraph, FrontReplacementFromConfigFileGeneral
-from mo.graph.graph import create_edge, insert_node_after, Node, replace_node
+from mo.graph.graph import Graph, Node
 from mo.ops.activation import Activation
 from mo.ops.concat import Concat
 from mo.ops.const import Const
 from mo.ops.crop import Crop
-from mo.ops.div import Div
 from mo.ops.eltwise import Eltwise
+from mo.ops.input import Input
 from mo.ops.op import PermuteAttrs
 from mo.ops.output import Output
 from mo.ops.permute import Permute
+from mo.ops.reduce import Reduce
 from mo.ops.reshape import Reshape
 from mo.ops.roipooling import ROIPooling
+from mo.ops.shape import Shape
 from mo.ops.softmax import Softmax
 from mo.utils.error import Error
-from mo.utils.graph import backward_bfs_for_operation
+from mo.utils.graph import backward_bfs_for_operation, bfs_search
 from mo.utils.pipeline_config import PipelineConfig
 
 missing_param_error = 'To convert the model specify path to the pipeline configuration file which was used to ' \
@@ -82,7 +86,7 @@ def _value_or_raise(match: SubgraphMatch, pipeline_config: PipelineConfig, key:
     return value
 
 
-def _find_ssd_head_node(graph: nx.MultiDiGraph, ssd_head_index: int, head_type: str):
+def _find_ssd_head_node(graph: Graph, ssd_head_index: int, head_type: str):
     """
     Finds the SSD head node with index 'ssd_head_index' in the topology. The parameter 'head_type' specifies what type
     of the head is requested: with box predictions or class predictions.
@@ -135,7 +139,7 @@ def _skip_node_of_type(node: Node, node_ops_to_skip: list):
     return node
 
 
-def _relax_reshape_nodes(graph: nx.MultiDiGraph, pipeline_config: PipelineConfig):
+def _relax_reshape_nodes(graph: Graph, pipeline_config: PipelineConfig):
     """
     Finds the 'Reshape' operations following the SSD head nodes which have hard-coded output dimensions and replaces
     them with new ones with one of the dimensions sizes equal to -1. This function is used to make TF OD API SSD models
@@ -155,23 +159,23 @@ def _relax_reshape_nodes(graph: nx.MultiDiGraph, pipeline_config: PipelineConfig
         assert (input_node is not None)
         old_reshape_node = _skip_node_of_type(input_node.out_node(), ['Identity'])
         assert (old_reshape_node.op == 'Reshape')
-        reshape_size_node = Const(graph, {'value': np.array([0, -1, 1, 4])}).create_node([])
+        reshape_size_node = Const(graph, {'value': int64_array([0, -1, 1, 4])}).create_node([])
         new_reshape_op = Reshape(graph, {'name': input_node.id + '/Reshape', 'correct_data_layout': True})
         new_reshape_node = new_reshape_op.create_node([input_node, reshape_size_node])
-        replace_node(old_reshape_node, new_reshape_node)
+        old_reshape_node.replace_node(new_reshape_node)
 
         # fix hard-coded value for the number of items in tensor produced by the convolution to make topology reshapable
         input_node = _find_ssd_head_node(graph, ssd_head_ind, 'class')
         assert (input_node is not None)
         old_reshape_node = _skip_node_of_type(input_node.out_node(), ['Identity'])
         assert (old_reshape_node.op == 'Reshape')
-        reshape_size_node_2 = Const(graph, {'value': np.array([0, -1, num_classes + 1])}).create_node([])
+        reshape_size_node_2 = Const(graph, {'value': int64_array([0, -1, num_classes + 1])}).create_node([])
         new_reshape_op_2 = Reshape(graph, {'name': input_node.id + '/Reshape', 'correct_data_layout': True})
         new_reshape_node_2 = new_reshape_op_2.create_node([input_node, reshape_size_node_2])
-        replace_node(old_reshape_node, new_reshape_node_2)
+        old_reshape_node.replace_node(new_reshape_node_2)
 
 
-def _create_prior_boxes_node(graph: nx.MultiDiGraph, pipeline_config: PipelineConfig):
+def _create_prior_boxes_node(graph: Graph, pipeline_config: PipelineConfig):
     """
     The function creates one or several PriorBoxClustered nodes based on information from the pipeline configuration
     files. The PriorBoxClustered nodes get input data from SSD 'heads' and from the placeholder node (just to get
@@ -227,11 +231,11 @@ def _create_prior_boxes_node(graph: nx.MultiDiGraph, pipeline_config: PipelineCo
     if len(prior_box_nodes) == 1:
         return prior_box_nodes[0]
     else:
-        concat_prior_boxes_op = Concat(graph, {'axis': -1})
+        concat_prior_boxes_op = Concat(graph, {'axis': -1, 'in_ports_count': len(prior_box_nodes)})
         return concat_prior_boxes_op.create_node(prior_box_nodes, {'name': 'ConcatPriorBoxesClustered'})
 
 
-def _create_multiscale_prior_boxes_node(graph: nx.MultiDiGraph, pipeline_config: PipelineConfig):
+def _create_multiscale_prior_boxes_node(graph: Graph, pipeline_config: PipelineConfig):
     """
     The function creates one or several PriorBoxClustered nodes based on information from the pipeline configuration
     files. The PriorBoxClustered nodes get input data from SSD 'heads' and from the placeholder node (just to get
@@ -272,7 +276,7 @@ def _create_multiscale_prior_boxes_node(graph: nx.MultiDiGraph, pipeline_config:
     if len(prior_box_nodes) == 1:
         return prior_box_nodes[0]
     else:
-        concat_prior_boxes_op = Concat(graph, {'axis': -1})
+        concat_prior_boxes_op = Concat(graph, {'axis': -1, 'in_ports_count': len(prior_box_nodes)})
         return concat_prior_boxes_op.create_node(prior_box_nodes, {'name': 'ConcatPriorBoxesClustered'})
 
 
@@ -293,7 +297,7 @@ def calculate_shape_keeping_aspect_ratio(height: int, width: int, min_size: int,
     return int(round(height * ratio)), int(round(width * ratio))
 
 
-def calculate_placeholder_spatial_shape(graph: nx.MultiDiGraph, match: SubgraphMatch, pipeline_config: PipelineConfig):
+def calculate_placeholder_spatial_shape(graph: Graph, match: SubgraphMatch, pipeline_config: PipelineConfig):
     """
     The function calculates the preprocessed shape of the input image for a TensorFlow Object Detection API model.
     It uses various sources to calculate it:
@@ -388,7 +392,7 @@ class ObjectDetectionAPIPreprocessorReplacement(FrontReplacementFromConfigFileSu
     def run_before(self):
         return [Pack, Sub]
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         new_nodes_to_remove = match.matched_nodes_names()
         # do not remove nodes that perform input image scaling and mean value subtraction
         for node_to_keep in ('Preprocessor/sub', 'Preprocessor/sub/y', 'Preprocessor/mul', 'Preprocessor/mul/x'):
@@ -396,7 +400,7 @@ class ObjectDetectionAPIPreprocessorReplacement(FrontReplacementFromConfigFileSu
                 new_nodes_to_remove.remove(node_to_keep)
         return new_nodes_to_remove
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         argv = graph.graph['cmd_params']
         layout = graph.graph['layout']
         if argv.tensorflow_object_detection_api_pipeline_config is None:
@@ -423,8 +427,6 @@ class ObjectDetectionAPIPreprocessorReplacement(FrontReplacementFromConfigFileSu
         batch_dim = get_batch_dim(layout, 4)
         if argv.batch is None and placeholder_node.shape[batch_dim] == -1:
             placeholder_node.shape[batch_dim] = 1
-        if placeholder_node.shape[batch_dim] > 1:
-            print("[ WARNING ] The batch size more than 1 is supported for SSD topologies only.")
         height, width = calculate_placeholder_spatial_shape(graph, match, pipeline_config)
         placeholder_node.shape[get_height_dim(layout, 4)] = height
         placeholder_node.shape[get_width_dim(layout, 4)] = width
@@ -440,9 +442,9 @@ class ObjectDetectionAPIPreprocessorReplacement(FrontReplacementFromConfigFileSu
 
         # connect to_float_node directly with node performing scale on mean value subtraction
         if mul_node is None:
-            create_edge(to_float_node, sub_node, 0, 0)
+            graph.create_edge(to_float_node, sub_node, 0, 0)
         else:
-            create_edge(to_float_node, mul_node, 0, 1)
+            graph.create_edge(to_float_node, mul_node, 0, 1)
 
         print('The Preprocessor block has been removed. Only nodes performing mean value subtraction and scaling (if'
               ' applicable) are kept.')
@@ -465,12 +467,22 @@ class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFil
     def run_after(self):
         return [ObjectDetectionAPIProposalReplacement, CropAndResizeReplacement]
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         new_nodes_to_remove = match.matched_nodes_names().copy()
-        new_nodes_to_remove.extend(['detection_boxes', 'detection_scores', 'num_detections'])
+        outputs = ['detection_boxes', 'detection_scores', 'num_detections']
+        for output in outputs:
+            children = Node(graph, output).out_nodes()
+            if len(children) != 1:
+                log.warning('Output {} has {} children. It should have only one output: with op==`OpOutput`'
+                            ''.format(output, len(children)))
+            elif children[list(children.keys())[0]].op == 'OpOutput':
+                new_nodes_to_remove.append(children[list(children.keys())[0]].id)
+            else:
+                continue
+        new_nodes_to_remove.extend(outputs)
         return new_nodes_to_remove
 
-    def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         # the DetectionOutput in IE produces single tensor, but in TF it produces four tensors, so we need to create
         # only one output edge match
         return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id}
@@ -481,62 +493,60 @@ class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFil
             current_node = current_node.in_node()
         return current_node
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         argv = graph.graph['cmd_params']
         if argv.tensorflow_object_detection_api_pipeline_config is None:
             raise Error(missing_param_error)
         pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config)
 
         num_classes = _value_or_raise(match, pipeline_config, 'num_classes')
-        first_stage_max_proposals = _value_or_raise(match, pipeline_config, 'first_stage_max_proposals')
+        max_proposals = _value_or_raise(match, pipeline_config, 'first_stage_max_proposals')
         activation_function = _value_or_raise(match, pipeline_config, 'postprocessing_score_converter')
 
         activation_conf_node = add_activation_function_after_node(graph, match.single_input_node(1)[0].in_node(0),
                                                                   activation_function)
 
-        # IE DetectionOutput layer consumes flattened tensors
-        # reshape operation to flatten confidence tensor
-        reshape_conf_op = Reshape(graph, dict(dim=np.array([1, -1])))
+        # IE DetectionOutput layer consumes flattened tensors so need add a Reshape layer.
+        # The batch value of the input tensor is not equal to the batch of the topology, so it is not possible to use
+        # "0" value in the Reshape layer attribute to refer to the batch size, but we know how to
+        # calculate the second dimension so the batch value will be deduced from it with help of "-1".
+        reshape_conf_op = Reshape(graph, dict(dim=int64_array([-1, (num_classes + 1) * max_proposals])))
         reshape_conf_node = reshape_conf_op.create_node([activation_conf_node], dict(name='do_reshape_conf'))
 
-        # TF produces locations tensor without boxes for background.
-        # Inference Engine DetectionOutput layer requires background boxes so we generate them with some values
-        # and concatenate with locations tensor
-        fake_background_locs_blob = np.tile([[[1, 1, 2, 2]]], [first_stage_max_proposals, 1, 1])
-        fake_background_locs_const_op = Const(graph, dict(value=fake_background_locs_blob))
-        fake_background_locs_const_node = fake_background_locs_const_op.create_node([])
-
         # Workaround for PermuteForReshape pass.
         # We looking for first not Reshape-typed node before match.single_input_node(0)[0].in_node(0).
         # And add  reshape_loc node after this first not Reshape-typed node.
         current_node = self.skip_nodes_by_condition(match.single_input_node(0)[0].in_node(0),
                                                     lambda x: x['kind'] == 'op' and x.soft_get('type') == 'Reshape')
 
-        reshape_loc_op = Reshape(graph, dict(dim=np.array([first_stage_max_proposals, num_classes, 4])))
-        reshape_loc_node = reshape_loc_op.create_node([current_node], dict(name='reshape_loc'))
-
-        concat_loc_op = Concat(graph, dict(axis=1))
-        concat_loc_node = concat_loc_op.create_node([fake_background_locs_const_node, reshape_loc_node],
-                                                    dict(name='concat_fake_loc'))
-        PermuteAttrs.set_permutation(reshape_loc_node, concat_loc_node, None)
-        PermuteAttrs.set_permutation(fake_background_locs_const_node, concat_loc_node, None)
+        reshape_loc_op = Reshape(graph, dict(dim=int64_array([-1, num_classes, 1, 4])))
+        reshape_loc_node = reshape_loc_op.create_node([current_node], dict(name='reshape_loc', nchw_layout=True))
+        update_attrs(reshape_loc_node, 'shape_attrs', 'dim')
 
         # constant node with variances
         variances_const_op = Const(graph, dict(value=_variance_from_pipeline_config(pipeline_config)))
         variances_const_node = variances_const_op.create_node([])
 
+        # TF produces locations tensor without boxes for background.
+        # Inference Engine DetectionOutput layer requires background boxes so we generate them
+        loc_node = add_fake_background_loc(graph, reshape_loc_node)
+        PermuteAttrs.set_permutation(reshape_loc_node, loc_node, None)
+
         # reshape locations tensor to 2D so it could be passed to Eltwise which will be converted to ScaleShift
-        reshape_loc_2d_op = Reshape(graph, dict(dim=np.array([-1, 4])))
-        reshape_loc_2d_node = reshape_loc_2d_op.create_node([concat_loc_node], dict(name='reshape_locs_2'))
-        PermuteAttrs.set_permutation(concat_loc_node, reshape_loc_2d_node, None)
+        reshape_loc_2d_op = Reshape(graph, dict(dim=int64_array([-1, 4])))
+        reshape_loc_2d_node = reshape_loc_2d_op.create_node([loc_node], dict(name='reshape_locs_2d', nchw_layout=True))
+        PermuteAttrs.set_permutation(loc_node, reshape_loc_2d_node, None)
 
         # element-wise multiply locations with variances
         eltwise_locs_op = Eltwise(graph, dict(operation='mul'))
         eltwise_locs_node = eltwise_locs_op.create_node([reshape_loc_2d_node, variances_const_node],
                                                         dict(name='scale_locs'))
 
-        # IE DetectionOutput layer consumes flattened tensors
-        reshape_loc_do_op = Reshape(graph, dict(dim=np.array([1, -1])))
+        # IE DetectionOutput layer consumes flattened tensors so need add a Reshape layer.
+        # The batch value of the input tensor is not equal to the batch of the topology, so it is not possible to use
+        # "0" value in the Reshape layer attribute to refer to the batch size, but we know how to
+        # calculate the second dimension so the batch value will be deduced from it with help of "-1".
+        reshape_loc_do_op = Reshape(graph, dict(dim=int64_array([-1, (num_classes + 1) * max_proposals * 4])))
 
         custom_attributes = match.custom_replacement_desc.custom_attributes
         coordinates_swap_method = 'add_convolution'
@@ -564,18 +574,21 @@ class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFil
 
         # find Proposal output which has the data layout as in TF: YXYX coordinates without batch indices.
         proposal_nodes_ids = [node_id for node_id, attrs in graph.nodes(data=True)
-                              if 'name' in attrs and attrs['name'] == 'proposals']
+                              if 'name' in attrs and attrs['name'] == 'crop_proposals']
         if len(proposal_nodes_ids) != 1:
-            raise Error("Found the following nodes '{}' with name 'proposals' but there should be exactly 1. "
+            raise Error("Found the following nodes '{}' with name 'crop_proposals' but there should be exactly 1. "
                         "Looks like ObjectDetectionAPIProposalReplacement replacement didn't work.".
                         format(proposal_nodes_ids))
         proposal_node = Node(graph, proposal_nodes_ids[0])
 
-        swapped_proposals_node = add_convolution_to_swap_xy_coordinates(graph, proposal_node, 5)
+        # check whether it is necessary to permute proposals coordinates before passing them to the DetectionOutput
+        # currently this parameter is set for the RFCN topologies
+        if 'swap_proposals' in custom_attributes and custom_attributes['swap_proposals']:
+            proposal_node = add_convolution_to_swap_xy_coordinates(graph, proposal_node, 4)
 
         # reshape priors boxes as Detection Output expects
-        reshape_priors_op = Reshape(graph, dict(dim=np.array([1, 1, -1])))
-        reshape_priors_node = reshape_priors_op.create_node([swapped_proposals_node],
+        reshape_priors_op = Reshape(graph, dict(dim=int64_array([-1, 1, max_proposals * 4])))
+        reshape_priors_node = reshape_priors_op.create_node([proposal_node],
                                                             dict(name='DetectionOutput_reshape_priors_'))
 
         detection_output_op = DetectionOutput(graph, {})
@@ -583,14 +596,16 @@ class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFil
             # update infer function to re-pack weights
             detection_output_op.attrs['old_infer'] = detection_output_op.attrs['infer']
             detection_output_op.attrs['infer'] = __class__.do_infer
+        for key in ('clip_before_nms', 'clip_after_nms'):
+            if key in match.custom_replacement_desc.custom_attributes:
+                detection_output_op.attrs[key] = int(match.custom_replacement_desc.custom_attributes[key])
+
         detection_output_node = detection_output_op.create_node(
             [reshape_loc_do_node, reshape_conf_node, reshape_priors_node],
-            dict(name=detection_output_op.attrs['type'], share_location=0, normalized=0, variance_encoded_in_target=1,
-                 clip=1, code_type='caffe.PriorBoxParameter.CENTER_SIZE', pad_mode='caffe.ResizeParameter.CONSTANT',
+            dict(name=detection_output_op.attrs['type'], share_location=0, variance_encoded_in_target=1,
+                 code_type='caffe.PriorBoxParameter.CENTER_SIZE', pad_mode='caffe.ResizeParameter.CONSTANT',
                  resize_mode='caffe.ResizeParameter.WARP',
                  num_classes=num_classes,
-                 input_height=graph.graph['preprocessed_image_height'],
-                 input_width=graph.graph['preprocessed_image_width'],
                  confidence_threshold=_value_or_raise(match, pipeline_config, 'postprocessing_score_threshold'),
                  top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_detections_per_class'),
                  keep_top_k=_value_or_raise(match, pipeline_config, 'postprocessing_max_total_detections'),
@@ -618,10 +633,13 @@ class ObjectDetectionAPIDetectionOutputReplacement(FrontReplacementFromConfigFil
 class ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement(FrontReplacementFromConfigFileSubGraph):
     replacement_id = 'ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement'
 
-    def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def run_after(self):
+        return [ObjectDetectionAPIProposalReplacement]
+
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         return {match.output_node(0)[0].id: new_sub_graph['roi_pooling_node'].id}
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         argv = graph.graph['cmd_params']
         if argv.tensorflow_object_detection_api_pipeline_config is None:
             raise Error(missing_param_error)
@@ -636,7 +654,7 @@ class ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement(FrontReplacementFrom
         detection_output_node = Node(graph, detection_output_nodes_ids[0])
 
         # add reshape of Detection Output so it can be an output of the topology
-        reshape_detection_output_2d_op = Reshape(graph, dict(dim=np.array([-1, 7])))
+        reshape_detection_output_2d_op = Reshape(graph, dict(dim=int64_array([-1, 7])))
         reshape_detection_output_2d_node = reshape_detection_output_2d_op.create_node(
             [detection_output_node], dict(name='reshape_do_2d'))
 
@@ -648,15 +666,24 @@ class ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement(FrontReplacementFrom
         output_node.in_edge()['data_attrs'].append('output_sort_order')
         output_node.in_edge()['output_sort_order'] = [('detection_boxes', 0)]
 
-        # creates the Crop operation that gets input from the DetectionOutput layer, cuts of slices of data with batch
-        # indices and class labels producing a tensor with classes probabilities and bounding boxes only as it is
-        # expected by the ROIPooling layer
-        crop_op = Crop(graph, dict(axis=np.array([3]), offset=np.array([2]), dim=np.array([5]), nchw_layout=True))
-        crop_node = crop_op.create_node([detection_output_node], dict(name='crop_do'))
+        # creates two Crop operations which get input from the DetectionOutput layer, cuts of slices of data with class
+        # ids and probabilities and produce a tensor with batch ids and bounding boxes only (as it is expected by the
+        # ROIPooling layer)
+        crop_batch_op = Crop(graph, dict(axis=int64_array([3]), offset=int64_array([0]), dim=int64_array([1]),
+                                         nchw_layout=True))
+        crop_batch_node = crop_batch_op.create_node([detection_output_node], dict(name='crop_do_batch_ids'))
+
+        crop_coordinates_op = Crop(graph, dict(axis=int64_array([3]), offset=int64_array([3]), dim=int64_array([4]),
+                                               nchw_layout=True))
+        crop_coordinates_node = crop_coordinates_op.create_node([detection_output_node], dict(name='crop_do_coords'))
+
+        concat_op = Concat(graph, dict(axis=3))
+        concat_node = concat_op.create_node([crop_batch_node, crop_coordinates_node], dict(name='batch_and_coords',
+                                                                                           nchw_layout=True))
 
         # reshape bounding boxes as required by ROIPooling
-        reshape_do_op = Reshape(graph, dict(dim=np.array([-1, 5])))
-        reshape_do_node = reshape_do_op.create_node([crop_node], dict(name='reshape_do'))
+        reshape_do_op = Reshape(graph, dict(dim=int64_array([-1, 5])))
+        reshape_do_node = reshape_do_op.create_node([concat_node], dict(name='reshape_do'))
 
         roi_pooling_op = ROIPooling(graph, dict(method="bilinear", spatial_scale=1,
                                                 pooled_h=roi_pool_size, pooled_w=roi_pool_size))
@@ -675,7 +702,7 @@ class ObjectDetectionAPIMaskRCNNSigmoidReplacement(FrontReplacementFromConfigFil
     def run_after(self):
         return [ObjectDetectionAPIMaskRCNNROIPoolingSecondReplacement]
 
-    def transform_graph(self, graph: nx.MultiDiGraph, replacement_descriptions):
+    def transform_graph(self, graph: Graph, replacement_descriptions):
         output_node = None
         op_outputs = [n for n, d in graph.nodes(data=True) if 'op' in d and d['op'] == 'OpOutput']
         for op_output in op_outputs:
@@ -711,24 +738,22 @@ class ObjectDetectionAPIProposalReplacement(FrontReplacementFromConfigFileSubGra
     def run_before(self):
         return [Sub, CropAndResizeReplacement]
 
-    def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         return {match.output_node(0)[0].id: new_sub_graph['proposal_node'].id}
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         new_list = match.matched_nodes_names().copy()
         # do not remove nodes that produce box predictions and class predictions
         new_list.remove(match.single_input_node(0)[0].id)
         new_list.remove(match.single_input_node(1)[0].id)
         return new_list
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         argv = graph.graph['cmd_params']
         if argv.tensorflow_object_detection_api_pipeline_config is None:
             raise Error(missing_param_error)
         pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config)
 
-        input_height = graph.graph['preprocessed_image_height']
-        input_width = graph.graph['preprocessed_image_width']
         max_proposals = _value_or_raise(match, pipeline_config, 'first_stage_max_proposals')
         proposal_ratios = _value_or_raise(match, pipeline_config, 'anchor_generator_aspect_ratios')
         proposal_scales = _value_or_raise(match, pipeline_config, 'anchor_generator_scales')
@@ -737,39 +762,24 @@ class ObjectDetectionAPIProposalReplacement(FrontReplacementFromConfigFileSubGra
         # Convolution/matmul node that produces classes predictions
         # Permute result of the tensor with classes permissions so it will be in a correct layout for Softmax
         predictions_node = backward_bfs_for_operation(match.single_input_node(1)[0], ['Add'])[0]
-        permute_predictions_op = Permute(graph, dict(order=np.array([0, 2, 3, 1])))
-        permute_predictions_node = permute_predictions_op.create_node([], dict(name=predictions_node.name + '/Permute'))
-        insert_node_after(predictions_node, permute_predictions_node, 0)
-
-        # creates constant input with the image height, width and scale H and scale W (if present) required for Proposal
-        const_op = Const(graph, dict(value=np.array([[input_height, input_width, 1]], dtype=np.float32)))
-        const_node = const_op.create_node([], dict(name='proposal_const_image_size'))
-
-        reshape_classes_op = Reshape(graph, dict(dim=np.array([0, -1, 2])))
-        reshape_classes_node = reshape_classes_op.create_node([permute_predictions_node],
-                                                              dict(name='reshape_FirstStageBoxPredictor_class',
-                                                                   nchw_layout=True))
 
-        softmax_conf_op = Softmax(graph, dict(axis=2))
-        softmax_conf_node = softmax_conf_op.create_node([reshape_classes_node],
-                                                        dict(name='FirstStageBoxPredictor_softMax_class'))
-        PermuteAttrs.set_permutation(reshape_classes_node, softmax_conf_node, None)
+        reshape_classes_op = Reshape(graph, dict(dim=int64_array([0, anchors_count, 2, -1])))
+        reshape_classes_node = reshape_classes_op.create_node([], dict(name='predictions/Reshape', nchw_layout=True))
+        predictions_node.insert_node_after(reshape_classes_node, 0)
 
-        reshape_softmax_op = Reshape(graph, dict(dim=np.array([1, anchors_count, 2, -1])))
-        reshape_softmax_node = reshape_softmax_op.create_node([softmax_conf_node], dict(name='reshape_softmax_class'))
-        PermuteAttrs.set_permutation(softmax_conf_node, reshape_softmax_node, None)
+        softmax_conf_op = Softmax(graph, dict(axis=2, nchw_layout=True, name=reshape_classes_node.id + '/Softmax'))
+        softmax_conf_node = softmax_conf_op.create_node([reshape_classes_node])
+        permute_reshape_softmax_op = Permute(graph, dict(order=int64_array([0, 2, 1, 3]), nchw_layout=True))
+        permute_reshape_softmax_node = permute_reshape_softmax_op.create_node([softmax_conf_node], dict(
+            name=softmax_conf_node.name + '/Permute'))
 
-        permute_reshape_softmax_op = Permute(graph, dict(order=np.array([0, 1, 3, 2])))
-        permute_reshape_softmax_node = permute_reshape_softmax_op.create_node([reshape_softmax_node], dict(
-            name=reshape_softmax_node.name + '/Permute'))
+        initial_shape_op = Shape(graph, dict(name=predictions_node.id + '/Shape'))
+        initial_shape_node = initial_shape_op.create_node([predictions_node])
 
         # implement custom reshape infer function because we need to know the input convolution node output dimension
         # sizes but we can know it only after partial infer
-        reshape_permute_op = Reshape(graph,
-                                     dict(dim=np.ones([4]), anchors_count=anchors_count, conv_node=predictions_node))
-        reshape_permute_op.attrs['old_infer'] = reshape_permute_op.attrs['infer']
-        reshape_permute_op.attrs['infer'] = __class__.classes_probabilities_reshape_shape_infer
-        reshape_permute_node = reshape_permute_op.create_node([permute_reshape_softmax_node],
+        reshape_permute_op = Reshape(graph, dict())
+        reshape_permute_node = reshape_permute_op.create_node([permute_reshape_softmax_node, initial_shape_node],
                                                               dict(name='Reshape_Permute_Class'))
 
         variance_height = pipeline_config.get_param('frcnn_variance_height')
@@ -805,46 +815,61 @@ class ObjectDetectionAPIProposalReplacement(FrontReplacementFromConfigFileSubGra
                                              feat_stride=anchor_generator_height_stride,
                                              ratio=proposal_ratios,
                                              scale=proposal_scales,
+                                             normalize=1,
                                              base_size=anchor_generator_height,
                                              nms_thresh=_value_or_raise(match, pipeline_config,
                                                                         'first_stage_nms_iou_threshold')))
+        for key in ('clip_before_nms', 'clip_after_nms'):
+            if key in match.custom_replacement_desc.custom_attributes:
+                proposal_op.attrs[key] = int(match.custom_replacement_desc.custom_attributes[key])
 
         anchors_node = backward_bfs_for_operation(match.single_input_node(0)[0], ['Add'])[0]
-        proposal_node = proposal_op.create_node([reshape_permute_node, anchors_node, const_node],
-                                                dict(name='proposals'))
 
-        # the TF implementation of ROIPooling with bi-linear filtration need proposals scaled by image size
-        proposal_scale_const = np.array([1.0, 1 / input_height, 1 / input_width, 1 / input_height, 1 / input_width],
-                                        dtype=np.float32)
-        proposal_scale_const_op = Const(graph, dict(value=proposal_scale_const))
-        proposal_scale_const_node = proposal_scale_const_op.create_node([], dict(name='Proposal_scale_const'))
+        # creates input to store input image height, width and scales (usually 1.0s)
+        # the batch size for this input is fixed because it is allowed to pass images of the same size only as input
+        input_op_with_image_size = Input(graph, dict(shape=int64_array([1, 3]), fixed_batch=True))
+        input_with_image_size_node = input_op_with_image_size.create_node([], dict(name='image_info'))
 
-        scale_proposals_op = Eltwise(graph, dict(operation='mul'))
-        scale_proposals_node = scale_proposals_op.create_node([proposal_node, proposal_scale_const_node],
-                                                              dict(name='scaled_proposals'))
+        proposal_node = proposal_op.create_node([reshape_permute_node, anchors_node, input_with_image_size_node],
+                                                dict(name='proposals'))
 
-        proposal_reshape_4d_op = Reshape(graph, dict(dim=np.array([1, 1, max_proposals, 5]), nchw_layout=True))
-        proposal_reshape_4d_node = proposal_reshape_4d_op.create_node([scale_proposals_node],
-                                                                      dict(name="reshape_proposals_4d"))
+        if 'do_not_swap_proposals' in match.custom_replacement_desc.custom_attributes and \
+                match.custom_replacement_desc.custom_attributes['do_not_swap_proposals']:
+            swapped_proposals_node = proposal_node
+        else:
+            swapped_proposals_node = add_convolution_to_swap_xy_coordinates(graph, proposal_node, 5)
 
-        # creates the Crop operation that gets input from the Proposal layer and gets tensor with bounding boxes only
-        crop_op = Crop(graph, dict(axis=np.array([3]), offset=np.array([1]), dim=np.array([4]), nchw_layout=True))
-        crop_node = crop_op.create_node([proposal_reshape_4d_node], dict(name='crop_proposals'))
+        proposal_reshape_2d_op = Reshape(graph, dict(dim=int64_array([-1, 5]), nchw_layout=True))
+        proposal_reshape_2d_node = proposal_reshape_2d_op.create_node([swapped_proposals_node],
+                                                                      dict(name="reshape_swap_proposals_2d"))
 
-        proposal_reshape_3d_op = Reshape(graph, dict(dim=np.array([0, -1, 4]), nchw_layout=True))
-        proposal_reshape_3d_node = proposal_reshape_3d_op.create_node([crop_node], dict(name="tf_proposals"))
+        # feed the CropAndResize node with a correct boxes information produced with the Proposal layer
+        # find the first CropAndResize node in the BFS order
+        crop_and_resize_nodes_ids = [node_id for node_id in bfs_search(graph, [match.single_input_node(0)[0].id]) if
+                                     graph.node[node_id]['op'] == 'CropAndResize']
+        assert len(crop_and_resize_nodes_ids) != 0, "Didn't find any CropAndResize nodes in the graph."
+        if 'do_not_swap_proposals' not in match.custom_replacement_desc.custom_attributes or not \
+                match.custom_replacement_desc.custom_attributes['do_not_swap_proposals']:
+            crop_and_resize_node = Node(graph, crop_and_resize_nodes_ids[0])
+            # set a marker that the input with box coordinates has been pre-processed so the CropAndResizeReplacement
+            # transform doesn't try to merge the second and the third inputs
+            crop_and_resize_node['inputs_preprocessed'] = True
+            graph.remove_edge(crop_and_resize_node.in_node(1).id, crop_and_resize_node.id)
+            graph.create_edge(proposal_reshape_2d_node, crop_and_resize_node, out_port=0, in_port=1)
 
-        return {'proposal_node': proposal_reshape_3d_node}
+        tf_proposal_reshape_4d_op = Reshape(graph, dict(dim=int64_array([-1, 1, max_proposals, 5]), nchw_layout=True))
+        tf_proposal_reshape_4d_node = tf_proposal_reshape_4d_op.create_node([swapped_proposals_node],
+                                                                            dict(name="reshape_proposal_4d"))
 
-    @staticmethod
-    def classes_probabilities_reshape_shape_infer(node: Node):
-        # now we can determine the reshape dimensions from Convolution node
-        conv_node = node.conv_node
-        conv_output_shape = conv_node.out_node().shape
+        crop_op = Crop(graph, dict(axis=int64_array([3]), offset=int64_array([1]), dim=int64_array([4]),
+                                   nchw_layout=True))
+        crop_node = crop_op.create_node([tf_proposal_reshape_4d_node], dict(name='crop_proposals'))
 
-        # update desired shape of the Reshape node
-        node.dim = np.array([0, conv_output_shape[1], conv_output_shape[2], node.anchors_count * 2])
-        node.old_infer(node)
+        tf_proposals_crop_reshape_3d_op = Reshape(graph, dict(dim=int64_array([0, -1, 4]), nchw_layout=True))
+        tf_proposals_crop_reshape_3d_node = tf_proposals_crop_reshape_3d_op.create_node([crop_node],
+                                                                                        dict(name="reshape_crop_3d"))
+
+        return {'proposal_node': tf_proposals_crop_reshape_3d_node}
 
 
 class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFileSubGraph):
@@ -859,12 +884,12 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi
         # nodes
         return [Div, StandaloneConstEraser]
 
-    def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         # the DetectionOutput in IE produces single tensor, but in TF it produces two tensors, so create only one output
         # edge match
         return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id}
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         argv = graph.graph['cmd_params']
         if argv.tensorflow_object_detection_api_pipeline_config is None:
             raise Error(missing_param_error)
@@ -872,7 +897,7 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi
         num_classes = _value_or_raise(match, pipeline_config, 'num_classes')
 
         # reshapes confidences to 4D before applying activation function
-        expand_dims_op = Reshape(graph, {'dim': np.array([0, 1, -1, num_classes + 1])})
+        expand_dims_op = Reshape(graph, {'dim': int64_array([0, 1, -1, num_classes + 1])})
         # do not convert from NHWC to NCHW this node shape
         expand_dims_node = expand_dims_op.create_node([match.input_nodes(1)[0][0].in_node(0)],
                                                       dict(name='do_ExpandDims_conf'))
@@ -883,13 +908,13 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi
 
         # IE DetectionOutput layer consumes flattened tensors
         # reshape operation to flatten locations tensor
-        reshape_loc_op = Reshape(graph, {'dim': np.array([0, -1])})
+        reshape_loc_op = Reshape(graph, {'dim': int64_array([0, -1])})
         reshape_loc_node = reshape_loc_op.create_node([match.input_nodes(0)[0][0].in_node(0)],
                                                       dict(name='do_reshape_loc'))
 
         # IE DetectionOutput layer consumes flattened tensors
         # reshape operation to flatten confidence tensor
-        reshape_conf_op = Reshape(graph, {'dim': np.array([0, -1])})
+        reshape_conf_op = Reshape(graph, {'dim': int64_array([0, -1])})
         reshape_conf_node = reshape_conf_op.create_node([activation_conf_node], dict(name='do_reshape_conf'))
 
         if pipeline_config.get_param('ssd_anchor_generator_num_layers') is not None or \
@@ -933,7 +958,7 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi
             variance = _variance_from_pipeline_config(pipeline_config)
             # replicating the variance values for all prior-boxes
             variances = np.tile(variance, [prior_boxes.shape[-2], 1])
-            # DetectionOutput in the Inference Engine expects the prior-boxes in the following layout: (values, variances)
+            # DetectionOutput Inference Engine expects the prior-boxes in the following layout: (values, variances)
             prior_boxes = prior_boxes.reshape([-1, 4])
             prior_boxes = np.concatenate((prior_boxes, variances), 0)
             # compared to the IE's DetectionOutput, the TF keeps the prior-boxes in YXYX, need to get back to the XYXY
@@ -941,7 +966,7 @@ class ObjectDetectionAPISSDPostprocessorReplacement(FrontReplacementFromConfigFi
                                           prior_boxes[:, 3:4], prior_boxes[:, 2:3]), 1)
             #  adding another dimensions, as the prior-boxes are expected as 3d tensors
             prior_boxes = prior_boxes.reshape((1, 2, -1))
-            node.in_node(2).shape = np.array(prior_boxes.shape, dtype=np.int64)
+            node.in_node(2).shape = int64_array(prior_boxes.shape)
             node.in_node(2).value = prior_boxes
 
         node.old_infer(node)
@@ -977,7 +1002,7 @@ class ObjectDetectionAPIOutputReplacement(FrontReplacementFromConfigFileGeneral)
     def run_before(self):
         return [ObjectDetectionAPIPreprocessorReplacement]
 
-    def transform_graph(self, graph: nx.MultiDiGraph, replacement_descriptions: dict):
+    def transform_graph(self, graph: Graph, replacement_descriptions: dict):
         if graph.graph['cmd_params'].output is not None:
             log.warning('User defined output nodes are specified. Skip the graph cut-off by the '
                         'ObjectDetectionAPIOutputReplacement.')
@@ -993,3 +1018,97 @@ class ObjectDetectionAPIOutputReplacement(FrontReplacementFromConfigFileGeneral)
                     log.debug('A node "{}" does not exist in the graph. Do not add it as output'.format(out_node_name))
         _outputs = output_user_data_repack(graph, outputs)
         add_output_ops(graph, _outputs, graph.graph['inputs'])
+
+
+class ObjectDetectionAPIPSROIPoolingReplacement(FrontReplacementFromConfigFileSubGraph):
+    replacement_id = 'ObjectDetectionAPIPSROIPoolingReplacement'
+
+    def run_after(self):
+        return [ObjectDetectionAPIProposalReplacement]
+
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
+        return {match.output_node(0)[0].id: new_sub_graph['output_node'].id}
+
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
+        argv = graph.graph['cmd_params']
+        if argv.tensorflow_object_detection_api_pipeline_config is None:
+            raise Error(missing_param_error)
+        pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config)
+        num_classes = _value_or_raise(match, pipeline_config, 'num_classes')
+
+        input_node = match.input_nodes(0)[0][0].in_node(0)
+        if 'class_predictions' in input_node.id:
+            psroipooling_output_dim = num_classes + 1
+        else:
+            psroipooling_output_dim = num_classes * 4
+
+        num_spatial_bins_height = pipeline_config.get_param('num_spatial_bins_height')
+        num_spatial_bins_width = pipeline_config.get_param('num_spatial_bins_width')
+        crop_height = pipeline_config.get_param('crop_height')
+        crop_width = pipeline_config.get_param('crop_width')
+        if crop_height != crop_width:
+            raise Error('Different "crop_height" and "crop_width" parameters from the pipeline config are not '
+                        'supported: {} vs {}'.format(crop_height, crop_width))
+        psroipooling_op = PSROIPoolingOp(graph, {'name': input_node.soft_get('name') + '/PSROIPooling',
+                                                 'output_dim': psroipooling_output_dim,
+                                                 'group_size': crop_width / num_spatial_bins_width,
+                                                 'spatial_bins_x': num_spatial_bins_width,
+                                                 'spatial_bins_y': num_spatial_bins_height,
+                                                 'mode': 'bilinear',
+                                                 'spatial_scale': 1,
+                                                 })
+
+        if 'reshape_swap_proposals_2d' in graph.nodes():
+            reshape_swap_proposals_node = Node(graph, 'reshape_swap_proposals_2d')
+        else:
+            swap_proposals_node = add_convolution_to_swap_xy_coordinates(graph, Node(graph, 'proposals'), 5)
+            reshape_swap_proposals_node = Reshape(graph, {'dim': [-1, 5], 'nchw_layout': True,
+                                                          'name': 'reshape_swap_proposals_2d'}).create_node(
+                [swap_proposals_node])
+        psroipooling_node = psroipooling_op.create_node([input_node, reshape_swap_proposals_node])
+
+        reduce_op = Reduce(graph, {'name': 'mean',
+                                   'reduce_type': 'mean',
+                                   'axis': int64_array([1, 2]),
+                                   'keep_dims': True
+                                   })
+        reduce_node = reduce_op.create_node([psroipooling_node])
+
+        graph.erase_node(match.output_node(0)[0].out_node())
+
+        return {'output_node': reduce_node}
+
+
+class ObjectDetectionAPIConstValueOverride(FrontReplacementFromConfigFileGeneral):
+    """
+    Transforms allows to override specific constant values in the topology. The replacement description configuration
+    file contains list of tuples describing the desired replacements specified in the "replacements" key of the
+    "custom_attributes". The first element in the tuple is the initial node name of the graph with constant value. The
+    second element is the name of the parameter from the pipeline configuration file which stores new value.
+
+    Usage example. The Faster-RCNNs topologies has constant node with the number specifying maximum generated proposals.
+    This value is specified in the pipeline configuration file in the parameter 'first_stage_max_proposals' and is
+    saved as a constant node in the generated topology. If the parameter is modified from it's original value then the
+    topology will be incorrect because the number 'first_stage_max_proposals' is used in the transforms of this file is
+    no more equal to the 'first_stage_max_proposals' saved as a constant.
+    """
+    replacement_id = 'ObjectDetectionAPIConstValueOverride'
+
+    def run_before(self):
+        return [ObjectDetectionAPIPreprocessorReplacement]
+
+    def transform_graph(self, graph: Graph, replacement_descriptions: dict):
+        argv = graph.graph['cmd_params']
+        if argv.tensorflow_object_detection_api_pipeline_config is None:
+            raise Error(missing_param_error)
+        pipeline_config = PipelineConfig(argv.tensorflow_object_detection_api_pipeline_config)
+        for (node_id, pipeline_config_name) in replacement_descriptions['replacements']:
+            if node_id not in graph.nodes():
+                log.debug('Node with id {} does not exist in the graph'.format(node_id))
+                continue
+            node = Node(graph, node_id)
+            if not node.has_valid('value'):
+                log.debug('Node with id {} does not have value'.format(node_id))
+                continue
+            node.value = np.array(pipeline_config.get_param(pipeline_config_name))
+            node.value = node.value.reshape(node.shape)
diff --git a/model-optimizer/extensions/front/tf/ObjectDetectionAPI_test.py b/model-optimizer/extensions/front/tf/ObjectDetectionAPI_test.py
index d9056efe4..739d6dea0 100644
--- a/model-optimizer/extensions/front/tf/ObjectDetectionAPI_test.py
+++ b/model-optimizer/extensions/front/tf/ObjectDetectionAPI_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,10 @@
 
 import unittest
 
-import networkx as nx
-
 from extensions.front.tf.ObjectDetectionAPI import calculate_shape_keeping_aspect_ratio, \
     calculate_placeholder_spatial_shape
 from mo.front.subgraph_matcher import SubgraphMatch
+from mo.graph.graph import Graph
 from mo.utils.custom_replacement_config import CustomReplacementDescriptor
 from mo.utils.error import Error
 
@@ -91,7 +90,7 @@ class TestCalculateShape(unittest.TestCase):
 
 class TestCalculatePlaceholderSpatialShape(unittest.TestCase):
     def setUp(self):
-        self.graph = nx.MultiDiGraph()
+        self.graph = Graph()
         self.graph.graph['user_shapes'] = None
         self.replacement_desc = CustomReplacementDescriptor('dummy_id', {})
         self.match = SubgraphMatch(self.graph, self.replacement_desc, [], [], [], '')
diff --git a/model-optimizer/extensions/front/tf/RetinaNetFilteredDetectionsReplacement.py b/model-optimizer/extensions/front/tf/RetinaNetFilteredDetectionsReplacement.py
index a46bb50c7..b0f6eaebf 100644
--- a/model-optimizer/extensions/front/tf/RetinaNetFilteredDetectionsReplacement.py
+++ b/model-optimizer/extensions/front/tf/RetinaNetFilteredDetectionsReplacement.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,14 +14,13 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from extensions.ops.DetectionOutput import DetectionOutput
 from extensions.ops.splitv import SplitV
 from mo.front.subgraph_matcher import SubgraphMatch
 from mo.front.tf.replacement import FrontReplacementFromConfigFileSubGraph
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.concat import Concat
 from mo.ops.const import Const
 from mo.ops.eltwise import Eltwise
@@ -43,23 +42,23 @@ class RetinaNetFilteredDetectionsReplacement(FrontReplacementFromConfigFileSubGr
     replacement_id = 'RetinaNetFilteredDetectionsReplacement'
 
     @staticmethod
-    def _create_sub(graph: nx.MultiDiGraph, input_1: Node, port_1: int, input_2: Node, port_2: int):
+    def _create_sub(graph: Graph, input_1: Node, port_1: int, input_2: Node, port_2: int):
         negate = Power(graph, dict(scale=-1, name=input_2.name + '/negate_'))
         add = Eltwise(graph, dict(operation='sum', name=input_1.name + '/add_'))
         out_node = add.create_node([(input_1, port_1), negate.create_node([(input_2, port_2)])])
         return out_node
 
-    def output_edges_match(self, graph: nx.DiGraph, match: SubgraphMatch, new_sub_graph: dict):
+    def output_edges_match(self, graph: Graph, match: SubgraphMatch, new_sub_graph: dict):
         return {match.output_node(0)[0].id: new_sub_graph['detection_output_node'].id}
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         new_nodes_to_remove = match.matched_nodes_names()
         new_nodes_to_remove.remove(match.single_input_node(0)[0].id)
         new_nodes_to_remove.remove(match.single_input_node(1)[0].id)
         new_nodes_to_remove.remove(match.single_input_node(2)[0].id)
         return new_nodes_to_remove
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         reshape_classes_op = Reshape(graph, {'dim': np.array([0, -1])})
         reshape_classes_node = reshape_classes_op.create_node([match.single_input_node(1)[0]],
                                                               dict(name='do_reshape_classes'))
@@ -79,12 +78,12 @@ class RetinaNetFilteredDetectionsReplacement(FrontReplacementFromConfigFileSubGr
             [priors_node, priors_scale_const_node])
 
         # calculate prior boxes widths and heights
-        split_node = SplitV(graph, {'axis': 2, 'size_splits': [1, 1, 1, 1]}).create_node([priors_scale_node])
+        split_node = SplitV(graph, {'axis': 2, 'size_splits': [1, 1, 1, 1], 'out_ports_count': 4}).create_node([priors_scale_node])
         priors_width_node = __class__._create_sub(graph, split_node, 2, split_node, 0)
         priors_height_node = __class__._create_sub(graph, split_node, 3, split_node, 1)
 
         # concat weights and heights into a single tensor and multiple with the box coordinates regression values
-        concat_width_height_node = Concat(graph, {'name': 'concat_priors_width_height', 'axis': -1}).create_node(
+        concat_width_height_node = Concat(graph, {'name': 'concat_priors_width_height', 'axis': -1, 'in_ports_count': 4}).create_node(
             [priors_width_node, priors_height_node, priors_width_node, priors_height_node])
         applied_width_height_regressions_node = Eltwise(graph, {'name': 'final_regressions', 'operation': 'mul'}). \
             create_node([concat_width_height_node, match.single_input_node(0)[0]])
diff --git a/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py b/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py
index 278998c37..15fa70f20 100644
--- a/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py
+++ b/model-optimizer/extensions/front/tf/SSDToolboxDetectionOutput.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,14 +14,13 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from extensions.front.standalone_const_eraser import StandaloneConstEraser
 from extensions.ops.DetectionOutput import DetectionOutput
 from mo.front.subgraph_matcher import SubgraphMatch
 from mo.front.tf.replacement import FrontReplacementFromConfigFileSubGraph
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import PermuteAttrs
 from mo.ops.output import Output
 from mo.ops.reshape import Reshape
@@ -33,16 +32,28 @@ class SSDToolboxDetectionOutputReplacement(FrontReplacementFromConfigFileSubGrap
     def run_before(self):
         return [StandaloneConstEraser]
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         return []
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         # IE DetectionOutput layer consumes flattened confidences and locations tensors.
         # That is why we add reshapes before them.
         locs_node = match.single_input_node(0)
         conf_node = match.single_input_node(1)
         prior_boxes_node = match.single_input_node(2)
 
+        locs_out_nodes = locs_node[0].out_nodes()
+        assert len(locs_out_nodes) == 1
+        locs_out_node = locs_out_nodes[list(locs_out_nodes.keys())[0]]
+        assert locs_out_node.op == "OpOutput", locs_out_node.op
+        graph.remove_node(locs_out_node.id)
+
+        conf_out_nodes = conf_node[0].out_nodes()
+        assert len(conf_out_nodes) == 1
+        conf_out_node = conf_out_nodes[list(conf_out_nodes.keys())[0]]
+        assert conf_out_node.op == "OpOutput", conf_out_node.op
+        graph.remove_node(conf_out_node.id)
+
         # reshape operation to flatten confidence tensor
         reshape_loc_op = Reshape(graph, {'dim': np.array([0, -1])})
         reshape_loc_node = reshape_loc_op.create_node([locs_node], dict(name='DetectionOutput_Reshape_loc_'))
diff --git a/model-optimizer/extensions/front/tf/TensorArrayExtractors.py b/model-optimizer/extensions/front/tf/TensorArrayExtractors.py
index 20e0d692e..b7d7d4bdf 100644
--- a/model-optimizer/extensions/front/tf/TensorArrayExtractors.py
+++ b/model-optimizer/extensions/front/tf/TensorArrayExtractors.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/TensorArrayGatherV3.py b/model-optimizer/extensions/front/tf/TensorArrayGatherV3.py
index 46c29c21c..d4dfcfcf8 100644
--- a/model-optimizer/extensions/front/tf/TensorArrayGatherV3.py
+++ b/model-optimizer/extensions/front/tf/TensorArrayGatherV3.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/Unpack.py b/model-optimizer/extensions/front/tf/Unpack.py
index 30af2d32d..005459875 100644
--- a/model-optimizer/extensions/front/tf/Unpack.py
+++ b/model-optimizer/extensions/front/tf/Unpack.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementOp
-from mo.graph.graph import Node, insert_node_after
+from mo.graph.graph import Node, Graph
 from mo.ops.squeeze import Squeeze
 
 
@@ -29,14 +27,14 @@ class Unpack(FrontReplacementOp):
     op = "Unpack"
     enabled = True
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: dict):
+    def nodes_to_remove(self, graph: Graph, match: dict):
         # do not remove matched node
         return []
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         for ind in range(len(node.out_nodes())):
             squeeze_node = Squeeze(graph, dict(squeeze_dims=[node.axis], name=node.name + '/Squeeze_')).create_node([])
-            insert_node_after(node, squeeze_node, ind)
+            node.insert_node_after(squeeze_node, ind)
 
         # do not replace any output edge
         return []
diff --git a/model-optimizer/extensions/front/tf/YOLO.py b/model-optimizer/extensions/front/tf/YOLO.py
index 294725499..651e5acda 100644
--- a/model-optimizer/extensions/front/tf/YOLO.py
+++ b/model-optimizer/extensions/front/tf/YOLO.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,14 +13,12 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import networkx as nx
 
 from extensions.front.no_op_eraser import NoOpEraser
 from extensions.front.standalone_const_eraser import StandaloneConstEraser
 from extensions.ops.regionyolo import RegionYoloOp
 from mo.front.tf.replacement import FrontReplacementFromConfigFileGeneral
-from mo.graph.graph import Node
-from mo.middle.passes.eliminate import get_nodes_with_attributes
+from mo.graph.graph import Node, Graph
 from mo.ops.output import Output
 from mo.utils.error import Error
 
@@ -35,7 +33,7 @@ class YoloRegionAddon(FrontReplacementFromConfigFileGeneral):
     def run_after(self):
         return [NoOpEraser, StandaloneConstEraser]
 
-    def transform_graph(self, graph: nx.MultiDiGraph, replacement_descriptions):
+    def transform_graph(self, graph: Graph, replacement_descriptions):
         op_outputs = [n for n, d in graph.nodes(data=True) if 'op' in d and d['op'] == 'OpOutput']
         for op_output in op_outputs:
             last_node = Node(graph, op_output).in_node(0)
@@ -55,8 +53,8 @@ class YoloV3RegionAddon(FrontReplacementFromConfigFileGeneral):
     """
     replacement_id = 'TFYOLOV3'
 
-    def transform_graph(self, graph: nx.MultiDiGraph, replacement_descriptions):
-        graph.remove_nodes_from(get_nodes_with_attributes(graph, is_output=True))
+    def transform_graph(self, graph: Graph, replacement_descriptions):
+        graph.remove_nodes_from(graph.get_nodes_with_attributes(op='OpOutput'))
         for input_node_name in replacement_descriptions['entry_points']:
             if input_node_name not in graph.nodes():
                 raise Error('TensorFlow YOLO V3 conversion mechanism was enabled. '
@@ -66,7 +64,7 @@ class YoloV3RegionAddon(FrontReplacementFromConfigFileGeneral):
                             'Refer to documentation about converting YOLO models for more information.'.format(
                     ', '.join(replacement_descriptions['entry_points']), input_node_name))
             last_node = Node(graph, input_node_name).in_node(0)
-            op_params = dict(name=last_node.id + '/YoloRegion', axis=1, end_axis=-1, do_softmax=0, is_output=True)
+            op_params = dict(name=last_node.id + '/YoloRegion', axis=1, end_axis=-1, do_softmax=0)
             op_params.update(replacement_descriptions)
             region_layer_node = RegionYoloOp(graph, op_params).create_node([last_node])
             # TODO: do we need change axis for further permutation
diff --git a/model-optimizer/extensions/front/tf/ZerosLike.py b/model-optimizer/extensions/front/tf/ZerosLike.py
new file mode 100644
index 000000000..e58f3239e
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/ZerosLike.py
@@ -0,0 +1,38 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Graph, Node
+from mo.ops.power import Power
+
+
+class ZerosLikeReplacer(FrontReplacementOp):
+    """
+    Replace TF operation ZerosLike by multiplying input tensor by zero.
+    """
+    op = "ZerosLike"
+    enabled = True
+
+    def nodes_to_remove(self, graph: Graph, match: dict):
+        # do not remove matched node
+        return []
+
+    def replace_op(self, graph: Graph, node: Node):
+        power = Power(graph, dict(scale=0, name=node.name + '/Power/')).create_node()
+
+        # Reconnecting inputs to this new node
+        node.in_port(0).get_connection().set_destination(power.in_port(0))
+        node.out_port(0).get_connection().set_source(power.out_port(0))
+        return [power.id]
diff --git a/model-optimizer/extensions/front/tf/addn_ext.py b/model-optimizer/extensions/front/tf/addn_ext.py
index 78fde8b19..f01b16c17 100644
--- a/model-optimizer/extensions/front/tf/addn_ext.py
+++ b/model-optimizer/extensions/front/tf/addn_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/argmax_ext.py b/model-optimizer/extensions/front/tf/argmax_ext.py
index a5a40ae76..ef4eb1213 100644
--- a/model-optimizer/extensions/front/tf/argmax_ext.py
+++ b/model-optimizer/extensions/front/tf/argmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/assign_elimination.py b/model-optimizer/extensions/front/tf/assign_elimination.py
index 2a6dc07de..6550c27b0 100644
--- a/model-optimizer/extensions/front/tf/assign_elimination.py
+++ b/model-optimizer/extensions/front/tf/assign_elimination.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@ import logging as log
 import networkx as nx
 
 from mo.front.common.replacement import FrontReplacementOp
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 
 
@@ -26,7 +27,7 @@ class AssignElimination(FrontReplacementOp):
     op = "Assign"
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         node = match['op']
         # here we request all data flow output edges (control flow edges will not be listed)
         out_edges = node.out_edges()
@@ -41,7 +42,7 @@ class AssignSubElimination(FrontReplacementOp):
     op = "AssignSub"
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         node = match['op']
         # here we request all data flow output edges (control flow edges will not be listed)
         out_edges = node.out_edges()
@@ -56,7 +57,7 @@ class AssignAddElimination(FrontReplacementOp):
     op = "AssignAdd"
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         node = match['op']
         # here we request all data flow output edges (control flow edges will not be listed)
         out_edges = node.out_edges()
@@ -65,3 +66,18 @@ class AssignAddElimination(FrontReplacementOp):
             log.debug('AssignAdd op was removed {}'.format(node.id))
         else:
             raise Error('Data flow edge coming out of AssignAdd node {}'.format(node.id))
+
+
+class AssertElimination(FrontReplacementOp):
+    op = "Assert"
+    enabled = True
+
+    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+        node = match['op']
+        # here we request all data flow output edges (control flow edges will not be listed)
+        out_edges = node.out_edges()
+        if len(out_edges) == 0:
+            graph.remove_node(node.id)
+            log.debug('Assert op was removed {}'.format(node.id))
+        else:
+            raise Error('Data flow edge coming out of Assert node {}'.format(node.id))
diff --git a/model-optimizer/extensions/front/tf/basic_lstm_cell.py b/model-optimizer/extensions/front/tf/basic_lstm_cell.py
index 37391ae49..fc0b40dcb 100644
--- a/model-optimizer/extensions/front/tf/basic_lstm_cell.py
+++ b/model-optimizer/extensions/front/tf/basic_lstm_cell.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,28 +14,27 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from extensions.ops.lstm_cell import LSTMCell
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import Node, replace_node, get_inputs_with_ports
+from mo.graph.graph import Node, Graph
 from mo.ops.output import Output
 
 
 class BasicLSTMCell(FrontReplacementSubgraph):
     enabled = True
 
+    # When the deprecated IR version was requested, we configure only those phases that can lead
+    # to functional regressions in the version 2. BasicLSTMCell is one such transformation;
+    # when it is turned off, the body of TF basic_lstm_cell is converted as-is in a decomposed form,
+    # and should work in version 2.
+    graph_condition = [lambda graph: graph.graph['ir_version'] != 2]
+
     # list of names of all original nodes that are supported by IE
     # this list is collected gradually by a separate transformation
     # original name in this case is a selected node in the pattern
     # that is returned from anchor() function
     instances_supported_by_IE = []
 
-    # True if transformation should be activated only for instances collected in supported_by_IE list
-    # It will be set to True by a separate transformation
-    second_round = False
-
-
     def __init__(self):
 
         super().__init__()
@@ -50,7 +49,6 @@ class BasicLSTMCell(FrontReplacementSubgraph):
 
         __class__.outputs = ['mul_2', 'add_1']
 
-
     def pattern(self):
         return dict(
             nodes=[
@@ -87,10 +85,10 @@ class BasicLSTMCell(FrontReplacementSubgraph):
                 ('biasadd', 'split', {'in': 1}),
 
                 # This important block specifies how gates are ordered in TF graph
-                ('split', 'sigmoid_1', {'out': 0}),   # i
-                ('split', 'tanh_0', {'out': 1}),      # c
-                ('split', 'shift', {'out': 2}),       # f (this is unbiased f, there is an extra addition here)
-                ('split', 'sigmoid_2', {'out': 3}),   # o
+                ('split', 'sigmoid_1', {'out': 0}),  # i
+                ('split', 'tanh_0', {'out': 1}),  # c
+                ('split', 'shift', {'out': 2}),  # f (this is unbiased f, there is an extra addition here)
+                ('split', 'sigmoid_2', {'out': 3}),  # o
 
                 ('shift_const', 'shift', {}),
                 ('shift', 'sigmoid_0', {}),
@@ -107,25 +105,6 @@ class BasicLSTMCell(FrontReplacementSubgraph):
                 ('sigmoid_2', 'mul_2', {}),
             ])
 
-
-    @staticmethod
-    def mark_supported_by_IE(node: Node):
-        """ Mark a given node as a supported LSTMCell by setting attribute `supported_by_IE`.
-            The node original name is also included in the list of all supported by IE LSTMCell
-            instances for possible second round of the network conversion.
-        """
-        assert node.has_valid('original_name'), \
-            'Node {} doesn\'t have a reference to original FW operation name; bad LSTMCell'.format(node.soft_get('name'))
-        __class__.instances_supported_by_IE.append(node.original_name)
-        node['supported_by_IE'] = True
-
-
-    @staticmethod
-    def finalize_first_round():
-        """ Switch the mode of this pattern into `second stage` where only supported patterns are converted. """
-        __class__.second_round = True
-
-
     @staticmethod
     def anchor():
         """ Mnemonic name in the pattern that is used as an anchor name for this pattern in the original graph.
@@ -133,8 +112,7 @@ class BasicLSTMCell(FrontReplacementSubgraph):
         """
         return 'concat'
 
-
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
 
         # node that is used to identify this pattern application instance for switching between supported
         # and not supported LSTMCell sub-graphs; this value will be searched in __class__.instances_supported_by_IE.
@@ -142,25 +120,17 @@ class BasicLSTMCell(FrontReplacementSubgraph):
         assert anchor_node.has_valid('name'), \
             'LSTMCell anchor node {} does\'t have attribute name; such nodes are not supported.'
 
-        if __class__.second_round and anchor_node.name not in __class__.instances_supported_by_IE:
-            # at the second round of conversion we apply pattern selectively: only instances from
-            # __class__.instances_supported_by_IE are allowed for conversion; all others should be skipped
-            return
-
         match['input_op'] = match['concat'].in_node(0)
         match['input_hidden_state'] = match['concat'].in_node(1)
-        match['input_cell_state'] = match['mul_0'].in_node(0) if match['mul_0'].in_node(0).id != match['sigmoid_0'].id \
-            else match['mul_0'].in_node(1)
+        match['input_cell_state'] = match['mul_0'].in_node(0) \
+            if match['mul_0'].in_node(0).id != match['sigmoid_0'].id else match['mul_0'].in_node(1)
 
         pattern_edges = self.pattern()['edges']
         pattern_edges.extend([('input_op', 'concat'), ('input_cell_state', 'mul_0'), ('input_hidden_state', 'concat')])
-        inputs = get_inputs_with_ports(graph, match, pattern_edges, __class__.inputs + __class__.extra_inputs)
+        inputs = graph.get_inputs_with_ports(match, pattern_edges, __class__.inputs + __class__.extra_inputs)
 
         lstm_op = LSTMCell(graph, dict(
-            name=match['concat'].name + '/LSTMCell',
-            mark_supported_by_IE=__class__.mark_supported_by_IE,
-            original_name=anchor_node.name,
-            finalize_first_round=__class__.finalize_first_round,
+            name=match['concat'].name + '/LSTMCell', activations=None,
         ))
         lstm_node = lstm_op.create_node(inputs)
         lstm_node['old_infer'] = lstm_node.infer
@@ -172,7 +142,7 @@ class BasicLSTMCell(FrontReplacementSubgraph):
         graph.remove_node(match['tanh_1'].id)
 
         for i, output in enumerate(__class__.outputs):
-            replace_node(match[output], lstm_node, i)
+            match[output].replace_node(lstm_node, i)
 
         # Because of LSTMCell specification, this layer MUST have 2 outputs.
         # => we need to create fake consumers for LSTMCell
@@ -186,7 +156,6 @@ class BasicLSTMCell(FrontReplacementSubgraph):
         lstm_node['extra_inputs'] = {name: match[name].id for name in __class__.extra_inputs}
         lstm_node['inputs'] = {name: match[name].id for name in __class__.inputs}
 
-
     @staticmethod
     def infer(node: Node):
         assert len(node.in_nodes()) == len(__class__.inputs) + len(__class__.extra_inputs)
diff --git a/model-optimizer/extensions/front/tf/concat.py b/model-optimizer/extensions/front/tf/concat.py
index 87ea9f351..9d3a0d4cb 100644
--- a/model-optimizer/extensions/front/tf/concat.py
+++ b/model-optimizer/extensions/front/tf/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.replacement import FrontReplacementSubgraph
+from mo.graph.graph import Graph
 
 
 class Concat(FrontReplacementSubgraph):
@@ -28,7 +27,7 @@ class Concat(FrontReplacementSubgraph):
             edges=[]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         """
         There are Concat and ConcatV2 operations in TensorFlow
         The main difference is incoming port of tensor representing axis of concatenation
diff --git a/model-optimizer/extensions/front/tf/concat_ext.py b/model-optimizer/extensions/front/tf/concat_ext.py
index 95ef2623d..a9c18abf4 100644
--- a/model-optimizer/extensions/front/tf/concat_ext.py
+++ b/model-optimizer/extensions/front/tf/concat_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/concat_ext_test.py b/model-optimizer/extensions/front/tf/concat_ext_test.py
index 9cf90216d..16f96ac65 100644
--- a/model-optimizer/extensions/front/tf/concat_ext_test.py
+++ b/model-optimizer/extensions/front/tf/concat_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/concat_test.py b/model-optimizer/extensions/front/tf/concat_test.py
index abee3b0b0..7682245aa 100644
--- a/model-optimizer/extensions/front/tf/concat_test.py
+++ b/model-optimizer/extensions/front/tf/concat_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/conv_ext.py b/model-optimizer/extensions/front/tf/conv_ext.py
index 00931debc..d008ced76 100644
--- a/model-optimizer/extensions/front/tf/conv_ext.py
+++ b/model-optimizer/extensions/front/tf/conv_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/conv_ext_test.py b/model-optimizer/extensions/front/tf/conv_ext_test.py
index d420f9101..6813d57df 100644
--- a/model-optimizer/extensions/front/tf/conv_ext_test.py
+++ b/model-optimizer/extensions/front/tf/conv_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/crop_and_resize_ext.py b/model-optimizer/extensions/front/tf/crop_and_resize_ext.py
index 98034c6a2..11503a713 100644
--- a/model-optimizer/extensions/front/tf/crop_and_resize_ext.py
+++ b/model-optimizer/extensions/front/tf/crop_and_resize_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/deconv_ext.py b/model-optimizer/extensions/front/tf/deconv_ext.py
index 8838cd5f8..df046c7af 100644
--- a/model-optimizer/extensions/front/tf/deconv_ext.py
+++ b/model-optimizer/extensions/front/tf/deconv_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/deconv_ext_test.py b/model-optimizer/extensions/front/tf/deconv_ext_test.py
index c11d4da45..333c785f0 100644
--- a/model-optimizer/extensions/front/tf/deconv_ext_test.py
+++ b/model-optimizer/extensions/front/tf/deconv_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/depth_to_space.py b/model-optimizer/extensions/front/tf/depth_to_space.py
index d422141d6..53a0d8354 100644
--- a/model-optimizer/extensions/front/tf/depth_to_space.py
+++ b/model-optimizer/extensions/front/tf/depth_to_space.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/exp_ext.py b/model-optimizer/extensions/front/tf/exp_ext.py
new file mode 100644
index 000000000..77165799d
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/exp_ext.py
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.activation import Activation
+
+
+class ExpExtractor(FrontExtractorOp):
+    op = 'Exp'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        Activation.update_node_stat(node, {'operation': 'exp'})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/extract_image_patches.py b/model-optimizer/extensions/front/tf/extract_image_patches.py
index a6e0837ed..fd544d6d6 100644
--- a/model-optimizer/extensions/front/tf/extract_image_patches.py
+++ b/model-optimizer/extensions/front/tf/extract_image_patches.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/fake_const.py b/model-optimizer/extensions/front/tf/fake_const.py
index 2a487ef78..0ba757942 100644
--- a/model-optimizer/extensions/front/tf/fake_const.py
+++ b/model-optimizer/extensions/front/tf/fake_const.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,11 +15,12 @@
 """
 import logging as log
 
-import networkx as nx
+import numpy as np
 
+from mo.front.common.partial_infer.utils import int64_array
 from mo.front.common.replacement import FrontReplacementOp
 from mo.front.tf.extractors.utils import tf_dtype_extractor
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.const import Const
 
 
@@ -27,7 +28,7 @@ class FakeConstToConst(FrontReplacementOp):
     op = "FakeConst"
     enabled = True
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         node = match['op']
         if not node.has_valid('value'):
             log.debug("No value in FakeConst node {}".format(node.id))
@@ -35,7 +36,7 @@ class FakeConstToConst(FrontReplacementOp):
         node_value = node.value
         extracted_attrs = {
             'data_type': tf_dtype_extractor(node.pb.attr['dtype'].type),
-            'shape': node_value.shape,
+            'shape': int64_array(node_value.shape),
             'value': node_value
         }
         Const.update_node_stat(node, extracted_attrs)
diff --git a/model-optimizer/extensions/front/tf/faster_rcnn_support.json b/model-optimizer/extensions/front/tf/faster_rcnn_support.json
index b2d8b3753..c535044f7 100644
--- a/model-optimizer/extensions/front/tf/faster_rcnn_support.json
+++ b/model-optimizer/extensions/front/tf/faster_rcnn_support.json
@@ -36,6 +36,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false
         },
         "id": "ObjectDetectionAPIProposalReplacement",
         "include_inputs_to_sub_graph": true,
@@ -57,6 +59,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false,
             "coordinates_swap_method": "swap_weights"
         },
         "id": "ObjectDetectionAPIDetectionOutputReplacement",
@@ -97,5 +101,13 @@
         },
         "id": "ObjectDetectionAPIOutputReplacement",
         "match_kind": "general"
+    },
+    {
+        "custom_attributes":
+        {
+            "replacements": [["mul/y", "first_stage_max_proposals"]]
+        },
+        "id": "ObjectDetectionAPIConstValueOverride",
+        "match_kind": "general"
     }
-]
-\ No newline at end of file
+]
diff --git a/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.10.json b/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.10.json
new file mode 100644
index 000000000..95be086e3
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.10.json
@@ -0,0 +1,113 @@
+[
+    {
+        "custom_attributes": {
+        },
+        "id": "ObjectDetectionAPIPreprocessorReplacement",
+        "inputs": [
+            [
+                {
+                    "node": "map/Shape$",
+                    "port": 0
+                },
+                {
+                    "node": "map/TensorArrayUnstack/Shape$",
+                    "port": 0
+                },
+                {
+                    "node": "map/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3$",
+                    "port": 2
+                }
+            ]
+        ],
+        "instances": [
+            ".*Preprocessor/"
+        ],
+        "match_kind": "scope",
+        "outputs": [
+            {
+                "node": "sub$",
+                "port": 0
+            },
+            {
+                "node": "map/TensorArrayStack_1/TensorArrayGatherV3$",
+                "port": 0
+            }
+        ]
+    },
+    {
+        "custom_attributes": {
+            "clip_before_nms": false,
+            "clip_after_nms": true
+        },
+        "id": "ObjectDetectionAPIProposalReplacement",
+        "include_inputs_to_sub_graph": true,
+        "include_outputs_to_sub_graph": true,
+        "instances": {
+            "end_points": [
+                "map/TensorArrayStack/TensorArrayGatherV3",
+                "map_1/TensorArrayStack/TensorArrayGatherV3",
+                "BatchMultiClassNonMaxSuppression/map/TensorArrayStack_4/TensorArrayGatherV3"
+            ],
+            "start_points": [
+                "concat",
+                "concat_1",
+                "GridAnchorGenerator/Identity",
+                "Shape"
+            ]
+        },
+        "match_kind": "points"
+    },
+    {
+        "custom_attributes": {
+            "clip_before_nms": false,
+            "clip_after_nms": true,
+            "coordinates_swap_method": "swap_weights"
+        },
+        "id": "ObjectDetectionAPIDetectionOutputReplacement",
+        "inputs": [
+            [
+                {
+                    "node": "Reshape$",
+                    "port": 0
+                }
+            ],
+            [
+                {
+                    "node": "Reshape_1$",
+                    "port": 0
+                }
+            ],
+            [
+                {
+                    "node": "ExpandDims$",
+                    "port": 0
+                }
+            ]
+        ],
+        "instances": [
+            ".*SecondStagePostprocessor/"
+        ],
+        "match_kind": "scope",
+        "outputs": [
+            {
+                "node": "BatchMultiClassNonMaxSuppression/map/TensorArrayStack/TensorArrayGatherV3$",
+                "port": 0
+            }
+        ]
+    },
+    {
+       "custom_attributes": {
+            "outputs": "detection_boxes,detection_scores,num_detections"
+        },
+        "id": "ObjectDetectionAPIOutputReplacement",
+        "match_kind": "general"
+    },
+    {
+        "custom_attributes":
+        {
+            "replacements": [["mul/y", "first_stage_max_proposals"]]
+        },
+        "id": "ObjectDetectionAPIConstValueOverride",
+        "match_kind": "general"
+    }
+]
+\ No newline at end of file
diff --git a/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.7.json b/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.7.json
index 8f9d74c3f..6eba96ffa 100644
--- a/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.7.json
+++ b/model-optimizer/extensions/front/tf/faster_rcnn_support_api_v1.7.json
@@ -36,6 +36,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false
         },
         "id": "ObjectDetectionAPIProposalReplacement",
         "include_inputs_to_sub_graph": true,
@@ -57,6 +59,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false,
             "coordinates_swap_method": "swap_weights"
         },
         "id": "ObjectDetectionAPIDetectionOutputReplacement",
@@ -97,5 +101,13 @@
         },
         "id": "ObjectDetectionAPIOutputReplacement",
         "match_kind": "general"
+    },
+    {
+        "custom_attributes":
+        {
+            "replacements": [["mul/y", "first_stage_max_proposals"]]
+        },
+        "id": "ObjectDetectionAPIConstValueOverride",
+        "match_kind": "general"
     }
 ]
 \ No newline at end of file
diff --git a/model-optimizer/extensions/front/tf/fifo_queue_v2_ext.py b/model-optimizer/extensions/front/tf/fifo_queue_v2_ext.py
index 5a2b591f2..238ae683f 100644
--- a/model-optimizer/extensions/front/tf/fifo_queue_v2_ext.py
+++ b/model-optimizer/extensions/front/tf/fifo_queue_v2_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -37,6 +37,6 @@ class FIFOQueueV2Extractor(FrontExtractorOp):
             if len(shape) == 3:
                 result_shapes.append(np.array([1, shape[0].size, shape[1].size, shape[2].size], dtype=np.int64))
             else:
-                result_shapes.append(np.array(shape, dtype=np.int64))
+                result_shapes.append(np.array([dim.size for dim in shape], dtype=np.int64))
         Op.update_node_stat(node, {'shapes': result_shapes, 'types': extracted_types})
         return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/fifo_replacer.py b/model-optimizer/extensions/front/tf/fifo_replacer.py
index 576dcf162..9063cf593 100644
--- a/model-optimizer/extensions/front/tf/fifo_replacer.py
+++ b/model-optimizer/extensions/front/tf/fifo_replacer.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,17 +15,20 @@
 """
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import create_edge, erase_node, Node
+from mo.graph.graph import Graph, Node
 from mo.ops.input import Input
 
 
 class FIFOQueue(FrontReplacementSubgraph):
     enabled = True
 
+    def run_before(self):
+        from extensions.front.override_batch import OverrideBatch
+        return [OverrideBatch]
+
     @staticmethod
     def pattern(**kwargs):
         return dict(
@@ -43,7 +46,7 @@ class FIFOQueue(FrontReplacementSubgraph):
         )
 
     @staticmethod
-    def replace_sub_graph(graph: nx.MultiDiGraph, match: dict, **kwargs):
+    def replace_sub_graph(graph: Graph, match: dict, **kwargs):
         """
         Usually graph looks like:
 
@@ -70,16 +73,16 @@ class FIFOQueue(FrontReplacementSubgraph):
                 ''.format(match['placeholder'].id, true_placeholder_shape, placeholder_shape))
             placeholder_shape = true_placeholder_shape
         placeholder_name = match['fifo_queue'].name
-        erase_node(match['fifo_queue'])
-        erase_node(match['placeholder'])
+        graph.erase_node(match['fifo_queue'])
+        graph.erase_node(match['placeholder'])
         for _, out in match['batch_join'].out_nodes().items():
             if out.id != match['image_batch'].id:
                 if out.out_node().op == 'OpOutput':
-                    erase_node(out.out_node())
-                erase_node(out)
-        erase_node(match['batch_join'])
+                    graph.remove_node(out.out_node().id)
+                graph.remove_node(out.id)
+        graph.remove_node(match['batch_join'].id)
         placeholder = Input(graph, {'name': placeholder_name, 'shape': placeholder_shape}).create_node()
-        create_edge(placeholder, match['image_batch'])
+        graph.create_edge(placeholder, match['image_batch'])
         log.info("FIFOQueueV2 pattern was detected. New shape of placeholder {} is {}. Use -b to set batch size if "
                  "needed".format(placeholder.id, placeholder['shape']))
 
@@ -90,6 +93,10 @@ class QueueDequeueManyV2(FrontReplacementSubgraph):
     """
     enabled = True
 
+    def run_before(self):
+        from extensions.front.override_batch import OverrideBatch
+        return [OverrideBatch]
+
     @staticmethod
     def pattern(**kwargs):
         return dict(
@@ -103,7 +110,7 @@ class QueueDequeueManyV2(FrontReplacementSubgraph):
         )
 
     @staticmethod
-    def replace_sub_graph(graph: nx.MultiDiGraph, match: dict, **kwargs):
+    def replace_sub_graph(graph: Graph, match: dict, **kwargs):
         inputs_dict = {}
         for u, v, edge_attrs in graph.out_edges(match['queue_deque'].id, data=True):
             out_port = edge_attrs['out']
@@ -111,7 +118,7 @@ class QueueDequeueManyV2(FrontReplacementSubgraph):
             if out_port not in inputs_dict:
                 input_op = Input(graph, {'shape': shape.copy()})
                 inputs_dict[out_port] = input_op.create_node([])
-            create_edge(inputs_dict[out_port], Node(graph, v), edge_attrs['out'], edge_attrs['in'], edge_attrs)
+            graph.create_edge(inputs_dict[out_port], Node(graph, v), edge_attrs['out'], edge_attrs['in'], edge_attrs)
 
         graph.remove_node(match['queue_deque'].id)
         graph.remove_node(match['fifo_queue'].id)
diff --git a/model-optimizer/extensions/front/tf/fifo_replacer_test.py b/model-optimizer/extensions/front/tf/fifo_replacer_test.py
index e1150c2e6..a7a65d417 100644
--- a/model-optimizer/extensions/front/tf/fifo_replacer_test.py
+++ b/model-optimizer/extensions/front/tf/fifo_replacer_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/gather_ext.py b/model-optimizer/extensions/front/tf/gather_ext.py
index 0cb924f31..1c3a7e2f8 100644
--- a/model-optimizer/extensions/front/tf/gather_ext.py
+++ b/model-optimizer/extensions/front/tf/gather_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/mask_rcnn_support.json b/model-optimizer/extensions/front/tf/mask_rcnn_support.json
index 9ff12e33a..383cb948e 100644
--- a/model-optimizer/extensions/front/tf/mask_rcnn_support.json
+++ b/model-optimizer/extensions/front/tf/mask_rcnn_support.json
@@ -36,6 +36,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false
         },
         "id": "ObjectDetectionAPIProposalReplacement",
         "include_inputs_to_sub_graph": true,
@@ -57,6 +59,8 @@
     },
     {
        "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false,
             "coordinates_swap_method": "swap_weights"
         },
         "id": "ObjectDetectionAPIDetectionOutputReplacement",
@@ -104,5 +108,13 @@
         },
         "id": "ObjectDetectionAPIOutputReplacement",
         "match_kind": "general"
+    },
+    {
+        "custom_attributes":
+        {
+            "replacements": [["mul/y", "first_stage_max_proposals"]]
+        },
+        "id": "ObjectDetectionAPIConstValueOverride",
+        "match_kind": "general"
     }
-]
-\ No newline at end of file
+]
diff --git a/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.11.json b/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.11.json
index 6220ea188..178b53bb6 100644
--- a/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.11.json
+++ b/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.11.json
@@ -36,6 +36,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": false,
+            "clip_after_nms": true
         },
         "id": "ObjectDetectionAPIProposalReplacement",
         "include_inputs_to_sub_graph": true,
@@ -57,6 +59,8 @@
     },
     {
        "custom_attributes": {
+            "clip_before_nms": false,
+            "clip_after_nms": true,
             "coordinates_swap_method": "swap_weights"
         },
         "id": "ObjectDetectionAPIDetectionOutputReplacement",
@@ -104,5 +108,13 @@
         },
         "id": "ObjectDetectionAPIOutputReplacement",
         "match_kind": "general"
+    },
+    {
+        "custom_attributes":
+        {
+            "replacements": [["mul/y", "first_stage_max_proposals"]]
+        },
+        "id": "ObjectDetectionAPIConstValueOverride",
+        "match_kind": "general"
     }
 ]
 \ No newline at end of file
diff --git a/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.7.json b/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.7.json
index 9b59125f0..3574f7a49 100644
--- a/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.7.json
+++ b/model-optimizer/extensions/front/tf/mask_rcnn_support_api_v1.7.json
@@ -36,6 +36,8 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false
         },
         "id": "ObjectDetectionAPIProposalReplacement",
         "include_inputs_to_sub_graph": true,
@@ -57,6 +59,8 @@
     },
     {
        "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false,
             "coordinates_swap_method": "swap_weights"
         },
         "id": "ObjectDetectionAPIDetectionOutputReplacement",
@@ -104,5 +108,13 @@
         },
         "id": "ObjectDetectionAPIOutputReplacement",
         "match_kind": "general"
+    },
+    {
+        "custom_attributes":
+        {
+            "replacements": [["mul/y", "first_stage_max_proposals"]]
+        },
+        "id": "ObjectDetectionAPIConstValueOverride",
+        "match_kind": "general"
     }
 ]
 \ No newline at end of file
diff --git a/model-optimizer/extensions/front/tf/max_ext.py b/model-optimizer/extensions/front/tf/max_ext.py
index 34f1bafda..a68fea0ae 100644
--- a/model-optimizer/extensions/front/tf/max_ext.py
+++ b/model-optimizer/extensions/front/tf/max_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/mvn.py b/model-optimizer/extensions/front/tf/mvn.py
index 0dd00eec7..c03cae9bc 100644
--- a/model-optimizer/extensions/front/tf/mvn.py
+++ b/model-optimizer/extensions/front/tf/mvn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ import networkx as nx
 
 from extensions.front.squared_difference import SquaredDifference
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import Node, replace_node
+from mo.graph.graph import Node, Graph
 from mo.ops.eltwise import Eltwise
 from mo.ops.op import Op
 
@@ -53,7 +53,7 @@ class MVN(FrontReplacementSubgraph):
                 ('squeeze_variance', 'fbn', {'in': 4}),
             ])
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         fbn = match['fbn']
         input = fbn.in_node(0)
         log.debug('Found potential MVN pattern after {} with name {}'.format(input.op, input.name))
@@ -87,8 +87,7 @@ class MVN(FrontReplacementSubgraph):
             ]),
             input_beta
         ])
-
-        replace_node(fbn, new_subgraph)
+        fbn.replace_node(new_subgraph)
 
     @staticmethod
     def infer(node: Node):
diff --git a/model-optimizer/extensions/front/tf/mvn_unrolled.py b/model-optimizer/extensions/front/tf/mvn_unrolled.py
index a73ed496f..2c33f52f9 100644
--- a/model-optimizer/extensions/front/tf/mvn_unrolled.py
+++ b/model-optimizer/extensions/front/tf/mvn_unrolled.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,13 +16,11 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.front.squared_difference import SquaredDifference
 from extensions.front.sub import Sub
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import Node, replace_node
-from mo.ops.div import Div
+from mo.graph.graph import Node, Graph
+from extensions.front.div import Div
 from mo.ops.op import Op
 
 
@@ -57,7 +55,7 @@ class MVNUnrolled(FrontReplacementSubgraph):
             ])
 
     @staticmethod
-    def replace_sub_graph(graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(graph: Graph, match: dict):
         MVN = Op.get_op_class_by_name('MVN')
 
         mvn = MVN(graph, dict(
@@ -74,7 +72,7 @@ class MVNUnrolled(FrontReplacementSubgraph):
 
         new_subgraph = mvn.create_node([match['mean'].in_node(0), mean_reduction, variance_reduction, pow2, eps])
 
-        replace_node(match['truediv'], new_subgraph)
+        match['truediv'].replace_node(new_subgraph)
 
     @staticmethod
     def infer(node: Node):
diff --git a/model-optimizer/extensions/front/tf/mvn_unrolled_test.py b/model-optimizer/extensions/front/tf/mvn_unrolled_test.py
index de9618b1f..11dd6405a 100644
--- a/model-optimizer/extensions/front/tf/mvn_unrolled_test.py
+++ b/model-optimizer/extensions/front/tf/mvn_unrolled_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/nearest_neighbor_upsampling.py b/model-optimizer/extensions/front/tf/nearest_neighbor_upsampling.py
index 23b1f45aa..d42b73b7b 100644
--- a/model-optimizer/extensions/front/tf/nearest_neighbor_upsampling.py
+++ b/model-optimizer/extensions/front/tf/nearest_neighbor_upsampling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,12 +16,10 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.front.Pack import Pack
 from extensions.ops.resample import ResampleOp
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import replace_node
+from mo.graph.graph import Node, Graph
 
 
 class NearestNeighborUpsampling(FrontReplacementSubgraph):
@@ -56,7 +54,7 @@ class NearestNeighborUpsampling(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         log.debug('Matched NearestNeighborUpsampling pattern: {}'.format([node.id for node in match.values()]))
         try:
             input_height = match['pack_1'].in_node(1).value.item()
@@ -73,5 +71,5 @@ class NearestNeighborUpsampling(FrontReplacementSubgraph):
                                          'resample_type': 'caffe.ResampleParameter.NEAREST'})
         resample_node = resample_op.create_node([match['op']])
 
-        replace_node(match['reshape_2'], resample_node)
+        match['reshape_2'].replace_node(resample_node)
         graph.remove_nodes_from([node.id for node in match.values() if node.id != match['op'].id])
diff --git a/model-optimizer/extensions/front/tf/next_iteration_ext.py b/model-optimizer/extensions/front/tf/next_iteration_ext.py
index ceb385c15..0968b697d 100644
--- a/model-optimizer/extensions/front/tf/next_iteration_ext.py
+++ b/model-optimizer/extensions/front/tf/next_iteration_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/next_iteration_ext_test.py b/model-optimizer/extensions/front/tf/next_iteration_ext_test.py
index 98e0ab6c7..0d0455c0b 100644
--- a/model-optimizer/extensions/front/tf/next_iteration_ext_test.py
+++ b/model-optimizer/extensions/front/tf/next_iteration_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/pad_ext.py b/model-optimizer/extensions/front/tf/pad_ext.py
index 542d9aaa9..98aabb518 100644
--- a/model-optimizer/extensions/front/tf/pad_ext.py
+++ b/model-optimizer/extensions/front/tf/pad_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/pad_ext_test.py b/model-optimizer/extensions/front/tf/pad_ext_test.py
index 138b4f0e5..f1a930219 100644
--- a/model-optimizer/extensions/front/tf/pad_ext_test.py
+++ b/model-optimizer/extensions/front/tf/pad_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/pooling_ext.py b/model-optimizer/extensions/front/tf/pooling_ext.py
index 772747c5b..29fd59c8f 100644
--- a/model-optimizer/extensions/front/tf/pooling_ext.py
+++ b/model-optimizer/extensions/front/tf/pooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/pooling_ext_test.py b/model-optimizer/extensions/front/tf/pooling_ext_test.py
index a03095ee9..85a13d05a 100644
--- a/model-optimizer/extensions/front/tf/pooling_ext_test.py
+++ b/model-optimizer/extensions/front/tf/pooling_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/prelu.py b/model-optimizer/extensions/front/tf/prelu.py
index bea37f385..15b13bc8c 100644
--- a/model-optimizer/extensions/front/tf/prelu.py
+++ b/model-optimizer/extensions/front/tf/prelu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,12 +16,10 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.front.sub import Sub
 from extensions.ops.prelu import PreluOp
 from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.graph.graph import replace_node
+from mo.graph.graph import Graph
 from mo.middle.pattern_match import check_node_usages_out_of_match
 
 
@@ -49,7 +47,7 @@ class PReLU(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         consumers = [n for n in match if n not in ['mul', 'op', 'add'] and not check_node_usages_out_of_match(match, n)]
         if consumers:
             log.warning('PReLU pattern was detected. Non pattern consumers of nodes: "{}" were found. Won\'t replace'
@@ -57,7 +55,7 @@ class PReLU(FrontReplacementSubgraph):
             return
         gamma = match['mul'].in_node(0) if match['mul'].in_node(1).id == match['neg_1'].id else match['mul'].in_node(1)
         prelu_node = PreluOp(graph, {'name': '{}/PReLU'.format(match['add'].id)}).create_node([match['op'], gamma])
-        replace_node(match['add'], prelu_node)
+        match['add'].replace_node(prelu_node)
         log.debug('PReLU pattern starting from "{}" was collapsed to "{}"'.format(match['op'].id, prelu_node.id))
 
 
@@ -89,7 +87,7 @@ class PReLUWithAbs(FrontReplacementSubgraph):
             ]
         )
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         consumers = [n for n in match if
                      n not in ['mul', 'mul_1', 'op', 'add', 'abs', 'sub'] and not check_node_usages_out_of_match(match,
                                                                                                                  n)]
@@ -99,5 +97,5 @@ class PReLUWithAbs(FrontReplacementSubgraph):
             return
         gamma = match['mul'].in_node(0) if match['mul'].in_node(1).id == match['sub'].id else match['mul'].in_node(1)
         prelu_node = PreluOp(graph, {'name': '{}/PReLU'.format(match['add'].id)}).create_node([match['op'], gamma])
-        replace_node(match['add'], prelu_node)
+        match['add'].replace_node(prelu_node)
         log.debug('PReLUWithAbs pattern starting from "{}" was collapsed to "{}"'.format(match['op'].id, prelu_node.id))
diff --git a/model-optimizer/extensions/front/tf/rank_ext.py b/model-optimizer/extensions/front/tf/rank_ext.py
index 71ca94d23..7ad44b464 100644
--- a/model-optimizer/extensions/front/tf/rank_ext.py
+++ b/model-optimizer/extensions/front/tf/rank_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/resize_bilinear.py b/model-optimizer/extensions/front/tf/resize_bilinear.py
index f7670acb9..9519b8435 100644
--- a/model-optimizer/extensions/front/tf/resize_bilinear.py
+++ b/model-optimizer/extensions/front/tf/resize_bilinear.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/resize_nearest_neighbor.py b/model-optimizer/extensions/front/tf/resize_nearest_neighbor.py
index 0b8b8ec49..f86ad585d 100644
--- a/model-optimizer/extensions/front/tf/resize_nearest_neighbor.py
+++ b/model-optimizer/extensions/front/tf/resize_nearest_neighbor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/reverse_sequence.py b/model-optimizer/extensions/front/tf/reverse_sequence.py
index 2c6491ff5..75b9d2565 100644
--- a/model-optimizer/extensions/front/tf/reverse_sequence.py
+++ b/model-optimizer/extensions/front/tf/reverse_sequence.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.front.extractor import FrontExtractorOp
 from extensions.ops.reverse_sequence import ReverseSequence
+from mo.front.extractor import FrontExtractorOp
 
 
 class ReverseSequenceFrontExtractor(FrontExtractorOp):
@@ -24,8 +24,11 @@ class ReverseSequenceFrontExtractor(FrontExtractorOp):
 
     @staticmethod
     def extract(node):
+        if node.has_valid('seq_dim'):
+            return
+
         ReverseSequence.update_node_stat(node, {
-            'seq_dim': node.pb.attr['seq_dim'],
-            'batch_dim': node.pb.attr['batch_dim'],
+            'seq_axis': node.pb.attr['seq_dim'].i,
+            'batch_axis': node.pb.attr['batch_dim'].i,
         })
         return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/reverse_v2.py b/model-optimizer/extensions/front/tf/reverse_v2.py
index 6254d23b3..02241ff5b 100644
--- a/model-optimizer/extensions/front/tf/reverse_v2.py
+++ b/model-optimizer/extensions/front/tf/reverse_v2.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,9 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
+from extensions.ops.Reverse import Reverse
 from mo.front.extractor import FrontExtractorOp
-from extensions.ops.reverse_sequence import ReverseSequence
 
 
 class ReverseV2FrontExtractor(FrontExtractorOp):
@@ -24,5 +23,5 @@ class ReverseV2FrontExtractor(FrontExtractorOp):
 
     @staticmethod
     def extract(node):
-        ReverseSequence.update_node_stat(node)
+        Reverse.update_node_stat(node)
         return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/rfcn_support.json b/model-optimizer/extensions/front/tf/rfcn_support.json
index 3f612fade..2e250c086 100644
--- a/model-optimizer/extensions/front/tf/rfcn_support.json
+++ b/model-optimizer/extensions/front/tf/rfcn_support.json
@@ -36,6 +36,9 @@
     },
     {
         "custom_attributes": {
+            "clip_before_nms": true,
+            "clip_after_nms": false,
+            "do_not_swap_proposals": true
         },
         "id": "ObjectDetectionAPIProposalReplacement",
         "include_inputs_to_sub_graph": true,
@@ -57,7 +60,10 @@
     },
     {
         "custom_attributes": {
-            "coordinates_swap_method": "add_convolution"
+            "clip_before_nms": true,
+            "clip_after_nms": false,
+            "coordinates_swap_method": "add_convolution",
+            "swap_proposals": true
         },
         "id": "ObjectDetectionAPIDetectionOutputReplacement",
         "inputs": [
diff --git a/model-optimizer/extensions/front/tf/rfcn_support_api_v1.10.json b/model-optimizer/extensions/front/tf/rfcn_support_api_v1.10.json
new file mode 100644
index 000000000..c0ed3be4f
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/rfcn_support_api_v1.10.json
@@ -0,0 +1,145 @@
+[
+    {
+        "custom_attributes": {},
+        "id": "ObjectDetectionAPIPreprocessorReplacement",
+        "inputs": [
+            [
+                {
+                    "node": "map/Shape$",
+                    "port": 0
+                },
+                {
+                    "node": "map/TensorArrayUnstack/Shape$",
+                    "port": 0
+                },
+                {
+                    "node": "map/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3$",
+                    "port": 2
+                }
+            ]
+        ],
+        "instances": [
+            ".*Preprocessor/"
+        ],
+        "match_kind": "scope",
+        "outputs": [
+            {
+                "node": "sub$",
+                "port": 0
+            },
+            {
+                "node": "map/TensorArrayStack_1/TensorArrayGatherV3$",
+                "port": 0
+            }
+        ]
+    },
+    {
+        "custom_attributes": {
+            "clip_before_nms": false,
+            "clip_after_nms": true
+        },
+        "id": "ObjectDetectionAPIProposalReplacement",
+        "include_inputs_to_sub_graph": true,
+        "include_outputs_to_sub_graph": true,
+        "instances": {
+            "end_points": [
+                "map/TensorArrayStack/TensorArrayGatherV3",
+                "map_1/TensorArrayStack/TensorArrayGatherV3",
+                "BatchMultiClassNonMaxSuppression/map/TensorArrayStack_4/TensorArrayGatherV3"
+            ],
+            "start_points": [
+                "FirstStageBoxPredictor/Reshape",
+                "FirstStageBoxPredictor/Reshape_1",
+                "GridAnchorGenerator/Identity",
+                "Shape"
+            ]
+        },
+        "match_kind": "points"
+    },
+    {
+        "custom_attributes": {
+           "clip_before_nms": false,
+           "clip_after_nms": true,
+           "coordinates_swap_method": "add_convolution"
+        },
+        "id": "ObjectDetectionAPIDetectionOutputReplacement",
+        "inputs": [
+            [
+                {
+                    "node": "Reshape$",
+                    "port": 0
+                }
+            ],
+            [
+                {
+                    "node": "Reshape_1$",
+                    "port": 0
+                }
+            ],
+            [
+                {
+                    "node": "ExpandDims$",
+                    "port": 0
+                }
+            ]
+        ],
+        "instances": [
+            ".*SecondStagePostprocessor/"
+        ],
+        "match_kind": "scope",
+        "outputs": [
+            {
+                "node": "BatchMultiClassNonMaxSuppression/map/TensorArrayStack/TensorArrayGatherV3$",
+                "port": 0
+            }
+        ]
+    },
+    {
+        "custom_attributes": {},
+        "id": "ObjectDetectionAPIPSROIPoolingReplacement",
+        "inputs": [
+            [
+                {
+                    "node": "Shape$",
+                    "port": 0
+                },
+                {
+                    "node": "TensorArrayUnstack/Shape$",
+                    "port": 0
+                },
+                {
+                    "node": "TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3$",
+                    "port": 2
+                }
+            ],
+            [
+                {
+                    "node": "TensorArrayUnstack_1/TensorArrayScatter/TensorArrayScatterV3$",
+                    "port": 2
+                },
+                {
+                    "node": "TensorArrayUnstack_1/Shape$",
+                    "port": 0
+                }
+            ]
+        ],
+        "instances": [
+            "SecondStageBoxPredictor/map/",
+            "SecondStageBoxPredictor/map_1/"
+        ],
+        "match_kind": "scope",
+        "outputs": [
+            {
+                "node": "TensorArrayStack/TensorArrayGatherV3$",
+                "port": 0
+            }
+        ]
+    },
+    {
+       "custom_attributes": {
+             "outputs": "detection_boxes"
+        },
+        "id": "ObjectDetectionAPIOutputReplacement",
+        "match_kind": "general"
+    }
+]
diff --git a/model-optimizer/extensions/front/tf/shape_ext.py b/model-optimizer/extensions/front/tf/shape_ext.py
new file mode 100644
index 000000000..1a6c0d796
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/shape_ext.py
@@ -0,0 +1,31 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.front.extractor import FrontExtractorOp
+from mo.front.tf.extractors.utils import tf_dtype_extractor
+from mo.graph.graph import Node
+from mo.ops.shape import Shape
+
+
+class ShapeExtractor(FrontExtractorOp):
+    op = 'Shape'
+    enabled = True
+
+    @staticmethod
+    def extract(node: Node):
+        Shape.update_node_stat(node, {'data_type': tf_dtype_extractor(node.pb.attr['out_type'].type, np.int32)})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/slice_ext.py b/model-optimizer/extensions/front/tf/slice_ext.py
index 54881b3d6..ab9d053bc 100644
--- a/model-optimizer/extensions/front/tf/slice_ext.py
+++ b/model-optimizer/extensions/front/tf/slice_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/softmax_ext.py b/model-optimizer/extensions/front/tf/softmax_ext.py
index 8891b5f9f..6f0b02964 100644
--- a/model-optimizer/extensions/front/tf/softmax_ext.py
+++ b/model-optimizer/extensions/front/tf/softmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/split_ext.py b/model-optimizer/extensions/front/tf/split_ext.py
index e316a811d..e713c805b 100644
--- a/model-optimizer/extensions/front/tf/split_ext.py
+++ b/model-optimizer/extensions/front/tf/split_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/sqrt_ext.py b/model-optimizer/extensions/front/tf/sqrt_ext.py
index 0886316d2..d68c27025 100644
--- a/model-optimizer/extensions/front/tf/sqrt_ext.py
+++ b/model-optimizer/extensions/front/tf/sqrt_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/square_ext.py b/model-optimizer/extensions/front/tf/square_ext.py
index 6a3e939d2..457c82e03 100644
--- a/model-optimizer/extensions/front/tf/square_ext.py
+++ b/model-optimizer/extensions/front/tf/square_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/stop_gradient_ext.py b/model-optimizer/extensions/front/tf/stop_gradient_ext.py
index fd166a71a..a7320e5ff 100644
--- a/model-optimizer/extensions/front/tf/stop_gradient_ext.py
+++ b/model-optimizer/extensions/front/tf/stop_gradient_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/stop_gradient_ext_test.py b/model-optimizer/extensions/front/tf/stop_gradient_ext_test.py
index 603039307..6b1f7c7ae 100644
--- a/model-optimizer/extensions/front/tf/stop_gradient_ext_test.py
+++ b/model-optimizer/extensions/front/tf/stop_gradient_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/sum_ext.py b/model-optimizer/extensions/front/tf/sum_ext.py
new file mode 100644
index 000000000..6394dd95f
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/sum_ext.py
@@ -0,0 +1,28 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.extractor import FrontExtractorOp
+from mo.graph.graph import Node
+from mo.ops.reduce import Reduce
+
+
+class SumFrontExtractor(FrontExtractorOp):
+    op = 'Sum'
+    enabled = True
+
+    @staticmethod
+    def extract(node: Node):
+        Reduce.update_node_stat(node, {'keep_dims': node.pb.attr["keep_dims"].b, 'reduce_type': 'sum'})
+        return __class__.enabled
diff --git a/model-optimizer/extensions/front/tf/tensorflow_custom_operations_config_update.py b/model-optimizer/extensions/front/tf/tensorflow_custom_operations_config_update.py
new file mode 100644
index 000000000..bf2e551b4
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/tensorflow_custom_operations_config_update.py
@@ -0,0 +1,61 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import json
+
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
+from mo.utils.custom_replacement_config import parse_custom_replacement_config_file
+from mo.utils.error import Error
+from mo.utils.utils import refer_to_faq_msg
+
+
+class TensorflowCustomOperationsConfigUpdate(FrontReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].tensorflow_custom_operations_config_update is not None]
+
+    def run_before(self):
+        return []
+
+    def run_after(self):
+        from extensions.front.freeze_placeholder_value import FreezePlaceholderValue
+        return [FreezePlaceholderValue]
+
+    @staticmethod
+    def save_custom_replacement_config_file(descriptions: list, file_name: str):
+        """
+        Save custom layer(s) description(s) to the file.
+        :param file_name: file to save description information to.
+        :param descriptions: list with instances of the CustomLayerDescriptor classes.
+        :return: True if operation is successful.
+        """
+        try:
+            json.dump([replacement_desc.get_config_file_representation() for replacement_desc in descriptions],
+                      open(file_name, "w"), indent=4, sort_keys=True)
+        except Exception as ex:
+            raise Error("failed to update configuration file {}: {}".format(file_name, str(ex)))
+
+    def find_and_replace_pattern(self, graph: Graph):
+        argv = graph.graph['cmd_params']
+        file_name = argv.tensorflow_custom_operations_config_update
+
+        data = parse_custom_replacement_config_file(file_name)
+        if data is None:
+            raise Error("Cannot update the file '{}' because it is broken. ".format(file_name) + refer_to_faq_msg(73))
+
+        for replacement_desc in data:
+            replacement_desc.update_custom_replacement_attributes(graph)
+
+        self.save_custom_replacement_config_file(data, file_name)
diff --git a/model-optimizer/extensions/front/tf/tensorflow_patterns.py b/model-optimizer/extensions/front/tf/tensorflow_patterns.py
new file mode 100644
index 000000000..c4e5673e6
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/tensorflow_patterns.py
@@ -0,0 +1,51 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import mo.front.tf.custom_subgraph_call as csc
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.graph.graph import Graph
+
+
+class TensorflowSubgraphPatterns(FrontReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].tensorflow_subgraph_patterns is not None]
+
+    def run_before(self):
+        return []
+
+    def run_after(self):
+        from extensions.front.tf.tensorflow_custom_operations_config_update import \
+            TensorflowCustomOperationsConfigUpdate
+        return [TensorflowCustomOperationsConfigUpdate]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        argv = graph.graph['cmd_params']
+        csc.replace_subgraph_calls(graph, argv.tensorflow_subgraph_patterns)
+
+
+class TensorflowOperationPatterns(FrontReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].tensorflow_operation_patterns is not None]
+
+    def run_before(self):
+        from extensions.front.tf.tensorflow_use_custom_operations_config import TensorflowUseCustomOperationsConfig
+        return [TensorflowUseCustomOperationsConfig]
+
+    def run_after(self):
+        return [TensorflowSubgraphPatterns]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        argv = graph.graph['cmd_params']
+        csc.offload_operations_to_tf(graph, argv.tensorflow_operation_patterns)
diff --git a/model-optimizer/extensions/front/tf/tensorflow_use_custom_operations_config.py b/model-optimizer/extensions/front/tf/tensorflow_use_custom_operations_config.py
new file mode 100644
index 000000000..8438657c9
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/tensorflow_use_custom_operations_config.py
@@ -0,0 +1,44 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.custom_replacement_registry import CustomReplacementRegistry
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.front.tf.replacement import FrontReplacementFromConfigFileOp
+from mo.graph.graph import Graph
+from mo.utils.class_registration import update_registration, get_enabled_and_disabled_transforms
+
+
+class TensorflowUseCustomOperationsConfig(FrontReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['cmd_params'].tensorflow_use_custom_operations_config is not None]
+
+    def run_before(self):
+        from extensions.front.pass_separator import FrontStart
+        return [FrontStart]
+
+    def run_after(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        argv = graph.graph['cmd_params']
+        registry = CustomReplacementRegistry()
+        registry.add_custom_replacement_description_from_config(argv.tensorflow_use_custom_operations_config)
+
+        # automatically generate sub-classes for custom replacements that replace sub-graph with a single node
+        for replacement_desc in registry.get_all_replacements_descriptions():
+            if replacement_desc.has('op'):
+                type('FrontReplacementFromConfigFileOp' + replacement_desc.op, (FrontReplacementFromConfigFileOp,),
+                     {'replacement_id': replacement_desc.id})
+        update_registration([FrontReplacementFromConfigFileOp], *get_enabled_and_disabled_transforms())
diff --git a/model-optimizer/extensions/front/tf/tile_ext.py b/model-optimizer/extensions/front/tf/tile_ext.py
index 7f8e861f2..1d745b556 100644
--- a/model-optimizer/extensions/front/tf/tile_ext.py
+++ b/model-optimizer/extensions/front/tf/tile_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/variable_ext.py b/model-optimizer/extensions/front/tf/variable_ext.py
index 7f4c270ac..20280004c 100644
--- a/model-optimizer/extensions/front/tf/variable_ext.py
+++ b/model-optimizer/extensions/front/tf/variable_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/front/tf/variables_values_freezing.py b/model-optimizer/extensions/front/tf/variables_values_freezing.py
new file mode 100644
index 000000000..c9be92ead
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/variables_values_freezing.py
@@ -0,0 +1,36 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.front.tf.loader import variables_to_constants
+from mo.graph.graph import Graph
+
+
+class VariablesToConstants(FrontReplacementPattern):
+    enabled = True
+    force_clean_up = True
+    graph_condition = [lambda graph: graph.graph['variables_values']]
+
+    def run_after(self):
+        from extensions.front.input_cut import InputCut
+        return [InputCut]
+
+    def run_before(self):
+        from extensions.front.freeze_placeholder_value import FreezePlaceholderValue
+        return [FreezePlaceholderValue]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        variables_to_constants(graph, graph.graph['variables_values'])
+        del graph.graph['variables_values']
diff --git a/model-optimizer/extensions/front/tf/yolo_v3_tiny.json b/model-optimizer/extensions/front/tf/yolo_v3_tiny.json
new file mode 100644
index 000000000..76f0a397f
--- /dev/null
+++ b/model-optimizer/extensions/front/tf/yolo_v3_tiny.json
@@ -0,0 +1,14 @@
+[
+  {
+    "id": "TFYOLOV3",
+    "match_kind": "general",
+    "custom_attributes": {
+      "classes": 80,
+      "anchors": [10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319],
+      "coords": 4,
+      "num": 6,
+      "mask": [0, 1, 2],
+      "entry_points": ["detector/yolo-v3-tiny/Reshape", "detector/yolo-v3-tiny/Reshape_4"]
+    }
+  }
+]
+\ No newline at end of file
diff --git a/model-optimizer/extensions/front/user_data_repack.py b/model-optimizer/extensions/front/user_data_repack.py
new file mode 100644
index 000000000..2e6b88b8f
--- /dev/null
+++ b/model-optimizer/extensions/front/user_data_repack.py
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.replacement import FrontReplacementPattern
+from mo.front.extractor import user_data_repack
+from mo.graph.graph import Graph
+
+
+class UserDataRepack(FrontReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        argv = graph.graph['cmd_params']
+
+        packed_user_shapes, packed_outputs, freeze_placeholder = user_data_repack(
+            graph, argv.placeholder_shapes, argv.output, argv.freeze_placeholder_with_value)
+
+        graph.graph['user_shapes'] = packed_user_shapes
+        graph.graph['packed_outputs'] = packed_outputs
+        graph.graph['freeze_placeholder'] = freeze_placeholder
+
+        inputs = list(packed_user_shapes.keys()) \
+            if packed_user_shapes is not None and isinstance(packed_user_shapes, dict) else None
+        graph.graph['inputs'] = inputs  # save user defined inputs for other extensions
diff --git a/model-optimizer/extensions/middle/AddIsCyclicAttribute.py b/model-optimizer/extensions/middle/AddIsCyclicAttribute.py
index c2616ad4d..d70495da5 100644
--- a/model-optimizer/extensions/middle/AddIsCyclicAttribute.py
+++ b/model-optimizer/extensions/middle/AddIsCyclicAttribute.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,11 +15,22 @@
 """
 
 import networkx as nx
+
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 
 
 class AddIsCyclicAttribute(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.DeleteControlFlowEdges import DeleteControlFlowEdges
+        return [DeleteControlFlowEdges]
+
+    def run_before(self):
+        return []
+
     @staticmethod
-    def find_and_replace_pattern(graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(graph: Graph):
         is_acyclic = nx.is_directed_acyclic_graph(graph)
-        graph.graph['is_cyclic'] = not is_acyclic
-\ No newline at end of file
+        graph.graph['is_cyclic'] = not is_acyclic
diff --git a/model-optimizer/extensions/middle/AddIsCyclicAttribute_test.py b/model-optimizer/extensions/middle/AddIsCyclicAttribute_test.py
index 81f4ba74a..ddc7b4cd3 100644
--- a/model-optimizer/extensions/middle/AddIsCyclicAttribute_test.py
+++ b/model-optimizer/extensions/middle/AddIsCyclicAttribute_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/AddMeanScaleValues.py b/model-optimizer/extensions/middle/AddMeanScaleValues.py
new file mode 100644
index 000000000..a72a9ad04
--- /dev/null
+++ b/model-optimizer/extensions/middle/AddMeanScaleValues.py
@@ -0,0 +1,122 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.graph.graph import Graph, Node
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.lin_op import Add, Mul
+from mo.ops.op import Op
+from mo.utils.error import Error
+from mo.utils.utils import refer_to_faq_msg
+
+
+class AddMeanScaleValues(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    @staticmethod
+    def apply_scale(graph: Graph, input_node: Node, node_mean_scale_values: dict):
+        if 'scale' in node_mean_scale_values and node_mean_scale_values['scale'] is not None:
+            if all([x == 1 for x in node_mean_scale_values['scale']]):
+                return
+            out_node = input_node.out_node()
+            if not input_node.has_valid('shape'):
+                raise Error("Node {} has not valid shape attribute".format(input_node.id))
+            input_shape = input_node.shape
+
+            # Create Mul node
+            value = 1 / np.array(node_mean_scale_values['scale'])
+            graph.remove_edge(input_node.id, out_node.id)
+
+            mul_node = Mul(graph, dict(name="Mul_"))
+            mul_data = Op.create_input_data_node(graph, "data_mul_", np.array(value))
+            Op.expand_node_shape(mul_data, (len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0))
+            mul_input = Op.create_data_node(graph, input_node, {'shape': out_node.shape})
+
+            mul_node.create_node_with_data(inputs=[mul_input, mul_data], data_nodes=out_node)
+
+    @staticmethod
+    def apply_mean_value(graph: Graph, input_node: Node, node_mean_scale_values: dict):
+        if 'mean' in node_mean_scale_values and node_mean_scale_values['mean'] is not None:
+            if all([x == 0 for x in node_mean_scale_values['mean']]):
+                return
+            out_node = input_node.out_node()
+            if not input_node.has_valid('shape'):
+                raise Error("Node {} has not valid shape attribute".format(input_node.id))
+            input_shape = input_node.shape
+            # Create Add node
+            graph.remove_edge(input_node.id, out_node.id)
+
+            value = np.array(node_mean_scale_values['mean']) * (-1)
+
+            add_node = Add(graph, dict(name="Add_"))
+            add_data = Op.create_input_data_node(graph, "data_add_", np.array(value))
+            Op.expand_node_shape(add_data, (len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0))
+            add_input = Op.create_data_node(graph, input_node, {'shape': out_node.shape})
+
+            add_node.create_node_with_data(inputs=[add_input, add_data], data_nodes=out_node)
+
+    def find_and_replace_pattern(self, graph: Graph):
+        input_nodes = {}
+        values = graph.graph['cmd_params'].mean_scale_values
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.op == 'Placeholder':
+                input_nodes.update({node.id: node})
+
+        if not isinstance(values, dict):
+            if len(values) != len(input_nodes):
+                raise Error('Numbers of inputs and mean/scale values do not match. ' +
+                            refer_to_faq_msg(61))
+
+            data = np.copy(values)
+            values = {}
+            for idx, key in enumerate(input_nodes.keys()):
+                values.update(
+                    {
+                        input_nodes[key]['name']: {
+                            'mean': data[idx][0],
+                            'scale': data[idx][1]
+                        }
+                    }
+                )
+
+        for node_name in values:
+            node_id = graph.get_node_id_by_name(node_name)
+            node_mean_scale_values = values[node_name]
+            if node_id not in input_nodes:
+                # if the user cutted-off input of the network then input node name specified in the --scale_values
+                # or --mean_values doesn't correspond to a real input node generated by Model Optimizer. But
+                # the information about initial input node name is stored in Placeholder's attribute 'initial_node_name'
+                new_node_id = None
+                for placeholder in input_nodes.values():
+                    if placeholder.has('initial_node_name') and placeholder.initial_node_name == node_name:
+                        new_node_id = placeholder.id
+                        break
+                if new_node_id is None:
+                    raise Error('Input with name {} wasn\'t found!'.format(node_name) +
+                                refer_to_faq_msg(83))
+                node_id = new_node_id
+
+            input_node = Node(graph, node_id)
+            AddMeanScaleValues.apply_scale(graph, input_node, node_mean_scale_values)
+            AddMeanScaleValues.apply_mean_value(graph, input_node, node_mean_scale_values)
diff --git a/model-optimizer/extensions/middle/AddMeanScaleValues_test.py b/model-optimizer/extensions/middle/AddMeanScaleValues_test.py
new file mode 100644
index 000000000..0cfa31850
--- /dev/null
+++ b/model-optimizer/extensions/middle/AddMeanScaleValues_test.py
@@ -0,0 +1,252 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+from argparse import Namespace
+
+import numpy as np
+
+from extensions.middle.AddMeanScaleValues import AddMeanScaleValues
+from mo.graph.graph import Node
+from mo.utils.cli_parser import get_mean_scale_dictionary, parse_tuple_pairs
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'node_1_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'node_2': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'concat': {'type': 'Concat', 'value': None, 'kind': 'op'},
+                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'node_3_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    # Placeholders
+                    'placeholder_1': {'shape': None, 'type': 'Input', 'kind': 'op', 'op': 'Placeholder'},
+                    'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+                    'placeholder_2': {'shape': None, 'type': 'Input', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_1': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_1_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'pl_2': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_2_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+                    # ScaleShift layer
+                    'scaleshift_1': {'type': 'ScaleShift', 'kind': 'op', 'op': 'ScaleShift'},
+                    'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+                    'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
+                    'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    # Mul op
+                    'mul_1': {'type': None, 'kind': 'op', 'op': 'Mul'},
+                    'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+                    'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput', 'infer': lambda x: None}
+                    }
+
+
+class AddMeanScaleValuesTest(unittest.TestCase):
+    def test_add_mean_scale_values_with_data_name(self):
+        graph = build_graph(nodes_attributes,
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None, 'data_type': None},
+                             'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder', 'name': 'data',
+                                        'data_type': None}
+                             },
+                            nodes_with_edges_only=True)
+        graph.graph['layout'] = 'NCHW'
+        mean_values = parse_tuple_pairs('(124,117,104)')
+        scale_values = parse_tuple_pairs('')
+
+        # input = 'data'
+        mean_scale = get_mean_scale_dictionary(mean_values, scale_values, None)
+        argv = Namespace(mean_scale_values=mean_scale)
+        graph.graph['cmd_params'] = argv
+        self.assertEqual(len(graph), 3)
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+        self.assertEqual(len(graph), 6)
+
+    def test_add_mean_scale_values_without_data_name(self):
+        graph = build_graph(nodes_attributes,
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None, 'data_type': None},
+                             'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder', 'name': 'data',
+                                        'data_type': None}
+                             },
+                            nodes_with_edges_only=True)
+        graph.graph['layout'] = 'NCHW'
+        mean_values = parse_tuple_pairs('(124,117,104)')
+        scale_values = parse_tuple_pairs('')
+        # input = None
+        mean_scale = get_mean_scale_dictionary(mean_values, scale_values, None)
+        argv = Namespace(mean_scale_values=mean_scale)
+        graph.graph['cmd_params'] = argv
+        self.assertEqual(len(graph), 3)
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+        self.assertEqual(len(graph), 6)
+
+    def test_add_mean_scale_values1(self):
+        graph = build_graph(nodes_attributes,
+                            [('pl_1', 'pl_1_data'), ('pl_2', 'pl_2_data')],
+                            {'pl_1_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
+                             'pl_2_data': {'shape': np.array([1, 6]), 'infer': None},
+                             'pl_1': {'shape': np.array([1, 3, 38, 38])},
+                             'pl_2': {'shape': np.array([1, 6])},
+                             },
+                            nodes_with_edges_only=True)
+        graph.graph['layout'] = 'NCHW'
+        argv = Namespace(
+            mean_scale_values={'pl_1': {'mean': np.array([1., 2., 3.])}, 'pl_2': {'mean': np.array([0., 0., 0.])}})
+        graph.graph['cmd_params'] = argv
+        graph.graph['cmd_params'] = argv
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+        mul_op_cnt = 0
+        add_op_cnt = 0
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.op == 'Mul':
+                mul_op_cnt += 1
+            if node.has_valid('op') and node.op == 'Add':
+                add_op_cnt += 1
+
+        self.assertEqual(add_op_cnt, 1, "Found more than one Add op in graph")
+        self.assertEqual(mul_op_cnt, 0, "Found Mul op in graph")
+
+    def test_optimize_scale_and_add_mean_values(self):
+        graph = build_graph(
+            nodes_attributes,
+            [
+                ('pl_1', 'pl_1_data')
+            ],
+            {
+                'pl_1_data': {
+                    'shape': np.array([1, 3, 38, 38]),
+                    'infer': None
+                },
+                'pl_1': {
+                    'shape': np.array([1, 3, 38, 38])
+                }
+            },
+            nodes_with_edges_only=True
+        )
+        graph.graph['layout'] = 'NCHW'
+        argv = Namespace(mean_scale_values={'pl_1': {'scale': np.array([1.]), 'mean': np.array([1., 2., 3.])}})
+        graph.graph['cmd_params'] = argv
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+        mul_op_cnt = 0
+        add_op_cnt = 0
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.op == 'Mul':
+                mul_op_cnt += 1
+            if node.has_valid('op') and node.op == 'Add':
+                add_op_cnt += 1
+
+        self.assertEqual(add_op_cnt, 1, "Found more than one Add op in graph")
+        self.assertEqual(mul_op_cnt, 0, "Found Mul op in graph")
+
+    def test_optimize_mean_and_add_scale_values(self):
+        graph = build_graph(
+            nodes_attributes,
+            [
+                ('pl_1', 'pl_1_data')
+            ],
+            {
+                'pl_1_data': {
+                    'shape': np.array([1, 3, 38, 38]),
+                    'infer': None
+                },
+                'pl_1': {
+                    'shape': np.array([1, 3, 38, 38])
+                }
+            },
+            nodes_with_edges_only=True
+        )
+        graph.graph['layout'] = 'NCHW'
+        argv = Namespace(mean_scale_values={'pl_1': {'scale': np.array([1.43]), 'mean': np.array([0., 0., 0.])}})
+        graph.graph['cmd_params'] = argv
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+        mul_op_cnt = 0
+        add_op_cnt = 0
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.op == 'Mul':
+                mul_op_cnt += 1
+            if node.has_valid('op') and node.op == 'Add':
+                add_op_cnt += 1
+
+        self.assertEqual(add_op_cnt, 0, "Found more than one Add op in graph")
+        self.assertEqual(mul_op_cnt, 1, "Found Mul op in graph")
+
+    def test_add_mean_scale_values3(self):
+        graph = build_graph(nodes_attributes,
+                            [('pl_1', 'pl_1_data')],
+                            {'pl_1_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
+                             'pl_1': {'shape': np.array([1, 3, 38, 38])},
+                             },
+                            nodes_with_edges_only=True)
+        graph.graph['layout'] = 'NCHW'
+        argv = Namespace(mean_scale_values=[[np.array([1., 2., 3.]), np.array([1., 2., 3.])]])
+        graph.graph['cmd_params'] = argv
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+
+        mul_op_cnt = 0
+        add_op_cnt = 0
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.op == 'Mul':
+                mul_op_cnt += 1
+            if node.has_valid('op') and node.op == 'Add':
+                add_op_cnt += 1
+
+        self.assertEqual(add_op_cnt, 1, "Found more than one Add op in graph")
+        self.assertEqual(mul_op_cnt, 1, "Found more than one Nul op in graph")
+
+    def test_add_mean_scale_values_cut_graph(self):
+        """
+        Test case when user cutted start of the network and specified mean/scale value to the new input node 'node_3'.
+        """
+        graph = build_graph(nodes_attributes,
+                            [('pl_1', 'pl_1_data'),
+                             ('pl_2', 'pl_2_data'),
+                             ('pl_2_data', 'node_3'),
+                             ('node_3', 'node_3_data'),
+                             ('pl_1_data', 'node_1'),
+                             ('node_3_data', 'node_1'),
+                             ],
+                            {'pl_1_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
+                             'pl_2_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
+                             'pl_2': {'initial_node_name': 'node_3', 'shape': np.array([1, 3, 38, 38])},
+                             'pl_1': {'shape': np.array([1, 3, 38, 38])},
+                             },
+                            nodes_with_edges_only=True)
+        graph.graph['layout'] = 'NCHW'
+        argv = Namespace(
+            mean_scale_values={'pl_1': {'mean': np.array([1, 2, 3])}, 'node_3': {'scale': np.array([1, 2, 3])}})
+        graph.graph['cmd_params'] = argv
+        AddMeanScaleValues().find_and_replace_pattern(graph)
+
+        mul_op_cnt = 0
+        add_op_cnt = 0
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.has_valid('op') and node.op == 'Mul':
+                mul_op_cnt += 1
+            if node.has_valid('op') and node.op == 'Add':
+                add_op_cnt += 1
+
+        self.assertEqual(add_op_cnt, 1, "There should be exactly one Add op")
+        self.assertEqual(mul_op_cnt, 1, "There should be exactly one Mul op")
+        self.assertEqual(Node(graph, 'pl_2').out_node().out_node().op, 'Mul', "The Mul op should be added after pl_2")
+        self.assertEqual(Node(graph, 'pl_1').out_node().out_node().op, 'Add', "The Add op should be added after pl_1")
diff --git a/model-optimizer/extensions/middle/AddQuantizeFuse.py b/model-optimizer/extensions/middle/AddQuantizeFuse.py
new file mode 100644
index 000000000..7dcbc3e61
--- /dev/null
+++ b/model-optimizer/extensions/middle/AddQuantizeFuse.py
@@ -0,0 +1,80 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from typing import Dict
+
+from mo.graph.graph import Graph, Node
+from mo.middle.passes.conv import get_tensor_in_port, get_value_in_port
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class AddQuantizeFuse(MiddleReplacementPattern):
+    """ Fuses Add --> Quantize sequence if possible
+    """
+    enabled = False
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('preop', dict(op='Add')),
+                ('preoped', dict()),
+                ('quantize', dict(op='Quantize')),
+            ],
+            edges=[
+                ('preop', 'preoped'),
+                ('preoped', 'quantize', {'in': 0}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
+
+        quantize = match['quantize']
+        preop = match['preop']
+
+        # Check for total number of Add consumers -- if something else consume its output it cannot be fused
+        if len(preop.out_node().out_nodes()) > 1:
+            log.debug('AddQuantizeFuse: cannot fuse because Add have Addtiple consumers')
+            return
+
+        # If the fusion is applicable, direct modifications to quantize 1-st and 2-nd inputs
+        # are performed. So the data nodes at those inputs shouldn't have more than 1 consumer
+        # maximum 2 consumers to the same quantize op (consumed by 1st and 2nd ports).
+        # TODO: relax this limitation and duplicate data nodes accordingly to modify the input range freely
+
+        # Provisional limitation that related to binary quantization
+        # TODO: Relax it beyond binarization case
+        if len(quantize.in_node(1).out_nodes()) != 1 or \
+                len(quantize.in_node(2).out_nodes()) != 1 or \
+                len(quantize.in_node(3).out_nodes()) != 1 or len(quantize.in_node(4).out_nodes()) != 1 or \
+                quantize.levels != 2:
+            log.debug('AddQuantizeFuse: cannot fuse because Quantize op has '
+                      'unexpected number of consumers for ports 1, 2, 3 or 4')
+            return
+
+        tensor_port, value_port = get_tensor_in_port(preop), get_value_in_port(preop)
+
+        quantize.in_port(1).data.set_value(quantize.in_port(1).data.get_value() - value_port.data.get_value())
+        quantize.in_port(2).data.set_value(quantize.in_port(2).data.get_value() - value_port.data.get_value())
+        quantize.in_port(0).disconnect()
+        tensor_port.get_connection().set_destination(quantize.in_port(0))
diff --git a/model-optimizer/extensions/middle/AddReshapeAfterStridedSlice.py b/model-optimizer/extensions/middle/AddReshapeAfterStridedSlice.py
deleted file mode 100644
index 2ed08ff45..000000000
--- a/model-optimizer/extensions/middle/AddReshapeAfterStridedSlice.py
+++ /dev/null
@@ -1,124 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import logging as log
-import networkx as nx
-import numpy as np
-
-from copy import deepcopy
-from extensions.middle.UselessStridedSlice import UselessStridedSliceEraser
-
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.ops.reshape import Reshape
-
-
-class AddReshapeAfterStridedSlice(MiddleReplacementPattern):
-    """
-      Transform adds Reshape after StridedSlice layers if new_axis_mask or/and
-      shrink_axis_mask contains True. After this transform StridedSlice layer 
-      does not change shape dims and new_axis_mask/shrink_axis_mask fulfilled by 
-      False
-    """
-    enabled = True
-
-    # Run before passes that will convert/remove StridedSlice
-    def run_before(self):
-        return [UselessStridedSliceEraser]
-
-    def pattern(self):
-        return dict(nodes=[('strided_slice', dict(kind='op', op='StridedSlice'))],
-                    edges=[])
-
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
-        # add Reshape for shrink_axis_mask
-        if True in match['strided_slice']['shrink_axis_mask']:
-            log.info("StridedSlice op with shrink mask '{}' has been detected".format(match['strided_slice'].id))
-            node = match['strided_slice']
-
-            if len(node.in_nodes()) != 4 or len(node.out_nodes()) != 1:
-                return
-
-            shape_in = node.in_node().shape
-            shape_out = node.out_node().shape
-            dim = shape_out.copy()
-            ss_shape = []
-            k = 0
-
-            # Don't permute reshape if channels were squeezed
-            dont_permute = False
-            if graph.graph['layout'] == 'NHWC' and node['shrink_axis_mask'][-1] == True:
-                dont_permute = True
-
-            for i in range(0, len(node['shrink_axis_mask'])):
-                if not node['shrink_axis_mask'][i]:
-                    ss_shape.append(shape_out[k])
-                    k = k + 1
-                else:
-                    node['shrink_axis_mask'][i] = False
-                    ss_shape.append(1)
-
-            out_node = node.out_node(0)
-
-            # insert data node for StridedSlice
-            data_node = Op._create_data_node(graph, node.name + "/Reshape_shrink_data", {'shape': ss_shape})
-            attrs = deepcopy(graph.get_edge_data(node.id, out_node.id)[0])
-            graph.remove_edge(node.id, out_node.id)
-            graph.add_edge(node.id, data_node.id, **attrs)
-
-            # insert Reshape
-            if dont_permute:
-                reshape = Reshape(graph, dict(name=node.name + "/Reshape_shrink",
-                                              dim=np.array(dim, dtype=np.int64), nchw_layout=True))
-                reshape_data_node = reshape.create_node_with_data([data_node], reshape.attrs,
-                                                                  data_nodes=[out_node])
-                reshape_data_node['nchw_layout'] = True
-            else:
-                reshape = Reshape(graph, dict(name=node.name + "/Reshape_shrink",
-                                              dim=np.array(dim, dtype=np.int64)))
-                reshape_data_node = reshape.create_node_with_data([data_node], reshape.attrs,
-                                                                  data_nodes=[out_node])
-
-        # add Reshape for new_axis_mask
-        if True in match['strided_slice']['new_axis_mask']:
-            log.info("StridedSlice op with new axis mask '{}' has been detected".format(match['strided_slice'].id))
-            node = match['strided_slice']
-
-            if len(node.in_nodes()) != 4 or len(node.out_nodes()) != 1:
-                return
-
-            shape_in = node.in_node().shape
-            shape_out = node.out_node().shape
-            dim = shape_out.copy()
-            ss_shape = []
-            for i in range(0, len(node['new_axis_mask'])):
-                if not node['new_axis_mask'][i]:
-                    ss_shape.append(shape_out[i])
-                else:
-                    node['new_axis_mask'][i] = False
-
-            out_node = node.out_node(0)
-            # insert data node for StridedSlice
-            data_node = Op._create_data_node(graph, node.name + "/Reshape_new_data", {'shape': ss_shape})
-            attrs = deepcopy(graph.get_edge_data(node.id, out_node.id)[0])
-            graph.remove_edge(node.id, out_node.id)
-            graph.add_edge(node.id, data_node.id, **attrs)
-
-            # insert Reshape
-            reshape = Reshape(graph, dict(name=node.name + "/Reshape_new",
-                                          dim=np.array(dim, dtype=np.int64)))
-            reshape_data_node = reshape.create_node_with_data([data_node], reshape.attrs,
-                                                              data_nodes=[out_node])
diff --git a/model-optimizer/extensions/middle/AddReshapeAfterStridedSlice_test.py b/model-optimizer/extensions/middle/AddReshapeAfterStridedSlice_test.py
deleted file mode 100644
index a834d996b..000000000
--- a/model-optimizer/extensions/middle/AddReshapeAfterStridedSlice_test.py
+++ /dev/null
@@ -1,312 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import numpy as np
-import unittest
-
-from extensions.middle.AddReshapeAfterStridedSlice import AddReshapeAfterStridedSlice
-from mo.graph.graph import Node
-from mo.middle.passes.fusing.fuse_linear_ops_test import compare_graphs
-from mo.middle.passes.eliminate_test import build_graph
-
-# The dictionary with nodes attributes used to build various graphs. A key is the name of the node and the value is the
-# dictionary with node attributes.
-nodes_attributes_test = {
-    'placeholder_1': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
-    'placeholder_1_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_2': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
-    'placeholder_2_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_begin_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_end_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_stride_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    # StridedSlice layers
-    'sslice_1': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, True, False]),
-                 'new_axis_mask': np.array([False, False, False, False])},
-    'sslice_1_data': {'shape': None, 'kind': 'data'},
-    'sslice_2': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, True, False]),
-                 'new_axis_mask': np.array([False, False, False, False])},
-    'sslice_2_data': {'shape': None, 'kind': 'data'}}
-
-nodes_reshape = {
-    'placeholder_1': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
-    'placeholder_1_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_2': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
-    'placeholder_2_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_begin_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_end_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    'placeholder_stride_data': {'shape': None, 'kind': 'data', 'data_type': None},
-    # StridedSlice layers
-    'sslice_1': {'type': 'StridedSlice', 'value': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, True, False]),
-                 'new_axis_mask': np.array([False, False, False, False])},
-    'sslice_1_data': {'value': None, 'shape': None, 'kind': 'data'},
-    'sslice_2': {'type': 'StridedSlice', 'value': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, True, False]),
-                 'new_axis_mask': np.array([False, False, False, False])},
-    'sslice_2_data': {'value': None, 'shape': None, 'kind': 'data'},
-    # Reshape layer
-    'sslice_1/Reshape_shrink': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'},
-    'sslice_1/Reshape_shrink_data': {'value': None, 'shape': None, 'kind': 'data'},
-    'sslice_2/Reshape_shrink': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'},
-    'sslice_2/Reshape_shrink_data': {'value': None, 'shape': None, 'kind': 'data'},
-    'sslice_2/Reshape_new': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'},
-    'sslice_2/Reshape_new_data': {'value': None, 'shape': None, 'kind': 'data'},
-}
-
-
-class AddReshapeAfterStridedSliceTests(unittest.TestCase):
-    def test_ss_1_shrink_last(self):
-        graph = build_graph(nodes_attributes_test,
-                            [('placeholder_1', 'placeholder_1_data'),
-                             ('placeholder_1_data', 'sslice_1'),
-                             ('placeholder_begin_data', 'sslice_1'),
-                             ('placeholder_end_data', 'sslice_1'),
-                             ('placeholder_stride_data', 'sslice_1'),
-                             ('sslice_1', 'sslice_1_data')],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                             'sslice_1': {'slices': np.array(
-                                 [slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)])},
-                             'sslice_1_data': {'shape': np.array([1, 227, 54]), 'is_output': True},
-                             })
-        graph.graph['layout'] = 'NHWC'
-
-        graph_ref = build_graph(nodes_reshape,
-                                [('placeholder_1', 'placeholder_1_data'),
-                                 ('placeholder_1_data', 'sslice_1'),
-                                 ('placeholder_begin_data', 'sslice_1'),
-                                 ('placeholder_end_data', 'sslice_1'),
-                                 ('placeholder_stride_data', 'sslice_1'),
-                                 ('sslice_1', 'sslice_1/Reshape_shrink_data'),
-                                 ('sslice_1/Reshape_shrink_data', 'sslice_1/Reshape_shrink'),
-                                 ('sslice_1/Reshape_shrink', 'sslice_1_data')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                                 'sslice_1': {'slices': np.array(
-                                     [slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
-                                     'shrink_axis_mask': np.array([False, False, False, False]),
-                                     'new_axis_mask': np.array([False, False, False, False])},
-                                 'sslice_1_data': {'shape': np.array([1, 227, 54]), 'is_output': True},
-                                 'sslice_1/Reshape_shrink': {'dim': np.array([1, 227, 54])},
-                                 'sslice_1/Reshape_shrink_data': {'shape': np.array([1, 227, 1, 54])}
-                                 })
-
-        pattern = AddReshapeAfterStridedSlice()
-        pattern.find_and_replace_pattern(graph)
-
-        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_1_data', check_op_attrs=True)
-        graph.clear()
-        graph_ref.clear()
-        self.assertTrue(flag, resp)
-
-    def test_ss_1_shrink(self):
-        graph = build_graph(nodes_attributes_test,
-                            [('placeholder_1', 'placeholder_1_data'),
-                             ('placeholder_1_data', 'sslice_2'),
-                             ('placeholder_begin_data', 'sslice_2'),
-                             ('placeholder_end_data', 'sslice_2'),
-                             ('placeholder_stride_data', 'sslice_2'),
-                             ('sslice_2', 'sslice_2_data'),
-                             ('sslice_2_data', 'placeholder_2'),
-                             ('placeholder_2', 'placeholder_2_data'), ],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                             'sslice_2': {'slices': np.array(
-                                 [slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]), },
-                             'sslice_2_data': {'shape': np.array([1, 227, 54]), 'is_output': True}
-                             })
-        graph.graph['layout'] = 'NHWC'
-
-        graph_ref = build_graph(nodes_reshape,
-                                [('placeholder_1', 'placeholder_1_data'),
-                                 ('placeholder_1_data', 'sslice_2'),
-                                 ('placeholder_begin_data', 'sslice_2'),
-                                 ('placeholder_end_data', 'sslice_2'),
-                                 ('placeholder_stride_data', 'sslice_2'),
-                                 ('sslice_2', 'sslice_2/Reshape_shrink_data'),
-                                 ('sslice_2/Reshape_shrink_data', 'sslice_2/Reshape_shrink'),
-                                 ('sslice_2/Reshape_shrink', 'sslice_2_data'),
-                                 ('sslice_2_data', 'placeholder_2'),
-                                 ('placeholder_2', 'placeholder_2_data')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                                 'sslice_2': {'slices': np.array(
-                                     [slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
-                                     'shrink_axis_mask': np.array([False, False, False, False]),
-                                     'new_axis_mask': np.array([False, False, False, False])},
-                                 'sslice_2_data': {'shape': np.array([1, 227, 54])},
-                                 'sslice_2/Reshape_shrink': {'dim': np.array([1, 227, 54])},
-                                 'sslice_2/Reshape_shrink_data': {'shape': np.array([1, 227, 1, 54])},
-                                 })
-
-        pattern = AddReshapeAfterStridedSlice()
-        pattern.find_and_replace_pattern(graph)
-
-        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
-        graph.clear()
-        graph_ref.clear()
-        self.assertTrue(flag, resp)
-
-    def test_ss_2_shrink(self):
-        graph = build_graph(nodes_attributes_test,
-                            [('placeholder_1', 'placeholder_1_data'),
-                             ('placeholder_1_data', 'sslice_2'),
-                             ('placeholder_begin_data', 'sslice_2'),
-                             ('placeholder_end_data', 'sslice_2'),
-                             ('placeholder_stride_data', 'sslice_2'),
-                             ('sslice_2', 'sslice_2_data'),
-                             ('sslice_2_data', 'placeholder_2'),
-                             ('placeholder_2', 'placeholder_2_data'), ],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                             'sslice_2': {
-                                 'slices': np.array([slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1)]),
-                                 'shrink_axis_mask': np.array([False, True, False, True])},
-                             'sslice_2_data': {'shape': np.array([1, 227]), 'is_output': True}
-                             })
-        graph.graph['layout'] = 'NHWC'
-
-        graph_ref = build_graph(nodes_reshape,
-                                [('placeholder_1', 'placeholder_1_data'),
-                                 ('placeholder_1_data', 'sslice_2'),
-                                 ('placeholder_begin_data', 'sslice_2'),
-                                 ('placeholder_end_data', 'sslice_2'),
-                                 ('placeholder_stride_data', 'sslice_2'),
-                                 ('sslice_2', 'sslice_2/Reshape_shrink_data'),
-                                 ('sslice_2/Reshape_shrink_data', 'sslice_2/Reshape_shrink'),
-                                 ('sslice_2/Reshape_shrink', 'sslice_2_data'),
-                                 ('sslice_2_data', 'placeholder_2'),
-                                 ('placeholder_2', 'placeholder_2_data')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                                 'sslice_2': {'slices': np.array(
-                                     [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1)]),
-                                     'shrink_axis_mask': np.array([False, False, False, False]),
-                                     'new_axis_mask': np.array([False, False, False, False])},
-                                 'sslice_2_data': {'shape': np.array([1, 227])},
-                                 'sslice_2/Reshape_shrink': {'dim': np.array([1, 227])},
-                                 'sslice_2/Reshape_shrink_data': {'shape': np.array([1, 1, 227, 1])},
-                                 })
-
-        pattern = AddReshapeAfterStridedSlice()
-        pattern.find_and_replace_pattern(graph)
-
-        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
-        graph.clear()
-        graph_ref.clear()
-        self.assertTrue(flag, resp)
-
-    def test_ss_1_new(self):
-        graph = build_graph(nodes_attributes_test,
-                            [('placeholder_1', 'placeholder_1_data'),
-                             ('placeholder_1_data', 'sslice_2'),
-                             ('placeholder_begin_data', 'sslice_2'),
-                             ('placeholder_end_data', 'sslice_2'),
-                             ('placeholder_stride_data', 'sslice_2'),
-                             ('sslice_2', 'sslice_2_data'),
-                             ('sslice_2_data', 'placeholder_2'),
-                             ('placeholder_2', 'placeholder_2_data'), ],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                             'sslice_2': {'slices': np.array(
-                                 [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 54, 1)]),
-                                 'shrink_axis_mask': np.array([False, False, False, False, False]),
-                                 'new_axis_mask': np.array([False, True, False, False, False])},
-                             'sslice_2_data': {'shape': np.array([1, 1, 227, 227, 54])}
-                             })
-        graph.graph['layout'] = 'NHWC'
-
-        graph_ref = build_graph(nodes_reshape,
-                                [('placeholder_1', 'placeholder_1_data'),
-                                 ('placeholder_1_data', 'sslice_2'),
-                                 ('placeholder_begin_data', 'sslice_2'),
-                                 ('placeholder_end_data', 'sslice_2'),
-                                 ('placeholder_stride_data', 'sslice_2'),
-                                 ('sslice_2', 'sslice_2/Reshape_new_data'),
-                                 ('sslice_2/Reshape_new_data', 'sslice_2/Reshape_new'),
-                                 ('sslice_2/Reshape_new', 'sslice_2_data'),
-                                 ('sslice_2_data', 'placeholder_2'),
-                                 ('placeholder_2', 'placeholder_2_data')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                                 'sslice_2': {'slices': np.array(
-                                     [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1),
-                                      slice(0, 54, 1)]),
-                                     'shrink_axis_mask': np.array([False, False, False, False, False]),
-                                     'new_axis_mask': np.array([False, False, False, False, False])},
-                                 'sslice_2_data': {'shape': np.array([1, 1, 227, 227, 54])},
-                                 'sslice_2/Reshape_new': {'dim': np.array([1, 1, 227, 227, 54])},
-                                 'sslice_2/Reshape_new_data': {'shape': np.array([1, 227, 227, 54])},
-                                 })
-
-        pattern = AddReshapeAfterStridedSlice()
-        pattern.find_and_replace_pattern(graph)
-
-        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
-        graph.clear()
-        graph_ref.clear()
-        self.assertTrue(flag, resp)
-
-    def test_ss_shrink_new(self):
-        graph = build_graph(nodes_attributes_test,
-                            [('placeholder_1', 'placeholder_1_data'),
-                             ('placeholder_1_data', 'sslice_2'),
-                             ('placeholder_begin_data', 'sslice_2'),
-                             ('placeholder_end_data', 'sslice_2'),
-                             ('placeholder_stride_data', 'sslice_2'),
-                             ('sslice_2', 'sslice_2_data'),
-                             ('sslice_2_data', 'placeholder_2'),
-                             ('placeholder_2', 'placeholder_2_data'), ],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                             'sslice_2': {'slices': np.array(
-                                 [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
-                                 'shrink_axis_mask': np.array([False, False, False, True, False]),
-                                 'new_axis_mask': np.array([False, True, False, False, False])},
-                             'sslice_2_data': {'shape': np.array([1, 1, 227, 54]), 'is_output': True}
-                             })
-        graph.graph['layout'] = 'NHWC'
-
-        graph_ref = build_graph(nodes_reshape,
-                                [('placeholder_1', 'placeholder_1_data'),
-                                 ('placeholder_1_data', 'sslice_2'),
-                                 ('placeholder_begin_data', 'sslice_2'),
-                                 ('placeholder_end_data', 'sslice_2'),
-                                 ('placeholder_stride_data', 'sslice_2'),
-                                 ('sslice_2', 'sslice_2/Reshape_new_data'),
-                                 ('sslice_2/Reshape_new_data', 'sslice_2/Reshape_new'),
-                                 ('sslice_2/Reshape_new', 'sslice_2/Reshape_shrink_data'),
-                                 ('sslice_2/Reshape_shrink_data', 'sslice_2/Reshape_shrink'),
-                                 ('sslice_2/Reshape_shrink', 'sslice_2_data'),
-                                 ('sslice_2_data', 'placeholder_2'),
-                                 ('placeholder_2', 'placeholder_2_data')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
-                                 'sslice_2': {'slices': np.array(
-                                     [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1),
-                                      slice(0, 54, 1)]),
-                                     'shrink_axis_mask': np.array([False, False, False, False, False]),
-                                     'new_axis_mask': np.array([False, False, False, False, False])},
-                                 'sslice_2_data': {'shape': np.array([1, 1, 227, 54])},
-                                 'sslice_2/Reshape_new': {'dim': np.array([1, 1, 227, 1, 54])},
-                                 'sslice_2/Reshape_new_data': {'shape': np.array([1, 227, 1, 54])},
-                                 'sslice_2/Reshape_shrink': {'dim': np.array([1, 1, 227, 54])},
-                                 'sslice_2/Reshape_shrink_data': {'shape': np.array([1, 1, 227, 1, 54])},
-                                 })
-
-        pattern = AddReshapeAfterStridedSlice()
-        pattern.find_and_replace_pattern(graph)
-
-        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
-        graph.clear()
-        graph_ref.clear()
-        self.assertTrue(flag, resp)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/model-optimizer/extensions/middle/BinarizeWeightsM1P1.py b/model-optimizer/extensions/middle/BinarizeWeightsM1P1.py
new file mode 100644
index 000000000..670029036
--- /dev/null
+++ b/model-optimizer/extensions/middle/BinarizeWeightsM1P1.py
@@ -0,0 +1,154 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from extensions.middle.CheckForCycle import CheckForCycle
+from extensions.middle.DeleteControlFlowEdges import DeleteControlFlowEdges
+from extensions.middle.DeleteNotExecutable import DeleteNotExecutable
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.lin_op import Mul
+from mo.ops.power import Power
+
+
+class BinarizeWeightsM1P1(MiddleReplacementPattern):
+    """ Convert weights to -1/+1 form
+
+        Applicable for convolutions and other operations that have 'weights' that combined with the input data
+        by mean of multiplication operation. So any linear operator suits. Detect such operations by
+        multiplication_transparent attribute -- if it is presents and set to True, then multiplication term
+        can be passed through the operation. If multiplication_transparent attribute is set to True for an operation,
+        such operation should also has multiplication_transparent_ports that contain a list of pairs with
+        port indices (in_port, out_port) that defines which port pairs can pass multiplication through.
+
+        For example for some convolutional operation which has 2 ports (input tensor and weights) and 1 output port
+        this list includes [(0,0)(1,0)]. If convolutional operation also has biases at port 2, it is not included into
+        this list because this port is not transparent for multiplication operation.
+
+        multiplication_transparent_ports can be None if all possible input/output pairs are multiplication
+        transparent.
+
+        #TODO Describe how to apply multiplication at output ports -- this is not specified. In the current definition
+        we can pass through only scalar multiplication, but we already requre passing it channel-wise.
+    """
+    enabled = True
+
+    def run_after(self):
+        return [DeleteControlFlowEdges]
+
+    def run_before(self):
+        # CheckForCycle and DeleteNotExecutable run graph clean up which should not be run before weights binarization
+        return [CheckForCycle, DeleteNotExecutable]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('quantize', dict(kind='op', op='Quantize')),
+                ('quantized', dict()),
+                ('operator', dict(kind='op', multiplication_transparent=True)),
+            ],
+            edges=[
+                ('quantize', 'quantized'),
+                ('quantized', 'operator'),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        assert match['operator'].has('multiplication_transparent_ports')
+
+        port = match['operator'].input_ports_with(match['quantized'])
+        assert len(port) >= 1
+        if len(port) > 1:
+            log.debug('BinarizeWeightsM1P1 cannot apply transformation for data {} because it consumed more'
+                      ' than once'.format(match['quantized'].name))
+            return
+
+        assert len(port) == 1
+        port = port[0]
+        applicable = [pair for pair in match['operator'].multiplication_transparent_ports if pair[0] == port]
+        if len(applicable) == 0:
+            return
+
+        # Look at 3-rd and 4-th inputs of Quantize -- they have constants that should be passed through.
+        # Assume that the constant that should be passed through is a scalar.
+        quantize = match['quantize']
+        output_low = quantize.in_node(3)
+        output_high = quantize.in_node(4)
+
+        if not output_low.has_valid('value') and not output_high.has_valid('value'):
+            return
+
+        output_low = output_low.value
+        output_high = output_high.value
+
+        # This pass is applicable for binarization only. Other intX variants are not relevant.
+        if quantize.levels != 2:
+            return
+
+        # Recognize two cases: 0/+1 and -1/+1.
+        zp1 = np.all(output_low == 0) or np.all(output_high == 0)
+        m1p1 = np.all(-output_low == output_high)
+        if (not zp1 and not m1p1) or (zp1 and m1p1):
+            log.debug('BinarizeWeightsM1P1 cannot apply transformation for data {} because it does\'t has one of'
+                      ' 0/+1 or -1/+1 forms.'.format(match['quantized'].name))
+            return
+
+        # Recognize scalar
+        if len(np.unique(output_low)) != 1 or len(np.unique(output_high)) != 1:
+            log.debug('BinarizeWeightsM1P1 cannot apply transformation for data {} because output_low or output_high '
+                      'cannot be interpreted as scalars.'.format(match['quantized'].name))
+            return
+
+        # TODO: Extract real scalar from 3rd and 4th inputs; reusing original tensors is dangerous because
+        #       it may have incompatible shape.
+
+        mult_term = quantize.in_node(3) if np.all(output_high == 0) else quantize.in_node(4)
+
+        # Patch inflow path (by diving by mult_term)
+        # Put a new Power/Mul combination here:
+        #       ---->---- (here)---> data ---> [3rd/4th ports]quantize ---> quantized ---> operator
+
+        if len(match['quantized'].out_nodes()) > 1:
+            log.debug('BinarizeWeightsM1P1: len(match[\'quantized\'].out_nodes()) > 1')
+            return
+        div_op = Power(graph, {'name': quantize.name + '/DivNormalize', 'power': -1.0})
+        div_output = div_op.create_node_with_data([mult_term])
+
+        for i in [3, 4]:
+            match['quantize'].insert_node_with_data_before(
+                match['quantize'].in_node(i),
+                Mul,
+                dict(name=quantize.name + '/MulNormalize'),
+                additional_inputs=[div_output],
+            )
+
+        match['quantized'].value = None  # reset value because it will be recomputed
+        match['quantize'].infer(match['quantize'])
+
+        # Put a complimentary new Mul node here:   operator -->---(here)-----> operator.out_node()
+
+        match['operator'].insert_node_with_data_after(
+            match['operator'].out_node(),
+            Mul,
+            dict(name=match['operator'].name + '/MulNormalize'),
+            [mult_term],
+        )
+
+        # Disable 'operator' fusion with linear ops, otherwise it will annihilate changes that we just made
+        match['operator']['can_be_fused'] = False
diff --git a/model-optimizer/extensions/middle/BlockLSTMtoLSTMSequence.py b/model-optimizer/extensions/middle/BlockLSTMtoLSTMSequence.py
index 98354428f..aa4bdf69f 100644
--- a/model-optimizer/extensions/middle/BlockLSTMtoLSTMSequence.py
+++ b/model-optimizer/extensions/middle/BlockLSTMtoLSTMSequence.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,20 +13,17 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import networkx as nx
 import numpy as np
 
-from extensions.middle.FusePermutesSequence import FusePermutesSequence
-from extensions.middle.lstm_sequence_normalize import LSTMSequenceNormalize
-from extensions.middle.lstm_sequence_tensor_iterator import LSTMSequenceTensorIterator
+from extensions.ops.LSTM import LSTM
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.utils.error import Error
 
 
 class BlockLSTMtoLSTMSequence(MiddleReplacementPattern):
     """
-    MO virtual operation LSTMSequence that converts to IE TensorIterator with LSTMCell inside supports 3 outputs:
+    MO virtual operation RNNSequence that converts to IE TensorIterator with LSTMCell inside supports 3 outputs:
     0: concatenated hidden states over the whole time sequence,
     1: last hidden state,
     2: last cell state.
@@ -37,13 +34,21 @@ class BlockLSTMtoLSTMSequence(MiddleReplacementPattern):
     2. Searches for sub-graph, that takes last cell state out of unsupported concatenated cell state output.
     We cut this sub-graph off in case if there are no other consumers of concatenated cell state output and we connect
     BlockLSTM to consumers of this sub-graph by port producing last cell state output
-    3. (Optional. Resolves by multiple checks) We cut the same sug-graph (as in 2) for concatenated cell states check
+    3. Renumber input ports of BlockLSTM to match RNNSequence specification.
+    4. (Optional. Resolves by multiple checks) We cut the same sug-graph (as in 2) for concatenated cell states check
     for better performance
     """
     enabled = True
 
     def run_before(self):
-        return [FusePermutesSequence, LSTMSequenceTensorIterator]
+        from extensions.middle.FusePermutesSequence import FusePermutesSequence
+        from extensions.middle.LSTMRNNSequenceToTensorIterator import LSTMToTensorIterator
+        return [FusePermutesSequence, LSTMToTensorIterator]
+
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        from extensions.middle.RNNSequenceNormalizeToIE import RNNSequenceNormalize
+        return [MiddleStart, RNNSequenceNormalize]
 
     def pattern(self):
         return dict(
@@ -96,11 +101,11 @@ class BlockLSTMtoLSTMSequence(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         time_len = match['concatenated_hidden_states'].shape[0]
         """
         Working with concatenated_cell_states_data part first, because IE TensorIterator primitive doesn't have
-        concatenated cell states output and if we can not collepse it, then we does not support this type of BlockLSTM
+        concatenated cell states output and if we can not collapse it, then we does not support this type of BlockLSTM
 
         We simplify the sub-graph below by taking another output of BlockLSTM:
         concatenated cell states over the whole time sequence -> last cell state
@@ -156,8 +161,10 @@ class BlockLSTMtoLSTMSequence(MiddleReplacementPattern):
         hidden_size = node.in_node(3).shape[-1]
         weights = weights_node.value
         biases = biases_node.value
-        assert weights.shape[0] == input_size + hidden_size, "weights.shape={} input_size={} hidden_size={}".format(weights.shape, input_size, hidden_size)
-        assert weights.shape[1] == biases.shape[0] == 4 * hidden_size, "weights.shape={} biases.shape={} hidden_size={}".format(weights.shape, biases.shape, hidden_size)
+        assert weights.shape[0] == input_size + hidden_size, \
+            "weights.shape={} input_size={} hidden_size={}".format(weights.shape, input_size, hidden_size)
+        assert weights.shape[1] == biases.shape[0] == 4 * hidden_size, \
+            "weights.shape={} biases.shape={} hidden_size={}".format(weights.shape, biases.shape, hidden_size)
 
         weights = weights.reshape([
             weights.shape[0],
@@ -199,15 +206,35 @@ class BlockLSTMtoLSTMSequence(MiddleReplacementPattern):
 
         graph.add_edge(match['BlockLSTM'].id, match['gather_1_data'].id, **attrs)
 
-        match['BlockLSTM'].op = 'LSTMSequence'
-        match['BlockLSTM']['sequence_dim'] = 0  # TF reference
-        match['BlockLSTM']['batch_dim'] = 1  # TF reference
-        match['BlockLSTM']['direction'] = 'forward'  # TF reference
-        match['BlockLSTM']['hidden_size'] = match['concatenated_hidden_states'].shape[-1]
-        match['BlockLSTM']['format'] = 'tf'
+        """
+        #3 Renumbering h_init_state, c_init_state input ports to match RNNSequence ports order.
+        """
+        h_init_port = 4
+        c_init_port = 5
+        # c_init_state
+        if 4 in node.in_nodes():
+            assert c_init_port not in node.in_nodes()
+            cell_state_edge = graph.get_edge_data(node.in_node(4).id, node.id)
+            cell_state_edge[0]['in'] = c_init_port
+
+
+        #h_init_state
+        if 3 in node.in_nodes():
+            assert h_init_port not in node.in_nodes()
+            hidden_state_edge = graph.get_edge_data(node.in_node(3).id, node.id)
+            hidden_state_edge[0]['in'] = h_init_port
+
+        new_attrs = {'sequence_dim': 0,
+                     'batch_dim': 1,
+                     'direction': 'forward',
+                     'hidden_size': match['concatenated_hidden_states'].shape[-1],
+                     'format': 'tf',
+                     }
+
+        LSTM.update_node_stat(match['BlockLSTM'], new_attrs)
 
         """
-        Optional #3 optimization from class description following
+        Optional #4 optimization from class description following
         """
         data_to_mul = [n for n in match['mul'].in_nodes().values() if n.id != match['concatenated_hidden_states'].id]
         if len(data_to_mul) != 1:
diff --git a/model-optimizer/extensions/middle/Cast.py b/model-optimizer/extensions/middle/Cast.py
new file mode 100644
index 000000000..fad89d7c5
--- /dev/null
+++ b/model-optimizer/extensions/middle/Cast.py
@@ -0,0 +1,41 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.middle.RemoveIdentity import RemoveIdentity
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class CastToFloatMark(MiddleReplacementPattern):
+    enabled = True
+
+    def run_before(self):
+        return [RemoveIdentity]
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('op', dict(op='Cast', dst_type=np.float32))],
+            edges=[])
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        # resulting network is fully floating point, so casts to float are useless
+        match['op']['identity'] = True
+    
+\ No newline at end of file
diff --git a/model-optimizer/extensions/middle/ChangePlaceholderTypes.py b/model-optimizer/extensions/middle/ChangePlaceholderTypes.py
new file mode 100644
index 000000000..bfba1c106
--- /dev/null
+++ b/model-optimizer/extensions/middle/ChangePlaceholderTypes.py
@@ -0,0 +1,94 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from mo.graph.graph import Graph, Node
+from mo.middle.passes.fusing.helpers import get_next_operation
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.utils.error import Error
+from mo.utils.utils import refer_to_faq_msg
+
+
+class ChangePlaceholderTypes(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
+    force_clean_up = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        from extensions.middle.ScaleInput import ScaleInput
+        return [ScaleInput]
+
+    @staticmethod
+    def change_node_type(node: Node, new_type: type):
+        node.graph.node[node.id]['pb'].attr['dtype'].type = new_type
+
+    @staticmethod
+    def is_node_casts_to_float(node: Node):
+        from tensorflow.core.framework import types_pb2 as tf_types  # pylint: disable=no-name-in-module
+        attrs = node.graph.node[node.id]
+        return 'pb' in attrs and attrs['pb'].op == 'Cast' and attrs['pb'].attr['DstT'].type == tf_types.DT_FLOAT
+
+    @staticmethod
+    def remove_node_preserving_edges(pl_node: Node, nodes: list):
+        graph = pl_node.graph
+        pl_node_data = pl_node.out_node()
+
+        # Disconnect Placeholder data node from Cast nodes
+        for out_node in pl_node.out_node().out_nodes():
+            graph.remove_edge(pl_node_data.id, out_node.id)
+
+        # Move edges from Cast data nodes to Placeholder data node
+        for cast_node in nodes:
+            # it is necessary to create a list from the result of function "graph.out_edges()" because we modify
+            # the graph during iteration over the list. networkx version 2.1 raises error without creating a list
+            for u, v, d in list(graph.out_edges(cast_node.out_node().id, data=True)):
+                graph.remove_edge(u, v)
+                graph.add_edges_from([(pl_node_data.id, v, d)])
+
+    @staticmethod
+    def is_node_gather(node: Node):
+        attrs = node.graph.node[node.id]
+        return 'pb' in attrs and attrs['pb'].op == 'GatherV2' and attrs['precision'] == 'FP32'
+
+    def find_and_replace_pattern(self, graph: Graph):
+        from tensorflow.core.framework import types_pb2 as tf_types  # pylint: disable=no-name-in-module
+        for node_name, node_attrs in list(graph.nodes(data=True)):
+            node = Node(graph, node_name)
+            pb = node_attrs.get('pb')
+            if pb is not None and pb.op == 'Placeholder' and pb.attr['dtype'].type != tf_types.DT_FLOAT:
+                log.info('Placeholder "{}" has type that is different from DT_FLOAT'.format(node_name))
+                next_ops = get_next_operation(node)
+                # check that all output nodes are nodes of type 'ToFloat'
+                if all([ChangePlaceholderTypes.is_node_casts_to_float(op) and
+                        len(op.in_nodes()) == 1 for op in next_ops]):
+                    ChangePlaceholderTypes.change_node_type(node, tf_types.DT_FLOAT)
+                    ChangePlaceholderTypes.remove_node_preserving_edges(node, next_ops)  # remove 'Cast' nodes
+
+                elif all([ChangePlaceholderTypes.is_node_gather(op) for op in next_ops] for op in next_ops):
+                    ChangePlaceholderTypes.change_node_type(node, tf_types.DT_FLOAT)
+
+                else:
+                    raise Error(
+                        ('Cannot convert type of placeholder "{}" because not all of its outputs are "Cast" to float '
+                         'operations: {}. ' +
+                         refer_to_faq_msg(49)),
+                        node.soft_get('name'),
+                        [op.soft_get('name') for op in next_ops]
+                    )
diff --git a/model-optimizer/extensions/middle/CheckForCycle.py b/model-optimizer/extensions/middle/CheckForCycle.py
new file mode 100644
index 000000000..4d8021ab3
--- /dev/null
+++ b/model-optimizer/extensions/middle/CheckForCycle.py
@@ -0,0 +1,39 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import networkx as nx
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.utils.error import Error
+from mo.utils.utils import refer_to_faq_msg
+
+
+class CheckForCycle(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        is_acyclic = nx.is_directed_acyclic_graph(graph)
+        if not is_acyclic:
+            raise Error('Graph contains a cycle. Can not proceed. ' + refer_to_faq_msg(97))
diff --git a/model-optimizer/extensions/middle/CheckForCycle_test.py b/model-optimizer/extensions/middle/CheckForCycle_test.py
new file mode 100644
index 000000000..5ef52143b
--- /dev/null
+++ b/model-optimizer/extensions/middle/CheckForCycle_test.py
@@ -0,0 +1,77 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+from extensions.middle.CheckForCycle import CheckForCycle
+from mo.utils.error import Error
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'node_1_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'node_2': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'concat': {'type': 'Concat', 'value': None, 'kind': 'op'},
+                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'node_3_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    # Placeholders
+                    'placeholder_1': {'shape': None, 'type': 'Input', 'kind': 'op', 'op': 'Placeholder'},
+                    'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+                    'placeholder_2': {'shape': None, 'type': 'Input', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_1': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_1_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'pl_2': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_2_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+                    # ScaleShift layer
+                    'scaleshift_1': {'type': 'ScaleShift', 'kind': 'op', 'op': 'ScaleShift'},
+                    'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+                    'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
+                    'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    # Mul op
+                    'mul_1': {'type': None, 'kind': 'op', 'op': 'Mul'},
+                    'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+                    'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput', 'infer': lambda x: None}
+                    }
+
+
+class CycleTest(unittest.TestCase):
+    def test_check_for_cycle1(self):
+        # cyclic case
+        graph = build_graph(nodes_attributes,
+                            [('node_1', 'node_1_data'),
+                             ('node_1_data', 'node_3'),
+                             ('node_3', 'node_3_data'),
+                             ('node_3_data', 'node_1')],
+                            nodes_with_edges_only=True)
+        with self.assertRaisesRegex(Error, 'Graph contains a cycle. Can not proceed.*'):
+            CheckForCycle().find_and_replace_pattern(graph)
+
+    def test_check_for_cycle2(self):
+        # acyclic case
+        graph = build_graph(nodes_attributes,
+                            [('node_1', 'node_1_data'),
+                             ('node_1_data', 'node_3'),
+                             ('node_3', 'node_3_data'),
+                             ('node_3_data', 'mul_1'),
+                             ('mul_1_w', 'mul_1'),
+                             ('mul_1', 'mul_1_data')
+                             ],
+                            nodes_with_edges_only=True)
+        try:
+            CheckForCycle().find_and_replace_pattern(graph)
+        except Error:
+            self.fail("Unexpected Error raised")
diff --git a/model-optimizer/extensions/middle/ConcatOptimization.py b/model-optimizer/extensions/middle/ConcatOptimization.py
new file mode 100644
index 000000000..17f715fd8
--- /dev/null
+++ b/model-optimizer/extensions/middle/ConcatOptimization.py
@@ -0,0 +1,93 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import networkx as nx
+import logging as log
+
+from mo.graph.graph import Node
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class ConcatOptimization(MiddleReplacementPattern):
+    # This optimization reduces number of edges between Concat operations
+    # that significantly reduce memory consumption
+
+    enabled = False
+
+    def run_after(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+        mp = {}
+        used = {}
+        for node in graph.nodes():
+            node = Node(graph, node)
+            if node.kind == 'op' and node.soft_get('type') == 'Concat':
+                in_nodes = tuple([node.in_node(idx).id for idx in range(len(node.in_nodes()))])
+                out_node = (node.id, node.out_node().id)
+                if in_nodes in mp:
+                    log.warning("Something is weird! {} and {}".format(node.id, mp[in_nodes]))
+                else:
+                    mp.update({in_nodes: out_node})
+                    used.update({node.id: {x: False for x in in_nodes}})
+
+        for key in mp.keys():
+            replacers = []
+            for i in range(len(key)):
+                for j in range(i + 1, len(key)):
+                    arr = tuple(key[i:j + 1])
+                    if arr in mp.keys() and arr != key:
+                        # print("Output of {} can be used as input for {} ({})".format(mp[arr][0], mp[key][0], len(arr)))
+                        replacers.append((len(arr), arr))
+
+            replacers.sort(reverse=True)
+
+            concat_id = mp[key][0]
+            for ln, arr in replacers:
+                # Check that we can do it!!!
+                we_can = True
+                for x in arr:
+                    if used[concat_id][x]:
+                        # print("Sorry but {} input was already removed from {}".format(x, concat_id))
+                        we_can = False
+                        break
+
+                if not we_can:
+                    continue
+
+                for x in arr:
+                    used[concat_id][x] = True
+
+                edge_attrs = graph.get_edge_data(arr[0], concat_id)[0]
+                for in_node in arr:
+                    graph.remove_edge(in_node, concat_id)
+
+                new_input = mp[arr][1]
+                out_port = len(Node(graph, new_input).out_nodes()) + 1
+                edge_attrs['out'] = out_port
+                graph.add_edge(new_input, concat_id, **edge_attrs)
+
+                # Renumber 'in' attrs
+                concat_node = Node(graph, concat_id)
+                ln = len(concat_node.in_nodes())
+                ports = [x for x in concat_node.in_nodes().keys()]
+                ports.sort()
+
+                p_id = 0
+                for p in ports:
+                    in_node = concat_node.in_nodes()[p]
+                    graph[in_node.id][concat_id][0]['in'] = p_id
+                    p_id += 1
diff --git a/model-optimizer/extensions/middle/ConstSwitchResolver.py b/model-optimizer/extensions/middle/ConstSwitchResolver.py
index 73459b094..ad9171ccc 100644
--- a/model-optimizer/extensions/middle/ConstSwitchResolver.py
+++ b/model-optimizer/extensions/middle/ConstSwitchResolver.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.utils.graph import pseudo_topological_sort
@@ -28,7 +26,11 @@ class ConstSwitchEraser(MiddleReplacementPattern):
     """
     enabled = True
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def find_and_replace_pattern(self, graph: Graph):
         for n in pseudo_topological_sort(graph):
             if graph.node[n]['kind'] == 'data' or graph.node[n]['op'] != 'Switch':
                 continue
diff --git a/model-optimizer/extensions/middle/ConvToBinaryConv.py b/model-optimizer/extensions/middle/ConvToBinaryConv.py
new file mode 100644
index 000000000..d370eccd9
--- /dev/null
+++ b/model-optimizer/extensions/middle/ConvToBinaryConv.py
@@ -0,0 +1,129 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from extensions.middle.CheckForCycle import CheckForCycle
+from extensions.middle.DeleteControlFlowEdges import DeleteControlFlowEdges
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.const import Const
+from mo.ops.lin_op import Mul, Add
+from mo.ops.op import Op
+from mo.ops.power import Power
+
+
+class ConvToBinaryConv(MiddleReplacementPattern):
+    """ Transform usual convolution with [0,+1] input and [-1,+1] to BinaryConvolution
+
+        Modifies output terms after the Convolution to be able to apply BinaryConvolution
+        operation instead that accepts [-1,1] input and [-1,1] weights. It requires modification
+        channel-wise addition with weights reduced along all axis except output channel dimension.
+    """
+    enabled = True
+    force_clean_up = True
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('quantize', dict(kind='op', op='Quantize')),
+                ('quantized', dict()),  # input tensor, not weights
+                ('operator', dict(kind='op', type='Convolution')),
+            ],
+            edges=[
+                ('quantize', 'quantized'),
+                ('quantized', 'operator', {'in':0}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        assert match['operator'].has('multiplication_transparent_ports')
+
+        quantize = match['quantize']
+        # This pass is applicable for binarization only. Other intX variants are not relevant.
+        if quantize.levels != 2:
+            return
+
+        port = match['operator'].input_ports_with(match['quantized'])
+        assert len(port) >= 1
+        if len(port) > 1:
+            log.debug('BinarizeWeightsM1P1 cannot apply transformation for data {} because it consumed more'
+                      ' than once'.format(match['quantized'].name))
+            return
+
+        assert len(port) == 1
+        port = port[0]
+        applicable = [pair for pair in match['operator'].multiplication_transparent_ports if pair[0] == port]
+        if len(applicable) == 0:
+            return
+
+        # Look at 3-rd and 4-th inputs of Quantize -- they have constants that should be passed through.
+        # Assume that the constant that should be passed through is a scalar.
+        output_low = quantize.in_node(3)
+        output_high = quantize.in_node(4)
+        assert len(output_low.out_nodes()) == 1
+        assert len(output_high.out_nodes()) == 1
+
+        if not output_low.has_valid('value') and not output_high.has_valid('value'):
+            return
+
+        output_low = output_low.value
+        output_high = output_high.value
+
+        operator = match['operator']
+
+        if np.all(np.isclose(output_low, 0)) and np.all(np.isclose(output_high, 1)):
+
+            weights = operator.in_node(1).value
+            reduction_indices = set(range(len(weights.shape))) - set([operator.output_feature_channel])
+            weights_reduced = np.add.reduce(weights, axis=tuple(reduction_indices))
+            weights_reduced = weights_reduced.reshape([len(weights_reduced), 1, 1])
+
+            add_term = Const(graph, {'value': weights_reduced}).create_node()
+            add = Add(graph, {}).create_node()
+            add.in_port(1).connect(add_term.out_port(0))
+            mul_term = Const(graph, {'value': np.array(0.5)}).create_node()
+            mul = Mul(graph, {}).create_node()
+            mul.in_port(1).connect(mul_term.out_port(0))
+            add.out_port(0).connect(mul.in_port(0))
+
+            operator.out_port(0).get_connection().set_source(mul.out_port(0))
+            add.in_port(0).connect(operator.out_port(0))
+
+            operator['pad_value'] = float(-1.0)
+        elif np.all(np.isclose(output_low, -1)) and np.all(np.isclose(output_high, +1)):
+            pass
+        else:
+            log.debug('ConvToBinaryConv: cannot apply transformation because input range is neither in [0, +1] nor '
+                      'in [-1, +1].')
+            return
+
+        operator['type'] = 'BinaryConvolution'
+        operator['mode'] = 'xnor-popcount'
+        operator['input'] = operator.in_node(0).shape[1]
+        # Weights are not bit-packed yet; there should be a separate transformation to do that
+
+        assert output_low.size == 1
+        assert output_high.size == 1
+
+        output_low = quantize.in_node(3)
+        output_high = quantize.in_node(4)
+
+        # Make sure that low/high values are exactly 0/1
+        output_low.value = np.zeros(output_low.shape)
+        output_high.value = np.ones(output_high.shape)
diff --git a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py
index cec09ccb7..d9906b3a3 100644
--- a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py
+++ b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,16 +14,19 @@
  limitations under the License.
 """
 
-import numpy as np
-import networkx as nx
+from copy import deepcopy
+
 import logging as log
+import numpy as np
 
+from extensions.middle.SliceConverter import ConvertSlice
 from extensions.ops.splitv import SplitV
-from mo.graph.graph import Node
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node, Graph, add_opoutput
+from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op
 from mo.ops.reshape import Reshape
-from mo.middle.replacement import MiddleReplacementPattern
-from extensions.middle.SliceConverter import ConvertSlice
+
 
 class ConvertGroupedStridedSlice(MiddleReplacementPattern):
     """
@@ -50,7 +53,11 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
     def run_after(self):
         return [ConvertSlice]
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
+    def find_and_replace_pattern(self, graph: Graph):
         # Iterate over all data nodes and find all with >= 1 consumers
         data_nodes = [Node(graph, node) for node in graph.node if Node(graph, node).kind == 'data']
         for input_data in data_nodes:
@@ -61,12 +68,16 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
             input_shape = np.array(input_data.shape)
 
             # Get all StridedSlice consumers
-            out_nodes = [node for node in input_data.out_nodes() if node.op == 'StridedSlice']
+            out_nodes = [node for node in input_data.out_nodes() if node.op == 'StridedSlice' and node.in_node(0).name == input_data.name]
             if len(out_nodes) < 1:
                 continue
 
             valid_for_replacement = True
 
+            for node in out_nodes:
+                if len(node.slices) != len(out_nodes[0].slices):
+                    valid_for_replacement = False
+
             # Detect dimension for splitting
             split_channel_dim = None
             for dim_id, s in enumerate(out_nodes[0].slices):
@@ -80,9 +91,6 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
             # split_dims contains tuples with split range and output data node
             split_dims = []
             for out_id, node in enumerate(out_nodes):
-                # Check that StridedSlice op has no shrink_axis_mask attribute
-                if not np.all([x == False for x in node.shrink_axis_mask]):
-                    valid_for_replacement = False
                 # Check that StridedSlice op has stride eq 1 and splits only feature channel
                 for id, s in enumerate(node.slices):
                     l, r, stride = s.start, s.stop, s.step
@@ -97,7 +105,23 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
 
             # Check feature split intersection
             final_data_nodes_list = []
-            sorted_split_dims = sorted(split_dims)
+            sorted_split_dims = sorted(split_dims, key=lambda item: (item[0], item[1]))
+
+            # check if we have similar StridedSlice operations with different outputs
+            prev_sd = sorted_split_dims[0]
+            to_remove = []
+            for i in range(1, len(sorted_split_dims)):
+                if sorted_split_dims[i][0] == prev_sd[0] and sorted_split_dims[i][1] == prev_sd[1] and sorted_split_dims[i][2].name != prev_sd[2].name:
+                    cur_node = sorted_split_dims[i][2]
+                    for out in cur_node.out_nodes():
+                        attrs = deepcopy(graph.get_edge_data(cur_node.id, out.id)[0])
+                        graph.remove_edge(cur_node.id, out.id)
+                        graph.add_edge(prev_sd[2].id, out.id, **attrs)
+                    to_remove.append(i)
+
+            for ind in reversed(to_remove):
+                sorted_split_dims.pop(ind)
+
             size_splits = []
             prev_r = 0
             for l, r, out in sorted_split_dims:
@@ -109,10 +133,10 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
                     shape = np.array(input_shape)
                     size_splits.append(l - prev_r)
                     shape[split_channel_dim] = l - prev_r
-                    data_node = Op._create_data_node(graph, 'fake_data', {'shape': shape, 'is_output': True})
+                    data_node = Op._create_data_node(graph, 'fake_data', {'shape': shape})
+                    add_opoutput(graph, data_node.id, 0, False)
                     final_data_nodes_list.append(data_node)
 
-
                 prev_r = r
                 size_splits.append(r - l)
                 final_data_nodes_list.append(out)
@@ -124,12 +148,26 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
                 shape = input_shape.copy()
                 shape[split_channel_dim] = input_shape[split_channel_dim] - prev_r
                 size_splits.append(input_shape[split_channel_dim] - prev_r)
-                data_node = Op._create_data_node(graph, 'fake_data', {'shape': shape, 'is_output': True})
+                data_node = Op._create_data_node(graph, 'fake_data', {'shape': shape})
+                add_opoutput(graph, data_node.id, 0, False)
                 final_data_nodes_list.append(data_node)
 
             if not valid_for_replacement:
                 continue
 
+            for node in out_nodes:
+                if not np.all([x == 0 for x in node.shrink_axis_mask]):
+                    out_node = node.out_node()
+                    if np.any(node['shrink_axis_mask']):
+                        self.add_reshape_for_shrink(graph, node)
+                    if np.any(node['new_axis_mask']):
+                        self.add_reshape_for_new(graph, node)
+
+                    for i in range(len(final_data_nodes_list)):
+                        if final_data_nodes_list[i].name == out_node.name:
+                            final_data_nodes_list[i] = node.out_node()
+                            break
+
             # Insert Split layer and remove old StridedSlice layers
             # 1. Remove connections from input_data to StridedSlice ops
             out_data_nodes = []
@@ -143,5 +181,82 @@ class ConvertGroupedStridedSlice(MiddleReplacementPattern):
 
             # 2. Create Split layer and reorder outputs
             split = SplitV(graph, dict(name=name_for_future_split + "/Split", axis=split_channel_dim,
-                                       size_splits=size_splits))
+                                       size_splits=size_splits, out_ports_count=len(size_splits)))
             split.create_node_with_data(inputs=[input_data], data_nodes=final_data_nodes_list)
+
+    @staticmethod
+    def add_reshape_for_shrink(graph: Graph, ss_node):
+        # add Reshape for shrink_axis_mask
+        log.info("StridedSlice op with shrink mask '{}' has been detected".format(ss_node.id))
+        node = ss_node
+
+        if len(node.in_nodes()) != 4 or len(node.out_nodes()) != 1:
+            return
+
+        shape_out = node.out_node().shape
+        dim = shape_out.copy()
+        ss_shape = []
+        k = 0
+
+        # Don't permute reshape if channels were squeezed
+        dont_permute = False
+        if graph.graph['layout'] == 'NHWC' and node['shrink_axis_mask'][-1] == 1:
+            dont_permute = True
+
+        for i in range(0, len(node['shrink_axis_mask'])):
+            if not node['shrink_axis_mask'][i]:
+                ss_shape.append(shape_out[k])
+                k = k + 1
+            else:
+                node['shrink_axis_mask'][i] = 0
+                ss_shape.append(1)
+
+        out_node = node.out_node(0)
+
+        # insert data node for StridedSlice
+        data_node = Op._create_data_node(graph, node.name + "/Reshape_shrink_data", {'shape': int64_array(ss_shape)})
+        attrs = deepcopy(graph.get_edge_data(node.id, out_node.id)[0])
+        graph.remove_edge(node.id, out_node.id)
+        graph.add_edge(node.id, data_node.id, **attrs)
+
+        # insert Reshape
+        if dont_permute:
+            reshape = Reshape(graph, dict(name=node.name + "/Reshape_shrink",
+                                          dim=np.array(dim, dtype=np.int64), nchw_layout=True))
+            reshape_data_node = reshape.create_node_with_data([data_node], reshape.attrs,
+                                                              data_nodes=[out_node])
+            reshape_data_node['nchw_layout'] = True
+        else:
+            reshape = Reshape(graph, dict(name=node.name + "/Reshape_shrink",
+                                          dim=np.array(dim, dtype=np.int64)))
+            reshape_data_node = reshape.create_node_with_data([data_node], reshape.attrs,
+                                                              data_nodes=[out_node])
+
+    @staticmethod
+    def add_reshape_for_new(graph: Graph, ss_node):
+        log.info("StridedSlice op with new axis mask '{}' has been detected".format(ss_node.id))
+        node = ss_node
+
+        if len(node.in_nodes()) != 4 or len(node.out_nodes()) != 1:
+            return
+
+        shape_out = node.out_node().shape
+        dim = shape_out.copy()
+        ss_shape = []
+        for i in range(0, len(node['new_axis_mask'])):
+            if not node['new_axis_mask'][i]:
+                ss_shape.append(shape_out[i])
+            else:
+                node['new_axis_mask'][i] = 0
+
+        out_node = node.out_node(0)
+        # insert data node for StridedSlice
+        data_node = Op._create_data_node(graph, node.name + "/Reshape_new_data", {'shape': ss_shape})
+        attrs = deepcopy(graph.get_edge_data(node.id, out_node.id)[0])
+        graph.remove_edge(node.id, out_node.id)
+        graph.add_edge(node.id, data_node.id, **attrs)
+
+        # insert Reshape
+        reshape = Reshape(graph, dict(name=node.name + "/Reshape_new",
+                                      dim=np.array(dim, dtype=np.int64)))
+        reshape.create_node_with_data([data_node], reshape.attrs, data_nodes=[out_node])
diff --git a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py
index 0ebdb3876..24d1ca98e 100644
--- a/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py
+++ b/model-optimizer/extensions/middle/ConvertGroupedStridedSlice_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,20 +19,26 @@ import unittest
 import numpy as np
 
 from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice
+from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
 nodes_attributes = {
     'placeholder_1': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
     'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'placeholder_2': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+    'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'placeholder_begin_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'placeholder_end_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'placeholder_stride_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # StridedSlice layers
     'sslice_1': {'type': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, False, False])},
+                 'shrink_axis_mask': np.array([0, 0, 0, 0])},
     'sslice_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     'sslice_2': {'type': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, False, False])},
+                 'shrink_axis_mask': np.array([0, 0, 0, 0])},
     'sslice_2_data': {'value': None, 'shape': None, 'kind': 'data'},
     'sslice_3': {'type': None, 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
-                 'shrink_axis_mask': np.array([False, False, False, False])},
+                 'shrink_axis_mask': np.array([0, 0, 0, 0])},
     'sslice_3_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Split layer
     'split_1': {'type': 'Split', 'kind': 'op', 'op': 'SplitV'},
@@ -43,6 +49,16 @@ nodes_attributes = {
     # Concat1 operation
     'concat_1': {'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
     'concat_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'op_output': {'kind': 'op', 'op': 'OpOutput'},
+    'op_output_1': {'kind': 'op', 'op': 'OpOutput'},
+    'op_output_2': {'kind': 'op', 'op': 'OpOutput'},
+    # Reshape layer
+    'sslice_1/Reshape_shrink': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'},
+    'sslice_1/Reshape_shrink_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'sslice_2/Reshape_shrink': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'},
+    'sslice_2/Reshape_shrink_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'sslice_2/Reshape_new': {'type': 'Reshape', 'value': None, 'kind': 'op', 'op': 'Reshape'},
+    'sslice_2/Reshape_new_data': {'value': None, 'shape': None, 'kind': 'data'},
 }
 
 
@@ -59,7 +75,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
                              ('sslice_3_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -75,7 +92,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(36, 54, 1)])},
                              'sslice_3_data': {'shape': np.array([1, 227, 227, 18])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -88,14 +105,16 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('split_1_data', 'concat_1'),
                                  ('split_2_data', 'concat_1'),
                                  ('split_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
+
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  'split_1': {'axis': 3},
                                  'split_1_data': {'shape': np.array([1, 227, 227, 18])},
                                  'split_2_data': {'shape': np.array([1, 227, 227, 18])},
                                  'split_3_data': {'shape': np.array([1, 227, 227, 18])},
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -116,7 +135,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
                              ('sslice_3_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -132,7 +152,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 19, 1)])},
                              'sslice_3_data': {'shape': np.array([1, 227, 227, 19])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -145,14 +165,15 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('split_1_data', 'concat_1'),
                                  ('split_2_data', 'concat_1'),
                                  ('split_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  'split_1': {'axis': 3},
                                  'split_1_data': {'shape': np.array([1, 227, 227, 18])},
                                  'split_2_data': {'shape': np.array([1, 227, 227, 17])},
                                  'split_3_data': {'shape': np.array([1, 227, 227, 19])},
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -174,7 +195,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
                              ('sslice_3_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -190,7 +212,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 19, 1)])},
                              'sslice_3_data': {'shape': np.array([1, 227, 227, 19])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -205,7 +227,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('sslice_1_data', 'concat_1'),
                                  ('sslice_2_data', 'concat_1'),
                                  ('sslice_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -221,7 +244,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                      [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 19, 1)])},
                                  'sslice_3_data': {'shape': np.array([1, 227, 227, 19])},
 
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -243,7 +266,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
                              ('sslice_3_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -259,7 +283,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 19, 1)])},
                              'sslice_3_data': {'shape': np.array([1, 227, 227, 19])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -274,7 +298,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('sslice_1_data', 'concat_1'),
                                  ('sslice_2_data', 'concat_1'),
                                  ('sslice_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -290,7 +315,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                      [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 19, 1)])},
                                  'sslice_3_data': {'shape': np.array([1, 227, 227, 19])},
 
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -315,7 +340,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
                              ('sslice_3_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output'),
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -331,7 +357,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(1, 19, 1)])},
                              'sslice_3_data': {'shape': np.array([1, 227, 227, 18])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -345,7 +371,9 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('split_2_data', 'concat_1'),
                                  ('split_3_data', 'concat_1'),
                                  ('split_4_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output'),
+                                 ('split_1_data', 'op_output_1')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  'split_1': {'axis': 3},
@@ -353,7 +381,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  'split_2_data': {'shape': np.array([1, 227, 227, 18])},
                                  'split_3_data': {'shape': np.array([1, 227, 227, 17])},
                                  'split_4_data': {'shape': np.array([1, 227, 227, 18])},
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -376,7 +404,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_2', 'sslice_2_data'),
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -388,7 +417,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(27, 45, 1)])},
                              'sslice_2_data': {'shape': np.array([1, 227, 227, 18])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -401,7 +430,10 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('split_1', 'split_4_data'),
                                  ('split_1_data', 'concat_1'),
                                  ('split_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output'),
+                                 ('split_2_data', 'op_output_1'),
+                                 ('split_4_data', 'op_output_2'),
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  'split_1': {'axis': 3},
@@ -409,7 +441,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  'split_2_data': {'shape': np.array([1, 227, 227, 9])},
                                  'split_3_data': {'shape': np.array([1, 227, 227, 18])},
                                  'split_4_data': {'shape': np.array([1, 227, 227, 9])},
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -427,7 +459,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_2', 'sslice_2_data'),
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -439,7 +472,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(10, 227, 1), slice(0, 227, 1), slice(27, 45, 1)])},
                              'sslice_2_data': {'shape': np.array([1, 217, 227, 18])},
 
-                             'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -451,7 +484,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('sslice_2', 'sslice_2_data'),
                                  ('sslice_1_data', 'concat_1'),
                                  ('sslice_2_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
 
@@ -463,7 +497,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                      [slice(0, 1, 1), slice(10, 227, 1), slice(0, 227, 1), slice(27, 45, 1)])},
                                  'sslice_2_data': {'shape': np.array([1, 217, 227, 18])},
 
-                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
@@ -485,7 +519,8 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                              ('sslice_2', 'sslice_2_data'),
                              ('sslice_1_data', 'concat_1'),
                              ('sslice_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 54, 54, 3])},
 
@@ -497,7 +532,7 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  [slice(0, 1, 1), slice(18, 36, 1), slice(0, 54, 1), slice(0, 3, 1)])},
                              'sslice_2_data': {'shape': np.array([1, 18, 54, 3])},
 
-                             'concat_1_data': {'shape': np.array([1, 54, 54, 3]), 'is_output': True},
+                             'concat_1_data': {'shape': np.array([1, 54, 54, 3])},
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -509,14 +544,336 @@ class ConvertGroupedStridedSliceTests(unittest.TestCase):
                                  ('split_1', 'split_3_data'),
                                  ('split_1_data', 'concat_1'),
                                  ('split_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output'),
+                                 ('split_2_data', 'op_output_1')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 54, 54, 3])},
                                  'split_1': {'axis': 1},
                                  'split_1_data': {'shape': np.array([1, 18, 54, 3])},
                                  'split_2_data': {'shape': np.array([1, 18, 54, 3])},
                                  'split_3_data': {'shape': np.array([1, 18, 54, 3])},
-                                 'concat_1_data': {'shape': np.array([1, 54, 54, 3]), 'is_output': True},
+                                 'concat_1_data': {'shape': np.array([1, 54, 54, 3])},
+                                 })
+
+        pattern = ConvertGroupedStridedSlice()
+        pattern.find_and_replace_pattern(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'concat_1_data', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+
+class AddReshapeAfterStridedSliceTests(unittest.TestCase):
+    def test_ss_1_shrink_last(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'sslice_1'),
+                             ('placeholder_begin_data', 'sslice_1'),
+                             ('placeholder_end_data', 'sslice_1'),
+                             ('placeholder_stride_data', 'sslice_1'),
+                             ('sslice_1', 'sslice_1_data'),
+                             ('sslice_1_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                             'sslice_1': {'slices': np.array([slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
+                                          'shrink_axis_mask': [0, 0, 1, 0],
+                                          'new_axis_mask': np.array([0, 0, 0, 0])},
+                             'sslice_1_data': {'shape': np.array([1, 227, 54])},
+                             })
+        graph.graph['layout'] = 'NHWC'
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'sslice_1'),
+                                 ('placeholder_begin_data', 'sslice_1'),
+                                 ('placeholder_end_data', 'sslice_1'),
+                                 ('placeholder_stride_data', 'sslice_1'),
+                                 ('sslice_1', 'sslice_1/Reshape_shrink_data'),
+                                 ('sslice_1/Reshape_shrink_data', 'sslice_1/Reshape_shrink'),
+                                 ('sslice_1/Reshape_shrink', 'sslice_1_data'),
+                                 ('sslice_1_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                                 'sslice_1': {'slices': np.array(
+                                     [slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
+                                     'shrink_axis_mask': np.array([0, 0, 0, 0]),
+                                     'new_axis_mask': np.array([0, 0, 0, 0])},
+                                 'sslice_1_data': {'shape': np.array([1, 227, 54])},
+                                 'sslice_1/Reshape_shrink': {'dim': np.array([1, 227, 54])},
+                                 'sslice_1/Reshape_shrink_data': {'shape': np.array([1, 227, 1, 54])}
+                                 })
+
+        pattern = ConvertGroupedStridedSlice()
+        pattern.add_reshape_for_shrink(graph, Node(graph, 'sslice_1'))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_1_data', check_op_attrs=True)
+        graph.clear()
+        graph_ref.clear()
+        self.assertTrue(flag, resp)
+
+    def test_ss_1_shrink(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'sslice_2'),
+                             ('placeholder_begin_data', 'sslice_2'),
+                             ('placeholder_end_data', 'sslice_2'),
+                             ('placeholder_stride_data', 'sslice_2'),
+                             ('sslice_2', 'sslice_2_data'),
+                             ('sslice_2_data', 'placeholder_2'),
+                             ('placeholder_2', 'placeholder_2_data'),
+                             ('sslice_2_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                             'sslice_2': {'slices': np.array([slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
+                                          'shrink_axis_mask': [0, 0, 1, 0],
+                                          'new_axis_mask': np.array([0, 0, 0, 0])},
+                             'sslice_2_data': {'shape': np.array([1, 227, 54])}
+                             })
+        graph.graph['layout'] = 'NHWC'
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'sslice_2'),
+                                 ('placeholder_begin_data', 'sslice_2'),
+                                 ('placeholder_end_data', 'sslice_2'),
+                                 ('placeholder_stride_data', 'sslice_2'),
+                                 ('sslice_2', 'sslice_2/Reshape_shrink_data'),
+                                 ('sslice_2/Reshape_shrink_data', 'sslice_2/Reshape_shrink'),
+                                 ('sslice_2/Reshape_shrink', 'sslice_2_data'),
+                                 ('sslice_2_data', 'placeholder_2'),
+                                 ('placeholder_2', 'placeholder_2_data'),
+                                 ('sslice_2_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                                 'sslice_2': {'slices': np.array([slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
+                                              'shrink_axis_mask': np.array([0, 0, 0, 0]),
+                                              'new_axis_mask': np.array([0, 0, 0, 0])},
+                                 'sslice_2_data': {'shape': np.array([1, 227, 54])},
+                                 'sslice_2/Reshape_shrink': {'dim': np.array([1, 227, 54])},
+                                 'sslice_2/Reshape_shrink_data': {'shape': np.array([1, 227, 1, 54])},
+                                 })
+
+        pattern = ConvertGroupedStridedSlice()
+        pattern.add_reshape_for_shrink(graph, Node(graph, 'sslice_2'))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
+        graph.clear()
+        graph_ref.clear()
+        self.assertTrue(flag, resp)
+
+    def test_ss_2_shrink(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'sslice_2'),
+                             ('placeholder_begin_data', 'sslice_2'),
+                             ('placeholder_end_data', 'sslice_2'),
+                             ('placeholder_stride_data', 'sslice_2'),
+                             ('sslice_2', 'sslice_2_data'),
+                             ('sslice_2_data', 'placeholder_2'),
+                             ('placeholder_2', 'placeholder_2_data'),
+                             ('sslice_2_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                             'sslice_2': {
+                                 'slices': np.array([slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1)]),
+                                 'shrink_axis_mask': np.array([0, 1, 0, 1]),
+                                 'new_axis_mask': np.array([0, 0, 0, 0])},
+                             'sslice_2_data': {'shape': np.array([1, 227])}
+                             })
+        graph.graph['layout'] = 'NHWC'
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'sslice_2'),
+                                 ('placeholder_begin_data', 'sslice_2'),
+                                 ('placeholder_end_data', 'sslice_2'),
+                                 ('placeholder_stride_data', 'sslice_2'),
+                                 ('sslice_2', 'sslice_2/Reshape_shrink_data'),
+                                 ('sslice_2/Reshape_shrink_data', 'sslice_2/Reshape_shrink'),
+                                 ('sslice_2/Reshape_shrink', 'sslice_2_data'),
+                                 ('sslice_2_data', 'placeholder_2'),
+                                 ('placeholder_2', 'placeholder_2_data'),
+                                 ('sslice_2_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                                 'sslice_2': {'slices': np.array(
+                                     [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1)]),
+                                     'shrink_axis_mask': np.array([0, 0, 0, 0]),
+                                     'new_axis_mask': np.array([0, 0, 0, 0])},
+                                 'sslice_2_data': {'shape': np.array([1, 227])},
+                                 'sslice_2/Reshape_shrink': {'dim': np.array([1, 227])},
+                                 'sslice_2/Reshape_shrink_data': {'shape': np.array([1, 1, 227, 1])},
+                                 })
+
+        pattern = ConvertGroupedStridedSlice()
+        pattern.add_reshape_for_shrink(graph, Node(graph, 'sslice_2'))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
+        graph.clear()
+        graph_ref.clear()
+        self.assertTrue(flag, resp)
+
+    def test_ss_1_new(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'sslice_2'),
+                             ('placeholder_begin_data', 'sslice_2'),
+                             ('placeholder_end_data', 'sslice_2'),
+                             ('placeholder_stride_data', 'sslice_2'),
+                             ('sslice_2', 'sslice_2_data'),
+                             ('sslice_2_data', 'placeholder_2'),
+                             ('placeholder_2', 'placeholder_2_data'), ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                             'sslice_2': {'slices': np.array(
+                                 [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 54, 1)]),
+                                 'shrink_axis_mask': np.array([0, 0, 0, 0, 0]),
+                                 'new_axis_mask': np.array([0, 1, 0, 0, 0])},
+                             'sslice_2_data': {'shape': np.array([1, 1, 227, 227, 54])}
+                             })
+        graph.graph['layout'] = 'NHWC'
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'sslice_2'),
+                                 ('placeholder_begin_data', 'sslice_2'),
+                                 ('placeholder_end_data', 'sslice_2'),
+                                 ('placeholder_stride_data', 'sslice_2'),
+                                 ('sslice_2', 'sslice_2/Reshape_new_data'),
+                                 ('sslice_2/Reshape_new_data', 'sslice_2/Reshape_new'),
+                                 ('sslice_2/Reshape_new', 'sslice_2_data'),
+                                 ('sslice_2_data', 'placeholder_2'),
+                                 ('placeholder_2', 'placeholder_2_data')],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                                 'sslice_2': {'slices': np.array(
+                                     [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1),
+                                      slice(0, 54, 1)]),
+                                     'shrink_axis_mask': np.array([0, 0, 0, 0, 0]),
+                                     'new_axis_mask': np.array([0, 0, 0, 0, 0])},
+                                 'sslice_2_data': {'shape': np.array([1, 1, 227, 227, 54])},
+                                 'sslice_2/Reshape_new': {'dim': np.array([1, 1, 227, 227, 54])},
+                                 'sslice_2/Reshape_new_data': {'shape': np.array([1, 227, 227, 54])},
+                                 })
+
+        pattern = ConvertGroupedStridedSlice()
+        pattern.add_reshape_for_new(graph, Node(graph, 'sslice_2'))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
+        graph.clear()
+        graph_ref.clear()
+        self.assertTrue(flag, resp)
+
+    def test_ss_shrink_new(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'sslice_2'),
+                             ('placeholder_begin_data', 'sslice_2'),
+                             ('placeholder_end_data', 'sslice_2'),
+                             ('placeholder_stride_data', 'sslice_2'),
+                             ('sslice_2', 'sslice_2_data'),
+                             ('sslice_2_data', 'placeholder_2'),
+                             ('placeholder_2', 'placeholder_2_data'),
+                             ('sslice_2_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                             'sslice_2': {'slices': np.array(
+                                 [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1), slice(0, 54, 1)]),
+                                 'shrink_axis_mask': np.array([0, 0, 0, 1, 0]),
+                                 'new_axis_mask': np.array([0, 1, 0, 0, 0])},
+                             'sslice_2_data': {'shape': np.array([1, 1, 227, 54])}
+                             })
+        graph.graph['layout'] = 'NHWC'
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'sslice_2'),
+                                 ('placeholder_begin_data', 'sslice_2'),
+                                 ('placeholder_end_data', 'sslice_2'),
+                                 ('placeholder_stride_data', 'sslice_2'),
+                                 ('sslice_2', 'sslice_2/Reshape_new_data'),
+                                 ('sslice_2/Reshape_new_data', 'sslice_2/Reshape_new'),
+                                 ('sslice_2/Reshape_new', 'sslice_2/Reshape_shrink_data'),
+                                 ('sslice_2/Reshape_shrink_data', 'sslice_2/Reshape_shrink'),
+                                 ('sslice_2/Reshape_shrink', 'sslice_2_data'),
+                                 ('sslice_2_data', 'placeholder_2'),
+                                 ('placeholder_2', 'placeholder_2_data'),
+                                 ('sslice_2_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                                 'sslice_2': {'slices': np.array(
+                                     [slice(0, 1, 1), slice(0, 1, 1), slice(0, 227, 1), slice(0, 1, 1),
+                                      slice(0, 54, 1)]),
+                                     'shrink_axis_mask': np.array([0, 0, 0, 0, 0]),
+                                     'new_axis_mask': np.array([0, 0, 0, 0, 0])},
+                                 'sslice_2_data': {'shape': np.array([1, 1, 227, 54])},
+                                 'sslice_2/Reshape_new': {'dim': np.array([1, 1, 227, 1, 54])},
+                                 'sslice_2/Reshape_new_data': {'shape': np.array([1, 227, 1, 54])},
+                                 'sslice_2/Reshape_shrink': {'dim': np.array([1, 1, 227, 54])},
+                                 'sslice_2/Reshape_shrink_data': {'shape': np.array([1, 1, 227, 1, 54])},
+                                 })
+
+        pattern = ConvertGroupedStridedSlice()
+        pattern.add_reshape_for_shrink(graph, Node(graph, 'sslice_2'))
+        pattern.add_reshape_for_new(graph, Node(graph, 'sslice_2'))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'sslice_2_data', check_op_attrs=True)
+        graph.clear()
+        graph_ref.clear()
+        self.assertTrue(flag, resp)
+
+    # test case with 2 strided slices with the same parameters but different outputs
+    def test_1(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'sslice_1'),
+                             ('sslice_1', 'sslice_1_data'),
+                             ('placeholder_1_data', 'sslice_2'),
+                             ('sslice_2', 'sslice_2_data'),
+                             ('placeholder_1_data', 'sslice_3'),
+                             ('sslice_3', 'sslice_3_data'),
+                             ('sslice_1_data', 'concat_1'),
+                             ('sslice_2_data', 'concat_1'),
+                             ('sslice_3_data', 'placeholder_2'),
+                             ('placeholder_2', 'placeholder_2_data'),
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output'),
+                             ('placeholder_2_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+
+                             'sslice_1': {'slices': np.array(
+                                 [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 27, 1)])},
+                             'sslice_1_data': {'shape': np.array([1, 227, 227, 27])},
+
+                             'sslice_2': {'slices': np.array(
+                                 [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(27, 54, 1)])},
+                             'sslice_2_data': {'shape': np.array([1, 227, 227, 27])},
+
+                             'sslice_3': {'slices': np.array(
+                                 [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 27, 1)])},
+                             'sslice_3_data': {'shape': np.array([1, 227, 227, 27])},
+
+                             'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
+                             })
+        graph.graph['layout'] = 'NHWC'
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'split_1'),
+                                 ('split_1', 'split_1_data'),
+                                 ('split_1', 'split_2_data'),
+                                 ('split_1_data', 'concat_1'),
+                                 ('split_2_data', 'concat_1'),
+                                 ('split_1_data', 'placeholder_2'),
+                                 ('placeholder_2', 'placeholder_2_data'),
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output'),
+                                 ('placeholder_2_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 54])},
+                                 'split_1': {'axis': 3},
+                                 'split_1_data': {'shape': np.array([1, 227, 227, 27])},
+                                 'split_2_data': {'shape': np.array([1, 227, 227, 27])},
+                                 'concat_1_data': {'shape': np.array([1, 227, 227, 54])},
                                  })
 
         pattern = ConvertGroupedStridedSlice()
diff --git a/model-optimizer/extensions/middle/ConvertLayoutDependentOperations.py b/model-optimizer/extensions/middle/ConvertLayoutDependentOperations.py
index 7f2e87c9c..9308506d8 100644
--- a/model-optimizer/extensions/middle/ConvertLayoutDependentOperations.py
+++ b/model-optimizer/extensions/middle/ConvertLayoutDependentOperations.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.layout import indices_mapping
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op, PermuteAttrs
 from mo.ops.permute import Permute
@@ -32,9 +30,10 @@ class ConvertLayoutDependentOperations(MiddleReplacementPattern):
     enabled = True
 
     def run_after(self):
-        return []
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         for node in list(graph.nodes()):
             node = Node(graph, node)
             # Check that node layout mismatch with graph layout
diff --git a/model-optimizer/extensions/middle/ConvertMultiInputConv.py b/model-optimizer/extensions/middle/ConvertMultiInputConv.py
new file mode 100644
index 000000000..8e5fd5338
--- /dev/null
+++ b/model-optimizer/extensions/middle/ConvertMultiInputConv.py
@@ -0,0 +1,75 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import copy
+
+from mo.graph.graph import Graph, Node
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class ConvertMultiInputConv(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('op', dict(kind='op', op='ConvND'))],
+            edges=[]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        node = match['op']
+        node.op = 'Conv2D'
+
+        if node.bias_term:
+            num_inputs = len(node.in_nodes()) - 2
+            w_node = node.in_node(len(node.in_nodes()) - 2)
+            b_node = node.in_node(len(node.in_nodes()) - 1)
+        else:
+            num_inputs = len(node.in_nodes()) - 1
+            w_node = node.in_node(len(node.in_nodes()) - 1)
+
+        for i in range(1, num_inputs):
+            in_i = node.in_node(i)
+            out_i = node.out_node(i)
+            conv_id = graph.unique_id(node.id + '__')
+            graph.add_node(conv_id, **copy.deepcopy(node.get_attrs()))
+            new_conv = Node(graph, conv_id)
+            new_conv.name = conv_id
+
+            graph.remove_edge(in_i.id, node.id)
+            graph.remove_edge(node.id, out_i.id)
+            graph.add_edges_from([
+                (w_node.id, conv_id, {'in': 1, 'bin': 'weights'}),
+            ])
+
+            if node.bias_term:
+                graph.add_edges_from([
+                    (b_node.id, conv_id, {'in': 2, 'bin': 'biases'}),
+                ])
+
+            graph.add_edges_from([
+                (in_i.id, conv_id, {'in': 0}),
+            ])
+            graph.add_edge(conv_id, out_i.id, **{'out': 0})
diff --git a/model-optimizer/extensions/middle/CustomSubgraphCall.py b/model-optimizer/extensions/middle/CustomSubgraphCall.py
new file mode 100644
index 000000000..f2eba63b4
--- /dev/null
+++ b/model-optimizer/extensions/middle/CustomSubgraphCall.py
@@ -0,0 +1,322 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import copy
+import logging as log
+
+import numpy as np
+
+from mo.front.common.layout import nhwc_to_nchw_permute
+from mo.front.common.partial_infer.utils import int64_array
+from mo.front.extractor import update_ie_fields
+from mo.graph.graph import Graph
+from mo.graph.graph import Node, add_opoutput
+from mo.middle.replacement import MiddleReplacementPattern
+
+nchw_to_nhwc_constant_name = 'IE_NCHW_TO_NHWC'
+nhwc_to_nchw_constant_name = 'IE_NHWC_TO_NCHW'
+
+
+class CustomSubgraphCall(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+    graph_condition = [lambda graph: graph.graph['fw'] == 'tf']
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    @staticmethod
+    def update_placeholders(graph: Graph):
+        """
+        Iterates over all nodes of the graph, find all TF sub-graph call operations and updates placeholders shapes and adds
+        transpose operation if necessary.
+        :param graph: graph to operate on
+        :return: None
+        """
+        for node_name in graph.nodes():
+            node = Node(graph, node_name)
+            if node.kind == 'op' and node.has_valid('op') and node.op == 'TFCustomSubgraphCall':
+                CustomSubgraphCall.update_placeholder_shape_and_add_transpose(node)
+
+    @staticmethod
+    def update_placeholder_shape_and_add_transpose(node: Node):
+        """
+        The function changes placeholders shapes from NHWC to NCHW format and add transpose operations if needed.
+        :param node: node to operate on.
+        :return: None
+        """
+        import tensorflow as tf
+        from mo.front.common.layout import convert_shape, nhwc_to_nchw_permute, nchw_to_nhwc_permute
+        from mo.front.tf.extractors.utils import tf_tensor_shape
+        from mo.front.tf.partial_infer.tf import add_node_def_to_subgraph, update_input_in_pbs
+
+        tf.reset_default_graph()
+
+        inputs_replacements = list()
+
+        # transpose permutation constant
+        nchw_to_nhwc_constant = tf.constant(nchw_to_nhwc_permute, dtype=tf.int32, name=nchw_to_nhwc_constant_name)
+        nhwc_to_nchw_constant = tf.constant(nhwc_to_nchw_permute, dtype=tf.int32, name=nhwc_to_nchw_constant_name)
+
+        for placeholder_name in node['input_nodes_names']:
+            # dummy node which we can refer to as input in the transpose for the output node
+            # dummy node should be unique for each placeholder
+            dummy_node = tf.constant(value=[[[[1]]]], dtype=tf.float32, name='random_dummy_name_' + placeholder_name)
+
+            placeholder = node['pbs'][placeholder_name]
+            cur_shape = tf_tensor_shape(placeholder.attr['shape'].shape)
+            if len(cur_shape) == 4:  # TODO think about better check that transpose is required
+                nchw_shape = convert_shape(cur_shape, nhwc_to_nchw_permute)
+                for ind in range(len(cur_shape)):
+                    placeholder.attr['shape'].shape.dim[ind].size = nchw_shape[ind]
+                transpose_name = placeholder.name + '_transpose'
+                transpose = tf.transpose(dummy_node, nchw_to_nhwc_constant, transpose_name)  # NCHW -> NHWC
+
+                # add transpose operations to GraphDef after placeholders
+                add_node_def_to_subgraph(node, transpose.op.node_def, transpose_name, len(node['input_nodes_names']))
+                inputs_replacements.append((placeholder.name, transpose_name))
+                inputs_replacements.append((dummy_node.name, placeholder.name))
+                node['real_input_dims'].append(nchw_shape)
+            else:
+                node['real_input_dims'].append(cur_shape)
+        add_node_def_to_subgraph(node, nchw_to_nhwc_constant.op.node_def)
+        add_node_def_to_subgraph(node, nhwc_to_nchw_constant.op.node_def)
+
+        # update initial input names to a transposed ones
+        for old_input_tensor_name, new_name in inputs_replacements:
+            update_input_in_pbs(node, old_input_tensor_name, new_name)
+
+    @staticmethod
+    def add_output_nodes_transposes(graph: Graph):
+        """
+        Iterates over all nodes of the graph, find all TF sub-graph call operations and adds Transpose operations to the
+        output nodes if they are 4D to covert output from NHWC to NCHW.
+        :param graph: graph to operate on
+        :return: None
+        """
+        for node_name in graph.nodes():
+            node = Node(graph, node_name)
+            if node.kind == 'op' and node.has_valid('op') and node.op == 'TFCustomSubgraphCall':
+                CustomSubgraphCall.add_sub_graph_call_output_tensors_transposes(node)
+
+    @staticmethod
+    def make_shape_4d(shape: np.array):
+        """
+        Create 4D tensor from 1D, 2D or 3D by adding new dimensions of size 1.
+        :param shape: shape to extend.
+        :return: 4D tensor.
+        """
+        new_shape = int64_array(shape)
+        old_shape_len = len(shape)
+
+        for x in range(
+                4 - old_shape_len):  # TODO think about proper way to add additional dimensions considering layout
+            if len(
+                    new_shape) <= 1:  # if the shape is 0D or 1D then we should add additional dimensions to batch dimension
+                new_shape = np.insert(new_shape, 0, 1)
+            #            new_shape = np.array([1, shape[0], 1, 1])
+            else:
+                new_shape = np.insert(new_shape, 1, 1)
+        return new_shape
+
+    @staticmethod
+    def add_reshape_before_op_node(graph: Graph, data_node_name: str, op_node_name: str, edge_attrs: dict):
+        """
+        Adds reshape operation which expands dimension of the specified data tensor to 4D.
+        :param graph: graph to operate on.
+        :param data_node_name: the name of the data node to be reshaped to 4D tensor.
+        :param op_node_name: name of the TFCustomSubgraphCall node which produces the tensor.
+        :param edge_attrs: edge attributes which should be preserved.
+        :return: None
+        """
+        data_node = Node(graph, data_node_name)
+
+        graph.remove_edge(data_node_name, op_node_name)
+
+        assert data_node['shape'] is not None
+
+        new_shape = CustomSubgraphCall.make_shape_4d(data_node['shape'])
+
+        # reshape shape data node
+        reshape_shape_data_node_name = graph.unique_id("Reshape_shape_")
+        graph.add_node(reshape_shape_data_node_name, kind='data', precision="FP32", name=reshape_shape_data_node_name,
+                       value=new_shape, shape=[1])
+
+        # reshape operation node
+        reshape_node_name = graph.unique_id("Reshape_")
+        graph.add_node(reshape_node_name, kind='op', precision="FP32", type='Reshape', name=reshape_node_name,
+                       op='Reshape',
+                       data_type=data_node['data_type'])
+        update_ie_fields(graph.node[reshape_node_name])
+
+        # reshaped data node
+        reshaped_value = None
+        if data_node['value'] is not None:
+            reshaped_value = np.reshape(data_node['value'], new_shape)
+        reshaped_data_node_name = graph.unique_id("reshaped_data_")
+        graph.add_node(reshaped_data_node_name, kind='data', precision="FP32", name=reshaped_data_node_name,
+                       shape=new_shape, value=reshaped_value, nchw_layout=True)
+
+        graph.add_edges_from([
+            (data_node_name, reshape_node_name, {'in': 0}),
+            (reshape_shape_data_node_name, reshape_node_name, {'in': 1}),
+            (reshape_node_name, reshaped_data_node_name, {'out': 0}),
+            (reshaped_data_node_name, op_node_name, edge_attrs)
+        ])
+
+    @staticmethod
+    def add_reshape_after_data_node(graph: Graph, data_node_name: str):
+        """
+        Adds reshape operation which changes shape of the tensor produced by TFSubgraphCall from 4D to real dimension
+        of the tensor. The data_node_name node contains real dimensions of the tensor but they will be changed in the
+        add_reshapes_for_tf_subgraph_calls function to a 4D because IE TF call layer supports output in 4D only.
+        :param graph: graph to operate on.
+        :param data_node_name: name of the data node to be reshaped to correct dimensions.
+        :return: None
+        """
+        data_node = Node(graph, data_node_name)
+
+        # if the data node was previously marked as output then we need to mark as output new reshaped data node
+        is_out_node = False
+        if len(data_node.out_nodes()) == 1 and data_node.out_node().has('op') and data_node.out_node().op == 'OpOutput':
+            is_out_node = True
+            graph.remove_node(data_node.out_node().id)
+
+        # save old consumers nodes with edge attributes
+        old_consumer_nodes_with_attrs = list()
+        for index, out_op in enumerate(data_node.out_nodes()):
+            edge_attrs = graph.get_edge_data(data_node_name, out_op.name)[0]
+            old_consumer_nodes_with_attrs.append((out_op.name, edge_attrs))
+
+        # remove old consumers from the data node
+        for out_op in list(data_node.out_nodes()):
+            graph.remove_edge(data_node_name, out_op.name)
+
+        # reshape operation node
+        reshape_node_name = graph.unique_id("Reshape_")
+        graph.add_node(reshape_node_name, kind='op', precision="FP32", type='Reshape', name=reshape_node_name,
+                       op='Reshape',
+                       data_type=data_node['data_type'])
+        update_ie_fields(graph.node[reshape_node_name])
+
+        # reshape shape data node
+        reshape_shape_data_node_name = graph.unique_id("Reshape_shape_")
+        graph.add_node(reshape_shape_data_node_name, kind='data', precision="FP32", name=reshape_shape_data_node_name,
+                       value=np.array(data_node['shape']), shape=[1])
+
+        # reshaped data node
+        reshaped_value = None
+        if data_node['value'] is not None:
+            reshaped_value = np.array(data_node['value'])
+        reshaped_data_node_name = graph.unique_id("reshaped_data_")
+        graph.add_node(reshaped_data_node_name, kind='data', precision="FP32", name=reshaped_data_node_name,
+                       shape=np.array(data_node['shape']), value=reshaped_value, nchw_layout=True)
+
+        if is_out_node:
+            add_opoutput(graph, reshaped_data_node_name, 0, False)
+
+        graph.add_edges_from([
+            (data_node_name, reshape_node_name, {'in': 0}),
+            (reshape_shape_data_node_name, reshape_node_name, {'in': 1}),
+            (reshape_node_name, reshaped_data_node_name, {'out': 0}),
+        ])
+
+        for out_node_name, edge_attrs in old_consumer_nodes_with_attrs:
+            graph.add_edges_from([
+                (reshaped_data_node_name, out_node_name, edge_attrs)
+            ])
+
+    @staticmethod
+    def add_reshapes_for_tf_subgraph_calls(graph: Graph):
+        """
+        Input and output tensors of the TFCustomSubgraphCall must be 4D because IE layer accepts and produces only 4D
+        tensors. This function adds reshape operations where it is necessary.
+        :param graph: graph to operate on.
+        :return: None.
+        """
+        for src_node_name, dst_node_name, edge_attrs in list(graph.edges(data=True)):
+            src_node = Node(graph, src_node_name)
+            dst_node = Node(graph, dst_node_name)
+            if dst_node.kind == 'op' and dst_node.has_valid('type') and dst_node.type == 'TFCustomSubgraphCall' and \
+                    src_node.has_valid('shape') and len(src_node.shape) != 4:
+                log.info("There is an data tensor of shape '{}' which goes into '{}' node".format(
+                    src_node.shape, dst_node.type))
+                CustomSubgraphCall.add_reshape_before_op_node(graph, src_node_name, dst_node_name, edge_attrs)
+
+        for node_name in list(graph.nodes()):
+            node = Node(graph, node_name)
+            if node['kind'] == 'op' and node.has_and_set('type') and node.type == 'TFCustomSubgraphCall':
+                for index, data_node in node.out_nodes().items():
+                    real_dims_count = len(data_node.shape)
+                    if real_dims_count != 4:
+                        log.info(
+                            "There is an data tensor of shape '{}' with real dims count '{}' which goes out of '{}' "
+                            "node".format(data_node.shape, real_dims_count, node.name))
+                        CustomSubgraphCall.add_reshape_after_data_node(graph, data_node.id)
+
+                        # need to update shape of the op so IE generates XML with 4D tensors
+                        out_shape = CustomSubgraphCall.make_shape_4d(data_node['shape'])
+
+                        data_node['shape'] = out_shape
+
+    @staticmethod
+    def add_sub_graph_call_output_tensors_transposes(node: Node):
+        """
+        Adds transpose operations to the output nodes if they are 4D to change layout from NCHW to NHWC.
+        :param node: the node to add transposes to the output nodes to.
+        :return: None
+        """
+        import tensorflow as tf
+        from mo.front.tf.partial_infer.tf import get_subgraph_output_tensors, add_node_def_to_subgraph
+        _, output_tensors = get_subgraph_output_tensors(node)
+
+        # transpose permutation constant
+        nhwc_to_nchw_constant = tf.constant(nhwc_to_nchw_permute, dtype=tf.int32, name=nhwc_to_nchw_constant_name)
+
+        # dummy node which we can refer to as input in the transpose for the output node
+        dummy_node = tf.constant(value=[[[[1]]]], dtype=tf.float32, name='random_dummy_name')
+
+        new_out_tensor_names = list()
+        for out_tensor_name in node['output_tensors_names']:
+            out_name, out_port = out_tensor_name.split(':')
+            if len(output_tensors[
+                       int(out_port)].shape) == 4:  # TODO think about better check whether transpose is required
+                out_transpose_name = out_name + '_port_' + out_port + '_transpose'
+                transpose = tf.transpose(dummy_node, nhwc_to_nchw_constant, name=out_transpose_name)
+
+                # starting from TF 1.8 it is not possible to modify the "node_def" of the "tf.op", so we create a copy,
+                # update it and use further
+                new_input_names = transpose.op.node_def.input[:]
+                new_input_names[0] = out_tensor_name
+                new_node_def = copy.deepcopy(transpose.op.node_def)
+                new_node_def.input[:] = new_input_names
+                add_node_def_to_subgraph(node, new_node_def, position=len(node['nodes_order']))
+                new_out_tensor_names.append(out_transpose_name)
+            else:
+                new_out_tensor_names.append(out_tensor_name)
+
+        # update output tensor names with transposes operations
+        node['output_tensors_names'] = new_out_tensor_names
+
+    def find_and_replace_pattern(self, graph: Graph):
+        CustomSubgraphCall.update_placeholders(graph)
+        CustomSubgraphCall.add_output_nodes_transposes(graph)
+        CustomSubgraphCall.add_reshapes_for_tf_subgraph_calls(graph)
diff --git a/model-optimizer/extensions/middle/DecomposeBidirectionalRNNSequence.py b/model-optimizer/extensions/middle/DecomposeBidirectionalRNNSequence.py
new file mode 100644
index 000000000..5828c16fb
--- /dev/null
+++ b/model-optimizer/extensions/middle/DecomposeBidirectionalRNNSequence.py
@@ -0,0 +1,213 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.concat import Concat
+from mo.ops.op import Op
+from mo.ops.split import Split
+
+
+class DecomposeBidirectionalRNNSequence(MiddleReplacementPattern):
+    """
+        Decomposes bidirectional RNNSequence to forward and reverse RNNSequence ops.
+
+        Both initial state are split to two part, two parts of the results are concatenated.
+
+        Axis of split/concat is completely defined by ONNX recurrent layers specification.
+    """
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.MXNetRNNSequenceNormalize import MXNetRNNSequenceNormalize
+        from extensions.middle.ONNXRNNSequenceNormalize import ONNXRNNSequenceNormalize
+        return [ONNXRNNSequenceNormalize, MXNetRNNSequenceNormalize]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('lstm', dict(kind='op', type='RNNSequence', direction='bidirectional')),
+                ('input', dict(kind='data')),
+                ('W', dict(kind='data')),
+                ('R', dict(kind='data')),
+                ('B', dict(kind='data')),
+            ],
+            edges=[
+                ('input', 'lstm', {'in': 0}),
+                ('W', 'lstm', {'in': 1}),
+                ('R', 'lstm', {'in': 2}),
+                ('B', 'lstm', {'in': 3}),
+            ]
+        )
+
+    @staticmethod
+    def split_helper(node: Node, index: int, direction: str, axis: int=0):
+        return Op._create_data_node(
+            node.graph,
+            name=node.name + '/SplittedBiLSTM/{}/'.format(direction),
+            attrs={'value': np.take(node.value, [index], axis),
+                   'shape': np.array(np.take(node.value, [index], axis).shape, dtype=np.int64)}
+        )
+
+    def split_data(self, data: Node):
+        """ Helper. Split data node into two part along 0 axis """
+        assert len(data.shape) == 3
+        assert data.shape[0] == 2
+
+        output_data = [Op._create_data_node(data.graph,
+                       name=data.name + '/SplittedBiLSTM/{}'.format(['forward', 'reverse'][i])) for i in [0, 1]]
+        split_op = Split(data.graph, dict(name=data.name + '/DecomposedBiLSTM_0', axis=0, num_split=2,
+                                          out_ports_count=2))
+        return split_op.create_node_with_data([data], data_nodes=output_data)
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        bidirectional_cell = match['lstm']
+        new_init_hiddens = self.split_data(bidirectional_cell.in_node(5))
+        new_init_cells = self.split_data(bidirectional_cell.in_node(6)) if 6 in bidirectional_cell.in_nodes()\
+            else (None, None)
+
+        blob_bidirectional_split = lambda node: (
+            self.split_helper(node, 0, 'forward'),
+            self.split_helper(node, 1, 'reverse')
+        )
+
+        splitted_W = blob_bidirectional_split(bidirectional_cell.in_node(1))
+        splitted_R = blob_bidirectional_split(bidirectional_cell.in_node(2))
+        splitted_B = blob_bidirectional_split(bidirectional_cell.in_node(3))
+
+        outputs = self.split_bidirectional(
+            bidirectional_cell,
+            new_init_hiddens,
+            new_init_cells,
+            splitted_W,
+            splitted_R,
+            splitted_B,
+        )
+
+        self.concat_outputs(bidirectional_cell, outputs[0], outputs[1], bidirectional_cell.out_nodes())
+
+    @staticmethod
+    def get_new_cell(bidirectional_cell: Node, direction: str):
+        assert direction in ['forward', 'reverse']
+
+        cell_class = Op.get_op_class_by_name(bidirectional_cell.op)
+        new_cell = lambda graph, attrs: cell_class(graph, attrs)
+        attrs = bidirectional_cell.attrs().copy()
+        new_attrs = {
+            'direction': direction,
+            'name': bidirectional_cell.name + '/Split/' + direction,
+        }
+        attrs.update(new_attrs)
+        return new_cell(bidirectional_cell.graph, attrs)
+
+    def split_bidirectional(self,
+                            bidirectional_cell: Node,
+                            new_init_hiddens: list,
+                            new_init_cells: list,
+                            splitted_W: tuple,
+                            splitted_R: tuple,
+                            splitted_B: tuple):
+        """
+            Split one bidirectional RNNSequence node into 2 one-directional RNNSequence nodes.
+
+            All input data nodes should be already prepared; they are
+            have 2 in the num_dir dimension.
+        """
+        all_outputs = []
+        for i in [0, 1]:
+            direction = ['forward', 'reverse'][i]
+            op = self.get_new_cell(bidirectional_cell, direction)
+
+            output_data = Op._create_data_node(
+                bidirectional_cell.graph,
+                name=bidirectional_cell.out_node(0).name + '/Split/' + str(i),
+                attrs={'shape': bidirectional_cell.out_node(0).shape.copy()}
+            )
+
+            assert output_data.shape[1] == 2
+            output_data.shape[1] = 1
+
+            output_hidden = Op._create_data_node(
+                bidirectional_cell.graph,
+                name=bidirectional_cell.out_node(1).name + '/Split/' + str(i),
+                attrs={'shape': bidirectional_cell.out_node(1).shape.copy()}
+            )
+
+            assert output_hidden.shape[0] == 2
+            output_hidden.shape[0] = 1
+
+            data_nodes = [
+                output_data,
+                output_hidden,
+            ]
+
+            if bidirectional_cell.op == 'LSTM':
+                output_cell = Op._create_data_node(
+                    bidirectional_cell.graph,
+                    name=bidirectional_cell.out_node(2).name + '/Split/' + str(i),
+                    attrs={'shape': bidirectional_cell.out_node(2).shape.copy()}
+                )
+
+                assert output_cell.shape[0] == 2
+                output_cell.shape[0] = 1
+
+                data_nodes.append(output_cell)
+
+            all_outputs.append(
+                op.create_node_with_data(
+                    inputs=[
+                        bidirectional_cell.in_node(0),
+                        splitted_W[i],
+                        splitted_R[i],
+                        splitted_B[i],
+                        None,
+                        new_init_hiddens[i],
+                        new_init_cells[i] if bidirectional_cell.op == 'LSTM' else None,
+                    ],
+                    data_nodes=data_nodes
+                )
+            )
+        return all_outputs
+
+    @staticmethod
+    def concat_outputs(bi_rnn, forward_outputs, reverse_outputs, final_outputs):
+        """ Concatenates two set of outputs from bidirectiondl RNNSequence nodes """
+        concat_ops = [
+            Concat(bi_rnn.graph, {
+                'name': bi_rnn.name + '/FinalConcat/Data',
+                'axis': 1,
+                'in_ports_count': 2,
+            }),
+            Concat(bi_rnn.graph, {
+                'name': bi_rnn.name + '/FinalConcat/HiddenState',
+                'axis': 0,
+                'in_ports_count': 2,
+            }),
+            Concat(bi_rnn.graph, {
+                'name': bi_rnn.name + '/FinalConcat/CellState',
+                'axis': 0,
+                'in_ports_count': 2,
+            })
+        ]
+
+        bi_rnn.graph.remove_node(bi_rnn.id)
+
+        for i in final_outputs:
+            concat_ops[i].create_node_with_data(
+                [forward_outputs[i], reverse_outputs[i]],
+                data_nodes=[final_outputs[i]]
+            )
diff --git a/model-optimizer/extensions/middle/DeleteControlFlowEdges.py b/model-optimizer/extensions/middle/DeleteControlFlowEdges.py
new file mode 100644
index 000000000..4ae88a24c
--- /dev/null
+++ b/model-optimizer/extensions/middle/DeleteControlFlowEdges.py
@@ -0,0 +1,37 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class DeleteControlFlowEdges(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.PartialInfer import PartialInfer
+        return [PartialInfer]
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        for u, v, k, attrs in list(graph.edges(keys=True, data=True)):
+            if 'control_flow_edge' in attrs and attrs['control_flow_edge']:
+                graph.remove_edge(u, v, k)
+                log.debug('Removing control flow edge from {} to {}'.format(u, v))
diff --git a/model-optimizer/extensions/middle/DeleteNotExecutable.py b/model-optimizer/extensions/middle/DeleteNotExecutable.py
new file mode 100644
index 000000000..157cbe187
--- /dev/null
+++ b/model-optimizer/extensions/middle/DeleteNotExecutable.py
@@ -0,0 +1,42 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class DeleteNotExecutable(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.TensorIteratorConditionChecker import ConditionChecks
+        return [ConditionChecks]
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        nodes_to_remove = set()
+        for node_name, node_attrs in list(graph.nodes(data=True)):
+            if node_attrs['kind'] == 'data' and 'executable' in node_attrs and not node_attrs['executable']:
+                [nodes_to_remove.add(op) for op, _ in graph.in_edges(node_name)]
+                nodes_to_remove.add(node_name)
+        log.debug('Removing the following not executable nodes: {}'
+                  ''.format('\n'.join(sorted(map(str, nodes_to_remove)))))
+        graph.remove_nodes_from(nodes_to_remove)
diff --git a/model-optimizer/extensions/middle/DepthToSpace.py b/model-optimizer/extensions/middle/DepthToSpace.py
index 1e05c8aca..6470b2397 100644
--- a/model-optimizer/extensions/middle/DepthToSpace.py
+++ b/model-optimizer/extensions/middle/DepthToSpace.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,12 +13,8 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import logging as log
-from copy import deepcopy
-
-import networkx as nx
-
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.permute import Permute
 from mo.ops.reshape import Reshape
@@ -31,6 +27,14 @@ class DepthToSpace(MiddleReplacementPattern):
 
     enabled = True
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -43,7 +47,7 @@ class DepthToSpace(MiddleReplacementPattern):
                 ('op', 'out_data')
             ])
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         node = match['op']
 
         N, H, W, C = match['in_data'].shape
@@ -52,13 +56,17 @@ class DepthToSpace(MiddleReplacementPattern):
         graph.remove_edge(match['in_data'].id, match['op'].id)
         graph.remove_edge(match['op'].id, match['out_data'].id)
 
-        dim_6D = [N, block_size, block_size, int(C / (block_size ** 2)), H, W]
-        order_6D = [0, 3, 4, 1, 5, 2]
-        dim_4D = [N, int(H * block_size), int(W * block_size), int(C / (block_size ** 2))]
-
-        reshape_data_node = Reshape(graph=graph, attrs={'name': match['op'].id + '/Reshape_to_6D', 'dim': dim_6D}).create_node_with_data([match['in_data']])
-        permute_data_node = Permute(graph=graph, attrs={'name': match['op'].id + '/Permute', 'order': order_6D}).create_node_with_data([reshape_data_node])
-        reshape_node = Reshape(graph=graph, attrs={'infer': None, 'name': match['op'].id + '/Reshape_to_4D', 'dim': dim_4D}).create_node_with_data([permute_data_node], data_nodes=[match['out_data']])
+        dim_6D = int64_array([N, block_size, block_size, int(C / (block_size ** 2)), H, W])
+        order_6D = int64_array([0, 3, 4, 1, 5, 2])
+        dim_4D = int64_array([N, int(H * block_size), int(W * block_size), int(C / (block_size ** 2))])
+
+        reshape_data_node = Reshape(graph=graph, attrs={'name': match['op'].id + '/Reshape_to_6D',
+                                                        'dim': dim_6D}).create_node_with_data([match['in_data']])
+        permute_data_node = Permute(graph=graph, attrs={'name': match['op'].id + '/Permute',
+                                                        'order': order_6D}).create_node_with_data([reshape_data_node])
+        reshape_node = Reshape(graph=graph, attrs={'name': match['op'].id + '/Reshape_to_4D',
+                                                   'dim': dim_4D}).create_node_with_data([permute_data_node],
+                                                                                         data_nodes=[match['out_data']])
 
         reshape_data_node.in_node()['nchw_layout'] = True
         reshape_data_node['nchw_layout'] = True
diff --git a/model-optimizer/extensions/middle/DilatedConvolution.py b/model-optimizer/extensions/middle/DilatedConvolution.py
new file mode 100644
index 000000000..11776241d
--- /dev/null
+++ b/model-optimizer/extensions/middle/DilatedConvolution.py
@@ -0,0 +1,89 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class DilatedConvolutionConverter(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('conv', dict(kind='op', op=lambda value: value in ['Conv2D', 'DepthwiseConv2dNative', 'Conv3D'])),
+                ('space_to_batch', dict(kind='op', op='SpaceToBatchND')),
+                ('batch_to_space', dict(kind='op', op='BatchToSpaceND')),
+                ('input', dict(kind='data')),
+                ('output', dict(kind='data')),
+                ('conv_output', dict(kind='data')),
+                ('stb_output', dict(kind='data')),
+                ('stb_bs', dict(kind='data')),
+                ('stb_pad', dict(kind='data')),
+                ('bts_bs', dict(kind='data')),
+                ('bts_crop', dict(kind='data'))
+            ],
+            edges=[
+                ('input', 'space_to_batch', {'in': 0}),
+                ('stb_bs', 'space_to_batch', {'in': 1}),
+                ('stb_pad', 'space_to_batch', {'in': 2}),
+                ('space_to_batch', 'stb_output', {'out': 0}),
+                ('stb_output', 'conv', {'in': 0}),
+                ('conv', 'conv_output', {'out': 0}),
+                ('conv_output', 'batch_to_space', {'in': 0}),
+                ('bts_bs', 'batch_to_space', {'in': 1}),
+                ('bts_crop', 'batch_to_space', {'in': 2}),
+                ('batch_to_space', 'output', {'out': 0}),
+            ])
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        conv = match['conv']
+        stb = match['space_to_batch']
+        bts = match['batch_to_space']
+
+        block_size = match['stb_bs']
+
+        input = match['input']
+        output = match['output']
+        stb_out = match['stb_output']
+        conv_out = match['conv_output']
+
+        in_edge_attrs = graph.get_edge_data(input.id, stb.id)[0]
+        out_edge_attrs = graph.get_edge_data(bts.id, output.id)[0]
+
+        graph.remove_edge(input.id, stb.id)
+        graph.remove_edge(stb_out.id, conv.id)
+        graph.remove_edge(conv.id, conv_out.id)
+        graph.remove_edge(bts.id, output.id)
+
+        conv.dilation[conv.spatial_dims] = block_size.value
+
+        pad = match['stb_pad'].value - match['bts_crop'].value
+        conv.pad[conv.spatial_dims] = [[pad[x][0], pad[x][1]] for x in range(len(pad))]
+        conv['auto_pad'] = None
+
+        graph.add_edges_from([
+            (input.id, conv.id, {'in': 0, **in_edge_attrs}),
+            (conv.id, output.id, {'out': 0, **out_edge_attrs}),
+        ])
diff --git a/model-optimizer/extensions/middle/EltwiseChecker.py b/model-optimizer/extensions/middle/EltwiseChecker.py
index 751f5c7aa..abbcd8baf 100644
--- a/model-optimizer/extensions/middle/EltwiseChecker.py
+++ b/model-optimizer/extensions/middle/EltwiseChecker.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,18 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
-from copy import deepcopy
 
-from extensions.middle.EltwiseInputNormalization import EltwiseInputNormalize
-from extensions.middle.EltwiseInputReshape import EltwiseInputReshape, Eltwise1DInputReshape
-from mo.front.common.layout import get_features_dim, shape_for_layout
-from mo.graph.graph import Node, get_sorted_inputs
-from mo.middle.passes.fusing.helpers import get_value_id
+from mo.graph.graph import Node, Graph
 from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.ops.reshape import Reshape
 
 
 class EltwiseChecker(MiddleReplacementPattern):
@@ -33,12 +25,17 @@ class EltwiseChecker(MiddleReplacementPattern):
     enabled = True
 
     def run_after(self):
+        from extensions.middle.EltwiseInputReshape import Eltwise1DInputReshape
         return [Eltwise1DInputReshape]
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
+    def find_and_replace_pattern(self, graph: Graph):
         eltwise_nodes = [Node(graph, node) for node in graph.node if Node(graph, node).soft_get('type') == 'Eltwise']
         for node in eltwise_nodes:
-            raw_inputs = [(inp, attr) for inp, attr in get_sorted_inputs(node)
+            raw_inputs = [(inp, attr) for inp, attr in node.get_sorted_inputs()
                           if 'control_flow_edge' not in attr or not attr['control_flow_edge']]
             shapes = [node.graph.node[inp]['shape'] for inp, attr in raw_inputs]
 
diff --git a/model-optimizer/extensions/middle/EltwiseInputNormalization.py b/model-optimizer/extensions/middle/EltwiseInputNormalization.py
index 6f5687e6c..c7fe206de 100644
--- a/model-optimizer/extensions/middle/EltwiseInputNormalization.py
+++ b/model-optimizer/extensions/middle/EltwiseInputNormalization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,22 +16,17 @@
 
 import networkx as nx
 import numpy as np
-from copy import deepcopy
 
 from extensions.middle.EltwiseInputReshape import EltwiseInputReshape
-from mo.front.common.layout import get_features_dim, shape_for_layout
-from mo.graph.graph import Node
-from mo.middle.passes.fusing.helpers import get_value_id
+from mo.graph.graph import Node, Graph
 from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.ops.reshape import Reshape
 
 
 class EltwiseInputNormalize(EltwiseInputReshape, MiddleReplacementPattern):
     # This pass should be called directly from pipeline before layout change and other permutations
     enabled = False
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         eltwise_nodes = [Node(graph, node) for node in graph.node if Node(graph, node).soft_get('type') == 'Eltwise']
         # Iterating over all Eltwise operations and check that every input has similar shape
         # in case of different shapes, we inserts new_shape attribute and then call EltwiseInputReshape extension
diff --git a/model-optimizer/extensions/middle/EltwiseInputNormalization_test.py b/model-optimizer/extensions/middle/EltwiseInputNormalization_test.py
index 829b13be6..1608d21c4 100644
--- a/model-optimizer/extensions/middle/EltwiseInputNormalization_test.py
+++ b/model-optimizer/extensions/middle/EltwiseInputNormalization_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/EltwiseInputReshape.py b/model-optimizer/extensions/middle/EltwiseInputReshape.py
index 74ff0695e..70647af34 100644
--- a/model-optimizer/extensions/middle/EltwiseInputReshape.py
+++ b/model-optimizer/extensions/middle/EltwiseInputReshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,11 +14,12 @@
  limitations under the License.
 """
 
-import networkx as nx
-import numpy as np
 from copy import deepcopy
+
+import numpy as np
+
 from mo.front.common.layout import get_features_dim, shape_for_layout
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.fusing.helpers import get_value_id
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op
@@ -46,7 +47,7 @@ class Eltwise1DInputReshape(MiddleReplacementPattern):
     def run_after(self):
         return [EltwiseInputReshape]
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         layout = graph.graph['layout']
         for n in list(graph.nodes()):
             if 'type' in graph.node[n] and graph.node[n]['type'] == 'Eltwise' and get_value_id(Node(graph, n)) is None:
@@ -68,7 +69,11 @@ class Eltwise1DInputReshape(MiddleReplacementPattern):
 class EltwiseInputReshape(MiddleReplacementPattern):
     enabled = True
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def find_and_replace_pattern(self, graph: Graph):
         data_nodes = [Node(graph, node) for node in graph.node if Node(graph, node).kind == 'data']
         for node in data_nodes:
             # Get all requested shapes for current node
@@ -113,4 +118,4 @@ class EltwiseInputReshape(MiddleReplacementPattern):
 
                         # Reconnect edge from original data node to Reshape output datanode
                         graph.remove_edge(node.id, consumer.id)
-                        graph.add_edge(reshape_data.id, consumer.id, **edge_attrs)
-\ No newline at end of file
+                        graph.add_edge(reshape_data.id, consumer.id, **edge_attrs)
diff --git a/model-optimizer/extensions/middle/EltwiseInputReshape_test.py b/model-optimizer/extensions/middle/EltwiseInputReshape_test.py
index 24c727d72..abf379018 100644
--- a/model-optimizer/extensions/middle/EltwiseInputReshape_test.py
+++ b/model-optimizer/extensions/middle/EltwiseInputReshape_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/FusePermutesSequence.py b/model-optimizer/extensions/middle/FusePermutesSequence.py
index ea5c1c14d..a230c6bdb 100644
--- a/model-optimizer/extensions/middle/FusePermutesSequence.py
+++ b/model-optimizer/extensions/middle/FusePermutesSequence.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,12 +13,10 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import networkx as nx
 import numpy as np
 
 from extensions.middle.ConvertLayoutDependentOperations import ConvertLayoutDependentOperations
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.eliminate import merge_data_nodes, graph_clean_up_tf
 from mo.middle.passes.fusing.helpers import get_next_operation
 from mo.middle.replacement import MiddleReplacementPattern
@@ -32,11 +30,12 @@ class FusePermutesSequence(MiddleReplacementPattern):
     """
 
     enabled = True
+    graph_condition = [lambda graph: graph.graph['fw'] != 'caffe']
 
     def run_after(self):
         return [ConvertLayoutDependentOperations]
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         for node in list(graph.nodes()):
             if node not in graph.nodes():
                 continue
diff --git a/model-optimizer/extensions/middle/FusePermutesSequence_test.py b/model-optimizer/extensions/middle/FusePermutesSequence_test.py
index 850cf1740..d852186a2 100644
--- a/model-optimizer/extensions/middle/FusePermutesSequence_test.py
+++ b/model-optimizer/extensions/middle/FusePermutesSequence_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import unittest
 
 import numpy as np
+from argparse import Namespace
 
 from extensions.middle.FusePermutesSequence import FusePermutesSequence
 from mo.middle.passes.eliminate_test import build_graph
@@ -38,6 +39,7 @@ nodes_attributes = {
 
     'permute_3': {'type': 'Permute', 'value': None, 'kind': 'op', 'op': 'Permute'},
     'permute_3_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'op_output': { 'op': 'OpOutput', 'kind': 'op'}
 }
 
 
@@ -52,7 +54,8 @@ class FusePermutesSequenceTest(unittest.TestCase):
                              ('placeholder_1_data', 'permute_1'),
                              ('permute_1', 'permute_1_data'),
                              ('permute_1_data', 'permute_2'),
-                             ('permute_2', 'permute_2_data')
+                             ('permute_2', 'permute_2_data'),
+                             ('permute_2_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
 
@@ -60,14 +63,18 @@ class FusePermutesSequenceTest(unittest.TestCase):
                              'permute_1_data': {'shape': np.array([1, 3, 227, 227])},
 
                              'permute_2': {'order': np.array([0, 2, 3, 1])},
-                             'permute_2_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True},
+                             'permute_2_data': {'shape': np.array([1, 227, 227, 3])},
                              }, nodes_with_edges_only=True)
 
         graph.graph['layout'] = 'NHWC'
+        graph.graph['cmd_params'] = Namespace(keep_shape_ops=False)
 
         graph_ref = build_graph(nodes_attributes,
-                            [('placeholder_1', 'placeholder_1_data')],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])}}, nodes_with_edges_only=True)
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])}},
+                                nodes_with_edges_only=True)
 
         pattern = FusePermutesSequence()
         pattern.find_and_replace_pattern(graph)
@@ -84,7 +91,8 @@ class FusePermutesSequenceTest(unittest.TestCase):
                              ('placeholder_1_data', 'permute_1'),
                              ('permute_1', 'permute_1_data'),
                              ('permute_1_data', 'permute_2'),
-                             ('permute_2', 'permute_2_data')
+                             ('permute_2', 'permute_2_data'),
+                             ('permute_2_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
 
@@ -92,20 +100,22 @@ class FusePermutesSequenceTest(unittest.TestCase):
                              'permute_1_data': {'shape': np.array([1, 3, 227, 227])},
 
                              'permute_2': {'order': np.array([0, 1, 2, 3])},
-                             'permute_2_data': {'shape': np.array([1, 3, 227, 227]), 'is_output': True},
+                             'permute_2_data': {'shape': np.array([1, 3, 227, 227])},
                              }, nodes_with_edges_only=True)
 
         graph.graph['layout'] = 'NHWC'
+        graph.graph['cmd_params'] = Namespace(keep_shape_ops=False)
 
         graph_ref = build_graph(nodes_attributes,
-                            [('placeholder_1', 'placeholder_1_data'),
-                             ('placeholder_1_data', 'permute_1'),
-                             ('permute_1', 'permute_1_data'),
-                             ],
-                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
-                             'permute_1': {'order': np.array([0, 3, 1, 2])},
-                             'permute_1_data': {'shape': np.array([1, 3, 227, 227])},
-                             }, nodes_with_edges_only=True)
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'permute_1'),
+                                 ('permute_1', 'permute_1_data'),
+                                 ('permute_1_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'permute_1': {'order': np.array([0, 3, 1, 2])},
+                                 'permute_1_data': {'shape': np.array([1, 3, 227, 227])},
+                                 }, nodes_with_edges_only=True)
 
         pattern = FusePermutesSequence()
         pattern.find_and_replace_pattern(graph)
diff --git a/model-optimizer/extensions/middle/FusedBatchNormNonConstant.py b/model-optimizer/extensions/middle/FusedBatchNormNonConstant.py
index 6b0ed8e42..d3a84ab46 100644
--- a/model-optimizer/extensions/middle/FusedBatchNormNonConstant.py
+++ b/model-optimizer/extensions/middle/FusedBatchNormNonConstant.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,9 +13,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.eltwise import Eltwise
 from mo.ops.power import Power
@@ -30,6 +28,14 @@ class FusedBatchNormNonConstant(MiddleReplacementPattern):
 
     enabled = True
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -37,7 +43,7 @@ class FusedBatchNormNonConstant(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         node = match['op']
         if (node.data_format != b'NHWC' or
                 len(node.in_nodes()) != 5 or
diff --git a/model-optimizer/extensions/middle/FusedBatchNormTrainingCatch.py b/model-optimizer/extensions/middle/FusedBatchNormTrainingCatch.py
index 90fedc9fb..93749e332 100644
--- a/model-optimizer/extensions/middle/FusedBatchNormTrainingCatch.py
+++ b/model-optimizer/extensions/middle/FusedBatchNormTrainingCatch.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.utils.error import Error
 
@@ -30,6 +29,14 @@ class FusedBatchNormTrainingCatch(MiddleReplacementPattern):
     enabled = True
     replacement_id = "Fused_Batch_Norm_is_training_true_catcher"
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -37,5 +44,5 @@ class FusedBatchNormTrainingCatch(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         raise Error('FusedBatchNorm doesn\'t support is_training=True. Node {}'.format(match['op'].id))
diff --git a/model-optimizer/extensions/middle/GRURNNSequenceToTensorIterator.py b/model-optimizer/extensions/middle/GRURNNSequenceToTensorIterator.py
new file mode 100644
index 000000000..1b2bdc6e2
--- /dev/null
+++ b/model-optimizer/extensions/middle/GRURNNSequenceToTensorIterator.py
@@ -0,0 +1,223 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.tensor_iterator import TensorIterator
+from mo.graph.graph import Graph, add_opoutput
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+from mo.ops.reshape import Reshape
+
+
+class GRUAndRNNToTensorIterator(MiddleReplacementPattern):
+    """ Converts normalized RNNSequence with op=GRU/RNN to TensorIterator.
+
+        Normalized RNNSequence means that it should be processed by
+        RNNSequenceNormalize transform that ensures its strict form.
+
+        This transformation builds an alternative sub-graph for GRUSequence
+        with TensorIterator connected in the same way as an original GRUSequence
+        node and with internal body represented as GRUCell op node with necessary
+        squeezes and unsqueezes around.
+    """
+
+    enabled = True
+    id = 'gru_and_rnn_to_tensor_iterator'
+
+    def run_after(self):
+        from extensions.middle.RNNSequenceNormalizeToIE import RNNSequenceNormalize
+        return [RNNSequenceNormalize]
+
+    def run_before(self):
+        from extensions.middle.FusePermutesSequence import FusePermutesSequence
+        return [FusePermutesSequence]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('rnn_layer', dict(kind='op', type='RNNSequence')),
+                ('input', dict(kind='data')),
+                ('weights', dict(kind='data')),
+                ('biases', dict(kind='data')),
+                # don't capture optional input initial states here
+                ('output', dict(kind='data')),
+                # don't capture optional output last states here
+            ],
+            edges=[
+                ('input', 'rnn_layer', {'in': 0}),
+                ('weights', 'rnn_layer', {'bin': 'weights', 'in': 1}),
+                ('biases', 'rnn_layer', {'bin': 'biases', 'in': 2}),
+                ('rnn_layer', 'output', {'out': 0}),
+            ]
+        )
+
+    @staticmethod
+    def get_rnn_cell(name: str):
+        op = Op.get_op_class_by_name(name + 'Cell')
+        return op
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        if match['rnn_layer']['op'] == 'LSTM':
+            return
+
+        rnn_layer = match['rnn_layer']
+
+        # Build TensorIterator body first
+        body = Graph(name=rnn_layer.name + '/sub_graph')
+        body.graph = graph.graph
+
+        # 1. Input squeeze Reshape
+        inputs = [Op._create_data_node(body, rnn_layer.name + '/inport/' + str(inp),
+                                       {'shape': rnn_layer.in_node(inp).shape.copy(),
+                                        'value': rnn_layer.in_node(inp).value.copy()
+                                        if rnn_layer.in_node(inp).value is not None and inp in [1, 2] else None})
+                  for inp in [0, 4, 1, 2]]  # X, h_init, WR, B
+
+        inputs[0].shape[rnn_layer.sequence_dim] = 1
+        reshape_dim = inputs[0].shape.copy()
+        reshape_dim[rnn_layer.batch_dim] = -1
+        reshape_dim = np.delete(reshape_dim, rnn_layer.sequence_dim)
+        input_squeeze = Reshape(
+            body,
+            dict(name=rnn_layer.name + '/input_squeeze', internal_layer_id=0, dim=reshape_dim)
+        )
+        inputs[0] = input_squeeze.create_node_with_data([inputs[0]], edge_attrs=[{'internal_port_id': 0}])
+
+        # 2. Output unsqueeze Reshape
+        outputs = [Op._create_data_node(body, rnn_layer.name + '/outport/' + str(out),
+                                        {'shape': rnn_layer.out_node(out).shape.copy() if out in rnn_layer.out_nodes() else None})
+                                        for out in [0]]
+        for out in outputs:
+            add_opoutput(body, out.id, 0, False)
+
+        unsqueezed_output_shape = outputs[0].shape.copy()
+        unsqueezed_output_shape[rnn_layer.sequence_dim] = 1
+        squeezed_output_shape = np.delete(unsqueezed_output_shape, rnn_layer.sequence_dim)
+        outputs[0].shape = squeezed_output_shape
+        unsqueezed_output_shape[rnn_layer.batch_dim] = -1
+        output_unsqueeze = Reshape(body, dict(name=rnn_layer.name + '/output_unsqueeze/', dim=unsqueezed_output_shape,
+                                              internal_layer_id=2))
+
+        additional_attrs = dict(activations=rnn_layer.activations,
+                                activation_alpha=rnn_layer.activation_alpha,
+                                activation_beta=rnn_layer.activation_beta,
+                                clip=rnn_layer.clip)
+        if rnn_layer.op == 'GRU':
+            additional_attrs['linear_before_reset'] = rnn_layer.linear_before_reset
+
+        # 3. ***Cell
+        rnn_cell_op = self.get_rnn_cell(rnn_layer['op'])(body, dict(hidden_size=rnn_layer.hidden_size,
+                                                                    name=rnn_layer.name + '/{}Cell'.format(rnn_layer.op),
+                                                                    **additional_attrs,
+                                                                    internal_layer_id=1))
+
+        gru_cell = rnn_cell_op.create_node_with_data(inputs, data_nodes=outputs,
+                                                             edge_attrs=[{}, {'internal_port_id': 1},
+                                                                        {'internal_port_id': 2}, {'bin': 'weights'},
+                                                                        {'bin': 'biases'}])
+
+        # internal ports for outputs of cell
+        gru_cell.in_node().out_edge(0)['internal_port_id'] = 4  # h_state
+
+        gru_cell = output_unsqueeze.create_node_with_data([gru_cell])
+        gru_cell.in_node().out_edge(0)['internal_port_id'] = 3
+        add_opoutput(body, gru_cell.id, 0, False)
+
+        # 4. TensorIterator layer creating
+        assert rnn_layer.direction in ['forward', 'reverse']
+        if rnn_layer.direction == 'forward':
+            stride = 1
+            start = None
+            end = None
+        else:
+            assert rnn_layer.direction == 'reverse'
+            stride = -1
+            start = -1
+            end = 0
+
+        # stacked h_state
+        output_port_map = [{
+            'external_port_id': 3,
+            'internal_layer_id': 2,
+            'internal_port_id': 3,
+
+            'axis': rnn_layer.sequence_dim,
+            'stride': stride,
+            'start': start,
+            'end': end,
+            'part_size': 1,
+        }]
+
+        # Adding last h_state to outputs
+        if len(rnn_layer.out_nodes()) == 2:
+            output_port_map.extend([{
+                'external_port_id': 4,
+                'internal_layer_id': 1,
+                'internal_port_id': 4,
+            }])
+
+        ti_op = TensorIterator(graph, {
+            'name': rnn_layer.name + '/TensorIterator',
+            'body': body,
+            'in_ports_count': 4,
+            'out_ports_count': len(rnn_layer.out_nodes()),
+
+            'input_port_map': [
+                {
+                    'external_port_id': 0,
+                    'internal_layer_id': 0,
+                    'internal_port_id': 0,
+
+                    'axis': rnn_layer.sequence_dim,
+                    'stride': stride,
+                    'start': start,
+                    'end': end,
+                    'part_size': 1,
+                },
+                {
+                    'external_port_id': 1,
+                    'internal_layer_id': 1,
+                    'internal_port_id': 1,
+                },
+            ],
+
+            'output_port_map': output_port_map,
+            # only for h state
+            'back_edges': [
+                {
+                    'from_layer': 1,
+                    'from_port': 4,
+                    'to_layer': 1,
+                    'to_port': 1,
+                },
+            ]
+        })
+
+        assert sorted(rnn_layer.out_nodes().keys()) == list(range(len(rnn_layer.out_nodes()))), \
+            "There are gaps in output ports of GRUSequence operation. Node {}".format(rnn_layer.id)
+
+        outs = ti_op.create_node_with_data([rnn_layer.in_node(i) for i in [0, 4]],  # X, h_init
+                                           data_nodes=[rnn_layer.out_node(i) for i in range(len(rnn_layer.out_nodes()))],
+                                           edge_attrs=[{'external_port_id': 0}, {'external_port_id': 1}])
+
+        if not isinstance(outs, list):
+            outs = list([outs])
+
+        graph.remove_node(rnn_layer.id)
+        outs[0].in_edge(0)['external_port_id'] = 3
+        for i, out in enumerate(outs[1:]):
+            external_port_id = 4 + i
+            out.in_edge()['external_port_id'] = external_port_id
diff --git a/model-optimizer/extensions/middle/GatherNdNormalizer.py b/model-optimizer/extensions/middle/GatherNdNormalizer.py
new file mode 100644
index 000000000..08dd6dc6f
--- /dev/null
+++ b/model-optimizer/extensions/middle/GatherNdNormalizer.py
@@ -0,0 +1,100 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import logging as log
+
+import numpy as np
+
+from extensions.ops.gather import Gather
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.const import Const
+from mo.ops.reshape import Reshape
+
+
+class GatherNdNormalize(MiddleReplacementPattern):
+    """
+    Hot fix for new speech-to-text model enabling while GatherND is not implemented in IE.
+    We can replace GatherNd to Reshape + Gather in case when GatherNd indices have just one
+    meaningful dimension.
+    """
+    enabled = True
+    force_clean_up = True
+
+    def run_before(self):
+        from extensions.middle.BlockLSTMtoLSTMSequence import BlockLSTMtoLSTMSequence
+        return [BlockLSTMtoLSTMSequence]
+
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('GatherNd', dict(kind='op', op='GatherNd'))],
+            edges=[]
+        )
+
+    @staticmethod
+    def indices_check(indices: np.array, input_shape: tuple):
+        """
+        Check that indices have just one meaningful dimension and all other dimensions of input have size 1.
+        """
+        n_dims = indices.shape[-1]
+        non_zero = None
+        for i in range(n_dims):
+            if not all(np.take(indices, indices=[i], axis=-1) == 0):
+                if non_zero is None:
+                    non_zero = i
+                else:
+                    return None
+            else:
+                if input_shape[i] != 1:
+                    return None
+        return non_zero
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        gather = match['GatherNd']
+        input_shape = gather.in_node(0).shape
+        indices = gather.in_node(1).value
+        if indices is None:
+            # We can't do such special pass without indices value
+            return
+
+        # 0. All needed checks that we can replace GatherNd by Gather
+        gather_idx = self.indices_check(indices, input_shape)
+        if gather_idx is None:
+            log.warning('Node {} with op=GatherNd  can\'t be normalized to op=Gather.'.format(gather.name))
+            return
+
+        # 1. Add Reshape and connect
+        new_shape = int64_array([-1] + list(input_shape[indices.shape[-1]:]))
+        reshape = Reshape(graph, {'name': gather.name + '/Reshape_for_GatherNd/', 'dim': new_shape, }).create_node()
+        gather.in_port(0).get_connection().set_destination(reshape.in_port(0))
+
+        # 2. Change indices from Nd to 1d:
+        new_indices = np.reshape(np.take(indices, indices=[gather_idx], axis=-1), [-1])
+        new_indices_const = Const(graph, dict(value=new_indices)).create_node()
+
+        # 3. Create new Gather operation and reconnect all inputs/outputs
+        new_gather = Gather(graph, {'name': gather.name + '/NewGather/', 'axis': 0}).create_node()
+        reshape.out_port(0).connect(new_gather.in_port(0))
+        new_indices_const.out_port(0).connect(new_gather.in_port(1))
+
+        gather.out_port(0).get_connection().set_source(new_gather.out_port(0))
+
+        # 4. Remove old Gather node
+        graph.remove_node(gather.id)
diff --git a/model-optimizer/extensions/middle/GemmResolver.py b/model-optimizer/extensions/middle/GemmResolver.py
index 29a39b9d2..edef22ae6 100644
--- a/model-optimizer/extensions/middle/GemmResolver.py
+++ b/model-optimizer/extensions/middle/GemmResolver.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,27 +14,31 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from extensions.middle.NormalizeFullyConnected import NormalizeFullyConnected
 from mo.front.common.partial_infer.utils import mark_input_bins, assign_dims_to_weights, int64_array
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import PermuteAttrs
 
 
 class GemmResolver(MiddleReplacementPattern):
     enabled = True
+    graph_condition = [lambda graph: graph.graph['fw'] != 'tf']
 
     def run_before(self):
+        from extensions.middle.NormalizeFullyConnected import NormalizeFullyConnected
         return [NormalizeFullyConnected]
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
     def pattern(self):
         return dict(
             nodes=[
-                   ('input_0', dict(kind='data')),
-                   ('input_1', dict(kind='data')),
-                   ('fc', dict(op='MatMul')),
-                   ('fc_data', dict(kind='data'))],
+                ('input_0', dict(kind='data')),
+                ('input_1', dict(kind='data')),
+                ('fc', dict(op='MatMul')),
+                ('fc_data', dict(kind='data'))],
             edges=[
                 ('input_0', 'fc', {'in': 0}),
                 ('input_1', 'fc', {'in': 1}),
@@ -42,9 +46,10 @@ class GemmResolver(MiddleReplacementPattern):
             ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         if not match['input_0'].has_valid('value') and not match['input_1'].has_valid('value') or \
-                not match['input_0'].has_valid('value') and match['input_1'].has_valid('value') and match['input_1'].shape.size > 2:
+                not match['input_0'].has_valid('value') and match['input_1'].has_valid('value') and match[
+            'input_1'].shape.size > 2:
             match['fc']['type'] = 'GEMM'
         elif not match['input_0'].has_valid('value') and match['input_1'].has_valid('value'):
             match['fc']['type'] = 'FullyConnected'
@@ -57,6 +62,3 @@ class GemmResolver(MiddleReplacementPattern):
             weights_shape = weights_node.shape
 
             node['out-size'] = weights_shape[1]
-
-
-
diff --git a/model-optimizer/extensions/middle/GemmToFullyConnected.py b/model-optimizer/extensions/middle/GemmToFullyConnected.py
new file mode 100644
index 000000000..1cba6b4c9
--- /dev/null
+++ b/model-optimizer/extensions/middle/GemmToFullyConnected.py
@@ -0,0 +1,88 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from typing import Dict
+from mo.front.common.partial_infer.utils import assign_dims_to_weights
+from mo.graph.graph import Graph, Node
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.lin_op import Add
+
+
+class GemmToFullyConnected(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['fw'] == 'onnx']
+
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('gemm', dict(kind='op', op='Gemm')),
+                ('output', dict(kind='data'))],
+            edges=[('gemm', 'output')]
+        )
+
+    def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
+        log.debug('GemmToFullyConnected is triggered')
+        gemm = match['gemm']
+        A = gemm.in_node(0)
+        B = gemm.in_node(1)
+        B_consumers = graph.out_edges(B.node)
+        C = gemm.in_node(2)
+
+        if not (B.value is not None and
+                C.value is not None and
+                A.shape is not None and
+                not gemm.transpose_a and
+                (len(B_consumers) == 1 or not gemm.transpose_b)):
+            log.warning('Cannot convert Gemm to FullyConnected')
+            return
+
+        if gemm.transpose_b:
+            # B.value = B.value.transpose()
+            # B.shape = np.array(B.value.shape, dtype=np.int64)
+            gemm.transpose_b = 0
+        else:
+            B.value = B.value.transpose()
+            B.shape = np.array(B.value.shape, dtype=np.int64)
+
+        gemm['out-size'] = gemm.out_port(0).data.get_shape()[-1]
+        gemm['type'] = 'FullyConnected'
+        gemm['channel_dims'] = len(match['output'].shape) - 1
+        gemm['bias_addable'] = True
+        gemm['input_channel_dim'] = 1  # MatMul weights in IO
+        gemm['output_channel_dim'] = 0
+        gemm['layout'] = 'NCHW'
+
+        gemm.in_port(1).bin = 'weights'
+
+        bias_node = Add(graph, {}).create_node()
+        gemm.out_port(0).get_connection().set_source(bias_node.out_port(0))
+        gemm.in_port(2).get_connection().set_destination(bias_node.in_port(1))
+        gemm.out_port(0).connect(bias_node.in_port(0))
+
+        assign_dims_to_weights(gemm.in_node(1), None, 1, 0, 2)
+        # Do not transpose weights in this pass, it will be done as a separate pass
diff --git a/model-optimizer/extensions/middle/InputCut.py b/model-optimizer/extensions/middle/InputCut.py
new file mode 100644
index 000000000..902cd2ddb
--- /dev/null
+++ b/model-optimizer/extensions/middle/InputCut.py
@@ -0,0 +1,34 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.extractor import add_input_ops
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class MiddleInputCut(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.ChangePlaceholderTypes import ChangePlaceholderTypes
+        return [ChangePlaceholderTypes]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        add_input_ops(graph, graph.graph['user_shapes'], False)
diff --git a/model-optimizer/extensions/middle/L2NormToNorm.py b/model-optimizer/extensions/middle/L2NormToNorm.py
new file mode 100644
index 000000000..b440c1b85
--- /dev/null
+++ b/model-optimizer/extensions/middle/L2NormToNorm.py
@@ -0,0 +1,107 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.front.extractor import add_attrs_props
+from mo.front.extractor import update_ie_fields
+from mo.graph.graph import Node, Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class L2NormToNorm(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('input', dict(kind='data')),
+                ('l2_normalize', dict(kind='op', op='Mul')),
+                ('l2_normalize_data', dict(kind='data')),
+                ('maximum', dict(kind='op', op='Maximum')),
+                ('maximum_data', dict(kind='data')),
+                ('maximum_y_data', dict(kind='data')),
+                ('rsqrt', dict(kind='op', op='Rsqrt')),
+                ('rsqrt_data', dict(kind='data')),
+                ('square', dict(kind='op', op='Square')),
+                ('square_data', dict(kind='data')),
+                ('sum', dict(kind='op', op='Reduce', reduce_type='sum')),
+                ('sum_data', dict(kind='data')),
+            ],
+            edges=[
+                ('input', 'square'),
+                ('square', 'square_data'),
+                ('square_data', 'sum'),
+                ('sum', 'sum_data'),
+                ('maximum_y_data', 'maximum'),
+                ('sum_data', 'maximum'),
+                ('maximum', 'maximum_data'),
+                ('maximum_data', 'rsqrt'),
+                ('rsqrt', 'rsqrt_data'),
+                ('rsqrt_data', 'l2_normalize'),
+                ('input', 'l2_normalize'),
+                ('l2_normalize', 'l2_normalize_data'),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        input_data_name = match['input'].node
+        output_data_name = match['l2_normalize_data'].node
+
+        if not match['maximum_y_data'].has_valid('value'):
+            return
+        if match['maximum_y_data'].value.shape != ():
+            return
+        y = match['maximum_y_data'].value
+
+        normalize_id = graph.unique_id()
+        graph.add_node(normalize_id,
+                       **add_attrs_props(
+                           dict(kind='op', precision="FP32", type='Normalize', name=str(graph.unique_id('normalize')),
+                                op='Normalize', shape=None, eps=str(y), across_spatial=str(0), channel_shared=str(0),
+                                data_type=None, infer=None, in_ports_count=2, out_ports_count=1)))
+        normalize_data_id = graph.unique_id()
+
+        graph.add_node(normalize_data_id, **add_attrs_props(graph.node[output_data_name]))
+        update_ie_fields(graph.node[normalize_id])
+        weights_id = graph.unique_id('weights_')
+        graph.add_node(weights_id, **add_attrs_props(
+            dict(kind='data', precision="FP32", name=weights_id, value=None, shape=None, data_type=None, infer=None)))
+        wnode = Node(graph, weights_id)
+        wnode['value'] = np.ones(shape=match['input'].shape[-1],
+                                 dtype=match['input'].data_type)  # TODO feature dim instead of -1
+        wnode['shape'] = np.array(wnode['value'].shape)
+        output_edges = list(graph.out_edges(output_data_name, data=True))
+        graph.remove_edges_from([
+            (input_data_name, match['l2_normalize'].id),
+            (input_data_name, match['square'].id)
+        ])
+        graph.remove_edges_from(list(graph.out_edges(output_data_name)))
+        graph.remove_node(output_data_name)
+        graph.add_edge(input_data_name, normalize_id, **{'in': 0})
+        graph.add_edge(weights_id, normalize_id, **{'in': 1, 'bin': 'weights'})
+        graph.add_edge(normalize_id, normalize_data_id, **{'out': 0})
+        for data, owner, attr in output_edges:
+            graph.add_edge(normalize_data_id, owner, **attr)
diff --git a/model-optimizer/extensions/middle/lstm_sequence_tensor_iterator.py b/model-optimizer/extensions/middle/LSTMRNNSequenceToTensorIterator.py
index f576cdec6..2c19ad707 100644
--- a/model-optimizer/extensions/middle/lstm_sequence_tensor_iterator.py
+++ b/model-optimizer/extensions/middle/LSTMRNNSequenceToTensorIterator.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,36 +13,36 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import networkx as nx
 import numpy as np
 
 from extensions.middle.FusePermutesSequence import FusePermutesSequence
-from extensions.middle.lstm_sequence_normalize import LSTMSequenceNormalize
-from extensions.middle.mxnet_lstm_sequence_normalize import MXNetLSTMSequenceNormalize
+from extensions.middle.RNNSequenceNormalizeToIE import RNNSequenceNormalize
 from extensions.ops.lstm_cell import LSTMCell
 from extensions.ops.tensor_iterator import TensorIterator
+from mo.graph.graph import Graph, add_opoutput
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op
 from mo.ops.reshape import Reshape
 
 
-class LSTMSequenceTensorIterator(MiddleReplacementPattern):
-    """ Converts normalized LSTMSequence op to TensorIterator.
+class LSTMToTensorIterator(MiddleReplacementPattern):
+    """ Converts normalized RNNSequence with op=LSTM to TensorIterator.
 
-        Normalized LSTMSequence means that it should be processed by
-        LSTMSequenceNormalize transform that ensures its stict form.
+        Normalized RNNSequence means that it should be processed by
+        RNNSequenceNormalize transform that ensures its strict form.
 
-        This transformation builds an altenative sub-graph for LSTMSequence
+        This transformation builds an alternative sub-graph for LSTMSequence
         with TensorIterator connected in the same way as an original LSTMSequence
         node and with internal body represented as LSTMCell op node with necessary
         squeezes and unsqueezes around.
     """
 
     enabled = True
-
+    force_clean_up = True
+    id = 'lstm_to_tensor_iterator'
+    
     def run_after(self):
-        return [LSTMSequenceNormalize, MXNetLSTMSequenceNormalize]
+        return [RNNSequenceNormalize]
 
     def run_before(self):
         return [FusePermutesSequence]
@@ -50,7 +50,7 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
     def pattern(self):
         return dict(
             nodes=[
-                ('lstm', dict(kind='op', op='LSTMSequence')),
+                ('lstm', dict(kind='op', op='LSTM', type='RNNSequence')),
                 ('input', dict(kind='data')),
                 ('weights', dict(kind='data')),
                 ('biases', dict(kind='data')),
@@ -66,16 +66,20 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
             ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         lstm = match['lstm']
 
         # Build TensorIterator body first
-        body = nx.MultiDiGraph(name=lstm.name + '/sub_graph', layout=graph.graph['layout'])
+        body = Graph(name=lstm.name + '/sub_graph')
+        body.graph = graph.graph
+
+        # 1. Input squeeze Reshape
         inputs = [Op._create_data_node(body, lstm.name + '/inport/' + str(inp),
                                        {'shape': lstm.in_node(inp).shape.copy(),
                                         'value': lstm.in_node(inp).value.copy()
                                         if lstm.in_node(inp).value is not None and inp in [1, 2] else None})
-                                        for inp in [0, 3, 4, 1, 2]]
+                  for inp in [0, 4, 5, 1, 2]]  # X, WR, B, h_init, c_init
+
         inputs[0].shape[lstm.sequence_dim] = 1
         reshape_dim = inputs[0].shape.copy()
         reshape_dim[lstm.batch_dim] = -1
@@ -85,11 +89,14 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
             dict(name=lstm.name + '/input_squeeze', internal_layer_id=0, dim=reshape_dim)
         )
         inputs[0] = input_squeeze.create_node_with_data([inputs[0]], edge_attrs=[{'internal_port_id': 0}])
-        lstm_cell_op = LSTMCell(body, dict(hidden_size=match['lstm'].hidden_size, name=lstm.name + '/LSTMCell',
-                                           internal_layer_id=1))
+
+        # 2. Output unsqueeze Reshape
         outputs = [Op._create_data_node(body, lstm.name + '/outport/' + str(out),
                                         {'shape': lstm.out_node(out).shape.copy() if out in lstm.out_nodes()
-                                        else lstm.in_node(3).shape.copy(), 'is_output': True}) for out in [0, 1]]
+                                        else lstm.in_node(4).shape.copy()}) for out in [0, 1]]
+        for out in outputs:
+            add_opoutput(body, out.id, 0, False)
+
         unsqueezed_output_shape = outputs[0].shape.copy()
         unsqueezed_output_shape[lstm.sequence_dim] = 1
         squeezed_output_shape = np.delete(unsqueezed_output_shape, lstm.sequence_dim)
@@ -97,7 +104,16 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
         unsqueezed_output_shape[lstm.batch_dim] = -1
         output_unsqueeze = Reshape(body, dict(name=lstm.name + 'output_unsqueeze', dim=unsqueezed_output_shape,
                                               internal_layer_id=2))
-        # TODO edge attributes should be assigned by the op itself
+
+        # 3. LSTMCell
+        lstm_cell_op = LSTMCell(body, dict(hidden_size=lstm.hidden_size,
+                                           activations=lstm.activations,
+                                           activation_alpha=lstm.activation_alpha,
+                                           activation_beta=lstm.activation_beta,
+                                           clip=lstm.clip,
+                                           input_forget=lstm.input_forget,
+                                           name=lstm.name + '/LSTMCell',
+                                           internal_layer_id=1))
         lstm_cell_node = lstm_cell_op.create_node_with_data(inputs, data_nodes=outputs,
                                                             edge_attrs=[{}, {'internal_port_id': 1},
                                                                         {'internal_port_id': 2}, {'bin': 'weights'},
@@ -106,8 +122,9 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
         lstm_cell_node[0].in_node().out_edge(1)['internal_port_id'] = 5
         lstm_cell_node[0] = output_unsqueeze.create_node_with_data([lstm_cell_node[0]])
         lstm_cell_node[0].in_node().out_edge(0)['internal_port_id'] = 3
-        lstm_cell_node[0]['is_output'] = True
+        add_opoutput(body, lstm_cell_node[0].id, 0, False)
 
+        # 4. TensorIterator layer creating
         assert lstm.direction in ['forward', 'reverse']
         if lstm.direction == 'forward':
             stride = 1
@@ -123,6 +140,7 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
             'external_port_id': 3,
             'internal_layer_id': 2,
             'internal_port_id': 3,
+
             'axis': lstm.sequence_dim,
             'stride': stride,
             'start': start,
@@ -130,6 +148,7 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
             'part_size': 1,
         }]
 
+        # Adding h_state, c_state to outputs
         if len(lstm.out_nodes()) == 3:
             output_port_map.extend([{
                 'external_port_id': 4,
@@ -144,12 +163,15 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
         ti_op = TensorIterator(graph, {
             'name': lstm.name + '/TensorIterator',
             'body': body,
+            'in_ports_count': 3,
+            'out_ports_count': len(lstm.out_nodes()),
 
             'input_port_map': [
                 {
                     'external_port_id': 0,
                     'internal_layer_id': 0,
                     'internal_port_id': 0,
+
                     'axis': lstm.sequence_dim,
                     'stride': stride,
                     'start': start,
@@ -188,7 +210,8 @@ class LSTMSequenceTensorIterator(MiddleReplacementPattern):
 
         assert sorted(lstm.out_nodes().keys()) == list(range(len(lstm.out_nodes()))), \
             "There are gaps in output ports of LSTMSequence operation. Node {}".format(lstm.id)
-        outs = ti_op.create_node_with_data([lstm.in_node(i) for i in [0, 3, 4]],
+
+        outs = ti_op.create_node_with_data([lstm.in_node(i) for i in [0, 4, 5]],  # X, h_init, c_init
                                            data_nodes=[lstm.out_node(i) for i in range(len(lstm.out_nodes()))],
                                            edge_attrs=[{'external_port_id': 0}, {'external_port_id': 1},
                                                        {'external_port_id': 2}])
diff --git a/model-optimizer/extensions/middle/LayoutChangeForConstantShapePaths.py b/model-optimizer/extensions/middle/LayoutChangeForConstantShapePaths.py
new file mode 100644
index 000000000..37d1dd80b
--- /dev/null
+++ b/model-optimizer/extensions/middle/LayoutChangeForConstantShapePaths.py
@@ -0,0 +1,113 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.gather import Gather
+from mo.back.replacement import BackReplacementPattern
+from mo.graph.graph import Graph, Node
+from mo.ops.const import Const
+
+
+class LayoutChangeForConstantShapePaths(BackReplacementPattern):
+    enabled = False
+    graph_condition = [lambda graph: graph.graph['fw'] == 'tf',
+                       lambda graph: graph.graph['cmd_params'].keep_shape_ops]
+    force_clean_up = True
+
+    @staticmethod
+    def if_has_value(graph: Graph, node_name: str):
+        return Node(graph, node_name).has_valid('value')
+
+    def search_of_constant_path_end(self, graph: Graph, node_name: str, visited: set):
+        from collections import deque
+        d = deque()
+        d.appendleft(node_name)
+        ends = set()
+        while len(d) != 0:
+            cur_node = d.popleft()
+            node = Node(graph, cur_node)
+            if node.has_valid('permute_attrs'):
+                node['permute_attrs'] = None
+            for _, out_node_name in graph.out_edges(cur_node):
+                if out_node_name not in visited:
+                    if self.if_has_value(graph, out_node_name):
+                        visited.add(cur_node)
+                        d.extend([op for _, op in graph.out_edges(out_node_name)])
+                    else:
+                        ends.add(cur_node)
+        return ends
+
+    def find_and_replace_pattern(self, graph: Graph):
+        # 1. Inserting Gather to N*C format on constant shape paths
+        #   - Search for Shape ops
+        #   - Inserting Gather after them in case of [4] or [5] output shape
+
+        shape_ops = graph.get_op_nodes(op='Shape')
+        constant_shape_paths = set()
+        gather_inserted = []
+
+        for shape in shape_ops:
+            shape_of_shape_op_output = shape.out_node().shape
+
+            if np.array_equal(shape_of_shape_op_output, [4]):
+                index = np.array([0, 2, 3, 1])
+            elif np.array_equal(shape_of_shape_op_output, [5]):
+                index = np.array([0, 2, 3, 4, 1])
+            else:
+                continue
+
+            const = Const(graph, {'value': index}).create_node()
+            gather = Gather(graph, {}).create_node()
+
+            shape.out_port(0).get_connection().set_source(gather.out_port(0))
+            shape.out_port(0).connect(gather.in_port(0))
+            const.out_port(0).connect(gather.in_port(1))
+
+            constant_shape_paths.add(gather.id)
+            gather_inserted.append(gather.id)
+
+        # 2. Inserting Gather to NC* format
+        #   - Search from Shape ops found in previous step for nodes without value that are n-th children of Shape op
+        #       * MO can not propagate value, there is data path
+        #   - Inserting Gather on ports which comes from operations in `constant_shape_paths` list
+
+        constant_shape_ends = []
+
+        for shape in shape_ops:
+            constant_shape_ends.extend(self.search_of_constant_path_end(graph, node_name=shape.id,
+                                                                        visited=constant_shape_paths))
+
+        for end in constant_shape_ends:
+            node = Node(graph, end)
+            in_ports = [in_port for in_port in node.in_ports().values()
+                        if in_port.get_source().node.id in constant_shape_paths]
+
+            for in_port in in_ports:
+                shape = in_port.data.get_shape()
+
+                if np.array_equal(shape, [4]):
+                    index = np.array([0, 3, 1, 2])
+                elif np.array_equal(shape, [5]):
+                    index = np.array([0, 2, 3, 4, 1])
+                else:
+                    continue
+
+                const = Const(graph, {'value': np.array(index)}).create_node()
+                gather = Gather(graph, {}).create_node()
+
+                in_port.get_connection().set_destination(gather.in_port(0))
+                const.out_port(0).connect(gather.in_port(1))
+                gather.out_port(0).connect(in_port)
diff --git a/model-optimizer/extensions/middle/MXNetRNNSequenceNormalize.py b/model-optimizer/extensions/middle/MXNetRNNSequenceNormalize.py
new file mode 100644
index 000000000..78235cd46
--- /dev/null
+++ b/model-optimizer/extensions/middle/MXNetRNNSequenceNormalize.py
@@ -0,0 +1,229 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+from mo.ops.permute import Permute
+from mo.ops.reshape import Reshape
+
+
+class MXNetRNNSequenceNormalize(MiddleReplacementPattern):
+    """
+        Convert blobs and shapes of MXNet-like RNN cell to IE compatible form.
+
+        The target form of this operation is not normally covered by a dedicated
+        layer in IE. It should be further transformed to some other layer
+        that are supported by IE. This transformation pass involves weights and
+        shapes processing only.
+
+        Post-conditions:
+        Inputs:
+            0: X input data,    shape [batch_size, seq_len, input_size] (or [seq_len. bathc_size, int_size], depends on
+                                batch_dim param)
+            1: W weights blob,  shape [num_dir, n_cells, M, hidden_size, input_size]
+            2: R weights blob,  shape [num_dir, n_cells, M, hidden_size, hidden_size]
+            3: B biases blob,   shape [num_dir, n_cells, 2, M, hidden_size]
+            4: (optional) sequence_length, shape [batch_size]
+            5: initial hidden state, shape  [num_dir, batch_size, hidden_size]
+                                                      ([num_dir, n_cells, batch_size, hidden_size] if num_cells != 1)
+            6: (only for LSTM) initial cell state, shape [num_dir, batch_size, hidden_size]
+            7: (optional for LSTM) Peepholes weights, shape  [num_dir, n_cells, (M - 1) * hidden_size]
+
+        Outputs:
+            0: Y output blob,  shape [batch_size, num_dir, seq_len, hidden_size]
+            1: (optional) Y_h, shape [num_dir, batch_size, hidden_size]
+            2: (optional for LSTM) Y_c, shape [num_dir, batch_size, hidden_size]
+
+        Where:
+            M -- number of gates in this cell (4 for LSTM, 3 for GRU, 1 for RNN).
+            num_dir -- number of directions ('forvard', 'bidirectional', 'reverse')
+            n_cells -- number of cells in layer (always 1 for ONNX).
+
+    """
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.MXNetSplitMultiLayers import MXNetSplitLayersToRNNSequence
+        return [MXNetSplitLayersToRNNSequence]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('rnn_layer', dict(kind='op', type='RNNSequence', format='mxnet')),
+                ('input', dict(kind='data')),
+                ('params', dict(kind='data')),
+            ],
+            edges=[
+                ('input', 'rnn_layer', {'in': 0}),
+                ('params', 'rnn_layer', {'in': 1}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        rnn_layer = match['rnn_layer']
+
+        self.check_init_states(graph, match)
+        self.repack_weights(graph, match)
+        self.add_output_reshape(graph, match)
+        self.check_input_ports(graph, match)
+        rnn_layer['normalized'] = True
+
+    @staticmethod
+    def repack_weights(graph: Graph, match: dict):
+        input = match['input']
+        rnn_layer = match['rnn_layer']
+        params = match['params'].value.copy()
+
+        graph.remove_edge(match['params'].id, rnn_layer.id)
+
+        input_size = input.shape[2]
+        direction = 2 if rnn_layer.has_num_directions else 1
+        bsize = (2 * rnn_layer.hidden_size * direction * 1) * rnn_layer.multiplier
+
+        W = np.array(params[0:len(params) - bsize])
+        B = np.array(params[len(params) - bsize:])
+
+        W = W.reshape((direction, -1))
+        B = B.reshape((direction, -1))
+
+        W, R = np.array(W[:, 0:rnn_layer.hidden_size * rnn_layer.multiplier * input_size]), np.array(W[:, rnn_layer.hidden_size * rnn_layer.multiplier* input_size:])
+
+        W, R = [x.reshape([
+            direction,  # 0: num of directions
+            1,  # 1: num_cells
+            rnn_layer.multiplier,  # 2: four output parts of the matrix for all gates
+            rnn_layer.hidden_size,  # 3: output size per direction and gate
+            -1])  # 4: input size/hidden size in W/R correspondingly
+            for x in (W, R)]
+
+        assert W.shape[-1] == input_size
+        assert R.shape[-1] == rnn_layer.hidden_size
+
+        B = B.reshape([
+                 direction,  # 0: num of directions, limitation: should be 1
+                 1,
+                 2,  # 3: num of component B
+                 rnn_layer.multiplier,  # 1: four output parts of the matrix for all gates in order: i, f, c, o
+                 rnn_layer.hidden_size,  # 2: output size per direction and gate
+        ])
+
+        # Reorder gates: ifco --> fico
+        gate_reorder = rnn_layer.gate_order
+        W = np.take(W, gate_reorder, axis=2)
+        R = np.take(R, gate_reorder, axis=2)
+        B = np.take(B, gate_reorder, axis=3)
+
+        for blob, port in [(W, 1), (R, 2), (B, 3)]:
+            Op.create_and_connect_input_data_node(
+                graph,
+                rnn_layer,
+                {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
+                {'in': port, 'permutation': None}
+            )
+
+    @staticmethod
+    def check_init_states(graph: Graph, match: dict):
+        """
+        Check if cell have initial states and create zeros states if not.
+        And renumber ports for this states.
+        """
+        rnn_cell = match['rnn_layer']
+        num_directions = 2 if rnn_cell.direction == 'bidirectional' else 1
+        batch_size = rnn_cell.in_node(0).shape[rnn_cell.batch_dim]
+
+        h_init_port = 5
+        c_init_port = 6
+
+        if 2 not in rnn_cell.in_nodes():
+            h_shape = [num_directions, batch_size, rnn_cell.hidden_size]  # from ONNX spec
+            h_init = np.full(h_shape, 0, dtype=np.float32)
+            Op.create_and_connect_input_data_node(
+                graph,
+                rnn_cell,
+                {'value': h_init, 'shape': np.array(h_init.shape, dtype=np.int64)},
+                {'in': h_init_port, 'permutation': None}
+            )
+        else:
+            hidden_state_edge = graph.get_edge_data(rnn_cell.in_node(2).id, rnn_cell.id)
+            hidden_state_edge[0]['in'] = h_init_port
+
+        if rnn_cell.op == 'LSTM':
+            if 3 not in rnn_cell.in_nodes():
+                c_shape = [num_directions, batch_size, rnn_cell.hidden_size]  # from ONNX spec
+                c_init = np.full(c_shape, 0, dtype=np.float32)
+                Op.create_and_connect_input_data_node(
+                    graph,
+                    rnn_cell,
+                    {'value': c_init, 'shape': np.array(c_init.shape, dtype=np.int64)},
+                    {'in': c_init_port, 'permutation': None}
+                )
+            else:
+                cell_state_edge = graph.get_edge_data(rnn_cell.in_node(3).id, rnn_cell.id)
+                cell_state_edge[0]['in'] = c_init_port
+
+    @staticmethod
+    def add_output_reshape(graph: Graph, match: dict):
+        """
+        Since MXNet Y output shape is [batch_size, seq_len, hidden_size * num_directions] we need to add reshape
+        from above common format [batch_size, num_directions, seq_len, hidden_size] to MXNet format.
+        """
+        lstm = match['rnn_layer']
+        input = match['input']
+        if not lstm.has_num_directions:
+            return
+        old_data_node =lstm.out_node(0)
+        num_directions = 2 if lstm.direction in ['bidirectional'] else 1
+        mxnet_shape = lstm.out_node(0).shape.copy()
+
+        if lstm.batch_dim == 0:
+            mo_shape = np.array([input.shape[lstm.batch_dim], input.shape[lstm.sequence_dim], lstm.hidden_size],
+                             dtype=np.int64)
+        else:
+            mo_shape = np.array([input.shape[lstm.sequence_dim], input.shape[lstm.batch_dim], lstm.hidden_size],
+                                dtype=np.int64)
+
+        if lstm.has_num_directions:
+            mo_shape = np.insert(mo_shape, 1, np.int64(num_directions))
+
+        new_data = Op._create_data_node(graph, name=lstm.name + '/Data/Reshape_mxnet/', attrs={'shape': mo_shape})
+        graph.remove_edge(lstm.id, old_data_node.id)
+        graph.add_edge(lstm.id, new_data.id, key=0, out=0)
+
+        # Add Permute
+        permute_order = np.array([0, 2, 1, 3], dtype=np.int64)
+        permute = Permute(graph, dict(order=permute_order))
+        permute_data = permute.create_node_with_data([new_data], dict(name=lstm.name + '/Permute_mxnet/'))
+
+        # Add Reshape
+        reshape = Reshape(graph, dict(dim=mxnet_shape))
+        reshape.create_node_with_data([permute_data], dict(name=lstm.name + '/Reshape_mxnet/'),
+                                      data_nodes=[old_data_node])
+
+    @staticmethod
+    def check_input_ports(graph: Graph, match: dict):
+        """
+        Check that all mandatory ports is present.
+        """
+        rnn_layer = match['rnn_layer']
+        mandatory_ports = [0, 1, 2, 3, 5]
+
+        if rnn_layer.op == 'LSTM':
+            mandatory_ports.append(6)
+
+        assert set(rnn_layer.in_nodes().keys()) >= set(mandatory_ports)
diff --git a/model-optimizer/extensions/middle/MXNetSplitMultiLayers.py b/model-optimizer/extensions/middle/MXNetSplitMultiLayers.py
new file mode 100644
index 000000000..0749308df
--- /dev/null
+++ b/model-optimizer/extensions/middle/MXNetSplitMultiLayers.py
@@ -0,0 +1,206 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.graph.graph import Graph, Node
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.concat import Concat
+from mo.ops.op import Op
+
+
+class MXNetSplitLayersToRNNSequence(MiddleReplacementPattern):
+    """
+        Split MXNet multilayer cell to multiple one-layers cells LSTM/GRU/RNN.
+        Also concatenate output hiddens and cells states of this layers.
+    """
+    enabled = True
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('rnn_layer', dict(kind='op', type='RNNSequence', format='mxnet', multilayers=True)),
+                ('input', dict(kind='data')),
+                ('params', dict(kind='data')),
+            ],
+            edges=[
+                ('input', 'rnn_layer', {'in': 0}),
+                ('params', 'rnn_layer', {'in': 1}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        output_states = self.split_multilayer_cell(graph, match)
+
+        rnn_layer = match['rnn_layer']
+        self.concat_output_states(graph, match, output_states)
+        rnn_layer.graph.remove_node(rnn_layer.id)
+
+    @staticmethod
+    def get_new_cell(multilayer_cell: Node, number: int):
+        cell_class = Op.get_op_class_by_name(multilayer_cell.op)
+        new_cell = lambda graph, attrs: cell_class(graph, attrs)
+        attrs = multilayer_cell.attrs().copy()
+        new_attrs = {
+            'num_layers': 1,
+            'multilayers': False,
+            'name': multilayer_cell.name + '/LayerSplittedLSTM/{}'.format(number),
+        }
+        attrs.update(new_attrs)
+        return new_cell(multilayer_cell.graph, attrs)
+
+    def split_multilayer_cell(self, graph: Graph, match: dict):
+        """
+        Split one multilayer type=RNNSequence cell to num_layers consecutive cells.
+        All parameters splits to parts for new num_layers cells.
+        """
+        input = match['input']
+        rnn_layer = match['rnn_layer']
+        params = match['params'].value.copy()
+
+        have_hidden = False
+        if 2 in rnn_layer.in_nodes():
+            hidden_state_value = rnn_layer.in_node(2).value
+            have_hidden = True
+
+        have_cell = False
+        if 3 in rnn_layer.in_nodes():
+            cell_state_value = rnn_layer.in_node(3).value
+            have_cell = True
+
+        direction = 2 if rnn_layer.has_num_directions else 1
+        num_layers = rnn_layer.num_layers
+        input_size = input.shape[2]
+        bsize = (2 * rnn_layer.hidden_size * direction * num_layers) * rnn_layer.multiplier
+
+        size = rnn_layer.hidden_size * direction * rnn_layer.multiplier
+        first_layer_params_size = (input_size + rnn_layer.hidden_size + 2) * size
+        other_layer_params_size = (rnn_layer.hidden_size * direction + rnn_layer.hidden_size + 2) * size
+        assert params.size == (first_layer_params_size + (num_layers - 1) * other_layer_params_size)
+
+        input_node = input
+        params_layer_size_count = 0
+        output_states = [[], []]
+
+        param_w = params[0:len(params)-bsize]
+        param_b = params[len(params) - bsize:]
+        layer_bsize = (2 * rnn_layer.hidden_size * direction) * rnn_layer.multiplier
+
+        for l in range(num_layers):
+            params_layer_size = first_layer_params_size if l == 0 else other_layer_params_size
+
+            layer_params_w = param_w[params_layer_size_count: params_layer_size_count +
+                                                              (params_layer_size - layer_bsize)].copy()
+            layer_params_b = param_b[layer_bsize*l: layer_bsize*l+layer_bsize].copy()
+            layer_params = np.concatenate((layer_params_w, layer_params_b), axis=0)
+            params_layer_size_count = params_layer_size_count + params_layer_size - layer_bsize
+
+            op = self.get_new_cell(rnn_layer, l)
+
+            params_value_node = Op._create_data_node(
+                rnn_layer.graph,
+                name=rnn_layer.name + '/LayerSplittedParamsLSTM/{}/'.format(l),
+                attrs={'value': layer_params, 'shape': np.array(layer_params.shape, dtype=np.int64)}
+            )
+            if have_hidden:
+                layer_hidden_state = hidden_state_value[l * direction: l * direction + direction]
+                hidden_state_value_node = Op._create_data_node(
+                    rnn_layer.graph,
+                    name=str(rnn_layer.name) + '/LayerSplittedHiddenState/{}/'.format(l),
+                    attrs={'value': layer_hidden_state, 'shape': np.array(layer_hidden_state.shape, dtype=np.int64)}
+                )
+            else:
+                hidden_state_value_node = None
+
+            if have_cell:
+                layer_cell_state = cell_state_value[l * direction: l * direction + direction]
+                cell_state_value_node = Op._create_data_node(
+                    rnn_layer.graph,
+                    name=str(rnn_layer.name) + '/LayerSplittedCellState/{}/'.format(l),
+                    attrs={'value': layer_cell_state, 'shape': np.array(layer_cell_state.shape, dtype=np.int64)}
+                )
+            else:
+                cell_state_value_node = None
+
+            if l < num_layers-1:
+                output_data = Op._create_data_node(
+                    rnn_layer.graph,
+                    name=rnn_layer.out_node(0).name + '/LayerSplit/' + str(l),
+                    attrs={'shape': rnn_layer.out_node(0).shape.copy()}
+                )
+            else:
+                output_data = rnn_layer.out_node(0)
+
+            # Output nodes creating:
+            state_size = np.array([input.shape[rnn_layer.batch_dim], rnn_layer.hidden_size], dtype=np.int64)
+            if rnn_layer.has_num_directions:
+                state_size = np.insert(state_size, 0, direction)
+
+            output_hidden = Op._create_data_node(
+                rnn_layer.graph,
+                name=rnn_layer.out_node(1).name + '/LayerSplit/' + str(l),
+                attrs={'shape': np.array(state_size)}
+            )
+
+            current_data_nodes = [output_data, output_hidden]
+
+            if rnn_layer.op == 'LSTM':
+                output_cell = Op._create_data_node(
+                    rnn_layer.graph,
+                    name=rnn_layer.out_node(2).name + '/LayerSplit/' + str(l),
+                    attrs={'shape': np.array(state_size)}
+                )
+                current_data_nodes.append(output_cell)
+
+            data_nodes = op.create_node_with_data(
+                inputs=[
+                    input_node,
+                    params_value_node,
+                    hidden_state_value_node,
+                    cell_state_value_node
+                ],
+                data_nodes=current_data_nodes,
+            )
+
+            input_node = data_nodes[0]
+            output_states[0].append(data_nodes[1])
+
+            if rnn_layer.op =='LSTM':
+                output_states[1].append(data_nodes[2])
+
+        return output_states
+
+    @staticmethod
+    def concat_output_states(graph: Graph, match: dict, new_states: list):
+        """ Concatenates output states from multilayer layer. """
+        rnn_layer = match['rnn_layer']
+        original_states = [rnn_layer.out_node(i) if i in rnn_layer.out_nodes() else None for i in [1, 2]]
+
+        concat_ops = [
+            Concat(rnn_layer.graph, {
+                'name': rnn_layer.name + '/FinalLayerSplitConcat/HiddenState',
+                'axis': -1
+            }),
+            Concat(rnn_layer.graph, {
+                'name': rnn_layer.name + '/FinalLayerSplitConcat/CellState',
+                'axis': -1
+            })
+        ]
+
+        for i in range(len(original_states)):  # [0] or [0, 1]
+            if original_states[i] is None:
+                continue
+            concat_ops[i].attrs.update({'in_ports_count': len(new_states[i])})
+            concat_ops[i].create_node_with_data(inputs=new_states[i], data_nodes=[original_states[i]])
diff --git a/model-optimizer/extensions/middle/MeanToAvgPool.py b/model-optimizer/extensions/middle/MeanToAvgPool.py
new file mode 100644
index 000000000..fafc50315
--- /dev/null
+++ b/model-optimizer/extensions/middle/MeanToAvgPool.py
@@ -0,0 +1,95 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.graph.graph import create_edge, Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import PermuteAttrs, Op
+from mo.ops.reshape import Reshape
+
+
+class MeanToAvgPool(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
+    def run_before(self):
+        return []
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('input', dict(kind='data')),
+                ('axis', dict(kind='data')),
+                ('mean', dict(kind='op', op='Mean'))
+            ],
+            edges=[
+                ('input', 'mean', {'in': 0}),
+                ('axis', 'mean', {'in': 1})
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        if match['axis'].value is None or match['input'].shape is None:
+            return
+        dims = len(match['input'].shape)
+        ones = np.ones(dims, dtype=np.int64)
+        axis = np.array(match['axis'].value)
+        axis = axis if axis.ndim != 0 else np.array([axis], dtype=np.int64)
+
+        mean = graph.node[match['mean'].node]
+        mean['stride'] = np.array(ones)
+        # TODO: need to check axis with real layout
+        spatial_dims = np.array(axis)
+        mean['spatial_dims'] = spatial_dims
+        mean['pad'] = np.zeros((dims, 2), np.int64)
+        mean['pad_spatial_shape'] = np.array(mean['pad'][spatial_dims])
+        window = np.array(ones)
+        window[spatial_dims] = match['input'].shape[spatial_dims]
+        mean['window'] = window
+        mean['TF_op'] = mean['op']
+        mean['op'] = 'AvgPool'
+        mean['pool_method'] = 'avg'
+        mean['rounding_type'] = 'ceil'
+        mean['exclude_pad'] = 'true'
+        mean['kernel_spatial'] = window[spatial_dims]
+        graph.remove_edge(match['axis'].node, match['mean'].node)
+        mean['permute_attrs'] = PermuteAttrs().update_attrs(attrs=[('pad', 'input:0'),
+                                                                   ('stride', 'input:0'),
+                                                                   ('window', 'input:0'),
+                                                                   ('spatial_dims', 'input:0')])
+
+        if match['mean'].keep_dims == False:
+            output = match['mean'].out_node()
+            pool_node = match['mean']
+
+            # Keep dims for AvgPool
+            shape = np.array(output.shape)
+            for idx in spatial_dims:
+                shape = np.insert(shape, idx, 1)
+
+            graph.remove_edge(pool_node.id, output.id)
+            # Create new data for pool with all dims
+            pool_data = Op.create_data_node(graph, pool_node, {'shape': np.array(shape)})
+            # Create and connect reshape node
+            reshape_op = Reshape(graph, {'dim': np.array(output.shape)})
+            reshape_node = reshape_op.create_node([pool_data], dict(name='Reshape_',
+                                                                    permute_attrs=PermuteAttrs().update_attrs(
+                                                                        attrs=[('dim', 'output:0')])))
+            graph.create_edge(reshape_node, output)
diff --git a/model-optimizer/mo/middle/passes/pool_test.py b/model-optimizer/extensions/middle/MeanToAvgPool_test.py
index 1473f1eeb..16952c294 100644
--- a/model-optimizer/mo/middle/passes/pool_test.py
+++ b/model-optimizer/extensions/middle/MeanToAvgPool_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ import unittest
 import numpy as np
 
 from mo.middle.passes.eliminate import graph_clean_up
-from mo.middle.passes.pool import mean_to_avgpool
+from extensions.middle.MeanToAvgPool import MeanToAvgPool
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
 nodes_attributes = {
@@ -35,6 +35,8 @@ nodes_attributes = {
     # Reshape layer
     'reshape_1': {'type': 'Reshape', 'kind': 'op', 'op': 'Reshape'},
     'reshape_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+    # OpOutput
+    'op_output': {'kind': 'op', 'op': 'OpOutput', 'type': 'OpOutput'}
 }
 
 
@@ -43,14 +45,16 @@ class MeanToAvgPoolTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mean_1'),
-                             ('mean_1', 'mean_1_data'),
                              ('mean_axis', 'mean_1'),
+                             ('mean_1', 'mean_1_data'),
+                             ('mean_1_data', 'op_output')
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mean_1': {'shape': np.array([1, 227, 227, 3]), 'keep_dims': keep_dims},
                              'mean_axis': {'shape': np.array(axis.shape) if axis is not None else None,
                                            'value': np.array(axis) if axis is not None else None},
-                             'mean_1_data': {'shape': mean_out_shape, 'is_output': True},
+                             'mean_1_data': {'shape': mean_out_shape},
                              })
         del graph['mean_1']['mean_1_data'][0]['in']
         return graph
@@ -62,13 +66,14 @@ class MeanToAvgPoolTests(unittest.TestCase):
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'pool_1'),
                                  ('pool_1', 'pool_1_data'),
+                                 ('pool_1_data', 'op_output'),
                                  ],
                                 {'pool_1': {'pool_method': 'avg', 'rounding_type': 'ceil', 'exclude_pad': 'true',
                                             'op': 'AvgPool', 'shape': np.array([1, 227, 227, 3])},
-                                 'pool_1_data': {'is_output': True, 'shape': np.array([1, 227, 227, 3])}
+                                 'pool_1_data': {'shape': np.array([1, 227, 227, 3])}
                                  })
 
-        mean_to_avgpool(graph)
+        MeanToAvgPool().find_and_replace_pattern(graph)
         graph_clean_up(graph)
         (flag, resp) = compare_graphs(graph, graph_ref, 'mean_1_data', 'pool_1_data', check_op_attrs=True)
         self.assertTrue(flag, resp)
@@ -82,15 +87,16 @@ class MeanToAvgPoolTests(unittest.TestCase):
                                  ('placeholder_1_data', 'pool_1'),
                                  ('pool_1', 'pool_1_data'),
                                  ('pool_1_data', 'reshape_1'),
-                                 ('reshape_1', 'reshape_1_data')
+                                 ('reshape_1', 'reshape_1_data'),
+                                 ('reshape_1_data', 'op_output')
                                  ],
                                 {'pool_1': {'pool_method': 'avg', 'rounding_type': 'ceil', 'exclude_pad': 'true',
                                             'op': 'AvgPool', 'shape': np.array([1, 227, 227, 3])},
                                  'pool_1_data': {'shape': np.array([1, 227, 227, 3])},
-                                 'reshape_1_data': {'is_output': True, 'shape': np.array([227, 227, 3])},
+                                 'reshape_1_data': {'shape': np.array([227, 227, 3])},
                                  })
 
-        mean_to_avgpool(graph)
+        MeanToAvgPool().find_and_replace_pattern(graph)
         graph_clean_up(graph)
         (flag, resp) = compare_graphs(graph, graph_ref, 'mean_1_data', 'reshape_1_data', check_op_attrs=True)
         self.assertTrue(flag, resp)
diff --git a/model-optimizer/extensions/middle/MinimumMiddleReplacer.py b/model-optimizer/extensions/middle/MinimumMiddleReplacer.py
index d21563705..100755e7f 100644
--- a/model-optimizer/extensions/middle/MinimumMiddleReplacer.py
+++ b/model-optimizer/extensions/middle/MinimumMiddleReplacer.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.eltwise import Eltwise
 from mo.ops.power import Power
@@ -25,6 +24,14 @@ class MinimumMiddleReplacer(MiddleReplacementPattern):
     op = "Minimum"
     enabled = True
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -33,7 +40,7 @@ class MinimumMiddleReplacer(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         node = match['minimum']
         # Constant propagation case
         if node.in_node(0).value is not None and node.in_node(1).value is not None:
diff --git a/model-optimizer/extensions/middle/MinumumMiddleReplacer_test.py b/model-optimizer/extensions/middle/MinumumMiddleReplacer_test.py
index eb04cda95..96555cfdf 100644
--- a/model-optimizer/extensions/middle/MinumumMiddleReplacer_test.py
+++ b/model-optimizer/extensions/middle/MinumumMiddleReplacer_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/MulQuantizeFuse.py b/model-optimizer/extensions/middle/MulQuantizeFuse.py
new file mode 100644
index 000000000..0bfdc65e1
--- /dev/null
+++ b/model-optimizer/extensions/middle/MulQuantizeFuse.py
@@ -0,0 +1,90 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+from typing import Dict
+
+import numpy as np
+
+from mo.graph.graph import Graph, Node
+from mo.middle.passes.conv import get_tensor_in_port, get_value_in_port
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class MulQuantizeFuse(MiddleReplacementPattern):
+    """ Fuses Mul --> Quantize sequence if possible
+    """
+    enabled = False
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('preop', dict(op='Mul')),
+                ('preoped', dict()),
+                ('quantize', dict(op='Quantize')),
+            ],
+            edges=[
+                ('preop', 'preoped'),
+                ('preoped', 'quantize', {'in': 0}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
+        quantize = match['quantize']
+        preop = match['preop']
+
+        # Check for total number of Mul consumers -- if something else consume its output it cannot be fused
+        if len(preop.out_node().out_nodes()) > 1:
+            log.debug('MulQuantizeFuse: cannot fuse because Mul have multiple consumers')
+            return
+
+        # If the fusion is applicable, direct modifications to quantize 1-st and 2-nd inputs
+        # are performed. So the data nodes at those inputs shouldn't have more than 1 consumer
+        # maximum 2 consumers to the same quantize op (consumed by 1st and 2nd ports).
+        # TODO: relax this limitation and duplicate data nodes accordingly to modify the input range freely
+
+        # Provisional limitation that related to binary quantization
+        # TODO: Relax it beyond binarization case
+        # Provisional limitation that related to binary quantization
+        # TODO: Relax it beyond binarization case
+        if len(quantize.in_node(1).out_nodes()) != 1 or \
+                len(quantize.in_node(2).out_nodes()) != 1 or \
+                len(quantize.in_node(3).out_nodes()) != 1 or len(quantize.in_node(4).out_nodes()) != 1 or \
+                quantize.levels != 2:
+            log.debug('MulQuantizeFuse: cannot fuse because Quantize op has '
+                      'unexpected number of consumers for ports 1, 2, 3 or 4')
+            return
+
+        tensor_port, value_port = get_tensor_in_port(preop), get_value_in_port(preop)
+
+
+        # Need to flip output_low and output_high for those elements that have multiplier < 0
+        # TODO: need some special processing for values that exactly equal to threshold
+        if np.all(value_port.data.get_value() <= 0):
+            log.debug('MulQuantizeFuse: cannot fuse because Mul op has non-positive multipliers.')
+
+        quantize.in_port(1).data.set_value(quantize.in_port(1).data.get_value() / value_port.data.get_value())
+        quantize.in_port(2).data.set_value(quantize.in_port(2).data.get_value() / value_port.data.get_value())
+
+        # Remove Mul as it no longer needed
+        quantize.in_port(0).disconnect()
+        tensor_port.get_connection().set_destination(quantize.in_port(0))
diff --git a/model-optimizer/extensions/middle/NasNet.py b/model-optimizer/extensions/middle/NasNet.py
new file mode 100644
index 000000000..1280923d3
--- /dev/null
+++ b/model-optimizer/extensions/middle/NasNet.py
@@ -0,0 +1,146 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from mo.front.extractor import add_attrs_props, update_ie_fields
+from mo.graph.graph import Node, Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+
+
+class NasNet(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
+    def run_before(self):
+        return []
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('input', dict(kind='data')),
+                ('pad_op', dict(kind='op', op='Pad')),
+                ('pad_out', dict(kind='data')),
+
+                ('begin', dict(kind='data')),
+                ('end', dict(kind='data')),
+                ('stride', dict(kind='data')),
+
+                ('sslice', dict(kind='op', op='StridedSlice')),
+                ('sslice_out', dict(kind='data')),
+
+                ('avg_pool', dict(kind='op', op='AvgPool')),
+                ('output', dict(kind='data')),
+            ],
+            edges=[
+                ('input', 'pad_op', {'in': 0}),
+                ('pad_op', 'pad_out'),
+
+                ('begin', 'sslice', {'in': 1}),
+                ('end', 'sslice', {'in': 2}),
+                ('stride', 'sslice', {'in': 3}),
+
+                ('pad_out', 'sslice', {'in': 0}),
+                ('sslice', 'sslice_out'),
+
+                ('sslice_out', 'avg_pool', {'in': 0}),
+                ('avg_pool', 'output')
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        """
+        Converts specific for NasNet topology subgraph Pad->StridedSlice->AvgPool to Conv->Crop->AvgPool
+        """
+        input = match['input']
+
+        pad_op = match['pad_op']
+
+        sslice = match['sslice']
+        sslice_out = match['sslice_out']
+        begin = []
+        end = []
+        stride = []
+        for s in sslice.slices:
+            begin.append(s.start)
+            end.append(s.stop)
+            stride.append(s.step)
+
+        if not np.array_equal(pad_op.pads, np.array([[0, 0], [0, 1], [0, 1], [0, 0]])):
+            log.error(" Pad values doesn't match!")
+            return
+
+        if not np.array_equal(begin, np.array([0, 1, 1, 0])):
+            log.error("StridedSlice has wrong begin")
+            return
+
+        if not np.array_equal(sslice.end_mask, np.array([0, 0, 0, 0])) or not np.array_equal(sslice.begin_mask, np.array([0, 1, 1, 0])):
+            log.error("StridedSlice has wrong masks")
+            return
+
+        # Cut Smth-x->Pad->StrudedSlice-x->AvgPool
+        graph.remove_edge(input.id, pad_op.id)
+        graph.remove_edge(sslice.id, sslice_out.id)
+
+        # Pad -> Conv
+        conv_node = graph.unique_id(pad_op.name + '/Conv_')
+        conv_weights_node = graph.unique_id(pad_op.name + '/ConvW_')
+        conv_weights = np.ones((1, 1, input.shape[3], 1))
+        conv_output = graph.unique_id(pad_op.name + '/ConvOut_')
+        output_shape = np.array([input.shape[0], input.shape[1] + 1, input.shape[2] + 1, input.shape[3]])
+
+        graph.add_node(conv_node,
+                       **add_attrs_props(
+                           dict(kind='op', precision="FP32", type='Convolution', name=conv_node, op='Conv2D',
+                                stride=np.array([1, 1, 1, 1]), dilation=np.array([1, 1, 1, 1]),
+                                group=input.shape[3], bias_addable=True, bias_term=False,
+                                spatial_dims=np.array([1, 2]),
+                                kernel_spatial=np.array([1, 1]),
+                                pad=np.array([[0, 0], [0, 0], [0, 0], [0, 0]]), output_shape=output_shape,
+                                channel_dims=np.array([3]),
+                                in_ports_count=3, out_ports_count=1)))
+
+        graph.add_node(conv_weights_node, **add_attrs_props(
+            dict(kind='data', precision="FP32", name=conv_weights_node, value=np.array(conv_weights),
+                 shape=np.array(conv_weights.shape),
+                 data_type=input.data_type, infer=None,
+                 spatial_dims=np.array([0, 1]),
+                 input_channel_dim=2,
+                 output_channel_dim=3,
+                 dims_number=4, can_be_bias=True)))
+        graph.add_node(conv_output, **add_attrs_props(
+            dict(kind='data', precision="FP32", name=conv_output, value=None, shape=output_shape,
+                 data_type=input.data_type)))
+
+        # StridedSlice -> Crop
+        crop_cls = Op.get_op_class_by_name('Crop')
+        crop = crop_cls(graph, dict(name=sslice.name + '/Crop_', axis=np.array([1, 2]),
+                                    dim=np.array([output_shape[1] - 1, output_shape[2] - 1]), offset=np.array([1, 1])))
+        crop.create_node_with_data([Node(graph, conv_output)], data_nodes=sslice_out)
+
+        # Connect : Conv->Crop->AvgPool
+        graph.add_edges_from([
+            (input.id, conv_node, {'in': 0}),
+            (conv_weights_node, conv_node, {'in': 1, 'bin': 'weights'}),
+            (conv_node, conv_output, {'out': 0}),
+        ])
+        update_ie_fields(graph.node[conv_node], graph.graph['ir_version'])
diff --git a/model-optimizer/extensions/middle/NormalizeFullyConnected.py b/model-optimizer/extensions/middle/NormalizeFullyConnected.py
index 945248676..991f816ec 100644
--- a/model-optimizer/extensions/middle/NormalizeFullyConnected.py
+++ b/model-optimizer/extensions/middle/NormalizeFullyConnected.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,16 +14,25 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op
 from mo.ops.reshape import Reshape
 
 
 class NormalizeFullyConnected(MiddleReplacementPattern):
-    enabled = False
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['fw'] == 'onnx']
+
+    def run_after(self):
+        from extensions.middle.GemmToFullyConnected import GemmToFullyConnected
+        return [GemmToFullyConnected]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
 
     def pattern(self):
         return dict(
@@ -33,7 +42,7 @@ class NormalizeFullyConnected(MiddleReplacementPattern):
             edges=[('fc', 'fc_output')],
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         """
             This pass normalize FC layer
             Example:
diff --git a/model-optimizer/extensions/middle/NormalizeFullyConnected_test.py b/model-optimizer/extensions/middle/NormalizeFullyConnected_test.py
index de6a73a8d..1cb2b3560 100644
--- a/model-optimizer/extensions/middle/NormalizeFullyConnected_test.py
+++ b/model-optimizer/extensions/middle/NormalizeFullyConnected_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/NormalizePad.py b/model-optimizer/extensions/middle/NormalizePad.py
index 2e9e89fe2..5e4ae170f 100644
--- a/model-optimizer/extensions/middle/NormalizePad.py
+++ b/model-optimizer/extensions/middle/NormalizePad.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 from mo.middle.replacement import MiddleReplacementPattern
 
@@ -30,6 +30,14 @@ class NormalizePad(MiddleReplacementPattern):
     """
     enabled = True
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -38,7 +46,7 @@ class NormalizePad(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         node = match['pad']
         for port, input_node in node.in_nodes().items():
             if port != 0:
diff --git a/model-optimizer/extensions/middle/ONNXRNNSequenceNormalize.py b/model-optimizer/extensions/middle/ONNXRNNSequenceNormalize.py
new file mode 100644
index 000000000..344249766
--- /dev/null
+++ b/model-optimizer/extensions/middle/ONNXRNNSequenceNormalize.py
@@ -0,0 +1,234 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from copy import deepcopy
+
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+from mo.ops.permute import Permute
+
+
+def permute_before_and_after(inp: Node, middle: Node, out: Node, input_order, output_order):
+    """
+        Insert two permutes: before middle node and after middle node.
+
+        Both permutes has a given order (input/output).
+    """
+    # Permute before input
+    permute = Permute(middle.graph, dict(order=np.array(input_order)))
+
+    edge_attrs = deepcopy(middle.graph.get_edge_data(inp.id, middle.id)[0])
+    middle.graph.remove_edge(inp.id, middle.id)
+    new_inp = permute.create_node_with_data([inp], dict(name=middle.name + '/InputPermute'))
+    middle.graph.add_edge(new_inp.id, middle.id, **edge_attrs)
+
+    # Permute after output
+    permute = Permute(middle.graph, dict(order=output_order))
+
+    middle.graph.remove_edge(middle.id, out.id)
+    new_out = Op._create_data_node(middle.graph, name=middle.name + '/WithoutPermute',
+                                   attrs={'shape': out.shape[output_order]})
+    middle.graph.add_edge(middle.id, new_out.id, key=0, out=0)
+    permute.create_node_with_data([new_out], dict(name=middle.name + '/OutputPermute'), data_nodes=out)
+
+
+class ONNXRNNSequenceNormalize(MiddleReplacementPattern):
+    """
+        Convert blobs and shapes of ONNX-like LSTM, GRU, RNN cells to common form (internal for MO).
+        After this normalization pass passes for spliting bidirectional calls and
+        multilayer cells will be applied.
+
+        This transformation pass involves weights and shapes processing only:
+            1. Weights reshaping and reordering
+            2. Gates reordering
+
+
+        Inputs will have the following order after normalising:
+            0: X input data,    shape [batch_size, seq_len, input_size]
+            1: W weights blob,  shape [num_dir, n_cells, M, hidden_size, input_size]
+            2: R weights blob,  shape [num_dir, n_cells, M, hidden_size, hidden_size]
+            3: B biases blob,   shape [num_dir, n_cells, 2, M, hidden_size]
+            4: (optional) sequence_length, shape [batch_size]
+            5: initial hidden state, shape  [num_dir, batch_size, hidden_size]
+                                                      ([num_dir, n_cells, batch_size, hidden_size] if num_cells != 1)
+            6: (only for LSTM) initial cell state, shape [num_dir, batch_size, hidden_size]
+            7: (optional for LSTM) Peepholes weights, shape  [num_dir, n_cells, (M - 1) * hidden_size]
+
+        Outputs:
+            0: Y output blob,  shape [batch_size, num_dir, seq_len, hidden_size]
+            1: (optional) Y_h, shape [num_dir, batch_size, hidden_size]
+            2: (optional for LSTM) Y_c, shape [num_dir, batch_size, hidden_size]
+
+        Where:
+            M -- number of gates in this cell (4 for LSTM, 3 for GRU, 1 for RNN).
+            num_dir -- number of directions ('forvard', 'bidirectional', 'reverse')
+            n_cells -- number of cells in layer (always 1 for ONNX).
+    """
+
+    enabled = True
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('rnn_layer', dict(kind='op', type='RNNSequence', format='onnx')),
+                ('input', dict(kind='data')),
+                ('W', dict(kind='data')),
+                ('R', dict(kind='data')),
+            ],
+            # We are not handling optional inputs
+            edges=[
+                ('input', 'rnn_layer', {'in': 0}),
+                ('W', 'rnn_layer', {'bin': 'W'}),
+                ('R', 'rnn_layer', {'bin': 'R'}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        self.repack_weights(graph, match)
+        self.check_init_states(graph, match)
+        self.check_input_ports(graph, match)
+        match['rnn_layer']['normalized'] = True
+
+    @staticmethod
+    def repack_weights(graph: Graph, match: dict):
+        """
+        Repack weights into general format (described above) and reorder gates.
+        """
+        rnn_layer = match['rnn_layer']
+        W = match['W'].value.copy()
+        R = match['R'].value.copy()
+        num_directions = 2 if rnn_layer.direction == 'bidirectional' else 1
+
+        graph.remove_edge(match['W'].id, rnn_layer.id)
+        graph.remove_edge(match['R'].id, rnn_layer.id)
+
+        # find optional 'B' biases blob
+        if 3 in rnn_layer.in_nodes():
+            # TODO: check if 'bin': 'B' attribute is assigned to this edge
+            B = rnn_layer.in_node(3).value.copy()
+            graph.remove_edge(rnn_layer.in_node(3).id, rnn_layer.id)
+        else:
+            B_shape = [num_directions, 2 * rnn_layer.multiplier * rnn_layer.hidden_size]  # from ONNX spec
+            B = np.full(B_shape, 0, dtype=np.float32)
+
+        # Add extra dimensions for W, R and B for easier repacking and reordering
+        B = B.reshape([
+            num_directions,  # 0: num of directions
+            rnn_layer.num_layers,  # 1: num_layers
+            2,  # 2: two input parts of the matrix: W, R
+            rnn_layer.multiplier,  # 3: four output parts of the matrix for all gates in order: i, o, f, c
+            rnn_layer.hidden_size,  # 4: output size per direction and gate
+        ])
+
+        W, R = [x.reshape([
+                num_directions,  # 0: num of directions
+                rnn_layer.num_layers,  # 1: num_layers
+                rnn_layer.multiplier,  # 2: four output parts of the matrix for all gates in order: i, o, f, c
+                rnn_layer.hidden_size,  # 3: output size per direction and gate
+                -1])  # 4: input size/hidden size in W/R correspondingly
+                for x in (W, R)]
+
+        input_size = match['input'].shape[2]
+        assert input_size == W.shape[-1]
+
+        # Reorder gates: iofc --> fico
+        gate_reorder = rnn_layer.gate_order
+        W, R = (np.take(x, gate_reorder, axis=2) for x in (W, R))
+        B = np.take(B, gate_reorder, axis=3)
+
+        for blob, port in [(W, 1), (R, 2), (B, 3)]:
+            Op.create_and_connect_input_data_node(
+                graph,
+                rnn_layer,
+                {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
+                {'in': port, 'permutation': None}
+            )
+
+    @staticmethod
+    def batch_sequence_transpose(graph: Graph, match: dict):
+        """
+
+        """
+        rnn_layer = match['rnn_layer']
+        inp = match['input']
+        out = rnn_layer.out_node(0)
+
+        if rnn_layer.batch_dim == 0:
+            assert rnn_layer.sequence_dim == 1
+            # nothing to do -- it's already in normal form
+            return
+
+        assert rnn_layer.sequence_dim == 0
+        assert rnn_layer.batch_dim == 1
+        assert len(inp.shape) == 3
+
+        # Reorder the first two dimensions on both ends: input and output.
+        # Two Permute ops are inserted before and after the LSTM node.
+        # In this transformation we don't analyze the rest of the model around
+        # LSTM cell, so these Permute ops are not fused to some other layers here.
+        # But other transformations in the pipeline may optimize the Permute ops out.
+
+        rnn_layer.batch_dim, rnn_layer.sequence_dim = rnn_layer.sequence_dim, rnn_layer.batch_dim
+        permute_before_and_after(inp, rnn_layer, out, [1, 0, 2], [2, 1, 0, 3])
+
+    @staticmethod
+    def check_init_states(graph: Graph, match: dict):
+        """
+        Check if cell have initial states and create zeros states if not.
+        """
+        rnn_layer = match['rnn_layer']
+        num_directions = 2 if rnn_layer.direction == 'bidirectional' else 1
+        batch_size = rnn_layer.in_node(0).shape[rnn_layer.batch_dim]
+
+        h_init_port = 5
+        c_init_port = 6
+
+        if h_init_port not in rnn_layer.in_nodes():
+            h_shape = [num_directions, batch_size, rnn_layer.hidden_size]  # from ONNX spec
+            h_init = np.full(h_shape, 0, dtype=np.float32)
+            Op.create_and_connect_input_data_node(
+                graph,
+                rnn_layer,
+                {'value': h_init, 'shape': np.array(h_init.shape, dtype=np.int64)},
+                {'in': h_init_port, 'permutation': None}
+            )
+
+        if rnn_layer.op == 'LSTM':
+            if c_init_port not in rnn_layer.in_nodes():
+                c_shape = [num_directions, batch_size, rnn_layer.hidden_size]  # from ONNX spec
+                c_init = np.full(c_shape, 0, dtype=np.float32)
+                Op.create_and_connect_input_data_node(
+                    graph,
+                    rnn_layer,
+                    {'value': c_init, 'shape': np.array(c_init.shape, dtype=np.int64)},
+                    {'in': c_init_port, 'permutation': None}
+                )
+
+    @staticmethod
+    def check_input_ports(graph: Graph, match: dict):
+        """
+        Check that all mandatory ports is present.
+        """
+        rnn_layer = match['rnn_layer']
+        mandatory_ports = [0, 1, 2, 3, 5]
+
+        if rnn_layer.op == 'LSTM':
+            mandatory_ports.extend([6])
+
+        assert set(rnn_layer.in_nodes().keys()) >= set(mandatory_ports)
diff --git a/model-optimizer/extensions/middle/PartialInfer.py b/model-optimizer/extensions/middle/PartialInfer.py
new file mode 100644
index 000000000..d5d519c29
--- /dev/null
+++ b/model-optimizer/extensions/middle/PartialInfer.py
@@ -0,0 +1,31 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.graph.graph import Graph
+from mo.middle.passes.infer import partial_infer
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class PartialInfer(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        partial_infer(graph)
diff --git a/model-optimizer/extensions/middle/PixelLinkReshape.py b/model-optimizer/extensions/middle/PixelLinkReshape.py
index 9564b5d39..9c6cceb15 100644
--- a/model-optimizer/extensions/middle/PixelLinkReshape.py
+++ b/model-optimizer/extensions/middle/PixelLinkReshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,14 +15,14 @@
 """
 
 import logging as log
-import networkx as nx
 import numpy as np
 
 from copy import deepcopy
 
-from extensions.middle.AddReshapeAfterStridedSlice import AddReshapeAfterStridedSlice
+from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice
 from extensions.middle.FusePermutesSequence import FusePermutesSequence
 from extensions.middle.ShufflenetReshape import ReshapeSoftmaxReshape
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op
 from mo.ops.permute import Permute
@@ -30,16 +30,17 @@ from mo.ops.permute import Permute
 
 class PixelLinkReshape(MiddleReplacementPattern):
     """
-      Transform adds Permutes around Reshapes that pack 4 dimensions in 2, than 
-      do Softmax and then unpack it back to 5 dims. 
+      Transform adds Permutes around Reshapes that pack 4 dimensions in 2, than
+      do Softmax and then unpack it back to 5 dims.
     """
     enabled = True
 
     def run_before(self):
-        return [FusePermutesSequence, ReshapeSoftmaxReshape, AddReshapeAfterStridedSlice]
+        return [FusePermutesSequence, ReshapeSoftmaxReshape, ConvertGroupedStridedSlice]
 
     def run_after(self):
-        return []
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
 
     def pattern(self):
         return dict(nodes=[('reshape_split', dict(kind='op', type='Reshape')),
@@ -51,7 +52,7 @@ class PixelLinkReshape(MiddleReplacementPattern):
                            ('reshape_unpack', dict(kind='op', type='Reshape')),
                            ('reshape_unpack_data', dict(kind='data')),
                            ('strided_slice', dict(kind='op', op='StridedSlice')),
-                         ],
+                           ],
                     edges=[('reshape_split', 'reshape_split_data'),
                            ('reshape_split_data', 'reshape_pack'),
                            ('reshape_pack', 'reshape_data'),
@@ -84,7 +85,7 @@ class PixelLinkReshape(MiddleReplacementPattern):
         else:
             return False
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         if graph.graph['layout'] != 'NHWC':
             return
 
@@ -120,55 +121,72 @@ class PixelLinkReshape(MiddleReplacementPattern):
             attrs = deepcopy(graph.get_edge_data(node.id, out_node.id)[0])
             graph.remove_edge(node.id, out_node.id)
 
-            permute_after_node = permute_after.create_node_with_data([data_node], permute_after.attrs,
-                                                                     data_nodes=[out_node])
+            permute_after.create_node_with_data([data_node], permute_after.attrs,
+                                                data_nodes=[out_node])
             graph.add_edge(node.id, data_node.id, **attrs)
 
             # update softmax shape
             node_softmax = match['softmax']
             node_softmax.out_node(0).shape = out_node.shape
 
-            # revert strided slice and reshape
-            node_ss = match['strided_slice']
-            node_unpack = match['reshape_unpack']
-
-            unpack_out = node_unpack.out_node(0).id
-            ss_out = node_ss.out_node(0).id
-
-            #gather edge attributes
-            soft_reshape_attrs = deepcopy(graph.get_edge_data(node_softmax.out_node(0).id, node_unpack.id)[0])
-            reshape_data_attrs = deepcopy(graph.get_edge_data(node_unpack.id, unpack_out)[0])
-            reshape_ss_attrs = deepcopy(graph.get_edge_data(unpack_out, node_ss.id)[0])
-            ss_out_attrs = deepcopy(graph.get_edge_data(node_ss.id, ss_out)[0])
-
-            #remove all edges in Softmax->Reshape->StridedSlice chain
-            graph.remove_edge(node_softmax.out_node(0).id, node_unpack.id)
-            graph.remove_edge(node_unpack.id, unpack_out)
-            graph.remove_edge(unpack_out, node_ss.id)
-            graph.remove_edge(node_ss.id, ss_out)
-
-            #add new edges to get chain Softmax->StridedSlice->Reshape
-            graph.add_edge(node_softmax.out_node(0).id, node_ss.id, **soft_reshape_attrs)
-            graph.add_edge(node_ss.id, unpack_out, **reshape_data_attrs)
-            graph.add_edge(unpack_out, node_unpack.id, **reshape_ss_attrs)
-            graph.add_edge(node_unpack.id, ss_out, **ss_out_attrs)
-
-            #update output shape and parameters for StridedSlice
-            node_ss.out_node(0).shape = np.zeros(3)
-            node_ss.out_node(0).shape[0] = out_node.shape[0]
-            node_ss.out_node(0).shape[1] = 1
-            node_ss.out_node(0).shape[2] = out_node.shape[2]
-
-            old_slices = node_ss.slices.copy()
-            node_ss.slices = []
-            node_ss.slices.append(old_slices[0])
-            node_ss.slices.append(old_slices[-1])
-            node_ss.slices.append(slice(0, out_node.shape[2], 1))
-            node_ss.shrink_axis_mask = [False, False, False]
-            node_ss.new_axis_mask = [False, False, False]
-
-            #update Reshape attribute
-            node_unpack.dim = np.delete(node_unpack.dim, 4)
-            #prevent permute for reshape because it gives wrong result
-            node_unpack['nchw_layout'] = True
-            node_unpack.out_node(0)['nchw_layout'] = True
+            if ConvertGroupedStridedSlice.enabled is True:
+                # revert strided slice and reshape
+                node_ss = match['strided_slice']
+                node_unpack = match['reshape_unpack']
+
+                unpack_out = node_unpack.out_node(0).id
+                ss_out = node_ss.out_node(0).id
+
+                # gather edge attributes
+                soft_reshape_attrs = deepcopy(graph.get_edge_data(node_softmax.out_node(0).id, node_unpack.id)[0])
+                reshape_data_attrs = deepcopy(graph.get_edge_data(node_unpack.id, unpack_out)[0])
+                reshape_ss_attrs = deepcopy(graph.get_edge_data(unpack_out, node_ss.id)[0])
+                ss_out_attrs = deepcopy(graph.get_edge_data(node_ss.id, ss_out)[0])
+
+                # remove all edges in Softmax->Reshape->StridedSlice chain
+                graph.remove_edge(node_softmax.out_node(0).id, node_unpack.id)
+                graph.remove_edge(node_unpack.id, unpack_out)
+                graph.remove_edge(unpack_out, node_ss.id)
+                graph.remove_edge(node_ss.id, ss_out)
+
+                # add new edges to get chain Softmax->StridedSlice->Reshape
+                graph.add_edge(node_softmax.out_node(0).id, node_ss.id, **soft_reshape_attrs)
+                graph.add_edge(node_ss.id, unpack_out, **reshape_data_attrs)
+                graph.add_edge(unpack_out, node_unpack.id, **reshape_ss_attrs)
+                graph.add_edge(node_unpack.id, ss_out, **ss_out_attrs)
+
+                # update output shape and parameters for StridedSlice
+                node_ss.out_node(0).shape = np.zeros(3)
+                node_ss.out_node(0).shape[0] = out_node.shape[0]
+                node_ss.out_node(0).shape[1] = 1
+                node_ss.out_node(0).shape[2] = out_node.shape[2]
+
+                old_slices = node_ss.slices.copy()
+                node_ss.slices = []
+                node_ss.slices.append(old_slices[0])
+                node_ss.slices.append(old_slices[-1])
+                node_ss.slices.append(slice(0, out_node.shape[2], 1))
+                node_ss.shrink_axis_mask = np.array([0, 0, 0], dtype=np.int64)
+                node_ss.new_axis_mask = np.array([0, 0, 0], dtype=np.int64)
+                node_ss.ellipsis_mask = np.array([0, 0, 0], dtype=np.int64)
+                node_ss.begin_mask = np.array([0, 1, 0], dtype=np.int64)
+                node_ss.end_mask = np.array([0, 1, 0], dtype=np.int64)
+
+                # update Reshape attribute
+                node_unpack.dim = np.delete(node_unpack.dim, 4)
+                # prevent permute for reshape because it gives wrong result
+                node_unpack['nchw_layout'] = True
+                node_unpack.out_node(0)['nchw_layout'] = True
+            else:
+                # reshape unpack: permute correctly
+                node_unpack = match['reshape_unpack']
+                data_node = Op._create_data_node(graph, node.name + "/Permute_after_unpack_data", {'shape': node_unpack.out_node().shape})
+                permute_after_unpack = Permute(graph, dict(name=node.name + "/Permute_after_unpack",
+                                                           order=np.array([0, 3, 1, 2, 4])))
+                out_node = node_unpack.out_node(0)
+                out_node.shape = out_node.shape[np.array([0, 3, 1, 2, 4], dtype=np.int)]
+                attrs = deepcopy(graph.get_edge_data(node_unpack.id, out_node.id)[0])
+                graph.remove_edge(node_unpack.id, out_node.id)
+                permute_after.create_node_with_data([data_node], permute_after_unpack.attrs,
+                                                    data_nodes=[out_node])
+                graph.add_edge(node_unpack.id, data_node.id, **attrs)
diff --git a/model-optimizer/extensions/middle/PixelLinkReshape_test.py b/model-optimizer/extensions/middle/PixelLinkReshape_test.py
index e281f6016..11a41c546 100644
--- a/model-optimizer/extensions/middle/PixelLinkReshape_test.py
+++ b/model-optimizer/extensions/middle/PixelLinkReshape_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -38,6 +38,9 @@ nodes_attributes = {
     'reshape_split/Permute_before_data': {'value': None, 'shape': None, 'kind': 'data'},
     'reshape_pack/Permute_after': {'type': 'Permute', 'kind': 'op', 'op': 'Permute'},
     'reshape_pack/Permute_after_data': {'value': None, 'shape': None, 'kind': 'data'},
+    # uncoment when strided slice will be enabled
+    # 'reshape_unpack/Permute_after_unpack': {'type': 'Permute', 'kind': 'op', 'op': 'Permute'},
+    # 'reshape_unpack/Permute_after_unpack_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Softmax layer
     'softmax_1': {'type': 'SoftMax', 'kind': 'op', 'op': 'SoftMax'},
     'softmax_1_data': {'value': None, 'shape': None, 'kind': 'data'},
@@ -70,8 +73,11 @@ class ReshapeSoftmaxReshapeTests(unittest.TestCase):
                              'strided_slice': {
                                  'slices': [slice(0, 1, 1), slice(0, 227, 1), slice(0, 227, 1), slice(0, 8, 1),
                                             slice(1, 2, 1)],
-                                 'shrink_axis_mask': [False, False, False, False, True],
-                                 'new_axis_mask': [False, False, False, False, False]},
+                                 'shrink_axis_mask': [0, 0, 0, 0, 1],
+                                 'new_axis_mask': [0, 0, 0, 0, 0],
+                                 'ellipsis_mask': [0, 0, 0, 0, 0],
+                                 'begin_mask': [1, 1, 1, 1, 1],
+                                 'end_mask': [1, 1, 1, 1, 1], },
                              'strided_slice_data': {'shape': np.array([1, 227, 227, 8])},
                              })
         graph.graph['layout'] = 'NHWC'
@@ -88,10 +94,18 @@ class ReshapeSoftmaxReshapeTests(unittest.TestCase):
                                  ('reshape_pack/Permute_after', 'reshape_pack_data'),
                                  ('reshape_pack_data', 'softmax_1'),
                                  ('softmax_1', 'softmax_1_data'),
+                                 # comment when strided slice will be enabled
                                  ('softmax_1_data', 'strided_slice'),
                                  ('strided_slice', 'reshape_unpack_data'),
                                  ('reshape_unpack_data', 'reshape_unpack'),
-                                 ('reshape_unpack', 'strided_slice_data')
+                                 ('reshape_unpack', 'strided_slice_data'),
+                                 # uncomment when strided slice will be enabled
+                                 # ('softmax_1_data', 'reshape_unpack'),
+                                 # ('reshape_unpack', 'reshape_unpack/Permute_after_unpack_data'),
+                                 # ('reshape_unpack/Permute_after_unpack_data', 'reshape_unpack/Permute_after_unpack'),
+                                 # ('reshape_unpack/Permute_after_unpack', 'reshape_unpack_data'),
+                                 # ('reshape_unpack_data', 'strided_slice'),
+                                 # ('strided_slice', 'strided_slice_data'),
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 16])},
                                  'reshape_split/Permute_before_data': {'shape': np.array([1, 227, 16, 227])},
@@ -99,7 +113,11 @@ class ReshapeSoftmaxReshapeTests(unittest.TestCase):
                                  'reshape_pack_data': {'shape': np.array([1, 2, 1 * 227 * 227 * 8])},
                                  'reshape_pack/Permute_after_data': {'shape': np.array([1, 227 * 227 * 8, 2])},
                                  'softmax_1_data': {'shape': np.array([1, 2, 1 * 227 * 227 * 8])},
+                                 # comment when strided slice will be enabled
                                  'reshape_unpack_data': {'shape': np.array([1, 1, 227 * 227 * 8])},
+                                 # uncomment when strided slice will be enabled
+                                 # 'reshape_unpack_data': {'shape': np.array([1, 8, 227, 227, 2])},
+                                 # 'reshape_unpack/Permute_after_unpack_data': {'shape': np.array([1, 227, 227, 8, 2])},
                                  'strided_slice_data': {'shape': np.array([1, 227, 227, 8])}
                                  })
 
diff --git a/model-optimizer/extensions/middle/RNNSequenceNormalizeToIE.py b/model-optimizer/extensions/middle/RNNSequenceNormalizeToIE.py
new file mode 100644
index 000000000..0809c21a8
--- /dev/null
+++ b/model-optimizer/extensions/middle/RNNSequenceNormalizeToIE.py
@@ -0,0 +1,215 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from copy import deepcopy
+
+import numpy as np
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+from mo.ops.reshape import Reshape
+
+
+class RNNSequenceNormalize(MiddleReplacementPattern):
+    """
+    This class normalize RNNSequence layers to IE-compatible from of weights, inputs and outputs.
+
+    In this pass next will be done:
+        1. Weights repack (squeeze all useless shapes in all blobls and concatenate W and R together, also add
+                            bin param and all similar staff )
+        1. UNSqueeze num directions (in states and )
+        2. Initial states squeeze
+        4. Renumbering inputs
+        5. Ports checks
+
+    After this normalization this layer will have next format of inputs:
+            0: X input data,    shape [batch_size, seq_len, input_size]
+            1: WR weights blob,  shape [M * hidden_size, hidden_size + input_size]
+            2: B biases blob,   shape [M * hidden_size]
+            3: (optional) sequence_length, shape [batch_size]
+            4: initial hidden state, shape  [batch_size, hidden_size]
+            5: (only for LSTM) initial cell state, shape [batch_size, hidden_size]
+            6: (optional for LSTM) Peepholes weights, shape  [(M - 1) * hidden_size]
+
+    """
+    def run_after(self):
+        from extensions.middle.DecomposeBidirectionalRNNSequence import DecomposeBidirectionalRNNSequence
+        return [DecomposeBidirectionalRNNSequence]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('rnn_layer', dict(kind='op', type='RNNSequence')),
+                ('input', dict(kind='data')),
+                ('W', dict(kind='data')),
+                ('R', dict(kind='data')),
+                ('B', dict(kind='data')),
+            ],
+            edges=[
+                ('input', 'rnn_layer', {'in': 0}),
+                ('W', 'rnn_layer', {'in': 1}),
+                ('R', 'rnn_layer', {'in': 2}),
+                ('B', 'rnn_layer', {'in': 3}),
+            ],
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        self.repack_weights(graph, match)
+        if match['rnn_layer'].has_num_directions:
+            self.unsqueeze_num_directions(graph, match)
+        self.squeeze_initial_states(graph, match)
+        self.reordering_inputs(graph, match)
+        # some additional checks for ports number and similar stuff
+
+    def repack_weights(self, graph: Graph, match: dict):
+        # Concat W, R in IE- format
+        # Delete useless num_dir dimensions and n_cells dimensions in W, R, B (peepholes?)
+        lstm = match['rnn_layer']
+        W, R, B = match['W'].value.copy(), match['R'].value.copy(), match['B'].value.copy()
+
+        graph.remove_edge(match['W'].id, lstm.id)
+        graph.remove_edge(match['R'].id, lstm.id)
+        graph.remove_edge(match['B'].id, lstm.id)
+
+        # Sum component of B that correspond to W and R
+        if lstm.op == 'GRU' and lstm.linear_before_reset:
+            B_shape = np.array(B.shape)
+            B_shape[3] = 4
+            B_shape[2] = 1
+            B_tmp = np.zeros(shape=B_shape)
+            B_tmp[:, :, :, 0, :] = B[:, :, 0, 0, :] + B[:, :, 1, 0, :]
+            B_tmp[:, :, :, 1, :] = B[:, :, 0, 1, :] + B[:, :, 1, 1, :]
+            B_tmp[:, :, :, 2, :] = B[:, :, 0, 2, :][:, :, np.newaxis, :]
+            B_tmp[:, :, :, 3, :] = B[:, :, 1, 2, :][:, :, np.newaxis, :]
+            B = B_tmp
+        else:
+            B = np.add.reduce(B, axis=2, keepdims=True)
+
+        # Concatenate W, R to IE-compatible format
+        assert len(W.shape) == 5
+        assert len(R.shape) == 5
+        WR = np.concatenate([W, R], axis=4)
+
+        # Squeeze useless dimensions
+        assert WR.shape[0] == 1  # num_dir == 1
+        assert WR.shape[1] == 1  # num_cells == 1
+        assert B.shape[0] == 1
+        assert B.shape[1] == 1
+        WR = WR.squeeze(axis=(0, 1))
+        B = B.squeeze(axis=(0, 1))
+
+        # Flatten all output (0, 1) and input dimensions (2, 3)
+        final_shape_WR = [WR.shape[0] * WR.shape[1], -1]
+        assert final_shape_WR[0] == lstm.hidden_size * lstm.multiplier
+        WR = WR.reshape(final_shape_WR)
+
+        final_shape_B = final_shape_WR
+        if lstm.op == 'GRU' and lstm.linear_before_reset:
+            final_shape_B[0] = lstm.hidden_size * 4
+        B = B.reshape(final_shape_B)
+
+        # Squeeze fake dimension in B
+        B = B.squeeze(axis=-1)
+
+        for blob, port, name in [(WR, 1, 'weights'), (B, 2, 'biases')]:
+            Op.create_and_connect_input_data_node(
+                graph,
+                lstm,
+                {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
+                {'in': port, 'bin': name, 'permutation': None}
+            )
+
+    @staticmethod
+    def unsqueeze_num_directions(graph: Graph, match: dict):
+        """ Assuming considered LSTM/GRU/RNN node should has num_directions in output shape and add Reshape
+            to match it.
+        """
+
+        rnn_layer = match['rnn_layer']
+        # num_directions is at 1st position in output shape, and in 0st position in hidden and cell states
+        # please refer to docs in this transform
+
+        direction_dim = [1, 0, 0]  # index of dimension with direction index
+        for i in rnn_layer.out_nodes():
+            old_data_node = rnn_layer.out_node(i)
+            old_shape = old_data_node.shape.copy()
+            new_shape = np.delete(old_shape, direction_dim[i])
+
+            data = Op._create_data_node(graph, name=rnn_layer.name + '/Out/{}/'.format(i), attrs={'shape': new_shape})
+            graph.remove_edge(rnn_layer.id, old_data_node.id)
+            graph.add_edge(rnn_layer.id, data.id, key=0, out=i)
+
+            reshape = Reshape(graph, dict(dim=old_shape))
+            reshape.create_node_with_data([data], dict(name=rnn_layer.name + '/SqueezeNumDirections/{}'.format(i)),
+                                          data_nodes=[old_data_node])
+
+    @staticmethod
+    def squeeze_initial_states(graph: Graph, match: dict):
+        """
+        Squeeze input initial states of recurrent node to 2-D shape.
+        """
+        hidden_init_port = 5
+        cell_init_port = 6
+
+        rnn_layer = match['rnn_layer']
+
+        reshape = Reshape(graph, dict(dim=[rnn_layer.in_node(0).shape[rnn_layer.batch_dim], rnn_layer.hidden_size]))
+
+        assert hidden_init_port in rnn_layer.in_nodes()
+        init_h = rnn_layer.in_node(hidden_init_port)
+        edge_attrs = deepcopy(graph.get_edge_data(init_h.id, rnn_layer.id)[0])
+        edge_attrs['in'] = hidden_init_port
+        graph.remove_edge(init_h.id, rnn_layer.id)
+        new_init_h = reshape.create_node_with_data([init_h], dict(name=rnn_layer.name + '/HiddenStateResize'))
+        graph.add_edge(new_init_h.id, rnn_layer.id, **edge_attrs)
+
+        if rnn_layer.op == 'LSTM':
+            assert cell_init_port in rnn_layer.in_nodes()
+
+            init_c = rnn_layer.in_node(cell_init_port)
+            edge_attrs = deepcopy(graph.get_edge_data(init_c.id, rnn_layer.id)[0])
+            edge_attrs['in'] = cell_init_port
+            graph.remove_edge(init_c.id, rnn_layer.id)
+            new_init_c = reshape.create_node_with_data([init_c], dict(name=rnn_layer.name + '/CellStateResize'))
+            graph.add_edge(new_init_c.id, rnn_layer.id, **edge_attrs)
+
+    @staticmethod
+    def reordering_inputs(graph: Graph, match: dict):
+        """
+        Reorder (renumbering) inputs to described format. We need to renumber initial states ports.
+        """
+        rnn_layer = match['rnn_layer']
+        assert 5 in rnn_layer.in_nodes()
+        hidden_state_edge = graph.get_edge_data(rnn_layer.in_node(5).id, rnn_layer.id)
+        hidden_state_edge[0]['in'] = 4
+
+        if rnn_layer.op == 'LSTM':
+            assert 6 in rnn_layer.in_nodes()
+            cell_state_edge = graph.get_edge_data(rnn_layer.in_node(6).id, rnn_layer.id)
+            cell_state_edge[0]['in'] = 5
+
+    @staticmethod
+    def ports_checks(graph: Graph, match: dict):
+        """
+            Check that all mandatory ports is present.
+        """
+        rnn_layer = match['rnn_layer']
+        mandatory_ports = [0, 1, 2, 4]
+
+        if rnn_layer.op == 'LSTM':
+            mandatory_ports.append(5)
+
+        assert set(rnn_layer.in_nodes().keys()) >= set(mandatory_ports)
+\ No newline at end of file
diff --git a/model-optimizer/extensions/middle/Reduce.py b/model-optimizer/extensions/middle/Reduce.py
index 6c6c91d27..1dedf8322 100644
--- a/model-optimizer/extensions/middle/Reduce.py
+++ b/model-optimizer/extensions/middle/Reduce.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,11 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
 from mo.front.common.layout import get_batch_dim, get_features_dim
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.pooling import Pooling
 from mo.ops.power import Power
@@ -39,6 +39,14 @@ class ReduceReplacer(MiddleReplacementPattern):
         'sum': 'avg'
     }
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -47,7 +55,7 @@ class ReduceReplacer(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         node = match['reduce']
         if not node.has_valid('reduce_type') or node.reduce_type.lower() not in self.supported_reduce_types:
             log.error("Reduce type {} is not supported for node {}".format(node.soft_get('reduce_type'), node.id))
diff --git a/model-optimizer/extensions/middle/Reduce_test.py b/model-optimizer/extensions/middle/Reduce_test.py
index 1925df1a7..f708e0a2f 100644
--- a/model-optimizer/extensions/middle/Reduce_test.py
+++ b/model-optimizer/extensions/middle/Reduce_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/ReluQuantizeFuse.py b/model-optimizer/extensions/middle/ReluQuantizeFuse.py
new file mode 100644
index 000000000..116a493b5
--- /dev/null
+++ b/model-optimizer/extensions/middle/ReluQuantizeFuse.py
@@ -0,0 +1,90 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+from extensions.middle.BinarizeWeightsM1P1 import BinarizeWeightsM1P1
+from mo.graph.graph import Graph
+from mo.middle.passes.eliminate import remove_op_node_with_data_node
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class ReluQuantizeFuse(MiddleReplacementPattern):
+    """ Fuses ReLU --> Quantize sequence if possible
+
+        Relu --> Quantize fusion is possible if:
+            1. Relu is consumed to 0-th port of Quantize
+            2. Quantize ports 1 and 2 defines such input range that 0 is not included
+    """
+    enabled = True
+
+    def run_after(self):
+        return [BinarizeWeightsM1P1]
+
+    def run_before(self):
+        from extensions.middle.SharedWeightsDuplication import SharedWeightsDuplication
+        return [SharedWeightsDuplication]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('relu', dict(op='Relu')),
+                ('relued', dict()),
+                ('quantize', dict(op='Quantize')),
+            ],
+            edges=[
+                ('relu', 'relued'),
+                ('relued', 'quantize', {'in': 0}),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+
+        quantize = match['quantize']
+
+        # Check for total number of ReLU consumers -- if something else consume its output it cannot be fused
+        if len(match['relu'].out_node().out_nodes()) > 1:
+            log.debug('ReluQuantizeFuse: cannot fuse because ReLU have multiple consumers')
+            return
+
+        # If the fusion is applicable, direct modifications to quantize 1-st and 2-nd inputs
+        # are performed. So the data nodes at those inputs shouldn't have more than 1 consumer
+        # maximum 2 consumers to the same quantize op (consumed by 1st and 2nd ports).
+        # TODO: relax this limitation and duplicate data nodes accordingly to modify the input range freely
+
+        # Provisional limitation that related to binary quantization
+        # TODO: Relax it beyond binarization case
+        if len(quantize.in_node(1).out_nodes()) != 2 or \
+                        len(quantize.in_node(2).out_nodes()) != 2 or \
+                        quantize.in_node(1).id != quantize.in_node(2).id or \
+                        quantize.levels != 2:
+            log.debug('ReluQuantizeFuse: cannot fuse because Quantize op has '
+                      'unexpected number of consumers for ports 1 and 2')
+            return
+
+        threshold = quantize.in_node(1)
+
+        # As we restricted to binarization case only, so we need to detect from
+        # which side of 0 Quantize threshold resides:
+        #   if the threshold > 0, it remains the same;
+        #   if the threshold == 0, it also remains the same;
+        #   if the threshold < 0, it should be modified to -infinity that means that all inputs map to output_high
+
+        modification_mask = threshold.value < 0
+        threshold.value[modification_mask] = float('-inf')
+
+        # Remove ReLU as it no longer needed
+        remove_op_node_with_data_node(graph, match['relu'])
diff --git a/model-optimizer/extensions/middle/RemoveIdentity.py b/model-optimizer/extensions/middle/RemoveIdentity.py
new file mode 100644
index 000000000..ba7535c6e
--- /dev/null
+++ b/model-optimizer/extensions/middle/RemoveIdentity.py
@@ -0,0 +1,83 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph
+from mo.middle.passes.eliminate import remove_op_node_with_data_node
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class RemoveIdentity(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.InputCut import MiddleInputCut
+        return [MiddleInputCut]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('op', dict(kind='op', identity=True))],
+            edges=[]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        remove_op_node_with_data_node(graph, match['op'])
+
+
+class RemoveDropout(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('op', dict(op='Dropout'))],
+            edges=[]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        remove_op_node_with_data_node(graph, match['op'])
+
+
+class RemoveNodesWithZeroPhase(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('op', dict(kind='op', phase=0))],
+            edges=[]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        remove_op_node_with_data_node(graph, match['op'])
diff --git a/model-optimizer/extensions/middle/RemoveRedundantReshapeAfterCropAndResize.py b/model-optimizer/extensions/middle/RemoveRedundantReshapeAfterCropAndResize.py
new file mode 100644
index 000000000..9f5416582
--- /dev/null
+++ b/model-optimizer/extensions/middle/RemoveRedundantReshapeAfterCropAndResize.py
@@ -0,0 +1,68 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class RemoveRedundantReshapeAfterCropAndResize(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
+    def run_before(self):
+        return []
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('crop_and_resize', dict(kind='op', op='CropAndResize')),
+                ('crop_and_resize_data', dict(kind='data')),
+                ('reshape_1', dict(kind='op', op='Reshape')),
+                ('reshape_1_data', dict(kind='data')),
+                ('reshape_2', dict(kind='op', op='Reshape')),
+            ],
+            edges=[
+                ('crop_and_resize', 'crop_and_resize_data'),
+                ('crop_and_resize_data', 'reshape_1'),
+                ('reshape_1', 'reshape_1_data'),
+                ('reshape_1_data', 'reshape_2'),
+            ]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        car_node = match['crop_and_resize']
+        reshape_2_node = match['reshape_2']
+
+        shape_1 = match['crop_and_resize_data'].shape
+        shape_2 = match['reshape_2'].out_node().shape
+        if not np.all(shape_1 == shape_2):
+            log.debug('Cannot remove reshape operations after CropAndResize due to different shapes: {} vs {}'.format(
+                shape_1, shape_2
+            ))
+            return
+
+        car_node.out_port(0).disconnect()
+        consumer_port_node = reshape_2_node.out_port(0).get_connection().get_destination()
+        consumer_port_node.disconnect()
+        car_node.out_port(0).connect(consumer_port_node)
diff --git a/model-optimizer/extensions/middle/ReverseV2ToReverseSequence.py b/model-optimizer/extensions/middle/ReverseV2ToReverseSequence.py
new file mode 100644
index 000000000..a1aa418e4
--- /dev/null
+++ b/model-optimizer/extensions/middle/ReverseV2ToReverseSequence.py
@@ -0,0 +1,62 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from extensions.ops.reverse_sequence import ReverseSequence
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.const import Const
+
+
+class ReverseToReverseSequence(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.PartialInfer import PartialInfer
+        return [PartialInfer]
+
+    def run_before(self):
+        from extensions.middle.reverse_tensor_iterator import ReverseTensorIteratorLSTM
+        return [ReverseTensorIteratorLSTM]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('reverse', dict(kind='op', op='Reverse'))
+            ],
+            edges=[]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        reverse = match['reverse']
+        input_data_shape = reverse.in_node(0).shape
+
+        assert reverse.in_port(1).disconnected()
+
+        # 1. For ReverseSequence 1-port input is seq_lengths => create this input node
+        seq_lengths = np.ones(input_data_shape[0]) * input_data_shape[reverse['axis']]
+        const = Const(graph, dict(value=seq_lengths)).create_node()
+
+        # 2. Create new ReverseSequence node and reconnect all inputs/outputs to it
+        reverse_sequence = ReverseSequence(graph, {'name': reverse.name + '/ReverseSequence/',
+                                                   'seq_axis': reverse['axis']}).create_node()
+
+        reverse.in_port(0).get_connection().set_destination(reverse_sequence.in_port(0))
+        const.out_port(0).connect(reverse_sequence.in_port(1))
+        reverse.out_port(0).get_connection().set_source(reverse_sequence.out_port(0))
+
+        # 3. Delete old Reverse node
+        graph.remove_node(reverse.id)
diff --git a/model-optimizer/extensions/middle/ScaleInput.py b/model-optimizer/extensions/middle/ScaleInput.py
new file mode 100644
index 000000000..ad04300eb
--- /dev/null
+++ b/model-optimizer/extensions/middle/ScaleInput.py
@@ -0,0 +1,71 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.lin_op import Mul
+from mo.ops.op import Op
+from mo.utils.error import Error
+
+
+class ScaleInput(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.AddMeanScaleValues import AddMeanScaleValues
+        return [AddMeanScaleValues]
+
+    def pattern(self):
+        return dict(
+            nodes=[
+                ('placeholder', dict(kind='op', op='Placeholder')),
+                ('data', dict(kind='data'))],
+            edges=[
+                ('placeholder', 'data'),
+            ],
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        scale = graph.graph['cmd_params'].scale
+        if scale is None or scale == 1:
+            return
+        assert (len(match['placeholder'].out_nodes()))
+
+        tinput = match['placeholder']
+        if not tinput.has_valid('shape'):
+            raise Error("Node {} has not valid shape attribute".format(tinput.id))
+
+        input_shape = tinput.shape
+        toutput = match['data']
+
+        # Create Mul node
+        value = np.array([1 / scale])
+
+        # Disconnect input with data node
+        graph.remove_edge(tinput.id, toutput.id)
+
+        # Create Mul node
+        mul_node = Mul(graph, dict(name="Mul1_"))
+        mul_data = Op.create_input_data_node(graph, "data_mul_scale_", np.array(value))
+        Op.expand_node_shape(mul_data, len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0)
+        mul_input = Op.create_data_node(graph, tinput, {'shape': toutput.shape})
+
+        mul_node.create_node_with_data(inputs=[mul_input, mul_data], data_nodes=toutput)
diff --git a/model-optimizer/extensions/middle/ScaleInput_test.py b/model-optimizer/extensions/middle/ScaleInput_test.py
new file mode 100644
index 000000000..2dac2dad3
--- /dev/null
+++ b/model-optimizer/extensions/middle/ScaleInput_test.py
@@ -0,0 +1,91 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+from argparse import Namespace
+
+import numpy as np
+
+from extensions.middle.ScaleInput import ScaleInput
+from mo.utils.unittest.graph import build_graph, compare_graphs
+
+nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'node_1_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'node_2': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'concat': {'type': 'Concat', 'value': None, 'kind': 'op'},
+                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                    'node_3_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    # Placeholders
+                    'placeholder_1': {'shape': None, 'type': 'Input', 'kind': 'op', 'op': 'Placeholder'},
+                    'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+                    'placeholder_2': {'shape': None, 'type': 'Input', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_1': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_1_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'pl_2': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+                    'pl_2_data': {'value': None, 'kind': 'data', 'data_type': None},
+                    'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+                    # ScaleShift layer
+                    'scaleshift_1': {'type': 'ScaleShift', 'kind': 'op', 'op': 'ScaleShift'},
+                    'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+                    'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
+                    'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    # Mul op
+                    'mul_1': {'type': None, 'kind': 'op', 'op': 'Mul'},
+                    'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+                    'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput', 'infer': lambda x: None}
+                    }
+
+
+class ScaleInputTests(unittest.TestCase):
+    def test_scale_input_1(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'op_output')
+                             ],
+                            {'placeholder_1': {'shape': np.array([1, 3, 224, 224])}},
+                            nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'mul_1_data'),
+                                 ('mul_1_data', 'mul_1'),
+                                 ('mul_1_w', 'mul_1'),
+                                 ('mul_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'op_output')
+                                 ],
+                                {'mul_1_w': {'shape': np.array([1, 1, 1]), 'value': np.array([1 / 255])}},
+                                nodes_with_edges_only=True)
+        graph.graph['layout'] = 'NCHW'
+        graph.graph['cmd_params'] = Namespace(scale=255)
+        ScaleInput().find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1_data')
+        self.assertTrue(flag, resp)
+
+    def test_scale_input_2(self):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'op_output')
+                             ],
+                            nodes_with_edges_only=True)
+
+        graph_ref = build_graph(nodes_attributes,
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'op_output')
+                                 ],
+                                nodes_with_edges_only=True)
+        graph.graph['cmd_params'] = Namespace(scale=1)
+        ScaleInput().find_and_replace_pattern(graph)
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1_data')
+        self.assertTrue(flag, resp)
diff --git a/model-optimizer/extensions/middle/SharedWeightsDuplication.py b/model-optimizer/extensions/middle/SharedWeightsDuplication.py
new file mode 100644
index 000000000..d1f67ea34
--- /dev/null
+++ b/model-optimizer/extensions/middle/SharedWeightsDuplication.py
@@ -0,0 +1,54 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.graph.graph import Graph, Node
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+
+
+class SharedWeightsDuplication(MiddleReplacementPattern):
+    enabled = True
+    force_clean_up = True
+
+    def run_after(self):
+        from extensions.middle.CheckForCycle import CheckForCycle
+        return [CheckForCycle]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def find_and_replace_pattern(self, graph: Graph):
+        """
+        This function finds all const data nodes that have more that one consumer and then duplicate them
+        """
+        data_nodes = [Node(graph, id) for id in graph.nodes() if Node(graph, id).soft_get('kind') == 'data']
+        for node in data_nodes:
+            # Check that node has const values and more than one consumer
+            if len(node.in_nodes()) and node.in_node().soft_get('type') == 'Const' and len(node.out_nodes()) > 1 and \
+                            node.value is not None:
+                # Here we delete all edges between base node and it's consumers (except first), and then duplicate this
+                # node to connect with other consumers
+                for v, d in node.get_outputs():
+                    out_node = Node(graph, v)
+                    e_attrs = d
+                    graph.remove_edge(node.id, out_node.id)
+                    data = Op.create_input_data_node(graph, "Copy_{}".format(node.id), np.array(node.value),
+                                                     graph.node[node.id])
+
+                    graph.add_edges_from([(data.id, out_node.id, e_attrs)])
+
diff --git a/model-optimizer/mo/middle/passes/shared_weights_duplication_test.py b/model-optimizer/extensions/middle/SharedWeightsDuplication_test.py
index ef482762f..49f571ffb 100644
--- a/model-optimizer/mo/middle/passes/shared_weights_duplication_test.py
+++ b/model-optimizer/extensions/middle/SharedWeightsDuplication_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,10 +18,12 @@ import unittest
 
 import numpy as np
 
-from mo.middle.passes.shared_weights_duplication import duplicate_shared_weights
+from extensions.middle.SharedWeightsDuplication import SharedWeightsDuplication
+from mo.middle.passes.eliminate import graph_clean_up
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
 nodes_attributes = {
+    'const': {'shape': None, 'type': 'Const', 'kind': 'op', 'op': 'Const'},
     # Mul and Add operations
     'mul_1': {'type': None, 'kind': 'op', 'op': 'Mul'},
     'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
@@ -35,13 +37,15 @@ nodes_attributes = {
     # Concat1 operation
     'concat_1': {'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
     'concat_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'op_output': {'op': 'OpOutput', 'kind': 'op'}
 }
 
 
 class DuplicateSharedWeightsTests(unittest.TestCase):
     def test_duplicate_shared_weights_1(self):
         graph = build_graph(nodes_attributes,
-                            [('mul_1_w', 'mul_1'),
+                            [('const', 'mul_1_w'),
+                             ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_w', 'mul_2'),
                              ('mul_2', 'mul_2_data'),
@@ -50,12 +54,16 @@ class DuplicateSharedWeightsTests(unittest.TestCase):
                              ('mul_1_data', 'concat_1'),
                              ('mul_2_data', 'concat_1'),
                              ('mul_3_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
-                            {'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])}})
+                            {'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])}},
+                            nodes_with_edges_only=True
+                            )
 
         graph_ref = build_graph(nodes_attributes,
-                                [('mul_1_w', 'mul_1'),
+                                [
+                                 ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_2_w', 'mul_2'),
                                  ('mul_2', 'mul_2_data'),
@@ -64,14 +72,16 @@ class DuplicateSharedWeightsTests(unittest.TestCase):
                                  ('mul_1_data', 'concat_1'),
                                  ('mul_2_data', 'concat_1'),
                                  ('mul_3_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
-                                 ],
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
+                                ],
                                 {'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_3_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                                 })
-
-        duplicate_shared_weights(graph)
+                                 }, nodes_with_edges_only=True)
 
+        SharedWeightsDuplication().find_and_replace_pattern(graph)
+        graph_clean_up(graph)
+        graph_clean_up(graph_ref)
         (flag, resp) = compare_graphs(graph, graph_ref, 'concat_1_data')
-        self.assertTrue(flag, resp)
+        self.assertTrue(flag, resp)
+\ No newline at end of file
diff --git a/model-optimizer/extensions/middle/ShuffleChannel.py b/model-optimizer/extensions/middle/ShuffleChannel.py
index 5370aebca..d5e85faa3 100644
--- a/model-optimizer/extensions/middle/ShuffleChannel.py
+++ b/model-optimizer/extensions/middle/ShuffleChannel.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,10 +13,10 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-import networkx as nx
 import numpy as np
 
 from extensions.middle.ShufflenetReshape import FeatureShuffleReshape
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.permute import Permute
 from mo.ops.reshape import Reshape
@@ -33,6 +33,10 @@ class ShuffleChannel(MiddleReplacementPattern):
     def run_after(self):
         return [FeatureShuffleReshape]
 
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -41,7 +45,7 @@ class ShuffleChannel(MiddleReplacementPattern):
             edges=[
             ])
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         if graph.graph['layout'] != "NCHW":
             return
 
@@ -58,7 +62,8 @@ class ShuffleChannel(MiddleReplacementPattern):
         cols = in_node.shape[1] // group
 
         if rows * cols != in_node.shape[1]:
-            raise Error("Group {} should divide input channels number {} without reminder for node {}".format(group, in_node.shape[1], node.id))
+            raise Error("Group {} should divide input channels number {} without reminder for node {}"
+                        "".format(group, in_node.shape[1], node.id))
 
         reshape_split = Reshape(graph, attrs={'name': node.id + '/Reshape_split_',
                                               'dim': np.array([in_node.shape[0], rows, cols, -1])})
diff --git a/model-optimizer/extensions/middle/ShuffleChannel_test.py b/model-optimizer/extensions/middle/ShuffleChannel_test.py
index 4b1e7e44b..2cd6dd146 100644
--- a/model-optimizer/extensions/middle/ShuffleChannel_test.py
+++ b/model-optimizer/extensions/middle/ShuffleChannel_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/ShufflenetReshape.py b/model-optimizer/extensions/middle/ShufflenetReshape.py
index f85d60dab..b25eb09f7 100644
--- a/model-optimizer/extensions/middle/ShufflenetReshape.py
+++ b/model-optimizer/extensions/middle/ShufflenetReshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,11 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.layout import get_features_dim, get_height_dim, get_width_dim
 from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.reshape import Reshape
 
@@ -33,6 +33,10 @@ class FeatureShuffleReshape(MiddleReplacementPattern):
 
     enabled = True
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -51,7 +55,7 @@ class FeatureShuffleReshape(MiddleReplacementPattern):
                    ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         reshape1 = match['reshape1']
         reshape2 = match['reshape2']
         transpose = match['transpose']
@@ -117,6 +121,8 @@ class FeatureShuffleReshape(MiddleReplacementPattern):
         new_transpose_shape = np.array(new_reshape1_shape[new_transpose_order])
 
         reshape1.out_node().shape = new_reshape1_shape
+        reshape1.dim = np.copy(new_reshape1_shape)
+
         transpose.order = new_transpose_order
         transpose.out_node().shape = new_transpose_shape
 
@@ -137,6 +143,10 @@ class ReshapeSoftmaxReshape(MiddleReplacementPattern):
 
     enabled = True
 
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -150,7 +160,7 @@ class ReshapeSoftmaxReshape(MiddleReplacementPattern):
                    ('softmax', 'softmax_data'),
                    ])
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         layout = graph.graph['layout']
         if layout != 'NHWC':
             return
diff --git a/model-optimizer/extensions/middle/ShufflenetReshape_test.py b/model-optimizer/extensions/middle/ShufflenetReshape_test.py
index d75c83d9f..1bd8b2aec 100644
--- a/model-optimizer/extensions/middle/ShufflenetReshape_test.py
+++ b/model-optimizer/extensions/middle/ShufflenetReshape_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/SliceConvert_test.py b/model-optimizer/extensions/middle/SliceConvert_test.py
index f282d5e0d..745ca4220 100644
--- a/model-optimizer/extensions/middle/SliceConvert_test.py
+++ b/model-optimizer/extensions/middle/SliceConvert_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,18 +25,23 @@ from mo.ops.slice import Slice
 nodes_attributes = {
     # input data
     'placeholder_1': {'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
+    'placeholder_2': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
+    'placeholder_3': {'type': 'Const', 'kind': 'op', 'op': 'Const'},
     'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'placeholder_3_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # Slice layer
     'slice': {'type': 'Slice', 'kind': 'op', 'op': 'Slice'},
     'slice_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Output operation
     'output_op': {'type': 'Const', 'value': None, 'kind': 'op', 'op': 'Const'},
     'output_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
     # Crop layer
     'crop': {'type': 'Crop', 'kind': 'op', 'op': 'Crop', 'axis': None, 'offset': None, 'dim': None},
     'dim': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # StridedSlice layer
-    'strided_slice': {'type': 'StridedSlice', 'kind': 'op', 'op': 'StridedSlice', 'slices': None,
+    'strided_slice': {'kind': 'op', 'op': 'StridedSlice', 'slices': None,
                       'shrink_axis_mask': None}
 }
 
@@ -53,11 +58,11 @@ class ConvertSliceTests(unittest.TestCase):
                              ('placeholder_1_data', 'slice'),
                              ('slice', 'slice_data'),
                              ('slice_data', 'output_op'),
-                             ('output_op', 'output_data')
+                             ('output_op', 'output_data'),
+                             ('output_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([4, 5, 6])},
                              'slice': {'start': np.array([1, 2, 3]), 'end': np.array([3, 4, 4]), 'axis': None},
-                             'output_op': {'is_output': True},
                              }
                             )
         slice_node = Node(graph, 'slice')
@@ -71,12 +76,11 @@ class ConvertSliceTests(unittest.TestCase):
                                  ('placeholder_1_data', 'crop'),
                                  ('crop', 'slice_data'),
                                  ('slice_data', 'output_op'),
-                                 ('output_op', 'output_data')
+                                 ('output_op', 'output_data'),
+                                 ('output_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([4, 5, 6])},
-                                 'crop': {'axis': np.array([0, 1, 2]), 'offset': np.array([1, 2, 3]),
-                                          },
-                                 'output_op': {'is_output': True},
+                                 'crop': {'axis': np.array([0, 1, 2]), 'offset': np.array([1, 2, 3])},
                                  'dim': {'dim': np.array([2, 2, 1])},
                                  }
                                 )
@@ -93,11 +97,11 @@ class ConvertSliceTests(unittest.TestCase):
                              ('placeholder_1_data', 'slice'),
                              ('slice', 'slice_data'),
                              ('slice_data', 'output_op'),
-                             ('output_op', 'output_data')
+                             ('output_op', 'output_data'),
+                             ('output_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([4, 5, 6])},
-                             'slice': {'start': np.array([1]), 'end': np.array([3]), 'axis': None},
-                             'output_op': {'is_output': True}
+                             'slice': {'start': np.array([1]), 'end': np.array([3]), 'axis': None}
                              }
                             )
         slice_node = Node(graph, 'slice')
@@ -108,15 +112,19 @@ class ConvertSliceTests(unittest.TestCase):
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_2', 'placeholder_2_data'),
+                                 ('placeholder_3', 'placeholder_3_data'),
                                  ('placeholder_1_data', 'strided_slice'),
+                                 ('placeholder_2_data', 'strided_slice'),
+                                 ('placeholder_3_data', 'strided_slice'),
                                  ('strided_slice', 'slice_data'),
                                  ('slice_data', 'output_op'),
-                                 ('output_op', 'output_data')
+                                 ('output_op', 'output_data'),
+                                 ('output_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([4, 5, 6])},
                                  'strided_slice': {'slices': np.array([slice(1, 3, 1),slice(0, 5, 1),slice(0, 6, 1)]),
                                                    'shrink_axis_mask': np.array([False, False, False])},
-                                 'output_op': {'is_output': True}
                                  }
                                 )
 
diff --git a/model-optimizer/extensions/middle/SliceConverter.py b/model-optimizer/extensions/middle/SliceConverter.py
index f6e925b73..e4c026677 100644
--- a/model-optimizer/extensions/middle/SliceConverter.py
+++ b/model-optimizer/extensions/middle/SliceConverter.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.const import Const
 from mo.ops.crop import Crop
-from mo.ops.op import Op
+from mo.ops.strided_slice import StridedSlice
 
 
 def convert_negative_indices(indices: np.array, shape: np.array):
@@ -36,6 +37,10 @@ class ConvertSlice(MiddleReplacementPattern):
     enabled = True
     op = "Slice"
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -44,7 +49,7 @@ class ConvertSlice(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         node = match['slice']
         # Caffe case
         if not node.has_valid('start') or not node.has_valid('end'):
@@ -52,31 +57,50 @@ class ConvertSlice(MiddleReplacementPattern):
 
         begin = node.start
         end = node.end
+        axis = node.axis if node.has_valid('axis') else range(begin.size)
+        
 
         input = node.in_node(0)
         output_data = node.out_node()
 
         # Check whether operation use only one axis or not
+        axes_begin = np.zeros(len(input.shape), dtype=np.int32)
+        axes_end = np.zeros(len(input.shape), dtype=np.int32)
+        begin_ext = np.zeros(len(input.shape), dtype=np.int32)
+        end_ext = np.zeros(len(input.shape), dtype=np.int32)
         dims = 0
         axes = np.zeros(begin.size)
-        for i in range(begin.size):
-            if begin[i] != 0 or end[i] != input.shape[i]:
+        for i in range(len(axis)):
+            if begin[i] != 0 or end[i] < input.shape[i]:
                 dims += 1
                 axes[i] = 1
+                if begin[i] != 0:
+                    axes_begin[axis[i]] = 1
+                    begin_ext[axis[i]] = begin[i]
+                if end[i] < input.shape[i]:
+                    axes_end[axis[i]] = 1
+                    end_ext[axis[i]] = end[i]
         axes = np.array(axes, dtype=bool)
-        if dims == 0:
-            return
-        elif dims == 1:
-            # If Slice use only one axis, than
+
+        if dims == 1 or dims == 0:
+            # If Slice use only one axis or no axis, than
             # convert Slice to StridedSlice
+            ss = StridedSlice(graph, dict(new_axis_mask=np.zeros(len(output_data.shape), dtype=np.int32),
+                                          shrink_axis_mask=np.zeros(len(output_data.shape), dtype=np.int32),
+                                          ellipsis_mask=np.zeros(len(output_data.shape), dtype=np.int32),
+                                          begin_mask=axes_begin,
+                                          end_mask=axes_end))
+
+            convert_negative_indices(begin_ext, input.shape)
+            convert_negative_indices(end_ext, input.shape)
 
-            node['op'] = 'StridedSlice'
-            node['type'] = 'StridedSlice'
-            node['new_axis_mask'] = np.zeros(len(output_data.shape), dtype=np.bool)
-            node['shrink_axis_mask'] = np.zeros(len(output_data.shape), dtype=np.bool)
+            begin_node = Const(graph, {'name': 'begin', 'value': begin_ext, 'force_precision': 'I32'}).create_node_with_data()
+            end_node = Const(graph, {'name': 'end', 'value': end_ext, 'force_precision': 'I32'}).create_node_with_data()
 
-            convert_negative_indices(begin, input.shape)
-            convert_negative_indices(end, input.shape)
+            ss.create_node_with_data(inputs=[input, begin_node, end_node], data_nodes=[output_data])
+            # Remove unnecessary edges from and to to Slice vertex
+            graph.remove_edge(input.id, node.id)
+            graph.remove_edge(node.id, output_data.id)
         else:
             # If Slice use more than one axis use Crop layer
             crop = Crop(graph, dict(axis=np.arange(begin.size)[axes],
diff --git a/model-optimizer/extensions/middle/SwapAxesMiddleReplacer.py b/model-optimizer/extensions/middle/SwapAxesMiddleReplacer.py
index 276ff7fd2..409bdee32 100644
--- a/model-optimizer/extensions/middle/SwapAxesMiddleReplacer.py
+++ b/model-optimizer/extensions/middle/SwapAxesMiddleReplacer.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
 from mo.ops.reshape import Reshape
 
 
@@ -31,7 +30,7 @@ class SwapAxesMiddleReplacer(MiddleReplacementPattern):
             edges=[],
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         """
             Replace swapaxes layer:
             swapaxes -> Reshape
@@ -47,5 +46,6 @@ class SwapAxesMiddleReplacer(MiddleReplacementPattern):
         graph.remove_edge(swapaxes_in_node.id, swapaxes.id)
         graph.remove_edge(swapaxes.id, swapaxes_out_node.id)
         Reshape(graph, {'dim': np.array(swapaxes_in_node.shape)}).create_node_with_data(inputs=[swapaxes_in_node],
-                                                                                      data_nodes=[swapaxes_out_node],
-                                                                                      edge_attrs=[input_edge_attrs, output_edge_attrs])
+                                                                                        data_nodes=[swapaxes_out_node],
+                                                                                        edge_attrs=[input_edge_attrs,
+                                                                                                    output_edge_attrs])
diff --git a/model-optimizer/extensions/middle/TF_lstm_cell_to_generic.py b/model-optimizer/extensions/middle/TF_lstm_cell_to_generic.py
index b029b4549..20faa4efc 100644
--- a/model-optimizer/extensions/middle/TF_lstm_cell_to_generic.py
+++ b/model-optimizer/extensions/middle/TF_lstm_cell_to_generic.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from extensions.middle.FusePermutesSequence import FusePermutesSequence
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 
 
@@ -31,7 +31,8 @@ class TensorFlowLSTMtoGeneric(MiddleReplacementPattern):
     enabled = True
 
     def run_after(self):
-        return []
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
 
     def run_before(self):
         return [
@@ -44,7 +45,7 @@ class TensorFlowLSTMtoGeneric(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         weights_node = match['lstm'].in_node(3)
         biases_node = match['lstm'].in_node(4)
         node = match['lstm']
@@ -61,9 +62,9 @@ class TensorFlowLSTMtoGeneric(MiddleReplacementPattern):
         hidden_size = node.in_node(1).shape[1]
         weights = weights_node.value
         biases = biases_node.value
-        assert weights.shape[0] == input_size + hidden_size, "weights.shape={} input_size={} hidden_size={}".format(
-            weights.shape, input_size, hidden_size)
-        assert weights.shape[1] == biases.shape[0] == 4 * hidden_size,\
+        assert weights.shape[0] == input_size + hidden_size, \
+            "weights.shape={} input_size={} hidden_size={}".format(weights.shape, input_size, hidden_size)
+        assert weights.shape[1] == biases.shape[0] == 4 * hidden_size, \
             "weights.shape={} biases.shape={} hidden_size={}".format(weights.shape, biases.shape, hidden_size)
 
         weights = weights.reshape([
diff --git a/model-optimizer/extensions/middle/TensorIteratorBackEdge.py b/model-optimizer/extensions/middle/TensorIteratorBackEdge.py
index 868b38cd6..2ae1fe96f 100644
--- a/model-optimizer/extensions/middle/TensorIteratorBackEdge.py
+++ b/model-optimizer/extensions/middle/TensorIteratorBackEdge.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,9 +16,8 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.ops.TensorIterator_ops import TensorIteratorBackEdge, TensorIteratorOutput
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 
 
@@ -44,6 +43,15 @@ class BackEdgesMatching(MiddleReplacementPattern):
        TensorIteratorCondition--
     """
     enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        from extensions.middle.TensorIteratorCondition import SimpleConditionMatcher
+        return [SimpleConditionMatcher]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     @staticmethod
     def pattern():
@@ -83,7 +91,7 @@ class BackEdgesMatching(MiddleReplacementPattern):
             ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         log.debug('================== BackEdgeFind ===============')
 
         nodes_for_remove = []
diff --git a/model-optimizer/extensions/middle/TensorIteratorBackEdge_test.py b/model-optimizer/extensions/middle/TensorIteratorBackEdge_test.py
index c4482c466..d9cc63fc4 100644
--- a/model-optimizer/extensions/middle/TensorIteratorBackEdge_test.py
+++ b/model-optimizer/extensions/middle/TensorIteratorBackEdge_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/TensorIteratorCondition.py b/model-optimizer/extensions/middle/TensorIteratorCondition.py
index 70b169f0c..435a6865a 100644
--- a/model-optimizer/extensions/middle/TensorIteratorCondition.py
+++ b/model-optimizer/extensions/middle/TensorIteratorCondition.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,10 +16,10 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.ops.TensorIterator_ops import TensorIteratorCondition
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
+import numpy as np
 
 
 class LoopConditionMatcher(MiddleReplacementPattern):
@@ -46,6 +46,14 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
                                                                    Const----
     """
     enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     @staticmethod
     def pattern():
@@ -69,7 +77,6 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
 
                 ('Enter_2_less', dict(kind='op', op='Enter')),
                 ('Enter_2_less_data', dict(kind='data')),
-                ('minimum', dict(kind='op', op='Minimum')),
                 ('minimum_data', dict(kind='data')),
 
                 ('and', dict(kind='op', op='LogicalAnd')),
@@ -78,9 +85,9 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
                 ('loop_cond_data', dict(kind='data')),
 
                 ('init_1', dict(kind='op', op='Const')),
-                ('init_1_data',  dict(kind='data')),
+                ('init_1_data', dict(kind='data')),
                 ('Enter_1', dict(kind='op', op='Enter')),
-                ('Enter_1_data',  dict(kind='data')),
+                ('Enter_1_data', dict(kind='data')),
 
                 ('init_2', dict(kind='op', op='Const')),
                 ('init_2_data', dict(kind='data')),
@@ -92,7 +99,7 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
                 ('Identity_1', dict(kind='op', op='Identity')),
                 ('Identity_1_data', dict(kind='data')),
                 ('add_1', dict(kind='op', op='Add')),
-                ('add_1_y',  dict(kind='op', op='Const')),
+                ('add_1_y', dict(kind='op', op='Const')),
                 ('add_1_y_data', dict(kind='data')),
                 ('add_1_data', dict(kind='data')),
                 ('NextIteration_1', dict(kind='op', op='NextIteration')),
@@ -111,7 +118,6 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
             edges=[
                 ('Strided_slice', 'Strided_slice_data'),
                 ('Strided_slice_data', 'Enter_1_less'),
-                ('Strided_slice_data', 'minimum'),
                 ('Enter_1_less', 'Enter_1_less_data'),
                 ('Enter_1_less_data', 'Less_1'),
                 ('Less_1', 'Less_1_data'),
@@ -150,7 +156,6 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
                 ('add_2', 'add_2_data'),
                 ('add_2_data', 'NextIteration_2'),
 
-                ('minimum', 'minimum_data'),
                 ('minimum_data', 'Enter_2_less'),
                 ('Enter_2_less', 'Enter_2_less_data'),
                 ('Enter_2_less_data', 'Less_2'),
@@ -168,26 +173,35 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def looking_for_iteration_counter(graph: Graph, match: dict):
+        types = ['TensorIteratorInput', 'TensorIteratorOutput']
+        candidates = np.array([match['Identity_1_data'], match['Identity_2_data']])
+        results = np.array([False for i in range(len(candidates))])
+        for i, candidat in enumerate(candidates):
+            for node in candidat.out_nodes():
+                if node['op'] in types:
+                    results[i] = True
+        assert not np.all(results)
+        assert sum(results) == 1
+        return candidates[results == True][0]
+
+    def replace_pattern(self, graph: Graph, match: dict):
         log.debug('================== ConditionFind ===============')
-        max_node = match['minimum'].in_node(1).in_node()
-        assert max_node['kind'] == 'op' and max_node['op'] == 'Maximum'
-
-        #init_1
+        # init_1
         init_1 = match['init_1_data'].value
         assert init_1 is not None
         init_1 = int(init_1)
 
-        #init_2
+        # init_2
         init_2 = match['init_2_data'].value
         assert init_2 is not None
         init_2 = int(init_2)
 
-        #step_1
+        # step_1
         assert match['add_1_y_data'].value is not None
         step_1 = int(match['add_1_y_data'].value)
 
-        #step_2
+        # step_2
         assert match['add_2_y_data'].value is not None
         step_2 = int(match['add_2_y_data'].value)
 
@@ -195,14 +209,17 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
         match['Identity_2_data'].value = None
 
         # Create condition node and delete all useless nodes from condition pattern
-        condition_attrs = dict(time=dict(init=init_2, step=step_2), iter=dict(init=init_1, step=step_1), \
+        loop_condiiton = match['loop_cond_data']
+        iterator_data = self.looking_for_iteration_counter(graph, match)
+
+        condition_attrs = dict(time=dict(init=init_2, step=step_2), iter=dict(init=init_1, step=step_1),
                                name=match['loop_cond'].name + '/TensorIteratorCondition_')
         condition = TensorIteratorCondition(graph, attrs=condition_attrs)
         condition.create_node_with_data(inputs=[match['Strided_slice_data'], match['minimum_data']],
-                                        data_nodes=[match['loop_cond_data'], match['Identity_2_data']])
+                                        data_nodes=[loop_condiiton, iterator_data])
 
         # Delete useless nodes
-        safe_nodes = ['loop_cond_data', 'Identity_2_data', 'Strided_slice', 'Strided_slice_data',
+        safe_nodes = ['loop_cond_data', 'Identity_1_data', 'Identity_2_data',  'Strided_slice', 'Strided_slice_data',
                       'minimum', 'minimum_data']
         nodes_for_remove = []
         for node in match.keys():
@@ -211,7 +228,17 @@ Shape -> StridedSlice -> Enter -|    LogicalAnd --> LoopCond (data)
         graph.remove_nodes_from(nodes_for_remove)
 
 
-class SimpleConditionMather(MiddleReplacementPattern):
+class SimpleConditionMatcher(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        return [LoopConditionMatcher]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
+
     @staticmethod
     def pattern():
         log.debug('+++++++++++++++ SimpleConditionMatching ++++++++++++++++')
@@ -231,17 +258,16 @@ class SimpleConditionMather(MiddleReplacementPattern):
                 ('loop_cond_data', dict(kind='data')),
 
                 ('init_1', dict(kind='op', op='Const')),
-                ('init_1_data',  dict(kind='data')),
+                ('init_1_data', dict(kind='data')),
                 ('Enter_1', dict(kind='op', op='Enter')),
-                ('Enter_1_data',  dict(kind='data')),
-
+                ('Enter_1_data', dict(kind='data')),
 
                 ('Switch_1', dict(kind='op', op='Switch')),
                 ('Switch_1_data', dict(kind='data')),
                 ('Identity_1', dict(kind='op', op='Identity')),
                 ('Identity_1_data', dict(kind='data')),
                 ('add_1', dict(kind='op', op='Add')),
-                ('add_1_y',  dict(kind='op', op='Const')),
+                ('add_1_y', dict(kind='op', op='Const')),
                 ('add_1_y_data', dict(kind='data')),
                 ('add_1_data', dict(kind='data')),
                 ('NextIteration_1', dict(kind='op', op='NextIteration')),
@@ -278,7 +304,7 @@ class SimpleConditionMather(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         log.debug('================== SimpleConditionFind ===============')
         # init_1
         init_1 = match['init_1_data'].value
@@ -292,7 +318,7 @@ class SimpleConditionMather(MiddleReplacementPattern):
         match['loop_cond_data'].value = None
 
         # Create condition node and delete all useless nodes from condition pattern
-        condition_attrs = dict(iter=dict(init=init_1, step=step_1), \
+        condition_attrs = dict(iter=dict(init=init_1, step=step_1),
                                name=match['loop_cond'].name + '/TensorIteratorCondition_')
         condition = TensorIteratorCondition(graph, attrs=condition_attrs)
         condition.create_node_with_data(inputs=[match['Strided_slice_data']],
@@ -304,4 +330,4 @@ class SimpleConditionMather(MiddleReplacementPattern):
         for node in match.keys():
             if node not in safe_nodes:
                 nodes_for_remove.append(match[node].id)
-        graph.remove_nodes_from(nodes_for_remove)
-\ No newline at end of file
+        graph.remove_nodes_from(nodes_for_remove)
diff --git a/model-optimizer/extensions/middle/TensorIteratorConditionChecker.py b/model-optimizer/extensions/middle/TensorIteratorConditionChecker.py
index 5dfea5bac..80351f95b 100644
--- a/model-optimizer/extensions/middle/TensorIteratorConditionChecker.py
+++ b/model-optimizer/extensions/middle/TensorIteratorConditionChecker.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,14 +13,24 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+import logging as log
 
 import numpy as np
-import logging as log
+
 from mo.middle.replacement import MiddleReplacementPattern
 
 
 class ConditionChecks(MiddleReplacementPattern):
     enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        from extensions.middle.TensorIteratorBackEdge import BackEdgesMatching
+        return [BackEdgesMatching]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     @staticmethod
     def pattern():
@@ -54,7 +64,7 @@ class ConditionChecks(MiddleReplacementPattern):
 
     @staticmethod
     def replace_pattern(graph, match: dict):
-        #Check for SS params
+        # Check for SS params
         # Sanity check that we iterate over axis of some tensor
         ss = match['Strided_slice']
         params = ss.in_nodes()
@@ -62,7 +72,7 @@ class ConditionChecks(MiddleReplacementPattern):
         assert np.all(params[2].in_node().value == 1)
         assert np.all(params[3].in_node().value == 1)
 
-        #Check Maximum/Minimum params
+        # Check Maximum/Minimum params
 
         # Check for comparing SS and seq_length source (it should be one tensor)
         # SIMPLE CHECK
@@ -71,10 +81,9 @@ class ConditionChecks(MiddleReplacementPattern):
             log.warning('TF loop doesn\'t have a constant upper bound produced by node {}, or ModelOptimizer '
                         'cannot detect a constant in this case. Loops with a dynamic number of iterations are not '
                         'supported, so in the resulting IR, generated TensorIterator will have '
-                        'a maximum number of iterations determined by input tensor size: {}',
-                        match['minimum_data'].soft_get('name'),
-                        match['Strided_slice_data'].value
-            )
+                        'a maximum number of iterations determined by input tensor size: {}'
+                        ''.format(match['minimum_data'].soft_get('name'), match['Strided_slice_data'].value)
+                        )
         else:
             assert match['Strided_slice_data'].value == match['minimum_data'].value, \
                 'Values do not match: {} and {}'.format(match['Strided_slice_data'].value, match['minimum_data'].value)
@@ -82,7 +91,7 @@ class ConditionChecks(MiddleReplacementPattern):
         # SMART CHECK
         # TODO: add here some smart check for tensors equality
 
-        #Check that bound for Condition and Inputs/Outputs sizes match
+        # Check that bound for Condition and Inputs/Outputs sizes match
         condition_time = match['condition'].out_node(0)
         inputs_and_outputs = condition_time.out_nodes()
         type_list = ['TensorIteratorInput', 'TensorIteratorOutput']
diff --git a/model-optimizer/extensions/middle/TensorIteratorCondition_test.py b/model-optimizer/extensions/middle/TensorIteratorCondition_test.py
index 8ebd9dd16..2085b67a3 100644
--- a/model-optimizer/extensions/middle/TensorIteratorCondition_test.py
+++ b/model-optimizer/extensions/middle/TensorIteratorCondition_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -27,18 +27,18 @@ class TensorIteratorConditionTests(unittest.TestCase):
         pattern = pattern_matcher.pattern()
 
         graph = build_graph_with_attrs(nodes_with_attrs=pattern['nodes'], edges_with_attrs=pattern['edges'],
-                                       new_nodes_with_attrs=[('maximum', {'kind':'op', 'op': 'Maximum'}),
-                                                             ('maximum_data', {'kind': 'data'})],
+                                       new_nodes_with_attrs=[('maximum', {'kind': 'op', 'op': 'Maximum'}),
+                                                             ('maximum_data', {'kind': 'data'}),
+                                                             ('TensorIteratorInput', {'kind': 'op', 'op': 'TensorIteratorInput'})],
                                        new_edges_with_attrs=[('maximum', 'maximum_data'),
-                                                             ('maximum_data', 'minimum', {'in':1})],
+                                                             ('Identity_1_data', 'TensorIteratorInput')],
                                        update_nodes_attributes=[('init_1_data', {'value': np.array([0])}),
                                                                 ('init_2_data', {'value': np.array([0])}),
                                                                 ('add_1_y_data', {'value': np.array(1)}),
                                                                 ('add_2_y_data', {'value': np.array(1)}),
                                                                 ('loop_cond_data', {'value': None}),
                                                                 ('Identity_2_data', {'value': None}),
-                                                                ],
-                                       update_edge_attrs={('Strided_slice_data', 'minimum',0): {'in': 0}})
+                                                                ])
 
         pattern_matcher.find_and_replace_pattern(graph)
         graph_ref = build_graph_with_attrs(
@@ -49,18 +49,16 @@ class TensorIteratorConditionTests(unittest.TestCase):
                               ('StridedSlice_data', {'kind': 'data'}),
                               ('Maximum', {'kind': 'op', 'op': 'Maximum'}),
                               ('Maximum_data', {'kind': 'data'}),
-                              ('minimum', {'kind': 'op', 'op': 'Minimum'}),
                               ('minimum_data', {'kind': 'data'}),
+                              ('TensorIteratorInput', {'kind': 'op', 'op': 'TensorIteratorInput'})
                               ],
             edges_with_attrs=[('Maximum', 'Maximum_data'),
-                              ('Maximum_data', 'minimum'),
                               ('StridedSlice', 'StridedSlice_data'),
                               ('StridedSlice_data', 'TensorIteratorCondition', {'in':0}),
-                              ('StridedSlice_data', 'minimum'),
-                              ('minimum', 'minimum_data'),
                               ('minimum_data', 'TensorIteratorCondition', {'in':1}),
                               ('TensorIteratorCondition', 'loop_cond_data'),
                               ('TensorIteratorCondition', 'identity_data'),
+                              ('identity_data', 'TensorIteratorInput'),
                               ],
             update_edge_attrs=None,
             new_nodes_with_attrs=[],
diff --git a/model-optimizer/extensions/middle/TensorIteratorInput.py b/model-optimizer/extensions/middle/TensorIteratorInput.py
index 65cdb409b..93d63fa56 100644
--- a/model-optimizer/extensions/middle/TensorIteratorInput.py
+++ b/model-optimizer/extensions/middle/TensorIteratorInput.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,10 +15,11 @@
 """
 
 import logging as log
-import networkx as nx
+
 import numpy as np
 
 from extensions.ops.TensorIterator_ops import TensorIteratorInput
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 
 
@@ -38,7 +39,16 @@ class SmartInputMatcher(MiddleReplacementPattern):
         |__________________________________________________|
     """
 
-    enabled = False  # called from mo.pipeline.tf directly
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        from extensions.middle.TensorIterator_utils import DeleteSelect
+        return [DeleteSelect]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     @staticmethod
     def pattern():
@@ -115,7 +125,7 @@ class SmartInputMatcher(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         log.debug('================== SmartInputFind ===============')
 
         assert match['Enter_data'].value is not None
@@ -141,12 +151,12 @@ class SmartInputMatcher(MiddleReplacementPattern):
         # axis == 0 because in TensorArray we ALWAYS iterate over 0 axis, other params will be fill later (with
         # condition)
         input_node = TensorIteratorInput(graph, dict(axis=0, start=start, stride=None, part_size=None,
-                                                external_port_id=str(match['Enter_data'].value),
-                                                internal_layer_id=match['TensorArrayRead_data'].id,
-                                                name=match['TensorArrayRead'].name + '/TensorIteratorInput_'
-                                                ))
+                                                     external_port_id=str(match['Enter_data'].value),
+                                                     internal_layer_id=match['TensorArrayRead_data'].id,
+                                                     name=match['TensorArrayRead'].name + '/TensorIteratorInput_'
+                                                     ))
         input_node.create_node_with_data(inputs=[ta_size_data, value, match['Condition_data']],
-                                    data_nodes=[match['TensorArrayRead_data']])
+                                         data_nodes=[match['TensorArrayRead_data']])
         # Delete useless nodes
         safe_nodes = ['TensorArrayRead_data', 'Condition', 'Condition_data']
 
@@ -158,12 +168,21 @@ class SmartInputMatcher(MiddleReplacementPattern):
 
 
 class SimpleInputMatcher(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
 
-    enabled = False  # called from mo.pipeline.tf directly
+    def run_after(self):
+        from extensions.middle.DeleteNotExecutable import DeleteNotExecutable
+        return [DeleteNotExecutable]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     """
     This pattern match simple inputs (without partitions) in while loops in TF (this inputs are set by Enter nodes).
     """
+
     @staticmethod
     def pattern():
         return dict(
@@ -175,13 +194,13 @@ class SimpleInputMatcher(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         log.debug('================== SimpletInputFind ===============')
 
         input_node = TensorIteratorInput(graph, dict(external_port_id=None,
-                                         internal_layer_id=None,
-                                         name=match['Enter'].name + '/TensorIteratorInput_'
-                                         ))
+                                                     internal_layer_id=None,
+                                                     name=match['Enter'].name + '/TensorIteratorInput_'
+                                                     ))
         input_node.create_node_with_data(inputs=[match['Enter'].in_node()], data_nodes=[match['Enter'].out_node()])
 
         # Delete useless nodes
@@ -189,8 +208,15 @@ class SimpleInputMatcher(MiddleReplacementPattern):
 
 
 class BackEdgeSimpleInputMatcher(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
 
-    enabled = False  # called from mo.pipeline.tf directly
+    def run_after(self):
+        return [SimpleInputMatcher]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     @staticmethod
     def pattern():
@@ -203,7 +229,7 @@ class BackEdgeSimpleInputMatcher(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         log.debug('================== SimpleBackEdgeInputFind ===============')
 
         assert len(match['BackEdge'].in_nodes()) == 3
@@ -212,11 +238,18 @@ class BackEdgeSimpleInputMatcher(MiddleReplacementPattern):
         cycle_input = match['BackEdge'].in_node(1)
 
         # We need to create new TensorItertorInput node only if this node doesn't exist already.
-        if len(init_input.in_nodes()) == 0:
+        if len(init_input.in_nodes()) == 0 or\
+           (len(init_input.in_nodes()) == 1 and init_input.has_valid('value')):
+
             input_node = TensorIteratorInput(graph, dict(external_port_id=None,
-                                             internal_layer_id=None,
-                                             name=match['BackEdge'].name + '/TensorIteratorInput_'
-                                            ))
+                                                         internal_layer_id=None,
+                                                         name=match['BackEdge'].name + '/TensorIteratorInput_'
+                                                         ))
+
+            # In case if data node has Constant producer
+            if len(init_input.in_nodes()) == 1:
+                graph.remove_edge(init_input.in_node(0).id, init_input.id)
+
             input_data_node = input_node.create_node_with_data(inputs=[init_input])
             input_data_node.shape = np.array(init_input.shape, dtype=np.int64)
             graph.remove_edges_from([(init_input.id, match['BackEdge'].id)])
diff --git a/model-optimizer/extensions/middle/TensorIteratorInput_test.py b/model-optimizer/extensions/middle/TensorIteratorInput_test.py
index efd560c6d..3d5b73815 100644
--- a/model-optimizer/extensions/middle/TensorIteratorInput_test.py
+++ b/model-optimizer/extensions/middle/TensorIteratorInput_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/lstm_tensor_iterator_to_lstm_sequence.py b/model-optimizer/extensions/middle/TensorIteratorLSTMToLSTMSequence.py
index a7b6b5687..95edf9af8 100644
--- a/model-optimizer/extensions/middle/lstm_tensor_iterator_to_lstm_sequence.py
+++ b/model-optimizer/extensions/middle/TensorIteratorLSTMToLSTMSequence.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,13 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.graph.graph import copy_node
-from mo.utils.error import Error
+from extensions.middle.TF_lstm_cell_to_generic import TensorFlowLSTMtoGeneric
+from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+from mo.graph.graph import Graph
 from mo.middle.pattern_match import find_isomorphisms
 from mo.middle.replacement import MiddleReplacementPattern
-from extensions.ops.lstm_sequence import LSTMSequence
-from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
-from extensions.middle.lstm_sequence_normalize import LSTMSequenceNormalize, permute_before_and_after
-from extensions.middle.lstm_sequence_tensor_iterator import LSTMSequenceTensorIterator
-from extensions.middle.TF_lstm_cell_to_generic import TensorFlowLSTMtoGeneric
+from mo.utils.error import Error
+from extensions.middle.ONNXRNNSequenceNormalize import ONNXRNNSequenceNormalize, permute_before_and_after
 
 
 class TensorIteratorLSTM(MiddleReplacementPattern):
@@ -40,7 +36,7 @@ class TensorIteratorLSTM(MiddleReplacementPattern):
     enabled = False
 
     def run_after(self):
-        return [TensorIteratorMerge, LSTMSequenceNormalize, LSTMSequenceTensorIterator, TensorFlowLSTMtoGeneric]
+        return [TensorIteratorMerge, ONNXRNNSequenceNormalize, TensorFlowLSTMtoGeneric]
 
     def pattern(self):
         return dict(
@@ -52,8 +48,8 @@ class TensorIteratorLSTM(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
-        nodes=[
+    def replace_pattern(graph: Graph, match: dict):
+        nodes = [
             ('input_unsqueezed'),
             ('squeeze', dict(op='Reshape')),
             ('input_squeezed'),
@@ -69,7 +65,7 @@ class TensorIteratorLSTM(MiddleReplacementPattern):
             ('unsqueeze', dict(op='Reshape')),
             ('output_unsqueezed'),
         ]
-        edges=[
+        edges = [
             ('input_unsqueezed', 'squeeze'),
             ('squeeze', 'input_squeezed'),
 
@@ -101,37 +97,3 @@ class TensorIteratorLSTM(MiddleReplacementPattern):
                         'Please modify the original network '
                         'to meet the requirements.'.format(ti.soft_get('name')))
         # TODO Additional checks for port indices
-        if body_match['lstm'].has_valid('mark_supported_by_IE'):
-            body_match['lstm'].mark_supported_by_IE(body_match['lstm'])
-
-
-class CheckUnsupportedLSTMCell(MiddleReplacementPattern):
-    """ Finds all unsupported LSTMCell.
-
-        Initiates the second translation round if find any not supported LSTMCell instances.
-    """
-
-    enabled = False
-
-    def run_after(self):
-        return [TensorIteratorLSTM]
-
-    def pattern(self):
-        return dict(
-            nodes=[
-                ('lstm', dict(op='LSTMCell')),
-            ],
-            edges=[
-            ]
-        )
-
-    @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
-        lstmcell = match['lstm']
-        if lstmcell.has_valid('finalize_first_round'):
-            lstmcell.finalize_first_round()
-            if not lstmcell.has_and_set('supported_by_IE'):
-                # this is a signal for the main translation pipeline to repeat the entire conversion process
-                graph.graph['repeat_conversion'] = True
-        # in case when there is no lstmcell.finalize_first_round then this cell wasn't created with the pattern
-        # (for example in ONNX) and we don't initiate the second round.
diff --git a/model-optimizer/extensions/middle/TensorIteratorMerge.py b/model-optimizer/extensions/middle/TensorIteratorMerge.py
index 218b1297f..29e974930 100644
--- a/model-optimizer/extensions/middle/TensorIteratorMerge.py
+++ b/model-optimizer/extensions/middle/TensorIteratorMerge.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,22 +14,21 @@
  limitations under the License.
 """
 
-
 from collections import deque
 from copy import deepcopy
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
-from mo.utils.graph import sub_graph_between_nodes
-from mo.middle.replacement import MiddleReplacementPattern
 from extensions.ops.tensor_iterator import TensorIterator
+from mo.graph.graph import Node, Graph, add_opoutput
+from mo.middle.replacement import MiddleReplacementPattern
 from mo.ops.op import Op
 from mo.ops.reshape import Reshape
+from mo.utils.graph import sub_graph_between_nodes
 
 stop_nodes = ['TensorIteratorInput', 'TensorIteratorOutput', 'TensorIteratorBackEdge', 'TensorIteratorCondition']
 
+
 def op_type(graph, node_name: str):
     node = Node(graph, node_name)
     if node.has_valid('kind') and node['kind'] == 'op':
@@ -45,7 +44,7 @@ def update_inputs(graph, inputs: list, node_name: str):
             inputs.append(node_name)
 
 
-def reverse_dfs(graph: nx.MultiDiGraph, node_name: str, stop_nodes: list, inputs: list, visited: set = None):
+def reverse_dfs(graph: Graph, node_name: str, stop_nodes: list, inputs: list, visited: set = None):
     d = deque()
 
     if visited is None:
@@ -62,7 +61,8 @@ def reverse_dfs(graph: nx.MultiDiGraph, node_name: str, stop_nodes: list, inputs
                 else:
                     update_inputs(graph, inputs, in_node_name)
 
-def dfs(graph: nx.MultiDiGraph, node_name: str, stop_nodes: list, visited: set = None):
+
+def dfs(graph: Graph, node_name: str, stop_nodes: list, visited: set = None):
     d = deque()
 
     visited.add(node_name)
@@ -75,18 +75,28 @@ def dfs(graph: nx.MultiDiGraph, node_name: str, stop_nodes: list, visited: set =
                     visited.add(out_node_name)
                     d.append(out_node_name)
 
+
 def get_body(graph, inputs, outputs):
     nodes, extra_inputs = sub_graph_between_nodes(
         graph,
         inputs,
         outputs,
-        lambda node: node.soft_get('op')  == 'TensorIteratorInput'
+        lambda node: node.soft_get('op') == 'TensorIteratorInput'
     )
     nodes = list(set(nodes) - set(inputs) - set(outputs) - set(extra_inputs))
     return nodes, extra_inputs
 
 
 class TensorIteratorMerge(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
     @staticmethod
     def pattern():
         return dict(
@@ -144,7 +154,7 @@ class TensorIteratorMerge(MiddleReplacementPattern):
         inputs = [Node(graph, node) for node in inputs]
         outputs = [Node(graph, node) for node in outputs]
         back_edges = [Node(graph, node) for node in back_edges]
-        
+
         external_inputs = [
             {
                 'external_data_id': node.in_node(1 if node.has_valid('axis') else 0),
@@ -156,7 +166,6 @@ class TensorIteratorMerge(MiddleReplacementPattern):
                 'part_size': node.part_size
             } for node in inputs]
 
-
         external_outputs = [
             {
                 'external_data_id': node.out_node(0),
@@ -168,7 +177,6 @@ class TensorIteratorMerge(MiddleReplacementPattern):
                 'part_size': node.part_size
             } for node in outputs]
 
-
         back_edges_data = [
             {
                 'from_data_id': node.in_node(1),
@@ -177,12 +185,14 @@ class TensorIteratorMerge(MiddleReplacementPattern):
             } for node in back_edges
         ]
 
-        body = nx.MultiDiGraph(name='body')
-        body.graph['layout'] = graph.graph['layout']
+        body = Graph(name='body')
+        body.graph = graph.graph
         body.add_nodes_from([(node, graph.node[node]) for node in body_nodes])
-        body.add_edges_from([(u,v,k,d)for u,v,k,d in graph.edges(data=True, keys=True) if u in body_nodes and v in body_nodes])
+        body.add_edges_from(
+            [(u, v, k, d) for u, v, k, d in graph.edges(data=True, keys=True) if u in body_nodes and v in body_nodes])
 
-        graph.remove_nodes_from(body_nodes + [match['condition'].id] + [inp.id for inp in inputs] + [out.id for out in outputs])
+        graph.remove_nodes_from(
+            body_nodes + [match['condition'].id] + [inp.id for inp in inputs] + [out.id for out in outputs])
         internal_id_count = 0
         real_back_edges = []
         for edge in back_edges_data:
@@ -192,7 +202,7 @@ class TensorIteratorMerge(MiddleReplacementPattern):
             edge['from_data_id'] = Node(body, edge['from_data_id'].id)
             edge['to_data_id'] = Node(body, edge['to_data_id'].id)
             edge['init_data_id'] = Node(body, edge['init_data_id'].id)
-            edge['from_data_id']['is_output'] = True
+            add_opoutput(body, edge['from_data_id'].id, 0, False)
 
             # Assign/reuse ids for the back-edge start; it comes from from_data_id
             assert len(edge['from_data_id'].in_nodes()) == 1
@@ -214,13 +224,14 @@ class TensorIteratorMerge(MiddleReplacementPattern):
             for _, consumer, key, edge_attrs in body.out_edges(edge['to_data_id'].id, data=True, keys=True):
 
                 real_edge = {}
-                real_edge.update(edge) # all real back_edges have the same back-edge start
+                real_edge.update(edge)  # all real back_edges have the same back-edge start
 
                 consumer = Node(body, consumer)
 
                 if real_edge['to_data_id'].in_node().has_valid('internal_layer_id'):
                     assert False
-                    real_edge['to_data_id'].out_node()['internal_layer_id'] = real_edge['to_data_id'].in_node().internal_layer_id
+                    real_edge['to_data_id'].out_node()['internal_layer_id'] = \
+                        real_edge['to_data_id'].in_node().internal_layer_id
                 elif not consumer.has_valid('internal_layer_id'):
                     consumer['internal_layer_id'] = internal_id_count
                     internal_id_count += 1
@@ -245,7 +256,7 @@ class TensorIteratorMerge(MiddleReplacementPattern):
                     real_edge['consumer'].id,
                     real_edge['consumer_key'],
                     real_edge['attrs'])
-            for real_edge in current_real_back_edges])
+                for real_edge in current_real_back_edges])
 
             body.remove_nodes_from([edge['to_data_id'].id, edge['to_data_id'].in_node().id])
             real_back_edges += current_real_back_edges
@@ -261,7 +272,8 @@ class TensorIteratorMerge(MiddleReplacementPattern):
                 # Insert squeezing resize at input port that has partitioning
                 shape = ext_inp['internal_data_id'].shape.copy()
                 assert not ext_inp['internal_data_id'].has_valid('value')
-                new_input_data = Op._create_data_node(body, ext_inp['internal_data_id'].name + '/UnsqueezedInput', dict(shape=np.insert(shape, ext_inp['axis'], 1)))
+                new_input_data = Op._create_data_node(body, ext_inp['internal_data_id'].name + '/UnsqueezedInput',
+                                                      dict(shape=np.insert(shape, ext_inp['axis'], 1)))
                 dim = shape.copy()
                 # try to do it dynamically reshapable along one of the axis
                 # it is practically useful to reshape along batch dimension, but here we cannot detect where it is
@@ -300,13 +312,14 @@ class TensorIteratorMerge(MiddleReplacementPattern):
                 # trying to make it dynamically reshapable (see related comment above for the first Reshape)
                 dim[0] = -1
                 assert not ext_out['internal_data_id'].has_valid('value')
-                reshape_op = Reshape(body, dict(name=ext_out['internal_data_id'].name + '/OutputUnsqueeze', dim=np.insert(dim, ext_out['axis'], 1)))
+                reshape_op = Reshape(body, dict(name=ext_out['internal_data_id'].name + '/OutputUnsqueeze',
+                                                dim=np.insert(dim, ext_out['axis'], 1)))
                 ext_out['internal_data_id'] = reshape_op.create_node_with_data([ext_out['internal_data_id']])
 
             # TODO: add here working with simple outputs
 
-            ext_out['internal_data_id']['is_output'] = True
-            #assert len(ext_out['internal_data_id'].out_nodes()) == 0
+            add_opoutput(body, ext_out['internal_data_id'].id, 0, False)
+            # assert len(ext_out['internal_data_id'].out_nodes()) == 0
             assert len(ext_out['internal_data_id'].in_nodes()) == 1
             if not 'internal_layer_id' in ext_out['internal_data_id'].in_node():
                 ext_out['internal_data_id'].in_node()['internal_layer_id'] = internal_id_count
@@ -322,16 +335,22 @@ class TensorIteratorMerge(MiddleReplacementPattern):
         ti_op = TensorIterator(graph, {
             'name': name + '/TensorIterator',
             'body': body,
+            'in_ports_count': len(external_inputs),
+            'out_ports_count': len(external_outputs),
 
             'input_port_map': [
-                {field: external_input[field] for field in [ 'external_port_id', 'internal_layer_id', 'internal_port_id', 'axis', 'stride', 'part_size', 'start', 'end']}
+                {field: external_input[field] for field in
+                 ['external_port_id', 'internal_layer_id', 'internal_port_id', 'axis', 'stride', 'part_size', 'start',
+                  'end']}
                 for external_input in real_external_inputs],
 
             'output_port_map': [
-                {field: external_output[field] for field in [ 'external_port_id', 'internal_layer_id', 'internal_port_id', 'axis', 'stride', 'part_size', 'start', 'end']}
+                {field: external_output[field] for field in
+                 ['external_port_id', 'internal_layer_id', 'internal_port_id', 'axis', 'stride', 'part_size', 'start',
+                  'end']}
                 for external_output in external_outputs],
             'back_edges': [
-                {field: edge[field] for field in [ 'from_layer', 'from_port', 'to_layer', 'to_port']}
+                {field: edge[field] for field in ['from_layer', 'from_port', 'to_layer', 'to_port']}
                 for edge in real_back_edges],
         })
 
@@ -346,7 +365,3 @@ class TensorIteratorMerge(MiddleReplacementPattern):
 
         for i, out in enumerate(ti_outs):
             out.in_edge()['external_port_id'] = external_outputs[i]['external_port_id']
-
-
-
-        # Create TI operation
diff --git a/model-optimizer/extensions/middle/TensorIteratorOutput.py b/model-optimizer/extensions/middle/TensorIteratorOutput.py
index 695e776be..07b64dba6 100644
--- a/model-optimizer/extensions/middle/TensorIteratorOutput.py
+++ b/model-optimizer/extensions/middle/TensorIteratorOutput.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,9 +16,8 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.ops.TensorIterator_ops import TensorIteratorOutput
+from mo.graph.graph import Graph
 from mo.middle.replacement import MiddleReplacementPattern
 
 
@@ -40,6 +39,15 @@ class SmartOutputMatcher(MiddleReplacementPattern):
                                     --------> Identity -> TensorArrayWrite -> NextIteration
     """
     enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        from extensions.middle.TensorIteratorInput import SmartInputMatcher
+        return [SmartInputMatcher]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        return [TensorIteratorMerge]
 
     @staticmethod
     def pattern():
@@ -121,7 +129,7 @@ class SmartOutputMatcher(MiddleReplacementPattern):
         )
 
     @staticmethod
-    def replace_pattern(graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(graph: Graph, match: dict):
         log.debug('================== SmartOutputFind ===============')
 
         assert match['WriteEnter_data'].value is not None
@@ -149,3 +157,132 @@ class SmartOutputMatcher(MiddleReplacementPattern):
             if node not in safe_nodes:
                 nodes_for_remove.append(match[node].id)
         graph.remove_nodes_from(nodes_for_remove)
+
+
+class SimpleOutputMatcher(MiddleReplacementPattern):
+    """
+    This pattern match partitioned outputs for TensorIterator in dynamic_rnn loops in TF.
+    The structure of pattern without Data nodes between ops. Every node is named as op attribute of this node
+    (data nodes is marked by (data)):
+        TensorArray
+        |         |
+    Flow(data)  Handle(data)------------------------------
+            |    |                                       |
+            v    v                                       v
+            Enter  ->  Merge -> Switch -> Exit -> TensorArrayRead
+                                    |
+                                    |
+                                    |
+                                    |
+                                    --------> Identity -> TensorArrayWrite -> NextIteration
+    """
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        return [SmartOutputMatcher]
+
+    def run_before(self):
+        from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
+        from extensions.middle.TensorIteratorCondition import LoopConditionMatcher
+        return [TensorIteratorMerge, LoopConditionMatcher]
+
+    @staticmethod
+    def pattern():
+        return dict(
+            nodes=[
+                ('TensorArray', dict(kind='op', op='TensorArrayV3')),
+                ('TensorArray_data', dict(kind='data')),
+                ('TensorArray_flow_data', dict(kind='data')),
+
+                ('TensorArrayWrite', dict(kind='op', op='TensorArrayWriteV3')),
+                ('TensorArrayWrite_data', dict(kind='data')),
+
+                ('NextIteration', dict(kind='op', op='NextIteration')),
+                ('NextIteration_data', dict(kind='data')),
+
+                ('Condition_data', dict(kind='data')),
+
+                ('Identity_2', dict(kind='op', op='Identity')),
+                ('Identity_2_data', dict(kind='data')),
+
+                ('Switch_2', dict(kind='op', op='Switch')),
+                ('Switch_2_data', dict(kind='data')),
+                ('Switch_2_data_exit', dict(kind='data')),
+
+                ('Merge_2', dict(kind='op', op='Merge')),
+                ('Merge_2_data', dict(kind='data')),
+
+                ('Enter_2', dict(kind='op', op='Enter')),
+                ('Enter_2_data', dict(kind='data')),
+
+                ('WriteEnter', dict(kind='op', op='Enter')),
+                ('WriteEnter_data', dict(kind='data')),
+
+                ('Exit', dict(kind='op', op='Exit')),
+                ('Exit_data', dict(kind='data')),
+                #
+                ('TensorArrayRead', dict(op='TensorArrayReadV3')),
+                ('TensorArrayRead_data', dict(kind='data')),
+            ],
+            edges=[
+                ('TensorArray', 'TensorArray_data'),
+                ('TensorArray', 'TensorArray_flow_data'),
+                ('TensorArray_flow_data', 'Enter_2'),
+                ('TensorArray_data', 'WriteEnter'),
+
+
+                ('Enter_2', 'Enter_2_data'),
+                ('Enter_2_data', 'Merge_2'),
+                ('Merge_2', 'Merge_2_data'),
+                ('Merge_2_data', 'Switch_2'),
+                ('Switch_2', 'Switch_2_data'),
+                ('Switch_2', 'Switch_2_data_exit'),
+                ('Switch_2_data', 'Identity_2'),
+                ('Identity_2', 'Identity_2_data'),
+
+                ('Switch_2_data_exit', 'Exit'),
+                ('Exit', 'Exit_data'),
+                ('Exit_data', 'TensorArrayRead'),
+
+                ('WriteEnter', 'WriteEnter_data'),
+                ('WriteEnter_data', 'TensorArrayWrite', {'in': 0}),
+
+                ('Identity_2_data', 'TensorArrayWrite', {'in': 3}),
+                #
+                ('TensorArrayWrite', 'TensorArrayWrite_data'),
+                ('TensorArrayWrite_data', 'NextIteration'),
+                ('Condition_data', 'Switch_2'),
+                #
+                ('TensorArray_data', 'TensorArrayRead'),
+                ('TensorArrayRead', 'TensorArrayRead_data'),
+                ('NextIteration', 'NextIteration_data'),
+                ('NextIteration_data', 'Merge_2'),
+            ],
+        )
+
+    @staticmethod
+    def replace_pattern(graph: Graph, match: dict):
+        log.debug('================== SimpleOutputFind ===============')
+        assert match['WriteEnter_data'].value is not None
+
+        index = match['TensorArrayWrite'].in_node(1)
+        value = match['TensorArrayWrite'].in_node(2)
+
+        # axis == 0 because in TensorArray we ALWAYS iterate over 0 axis, other params will be fill later (with
+        # condition)
+        output = TensorIteratorOutput(graph, dict(
+                                                  external_port_id=str(match['WriteEnter_data'].value),
+                                                  internal_layer_id=value.id,
+                                                  name=match['TensorArrayWrite'].name + '/TensorIteratorOutput_'
+                                                  ))
+        output.create_node_with_data(inputs=[value, index],
+                                     data_nodes=[match['TensorArrayRead_data']])
+
+        # Delete useless nodes
+        safe_nodes = ['TensorArrayRead_data', 'Condition_data']
+        nodes_for_remove = []
+        for node in match.keys():
+            if node not in safe_nodes:
+                nodes_for_remove.append(match[node].id)
+        graph.remove_nodes_from(nodes_for_remove)
diff --git a/model-optimizer/extensions/middle/TensorIteratorOutput_test.py b/model-optimizer/extensions/middle/TensorIteratorOutput_test.py
index d6aa9402f..f141e99df 100644
--- a/model-optimizer/extensions/middle/TensorIteratorOutput_test.py
+++ b/model-optimizer/extensions/middle/TensorIteratorOutput_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/middle/TensorIterator_utils.py b/model-optimizer/extensions/middle/TensorIterator_utils.py
index 40e0efcb7..f05875838 100644
--- a/model-optimizer/extensions/middle/TensorIterator_utils.py
+++ b/model-optimizer/extensions/middle/TensorIterator_utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,13 +13,22 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
 from mo.middle.replacement import MiddleReplacementPattern
 
 next_ops = ['NextIteration', 'TensorArrayWriteV3']
 
 
 class DeleteSelect(MiddleReplacementPattern):
+    enabled = True
+    graph_condition = [lambda graph: graph.graph['is_cyclic']]
+
+    def run_after(self):
+        from extensions.middle.AddIsCyclicAttribute import AddIsCyclicAttribute
+        return [AddIsCyclicAttribute]
+
+    def run_before(self):
+        return []
+
     @staticmethod
     def pattern():
         return dict(
diff --git a/model-optimizer/extensions/middle/UselessMerge.py b/model-optimizer/extensions/middle/UselessMerge.py
index b0923bcd5..d3ef24ac3 100644
--- a/model-optimizer/extensions/middle/UselessMerge.py
+++ b/model-optimizer/extensions/middle/UselessMerge.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,9 +16,8 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.middle.ConstSwitchResolver import ConstSwitchEraser
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 from mo.middle.replacement import MiddleReplacementPattern
 
@@ -29,13 +28,17 @@ class UselessMergeEraser(MiddleReplacementPattern):
     def run_after(self):
         return [ConstSwitchEraser]
 
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[('merge', dict(kind='op', op='Merge'))],
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         if len(graph.in_edges(match['merge'].id)) <= 1:
             remove_op_node_with_data_node(graph, match['merge'])
             log.info("Useles Merge op and data nodes was deleted op='{}'".format(match['merge'].id))
diff --git a/model-optimizer/extensions/middle/UselessSplitEraser.py b/model-optimizer/extensions/middle/UselessSplitEraser.py
new file mode 100644
index 000000000..4c8d318e6
--- /dev/null
+++ b/model-optimizer/extensions/middle/UselessSplitEraser.py
@@ -0,0 +1,46 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class UselessSplitEraser(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        from extensions.middle.pass_separator import PreMiddleStart
+        return [PreMiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def pattern(self):
+        return dict(
+            nodes=[('split', {'kind': 'op', 'op': 'Split', 'num_split': 1})],
+            edges=[]
+        )
+
+    def replace_pattern(self, graph: Graph, match: dict):
+        split_node = match['split']
+        input = split_node.in_node(1)
+        output = split_node.out_node()
+        graph.remove_edge(input.id, split_node.id)
+
+        for u, v, d in list(graph.out_edges(output.id, data=True)):
+            graph.add_edges_from([(input.id, v, d)])
+            graph.remove_edge(u, v)
diff --git a/model-optimizer/extensions/middle/UselessSridedSlice_test.py b/model-optimizer/extensions/middle/UselessSridedSlice_test.py
index 8fbf2408c..5c4a25b4a 100644
--- a/model-optimizer/extensions/middle/UselessSridedSlice_test.py
+++ b/model-optimizer/extensions/middle/UselessSridedSlice_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ nodes_attributes = {
                         'slices': [slice(0, 4, 1), slice(0, 5, 1), slice(0, 6, 1)]},
     'strided_slice_2_data': {'value': None, 'shape': np.array([4, 5, 6]), 'kind': 'data'},
     # Output operation
-    'output_op': {'type': 'OpOutput', 'kind': 'op', 'op': 'OpOutput', 'output_op': {'is_output': True}},
+    'output_op': {'kind': 'op', 'op': 'OpOutput'},
 }
 
 
diff --git a/model-optimizer/extensions/middle/UselessStridedSlice.py b/model-optimizer/extensions/middle/UselessStridedSlice.py
index b8272eaa6..6860a5a4e 100644
--- a/model-optimizer/extensions/middle/UselessStridedSlice.py
+++ b/model-optimizer/extensions/middle/UselessStridedSlice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,11 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from extensions.middle.ConvertGroupedStridedSlice import ConvertGroupedStridedSlice
 from extensions.middle.SliceConverter import ConvertSlice
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 from mo.middle.replacement import MiddleReplacementPattern
 
@@ -40,7 +40,7 @@ class UselessStridedSliceEraser(MiddleReplacementPattern):
             edges=[]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         output_data_node = match['strided_slice'].out_node(0)
         input_data_node = match['strided_slice'].in_node(0)
         if np.array_equal(input_data_node.shape, output_data_node.shape) and \
@@ -49,6 +49,7 @@ class UselessStridedSliceEraser(MiddleReplacementPattern):
             # remove inputs to Strided Slice so it has just one input with data so we can use 'remove_op_node' function
             graph.remove_edge(match['strided_slice'].in_node(1).id, match['strided_slice'].id)
             graph.remove_edge(match['strided_slice'].in_node(2).id, match['strided_slice'].id)
-            graph.remove_edge(match['strided_slice'].in_node(3).id, match['strided_slice'].id)
+            if len(match['strided_slice'].in_nodes()) > 3:
+                graph.remove_edge(match['strided_slice'].in_node(3).id, match['strided_slice'].id)
 
             remove_op_node_with_data_node(graph, match['strided_slice'])
diff --git a/model-optimizer/extensions/middle/decompose_bi_lstm.py b/model-optimizer/extensions/middle/decompose_bi_lstm.py
deleted file mode 100644
index 0cfad4e27..000000000
--- a/model-optimizer/extensions/middle/decompose_bi_lstm.py
+++ /dev/null
@@ -1,188 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import networkx as nx
-import numpy as np
-from copy import deepcopy
-
-from extensions.ops.lstm_sequence import LSTMSequence
-from mo.utils.error import Error
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.concat import Concat
-from mo.ops.op import Op
-from mo.ops.split import Split
-from mo.graph.graph import Node
-
-
-class DecomposeBiLSTM(MiddleReplacementPattern):
-    ''' Decomposes bidirectional LSTMSequence to forward and reverse LSTM ops.
-
-        To extract forward and reverse parts from initial blobs, the helper
-        functions used that should be already built-in into the operation attributes.
-
-        Both initial state are split to two part, two parts of the results are concatenated.
-        Axis of split/concat is completelly defined by ONNX/LSTM specification.
-    '''
-
-    enabled = True
-
-    def pattern(self):
-        return dict(
-            nodes=[
-                ('lstm', dict(kind='op', op='LSTMSequence', format='onnx', direction='bidirectional')),
-                ('input', dict(kind='data')),
-                ('W', dict(kind='data')),
-                ('R', dict(kind='data')),
-            ],
-            edges=[
-                ('input', 'lstm', {'in': 0}),
-                ('W', 'lstm', {'bin': 'W'}),
-                ('R', 'lstm', {'bin': 'R'}),
-            ]
-        )
-
-
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
-        bilstm = match['lstm']
-        new_init_hiddens = self.split_data(bilstm.in_node(5))
-        new_init_cells = self.split_data(bilstm.in_node(6))
-        assert bilstm.has_valid('blob_bidirectional_split'), \
-            'Node {} doesnt\'t have blob_bidirectional_split attribute defined.'.format(bilstm.soft_get('name'))
-        splitted_W = bilstm.blob_bidirectional_split(bilstm.in_node(1))
-        splitted_R = bilstm.blob_bidirectional_split(bilstm.in_node(2))
-        splitted_B = bilstm.blob_bidirectional_split(bilstm.in_node(3)) if 3 in bilstm.in_nodes() else (None, None)
-
-        outputs = self.split_bilstm(
-            bilstm,
-            new_init_hiddens,
-            new_init_cells,
-            splitted_W,
-            splitted_R,
-            splitted_B,
-        )
-
-        self.concat(bilstm, outputs[0], outputs[1], bilstm.out_nodes())
-
-    def split_data(self, data: Node):
-        """ Split data node into two part along 0 axis """
-        assert len(data.shape) == 3
-        assert data.shape[0] == 2
-
-        output_data = [Op._create_data_node(data.graph, name=data.name + '/SplittedBiLSTM/{}'.format(['forward', 'reverse'][i])) for i in [0, 1]]
-        split_op = Split(data.graph, dict(name=data.name + '/DecomposedBiLSTM_0', axis=0, num_split=2))
-        return split_op.create_node_with_data([data], data_nodes=output_data)
-
-
-    def split_bilstm(self,
-                     bilstm,
-                     new_init_hiddens,
-                     new_init_cells,
-                     splitted_W,
-                     splitted_R,
-                     splitted_B):
-        """ Split one bilstm node into 2 one-directional lstm nodes.
-
-            All input data nodes should be already prepared; they are
-            have 2 in the major dimension.
-        """
-        assert len(bilstm.out_nodes()) == 3
-        all_outputs = []
-        for i in [0, 1]:
-            direction = ['forward', 'reverse'][i]
-            op = LSTMSequence(bilstm.graph, {
-                'hidden_size': bilstm.hidden_size,
-                'direction': direction,
-                'batch_dim': bilstm.batch_dim,
-                'sequence_dim': bilstm.sequence_dim,
-                'blobs_wrb': bilstm.blobs_wrb,
-                'has_num_directions': bilstm.has_num_directions,
-                'format': bilstm.format,
-                'name': bilstm.name + '/Split/' + direction,
-            })
-
-            output_data = Op._create_data_node(
-                bilstm.graph,
-                name=bilstm.out_node(0).name + '/Split/' + str(i),
-                attrs = {'shape': bilstm.out_node(0).shape.copy()}
-            )
-
-            assert output_data.shape[1] == 2
-            output_data.shape[1] = 1
-
-            output_hidden = Op._create_data_node(
-                bilstm.graph,
-                name=bilstm.out_node(1).name + '/Split/' + str(i),
-                attrs = {'shape': bilstm.out_node(1).shape.copy()}
-            )
-
-            assert output_hidden.shape[0] == 2
-            output_hidden.shape[0] = 1
-
-            output_cell = Op._create_data_node(
-                bilstm.graph,
-                name=bilstm.out_node(2).name + '/Split/' + str(i),
-                attrs = {'shape': bilstm.out_node(2).shape.copy()}
-            )
-
-            assert output_cell.shape[0] == 2
-            output_cell.shape[0] = 1
-
-            all_outputs.append(
-                op.create_node_with_data(
-                    inputs = [
-                        bilstm.in_node(0),
-                        splitted_W[i],
-                        splitted_R[i],
-                        splitted_B[i],
-                        None,
-                        new_init_hiddens[i],
-                        new_init_cells[i],
-                    ],
-                    data_nodes = [
-                        output_data,
-                        output_hidden,
-                        output_cell
-                    ]
-                )
-            )
-        return all_outputs
-
-
-    def concat(self, bilstm, forward_outputs, reverse_outputs, final_outputs):
-        """ Concatenates two set of outputs from BiLSTM """
-
-        concat_ops = [
-            Concat(bilstm.graph, {
-                'name': bilstm.name + '/FinalConcat/Data',
-                'axis': 1
-            }),
-            Concat(bilstm.graph, {
-                'name': bilstm.name + '/FinalConcat/HiddenState',
-                'axis': 0
-            }),
-            Concat(bilstm.graph, {
-                'name': bilstm.name + '/FinalConcat/CellState',
-                'axis': 0
-            })
-        ]
-
-        bilstm.graph.remove_node(bilstm.id)
-
-        for i in final_outputs:
-            concat_ops[i].create_node_with_data(
-                [forward_outputs[i], reverse_outputs[i]],
-                data_nodes=[final_outputs[i]]
-            )
diff --git a/model-optimizer/extensions/middle/lstm_sequence_normalize.py b/model-optimizer/extensions/middle/lstm_sequence_normalize.py
deleted file mode 100644
index f2fe56148..000000000
--- a/model-optimizer/extensions/middle/lstm_sequence_normalize.py
+++ /dev/null
@@ -1,281 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import networkx as nx
-import numpy as np
-from copy import deepcopy
-
-from extensions.middle.decompose_bi_lstm import DecomposeBiLSTM
-from mo.utils.error import Error
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.ops.permute import Permute
-from mo.ops.reshape import Reshape
-from mo.graph.graph import Node
-
-
-def inverse_perm(order: np.array):
-    indices = np.empty(order.size, dtype=np.int64)
-    indices[order] = np.arange(order.size)
-    return indices
-
-
-def permute_before_and_after(inp: Node, middle: Node, out: Node, order):
-    ''' Insert two permutes: before middle node and after middle node.
-
-        The first permute has a given order, the second permute has an
-        inversed order.
-    '''
-
-    permute = Permute(middle.graph, dict(order=np.array(order)))
-
-    edge_attrs = deepcopy(middle.graph.get_edge_data(inp.id, middle.id)[0])
-    middle.graph.remove_edge(inp.id, middle.id)
-    new_inp = permute.create_node_with_data([inp], dict(name=middle.name + '/InputPermute'))
-    middle.graph.add_edge(new_inp.id, middle.id, **edge_attrs)
-
-    permute = Permute(middle.graph, dict(order=inverse_perm(np.array(order))))
-
-    middle.graph.remove_edge(middle.id, out.id)
-    new_out = Op._create_data_node(middle.graph, name=middle.name + '/WithoutPermute', attrs={'shape': out.shape[order]})
-    middle.graph.add_edge(middle.id, new_out.id, key=0, out=0)
-    permute.create_node_with_data([new_out], dict(name=middle.name + '/OutputPermute'), data_nodes=out)
-
-
-class LSTMSequenceNormalize(MiddleReplacementPattern):
-    ''' Convert blobs and shapes of ONNX-like LSTM to IE compatible form.
-
-        Fuse W, R and optional B input blobs to weights and biases according
-        to IE LSTM specification. In case of bidirectional LSTM, the resulting
-        blobs are not directly supported by IE, but it will be further processed
-        by a separate transformation to break down to one-directional LSTMs.
-
-        The target form of this operation is not normally covered by a dedicated
-        layer in IE. It should be further transformed to some other layer
-        that are supported by IE. This transformation pass involves weights and
-        shapes processing only.
-
-        Post-conditions:
-
-        Inputs have the following order:
-            0: input data
-            1: weights blob
-            2: biases blob
-            3: initial hidden state [optional]
-            4: initial cell state [optional]
-    '''
-
-    enabled = True
-
-
-    def run_after(self):
-        return [
-            DecomposeBiLSTM
-        ]
-
-
-    def pattern(self):
-        return dict(
-            nodes=[
-                ('lstm', dict(kind='op', op='LSTMSequence', format='onnx')),
-                ('input', dict(kind='data')),
-                ('W', dict(kind='data')),
-                ('R', dict(kind='data')),
-            ],
-            edges=[
-                ('input', 'lstm', {'in': 0}),
-                ('W', 'lstm', {'bin': 'W'}),
-                ('R', 'lstm', {'bin': 'R'}),
-            ]
-        )
-
-
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
-        self.repack_weights(graph, match)
-        if match['lstm'].has_num_directions:
-            self.squeeze_num_directions(graph, match)
-        self.batch_sequence_transpose(graph, match)
-        self.check_not_supported_ports(graph, match)
-        self.states_squeeze(graph, match)
-
-
-    def repack_weights(self, graph: nx.MultiDiGraph, match: dict):
-
-        lstm = match['lstm']
-        W = match['W'].value.copy()
-        R = match['R'].value.copy()
-
-        # bidirectional case should be processed separately before this transformation
-        if lstm.direction not in ['forward', 'reverse']:
-            raise Error('ONNX/LSTM operator with `forward` or `reverse` is supported only. '
-                'Node {} has direction = {} which is not supported.'.format(lstm.name, lstm.direction))
-
-        graph.remove_edge(match['W'].id, lstm.id)
-        graph.remove_edge(match['R'].id, lstm.id)
-
-        # find optional 'B'
-        if 3 in lstm.in_nodes():
-            # TODO: check if 'bin': 'B' attribute is assigned to this edge
-            B = lstm.in_node(3).value.copy()
-            graph.remove_edge(lstm.in_node(3).id, lstm.id)
-        else:
-            B = np.full([1, lstm.hidden_size*8], 0, dtype=np.float32)
-
-        # Add extra dimensions for W, R and B for easier repacking
-
-        B = B.reshape([
-            1,  # 0: num of directions, limitation: should be 1
-            2,  # 1: two input parts of the matrix: W, R
-            4,  # 2: four output parts of the matrix for all gates in order: i, o, f, c
-            lstm.hidden_size,  # 3: output size per direction and gate
-            1,  # 4: fake dimension to match the input dimension in W and R for shorter code
-        ])
-
-        W, R = [x.reshape([
-                1,  # 0: num of directions, limitation: should be 1
-                1,  # 1: dummy dimension to be aligned with B
-                4,  # 2: four output parts of the matrix for all gates in order: i, o, f, c
-                lstm.hidden_size,  # 3: output size per direction and gate
-                -1])  # 4: input size/hidden size in W/R
-            for x in (W, R)]
-
-        input_size = match['input'].shape[2]
-        assert input_size == W.shape[-1]
-
-        WR = np.concatenate([W, R], axis=4)
-
-        # Reorder gates: iofc --> fico
-        gate_reorder = [2, 0, 3, 1]
-        WR = np.take(WR, gate_reorder, axis=2)
-        B = np.take(B, gate_reorder, axis=2)
-
-        # Sum component of B that correspond to W and R
-        B = np.add.reduce(B, axis=1, keepdims=True)
-
-        # Reorder dimensions by collection output dimensions first, then input dimension
-        # Interpret the numbers below by looking at W, R and B reshape above in the code
-        inout_reorder = [0, 2, 3, 1, 4]
-        WR = WR.transpose(inout_reorder)
-        B = B.transpose(inout_reorder)
-
-        # Supposing it is unidirectional LSTM, squeeze 'direction' dimension
-        assert WR.shape[0] == 1
-        assert B.shape[0] == 1
-        WR = WR.squeeze(axis=0)
-        B = B.squeeze(axis=0)
-
-        # Flatten all output (0, 1) and input dimensions (2, 3)
-        final_shape = [WR.shape[0] * WR.shape[1], -1]
-        WR = WR.reshape(final_shape)
-        B = B.reshape(final_shape)
-
-        # Squeeze fake dimension in B
-        B = B.squeeze(axis=-1)
-
-        assert WR.ndim == 2
-        assert B.ndim == 1
-        assert WR.shape[0] == lstm.hidden_size*4
-        assert B.shape[0] == lstm.hidden_size*4
-        assert WR.shape[1] == lstm.hidden_size + input_size
-
-        for blob, port, name in [(WR, 1, 'weights'), (B, 2, 'biases')]:
-            Op.create_and_connect_input_data_node(
-                graph,
-                lstm,
-                {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
-                {'in': port, 'bin': name, 'permutation': None}
-            )
-
-
-    def squeeze_num_directions(self, graph: nx.MultiDiGraph, match: dict):
-        """ Assuming considered LSTM node has num_directions in output shape, remove it. """
-        lstm = match['lstm']
-        # num_directions is at 1st position in output shape, please refer to LSTMSequence op definition
-
-        direction_dim = [1, 0, 0] # index of dimension with direction index
-        for i in lstm.out_nodes():
-            old_data_node = lstm.out_node(i)
-            old_shape = old_data_node.shape.copy()
-            new_shape = np.delete(old_shape, direction_dim[i])
-            data = Op._create_data_node(graph, name=lstm.name + '/Out/{}/'.format(i), attrs={'shape': new_shape})
-            graph.remove_edge(lstm.id, old_data_node.id)
-            graph.add_edge(lstm.id, data.id, key=0, out=i)
-            reshape = Reshape(graph, dict(dim=old_shape))
-            reshape.create_node_with_data([data], dict(name=lstm.name + '/SqueezeNumDirections/{}'.format(i)), data_nodes=[old_data_node])
-
-
-    def batch_sequence_transpose(self, graph: nx.MultiDiGraph, match: dict):
-
-        lstm = match['lstm']
-        inp = match['input']
-        out = lstm.out_node(0)
-
-        if lstm.batch_dim == 0:
-            assert lstm.sequence_dim == 1
-            # nothing to do -- it's already in normal form
-            return
-
-        assert lstm.sequence_dim == 0
-        assert lstm.batch_dim == 1
-        assert len(inp.shape) == 3
-
-        # Reorder the first two dimensions on both ends: input and output.
-        # Two Permute ops are inserted before and after the LSTM node.
-        # In this transformation we don't analyze the rest of the model around
-        # LSTM cell, so these Permute ops are not fused to some other layers here.
-        # But other transformations in the pipeline may optimize the Permute ops out.
-
-        lstm.batch_dim, lstm.sequence_dim = lstm.sequence_dim, lstm.batch_dim
-        permute_before_and_after(inp, lstm, out, [1, 0, 2])
-
-
-    def check_not_supported_ports(self, graph: nx.MultiDiGraph, match: dict):
-        lstm = match['lstm']
-        inputs = lstm.in_edges()
-        assert 0 in inputs
-        assert 1 in inputs and inputs[1]['bin'] == 'weights'
-        assert 2 in inputs and inputs[2]['bin'] == 'biases'
-        assert 3 not in inputs
-        
-        if not(set(list(inputs.keys())) <= set([0, 1, 2, 5, 6])):
-            raise Error('Node {} that is interpreted as {} operation has '
-                'some unexpected inputs initialized, '
-                'they can include: sequence_lenght, '
-                'and weight tensor for peepholes. '
-                'This is not supported.'.format(lstm.name, lstm.op))
-
-
-    def states_squeeze(self, graph: nx.MultiDiGraph, match: dict):
-
-        lstm = match['lstm']
-
-        reshape = Reshape(graph, dict(dim=[lstm.in_node(0).shape[0], lstm.hidden_size]))
-
-        if len(lstm.in_nodes()) > 3:
-            init_h = lstm.in_node(5)
-            edge_attrs = deepcopy(graph.get_edge_data(init_h.id, lstm.id)[0])
-            edge_attrs['in'] = 3
-            graph.remove_edge(init_h.id, lstm.id)
-            new_init_h = reshape.create_node_with_data([init_h], dict(name=lstm.name + '/HiddenStateResize'))
-            graph.add_edge(new_init_h.id, lstm.id, **edge_attrs)
-
-        if len(lstm.in_nodes()) > 4:
-            init_c = lstm.in_node(6)
-            edge_attrs = deepcopy(graph.get_edge_data(init_c.id, lstm.id)[0])
-            edge_attrs['in'] = 4
-            graph.remove_edge(init_c.id, lstm.id)
-            new_init_c = reshape.create_node_with_data([init_c], dict(name=lstm.name + '/CellStateResize'))
-            graph.add_edge(new_init_c.id, lstm.id, **edge_attrs)
diff --git a/model-optimizer/extensions/middle/lstm_sequence_normalize_test.py b/model-optimizer/extensions/middle/lstm_sequence_normalize_test.py
deleted file mode 100644
index d15e6808e..000000000
--- a/model-optimizer/extensions/middle/lstm_sequence_normalize_test.py
+++ /dev/null
@@ -1,55 +0,0 @@
-
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import unittest
-import numpy as np
-
-from extensions.middle.lstm_sequence_normalize import LSTMSequenceNormalize
-from mo.utils.unittest.graph import compare_graphs, build_graph_with_attrs
-from mo.graph.graph import Node
-
-
-class LSTMSequenceNormalizeTest(unittest.TestCase):
-
-    def test_squeeze_num_directions(self):
-        tested_obj = LSTMSequenceNormalize()
-        pattern = tested_obj.pattern()
-        orig_shape = np.array([10, 1, 20, 128], dtype=np.int64)  # seq_length, num_dims, batch_size, data_size
-        new_shape = np.array([10, 20, 128], dtype=np.int64)
-        graph = build_graph_with_attrs(
-            nodes_with_attrs=pattern['nodes'],
-            edges_with_attrs=pattern['edges'],
-            update_edge_attrs={
-                ('W', 'lstm', 0): {'in': 1},
-                ('R', 'lstm', 0): {'in': 2},
-            },
-            new_nodes_with_attrs=[
-                ('output', {'shape': orig_shape}),
-            ],
-            new_edges_with_attrs=[
-                ('lstm', 'output', {'out': 0}),
-            ],
-        )
-
-        lstm = Node(graph, 'lstm')
-        match = {'lstm': lstm}
-        tested_obj.squeeze_num_directions(graph, match)
-        self.assertTrue(np.array_equal(lstm.out_node(0).shape, new_shape))
-        reshape_node = lstm.out_node(0).out_node(0)
-        self.assertTrue(reshape_node.op == 'Reshape')
-        self.assertTrue(np.array_equal(reshape_node.dim, orig_shape))
-        self.assertTrue(reshape_node.out_node(0).id == 'output')
diff --git a/model-optimizer/extensions/middle/mxnet_lstm_sequence_normalize.py b/model-optimizer/extensions/middle/mxnet_lstm_sequence_normalize.py
deleted file mode 100644
index 17fb9b1bc..000000000
--- a/model-optimizer/extensions/middle/mxnet_lstm_sequence_normalize.py
+++ /dev/null
@@ -1,168 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import networkx as nx
-import numpy as np
-from copy import deepcopy
-
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.ops.reshape import Reshape
-from mo.graph.graph import Node
-
-
-class MXNetLSTMSequenceNormalize(MiddleReplacementPattern):
-    ''' Convert blobs and shapes of MXNet-like LSTM to IE compatible form.
-
-        The target form of this operation is not normally covered by a dedicated
-        layer in IE. It should be further transformed to some other layer
-        that are supported by IE. This transformation pass involves weights and
-        shapes processing only.
-
-        Post-conditions:
-
-        Inputs have the following order:
-            0: input data
-            1: weights blob
-            2: biases blob
-            3: initial hidden state [optional]
-            4: initial cell state [optional]
-    '''
-    enabled = True
-
-    def pattern(self):
-        return dict(
-            nodes=[
-                ('lstm', dict(kind='op', op='LSTMSequence', format='mxnet')),
-                ('input', dict(kind='data')),
-                ('hidden_state', dict(kind='data')),
-                ('cell_state', dict(kind='data')),
-                ('params', dict(kind='data')),
-            ],
-            edges=[
-                ('input', 'lstm', {'in': 0}),
-                ('hidden_state', 'lstm', {'in': 2}),
-                ('cell_state', 'lstm', {'in': 3}),
-                ('params', 'lstm', {'in': 1}),
-            ]
-        )
-
-
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
-        input = match['input']
-        lstm = match['lstm']
-        params = match['params'].value.copy()
-        hidden_state = match['hidden_state']
-        cell_state = match['cell_state']
-
-        hidden_state_edge_attrs = deepcopy(graph.get_edge_data(hidden_state.id, lstm.id)[0])
-        cell_state_edge_attrs = deepcopy(graph.get_edge_data(cell_state.id, lstm.id)[0])
-
-        graph.remove_edge(match['params'].id, lstm.id)
-        graph.remove_edge(match['hidden_state'].id, lstm.id)
-        graph.remove_edge(match['cell_state'].id, lstm.id)
-
-        self.repack_weights(graph, input, lstm, params)
-
-        reshape = Reshape(graph, dict(dim=[lstm.in_node(0).shape[0], lstm.hidden_size]))
-
-        if len(lstm.in_nodes()) > 2:
-            hidden_state_edge_attrs['in'] = 3
-            new_init_h = reshape.create_node_with_data([hidden_state], attrs=dict(name=lstm.name + '/HiddenStateResize'))
-            graph.add_edge(new_init_h.id, lstm.id, **hidden_state_edge_attrs)
-
-        if len(lstm.in_nodes()) > 3:
-            cell_state_edge_attrs['in'] = 4
-            new_init_c = reshape.create_node_with_data([cell_state], attrs=dict(name=lstm.name + '/CellStateResize'))
-            graph.add_edge(new_init_c.id, lstm.id, **cell_state_edge_attrs)
-
-
-    def repack_weights(self, graph: nx.MultiDiGraph, input: Node, lstm: Node, params: np.array):
-        input_size = input.shape[2]
-
-        direction = 2 if lstm.has_num_directions else 1
-        bsize = (2*lstm.hidden_size*direction*1)*4
-
-        assert direction == 1
-
-        W = np.array(params[0:len(params) - bsize])
-        B = np.array(params[len(params) - bsize:])
-
-        WX = np.array(W[0:lstm.hidden_size*4*input_size])
-        WH = np.array(W[lstm.hidden_size*4*input_size:])
-
-        WX = WX.reshape([lstm.hidden_size*4, input_size])
-        WH = WH.reshape([lstm.hidden_size*4, lstm.hidden_size])
-
-        WX = WX.transpose([1, 0])
-        WH = WH.transpose([1, 0])
-
-        WX = WX.reshape([
-                1,  # 0: num of directions, limitation: should be 1
-               -1,  # 3: input size
-                4,  # 1: four output parts of the matrix for all gates in order: i, f, c, o
-                lstm.hidden_size,  # 2: output size per direction and gate
-        ])
-
-        WH = WH.reshape([
-                1,  # 0: num of directions, limitation: should be 1
-               -1,  # 3: hidden state size
-                4,  # 1: four output parts of the matrix for all gates in order: i, f, c, o
-                lstm.hidden_size,  # 2: output size per direction and gate
-        ])
-
-        B = B.reshape([
-                 1,  # 0: num of directions, limitation: should be 1
-                 2,  # 3: num of component B
-                 4,  # 1: four output parts of the matrix for all gates in order: i, f, c, o
-                 lstm.hidden_size,  # 2: output size per direction and gate
-        ])
-
-        assert WX.shape[1] == input_size
-        assert WH.shape[1] == lstm.hidden_size
-
-        W = np.concatenate([WX, WH], axis=1)
-
-        # Reorder gates: ifco --> fico
-        gate_reorder = [1, 0, 2, 3]
-        W = np.take(W, gate_reorder, axis=2)
-        B = np.take(B, gate_reorder, axis=2)
-
-        inout_reorder = [0, 2, 3, 1]
-        W = W.transpose(inout_reorder)
-        B = B.transpose(inout_reorder)
-
-        final_shape = [W.shape[0] * W.shape[1] * lstm.hidden_size, -1]
-        W = W.reshape(final_shape)
-        B = B.reshape(final_shape)
-
-        # Sum component of B
-        B = np.add.reduce(B, axis=1, keepdims=True)
-        B = B.squeeze(axis=1)
-
-        assert W.ndim == 2
-        assert B.ndim == 1
-        assert W.shape[0] == lstm.hidden_size * 4
-        assert B.shape[0] == lstm.hidden_size * 4
-        assert W.shape[1] == lstm.hidden_size + input_size
-
-        for blob, port, name in [(W, 1, 'weights'), (B, 2, 'biases')]:
-            Op.create_and_connect_input_data_node(
-                graph,
-                lstm,
-                {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
-                {'in': port, 'bin': name, 'permutation': None}
-            )
diff --git a/model-optimizer/extensions/middle/pass_separator.py b/model-optimizer/extensions/middle/pass_separator.py
new file mode 100644
index 000000000..1b7e0aab1
--- /dev/null
+++ b/model-optimizer/extensions/middle/pass_separator.py
@@ -0,0 +1,58 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.graph.graph import Graph
+from mo.middle.replacement import MiddleReplacementPattern
+
+
+class PreMiddleStart(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        pass
+
+
+class MiddleStart(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        pass
+
+
+class MiddleFinish(MiddleReplacementPattern):
+    enabled = True
+
+    def run_after(self):
+        return []
+
+    def run_before(self):
+        return []
+
+    def find_and_replace_pattern(self, graph: Graph):
+        pass
+
diff --git a/model-optimizer/extensions/middle/permute_tensor_iterator.py b/model-optimizer/extensions/middle/permute_tensor_iterator.py
index fbd3d633a..769666038 100644
--- a/model-optimizer/extensions/middle/permute_tensor_iterator.py
+++ b/model-optimizer/extensions/middle/permute_tensor_iterator.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,35 +14,33 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
-from copy import deepcopy
 
-from mo.graph.graph import copy_node, Node, dict_includes
-from mo.utils.error import Error
-from mo.middle.passes.eliminate import remove_op_node_with_data_node
-from mo.middle.pattern_match import find_isomorphisms, find_pattern_matches
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from extensions.ops.lstm_sequence import LSTMSequence
 from extensions.middle.FusePermutesSequence import FusePermutesSequence
+from extensions.middle.LSTMRNNSequenceToTensorIterator import LSTMToTensorIterator
+from extensions.middle.ONNXRNNSequenceNormalize import ONNXRNNSequenceNormalize
 from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
-from extensions.middle.lstm_sequence_normalize import LSTMSequenceNormalize, permute_before_and_after
-from extensions.middle.lstm_sequence_tensor_iterator import LSTMSequenceTensorIterator
-from extensions.middle.decompose_bi_lstm import DecomposeBiLSTM
+from mo.graph.graph import dict_includes, Graph
+from mo.middle.passes.eliminate import remove_op_node_with_data_node
+from mo.middle.pattern_match import find_isomorphisms
+from mo.middle.replacement import MiddleReplacementPattern
 
 
 class PermuteTensorIteratorLSTM(MiddleReplacementPattern):
-    ''' Fuses Permute(1,0,2) --> TI --> Permute(1,0,2) pattern to a single TI with changed axis.
+    """ Fuses Permute(1,0,2) --> TI --> Permute(1,0,2) pattern to a single TI with changed axis.
 
         WARNING This transformation is limited to support of very special case of TI but
         code doesn't check all the cases.
-    '''
+    """
 
     enabled = True
 
     def run_after(self):
-        return [TensorIteratorMerge, LSTMSequenceNormalize, LSTMSequenceTensorIterator, FusePermutesSequence, DecomposeBiLSTM]
+        return [TensorIteratorMerge, ONNXRNNSequenceNormalize, LSTMToTensorIterator, FusePermutesSequence]
+
+
+    def run_before(self):
+        return []
 
     def pattern(self):
         return dict(
@@ -63,21 +61,21 @@ class PermuteTensorIteratorLSTM(MiddleReplacementPattern):
                 ('input', 'direct_permute'),
                 ('direct_permute', 'input_permuted'),
 
-                ('input_permuted', 'ti', {'in': 0}),   # affected by permute
+                ('input_permuted', 'ti', {'in': 0}),  # affected by permute
                 ('init_hidden', 'ti', {'in': 1}),
                 ('init_cell', 'ti', {'in': 2}),
-                ('ti', 'output_permuted', {'out': 0}), # affected by permute
+                ('ti', 'output_permuted', {'out': 0}),  # affected by permute
 
                 ('output_permuted', 'inverse_permute'),
                 ('inverse_permute', 'output'),
             ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
 
         # This transformation works if and only if a body of TI
         # matches the following topology (Reshape -> LSTMCell -> Reshape)
-        nodes=[
+        nodes = [
             ('input_unsqueezed'),
             ('squeeze', dict(op='Reshape')),
             ('input_squeezed'),
@@ -92,8 +90,16 @@ class PermuteTensorIteratorLSTM(MiddleReplacementPattern):
             ('output_cell'),
             ('unsqueeze', dict(op='Reshape')),
             ('output_unsqueezed'),
+
+            ('const_w', dict(op='Const')),
+            ('const_b', dict(op='Const')),
+
+            ('op_output', dict(op='OpOutput')),
+            ('op_output_1', dict(op='OpOutput')),
+            ('op_output_2', dict(op='OpOutput'))
+
         ]
-        edges=[
+        edges = [
             ('input_unsqueezed', 'squeeze'),
             ('squeeze', 'input_squeezed'),
 
@@ -103,11 +109,19 @@ class PermuteTensorIteratorLSTM(MiddleReplacementPattern):
             ('weights', 'lstm', {'in': 3}),
             ('biases', 'lstm', {'in': 4}),
 
+            ('const_w', 'weights'),
+            ('const_b', 'biases'),
+
             ('lstm', 'output_hidden', {'out': 0}),
             ('lstm', 'output_cell', {'out': 1}),
 
             ('output_hidden', 'unsqueeze'),
             ('unsqueeze', 'output_unsqueezed'),
+
+            ('output_unsqueezed', 'op_output'),
+            ('output_hidden', 'op_output_1'),
+            ('output_cell', 'op_output_2'),
+
         ]
         ti = match['ti']
         isomorphisms = find_isomorphisms(ti.body, nodes, edges)
@@ -126,7 +140,6 @@ class PermuteTensorIteratorLSTM(MiddleReplacementPattern):
         if not inverse_permute.has_valid('order') or not np.array_equal(inverse_permute.order, permute_order):
             return
 
-
         def find_ports(port_map: list, attrs: dict):
             """ Find all ports in a given port map with specified attributes """
             result = []
diff --git a/model-optimizer/extensions/middle/reverse_tensor_iterator.py b/model-optimizer/extensions/middle/reverse_tensor_iterator.py
index 7cd529b50..62f513376 100644
--- a/model-optimizer/extensions/middle/reverse_tensor_iterator.py
+++ b/model-optimizer/extensions/middle/reverse_tensor_iterator.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,15 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.middle.replacement import MiddleReplacementPattern
-from extensions.ops.lstm_sequence import LSTMSequence
 from extensions.middle.FusePermutesSequence import FusePermutesSequence
-from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
-from extensions.middle.lstm_sequence_normalize import LSTMSequenceNormalize
-from extensions.middle.lstm_sequence_tensor_iterator import LSTMSequenceTensorIterator
+from extensions.middle.ONNXRNNSequenceNormalize import ONNXRNNSequenceNormalize
 from extensions.middle.permute_tensor_iterator import PermuteTensorIteratorLSTM
+from mo.graph.graph import Graph
 from mo.middle.passes.eliminate import remove_op_node_with_data_node
 from mo.middle.replacement import MiddleReplacementPattern
 
@@ -38,13 +33,16 @@ class ReverseTensorIteratorLSTM(MiddleReplacementPattern):
 
     def run_after(self):
         return [
-            TensorIteratorMerge,
-            LSTMSequenceNormalize,
-            LSTMSequenceTensorIterator,
+            ONNXRNNSequenceNormalize,
+
             FusePermutesSequence,
             PermuteTensorIteratorLSTM,
         ]
 
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -52,7 +50,6 @@ class ReverseTensorIteratorLSTM(MiddleReplacementPattern):
                 ('direct_reverse', dict(op='ReverseSequence')),
                 ('input_reversed'),
                 ('init_hidden'),
-                ('init_cell'),
 
                 ('ti', dict(kind='op', op='TensorIterator')),
 
@@ -66,7 +63,6 @@ class ReverseTensorIteratorLSTM(MiddleReplacementPattern):
 
                 ('input_reversed', 'ti', {'in': 0}),
                 ('init_hidden', 'ti', {'in': 1}),
-                ('init_cell', 'ti', {'in': 2}),
                 ('ti', 'output_reversed', {'out': 0}),
 
                 ('output_reversed', 'inverse_reverse', {'in': 0}),
@@ -74,21 +70,21 @@ class ReverseTensorIteratorLSTM(MiddleReplacementPattern):
             ]
         )
 
-    def replace_pattern(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_pattern(self, graph: Graph, match: dict):
         ti = match['ti']
         direct_reverse = match['direct_reverse']
         inverse_reverse = match['inverse_reverse']
 
-        assert direct_reverse.seq_dim == inverse_reverse.seq_dim
-        assert direct_reverse.batch_dim is None and inverse_reverse.batch_dim is None or \
-            direct_reverse.batch_dim == inverse_reverse.batch_dim
+        assert direct_reverse.seq_axis == inverse_reverse.seq_axis
+        assert direct_reverse.batch_axis is None and inverse_reverse.batch_axis is None or \
+               direct_reverse.batch_axis == inverse_reverse.batch_axis
 
         # Modify stride in TI
         for port_map in [ti.input_port_map, ti.output_port_map]:
             for port in port_map:
                 if 'axis' in port and port['axis'] is not None and 'external_port_id' in port:
-                    assert port['axis'] == direct_reverse.seq_dim, \
-                        'axis == {} != {} == direct_reverse.seq_dim'.format(port['axis'], direct_reverse.seq_dim)
+                    assert port['axis'] == direct_reverse.seq_axis, \
+                        'axis == {} != {} == direct_reverse.seq_dim'.format(port['axis'], direct_reverse.seq_axis)
                     if 'stride' not in port or port['stride'] is None:
                         port['stride'] = 1
                     assert port['stride'] in [-1, 1]
diff --git a/model-optimizer/extensions/ops/BlockLSTM.py b/model-optimizer/extensions/ops/BlockLSTM.py
index 8e3ac7f6b..3e28b17f9 100644
--- a/model-optimizer/extensions/ops/BlockLSTM.py
+++ b/model-optimizer/extensions/ops/BlockLSTM.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.utils import mark_input_bins
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 import numpy as np
 
@@ -25,10 +25,11 @@ import numpy as np
 class BlockLSTM(Op):
     op = 'BlockLSTM'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
-            'infer': __class__.infer
+            'infer': __class__.infer,
+            'type': __class__.op,
         }
         super().__init__(graph, mandatory_props, attrs)
 
diff --git a/model-optimizer/extensions/ops/Cast.py b/model-optimizer/extensions/ops/Cast.py
new file mode 100644
index 000000000..517699402
--- /dev/null
+++ b/model-optimizer/extensions/ops/Cast.py
@@ -0,0 +1,40 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class Cast(Op):
+    op = 'Cast'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'dst_type': None,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def infer(node: Node):
+        assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier'
+        copy_shape_infer(node, lambda n: n.in_node().value.astype(n.dst_type))
diff --git a/model-optimizer/extensions/ops/DetectionOutput.py b/model-optimizer/extensions/ops/DetectionOutput.py
index 6eb3d936b..fb2f91d2d 100644
--- a/model-optimizer/extensions/ops/DetectionOutput.py
+++ b/model-optimizer/extensions/ops/DetectionOutput.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.multi_box_detection import multi_box_detection_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,10 +23,12 @@ class DetectionOutput(Op):
     op = 'DetectionOutput'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': multi_box_detection_infer,
             'input_width': 1,
             'input_height': 1,
@@ -39,7 +40,8 @@ class DetectionOutput(Op):
     def supported_attrs(self):
         return [
             'background_label_id',
-            'clip',
+            'clip_after_nms',
+            'clip_before_nms',
             'code_type',
             'confidence_threshold',
             'eta',
@@ -70,4 +72,5 @@ class DetectionOutput(Op):
             'visualize_threshold',
             'width',
             'width_scale',
+            'objectness_score',
         ]
diff --git a/model-optimizer/extensions/ops/Enter.py b/model-optimizer/extensions/ops/Enter.py
index edc27d5e0..73bda61b9 100644
--- a/model-optimizer/extensions/ops/Enter.py
+++ b/model-optimizer/extensions/ops/Enter.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 
 import networkx as nx
 import numpy as np
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -25,10 +25,11 @@ from mo.utils.error import Error
 class Enter(Op):
     op = "Enter"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
             'infer': Enter.enter_infer,
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/Exit.py b/model-optimizer/extensions/ops/Exit.py
index 6f5c8d9f9..a06f6ef8f 100644
--- a/model-optimizer/extensions/ops/Exit.py
+++ b/model-optimizer/extensions/ops/Exit.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-
-import networkx as nx
 import numpy as np
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -25,11 +23,12 @@ from mo.utils.error import Error
 class Exit(Op):
     op = "Exit"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
             'infer': Exit.exit_infer,
+            'in_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
diff --git a/model-optimizer/extensions/ops/GRU.py b/model-optimizer/extensions/ops/GRU.py
new file mode 100644
index 000000000..16b1909c3
--- /dev/null
+++ b/model-optimizer/extensions/ops/GRU.py
@@ -0,0 +1,81 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.RNN import rnn_infer
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+import numpy as np
+
+
+class GRU(Op):
+    op = 'GRU'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': 'RNNSequence',  # should be never emitted to IR; for debugging purposes
+            'op': __class__.op,
+            'blobs_wrb': False,
+            'has_num_directions': False,
+            'direction': 'forward',
+            'infer': __class__.infer,
+            'multiplier': 3,
+            'multilayers': False,
+            'gate_order': np.array([0, 1, 2]),  # TODO: change it later
+            'normalized': False,
+
+            'activation_alpha': None,
+            'activation_beta': None,
+            'activations': None,
+            'clip': None,
+            'linear_before_reset': None,
+            'in_ports_count': 6,
+            'out_ports_count': 2,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def supported_attrs():
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'direction',  # one of 'forward', 'reverse', or 'bidirectional'
+            'axis',
+
+            'activation_alpha',
+            'activation_beta',
+            'activations',
+            'clip',
+            'linear_before_reset',
+        ]
+
+    def backend_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'direction',  # one of 'forward', 'reverse', or 'bidirectional'
+            'axis',
+
+            'activation_alpha',
+            'activation_beta',
+            ('activations', lambda node: ','.join(node.activations) if node.activations is not None else None),
+            'clip',
+            'linear_before_reset',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.in_nodes()) >= 3  # X, W and R
+        assert len(node.in_nodes()) <= 5
+        assert len(node.out_nodes()) <= 2
+
+        rnn_infer(node, [1])
diff --git a/model-optimizer/extensions/ops/GRUCell.py b/model-optimizer/extensions/ops/GRUCell.py
new file mode 100644
index 000000000..120aedd5d
--- /dev/null
+++ b/model-optimizer/extensions/ops/GRUCell.py
@@ -0,0 +1,83 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.partial_infer.utils import mark_input_bins
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+from mo.utils.error import Error
+
+
+class GRUCell(Op):
+    """ A single GRU cell (without a loop).
+
+        2 inputs:
+            - [0, required] input data (2D),
+            - [1, required] initial hidden state (2D),
+
+        2 blobs:
+            - [2, required] cell FC weights
+            - [3, required] cell FC biases
+
+        1 outputs:
+            - [required] output data / resulting hidden state (2D)
+    """
+    op = 'GRUCell'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'in_ports_count': 4,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'activations',
+            'activation_alpha',
+            'activation_beta',
+            'clip',
+            'linear_before_reset',
+        ]
+
+    def backend_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            ('activations', lambda node: ','.join(node.activations) if node.activations is not None else None),
+            'activation_alpha',
+            'activation_beta',
+            'clip',
+            'linear_before_reset',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.out_nodes()) in [1, 2]
+
+        hidden_shape = node.in_node(1).shape.copy()
+
+        mark_input_bins(node, start_port=2)
+        node.out_node(0).shape = hidden_shape
+
+        hidden_size = hidden_shape[1]
+        if node.has_valid('hidden_size'):
+            if node.hidden_size != hidden_size:
+                raise Error("Input shape {} for hidden size doesn't match pre-defined hidden_size in node {}".format(
+                    node.in_node(1).shape, node.soft_get('name')))
+        else:
+            node['hidden_size'] = hidden_size
diff --git a/model-optimizer/extensions/ops/GatherNd.py b/model-optimizer/extensions/ops/GatherNd.py
new file mode 100644
index 000000000..9a4de3e04
--- /dev/null
+++ b/model-optimizer/extensions/ops/GatherNd.py
@@ -0,0 +1,47 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class GatherNd(Op):
+    op = 'GatherNd'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return []
+
+    @staticmethod
+    def infer(node: Node):
+        input_node = node.in_node(0)
+        indices = node.in_node(1).value
+
+        assert indices is not None
+
+        output_shape = list(indices.shape[:-1]) + list(input_node.shape[indices.shape[-1]:])
+        node.out_node().shape = np.array(output_shape, dtype=np.int64)
+        # TODO: implement constant path
diff --git a/model-optimizer/extensions/ops/LSTM.py b/model-optimizer/extensions/ops/LSTM.py
new file mode 100644
index 000000000..196d653e9
--- /dev/null
+++ b/model-optimizer/extensions/ops/LSTM.py
@@ -0,0 +1,82 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from extensions.ops.RNN import rnn_infer
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class LSTM(Op):
+    op = 'LSTM'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': 'RNNSequence',  # should be never emitted to IR; for debugging purposes
+            'op': __class__.op,
+            'blobs_wrb': False,  # input blobs have three separate components W, R and B like in ONNX/LSTM
+            'has_num_directions': False,  # if True, output shape has 4 dimensions; 3D otherwise
+            'direction': 'forward',
+            'infer': __class__.infer,
+            'multiplier': 4,
+            'gate_order': None,
+            'normalized': False,
+            'multilayers': False,
+            'format': None,  # format type of input blobs for different frameworks (onnx, tf, mxnet),
+
+            'activation_alpha': None,
+            'activation_beta': None,
+            'activations': None,
+            'clip': None,
+            'input_forget': None,
+            'in_ports_count': 7,
+            'out_ports_count': 3,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def supported_attrs():
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'direction',  # one of 'forward', 'reverse', or 'bidirectional'
+            'axis',
+
+            'activation_alpha',
+            'activation_beta',
+            'activations',
+            'clip',
+            # 'input_forget',  # Not supported yet
+        ]
+
+    def backend_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'direction',  # one of 'forward', 'reverse', or 'bidirectional'
+            'axis',
+
+            'activation_alpha',
+            'activation_beta',
+            ('activations', lambda node: ','.join(node.activations) if node.activations is not None else None),
+            'clip',
+            # 'input_forget',  # Not supported yet
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        # there are limitations coming from ONNX LSTM definition and normalization rules
+        assert len(node.in_nodes()) >= 3  # X, W and R
+        assert len(node.in_nodes()) <= 7
+        assert len(node.out_nodes()) <= 3
+
+        rnn_infer(node, [1, 2])
diff --git a/model-optimizer/extensions/ops/NextIteration.py b/model-optimizer/extensions/ops/NextIteration.py
index 5ee49afdd..3a4a5fee0 100644
--- a/model-optimizer/extensions/ops/NextIteration.py
+++ b/model-optimizer/extensions/ops/NextIteration.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,20 +14,20 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class NextIteration(Op):
     op = "NextIteration"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
             'infer': NextIteration.enter_infer,
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/RNN.py b/model-optimizer/extensions/ops/RNN.py
new file mode 100644
index 000000000..ba0a02497
--- /dev/null
+++ b/model-optimizer/extensions/ops/RNN.py
@@ -0,0 +1,154 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import numpy as np
+
+from mo.front.common.partial_infer.utils import mark_input_bins
+from mo.graph.graph import Node, Graph, add_opoutput
+from mo.ops.op import Op
+
+
+class RNN(Op):
+    op = 'RNN'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': 'RNNSequence',  # should be never emitted to IR; for debugging purposes
+            'op': __class__.op,
+            'blobs_wrb': False,
+            'has_num_directions': False,
+            'direction': 'forward',
+            'infer': __class__.infer,
+            'multiplier': 1,
+            'gate_order': np.array([0]),  # Only one gate in this cell
+            'normalized': False,
+
+            'activation_alpha': None,
+            'activation_beta': None,
+            'activations': None,
+            'clip': None,
+            'in_ports_count': 6,
+            'out_ports_count': 2,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def supported_attrs():
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'direction',  # one of 'forward', 'reverse', or 'bidirectional'
+            'axis',
+
+            # Additional attributes
+            'activation_alpha',
+            'activation_beta',
+            'activations',
+            'clip',
+        ]
+
+    def backend_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'direction',  # one of 'forward', 'reverse', or 'bidirectional'
+            'axis',
+
+            # Additional attributes
+            'activation_alpha',
+            'activation_beta',
+            ('activations', lambda node: ','.join(node.activations) if node.activations is not None else None),
+            'clip',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.in_nodes()) >= 3  # X, W and R
+        assert len(node.in_nodes()) <= 5
+        assert len(node.out_nodes()) <= 2
+
+        rnn_infer(node, [1])
+
+
+def rnn_infer(node: Node, out_ports=None):
+    """
+    General infer function for RNN, GRU, LSTM layers.
+    Assume that 0-port input of node is input data for recurrent layer and node have attrs:
+    hidden_size,
+    """
+    if out_ports is None:
+        out_ports = []
+
+    # 1. Necessary checks (from ONNX specification)
+    assert node.batch_dim <= 1
+    assert node.sequence_dim <= 1
+    assert node.batch_dim != node.sequence_dim
+    assert node.direction in ['forward', 'reverse', 'bidirectional']
+
+    if node.blobs_wrb:
+        mark_input_bins(node, ['W', 'R', 'B'])
+    else:
+        mark_input_bins(node)
+
+    # 2. Output shape calculations
+    input_shape = node.in_node(0).shape
+    assert len(input_shape) == 3
+
+    # Reshape input nodes
+    for port in [2, 3]:
+        if port in node.in_nodes() and len(node.in_node(port).in_nodes()) > 0 and \
+                'zero_shapes' in node.in_node(port).in_node():
+            for i in node.in_node(port).in_node().zero_shapes:
+                if node.in_node(port).shape[i] != input_shape[i]:
+                    node.in_node(port).value = np.repeat(node.in_node(port).value, input_shape[i], axis=i)
+                    node.in_node(port).shape[i] = input_shape[i]
+
+    out_shape = np.array([input_shape[node.sequence_dim], input_shape[node.batch_dim], node.hidden_size], dtype=np.int64)
+
+    if node.batch_dim == 0:
+        out_shape = np.array([input_shape[node.batch_dim], input_shape[node.sequence_dim], node.hidden_size], dtype=np.int64)
+
+    num_directions = 2 if node.direction in ['bidirectional'] else 1
+    if node.has_num_directions:
+        if node.format == 'mxnet' and node.normalized is False:
+            # In MXNet RNN layer return output with shape [seq_len, batch_size, hidden_size * num_directions]
+            out_shape[-1] *= num_directions
+        else:
+            # ONNX-like, insert extra dimension to output shape for num_directions
+            out_shape = np.insert(out_shape, 1, np.int64(num_directions))
+    node.out_node(0).shape = out_shape
+
+    # 3. Extra outputs for hidden/cell states shape calculations (optional)
+    state_size = np.array([input_shape[node.batch_dim], node.hidden_size], dtype=np.int64)
+    if node.has_num_directions:
+        state_size = np.insert(state_size, 0, num_directions)
+
+    if node.multilayers:
+        # For multilayer case state sizes from every layer will be concatenated by last axis
+        num_layers = node.num_layers
+        state_size[-1] *= num_layers
+
+    for i in out_ports:
+        # If node hasn't consumers for hidden/cells state -> create them
+        if i not in node.out_nodes():
+            data_node = Op._create_data_node(
+                node.graph,
+                name=node.node + '/ExtraOutput/' + str(i),
+                attrs={'executable': True}
+            )
+            node.add_output_port(i)
+            node.graph.add_edge(node.id, data_node.id, key=0, out=i)
+            add_opoutput(node.graph, data_node.id, 0, False)
+        else:
+            data_node = node.out_node(i)
+        data_node.shape = state_size.copy()
diff --git a/model-optimizer/extensions/ops/RNNCell.py b/model-optimizer/extensions/ops/RNNCell.py
new file mode 100644
index 000000000..0fd71ed91
--- /dev/null
+++ b/model-optimizer/extensions/ops/RNNCell.py
@@ -0,0 +1,81 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.front.common.partial_infer.utils import mark_input_bins
+from mo.graph.graph import Graph, Node
+from mo.ops.op import Op
+from mo.utils.error import Error
+
+
+class RNNCell(Op):
+    """ A single RNN cell (without a loop).
+
+        2 inputs:
+            - [0, required] input data (2D),
+            - [1, required] initial hidden state (2D),
+
+        2 blobs:
+            - [2, required] cell FC weights
+            - [3, required] cell FC biases
+
+        1 outputs:
+            - [required] output data / resulting hidden state (2D)
+    """
+    op = 'RNNCell'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'in_ports_count': 4,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            'activations',
+            'activation_alpha',
+            'activation_beta',
+            'clip',
+        ]
+
+    def backend_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            ('activations', lambda node: ','.join(node.activations) if node.activations is not None else None),
+            'activation_alpha',
+            'activation_beta',
+            'clip',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.out_nodes()) in [1, 2]
+
+        hidden_shape = node.in_node(1).shape.copy()
+
+        mark_input_bins(node, start_port=2)
+        node.out_node(0).shape = hidden_shape
+
+        hidden_size = hidden_shape[1]
+        if node.has_valid('hidden_size'):
+            if node.hidden_size != hidden_size:
+                raise Error("Input shape {} for hidden size doesn't match pre-defined hidden_size in node {}".format(
+                    node.in_node(1).shape, node.soft_get('name')))
+        else:
+            node['hidden_size'] = hidden_size
diff --git a/model-optimizer/extensions/ops/Reverse.py b/model-optimizer/extensions/ops/Reverse.py
new file mode 100644
index 000000000..66dcf4e46
--- /dev/null
+++ b/model-optimizer/extensions/ops/Reverse.py
@@ -0,0 +1,47 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from mo.graph.graph import Graph
+from mo.ops.op import Op
+
+
+class Reverse(Op):
+    op = 'Reverse'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            # 'type': __class__.op, # Internal MO primitive
+            'axis': None,
+            'op': __class__.op,
+            'infer': __class__.infer,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def infer(node):
+        input_data_shape = node.in_node(0).shape
+        assert input_data_shape is not None
+        if not node.has_valid('axis'):
+            assert 1 in node.in_nodes()
+            assert node.in_node(1).has_valid('value')
+            assert node.in_node(1).value.size == 1
+
+            node['axis'] = node.in_node(1).value.item()
+            node.in_port(1).disconnect()
+
+        assert node.has_valid('axis')
+
+        assert len(node.out_nodes()) == 1
+        node.out_node().shape = input_data_shape.copy()
diff --git a/model-optimizer/extensions/ops/SquaredDifference.py b/model-optimizer/extensions/ops/SquaredDifference.py
index f1c37358d..ce858510a 100644
--- a/model-optimizer/extensions/ops/SquaredDifference.py
+++ b/model-optimizer/extensions/ops/SquaredDifference.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.eltwise import eltwise_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,9 +25,11 @@ class SquaredDifference(Op):
     op = 'SquaredDifference'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,  # IE layer type, not required if this op won't be dumped to IE
             'op': __class__.op,  # internal MO name for the operation, can be the same as type; required
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': lambda node: eltwise_infer(node, lambda a, b: (a - b) ** 2)},
                          attrs)
diff --git a/model-optimizer/extensions/ops/TensorArray.py b/model-optimizer/extensions/ops/TensorArray.py
index 9108e05b9..6fd80f596 100644
--- a/model-optimizer/extensions/ops/TensorArray.py
+++ b/model-optimizer/extensions/ops/TensorArray.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,16 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class TensorArray(Op):
     op = "TensorArrayV3"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/TensorArrayGather.py b/model-optimizer/extensions/ops/TensorArrayGather.py
index 221c0c98c..ef6a05abf 100644
--- a/model-optimizer/extensions/ops/TensorArrayGather.py
+++ b/model-optimizer/extensions/ops/TensorArrayGather.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.utils import symm_match_shapes
 
@@ -25,7 +25,7 @@ from mo.utils.utils import symm_match_shapes
 class TensorArrayGather(Op):
     op = "TensorArrayGatherV3"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/TensorArrayRead.py b/model-optimizer/extensions/ops/TensorArrayRead.py
index 2b35159ad..6184e45cd 100644
--- a/model-optimizer/extensions/ops/TensorArrayRead.py
+++ b/model-optimizer/extensions/ops/TensorArrayRead.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,14 +17,14 @@
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class TensorArrayReader(Op):
     op = "TensorArrayReadV3"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/TensorArrayScatter.py b/model-optimizer/extensions/ops/TensorArrayScatter.py
index cb30e87ec..4f46007f4 100644
--- a/model-optimizer/extensions/ops/TensorArrayScatter.py
+++ b/model-optimizer/extensions/ops/TensorArrayScatter.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,9 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.utils import match_shapes
 
@@ -25,7 +24,7 @@ from mo.utils.utils import match_shapes
 class TensorArrayScatter(Op):
     op = "TensorArrayScatterV3"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/TensorArraySize.py b/model-optimizer/extensions/ops/TensorArraySize.py
index a16a06adc..b5feac813 100644
--- a/model-optimizer/extensions/ops/TensorArraySize.py
+++ b/model-optimizer/extensions/ops/TensorArraySize.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,16 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class TensorArraySize(Op):
     op = "TensorArraySizeV3"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/TensorArrayWrite.py b/model-optimizer/extensions/ops/TensorArrayWrite.py
index 43304604d..d9ace7395 100644
--- a/model-optimizer/extensions/ops/TensorArrayWrite.py
+++ b/model-optimizer/extensions/ops/TensorArrayWrite.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,9 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.utils import match_shapes
 
@@ -25,7 +24,7 @@ from mo.utils.utils import match_shapes
 class TensorArrayWriter(Op):
     op = "TensorArrayWriteV3"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/TensorIterator_ops.py b/model-optimizer/extensions/ops/TensorIterator_ops.py
index 8e408b888..bac24af0a 100644
--- a/model-optimizer/extensions/ops/TensorIterator_ops.py
+++ b/model-optimizer/extensions/ops/TensorIterator_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,15 +16,16 @@
 
 
 import networkx as nx
-import numpy as np
-from mo.graph.graph import Node
+
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
+
 # TODO: check all supported attributes in this file
 class TensorIteratorInput(Op):
     op = "TensorIteratorInput"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
             'axis': None,
@@ -32,6 +33,8 @@ class TensorIteratorInput(Op):
             'end': None,
             'stride': None,
             'part_size': None,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': TensorIteratorInput.input_infer,
         }
         super().__init__(graph, mandatory_props, attrs)
@@ -47,7 +50,7 @@ class TensorIteratorInput(Op):
 class TensorIteratorOutput(Op):
     op = "TensorIteratorOutput"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
             'axis': None,
@@ -55,6 +58,8 @@ class TensorIteratorOutput(Op):
             'end': None,
             'stride': None,
             'part_size': None,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': TensorIteratorOutput.input_infer,
         }
         super().__init__(graph, mandatory_props, attrs)
@@ -70,16 +75,15 @@ class TensorIteratorOutput(Op):
 class TensorIteratorCondition(Op):
     op = "TensorIteratorCondition"
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
+            'in_ports_count': 2,
+            'out_ports_count': 2,
             'infer': TensorIteratorCondition.input_infer,
         }
         super().__init__(graph, mandatory_props, attrs)
 
-    def supported_attrs(self):
-        return ['time', 'iter']
-
     @staticmethod
     def input_infer(node: Node):
         pass
@@ -88,17 +92,15 @@ class TensorIteratorCondition(Op):
 class TensorIteratorBackEdge(Op):
     op = 'TensorIteratorBackEdge'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': TensorIteratorBackEdge.input_infer,
         }
         super().__init__(graph, mandatory_props, attrs)
 
     @staticmethod
-    def supported_attrs():
-        return ['is_output']
-
-    @staticmethod
     def input_infer(node: Node):
         pass
diff --git a/model-optimizer/extensions/ops/accum.py b/model-optimizer/extensions/ops/accum.py
index b361c018c..04446ab38 100644
--- a/model-optimizer/extensions/ops/accum.py
+++ b/model-optimizer/extensions/ops/accum.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,16 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class AccumOp(Op):
     op = 'Accum'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
@@ -32,6 +31,7 @@ class AccumOp(Op):
             'top_width': 0,
             'size_divisible_by': 0,
             'have_reference': 0,
+            'out_ports_count': 1,
             'infer': AccumOp.accum_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/accum_test.py b/model-optimizer/extensions/ops/accum_test.py
index b2762f316..d949b599b 100644
--- a/model-optimizer/extensions/ops/accum_test.py
+++ b/model-optimizer/extensions/ops/accum_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,20 +24,26 @@ from mo.utils.unittest.graph import build_graph
 
 wrong_attrs_graph = {'node_1': {'type': 'Identity', 'kind': 'op'},
                      'accum': {'type': 'Accum', 'kind': 'op'},
-                     'node_3': {'type': 'Identity', 'kind': 'op'}}
+                     'node_3': {'type': 'Identity', 'kind': 'op'},
+                     'op_output': { 'kind': 'op', 'op': 'OpOutput'}
+                     }
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'kind': 'op'},
                     'accum': {'type': 'Accum', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
+                    }
 
 
 class TestAccumOp(unittest.TestCase):
     def test_accum_infer_assertion(self):
         graph = build_graph(wrong_attrs_graph,
                             [('node_1', 'accum'),
-                             ('accum', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('accum', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'accum': {
                                  'top_height': 0,
@@ -54,8 +60,10 @@ class TestAccumOp(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'accum'),
                              ('node_2', 'accum'),
-                             ('accum', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('accum', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([1, 3, 227, 227])},
                              'accum': {
@@ -77,8 +85,10 @@ class TestAccumOp(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'accum'),
                              ('node_2', 'accum'),
-                             ('accum', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('accum', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([1, 3, 227, 227])},
                              'accum': {
@@ -100,8 +110,10 @@ class TestAccumOp(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'accum'),
                              ('node_2', 'accum'),
-                             ('accum', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('accum', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([1, 3, 227, 227])},
                              'accum': {
diff --git a/model-optimizer/extensions/ops/argmax.py b/model-optimizer/extensions/ops/argmax.py
index 41435cc37..73cd95598 100644
--- a/model-optimizer/extensions/ops/argmax.py
+++ b/model-optimizer/extensions/ops/argmax.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,22 +15,23 @@
 """
 
 import logging as log
-import networkx as nx
 import numpy as np
 
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
 class ArgMaxOp(Op):
     op = 'ArgMax'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
-            'infer': ArgMaxOp.argmax_infer
+            'infer': ArgMaxOp.argmax_infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
diff --git a/model-optimizer/extensions/ops/argmax_test.py b/model-optimizer/extensions/ops/argmax_test.py
index 14edf5e5c..105441e6c 100644
--- a/model-optimizer/extensions/ops/argmax_test.py
+++ b/model-optimizer/extensions/ops/argmax_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,7 +24,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'argmax': {'type': 'ArgMax', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -32,8 +33,10 @@ class TestArgMaxOp(unittest.TestCase):
     def test_caffe_argmax_axis(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'argmax'),
-                             ('argmax', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('argmax', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 1025, 2049])},
                              'argmax': {
                                  'out_max_val': True,
@@ -52,8 +55,10 @@ class TestArgMaxOp(unittest.TestCase):
     def test_caffe_argmax_axis_negative(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'argmax'),
-                             ('argmax', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('argmax', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 1025, 2049])},
                              'argmax': {
                                  'out_max_val': True,
@@ -73,8 +78,10 @@ class TestArgMaxOp(unittest.TestCase):
     def test_caffe_argmax_no_axis(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'argmax'),
-                             ('argmax', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('argmax', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 1025, 2049])},
                              'argmax': {
                                  'out_max_val': True,
@@ -92,8 +99,10 @@ class TestArgMaxOp(unittest.TestCase):
     def test_caffe_argmax_extend_shape(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'argmax'),
-                             ('argmax', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('argmax', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3])},
                              'argmax': {
                                  'out_max_val': True,
@@ -111,8 +120,10 @@ class TestArgMaxOp(unittest.TestCase):
     def test_caffe_argmax_out_max_val_false(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'argmax'),
-                             ('argmax', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('argmax', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3])},
                              'argmax': {
                                  'out_max_val': False,
@@ -130,8 +141,10 @@ class TestArgMaxOp(unittest.TestCase):
     def test_caffe_argmax_no_shape(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'argmax'),
-                             ('argmax', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('argmax', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': None},
                              'argmax': {
                                  'out_max_val': False,
diff --git a/model-optimizer/extensions/ops/assert_op.py b/model-optimizer/extensions/ops/assert_op.py
index 249f8fb51..f79808e8c 100644
--- a/model-optimizer/extensions/ops/assert_op.py
+++ b/model-optimizer/extensions/ops/assert_op.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -24,7 +22,7 @@ from mo.utils.error import Error
 class Assert(Op):
     op = 'Assert'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
             'infer': Assert.assert_infer,
diff --git a/model-optimizer/extensions/ops/assert_test.py b/model-optimizer/extensions/ops/assert_test.py
index 37417d5d6..9d83df794 100644
--- a/model-optimizer/extensions/ops/assert_test.py
+++ b/model-optimizer/extensions/ops/assert_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/axpy.py b/model-optimizer/extensions/ops/axpy.py
index 26e15cd51..6534ed4f6 100644
--- a/model-optimizer/extensions/ops/axpy.py
+++ b/model-optimizer/extensions/ops/axpy.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -26,7 +25,7 @@ class AxpyOp(Op):
     op = 'Axpy'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/binarization.py b/model-optimizer/extensions/ops/binarization.py
new file mode 100644
index 000000000..ab2c0e371
--- /dev/null
+++ b/model-optimizer/extensions/ops/binarization.py
@@ -0,0 +1,32 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+from mo.graph.graph import Graph
+from mo.ops.op import Op
+
+
+class Binarization(Op):
+    op = 'Binarization'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'op': __class__.op,
+            'infer': None,
+            'dst_type': None,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/bn.py b/model-optimizer/extensions/ops/bn.py
index 69f7bf1b2..4b7cd86dc 100644
--- a/model-optimizer/extensions/ops/bn.py
+++ b/model-optimizer/extensions/ops/bn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -26,9 +25,11 @@ class BNOp(Op):
     op = 'BN'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 5,
+            'out_ports_count': 1,
             'infer': None
         }, attrs)
diff --git a/model-optimizer/extensions/ops/constant_fill.py b/model-optimizer/extensions/ops/constant_fill.py
index 0a51160e3..1f9655f83 100644
--- a/model-optimizer/extensions/ops/constant_fill.py
+++ b/model-optimizer/extensions/ops/constant_fill.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,29 +14,27 @@
  limitations under the License.
 """
 
-import logging as log
-
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
-from mo.utils.utils import refer_to_faq_msg
 
 
 class ConstantFill(Op):
-    ''' Constant blob generation by broadcasting specified value to a given shape.
+    """ Constant blob generation by broadcasting specified value to a given shape.
 
         It is assumed that there is no equivalent of this op in IE,
         so it is usually relevant to constant folding.
-    '''
+    """
     op = 'ConstantFill'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
-            'type': None, # do not set type as there is no IE equivalent
+            'type': __class__.op,
             'op': __class__.op,
             'input_as_shape': 1,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': __class__.infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/correlation.py b/model-optimizer/extensions/ops/correlation.py
index b61ed48d1..715830f49 100644
--- a/model-optimizer/extensions/ops/correlation.py
+++ b/model-optimizer/extensions/ops/correlation.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,22 +16,23 @@
 
 from math import ceil
 
-import networkx as nx
 # Concat infer : N - number of inputs to concat
 #                axis - dimension number for tensors concatenation
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class CorrelationOp(Op):
     op = 'Correlation'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': CorrelationOp.corr_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/correlation_test.py b/model-optimizer/extensions/ops/correlation_test.py
index a47aec206..0ec121d0b 100644
--- a/model-optimizer/extensions/ops/correlation_test.py
+++ b/model-optimizer/extensions/ops/correlation_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'kind': 'op'},
                     'corr': {'type': 'Correlation', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -35,9 +36,11 @@ class TestConcatPartialInfer(unittest.TestCase):
                             [
                                 ('node_1', 'corr'),
                                 ('node_2', 'corr'),
-                                ('corr', 'node_3')],
+                                ('corr', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 3, 227, 227])},
                                 'node_2': {'shape': np.array([1, 3, 227, 227])},
                                 'corr': {'pad': 20,
diff --git a/model-optimizer/extensions/ops/ctc_greedy_decoder.py b/model-optimizer/extensions/ops/ctc_greedy_decoder.py
index 1d032cc7e..fb6dabdfb 100644
--- a/model-optimizer/extensions/ops/ctc_greedy_decoder.py
+++ b/model-optimizer/extensions/ops/ctc_greedy_decoder.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,20 +14,21 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class CTCGreedyDecoderOp(Op):
     op = 'CTCGreedyDecoder'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': CTCGreedyDecoderOp.ctc_greedy_decoder_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/ctc_greedy_decoder_test.py b/model-optimizer/extensions/ops/ctc_greedy_decoder_test.py
index b5a921782..40e3794df 100644
--- a/model-optimizer/extensions/ops/ctc_greedy_decoder_test.py
+++ b/model-optimizer/extensions/ops/ctc_greedy_decoder_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'kind': 'op'},
                     'ctc': {'type': 'CTCGreedyDecoder', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -35,9 +36,11 @@ class TestConcatPartialInfer(unittest.TestCase):
                             [
                                 ('node_1', 'ctc'),
                                 ('node_2', 'ctc'),
-                                ('ctc', 'node_3')],
+                                ('ctc', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([88, 2, 71])},
                                 'node_2': {'shape': np.array([88, 2])},
                                 'ctc': {'ctc_merge_repeated': 1}
diff --git a/model-optimizer/extensions/ops/data_augmentation.py b/model-optimizer/extensions/ops/data_augmentation.py
index c49ff92b1..46d99bfef 100644
--- a/model-optimizer/extensions/ops/data_augmentation.py
+++ b/model-optimizer/extensions/ops/data_augmentation.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,17 +20,19 @@ import copy
 
 import networkx as nx
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class DataAugmentationOp(Op):
     op = 'DataAugmentation'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': DataAugmentationOp.data_augmentation_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/data_augmentation_test.py b/model-optimizer/extensions/ops/data_augmentation_test.py
index d8b30e3e3..6d570a86f 100644
--- a/model-optimizer/extensions/ops/data_augmentation_test.py
+++ b/model-optimizer/extensions/ops/data_augmentation_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {
     'node_1': {'type': 'Identity', 'kind': 'op'},
     'da': {'type': 'DataAugmentation', 'kind': 'op'},
-    'node_3': {'type': 'Identity', 'kind': 'op'}
+    'node_3': {'type': 'Identity', 'kind': 'op'},
+    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
 }
 
 
@@ -34,9 +35,11 @@ class TestConcatPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [
                                 ('node_1', 'da'),
-                                ('da', 'node_3')],
+                                ('da', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 3, 227, 227])},
                                 'da': {'crop_width': 225,
                                        'crop_height': 225,
diff --git a/model-optimizer/extensions/ops/depth_to_space.py b/model-optimizer/extensions/ops/depth_to_space.py
index 0e75495d1..5de83b2ea 100644
--- a/model-optimizer/extensions/ops/depth_to_space.py
+++ b/model-optimizer/extensions/ops/depth_to_space.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,19 +16,21 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class DepthToSpaceOp(Op):
     op = 'DepthToSpace'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': DepthToSpaceOp.depth_to_space_infer
         }
         super().__init__(graph, mandatory_props, attrs)
@@ -50,4 +52,4 @@ class DepthToSpaceOp(Op):
         out_shape = [N, int(H * block_size), int(W * block_size), int(C / (block_size ** 2))]
         if np.prod(in_shape) != np.prod(out_shape):
             return
-        node.out_node().shape = out_shape
+        node.out_node().shape = int64_array(out_shape)
diff --git a/model-optimizer/extensions/ops/depth_to_space_test.py b/model-optimizer/extensions/ops/depth_to_space_test.py
index 26b3c4ec3..b0a0b68e8 100644
--- a/model-optimizer/extensions/ops/depth_to_space_test.py
+++ b/model-optimizer/extensions/ops/depth_to_space_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/detectionoutput_onnx.py b/model-optimizer/extensions/ops/detectionoutput_onnx.py
new file mode 100644
index 000000000..8566e8a25
--- /dev/null
+++ b/model-optimizer/extensions/ops/detectionoutput_onnx.py
@@ -0,0 +1,59 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.ops.op import Op
+
+
+class ExperimentalDetectronDetectionOutput(Op):
+    op = 'ExperimentalDetectronDetectionOutput'
+    enabled = True
+
+    def __init__(self, graph, attrs):
+        mandatory_props = dict(
+            type=__class__.op,
+            op=__class__.op,
+            infer=__class__.infer
+        )
+
+        super().__init__(graph, mandatory_props, attrs)
+
+    def backend_attrs(self):
+        return [
+            'class_agnostic_box_regression',
+            'max_detections_per_image',
+            'nms_threshold',
+            'num_classes',
+            'post_nms_count',
+            'score_threshold',
+            'max_delta_log_wh',
+            ('deltas_weights', lambda node: ','.join(map(str, node['deltas_weights'])))]
+
+    @staticmethod
+    def infer(node):
+        rois_num = node.max_detections_per_image
+        # boxes
+        node.out_node(0).shape = np.array([rois_num, 4], dtype=np.int64)
+        try:
+            # classes
+            node.out_node(1).shape = np.array([rois_num], dtype=np.int64)
+            # scores
+            node.out_node(2).shape = np.array([rois_num], dtype=np.int64)
+            # batch_ids
+            node.out_node(3).shape = np.array([rois_num], dtype=np.int64)
+        except Exception as ex:
+            print(ex)
diff --git a/model-optimizer/extensions/ops/exp.py b/model-optimizer/extensions/ops/exp.py
new file mode 100644
index 000000000..8130c1f86
--- /dev/null
+++ b/model-optimizer/extensions/ops/exp.py
@@ -0,0 +1,47 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+import networkx as nx
+import numpy as np
+
+from mo.front.caffe.extractors.utils import get_canonical_axis_index
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op, PermuteAttrs
+
+
+class ExpOp(Op):
+    op = 'Exp'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.in_nodes()) == 1
+        assert len(node.out_nodes()) == 1
+        input_node = node.in_node()
+        assert input_node.has_valid('shape')
+        node.out_node().shape = input_node.shape.copy()
+        if input_node.has_valid('value'):
+            node.out_node().value = np.exp(input_node.value)
diff --git a/model-optimizer/extensions/ops/exp_test.py b/model-optimizer/extensions/ops/exp_test.py
new file mode 100644
index 000000000..882f9e800
--- /dev/null
+++ b/model-optimizer/extensions/ops/exp_test.py
@@ -0,0 +1,76 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.exp import ExpOp
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {'node_1': {'op': 'Identity', 'kind': 'op'},
+                    'exp': {'op': 'Exp', 'kind': 'op'},
+                    'node_3': {'op': 'Identity', 'kind': 'op'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'}
+                    }
+
+
+class TestExpOp(unittest.TestCase):
+    def test_shape_only(self):
+        graph = build_graph(nodes_attributes,
+                            [('node_1', 'exp'),
+                             ('exp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
+                             'node_1': {'shape': np.array([1, 3, 10, 20])},
+                             })
+
+        exp_node = Node(graph, 'exp')
+        ExpOp.infer(exp_node)
+        exp_shape = np.array([1, 3, 10, 20])
+        res_shape = graph.node['node_3']['shape']
+        for i in range(0, len(exp_shape)):
+            self.assertEqual(exp_shape[i], res_shape[i])
+
+    def test_shape_and_value(self):
+        graph = build_graph(nodes_attributes,
+                            [('node_1', 'exp'),
+                             ('exp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {
+                                'node_3': {
+                                    'shape': None,
+                                    'value': None,
+                                },
+                                'node_1': {
+                                    'shape': np.array([2]),
+                                    'value': np.array([0, 1], dtype=np.float32),
+                                },
+                            })
+
+        exp_node = Node(graph, 'exp')
+        ExpOp.infer(exp_node)
+        exp_shape = np.array([2])
+        exp_value = np.array([1, 2.7182818], dtype=np.float32)
+        res_shape = graph.node['node_3']['shape']
+        res_value = graph.node['node_3']['value']
+        for i in range(0, len(exp_shape)):
+            self.assertEqual(exp_shape[i], res_shape[i])
+        for i in range(0, len(exp_value)):
+            self.assertAlmostEqual(exp_value[i], res_value[i], places=6)
diff --git a/model-optimizer/extensions/ops/gather.py b/model-optimizer/extensions/ops/gather.py
index 255fd1f56..210633d40 100644
--- a/model-optimizer/extensions/ops/gather.py
+++ b/model-optimizer/extensions/ops/gather.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,22 +14,26 @@
  limitations under the License.
 """
 
+import logging as log
+
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class Gather(Op):
     op = 'Gather'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
             'axis': 0,
-            'infer': __class__.infer
+            'in_ports_count': 3,
+            'out_ports_count': 1,
+            'infer': __class__.infer,
         }
         super().__init__(graph, mandatory_props, attrs)
 
@@ -62,6 +66,6 @@ class Gather(Op):
 
         shape = np.concatenate((data.shape[:axis], indices.shape))
         if axis < len(data.shape) - 1:
-            shape = np.concatenate((shape, data.shape[axis+1:]))
+            shape = np.concatenate((shape, data.shape[axis + 1:]))
 
         node.out_node(0).shape = np.array(shape, dtype=np.int64)
diff --git a/model-optimizer/extensions/ops/gather_test.py b/model-optimizer/extensions/ops/gather_test.py
index 4f749f78d..1a6c5ce85 100644
--- a/model-optimizer/extensions/ops/gather_test.py
+++ b/model-optimizer/extensions/ops/gather_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/grn.py b/model-optimizer/extensions/ops/grn.py
index 3a8df99af..33d3c6405 100644
--- a/model-optimizer/extensions/ops/grn.py
+++ b/model-optimizer/extensions/ops/grn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,16 +17,19 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
 class GRNOp(Op):
     op = 'GRN'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': copy_shape_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/grn_test.py b/model-optimizer/extensions/ops/grn_test.py
index 351023fef..6781dea89 100644
--- a/model-optimizer/extensions/ops/grn_test.py
+++ b/model-optimizer/extensions/ops/grn_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,15 +24,19 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'grn': {'type': 'GRN', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'},
+                    }
 
 
 class TestGRNOp(unittest.TestCase):
     def test_grn_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'grn'),
-                             ('grn', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('grn', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'grn': {'bias': 1}
                              })
diff --git a/model-optimizer/extensions/ops/identity.py b/model-optimizer/extensions/ops/identity.py
index 30995a150..dbc27b7cb 100644
--- a/model-optimizer/extensions/ops/identity.py
+++ b/model-optimizer/extensions/ops/identity.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,27 +13,24 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-import networkx as nx
-
-from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.front.common.partial_infer.elemental import copy_shape_infer, copy_value
+from mo.graph.graph import Graph
 from mo.ops.op import Op
-from mo.front.common.partial_infer.utils import mark_input_bins
 
 
 class IdentityOp(Op):
     op = 'Identity'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
-            'type': __class__.op,
             'op': __class__.op,
             'identity': True,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': IdentityOp.shape_infer
         }, attrs)
 
     @staticmethod
     def shape_infer(node):
-        copy_shape_infer(node)
-
+        copy_shape_infer(node, value_infer=copy_value)
diff --git a/model-optimizer/extensions/ops/instance_normalization.py b/model-optimizer/extensions/ops/instance_normalization.py
index b1c9b3713..9e2deb7d0 100644
--- a/model-optimizer/extensions/ops/instance_normalization.py
+++ b/model-optimizer/extensions/ops/instance_normalization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 
 import networkx as nx
 
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -29,7 +30,7 @@ class InstanceNormalization(Op):
     op = 'InstanceNormalization'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'op': __class__.op,
             'epsilon': None,
diff --git a/model-optimizer/extensions/ops/instance_normalization_test.py b/model-optimizer/extensions/ops/instance_normalization_test.py
index e106f47cb..5318f2fcc 100644
--- a/model-optimizer/extensions/ops/instance_normalization_test.py
+++ b/model-optimizer/extensions/ops/instance_normalization_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,12 +17,12 @@
 import unittest
 
 import networkx as nx
-
+from mo.graph.graph import Graph
 from extensions.ops.instance_normalization import InstanceNormalization
 
 
 class InstanceNormalizationOp(unittest.TestCase):
     def test_constructor_supported_attrs(self):
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         op = InstanceNormalization(graph, attrs={'epsilon': 0.1})
         self.assertEqual(op.supported_attrs(), ['epsilon'])
diff --git a/model-optimizer/extensions/ops/interp.py b/model-optimizer/extensions/ops/interp.py
index 8768582b2..b7eefc751 100644
--- a/model-optimizer/extensions/ops/interp.py
+++ b/model-optimizer/extensions/ops/interp.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,11 +17,9 @@
 import inspect
 import logging as log
 
-import networkx as nx
-
 from extensions.ops.resize_factor_utils import factor_update
 from mo.front.common.layout import get_batch_dim, get_features_dim, get_height_dim, get_width_dim, shape_for_layout
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.utils import refer_to_faq_msg
 
@@ -29,13 +27,15 @@ from mo.utils.utils import refer_to_faq_msg
 class InterpOp(Op):
     op = 'Interp'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
             'factor': None,
             'align_corners': 1,
             'parse_2nd_input': 'value',
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': InterpOp.interp_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/interp_test.py b/model-optimizer/extensions/ops/interp_test.py
index cf2bbc928..b2670d26c 100644
--- a/model-optimizer/extensions/ops/interp_test.py
+++ b/model-optimizer/extensions/ops/interp_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'value': None, 'kind': 'data'},
                     'interp': {'type': 'Interp', 'kind': 'op', 'factor': None, 'parse_2nd_input': 'value'},
-                    'node_3': {'type': 'Identity', 'shape': None, 'value': None, 'kind': 'data'}
+                    'node_3': {'type': 'Identity', 'shape': None, 'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -33,8 +34,10 @@ class TestInterpOp(unittest.TestCase):
     def test_caffe_interp_infer_shrink(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 1025, 2049])},
                              'interp': {'shrink_factor': 2,
                                         'height': 0,
@@ -55,8 +58,10 @@ class TestInterpOp(unittest.TestCase):
     def test_caffe_interp_infer_wh(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 1024, 1, 1])},
                              'interp': {'width': 65,
                                         'height': 33,
@@ -77,8 +82,10 @@ class TestInterpOp(unittest.TestCase):
     def test_caffe_interp_infer_zoom(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 256, 33, 65])},
                              'interp': {'zoom_factor': 2,
                                         'height': 0,
@@ -99,8 +106,10 @@ class TestInterpOp(unittest.TestCase):
     def test_caffe_interp_infer_zoom_shrink(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 256, 33, 65])},
                              'interp': {'zoom_factor': 2,
                                         'height': 0,
@@ -121,8 +130,10 @@ class TestInterpOp(unittest.TestCase):
     def test_caffe_interp_infer_zoom_shrink_error(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 256, 33, 65])},
                              'interp': {'zoom_factor': 0,
                                         'height': 0,
@@ -140,8 +151,10 @@ class TestInterpOp(unittest.TestCase):
     def test_caffe_interp_infer_zoom_default(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 256, 33, 65])},
                              'interp': {'zoom_factor': 1,
                                         'height': 0,
@@ -164,8 +177,10 @@ class TestInterpOp(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'interp'),
                              ('node_2', 'interp'),
-                             ('interp', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('interp', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 256, 33, 66])},
                              'node_2': {'shape': np.array([1, 1, 3, 6])},
                              'interp': {'zoom_factor': 1,
diff --git a/model-optimizer/extensions/ops/lstm_cell.py b/model-optimizer/extensions/ops/lstm_cell.py
index 1d1c54530..75811c4e6 100644
--- a/model-optimizer/extensions/ops/lstm_cell.py
+++ b/model-optimizer/extensions/ops/lstm_cell.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.utils import mark_input_bins
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -40,17 +40,32 @@ class LSTMCell(Op):
     '''
     op = 'LSTMCell'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
-            'infer': __class__.infer
+            'infer': __class__.infer,
+            'in_ports_count': 5,
+            'out_ports_count': 2,
         }
         super().__init__(graph, mandatory_props, attrs)
 
     def supported_attrs(self):
         return [
             'hidden_size',  # number of the elements in hidden cell size
+            'activations',
+            'activation_alpha',
+            'activation_beta',
+            'clip',
+        ]
+
+    def backend_attrs(self):
+        return [
+            'hidden_size',  # number of the elements in hidden cell size
+            ('activations', lambda node: ','.join(node.activations) if node.activations is not None else None),
+            'activation_alpha',
+            'activation_beta',
+            'clip',
         ]
 
     @staticmethod
diff --git a/model-optimizer/extensions/ops/lstm_sequence.py b/model-optimizer/extensions/ops/lstm_sequence.py
index 0f3c63b5b..ad590bbdd 100644
--- a/model-optimizer/extensions/ops/lstm_sequence.py
+++ b/model-optimizer/extensions/ops/lstm_sequence.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,15 +14,11 @@
  limitations under the License.
 """
 
-import logging as log
-
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.utils import mark_input_bins
-from mo.graph.graph import Node
+from mo.graph.graph import Node, add_opoutput, Graph
 from mo.ops.op import Op
-from mo.utils.utils import refer_to_faq_msg
 
 
 class LSTMSequence(Op):
@@ -46,14 +42,19 @@ class LSTMSequence(Op):
     """
     op = 'LSTMSequence'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': '__LSTMSequence',   # should be never emitted to IR; for debugging purposes
             'op': __class__.op,
             'blobs_wrb': False,
             'has_num_directions': False,
             'direction': 'forward',
-            'infer': __class__.infer
+            'num_layers': 1,
+            'infer': __class__.infer,
+            'blob_bidirectional_split': lambda node: (
+                LSTMSequence.split_helper(node, 0, 'forward'),
+                LSTMSequence.split_helper(node, 1, 'reverse')
+            )
         }
         super().__init__(graph, mandatory_props, attrs)
 
@@ -74,13 +75,21 @@ class LSTMSequence(Op):
         ]
 
     @staticmethod
+    def split_helper(node, index: int, direction: str):
+        return Op._create_data_node(
+            node.graph,
+            name=node.name + '/SplittedBiLSTM/{}/'.format(direction),
+            attrs={'value': node.value[index], 'shape': np.array(node.value[index].shape, dtype=np.int64)}
+        )
+
+    @staticmethod
     def infer(node: Node):
         # there are limitations coming from ONNX LSTM definition and normalization rules
         assert len(node.in_nodes()) >= 3  # X, W and R
         assert len(node.in_nodes()) <= 7
         assert len(node.out_nodes()) <= 3
         assert node.batch_dim <= 1
-        assert node.sequence_dim <=1
+        assert node.sequence_dim <= 1
         assert node.batch_dim != node.sequence_dim
 
         assert node.direction in ['forward', 'reverse', 'bidirectional']
@@ -91,11 +100,21 @@ class LSTMSequence(Op):
             mark_input_bins(node)
         input_shape = node.in_node(0).shape
         assert len(input_shape) == 3
+
+        for port in [2, 3]:
+            if port in node.in_nodes() and len(node.in_node(port).in_nodes()) > 0 and \
+               'zero_shapes' in node.in_node(port).in_node():
+                for i in node.in_node(port).in_node().zero_shapes:
+                    if node.in_node(port).shape[i] != input_shape[i]:
+                        node.in_node(port).value = np.repeat(node.in_node(port).value, input_shape[i], axis=i)
+                        node.in_node(port).shape[i] = input_shape[i]
+
         out_shape = np.array([input_shape[node.sequence_dim], input_shape[node.batch_dim], node.hidden_size], dtype=np.int64)
         assert not node.has_num_directions or node.sequence_dim == 0, \
             'If has_num_directions == True, then node.sequence_dim should be equal 0, but it is {}'.format(
                 node.sequence_dim)
         num_directions = 2 if node.direction in ['bidirectional'] else 1
+        num_layers = node.num_layers
         if node.has_num_directions:
             # insert extra dimension to output shape for num_directions
             out_shape = np.insert(out_shape, 1, np.int64(num_directions))
@@ -103,15 +122,16 @@ class LSTMSequence(Op):
         # extra outputs for hidden/cell states
         state_size = np.array([input_shape[1], node.hidden_size], dtype=np.int64)
         if node.has_num_directions:
-            state_size = np.insert(state_size, 0, num_directions)
+            state_size = np.insert(state_size, 0, num_directions*num_layers)
         for i in [1,2]:
             if i not in node.out_nodes():
                 data_node = Op._create_data_node(
                     node.graph,
                     name=node.node+'/ExtraOutput/' + str(i),
-                    attrs={'is_output': True, 'executable': None}
+                    attrs={'executable': True}
                 )
                 node.graph.add_edge(node.id, data_node.id, key=0, out=i)
+                add_opoutput(node.graph, data_node.id, 0, False)
             else:
                 data_node = node.out_node(i)
             data_node.shape = state_size.copy()
diff --git a/model-optimizer/extensions/ops/merge.py b/model-optimizer/extensions/ops/merge.py
index 040cbf578..a106c9085 100644
--- a/model-optimizer/extensions/ops/merge.py
+++ b/model-optimizer/extensions/ops/merge.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class Merge(Op):
     op = 'Merge'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
             'infer': __class__.merge_infer
@@ -51,4 +51,4 @@ class Merge(Op):
                 node.out_node().value = tensor.value.copy() if tensor.has_valid('value') else None
 
         tensor = inferred_nodes[0]
-        node.out_node().shape = tensor.shape
+        node.out_node().shape = int64_array(tensor.shape)
diff --git a/model-optimizer/extensions/ops/merge_test.py b/model-optimizer/extensions/ops/merge_test.py
index 755da1ab8..f6ee19dcd 100644
--- a/model-optimizer/extensions/ops/merge_test.py
+++ b/model-optimizer/extensions/ops/merge_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/mvn.py b/model-optimizer/extensions/ops/mvn.py
index a00d93579..bcf65a20b 100644
--- a/model-optimizer/extensions/ops/mvn.py
+++ b/model-optimizer/extensions/ops/mvn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,7 +25,7 @@ class MVN(Op):
     op = 'MVN'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
@@ -32,6 +33,8 @@ class MVN(Op):
             'eps': None,
             'across_channels': 0,
             'normalize_variance': 1,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': copy_shape_infer
         }, attrs)
 
diff --git a/model-optimizer/extensions/ops/normalize.py b/model-optimizer/extensions/ops/normalize.py
index cb6a8445b..c7cad951e 100644
--- a/model-optimizer/extensions/ops/normalize.py
+++ b/model-optimizer/extensions/ops/normalize.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,12 +23,14 @@ class NormalizeOp(Op):
     op = 'Normalize'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
             'eps': None,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': copy_shape_infer
         }, attrs)
 
diff --git a/model-optimizer/extensions/ops/normalize_test.py b/model-optimizer/extensions/ops/normalize_test.py
index 8a15fd6db..5a57e5e51 100644
--- a/model-optimizer/extensions/ops/normalize_test.py
+++ b/model-optimizer/extensions/ops/normalize_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,7 +24,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'norm': {'type': 'Normalize', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -32,8 +33,10 @@ class TestNormalize(unittest.TestCase):
     def test_region_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'norm'),
-                             ('norm', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('norm', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'norm': {}
                              })
diff --git a/model-optimizer/extensions/ops/pack.py b/model-optimizer/extensions/ops/pack.py
index c6a241db5..705f5bdd4 100644
--- a/model-optimizer/extensions/ops/pack.py
+++ b/model-optimizer/extensions/ops/pack.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,16 +17,17 @@
 import numpy as np
 import networkx as nx
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class PackOp(Op):
     op = 'Pack'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
-            'op': __class__.op
+            'op': __class__.op,
+            'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
diff --git a/model-optimizer/extensions/ops/power_file.py b/model-optimizer/extensions/ops/power_file.py
index 50177f9fb..bfe9aabf9 100644
--- a/model-optimizer/extensions/ops/power_file.py
+++ b/model-optimizer/extensions/ops/power_file.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,10 +25,12 @@ class PowerFileOp(Op):
     op = 'PowerFile'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': copy_shape_infer
         }, attrs)
 
diff --git a/model-optimizer/extensions/ops/prediction_heatmap.py b/model-optimizer/extensions/ops/prediction_heatmap.py
index 0db515cc4..35e334b06 100644
--- a/model-optimizer/extensions/ops/prediction_heatmap.py
+++ b/model-optimizer/extensions/ops/prediction_heatmap.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,17 +17,19 @@
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class PredictionHeatmapOp(Op):
     op = 'PredictionHeatmap'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': PredictionHeatmapOp.infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/prelu.py b/model-optimizer/extensions/ops/prelu.py
index 2825ae0f0..2aa02df1a 100644
--- a/model-optimizer/extensions/ops/prelu.py
+++ b/model-optimizer/extensions/ops/prelu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 from mo.front.common.partial_infer.utils import mark_input_bins
 
@@ -25,10 +26,12 @@ class PreluOp(Op):
     op = 'PReLU'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': PreluOp.prelu_shape_infer
         }, attrs)
 
diff --git a/model-optimizer/extensions/ops/priorbox.py b/model-optimizer/extensions/ops/priorbox.py
index e494097d2..1793c6212 100644
--- a/model-optimizer/extensions/ops/priorbox.py
+++ b/model-optimizer/extensions/ops/priorbox.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,19 +14,18 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.layout import get_width_dim, get_height_dim
 from mo.front.extractor import attr_getter
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class PriorBoxOp(Op):
     op = 'PriorBox'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
@@ -34,6 +33,8 @@ class PriorBoxOp(Op):
             'max_size': np.array([]),
             'min_size': np.array([]),
             'aspect_ratio': np.array([]),
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': PriorBoxOp.priorbox_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/priorbox_clustered.py b/model-optimizer/extensions/ops/priorbox_clustered.py
index e1fe98392..f26d905fe 100644
--- a/model-optimizer/extensions/ops/priorbox_clustered.py
+++ b/model-optimizer/extensions/ops/priorbox_clustered.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,17 +19,19 @@ import numpy as np
 
 from mo.front.common.layout import get_width_dim, get_height_dim
 from mo.front.extractor import attr_getter
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class PriorBoxClusteredOp(Op):
     op = 'PriorBoxClustered'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': PriorBoxClusteredOp.priorbox_clustered_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/priorbox_clustered_test.py b/model-optimizer/extensions/ops/priorbox_clustered_test.py
index 849ba7e02..35bb3069c 100644
--- a/model-optimizer/extensions/ops/priorbox_clustered_test.py
+++ b/model-optimizer/extensions/ops/priorbox_clustered_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'data'},
                     'node_2': {'type': 'Identity', 'value': None, 'kind': 'data'},
                     'pbc': {'type': 'PriorBoxClustered', 'value': None, 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'data'}
+                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -35,9 +36,11 @@ class TestPriorBoxClusteredPartialInfer(unittest.TestCase):
                             [
                                 ('node_1', 'pbc'),
                                 ('node_2', 'pbc'),
-                                ('pbc', 'node_3')],
+                                ('pbc', 'node_3'),
+                                ('node_3', 'op_output')
+                             ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 384, 19, 19])},
                                 'node_2': {'shape': np.array([1, 3, 300, 300])},
                                 'pbc': {'flip': 0, 'clip': 0, 'variance': [0.1, 0.1, 0.2, 0.2],
@@ -58,9 +61,11 @@ class TestPriorBoxClusteredPartialInfer(unittest.TestCase):
                             [
                                 ('node_1', 'pbc'),
                                 ('node_2', 'pbc'),
-                                ('pbc', 'node_3')],
+                                ('pbc', 'node_3'),
+                                ('node_3', 'op_output')
+                             ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 19, 19, 384])},
                                 'node_2': {'shape': np.array([1, 300, 300, 3])},
                                 'pbc': {'flip': 0, 'clip': 0, 'variance': [0.1, 0.1, 0.2, 0.2],
diff --git a/model-optimizer/extensions/ops/priorbox_test.py b/model-optimizer/extensions/ops/priorbox_test.py
index fbb42a48b..74e7e1a06 100644
--- a/model-optimizer/extensions/ops/priorbox_test.py
+++ b/model-optimizer/extensions/ops/priorbox_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,8 +24,9 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'data'},
                     'pb': {'type': 'PriorBox', 'value': None, 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'data'}
-                   }
+                    'node_3': {'type': 'Identity', 'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
+                    }
 
 
 class TestPriorBoxPartialInfer(unittest.TestCase):
@@ -33,9 +34,11 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [
                                 ('node_1', 'pb'),
-                                ('pb', 'node_3')],
+                                ('pb', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 384, 19, 19])},
                                 'pb': {
                                     'aspect_ratio': np.array([1]),
@@ -47,7 +50,7 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph.graph['layout'] = 'NCHW'
         pb_node = Node(graph, 'pb')
         PriorBoxOp.priorbox_infer(pb_node)
-        exp_shape = np.array([1, 2, 4*19*19*2])
+        exp_shape = np.array([1, 2, 4 * 19 * 19 * 2])
         res_shape = graph.node['node_3']['shape']
         for i in range(0, len(exp_shape)):
             self.assertEqual(exp_shape[i], res_shape[i])
@@ -56,9 +59,11 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [
                                 ('node_1', 'pb'),
-                                ('pb', 'node_3')],
+                                ('pb', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 384, 19, 19])},
                                 'pb': {
                                     'aspect_ratio': np.array([1, 2, 0.5]),
@@ -70,7 +75,7 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph.graph['layout'] = 'NCHW'
         pb_node = Node(graph, 'pb')
         PriorBoxOp.priorbox_infer(pb_node)
-        exp_shape = np.array([1, 2, 4*19*19*4])
+        exp_shape = np.array([1, 2, 4 * 19 * 19 * 4])
         res_shape = graph.node['node_3']['shape']
         for i in range(0, len(exp_shape)):
             self.assertEqual(exp_shape[i], res_shape[i])
@@ -79,9 +84,11 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [
                                 ('node_1', 'pb'),
-                                ('pb', 'node_3')],
+                                ('pb', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 19, 19, 384])},
                                 'pb': {
                                     'aspect_ratio': np.array([1]),
@@ -93,7 +100,7 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph.graph['layout'] = 'NHWC'
         pb_node = Node(graph, 'pb')
         PriorBoxOp.priorbox_infer(pb_node)
-        exp_shape = np.array([1, 2, 4*19*19*2])
+        exp_shape = np.array([1, 2, 4 * 19 * 19 * 2])
         res_shape = graph.node['node_3']['shape']
         for i in range(0, len(exp_shape)):
             self.assertEqual(exp_shape[i], res_shape[i])
@@ -102,9 +109,11 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [
                                 ('node_1', 'pb'),
-                                ('pb', 'node_3')],
+                                ('pb', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 19, 19, 384])},
                                 'pb': {
                                     'aspect_ratio': np.array([1, 2, 0.5]),
@@ -116,7 +125,7 @@ class TestPriorBoxPartialInfer(unittest.TestCase):
         graph.graph['layout'] = 'NHWC'
         pb_node = Node(graph, 'pb')
         PriorBoxOp.priorbox_infer(pb_node)
-        exp_shape = np.array([1, 2, 4*19*19*4])
+        exp_shape = np.array([1, 2, 4 * 19 * 19 * 4])
         res_shape = graph.node['node_3']['shape']
         for i in range(0, len(exp_shape)):
             self.assertEqual(exp_shape[i], res_shape[i])
diff --git a/model-optimizer/extensions/ops/priorgridgenerator_onnx.py b/model-optimizer/extensions/ops/priorgridgenerator_onnx.py
new file mode 100644
index 000000000..7bfdba8cb
--- /dev/null
+++ b/model-optimizer/extensions/ops/priorgridgenerator_onnx.py
@@ -0,0 +1,52 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.ops.op import Op
+
+
+class ExperimentalDetectronPriorGridGenerator(Op):
+    op = 'ExperimentalDetectronPriorGridGenerator'
+
+    def __init__(self, graph, attrs):
+        mandatory_props = dict(
+            type=__class__.op,
+            op=__class__.op,
+            infer=__class__.infer,
+        )
+        super().__init__(graph, mandatory_props, attrs)
+
+    def backend_attrs(self):
+        return [
+            'flatten',
+            'h',
+            'w',
+            'stride_x',
+            'stride_y',
+        ]
+
+    @staticmethod
+    def infer(node):
+        input_shape = node.in_node(0).shape
+        priors_num = input_shape[0]
+        grid_h = node.in_node(1).shape[2]
+        grid_w = node.in_node(1).shape[3]
+        if node.flatten:
+            out_shape = np.array([grid_h * grid_w * priors_num, 4], dtype=np.int64)
+        else:
+            out_shape = np.array([grid_h, grid_w, priors_num, 4], dtype=np.int64)
+        node.out_node(0).shape = out_shape
diff --git a/model-optimizer/extensions/ops/proposal.py b/model-optimizer/extensions/ops/proposal.py
index 7eba530c8..8b7891b53 100644
--- a/model-optimizer/extensions/ops/proposal.py
+++ b/model-optimizer/extensions/ops/proposal.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,19 +18,21 @@ import networkx as nx
 import numpy as np
 
 from mo.front.extractor import attr_getter
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class ProposalOp(Op):
     op = 'Proposal'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
             'post_nms_topn': 300,  # default in caffe-shared
-            'infer': ProposalOp.proposal_infer
+            'infer': ProposalOp.proposal_infer,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
@@ -59,6 +61,9 @@ class ProposalOp(Op):
             'framework',
             'box_coordinate_scale',
             'box_size_scale',
+            'normalize',
+            'clip_after_nms',
+            'clip_before_nms',
         ]
 
     @staticmethod
diff --git a/model-optimizer/extensions/ops/proposal_onnx.py b/model-optimizer/extensions/ops/proposal_onnx.py
new file mode 100644
index 000000000..56f78f50e
--- /dev/null
+++ b/model-optimizer/extensions/ops/proposal_onnx.py
@@ -0,0 +1,45 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.ops.op import Op
+
+
+class ExperimentalDetectronGenerateProposalsSingleImage(Op):
+    op = 'ExperimentalDetectronGenerateProposalsSingleImage'
+
+    def __init__(self, graph, attrs):
+        mandatory_props = dict(
+            type=__class__.op,
+            op=__class__.op,
+            infer=__class__.infer
+        )
+
+        super().__init__(graph, mandatory_props, attrs)
+
+    def backend_attrs(self):
+        return [
+            'min_size',
+            'nms_threshold',
+            'post_nms_count',
+            'pre_nms_count'
+        ]
+
+    @staticmethod
+    def infer(node):
+        node.out_node(0).shape = np.array([node.post_nms_count, 4], dtype=np.int64)
+        node.out_node(1).shape = np.array([node.post_nms_count], dtype=np.int64)
diff --git a/model-optimizer/extensions/ops/proposal_python_example.py b/model-optimizer/extensions/ops/proposal_python_example.py
index 80c7a5bae..cf9bcaeb9 100644
--- a/model-optimizer/extensions/ops/proposal_python_example.py
+++ b/model-optimizer/extensions/ops/proposal_python_example.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,13 +18,14 @@ import networkx as nx
 
 from extensions.ops.proposal import ProposalOp
 from mo.front.caffe.extractor import register_caffe_python_extractor
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
 class ProposalPythonExampleOp(Op):
     op = 'Proposal'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
diff --git a/model-optimizer/extensions/ops/proposal_test.py b/model-optimizer/extensions/ops/proposal_test.py
index 0298468c8..e16b14777 100644
--- a/model-optimizer/extensions/ops/proposal_test.py
+++ b/model-optimizer/extensions/ops/proposal_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'proposal': {'type': 'proposal', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -33,8 +34,10 @@ class TestProposal(unittest.TestCase):
     def test_proposal_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'proposal'),
-                             ('proposal', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('proposal', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'proposal': {'post_nms_topn': 2, **layout_attrs()}
                              })
diff --git a/model-optimizer/extensions/ops/psroipooling.py b/model-optimizer/extensions/ops/psroipooling.py
index 246e60125..da84db8f8 100644
--- a/model-optimizer/extensions/ops/psroipooling.py
+++ b/model-optimizer/extensions/ops/psroipooling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,17 +17,20 @@
 import networkx as nx
 
 from mo.front.common.layout import get_batch_dim, shape_for_layout
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class PSROIPoolingOp(Op):
     op = 'PSROIPooling'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'mode': 'average',
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': PSROIPoolingOp.psroipooling_infer
         }
         super().__init__(graph, mandatory_props, attrs)
@@ -36,7 +39,10 @@ class PSROIPoolingOp(Op):
         return [
             'spatial_scale',
             'output_dim',
-            'group_size'
+            'group_size',
+            'mode',
+            'spatial_bins_x',
+            'spatial_bins_y',
         ]
 
     @staticmethod
diff --git a/model-optimizer/extensions/ops/psroipooling_test.py b/model-optimizer/extensions/ops/psroipooling_test.py
index 10cdee1cf..c55bef8ce 100644
--- a/model-optimizer/extensions/ops/psroipooling_test.py
+++ b/model-optimizer/extensions/ops/psroipooling_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'kind': 'op'},
                     'psroipool': {'type': 'PSROIPooling', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -34,8 +35,10 @@ class TestPSROIPooling(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'psroipool'),
                              ('node_2', 'psroipool'),
-                             ('psroipool', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('psroipool', 'node_3'),
+                             ('node_3', 'op_output')
+                            ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([100, 5])},
                              'psroipool': {'output_dim': 4, 'group_size': 15}
@@ -52,8 +55,10 @@ class TestPSROIPooling(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'psroipool'),
                              ('node_2', 'psroipool'),
-                             ('psroipool', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('psroipool', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 227, 227, 3])},
                              'node_2': {'shape': np.array([100, 5])},
                              'psroipool': {'output_dim': 4, 'group_size': 15}
@@ -70,8 +75,10 @@ class TestPSROIPooling(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'psroipool'),
                              ('node_2', 'psroipool'),
-                             ('psroipool', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('psroipool', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': None},
                              'node_2': {'shape': np.array([100, 5])},
                              'psroipool': {'output_dim': 4, 'group_size': 224}
diff --git a/model-optimizer/extensions/ops/quantize.py b/model-optimizer/extensions/ops/quantize.py
new file mode 100644
index 000000000..1bd79958f
--- /dev/null
+++ b/model-optimizer/extensions/ops/quantize.py
@@ -0,0 +1,98 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+def broadcastable(broadcast_from, broadcast_to):
+    """Check if shape broadcast_from can be broadcasted to broadcast_to"""
+    broadcast_to = np.array(broadcast_to, dtype=np.int64)
+    broadcast_from = np.array(broadcast_from, dtype=np.int64)
+    if broadcast_from.size > broadcast_to.size:
+        return False
+    broadcast_from = np.concatenate(
+        (np.array([1] * (broadcast_to.size - broadcast_from.size), dtype=np.int64), broadcast_from))
+    return np.all(np.logical_or(broadcast_from == 1, broadcast_from == broadcast_to))
+
+
+class QuantizeOp(Op):
+    op = 'Quantize'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'levels': None,
+            'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    def supported_attrs(self):
+        return [
+            'levels',
+        ]
+
+    @staticmethod
+    def infer(node: Node):
+        assert len(node.in_nodes()) == 5
+        assert len(node.out_nodes()) == 1
+        inputs = [node.in_node(i) for i in range(5)]
+        x, input_low, input_high, output_low, output_high = inputs
+        assert x.has_valid('shape')
+        # TODO Check all input[1..4] shapes are broadcastable to intput[0] shape
+        assert all([broadcastable(inputs[i].shape, inputs[0].shape) for i in range(1, 5)]), \
+            "Not all shapes from Quantize inputs can be broadcasted to input[0] for node {}".format(
+                node.soft_get('name'))
+        node.out_node().shape = x.shape.copy()
+
+        if all([node.in_node(i).has_valid('value') for i in range(5)]):
+            x, input_low, input_high, output_low, output_high = \
+                [np.array(np.broadcast_to(node.value, x.value.shape)) for node in inputs]
+
+            assert node.has_valid('levels')
+            assert isinstance(node.levels, int)
+
+            underflow_mask = x <= input_low
+            overflow_mask = x > input_high
+            # pylint: disable=assignment-from-no-return
+            middle_mask = np.logical_not(np.logical_or(underflow_mask, overflow_mask))
+
+            def middle_part(x, input_low, input_high, output_low, output_high):
+                return np.round(
+                    (x - input_low) / (input_high - input_low) * (node.levels - 1)
+                ) / (node.levels - 1) * (output_high - output_low) + output_low
+
+            output = np.zeros_like(x)
+            # pylint: disable=unsupported-assignment-operation
+            output[middle_mask] = middle_part(
+                x[middle_mask],
+                input_low[middle_mask],
+                input_high[middle_mask],
+                output_low[middle_mask],
+                output_high[middle_mask],
+            )
+
+            # pylint: disable=unsupported-assignment-operation
+            output[overflow_mask] = output_high[overflow_mask]
+            # pylint: disable=unsupported-assignment-operation
+            output[underflow_mask] = output_low[underflow_mask]
+
+            node.out_node().value = output
diff --git a/model-optimizer/extensions/ops/quantize_test.py b/model-optimizer/extensions/ops/quantize_test.py
new file mode 100644
index 000000000..a3fae9771
--- /dev/null
+++ b/model-optimizer/extensions/ops/quantize_test.py
@@ -0,0 +1,135 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import unittest
+
+import numpy as np
+
+from extensions.ops.quantize import QuantizeOp, broadcastable
+from mo.graph.graph import Node
+from mo.utils.unittest.graph import build_graph
+
+
+class TestBroadcastable(unittest.TestCase):
+    def test_matching(self):
+        self.assertTrue(broadcastable([1, 2, 3], [1, 2, 3]))
+
+    def test_incomplete(self):
+        self.assertTrue(broadcastable([1, 1, 1], [1, 2, 3]))
+        self.assertTrue(broadcastable([2, 3], [1, 2, 3]))
+        self.assertTrue(broadcastable([1, 3], [1, 2, 3]))
+        self.assertTrue(broadcastable([1, 1], [1, 2, 3]))
+        self.assertTrue(broadcastable([], [1, 2, 3]))
+        self.assertTrue(broadcastable([1], [1, 2, 3]))
+
+    def test_reverse_incomplete(self):
+        self.assertFalse(broadcastable([1, 2, 3], [1, 1, 1]))
+        self.assertFalse(broadcastable([1, 2, 3], [2, 3]))
+        self.assertFalse(broadcastable([1, 2, 3], [1, 3]))
+        self.assertFalse(broadcastable([1, 2, 3], [1, 1]))
+        self.assertFalse(broadcastable( [1, 2, 3], []))
+        self.assertFalse(broadcastable([1, 2, 3], [1]))
+
+    def test_invalid(self):
+        self.assertFalse(broadcastable([3, 2, 1], [1, 2, 3]))
+        self.assertFalse(broadcastable([5], [6]))
+        self.assertFalse(broadcastable([5], [1]))
+
+
+nodes_attributes = {'node_in_1': {'op': 'Identity', 'kind': 'op'},
+                    'node_in_2': {'op': 'Identity', 'kind': 'op'},
+                    'node_in_3': {'op': 'Identity', 'kind': 'op'},
+                    'node_in_4': {'op': 'Identity', 'kind': 'op'},
+                    'node_in_5': {'op': 'Identity', 'kind': 'op'},
+                    'quantize': {'op': 'Quantize', 'kind': 'op', 'levels': 2},
+                    'node_out_1': {'op': 'Identity', 'kind': 'op'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'}
+                    }
+
+
+class TestQuantizeOp(unittest.TestCase):
+    def test_shape_only(self):
+        graph = build_graph(nodes_attributes,
+                            [('node_in_1', 'quantize'),
+                             ('node_in_2', 'quantize'),
+                             ('node_in_3', 'quantize'),
+                             ('node_in_4', 'quantize'),
+                             ('node_in_5', 'quantize'),
+                             ('quantize', 'node_out_1'),
+                             ('node_out_1', 'op_output')
+                             ],
+                            {'node_out_1': {'shape': None},
+                             'node_in_1': {'shape': np.array([1, 3, 10, 20])},
+                             'node_in_2': {'shape': np.array([1, 3, 10, 20])},
+                             'node_in_3': {'shape': np.array([1, 3, 10, 20])},
+                             'node_in_4': {'shape': np.array([1, 3, 10, 20])},
+                             'node_in_5': {'shape': np.array([1, 3, 10, 20])},
+                             })
+
+        quantize_node = Node(graph, 'quantize')
+        QuantizeOp.infer(quantize_node)
+        quantize_shape = np.array([1, 3, 10, 20])
+        res_shape = graph.node['node_out_1']['shape']
+        for i in range(0, len(quantize_shape)):
+            self.assertEqual(quantize_shape[i], res_shape[i])
+
+    def test_shape_and_value(self):
+        graph = build_graph(nodes_attributes,
+                            [('node_in_1', 'quantize'),
+                             ('node_in_2', 'quantize'),
+                             ('node_in_3', 'quantize'),
+                             ('node_in_4', 'quantize'),
+                             ('node_in_5', 'quantize'),
+                             ('quantize', 'node_out_1'),
+                             ('node_out_1', 'op_output')
+                             ],
+                            {
+                                'node_out_1': {
+                                    'shape': None,
+                                    'value': None,
+                                },
+                                'node_in_1': {
+                                    'shape': np.array([4]),
+                                    'value': np.array([5, 17, 0, 100], dtype=np.float32),
+                                },
+                                'node_in_2': {
+                                    'shape': np.array([4]),
+                                    'value': np.array([0, 12, 12, 12], dtype=np.float32),
+                                },
+                                'node_in_3': {
+                                    'shape': np.array([4]),
+                                    'value': np.array([10, 20, 20, 20], dtype=np.float32),
+                                },
+                                'node_in_4': {
+                                    'shape': np.array([4]),
+                                    'value': np.array([0, 0, 0, 0], dtype=np.float32),
+                                },
+                                'node_in_5': {
+                                    'shape': np.array([4]),
+                                    'value': np.array([1, 1, 1, 1], dtype=np.float32),
+                                },
+                            })
+
+        exp_node = Node(graph, 'quantize')
+        QuantizeOp.infer(exp_node)
+        quantize_shape = np.array([4])
+        quantize_value = np.array([0, 1, 0, 1], dtype=np.float32)
+        res_shape = graph.node['node_out_1']['shape']
+        res_value = graph.node['node_out_1']['value']
+        for i in range(0, len(quantize_shape)):
+            self.assertEqual(quantize_shape[i], res_shape[i])
+        for i in range(0, len(quantize_value)):
+            self.assertAlmostEqual(quantize_value[i], res_value[i], places=6)
diff --git a/model-optimizer/extensions/ops/range.py b/model-optimizer/extensions/ops/range.py
new file mode 100644
index 000000000..2b02ce1ff
--- /dev/null
+++ b/model-optimizer/extensions/ops/range.py
@@ -0,0 +1,71 @@
+"""
+ Copyright (c) 2018-2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import logging as log
+
+import numpy as np
+
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op
+
+
+class Range(Op):
+    op = 'Range'
+
+    def __init__(self, graph: Graph, attrs: dict):
+        mandatory_props = {
+            'type': __class__.op,
+            'op': __class__.op,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
+            'infer': __class__.infer,
+        }
+        super().__init__(graph, mandatory_props, attrs)
+
+    @staticmethod
+    def infer(node: Node):
+        start = node.in_node(0)
+        limit = node.in_node(1)
+        delta = node.in_node(2)
+        output = node.out_node()
+
+        if not start.has_valid('value') or not limit.has_valid('value') or not delta.has_valid('value'):
+            log.error("Range operation is supported with constant inputs only")
+            return
+        if 'type' in node.pb.attr:
+            from mo.front.tf.extractors.utils import tf_dtype_extractor
+            result_data_type = tf_dtype_extractor(node.pb.attr["type"].type)
+        else:
+            result_data_type = start.value.dtype
+        output.value = np.arange(start.value, limit.value, delta.value, dtype=result_data_type)
+        output.shape = np.array(output.value.shape, dtype=np.int64)
+
+        # Some notes on the automatic result data type infer. The tf.range does is differently than np.arange. Numpy
+        # by default creates array with elements of type int64 and float64, but TF does not widen data types and keep them
+        # int32 and float32.
+        # Compare:
+
+        # >>> tf.range(1, 5, 0.5)
+        # <tf.Tensor 'range_1:0' shape = (8,) dtype = float32>
+        # >>> tf.range(1, 5, 2)
+        # <tf.Tensor 'range_2:0' shape = (2,) dtype = int32>
+
+        # >>> np.array([0.5], dtype=np.float32)
+        # array([0.5], dtype=float32)
+        # >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([2], dtype=np.int32)).dtype
+        # dtype('int64')
+        # >>> np.arange(np.array([1], dtype=np.int32), np.array([5], dtype=np.int32), np.array([0.5], dtype=np.float32)).dtype
+        # dtype('float64')
diff --git a/model-optimizer/extensions/ops/rank.py b/model-optimizer/extensions/ops/rank.py
index ed17048cc..f6ee0cfb8 100644
--- a/model-optimizer/extensions/ops/rank.py
+++ b/model-optimizer/extensions/ops/rank.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,8 @@
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -25,9 +26,11 @@ from mo.utils.error import Error
 class Rank(Op):
     op = 'Rank'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': __class__.infer,
         }
         super().__init__(graph, mandatory_props, attrs)
@@ -37,4 +40,4 @@ class Rank(Op):
         rank = len(node.in_node(0).shape)
         out_value = np.array(rank)
         node.out_node().value = out_value
-        node.out_node().shape = out_value.shape
+        node.out_node().shape = int64_array(out_value.shape)
diff --git a/model-optimizer/extensions/ops/regionyolo.py b/model-optimizer/extensions/ops/regionyolo.py
index f47245e95..b35af5388 100644
--- a/model-optimizer/extensions/ops/regionyolo.py
+++ b/model-optimizer/extensions/ops/regionyolo.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,23 +14,24 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
 from mo.front.common.layout import get_batch_dim, get_height_dim, get_width_dim, shape_for_layout
 from mo.front.extractor import attr_getter
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class RegionYoloOp(Op):
     op = 'RegionYolo'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: Node):
+    def __init__(self, graph: Graph, attrs: Node):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': RegionYoloOp.regionyolo_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/regionyolo_test.py b/model-optimizer/extensions/ops/regionyolo_test.py
index 715163a86..070837b81 100644
--- a/model-optimizer/extensions/ops/regionyolo_test.py
+++ b/model-optimizer/extensions/ops/regionyolo_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'region': {'type': 'RegionYolo', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -33,8 +34,10 @@ class TestRegionYOLOCaffe(unittest.TestCase):
     def test_region_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'region'),
-                             ('region', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('region', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'region': {'axis': 1, 'end_axis': -1, 'do_softmax': 1, **layout_attrs()}
                              })
@@ -49,8 +52,10 @@ class TestRegionYOLOCaffe(unittest.TestCase):
     def test_region_infer_flatten(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'region'),
-                             ('region', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('region', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'region': {'end_axis': 1, 'axis': 0, 'do_softmax': 1, **layout_attrs()}
                              })
@@ -65,8 +70,10 @@ class TestRegionYOLOCaffe(unittest.TestCase):
     def test_region_infer_flatten_again(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'region'),
-                             ('region', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('region', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'region': {'end_axis': 2, 'axis': 0, 'do_softmax': 1, **layout_attrs()}
                              })
@@ -81,8 +88,10 @@ class TestRegionYOLOCaffe(unittest.TestCase):
     def test_region_infer_do_softmax(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'region'),
-                             ('region', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('region', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'region': {'do_softmax': 0, 'end_axis': -1, 'axis': 1, 'classes': 80, 'coords': 4,
                                         'mask': np.array([6, 7, 8]), **layout_attrs()}
@@ -101,8 +110,10 @@ class TestRegionYOLOTF(unittest.TestCase):
     def test_region_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'region'),
-                             ('region', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('region', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 227, 227, 3])},
                              'region': {'axis': 1, 'end_axis': -1, 'do_softmax': 1, **layout_attrs()}
                              })
@@ -117,8 +128,10 @@ class TestRegionYOLOTF(unittest.TestCase):
     def test_region_infer_do_softmax(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'region'),
-                             ('region', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('region', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 227, 227, 3])},
                              'region': {'do_softmax': 0, 'end_axis': -1, 'axis': 1, 'classes': 80, 'coords': 4,
                                         'mask': np.array([6, 7, 8]), **layout_attrs()}
diff --git a/model-optimizer/extensions/ops/reorgyolo.py b/model-optimizer/extensions/ops/reorgyolo.py
index 51a2c207e..e5bb9acd6 100644
--- a/model-optimizer/extensions/ops/reorgyolo.py
+++ b/model-optimizer/extensions/ops/reorgyolo.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,14 +17,14 @@
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
 class ReorgYoloOp(Op):
     op = 'ReorgYolo'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
@@ -46,10 +46,10 @@ class ReorgYoloOp(Op):
         stride = node.stride
 
         output_shape = np.full_like(input_shape, -1, dtype=np.int64)
-        output_shape[node.batch_dims] = input_shape[node.batch_dims]
-        output_shape[node.channel_dims] = input_shape[node.channel_dims] * stride ** 2
+        output_shape[node.batch_dims] = input_shape[node.batch_dims]  # pylint: disable=unsupported-assignment-operation
+        output_shape[node.channel_dims] = input_shape[node.channel_dims] * stride ** 2  # pylint: disable=unsupported-assignment-operation
         # Round as in caffe
-        output_shape[node.spatial_dims] = np.round(input_shape[node.spatial_dims] / stride)
+        output_shape[node.spatial_dims] = np.round(input_shape[node.spatial_dims] / stride)  # pylint: disable=unsupported-assignment-operation
 
         node.out_node().shape = output_shape
         PermuteAttrs.create_permute_attrs(node, attrs=[('channel_dims', 'input:0'), ('spatial_dims', 'input:0')])
diff --git a/model-optimizer/extensions/ops/reorgyolo_test.py b/model-optimizer/extensions/ops/reorgyolo_test.py
index 7021fd5bb..696465c55 100644
--- a/model-optimizer/extensions/ops/reorgyolo_test.py
+++ b/model-optimizer/extensions/ops/reorgyolo_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'reorg': {'type': 'ReorgYolo', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -42,8 +43,10 @@ class TestReorgYOLO(unittest.TestCase):
     def test_reorgyolo_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'reorg'),
-                             ('reorg', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('reorg', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'reorg': {'stride': 2,
                                        **layout_attrs()}
diff --git a/model-optimizer/extensions/ops/resample.py b/model-optimizer/extensions/ops/resample.py
index b227c0074..331ab67af 100644
--- a/model-optimizer/extensions/ops/resample.py
+++ b/model-optimizer/extensions/ops/resample.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,22 +16,22 @@
 
 import logging as log
 
-import networkx as nx
-
 from extensions.ops.resize_factor_utils import factor_update
 from mo.front.common.layout import get_batch_dim, get_features_dim, get_height_dim, get_width_dim, shape_for_layout
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class ResampleOp(Op):
     op = 'Resample'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
             'factor': None,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': ResampleOp.resample_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/resample_test.py b/model-optimizer/extensions/ops/resample_test.py
index bf4c4f0ad..b33ba71d7 100644
--- a/model-optimizer/extensions/ops/resample_test.py
+++ b/model-optimizer/extensions/ops/resample_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,7 +24,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'resample': {'type': 'Resample', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -32,8 +33,10 @@ class TestResampleOp(unittest.TestCase):
     def test_tf_resample_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'resample'),
-                             ('resample', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('resample', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'resample': {'antialias': 1,
                                           'height': 384,
@@ -54,8 +57,10 @@ class TestResampleOp(unittest.TestCase):
         factor = 3.0
         graph = build_graph(nodes_attributes,
                             [('node_1', 'resample'),
-                             ('resample', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('resample', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 224, 227])},
                              'resample': {'antialias': 1,
                                           'resample_type': 'LINEAR',
@@ -77,8 +82,10 @@ class TestResampleOp(unittest.TestCase):
         graph = build_graph(new_attrs,
                             [('node_1', 'resample'),
                              ('new_shape', 'resample'),
-                             ('resample', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('resample', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 224, 227, 3])},
                              'resample': {'antialias': 1,
                                           'resample_type': 'LINEAR',
diff --git a/model-optimizer/extensions/ops/resize_factor_utils.py b/model-optimizer/extensions/ops/resize_factor_utils.py
index 28424d31b..09a3557df 100644
--- a/model-optimizer/extensions/ops/resize_factor_utils.py
+++ b/model-optimizer/extensions/ops/resize_factor_utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/reverse_sequence.py b/model-optimizer/extensions/ops/reverse_sequence.py
index ff7329d53..938eba148 100644
--- a/model-optimizer/extensions/ops/reverse_sequence.py
+++ b/model-optimizer/extensions/ops/reverse_sequence.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,38 +14,38 @@
  limitations under the License.
 """
 
-import logging as log
-import networkx as nx
-import numpy as np
-
-from mo.graph.graph import Node
-from mo.ops.op import Op, PermuteAttrs
+from mo.graph.graph import Graph
+from mo.ops.op import Op
 
 
 class ReverseSequence(Op):
     op = 'ReverseSequence'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
-            #'type': not set, there shouldn't be translated to real layer
-            'seq_dim': None,
-            'batch_dim': None,
+            'type': __class__.op,
+            'seq_axis': None,
+            'batch_axis': 0,
             'op': __class__.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': __class__.infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
     def supported_attrs(self):
         return [
+            'seq_axis', 'batch_axis',
         ]
-
+    
     @staticmethod
     def infer(node):
-        if not node.has_valid('seq_dim'):
-            assert 1 in node.in_nodes()
-            assert node.in_node(1).has_valid('value')
-            assert node.in_node(1).value.size == 1
-            node['seq_dim'] = node.in_node(1).value.item()
-            node.graph.remove_edge(node.in_node(1).id, node.id)
+        input_data_shape = node.in_node(0).shape
+        assert input_data_shape is not None
+        assert node.has_valid('seq_axis')
+        assert node.has_valid('batch_axis')
+
         assert len(node.out_nodes()) == 1
-        node.out_node().shape = node.in_node().shape.copy()
+        node.out_node().shape = input_data_shape.copy()
diff --git a/model-optimizer/extensions/ops/roifeatureextractor_onnx.py b/model-optimizer/extensions/ops/roifeatureextractor_onnx.py
new file mode 100644
index 000000000..5477d9b86
--- /dev/null
+++ b/model-optimizer/extensions/ops/roifeatureextractor_onnx.py
@@ -0,0 +1,53 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.ops.op import Op
+
+
+class ExperimentalDetectronROIFeatureExtractor(Op):
+    op = 'ExperimentalDetectronROIFeatureExtractor'
+
+    def __init__(self, graph, attrs):
+        mandatory_props = dict(
+            type=__class__.op,
+            op=__class__.op,
+            infer=__class__.infer
+        )
+
+        super().__init__(graph, mandatory_props, attrs)
+
+    def backend_attrs(self):
+        return [
+            'distribute_rois_between_levels',
+            ('pyramid_scales', lambda node: ','.join(map(str, node['pyramid_scales']))),
+            'image_id',
+            'output_size',
+            'sampling_ratio',
+            'preserve_rois_order']
+
+    @staticmethod
+    def infer(node):
+        input_rois_shape = node.in_node(0).shape
+        rois_num = input_rois_shape[0]
+        input_features_level_0_shape = node.in_node(1).shape
+        channels_num = input_features_level_0_shape[1]
+        node.out_node(0).shape = np.array([rois_num, channels_num, node.output_size, node.output_size], dtype=np.int64)
+        try:
+            node.out_node(1).shape = np.array([rois_num, 4], dtype=np.int64)
+        except Exception as ex:
+            print(ex)
diff --git a/model-optimizer/extensions/ops/select.py b/model-optimizer/extensions/ops/select.py
index b377eb2ba..4af65dc8b 100644
--- a/model-optimizer/extensions/ops/select.py
+++ b/model-optimizer/extensions/ops/select.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -25,9 +25,11 @@ from mo.utils.error import Error
 class Select(Op):
     op = 'Select'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': __class__.infer,
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/select_test.py b/model-optimizer/extensions/ops/select_test.py
index 15578d30e..5fa154741 100644
--- a/model-optimizer/extensions/ops/select_test.py
+++ b/model-optimizer/extensions/ops/select_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/shufflechannel.py b/model-optimizer/extensions/ops/shufflechannel.py
index bb1036031..8577d0b78 100644
--- a/model-optimizer/extensions/ops/shufflechannel.py
+++ b/model-optimizer/extensions/ops/shufflechannel.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -27,9 +28,11 @@ class ShuffleChannelOp(Op):
     op = 'ShuffleChannel'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': None,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': copy_shape_infer
         }, attrs)
diff --git a/model-optimizer/extensions/ops/simplernms.py b/model-optimizer/extensions/ops/simplernms.py
index 15d5298e3..cd1352a72 100644
--- a/model-optimizer/extensions/ops/simplernms.py
+++ b/model-optimizer/extensions/ops/simplernms.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,17 +20,19 @@ import networkx as nx
 import numpy as np
 
 from mo.front.extractor import attr_getter
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class SimplerNMSOp(Op):
     op = 'SimplerNMS'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': SimplerNMSOp.simplernms_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/simplernms_test.py b/model-optimizer/extensions/ops/simplernms_test.py
index 08cbf5343..6c4403561 100644
--- a/model-optimizer/extensions/ops/simplernms_test.py
+++ b/model-optimizer/extensions/ops/simplernms_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -23,15 +23,18 @@ from mo.graph.graph import Node
 from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'SimplerNMS_1': {'type': 'SimplerNMS', 'kind': 'op'},
-                    'node_1': {'type': 'Identity', 'kind': 'op'}
+                    'node_1': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
 class TestSimplerNMSInfer(unittest.TestCase):
     def test_simplernms_infer_ideal(self):
         graph = build_graph(nodes_attributes,
-                            [('SimplerNMS_1', 'node_1')],
-                            {'node_1': {'is_output': True, 'shape': None},
+                            [('SimplerNMS_1', 'node_1'),
+                             ('node_1', 'op_output')
+                             ],
+                            {'node_1': {'shape': None},
                              'SimplerNMS_1': {'feat_stride': 16, 'post_nms_topn': 150, 'scale': [1, 2, 3]}
                              })
 
@@ -46,8 +49,10 @@ class TestSimplerNMSInfer(unittest.TestCase):
 
     def test_simplernms_infer_no_shape(self):
         graph = build_graph(nodes_attributes,
-                            [('SimplerNMS_1', 'node_1')],
-                            {'node_1': {'is_output': True, 'shape': None},
+                            [('SimplerNMS_1', 'node_1'),
+                             ('node_1', 'op_output')
+                             ],
+                            {'node_1': {'shape': None},
                              'SimplerNMS_1': {'feat_stride': 12, 'post_nms_topn': 150, 'scale': [1, 2, 3]}
                              })
 
diff --git a/model-optimizer/extensions/ops/spatial_transformer.py b/model-optimizer/extensions/ops/spatial_transformer.py
index 3ab42a93a..d914830ab 100644
--- a/model-optimizer/extensions/ops/spatial_transformer.py
+++ b/model-optimizer/extensions/ops/spatial_transformer.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,19 +16,19 @@
 
 import copy
 
-import networkx as nx
-
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class SpatialTransformOp(Op):
     op = 'SpatialTransformer'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': SpatialTransformOp.sp_infer
         }
         super().__init__(graph, mandatory_props, attrs)
diff --git a/model-optimizer/extensions/ops/spatial_transformer_test.py b/model-optimizer/extensions/ops/spatial_transformer_test.py
index 86b7ec26c..eac48b018 100644
--- a/model-optimizer/extensions/ops/spatial_transformer_test.py
+++ b/model-optimizer/extensions/ops/spatial_transformer_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'type': 'Identity', 'kind': 'op'},
                     'node_2': {'type': 'Identity', 'kind': 'op'},
                     'st': {'type': 'SpatialTransform', 'kind': 'op'},
-                    'node_3': {'type': 'Identity', 'kind': 'op'}
+                    'node_3': {'type': 'Identity', 'kind': 'op'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -35,9 +36,11 @@ class TestSpatialTransformInfer(unittest.TestCase):
                             [
                                 ('node_1', 'st'),
                                 ('node_2', 'st'),
-                                ('st', 'node_3')],
+                                ('st', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 3, 227, 227])},
                                 'node_2': {'shape': np.array([1, 3, 227, 227])},
                                 'st': {}
@@ -55,9 +58,11 @@ class TestSpatialTransformInfer(unittest.TestCase):
                             [
                                 ('node_1', 'st'),
                                 ('node_2', 'st'),
-                                ('st', 'node_3')],
+                                ('st', 'node_3'),
+                                ('node_3', 'op_output')
+                            ],
                             {
-                                'node_3': {'is_output': True, 'shape': None},
+                                'node_3': {'shape': None},
                                 'node_1': {'shape': np.array([1, 3, 227, 227])},
                                 'node_2': {'shape': np.array([1, 3, 227, 227])},
                                 'st': {'output_H': 200, 'output_W': 15}
diff --git a/model-optimizer/extensions/ops/splice.py b/model-optimizer/extensions/ops/splice.py
index 381559e5c..e1fd72e96 100644
--- a/model-optimizer/extensions/ops/splice.py
+++ b/model-optimizer/extensions/ops/splice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,16 +15,20 @@
 """
 
 import networkx as nx
+
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
 class Splice(Op):
     op = 'Splice'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': None,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }
         super().__init__(graph, mandatory_props, attrs)
 
diff --git a/model-optimizer/extensions/ops/splitv.py b/model-optimizer/extensions/ops/splitv.py
index 7c1fd4211..67428e9e8 100644
--- a/model-optimizer/extensions/ops/splitv.py
+++ b/model-optimizer/extensions/ops/splitv.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 import networkx as nx
 
 from mo.front.common.partial_infer.split import tf_split_v_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,12 +25,13 @@ class SplitV(Op):
     op = 'SplitV'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'Split',
             'op': 'SplitV',
             'axis': 1,
             'input_port': 0,
+            'in_ports_count': 3,
             'infer': tf_split_v_infer
         }, attrs)
 
diff --git a/model-optimizer/extensions/ops/stop_gradient.py b/model-optimizer/extensions/ops/stop_gradient.py
index 58ad9bced..8db3eeac8 100644
--- a/model-optimizer/extensions/ops/stop_gradient.py
+++ b/model-optimizer/extensions/ops/stop_gradient.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 from mo.front.common.partial_infer.utils import mark_input_bins
 
@@ -25,11 +24,13 @@ class StopGradientOp(Op):
     op = 'StopGradient'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
             'identity': True,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': StopGradientOp.shape_infer
         }, attrs)
 
diff --git a/model-optimizer/extensions/ops/swapaxes.py b/model-optimizer/extensions/ops/swapaxes.py
index e8507fd13..0029785ca 100644
--- a/model-optimizer/extensions/ops/swapaxes.py
+++ b/model-optimizer/extensions/ops/swapaxes.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.permute import Permute
 
 
@@ -24,7 +22,7 @@ class SwapAxes(Permute):
     op = 'SwapAxis'
     enabled = False
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         attrs.update({'infer': SwapAxes.infer})
         super().__init__(graph, attrs)
 
diff --git a/model-optimizer/extensions/ops/switch.py b/model-optimizer/extensions/ops/switch.py
index b6fa822c7..630c05153 100644
--- a/model-optimizer/extensions/ops/switch.py
+++ b/model-optimizer/extensions/ops/switch.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,16 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
 class Switch(Op):
     op = 'Switch'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'op': __class__.op,
             'infer': __class__.infer,
diff --git a/model-optimizer/extensions/ops/switch_test.py b/model-optimizer/extensions/ops/switch_test.py
index c5bb75989..73bbf55e1 100644
--- a/model-optimizer/extensions/ops/switch_test.py
+++ b/model-optimizer/extensions/ops/switch_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/extensions/ops/tensor_iterator.py b/model-optimizer/extensions/ops/tensor_iterator.py
index faaf9a7ca..c5bc8883b 100644
--- a/model-optimizer/extensions/ops/tensor_iterator.py
+++ b/model-optimizer/extensions/ops/tensor_iterator.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ import networkx as nx
 import numpy as np
 
 from mo.utils.error import Error
-from mo.graph.graph import Node, dict_includes
+from mo.graph.graph import Node, dict_includes, Graph
 from mo.ops.op import Op
 from mo.utils.utils import refer_to_faq_msg
 
@@ -32,14 +32,14 @@ class TensorIterator(Op):
     op = 'TensorIterator'
 
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         mandatory_props = {
             'type': __class__.op,
             'op': __class__.op,
             'input_port_map': [],  # a list of dicts with such attrs as external_port_id, etc.
             'output_port_map': [],  # a list of dicts with such attrs as external_port_id, etc.
             'back_edges': [], # a list of dicts with such attrs as from_layer, from_port, etc.
-            'body': None,   # an nx.MultiDiGraph object with a body sub-graph
+            'body': None,   # an Graph object with a body sub-graph
             'sub_graphs': ['body'],  # built-in attribute with all sub-graphg
             'infer': __class__.infer
         }
@@ -96,14 +96,14 @@ class TensorIterator(Op):
 
 
     @staticmethod
-    def find_internal_layer_id(graph: nx.MultiDiGraph, virtual_id):
+    def find_internal_layer_id(graph: Graph, virtual_id):
         internal_nodes = list(filter(lambda d: dict_includes(d[1], {'internal_layer_id': virtual_id}), graph.nodes(data=True)))
         assert len(internal_nodes) == 1, 'Nodes: {}, virtual_id: {}'.format(internal_nodes, virtual_id)
         return  internal_nodes[0][0]
 
 
     @staticmethod
-    def find_internal_layer_and_port(graph: nx.MultiDiGraph, virtual_layer_id, virtual_port_id):
+    def find_internal_layer_and_port(graph: Graph, virtual_layer_id, virtual_port_id):
         internal_layer_id = __class__.find_internal_layer_id(graph, virtual_layer_id)
         internal_port_id = __class__.find_port_id(Node(graph, internal_layer_id), virtual_port_id, 'internal_port_id')
         return internal_layer_id, internal_port_id
@@ -111,11 +111,11 @@ class TensorIterator(Op):
 
     @staticmethod
     def generate_port_map(node: Node, src_port_map):
-        ''' Extract port_map attributes from node and node.body attributes.
+        """ Extract port_map attributes from node and node.body attributes.
         
             It iterates over src_port_map and substitude external_port_id, internal_port_id and
             internal_layer_id by real values queried from node ports and node.body attributes.
-        '''
+        """
         result_list = []
         for map_item in src_port_map:
             result = dict(map_item)
diff --git a/model-optimizer/extensions/ops/topkrois_onnx.py b/model-optimizer/extensions/ops/topkrois_onnx.py
new file mode 100644
index 000000000..d6bba13d1
--- /dev/null
+++ b/model-optimizer/extensions/ops/topkrois_onnx.py
@@ -0,0 +1,38 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.ops.op import Op
+
+
+class ExperimentalDetectronTopKROIs(Op):
+    op = 'ExperimentalDetectronTopKROIs'
+
+    def __init__(self, graph, attrs):
+        mandatory_props = dict(
+            type=__class__.op,
+            op=__class__.op,
+            infer=__class__.infer
+        )
+        super().__init__(graph, mandatory_props, attrs)
+
+    def backend_attrs(self):
+        return ['max_rois', ]
+
+    @staticmethod
+    def infer(node):
+        node.out_node(0).shape = np.array([node.max_rois, 4], dtype=np.int64)
diff --git a/model-optimizer/install_prerequisites/install_prerequisites.sh b/model-optimizer/install_prerequisites/install_prerequisites.sh
index cb6da98dc..8c78058e9 100755
--- a/model-optimizer/install_prerequisites/install_prerequisites.sh
+++ b/model-optimizer/install_prerequisites/install_prerequisites.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -82,7 +82,11 @@ if [[ $V_ENV -eq 1 ]]; then
     echo
     echo "Before running the Model Optimizer, please activate virtualenv environment by running \"source ${SCRIPTDIR}/../venv/bin/activate\""
 else
-    sudo -E $python_binary -m pip install -r $SCRIPTDIR/../requirements${postfix}.txt
+    if [[ "$OSTYPE" == "darwin"* ]]; then
+        python3 -m pip install -r $SCRIPTDIR/../requirements${postfix}.txt
+    else
+        sudo -E $python_binary -m pip install -r $SCRIPTDIR/../requirements${postfix}.txt
+    fi
     echo [WARNING] All Model Optimizer dependencies are installed globally.
     echo [WARNING] If you want to keep Model Optimizer in separate sandbox
     echo [WARNING] run install_prerequisites.sh venv "{caffe|tf|mxnet|kaldi|onnx}"
diff --git a/model-optimizer/install_prerequisites/install_prerequisites_caffe.sh b/model-optimizer/install_prerequisites/install_prerequisites_caffe.sh
index 9ea25189c..0348223e4 100755
--- a/model-optimizer/install_prerequisites/install_prerequisites_caffe.sh
+++ b/model-optimizer/install_prerequisites/install_prerequisites_caffe.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/model-optimizer/install_prerequisites/install_prerequisites_kaldi.sh b/model-optimizer/install_prerequisites/install_prerequisites_kaldi.sh
index bcdd0e239..2996dc3c6 100755
--- a/model-optimizer/install_prerequisites/install_prerequisites_kaldi.sh
+++ b/model-optimizer/install_prerequisites/install_prerequisites_kaldi.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/model-optimizer/install_prerequisites/install_prerequisites_mxnet.sh b/model-optimizer/install_prerequisites/install_prerequisites_mxnet.sh
index 2cf20d9af..da4169392 100755
--- a/model-optimizer/install_prerequisites/install_prerequisites_mxnet.sh
+++ b/model-optimizer/install_prerequisites/install_prerequisites_mxnet.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/model-optimizer/install_prerequisites/install_prerequisites_onnx.sh b/model-optimizer/install_prerequisites/install_prerequisites_onnx.sh
index 97ea4f0be..d9c9d774d 100755
--- a/model-optimizer/install_prerequisites/install_prerequisites_onnx.sh
+++ b/model-optimizer/install_prerequisites/install_prerequisites_onnx.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/model-optimizer/install_prerequisites/install_prerequisites_tf.sh b/model-optimizer/install_prerequisites/install_prerequisites_tf.sh
index 3d7d58f8f..ce67a03bf 100755
--- a/model-optimizer/install_prerequisites/install_prerequisites_tf.sh
+++ b/model-optimizer/install_prerequisites/install_prerequisites_tf.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright (c) 2018 Intel Corporation
+# Copyright (c) 2019 Intel Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo.py b/model-optimizer/mo.py
index 7b8cc0636..5c6f305d6 100755
--- a/model-optimizer/mo.py
+++ b/model-optimizer/mo.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py
index e72d1fdcc..3763c2d12 100644
--- a/model-optimizer/mo/back/ie_ir_ver_2/emitter.py
+++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,60 +15,16 @@
 """
 
 import hashlib
-import xml.dom.minidom
+from defusedxml.minidom import parseString
 from xml.etree.ElementTree import Element, SubElement, tostring
 
-from mo.front.extractor import update_ie_fields
 from mo.graph.graph import *
 from mo.utils.unsupported_ops import UnsupportedOps
 from mo.utils.utils import refer_to_faq_msg
 from mo.utils.version import get_version
 
 
-def create_const_nodes(graph: nx.MultiDiGraph, start_data_nodes_are_not_allowed: bool=True):
-    """
-    Adds layers with type 'Const' that produce blob from 'bin' file. The pass finds data nodes with one output which
-    doesn't have edge with 'bin' attribute and generate Const op node before the node and data node before the Const
-    node. The data node before 'Const' node is needed because the op node dumps input tensors to bin file.
-    :param graph: input graph.
-    :return: None
-    """
-    for node_name in list(graph.nodes()):
-        node = NodeWrap(graph, node_name)
-        if (
-                node.has('kind') and
-                node.kind == 'data' and (
-                (len(node.out_edges()) == 1 and 'bin' not in node.out_edge(0)) or
-                node.has_and_set('is_output')
-        ) and
-                len(node.in_nodes()) == 0):
-
-            if node.has_valid('value'):
-                const_node_name = node.id + '_const'
-                log.debug("Added Const node '{}'".format(const_node_name))
-                graph.add_node(const_node_name, name=const_node_name, type='Const', kind='op', op='Const',
-                               precision="FP32")
-                update_ie_fields(node.graph.node[const_node_name])
-                graph.add_edges_from([(const_node_name, node.id, {'out': 0})])
-                copy_data_node_name = unique_id(graph, node.id + '_copy_')
-                graph.add_node(copy_data_node_name, kind='data', precision="FP32", shape=np.array(node.shape),
-                               value=np.array(node.value))
-                if node.has_valid('force_precision'):
-                    Node(graph, copy_data_node_name)['force_precision'] = node.force_precision
-                    Node(graph, const_node_name)['force_precision'] = node.force_precision
-                graph.add_edges_from([(copy_data_node_name, const_node_name, {'in': 0, 'bin': 'custom'})])
-            elif start_data_nodes_are_not_allowed:
-                log.debug('node = {}'.format(node.graph.node[node.id]))
-                # TODO for body sub-graph it shouldn't be reported as an error
-                raise Error(
-                    'Discovered data node without inputs and value, node.name = {}, consumer.name = {}. ' +
-                    refer_to_faq_msg(23),
-                    node.soft_get('name'),
-                    node.out_node().soft_get('name') if len(node.out_nodes()) else "<no consumer>"
-                )
-
-
-def serialize_constants(graph: nx.MultiDiGraph, bin_file_name:str, data_type=np.float32):
+def serialize_constants(graph: Graph, bin_file_name:str, data_type=np.float32):
     """
     Found all data constants that has output edges with 'bin' attribute.
     Serialize content for such constants to a binary file with name bin_file_name in
@@ -86,10 +42,10 @@ def serialize_constants(graph: nx.MultiDiGraph, bin_file_name:str, data_type=np.
         serialize_constants_recursively(graph, bin_file, data_type, bin_hashes)
 
 
-def serialize_constants_recursively(graph: nx.MultiDiGraph, bin_file, data_type, bin_hashes):
+def serialize_constants_recursively(graph: Graph, bin_file, data_type, bin_hashes):
     nodes = sorted(graph.nodes())
     for node in nodes:
-        node = NodeWrap(graph, node)
+        node = Node(graph, node)
 
         if node.kind == 'data' and node.value is not None and any('bin' in d for u, v, d in graph.out_edges(node.node, data=True)):
             blob = node.value
@@ -118,7 +74,7 @@ def serialize_constants_recursively(graph: nx.MultiDiGraph, bin_file, data_type,
     # separate loop for sub-graph to dump them after all blobs for more natural blob offset ordering
     # TODO: implement strict order for all blobs in entier IR
     for node in nodes:
-        node = NodeWrap(graph, node)
+        node = Node(graph, node)
         # Dump blobs recursively if sub-graphs are present in the node
         if node.has_valid('sub_graphs'):
             for sub_graph_attr_name in node.sub_graphs:
@@ -140,7 +96,7 @@ def serialize_mean_image(bin_file_name: str, mean_data=[]):
         return mean_offset, mean_size
 
 
-def xml_shape(shape: np.ndarray, element: xml.etree.ElementTree.Element):
+def xml_shape(shape: np.ndarray, element: Element):
     for d in shape:
         dim = SubElement(element, 'dim')
         if d <= 0:
@@ -154,10 +110,10 @@ def xml_shape(shape: np.ndarray, element: xml.etree.ElementTree.Element):
         dim.text = str(d)
 
 
-def xml_ports(node: Node, element: xml.etree.ElementTree.Element, edges: xml.etree.ElementTree.Element):
+def xml_ports(node: Node, element: Element, edges: Element):
     # input ports
     inputs = None  # will create input section only if at least one input is available
-    for u, d in get_sorted_inputs(node):
+    for u, d in node.get_sorted_inputs():
         if 'bin' not in d and ('xml_skip' not in d or not d['xml_skip']):
             if inputs is None:
                 inputs = SubElement(element, 'input')
@@ -180,7 +136,7 @@ def xml_ports(node: Node, element: xml.etree.ElementTree.Element, edges: xml.etr
 
     # output ports
     outputs = None
-    for v, d in get_sorted_outputs(node):
+    for v, d in node.get_sorted_outputs():
         if 'xml_skip' not in d or not d['xml_skip']:
             if outputs is None:
                 outputs = SubElement(element, 'output')
@@ -192,9 +148,9 @@ def xml_ports(node: Node, element: xml.etree.ElementTree.Element, edges: xml.etr
             xml_shape(node.graph.node[v]['shape'], port)
 
 
-def xml_consts(graph: nx.MultiDiGraph, node: Node, element: xml.etree.ElementTree.Element):
+def xml_consts(graph: Graph, node: Node, element: Element):
     blobs = None  # sub-element that will be created on-demand
-    for u, d in get_sorted_inputs(node):
+    for u, d in node.get_sorted_inputs():
         if 'bin' in d:
             if not blobs:
                 blobs = SubElement(element, 'blobs')
@@ -213,11 +169,11 @@ def soft_get(node, attr):
 
 
 def serialize_element(
-        graph: nx.MultiDiGraph,
+        graph: Graph,
         node,
         schema: list,
-        parent_element: xml.etree.ElementTree.Element,
-        edges: xml.etree.ElementTree.Element,
+        parent_element: Element,
+        edges: Element,
         unsupported):
 
     name, attrs, subelements = schema
@@ -265,11 +221,11 @@ def serialize_meta_list(graph, node, schema, element, edges, unsupported):
 
 
 def serialize_node_attributes(
-        graph: nx.MultiDiGraph,  # the current network graph
+        graph: Graph,  # the current network graph
         node,   # dictionry-like object that should be serialized
         schema: list,
-        parent_element: xml.etree.ElementTree.Element,
-        edges: xml.etree.ElementTree.Element,
+        parent_element: Element,
+        edges: Element,
         unsupported):
 
     try:
@@ -303,7 +259,7 @@ def serialize_node_attributes(
         ) from e
 
 
-def create_pre_process_block_for_image(net: xml.etree.ElementTree.Element, ref_layer_names: list, mean_offset: tuple,
+def create_pre_process_block_for_image(net: Element, ref_layer_names: list, mean_offset: tuple,
                                        mean_size: tuple):
     pre_process = SubElement(net, 'pre-process')
     pre_process.set('mean-precision', 'FP32')  # TODO: to think about need to output FP16 mean values
@@ -346,7 +302,21 @@ def create_pre_process_block(net, ref_layer_name, means, scales=None):
     return pre_process
 
 
-def add_meta_data(net: xml.etree.ElementTree.Element, meta_info: dict):
+def add_quantization_statistics(graph, net_element):
+    if 'statistics' in graph.graph:
+        stats = SubElement(net_element, 'statistics')
+        for tensor, interval in graph.graph['statistics'].items():
+            layer = SubElement(stats, 'layer')
+            name = SubElement(layer, 'name')
+            name.text = tensor
+            min = SubElement(layer, 'min')
+            min.text = interval['min']
+            max = SubElement(layer, 'max')
+            max.text = interval['max']
+        log.info('Statistics were inserted to IR')
+
+
+def add_meta_data(net: Element, meta_info: dict):
     meta = SubElement(net, 'meta_data')
     SubElement(meta, 'MO_version').set('value', get_version())
     parameters = SubElement(meta, 'cli_parameters')
@@ -355,7 +325,6 @@ def add_meta_data(net: xml.etree.ElementTree.Element, meta_info: dict):
     SubElement(parameters, 'unset').set('unset_cli_parameters', ', '.join(sorted(meta_info['unset'])))
 
 
-
 def serialize_network(graph, net_element, unsupported):
     layers = SubElement(net_element, 'layers')
     edges = SubElement(net_element, 'edges')
@@ -363,7 +332,7 @@ def serialize_network(graph, net_element, unsupported):
         return
     nodes = sorted(graph.nodes())
     for node in nodes:
-        node = NodeWrap(graph, node)
+        node = Node(graph, node)
         if not node.has('IE'):
             continue
         if node.kind == 'op' and (not node.has('type') or node.type is None):
@@ -375,7 +344,7 @@ def serialize_network(graph, net_element, unsupported):
             raise Error(str(e).replace('<SUB-ELEMENT>', '{} (id = {})'.format(node.soft_get('name'), node.id))) from e
 
 
-def generate_ie_ir(graph: nx.MultiDiGraph, file_name: str, input_names: tuple = (), mean_offset: tuple = (),
+def generate_ie_ir(graph: Graph, file_name: str, input_names: tuple = (), mean_offset: tuple = (),
                    mean_size: tuple = (), meta_info: dict = dict()):
     """
     Extracts IE/IR attributes from kind='op' nodes in three ways:
@@ -408,27 +377,28 @@ def generate_ie_ir(graph: nx.MultiDiGraph, file_name: str, input_names: tuple =
     unsupported = UnsupportedOps(graph)
 
     serialize_network(graph, net, unsupported)
+    add_quantization_statistics(graph, net)
     add_meta_data(net, meta_info)
     xml_string = tostring(net)
-    xml_doc = xml.dom.minidom.parseString(xml_string)  # ugly?
+    xml_doc = parseString(xml_string)
     pretty_xml_as_string = xml_doc.toprettyxml()
     if len(unsupported.unsupported):
         log.debug('Partially correct IR XML:\n{}'.format(pretty_xml_as_string))
-        unsupported.report(log.error, "List of operations that cannot be converted to IE IR:")
-        raise Error('Part of the nodes was not translated to IE. Stopped. ' +
+        unsupported.report(log.error, "List of operations that cannot be converted to Inference Engine IR:")
+        raise Error('Part of the nodes was not converted to IR. Stopped. ' +
                     refer_to_faq_msg(24))
     with open(file_name, 'w') as file:
         file.write(pretty_xml_as_string)
 
 
-def port_renumber(graph: nx.MultiDiGraph):
+def port_renumber(graph: Graph):
     for node in list(graph.nodes()):
-        node = NodeWrap(graph, node)
+        node = Node(graph, node)
         if node.kind == 'op':
             base = 0
-            for u, d in get_sorted_inputs(node):
+            for u, d in node.get_sorted_inputs():
                 d['in'] = base
                 base += 1
-            for v, d in get_sorted_outputs(node):
+            for v, d in node.get_sorted_outputs():
                 d['out'] = base
                 base += 1
diff --git a/model-optimizer/mo/back/ie_ir_ver_2/emitter_test.py b/model-optimizer/mo/back/ie_ir_ver_2/emitter_test.py
index 44830dd10..bb39758d7 100644
--- a/model-optimizer/mo/back/ie_ir_ver_2/emitter_test.py
+++ b/model-optimizer/mo/back/ie_ir_ver_2/emitter_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/back/replacement.py b/model-optimizer/mo/back/replacement.py
index c55c074d5..e47e6fed2 100644
--- a/model-optimizer/mo/back/replacement.py
+++ b/model-optimizer/mo/back/replacement.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/collect_attributes.py b/model-optimizer/mo/front/caffe/collect_attributes.py
index 0ce705435..1855d2073 100644
--- a/model-optimizer/mo/front/caffe/collect_attributes.py
+++ b/model-optimizer/mo/front/caffe/collect_attributes.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/custom_layers_mapping.py b/model-optimizer/mo/front/caffe/custom_layers_mapping.py
index 65500da83..f9ecae3a4 100644
--- a/model-optimizer/mo/front/caffe/custom_layers_mapping.py
+++ b/model-optimizer/mo/front/caffe/custom_layers_mapping.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,7 +15,7 @@
 """
 import logging as log
 from builtins import AttributeError
-from xml.etree import ElementTree
+from defusedxml import ElementTree
 
 from mo.front.caffe.collect_attributes import collect_attributes
 from mo.front.caffe.extractor import node_pb_arg
diff --git a/model-optimizer/mo/front/caffe/custom_layers_mapping_test.py b/model-optimizer/mo/front/caffe/custom_layers_mapping_test.py
index 84ce9b5cf..c9efbc450 100644
--- a/model-optimizer/mo/front/caffe/custom_layers_mapping_test.py
+++ b/model-optimizer/mo/front/caffe/custom_layers_mapping_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractor.py b/model-optimizer/mo/front/caffe/extractor.py
index 72e3283f7..6d7f777bc 100644
--- a/model-optimizer/mo/front/caffe/extractor.py
+++ b/model-optimizer/mo/front/caffe/extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractor_test.py b/model-optimizer/mo/front/caffe/extractor_test.py
index b5b292525..9b4d0ce5b 100644
--- a/model-optimizer/mo/front/caffe/extractor_test.py
+++ b/model-optimizer/mo/front/caffe/extractor_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/batchnorm.py b/model-optimizer/mo/front/caffe/extractors/batchnorm.py
index c4bb8cb3a..5c71a198b 100644
--- a/model-optimizer/mo/front/caffe/extractors/batchnorm.py
+++ b/model-optimizer/mo/front/caffe/extractors/batchnorm.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py b/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py
index eeb441d2e..a8f122fa4 100644
--- a/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/batchnorm_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/concat.py b/model-optimizer/mo/front/caffe/extractors/concat.py
index e3bfd7bd7..cd67d65f8 100644
--- a/model-optimizer/mo/front/caffe/extractors/concat.py
+++ b/model-optimizer/mo/front/caffe/extractors/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/concat_test.py b/model-optimizer/mo/front/caffe/extractors/concat_test.py
index 117ce04f6..a82633ff8 100644
--- a/model-optimizer/mo/front/caffe/extractors/concat_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/concat_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/crop.py b/model-optimizer/mo/front/caffe/extractors/crop.py
index 4c82d6afd..7eadf4a82 100644
--- a/model-optimizer/mo/front/caffe/extractors/crop.py
+++ b/model-optimizer/mo/front/caffe/extractors/crop.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/crop_test.py b/model-optimizer/mo/front/caffe/extractors/crop_test.py
index 9405e70fa..cc764fbe2 100644
--- a/model-optimizer/mo/front/caffe/extractors/crop_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/crop_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/eltwise.py b/model-optimizer/mo/front/caffe/extractors/eltwise.py
index 23653034f..bf57976a9 100644
--- a/model-optimizer/mo/front/caffe/extractors/eltwise.py
+++ b/model-optimizer/mo/front/caffe/extractors/eltwise.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/eltwise_test.py b/model-optimizer/mo/front/caffe/extractors/eltwise_test.py
index e077c4243..86f9172d6 100644
--- a/model-optimizer/mo/front/caffe/extractors/eltwise_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/eltwise_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/elu.py b/model-optimizer/mo/front/caffe/extractors/elu.py
index 464a77f1e..e52d9335f 100644
--- a/model-optimizer/mo/front/caffe/extractors/elu.py
+++ b/model-optimizer/mo/front/caffe/extractors/elu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/elu_test.py b/model-optimizer/mo/front/caffe/extractors/elu_test.py
index c482888bd..4df18b049 100644
--- a/model-optimizer/mo/front/caffe/extractors/elu_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/elu_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/inner_product.py b/model-optimizer/mo/front/caffe/extractors/inner_product.py
index bac429c91..f6ee2126a 100644
--- a/model-optimizer/mo/front/caffe/extractors/inner_product.py
+++ b/model-optimizer/mo/front/caffe/extractors/inner_product.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/inner_product_test.py b/model-optimizer/mo/front/caffe/extractors/inner_product_test.py
index 44501c390..f70bef908 100644
--- a/model-optimizer/mo/front/caffe/extractors/inner_product_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/inner_product_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/input.py b/model-optimizer/mo/front/caffe/extractors/input.py
index 94d182217..743e6ea2e 100644
--- a/model-optimizer/mo/front/caffe/extractors/input.py
+++ b/model-optimizer/mo/front/caffe/extractors/input.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/input_test.py b/model-optimizer/mo/front/caffe/extractors/input_test.py
index 37d1fc149..ea54f43fc 100644
--- a/model-optimizer/mo/front/caffe/extractors/input_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/input_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/lrn.py b/model-optimizer/mo/front/caffe/extractors/lrn.py
index 669e337e1..3d5ba4d17 100644
--- a/model-optimizer/mo/front/caffe/extractors/lrn.py
+++ b/model-optimizer/mo/front/caffe/extractors/lrn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/lrn_test.py b/model-optimizer/mo/front/caffe/extractors/lrn_test.py
index e5c7f8bdc..ef9a419f1 100644
--- a/model-optimizer/mo/front/caffe/extractors/lrn_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/lrn_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/native_caffe.py b/model-optimizer/mo/front/caffe/extractors/native_caffe.py
index 6e96b1712..db13d5af8 100644
--- a/model-optimizer/mo/front/caffe/extractors/native_caffe.py
+++ b/model-optimizer/mo/front/caffe/extractors/native_caffe.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/permute.py b/model-optimizer/mo/front/caffe/extractors/permute.py
index 34dcd5ff1..2a5e6177f 100644
--- a/model-optimizer/mo/front/caffe/extractors/permute.py
+++ b/model-optimizer/mo/front/caffe/extractors/permute.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/permute_test.py b/model-optimizer/mo/front/caffe/extractors/permute_test.py
index 232e5200a..f6faf9ed2 100644
--- a/model-optimizer/mo/front/caffe/extractors/permute_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/permute_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/power.py b/model-optimizer/mo/front/caffe/extractors/power.py
index 0f44824af..2a06da2f2 100644
--- a/model-optimizer/mo/front/caffe/extractors/power.py
+++ b/model-optimizer/mo/front/caffe/extractors/power.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/power_test.py b/model-optimizer/mo/front/caffe/extractors/power_test.py
index 5281bbb58..a39e5b23b 100644
--- a/model-optimizer/mo/front/caffe/extractors/power_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/power_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/relu.py b/model-optimizer/mo/front/caffe/extractors/relu.py
index 4e2ca8867..100b553aa 100644
--- a/model-optimizer/mo/front/caffe/extractors/relu.py
+++ b/model-optimizer/mo/front/caffe/extractors/relu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/relu6.py b/model-optimizer/mo/front/caffe/extractors/relu6.py
index e66d3a608..6a3f92562 100644
--- a/model-optimizer/mo/front/caffe/extractors/relu6.py
+++ b/model-optimizer/mo/front/caffe/extractors/relu6.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/relu_test.py b/model-optimizer/mo/front/caffe/extractors/relu_test.py
index b80716694..aa4b7bf7f 100644
--- a/model-optimizer/mo/front/caffe/extractors/relu_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/relu_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/reshape.py b/model-optimizer/mo/front/caffe/extractors/reshape.py
index 13deb9908..c7893c13c 100644
--- a/model-optimizer/mo/front/caffe/extractors/reshape.py
+++ b/model-optimizer/mo/front/caffe/extractors/reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/reshape_test.py b/model-optimizer/mo/front/caffe/extractors/reshape_test.py
index 4551eb733..8738d4433 100644
--- a/model-optimizer/mo/front/caffe/extractors/reshape_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/reshape_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/roipooling.py b/model-optimizer/mo/front/caffe/extractors/roipooling.py
index 8d6dc7c28..3a5629785 100644
--- a/model-optimizer/mo/front/caffe/extractors/roipooling.py
+++ b/model-optimizer/mo/front/caffe/extractors/roipooling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/scale.py b/model-optimizer/mo/front/caffe/extractors/scale.py
index 196b7d56b..cc7e46f9b 100644
--- a/model-optimizer/mo/front/caffe/extractors/scale.py
+++ b/model-optimizer/mo/front/caffe/extractors/scale.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/scale_test.py b/model-optimizer/mo/front/caffe/extractors/scale_test.py
index 9258295b8..19cfd6239 100644
--- a/model-optimizer/mo/front/caffe/extractors/scale_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/scale_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/sigmoid.py b/model-optimizer/mo/front/caffe/extractors/sigmoid.py
index 5594c830b..851d599eb 100644
--- a/model-optimizer/mo/front/caffe/extractors/sigmoid.py
+++ b/model-optimizer/mo/front/caffe/extractors/sigmoid.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/slice.py b/model-optimizer/mo/front/caffe/extractors/slice.py
index 953f88c34..3927e99bc 100644
--- a/model-optimizer/mo/front/caffe/extractors/slice.py
+++ b/model-optimizer/mo/front/caffe/extractors/slice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/slice_test.py b/model-optimizer/mo/front/caffe/extractors/slice_test.py
index 22b43b81e..b2a921597 100644
--- a/model-optimizer/mo/front/caffe/extractors/slice_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/slice_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/tanh.py b/model-optimizer/mo/front/caffe/extractors/tanh.py
index 97bfb89e0..9d75264a1 100644
--- a/model-optimizer/mo/front/caffe/extractors/tanh.py
+++ b/model-optimizer/mo/front/caffe/extractors/tanh.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/tile.py b/model-optimizer/mo/front/caffe/extractors/tile.py
index 63b4c56ff..f9d331988 100644
--- a/model-optimizer/mo/front/caffe/extractors/tile.py
+++ b/model-optimizer/mo/front/caffe/extractors/tile.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/utils.py b/model-optimizer/mo/front/caffe/extractors/utils.py
index 416598a33..32d0cef8c 100644
--- a/model-optimizer/mo/front/caffe/extractors/utils.py
+++ b/model-optimizer/mo/front/caffe/extractors/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/extractors/utils_test.py b/model-optimizer/mo/front/caffe/extractors/utils_test.py
index 6983a0fdd..7a98511ca 100644
--- a/model-optimizer/mo/front/caffe/extractors/utils_test.py
+++ b/model-optimizer/mo/front/caffe/extractors/utils_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/loader.py b/model-optimizer/mo/front/caffe/loader.py
index 69f63f213..40dd09c86 100644
--- a/model-optimizer/mo/front/caffe/loader.py
+++ b/model-optimizer/mo/front/caffe/loader.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,14 +18,12 @@ import logging as log
 import mmap
 import os
 
-
-import networkx as nx
 import numpy as np
 from google.protobuf import text_format
 from google.protobuf.internal import api_implementation
 
 from mo.front.caffe.proto import caffe_pb2
-from mo.graph.graph import Node, unique_id
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error, FrameworkError
 from mo.utils.utils import refer_to_faq_msg
 
@@ -165,10 +163,10 @@ def caffe_pb_to_nx(proto, model):
 
     Returns
     ----------
-    nx.MultiDiGraph
+        Graph
         built NX Directed graph.
     """
-    graph = nx.MultiDiGraph()
+    graph = Graph()
     # Blobs in prototxt model can be reused by inplace layer.
     # This requires loading of pb layers in order and tracking the latest
     # layer that writes a particular blob.
@@ -282,7 +280,7 @@ def caffe_pb_to_nx(proto, model):
                 input_dims.append(np.array(list(dims), dtype=np.int64))
                 input_names.append(layer.name)
 
-        layer.name = unique_id(graph, layer.name)        
+        layer.name = graph.unique_id(layer.name)
         graph.add_node(layer.name, pb=layer, model_pb=model_layer, kind='op')
 
         # connect inputs based on blob_producers dictionary
@@ -307,27 +305,6 @@ def caffe_pb_to_nx(proto, model):
                 log.debug("Detected reuse of blob {} by layer {}".format(top, layer.name))
             blob_producers[top] = (layer.name, src_port)
 
-    # Find all nodes that do not have consumers.
-    # Add identity ops as a consumers for each output port for such nodes.
-    for node in list(graph.nodes()):
-        node = Node(graph, node)
-        if len(node.out_nodes()) == 0:
-            if not node.has_valid('pb') or not hasattr(node.pb, 'top'):
-                continue
-            for port, top in enumerate(node.pb.top):
-                new_id = unique_id(graph, 'TerminalIdentity_')
-                graph.add_node(new_id, op='Identity', type='Identity', kind='op')
-                edge_attrs = {
-                    'out': port,
-                    'in': 0,
-                    'name': top,
-                    'fw_tensor_debug_info': [(node.id, top)], # debug anchor for a framework tensor name and port
-                    'in_attrs': ['in', 'name'],
-                    'out_attrs': ['out', 'name'],
-                    'data_attrs': ['fw_tensor_debug_info']
-                }
-                graph.add_edge(node.id, new_id, **edge_attrs)
-
     if len(input_names) <= 0:
         raise Error('The topology contains no "input" layers. ' +
                     refer_to_faq_msg(79))
diff --git a/model-optimizer/mo/front/caffe/loader_test.py b/model-optimizer/mo/front/caffe/loader_test.py
index b61f6d37c..912873051 100644
--- a/model-optimizer/mo/front/caffe/loader_test.py
+++ b/model-optimizer/mo/front/caffe/loader_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -156,5 +156,5 @@ class TestLoader(unittest.TestCase):
         proto = caffe_pb2.NetParameter()
         text_format.Merge(proto_str_multi_input + proto_same_name_layers, proto)
         graph, input_shapes = caffe_pb_to_nx(proto, None)
-        # 6 nodes because: 2 inputs + 2 convolutions + 2 output nodes  
-        np.testing.assert_equal(len(graph.nodes()), 6)
+        # 6 nodes because: 2 inputs + 2 convolutions
+        np.testing.assert_equal(len(graph.nodes()), 4)
diff --git a/model-optimizer/mo/front/caffe/proto/caffe_pb2.py b/model-optimizer/mo/front/caffe/proto/caffe_pb2.py
index c32fa78c4..6e14d46ec 100644
--- a/model-optimizer/mo/front/caffe/proto/caffe_pb2.py
+++ b/model-optimizer/mo/front/caffe/proto/caffe_pb2.py
@@ -19,7 +19,7 @@ _sym_db = _symbol_database.Default()
 DESCRIPTOR = _descriptor.FileDescriptor(
   name='mo_caffe.proto',
   package='mo_caffe',
-  serialized_pb=_b('\n\x0emo_caffe.proto\x12\x08mo_caffe\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 \x03(\x03\x42\x02\x10\x01\"\xcf\x01\n\tBlobProto\x12\"\n\x05shape\x18\x07 \x01(\x0b\x32\x13.mo_caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"5\n\x0f\x42lobProtoVector\x12\"\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x13.mo_caffe.BlobProto\"M\n\x1e\x43osineSimilarityBatchParameter\x12\x14\n\tpos_label\x18\x01 \x01(\x01:\x01\x31\x12\x15\n\tneg_label\x18\x02 \x01(\x01:\x02-1\"\x81\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\"A\n\x0cLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05label\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"0\n\x08LabelMap\x12$\n\x04item\x18\x01 \x03(\x0b\x32\x16.mo_caffe.LabelMapItem\"\x87\x01\n\x0eNormalizedBBox\x12\x0c\n\x04xmin\x18\x01 \x01(\x02\x12\x0c\n\x04ymin\x18\x02 \x01(\x02\x12\x0c\n\x04xmax\x18\x03 \x01(\x02\x12\x0c\n\x04ymax\x18\x04 \x01(\x02\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x11\n\tdifficult\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x02\x12\x0c\n\x04size\x18\x08 \x01(\x02\"\xad\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x45\n\rvariance_norm\x18\x08 \x01(\x0e\x32&.mo_caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\x12\x0c\n\x04\x66ile\x18\t \x01(\t\x12\x10\n\x08\x64iag_val\x18\n \x03(\x02\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\xed\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12(\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x13.mo_caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12!\n\x05state\x18\x06 \x01(\x0b\x32\x12.mo_caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0cprofile_info\x18\t \x01(\x08:\x05\x66\x61lse\x12\x18\n\x0cprofile_iter\x18\n \x01(\x05:\x02\x35\x30\x12\x1a\n\x0eprofile_warmup\x18\x0b \x01(\x05:\x02\x31\x30\x12\'\n\x05layer\x18\x64 \x03(\x0b\x32\x18.mo_caffe.LayerParameter\x12*\n\x06layers\x18\x02 \x03(\x0b\x32\x1a.mo_caffe.V1LayerParameter\"\xf4\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12)\n\tnet_param\x18\x19 \x01(\x0b\x32\x16.mo_caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12/\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x16.mo_caffe.NetParameter\x12.\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x16.mo_caffe.NetParameter\x12\'\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x12.mo_caffe.NetState\x12&\n\ntest_state\x18\x1b \x03(\x0b\x32\x12.mo_caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18  \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x0f\n\x07\x64isplay\x18\x06 \x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! \x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x17\n\x0fplateau_winsize\x18* \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12N\n\x0fsnapshot_format\x18% \x01(\x0e\x32(.mo_caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12>\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32$.mo_caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x14\n\x05\x64\x65lta\x18\x1f \x01(\x02:\x05\x31\x65-08\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12>\n\x0bsolver_type\x18\x1e \x01(\x0e\x32$.mo_caffe.SolverParameter.SolverType:\x03SGD\x12\x1f\n\x11layer_wise_reduce\x18) \x01(\x08:\x04true\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"\xa8\x01\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12$\n\x07history\x18\x03 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\x12\x1b\n\x0cminimum_loss\x18\x05 \x01(\x02:\x05\x31\x65+38\x12\x1a\n\x0fiter_last_event\x18\x06 \x01(\x05:\x01\x30\"Q\n\x08NetState\x12$\n\x05phase\x18\x01 \x01(\x0e\x32\x0f.mo_caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"v\n\x0cNetStateRule\x12\x1e\n\x05phase\x18\x01 \x01(\x0e\x32\x0f.mo_caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\"\xad\x02\n\x1bSpatialTransformerParameter\x12\x1e\n\x0etransform_type\x18\x01 \x01(\t:\x06\x61\x66\x66ine\x12\x1e\n\x0csampler_type\x18\x02 \x01(\t:\x08\x62ilinear\x12\x10\n\x08output_H\x18\x03 \x01(\x05\x12\x10\n\x08output_W\x18\x04 \x01(\x05\x12\x1b\n\rto_compute_dU\x18\x05 \x01(\x08:\x04true\x12\x11\n\ttheta_1_1\x18\x06 \x01(\x01\x12\x11\n\ttheta_1_2\x18\x07 \x01(\x01\x12\x11\n\ttheta_1_3\x18\x08 \x01(\x01\x12\x11\n\ttheta_2_1\x18\t \x01(\x01\x12\x11\n\ttheta_2_2\x18\n \x01(\x01\x12\x11\n\ttheta_2_3\x18\x0b \x01(\x01\x12\x1b\n\x0c\x64\x65_transform\x18\x0c \x01(\x08:\x05\x66\x61lse\"(\n\x12PowerFileParameter\x12\x12\n\nshift_file\x18\x01 \x01(\t\"5\n\x0fSTLossParameter\x12\x10\n\x08output_H\x18\x01 \x02(\x05\x12\x10\n\x08output_W\x18\x02 \x02(\x05\"%\n\x10LocLossParameter\x12\x11\n\tthreshold\x18\x01 \x02(\x01\"\xa6\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x34\n\nshare_mode\x18\x02 \x01(\x0e\x32 .mo_caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xf4#\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1e\n\x05phase\x18\n \x01(\x0e\x32\x0f.mo_caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 \x03(\x02\x12\"\n\x05param\x18\x06 \x03(\x0b\x32\x13.mo_caffe.ParamSpec\x12\"\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12\'\n\x07include\x18\x08 \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12\'\n\x07\x65xclude\x18\t \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12:\n\x0ftransform_param\x18\x64 \x01(\x0b\x32!.mo_caffe.TransformationParameter\x12+\n\nloss_param\x18\x65 \x01(\x0b\x32\x17.mo_caffe.LossParameter\x12\x33\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x1b.mo_caffe.AccuracyParameter\x12/\n\x0c\x61rgmax_param\x18g \x01(\x0b\x32\x19.mo_caffe.ArgMaxParameter\x12\x37\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x1c.mo_caffe.BatchNormParameter\x12,\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x17.mo_caffe.BiasParameter\x12I\n\x19\x63hannel_permutation_param\x18\x92? \x01(\x0b\x32%.mo_caffe.ChannelPermutationParameter\x12/\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x19.mo_caffe.ConcatParameter\x12\x42\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\".mo_caffe.ContrastiveLossParameter\x12\x39\n\x11\x63onvolution_param\x18j \x01(\x0b\x32\x1e.mo_caffe.ConvolutionParameter\x12,\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x17.mo_caffe.CropParameter\x12\x39\n\x11\x63tc_decoder_param\x18\x95\x01 \x01(\x0b\x32\x1d.mo_caffe.CTCDecoderParameter\x12\x33\n\x0e\x63tc_loss_param\x18\x94\x01 \x01(\x0b\x32\x1a.mo_caffe.CTCLossParameter\x12+\n\ndata_param\x18k \x01(\x0b\x32\x17.mo_caffe.DataParameter\x12\x31\n\rdropout_param\x18l \x01(\x0b\x32\x1a.mo_caffe.DropoutParameter\x12\x36\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x1c.mo_caffe.DummyDataParameter\x12\x31\n\reltwise_param\x18n \x01(\x0b\x32\x1a.mo_caffe.EltwiseParameter\x12*\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x16.mo_caffe.ELUParameter\x12.\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x18.mo_caffe.EmbedParameter\x12)\n\texp_param\x18o \x01(\x0b\x32\x16.mo_caffe.ExpParameter\x12\x32\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x1a.mo_caffe.FlattenParameter\x12*\n\tgrn_param\x18\xd5\x01 \x01(\x0b\x32\x16.mo_caffe.GRNParameter\x12\x34\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x1b.mo_caffe.HDF5DataParameter\x12\x38\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1d.mo_caffe.HDF5OutputParameter\x12\x36\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x1c.mo_caffe.HingeLossParameter\x12\x36\n\x10image_data_param\x18s \x01(\x0b\x32\x1c.mo_caffe.ImageDataParameter\x12<\n\x13infogain_loss_param\x18t \x01(\x0b\x32\x1f.mo_caffe.InfogainLossParameter\x12<\n\x13inner_product_param\x18u \x01(\x0b\x32\x1f.mo_caffe.InnerProductParameter\x12.\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x18.mo_caffe.InputParameter\x12*\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x16.mo_caffe.LogParameter\x12)\n\tlrn_param\x18v \x01(\x0b\x32\x16.mo_caffe.LRNParameter\x12\x38\n\x11memory_data_param\x18w \x01(\x0b\x32\x1d.mo_caffe.MemoryDataParameter\x12)\n\tmvn_param\x18x \x01(\x0b\x32\x16.mo_caffe.MVNParameter\x12\x36\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x1c.mo_caffe.ParameterParameter\x12\x31\n\rpooling_param\x18y \x01(\x0b\x32\x1a.mo_caffe.PoolingParameter\x12\x32\n\rpermute_param\x18\x9a\x01 \x01(\x0b\x32\x1a.mo_caffe.PermuteParameter\x12-\n\x0bpower_param\x18z \x01(\x0b\x32\x18.mo_caffe.PowerParameter\x12.\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x18.mo_caffe.PReLUParameter\x12\x30\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x19.mo_caffe.PythonParameter\x12\x36\n\x0frecurrent_param\x18\x92\x01 \x01(\x0b\x32\x1c.mo_caffe.RecurrentParameter\x12\x36\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x1c.mo_caffe.ReductionParameter\x12+\n\nrelu_param\x18{ \x01(\x0b\x32\x17.mo_caffe.ReLUParameter\x12\x32\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x1a.mo_caffe.ReshapeParameter\x12\x32\n\rreverse_param\x18\x93\x01 \x01(\x0b\x32\x1a.mo_caffe.ReverseParameter\x12.\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x18.mo_caffe.ScaleParameter\x12\x31\n\rsigmoid_param\x18| \x01(\x0b\x32\x1a.mo_caffe.SigmoidParameter\x12\x31\n\rsoftmax_param\x18} \x01(\x0b\x32\x1a.mo_caffe.SoftmaxParameter\x12*\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x16.mo_caffe.SPPParameter\x12-\n\x0bslice_param\x18~ \x01(\x0b\x32\x18.mo_caffe.SliceParameter\x12+\n\ntanh_param\x18\x7f \x01(\x0b\x32\x17.mo_caffe.TanHParameter\x12\x36\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x1c.mo_caffe.ThresholdParameter\x12,\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x17.mo_caffe.TileParameter\x12\x39\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1d.mo_caffe.WindowDataParameter\x12\x38\n\x08st_param\x18\x96\x01 \x01(\x0b\x32%.mo_caffe.SpatialTransformerParameter\x12\x31\n\rst_loss_param\x18\x97\x01 \x01(\x0b\x32\x19.mo_caffe.STLossParameter\x12\x37\n\x10power_file_param\x18\x98\x01 \x01(\x0b\x32\x1c.mo_caffe.PowerFileParameter\x12\x33\n\x0eloc_loss_param\x18\x99\x01 \x01(\x0b\x32\x1a.mo_caffe.LocLossParameter\x12\x34\n\x0eproposal_param\x18\xc9\x01 \x01(\x0b\x32\x1b.mo_caffe.ProposalParameter\x12P\n\x1d\x63osine_similarity_batch_param\x18\xca\x01 \x01(\x0b\x32(.mo_caffe.CosineSimilarityBatchParameter\x12\x45\n\x0erss_loss_param\x18\xcb\x01 \x01(\x0b\x32,.mo_caffe.RandomSamplingSoftmaxLossParameter\x12\x31\n\nnorm_param\x18\xcc\x01 \x01(\x0b\x32\x1c.mo_caffe.NormalizeParameter\x12\x39\n\x11roi_warping_param\x18\xcd\x01 \x01(\x0b\x32\x1d.mo_caffe.ROIWarpingParameter\x12=\n\x13psroi_pooling_param\x18\xcf\x01 \x01(\x0b\x32\x1f.mo_caffe.PSROIPoolingParameter\x12\x39\n\x11roi_pooling_param\x18\xd0\x01 \x01(\x0b\x32\x1d.mo_caffe.ROIPoolingParameter\x12>\n\x14smooth_l1_loss_param\x18\xd1\x01 \x01(\x0b\x32\x1f.mo_caffe.SmoothL1LossParameter\x12\x46\n\x18\x62ox_annotator_ohem_param\x18\xd2\x01 \x01(\x0b\x32#.mo_caffe.BoxAnnotatorOHEMParameter\x12\x43\n\x16\x64\x65tection_output_param\x18\xd3\x01 \x01(\x0b\x32\".mo_caffe.DetectionOutputParameter\x12\x35\n\x0fprior_box_param\x18\xd4\x01 \x01(\x0b\x32\x1b.mo_caffe.PriorBoxParameter\x12\x39\n\x11region_yolo_param\x18\xd6\x01 \x01(\x0b\x32\x1d.mo_caffe.RegionYoloParameter\x12\x37\n\x10reorg_yolo_param\x18\xd7\x01 \x01(\x0b\x32\x1c.mo_caffe.ReorgYoloParameter\x12.\n\x0brelu6_param\x18\xd8\x01 \x01(\x0b\x32\x18.mo_caffe.ReLU6Parameter\x12\x30\n\x0cinterp_param\x18\xd9\x01 \x01(\x0b\x32\x19.mo_caffe.InterpParameter\x12<\n\x12\x61ugmentation_param\x18\xda\x01 \x01(\x0b\x32\x1f.mo_caffe.AugmentationParameter\x12:\n\x11\x63orrelation_param\x18\xdb\x01 \x01(\x0b\x32\x1e.mo_caffe.CorrelationParameter\x12\x34\n\x0eresample_param\x18\xdc\x01 \x01(\x0b\x32\x1b.mo_caffe.ResampleParameter\x12\x35\n\x0f\x66low_warp_param\x18\xdd\x01 \x01(\x0b\x32\x1b.mo_caffe.FlowWarpParameter\x12.\n\x0b\x61\x63\x63um_param\x18\xde\x01 \x01(\x0b\x32\x18.mo_caffe.AccumParameter\x12?\n\x14\x63oeff_schedule_param\x18\xdf\x01 \x01(\x0b\x32 .mo_caffe.CoeffScheduleParameter\x12\x41\n\x15shuffle_channel_param\x18\xe0\x01 \x01(\x0b\x32!.mo_caffe.ShuffleChannelParameter\"\x90\x01\n\x0fInterpParameter\x12\x11\n\x06height\x18\x01 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0bzoom_factor\x18\x03 \x01(\x05:\x01\x31\x12\x18\n\rshrink_factor\x18\x04 \x01(\x05:\x01\x31\x12\x12\n\x07pad_beg\x18\x05 \x01(\x05:\x01\x30\x12\x12\n\x07pad_end\x18\x06 \x01(\x05:\x01\x30\"n\n\"RandomSamplingSoftmaxLossParameter\x12 \n\x13random_sampling_num\x18\x01 \x01(\x05:\x03\x31\x30\x30\x12&\n\x16random_sampling_policy\x18\x02 \x01(\t:\x06random\"\xc8\x01\n\x11ProposalParameter\x12\x17\n\x0b\x66\x65\x61t_stride\x18\x01 \x01(\r:\x02\x31\x36\x12\x15\n\tbase_size\x18\x02 \x01(\r:\x02\x31\x36\x12\x14\n\x08min_size\x18\x03 \x01(\r:\x02\x31\x36\x12\r\n\x05ratio\x18\x04 \x03(\x02\x12\r\n\x05scale\x18\x05 \x03(\x02\x12\x1a\n\x0cpre_nms_topn\x18\x06 \x01(\r:\x04\x36\x30\x30\x30\x12\x1a\n\rpost_nms_topn\x18\x07 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x08 \x01(\x02:\x03\x30.7\"\x95\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12/\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x12\n\x03\x65ps\x18\x04 \x01(\x02:\x05\x31\x65-10\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\xb6\x01\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 \x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\"\xb4\x02\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12G\n\rnormalization\x18\x03 \x01(\x0e\x32).mo_caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\x12\x1f\n\x14pre_fixed_normalizer\x18\x04 \x01(\x02:\x01\x31\x12$\n\x15weight_by_label_freqs\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63lass_weighting\x18\x06 \x03(\x02\"Q\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\r\n\tPRE_FIXED\x10\x03\x12\x08\n\x04NONE\x10\x04\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"D\n\x18\x43hannelPermutationAction\x12\x0c\n\x04\x63han\x18\x01 \x02(\r\x12\x0c\n\x04\x63opy\x18\x02 \x01(\r\x12\x0c\n\x04\x66ill\x18\x03 \x01(\x02\"\x9a\x01\n\x1b\x43hannelPermutationParameter\x12\x32\n\x06\x61\x63tion\x18\x01 \x03(\x0b\x32\".mo_caffe.ChannelPermutationAction\x12\x12\n\nnum_output\x18\x10 \x02(\r\x12\x1f\n\x10inplace_possible\x18\x11 \x01(\x08:\x05\x66\x61lse\x12\x12\n\x07version\x18\x12 \x01(\x05:\x01\x30\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"j\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12&\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x05\x30.999\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x31\x65-05\"J\n\x19\x42oxAnnotatorOHEMParameter\x12\x13\n\x0broi_per_img\x18\x01 \x02(\r\x12\x18\n\x0cignore_label\x18\x02 \x01(\x05:\x02-1\"`\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12)\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 \x01(\x08:\x05\x66\x61lse\"\x85\x04\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12\x30\n\rweight_filler\x18\x07 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12>\n\x06\x65ngine\x18\x0f \x01(\x0e\x32%.mo_caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"A\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\x12\x0f\n\x07\x64imsize\x18\x03 \x03(\r\"P\n\x13\x43TCDecoderParameter\x12\x17\n\x0b\x62lank_index\x18\x01 \x01(\x05:\x02-1\x12 \n\x12\x63tc_merge_repeated\x18\x02 \x01(\x08:\x04true\"\xb2\x01\n\x10\x43TCLossParameter\x12\x17\n\x0coutput_delay\x18\x01 \x01(\x05:\x01\x30\x12\x17\n\x0b\x62lank_index\x18\x02 \x01(\x05:\x02-1\x12+\n\x1cpreprocess_collapse_repeated\x18\x03 \x01(\x08:\x05\x66\x61lse\x12 \n\x12\x63tc_merge_repeated\x18\x04 \x01(\x08:\x04true\x12\x1d\n\x12loss_calculation_t\x18\x05 \x01(\x05:\x01\x30\"\xa7\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x34\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x1a.mo_caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x34\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"[\n\x1eNonMaximumSuppressionParameter\x12\x1a\n\rnms_threshold\x18\x01 \x01(\x02:\x03\x30.3\x12\r\n\x05top_k\x18\x02 \x01(\x05\x12\x0e\n\x03\x65ta\x18\x03 \x01(\x02:\x01\x31\"\x99\x04\n\x0fResizeParameter\x12\x0f\n\x04prob\x18\x01 \x01(\x02:\x01\x31\x12@\n\x0bresize_mode\x18\x02 \x01(\x0e\x32%.mo_caffe.ResizeParameter.Resize_mode:\x04WARP\x12\x11\n\x06height\x18\x03 \x01(\r:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\r:\x01\x30\x12\x17\n\x0cheight_scale\x18\x08 \x01(\r:\x01\x30\x12\x16\n\x0bwidth_scale\x18\t \x01(\r:\x01\x30\x12>\n\x08pad_mode\x18\x05 \x01(\x0e\x32\".mo_caffe.ResizeParameter.Pad_mode:\x08\x43ONSTANT\x12\x11\n\tpad_value\x18\x06 \x03(\x02\x12:\n\x0binterp_mode\x18\x07 \x03(\x0e\x32%.mo_caffe.ResizeParameter.Interp_mode\"G\n\x0bResize_mode\x12\x08\n\x04WARP\x10\x01\x12\x12\n\x0e\x46IT_SMALL_SIZE\x10\x02\x12\x1a\n\x16\x46IT_LARGE_SIZE_AND_PAD\x10\x03\":\n\x08Pad_mode\x12\x0c\n\x08\x43ONSTANT\x10\x01\x12\x0c\n\x08MIRRORED\x10\x02\x12\x12\n\x0eREPEAT_NEAREST\x10\x03\"I\n\x0bInterp_mode\x12\n\n\x06LINEAR\x10\x01\x12\x08\n\x04\x41REA\x10\x02\x12\x0b\n\x07NEAREST\x10\x03\x12\t\n\x05\x43UBIC\x10\x04\x12\x0c\n\x08LANCZOS4\x10\x05\"\xdb\x01\n\x13SaveOutputParameter\x12\x18\n\x10output_directory\x18\x01 \x01(\t\x12\x1a\n\x12output_name_prefix\x18\x02 \x01(\t\x12\x15\n\routput_format\x18\x03 \x01(\t\x12\x16\n\x0elabel_map_file\x18\x04 \x01(\t\x12\x16\n\x0ename_size_file\x18\x05 \x01(\t\x12\x16\n\x0enum_test_image\x18\x06 \x01(\r\x12/\n\x0cresize_param\x18\x07 \x01(\x0b\x32\x19.mo_caffe.ResizeParameter\"\x9d\x04\n\x18\x44\x65tectionOutputParameter\x12\x13\n\x0bnum_classes\x18\x01 \x01(\r\x12\x1c\n\x0eshare_location\x18\x02 \x01(\x08:\x04true\x12\x1e\n\x13\x62\x61\x63kground_label_id\x18\x03 \x01(\x05:\x01\x30\x12;\n\tnms_param\x18\x04 \x01(\x0b\x32(.mo_caffe.NonMaximumSuppressionParameter\x12\x38\n\x11save_output_param\x18\x05 \x01(\x0b\x32\x1d.mo_caffe.SaveOutputParameter\x12?\n\tcode_type\x18\x06 \x01(\x0e\x32$.mo_caffe.PriorBoxParameter.CodeType:\x06\x43ORNER\x12)\n\x1avariance_encoded_in_target\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x16\n\nkeep_top_k\x18\x07 \x01(\x05:\x02-1\x12\x1c\n\x14\x63onfidence_threshold\x18\t \x01(\x02\x12\x18\n\tvisualize\x18\n \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x13visualize_threshold\x18\x0b \x01(\x02\x12\x11\n\tsave_file\x18\x0c \x01(\t\x12\x17\n\x0binput_width\x18\r \x01(\x05:\x02-1\x12\x18\n\x0cinput_height\x18\x0e \x01(\x05:\x02-1\x12\x18\n\nnormalized\x18\x0f \x01(\x08:\x04true\".\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\"\xa6\x01\n\x12\x44ummyDataParameter\x12.\n\x0b\x64\x61ta_filler\x18\x01 \x03(\x0b\x32\x19.mo_caffe.FillerParameter\x12\"\n\x05shape\x18\x06 \x03(\x0b\x32\x13.mo_caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa8\x01\n\x10\x45ltwiseParameter\x12<\n\toperation\x18\x01 \x01(\x0e\x32$.mo_caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xb2\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12\x30\n\rweight_filler\x18\x04 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"a\n\x12HingeLossParameter\x12\x33\n\x04norm\x18\x01 \x01(\x0e\x32!.mo_caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\"\xd1\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x30\n\rweight_filler\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"4\n\x0eInputParameter\x12\"\n\x05shape\x18\x01 \x03(\x0b\x32\x13.mo_caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xbe\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12G\n\x0bnorm_region\x18\x04 \x01(\x0e\x32!.mo_caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x36\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1d.mo_caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\x1f\n\x0cGRNParameter\x12\x0f\n\x04\x62ias\x18\x01 \x01(\x02:\x01\x31\"Z\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\"d\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 \x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x31\x65-09\"8\n\x12ParameterParameter\x12\"\n\x05shape\x18\x01 \x01(\x0b\x32\x13.mo_caffe.BlobShape\"\xc1\x03\n\x10PoolingParameter\x12\x38\n\x04pool\x18\x01 \x01(\x0e\x32%.mo_caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12:\n\x06\x65ngine\x18\x0b \x01(\x0e\x32!.mo_caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x17\n\tceil_mode\x18\r \x01(\x08:\x04true\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xd4\x02\n\x11PriorBoxParameter\x12\x10\n\x08min_size\x18\x01 \x03(\x02\x12\x10\n\x08max_size\x18\x02 \x03(\x02\x12\x14\n\x0c\x61spect_ratio\x18\x03 \x03(\x02\x12\x12\n\x04\x66lip\x18\x04 \x01(\x08:\x04true\x12\x13\n\x04\x63lip\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x08variance\x18\x06 \x03(\x02\x12\x10\n\x08img_size\x18\x07 \x01(\r\x12\r\n\x05img_h\x18\x08 \x01(\r\x12\r\n\x05img_w\x18\t \x01(\r\x12\x0c\n\x04step\x18\n \x01(\x02\x12\x0e\n\x06step_h\x18\x0b \x01(\x02\x12\x0e\n\x06step_w\x18\x0c \x01(\x02\x12\x13\n\x06offset\x18\r \x01(\x02:\x03\x30.5\x12\r\n\x05width\x18\x0e \x03(\x02\x12\x0e\n\x06height\x18\x0f \x03(\x02\"8\n\x08\x43odeType\x12\n\n\x06\x43ORNER\x10\x01\x12\x0f\n\x0b\x43\x45NTER_SIZE\x10\x02\x12\x0f\n\x0b\x43ORNER_SIZE\x10\x03\"V\n\x15PSROIPoolingParameter\x12\x15\n\rspatial_scale\x18\x01 \x02(\x02\x12\x12\n\noutput_dim\x18\x02 \x02(\x05\x12\x12\n\ngroup_size\x18\x03 \x02(\x05\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xc6\x01\n\x12RecurrentParameter\x12\x15\n\nnum_output\x18\x01 \x01(\r:\x01\x30\x12\x30\n\rweight_filler\x18\x02 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x19\n\ndebug_info\x18\x04 \x01(\x08:\x05\x66\x61lse\x12\x1c\n\rexpose_hidden\x18\x05 \x01(\x08:\x05\x66\x61lse\"\xb0\x01\n\x12ReductionParameter\x12@\n\toperation\x18\x01 \x01(\x0e\x32(.mo_caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 \x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x90\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x37\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1e.mo_caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\x1e\n\x0eReLU6Parameter\x12\x0c\n\x01n\x18\x01 \x01(\x02:\x01\x36\"]\n\x10ReshapeParameter\x12\"\n\x05shape\x18\x01 \x01(\x0b\x32\x13.mo_caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"#\n\x10ReverseParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"]\n\x17ROIWarpingTestParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"Y\n\x13ROIWarpingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"\xab\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12)\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12.\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\"{\n\x10SigmoidParameter\x12:\n\x06\x65ngine\x18\x01 \x01(\x0e\x32!.mo_caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"\x8c\x01\n\x10SoftmaxParameter\x12:\n\x06\x65ngine\x18\x01 \x01(\x0e\x32!.mo_caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"u\n\rTanHParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.mo_caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"/\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xf1\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x34\n\x04pool\x18\x02 \x01(\x0e\x32!.mo_caffe.SPPParameter.PoolMethod:\x03MAX\x12\x36\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1d.mo_caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xcc\x14\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\'\n\x07include\x18  \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12\'\n\x07\x65xclude\x18! \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12\x32\n\x04type\x18\x05 \x01(\x0e\x32$.mo_caffe.V1LayerParameter.LayerType\x12\"\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12\x41\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32\'.mo_caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x33\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x1b.mo_caffe.AccuracyParameter\x12/\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x19.mo_caffe.ArgMaxParameter\x12/\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x19.mo_caffe.ConcatParameter\x12\x42\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\".mo_caffe.ContrastiveLossParameter\x12\x39\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1e.mo_caffe.ConvolutionParameter\x12+\n\ndata_param\x18\x0b \x01(\x0b\x32\x17.mo_caffe.DataParameter\x12\x31\n\rdropout_param\x18\x0c \x01(\x0b\x32\x1a.mo_caffe.DropoutParameter\x12\x36\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x1c.mo_caffe.DummyDataParameter\x12\x31\n\reltwise_param\x18\x18 \x01(\x0b\x32\x1a.mo_caffe.EltwiseParameter\x12)\n\texp_param\x18) \x01(\x0b\x32\x16.mo_caffe.ExpParameter\x12\x34\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x1b.mo_caffe.HDF5DataParameter\x12\x38\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1d.mo_caffe.HDF5OutputParameter\x12\x36\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x1c.mo_caffe.HingeLossParameter\x12\x36\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x1c.mo_caffe.ImageDataParameter\x12<\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1f.mo_caffe.InfogainLossParameter\x12<\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1f.mo_caffe.InnerProductParameter\x12)\n\tlrn_param\x18\x12 \x01(\x0b\x32\x16.mo_caffe.LRNParameter\x12\x38\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1d.mo_caffe.MemoryDataParameter\x12)\n\tmvn_param\x18\" \x01(\x0b\x32\x16.mo_caffe.MVNParameter\x12\x31\n\rpooling_param\x18\x13 \x01(\x0b\x32\x1a.mo_caffe.PoolingParameter\x12-\n\x0bpower_param\x18\x15 \x01(\x0b\x32\x18.mo_caffe.PowerParameter\x12+\n\nrelu_param\x18\x1e \x01(\x0b\x32\x17.mo_caffe.ReLUParameter\x12\x31\n\rsigmoid_param\x18& \x01(\x0b\x32\x1a.mo_caffe.SigmoidParameter\x12\x31\n\rsoftmax_param\x18\' \x01(\x0b\x32\x1a.mo_caffe.SoftmaxParameter\x12-\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x18.mo_caffe.SliceParameter\x12+\n\ntanh_param\x18% \x01(\x0b\x32\x17.mo_caffe.TanHParameter\x12\x35\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x1c.mo_caffe.ThresholdParameter\x12\x38\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1d.mo_caffe.WindowDataParameter\x12:\n\x0ftransform_param\x18$ \x01(\x0b\x32!.mo_caffe.TransformationParameter\x12+\n\nloss_param\x18* \x01(\x0b\x32\x17.mo_caffe.LossParameter\x12)\n\x05layer\x18\x01 \x01(\x0b\x32\x1a.mo_caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 \x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\x8c\x08\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12\x30\n\rweight_filler\x18\x05 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x38\n\x04pool\x18\x0b \x01(\x0e\x32%.mo_caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 \x01(\x08:\x05\x66\x61lse\x12\"\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x39\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1d.mo_caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"Z\n\x0ePReLUParameter\x12)\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\"\x86\x01\n\x13RegionYoloParameter\x12\x11\n\x06\x63oords\x18\x01 \x01(\x05:\x01\x34\x12\x13\n\x07\x63lasses\x18\x02 \x01(\x05:\x02\x32\x30\x12\x0e\n\x03num\x18\x03 \x01(\x05:\x01\x31\x12\x18\n\ndo_softmax\x18\x04 \x01(\x08:\x04true\x12\x0f\n\x07\x61nchors\x18\x05 \x03(\x02\x12\x0c\n\x04mask\x18\x06 \x03(\x05\"\'\n\x12ReorgYoloParameter\x12\x11\n\x06stride\x18\x01 \x01(\x05:\x01\x31\"\xcf\x01\n\x18RandomGeneratorParameter\x12\x1a\n\trand_type\x18\x01 \x01(\t:\x07uniform\x12\x12\n\x03\x65xp\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x0f\n\x04mean\x18\x04 \x01(\x02:\x01\x30\x12\x11\n\x06spread\x18\x05 \x01(\x02:\x01\x30\x12\x0f\n\x04prob\x18\x06 \x01(\x02:\x01\x31\x12\x1c\n\x0e\x61pply_schedule\x18\x07 \x01(\x08:\x04true\x12\x19\n\ndiscretize\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nmultiplier\x18\t \x01(\x02:\x01\x31\"`\n\x16\x43oeffScheduleParameter\x12\x14\n\thalf_life\x18\x01 \x01(\x02:\x01\x31\x12\x18\n\rinitial_coeff\x18\x02 \x01(\x02:\x01\x31\x12\x16\n\x0b\x66inal_coeff\x18\x03 \x01(\x02:\x01\x31\"\xde\x07\n\x11\x41ugmentationCoeff\x12\x11\n\x06mirror\x18\x01 \x01(\x02:\x01\x30\x12\r\n\x02\x64x\x18\x02 \x01(\x02:\x01\x30\x12\r\n\x02\x64y\x18\x03 \x01(\x02:\x01\x30\x12\x10\n\x05\x61ngle\x18\x04 \x01(\x02:\x01\x30\x12\x11\n\x06zoom_x\x18\x05 \x01(\x02:\x01\x31\x12\x11\n\x06zoom_y\x18\x06 \x01(\x02:\x01\x31\x12\x10\n\x05gamma\x18\x64 \x01(\x02:\x01\x31\x12\x15\n\nbrightness\x18\x65 \x01(\x02:\x01\x30\x12\x13\n\x08\x63ontrast\x18\x66 \x01(\x02:\x01\x31\x12\x11\n\x06\x63olor1\x18g \x01(\x02:\x01\x31\x12\x11\n\x06\x63olor2\x18h \x01(\x02:\x01\x31\x12\x11\n\x06\x63olor3\x18i \x01(\x02:\x01\x31\x12\x16\n\x0bpow_nomean0\x18\n \x01(\x02:\x01\x31\x12\x16\n\x0bpow_nomean1\x18\x0b \x01(\x02:\x01\x31\x12\x16\n\x0bpow_nomean2\x18\x0c \x01(\x02:\x01\x31\x12\x16\n\x0b\x61\x64\x64_nomean0\x18\r \x01(\x02:\x01\x30\x12\x16\n\x0b\x61\x64\x64_nomean1\x18\x0e \x01(\x02:\x01\x30\x12\x16\n\x0b\x61\x64\x64_nomean2\x18\x0f \x01(\x02:\x01\x30\x12\x17\n\x0cmult_nomean0\x18\x10 \x01(\x02:\x01\x31\x12\x17\n\x0cmult_nomean1\x18\x11 \x01(\x02:\x01\x31\x12\x17\n\x0cmult_nomean2\x18\x12 \x01(\x02:\x01\x31\x12\x18\n\rpow_withmean0\x18\x13 \x01(\x02:\x01\x31\x12\x18\n\rpow_withmean1\x18\x14 \x01(\x02:\x01\x31\x12\x18\n\rpow_withmean2\x18\x15 \x01(\x02:\x01\x31\x12\x18\n\radd_withmean0\x18\x16 \x01(\x02:\x01\x30\x12\x18\n\radd_withmean1\x18\x17 \x01(\x02:\x01\x30\x12\x18\n\radd_withmean2\x18\x18 \x01(\x02:\x01\x30\x12\x19\n\x0emult_withmean0\x18\x19 \x01(\x02:\x01\x31\x12\x19\n\x0emult_withmean1\x18\x1a \x01(\x02:\x01\x31\x12\x19\n\x0emult_withmean2\x18\x1b \x01(\x02:\x01\x31\x12\x14\n\tlmult_pow\x18\x1c \x01(\x02:\x01\x31\x12\x14\n\tlmult_add\x18\x1d \x01(\x02:\x01\x30\x12\x15\n\nlmult_mult\x18\x1e \x01(\x02:\x01\x31\x12\x14\n\tcol_angle\x18\x1f \x01(\x02:\x01\x30\x12\x15\n\nfog_amount\x18& \x01(\x02:\x01\x30\x12\x13\n\x08\x66og_size\x18\' \x01(\x02:\x01\x30\x12\x1c\n\x11motion_blur_angle\x18( \x01(\x02:\x01\x30\x12\x1b\n\x10motion_blur_size\x18) \x01(\x02:\x01\x30\x12\x17\n\x0cshadow_angle\x18* \x01(\x02:\x01\x30\x12\x1a\n\x0fshadow_distance\x18+ \x01(\x02:\x01\x30\x12\x1a\n\x0fshadow_strength\x18, \x01(\x02:\x01\x30\x12\x10\n\x05noise\x18- \x01(\x02:\x01\x30\"\xcc\x10\n\x15\x41ugmentationParameter\x12\x15\n\ncrop_width\x18! \x01(\r:\x01\x30\x12\x16\n\x0b\x63rop_height\x18\" \x01(\r:\x01\x30\x12\x19\n\x0fwrite_augmented\x18\x02 \x01(\t:\x00\x12\x1b\n\x0emax_multiplier\x18\x03 \x01(\x02:\x03\x32\x35\x35\x12\"\n\x13\x61ugment_during_test\x18\x04 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0erecompute_mean\x18\x05 \x01(\r:\x01\x30\x12\x14\n\nwrite_mean\x18\x06 \x01(\t:\x00\x12\x1c\n\x0emean_per_pixel\x18\x07 \x01(\x08:\x04true\x12\x0c\n\x04mean\x18\x12 \x03(\x02\x12\x11\n\x04mode\x18\x08 \x01(\t:\x03\x61\x64\x64\x12\x16\n\x0b\x62ottomwidth\x18P \x01(\r:\x01\x30\x12\x17\n\x0c\x62ottomheight\x18Q \x01(\r:\x01\x30\x12\x0e\n\x03num\x18R \x01(\r:\x01\x30\x12\x18\n\x10\x63hromatic_eigvec\x18S \x03(\x02\x12\x32\n\x06mirror\x18\n \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\ttranslate\x18\x0b \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x32\n\x06rotate\x18\x0c \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x30\n\x04zoom\x18\r \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07squeeze\x18\x0e \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x37\n\x0btranslate_x\x18\x0f \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x37\n\x0btranslate_y\x18\x10 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x31\n\x05gamma\x18# \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\nbrightness\x18$ \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08\x63ontrast\x18% \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x31\n\x05\x63olor\x18& \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\tlmult_pow\x18\x14 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\nlmult_mult\x18\x15 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\tlmult_add\x18\x16 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07sat_pow\x18\x17 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08sat_mult\x18\x18 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07sat_add\x18\x19 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07\x63ol_pow\x18\x1a \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08\x63ol_mult\x18\x1b \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07\x63ol_add\x18\x1c \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08ladd_pow\x18\x1d \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\tladd_mult\x18\x1e \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08ladd_add\x18\x1f \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\ncol_rotate\x18  \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\nfog_amount\x18\x64 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08\x66og_size\x18\x65 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12=\n\x11motion_blur_angle\x18\x66 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12<\n\x10motion_blur_size\x18g \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x38\n\x0cshadow_angle\x18h \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12;\n\x0fshadow_distance\x18i \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12;\n\x0fshadow_strength\x18j \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x31\n\x05noise\x18k \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\"\x85\x01\n\x11\x46lowWarpParameter\x12\x43\n\nfill_value\x18\x01 \x01(\x0e\x32).mo_caffe.FlowWarpParameter.FillParameter:\x04ZERO\"+\n\rFillParameter\x12\x08\n\x04ZERO\x10\x01\x12\x10\n\x0cNOT_A_NUMBER\x10\x02\"\xb6\x02\n\x14\x43orrelationParameter\x12\x0e\n\x03pad\x18\x02 \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x03 \x01(\r\x12\x18\n\x10max_displacement\x18\x04 \x01(\r\x12\x13\n\x08stride_1\x18\x05 \x01(\r:\x01\x31\x12\x13\n\x08stride_2\x18\x06 \x01(\r:\x01\x31\x12\x1b\n\x10single_direction\x18\x08 \x01(\x05:\x01\x30\x12\x15\n\x06\x64o_abs\x18\x07 \x01(\x08:\x05\x66\x61lse\x12R\n\x10\x63orrelation_type\x18\x0f \x01(\x0e\x32..mo_caffe.CorrelationParameter.CorrelationType:\x08MULTIPLY\"-\n\x0f\x43orrelationType\x12\x0c\n\x08MULTIPLY\x10\x00\x12\x0c\n\x08SUBTRACT\x10\x01\"\xdc\x01\n\x11ResampleParameter\x12\x17\n\tantialias\x18\x04 \x01(\x08:\x04true\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\x12>\n\x04type\x18\x03 \x01(\x0e\x32(.mo_caffe.ResampleParameter.ResampleType:\x06LINEAR\x12\x11\n\x06\x66\x61\x63tor\x18\x05 \x01(\x02:\x01\x31\"<\n\x0cResampleType\x12\x0b\n\x07NEAREST\x10\x01\x12\n\n\x06LINEAR\x10\x02\x12\t\n\x05\x43UBIC\x10\x03\x12\x08\n\x04\x41REA\x10\x04\"z\n\x0e\x41\x63\x63umParameter\x12\x15\n\ntop_height\x18\x01 \x01(\r:\x01\x30\x12\x14\n\ttop_width\x18\x02 \x01(\r:\x01\x30\x12\x1c\n\x11size_divisible_by\x18\x03 \x01(\r:\x01\x30\x12\x1d\n\x0ehave_reference\x18\x04 \x01(\x08:\x05\x66\x61lse\"(\n\x17ShuffleChannelParameter\x12\r\n\x05group\x18\x01 \x02(\r*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
+  serialized_pb=_b('\n\x0emo_caffe.proto\x12\x08mo_caffe\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 \x03(\x03\x42\x02\x10\x01\"\xcf\x01\n\tBlobProto\x12\"\n\x05shape\x18\x07 \x01(\x0b\x32\x13.mo_caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"5\n\x0f\x42lobProtoVector\x12\"\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x13.mo_caffe.BlobProto\"M\n\x1e\x43osineSimilarityBatchParameter\x12\x14\n\tpos_label\x18\x01 \x01(\x01:\x01\x31\x12\x15\n\tneg_label\x18\x02 \x01(\x01:\x02-1\"\x81\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\"A\n\x0cLabelMapItem\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05label\x18\x02 \x01(\x05\x12\x14\n\x0c\x64isplay_name\x18\x03 \x01(\t\"0\n\x08LabelMap\x12$\n\x04item\x18\x01 \x03(\x0b\x32\x16.mo_caffe.LabelMapItem\"\x87\x01\n\x0eNormalizedBBox\x12\x0c\n\x04xmin\x18\x01 \x01(\x02\x12\x0c\n\x04ymin\x18\x02 \x01(\x02\x12\x0c\n\x04xmax\x18\x03 \x01(\x02\x12\x0c\n\x04ymax\x18\x04 \x01(\x02\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x11\n\tdifficult\x18\x06 \x01(\x08\x12\r\n\x05score\x18\x07 \x01(\x02\x12\x0c\n\x04size\x18\x08 \x01(\x02\"\xad\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x45\n\rvariance_norm\x18\x08 \x01(\x0e\x32&.mo_caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\x12\x0c\n\x04\x66ile\x18\t \x01(\t\x12\x10\n\x08\x64iag_val\x18\n \x03(\x02\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\xed\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12(\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x13.mo_caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12!\n\x05state\x18\x06 \x01(\x0b\x32\x12.mo_caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0cprofile_info\x18\t \x01(\x08:\x05\x66\x61lse\x12\x18\n\x0cprofile_iter\x18\n \x01(\x05:\x02\x35\x30\x12\x1a\n\x0eprofile_warmup\x18\x0b \x01(\x05:\x02\x31\x30\x12\'\n\x05layer\x18\x64 \x03(\x0b\x32\x18.mo_caffe.LayerParameter\x12*\n\x06layers\x18\x02 \x03(\x0b\x32\x1a.mo_caffe.V1LayerParameter\"\xf4\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12)\n\tnet_param\x18\x19 \x01(\x0b\x32\x16.mo_caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12/\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x16.mo_caffe.NetParameter\x12.\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x16.mo_caffe.NetParameter\x12\'\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x12.mo_caffe.NetState\x12&\n\ntest_state\x18\x1b \x03(\x0b\x32\x12.mo_caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18  \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x0f\n\x07\x64isplay\x18\x06 \x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! \x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x17\n\x0fplateau_winsize\x18* \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12N\n\x0fsnapshot_format\x18% \x01(\x0e\x32(.mo_caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12>\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32$.mo_caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x14\n\x05\x64\x65lta\x18\x1f \x01(\x02:\x05\x31\x65-08\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12>\n\x0bsolver_type\x18\x1e \x01(\x0e\x32$.mo_caffe.SolverParameter.SolverType:\x03SGD\x12\x1f\n\x11layer_wise_reduce\x18) \x01(\x08:\x04true\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"\xa8\x01\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12$\n\x07history\x18\x03 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\x12\x1b\n\x0cminimum_loss\x18\x05 \x01(\x02:\x05\x31\x65+38\x12\x1a\n\x0fiter_last_event\x18\x06 \x01(\x05:\x01\x30\"Q\n\x08NetState\x12$\n\x05phase\x18\x01 \x01(\x0e\x32\x0f.mo_caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"v\n\x0cNetStateRule\x12\x1e\n\x05phase\x18\x01 \x01(\x0e\x32\x0f.mo_caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\"\xad\x02\n\x1bSpatialTransformerParameter\x12\x1e\n\x0etransform_type\x18\x01 \x01(\t:\x06\x61\x66\x66ine\x12\x1e\n\x0csampler_type\x18\x02 \x01(\t:\x08\x62ilinear\x12\x10\n\x08output_H\x18\x03 \x01(\x05\x12\x10\n\x08output_W\x18\x04 \x01(\x05\x12\x1b\n\rto_compute_dU\x18\x05 \x01(\x08:\x04true\x12\x11\n\ttheta_1_1\x18\x06 \x01(\x01\x12\x11\n\ttheta_1_2\x18\x07 \x01(\x01\x12\x11\n\ttheta_1_3\x18\x08 \x01(\x01\x12\x11\n\ttheta_2_1\x18\t \x01(\x01\x12\x11\n\ttheta_2_2\x18\n \x01(\x01\x12\x11\n\ttheta_2_3\x18\x0b \x01(\x01\x12\x1b\n\x0c\x64\x65_transform\x18\x0c \x01(\x08:\x05\x66\x61lse\"(\n\x12PowerFileParameter\x12\x12\n\nshift_file\x18\x01 \x01(\t\"5\n\x0fSTLossParameter\x12\x10\n\x08output_H\x18\x01 \x02(\x05\x12\x10\n\x08output_W\x18\x02 \x02(\x05\"%\n\x10LocLossParameter\x12\x11\n\tthreshold\x18\x01 \x02(\x01\"\xa6\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x34\n\nshare_mode\x18\x02 \x01(\x0e\x32 .mo_caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xf4#\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1e\n\x05phase\x18\n \x01(\x0e\x32\x0f.mo_caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 \x03(\x02\x12\"\n\x05param\x18\x06 \x03(\x0b\x32\x13.mo_caffe.ParamSpec\x12\"\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12\'\n\x07include\x18\x08 \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12\'\n\x07\x65xclude\x18\t \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12:\n\x0ftransform_param\x18\x64 \x01(\x0b\x32!.mo_caffe.TransformationParameter\x12+\n\nloss_param\x18\x65 \x01(\x0b\x32\x17.mo_caffe.LossParameter\x12\x33\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x1b.mo_caffe.AccuracyParameter\x12/\n\x0c\x61rgmax_param\x18g \x01(\x0b\x32\x19.mo_caffe.ArgMaxParameter\x12\x37\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x1c.mo_caffe.BatchNormParameter\x12,\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x17.mo_caffe.BiasParameter\x12I\n\x19\x63hannel_permutation_param\x18\x92? \x01(\x0b\x32%.mo_caffe.ChannelPermutationParameter\x12/\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x19.mo_caffe.ConcatParameter\x12\x42\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\".mo_caffe.ContrastiveLossParameter\x12\x39\n\x11\x63onvolution_param\x18j \x01(\x0b\x32\x1e.mo_caffe.ConvolutionParameter\x12,\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x17.mo_caffe.CropParameter\x12\x39\n\x11\x63tc_decoder_param\x18\x95\x01 \x01(\x0b\x32\x1d.mo_caffe.CTCDecoderParameter\x12\x33\n\x0e\x63tc_loss_param\x18\x94\x01 \x01(\x0b\x32\x1a.mo_caffe.CTCLossParameter\x12+\n\ndata_param\x18k \x01(\x0b\x32\x17.mo_caffe.DataParameter\x12\x31\n\rdropout_param\x18l \x01(\x0b\x32\x1a.mo_caffe.DropoutParameter\x12\x36\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x1c.mo_caffe.DummyDataParameter\x12\x31\n\reltwise_param\x18n \x01(\x0b\x32\x1a.mo_caffe.EltwiseParameter\x12*\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x16.mo_caffe.ELUParameter\x12.\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x18.mo_caffe.EmbedParameter\x12)\n\texp_param\x18o \x01(\x0b\x32\x16.mo_caffe.ExpParameter\x12\x32\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x1a.mo_caffe.FlattenParameter\x12*\n\tgrn_param\x18\xd5\x01 \x01(\x0b\x32\x16.mo_caffe.GRNParameter\x12\x34\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x1b.mo_caffe.HDF5DataParameter\x12\x38\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1d.mo_caffe.HDF5OutputParameter\x12\x36\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x1c.mo_caffe.HingeLossParameter\x12\x36\n\x10image_data_param\x18s \x01(\x0b\x32\x1c.mo_caffe.ImageDataParameter\x12<\n\x13infogain_loss_param\x18t \x01(\x0b\x32\x1f.mo_caffe.InfogainLossParameter\x12<\n\x13inner_product_param\x18u \x01(\x0b\x32\x1f.mo_caffe.InnerProductParameter\x12.\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x18.mo_caffe.InputParameter\x12*\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x16.mo_caffe.LogParameter\x12)\n\tlrn_param\x18v \x01(\x0b\x32\x16.mo_caffe.LRNParameter\x12\x38\n\x11memory_data_param\x18w \x01(\x0b\x32\x1d.mo_caffe.MemoryDataParameter\x12)\n\tmvn_param\x18x \x01(\x0b\x32\x16.mo_caffe.MVNParameter\x12\x36\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x1c.mo_caffe.ParameterParameter\x12\x31\n\rpooling_param\x18y \x01(\x0b\x32\x1a.mo_caffe.PoolingParameter\x12\x32\n\rpermute_param\x18\x9a\x01 \x01(\x0b\x32\x1a.mo_caffe.PermuteParameter\x12-\n\x0bpower_param\x18z \x01(\x0b\x32\x18.mo_caffe.PowerParameter\x12.\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x18.mo_caffe.PReLUParameter\x12\x30\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x19.mo_caffe.PythonParameter\x12\x36\n\x0frecurrent_param\x18\x92\x01 \x01(\x0b\x32\x1c.mo_caffe.RecurrentParameter\x12\x36\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x1c.mo_caffe.ReductionParameter\x12+\n\nrelu_param\x18{ \x01(\x0b\x32\x17.mo_caffe.ReLUParameter\x12\x32\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x1a.mo_caffe.ReshapeParameter\x12\x32\n\rreverse_param\x18\x93\x01 \x01(\x0b\x32\x1a.mo_caffe.ReverseParameter\x12.\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x18.mo_caffe.ScaleParameter\x12\x31\n\rsigmoid_param\x18| \x01(\x0b\x32\x1a.mo_caffe.SigmoidParameter\x12\x31\n\rsoftmax_param\x18} \x01(\x0b\x32\x1a.mo_caffe.SoftmaxParameter\x12*\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x16.mo_caffe.SPPParameter\x12-\n\x0bslice_param\x18~ \x01(\x0b\x32\x18.mo_caffe.SliceParameter\x12+\n\ntanh_param\x18\x7f \x01(\x0b\x32\x17.mo_caffe.TanHParameter\x12\x36\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x1c.mo_caffe.ThresholdParameter\x12,\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x17.mo_caffe.TileParameter\x12\x39\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1d.mo_caffe.WindowDataParameter\x12\x38\n\x08st_param\x18\x96\x01 \x01(\x0b\x32%.mo_caffe.SpatialTransformerParameter\x12\x31\n\rst_loss_param\x18\x97\x01 \x01(\x0b\x32\x19.mo_caffe.STLossParameter\x12\x37\n\x10power_file_param\x18\x98\x01 \x01(\x0b\x32\x1c.mo_caffe.PowerFileParameter\x12\x33\n\x0eloc_loss_param\x18\x99\x01 \x01(\x0b\x32\x1a.mo_caffe.LocLossParameter\x12\x34\n\x0eproposal_param\x18\xc9\x01 \x01(\x0b\x32\x1b.mo_caffe.ProposalParameter\x12P\n\x1d\x63osine_similarity_batch_param\x18\xca\x01 \x01(\x0b\x32(.mo_caffe.CosineSimilarityBatchParameter\x12\x45\n\x0erss_loss_param\x18\xcb\x01 \x01(\x0b\x32,.mo_caffe.RandomSamplingSoftmaxLossParameter\x12\x31\n\nnorm_param\x18\xcc\x01 \x01(\x0b\x32\x1c.mo_caffe.NormalizeParameter\x12\x39\n\x11roi_warping_param\x18\xcd\x01 \x01(\x0b\x32\x1d.mo_caffe.ROIWarpingParameter\x12=\n\x13psroi_pooling_param\x18\xcf\x01 \x01(\x0b\x32\x1f.mo_caffe.PSROIPoolingParameter\x12\x39\n\x11roi_pooling_param\x18\xd0\x01 \x01(\x0b\x32\x1d.mo_caffe.ROIPoolingParameter\x12>\n\x14smooth_l1_loss_param\x18\xd1\x01 \x01(\x0b\x32\x1f.mo_caffe.SmoothL1LossParameter\x12\x46\n\x18\x62ox_annotator_ohem_param\x18\xd2\x01 \x01(\x0b\x32#.mo_caffe.BoxAnnotatorOHEMParameter\x12\x43\n\x16\x64\x65tection_output_param\x18\xd3\x01 \x01(\x0b\x32\".mo_caffe.DetectionOutputParameter\x12\x35\n\x0fprior_box_param\x18\xd4\x01 \x01(\x0b\x32\x1b.mo_caffe.PriorBoxParameter\x12\x39\n\x11region_yolo_param\x18\xd6\x01 \x01(\x0b\x32\x1d.mo_caffe.RegionYoloParameter\x12\x37\n\x10reorg_yolo_param\x18\xd7\x01 \x01(\x0b\x32\x1c.mo_caffe.ReorgYoloParameter\x12.\n\x0brelu6_param\x18\xd8\x01 \x01(\x0b\x32\x18.mo_caffe.ReLU6Parameter\x12\x30\n\x0cinterp_param\x18\xd9\x01 \x01(\x0b\x32\x19.mo_caffe.InterpParameter\x12<\n\x12\x61ugmentation_param\x18\xda\x01 \x01(\x0b\x32\x1f.mo_caffe.AugmentationParameter\x12:\n\x11\x63orrelation_param\x18\xdb\x01 \x01(\x0b\x32\x1e.mo_caffe.CorrelationParameter\x12\x34\n\x0eresample_param\x18\xdc\x01 \x01(\x0b\x32\x1b.mo_caffe.ResampleParameter\x12\x35\n\x0f\x66low_warp_param\x18\xdd\x01 \x01(\x0b\x32\x1b.mo_caffe.FlowWarpParameter\x12.\n\x0b\x61\x63\x63um_param\x18\xde\x01 \x01(\x0b\x32\x18.mo_caffe.AccumParameter\x12?\n\x14\x63oeff_schedule_param\x18\xdf\x01 \x01(\x0b\x32 .mo_caffe.CoeffScheduleParameter\x12\x41\n\x15shuffle_channel_param\x18\xe0\x01 \x01(\x0b\x32!.mo_caffe.ShuffleChannelParameter\"\x90\x01\n\x0fInterpParameter\x12\x11\n\x06height\x18\x01 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x02 \x01(\x05:\x01\x30\x12\x16\n\x0bzoom_factor\x18\x03 \x01(\x05:\x01\x31\x12\x18\n\rshrink_factor\x18\x04 \x01(\x05:\x01\x31\x12\x12\n\x07pad_beg\x18\x05 \x01(\x05:\x01\x30\x12\x12\n\x07pad_end\x18\x06 \x01(\x05:\x01\x30\"n\n\"RandomSamplingSoftmaxLossParameter\x12 \n\x13random_sampling_num\x18\x01 \x01(\x05:\x03\x31\x30\x30\x12&\n\x16random_sampling_policy\x18\x02 \x01(\t:\x06random\"\xc8\x01\n\x11ProposalParameter\x12\x17\n\x0b\x66\x65\x61t_stride\x18\x01 \x01(\r:\x02\x31\x36\x12\x15\n\tbase_size\x18\x02 \x01(\r:\x02\x31\x36\x12\x14\n\x08min_size\x18\x03 \x01(\r:\x02\x31\x36\x12\r\n\x05ratio\x18\x04 \x03(\x02\x12\r\n\x05scale\x18\x05 \x03(\x02\x12\x1a\n\x0cpre_nms_topn\x18\x06 \x01(\r:\x04\x36\x30\x30\x30\x12\x1a\n\rpost_nms_topn\x18\x07 \x01(\r:\x03\x33\x30\x30\x12\x17\n\nnms_thresh\x18\x08 \x01(\x02:\x03\x30.7\"\x95\x01\n\x12NormalizeParameter\x12\x1c\n\x0e\x61\x63ross_spatial\x18\x01 \x01(\x08:\x04true\x12/\n\x0cscale_filler\x18\x02 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x1c\n\x0e\x63hannel_shared\x18\x03 \x01(\x08:\x04true\x12\x12\n\x03\x65ps\x18\x04 \x01(\x02:\x05\x31\x65-10\"!\n\x10PermuteParameter\x12\r\n\x05order\x18\x01 \x03(\r\"\xb6\x01\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 \x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\"\xb4\x02\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12G\n\rnormalization\x18\x03 \x01(\x0e\x32).mo_caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\x12\x1f\n\x14pre_fixed_normalizer\x18\x04 \x01(\x02:\x01\x31\x12$\n\x15weight_by_label_freqs\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63lass_weighting\x18\x06 \x03(\x02\"Q\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\r\n\tPRE_FIXED\x10\x03\x12\x08\n\x04NONE\x10\x04\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"D\n\x18\x43hannelPermutationAction\x12\x0c\n\x04\x63han\x18\x01 \x02(\r\x12\x0c\n\x04\x63opy\x18\x02 \x01(\r\x12\x0c\n\x04\x66ill\x18\x03 \x01(\x02\"\x9a\x01\n\x1b\x43hannelPermutationParameter\x12\x32\n\x06\x61\x63tion\x18\x01 \x03(\x0b\x32\".mo_caffe.ChannelPermutationAction\x12\x12\n\nnum_output\x18\x10 \x02(\r\x12\x1f\n\x10inplace_possible\x18\x11 \x01(\x08:\x05\x66\x61lse\x12\x12\n\x07version\x18\x12 \x01(\x05:\x01\x30\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"j\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12&\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x05\x30.999\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x31\x65-05\"J\n\x19\x42oxAnnotatorOHEMParameter\x12\x13\n\x0broi_per_img\x18\x01 \x02(\r\x12\x18\n\x0cignore_label\x18\x02 \x01(\x05:\x02-1\"`\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12)\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 \x01(\x08:\x05\x66\x61lse\"\x85\x04\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12\x30\n\rweight_filler\x18\x07 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12>\n\x06\x65ngine\x18\x0f \x01(\x0e\x32%.mo_caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"A\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\x12\x0f\n\x07\x64imsize\x18\x03 \x03(\r\"P\n\x13\x43TCDecoderParameter\x12\x17\n\x0b\x62lank_index\x18\x01 \x01(\x05:\x02-1\x12 \n\x12\x63tc_merge_repeated\x18\x02 \x01(\x08:\x04true\"\xb2\x01\n\x10\x43TCLossParameter\x12\x17\n\x0coutput_delay\x18\x01 \x01(\x05:\x01\x30\x12\x17\n\x0b\x62lank_index\x18\x02 \x01(\x05:\x02-1\x12+\n\x1cpreprocess_collapse_repeated\x18\x03 \x01(\x08:\x05\x66\x61lse\x12 \n\x12\x63tc_merge_repeated\x18\x04 \x01(\x08:\x04true\x12\x1d\n\x12loss_calculation_t\x18\x05 \x01(\x05:\x01\x30\"\xa7\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x34\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x1a.mo_caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x34\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\"[\n\x1eNonMaximumSuppressionParameter\x12\x1a\n\rnms_threshold\x18\x01 \x01(\x02:\x03\x30.3\x12\r\n\x05top_k\x18\x02 \x01(\x05\x12\x0e\n\x03\x65ta\x18\x03 \x01(\x02:\x01\x31\"\x99\x04\n\x0fResizeParameter\x12\x0f\n\x04prob\x18\x01 \x01(\x02:\x01\x31\x12@\n\x0bresize_mode\x18\x02 \x01(\x0e\x32%.mo_caffe.ResizeParameter.Resize_mode:\x04WARP\x12\x11\n\x06height\x18\x03 \x01(\r:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\r:\x01\x30\x12\x17\n\x0cheight_scale\x18\x08 \x01(\r:\x01\x30\x12\x16\n\x0bwidth_scale\x18\t \x01(\r:\x01\x30\x12>\n\x08pad_mode\x18\x05 \x01(\x0e\x32\".mo_caffe.ResizeParameter.Pad_mode:\x08\x43ONSTANT\x12\x11\n\tpad_value\x18\x06 \x03(\x02\x12:\n\x0binterp_mode\x18\x07 \x03(\x0e\x32%.mo_caffe.ResizeParameter.Interp_mode\"G\n\x0bResize_mode\x12\x08\n\x04WARP\x10\x01\x12\x12\n\x0e\x46IT_SMALL_SIZE\x10\x02\x12\x1a\n\x16\x46IT_LARGE_SIZE_AND_PAD\x10\x03\":\n\x08Pad_mode\x12\x0c\n\x08\x43ONSTANT\x10\x01\x12\x0c\n\x08MIRRORED\x10\x02\x12\x12\n\x0eREPEAT_NEAREST\x10\x03\"I\n\x0bInterp_mode\x12\n\n\x06LINEAR\x10\x01\x12\x08\n\x04\x41REA\x10\x02\x12\x0b\n\x07NEAREST\x10\x03\x12\t\n\x05\x43UBIC\x10\x04\x12\x0c\n\x08LANCZOS4\x10\x05\"\xdb\x01\n\x13SaveOutputParameter\x12\x18\n\x10output_directory\x18\x01 \x01(\t\x12\x1a\n\x12output_name_prefix\x18\x02 \x01(\t\x12\x15\n\routput_format\x18\x03 \x01(\t\x12\x16\n\x0elabel_map_file\x18\x04 \x01(\t\x12\x16\n\x0ename_size_file\x18\x05 \x01(\t\x12\x16\n\x0enum_test_image\x18\x06 \x01(\r\x12/\n\x0cresize_param\x18\x07 \x01(\x0b\x32\x19.mo_caffe.ResizeParameter\"\xbd\x04\n\x18\x44\x65tectionOutputParameter\x12\x13\n\x0bnum_classes\x18\x01 \x01(\r\x12\x1c\n\x0eshare_location\x18\x02 \x01(\x08:\x04true\x12\x1e\n\x13\x62\x61\x63kground_label_id\x18\x03 \x01(\x05:\x01\x30\x12;\n\tnms_param\x18\x04 \x01(\x0b\x32(.mo_caffe.NonMaximumSuppressionParameter\x12\x38\n\x11save_output_param\x18\x05 \x01(\x0b\x32\x1d.mo_caffe.SaveOutputParameter\x12?\n\tcode_type\x18\x06 \x01(\x0e\x32$.mo_caffe.PriorBoxParameter.CodeType:\x06\x43ORNER\x12)\n\x1avariance_encoded_in_target\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x16\n\nkeep_top_k\x18\x07 \x01(\x05:\x02-1\x12\x1c\n\x14\x63onfidence_threshold\x18\t \x01(\x02\x12\x18\n\tvisualize\x18\n \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x13visualize_threshold\x18\x0b \x01(\x02\x12\x11\n\tsave_file\x18\x0c \x01(\t\x12\x17\n\x0binput_width\x18\r \x01(\x05:\x02-1\x12\x18\n\x0cinput_height\x18\x0e \x01(\x05:\x02-1\x12\x18\n\nnormalized\x18\x0f \x01(\x08:\x04true\x12\x1e\n\x10objectness_score\x18\x10 \x01(\x02:\x04\x30.01\".\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\"\xa6\x01\n\x12\x44ummyDataParameter\x12.\n\x0b\x64\x61ta_filler\x18\x01 \x03(\x0b\x32\x19.mo_caffe.FillerParameter\x12\"\n\x05shape\x18\x06 \x03(\x0b\x32\x13.mo_caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 \x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa8\x01\n\x10\x45ltwiseParameter\x12<\n\toperation\x18\x01 \x01(\x0e\x32$.mo_caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xb2\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12\x30\n\rweight_filler\x18\x04 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"a\n\x12HingeLossParameter\x12\x33\n\x04norm\x18\x01 \x01(\x0e\x32!.mo_caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"\'\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\"\xd1\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x30\n\rweight_filler\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"4\n\x0eInputParameter\x12\"\n\x05shape\x18\x01 \x03(\x0b\x32\x13.mo_caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xbe\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12G\n\x0bnorm_region\x18\x04 \x01(\x0e\x32!.mo_caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x36\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1d.mo_caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\x1f\n\x0cGRNParameter\x12\x0f\n\x04\x62ias\x18\x01 \x01(\x02:\x01\x31\"Z\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 \x01(\r\"d\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 \x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x31\x65-09\"8\n\x12ParameterParameter\x12\"\n\x05shape\x18\x01 \x01(\x0b\x32\x13.mo_caffe.BlobShape\"\xc1\x03\n\x10PoolingParameter\x12\x38\n\x04pool\x18\x01 \x01(\x0e\x32%.mo_caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12:\n\x06\x65ngine\x18\x0b \x01(\x0e\x32!.mo_caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x17\n\tceil_mode\x18\r \x01(\x08:\x04true\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xd4\x02\n\x11PriorBoxParameter\x12\x10\n\x08min_size\x18\x01 \x03(\x02\x12\x10\n\x08max_size\x18\x02 \x03(\x02\x12\x14\n\x0c\x61spect_ratio\x18\x03 \x03(\x02\x12\x12\n\x04\x66lip\x18\x04 \x01(\x08:\x04true\x12\x13\n\x04\x63lip\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x08variance\x18\x06 \x03(\x02\x12\x10\n\x08img_size\x18\x07 \x01(\r\x12\r\n\x05img_h\x18\x08 \x01(\r\x12\r\n\x05img_w\x18\t \x01(\r\x12\x0c\n\x04step\x18\n \x01(\x02\x12\x0e\n\x06step_h\x18\x0b \x01(\x02\x12\x0e\n\x06step_w\x18\x0c \x01(\x02\x12\x13\n\x06offset\x18\r \x01(\x02:\x03\x30.5\x12\r\n\x05width\x18\x0e \x03(\x02\x12\x0e\n\x06height\x18\x0f \x03(\x02\"8\n\x08\x43odeType\x12\n\n\x06\x43ORNER\x10\x01\x12\x0f\n\x0b\x43\x45NTER_SIZE\x10\x02\x12\x0f\n\x0b\x43ORNER_SIZE\x10\x03\"V\n\x15PSROIPoolingParameter\x12\x15\n\rspatial_scale\x18\x01 \x02(\x02\x12\x12\n\noutput_dim\x18\x02 \x02(\x05\x12\x12\n\ngroup_size\x18\x03 \x02(\x05\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xc6\x01\n\x12RecurrentParameter\x12\x15\n\nnum_output\x18\x01 \x01(\r:\x01\x30\x12\x30\n\rweight_filler\x18\x02 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x19\n\ndebug_info\x18\x04 \x01(\x08:\x05\x66\x61lse\x12\x1c\n\rexpose_hidden\x18\x05 \x01(\x08:\x05\x66\x61lse\"\xb0\x01\n\x12ReductionParameter\x12@\n\toperation\x18\x01 \x01(\x0e\x32(.mo_caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 \x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x90\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x37\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1e.mo_caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\x1e\n\x0eReLU6Parameter\x12\x0c\n\x01n\x18\x01 \x01(\x02:\x01\x36\"]\n\x10ReshapeParameter\x12\"\n\x05shape\x18\x01 \x01(\x0b\x32\x13.mo_caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"#\n\x10ReverseParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x30\"Y\n\x13ROIPoolingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"]\n\x17ROIWarpingTestParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"Y\n\x13ROIWarpingParameter\x12\x13\n\x08pooled_h\x18\x01 \x01(\r:\x01\x30\x12\x13\n\x08pooled_w\x18\x02 \x01(\r:\x01\x30\x12\x18\n\rspatial_scale\x18\x03 \x01(\x02:\x01\x31\"\xab\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12)\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12.\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\"{\n\x10SigmoidParameter\x12:\n\x06\x65ngine\x18\x01 \x01(\x0e\x32!.mo_caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\")\n\x15SmoothL1LossParameter\x12\x10\n\x05sigma\x18\x01 \x01(\x02:\x01\x31\"\x8c\x01\n\x10SoftmaxParameter\x12:\n\x06\x65ngine\x18\x01 \x01(\x0e\x32!.mo_caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"u\n\rTanHParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.mo_caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"/\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xf1\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x34\n\x04pool\x18\x02 \x01(\x0e\x32!.mo_caffe.SPPParameter.PoolMethod:\x03MAX\x12\x36\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1d.mo_caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xcc\x14\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12\'\n\x07include\x18  \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12\'\n\x07\x65xclude\x18! \x03(\x0b\x32\x16.mo_caffe.NetStateRule\x12\x32\n\x04type\x18\x05 \x01(\x0e\x32$.mo_caffe.V1LayerParameter.LayerType\x12\"\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12\x41\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32\'.mo_caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x33\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x1b.mo_caffe.AccuracyParameter\x12/\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x19.mo_caffe.ArgMaxParameter\x12/\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x19.mo_caffe.ConcatParameter\x12\x42\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\".mo_caffe.ContrastiveLossParameter\x12\x39\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1e.mo_caffe.ConvolutionParameter\x12+\n\ndata_param\x18\x0b \x01(\x0b\x32\x17.mo_caffe.DataParameter\x12\x31\n\rdropout_param\x18\x0c \x01(\x0b\x32\x1a.mo_caffe.DropoutParameter\x12\x36\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x1c.mo_caffe.DummyDataParameter\x12\x31\n\reltwise_param\x18\x18 \x01(\x0b\x32\x1a.mo_caffe.EltwiseParameter\x12)\n\texp_param\x18) \x01(\x0b\x32\x16.mo_caffe.ExpParameter\x12\x34\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x1b.mo_caffe.HDF5DataParameter\x12\x38\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1d.mo_caffe.HDF5OutputParameter\x12\x36\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x1c.mo_caffe.HingeLossParameter\x12\x36\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x1c.mo_caffe.ImageDataParameter\x12<\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1f.mo_caffe.InfogainLossParameter\x12<\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1f.mo_caffe.InnerProductParameter\x12)\n\tlrn_param\x18\x12 \x01(\x0b\x32\x16.mo_caffe.LRNParameter\x12\x38\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1d.mo_caffe.MemoryDataParameter\x12)\n\tmvn_param\x18\" \x01(\x0b\x32\x16.mo_caffe.MVNParameter\x12\x31\n\rpooling_param\x18\x13 \x01(\x0b\x32\x1a.mo_caffe.PoolingParameter\x12-\n\x0bpower_param\x18\x15 \x01(\x0b\x32\x18.mo_caffe.PowerParameter\x12+\n\nrelu_param\x18\x1e \x01(\x0b\x32\x17.mo_caffe.ReLUParameter\x12\x31\n\rsigmoid_param\x18& \x01(\x0b\x32\x1a.mo_caffe.SigmoidParameter\x12\x31\n\rsoftmax_param\x18\' \x01(\x0b\x32\x1a.mo_caffe.SoftmaxParameter\x12-\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x18.mo_caffe.SliceParameter\x12+\n\ntanh_param\x18% \x01(\x0b\x32\x17.mo_caffe.TanHParameter\x12\x35\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x1c.mo_caffe.ThresholdParameter\x12\x38\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1d.mo_caffe.WindowDataParameter\x12:\n\x0ftransform_param\x18$ \x01(\x0b\x32!.mo_caffe.TransformationParameter\x12+\n\nloss_param\x18* \x01(\x0b\x32\x17.mo_caffe.LossParameter\x12)\n\x05layer\x18\x01 \x01(\x0b\x32\x1a.mo_caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 \x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\x8c\x08\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12\x30\n\rweight_filler\x18\x05 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12.\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x38\n\x04pool\x18\x0b \x01(\x0e\x32%.mo_caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 \x01(\x08:\x05\x66\x61lse\x12\"\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x13.mo_caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? \x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x39\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1d.mo_caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"Z\n\x0ePReLUParameter\x12)\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x19.mo_caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse\"\x86\x01\n\x13RegionYoloParameter\x12\x11\n\x06\x63oords\x18\x01 \x01(\x05:\x01\x34\x12\x13\n\x07\x63lasses\x18\x02 \x01(\x05:\x02\x32\x30\x12\x0e\n\x03num\x18\x03 \x01(\x05:\x01\x31\x12\x18\n\ndo_softmax\x18\x04 \x01(\x08:\x04true\x12\x0f\n\x07\x61nchors\x18\x05 \x03(\x02\x12\x0c\n\x04mask\x18\x06 \x03(\x05\"\'\n\x12ReorgYoloParameter\x12\x11\n\x06stride\x18\x01 \x01(\x05:\x01\x31\"\xcf\x01\n\x18RandomGeneratorParameter\x12\x1a\n\trand_type\x18\x01 \x01(\t:\x07uniform\x12\x12\n\x03\x65xp\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x0f\n\x04mean\x18\x04 \x01(\x02:\x01\x30\x12\x11\n\x06spread\x18\x05 \x01(\x02:\x01\x30\x12\x0f\n\x04prob\x18\x06 \x01(\x02:\x01\x31\x12\x1c\n\x0e\x61pply_schedule\x18\x07 \x01(\x08:\x04true\x12\x19\n\ndiscretize\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nmultiplier\x18\t \x01(\x02:\x01\x31\"`\n\x16\x43oeffScheduleParameter\x12\x14\n\thalf_life\x18\x01 \x01(\x02:\x01\x31\x12\x18\n\rinitial_coeff\x18\x02 \x01(\x02:\x01\x31\x12\x16\n\x0b\x66inal_coeff\x18\x03 \x01(\x02:\x01\x31\"\xde\x07\n\x11\x41ugmentationCoeff\x12\x11\n\x06mirror\x18\x01 \x01(\x02:\x01\x30\x12\r\n\x02\x64x\x18\x02 \x01(\x02:\x01\x30\x12\r\n\x02\x64y\x18\x03 \x01(\x02:\x01\x30\x12\x10\n\x05\x61ngle\x18\x04 \x01(\x02:\x01\x30\x12\x11\n\x06zoom_x\x18\x05 \x01(\x02:\x01\x31\x12\x11\n\x06zoom_y\x18\x06 \x01(\x02:\x01\x31\x12\x10\n\x05gamma\x18\x64 \x01(\x02:\x01\x31\x12\x15\n\nbrightness\x18\x65 \x01(\x02:\x01\x30\x12\x13\n\x08\x63ontrast\x18\x66 \x01(\x02:\x01\x31\x12\x11\n\x06\x63olor1\x18g \x01(\x02:\x01\x31\x12\x11\n\x06\x63olor2\x18h \x01(\x02:\x01\x31\x12\x11\n\x06\x63olor3\x18i \x01(\x02:\x01\x31\x12\x16\n\x0bpow_nomean0\x18\n \x01(\x02:\x01\x31\x12\x16\n\x0bpow_nomean1\x18\x0b \x01(\x02:\x01\x31\x12\x16\n\x0bpow_nomean2\x18\x0c \x01(\x02:\x01\x31\x12\x16\n\x0b\x61\x64\x64_nomean0\x18\r \x01(\x02:\x01\x30\x12\x16\n\x0b\x61\x64\x64_nomean1\x18\x0e \x01(\x02:\x01\x30\x12\x16\n\x0b\x61\x64\x64_nomean2\x18\x0f \x01(\x02:\x01\x30\x12\x17\n\x0cmult_nomean0\x18\x10 \x01(\x02:\x01\x31\x12\x17\n\x0cmult_nomean1\x18\x11 \x01(\x02:\x01\x31\x12\x17\n\x0cmult_nomean2\x18\x12 \x01(\x02:\x01\x31\x12\x18\n\rpow_withmean0\x18\x13 \x01(\x02:\x01\x31\x12\x18\n\rpow_withmean1\x18\x14 \x01(\x02:\x01\x31\x12\x18\n\rpow_withmean2\x18\x15 \x01(\x02:\x01\x31\x12\x18\n\radd_withmean0\x18\x16 \x01(\x02:\x01\x30\x12\x18\n\radd_withmean1\x18\x17 \x01(\x02:\x01\x30\x12\x18\n\radd_withmean2\x18\x18 \x01(\x02:\x01\x30\x12\x19\n\x0emult_withmean0\x18\x19 \x01(\x02:\x01\x31\x12\x19\n\x0emult_withmean1\x18\x1a \x01(\x02:\x01\x31\x12\x19\n\x0emult_withmean2\x18\x1b \x01(\x02:\x01\x31\x12\x14\n\tlmult_pow\x18\x1c \x01(\x02:\x01\x31\x12\x14\n\tlmult_add\x18\x1d \x01(\x02:\x01\x30\x12\x15\n\nlmult_mult\x18\x1e \x01(\x02:\x01\x31\x12\x14\n\tcol_angle\x18\x1f \x01(\x02:\x01\x30\x12\x15\n\nfog_amount\x18& \x01(\x02:\x01\x30\x12\x13\n\x08\x66og_size\x18\' \x01(\x02:\x01\x30\x12\x1c\n\x11motion_blur_angle\x18( \x01(\x02:\x01\x30\x12\x1b\n\x10motion_blur_size\x18) \x01(\x02:\x01\x30\x12\x17\n\x0cshadow_angle\x18* \x01(\x02:\x01\x30\x12\x1a\n\x0fshadow_distance\x18+ \x01(\x02:\x01\x30\x12\x1a\n\x0fshadow_strength\x18, \x01(\x02:\x01\x30\x12\x10\n\x05noise\x18- \x01(\x02:\x01\x30\"\xcc\x10\n\x15\x41ugmentationParameter\x12\x15\n\ncrop_width\x18! \x01(\r:\x01\x30\x12\x16\n\x0b\x63rop_height\x18\" \x01(\r:\x01\x30\x12\x19\n\x0fwrite_augmented\x18\x02 \x01(\t:\x00\x12\x1b\n\x0emax_multiplier\x18\x03 \x01(\x02:\x03\x32\x35\x35\x12\"\n\x13\x61ugment_during_test\x18\x04 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0erecompute_mean\x18\x05 \x01(\r:\x01\x30\x12\x14\n\nwrite_mean\x18\x06 \x01(\t:\x00\x12\x1c\n\x0emean_per_pixel\x18\x07 \x01(\x08:\x04true\x12\x0c\n\x04mean\x18\x12 \x03(\x02\x12\x11\n\x04mode\x18\x08 \x01(\t:\x03\x61\x64\x64\x12\x16\n\x0b\x62ottomwidth\x18P \x01(\r:\x01\x30\x12\x17\n\x0c\x62ottomheight\x18Q \x01(\r:\x01\x30\x12\x0e\n\x03num\x18R \x01(\r:\x01\x30\x12\x18\n\x10\x63hromatic_eigvec\x18S \x03(\x02\x12\x32\n\x06mirror\x18\n \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\ttranslate\x18\x0b \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x32\n\x06rotate\x18\x0c \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x30\n\x04zoom\x18\r \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07squeeze\x18\x0e \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x37\n\x0btranslate_x\x18\x0f \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x37\n\x0btranslate_y\x18\x10 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x31\n\x05gamma\x18# \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\nbrightness\x18$ \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08\x63ontrast\x18% \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x31\n\x05\x63olor\x18& \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\tlmult_pow\x18\x14 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\nlmult_mult\x18\x15 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\tlmult_add\x18\x16 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07sat_pow\x18\x17 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08sat_mult\x18\x18 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07sat_add\x18\x19 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07\x63ol_pow\x18\x1a \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08\x63ol_mult\x18\x1b \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x33\n\x07\x63ol_add\x18\x1c \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08ladd_pow\x18\x1d \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x35\n\tladd_mult\x18\x1e \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08ladd_add\x18\x1f \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\ncol_rotate\x18  \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x36\n\nfog_amount\x18\x64 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x34\n\x08\x66og_size\x18\x65 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12=\n\x11motion_blur_angle\x18\x66 \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12<\n\x10motion_blur_size\x18g \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x38\n\x0cshadow_angle\x18h \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12;\n\x0fshadow_distance\x18i \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12;\n\x0fshadow_strength\x18j \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\x12\x31\n\x05noise\x18k \x01(\x0b\x32\".mo_caffe.RandomGeneratorParameter\"\x85\x01\n\x11\x46lowWarpParameter\x12\x43\n\nfill_value\x18\x01 \x01(\x0e\x32).mo_caffe.FlowWarpParameter.FillParameter:\x04ZERO\"+\n\rFillParameter\x12\x08\n\x04ZERO\x10\x01\x12\x10\n\x0cNOT_A_NUMBER\x10\x02\"\xb6\x02\n\x14\x43orrelationParameter\x12\x0e\n\x03pad\x18\x02 \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x03 \x01(\r\x12\x18\n\x10max_displacement\x18\x04 \x01(\r\x12\x13\n\x08stride_1\x18\x05 \x01(\r:\x01\x31\x12\x13\n\x08stride_2\x18\x06 \x01(\r:\x01\x31\x12\x1b\n\x10single_direction\x18\x08 \x01(\x05:\x01\x30\x12\x15\n\x06\x64o_abs\x18\x07 \x01(\x08:\x05\x66\x61lse\x12R\n\x10\x63orrelation_type\x18\x0f \x01(\x0e\x32..mo_caffe.CorrelationParameter.CorrelationType:\x08MULTIPLY\"-\n\x0f\x43orrelationType\x12\x0c\n\x08MULTIPLY\x10\x00\x12\x0c\n\x08SUBTRACT\x10\x01\"\xdc\x01\n\x11ResampleParameter\x12\x17\n\tantialias\x18\x04 \x01(\x08:\x04true\x12\r\n\x05width\x18\x01 \x01(\r\x12\x0e\n\x06height\x18\x02 \x01(\r\x12>\n\x04type\x18\x03 \x01(\x0e\x32(.mo_caffe.ResampleParameter.ResampleType:\x06LINEAR\x12\x11\n\x06\x66\x61\x63tor\x18\x05 \x01(\x02:\x01\x31\"<\n\x0cResampleType\x12\x0b\n\x07NEAREST\x10\x01\x12\n\n\x06LINEAR\x10\x02\x12\t\n\x05\x43UBIC\x10\x03\x12\x08\n\x04\x41REA\x10\x04\"z\n\x0e\x41\x63\x63umParameter\x12\x15\n\ntop_height\x18\x01 \x01(\r:\x01\x30\x12\x14\n\ttop_width\x18\x02 \x01(\r:\x01\x30\x12\x1c\n\x11size_divisible_by\x18\x03 \x01(\r:\x01\x30\x12\x1d\n\x0ehave_reference\x18\x04 \x01(\x08:\x05\x66\x61lse\"(\n\x17ShuffleChannelParameter\x12\r\n\x05group\x18\x01 \x02(\r*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01')
 )
 _sym_db.RegisterFileDescriptor(DESCRIPTOR)
 
@@ -40,8 +40,8 @@ _PHASE = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=26741,
-  serialized_end=26769,
+  serialized_start=26773,
+  serialized_end=26801,
 )
 _sym_db.RegisterEnumDescriptor(_PHASE)
 
@@ -369,8 +369,8 @@ _ELTWISEPARAMETER_ELTWISEOP = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=13284,
-  serialized_end=13323,
+  serialized_start=13316,
+  serialized_end=13355,
 )
 _sym_db.RegisterEnumDescriptor(_ELTWISEPARAMETER_ELTWISEOP)
 
@@ -391,8 +391,8 @@ _HINGELOSSPARAMETER_NORM = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=13867,
-  serialized_end=13889,
+  serialized_start=13899,
+  serialized_end=13921,
 )
 _sym_db.RegisterEnumDescriptor(_HINGELOSSPARAMETER_NORM)
 
@@ -413,8 +413,8 @@ _LRNPARAMETER_NORMREGION = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=14771,
-  serialized_end=14824,
+  serialized_start=14803,
+  serialized_end=14856,
 )
 _sym_db.RegisterEnumDescriptor(_LRNPARAMETER_NORMREGION)
 
@@ -465,8 +465,8 @@ _POOLINGPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=15515,
-  serialized_end=15561,
+  serialized_start=15547,
+  serialized_end=15593,
 )
 _sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_POOLMETHOD)
 
@@ -517,8 +517,8 @@ _PRIORBOXPARAMETER_CODETYPE = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=15965,
-  serialized_end=16021,
+  serialized_start=15997,
+  serialized_end=16053,
 )
 _sym_db.RegisterEnumDescriptor(_PRIORBOXPARAMETER_CODETYPE)
 
@@ -547,8 +547,8 @@ _REDUCTIONPARAMETER_REDUCTIONOP = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=16541,
-  serialized_end=16594,
+  serialized_start=16573,
+  serialized_end=16626,
 )
 _sym_db.RegisterEnumDescriptor(_REDUCTIONPARAMETER_REDUCTIONOP)
 
@@ -677,8 +677,8 @@ _SPPPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=15515,
-  serialized_end=15561,
+  serialized_start=15547,
+  serialized_end=15593,
 )
 _sym_db.RegisterEnumDescriptor(_SPPPARAMETER_POOLMETHOD)
 
@@ -877,8 +877,8 @@ _V1LAYERPARAMETER_LAYERTYPE = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=20520,
-  serialized_end=21120,
+  serialized_start=20552,
+  serialized_end=21152,
 )
 _sym_db.RegisterEnumDescriptor(_V1LAYERPARAMETER_LAYERTYPE)
 
@@ -925,8 +925,8 @@ _V0LAYERPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=15515,
-  serialized_end=15561,
+  serialized_start=15547,
+  serialized_end=15593,
 )
 _sym_db.RegisterEnumDescriptor(_V0LAYERPARAMETER_POOLMETHOD)
 
@@ -947,8 +947,8 @@ _FLOWWARPPARAMETER_FILLPARAMETER = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=25994,
-  serialized_end=26037,
+  serialized_start=26026,
+  serialized_end=26069,
 )
 _sym_db.RegisterEnumDescriptor(_FLOWWARPPARAMETER_FILLPARAMETER)
 
@@ -969,8 +969,8 @@ _CORRELATIONPARAMETER_CORRELATIONTYPE = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=26305,
-  serialized_end=26350,
+  serialized_start=26337,
+  serialized_end=26382,
 )
 _sym_db.RegisterEnumDescriptor(_CORRELATIONPARAMETER_CORRELATIONTYPE)
 
@@ -999,8 +999,8 @@ _RESAMPLEPARAMETER_RESAMPLETYPE = _descriptor.EnumDescriptor(
   ],
   containing_type=None,
   options=None,
-  serialized_start=26513,
-  serialized_end=26573,
+  serialized_start=26545,
+  serialized_end=26605,
 )
 _sym_db.RegisterEnumDescriptor(_RESAMPLEPARAMETER_RESAMPLETYPE)
 
@@ -4492,6 +4492,13 @@ _DETECTIONOUTPUTPARAMETER = _descriptor.Descriptor(
       message_type=None, enum_type=None, containing_type=None,
       is_extension=False, extension_scope=None,
       options=None),
+    _descriptor.FieldDescriptor(
+      name='objectness_score', full_name='mo_caffe.DetectionOutputParameter.objectness_score', index=15,
+      number=16, type=2, cpp_type=6, label=1,
+      has_default_value=True, default_value=0.01,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      options=None),
   ],
   extensions=[
   ],
@@ -4504,7 +4511,7 @@ _DETECTIONOUTPUTPARAMETER = _descriptor.Descriptor(
   oneofs=[
   ],
   serialized_start=12394,
-  serialized_end=12935,
+  serialized_end=12967,
 )
 
 
@@ -4533,8 +4540,8 @@ _DROPOUTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=12937,
-  serialized_end=12983,
+  serialized_start=12969,
+  serialized_end=13015,
 )
 
 
@@ -4598,8 +4605,8 @@ _DUMMYDATAPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=12986,
-  serialized_end=13152,
+  serialized_start=13018,
+  serialized_end=13184,
 )
 
 
@@ -4643,8 +4650,8 @@ _ELTWISEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13155,
-  serialized_end=13323,
+  serialized_start=13187,
+  serialized_end=13355,
 )
 
 
@@ -4673,8 +4680,8 @@ _ELUPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13325,
-  serialized_end=13357,
+  serialized_start=13357,
+  serialized_end=13389,
 )
 
 
@@ -4731,8 +4738,8 @@ _EMBEDPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13360,
-  serialized_end=13538,
+  serialized_start=13392,
+  serialized_end=13570,
 )
 
 
@@ -4775,8 +4782,8 @@ _EXPPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13540,
-  serialized_end=13608,
+  serialized_start=13572,
+  serialized_end=13640,
 )
 
 
@@ -4812,8 +4819,8 @@ _FLATTENPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13610,
-  serialized_end=13667,
+  serialized_start=13642,
+  serialized_end=13699,
 )
 
 
@@ -4856,8 +4863,8 @@ _HDF5DATAPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13669,
-  serialized_end=13748,
+  serialized_start=13701,
+  serialized_end=13780,
 )
 
 
@@ -4886,8 +4893,8 @@ _HDF5OUTPUTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13750,
-  serialized_end=13790,
+  serialized_start=13782,
+  serialized_end=13822,
 )
 
 
@@ -4917,8 +4924,8 @@ _HINGELOSSPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13792,
-  serialized_end=13889,
+  serialized_start=13824,
+  serialized_end=13921,
 )
 
 
@@ -5024,8 +5031,8 @@ _IMAGEDATAPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=13892,
-  serialized_end=14171,
+  serialized_start=13924,
+  serialized_end=14203,
 )
 
 
@@ -5054,8 +5061,8 @@ _INFOGAINLOSSPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14173,
-  serialized_end=14212,
+  serialized_start=14205,
+  serialized_end=14244,
 )
 
 
@@ -5119,8 +5126,8 @@ _INNERPRODUCTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14215,
-  serialized_end=14424,
+  serialized_start=14247,
+  serialized_end=14456,
 )
 
 
@@ -5149,8 +5156,8 @@ _INPUTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14426,
-  serialized_end=14478,
+  serialized_start=14458,
+  serialized_end=14510,
 )
 
 
@@ -5193,8 +5200,8 @@ _LOGPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14480,
-  serialized_end=14548,
+  serialized_start=14512,
+  serialized_end=14580,
 )
 
 
@@ -5260,8 +5267,8 @@ _LRNPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14551,
-  serialized_end=14869,
+  serialized_start=14583,
+  serialized_end=14901,
 )
 
 
@@ -5290,8 +5297,8 @@ _GRNPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14871,
-  serialized_end=14902,
+  serialized_start=14903,
+  serialized_end=14934,
 )
 
 
@@ -5341,8 +5348,8 @@ _MEMORYDATAPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14904,
-  serialized_end=14994,
+  serialized_start=14936,
+  serialized_end=15026,
 )
 
 
@@ -5385,8 +5392,8 @@ _MVNPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=14996,
-  serialized_end=15096,
+  serialized_start=15028,
+  serialized_end=15128,
 )
 
 
@@ -5415,8 +5422,8 @@ _PARAMETERPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=15098,
-  serialized_end=15154,
+  serialized_start=15130,
+  serialized_end=15186,
 )
 
 
@@ -5531,8 +5538,8 @@ _POOLINGPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=15157,
-  serialized_end=15606,
+  serialized_start=15189,
+  serialized_end=15638,
 )
 
 
@@ -5575,8 +5582,8 @@ _POWERPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=15608,
-  serialized_end=15678,
+  serialized_start=15640,
+  serialized_end=15710,
 )
 
 
@@ -5704,8 +5711,8 @@ _PRIORBOXPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=15681,
-  serialized_end=16021,
+  serialized_start=15713,
+  serialized_end=16053,
 )
 
 
@@ -5748,8 +5755,8 @@ _PSROIPOOLINGPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16023,
-  serialized_end=16109,
+  serialized_start=16055,
+  serialized_end=16141,
 )
 
 
@@ -5799,8 +5806,8 @@ _PYTHONPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16111,
-  serialized_end=16214,
+  serialized_start=16143,
+  serialized_end=16246,
 )
 
 
@@ -5857,8 +5864,8 @@ _RECURRENTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16217,
-  serialized_end=16415,
+  serialized_start=16249,
+  serialized_end=16447,
 )
 
 
@@ -5902,8 +5909,8 @@ _REDUCTIONPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16418,
-  serialized_end=16594,
+  serialized_start=16450,
+  serialized_end=16626,
 )
 
 
@@ -5940,8 +5947,8 @@ _RELUPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16597,
-  serialized_end=16741,
+  serialized_start=16629,
+  serialized_end=16773,
 )
 
 
@@ -5970,8 +5977,8 @@ _RELU6PARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16743,
-  serialized_end=16773,
+  serialized_start=16775,
+  serialized_end=16805,
 )
 
 
@@ -6014,8 +6021,8 @@ _RESHAPEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16775,
-  serialized_end=16868,
+  serialized_start=16807,
+  serialized_end=16900,
 )
 
 
@@ -6044,8 +6051,8 @@ _REVERSEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16870,
-  serialized_end=16905,
+  serialized_start=16902,
+  serialized_end=16937,
 )
 
 
@@ -6088,8 +6095,8 @@ _ROIPOOLINGPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16907,
-  serialized_end=16996,
+  serialized_start=16939,
+  serialized_end=17028,
 )
 
 
@@ -6132,8 +6139,8 @@ _ROIWARPINGTESTPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=16998,
-  serialized_end=17091,
+  serialized_start=17030,
+  serialized_end=17123,
 )
 
 
@@ -6176,8 +6183,8 @@ _ROIWARPINGPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17093,
-  serialized_end=17182,
+  serialized_start=17125,
+  serialized_end=17214,
 )
 
 
@@ -6234,8 +6241,8 @@ _SCALEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17185,
-  serialized_end=17356,
+  serialized_start=17217,
+  serialized_end=17388,
 )
 
 
@@ -6265,8 +6272,8 @@ _SIGMOIDPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17358,
-  serialized_end=17481,
+  serialized_start=17390,
+  serialized_end=17513,
 )
 
 
@@ -6309,8 +6316,8 @@ _SLICEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17483,
-  serialized_end=17559,
+  serialized_start=17515,
+  serialized_end=17591,
 )
 
 
@@ -6339,8 +6346,8 @@ _SMOOTHL1LOSSPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17561,
-  serialized_end=17602,
+  serialized_start=17593,
+  serialized_end=17634,
 )
 
 
@@ -6377,8 +6384,8 @@ _SOFTMAXPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17605,
-  serialized_end=17745,
+  serialized_start=17637,
+  serialized_end=17777,
 )
 
 
@@ -6408,8 +6415,8 @@ _TANHPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17747,
-  serialized_end=17864,
+  serialized_start=17779,
+  serialized_end=17896,
 )
 
 
@@ -6445,8 +6452,8 @@ _TILEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17866,
-  serialized_end=17913,
+  serialized_start=17898,
+  serialized_end=17945,
 )
 
 
@@ -6475,8 +6482,8 @@ _THRESHOLDPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17915,
-  serialized_end=17957,
+  serialized_start=17947,
+  serialized_end=17989,
 )
 
 
@@ -6589,8 +6596,8 @@ _WINDOWDATAPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=17960,
-  serialized_end=18281,
+  serialized_start=17992,
+  serialized_end=18313,
 )
 
 
@@ -6635,8 +6642,8 @@ _SPPPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=18284,
-  serialized_end=18525,
+  serialized_start=18316,
+  serialized_end=18557,
 )
 
 
@@ -6961,8 +6968,8 @@ _V1LAYERPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=18528,
-  serialized_end=21164,
+  serialized_start=18560,
+  serialized_end=21196,
 )
 
 
@@ -7251,8 +7258,8 @@ _V0LAYERPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=21167,
-  serialized_end=22203,
+  serialized_start=21199,
+  serialized_end=22235,
 )
 
 
@@ -7288,8 +7295,8 @@ _PRELUPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=22205,
-  serialized_end=22295,
+  serialized_start=22237,
+  serialized_end=22327,
 )
 
 
@@ -7353,8 +7360,8 @@ _REGIONYOLOPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=22298,
-  serialized_end=22432,
+  serialized_start=22330,
+  serialized_end=22464,
 )
 
 
@@ -7383,8 +7390,8 @@ _REORGYOLOPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=22434,
-  serialized_end=22473,
+  serialized_start=22466,
+  serialized_end=22505,
 )
 
 
@@ -7462,8 +7469,8 @@ _RANDOMGENERATORPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=22476,
-  serialized_end=22683,
+  serialized_start=22508,
+  serialized_end=22715,
 )
 
 
@@ -7506,8 +7513,8 @@ _COEFFSCHEDULEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=22685,
-  serialized_end=22781,
+  serialized_start=22717,
+  serialized_end=22813,
 )
 
 
@@ -7823,8 +7830,8 @@ _AUGMENTATIONCOEFF = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=22784,
-  serialized_end=23774,
+  serialized_start=22816,
+  serialized_end=23806,
 )
 
 
@@ -8168,8 +8175,8 @@ _AUGMENTATIONPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=23777,
-  serialized_end=25901,
+  serialized_start=23809,
+  serialized_end=25933,
 )
 
 
@@ -8199,8 +8206,8 @@ _FLOWWARPPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=25904,
-  serialized_end=26037,
+  serialized_start=25936,
+  serialized_end=26069,
 )
 
 
@@ -8279,8 +8286,8 @@ _CORRELATIONPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=26040,
-  serialized_end=26350,
+  serialized_start=26072,
+  serialized_end=26382,
 )
 
 
@@ -8338,8 +8345,8 @@ _RESAMPLEPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=26353,
-  serialized_end=26573,
+  serialized_start=26385,
+  serialized_end=26605,
 )
 
 
@@ -8389,8 +8396,8 @@ _ACCUMPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=26575,
-  serialized_end=26697,
+  serialized_start=26607,
+  serialized_end=26729,
 )
 
 
@@ -8419,8 +8426,8 @@ _SHUFFLECHANNELPARAMETER = _descriptor.Descriptor(
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=26699,
-  serialized_end=26739,
+  serialized_start=26731,
+  serialized_end=26771,
 )
 
 _BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE
diff --git a/model-optimizer/mo/front/caffe/proto/mo_caffe.proto b/model-optimizer/mo/front/caffe/proto/mo_caffe.proto
index 82f83a5d2..d25ec87ce 100644
--- a/model-optimizer/mo/front/caffe/proto/mo_caffe.proto
+++ b/model-optimizer/mo/front/caffe/proto/mo_caffe.proto
@@ -1031,6 +1031,8 @@ message DetectionOutputParameter {
   optional int32 input_height = 14 [default = -1];
   // If false, bboxes need to be normalized
   optional bool normalized = 15 [default = true];
+  //the objectness score is used for the anchor refinement module to filter easy negative anchor.
+  optional float objectness_score = 16 [default = 0.01];
 }
 
 message DropoutParameter {
diff --git a/model-optimizer/mo/front/caffe/python_layer_extractor.py b/model-optimizer/mo/front/caffe/python_layer_extractor.py
index 0908e6961..0a693a9ab 100644
--- a/model-optimizer/mo/front/caffe/python_layer_extractor.py
+++ b/model-optimizer/mo/front/caffe/python_layer_extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/python_layer_extractor_test.py b/model-optimizer/mo/front/caffe/python_layer_extractor_test.py
index 35f676078..bdb0a8c85 100644
--- a/model-optimizer/mo/front/caffe/python_layer_extractor_test.py
+++ b/model-optimizer/mo/front/caffe/python_layer_extractor_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/caffe/register_custom_ops.py b/model-optimizer/mo/front/caffe/register_custom_ops.py
index fb8ea579c..1a89012f3 100644
--- a/model-optimizer/mo/front/caffe/register_custom_ops.py
+++ b/model-optimizer/mo/front/caffe/register_custom_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,15 +14,11 @@
  limitations under the License.
 """
 
-from mo.back.replacement import BackReplacementPattern
 from mo.front.common.replacement import FrontReplacementOp, FrontReplacementPattern, FrontReplacementSubgraph
 from mo.front.extractor import FrontExtractorOp, CaffePythonFrontExtractorOp
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.utils import class_registration
 
 
-def update_registration():
-    class_registration.update_registration([Op, FrontExtractorOp, CaffePythonFrontExtractorOp, FrontReplacementOp,
-                                            FrontReplacementPattern, FrontReplacementSubgraph, MiddleReplacementPattern,
-                                            BackReplacementPattern])
+def get_front_classes():
+    front_classes = [FrontExtractorOp, CaffePythonFrontExtractorOp, FrontReplacementOp,
+                     FrontReplacementPattern, FrontReplacementSubgraph]
+    return front_classes
diff --git a/model-optimizer/mo/front/common/custom_replacement_registry.py b/model-optimizer/mo/front/common/custom_replacement_registry.py
index cc1dd0e70..87410d8d1 100644
--- a/model-optimizer/mo/front/common/custom_replacement_registry.py
+++ b/model-optimizer/mo/front/common/custom_replacement_registry.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/extractors/utils.py b/model-optimizer/mo/front/common/extractors/utils.py
index e82f89e3c..e4d0dcd11 100644
--- a/model-optimizer/mo/front/common/extractors/utils.py
+++ b/model-optimizer/mo/front/common/extractors/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/find_unsupported_ops.py b/model-optimizer/mo/front/common/find_unsupported_ops.py
index 87067060a..8b632c221 100644
--- a/model-optimizer/mo/front/common/find_unsupported_ops.py
+++ b/model-optimizer/mo/front/common/find_unsupported_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,15 +16,12 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
-from mo.utils.dsu import DSU, DSUElem
-from mo.utils.graph import bfs_search
+from mo.graph.graph import Node, Graph
 
 
-def find_unsupported_ops(graph: nx.MultiDiGraph):
+def find_unsupported_ops(graph: Graph):
     """
     The function returns list of node name those are not supported. Currently nodes that product non FP32 data tensors
     or has undefined 'type' attribute are considered unsupported.
@@ -36,57 +33,13 @@ def find_unsupported_ops(graph: nx.MultiDiGraph):
         node = Node(graph, node_name)
         # op node that produce non FP32 data or has no type are considered unsupported
         if node.kind == 'op':
-            if not node.has_valid('type'):
-                log.info('Node "{}" does not have type. Consider it unsupported'.format(node_name))
-                unsupported.append(node.id)
-            else:
+            if node.has_valid('type') or (node.has_valid('op') and node.op == 'OpOutput'):
                 for out_data_node in node.out_nodes().values():
                     if out_data_node.has_valid('data_type') and out_data_node.data_type != np.float32:
                         log.info('Node "{}" produces output as non FP32. Consider it unsupported'.format(node_name))
                         unsupported.append(node.id)
+            else:
+                log.info('Node "{}" does not have type. Consider it unsupported'.format(node_name))
+                unsupported.append(node.id)
     return unsupported
 
-
-def find_unsupported_ops_subgraphs(graph: nx.MultiDiGraph, unsupported_nodes: list,
-                                   find_constant_input_fn: callable = lambda node: node):
-    bfs_nodes = bfs_search(graph, list())
-    visited = set()
-    # mark initial set of nodes as not supported
-    for node_name in unsupported_nodes:
-        graph.node[node_name]['supported'] = False
-
-    for node_name in bfs_nodes:
-        if node_name in visited:
-            continue
-
-        node = Node(graph, node_name)
-        if node.has_valid('supported') and not node['supported']:
-            added_nodes = find_constant_input_fn(node)
-            visited.update(added_nodes)
-            for node in added_nodes:
-                node['supported'] = False
-
-    dsu_elems = list()
-    for node_name in bfs_nodes:
-        node = Node(graph, node_name)
-        if node.has_valid('supported') and not node['supported']:
-            dsu_elems.append(DSUElem(node_name))
-
-    dsu = DSU(dsu_elems)
-
-    # merge adjacent unsupported nodes
-    for dsu_elem in dsu_elems:
-        node = Node(graph, dsu_elem.name)
-        if not node['supported']:
-            for out_node in node.out_nodes().values():
-                if out_node.has_valid('supported') and not out_node['supported']:
-                    dsu.union(dsu_elem, dsu.find_elem(out_node.id))
-
-    subgraph_id = dict()  # key is the name of the node, value is the set of nodes that belong to this subgraph
-    for dsu_elem in dsu.map.values():
-        parent = dsu.find_parent(dsu_elem).name
-        if parent not in subgraph_id.keys():
-            subgraph_id[parent] = set()
-        subgraph_id[parent].add(dsu_elem.name)
-
-    return [list(s) for s in subgraph_id.values()]
diff --git a/model-optimizer/mo/front/common/layout.py b/model-optimizer/mo/front/common/layout.py
index 6da786138..b95677d34 100644
--- a/model-optimizer/mo/front/common/layout.py
+++ b/model-optimizer/mo/front/common/layout.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/layout_test.py b/model-optimizer/mo/front/common/layout_test.py
index e3865e44a..1f0a288d6 100644
--- a/model-optimizer/mo/front/common/layout_test.py
+++ b/model-optimizer/mo/front/common/layout_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/batch_norm.py b/model-optimizer/mo/front/common/partial_infer/batch_norm.py
index e20e96131..6b68628b0 100644
--- a/model-optimizer/mo/front/common/partial_infer/batch_norm.py
+++ b/model-optimizer/mo/front/common/partial_infer/batch_norm.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/caffe_fallback.py b/model-optimizer/mo/front/common/partial_infer/caffe_fallback.py
index b8bcdce17..d750af958 100644
--- a/model-optimizer/mo/front/common/partial_infer/caffe_fallback.py
+++ b/model-optimizer/mo/front/common/partial_infer/caffe_fallback.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,18 +20,18 @@ import os
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.find_inputs import find_inputs
 from mo.utils.utils import refer_to_faq_msg
 
 
-def get_node_top(graph: nx.MultiDiGraph, name: str):
+def get_node_top(graph: Graph, name: str):
     node = Node(graph, name)
     return node.out_edge()['name'] if node else None
 
 
-def build_net(graph: nx.DiGraph):
+def build_net(graph: Graph):
     try:
         if not hasattr(os.environ, 'GLOG_minloglevel'):
             os.environ['GLOG_minloglevel'] = '2'
@@ -80,7 +80,7 @@ def build_net(graph: nx.DiGraph):
     graph.__setattr__('caffe_net', net)
 
 
-def get_net(graph: nx.DiGraph):
+def get_net(graph: Graph):
     if not graph:
         return None
 
@@ -101,6 +101,9 @@ def caffe_native_node_infer(node: Node):
     node node to infer the shape for
 
     """
+    log.error("Caffe fallback is deprecated. It will be removed in future releases. Please use extensions for unsupported layers.\n" +
+              "See more information in the \"Custom Layers in the Model Optimizer\" chapter of the Model Optimizer Developer Guide",
+              extra={'is_warning': True})
     log.info('Called "caffe_native_node_infer" for node "{}"'.format(node.id))
 
     graph = node.graph
diff --git a/model-optimizer/mo/front/common/partial_infer/caffe_fallback_test.py b/model-optimizer/mo/front/common/partial_infer/caffe_fallback_test.py
index 1d03857dc..c2bfc0c78 100644
--- a/model-optimizer/mo/front/common/partial_infer/caffe_fallback_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/caffe_fallback_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -52,7 +52,8 @@ class TestCaffeNativePartialInfer(unittest.TestCase):
             'node_1': {'type': 'Input', 'kind': 'op'},
             'node_2': {'type': 'Input', 'kind': 'op'},
             'node_3': {'type': 'Identity', 'kind': 'op'},
-            'node_4': {'type': 'Identity', 'kind': 'op'}
+            'node_4': {'type': 'Identity', 'kind': 'op'},
+            'op_output': { 'kind': 'op', 'op': 'OpOutput'}
         }
 
     def test_build_net_equal_inputs(self):
@@ -66,10 +67,11 @@ class TestCaffeNativePartialInfer(unittest.TestCase):
                             [
                                 ('node_1', 'node_3'),
                                 ('node_2', 'node_3'),
-                                ('node_3', 'node_4')
+                                ('node_3', 'node_4'),
+                                ('node_4', 'op_output')
                             ],
                             {
-                                'node_4': {'is_output': True, 'shape': None},
+                                'node_4': {'shape': None},
                                 'node_1': {'shape': np.array([1, 3, 227, 227])},
                                 'node_2': {'shape': np.array([1, 3, 224, 224])},
                                 'node_3': {'top': 'top_node'}
@@ -94,9 +96,10 @@ class TestCaffeNativePartialInfer(unittest.TestCase):
         graph = build_graph(self.nodes_attributes,
                             [
                                 ('node_1', 'node_3'),
-                                ('node_3', 'node_4')
+                                ('node_3', 'node_4'),
+                                ('node_4', 'op_output')
                             ],
-                            {'node_4': {'is_output': True, 'shape': None},
+                            {'node_4': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_3': {'top': 'top_node'}
                              },
diff --git a/model-optimizer/mo/front/common/partial_infer/concat.py b/model-optimizer/mo/front/common/partial_infer/concat.py
index f041c28da..372a124c2 100644
--- a/model-optimizer/mo/front/common/partial_infer/concat.py
+++ b/model-optimizer/mo/front/common/partial_infer/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -46,8 +46,8 @@ def concat_infer(node):
     node.axis = axis
 
     mask = np.zeros_like(shape, dtype=np.bool)
-    mask[axis] = True
-    not_mask = np.logical_not(mask)
+    mask[axis] = True  # pylint: disable=unsupported-assignment-operation
+    not_mask = np.logical_not(mask)  # pylint: disable=assignment-from-no-return
     for s in shapes[1:]:
         if np.all(shape[not_mask] == s[not_mask]):  # TODO handle -1 in a special way
             shape[mask] += s[mask]
diff --git a/model-optimizer/mo/front/common/partial_infer/concat_test.py b/model-optimizer/mo/front/common/partial_infer/concat_test.py
index 07b53a1c8..864498340 100644
--- a/model-optimizer/mo/front/common/partial_infer/concat_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/concat_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'kind': 'data', 'value': None},
                     'node_2': {'kind': 'data', 'value': None},
                     'concat': {'type': 'Concat', 'kind': 'op'},
-                    'node_3': {'kind': 'data'}
+                    'node_3': {'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -34,8 +35,10 @@ class TestConcatPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
-                             ('concat', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('concat', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([1, 3, 227, 227])},
                              'concat': {'axis': 2}
@@ -52,8 +55,10 @@ class TestConcatPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
-                             ('concat', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('concat', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([1, 3, 227, 227])},
                              'concat': {'axis': -1}
@@ -70,8 +75,10 @@ class TestConcatPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
-                             ('concat', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('concat', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': np.array([1, 2, 227, 227])},
                              'concat': {'axis': 2}
@@ -86,8 +93,10 @@ class TestConcatPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
-                             ('concat', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('concat', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227])},
                              'node_2': {'shape': None},
                              'concat': {'axis': 2}
diff --git a/model-optimizer/mo/front/common/partial_infer/const.py b/model-optimizer/mo/front/common/partial_infer/const.py
index 0ceb8801c..ebc3f6ee4 100644
--- a/model-optimizer/mo/front/common/partial_infer/const.py
+++ b/model-optimizer/mo/front/common/partial_infer/const.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/crop.py b/model-optimizer/mo/front/common/partial_infer/crop.py
index e097ec997..5c11617b8 100644
--- a/model-optimizer/mo/front/common/partial_infer/crop.py
+++ b/model-optimizer/mo/front/common/partial_infer/crop.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/crop_test.py b/model-optimizer/mo/front/common/partial_infer/crop_test.py
index d1eb97b4d..d070592c1 100644
--- a/model-optimizer/mo/front/common/partial_infer/crop_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/crop_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'value': None, 'kind': 'data'},
                     'node_2': {'value': None, 'kind': 'data'},
                     'crop_1': {'type': 'Crop', 'kind': 'op'},
-                    'node_3': {'value': None, 'kind': 'data'}
+                    'node_3': {'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -34,8 +35,10 @@ class TestCropInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'crop_1'),
                              ('node_2', 'crop_1'),
-                             ('crop_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('crop_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 2, 500, 500])},
                              'node_2': {'shape': np.array([1, 2, 256, 256])},
                              'crop_1': {'axis': 2, 'offset': [0, 0], 'dim': None}
@@ -57,8 +60,10 @@ class TestCropInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'crop_1'),
                              ('node_2', 'crop_1'),
-                             ('crop_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('crop_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 2, 500, 500])},
                              'node_2': {'shape': np.array([1, 2, 256, 256])},
                              'crop_1': {'axis': -1, 'offset': [0, 0], 'dim': None}
@@ -80,8 +85,10 @@ class TestCropInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'crop_1'),
                              ('node_2', 'crop_1'),
-                             ('crop_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('crop_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 2, 500, 500])},
                              'node_2': {'shape': None},
                              'crop_1': {'axis': 2, 'offset': [0, 0], 'dim': None}
@@ -95,8 +102,10 @@ class TestCropInfer(unittest.TestCase):
     def test_crop_infer_one_shape(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'crop_1'),
-                             ('crop_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('crop_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 2, 500, 500])},
                              'crop_1': {'axis': 2, 'offset': [0], 'dim': None}
                              })
@@ -110,8 +119,10 @@ class TestCropInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'crop_1'),
                              ('node_2', 'crop_1'),
-                             ('crop_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('crop_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 2, 500, 500])},
                              'node_2': {'shape': np.array([1, 2, 256, 256])},
                              'crop_1': {'axis': 2, 'offset': [300], 'dim': None}
diff --git a/model-optimizer/mo/front/common/partial_infer/elemental.py b/model-optimizer/mo/front/common/partial_infer/elemental.py
index 99adb1ff9..c33a3563c 100644
--- a/model-optimizer/mo/front/common/partial_infer/elemental.py
+++ b/model-optimizer/mo/front/common/partial_infer/elemental.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,14 +19,19 @@ def single_output_infer(node, shape_infer, value_infer=None):
     node.out_node(0).shape = shape_infer(node)
 
     if value_infer is not None and \
-       'value' in node.in_node() and \
-       node.in_node().value is not None:
+            'value' in node.in_node() and \
+            node.in_node().value is not None:
         node.out_node(0).value = value_infer(node)
 
-def copy_shape_infer(node):
+
+def copy_shape_infer(node, value_infer=None):
     """
     Sets output dimensions of node equal to input ones
     Args:
         node: graph node
     """
-    single_output_infer(node, lambda n: n.in_node().shape)
+    single_output_infer(node, lambda n: n.in_node().shape, value_infer)
+
+
+def copy_value(node):
+    return None if node.in_node().value is None else node.in_node().value.copy()
diff --git a/model-optimizer/mo/front/common/partial_infer/elemental_test.py b/model-optimizer/mo/front/common/partial_infer/elemental_test.py
index 78d1daeab..a1c698541 100644
--- a/model-optimizer/mo/front/common/partial_infer/elemental_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/elemental_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/eltwise.py b/model-optimizer/mo/front/common/partial_infer/eltwise.py
index 7d199072c..f0d96e106 100644
--- a/model-optimizer/mo/front/common/partial_infer/eltwise.py
+++ b/model-optimizer/mo/front/common/partial_infer/eltwise.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,11 +19,11 @@ import logging as log
 import networkx as nx
 
 from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import get_sorted_inputs, Node
+from mo.graph.graph import Node
 
 
 def eltwise_infer(node, op=None, **kwargs):
-    raw_inputs = [(inp, attr) for inp, attr in get_sorted_inputs(node)
+    raw_inputs = [(inp, attr) for inp, attr in node.get_sorted_inputs()
               if 'control_flow_edge' not in attr or not attr['control_flow_edge']]
     inputs = [Node(node.graph, inp) for inp, attr in raw_inputs]
     shapes = [node.graph.node[inp]['shape'] for inp, attr in raw_inputs]
diff --git a/model-optimizer/mo/front/common/partial_infer/eltwise_test.py b/model-optimizer/mo/front/common/partial_infer/eltwise_test.py
index 5b57bf6e7..0bd0a24d1 100644
--- a/model-optimizer/mo/front/common/partial_infer/eltwise_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/eltwise_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'value': 2, 'kind': 'data'},
                     'node_2': {'value': 3, 'kind': 'data'},
                     'eltw_1': {'type': 'Eltwise', 'kind': 'op'},
-                    'node_3': {'value': None, 'kind': 'data'}
+                    'node_3': {'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -34,8 +35,10 @@ class TestEltwiseInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'eltw_1'),
                              ('node_2', 'eltw_1'),
-                             ('eltw_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('eltw_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'node_2': {'shape': np.array([1, 3, 256, 256])},
                              'eltw_1': {}
@@ -59,8 +62,10 @@ class TestEltwiseInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'eltw_1'),
                              ('node_2', 'eltw_1'),
-                             ('eltw_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('eltw_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'node_2': {'shape': np.array([1, 3, 256, 256])}
                              })
@@ -81,8 +86,10 @@ class TestEltwiseInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'eltw_1'),
                              ('node_2', 'eltw_1'),
-                             ('eltw_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('eltw_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'node_2': {'shape': np.array([1, 3, 256, 256])}
                              })
@@ -103,8 +110,10 @@ class TestEltwiseInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'eltw_1'),
                              ('node_2', 'eltw_1'),
-                             ('eltw_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('eltw_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256]), 'value': None},
                              'node_2': {'shape': np.array([1, 3, 256, 256])}
                              })
@@ -124,8 +133,10 @@ class TestEltwiseInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'eltw_1'),
                              ('node_2', 'eltw_1'),
-                             ('eltw_1', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('eltw_1', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 257, 256])},
                              'node_2': {'shape': np.array([1, 3, 256, 257])}
                              })
diff --git a/model-optimizer/mo/front/common/partial_infer/expand_dims.py b/model-optimizer/mo/front/common/partial_infer/expand_dims.py
index 50ac4f06f..dbdebd543 100644
--- a/model-optimizer/mo/front/common/partial_infer/expand_dims.py
+++ b/model-optimizer/mo/front/common/partial_infer/expand_dims.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -44,6 +44,12 @@ def tf_expand_dims_infer(node):
     if expand_axis is None:
         return
 
+    # expand_axis is a position where the new axis is placed
+    # so expand_dims works for negative axis in a different way
+    # not as insert operation
+    if expand_axis < 0:
+        expand_axis += len(input_node.shape) + 1
+
     output_node.shape = np.insert(input_node.shape, expand_axis, [1])
     # convert data type of the shape to int64 explicitly
     output_node.shape = output_node.shape.astype(np.int64)
diff --git a/model-optimizer/mo/front/common/partial_infer/expand_dims_test.py b/model-optimizer/mo/front/common/partial_infer/expand_dims_test.py
index 69dbc44c6..119c3c287 100644
--- a/model-optimizer/mo/front/common/partial_infer/expand_dims_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/expand_dims_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -146,6 +146,40 @@ class TestExpandDimsInfer(unittest.TestCase):
         for i in range(0, len(exp_shape)):
             self.assertEqual(exp_shape[i], res_shape[i])
 
+    def test_expand_dims_infer_one_input_3(self):
+        graph = build_graph(nodes_attributes,
+                            [('input_1', 'expand_dims'),
+                             ('expand_dims', 'out')],
+                            {'input_1': {'shape': np.array([3, 256, 256])},
+                             'expand_dims': {'expand_axis': -1}
+                             })
+
+        expand_dims_node = Node(graph, 'expand_dims')
+
+        tf_expand_dims_infer(expand_dims_node)
+        exp_shape = np.array([3, 256, 256, 1])
+        res_shape = expand_dims_node.out_node().shape
+        self.assertEqual(len(exp_shape), len(res_shape))
+        for i in range(0, len(exp_shape)):
+            self.assertEqual(exp_shape[i], res_shape[i])
+
+    def test_expand_dims_infer_one_input_4(self):
+        graph = build_graph(nodes_attributes,
+                            [('input_1', 'expand_dims'),
+                             ('expand_dims', 'out')],
+                            {'input_1': {'shape': np.array([3, 256, 256])},
+                             'expand_dims': {'expand_axis': -2}
+                             })
+
+        expand_dims_node = Node(graph, 'expand_dims')
+
+        tf_expand_dims_infer(expand_dims_node)
+        exp_shape = np.array([3, 256, 1, 256])
+        res_shape = expand_dims_node.out_node().shape
+        self.assertEqual(len(exp_shape), len(res_shape))
+        for i in range(0, len(exp_shape)):
+            self.assertEqual(exp_shape[i], res_shape[i])
+
     def test_expand_dims_infer_one_input_negative(self):
         graph = build_graph(nodes_attributes,
                             [('input_1', 'expand_dims'),
diff --git a/model-optimizer/mo/front/common/partial_infer/inner_product.py b/model-optimizer/mo/front/common/partial_infer/inner_product.py
index 765363b2f..a92f2ecde 100644
--- a/model-optimizer/mo/front/common/partial_infer/inner_product.py
+++ b/model-optimizer/mo/front/common/partial_infer/inner_product.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/inner_product_test.py b/model-optimizer/mo/front/common/partial_infer/inner_product_test.py
index 8b3931275..485889343 100644
--- a/model-optimizer/mo/front/common/partial_infer/inner_product_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/inner_product_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -26,7 +26,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'value': None, 'kind': 'data'},
                     'inner': {'type': 'FullyConnected', 'value': None, 'kind': 'op'},
                     'node_2': {'value': FakeValue(None), 'kind': 'data'},
-                    'node_3': {'value': None, 'kind': 'data'}
+                    'node_3': {'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -35,8 +36,10 @@ class TestInnerPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'inner'),
                              ('node_2', 'inner'),
-                             ('inner', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('inner', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'node_2': {'shape': np.array([1, 3, 256, 256]),
                                         'dim_attrs': ['spatial_dims', 'channel_dims', 'batch_dims', 'axis']},
@@ -60,8 +63,10 @@ class TestInnerPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'inner'),
                              ('node_2', 'inner'),
-                             ('inner', 'node_3')],
-                            {'node_3': {'is_output': True, 'shape': None},
+                             ('inner', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'shape': None},
                              'node_1': {'shape': None},
                              'node_2': {'shape': np.array([1, 3, 256, 256])},
                              'inner': {'out-size': 4}
diff --git a/model-optimizer/mo/front/common/partial_infer/matmul.py b/model-optimizer/mo/front/common/partial_infer/matmul.py
index 157402c9c..e615dcf4b 100644
--- a/model-optimizer/mo/front/common/partial_infer/matmul.py
+++ b/model-optimizer/mo/front/common/partial_infer/matmul.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py b/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py
index eb076aac5..755451a04 100644
--- a/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py
+++ b/model-optimizer/mo/front/common/partial_infer/multi_box_detection.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/multi_box_detection_test.py b/model-optimizer/mo/front/common/partial_infer/multi_box_detection_test.py
index ad9859f5e..f82a49482 100644
--- a/model-optimizer/mo/front/common/partial_infer/multi_box_detection_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/multi_box_detection_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/multi_box_prior.py b/model-optimizer/mo/front/common/partial_infer/multi_box_prior.py
index 4b70e37ef..8510b2d15 100644
--- a/model-optimizer/mo/front/common/partial_infer/multi_box_prior.py
+++ b/model-optimizer/mo/front/common/partial_infer/multi_box_prior.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/multi_box_prior_test.py b/model-optimizer/mo/front/common/partial_infer/multi_box_prior_test.py
index 6e1ce7c88..f50dd7128 100644
--- a/model-optimizer/mo/front/common/partial_infer/multi_box_prior_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/multi_box_prior_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/random_uniform.py b/model-optimizer/mo/front/common/partial_infer/random_uniform.py
index 0d33882b6..a720c557b 100644
--- a/model-optimizer/mo/front/common/partial_infer/random_uniform.py
+++ b/model-optimizer/mo/front/common/partial_infer/random_uniform.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/range.py b/model-optimizer/mo/front/common/partial_infer/range.py
index de1832366..ac7c135d6 100644
--- a/model-optimizer/mo/front/common/partial_infer/range.py
+++ b/model-optimizer/mo/front/common/partial_infer/range.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/range_test.py b/model-optimizer/mo/front/common/partial_infer/range_test.py
index 113c49b78..3ea693e9f 100644
--- a/model-optimizer/mo/front/common/partial_infer/range_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/range_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/reduce.py b/model-optimizer/mo/front/common/partial_infer/reduce.py
index 627badced..b65f91413 100644
--- a/model-optimizer/mo/front/common/partial_infer/reduce.py
+++ b/model-optimizer/mo/front/common/partial_infer/reduce.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -35,6 +35,6 @@ def tf_reduce_infer(node, op=None):
         output_shape = np.delete(output_shape, axis)
     node.out_node().shape = output_shape
     if op is not None and node.in_node(0).value is not None:
-        node.out_node(0).value = np.array([op(node.in_node(0).value, (*axis,))],
+        node.out_node(0).value = np.array(op(node.in_node(0).value, (*axis,)),
                                           dtype=node.in_node(0).value.dtype)  # TODO extend to multi-dimensional axis
         log.debug("value: {}".format(node.out_node(0).value))
 \ No newline at end of file
diff --git a/model-optimizer/mo/front/common/partial_infer/reshape.py b/model-optimizer/mo/front/common/partial_infer/reshape.py
index ae616022f..bb752a4dc 100644
--- a/model-optimizer/mo/front/common/partial_infer/reshape.py
+++ b/model-optimizer/mo/front/common/partial_infer/reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -30,6 +30,10 @@ def tf_reshape_shape_infer(node):
     input_shape = node.in_node(0).shape
     reshape_output = node.in_node(1).value if len(node.in_nodes()) > 1 else node.dim
 
+    # In case if Reshape operation was created with two inputs and dim attr wasn't set, we set in automatically
+    if not node.has_valid('dim'):
+        node['dim'] = reshape_output.copy()
+
     if node.in_node(0).shape is None:
         return None
 
@@ -68,8 +72,4 @@ def tf_reshape_shape_infer(node):
 
     output_shape = int64_array(output_shape)
 
-    # In case if Reshape operation was created with two inputs and dim attr wasn't set, we set in automatically
-    if not node.has_valid('dim'):
-        node['dim'] = output_shape
-
     return output_shape
diff --git a/model-optimizer/mo/front/common/partial_infer/roipooling.py b/model-optimizer/mo/front/common/partial_infer/roipooling.py
index 48d2d329f..115f923c4 100644
--- a/model-optimizer/mo/front/common/partial_infer/roipooling.py
+++ b/model-optimizer/mo/front/common/partial_infer/roipooling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/roipooling_test.py b/model-optimizer/mo/front/common/partial_infer/roipooling_test.py
index f6b9ebadc..b56d21b12 100644
--- a/model-optimizer/mo/front/common/partial_infer/roipooling_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/roipooling_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -27,7 +27,8 @@ nodes_attributes = {'node_1': {'kind': 'data'},
                     'node_3': {'kind': 'data'},
                     'node_4': {'kind': 'data'},
                     'roipool': {'type': 'ROIPooling', 'kind': 'op', 'pooled_h': None, 'pooled_w': None},
-                    'output': {'value': None, 'kind': 'data'}
+                    'output': {'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -36,8 +37,10 @@ class TestRoipoolingInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'roipool'),
                              ('node_2', 'roipool'),
-                             ('roipool', 'output')],
-                            {'output': {'is_output': True, 'shape': None},
+                             ('roipool', 'output'),
+                             ('output', 'op_output')
+                             ],
+                            {'output': {'shape': None},
                              'node_1': {'shape': np.array([1, 256, 20, 20])},
                              'node_2': {'shape': np.array([150, 5])},
                              'roipool': {'pooled_h': 6, 'pooled_w': 6}
@@ -55,8 +58,10 @@ class TestRoipoolingInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'roipool'),
                              ('node_2', 'roipool'),
-                             ('roipool', 'output')],
-                            {'output': {'is_output': True, 'shape': None},
+                             ('roipool', 'output'),
+                             ('output', 'op_output')
+                             ],
+                            {'output': {'shape': None},
                              'node_1': {'shape': None},
                              'node_2': {'shape': np.array([1, 256])},
                              'roipool': {'pooled_h': 6, 'pooled_w': 6}
@@ -74,8 +79,10 @@ class TestRoipoolingInfer(unittest.TestCase):
                              ('node_2', 'roipool'),
                              ('node_3', 'roipool'),
                              ('node_4', 'roipool'),
-                             ('roipool', 'output')],
-                            {'output': {'is_output': True, 'shape': None},
+                             ('roipool', 'output'),
+                             ('output', 'op_output')
+                             ],
+                            {'output': {'shape': None},
                              'node_1': {'shape': np.array([1, 20, 20, 256])},
                              'node_2': {'shape': np.array([150, 5])},
                              'node_3': {'shape': np.array([150])},
diff --git a/model-optimizer/mo/front/common/partial_infer/slice.py b/model-optimizer/mo/front/common/partial_infer/slice.py
index bf23763fe..a63658a03 100644
--- a/model-optimizer/mo/front/common/partial_infer/slice.py
+++ b/model-optimizer/mo/front/common/partial_infer/slice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,13 +16,20 @@
 
 import numpy as np
 
-from mo.graph.graph import erase_node
 from mo.utils.error import Error
 
+
 def tf_strided_slice_infer(node):
+    if node.in_node(1).value is None or node.in_node(2).value is None:
+        raise Error('Strided slice layer supports only constant begin and end inputs')
     begin_id = node.in_node(1).value
     end_id = node.in_node(2).value
-    stride = node.in_node(3).value
+    if len(node.in_nodes()) > 3:
+        if node.in_node(3).value is None:
+            raise Error('Strided slice layer supports only constant stride input')
+        stride = node.in_node(3).value
+    else:
+        stride = []
 
     shape = node.in_node(0).shape
 
@@ -32,63 +39,79 @@ def tf_strided_slice_infer(node):
     convert_negative_indices(begin_id, shape)
     convert_negative_indices(end_id, shape)
 
-    test_bit = lambda val, offset: ((1 << offset) & val != 0)
-
     slice_idx = []
-    shrink_axis_mask = []
-    ellipsis_mask = []
-    new_axis_mask = []
-    dims = len(begin_id)
-
+    dims = np.amax(np.array([len(begin_id), len(end_id), len(stride),
+                             len(node.shrink_axis_mask), len(node.new_axis_mask), len(node.ellipsis_mask),
+                             len(node.begin_mask), len(node.end_mask)]))
+
+    # make mask correct length
+    def extend_mask(in_mask, fin_len, zeros=True):
+        mask = list(in_mask)
+        if len(mask) < fin_len:
+            if zeros:
+                mask.extend(np.zeros(dims-len(mask), dtype=np.int32))
+            else:
+                mask.extend(np.ones(dims-len(mask), dtype=np.int32))
+        return np.array(mask, dtype=np.int32)
+
+    for mask in {'new_axis_mask', 'shrink_axis_mask', 'ellipsis_mask'}:
+        node[mask] = extend_mask(node[mask], dims)
+    node.begin_mask = extend_mask(node.begin_mask, dims, False)
+    node.end_mask = extend_mask(node.end_mask, dims, False)
+
+    old_idx = 0
+    ellips_ext = 0
+    id_em = 0
     for idx in range(dims):
-        def_beg = 0 if stride[idx] > 0 else -1
-        def_end = shape[idx] if stride[idx] > 0 else -shape[idx]-1
-        l = begin_id[idx] if not test_bit(node.begin_mask, idx) else def_beg
-        r = end_id[idx] if not test_bit(node.end_mask, idx) else def_end
-
-        # Check shrink_axis_mask
-        shrink_axis_mask.append(test_bit(node.shrink_axis_mask, idx))
-        if shrink_axis_mask[idx]:
-            l, r = l, l + 1
-
-        # Check new_axis_mask
-        new_axis_mask.append(test_bit(node.new_axis_mask, idx))
-        if new_axis_mask[idx]:
+        if node.new_axis_mask[idx]:
             slice_idx.append(np.newaxis)
-
-        # Check ellipsis_mask
-        ellipsis_mask.append(test_bit(node.ellipsis_mask, idx))
-        if ellipsis_mask[idx]:
-            shrink_axis_mask[idx] = False
-            l, r = 0, shape[idx]
-
-        slice_idx.append(slice(l, r, stride[idx]))
-    
-    # if masks length are less than input dims length than add slices and masks for such dims
-    for idx in range(dims, len(shape)):
-        slice_idx.append(slice(0, shape[idx], 1))
-        shrink_axis_mask.append(False)
-        new_axis_mask.append(False)
+        elif node.ellipsis_mask[idx]:
+            ellips_ext = len(shape) - (dims - np.count_nonzero(node.new_axis_mask) - 1)
+            id_em = idx
+            for i in range(0, ellips_ext):
+                slice_idx.append(slice(0, shape[old_idx], 1))
+                old_idx = old_idx + 1
+        else:
+            s = stride[idx] if len(stride) > idx else 1
+            def_beg = 0 if s > 0 else -1
+            def_end = shape[old_idx] if s > 0 else -shape[old_idx]-1
+            l = begin_id[idx] if node.begin_mask[idx] and idx < len(begin_id) else def_beg
+            r = end_id[idx] if node.end_mask[idx] and idx < len(end_id) else def_end
+
+            # Check shrink_axis_mask
+            if node.shrink_axis_mask[idx] and idx < len(shape):
+                slice_idx.append(slice(l, l+1, s))
+            else:
+                slice_idx.append(slice(l, r, s))
+            old_idx = old_idx + 1
 
     value = node.in_node(0).value if node.in_node(0).value is not None else np.zeros(shape)
     # fix for the warning: "FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated use
     # `arr[tuple(seq)]` instead of `arr[seq]`"
     value = value[tuple(slice_idx)]
 
-    for idx, flag in reversed(list(enumerate(shrink_axis_mask))):
+    for idx, flag in reversed(list(enumerate(node.shrink_axis_mask))):
         if flag:
-            value = np.squeeze(value, idx)
+            if ellips_ext > 0 and idx > id_em:
+                idx = idx + ellips_ext - 1
+            try:
+                value = np.squeeze(value, idx)
+            except ValueError:
+                # ignore this error
+                continue
 
     node['slices'] = np.array(slice_idx)
-    node['shrink_axis_mask'] = np.array(shrink_axis_mask)
-    node['new_axis_mask'] = np.array(new_axis_mask)
+    for attr in ('shrink_axis_mask', 'new_axis_mask', 'ellipsis_mask', 'begin_mask', 'end_mask'):
+        node[attr] = np.array(node[attr], dtype=np.int32)
 
     node.out_node().value = np.array(value) if node.in_node(0).value is not None else None
-    node.out_node().shape = np.array(value.shape)
+    node.out_node().shape = np.array(value.shape, dtype=np.int64)
+
+    # change precision to I32 for begin, end, stride inputs
+    for i in range(1, len(node.in_nodes())):
+        inp = node.in_node(i)
+        inp["force_precision"] = "I32"
 
-    #remove inputs converted in attributes
-    #for i in range(1,4):
-    #    node.graph.remove_edge(node.in_node(i).id, node.id)
 
 def convert_negative_indices(indices: np.array, shape: np.array):
     for ind, value in enumerate(indices):
diff --git a/model-optimizer/mo/front/common/partial_infer/slice_test.py b/model-optimizer/mo/front/common/partial_infer/slice_test.py
index cdd674d0e..af54493c9 100644
--- a/model-optimizer/mo/front/common/partial_infer/slice_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/slice_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -41,6 +41,9 @@ nodes_attributes = {'node_1': {'value': None, 'kind': 'data'},
                     'tf_slice_size': {'value': None, 'shape': None, 'kind': 'data'},
                     'tf_slice': {'kind': 'op'},
                     'tf_slice_output': {'value': None, 'shape': None, 'kind': 'data'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'},
+                    'op_output_1': {'kind': 'op', 'op': 'OpOutput'},
+                    'op_output_2': {'kind': 'op', 'op': 'OpOutput'}
                     }
 
 tf_slice_edges = [('tf_slice_input', 'tf_slice'), ('tf_slice_begin', 'tf_slice'), ('tf_slice_size', 'tf_slice'),
@@ -52,10 +55,13 @@ class TestSSliceInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'Slice_node'),
                              ('Slice_node', 'node_2'),
-                             ('Slice_node', 'node_3')],
+                             ('Slice_node', 'node_3'),
+                             ('node_2', 'op_output'),
+                             ('node_3', 'op_output_1')
+                             ],
                             {'node_1': {'shape': np.array([1, 288, 56, 56])},
-                             'node_2': {'is_output': True, 'shape': None},
-                             'node_3': {'is_output': True, 'shape': None},
+                             'node_2': {'shape': None},
+                             'node_3': {'shape': None},
                              'Slice_node': {'axis': 1, 'slice_point': np.array([256])}
                              })
 
@@ -77,10 +83,13 @@ class TestSSliceInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'Slice_node'),
                              ('Slice_node', 'node_2'),
-                             ('Slice_node', 'node_3')],
+                             ('Slice_node', 'node_3'),
+                             ('node_2', 'op_output'),
+                             ('node_3', 'op_output_1')
+                             ],
                             {'node_1': {'shape': np.array([1, 288, 56, 56])},
-                             'node_2': {'is_output': True, 'shape': None},
-                             'node_3': {'is_output': True, 'shape': None},
+                             'node_2': {'shape': None},
+                             'node_3': {'shape': None},
                              'Slice_node': {'axis': 1, 'slice_point': []}
                              })
 
@@ -102,11 +111,15 @@ class TestSSliceInfer(unittest.TestCase):
                             [('node_1', 'Slice_node'),
                              ('Slice_node', 'node_2'),
                              ('Slice_node', 'node_3'),
-                             ('Slice_node', 'node_4')],
+                             ('Slice_node', 'node_4'),
+                             ('node_2', 'op_output'),
+                             ('node_3', 'op_output_1'),
+                             ('node_2', 'op_output_2')
+                             ],
                             {'node_1': {'shape': np.array([1, 288, 56, 56])},
-                             'node_2': {'is_output': True, 'shape': None},
-                             'node_3': {'is_output': True, 'shape': None},
-                             'node_4': {'is_output': True, 'shape': None},
+                             'node_2': {'shape': None},
+                             'node_3': {'shape': None},
+                             'node_4': {'shape': None},
                              'Slice_node': {'axis': 1, 'slice_point': []}
                              })
 
@@ -132,11 +145,15 @@ class TestSSliceInfer(unittest.TestCase):
                             [('node_1', 'Slice_node'),
                              ('Slice_node', 'node_2'),
                              ('Slice_node', 'node_3'),
-                             ('Slice_node', 'node_4')],
+                             ('Slice_node', 'node_4'),
+                             ('node_2', 'op_output'),
+                             ('node_3', 'op_output_1'),
+                             ('node_2', 'op_output_2')
+                             ],
                             {'node_1': {'shape': np.array([1, 288, 56, 56])},
-                             'node_2': {'is_output': True, 'shape': None},
-                             'node_3': {'is_output': True, 'shape': None},
-                             'node_4': {'is_output': True, 'shape': None},
+                             'node_2': {'shape': None},
+                             'node_3': {'shape': None},
+                             'node_4': {'shape': None},
                              'Slice_node': {'axis': 1, 'slice_point': [100, 150]}
                              })
 
@@ -168,15 +185,16 @@ class TestTFStridedSliceInfer(unittest.TestCase):
                             ('sslice_end_1', 'sslice_1'),
                             ('sslice_stride_1', 'sslice_1'),
                             ('sslice_1', 'sslice_data_1'),
+                            ('sslice_data_1', 'op_output')
                             ],
-                           {'sslice_data_1': {'is_output': True},
+                           {
                             'sslice_input': {'value': np.array([1, 34, 34, 62]),
                                              'shape': np.array([3])},
                             'sslice_begin_1': {'value': np.array([0]), 'shape': np.array([1])},
                             'sslice_end_1': {'value': np.array([4]), 'shape': np.array([1])},
                             'sslice_stride_1': {'value': np.array([1]), 'shape': np.array([1])},
-                            'sslice_1': {'shrink_axis_mask': 0, 'ellipsis_mask': 0, 'new_axis_mask': 0,
-                                         'begin_mask': 0, 'end_mask': 0},
+                            'sslice_1': {'shrink_axis_mask': [0], 'ellipsis_mask': [0], 'new_axis_mask': [0],
+                                         'begin_mask': [1], 'end_mask': [1]},
                             })
 
     def build_test_graph(self):
@@ -186,17 +204,18 @@ class TestTFStridedSliceInfer(unittest.TestCase):
                             ('sslice_end_1', 'sslice_1'),
                             ('sslice_stride_1', 'sslice_1'),
                             ('sslice_1', 'sslice_data_1'),
+                            ('sslice_data_1', 'op_output')
                             ],
-                           {'sslice_data_1': {'is_output': True},
+                           {
                             'sslice_input': {'value': None, 'shape': np.array([1, 35, 35, 3])},
                             'sslice_begin_1': {'value': np.array([0, 0, 0, 0]), 'shape': np.array([4])},
                             'sslice_end_1': {'value': np.array([1, 34, 30, 2]), 'shape': np.array([4])},
                             'sslice_stride_1': {'value': np.array([1, 1, 1, 1]),
                                                 'shape': np.array([4])},
-                            'sslice_1': {'shrink_axis_mask': 0, 'ellipsis_mask': 0, 'new_axis_mask': 0,
-                                         'begin_mask': 0, 'end_mask': 0},
+                            'sslice_1': {'shrink_axis_mask': [0], 'ellipsis_mask': [0], 'new_axis_mask': [0],
+                                         'begin_mask': [1], 'end_mask': [1]},
                             })
- 
+
     def build_test_graph_dim_beg(self):
         return build_graph(nodes_attributes,
                            [('sslice_input', 'sslice_1'),
@@ -204,18 +223,18 @@ class TestTFStridedSliceInfer(unittest.TestCase):
                             ('sslice_end_1', 'sslice_1'),
                             ('sslice_stride_1', 'sslice_1'),
                             ('sslice_1', 'sslice_data_1'),
+                            ('sslice_data_1', 'op_output')
                             ],
-                           {'sslice_data_1': {'is_output': True},
+                           {
                             'sslice_input': {'value': np.array([[1, 34, 34, 62]]),
                                              'shape': np.array([1, 4])},
                             'sslice_begin_1': {'value': np.array([0]), 'shape': np.array([1])},
                             'sslice_end_1': {'value': np.array([4]), 'shape': np.array([1])},
                             'sslice_stride_1': {'value': np.array([1]), 'shape': np.array([1])},
-                            'sslice_1': {'shrink_axis_mask': 0, 'ellipsis_mask': 0, 'new_axis_mask': 0,
-                                         'begin_mask': 0, 'end_mask': 0},
+                            'sslice_1': {'shrink_axis_mask': [0], 'ellipsis_mask': [0], 'new_axis_mask': [0],
+                                         'begin_mask': [1], 'end_mask': [1]},
                             })
 
-
     def test_slice_infer_1(self):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
@@ -225,7 +244,7 @@ class TestTFStridedSliceInfer(unittest.TestCase):
     def test_slice_infer_2(self):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
-        node.end_mask = 6  # 0110
+        node.end_mask = [1, 0, 0, 1]  # 6
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([1, 35, 35, 2])), 'Wrong output shape detected')
 
@@ -233,7 +252,7 @@ class TestTFStridedSliceInfer(unittest.TestCase):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
         node.in_node(1).value = np.array([0, 10, 10, 0])
-        node.end_mask = 6  # 0110
+        node.end_mask = [1, 0, 0, 1]  # 6
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([1, 25, 25, 2])), 'Wrong output shape detected')
 
@@ -241,7 +260,7 @@ class TestTFStridedSliceInfer(unittest.TestCase):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
         node.in_node(1).value = np.array([0, 10, 10, 0])
-        node.begin_mask = 6  # 0110
+        node.begin_mask = [1, 0, 0, 1]  # 6
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([1, 34, 30, 2])), 'Wrong output shape detected')
 
@@ -249,8 +268,8 @@ class TestTFStridedSliceInfer(unittest.TestCase):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
         node.in_node(1).value = np.array([0, 10, 10, 0])
-        node.begin_mask = 15  # 1111
-        node.end_mask = 15  # 1111
+        node.begin_mask = [0, 0, 0, 0]  # 15
+        node.end_mask = [0, 0, 0, 0]  # 15
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([1, 35, 35, 3])), 'Wrong output shape detected')
 
@@ -273,7 +292,7 @@ class TestTFStridedSliceInfer(unittest.TestCase):
     def test_slice_infer_8(self):
         graph = self.build_test_graph2()
         node = Node(graph, 'sslice_1')
-        node.new_axis_mask = 1
+        node.new_axis_mask = [1]
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([1, 4])), 'Wrong output shape detected')
         self.assertTrue(np.array_equal(node.out_node().value, np.array([[1, 34, 34, 62]])),
@@ -282,59 +301,57 @@ class TestTFStridedSliceInfer(unittest.TestCase):
     def test_slice_infer_9(self):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
-        node.begin_mask = 15  # 1111
-        node.end_mask = 15  # 1111
-        node.shrink_axis_mask = 1
+        node.begin_mask = [0, 0, 0, 0]  # 15
+        node.end_mask = [0, 0, 0, 0]  # 15
+        node.shrink_axis_mask = [1]
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([35, 35, 3])), 'Wrong output shape detected')
 
     def test_slice_infer_10(self):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
-        node.begin_mask = 15  # 1111
-        node.end_mask = 15  # 1111
-        node.shrink_axis_mask = 1
-        node.new_axis_mask = 8
+        node.begin_mask = [0, 0, 0, 0]  # 15
+        node.end_mask = [0, 0, 0, 0]  # 15
+        node.shrink_axis_mask = [1, 0, 0, 0]
+        node.new_axis_mask = [0, 0, 0, 1]  # 8
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([35, 35, 1, 3])), 'Wrong output shape detected')
 
     def test_slice_infer_11(self):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
-        node.begin_mask = 15  # 1111
-        node.end_mask = 15  # 1111
-        node.shrink_axis_mask = 5  # 0101
+        node.begin_mask = [0, 0, 0, 0]  # 15
+        node.end_mask = [0, 0, 0, 0]  # 15
+        node.shrink_axis_mask = [1, 0, 1, 0]  # 5
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([35, 3])), 'Wrong output shape detected')
 
     def test_slice_infer_12(self):
         graph = self.build_test_graph()
         node = Node(graph, 'sslice_1')
-        node.begin_mask = 15  # 1111
-        node.end_mask = 15  # 1111
-        node.shrink_axis_mask = 7  # 0111
+        node.begin_mask = [0, 0, 0, 0]  # 15
+        node.end_mask = [0, 0, 0, 0]  # 15
+        node.shrink_axis_mask = [1, 1, 1, 0]  # 7
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([3])), 'Wrong output shape detected')
 
     def test_slice_infer_13(self):
         graph = self.build_test_graph2()
         node = Node(graph, 'sslice_1')
-        # node.in_node(0).value = np.array([1])
         node.in_node(1).value = np.array([1])
-        node.shrink_axis_mask = 1
+        node.shrink_axis_mask = [1]
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([])), 'Wrong output shape detected')
         self.assertTrue(np.array_equal(node.out_node().value, np.array(34)), 'Wrong output shape detected')
 
-    def test_slice_infer_14(self):  
+    def test_slice_infer_14(self):
         graph = self.build_test_graph2()
         node = Node(graph, 'sslice_1')
-        # node.in_node(0).value = np.array([1])
         node.in_node(3).value = np.array([-1])
-        node.end_mask=1
-        node.begin_mask=1
-        node.in_node(0).shape=[4]
-        tf_strided_slice_infer(node) 
+        node.end_mask = [0]
+        node.begin_mask = [0]
+        node.in_node(0).shape = [4]
+        tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([4])), 'Wrong output shape detected')
         print(node.out_node().value)
         self.assertTrue(np.array_equal(node.out_node().value, np.array([62, 34, 34, 1])), 'Wrong output shape detected')
@@ -342,8 +359,7 @@ class TestTFStridedSliceInfer(unittest.TestCase):
     def test_slice_infer_dim_beg(self):
         graph = self.build_test_graph_dim_beg()
         node = Node(graph, 'sslice_1')
-        # node.in_node(0).value = np.array([1])
-        node.shrink_axis_mask = 1
+        node.shrink_axis_mask = [1]
         tf_strided_slice_infer(node)
         self.assertTrue(np.array_equal(node.out_node().shape, np.array([4])), 'Wrong output shape detected')
         self.assertTrue(np.array_equal(node.out_node().value, np.array([1, 34, 34, 62])), 'Wrong output shape detected')
diff --git a/model-optimizer/mo/front/common/partial_infer/space_to_batch.py b/model-optimizer/mo/front/common/partial_infer/space_to_batch.py
index 39083eb2a..d1573641b 100644
--- a/model-optimizer/mo/front/common/partial_infer/space_to_batch.py
+++ b/model-optimizer/mo/front/common/partial_infer/space_to_batch.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/split.py b/model-optimizer/mo/front/common/partial_infer/split.py
index ff8abb8eb..033914783 100644
--- a/model-optimizer/mo/front/common/partial_infer/split.py
+++ b/model-optimizer/mo/front/common/partial_infer/split.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/split_test.py b/model-optimizer/mo/front/common/partial_infer/split_test.py
index a81b57ae5..60e399e7b 100644
--- a/model-optimizer/mo/front/common/partial_infer/split_test.py
+++ b/model-optimizer/mo/front/common/partial_infer/split_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/squeeze.py b/model-optimizer/mo/front/common/partial_infer/squeeze.py
index 574ba85c5..ffdfdc787 100644
--- a/model-optimizer/mo/front/common/partial_infer/squeeze.py
+++ b/model-optimizer/mo/front/common/partial_infer/squeeze.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/transpose.py b/model-optimizer/mo/front/common/partial_infer/transpose.py
index 6cef48d2f..b2bee1fd2 100644
--- a/model-optimizer/mo/front/common/partial_infer/transpose.py
+++ b/model-optimizer/mo/front/common/partial_infer/transpose.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/partial_infer/utils.py b/model-optimizer/mo/front/common/partial_infer/utils.py
index 0056a0a57..97ce37a80 100644
--- a/model-optimizer/mo/front/common/partial_infer/utils.py
+++ b/model-optimizer/mo/front/common/partial_infer/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,8 +18,10 @@ import logging as log
 
 import numpy as np
 
+from typing import Iterable
 
-def int64_array(l: list):
+
+def int64_array(l: Iterable):
     return np.array(l, dtype=np.int64)
 
 
@@ -58,26 +60,31 @@ def convert_tf_padding_to_str(padding):
 
 
 # TODO eliminate this dependency and pass necessary function as an argument
-def tf_window_op_pad_infer(input, window, stride, auto_pad):
+def tf_window_op_pad_infer(input, window, stride, auto_pad, is_deconv=False):
     if input is None or window is None or stride is None or auto_pad is None:
         return (None, None)
+
+    normalized_stride = stride
+    if is_deconv:
+        normalized_stride = 1 / stride
+
     if auto_pad in ['same_lower', 'same_upper']:
         if auto_pad == 'same_upper':
-            output = np.int64(np.ceil(input / stride))
+            output = np.int64(np.ceil(input / normalized_stride))
         else:
-            output = np.int64(np.floor(input / stride))
+            output = np.int64(np.floor(input / normalized_stride))
         residual = input % stride
         mask = residual == 0
         full_pad = window.copy()
         full_pad[mask] -= stride[mask]
-        mask = np.logical_not(mask)
+        mask = np.logical_not(mask)  # pylint: disable=assignment-from-no-return
         full_pad[mask] -= input[mask] % stride[mask]
-        full_pad = np.maximum(full_pad, 0)
+        full_pad = np.maximum(full_pad, 0)  # pylint: disable=assignment-from-no-return
         low_pad = np.int64(full_pad / 2)
         high_pad = full_pad - low_pad
         pad = np.array([low_pad, high_pad]).transpose()
     elif auto_pad == 'valid':
-        output = np.int64(np.ceil((input - window + 1) / stride))
+        output = np.int64(np.ceil((input - window + 1) / normalized_stride))
         pad = np.zeros((len(output), 2), dtype=np.int64)
     else:
         log.error("Unsupported padding scheme: {}".format(auto_pad))
diff --git a/model-optimizer/mo/front/common/register_custom_ops.py b/model-optimizer/mo/front/common/register_custom_ops.py
index 1172bf303..fdaa392be 100644
--- a/model-optimizer/mo/front/common/register_custom_ops.py
+++ b/model-optimizer/mo/front/common/register_custom_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/common/replacement.py b/model-optimizer/mo/front/common/replacement.py
index 6a2874d4a..6b8668956 100644
--- a/model-optimizer/mo/front/common/replacement.py
+++ b/model-optimizer/mo/front/common/replacement.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@ import logging as log
 import networkx as nx
 
 from mo.front.subgraph_matcher import SubgraphMatch
-from mo.graph.graph import Node, merge_edge_props, get_sorted_inputs
+from mo.graph.graph import Node, merge_edge_props, Graph
 from mo.middle.pattern_match import apply_pattern
 from mo.utils import class_registration
 from mo.utils.replacement_pattern import ReplacementPattern
@@ -28,6 +28,14 @@ class FrontReplacementPattern(ReplacementPattern):
     registered_ops = {}
     registered_cls = []
 
+    def run_after(self):
+        from extensions.front.pass_separator import FrontStart
+        return [FrontStart]
+
+    def run_before(self):
+        from extensions.front.pass_separator import FrontFinish
+        return [FrontFinish]
+
     def pattern(self):
         raise Exception('Function "pattern" must be overridden in the sub-class')
 
@@ -45,6 +53,14 @@ class FrontReplacementSubgraph(FrontReplacementPattern):
     """
     replacement_id = 'None'
 
+    def run_after(self):
+        from extensions.front.pass_separator import FrontStart
+        return [FrontStart]
+
+    def run_before(self):
+        from extensions.front.pass_separator import FrontFinish
+        return [FrontFinish]
+
     def __init__(self):
         pass
 
@@ -53,7 +69,7 @@ class FrontReplacementSubgraph(FrontReplacementPattern):
         return node_port if isinstance(node_port, tuple) else (node_port, 0)
 
     @staticmethod
-    def replace_input_edges(graph: nx.DiGraph, input_edges_match: dict):
+    def replace_input_edges(graph: Graph, input_edges_match: dict):
         """
         Replacing existing input/output edges with a new ones to a new sub-graph.
         :param graph: networkX graph to operate on.
@@ -64,14 +80,14 @@ class FrontReplacementSubgraph(FrontReplacementPattern):
             old_node_name, old_in_port = __class__.extract_port(old_name_port)
             new_node_name, new_in_port = __class__.extract_port(new_name_port)
             old_node = Node(graph, old_node_name)
-            src_node_name = get_sorted_inputs(old_node)[old_in_port][0]
+            src_node_name = old_node.get_sorted_inputs()[old_in_port][0]
             edge_attrs = graph[src_node_name][old_node_name][0].copy()
             edge_attrs['in'] = new_in_port
             graph.add_edge(src_node_name, new_node_name, **edge_attrs)
             log.debug("Created edge from {} to {} with attrs: {}".format(src_node_name, new_node_name, edge_attrs))
 
     @staticmethod
-    def replace_output_edges(graph: nx.DiGraph, output_edges_match: dict):
+    def replace_output_edges(graph: Graph, output_edges_match: dict):
         """
         Replacing existing input/output edges with a new ones to a new sub-graph.
         :param graph: networkX graph to operate on.
@@ -88,28 +104,28 @@ class FrontReplacementSubgraph(FrontReplacementPattern):
                     graph.add_edge(new_node_name, dst, **new_edge_attrs)
                     log.debug("Created edge from {} to {} with attrs: {}".format(new_node_name, dst, new_edge_attrs))
 
-    def input_edges_match(self, graph: nx.MultiDiGraph, match: object, new_sub_graph: dict):
+    def input_edges_match(self, graph: Graph, match: object, new_sub_graph: dict):
         """
         Default implementation doesn't add new input edges automatically.
         """
         return {}
 
-    def output_edges_match(self, graph: nx.MultiDiGraph, match: object, new_sub_graph: dict):
+    def output_edges_match(self, graph: Graph, match: object, new_sub_graph: dict):
         """
         Default implementation doesn't add new output edges automatically.
         """
         return {}
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: object):
+    def generate_sub_graph(self, graph: Graph, match: object):
         raise Exception("The function 'generate_sub_graph' must be implemented in the sub-class.")
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: dict):
+    def nodes_to_remove(self, graph: Graph, match: dict):
         """
         Default implementation generates list of all matched nodes. So all matched nodes will be removed.
         """
         return [node.id for node in match.values()]
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: [dict, SubgraphMatch]):
+    def replace_sub_graph(self, graph: Graph, match: [dict, SubgraphMatch]):
         log.debug('replace_sub_graph: "{}" matched nodes: {}'.format(self.replacement_id,
                                                                      '\n'.join(sorted(match.matched_nodes_names()))))
         new_sub_graph = self.generate_sub_graph(graph, match)  # pylint: disable=assignment-from-no-return
@@ -121,7 +137,7 @@ class FrontReplacementSubgraph(FrontReplacementPattern):
             'replace_sub_graph: "{}" removing nodes: {}'.format(self.replacement_id, '\n'.join(sorted(remove_nodes))))
         graph.remove_nodes_from(remove_nodes)
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         apply_pattern(graph, action=self.replace_sub_graph, **self.pattern())
 
     registered_ops = {}
@@ -143,6 +159,14 @@ class FrontReplacementOp(FrontReplacementSubgraph):
     """
     op = 'UnknownOp'
 
+    def run_after(self):
+        from extensions.front.pass_separator import FrontStart
+        return [FrontStart]
+
+    def run_before(self):
+        from extensions.front.pass_separator import FrontFinish
+        return [FrontFinish]
+
     def pattern(self):
         return dict(
             nodes=[
@@ -150,7 +174,7 @@ class FrontReplacementOp(FrontReplacementSubgraph):
             edges=[]
         )
 
-    def replace_op(self, graph: nx.MultiDiGraph, node: Node):
+    def replace_op(self, graph: Graph, node: Node):
         raise Exception("The function 'replace_op' must be implemented in the sub-class.")
 
     @staticmethod
@@ -167,7 +191,7 @@ class FrontReplacementOp(FrontReplacementSubgraph):
         return out_edges_match_dict
 
     @staticmethod
-    def update_input_edges_attrs(graph: nx.MultiDiGraph, node: Node, added_nodes: list):
+    def update_input_edges_attrs(graph: Graph, node: Node, added_nodes: list):
         """
         Copy edge attributes from 'old' input edges of node 'node' to new input sub-graph edges.
         :param graph: graph to operate on
@@ -181,7 +205,7 @@ class FrontReplacementOp(FrontReplacementSubgraph):
                     if old_u == new_u and old_edge_attrs['out'] == new_edge_attrs['out']:
                         merge_edge_props(new_edge_attrs, old_edge_attrs)  # copy old edge attributes
 
-    def replace_sub_graph(self, graph: nx.MultiDiGraph, match: dict):
+    def replace_sub_graph(self, graph: Graph, match: dict):
         assert 'op' in match
         assert len(match) == 1
         node = match['op']
diff --git a/model-optimizer/mo/front/common/weights.py b/model-optimizer/mo/front/common/weights.py
index 84e06796b..486e8da4e 100644
--- a/model-optimizer/mo/front/common/weights.py
+++ b/model-optimizer/mo/front/common/weights.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/extractor.py b/model-optimizer/mo/front/extractor.py
index 6ba1ea4c4..0b682948f 100644
--- a/model-optimizer/mo/front/extractor.py
+++ b/model-optimizer/mo/front/extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -22,8 +22,8 @@ import networkx as nx
 import numpy as np
 
 from mo.front.onnx.extractors.utils import get_backend_pad
-from mo.graph.graph import Node, unique_id, get_node_id_by_name
-from mo.middle.passes.eliminate import reverse_dfs, get_nodes_with_attributes
+from mo.graph.graph import Node, Graph, add_opoutput
+from mo.middle.passes.eliminate import reverse_dfs
 from mo.utils import class_registration
 from mo.utils.error import Error
 from mo.utils.graph import dfs
@@ -31,15 +31,14 @@ from mo.utils.unsupported_ops import UnsupportedOps
 from mo.utils.utils import refer_to_faq_msg
 
 
-def restore_edges(graph: nx.DiGraph, get_edges: callable):
+def restore_edges(graph: Graph, get_edges: callable):
     """
     Take a graph without edges and extract dependencies between nodes with the help of get_edges function.
     For a given node n the get_edges function returns a list of tuples (n1, n2, attrs), that is used to create
     n1 --> n2 edge with attributes attrs.
-    It is possible that two nodes n1 and n2 have more than one n1 --> n2 edges, so the resulting graph is
-    nx.MultiDiGraph.
+    It is possible that two nodes n1 and n2 have more than one n1 --> n2 edges, so the resulting graph is Graph.
     """
-    graph = nx.MultiDiGraph(graph)
+    graph = Graph(graph)
     for node in list(graph.nodes()):
         edges = get_edges(Node(graph, node))
         for u, v, d in edges:
@@ -56,7 +55,7 @@ def restore_edges(graph: nx.DiGraph, get_edges: callable):
     return graph
 
 
-def remove_control_dependency_inputs(graph: nx.MultiDiGraph):
+def remove_control_dependency_inputs(graph: Graph):
     """
     Delete control dependency inputs from pb all over the graph
     :param graph: graph to operate on 
@@ -473,6 +472,7 @@ def update_ie_fields(attrs: dict, ir_version = None):
     ir_version_mapping = {
         # Default behaviour is IR V3 attributes
         None: ir_v3_attrs,
+        5: ir_v3_attrs,
         4: ir_v3_attrs,
         3: ir_v3_attrs,
         2: ir_v2_attrs
@@ -484,7 +484,7 @@ def update_ie_fields(attrs: dict, ir_version = None):
     attrs.update(ir_version_mapping[ir_version])
 
 
-def create_tensor_nodes(graph: nx.MultiDiGraph):
+def create_tensor_nodes(graph: Graph):
     """
     Creates nodes between ops to represent intermediate data that flows from one op to another.
     For each edge with unique out attribute that goes from a given node,
@@ -528,7 +528,7 @@ def create_tensor_nodes(graph: nx.MultiDiGraph):
         node_name = str(smart_node.name) if smart_node.has_valid('name') else str(smart_node.id)
 
         # assign to each output port a tensor unique id in the graph
-        out_tensor_dict = {port: unique_id(graph, '{}/Output_{}/Data_'.format(node_name, port)) for port in out_ports}
+        out_tensor_dict = {port: graph.unique_id('{}/Output_{}/Data_'.format(node_name, port)) for port in out_ports}
 
         # add a new node with kind='data' per each tensor
         graph.add_nodes_from([(uid,
@@ -561,7 +561,7 @@ def create_tensor_nodes(graph: nx.MultiDiGraph):
         # data node content (numpy array). Shape is initialized by this array.
         if 'embedded_inputs' in node_attr:
             for port_index, value_attr, attrs in node_attr['embedded_inputs']:
-                input_node_id = unique_id(graph, 'embedded_input_')
+                input_node_id = graph.unique_id('embedded_input_')
                 value = node_attr[value_attr]
                 shape = np.array(value.shape, dtype=np.int64)
                 graph.add_node(input_node_id, **add_attrs_props(
@@ -569,6 +569,9 @@ def create_tensor_nodes(graph: nx.MultiDiGraph):
                 edge_attrs = {'in': port_index, 'name': value_attr}
                 edge_attrs.update(attrs)
                 graph.add_edge(input_node_id, node, **edge_attrs)
+                op_node = Node(graph, node)
+                if not op_node.has_port(port_type='in', idx=edge_attrs['in']):
+                    op_node.add_input_port(edge_attrs['in'])
                 del node_attr[value_attr]
     return graph
 
@@ -586,7 +589,7 @@ def get_specific_edge_attrs(attrs: dict, attrs_type: str, additional_attrs=None)
     return new_attrs
 
 
-def extract_node_attrs(graph: nx.MultiDiGraph, extractor: callable):
+def extract_node_attrs(graph: Graph, extractor: callable):
     """
     For each node produce new entries in a node attributes dictionary by existing attributes.
     Old attributes are not removed but merged with new ones.
@@ -652,7 +655,7 @@ def extract_port_from_string(node_name: str):
         return name, in_port, out_port
 
 
-def get_node_id_with_ports(graph: nx.MultiDiGraph, name: str):
+def get_node_id_with_ports(graph: Graph, name: str):
     """
     Extracts port and node ID out of user provided name
     :param graph: graph to operate on
@@ -660,7 +663,7 @@ def get_node_id_with_ports(graph: nx.MultiDiGraph, name: str):
     :return: node ID, direction of port ('in', 'out', 'port') and port number or None
     """
     node_name, in_port, out_port = extract_port_from_string(name)
-    node_id = get_node_id_by_name(graph, node_name)
+    node_id = graph.get_node_id_by_name(node_name)
     if in_port is not None:
         direction = 'in'
         port = in_port
@@ -673,7 +676,7 @@ def get_node_id_with_ports(graph: nx.MultiDiGraph, name: str):
     return node_id, direction, port
 
 
-def input_user_data_repack(graph: nx.MultiDiGraph, input_user_shapes: [None, list, dict, np.ndarray], freeze_placeholder: dict):
+def input_user_data_repack(graph: Graph, input_user_shapes: [None, list, dict, np.ndarray], freeze_placeholder: dict):
     """
     Restructures user input cutting request. Splits ports out of node names. Transforms node names to node ids.
     :param graph: graph to operate on
@@ -712,12 +715,12 @@ def input_user_data_repack(graph: nx.MultiDiGraph, input_user_shapes: [None, lis
     _freeze_placeholder = dict()
     # freeze placeholder restructure
     # Replaces placeholder name with placeholder id. Raises if there is no placeholder with such ID
-    placeholders_ids = get_nodes_with_attributes(graph, op='Placeholder')
+    placeholders_ids = graph.get_nodes_with_attributes(op='Placeholder')
     if freeze_placeholder is None:
         _freeze_placeholder = None
     else:
         for placeholder_name, value in freeze_placeholder.items():
-            placeholder_id = get_node_id_by_name(graph, placeholder_name)
+            placeholder_id = graph.get_node_id_by_name(placeholder_name)
             if placeholder_id not in placeholders_ids:
                 raise Error(
                     'There is no placeholder with name {}. Can not freeze it with value.'.format(placeholder_name))
@@ -761,7 +764,7 @@ def input_user_data_repack(graph: nx.MultiDiGraph, input_user_shapes: [None, lis
     return _input_shapes, _freeze_placeholder
 
 
-def output_user_data_repack(graph: nx.MultiDiGraph, outputs: list):
+def output_user_data_repack(graph: Graph, outputs: list):
     """
 
     :param graph: graph to operate on
@@ -795,7 +798,7 @@ def output_user_data_repack(graph: nx.MultiDiGraph, outputs: list):
     return _outputs
 
 
-def user_data_repack(graph: nx.MultiDiGraph, input_user_shapes: [None, list, dict, np.array], outputs: list,
+def user_data_repack(graph: Graph, input_user_shapes: [None, list, dict, np.array], outputs: list,
                      freeze_placeholder: dict):
     """
     :param graph: graph to operate on
@@ -809,41 +812,17 @@ def user_data_repack(graph: nx.MultiDiGraph, input_user_shapes: [None, list, dic
     return _input_shapes, _outputs, _freeze_placeholder
 
 
-def add_opoutput(graph: nx.MultiDiGraph, node_name: str, port: int, cut: bool = True):
-    """
-    Creates and connects OpOutput node to node_name port. Cuts existing port if requested.
-    :param graph: graph to operate with
-    :param node_name: name of existing node in the graph that we want to add OpOutput to
-    :param port: output port of node to connect OpOutput to
-    :param cut: determines way of operating with edge specified by node_name and port
-    """
-    # we import it here because Op imports add_attrs_props and update_ie_fields from this file
-    from mo.ops.output import Output
-    if cut and len(Node(graph, node_name).out_edges()) != 0:
-        opoutput_node = Output(graph).cut_edge_and_create_node(Node(graph, node_name), port,
-                                                               {'name': '{}/sink_port_{}'.format(node_name, port)})
-    else:
-        opoutput_node = Output(graph).create_node([(Node(graph, node_name), port)],
-                                                  {'name': '{}/sink_port_{}'.format(node_name, port)})
-        opoutput_node.in_edge()['data_attrs'] = ['fw_tensor_debug_info']
-        opoutput_node.in_edge()['fw_tensor_debug_info'] = [(node_name, port)]
-    log.debug('Sink: {} for node {}'.format(opoutput_node.id, node_name))
-    log.debug(str(graph.node[opoutput_node.id]))
-    log.debug("Add edge from {} to {}".format(node_name, opoutput_node.id))
-    return opoutput_node.id
-
-
-def add_output_ops(graph: nx.MultiDiGraph, user_defined_outputs: dict, inputs: dict = None):
+def add_output_ops(graph: Graph, user_defined_outputs: dict, inputs: dict = None):
     sinks = []
     # func sets all layers as outputs in case of empty user_defined_outputs list (it's impossible to reach by cli)
     assert not (isinstance(user_defined_outputs, list) and not len(user_defined_outputs))
 
     # remove previously generated OpOutput if any
     graph.remove_nodes_from([node_name for node_name in graph.nodes() if
-                             'type' in graph.node[node_name] and graph.node[node_name]['type'] == 'OpOutput'])
+                             'op' in graph.node[node_name] and graph.node[node_name]['op'] == 'OpOutput'])
 
     if user_defined_outputs is None:
-        inputs = get_nodes_with_attributes(graph, op='Placeholder') if inputs is None else list(inputs.keys())
+        inputs = graph.get_nodes_with_attributes(op='Placeholder') if inputs is None else list(inputs.keys())
         input_reachable, dead_outputs, undead_outputs = set(), [], []
         for input in inputs:
             dfs(graph=graph, node_name=input, visited=input_reachable)
@@ -885,12 +864,12 @@ def add_output_ops(graph: nx.MultiDiGraph, user_defined_outputs: dict, inputs: d
     return sinks
 
 
-def set_is_input(graph: nx.MultiDiGraph, placeholders: list, is_input: bool):
+def set_is_input(graph: Graph, placeholders: list, is_input: bool):
     for placeholder in placeholders:
         graph.node[placeholder]['is_input'] = is_input
 
 
-def check_input(graph: nx.MultiDiGraph, node_name: str):
+def check_input(graph: Graph, node_name: str):
     node = Node(graph, node_name)
     if node['kind'] == 'op' and node['op'] == 'Placeholder' and not len(graph.in_edges(node_name)) and not node[
         'is_input']:
@@ -914,7 +893,7 @@ def split_node_in_port(node_id: str):
     return node_id, None
 
 
-def add_input_op_input_port_without_data(graph: nx.MultiDiGraph, node_id: str, input_op, edge_attrs: dict):
+def add_input_op_input_port_without_data(graph: Graph, node_id: str, input_op, edge_attrs: dict):
     input_node = input_op.create_node()
     graph.add_edge(input_node.id, node_id, **edge_attrs)
     log.debug('Input: {} for node {}'.format(input_node.id, node_id))
@@ -922,7 +901,7 @@ def add_input_op_input_port_without_data(graph: nx.MultiDiGraph, node_id: str, i
     return input_node.id
 
 
-def add_input_op_input_port_with_data(graph: nx.MultiDiGraph, node_id: str, input_op, edge_attrs: dict):
+def add_input_op_input_port_with_data(graph: Graph, node_id: str, input_op, edge_attrs: dict):
     input_data_node = input_op.create_node_with_data()
     input_node = input_data_node.in_node()
     graph.add_edge(input_data_node.id, node_id, **edge_attrs)
@@ -933,7 +912,7 @@ def add_input_op_input_port_with_data(graph: nx.MultiDiGraph, node_id: str, inpu
     return input_node.id
 
 
-def add_input_op_output_port_without_data(graph: nx.MultiDiGraph, node_id: str, input_op, port: int):
+def add_input_op_output_port_without_data(graph: Graph, node_id: str, input_op, port: int):
     input_node = input_op.create_node()
     # In this case it can be more than one out edge from one port and we should iterate over all output edges
     for _, out_node, attrs in graph.out_edges(node_id, data=True):
@@ -947,7 +926,7 @@ def add_input_op_output_port_without_data(graph: nx.MultiDiGraph, node_id: str,
     return input_node.id
 
 
-def add_input_op_output_port_with_data(graph: nx.MultiDiGraph, node_id: str, input_op, port: int):
+def add_input_op_output_port_with_data(graph: Graph, node_id: str, input_op, port: int):
     # we assume that after op always data node
     data_node = Node(graph, node_id).out_node(port)
     assert data_node.has_valid('kind') and data_node.kind == 'data'
@@ -959,7 +938,7 @@ def add_input_op_output_port_with_data(graph: nx.MultiDiGraph, node_id: str, inp
     return input_node.id
 
 
-def add_input_op(graph: nx.MultiDiGraph, node_id: str, port: int = 0, data: bool = False, shape=None,
+def add_input_op(graph: Graph, node_id: str, port: int = 0, data: bool = False, shape=None,
                  is_out_port: bool = False):
     """
     This function adds Input node to node with id==node_id to specified port (in or out defined with is_out_port).
@@ -996,7 +975,7 @@ def add_input_op(graph: nx.MultiDiGraph, node_id: str, port: int = 0, data: bool
     return new_input_id
 
 
-def add_input_ops_helper_before_infer_input_port(graph: nx.MultiDiGraph, smart_node: Node, port: int, node_id: str,
+def add_input_ops_helper_before_infer_input_port(graph: Graph, smart_node: Node, port: int, node_id: str,
                                                  shape: np.array, inputs: list, edges_to_remove: list):
     n_inputs = len(smart_node.in_nodes())
     if n_inputs > 1 and port is None:
@@ -1010,7 +989,7 @@ def add_input_ops_helper_before_infer_input_port(graph: nx.MultiDiGraph, smart_n
                                shape=shape))
 
 
-def add_input_ops_helper_after_infer_input_port(graph: nx.MultiDiGraph, smart_node: Node, port:int, node_id: str,
+def add_input_ops_helper_after_infer_input_port(graph: Graph, smart_node: Node, port:int, node_id: str,
                                                 inputs: list, edges_to_remove: list):
     n_inputs = len(smart_node.in_nodes())
     if n_inputs > 1 and port is not None and port != 0:
@@ -1029,7 +1008,7 @@ def add_input_ops_helper_after_infer_input_port(graph: nx.MultiDiGraph, smart_no
     edges_to_remove.append((in_node.id, node_id))
 
 
-def add_input_ops_helper_before_infer_output_port(graph: nx.MultiDiGraph, port:int, node_id: str,
+def add_input_ops_helper_before_infer_output_port(graph: Graph, port:int, node_id: str,
                                                  shape: np.array, inputs: list, edges_to_remove: list):
     for u, v, edge_attrs in graph.out_edges(node_id, data=True):
         if edge_attrs['out'] == port:
@@ -1037,7 +1016,7 @@ def add_input_ops_helper_before_infer_output_port(graph: nx.MultiDiGraph, port:i
     inputs.append(add_input_op(graph=graph, node_id=node_id, port=port, data=False,
                                shape=shape, is_out_port=True))
 
-def add_input_ops_helper_after_infer_output_port(graph: nx.MultiDiGraph, smart_node: Node, port:int, node_id: str,
+def add_input_ops_helper_after_infer_output_port(graph: Graph, smart_node: Node, port:int, node_id: str,
                                                  inputs: list, edges_to_remove: list):
     out_node = smart_node.out_node(port)
     shape = out_node['shape'] if 'shape' in out_node else None
@@ -1049,7 +1028,7 @@ def add_input_ops_helper_after_infer_output_port(graph: nx.MultiDiGraph, smart_n
     edges_to_remove.append((node_id, out_node.id))
 
 
-def add_input_ops(graph: nx.MultiDiGraph, user_defined_inputs: dict, before_infer: bool):
+def add_input_ops(graph: Graph, user_defined_inputs: dict, before_infer: bool):
     """
     This function add user defined input operations.
     For cutting without port:
@@ -1067,9 +1046,9 @@ def add_input_ops(graph: nx.MultiDiGraph, user_defined_inputs: dict, before_infe
     For case with before_infer=False data nodes are added to this schemes.
     """
     inputs = []
-    set_is_input(graph, get_nodes_with_attributes(graph, op='Placeholder'), False)
+    set_is_input(graph, graph.get_nodes_with_attributes(op='Placeholder'), False)
     if user_defined_inputs is None:
-        inputs = get_nodes_with_attributes(graph, op='Placeholder')
+        inputs = graph.get_nodes_with_attributes(op='Placeholder')
     else:
         # cutting the net by inputs
         assert isinstance(user_defined_inputs, dict)
@@ -1137,7 +1116,7 @@ def add_input_ops(graph: nx.MultiDiGraph, user_defined_inputs: dict, before_infe
     if len(inputs):
         set_is_input(graph, inputs, True)
         # Check if there are inputs that are not listed in user_defined_inputs and are needed to calculate outputs
-        outputs = get_nodes_with_attributes(graph, is_output=True)
+        outputs = graph.get_nodes_with_attributes(op='OpOutput')
         visited = set()
         for output_name in outputs:
             reverse_dfs(graph, output_name, check_input, visited)
@@ -1145,13 +1124,12 @@ def add_input_ops(graph: nx.MultiDiGraph, user_defined_inputs: dict, before_infe
     return inputs
 
 
-def remove_output_ops(graph: nx.MultiDiGraph):
+def remove_output_ops(graph: Graph):
     for node in list(graph.nodes()):
         node = Node(graph, node)
         if node.has_valid('op') and node.op == 'OpOutput':
             if len(node.in_nodes()) > 0:
                 assert (len(node.in_nodes()) == 1)
-                list(node.in_nodes().values())[0]['is_output'] = node.is_output
             graph.remove_node(node.id)
 
 
diff --git a/model-optimizer/mo/front/extractor_test.py b/model-optimizer/mo/front/extractor_test.py
index 5fcb5eb70..1d6840a0c 100644
--- a/model-optimizer/mo/front/extractor_test.py
+++ b/model-optimizer/mo/front/extractor_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -37,7 +37,8 @@ class FakePythonParam:
 
 nodes_attributes = {'input': {'kind': 'data'},
                     'pool_1': {'type': 'Pooling', 'kind': 'op'},
-                    'output': {'kind': 'data'}
+                    'output': {'kind': 'data'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -60,10 +61,12 @@ class TestExtractor(unittest.TestCase):
         }
         graph = build_graph(nodes_attributes,
                             [('input', 'pool_1'),
-                             ('pool_1', 'output')],
+                             ('pool_1', 'output'),
+                             ('output', 'op_output')
+                             ],
                             {'input': {'shape': input_shape},
                              'pool_1': {**params, 'spatial_dims': [2, 3]},
-                             'output': {'is_output': True, 'shape': None}})
+                             'output': {'shape': None}})
         pool_1_node = Node(graph, 'pool_1')
         for param in params.keys():
             if type(params[param]) is np.ndarray:
@@ -89,10 +92,12 @@ class TestExtractor(unittest.TestCase):
         }
         graph = build_graph(nodes,
                             [('input', 'reshape'),
-                             ('reshape', 'output')],
+                             ('reshape', 'output'),
+                             ('output', 'op_output')
+                             ],
                             {'input': {'shape': input_shape},
                              'reshape': {**params, 'spatial_dims': [2, 3]},
-                             'output': {'is_output': True, 'shape': None}})
+                             'output': {'shape': None}})
         pool_1_node = Node(graph, 'reshape')
         for param in params.keys():
             if type(params[param]) is list:
@@ -244,8 +249,9 @@ class TestInputAddition(unittest.TestCase):
             'conv_1_data': {'kind': 'data', 'value': True, 'shape': np.array([-1, 224, 224, 3])},
             'relu_1': {'type': 'ReLU', 'kind': 'op', 'op': 'NotPlaceholder'},
             'relu_1_data': {'kind': 'data', 'value': None, 'shape': np.array([-1, 112, 112, 64])},
-            'output': {'type': 'SoftMax', 'kind': 'op', 'op': 'NotPlaceholder', 'is_output': True},
-            'output_data': {'name': 'output_data', 'kind': 'data', 'shape': np.array([-1, 112, 112, 64])}
+            'output': {'type': 'SoftMax', 'kind': 'op', 'op': 'NotPlaceholder'},
+            'output_data': {'name': 'output_data', 'kind': 'data', 'shape': np.array([-1, 112, 112, 64])},
+            'op_output': {'kind': 'op', 'op': 'OpOutput'}
         }
         edges = [
             ('old_input', 'old_input_data'),
@@ -254,7 +260,8 @@ class TestInputAddition(unittest.TestCase):
             ('conv_1_data', 'relu_1'),
             ('relu_1', 'relu_1_data'),
             ('relu_1_data', 'output'),
-            ('output', 'output_data')
+            ('output', 'output_data'),
+            ('output_data', 'op_output')
         ]
         graph = build_graph(nodes, edges)
         add_input_ops(graph=graph, user_defined_inputs=inputs, before_infer=False)
@@ -277,7 +284,7 @@ class TestInputAddition(unittest.TestCase):
             'node_2': {'type': 'Identity', 'kind': 'op', 'op': 'NotPlaceholder'},
             'node_3': {'type': 'Identity', 'kind': 'op', 'op': 'NotPlaceholder'},
             'node_4': {'type': 'Identity', 'kind': 'op', 'op': 'NotPlaceholder'},
-            'output': {'type': 'Identity', 'kind': 'op', 'op': 'OpOutput', 'is_output': True}
+            'output': {'kind': 'op', 'op': 'OpOutput'}
         }
         edges = [
             ('input_1', 'node_1'),
@@ -309,7 +316,7 @@ class TestInputAddition(unittest.TestCase):
             'node_2': {'type': 'Identity', 'kind': 'op', 'op': 'NotPlaceholder'},
             'node_3': {'type': 'Identity', 'kind': 'op', 'op': 'NotPlaceholder'},
             'node_4': {'type': 'Identity', 'kind': 'op', 'op': 'NotPlaceholder'},
-            'output': {'type': 'Identity', 'kind': 'op', 'op': 'OpOutput', 'is_output': True},
+            'output': { 'kind': 'op', 'op': 'OpOutput'},
             'input_3': {'type': 'Identity', 'kind': 'op', 'op': 'Placeholder'}
         }
         edges = [
diff --git a/model-optimizer/mo/front/kaldi/extractor.py b/model-optimizer/mo/front/kaldi/extractor.py
index f0e3b3b11..d97001969 100644
--- a/model-optimizer/mo/front/kaldi/extractor.py
+++ b/model-optimizer/mo/front/kaldi/extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/add_shift_ext.py b/model-optimizer/mo/front/kaldi/extractors/add_shift_ext.py
index ff5dff90b..6c9d56623 100644
--- a/model-optimizer/mo/front/kaldi/extractors/add_shift_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/add_shift_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/add_shift_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/add_shift_ext_test.py
index 08703d2a9..0b5f46af4 100644
--- a/model-optimizer/mo/front/kaldi/extractors/add_shift_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/add_shift_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/affine_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/affine_component_ext.py
index 7900639b2..347b4fefd 100644
--- a/model-optimizer/mo/front/kaldi/extractors/affine_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/affine_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/affine_component_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/affine_component_ext_test.py
index 14b083b7f..691525aca 100644
--- a/model-optimizer/mo/front/kaldi/extractors/affine_component_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/affine_component_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/affine_component_preconditioned_online_ext.py b/model-optimizer/mo/front/kaldi/extractors/affine_component_preconditioned_online_ext.py
index 70a8c41f9..7aa11d133 100644
--- a/model-optimizer/mo/front/kaldi/extractors/affine_component_preconditioned_online_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/affine_component_preconditioned_online_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext.py b/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext.py
index 8175fb108..cb807a735 100644
--- a/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext_test.py
index 7b9f41c37..6a4925e03 100644
--- a/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/affine_transform_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/common_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/common_ext_test.py
index e9cdb98c9..24e907724 100644
--- a/model-optimizer/mo/front/kaldi/extractors/common_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/common_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,12 +21,12 @@ import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
 from mo.front.kaldi.loader.utils_test import TestKaldiUtilsLoading
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.unittest.graph import build_graph
 
 
 class KaldiFrontExtractorTest(unittest.TestCase):
-    graph = nx.MultiDiGraph()
+    graph = Graph()
 
     @classmethod
     def setUp(cls):
diff --git a/model-optimizer/mo/front/kaldi/extractors/concat_ext.py b/model-optimizer/mo/front/kaldi/extractors/concat_ext.py
index 9299c7c2d..aa339cb3e 100644
--- a/model-optimizer/mo/front/kaldi/extractors/concat_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/concat_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/concat_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/concat_ext_test.py
index b2274badf..b0f05cbe6 100644
--- a/model-optimizer/mo/front/kaldi/extractors/concat_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/concat_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/convolutional_1d_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/convolutional_1d_component_ext.py
index d77eeb33e..fa46c9750 100644
--- a/model-optimizer/mo/front/kaldi/extractors/convolutional_1d_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/convolutional_1d_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext.py
index 21a1e3350..af9fa9107 100644
--- a/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext_test.py
index 50fef84f8..b030422ef 100644
--- a/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/convolutional_component_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/copy_ext.py b/model-optimizer/mo/front/kaldi/extractors/copy_ext.py
index 3348ef14c..6237e9acf 100644
--- a/model-optimizer/mo/front/kaldi/extractors/copy_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/copy_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext.py
index eee267f16..799971b32 100644
--- a/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext_test.py
index e03f6982d..731c436ac 100644
--- a/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/fixed_affine_component_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/lstm_projected_streams_ext.py b/model-optimizer/mo/front/kaldi/extractors/lstm_projected_streams_ext.py
index 09e8061d3..a18c384e9 100644
--- a/model-optimizer/mo/front/kaldi/extractors/lstm_projected_streams_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/lstm_projected_streams_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext.py b/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext.py
index 0e38dd33e..a1c8cf979 100644
--- a/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext_test.py
index b3e7ad161..4b6838788 100644
--- a/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/max_pooling_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/normalize_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/normalize_component_ext.py
index 4d1e9e930..c5b397cb6 100644
--- a/model-optimizer/mo/front/kaldi/extractors/normalize_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/normalize_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -35,8 +35,8 @@ class NormalizeComponentFrontExtractor(FrontExtractorOp):
         d_scaled = dim * target_rms ** 2
         in_norm = np.zeros([dim], np.float64)
         in_norm += 1.0 / d_scaled
-        in_norm = np.maximum(in_norm, 2. ** (-66))
-        in_norm = np.power(in_norm, -0.5)
+        in_norm = np.maximum(in_norm, 2. ** (-66))  # pylint: disable=assignment-from-no-return
+        in_norm = np.power(in_norm, -0.5)  # pylint: disable=assignment-from-no-return
         attrs = {}
         embed_input(attrs, 1, 'weights', in_norm)
         ScaleShiftOp.update_node_stat(node, attrs)
diff --git a/model-optimizer/mo/front/kaldi/extractors/rectified_linear_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/rectified_linear_component_ext.py
index 713db4beb..4b09cd332 100644
--- a/model-optimizer/mo/front/kaldi/extractors/rectified_linear_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/rectified_linear_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/rescale_ext.py b/model-optimizer/mo/front/kaldi/extractors/rescale_ext.py
index 459e5582e..ff2c57dc1 100644
--- a/model-optimizer/mo/front/kaldi/extractors/rescale_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/rescale_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/rescale_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/rescale_ext_test.py
index b7628bb24..c0a160f1e 100644
--- a/model-optimizer/mo/front/kaldi/extractors/rescale_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/rescale_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext.py b/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext.py
index a68ad4f3f..36bd4b3d6 100644
--- a/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext_test.py
index 521ac062f..638ed6e3e 100644
--- a/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/sigmoid_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/slice_ext.py b/model-optimizer/mo/front/kaldi/extractors/slice_ext.py
index 4235c0dbc..379571ca2 100644
--- a/model-optimizer/mo/front/kaldi/extractors/slice_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/slice_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/slice_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/slice_ext_test.py
index 0c2a16c3c..47ae3ed2c 100644
--- a/model-optimizer/mo/front/kaldi/extractors/slice_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/slice_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/softmax_ext.py b/model-optimizer/mo/front/kaldi/extractors/softmax_ext.py
index da9f0a1eb..1dee8685c 100644
--- a/model-optimizer/mo/front/kaldi/extractors/softmax_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/softmax_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/splice_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/splice_component_ext.py
index 47cbc23f6..da3991438 100644
--- a/model-optimizer/mo/front/kaldi/extractors/splice_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/splice_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/tanh_component_ext.py b/model-optimizer/mo/front/kaldi/extractors/tanh_component_ext.py
index e67f9c410..e75ed7732 100644
--- a/model-optimizer/mo/front/kaldi/extractors/tanh_component_ext.py
+++ b/model-optimizer/mo/front/kaldi/extractors/tanh_component_ext.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/extractors/tanh_ext_test.py b/model-optimizer/mo/front/kaldi/extractors/tanh_ext_test.py
index 4604022b8..3fb0daf75 100644
--- a/model-optimizer/mo/front/kaldi/extractors/tanh_ext_test.py
+++ b/model-optimizer/mo/front/kaldi/extractors/tanh_ext_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/loader/loader.py b/model-optimizer/mo/front/kaldi/loader/loader.py
index 8bf9085a1..9f0bdf37c 100644
--- a/model-optimizer/mo/front/kaldi/loader/loader.py
+++ b/model-optimizer/mo/front/kaldi/loader/loader.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,7 +25,7 @@ import logging as log
 from mo.front.kaldi.loader.utils import find_next_tag, read_placeholder, find_next_component, get_name_from_path, \
     find_end_of_component, end_of_nnet_tag, read_binary_integer32_token, get_parameters, read_token_value, collect_until_token, \
     create_edge_attrs
-from mo.graph.graph import unique_id, Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
@@ -39,7 +39,7 @@ def read_counts_file(file_path):
 
     counts_line = file_content[0].strip().replace('[', '').replace(']', '')
     try:
-        counts = np.fromstring(counts_line, dtype=int, sep=' ')
+        counts = np.fromstring(counts_line, dtype=float, sep=' ')
     except TypeError:
         raise Error('Expect counts file to contain list of integers.' +
                     refer_to_faq_msg(90))
@@ -47,12 +47,12 @@ def read_counts_file(file_path):
     cutoff_idxs = np.where(counts < cutoff)
     counts[cutoff_idxs] = cutoff
     scale = 1.0 / np.sum(counts)
-    counts = np.log(counts * scale)
+    counts = np.log(counts * scale)  # pylint: disable=assignment-from-no-return
     counts[cutoff_idxs] += np.finfo(np.float32).max / 2
     return counts
 
 
-def load_parallel_component(file_descr, graph: nx.MultiDiGraph, prev_layer_id):
+def load_parallel_component(file_descr, graph: Graph, prev_layer_id):
     """
     Load ParallelComponent of the Kaldi model.
     ParallelComponent contains parallel nested networks.
@@ -67,7 +67,7 @@ def load_parallel_component(file_descr, graph: nx.MultiDiGraph, prev_layer_id):
     nnet_count = read_token_value(file_descr, b'<NestedNnetCount>')
     log.debug('Model contains parallel component with {} nested networks'.format(nnet_count))
 
-    slice_id = unique_id(graph, prefix='Slice')
+    slice_id = graph.unique_id(prefix='Slice')
     graph.add_node(slice_id, parameters=None, op='slice', kind='op')
 
     slice_node = Node(graph, slice_id)
@@ -84,7 +84,7 @@ def load_parallel_component(file_descr, graph: nx.MultiDiGraph, prev_layer_id):
         if i != nnet_count - 1:
             slices_points.append(shape[1])
         g.remove_node(input_nodes[0][0])
-        mapping = {node: unique_id(graph, node) for node in g.nodes(data=False) if node in graph}
+        mapping = {node: graph.unique_id(node) for node in g.nodes(data=False) if node in graph}
         g = nx.relabel_nodes(g, mapping)
         for val in mapping.values():
             g.node[val]['name'] = val
@@ -99,7 +99,7 @@ def load_parallel_component(file_descr, graph: nx.MultiDiGraph, prev_layer_id):
     for i in slices_points:
         packed_sp += struct.pack("I", i)
     slice_node.parameters = io.BytesIO(packed_sp)
-    concat_id = unique_id(graph, prefix='Concat')
+    concat_id = graph.unique_id(prefix='Concat')
     graph.add_node(concat_id, parameters=None, op='concat', kind='op')
     for i, output in enumerate(outputs):
         edge_attrs = create_edge_attrs(output, concat_id)
@@ -113,7 +113,6 @@ def load_kaldi_model(nnet_path):
     Structure of the file is the following:
     magic-number(16896)<Nnet> <Next Layer Name> weights etc.
     :param nnet_path:
-    :param check_sum:
     :return:
     """
     nnet_name = None
@@ -140,7 +139,7 @@ def load_kaldi_model(nnet_path):
 
 
 def load_kalid_nnet1_model(file_descr, name):
-    graph = nx.MultiDiGraph(name=name)
+    graph = Graph(name=name)
 
     prev_layer_id = 'Input'
     graph.add_node(prev_layer_id, name=prev_layer_id, kind='op', op='Input', parameters=None)
@@ -161,7 +160,7 @@ def load_kalid_nnet1_model(file_descr, name):
         start_index = file_descr.tell()
         end_tag, end_index = find_end_of_component(file_descr, component_type)
         end_index -= len(end_tag)
-        layer_id = unique_id(graph, prefix=component_type)
+        layer_id = graph.unique_id(prefix=component_type)
         graph.add_node(layer_id,
                        parameters=get_parameters(file_descr, start_index, end_index),
                        op=component_type,
@@ -180,8 +179,9 @@ def load_kalid_nnet1_model(file_descr, name):
 
 
 def load_kalid_nnet2_model(file_descr, nnet_name):
-    graph = nx.MultiDiGraph(name=nnet_name)
+    graph = Graph(name=nnet_name)
     input_name = 'Input'
+    input_shape = np.array([])
     graph.add_node(input_name, name=input_name, kind='op', op='Input', parameters=None, shape=None)
 
     prev_layer_id = input_name
@@ -197,7 +197,7 @@ def load_kalid_nnet2_model(file_descr, nnet_name):
             break
         start_index = file_descr.tell()
         end_tag, end_index = find_end_of_component(file_descr, component_type)
-        layer_id = unique_id(graph, prefix=component_type)
+        layer_id = graph.unique_id(prefix=component_type)
         graph.add_node(layer_id,
                        parameters=get_parameters(file_descr, start_index, end_index),
                        op=component_type,
diff --git a/model-optimizer/mo/front/kaldi/loader/utils.py b/model-optimizer/mo/front/kaldi/loader/utils.py
index 4dbba940d..55f46a4c7 100644
--- a/model-optimizer/mo/front/kaldi/loader/utils.py
+++ b/model-optimizer/mo/front/kaldi/loader/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/loader/utils_test.py b/model-optimizer/mo/front/kaldi/loader/utils_test.py
index ba5b06bb6..b026069d4 100644
--- a/model-optimizer/mo/front/kaldi/loader/utils_test.py
+++ b/model-optimizer/mo/front/kaldi/loader/utils_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/kaldi/register_custom_ops.py b/model-optimizer/mo/front/kaldi/register_custom_ops.py
index 237ee9121..719c6dfdc 100644
--- a/model-optimizer/mo/front/kaldi/register_custom_ops.py
+++ b/model-optimizer/mo/front/kaldi/register_custom_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,14 +13,10 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-from mo.back.replacement import BackReplacementPattern
-from mo.front.common.replacement import FrontReplacementOp, FrontReplacementSubgraph
+from mo.front.common.replacement import FrontReplacementOp, FrontReplacementSubgraph, FrontReplacementPattern
 from mo.front.extractor import FrontExtractorOp
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.utils import class_registration
 
 
-def update_registration():
-    class_registration.update_registration([Op, FrontExtractorOp, FrontReplacementOp, FrontReplacementSubgraph,
-                                            MiddleReplacementPattern, BackReplacementPattern])
+def get_front_classes():
+    front_classes = [FrontExtractorOp, FrontReplacementOp, FrontReplacementPattern, FrontReplacementSubgraph]
+    return front_classes
diff --git a/model-optimizer/mo/front/kaldi/utils.py b/model-optimizer/mo/front/kaldi/utils.py
index f29a643fd..76af016b8 100644
--- a/model-optimizer/mo/front/kaldi/utils.py
+++ b/model-optimizer/mo/front/kaldi/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractor.py b/model-optimizer/mo/front/mxnet/extractor.py
index ad613f8b9..c6e2d0cb7 100644
--- a/model-optimizer/mo/front/mxnet/extractor.py
+++ b/model-optimizer/mo/front/mxnet/extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/activation.py b/model-optimizer/mo/front/mxnet/extractors/activation.py
index 21b563566..fe23d753f 100644
--- a/model-optimizer/mo/front/mxnet/extractors/activation.py
+++ b/model-optimizer/mo/front/mxnet/extractors/activation.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/activation_test.py b/model-optimizer/mo/front/mxnet/extractors/activation_test.py
index d7e034ce8..eda8a0bee 100644
--- a/model-optimizer/mo/front/mxnet/extractors/activation_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/activation_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/add_n.py b/model-optimizer/mo/front/mxnet/extractors/add_n.py
index a1fe83c6e..b77705f54 100644
--- a/model-optimizer/mo/front/mxnet/extractors/add_n.py
+++ b/model-optimizer/mo/front/mxnet/extractors/add_n.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/batchnorm.py b/model-optimizer/mo/front/mxnet/extractors/batchnorm.py
index 0d8162509..5b3cc8f13 100644
--- a/model-optimizer/mo/front/mxnet/extractors/batchnorm.py
+++ b/model-optimizer/mo/front/mxnet/extractors/batchnorm.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/concat.py b/model-optimizer/mo/front/mxnet/extractors/concat.py
index 84c651f03..85c0c1478 100644
--- a/model-optimizer/mo/front/mxnet/extractors/concat.py
+++ b/model-optimizer/mo/front/mxnet/extractors/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/crop.py b/model-optimizer/mo/front/mxnet/extractors/crop.py
index 28cb464aa..a5cf6d306 100644
--- a/model-optimizer/mo/front/mxnet/extractors/crop.py
+++ b/model-optimizer/mo/front/mxnet/extractors/crop.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/crop_test.py b/model-optimizer/mo/front/mxnet/extractors/crop_test.py
index 06b839c08..50fbb6c76 100644
--- a/model-optimizer/mo/front/mxnet/extractors/crop_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/crop_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/eltwise.py b/model-optimizer/mo/front/mxnet/extractors/eltwise.py
index 61f20658e..91c74b783 100644
--- a/model-optimizer/mo/front/mxnet/extractors/eltwise.py
+++ b/model-optimizer/mo/front/mxnet/extractors/eltwise.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/eltwise_test.py b/model-optimizer/mo/front/mxnet/extractors/eltwise_test.py
index 4d07e57d0..46d0f88ec 100644
--- a/model-optimizer/mo/front/mxnet/extractors/eltwise_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/eltwise_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/fully_connected.py b/model-optimizer/mo/front/mxnet/extractors/fully_connected.py
index 932299054..78dae8df1 100644
--- a/model-optimizer/mo/front/mxnet/extractors/fully_connected.py
+++ b/model-optimizer/mo/front/mxnet/extractors/fully_connected.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/l2_normalization.py b/model-optimizer/mo/front/mxnet/extractors/l2_normalization.py
index f73cf9e98..01662309b 100644
--- a/model-optimizer/mo/front/mxnet/extractors/l2_normalization.py
+++ b/model-optimizer/mo/front/mxnet/extractors/l2_normalization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/leaky_relu.py b/model-optimizer/mo/front/mxnet/extractors/leaky_relu.py
index a20464392..9537bbbce 100644
--- a/model-optimizer/mo/front/mxnet/extractors/leaky_relu.py
+++ b/model-optimizer/mo/front/mxnet/extractors/leaky_relu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/leaky_relu_test.py b/model-optimizer/mo/front/mxnet/extractors/leaky_relu_test.py
index f3fab2b18..7d660ea0f 100644
--- a/model-optimizer/mo/front/mxnet/extractors/leaky_relu_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/leaky_relu_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/lrn.py b/model-optimizer/mo/front/mxnet/extractors/lrn.py
index b6dbf343b..c313f9219 100644
--- a/model-optimizer/mo/front/mxnet/extractors/lrn.py
+++ b/model-optimizer/mo/front/mxnet/extractors/lrn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/multibox_detection.py b/model-optimizer/mo/front/mxnet/extractors/multibox_detection.py
index 0e81c9725..6245904f2 100644
--- a/model-optimizer/mo/front/mxnet/extractors/multibox_detection.py
+++ b/model-optimizer/mo/front/mxnet/extractors/multibox_detection.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/multibox_detection_test.py b/model-optimizer/mo/front/mxnet/extractors/multibox_detection_test.py
index c6e4c0c07..5f1f1ab41 100644
--- a/model-optimizer/mo/front/mxnet/extractors/multibox_detection_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/multibox_detection_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/multibox_prior.py b/model-optimizer/mo/front/mxnet/extractors/multibox_prior.py
index 7284eb7f0..7e692776e 100644
--- a/model-optimizer/mo/front/mxnet/extractors/multibox_prior.py
+++ b/model-optimizer/mo/front/mxnet/extractors/multibox_prior.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/multibox_prior_test.py b/model-optimizer/mo/front/mxnet/extractors/multibox_prior_test.py
index cc2cc8f66..38501fd0e 100644
--- a/model-optimizer/mo/front/mxnet/extractors/multibox_prior_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/multibox_prior_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/null.py b/model-optimizer/mo/front/mxnet/extractors/null.py
index c53da6dfc..a49f69da0 100644
--- a/model-optimizer/mo/front/mxnet/extractors/null.py
+++ b/model-optimizer/mo/front/mxnet/extractors/null.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/relu.py b/model-optimizer/mo/front/mxnet/extractors/relu.py
index 41400c532..71693d695 100644
--- a/model-optimizer/mo/front/mxnet/extractors/relu.py
+++ b/model-optimizer/mo/front/mxnet/extractors/relu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/relu_test.py b/model-optimizer/mo/front/mxnet/extractors/relu_test.py
index c045d869b..881a3099e 100644
--- a/model-optimizer/mo/front/mxnet/extractors/relu_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/relu_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/scaleshift.py b/model-optimizer/mo/front/mxnet/extractors/scaleshift.py
index 23d4b5d32..dbc89e0e1 100644
--- a/model-optimizer/mo/front/mxnet/extractors/scaleshift.py
+++ b/model-optimizer/mo/front/mxnet/extractors/scaleshift.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/sigmoid.py b/model-optimizer/mo/front/mxnet/extractors/sigmoid.py
index 79b0c67cd..834b0a533 100644
--- a/model-optimizer/mo/front/mxnet/extractors/sigmoid.py
+++ b/model-optimizer/mo/front/mxnet/extractors/sigmoid.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/sigmoid_test.py b/model-optimizer/mo/front/mxnet/extractors/sigmoid_test.py
index fcf5893a4..ba73f6c80 100644
--- a/model-optimizer/mo/front/mxnet/extractors/sigmoid_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/sigmoid_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/slice_axis.py b/model-optimizer/mo/front/mxnet/extractors/slice_axis.py
index 956c1775b..046c4105a 100644
--- a/model-optimizer/mo/front/mxnet/extractors/slice_axis.py
+++ b/model-optimizer/mo/front/mxnet/extractors/slice_axis.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/slice_axis_test.py b/model-optimizer/mo/front/mxnet/extractors/slice_axis_test.py
index 435044d10..246d88b21 100644
--- a/model-optimizer/mo/front/mxnet/extractors/slice_axis_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/slice_axis_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/transpose.py b/model-optimizer/mo/front/mxnet/extractors/transpose.py
index 985f40c67..d0d7b32b4 100644
--- a/model-optimizer/mo/front/mxnet/extractors/transpose.py
+++ b/model-optimizer/mo/front/mxnet/extractors/transpose.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/extractors/utils.py b/model-optimizer/mo/front/mxnet/extractors/utils.py
index 8c8d23df9..3358ccd41 100644
--- a/model-optimizer/mo/front/mxnet/extractors/utils.py
+++ b/model-optimizer/mo/front/mxnet/extractors/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -87,10 +87,11 @@ class AttrDictionary(object):
 
     def val(self, key, valtype, default=None):
         attr = self.str(key, default)
+        attr = None if attr == 'None' else attr
         if valtype is None:
             return attr
         else:
-            if not isinstance(attr, valtype):
+            if not isinstance(attr, valtype) and attr is not None:
                 return valtype(attr)
             else:
                 return attr
@@ -178,3 +179,15 @@ def load_params(input_model, data_names = ('data',)):
     model_params._param_names = arg_keys
     model_params._aux_names = aux_keys
     return model_params
+
+
+def init_rnn_states(model_nodes):
+    states = {}
+    for i, node in enumerate(model_nodes):
+        if node['op'] == 'RNN':
+            for i in node['inputs'][2:]:
+                attrs = get_mxnet_layer_attrs(model_nodes[i[0]])
+                shape = attrs.tuple('__shape__', int, None)
+                if shape:
+                    states.update({model_nodes[i[0]]['name']: shape})
+    return states
+\ No newline at end of file
diff --git a/model-optimizer/mo/front/mxnet/extractors/utils_test.py b/model-optimizer/mo/front/mxnet/extractors/utils_test.py
index 070d53231..b52316219 100644
--- a/model-optimizer/mo/front/mxnet/extractors/utils_test.py
+++ b/model-optimizer/mo/front/mxnet/extractors/utils_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -169,6 +169,15 @@ class TestAttrDictionary(unittest.TestCase):
         self.assertEqual(2, l[1])
         self.assertEqual(3, l[2])
 
+    def testIntWithAttrNone(self):
+        attrs = {
+            "something": "None"
+        }
+
+        attr_dict = AttrDictionary(attrs)
+        attr = attr_dict.int("something", None)
+        self.assertEqual(None, attr)
+
 
 class TestUtils(unittest.TestCase):
     @patch('mxnet.nd.load')
diff --git a/model-optimizer/mo/front/mxnet/loader.py b/model-optimizer/mo/front/mxnet/loader.py
index 219abb152..4bf85baab 100644
--- a/model-optimizer/mo/front/mxnet/loader.py
+++ b/model-optimizer/mo/front/mxnet/loader.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,16 +17,14 @@
 import os
 import json
 
-import networkx as nx
 import numpy as np
 import mxnet as mx
 import logging as log
 
-from mo.front.mxnet.extractors.utils import get_mxnet_node_edges, load_params
+from mo.front.mxnet.extractors.utils import get_mxnet_node_edges, load_params, init_rnn_states
 from mo.front.mxnet.extractor import common_mxnet_fields
 from mo.front.mxnet.nd_to_params import build_params_file
-from mo.graph.graph import Node
-from mo.graph.graph import unique_id
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
@@ -97,7 +95,10 @@ def symbol2nx(model_nodes, model_params, input_names: str = ''):
     else:
         input_names = input_names.split(',')
 
-    graph = nx.MultiDiGraph()
+    rnn_states = init_rnn_states(model_nodes)
+    names_rnn_states = list(rnn_states.keys())
+
+    graph = Graph()
     # as mxnet contain input layers as index of layer, for correct set up edges, we need provide index of layer with name of  graph node
     index_node_keys = {}
     for i, node in enumerate(model_nodes):
@@ -105,7 +106,9 @@ def symbol2nx(model_nodes, model_params, input_names: str = ''):
             node['value'] = np.array(model_params._arg_params[node['name']].asnumpy(), dtype=np.float32)
         elif node['name'] in model_params._aux_params and node['name'] not in input_names:
             node['value'] = np.array(model_params._aux_params[node['name']].asnumpy(), dtype=np.float32)
-        node_name = unique_id(graph, node['name'])
+        elif node['name'] in names_rnn_states:
+            node['value'] = np.zeros(rnn_states[node['name']])
+        node_name = graph.unique_id(node['name'])
         graph.add_node(node_name, **symbol_attrs(node))
         graph.node[node_name].update(common_mxnet_fields(Node(graph, node_name)))
         index_node_keys[i] = node_name
@@ -119,7 +122,7 @@ def symbol2nx(model_nodes, model_params, input_names: str = ''):
     return graph
 
 
-def find_output_node(graph: nx.MultiDiGraph, src_input_index):
+def find_output_node(graph: Graph, src_input_index):
     for i, attrs in (list(graph.nodes(data=True))[src_input_index + 1:]):
         for input_index in attrs['symbol_dict']['inputs']:
             if input_index[0] == src_input_index:
diff --git a/model-optimizer/mo/front/mxnet/loader_test.py b/model-optimizer/mo/front/mxnet/loader_test.py
index 2c77d7e95..cb52cb239 100644
--- a/model-optimizer/mo/front/mxnet/loader_test.py
+++ b/model-optimizer/mo/front/mxnet/loader_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/nd_to_params.py b/model-optimizer/mo/front/mxnet/nd_to_params.py
index e4a66ccb7..a0f1fdc0c 100644
--- a/model-optimizer/mo/front/mxnet/nd_to_params.py
+++ b/model-optimizer/mo/front/mxnet/nd_to_params.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/mxnet/register_custom_ops.py b/model-optimizer/mo/front/mxnet/register_custom_ops.py
index a6992227f..a07bf0ee0 100644
--- a/model-optimizer/mo/front/mxnet/register_custom_ops.py
+++ b/model-optimizer/mo/front/mxnet/register_custom_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,13 +16,9 @@
 
 from mo.front.common.replacement import FrontReplacementOp, FrontReplacementSubgraph, FrontReplacementPattern
 from mo.front.extractor import FrontExtractorOp, MXNetCustomFrontExtractorOp
-from mo.ops.op import Op
-from mo.utils import class_registration
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.back.replacement import BackReplacementPattern
 
 
-def update_registration():
-    class_registration.update_registration([Op, FrontExtractorOp, FrontReplacementOp, FrontReplacementSubgraph,
-                                            MXNetCustomFrontExtractorOp, MiddleReplacementPattern,
-                                            BackReplacementPattern, FrontReplacementPattern])
+def get_front_classes():
+    front_classes = [FrontExtractorOp, FrontReplacementOp, FrontReplacementSubgraph, MXNetCustomFrontExtractorOp,
+                     FrontReplacementPattern]
+    return front_classes
diff --git a/model-optimizer/mo/front/onnx/extractor.py b/model-optimizer/mo/front/onnx/extractor.py
index 76a666c92..00cefbee7 100644
--- a/model-optimizer/mo/front/onnx/extractor.py
+++ b/model-optimizer/mo/front/onnx/extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@ import numpy as np
 from mo.front.onnx.extractors.concat import concat_ext
 from mo.front.onnx.extractors.const import onnx_const_ext
 from mo.front.onnx.extractors.constant import onnx_constant_ext
-from mo.front.onnx.extractors.dropout import dropout_ext
 from mo.front.onnx.extractors.eltwise import make_tf_eltwise
 from mo.front.onnx.extractors.fused_bn import tf_fused_bn_extractor
 from mo.front.onnx.extractors.matmul import onnx_gemm_ext
@@ -39,8 +38,7 @@ onnx_op_extractors = {
     'Concat': concat_ext,
     'Const': onnx_const_ext,
     'Constant': onnx_constant_ext,
-    'Identity': node_pb_arg(make_tf_eltwise(lambda v: v)),
-    'Dropout': dropout_ext,
+    'Identity': node_pb_arg(make_tf_eltwise(lambda v: v, attrs={'identity': True})),
     'Sum': node_pb_arg(
         make_tf_eltwise(lambda a, b: a + b, attrs={'type': 'Eltwise', 'operation': 'sum', 'can_be_bias': True})),
     'Relu': node_pb_arg(make_tf_eltwise(lambda v: np.maximum(0, v), attrs={'type': 'ReLU'})),  # 0 is an integer
diff --git a/model-optimizer/mo/front/onnx/extractors/concat.py b/model-optimizer/mo/front/onnx/extractors/concat.py
index c99aa4204..4cb510ee4 100644
--- a/model-optimizer/mo/front/onnx/extractors/concat.py
+++ b/model-optimizer/mo/front/onnx/extractors/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/const.py b/model-optimizer/mo/front/onnx/extractors/const.py
index 2bfe163b1..254a84364 100644
--- a/model-optimizer/mo/front/onnx/extractors/const.py
+++ b/model-optimizer/mo/front/onnx/extractors/const.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/constant.py b/model-optimizer/mo/front/onnx/extractors/constant.py
index aa78db768..9339f01b9 100644
--- a/model-optimizer/mo/front/onnx/extractors/constant.py
+++ b/model-optimizer/mo/front/onnx/extractors/constant.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/constant_test.py b/model-optimizer/mo/front/onnx/extractors/constant_test.py
index 8204966ab..63990399a 100644
--- a/model-optimizer/mo/front/onnx/extractors/constant_test.py
+++ b/model-optimizer/mo/front/onnx/extractors/constant_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/dropout.py b/model-optimizer/mo/front/onnx/extractors/dropout.py
deleted file mode 100644
index dff586a09..000000000
--- a/model-optimizer/mo/front/onnx/extractors/dropout.py
+++ /dev/null
@@ -1,32 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-from mo.front.onnx.extractors.utils import onnx_attr
-from mo.utils.error import Error
-
-def dropout_ext(node):
-    # some Dropout flavors doesn't have is_test attribute; when it is missing, interpret it as 1
-    is_test = onnx_attr(node, 'is_test', 'i', 1)
-    if len(node.out_nodes()) > 1:
-        raise Error('Dropout node {} has more than one consumer. Unsupported.', node.name)
-    if not is_test:
-        raise Error('Dropout node {} has is_test: 0. This means training mode which is not supported.', node.name)
-
-    return {
-        # redefine op to automatically remove a node in the next tranformations
-        'op': 'Identity',
-    }
-
diff --git a/model-optimizer/mo/front/onnx/extractors/eltwise.py b/model-optimizer/mo/front/onnx/extractors/eltwise.py
index 9a096a972..b33b87759 100644
--- a/model-optimizer/mo/front/onnx/extractors/eltwise.py
+++ b/model-optimizer/mo/front/onnx/extractors/eltwise.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/fused_bn.py b/model-optimizer/mo/front/onnx/extractors/fused_bn.py
index b167da6a4..73db9ca5d 100644
--- a/model-optimizer/mo/front/onnx/extractors/fused_bn.py
+++ b/model-optimizer/mo/front/onnx/extractors/fused_bn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/matmul.py b/model-optimizer/mo/front/onnx/extractors/matmul.py
index f04890f56..79a61ef46 100644
--- a/model-optimizer/mo/front/onnx/extractors/matmul.py
+++ b/model-optimizer/mo/front/onnx/extractors/matmul.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/placeholder.py b/model-optimizer/mo/front/onnx/extractors/placeholder.py
index 78a8e59ce..cd9294007 100644
--- a/model-optimizer/mo/front/onnx/extractors/placeholder.py
+++ b/model-optimizer/mo/front/onnx/extractors/placeholder.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/reshape.py b/model-optimizer/mo/front/onnx/extractors/reshape.py
index 1ef7995e8..19c13e037 100644
--- a/model-optimizer/mo/front/onnx/extractors/reshape.py
+++ b/model-optimizer/mo/front/onnx/extractors/reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/onnx/extractors/utils.py b/model-optimizer/mo/front/onnx/extractors/utils.py
index da28d6462..9315f4a1e 100644
--- a/model-optimizer/mo/front/onnx/extractors/utils.py
+++ b/model-optimizer/mo/front/onnx/extractors/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,6 +14,8 @@
  limitations under the License.
 """
 
+import numpy as np
+
 from mo.graph.graph import Node
 from mo.utils.error import Error
 
@@ -47,3 +49,25 @@ def get_onnx_autopad(auto_pad):
     if auto_pad == 'notset':
         auto_pad = None
     return auto_pad
+
+
+def get_onnx_datatype_as_numpy(value):
+    datatype_to_numpy = {
+                         1: np.float32,
+                         9: np.bool,
+                         11: np.double,
+                         10: np.float16,
+                         5: np.int16,
+                         6: np.int32,
+                         7: np.int64,
+                         3: np.int8,
+                         8: np.ubyte,
+                         4: np.uint16,
+                         12: np.uint32,
+                         13: np.uint64,
+                         2: np.uint8,
+                         }
+    try:
+        return datatype_to_numpy[value]
+    except KeyError:
+        raise Error("Incorrect value {} for Datatype enum".format(value))
diff --git a/model-optimizer/mo/front/onnx/loader.py b/model-optimizer/mo/front/onnx/loader.py
index 0da413f7c..d90f228d2 100644
--- a/model-optimizer/mo/front/onnx/loader.py
+++ b/model-optimizer/mo/front/onnx/loader.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@ import logging as log
 import networkx as nx
 import onnx
 
-from mo.graph.graph import create_graph_with_nodes, unique_id
+from mo.graph.graph import create_graph_with_nodes, Graph
 from mo.utils.error import Error, FrameworkError
 
 
@@ -64,7 +64,7 @@ def protobuf2nx(pb):
     # convert initializers to a NX graph for easier control of model consistency and to use it as a dictionary later
     initializers = create_graph_with_nodes(pb.graph.initializer, get_id=lambda pb: pb.name, get_attrs=protobuf_attrs)
 
-    graph = nx.MultiDiGraph()
+    graph = Graph()
 
     # maps a tensor name to a node produced it and the node port: str -> (node_id, node_port)
     data_nodes_map = {}
@@ -95,7 +95,7 @@ def protobuf2nx(pb):
     # important)
     for node in pb.graph.node:
         # create an NX node
-        id = unique_id(graph, node_id(node))
+        id = graph.unique_id(node_id(node))
         graph.add_node(id, pb=node, kind='op')
 
         # add incoming edges based on data_nodes_map
diff --git a/model-optimizer/mo/front/onnx/register_custom_ops.py b/model-optimizer/mo/front/onnx/register_custom_ops.py
index d3ec4eaaf..7ded9e1e0 100644
--- a/model-optimizer/mo/front/onnx/register_custom_ops.py
+++ b/model-optimizer/mo/front/onnx/register_custom_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,14 +14,10 @@
  limitations under the License.
 """
 
-from mo.back.replacement import BackReplacementPattern
 from mo.front.common.replacement import FrontReplacementOp, FrontReplacementPattern, FrontReplacementSubgraph
 from mo.front.extractor import FrontExtractorOp
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.utils import class_registration
 
 
-def update_registration():
-    class_registration.update_registration([Op, FrontExtractorOp, FrontReplacementOp, FrontReplacementPattern,
-                                            FrontReplacementSubgraph, MiddleReplacementPattern, BackReplacementPattern])
+def get_front_classes():
+    front_classes = [FrontExtractorOp, FrontReplacementOp, FrontReplacementPattern, FrontReplacementSubgraph]
+    return front_classes
diff --git a/model-optimizer/mo/front/subgraph_matcher.py b/model-optimizer/mo/front/subgraph_matcher.py
index 410e2fef0..6149098bf 100644
--- a/model-optimizer/mo/front/subgraph_matcher.py
+++ b/model-optimizer/mo/front/subgraph_matcher.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,9 +16,7 @@
 import logging as log
 import re
 
-import networkx as nx
-
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.custom_replacement_config import CustomReplacementDescriptor
 from mo.utils.error import Error
 from mo.utils.graph import nodes_matching_name_pattern, sub_graph_between_nodes
@@ -40,7 +38,7 @@ class SubgraphMatch(object):
     Class providing information about matched sub-graph.
     """
 
-    def __init__(self, graph: nx.DiGraph, replacement_desc: CustomReplacementDescriptor, matched_nodes: list,
+    def __init__(self, graph: Graph, replacement_desc: CustomReplacementDescriptor, matched_nodes: list,
                  inputs_order: list, outputs_order: list, prefix: str):
         """
         Creates instance of a SubgraphMatch class from the provided configuration.
@@ -164,7 +162,7 @@ class SubgraphMatcher(object):
     def __init__(self, replacement_descriptor: CustomReplacementDescriptor):
         self.replacement_desc = replacement_descriptor
 
-    def _match_sub_graph_for_scope(self, graph: nx.MultiDiGraph, scope_pattern: str):
+    def _match_sub_graph_for_scope(self, graph: Graph, scope_pattern: str):
         """
         :param graph: networkx graph to find sub-graph in.
         :param scope_pattern: regular expression specifying sub-graph scope.
@@ -187,7 +185,7 @@ class SubgraphMatcher(object):
 
         return SubgraphMatch(graph, self.replacement_desc, matched_nodes, inputs_order, outputs_order, scope_pattern)
 
-    def _match_sub_graph_for_points(self, graph: nx.MultiDiGraph):
+    def _match_sub_graph_for_points(self, graph: Graph):
         """
         :param graph: networkx graph to find sub-graph in.
         :return: an object describing matched sub-graph.
@@ -206,7 +204,7 @@ class SubgraphMatcher(object):
                              self.replacement_desc.get_inputs_description(),
                              self.replacement_desc.get_outputs_description(), '')
 
-    def matched_sub_graph_instances(self, graph: nx.MultiDiGraph):
+    def matched_sub_graph_instances(self, graph: Graph):
         """
         Generator to product all instances of matched sub-graphs.
         :param graph: graph to find instances in.
diff --git a/model-optimizer/mo/front/tf/change_placeholder_type.py b/model-optimizer/mo/front/tf/change_placeholder_type.py
deleted file mode 100644
index 8c35bc3a5..000000000
--- a/model-optimizer/mo/front/tf/change_placeholder_type.py
+++ /dev/null
@@ -1,80 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import logging as log
-
-import networkx as nx
-from tensorflow.core.framework import types_pb2 as tf_types  # pylint: disable=no-name-in-module
-
-from mo.graph.graph import Node
-from mo.middle.passes.fusing.helpers import get_next_operation
-from mo.utils.error import Error
-from mo.utils.utils import refer_to_faq_msg
-
-
-def change_placeholders_types_to_FP32(graph: nx.MultiDiGraph):
-    for node_name, node_attrs in list(graph.nodes(data=True)):
-        node = Node(graph, node_name)
-        pb = node_attrs.get('pb')
-        if pb is not None and pb.op == 'Placeholder' and pb.attr['dtype'].type != tf_types.DT_FLOAT:
-            log.info('Placeholder "{}" has type that is different from DT_FLOAT'.format(node_name))
-            next_ops = get_next_operation(node)
-            # check that all output nodes are nodes of type 'ToFloat'
-            if all([is_node_casts_to_float(op) and len(op.in_nodes()) == 1 for op in next_ops]):
-                change_node_type(node, tf_types.DT_FLOAT)
-                remove_node_preserving_edges(node, next_ops)  # remove 'Cast' nodes
-            elif all([is_node_gather(op) for op in next_ops] for op in next_ops):
-                change_node_type(node, tf_types.DT_FLOAT)
-            else:
-                raise Error(
-                    ('Cannot convert type of placeholder "{}" because not all of its outputs are "Cast" to float '
-                     'operations: {}. ' +
-                     refer_to_faq_msg(49)),
-                    node.soft_get('name'),
-                    [op.soft_get('name') for op in next_ops]
-                )
-    return graph
-
-
-def is_node_casts_to_float(node: Node):
-    attrs = node.graph.node[node.id]
-    return 'pb' in attrs and attrs['pb'].op == 'Cast' and attrs['pb'].attr['DstT'].type == tf_types.DT_FLOAT
-
-
-def is_node_gather(node: Node):
-    attrs = node.graph.node[node.id]
-    return 'pb' in attrs and attrs['pb'].op == 'GatherV2' and attrs['precision'] == 'FP32'
-
-
-def change_node_type(node: Node, new_type: type):
-    node.graph.node[node.id]['pb'].attr['dtype'].type = new_type
-
-
-def remove_node_preserving_edges(pl_node: Node, nodes: list):
-    graph = pl_node.graph
-    pl_node_data = pl_node.out_node()
-
-    # Disconnect Placeholder data node from Cast nodes
-    for out_node in pl_node.out_node().out_nodes():
-        graph.remove_edge(pl_node_data.id, out_node.id)
-
-    # Move edges from Cast data nodes to Placeholder data node
-    for cast_node in nodes:
-        # it is necessary to create a list from the result of function "graph.out_edges()" because we modify the graph
-        # during iteration over the list. networkx version 2.1 raises error without creating a list
-        for u, v, d in list(graph.out_edges(cast_node.out_node().id, data=True)):
-            graph.remove_edge(u, v)
-            graph.add_edges_from([(pl_node_data.id, v, d)])
diff --git a/model-optimizer/mo/front/tf/common.py b/model-optimizer/mo/front/tf/common.py
index 72f85f795..a00274dc0 100644
--- a/model-optimizer/mo/front/tf/common.py
+++ b/model-optimizer/mo/front/tf/common.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/custom_subgraph_call.py b/model-optimizer/mo/front/tf/custom_subgraph_call.py
index 2a66ca571..8cd5fd5c0 100644
--- a/model-optimizer/mo/front/tf/custom_subgraph_call.py
+++ b/model-optimizer/mo/front/tf/custom_subgraph_call.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,26 +17,15 @@
 import logging as log
 from re import compile, match, findall
 
-import copy
 import networkx as nx
-import numpy as np
-import tensorflow as tf
 
-from mo.front.common.find_unsupported_ops import find_unsupported_ops_subgraphs
-from mo.front.common.layout import convert_shape, nhwc_to_nchw_permute, nchw_to_nhwc_permute
-from mo.front.common.partial_infer.utils import int64_array
 from mo.front.extractor import update_ie_fields
-from mo.front.tf.extractors.utils import tf_tensor_shape
-from mo.front.tf.partial_infer.tf import get_subgraph_output_tensors, tf_subgraph_infer, \
-    add_node_def_to_subgraph, update_input_in_pbs
-from mo.graph.graph import dump_graph_for_graphviz, unique_id, Node, get_outputs, get_inputs, merge_edge_props
+from mo.front.tf.partial_infer.tf import tf_subgraph_infer
+from mo.graph.graph import Node, merge_edge_props, Graph
 from mo.utils.graph import nodes_matching_name_pattern, is_connected_component
 
-nchw_to_nhwc_constant_name = 'IE_NCHW_TO_NHWC'
-nhwc_to_nchw_constant_name = 'IE_NHWC_TO_NCHW'
 
-
-def replace_subgraph_calls(graph: nx.MultiDiGraph, patterns_string: str):
+def replace_subgraph_calls(graph: Graph, patterns_string: str):
     """
     The function replaces sub-graphs defined by the node names with single nodes that are executed using the TensorFlow.
     The patterns applied independently, so N patterns produce N TensorFlow call nodes.
@@ -59,18 +48,11 @@ def replace_subgraph_calls(graph: nx.MultiDiGraph, patterns_string: str):
             if cycle_exist:
                 log.warning("Graph contains a cycle after merging nodes using pattern '{}'".format(pattern))
     if cycle_exist:
-        dump_graph_for_graphviz(graph)
+        graph.dump_graph_for_graphviz()
         log.error('graph contains cycle after applying all merge node patterns')
+        
 
-
-def offload_unsupported_operations_to_tf(graph: nx.MultiDiGraph, unsupported_nodes: list):
-    assert len(unsupported_nodes) != 0
-    sub_graphs_list = find_unsupported_ops_subgraphs(graph, unsupported_nodes, tf_find_constant_inputs)
-    for nodes_set in sub_graphs_list:
-        merge_nodes(graph, nodes_set)
-
-
-def offload_operations_to_tf(graph: nx.MultiDiGraph, op_names_patterns: str):
+def offload_operations_to_tf(graph: Graph, op_names_patterns: str):
     """
     The function accepts the list of strings with operation names patterns. The patterns applied independently and nodes
     matching specific pattern are executed using the TF runtime.
@@ -89,158 +71,6 @@ def offload_operations_to_tf(graph: nx.MultiDiGraph, op_names_patterns: str):
                     merge_nodes(graph, [node_name])
 
 
-def make_shape_4d(shape: np.array):
-    """
-    Create 4D tensor from 1D, 2D or 3D by adding new dimensions of size 1.
-    :param shape: shape to extend.
-    :return: 4D tensor.
-    """
-    new_shape = int64_array(shape)
-    old_shape_len = len(shape)
-
-    for x in range(4 - old_shape_len):  # TODO think about proper way to add additional dimensions considering layout
-        if len(new_shape) <= 1:  # if the shape is 0D or 1D then we should add additional dimensions to batch dimension
-            new_shape = np.insert(new_shape, 0, 1)
-        #            new_shape = np.array([1, shape[0], 1, 1])
-        else:
-            new_shape = np.insert(new_shape, 1, 1)
-    return new_shape
-
-
-def add_reshape_before_op_node(graph: nx.MultiDiGraph, data_node_name: str, op_node_name: str, edge_attrs: dict):
-    """
-    Adds reshape operation which expands dimension of the specified data tensor to 4D.
-    :param graph: graph to operate on.
-    :param data_node_name: the name of the data node to be reshaped to 4D tensor.
-    :param op_node_name: name of the TFCustomSubgraphCall node which produces the tensor.
-    :param edge_attrs: edge attributes which should be preserved.
-    :return: None
-    """
-    data_node = Node(graph, data_node_name)
-
-    graph.remove_edge(data_node_name, op_node_name)
-
-    assert data_node['shape'] is not None
-
-    new_shape = make_shape_4d(data_node['shape'])
-
-    # reshape shape data node
-    reshape_shape_data_node_name = unique_id(graph, "Reshape_shape_")
-    graph.add_node(reshape_shape_data_node_name, kind='data', precision="FP32", name=reshape_shape_data_node_name,
-                   value=new_shape, shape=[1])
-
-    # reshape operation node
-    reshape_node_name = unique_id(graph, "Reshape_")
-    graph.add_node(reshape_node_name, kind='op', precision="FP32", type='Reshape', name=reshape_node_name, op='Reshape',
-                   data_type=data_node['data_type'])
-    update_ie_fields(graph.node[reshape_node_name])
-
-    # reshaped data node
-    reshaped_value = None
-    if data_node['value'] is not None:
-        reshaped_value = np.reshape(data_node['value'], new_shape)
-    reshaped_data_node_name = unique_id(graph, "reshaped_data_")
-    graph.add_node(reshaped_data_node_name, kind='data', precision="FP32", name=reshaped_data_node_name,
-                   shape=new_shape, value=reshaped_value, nchw_layout=True)
-
-    graph.add_edges_from([
-        (data_node_name, reshape_node_name, {'in': 0}),
-        (reshape_shape_data_node_name, reshape_node_name, {'in': 1}),
-        (reshape_node_name, reshaped_data_node_name, {'out': 0}),
-        (reshaped_data_node_name, op_node_name, edge_attrs)
-    ])
-
-
-def add_reshape_after_data_node(graph: nx.MultiDiGraph, data_node_name: str):
-    """
-    Adds reshape operation which changes shape of the tensor produced by TFSubgraphCall from 4D to real dimension
-    of the tensor. The data_node_name node contains real dimensions of the tensor but they will be changed in the
-    add_reshapes_for_tf_subgraph_calls function to a 4D because IE TF call layer supports output in 4D only.
-    :param graph: graph to operate on.
-    :param data_node_name: name of the data node to be reshaped to correct dimensions.
-    :return: None
-    """
-    data_node = Node(graph, data_node_name)
-
-    # if the data node was previously marked as output then we need to mark as output new reshaped data node
-    is_output = False
-    if data_node.has_and_set('is_output'):
-        is_output = data_node['is_output']
-        data_node['is_output'] = False
-
-    # save old consumers nodes with edge attributes
-    old_consumer_nodes_with_attrs = list()
-    for index, out_op in enumerate(data_node.out_nodes()):
-        edge_attrs = graph.get_edge_data(data_node_name, out_op.name)[0]
-        old_consumer_nodes_with_attrs.append((out_op.name, edge_attrs))
-
-    # remove old consumers from the data node
-    for out_op in list(data_node.out_nodes()):
-        graph.remove_edge(data_node_name, out_op.name)
-
-    # reshape operation node
-    reshape_node_name = unique_id(graph, "Reshape_")
-    graph.add_node(reshape_node_name, kind='op', precision="FP32", type='Reshape', name=reshape_node_name, op='Reshape',
-                   data_type=data_node['data_type'])
-    update_ie_fields(graph.node[reshape_node_name])
-
-    # reshape shape data node
-    reshape_shape_data_node_name = unique_id(graph, "Reshape_shape_")
-    graph.add_node(reshape_shape_data_node_name, kind='data', precision="FP32", name=reshape_shape_data_node_name,
-                   value=np.array(data_node['shape']), shape=[1])
-
-    # reshaped data node
-    reshaped_value = None
-    if data_node['value'] is not None:
-        reshaped_value = np.array(data_node['value'])
-    reshaped_data_node_name = unique_id(graph, "reshaped_data_")
-    graph.add_node(reshaped_data_node_name, kind='data', precision="FP32", name=reshaped_data_node_name,
-                   shape=np.array(data_node['shape']), value=reshaped_value, is_output=is_output, nchw_layout=True)
-
-    graph.add_edges_from([
-        (data_node_name, reshape_node_name, {'in': 0}),
-        (reshape_shape_data_node_name, reshape_node_name, {'in': 1}),
-        (reshape_node_name, reshaped_data_node_name, {'out': 0}),
-    ])
-
-    for out_node_name, edge_attrs in old_consumer_nodes_with_attrs:
-        graph.add_edges_from([
-            (reshaped_data_node_name, out_node_name, edge_attrs)
-        ])
-
-
-def add_reshapes_for_tf_subgraph_calls(graph: nx.MultiDiGraph):
-    """
-    Input and output tensors of the TFCustomSubgraphCall must be 4D because IE layer accepts and produces only 4D
-    tensors. This function adds reshape operations where it is necessary.
-    :param graph: graph to operate on.
-    :return: None.
-    """
-    for src_node_name, dst_node_name, edge_attrs in list(graph.edges(data=True)):
-        src_node = Node(graph, src_node_name)
-        dst_node = Node(graph, dst_node_name)
-        if dst_node.kind == 'op' and dst_node.has_valid('type') and dst_node.type == 'TFCustomSubgraphCall' and \
-                src_node.has_valid('shape') and len(src_node.shape) != 4:
-            log.info("There is an data tensor of shape '{}' which goes into '{}' node".format(
-                src_node.shape, dst_node.type))
-            add_reshape_before_op_node(graph, src_node_name, dst_node_name, edge_attrs)
-
-    for node_name in list(graph.nodes()):
-        node = Node(graph, node_name)
-        if node['kind'] == 'op' and node.has_and_set('type') and node.type == 'TFCustomSubgraphCall':
-            for index, data_node in node.out_nodes().items():
-                real_dims_count = len(data_node.shape)
-                if real_dims_count != 4:
-                    log.info("There is an data tensor of shape '{}' with real dims count '{}' which goes out of '{}' "
-                             "node".format(data_node.shape, real_dims_count, node.name))
-                    add_reshape_after_data_node(graph, data_node.id)
-
-                    # need to update shape of the op so IE generates XML with 4D tensors
-                    out_shape = make_shape_4d(data_node['shape'])
-
-                    data_node['shape'] = out_shape
-
-
 def internal_output_name_for_node(node_name: str, output_port: int):
     return node_name + ":" + str(output_port)
 
@@ -273,7 +103,7 @@ def find_output_port(node: Node, output_desc: list, search_node_name: str, searc
                                                                                       search_node_port))
 
 
-def merge_nodes(graph: nx.MultiDiGraph, nodes_to_merge_names: list, inputs_desc: list = None,
+def merge_nodes(graph: Graph, nodes_to_merge_names: list, inputs_desc: list = None,
                 outputs_desc: list = None):
     """
     Merges nodes specified in the set 'nodes_to_merge_names' into one mega-node, creating new edges between mega-node
@@ -288,9 +118,9 @@ def merge_nodes(graph: nx.MultiDiGraph, nodes_to_merge_names: list, inputs_desc:
     """
     if not is_connected_component(graph, nodes_to_merge_names):
         log.warning("The following nodes do not form connected sub-graph: {}".format(nodes_to_merge_names))
-        dump_graph_for_graphviz(graph, nodes_to_dump=nodes_to_merge_names)
+        graph.dump_graph_for_graphviz(nodes_to_dump=nodes_to_merge_names)
 
-    new_node_name = unique_id(graph, "TFSubgraphCall_")
+    new_node_name = graph.unique_id("TFSubgraphCall_")
     log.info("Create new node with name '{}' for nodes '{}'".format(new_node_name, ', '.join(nodes_to_merge_names)))
     graph.add_node(new_node_name)
     new_node_attrs = graph.node[new_node_name]
@@ -305,7 +135,8 @@ def merge_nodes(graph: nx.MultiDiGraph, nodes_to_merge_names: list, inputs_desc:
     for node_name in nodes_to_merge_names:
         node = Node(graph, node_name)
         add_node_pb_if_not_yet_added(node, new_node)
-        for in_node_name, edge_attrs in get_inputs(graph, node_name):
+        # TODO: any improvements?
+        for in_node_name, edge_attrs in Node(graph, node_name).get_inputs():
             in_node = Node(graph, in_node_name)
 
             # internal edges between nodes of the sub-graph
@@ -336,7 +167,7 @@ def merge_nodes(graph: nx.MultiDiGraph, nodes_to_merge_names: list, inputs_desc:
                     added_input_tensors_names.add(input_tensor_name)
 
         # edge from inside sub-graph to outside sub-graph
-        for out_node_name, edge_attrs in get_outputs(graph, node_name):
+        for out_node_name, edge_attrs in Node(graph, node_name).get_outputs():
             if out_node_name not in nodes_to_merge_names:
                 log.debug("Creating edge from inside of sub-graph to outside sub-graph: {} -> {}".format(
                     new_node_name, out_node_name))
@@ -378,122 +209,6 @@ def set_tf_custom_call_node_attrs(node_attrs: dict):
     node_attrs['kind'] = 'op'
 
 
-def prepare_tf_call_nodes(graph: nx.MultiDiGraph):
-    """
-    The function performs preparation of the TF call nodes. Details are provided in the description of called functions.
-    :param graph: graph to operate on.
-    :return: None
-    """
-    update_placeholders(graph)
-    add_output_nodes_transposes(graph)
-    add_reshapes_for_tf_subgraph_calls(graph)
-
-
-def update_placeholders(graph: nx.MultiDiGraph):
-    """
-    Iterates over all nodes of the graph, find all TF sub-graph call operations and updates placeholders shapes and adds
-    transpose operation if necessary.
-    :param graph: graph to operate on
-    :return: None
-    """
-    for node_name in graph.nodes():
-        node = Node(graph, node_name)
-        if node.kind == 'op' and node.has_valid('op') and node.op == 'TFCustomSubgraphCall':
-            update_placeholder_shape_and_add_transpose(node)
-
-
-def update_placeholder_shape_and_add_transpose(node: Node):
-    """
-    The function changes placeholders shapes from NHWC to NCHW format and add transpose operations if needed.
-    :param node: node to operate on.
-    :return: None
-    """
-    tf.reset_default_graph()
-
-    inputs_replacements = list()
-
-    # transpose permutation constant
-    nchw_to_nhwc_constant = tf.constant(nchw_to_nhwc_permute, dtype=tf.int32, name=nchw_to_nhwc_constant_name)
-    nhwc_to_nchw_constant = tf.constant(nhwc_to_nchw_permute, dtype=tf.int32, name=nhwc_to_nchw_constant_name)
-
-    for placeholder_name in node['input_nodes_names']:
-        # dummy node which we can refer to as input in the transpose for the output node
-        # dummy node should be unique for each placeholder
-        dummy_node = tf.constant(value=[[[[1]]]], dtype=tf.float32, name='random_dummy_name_' + placeholder_name)
-
-        placeholder = node['pbs'][placeholder_name]
-        cur_shape = tf_tensor_shape(placeholder.attr['shape'].shape)
-        if len(cur_shape) == 4:  # TODO think about better check that transpose is required
-            nchw_shape = convert_shape(cur_shape, nhwc_to_nchw_permute)
-            for ind in range(len(cur_shape)):
-                placeholder.attr['shape'].shape.dim[ind].size = nchw_shape[ind]
-            transpose_name = placeholder.name + '_transpose'
-            transpose = tf.transpose(dummy_node, nchw_to_nhwc_constant, transpose_name)  # NCHW -> NHWC
-
-            # add transpose operations to GraphDef after placeholders
-            add_node_def_to_subgraph(node, transpose.op.node_def, transpose_name, len(node['input_nodes_names']))
-            inputs_replacements.append((placeholder.name, transpose_name))
-            inputs_replacements.append((dummy_node.name, placeholder.name))
-            node['real_input_dims'].append(nchw_shape)
-        else:
-            node['real_input_dims'].append(cur_shape)
-    add_node_def_to_subgraph(node, nchw_to_nhwc_constant.op.node_def)
-    add_node_def_to_subgraph(node, nhwc_to_nchw_constant.op.node_def)
-
-    # update initial input names to a transposed ones
-    for old_input_tensor_name, new_name in inputs_replacements:
-        update_input_in_pbs(node, old_input_tensor_name, new_name)
-
-
-def add_output_nodes_transposes(graph: nx.MultiDiGraph):
-    """
-    Iterates over all nodes of the graph, find all TF sub-graph call operations and adds Transpose operations to the
-    output nodes if they are 4D to covert output from NHWC to NCHW.
-    :param graph: graph to operate on
-    :return: None
-    """
-    for node_name in graph.nodes():
-        node = Node(graph, node_name)
-        if node.kind == 'op' and node.has_valid('op') and node.op == 'TFCustomSubgraphCall':
-            add_sub_graph_call_output_tensors_transposes(node)
-
-
-def add_sub_graph_call_output_tensors_transposes(node: Node):
-    """
-    Adds transpose operations to the output nodes if they are 4D to change layout from NCHW to NHWC.
-    :param node: the node to add transposes to the output nodes to.
-    :return: None
-    """
-    _, output_tensors = get_subgraph_output_tensors(node)
-
-    # transpose permutation constant
-    nhwc_to_nchw_constant = tf.constant(nhwc_to_nchw_permute, dtype=tf.int32, name=nhwc_to_nchw_constant_name)
-
-    # dummy node which we can refer to as input in the transpose for the output node
-    dummy_node = tf.constant(value=[[[[1]]]], dtype=tf.float32, name='random_dummy_name')
-
-    new_out_tensor_names = list()
-    for out_tensor_name in node['output_tensors_names']:
-        out_name, out_port = out_tensor_name.split(':')
-        if len(output_tensors[int(out_port)].shape) == 4:  # TODO think about better check whether transpose is required
-            out_transpose_name = out_name + '_port_' + out_port + '_transpose'
-            transpose = tf.transpose(dummy_node, nhwc_to_nchw_constant, name=out_transpose_name)
-
-            # starting from TF 1.8 it is not possible to modify the "node_def" of the "tf.op", so we create a copy,
-            # update it and use further
-            new_input_names = transpose.op.node_def.input[:]
-            new_input_names[0] = out_tensor_name
-            new_node_def = copy.deepcopy(transpose.op.node_def)
-            new_node_def.input[:] = new_input_names
-            add_node_def_to_subgraph(node, new_node_def, position=len(node['nodes_order']))
-            new_out_tensor_names.append(out_transpose_name)
-        else:
-            new_out_tensor_names.append(out_tensor_name)
-
-    # update output tensor names with transposes operations
-    node['output_tensors_names'] = new_out_tensor_names
-
-
 def tf_find_constant_inputs(node: Node):
     """
     The function finds constant inputs of the node and nodes with Identity operation.
diff --git a/model-optimizer/mo/front/tf/extractor.py b/model-optimizer/mo/front/tf/extractor.py
index d7af0d506..50ae67ed2 100644
--- a/model-optimizer/mo/front/tf/extractor.py
+++ b/model-optimizer/mo/front/tf/extractor.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -33,12 +33,9 @@ from mo.front.tf.extractors.prod import tf_reduce_prod_ext
 from mo.front.tf.extractors.random_uniform import tf_random_uniform_ext
 from mo.front.tf.extractors.range import tf_range_ext
 from mo.front.tf.extractors.reshape import tf_reshape_ext
-from mo.front.tf.extractors.shape import tf_shape_ext
 from mo.front.tf.extractors.space_to_batch import tf_space_to_batch_ext, tf_batch_to_space_ext
 from mo.front.tf.extractors.split import tf_split_ext
 from mo.front.tf.extractors.squeeze import tf_squeeze_ext
-from mo.front.tf.extractors.strided_slice import tf_strided_slice_ext
-from mo.front.tf.extractors.sum import tf_sum_ext
 from mo.front.tf.extractors.transpose import tf_transpose_ext
 from mo.front.tf.extractors.unpack import tf_unpack_ext
 from mo.front.tf.extractors.utils import get_tf_node_port
@@ -90,7 +87,6 @@ tf_op_extractors = {
     'MatMul': node_pb_arg(tf_matmul_ext),
     'Pack': node_pb_arg(tf_pack_ext),
     'Unpack': node_pb_arg(tf_unpack_ext),
-    'StridedSlice': node_pb_arg(tf_strided_slice_ext),
     'Prod': node_pb_arg(tf_reduce_prod_ext),
     'Const': node_pb_arg(tf_const_ext),
     'Placeholder': node_pb_arg(tf_placeholder_ext),
@@ -109,15 +105,12 @@ tf_op_extractors = {
     'BiasAdd': node_pb_arg(tf_bias_add_ext),
     'Reshape': node_pb_arg(tf_reshape_ext),
     'Squeeze': node_pb_arg(tf_squeeze_ext),
-    'Shape': node_pb_arg(tf_shape_ext),
     'SpaceToBatchND': node_pb_arg(tf_space_to_batch_ext),
     'BatchToSpaceND': node_pb_arg(tf_batch_to_space_ext),
     'Square': node_pb_arg(make_tf_eltwise(lambda a: a * a)),
     'Minimum': node_pb_arg(make_tf_eltwise(lambda a, b: np.minimum(a, b))),  # can use clamp if one argument is const
     'Maximum': node_pb_arg(make_tf_eltwise(lambda a, b: np.maximum(a, b), attrs={'type': 'Eltwise',
                                                                                  'operation': 'max'})),
-    'Sum': node_pb_arg(tf_sum_ext),
-    'Range': node_pb_arg(tf_range_ext),
     'ReadVariableOp': node_pb_arg(make_tf_eltwise(lambda v: v, attrs={'identity': True})),
     'PlaceholderWithDefault': node_pb_arg(make_tf_eltwise(lambda v: v, attrs={'identity': True}))
 }
diff --git a/model-optimizer/mo/front/tf/extractors/bias_add.py b/model-optimizer/mo/front/tf/extractors/bias_add.py
index 883440c3a..d669a37ed 100644
--- a/model-optimizer/mo/front/tf/extractors/bias_add.py
+++ b/model-optimizer/mo/front/tf/extractors/bias_add.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/concat.py b/model-optimizer/mo/front/tf/extractors/concat.py
index 376a1f0d5..18a15311c 100644
--- a/model-optimizer/mo/front/tf/extractors/concat.py
+++ b/model-optimizer/mo/front/tf/extractors/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/concat_test.py b/model-optimizer/mo/front/tf/extractors/concat_test.py
index 054da6186..517c2df08 100644
--- a/model-optimizer/mo/front/tf/extractors/concat_test.py
+++ b/model-optimizer/mo/front/tf/extractors/concat_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/const.py b/model-optimizer/mo/front/tf/extractors/const.py
index 8977a853f..1924b4335 100644
--- a/model-optimizer/mo/front/tf/extractors/const.py
+++ b/model-optimizer/mo/front/tf/extractors/const.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/const_test.py b/model-optimizer/mo/front/tf/extractors/const_test.py
index 5caafa48f..c73e90b7b 100644
--- a/model-optimizer/mo/front/tf/extractors/const_test.py
+++ b/model-optimizer/mo/front/tf/extractors/const_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/eltwise.py b/model-optimizer/mo/front/tf/extractors/eltwise.py
index c45f769f7..3fc56f7aa 100644
--- a/model-optimizer/mo/front/tf/extractors/eltwise.py
+++ b/model-optimizer/mo/front/tf/extractors/eltwise.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/eltwise_test.py b/model-optimizer/mo/front/tf/extractors/eltwise_test.py
index 0a0f1e30d..2cf897d15 100644
--- a/model-optimizer/mo/front/tf/extractors/eltwise_test.py
+++ b/model-optimizer/mo/front/tf/extractors/eltwise_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/elu.py b/model-optimizer/mo/front/tf/extractors/elu.py
index 192250cc6..500df47f8 100644
--- a/model-optimizer/mo/front/tf/extractors/elu.py
+++ b/model-optimizer/mo/front/tf/extractors/elu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/expand_dims.py b/model-optimizer/mo/front/tf/extractors/expand_dims.py
index 0386a16cc..5363bf426 100644
--- a/model-optimizer/mo/front/tf/extractors/expand_dims.py
+++ b/model-optimizer/mo/front/tf/extractors/expand_dims.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/expand_dims_test.py b/model-optimizer/mo/front/tf/extractors/expand_dims_test.py
index dd1f1d8c7..ef2c34466 100644
--- a/model-optimizer/mo/front/tf/extractors/expand_dims_test.py
+++ b/model-optimizer/mo/front/tf/extractors/expand_dims_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/fused_bn.py b/model-optimizer/mo/front/tf/extractors/fused_bn.py
index 31b4a1211..96b2688d5 100644
--- a/model-optimizer/mo/front/tf/extractors/fused_bn.py
+++ b/model-optimizer/mo/front/tf/extractors/fused_bn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/identity.py b/model-optimizer/mo/front/tf/extractors/identity.py
index 9211da327..8d8832f66 100644
--- a/model-optimizer/mo/front/tf/extractors/identity.py
+++ b/model-optimizer/mo/front/tf/extractors/identity.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/identity_test.py b/model-optimizer/mo/front/tf/extractors/identity_test.py
index 1a6a84f9b..ad29c4a03 100644
--- a/model-optimizer/mo/front/tf/extractors/identity_test.py
+++ b/model-optimizer/mo/front/tf/extractors/identity_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/lrn.py b/model-optimizer/mo/front/tf/extractors/lrn.py
index e4a7d57dc..8ebc3a7ab 100644
--- a/model-optimizer/mo/front/tf/extractors/lrn.py
+++ b/model-optimizer/mo/front/tf/extractors/lrn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/lrn_test.py b/model-optimizer/mo/front/tf/extractors/lrn_test.py
index b4855b671..bb2865664 100644
--- a/model-optimizer/mo/front/tf/extractors/lrn_test.py
+++ b/model-optimizer/mo/front/tf/extractors/lrn_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/matmul.py b/model-optimizer/mo/front/tf/extractors/matmul.py
index e0c763dbc..5fd6711c9 100644
--- a/model-optimizer/mo/front/tf/extractors/matmul.py
+++ b/model-optimizer/mo/front/tf/extractors/matmul.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/matmul_test.py b/model-optimizer/mo/front/tf/extractors/matmul_test.py
index e7bd52410..19cac794b 100644
--- a/model-optimizer/mo/front/tf/extractors/matmul_test.py
+++ b/model-optimizer/mo/front/tf/extractors/matmul_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/mean.py b/model-optimizer/mo/front/tf/extractors/mean.py
index 46453b4f1..ac74b4e40 100644
--- a/model-optimizer/mo/front/tf/extractors/mean.py
+++ b/model-optimizer/mo/front/tf/extractors/mean.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/mean_test.py b/model-optimizer/mo/front/tf/extractors/mean_test.py
index 7430bae42..cad5ed315 100644
--- a/model-optimizer/mo/front/tf/extractors/mean_test.py
+++ b/model-optimizer/mo/front/tf/extractors/mean_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/native_tf.py b/model-optimizer/mo/front/tf/extractors/native_tf.py
index ef2dcb348..0b20226b5 100644
--- a/model-optimizer/mo/front/tf/extractors/native_tf.py
+++ b/model-optimizer/mo/front/tf/extractors/native_tf.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/pack.py b/model-optimizer/mo/front/tf/extractors/pack.py
index 06fedf34d..24535901a 100644
--- a/model-optimizer/mo/front/tf/extractors/pack.py
+++ b/model-optimizer/mo/front/tf/extractors/pack.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/placeholder.py b/model-optimizer/mo/front/tf/extractors/placeholder.py
index c87112f8a..a0da30e81 100644
--- a/model-optimizer/mo/front/tf/extractors/placeholder.py
+++ b/model-optimizer/mo/front/tf/extractors/placeholder.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/prod.py b/model-optimizer/mo/front/tf/extractors/prod.py
index 18947f659..70151f9c0 100644
--- a/model-optimizer/mo/front/tf/extractors/prod.py
+++ b/model-optimizer/mo/front/tf/extractors/prod.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/prod_test.py b/model-optimizer/mo/front/tf/extractors/prod_test.py
index 53b974d86..a197b8294 100644
--- a/model-optimizer/mo/front/tf/extractors/prod_test.py
+++ b/model-optimizer/mo/front/tf/extractors/prod_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/random_uniform.py b/model-optimizer/mo/front/tf/extractors/random_uniform.py
index e86936e07..17bee997a 100644
--- a/model-optimizer/mo/front/tf/extractors/random_uniform.py
+++ b/model-optimizer/mo/front/tf/extractors/random_uniform.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/range.py b/model-optimizer/mo/front/tf/extractors/range.py
index 73dffebfa..d6807a42b 100644
--- a/model-optimizer/mo/front/tf/extractors/range.py
+++ b/model-optimizer/mo/front/tf/extractors/range.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/reshape.py b/model-optimizer/mo/front/tf/extractors/reshape.py
index e95920e37..aeed5fe04 100644
--- a/model-optimizer/mo/front/tf/extractors/reshape.py
+++ b/model-optimizer/mo/front/tf/extractors/reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/sigmoid.py b/model-optimizer/mo/front/tf/extractors/sigmoid.py
index 4a43ee39e..4093659b0 100644
--- a/model-optimizer/mo/front/tf/extractors/sigmoid.py
+++ b/model-optimizer/mo/front/tf/extractors/sigmoid.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/space_to_batch.py b/model-optimizer/mo/front/tf/extractors/space_to_batch.py
index d87f9c84f..70c78304e 100644
--- a/model-optimizer/mo/front/tf/extractors/space_to_batch.py
+++ b/model-optimizer/mo/front/tf/extractors/space_to_batch.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/split.py b/model-optimizer/mo/front/tf/extractors/split.py
index f11210268..a55059921 100644
--- a/model-optimizer/mo/front/tf/extractors/split.py
+++ b/model-optimizer/mo/front/tf/extractors/split.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/squeeze.py b/model-optimizer/mo/front/tf/extractors/squeeze.py
index 95ccefa90..00541433e 100644
--- a/model-optimizer/mo/front/tf/extractors/squeeze.py
+++ b/model-optimizer/mo/front/tf/extractors/squeeze.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/squeeze_test.py b/model-optimizer/mo/front/tf/extractors/squeeze_test.py
index ccc0ff18b..d5e42d7d8 100644
--- a/model-optimizer/mo/front/tf/extractors/squeeze_test.py
+++ b/model-optimizer/mo/front/tf/extractors/squeeze_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/strided_slice.py b/model-optimizer/mo/front/tf/extractors/strided_slice.py
index cc2ecd27d..909c10ff5 100644
--- a/model-optimizer/mo/front/tf/extractors/strided_slice.py
+++ b/model-optimizer/mo/front/tf/extractors/strided_slice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,16 +13,37 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+import numpy as np
 
-from mo.front.common.partial_infer.slice import tf_strided_slice_infer
+from mo.front.extractor import FrontExtractorOp
+from mo.ops.op import Op
 
 
-def tf_strided_slice_ext(pb):
-    return {
-        'begin_mask': pb.attr["begin_mask"].i,
-        'end_mask': pb.attr["end_mask"].i,
-        'ellipsis_mask': pb.attr["ellipsis_mask"].i,
-        'new_axis_mask': pb.attr["new_axis_mask"].i,
-        'shrink_axis_mask': pb.attr["shrink_axis_mask"].i,
-        'infer': tf_strided_slice_infer
-    }
+def int_to_array_bit_mask(im):
+    list_repr = list(np.binary_repr(im))
+    list_repr.reverse()
+    list_repr = [int(li) for li in list_repr]
+    return np.array(list_repr, dtype=np.int32)
+
+
+class StridedSliceFrontExtractor(FrontExtractorOp):
+    op = 'StridedSlice'
+    enabled = True
+
+    @staticmethod
+    def extract(node):
+        pb = node.pb
+        bm = int_to_array_bit_mask(pb.attr["begin_mask"].i)
+        bm = np.array([1 - b for b in bm], dtype=np.int32)
+        em = int_to_array_bit_mask(pb.attr["end_mask"].i)
+        em = np.array([1 - b for b in em], dtype=np.int32)
+        attrs = {
+            'begin_mask': bm,
+            'end_mask': em,
+            'ellipsis_mask': int_to_array_bit_mask(pb.attr["ellipsis_mask"].i),
+            'new_axis_mask': int_to_array_bit_mask(pb.attr["new_axis_mask"].i),
+            'shrink_axis_mask': int_to_array_bit_mask(pb.attr["shrink_axis_mask"].i),
+        }
+
+        Op.get_op_class_by_name(__class__.op).update_node_stat(node, attrs)
+        return __class__.enabled
diff --git a/model-optimizer/mo/front/tf/extractors/tanh.py b/model-optimizer/mo/front/tf/extractors/tanh.py
index 9ee14a44c..e640d448c 100644
--- a/model-optimizer/mo/front/tf/extractors/tanh.py
+++ b/model-optimizer/mo/front/tf/extractors/tanh.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/transpose.py b/model-optimizer/mo/front/tf/extractors/transpose.py
index 7d4d6db3a..90bc5bb0a 100644
--- a/model-optimizer/mo/front/tf/extractors/transpose.py
+++ b/model-optimizer/mo/front/tf/extractors/transpose.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/unpack.py b/model-optimizer/mo/front/tf/extractors/unpack.py
index 2ff831c27..5d1bee1f9 100644
--- a/model-optimizer/mo/front/tf/extractors/unpack.py
+++ b/model-optimizer/mo/front/tf/extractors/unpack.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/utils.py b/model-optimizer/mo/front/tf/extractors/utils.py
index 5b736df0c..0b71a9450 100644
--- a/model-optimizer/mo/front/tf/extractors/utils.py
+++ b/model-optimizer/mo/front/tf/extractors/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/extractors/utils_test.py b/model-optimizer/mo/front/tf/extractors/utils_test.py
index 51544cd08..d278ccf69 100644
--- a/model-optimizer/mo/front/tf/extractors/utils_test.py
+++ b/model-optimizer/mo/front/tf/extractors/utils_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/graph_utils.py b/model-optimizer/mo/front/tf/graph_utils.py
index 2a8454c67..72891c098 100644
--- a/model-optimizer/mo/front/tf/graph_utils.py
+++ b/model-optimizer/mo/front/tf/graph_utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,14 +17,15 @@
 import collections
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.extractor import update_attrs
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.activation import Activation
+from mo.ops.concat import Concat
 from mo.ops.const import Const
 from mo.ops.convolution import Convolution
+from mo.ops.crop import Crop
 from mo.ops.reshape import Reshape
 from mo.ops.softmax import Softmax
 from mo.utils.error import Error
@@ -55,6 +56,7 @@ def squeeze_reshape_and_concat(start_nodes: list):
                     assert new_shape[2] == 1
                     new_shape = np.delete(new_shape, 2)
                     cur_node.in_node(1).value = new_shape
+                    cur_node.in_node(1).shape = np.array(new_shape.shape, dtype=np.int64)
                     cur_node['dim'] = new_shape.copy()
                     # run infer function once again
                     cur_node.infer(cur_node)
@@ -72,7 +74,7 @@ def squeeze_reshape_and_concat(start_nodes: list):
             q.append(node)
 
 
-def add_convolution_to_swap_xy_coordinates(graph: nx.MultiDiGraph, input_node: Node, coordinates_size: int):
+def add_convolution_to_swap_xy_coordinates(graph: Graph, input_node: Node, coordinates_size: int):
     """
     The function add convolution node after the node 'input_node' to swap xy coordinates of the boxes produced
     by the node 'input_node'. It is expected that box coordinates are located in the fastest changing dimension of the
@@ -121,7 +123,26 @@ def add_convolution_to_swap_xy_coordinates(graph: nx.MultiDiGraph, input_node: N
     return conv_op.create_node([input_reshape_4d_node, conv_filter_const_node], dict(name=input_node.name + "/conv"))
 
 
-def add_activation_function_after_node(graph: nx.MultiDiGraph, node: Node, activation_function: str):
+def add_fake_background_loc(graph: Graph, input_node: Node):
+    """
+    DetectionOutput layer expects that box coordinates contains coordinates of boxes for the "background" class also,
+    but in the TensorFlow\* Object Detection API the tensor contains information about real object classes only.
+    The function copies a slice of the output data of the node 'input_node' and then concats it to the beginning of the
+    data. The data in this slice is not used by the Detection Output layer so the actual values are not important. This
+    approach allows the model to be reshape-able and does not introduce many layers.
+    "background" class box coordinates.
+    :param graph: graph to operate on.
+    :param input_node: node producing the boxes coordinates.
+    :return convolution node that adds slice of data for the "background" class.
+    """
+    crop_op = Crop(graph, dict(axis=np.array([1]), offset=np.array([0]), dim=np.array([1]), nchw_layout=True))
+    crop_node = crop_op.create_node([input_node], dict(name='crop_locs'))
+
+    concat_op = Concat(graph, dict(axis=1, in_ports_count=2, nchw_layout=True))
+    return concat_op.create_node([crop_node, input_node], dict(name=input_node.id + '/locs_with_fake_background'))
+
+
+def add_activation_function_after_node(graph: Graph, node: Node, activation_function: str):
     """
     The function adds node with activation function defined by string 'activation_function' which gets input from the
     node 'node'.
diff --git a/model-optimizer/mo/front/tf/loader.py b/model-optimizer/mo/front/tf/loader.py
index 8310e0acb..ea933961b 100644
--- a/model-optimizer/mo/front/tf/loader.py
+++ b/model-optimizer/mo/front/tf/loader.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,8 +18,6 @@ import logging as log
 import os
 import re
 
-import networkx as nx
-
 from mo.utils.error import Error, FrameworkError
 from mo.utils.utils import refer_to_faq_msg
 
@@ -30,7 +28,7 @@ except ImportError:
                 refer_to_faq_msg(42))
 
 from google.protobuf import text_format
-from mo.graph.graph import create_graph_with_nodes
+from mo.graph.graph import create_graph_with_nodes, Graph
 from mo.utils.summarize_graph import summarize_graph
 
 
@@ -258,22 +256,17 @@ def protobuf2nx(pb: tf.GraphDef):
     return graph
 
 
-def variables_to_constants(graph: nx.MultiDiGraph, variables_values: dict):
+def variables_to_constants(graph: Graph, variables_values: dict):
     """
     Converts `Variable<V2>` operations to FakeConst operations with `value` from `variables_values` dictionary
     :param graph: graph to operate on
     :param variables_values: dictionary with variable names as keys and np.array data as values
     """
-    variable_operations = ['Variable', 'VariableV2']
-    for node_name in graph.nodes():
-        node_attr_dict = graph.node[node_name]
-        if 'op' not in node_attr_dict:
-            continue
-        op_name = node_attr_dict['op']
-        if op_name not in variable_operations:
-            continue
+    for node in graph.get_op_nodes(op='FakeConst'):
+        node_name = node.name
+
         if node_name not in variables_values:
-            log.debug("There is no value for '{}': {} in checkpoint variable values".format(op_name, node_name))
+            log.debug("There is no value for '{}': {} in checkpoint variable values".format(node.op, node_name))
             continue
-        graph.node[node_name]['op'] = 'FakeConst'
-        graph.node[node_name]['value'] = variables_values[node_name]
+
+        node['value'] = variables_values[node_name]
diff --git a/model-optimizer/mo/front/tf/loader_test.py b/model-optimizer/mo/front/tf/loader_test.py
index 326849ff2..58f425454 100644
--- a/model-optimizer/mo/front/tf/loader_test.py
+++ b/model-optimizer/mo/front/tf/loader_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/front/tf/partial_infer/tf.py b/model-optimizer/mo/front/tf/partial_infer/tf.py
index a7247b95e..ef358895f 100644
--- a/model-optimizer/mo/front/tf/partial_infer/tf.py
+++ b/model-optimizer/mo/front/tf/partial_infer/tf.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -23,7 +23,7 @@ from google.protobuf import text_format
 
 from mo.front.extractor import node_defs_to_str
 from mo.front.tf.extractors.utils import tf_dtype_extractor, tf_tensor_shape, get_tf_node_port
-from mo.graph.graph import Node, get_sorted_inputs, get_inputs, create_sub_graph_copy
+from mo.graph.graph import Node
 from mo.utils.graph import node_incoming_neighbourhood, node_outcoming_neighbourhood
 
 
@@ -41,7 +41,7 @@ def tf_native_tf_node_infer(node: Node):
     # Also the sub-graph contains names of the output nodes of the node to perform native infer.
     nodes_to_extract = node_incoming_neighbourhood(node.graph, node.id, 10) + node_outcoming_neighbourhood(node.graph,
                                                                                                            node.id, 1)
-    tmp_graph = create_sub_graph_copy(node.graph, nodes_to_extract)
+    tmp_graph = node.graph.create_sub_graph_copy(nodes_to_extract)
 
     tmp_node_attrs = tmp_graph.node[node.id]
     tmp_node = Node(tmp_graph, node.id)
@@ -82,7 +82,7 @@ def generate_feed_dict(graph: tf.Graph, node: Node):
     """
     all_constants = True
     feed_dict = dict()
-    for in_data_node_name, edge_attrs in get_inputs(node.graph, node.id):
+    for in_data_node_name, edge_attrs in node.get_inputs():
         if 'control_flow_edge' in edge_attrs and edge_attrs['control_flow_edge']:
             continue
         value = node.in_node(edge_attrs['in']).value
@@ -198,7 +198,7 @@ def add_placeholders_to_subgraph(node: Node):
     :return: None
     """
     inputs_replacements = list()
-    for index, (in_data_node, edge_attrs) in enumerate(get_sorted_inputs(node)):
+    for index, (in_data_node, edge_attrs) in enumerate(node.get_sorted_inputs()):
         if 'control_flow_edge' in edge_attrs and edge_attrs['control_flow_edge']:
             continue
 
diff --git a/model-optimizer/mo/front/tf/register_custom_ops.py b/model-optimizer/mo/front/tf/register_custom_ops.py
index 70ebe56c4..7a11e9ce8 100644
--- a/model-optimizer/mo/front/tf/register_custom_ops.py
+++ b/model-optimizer/mo/front/tf/register_custom_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,18 +14,14 @@
  limitations under the License.
 """
 
-from mo.back.replacement import BackReplacementPattern
 from mo.front.common.replacement import FrontReplacementOp, FrontReplacementPattern, FrontReplacementSubgraph
 from mo.front.extractor import FrontExtractorOp
 from mo.front.tf.replacement import FrontReplacementFromConfigFileSubGraph, FrontReplacementFromConfigFileOp, \
     FrontReplacementFromConfigFileGeneral
-from mo.middle.replacement import MiddleReplacementPattern
-from mo.ops.op import Op
-from mo.utils import class_registration
 
 
-def update_registration():
-    class_registration.update_registration([Op, FrontExtractorOp, FrontReplacementOp, FrontReplacementPattern,
-                                            FrontReplacementSubgraph, FrontReplacementFromConfigFileSubGraph,
-                                            FrontReplacementFromConfigFileOp, MiddleReplacementPattern,
-                                            BackReplacementPattern, FrontReplacementFromConfigFileGeneral])
+def get_front_classes():
+    front_classes = [FrontExtractorOp, FrontReplacementOp, FrontReplacementPattern, FrontReplacementSubgraph,
+                     FrontReplacementFromConfigFileSubGraph, FrontReplacementFromConfigFileOp,
+                     FrontReplacementFromConfigFileGeneral]
+    return front_classes
diff --git a/model-optimizer/mo/front/tf/replacement.py b/model-optimizer/mo/front/tf/replacement.py
index b9e1e60fb..c9b48eeac 100644
--- a/model-optimizer/mo/front/tf/replacement.py
+++ b/model-optimizer/mo/front/tf/replacement.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2017-2018 Intel Corporation
+ Copyright (c) 2017-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,13 +15,11 @@
 """
 import logging as log
 
-import networkx as nx
-
 from mo.front.common.custom_replacement_registry import CustomReplacementRegistry
 from mo.front.common.replacement import FrontReplacementSubgraph, FrontReplacementPattern
 from mo.front.subgraph_matcher import SubgraphMatcher, SubgraphMatch
 from mo.front.tf.custom_subgraph_call import merge_nodes
-from mo.graph.graph import dump_graph_for_graphviz, unique_id
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 from mo.utils import class_registration
 from mo.utils.graph import is_connected_component
@@ -40,7 +38,7 @@ class FrontReplacementFromConfigFileGeneral(FrontReplacementPattern):
     def transform_graph(self, graph, replacement_descriptions):
         raise Exception('Function "transform_graph" must be overridden in the sub-class')
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         replacement_descriptions = CustomReplacementRegistry().get_custom_replacement_description(self.replacement_id)
         if replacement_descriptions is None or len(replacement_descriptions) < 1:
             log.info("Failed to find custom replacement description with id '{}'".format(self.replacement_id))
@@ -72,10 +70,10 @@ class FrontReplacementFromConfigFileSubGraph(FrontReplacementSubgraph):
     def __init__(self):
         super().__init__()
 
-    def nodes_to_remove(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def nodes_to_remove(self, graph: Graph, match: SubgraphMatch):
         return match.matched_nodes_names()
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         replacement_descriptions = CustomReplacementRegistry().get_custom_replacement_description(self.replacement_id)
         if replacement_descriptions is None:
             log.info("Failed to find custom replacement description with id '{}'".format(self.replacement_id))
@@ -87,7 +85,7 @@ class FrontReplacementFromConfigFileSubGraph(FrontReplacementSubgraph):
                 if not is_connected_component(graph, match.matched_nodes_names()):
                     log.warning("The following nodes don't form connected sub-graph: {}".format(
                         match.matched_nodes_names()))
-                    dump_graph_for_graphviz(graph, match.matched_nodes_names())
+                    graph.dump_graph_for_graphviz(match.matched_nodes_names())
                 self.replace_sub_graph(graph, match)
 
     registered_ops = {}
@@ -111,7 +109,7 @@ class FrontReplacementFromConfigFileOp(FrontReplacementFromConfigFileSubGraph):
         super().__init__()
 
     def input_edges_match(self,  # pylint: disable=method-hidden
-                          graph: nx.DiGraph,
+                          graph: Graph,
                           match: SubgraphMatch,
                           new_sub_graph: dict):
         """
@@ -131,7 +129,7 @@ class FrontReplacementFromConfigFileOp(FrontReplacementFromConfigFileSubGraph):
         return input_edges_match
 
     def output_edges_match(self,  # pylint: disable=method-hidden
-                           graph: nx.DiGraph,
+                           graph: Graph,
                            match: SubgraphMatch,
                            new_sub_graph: dict):
         """
@@ -150,7 +148,7 @@ class FrontReplacementFromConfigFileOp(FrontReplacementFromConfigFileSubGraph):
             output_edges_match[(output_node.id, output_port)] = (new_sub_graph['new_node'].id, sub_graph_output_port)
         return output_edges_match
 
-    def generate_sub_graph(self, graph: nx.MultiDiGraph, match: SubgraphMatch):
+    def generate_sub_graph(self, graph: Graph, match: SubgraphMatch):
         replacement_desc = match.custom_replacement_desc
         op = Op.get_op_class_by_name(replacement_desc.op)(graph, match.custom_replacement_desc.custom_attributes)
         op.default_backend_attrs = list(match.custom_replacement_desc.custom_attributes.keys())
@@ -159,7 +157,7 @@ class FrontReplacementFromConfigFileOp(FrontReplacementFromConfigFileSubGraph):
             op.substitute_ie_attrs(op.attrs)
             node = merge_nodes(graph, match.matched_nodes_names(), replacement_desc.get_inputs_description(),
                                replacement_desc.get_outputs_description())
-            node.name = unique_id(graph, op.attrs['type'])
+            node.name = graph.unique_id(op.attrs['type'])
             node_attrs = graph.node[node.id]
             # copy attributes which are defined in the custom operation
             for key in op.attrs.keys():
diff --git a/model-optimizer/mo/graph/connection.py b/model-optimizer/mo/graph/connection.py
new file mode 100644
index 000000000..0af19737b
--- /dev/null
+++ b/model-optimizer/mo/graph/connection.py
@@ -0,0 +1,221 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from collections import namedtuple
+from copy import deepcopy
+from mo.utils.error import Error
+
+
+class Connection:
+    def __init__(self, graph, source, destinations: list):
+        self.graph = graph
+        self.source = source
+        self.destinations = destinations
+        self.data = namedtuple('Data', ['get_value', 'get_shape'])
+        self.data.get_value = self._get_value
+        self.data.get_shape = self._get_shape
+
+    def _get_value(self):
+        if self.graph.stage == 'front':
+            return None
+        return self.source.node.out_node().value
+
+    def _get_shape(self):
+        if self.graph.stage == 'front':
+            return None
+        return self.source.node.out_node().shape
+
+    def get_source(self):
+        return self.source
+
+    def get_destination(self):
+        if self.destinations and len(self.destinations) > 1:
+            raise Error("Connection has more than one destination: {}".format(len(self.destinations)))
+        return self.destinations[0] if self.destinations else None
+
+    def get_destinations(self):
+        return self.destinations
+
+    def set_source(self, port):
+        # In this method we are changing source for a connection with given port.
+        # See detailed example below.
+        #
+        # SOURCE - Op1(out_port:0)
+        #
+        #                | Op4(in_port:0)
+        # DESTINATIONS - | Op3(in_port:0)
+        #                | Op2(in_port:0)
+        #
+        # NEW PORT - Op5(out_port:0)
+        #
+        #                               ,--->Op4(in_port:0)
+        # CONNECTION                   ,--->Op3(in_port:0)
+        #               Op1(out_port:0)--->Op2(in_port:0)
+        #
+        # When we set source for connection we disconnect existing source and reconnect all consumers to
+        # the new given port with type='out'.
+        #
+        # UPDATED CONNECTION            ,--->Op4(in_port:0)
+        #                              ,--->Op3(in_port:0)
+        #               Op5(out_port:0)--->Op2(in_port:0)
+        #
+
+        if port.type == 'in':
+            raise Error("Wrong port type in set_source method. Should be 'out' but given 'in'")
+
+        if self.graph.stage == 'front':
+            scr_node = port.node
+            # Reconnecting all destinations as consumers to the source port preserving edge attrs
+            for dst_port in self.destinations:
+                edge_attrs, u, v, key = dst_port.get_in_edge_attrs(data=True)
+                if u is not None:
+                    edge_attrs['out'] = port.idx
+                    self.graph.remove_edge(u, v, key=key)
+                    self.graph.add_edge(scr_node.id, v, **edge_attrs)
+                else:
+                    self.graph.create_edge(scr_node, dst_port.node, port.idx, dst_port.idx)
+        else:
+            # Create out data node if not exists and mark node with need_shape_inference = True
+            # In case if data node exists just use it.
+            port._create_data_if_necessary()
+            port_out_data = port.node.out_node(port.idx)
+
+            if self.source is not None and self.source.idx in self.source.node.out_nodes():
+                source_out_data = self.source.node.out_node(self.source.idx)
+                # Copy attrs from source_out_data to port_out_data
+                attrs = deepcopy(source_out_data.attrs())
+                for attr in attrs:
+                    port_out_data[attr] = attrs[attr]
+
+            for dst_port in self.destinations:
+                edge_attrs, u, v, key = dst_port.get_in_edge_attrs(data=True)
+                if u is not None:
+                    self.graph.remove_edge(u, v, key=key)
+                    self.graph.add_edge(port_out_data.id, v, **edge_attrs)
+                else:
+                    self.graph.add_edge(port_out_data.id, dst_port.node.id, **{'in': dst_port.idx})
+
+    def set_destination(self, port):
+        # In this method we are changing destination for a connection with given port with type 'in'.
+        # This method requires exactly one destination or empty destinations list.
+        # See detailed example below.
+        #
+        # SOURCE - Op1(out_port:0)
+        #
+        # DESTINATIONS - Op2(in_port:0)
+        #
+        # NEW PORT - Op3(in_port:0)
+        #
+        # CONNECTION
+        #               Op1(out_port:0)--->Op2(in_port:0)
+        #
+        # When we set destination for connection we disconnect destination port if exists and connect source to
+        # the new given port with type='in'.
+        #
+        # UPDATED CONNECTION
+        #
+        #               Op1(out_port:0)--->Op3(in_port:0)
+        #
+
+        def check_and_remove_edge():
+            if self.destinations:
+                for destination in self.destinations:
+                    edge_attrs, u, v, key = destination.get_in_edge_attrs(data=True)
+                    if u is None:
+                        raise Error(
+                            "Broken Connection object! Destination (node:{}) is not connected to source.".format(
+                                destination.node.name))
+                    destination.disconnect()
+
+        if self.destinations and len(self.destinations) > 1:
+            raise Error("set_destination applicable only for connections that has exactly one destination or \
+                         when there is no destinations")
+
+        if port.type == 'out':
+            raise Error("Wrong port type in set_destination method. Should be 'in' but given 'out'")
+
+        if self.graph.stage == 'front':
+            if self.source is not None:
+                node = self.source.node
+                check_and_remove_edge()
+                self.graph.create_edge(node, port.node, out_port=self.source.idx, in_port=port.idx)
+            self.destinations = [port]
+        else:
+            # create out node if not exists and mark node with need_shape_inference = True
+            # in case if data node exists just use it as is
+            if self.source is not None:
+                data_node = self.source._create_data_if_necessary()
+                check_and_remove_edge()
+                self.graph.add_edge(data_node.id, port.node.id, **{'in': port.idx})
+            self.destinations = [port]
+
+    def add_destination(self, port):
+        # In this method we are adding destination port with type 'in' for a connection.
+        # See detailed example below.
+        #
+        # SOURCE - Op1(out_port:0)
+        #
+        # DESTINATIONS - Op2(in_port:0)
+        #
+        # NEW PORT - Op3(in_port:0)
+        #
+        # CONNECTION
+        #               Op1(out_port:0)--->Op2(in_port:0)
+        #
+        # When we set destination for connection we disconnect destination port if exists and connect source to
+        # the new given port with type='in'.
+        #
+        # UPDATED CONNECTION
+        #                                 ,-->Op3(in_port:0)
+        #               Op1(out_port:0)--->Op2(in_port:0)
+        #
+
+        if self.source is None:
+            raise Error("Can not add destination for connection without source port!")
+
+        if self.graph.stage == 'front':
+            node = self.source.node
+            self.graph.create_edge(node, port.node, out_port=self.source.idx, in_port=port.idx)
+        else:
+            data_node = self.source._create_data_if_necessary()
+            self.graph.add_edge(data_node.id, port.node.id, **{'in': port.idx})
+
+        self.destinations.append(port)
+
+    def remove(self):
+        # This method deletes all edges in connection. After that connection is not more accessible.
+        # See detailed example below.
+        #
+        # SOURCE - Op1(out_port:0)
+        #
+        #                | Op4(in_port:0)
+        # DESTINATIONS - | Op3(in_port:0)
+        #                | Op2(in_port:0)
+        #
+        #                               ,--->Op4(in_port:0)
+        # CONNECTION                   ,--->Op3(in_port:0)
+        #               Op1(out_port:0)--->Op2(in_port:0)
+        #
+        # After removing edges connection will be empty
+        #
+        # REMOVED CONNECTION
+        #            Op5(out_port:0)   Op4(in_port:0)  Op2(in_port:0)  Op3(in_port:0)
+        #
+
+        if self.destinations:
+            for dst_port in self.destinations:
+                dst_port.disconnect()
+        self.source = None
+        self.destinations = []
diff --git a/model-optimizer/mo/graph/graph.py b/model-optimizer/mo/graph/graph.py
index e44b1083b..12cbbed5a 100644
--- a/model-optimizer/mo/graph/graph.py
+++ b/model-optimizer/mo/graph/graph.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,239 +21,28 @@ from copy import deepcopy
 import networkx as nx
 import numpy as np
 
+from mo.graph.port import Port
 from mo.utils.error import Error
-from mo.utils.utils import refer_to_faq_msg
+from mo.utils.utils import refer_to_faq_msg, deprecated_api, shrink_str_value
 
 
-def unique_id(graph: nx.MultiDiGraph, prefix: str = ""):
-    """
-    Generates a unique node id for a new node in a given graph.
-    The optional string prefix can be specified.
-    """
-    # TODO thread safety?
-    unique_id.count = max(unique_id.count, graph.number_of_nodes()) + 1
-    if prefix and not graph.has_node(prefix):
-        return str(prefix)
-    while graph.has_node(prefix + str(unique_id.count)):
-        unique_id.count += 1
-    return prefix + str(unique_id.count)
-
-
-unique_id.count = 0
-
-
-def get_node_id_by_name(graph: nx.MultiDiGraph, name: str):
-    for node in graph.nodes():
-        if 'name' in graph.node[node] and graph.node[node]['name'] == name:
-            return node
-    raise Error('No node with name {}. ' +
-                refer_to_faq_msg(51), name)
-
-
-def create_graph_with_nodes(src_nodes, get_id: callable, get_attrs: callable):
-    """
-    Go over all nodes in src_nodes that should be enumerable and create new NX nodes
-    using get_id and get_attrs functions to create node id and node attributes correspondingly.
-    """
-    graph = nx.MultiDiGraph()
-    for node in src_nodes:
-        graph.add_node(get_id(node), **get_attrs(node))
-    return graph
-
-
-# TODO implement merging for keys with dictionary values?
-def merge_edge_props(attrs: dict, additional_attrs: dict):
-    """
-    Update edge attributes without changing 'in' and 'out' keys.
-    It is necessary to copy edge attributes during merging of nodes when
-    result of one subgraph call is passed as input to another subgraph call
-    """
-    result = attrs
-    for (key, value) in additional_attrs.items():
-        if key not in ['in', 'out']:
-            if type(additional_attrs[key]) is list:
-                if key not in result:
-                    result[key] = []
-                result[key].extend(additional_attrs[key])
-                result[key] = list(set(result[key]))  # silly solution to find unique elements
-            else:
-                result[key] = value
-    return result
-
-
-def print_graph_stat(graph: nx.MultiDiGraph):
-    log.debug('Number of nodes in graph: {}'.format(graph.number_of_nodes()))
-    log.debug('Number of edges in graph: {}'.format(len(list(graph.edges()))))
-    ops = collections.defaultdict(int)
-    for _node in graph.nodes():
-        node = NodeWrap(graph, _node)
-        kind = node.kind if node.has('kind') else '<UNDEFINED>'
-        if node.has('op'):
-            ops['op/' + node.op] += 1
-        else:
-            ops[kind] += 1
-        if node.has('shape') and np.any(node.shape == 0):
-            log.error("Found bad shape: '{}' for node '{}'".format(node.shape, node.node))
-    for k, v in ops.items():
-        log.debug('   {} : {}'.format(k, v))
-
-
-def get_inputs_with_ports(graph, match, pattern_edges, input_names_in_pattern):
-    """
-    Front replacements of multi-input nodes should specify output port to add_node-like functions
-    This function is a helper to get such information out of matched nodes
-    :param graph: graph to operate on
-    :param match: dictionary returned by matching function
-    :param pattern_edges: edges that are specified in pattern
-    :param input_names_in_pattern: names of matched nodes as they were specified in pattern that should be in
-    resulting list
-    :return: list of tuples of node and output port
-    """
-    inputs = []
-    for name in input_names_in_pattern:
-        assert name in match, "node named {} not in match {}".format(name, match)
-        src = match[name]
-        dst = []
-        for edge in pattern_edges:
-            if edge[0] == name:
-                assert edge[1] in match, "name from pattern_edges {} not in match {}".format(edge[1], match)
-                dst.append(match[edge[1]])
-        if len(dst) != 1:
-            raise Error('Multiple output ports detected for node {} as {} in pattern'.format(match[name].id, name))
-        dst = dst[0]
-        out_port = graph.get_edge_data(src.id, dst.id)[0]['out']
-        inputs.append((src, out_port))
-    return inputs
-
-
-def dump_graph_for_graphviz(graph: nx.MultiDiGraph, node_attrs: list = ['kind', 'op', 'shape'],
-                            edge_attrs: list = ['in', 'out'],
-                            nodes_to_dump: list = None, save_to_svg = False):
-    log.debug("---- GRAPHVIZ OUTPUT STARTS ----")
-    if nodes_to_dump is None:
-        nodes_to_dump = graph.nodes()
-    string = '\ndigraph {\n'
-    visited_nodes = set()
-    for src_node_name, dst_node_name, attrs in graph.edges(data=True):
-        visited_nodes.add(src_node_name)
-        visited_nodes.add(dst_node_name)
-        if src_node_name not in nodes_to_dump or dst_node_name not in nodes_to_dump:
-            continue
-        src_node = graph.node[src_node_name]
-        dst_node = graph.node[dst_node_name]
-        src_node_string = str(src_node_name) + '\\n' + '\\n'.join(
-            [str(key) + '=' + str(src_node.get(key, 'None')) for key in node_attrs if key in src_node])
-        dst_node_string = str(dst_node_name) + '\\n' + '\\n'.join(
-            [str(key) + '=' + str(dst_node.get(key, 'None')) for key in node_attrs if key in dst_node])
-        edge_string = ' '.join([str(key) + '=' + str(attrs.get(key, 'None')) for key in edge_attrs if key in attrs])
-        string += '"{}" -> "{}" [label = "{}"];\n'.format(src_node_string, dst_node_string, edge_string)
-    for node in nodes_to_dump:
-        if node not in visited_nodes:
-            string += '"{}"'.format(node) # TODO: add attributes like it was done in the loop above
-            visited_nodes.add(node)
-    string += '}'
-    log.debug(string)
-    log.debug("---- GRAPHVIZ OUTPUT ENDS ----")
-
-    if save_to_svg:
-        try:
-            import graphviz
-            import os
-            file_name = "{}_{}.txt".format(graph.name.replace('/', '_'), 0)
-            id = 1
-            while os.path.exists(file_name):
-                file_name = "{}_{}.txt".format(graph.name.replace('/', '_'), id)
-                id += 1
-            with open(file_name, "w") as f:
-                f.write(string)
-            graphviz.render('dot','svg', file_name)
-            print('Graph was saved to {}.{}'.format(file_name, 'svg'))
-        except ImportError:
-            raise ImportError('Can\'t import graphviz')
-        except Exception as e:
-            raise Error('Can\'t save graph to svg') from e
-
-    return string
-
-
-def create_sub_graph_copy(graph: nx.MultiDiGraph, nodes_to_extract: list):
-    """
-    Create new graph which is a sub-graph of the 'graph' that contains just nodes from 'nodes_to_extract' list. The
-    returned sub-graph is a deep copy of the provided graph nodes.
-    :param graph: graph to create a sub-graph from.
-    :param nodes_to_extract: list of node names to extract.
-    :return: new graph.
-    """
-    return graph.subgraph(nodes_to_extract).copy()
-
-
-def get_inputs(graph: nx.MultiDiGraph, node: str, edge_attr: dict = {}, control_flow: bool = False):
-    in_edges = graph.in_edges(node, data=True)
-    if not control_flow:
-        in_edges = [(u, v, d) for u, v, d in in_edges if 'control_flow_edge' not in d or not d['control_flow_edge']]
-    return [(u, d) for u, v, d in in_edges if all([attr in d and d[attr] == edge_attr[attr] for attr in edge_attr])]
-
-
-def get_outputs(graph: nx.MultiDiGraph, node: str, edge_attr: dict = {}, control_flow: bool = False):
-    out_edges = graph.out_edges(node, data=True)
-    if not control_flow:
-        out_edges = [(u, v, d) for u, v, d in out_edges if 'control_flow_edge' not in d or not d['control_flow_edge']]
-    return [(v, d) for u, v, d in out_edges if all([attr in d and d[attr] == edge_attr[attr] for attr in edge_attr])]
-
-
-def get_single_input(graph: nx.MultiDiGraph, node: str, edge_attr: dict = {}):
-    """
-    Searches for all edges that have given attributes.
-    If there no such edges or there are multiple edges, raise exception.
-    If there is only one edge, returns the source node for this edge
-    and the edge attributes themselves.
-    """
-    inputs = get_inputs(graph, node, edge_attr)
-    if len(inputs) != 1:
-        log.debug("Node '{}' has {} inputs with edge attributes '{}'".format(node, inputs, str(edge_attr)))
-        raise AttributeError(
-            "None or multiple inputs satisfy given attributes. Node: " + str(node) + ", edge_attr: " + str(edge_attr))
-    return inputs[0]
+def dict_to_ordered_dict(d: dict):
+    return collections.OrderedDict(sorted(d.items(), key=lambda t: t[0]))
 
 
-def get_single_output(graph: nx.MultiDiGraph, node: str, edge_attr: dict = {}):
-    outputs = get_outputs(graph, node, edge_attr)
-    if len(outputs) != 1:
-        log.debug("Node '{}' has {} outputs with edge attributes '{}'".format(node, outputs, str(edge_attr)))
-        raise AttributeError(
-            "None or multiple outputs satisfy given attributes. Node: " + str(node) + ", edge_attr: " + str(edge_attr))
-    return outputs[0]
+class Node:
+    def __init__(self, graph, node: str):
+        if node not in graph:
+            raise AttributeError("Attempt to access node {} that not in graph".format(node))
 
+        super(Node, self).__setattr__('graph', graph)
+        super(Node, self).__setattr__('node', node)  # obsolete
+        super(Node, self).__setattr__('id', node)
 
-def get_graph_ops(graph: nx.MultiDiGraph):
-    return [Node(graph, node) for node in graph.nodes() if Node(graph, node).soft_get('kind') == 'op']
-
-
-def dict_includes_compare_attrs(attr, attr_probe):
-    if callable(attr_probe) and not isinstance(attr_probe, type):
-        return attr_probe(attr)
-    else:
-        return attr == attr_probe
-
-def dict_includes(big: dict, sub_dict: dict):
-    ''' Searches attributes from sub_dict in big and ensures that all values match.
-
-        Entries in sub_dict can be of two types: callable or not callable. If callable is specified
-        it is treated as probing function for attribute value from big dictionary by callable(attr) expression.
-        If it is not callable, the values are compared with == operator.
-    '''
-    return all(
-        dict_includes_compare_attrs(big.get(attr, None), sub_dict[attr])
-        for attr in sub_dict.keys()
-    )
-
-
-class NodeWrap:
-
-    def __init__(self, graph: nx.MultiDiGraph, node: str):
-        super(NodeWrap, self).__setattr__('graph', graph)
-        super(NodeWrap, self).__setattr__('node', node)  # obsolete
-        super(NodeWrap, self).__setattr__('id', node)
+    def __str__(self, max_length: int = 100):
+        node_dict = self.graph.node[self.id]
+        print_dict = {k: v if k != 'value' else shrink_str_value(v, max_symbols=max_length) for k, v in node_dict.items()}
+        return str(print_dict)
 
     def __setattr__(self, k, v):
         # you can assign only existing attributes
@@ -266,6 +55,61 @@ class NodeWrap:
         # hope it raises AttributeError if k is not in the dict
         return self.graph.node[self.node][k]
 
+    def __getitem__(self, k):
+        return self.graph.node[self.node][k]
+
+    def __setitem__(self, k, v):
+        self.graph.node[self.node][k] = v
+
+    def __contains__(self, k):
+        return self.has(k)
+
+    def add_input_port(self, idx):
+        if not self.has_valid('_in_ports'):
+            Node(self.graph, self.id)['_in_ports'] = set()
+        if idx in self.in_ports():
+            raise Error("Input port with {} index already exists for {} node.".format(idx, self.name))
+        self._in_ports.add(idx)
+
+    def add_output_port(self, idx):
+        if not self.has_valid('_out_ports'):
+            Node(self.graph, self.id)['_out_ports'] = set()
+        if idx in self.out_ports():
+            raise Error("Output port with {} index already exists for {} node.".format(idx, self.name))
+        self._out_ports.add(idx)
+
+    def in_port(self, idx=None) -> Port:
+        if not self.has_valid('_in_ports'):
+            raise Error("Operation {} {} has no _in_ports attribute", self.op, self.name)
+        if idx not in self._in_ports:
+            raise Error("Input port with index {} is not in node {}".format(idx, self.name))
+        return Port(node=self, idx=idx, type='in')
+
+    def in_ports(self):
+        if not self.has_valid('_in_ports'):
+            raise Error("Operation {} {} has no _in_ports attribute", self.op, self.name)
+        return dict_to_ordered_dict({idx: self.in_port(idx) for idx in self._in_ports})
+
+    def out_port(self, idx=None) -> Port:
+        if not self.has_valid('_out_ports'):
+            raise Error("Operation {} {} has no _out_ports attribute", self.op, self.name)
+        if idx not in self._out_ports:
+            raise Error("Output port with index {} is not in node {}".format(idx, self.name))
+        return Port(node=self, idx=idx, type='out')
+
+    def out_ports(self):
+        if not self.has_valid('_out_ports'):
+            raise Error("Operation {} {} has no _out_ports attribute", self.op, self.name)
+        return dict_to_ordered_dict({idx: self.out_port(idx) for idx in self._out_ports})
+
+    def has_port(self, port_type, idx):
+        assert port_type in ['in', 'out'], "Invalid usage of has_port method"
+
+        if port_type == 'in':
+            return self.has_valid('_in_ports') and idx in self.in_ports()
+        else:
+            return self.has_valid('_out_ports') and idx in self.out_ports()
+
     def attrs(self):
         return self.graph.node[self.node]
 
@@ -278,55 +122,50 @@ class NodeWrap:
     def has_and_set(self, k):
         return self.has_valid(k) and self[k]
 
-    def __getitem__(self, k):
-        return self.graph.node[self.node][k]
-
-    def __setitem__(self, k, v):
-        self.graph.node[self.node][k] = v
-
-    def __contains__(self, k):
-        return self.has(k)
-
     def in_nodes_edges(self, control_flow: bool=False):
-        return {x[1]['in']: (NodeWrap(self.graph, x[0]), x[1]) for x in get_inputs(self.graph, self.node, control_flow=control_flow)}
+        return dict_to_ordered_dict({x[1]['in']: (Node(self.graph, x[0]), x[1]) for x in
+                                     self.get_inputs(control_flow=control_flow)})
 
     def in_nodes(self, control_flow: bool=False):
-        assert self.has('kind')
-        assert self.kind in ['op', 'data']
+        assert self.has('kind')  # TODO: remove as it always exists
+        assert self.kind in ['op', 'data'] # TODO: remove as it always exists
         if self.kind == 'op':
-            return {x[1]['in']: NodeWrap(self.graph, x[0]) for x in get_inputs(self.graph, self.node, control_flow=control_flow)}
+            return dict_to_ordered_dict({x[1]['in']: Node(self.graph, x[0]) for x in
+                                         self.get_inputs(control_flow=control_flow)})
         elif self.kind == 'data':
-            return [NodeWrap(self.graph, n) for n, d in get_inputs(self.graph, self.node, control_flow=control_flow)]
+            return [Node(self.graph, n) for n, d in self.get_inputs(control_flow=control_flow)]
+
+    def in_node(self, key=0, control_flow: bool=False):
+        return self.in_nodes(control_flow=control_flow)[key]
 
     def in_edges(self, control_flow: bool=False):
         assert self.has('kind')
         assert self.kind in ['op', 'data']
         if self.kind == 'op':
-            return {x[1]['in']: x[1] for x in get_inputs(self.graph, self.node, control_flow=control_flow)}
+            return dict_to_ordered_dict({x[1]['in']: x[1] for x in self.get_inputs(control_flow=control_flow)})
         elif self.kind == 'data':
-            return [d for n, d in get_inputs(self.graph, self.node, control_flow=control_flow)]
+            return [d for n, d in self.get_inputs(control_flow=control_flow)]
 
     def out_nodes_edges(self, control_flow: bool=False):
-        return {x[1]['out']: (NodeWrap(self.graph, x[0]), x[1]) for x in get_outputs(self.graph, self.node, control_flow=control_flow)}
+        return dict_to_ordered_dict({x[1]['out']: (Node(self.graph, x[0]), x[1]) for x in
+                                     self.get_outputs(control_flow=control_flow)})
 
     def out_nodes(self, control_flow: bool=False):
         assert self.has('kind')
         assert self.kind in ['op', 'data']
         if self.kind == 'op':
-            return {x[1]['out']: NodeWrap(self.graph, x[0]) for x in get_outputs(self.graph, self.node, control_flow=control_flow)}
+            return dict_to_ordered_dict({x[1]['out']: Node(self.graph, x[0]) for x in
+                                         self.get_outputs(control_flow=control_flow)})
         elif self.kind == 'data':
-            return [NodeWrap(self.graph, n) for n, d in get_outputs(self.graph, self.node, control_flow=control_flow)]
+            return [Node(self.graph, n) for n, d in self.get_outputs(control_flow=control_flow)]
 
     def out_edges(self, control_flow: bool=False):
         assert self.has('kind')
         assert self.kind in ['op', 'data']
         if self.kind == 'op':
-            return {x[1]['out']: x[1] for x in get_outputs(self.graph, self.node, control_flow=control_flow)}
+            return dict_to_ordered_dict({x[1]['out']: x[1] for x in self.get_outputs(control_flow=control_flow)})
         elif self.kind == 'data':
-            return [d for n, d in get_outputs(self.graph, self.node, control_flow=control_flow)]
-
-    def in_node(self, key=0, control_flow: bool=False):
-        return self.in_nodes(control_flow=control_flow)[key]
+            return [d for n, d in self.get_outputs(control_flow=control_flow)]
 
     def out_node(self, key=0, control_flow: bool=False):
         return self.out_nodes(control_flow=control_flow)[key]
@@ -340,32 +179,71 @@ class NodeWrap:
     def get_attrs(self):
         return self.graph.node[self.node]
 
+    def get_inputs(self, edge_attr: dict = None, control_flow: bool = False):
+        if edge_attr is None:
+            edge_attr = {}
+        in_edges = self.graph.in_edges(self.id, data=True)
+        if not control_flow:
+            in_edges = [(u, v, d) for u, v, d in in_edges if 'control_flow_edge' not in d or not d['control_flow_edge']]
+        return [(u, d) for u, v, d in in_edges if all([attr in d and d[attr] == edge_attr[attr] for attr in edge_attr])]
+
+    def get_outputs(self, edge_attr: dict = None, control_flow: bool = False):
+        if edge_attr is None:
+            edge_attr = {}
+        out_edges = self.graph.out_edges(self.id, data=True)
+        if not control_flow:
+            out_edges = [(u, v, d) for u, v, d in out_edges if
+                         'control_flow_edge' not in d or not d['control_flow_edge']]
+        return [(v, d) for u, v, d in out_edges if
+                all([attr in d and d[attr] == edge_attr[attr] for attr in edge_attr])]
+
+    def get_sorted_inputs(self, control_flow: bool = False):
+        return sorted([x for x in self.get_inputs(control_flow=control_flow) if 'in' in x[1]],
+                      key=lambda x: x[1]['in'])
+
+    def get_sorted_outputs(self, control_flow: bool = False):
+        return sorted([x for x in self.get_outputs(control_flow=control_flow) if 'out' in x[1]],
+                      key=lambda x: x[1]['out'])
+
     def soft_get(self, k):
         return self[k] if self.has_valid(k) else '<UNKNOWN>'
 
     def edges(self, attrs: dict=None):
-        ''' Get a list of all edges with specified set of attributes.
+        """ Get a single edge with specified set of attributes.
 
+            If none or multiple edges satisfies this criteria, exception is raised
             Edge is represented as tuple (u, v, d), where u is source node,
-            v is destination node and d is edge attributes. The function
-            returns a list of such tuples.
-        '''
+            v is destination node and d is edge attributes.
+        """
         edges = list(self.graph.in_edges([self.id], data=True)) + list(self.graph.out_edges([self.id], data=True))
-        return [(u, v, d) for u,v,d in edges if dict_includes(d, attrs)]
+        return [(u, v, d) for u, v, d in edges if dict_includes(d, attrs)]
 
     def edge(self, attrs: dict=None):
-        ''' Get a single edge with specified set of attributes.
+        """ Get a single edge with specified set of attributes.
 
             If none or multiple edges satisfies this criteria, exception is raised
             Edge is represented as tuple (u, v, d), where u is source node,
             v is destination node and d is edge attributes.
-        '''
+        """
         edges = self.edges(attrs)
         assert len(edges) == 1, 'edges: {}, required attributes: {}'.format(edges, attrs)
         return edges[0]
 
+    def copy_node(self, new_attrs: dict = None, dst_graph=None):
+        ''' Copies node with all attributes (optionally updated) within the same graph or to different graph.'''
+        if new_attrs is None:
+            new_attrs = {}
+        if dst_graph is None:
+            dst_graph = self.graph
+
+        attrs = deepcopy(self.attrs())
+        attrs.update(new_attrs)
+        new_id = dst_graph.unique_id()
+        dst_graph.add_node(new_id, **attrs)
+        return Node(dst_graph, new_id)
+
     def insert_node_with_data_before(self, inp, new_op_class: callable, op_before_params: dict = None,
-                                     infer_current: bool = False):
+                                     infer_current: bool = False, additional_inputs: list = None):
         """
         Inserts operation node with op_before_params and data node before current operation
 
@@ -389,18 +267,26 @@ class NodeWrap:
         new_op_before = new_op_class(graph, op_before_params)
         edge_attrs = deepcopy(graph.get_edge_data(inp.id, node.id)[0])
         graph.remove_edge(inp.id, node.id)
-        new_inp = new_op_before.create_node_with_data([inp], {'name': node.name + cls_name + '/Before'})
+        # form a list of input nodes for a new op node combining new_out and additional_inputs
+        inputs = [inp] + (additional_inputs if additional_inputs else [])
+        new_inp = new_op_before.create_node_with_data(inputs, {'name': node.name + cls_name + '/Before'})
         graph.add_edge(new_inp.id, node.id, **edge_attrs)
         if infer_current:
             node.infer(node)
 
-    def insert_node_with_data_after(self, out, new_op_class: callable, op_after_params: dict = None):
+    def insert_node_with_data_after(self, out, new_op_class: callable, op_after_params: dict = None,
+                                    additional_inputs: list = None):
         """
         Inserts operation node with op_after_params and data node after current operation
 
         :param out: output data node of current node
         :param new_op_class: class of operation that will be inserted after current operation node
         :param op_after_params:  parameters to be added to operation that will be inserted after current operation
+        :param additional_inputs:  other parameters for a new operation node in addition to one that is created
+            at the 'out' placed; new nodes are added after 0-th input
+
+            TODO Allow indexing for input parameters as well as for 'out' data node to explicitly
+                specify ports that are connected to.
 
         Before calling:
         [...] -> Cur_Op -> Cur_Data -> [...]
@@ -421,7 +307,9 @@ class NodeWrap:
         graph.remove_edge(node.id, out.id)
         new_out = Op.create_data_node(graph, node)
         node.infer(node)
-        new_op_after.create_node_with_data([new_out], {'name': node.name + cls_name + '/After'}, data_nodes=out)
+        # form a list of input nodes for a new op node combining new_out and additional_inputs
+        inputs = [new_out] + (additional_inputs if additional_inputs else [])
+        new_op_after.create_node_with_data(inputs, {'name': node.name + cls_name + '/After'}, data_nodes=out)
 
     def bracket_with_different_nodes_with_data(self, inp, out, new_op_class_before: callable,
                                                new_op_class_after: callable,
@@ -469,19 +357,499 @@ class NodeWrap:
                                                     new_op_class_before=new_op_class, new_op_class_after=new_op_class,
                                                     op_before_params=op_before_params, op_after_params=op_after_params)
 
+    def insert_node_after(self, new_node, node_out_port: int = 0):
+        """
+        Insert node 'new_node' after output with index 'node_out_port' of the node 'node'. All consumers of node 'node'
+        output with index 'node_out_port' will be changed to consume node 'new_node'.
+        The function should be used when graph doesn't contain data nodes yet.
+        :param node: node after which new node should be inserted.
+        :param new_node: node to be inserted.
+        :param node_out_port: the output index for the node 'node' to insert
+        :return: None
+        """
+        assert self.graph is new_node.graph
+        assert (len([name for name in self.graph.nodes() if Node(self.graph, name).soft_get('kind') == 'data']) == 0)
 
-class Node(NodeWrap):
-    pass
+        graph = self.graph
+        old_edges = list(graph.out_edges(self.id, data=True, keys=True))
+        # create new edges first and then remove all old edges. This is needed for case when 'node' has several consumers
+        # getting input from 'node_out_port'.
+        # save tuple ("name of the destination edge", "edge key") to be removed
+        node_name_and_edge_key = []
+        for _, dst_name, edge_key, edge_attrs in old_edges:
+            if edge_attrs['out'] == node_out_port:
+                log.debug('Create edge from "{}" to "{}"'.format(new_node.name, dst_name))
+                graph.create_edge(new_node, Node(graph, dst_name), 0, edge_attrs['in'])
+                node_name_and_edge_key.append((dst_name, edge_key))
+        for dst_name, edge_key in node_name_and_edge_key:
+            log.debug('Remove edge from "{}" to "{}"'.format(self.id, dst_name))
+            graph.remove_edge(self.id, dst_name, edge_key)
+        graph.create_edge(self, new_node, node_out_port, 0, {})
+
+    def replace_node(self, new_node, new_node_out_port: int=None):
+        """
+        Replaces node 'old_node' with a node 'new_node' preserving edge attributes.
+        :param old_node: node to be replaced.
+        :param new_node: node to replace with.
+        :return: None
+        """
+        assert self.graph is new_node.graph
+        assert self.id != new_node.id, "New node and replaceable node are the same"
+        graph = self.graph
+        # save output edges and reconnect them to new node
+        for _, dst_node_name, edge_attrs in graph.out_edges(self.id, data=True):
+            new_edge_attrs = deepcopy(edge_attrs)
+            if new_node_out_port is not None:
+                assert 'out' not in edge_attrs or edge_attrs['out'] == 0, \
+                    'replace_node function can replace old node with a single output port only if new_node_out_port is ' \
+                    'specified'
+                new_edge_attrs.update({'out': new_node_out_port})
+            graph.add_edge(new_node.id, dst_node_name, **new_edge_attrs)
+
+        # if the node for replace is output node then we propagate this attribute to a new node
+        if len(self.out_nodes()) == 1 and self.out_node().has('op') and self.out_node().op == 'OpOutput':
+            graph.remove_node(self.out_node().id)
+            add_opoutput(graph, new_node.id, 0, False)
+        graph.remove_node(self.id)
+
+    def input_ports_with(self, node):
+        """
+        Returns a list of integers that specify input ports that connected to a given node.
+        :param node: node in the graph that is expected to appear at input port for self node
+        :return: a list of integers with port indices that are connected to self node
+        """
+        return [i for i in range(len(self.in_nodes())) if self.in_node(i).id == node.id]
+
+class Graph(nx.MultiDiGraph):
+    def __init__(self, data=None, **attr):
+        self.stage = None
+        super().__init__(data, **attr)
+
+    unique_id_count = 0
+
+    # SAFE API DESCRIPTION
+    # all provided methods below are designed to be more safe and convenient
+    # be careful while using other methods from nx.MultiDiGraph
+
+    def add_node(self, node_for_adding, **attrs):
+        # TODO: check required attrs for node
+        super().add_node(node_for_adding, **attrs)
+        node = Node(self, node_for_adding)
+
+        in_ports_count = node.in_ports_count if node.has_valid('in_ports_count') else None
+        out_ports_count = node.out_ports_count if node.has_valid('out_ports_count') else None
+
+        node['_in_ports'] = set()
+        node['_out_ports'] = set()
+
+        if in_ports_count is not None:
+            for idx in range(in_ports_count):
+                node.add_input_port(idx=idx)
+
+        if out_ports_count is not None:
+            for idx in range(out_ports_count):
+                node.add_output_port(idx=idx)
+
+    def add_edge(self, u_for_edge, v_for_edge, key=None, **attr):
+        return super().add_edge(u_for_edge, v_for_edge, key=key, **attr)
+
+    def add_edges_from(self, ebunch_to_add, **attr):
+        for e in ebunch_to_add:
+            ne = len(e)
+            if ne == 4:
+                u, v, key, dd = e
+            elif ne == 3:
+                u, v, dd = e
+                key = None
+            elif ne == 2:
+                u, v = e
+                dd = {}
+                key = None
+            else:
+                raise Error("Edge tuple %s must be a 2-tuple, 3-tuple or 4-tuple." % (e,))
+            ddd = attr.copy()
+            ddd.update(dd)
+            self.add_edge(u, v, key=key, **ddd)
 
+    def remove_edge(self, u, v, key=None):
+        return super().remove_edge(u, v, key=key)
 
-def get_sorted_inputs(node: Node, control_flow: bool=False):
-    return sorted([x for x in get_inputs(node.graph, node.node, control_flow=control_flow) if 'in' in x[1]], key=lambda x: x[1]['in'])
+    def erase_node(self, node: Node):
+        """
+        Erases node from the graph and reconnect edges from input node(s) to output node(s)
+        Produces assertion error if the node being removed has multiple inputs or outputs.
+        The function can be used in the front phase only (when there are no data nodes in the graph).
+        :param node: Node to erase
+        """
+        node_id = node.id
+
+        inputs = list(self.in_edges(node_id, data=True))
+        outputs = list(self.out_edges(node_id, data=True))
+
+        assert node.kind == 'op' and (len(node.out_nodes()) == 0 or list(node.out_nodes().values())[0].kind != 'data'), \
+            "The function must be used before the partial infer when graph doesn't contain data nodes."
+        assert len(node.out_nodes()) <= 1, "The node {} must produce just one output tensor".format(
+            node.soft_get('name'))
+        assert len(inputs) <= 1, "The node {} must have just one input".format(node.soft_get('name'))
+
+        if len(outputs) == 0 and len(inputs) != 0:
+            from mo.front.extractor import add_output_ops
+            input_ids = {input_node_id: {'port': {'out': [attrs['out']]}} for input_node_id, _, attrs in inputs}
+            if node.has('op') and node.op == 'OpOutput':
+                add_output_ops(self, input_ids)
+
+        if len(outputs) == 0 or len(inputs) == 0:
+            self.remove_node(node_id)
+            return
+
+        input_node_id = inputs[0][0]
+        for src, dst, attrs in outputs:
+            self.remove_edge(src, dst)
+            # update the 'out' attribute of the edge from the node being removed
+            attrs['out'] = inputs[0][2]['out']
+            self.add_edge(input_node_id, dst, **attrs)
+        self.remove_node(node_id)
+
+    def get_edge_data(self, u, v, key=None, default=None):
+        return super().get_edge_data(u, v, key=key, default=default)
+
+    def get_inputs_with_ports(self, match, pattern_edges, input_names_in_pattern):
+        """
+        Front replacements of multi-input nodes should specify output port to add_node-like functions
+        This function is a helper to get such information out of matched nodes
+        :param graph: graph to operate on
+        :param match: dictionary returned by matching function
+        :param pattern_edges: edges that are specified in pattern
+        :param input_names_in_pattern: names of matched nodes as they were specified in pattern that should be in
+        resulting list
+        :return: list of tuples of node and output port
+        """
+        inputs = []
+        for name in input_names_in_pattern:
+            assert name in match, "node named {} not in match {}".format(name, match)
+            src = match[name]
+            dst = []
+            for edge in pattern_edges:
+                if edge[0] == name:
+                    assert edge[1] in match, "name from pattern_edges {} not in match {}".format(edge[1], match)
+                    dst.append(match[edge[1]])
+            if len(dst) != 1:
+                raise Error('Multiple output ports detected for node {} as {} in pattern'.format(match[name].id, name))
+            dst = dst[0]
+            out_port = self.get_edge_data(src.id, dst.id)[0]['out']
+            inputs.append((src, out_port))
+        return inputs
+
+    def get_node_id_by_name(self, name: str):
+        for node in self.nodes():
+            if 'name' in self.node[node] and self.node[node]['name'] == name:
+                return node
+        raise Error('No node with name {}. ' +
+                    refer_to_faq_msg(51), name)
+
+    def get_op_nodes(self, **attrs):
+        nodes = self.get_nodes_with_attributes(**dict(kind='op', **attrs))
+        return [Node(self, node) for node in nodes]
+
+    def get_data_nodes(self, has_value=None):
+        """
+        Returns list of data nodes.
+        If has_value = True, returns data nodes with value
+        If has_value = False, returns data nodes without value
+        """
+        data_nodes = [Node(self, node) for node in self.nodes() if Node(self, node).soft_get('kind') == 'data']
+        return [node for node in data_nodes if has_value is None or node.has_valid('value') == has_value]
+
+    def get_nodes_with_attributes(self, **attrs: dict):
+        node_attrs = self.nodes(data=True)
+        return [n for n, d in node_attrs if all(a in d.items() for a in attrs.items())]
+
+    def unique_id(self, prefix: str = ""):
+        """
+        Generates a unique node id for a new node in a given graph.
+        The optional string prefix can be specified.
+        """
+        # TODO thread safety?
+        self.unique_id_count = max(self.unique_id_count, self.number_of_nodes()) + 1
+        if prefix and not self.has_node(prefix):
+            return str(prefix)
+        while self.has_node(prefix + str(self.unique_id_count)):
+            self.unique_id_count += 1
+        return prefix + str(self.unique_id_count)
+
+    def check_empty_graph(self, description: str):
+        if len(self.nodes()) <= 1:
+            raise Error(
+                "Graph contains {} node after executing {}. It considered as error because resulting IR will be "
+                "empty which is not usual".format(len(self.nodes()), description))
+
+    def check_shapes_consistency(self):
+        data_nodes = self.get_data_nodes()
+        data_nodes_with_wrong_shapes = []
+        for data_node in data_nodes:
+            if not data_node.has('shape'):
+                data_nodes_with_wrong_shapes.append((data_node.name, "no shape attribute"))
+                continue
+            if data_node.shape is not None and not isinstance(data_node.shape, np.ndarray):
+                data_nodes_with_wrong_shapes.append((data_node.name, type(data_node.shape)))
+        if len(data_nodes_with_wrong_shapes) > 0:
+            raise Error("Graph contains data nodes ({}) with inconsistent shapes: {}".format(
+                len(data_nodes_with_wrong_shapes),
+                data_nodes_with_wrong_shapes
+            ))
+
+    def check_nodes_ports_are_consecutive(self):
+        # Check that all operation nodes has consecutive ports indexes
+        op_nodes = self.get_op_nodes()
+        for node in op_nodes:
+            for idx in range(len(node.in_ports())):
+                if idx not in node.in_ports():
+                    raise Error("Node {} has not consecutive in ports indexes: {}".format(node.name,
+                                                                                          list(node.in_ports().keys())))
+            for idx in range(len(node.out_ports())):
+                if idx not in node.out_ports():
+                    raise Error("Node {} has not consecutive out ports indexes: {}".format(node.name,
+                                                                                           list(node.out_ports().keys())))
+
+    def dump_graph_for_graphviz(self, node_attrs: list = ['kind', 'op', 'shape'],
+                                edge_attrs: list = ['in', 'out'],
+                                nodes_to_dump: list = None, save_to_svg=False):
+        log.debug("---- GRAPHVIZ OUTPUT STARTS ----")
+        if nodes_to_dump is None:
+            nodes_to_dump = self.nodes()
+        string = '\ndigraph {\n'
+        visited_nodes = set()
+        for src_node_name, dst_node_name, attrs in self.edges(data=True):
+            visited_nodes.add(src_node_name)
+            visited_nodes.add(dst_node_name)
+            if src_node_name not in nodes_to_dump or dst_node_name not in nodes_to_dump:
+                continue
+            src_node = self.node[src_node_name]
+            dst_node = self.node[dst_node_name]
+            src_node_string = str(src_node_name) + '\\n' + '\\n'.join(
+                [str(key) + '=' + str(src_node.get(key, 'None')) for key in node_attrs if key in src_node])
+            dst_node_string = str(dst_node_name) + '\\n' + '\\n'.join(
+                [str(key) + '=' + str(dst_node.get(key, 'None')) for key in node_attrs if key in dst_node])
+            edge_string = ' '.join([str(key) + '=' + str(attrs.get(key, 'None')) for key in edge_attrs if key in attrs])
+            string += '"{}" -> "{}" [label = "{}"];\n'.format(src_node_string, dst_node_string, edge_string)
+        for node in nodes_to_dump:
+            if node not in visited_nodes:
+                string += '"{}"'.format(node)  # TODO: add attributes like it was done in the loop above
+                visited_nodes.add(node)
+        string += '}'
+        log.debug(string)
+        log.debug("---- GRAPHVIZ OUTPUT ENDS ----")
+
+        if save_to_svg:
+            try:
+                import graphviz
+                import os
+                file_name = "{}_{}.txt".format(self.name.replace('/', '_'), 0)
+                id = 1
+                while os.path.exists(file_name):
+                    file_name = "{}_{}.txt".format(self.name.replace('/', '_'), id)
+                    id += 1
+                with open(file_name, "w") as f:
+                    f.write(string)
+                graphviz.render('dot', 'svg', file_name)
+                print('Graph was saved to {}.{}'.format(file_name, 'svg'))
+            except ImportError:
+                raise ImportError('Can\'t import graphviz')
+            except Exception as e:
+                raise Error('Can\'t save graph to svg') from e
+
+        return string
+
+    def print_graph_stat(self):
+        log.debug('Number of nodes in graph: {}'.format(self.number_of_nodes()))
+        log.debug('Number of edges in graph: {}'.format(len(list(self.edges()))))
+        ops = collections.defaultdict(int)
+        for _node in self.nodes():
+            node = Node(self, _node)
+            kind = node.kind if node.has('kind') else '<UNDEFINED>'
+            if node.has('op'):
+                ops['op/' + node.op] += 1
+            else:
+                ops[kind] += 1
+            if node.has('shape') and np.any(node.shape == 0):
+                log.error("Found bad shape: '{}' for node '{}'".format(node.shape, node.node))
+        for k, v in ops.items():
+            log.debug('   {} : {}'.format(k, v))
+
+    def create_sub_graph_copy(self, nodes_to_extract: list):
+        """
+        Create new graph which is a sub-graph of the 'graph' that contains just nodes from 'nodes_to_extract' list. The
+        returned sub-graph is a deep copy of the provided graph nodes.
+        :param graph: graph to create a sub-graph from.
+        :param nodes_to_extract: list of node names to extract.
+        :return: new graph.
+        """
+        return self.subgraph(nodes_to_extract).copy()
+
+    def create_edge(self, src_node: Node, dst_node: Node, out_port: int = 0, in_port: int = 0, edge_attrs: dict = None):
+        """
+        Creates edge from node 'src_node' from output with index 'out_port' to node 'dst_node' with input index 'in_port'.
+        :param src_node: node to create edge from.
+        :param dst_node: node to create edge to.
+        :param out_port: the index of output tensor of the 'src_node'.
+        :param in_port: the input index of the node 'dst_node'.
+        :param edge_attrs: dictionary with edge attrs.
+        :return: None
+        """
+        # edges must belong to the same graph
+        assert src_node.graph is dst_node.graph
+        graph = src_node.graph
+
+        if edge_attrs is None:
+            edge_attrs = dict()
+        else:
+            edge_attrs = edge_attrs.copy()
+        edge_attrs.update(
+            {'in': in_port, 'out': out_port, 'in_attrs': ['in', 'permutation'], 'out_attrs': ['out', 'permutation'],
+             'data_attrs': ['fw_tensor_debug_info']})
+
+        # TODO: in case if in_port do not exists, we should raise an Exception here
+        graph.add_edges_from([(src_node.id, dst_node.id, edge_attrs)])
+
+
+def create_graph_with_nodes(src_nodes, get_id: callable, get_attrs: callable):
+    """
+    Go over all nodes in src_nodes that should be enumerable and create new NX nodes
+    using get_id and get_attrs functions to create node id and node attributes correspondingly.
+    """
+    graph = Graph()
+    for node in src_nodes:
+        graph.add_node(get_id(node), **get_attrs(node))
+    return graph
+
+
+def dict_includes_compare_attrs(attr, attr_probe):
+    if callable(attr_probe) and not isinstance(attr_probe, type):
+        return attr_probe(attr)
+    else:
+        return attr == attr_probe
+
+
+def dict_includes(big: dict, sub_dict: dict, skip_attr_names=[]):
+    """ Searches attributes from sub_dict in big and ensures that all values match.
+
+        Entries in sub_dict can be of two types: callable or not callable. If callable is specified
+        it is treated as probing function for attribute value from big dictionary by callable(attr) expression.
+        If it is not callable, the values are compared with == operator.
+    """
+    return all(
+        dict_includes_compare_attrs(big.get(attr, None), sub_dict[attr])
+        for attr in sub_dict.keys() if attr not in skip_attr_names
+    )
+
+
+def add_opoutput(graph: Graph, node_name: str, port: int, cut: bool = True):
+    """
+    Creates and connects OpOutput node to node_name port. Cuts existing port if requested.
+    :param graph: graph to operate with
+    :param node_name: name of existing node in the graph that we want to add OpOutput to
+    :param port: output port of node to connect OpOutput to
+    :param cut: determines way of operating with edge specified by node_name and port
+    """
+    # we import it here because Op imports add_attrs_props and update_ie_fields from this file
+    from mo.ops.output import Output
+    node = Node(graph, node_name)
+    if cut and len(node.out_edges()) != 0:
+        opoutput_node = Output(graph).create_node_on_port(node, port, {'name': node_name + '/sink_port_' + str(port)})
+    else:
+        opoutput_node = Output(graph).create_node([(node, port)], {'name': node_name + '/sink_port_' + str(port)})
+        opoutput_node.in_edge()['data_attrs'] = ['fw_tensor_debug_info']
+        opoutput_node.in_edge()['fw_tensor_debug_info'] = [(node_name, port)]
+    log.debug('Sink: {} for node {}'.format(opoutput_node.id, node_name))
+    log.debug(str(graph.node[opoutput_node.id]))
+    log.debug("Add edge from {} to {}".format(node_name, opoutput_node.id))
+    return opoutput_node.id
+
+
+# TODO implement merging for keys with dictionary values?
+def merge_edge_props(attrs: dict, additional_attrs: dict):
+    """
+    Update edge attributes without changing 'in' and 'out' keys.
+    It is necessary to copy edge attributes during merging of nodes when
+    result of one subgraph call is passed as input to another subgraph call
+    """
+    result = attrs
+    for (key, value) in additional_attrs.items():
+        if key not in ['in', 'out']:
+            if type(additional_attrs[key]) is list:
+                if key not in result:
+                    result[key] = []
+                result[key].extend(additional_attrs[key])
+                result[key] = list(set(result[key]))  # silly solution to find unique elements
+            else:
+                result[key] = value
+    return result
+
+
+# All functions below are deprecated and will be removed in next release
+# Please, use methods from Graph/Node classes instead
+
+
+@deprecated_api(Graph)
+def get_node_id_by_name(graph: Graph, name: str):
+    return graph.get_node_id_by_name(name=name)
+
+
+@deprecated_api(Graph)
+def print_graph_stat(graph: Graph):
+    return graph.print_graph_stat()
+
+
+@deprecated_api(Graph)
+def get_inputs_with_ports(graph: Graph, match, pattern_edges, input_names_in_pattern):
+    """
+    Front replacements of multi-input nodes should specify output port to add_node-like functions
+    This function is a helper to get such information out of matched nodes
+    :param graph: graph to operate on
+    :param match: dictionary returned by matching function
+    :param pattern_edges: edges that are specified in pattern
+    :param input_names_in_pattern: names of matched nodes as they were specified in pattern that should be in
+    resulting list
+    :return: list of tuples of node and output port
+    """
+    return graph.get_inputs_with_ports(match=match,
+                                       pattern_edges=pattern_edges,
+                                       input_names_in_pattern=input_names_in_pattern)
+
+
+@deprecated_api(Graph)
+def dump_graph_for_graphviz(graph: Graph, node_attrs: list = ['kind', 'op', 'shape'],
+                            edge_attrs: list = ['in', 'out'],
+                            nodes_to_dump: list = None, save_to_svg=False):
+    return graph.dump_graph_for_graphviz(node_attrs=node_attrs,
+                                         edge_attrs=edge_attrs,
+                                         nodes_to_dump=nodes_to_dump,
+                                         save_to_svg=save_to_svg)
+
+
+@deprecated_api(Graph)
+def create_sub_graph_copy(graph: Graph, nodes_to_extract: list):
+    """
+    Create new graph which is a sub-graph of the 'graph' that contains just nodes from 'nodes_to_extract' list. The
+    returned sub-graph is a deep copy of the provided graph nodes.
+    :param graph: graph to create a sub-graph from.
+    :param nodes_to_extract: list of node names to extract.
+    :return: new graph.
+    """
+    return graph.create_sub_graph_copy(nodes_to_extract=nodes_to_extract)
 
 
-def get_sorted_outputs(node: Node, control_flow: bool=False):
-    return sorted([x for x in get_outputs(node.graph, node.node, control_flow=control_flow) if 'out' in x[1]], key=lambda x: x[1]['out'])
+@deprecated_api(Graph)
+def get_graph_ops(graph: Graph):
+    return graph.get_op_nodes()
 
 
+@deprecated_api(Graph)
+def check_empty_graph(graph: Graph, description: str):
+    return graph.check_empty_graph(description=description)
+
+
+@deprecated_api(Graph)
 def create_edge(src_node: Node, dst_node: Node, out_port: int = 0, in_port: int = 0, edge_attrs: dict = None):
     """
     Creates edge from node 'src_node' from output with index 'out_port' to node 'dst_node' with input index 'in_port'.
@@ -492,20 +860,35 @@ def create_edge(src_node: Node, dst_node: Node, out_port: int = 0, in_port: int
     :param edge_attrs: dictionary with edge attrs.
     :return: None
     """
-    # edges must belong to the same graph
     assert src_node.graph is dst_node.graph
     graph = src_node.graph
+    return graph.create_edge(src_node=src_node, dst_node=dst_node, out_port=out_port, in_port=in_port,
+                             edge_attrs=edge_attrs)
 
-    if edge_attrs is None:
-        edge_attrs = dict()
-    else:
-        edge_attrs = edge_attrs.copy()
-    edge_attrs.update({'in': in_port, 'out': out_port, 'in_attrs': ['in', 'permutation'], 'out_attrs': ['out', 'permutation'],
-                       'data_attrs': ['fw_tensor_debug_info']})
 
-    graph.add_edges_from([(src_node.id, dst_node.id, edge_attrs)])
+@deprecated_api(Graph)
+def erase_node(node: Node):
+    """
+    Erases node from the graph and reconnect edges from input node(s) to output node(s)
+    Produces assertion error if the node being removed has multiple inputs or outputs.
+    The function can be used in the front phase only (when there are no data nodes in the graph).
+    :param node: Node to erase
+    """
+    graph = node.graph
+    return graph.erase_node(node)
+
+
+@deprecated_api(Node)
+def get_sorted_inputs(node: Node, control_flow: bool = False):
+    return node.get_sorted_inputs(control_flow=control_flow)
 
 
+@deprecated_api(Node)
+def get_sorted_outputs(node: Node, control_flow: bool = False):
+    return node.get_sorted_outputs(control_flow=control_flow)
+
+
+@deprecated_api(Node)
 def insert_node_after(node: Node, new_node: Node, node_out_port: int = 0):
     """
     Insert node 'new_node' after output with index 'node_out_port' of the node 'node'. All consumers of node 'node'
@@ -516,67 +899,10 @@ def insert_node_after(node: Node, new_node: Node, node_out_port: int = 0):
     :param node_out_port: the output index for the node 'node' to insert
     :return: None
     """
-    assert node.graph is new_node.graph
-    assert (len([name for name in node.graph.nodes() if Node(node.graph, name).soft_get('kind') == 'data']) == 0)
-
-    graph = node.graph
-    old_edges = list(graph.out_edges(node.id, data=True, keys=True))
-    # create new edges first and then remove all old edges. This is needed for case when 'node' has several consumers
-    # getting input from 'node_out_port'.
-    # save tuple ("name of the destination edge", "edge key") to be removed
-    node_name_and_edge_key = []
-    for _, dst_name, edge_key, edge_attrs in old_edges:
-        if edge_attrs['out'] == node_out_port:
-            log.debug('Create edge from "{}" to "{}"'.format(new_node.name, dst_name))
-            create_edge(new_node, Node(graph, dst_name), 0, edge_attrs['in'])
-            node_name_and_edge_key.append((dst_name, edge_key))
-    for dst_name, edge_key in node_name_and_edge_key:
-        log.debug('Remove edge from "{}" to "{}"'.format(node.id, dst_name))
-        graph.remove_edge(node.id, dst_name, edge_key)
-    create_edge(node, new_node, node_out_port, 0, {})
-
-
-def erase_node(node: Node):
-    """
-    Erases node from the graph and reconnect edges from input node(s) to output node(s)
-    Produces assertion error if the node being removed has multiple inputs or outputs.
-    The function can be used in the front phase only (when there are no data nodes in the graph).
-    :param node: Node to erase
-    """
-    graph = node.graph
-    node_id = node.id
-
-    inputs = list(graph.in_edges(node_id, data=True))
-    outputs = list(graph.out_edges(node_id, data=True))
-
-    assert node.kind == 'op' and (len(node.out_nodes()) == 0 or list(node.out_nodes().values())[0].kind != 'data'), \
-        "The function must be used before the partial infer when graph doesn't contain data nodes."
-    assert len(node.out_nodes()) <= 1, "The node {} must produce just one output tensor".format(node.soft_get('name'))
-    assert len(inputs) <= 1, "The node {} must have just one input".format(node.soft_get('name'))
-
-    if len(outputs) == 0 and len(inputs) != 0:
-        for input_node_id, _, __ in inputs:
-            if node.has_and_set('is_output'):
-                if graph.node[input_node_id]['kind'] == 'op':
-                    data_nodes = [u for u, v in graph.in_edges(input_node_id)]
-                    for data in data_nodes:
-                        graph.node[data]['is_output'] = graph.node[node_id]['is_output']
-                else:
-                    graph.node[input_node_id]['is_output'] = graph.node[node_id]['is_output']
-
-    if len(outputs) == 0 or len(inputs) == 0:
-        graph.remove_node(node_id)
-        return
-
-    input_node_id = inputs[0][0]
-    for src, dst, attrs in outputs:
-        graph.remove_edge(src, dst)
-        # update the 'out' attribute of the edge from the node being removed
-        attrs['out'] = inputs[0][2]['out']
-        graph.add_edge(input_node_id, dst, **attrs)
-    graph.remove_node(node_id)
+    return node.insert_node_after(new_node=new_node, node_out_port=node_out_port)
 
 
+@deprecated_api(Node)
 def replace_node(old_node: Node, new_node: Node, new_node_out_port: int=None):
     """
     Replaces node 'old_node' with a node 'new_node' preserving edge attributes.
@@ -584,40 +910,20 @@ def replace_node(old_node: Node, new_node: Node, new_node_out_port: int=None):
     :param new_node: node to replace with.
     :return: None
     """
-    assert old_node.graph is new_node.graph
-    graph = old_node.graph
-    # save output edges and reconnect them to new node
-    for _, dst_node_name, edge_attrs in graph.out_edges(old_node.id, data=True):
-        new_edge_attrs = deepcopy(edge_attrs)
-        if new_node_out_port is not None:
-            assert 'out' not in edge_attrs or edge_attrs['out'] == 0, \
-                'replace_node function can replace old node with a single output port only if new_node_out_port is ' \
-                'specified'
-            new_edge_attrs.update({'out': new_node_out_port})
-        graph.add_edge(new_node.id, dst_node_name, **new_edge_attrs)
-
-    # if the node for replace is output node then we propagate this attribute to a new node
-    if old_node.has_valid('is_output') and old_node.is_output:
-        old_node.is_output = False
-        new_node['is_output'] = True
-    graph.remove_node(old_node.id)
-
-
-def check_empty_graph(graph: nx.MultiDiGraph, description: str):
-    if len(graph.nodes()) <= 1:
-        raise Error("Graph contains {} node after executing {}. It considered as error because resulting IR will be "
-                    "empty which is not usual".format(len(graph.nodes()), description))
-
-
-def copy_node(src_node: Node, new_attrs: dict=None, dst_graph: nx.MultiDiGraph=None):
-    ''' Copies node with all attributes (optionally updated) within the same graph or to different graph.'''
-    if new_attrs is None:
-        new_attrs = {}
-    if dst_graph is None:
-        dst_graph = src_node.graph
-
-    attrs = deepcopy(src_node.attrs())
-    attrs.update(new_attrs)
-    new_id = unique_id(dst_graph)
-    dst_graph.add_node(new_id, attrs)
-    return Node(dst_graph, new_id)
+    return old_node.replace_node(new_node=new_node, new_node_out_port=new_node_out_port)
+
+
+@deprecated_api(Node)
+def copy_node(src_node: Node, new_attrs: dict=None, dst_graph: nx.MultiDiGraph = None):
+    """ Copies node with all attributes (optionally updated) within the same graph or to different graph."""
+    return src_node.copy_node(new_attrs=new_attrs, dst_graph=dst_graph)
+
+
+@deprecated_api(Node)
+def get_inputs(graph: Graph, node: str, edge_attr: dict = None, control_flow: bool = False):
+    return Node(graph, node).get_inputs(edge_attr=edge_attr, control_flow=control_flow)
+
+
+@deprecated_api(Node)
+def get_outputs(graph: Graph, node: str, edge_attr: dict = None, control_flow: bool = False):
+    return Node(graph, node).get_outputs(edge_attr=edge_attr, control_flow=control_flow)
diff --git a/model-optimizer/mo/graph/graph_test.py b/model-optimizer/mo/graph/graph_test.py
index 6b5d9905e..91131a65f 100644
--- a/model-optimizer/mo/graph/graph_test.py
+++ b/model-optimizer/mo/graph/graph_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,16 @@
 
 import unittest
 
-from mo.graph.graph import erase_node, get_node_id_by_name, Node, replace_node, get_inputs_with_ports
+import numpy as np
+
+from generator import generator, generate
+
+from mo.graph.graph import Node, Graph, add_opoutput
 from mo.ops.const import Const
 from mo.utils.error import Error
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
+
 nodes = {
     '0': {'name': 'input1', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
     '1': {'name': 'input2', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
@@ -28,8 +33,7 @@ nodes = {
     '3': {'name': 'node_2', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'NotPlaceholder'},
     '4': {'name': 'node_3', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'NotPlaceholder'},
     '5': {'name': 'node_4', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'NotPlaceholder'},
-    '6': {'name': 'output', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'OpOutput',
-          'is_output': True},
+    '6': {'name': 'output', 'value': None, 'kind': 'op', 'op': 'OpOutput'},
     'input_3': {'name': 'input_3', 'type': 'Identity', 'value': None, 'kind': 'op', 'op': 'Placeholder'}
 }
 edges = {
@@ -47,31 +51,31 @@ class TestGetNodeById(unittest.TestCase):
         self.graph = build_graph(nodes, edges)
 
     def test_get_node_id_by_name(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'input1'), '0')
+        self.assertEqual(self.graph.get_node_id_by_name('input1'), '0')
 
     def test_get_node_id_by_name_1(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'input2'), '1')
+        self.assertEqual(self.graph.get_node_id_by_name('input2'), '1')
 
     def test_get_node_id_by_name_2(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'node_1'), '2')
+        self.assertEqual(self.graph.get_node_id_by_name('node_1'), '2')
 
     def test_get_node_id_by_name_3(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'node_2'), '3')
+        self.assertEqual(self.graph.get_node_id_by_name('node_2'), '3')
 
     def test_get_node_id_by_name_4(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'node_3'), '4')
+        self.assertEqual(self.graph.get_node_id_by_name('node_3'), '4')
 
     def test_get_node_id_by_name_5(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'node_4'), '5')
+        self.assertEqual(self.graph.get_node_id_by_name('node_4'), '5')
 
     def test_get_node_id_by_name_6(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'output'), '6')
+        self.assertEqual(self.graph.get_node_id_by_name('output'), '6')
 
     def test_get_node_id_by_name_7(self):
-        self.assertEqual(get_node_id_by_name(self.graph, 'input_3'), 'input_3')
+        self.assertEqual(self.graph.get_node_id_by_name('input_3'), 'input_3')
 
     def test_get_node_id_by_name_8(self):
-        self.assertRaises(Error, get_node_id_by_name, self.graph, '1')
+        self.assertRaises(Error, self.graph.get_node_id_by_name, '1')
 
 
 class TestEraseNode(unittest.TestCase):
@@ -89,7 +93,7 @@ class TestEraseNode(unittest.TestCase):
         self.assertEqual(len(graph.edges()), 2)
         self.assertListEqual(list(graph.out_edges('input')), [('input', 'noop')])
 
-        erase_node(Node(graph, 'noop'))
+        graph.erase_node(Node(graph, 'noop'))
 
         self.assertEqual(len(graph.nodes()), 2)
         self.assertEqual(len(graph.edges()), 1)
@@ -121,7 +125,7 @@ class TestEraseNode(unittest.TestCase):
              ('input', 'output_3', {'in': 10, 'out': 0})],
             nodes_with_edges_only=True)
 
-        erase_node(Node(graph, 'noop'))
+        graph.erase_node(Node(graph, 'noop'))
 
         compare_graphs(graph, ref_graph, 'output_1')
 
@@ -151,7 +155,7 @@ class TestEraseNode(unittest.TestCase):
              ('input', 'output_3', {'in': 10, 'out': 0})],
             nodes_with_edges_only=True)
 
-        erase_node(Node(graph, 'noop'))
+        graph.erase_node(Node(graph, 'noop'))
 
         compare_graphs(graph, ref_graph, 'output_1')
 
@@ -169,7 +173,7 @@ class TestEraseNode(unittest.TestCase):
              ('noop', 'output_2', {'in': 2, 'out': 1}),
              ('noop', 'output_3', {'in': 10, 'out': 0})])
 
-        self.assertRaises(AssertionError, erase_node, Node(graph, 'noop'))
+        self.assertRaises(AssertionError, graph.erase_node, Node(graph, 'noop'))
 
     def test_remove_noop_nodes_front(self):
         graph = build_graph(
@@ -184,7 +188,7 @@ class TestEraseNode(unittest.TestCase):
         self.assertEqual(len(graph.edges()), 1)
         self.assertListEqual(list(graph.out_edges('noop')), [('noop', 'output')])
 
-        erase_node(Node(graph, 'noop'))
+        graph.erase_node(Node(graph, 'noop'))
 
         self.assertEqual(len(graph.nodes()), 1)
         self.assertEqual(len(graph.edges()), 0)
@@ -203,21 +207,20 @@ class TestEraseNode(unittest.TestCase):
         self.assertEqual(len(graph.edges()), 1)
         self.assertListEqual(list(graph.in_edges('noop')), [('input', 'noop')])
 
-        erase_node(Node(graph, 'noop'))
+        graph.erase_node(Node(graph, 'noop'))
 
         self.assertEqual(len(graph.nodes()), 1)
         self.assertEqual(len(graph.edges()), 0)
         self.assertEqual(len(graph.in_edges('input')), 0)
 
     def test_remove_noop_nodes_noop_only(self):
-        import networkx as nx
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_node('noop', **{'type': 'NoOp', 'value': None, 'kind': 'op'})
 
         self.assertEqual(len(graph.nodes()), 1)
         self.assertEqual(len(graph.edges()), 0)
 
-        erase_node(Node(graph, 'noop'))
+        graph.erase_node(Node(graph, 'noop'))
 
         self.assertEqual(len(graph.nodes()), 0)
         self.assertEqual(len(graph.edges()), 0)
@@ -239,7 +242,7 @@ class TestEraseNode(unittest.TestCase):
              ('noop', 'output_1'),
              ('noop', 'output_2'),
              ('noop', 'output_3')])
-        self.assertRaises(AssertionError, erase_node, Node(graph, 'noop'))
+        self.assertRaises(AssertionError, graph.erase_node, Node(graph, 'noop'))
 
 
 class TestReplaceNode(unittest.TestCase):
@@ -248,20 +251,22 @@ class TestReplaceNode(unittest.TestCase):
             {
                 'input_1': {'type': 'Placeholder', 'value': None, 'kind': 'op'},
                 'input_2': {'type': 'Placeholder', 'value': None, 'kind': 'op'},
-                'old': {'type': 'Identity', 'value': None, 'kind': 'op', 'is_output': True},
-                'output': {'type': 'OpOutput', 'value': None, 'kind': 'op'},
+                'old': {'type': 'Identity', 'value': None, 'kind': 'op'},
+                'output': {'op': 'OpOutput', 'value': None, 'kind': 'op'},
             },
             [('input_1', 'old'),
              ('input_2', 'old'),
              ('old', 'output')])
 
         new_node = Const(graph, {'name': 'new'}).create_node([Node(graph, 'input_1'), Node(graph, 'input_2')])
-        replace_node(Node(graph, 'old'), new_node)
+
+        old_node = Node(graph, 'old')
+        old_node.replace_node(new_node)
 
         self.assertEqual(len(graph.nodes()), 4)
         self.assertEqual(len(graph.edges()), 3)
-        self.assertEqual(new_node['is_output'], True)
-        self.assertListEqual(list(graph.out_edges('new')), [('new', 'output')])
+        self.assertEqual(new_node.out_node().op, 'OpOutput')
+        self.assertEqual(len(graph.out_edges('new')), 1)
 
     def test_replace_node_several_consumers(self):
         graph = build_graph(
@@ -281,7 +286,7 @@ class TestReplaceNode(unittest.TestCase):
              ])
 
         new_node = Const(graph, {'name': 'new'}).create_node([Node(graph, 'input_1'), Node(graph, 'input_2')])
-        replace_node(Node(graph, 'old'), new_node)
+        Node(graph, 'old').replace_node(new_node)
 
         self.assertEqual(len(graph.nodes()), 6)
         self.assertEqual(len(graph.edges()), 5)
@@ -319,6 +324,1154 @@ class GetNodesWithPorts(unittest.TestCase):
 
         }
         input_names_in_pattern = ['one', 'three']
-        result = get_inputs_with_ports(graph=graph, match=match, pattern_edges=edges,
+        result = graph.get_inputs_with_ports(match=match, pattern_edges=edges,
                                        input_names_in_pattern=input_names_in_pattern)
         self.assertListEqual([(match['one'], 0), (match['three'], 0)], result)
+
+
+class TestGraphShapeChecker(unittest.TestCase):
+    nodes = {
+        '0': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '0_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '1': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '1_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '2': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '2_data': {'value': None, 'shape': None, 'kind': 'data'},
+    }
+
+    def test_check_shape_consistency_1(self):
+        # No shape attr in data node
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+
+        del graph.node['2_data']['shape']
+
+        with self.assertRaisesRegex(Error, "Graph contains data nodes \(1\) with inconsistent shapes:.*"):
+            graph.check_shapes_consistency()
+
+    def test_check_shape_consistency_2(self):
+        # No shape attr in data node
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+
+        graph.node['1_data']['shape'] = (1, 2, 3)
+        graph.node['2_data']['shape'] = (1, 2, 3)
+
+        with self.assertRaisesRegex(Error, "Graph contains data nodes \(2\) with inconsistent shapes:.*"):
+            graph.check_shapes_consistency()
+
+
+@generator
+class TestGraphPortsChecker(unittest.TestCase):
+    nodes = {
+        '0': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '0_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '1': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '1_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '2': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '2_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '3': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '3_data': {'value': None, 'shape': None, 'kind': 'data'},
+    }
+
+    @generate(*[('0', 'in', 1), ('0', 'out', 2), ('1', 'in', 2), ('3', 'out', 2)])
+    def test_check_shape_consistency_1(self, node_id: str, port_type: str, port_idx: int):
+        #
+        #               ,->2-->2_data---,->3-->3_data
+        #   0-->0_data-/-->1-->1_data--/
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+            ('1_data', '3'),
+            ('2_data', '3'),
+        ])
+
+        node = Node(graph, node_id)
+        if port_type == 'in':
+            node.add_input_port(idx=port_idx)
+        else:
+            node.add_output_port(idx=port_idx)
+
+        with self.assertRaisesRegex(Error, "Node {} has not consecutive {} ports indexes:.*".format(node_id,
+                                                                                                    port_type)):
+            graph.check_nodes_ports_are_consecutive()
+
+
+class TestNewGraphAPIMiddle(unittest.TestCase):
+
+    nodes = {
+        '0': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '0_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '1': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '1_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '2': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '2_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '3': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '3_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        '4': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '4_data': {'value': None, 'shape': None, 'kind': 'data'},
+
+        'const_1': {'type': 'Const', 'value': None, 'kind': 'op', 'op': 'Const'},
+        'const_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+    }
+
+      ###########################################
+     ###### TESTS FOR PORT CLASS METHODS #######
+    ###########################################
+
+    def test_port_get_destinations_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+        node_2_in_port = Node(graph, '2').in_port(0)
+
+        ports = node_0_out_port.get_destinations()
+
+        self.assertTrue(len(ports) == 2)
+        for port in ports:
+            self.assertTrue(port in [node_1_in_port, node_2_in_port])
+
+    def test_port_get_destination_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+        node_2_in_port = Node(graph, '2').in_port(0)
+
+        with self.assertRaises(Error):
+            node_0_out_port.get_destination()
+
+    def test_port_get_destination_2(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('0_data', '1'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+
+        self.assertEqual(node_0_out_port.get_destination(), node_1_in_port)
+
+    def test_port_get_source_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('0_data', '1'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+
+        self.assertEqual(node_1_in_port.get_source(), node_0_out_port)
+
+    def test_port_get_source_2(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('0_data', '1'),
+            ('2_data', '1')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        self.assertEqual(node_1.in_port(0).get_source(), node_0.out_port(0))
+        self.assertEqual(node_1.in_port(1).get_source(), node_2.out_port(0))
+
+    def test_port_get_source_3(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_0.add_input_port(0)
+        node_1.add_input_port(0)
+        node_2.add_input_port(0)
+
+        self.assertEqual(node_0.in_port(0).get_source(), None)
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+
+    def test_port_disconnect_1(self):
+        #              ,-->1-->1_data                   0-->0_data
+        #   0-->0_data/--->2-->2_data  ==> 0-->0_data   1-->1_data
+        #                                               2-->2_data
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_0.out_port(0).disconnect()
+
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+
+        self.assertTrue(len(node_1.in_nodes()) == 0)
+        self.assertTrue(len(node_2.in_nodes()) == 0)
+
+    def test_port_disconnect_2(self):
+        #              ,-->1-->1_data                 ,-->1-->1_data
+        #   0-->0_data/--->2-->2_data  ==> 0-->0_data/    2-->2_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_2.in_port(0).disconnect()
+
+        self.assertEqual(node_0.out_port(0).get_destination(), node_1.in_port(0))
+        self.assertEqual(node_1.in_port(0).get_source(), node_0.out_port(0))
+        self.assertEqual(node_2.out_port(0).get_destination(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+
+        self.assertTrue(len(node_0.out_nodes()) == 1)
+        self.assertTrue(len(node_1.in_nodes()) == 1)
+        self.assertTrue(len(node_2.in_nodes()) == 0)
+
+    def test_port_disconnect_3(self):
+        #   1-->1_data---\                 1-->1_data
+        #   0-->0_data---->2-->2_data  ==> 0-->0_data-->2-->2_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('0_data', '2'),
+            ('1_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_2.in_port(1).disconnect()
+
+        self.assertEqual(node_0.out_port(0).get_destination(), node_2.in_port(0))
+        self.assertEqual(node_2.in_port(0).get_source(), node_0.out_port(0))
+        self.assertEqual(node_1.out_port(0).get_destination(), None)
+
+        self.assertTrue(len(node_0.out_nodes()) == 1)
+        self.assertTrue(len(node_1.in_nodes()) == 0)
+        self.assertTrue(len(node_2.in_nodes()) == 1)
+
+    def test_port_disconnect_4(self):
+        #   1-->1_data---\                 0-->0_data
+        #   0-->0_data---->2-->2_data  ==> 1-->1_data-->2-->2_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('0_data', '2'),
+            ('1_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_2.in_port(0).disconnect()
+
+        self.assertEqual(node_1.out_port(0).get_destination(), node_2.in_port(1))
+        self.assertEqual(node_2.in_port(1).get_source(), node_1.out_port(0))
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+        self.assertEqual(node_0.out_port(0).get_destination(), None)
+        #
+        # self.assertTrue(len(node_0.out_nodes()) == 1)
+        # self.assertTrue(len(node_1.in_nodes()) == 0)
+        # self.assertTrue(len(node_2.in_nodes()) == 1)
+
+      ###########################################
+     ### TESTS FOR CONNECTION CLASS METHODS ####
+    ###########################################
+
+    def test_connection_set_source_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+            ('4', '4_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+            ('3_data', '4'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+        node_3 = Node(graph, '3')
+        node_4 = Node(graph, '4')
+
+        c = node_0.out_port(0).get_connection()
+        c.set_source(node_3.out_port(0))
+
+        self.assertTrue(node_0.out_node().kind == 'data')
+
+        self.assertEqual(node_0.out_port(0).get_destinations(), [])
+        destinations = node_3.out_port(0).get_destinations()
+        for port in destinations:
+            self.assertTrue(port in [node_1.in_port(0), node_2.in_port(0), node_4.in_port(0)])
+
+    def test_connection_set_source_2(self):
+        # 2-->2_data                                  ,->2-->2_data
+        # 0-->0_data-->1-->1_data   ==>    0-->0_data/-->1-->1_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_2 = Node(graph, '2')
+        node_2.add_input_port(0)
+
+        node_2.in_port(0).get_connection().set_source(node_0.out_port(0))
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+        ])
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_set_source_3(self):
+        #            ,->2-->2_data          0-->0_data-->1-->1_data
+        # 0-->0_data/-->1-->1_data    =>    3-->3_data-->2-->2_data
+        # 3-->3_data
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_2 = Node(graph, '2')
+        node_3 = Node(graph, '3')
+
+        node_2.in_port(0).get_connection().set_source(node_3.out_port(0))
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('3_data', '2'),
+        ])
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '2', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_set_source_4(self):
+        # 0   1   ==>    0-->1
+        graph = build_graph(self.nodes, [])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+
+        node_0.add_output_port(0)
+        node_1.add_input_port(0)
+
+        node_1.in_port(0).get_connection().set_source(node_0.out_port(0))
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('0_data', '1'),
+        ])
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_set_destination(self):
+        #            ,->2-->2_data-->3-->3_data               ,->2-->2_data
+        # 0-->0_data/-->1-->1_data   ==>           0-->0_data/-->3-->3_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+            ('2_data', '3'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '3'),
+            ('0_data', '2'),
+        ])
+
+        node_1 = Node(graph, '1')
+        node_3 = Node(graph, '3')
+
+        node_3.in_port(0).disconnect()
+        node_1.in_port(0).get_connection().set_destination(node_3.in_port(0))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_add_destination_1(self):
+        # 3-->3_data                                     ,-->3-->3_data
+        #            ,->2-->2_data                      ,-->2-->2_data
+        # 0-->0_data/-->1-->1_data   ==>     0-->0_data/-->1-->1_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+            ('0_data', '3'),
+        ])
+
+        node_0 = Node(graph, '0')
+        node_3 = Node(graph, '3')
+        node_3.add_input_port(idx=0)
+
+        node_0.out_port(0).get_connection().add_destination(node_3.in_port(0))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_add_destination_2(self):
+        # 0
+        # 1-->1_data   ==>     0-->0_data-->1-->1_data
+        graph = build_graph(self.nodes, [
+            ('1', '1_data'),
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('0_data', '1'),
+        ])
+
+        node_0 = Node(graph, '0')
+        node_0.add_output_port(idx=0)
+
+        node_1 = Node(graph, '1')
+        node_1.add_input_port(idx=0)
+
+        node_0.out_port(0).get_connection().add_destination(node_1.in_port(0))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_get_source_destinations_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        source = node_0.out_port(0).get_connection().get_source()
+        destinations = node_0.out_port(0).get_connection().get_destinations()
+
+        self.assertEqual(source, node_0.out_port(0))
+        for port in destinations:
+            self.assertTrue(port in [node_1.in_port(0), node_2.in_port(0)])
+
+        self.assertEqual(node_1.out_port(0).get_connection().get_destination(), None)
+        self.assertEqual(node_1.out_port(0).get_destination(), None)
+
+        self.assertEqual(node_2.out_port(0).get_connection().get_destination(), None)
+        self.assertEqual(node_2.out_port(0).get_destination(), None)
+
+    def test_connection_remove_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_1.in_port(0).get_connection().remove()
+
+        self.assertEqual(node_0.out_port(0).get_destinations(), [node_2.in_port(0)])
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), node_0.out_port(0))
+
+    def test_connection_remove_2(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ])
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_0.out_port(0).get_connection().remove()
+
+        self.assertEqual(node_0.out_port(0).get_destinations(), [])
+        self.assertEqual(node_1.out_port(0).get_destinations(), [])
+        self.assertEqual(node_2.out_port(0).get_destinations(), [])
+
+    def test_connection_data_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2')
+        ], {'0_data': {'value': np.ones((1,3,64,64)), 'shape': np.array([1, 3, 64, 64])}})
+
+        graph.__setattr__('stage', 'middle')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        self.assertTrue(np.array_equal(node_0.out_port(0).get_connection().data.get_shape(), (1, 3, 64, 64)))
+        self.assertTrue(np.array_equal(node_0.out_port(0).get_connection().data.get_value(), np.ones((1, 3, 64, 64))))
+
+        self.assertEqual(node_1.out_port(0).get_connection().data.get_shape(), None)
+        self.assertEqual(node_1.out_port(0).get_connection().data.get_value(), None)
+
+        self.assertEqual(node_2.out_port(0).get_connection().data.get_shape(), None)
+        self.assertEqual(node_2.out_port(0).get_connection().data.get_value(), None)
+
+      ###########################################
+     ################## OTHER ##################
+    ###########################################
+
+    def test_graph_cleanup_that_restores_const_operations(self):
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('2_data', '1'),
+            ('3_data', '2'),
+        ], {
+            '3': {'shape': np.array([1, 227, 227, 3]), 'value': np.ones((1, 227, 227, 3))},
+            '3_data': {'shape': np.array([1, 227, 227, 3]), 'value': np.ones((1, 227, 227, 3))},
+            '2': {'shape': np.array([1, 227, 227, 3]), 'value': np.ones((1, 227, 227, 3))},
+            '2_data': {'shape': np.array([1, 227, 227, 3]), 'value': np.ones((1, 227, 227, 3))},
+        }, nodes_with_edges_only=True)
+        add_opoutput(graph, '1_data', 0, False)
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('const_1', '2_data'),
+
+            ('0_data', '1'),
+            ('2_data', '1'),
+        ], {
+            'const_1': {'shape': np.array([1, 227, 227, 3]), 'value': np.ones((1, 227, 227, 3))},
+            '2_data': {'shape': np.array([1, 227, 227, 3]), 'value': np.ones((1, 227, 227, 3))},
+        }, nodes_with_edges_only=True)
+        add_opoutput(graph_ref, '1_data', 0, False)
+
+        from mo.middle.passes.eliminate import graph_clean_up
+        graph_clean_up(graph)
+        graph_clean_up(graph_ref)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '1_data', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_node_in_out_ports_order(self):
+        #
+        #               ,->2-->2_data---,->3-->3_data
+        #   0-->0_data-/-->1-->1_data--/
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '0_data'),
+            ('1', '1_data'),
+            ('2', '2_data'),
+            ('3', '3_data'),
+
+            ('0_data', '1'),
+            ('0_data', '2'),
+            ('1_data', '3'),
+            ('2_data', '3'),
+        ])
+
+        for id in ['0', '1', '2', '3']:
+            node = Node(graph, id)
+            for idx in range(len(node.in_ports())):
+                self.assertEqual(node.in_port(idx), node.in_ports()[idx])
+            for idx in range(len(node.out_ports())):
+                self.assertEqual(node.out_port(idx), node.out_ports()[idx])
+
+
+class TestNewGraphAPIFront(unittest.TestCase):
+    nodes = {
+        '0': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '1': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '2': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '3': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        '4': {'type': 'Placeholder', 'value': None, 'kind': 'op', 'op': 'Placeholder'},
+        'const_1': {'type': 'Const', 'value': None, 'kind': 'op', 'op': 'Const'},
+    }
+
+    ###########################################
+    ###### TESTS FOR PORT CLASS METHODS #######
+    ###########################################
+
+    def test_port_get_destinations_1(self):
+        #     ,->2
+        #    /-->1
+        #   0
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+        node_2_in_port = Node(graph, '2').in_port(0)
+
+        ports = node_0_out_port.get_destinations()
+
+        self.assertTrue(len(ports) == 2)
+        for port in ports:
+            self.assertTrue(port in [node_1_in_port, node_2_in_port])
+
+    def test_port_get_destination_1(self):
+        #     ,->2
+        #    /-->1
+        #   0
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+        node_2_in_port = Node(graph, '2').in_port(0)
+
+        with self.assertRaises(Error):
+            node_0_out_port.get_destination()
+
+    def test_port_get_destination_2(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+
+        self.assertEqual(node_0_out_port.get_destination(), node_1_in_port)
+
+    def test_port_get_source_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0_out_port = Node(graph, '0').out_port(0)
+
+        node_1_in_port = Node(graph, '1').in_port(0)
+
+        self.assertEqual(node_1_in_port.get_source(), node_0_out_port)
+
+    def test_port_get_source_2(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('2', '1')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        self.assertEqual(node_1.in_port(0).get_source(), node_0.out_port(0))
+        self.assertEqual(node_1.in_port(1).get_source(), node_2.out_port(0))
+
+    def test_port_get_source_3(self):
+        graph = build_graph(self.nodes, [])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_0.add_input_port(0)
+        node_1.add_input_port(0)
+        node_2.add_input_port(0)
+
+        self.assertEqual(node_0.in_port(0).get_source(), None)
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+
+    def test_port_disconnect_1(self):
+        #              ,-->1-->1_data                   0-->0_data
+        #   0-->0_data/--->2-->2_data  ==> 0-->0_data   1-->1_data
+        #                                               2-->2_data
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_0.out_port(0).disconnect()
+
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+
+        self.assertTrue(len(node_1.in_nodes()) == 0)
+        self.assertTrue(len(node_2.in_nodes()) == 0)
+
+    def test_port_disconnect_2(self):
+        #        ,-->1           ,-->1
+        #   0-->/--->2  ==> 0-->/    2
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_2.in_port(0).disconnect()
+
+        self.assertEqual(node_0.out_port(0).get_destination(), node_1.in_port(0))
+        self.assertEqual(node_1.in_port(0).get_source(), node_0.out_port(0))
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+
+        self.assertTrue(len(node_0.out_nodes()) == 1)
+        self.assertTrue(len(node_1.in_nodes()) == 1)
+        self.assertTrue(len(node_2.in_nodes()) == 0)
+
+    def test_port_disconnect_3(self):
+        #   1---\          1
+        #   0---->2    ==> 0-->2
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '2'),
+            ('1', '2')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_2.in_port(1).disconnect()
+
+        self.assertEqual(node_0.out_port(0).get_destination(), node_2.in_port(0))
+        self.assertEqual(node_2.in_port(0).get_source(), node_0.out_port(0))
+        self.assertEqual(node_1.out_port(0).get_destination(), None)
+
+        self.assertTrue(len(node_0.out_nodes()) == 1)
+        self.assertTrue(len(node_1.in_nodes()) == 0)
+        self.assertTrue(len(node_2.in_nodes()) == 1)
+
+    def test_port_disconnect_4(self):
+        #   1-----\        0
+        #   0------>2  ==> 1--->2
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '2'),
+            ('1', '2')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_2.in_port(0).disconnect()
+
+        self.assertEqual(node_1.out_port(0).get_destination(), node_2.in_port(1))
+        self.assertEqual(node_2.in_port(1).get_source(), node_1.out_port(0))
+        self.assertEqual(node_2.in_port(0).get_source(), None)
+        self.assertEqual(node_0.out_port(0).get_destination(), None)
+
+    def test_port_disconnected_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('1', '2')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+        node_2.add_output_port(0)
+        node_0.add_input_port(0)
+
+        self.assertTrue(not node_0.out_port(0).disconnected())
+        self.assertTrue(not node_1.out_port(0).disconnected())
+        self.assertTrue(not node_1.in_port(0).disconnected())
+        self.assertTrue(node_2.out_port(0).disconnected())
+        self.assertTrue(node_0.in_port(0).disconnected())
+
+    def test_port_get_connection_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('1', '2'),
+            ('1', '3'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '3')
+        node_3 = Node(graph, '2')
+
+        c = node_1.out_port(0).get_connection()
+
+        self.assertTrue(c.get_source() == node_1.out_port(0))
+        for port in c.get_destinations():
+            self.assertTrue(port in [node_2.in_port(0), node_3.in_port(0)])
+
+    ###########################################
+    ### TESTS FOR CONNECTION CLASS METHODS ####
+    ###########################################
+
+    def test_connection_set_source_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2'),
+            ('3', '4'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+        node_3 = Node(graph, '3')
+        node_4 = Node(graph, '4')
+
+        c = node_0.out_port(0).get_connection()
+        c.set_source(node_3.out_port(0))
+
+        self.assertEqual(node_0.out_port(0).get_destinations(), [])
+        destinations = node_3.out_port(0).get_destinations()
+        for port in destinations:
+            self.assertTrue(port in [node_1.in_port(0), node_2.in_port(0), node_4.in_port(0)])
+
+    def test_connection_set_source_2(self):
+        # 2                ,->2
+        # 0-->1   ==>    0/-->1
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_2 = Node(graph, '2')
+        node_2.add_input_port(0)
+
+        node_2.in_port(0).get_connection().set_source(node_0.out_port(0))
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '1', {'out': 0, 'in': 0}),
+            ('0', '2', {'out': 0, 'in': 0}),
+        ])
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_set_source_3(self):
+        # 0   1   ==>    0-->1
+        graph = build_graph(self.nodes, [])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+
+        node_0.add_output_port(0)
+        node_1.add_input_port(0)
+
+        node_1.in_port(0).get_connection().set_source(node_0.out_port(0))
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '1', {'out': 0, 'in': 0}),
+        ])
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_set_destination(self):
+        #            ,->2-->2_data-->3-->3_data               ,->2-->2_data
+        # 0-->0_data/-->1-->1_data   ==>           0-->0_data/-->3-->3_data
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2'),
+            ('2', '3'),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '3'),
+            ('0', '2'),
+        ])
+
+        node_1 = Node(graph, '1')
+        node_3 = Node(graph, '3')
+
+        node_3.in_port(0).disconnect()
+        node_1.in_port(0).get_connection().set_destination(node_3.in_port(0))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_add_destination_1(self):
+        # 3                   ,-->3
+        #     ,->2           ,-->2
+        # 0--/-->1  ==>  0--/-->1
+        #
+        graph = build_graph(self.nodes, [
+            ('0', '1', {'in': 0, 'out': 0}),
+            ('0', '2', {'in': 0, 'out': 0}),
+        ])
+        graph.__setattr__('stage', 'front')
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '1', {'in': 0, 'out': 0}),
+            ('0', '2', {'in': 0, 'out': 0}),
+            ('0', '3', {'in': 0, 'out': 0}),
+        ])
+
+        node_0 = Node(graph, '0')
+        node_3 = Node(graph, '3')
+        node_3.add_input_port(idx=0)
+
+        node_0.out_port(0).get_connection().add_destination(node_3.in_port(0))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_add_destination_2(self):
+        # 0
+        # 1   ==>     0-->1
+        graph = build_graph(self.nodes, [])
+        graph.__setattr__('stage', 'front')
+
+        graph_ref = build_graph(self.nodes, [
+            ('0', '1'),
+        ])
+
+        node_0 = Node(graph, '0')
+        node_0.add_output_port(idx=0)
+
+        node_1 = Node(graph, '1')
+        node_1.add_input_port(idx=0)
+
+        node_0.out_port(0).get_connection().add_destination(node_1.in_port(0))
+
+        (flag, resp) = compare_graphs(graph, graph_ref, '0', check_op_attrs=True)
+        self.assertTrue(flag, resp)
+
+    def test_connection_get_source_destinations_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1'),
+            ('0', '2')
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+        node_1.add_output_port(idx=0)
+        node_2.add_output_port(idx=0)
+
+        source = node_0.out_port(0).get_connection().get_source()
+        destinations = node_0.out_port(0).get_connection().get_destinations()
+
+        self.assertEqual(source, node_0.out_port(0))
+        for port in destinations:
+            self.assertTrue(port in [node_1.in_port(0), node_2.in_port(0)])
+
+        self.assertEqual(node_1.out_port(0).get_connection().get_destination(), None)
+        self.assertEqual(node_1.out_port(0).get_destination(), None)
+
+        self.assertEqual(node_2.out_port(0).get_connection().get_destination(), None)
+        self.assertEqual(node_2.out_port(0).get_destination(), None)
+
+    def test_connection_remove_1(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1', {'in': 0, 'out': 0}),
+            ('0', '2', {'in': 0, 'out': 0})
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_1.in_port(0).get_connection().remove()
+
+        self.assertEqual(node_0.out_port(0).get_destinations(), [node_2.in_port(0)])
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), node_0.out_port(0))
+
+    def test_connection_remove_2(self):
+        graph = build_graph(self.nodes, [
+            ('0', '1', {'in': 0, 'out': 0}),
+            ('0', '2', {'in': 0, 'out': 0})
+        ])
+        graph.__setattr__('stage', 'front')
+
+        node_0 = Node(graph, '0')
+        node_1 = Node(graph, '1')
+        node_2 = Node(graph, '2')
+
+        node_0.out_port(0).get_connection().remove()
+
+        self.assertEqual(node_0.out_port(0).get_destinations(), [])
+        self.assertEqual(node_1.in_port(0).get_source(), None)
+        self.assertEqual(node_2.in_port(0).get_source(), None)
diff --git a/model-optimizer/mo/graph/port.py b/model-optimizer/mo/graph/port.py
new file mode 100644
index 000000000..4584cfcba
--- /dev/null
+++ b/model-optimizer/mo/graph/port.py
@@ -0,0 +1,275 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+from copy import deepcopy
+
+import numpy as np
+import networkx as nx
+
+from collections import namedtuple
+
+from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.connection import Connection
+from mo.utils.error import Error
+
+
+class Port:
+    def __init__(self, node, idx: int, type: str):
+        if type not in ['in', 'out']:
+            raise Error("Inappropriate port type: {}".format(type))
+
+        # We use self.__dict__ only to not to call __setattr__ method from __init__ function
+        self.__dict__['node'] = node
+        self.__dict__['idx'] = idx
+        self.__dict__['type'] = type
+        self.__dict__['data'] = namedtuple('Data', ['get_value', 'get_shape', 'get_attr', 'set_value', 'set_shape', 'set_attr', 'has_valid'])
+
+        self.data.get_shape = self._get_shape
+        self.data.set_shape = self._set_shape
+
+        self.data.get_value = self._get_value
+        self.data.set_value = self._set_value
+
+        self.data.get_attr = self._get_attr
+        self.data.set_attr = self._set_attr
+
+        self.data.has_valid = self._has_valid
+
+    def __eq__(self, other):
+        return (
+            self.__class__ == other.__class__ and
+            self.node.graph == other.node.graph and
+            self.node.id == other.node.id and
+            self.type == other.type and
+            self.idx == other.idx
+        )
+
+    def __deepcopy__(self, memo):
+        cls = self.__class__
+        result = cls.__new__(cls)
+        memo[id(self)] = result
+        for k, v in self.__dict__.items():
+            result.__dict__[k] = v if k in ['graph', 'node'] else deepcopy(v)
+        return result
+
+    def __setattr__(self, key, value):
+        edge = self.node.in_edge(self.idx) if self.type == 'in' else self.node.out_edge(self.idx)
+        edge[key] = value
+
+    def __getattr__(self, item):
+        edge = self.node.in_edge(self.idx) if self.type == 'in' else self.node.out_edge(self.idx)
+
+    def _create_data_if_necessary(self):
+        if self.node.graph.stage == 'front':
+            raise Error("_create_data_if_necessary method is not applicable for front Graph phase!")
+        if self.type == 'in':
+            raise Error("_create_data_if_necessary method is not applicable for 'in' Port type!")
+
+        if self.idx not in self.node.out_nodes():
+            from mo.ops.op import Op
+            Op.create_data_node(self.node.graph, self.node, out_port=self.idx)
+            self.node['need_shape_inference'] = True
+        return self.node.out_node(self.idx)
+
+    def _get_shape(self):
+        if self.node.graph.stage == 'front':
+            return None
+        else:
+            if self.type == 'in':
+                return self.node.in_node(self.idx).shape
+            else:
+                return self.node.out_node(self.idx).shape
+
+    def _set_shape(self, shape):
+        if self.node.graph.stage == 'front':
+            raise NotImplementedError("set_shape not implemented for front phase")
+        else:
+            if self.type == 'in':
+                assert self.node.in_node(self.idx).value is None
+                self.node.in_node(self.idx).shape = int64_array(shape)
+            else:
+                assert self.node.out_node(self.idx).value is None
+                self.node.out_node(self.idx).shape = int64_array(shape)
+
+    def _get_value(self):
+        if self.node.graph.stage == 'front':
+            return None
+        else:
+            if self.type == 'in':
+                if self.idx in self.node.in_nodes() and self.node.in_node(self.idx).has_valid('value'):
+                    return self.node.in_node(self.idx).value
+            else:
+                if self.idx in self.node.out_nodes() and self.node.out_node(self.idx).has_valid('value'):
+                    return self.node.out_node(self.idx).value
+        return None
+
+    def _set_value(self, value):
+        if self.node.graph.stage == 'front':
+            raise Error("set_value is not applicable for graph front phase")
+        else:
+            if self.type == 'in':
+                self.node.in_node(self.idx).value = value
+                self.node.in_node(self.idx).shape = int64_array(value.shape)
+            else:
+                self.node.out_node(self.idx).value = value
+                self.node.out_node(self.idx).shape = int64_array(value.shape)
+
+    def _get_attr(self, item: str):
+        if self.node.graph.stage == 'front':
+            return None
+        else:
+            if self.type == 'in':
+                if self.idx in self.node.in_nodes() and self.node.in_node(self.idx).has_valid(item):
+                    return self.node.in_node(self.idx)[item]
+            else:
+                if self.idx in self.node.out_nodes() and self.node.out_node(self.idx).has_valid(item):
+                    return self.node.out_node(self.idx)[item]
+        return None
+
+    def _set_attr(self, item, value):
+        raise NotImplementedError()
+
+    def get_in_edge_attrs(self, data=False):
+        assert self.type == 'in'
+        for u, v, d in list(self.node.graph.in_edges(self.node.id, data=True)):
+            if d['in'] == self.idx:
+                edge_attrs = self.node.graph.get_edge_data(u, v)
+                for key in edge_attrs:
+                    if edge_attrs[key]['in'] == self.idx:
+                        if data:
+                            return edge_attrs[key], u, v, key
+                        else:
+                            return edge_attrs[key]
+        if data:
+            return None, None, None, None
+        else:
+            return None
+
+    def _has_valid(self, item):
+        if self.node.graph.stage == 'front':
+            raise NotImplementedError
+        else:
+            if self.type == 'in':
+                if self.idx in self.node.in_nodes() and self.node.in_node(self.idx).has_valid(item):
+                    return True
+            else:
+                if self.idx in self.node.out_nodes() and self.node.out_node(self.idx).has_valid(item):
+                    return True
+        return False
+
+    def disconnected(self):
+        # This method returns False if port connected with some other port
+        # otherwise it returns True
+
+        if self.type == 'in':
+            return self.get_source() is None
+        else:
+            return len(self.get_destinations()) == 0
+
+    def get_source(self):
+        # This method returns Port object that is producer (source) port for out port.
+        # In case if out port has no source port return None
+
+        assert self.type != 'out', "Can't get source for output port at {} node".format(self.node.name)
+
+        from mo.graph.graph import Node
+        producer_ports = []
+
+        has_producer = False
+        if self.node.graph.stage == 'front':
+            for n, d in self.node.get_inputs():
+                if d['in'] == self.idx:
+                    node = Node(self.node.graph, n)
+                    producer_ports.append(node.out_port(d['out']))
+                    has_producer = True
+            if not has_producer:
+                return None
+        else:
+            if self.idx not in self.node.in_nodes():
+                return None
+
+            in_data = self.node.in_node(self.idx)
+            for n, d in in_data.get_inputs():
+                node = Node(self.node.graph, n)
+                producer_ports.append(node.out_port(d['out']))
+
+        if len(producer_ports) != 1:
+            raise Error("Something happened with graph! data node has {} producers".format(len(producer_ports)))
+
+        return producer_ports[0]
+
+    def get_destination(self):
+        # This method returns Port that is consumer (destination) port for in port.
+        # In case if in port has no consumer return None
+
+        consumer_ports = self.get_destinations()
+        if not consumer_ports:
+            return None
+
+        if len(consumer_ports) > 1:
+            raise Error("The number of destinations for {} node at {} port is {}".format(self.node.name,
+                                                                                         self.idx,
+                                                                                         len(consumer_ports)))
+        return consumer_ports[0]
+
+    def get_destinations(self):
+        assert self.type != 'in', "Can't get destinations for input port at {} node".format(self.node.name)
+
+        from mo.graph.graph import Node
+        consumer_ports = []
+        if self.node.graph.stage == 'front':
+            producer_node = self.node
+        else:
+            # In case if node has no output data node in given port, we return None
+            if self.idx not in self.node.out_nodes():
+                return []
+            producer_node = self.node.out_node(self.idx)
+
+        for n, d in producer_node.get_outputs():
+            node = Node(self.node.graph, n)
+            consumer_ports.append(node.in_port(d['in']))
+        return consumer_ports
+
+    def disconnect(self):
+        if self.type == 'out':
+            consumer_ports = self.get_destinations()
+            if self.node.graph.stage == 'front':
+                for port in consumer_ports:
+                    self.node.graph.remove_edge(self.node.id, port.node.id)
+            else:
+                for port in consumer_ports:
+                    self.node.graph.remove_edge(port.node.in_node(port.idx).id, port.node.id)
+        else:
+            source_port = self.get_source()
+            if source_port is None:
+                return
+            for u, v, d in list(self.node.graph.in_edges(self.node.id, data=True)):
+                if d['in'] == self.idx:
+                    for key in self.node.graph.get_edge_data(u, v):
+                        if self.node.graph.get_edge_data(u, v)[key]['in'] == self.idx:
+                            self.node.graph.remove_edge(u, v, key=key)
+                            return
+
+    def get_connection(self):
+        if self.type == 'in':
+            return Connection(self.node.graph, self.get_source(), [self])
+        else:
+            return Connection(self.node.graph, self, self.get_destinations())
+
+    def connect(self, port):
+        if self.type == 'in':
+            self.get_connection().set_source(port)
+        else:
+            self.get_connection().add_destination(port)
diff --git a/model-optimizer/mo/main.py b/model-optimizer/mo/main.py
index f843c5d47..ac9636419 100644
--- a/model-optimizer/mo/main.py
+++ b/model-optimizer/mo/main.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -210,16 +210,14 @@ def driver(argv: argparse.Namespace):
                 raise Error('Incorrect saved model tag was provided. Specify --saved_model_tags with no spaces in it')
             argv.saved_model_tags = argv.saved_model_tags.split(',')
 
-    outputs = None
+    argv.output = argv.output.split(',') if argv.output else None
 
-    if argv.output:
-        outputs = argv.output.split(',')
-
-    placeholder_shapes = get_placeholder_shapes(argv.input, argv.input_shape, argv.batch)
+    argv.placeholder_shapes = get_placeholder_shapes(argv.input, argv.input_shape, argv.batch)
 
     mean_values = parse_tuple_pairs(argv.mean_values)
     scale_values = parse_tuple_pairs(argv.scale_values)
     mean_scale = get_mean_scale_dictionary(mean_values, scale_values, argv.input)
+    argv.mean_scale_values = mean_scale
 
     if not os.path.exists(argv.output_dir):
         try:
@@ -233,7 +231,7 @@ def driver(argv: argparse.Namespace):
             raise Error("Output directory {} is not writable for current user. " +
                         refer_to_faq_msg(22), argv.output_dir)
 
-    log.debug("Placeholder shapes : {}".format(placeholder_shapes))
+    log.debug("Placeholder shapes : {}".format(argv.placeholder_shapes))
 
     ret_res = 1
     if hasattr(argv, 'extensions') and argv.extensions and argv.extensions != '':
@@ -259,47 +257,36 @@ def driver(argv: argparse.Namespace):
 
     if is_tf:
         import mo.pipeline.tf as mo_tf
-        from mo.front.tf.register_custom_ops import update_registration
-        import_extensions.load_dirs(argv.framework, extensions, update_registration)
-        ret_res = mo_tf.tf2nx(argv, argv.input_model, model_name, outputs, argv.output_dir, argv.scale,
-                              is_binary=not argv.input_model_is_text,
-                              user_shapes=placeholder_shapes,
-                              mean_scale_values=mean_scale)
+        from mo.front.tf.register_custom_ops import get_front_classes
+        import_extensions.load_dirs(argv.framework, extensions, get_front_classes)
+        ret_res = mo_tf.tf2nx(argv, argv.input_model, model_name, argv.output_dir,
+                              is_binary=not argv.input_model_is_text)
 
     elif is_caffe:
         import mo.pipeline.caffe as mo_caffe
-        from mo.front.caffe.register_custom_ops import update_registration
-        import_extensions.load_dirs(argv.framework, extensions, update_registration)
-        ret_res = mo_caffe.driver(argv, argv.input_proto, argv.input_model, model_name, outputs, argv.output_dir,
-                                  argv.scale,
-                                  user_shapes=placeholder_shapes,
-                                  mean_scale_values=mean_scale,
+        from mo.front.caffe.register_custom_ops import get_front_classes
+        import_extensions.load_dirs(argv.framework, extensions, get_front_classes)
+        ret_res = mo_caffe.driver(argv, argv.input_proto, argv.input_model, model_name, argv.output_dir,
                                   mean_file=argv.mean_file,
                                   mean_file_offsets=mean_file_offsets,
                                   custom_layers_mapping_path=custom_layers_mapping_path)
 
     elif is_mxnet:
         import mo.pipeline.mx as mo_mxnet
-        from mo.front.mxnet.register_custom_ops import update_registration
-        import_extensions.load_dirs(argv.framework, extensions, update_registration)
-        ret_res = mo_mxnet.driver(argv, argv.input_model, model_name, outputs, argv.output_dir, argv.scale,
-                                  placeholder_shapes=placeholder_shapes,
-                                  mean_scale_values=mean_scale)
+        from mo.front.mxnet.register_custom_ops import get_front_classes
+        import_extensions.load_dirs(argv.framework, extensions, get_front_classes)
+        ret_res = mo_mxnet.driver(argv, argv.input_model, model_name, argv.output_dir)
 
     elif is_kaldi:
         import mo.pipeline.kaldi as mo_kaldi
-        from mo.front.kaldi.register_custom_ops import update_registration
-        import_extensions.load_dirs(argv.framework, extensions, update_registration)
-        ret_res = mo_kaldi.driver(argv, argv.input_model, model_name, outputs, argv.output_dir, argv.scale,
-                                  placeholder_shapes=placeholder_shapes,
-                                  mean_scale_values=mean_scale)
+        from mo.front.kaldi.register_custom_ops import get_front_classes
+        import_extensions.load_dirs(argv.framework, extensions, get_front_classes)
+        ret_res = mo_kaldi.driver(argv, argv.input_model, model_name, argv.output_dir)
     elif is_onnx:
         import mo.pipeline.onnx as mo_onnx
-        from mo.front.onnx.register_custom_ops import update_registration
-        import_extensions.load_dirs(argv.framework, extensions, update_registration)
-        ret_res = mo_onnx.driver(argv, argv.input_model, model_name, outputs, argv.output_dir, argv.scale,
-                                 user_shapes=placeholder_shapes,
-                                 mean_scale_values=mean_scale)
+        from mo.front.onnx.register_custom_ops import get_front_classes
+        import_extensions.load_dirs(argv.framework, extensions, get_front_classes)
+        ret_res = mo_onnx.driver(argv, argv.input_model, model_name, argv.output_dir)
 
     if ret_res != 0:
         return ret_res
diff --git a/model-optimizer/mo/main_test.py b/model-optimizer/mo/main_test.py
index 79f9feb60..a1501a6a4 100644
--- a/model-optimizer/mo/main_test.py
+++ b/model-optimizer/mo/main_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/middle/passes/conv.py b/model-optimizer/mo/middle/passes/conv.py
index 9c6654fcf..2d4160b56 100644
--- a/model-optimizer/mo/middle/passes/conv.py
+++ b/model-optimizer/mo/middle/passes/conv.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,23 +14,23 @@
  limitations under the License.
 """
 
-import copy
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.layout import get_batch_dim, get_features_dim
 from mo.front.common.partial_infer.utils import assign_dims_to_weights
 from mo.front.extractor import add_attrs_props
 from mo.front.extractor import update_ie_fields
-from mo.graph.graph import Node, unique_id
+from mo.graph.graph import Node, Graph
+from mo.graph.port import Port
 from mo.middle.passes.fusing.helpers import get_value_id, get_tensor_id
 from mo.middle.pattern_match import apply_pattern
-from mo.ops.op import Op
+from mo.ops.const import Const
+from mo.ops.scale_shift import ScaleShiftOp
 
 
-def pad_op_transform(graph: nx.MultiDiGraph, match: dict):
+def pad_op_transform(graph: Graph, match: dict):
     op = match['op']
     pad_op = match['pad_op']
     input_data = pad_op.in_node(0)
@@ -45,7 +45,7 @@ def pad_op_transform(graph: nx.MultiDiGraph, match: dict):
         return
 
     input_tensor_dims = len(match['pad_output'].shape)
-    if np.any(pads[get_features_dim(op.graph.graph['layout'],input_tensor_dims)] != 0) or \
+    if np.any(pads[get_features_dim(op.graph.graph['layout'], input_tensor_dims)] != 0) or \
             np.any(pads[get_batch_dim(op.graph.graph['layout'], input_tensor_dims)] != 0):
         log.info('The pad node "{}" with padding over feature/batch dimension cannot be fused.'.format(
             pad_op.soft_get('name')))
@@ -60,7 +60,7 @@ def pad_op_transform(graph: nx.MultiDiGraph, match: dict):
     graph.add_edge(input_data.id, match['op'].id, **{'in': 0, **edge_attrs})
 
 
-def fuse_pad(graph: nx.MultiDiGraph):
+def fuse_pad(graph: Graph):
     for op_type in ['Convolution', 'Pooling', 'Deconvolution']:
         apply_pattern(
             graph,
@@ -74,7 +74,7 @@ def fuse_pad(graph: nx.MultiDiGraph):
         )
 
 
-def convert_matmul_to_fully_connected(graph: nx.MultiDiGraph):
+def convert_matmul_to_fully_connected(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -85,7 +85,7 @@ def convert_matmul_to_fully_connected(graph: nx.MultiDiGraph):
     )
 
 
-def matmul_to_fully_connected_action(graph: nx.MultiDiGraph, match: dict):
+def matmul_to_fully_connected_action(graph: Graph, match: dict):
     log.debug('fully_connected_matched')
     matmul = match['matmul']
     input = matmul.in_node(0)
@@ -96,11 +96,11 @@ def matmul_to_fully_connected_action(graph: nx.MultiDiGraph, match: dict):
         len(weights_consumers) if weights_consumers is not None else None))
 
     if not (weights.value is not None and
-                    input.shape is not None and
-                    len(input.shape) >= 2 and
-                    weights.shape is not None and
-                    len(weights.shape) == 2 and
-                    len(weights_consumers) >= 1):
+            input.shape is not None and
+            len(input.shape) >= 2 and
+            weights.shape is not None and
+            len(weights.shape) == 2 and
+            len(weights_consumers) >= 1):
         matmul['can_be_fused'] = False
         return
 
@@ -116,7 +116,7 @@ def matmul_to_fully_connected_action(graph: nx.MultiDiGraph, match: dict):
     # Do not transpose weights in this pass, it will be done as a separate pass
 
 
-def transpose_fully_connected_weights(graph: nx.MultiDiGraph):
+def transpose_fully_connected_weights(graph: Graph):
     transposed_for_IE = 'transposed_for_IE'
     for node in graph.nodes():
         node = Node(graph, node)
@@ -133,58 +133,7 @@ def transpose_fully_connected_weights(graph: nx.MultiDiGraph):
             weights.shape = np.array(weights.value.shape)
 
 
-def gemm_to_fully_connected_action(graph: nx.MultiDiGraph, match: dict):
-    log.debug('gemm_to_fully_connected_action is triggered')
-    gemm = match['gemm']
-    A = gemm.in_node(0)
-    B = gemm.in_node(1)
-    B_consumers = graph.out_edges(B.node)
-    C = gemm.in_node(2)
-    C_consumers = graph.out_edges(C.node)
-
-    if not (B.value is not None and
-                    C.value is not None and
-                    A.shape is not None and
-                    C.shape.size == 1 and
-                not gemm.transpose_a and
-                (len(B_consumers) == 1 or not gemm.transpose_b)):
-        log.warning('Cannot convert Gemm to FullyConnected')
-        return
-
-    if gemm.transpose_b:
-        # B.value = B.value.transpose()
-        # B.shape = np.array(B.value.shape, dtype=np.int64)
-        gemm.transpose_b = 0
-    else:
-        B.value = B.value.transpose()
-        B.shape = np.array(B.value.shape, dtype=np.int64)
-
-    gemm['out-size'] = gemm.out_node().shape[-1]
-    gemm['type'] = 'FullyConnected'
-    gemm['channel_dims'] = len(match['output'].shape) - 1
-    gemm['bias_addable'] = True
-    gemm['input_channel_dim'] = 1  # MatMul weights in IO
-    gemm['output_channel_dim'] = 0
-    gemm['layout'] = 'NCHW'
-    gemm.in_edge(1)['bin'] = 'weights'
-    gemm.in_edge(2)['bin'] = 'biases'
-
-    assign_dims_to_weights(gemm.in_node(1), None, 1, 0, 2)
-    # Do not transpose weights in this pass, it will be done as a separate pass
-
-
-def convert_gemm_to_fully_connected(graph: nx.MultiDiGraph):
-    apply_pattern(
-        graph,
-        nodes=[
-            ('gemm', dict(kind='op', op='Gemm')),
-            ('output', dict(kind='data'))],
-        edges=[('gemm', 'output')],
-        action=gemm_to_fully_connected_action
-    )
-
-
-def muladd_to_scaleshift_action(graph: nx.MultiDiGraph, match: dict):
+def muladd_to_scaleshift_action(graph: Graph, match: dict):
     mul = match['mul']
     add = match['add']
     output = match['output']
@@ -212,15 +161,15 @@ def muladd_to_scaleshift_action(graph: nx.MultiDiGraph, match: dict):
 
     # Transform values
     weights.value = np.squeeze(weights.value)
-    weights.shape = weights.value.shape
+    weights.shape = np.array(weights.value.shape, dtype=np.int64)
 
     bias.value = np.squeeze(bias.value)
-    bias.shape = bias.value.shape
+    bias.shape = np.array(bias.value.shape, dtype=np.int64)
 
     # Broadcast weights if they are scalar
     if weights.value.ndim == 0 and bias.value.ndim == 1:
         weights.value = np.full(bias.shape, weights.value.item())
-        weights.shape = weights.value.shape
+        weights.shape = np.array(weights.value.shape, dtype=np.int64)
 
     if bias.shape != weights.shape:
         log.warning('Mul->Add to ScaleShift conversion stoped {} != {}'.format(weights.shape, bias.shape))
@@ -243,7 +192,7 @@ def muladd_to_scaleshift_action(graph: nx.MultiDiGraph, match: dict):
     graph.remove_edge(bias.node, add.id)
     graph.remove_edge(add.node, output.id)
 
-    op_node = unique_id(graph, mul.name + '/Fused{}_'.format(op_name))
+    op_node = graph.unique_id(mul.name + '/Fused{}_'.format(op_name))
     if op_name == 'ScaleShift':
         graph.add_node(op_node, **add_attrs_props(dict(kind='op', precision="FP32", type=op_name, name=op_node,
                                                        op=op_name, data_type=input.data_type)))
@@ -254,6 +203,10 @@ def muladd_to_scaleshift_action(graph: nx.MultiDiGraph, match: dict):
             (bias.node, op_node, {'in': 2, 'bin': 'biases'}),
             (op_node, output.node, {'out': 0})
         ])
+        scsh = Node(graph, op_node)
+        scsh.add_input_port(0)
+        scsh.add_input_port(1)
+        scsh.add_output_port(0)
     else:
         graph.add_node(op_node, **add_attrs_props(dict(kind='op', precision="FP32", type=op_name, name=op_node,
                                                        op=op_name, data_type=input.data_type, power=1,
@@ -263,11 +216,13 @@ def muladd_to_scaleshift_action(graph: nx.MultiDiGraph, match: dict):
             (input.node, op_node, {'in': 0}),
             (op_node, output.node, {'out': 0})
         ])
-
+        scsh = Node(graph, op_node)
+        scsh.add_input_port(0)
+        scsh.add_output_port(0)
     return
 
 
-def convert_muladd_to_scaleshift_or_power(graph: nx.MultiDiGraph):
+def convert_muladd_to_scaleshift_or_power(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -291,7 +246,7 @@ def convert_muladd_to_scaleshift_or_power(graph: nx.MultiDiGraph):
     )
 
 
-def batch_norm_fuse_action(graph: nx.MultiDiGraph, match: dict):
+def batch_norm_fuse_action(graph: Graph, match: dict):
     """
     Multiply convolution kernel by batch normalization coefficient and remove mul op.
     """
@@ -309,7 +264,7 @@ def batch_norm_fuse_action(graph: nx.MultiDiGraph, match: dict):
     graph.add_edge(match['conv'].node, match['mul_output'].node, out=0)
 
 
-def batch_norm_fuse(graph: nx.MultiDiGraph):
+def batch_norm_fuse(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -330,296 +285,60 @@ def batch_norm_fuse(graph: nx.MultiDiGraph):
     return graph
 
 
-def convert_add_to_scaleshift(graph: nx.MultiDiGraph):
-    for n in list(graph.nodes()):
-        node = Node(graph, n)
-        if node.has('op') and (node.op == 'BiasAdd' or node.op == 'Add') and len(node.in_nodes()) == 2:
-            tensor_id, value_id = get_tensor_id(node), get_value_id(node)
-            if tensor_id is not None and value_id is not None and node.soft_get('can_be_scaleshift') is not False:
-                node['type'] = 'ScaleShift'
-                node['op'] = 'ScaleShift'
-                node.in_node(value_id).value = np.squeeze(node.in_node(value_id).value)
-                node.in_node(value_id).shape = node.in_node(value_id).value.shape
-
-                # if the node was created with eltwise then it has attribute 'operation' which should be removed from
-                # the IR
-                if node.has('operation'):
-                    del graph.node[n]['operation']
-
-                bias_data = node.in_node(value_id)
-                graph[bias_data.node][node.node][0]['in'] = 2
-                graph[bias_data.node][node.node][0]['bin'] = 'biases'
-
-                input_data = node.in_node(tensor_id)
-                graph[input_data.node][node.node][0]['in'] = 0
-
-                update_ie_fields(graph.node[node.id])
-
-                weights_id = unique_id(graph, 'weights_')
-                graph.add_node(weights_id, **add_attrs_props(
-                    dict(kind='data', precision="FP32", name=weights_id, value=None, shape=None, data_type=None,
-                         infer=None)))
-                wnode = Node(graph, weights_id)
-
-                wnode['value'] = np.full_like(bias_data.value, 1, dtype=np.float32)
-                wnode['shape'] = np.array(wnode['value'].shape)
-
-                graph.add_edges_from([
-                    (weights_id, node.node, {'in': 1, 'bin': 'weights'}),
-                ])
-
-
-def convert_mul_to_scaleshift(graph: nx.MultiDiGraph):
-    for n in list(graph.nodes()):
-        node = Node(graph, n)
-        if node.has('op') and node.op == 'Mul' and len(node.in_nodes()) == 2:
-            tensor_id, value_id = get_tensor_id(node), get_value_id(node)
-            if tensor_id is not None and value_id is not None and node.soft_get('can_be_scaleshift') is not False:
-                node['type'] = 'ScaleShift'
-                node['op'] = 'ScaleShift'
-                node.in_node(value_id).value = np.squeeze(node.in_node(value_id).value)
-                node.in_node(value_id).shape = node.in_node(value_id).value.shape
-
-                # if the node was created with eltwise then it has attribute 'operation' which should be removed from
-                # the IR
-                if node.has('operation'):
-                    del graph.node[n]['operation']
-
-                scale_data = node.in_node(value_id)
-                graph[scale_data.node][node.node][0]['in'] = 1
-                graph[scale_data.node][node.node][0]['bin'] = 'weights'
-
-                input_data = node.in_node(tensor_id)
-                graph[input_data.node][node.node][0]['in'] = 0
-
-                update_ie_fields(graph.node[node.id])
-
-                bias_id = unique_id(graph, 'bias_')
-                graph.add_node(bias_id, **add_attrs_props(
-                    dict(kind='data', precision="FP32", name=bias_id, value=None, shape=None, data_type=None,
-                         infer=None)))
-                wnode = Node(graph, bias_id)
-
-                wnode['value'] = np.full_like(scale_data.value, 0, dtype=np.float32)
-                wnode['shape'] = np.array(wnode['value'].shape)
-
-                graph.add_edges_from([
-                    (bias_id, node.node, {'in': 2, 'bin': 'biases'}),
-                ])
-
-
-def convert_nasnet_action(graph: nx.MultiDiGraph, matches: dict):
-    """
-    This function converts speciefic for NasNet topology subgraph Pad->StridedSlice->AvgPool to Conv->Crop->AvgPool
-    """
-    input = matches['input']
-
-    pad_op = matches['pad_op']
-
-    sslice = matches['sslice']
-    sslice_out = matches['sslice_out']
-    begin = []
-    end = []
-    stride = []
-    for s in sslice.slices:
-        begin.append(s.start)
-        end.append(s.stop)
-        stride.append(s.step)
-
-    if not np.array_equal(pad_op.pads, np.array([[0, 0], [0, 1], [0, 1], [0, 0]])):
-        log.error(" Pad values doesn't match!")
-        return
-
-    if not np.array_equal(begin, np.array([0, 1, 1, 0])):
-        log.error("StridedSlice has wrong begin")
-        return
-
-    if sslice.end_mask != 15 or sslice.begin_mask != 9:
-        log.error("StridedSlice has wrong masks")
-        return
-
-    # Cut Smth-x->Pad->StrudedSlice-x->AvgPool
-    graph.remove_edge(input.id, pad_op.id)
-    graph.remove_edge(sslice.id, sslice_out.id)
-
-    # Pad -> Conv
-    conv_node = unique_id(graph, pad_op.name + '/Conv_')
-    conv_weights_node = unique_id(graph, pad_op.name + '/ConvW_')
-    conv_weights = np.ones((1, 1, input.shape[3], 1))
-    conv_output = unique_id(graph, pad_op.name + '/ConvOut_')
-    output_shape = np.array([input.shape[0], input.shape[1] + 1, input.shape[2] + 1, input.shape[3]])
-
-    graph.add_node(conv_node,
-                   **add_attrs_props(dict(kind='op', precision="FP32", type='Convolution', name=conv_node, op='Conv2D',
-                                          stride=np.array([1, 1, 1, 1]), dilation=np.array([1, 1, 1, 1]),
-                                          group=input.shape[3], bias_addable=True, bias_term=False,
-                                          spatial_dims=np.array([1, 2]),
-                                          kernel_spatial=np.array([1, 1]),
-                                          pad=np.array([[0, 0], [0, 0], [0, 0], [0, 0]]), output_shape=output_shape,
-                                          channel_dims=np.array([3]))))
-
-    graph.add_node(conv_weights_node, **add_attrs_props(
-        dict(kind='data', precision="FP32", name=conv_weights_node, value=np.array(conv_weights),
-             shape=np.array(conv_weights.shape),
-             data_type=input.data_type, infer=None,
-             spatial_dims=np.array([0, 1]),
-             input_channel_dim=2,
-             output_channel_dim=3,
-             dims_number=4, can_be_bias=True)))
-    graph.add_node(conv_output, **add_attrs_props(
-        dict(kind='data', precision="FP32", name=conv_output, value=None, shape=output_shape,
-             data_type=input.data_type)))
-
-    # StridedSlice -> Crop
-    crop_cls = Op.get_op_class_by_name('Crop')
-    crop = crop_cls(graph, dict(name=sslice.name + '/Crop_', axis=np.array([1, 2]),
-                                dim=np.array([output_shape[1] - 1, output_shape[2] - 1]), offset=np.array([1, 1])))
-    crop.create_node_with_data([Node(graph, conv_output)], data_nodes=sslice_out)
-
-    # Connect : Conv->Crop->AvgPool
-    graph.add_edges_from([
-        (input.id, conv_node, {'in': 0}),
-        (conv_weights_node, conv_node, {'in': 1, 'bin': 'weights'}),
-        (conv_node, conv_output, {'out': 0}),
-    ])
-    update_ie_fields(graph.node[conv_node], graph.graph['ir_version'])
-
-
-def convert_nasnet(graph: nx.MultiDiGraph):
-    apply_pattern(
-        graph,
-        nodes=[
-            ('input', dict(kind='data')),
-            ('pad_op', dict(kind='op', op='Pad')),
-            ('pad_out', dict(kind='data')),
-
-            ('begin', dict(kind='data')),
-            ('end', dict(kind='data')),
-            ('stride', dict(kind='data')),
-
-            ('sslice', dict(kind='op', op='StridedSlice')),
-            ('sslice_out', dict(kind='data')),
-
-            ('avg_pool', dict(kind='op', op='AvgPool')),
-            ('output', dict(kind='data')),
-        ],
-        edges=[
-            ('input', 'pad_op', {'in': 0}),
-            ('pad_op', 'pad_out'),
-
-            ('begin', 'sslice', {'in': 1}),
-            ('end', 'sslice', {'in': 2}),
-            ('stride', 'sslice', {'in': 3}),
-
-            ('pad_out', 'sslice', {'in': 0}),
-            ('sslice', 'sslice_out'),
-
-            ('sslice_out', 'avg_pool', {'in': 0}),
-            ('avg_pool', 'output')
-        ],
-        action=convert_nasnet_action
-    )
-    return graph
-
-
-def dilated_convolution_action(graph: nx.MultiDiGraph, match: dict):
-    conv = match['conv']
-    stb = match['space_to_batch']
-    bts = match['batch_to_space']
-
-    block_size = match['stb_bs']
-
-    input = match['input']
-    output = match['output']
-    stb_out = match['stb_output']
-    conv_out = match['conv_output']
-
-    in_edge_attrs = graph.get_edge_data(input.id, stb.id)[0]
-    out_edge_attrs = graph.get_edge_data(bts.id, output.id)[0]
-
-    graph.remove_edge(input.id, stb.id)
-    graph.remove_edge(stb_out.id, conv.id)
-    graph.remove_edge(conv.id, conv_out.id)
-    graph.remove_edge(bts.id, output.id)
-
-    conv.dilation[conv.spatial_dims] = block_size.value
-
-    pad = match['stb_pad'].value - match['bts_crop'].value
-    conv.pad[conv.spatial_dims] = [[pad[x][0], pad[x][1]] for x in range(len(pad))]
-    conv['auto_pad'] = None
-
-    graph.add_edges_from([
-        (input.id, conv.id, {'in': 0, **in_edge_attrs}),
-        (conv.id, output.id, {'out': 0, **out_edge_attrs}),
-    ])
-
-
-def convert_dilated_convolution(graph: nx.MultiDiGraph):
-    for op in ['Conv2D', 'DepthwiseConv2dNative', 'Conv3D']:
-        apply_pattern(
-            graph,
-            nodes=[
-                ('conv', dict(kind='op', op=op)),
-                ('space_to_batch', dict(kind='op', op='SpaceToBatchND')),
-                ('batch_to_space', dict(kind='op', op='BatchToSpaceND')),
-                ('input', dict(kind='data')),
-                ('output', dict(kind='data')),
-                ('conv_output', dict(kind='data')),
-                ('stb_output', dict(kind='data')),
-                ('stb_bs', dict(kind='data')),
-                ('stb_pad', dict(kind='data')),
-                ('bts_bs', dict(kind='data')),
-                ('bts_crop', dict(kind='data'))
-            ],
-            edges=[
-                ('input', 'space_to_batch', {'in': 0}),
-                ('stb_bs', 'space_to_batch', {'in': 1}),
-                ('stb_pad', 'space_to_batch', {'in': 2}),
-                ('space_to_batch', 'stb_output', {'out': 0}),
-                ('stb_output', 'conv', {'in': 0}),
-                ('conv', 'conv_output', {'out': 0}),
-                ('conv_output', 'batch_to_space', {'in': 0}),
-                ('bts_bs', 'batch_to_space', {'in': 1}),
-                ('bts_crop', 'batch_to_space', {'in': 2}),
-                ('batch_to_space', 'output', {'out': 0}),
-            ],
-            action=dilated_convolution_action
-        )
-
-
-def convert_multi_input_conv(graph: nx.MultiDiGraph):
-    for node in list(graph.nodes()):
-        node = Node(graph, node)
-        if node.kind == 'op' and node.op == 'ConvND':
-            node.op = 'Conv2D'
-            if node.bias_term == True:
-                num_inputs = len(node.in_nodes()) - 2
-                w_node = node.in_node(len(node.in_nodes()) - 2)
-                b_node = node.in_node(len(node.in_nodes()) - 1)
-            else:
-                num_inputs = len(node.in_nodes()) - 1
-                w_node = node.in_node(len(node.in_nodes()) - 1)
-
-            for i in range(1, num_inputs):
-                in_i = node.in_node(i)
-                out_i = node.out_node(i)
-                conv_id = unique_id(graph, node.id + '__')
-                graph.add_node(conv_id, **copy.deepcopy(node.get_attrs()))
-                new_conv = Node(graph, conv_id)
-                new_conv.name = conv_id
-
-                graph.remove_edge(in_i.id, node.id)
-                graph.remove_edge(node.id, out_i.id)
-                graph.add_edges_from([
-                    (w_node.id, conv_id, {'in': 1, 'bin': 'weights'}),
-                ])
-
-                if node.bias_term == True:
-                    graph.add_edges_from([
-                        (b_node.id, conv_id, {'in': 2, 'bin': 'biases'}),
-                    ])
-
-                graph.add_edges_from([
-                    (in_i.id, conv_id, {'in': 0}),
-                ])
-                graph.add_edge(conv_id, out_i.id, **{'out': 0})
+def get_tensor_in_port(node) -> Port:
+    tensor_ports = []
+    for port in node.in_ports().values():
+        if port.data.get_value() is None:
+            tensor_ports.append(port)
+    return None if len(tensor_ports) != 1 else tensor_ports[0]
+
+
+def get_value_in_port(node) -> Port:
+    value_ports = []
+    for port in node.in_ports().values():
+        if port.data.get_value() is not None:
+            value_ports.append(port)
+    return None if len(value_ports) != 1 else value_ports[0]
+
+
+def convert_add_or_mul_to_scaleshift(graph: Graph):
+    op_nodes = graph.get_op_nodes()
+    for node in op_nodes:
+        if node.soft_get('op') in ['BiasAdd', 'Add', 'Mul'] and len(node.in_ports()) == 2:
+            tensor_port, value_port = get_tensor_in_port(node), get_value_in_port(node)
+
+            if tensor_port is not None and value_port is not None and node.soft_get('can_be_scaleshift') is not False:
+                # Remove 1 dims from value array (should be 1D)
+                value_port.data.set_value(np.squeeze(value_port.data.get_value()))  # Updated shapes accordingly
+
+                # Create ScaleShift operation
+                scsh_op = ScaleShiftOp(graph, dict(name='ScaleShift/{}'.format(node.name))).create_node()
+
+                if node.op == 'Mul':
+                    # Create fake biases for scale shift node
+                    const_op = Const(graph, dict(name='{}/biases'.format(scsh_op.name),
+                                                 value=np.zeros(value_port.data.get_shape(), dtype=np.float32),
+                                                 shape=np.array(value_port.data.get_shape()),
+                                                 )).create_node()
+
+                    # Reconnect input and weights to scale shift node
+                    tensor_port.get_connection().set_destination(scsh_op.in_port(0))
+                    value_port.get_connection().set_destination(scsh_op.in_port(1))
+                    const_op.out_port(0).connect(scsh_op.in_port(2))
+                else:
+                    # Create fake weights for scale shift node
+                    const_op = Const(graph, dict(name='{}/weights'.format(scsh_op.name),
+                                                 value=np.ones(value_port.data.get_shape(), dtype=np.float32),
+                                                 shape=np.array(value_port.data.get_shape()),
+                                                 )).create_node()
+
+                    # Reconnect input and biases to scale shift node
+                    tensor_port.get_connection().set_destination(scsh_op.in_port(0))
+                    const_op.out_port(0).connect(scsh_op.in_port(1))
+                    value_port.get_connection().set_destination(scsh_op.in_port(2))
+
+                node.out_port(0).get_connection().set_source(scsh_op.out_port(0))
+
+                # Set bin attribute to ScaleShift input ports
+                scsh_op.in_port(1).bin = 'weights'
+                scsh_op.in_port(2).bin = 'biases'
diff --git a/model-optimizer/mo/middle/passes/conv_test.py b/model-optimizer/mo/middle/passes/conv_test.py
index ad4e3aa0f..9b1fd7356 100644
--- a/model-optimizer/mo/middle/passes/conv_test.py
+++ b/model-optimizer/mo/middle/passes/conv_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,7 +18,8 @@ import unittest
 
 import numpy as np
 
-from mo.middle.passes.conv import convert_muladd_to_scaleshift_or_power
+from mo.graph.graph import Node
+from mo.middle.passes.conv import convert_muladd_to_scaleshift_or_power, convert_add_or_mul_to_scaleshift
 from mo.middle.passes.eliminate import graph_clean_up
 from mo.utils.unittest.graph import build_graph, compare_graphs
 
@@ -27,19 +28,24 @@ nodes_attributes = {
     'placeholder_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # ScaleShift layer
     'scaleshift_1': {'type': 'ScaleShift', 'value': None, 'kind': 'op', 'op': 'ScaleShift'},
+    'const_scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'op'},
     'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+    'const_scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'op'},
     'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
     'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Mul and Add operations
     'mul_1': {'value': None, 'kind': 'op', 'op': 'Mul'},
+    'const_mul_1_w': {'value': None, 'shape': None, 'kind': 'op'},
     'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     'add_1': {'value': None, 'kind': 'op', 'op': 'Add'},
+    'const_add_1_w': {'value': None, 'shape': None, 'kind': 'op'},
     'add_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'add_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Power layer
     'power_1': {'type': 'Power', 'kind': 'op', 'op': 'Power', 'scale': None, 'shift': None, 'power': None},
     'power_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'op_output': {'kind': 'op', 'op': 'OpOutput'},
 }
 
 
@@ -48,17 +54,24 @@ class MulAddToScaleShiftOrPower(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
-                             'add_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True},
+                             'add_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array(mul_w.shape) if mul_w is not None else None,
+                                               'value': np.array(mul_w) if mul_w is not None else None},
                              'mul_1_w': {'shape': np.array(mul_w.shape) if mul_w is not None else None,
                                          'value': np.array(mul_w) if mul_w is not None else None},
+                             'const_add_1_w': {'shape': np.array(add_w.shape) if add_w is not None else None,
+                                               'value': np.array(add_w) if add_w is not None else None},
                              'add_1_w': {'shape': np.array(add_w.shape) if add_w is not None else None,
                                          'value': np.array(add_w) if add_w is not None else None},
                              })
@@ -72,13 +85,18 @@ class MulAddToScaleShiftOrPower(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'scaleshift_1'),
+                                 ('const_scaleshift_1_w', 'scaleshift_1_w'),
                                  ('scaleshift_1_w', 'scaleshift_1'),
+                                 ('const_scaleshift_1_b', 'scaleshift_1_b'),
                                  ('scaleshift_1_b', 'scaleshift_1'),
                                  ('scaleshift_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output'),
                                  ],
-                                {'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                {'const_scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'const_scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                                 'scaleshift_1_data': {'is_output': True}
+                                 'scaleshift_1_data': {}
                                  })
 
         convert_muladd_to_scaleshift_or_power(graph)
@@ -93,9 +111,10 @@ class MulAddToScaleShiftOrPower(unittest.TestCase):
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'power_1'),
                                  ('power_1', 'power_1_data'),
+                                 ('power_1_data', 'op_output'),
                                  ],
                                 {'power_1': {'scale': 3, 'shift': 2, 'power': 1},
-                                 'power_1_data': {'is_output': True}
+                                 'power_1_data': {}
                                  })
 
         convert_muladd_to_scaleshift_or_power(graph)
@@ -144,13 +163,17 @@ class MulAddToScaleShiftOrPower(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'scaleshift_1'),
+                                 ('const_scaleshift_1_w', 'scaleshift_1_w'),
                                  ('scaleshift_1_w', 'scaleshift_1'),
+                                 ('const_scaleshift_1_b', 'scaleshift_1_b'),
                                  ('scaleshift_1_b', 'scaleshift_1'),
                                  ('scaleshift_1', 'add_1_data'),
+                                 ('add_1_data', 'op_output'),
                                  ],
-                                {'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([3, 3, 3])},
+                                {'const_scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([3, 3, 3])},
+                                 'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([3, 3, 3])},
+                                 'const_scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([3, 2, 1])},
                                  'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([3, 2, 1])},
-                                 'add_1_data': {'is_output': True}
                                  })
 
         convert_muladd_to_scaleshift_or_power(graph)
@@ -159,5 +182,118 @@ class MulAddToScaleShiftOrPower(unittest.TestCase):
         self.assertTrue(flag, resp)
 
 
+class AddToScaleShift(unittest.TestCase):
+    @staticmethod
+    def _create_graph_with_add(add_w: np.ndarray):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
+                             ('add_1_w', 'add_1'),
+                             ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'add_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_add_1_w': {'shape': np.array(add_w.shape) if add_w is not None else None,
+                                               'value': np.array(add_w) if add_w is not None else None},
+                             'add_1_w': {'shape': np.array(add_w.shape) if add_w is not None else None,
+                                         'value': np.array(add_w) if add_w is not None else None},
+                             }, nodes_with_edges_only=True)
+        del graph['add_1']['add_1_data'][0]['in']
+        return graph
+
+    @staticmethod
+    def _create_graph_with_mul(mul_w: np.ndarray):
+        graph = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
+                             ('mul_1_w', 'mul_1'),
+                             ('mul_1', 'mul_1_data'),
+                             ('mul_1_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array(mul_w.shape) if mul_w is not None else None,
+                                               'value': np.array(mul_w) if mul_w is not None else None},
+                             'mul_1_w': {'shape': np.array(mul_w.shape) if mul_w is not None else None,
+                                         'value': np.array(mul_w) if mul_w is not None else None},
+                             }, nodes_with_edges_only=True)
+        del graph['mul_1']['mul_1_data'][0]['in']
+        return graph
+
+    def test_add_to_scaleshift_1(self):
+        graph = AddToScaleShift._create_graph_with_add(np.array([1, 2, 3], dtype=np.float32))
+        graph.stage = 'middle'
+
+        graph_ref = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                             ('const_scaleshift_1_b', 'scaleshift_1_b'),
+                             ('scaleshift_1_w', 'scaleshift_1'),
+                             ('scaleshift_1_b', 'scaleshift_1'),
+                             ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3])},
+
+                             'const_scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 1, 1])},
+                             'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 1, 1])},
+
+                             'const_scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             }, nodes_with_edges_only=True)
+
+        convert_add_or_mul_to_scaleshift(graph)
+        graph_clean_up(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'op_output')
+        self.assertTrue(flag, resp)
+
+        scsh_node = Node(graph, 'op_output').in_port(0).get_source().node
+
+        self.assertTrue(graph.get_edge_data(scsh_node.in_node(1).id, scsh_node.id)[0]['bin'] == 'weights')
+        self.assertTrue(graph.get_edge_data(scsh_node.in_node(2).id, scsh_node.id)[0]['bin'] == 'biases')
+
+    def test_mul_to_scaleshift_1(self):
+        graph = AddToScaleShift._create_graph_with_mul(np.array([1, 2, 3], dtype=np.float32))
+        graph.stage = 'middle'
+
+        graph_ref = build_graph(nodes_attributes,
+                            [('placeholder_1', 'placeholder_1_data'),
+                             ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                             ('const_scaleshift_1_b', 'scaleshift_1_b'),
+                             ('scaleshift_1_w', 'scaleshift_1'),
+                             ('scaleshift_1_b', 'scaleshift_1'),
+                             ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
+                             ],
+                            {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3])},
+
+                             'const_scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+
+                             'const_scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
+                             'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
+                             }, nodes_with_edges_only=True)
+
+        convert_add_or_mul_to_scaleshift(graph)
+        graph_clean_up(graph)
+
+        (flag, resp) = compare_graphs(graph, graph_ref, 'op_output')
+        self.assertTrue(flag, resp)
+
+        scsh_node = Node(graph, 'op_output').in_port(0).get_source().node
+
+        self.assertTrue(graph.get_edge_data(scsh_node.in_node(1).id, scsh_node.id)[0]['bin'] == 'weights')
+        self.assertTrue(graph.get_edge_data(scsh_node.in_node(2).id, scsh_node.id)[0]['bin'] == 'biases')
+
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/model-optimizer/mo/middle/passes/convert_data_type.py b/model-optimizer/mo/middle/passes/convert_data_type.py
index daa178213..5f0d50e01 100644
--- a/model-optimizer/mo/middle/passes/convert_data_type.py
+++ b/model-optimizer/mo/middle/passes/convert_data_type.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,11 +15,9 @@
 """
 
 import logging as log
-
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
@@ -28,6 +26,8 @@ SUPPORTED_DATA_TYPES = {
     'half': (np.float16, 'FP16'),
     'FP32': (np.float32, 'FP32'),
     'FP16': (np.float16, 'FP16'),
+    'I32': (np.int32, 'I32'),
+    'uint8': (np.uint8, 'UI8'),
 }
 
 
@@ -39,7 +39,7 @@ def data_type_str_to_precision(data_type_str: str):
     return SUPPORTED_DATA_TYPES[data_type_str][1] if data_type_str in SUPPORTED_DATA_TYPES else None
 
 
-def convert_blob(graph: nx.MultiDiGraph, node: Node, data_type: type):
+def convert_blob(graph: Graph, node: Node, data_type: type):
     out_edges = graph.out_edges(node.node, data=True)
 
     # if the data.value is used as binary weights
@@ -70,7 +70,7 @@ def convert_blob(graph: nx.MultiDiGraph, node: Node, data_type: type):
             node.value = new_blob
 
 
-def convert(graph: nx.MultiDiGraph, data_type_str: str):
+def convert(graph: Graph, data_type_str: str):
     for node_name, node_attrs in graph.nodes(data=True):
         node = Node(graph, node_name)
         # if the data type is forcibly set then use it
diff --git a/model-optimizer/mo/middle/passes/debug.py b/model-optimizer/mo/middle/passes/debug.py
index 28c0023eb..e0f20be87 100644
--- a/model-optimizer/mo/middle/passes/debug.py
+++ b/model-optimizer/mo/middle/passes/debug.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -46,7 +46,7 @@ def debug_ir_emitter(graph, exclude_attrs: list = []):
     print("---  DEBUG IR END  ---")
 
 
-def get_output_node_names(graph: nx.MultiDiGraph):
+def get_output_node_names(graph: Graph):
     result = []
     for node in graph.nodes():
         node = Node(graph, node)
diff --git a/model-optimizer/mo/middle/passes/eliminate.py b/model-optimizer/mo/middle/passes/eliminate.py
index 2878add05..d131875f4 100644
--- a/model-optimizer/mo/middle/passes/eliminate.py
+++ b/model-optimizer/mo/middle/passes/eliminate.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,18 +17,21 @@ import logging as log
 from collections import deque
 
 import networkx as nx
+import numpy as np
 
-from mo.graph.graph import Node, create_edge
+from mo.graph.graph import Node, Graph
 from mo.middle.pattern_match import apply_pattern
+from mo.utils.error import Error
 from mo.utils.graph import bfs_search, pseudo_topological_sort
 
 
-def get_nodes_with_attributes(graph: nx.MultiDiGraph, **attrs: dict):
+# TODO: dep warning
+def get_nodes_with_attributes(graph: Graph, **attrs: dict):
     node_attrs = graph.nodes(data=True)
     return [n for n, d in node_attrs if all(a in d.items() for a in attrs.items())]
 
 
-def reverse_dfs(graph: nx.MultiDiGraph, node_name: str, update_func: callable, visited: set = None):
+def reverse_dfs(graph: Graph, node_name: str, update_func: callable, visited: set = None):
     d = deque()
 
     if visited is None:
@@ -44,23 +47,23 @@ def reverse_dfs(graph: nx.MultiDiGraph, node_name: str, update_func: callable, v
                 d.append(in_node_name)
 
 
-def mark_input_nodes(graph: nx.MultiDiGraph, node_name: str, key: str, value):
+def mark_input_nodes(graph: Graph, node_name: str, key: str, value):
     for input, _ in graph.in_edges(node_name):
         graph.node[input][key] = value
 
 
-def mark_output_nodes(graph: nx.MultiDiGraph, node_name: str, key: str, value):
+def mark_output_nodes(graph: Graph, node_name: str, key: str, value):
     for output, _ in graph.out_edges(node_name):
         graph.node[output][key] = value
 
 
-def mark_output_reachable_nodes(graph: nx.MultiDiGraph):
+def mark_output_reachable_nodes(graph: Graph):
     """
     Mark nodes whether they are outputs reachable or not. The node is considered output reachable if it is connected to
-    one of the nodes that has attribute is_output=True.
+    one of the nodes that has attribute op=OpOutput.
     """
     nx.set_node_attributes(G=graph, name='is_output_reachable', values=False)
-    outputs = get_nodes_with_attributes(graph, is_output=True)
+    outputs = graph.get_nodes_with_attributes(op='OpOutput')
     log.debug('The following nodes are seeded as output reachable:\n{}'.format('\n'.join(sorted(map(str, outputs)))))
     nx.set_node_attributes(G=graph, name='is_output_reachable', values={n: True for n in outputs})
     visited = set()
@@ -69,7 +72,7 @@ def mark_output_reachable_nodes(graph: nx.MultiDiGraph):
                     lambda graph, node_name: mark_input_nodes(graph, node_name, 'is_output_reachable', True), visited)
 
 
-def mark_undead_nodes(graph: nx.MultiDiGraph, undead_types: list):
+def mark_undead_nodes(graph: Graph, undead_types: list):
     """
     Mark output nodes and nodes of the specific type as undead, meaning that they should survive the dead nodes
     elimination phase. Then mark all children nodes of the undead nodes (except children of inputs) as undead.
@@ -80,29 +83,30 @@ def mark_undead_nodes(graph: nx.MultiDiGraph, undead_types: list):
     nx.set_node_attributes(G=graph, name='is_undead', values=False)
 
     # mark output nodes as undead
-    outputs = get_nodes_with_attributes(graph, is_output=True)
+    outputs = graph.get_nodes_with_attributes(op='OpOutput')
     nx.set_node_attributes(G=graph, name='is_undead', values={n: True for n in outputs})
 
     # mark specifically defined with node type set of nodes
     for type in undead_types:
-        node_of_specific_type = get_nodes_with_attributes(graph, type=type)
+        node_of_specific_type = graph.get_nodes_with_attributes(type=type)
         nx.set_node_attributes(G=graph, name='is_undead', values={n: True for n in node_of_specific_type})
 
-    undead_nodes = get_nodes_with_attributes(graph, is_undead=True)
+    undead_nodes = graph.get_nodes_with_attributes(is_undead=True)
     # propagate 'undead' attribute to children nodes of undead nodes if the node produces constant value
     for node_name in bfs_search(graph, undead_nodes):
         if graph.node[node_name]['is_undead']:
             for _, dst_node_name in graph.out_edges(node_name):
                 node_attrs = graph.node[dst_node_name]
-                if 'kind' in node_attrs and node_attrs['kind'] == 'data' and node_attrs['value'] is not None:
+                if 'kind' in node_attrs and (
+                        node_attrs['kind'] == 'data' and node_attrs['value'] is not None or node_attrs['kind'] == 'op'):
                     graph.node[dst_node_name]['is_undead'] = True
 
     # mark input nodes as undead
-    inputs = get_nodes_with_attributes(graph, is_input=True)
+    inputs = graph.get_nodes_with_attributes(is_input=True)
     nx.set_node_attributes(G=graph, name='is_undead', values={n: True for n in inputs})
 
 
-def mark_const_producer_nodes(graph: nx.MultiDiGraph):
+def mark_const_producer_nodes(graph: Graph):
     """
     Mark nodes that produce constant values.
     :param graph: graph to operate on.
@@ -122,7 +126,7 @@ def mark_const_producer_nodes(graph: nx.MultiDiGraph):
                 graph.node[input]['is_const_producer'] = False
 
 
-def eliminate_dead_nodes(graph: nx.MultiDiGraph):
+def eliminate_dead_nodes(graph: Graph):
     nodes_to_remove = set()
     for node_name, node_attrs in graph.nodes(data=True):
         if not node_attrs['is_output_reachable'] or (node_attrs['is_const_producer'] and not node_attrs['is_undead']):
@@ -131,25 +135,69 @@ def eliminate_dead_nodes(graph: nx.MultiDiGraph):
     graph.remove_nodes_from(nodes_to_remove)
 
 
-def graph_clean_up(graph: nx.MultiDiGraph, undead_node_types: list = []):
+def add_constant_operations(graph: Graph):
+    data_nodes = graph.get_data_nodes(has_value=True)
+    for node in data_nodes:
+        # If data node has no producers we create Const operation
+        if len(node.in_nodes()) == 0 and len(node.out_nodes()) != 0:
+            # It's necessary to import here due to cycle dependencies
+            from mo.ops.const import Const
+            Const(graph, dict(value=node.value, shape=np.array(node.value.shape))).create_node_with_data(data_nodes=node)
+
+
+def remove_const_ops(graph: Graph):
+    ops = [node for node in graph.get_op_nodes() if node.soft_get('type') == 'Const']
+    for node in ops:
+        graph.remove_edge(node.id, node.out_node().id)
+        graph.remove_node(node.id)
+
+
+def shape_inference(graph: Graph):
+    nodes = pseudo_topological_sort(graph)
+    for node in nodes:
+        node = Node(graph, node)
+        if node.has_and_set('need_shape_inference'):
+            old_out_shapes = [port.data.get_shape() for port in node.out_ports().values()]
+            node.infer(node)
+            new_out_shapes = [port.data.get_shape() for port in node.out_ports().values()]
+            for shape1, shape2 in zip(old_out_shapes, new_out_shapes):
+                if shape1 is not None and not np.array_equal(shape1, shape2):
+                    raise Error("After partial shape inference were found shape collision for node {} (old shape: {}, new shape: {})".format(node.name, shape1, shape2))
+            node.need_shape_inference = False
+
+
+def graph_clean_up(graph: Graph, undead_node_types: list = None):
+    if undead_node_types is None:
+        undead_node_types = []
+
+    if 'Shape' in undead_node_types and not graph.graph['cmd_params'].keep_shape_ops:
+        undead_node_types.remove('Shape')
+
     mark_output_reachable_nodes(graph)
     mark_undead_nodes(graph, undead_node_types)
     mark_const_producer_nodes(graph)
     eliminate_dead_nodes(graph)
+    # Add Const op for constant data nodes
+    add_constant_operations(graph)
+    shape_inference(graph)
+
 
+def graph_clean_up_tf(graph: Graph):
+    graph_clean_up(graph, ['TFCustomSubgraphCall', 'Shape'])
 
-def graph_clean_up_tf(graph: nx.MultiDiGraph):
-    graph_clean_up(graph, ['TFCustomSubgraphCall'])
 
+def graph_clean_up_onnx(graph: Graph):
+    graph_clean_up(graph, ['Shape'])
 
-def remove_identity_action(graph: nx.MultiDiGraph, matches: dict):
+
+def remove_identity_action(graph: Graph, matches: dict):
     remove_op_node_with_data_node(graph, matches['identity'])
 
 
 # TODO: unit tests
-def merge_data_nodes(graph: nx.MultiDiGraph, survived: Node, removed: Node):
-    if survived.has_and_set('is_output'):
-        graph.node[removed.id].update({'is_output': True})
+def merge_data_nodes(graph: Graph, survived: Node, removed: Node):
+    if survived.has_and_set('op') and survived.op == 'OpOutput':
+        graph.node[removed.id].update({'op': 'OpOutput'})
 
     for u, v, d in list(graph.in_edges(removed.id, data=True)):
         graph.add_edges_from([(u, survived.id, d)])
@@ -172,7 +220,7 @@ def merge_data_nodes(graph: nx.MultiDiGraph, survived: Node, removed: Node):
 
 
 # TODO: unit tests
-def remove_op_node_with_data_node(graph: nx.MultiDiGraph, node_to_remove: Node):
+def remove_op_node_with_data_node(graph: Graph, node_to_remove: Node):
     assert node_to_remove.kind == 'op'
     input_data_node = node_to_remove.in_node()
     output_node = [v for _, v in graph.out_edges(node_to_remove.id)]
@@ -190,7 +238,7 @@ def remove_op_node_with_data_node(graph: nx.MultiDiGraph, node_to_remove: Node):
     graph.remove_nodes_from([node_to_remove.id, input_data_node.id])
 
 
-def remove_op_nodes(graph: nx.MultiDiGraph, attrs: dict):
+def remove_op_nodes(graph: Graph, attrs: dict):
     op_attrs = {'kind': 'op'}
     op_attrs.update(attrs)
     apply_pattern(
@@ -201,7 +249,7 @@ def remove_op_nodes(graph: nx.MultiDiGraph, attrs: dict):
     )
 
 
-def remove_edges_for_nodes(graph: nx.MultiDiGraph, node_attrs: dict, edge_attrs: dict):
+def remove_edges_for_nodes(graph: Graph, node_attrs: dict, edge_attrs: dict):
     for node in graph.nodes():
         node = Node(graph, node)
         if all([node.has(attr) and node[attr] == node_attrs[attr] for attr in node_attrs]):
@@ -212,21 +260,3 @@ def remove_edges_for_nodes(graph: nx.MultiDiGraph, node_attrs: dict, edge_attrs:
                     graph.remove_edge(src_node.id, node.id)
 
 
-def remove_useless_split_action(graph: nx.MultiDiGraph, matches: dict):
-    split_node = matches['split']
-    input = split_node.in_node(1)
-    output = split_node.out_node()
-    graph.remove_edge(input.id, split_node.id)
-
-    for u, v, d in list(graph.out_edges(output.id, data=True)):
-        graph.add_edges_from([(input.id, v, d)])
-        graph.remove_edge(u, v)
-
-
-def remove_useless_split(graph: nx.MultiDiGraph):
-    apply_pattern(
-        graph,
-        nodes=[('split', {'kind': 'op', 'op': 'Split', 'num_split': 1})],
-        edges=[],
-        action=remove_useless_split_action
-    )
diff --git a/model-optimizer/mo/middle/passes/eliminate_test.py b/model-optimizer/mo/middle/passes/eliminate_test.py
index 79b892c88..f253dde02 100644
--- a/model-optimizer/mo/middle/passes/eliminate_test.py
+++ b/model-optimizer/mo/middle/passes/eliminate_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,9 +18,8 @@ import unittest
 
 import numpy as np
 
-from mo.graph.graph import Node, erase_node
-from mo.middle.passes.eliminate import mark_output_reachable_nodes, graph_clean_up, \
-    get_nodes_with_attributes, mark_const_producer_nodes
+from mo.graph.graph import Node, Graph
+from mo.middle.passes.eliminate import mark_output_reachable_nodes, graph_clean_up, mark_const_producer_nodes
 from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'placeholder_1': {'type': 'Placeholder', 'kind': 'op'},
@@ -38,11 +37,14 @@ nodes_attributes = {'placeholder_1': {'type': 'Placeholder', 'kind': 'op'},
                     'data_node_3': {'value': None, 'kind': 'data'},
                     'data_node_3_2': {'value': None, 'kind': 'data'},
                     'data_node_4': {'value': None, 'kind': 'data'},
-                    'data_node_5': {'value': None, 'kind': 'data'},
-                    'data_node_6': {'value': None, 'kind': 'data'},
+                    'data_node_5': {'value': None, 'shape': None, 'kind': 'data'},
+                    'data_node_6': {'value': None, 'shape': None, 'kind': 'data'},
                     'tf_call_1': {'type': 'TFCustomSubgraphCall', 'kind': 'op'},
                     'tf_call_2': {'type': 'TFCustomSubgraphCall', 'kind': 'op'},
                     'tf_call_3': {'type': 'TFCustomSubgraphCall', 'kind': 'op'},
+                    'op_output': {'kind': 'op', 'op': 'OpOutput'},
+                    'op_output_1': {'kind': 'op', 'op': 'OpOutput'},
+                    'op_output_2': {'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -63,15 +65,17 @@ class TestEliminatePass(unittest.TestCase):
                             [('placeholder_1', 'node_1'),
                              ('node_1', 'node_2'),
                              ('placeholder_1', 'node_3'),
-                             ('node_3', 'node_4')],
-                            {'node_4': {'is_output': True}},
+                             ('node_3', 'node_4'),
+                             ('node_4', 'op_output')
+                             ],
+                            {'node_4': {}},
                             nodes_with_edges_only=True)
         mark_output_reachable_nodes(graph)
 
-        self.assertListEqual(sorted(['placeholder_1', 'node_3', 'node_4']),
-                             sorted(get_nodes_with_attributes(graph, is_output_reachable=True)))
+        self.assertListEqual(sorted(['placeholder_1', 'node_3', 'op_output', 'node_4']),
+                             sorted(graph.get_nodes_with_attributes(is_output_reachable=True)))
         self.assertListEqual(sorted(['node_1', 'node_2']),
-                             sorted(get_nodes_with_attributes(graph, is_output_reachable=False)))
+                             sorted(graph.get_nodes_with_attributes(is_output_reachable=False)))
 
     def test_mark_output_unreachable_nodes_behind_output(self):
         """
@@ -86,13 +90,15 @@ class TestEliminatePass(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'node_1'),
                              ('node_1', 'node_2'),
-                             ('node_2', 'node_3')],
-                            {'node_2': {'is_output': True}},
+                             ('node_2', 'node_3'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {}},
                             nodes_with_edges_only=True)
         mark_output_reachable_nodes(graph)
 
-        self.assertListEqual(sorted(['placeholder_1', 'node_1', 'node_2']),
-                             sorted(get_nodes_with_attributes(graph, is_output_reachable=True)))
+        self.assertListEqual(sorted(['node_1', 'node_2', 'op_output', 'placeholder_1']),
+                             sorted(graph.get_nodes_with_attributes(is_output_reachable=True)))
         self.assertFalse(graph.node['node_3']['is_output_reachable'])
 
     def test_mark_ops_producing_constant_values(self):
@@ -128,16 +134,19 @@ class TestEliminatePass(unittest.TestCase):
                              ('data_node_3_2', 'node_5'),
                              ('node_5', 'data_node_5'),
                              ('data_node_3', 'node_4'),
-                             ('data_node_4', 'node_1')],
-                            {'data_node_2': {'is_output': True},
-                             'data_node_5': {'is_output': True},
+                             ('data_node_4', 'node_1'),
+                             ('data_node_2', 'op_output'),
+                             ('data_node_5', 'op_output_1')
+                             ],
+                            {'data_node_2': {},
+                             'data_node_5': {},
                              'data_node_3': {'value': np.array(1)},
                              'data_node_6': {'value': np.array(1)}},
                             nodes_with_edges_only=True)
         mark_const_producer_nodes(graph)
         self.assertTrue((graph.node['node_6']['is_const_producer']))
         self.assertListEqual(sorted(['node_1', 'node_2', 'node_3', 'node_5', 'placeholder_1']),
-                             sorted(get_nodes_with_attributes(graph, is_const_producer=False, kind='op')))
+                             sorted(graph.get_nodes_with_attributes(is_const_producer=False, kind='op')))
 
         graph_clean_up(graph)
         self.assertTrue('node_3' in graph.nodes())
@@ -166,6 +175,6 @@ class TestEliminatePass(unittest.TestCase):
                              ('node_1', 'node_2'),
                              ('node_2', 'node_3')],
                             nodes_with_edges_only=True)
-        erase_node(Node(graph, 'node_2'))
+        graph.erase_node(Node(graph, 'node_2'))
 
         self.assertListEqual(sorted(['placeholder_1', 'node_1', 'node_3']), sorted(graph.nodes()))
diff --git a/model-optimizer/mo/middle/passes/fusing/decomposition.py b/model-optimizer/mo/middle/passes/fusing/decomposition.py
index 737074fa1..cf6739d20 100644
--- a/model-optimizer/mo/middle/passes/fusing/decomposition.py
+++ b/model-optimizer/mo/middle/passes/fusing/decomposition.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@ import logging as log
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.eliminate import merge_data_nodes
 from mo.middle.pattern_match import apply_pattern
 from mo.ops.lin_op import Mul, Add
@@ -27,7 +27,7 @@ from mo.ops.op import Op
 from mo.ops.reshape import Reshape
 
 
-def convert_batch_norm(graph: nx.MultiDiGraph):
+def convert_batch_norm(graph: Graph):
     """
     This function finds FusedBatchNorm layer (or BatchNorm for MXNet) and replaces with Mul->Add->Mul->Add sequence.
     """
@@ -78,7 +78,7 @@ def convert_batch_norm(graph: nx.MultiDiGraph):
             _fused_batch_norm_decomposition(graph, tinput, toutput, const, beta, scale, shift, can_be_fused)
 
 
-def _fused_batch_norm_decomposition(graph: nx.MultiDiGraph, tinput: Node, toutput: Node, gamma: Node, beta: Node,
+def _fused_batch_norm_decomposition(graph: Graph, tinput: Node, toutput: Node, gamma: Node, beta: Node,
                                     mean: np.ndarray, variance: np.ndarray, can_be_fused=True):
     """
     This is common function for TF, Caffe and MXNet
@@ -113,64 +113,108 @@ def _fused_batch_norm_decomposition(graph: nx.MultiDiGraph, tinput: Node, toutpu
         data_nodes=toutput)
 
 
-def convert_scale_shift_to_mul_add(graph: nx.MultiDiGraph):
-    nodes = [Node(graph, node) for node in graph.nodes() if Node(graph, node).soft_get('op') == 'ScaleShift']
+def convert_scale_shift_to_mul_add(graph: Graph):
+    nodes = graph.get_op_nodes(op='ScaleShift')
     for node in nodes:
         if node.soft_get('can_be_fused') is False:
             continue
 
+        ports_count = len(node.in_ports())
+
+        input_port = node.in_port(0)
+        scale_port = node.in_port(1) if ports_count > 1 and not node.in_port(1).disconnected() else None
+        shift_port = node.in_port(2) if ports_count > 2 and not node.in_port(2).disconnected() else None
+        output_port = node.out_port(0)
+
         has_biases = True
         has_weights = True
+
         # We don't need zero biases
-        if len(node.in_nodes()) < 3 or all([x == 0 for x in node.in_node(2).value]):
+        if shift_port is None or (shift_port.data.get_value() is not None and all([x == 0 for x in shift_port.data.get_value()])):
             has_biases = False
-        input_node = node.in_node(0)
-        scale_node = node.in_node(1)
-        shift_node = node.in_node(2) if has_biases else None
-        output_node = node.out_node()
 
-        if scale_node.has_valid("value") and all([x == 1 for x in scale_node.value]):
+        # We don't need weights with ones
+        if scale_port is None or (scale_port.data.get_value() is not None and all([x == 1 for x in scale_port.data.get_value()])):
             has_weights = False
 
-        mul_node = Mul(graph, dict(name=node.name + "/Mul_"))
-        add_node = Add(graph, dict(name=node.name + "/Add_"))
-
-        # Disconnect ScaleShift node
-        graph.remove_edge(input_node.id, node.id)
-        graph.remove_edge(node.id, output_node.id)
+        mul_op = Mul(graph, dict(name=node.name + "/Mul_"))
+        add_op = Add(graph, dict(name=node.name + "/Add_"))
 
         # Expand dims for current layout
-        broadcast_dims_cnt = len(input_node.shape) - 2 if graph.graph['layout'] == 'NCHW' else 0
-        if scale_node.has_valid("value"):
-            Op.expand_node_shape(scale_node, broadcast_dims_cnt)
-        else:
-            # insert reshape to make shapes similar
-            reshape_dims = np.zeros(len(input_node.shape), dtype=np.int64)
+        broadcast_dims_cnt = len(input_port.data.get_shape()) - 2 if graph.graph['layout'] == 'NCHW' else 0
+
+        # In case if we have constant weights/biases we have to broadcast them according to graph layout
+        # otherwise we insert Reshape with broadcast dim attribute.
+        def broadcast_value(port):
+            value = np.array(port.data.get_value())
+            for idx in range(broadcast_dims_cnt):
+                value = np.expand_dims(value, axis=-1)
+            port.data.set_value(value)
+
+        def broadcast_with_reshape(port):
+            input_shape = input_port.data.get_shape()
+            reshape_dims = np.zeros(len(input_shape), dtype=np.int64)
             for i in range(0, node.axis):
                 reshape_dims[i] = 1
-            for i in range(node.axis, node.axis + len(scale_node.shape)):
-                reshape_dims[i] = scale_node.shape[i-node.axis]
-            for i in range(node.axis + len(scale_node.shape), len(input_node.shape)):
+            data_shape = port.data.get_shape()
+            for i in range(node.axis, node.axis + len(data_shape)):
+                reshape_dims[i] = data_shape[i - node.axis]
+            for i in range(node.axis + len(data_shape), len(input_shape)):
                 reshape_dims[i] = 1
-            reshape = Reshape(graph, dict(name=scale_node.name+"/Broadcast_",
-                                          dim=reshape_dims))
-            scale_node = reshape.create_node_with_data(inputs=[scale_node])
+            reshape = Reshape(graph, dict(name=port.node.name + "/Broadcast_", dim=reshape_dims)).create_node()
+            port.get_connection().set_destination(reshape.in_port(0))
+            reshape.out_port(0).connect(port)
 
-        Op.expand_node_shape(shift_node, broadcast_dims_cnt)
+        if has_weights and scale_port.data.get_value() is not None:
+            broadcast_value(scale_port)
+        elif has_weights:
+            broadcast_with_reshape(scale_port)
 
-        # Connect input->mul->out->add->out
-        if has_biases:
-            add_node.create_node_with_data(
-                inputs=[mul_node.create_node_with_data(inputs=[input_node, scale_node]), shift_node],
-                data_nodes=output_node)
+        if has_biases and shift_port.data.get_value() is not None:
+            broadcast_value(shift_port)
+        elif has_biases:
+            broadcast_with_reshape(shift_port)
+
+        if has_biases and has_weights:
+            # Connect input->mul->out->add->out
+            add_node = add_op.create_node()
+            mul_node = mul_op.create_node()
+
+            # Connect Mul operation with inputs
+            input_port.get_connection().set_destination(mul_node.in_port(0))
+            scale_port.get_connection().set_destination(mul_node.in_port(1))
+
+            # Connect Add operation with inputs
+            mul_node.out_port(0).connect(add_node.in_port(0))
+            shift_port.get_connection().set_destination(add_node.in_port(1))
+
+            output_port.get_connection().set_source(add_node.out_port(0))
         elif has_weights:
-            mul_node.create_node_with_data(inputs=[input_node, scale_node], data_nodes=output_node)
+            # Connect input->mul->out
+            mul_node = mul_op.create_node()
+
+            # Connect Mul operation with inputs
+            input_port.get_connection().set_destination(mul_node.in_port(0))
+            scale_port.get_connection().set_destination(mul_node.in_port(1))
+
+            output_port.get_connection().set_source(mul_node.out_port(0))
+        elif has_biases:
+            # Connect input->add->out
+            add_node = add_op.create_node()
+
+            # Connect Add operation with inputs
+            input_port.get_connection().set_destination(add_node.in_port(0))
+            shift_port.get_connection().set_destination(add_node.in_port(1))
+
+            output_port.get_connection().set_source(add_node.out_port(0))
         else:
-            merge_data_nodes(graph, input_node, output_node)
-            graph.remove_node(output_node.id)
+            # Connect input->out
+            producer_port = input_port.get_source()
+            input_port.disconnect()
+            output_port.get_connection().set_source(producer_port)
 
 
-def _bn_to_mul_add_action(graph: nx.MultiDiGraph, match: dict):
+def _bn_to_mul_add_action(graph: Graph, match: dict):
     # Data nodes
     tinput = match['input']
     toutput = match['output']
@@ -209,7 +253,7 @@ def _bn_to_mul_add_action(graph: nx.MultiDiGraph, match: dict):
                                    data_nodes=toutput)
 
 
-def convert_bn_to_mul_add(graph: nx.MultiDiGraph):
+def convert_bn_to_mul_add(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
diff --git a/model-optimizer/mo/middle/passes/fusing/decomposition_test.py b/model-optimizer/mo/middle/passes/fusing/decomposition_test.py
index 2179f217c..0fa1ff2e7 100644
--- a/model-optimizer/mo/middle/passes/fusing/decomposition_test.py
+++ b/model-optimizer/mo/middle/passes/fusing/decomposition_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -30,21 +30,27 @@ nodes_attributes = {
     'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # ScaleShift layer
     'scaleshift_1': {'type': 'ScaleShift', 'kind': 'op', 'op': 'ScaleShift', 'axis': 0},
+    'const_scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'op'},
     'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
+    'const_scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'op'},
     'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
     'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Mul and Add operations
     'mul_1': {'type': None, 'value': None, 'kind': 'op', 'op': 'Mul'},
+    'const_mul_1_w': {'value': None, 'shape': None, 'kind': 'op'},
     'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     'add_1': {'type': None, 'kind': 'op', 'op': 'Add'},
+    'const_add_1_w': {'value': None, 'shape': None, 'kind': 'op'},
     'add_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'add_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Mul and Add operations
     'mul_2': {'type': None, 'kind': 'op', 'op': 'Mul'},
+    'const_mul_2_w': {'value': None, 'shape': None, 'kind': 'op'},
     'mul_2_w': {'value': None, 'shape': None, 'kind': 'data'},
     'mul_2_data': {'value': None, 'shape': None, 'kind': 'data'},
     'add_2': {'type': None, 'kind': 'op', 'op': 'Add'},
+    'const_add_2_w': {'value': None, 'shape': None, 'kind': 'op'},
     'add_2_w': {'value': None, 'shape': None, 'kind': 'data'},
     'add_2_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Reshape
@@ -60,6 +66,7 @@ nodes_attributes = {
     # Concat1 operation
     'concat': {'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
     'concat_data': {'value': None, 'shape': None, 'kind': 'data'},
+    'op_output': {'kind': 'op', 'op': 'OpOutput'}
 }
 
 
@@ -69,30 +76,35 @@ class ScaleShiftToMulAdd(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
                              ('scaleshift_1_w', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                             'scaleshift_1_data': {'is_output': True}
+                             'scaleshift_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1': {'can_be_fused': True},
-                                 'scaleshift_1_data': {'is_output': True}
+                                 'scaleshift_1_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
         convert_scale_shift_to_mul_add(graph)
         graph_clean_up(graph)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'scaleshift_1_data')
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1')
         self.assertTrue(flag, resp)
 
     # ScaleShift  2 inputs-> Mul
@@ -103,10 +115,11 @@ class ScaleShiftToMulAdd(unittest.TestCase):
                              ('placeholder_1_data', 'scaleshift_1'),
                              ('placeholder_2_data', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227])},
-                             'scaleshift_1_data': {'is_output': True}
+                             'scaleshift_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
@@ -117,19 +130,20 @@ class ScaleShiftToMulAdd(unittest.TestCase):
                                  ('placeholder_1_data', 'mul_1'),
                                  ('placeholder_2/Reshape_data', 'mul_1'),
                                  ('mul_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227])},
                                  'placeholder_2/Reshape_': {'dim': np.array([1, 227, 1, 1])},
                                  'placeholder_2/Reshape_data': {'shape': np.array([1, 227, 1, 1])},
                                  'mul_1': {'can_be_fused': True},
-                                 'scaleshift_1_data': {'is_output': True}
+                                 'scaleshift_1_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
         convert_scale_shift_to_mul_add(graph)
         graph_clean_up(graph)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'scaleshift_1_data')
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1')
         self.assertTrue(flag, resp)
 
     # ScaleShift  2 inputs-> Mul (axis = 1)
@@ -140,11 +154,12 @@ class ScaleShiftToMulAdd(unittest.TestCase):
                              ('placeholder_1_data', 'scaleshift_1'),
                              ('placeholder_2_data', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([227])},
                              'scaleshift_1': {'axis': 1},
-                             'scaleshift_1_data': {'is_output': True}
+                             'scaleshift_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
@@ -155,53 +170,59 @@ class ScaleShiftToMulAdd(unittest.TestCase):
                                  ('placeholder_1_data', 'mul_1'),
                                  ('placeholder_2/Reshape_data', 'mul_1'),
                                  ('mul_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([227])},
                                  'placeholder_2/Reshape_': {'dim': np.array([1, 227, 1, 1])},
                                  'placeholder_2/Reshape_data': {'shape': np.array([1, 227, 1, 1])},
                                  'mul_1': {'can_be_fused': True},
-                                 'scaleshift_1_data': {'is_output': True}
+                                 'scaleshift_1_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
         convert_scale_shift_to_mul_add(graph)
         graph_clean_up(graph)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'scaleshift_1_data')
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1')
         self.assertTrue(flag, resp)
 
-
     # ScaleShift -> Mul (Zero biases)
     def test_scaleshift_to_mul_2(self):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                             ('const_scaleshift_1_b', 'scaleshift_1_b'),
                              ('scaleshift_1_w', 'scaleshift_1'),
                              ('scaleshift_1_b', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
-                             'scaleshift_1_data': {'is_output': True}
+                             'scaleshift_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1': {'can_be_fused': True},
-                                 'scaleshift_1_data': {'is_output': True}
+                                 'scaleshift_1_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
         convert_scale_shift_to_mul_add(graph)
         graph_clean_up(graph)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'scaleshift_1_data')
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1')
         self.assertTrue(flag, resp)
 
     # ScaleShift -> Mul->Add
@@ -209,38 +230,46 @@ class ScaleShiftToMulAdd(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                             ('const_scaleshift_1_b', 'scaleshift_1_b'),
                              ('scaleshift_1_w', 'scaleshift_1'),
                              ('scaleshift_1_b', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([3, 2, 1])},
-                             'scaleshift_1_data': {'is_output': True}
+                             'scaleshift_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'const_add_1_w': {'shape': np.array([3]), 'value': np.array([3, 2, 1])},
                                  'add_1_w': {'shape': np.array([3]), 'value': np.array([3, 2, 1])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'add_1': {'can_be_fused': True},
                                  'mul_1': {'can_be_fused': True},
-                                 'scaleshift_1_data': {'is_output': True}
+                                 'scaleshift_1_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
         convert_scale_shift_to_mul_add(graph)
         graph_clean_up(graph)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'scaleshift_1_data')
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1')
         self.assertTrue(flag, resp)
 
     # ScaleShift -> None (Zero weights and biases)
@@ -248,24 +277,30 @@ class ScaleShiftToMulAdd(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                             ('const_scaleshift_1_b', 'scaleshift_1_b'),
                              ('scaleshift_1_w', 'scaleshift_1'),
                              ('scaleshift_1_b', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 1, 1])},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
-                             'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True}
-                             })
+                             'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3])}
+                             }, nodes_with_edges_only=True)
 
         graph_ref = build_graph(nodes_attributes,
-                                [('placeholder_1', 'placeholder_1_data')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True}})
+                                [('placeholder_1', 'placeholder_1_data'),
+                                 ('placeholder_1_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])}}
+                                ,nodes_with_edges_only=True)
 
         graph.graph['layout'] = 'NHWC'
         convert_scale_shift_to_mul_add(graph)
         graph_clean_up(graph)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1_data')
+        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1')
         self.assertTrue(flag, resp)
 
     # ScaleShift -> ScaleShift (can_be_fused=False)
@@ -273,29 +308,37 @@ class ScaleShiftToMulAdd(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'scaleshift_1'),
+                             ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                             ('const_scaleshift_1_b', 'scaleshift_1_b'),
                              ('scaleshift_1_w', 'scaleshift_1'),
                              ('scaleshift_1_b', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
+                             ('scaleshift_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 1, 1])},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
                              'scaleshift_1': {'can_be_fused': False},
-                             'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True}
+                             'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3])}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'scaleshift_1'),
+                                 ('const_scaleshift_1_w', 'scaleshift_1_w'),
+                                 ('const_scaleshift_1_b', 'scaleshift_1_b'),
                                  ('scaleshift_1_w', 'scaleshift_1'),
                                  ('scaleshift_1_b', 'scaleshift_1'),
                                  ('scaleshift_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 1, 1])},
                                  'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array([1, 1, 1])},
+                                 'const_scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
                                  'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([0, 0, 0])},
                                  'scaleshift_1': {'can_be_fused': False},
-                                 'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True}
+                                 'scaleshift_1_data': {'shape': np.array([1, 227, 227, 3])}
                                  })
 
         convert_scale_shift_to_mul_add(graph)
@@ -316,7 +359,8 @@ class BatchNormDecomposition(unittest.TestCase):
                              ('bn_var', 'bn_op'),
                              ('bn_op', 'bn_data'),
                              ('concat', 'concat_data'),
-                             ('bn_data', 'concat')
+                             ('bn_data', 'concat'),
+                             ('concat_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'bn_op': {'eps': 1.2},
@@ -325,39 +369,50 @@ class BatchNormDecomposition(unittest.TestCase):
                              'bn_mean': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_var': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_data': {'shape': np.array([1, 227, 227, 3])},
-                             'concat_data': {'is_output': True}
+                             'concat_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'add_1_data'),
                                  ('add_1_data', 'mul_2'),
+                                 ('const_mul_2_w', 'mul_2_w'),
                                  ('mul_2_w', 'mul_2'),
                                  ('mul_2', 'mul_2_data'),
                                  ('mul_2_data', 'add_2'),
+                                 ('const_add_2_w', 'add_2_w'),
                                  ('add_2_w', 'add_2'),
                                  ('add_2', 'add_2_data'),
                                  ('concat', 'concat_data'),
-                                 ('add_2_data', 'concat')
+                                 ('add_2_data', 'concat'),
+                                 ('concat_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]),
+                                             'value': np.array([0.67419986, 0.55901699, 0.48795004])},
                                  'mul_1_w': {'shape': np.array([3]),
                                              'value': np.array([0.67419986, 0.55901699, 0.48795004])},
+                                 'const_mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'const_add_1_w': {'shape': np.array([3]),
+                                             'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
                                  'add_1_w': {'shape': np.array([3]),
                                              'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
+                                 'const_add_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'add_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'add_2_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1': {'can_be_fused': True},
                                  'mul_2': {'can_be_fused': True},
                                  'add_1': {'can_be_fused': True},
                                  'add_2': {'can_be_fused': True},
-                                 'concat_data': {'is_output': True}
+                                 'concat_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
@@ -378,7 +433,8 @@ class BatchNormDecomposition(unittest.TestCase):
                              ('bn_var', 'bn_op'),
                              ('bn_op', 'bn_data'),
                              ('concat', 'concat_data'),
-                             ('bn_data', 'concat')
+                             ('bn_data', 'concat'),
+                             ('concat_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'bn_op': {'eps': 1.2, 'can_be_fused': False},
@@ -387,39 +443,50 @@ class BatchNormDecomposition(unittest.TestCase):
                              'bn_mean': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_var': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_data': {'shape': np.array([1, 227, 227, 3])},
-                             'concat_data': {'is_output': True}
+                             'concat_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'add_1_data'),
                                  ('add_1_data', 'mul_2'),
+                                 ('const_mul_2_w', 'mul_2_w'),
                                  ('mul_2_w', 'mul_2'),
                                  ('mul_2', 'mul_2_data'),
                                  ('mul_2_data', 'add_2'),
+                                 ('const_add_2_w', 'add_2_w'),
                                  ('add_2_w', 'add_2'),
                                  ('add_2', 'add_2_data'),
                                  ('concat', 'concat_data'),
-                                 ('add_2_data', 'concat')
+                                 ('add_2_data', 'concat'),
+                                 ('concat_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]),
+                                             'value': np.array([0.67419986, 0.55901699, 0.48795004])},
                                  'mul_1_w': {'shape': np.array([3]),
                                              'value': np.array([0.67419986, 0.55901699, 0.48795004])},
+                                 'const_mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'const_add_1_w': {'shape': np.array([3]),
+                                             'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
                                  'add_1_w': {'shape': np.array([3]),
                                              'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
+                                 'const_add_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'add_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'add_2_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1': {'can_be_fused': False},
                                  'mul_2': {'can_be_fused': False},
                                  'add_1': {'can_be_fused': False},
                                  'add_2': {'can_be_fused': False},
-                                 'concat_data': {'is_output': True}
+                                 'concat_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
@@ -437,14 +504,15 @@ class BatchNormDecomposition(unittest.TestCase):
                              ('bn_var', 'bn_op'),
                              ('bn_op', 'bn_data'),
                              ('concat', 'concat_data'),
-                             ('bn_data', 'concat')
+                             ('bn_data', 'concat'),
+                             ('concat_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'bn_op': {'epsilon': 1.2, 'op': 'BatchNormalization'},
                              'bn_mean': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_var': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_data': {'shape': np.array([1, 227, 227, 3])},
-                             'concat_data': {'is_output': True}
+                             'concat_data': {}
                              })
 
         del graph['placeholder_1']['placeholder_1_data'][0]['in']
@@ -453,23 +521,30 @@ class BatchNormDecomposition(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'add_1_data'),
                                  ('concat', 'concat_data'),
-                                 ('add_1_data', 'concat')
+                                 ('add_1_data', 'concat'),
+                                 ('concat_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]),
+                                             'value': np.array([0.67419986, 0.55901699, 0.48795004])},
                                  'mul_1_w': {'shape': np.array([3]),
                                              'value': np.array([0.67419986, 0.55901699, 0.48795004])},
+                                 'const_add_1_w': {'shape': np.array([3]),
+                                             'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
                                  'add_1_w': {'shape': np.array([3]),
                                              'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
                                  'add_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1': {'can_be_fused': True},
                                  'add_1': {'can_be_fused': True},
-                                 'concat_data': {'is_output': True}
+                                 'concat_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
@@ -488,14 +563,15 @@ class BatchNormDecomposition(unittest.TestCase):
                              ('bn_var', 'bn_op'),
                              ('bn_op', 'bn_data'),
                              ('concat', 'concat_data'),
-                             ('bn_data', 'concat')
+                             ('bn_data', 'concat'),
+                             ('concat_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'bn_op': {'epsilon': 1.2, 'op': 'BatchNormalization', 'can_be_fused': False},
                              'bn_mean': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_var': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'bn_data': {'shape': np.array([1, 227, 227, 3])},
-                             'concat_data': {'is_output': True}
+                             'concat_data': {}
                              })
 
         del graph['placeholder_1']['placeholder_1_data'][0]['in']
@@ -504,23 +580,30 @@ class BatchNormDecomposition(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'add_1_data'),
                                  ('concat', 'concat_data'),
-                                 ('add_1_data', 'concat')
+                                 ('add_1_data', 'concat'),
+                                 ('concat_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]),
+                                             'value': np.array([0.67419986, 0.55901699, 0.48795004])},
                                  'mul_1_w': {'shape': np.array([3]),
                                              'value': np.array([0.67419986, 0.55901699, 0.48795004])},
+                                 'const_add_1_w': {'shape': np.array([3]),
+                                             'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
                                  'add_1_w': {'shape': np.array([3]),
                                              'value': np.array([-0.67419986, -1.11803399, -1.46385011])},
                                  'add_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1': {'can_be_fused': False},
                                  'add_1': {'can_be_fused': False},
-                                 'concat_data': {'is_output': True}
+                                 'concat_data': {}
                                  })
 
         graph.graph['layout'] = 'NHWC'
diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py b/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py
index 976dcb577..ee66dda02 100644
--- a/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py
+++ b/model-optimizer/mo/middle/passes/fusing/fuse_grouped_conv.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,14 +21,14 @@ import networkx as nx
 import numpy as np
 
 from mo.front.extractor import add_attrs_props
-from mo.graph.graph import Node, unique_id, get_inputs
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.eliminate import graph_clean_up
 from mo.utils.graph import pseudo_topological_sort
 from mo.middle.passes.fusing.helpers import get_next_operation, get_tensor_id
 
 
 # TODO: unit tests
-def concat_convolutions(graph: nx.MultiDiGraph, start_node: Node, last_node: Node):
+def concat_convolutions(graph: Graph, start_node: Node, last_node: Node):
     """
     This function converts group of convolutions into one
     """
@@ -130,10 +130,10 @@ def concat_convolutions(graph: nx.MultiDiGraph, start_node: Node, last_node: Nod
 
 
 # TODO: unit tests
-def grouped_convolutions_fusing(graph: nx.MultiDiGraph):
+def grouped_convolutions_fusing(graph: Graph):
     while True:
         is_fused = False
-        graph_clean_up(graph, ['TFCustomSubgraphCall'])
+        graph_clean_up(graph, ['TFCustomSubgraphCall', 'Shape'])
         nodes = pseudo_topological_sort(graph)
         for idx in nodes:
             node = Node(graph, idx)
diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops.py b/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops.py
index ade7a3cf3..9700a3eb2 100644
--- a/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops.py
+++ b/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,20 +17,19 @@
 import logging as log
 from collections import deque
 
-import networkx as nx
 import numpy as np
 
+from mo.front.common.partial_infer.utils import int64_array
 from mo.front.extractor import add_attrs_props
-from mo.graph.graph import Node, unique_id
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.eliminate import graph_clean_up
 from mo.utils.graph import pseudo_topological_sort
 from mo.ops.lin_op import Mul, Add
 from mo.ops.op import Op
-from mo.graph.graph import dump_graph_for_graphviz
 from mo.middle.passes.fusing.helpers import backward_bfs, forward_bfs, get_tensor_id, get_value_id
 
 
-def _fuse_mul(graph: nx.MultiDiGraph, node: Node, fuse_nodes: list, backward: bool = True):
+def _fuse_mul(graph: Graph, node: Node, fuse_nodes: list, backward: bool = True):
     """
     This function takes Mul node and array of convolution/fc nodes for further fusion
     Parameters
@@ -143,7 +142,7 @@ def _fuse_mul(graph: nx.MultiDiGraph, node: Node, fuse_nodes: list, backward: bo
     return is_fused
 
 
-def _fuse_add(graph: nx.MultiDiGraph, node: Node, fuse_nodes: list, backward: bool = True):
+def _fuse_add(graph: Graph, node: Node, fuse_nodes: list, backward: bool = True):
     """
     This function takes Add node and Convolution/FC nodes for further fusion and then deletes Add node
     In case if Convolution/FC Bias absence it will be created
@@ -188,7 +187,7 @@ def _fuse_add(graph: nx.MultiDiGraph, node: Node, fuse_nodes: list, backward: bo
 
         # Create BIAS data node if not exists
         if len(fuse_node.in_nodes()) <= 2:
-            bias_data = unique_id(graph, "bias_data")
+            bias_data = graph.unique_id("bias_data")
             data_type = fuse_node.in_node(1).data_type
             # Broadcast if scalar
             if value.size == 1:
@@ -199,7 +198,7 @@ def _fuse_add(graph: nx.MultiDiGraph, node: Node, fuse_nodes: list, backward: bo
             if not backward:
                 value = np.dot(fuse_node.in_node(1).value, value)
 
-            shape = value.shape
+            shape = int64_array(value.shape)
 
             graph.add_node(bias_data, **add_attrs_props(
                 dict(kind='data', precision="FP32", name=bias_data, value=value, shape=shape, data_type=data_type)))
@@ -235,7 +234,7 @@ def _fuse_add(graph: nx.MultiDiGraph, node: Node, fuse_nodes: list, backward: bo
     return is_fused
 
 
-def fuse_linear_ops(graph: nx.MultiDiGraph):
+def fuse_linear_ops(graph: Graph):
     """
     This function makes fusing of linear operations (Mul,Add) to Convolution/FC.
     """
diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops_test.py b/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops_test.py
index 30948e25e..a73bdd4de 100644
--- a/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops_test.py
+++ b/model-optimizer/mo/middle/passes/fusing/fuse_linear_ops_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -30,20 +30,26 @@ nodes_attributes = {
     'scaleshift_1': {'type': 'ScaleShift', 'kind': 'op', 'op': 'ScaleShift'},
     'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
+    'const_scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
+    'const_scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Mul and Add operations
     'mul_1': {'type': 'Mul', 'kind': 'op', 'op': 'Mul', 'can_be_fused': True},
     'mul_1_w': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'const_mul_1_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'mul_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     'add_1': {'type': 'Add', 'kind': 'op', 'op': 'Add', 'can_be_fused': True},
     'add_1_w': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'const_add_1_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'add_1_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # Mul2 and Add2 operations
     'mul_2': {'type': 'Mul', 'kind': 'op', 'op': 'Mul', 'can_be_fused': True},
     'mul_2_w': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'const_mul_2_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'mul_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     'add_2': {'type': 'Add', 'kind': 'op', 'op': 'Add', 'can_be_fused': True},
     'add_2_w': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'const_add_2_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'add_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     # Concat1 operation
     'concat_1': {'type': 'Concat', 'kind': 'op', 'op': 'Concat'},
@@ -52,21 +58,30 @@ nodes_attributes = {
     'conv_1': {'type': 'Convolution', 'kind': 'op', 'op': 'Conv2D', 'layout': 'NHWC'},
     'conv_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'conv_1_b': {'value': None, 'shape': None, 'kind': 'data'},
+    'const_conv_1_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
+    'const_conv_1_b': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'conv_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     'conv_2': {'type': 'Convolution', 'kind': 'op', 'op': 'Conv2D', 'layout': 'NHWC'},
     'conv_2_w': {'value': None, 'shape': None, 'kind': 'data'},
     'conv_2_b': {'value': None, 'shape': None, 'kind': 'data'},
+    'const_conv_2_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
+    'const_conv_2_b': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'conv_2_data': {'value': None, 'shape': None, 'kind': 'data'},
     # FullyConnected
     'fc_1': {'type': 'FullyConnected', 'kind': 'op', 'op': 'InnerProduct', 'layout': 'NHWC'},
     'fc_1_w': {'value': None, 'shape': None, 'kind': 'data'},
     'fc_1_b': {'value': None, 'shape': None, 'kind': 'data'},
+    'const_fc_1_w': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
+    'const_fc_1_b': {'value': None, 'shape': None, 'kind': 'op', 'data_type': None},
     'fc_1_data': {'value': None, 'shape': None, 'kind': 'data'},
     # Placeholders
     'placeholder_2': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
     'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     'placeholder_3': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
     'placeholder_3_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'op_output': {'kind': 'op', 'op': 'OpOutput'},
+    'op_output_1': {'kind': 'op', 'op': 'OpOutput'},
+    'op_output_2': {'kind': 'op', 'op': 'OpOutput'}
 }
 
 
@@ -78,37 +93,49 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
+                             ('conv_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                             'conv_1_data': {'is_output': True}
+                             'conv_1_data': {}
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([1, 2, 3]), (3, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                                 'conv_1_data': {'is_output': True}
+                                 'conv_1_data': {}
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=False)
@@ -123,37 +150,49 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
+                             ('conv_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                             'conv_1_data': {'is_output': True}
+                             'conv_1_data': {}
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([6, 6, 6]), (3, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                                 'conv_1_data': {'is_output': True}
+                                 'conv_1_data': {}
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=False)
@@ -168,20 +207,27 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('conv_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
+                             ('mul_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.ones(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.ones(96)},
                              'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
-                             'mul_1_data': {'shape': np.array([1, 55, 55, 96]), 'is_output': True},
+                             'mul_1_data': {'shape': np.array([1, 55, 55, 96])},
+                             'const_mul_1_w': {'shape': np.array([96]), 'value': np.array([x for x in range(96)])},
                              'mul_1_w': {'shape': np.array([96]), 'value': np.array([x for x in range(96)])},
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([x for x in range(96)]), 96)
@@ -190,16 +236,21 @@ class FuseMulTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
                                  'conv_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
-                                 'conv_1_data': {'shape': np.array([1, 55, 55, 96]), 'is_output': True}
+                                 'conv_1_data': {'shape': np.array([1, 55, 55, 96])}
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=True)
@@ -214,20 +265,27 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('conv_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
+                             ('mul_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.ones(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.ones(96)},
                              'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
-                             'mul_1_data': {'shape': np.array([1, 55, 55, 96]), 'is_output': True},
+                             'mul_1_data': {'shape': np.array([1, 55, 55, 96])},
+                             'const_mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.array([6])
@@ -236,16 +294,21 @@ class FuseMulTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
                                  'conv_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
-                                 'conv_1_data': {'shape': np.array([1, 55, 55, 96]), 'is_output': True}
+                                 'conv_1_data': {'shape': np.array([1, 55, 55, 96])}
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=True)
@@ -262,9 +325,12 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
@@ -276,21 +342,28 @@ class FuseMulTests(unittest.TestCase):
                              ('placeholder_3_data', 'concat_1'),
                              ('conv_1_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([6, 6, 6]), (3, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
@@ -302,15 +375,18 @@ class FuseMulTests(unittest.TestCase):
                                  ('placeholder_3_data', 'concat_1'),
                                  ('conv_1_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output'),
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3,
                                               'input_channel_dim': 2, 'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                                 'conv_1_data': {'is_output': True},
-                                 'placeholder_2_data': {'is_output': True},
-                                 'placeholder_3_data': {'is_output': True},
+                                 'conv_1_data': {},
+                                 'placeholder_2_data': {},
+                                 'placeholder_3_data': {},
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=False)
@@ -323,9 +399,12 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
@@ -337,21 +416,28 @@ class FuseMulTests(unittest.TestCase):
                              ('placeholder_3_data', 'concat_1'),
                              ('conv_1_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output'),
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([1]), 'value': np.array([6])},
                              'mul_1_w': {'shape': np.array([1]), 'value': np.array([6])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([6, 6, 6]), (3, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
@@ -363,15 +449,18 @@ class FuseMulTests(unittest.TestCase):
                                  ('placeholder_3_data', 'concat_1'),
                                  ('conv_1_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output'),
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3,
                                               'input_channel_dim': 2, 'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                                 'conv_1_data': {'is_output': True},
-                                 'placeholder_2_data': {'is_output': True},
-                                 'placeholder_3_data': {'is_output': True},
+                                 'conv_1_data': {},
+                                 'placeholder_2_data': {},
+                                 'placeholder_3_data': {},
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=False)
@@ -387,60 +476,80 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('mul_1_data', 'conv_2'),
+                             ('const_conv_2_w', 'conv_2_w'),
+                             ('const_conv_2_b', 'conv_2_b'),
                              ('conv_2_w', 'conv_2'),
                              ('conv_2_b', 'conv_2'),
                              ('conv_2', 'conv_2_data'),
                              ('conv_1_data', 'concat_1'),
                              ('conv_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
+                             'const_conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([1, 2, 3]), (3, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
                                  ('placeholder_1_data', 'conv_2'),
+                                 ('const_conv_2_w', 'conv_2_w'),
+                                 ('const_conv_2_b', 'conv_2_b'),
                                  ('conv_2_w', 'conv_2'),
                                  ('conv_2_b', 'conv_2'),
                                  ('conv_2', 'conv_2_data'),
                                  ('conv_1_data', 'concat_1'),
                                  ('conv_2_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
+                                 'const_conv_2_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_2_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
                                  })
@@ -457,37 +566,50 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
+                             ('const_fc_1_b', 'fc_1_b'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1_b', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
+                             ('fc_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
                              'mul_1_data': {'shape': np.array([1, 2048])},
+                             'const_mul_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                              'mul_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2},
+                             'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                              'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                             'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'fc_1_data': {'shape': np.array([1, 10260])},
                              })
         ref_weights = np.ones((10260, 2048)) * np.array([x for x in range(2048)])
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
+
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
+                                 'const_fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                                  'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'fc_1')], backward=False)
@@ -502,43 +624,57 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
+                             ('conv_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
-                             'mul_1_w': {'shape': np.array([1]), 'value': 6},
+                             'const_mul_1_w': {'shape': np.array([]), 'value': np.array(6)},
+                             'mul_1_w': {'shape': np.array([]), 'value': np.array(6)},
                              'conv_1': {'can_be_fused': False},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                             'conv_1_data': {'is_output': True}
+                             'conv_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
-                                 'mul_1_w': {'shape': np.array([1]), 'value': 6},
+                                 'const_mul_1_w': {'shape': np.array([]), 'value': np.array(6)},
+                                 'mul_1_w': {'shape': np.array([]), 'value': np.array(6)},
                                  'conv_1': {'can_be_fused': False},
+                                 'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                                  'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
-                                 'conv_1_data': {'is_output': True}
+                                 'conv_1_data': {}
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=False)
@@ -553,33 +689,41 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
+                             ('conv_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 112, 112, 6])},
                              'mul_1_data': {'shape': np.array([1, 112, 112, 6])},
+                             'const_mul_1_w': {'shape': np.array([6]), 'value': np.array([1, 2, 3, 4, 5, 6])},
                              'mul_1_w': {'shape': np.array([6]), 'value': np.array([1, 2, 3, 4, 5, 6])},
+                             'const_conv_1_w': {'shape': np.array([3, 3, 6, 1]), 'value': np.ones((3, 3, 6, 1))},
                              'conv_1_w': {'shape': np.array([3, 3, 6, 1]), 'value': np.ones((3, 3, 6, 1)),
                                           'output_channel_dim': 2, 'input_channel_dim': 2,
                                           'dims_number': 4},
-                             'conv_1_data': {'is_output': True}
+                             'conv_1_data': {}
                              })
         ref_weights = np.ones((3, 3, 6, 1)) * np.reshape(np.array([1, 2, 3, 4, 5, 6]), (6, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 112, 112, 6])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 2, 'input_channel_dim': 2,
                                               'dims_number': 4},
-                                 'conv_1_data': {'is_output': True}
+                                 'conv_1_data': {}
                                  })
 
         _fuse_mul(graph, Node(graph, 'mul_1'), [Node(graph, 'conv_1')], backward=False)
@@ -594,19 +738,24 @@ class FuseMulTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('conv_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
+                             ('mul_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 112, 112, 6])},
-                             'mul_1_data': {'shape': np.array([1, 112, 112, 6]), 'is_output': True},
+                             'mul_1_data': {'shape': np.array([1, 112, 112, 6])},
+                             'const_mul_1_w': {'shape': np.array([6]), 'value': np.array([1, 2, 3, 4, 5, 6])},
                              'mul_1_w': {'shape': np.array([6]), 'value': np.array([1, 2, 3, 4, 5, 6])},
+                             'const_conv_1_w': {'shape': np.array([3, 3, 6, 1]), 'value': np.ones((3, 3, 6, 1))},
                              'conv_1_w': {'shape': np.array([3, 3, 6, 1]), 'value': np.ones((3, 3, 6, 1)),
                                           'output_channel_dim': 2, 'input_channel_dim': 2,
                                           'dims_number': 4},
-                             'conv_1_data': {'is_output': True}
+                             'conv_1_data': {}
                              })
 
         ref_weights = np.ones((3, 3, 6, 1)) * np.reshape(np.array([1, 2, 3, 4, 5, 6]), (6, 1))
@@ -614,10 +763,13 @@ class FuseMulTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('conv_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 112, 112, 6])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 2, 'input_channel_dim': 2,
                                               'dims_number': 4},
@@ -638,21 +790,29 @@ class FuseAddTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
                              ('add_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
+                             ('const_fc_1_b', 'fc_1_b'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1_b', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
+                             ('fc_1_data', 'op_output')
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
                              'add_1_data': {'shape': np.array([1, 2048])},
+                             'const_add_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                              'add_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2},
+                             'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                              'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                             'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'fc_1_data': {'shape': np.array([1, 10260])},
                              })
         ref_weights = np.ones((10260, 2048))
         ref_biases = np.ones(10260) + np.dot(np.ones((10260, 2048)), np.array([x for x in range(2048)]))
@@ -660,16 +820,21 @@ class FuseAddTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
+                                 'const_fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
                                  'fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         _fuse_add(graph, Node(graph, 'add_1'), [Node(graph, 'fc_1')], backward=False)
@@ -684,16 +849,21 @@ class FuseAddTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
                              ('fc_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output_1')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
-                             'add_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'add_1_data': {'shape': np.array([1, 10260])},
+                             'const_add_1_w': {'shape': np.array([10260]), 'value': np.array([x for x in range(10260)])},
                              'add_1_w': {'shape': np.array([10260]), 'value': np.array([x for x in range(10260)]),
                                          'data_type': None},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2, 'data_type': None},
@@ -706,16 +876,21 @@ class FuseAddTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
+                                 'const_fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
                                  'fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         _fuse_add(graph, Node(graph, 'add_1'), [Node(graph, 'fc_1')], backward=True)
@@ -730,15 +905,20 @@ class FuseAddTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
                              ('fc_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
-                             'add_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'add_1_data': {'shape': np.array([1, 10260])},
+                             'const_add_1_w': {'shape': np.array([1]), 'value': 6, 'data_type': None},
                              'add_1_w': {'shape': np.array([1]), 'value': 6, 'data_type': None},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2, 'data_type': None},
@@ -751,16 +931,22 @@ class FuseAddTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
+
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
+                                 'const_fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
                                  'fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         _fuse_add(graph, Node(graph, 'add_1'), [Node(graph, 'fc_1')], backward=True)
@@ -775,43 +961,58 @@ class FuseAddTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
                              ('add_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
+                             ('const_fc_1_b', 'fc_1_b'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1_b', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
+                             ('fc_1_data', 'op_output')
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
                              'add_1_data': {'shape': np.array([1, 2048])},
+                             'const_add_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                              'add_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                              'fc_1': {'can_be_fused': False},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2},
+                             'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                              'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                             'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'fc_1_data': {'shape': np.array([1, 10260])},
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'add_1'),
+                                 ('const_add_1_w', 'add_1_w'),
                                  ('add_1_w', 'add_1'),
                                  ('add_1', 'add_1_data'),
                                  ('add_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
                                  'add_1_data': {'shape': np.array([1, 2048])},
+                                 'const_add_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                                  'add_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                                  'fc_1': {'can_be_fused': False},
+                                 'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                                  'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                                  'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         _fuse_add(graph, Node(graph, 'add_1'), [Node(graph, 'fc_1')], backward=False)
@@ -830,60 +1031,80 @@ class FuseLinOpsTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('mul_1_data', 'conv_2'),
+                             ('const_conv_2_w', 'conv_2_w'),
+                             ('const_conv_2_b', 'conv_2_b'),
                              ('conv_2_w', 'conv_2'),
                              ('conv_2_b', 'conv_2'),
                              ('conv_2', 'conv_2_data'),
                              ('conv_1_data', 'concat_1'),
                              ('conv_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
+                             'const_conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
         ref_weights = np.ones((11, 11, 3, 96)) * np.reshape(np.array([1, 2, 3]), (3, 1))
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
                                  ('placeholder_1_data', 'conv_2'),
+                                 ('const_conv_2_w', 'conv_2_w'),
+                                 ('const_conv_2_b', 'conv_2_b'),
                                  ('conv_2_w', 'conv_2'),
                                  ('conv_2_b', 'conv_2'),
                                  ('conv_2', 'conv_2_data'),
                                  ('conv_1_data', 'concat_1'),
                                  ('conv_2_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
+                                 'const_conv_2_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'conv_2_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
                                  })
@@ -900,37 +1121,49 @@ class FuseLinOpsTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
+                             ('const_fc_1_b', 'fc_1_b'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1_b', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
+                             ('fc_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
                              'mul_1_data': {'shape': np.array([1, 2048])},
+                             'const_mul_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
                              'mul_1_w': {'shape': np.array([2048]), 'value': np.array([x for x in range(2048)])},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2},
+                             'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                              'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                             'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'fc_1_data': {'shape': np.array([1, 10260])},
                              })
         ref_weights = np.ones((10260, 2048)) * np.array([x for x in range(2048)])
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
+                                 'const_fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
                                  'fc_1_b': {'shape': np.array([10260]), 'value': np.ones(10260)},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         fuse_linear_ops(graph)
@@ -945,15 +1178,20 @@ class FuseLinOpsTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'fc_1'),
+                             ('const_fc_1_w', 'fc_1_w'),
                              ('fc_1_w', 'fc_1'),
                              ('fc_1', 'fc_1_data'),
                              ('fc_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 2048])},
-                             'add_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                             'add_1_data': {'shape': np.array([1, 10260])},
+                             'const_add_1_w': {'shape': np.array([1]), 'value': np.array([6]), 'data_type': None},
                              'add_1_w': {'shape': np.array([1]), 'value': np.array([6]), 'data_type': None},
+                             'const_fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048))},
                              'fc_1_w': {'shape': np.array([10260, 2048]), 'value': np.ones((10260, 2048)),
                                         'output_channel_dim': 0, 'input_channel_dim': 1,
                                         'dims_number': 2, 'data_type': None},
@@ -966,16 +1204,21 @@ class FuseLinOpsTests(unittest.TestCase):
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'fc_1'),
+                                 ('const_fc_1_w', 'fc_1_w'),
+                                 ('const_fc_1_b', 'fc_1_b'),
                                  ('fc_1_w', 'fc_1'),
                                  ('fc_1_b', 'fc_1'),
                                  ('fc_1', 'fc_1_data'),
+                                 ('fc_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 2048])},
+                                 'const_fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights},
                                  'fc_1_w': {'shape': ref_weights.shape, 'value': ref_weights,
                                             'output_channel_dim': 0, 'input_channel_dim': 1,
                                             'dims_number': 2},
+                                 'const_fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
                                  'fc_1_b': {'shape': ref_biases.shape, 'value': ref_biases},
-                                 'fc_1_data': {'shape': np.array([1, 10260]), 'is_output': True},
+                                 'fc_1_data': {'shape': np.array([1, 10260])},
                                  })
 
         fuse_linear_ops(graph)
@@ -991,51 +1234,68 @@ class FuseLinOpsTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1_data', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1_data', 'add_1'),
+                             ('const_add_1_w', 'add_1_w'),
                              ('add_1_w', 'add_1'),
                              ('add_1', 'add_1_data'),
                              ('concat_1', 'concat_1_data'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('add_1_data', 'concat_1'),
                              ('mul_1_data', 'concat_1'),
-                             ('add_1_data', 'mul_1')],
-
+                             ('add_1_data', 'mul_1'),
+                             ('concat_1_data', 'op_output')
+                             ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_conv_1_w': {'shape': np.array([1, 1, 3, 3]), 'value': np.zeros((1, 1, 3, 3))},
                              'conv_1_w': {'shape': np.array([1, 1, 3, 3]), 'value': np.zeros((1, 1, 3, 3)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([3]), 'value': np.zeros(3)},
                              'conv_1_b': {'shape': np.array([3]), 'value': np.zeros(3)},
                              'conv_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'add_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([1]), 'value': np.array([6])},
                              'mul_1_w': {'shape': np.array([1]), 'value': np.array([6])},
+                             'const_add_1_w': {'shape': np.array([1]), 'value': np.array([1])},
                              'add_1_w': {'shape': np.array([1]), 'value': np.array([1])},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1_data', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1_data', 'concat_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('conv_1_data', 'mul_1'),
                                  ('concat_1', 'concat_1_data'),
                                  ('mul_1', 'mul_1_data'),
-                                 ('mul_1_data', 'concat_1')],
+                                 ('mul_1_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
+                                 ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_conv_1_w': {'shape': np.array([1, 1, 3, 3]), 'value': np.zeros((1, 1, 3, 3))},
                                  'conv_1_w': {'shape': np.array([1, 1, 3, 3]), 'value': np.zeros((1, 1, 3, 3)),
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([3]), 'value': np.ones(3)},
                                  'conv_1_b': {'shape': np.array([3]), 'value': np.ones(3)},
                                  'conv_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([1]), 'value': np.array([6])},
                                  'mul_1_w': {'shape': np.array([1]), 'value': np.array([6])},
-                                 'concat_1_data': {'is_output': True}
+                                 'concat_1_data': {}
                                  })
 
         fuse_linear_ops(graph)
@@ -1051,69 +1311,92 @@ class FuseLinOpsTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('mul_1_data', 'conv_2'),
+                             ('const_conv_2_w', 'conv_2_w'),
+                             ('const_conv_2_b', 'conv_2_b'),
                              ('conv_2_w', 'conv_2'),
                              ('conv_2_b', 'conv_2'),
                              ('conv_2', 'conv_2_data'),
                              ('conv_1_data', 'concat_1'),
                              ('conv_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
                              'conv_2': {'can_be_fused': False},
+                             'const_conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
                                  ('mul_1_data', 'conv_2'),
+                                 ('const_conv_2_w', 'conv_2_w'),
+                                 ('const_conv_2_b', 'conv_2_b'),
                                  ('conv_2_w', 'conv_2'),
                                  ('conv_2_b', 'conv_2'),
                                  ('conv_2', 'conv_2_data'),
                                  ('conv_1_data', 'concat_1'),
                                  ('conv_2_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                                  'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
                                  'conv_2': {'can_be_fused': False},
+                                 'const_conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                                  'conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
-                                 'concat_1_data': {'is_output': True}
+                                 'concat_1_data': {}
                                  })
 
         fuse_linear_ops(graph)
@@ -1129,69 +1412,91 @@ class FuseLinOpsTests(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'mul_1'),
+                             ('const_mul_1_w', 'mul_1_w'),
                              ('mul_1_w', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'conv_1'),
+                             ('const_conv_1_w', 'conv_1_w'),
+                             ('const_conv_1_b', 'conv_1_b'),
                              ('conv_1_w', 'conv_1'),
                              ('conv_1_b', 'conv_1'),
                              ('conv_1', 'conv_1_data'),
                              ('mul_1_data', 'conv_2'),
+                             ('const_conv_2_w', 'conv_2_w'),
+                             ('const_conv_2_b', 'conv_2_b'),
                              ('conv_2_w', 'conv_2'),
                              ('conv_2_b', 'conv_2'),
                              ('conv_2', 'conv_2_data'),
                              ('conv_1_data', 'concat_1'),
                              ('conv_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1': {'can_be_fused': False},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                             'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                             'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
+                             'const_conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                              'conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                           'output_channel_dim': 3, 'input_channel_dim': 2,
                                           'dims_number': 4},
+                             'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                              'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
-                             'concat_1_data': {'is_output': True}
+                             'concat_1_data': {}
                              })
 
         graph_ref = build_graph(nodes_attributes,
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'mul_1'),
+                                 ('const_mul_1_w', 'mul_1_w'),
                                  ('mul_1_w', 'mul_1'),
                                  ('mul_1', 'mul_1_data'),
                                  ('mul_1_data', 'conv_1'),
+                                 ('const_conv_1_w', 'conv_1_w'),
+                                 ('const_conv_1_b', 'conv_1_b'),
                                  ('conv_1_w', 'conv_1'),
                                  ('conv_1_b', 'conv_1'),
                                  ('conv_1', 'conv_1_data'),
                                  ('mul_1_data', 'conv_2'),
+                                 ('const_conv_2_w', 'conv_2_w'),
+                                 ('const_conv_2_b', 'conv_2_b'),
                                  ('conv_2_w', 'conv_2'),
                                  ('conv_2_b', 'conv_2'),
                                  ('conv_2', 'conv_2_data'),
                                  ('conv_1_data', 'concat_1'),
                                  ('conv_2_data', 'concat_1'),
-                                 ('concat_1', 'concat_1_data')
+                                 ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1': {'can_be_fused': False},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
+                                 'const_mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
+                                 'const_conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                                  'conv_1_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_1_data': {'shape': np.array([1, 55, 55, 96])},
+                                 'const_conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96))},
                                  'conv_2_w': {'shape': np.array([11, 11, 3, 96]), 'value': np.ones((11, 11, 3, 96)),
                                               'output_channel_dim': 3, 'input_channel_dim': 2,
                                               'dims_number': 4},
+                                 'const_conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_b': {'shape': np.array([96]), 'value': np.zeros(96)},
                                  'conv_2_data': {'shape': np.array([1, 55, 55, 96])},
-                                 'concat_1_data': {'is_output': True}
+                                 'concat_1_data': {}
                                  })
 
         fuse_linear_ops(graph)
diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq.py b/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq.py
index e608daf22..1c96f6b44 100644
--- a/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq.py
+++ b/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,17 +21,16 @@ import networkx as nx
 import numpy as np
 
 from mo.front.extractor import add_attrs_props
-from mo.graph.graph import Node, unique_id
 from mo.middle.passes.eliminate import graph_clean_up
 from mo.utils.graph import pseudo_topological_sort
 from mo.ops.lin_op import Mul, Add
 from mo.middle.passes.eliminate import merge_data_nodes
 from mo.ops.op import Op
-from mo.graph.graph import dump_graph_for_graphviz
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.fusing.helpers import backward_bfs, forward_bfs, get_tensor_id, get_value_id
 
 
-def _fuse_linear_sequence(graph: nx.MultiDiGraph, start_node: Node):
+def _fuse_linear_sequence(graph: Graph, start_node: Node):
     """
     This function finds the sequence of Mul/Add operations and replaces this sequence with two ops (Mul->Add).
     :param graph:
@@ -125,7 +124,7 @@ def _fuse_linear_sequence(graph: nx.MultiDiGraph, start_node: Node):
     return True
 
 
-def fuse_mul_add_sequence(graph: nx.MultiDiGraph):
+def fuse_mul_add_sequence(graph: Graph):
     """
     This function finds first valid Mul/Add node and pass it to fuse_linear_sequence where full sequence will be found
     """
diff --git a/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq_test.py b/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq_test.py
index d320b575a..c58ade451 100644
--- a/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq_test.py
+++ b/model-optimizer/mo/middle/passes/fusing/fuse_linear_seq_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -79,6 +79,7 @@ nodes_attributes = {
     'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     'placeholder_3': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
     'placeholder_3_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
 }
 
 
@@ -102,6 +103,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -110,7 +112,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -125,6 +126,7 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('add_1_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
                                  ('placeholder_1_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -133,7 +135,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'add_1_w': {'shape': np.array([1]), 'value': np.array([36])},
                                  'mul_1': {'can_be_fused': True},
                                  'add_1': {'can_be_fused': True},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -167,7 +168,8 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('placeholder_1_data', 'concat_1'),
                              ('mul_2_data', 'placeholder_2'),
                              ('placeholder_2', 'placeholder_2_data'),
-                             ('placeholder_2_data', 'concat_1')
+                             ('placeholder_2_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -177,7 +179,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -194,7 +195,8 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('placeholder_1_data', 'concat_1'),
                                  ('add_1_data', 'placeholder_2'),
                                  ('placeholder_2', 'placeholder_2_data'),
-                                 ('placeholder_2_data', 'concat_1')
+                                 ('placeholder_2_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -204,7 +206,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'add_1_w': {'shape': np.array([1]), 'value': np.array([36])},
                                  'mul_1': {'can_be_fused': True},
                                  'add_1': {'can_be_fused': True},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
         graph.graph['layout'] = 'NHWC'
@@ -234,7 +235,8 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('concat_1', 'concat_1_data'),
                              ('add_1_data', 'placeholder_2'),
                              ('placeholder_2', 'placeholder_2_data'),
-                             ('placeholder_2_data', 'concat_1')
+                             ('placeholder_2_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -244,7 +246,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -263,7 +264,8 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('concat_1', 'concat_1_data'),
                                  ('add_1_data', 'placeholder_2'),
                                  ('placeholder_2', 'placeholder_2_data'),
-                                 ('placeholder_2_data', 'concat_1')
+                                 ('placeholder_2_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -273,7 +275,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'mul_1_w': {'shape': np.array([1]), 'value': 6},
                                  'add_1_w': {'shape': np.array([1]), 'value': 6},
                                  'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -303,7 +304,8 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('concat_1', 'concat_1_data'),
                              ('mul_1_data', 'placeholder_2'),
                              ('placeholder_2', 'placeholder_2_data'),
-                             ('placeholder_2_data', 'concat_1')
+                             ('placeholder_2_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -313,7 +315,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -332,7 +333,8 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('concat_1', 'concat_1_data'),
                                  ('mul_1_data', 'placeholder_2'),
                                  ('placeholder_2', 'placeholder_2_data'),
-                                 ('placeholder_2_data', 'concat_1')
+                                 ('placeholder_2_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -342,7 +344,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'mul_1_w': {'shape': np.array([1]), 'value': 6},
                                  'add_1_w': {'shape': np.array([1]), 'value': np.array([36])},
                                  'mul_2_w': {'shape': np.array([1]), 'value': np.array([6])},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -373,7 +374,8 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('concat_1', 'concat_1_data'),
                              ('mul_1_data', 'placeholder_2'),
                              ('placeholder_2', 'placeholder_2_data'),
-                             ('placeholder_2_data', 'concat_1')
+                             ('placeholder_2_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -383,7 +385,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 0},
                              'mul_2_w': {'shape': np.array([1]), 'value': 1},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -396,13 +397,13 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('concat_1', 'concat_1_data'),
                                  ('mul_1_data', 'placeholder_2'),
                                  ('placeholder_2', 'placeholder_2_data'),
-                                 ('placeholder_2_data', 'concat_1')
+                                 ('placeholder_2_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_w': {'shape': np.array([1]), 'value': 6},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -434,7 +435,8 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('concat_1', 'concat_1_data'),
                              ('mul_1_data', 'placeholder_2'),
                              ('placeholder_2', 'placeholder_2_data'),
-                             ('placeholder_2_data', 'concat_1')
+                             ('placeholder_2_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -444,7 +446,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 1},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -460,14 +461,14 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('concat_1', 'concat_1_data'),
                                  ('mul_1_data', 'placeholder_2'),
                                  ('placeholder_2', 'placeholder_2_data'),
-                                 ('placeholder_2_data', 'concat_1')
+                                 ('placeholder_2_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_w': {'shape': np.array([1]), 'value': 6},
                                  'add_1_w': {'shape': np.array([1]), 'value': np.array([6])},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -498,7 +499,8 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('concat_1', 'concat_1_data'),
                              ('mul_1_data', 'placeholder_2'),
                              ('placeholder_2', 'placeholder_2_data'),
-                             ('placeholder_2_data', 'concat_1')
+                             ('placeholder_2_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
@@ -508,7 +510,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 0},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -524,14 +525,14 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('concat_1', 'concat_1_data'),
                                  ('mul_1_data', 'placeholder_2'),
                                  ('placeholder_2', 'placeholder_2_data'),
-                                 ('placeholder_2_data', 'concat_1')
+                                 ('placeholder_2_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'placeholder_2_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_w': {'shape': np.array([1]), 'value': 6},
                                  'mul_2_w': {'shape': np.array([1]), 'value': np.array([6])},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -558,6 +559,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2', 'mul_2_data'),
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -566,7 +568,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 1},
                              'add_1_w': {'shape': np.array([1]), 'value': 0},
                              'mul_2_w': {'shape': np.array([1]), 'value': 1},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -574,10 +575,9 @@ class LinSeqFusingTests(unittest.TestCase):
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
-                                 'concat_1_data': {'is_output': True}
-                                 },
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])}},
                                 nodes_with_edges_only=True)
 
         graph.graph['layout'] = 'NHWC'
@@ -603,6 +603,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2', 'mul_2_data'),
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -611,7 +612,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -625,13 +625,13 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('add_1', 'add_1_data'),
                                  ('add_1_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'add_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_w': {'shape': np.array([1]), 'value': np.array([36])},
                                  'add_1_w': {'shape': np.array([1]), 'value': np.array([36])},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -658,6 +658,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2', 'mul_2_data'),
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -666,7 +667,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([3]), 'value': np.array([6, 6, 6])},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -680,13 +680,13 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('add_1', 'add_1_data'),
                                  ('add_1_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'add_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([36, 36, 36])},
                                  'add_1_w': {'shape': np.array([3]), 'value': np.array([36, 36, 36])},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -716,6 +716,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -726,7 +727,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
                              'mul_1': {'can_be_fused': False},
                              'add_1': {'can_be_fused': False},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -744,6 +744,7 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('mul_2_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
                                  ('placeholder_1_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -754,7 +755,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'mul_2_w': {'shape': np.array([1]), 'value': 6},
                                  'mul_1': {'can_be_fused': False},
                                  'add_1': {'can_be_fused': False},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -784,6 +784,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -793,7 +794,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
                              'add_1': {'can_be_fused': False},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -811,6 +811,7 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('mul_2_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
                                  ('placeholder_1_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -820,7 +821,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'add_1_w': {'shape': np.array([1]), 'value': 6},
                                  'mul_2_w': {'shape': np.array([1]), 'value': 6},
                                  'add_1': {'can_be_fused': False},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
@@ -856,6 +856,7 @@ class LinSeqFusingTests(unittest.TestCase):
                              ('mul_2_w', 'mul_4'),
                              ('mul_4', 'mul_4_data'),
                              ('mul_4_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -866,7 +867,6 @@ class LinSeqFusingTests(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              },
                             nodes_with_edges_only=True)
 
@@ -884,6 +884,7 @@ class LinSeqFusingTests(unittest.TestCase):
                                  ('mul_3', 'mul_3_data'),
                                  ('mul_3_w', 'mul_3'),
                                  ('mul_3_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -894,7 +895,6 @@ class LinSeqFusingTests(unittest.TestCase):
                                  'add_1_w': {'shape': np.array([1]), 'value': np.array([36])},
                                  'mul_1': {'can_be_fused': True},
                                  'add_1': {'can_be_fused': True},
-                                 'concat_1_data': {'is_output': True}
                                  },
                                 nodes_with_edges_only=True)
 
diff --git a/model-optimizer/mo/middle/passes/fusing/helpers.py b/model-optimizer/mo/middle/passes/fusing/helpers.py
index c743c700e..f07331b0c 100644
--- a/model-optimizer/mo/middle/passes/fusing/helpers.py
+++ b/model-optimizer/mo/middle/passes/fusing/helpers.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/middle/passes/fusing/helpers_test.py b/model-optimizer/mo/middle/passes/fusing/helpers_test.py
index feb2020e7..365ba1044 100644
--- a/model-optimizer/mo/middle/passes/fusing/helpers_test.py
+++ b/model-optimizer/mo/middle/passes/fusing/helpers_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -64,6 +64,7 @@ nodes_attributes = {
     'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     'placeholder_3': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
     'placeholder_3_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
 }
 
 
@@ -79,9 +80,9 @@ class BFSTests(unittest.TestCase):
                              ('scaleshift_1_data', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'add_1'),
-                             ('add_1', 'add_1_data')
-                             ],
-                            {'add_1_data': {'is_output': True}})
+                             ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
+                             ])
 
         res = forward_bfs(Node(graph, 'placeholder_1'), ['ScaleShift', 'Mul'], ['Add'])
         self.assertTrue(len(res) == 1 and res[0].id == 'add_1', 'Add operation was not found by bfs')
@@ -105,9 +106,9 @@ class BFSTests(unittest.TestCase):
                              ('scaleshift_1_data', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'add_1'),
-                             ('add_1', 'add_1_data')
-                             ],
-                            {'add_1_data': {'is_output': True}})
+                             ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
+                             ])
 
         res = backward_bfs(Node(graph, 'add_1_data'), ['Add', 'ScaleShift', 'Mul'], ['Placeholder'])
         self.assertTrue(len(res) == 1 and res[0].id == 'placeholder_1', 'Placeholder operation was not found by bfs')
@@ -139,9 +140,9 @@ class BFSTests(unittest.TestCase):
                              ('mul_2', 'mul_2_data'),
                              ('add_1_data', 'concat_1'),
                              ('mul_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
-                             ],
-                            {'concat_1_data': {'is_output': True}})
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
+                             ])
 
         res = forward_bfs(Node(graph, 'placeholder_1'), ['ScaleShift', 'Mul', 'Add'], ['Concat'])
         self.assertTrue(len(res) == 1 and res[0].id == 'concat_1', 'Probably Concat operation was not found by bfs')
@@ -178,9 +179,9 @@ class BFSTests(unittest.TestCase):
                              ('mul_2', 'mul_2_data'),
                              ('add_1_data', 'concat_1'),
                              ('mul_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
-                             ],
-                            {'concat_1_data': {'is_output': True}})
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
+                             ])
 
         res = backward_bfs(Node(graph, 'concat_1'), ['ScaleShift', 'Mul', 'Add'], ['Placeholder'])
         self.assertTrue(len(res) == 0, 'Smth went wrong with bfs')
@@ -216,9 +217,9 @@ class BFSTests(unittest.TestCase):
                              ('mul_2', 'mul_2_data'),
                              ('add_1_data', 'concat_1'),
                              ('mul_2_data', 'concat_1'),
-                             ('concat_1', 'concat_1_data')
-                             ],
-                            {'concat_1_data': {'is_output': True}})
+                             ('concat_1', 'concat_1_data'),
+                             ('concat_1_data', 'op_output')
+                             ])
 
         res = backward_bfs(Node(graph, 'concat_1'), ['Mul', 'Add'], ['Placeholder'])
         self.assertTrue(len(res) == 0, 'Smth went wrong with bfs')
@@ -248,9 +249,9 @@ class BFSTests(unittest.TestCase):
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'add_1'),
                              ('add_1', 'add_1_data'),
-                             ('add_1_data', 'placeholder_1')
-                             ],
-                            {'add_1_data': {'is_output': True}})
+                             ('add_1_data', 'placeholder_1'),
+                             ('add_1_data', 'op_output')
+                             ])
 
         res = backward_bfs(Node(graph, 'add_1_data'), ['Add', 'ScaleShift', 'Mul', 'Placeholder'], ['Conv2D'])
         self.assertTrue(len(res) == 0, 'Sholdn\'t find any nodes due to cycle in graph')
@@ -268,9 +269,9 @@ class GetNextOperationTests(unittest.TestCase):
                              ('scaleshift_1_data', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'add_1'),
-                             ('add_1', 'add_1_data')
-                             ],
-                            {'add_1_data': {'is_output': True}})
+                             ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
+                             ])
 
         res = get_next_operation(Node(graph, 'mul_1'))
         self.assertTrue(len(res) == 1 and res[0].id == 'add_1', 'get_nex_operation returned wrong op')
@@ -283,9 +284,9 @@ class GetNextOperationTests(unittest.TestCase):
                              ('placeholder_1_data', 'add_1'),
                              ('mul_1', 'mul_1_data'),
                              ('mul_1_data', 'add_1'),
-                             ('add_1', 'add_1_data')
-                             ],
-                            {'add_1_data': {'is_output': True}})
+                             ('add_1', 'add_1_data'),
+                             ('add_1_data', 'op_output')
+                             ])
 
         res = get_next_operation(Node(graph, 'placeholder_1'))
         self.assertTrue(len(res) == 2 and all([x.id in ['add_1', 'mul_1'] for x in res]),
@@ -300,8 +301,8 @@ class GetNextOperationTests(unittest.TestCase):
                              ('placeholder_1_data', 'mul_1'),
                              ('placeholder_2_data', 'mul_1'),
                              ('mul_1', 'mul_1_data'),
-                             ],
-                            {'mul_1_data': {'is_output': True}})
+                             ('mul_1_data', 'op_output')
+                             ])
 
         res = get_next_operation(Node(graph, 'placeholder_1'))
         self.assertTrue(len(res) == 1 and res[0].id == 'mul_1', 'get_nex_operation returned wrong op')
diff --git a/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes.py b/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes.py
index d61c3131a..a67897beb 100644
--- a/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes.py
+++ b/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,10 +15,9 @@
 """
 
 import logging as log
-import networkx as nx
 import re
 
-from mo.graph.graph import get_graph_ops, Node
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.fusing.helpers import get_value_id
 
 
@@ -36,9 +35,9 @@ def _check_lin_op(node: Node, layout: str):
         log.info('[ FUSING ] Node {} marked as fusable'.format(node.id))
 
 
-def mark_unfused_nodes(graph: nx.MultiDiGraph, regex_masks: str):
+def mark_unfused_nodes(graph: Graph, regex_masks: str):
     regex_masks = [] if not regex_masks else regex_masks.split(',')
-    nodes = get_graph_ops(graph)
+    nodes = graph.get_op_nodes()
     for node in nodes:
         if node.has_valid('can_be_fused'):
             continue
diff --git a/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes_test.py b/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes_test.py
index f68c7ed3f..6224b9b23 100644
--- a/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes_test.py
+++ b/model-optimizer/mo/middle/passes/fusing/mark_unfused_nodes_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -65,6 +65,7 @@ nodes_attributes = {
     'placeholder_2_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
     'placeholder_3': {'shape': None, 'type': 'Placeholder', 'kind': 'op', 'op': 'Placeholder'},
     'placeholder_3_data': {'value': None, 'shape': None, 'kind': 'data', 'data_type': None},
+    'op_output': { 'kind': 'op', 'op': 'OpOutput'}
 }
 
 
@@ -86,6 +87,7 @@ class MarkFusedNodes(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -94,7 +96,6 @@ class MarkFusedNodes(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              })
 
         graph.graph['layout'] = 'NHWC'
@@ -121,6 +122,7 @@ class MarkFusedNodes(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -129,7 +131,6 @@ class MarkFusedNodes(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -157,6 +158,7 @@ class MarkFusedNodes(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -165,7 +167,6 @@ class MarkFusedNodes(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([1]), 'value': 6},
                              'add_1_w': {'shape': np.array([1]), 'value': 6},
                              'mul_2_w': {'shape': np.array([1]), 'value': 6},
-                             'concat_1_data': {'is_output': True}
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -191,6 +192,8 @@ class MarkFusedNodes(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
+
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -199,7 +202,6 @@ class MarkFusedNodes(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'add_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                             'concat_1_data': {'is_output': True}
                              })
         graph.graph['layout'] = 'NHWC'
 
@@ -225,6 +227,7 @@ class MarkFusedNodes(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -233,7 +236,6 @@ class MarkFusedNodes(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'add_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                             'concat_1_data': {'is_output': True}
                              })
         graph.graph['layout'] = 'NCHW'
 
@@ -259,6 +261,7 @@ class MarkFusedNodes(unittest.TestCase):
                                  ('mul_2_data', 'concat_1'),
                                  ('concat_1', 'concat_1_data'),
                                  ('placeholder_1_data', 'concat_1'),
+                                 ('concat_1_data', 'op_output')
                                  ],
                                 {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -267,7 +270,6 @@ class MarkFusedNodes(unittest.TestCase):
                                  'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'add_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                                  'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                                 'concat_1_data': {'is_output': True}
                                  })
             graph.graph['layout'] = 'NCHW'
 
@@ -293,6 +295,7 @@ class MarkFusedNodes(unittest.TestCase):
                              ('mul_2_data', 'concat_1'),
                              ('concat_1', 'concat_1_data'),
                              ('placeholder_1_data', 'concat_1'),
+                             ('concat_1_data', 'op_output')
                              ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'mul_1_data': {'shape': np.array([1, 227, 227, 3])},
@@ -301,7 +304,6 @@ class MarkFusedNodes(unittest.TestCase):
                              'mul_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'add_1_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
                              'mul_2_w': {'shape': np.array([3]), 'value': np.array([1, 2, 3])},
-                             'concat_1_data': {'is_output': True}
                              })
         graph.graph['layout'] = 'NHWC'
 
diff --git a/model-optimizer/mo/middle/passes/fusing/resnet_optimization.py b/model-optimizer/mo/middle/passes/fusing/resnet_optimization.py
index 8e6481adf..6f78a39e3 100644
--- a/model-optimizer/mo/middle/passes/fusing/resnet_optimization.py
+++ b/model-optimizer/mo/middle/passes/fusing/resnet_optimization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,10 +16,9 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.passes.fusing.helpers import get_next_operation
 from mo.ops.pooling import Pooling
 from mo.utils.graph import pseudo_topological_sort
@@ -32,7 +31,7 @@ def _clean_fw_tensor_attrs(node: Node):
             node[attr] = None
 
 
-def _insert_pooling(graph: nx.MultiDiGraph, first_node: Node, second_node: Node, spatial_dims):
+def _insert_pooling(graph: Graph, first_node: Node, second_node: Node, spatial_dims):
     """
     This function inserts point wise pooling layer between two nodes
     """
@@ -70,7 +69,7 @@ def _check_next_ops(next_ops: list):
     return stride_props, status
 
 
-def _simple_stride_prop(graph: nx.MultiDiGraph, node: Node, spatial_dims, supported=True):
+def _simple_stride_prop(graph: Graph, node: Node, spatial_dims, supported=True):
     """
     This function handles stride propagation for op nodes. If node is in supported ops dict so this is supported operation and we
     can propagate stride directly via this op (stride_prop will be set by using bottom stride_prop), otherwise we can't and
@@ -99,7 +98,7 @@ def _simple_stride_prop(graph: nx.MultiDiGraph, node: Node, spatial_dims, suppor
     _clean_fw_tensor_attrs(node.out_node())
 
 
-def _conv_stride_prop(graph: nx.MultiDiGraph, node: Node, spatial_dims, supported=True):
+def _conv_stride_prop(graph: Graph, node: Node, spatial_dims, supported=True):
     """
     This function handles convolution stride propagation. There is two cases: conv->(op) and conv->conv. In first case
     we propagate stride from op, and in second case we also change stride for second conv
@@ -138,11 +137,12 @@ supported_ops = {
 }
 
 
-def _stride_propagation(graph: nx.MultiDiGraph, spatial_dims):
+def _stride_propagation(graph: Graph, spatial_dims):
     """
     This function do stride propagation for all op nodes
     """
-    nodes = [Node(graph, x) for x in pseudo_topological_sort(graph, reverse=True) if Node(graph, x).kind == 'op']
+    nodes = [Node(graph, x) for x in pseudo_topological_sort(graph, reverse=True) if
+             Node(graph, x).kind == 'op' and Node(graph, x).soft_get('type') != 'Const']
 
     for node in nodes:
         if node.soft_get('type') in supported_ops:
@@ -155,7 +155,7 @@ def _stride_propagation(graph: nx.MultiDiGraph, spatial_dims):
             _simple_stride_prop(graph, node, spatial_dims, False)
 
 
-def stride_optimization(graph: nx.MultiDiGraph):
+def stride_optimization(graph: Graph):
     """
     This is main function for stride optimization pass
     """
diff --git a/model-optimizer/mo/middle/passes/fusing/resnet_optimization_test.py b/model-optimizer/mo/middle/passes/fusing/resnet_optimization_test.py
index 0065775ac..ca68f52db 100644
--- a/model-optimizer/mo/middle/passes/fusing/resnet_optimization_test.py
+++ b/model-optimizer/mo/middle/passes/fusing/resnet_optimization_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/middle/passes/infer.py b/model-optimizer/mo/middle/passes/infer.py
index e6f46e8d1..9d75b9aed 100644
--- a/model-optimizer/mo/middle/passes/infer.py
+++ b/model-optimizer/mo/middle/passes/infer.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,22 +21,16 @@ import numpy as np
 
 # TODO remove it
 from mo.front.extractor import update_ie_fields
-from mo.graph.graph import Node, get_outputs, get_node_id_by_name, dump_graph_for_graphviz
-from mo.middle.passes.eliminate import get_nodes_with_attributes
-from mo.middle.pattern_match import apply_pattern, for_each_sub_graph
-from mo.ops.lin_op import Mul, Add
-from mo.ops.op import Op
-from mo.utils.error import Error
-from mo.utils.utils import refer_to_faq_msg
+from mo.graph.graph import Node, Graph
 from mo.graph.graph import dict_includes
+from mo.middle.pattern_match import for_each_sub_graph
+from mo.utils.error import Error
+from mo.utils.utils import refer_to_faq_msg, shrink_str_value
 
 
 def log_debug_dict(nodes_per_port: dict, direction_name: str):
     for port, node in nodes_per_port.items():
-        value = str(node.soft_get('value'))
-        max_symbols = 100
-        if len(value) > max_symbols:
-            value = value.strip('\n')[:max_symbols - 3] + '...'
+        value = shrink_str_value(node.soft_get('value'))
         log.debug('{}[{}]: shape = {}, value = {}'.format(direction_name, port, node.soft_get('shape'), value))
 
 
@@ -46,7 +40,7 @@ def is_fully_defined_shape(shape: np.ndarray):
     return True
 
 
-def control_flow_infer(graph: nx.MultiDiGraph, node_name: str):
+def control_flow_infer(graph: Graph, node_name: str):
     """
        Executes constant control flow. Propagates nodes executability
     """
@@ -77,24 +71,7 @@ def control_flow_infer(graph: nx.MultiDiGraph, node_name: str):
             mark_executability(out_data, is_executable)
 
 
-def delete_not_executable(graph: nx.MultiDiGraph):
-    nodes_to_remove = set()
-    for node_name, node_attrs in graph.nodes(data=True):
-        if node_attrs['kind'] == 'data' and 'executable' in node_attrs and not node_attrs['executable']:
-            [nodes_to_remove.add(op) for op, _ in graph.in_edges(node_name)]
-            nodes_to_remove.add(node_name)
-    log.debug('Removing the following not executable nodes: {}'.format('\n'.join(sorted(map(str, nodes_to_remove)))))
-    graph.remove_nodes_from(nodes_to_remove)
-
-
-def delete_control_flow_edges(graph: nx.MultiDiGraph):
-    for u, v, k, attrs in list(graph.edges(keys=True, data=True)):
-        if 'control_flow_edge' in attrs and attrs['control_flow_edge']:
-            graph.remove_edge(u, v, k)
-            log.debug('Removing control flow edge from {} to {}'.format(u, v))
-
-
-def exit_bound_edges(graph: nx.MultiDiGraph, sources: list, end_node_attrs: dict):
+def exit_bound_edges(graph: Graph, sources: list, end_node_attrs: dict):
     """
     Finds all descendant nodes for each node from 'sources' that have given attributes from end_node_attrs.
     For each found node, create a tuple with a given element from 'source' and the node.
@@ -107,14 +84,14 @@ def exit_bound_edges(graph: nx.MultiDiGraph, sources: list, end_node_attrs: dict
     return result
 
 
-def partial_infer(graph: nx.MultiDiGraph, start_node: str = None):
+def partial_infer(graph: Graph, start_node: str = None):
     """
     Tries to execute constant parts of the graph and deduce as much as possible
     information following the data flow, e.g. calculate and propagate shapes and
     constant values. Partially or completely defined values are stored in data
     nodes (kind='data').
     """
-    cycle_nodes = get_nodes_with_attributes(graph, is_cyclic=True)
+    cycle_nodes = graph.get_nodes_with_attributes(is_cyclic=True)
     cycle_nodes = [Node(graph, node).out_node().id for node in cycle_nodes]
     ebunch_cyclic = list(graph.out_edges(nbunch=cycle_nodes, data=True, keys=True))
     ebunch_reconnected = exit_bound_edges(graph, sources=cycle_nodes, end_node_attrs={'op': 'Exit'})
@@ -138,7 +115,7 @@ def partial_infer(graph: nx.MultiDiGraph, start_node: str = None):
     debug_logger = log.getLogger().isEnabledFor(log.DEBUG)
 
     nx.set_node_attributes(G=graph, name='executable',
-                           values={n: True for n in get_nodes_with_attributes(graph, kind='data')})
+                           values={n: True for n in graph.get_nodes_with_attributes(kind='data')})
 
     for n in nodes:
         # Data Flow Infer
@@ -165,6 +142,8 @@ def partial_infer(graph: nx.MultiDiGraph, start_node: str = None):
                         log.debug('Outputs:')
                         log_debug_dict(node.out_nodes(), 'output')
 
+                    not_all_output_shapes = False
+
                     for out_port, out_node in out_nodes.items():
                         not_all_output_shapes = False
                         if not out_node.has_valid('shape'):
@@ -217,30 +196,16 @@ def partial_infer(graph: nx.MultiDiGraph, start_node: str = None):
                         refer_to_faq_msg(38)) from err
         control_flow_infer(graph, n)
 
-    not_fully_inferred = get_nodes_with_attributes(graph, is_not_fully_inferred=True)
+    not_fully_inferred = graph.get_nodes_with_attributes(is_not_fully_inferred=True)
     for n in not_fully_inferred:
         node = Node(graph, n)
         if node.has('infer') and not node.infer is None:
             node.infer(node)
 
-    #delete_not_executable(graph)
     return graph
 
 
-def check_for_cycle(graph: nx.MultiDiGraph):
-    is_acyclic = nx.is_directed_acyclic_graph(graph)
-    if not is_acyclic:
-        raise Error('Graph contains a cycle. Can not proceed. ' + refer_to_faq_msg(97))
-
-
-def mark_outputs(graph: nx.MultiDiGraph):
-    nx.set_node_attributes(G=graph, name='is_output', values=False)
-    for node in graph.nodes():
-        if graph.node[node]['kind'] == 'data' and len(get_outputs(graph, node)) == 0:
-            nx.set_node_attributes(G=graph, name='is_output', values={node: True})
-
-
-def override_batch(graph: nx.MultiDiGraph, batch: int):
+def override_batch(graph: Graph, batch: int):
     """
     Overrides batch for nodes with 'op' param set to 'Placeholder'
     Parameters
@@ -250,7 +215,7 @@ def override_batch(graph: nx.MultiDiGraph, batch: int):
     """
     if batch is not None:
         for node_id, data in graph.nodes(data=True):
-            if 'op' in data and data['op'] == 'Placeholder':
+            if 'op' in data and data['op'] == 'Placeholder' and not data.get('fixed_batch', False):
                 if len(data['shape']) == 0 or data['shape'][0] not in (-1, 0, 1):
                     raise Error(('The input layer {} has a shape {} defined in the model. \n\n' +
                                  'When you use -b (--batch) option, Model Optimizer applies its value to the first ' +
@@ -264,7 +229,7 @@ def override_batch(graph: nx.MultiDiGraph, batch: int):
                 data['shape'][0] = batch
 
 
-def override_placeholder_shapes(graph: nx.MultiDiGraph, user_shapes: dict, batch=None):
+def override_placeholder_shapes(graph: Graph, user_shapes: dict, batch=None):
     """
     This function overrides shapes for nodes with 'op' param set to 'Placeholder' with shapes defined by users (only
     for inputs without in/out port specified).
@@ -277,7 +242,7 @@ def override_placeholder_shapes(graph: nx.MultiDiGraph, user_shapes: dict, batch
         # DON'T MOVE UPPER!!! WE NEED TO SET BATCH FIRST
         # user did not specify neither shapes nor inputs, keep models values
         return
-    placeholders = get_nodes_with_attributes(graph, kind='op', op='Placeholder')
+    placeholders = graph.get_nodes_with_attributes(kind='op', op='Placeholder')
     for node_id in placeholders:
         node_attrs = graph.node[node_id]
         shape = None
@@ -293,141 +258,7 @@ def override_placeholder_shapes(graph: nx.MultiDiGraph, user_shapes: dict, batch
             node_attrs['shape'][0] = batch
 
 
-def _scale_input_action_mul(graph: nx.MultiDiGraph, match: dict, scale: float):
-    assert (len(match['placeholder'].out_nodes()))
-
-    tinput = match['placeholder']
-    if not tinput.has_valid('shape'):
-        raise Error("Node {} has not valid shape attribute".format(tinput.id))
-
-    input_shape = tinput.shape
-    toutput = match['data']
-
-    # Create Mul node
-    value = np.array([1 / scale])
-
-    # Disconnect input with data node
-    graph.remove_edge(tinput.id, toutput.id)
-
-    # Create Mul node
-    mul_node = Mul(graph, dict(name="Mul1_"))
-    mul_data = Op.create_input_data_node(graph, "data_mul_scale_", np.array(value))
-    Op.expand_node_shape(mul_data, len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0)
-    mul_input = Op.create_data_node(graph, tinput, {'shape': toutput.shape})
-
-    mul_node.create_node_with_data(inputs=[mul_input, mul_data], data_nodes=toutput)
-
-
-def scale_input(graph: nx.MultiDiGraph, scale: float):
-    """
-    Searches for all entries of Placeholder in graph and passes it to the the replace transform
-    Args:
-        graph: an instance of nx graph
-        scale: integer value for the scale
-    """
-    if scale is None or scale == 1:
-        return
-
-    apply_pattern(
-        graph,
-        nodes=[
-            ('placeholder', dict(kind='op', op='Placeholder')),
-            ('data', dict(kind='data'))],
-        edges=[
-            ('placeholder', 'data'), ],
-        action=lambda graph, match: _scale_input_action_mul(graph, match, scale)
-    )
-
-
-def add_mean_scale_values(graph: nx.MultiDiGraph, values):
-    input_nodes = {}
-    for node in graph.nodes():
-        node = Node(graph, node)
-        if node.has_valid('op') and node.op == 'Placeholder':
-            input_nodes.update({node.id: node})
-
-    if not isinstance(values, dict):
-        if len(values) != len(input_nodes):
-            raise Error('Numbers of inputs and mean/scale values do not match. ' +
-                        refer_to_faq_msg(61))
-
-        data = np.copy(values)
-        values = {}
-        for idx, key in enumerate(input_nodes.keys()):
-            values.update(
-                {
-                    input_nodes[key]['name']: {
-                        'mean': data[idx][0],
-                        'scale': data[idx][1]
-                    }
-                }
-            )
-
-    for node_name in values:
-        node_id = get_node_id_by_name(graph, node_name)
-        node_mean_scale_values = values[node_name]
-        if node_id not in input_nodes:
-            # if the user cutted-off input of the network then input node name specified in the --scale_values
-            # or --mean_values doesn't correspond to a real input node generated by Model Optimizer. But the information
-            # about initial input node name is stored in Placeholder's attribute 'initial_node_name'
-            new_node_id = None
-            for placeholder in input_nodes.values():
-                if placeholder.has('initial_node_name') and placeholder.initial_node_name == node_name:
-                    new_node_id = placeholder.id
-                    break
-            if new_node_id is None:
-                raise Error('Input with name {} wasn\'t found!'.format(node_name) +
-                            refer_to_faq_msg(83))
-            node_id = new_node_id
-
-        input_node = Node(graph, node_id)
-        apply_scale(graph, input_node, node_mean_scale_values)
-        apply_mean_value(graph, input_node, node_mean_scale_values)
-
-
-def apply_scale(graph: nx.MultiDiGraph, input_node: Node, node_mean_scale_values: dict):
-    if 'scale' in node_mean_scale_values and node_mean_scale_values['scale'] is not None:
-        if all([x == 1 for x in node_mean_scale_values['scale']]):
-            return
-        out_node = input_node.out_node()
-        if not input_node.has_valid('shape'):
-            raise Error("Node {} has not valid shape attribute".format(input_node.id))
-        input_shape = input_node.shape
-
-        # Create Mul node
-        value = 1 / np.array(node_mean_scale_values['scale'])
-        graph.remove_edge(input_node.id, out_node.id)
-
-        mul_node = Mul(graph, dict(name="Mul_"))
-        mul_data = Op.create_input_data_node(graph, "data_mul_", np.array(value))
-        Op.expand_node_shape(mul_data, (len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0))
-        mul_input = Op.create_data_node(graph, input_node, {'shape': out_node.shape})
-
-        mul_node.create_node_with_data(inputs=[mul_input, mul_data], data_nodes=out_node)
-
-
-def apply_mean_value(graph: nx.MultiDiGraph, input_node: Node, node_mean_scale_values: dict):
-    if 'mean' in node_mean_scale_values and node_mean_scale_values['mean'] is not None:
-        if all([x == 0 for x in node_mean_scale_values['mean']]):
-            return
-        out_node = input_node.out_node()
-        if not input_node.has_valid('shape'):
-            raise Error("Node {} has not valid shape attribute".format(input_node.id))
-        input_shape = input_node.shape
-        # Create Add node
-        graph.remove_edge(input_node.id, out_node.id)
-
-        value = np.array(node_mean_scale_values['mean']) * (-1)
-
-        add_node = Add(graph, dict(name="Add_"))
-        add_data = Op.create_input_data_node(graph, "data_add_", np.array(value))
-        Op.expand_node_shape(add_data, (len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0))
-        add_input = Op.create_data_node(graph, input_node, {'shape': out_node.shape})
-
-        add_node.create_node_with_data(inputs=[add_input, add_data], data_nodes=out_node)
-
-
-def update_fully_connected_shapes(graph: nx.MultiDiGraph):
+def update_fully_connected_shapes(graph: Graph):
     nodes = nx.topological_sort(graph)
     while True:
         should_infer = False
@@ -453,7 +284,7 @@ def update_fully_connected_shapes(graph: nx.MultiDiGraph):
 
 # Convert MUL operation to Power layer in case when
 # mul op takes two inputs (scalar constant and tensor)
-def convert_mul_add_to_power(graph: nx.MultiDiGraph):
+def convert_mul_add_to_power(graph: Graph):
     for_each_sub_graph(graph, convert_mul_add_to_power)
     nodes = list(graph.nodes())
     for n in nodes:
diff --git a/model-optimizer/mo/middle/passes/infer_test.py b/model-optimizer/mo/middle/passes/infer_test.py
index d3b7e6597..794221ab7 100644
--- a/model-optimizer/mo/middle/passes/infer_test.py
+++ b/model-optimizer/mo/middle/passes/infer_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,11 +20,9 @@ import numpy as np
 
 from mo.front.common.partial_infer.concat import concat_infer
 from mo.graph.graph import Node
-from mo.middle.passes.infer import override_placeholder_shapes, partial_infer, add_mean_scale_values, scale_input, \
-    check_for_cycle
-from mo.utils.cli_parser import get_mean_scale_dictionary, parse_tuple_pairs
+from mo.middle.passes.infer import override_placeholder_shapes, partial_infer
 from mo.utils.error import Error
-from mo.utils.unittest.graph import build_graph, compare_graphs
+from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
                     'node_1_data': {'value': None, 'kind': 'data', 'data_type': None},
@@ -50,6 +48,7 @@ nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
                     'mul_1': {'type': None, 'kind': 'op', 'op': 'Mul'},
                     'mul_1_w': {'value': None, 'shape': None, 'kind': 'data'},
                     'mul_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput', 'infer': lambda x: None}
                     }
 
 
@@ -59,8 +58,10 @@ class TestInferPass(unittest.TestCase):
         Test for overriding shape in placeholder by shape from user_shapes.
         """
         graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder'}
                              },
                             nodes_with_edges_only=True)
@@ -76,8 +77,10 @@ class TestInferPass(unittest.TestCase):
         Test for case when user_shapes is not defined.
         """
         graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None, 'op': 'Placeholder'},
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None, 'op': 'Placeholder'},
                              'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder'}
                              },
                             nodes_with_edges_only=True)
@@ -92,8 +95,10 @@ class TestInferPass(unittest.TestCase):
         Test for case when user_shapes is not None, but it shouldn't rewrite shapes.
         """
         graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder'}
                              },
                             nodes_with_edges_only=True)
@@ -106,8 +111,10 @@ class TestInferPass(unittest.TestCase):
 
     def test_override_placeholder_shapes_dict(self):
         graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None, 'op': 'Placeholder'},
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None, 'op': 'Placeholder'},
                              'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder'}
                              },
                             nodes_with_edges_only=True)
@@ -185,8 +192,10 @@ class TestInferPass(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
-                             ('concat', 'node_3')],
-                            {'node_3': {'kind': 'data', 'is_output': True, 'shape': None, 'infer': None},
+                             ('concat', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'kind': 'data', 'shape': None, 'infer': None},
                              'node_1': {'kind': 'data', 'shape': np.array([1, 3, 227, 227]), 'infer': None},
                              'node_2': {'kind': 'data', 'shape': np.array([1, 3, 227, 227]), 'infer': None},
                              'concat': {'kind': 'op', 'axis': 2, 'infer': concat_infer}
@@ -219,8 +228,10 @@ class TestInferPass(unittest.TestCase):
 
     def test_partial_infer_no_shape(self):
         graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None, 'infer': None},
+                            [('node_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None, 'infer': None},
                              'node_1': {'shape': None, 'infer': None}
                              },
                             nodes_with_edges_only=True)
@@ -231,8 +242,10 @@ class TestInferPass(unittest.TestCase):
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
                              ('concat', 'node_3'),
-                             ('node_3', 'concat')],
-                            {'node_3': {'kind': 'data', 'is_output': True, 'shape': None, 'infer': None},
+                             ('node_3', 'concat'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'kind': 'data', 'shape': None, 'infer': None},
                              'node_1': {'kind': 'data', 'shape': np.array([1, 3, 227, 227]), 'infer': None},
                              'node_2': {'kind': 'data', 'shape': np.array([1, 3, 227, 227]), 'infer': None},
                              'concat': {'kind': 'op', 'axis': 2, 'infer': concat_infer}
@@ -242,268 +255,17 @@ class TestInferPass(unittest.TestCase):
         start_node = 'concat'
         self.assertRaises(Error, partial_infer, graph, start_node)
 
-    def test_add_mean_scale_values_with_data_name(self):
-        graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None, 'data_type': None},
-                             'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder', 'name': 'data',
-                                        'data_type': None}
-                             },
-                            nodes_with_edges_only=True)
-        graph.graph['layout'] = 'NCHW'
-        mean_values = parse_tuple_pairs('(124,117,104)')
-        scale_values = parse_tuple_pairs('')
-
-        # input = 'data'
-        mean_scale = get_mean_scale_dictionary(mean_values, scale_values, None)
-        self.assertEqual(len(graph), 2)
-        add_mean_scale_values(graph, mean_scale)
-        self.assertEqual(len(graph), 5)
-
-    def test_add_mean_scale_values_without_data_name(self):
-        graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None, 'data_type': None},
-                             'node_1': {'shape': np.array([1, 3, 227, 227]), 'op': 'Placeholder', 'name': 'data',
-                                        'data_type': None}
-                             },
-                            nodes_with_edges_only=True)
-        graph.graph['layout'] = 'NCHW'
-        mean_values = parse_tuple_pairs('(124,117,104)')
-        scale_values = parse_tuple_pairs('')
-        # input = None
-        mean_scale = get_mean_scale_dictionary(mean_values, scale_values, None)
-        self.assertEqual(len(graph), 2)
-        add_mean_scale_values(graph, mean_scale)
-        self.assertEqual(len(graph), 5)
-
-    def test_add_mean_scale_values1(self):
-        graph = build_graph(nodes_attributes,
-                            [('pl_1', 'pl_1_data'), ('pl_2', 'pl_2_data')],
-                            {'pl_1_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
-                             'pl_2_data': {'shape': np.array([1, 6]), 'infer': None},
-                             'pl_1': {'shape': np.array([1,3,38,38])},
-                             'pl_2': {'shape': np.array([1,6])},
-                             },
-                            nodes_with_edges_only=True)
-        graph.graph['layout'] = 'NCHW'
-        add_mean_scale_values(graph,
-                              {'pl_1': {'mean': np.array([1., 2., 3.])}, 'pl_2': {'mean': np.array([0., 0., 0.])}})
-        mul_op_cnt = 0
-        add_op_cnt = 0
-        for node in graph.nodes():
-            node = Node(graph, node)
-            if node.has_valid('op') and node.op == 'Mul':
-                mul_op_cnt += 1
-            if node.has_valid('op') and node.op == 'Add':
-                add_op_cnt += 1
-
-        self.assertEqual(add_op_cnt, 1, "Found more than one Add op in graph")
-        self.assertEqual(mul_op_cnt, 0, "Found Mul op in graph")
-
-    def test_optimize_scale_and_add_mean_values(self):
-        graph = build_graph(
-            nodes_attributes,
-            [
-                ('pl_1', 'pl_1_data')
-            ],
-            {
-                'pl_1_data': {
-                    'shape': np.array([1, 3, 38, 38]),
-                    'infer': None
-                },
-                'pl_1': {
-                    'shape': np.array([1,3,38,38])
-                }
-            },
-            nodes_with_edges_only=True
-        )
-        graph.graph['layout'] = 'NCHW'
-        add_mean_scale_values(graph,
-                              {
-                                  'pl_1': {
-                                      'scale': np.array([1.]),
-                                      'mean': np.array([1., 2., 3.])
-                                  }
-                              })
-        mul_op_cnt = 0
-        add_op_cnt = 0
-        for node in graph.nodes():
-            node = Node(graph, node)
-            if node.has_valid('op') and node.op == 'Mul':
-                mul_op_cnt += 1
-            if node.has_valid('op') and node.op == 'Add':
-                add_op_cnt += 1
-
-        self.assertEqual(add_op_cnt, 1, "Found more than one Add op in graph")
-        self.assertEqual(mul_op_cnt, 0, "Found Mul op in graph")
-
-    def test_optimize_mean_and_add_scale_values(self):
-        graph = build_graph(
-            nodes_attributes,
-            [
-                ('pl_1', 'pl_1_data')
-            ],
-            {
-                'pl_1_data': {
-                    'shape': np.array([1, 3, 38, 38]),
-                    'infer': None
-                },
-                'pl_1': {
-                    'shape': np.array([1,3,38,38])
-                }
-            },
-            nodes_with_edges_only=True
-        )
-        graph.graph['layout'] = 'NCHW'
-        add_mean_scale_values(graph,
-                              {
-                                  'pl_1': {
-                                      'scale': np.array([1.43]),
-                                      'mean': np.array([0., 0., 0.])
-                                  }
-                              })
-        mul_op_cnt = 0
-        add_op_cnt = 0
-        for node in graph.nodes():
-            node = Node(graph, node)
-            if node.has_valid('op') and node.op == 'Mul':
-                mul_op_cnt += 1
-            if node.has_valid('op') and node.op == 'Add':
-                add_op_cnt += 1
-
-        self.assertEqual(add_op_cnt, 0, "Found more than one Add op in graph")
-        self.assertEqual(mul_op_cnt, 1, "Found Mul op in graph")
-
-    def test_add_mean_scale_values3(self):
-        graph = build_graph(nodes_attributes,
-                            [('pl_1', 'pl_1_data')],
-                            {'pl_1_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
-                             'pl_1': {'shape': np.array([1,3,38,38])},
-                             },
-                            nodes_with_edges_only=True)
-        graph.graph['layout'] = 'NCHW'
-        add_mean_scale_values(graph, [[np.array([1., 2., 3.]), np.array([1., 2., 3.])]])
-
-        mul_op_cnt = 0
-        add_op_cnt = 0
-        for node in graph.nodes():
-            node = Node(graph, node)
-            if node.has_valid('op') and node.op == 'Mul':
-                mul_op_cnt += 1
-            if node.has_valid('op') and node.op == 'Add':
-                add_op_cnt += 1
-
-        self.assertEqual(add_op_cnt, 1, "Found more than one Add op in graph")
-        self.assertEqual(mul_op_cnt, 1, "Found more than one Nul op in graph")
-
-    def test_add_mean_scale_values_cut_graph(self):
-        """
-        Test case when user cutted start of the network and specified mean/scale value to the new input node 'node_3'.
-        """
-        graph = build_graph(nodes_attributes,
-                            [('pl_1', 'pl_1_data'),
-                             ('pl_2', 'pl_2_data'),
-                             ('pl_2_data', 'node_3'),
-                             ('node_3', 'node_3_data'),
-                             ('pl_1_data', 'node_1'),
-                             ('node_3_data', 'node_1'),
-                             ],
-                            {'pl_1_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
-                             'pl_2_data': {'shape': np.array([1, 3, 38, 38]), 'infer': None},
-                             'pl_2': {'initial_node_name': 'node_3', 'shape': np.array([1,3,38,38])},
-                             'pl_1': {'shape': np.array([1,3,38,38])},
-                             },
-                            nodes_with_edges_only=True)
-        graph.graph['layout'] = 'NCHW'
-        add_mean_scale_values(graph, {'pl_1': {'mean': np.array([1, 2, 3])}, 'node_3': {'scale': np.array([1, 2, 3])}})
-
-        mul_op_cnt = 0
-        add_op_cnt = 0
-        for node in graph.nodes():
-            node = Node(graph, node)
-            if node.has_valid('op') and node.op == 'Mul':
-                mul_op_cnt += 1
-            if node.has_valid('op') and node.op == 'Add':
-                add_op_cnt += 1
-
-        self.assertEqual(add_op_cnt, 1, "There should be exactly one Add op")
-        self.assertEqual(mul_op_cnt, 1, "There should be exactly one Mul op")
-        self.assertEqual(Node(graph, 'pl_2').out_node().out_node().op, 'Mul', "The Mul op should be added after pl_2")
-        self.assertEqual(Node(graph, 'pl_1').out_node().out_node().op, 'Add', "The Add op should be added after pl_1")
-
-
-class ScaleInputTests(unittest.TestCase):
-    def test_scale_input_1(self):
-        graph = build_graph(nodes_attributes,
-                            [('placeholder_1', 'placeholder_1_data')],
-                            {'placeholder_1_data': {'is_output': True},
-                             'placeholder_1': {'shape': np.array([1, 3, 224, 224])}
-                            },
-                            nodes_with_edges_only=True)
-
-        graph_ref = build_graph(nodes_attributes,
-                                [('placeholder_1', 'mul_1_data'),
-                                 ('mul_1_data', 'mul_1'),
-                                 ('mul_1_w', 'mul_1'),
-                                 ('mul_1', 'placeholder_1_data')],
-                                {'mul_1_w': {'shape': np.array([1, 1, 1]), 'value': np.array([1 / 255])},
-                                 'placeholder_1_data': {'is_output': True}},
-                                nodes_with_edges_only=True)
-        graph.graph['layout'] = 'NCHW'
-        scale_input(graph, 255)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1_data')
-        self.assertTrue(flag, resp)
-
-    def test_scale_input_2(self):
-        graph = build_graph(nodes_attributes,
-                            [('placeholder_1', 'placeholder_1_data')],
-                            {'placeholder_1_data': {'is_output': True}},
-                            nodes_with_edges_only=True)
-
-        graph_ref = build_graph(nodes_attributes,
-                                [('placeholder_1', 'placeholder_1_data')],
-                                {'placeholder_1_data': {'is_output': True}},
-                                nodes_with_edges_only=True)
-
-        scale_input(graph, 1)
-        (flag, resp) = compare_graphs(graph, graph_ref, 'placeholder_1_data')
-        self.assertTrue(flag, resp)
-
-    def test_check_for_cycle1(self):
-        # cyclic case
-        graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_1_data'),
-                             ('node_1_data', 'node_3'),
-                             ('node_3', 'node_3_data'),
-                             ('node_3_data', 'node_1')],
-                            nodes_with_edges_only=True)
-        with self.assertRaisesRegex(Error, 'Graph contains a cycle. Can not proceed.*'):
-            check_for_cycle(graph)
-
-    def test_check_for_cycle2(self):
-        # acyclic case
-        graph = build_graph(nodes_attributes,
-                            [('node_1', 'node_1_data'),
-                             ('node_1_data', 'node_3'),
-                             ('node_3', 'node_3_data'),
-                             ('node_3_data', 'mul_1'),
-                             ('mul_1_w', 'mul_1'),
-                             ('mul_1', 'mul_1_data')
-                             ],
-                            nodes_with_edges_only=True)
-        try:
-            check_for_cycle(graph)
-        except Error:
-            self.fail("Unexpected Error raised")
 
+class CycleTest(unittest.TestCase):
     def test_is_not_fully_inferred_param(self):
         # Node that have is_not_fully_inferred=True
         graph = build_graph(nodes_attributes,
                             [('node_1', 'concat'),
                              ('node_2', 'concat'),
-                             ('concat', 'node_3')],
-                            {'node_3': {'kind': 'data', 'is_output': True, 'shape': None, 'infer': None},
+                             ('concat', 'node_3'),
+                             ('node_3', 'op_output')
+                             ],
+                            {'node_3': {'kind': 'data', 'shape': None, 'infer': None},
                              'node_1': {'kind': 'data', 'shape': np.array([1, 3, 227, 227]), 'infer': None},
                              'node_2': {'kind': 'data', 'shape': np.array([1, 3, 227, 227]), 'infer': None},
                              'concat': {'kind': 'op', 'axis': 2, 'infer': concat_infer, 'is_not_fully_inferred': True}
diff --git a/model-optimizer/mo/middle/passes/l2normalization.py b/model-optimizer/mo/middle/passes/l2normalization.py
index 6e80ffb45..9edcdc1da 100644
--- a/model-optimizer/mo/middle/passes/l2normalization.py
+++ b/model-optimizer/mo/middle/passes/l2normalization.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,16 +14,15 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.extractor import add_attrs_props
 from mo.front.extractor import update_ie_fields
-from mo.graph.graph import Node, unique_id
+from mo.graph.graph import Node, Graph
 from mo.middle.pattern_match import apply_pattern
 
 
-def l2_norm_to_norm_action(graph: nx.MultiDiGraph, match: dict):
+def l2_norm_to_norm_action(graph: Graph, match: dict):
     input_data_name = match['input'].node
     output_data_name = match['l2_normalize_data'].node
 
@@ -33,18 +32,17 @@ def l2_norm_to_norm_action(graph: nx.MultiDiGraph, match: dict):
         return 1
     y = match['maximum_y_data'].value
 
-    normalize_id = unique_id(graph)
+    normalize_id = graph.unique_id()
     graph.add_node(normalize_id,
                    **add_attrs_props(
-                       dict(kind='op', precision="FP32", type='Normalize', name=str(unique_id(graph, 'normalize')),
+                       dict(kind='op', precision="FP32", type='Normalize', name=str(graph.unique_id('normalize')),
                             op='Normalize', shape=None, eps=str(y), across_spatial=str(0), channel_shared=str(0),
-                            data_type=None,
-                            infer=None)))
-    normalize_data_id = unique_id(graph)
+                            data_type=None, infer=None, in_ports_count=2, out_ports_count=1)))
+    normalize_data_id = graph.unique_id()
 
     graph.add_node(normalize_data_id, **add_attrs_props(graph.node[output_data_name]))
     update_ie_fields(graph.node[normalize_id])
-    weights_id = unique_id(graph, 'weights_')
+    weights_id = graph.unique_id('weights_')
     graph.add_node(weights_id, **add_attrs_props(
         dict(kind='data', precision="FP32", name=weights_id, value=None, shape=None, data_type=None, infer=None)))
     wnode = Node(graph, weights_id)
@@ -65,7 +63,7 @@ def l2_norm_to_norm_action(graph: nx.MultiDiGraph, match: dict):
         graph.add_edge(normalize_data_id, owner, **attr)
 
 
-def l2_norm_to_norm(graph: nx.MultiDiGraph):
+def l2_norm_to_norm(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -79,13 +77,10 @@ def l2_norm_to_norm(graph: nx.MultiDiGraph):
             ('rsqrt_data', dict(kind='data')),
             ('square', dict(kind='op', op='Square')),
             ('square_data', dict(kind='data')),
-            ('sum', dict(kind='op', op='Sum')),
+            ('sum', dict(kind='op', op='Reduce', reduce_type='sum')),
             ('sum_data', dict(kind='data')),
-            ('range_data', dict(kind='data')),
-
         ],
         edges=[
-            ('range_data', 'sum'),
             ('input', 'square'),
             ('square', 'square_data'),
             ('square_data', 'sum'),
diff --git a/model-optimizer/mo/middle/passes/leaky_relu.py b/model-optimizer/mo/middle/passes/leaky_relu.py
index 1ff04b24f..60fb42bbd 100644
--- a/model-optimizer/mo/middle/passes/leaky_relu.py
+++ b/model-optimizer/mo/middle/passes/leaky_relu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,14 +16,14 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.middle.pattern_match import apply_pattern
 from mo.ops.relu import ReLU
 
 
-def _convert_to_leaky_relu_action(graph: nx.MultiDiGraph, matches: dict):
+def _convert_to_leaky_relu_action(graph: Graph, matches: dict):
     """
     This function checks given patten and if pattern satisfies all requirements, converts to ReLU with negative slope
     """
@@ -73,7 +73,7 @@ def _convert_to_leaky_relu_action(graph: nx.MultiDiGraph, matches: dict):
               ''.format(eltwise_op.id, power_op.id))
 
 
-def convert_mul_eltwise_to_leaky_relu(graph: nx.MultiDiGraph):
+def convert_mul_eltwise_to_leaky_relu(graph: Graph):
     """
     This function finds next subgraph:
     -->Data-------->Eltwise(Max)-->Data
diff --git a/model-optimizer/mo/middle/passes/mean_scale_values.py b/model-optimizer/mo/middle/passes/mean_scale_values.py
index ec53fc0c5..64c86a2ec 100644
--- a/model-optimizer/mo/middle/passes/mean_scale_values.py
+++ b/model-optimizer/mo/middle/passes/mean_scale_values.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.middle.pattern_match import apply_pattern
 
 
@@ -52,7 +52,7 @@ def move_scaleshift_to_preprocess_action(graph, match):
         graph.graph['mean_values'] = mean_values
 
 
-def move_scaleshift_to_preprocess(graph: nx.MultiDiGraph):
+def move_scaleshift_to_preprocess(graph: Graph):
     """
     This function finds scaleshift layer after input layer and if it has weights with ones, it deletes scaleshift layer
     and creates graph dict attribute : {'input':np.array(...), 'input2': ... }
diff --git a/model-optimizer/mo/middle/passes/mean_scale_values_test.py b/model-optimizer/mo/middle/passes/mean_scale_values_test.py
index 9bc7b6bd6..9aa30181e 100644
--- a/model-optimizer/mo/middle/passes/mean_scale_values_test.py
+++ b/model-optimizer/mo/middle/passes/mean_scale_values_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -35,6 +35,9 @@ nodes_attributes = {'node_1': {'type': 'Identity', 'value': None, 'kind': 'op'},
                     'scaleshift_1_w': {'value': None, 'shape': None, 'kind': 'data'},
                     'scaleshift_1_b': {'value': None, 'shape': None, 'kind': 'data'},
                     'scaleshift_1_data': {'value': None, 'shape': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
+                    'op_output_1': { 'kind': 'op', 'op': 'OpOutput'}
+
                     }
 
 
@@ -45,19 +48,21 @@ class TestScaleShift_To_Preprocess(unittest.TestCase):
                              ('placeholder_1_data', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
                              ('scaleshift_1_w', 'scaleshift_1'),
-                             ('scaleshift_1_b', 'scaleshift_1')],
+                             ('scaleshift_1_b', 'scaleshift_1'),
+                             ('scaleshift_1_data', 'op_output')
+                             ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.ones(3)},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([-1, -2, -3])},
-                             'scaleshift_1_data': {'is_output': True}
                              })
 
         del graph['placeholder_1']['placeholder_1_data'][0]['in']
         del graph['scaleshift_1']['scaleshift_1_data'][0]['in']
 
         graph_ref = build_graph(nodes_attributes,
-                                [('placeholder_1', 'scaleshift_1_data')],
-                                {'scaleshift_1_data': {'is_output': True}})
+                                [('placeholder_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
+                                 ])
 
         move_scaleshift_to_preprocess(graph)
         self.assertTrue(graph.graph['mean_values'] is not None)
@@ -72,11 +77,13 @@ class TestScaleShift_To_Preprocess(unittest.TestCase):
                              ('placeholder_1_data', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
                              ('scaleshift_1_w', 'scaleshift_1'),
-                             ('scaleshift_1_b', 'scaleshift_1')],
+                             ('scaleshift_1_b', 'scaleshift_1'),
+                             ('scaleshift_1_data', 'op_output'),
+                             ('placeholder_1_data', 'op_output_1')
+                             ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array((1, 2, 3))},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([-1, -2, -3])},
-                             'scaleshift_1_data': {'is_output': True}
                              })
 
         del graph['placeholder_1']['placeholder_1_data'][0]['in']
@@ -87,11 +94,13 @@ class TestScaleShift_To_Preprocess(unittest.TestCase):
                                  ('placeholder_1_data', 'scaleshift_1'),
                                  ('scaleshift_1', 'scaleshift_1_data'),
                                  ('scaleshift_1_w', 'scaleshift_1'),
-                                 ('scaleshift_1_b', 'scaleshift_1')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True},
+                                 ('scaleshift_1_b', 'scaleshift_1'),
+                                 ('placeholder_1_data', 'op_output_1'),
+                                 ('scaleshift_1_data', 'op_output')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array((1, 2, 3))},
                                  'scaleshift_1_b': {'shape': np.array([3]), 'value': np.array([-1, -2, -3])},
-                                 'scaleshift_1_data': {'is_output': True}
                                  })
 
         move_scaleshift_to_preprocess(graph)
@@ -105,10 +114,12 @@ class TestScaleShift_To_Preprocess(unittest.TestCase):
                             [('placeholder_1', 'placeholder_1_data'),
                              ('placeholder_1_data', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
-                             ('scaleshift_1_w', 'scaleshift_1'), ],
+                             ('scaleshift_1_w', 'scaleshift_1'),
+                             ('scaleshift_1_data', 'op_output'),
+                             ('placeholder_1_data', 'op_output_1')
+                             ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array((1, 2, 3))},
-                             'scaleshift_1_data': {'is_output': True}
                              })
 
         del graph['placeholder_1']['placeholder_1_data'][0]['in']
@@ -118,10 +129,12 @@ class TestScaleShift_To_Preprocess(unittest.TestCase):
                                 [('placeholder_1', 'placeholder_1_data'),
                                  ('placeholder_1_data', 'scaleshift_1'),
                                  ('scaleshift_1', 'scaleshift_1_data'),
-                                 ('scaleshift_1_w', 'scaleshift_1')],
-                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3]), 'is_output': True},
+                                 ('scaleshift_1_w', 'scaleshift_1'),
+                                 ('scaleshift_1_data', 'op_output'),
+                                 ('placeholder_1_data', 'op_output_1')
+                                 ],
+                                {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                                  'scaleshift_1_w': {'shape': np.array([3]), 'value': np.array((1, 2, 3))},
-                                 'scaleshift_1_data': {'is_output': True}
                                  })
 
         move_scaleshift_to_preprocess(graph)
@@ -136,19 +149,21 @@ class TestScaleShift_To_Preprocess(unittest.TestCase):
                              ('placeholder_1_data', 'scaleshift_1'),
                              ('scaleshift_1', 'scaleshift_1_data'),
                              ('scaleshift_1_w', 'scaleshift_1'),
-                             ('scaleshift_1_b', 'scaleshift_1')],
+                             ('scaleshift_1_b', 'scaleshift_1'),
+                             ('scaleshift_1_data', 'op_output')
+                             ],
                             {'placeholder_1_data': {'shape': np.array([1, 227, 227, 3])},
                              'scaleshift_1_w': {'shape': np.array([3]), 'value': np.ones(3)},
                              'scaleshift_1_b': {'shape': np.array([3]), 'value': np.zeros(3)},
-                             'scaleshift_1_data': {'is_output': True}
                              })
 
         del graph['placeholder_1']['placeholder_1_data'][0]['in']
         del graph['scaleshift_1']['scaleshift_1_data'][0]['in']
 
         graph_ref = build_graph(nodes_attributes,
-                                [('placeholder_1', 'scaleshift_1_data')],
-                                {'scaleshift_1_data': {'is_output': True}})
+                                [('placeholder_1', 'scaleshift_1_data'),
+                                 ('scaleshift_1_data', 'op_output')
+                                 ])
 
         move_scaleshift_to_preprocess(graph)
         self.assertTrue(graph.graph.get('mean_values', None) is None)
diff --git a/model-optimizer/mo/middle/passes/pool.py b/model-optimizer/mo/middle/passes/pool.py
deleted file mode 100644
index a819cda81..000000000
--- a/model-optimizer/mo/middle/passes/pool.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import networkx as nx
-import numpy as np
-
-from mo.graph.graph import create_edge
-from mo.middle.pattern_match import apply_pattern
-from mo.ops.op import Op, PermuteAttrs
-from mo.ops.reshape import Reshape
-
-
-def mean_to_avgpool_action(graph: nx.MultiDiGraph, matches: dict):
-    if matches['axis'].value is None or matches['input'].shape is None:
-        return
-    dims = len(matches['input'].shape)
-    ones = np.ones(dims, dtype=np.int64)
-    axis = np.array(matches['axis'].value)
-    axis = axis if axis.ndim != 0 else np.array([axis], dtype=np.int64)
-
-    mean = graph.node[matches['mean'].node]
-    mean['stride'] = np.array(ones)
-    # TODO: need to check axis with real layout
-    spatial_dims = np.array(axis)
-    mean['spatial_dims'] = spatial_dims
-    mean['pad'] = np.zeros((dims, 2), np.int64)
-    mean['pad_spatial_shape'] = np.array(mean['pad'][spatial_dims])
-    window = np.array(ones)
-    window[spatial_dims] = matches['input'].shape[spatial_dims]
-    mean['window'] = window
-    mean['TF_op'] = mean['op']
-    mean['op'] = 'AvgPool'
-    mean['pool_method'] = 'avg'
-    mean['rounding_type'] = 'ceil'
-    mean['exclude_pad'] = 'true'
-    mean['kernel_spatial'] = window[spatial_dims]
-    graph.remove_edge(matches['axis'].node, matches['mean'].node)
-    mean['permute_attrs'] = PermuteAttrs().update_attrs(attrs=[('pad', 'input:0'),
-                                                               ('stride', 'input:0'),
-                                                               ('window', 'input:0'),
-                                                               ('spatial_dims', 'input:0')])
-
-    if matches['mean'].keep_dims == False:
-        output = matches['mean'].out_node()
-        pool_node = matches['mean']
-
-        # Keep dims for AvgPool
-        shape = np.array(output.shape)
-        for idx in spatial_dims:
-            shape = np.insert(shape, idx, 1)
-
-        graph.remove_edge(pool_node.id, output.id)
-        # Create new data for pool with all dims
-        pool_data = Op.create_data_node(graph, pool_node, {'shape': np.array(shape)})
-        # Create and connect reshape node
-        reshape_op = Reshape(graph, {'dim': np.array(output.shape)})
-        reshape_node = reshape_op.create_node([pool_data], dict(name='Reshape_',
-                                                                permute_attrs=PermuteAttrs().update_attrs(attrs=[('dim', 'output:0')])))
-        create_edge(reshape_node, output)
-
-
-def mean_to_avgpool(graph: nx.MultiDiGraph):
-    """
-    Translate Mean as a average pooling with kernel size equals to reduced dimensions and with no padding.
-    """
-    apply_pattern(
-        graph,
-        nodes=[
-            ('input', dict(kind='data')),
-            ('axis', dict(kind='data')),
-            ('mean', dict(kind='op', op='Mean'))],
-        edges=[
-            ('input', 'mean', {'in': 0}),
-            ('axis', 'mean', {'in': 1})],
-        action=mean_to_avgpool_action
-    )
-    return graph
diff --git a/model-optimizer/mo/middle/passes/shape.py b/model-optimizer/mo/middle/passes/shape.py
index 647502bf9..e98a2ac82 100644
--- a/model-optimizer/mo/middle/passes/shape.py
+++ b/model-optimizer/mo/middle/passes/shape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,12 +16,12 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
+from mo.front.common.partial_infer.utils import int64_array
 from mo.front.extractor import update_attrs
-from mo.graph.graph import Node, create_edge
-from mo.middle.passes.eliminate import remove_op_node_with_data_node, merge_data_nodes, graph_clean_up_tf, get_nodes_with_attributes
+from mo.graph.graph import Node, Graph
+from mo.middle.passes.eliminate import remove_op_node_with_data_node, merge_data_nodes, graph_clean_up_tf
 from mo.middle.passes.fusing.helpers import get_next_operation
 from mo.middle.pattern_match import apply_pattern
 from mo.ops.op import PermuteAttrs, Op
@@ -30,7 +30,7 @@ from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
 
-def reshape_squeeze_transform(graph: nx.MultiDiGraph, match: dict):
+def reshape_squeeze_transform(graph: Graph, match: dict):
     reshape = match['reshape']
     output = match['output']
     if output.shape is None:
@@ -42,11 +42,9 @@ def reshape_squeeze_transform(graph: nx.MultiDiGraph, match: dict):
         # do not override value 'dim' if it is set. It may contain specific values like -1 and 0
         reshape['dim'] = reshape.shape.copy()
     update_attrs(reshape, 'shape_attrs', 'dim')
-    if 'shape' in match:
-        graph.remove_edge(match['shape'].node, match['reshape'].node)
 
 
-def convert_squeeze(graph: nx.MultiDiGraph):
+def convert_squeeze(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -57,7 +55,7 @@ def convert_squeeze(graph: nx.MultiDiGraph):
     )
 
 
-def convert_reshape(graph: nx.MultiDiGraph):
+def convert_reshape(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -107,12 +105,12 @@ def can_repack_fully_connected_weights_nhwc_to_nchw(fc_node: Node):
         return False
 
 
-def repack_fully_connected_weights_nhwc_to_nchw(graph: nx.MultiDiGraph):
+def repack_fully_connected_weights_nhwc_to_nchw(graph: Graph):
     """
     Repack weights of FullyConnected layer as a part of nhwc_to_nchw translation if Reshape of
     that involves dimensions that we are repacking appears right before FullyConnected layer.
     """
-    for node_id in get_nodes_with_attributes(graph, type='FullyConnected'):
+    for node_id in graph.get_nodes_with_attributes(type='FullyConnected'):
         fc_node = Node(graph, node_id)
 
         if not can_repack_fully_connected_weights_nhwc_to_nchw(fc_node):
@@ -146,7 +144,7 @@ def repack_fully_connected_weights_nhwc_to_nchw(graph: nx.MultiDiGraph):
         weights.value = np.transpose(weights.value.reshape(tmp_shape), (2, 0, 1, 3)).reshape(weights.shape)
 
 
-def apply_nhwc_to_nchw_permutation(graph: nx.MultiDiGraph):
+def apply_nhwc_to_nchw_permutation(graph: Graph):
     # Add NHWC to NCHW permutation for all data nodes (only for nodes without permutation)
     if graph.graph['layout'] == 'NCHW':
         return
@@ -181,7 +179,7 @@ def apply_nhwc_to_nchw_permutation(graph: nx.MultiDiGraph):
                 PermuteAttrs.set_permutation(node, out_node, permutation)
 
 
-def merge_nodes_permutations(graph: nx.MultiDiGraph):
+def merge_nodes_permutations(graph: Graph):
     # Iterate over all data nodes and check all permutations for similarity
     # In case of equal permutations, this permutation will be set as attribute for data node
     # otherwise exception will be raised
@@ -228,7 +226,7 @@ def merge_nodes_permutations(graph: nx.MultiDiGraph):
             node.permutation = None
 
 
-def permute_data_nodes_attrs(graph: nx.MultiDiGraph):
+def permute_data_nodes_attrs(graph: Graph):
     # Iterate over all data nodes and apply permutation if exists
     for node in graph.nodes():
         node = Node(graph, node)
@@ -245,7 +243,7 @@ def permute_data_nodes_attrs(graph: nx.MultiDiGraph):
             node.value = np.array(node.value.transpose(node.permutation.perm))
 
 
-def permute_op_nodes_attrs(graph: nx.MultiDiGraph):
+def permute_op_nodes_attrs(graph: Graph):
     for node in graph.nodes():
         node = Node(graph, node)
         if node.kind == 'op' and node.has_valid('permute_attrs'):
@@ -255,7 +253,7 @@ def permute_op_nodes_attrs(graph: nx.MultiDiGraph):
                 raise Error('Can\'t permute attrs for node {}. Error message: {}'.format(node.id, e))
 
 
-def reverse_input_channels(graph: nx.MultiDiGraph):
+def reverse_input_channels(graph: Graph):
     """
     Searchers for all type=Input nodes with 4D output tensors,
     tracks tensors down through non-shape-changing ops to the first type=Convolution or other channel-dependent nodes
@@ -311,6 +309,8 @@ def reverse_input_channels(graph: nx.MultiDiGraph):
         if conv.op == 'DepthwiseConv2dNative':
             log.debug('out nodes: {}'.format(conv.out_node()))
             bottoms = conv.out_node().out_nodes()
+            if len(bottoms) == 1 and bottoms[0].op == 'FakeQuantWithMinMaxVars':
+                bottoms = bottoms[0].out_node().out_nodes()
             log.debug('bottoms: {}'.format(bottoms))
             log.debug('assumed conv: name = {}, op = {}'.format(bottoms[0].name, bottoms[0].op))
             if len(bottoms) > 0 and bottoms[0].op == 'Conv2D':
@@ -349,12 +349,13 @@ def reverse_input_channels(graph: nx.MultiDiGraph):
                     'complete the flip')
 
         conv.in_node(1).value = np.flip(conv.in_node(1).value, conv.in_node(1).input_channel_dim)
+        conv.in_node(1).shape = int64_array(conv.in_node(1).value.shape)
         log.debug('Applied reversing input channels for weights of convolution {}'.format(conv.id))
         log.debug('Shape was (shape){}, (value.shape){}'.format(conv.in_node(1).shape, conv.in_node(1).value.shape))
         log.debug('Flipped dim: {}'.format(conv.in_node(1).input_channel_dim))
 
 
-def conv_flatten_concat_action(graph: nx.MultiDiGraph, match: dict):
+def conv_flatten_concat_action(graph: Graph, match: dict):
     assert graph.graph['layout'] == 'NHWC'
     reshape_node = match['reshape']
     reshape_data_node = match['reshape_data']
@@ -370,18 +371,18 @@ def conv_flatten_concat_action(graph: nx.MultiDiGraph, match: dict):
         log.info('There is a FullyConnected layer after the node "{}" which weights will be repacked. So there is no '
                  'need to insert Permute'.format(reshape_node.soft_get('name')))
         return
-    assert len(graph.in_edges(reshape_node.id)) == 1
     graph.remove_edge(conv_data_node.id, reshape_node.id)
 
     permutation_order = PermuteAttrs.get_nchw_to_nhwc_permutation(len(conv_data_node.shape)).perm
     new_permute_op = Permute(graph, {'order': permutation_order})
     permute_data_node = new_permute_op.create_node_with_data([conv_data_node], dict(name=conv_name + '/Permute_'))
-    create_edge(permute_data_node, reshape_node)
+    graph.create_edge(permute_data_node, reshape_node)
     # Disable permutation for Reshape and Concat layers attributes
     PermuteAttrs.set_permutation(reshape_node, reshape_data_node, None)
+    reshape_node['nchw_layout'] = True
 
 
-def conv_flatten_concat(graph: nx.MultiDiGraph):
+def conv_flatten_concat(graph: Graph):
     apply_pattern(
         graph,
         nodes=[
@@ -419,12 +420,12 @@ def conv_flatten_concat(graph: nx.MultiDiGraph):
     )
 
 
-def fuse_sequence_of_reshapes(graph: nx.MultiDiGraph):
+def fuse_sequence_of_reshapes(graph: Graph):
     for node in list(graph.nodes()):
-        node = Node(graph, node)
-        if not graph.has_node(node.id):
+        if not graph.has_node(node):
             # data node can be already removed
             continue
+        node = Node(graph, node)
         if (
                 node.has_valid('type') and node.type == 'Reshape' and
                 len(node.out_nodes()) == 1 and node.out_node().has_valid('kind') and node.out_node().kind == 'data' and
@@ -439,3 +440,22 @@ def fuse_sequence_of_reshapes(graph: nx.MultiDiGraph):
                 # Remove Reshape1
                 log.debug('Second phase for Reshape: {}'.format(node.name))
                 remove_op_node_with_data_node(graph, node)
+
+    reshape_nodes = graph.get_op_nodes(op='Reshape')
+    for reshape_node in reshape_nodes:
+        in_ports = [port for port in reshape_node.in_ports().values() if not port.disconnected()]
+        assert len(in_ports) in [1, 2], "`Reshape` node must have 2 inputs or 1 input with `dim`"
+        if len(in_ports) == 2:
+            previous_dim_op = reshape_node.in_port(1).get_source().node.op
+            if previous_dim_op != 'Const':
+                continue
+            dim = reshape_node.in_port(1).get_connection().data.get_value()
+        else:
+            assert reshape_node.has_valid('dim'), "`Reshape` node with 1 input must have `dim` attribute"
+            dim = reshape_node.dim
+
+        in_shape = reshape_node.in_port(0).get_connection().data.get_shape()
+
+        if np.array_equal(dim, in_shape) and len(reshape_node.out_nodes()):
+            log.debug("Useless reshape with dim {} was deleted: {}".format(str(dim), reshape_node.name))
+            reshape_node.out_port(0).get_connection().set_source(reshape_node.in_port(0).get_source())
diff --git a/model-optimizer/mo/middle/passes/shared_weights_duplication.py b/model-optimizer/mo/middle/passes/shared_weights_duplication.py
deleted file mode 100644
index 9458386e5..000000000
--- a/model-optimizer/mo/middle/passes/shared_weights_duplication.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""
- Copyright (c) 2018 Intel Corporation
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-"""
-
-import networkx as nx
-import numpy as np
-
-from mo.graph.graph import Node
-from mo.ops.op import Op
-from mo.utils.error import Error
-
-
-def duplicate_shared_weights(graph: nx.MultiDiGraph):
-    """
-    This function finds all const data nodes that have more that one consumer and then duplicate them
-    """
-    data_nodes = [Node(graph, id) for id in graph.nodes() if Node(graph, id).soft_get('kind') == 'data']
-    for node in data_nodes:
-        # Check that node has const values and more than one consumer
-        if len(node.out_nodes()) > 1 and node.value is not None:
-            # Here we delete all edges between base node and it's consumers (except first), and then duplicate this
-            # node to connect with other consumers
-            while len(node.out_nodes()) > 1:
-                out_node = node.out_node(1)
-
-                if len(graph.get_edge_data(node.id, out_node.id)) != 1:
-                    raise Error('There is more than one edge from {} node to {} node.'.format(node.id, out_node.id))
-                e_attrs = graph.get_edge_data(node.id, out_node.id)[0]
-
-                graph.remove_edge(node.id, out_node.id)
-                data = Op.create_input_data_node(graph, "Copy_{}".format(node.id), np.array(node.value), graph.node[node.id])
-
-                graph.add_edges_from([(data.id, out_node.id, e_attrs)])
diff --git a/model-optimizer/mo/middle/passes/tensor_names.py b/model-optimizer/mo/middle/passes/tensor_names.py
index 97efb3d70..7b8abb2e8 100644
--- a/model-optimizer/mo/middle/passes/tensor_names.py
+++ b/model-optimizer/mo/middle/passes/tensor_names.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,14 @@
  limitations under the License.
 """
 
-import json
-from collections import defaultdict
-from xml.etree.ElementTree import Element, SubElement, tostring
-from xml.dom.minidom import parseString
 
-import networkx as nx
+from defusedxml.minidom import parseString
+from xml.etree.ElementTree import Element, SubElement, tostring
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 
 
-def propagate_op_name_to_tensor(graph: nx.MultiDiGraph):
+def propagate_op_name_to_tensor(graph: Graph):
     for node in graph.nodes():
         node = Node(graph, node)
         if node.kind == 'op' and node.has_valid('name'):
@@ -35,7 +32,7 @@ def propagate_op_name_to_tensor(graph: nx.MultiDiGraph):
                 out_node['ie_tensor_id'] = node.node
 
 
-def output_tensor_names_map(graph: nx.MultiDiGraph, xml_file_name: str):
+def output_tensor_names_map(graph: Graph, xml_file_name: str):
     mapping = Element('mapping')
     for node in graph:
         node = Node(graph, node)
diff --git a/model-optimizer/mo/middle/pattern_match.py b/model-optimizer/mo/middle/pattern_match.py
index f1ea8cfaa..0e260f42c 100644
--- a/model-optimizer/mo/middle/pattern_match.py
+++ b/model-optimizer/mo/middle/pattern_match.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -19,14 +19,14 @@ import logging as log
 import networkx as nx
 from networkx.algorithms import isomorphism as ism
 
-from mo.graph.graph import Node, dict_includes
+from mo.graph.graph import Node, dict_includes, Graph
 
 
 def inverse_dict(d: dict):
     return {v: k for k, v in d.items()}
 
 
-def for_each_sub_graph(graph: nx.MultiDiGraph, func: callable):
+def for_each_sub_graph(graph: Graph, func: callable):
     """ Run a given function `func` for each sub-graph in a given graph not recursively.
 
         It doesn't search for sub-graphs in found sub-graphs recursively. If the recursion is required,
@@ -39,7 +39,7 @@ def for_each_sub_graph(graph: nx.MultiDiGraph, func: callable):
                 func(node[sub_graph_name])
 
 
-def for_each_sub_graph_recursively(graph: nx.MultiDiGraph, func: callable):
+def for_each_sub_graph_recursively(graph: Graph, func: callable):
     """ Run a given function `func` for each sub-graph in a given graph `graph` recursively.
 
         A given function `func` shouldn't contain a recursion for sub-graphs of the second level.
@@ -53,7 +53,7 @@ def for_each_sub_graph_recursively(graph: nx.MultiDiGraph, func: callable):
     for_each_sub_graph(graph, recursive_helper)
 
 
-def for_graph_and_each_sub_graph_recursively(graph: nx.MultiDiGraph, func: callable):
+def for_graph_and_each_sub_graph_recursively(graph: Graph, func: callable):
     """ Run a given function `func` for a given graph `graph` and each sub-graph recursively. """
     func(graph)
     for_each_sub_graph_recursively(graph, func)
@@ -63,7 +63,7 @@ def all_edges_in_nodes(nodes: list, edges: list):
     return all([edge[0] in nodes and edge[1] in nodes for edge in edges])
 
 
-def apply_pattern(graph: nx.MultiDiGraph, nodes: list, edges: list, action: callable, node_attrs: list = None,
+def apply_pattern(graph: Graph, nodes: list, edges: list, action: callable, node_attrs: list = None,
                   edge_attrs: list = None):
     """
     Search for all matches of a given subgraph defined by [nodes, edges] in graph,
@@ -114,7 +114,8 @@ def check_node_usages_out_of_match(match: dict, node_name_in_match_group: str):
 
 
 def node_match(data1: dict, data2: dict):
-    return dict_includes(data1, data2)
+    # We have to skip _in_ports/_out_ports attributes for comparision as they are not comparable
+    return dict_includes(data1, data2, skip_attr_names=['_in_ports', '_out_ports'])
 
 
 def edge_match(datasets1, datasets2):
@@ -130,7 +131,7 @@ def edge_match(datasets1, datasets2):
     return values1 == values2
 
 
-def build_matcher(graph: nx.MultiDiGraph, nodes: list, edges: list, node_attrs: list = None,
+def build_matcher(graph: Graph, nodes: list, edges: list, node_attrs: list = None,
                          edge_attrs: list = None):
     if node_attrs is not None or edge_attrs is not None:
         log.warning('\'edge_attrs\' or `\'node_attrs\'` parameter was passed to function \'find_pattern_matches\', '
@@ -139,13 +140,13 @@ def build_matcher(graph: nx.MultiDiGraph, nodes: list, edges: list, node_attrs:
                     'matching function like \'find_pattern_matches\', \'apply_pattern\' and \'pattern\' because it '
                     'will be deprecated in the next release.')
 
-    subgraph = nx.MultiDiGraph(name='pattern')
+    subgraph = Graph(name='pattern')
     subgraph.add_nodes_from(nodes)
     subgraph.add_edges_from(edges)
     return ism.MultiDiGraphMatcher(graph, subgraph, node_match, edge_match)
 
 
-def find_pattern_matches(graph: nx.MultiDiGraph, nodes: list, edges: list, node_attrs: list = None,
+def find_pattern_matches(graph: Graph, nodes: list, edges: list, node_attrs: list = None,
                          edge_attrs: list = None):
     """
     Find all matches of a given sub-graph defined by [nodes, edges] in graph.
@@ -154,7 +155,7 @@ def find_pattern_matches(graph: nx.MultiDiGraph, nodes: list, edges: list, node_
     return matcher.subgraph_isomorphisms_iter()
 
 
-def find_isomorphisms(graph: nx.MultiDiGraph, nodes: list, edges: list):
+def find_isomorphisms(graph: Graph, nodes: list, edges: list):
     ''' Find for isomorphism between a given graph and a pattern specified by a given nodes and edges.
         Applies the same rules as apply_pattern.
     '''
diff --git a/model-optimizer/mo/middle/replacement.py b/model-optimizer/mo/middle/replacement.py
index 82cadc569..752d5441f 100644
--- a/model-optimizer/mo/middle/replacement.py
+++ b/model-optimizer/mo/middle/replacement.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -22,6 +22,14 @@ class MiddleReplacementPattern(ReplacementPattern):
     registered_ops = {}
     registered_cls = []
 
+    def run_after(self):
+        from extensions.middle.pass_separator import MiddleStart
+        return [MiddleStart]
+
+    def run_before(self):
+        from extensions.middle.pass_separator import MiddleFinish
+        return [MiddleFinish]
+
     @classmethod
     def class_type(cls):
         return class_registration.ClassType.MIDDLE_REPLACER
diff --git a/model-optimizer/mo/ops/activation.py b/model-optimizer/mo/ops/activation.py
index 95111f767..971a3de03 100644
--- a/model-optimizer/mo/ops/activation.py
+++ b/model-optimizer/mo/ops/activation.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.eltwise import eltwise_infer
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
@@ -37,14 +36,17 @@ class Activation(Op):
         'tanh': lambda x: np.tanh(x),
         'elu': lambda x, alpha: Activation.elu(x, alpha),
         'sigmoid': lambda x: 1 / (1 + np.exp(-x)),
-        'relu6': lambda x: np.maximum(0, np.minimum(x, 6))
+        'relu6': lambda x: np.maximum(0, np.minimum(x, 6)),
+        'exp': lambda x: np.exp(x),
     }
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
-            'infer': Activation.infer
+            'infer': Activation.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     @classmethod
diff --git a/model-optimizer/mo/ops/activation_test.py b/model-optimizer/mo/ops/activation_test.py
index b289b96b4..5dbc07b0d 100644
--- a/model-optimizer/mo/ops/activation_test.py
+++ b/model-optimizer/mo/ops/activation_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/clamp.py b/model-optimizer/mo/ops/clamp.py
index ce6bfc534..05e551c0e 100644
--- a/model-optimizer/mo/ops/clamp.py
+++ b/model-optimizer/mo/ops/clamp.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,21 +14,22 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
 class Clamp(Op):
     op = 'Clamp'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
-            'infer': copy_shape_infer
+            'infer': copy_shape_infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/clamp_test.py b/model-optimizer/mo/ops/clamp_test.py
index 66e38e22b..0cdf55675 100644
--- a/model-optimizer/mo/ops/clamp_test.py
+++ b/model-optimizer/mo/ops/clamp_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/concat.py b/model-optimizer/mo/ops/concat.py
index b13c19fce..1e04f01f9 100644
--- a/model-optimizer/mo/ops/concat.py
+++ b/model-optimizer/mo/ops/concat.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@ class Concat(Op):
             'op': __class__.op,
             'axis': 1,
             'infer': concat_infer,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/concat_test.py b/model-optimizer/mo/ops/concat_test.py
index 7f39236df..c03877d0a 100644
--- a/model-optimizer/mo/ops/concat_test.py
+++ b/model-optimizer/mo/ops/concat_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/const.py b/model-optimizer/mo/ops/const.py
index 3511a1ba8..adfcccc10 100644
--- a/model-optimizer/mo/ops/const.py
+++ b/model-optimizer/mo/ops/const.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -34,8 +34,9 @@ class Const(Op):
             'value': None,
             'shape': None,
             'data_type': None,
+            'out_ports_count': 1,
         }, attrs)
         if not isinstance(self.attrs['value'], np.ndarray):
-            self.attrs['value'] = np.array([self.attrs['value']])
+            self.attrs['value'] = np.array(self.attrs['value'])
         self.attrs['shape'] = np.array(self.attrs['value'].shape, dtype=np.int64)
 
diff --git a/model-optimizer/mo/ops/convolution.py b/model-optimizer/mo/ops/convolution.py
index e6bcdee3c..96855eb03 100644
--- a/model-optimizer/mo/ops/convolution.py
+++ b/model-optimizer/mo/ops/convolution.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,14 +16,13 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array, float_array, mark_input_bins, assign_dims_to_weights, \
     tf_window_op_pad_infer
 from mo.front.extractor import spatial_getter
 from mo.front.onnx.extractors.utils import get_backend_pad
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 from mo.utils.error import Error
 
@@ -31,12 +30,16 @@ from mo.utils.error import Error
 class Convolution(Op):
     op = 'Convolution'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
             'infer': __class__.infer,
+            'multiplication_transparent': True,
+            'multiplication_transparent_ports': [(0, 0), (1, 0)],
+            'in_ports_count': 3,
+            'out_ports_count': 1,
         }, attrs)
 
     def backend_attrs(self):
@@ -49,7 +52,10 @@ class Convolution(Op):
 
            ('pads_begin', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 0)))),
            ('pads_end', lambda node: ','.join(map(str, get_backend_pad(node.pad, node.spatial_dims, 1)))),
-           'output'
+           'output',
+           'pad_value',
+           'mode',
+           'input',
         ]
 
     def backend_attrs_v2(self):
@@ -176,6 +182,9 @@ class Convolution(Op):
             node['pad'] = np.array([[0, 0]] * len(input_shape), dtype=np.int64)
         node['pad_spatial_shape'] = node.pad[node.spatial_dims]
 
+        if not node.has_valid('output_padding'):
+            node['output_padding'] = np.full([len(input_shape)], 0, dtype=np.int64)
+
         input_spatial_shape = input_shape[node.spatial_dims]
         stride_spatial_shape = node.stride[node.spatial_dims]
 
@@ -185,9 +194,11 @@ class Convolution(Op):
         # Caffe do not use auto_pad attribute
         if node.has_valid('auto_pad') and not node.has_valid('output_spatial_shape'):
             node['pad_spatial_shape'], node['output_spatial_shape'] = tf_window_op_pad_infer(input_spatial_shape,
-                                                                                       kernel_extent,
-                                                                                       stride_spatial_shape,
-                                                                                       node.auto_pad)
+                                                                                             kernel_extent,
+                                                                                             stride_spatial_shape,
+                                                                                             node.auto_pad,
+                                                                                             node.type == 'Deconvolution')
+
             pad = np.zeros((len(input_shape), 2), dtype=np.int64)
             pad[node.spatial_dims] = node.pad_spatial_shape
             node.pad = pad
@@ -208,7 +219,7 @@ class Convolution(Op):
                         return
                 else:
                     output_padding = node.output_padding[node.spatial_dims] if node.has_valid('output_padding') else None
-                    if output_padding is not None:
+                    if output_padding is not None and any(output_padding):
                         pad_spatial_shape -= output_padding
                         for dim in range(len(pad_spatial_shape)):
                             node.pad_spatial_shape[dim][1] -= pad_spatial_shape[dim]
@@ -226,14 +237,14 @@ class Convolution(Op):
         if node.has_valid('get_group'):
             node['group'] = node.get_group(node)
         output_shape = np.full_like(input_shape, -1, dtype=np.int64)
-        output_shape[node.batch_dims] = input_shape[node.batch_dims]
-        output_shape[node.spatial_dims] = node.output_spatial_shape
+        output_shape[node.batch_dims] = input_shape[node.batch_dims]  # pylint: disable=unsupported-assignment-operation
+        output_shape[node.spatial_dims] = node.output_spatial_shape  # pylint: disable=unsupported-assignment-operation
 
         # For cases when output attribute wasn't set in extractor we should specify get_output_feature_dim attribute
         # this attribute should store lambda node: ... (check tf convolution extractor)
         if node.has_valid('get_output_feature_dim'):
             node['output'] = node.get_output_feature_dim(node)
-        output_shape[node.channel_dims] = node.output
+        output_shape[node.channel_dims] = node.output  # pylint: disable=unsupported-assignment-operation
         node['output_shape'] = output_shape
 
         for n in node.out_nodes():
diff --git a/model-optimizer/mo/ops/convolution_test.py b/model-optimizer/mo/ops/convolution_test.py
index 6f009b588..51d03958c 100644
--- a/model-optimizer/mo/ops/convolution_test.py
+++ b/model-optimizer/mo/ops/convolution_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -27,7 +27,8 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'conv_input': {'value': None, 'kind': 'data'},
                     'conv_node': {'type': 'Convolution', 'kind': 'op'},
                     'conv_weights': {'value': FakeValue(None), 'kind': 'data'},
-                    'conv_output': {'value': None, 'kind': 'data'}
+                    'conv_output': {'value': None, 'kind': 'data'},
+                    'output_op': { 'kind': 'op', 'op': 'OpOutput'}
                     }
 
 
@@ -36,8 +37,10 @@ class TestConvolutionPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('conv_input', 'conv_node'),
                              ('conv_weights', 'conv_node'),
-                             ('conv_node', 'conv_output')],
-                            {'conv_output': {'is_output': True, 'shape': None},
+                             ('conv_node', 'conv_output'),
+                             ('conv_output', 'op_output')
+                             ],
+                            {'conv_output': {'shape': None},
                              'conv_input': {'shape': np.array([1, 3, 227, 227])},
                              'conv_weights': {'shape': np.array([64, 3, 3, 3]),
                                               'dim_attrs': ['spatial_dims', 'channel_dims', 'batch_dims', 'axis']},
@@ -65,8 +68,10 @@ class TestConvolutionPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('conv_input', 'conv_node'),
                              ('conv_weights', 'conv_node'),
-                             ('conv_node', 'conv_output')],
-                            {'conv_output': {'is_output': True, 'shape': None},
+                             ('conv_node', 'conv_output'),
+                             ('conv_output', 'op_output')
+                             ],
+                            {'conv_output': {'shape': None},
                              'conv_input': {'shape': None},
                              'conv_weights': {'shape': None,
                                               'dim_attrs': ['spatial_dims', 'channel_dims', 'batch_dims', 'axis']},
@@ -89,8 +94,10 @@ class TestConvolutionPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('conv_input', 'conv_node'),
                              ('conv_weights', 'conv_node'),
-                             ('conv_node', 'conv_output')],
-                            {'conv_output': {'is_output': True, 'shape': None},
+                             ('conv_node', 'conv_output'),
+                             ('conv_output', 'op_output')
+                             ],
+                            {'conv_output': {'shape': None},
                              'conv_input': {'shape': np.array([1, 21, 16, 16])},
                              'conv_weights': {'shape': np.array([1, 21, 4, 4]),
                                               'dim_attrs': ['spatial_dims', 'channel_dims', 'batch_dims', 'axis']},
@@ -127,8 +134,10 @@ class TestConvolutionPartialInfer(unittest.TestCase):
         graph = build_graph(nodes_attributes,
                             [('conv_input', 'conv_node'),
                              ('conv_weights', 'conv_node'),
-                             ('conv_node', 'conv_output')],
-                            {'conv_output': {'is_output': True, 'shape': None},
+                             ('conv_node', 'conv_output'),
+                             ('conv_output', 'op_output')
+                             ],
+                            {'conv_output': {'shape': None},
                              'conv_input': {'shape': None},
                              'conv_weights': {'shape': np.array([1, 21, 16, 16]),
                                               'dim_attrs': ['spatial_dims', 'channel_dims', 'batch_dims', 'axis']},
@@ -153,11 +162,11 @@ class TestConvolutionPartialInfer(unittest.TestCase):
                             [
                                 ('conv_input', 'conv_node'),
                                 ('conv_weights', 'conv_node'),
-                                ('conv_node', 'conv_output')
+                                ('conv_node', 'conv_output'),
+                                ('conv_output', 'op_output')
                             ],
                             {
                                 'conv_output': {
-                                    'is_output': True,
                                     'shape': None
                                 },
                                 'conv_input': {
@@ -227,11 +236,11 @@ class TestConvolutionPartialInfer(unittest.TestCase):
                             [
                                 ('conv_input', 'conv_node'),
                                 ('conv_weights', 'conv_node'),
-                                ('conv_node', 'conv_output')
+                                ('conv_node', 'conv_output'),
+                                ('conv_output', 'op_output')
                             ],
                             {
                                 'conv_output': {
-                                    'is_output': True,
                                     'shape': None
                                 },
                                 'conv_input': {
@@ -301,11 +310,11 @@ class TestConvolutionPartialInfer(unittest.TestCase):
                             [
                                 ('conv_input', 'conv_node'),
                                 ('conv_weights', 'conv_node'),
-                                ('conv_node', 'conv_output')
+                                ('conv_node', 'conv_output'),
+                                ('conv_output', 'op_output')
                             ],
                             {
                                 'conv_output': {
-                                    'is_output': True,
                                     'shape': None
                                 },
                                 'conv_input': {
diff --git a/model-optimizer/mo/ops/crop.py b/model-optimizer/mo/ops/crop.py
index 4c1875f0e..1f660c9bb 100644
--- a/model-optimizer/mo/ops/crop.py
+++ b/model-optimizer/mo/ops/crop.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,23 +16,24 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
 class Crop(Op):
     op = 'Crop'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
-            'infer': __class__.infer
+            'infer': __class__.infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
 
     def backend_attrs(self):
diff --git a/model-optimizer/mo/ops/crop_test.py b/model-optimizer/mo/ops/crop_test.py
index 9eb541271..e93e936ef 100644
--- a/model-optimizer/mo/ops/crop_test.py
+++ b/model-optimizer/mo/ops/crop_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/deconvolution.py b/model-optimizer/mo/ops/deconvolution.py
index b4fe12b2a..829161e72 100644
--- a/model-optimizer/mo/ops/deconvolution.py
+++ b/model-optimizer/mo/ops/deconvolution.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@
 """
 
 import logging as log
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array, float_array, mark_input_bins, assign_dims_to_weights, \
@@ -23,19 +22,21 @@ from mo.front.common.partial_infer.utils import int64_array, float_array, mark_i
 from mo.front.onnx.extractors.utils import get_backend_pad
 from mo.front.extractor import spatial_getter
 from mo.utils.error import Error
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
 class Deconvolution(Op):
     op = 'Deconvolution'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
             'infer': __class__.infer,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
         }, attrs)
 
     def backend_attrs(self):
diff --git a/model-optimizer/mo/ops/eltwise.py b/model-optimizer/mo/ops/eltwise.py
index 18185f64a..eba1956a3 100644
--- a/model-optimizer/mo/ops/eltwise.py
+++ b/model-optimizer/mo/ops/eltwise.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.eltwise import eltwise_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
 class Eltwise(Op):
     op = 'Eltwise'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         operations = {
             'sum': ('Add', lambda a, b: a + b),
             'mul': ('Mul', lambda a, b: a * b),
@@ -35,6 +35,8 @@ class Eltwise(Op):
             'type': 'Eltwise',  # a property of IE supported layer
             'op': operations[attrs['operation']][0],
             'infer': lambda node: eltwise_infer(node, operations[node.operation][1]),
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/eltwise_n.py b/model-optimizer/mo/ops/eltwise_n.py
index 8f5eb032f..e2060b3f0 100644
--- a/model-optimizer/mo/ops/eltwise_n.py
+++ b/model-optimizer/mo/ops/eltwise_n.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -27,11 +26,12 @@ class EltwiseN(Op):
     """
     op = 'EltwiseN'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'op': __class__.op,
             'type': None,  # type is None because this operation should not appear in IR
             'infer': None,
+            'out_ports_count': 1,
         }, attrs)
         if 'operation' not in self.attrs:
             raise Error('"operation" attribute is not set for operation "{}".'.format(__class__.op))
diff --git a/model-optimizer/mo/ops/expand_dims.py b/model-optimizer/mo/ops/expand_dims.py
index ce790bb83..c64b3e814 100644
--- a/model-optimizer/mo/ops/expand_dims.py
+++ b/model-optimizer/mo/ops/expand_dims.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  limitations under the License.
 """
 
-from mo.ops.op import Op
 from mo.front.common.partial_infer.expand_dims import tf_expand_dims_infer
+from mo.ops.op import Op
 
 
 class ExpandDims(Op):
@@ -28,8 +28,6 @@ class ExpandDims(Op):
             'op': __class__.op,
             'infer': tf_expand_dims_infer,
             'expand_axis': None,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
-
-    def supported_attrs(self):
-        # TODO ugly copying from Reshape op
-        return [('dim', lambda node: ', '.join(map(str, node['dim'])))]
diff --git a/model-optimizer/mo/ops/flatten.py b/model-optimizer/mo/ops/flatten.py
index 96408e4b0..05b5412f7 100644
--- a/model-optimizer/mo/ops/flatten.py
+++ b/model-optimizer/mo/ops/flatten.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
  limitations under the License.
 """
 
-import networkx as nx
-import numpy as np
 import logging as log
 
+import numpy as np
+
 from mo.front.caffe.extractors.utils import get_canonical_axis_index
 from mo.front.common.partial_infer.utils import int64_array
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -27,11 +28,13 @@ class Flatten(Op):
     op = 'Flatten'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
             'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/flatten_onnx.py b/model-optimizer/mo/ops/flatten_onnx.py
index 07a40c716..e99743779 100644
--- a/model-optimizer/mo/ops/flatten_onnx.py
+++ b/model-optimizer/mo/ops/flatten_onnx.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,11 @@
  limitations under the License.
 """
 
-import networkx as nx
-import numpy as np
 import logging as log
 
+import numpy as np
+
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -25,16 +26,15 @@ class FlattenONNX(Op):
     op = 'FlattenONNX'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'Reshape',
             'op': __class__.op,
             'infer': __class__.infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
 
-    def supported_attrs(self):
-        return [('dim', lambda node: ','.join(map(str, node['dim'])))]
-
     @staticmethod
     def infer(node):
         """
@@ -51,7 +51,9 @@ class FlattenONNX(Op):
             return
 
         if len(node.in_nodes()) != 1:
-            log.debug('Can\'t calculate output shape for {} node. Number of input nodes should be equal 1 instead of {}'.format(node.name, len(node.in_nodes())))
+            log.debug(
+                'Can\'t calculate output shape for {} node. Number of input nodes should be equal 1 instead of {}'.format(
+                    node.name, len(node.in_nodes())))
             return
 
         axis = node.axis
@@ -60,5 +62,4 @@ class FlattenONNX(Op):
         node['dim'] = np.array(dim)
         node.out_node().shape = np.array(dim)
         if node.in_node(0).has_valid('value'):
-            node.out_node().value = node.in_node(0).value
-            node.out_node().value.shape = np.array(dim)
+            node.out_node().value = np.reshape(node.in_node(0).value, dim)
diff --git a/model-optimizer/mo/ops/flatten_onnx_test.py b/model-optimizer/mo/ops/flatten_onnx_test.py
index 1e68fbb69..a73aa7f62 100644
--- a/model-optimizer/mo/ops/flatten_onnx_test.py
+++ b/model-optimizer/mo/ops/flatten_onnx_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/flatten_test.py b/model-optimizer/mo/ops/flatten_test.py
index 9d584010a..75de34439 100644
--- a/model-optimizer/mo/ops/flatten_test.py
+++ b/model-optimizer/mo/ops/flatten_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -24,7 +24,8 @@ from mo.utils.unittest.graph import build_graph
 
 nodes_attributes = {'node_1': {'value': None, 'kind': 'data'},
                     'flatten_1': {'type': 'Flatten', 'value': None, 'kind': 'op'},
-                    'node_2': {'value': None, 'kind': 'data'}
+                    'node_2': {'value': None, 'kind': 'data'},
+                    'output_op': { 'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -32,8 +33,10 @@ class TestFlattenPartialInfer(unittest.TestCase):
     def test_flatten_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'flatten_1'),
-                             ('flatten_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': np.array([1, 3 * 256 * 256])},
+                             ('flatten_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': np.array([1, 3 * 256 * 256])},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'flatten_1': {'axis': 1, 'dim': []}
                              })
@@ -49,8 +52,10 @@ class TestFlattenPartialInfer(unittest.TestCase):
     def test_flatten_infer_no_shape(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'flatten_1'),
-                             ('flatten_1', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                             ('flatten_1', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': None},
                              'flatten_1': {'axis': 1}
                              })
diff --git a/model-optimizer/mo/ops/inner_product.py b/model-optimizer/mo/ops/inner_product.py
index 291af9c49..3dcf0824d 100644
--- a/model-optimizer/mo/ops/inner_product.py
+++ b/model-optimizer/mo/ops/inner_product.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.inner_product import caffe_inner_product
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,12 +23,14 @@ class InnerProduct(Op):
     op = 'FullyConnected'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'FullyConnected',
             'op': 'FullyConnected',
             'out-size': None,
             'layout': 'NCHW',
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': caffe_inner_product
         }, attrs)
 
diff --git a/model-optimizer/mo/ops/inner_product_test.py b/model-optimizer/mo/ops/inner_product_test.py
index 22d3c4af8..2151ed33b 100644
--- a/model-optimizer/mo/ops/inner_product_test.py
+++ b/model-optimizer/mo/ops/inner_product_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/input.py b/model-optimizer/mo/ops/input.py
index 1aa76afb0..47b035d3d 100644
--- a/model-optimizer/mo/ops/input.py
+++ b/model-optimizer/mo/ops/input.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,20 +14,20 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import single_output_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
 class Input(Op):
     op = 'Input'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': 'Placeholder',
             'infer': lambda node: single_output_infer(node, lambda n: n.shape),
+            'out_ports_count': 1,
             'is_input': True
         }, attrs)
diff --git a/model-optimizer/mo/ops/lin_op.py b/model-optimizer/mo/ops/lin_op.py
index ff1ec6b96..3a3c7b779 100644
--- a/model-optimizer/mo/ops/lin_op.py
+++ b/model-optimizer/mo/ops/lin_op.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -17,32 +17,40 @@
 import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 from mo.front.common.partial_infer.eltwise import eltwise_infer
 
 
 class LinOp(Op):
     enabled = False
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'can_be_bias': True,
             'can_be_fused': True,
             'type': 'Eltwise',
             'infer': None,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
         return ['operation']
 
+
 class Add(LinOp):
     enabled = False
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    op = 'Add'
+
+    def __init__(self, graph: Graph, attrs: dict):
         attrs.update({'op': 'Add', 'operation': 'sum', 'infer': lambda node: eltwise_infer(node, lambda a, b: a + b)})
         super().__init__(graph, attrs)
 
 
 class Mul(LinOp):
     enabled = False
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    op = 'Mul'
+
+    def __init__(self, graph: Graph, attrs: dict):
         attrs.update({'op': 'Mul', 'operation': 'mul', 'infer': lambda node: eltwise_infer(node, lambda a, b: a*b)})
         super().__init__(graph, attrs)
diff --git a/model-optimizer/mo/ops/lrn.py b/model-optimizer/mo/ops/lrn.py
index f7dc1106f..f0e65cf8b 100644
--- a/model-optimizer/mo/ops/lrn.py
+++ b/model-optimizer/mo/ops/lrn.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,10 +23,12 @@ class LRN(Op):
     op = 'LRN'
     enabled = False
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'Norm',
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': copy_shape_infer
         }, attrs)
 
diff --git a/model-optimizer/mo/ops/memory.py b/model-optimizer/mo/ops/memory.py
index 745efff3d..269a8ab04 100644
--- a/model-optimizer/mo/ops/memory.py
+++ b/model-optimizer/mo/ops/memory.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.front.common.partial_infer.elemental import copy_shape_infer
 from mo.utils.error import Error
@@ -27,7 +25,7 @@ class Memory(Op):
     op = 'Memory'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'Memory',
             'op': 'Memory',
@@ -35,6 +33,8 @@ class Memory(Op):
             'size': None,
             'index': None,
             'infer': Memory.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/op.py b/model-optimizer/mo/ops/op.py
index 83d80fb4a..2028accb0 100644
--- a/model-optimizer/mo/ops/op.py
+++ b/model-optimizer/mo/ops/op.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
  limitations under the License.
 """
 
+import copy
 import logging as log
 from collections import namedtuple
 
@@ -22,7 +23,8 @@ import numpy as np
 
 from mo.front.extractor import add_attrs_props
 from mo.front.extractor import update_ie_fields
-from mo.graph.graph import Node, unique_id
+from mo.graph.graph import Node, Graph
+from mo.graph.port import Port
 from mo.utils import class_registration
 from mo.utils.error import Error
 
@@ -33,7 +35,7 @@ class Op(object):
     # Add the derived class to excluded_classes if one should not be registered in registered_ops
     excluded_classes = []
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs1: dict = None, attrs2: dict = None):
+    def __init__(self, graph: Graph, attrs1: dict = None, attrs2: dict = None):
         self.graph = graph
         try:
             self.ir_version = graph.graph['ir_version']
@@ -56,13 +58,15 @@ class Op(object):
         if attrs is not None:
             new_attrs.update(attrs)
         id_prefix = new_attrs['name'] if 'name' in new_attrs else ''
-        id = unique_id(self.graph, id_prefix)
+        id = self.graph.unique_id(id_prefix)
         new_attrs['name'] = id
         new_attrs = add_attrs_props(new_attrs)
         update_ie_fields(new_attrs, self.ir_version)
         self.substitute_ie_attrs(new_attrs)
         self.graph.add_node(id, **new_attrs)
-        return Node(self.graph, id)
+
+        node = Node(self.graph, id)
+        return node
 
     def substitute_ie_attrs(self, new_attrs: dict):
         """
@@ -71,6 +75,7 @@ class Op(object):
         """
         backend_attrs_mapping = {
             None: self.backend_attrs,
+            5: self.backend_attrs,
             4: self.backend_attrs,
             3: self.backend_attrs,
             2: self.backend_attrs_v2
@@ -103,23 +108,25 @@ class Op(object):
             raise Error('Node {} has more than one outputs. Provide output port explicitly. '.format(node.name))
         return node, port
 
-    def cut_edge_and_create_node(self, node: Node, out_port: int, attrs: dict = None):
+    def create_node_on_port(self, node: Node, out_port: int, attrs: dict = None, edge_attrs: dict = None):
         """
         Removes an edge, that is connected to nodes out_port. Creates new_node with attrs attributes and
         connects it to node by edge that stores the same information as cutted edge.
         :param node: Input node, to cut the edge from
         :param out_port: output port of edge to cut
         :param attrs: attributes of new node
+        :param edge_attrs: attributes to be changed/added to new edge
         :return: Node instance of created new_node
         """
-        edges = [(u, v, keys, params) for u, v, keys, params in node.graph.out_edges(node.id, data=True, keys=True)
-                 if 'out' in params and params['out'] == out_port]
-        edge_attrs = edges[0][3]
-        [self.graph.remove_edge(u, v, key=key) for u, v, key, params in edges]
+        if edge_attrs is None:
+            edge_attrs = {'in': 0}
+        prev_edge_attrs = copy.deepcopy(node.out_edge(out_port))
+        prev_edge_attrs.update(edge_attrs)
+        new_edge_attrs = prev_edge_attrs
         if attrs is None:
             attrs = dict()
         new_node = self.add_node(attrs)
-        self.graph.add_edge(node.id, new_node.id, **edge_attrs)
+        self.graph.add_edge(node.id, new_node.id, **new_edge_attrs)
         return new_node
 
     def create_node(self, inputs: list = None, attrs: dict = None, edge_attrs: dict = None):
@@ -176,7 +183,7 @@ class Op(object):
         old_data_value = [None]
         old_data_shape = [None]
         if data_nodes is None:
-            data_node = unique_id(self.graph)
+            data_node = self.graph.unique_id()
             self.graph.add_node(data_node, **add_attrs_props(
                 dict(kind='data', precision="FP32", name=data_node, value=None, shape=None, data_type=None,
                      infer=None)))
@@ -190,9 +197,11 @@ class Op(object):
                               data_nodes]
         for id, data_node in enumerate(data_nodes):
             self.graph.add_edges_from([(new_op_node.id, data_node.id, {'out': id})])
+
         if new_op_node.has_valid('infer'):
-            log.debug('Start running infer function for individual op node with attributes: {}'.format(
-                new_op_node.graph.node[new_op_node.id]))
+            if log.getLogger().isEnabledFor(log.DEBUG):
+                log.debug('Start running infer function for individual op node with attributes: {}'
+                          ''.format(str(new_op_node)))
             new_op_node.infer(new_op_node)
             assert all(old_value is None for old_value in old_data_value) or all(
                 [np.array_equal(old_data_value[id], data_node.value) for id, data_node in enumerate(data_nodes)])
@@ -203,36 +212,36 @@ class Op(object):
                     [old_data_shape[id] for id in range(len(data_nodes))],
                     [data_node.shape for data_node in data_nodes])
             for data_node in data_nodes:
-                log.debug(
-                    'Finished running infer function, data nodes attributes: {}'.format(
-                        data_node.graph.node[data_node.id]))
+                if log.getLogger().isEnabledFor(log.DEBUG):
+                    log.debug(
+                        'Finished running infer function, data nodes attributes: {}'.format(data_node))
         return data_nodes[0] if len(data_nodes) == 1 else data_nodes
 
     @staticmethod
-    def create_data_node(graph: nx.MultiDiGraph, op_node: Node, attrs: dict = None, edge_attrs: dict = None):
+    def create_data_node(graph: Graph, op_node: Node, attrs: dict = None, edge_attrs: dict = None, out_port=0):
         assert op_node is not None and op_node.kind == 'op'
         assert len(op_node.out_nodes()) == 0
         if attrs is None:
             attrs = {}
 
-        data_node = unique_id(graph, op_node.id)
+        data_node = graph.unique_id(op_node.id)
         defaul_attrs = dict(kind='data', precision="FP32", name=data_node, value=None, shape=None, data_type=None,
                             infer=None)
         defaul_attrs.update(attrs)
         graph.add_node(data_node, **add_attrs_props(defaul_attrs))
         data_node = Node(graph, data_node)
         if edge_attrs is not None:
-            graph.add_edges_from([(op_node.id, data_node.id, {'out': 0, **edge_attrs})])
+            graph.add_edges_from([(op_node.id, data_node.id, {'out': out_port, **edge_attrs})])
         else:
-            graph.add_edges_from([(op_node.id, data_node.id, {'out': 0})])
+            graph.add_edges_from([(op_node.id, data_node.id, {'out': out_port})])
         return data_node
 
     @staticmethod
-    def _create_data_node(graph: nx.MultiDiGraph, name: str, attrs: dict = None):
+    def _create_data_node(graph: Graph, name: str, attrs: dict = None):
         if attrs is None:
             attrs = {}
 
-        data_node = unique_id(graph, name)
+        data_node = graph.unique_id(name)
         defaul_attrs = dict(kind='data', precision="FP32", name=data_node, value=None, shape=None, data_type=None,
                             infer=None)
         defaul_attrs.update(attrs)
@@ -241,23 +250,24 @@ class Op(object):
         return data_node
 
     @staticmethod
-    def create_input_data_node(graph: nx.MultiDiGraph, name: str, value: np.array, attrs: dict = {}):
-        data_node = unique_id(graph, name)
-        defaul_attrs = dict(kind='data', precision="FP32", name=data_node, value=np.array(value), shape=value.shape,
+    def create_input_data_node(graph: Graph, name: str, value: np.array, attrs: dict = {}):
+        data_node = graph.unique_id(name)
+        defaul_attrs = dict(kind='data', precision="FP32", name=data_node, value=np.array(value),
+                            shape=np.array(value.shape),
                             data_type=None, infer=None)
         defaul_attrs.update(attrs)
         graph.add_node(data_node, **add_attrs_props(defaul_attrs))
         return Node(graph, data_node)
 
     @staticmethod
-    def create_and_connect_input_data_node(graph: nx.MultiDiGraph, op_node: Node, attrs: dict = None, edge_attrs: dict = None):
+    def create_and_connect_input_data_node(graph: Graph, op_node: Node, attrs: dict = None, edge_attrs: dict = None):
         assert op_node is not None and op_node.kind == 'op'
         if attrs is None:
             attrs = {}
         if edge_attrs is None:
             edge_attrs = {}
 
-        data_node = unique_id(graph, op_node.id)
+        data_node = graph.unique_id(op_node.id)
         defaul_attrs = dict(kind='data', precision="FP32", name=data_node, value=None, shape=None, data_type=None,
                             infer=None)
         defaul_attrs.update(attrs)
diff --git a/model-optimizer/mo/ops/output.py b/model-optimizer/mo/ops/output.py
index 8a4f578f3..8b77397ff 100644
--- a/model-optimizer/mo/ops/output.py
+++ b/model-optimizer/mo/ops/output.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  limitations under the License.
 """
 
-import networkx as nx
-
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -26,12 +25,11 @@ class Output(Op):
     """
     op = 'OpOutput'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict = None):
+    def __init__(self, graph: Graph, attrs: dict = None):
         super().__init__(graph, {
-            'type': __class__.op,
             'op': __class__.op,
-            'is_output': True,
-            'infer': None,
+            'infer': lambda x: None,
             'value': None,
             'data_type': None,
+            'in_ports_count': 1,
         }, attrs)
diff --git a/model-optimizer/mo/ops/pad.py b/model-optimizer/mo/ops/pad.py
index 739b88672..47377d013 100644
--- a/model-optimizer/mo/ops/pad.py
+++ b/model-optimizer/mo/ops/pad.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
+from mo.graph.graph import Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
@@ -50,11 +50,13 @@ class Pad(Op):
     op = 'Pad'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'op': __class__.op,
             'type': __class__.op,
             'infer': __class__.infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'mode': 'constant',
             'fill_value': float(0),
             'pads': None
diff --git a/model-optimizer/mo/ops/pad_test.py b/model-optimizer/mo/ops/pad_test.py
index 0013ed358..bcd0fdd7e 100644
--- a/model-optimizer/mo/ops/pad_test.py
+++ b/model-optimizer/mo/ops/pad_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/permute.py b/model-optimizer/mo/ops/permute.py
index 57158d1b0..4f2c08914 100644
--- a/model-optimizer/mo/ops/permute.py
+++ b/model-optimizer/mo/ops/permute.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,9 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.transpose import transpose_infer
 from mo.front.extractor import attr_getter
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -25,12 +24,14 @@ class Permute(Op):
     op = 'Permute'
     enabled = False
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'order': None,
             'type': __class__.op,
             'op': __class__.op,
             'infer': self.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/permute_test.py b/model-optimizer/mo/ops/permute_test.py
index cf26cc76f..a58643833 100644
--- a/model-optimizer/mo/ops/permute_test.py
+++ b/model-optimizer/mo/ops/permute_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/pooling.py b/model-optimizer/mo/ops/pooling.py
index a26ab7d69..4af5f6ceb 100644
--- a/model-optimizer/mo/ops/pooling.py
+++ b/model-optimizer/mo/ops/pooling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.utils import tf_window_op_pad_infer
@@ -22,19 +21,21 @@ from mo.front.extractor import attr_getter
 # from mo.front.common.partial_infer.pooling import pool_explicit_padding_infer
 from mo.front.extractor import spatial_getter
 from mo.front.onnx.extractors.utils import get_backend_pad
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
 class Pooling(Op):
     op = 'Pooling'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
             'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def backend_attrs(self):
diff --git a/model-optimizer/mo/ops/pooling_test.py b/model-optimizer/mo/ops/pooling_test.py
index ea11b72a7..78c626819 100644
--- a/model-optimizer/mo/ops/pooling_test.py
+++ b/model-optimizer/mo/ops/pooling_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@ from mo.utils.unittest.graph import build_graph
 nodes_attributes = {'node_1': {'value': None, 'kind': 'data'},
                     'pool': {'type': 'Pooling', 'value': None, 'kind': 'op'},
                     'node_2': {'value': None, 'kind': 'data'},
+                    'op_output': { 'kind': 'op', 'op': 'OpOutput'},
                     }
 
 
@@ -32,8 +33,10 @@ class TestPoolingPartialInfer(unittest.TestCase):
     def test_pooling_infer(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'pool'),
-                             ('pool', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                             ('pool', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 2, 2]),
                                       'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]),
@@ -56,8 +59,10 @@ class TestPoolingPartialInfer(unittest.TestCase):
     def test_pooling_infer_decrement_input_spatial(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'pool'),
-                             ('pool', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                             ('pool', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 224, 224])},
                              'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 3, 3]),
                                       'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]),
@@ -80,8 +85,10 @@ class TestPoolingPartialInfer(unittest.TestCase):
     def test_pooling_infer_no_convention(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'pool'),
-                             ('pool', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                             ('pool', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': np.array([1, 3, 256, 256])},
                              'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 2, 2]),
                                       'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]),
@@ -103,8 +110,10 @@ class TestPoolingPartialInfer(unittest.TestCase):
     def test_pooling_infer_no_shape(self):
         graph = build_graph(nodes_attributes,
                             [('node_1', 'pool'),
-                             ('pool', 'node_2')],
-                            {'node_2': {'is_output': True, 'shape': None},
+                             ('pool', 'node_2'),
+                             ('node_2', 'op_output')
+                             ],
+                            {'node_2': {'shape': None},
                              'node_1': {'shape': None},
                              'pool': {'window': np.array([1, 1, 1, 1]), 'stride': np.array([1, 1, 2, 2]),
                                       'pad': np.array([[0, 0], [0, 0], [3, 3], [3, 3]]),
diff --git a/model-optimizer/mo/ops/power.py b/model-optimizer/mo/ops/power.py
index c4d1ca0d6..41a2c388b 100644
--- a/model-optimizer/mo/ops/power.py
+++ b/model-optimizer/mo/ops/power.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,10 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.eltwise import eltwise_infer
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
@@ -28,7 +27,7 @@ class Power(Op):
     enabled = False
     op = 'Power'
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'Power',
             'op': __class__.op,
@@ -36,6 +35,8 @@ class Power(Op):
             'scale': 1,
             'shift': 0,
             'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/power_test.py b/model-optimizer/mo/ops/power_test.py
index e0a3b97a3..c77ab3c17 100644
--- a/model-optimizer/mo/ops/power_test.py
+++ b/model-optimizer/mo/ops/power_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/reduce.py b/model-optimizer/mo/ops/reduce.py
index 123792848..41457cd28 100644
--- a/model-optimizer/mo/ops/reduce.py
+++ b/model-optimizer/mo/ops/reduce.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,10 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.utils import int64_array
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -34,11 +33,13 @@ class Reduce(Op):
         'sum': np.sum,
     }
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'op': 'Reduce',
             'reduce_type': None,
             'infer': __class__.infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
 
     @staticmethod
@@ -71,7 +72,7 @@ class Reduce(Op):
                 output_node.value = Reduce.reduce_method_map[reduce_type.lower()](input_node.value,
                                                                                   axis=tuple(node.axis),
                                                                                   keepdims=node.keep_dims)
-                output_node.shape = output_node.value.shape
+                output_node.shape = np.array(output_node.value.shape, dtype=np.int64)
             else:
                 log.error('Reduce type {} is not supported for node {}'.format(reduce_type, node.id))
                 return
diff --git a/model-optimizer/mo/ops/relu.py b/model-optimizer/mo/ops/relu.py
index db3ae7d76..3ee6d14c4 100644
--- a/model-optimizer/mo/ops/relu.py
+++ b/model-optimizer/mo/ops/relu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,11 +23,13 @@ class ReLU(Op):
     op = 'ReLU'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': __class__.op,
-            'infer': copy_shape_infer
+            'infer': copy_shape_infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/reshape.py b/model-optimizer/mo/ops/reshape.py
index f616c8d81..8cc24f180 100644
--- a/model-optimizer/mo/ops/reshape.py
+++ b/model-optimizer/mo/ops/reshape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,12 +15,11 @@
 """
 import math
 
-import networkx as nx
 import numpy as np
 
 from mo.front.common.partial_infer.elemental import single_output_infer
 from mo.front.common.partial_infer.reshape import tf_reshape_shape_infer
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 from mo.utils.error import Error
 
@@ -29,19 +28,18 @@ class Reshape(Op):
     op = 'Reshape'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': lambda node: single_output_infer(node, tf_reshape_shape_infer,
                                                       lambda node: np.reshape(node.in_node().value,
                                                                               node.out_node().shape))
         }, attrs)
 
-    def supported_attrs(self):
-        return [('dim', lambda node: ','.join(map(str, node['dim'])))]
-
     @staticmethod
     def kaldi_infer(node: Node):
         in_node = node.in_node().in_node()  # prev_layer_node -> data -> this_node
@@ -50,7 +48,7 @@ class Reshape(Op):
         # Convolution/Pooling layers. Therefore there are 4 cases with different
         # partial inference.
         batch = input_shape[0]
-        if in_node.op == 'Convolution' or in_node.op == 'Pooling':
+        if in_node.op in ['Convolution', 'Pooling', 'Permute']:
             output_spatial = np.array([batch, np.prod(input_shape[1:])], dtype=np.int64)
             return Reshape.set_shape_and_dim(node, output_spatial)
         # Supports ONLY NCHW and NH layouts
diff --git a/model-optimizer/mo/ops/roipooling.py b/model-optimizer/mo/ops/roipooling.py
index 3b345c3de..a5d80648e 100644
--- a/model-optimizer/mo/ops/roipooling.py
+++ b/model-optimizer/mo/ops/roipooling.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -29,7 +29,9 @@ class ROIPooling(Op):
             'spatial_scale': 0.0625,
             'type': __class__.op,
             'op': __class__.op,
-            'infer': roipooling_infer
+            'infer': roipooling_infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/scale_shift.py b/model-optimizer/mo/ops/scale_shift.py
index 835b62691..4642bfcdb 100644
--- a/model-optimizer/mo/ops/scale_shift.py
+++ b/model-optimizer/mo/ops/scale_shift.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -24,10 +23,12 @@ class ScaleShiftOp(Op):
     op = 'ScaleShift'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'infer': copy_shape_infer,
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 3,
+            'out_ports_count': 1,
         }, attrs)
diff --git a/model-optimizer/mo/ops/shape.py b/model-optimizer/mo/ops/shape.py
index 75f435385..475d2612d 100644
--- a/model-optimizer/mo/ops/shape.py
+++ b/model-optimizer/mo/ops/shape.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  limitations under the License.
 """
 
-import networkx as nx
 import numpy as np
 import logging as log
 
+from mo.graph.graph import Graph
 from mo.ops.op import Op
 
 
@@ -25,12 +25,18 @@ class Shape(Op):
     op = 'Shape'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
+            'type': __class__.op,
             'op': __class__.op,
             'infer': __class__.infer,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
+    def supported_attrs(self):
+        return []
+
     @staticmethod
     def infer(node):
         if len(node.in_nodes()) != 1:
@@ -44,6 +50,7 @@ class Shape(Op):
                 node.out_node().value = np.array(value, dtype=node.data_type)
             else:
                 node.out_node().value = np.array(value)
+            node.out_node().shape = np.array(node.out_node().value.shape, dtype=np.int64)
         else:
             log.info('Can\'t infer shape and value for shape operation due to undefined input shape')
 
diff --git a/model-optimizer/mo/ops/slice.py b/model-optimizer/mo/ops/slice.py
index 5f6145df1..fda2acd33 100644
--- a/model-optimizer/mo/ops/slice.py
+++ b/model-optimizer/mo/ops/slice.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,10 +16,9 @@
 
 import logging as log
 
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
@@ -27,13 +26,18 @@ class Slice(Op):
     op = 'Slice'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': __class__.op,
             'op': 'Slice',
+            'in_ports_count': 3,
+            'out_ports_count': 1,
             'infer': __class__.infer
         }, attrs)
 
+    def supported_attrs(self):
+        return ['start', 'end', 'axis']
+
     @staticmethod
     def infer(node: Node):
         if len(node.in_nodes()) == 1:
@@ -52,7 +56,7 @@ class Slice(Op):
                 from mo.front.common.partial_infer.slice import caffe_slice_infer
                 caffe_slice_infer(node)
         elif len(node.in_nodes()) == 3:
-            #TF case
+            # TF case
             start_node = node.in_node(1)
             size_node = node.in_node(2)
             if start_node.has_valid('value') and size_node.has_valid('value'):
@@ -104,10 +108,10 @@ class Slice(Op):
             if s is None:
                 slice_idx[axis] = slice(0, input_shape[axis], 1)
 
-        #Add new parameters to node
+        # Add new parameters to node
         node['slices'] = np.array(slice_idx)
         node['shrink_axis_mask'] = np.array(shrink_axis_mask)
 
-        value = value[slice_idx]
+        value = value[tuple(slice_idx)]
         node.out_node().value = np.array(value) if node.in_node(0).value is not None else None
         node.out_node().shape = np.array(value.shape)
diff --git a/model-optimizer/mo/ops/slice_test.py b/model-optimizer/mo/ops/slice_test.py
index 2061e30b3..edc91248b 100644
--- a/model-optimizer/mo/ops/slice_test.py
+++ b/model-optimizer/mo/ops/slice_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/softmax.py b/model-optimizer/mo/ops/softmax.py
index eaf6bc06c..0b7ff372d 100644
--- a/model-optimizer/mo/ops/softmax.py
+++ b/model-optimizer/mo/ops/softmax.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,10 +14,8 @@
  limitations under the License.
 """
 
-import networkx as nx
-
 from mo.front.common.partial_infer.elemental import copy_shape_infer
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op
 
 
@@ -25,13 +23,15 @@ class Softmax(Op):
     op = 'SoftMax'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'infer': Softmax.infer,
             'kind': 'op',
             'axis': 1,
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
         }, attrs)
 
     def supported_attrs(self):
diff --git a/model-optimizer/mo/ops/split.py b/model-optimizer/mo/ops/split.py
index 5ce6b0f65..62c39516e 100644
--- a/model-optimizer/mo/ops/split.py
+++ b/model-optimizer/mo/ops/split.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,9 +15,8 @@
 """
 import copy
 
-import networkx as nx
 import numpy as np
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
@@ -25,12 +24,13 @@ class Split(Op):
     op = 'Split'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'type': 'Split',
             'op': 'Split',
             'axis': 1,
             'input_port': 0,
+            'in_ports_count': 1,
             'infer': Split.infer
         }, attrs)
 
diff --git a/model-optimizer/mo/ops/squeeze.py b/model-optimizer/mo/ops/squeeze.py
index ef215c990..ad56f99cd 100644
--- a/model-optimizer/mo/ops/squeeze.py
+++ b/model-optimizer/mo/ops/squeeze.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -28,8 +28,7 @@ class Squeeze(Op):
             'kind': 'op',
             'type': 'Reshape',
             'op': __class__.op,
-            'infer': tf_squeeze_infer
+            'infer': tf_squeeze_infer,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
         }, attrs)
-
-    def supported_attrs(self):
-        return [('dim', lambda node: ', '.join(map(str, node['dim'])))]
diff --git a/model-optimizer/mo/ops/strided_slice.py b/model-optimizer/mo/ops/strided_slice.py
new file mode 100644
index 000000000..50f1f93a7
--- /dev/null
+++ b/model-optimizer/mo/ops/strided_slice.py
@@ -0,0 +1,114 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+
+import numpy as np
+
+from mo.front.common.partial_infer.slice import tf_strided_slice_infer
+from mo.graph.graph import Node, Graph
+from mo.ops.op import Op, PermuteAttrs
+from mo.utils.utils import array_to_str
+
+
+def permute_array_with_ellipsis(node: Node, permutation: PermuteAttrs.Permutation, array: np.array, ins_value: int):
+    """
+    This function permutes masks according to permutation parameter. Several cases should be processed:
+    * Some dimensions can be omitted in mask according to ellipsis mask
+    * Mask length can be less than length of output dimensions plus shrinked dimensions
+    * Mask have the same or more length than output
+    """
+    attr_mask_extended = list(array)
+
+    # If input and output have length of shape 3 and less, no need to permute
+    if len(node.in_node().shape) < 4 and len(node.out_node().shape) < 4:
+        return attr_mask_extended
+
+    # Length of mask is less than length of output ()plus shrinked dimensions then we should extend it before permutation
+    if len(attr_mask_extended) < len(node.out_node(0).shape) + np.count_nonzero(node.shrink_axis_mask):
+        # ellipsis is set, add dimensions in right place otherwise insert in the end
+        if np.any(node.ellipsis_mask):
+            idx = np.nonzero(node.ellipsis_mask)
+            assert len(idx[0]) == 1
+            id = idx[0][0]
+        else:
+            id = len(attr_mask_extended) - 1
+
+        ellips_ext = len(node.out_node(0).shape) + np.count_nonzero(node.shrink_axis_mask) - len(attr_mask_extended)
+        for i in range(0, ellips_ext):
+            attr_mask_extended.insert(id + i + 1, ins_value)
+        # permute extended mask
+        perm = PermuteAttrs.get_nhwc_to_nchw_permutation(len(attr_mask_extended))
+        attr_mask_extended = np.array(attr_mask_extended)[perm.perm]
+        return attr_mask_extended
+    else:
+        perm_len = len(node.out_node(0).shape) + np.count_nonzero(node.shrink_axis_mask)
+        perm = PermuteAttrs.get_nhwc_to_nchw_permutation(perm_len)
+        perm_list = list(perm.perm)
+        # if mask length is more than output, just add tail that will not be permuted to avoid error
+        for i in range(perm_len, len(attr_mask_extended)):
+            perm_list.append(i)
+        return np.array(attr_mask_extended, dtype=np.int64)[np.array(perm_list)]
+
+
+def permute_masks(node: Node, permutation: PermuteAttrs.Permutation, attr: str):
+    if not node.has_valid(attr):
+        return None
+
+    node[attr] = permute_array_with_ellipsis(node, permutation, node[attr],
+                                             attr in ['begin_mask', 'end_mask'])
+    return node[attr]
+
+
+class StridedSlice(Op):
+    op = 'StridedSlice'
+    enabled = True
+
+    def __init__(self, graph: Graph, attrs: dict):
+        super().__init__(graph, {
+            'type': __class__.op,
+            'op': 'StridedSlice',
+            'in_ports_count': 4,
+            'out_ports_count': 1,
+            'infer': __class__.infer
+        }, attrs)
+
+    def backend_attrs(self):
+        al = list()
+
+        def convert(attr):
+            return lambda node: array_to_str(node, attr)
+        for a in list(['new_axis_mask', 'shrink_axis_mask', 'ellipsis_mask', 'begin_mask', 'end_mask']):
+            al.append((a, convert(a)))
+        return al
+
+    @staticmethod
+    def infer(node: Node):
+        tf_strided_slice_infer(node)
+
+        PermuteAttrs.create_permute_attrs(node, attrs=[('shrink_axis_mask', 'input:0', permute_masks),
+                                                       ('new_axis_mask', 'input:0', permute_masks),
+                                                       ('ellipsis_mask', 'input:0', permute_masks),
+                                                       ('begin_mask', 'input:0', permute_masks),
+                                                       ('end_mask', 'input:0', permute_masks),
+                                                       ])
+
+        for i in range(1, len(node.in_nodes())):
+            if node.in_node(i).value is not None and node.in_node(i).shape[0] > 3:
+                perm = PermuteAttrs.get_nhwc_to_nchw_permutation(len(node.in_node(0).shape))
+                node.in_node(i).value = permute_array_with_ellipsis(node, perm, node.in_node(i).value, 0)
+
+        # due to permutation from nhwc to nchw we will extend all masks and inputs
+        idx = np.nonzero(node.ellipsis_mask)
+        node.ellipsis_mask[idx] = 0
diff --git a/model-optimizer/mo/ops/strided_slice_test.py b/model-optimizer/mo/ops/strided_slice_test.py
new file mode 100644
index 000000000..c933b4e4d
--- /dev/null
+++ b/model-optimizer/mo/ops/strided_slice_test.py
@@ -0,0 +1,290 @@
+"""
+ Copyright (c) 2019 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+"""
+import unittest
+
+import numpy as np
+from generator import generator
+
+from mo.graph.graph import Node
+from mo.ops.op import PermuteAttrs
+from mo.ops.strided_slice import permute_masks, permute_array_with_ellipsis
+from mo.utils.unittest.graph import build_graph
+
+nodes_attributes = {
+    'data_1': {
+        'kind': 'data',
+        'shape': None,
+        'value': None,
+    },
+    'begin': {
+        'kind': 'data',
+        'shape': None,
+        'value': None,
+    },
+    'end': {
+        'kind': 'data',
+        'shape': None,
+        'value': None,
+    },
+    'stride': {
+        'kind': 'data',
+        'shape': None,
+        'value': None,
+    },
+    'strided_slice': {
+        'op': 'StridedSlice',
+        'begin_mask': None,
+        'end_mask': None,
+        'new_axis_mask': None,
+        'shrink_axis_mask': None,
+        'ellipsis_mask': None,
+        'kind': 'op',
+    },
+    'data_2': {
+        'kind': 'data',
+        'shape': None,
+        'value': None,
+    }
+}
+
+
+@generator
+class TestPermutationStridedSlice(unittest.TestCase):
+    def test_permute_begin_end(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 1, 0, 0]), 'end_mask': np.array([0, 1, 0, 0]),
+                                               'new_axis_mask': np.array([0, 0, 0]), 'shrink_axis_mask': [0, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 0, 1, 0])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 0, 1, 0])))
+
+    def test_permute_begin_end_short(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 0, 0]), 'end_mask': np.array([0, 1, 0]),
+                                               'new_axis_mask': np.array([0, 0, 0]), 'shrink_axis_mask': [0, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0])))
+
+    def test_permute_begin_end_long(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 0, 0, 1, 0]), 'end_mask': np.array([0, 1, 0, 1, 1]),
+                                               'new_axis_mask': np.array([0, 0, 0]), 'shrink_axis_mask': [0, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0, 0])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0, 1])))
+
+    def test_permute_begin_end_new(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 0, 0, 1, 0]), 'end_mask': np.array([0, 1, 0, 1, 1]),
+                                               'new_axis_mask': np.array([1, 0, 0]), 'shrink_axis_mask': [0, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([1, 1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 0, 0, 0, 1])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0, 1])))
+
+    def test_permute_begin_end_new_short(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 0, 0]), 'end_mask': np.array([0, 1, 0]),
+                                               'new_axis_mask': np.array([1, 0, 0]), 'shrink_axis_mask': [0, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([1, 1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0, 1])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0, 1])))
+
+    def test_permute_begin_end_shrink(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 0, 0, 1]), 'end_mask': np.array([0, 1, 0, 1]),
+                                               'new_axis_mask': np.array([0, 0, 0]), 'shrink_axis_mask': [1, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
+
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0])))
+
+    def test_permute_begin_end_shrink_short(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([1, 0, 0]), 'end_mask': np.array([0, 1, 0]),
+                                               'new_axis_mask': np.array([0, 0, 0]), 'shrink_axis_mask': [1, 0, 0],
+                                               'ellipsis_mask': np.array([0, 0, 0])},
+                             'data_2': {'shape': np.array([2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([1, 1, 0, 0])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([0, 1, 1, 0])))
+
+    def test_permute_begin_end_ellipsis(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([0, 0]), 'end_mask': np.array([1, 0]),
+                                               'new_axis_mask': np.array([0]), 'shrink_axis_mask': [0],
+                                               'ellipsis_mask': np.array([1, 0])},
+                             'data_2': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 0, 1, 1])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 0, 1, 1])))
+
+    def test_permute_begin_end_ellipsis_new(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([0, 0, 0]), 'end_mask': np.array([1, 0, 0]),
+                                               'new_axis_mask': np.array([1, 0, 0]), 'shrink_axis_mask': [0],
+                                               'ellipsis_mask': np.array([0, 1, 0])},
+                             'data_2': {'shape': np.array([1, 1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'begin_mask')
+        self.assertTrue(np.array_equal(slice_node.begin_mask, np.array([0, 0, 0, 1, 1])))
+
+        permute_masks(slice_node, PermuteAttrs.Permutation(perm=[0, 4, 1, 2, 3], inv=[0, 2, 3, 4, 1]), 'end_mask')
+        self.assertTrue(np.array_equal(slice_node.end_mask, np.array([1, 0, 0, 1, 1])))
+
+    def test_permute_begin_end_ellipsis_new_inputs(self):
+        # Testing constant path case
+        graph = build_graph(nodes_attributes,
+                            [('data_1', 'strided_slice'),
+                             ('begin', 'strided_slice'),
+                             ('end', 'strided_slice'),
+                             ('stride', 'strided_slice'),
+                             ('strided_slice', 'data_2')],
+                            {'data_1': {'shape': np.array([1, 2, 3, 4]), 'value': None},
+                             'strided_slice': {'begin_mask': np.array([0, 0, 0]), 'end_mask': np.array([1, 0, 0]),
+                                               'new_axis_mask': np.array([1, 0, 0]), 'shrink_axis_mask': [0],
+                                               'ellipsis_mask': np.array([0, 1, 0])},
+                             'begin': {'value': np.array([0, 1, 2])},
+                             'end': {'value': np.array([1, 2, 3])},
+                             'stride': {'value': np.array([1, 1, 1])},
+                             'data_2': {'shape': np.array([1, 1, 2, 3, 4]), 'value': None},
+                             })
+
+        slice_node = Node(graph, 'strided_slice')
+        slice_node.in_node(1).value = permute_array_with_ellipsis(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]),
+                                                                  slice_node.in_node(1).value, 0)
+        self.assertTrue(np.array_equal(slice_node.in_node(1).value, np.array([0, 2, 1, 0, 0])))
+
+        slice_node.in_node(2).value = permute_array_with_ellipsis(slice_node, PermuteAttrs.Permutation(perm=[0, 3, 1, 2], inv=[0, 2, 3, 1]),
+                                                                  slice_node.in_node(2).value, 0)
+        self.assertTrue(np.array_equal(slice_node.in_node(2).value, np.array([1, 3, 2, 0, 0])))
diff --git a/model-optimizer/mo/ops/tile.py b/model-optimizer/mo/ops/tile.py
index 146978f03..21f45c93f 100644
--- a/model-optimizer/mo/ops/tile.py
+++ b/model-optimizer/mo/ops/tile.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,10 +15,9 @@
 """
 
 import logging as log
-import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.ops.op import Op, PermuteAttrs
 
 
@@ -26,11 +25,13 @@ class Tile(Op):
     op = 'Tile'
     enabled = True
 
-    def __init__(self, graph: nx.MultiDiGraph, attrs: dict):
+    def __init__(self, graph: Graph, attrs: dict):
         super().__init__(graph, {
             'kind': 'op',
             'type': __class__.op,
             'op': __class__.op,
+            'in_ports_count': 1,
+            'out_ports_count': 1,
             'infer': Tile.infer
         }, attrs)
 
diff --git a/model-optimizer/mo/ops/tile_test.py b/model-optimizer/mo/ops/tile_test.py
index af0d189cb..0b708b93e 100644
--- a/model-optimizer/mo/ops/tile_test.py
+++ b/model-optimizer/mo/ops/tile_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/ops/unsqueeze.py b/model-optimizer/mo/ops/unsqueeze.py
index 99195a35a..2fce222dc 100644
--- a/model-optimizer/mo/ops/unsqueeze.py
+++ b/model-optimizer/mo/ops/unsqueeze.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,7 +16,6 @@
 
 import numpy as np
 
-from mo.front.common.partial_infer.squeeze import tf_squeeze_infer
 from mo.ops.op import Op, PermuteAttrs
 
 
@@ -29,12 +28,11 @@ class Unsqueeze(Op):
             'kind': 'op',
             'type': 'Reshape',
             'op': __class__.op,
+            'in_ports_count': 2,
+            'out_ports_count': 1,
             'infer': __class__.infer
         }, attrs)
 
-    def supported_attrs(self):
-        return [('dim', lambda node: ', '.join(map(str, node['dim'])))]
-
     @staticmethod
     def infer(node):
         unsqueeze_dims = np.array(node.unsqueeze_dims)
diff --git a/model-optimizer/mo/ops/unsqueeze_test.py b/model-optimizer/mo/ops/unsqueeze_test.py
index 06d25b057..f6185028f 100644
--- a/model-optimizer/mo/ops/unsqueeze_test.py
+++ b/model-optimizer/mo/ops/unsqueeze_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/pipeline/caffe.py b/model-optimizer/mo/pipeline/caffe.py
index d3343963e..e1e8dad78 100644
--- a/model-optimizer/mo/pipeline/caffe.py
+++ b/model-optimizer/mo/pipeline/caffe.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,52 +16,37 @@
 import argparse
 import logging as log
 
-import numpy as np
-
-from extensions.front.freeze_placeholder_value import FreezePlaceholderValue
-from extensions.middle.FusePermutesSequence import FusePermutesSequence
+from extensions.back.CreateConstNodes import CreateConstNodesReplacement
 from mo.front.caffe import custom_layers_mapping, loader
-from mo.front.caffe.extractor import caffe_extractor, common_caffe_fields, caffe_type_extractors
-from mo.front.common.register_custom_ops import check_for_duplicates
-from mo.front.common.register_custom_ops import update_extractors_with_extensions
-from mo.front.common.replacement import FrontReplacementSubgraph
-from mo.front.extractor import extract_node_attrs, add_output_ops, create_tensor_nodes, remove_output_ops, \
-    add_input_ops, user_data_repack
-from mo.graph.graph import print_graph_stat, check_empty_graph
+from mo.front.caffe.extractor import caffe_type_extractors, caffe_extractor
+from mo.front.common.register_custom_ops import update_extractors_with_extensions, check_for_duplicates
+from mo.front.extractor import extract_node_attrs, remove_output_ops
+from mo.middle.passes.conv import convert_add_or_mul_to_scaleshift
 from mo.middle.passes.conv import convert_muladd_to_scaleshift_or_power, \
-    convert_matmul_to_fully_connected, batch_norm_fuse, convert_add_to_scaleshift, \
-    convert_mul_to_scaleshift, \
-    convert_multi_input_conv
-from mo.middle.passes.eliminate import graph_clean_up, remove_op_nodes
+    convert_matmul_to_fully_connected, batch_norm_fuse
+from mo.middle.passes.eliminate import graph_clean_up
+from mo.middle.passes.eliminate import remove_const_ops
 from mo.middle.passes.fusing.decomposition import convert_bn_to_mul_add, convert_scale_shift_to_mul_add
 from mo.middle.passes.fusing.fuse_linear_ops import fuse_linear_ops
 from mo.middle.passes.fusing.fuse_linear_seq import fuse_mul_add_sequence
 from mo.middle.passes.fusing.mark_unfused_nodes import mark_unfused_nodes
 from mo.middle.passes.fusing.resnet_optimization import stride_optimization
-from mo.middle.passes.infer import add_mean_scale_values, scale_input, override_placeholder_shapes, mark_outputs, \
-    partial_infer, convert_mul_add_to_power, override_batch
+from mo.middle.passes.infer import convert_mul_add_to_power
 from mo.middle.passes.mean_scale_values import move_scaleshift_to_preprocess
-from mo.middle.passes.pool import mean_to_avgpool
 from mo.middle.passes.shape import reverse_input_channels, fuse_sequence_of_reshapes
-from mo.middle.passes.shared_weights_duplication import duplicate_shared_weights
 from mo.pipeline.common import prepare_emit_ir
 from mo.utils import class_registration
+from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
 from mo.utils.find_inputs import find_inputs
 from mo.utils.utils import refer_to_faq_msg
-from mo.utils.cli_parser import get_meta_info
 
 
-def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str, output_model_name: str, outputs: list,
-           output_dir: str,
-           scale: float,
-           user_shapes: [None, list, np.array] = None, mean_scale_values: [dict, list] = (), mean_file: str = "",
-           mean_file_offsets: tuple = None,
-           custom_layers_mapping_path: str = None):
+def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str, output_model_name: str,
+           output_dir: str, mean_file: str = "",
+           mean_file_offsets: tuple = None, custom_layers_mapping_path: str = None):
     meta_info = get_meta_info(argv)
 
-    FusePermutesSequence.enabled = False
-
     proto, model = loader.load_caffe_proto_model(proto_file_name, model_file_name)
 
     update_extractors_with_extensions(
@@ -77,8 +62,8 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
                     refer_to_faq_msg(11), str(e)) from e
 
     log.debug("After caffe_pb_to_nx")
-    print_graph_stat(graph)
-    check_empty_graph(graph, 'load_caffe_proto_model')
+    graph.print_graph_stat()
+    graph.check_empty_graph('load_caffe_proto_model')
 
     graph.__setattr__('proto_path', proto_file_name)
     graph.__setattr__('caffemodel_path', model_file_name)
@@ -86,12 +71,7 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
     graph.graph['layout'] = 'NCHW'
     graph.graph['cmd_params'] = argv
     graph.graph['fw'] = 'caffe'
-    graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 4
-
-    extract_node_attrs(graph, lambda node: (True, common_caffe_fields(node)))
-
-    log.debug("After adding specific nodes for outputs")
-    print_graph_stat(graph)
+    graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 5
 
     custom_layers_map = custom_layers_mapping.load_layers_xml(custom_layers_mapping_path)
     custom_layers_mapping.update_extractors(
@@ -100,76 +80,16 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
         argv.disable_omitting_optional if hasattr(argv, 'disable_omitting_optional') else False,
         argv.enable_flattening_nested_params if hasattr(argv, 'enable_flattening_nested_params') else False
     )
-
     extract_node_attrs(graph, lambda node: caffe_extractor(node, check_for_duplicates(caffe_type_extractors)))
 
-    log.debug("After extract_node_attr")
-    print_graph_stat(graph)
-
-    packed_user_shapes, packed_outputs, freeze_placeholder = user_data_repack(graph, user_shapes, outputs, argv.freeze_placeholder_with_value)
-    if argv.freeze_placeholder_with_value is not None:
-        FreezePlaceholderValue.enabled = True
-        FreezePlaceholderValue.replacement_dict = freeze_placeholder
-        class_registration.update_registration([FrontReplacementSubgraph])
-    output_op_nodes = add_output_ops(graph, packed_outputs)
-    input_op_nodes = add_input_ops(graph, packed_user_shapes, True)
-    override_placeholder_shapes(graph, packed_user_shapes)
-    override_batch(graph, argv.batch)
-    graph_clean_up(graph)
-    check_empty_graph(graph, 'add_output_ops and add_input_ops')
+    # --------------------------------- LOAD END ------------------------------------------------------
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
-
-    graph = create_tensor_nodes(graph)
-
-    log.debug("After create_tensor_nodes")
-    print_graph_stat(graph)
-
-    remove_op_nodes(graph, {'op': 'Identity'})
-    remove_output_ops(graph)
-    graph_clean_up(graph)
-
-    log.debug("After removing specific nodes for output")
-    print_graph_stat(graph)
-
-    # you need to pass required network outputs here
-    # but we don't have a way yet, so just passing all discovered sinks
-    mark_outputs(graph)
-    graph_clean_up(graph)
-    log.debug("After graph_cleanup")
-    print_graph_stat(graph)
-
-    graph = partial_infer(graph)
-    log.debug("After partial_infer")
-    print_graph_stat(graph)
-    check_empty_graph(graph, 'partial_infer')
-    duplicate_shared_weights(graph)
-
-    input_op_nodes = add_input_ops(graph, packed_user_shapes, False)
-    graph_clean_up(graph)
-    check_empty_graph(graph, 'add_input_ops')
-    scale_input(graph, scale)
-
-    add_mean_scale_values(graph, mean_scale_values)
-
-    log.debug("Split multi input convolutions")
-    convert_multi_input_conv(graph)
-
-    graph_clean_up(graph)
-    log.debug("After graph_cleanup")
-    print_graph_stat(graph)
-
-    remove_op_nodes(graph, {'op': 'Dropout'})
-    remove_op_nodes(graph, {'phase': 0})
-    graph_clean_up(graph)
-
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
-    mean_to_avgpool(graph)
-
     # Mark nodes with attr 'can_be_fused': False to disable fusing for specified nodes
     mark_unfused_nodes(graph, argv.finegrain_fusing)
 
-    #need this pass even without fusing to convert scale with 2 inputs
+    # need this pass even without fusing to convert scale with 2 inputs
     convert_scale_shift_to_mul_add(graph)
     graph_clean_up(graph)
 
@@ -190,12 +110,12 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
     convert_matmul_to_fully_connected(graph)
     batch_norm_fuse(graph)
     convert_mul_add_to_power(graph)
-    convert_add_to_scaleshift(graph)  # scale = 1
-    convert_mul_to_scaleshift(graph)  # biases = 0
-
     graph_clean_up(graph)
+    convert_add_or_mul_to_scaleshift(graph)  # scale = 1
+    graph_clean_up(graph)
+
     log.debug("After graph_cleanup")
-    print_graph_stat(graph)
+    graph.print_graph_stat()
 
     if argv.reverse_input_channels:
         reverse_input_channels(graph)
@@ -220,6 +140,11 @@ def driver(argv: argparse.Namespace, proto_file_name: str, model_file_name: str,
 
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
+    remove_const_ops(graph)
+    CreateConstNodesReplacement().find_and_replace_pattern(graph)
+
+    remove_output_ops(graph)
+
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     mean_data=mf,
                     input_names=input_names,
diff --git a/model-optimizer/mo/pipeline/common.py b/model-optimizer/mo/pipeline/common.py
index 7c21c904b..6d4b94c21 100644
--- a/model-optimizer/mo/pipeline/common.py
+++ b/model-optimizer/mo/pipeline/common.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,15 +14,14 @@
  limitations under the License.
 """
 
+import logging as log
 import os
 from operator import itemgetter
 
-import logging as log
 import networkx as nx
 
-from mo.back.ie_ir_ver_2.emitter import port_renumber, serialize_constants, generate_ie_ir, serialize_mean_image, \
-    create_const_nodes
-from mo.graph.graph import Node, unique_id
+from mo.back.ie_ir_ver_2.emitter import port_renumber, serialize_constants, generate_ie_ir, serialize_mean_image
+from mo.graph.graph import Node, Graph
 from mo.middle.passes import tensor_names, convert_data_type
 from mo.utils.error import Error
 
@@ -62,7 +61,7 @@ def get_fw_tensor_debug_info(node: Node):
     return node.soft_get('fw_tensor_debug_info')
 
 
-def get_sorted_outputs(graph: nx.MultiDiGraph):
+def get_sorted_outputs(graph: Graph):
     outputs = []
     outputs_for_sort = {}
     for node in graph.nodes():
@@ -85,7 +84,7 @@ def get_sorted_outputs(graph: nx.MultiDiGraph):
     return [Node(graph, key) for key, value in sorted(outputs_for_sort.items(), key=itemgetter(1))]
 
 
-def collect_sub_graphs(graph: nx.MultiDiGraph):
+def collect_sub_graphs(graph: Graph):
     ''' Go over all nodes and sub_graphs in the graph recursively; returns all found sub-graphs. '''
     result = []
     for node in graph.nodes():
@@ -97,14 +96,14 @@ def collect_sub_graphs(graph: nx.MultiDiGraph):
     return result
 
 
-def relabel_nodes_inplace_safe(graph: nx.MultiDiGraph, new_labels: dict):
+def relabel_nodes_inplace_safe(graph: Graph, new_labels: dict):
     ''' Safely relabels graph in-place without graph copy.
         
         Safity in this place means that it is guarantied that
         there won't be collisions during relabiling process.
     '''
     # Relabel nodes in two stages
-    intermediate_map = {node: unique_id(graph, '__relabel__{}__'.format(str(i))) for i, node in enumerate(graph.nodes())}
+    intermediate_map = {node: graph.unique_id('__relabel__{}__'.format(str(i))) for i, node in enumerate(graph.nodes())}
     final_map = {dst: new_labels[src] for src, dst in intermediate_map.items()}
     assert len(set(intermediate_map.keys()).intersection(set(intermediate_map.values()))) == 0
     assert len(set(final_map.keys()).intersection(set(final_map.values()))) == 0
@@ -112,11 +111,9 @@ def relabel_nodes_inplace_safe(graph: nx.MultiDiGraph, new_labels: dict):
     nx.relabel_nodes(graph, final_map, copy=False)
 
 
-def prepare_emit_ir(graph: nx.MultiDiGraph, data_type: str, output_dir: str, output_model_name: str,
+def prepare_emit_ir(graph: Graph, data_type: str, output_dir: str, output_model_name: str,
                     mean_data: [list, None] = None, input_names: list = [], meta_info: dict = dict()):
-
     for sub_graph in [graph] + collect_sub_graphs(graph):
-        create_const_nodes(sub_graph, start_data_nodes_are_not_allowed=(sub_graph == graph))
         op_order, data_order = determined_sort(get_sorted_outputs(sub_graph))
         mapping = {v: u for u, v in enumerate(op_order)}
         mapping.update({v: u for u, v in enumerate(data_order, start=len(sub_graph))})
diff --git a/model-optimizer/mo/pipeline/common_test.py b/model-optimizer/mo/pipeline/common_test.py
index a87770083..8ee313f2d 100644
--- a/model-optimizer/mo/pipeline/common_test.py
+++ b/model-optimizer/mo/pipeline/common_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/pipeline/kaldi.py b/model-optimizer/mo/pipeline/kaldi.py
index fcb3faaf6..e86b79452 100644
--- a/model-optimizer/mo/pipeline/kaldi.py
+++ b/model-optimizer/mo/pipeline/kaldi.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,25 +14,27 @@
  limitations under the License.
 """
 import logging as log
+
 import numpy as np
 
+from extensions.back.CreateConstNodes import CreateConstNodesReplacement
 from extensions.back.kaldi_remove_memory_output import KaldiRemoveMemoryOutputBackReplacementPattern
 from extensions.back.remove_last_softmax_pattern import RemoveLastSoftMaxPattern
 from extensions.front.kaldi.eliminate_redundant_reshape import EliminateRedundantReshape
 from extensions.front.kaldi.fuse_repeated_reshape import FuseRepeatedReshapes
 from extensions.middle.EltwiseChecker import EltwiseChecker
 from mo.front.common.register_custom_ops import update_extractors_with_extensions
-from mo.front.extractor import create_tensor_nodes, extract_node_attrs, add_output_ops, remove_output_ops
+from mo.front.extractor import extract_node_attrs, remove_output_ops
 from mo.front.kaldi.extractor import kaldi_extractor, kaldi_type_extractors
 from mo.front.kaldi.loader.loader import load_kaldi_model, read_counts_file
+from mo.graph.graph import Node
+from mo.middle.passes.eliminate import graph_clean_up, remove_const_ops
+from mo.middle.passes.infer import partial_infer
+from mo.pipeline.common import prepare_emit_ir
 from mo.utils import class_registration
 from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
 from mo.utils.find_inputs import find_outputs
-from mo.graph.graph import print_graph_stat, Node, check_empty_graph
-from mo.middle.passes.eliminate import graph_clean_up
-from mo.middle.passes.infer import override_placeholder_shapes, partial_infer, mark_outputs, override_batch
-from mo.pipeline.common import prepare_emit_ir
 from mo.utils.utils import refer_to_faq_msg
 
 
@@ -92,14 +94,13 @@ def apply_biases_to_last_layer(graph, counts):
 
     biases_node = target_node.in_nodes()[2]  # first - input, second - weights, third - biases
     if biases_node.value is not None:
-        biases_node.value = np.subtract(biases_node.value, counts)
+        biases_node.value = np.subtract(biases_node.value, counts)  # pylint: disable=assignment-from-no-return
     else:
         biases_node.value = counts * -1
         biases_node.shape = counts.shape
 
 
-def driver(argv, input_model, output_model_name, outputs, output_dir, scale, placeholder_shapes=None,
-           mean_scale_values=()):
+def driver(argv, input_model, output_model_name, output_dir):
     meta_info = get_meta_info(argv)
 
     EltwiseChecker.enabled = False
@@ -109,51 +110,22 @@ def driver(argv, input_model, output_model_name, outputs, output_dir, scale, pla
     except Exception as e:
         raise Error('Model Optimizer is not able to read Kaldi model {}. '.format(input_model) +
                     refer_to_faq_msg(91)) from e
-    check_empty_graph(graph, 'load_kaldi_nnet_model')
+    graph.check_empty_graph('load_kaldi_nnet_model')
     graph.graph['cmd_params'] = argv
     graph.graph['fw'] = 'kaldi'
-    graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 4
-
+    graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 5
     update_extractors_with_extensions(kaldi_type_extractors)
-
     extract_node_attrs(graph, lambda node: kaldi_extractor(node))
 
+    # --------------------------------- LOAD END ------------------------------------------------------
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
 
-    output_op_nodes = add_output_ops(graph, outputs)  # TODO pass real outputs instead of None
-    log.debug("After adding specific nodes for outputs")
-    print_graph_stat(graph)
-
-    check_empty_graph(graph, 'add_output_ops')
-    create_tensor_nodes(graph)
-
-    graph_clean_up(graph)
-    log.debug("After removing specific nodes for output")
-    print_graph_stat(graph)
-
-    override_placeholder_shapes(graph, placeholder_shapes)
-    override_batch(graph, argv.batch)
-
-    graph_clean_up(graph)
-    log.debug("After setting input shapes")
-    print_graph_stat(graph)
-    graph_clean_up(graph)
-    remove_output_ops(graph)
-    log.debug("After removing specific nodes for output")
-    print_graph_stat(graph)
-
-    # You need to pass required network outputs here
-    # but we don't have a way yet, so just passing all discovered sinks
-    mark_outputs(graph)
-    graph_clean_up(graph)
-    log.debug("After graph_cleanup")
-    print_graph_stat(graph)
     graph = partial_infer(graph)
 
     # The order is intentional, firstly eliminate repeated, then remove redundant
     FuseRepeatedReshapes().find_and_replace_pattern(graph)
     EliminateRedundantReshape().find_and_replace_pattern(graph)
-    check_empty_graph(graph, 'partial_infer')
+    graph.check_empty_graph('partial_infer')
     if argv.counts:
         try:
             counts = read_counts_file(argv.counts)
@@ -167,9 +139,15 @@ def driver(argv, input_model, output_model_name, outputs, output_dir, scale, pla
         RemoveLastSoftMaxPattern().find_and_replace_pattern(graph)
         graph_clean_up(graph)
         log.debug("After removing softmax")
-        print_graph_stat(graph)
+        graph.print_graph_stat()
 
     # Intentionally after all transformations
     KaldiRemoveMemoryOutputBackReplacementPattern().find_and_replace_pattern(graph)
+
+    remove_const_ops(graph)
+    CreateConstNodesReplacement().find_and_replace_pattern(graph)
+
+    remove_output_ops(graph)
+
     prepare_emit_ir(graph, argv.data_type, output_dir, output_model_name, meta_info=meta_info)
     return 0
diff --git a/model-optimizer/mo/pipeline/kaldi_test.py b/model-optimizer/mo/pipeline/kaldi_test.py
index 2fe183330..3e08bf3ab 100644
--- a/model-optimizer/mo/pipeline/kaldi_test.py
+++ b/model-optimizer/mo/pipeline/kaldi_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -29,20 +29,17 @@ class TestKaldiPipeline(unittest.TestCase):
                  'weights': {'value': None, 'kind': 'data'},
                  'biases': {'value': np.zeros(10), 'kind': 'data'},
                  'sc': {'op': 'ScaleShift', 'kind': 'op'},
-                 'output': {'kind': 'data'}
+                 'output': {'kind': 'data'},
+                 'op_output': {'op': 'OpOutput', 'kind': 'op'}
                  }
         graph = build_graph(nodes,
                             [
                                 ('input', 'sc'),
                                 ('weights', 'sc'),
                                 ('biases', 'sc'),
-                                ('sc', 'output')
-                            ],
-                            {
-                                'output': {
-                                    'is_output': True
-                                }
-                            })
+                                ('sc', 'output'),
+                                ('output', 'op_output')
+                            ])
         counts = -0.5 * np.ones(10)
         apply_biases_to_last_layer(graph, counts)
         sc_node = Node(graph, 'sc')
@@ -53,20 +50,17 @@ class TestKaldiPipeline(unittest.TestCase):
                  'weights': {'kind': 'data'},
                  'biases': {'value': None, 'shape': None, 'kind': 'data'},
                  'fc': {'op': 'FullyConnected', 'kind': 'op'},
-                 'output': {'kind': 'data'}
+                 'output': {'kind': 'data'},
+                 'op_output': {'op': 'OpOutput', 'kind': 'op'}
                  }
         graph = build_graph(nodes,
                             [
                                 ('input', 'fc'),
                                 ('weights', 'fc'),
                                 ('biases', 'fc'),
-                                ('fc', 'output')
-                            ],
-                            {
-                                'output': {
-                                    'is_output': True
-                                }
-                            })
+                                ('fc', 'output'),
+                                ('output', 'op_output')
+                            ])
         counts = -0.5 * np.ones(10)
         apply_biases_to_last_layer(graph, counts)
         fc_node = Node(graph, 'fc')
@@ -79,7 +73,8 @@ class TestKaldiPipeline(unittest.TestCase):
                  'fc': {'op': 'FullyConnected', 'kind': 'op'},
                  'data': {'kind': 'data'},
                  'softmax': {'op': 'SoftMax', 'kind': 'op'},
-                 'output': {'kind': 'data'}
+                 'output': {'kind': 'data'},
+                 'op_output': {'op': 'OpOutput', 'kind': 'op'}
                  }
         graph = build_graph(nodes,
                             [
@@ -88,13 +83,9 @@ class TestKaldiPipeline(unittest.TestCase):
                                 ('biases', 'fc'),
                                 ('fc', 'data'),
                                 ('data', 'softmax'),
-                                ('softmax', 'output')
-                            ],
-                            {
-                                'output': {
-                                    'is_output': True
-                                }
-                            })
+                                ('softmax', 'output'),
+                                ('output', 'op_output')
+                            ])
         counts = -0.5 * np.ones(10)
         apply_biases_to_last_layer(graph, counts)
         fc_node = Node(graph, 'fc')
diff --git a/model-optimizer/mo/pipeline/mx.py b/model-optimizer/mo/pipeline/mx.py
index 03ac18f4e..e382cd61d 100644
--- a/model-optimizer/mo/pipeline/mx.py
+++ b/model-optimizer/mo/pipeline/mx.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,6 +13,9 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
+from extensions.back.CreateConstNodes import CreateConstNodesReplacement
+from extensions.front.restore_ports import RestorePorts
+from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
 from mo.utils.error import Error, FrameworkError
 from mo.utils.utils import refer_to_faq_msg
 
@@ -22,31 +25,23 @@ except ImportError:
     raise Error('Module mxnet was not found. Please install appropriate version of mxnet via install_prerequisites '
                 'script.' + refer_to_faq_msg(52))
 
-import logging as log
-
-import numpy as np
 import argparse
-import networkx as nx
 
-from mo.front.extractor import add_output_ops, extract_node_attrs, create_tensor_nodes, \
-    add_input_ops, remove_output_ops, user_data_repack
+from mo.front.extractor import extract_node_attrs, remove_output_ops
 from mo.front.mxnet.extractor import mxnet_op_extractor
 from mo.front.mxnet.loader import symbol2nx, load_symbol_def
 from mo.middle.passes.fusing.decomposition import convert_batch_norm, convert_scale_shift_to_mul_add
 from mo.middle.passes.conv import convert_muladd_to_scaleshift_or_power, \
-    convert_add_to_scaleshift, convert_mul_to_scaleshift, fuse_pad
-from mo.middle.passes.eliminate import graph_clean_up, remove_op_nodes
+    convert_add_or_mul_to_scaleshift, fuse_pad
+from mo.middle.passes.eliminate import graph_clean_up, remove_const_ops
 from mo.middle.passes.fusing.fuse_linear_ops import fuse_linear_ops
 from mo.middle.passes.fusing.fuse_linear_seq import fuse_mul_add_sequence
 from mo.middle.passes.fusing.mark_unfused_nodes import mark_unfused_nodes
-from mo.middle.passes.shared_weights_duplication import duplicate_shared_weights
 from mo.middle.passes.fusing.resnet_optimization import stride_optimization
-from mo.middle.passes.infer import mark_outputs, override_placeholder_shapes, partial_infer, add_mean_scale_values, \
-    scale_input, convert_mul_add_to_power
+from mo.middle.passes.infer import convert_mul_add_to_power
 from mo.middle.passes.mean_scale_values import move_scaleshift_to_preprocess
 from mo.middle.passes.shape import reverse_input_channels
 from mo.pipeline.common import prepare_emit_ir
-from mo.graph.graph import create_edge, Node, print_graph_stat, check_empty_graph
 from mo.front.mxnet.nd_to_params import save_params_file
 from mo.front.common.register_custom_ops import update_extractors_with_extensions
 from mo.front.mxnet.extractor import mxnet_op_extractors
@@ -55,48 +50,7 @@ from mo.utils.cli_parser import get_meta_info
 from extensions.middle.EltwiseInputNormalization import EltwiseInputNormalize
 
 
-def add_input_data_to_prior_boxes(graph: nx.MultiDiGraph, input_names: str = ''):
-    """
-    PriorBox layer has data input unlike mxnet.
-    Need to add data input to _contrib_MultiBoxPrior for
-    for correct conversion to PriorBox layer.
-
-    Parameters
-    ----------
-    graph : nx.MultiDiGraph
-       Graph with loaded model.
-    """
-    if not input_names:
-        input_names = ('data',)
-    else:
-        input_names = input_names.split(',')
-
-    input_nodes = {}
-    for node in graph.nodes():
-        node = Node(graph, node)
-        if node.has_valid('op') and node.name in input_names:
-            input_nodes.update({node.id: node})
-
-    if len(input_nodes) > 0:
-        for node in graph.nodes():
-            node = Node(graph, node)
-            if node.has_valid('op') and node.op == '_contrib_MultiBoxPrior':
-                create_edge(list(input_nodes.values())[0], node, out_port=0, in_port=1)
-
-
-#TODO Remove the func after 'add_output_ops' will be moved to front replacer.
-def check_softmax_node_inputs(graph: nx.MultiDiGraph):
-    for i, attrs in list(graph.nodes(data=True)):
-        if 'op' in attrs and attrs['op'] == 'SoftMax':
-            node = Node(graph, i)
-            if len(node.in_nodes()) > 1:
-                graph.remove_node(node.in_node(1).id)
-
-
-def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, outputs: list, output_dir: str,
-           scale: float,
-           placeholder_shapes: [None, list, np.array] = None,
-           mean_scale_values: [dict, list] = ()):
+def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, output_dir: str):
     meta_info = get_meta_info(argv)
 
     try:
@@ -118,61 +72,20 @@ def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, o
 
     update_extractors_with_extensions(mxnet_op_extractors)
     graph = symbol2nx(model_nodes, model_params, argv.input)
-    check_empty_graph(graph, 'symbol2nx. It may happen due to problems with loaded model')
+    graph.check_empty_graph('symbol2nx. It may happen due to problems with loaded model')
 
     graph.__setattr__('name', output_model_name)
     graph.graph['layout'] = 'NCHW'
     graph.graph['cmd_params'] = argv
     graph.graph['fw'] = 'mxnet'
     graph.graph['feature_dim'] = 1 if graph.graph['layout'] == 'NCHW' else 3
-    graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 4
-    graph = extract_node_attrs(graph, mxnet_op_extractor)
-    check_softmax_node_inputs(graph)
-
-    user_shapes, packed_outputs, _ = user_data_repack(graph, placeholder_shapes, outputs, None)
-    output_op_nodes = add_output_ops(graph, packed_outputs)
-    input_op_nodes = add_input_ops(graph, user_shapes, True)
-
-    try:
-        override_placeholder_shapes(graph, user_shapes, argv.batch)
-    except ValueError as err:
-        raise Error(
-            'The following error happened while processing input shapes: {}. ' +
-            refer_to_faq_msg(54),
-            str(err)
-        ) from err
-    check_empty_graph(graph, 'add_output_ops and add_input_ops')
+    graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 5
+    extract_node_attrs(graph, mxnet_op_extractor)
 
+    # --------------------------------- LOAD END ------------------------------------------------------
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
-    add_input_data_to_prior_boxes(graph, argv.input)
-
-    graph = create_tensor_nodes(graph)
-
-    graph_clean_up(graph)
-    remove_output_ops(graph)
-    mark_outputs(graph)
-    remove_output_ops(graph)
-
-    graph_clean_up(graph)
-
-    log.debug("After removing specific nodes for output")
-
-    print_graph_stat(graph)
-
-    graph = partial_infer(graph)
-    graph_clean_up(graph)
-    check_empty_graph(graph, 'partial_infer')
-
-    duplicate_shared_weights(graph)
-
-    scale_input(graph, scale)
-    add_mean_scale_values(graph, mean_scale_values)
-
-    remove_op_nodes(graph, {'identity': True})
-
-    graph_clean_up(graph)
-
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
+
     fuse_pad(graph)
 
     # Mark nodes with attr 'can_be_fused': False to disable fusing for specified nodes
@@ -205,8 +118,9 @@ def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, o
     graph_clean_up(graph)
 
     convert_mul_add_to_power(graph)
-    convert_add_to_scaleshift(graph)  # scale = 1
-    convert_mul_to_scaleshift(graph)  # biases = 0
+    graph_clean_up(graph)
+    convert_add_or_mul_to_scaleshift(graph)  # scale = 1
+    graph_clean_up(graph)
 
     if argv.reverse_input_channels:
         reverse_input_channels(graph)
@@ -220,6 +134,11 @@ def driver(argv: argparse.Namespace, input_model: str, output_model_name: str, o
 
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
+    for_graph_and_each_sub_graph_recursively(graph, remove_const_ops)
+    CreateConstNodesReplacement().find_and_replace_pattern(graph)
+
+    for_graph_and_each_sub_graph_recursively(graph, remove_output_ops)
+
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     meta_info=meta_info)
     return 0
diff --git a/model-optimizer/mo/pipeline/onnx.py b/model-optimizer/mo/pipeline/onnx.py
index 88fd356aa..d41ea4d9b 100644
--- a/model-optimizer/mo/pipeline/onnx.py
+++ b/model-optimizer/mo/pipeline/onnx.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,7 +13,6 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -22,48 +21,40 @@ from __future__ import unicode_literals
 import argparse
 import logging as log
 
-import numpy as np
-
+from extensions.back.CreateConstNodes import CreateConstNodesReplacement
+from extensions.middle.AddQuantizeFuse import AddQuantizeFuse
 from extensions.middle.EltwiseInputNormalization import EltwiseInputNormalize
-from extensions.middle.NormalizeFullyConnected import NormalizeFullyConnected
-from mo.front.common.register_custom_ops import check_for_duplicates
-from mo.front.common.register_custom_ops import update_extractors_with_extensions
-from mo.front.extractor import add_output_ops, add_input_ops, \
-    extract_node_attrs, create_tensor_nodes, remove_output_ops, user_data_repack
-from mo.front.onnx.extractor import common_onnx_fields, onnx_op_extractor, onnx_op_extractors
+from extensions.middle.MulQuantizeFuse import MulQuantizeFuse
+from mo.front.common.register_custom_ops import update_extractors_with_extensions, check_for_duplicates
+from mo.front.extractor import extract_node_attrs, remove_output_ops
+from mo.front.onnx.extractor import onnx_op_extractor, onnx_op_extractors
 from mo.front.onnx.loader import load_onnx_model, protobuf2nx
-from mo.middle.passes.conv import convert_add_to_scaleshift, convert_gemm_to_fully_connected, \
-    convert_muladd_to_scaleshift_or_power, fuse_pad, convert_dilated_convolution, convert_mul_to_scaleshift
-from mo.middle.passes.eliminate import graph_clean_up, remove_op_nodes, remove_useless_split
+from mo.middle.passes.conv import convert_add_or_mul_to_scaleshift, convert_muladd_to_scaleshift_or_power, fuse_pad
+from mo.middle.passes.eliminate import graph_clean_up_onnx, remove_const_ops
 from mo.middle.passes.fusing.decomposition import convert_batch_norm, convert_scale_shift_to_mul_add
 from mo.middle.passes.fusing.fuse_grouped_conv import grouped_convolutions_fusing
 from mo.middle.passes.fusing.fuse_linear_ops import fuse_linear_ops
 from mo.middle.passes.fusing.fuse_linear_seq import fuse_mul_add_sequence
 from mo.middle.passes.fusing.mark_unfused_nodes import mark_unfused_nodes
-from mo.middle.passes.infer import scale_input, override_placeholder_shapes, partial_infer, convert_mul_add_to_power, \
-    update_fully_connected_shapes, add_mean_scale_values, override_batch
+from mo.middle.passes.infer import convert_mul_add_to_power
 from mo.middle.passes.mean_scale_values import move_scaleshift_to_preprocess
 from mo.middle.passes.shape import convert_reshape, reverse_input_channels, \
     fuse_sequence_of_reshapes, merge_nodes_permutations, permute_data_nodes_attrs, permute_op_nodes_attrs
+from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
 from mo.pipeline.common import prepare_emit_ir
 from mo.utils import class_registration
 from mo.utils.cli_parser import get_meta_info
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
-from mo.graph.graph import check_empty_graph
-
 
-def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: str, outputs: list, output_dir: str,
-           scale: float,
-           user_shapes: [None, list, np.array] = None,
-           mean_scale_values: [dict, list] = ()):
 
+def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: str, output_dir: str):
     meta_info = get_meta_info(argv)
 
     model_proto = load_onnx_model(model_file_name)
     model_graph = model_proto.graph  # pylint: disable=no-member
-    #print(model_graph)
-    #assert len(model_graph) == 1, "An ONNX model contains more than 1 graph: unsupported"
+    # print(model_graph)
+    # assert len(model_graph) == 1, "An ONNX model contains more than 1 graph: unsupported"
     log.debug("Number of nodes in graph_def: {}".format(len(model_graph.node)))
     log.debug("Number of all input ports (not true inputs) in graph_def: {}".format(len(model_graph.input)))
     log.debug("Number of initializers in graph_def: {}".format(len(model_graph.initializer)))
@@ -73,15 +64,13 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
     try:
         graph = protobuf2nx(model_proto)
         log.debug("Number of nodes in NX graph: {}".format(graph.number_of_nodes()))
-        graph.__setattr__('name', output_model_name if output_model_name else model_proto.graph.name)  # pylint: disable=no-member
+        graph.__setattr__('name',
+                          output_model_name if output_model_name else model_proto.graph.name)  # pylint: disable=no-member
         graph.graph['layout'] = 'NCHW'
         graph.graph['cmd_params'] = argv
         graph.graph['fw'] = 'onnx'
         graph.graph['feature_dim'] = 1 if graph.graph['layout'] == 'NCHW' else 3
-        graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 4
-        # extract basic attributes earlier to enable some passes that relies on them before full attribute
-        # extractor is called
-        extract_node_attrs(graph, lambda node: (True, common_onnx_fields(node)))
+        graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 5
     except Exception as e:
         raise Error(
             'Cannot pre-process ONNX graph after reading from model file "{}". ' \
@@ -90,59 +79,15 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
             model_file_name,
             str(e)
         ) from e
-    check_empty_graph(graph, 'protobuf2nx. It may happen due to problems with loaded model')
-    packed_user_shapes, packed_outputs, _ = user_data_repack(graph, user_shapes, outputs, None)
-
-    output_op_nodes = add_output_ops(graph, packed_outputs)
-    input_op_nodes = add_input_ops(graph, packed_user_shapes, True)
-
-    # this call of 'graph_clean_up' removes child nodes of outputs which is useful when custom output is specified
-    graph_clean_up(graph)
-    check_empty_graph(graph, 'add_output_ops and add_input_ops')
+    graph.check_empty_graph('protobuf2nx. It may happen due to problems with loaded model')
     extract_node_attrs(graph, lambda node: onnx_op_extractor(node, check_for_duplicates(onnx_op_extractors)))
 
+    # --------------------------------- LOAD END ------------------------------------------------------
     class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
-
-    create_tensor_nodes(graph)
-    graph_clean_up(graph)
-
-    override_placeholder_shapes(graph, packed_user_shapes)
-    override_batch(graph, argv.batch)
-
-    graph_clean_up(graph)
-    remove_op_nodes(graph, {'op': 'Identity'})
-
-    graph_clean_up(graph)
-
-    remove_output_ops(graph)
-
-    partial_infer(graph)
-    graph_clean_up(graph)
-    check_empty_graph(graph, 'partial_infer')
-
-    input_op_nodes = add_input_ops(graph, packed_user_shapes, False)
-    graph_clean_up(graph)
-    check_empty_graph(graph, 'add_input_ops')
-    #change_placeholders_types_to_FP32(graph)
-
-    scale_input(graph, scale)
-    add_mean_scale_values(graph, mean_scale_values)
-
-    convert_dilated_convolution(graph)
-    graph_clean_up(graph)
-
-    graph_clean_up(graph)
-
-    remove_op_nodes(graph, {'op': 'Identity'})
-    remove_useless_split(graph)
-
     class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
-    convert_gemm_to_fully_connected(graph)
-    NormalizeFullyConnected().find_and_replace_pattern(graph)
-
     fuse_pad(graph)
-    graph_clean_up(graph)
+    graph_clean_up_onnx(graph)
 
     # Mark nodes with attr 'can_be_fused': False to disable fusing for specified nodes
     mark_unfused_nodes(graph, argv.finegrain_fusing)
@@ -150,50 +95,54 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
     # Converting FusedBatchNorm layer to Mul->Add->Mul->Add sequence
     # IE doesn't support BN with 4 inputs, so we have to split it to two ScaleShift
     convert_batch_norm(graph)
-    graph_clean_up(graph)
+    graph_clean_up_onnx(graph)
 
     if not argv.disable_fusing:
         # Converting ScaleShift layer to Mul->Add
         convert_scale_shift_to_mul_add(graph)
-        graph_clean_up(graph)
+        graph_clean_up_onnx(graph)
 
         # Fusing the sequences of Mul/Add operations
         fuse_mul_add_sequence(graph)
-        graph_clean_up(graph)
+        graph_clean_up_onnx(graph)
 
         # Fusing linear operation to Convolution
         fuse_linear_ops(graph)
-        graph_clean_up(graph)
+        graph_clean_up_onnx(graph)
 
     if not argv.disable_gfusing:
         grouped_convolutions_fusing(graph)
-        graph_clean_up(graph)
+        graph_clean_up_onnx(graph)
         if not argv.disable_fusing:
             fuse_linear_ops(graph)
-            graph_clean_up(graph)
+            graph_clean_up_onnx(graph)
+
+    AddQuantizeFuse().find_and_replace_pattern(graph)
+    MulQuantizeFuse().find_and_replace_pattern(graph)
 
     convert_muladd_to_scaleshift_or_power(graph)
-    graph_clean_up(graph)
+    graph_clean_up_onnx(graph)
 
     convert_mul_add_to_power(graph)
-    graph_clean_up(graph)
+    graph_clean_up_onnx(graph)
 
     convert_reshape(graph)
-    convert_add_to_scaleshift(graph)  # scale = 1
-    convert_mul_to_scaleshift(graph)  # biases = 0
+    graph_clean_up_onnx(graph)
+    convert_add_or_mul_to_scaleshift(graph)  # scale = 1
+    graph_clean_up_onnx(graph)
 
     fuse_pad(graph)
-    graph_clean_up(graph)
+    graph_clean_up_onnx(graph)
 
     if argv.reverse_input_channels:
         reverse_input_channels(graph)
 
     if argv.move_to_preprocess:
         move_scaleshift_to_preprocess(graph)
-        graph_clean_up(graph)
+        graph_clean_up_onnx(graph)
 
     fuse_sequence_of_reshapes(graph)
-    graph_clean_up(graph)
+    graph_clean_up_onnx(graph)
 
     pattern = EltwiseInputNormalize()
     pattern.find_and_replace_pattern(graph)
@@ -204,6 +153,12 @@ def driver(argv: argparse.Namespace, model_file_name: str, output_model_name: st
 
     class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
+    for_graph_and_each_sub_graph_recursively(graph, remove_const_ops)
+
+    CreateConstNodesReplacement().find_and_replace_pattern(graph)
+
+    for_graph_and_each_sub_graph_recursively(graph, remove_output_ops)
+
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     meta_info=meta_info)
 
diff --git a/model-optimizer/mo/pipeline/tf.py b/model-optimizer/mo/pipeline/tf.py
index f6e1503ea..07dc07f6e 100644
--- a/model-optimizer/mo/pipeline/tf.py
+++ b/model-optimizer/mo/pipeline/tf.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,96 +15,50 @@
 """
 
 import argparse
-import copy
 import logging as log
 
-import networkx as nx
-import numpy as np
 import tensorflow as tf
 
+from extensions.back.CreateConstNodes import CreateConstNodesReplacement
+from extensions.middle.LayoutChangeForConstantShapePaths import LayoutChangeForConstantShapePaths
+from extensions.middle.ConcatOptimization import ConcatOptimization
+
 try:
     import tensorflow.contrib
 except:
     pass  # we try to import contrib for loading models that use contrib operations
 
-import mo.front.tf.custom_subgraph_call as csc
-from extensions.front.freeze_placeholder_value import FreezePlaceholderValue
-from extensions.front.tf.basic_lstm_cell import BasicLSTMCell
-from extensions.middle.AddIsCyclicAttribute import AddIsCyclicAttribute
 from extensions.middle.EltwiseInputNormalization import EltwiseInputNormalize
-from extensions.middle.GemmResolver import GemmResolver
-from extensions.middle.TensorIteratorBackEdge import BackEdgesMatching
-from extensions.middle.TensorIteratorCondition import LoopConditionMatcher, \
-    SimpleConditionMather  # SimpleConditionMather
-from extensions.middle.TensorIteratorConditionChecker import ConditionChecks
-from extensions.middle.TensorIteratorInput import SmartInputMatcher, SimpleInputMatcher, BackEdgeSimpleInputMatcher
-from extensions.middle.TensorIteratorMerge import TensorIteratorMerge
-from extensions.middle.TensorIteratorOutput import SmartOutputMatcher
-from extensions.middle.TensorIterator_utils import DeleteSelect
-from mo.front.common.custom_replacement_registry import CustomReplacementRegistry
-from mo.front.common.find_unsupported_ops import find_unsupported_ops
+from mo.middle.passes.eliminate import remove_const_ops
 from mo.front.common.register_custom_ops import check_for_duplicates
 from mo.front.common.register_custom_ops import update_extractors_with_extensions
-from mo.front.extractor import restore_edges, add_output_ops, add_input_ops, \
-    extract_node_attrs, create_tensor_nodes, remove_output_ops, user_data_repack, remove_control_dependency_inputs
-from mo.front.tf.change_placeholder_type import change_placeholders_types_to_FP32
-from mo.front.tf.extractor import get_tf_edges, common_tf_fields, tf_op_extractor, tf_op_extractors
-from mo.front.tf.loader import load_tf_graph_def, protobuf2nx, variables_to_constants
-from mo.front.tf.register_custom_ops import update_registration
-from mo.front.tf.replacement import FrontReplacementFromConfigFileOp
-from mo.graph.graph import check_empty_graph
-from mo.middle.passes.conv import convert_add_to_scaleshift, convert_matmul_to_fully_connected, \
-    convert_muladd_to_scaleshift_or_power, fuse_pad, transpose_fully_connected_weights, \
-    convert_dilated_convolution, convert_mul_to_scaleshift, convert_nasnet
-from mo.middle.passes.eliminate import remove_op_nodes, remove_useless_split, graph_clean_up_tf
+from mo.front.extractor import restore_edges, extract_node_attrs, remove_output_ops, remove_control_dependency_inputs
+from mo.front.tf.extractor import get_tf_edges, tf_op_extractor, tf_op_extractors
+from mo.front.tf.loader import load_tf_graph_def, protobuf2nx
+from mo.middle.passes.conv import convert_add_or_mul_to_scaleshift, convert_matmul_to_fully_connected, \
+    convert_muladd_to_scaleshift_or_power, fuse_pad, transpose_fully_connected_weights
+from mo.middle.passes.eliminate import graph_clean_up_tf
 from mo.middle.passes.fusing.decomposition import convert_batch_norm, convert_scale_shift_to_mul_add
 from mo.middle.passes.fusing.fuse_grouped_conv import grouped_convolutions_fusing
 from mo.middle.passes.fusing.fuse_linear_ops import fuse_linear_ops
 from mo.middle.passes.fusing.fuse_linear_seq import fuse_mul_add_sequence
 from mo.middle.passes.fusing.mark_unfused_nodes import mark_unfused_nodes
-from mo.middle.passes.infer import scale_input, override_placeholder_shapes, partial_infer, convert_mul_add_to_power, \
-    update_fully_connected_shapes, add_mean_scale_values, override_batch, check_for_cycle, delete_not_executable, \
-    delete_control_flow_edges
-from mo.middle.passes.l2normalization import l2_norm_to_norm
+from mo.middle.passes.infer import convert_mul_add_to_power, update_fully_connected_shapes
 from mo.middle.passes.leaky_relu import convert_mul_eltwise_to_leaky_relu
 from mo.middle.passes.mean_scale_values import move_scaleshift_to_preprocess
-from mo.middle.passes.pool import mean_to_avgpool
 from mo.middle.passes.shape import convert_squeeze, convert_reshape, reverse_input_channels, \
     conv_flatten_concat, fuse_sequence_of_reshapes, repack_fully_connected_weights_nhwc_to_nchw, \
     apply_nhwc_to_nchw_permutation, permute_data_nodes_attrs, permute_op_nodes_attrs, merge_nodes_permutations
-from mo.middle.passes.shared_weights_duplication import duplicate_shared_weights
-from mo.middle.pattern_match import for_each_sub_graph, for_graph_and_each_sub_graph_recursively
+from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
 from mo.pipeline.common import prepare_emit_ir
 from mo.utils import class_registration, tensorboard
 from mo.utils.cli_parser import get_meta_info
-from mo.utils.custom_replacement_config import update_custom_replacement_config_file
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
 
-def need_to_repeat_conversion(graph: nx.MultiDiGraph):
-    """ Detects if another round of conversion is required for the entire graph.
-
-        It traverses a given `graph` and all sub-graphs recursively and searches for
-        'repeat_conversion' graph attribute. If at least one is found and its value is True,
-        this function returns True.
-    """
-    result = False
-
-    def check_for_repeat(graph: nx.MultiDiGraph):
-        if 'repeat_conversion' in graph.graph and graph.graph['repeat_conversion']:
-            nonlocal result
-            result = True
-
-    for_graph_and_each_sub_graph_recursively(graph, check_for_repeat)
-
-    return result
-
-
-def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str, outputs: list, output_dir: str,
-          scale: float, is_binary: bool,
-          user_shapes: [None, list, np.array] = None,
-          mean_scale_values: [dict, list] = ()):
+def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str, output_dir: str,
+          is_binary: bool):
     """
     Convert TF GraphDef object to NetworkX representation.
     The resulting graph is still TF-specific and needs normalization passes to be applied.
@@ -121,7 +75,7 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
 
     graph_def, variables_values = load_tf_graph_def(graph_file_name=model_file_name, is_binary=is_binary,
                                                     checkpoint=argv.input_checkpoint,
-                                                    user_output_node_names_list=outputs,
+                                                    user_output_node_names_list=argv.output,
                                                     model_dir=argv.saved_model_dir,
                                                     meta_graph_file=argv.input_meta_graph,
                                                     saved_model_tags=argv.saved_model_tags)
@@ -150,25 +104,13 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
         graph.graph['layout'] = 'NCHW' if argv.disable_nhwc_to_nchw else 'NHWC'
         graph.graph['cmd_params'] = argv
         graph.graph['fw'] = 'tf'
-        graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 4
-
-        if graph.graph['ir_version'] == 2:
-            # When the deprecated IR version was requested,
-            # we configure only those phases that can lead to
-            # functional regressions in the version 2.
-            # BasicLSTMCell is one such transformation; when it is turned off,
-            # the body of TF basic_lstm_cell is converted as-is in a decomposed form,
-            # and should work in version 2.
-            BasicLSTMCell.enabled = False
+        graph.graph['ir_version'] = 2 if argv.generate_deprecated_IR_V2 else 5
 
-        # placeholder for request from a transformation pass to repeat the entire conversion
-        graph.graph['repeat_conversion'] = False
+        graph.graph['variables_values'] = variables_values
+        del variables_values
 
         graph = restore_edges(graph, get_tf_edges)
         graph = remove_control_dependency_inputs(graph)
-        # extract basic attributes earlier to enable some passes that relies on them before full attribute
-        # extractor is called
-        extract_node_attrs(graph, lambda node: (True, common_tf_fields(node)))
     except Exception as e:
         raise Error(
             'Cannot pre-process TensorFlow graph after reading from model file "{}". ' \
@@ -178,257 +120,109 @@ def tf2nx(argv: argparse.Namespace, model_file_name: str, output_model_name: str
             str(e)
         ) from e
 
-    check_empty_graph(graph, 'protobuf2nx. It may happen due to problems with loaded model')
+    graph.check_empty_graph('protobuf2nx. It may happen due to problems with loaded model')
+    extract_node_attrs(graph, lambda node: tf_op_extractor(node, check_for_duplicates(tf_op_extractors)))
 
-    packed_user_shapes, packed_outputs, freeze_placeholder = user_data_repack(graph, user_shapes, outputs,
-                                                                              argv.freeze_placeholder_with_value)
-    if freeze_placeholder is not None:
-        FreezePlaceholderValue.enabled = True
-        FreezePlaceholderValue.replacement_dict = freeze_placeholder
-        update_registration()
+    # --------------------------------- LOAD END ------------------------------------------------------
+    class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
+    class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
 
-    GemmResolver.enabled = False
-
-    inputs = list(packed_user_shapes.keys()) if packed_user_shapes is not None and isinstance(packed_user_shapes,
-                                                                                              dict) else None
-    graph.graph['inputs'] = inputs  # save user defined inputs for other extensions
-
-    output_op_nodes = add_output_ops(graph, packed_outputs, inputs=packed_user_shapes)
-    input_op_nodes = add_input_ops(graph, packed_user_shapes, True)
-
-    # this call of 'graph_clean_up' removes child nodes of outputs which is useful when custom output is specified
+    fuse_pad(graph)
     graph_clean_up_tf(graph)
 
-    check_empty_graph(graph, 'add_output_ops and add_input_ops. It may happen due to absence of \'Placeholder\' layer '
-                             'in the model')
-
-    variables_to_constants(graph, variables_values)
-    del variables_values
-    graph_clean_up_tf(graph)
-
-    if argv.tensorflow_custom_operations_config_update:
-        if update_custom_replacement_config_file(graph, argv.tensorflow_custom_operations_config_update):
-            return 0
-        else:
-            return 1
-
-    unsupported_ops_to_offload_to_tf = list()
-
-    MAX_ITERATIONS = 5
-    cur_iteration = 0
-    while cur_iteration < MAX_ITERATIONS:
-        graph_copy = copy.deepcopy(graph)  # create a copy of graph for the case when some ops are unsupported
-
-        if argv.tensorflow_subgraph_patterns is not None:
-            csc.replace_subgraph_calls(graph, argv.tensorflow_subgraph_patterns)
-
-        if argv.tensorflow_operation_patterns is not None:
-            csc.offload_operations_to_tf(graph, argv.tensorflow_operation_patterns)
-
-        if argv.offload_unsupported_operations_to_tf and len(unsupported_ops_to_offload_to_tf):
-            csc.offload_unsupported_operations_to_tf(graph, unsupported_ops_to_offload_to_tf)
-
-        extract_node_attrs(graph, lambda node: tf_op_extractor(node, check_for_duplicates(tf_op_extractors)))
-
-        if argv.tensorflow_use_custom_operations_config is not None:
-            registry = CustomReplacementRegistry()
-            registry.add_custom_replacement_description_from_config(argv.tensorflow_use_custom_operations_config)
-
-            # automatically generate sub-classes for custom replacements that replace sub-graph with a single node
-            for replacement_desc in registry.get_all_replacements_descriptions():
-                if replacement_desc.has('op'):
-                    type('FrontReplacementFromConfigFileOp' + replacement_desc.op, (FrontReplacementFromConfigFileOp,),
-                         {'replacement_id': replacement_desc.id})
-            update_registration()
-
-        override_placeholder_shapes(graph, packed_user_shapes)
-
-        # the user shapes are used to convert TensorFlow Object Detection API models
-        graph.graph['user_shapes'] = packed_user_shapes
-        class_registration.apply_replacements(graph, class_registration.ClassType.FRONT_REPLACER)
-
-        override_batch(graph, argv.batch)
-
-        create_tensor_nodes(graph)
-        graph_clean_up_tf(graph)
-
-        remove_output_ops(graph)
-        partial_infer(graph)
-        delete_control_flow_edges(graph)
-
-        replacer = AddIsCyclicAttribute()
-        replacer.find_and_replace_pattern(graph)
-
-        # TENSOR ITERATOR CREATING BEGINS
-        if graph.graph['is_cyclic']:
-            replacer = DeleteSelect()
-            replacer.find_and_replace_pattern(graph)
-
-            replacer = SmartInputMatcher()
-            replacer.find_and_replace_pattern(graph)
-
-            replacer = SmartOutputMatcher()
-            replacer.find_and_replace_pattern(graph)
-
-            replacer = LoopConditionMatcher()
-            replacer.find_and_replace_pattern(graph)
-
-            replacer = SimpleConditionMather()
-            replacer.find_and_replace_pattern(graph)
-
-            replacer = BackEdgesMatching()
-            replacer.find_and_replace_pattern(graph)
+    convert_matmul_to_fully_connected(graph)
 
-            replacer = ConditionChecks()
-            replacer.find_and_replace_pattern(graph)
+    # Mark nodes with attr 'can_be_fused': False to disable fusing for specified nodes
+    for_graph_and_each_sub_graph_recursively(graph, lambda graph: mark_unfused_nodes(graph, argv.finegrain_fusing))
 
-        delete_not_executable(graph)
-        graph_clean_up_tf(graph)
-        if graph.graph['is_cyclic']:
-            replacer = SimpleInputMatcher()
-            replacer.find_and_replace_pattern(graph)
-
-            replacer = BackEdgeSimpleInputMatcher()
-            replacer.find_and_replace_pattern(graph)
-
-            # Here will be optimizing path (ops after Enter and before body take out of body)
-
-            replacer = TensorIteratorMerge()
-            replacer.find_and_replace_pattern(graph)
-        # TENSOR ITERATOR CREATING ENDS
-
-        check_for_cycle(graph)
+    # Converting FusedBatchNorm layer to Mul->Add->Mul->Add sequence
+    # IE doesn't support BN with 4 inputs, so we have to split it to two ScaleShift
+    convert_batch_norm(graph)
+    graph_clean_up_tf(graph)
 
+    if not argv.disable_fusing:
+        # Converting ScaleShift layer to Mul->Add
+        for_graph_and_each_sub_graph_recursively(graph, convert_scale_shift_to_mul_add)
         for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
-        check_empty_graph(graph, 'partial_infer')
-
-        csc.prepare_tf_call_nodes(graph)
-        graph_clean_up_tf(graph)
-
-        duplicate_shared_weights(graph)
 
-        input_op_nodes = add_input_ops(graph, packed_user_shapes, False)
-        graph_clean_up_tf(graph)
-        check_empty_graph(graph, 'add_input_ops')
-
-        change_placeholders_types_to_FP32(graph)
-
-        scale_input(graph, scale)
-        add_mean_scale_values(graph, mean_scale_values)
-
-        convert_dilated_convolution(graph)
+        # Fusing the sequences of Mul/Add operations
+        for_graph_and_each_sub_graph_recursively(graph, fuse_mul_add_sequence)
         for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        l2_norm_to_norm(graph)
-        graph_clean_up_tf(graph)
-
-        remove_op_nodes(graph, {'identity': True})
-        remove_useless_split(graph)
-
-        class_registration.apply_replacements(graph, class_registration.ClassType.MIDDLE_REPLACER)
-
-        mean_to_avgpool(graph)
-        convert_nasnet(graph)
+        # Fusing linear operation to Convolution
+        for_graph_and_each_sub_graph_recursively(graph, fuse_linear_ops)
+        for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        fuse_pad(graph)
+    if not argv.disable_gfusing:
+        grouped_convolutions_fusing(graph)
         graph_clean_up_tf(graph)
+        if not argv.disable_fusing:
+            fuse_linear_ops(graph)
+            graph_clean_up_tf(graph)
 
-        convert_matmul_to_fully_connected(graph)
+    # Converting Mul->Add to ScaleShift node
+    for_graph_and_each_sub_graph_recursively(graph, convert_muladd_to_scaleshift_or_power)
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        # Mark nodes with attr 'can_be_fused': False to disable fusing for specified nodes
-        for_graph_and_each_sub_graph_recursively(graph, lambda graph: mark_unfused_nodes(graph, argv.finegrain_fusing))
+    for_graph_and_each_sub_graph_recursively(graph, convert_mul_add_to_power)
 
-        # Converting FusedBatchNorm layer to Mul->Add->Mul->Add sequence
-        # IE doesn't support BN with 4 inputs, so we have to split it to two ScaleShift
-        convert_batch_norm(graph)
-        graph_clean_up_tf(graph)
-
-        if not argv.disable_fusing:
-            # Converting ScaleShift layer to Mul->Add
-            for_graph_and_each_sub_graph_recursively(graph, convert_scale_shift_to_mul_add)
-            for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
+    # Need to eliminate dead nodes before doing update_fully_connected_shapes
+    # because update_fully_connected_shapes does partial inference and dead
+    # nodes will lead to sporadic failures.
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
+    for_graph_and_each_sub_graph_recursively(graph, update_fully_connected_shapes)
 
-            # Fusing the sequences of Mul/Add operations
-            for_graph_and_each_sub_graph_recursively(graph, fuse_mul_add_sequence)
-            for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
+    for_graph_and_each_sub_graph_recursively(graph, convert_mul_eltwise_to_leaky_relu)
+    graph_clean_up_tf(graph)
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-            # Fusing linear operation to Convolution
-            for_graph_and_each_sub_graph_recursively(graph, fuse_linear_ops)
-            for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
+    for_graph_and_each_sub_graph_recursively(graph, fuse_pad)
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        if not argv.disable_gfusing:
-            grouped_convolutions_fusing(graph)
-            graph_clean_up_tf(graph)
-            if not argv.disable_fusing:
-                fuse_linear_ops(graph)
-                graph_clean_up_tf(graph)
+    for_graph_and_each_sub_graph_recursively(graph, convert_reshape)
+    for_graph_and_each_sub_graph_recursively(graph, convert_squeeze)
 
-        # Converting Mul->Add to ScaleShift node
-        for_graph_and_each_sub_graph_recursively(graph, convert_muladd_to_scaleshift_or_power)
-        for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        for_graph_and_each_sub_graph_recursively(graph, convert_mul_add_to_power)
+    for_graph_and_each_sub_graph_recursively(graph, convert_add_or_mul_to_scaleshift)  # scale = 1
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        # Need to eliminate dead nodes before doing update_fully_connected_shapes
-        # because update_fully_connected_shapes does partial inference and dead
-        # nodes will lead to sporadic failures.
-        for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
-        for_graph_and_each_sub_graph_recursively(graph, update_fully_connected_shapes)
+    if argv.reverse_input_channels:
+        reverse_input_channels(graph)
 
-        for_graph_and_each_sub_graph_recursively(graph, convert_mul_eltwise_to_leaky_relu)
+    if argv.move_to_preprocess:
+        move_scaleshift_to_preprocess(graph)
         graph_clean_up_tf(graph)
-        for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
-
-        for_graph_and_each_sub_graph_recursively(graph, fuse_pad)
-        for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
-
-        for_graph_and_each_sub_graph_recursively(graph, convert_reshape)
-        for_graph_and_each_sub_graph_recursively(graph, convert_squeeze)
 
-        for_graph_and_each_sub_graph_recursively(graph, convert_add_to_scaleshift)  # scale = 1
-        for_graph_and_each_sub_graph_recursively(graph, convert_mul_to_scaleshift)  # biases = 0
+    fuse_sequence_of_reshapes(graph)
 
-        if argv.reverse_input_channels:
-            reverse_input_channels(graph)
+    pattern = EltwiseInputNormalize()
+    pattern.find_and_replace_pattern(graph)
 
-        if argv.move_to_preprocess:
-            move_scaleshift_to_preprocess(graph)
-            graph_clean_up_tf(graph)
+    conv_flatten_concat(graph)
 
-        for_graph_and_each_sub_graph_recursively(graph, fuse_sequence_of_reshapes)
+    if argv.enable_concat_optimization:
+        ConcatOptimization().find_and_replace_pattern(graph)
 
-        pattern = EltwiseInputNormalize()
-        pattern.find_and_replace_pattern(graph)
+    LayoutChangeForConstantShapePaths().find_and_replace_pattern(graph)
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        conv_flatten_concat(graph)
+    for_graph_and_each_sub_graph_recursively(graph, apply_nhwc_to_nchw_permutation)
+    for_graph_and_each_sub_graph_recursively(graph, merge_nodes_permutations)
+    for_graph_and_each_sub_graph_recursively(graph, permute_data_nodes_attrs)
+    for_graph_and_each_sub_graph_recursively(graph, permute_op_nodes_attrs)
 
-        for_graph_and_each_sub_graph_recursively(graph, apply_nhwc_to_nchw_permutation)
-        for_graph_and_each_sub_graph_recursively(graph, merge_nodes_permutations)
-        for_graph_and_each_sub_graph_recursively(graph, permute_data_nodes_attrs)
-        for_graph_and_each_sub_graph_recursively(graph, permute_op_nodes_attrs)
+    for_graph_and_each_sub_graph_recursively(graph, repack_fully_connected_weights_nhwc_to_nchw)
+    for_graph_and_each_sub_graph_recursively(graph, transpose_fully_connected_weights)
 
-        for_graph_and_each_sub_graph_recursively(graph, repack_fully_connected_weights_nhwc_to_nchw)
-        for_graph_and_each_sub_graph_recursively(graph, transpose_fully_connected_weights)
+    for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
 
-        for_graph_and_each_sub_graph_recursively(graph, graph_clean_up_tf)
+    class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
 
-        if argv.offload_unsupported_operations_to_tf:
-            unsupported_ops_to_offload_to_tf = find_unsupported_ops(graph)
-            if len(unsupported_ops_to_offload_to_tf) == 0:
-                log.info('All operations are supported! Exit from the loop.')
-                if not need_to_repeat_conversion(graph):
-                    break
-            else:
-                print('After {} iteration there are {} unsupported ops'.format(cur_iteration + 1,
-                                                                               len(unsupported_ops_to_offload_to_tf)))
-        else:
-            if not need_to_repeat_conversion(graph):
-                break
-
-        graph = graph_copy
-        cur_iteration += 1
+    for_graph_and_each_sub_graph_recursively(graph, remove_const_ops)
+    CreateConstNodesReplacement().find_and_replace_pattern(graph)
 
-    class_registration.apply_replacements(graph, class_registration.ClassType.BACK_REPLACER)
+    for_graph_and_each_sub_graph_recursively(graph, remove_output_ops)
 
     prepare_emit_ir(graph=graph, data_type=argv.data_type, output_dir=output_dir, output_model_name=output_model_name,
                     meta_info=meta_info)
diff --git a/model-optimizer/mo/utils/class_registration.py b/model-optimizer/mo/utils/class_registration.py
index 8d4c834bc..0296c1d95 100644
--- a/model-optimizer/mo/utils/class_registration.py
+++ b/model-optimizer/mo/utils/class_registration.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -15,17 +15,55 @@
 """
 
 import logging as log
+import os
 from enum import Enum
 
 import networkx as nx
 
+from mo.graph.graph import Graph
+from mo.middle.passes.eliminate import graph_clean_up_tf, graph_clean_up_onnx, graph_clean_up
+from mo.middle.pattern_match import for_graph_and_each_sub_graph_recursively
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
-from mo.graph.graph import check_empty_graph
 
 _registered_classes_dict = {}
 
 
+def _check_unique_ids():
+    """
+    Check that idxs is unique for all registered replacements.
+    """
+    unique_idxs = set()
+    for class_type, classes_set in _registered_classes_dict.items():
+        for cls in classes_set:
+            replacers = [c for c in cls.registered_cls if not hasattr(c, 'op')] + \
+                        [c for op, c in cls.registered_ops.items() if c]
+            for replacer_cls in replacers:
+                if hasattr(replacer_cls, 'id'):
+                    id_cls = getattr(replacer_cls, 'id')
+
+                    if id_cls in unique_idxs:
+                        raise Error('Found replacer {} with not unique id!'.format(replacer_cls))
+                    unique_idxs.add(id_cls)
+    log.debug("All replacers has unique idxs.")
+
+
+def get_enabled_and_disabled_transforms():
+    """
+    :return: tuple of lists with force enabled and disabled id of transformations.
+    """
+    disabled_transforms = os.environ['MO_DISABLED_TRANSFORMS'] if 'MO_DISABLED_TRANSFORMS' in os.environ else ''
+    enabled_transforms = os.environ['MO_ENABLED_TRANSFORMS'] if 'MO_ENABLED_TRANSFORMS' in os.environ else ''
+
+    assert isinstance(enabled_transforms, str)
+    assert isinstance(disabled_transforms, str)
+
+    disabled_transforms = disabled_transforms.split(',')
+    enabled_transforms = enabled_transforms.split(',')
+
+    return enabled_transforms, disabled_transforms
+
+
 class ClassType(Enum):
     EXTRACTOR = 0
     OP = 1
@@ -34,11 +72,20 @@ class ClassType(Enum):
     BACK_REPLACER = 4
 
 
-def _update(cls, registered_list: list, registered_dict: dict, key: str):
+def _update(cls, registered_list: list, registered_dict: dict, key: str, enabled_transforms: list, disabled_transforms: list):
     new_keys = {}  # maps a custom name to class
     new_keys_lower = {}  # translates lowered custom name to its original form
     # print('Registering new subclasses for', cls)
+
     for c in cls.__subclasses__():
+        # Force enabling operations
+        if hasattr(c, 'id') and c.id in enabled_transforms:
+            setattr(c, 'enabled', True)
+
+        # Force disabling operations
+        if hasattr(c, 'id') and c.id in disabled_transforms:
+            setattr(c, 'enabled', False)
+
         if c not in registered_list and (not hasattr(c, 'enabled') or c.enabled):
             if hasattr(cls, 'excluded_classes') and c in cls.excluded_classes:
                 continue
@@ -60,19 +107,19 @@ def _update(cls, registered_list: list, registered_dict: dict, key: str):
     registered_dict.update(new_keys)
 
 
-def update_registration(classes: list):
+def update_registration(classes: list, enabled_transforms: list, disabled_transforms: list):
     for cls in classes:
-        _update(cls, cls.registered_cls, cls.registered_ops, 'op')
+        _update(cls, cls.registered_cls, cls.registered_ops, 'op', enabled_transforms, disabled_transforms)
         _registered_classes_dict.setdefault(cls.class_type(), set()).add(cls)
 
 
-def apply_replacements(graph: nx.MultiDiGraph, replacements_type):
+def apply_replacements(graph: Graph, replacements_type):
     """
     Apply all patterns that do not have 'op' first, then apply patterns from registered_ops.
     If two or more classes replaces the same op (both have op class attribute and values match), such
     pattern is not applied (while registration it will warn user that we have a conflict).
     """
-    dependency_graph = nx.DiGraph()
+    dependency_graph = Graph()
     for class_type, classes_set in _registered_classes_dict.items():
         if class_type == replacements_type:
             for cls in classes_set:
@@ -92,7 +139,7 @@ def apply_replacements(graph: nx.MultiDiGraph, replacements_type):
                         dependency_graph.add_edge(cls_before, replacer_cls)
 
     try:
-        replacers_order = nx.topological_sort(dependency_graph)
+        replacers_order = list(nx.topological_sort(dependency_graph))
     except nx.NetworkXUnfeasible as exception:
         cycles = nx.simple_cycles(dependency_graph)
         raise Error('There is(are) cyclic dependency(ies) between replacers. One of the cycles is the following: {}',
@@ -100,6 +147,7 @@ def apply_replacements(graph: nx.MultiDiGraph, replacements_type):
 
     for replacer_cls in replacers_order:
         replacer = replacer_cls()
+
         replacement_id = 'REPLACEMENT_ID'
         if hasattr(replacer, 'replacement_id'):
             replacement_id = replacer.replacement_id
@@ -108,11 +156,26 @@ def apply_replacements(graph: nx.MultiDiGraph, replacements_type):
             log.info("Skip replacer {} (enabled = False)".format(replacer_cls))
             continue
 
+        if hasattr(replacer, 'graph_condition') and \
+                not all([condition(graph) for condition in replacer.graph_condition]):
+            log.info("Skip replacer {} (graph_condition not satisfied)".format(replacer_cls))
+            continue
+
         log.debug("Run replacer {}".format(replacer_cls))
 
         try:
             replacer.find_and_replace_pattern(graph)
-            check_empty_graph(graph, replacer_cls)
+
+            if hasattr(replacer, 'force_clean_up') and replacer.force_clean_up:
+                for_graph_and_each_sub_graph_recursively(
+                    graph,
+                    graph_clean_up_tf if graph.graph['fw'] == 'tf' else
+                    graph_clean_up_onnx if graph.graph['fw'] == 'onnx' else
+                    graph_clean_up)
+
+            for_graph_and_each_sub_graph_recursively(graph, lambda _: graph.check_empty_graph(replacer_cls))
+            for_graph_and_each_sub_graph_recursively(graph, lambda _: graph.check_shapes_consistency())
+
         except Error as err:
             raise Error('Exception occurred during running replacer "{}" ({}): {}'.format(
                 replacement_id,
diff --git a/model-optimizer/mo/utils/cli_parser.py b/model-optimizer/mo/utils/cli_parser.py
index 48558b212..942e5a7ae 100644
--- a/model-optimizer/mo/utils/cli_parser.py
+++ b/model-optimizer/mo/utils/cli_parser.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -141,6 +141,7 @@ def writable_dir(path: str):
         else:
             raise Error('The directory "{}" is not writable'.format(cur_path))
 
+
 def get_common_cli_parser(parser: argparse.ArgumentParser = None):
     if not parser:
         parser = argparse.ArgumentParser()
@@ -236,6 +237,9 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
     common_group.add_argument('--disable_gfusing',
                               help='Turn off fusing of grouped convolutions',
                               action='store_true')
+    common_group.add_argument('--enable_concat_optimization',
+                              help='Turn on concat optimization',
+                              action='store_true')
     common_group.add_argument('--move_to_preprocess',
                               help='Move mean values to IR preprocess section',
                               action='store_true')
@@ -272,6 +276,10 @@ def get_common_cli_parser(parser: argparse.ArgumentParser = None):
                                    ' deployment scenarios. Use it at your own discretion. By default, without this'
                                    ' option, the Model Optimizer generates IR V3.',
                               action='store_true')
+    common_group.add_argument('--keep_shape_ops',
+                              help='[ Experimental feature ] Enables `Shape` operation with all children keeping. '
+                                   'This feature makes model reshapable in Inference Engine',
+                              action='store_true', default=False)
     return parser
 
 
@@ -311,7 +319,6 @@ def get_caffe_cli_options():
 def get_tf_cli_options():
     d = {
         'input_model_is_text': '- Input model in text protobuf format',
-        'offload_unsupported_operations_to_tf': '- Offload unsupported operations',
         'tensorflow_subgraph_patterns': '- Patterns to offload',
         'tensorflow_operation_patterns': '- Operations to offload',
         'tensorflow_custom_operations_config_update': '- Update the configuration file with input/output node names',
@@ -435,9 +442,6 @@ def get_tf_cli_parser(parser: argparse.ArgumentParser = None):
     tf_group.add_argument('--saved_model_tags', type=str, default=None,
                           help="Group of tag(s) of the MetaGraphDef to load, in string format, separated by ','. "
                                "For tag-set contains multiple tags, all tags must be passed in.")
-    tf_group.add_argument('--offload_unsupported_operations_to_tf',
-                          help='TensorFlow*: automatically offload unsupported operations to TensorFlow*',
-                          action='store_true')
     tf_group.add_argument('--tensorflow_subgraph_patterns',
                           help='TensorFlow*: a list of comma separated patterns that will be applied to ' +
                                'TensorFlow* node names to ' +
diff --git a/model-optimizer/mo/utils/cli_parser_test.py b/model-optimizer/mo/utils/cli_parser_test.py
index 1646273f7..ab68f191c 100644
--- a/model-optimizer/mo/utils/cli_parser_test.py
+++ b/model-optimizer/mo/utils/cli_parser_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/convert.py b/model-optimizer/mo/utils/convert.py
index edec06fee..48ccbd35a 100644
--- a/model-optimizer/mo/utils/convert.py
+++ b/model-optimizer/mo/utils/convert.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/custom_replacement_config.py b/model-optimizer/mo/utils/custom_replacement_config.py
index 8709e19a5..63dc55130 100644
--- a/model-optimizer/mo/utils/custom_replacement_config.py
+++ b/model-optimizer/mo/utils/custom_replacement_config.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ from re import compile, match
 
 import networkx as nx
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.graph import nodes_matching_name_pattern, sub_graph_between_nodes
 from mo.utils.utils import refer_to_faq_msg
@@ -126,7 +126,7 @@ class CustomReplacementDescriptor(object):
             return None
         return [(out['node'], out['port']) for out in self._replacement_desc['outputs']]
 
-    def update_custom_replacement_attributes(self, graph: nx.MultiDiGraph):
+    def update_custom_replacement_attributes(self, graph: Graph):
         """
         The function run specific functions to update attributes of the custom replacement description. Currently it
         updates information about input/output nodes.
@@ -179,7 +179,7 @@ class CustomReplacementDescriptorPoints(CustomReplacementDescriptor):
     def get_outputs_description(self):
         return [('^' + node_name + '$', 0) for node_name in self.instances['end_points']]
 
-    def get_internal_input_nodes(self, graph: nx.MultiDiGraph):
+    def get_internal_input_nodes(self, graph: Graph):
         """
         Gets list of node names getting input from outside of the sub-graph. This function checks whether input nodes
         specified in the configuration file should be added to the sub-graph or not. If they should not be added to the
@@ -199,7 +199,7 @@ class CustomReplacementDescriptorPoints(CustomReplacementDescriptor):
         else:
             return self.instances['start_points']
 
-    def get_internal_output_nodes(self, graph: nx.MultiDiGraph):
+    def get_internal_output_nodes(self, graph: Graph):
         """
         Gets list of node names producing output outside of the sub-graph. This function checks whether output nodes
         specified in the configuration file should be added to the sub-graph or not. If they should not be added to the
@@ -219,7 +219,7 @@ class CustomReplacementDescriptorPoints(CustomReplacementDescriptor):
         else:
             return self.instances['end_points']
 
-    def update_custom_replacement_attributes(self, graph: nx.MultiDiGraph):
+    def update_custom_replacement_attributes(self, graph: Graph):
         if not self.has('instances'):
             raise Error("No instance(s) is(are) defined for the custom replacement '{}'. ".format(self.replacement_id) +
                         refer_to_faq_msg(66))
@@ -278,7 +278,7 @@ class CustomReplacementDescriptorScope(CustomReplacementDescriptor):
     def __init__(self, replacement_id: str, attrs: dict = None):
         super().__init__(replacement_id, attrs)
 
-    def update_custom_replacement_attributes(self, graph: nx.MultiDiGraph):
+    def update_custom_replacement_attributes(self, graph: Graph):
         if not self.has('instances') or len(self.instances) == 0:
             raise Error("No instances are defined for replacement with id '{}'. ".format(self.replacement_id) +
                         refer_to_faq_msg(68))
@@ -384,35 +384,7 @@ def parse_custom_replacement_config_file(file_name: str):
     return result
 
 
-def update_custom_replacement_config_file(graph: nx.MultiDiGraph, file_name: str):
-    data = parse_custom_replacement_config_file(file_name)
-    if data is None:
-        raise Error("Cannot update the file '{}' because it is broken. ".format(file_name) +
-                    refer_to_faq_msg(73))
-
-    for replacement_desc in data:
-        replacement_desc.update_custom_replacement_attributes(graph)
-
-    return save_custom_replacement_config_file(data, file_name)
-
-
-def save_custom_replacement_config_file(descriptions: list, file_name: str):
-    """
-    Save custom layer(s) description(s) to the file.
-    :param file_name: file to save description information to.
-    :param descriptions: list with instances of the CustomLayerDescriptor classes.
-    :return: True if operation is successful.
-    """
-    try:
-        json.dump([replacement_desc.get_config_file_representation() for replacement_desc in descriptions],
-                  open(file_name, "w"), indent=4, sort_keys=True)
-    except Exception as ex:
-        log.error("failed to update configuration file {}: {}".format(file_name, str(ex)))
-        return False
-    return True
-
-
-def generate_pattern_for_node(graph: nx.MultiDiGraph, sub_graph_pattern: str, node_name: str):
+def generate_pattern_for_node(graph: Graph, sub_graph_pattern: str, node_name: str):
     if sub_graph_pattern == '':
         return node_name
     node_name_components = node_name.split("/")
diff --git a/model-optimizer/mo/utils/dsu.py b/model-optimizer/mo/utils/dsu.py
index 849db9008..9bde4942b 100644
--- a/model-optimizer/mo/utils/dsu.py
+++ b/model-optimizer/mo/utils/dsu.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/error.py b/model-optimizer/mo/utils/error.py
index 4b188668b..d7d28e7bc 100644
--- a/model-optimizer/mo/utils/error.py
+++ b/model-optimizer/mo/utils/error.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/find_inputs.py b/model-optimizer/mo/utils/find_inputs.py
index 87ab7bb3a..633859bbe 100644
--- a/model-optimizer/mo/utils/find_inputs.py
+++ b/model-optimizer/mo/utils/find_inputs.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,28 +16,22 @@
 
 import networkx as nx
 
-from mo.graph.graph import NodeWrap
+from mo.graph.graph import Node, Graph
 
 
-def find_nodes_by_type(graph: nx.MultiDiGraph, t_name: str):
-    nodes = nx.topological_sort(graph)
-    inputs = []
-    for n in nodes:
-        node = NodeWrap(graph, n)
-        if node.has('type') and node.type == t_name:
-            inputs.append(node.id)
-    return inputs
+def find_nodes_by_attribute_value(graph: Graph, attr: str, attr_name: str):
+    return [id for id, v in nx.get_node_attributes(graph, attr).items() if v == attr_name]
 
 
-def find_inputs(graph: nx.MultiDiGraph):
-    return find_nodes_by_type(graph, 'Input')
+def find_inputs(graph: Graph):
+    return find_nodes_by_attribute_value(graph, 'type', 'Input')
 
 
-def find_outputs(graph):
-    nodes = nx.topological_sort(graph)
+def find_outputs(graph: Graph):
     outputs = []
-    for n in nodes:
-        node = NodeWrap(graph, n)
-        if node.has('is_output') and node['is_output']:
-            outputs.append(node.id)
-    return outputs
+    for node_id in find_nodes_by_attribute_value(graph, 'op', 'OpOutput'):
+        parents = Node(graph, node_id).in_nodes()
+        assert len(parents) == 1, 'OpOutput node should have exactly one input'
+        parent = parents[0].id
+        outputs.append(parent)
+    return list(set(outputs))
diff --git a/model-optimizer/mo/utils/graph.py b/model-optimizer/mo/utils/graph.py
index b65122811..cf2d1369c 100644
--- a/model-optimizer/mo/utils/graph.py
+++ b/model-optimizer/mo/utils/graph.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,7 +20,7 @@ from re import match, compile
 import logging as log
 import networkx as nx
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.utils.error import Error
 from mo.utils.utils import refer_to_faq_msg
 
@@ -52,7 +52,7 @@ def backward_bfs_for_operation(start_node: Node, op_names: list):
     return [Node(start_node.graph, x) for x in ret]
 
 
-def bfs_search(graph: nx.MultiDiGraph, start_nodes: list = list()):
+def bfs_search(graph: Graph, start_nodes: list = list()):
     """
     Performs breadth-first search over a graph and returns a list of nodes in the BFS order.
     :param graph: networkx graph to traverse.
@@ -77,7 +77,7 @@ def bfs_search(graph: nx.MultiDiGraph, start_nodes: list = list()):
     return result
 
 
-def dfs(graph: nx.MultiDiGraph, node_name: str, visited: set):
+def dfs(graph: Graph, node_name: str, visited: set):
     """
     Implementation of the depth-first search algorithm starting from the specific node.
     :param graph: networkx graph to operate on.
@@ -103,7 +103,7 @@ def dfs(graph: nx.MultiDiGraph, node_name: str, visited: set):
     return order
 
 
-def pseudo_topological_sort(graph: nx.MultiDiGraph, reverse: bool = False):
+def pseudo_topological_sort(graph: Graph, reverse: bool = False):
     """
     The function performs topological sort but doesn't check for cycle existence. So it may produce wrong nodes order
     for some applications.
@@ -127,7 +127,7 @@ def pseudo_topological_sort(graph: nx.MultiDiGraph, reverse: bool = False):
         return list(reversed(order))
 
 
-def nodes_matching_name_pattern(graph: nx.MultiDiGraph, pattern: str):
+def nodes_matching_name_pattern(graph: Graph, pattern: str):
     """
     Returns list of node names of the graph that match regular expression.
     :param graph: graph to operate on.
@@ -138,7 +138,7 @@ def nodes_matching_name_pattern(graph: nx.MultiDiGraph, pattern: str):
     return [node_name for node_name in list(graph.nodes()) if match(compiled_pattern, node_name)]
 
 
-def is_connected_component(graph: nx.MultiDiGraph, node_names: list):
+def is_connected_component(graph: Graph, node_names: list):
     """
     Checks that specified list of nodes forms a connected sub-graph. It ignores edges direction.
     The algorithm is the following. Run BFS from one of the nodes from the node_names list ignoring edges order and
@@ -167,7 +167,7 @@ def is_connected_component(graph: nx.MultiDiGraph, node_names: list):
     return set(node_names).issubset(visited)
 
 
-def sub_graph_between_nodes(graph: nx.MultiDiGraph, start_nodes: list, end_nodes: list, detect_extra_start_node: callable=None):
+def sub_graph_between_nodes(graph: Graph, start_nodes: list, end_nodes: list, detect_extra_start_node: callable=None):
     """
     Finds nodes of the sub-graph between 'start_nodes' and 'end_nodes'. Input nodes for the sub-graph nodes are also
     added to the sub-graph. Constant inputs of the 'start_nodes' are also added to the sub-graph.
@@ -251,7 +251,7 @@ def node_neighbourhood(node_name: str, depth: int, next_node_fn):
     return list(dist.keys())
 
 
-def node_incoming_neighbourhood(graph: nx.MultiDiGraph, node_name: str, depth: int):
+def node_incoming_neighbourhood(graph: Graph, node_name: str, depth: int):
     """
     Find input neighbourhood of the node.
     :param graph: graph to operate on.
@@ -262,7 +262,7 @@ def node_incoming_neighbourhood(graph: nx.MultiDiGraph, node_name: str, depth: i
     return node_neighbourhood(node_name, depth, lambda node_name: [u for u, v in graph.in_edges([node_name])])
 
 
-def node_outcoming_neighbourhood(graph: nx.MultiDiGraph, node_name: str, depth: int):
+def node_outcoming_neighbourhood(graph: Graph, node_name: str, depth: int):
     """
     Find output neighbourhood of the node.
     :param graph: graph to operate on.
@@ -273,7 +273,7 @@ def node_outcoming_neighbourhood(graph: nx.MultiDiGraph, node_name: str, depth:
     return node_neighbourhood(node_name, depth, lambda node_name: [v for u, v in graph.out_edges([node_name])])
 
 
-def scope_output_nodes(graph: nx.MultiDiGraph, scope: str, scope_delimiter: str='/'):
+def scope_output_nodes(graph: Graph, scope: str, scope_delimiter: str='/'):
     """
     The function returns nodes producing output of the sub-graph defined by scope (name prefix). The node is considered
     output of the scope if it is in this scope and it's output is outside of the scope.
diff --git a/model-optimizer/mo/utils/graph_test.py b/model-optimizer/mo/utils/graph_test.py
index 5d4ed57db..21bf45d08 100644
--- a/model-optimizer/mo/utils/graph_test.py
+++ b/model-optimizer/mo/utils/graph_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,11 +20,11 @@ import networkx as nx
 
 from mo.utils.error import Error
 from mo.utils.graph import dfs, bfs_search, is_connected_component, sub_graph_between_nodes
-
+from mo.graph.graph import Graph
 
 class TestGraphUtils(unittest.TestCase):
     def test_simple_dfs(self):
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 5)))
         graph.add_edges_from([(1, 2), (1, 3), (3, 4)])
 
@@ -36,7 +36,7 @@ class TestGraphUtils(unittest.TestCase):
         """
         Check that BFS automatically determines input nodes and start searching from them.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 6)))
         graph.add_edges_from([(1, 3), (2, 3), (3, 4), (4, 5)])
 
@@ -47,7 +47,7 @@ class TestGraphUtils(unittest.TestCase):
         """
         Check that BFS stars from the user defined nodes and doesn't go in backward edge direction.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 7)))
         graph.add_edges_from([(1, 3), (2, 3), (3, 4), (4, 5), (6, 1)])
 
@@ -58,7 +58,7 @@ class TestGraphUtils(unittest.TestCase):
         """
         Check that if there are two separate sub-graphs the function returns False.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 7)))
         graph.add_edges_from([(1, 2), (2, 3), (4, 5), (5, 6)])
         self.assertFalse(is_connected_component(graph, list(range(1, 7))))
@@ -71,7 +71,7 @@ class TestGraphUtils(unittest.TestCase):
         Check that if there are two separate sub-graphs the function connected by an edge going through the ignored node
         then the function returns False.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         node_names = list(range(1, 8))
         graph.add_nodes_from(node_names)
         graph.add_edges_from([(1, 2), (2, 3), (4, 5), (5, 6), (1, 7), (7, 4)])
@@ -81,7 +81,7 @@ class TestGraphUtils(unittest.TestCase):
         """
         Check that if the sub-graph is connected.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         node_names = list(range(1, 8))
         graph.add_nodes_from(node_names)
         graph.add_edges_from([(1, 2), (2, 3), (4, 5), (5, 6), (1, 7), (7, 4)])
@@ -91,7 +91,7 @@ class TestGraphUtils(unittest.TestCase):
         """
         Check that edges direction is ignored when checking for the connectivity.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         node_names = list(range(1, 5))
         graph.add_nodes_from(node_names)
         graph.add_edges_from([(2, 1), (2, 3), (4, 3)])
@@ -104,7 +104,7 @@ class TestGraphUtils(unittest.TestCase):
         Check that edges direction is ignored when checking for the connectivity. In this case the graph is not
         connected.
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 5)))
         graph.add_edges_from([(2, 1), (2, 3), (4, 3)])
         self.assertFalse(is_connected_component(graph, [1, 2, 4]))
@@ -121,7 +121,7 @@ class TestGraphUtils(unittest.TestCase):
             1 -> 2 -> 3 -> 4
         :return:
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 7)))
         graph.add_edges_from([(1, 2), (2, 3), (3, 4), (5, 2), (6, 5)])
         sub_graph_nodes = sub_graph_between_nodes(graph, [1], [4])
@@ -140,7 +140,7 @@ class TestGraphUtils(unittest.TestCase):
              \
         1 -> 2 -> 3 -> 4
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 6)))
         graph.add_edges_from([(1, 2), (2, 3), (3, 4), (5, 2)])
         sub_graph_nodes = sub_graph_between_nodes(graph, [2], [4])
@@ -154,7 +154,7 @@ class TestGraphUtils(unittest.TestCase):
              \
         1 -> 2 -> 3 -> 4
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 6)))
         graph.node[5]['op'] = 'Placeholder'
         graph.add_edges_from([(1, 2), (2, 3), (3, 4), (5, 2)])
@@ -168,7 +168,7 @@ class TestGraphUtils(unittest.TestCase):
              \
         1 -> 2 -> 3 -> 4
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 6)))
         graph.node[5]['op'] = 'Placeholder'
         graph.add_edges_from([(1, 2), (2, 3), (3, 4), (5, 2)])
@@ -183,7 +183,7 @@ class TestGraphUtils(unittest.TestCase):
              \
         1 -> 2 -> 3 -> 4
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         graph.add_nodes_from(list(range(1, 6)))
         graph.add_edges_from([(1, 2), (2, 3), (3, 4), (5, 2)])
         sub_graph_nodes = sub_graph_between_nodes(graph, [2, 5], [4])
@@ -199,7 +199,7 @@ class TestGraphUtils(unittest.TestCase):
             / \
         9 ->   -> 7 -> 8
         """
-        graph = nx.MultiDiGraph()
+        graph = Graph()
         node_names = list(range(1, 10))
         graph.add_nodes_from(node_names)
         graph.add_edges_from([(1, 2), (2, 3), (3, 4), (2, 5), (5, 6), (5, 7), (7, 8), (9, 5)])
diff --git a/model-optimizer/mo/utils/guess_framework.py b/model-optimizer/mo/utils/guess_framework.py
index 1149c711a..c19d34d33 100644
--- a/model-optimizer/mo/utils/guess_framework.py
+++ b/model-optimizer/mo/utils/guess_framework.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -22,6 +22,8 @@ def guess_framework_by_ext(input_model_path: str) -> int:
         return 'caffe'
     elif re.match('^.*\.pb$', input_model_path):
         return 'tf'
+    elif re.match('^.*\.pbtxt$', input_model_path):
+        return 'tf'
     elif re.match('^.*\.params$', input_model_path):
         return 'mxnet'
     elif re.match('^.*\.nnet$', input_model_path):
diff --git a/model-optimizer/mo/utils/import_extensions.py b/model-optimizer/mo/utils/import_extensions.py
index 317bef664..0ed0ce653 100644
--- a/model-optimizer/mo/utils/import_extensions.py
+++ b/model-optimizer/mo/utils/import_extensions.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -20,13 +20,23 @@ import os
 import pkgutil
 import sys
 
+from mo.back.replacement import BackReplacementPattern
+from mo.middle.replacement import MiddleReplacementPattern
+from mo.ops.op import Op
+from mo.utils.class_registration import _check_unique_ids, update_registration, get_enabled_and_disabled_transforms
+
 
 def import_by_path(path: str, middle_names: list = ()):
     for module_loader, name, ispkg in pkgutil.iter_modules([path]):
         importlib.import_module('{}.{}'.format('.'.join(middle_names), name))
 
 
-def load_dir(framework: str, path: str, update_registration: callable):
+def default_path():
+    EXT_DIR_NAME = 'extensions'
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, EXT_DIR_NAME))
+
+
+def load_dir(framework: str, path: str, get_front_classes: callable):
     """
     Assuming the following sub-directory structure for path:
 
@@ -57,27 +67,36 @@ def load_dir(framework: str, path: str, update_registration: callable):
     log.info("Importing extensions from: {}".format(path))
     root_dir, ext = os.path.split(path)
     sys.path.insert(0, root_dir)
-    internal_dirs = [['ops', ], ['front', ], ['front', framework], ['middle', ], ['back', ]]
+
+    enabled_transforms, disabled_transforms = get_enabled_and_disabled_transforms()
+
+    front_classes = get_front_classes()
+    internal_dirs = {
+                         ('ops', ): [Op],
+                         ('front', ): front_classes,
+                         ('front', framework): front_classes,
+                         ('middle', ): [MiddleReplacementPattern],
+                         ('back', ): [BackReplacementPattern]}
+
     if ext == 'mo':
-        internal_dirs.append(['front', framework, 'extractors'])
-    for p in internal_dirs:
+        internal_dirs[('front', framework, 'extractors')] = front_classes
+
+    for p in internal_dirs.keys():
         import_by_path(os.path.join(path, *p), [ext, *p])
-        update_registration()
+        update_registration(internal_dirs[p], enabled_transforms, disabled_transforms)
     sys.path.remove(root_dir)
 
 
-def default_path():
-    EXT_DIR_NAME = 'extensions'
-    return os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, EXT_DIR_NAME))
-
-
-def load_dirs(framework: str, dirs: list, update_registration: callable):
+def load_dirs(framework: str, dirs: list, get_front_classes: callable):
     if dirs is None:
         return
+
     mo_inner_extensions = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, 'mo'))
     dirs.insert(0, mo_inner_extensions)
     dirs = [os.path.abspath(e) for e in dirs]
     if default_path() not in dirs:
         dirs.insert(0, default_path())
     for path in dirs:
-        load_dir(framework, path, update_registration)
+        load_dir(framework, path, get_front_classes)
+
+    _check_unique_ids()
diff --git a/model-optimizer/mo/utils/logger.py b/model-optimizer/mo/utils/logger.py
index 26b7c2fdc..51bc3900e 100644
--- a/model-optimizer/mo/utils/logger.py
+++ b/model-optimizer/mo/utils/logger.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/pipeline_config.py b/model-optimizer/mo/utils/pipeline_config.py
index 901bf453c..5352db3bf 100644
--- a/model-optimizer/mo/utils/pipeline_config.py
+++ b/model-optimizer/mo/utils/pipeline_config.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -57,6 +57,10 @@ mapping_rules = [
     ('first_stage_nms_score_threshold', '.*_nms_score_threshold'),
     ('first_stage_nms_iou_threshold', '.*_nms_iou_threshold'),
     ('first_stage_max_proposals', '.*_max_proposals'),
+    ('num_spatial_bins_height', '.*/rfcn_box_predictor/num_spatial_bins_height'),
+    ('num_spatial_bins_width', '.*/rfcn_box_predictor/num_spatial_bins_width'),
+    ('crop_height', '.*/rfcn_box_predictor/crop_height'),
+    ('crop_width', '.*/rfcn_box_predictor/crop_width'),
     'initial_crop_size',
     # Detection Output layer attributes
     ('postprocessing_score_converter', '.*/score_converter'),
diff --git a/model-optimizer/mo/utils/pipeline_config_test.py b/model-optimizer/mo/utils/pipeline_config_test.py
index 596a714d4..6c8e19bbf 100644
--- a/model-optimizer/mo/utils/pipeline_config_test.py
+++ b/model-optimizer/mo/utils/pipeline_config_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/replacement_pattern.py b/model-optimizer/mo/utils/replacement_pattern.py
index d77f7ce63..4aa0a18b3 100644
--- a/model-optimizer/mo/utils/replacement_pattern.py
+++ b/model-optimizer/mo/utils/replacement_pattern.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 
 import networkx as nx
 
+from mo.graph.graph import Graph
 from mo.middle.pattern_match import apply_pattern
 
 
@@ -24,7 +25,7 @@ class ReplacementPattern(object):
     # All intermediate infrastructure classes should be here
     excluded_replacers = []
 
-    def find_and_replace_pattern(self, graph: nx.MultiDiGraph):
+    def find_and_replace_pattern(self, graph: Graph):
         apply_pattern(graph, **self.pattern(), action=self.replace_pattern)  # pylint: disable=no-member
 
     def run_before(self):
diff --git a/model-optimizer/mo/utils/simple_proto_parser.py b/model-optimizer/mo/utils/simple_proto_parser.py
index cfdbf288a..4975dcdf4 100644
--- a/model-optimizer/mo/utils/simple_proto_parser.py
+++ b/model-optimizer/mo/utils/simple_proto_parser.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/simple_proto_parser_test.py b/model-optimizer/mo/utils/simple_proto_parser_test.py
index 2f601ce5c..1b1af1613 100644
--- a/model-optimizer/mo/utils/simple_proto_parser_test.py
+++ b/model-optimizer/mo/utils/simple_proto_parser_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/str_to.py b/model-optimizer/mo/utils/str_to.py
index c27a5813a..9c5a15a1b 100644
--- a/model-optimizer/mo/utils/str_to.py
+++ b/model-optimizer/mo/utils/str_to.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/summarize_graph.py b/model-optimizer/mo/utils/summarize_graph.py
index 8d6971814..fbb7906f0 100644
--- a/model-optimizer/mo/utils/summarize_graph.py
+++ b/model-optimizer/mo/utils/summarize_graph.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/summarize_graph_test.py b/model-optimizer/mo/utils/summarize_graph_test.py
index fbed0ebae..41a897a1c 100644
--- a/model-optimizer/mo/utils/summarize_graph_test.py
+++ b/model-optimizer/mo/utils/summarize_graph_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/tensorboard.py b/model-optimizer/mo/utils/tensorboard.py
index 9ca78ecfd..98ff1c73b 100644
--- a/model-optimizer/mo/utils/tensorboard.py
+++ b/model-optimizer/mo/utils/tensorboard.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/unittest/extractors.py b/model-optimizer/mo/utils/unittest/extractors.py
index e58534c02..68e251f40 100644
--- a/model-optimizer/mo/utils/unittest/extractors.py
+++ b/model-optimizer/mo/utils/unittest/extractors.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/unittest/graph.py b/model-optimizer/mo/utils/unittest/graph.py
index 64a0f3027..2c36d61b4 100644
--- a/model-optimizer/mo/utils/unittest/graph.py
+++ b/model-optimizer/mo/utils/unittest/graph.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -16,11 +16,12 @@
 
 from collections import deque
 from copy import deepcopy
+from numbers import Number
 
 import networkx as nx
 import numpy as np
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 from mo.middle.pattern_match import all_edges_in_nodes
 from mo.utils.error import Error
 
@@ -51,7 +52,7 @@ def build_graph_with_attrs(nodes_with_attrs: list, edges_with_attrs: list, new_n
                            update_nodes_attributes: dict = None, nodes_with_edges_only: bool = False,
                            add_nodes_from_edges: bool = False):
     """
-    Build the nx.MultiDiGraph with specific nodes and edges. Also update of edge and node parameters is supported.
+    Build the Graph with specific nodes and edges. Also update of edge and node parameters is supported.
     :param nodes_with_attrs: list of tuples ('node_name', {node_attrs})
     :param edges_with_attrs: list of tuples like (start node, end node, (optional) {attrs of the edge}).
     :param new_nodes_with_attrs: analogically nodes_with_attrs
@@ -78,7 +79,7 @@ def build_graph_with_attrs(nodes_with_attrs: list, edges_with_attrs: list, new_n
     if not add_nodes_from_edges and not all_edges_in_nodes(nodes=all_nodes_names, edges=all_edges):
         raise Error("Some nodes from list of edges is not in nodes. Please, add all necessary nodes.")
 
-    graph = nx.MultiDiGraph()
+    graph = Graph()
 
     # Create dict for nodes with attrs
     nodes_attrs = {}
@@ -129,7 +130,7 @@ def build_graph_with_attrs(nodes_with_attrs: list, edges_with_attrs: list, new_n
 
 def build_graph(nodes_attrs: dict, edges: list, update_attributes: dict = None, nodes_with_edges_only: bool = False):
     """
-    Build the nx.MultiDiGraph with specific nodes and edges.
+    Build the Graph with specific nodes and edges.
     :param nodes_attrs: dictionary where key is the node name and the value is the dictionary with node attributes.
     :param edges: list of pairs with start and end node names of the edge.
     :param update_attributes: optional dictionary which specifies nodes names and their attributes to be updated. The
@@ -137,7 +138,7 @@ def build_graph(nodes_attrs: dict, edges: list, update_attributes: dict = None,
     :param nodes_with_edges_only: add nodes which has at least one incoming or outcoming edge.
     :return: generated graph.
     """
-    graph = nx.MultiDiGraph()
+    graph = Graph()
 
     for node_name, attrs in nodes_attrs.items():
         if 'name' not in attrs:
@@ -180,19 +181,30 @@ def build_graph(nodes_attrs: dict, edges: list, update_attributes: dict = None,
             for attr, value in new_attrs.items():
                 graph.node[node_name][attr] = value
 
+    for node in graph.get_op_nodes():
+        # Add in_ports attribute
+        in_edges = node.in_edges()
+        for i in range(len(in_edges)):
+            node.add_input_port(idx=i)
+
+        # Add out_ports attribute
+        out_edges = node.out_edges()
+        for i in range(len(out_edges)):
+            node.add_output_port(idx=i)
+
     return graph
 
 
 def build_graph_with_edge_attrs(nodes_attrs: dict, edges: list, update_attributes: dict = None):
     """
-    Build the nx.MultiDiGraph with specific nodes and edges.
+    Build the Graph with specific nodes and edges.
     :param nodes_attrs: dictionary where key is the node name and the value is the dictionary with node attributes.
     :param edges: list of pairs with start and end node names of the edge.
     :param update_attributes: optional dictionary which specifies nodes names and their attributes to be updated. The
     key is a node name to update attribute and the value is a dictionary with attribute name and its value.
     :return: generated graph.
     """
-    graph = nx.MultiDiGraph()
+    graph = Graph()
     for node_1, node_2, attr in edges:
         if node_1 not in graph.nodes():
             graph.add_node(node_1, **deepcopy(nodes_attrs[node_1]))
@@ -207,7 +219,7 @@ def build_graph_with_edge_attrs(nodes_attrs: dict, edges: list, update_attribute
     return graph
 
 
-def compare_graphs(graph: nx.MultiDiGraph, graph_ref: nx.MultiDiGraph, last_node: str, last_node_ref=None,
+def compare_graphs(graph: Graph, graph_ref: Graph, last_node: str, last_node_ref=None,
                    check_op_attrs=False):
     if last_node_ref is None:
         last_node_ref = last_node
@@ -249,7 +261,7 @@ def compare_graphs(graph: nx.MultiDiGraph, graph_ref: nx.MultiDiGraph, last_node
             # Check that nodes has same operation
             if check_op_attrs:
                 for attr in graph_ref.node[node_ref.id]:
-                    if graph_ref.node[node_ref.id][attr] is None or attr in ['name', 'id']:
+                    if graph_ref.node[node_ref.id][attr] is None or attr in ['name', 'id', '_in_ports', '_out_ports', 'infer', 'IE']:
                         continue
                     if attr not in graph.node[node.id]:
                         return False, 'Node {} has missing attribute {}'.format(node.id, attr)
@@ -259,11 +271,16 @@ def compare_graphs(graph: nx.MultiDiGraph, graph_ref: nx.MultiDiGraph, last_node
                             return False, '{} and {} has different attr {} : {} and {}'.format(
                                 node.id, node_ref.id, attr, graph.node[node.id][attr],
                                 graph_ref.node[node_ref.id][attr])
-                    else:
-                        if graph.node[node.id][attr] != graph_ref.node[node_ref.id][attr]:
+                    elif isinstance(graph.node[node.id][attr], Number):
+                        if abs(graph.node[node.id][attr] - graph_ref.node[node_ref.id][attr]) > 1e-4:
                             return False, '{} and {} has different attr {} : {} and {}'.format(
                                 node.id, node_ref.id, attr, graph.node[node.id][attr],
                                 graph_ref.node[node_ref.id][attr])
+                    elif graph.node[node.id][attr] != graph_ref.node[node_ref.id][attr]:
+                        return False, '{} and {} has different attr {} : {} and {}'.format(
+                            node.id, node_ref.id, attr, graph.node[node.id][attr],
+                            graph_ref.node[node_ref.id][attr])
+
         else:
             if node_ref.has_valid('shape') and not node.has_valid('shape'):
                 return False, '{} has None shape'.format(node.id)
diff --git a/model-optimizer/mo/utils/unsupported_ops.py b/model-optimizer/mo/utils/unsupported_ops.py
index e5187e3b2..09cdb047d 100644
--- a/model-optimizer/mo/utils/unsupported_ops.py
+++ b/model-optimizer/mo/utils/unsupported_ops.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -18,11 +18,11 @@ import collections
 
 import networkx as nx
 
-from mo.graph.graph import Node
+from mo.graph.graph import Node, Graph
 
 
 class UnsupportedOps(object):
-    def __init__(self, graph: nx.Graph):
+    def __init__(self, graph: Graph):
         self.graph = graph
         # map op to a list of node names
         self.unsupported = collections.defaultdict(list)
diff --git a/model-optimizer/mo/utils/utils.py b/model-optimizer/mo/utils/utils.py
index c4f089cab..4c1871fee 100644
--- a/model-optimizer/mo/utils/utils.py
+++ b/model-optimizer/mo/utils/utils.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -13,8 +13,9 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 """
-
-
+import functools
+import warnings
+import logging as log
 import numpy as np
 
 
@@ -45,3 +46,34 @@ def symm_match_shapes(shape1: np.array, shape2: np.array):
     # Elements with values -1 and 0 in both shapes are just ignored.
     # Other elements should match. Undefined elements can be one side only.
     return match_shapes(shape1, shape2) or match_shapes(shape2, shape1)
+
+
+def deprecated_api(class_name=None):
+    def deprecated(func):
+        @functools.wraps(func)
+        def deprecation_message(*args, **kwargs):
+            warnings.simplefilter('always', DeprecationWarning)  # turn on filter
+            dep_msg = "Call to deprecated function {}. ".format(func.__name__)
+            if class_name is not None:
+                dep_msg += "Please use {}.{} method".format(class_name.__name__, func.__name__)
+            warnings.warn(dep_msg, DeprecationWarning, stacklevel=2)
+            warnings.simplefilter('default', DeprecationWarning)  # reset filter
+            return func(*args, **kwargs)
+
+        return deprecation_message
+
+    return deprecated
+
+
+def array_to_str(node, attr):
+    if not node.has_valid(attr):
+        return None
+    else:
+        return ','.join(map(str, node[attr]))
+
+
+def shrink_str_value(value: np.array, max_symbols=100):
+    value = str(value)
+    if len(value) > max_symbols:
+        value = value.strip('\n')[:max_symbols - 3] + '...'
+    return value
diff --git a/model-optimizer/mo/utils/utils_test.py b/model-optimizer/mo/utils/utils_test.py
index 7ebae7f01..368dc3135 100644
--- a/model-optimizer/mo/utils/utils_test.py
+++ b/model-optimizer/mo/utils/utils_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/version.py b/model-optimizer/mo/utils/version.py
index 8c512fc83..30d164606 100644
--- a/model-optimizer/mo/utils/version.py
+++ b/model-optimizer/mo/utils/version.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/version_test.py b/model-optimizer/mo/utils/version_test.py
index 909e7425e..8f403755b 100644
--- a/model-optimizer/mo/utils/version_test.py
+++ b/model-optimizer/mo/utils/version_test.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo/utils/versions_checker.py b/model-optimizer/mo/utils/versions_checker.py
index 09f3105c6..b9ff08187 100644
--- a/model-optimizer/mo/utils/versions_checker.py
+++ b/model-optimizer/mo/utils/versions_checker.py
@@ -1,5 +1,5 @@
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
@@ -14,13 +14,17 @@
  limitations under the License.
 """
 
+
 import logging as log
 import os
 import re
 import sys
 from distutils.version import LooseVersion
 
-modules = {"protobuf": "google.protobuf"}
+modules = {
+    "protobuf": "google.protobuf",
+    "test-generator": "generator",
+}
 critical_modules = ["networkx"]
 
 message = "\nDetected not satisfied dependencies:\n" \
@@ -100,7 +104,19 @@ def version_check(name, installed_v, required_v, sign, not_satisfied_v, exit_cod
     """
     if sign is not None:
         req_ver = LooseVersion(required_v)
-        satisfied = eval('installed_v{}req_ver'.format(sign))
+        satisfied = False
+        if sign == '>':
+            satisfied = installed_v > req_ver
+        elif sign == '>=':
+            satisfied = installed_v >= req_ver
+        elif sign == '<=':
+            satisfied = installed_v <= req_ver
+        elif sign == '<':
+            satisfied = installed_v < req_ver
+        elif sign == '==':
+            satisfied = installed_v == req_ver
+        else:
+            log.error("Error during version comparison")
     else:
         satisfied = True
     if not satisfied:
@@ -110,7 +126,7 @@ def version_check(name, installed_v, required_v, sign, not_satisfied_v, exit_cod
     return exit_code
 
 
-def check_requirements(framework = None):
+def check_requirements(framework=None):
     """
     Please do not add parameter type annotations (param:type).
     Because we import this file while checking Python version.
@@ -133,10 +149,11 @@ def check_requirements(framework = None):
     exit_code = 0
     for name, key, required_version in requirements_list:
         try:
-            exec("import {}".format(modules[name] if name in modules else name))
-            installed_version = eval("{}.__version__".format(modules[name] if name in modules else name))
+            importable_name = modules.get(name, name)
+            exec("import {}".format(importable_name))
+            installed_version = sys.modules[importable_name].__version__
             exit_code = version_check(name, installed_version, required_version, key, not_satisfied_versions, exit_code)
-            exec("del {}".format(modules[name] if name in modules else name))
+            exec("del {}".format(importable_name))
         except (AttributeError, ImportError):
             not_satisfied_versions.append((name, 'not installed', 'required: {}'.format(required_version)))
             exit_code = 1
diff --git a/model-optimizer/mo_caffe.py b/model-optimizer/mo_caffe.py
index d16e457f1..36b01f139 100755
--- a/model-optimizer/mo_caffe.py
+++ b/model-optimizer/mo_caffe.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo_kaldi.py b/model-optimizer/mo_kaldi.py
index 1d64d7db3..b3ff3a6ef 100755
--- a/model-optimizer/mo_kaldi.py
+++ b/model-optimizer/mo_kaldi.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo_mxnet.py b/model-optimizer/mo_mxnet.py
index 5338db908..349594158 100755
--- a/model-optimizer/mo_mxnet.py
+++ b/model-optimizer/mo_mxnet.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo_onnx.py b/model-optimizer/mo_onnx.py
index 87f9c7d15..1fa724dfa 100755
--- a/model-optimizer/mo_onnx.py
+++ b/model-optimizer/mo_onnx.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/mo_tf.py b/model-optimizer/mo_tf.py
index 954d09d88..4763a2a56 100755
--- a/model-optimizer/mo_tf.py
+++ b/model-optimizer/mo_tf.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 """
- Copyright (c) 2018 Intel Corporation
+ Copyright (c) 2018-2019 Intel Corporation
 
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
diff --git a/model-optimizer/requirements.txt b/model-optimizer/requirements.txt
index 7583c33cb..8ee99c872 100644
--- a/model-optimizer/requirements.txt
+++ b/model-optimizer/requirements.txt
@@ -4,3 +4,5 @@ networkx>=1.11
 numpy>=1.12.0
 protobuf==3.6.1
 onnx>=1.1.2
+test-generator==0.1.1
+defusedxml>=0.5.0
diff --git a/model-optimizer/requirements_caffe.txt b/model-optimizer/requirements_caffe.txt
index 2acb120da..eb748925e 100644
--- a/model-optimizer/requirements_caffe.txt
+++ b/model-optimizer/requirements_caffe.txt
@@ -1,3 +1,5 @@
 networkx>=1.11
 numpy>=1.12.0
 protobuf==3.6.1
+test-generator==0.1.1
+defusedxml>=0.5.0
+\ No newline at end of file
diff --git a/model-optimizer/requirements_kaldi.txt b/model-optimizer/requirements_kaldi.txt
index 74772f3bc..24caaf4d2 100644
--- a/model-optimizer/requirements_kaldi.txt
+++ b/model-optimizer/requirements_kaldi.txt
@@ -1,2 +1,4 @@
 networkx>=1.11
 numpy==1.13.0
+test-generator==0.1.1
+defusedxml>=0.5.0
diff --git a/model-optimizer/requirements_mxnet.txt b/model-optimizer/requirements_mxnet.txt
index ae4ec3c32..1e2f5571e 100644
--- a/model-optimizer/requirements_mxnet.txt
+++ b/model-optimizer/requirements_mxnet.txt
@@ -1,3 +1,5 @@
 mxnet>=1.0.0,<=1.3.1
 networkx>=1.11
 numpy>=1.12.0
+test-generator==0.1.1
+defusedxml>=0.5.0
+\ No newline at end of file
diff --git a/model-optimizer/requirements_onnx.txt b/model-optimizer/requirements_onnx.txt
index 05e8d7035..e196da4f0 100644
--- a/model-optimizer/requirements_onnx.txt
+++ b/model-optimizer/requirements_onnx.txt
@@ -1,3 +1,5 @@
 onnx>=1.1.2
 networkx>=1.11
 numpy>=1.12.0
+test-generator==0.1.1
+defusedxml>=0.5.0
+\ No newline at end of file
diff --git a/model-optimizer/requirements_tf.txt b/model-optimizer/requirements_tf.txt
index 2ee57848f..386403093 100644
--- a/model-optimizer/requirements_tf.txt
+++ b/model-optimizer/requirements_tf.txt
@@ -1,3 +1,5 @@
 tensorflow>=1.2.0
 networkx>=1.11
 numpy>=1.12.0
+test-generator==0.1.1
+defusedxml>=0.5.0
+\ No newline at end of file
diff --git a/model-optimizer/tf_call_ie_layer/build.sh b/model-optimizer/tf_call_ie_layer/build.sh
index 3188767c0..6518c319b 100644
--- a/model-optimizer/tf_call_ie_layer/build.sh
+++ b/model-optimizer/tf_call_ie_layer/build.sh
@@ -59,9 +59,9 @@ else
 fi
 
 set -e # exit if something goes wrong
-if [ "x$INTEL_CVSDK_DIR" = "x" ]; then
-    echo "ERROR: INTEL_CVSDK_DIR environment variable is not set"
-    echo "Please, run the 'source <CVSDK_install_dir>/bin/setupvars.sh'"
+if [ "x$INTEL_OPENVINO_DIR" = "x" ]; then
+    echo "ERROR: INTEL_OPENVINO_DIR environment variable is not set"
+    echo "Please, run the 'source <OpenVINO_install_dir>/bin/setupvars.sh'"
     exit 1
 fi
 
@@ -71,7 +71,7 @@ if [ "x$TF_ROOT_DIR" == 'x' ]; then
     exit 1
 fi
 
-IE_HEADERS_SRC_DIR=$INTEL_CVSDK_DIR/inference_engine/include
+IE_HEADERS_SRC_DIR=$INTEL_OPENVINO_DIR/inference_engine/include
 if [ ! -e $IE_HEADERS_SRC_DIR ]; then
     echo "ERROR: Inference Engine headers files '$IE_HEADERS_SRC_DIR' doesn't exist"
     exit 1
diff --git a/model-optimizer/version.txt b/model-optimizer/version.txt
deleted file mode 100644
index c70046573..000000000
--- a/model-optimizer/version.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-06:46PM December 13, 2018
-1.5.12.49d067a0
-49d067a07dedf8e95920e9649e890a76451ca648
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 000000000..e618415a8
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,69 @@
+# OpenVINO™ Python* openvino.tools package
+
+## General
+`openvino.tools` package includes:
+* openvino.tools.accuracy_checker
+* openvino.tools.benchmark
+* openvino.tools.calibration
+* openvino.tools.utils
+
+Please, refer to https://docs.openvinotoolkit.org for details.
+
+## Installation
+Choose neccessary Python\* version and define `PYTHONPATH` environment variable.
+
+### Prerequisites
+
+Install prerequisites first:
+
+#### 1. Python
+
+**openvino.tools** is **Python 3** library. Install it first:
+
+- [Python3][python3]
+- [setuptools][setuptools]
+
+```bash
+sudo apt-get install python3 python3-dev python3-setuptools python3-pip
+```
+
+Python setuptools and python package manager (pip) install packages into system directory by default. There are several options:
+
+- work inside [virtual environment][virtualenv] (best solution).
+- use `--user` option for all `pip` commands.
+- install all dependencies with *sudo* permissions.
+
+In order to use virtual environment you should install it:
+
+```bash
+python3 -m pip install virtualenv
+python3 -m virtualenv -p `which python3` <directory_for_environment>
+```
+
+Before starting to work inside virtual environment, it should be activated:
+
+```bash
+source <directory_for_environment>/bin/activate
+```
+
+Virtual environment can be deactivated using command
+
+```bash
+deactivate
+```
+
+#### 2. Install package prerequisites
+
+The next step is installing package prerequisites.
+
+```bash
+python3 -m pip install -r accuracy_checker/requirements.txt benchmark/requirements.txt calibration/requirements.txt
+```
+
+### Configuration
+
+Each subpackage has specific configuration. Please, refer to specific subpackage documentation for details.
+
+[python3]: https://www.python.org/downloads/
+[setuptools]: https://pypi.python.org/pypi/setuptools
+
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 000000000..e8cc80ed0
--- /dev/null
+++ b/tools/__init__.py
@@ -0,0 +1,17 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+__version__ = "0.0.1"
diff --git a/tools/accuracy_checker/.pylintrc b/tools/accuracy_checker/.pylintrc
new file mode 100644
index 000000000..7c903acc1
--- /dev/null
+++ b/tools/accuracy_checker/.pylintrc
@@ -0,0 +1,31 @@
+[MASTER]
+disable = C0103,
+          C0111,
+          too-many-locals,
+          too-many-arguments,
+          unused-argument,
+          too-many-instance-attributes,
+          too-few-public-methods,
+          unsubscriptable-object,
+          unbalanced-tuple-unpacking,
+          arguments-differ,
+          E1101,
+          E1111,
+          C0204,
+          W0201,
+          W0107,
+          R0401
+
+max-line-length = 120
+ignore-docstrings = yes
+extension-pkg-whitelist=inference_engine,cv2,numpy
+ignored-modules = numpy,cv2,openvino.inference_engine,caffe
+load-plugins = pylint_checkers
+ignored-classes = pathlib.PurePath
+jobs=0
+
+[SIMILARITIES]
+ignore-imports = yes
+
+[BASIC]
+bad-functions=print,as_posix,absolute
diff --git a/tools/accuracy_checker/README.md b/tools/accuracy_checker/README.md
new file mode 100644
index 000000000..ceee15360
--- /dev/null
+++ b/tools/accuracy_checker/README.md
@@ -0,0 +1,60 @@
+# Deep Learning accuracy validation framework
+
+## Installation
+
+### Prerequisites
+
+Install prerequisites first:
+
+#### 1. Python
+
+**accuracy checker** uses **Python 3**. Install it first:
+
+- [Python3][python3], [setuptools][setuptools]:
+
+```bash
+sudo apt-get install python3 python3-dev python3-setuptools python3-pip
+```
+
+Python setuptools and python package manager (pip) install packages into system directory by default. Installation of accuracy checker tested only via [virtual environment][virtualenv].
+
+In order to use virtual environment you should install it first:
+
+```bash
+python3 -m pip install virtualenv
+python3 -m virtualenv -p `which python3` <directory_for_environment>
+```
+
+Before starting to work inside virtual environment, it should be activated:
+
+```bash
+source <directory_for_environment>/bin/activate
+```
+
+Virtual environment can be deactivated using command
+
+```bash
+deactivate
+```
+
+#### 2. Frameworks
+
+The next step is installing backend frameworks for Accuracy Checker.
+
+In order to evaluate some models required frameworks have to be installed. Accuracy-Checker supports these frameworks:
+
+- [OpenVINO][openvino-get-started].
+- [Caffe][caffe-get-started].
+
+You can use any of them or several at a time.
+
+#### 3. Requirements installation
+```bash
+pip3 install -r requirements.txt
+
+[python3]: https://www.python.org/downloads/
+[setuptools]: https://pypi.python.org/pypi/setuptools
+[caffe-get-started]: accuracy_checker/launcher/caffe_installation_readme.md
+[virtual-environment]: https://docs.python.org/3/tutorial/venv.html
+[virtualenv]: https://virtualenv.pypa.io/en/stable
+[openvino-get-started]: https://software.intel.com/en-us/openvino-toolkit/documentation/get-started
+\ No newline at end of file
diff --git a/tools/accuracy_checker/__init__.py b/tools/accuracy_checker/__init__.py
new file mode 100644
index 000000000..e4f37bf7e
--- /dev/null
+++ b/tools/accuracy_checker/__init__.py
@@ -0,0 +1,39 @@
+from .accuracy_checker import (
+    annotation_converters,
+    adapters,
+    config,
+    data_readers,
+    launcher,
+    metrics,
+    postprocessor,
+    preprocessor,
+    representation,
+    dataset,
+    dependency,
+    logging,
+    main,
+    model_evaluator,
+    presenters,
+    progress_reporters,
+    utils
+)
+
+__all__ = [
+    'annotation_converters',
+    'adapters',
+    'config',
+    'data_readers',
+    'launcher',
+    'metrics',
+    'postprocessor',
+    'preprocessor',
+    'representation',
+    'dataset',
+    'dependency',
+    'logging',
+    'main',
+    'model_evaluator',
+    'presenters',
+    'progress_reporters',
+    'utils'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/__init__.py b/tools/accuracy_checker/accuracy_checker/__init__.py
new file mode 100644
index 000000000..ede291724
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/__init__.py
@@ -0,0 +1,17 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+__version__ = "0.6.8"
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/README.md b/tools/accuracy_checker/accuracy_checker/adapters/README.md
new file mode 100644
index 000000000..40cec313b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/README.md
@@ -0,0 +1,73 @@
+# Adapters
+
+Adapter is a function for conversion network infer output to metric specific format.
+You can use 2 ways to set adapter for topology:
+* Define adapter as a string.
+
+```yml
+adapter: classification
+```
+
+* Define adapter as a dictionary, using `type:` for setting adapter name. This approach gives opportunity to set additional parameters for adapter if it is required.
+
+```yml
+adapter:
+  type: reid
+  grn_workaround: False
+```
+
+AccuracyChecker supports following set of adapters:
+* `classification` - converting output of classification model to `ClassificationPrediction` representation.
+* `segmentation` - converting output of semantic segmentation model to `SeegmentationPrediction` representation.
+* `tiny_yolo_v1` - converting output of Tiny YOLO v1 model to `DetectionPrediction` representation.
+* `reid` - converting output of reidentification model to `ReIdentificationPrediction` representation.
+  * `grn_workaround` - enabling processing output with adding Global Region Normalization layer.
+* `yolo_v2` - converting output of YOLO v2 family models to `DetectionPrediction` representation.
+  * `classes` - number of detection classes (default 20).
+  * `anchors` - anchor values provided as comma-separated list or one of precomputed: `yolo_v2` and `tiny_yolo_v2`.
+  * `coords` - number of bbox coordinates (default 4).
+  * `num` - num parameter from DarkNet configuration file (default 5).
+* `yolo_v3` - converting output of YOLO v3 family models to `DetectionPrediction` representation.
+  * `classes` - number of detection classes (default 80).
+  * `anchors` - anchor values provided as comma-separited list or precomputed: `yolo_v3`.
+  * `coords` - number of bbox coordinates (default 4).
+  * `num` - num parameter from DarkNet configuration file (default 3).
+  * `threshold` - minimal objectness score value for valid detections (default 0.001).
+  * `input_width` and `input_height` - network input width and height correspondingly (default 416).
+  * `outputs` - the list of output layers names (optional), if specified there should be exactly 3 output layers provided.
+* `lpr` - converting output of license plate recognition model to `CharacterRecognitionPrediction` representation.
+* `ssd` - converting  output of SSD model to `DetectionPrediction` representation.
+* `face_person_detection` - converting face person detection model output with 2 detection outputs to `ContainerPredition`, where value of parameters `face_out`and `person_out` are used for identification `DetectionPrediction` in container. 
+  * `face_out` -  face detection output layer name.
+  * `person_out` - person detection output layer name.
+* `attributes_recognition`  - converting vehicle attributes recognition model output to `ContainerPrediction` where value of parameters `color_out`and `type_out` are used for identification `ClassificationPrediction` in container. 
+  * `color_out` - vehicle color attribute output layer name.
+  * `type_out`- vehicle type attribute output layer name.
+* `head_pose` - converting head pose estimation model output to `ContainerPrediction` where names of parameters `angle_pitch`, `angle_yaw` and `angle_roll` are used for identification `RegressionPrediction` in container. 
+  * `angle_pitch` - output layer name for pitch angle.
+  * `angle_yaw`- output layer name for yaw angle.
+  * `angle_roll` - output layer name for roll angle.
+* `age_gender` - converting age gender recognition model output to `ContainerPrediction` with `ClassificationPrediction` named `gender` for gender recognition, `ClassificationPrediction` named `age_classification` and `RegressionPrediction` named `age_error` for age recognition.
+  * `age_out` - output layer name for age recognition.
+  * `gender_out` - output layer name for gender recognition.
+* `action_detection` - converting output of model for person detection and action recognition tasks to `ContainerPrediction` with `DetectionPrdiction` for class agnostic metric calculation and `DetectionPrediction` for action recognition. The representations in container have names `class_agnostic_prediction` and `action_prediction` respectively.
+  * `priorbox_out` - name of layer containing prior boxes in SSD format.
+  * `loc_out` - name of layer containing box coordinates in SSD format.
+  * `main_conf_out` - name of layer containing detection confidences.
+  * `add_conf_out_prefix` - prefix for generation name of layers containing action confidences if topology has several following layers or layer name.
+  * `add_conf_out_count` - number of layers with action confidences (optional, you can not provide this argument if action confidences contained in one layer).
+  * `num_action_classes` - number classes for action recognition.
+  * `detection_threshold` - minimal detection confidences level for valid detections.
+* `super_resolution` - converting output of single image super resolution network to `SuperResolutionPrediction`.
+* `landmarks_regression` - converting output of model for landmarks regression to `FacialLandmarksPrediction`.
+* `text_detection` - converting output of model for text detection to `TextDetectionPrediction`.
+  * `pixel_class_out` - name of layer containing information related to text/no-text classification for each pixel.
+  * `pixel_link_out` - name of layer containing information related to linkage between pixels and their neighbors.
+* `human_pose_estimation` - converting output of model for human pose estimation to `PoseEstimationPrediction`.
+  * `part_affinity_fields_out` - name of output layer with keypoints pairwise relations (part affinity fields).
+  * `keypoints_heatmap_out` - name of output layer with keypoints heatmaps.
+* `beam_search_decoder` - realization CTC Beam Search decoder for symbol sequence recognition, converting model output to `CharacterRecognitionPrediction`.
+  * `beam_size` -  size of the beam to use during decoding (default 10).
+  * `blank_label` - index of the CTC blank label.
+  * `softmaxed_probabilities` - indicator that model uses softmax for output layer (default False).
+* `gaze_estimation` - converting output of gaze estimation model to `GazeVectorPrediction`.
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/__init__.py b/tools/accuracy_checker/accuracy_checker/adapters/__init__.py
new file mode 100644
index 000000000..d52b1622f
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/__init__.py
@@ -0,0 +1,79 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .adapter import Adapter, AdapterField
+
+from .action_recognition import ActionDetection
+from .text_detection import TextDetectionAdapter, LPRAdapter, BeamSearchDecoder
+from .image_processing import SuperResolutionAdapter
+from .attributes_recognition import (
+    HeadPoseEstimatorAdapter,
+    VehicleAttributesRecognitionAdapter,
+    PersonAttributesAdapter,
+    AgeGenderAdapter,
+    LandmarksRegressionAdapter,
+    GazeEstimationAdapter
+)
+
+from .reidentification import ReidAdapter
+from .detection import TinyYOLOv1Adapter, SSDAdapter, FacePersonAdapter, YoloV2Adapter, YoloV3Adapter
+from .classification import ClassificationAdapter
+from .segmentation import SegmentationAdapter, BrainTumorSegmentationAdapter
+from .pose_estimation import HumanPoseAdapter
+
+from .dummy_adapters import XML2DetectionAdapter
+
+from .hit_ratio import HitRatioAdapter
+
+__all__ = [
+    'Adapter',
+    'AdapterField',
+
+    'XML2DetectionAdapter',
+
+    'ClassificationAdapter',
+
+    'SSDAdapter',
+    'TinyYOLOv1Adapter',
+    'YoloV2Adapter',
+    'YoloV3Adapter',
+    'FacePersonAdapter',
+
+    'SegmentationAdapter',
+    'BrainTumorSegmentationAdapter',
+
+    'ReidAdapter',
+
+    'SuperResolutionAdapter',
+
+    'HeadPoseEstimatorAdapter',
+    'VehicleAttributesRecognitionAdapter',
+    'PersonAttributesAdapter',
+    'AgeGenderAdapter',
+    'LandmarksRegressionAdapter',
+    'GazeEstimationAdapter',
+
+    'TextDetectionAdapter',
+
+    'BeamSearchDecoder',
+    'LPRAdapter',
+
+    'HumanPoseAdapter',
+
+    'ActionDetection',
+
+    'HitRatioAdapter'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py b/tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py
new file mode 100644
index 000000000..113eb9d6d
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/action_recognition.py
@@ -0,0 +1,119 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..config import ConfigValidator, StringField, NumberField
+from ..representation import DetectionPrediction, ContainerPrediction
+
+
+class ActionDetectorConfig(ConfigValidator):
+    type = StringField()
+    priorbox_out = StringField()
+    loc_out = StringField()
+    main_conf_out = StringField()
+    add_conf_out_prefix = StringField()
+    add_conf_out_count = NumberField(optional=True, min_value=1)
+    num_action_classes = NumberField()
+    detection_threshold = NumberField(optional=True, floats=True, min_value=0, max_value=1)
+
+
+class ActionDetection(Adapter):
+    __provider__ = 'action_detection'
+
+    def validate_config(self):
+        action_detector_adapter_config = ActionDetectorConfig('ActionDetector_Config')
+        action_detector_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.priorbox_out = self.launcher_config['priorbox_out']
+        self.loc_out = self.launcher_config['loc_out']
+        self.main_conf_out = self.launcher_config['main_conf_out']
+        self.num_action_classes = self.launcher_config['num_action_classes']
+        self.detection_threshold = self.launcher_config.get('detection_threshold', 0)
+        add_conf_out_count = self.launcher_config.get('add_conf_out_count')
+        add_conf_out_prefix = self.launcher_config['add_conf_out_prefix']
+        if add_conf_out_count is None:
+            self.add_conf_outs = [add_conf_out_prefix]
+        else:
+            self.add_conf_outs = []
+            for num in np.arange(start=1, stop=add_conf_out_count + 1):
+                self.add_conf_outs.append('{}{}'.format(add_conf_out_prefix, num))
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        prior_boxes = raw_outputs[self.priorbox_out][0][0].reshape(-1, 4)
+        prior_variances = raw_outputs[self.priorbox_out][0][1].reshape(-1, 4)
+        for batch_id, identifier in enumerate(identifiers):
+            labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = self.prepare_detection_for_id(
+                batch_id, raw_outputs, prior_boxes, prior_variances
+            )
+            action_prediction = DetectionPrediction(identifier, labels, class_scores, x_mins, y_mins, x_maxs, y_maxs)
+            person_prediction = DetectionPrediction(
+                identifier, [1] * len(labels), main_scores, x_mins, y_mins, x_maxs, y_maxs
+            )
+            result.append(ContainerPrediction({
+                'action_prediction': action_prediction, 'class_agnostic_prediction': person_prediction
+            }))
+
+        return result
+
+    def prepare_detection_for_id(self, batch_id, raw_outputs, prior_boxes, prior_variances):
+        num_detections = raw_outputs[self.loc_out][batch_id].size // 4
+        locs = raw_outputs[self.loc_out][batch_id].reshape(-1, 4)
+        main_conf = raw_outputs[self.main_conf_out][batch_id].reshape(num_detections, -1)
+        add_confs = list(map(
+            lambda layer: raw_outputs[layer][batch_id].reshape(-1, self.num_action_classes), self.add_conf_outs
+        ))
+        anchors_num = len(add_confs)
+        labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores = [], [], [], [], [], [], []
+        for index in range(num_detections):
+            if main_conf[index, 1] < self.detection_threshold:
+                continue
+
+            x_min, y_min, x_max, y_max = self.decode_box(prior_boxes[index], prior_variances[index], locs[index])
+            action_confs = add_confs[index % anchors_num][index // anchors_num]
+            action_label = np.argmax(action_confs)
+            labels.append(action_label)
+            class_scores.append(action_confs[action_label])
+            x_mins.append(x_min)
+            y_mins.append(y_min)
+            x_maxs.append(x_max)
+            y_maxs.append(y_max)
+            main_scores.append(main_conf[index, 1])
+
+        return labels, class_scores, x_mins, y_mins, x_maxs, y_maxs, main_scores
+
+    @staticmethod
+    def decode_box(prior, var, deltas):
+        prior_width = prior[2] - prior[0]
+        prior_height = prior[3] - prior[1]
+        prior_center_x = (prior[0] + prior[2]) / 2
+        prior_center_y = (prior[1] + prior[3]) / 2
+
+        decoded_box_center_x = var[0] * deltas[0] * prior_width + prior_center_x
+        decoded_box_center_y = var[1] * deltas[1] * prior_height + prior_center_y
+        decoded_box_width = np.exp(var[2] * deltas[2]) * prior_width
+        decoded_box_height = np.exp(var[3] * deltas[3]) * prior_height
+
+        decoded_xmin = decoded_box_center_x - decoded_box_width / 2
+        decoded_ymin = decoded_box_center_y - decoded_box_height / 2
+        decoded_xmax = decoded_box_center_x + decoded_box_width / 2
+        decoded_ymax = decoded_box_center_y + decoded_box_height / 2
+
+        return decoded_xmin, decoded_ymin, decoded_xmax, decoded_ymax
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/adapter.py b/tools/accuracy_checker/accuracy_checker/adapters/adapter.py
new file mode 100644
index 000000000..2358dccb8
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/adapter.py
@@ -0,0 +1,71 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..config import BaseField, ConfigValidator, StringField
+from ..dependency import ClassProvider
+
+
+class Adapter(ClassProvider):
+    """
+    Interface that describes converting raw output to appropriate representation.
+    """
+
+    __provider_type__ = 'adapter'
+
+    def __init__(self, launcher_config, label_map=None, output_blob=None):
+        self.launcher_config = launcher_config
+        self.output_blob = output_blob
+        self.label_map = label_map
+
+        self.validate_config()
+        self.configure()
+
+    def __call__(self, *args, **kwargs):
+        return self.process(*args, **kwargs)
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        raise NotImplementedError
+
+    def configure(self):
+        pass
+
+    def validate_config(self):
+        pass
+
+    @staticmethod
+    def _extract_predictions(outputs_list, meta):
+        return outputs_list[0]
+
+
+class AdapterField(BaseField):
+    def validate(self, entry, field_uri_=None):
+        super().validate(entry, field_uri_)
+
+        if entry is None:
+            return
+
+        field_uri_ = field_uri_ or self.field_uri
+        if isinstance(entry, str):
+            StringField(choices=Adapter.providers).validate(entry, 'adapter')
+        elif isinstance(entry, dict):
+            class DictAdapterValidator(ConfigValidator):
+                type = StringField(choices=Adapter.providers)
+            dict_adapter_validator = DictAdapterValidator(
+                'adapter', on_extra_argument=DictAdapterValidator.IGNORE_ON_EXTRA_ARGUMENT
+            )
+            dict_adapter_validator.validate(entry)
+        else:
+            self.raise_error(entry, field_uri_, 'adapter must be either string or dictionary')
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/attributes_recognition.py b/tools/accuracy_checker/accuracy_checker/adapters/attributes_recognition.py
new file mode 100644
index 000000000..b43040df9
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/attributes_recognition.py
@@ -0,0 +1,210 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..config import ConfigValidator, StringField
+from ..representation import (
+    ContainerPrediction,
+    RegressionPrediction,
+    ClassificationPrediction,
+    FacialLandmarksPrediction,
+    MultiLabelRecognitionPrediction,
+    GazeVectorPrediction
+)
+
+
+class HeadPoseEstimatorAdapterConfig(ConfigValidator):
+    type = StringField()
+    angle_yaw = StringField()
+    angle_pitch = StringField()
+    angle_roll = StringField()
+
+
+class HeadPoseEstimatorAdapter(Adapter):
+    """
+    Class for converting output of HeadPoseEstimator to HeadPosePrediction representation
+    """
+    __provider__ = 'head_pose'
+
+    def validate_config(self):
+        head_pose_estimator_adapter_config = HeadPoseEstimatorAdapterConfig(
+            'HeadPoseEstimator_Config', on_extra_argument=HeadPoseEstimatorAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
+        head_pose_estimator_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        """
+        Specifies parameters of config entry
+        """
+        self.angle_yaw = self.launcher_config['angle_yaw']
+        self.angle_pitch = self.launcher_config['angle_pitch']
+        self.angle_roll = self.launcher_config['angle_roll']
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+            frame_meta: list of meta information about each frame
+        Returns:
+                list of ContainerPrediction objects
+        """
+        result = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        for identifier, yaw, pitch, roll in zip(
+                identifiers,
+                raw_output[self.angle_yaw],
+                raw_output[self.angle_pitch],
+                raw_output[self.angle_roll]
+        ):
+            prediction = ContainerPrediction({'angle_yaw': RegressionPrediction(identifier, yaw[0]),
+                                              'angle_pitch': RegressionPrediction(identifier, pitch[0]),
+                                              'angle_roll': RegressionPrediction(identifier, roll[0])})
+            result.append(prediction)
+
+        return result
+
+
+class VehicleAttributesRecognitionAdapterConfig(ConfigValidator):
+    type = StringField()
+    color_out = StringField()
+    type_out = StringField()
+
+
+class VehicleAttributesRecognitionAdapter(Adapter):
+    __provider__ = 'vehicle_attributes'
+
+    def validate_config(self):
+        attributes_recognition_adapter_config = VehicleAttributesRecognitionAdapterConfig(
+            'VehicleAttributesRecognition_Config',
+            on_extra_argument=VehicleAttributesRecognitionAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
+        attributes_recognition_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        """
+        Specifies parameters of config entry
+        """
+        self.color_out = self.launcher_config['color_out']
+        self.type_out = self.launcher_config['type_out']
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        res = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        for identifier, colors, types in zip(identifiers, raw_output[self.color_out], raw_output[self.type_out]):
+            res.append(ContainerPrediction({'color': ClassificationPrediction(identifier, colors.reshape(-1)),
+                                            'type': ClassificationPrediction(identifier, types.reshape(-1))}))
+        return res
+
+
+class AgeGenderAdapterConfig(ConfigValidator):
+    type = StringField()
+    age_out = StringField()
+    gender_out = StringField()
+
+
+class AgeGenderAdapter(Adapter):
+    __provider__ = 'age_gender'
+
+    def configure(self):
+        self.age_out = self.launcher_config['age_out']
+        self.gender_out = self.launcher_config['gender_out']
+
+    def validate_config(self):
+        age_gender_adapter_config = AgeGenderAdapterConfig(
+            'AgeGender_Config', on_extra_argument=AgeGenderAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
+        age_gender_adapter_config.validate(self.launcher_config)
+
+    @staticmethod
+    def get_age_scores(age):
+        age_scores = np.zeros(4)
+        if age < 19:
+            age_scores[0] = 1
+            return age_scores
+        if age < 36:
+            age_scores[1] = 1
+            return age_scores
+        if age < 66:
+            age_scores[2] = 1
+            return age_scores
+        age_scores[3] = 1
+        return age_scores
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        for identifier, age, gender in zip(identifiers, raw_output[self.age_out], raw_output[self.gender_out]):
+            gender = gender.reshape(-1)
+            age = age.reshape(-1)[0]*100
+            gender_rep = ClassificationPrediction(identifier, gender)
+            age_class_rep = ClassificationPrediction(identifier, self.get_age_scores(age))
+            age_error_rep = RegressionPrediction(identifier, age)
+            result.append(ContainerPrediction({'gender': gender_rep, 'age_classification': age_class_rep,
+                                               'age_error': age_error_rep}))
+        return result
+
+
+class LandmarksRegressionAdapter(Adapter):
+    __provider__ = 'landmarks_regression'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        res = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        for identifier, values in zip(identifiers, raw_output[self.output_blob]):
+            x_values, y_values = values[::2], values[1::2]
+            res.append(FacialLandmarksPrediction(identifier, x_values.reshape(-1), y_values.reshape(-1)))
+        return res
+
+
+class PersonAttributesConfig(ConfigValidator):
+    attributes_recognition_out = StringField(optional=True)
+
+
+class PersonAttributesAdapter(Adapter):
+    __provider__ = 'person_attributes'
+
+    def validate_config(self):
+        person_attributes_adapter_config = PersonAttributesConfig(
+            'PersonAttributes_Config',
+            PersonAttributesConfig.IGNORE_ON_EXTRA_ARGUMENT
+        )
+        person_attributes_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.attributes_recognition_out = self.launcher_config.get('attributes_recognition_out', self.output_blob)
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        for identifier, multi_label in zip(identifiers, raw_output[self.attributes_recognition_out or self.output_blob]):
+            multi_label[multi_label > 0.5] = 1.
+            multi_label[multi_label <= 0.5] = 0.
+
+            result.append(MultiLabelRecognitionPrediction(identifier, multi_label.reshape(-1)))
+
+        return result
+
+
+class GazeEstimationAdapter(Adapter):
+    __provider__ = 'gaze_estimation'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        raw_output = self._extract_predictions(raw, frame_meta)
+        for identifier, output in zip(identifiers, raw_output[self.output_blob]):
+            result.append(GazeVectorPrediction(identifier, output))
+
+        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/classification.py b/tools/accuracy_checker/accuracy_checker/adapters/classification.py
new file mode 100644
index 000000000..ddcf267b6
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/classification.py
@@ -0,0 +1,45 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..representation import ClassificationPrediction
+
+
+class ClassificationAdapter(Adapter):
+    """
+    Class for converting output of classification model to ClassificationPrediction representation
+    """
+    __provider__ = 'classification'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+            frame_meta: list of meta information about each frame
+        Returns:
+            list of ClassificationPrediction objects
+        """
+        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
+        prediction = np.reshape(prediction, (prediction.shape[0], -1))
+
+        result = []
+        for identifier, output in zip(identifiers, prediction):
+            result.append(ClassificationPrediction(identifier, output))
+
+        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/detection.py b/tools/accuracy_checker/accuracy_checker/adapters/detection.py
new file mode 100644
index 000000000..4ff13557d
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/detection.py
@@ -0,0 +1,344 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import math
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..config import ConfigValidator, NumberField, StringField, ListField
+from ..representation import DetectionPrediction, ContainerPrediction
+from ..utils import get_or_parse_value
+
+
+class TinyYOLOv1Adapter(Adapter):
+    """
+    Class for converting output of Tiny YOLO v1 model to DetectionPrediction representation
+    """
+    __provider__ = 'tiny_yolo_v1'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+        Returns:
+             list of DetectionPrediction objects
+        """
+        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
+
+        PROBABILITY_SIZE = 980
+        CONFIDENCE_SIZE = 98
+        BOXES_SIZE = 392
+
+        CELLS_X, CELLS_Y = 7, 7
+        CLASSES = 20
+        OBJECTS_PER_CELL = 2
+
+        result = []
+        for identifier, output in zip(identifiers, prediction):
+            assert PROBABILITY_SIZE + CONFIDENCE_SIZE + BOXES_SIZE == output.shape[0]
+
+            probability, scale, boxes = np.split(output, [PROBABILITY_SIZE, PROBABILITY_SIZE + CONFIDENCE_SIZE])
+
+            probability = np.reshape(probability, (CELLS_Y, CELLS_X, CLASSES))
+            scale = np.reshape(scale, (CELLS_Y, CELLS_X, OBJECTS_PER_CELL))
+            boxes = np.reshape(boxes, (CELLS_Y, CELLS_X, OBJECTS_PER_CELL, 4))
+
+            confidence = np.zeros((CELLS_Y, CELLS_X, OBJECTS_PER_CELL, CLASSES + 4))
+            for cls in range(CLASSES):
+                confidence[:, :, 0, cls] = np.multiply(probability[:, :, cls], scale[:, :, 0])
+                confidence[:, :, 1, cls] = np.multiply(probability[:, :, cls], scale[:, :, 1])
+
+            labels, scores, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
+            for i, j, k in np.ndindex((CELLS_X, CELLS_Y, OBJECTS_PER_CELL)):
+                box = boxes[j, i, k]
+                box = [(box[0] + i) / float(CELLS_X), (box[1] + j) / float(CELLS_Y), box[2] ** 2, box[3] ** 2]
+
+                label = np.argmax(confidence[j, i, k, :CLASSES])
+                score = confidence[j, i, k, label]
+
+                labels.append(label)
+                scores.append(score)
+                x_mins.append(box[0] - box[2] / 2.0)
+                y_mins.append(box[1] - box[3] / 2.0)
+                x_maxs.append(box[0] + box[2] / 2.0)
+                y_maxs.append(box[1] + box[3] / 2.0)
+
+            result.append(DetectionPrediction(identifier, labels, scores, x_mins, y_mins, x_maxs, y_maxs))
+
+        return result
+
+
+PRECOMPUTED_ANCHORS = {
+    'yolo_v2': [1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071],
+    'tiny_yolo_v2': [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52],
+    'yolo_v3': [
+        10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0, 119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0
+    ],
+    'tiny_yolo_v3': [10.0, 14.0, 23.0, 27.0, 37.0, 58.0, 81.0, 82.0, 135.0, 169.0, 344.0, 319.0]
+}
+
+
+def entry_index(w, h, n_coords, n_classes, pos, entry):
+    row = pos // (w * h)
+    col = pos % (w * h)
+    return row * w * h * (n_classes + n_coords + 1) + entry * w * h + col
+
+
+class BaseYoloAdapterConfig(ConfigValidator):
+    classes = NumberField(floats=False, optional=True, min_value=1)
+    coords = NumberField(floats=False, optional=True, min_value=1)
+    num = NumberField(floats=False, optional=True, min_value=1)
+    anchors = StringField(optional=True)
+
+
+class YoloV2Adapter(Adapter):
+    """
+    Class for converting output of YOLO v2 family models to DetectionPrediction representation
+    """
+    __provider__ = 'yolo_v2'
+
+    def validate_config(self):
+        yolo_v2_adapter_config = BaseYoloAdapterConfig('BaseYoloAdapter_Config')
+        yolo_v2_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.classes = self.launcher_config.get('classes', 20)
+        self.coords = self.launcher_config.get('coords', 4)
+        self.num = self.launcher_config.get('num', 5)
+        self.anchors = get_or_parse_value(self.launcher_config.get('anchors', 'yolo_v2'), PRECOMPUTED_ANCHORS)
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+        Returns:
+            list of DetectionPrediction objects
+        """
+        predictions = self._extract_predictions(raw, frame_meta)[self.output_blob]
+
+        cells_x, cells_y = 13, 13
+
+        result = []
+        for identifier, prediction in zip(identifiers, predictions):
+            labels, scores, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
+            for y, x, n in np.ndindex((cells_y, cells_x, self.num)):
+                index = n * cells_y * cells_x + y * cells_x + x
+
+                box_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, 0)
+                obj_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, self.coords)
+
+                scale = prediction[obj_index]
+
+                box = [
+                    (x + prediction[box_index + 0 * (cells_y * cells_x)]) / cells_x,
+                    (y + prediction[box_index + 1 * (cells_y * cells_x)]) / cells_y,
+                    np.exp(prediction[box_index + 2 * (cells_y * cells_x)]) * self.anchors[2 * n + 0] / cells_x,
+                    np.exp(prediction[box_index + 3 * (cells_y * cells_x)]) * self.anchors[2 * n + 1] / cells_y
+                ]
+
+                classes_prob = np.empty(self.classes)
+                for cls in range(self.classes):
+                    cls_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, self.coords + 1 + cls)
+                    classes_prob[cls] = prediction[cls_index]
+
+                classes_prob = classes_prob * scale
+
+                label = np.argmax(classes_prob)
+
+                labels.append(label)
+                scores.append(classes_prob[label])
+                x_mins.append(box[0] - box[2] / 2.0)
+                y_mins.append(box[1] - box[3] / 2.0)
+                x_maxs.append(box[0] + box[2] / 2.0)
+                y_maxs.append(box[1] + box[3] / 2.0)
+
+            result.append(DetectionPrediction(identifier, labels, scores, x_mins, y_mins, x_maxs, y_maxs))
+
+        return result
+
+
+class YoloV3AdapterConfig(BaseYoloAdapterConfig):
+    threshold = NumberField(floats=True, optional=True, min_value=0)
+    outputs = ListField(optional=True)
+
+
+class YoloV3Adapter(Adapter):
+    """
+    Class for converting output of YOLO v3 family models to DetectionPrediction representation
+    """
+    __provider__ = 'yolo_v3'
+
+    def validate_config(self):
+        yolo_v3_adapter_config = YoloV3AdapterConfig('YoloV3Adapter_Config')
+        yolo_v3_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.classes = self.launcher_config.get('classes', 80)
+        self.coords = self.launcher_config.get('coords', 4)
+        self.num = self.launcher_config.get('num', 3)
+        self.anchors = get_or_parse_value(self.launcher_config.get('anchors', 'yolo_v3'), PRECOMPUTED_ANCHORS)
+        self.threshold = self.launcher_config.get('threshold', 0.001)
+        self.outputs = self.launcher_config.get('outputs', [])
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+        Returns:
+            list of DetectionPrediction objects
+        """
+
+        def get_anchors_offset(x):
+            return int((self.num * 2) * (len(self.anchors) / (self.num * 2) - 1 - math.log2(x / 13)))
+
+        def parse_yolo_v3_results(prediction, threshold, w, h, det):
+            cells_x, cells_y = prediction.shape[1:]
+            prediction = prediction.flatten()
+            for y, x, n in np.ndindex((cells_y, cells_x, self.num)):
+                index = n * cells_y * cells_x + y * cells_x + x
+                anchors_offset = get_anchors_offset(cells_x)
+
+                box_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, 0)
+                obj_index = entry_index(cells_x, cells_y, self.coords, self.classes, index, self.coords)
+
+                scale = prediction[obj_index]
+                if scale < threshold:
+                    continue
+
+                box = [
+                    (x + prediction[box_index + 0 * (cells_y * cells_x)]) / cells_x,
+                    (y + prediction[box_index + 1 * (cells_y * cells_x)]) / cells_y,
+                    np.exp(prediction[box_index + 2 * (cells_y * cells_x)]) * self.anchors[
+                        anchors_offset + 2 * n + 0] / w,
+                    np.exp(prediction[box_index + 3 * (cells_y * cells_x)]) * self.anchors[
+                        anchors_offset + 2 * n + 1] / h
+                ]
+
+                classes_prob = np.empty(self.classes)
+                for cls in range(self.classes):
+                    cls_index = entry_index(cells_x, cells_y, self.coords, self.classes, index,
+                                            self.coords + 1 + cls)
+                    classes_prob[cls] = prediction[cls_index] * scale
+
+                    det['labels'].append(cls)
+                    det['scores'].append(classes_prob[cls])
+                    det['x_mins'].append(box[0] - box[2] / 2.0)
+                    det['y_mins'].append(box[1] - box[3] / 2.0)
+                    det['x_maxs'].append(box[0] + box[2] / 2.0)
+                    det['y_maxs'].append(box[1] + box[3] / 2.0)
+
+            return det
+
+        result = []
+
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+
+        if self.outputs:
+            outputs = self.outputs
+        else:
+            outputs = raw_outputs.keys()
+
+        batch = len(identifiers)
+        predictions = [[] for _ in range(batch)]
+        for blob in outputs:
+            for b in range(batch):
+                predictions[b].append(raw[blob][b])
+
+        for identifier, prediction, meta in zip(identifiers, predictions, frame_meta):
+            detections = {'labels': [], 'scores': [], 'x_mins': [], 'y_mins': [], 'x_maxs': [], 'y_maxs': []}
+            input_shape = list(meta.get('input_shape', {'data': (3, 416, 416)}).values())[0]
+            self.input_width = input_shape[2]
+            self.input_height = input_shape[1]
+
+            for p in prediction:
+                parse_yolo_v3_results(p, self.threshold, self.input_width, self.input_height, detections)
+
+            result.append(DetectionPrediction(
+                identifier, detections['labels'], detections['scores'], detections['x_mins'], detections['y_mins'],
+                detections['x_maxs'], detections['y_maxs']
+            ))
+
+        return result
+
+
+class SSDAdapter(Adapter):
+    """
+    Class for converting output of SSD model to DetectionPrediction representation
+    """
+    __provider__ = 'ssd'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+        Returns:
+            list of DetectionPrediction objects
+        """
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        prediction_batch = raw_outputs[self.output_blob]
+        prediction_count = prediction_batch.shape[2]
+        prediction_batch = prediction_batch.reshape(prediction_count, -1)
+        prediction_batch = self.remove_empty_detections(prediction_batch)
+
+        result = []
+        for batch_index, identifier in enumerate(identifiers):
+            prediction_mask = np.where(prediction_batch[:, 0] == batch_index)
+            detections = prediction_batch[prediction_mask]
+            detections = detections[:, 1::]
+            result.append(DetectionPrediction(identifier, *zip(*detections)))
+
+        return result
+
+    @staticmethod
+    def remove_empty_detections(prediction_blob):
+        ind = prediction_blob[:, 0]
+        ind_ = np.where(ind == -1)[0]
+        m = ind_[0] if ind_.size else prediction_blob.shape[0]
+        return prediction_blob[:m, :]
+
+
+class FacePersonDetectionAdapterConfig(ConfigValidator):
+    type = StringField()
+    face_out = StringField()
+    person_out = StringField()
+
+
+class FacePersonAdapter(Adapter):
+    __provider__ = 'face_person_detection'
+
+    def validate_config(self):
+        face_person_detection_adapter_config = FacePersonDetectionAdapterConfig(
+            'FacePersonDetection_Config', on_extra_argument=FacePersonDetectionAdapterConfig.ERROR_ON_EXTRA_ARGUMENT)
+        face_person_detection_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.face_detection_out = self.launcher_config['face_out']
+        self.person_detection_out = self.launcher_config['person_out']
+        self.face_adapter = SSDAdapter(self.launcher_config, self.label_map, self.face_detection_out)
+        self.person_adapter = SSDAdapter(self.launcher_config, self.label_map, self.person_detection_out)
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        face_batch_result = self.face_adapter(raw, identifiers)
+        person_batch_result = self.person_adapter(raw, identifiers)
+        result = [ContainerPrediction({self.face_detection_out: face_result, self.person_detection_out: person_result})
+                  for face_result, person_result in zip(face_batch_result, person_batch_result)]
+
+        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/dummy_adapters.py b/tools/accuracy_checker/accuracy_checker/adapters/dummy_adapters.py
new file mode 100644
index 000000000..300dec93f
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/dummy_adapters.py
@@ -0,0 +1,64 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..representation import DetectionPrediction
+from ..adapters import Adapter
+
+
+class XML2DetectionAdapter(Adapter):
+    """
+    Class for converting xml detection results in OpenCV FileStorage format to DetectionPrediction representation.
+    """
+
+    __provider__ = 'xml_detection'
+
+    def process(self, tree, identifiers=None, frame_meta=None):
+        class_to_ind = dict(zip(self.label_map.values(), range(len(self.label_map.values()))))
+
+        result = {}
+        for frames in tree.getroot():
+            for frame in frames:
+                identifier = frame.tag + '.png'
+                labels, scores, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], [], []
+                for prediction in frame:
+                    if prediction.find('is_ignored'):
+                        continue
+
+                    label = prediction.find('type')
+                    if not label:
+                        raise ValueError('Detection predictions contains detection without "{}"'.format('type'))
+                    label = class_to_ind[label.text]
+
+                    confidence = prediction.find('confidence')
+                    if confidence is None:
+                        raise ValueError('Detection predictions contains detection without "{}"'.format('confidence'))
+                    confidence = float(confidence.text)
+
+                    box = prediction.find('roi')
+                    if not box:
+                        raise ValueError('Detection predictions contains detection without "{}"'.format('roi'))
+                    box = list(map(float, box.text.split()))
+
+                    labels.append(label)
+                    scores.append(confidence)
+                    x_mins.append(box[0])
+                    y_mins.append(box[1])
+                    x_maxs.append(box[0] + box[2])
+                    y_maxs.append(box[1] + box[3])
+
+                    result[identifier] = DetectionPrediction(identifier, labels, scores, x_mins, y_mins, x_maxs, y_maxs)
+
+        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/hit_ratio.py b/tools/accuracy_checker/accuracy_checker/adapters/hit_ratio.py
new file mode 100644
index 000000000..f28b84f78
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/hit_ratio.py
@@ -0,0 +1,47 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..representation import HitRatioPrediction
+
+
+class HitRatioAdapter(Adapter):
+    """
+    Class for converting output of NCF model to HitRatioPrediction representation.
+    """
+
+    __provider__ = 'hit_ratio_adapter'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            raw: output of model.
+            identifiers: list of input data identifiers.
+            frame_meta: metadata for frame.
+        Returns:
+            list of HitRatioPrediction objects.
+        """
+
+        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
+        prediction = np.reshape(prediction, -1)
+
+        result = []
+        for identifier, output in zip(identifiers, prediction):
+            result.append(HitRatioPrediction(identifier, output))
+
+        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/image_processing.py b/tools/accuracy_checker/accuracy_checker/adapters/image_processing.py
new file mode 100644
index 000000000..21ecec305
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/image_processing.py
@@ -0,0 +1,35 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..representation import SuperResolutionPrediction
+
+
+class SuperResolutionAdapter(Adapter):
+    __provider__ = 'super_resolution'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        for identifier, img_sr in zip(identifiers, raw_outputs[self.output_blob]):
+            img_sr *= 255
+            img_sr = np.clip(img_sr, 0., 255.)
+            img_sr = img_sr.transpose((1, 2, 0)).astype(np.uint8)
+            result.append(SuperResolutionPrediction(identifier, img_sr))
+
+        return result
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/pose_estimation.py b/tools/accuracy_checker/accuracy_checker/adapters/pose_estimation.py
new file mode 100644
index 000000000..25350f555
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/pose_estimation.py
@@ -0,0 +1,331 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import math
+from operator import itemgetter
+
+import cv2
+import numpy as np
+
+from ..adapters import Adapter
+from ..config import ConfigValidator, StringField
+from ..representation import PoseEstimationPrediction
+
+
+class HumanPoseAdapterConfig(ConfigValidator):
+    type = StringField()
+    part_affinity_fields_out = StringField()
+    keypoints_heatmap_out = StringField()
+
+
+class HumanPoseAdapter(Adapter):
+    __provider__ = 'human_pose_estimation'
+
+    limb_seq = [
+        [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], [10, 11], [2, 12], [12, 13],
+        [13, 14], [2, 1], [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]
+    ]
+    map_idx = [
+        [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44], [19, 20], [21, 22], [23, 24], [25, 26],
+        [27, 28], [29, 30], [47, 48], [49, 50], [53, 54], [51, 52], [55, 56], [37, 38], [45, 46]
+    ]
+
+    def validate_config(self):
+        human_pose_estimation_config = HumanPoseAdapterConfig('HumanPose_Config')
+        human_pose_estimation_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.part_affinity_fields = self.launcher_config['part_affinity_fields_out']
+        self.keypoints_heatmap = self.launcher_config['keypoints_heatmap_out']
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        raw_output = zip(
+            identifiers, raw_outputs[self.keypoints_heatmap],
+            raw_outputs[self.part_affinity_fields], frame_meta
+        )
+        for identifier, heatmap, paf, meta in raw_output:
+            height, width, _ = meta['image_size']
+            heatmap_avg = np.zeros((height, width, 19), dtype=np.float32)
+            paf_avg = np.zeros((height, width, 38), dtype=np.float32)
+            pad = meta.get('padding', [0, 0, 0, 0])
+            heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0))
+            heatmap = cv2.resize(heatmap, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
+            heatmap = heatmap[pad[0]:heatmap.shape[0] - pad[2], pad[1]:heatmap.shape[1] - pad[3]:, :]
+            heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
+            heatmap_avg = heatmap_avg + heatmap
+
+            paf = np.transpose(np.squeeze(paf), (1, 2, 0))
+            paf = cv2.resize(paf, (0, 0), fx=8, fy=8, interpolation=cv2.INTER_CUBIC)
+            paf = paf[pad[0]:paf.shape[0] - pad[2], pad[1]:paf.shape[1] - pad[3], :]
+            paf = cv2.resize(paf, (width, height), interpolation=cv2.INTER_CUBIC)
+            paf_avg = paf_avg + paf
+
+            peak_counter = 0
+            all_peaks = []
+            for part in range(0, 18):  # 19th for bg
+                peak_counter += self.find_peaks(heatmap_avg[:, :, part], all_peaks, peak_counter)
+
+            subset, candidate = self.group_peaks(all_peaks, paf_avg)
+            result.append(PoseEstimationPrediction(identifier, *self.get_poses(subset, candidate)))
+
+        return result
+
+    @staticmethod
+    def find_peaks(heatmap, all_peaks, prev_peak_counter):
+        heatmap[heatmap < 0.1] = 0
+        map_aug = np.zeros((heatmap.shape[0] + 2, heatmap.shape[1] + 2))
+        map_left = np.zeros(map_aug.shape)
+        map_right = np.zeros(map_aug.shape)
+        map_up = np.zeros(map_aug.shape)
+        map_down = np.zeros(map_aug.shape)
+
+        map_aug[1:map_aug.shape[0] - 1, 1:map_aug.shape[1] - 1] = heatmap
+        map_left[1:map_aug.shape[0] - 1, :map_aug.shape[1] - 2] = heatmap
+        map_right[1:map_aug.shape[0] - 1, 2:map_aug.shape[1]] = heatmap
+        map_up[:map_aug.shape[0] - 2, 1:map_aug.shape[1] - 1] = heatmap
+        map_down[2:map_aug.shape[0], 1:map_aug.shape[1] - 1] = heatmap
+
+        peaks_binary = (map_aug > map_left) & (map_aug > map_right) & (map_aug > map_up) & (map_aug > map_down)
+        peaks_binary = peaks_binary[1:map_aug.shape[0] - 1, 1:map_aug.shape[1] - 1]
+        peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))
+        peaks = sorted(peaks, key=itemgetter(0))  # same order with matlab
+
+        flag = np.ones(len(peaks), np.uint8)
+        peaks_with_score_and_id = []
+        peak_counter = 0
+        for i, _ in enumerate(peaks):
+            if flag[i] != 1:
+                continue
+            for j in range(i + 1, len(peaks)):
+                if math.sqrt((peaks[i][0] - peaks[j][0]) ** 2 + (peaks[i][1] - peaks[j][1]) ** 2) < 6:
+                    flag[j] = 0
+            peak_id = peak_counter + prev_peak_counter
+            peak_counter += 1
+            peaks_with_score_and_id.append([peaks[i][0], peaks[i][1], heatmap[peaks[i][1], peaks[i][0]], peak_id])
+        all_peaks.append(peaks_with_score_and_id)
+
+        return peak_counter
+
+    @staticmethod
+    def _add_pose_single_candidate(subset, candidate, idx_joint, kpt_num=20):
+        for joint in candidate:
+            num = 0
+            for subset_j in subset:  # check if already in some pose, was added as a part of another limb
+                if subset_j[idx_joint] == joint[3]:
+                    num += 1
+                    continue
+            if num == 0:
+                person_keypoints = np.ones(kpt_num) * -1
+                person_keypoints[idx_joint] = joint[3]  # joint idx
+                person_keypoints[-1] = 1  # n joints in pose
+                person_keypoints[-2] = joint[2]  # pose score
+                subset.append(person_keypoints)
+
+        return subset
+
+    @staticmethod
+    def _filter_subset(subset):
+        filtered_subset = []
+        for subset_element in subset:
+            if subset_element[-1] < 3 or (subset_element[-2] / subset_element[-1] < 0.2):
+                continue
+            filtered_subset.append(subset_element)
+
+        return np.asarray(filtered_subset)
+
+    @staticmethod
+    def _add_pose_both_candidates(subset, temp, index_a, index_b, candidates, kpt_num=20):
+        for i, temp_i in enumerate(temp):
+            num = 0
+            for j, subset_j in enumerate(subset):
+                if subset_j[index_a] == temp_i[0]:
+                    subset[j][index_b] = temp[i][1]
+                    num += 1
+                    subset[j][-1] += 1
+                    subset[j][-2] += candidates[temp_i[1], 2] + temp_i[2]
+            if num == 0:
+                person_keypoints = np.ones(kpt_num) * -1
+                person_keypoints[index_a] = temp[i][0]
+                person_keypoints[index_b] = temp[i][1]
+                person_keypoints[-1] = 2
+                person_keypoints[-2] = np.sum(candidates[temp_i[0:2], 2]) + temp_i[2]
+                subset.append(person_keypoints)
+
+        return subset
+
+    @staticmethod
+    def _copy_temperature_to_subset(subset, temp, index_a, index_b):
+        for _, temp_i in enumerate(temp):
+            for j, subset_j in enumerate(subset):
+                check_subset_a = subset_j[index_a] == temp_i[0] and subset_j[index_b] == -1
+                check_subset_b = subset_j[index_b] == temp_i[1] and subset_j[index_a] == -1
+                if check_subset_a:
+                    subset[j][index_b] = temp_i[1]
+                    continue
+                if check_subset_b:
+                    subset[j][index_a] = temp_i[0]
+
+        return subset
+
+    @staticmethod
+    def _get_temperature(cand_a_, cand_b_, score_mid, pafs, threshold=0.05):
+        temp_ = []
+        for index_a_, cand_a_element in enumerate(cand_a_):
+            for index_b_, cand_b_element in enumerate(cand_b_):
+                mid_point = [(
+                    int(round((cand_a_element[0] + cand_b_element[0]) * 0.5)),
+                    int(round((cand_a_element[1] + cand_b_element[1]) * 0.5))
+                )] * 2
+                vec = [cand_b_element[0] - cand_a_element[0], cand_b_element[1] - cand_a_element[1]]
+                norm_vec = math.sqrt(vec[0] ** 2 + vec[1] ** 2)
+                if norm_vec == 0:
+                    continue
+                vec[0] /= norm_vec
+                vec[1] /= norm_vec
+                score_mid_a = score_mid[mid_point[0][1], mid_point[0][0], 0]
+                score_mid_b = score_mid[mid_point[1][1], mid_point[1][0], 1]
+                score = vec[0] * score_mid_a + vec[1] * score_mid_b
+
+                height_n = pafs.shape[0] // 2
+                suc_ratio = 0
+                mid_score = 0
+                mid_num = 10  # n points for integral over paf
+
+                if score > -100:
+                    p_sum = 0
+                    p_count = 0
+
+                    x = np.linspace(cand_a_element[0], cand_b_element[0], mid_num)
+                    y = np.linspace(cand_a_element[1], cand_b_element[1], mid_num)
+                    for point_idx in range(0, mid_num):
+                        px = int(round(x[point_idx]))
+                        py = int(round(y[point_idx]))
+                        pred = score_mid[py, px, 0:2]
+                        score = vec[0] * pred[0] + vec[1] * pred[1]
+                        if score > threshold:
+                            p_sum += score
+                            p_count += 1
+                    suc_ratio = p_count / mid_num
+                    ratio = 0
+                    if p_count > 0:
+                        ratio = p_sum / p_count
+                    mid_score = ratio + min(height_n / norm_vec - 1, 0)
+                if mid_score > 0 and suc_ratio > 0.8:
+                    score = mid_score
+                    score_all = score + cand_a_element[2] + cand_b_element[2]
+                    temp_.append([index_a_, index_b_, score, score_all])
+        if temp_:
+            temp_ = sorted(temp_, key=itemgetter(2), reverse=True)
+
+        return temp_
+
+    def _get_connections(self, cand_a, cand_b, score_mid, pafs, thresh):
+        temp_ = self._get_temperature(cand_a, cand_b, score_mid, pafs, thresh)
+        num_limbs = min(len(cand_a), len(cand_b))
+        cnt = 0
+        occur_a = np.zeros(len(cand_a), dtype=np.int32)
+        occur_b = np.zeros(len(cand_b), dtype=np.int32)
+        connections = []
+        for row_temp in temp_:
+            if cnt == num_limbs:
+                break
+            i, j, score = row_temp[0:3]
+            if occur_a[i] == 0 and occur_b[j] == 0:
+                connections.append([cand_a[i][3], cand_b[j][3], score])
+                cnt += 1
+                occur_a[i] = 1
+                occur_b[j] = 1
+        return connections
+
+    def group_peaks(self, peaks, pafs, kpt_num=20, threshold=0.05):
+        subset = []
+        candidates = np.array([item for sublist in peaks for item in sublist])
+        for keypoint_id, maped_keypoints in enumerate(self.map_idx):
+            score_mid = pafs[:, :, [x - 19 for x in maped_keypoints]]
+            candidate_a = peaks[self.limb_seq[keypoint_id][0] - 1]
+            candidate_b = peaks[self.limb_seq[keypoint_id][1] - 1]
+            idx_joint_a = self.limb_seq[keypoint_id][0] - 1
+            idx_joint_b = self.limb_seq[keypoint_id][1] - 1
+
+            if not candidate_a and not candidate_b:  # no such limb
+                continue
+            if not candidate_a:  # limb has just B joint
+                subset = self._add_pose_single_candidate(subset, candidate_b, idx_joint_b, kpt_num)
+                continue
+            if not candidate_b:  # limb has just A joint
+                subset = self._add_pose_single_candidate(subset, candidate_a, idx_joint_a, kpt_num)
+                continue
+
+            temp = self._get_connections(candidate_a, candidate_b, score_mid, pafs, threshold)
+            if not temp:
+                continue
+
+            if keypoint_id == 0:
+                subset = [np.ones(kpt_num) * -1 for _ in temp]
+                for i, temp_i in enumerate(temp):
+                    subset[i][self.limb_seq[0][0] - 1] = temp_i[0]
+                    subset[i][self.limb_seq[0][1] - 1] = temp_i[1]
+                    subset[i][-1] = 2
+                    subset[i][-2] = np.sum(candidates[temp_i[0:2], 2]) + temp_i[2]
+            else:
+                index_a = self.limb_seq[keypoint_id][0] - 1
+                index_b = self.limb_seq[keypoint_id][1] - 1
+                if keypoint_id in (17, 18):
+                    subset = self._copy_temperature_to_subset(subset, temp, index_a, index_b)
+                    continue
+                subset = self._add_pose_both_candidates(subset, temp, index_a, index_b, candidates, kpt_num)
+
+        return self._filter_subset(subset), candidates
+
+    @staticmethod
+    def get_poses(subset, candidate):
+        persons_keypoints_x, persons_keypoints_y, persons_keypoints_v = [], [], []
+        scores = []
+        for subset_element in subset:
+            if subset_element.size == 0:
+                continue
+            keypoints_x, keypoints_y, keypoints_v = [0] * 17, [0] * 17, [0] * 17
+            to_coco_map = [0, -1, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]
+            person_score = subset_element[-2]
+            position_id = -1
+            for keypoint_id in subset_element[:-2]:
+                position_id += 1
+                if position_id == 1:  # No 'Neck' in COCO
+                    continue
+
+                cx, cy, visibility = 0, 0, 0  # Keypoint not found
+                if keypoint_id != -1:
+                    cx, cy = candidate[keypoint_id.astype(int), 0:2]
+                    cx = cx - 0.5 + 1  # +1 for matlab consistency, coords start from 1
+                    cy = cy - 0.5 + 1
+                    visibility = 1
+                keypoints_x[to_coco_map[position_id]] = cx
+                keypoints_y[to_coco_map[position_id]] = cy
+                keypoints_v[to_coco_map[position_id]] = visibility
+
+            scores.append(person_score * max(0, (subset_element[-1] - 1)))  # -1 for Neck
+            persons_keypoints_x.append(keypoints_x)
+            persons_keypoints_y.append(keypoints_y)
+            persons_keypoints_v.append(keypoints_v)
+
+        persons_keypoints_x = np.array(persons_keypoints_x)
+        persons_keypoints_y = np.array(persons_keypoints_y)
+        persons_keypoints_v = np.array(persons_keypoints_v)
+        scores = np.array(scores)
+
+        return persons_keypoints_x, persons_keypoints_y, persons_keypoints_v, scores
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/reidentification.py b/tools/accuracy_checker/accuracy_checker/adapters/reidentification.py
new file mode 100644
index 000000000..f2fed251a
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/reidentification.py
@@ -0,0 +1,58 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..adapters import Adapter
+from ..representation import ReIdentificationPrediction
+
+
+class ReidAdapter(Adapter):
+    """
+    Class for converting output of Reid model to ReIdentificationPrediction representation
+    """
+    __provider__ = 'reid'
+
+    def configure(self):
+        """
+        Specifies parameters of config entry
+        """
+        self.grn_workaround = self.launcher_config.get("grn_workaround", True)
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        """
+        Args:
+            identifiers: list of input data identifiers
+            raw: output of model
+        Returns:
+            list of ReIdentificationPrediction objects
+        """
+        prediction = self._extract_predictions(raw, frame_meta)[self.output_blob]
+
+        if self.grn_workaround:
+            # workaround: GRN layer
+            prediction = self._grn_layer(prediction)
+
+        return [ReIdentificationPrediction(identifier, embedding.reshape(-1))
+                for identifier, embedding in zip(identifiers, prediction)]
+
+    @staticmethod
+    def _grn_layer(prediction):
+        GRN_BIAS = 0.000001
+        sum_ = np.sum(prediction ** 2, axis=1)
+        prediction = prediction / np.sqrt(sum_[:, np.newaxis] + GRN_BIAS)
+
+        return prediction
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/segmentation.py b/tools/accuracy_checker/accuracy_checker/adapters/segmentation.py
new file mode 100644
index 000000000..1654c89a9
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/segmentation.py
@@ -0,0 +1,83 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import numpy as np
+from ..adapters import Adapter
+from ..representation import SegmentationPrediction, BrainTumorSegmentationPrediction
+
+
+class SegmentationAdapter(Adapter):
+    __provider__ = 'segmentation'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        frame_meta = frame_meta or [] * len(identifiers)
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
+            result.append(SegmentationPrediction(identifier, output))
+
+        return result
+
+    def _extract_predictions(self, outputs_list, meta):
+        if not 'tiles_shape' in (meta[-1] or {}):
+            new_raw = {}
+            for out in outputs_list:
+                for key, val in out.items():
+                    out_previous = new_raw.get(key, [])
+                    out_previous.append(val)
+                    new_raw[key] = out_previous
+
+            for k in new_raw:
+                new_raw[k] = [new_raw[k]]
+            return  new_raw
+        tiles_shapes = [meta['tiles_shape'] for meta in meta]
+        restore_output = []
+        offset = 0
+        for _, image_tiles_shape in enumerate(tiles_shapes):
+            next_offset = offset + image_tiles_shape[0] * image_tiles_shape[1]
+            image_tiles = [network_output[self.output_blob] for network_output in outputs_list[offset:next_offset]]
+            tiles_columns = image_tiles[::image_tiles_shape[0]]
+            image = tiles_columns[0]
+            for tile_column in tiles_columns[1:]:
+                image = np.concatenate((image, tile_column), axis=3)
+            restore_output.append(image.squeeze())
+            offset = next_offset
+
+        return {self.output_blob: restore_output}
+
+
+class BrainTumorSegmentationAdapter(Adapter):
+    __provider__ = 'brain_tumor_segmentation'
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        result = []
+        frame_meta = frame_meta or [] * len(identifiers)
+        raw_outputs = self._extract_predictions(raw, frame_meta)
+        for identifier, output in zip(identifiers, raw_outputs[self.output_blob]):
+            result.append(BrainTumorSegmentationPrediction(identifier, output))
+
+        return result
+
+    def _extract_predictions(self, outputs_list, meta):
+        if not (meta[-1] or {}).get('multi_infer', False):
+           return outputs_list[0]
+
+        output_keys = list(outputs_list[0].keys())
+        output_map = {}
+        for output_key in output_keys:
+            output_data = [[output[output_key] for output in outputs_list]]
+            output_map[output_key] = output_data
+
+        return output_map
diff --git a/tools/accuracy_checker/accuracy_checker/adapters/text_detection.py b/tools/accuracy_checker/accuracy_checker/adapters/text_detection.py
new file mode 100644
index 000000000..d90ebfc30
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/adapters/text_detection.py
@@ -0,0 +1,309 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from collections import defaultdict
+
+import cv2
+import numpy as np
+
+
+from ..adapters import Adapter
+from ..config import ConfigValidator, StringField, NumberField, BoolField, ConfigError
+from ..representation import TextDetectionPrediction, CharacterRecognitionPrediction
+
+
+class TextDetectionAdapterConfig(ConfigValidator):
+    type = StringField()
+    pixel_link_out = StringField()
+    pixel_class_out = StringField()
+
+
+class TextDetectionAdapter(Adapter):
+    __provider__ = 'text_detection'
+
+    def validate_config(self):
+        text_detection_adapter_config = TextDetectionAdapterConfig('TextDetectionAdapter_Config')
+        text_detection_adapter_config.validate(self.launcher_config)
+
+    def configure(self):
+        self.pixel_link_out = self.launcher_config['pixel_link_out']
+        self.pixel_class_out = self.launcher_config['pixel_class_out']
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        results = []
+        predictions = self._extract_predictions(raw, frame_meta)
+        raw_output = zip(identifiers, frame_meta, predictions[self.pixel_link_out], predictions[self.pixel_class_out])
+        for identifier, current_frame_meta, link_data, cls_data in raw_output:
+            link_data = link_data.reshape((1, *link_data.shape))
+            cls_data = cls_data.reshape((1, *cls_data.shape))
+            link_data_shape = link_data.shape
+            new_link_data_shape = (link_data_shape[0], link_data_shape[2], link_data_shape[3], link_data_shape[1] / 2)
+            cls_data_shape = cls_data.shape
+            new_cls_data_shape = (cls_data_shape[0], cls_data_shape[2], cls_data_shape[3], cls_data_shape[1] / 2)
+            link_data = self.softmax(link_data.transpose((0, 2, 3, 1)).reshape(-1))[1::2]
+            cls_data = self.softmax(cls_data.transpose((0, 2, 3, 1)).reshape(-1))[1::2]
+            mask = self.decode_image_by_join(cls_data, new_cls_data_shape, link_data, new_link_data_shape)
+            rects = self.mask_to_boxes(mask, current_frame_meta['image_size'])
+            results.append(TextDetectionPrediction(identifier, rects))
+
+        return results
+
+    @staticmethod
+    def softmax(data):
+        for i in np.arange(start=0, stop=data.size, step=2, dtype=int):
+            maximum = max(data[i], data[i + 1])
+            data[i] = np.exp(data[i] - maximum)
+            data[i + 1] = np.exp(data[i + 1] - maximum)
+            sum_data = data[i] + data[i + 1]
+            data[i] /= sum_data
+            data[i + 1] /= sum_data
+
+        return data
+
+    def decode_image_by_join(self, cls_data, cls_data_shape, link_data, link_data_shape):
+        k_cls_conf_threshold = 0.7
+        k_link_conf_threshold = 0.7
+        height = cls_data_shape[1]
+        width = cls_data_shape[2]
+        id_pixel_mask = np.argwhere(cls_data >= k_cls_conf_threshold).reshape(-1)
+        pixel_mask = cls_data >= k_cls_conf_threshold
+        group_mask = {}
+        pixel_mask[id_pixel_mask] = True
+        points = []
+        for i in id_pixel_mask:
+            points.append((i % width, i // width))
+            group_mask[i] = -1
+        link_mask = link_data >= k_link_conf_threshold
+        neighbours = link_data_shape[3]
+        for point in points:
+            neighbour = 0
+            point_x, point_y = point
+            x_neighbours = [point_x - 1, point_x, point_x + 1]
+            y_neighbours = [point_y - 1, point_y, point_y + 1]
+            for neighbour_y in y_neighbours:
+                for neighbour_x in x_neighbours:
+                    if neighbour_x == point_x and neighbour_y == point_y:
+                        continue
+
+                    if neighbour_x < 0 or neighbour_x >= width or neighbour_y < 0 or neighbour_y >= height:
+                        continue
+
+                    pixel_value = np.uint8(pixel_mask[neighbour_y * width + neighbour_x])
+                    link_value = np.uint8(
+                        link_mask[int(point_y * width * neighbours + point_x * neighbours + neighbour)]
+                    )
+
+                    if pixel_value and link_value:
+                        group_mask = self.join(point_x + point_y * width, neighbour_x + neighbour_y * width, group_mask)
+
+                    neighbour += 1
+
+        return self.get_all(points, width, height, group_mask)
+
+    def join(self, point1, point2, group_mask):
+        root1 = self.find_root(point1, group_mask)
+        root2 = self.find_root(point2, group_mask)
+        if root1 != root2:
+            group_mask[root1] = root2
+
+        return group_mask
+
+    def get_all(self, points, width, height, group_mask):
+        root_map = {}
+        mask = np.zeros((height, width))
+
+        for point in points:
+            point_x, point_y = point
+            point_root = self.find_root(point_x + point_y * width, group_mask)
+            if not root_map.get(point_root):
+                root_map[point_root] = int(len(root_map) + 1)
+            mask[point_y, point_x] = root_map[point_root]
+
+        return mask
+
+    @staticmethod
+    def find_root(point, group_mask):
+        root = point
+        update_parent = False
+        while group_mask[root] != -1:
+            root = group_mask[root]
+            update_parent = True
+
+        if update_parent:
+            group_mask[point] = root
+
+        return root
+
+    @staticmethod
+    def mask_to_boxes(mask, image_size):
+        max_val = np.max(mask).astype(int)
+        resized_mask = cv2.resize(
+            mask.astype(np.float32), (image_size[1], image_size[0]), interpolation=cv2.INTER_NEAREST
+        )
+        bboxes = []
+        for i in range(int(max_val + 1)):
+            bbox_mask = resized_mask == i
+            contours_tuple = cv2.findContours(bbox_mask.astype(np.uint8), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
+            contours = contours_tuple[1] if len(contours_tuple) > 2 else contours_tuple[0]
+            if not contours:
+                continue
+            rect = cv2.minAreaRect(contours[0])
+            _, hw, _ = rect
+            ignored_height = hw[0] >= image_size[0] - 1
+            ignored_width = hw[1] >= image_size[1] - 1
+            if ignored_height or ignored_width:
+                continue
+            box = cv2.boxPoints(rect)
+            bboxes.append(box)
+
+        return bboxes
+
+
+class LPRAdapter(Adapter):
+    __provider__ = 'lpr'
+
+    def configure(self):
+        if not self.label_map:
+            raise ConfigError('LPR adapter requires dataset label map for correct decoding.')
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        raw_output = self._extract_predictions(raw, frame_meta)
+        predictions = raw_output[self.output_blob]
+        result = []
+        for identifier, output in zip(identifiers, predictions):
+            decoded_out = self.decode(output.reshape(-1))
+            result.append(CharacterRecognitionPrediction(identifier, decoded_out))
+
+        return result
+
+    def decode(self, outputs):
+        decode_out = str()
+        for output in outputs:
+            if output == -1:
+                break
+            decode_out += str(self.label_map[output])
+
+        return decode_out
+
+
+class BeamSearchDecoderConfig(ConfigValidator):
+    beam_size = NumberField(optional=True, floats=False, min_value=1)
+    blank_label = NumberField(optional=True, floats=False, min_value=0)
+    softmaxed_probabilities = BoolField(optional=True)
+
+
+class BeamSearchDecoder(Adapter):
+    __provider__ = 'beam_search_decoder'
+
+    def validate_config(self):
+        beam_search_decoder_config = BeamSearchDecoderConfig(
+            'BeamSearchDecoder_Config',
+            BeamSearchDecoderConfig.IGNORE_ON_EXTRA_ARGUMENT
+        )
+        beam_search_decoder_config.validate(self.launcher_config)
+
+    def configure(self):
+        if not self.label_map:
+            raise ConfigError('Beam Search Decoder requires dataset label map for correct decoding.')
+
+        self.beam_size = self.launcher_config.get('beam_size', 10)
+        self.blank_label = self.launcher_config.get('blank_label', len(self.label_map))
+        self.softmaxed_probabilities = self.launcher_config.get('softmaxed_probabilities', False)
+
+    def process(self, raw, identifiers=None, frame_meta=None):
+        raw_output = self._extract_predictions(raw, frame_meta)
+        output = raw_output[self.output_blob]
+        output = np.swapaxes(output, 0, 1)
+
+        result = []
+        for identifier, data in zip(identifiers, output):
+            if self.softmaxed_probabilities:
+                data = np.log(data)
+            seq = self.decode(data, self.beam_size, self.blank_label)
+            decoded = ''.join(str(self.label_map[char]) for char in seq)
+            result.append(CharacterRecognitionPrediction(identifier, decoded))
+        return result
+
+    @staticmethod
+    def decode(probabilities, beam_size=10, blank_id=None):
+        """
+         Decode given output probabilities to sequence of labels.
+        Arguments:
+            probabilities: The output log probabilities for each time step.
+            Should be an array of shape (time x output dim).
+            beam_size (int): Size of the beam to use during decoding.
+            blank_id (int): Index of the CTC blank label.
+        Returns the output label sequence.
+        """
+        def make_new_beam():
+            return defaultdict(lambda: (-np.inf, -np.inf))
+
+        def log_sum_exp(*args):
+            if all(a == -np.inf for a in args):
+                return -np.inf
+            a_max = np.max(args)
+            lsp = np.log(np.sum(np.exp(a - a_max) for a in args))
+
+            return a_max + lsp
+
+        times, symbols = probabilities.shape
+        # Initialize the beam with the empty sequence, a probability of 1 for ending in blank
+        # and zero for ending in non-blank (in log space).
+        beam = [(tuple(), (0.0, -np.inf))]
+
+        for time in range(times):
+            # A default dictionary to store the next step candidates.
+            next_beam = make_new_beam()
+
+            for symbol_id in range(symbols):
+                current_prob = probabilities[time, symbol_id]
+
+                for prefix, (prob_blank, prob_non_blank) in beam:
+                    # If propose a blank the prefix doesn't change.
+                    # Only the probability of ending in blank gets updated.
+                    if symbol_id == blank_id:
+                        next_prob_blank, next_prob_non_blank = next_beam[prefix]
+                        next_prob_blank = log_sum_exp(
+                            next_prob_blank, prob_blank + current_prob, prob_non_blank + current_prob
+                        )
+                        next_beam[prefix] = (next_prob_blank, next_prob_non_blank)
+                        continue
+                    # Extend the prefix by the new character symbol and add it to the beam.
+                    # Only the probability of not ending in blank gets updated.
+                    end_t = prefix[-1] if prefix else None
+                    next_prefix = prefix + (symbol_id,)
+                    next_prob_blank, next_prob_non_blank = next_beam[next_prefix]
+                    if symbol_id != end_t:
+                        next_prob_non_blank = log_sum_exp(
+                            next_prob_non_blank, prob_blank + current_prob, prob_non_blank + current_prob
+                        )
+                    else:
+                        # Don't include the previous probability of not ending in blank (prob_non_blank) if symbol
+                        #  is repeated at the end. The CTC algorithm merges characters not separated by a blank.
+                        next_prob_non_blank = log_sum_exp(next_prob_non_blank, prob_blank + current_prob)
+
+                    next_beam[next_prefix] = (next_prob_blank, next_prob_non_blank)
+                    # If symbol is repeated at the end also update the unchanged prefix. This is the merging case.
+                    if symbol_id == end_t:
+                        next_prob_blank, next_prob_non_blank = next_beam[prefix]
+                        next_prob_non_blank = log_sum_exp(next_prob_non_blank, prob_non_blank + current_prob)
+                        next_beam[prefix] = (next_prob_blank, next_prob_non_blank)
+
+            beam = sorted(next_beam.items(), key=lambda x: log_sum_exp(*x[1]), reverse=True)[:beam_size]
+
+        best = beam[0]
+
+        return best[0]
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/README.md b/tools/accuracy_checker/accuracy_checker/annotation_converters/README.md
new file mode 100644
index 000000000..d5dcefe16
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/README.md
@@ -0,0 +1,98 @@
+# Annotation Converters
+
+Annotation converter is a function which converts annotation file to suitable for metric evaluation format.
+Each annotation converter expects specific annotation file format or data structure, which depends on original dataset.
+If converter for your data format is not supported by Accuracy Checker, you can provide your own annotation converter.
+Each annotation converter has parameters available for configuration.
+
+Process of conversion can be implemented in two ways:
+* via configuration file
+* via command line
+
+### Describing annotation conversion in configuration file.
+
+Annotation conversion can be provided in `dataset` section your configuration file to convert annotation inplace before every evaluation.
+Each conversion configuration should contain `converter` field filled selected converter name and provide converter specific parameters (more details in supported converters section). All paths can be prefixed via command line with `-s, --source` argument.
+
+You can additionally use optional parameters like:
+* `subsample_size` - Dataset subsample size. You can specify the number of ground truth objects or dataset ration in percentage. Please, be careful to use this option, some datasets does not support subsampling. 
+* `annotation` - path to store converted annotation pickle file. You can use this parameter if you need to reuse converted annotation to avoid subsequent conversions.
+* `meta` - path to store mata information about converted annotation if it is provided.
+
+Example of usage:
+```yaml
+   annotation_conversion:
+     converter: sample
+     data_dir: sample/sample_dataset
+```
+
+
+### Conversing process via command line.
+
+The command line for annotation conversion looks like:
+
+```bash
+python3 convert_annotation.py <converter_name> <converter_specific parameters>
+```
+All converter specific options should have format `--<parameter_name> <parameter_value>`
+You may refer to `-h, --help` to full list of command line options. Some optional arguments are:
+
+* `-o, --output_dir` - directory to save converted annotation and meta info.
+* `-a, --annotation_name` - annotation file name.
+* `-m, --meta_name` - meta info file name.
+
+### Supported converters 
+
+Accuracy Checker supports following list of annotation converters and specific for them parameters:
+* `wider` - converts from Wider Face dataset to `DetectionAnnotation`.
+  * `annotation_file` - path to txt file, which contains ground truth data in WiderFace dataset format.
+  * `label_start` - specifies face label index in label map. Default value is 1. You can provide another value, if you want to use this dataset for separate label validation,
+  in case when your network predicts other class for faces.
+* `sample` - converts annotation for SampleNet to `ClassificationAnnotation`.
+  * `data_dir` - path to sample dataset root directory.
+* `voc07` - converts Pascal VOC 2007 annotation for detection task to `DetectionAnnotation`.
+   * `image_set_file` - path to file with validation image list.
+   * `annotations_dir` - path to directory with annotation files.
+   * `images_dir` - path to directory with images related to devkit root (default JPEGImages).
+  * `has_background` - allows convert dataset with/without adding background_label. Accepted values are True or False. (default is True) 
+* `voc_segmentation` - converts Pascal VOC annotation for semantic segmentation task to `SegmentationAnnotation`.
+  * `image_set_file` - path to file with validation image list.
+  * `images_dir` - path to directory with images related to devkit root (default JPEGImages).
+  * `mask_dir` - path to directory with ground truth segmentation masks related to devkit root (default SegmentationClass).
+* `mars` - converts MARS person reidentification dataset to `ReidentificationAnnotation`.
+  * `data_dir` - path to data directory, where gallery (`bbox_test`) and `query` subdirectories are located.
+* `market1501` - converts Market1501 person reidentification dataset to `ReidentificationAnnotation`.
+  * `data_dir` - path to data directory, where gallery (`bounding_box_test`) and `query` subdirectories are located.
+* `detection_opencv_storage` - converts detection annotation stored in Detection OpenCV storage format to `DetectionAnnotation`.
+  * `annotation_file` - path to annotation in xml format.
+  * `image_names_file` - path to txt file, which contains image name list for dataset.
+  * `label_start` - specifies label index start in label map. Default value is 1. You can provide another value, if you want to use this dataset for separate label validation.
+  * `background_label` - specifies which index will be used for background label. You can not provide this parameter if your dataset has not background label
+* `face_reid_pairwise` - converts Labeled Faces in the Wild dataset for face reidentification to `ReidentificationClassificationAnnotation`.
+  * `pairs_file` - path to file with annotation positive and negative pairs.
+  * `train_file` - path to file with annotation positive and negative pairs used for network train (optional parameter).
+  * `landmarks_file` - path to file with facial landmarks coordinates for annotation images (optional parameter).
+* `landmarks_regression` - converts VGG Face 2 dataset for facial landmarks regression task to `FacialLandmarksAnnotation`.
+  * `landmarks_csv_file` - path to csv file with coordinates of landmarks points.
+  * `bbox_csv_file` - path to cvs file which contains bounding box coordinates for faces (optional parameter).
+* `cityscapes` - converts CityScapes Dataset to `SegmentationAnnotation`.
+  * `dataset_root_dir` - path to dataset root.
+  * `images_subfolder` - path from dataset root to directory with validation images (Optional, default `imgsFine/leftImg8bit/val`).
+  * `masks_subfolder` - path from dataset root to directory with ground truth masks (Optional, `gtFine/val`).
+  * `masks_suffix` - suffix for mask file names (Optional, default `_gtFine_labelTrainIds`).
+  * `images_suffix` - suffix for image file names (Optional, default `_leftImg8bit`).
+  * `use_full_label_map` - allows to use full label map with 33 classes instead train label map with 18 classes (Optional, default `False`).
+* `icdar15_detection` - converts ICDAR15 dataset for text detection  task to `TextDetectionAnnotation`.
+  * `data_dir` - path to folder with annotations on txt format.
+* `icdar13_recognition` - converts ICDAR13 dataset for text recognition task to `CharecterRecognitionAnnotation`.
+  * `annotation_file` - path to annotation file in txt format.
+* `mscoco_detection` - converts MS COCO dataset for object detection task to `DetectionAnnotation`.
+  * `annotation_file` - path ot annotation file in json format.
+  * `has_background` - allows convert dataset with/without adding background_label. Accepted values are True or False. (default is False).
+  * `use_full_label_map` - allows to use original label map (with 91 object categories) from paper instead public available(80 categories).
+* `mscoco_keypoints` - converts MS COCO dataset for keypoints localization task to `PoseEstimationAnnotation`.
+  * `annotation_file` - path ot annotation file in json format.
+* `imagenet` - convert ImageNet dataset for image classification task to `ClassificationAnnotation`.
+  * `annotation_file` - path to annotation in txt format.
+  * `labels_file` - path to file with word description of labels (synset words).
+  * `has_background` - allows to add background label to original labels and convert dataset for 1001 classes instead 1000 (default value is False).
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/__init__.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/__init__.py
new file mode 100644
index 000000000..f03742247
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/__init__.py
@@ -0,0 +1,55 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from .format_converter import BaseFormatConverter
+from .convert import make_subset, save_annotation
+from .market1501 import Market1501Converter
+from .mars import MARSConverter
+from .pascal_voc import PascalVOCDetectionConverter
+from .sample_converter import SampleConverter
+from .wider import WiderFormatConverter
+from .detection_opencv_storage import DetectionOpenCVStorageFormatConverter
+from .lfw import FaceReidPairwiseConverter
+from .vgg_face_regression import LandmarksRegression
+from .super_resolution_converter import SRConverter
+from .imagenet import ImageNetFormatConverter
+from .icdar import ICDAR13RecognitionDatasetConverter, ICDAR15DetectionDatasetConverter
+from .ms_coco import MSCocoDetectionConverter, MSCocoKeypointsConverter
+from .cityscapes import CityscapesConverter
+from .ncf_converter import NCFConverter
+from .brats import BratsConverter
+
+__all__ = [
+    'BaseFormatConverter',
+    'make_subset',
+    'save_annotation',
+
+    'ImageNetFormatConverter',
+    'Market1501Converter',
+    'SampleConverter',
+    'PascalVOCDetectionConverter',
+    'WiderFormatConverter',
+    'MARSConverter',
+    'DetectionOpenCVStorageFormatConverter',
+    'FaceReidPairwiseConverter',
+    'SRConverter',
+    'ICDAR13RecognitionDatasetConverter',
+    'ICDAR15DetectionDatasetConverter',
+    'MSCocoKeypointsConverter',
+    'MSCocoDetectionConverter',
+    'CityscapesConverter',
+    'NCFConverter',
+    'BratsConverter'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/_reid_common.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/_reid_common.py
new file mode 100644
index 000000000..8bcce97e8
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/_reid_common.py
@@ -0,0 +1,45 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from pathlib import Path
+
+from ..representation import ReIdentificationAnnotation
+
+
+def read_directory(directory, query, image_pattern):
+    pids = set()
+    images = []
+    for image in directory.glob("*.jpg"):
+        pid, camid = map(int, image_pattern.search(image.name).groups())
+        if pid == -1:
+            continue
+
+        camid -= 1
+        pids.add(pid)
+
+        identifier = str(Path(directory.name) / image.name)
+        images.append(ReIdentificationAnnotation(identifier, camid, pid, query))
+
+    return images, pids
+
+
+def check_dirs(dirs, parent_dir, arg_name='data_dir'):
+    for directory in dirs:
+        if directory.is_dir():
+            continue
+
+        message_pattern = "{directory} not found in {parent_dir}. Check {arg_name} is pointed to a correct directory"
+        raise FileNotFoundError(message_pattern.format(directory=directory, parent_dir=parent_dir, arg_name=arg_name))
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/brats.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/brats.py
new file mode 100644
index 000000000..327398b60
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/brats.py
@@ -0,0 +1,53 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from pathlib import Path
+
+from ..representation import BrainTumorSegmentationAnnotation
+from ..utils import get_path
+from ..config import StringField
+from .format_converter import BaseFormatConverter, DirectoryBasedAnnotationConverterConfig
+
+
+class BratsConverterConfig(DirectoryBasedAnnotationConverterConfig):
+    image_folder = StringField(optional=True)
+    mask_folder = StringField(optional=True)
+
+
+class BratsConverter(BaseFormatConverter):
+    __provider__ = 'brats'
+
+    _config_validator_type = BratsConverterConfig
+
+    def configure(self):
+        self.data_dir = self.config['data_dir']
+        self.image_folder = self.config.get('image_folder', 'imagesTr')
+        self.mask_folder = self.config.get('mask_folder', 'labelsTr')
+
+    def convert(self):
+        mask_folder = Path(self.mask_folder)
+        image_folder = Path(self.image_folder)
+        image_dir = get_path(self.data_dir / image_folder, is_directory=True)
+
+        annotations = []
+        for file_in_dir in image_dir.iterdir():
+            annotation = BrainTumorSegmentationAnnotation(
+                str(image_folder / file_in_dir.parts[-1]),
+                str(mask_folder / file_in_dir.parts[-1]),
+            )
+
+            annotations.append(annotation)
+
+        return annotations, None
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/cityscapes.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/cityscapes.py
new file mode 100644
index 000000000..3bda89a0c
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/cityscapes.py
@@ -0,0 +1,73 @@
+from pathlib import Path
+from ..representation import SegmentationAnnotation
+from ..representation.segmentation_representation import GTMaskLoader
+from ..config import PathField, StringField, BoolField
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+train_meta = {
+    'label_map': {
+        0: 'road', 1: 'sidewalk', 2: 'building', 3: 'wall', 4: 'fence', 5: 'pole', 6: 'traffic light',
+        7: 'traffic sign', 8: 'vegetation', 9: 'terrain', 10: 'sky', 11: 'person', 12: 'rider', 13: 'car',
+        14: 'truck', 15: 'bus', 16: 'train', 17: 'motorcycle', 18: 'bicycle'
+    },
+    'segmentation_colors': (
+        (128, 64, 128), (244, 35, 232), (70, 70, 70), (102, 102, 156), (190, 153, 153), (153, 153, 153),
+        (250, 170, 30), (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0),
+        (0, 0, 142), (0, 0, 70), (0, 60, 100), (0, 80, 100), (0, 0, 230), (119, 11, 32)
+    ),
+}
+
+full_dataset_meta = {
+    'segmentation_colors' : (
+        (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (0, 0, 0), (111, 74, 0), (81, 0, 81), (128, 64, 128),
+        (244, 35, 232), (250, 170, 160), (230, 150, 140), (70, 70, 70), (102, 102, 156), (190, 153, 153),
+        (180, 165, 180), (150, 100, 100), (150, 120, 90), (153, 153, 153), (153, 153, 153), (250, 170, 30),
+        (220, 220, 0), (107, 142, 35), (152, 251, 152), (70, 130, 180), (220, 20, 60), (255, 0, 0), (0, 0, 142),
+        (0, 0, 70), (0, 60, 100), (0, 0, 90), (0, 0, 110), (0, 80, 100), (0, 0, 230), (119, 11, 32)
+    ),
+    'label_map': {
+        0: 'unlabeled', 1:  'ego vehicle', 2: 'rectification border', 3: 'out of roi', 4: 'static', 5: 'dynamic',
+        6: 'ground', 7: 'road', 8: 'sidewalk', 9: 'parking', 10: 'rail track', 11: 'building', 12: 'wall',
+        13: 'fence', 14: 'guard rail', 15: 'bridge', 16: 'tunnel', 17: 'pole', 18: 'polegroup', 19: 'traffic light',
+        20: 'traffic sign', 21: 'vegetation', 22: 'terrain', 23: 'sky', 24: 'person', 25: 'rider', 26: 'car',
+        27: 'truck', 28: 'bus', 29: 'caravan', 30: 'trailer', 31: 'train', 32: 'motorcycle', 33: 'bicycle',
+        -1: 'license plate'
+    }
+}
+
+
+class CityscapesConverterConfig(BaseFormatConverterConfig):
+    dataset_root_dir = PathField(is_directory=True)
+    images_subfolder = StringField(optional=True)
+    masks_subfolder = StringField(optional=True)
+    masks_suffix = StringField(optional=True)
+    images_suffix = StringField(optional=True)
+    use_full_label_map = BoolField(optional=True)
+
+
+class CityscapesConverter(BaseFormatConverter):
+    __provider__ = 'cityscapes'
+
+    _config_validator_type = CityscapesConverterConfig
+
+    def configure(self):
+        self.dataset_root = self.config['dataset_root_dir']
+        self.images_dir = self.config.get('images_subfolder', 'imgsFine/leftImg8bit/val')
+        self.masks_dir = self.config.get('masks_subfolder', 'gtFine/val')
+        self.masks_suffix = self.config.get('masks_suffix', '_gtFine_labelTrainIds')
+        self.images_suffix = self.config.get('images_suffix', '_leftImg8bit')
+        self.use_full_label_map = self.config.get('use_full_label_map', False)
+
+
+    def convert(self):
+        images = list(self.dataset_root.rglob(r'{}/*/*{}.png'.format(self.images_dir, self.images_suffix)))
+        annotations = []
+        for image in images:
+            identifier = str(Path(self.images_dir).joinpath(*image.parts[-2:]))
+            mask = Path(self.masks_dir) / image.parts[-2] / self.masks_suffix.join(
+                str(image.name).split(self.images_suffix)
+            )
+            annotations.append(SegmentationAnnotation(identifier, mask, mask_loader=GTMaskLoader.PILLOW))
+
+        return annotations, full_dataset_meta if self.use_full_label_map else train_meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py
new file mode 100644
index 000000000..ba9ee8a09
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/convert.py
@@ -0,0 +1,126 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import warnings
+import json
+from pathlib import Path
+from argparse import ArgumentParser
+from functools import partial
+
+import numpy as np
+
+from ..utils import get_path
+
+from .format_converter import BaseFormatConverter
+
+
+def build_argparser():
+    parser = ArgumentParser(
+        description="Converts annotation form a arbitrary format to accuracy-checker specific format", add_help=False
+    )
+    parser.add_argument(
+        "converter",
+        help="Specific converter to run",
+        choices=list(BaseFormatConverter.providers.keys())
+    )
+    parser.add_argument(
+        "-o", "--output_dir",
+        help="Directory to save converted annotation and meta info",
+        required=False,
+        type=partial(get_path, is_directory=True)
+    )
+    parser.add_argument("-m", "--meta_name", help="Meta info file name", required=False)
+    parser.add_argument("-a", "--annotation_name", help="Annotation file name", required=False)
+    parser.add_argument("-ss", "--subsample", help="Dataset subsample size", required=False)
+    parser.add_argument("--subsample_seed", help="Seed for generation dataset subsample", type=int, required=False)
+
+    return parser
+
+
+def make_subset(annotation, size, seed=666):
+    dataset_size = len(annotation)
+    if dataset_size < size:
+        warnings.warn('dataset size - {} less than subsample size - {}'.format(dataste_size, size))
+        return annotation
+    np.random.seed(seed)
+    return list(np.random.choice(annotation, size=size, replace=False))
+
+
+def main():
+    main_argparser = build_argparser()
+    args, _ = main_argparser.parse_known_args()
+    converter, converter_argparser, converter_args = get_converter_arguments(args)
+
+    main_argparser = ArgumentParser(parents=[main_argparser, converter_argparser])
+    args = main_argparser.parse_args()
+
+    converter = configure_converter(converter_args, args, converter)
+    out_dir = args.output_dir or Path.cwd()
+
+    result, meta = converter.convert()
+
+    subsample = args.subsample
+    if subsample:
+        if subsample.endswith('%'):
+            subsample_ratio = float(subsample[:-1]) / 100
+            subsample_size = int(len(result) * subsample_ratio)
+        else:
+            subsample_size = int(args.subsample)
+
+        result = make_subset(result, subsample_size)
+
+    converter_name = converter.get_name()
+    annotation_name = args.annotation_name or "{}.pickle".format(converter_name)
+    meta_name = args.meta_name or "{}.json".format(converter_name)
+
+    annotation_file = out_dir / annotation_name
+    meta_file = out_dir / meta_name
+
+    save_annotation(result, meta, annotation_file, meta_file)
+
+
+def save_annotation(annotation, meta, annotation_file, meta_file):
+    if annotation_file:
+        with annotation_file.open('wb') as file:
+            for representation in annotation:
+                representation.dump(file)
+    if meta_file and meta:
+        with meta_file.open('wt') as file:
+            json.dump(meta, file)
+
+
+def configure_converter(converter_options, args, converter):
+    args_dict, converter_options_dict = vars(args), vars(converter_options)
+    converter_config = {
+        option_name: option_value for option_name, option_value in args_dict.items()
+        if option_name in converter_options_dict and option_value is not None
+    }
+    converter_config['converter'] = args.converter
+    converter.config = converter_config
+    converter.validate_config()
+    converter.configure()
+
+    return converter
+
+
+def get_converter_arguments(arguments):
+    converter = BaseFormatConverter.provide(arguments.converter)
+    converter_argparser = converter.get_argparser()
+    converter_options, _ = converter_argparser.parse_known_args()
+    return converter, converter_argparser, converter_options
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/detection_opencv_storage.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/detection_opencv_storage.py
new file mode 100644
index 000000000..dfe461a54
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/detection_opencv_storage.py
@@ -0,0 +1,114 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from ..config import PathField, NumberField
+from ..representation import DetectionAnnotation
+from ..utils import convert_bboxes_xywh_to_x1y1x2y2, read_xml, read_txt
+
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class DetectionOpenCVConverterConfig(BaseFormatConverterConfig):
+    annotation_file = PathField()
+    image_names_file = PathField(optional=True)
+    label_start = NumberField(floats=False, optional=True)
+    background_label = NumberField(floats=False, optional=True)
+
+
+class DetectionOpenCVStorageFormatConverter(BaseFormatConverter):
+    __provider__ = 'detection_opencv_storage'
+
+    _config_validator_type = DetectionOpenCVConverterConfig
+
+    def configure(self):
+        self.annotation_file = self.config['annotation_file']
+        self.image_names_file = self.config.get('image_names_file')
+        self.label_start = self.config.get('label_start', 1)
+        self.background_label = self.config.get('background_label')
+
+    def convert(self):
+        root = read_xml(self.annotation_file)
+
+        labels_set = self.get_label_set(root)
+
+        labels_set = sorted(labels_set)
+        class_to_ind = dict(zip(labels_set, list(range(self.label_start, len(labels_set) + self.label_start + 1))))
+        label_map = {}
+        for class_label, ind in class_to_ind.items():
+            label_map[ind] = class_label
+
+        annotations = []
+        for frames in root:
+            for frame in frames:
+                identifier = '{}.png'.format(frame.tag)
+                labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
+                difficult_indices = []
+                for annotation in frame:
+                    label = annotation.findtext('type')
+                    if not label:
+                        raise ValueError('"{}" contains detection without "{}"'.format(self.annotation_file, 'type'))
+
+                    box = annotation.findtext('roi')
+                    if not box:
+                        raise ValueError('"{}" contains detection without "{}"'.format(self.annotation_file, 'roi'))
+                    box = list(map(float, box.split()))
+
+                    is_ignored = annotation.findtext('is_ignored', 0)
+                    if int(is_ignored) == 1:
+                        difficult_indices.append(len(labels))
+
+                    labels.append(class_to_ind[label])
+                    x_min, y_min, x_max, y_max = convert_bboxes_xywh_to_x1y1x2y2(*box)
+                    x_mins.append(x_min)
+                    y_mins.append(y_min)
+                    x_maxs.append(x_max)
+                    y_maxs.append(y_max)
+
+                detection_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
+                detection_annotation.metadata['difficult_boxes'] = difficult_indices
+                annotations.append(detection_annotation)
+
+        if self.image_names_file:
+            self.rename_identifiers(annotations, self.image_names_file)
+
+        meta = {}
+        if self.background_label:
+            label_map[self.background_label] = '__background__'
+            meta['background_label'] = self.background_label
+        meta['label_map'] = label_map
+
+        return annotations, meta
+
+    @staticmethod
+    def rename_identifiers(annotation_list, images_file):
+        for annotation, image in zip(annotation_list, read_txt(images_file)):
+            annotation.identifier = image
+
+        return annotation_list
+
+
+    @staticmethod
+    def get_label_set(xml_root):
+        labels_set = set()
+        for frames in xml_root:
+            for frame in frames:
+                for annotation in frame:
+                    label = annotation.findtext('type')
+                    if not label:
+                        raise ValueError('annotation contains detection without label')
+
+                    labels_set.add(label)
+
+        return labels_set
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/format_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/format_converter.py
new file mode 100644
index 000000000..792786755
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/format_converter.py
@@ -0,0 +1,108 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from argparse import ArgumentParser
+
+from ..config import ConfigValidator, StringField, PathField
+from ..dependency import ClassProvider
+from ..utils import format_key
+
+
+class BaseFormatConverterConfig(ConfigValidator):
+    converter = StringField()
+
+
+class BaseFormatConverter(ClassProvider):
+    __provider_type__ = 'converter'
+
+    _config_validator_type = BaseFormatConverterConfig
+
+    @property
+    def config_validator(self):
+        return self._config_validator_type(
+            '{}_converter_config'.format(self.get_name()),
+            on_extra_argument=self._config_validator_type.ERROR_ON_EXTRA_ARGUMENT
+        )
+
+    def __init__(self, config=None):
+        self.config = config
+        if config:
+            self.validate_config()
+            self.configure()
+
+    def convert(self, *args, **kwargs):
+        """
+        Converts specific annotation format to the ResultRepresentation specific for current dataset/task.
+
+        Returns:
+            annotation: list of ResultRepresentations.
+            meta: meta-data map for the current dataset.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def get_name(cls):
+        return cls.__provider__
+
+    def get_argparser(self):
+        parser = ArgumentParser(add_help=False)
+        config_validator = self.config_validator
+        fields = config_validator.fields
+        for field_name, field in fields.items():
+            if field_name == 'converter':
+                # it is base argument. Main argparser already use it to get argparser from specific converter.
+                # Converter argparser should contain only converter specific arguments.
+                continue
+
+            required = not field.optional
+            parser.add_argument(
+                format_key(field_name), required=required, type=field.type
+            )
+
+        return parser
+
+    def validate_config(self):
+        self.config_validator.validate(self.config)
+
+    def configure(self):
+        pass
+
+
+class FileBasedAnnotationConverterConfig(BaseFormatConverterConfig):
+    annotation_file = PathField()
+
+
+class FileBasedAnnotationConverter(BaseFormatConverter):
+    _config_validator_type = FileBasedAnnotationConverterConfig
+
+    def configure(self):
+        self.annotation_file = self.config['annotation_file']
+
+    def convert(self, *args, **kwargs):
+        pass
+
+
+class DirectoryBasedAnnotationConverterConfig(BaseFormatConverterConfig):
+    data_dir = PathField(is_directory=True)
+
+
+class DirectoryBasedAnnotationConverter(BaseFormatConverter):
+    _config_validator_type = DirectoryBasedAnnotationConverterConfig
+
+    def configure(self):
+        self.data_dir = self.config['data_dir']
+
+    def convert(self, *args, **kwargs):
+        pass
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/icdar.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/icdar.py
new file mode 100644
index 000000000..184ade3b0
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/icdar.py
@@ -0,0 +1,63 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from ..representation import TextDetectionAnnotation, CharacterRecognitionAnnotation
+from ..utils import read_txt
+from .format_converter import  FileBasedAnnotationConverter, DirectoryBasedAnnotationConverter
+
+
+class ICDAR15DetectionDatasetConverter(DirectoryBasedAnnotationConverter):
+    __provider__ = 'icdar15_detection'
+
+    def convert(self):
+        annotations = []
+
+        for gt_file in self.data_dir.iterdir():
+            gt_file_name = str(gt_file.parts[-1])
+            identifier = '{}.jpg'.format(gt_file_name.split('gt_')[-1].split('.txt')[0])
+            all_points, transcriptions, difficult = [], [], []
+
+            for text_area in read_txt(gt_file):
+                text_annotation = text_area.split(',')
+                transcription = text_annotation[-1]
+                points = np.reshape(list(map(float, text_annotation[:8])), (-1, 2))
+                if transcription == '###':
+                    difficult.append(len(transcriptions))
+                all_points.append(points)
+                transcriptions.append(transcription)
+            annotation = TextDetectionAnnotation(identifier, all_points, transcriptions)
+            annotation.metadata['difficult_boxes'] = difficult
+            annotations.append(annotation)
+
+        return annotations, None
+
+
+class ICDAR13RecognitionDatasetConverter(FileBasedAnnotationConverter):
+    __provider__ = 'icdar13_recognition'
+
+    supported_symbols = '0123456789abcdefghijklmnopqrstuvwxyz'
+
+    def convert(self):
+        annotations = []
+
+        for line in read_txt(self.annotation_file):
+            identifier, text = line.strip().split(' ')
+            annotations.append(CharacterRecognitionAnnotation(identifier, text))
+
+        label_map = {ind: str(key) for ind, key in enumerate(self.supported_symbols)}
+
+        return annotations, {'label_map': label_map, 'blank_label': len(label_map)}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/imagenet.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/imagenet.py
new file mode 100644
index 000000000..88df08ade
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/imagenet.py
@@ -0,0 +1,52 @@
+import numpy as np
+
+from ..config import PathField, BoolField
+from ..representation import ClassificationAnnotation
+from ..utils import read_txt, get_path
+
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class ImageNetFormatConverterConfig(BaseFormatConverterConfig):
+    annotation_file = PathField()
+    labels_file = PathField(optional=True)
+    has_background = BoolField(optional=True)
+
+
+class ImageNetFormatConverter(BaseFormatConverter):
+    __provider__ = 'imagenet'
+
+    _config_validator_type = ImageNetFormatConverterConfig
+
+    def configure(self):
+        self.annotation_file = self.config['annotation_file']
+        self.labels_file = self.config.get('labels_file')
+        self.has_background = self.config.get('has_background', False)
+
+    def convert(self):
+        annotation = []
+        for image in read_txt(get_path(self.annotation_file)):
+            image_name, label = image.split()
+            label = np.int64(label) if not self.has_background else np.int64(label) + 1
+            annotation.append(ClassificationAnnotation(image_name, label))
+        meta = self._create_meta(self.labels_file, self.has_background) if self.labels_file else None
+
+        return annotation, meta
+
+    @staticmethod
+    def _create_meta(labels_file, has_background=False):
+        meta = {}
+        labels = {}
+        for i, line in enumerate(read_txt(get_path(labels_file))):
+            index_for_label = i if not has_background else i + 1
+            line = line.strip()
+            label = line[line.find(' ') + 1:]
+            labels[index_for_label] = label
+
+        if has_background:
+            labels[0] = 'background'
+            meta['backgound_label'] = 0
+
+        meta['label_map'] = labels
+
+        return meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/lfw.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/lfw.py
new file mode 100644
index 000000000..1002daf8e
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/lfw.py
@@ -0,0 +1,111 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from collections import defaultdict
+from pathlib import Path
+
+from ..config import PathField
+from ..representation import ReIdentificationClassificationAnnotation
+from ..utils import read_txt
+
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class FaceReidPairwiseConverterConfig(BaseFormatConverterConfig):
+    pairs_file = PathField()
+    train_file = PathField(optional=True)
+    landmarks_file = PathField(optional=True)
+
+
+class FaceReidPairwiseConverter(BaseFormatConverter):
+    __provider__ = 'face_reid_pairwise'
+
+    _config_validator_type = FaceReidPairwiseConverterConfig
+
+    def configure(self):
+        self.pairs_file = self.config['pairs_file']
+        self.train_file = self.config.get('train_file')
+        self.landmarks_file = self.config.get('landmarks_file')
+
+    def convert(self):
+        landmarks_map = {}
+        if self.landmarks_file:
+            for landmark_line in read_txt(self.landmarks_file):
+                landmark_line = landmark_line.split('\t')
+                landmarks_map[landmark_line[0]] = [int(point) for point in landmark_line[1:]]
+
+        test_annotations = self.prepare_annotation(self.pairs_file, True, landmarks_map)
+        if self.train_file:
+            train_annotations = self.prepare_annotation(self.train_file, True, landmarks_map)
+            test_annotations += train_annotations
+
+        return test_annotations, None
+
+    @staticmethod
+    def get_image_name(person, image_id):
+        image_path_pattern = '{}/{}_{}{}.jpg'
+        return image_path_pattern.format(person, person, '0' * (4 - len(image_id)), image_id)
+
+    def convert_positive(self, pairs, all_images):
+        positives = defaultdict(set)
+        for data in pairs:
+            image1 = self.get_image_name(data[0], data[1])
+            image2 = self.get_image_name(data[0], data[2])
+            positives[image1].add(image2)
+            all_images.add(image1)
+            all_images.add(image2)
+
+        return positives, all_images
+
+    def convert_negative(self, pairs, all_images):
+        negatives = defaultdict(set)
+        for data in pairs:
+            image1 = self.get_image_name(data[0], data[1])
+            image2 = self.get_image_name(data[2], data[3])
+            negatives[image1].add(image2)
+            all_images.add(image1)
+            all_images.add(image2)
+
+        return negatives, all_images
+
+    def prepare_annotation(self, ann_file: Path, train=False, landmarks_map=None):
+        positive_pairs, negative_pairs = [], []
+        ann_lines = read_txt(ann_file)
+        for line in ann_lines[1:]:  # skip header
+            pair = line.strip().split()
+            if len(pair) == 3:
+                positive_pairs.append(pair)
+            elif len(pair) == 4:
+                negative_pairs.append(pair)
+
+        all_images = set()
+        positive_data, all_images = self.convert_positive(positive_pairs, all_images)
+        negative_data, all_images = self.convert_negative(negative_pairs, all_images)
+
+        annotations = []
+        for image in all_images:
+            annotation = ReIdentificationClassificationAnnotation(image, positive_data[image], negative_data[image])
+
+            if landmarks_map:
+                image_landmarks = landmarks_map.get(image)
+                annotation.metadata['keypoints'] = image_landmarks
+
+            if train:
+                annotation.metadata['train'] = True
+
+            annotations.append(annotation)
+
+        return annotations
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/market1501.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/market1501.py
new file mode 100644
index 000000000..8c1e39e6b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/market1501.py
@@ -0,0 +1,41 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from __future__ import absolute_import, print_function
+
+import re
+
+from ._reid_common import check_dirs, read_directory
+from .format_converter import DirectoryBasedAnnotationConverter
+
+MARKET_IMAGE_PATTERN = re.compile(r'([-\d]+)_c(\d)')
+
+
+class Market1501Converter(DirectoryBasedAnnotationConverter):
+    __provider__ = "market1501"
+
+    def convert(self):
+        gallery = self.data_dir / 'bounding_box_test'
+        query = self.data_dir / 'query'
+
+        check_dirs((gallery, query), self.data_dir)
+        gallery_images, gallery_pids = read_directory(gallery, query=False, image_pattern=MARKET_IMAGE_PATTERN)
+        query_images, query_pids = read_directory(query, query=True, image_pattern=MARKET_IMAGE_PATTERN)
+        annotation = gallery_images + query_images
+
+        meta = {'num_identities': len(gallery_pids | query_pids)}
+
+        return annotation, meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/mars.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/mars.py
new file mode 100644
index 000000000..bb8de49a1
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/mars.py
@@ -0,0 +1,38 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from __future__ import absolute_import, print_function
+
+import re
+
+from ._reid_common import check_dirs, read_directory
+from .format_converter import DirectoryBasedAnnotationConverter
+
+MARS_IMAGE_PATTERN = re.compile(r'([\d]+)C(\d)')
+
+
+class MARSConverter(DirectoryBasedAnnotationConverter):
+    __provider__ = 'mars'
+
+    def convert(self):
+        gallery = self.data_dir / 'bbox_test'
+        query = self.data_dir / 'query'
+
+        check_dirs((gallery, query), self.data_dir)
+        gallery_images, gallery_pids = read_directory(gallery, query=False, image_pattern=MARS_IMAGE_PATTERN)
+        query_images, query_pids = read_directory(query, query=True, image_pattern=MARS_IMAGE_PATTERN)
+
+        return gallery_images + query_images, {'num_identities': len(gallery_pids | query_pids)}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/ms_coco.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/ms_coco.py
new file mode 100644
index 000000000..f1e41beab
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/ms_coco.py
@@ -0,0 +1,129 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tqdm import tqdm
+import numpy as np
+
+from ..config import BoolField
+from ..utils import read_json, convert_bboxes_xywh_to_x1y1x2y2
+from ..representation import DetectionAnnotation, PoseEstimationAnnotation
+from .format_converter import BaseFormatConverter, FileBasedAnnotationConverter, FileBasedAnnotationConverterConfig
+
+
+def get_image_annotation(image_id, annotations_):
+    return list(filter(lambda x: x['image_id'] == image_id, annotations_))
+
+
+def get_label_map(full_annotation, use_full_label_map=False, has_background=False):
+    labels = full_annotation['categories']
+
+    if not use_full_label_map:
+        label_offset = 1 if has_background else 0
+        label_id_to_label = {label['id']: label_id + label_offset for label_id, label in enumerate(labels)}
+        label_map = {label_id + label_offset: label['name'] for label_id, label in enumerate(labels)}
+    else:
+        label_id_to_label = {label['id']: label['id'] for label in labels}
+        label_map = {label['id']: label['name'] for label in labels}
+
+    return label_map, label_id_to_label
+
+
+class MSCocoDetectionConverterConfig(FileBasedAnnotationConverterConfig):
+    has_background = BoolField(optional=True)
+    use_full_label_map = BoolField(optional=True)
+
+
+class MSCocoDetectionConverter(BaseFormatConverter):
+    __provider__ = 'mscoco_detection'
+
+    _config_validator_type = MSCocoDetectionConverterConfig
+
+    def configure(self):
+        self.annotation_file = self.config['annotation_file']
+        self.has_background = self.config.get('has_background', False)
+        self.use_full_label_map = self.config.get('use_full_label_map', False)
+
+    def convert(self):
+        detection_annotations = []
+        full_annotation = read_json(self.annotation_file)
+        image_info = full_annotation['images']
+        annotations = full_annotation['annotations']
+
+        label_map, label_id_to_label = get_label_map(full_annotation, self.use_full_label_map, self.has_background)
+
+        meta = {}
+        if self.has_background:
+            label_map[0] = 'background'
+            meta['background_label'] = 0
+
+        meta.update({'label_map': label_map})
+
+        for image in tqdm(image_info):
+            identifier = image['file_name']
+            image_annotation = get_image_annotation(image['id'], annotations)
+            image_labels = [label_id_to_label[annotation['category_id']] for annotation in image_annotation]
+            xmins = [annotation['bbox'][0] for annotation in image_annotation]
+            ymins = [annotation['bbox'][1] for annotation in image_annotation]
+            widths = [annotation['bbox'][2] for annotation in image_annotation]
+            heights = [annotation['bbox'][3] for annotation in image_annotation]
+            xmaxs = np.add(xmins, widths)
+            ymaxs = np.add(ymins, heights)
+            is_crowd = [annotation['iscrowd'] for annotation in image_annotation]
+            detection_annotation = DetectionAnnotation(identifier, image_labels, xmins, ymins, xmaxs, ymaxs)
+            detection_annotation.metadata['iscrowd'] = is_crowd
+            detection_annotations.append(detection_annotation)
+
+        return detection_annotations, meta
+
+
+class MSCocoKeypointsConverter(FileBasedAnnotationConverter):
+    __provider__ = 'mscoco_keypoints'
+
+    def convert(self):
+        keypoints_annotations = []
+
+        full_annotation = read_json(self.annotation_file)
+        image_info = full_annotation['images']
+        annotations = full_annotation['annotations']
+        label_map, _ = get_label_map(full_annotation, True)
+        for image in image_info:
+            identifier = image['file_name']
+            image_annotation = get_image_annotation(image['id'], annotations)
+            if not image_annotation:
+                continue
+            x_vals, y_vals, visibility, labels, areas, is_crowd, bboxes, difficult = [], [], [], [], [], [], [], []
+            for target in image_annotation:
+                if target['num_keypoints'] == 0:
+                    difficult.append(len(x_vals))
+                labels.append(target['category_id'])
+                keypoints = target['keypoints']
+                x_vals.append(keypoints[::3])
+                y_vals.append(keypoints[1::3])
+                visibility.append(keypoints[2::3])
+                areas.append(target['area'])
+                bboxes.append(convert_bboxes_xywh_to_x1y1x2y2(*target['bbox']))
+                is_crowd.append(target['iscrowd'])
+            keypoints_annotation = PoseEstimationAnnotation(
+                identifier, np.array(x_vals), np.array(y_vals), np.array(visibility), np.array(labels)
+            )
+            keypoints_annotation.metadata['areas'] = areas
+            keypoints_annotation.metadata['rects'] = bboxes
+            keypoints_annotation.metadata['iscrowd'] = is_crowd
+            keypoints_annotation.metadata['difficult_boxes'] = difficult
+
+            keypoints_annotations.append(keypoints_annotation)
+
+        return keypoints_annotations, {'label_map': label_map}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/ncf_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/ncf_converter.py
new file mode 100644
index 000000000..5e7ac5906
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/ncf_converter.py
@@ -0,0 +1,74 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+from ..representation import HitRatioAnnotation
+from ..utils import read_txt
+from ..config import PathField, NumberField
+
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class NCFDatasetConverterConfig(BaseFormatConverterConfig):
+    raiting_file = PathField()
+    negative_file = PathField()
+    users_max_number = NumberField(optional=True)
+
+
+class NCFConverter(BaseFormatConverter):
+    __provider__ = "ncf_converter"
+
+    _config_validator_type = NCFDatasetConverterConfig
+
+    def configure(self):
+        self.raiting_file = self.config['raiting_file']
+        self.negative_file = self.config['negative_file']
+        if 'users_max_number' in self.config:
+            self.users_max_number = self.config['users_max_number']
+        else:
+            self.users_max_number = -1
+
+    def convert(self):
+        annotations = []
+        users = []
+
+        for file_row in read_txt(self.raiting_file):
+            user_id, item_id, _ = file_row.split()
+            users.append(user_id)
+            identifier = ['u:'+user_id, 'i:'+item_id]
+            annotations.append(HitRatioAnnotation(identifier))
+            if self.users_max_number > 0 and len(users) >= self.users_max_number:
+                break;
+
+        item_numbers = 1
+
+        items_neg = []
+        for file_row in read_txt(self.negative_file):
+            items = file_row.split()
+            items_neg.append(items)
+            if self.users_max_number > 0 and len(items_neg) >= self.users_max_number:
+                break;
+
+        if items_neg:
+            iterations = len(items_neg[0])
+            item_numbers += iterations
+            for i in range(iterations):
+                for user in users:
+                    item = items_neg[int(user)][i]
+                    identifier = ['u:' + user, 'i:'+ item]
+                    annotations.append(HitRatioAnnotation(identifier, False))
+
+        return annotations, {'users_number': len(users), 'item_numbers': item_numbers}
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py
new file mode 100644
index 000000000..651c52512
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/pascal_voc.py
@@ -0,0 +1,157 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from tqdm import tqdm
+
+from ..config import PathField, BoolField
+from ..representation import DetectionAnnotation, SegmentationAnnotation
+from ..representation.segmentation_representation import GTMaskLoader
+from ..utils import get_path, read_txt, read_xml
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+_VOC_CLASSES_DETECTION = (
+    'aeroplane', 'bicycle', 'bird', 'boat',
+    'bottle', 'bus', 'car', 'cat', 'chair',
+    'cow', 'diningtable', 'dog', 'horse',
+    'motorbike', 'person', 'pottedplant',
+    'sheep', 'sofa', 'train', 'tvmonitor'
+)
+
+_VOC_CLASSES_SEGMENTATION = tuple(['__background__']) + _VOC_CLASSES_DETECTION
+_SEGMENTATION_COLORS = ((
+    (0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0),
+    (0, 0, 128), (128, 0, 128), (0, 128, 128), (128, 128, 128),
+    (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0),
+    (64, 0, 128), (192, 0, 128), (64, 128, 128), (192, 128, 128),
+    (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0),
+    (0, 64, 128)
+))
+
+
+def prepare_detection_labels(has_background=True):
+    num_classes = len(_VOC_CLASSES_DETECTION)
+    labels_shift = 1 if has_background else 0
+    reversed_label_map = dict(zip(_VOC_CLASSES_DETECTION, list(range(labels_shift, num_classes + labels_shift))))
+    if has_background:
+        reversed_label_map['__background__'] = 0
+
+    return reversed_label_map
+
+
+def reverse_label_map(label_map):
+    return {value: key for key, value in label_map.items()}
+
+
+class PascalVOCSegmentationConverterConfig(BaseFormatConverterConfig):
+    image_set_file = PathField()
+    images_dir = PathField(optional=True, is_directory=True)
+    mask_dir = PathField(optional=True, is_directory=True)
+
+
+class PascalVOCSegmentationConverter(BaseFormatConverter):
+    __provider__ = 'voc_segmentation'
+
+    _config_validator_type = PascalVOCSegmentationConverterConfig
+
+    def configure(self):
+        self.image_set_file = self.config['image_set_file']
+        self.image_dir = self.config.get('images_dir')
+        if not self.image_dir:
+            self.image_dir = get_path(self.image_set_file.parent / 'JPEGImages')
+
+        self.mask_dir = self.config.get('mask_dir')
+        if not self.mask_dir:
+            self.mask_dir = get_path(self.image_set_file.parent / 'SegmentationClass')
+
+    def convert(self):
+
+        annotations = []
+        for image in read_txt(self.image_set_file):
+            annotation = SegmentationAnnotation(
+                str(self.image_dir.name / '{}.jpg'.format(image)),
+                str(self.mask_dir.name / '{}.png'.format(image)),
+                mask_loader=GTMaskLoader.SCIPY
+            )
+
+            annotations.append(annotation)
+
+        meta = {
+            'label_map': dict(enumerate(_VOC_CLASSES_SEGMENTATION)),
+            'background_label': 0,
+            'segmentation_colors': _SEGMENTATION_COLORS
+        }
+
+        return annotations, meta
+
+
+class PascalVOCDetectionConverterConfig(BaseFormatConverterConfig):
+    image_set_file = PathField()
+    annotations_dir = PathField(is_directory=True)
+    images_dir = PathField(optional=True, is_directory=True)
+    has_background = BoolField(optional=True)
+
+
+class PascalVOCDetectionConverter(BaseFormatConverter):
+    __provider__ = 'voc07'
+
+    _config_validator_type = PascalVOCDetectionConverterConfig
+
+    def configure(self):
+        self.image_set_file = self.config['image_set_file']
+        self.image_dir = self.config.get('images_dir')
+        if not self.image_dir:
+            self.image_dir = get_path(self.image_set_file.parent / 'JPEGImages')
+        self.annotations_dir = self.config['annotations_dir']
+        self.has_background = self.config.get('has_background', True)
+
+    def convert(self):
+        class_to_ind = prepare_detection_labels(self.has_background)
+
+        detections = []
+        for image in tqdm(read_txt(self.image_set_file, sep=None)):
+            root = read_xml(self.annotations_dir / '{}.xml'.format(image))
+
+            identifier = root.find('.//filename').text
+            get_path(self.image_dir / identifier)
+
+            labels, x_mins, y_mins, x_maxs, y_maxs = [], [], [], [], []
+            difficult_indices = []
+            for entry in root:
+                if not entry.tag.startswith('object'):
+                    continue
+
+                bbox = entry.find('bndbox')
+                difficult = int(entry.find('difficult').text)
+
+                if difficult == 1:
+                    difficult_indices.append(len(labels))
+
+                labels.append(class_to_ind[entry.find('name').text])
+                x_mins.append(float(bbox.find('xmin').text) - 1)
+                y_mins.append(float(bbox.find('ymin').text) - 1)
+                x_maxs.append(float(bbox.find('xmax').text) - 1)
+                y_maxs.append(float(bbox.find('ymax').text) - 1)
+
+            image_annotation = DetectionAnnotation(identifier, labels, x_mins, y_mins, x_maxs, y_maxs)
+            image_annotation.metadata['difficult_boxes'] = difficult_indices
+
+            detections.append(image_annotation)
+
+        meta = {'label_map': reverse_label_map(class_to_ind)}
+        if self.has_background:
+            meta['background_label'] = 0
+
+        return detections, meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/sample_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/sample_converter.py
new file mode 100644
index 000000000..88fb713ee
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/sample_converter.py
@@ -0,0 +1,100 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import re
+
+from ..config import PathField
+from ..representation import ClassificationAnnotation
+from ..utils import get_path, read_txt
+
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class SampleConverterConfig(BaseFormatConverterConfig):
+    data_dir = PathField(is_directory=True)
+
+
+class SampleConverter(BaseFormatConverter):
+    """
+    Sample dataset converter. All annotation converters should be derived from BaseFormatConverter class.
+    """
+
+    # register name for this converter
+    # this name will be used for converter class look up
+    __provider__ = 'sample'
+
+    _config_validator_type = SampleConverterConfig
+
+    def configure(self):
+        self.data_dir = self.config['data_dir']
+
+    def convert(self):
+        """
+        This method is executed automatically when convert.py is started.
+        All arguments are automatically forwarded from command line arguments.
+
+        Returns:
+            annotations: list of annotation representation objects.
+            meta: dictionary with additional dataset level metadata.
+        """
+
+        dataset_directory = get_path(self.data_dir, is_directory=True)
+
+        # read and convert annotation
+        labels = self._read_labels(dataset_directory / 'labels.txt')
+        annotations = self._convert_annotations(dataset_directory / 'test', labels)
+
+        # convert label list to label map
+        label_map = {i: labels[i] for i in range(len(labels))}
+        metadata = {'label_map': label_map}
+
+        return annotations, metadata
+
+    @staticmethod
+    def _read_labels(labels_file):
+        """
+        Extract label names from labels.txt file.
+        """
+
+        return read_txt(labels_file)
+
+    @staticmethod
+    def _convert_annotations(test_dir, labels):
+        """
+        Create annotation representations list.
+        """
+
+        # test directory contains files with names XXXX_class.png
+        # we use regular expression to extract class names
+        file_pattern_regex = re.compile(r'\d+_(\w+)\.png')
+
+        annotations = []
+        # iterate over all png images in test directory
+        for image in test_dir.glob('*.png'):
+            # get file name (e.g. from /foo/bar/image.png we get image.png)
+            image_base = str(image.parts[-1])
+
+            # extract class name from file name
+            regex_match = re.match(file_pattern_regex, image_base)
+            image_label = regex_match.group(1)
+
+            # look up class index in label list
+            class_id = labels.index(image_label)
+
+            # create annotation representation object
+            annotations.append(ClassificationAnnotation(image_base, class_id))
+
+        return annotations
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/super_resolution_converter.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/super_resolution_converter.py
new file mode 100644
index 000000000..4c053f9b5
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/super_resolution_converter.py
@@ -0,0 +1,52 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from ..config import PathField, StringField, BoolField
+from ..representation import SuperResolutionAnnotation
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class SRConverterConfig(BaseFormatConverterConfig):
+    data_dir = PathField(is_directory=True)
+    lr_suffix = StringField(optional=True)
+    hr_suffix = StringField(optional=True)
+    two_streams = BoolField(optional=True)
+
+
+class SRConverter(BaseFormatConverter):
+    __provider__ = 'super_resolution'
+
+    _config_validator_type = SRConverterConfig
+
+    def configure(self):
+        self.data_dir = self.config['data_dir']
+        self.lr_suffix = self.config.get('lr_suffix', 'lr')
+        self.hr_suffix = self.config.get('hr_suffix', 'hr')
+        self.two_streams = self.config.get('two_streams', False)
+
+    def convert(self):
+        file_list_lr = []
+        for file_in_dir in self.data_dir.iterdir():
+            if self.lr_suffix in file_in_dir.parts[-1]:
+                file_list_lr.append(file_in_dir)
+
+        annotation = []
+        for lr_file in file_list_lr:
+            lr_file_name = lr_file.parts[-1]
+            hr_file_name = self.hr_suffix.join(lr_file_name.split(self.lr_suffix))
+            identifier = [lr_file_name, hr_file_name] if self.two_streams else lr_file_name
+            annotation.append(SuperResolutionAnnotation(identifier, hr_file_name))
+
+        return annotation, None
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/vgg_face_regression.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/vgg_face_regression.py
new file mode 100644
index 000000000..53c7c5784
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/vgg_face_regression.py
@@ -0,0 +1,64 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..config import PathField
+from ..representation import FacialLandmarksAnnotation
+from ..utils import convert_bboxes_xywh_to_x1y1x2y2, read_csv
+from .format_converter import BaseFormatConverter, BaseFormatConverterConfig
+
+
+class LandmarksRegressionConfig(BaseFormatConverterConfig):
+    landmarks_csv_file = PathField()
+    bbox_csv_file = PathField(optional=True)
+
+
+class LandmarksRegression(BaseFormatConverter):
+    __provider__ = 'landmarks_regression'
+
+    _config_validator_type = LandmarksRegressionConfig
+
+    def configure(self):
+        self.landmarks_csv = self.config['landmarks_csv_file']
+        self.bbox_csv = self.config.get('bbox_csv_file')
+
+    def convert(self):
+        annotations = []
+        for row in read_csv(self.landmarks_csv):
+            identifier = row['NAME_ID'] + '.jpg'
+            x_values = np.array(
+                [float(row["P1X"]), float(row["P2X"]), float(row["P3X"]), float(row["P4X"]), float(row["P5X"])]
+            )
+            y_values = np.array(
+                [float(row["P1Y"]), float(row["P2Y"]), float(row["P3Y"]), float(row["P4Y"]), float(row["P5Y"])]
+            )
+
+            annotation = FacialLandmarksAnnotation(identifier, x_values, y_values)
+            annotation.metadata['left_eye'] = 0
+            annotation.metadata['right_eye'] = 1
+            annotations.append(annotation)
+
+        if self.bbox_csv:
+            for index, row in enumerate(read_csv(self.bbox_csv)):
+                annotations[index].metadata['rect'] = convert_bboxes_xywh_to_x1y1x2y2(
+                    int(row["X"]), int(row["Y"]), int(row["W"]), int(row["H"])
+                )
+
+        meta = {
+            'label_map': {0: 'Left Eye', 1: 'Right Eye', 2: 'Nose', 3: 'Left Mouth Corner', 4: 'Right Mouth Corner'}
+        }
+        return annotations, meta
diff --git a/tools/accuracy_checker/accuracy_checker/annotation_converters/wider.py b/tools/accuracy_checker/accuracy_checker/annotation_converters/wider.py
new file mode 100644
index 000000000..3b5876fde
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/annotation_converters/wider.py
@@ -0,0 +1,64 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+from ..config import NumberField
+from ..representation import DetectionAnnotation
+from ..utils import convert_bboxes_xywh_to_x1y1x2y2, read_txt
+
+from .format_converter import BaseFormatConverter, FileBasedAnnotationConverterConfig
+
+
+class WiderConverterConfig(FileBasedAnnotationConverterConfig):
+    label_start = NumberField(floats=False, optional=True)
+
+
+class WiderFormatConverter(BaseFormatConverter):
+    __provider__ = 'wider'
+
+    _config_validator_type = WiderConverterConfig
+
+    def configure(self):
+        self.annotation_file = self.config['annotation_file']
+        self.label_start = self.config.get('label_start', 1)
+
+    def convert(self):
+        image_annotations = read_txt(self.annotation_file)
+        image_ids = []
+        for image_id, line in enumerate(image_annotations):
+            if '.jpg' in line:
+                image_ids.append(image_id)
+
+        annotations = []
+        for image_id in image_ids:
+            identifier = image_annotations[image_id]
+            bbox_count = image_annotations[image_id + 1]
+            bbox_lines = image_annotations[image_id + 2:image_id + 2 + int(bbox_count)]
+
+            x_mins, y_mins, x_maxs, y_maxs = [], [], [], []
+            for bbox in bbox_lines:
+                x_min, y_min, x_max, y_max = convert_bboxes_xywh_to_x1y1x2y2(*(map(float, (bbox.split(' ')[0:4]))))
+                x_mins.append(x_min)
+                y_mins.append(y_min)
+                x_maxs.append(x_max)
+                y_maxs.append(y_max)
+
+            annotations.append(DetectionAnnotation(
+                identifier, [self.label_start] * len(x_mins),
+                x_mins, y_mins, x_maxs, y_maxs
+            ))
+
+        return annotations, {'label_map': {0: '__background__', self.label_start: 'face'}, 'background_label': 0}
diff --git a/tools/accuracy_checker/accuracy_checker/config/__init__.py b/tools/accuracy_checker/accuracy_checker/config/__init__.py
new file mode 100644
index 000000000..a32b29a4c
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/config/__init__.py
@@ -0,0 +1,48 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .config_validator import (
+    BaseField,
+    StringField,
+    ListField,
+    BoolField,
+    PathField,
+    NumberField,
+    DictField,
+
+    BaseValidator,
+    ConfigError,
+    ConfigValidator
+)
+
+
+from .config_reader import ConfigReader
+
+__all__ = [
+    'BaseField',
+    'StringField',
+    'ListField',
+    'BoolField',
+    'PathField',
+    'NumberField',
+    'DictField',
+
+    'BaseValidator',
+    'ConfigError',
+    'ConfigValidator',
+
+    'ConfigReader'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/config/config_reader.py b/tools/accuracy_checker/accuracy_checker/config/config_reader.py
new file mode 100644
index 000000000..3430090fd
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/config/config_reader.py
@@ -0,0 +1,281 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import copy
+from pathlib import Path
+
+import warnings
+
+from ..utils import read_yaml, to_lower_register, contains_any
+from .config_validator import ConfigError
+
+
+class ConfigReader:
+    """
+    Class for parsing input config.
+    """
+
+    @staticmethod
+    def merge(arguments):
+        """
+        Args:
+            arguments: command-line arguments.
+        Returns:
+            dictionary containing configuration.
+        """
+
+        global_config, local_config = ConfigReader._read_configs(arguments)
+        if not local_config:
+            raise ConfigError('Missing local config')
+
+        ConfigReader._check_local_config(local_config)
+        ConfigReader._prepare_global_configs(global_config)
+
+        config = ConfigReader._merge_configs(global_config, local_config)
+
+        ConfigReader._provide_cmd_arguments(arguments, config)
+        ConfigReader._merge_paths_with_prefixes(arguments, config)
+        ConfigReader._filter_launchers(config, arguments)
+
+        return config
+
+    @staticmethod
+    def _read_configs(arguments):
+        global_config = read_yaml(arguments.definitions) if arguments.definitions else None
+        local_config = read_yaml(arguments.config)
+
+        return global_config, local_config
+
+    @staticmethod
+    def _check_local_config(config):
+        models = config.get('models')
+        if not models:
+            raise ConfigError('Missed "{}" in local config'.format('models'))
+
+        def _is_requirements_missed(target, requirements):
+            return list(filter(lambda entry: not target.get(entry), requirements))
+
+        required_model_entries = ['name', 'launchers', 'datasets']
+        required_dataset_entries = ['name']
+        required_dataset_error = 'Model {} must specify {} for each dataset'
+        for model in models:
+            if _is_requirements_missed(model, required_model_entries):
+                raise ConfigError('Each model must specify {}'.format(required_model_entries))
+
+            if list(filter(lambda entry: _is_requirements_missed(entry, required_dataset_entries), model['datasets'])):
+                raise ConfigError(required_dataset_error.format(model['name'], ','.join(required_dataset_entries)))
+
+    @staticmethod
+    def _prepare_global_configs(global_configs):
+        if not global_configs or 'datasets' not in global_configs:
+            return
+
+        datasets = global_configs['datasets']
+
+        def merge(local_entries, global_entries, identifier):
+            if not local_entries or not global_entries:
+                return
+
+            for i, local in enumerate(local_entries):
+                local_identifier = local.get(identifier)
+                if not local_identifier:
+                    continue
+
+                local_entries[i] = ConfigReader._merge_configs_by_identifier(global_entries, local, identifier)
+
+        for dataset in datasets:
+            merge(dataset.get('preprocessing'), global_configs.get('preprocessing'), 'type')
+            merge(dataset.get('metrics'), global_configs.get('metrics'), 'type')
+            merge(dataset.get('postprocessing'), global_configs.get('postprocessing'), 'type')
+
+    @staticmethod
+    def _merge_configs(global_configs, local_config):
+        config = copy.deepcopy(local_config)
+        if not global_configs:
+            return config
+
+        models = config.get('models')
+        for model in models:
+            for i, launcher_entry in enumerate(model['launchers']):
+                model['launchers'][i] = ConfigReader._merge_configs_by_identifier(
+                    global_configs['launchers'], launcher_entry, 'framework'
+                )
+
+            for i, dataset in enumerate(model['datasets']):
+                model['datasets'][i] = ConfigReader._merge_configs_by_identifier(
+                    global_configs['datasets'], dataset, 'name'
+                )
+
+        return config
+
+    @staticmethod
+    def _merge_configs_by_identifier(global_config, local_config, identifier):
+        local_identifier = local_config.get(identifier)
+        if local_identifier is None:
+            return local_config
+
+        matched = []
+        for config in global_config:
+            global_identifier = config.get(identifier)
+            if global_identifier is None:
+                continue
+
+            if global_identifier != local_identifier:
+                continue
+
+            matched.append(config)
+
+        config = copy.deepcopy(matched[0] if matched else {})
+        for key, value in local_config.items():
+            config[key] = value
+
+        return config
+
+    @staticmethod
+    def _merge_paths_with_prefixes(arguments, config):
+        args = arguments if isinstance(arguments, dict) else vars(arguments)
+        entries_paths = {
+            'launchers': {
+                'model': 'models',
+                'weights': 'models',
+                'caffe_model': 'models',
+                'caffe_weights': 'models',
+                'tf_model': 'models',
+                'mxnet_weights': 'models',
+                'onnx_model': 'models',
+                'kaldi_model': 'models',
+                'cpu_extensions': 'extensions',
+                'gpu_extensions': 'extensions',
+                'bitstream': 'bitstreams',
+                'affinity_map' : 'affinity_map'
+            },
+            'datasets': {
+                'data_source': 'source',
+                'segmentation_masks_source': 'source',
+                'annotation': 'annotations',
+                'dataset_meta': 'annotations'
+            }
+        }
+
+        def merge_entry_paths(keys, value):
+            for field, argument in keys.items():
+                if field not in value:
+                    continue
+
+                config_path = Path(value[field])
+                if config_path.is_absolute():
+                    value[field] = Path(value[field])
+                    continue
+
+                if not args[argument]:
+                    continue
+
+                value[field] = args[argument] / config_path
+
+        def create_command_line_for_conversion(config):
+            mapping = {}
+            value = 'source'
+            for key in config:
+                if key.endswith('file') or key.endswith('dir'):
+                    mapping[key] = value
+            return mapping
+
+        for model in config['models']:
+            for entry, command_line_arg in entries_paths.items():
+                if entry not in model:
+                    continue
+
+                for config_entry in model[entry]:
+                    if entry == 'datasets':
+                        annotation_conversion_config = config_entry.get('annotation_conversion')
+                        if annotation_conversion_config:
+                            command_line_conversion = (create_command_line_for_conversion(annotation_conversion_config))
+                            merge_entry_paths(command_line_conversion, annotation_conversion_config)
+                    merge_entry_paths(command_line_arg, config_entry)
+
+    @staticmethod
+    def _provide_cmd_arguments(arguments, config):
+        def merge_converted_model_path(converted_models_dir, mo_output_dir):
+            if mo_output_dir:
+                mo_output_dir = Path(mo_output_dir)
+                if mo_output_dir.is_absolute():
+                    return mo_output_dir
+                return converted_models_dir / mo_output_dir
+            return converted_models_dir
+
+        additional_keys = [
+            'model_optimizer', 'tf_custom_op_config_dir',
+            'tf_obj_detection_api_pipeline_config_path',
+            'cpu_extensions_mode'
+        ]
+        arguments_dict = arguments if isinstance(arguments, dict) else vars(arguments)
+        update_launcher_entry = {}
+
+        for key in additional_keys:
+            value = arguments_dict.get(key)
+            if value:
+                update_launcher_entry['_{}'.format(key)] = value
+
+        for model in config['models']:
+            for launcher_entry in model['launchers']:
+                if launcher_entry['framework'].lower() != 'dlsdk':
+                    continue
+
+                launcher_entry.update(update_launcher_entry)
+                models_prefix = arguments.models
+                if models_prefix:
+                    launcher_entry['_models_prefix'] = models_prefix
+
+                if not arguments.converted_models:
+                    continue
+
+                mo_params = launcher_entry.get('mo_params', {})
+
+                mo_params.update({
+                    'output_dir': merge_converted_model_path(arguments.converted_models, mo_params.get('output_dir'))
+                })
+
+                launcher_entry['mo_params'] = mo_params
+
+                if arguments.aocl:
+                    launcher_entry['_aocl'] = arguments.aocl
+
+    @staticmethod
+    def _filter_launchers(config, arguments):
+        def filtered(launcher, targets):
+            target_tags = args.get('target_tags') or []
+            if target_tags:
+                if not contains_any(target_tags, launcher.get('tags', [])):
+                    return True
+
+            config_framework = launcher['framework'].lower()
+            target_framework = (args.get('target_framework') or config_framework).lower()
+            if config_framework != target_framework:
+                return True
+
+            return targets and launcher.get('device', '').lower() not in targets
+
+        args = arguments if isinstance(arguments, dict) else vars(arguments)
+        target_devices = to_lower_register(args.get('target_devices') or [])
+
+        for model in config['models']:
+            launchers = model['launchers']
+            launchers = [launcher for launcher in launchers if not filtered(launcher, target_devices)]
+
+            if not launchers:
+                warnings.warn('Model "{}" has no launchers'.format(model['name']))
+
+            model['launchers'] = launchers
diff --git a/tools/accuracy_checker/accuracy_checker/config/config_validator.py b/tools/accuracy_checker/accuracy_checker/config/config_validator.py
new file mode 100644
index 000000000..edb1e24cd
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/config/config_validator.py
@@ -0,0 +1,339 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import enum
+import math
+import re
+import warnings
+from collections import OrderedDict
+from copy import copy
+from functools import partial
+from pathlib import Path
+
+from ..utils import get_path, string_to_bool
+
+
+class ConfigError(ValueError):
+    pass
+
+
+class BaseValidator:
+    def __init__(self, on_error=None, additional_validator=None):
+        self.on_error = on_error
+        self.additional_validator = additional_validator
+
+        self.field_uri = None
+
+    def validate(self, entry, field_uri=None):
+        field_uri = field_uri or self.field_uri
+        if self.additional_validator and not self.additional_validator(entry, field_uri):
+            self.raise_error(entry, field_uri)
+
+    def raise_error(self, value, field_uri, reason=None):
+        if self.on_error:
+            self.on_error(value, field_uri, reason)
+
+        error_message = 'Invalid value "{value}" for {field_uri}'.format(value=value, field_uri=field_uri)
+        if reason:
+            error_message = '{error_message}: {reason}'.format(error_message=error_message, reason=reason)
+
+        raise ConfigError(error_message.format(value, field_uri))
+
+
+class _ExtraArgumentBehaviour(enum.Enum):
+    WARN = 'warn'
+    IGNORE = 'ignore'
+    ERROR = 'error'
+
+
+def _is_dict_like(entry):
+    return hasattr(entry, '__iter__') and hasattr(entry, '__getitem__')
+
+
+class ConfigValidator(BaseValidator):
+    WARN_ON_EXTRA_ARGUMENT = _ExtraArgumentBehaviour.WARN
+    ERROR_ON_EXTRA_ARGUMENT = _ExtraArgumentBehaviour.ERROR
+    IGNORE_ON_EXTRA_ARGUMENT = _ExtraArgumentBehaviour.IGNORE
+
+    def __init__(self, config_uri, on_extra_argument=WARN_ON_EXTRA_ARGUMENT, **kwargs):
+        super().__init__(**kwargs)
+        self.on_extra_argument = on_extra_argument
+
+        self.fields = OrderedDict()
+        self.field_uri = config_uri
+        for name in dir(self):
+            value = getattr(self, name)
+            if not isinstance(value, BaseField):
+                continue
+
+            field_copy = copy(value)
+            field_copy.field_uri = "{}.{}".format(config_uri, name)
+            self.fields[name] = field_copy
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        field_uri = field_uri or self.field_uri
+        if not _is_dict_like(entry):
+            raise ConfigError("{} is expected to be dict-like".format(field_uri))
+
+        extra_arguments = []
+        for key in entry:
+            if key not in self.fields:
+                extra_arguments.append(key)
+                continue
+
+            self.fields[key].validate(entry[key])
+
+        required_fields = set(name for name, value in self.fields.items() if not value.optional)
+        missing_arguments = required_fields.difference(entry)
+
+        if missing_arguments:
+            arguments = ', '.join(map(str, missing_arguments))
+            self.raise_error(
+                entry, field_uri, "Invalid config for {}: missing required fields: {}".format(field_uri, arguments)
+            )
+
+        if extra_arguments:
+            unknown_options_error = "specifies unknown options: {}".format(extra_arguments)
+            message = "{} {}".format(field_uri, unknown_options_error)
+
+            if self.on_extra_argument == _ExtraArgumentBehaviour.WARN:
+                warnings.warn(message)
+            if self.on_extra_argument == _ExtraArgumentBehaviour.ERROR:
+                self.raise_error(entry, field_uri, message)
+
+    @property
+    def known_fields(self):
+        return set(self.fields)
+
+    def raise_error(self, value, field_uri, reason=None):
+        if self.on_error:
+            self.on_error(value, field_uri, reason)
+        else:
+            raise ConfigError(reason)
+
+
+class BaseField(BaseValidator):
+    def __init__(self, optional=False, allow_none=False, **kwargs):
+        super().__init__(**kwargs)
+        self.optional = optional
+        self.allow_none = allow_none
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        field_uri = field_uri or self.field_uri
+        if not self.allow_none and entry is None:
+            raise ConfigError("{} is not allowed to be None".format(field_uri))
+
+    @property
+    def type(self):
+        return str
+
+
+class StringField(BaseField):
+    def __init__(self, choices=None, regex=None, case_sensitive=False, **kwargs):
+        super().__init__(**kwargs)
+        self.choices = choices if case_sensitive or not choices else list(map(str.lower, choices))
+        self.regex = re.compile(regex, flags=re.IGNORECASE if not case_sensitive else 0) if regex else None
+        self.case_sensitive = case_sensitive
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        if entry is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        source_entry = entry
+
+        if not isinstance(entry, str):
+            raise ConfigError("{} is expected to be str".format(source_entry))
+
+        if not self.case_sensitive:
+            entry = entry.lower()
+
+        if self.choices and entry not in self.choices:
+            reason = "unsupported option, expected one of: {}".format(', '.join(map(str, self.choices)))
+            self.raise_error(source_entry, field_uri, reason)
+
+        if self.regex and not self.regex.match(entry):
+            self.raise_error(source_entry, field_uri, reason=None)
+
+    @property
+    def type(self):
+        return str
+
+
+class DictField(BaseField):
+    def __init__(self, key_type=None, value_type=None, validate_keys=True, validate_values=True, allow_empty=True,
+                 **kwargs):
+        super().__init__(**kwargs)
+        self.validate_keys = validate_keys if key_type else False
+        self.validate_values = validate_values if value_type else False
+        self.key_type = _get_field_type(key_type)
+        self.value_type = _get_field_type(value_type)
+
+        self.allow_empty = allow_empty
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        if entry is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        if not isinstance(entry, dict):
+            raise ConfigError("{} is expected to be dict".format(field_uri))
+
+        if not entry and not self.allow_empty:
+            self.raise_error(entry, field_uri, "value is empty")
+
+        for k, v in entry.items():
+            if self.validate_keys:
+                uri = "{}.keys.{}".format(field_uri, k)
+                self.key_type.validate(k, uri)
+
+            if self.validate_values:
+                uri = "{}.{}".format(field_uri, k)
+
+                self.value_type.validate(v, uri)
+    @property
+    def type(self):
+        return dict
+
+
+class ListField(BaseField):
+    def __init__(self, value_type=None, validate_values=True, allow_empty=True, **kwargs):
+        super().__init__(**kwargs)
+        self.validate_values = validate_values if value_type else False
+        self.value_type = _get_field_type(value_type)
+        self.allow_empty = allow_empty
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        if entry is None:
+            return
+
+        if not isinstance(entry, list):
+            raise ConfigError("{} is expected to be list".format(field_uri))
+
+        if not entry and not self.allow_empty:
+            self.raise_error(entry, field_uri, "value is empty")
+
+        if self.validate_values:
+            for i, val in enumerate(entry):
+                self.value_type.validate(val, "{}[{}]".format(val, i))
+
+    @property
+    def type(self):
+        return list
+
+
+class NumberField(BaseField):
+    def __init__(self, floats=True, min_value=None, max_value=None, allow_inf=False, allow_nan=False, **kwargs):
+        super().__init__(**kwargs)
+        self.floats = floats
+        self.min = min_value
+        self.max = max_value
+        self.allow_inf = allow_inf
+        self.allow_nan = allow_nan
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        if entry is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        if not self.floats and isinstance(entry, float):
+            raise ConfigError("{} is expected to be int".format(field_uri))
+        if not isinstance(entry, int) and not isinstance(entry, float):
+            raise ConfigError("{} is expected to be number".format(field_uri))
+
+        if self.min is not None and entry < self.min:
+            reason = "value is less than minimal allowed - {}".format(self.min)
+            self.raise_error(entry, field_uri, reason)
+        if self.max is not None and entry > self.max:
+            reason = "value is greater than maximal allowed - {}".format(self.max)
+            self.raise_error(entry, field_uri, reason)
+
+        if math.isinf(entry) and not self.allow_inf:
+            self.raise_error(entry, field_uri, "value is infinity")
+        if math.isnan(entry) and not self.allow_nan:
+            self.raise_error(entry, field_uri, "value is NaN")
+
+    @property
+    def type(self):
+        return float if self.floats else int
+
+
+class PathField(BaseField):
+    def __init__(self, is_directory=False, **kwargs):
+        super().__init__(**kwargs)
+        self.is_directory = is_directory
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        if entry is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        try:
+            get_path(entry, self.is_directory)
+        except TypeError:
+            self.raise_error(entry, field_uri, "values is expected to be path-like")
+        except FileNotFoundError:
+            self.raise_error(entry, field_uri, "path does not exist")
+        except NotADirectoryError:
+            self.raise_error(entry, field_uri, "path is not a directory")
+        except IsADirectoryError:
+            self.raise_error(entry, field_uri, "path is a directory, regular file expected")
+
+    @property
+    def type(self):
+        return Path
+
+
+class BoolField(BaseField):
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        if entry is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        if not isinstance(entry, bool):
+            raise ConfigError("{} is expected to be bool".format(field_uri))
+
+    @property
+    def type(self):
+        return string_to_bool
+
+
+def _get_field_type(key_type):
+    if not isinstance(key_type, BaseField):
+        type_ = _TYPE_TO_FIELD_CLASS.get(key_type)
+        if callable(type_):
+            return type_()
+
+    return key_type
+
+
+_TYPE_TO_FIELD_CLASS = {
+    int: partial(NumberField, floats=False),
+    float: partial(NumberField, floats=True),
+    dict: partial(DictField, validate_keys=False, validate_values=False),
+    list: partial(ListField, validate_values=False),
+    Path: PathField,
+    str: StringField,
+    bool: BoolField,
+}
diff --git a/tools/accuracy_checker/accuracy_checker/data_readers/__init__.py b/tools/accuracy_checker/accuracy_checker/data_readers/__init__.py
new file mode 100644
index 000000000..73e1bc7f7
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/data_readers/__init__.py
@@ -0,0 +1,40 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .data_reader import (
+    BaseReader,
+    DataReaderField,
+    ReaderCombiner,
+    JSONReaderConfig,
+    OpenCVFrameReader,
+    OpenCVImageReader,
+    PillowImageReader,
+    ScipyImageReader,
+    NiftiImageReader
+
+)
+
+__all__ = [
+    'BaseReader',
+    'DataReaderField',
+    'ReaderCombiner',
+    'JSONReaderConfig',
+    'OpenCVFrameReader',
+    'OpenCVImageReader',
+    'PillowImageReader',
+    'ScipyImageReader',
+    'NiftiImageReader'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/data_readers/data_reader.py b/tools/accuracy_checker/accuracy_checker/data_readers/data_reader.py
new file mode 100644
index 000000000..0aaa6fc74
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/data_readers/data_reader.py
@@ -0,0 +1,216 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from functools import singledispatch
+from collections import OrderedDict
+import re
+import cv2
+from PIL import Image
+import scipy.misc
+import numpy as np
+import nibabel as nib
+
+from ..utils import get_path, read_json
+from ..dependency import ClassProvider
+from ..config import BaseField, StringField, ConfigValidator, ConfigError, DictField
+
+
+class DataReaderField(BaseField):
+    def validate(self, entry_, field_uri=None):
+        super().validate(entry_, field_uri)
+
+        if entry_ is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        if isinstance(entry_, str):
+            StringField(choices=BaseReader.providers).validate(entry_, 'reader')
+        elif isinstance(entry_, dict):
+            class DictReaderValidator(ConfigValidator):
+                type = StringField(choices=BaseReader.providers)
+            dict_reader_validator = DictReaderValidator(
+                'reader', on_extra_argument=DictReaderValidator.IGNORE_ON_EXTRA_ARGUMENT
+            )
+            dict_reader_validator.validate(entry_)
+        else:
+            self.raise_error(entry_, field_uri, 'reader must be either string or dictionary')
+
+
+class BaseReader(ClassProvider):
+    __provider_type__ = 'reader'
+
+    def __init__(self, config=None):
+        self.config = config
+        self.data_source_is_dir = True
+        self.data_source_optional = False
+        self.read_dispatcher = singledispatch(self.read)
+        self.read_dispatcher.register(list, self._read_list)
+
+        self.validate_config()
+        self.configure()
+
+    def __call__(self, *args, **kwargs):
+        return self.read_dispatcher(*args, **kwargs)
+
+    def configure(self):
+        pass
+
+    def validate_config(self):
+        pass
+
+    def read(self, data_id, data_dir):
+        raise NotImplementedError
+
+    def _read_list(self, data_id, data_dir):
+        return [self.read(identifier, data_dir) for identifier in data_id]
+
+
+class ReaderCombinerConfig(ConfigValidator):
+    type = StringField()
+    scheme = DictField(
+        value_type=DataReaderField(), key_type=StringField(), allow_empty=False
+    )
+
+
+class ReaderCombiner(BaseReader):
+    __provider__ = 'combine_reader'
+
+    def validate_config(self):
+        config_validator = ReaderCombinerConfig('reader_combiner_config')
+        config_validator.validate(self.config)
+
+    def configure(self):
+        scheme = self.config['scheme']
+        reading_scheme = OrderedDict()
+        for pattern, reader_config in scheme.items():
+            reader = BaseReader.provide(
+                reader_config['type'] if isinstance(reader_config, dict) else reader_config, reader_config
+            )
+            pattern = re.compile(pattern)
+            reading_scheme[pattern] = reader
+
+        self.reading_scheme = reading_scheme
+
+    def read(self, data_id, data_dir):
+        for pattern, reader in self.reading_scheme.items():
+            if pattern.match(str(data_id)):
+                return reader.read(data_id, data_dir)
+
+        raise ConfigError('suitable data reader for {} not found'.format(data_id))
+
+
+class OpenCVImageReader(BaseReader):
+    __provider__ = 'opencv_imread'
+
+    def read(self, data_id, data_dir):
+        return cv2.imread(str(get_path(data_dir / data_id)))
+
+
+class PillowImageReader(BaseReader):
+    __provider__ = 'pillow_imread'
+
+    def read(self, data_id, data_dir):
+        return np.array(Image.open(str(get_path(data_dir / data_id))))
+
+
+class ScipyImageReader(BaseReader):
+    __provider__ = 'scipy_imread'
+
+    def read(self, data_id, data_dir):
+        return np.array(scipy.misc.imread(str(get_path(data_dir / data_id))))
+
+class OpenCVFrameReader(BaseReader):
+    __provider__ = 'opencv_capture'
+
+    def __init__(self, config=None):
+        super().__init__(config)
+        self.data_source_is_dir = False
+        self.source = None
+        self.current = -1
+
+    def read(self, data_id, data_dir):
+        # source video changed, capture initialization
+        if data_dir != self.source:
+            self.source = data_dir
+            self.videocap = cv2.VideoCapture(str(self.source))
+            self.current = -1
+
+        if data_id < 0:
+            raise IndexError('frame with {} index can not be grabbed, non-negative index is expected')
+        if data_id < self.current:
+            self.videocap.set(cv2.CAP_PROP_POS_FRAMES, data_id)
+            self.current = data_id - 1
+
+        return self._read_sequence(data_id)
+
+    def _read_sequence(self, data_id):
+        frame = None
+        while self.current != data_id:
+            success, frame = self.videocap.read()
+            self.current += 1
+            if not success:
+                raise EOFError('frame with {} index does not exists in {}'.format(self.current, self.source))
+        return frame
+
+
+class JSONReaderConfig(ConfigValidator):
+    type = StringField()
+    key = StringField(optional=True, case_sensitive=True)
+
+
+class JSONReader(BaseReader):
+    __provider__ = 'json_reader'
+
+    def validate_config(self):
+        config_validator = JSONReaderConfig('json_reader_config')
+        config_validator.validate(self.config)
+
+    def configure(self):
+        self.key = self.config.get('key')
+
+    def read(self, data_id, data_dir):
+        data = read_json(str(data_dir / data_id))
+        if self.key:
+            data = data.get(self.key)
+
+            if not data:
+                raise ConfigError('{} does not contain {}'.format(data_id, self.key))
+
+        return np.array(data).astype(np.float32)
+
+class NCF_DataReader(BaseReader):
+    __provider__ = 'ncf_data_reader'
+
+    def __init__(self, config=None):
+        super().__init__(config)
+        self.data_source_optional = True
+
+    def read(self, data_id, data_dir):
+        if not isinstance(data_id, str):
+            raise IndexError('Data identifier must be a string')
+
+        return float(data_id.split(":")[1])
+
+class NiftiImageReader(BaseReader):
+    __provider__ = 'nifti_reader'
+
+    def read(self, data_id, data_dir):
+        nib_image = nib.load(str(get_path(data_dir / data_id)))
+        image = np.array(nib_image.dataobj)
+        if len(image.shape) != 4:  # Make sure 4D
+            image = np.expand_dims(image, -1)
+        image = np.swapaxes(np.array(image), 0, -2)
+        return image
diff --git a/tools/accuracy_checker/accuracy_checker/dataset.py b/tools/accuracy_checker/accuracy_checker/dataset.py
new file mode 100644
index 000000000..f4ee1cb5d
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/dataset.py
@@ -0,0 +1,190 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from pathlib import Path
+import numpy as np
+
+from .annotation_converters import BaseFormatConverter, save_annotation, make_subset
+from .data_readers import BaseReader, DataReaderField
+from .config import ConfigValidator, StringField, PathField, ListField, DictField, BaseField, NumberField, ConfigError
+from .utils import JSONDecoderWithAutoConversion, read_json, get_path, contains_all
+from .representation import BaseRepresentation
+
+
+class DataRepresentation:
+    def __init__(self, data, meta=None, identifier=''):
+        self.identifier = identifier
+        self.data = data
+        self.metadata = meta or {}
+        if np.isscalar(data):
+            self.metadata['image_size'] = 1
+        elif isinstance(data, list) and np.isscalar(data[0]):
+            self.metadata['image_size'] = len(data)
+        else:
+            self.metadata['image_size'] = data.shape if not isinstance(data, list) else data[0].shape
+
+
+class DatasetConfig(ConfigValidator):
+    """
+    Specifies configuration structure for dataset
+    """
+    name = StringField()
+    annotation = BaseField(optional=True)
+    data_source = PathField()
+    dataset_meta = BaseField(optional=True)
+    metrics = ListField(allow_empty=False)
+    postprocessing = ListField(allow_empty=False, optional=True)
+    preprocessing = ListField(allow_empty=False, optional=True)
+    reader = DataReaderField(optional=True)
+    annotation_conversion = DictField(optional=True)
+    subsample_size = BaseField(optional=True)
+    subsample_seed = NumberField(floats=False, min_value=0, optional=True)
+
+
+class Dataset:
+    def __init__(self, config_entry, preprocessor):
+        self._config = config_entry
+        self._preprocessor = preprocessor
+
+        self.batch = 1
+
+        dataset_config = DatasetConfig('Dataset')
+        data_reader_config = self._config.get('reader', 'opencv_imread')
+        if isinstance(data_reader_config, str):
+            self.read_image_fn = BaseReader.provide(data_reader_config)
+        elif isinstance(data_reader_config, dict):
+            self.read_image_fn = BaseReader.provide(data_reader_config['type'], data_reader_config)
+        else:
+            raise ConfigError('reader should be dict or string')
+
+        dataset_config.fields['data_source'].is_directory = self.read_image_fn.data_source_is_dir
+        dataset_config.fields['data_source'].optional = self.read_image_fn.data_source_optional
+        dataset_config.validate(self._config)
+        annotation, meta = None, None
+        self._images_dir = Path(self._config.get('data_source', ''))
+        if 'annotation_conversion' in self._config:
+            annotation, meta = self._convert_annotation()
+        else:
+            stored_annotation = self._config.get('annotation')
+            if stored_annotation:
+                annotation = read_annotation(get_path(stored_annotation))
+                meta = self._load_meta()
+
+        if not annotation:
+            raise ConfigError('path to converted annotation or data for conversion should be specified')
+
+        subsample_size = self._config.get('subsample_size')
+        if subsample_size:
+            subsample_seed = self._config.get('subsample_seed', 666)
+            if isinstance(subsample_size, str):
+                if subsample_size.endswith('%'):
+                    subsample_size = float(subsample_size[:-1]) / 100 * len(annotation)
+            subsample_size = int(subsample_size)
+            annotation = make_subset(annotation, subsample_size, subsample_seed)
+
+        if contains_all(self._config, ['annotation', 'annotation_conversion']):
+            annotation_name = self._config['annotation']
+            meta_name = self._config.get('dataset_meta')
+            if meta_name:
+                meta_name = Path(meta_name)
+            save_annotation(annotation, meta, Path(annotation_name), meta_name)
+
+        self._annotation = annotation
+        self._meta = meta
+        self.size = len(self._annotation)
+        self.name = self._config.get('name')
+
+    @property
+    def annotation(self):
+        return self._annotation
+
+    def __len__(self):
+        return self.size
+
+    @property
+    def metadata(self):
+        return self._meta
+
+    @property
+    def labels(self):
+        return self._meta.get('label_map', {})
+
+    def __getitem__(self, item):
+        if self.size <= item * self.batch:
+            raise IndexError
+
+        batch_start = item * self.batch
+        batch_end = min(self.size, batch_start + self.batch)
+        batch_annotation = self._annotation[batch_start:batch_end]
+
+        identifiers = [annotation.identifier for annotation in batch_annotation]
+        images = self._read_images(identifiers)
+
+        for image, annotation in zip(images, batch_annotation):
+            self.set_annotation_metadata(annotation, image)
+
+        preprocessed = self._preprocessor.process(images, batch_annotation)
+
+        return batch_annotation, preprocessed
+
+    @staticmethod
+    def set_image_metadata(annotation, images):
+        image_sizes = []
+        if not isinstance(images, list):
+            images = [images]
+        for image in images:
+            if np.isscalar(image):
+                image_sizes.append((1,))
+            else:
+                image_sizes.append(image.shape)
+        annotation.set_image_size(image_sizes)
+
+    def set_annotation_metadata(self, annotation, image):
+        self.set_image_metadata(annotation, image.data)
+        annotation.set_data_source(self._images_dir)
+
+    def _read_images(self, identifiers):
+        images = []
+        for identifier in identifiers:
+            images.append(DataRepresentation(self.read_image_fn(identifier, self._images_dir), identifier=identifier))
+
+        return images
+
+    def _load_meta(self):
+        meta_data_file = self._config.get('dataset_meta')
+        return read_json(meta_data_file, cls=JSONDecoderWithAutoConversion) if meta_data_file else None
+
+    def _convert_annotation(self):
+        conversion_params = self._config.get('annotation_conversion')
+        converter = conversion_params['converter']
+        annotation_converter = BaseFormatConverter.provide(converter, conversion_params)
+        annotation, meta = annotation_converter.convert()
+
+        return annotation, meta
+
+
+def read_annotation(annotation_file: Path):
+    annotation_file = get_path(annotation_file)
+
+    result = []
+    with annotation_file.open('rb') as file:
+        while True:
+            try:
+                result.append(BaseRepresentation.load(file))
+            except EOFError:
+                break
+
+    return result
diff --git a/tools/accuracy_checker/accuracy_checker/dependency.py b/tools/accuracy_checker/accuracy_checker/dependency.py
new file mode 100644
index 000000000..947a3ec08
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/dependency.py
@@ -0,0 +1,108 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+# pylint: disable=protected-access
+
+
+class ProvidedWrapper:
+    def __init__(self, provided):
+        self.provided = provided
+
+
+class UnresolvedDependencyException(ValueError):
+
+    def __init__(self, provider, missing_dependencies) -> None:
+        super().__init__()
+        self.provider = provider
+        self.missing_dependencies = missing_dependencies
+        self.message = "Unresolved dependencies ({}) for provider {}".format(
+            ", ".join(self.missing_dependencies), self.provider
+        )
+
+
+def get_opts(options):
+    """
+    Args:
+        options: options object.
+    Returns:
+        args (tuple): positional options.
+        kwargs (map): keyword arguments.
+    """
+
+    if isinstance(options, tuple):
+        if len(options) == 2 and isinstance(options[-1], dict):
+            args, kwargs = options
+        else:
+            args = options
+            kwargs = {}
+    elif isinstance(options, dict):
+        args, kwargs = (), options
+    else:
+        raise ValueError("Options object expected to be either pair of (args, kwargs) or only args/kwargs")
+
+    return args, kwargs
+
+
+class BaseProvider:
+    providers = {}
+    __provider_type__ = None
+    __provider__ = None
+
+    @classmethod
+    def provide(cls, provider, *args, **kwargs):
+        root_provider = cls.resolve(provider)
+        return root_provider(*args, **kwargs)
+
+    @classmethod
+    def resolve(cls, name):
+        if name not in cls.providers:
+            raise ValueError("Requested provider not registered")
+        return cls.providers[name]
+
+
+class ClassProviderMeta(type):
+    def __new__(mcs, name, bases, attrs, **kwargs):
+        cls = super().__new__(mcs, name, bases, attrs)
+        # do not create container for abstract provider
+        if '_is_base_provider' in attrs:
+            return cls
+
+        assert issubclass(cls, ClassProvider), "Do not use metaclass directly"
+        if '__provider_type__' in attrs:
+            cls.providers = {}
+        else:
+            cls.register_provider(cls)
+
+        return cls
+
+
+class ClassProvider(BaseProvider, metaclass=ClassProviderMeta):
+    _is_base_provider = True
+
+    @classmethod
+    def get_provider_name(cls):
+        return getattr(cls, '__provider__', cls.__name__)
+
+    @classmethod
+    def register_provider(cls, provider):
+        provider_name = cls.get_provider_name()
+        if not provider_name:
+            return
+        cls.providers[provider_name] = provider
+
+
+def provide(service):
+    return ProvidedWrapper(service)
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/__init__.py b/tools/accuracy_checker/accuracy_checker/launcher/__init__.py
new file mode 100644
index 000000000..af21a9125
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/__init__.py
@@ -0,0 +1,34 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .dummy_launcher import DummyLauncher
+from .launcher import Launcher, create_launcher, unsupported_launcher
+
+try:
+    from .caffe_launcher import CaffeLauncher
+except ImportError as import_error:
+    CaffeLauncher = unsupported_launcher(
+        'caffe', "Caffe isn't installed. Please, install it before using. \n{}".format(import_error.msg)
+    )
+
+try:
+    from .dlsdk_launcher import DLSDKLauncher
+except ImportError as import_error:
+    DLSDKLauncher = unsupported_launcher(
+        'dlsdk', "IE Python isn't installed. Please, install it before using. \n{}".format(import_error.msg)
+    )
+
+__all__ = ['create_launcher', 'Launcher', 'CaffeLauncher', 'DLSDKLauncher', 'DummyLauncher']
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/caffe_installation_readme.md b/tools/accuracy_checker/accuracy_checker/launcher/caffe_installation_readme.md
new file mode 100644
index 000000000..8118dcd55
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/caffe_installation_readme.md
@@ -0,0 +1,56 @@
+# Caffe Installation Tips
+
+## Install OpenCV 3.3 or later with Python3 bindings
+
+Accuracy Checker uses OpenCV library for image processing. You can miss this step if you are using OpenCV from [OpenVINO toolkit][openvino-get-started].
+
+```bash
+sudo apt-get install libopencv-dev
+pip install opencv-python
+```
+
+## Install Caffe with Python3 bindings
+
+* Clone repository:
+
+```bash
+git clone https://github.com/BVLC/caffe.git
+cd caffe
+```
+
+* Install Caffe dependencies:
+
+```bash
+sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev
+sudo apt-get install --no-install-recommends libboost-all-dev
+pip install -r python/requirements.txt
+pip install matplotlib
+```
+
+* Build
+
+If you need CPU only version of caffe add `-DCPU_ONLY=ON` to cmake command.
+
+```bash
+mkdir build && cd build
+cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=<caffe/install/dir> -Dpython_version=3 -DBLAS=open ..
+make
+sudo make install
+```
+
+* Copy Python library to your python installation.
+
+```bash
+cp -r ../python/caffe $VIRTUAL_ENV/lib/python3.5/site-packages
+cp --remove-destination lib/_caffe.so $VIRTUAL_ENV/lib/python3.5/site-packages/caffe
+```
+
+## Check your installation
+
+You can test prerequisites with the following command. If it does not fail, then you are installed prerequisites correctly:
+
+```bash
+python3 -c 'import caffe, cv2'
+```
+
+[openvino-get-started]: https://software.intel.com/en-us/openvino-toolkit/documentation/get-started
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher.py
new file mode 100644
index 000000000..df3d98a81
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher.py
@@ -0,0 +1,141 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import re
+
+import caffe
+
+from ..utils import extract_image_representations
+from ..config import PathField, StringField, NumberField, BoolField
+from .launcher import Launcher, LauncherConfig
+from .input_feeder import InputFeeder
+
+DEVICE_REGEX = r'(?P<device>cpu$|gpu)(_(?P<identifier>\d+))?'
+
+
+class CaffeLauncherConfig(LauncherConfig):
+    """
+    Specifies configuration structure for Caffe launcher.
+    """
+
+    model = PathField()
+    weights = PathField()
+    device = StringField(regex=DEVICE_REGEX)
+    batch = NumberField(floats=False, min_value=1, optional=True)
+    output_name = StringField(optional=True)
+    allow_reshape_input = BoolField(optional=True)
+
+
+class CaffeLauncher(Launcher):
+    """
+    Class for infer model using Caffe framework.
+    """
+
+    __provider__ = 'caffe'
+
+    def __init__(self, config_entry: dict, adapter, *args, **kwargs):
+        super().__init__(config_entry, adapter, *args, **kwargs)
+
+        caffe_launcher_config = CaffeLauncherConfig('Caffe_Launcher')
+        caffe_launcher_config.validate(self._config)
+
+        self.model = str(self._config['model'])
+        self.weights = str(self._config['weights'])
+
+        self.network = caffe.Net(self.model, self.weights, caffe.TEST)
+        self.allow_reshape_input = self._config.get('allow_reshape_input', False)
+
+        match = re.match(DEVICE_REGEX, self._config['device'].lower())
+        if match.group('device') == 'gpu':
+            caffe.set_mode_gpu()
+            identifier = match.group('identifier') or 0
+            caffe.set_device(int(identifier))
+        elif match.group('device') == 'cpu':
+            caffe.set_mode_cpu()
+
+        self._batch = self._config.get('batch', 1)
+
+        inputs_map = {}
+        for input_blob in self.network.inputs:
+            inputs_map[input_blob] = self.network.blobs[input_blob]
+
+        self.input_feeder = InputFeeder(self._config.get('inputs') or [], inputs_map)
+
+        if self.adapter:
+            self.adapter.output_blob = self.adapter.output_blob or next(iter(self.network.outputs))
+
+    @property
+    def inputs(self):
+        """
+        Returns:
+            inputs in NCHW format.
+        """
+
+        self._inputs_shapes = {}
+
+        for input_blob in self.network.inputs:
+            if input_blob in self.input_feeder.const_inputs:
+                continue
+
+            channels, height, width = self.network.blobs[input_blob].data.shape[1:]
+            self.network.blobs[input_blob].reshape(self._batch, channels, height, width)
+            self._inputs_shapes[input_blob] = channels, height, width
+
+        return self._inputs_shapes
+
+    @property
+    def batch(self):
+        return self._batch
+
+    def predict(self, identifiers, data_representation, *args, **kwargs):
+        """
+        Args:
+            identifiers: list of input data identifiers.
+            data_representation: list of input data representations, which contain preprocessed data and its metadata.
+        Returns:
+            output of model converted to appropriate representation.
+        """
+        _, meta = extract_image_representations(data_representation)
+        dataset_inputs = self.input_feeder.fill_non_constant_inputs(data_representation)
+        results = []
+        for infer_input in dataset_inputs:
+            for input_blob in self.network.inputs:
+                if input_blob in self.input_feeder.const_inputs:
+                    continue
+
+                data = infer_input[input_blob]
+
+                if self.allow_reshape_input:
+                    self.network.blobs[input_blob].reshape(*data.shape)
+
+                if data.shape[0] != self._batch:
+                    self.network.blobs[input_blob].reshape(
+                        data.shape[0], *self.network.blobs[input_blob].data.shape[1:]
+                    )
+
+            results.append(self.network.forward(**self.input_feeder.const_inputs, **infer_input))
+
+        if self.adapter:
+            results = self.adapter(results, identifiers, [self._provide_inputs_info_to_meta(meta_) for meta_ in meta])
+
+        return results
+
+    def release(self):
+        """
+        Releases launcher.
+        """
+
+        del self.network
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md b/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md
new file mode 100644
index 000000000..2ff601372
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/caffe_launcher_readme.md
@@ -0,0 +1,24 @@
+# How to configure Caffe launcher
+
+For enabling Caffe launcher you need to add `framework: caffe` in launchers section of your configuration file and provide following parameters:
+
+* `device` - specifies which device will be used for infer (`cpu`, `gpu_0` and so on).
+* `model` - path to prototxt file with Caffe model for your topology.
+* `weights` - path to caffemodel file with weights for your topology.
+* `adapter` - approach how raw output will be converted to representation of dataset problem, some adapters can be specific to framework. You can find detailed instruction how to use adapters [here][adapters].
+
+You also can specify batch size for your model using `batch` and allow to reshape input layer to data shape, using specific parameter: `allow_reshape_input` (default value is False).
+
+Caffe launcher config example:
+
+```yml
+launchers:
+  - framework: caffe
+    device: CPU
+    model: path_to_model/alexnet.prototxt
+    weights: path_to_weights/alexnet.caffemodel
+    adapter: classification
+    batch: 4
+```
+
+[adapters]: ./tools/accuracy_checker/accuracy_checker/adapters/README.md
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher.py
new file mode 100644
index 000000000..6378b8db0
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher.py
@@ -0,0 +1,430 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import subprocess
+from pathlib import Path
+import os
+import platform
+import numpy as np
+from cpuinfo import get_cpu_info
+import openvino.inference_engine as ie
+
+from ..config import ConfigError, NumberField, PathField, StringField, DictField, ListField, BoolField
+from ..logging import warning
+from ..utils import read_yaml, contains_all, extract_image_representations, get_path
+from .launcher import Launcher, LauncherConfig
+from .input_feeder import InputFeeder
+from .model_conversion import convert_model
+from ..logging import print_info
+
+HETERO_KEYWORD = 'HETERO:'
+FPGA_COMPILER_MODE_VAR = 'CL_CONTEXT_COMPILER_MODE_INTELFPGA'
+DEVICE_REGEX = r"(?:^{hetero}(?P<devices>(?:{devices})(?:,(?:{devices}))*)$)|(?:^(?P<device>{devices})$)".format(
+    hetero=HETERO_KEYWORD, devices="|".join(plugin for plugin in ie.known_plugins)
+)
+
+
+class CPUExtensionPathField(PathField):
+    def __init__(self, **kwargs):
+        super().__init__(is_directory=False, **kwargs)
+
+    def validate(self, entry, field_uri=None):
+        if entry is None:
+            return
+
+        field_uri = field_uri or self.field_uri
+        validation_entry = ''
+        try:
+            validation_entry = Path(entry)
+        except TypeError:
+            self.raise_error(entry, field_uri, "values is expected to be path-like")
+        is_directory = False
+        if validation_entry.parts[-1] == 'AUTO':
+            validation_entry = validation_entry.parent
+            is_directory = True
+        try:
+            get_path(validation_entry, is_directory)
+        except FileNotFoundError:
+            self.raise_error(validation_entry, field_uri, "path does not exist")
+        except NotADirectoryError:
+            self.raise_error(validation_entry, field_uri, "path is not a directory")
+        except IsADirectoryError:
+            self.raise_error(validation_entry, field_uri, "path is a directory, regular file expected")
+
+
+class DLSDKLauncherConfig(LauncherConfig):
+    """
+    Specifies configuration structure for DLSDK launcher.
+    """
+
+    device = StringField(regex=DEVICE_REGEX)
+    model = PathField(optional=True)
+    weights = PathField(optional=True)
+    caffe_model = PathField(optional=True)
+    caffe_weights = PathField(optional=True)
+    mxnet_weights = PathField(optional=True)
+    tf_model = PathField(optional=True)
+    onnx_model = PathField(optional=True)
+    kaldi_model = PathField(optional=True)
+    cpu_extensions = CPUExtensionPathField(optional=True)
+    gpu_extensions = PathField(optional=True)
+    bitstream = PathField(optional=True)
+    mo_params = DictField(optional=True)
+    mo_flags = ListField(optional=True)
+    outputs = ListField(optional=True)
+    allow_reshape_input = BoolField(optional=True)
+    affinity_map = PathField(optional=True)
+    batch = NumberField(floats=False, min_value=1, optional=True)
+
+    _models_prefix = PathField(is_directory=True, optional=True)
+    _model_optimizer = PathField(optional=True, allow_none=True, is_directory=True)
+    _tf_obj_detection_api_config_dir = PathField(optional=True, allow_none=True, is_directory=True)
+    _tf_custom_op_config_dir = PathField(optional=True, allow_none=True, is_directory=True)
+    _cpu_extensions_mode = StringField(optional=True, allow_none=True)
+    _aocl = PathField(optional=True)
+
+    def __init__(self, config_uri, **kwargs):
+        super().__init__(config_uri, **kwargs)
+        self.need_conversion = None
+
+    def validate(self, entry, field_uri=None):
+        """
+        Validate that launcher entry meets all configuration structure requirements.
+
+        Args:
+            entry: launcher configuration file entry.
+            field_uri: id of launcher entry.
+        """
+
+        dlsdk_model_options = ['model', 'weights']
+        caffe_model_options = ['caffe_model', 'caffe_weights']
+        mxnet_model_options = ['mxnet_weights']
+        tf_model_options = ['tf_model']
+        onnx_model_options = ['onnx_model']
+        kaldi_model_options = ['kaldi_model']
+
+        multiple_model_sources_err = (
+            'Either model and weights or caffe_model and caffe_weights '
+            'or mxnet_weights or tf_model should be specified.'
+        )
+        sources = {
+            'dlsdk': dlsdk_model_options,
+            'caffe': caffe_model_options,
+            'tf': tf_model_options,
+            'mxnet': mxnet_model_options,
+            'onnx': onnx_model_options,
+            'kaldi': kaldi_model_options
+        }
+
+        specified = []
+        for mo_source_option in sources:
+            if contains_all(entry, sources[mo_source_option]):
+                specified.append(mo_source_option)
+
+        if not specified:
+            raise ConfigError('{} None provided'.format(multiple_model_sources_err))
+        if len(specified) > 1:
+            raise ConfigError('{} Several provided'.format(multiple_model_sources_err))
+
+        self._set_model_source(specified[0])
+        super().validate(entry, field_uri)
+
+    def _set_model_source(self, framework):
+        self.need_conversion = framework != 'dlsdk'
+        self.framework = framework
+        self.fields['model'].optional = self.need_conversion
+        self.fields['weights'].optional = self.need_conversion
+        self.fields['caffe_model'].optional = framework != 'caffe'
+        self.fields['caffe_weights'].optional = framework != 'caffe'
+        self.fields['mxnet_weights'].optional = framework != 'mxnet'
+        self.fields['tf_model'].optional = framework != 'tf'
+        self.fields['onnx_model'].optional = framework != 'onnx'
+        self.fields['kaldi_model'].optional = framework != 'kaldi'
+
+
+class DLSDKLauncher(Launcher):
+    """
+    Class for infer model using DLSDK framework.
+    """
+
+    __provider__ = 'dlsdk'
+
+    def __init__(self, config_entry, adapter):
+        super().__init__(config_entry, adapter)
+
+        def fit_to_input(data, input_layer):
+            shape_len = len(input_layer.shape)
+            if shape_len == 4:
+                return np.transpose(data, [0, 3, 1, 2])
+            if shape_len == 2:
+                if len(np.shape(data)) == 1:
+                    return np.transpose([data])
+            return np.array(data)
+
+        dlsdk_launcher_config = DLSDKLauncherConfig('DLSDK_Launcher')
+        dlsdk_launcher_config.validate(self._config)
+
+        self._device = self._config['device'].upper()
+        self._set_variable = False
+        self._prepare_bitstream_firmware(self._config)
+
+        if dlsdk_launcher_config.need_conversion:
+            self._model, self._weights = DLSDKLauncher.convert_model(self._config, dlsdk_launcher_config.framework)
+        else:
+            self._model = self._config['model']
+            self._weights = self._config['weights']
+
+        self._create_ie_plugin()
+        self.network = ie.IENetwork(model=str(self._model), weights=str(self._weights))
+        self.original_outputs = self.network.outputs
+        outputs = self._config.get('outputs')
+        if outputs:
+            self.network.add_outputs(outputs)
+        self.input_feeder = InputFeeder(
+            self._config.get('inputs') or [],
+            self.network.inputs,
+            prepare_input_data=fit_to_input
+        )
+        self._batch = self._config.get('batch', self.network.batch_size)
+        if self._batch != self.network.batch_size:
+            self._set_batch_size(self._batch)
+        affinity_map_path = self._config.get('affinity_map')
+        if affinity_map_path and self._is_hetero():
+            self._set_affinity(affinity_map_path)
+        elif affinity_map_path:
+            warning('affinity_map config is applicable only for HETERO device')
+        self.exec_network = self.plugin.load(network=self.network)
+        self.allow_reshape_input = self._config.get('allow_reshape_input', False)
+
+    @property
+    def inputs(self):
+        """
+        Returns:
+            inputs in NCHW format.
+        """
+
+        # reverse and omit N
+        return {k: v.shape[1:] for k, v in self.network.inputs.items() if k in self.input_feeder.non_constant_inputs}
+
+    @property
+    def batch(self):
+        return self._batch
+
+    def predict(self, identifiers, data_representation, *args, **kwargs):
+        """
+        Args:
+            identifiers: list of input data identifiers.
+            data_representation: list of input data representations, which contain preprocessed data and its metadata.
+        Returns:
+            output of model converted to appropriate representation.
+        """
+        _, metadata = extract_image_representations(data_representation)
+        non_constant_inputs = self.input_feeder.fill_non_constant_inputs(data_representation)
+        results = []
+        for infer_inputs in non_constant_inputs:
+            input_shapes = {}
+            do_reshape = False
+            for input_blob in self.network.inputs:
+                if input_blob in self.input_feeder.const_inputs:
+                    input_shapes[input_blob] = self.network.inputs[input_blob].shape
+                    continue
+
+                data = infer_inputs[input_blob]
+                input_shapes[input_blob] = data.shape
+                if self.allow_reshape_input:
+                    if tuple(self.network.inputs[input_blob].shape) != data.shape:
+                        do_reshape = True
+
+            if do_reshape:
+                self._reshape_input(input_shapes)
+
+            for input_blob, data in infer_inputs.items():
+                infer_inputs[input_blob] = self._align_data_shape(data, input_blob)
+
+            network_inputs_data = {**infer_inputs, **self.input_feeder.const_inputs}
+
+            benchmark = kwargs.get('benchmark')
+            if benchmark:
+                benchmark(network_inputs_data)
+
+            result = self.exec_network.infer(network_inputs_data)
+
+            raw_outputs_callback = kwargs.get('output_callback')
+            if raw_outputs_callback:
+                raw_outputs_callback(result)
+
+            results.append(result)
+
+        if self.adapter:
+            self.adapter.output_blob = self.adapter.output_blob or next(iter(self.original_outputs))
+            results = self.adapter(results, identifiers, [self._provide_inputs_info_to_meta(meta) for meta in metadata])
+
+        return results
+
+    def _is_hetero(self):
+        return self._device.startswith(HETERO_KEYWORD)
+
+    def _devices_list(self):
+        device = self._device
+        if HETERO_KEYWORD in self._device:
+            device = self._device[len(HETERO_KEYWORD):]
+
+        return [platform_.upper().strip() for platform_ in device.split(',')]
+
+    def _set_affinity(self, affinity_map_path):
+        self.plugin.set_initial_affinity(self.network)
+        layers = self.network.layers
+        for layer, device in read_yaml(affinity_map_path).items():
+            if layer not in layers:
+                raise ConfigError('Layer \'{layer}\' is not present in network'.format(layer=layer))
+            if device not in self._devices_list():
+                raise ConfigError(
+                    'Device \'{device}\' set for \'{layer}\' layer is not present in '
+                    'provided configuration \'{configuration}\''.format(
+                        device=device, layer=layer, configuration=self._device
+                    )
+                )
+            layers[layer].affinity = device
+
+    def _is_fpga(self):
+        return 'FPGA' in self._devices_list()
+
+    def _prepare_bitstream_firmware(self, config):
+        if not self._is_fpga():
+            return
+
+        compiler_mode = os.environ.get(FPGA_COMPILER_MODE_VAR)
+        if compiler_mode == '3':
+            return
+
+        bitstream = config.get('bitstream')
+        if bitstream:
+            print_info('programming bitstream: {}'.format(bitstream.name))
+            aocl_executable = config.get('_aocl')
+            if aocl_executable:
+                subprocess.run([str(aocl_executable), 'program', 'acl0', str(bitstream)])
+                os.environ[FPGA_COMPILER_MODE_VAR] = '3'
+                self._set_variable = True
+            else:
+                aocx_variable = 'DLA_AOCX'
+                previous_bitstream = os.environ.get(aocx_variable)
+                if previous_bitstream == str(bitstream):
+                    return
+                os.environ[aocx_variable] = str(bitstream)
+                if not os.environ.get(aocx_variable):
+                    warning('Warning: {} has not been set'.format(aocx_variable))
+
+    @staticmethod
+    def get_cpu_extension(cpu_extensions, selection_mode):
+        cpu_extensions_name = cpu_extensions.parts[-1]
+        if cpu_extensions_name != 'AUTO':
+            return cpu_extensions
+        extensions_path = cpu_extensions.parent
+        file_format = '{}.dll' if platform.system() == 'Windows' else 'lib{}.so'
+        if not selection_mode:
+            default_cpu_extension = file_format.format('cpu_extension')
+            extension_list = list(extensions_path.glob(default_cpu_extension))
+
+            if extension_list:
+                return extension_list[0]
+
+            cpu_info_flags = get_cpu_info()['flags']
+            selection_mode = 'avx2' if 'avx2' in cpu_info_flags else 'sse4'
+        extension_list = list(extensions_path.glob(file_format.format('cpu_extension_{}'.format(selection_mode))))
+
+        if not extension_list:
+            raise ConfigError('suitable CPU extension lib not found in {}'.format(extensions_path))
+
+        return extension_list[0]
+
+    @staticmethod
+    def convert_model(config, framework='caffe'):
+        config_model = config.get(framework + '_model', '')
+        config_weights = config.get(framework + '_weights', '')
+
+        mo_search_paths = []
+        model_optimizer = config.get('_model_optimizer')
+        if model_optimizer:
+            mo_search_paths.append(model_optimizer)
+
+        model_optimizer_directory_env = os.environ.get('MO_DIR')
+        if model_optimizer_directory_env:
+            mo_search_paths.append(model_optimizer_directory_env)
+
+        return convert_model(
+            Path(config_model).name.split('.')[0] or Path(config_weights).name.split('.')[0],
+            config_model, config_weights, framework,
+            mo_search_paths, config.get('mo_params'),
+            config.get('mo_flags'),
+            config.get('_tf_custom_op_config_dir'),
+            config.get('_tf_obj_detection_api_pipeline_config_path')
+        )
+
+    def _reshape_input(self, shapes):
+        self.network.reshape(shapes)
+        del self.exec_network
+        self._create_ie_plugin(log=False)
+        self.exec_network = self.plugin.load(network=self.network)
+
+    def _set_batch_size(self, batch_size):
+        # in some cases we can not use explicit property for setting batch size, so we need to use reshape instead
+        # save const inputs without changes
+        const_inputs_shapes = {
+            input_name: self.network.inputs[input_name].shape for input_name in self.input_feeder.const_inputs
+        }
+        new_non_const_input_shapes = {}
+        for layer_name in self.input_feeder.non_constant_inputs:
+            layer = self.network.inputs[layer_name]
+            layer_shape = layer.shape
+            ind_batch = layer.layout.find('N')
+            if ind_batch != -1:
+                layer_shape[ind_batch] = batch_size
+            new_non_const_input_shapes[layer_name] = layer_shape
+
+        self.network.reshape({**const_inputs_shapes, **new_non_const_input_shapes})
+
+    def _align_data_shape(self, data, input_blob):
+        input_shape = self.network.inputs[input_blob].shape
+
+        if data.shape[0] != input_shape[0]:
+            input_shape[0] = data.shape[0]
+        if len(data.shape) > 1 and len(input_shape) > 1 and data.shape[1] != input_shape[1]:
+            data = data[:, :input_shape[1]]
+
+        return data.reshape(input_shape)
+
+    def _create_ie_plugin(self, log=True):
+        if hasattr(self, 'plugin'):
+            del self.plugin
+        self.plugin = ie.IEPlugin(self._device)
+        if log:
+            print_info('IE version: {}'.format(ie.get_version()))
+            print_info('Loaded {} plugin version: {}'.format(self.plugin.device, self.plugin.version))
+
+        cpu_extensions = self._config.get('cpu_extensions')
+        if cpu_extensions and 'CPU' in self._device:
+            selection_mode = self._config.get('_cpu_extensions_mode')
+            cpu_extensions = DLSDKLauncher.get_cpu_extension(cpu_extensions, selection_mode)
+            self.plugin.add_cpu_extension(str(cpu_extensions))
+        if self._config.get('gpu_extensions') and 'GPU' in self._device:
+            self.plugin.set_config('CONFIG_FILE', str(self._config.get('gpu_extensions')))
+
+    def release(self):
+        if self._set_variable:
+            del os.environ[FPGA_COMPILER_MODE_VAR]
+        del self.network
+        del self.exec_network
+        del self.plugin
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md b/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md
new file mode 100644
index 000000000..e04415ebe
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/dlsdk_launcher_readme.md
@@ -0,0 +1,54 @@
+# How to configure OpenVINO™ launcher
+
+For enabling OpenVINO™ launcher you need to add `framework: dlsdk` in launchers section of your configuration file and provide following parameters:
+
+* `device` - specifies which device will be used for infer. Supported: `CPU`, `GPU`, `FPGA`, `MYRIAD` and Heterogeneous plugin as `HETERO:target_device,fallback_device`.
+* `model` - path to xml file with Caffe model for your topology.
+* `weights` - path to bin file with weights for your topology.
+
+launcher may optionally provide model parameters in source framework format which will be converted to Inference Engine IR using Model Optimizer.
+If you want to use Model Optimizer for model conversion, please view [Model Optimizer Developer Guide][openvino-mo].
+You can provide:
+
+* `caffe_model` and `caffe_weights` for Caffe model and weights (*.prototxt and *.caffemodel).
+* `tf_model` for TensorFlow model (*.pb, *.pb.frozen, *.pbtxt).
+* `mxnet_weights` for MXNet params (*.params).
+* `onnx_model` for ONNX model (*.onnx).
+* `kaldi_model` for Kaldi model (*.nnet).
+
+In case when you want to determine additional parameters for model conversion (data_type, input_shape and so on), you can use `mo_params` for arguments with values and `mo_flags` for positional arguments like `legacy_mxnet_model` .
+Full list of supported parameters you can find in Model Optimizer Developer Guide.
+
+Model will be converted before every evaluation. 
+You can provide `converted_model_dir` for saving converted model in specific folder, otherwise, converted models will be saved in path provided via `-C` command line argument or source model directory.
+
+* `adapter` - approach how raw output will be converted to representation of dataset problem, some adapters can be specific to framework. You can find detailed instruction how to use adapters [here][adapters].
+
+Launcher understands which batch size will be used from model intermediate representation (IR). If you want to use batch for infer, please, provide model with required batch or convert it using specific parameter in `mo_params`.
+
+* `allow_reshape_input` - parameter, which allows to reshape input layer to data shape (default value is False).
+
+Additionally you can provide device specific parameters:
+
+* `cpu_extensions` (path to extension *.so file with custom layers for cpu).
+* `gpu_extensions` (path to extension *.xml file with OpenCL kernel description for gpu).
+* `bitstream` for running on FPGA.
+
+OpenVINO™ launcher config example:
+
+```yml
+launchers:
+  - framework: dlsdk
+    device: HETERO:FPGA,CPU
+    caffe_model: path_to_model/alexnet.prototxt
+    caffe_weights: path_to_weights/alexnet.caffemodel
+    adapter: classification
+    mo_params:
+      batch: 4
+    mo_flags:
+      - reverse_input_channels
+    cpu_extensions: cpu_extentions_avx512.so
+```
+
+[adapters]: ./tools/accuracy_checker/accuracy_checker/adapters/README.md
+[openvino-mo]: https://software.intel.com/en-us/articles/OpenVINO-ModelOptimizer
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/dummy_launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/dummy_launcher.py
new file mode 100644
index 000000000..7714f2eda
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/dummy_launcher.py
@@ -0,0 +1,69 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..utils import get_path
+from ..logging import print_info
+from ..adapters import Adapter
+from ..config import PathField, StringField
+from .loaders import Loader
+from .launcher import Launcher, LauncherConfig
+
+
+class DummyLauncherConfig(LauncherConfig):
+    """
+    Specifies configuration structure for Dummy launcher.
+    """
+
+    loader = StringField(choices=Loader.providers)
+    data_path = PathField()
+    adapter = StringField(choices=Adapter.providers, optional=True)
+
+
+class DummyLauncher(Launcher):
+    """
+    Class for using predictions from another tool.
+    """
+
+    __provider__ = 'dummy'
+
+    def __init__(self, config_entry: dict, adapter, *args, **kwargs):
+        super().__init__(config_entry, adapter, *args, **kwargs)
+
+        dummy_launcher_config = DummyLauncherConfig('Dummy_Launcher')
+        dummy_launcher_config.validate(self._config)
+
+        self.data_path = get_path(self._config['data_path'])
+
+        self._loader = Loader.provide(self._config['loader'], self.data_path)
+        if self.adapter:
+            self.adapter.output_blob = self.adapter.output_blob or self.data_path
+            self._loader.data = self.adapter(self._loader.data)
+
+        print_info("{} predictions objects loaded from {}".format(len(self._loader), self.data_path))
+
+    def predict(self, identifiers, *args, **kwargs):
+        return [self._loader[identifier] for identifier in identifiers]
+
+    def release(self):
+        pass
+
+    @property
+    def batch(self):
+        return 1
+
+    @property
+    def inputs(self):
+        return None
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py b/tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py
new file mode 100644
index 000000000..202409b24
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/input_feeder.py
@@ -0,0 +1,138 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import re
+import numpy as np
+
+from ..config import ConfigError
+from ..utils import extract_image_representations
+
+class InputFeeder:
+    def __init__(self, inputs_config, network_inputs, prepare_input_data=None):
+        def fit_to_input(data, input_layer):
+            if len(np.shape(data)) == 4:
+                return np.transpose(data, [0, 3, 1, 2])
+            return np.array(data)
+
+        self.input_transform_func = prepare_input_data or fit_to_input
+        self.network_inputs = network_inputs
+        self.configure(inputs_config)
+
+    def configure(self, inputs_config):
+        self.const_inputs, self.non_constant_inputs, self.inputs_mapping = self._parse_inputs_config(inputs_config)
+        if not self.non_constant_inputs:
+            raise ConfigError('Network should contain at least one layer for setting variable data.')
+
+    def fill_non_constant_inputs(self, data_representation_batch):
+        filled_inputs = {}
+        for input_layer in self.non_constant_inputs:
+            input_regex = None
+            input_batch = []
+            if self.inputs_mapping:
+                input_regex = self.inputs_mapping[input_layer]
+            for data_representation in data_representation_batch:
+                input_data = None
+                identifiers = data_representation.identifier
+                data = data_representation.data
+                if not isinstance(identifiers, list) and not input_regex:
+                    input_data = data
+                    input_batch.append(input_data)
+                    continue
+
+                if not input_regex:
+                    raise ConfigError('Impossible to choose correct data for layer {}.'
+                                      'Please provide regular expression for matching in config.'.format(input_layer))
+                data = [data] if np.isscalar(identifiers) else data
+                identifiers = [identifiers] if np.isscalar(identifiers) else identifiers
+                for identifier, data_value in zip(identifiers, data):
+                    if input_regex.match(identifier):
+                        input_data = data_value
+                        break
+                if input_data is None:
+                    raise ConfigError('Suitable data for filling layer {} not found'.format(input_layer))
+                input_batch.append(input_data)
+
+            filled_inputs[input_layer] = input_batch
+
+        return self._transform_batch(filled_inputs, extract_image_representations(data_representation_batch)[1])
+
+    def _parse_inputs_config(self, inputs_entry):
+        constant_inputs = {}
+        non_constant_inputs_mapping = {}
+        non_constant_inputs = []
+        for input_ in inputs_entry:
+            name = input_['name']
+            if not name in self.network_inputs:
+                raise ConfigError('network does not contain input "{}"'.format(name))
+            value = input_['value']
+
+            if input_['type'] == 'CONST_INPUT':
+                if isinstance(value, list):
+                    value = np.array(value)
+                constant_inputs[name] = value
+            else:
+                value = re.compile(value)
+                non_constant_inputs_mapping[name] = value
+
+        non_constant_inputs = list(non_constant_inputs_mapping.keys())
+        not_config_inputs = list(filter(
+            lambda input_layer: not input_layer in non_constant_inputs + list(constant_inputs.keys()),
+            self.network_inputs.keys()
+            ))
+        if non_constant_inputs and not_config_inputs:
+            raise ConfigError('input value for {} are not presented in config.'.format(','.join(not_config_inputs)))
+        non_constant_inputs += not_config_inputs
+
+        return constant_inputs, non_constant_inputs, non_constant_inputs_mapping or None
+
+    def _transform_batch(self, batch_data, meta):
+        def calculate_num_splits(layers_data, batch_size):
+            max_split_num = 1
+            for _, data in layers_data.items():
+                total_tiles_num = 0
+                for tiles in data:
+                    total_tiles_num += len(tiles)
+
+                offset = 0 if total_tiles_num % batch_size == 0 else 1
+                splits_for_layer = (total_tiles_num // batch_size) + offset
+                if max_split_num < splits_for_layer:
+                    max_split_num = splits_for_layer
+
+            return max_split_num
+
+        def separate_data(data, num_splits):
+            grouped_data = [[] for _ in range(num_splits)]
+            for data_part in data:
+                for split_id, data_split in enumerate(data_part):
+                    grouped_data[split_id % num_splits].append(data_split)
+            return grouped_data
+
+        batch_size = len(meta)
+        if meta[0].get('multi_infer', False):
+            num_splits = calculate_num_splits(batch_data, batch_size)
+            infers_data = [{} for _ in range(num_splits)]
+            for layer_name, layer_data in batch_data.items():
+                batch_for_all_infers = separate_data(layer_data, num_splits)
+                for infer_id, on_infer_batch in enumerate(batch_for_all_infers):
+                    infers_data[infer_id][layer_name] = self.input_transform_func(
+                        on_infer_batch, self.network_inputs[layer_name]
+                    )
+            return infers_data
+
+        for layer_name, layer_data in batch_data.items():
+            batch_data[layer_name] = self.input_transform_func(layer_data, self.network_inputs[layer_name])
+
+        return [batch_data]
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/launcher.py b/tools/accuracy_checker/accuracy_checker/launcher/launcher.py
new file mode 100644
index 000000000..8aa44361f
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/launcher.py
@@ -0,0 +1,149 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..config import BaseField, ConfigError
+from ..adapters import Adapter, AdapterField
+from ..config import ConfigValidator, StringField, ListField
+from ..dependency import ClassProvider, provide
+
+
+class Launcher(ClassProvider):
+    """
+    Interface for inferring model.
+    """
+
+    __provider_type__ = 'launcher'
+
+    adapter = provide(Adapter)
+
+    def __init__(self, config_entry, adapter, *args, **kwargs):
+        self.adapter = adapter
+        self._config = config_entry
+
+    def predict(self, identifiers, data_representation, *args, **kwargs):
+        """
+        Args:
+            identifiers: list of input data identifiers.
+            data_representation: list of input data representations, which contain preprocessed data and its metadata.
+        Returns:
+            raw data from network.
+        """
+
+        raise NotImplementedError
+
+    def release(self):
+        raise NotImplementedError
+
+    @property
+    def batch(self):
+        raise NotImplementedError
+
+    @property
+    def inputs(self):
+        raise NotImplementedError
+
+    def _provide_inputs_info_to_meta(self, meta):
+        meta['input_shape'] = self.inputs
+
+        return meta
+
+
+class InputValidator(ConfigValidator):
+    name = StringField()
+    type = StringField(choices=('CONST_INPUT', 'INPUT'))
+    value = BaseField()
+
+
+class ListInputsField(ListField):
+    def __init__(self, **kwargs):
+        super().__init__(allow_empty=False, value_type=InputValidator('Inputs'), **kwargs)
+
+    def validate(self, entry, field_uri=None):
+        super().validate(entry, field_uri)
+        names_set = set()
+        for input_layer in entry:
+            input_name = input_layer['name']
+            if input_name not in names_set:
+                names_set.add(input_name)
+            else:
+                self.raise_error(entry, field_uri, '{} repeated name'.format(input_name))
+
+
+class LauncherConfig(ConfigValidator):
+    """
+    Specifies common part of configuration structure for launchers.
+    """
+
+    framework = StringField(choices=Launcher.providers)
+    tags = ListField(allow_empty=False, optional=True)
+    inputs = ListInputsField(optional=True)
+    adapter = AdapterField()
+
+
+def unsupported_launcher(name, error_message=None):
+    class UnsupportedLauncher(Launcher):
+        __provider__ = name
+
+        def __init__(self, config_entry, adapter, *args, **kwargs):
+            super().__init__(config_entry, adapter, *args, **kwargs)
+
+            msg = "{launcher} launcher is disabled. Please install {launcher} to enable it.".format(launcher=name)
+            raise ValueError(error_message or msg)
+
+        def predict(self, identifiers, data, *args, **kwargs):
+            raise NotImplementedError
+
+        def release(self):
+            raise NotImplementedError
+
+        @property
+        def batch(self):
+            raise NotImplementedError
+
+    return UnsupportedLauncher
+
+
+def create_launcher(launcher_config, dataset_meta=None):
+    """
+    Args:
+        launcher_config: launcher configuration file entry.
+        dataset_meta: metadata dictionary for dataset annotation.
+    Returns:
+        framework-specific launcher object.
+    """
+
+    launcher_config_validator = LauncherConfig(
+        'Launcher_validator',
+        on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT
+    )
+    launcher_config_validator.validate(launcher_config)
+
+    label_map = None
+    if dataset_meta:
+        label_map = dataset_meta.get('label_map')
+
+    config_framework = launcher_config['framework']
+    config_adapter = launcher_config.get('adapter')
+    if not config_adapter:
+        adapter = None
+    elif isinstance(config_adapter, str):
+        adapter = Adapter.provide(config_adapter, launcher_config, label_map=label_map)
+    elif isinstance(config_adapter, dict):
+        adapter = Adapter.provide(config_adapter['type'], config_adapter, label_map=label_map)
+    else:
+        raise ConfigError
+
+    return Launcher.provide(config_framework, launcher_config, adapter=adapter)
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/__init__.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/__init__.py
new file mode 100644
index 000000000..98217dd2b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/loaders/__init__.py
@@ -0,0 +1,26 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .loader import Loader
+
+from .pickle_loader import PickleLoader
+from .xml_loader import XMLLoader
+
+__all__ = [
+    'Loader',
+    'PickleLoader',
+    'XMLLoader',
+]
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/loader.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/loader.py
new file mode 100644
index 000000000..7c07394af
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/loaders/loader.py
@@ -0,0 +1,54 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from pathlib import Path
+
+from ...dependency import ClassProvider
+
+
+class Loader(ClassProvider):
+    """
+    Interface that describes loading output from another tool.
+    """
+
+    __provider_type__ = 'loader'
+
+    def __init__(self, data_path: Path):
+        self._data_path = data_path
+
+    def __len__(self):
+        raise NotImplementedError
+
+    def __getitem__(self, item):
+        raise NotImplementedError
+
+
+class DictLoaderMixin:
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.data = self.load()
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, item):
+        if item not in self.data:
+            raise IndexError('There is no prediction object for "{}" input data'.format(item))
+
+        return self.data[item]
+
+    def load(self):
+        raise NotImplementedError
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/pickle_loader.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/pickle_loader.py
new file mode 100644
index 000000000..ba3578b68
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/loaders/pickle_loader.py
@@ -0,0 +1,34 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ...utils import read_pickle
+from .loader import Loader, DictLoaderMixin
+
+
+class PickleLoader(DictLoaderMixin, Loader):
+    """
+    Class for loading output from another tool in .pickle format.
+    """
+
+    __provider__ = 'pickle'
+
+    def load(self):
+        data = read_pickle(self._data_path)
+
+        if isinstance(data, list) and all(hasattr(entry, 'identifier') for entry in data):
+            return dict(zip([representation.identifier for representation in data], data))
+
+        return data
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/loaders/xml_loader.py b/tools/accuracy_checker/accuracy_checker/launcher/loaders/xml_loader.py
new file mode 100644
index 000000000..13c0de9c1
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/loaders/xml_loader.py
@@ -0,0 +1,29 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ...utils import read_xml
+from .loader import Loader, DictLoaderMixin
+
+
+class XMLLoader(DictLoaderMixin, Loader):
+    """
+    Class for loading output from another tool in .xml format.
+    """
+
+    __provider__ = 'xml'
+
+    def load(self):
+        return read_xml(self._data_path)
diff --git a/tools/accuracy_checker/accuracy_checker/launcher/model_conversion.py b/tools/accuracy_checker/accuracy_checker/launcher/model_conversion.py
new file mode 100644
index 000000000..d87f4ab0b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/launcher/model_conversion.py
@@ -0,0 +1,196 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import sys
+import subprocess
+from pathlib import Path
+from typing import Union
+from ..utils import get_path, format_key
+
+
+def convert_model(topology_name, model=None, weights=None,
+                  framework='caffe', mo_search_paths=None, mo_params=None, mo_flags=None,
+                  tf_custom_op_config_dir=None, tf_object_detection_api_config_dir=None):
+    """
+    Args:
+        topology_name: name for converted model files.
+        model: path to the topology file.
+        weights: path to the weights file.
+        framework: framework name for original model.
+        mo_search_paths: paths where ModelOptimizer may be found. If None only default paths is used.
+        mo_params: value parameters for ModelOptimizer execution.
+        mo_flags: flags parameters for ModelOptimizer execution.
+        tf_custom_op_config_dir: path to Tensor Flow custom operations directory.
+        tf_object_detection_api_config_dir: path to Tensor Flow directory with config for object detection API.
+    Returns:
+        paths to converted to IE IR model and weights.
+    """
+
+    mo_params = mo_params or {}
+    mo_flags = mo_flags or []
+
+    set_topology_name(mo_params, topology_name)
+
+    model_optimizer_executable = find_mo(mo_search_paths)
+    if not model_optimizer_executable:
+        raise EnvironmentError(
+            'Model optimizer not found. Please set MO_DIR environment variable to model optimizer folder '
+            'installation or refer to help for command line options for providing Model optimizer'
+        )
+
+    framework_specific_options = {
+        'caffe': {'input_model': weights, 'input_proto': model},
+        'mxnet': {'input_model': weights},
+        'tf': {'input_model': model},
+        'onnx': {'input_model': model},
+        'kaldi': {'input_model': model}
+    }
+
+    mo_params['framework'] = framework
+    mo_params.update(framework_specific_options.get(framework, {}))
+
+    set_path_to_custom_operation_configs(mo_params, framework, tf_custom_op_config_dir, model_optimizer_executable)
+    set_path_to_object_detection_api_pipeline_config(mo_params, framework, tf_object_detection_api_config_dir)
+    args = prepare_args(str(model_optimizer_executable), flag_options=mo_flags, value_options=mo_params)
+
+    code = exec_mo_binary(args)
+
+    if code.returncode != 0:
+        raise RuntimeError("Model optimizer conversion failed: ModelOptimizer returned non-zero code")
+
+    model_file, bin_file = find_dlsdk_ir(
+        get_path(mo_params.get('output_dir', Path.cwd()), is_directory=True), mo_params['model_name']
+    )
+    if not bin_file or not model_file:
+        raise RuntimeError("Model optimizer finished correctly, but converted model is not found.")
+
+    return model_file, bin_file
+
+
+def find_dlsdk_ir(search_path: Path, model_name):
+    """
+    Args:
+        search_path: path with IE IR of model.
+        model_name: name of the model.
+    Returns:
+        paths to IE IR of model.
+    """
+
+    xml_file = search_path / '{}.xml'.format(model_name)
+    bin_file = search_path / '{}.bin'.format(model_name)
+
+    return get_path(xml_file), get_path(bin_file)
+
+
+def find_mo(search_paths=None) -> Union[Path, None]:
+    """
+    Args:
+        search_paths: paths where ModelOptimizer may be found. If None only default paths is used.
+    Returns:
+        path to the ModelOptimizer or None if it wasn't found.
+    """
+
+    default_mo_path = ('intel', 'computer_vision_sdk', 'deployment_tools', 'model_optimizer')
+    default_paths = [Path.home().joinpath(*default_mo_path), Path('/opt').joinpath(*default_mo_path)]
+
+    executable = 'mo.py'
+    for path in search_paths or default_paths:
+        path = Path(path)
+        if not path.is_dir():
+            continue
+
+        mo = path / executable
+        if not mo.is_file():
+            continue
+
+        return mo
+
+    return None
+
+
+def prepare_args(executable, flag_options=None, value_options=None):
+    """
+    Args:
+        executable: path to the executable.
+        flag_options: positional arguments for executable.
+        value_options: keyword arguments for executable.
+    Returns:
+        list with command-line entries.
+    """
+
+    result = [sys.executable, executable]
+
+    for flag_option in flag_options or []:
+        result.append(str(format_key(flag_option)))
+
+    for key, value in (value_options or {}).items():
+        result.append(str(format_key(key)))
+        result.append(str(value))
+
+    return result
+
+
+def exec_mo_binary(args, timeout=None):
+    """
+    Args:
+        args: command-line entries.
+        timeout: timeout for execution.
+    Returns:
+        result of execution.
+    """
+
+    return subprocess.run(args, check=False, timeout=timeout)
+
+
+def set_path_to_custom_operation_configs(mo_params, framework, tf_custom_op_config_dir, mo_path):
+    if framework != 'tf':
+        return mo_params
+
+    config_path = mo_params.get('tensorflow_use_custom_operations_config')
+    if not config_path:
+        return mo_params
+
+    if tf_custom_op_config_dir:
+        tf_custom_op_config_dir = Path(tf_custom_op_config_dir)
+    else:
+        tf_custom_op_config_dir = Path('/').joinpath(*mo_path.parts[:-1]) / 'extensions' / 'front' / 'tf'
+
+    config_path = Path(config_path)
+    if not config_path.is_absolute():
+        config_path = tf_custom_op_config_dir / config_path
+
+    mo_params['tensorflow_use_custom_operations_config'] = str(get_path(config_path))
+
+    return mo_params
+
+
+def set_path_to_object_detection_api_pipeline_config(mo_params, framework, object_detection_api_config_dir=None):
+    object_detection_api_config = mo_params.get('tensorflow_object_detection_api_pipeline_config')
+    if framework != 'tf' or not object_detection_api_config:
+        return mo_params
+
+    object_detection_api_config_dir = Path(object_detection_api_config_dir or get_path(mo_params['input_model']).parent)
+    config_path = object_detection_api_config_dir / object_detection_api_config
+    mo_params['tensorflow_object_detection_api_pipeline_config'] = str(get_path(config_path))
+
+    return mo_params
+
+
+def set_topology_name(mo_params, topology_name):
+    if not mo_params.get('model_name'):
+        mo_params['model_name'] = topology_name
+
+    return mo_params
diff --git a/tools/accuracy_checker/accuracy_checker/logging.py b/tools/accuracy_checker/accuracy_checker/logging.py
new file mode 100644
index 000000000..cf2557981
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/logging.py
@@ -0,0 +1,134 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import logging.config
+import os
+import sys
+import warnings
+
+_DEFAULT_LOGGER_NAME = 'accuracy_checker'
+_DEFAULT_LOG_FILE = 'accuracy_checker.log'
+
+PRINT_INFO = logging.INFO + 5
+logging.addLevelName(PRINT_INFO, "PRINT_INFO")
+
+_LOG_LEVEL_ENVIRON = "ACCURACY_CHECKER_LOG_LEVEL"
+_LOGGING_LEVEL = logging.getLevelName(os.environ.get(_LOG_LEVEL_ENVIRON, PRINT_INFO))
+
+
+class LoggingFormatter(logging.Formatter):
+    def format(self, record: logging.LogRecord):
+        if record.levelno == PRINT_INFO:
+            return record.msg
+        return super().format(record)
+
+
+class ConsoleHandler(logging.StreamHandler):
+    def __init__(self, default_stream=sys.stdout):
+        super().__init__(default_stream)
+        self.default_stream = default_stream
+        self.err_stream = sys.stderr
+
+    def emit(self, record):
+        if record.levelno >= logging.WARNING:
+            self.stream = self.err_stream
+        else:
+            self.stream = self.default_stream
+        super().emit(record)
+
+
+_LOGGING_CONFIGURATION = {
+    'loggers': {
+        _DEFAULT_LOGGER_NAME: {
+            'handlers': ['console'],
+            'level': _LOGGING_LEVEL,
+            'propagate': False
+        }
+    },
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'default': {
+            '()': LoggingFormatter,
+            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s',
+            'datefmt': '%H:%M:%S'
+        },
+        'detailed': {
+            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s'
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            '()': ConsoleHandler,
+            'formatter': 'default',
+        }
+    }
+}
+
+logging.config.dictConfig(_LOGGING_CONFIGURATION)
+
+_default_logger = logging.getLogger(_DEFAULT_LOGGER_NAME)
+
+
+def _warning_handler(message, category, filename, line_number):
+    s = warnings.formatwarning(message, category, filename, line_number)
+    _default_logger.warning(s)
+
+
+warnings.showwarning = _warning_handler
+
+
+def get_logger(logger_name: str):
+    if logger_name.startswith(_DEFAULT_LOGGER_NAME):
+        return _default_logger.getChild(logger_name)
+    return logging.getLogger(logger_name)
+
+
+def error(msg, *args, **kwargs):
+    _default_logger.error(msg, *args, **kwargs)
+
+
+def warning(msg, *args, raise_warning=True, **kwargs):
+    if raise_warning:
+        warnings.warn(msg)
+    else:
+        _default_logger.warning(msg, *args, **kwargs)
+
+
+def info(msg, *args, **kwargs):
+    _default_logger.info(msg, *args, **kwargs)
+
+
+def debug(msg, *args, **kwargs):
+    _default_logger.debug(msg, *args, **kwargs)
+
+
+def print_info(msg, *args, **kwargs):
+    _default_logger.log(PRINT_INFO, msg, *args, **kwargs)
+
+
+def add_file_handler(file_name):
+    file_info_handler_config = {
+        'level': 'PRINT_INFO',
+        'class': 'logging.handlers.WatchedFileHandler',
+        'formatter': 'default',
+        'filename': file_name
+    }
+    _LOGGING_CONFIGURATION['handlers']['file_info'] = file_info_handler_config
+    _LOGGING_CONFIGURATION['loggers'][_DEFAULT_LOGGER_NAME]['handlers'].append('file_info')
+    logging.config.dictConfig(_LOGGING_CONFIGURATION)
diff --git a/tools/accuracy_checker/accuracy_checker/main.py b/tools/accuracy_checker/accuracy_checker/main.py
new file mode 100644
index 000000000..61fe524a3
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/main.py
@@ -0,0 +1,216 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from pathlib import Path
+from argparse import ArgumentParser
+from functools import partial
+
+from .config import ConfigReader
+from .logging import print_info, add_file_handler
+from .model_evaluator import ModelEvaluator
+from .progress_reporters import ProgressReporter
+from .utils import get_path
+
+
+def build_arguments_parser():
+    parser = ArgumentParser(description='NN Validation on Caffe and IE', allow_abbrev=False)
+    parser.add_argument(
+        '-d', '--definitions',
+        help='path to the yml file with definitions',
+        type=get_path,
+        required=False
+    )
+    parser.add_argument(
+        '-c', '--config',
+        help='path to the yml file with local configuration',
+        type=get_path,
+        required=True
+    )
+    parser.add_argument(
+        '-m', '--models',
+        help='prefix path to the models and weights',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+    parser.add_argument(
+        '-s', '--source',
+        help='prefix path to the data source',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+    parser.add_argument(
+        '-a', '--annotations',
+        help='prefix path to the converted annotations and datasets meta data',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+    parser.add_argument(
+        '-e', '--extensions',
+        help='prefix path to extensions folder',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+    parser.add_argument(
+        '--cpu_extensions_mode',
+        help='specified preferable set of processor instruction for automatic searching cpu extension lib',
+        required=False,
+        choices=['avx2', 'sse4']
+    )
+    parser.add_argument(
+        '-b', '--bitstreams',
+        help='prefix path to bitstreams folder',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+    parser.add_argument(
+        '--stored_predictions',
+        help='path to file with saved predictions. Used for development',
+        # since at the first time file does not exist and then created we can not always check existence
+        required=False
+    )
+    parser.add_argument(
+        '-C', '--converted_models',
+        help='directory to store Model Optimizer converted models. Used for DLSDK launcher only',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+    parser.add_argument(
+        '-M', '--model_optimizer',
+        help='path to model optimizer caffe directory',
+        type=partial(get_path, is_directory=True),
+        # there is no default value because if user did not specify it we use specific locations
+        # defined in model_conversion.py
+        required=False
+    )
+    parser.add_argument(
+        '--tf_custom_op_config_dir',
+        help='path to directory with tensorflow custom operation configuration files for model optimizer',
+        type=partial(get_path, is_directory=True),
+        # there is no default value because if user did not specify it we use specific location
+        # defined in model_conversion.py
+        required=False
+    )
+    parser.add_argument(
+        '--tf_obj_detection_api_pipeline_config_path',
+        help='path to directory with tensorflow object detection api pipeline configuration files for model optimizer',
+        type=partial(get_path, is_directory=True),
+        # there is no default value because if user did not specify it we use specific location
+        # defined in model_conversion.py
+        required=False
+    )
+    parser.add_argument(
+        '--progress',
+        help='progress reporter',
+        required=False,
+        default='bar'
+    )
+    parser.add_argument(
+        '-tf', '--target_framework',
+        help='framework for infer',
+        required=False
+    )
+    parser.add_argument(
+        '-td', '--target_devices',
+        help='Space separated list of devices for infer',
+        required=False,
+        nargs='+'
+    )
+
+    parser.add_argument(
+        '-tt', '--target_tags',
+        help='Space separated list of launcher tags for infer',
+        required=False,
+        nargs='+'
+    )
+
+    parser.add_argument(
+        '-l', '--log_file',
+        help='file for additional logging results',
+        required=False
+    )
+
+    parser.add_argument(
+        '--ignore_result_formatting',
+        help='allow to get raw metrics results without data formatting',
+        required=False,
+        default=False
+    )
+
+    parser.add_argument(
+        '-am', '--affinity_map',
+        help='prefix path to the affinity maps',
+        type=partial(get_path, is_directory=True),
+        default=Path.cwd(),
+        required=False
+    )
+
+    parser.add_argument(
+        '--aocl',
+        help='aocl executable path for FPGA bitstream programming',
+        type=get_path,
+        required=False
+    )
+
+    return parser
+
+
+def main():
+    args = build_arguments_parser().parse_args()
+    progress_reporter = ProgressReporter.provide((
+        args.progress if ':' not in args.progress
+        else args.progress.split(':')[0]
+    ))
+    if args.log_file:
+        add_file_handler(args.log_file)
+
+    config = ConfigReader.merge(args)
+
+    for model in config['models']:
+        for launcher_config in model['launchers']:
+            for dataset_config in model['datasets']:
+                print_processing_info(
+                    model['name'],
+                    launcher_config['framework'],
+                    launcher_config['device'],
+                    launcher_config.get('tags'),
+                    dataset_config['name']
+                )
+                model_evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
+                progress_reporter.reset(len(model_evaluator.dataset))
+                model_evaluator.process_dataset(args.stored_predictions, progress_reporter=progress_reporter)
+                model_evaluator.compute_metrics(ignore_results_formatting=args.ignore_result_formatting)
+
+                model_evaluator.release()
+
+
+def print_processing_info(model, launcher, device, tags, dataset):
+    print_info('Processing info:')
+    print_info('model: {}'.format(model))
+    print_info('launcher: {}'.format(launcher))
+    if tags:
+        print_info('launcher tags: {}'.format(' '.join(tags)))
+    print_info('device: {}'.format(device))
+    print_info('dataset: {}'.format(dataset))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/README.md b/tools/accuracy_checker/accuracy_checker/metrics/README.md
new file mode 100644
index 000000000..c1381b253
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/README.md
@@ -0,0 +1,127 @@
+# Metrics
+
+For correct work metrics require specific representation format. 
+(e. g. map expects detection annotation and detection prediction for evaluation). 
+
+In case when you use complicated representation located in representation container, you need to add options `annotation_source` and `prediction_source` in configuration file to
+select specific representation, another way metric calculation possible only if container has only one suitable representation and will be resolved automatically.
+`annotation_source` and `prediction_source` should contain only one annotation identifier and output layer name respectively.
+You may optionally provide `reference` field for metric, if you want calculated metric tested against specific value (i.e. reported in canonical paper) and acceptable `threshold` for metric deviation from reference value.
+
+Every metric has parameters available for configuration. 
+
+Accuracy Checker supports following set of metrics:
+
+* `accuracy` - classification accuracy metric, defined as the number of correct predictions divided by the total number of predictions.
+Supported representation: `ClassificationAnnotation`, `ClassificationPrediction`
+  * `top_k` - the number of classes with the highest probability, which will be used to decide if prediction is correct.
+* `accuracy_per_class` - classification accuracy metric which represents results for each class. Supported representation: `ClassificationAnnotation`, `ClassificationPrediction`.
+  * `top_k` - the number of classes with the highest probability, which will be used to decide if prediction is correct.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+* `character_recognition_accuracy` - accuracy metric for character recognition task. Supported representation: `CharacterRecognitionAnnotation`, `CharacterRecognitionPrediction`.
+* `map` - mean average precision. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
+  * `overlap_threshold` - minimal value for intersection over union that allows to make decision that prediction bounding box is true positive.
+  * `overlap_method` - method for calculation bbox overlap. You can choose between intersection over union (`iou`), defined as area of intersection divided by union of annotation and prediction boxes areas, and intersection over area (`ioa`), defined as area of intersection divided by ara of prediction box.
+  * `include_boundaries` - allows include boundaries in overlap calculation process. If it is True then width and height of box is calculated by max - min + 1.
+  * `ignore_difficult` - allows to ignore difficult annotation boxes in metric calculation. In this case, difficult boxes are filtered annotations from postprocessing stage.
+  * `distinct_conf` - select only values for distinct confidences.
+  * `allow_multiple_matches_per_ignored` - allows multiple matches per ignored.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `integral` - integral type for average precision calculation. Pascal VOC `11point` and `max` approaches are available.
+* `miss_rate` - miss rate metric of detection models.  Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
+  * `overlap_threshold` - minimal value for intersection over union that allows to make decision that prediction bounding box is true positive.
+  * `overlap_method` - method for calculation bbox overlap. You can choose between intersection over union (`iou`), defined as area of intersection divided by union of annotation and prediction boxes areas, and intersection over area (`ioa`), defined as area of intersection divided by ara of prediction box.
+  * `include_boundaries` - allows include boundaries in overlap calculation process. If it is True then width and height of box is calculated by max - min + 1.
+  * `ignore_difficult` - allows to ignore difficult annotation boxes in metric calculation. In this case, difficult boxes are filtered annotations from postprocessing stage.
+  * `distinct_conf` - select only values for distinct confidences.
+  * `allow_multiple_matches_per_ignored` - allows multiple matches per ignored.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `fppi_level` - false positive per image level.
+* `recall` - recall metric of detection models. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
+  * `overlap_threshold` - minimal value for intersection over union that allows to make decision that prediction bounding box is true positive.
+  * `overlap_method` - method for calculation bbox overlap. You can choose between intersection over union (`iou`), defined as area of intersection divided by union of annotation and prediction boxes areas, and intersection over area (`ioa`), defined as area of intersection divided by ara of prediction box.
+  * `include_boundaries` - allows include boundaries in overlap calculation process. If it is True then width and height of box is calculated by max - min + 1.
+  * `ignore_difficult` - allows to ignore difficult annotation boxes in metric calculation. In this case, difficult boxes are filtered annotations from postprocessing stage.
+  * `distinct_conf` - select only values for distinct confidences.
+  * `allow_multiple_matches_per_ignored` - allows multiple matches per ignored.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+* `detection_accuracy` - accuracy for detection models. Supported representations: `DetectionAnnotation`, `DetectionPrediction`.
+  * `overlap_threshold` - minimal value for intersection over union that allows to make decision that prediction bounding box is true positive.
+  * `overlap_method` - method for calculation bbox overlap. You can choose between intersection over union (`iou`), defined as area of intersection divided by union of annotation and prediction boxes areas, and intersection over area (`ioa`), defined as area of intersection divided by ara of prediction box.
+  * `include_boundaries` - allows include boundaries in overlap calculation process. If it is True then width and height of box is calculated by max - min + 1.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `use_normalization` - allows to normalize confusion_matrix for metric calculation.
+* `segmentation_accuracy` - pixel accuracy for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
+  * `use_argmax` - allows to use argmax for prediction mask.
+* `mean_iou` - mean intersection over union for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
+  * `use_argmax` - allows to use argmax for prediction mask.
+* `mean_accuracy` - mean accuracy for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
+  * `use_argmax` - allows to use argmax for prediction mask.
+* `frequency_weighted_accuracy` - frequency weighted accuracy for semantic segmentation models. Supported representations: `SegmentationAnnotation`, `SegmentationPrediction`.
+  * `use_argmax` - allows to use argmax for prediction mask.
+More detailed information about calculation segmentation metrics you can find [here][segmentation_article].
+* `cmc` - Cumulative Matching Characteristics (CMC) score. Supported representations: `ReIdentificationAnnotation`, `ReIdentificationPrediction`.
+  * `top_k` -  number of k highest ranked samples to consider when matching.
+  * `separate_camera_set` - should identities from the same camera view be filtered out.
+  * `single_gallery_shot` -  each identity has only one instance in the gallery.
+  * `number_single_shot_repeats` - number of repeats for single_gallery_shot setting (required for CUHK).
+  * `first_match_break` - break on first matched gallery sample.
+* `reid_map` - Mean Average Precision score for object reidentification. Supported representations: `ReIdentificationAnnotation`, `ReIdentificationPrediction`.
+  * `uninterpolated_auc` - should area under precision recall curve be computed using trapezoidal rule or directly.
+*  `pairwise_accuracy` - pairwise accuracy for object reidentification. Supported representations: `ReIdentificationClassificationAnnotation`, `ReIdentificationPrediction`.
+  * `min_score` - min score for determining that objects are different. You can provide value or use `train_median` value which will be calculated if annotations has training subset.
+* `pairwise_accuracy_subsets` - object reidentification pairwise accuracy with division dataset on test and train subsets for calculation mean score. Supported representations: `ReIdentificationClassificationAnnotation`, `ReIdentificationPrediction`.
+  * `subset_number` - number of subsets for separating. 
+* `mae` - [Mean Absolute Error][mae]. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
+* `mae_on_intervals` - Mean Absolute Error estimated magnitude for specific value range. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
+  * `intervals` - comma-separated list of interval boundaries.
+  * `ignore_values_not_in_interval` - allows create additional intervals for values less than minimal value in interval and greater than maximal.
+  * `start` , `step`, `end` - way to generate range of intervals from `start` to `end` with length `step`.
+* `mse` - [Mean Squared Error][mse]. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
+* `mse_on_intervals` - Mean Squared Error estimated magnitude for specific value range. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
+  * `intervals` - comma-separated list of interval boundaries.
+  * `ignore_values_not_in_interval` - allows create additional intervals for values less than minimal value in interval and greater than maximal.
+  * `start`, `step`, `end` - generate range of intervals from `start` to `end` with length `step`.
+* `rmse` - [Root Mean Squared Error][rmse]. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
+* `rmse_on_intervals` - Root Mean Squared Error estimated magnitude for specific value range. Supported representations: `RegressionAnnotation`, `RegressionPrediction`.
+  * `intervals` - comma-separated list of interval boundaries.
+  * `ignore_values_not_in_interval` - allows create additional intervals for values less than minimal value in interval and greater than maximal.
+  * `start`, `step`, `end` - generate range of intervals from `start` to `end` with length `step`.
+* `per_point_normed_error` - Normed Error for measurement the quality of landmarks' positions. Estimated results for each point independently. Supported representations: `FacialLandmarksAnnotation`, `FacialLandmarksPrediction`.
+* `normed_error` - Normed Error for measurement the quality of landmarks' positions. Supported representations: `FacialLandmarksAnnotation`, `FacialLandmarksPrediction`.
+  * `calculate_std` - allows calculation of standard deviation (default value: `False`)
+  * `percentile` - calculate error rate for given percentile.
+* `per_point_regression` - Root Mean Squared Error for 2D points estimated results for each point independently. Supported representations: `PointRegressionAnnotation`, `PointRegressionPrediction`.
+  * `scaling_distance` - comma-separated list of 2 point indexes, distance between which will be used for scaling regression distances.
+* `average point error` - Root Mean Squared Error for 2D points estimated average results for all points. Supported representations: `PointRegressionAnnotation`, `PointRegressionPrediction`.
+  * `scaling_distance` - comma-separated list of 2 point indexes, distance between which will be used for scaling regression distances.
+* `multi_accuracy` - accuracy for multilabel recognition task. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `calculate_average` - allows calculation of average accuracy (default value: `True`).
+* `multi_precision` - precision metric for multilabel recognition. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `calculate_average` - allows calculation of average precision (default value: `True`).
+* `multi_recall` - recall metric for multilabel recognition. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `calculate_average` - allows calculation of average recall (default value: `True`).
+* `f1_score` - [F score][f_score] metric for multilabel recognition. Supported representations: `MultiLabelRecognitionAnnotation`, `MultiLabelRecognitionPrediction`.
+  * `label_map` - the field in annotation metadata, which contains dataset label map.
+  * `calculate_average` - allows calculation of average f-score (default value: `True`).
+* `text_detection` - Harmonic mean of precision and recall for text detection task. Supported representations: `TextDetectionAnnotation`, `TextDetectionPrediction`.
+  * `iou_constrain` - minimal value for intersection over union that allows to make decision that prediction polygon is true positive.
+  * `ignore_difficult` - allows to ignore difficult ground truth text polygons in metric calculation.
+  * `area_precision_constrain` - minimal value for intersection over union that allows to make decision that prediction polygon matched with ignored annotation.
+* `coco_precision` - MS COCO Average Precision metric for keypoints recognition and object detection tasks. Supported representations: `PoseEstimationAnnotation`, `PoseEstimationPrediction`, `DetectionAnnotation`, `DetectionPrediction`.
+  * `max_detections` - max number of predicted results per image. If you have more predictions,the results with minimal confidence will be ignored.
+  * `threshold` - intersection over union threshold. You can specify one value or comma separated range of values. This parameter supports precomputed values for standard COCO thresholds (`.5`, `.75`, `.5:.05:.95`).
+* `coco_recall` - MS COCO Average Recall metric for keypoints recognition and object detection tasks. Supported representations: `PoseEstimationAnnotation`, `PoseEstimationPrediction`, `DetectionAnnotation`, `DetectionPrediction`.
+  * `max_detections` - max number of predicted results per image. If you have more predictions,the results with minimal confidence will be ignored.
+  * `threshold` - intersection over union threshold. You can specify one value or comma separated range of values. This parameter supports precomputed values for standard COCO thresholds (`.5`, `.75`, `.5:.05:.95`).
+* `angle_error` - Mean angle error and Standard deviation of angle error for gaze estimation. Supported representations: `GazeVectorAnnotation`, `GazeVectorPrediction`.
+  
+[segmentation_article]: https://arxiv.org/pdf/1411.4038v2.pdf
+[mae]: https://en.wikipedia.org/wiki/Mean_absolute_error
+[mse]: https://en.wikipedia.org/wiki/Mean_squared_error
+[rmse]: https://en.wikipedia.org/wiki/Root-mean-square_deviation
+[f_score]: https://en.wikipedia.org/wiki/F1_score
+[psnr]: https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/__init__.py b/tools/accuracy_checker/accuracy_checker/metrics/__init__.py
new file mode 100644
index 000000000..8fec44953
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/__init__.py
@@ -0,0 +1,92 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .metric_executor import MetricsExecutor
+
+from .classification import ClassificationAccuracy, ClassificationAccuracyClasses
+from .detection import (DetectionMAP, MissRate, Recall, DetectionAccuracyMetric)
+from .reid import CMCScore, ReidMAP, PairwiseAccuracy, PairwiseAccuracySubsets
+from .semantic_segmentation import SegmentationAccuracy, SegmentationIOU, SegmentationMeanAccuracy, SegmentationFWAcc
+from .character_recognition import CharacterRecognitionAccuracy
+from .regression import (
+    MeanAbsoluteErrorOnInterval,
+    MeanSquaredErrorOnInterval,
+
+    MeanAbsoluteError,
+    MeanSquaredError,
+
+    RootMeanSquaredErrorOnInterval,
+    RootMeanSquaredError,
+
+    FacialLandmarksPerPointNormedError,
+    FacialLandmarksNormedError,
+
+    PeakSignalToNoiseRatio,
+
+    AngleError
+)
+from .multilabel_recognition import MultiLabelRecall, MultiLabelPrecision, MultiLabelAccuracy, F1Score
+from .text_detection import TextDetectionMetric
+from .coco_metrics import MSCOCOAveragePresicion
+from .hit_ratio import HitRatioMetric, NDSGMetric
+
+
+__all__ = [
+    'MetricsExecutor',
+
+    'ClassificationAccuracy',
+    'ClassificationAccuracyClasses',
+
+    'DetectionMAP',
+    'MissRate',
+    'Recall',
+    'DetectionAccuracyMetric',
+
+    'CMCScore',
+    'ReidMAP',
+    'PairwiseAccuracy',
+    'PairwiseAccuracySubsets',
+
+    'SegmentationAccuracy',
+    'SegmentationIOU',
+    'SegmentationMeanAccuracy',
+    'SegmentationFWAcc',
+
+    'CharacterRecognitionAccuracy',
+
+    'MeanAbsoluteError',
+    'MeanSquaredError',
+    'MeanAbsoluteErrorOnInterval',
+    'MeanSquaredErrorOnInterval',
+    'RootMeanSquaredError',
+    'RootMeanSquaredErrorOnInterval',
+    'FacialLandmarksPerPointNormedError',
+    'FacialLandmarksNormedError',
+    'PeakSignalToNoiseRatio',
+    'AngleError',
+
+    'MultiLabelAccuracy',
+    'MultiLabelRecall',
+    'MultiLabelPrecision',
+    'F1Score',
+
+    'TextDetectionMetric',
+
+    'MSCOCOAveragePresicion',
+
+    'HitRatioMetric',
+    'NDSGMetric'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/average_meter.py b/tools/accuracy_checker/accuracy_checker/metrics/average_meter.py
new file mode 100644
index 000000000..3c2e37a93
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/average_meter.py
@@ -0,0 +1,46 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+
+class AverageMeter:
+    def __init__(self, loss=None, counter=None):
+        self.loss = loss or (lambda x, y: int(x == y))
+        self.counter = counter or (lambda x: 1)
+        self.accumulator = None
+        self.total_count = None
+
+    def update(self, annotation_val, prediction_val):
+        loss = self.loss(annotation_val, prediction_val)
+        increment = self.counter(annotation_val)
+
+        if self.accumulator is None and self.total_count is None:
+            # wrap in array for using numpy.divide with where attribute
+            # and support cases when loss function returns list-like object
+            self.accumulator = np.array(loss, dtype=float)
+            self.total_count = np.array(increment, dtype=float)
+        else:
+            self.accumulator += loss
+            self.total_count += increment
+
+    def evaluate(self):
+        if self.total_count is None:
+            return 0.0
+
+        return np.divide(
+            self.accumulator, self.total_count, out=np.zeros_like(self.accumulator), where=self.total_count != 0
+        )
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/character_recognition.py b/tools/accuracy_checker/accuracy_checker/metrics/character_recognition.py
new file mode 100644
index 000000000..fbb11c8a4
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/character_recognition.py
@@ -0,0 +1,36 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..representation import CharacterRecognitionAnnotation, CharacterRecognitionPrediction
+from .metric import PerImageEvaluationMetric
+from .average_meter import AverageMeter
+
+
+class CharacterRecognitionAccuracy(PerImageEvaluationMetric):
+    __provider__ = 'character_recognition_accuracy'
+
+    annotation_types = (CharacterRecognitionAnnotation, )
+    prediction_types = (CharacterRecognitionPrediction, )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.accuracy = AverageMeter(lambda annotation, prediction: int(annotation == prediction))
+
+    def update(self, annotation, prediction):
+        self.accuracy.update(annotation.label, prediction.label)
+
+    def evaluate(self, annotations, predictions):
+        return self.accuracy.evaluate()
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/classification.py b/tools/accuracy_checker/accuracy_checker/metrics/classification.py
new file mode 100644
index 000000000..7213c71b4
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/classification.py
@@ -0,0 +1,107 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..representation import ClassificationAnnotation, ClassificationPrediction
+from ..config import NumberField, StringField
+from .metric import BaseMetricConfig, PerImageEvaluationMetric
+from .average_meter import AverageMeter
+
+
+class ClassificationAccuracy(PerImageEvaluationMetric):
+    """
+    Class for evaluating accuracy metric of classification models.
+    """
+
+    __provider__ = 'accuracy'
+
+    annotation_types = (ClassificationAnnotation, )
+    prediction_types = (ClassificationPrediction, )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        def loss(annotation_label, prediction_top_k_labels):
+            return int(annotation_label in prediction_top_k_labels)
+        self.accuracy = AverageMeter(loss)
+
+    def validate_config(self):
+        class _AccuracyValidator(BaseMetricConfig):
+            top_k = NumberField(floats=False, min_value=1, optional=True)
+
+        accuracy_validator = _AccuracyValidator(
+            'accuracy',
+            on_extra_argument=_AccuracyValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        accuracy_validator.validate(self.config)
+
+    def configure(self):
+        self.top_k = self.config.get('top_k', 1)
+
+    def update(self, annotation, prediction):
+        self.accuracy.update(annotation.label, prediction.top_k(self.top_k))
+
+    def evaluate(self, annotations, predictions):
+        return self.accuracy.evaluate()
+
+
+class ClassificationAccuracyClasses(PerImageEvaluationMetric):
+    """
+    Class for evaluating accuracy for each class of classification models.
+    """
+
+    __provider__ = 'accuracy_per_class'
+
+    annotation_types = (ClassificationAnnotation, )
+    prediction_types = (ClassificationPrediction, )
+
+    def validate_config(self):
+        class _AccuracyValidator(BaseMetricConfig):
+            top_k = NumberField(floats=False, min_value=1, optional=True)
+            label_map = StringField(optional=True)
+
+        accuracy_validator = _AccuracyValidator(
+            'accuracy',
+            on_extra_argument=_AccuracyValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        accuracy_validator.validate(self.config)
+
+    def configure(self):
+        self.top_k = self.config.get('top_k', 1)
+        label_map = self.config.get('label_map', 'label_map')
+        self.labels = self.dataset.metadata.get(label_map)
+        self.meta['names'] = list(self.labels.values())
+
+        def loss(annotation_label, prediction_top_k_labels):
+            result = np.zeros_like(list(self.labels.keys()))
+            if annotation_label in prediction_top_k_labels:
+                result[annotation_label] = 1
+
+            return result
+
+        def counter(annotation_label):
+            result = np.zeros_like(list(self.labels.keys()))
+            result[annotation_label] = 1
+            return result
+
+        self.accuracy = AverageMeter(loss, counter)
+
+    def update(self, annotation, prediction):
+        self.accuracy.update(annotation.label, prediction.top_k(self.top_k))
+
+    def evaluate(self, annotations, predictions):
+        return self.accuracy.evaluate()
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/coco_metrics.py b/tools/accuracy_checker/accuracy_checker/metrics/coco_metrics.py
new file mode 100644
index 000000000..8ed223775
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/coco_metrics.py
@@ -0,0 +1,322 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from functools import singledispatch
+from typing import Union
+import numpy as np
+from ..config import NumberField, BaseField
+from ..representation import (
+    DetectionPrediction,
+    DetectionAnnotation,
+    PoseEstimationPrediction,
+    PoseEstimationAnnotation
+)
+from ..utils import get_or_parse_value
+from .overlap import Overlap
+from .metric import BaseMetricConfig, PerImageEvaluationMetric
+
+COCO_THRESHOLDS = {
+    '.50': [0.5],
+    '.75': [0.75],
+    '.50:.05:.95': np.linspace(.5, 0.95, np.round((0.95 - .5) / .05).astype(int) + 1, endpoint=True)
+}
+
+
+class MSCOCOAveragePresicionMetricConfig(BaseMetricConfig):
+    max_detections = NumberField(optional=True)
+    threshold = BaseField(optional=True)
+
+
+class MSCOCOBaseMetric(PerImageEvaluationMetric):
+    annotation_types = (PoseEstimationAnnotation, DetectionAnnotation)
+    prediction_types = (PoseEstimationPrediction, DetectionPrediction)
+
+    def validate_config(self):
+        coco_config_validator = MSCOCOAveragePresicionMetricConfig(
+            'coco_metric', on_extra_argument=MSCOCOAveragePresicionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
+        )
+        coco_config_validator.validate(self.config)
+
+    def configure(self):
+        self.max_detections = self.config.get('max_detections', 20)
+        self.thresholds = get_or_parse_value(self.config.get('threshold', '.50:.05:.95'), COCO_THRESHOLDS)
+        label_map = self.dataset.metadata.get('label_map', [])
+        self.labels = [
+            label for label in label_map
+            if label != self.dataset.metadata.get('background_label')
+        ]
+        self.meta['names'] = [label_map[label] for label in self.labels]
+        self.matching_results = [[] for _ in self.labels]
+
+    def update(self, annotation, prediction):
+        compute_iou, create_boxes = select_specific_parameters(annotation)
+
+        for label_id, label in enumerate(self.labels):
+            detections, scores, dt_difficult = prepare_predictions(prediction, label, self.max_detections)
+            ground_truth, gt_difficult, iscrowd, boxes, areas = prepare_annotations(annotation, label, create_boxes)
+            iou = compute_iou(ground_truth, detections, boxes, areas)
+            self.matching_results[label_id].append(
+                evaluate_image(
+                    ground_truth,
+                    gt_difficult,
+                    iscrowd,
+                    detections,
+                    dt_difficult,
+                    scores,
+                    iou,
+                    self.thresholds
+                    ))
+
+    def evaluate(self, annotations, predictions):
+        pass
+
+
+class MSCOCOAveragePresicion(MSCOCOBaseMetric):
+    __provider__ = 'coco_precision'
+
+    def evaluate(self, annotations, predictions):
+        precision = [
+            compute_precision_recall(self.thresholds, self.matching_results[i])[0]
+            for i, _ in enumerate(self.labels)
+        ]
+
+        return precision
+
+
+class MSCOCORecall(MSCOCOBaseMetric):
+    __provider__ = 'coco_recall'
+
+    def evaluate(self, annotations, predictions):
+        recalls = [
+            compute_precision_recall(self.thresholds, self.matching_results[i])[1]
+            for i, _ in enumerate(self.labels)
+        ]
+
+        return recalls
+@singledispatch
+def select_specific_parameters(annotation):
+    return compute_iou_boxes, False
+
+@select_specific_parameters.register(PoseEstimationAnnotation)
+def pose_estimation_params(annotation):
+    return compute_oks, True
+
+@singledispatch
+def prepare(entry, order):
+    return np.c_[entry.x_mins[order], entry.y_mins[order], entry.x_maxs[order], entry.y_maxs[order]]
+
+
+@prepare.register(Union[PoseEstimationPrediction, PoseEstimationAnnotation])
+def prepare_keypoints(entry, order):
+    if entry.size == 0:
+        return []
+
+    if np.size(entry.x_values[order]) == 0:
+        return []
+
+    return np.concatenate((entry.x_values[order], entry.y_values[order], entry.visibility[order]), axis=-1)
+
+
+def prepare_predictions(prediction, label, max_detections):
+    if prediction.size == 0:
+        return [], [], []
+    prediction_ids = prediction.labels == label
+    scores = prediction.scores[prediction_ids]
+    if np.size(scores) == 0:
+        return [], [], []
+    scores_ids = np.argsort(- scores, kind='mergesort')
+    difficult_box_mask = np.full(prediction.size, False)
+    difficult_box_mask[prediction.metadata.get('difficult_boxes', [])] = True
+    difficult_for_label = difficult_box_mask[prediction_ids]
+    if len(scores_ids) > max_detections:
+        scores_ids = scores_ids[:max_detections]
+    detections = prepare(prediction, prediction_ids)
+    detections = detections[scores_ids]
+
+    return detections, scores[scores_ids], difficult_for_label[scores_ids]
+
+
+def prepare_annotations(annotation, label, create_boxes=False):
+    annotation_ids = annotation.labels == label
+    difficult_box_mask = np.full(annotation.size, False)
+    difficult_box_indices = annotation.metadata.get("difficult_boxes", [])
+    iscrowd = np.array(annotation.metadata.get('iscrowd', [0]*annotation.size))
+    difficult_box_mask[difficult_box_indices] = True
+    difficult_box_mask[iscrowd > 0] = True
+    difficult_label = difficult_box_mask[annotation_ids]
+    not_difficult_box_indices = np.argwhere(~difficult_label).reshape(-1)
+    difficult_box_indices = np.argwhere(difficult_label).reshape(-1)
+    iscrowd_label = iscrowd[annotation_ids]
+    order = np.hstack((not_difficult_box_indices, difficult_box_indices)).astype(int)
+    boxes = None
+    areas = None
+    if create_boxes:
+        boxes = np.array(annotation.bboxes)
+        boxes = boxes[annotation_ids]
+        areas = np.array(annotation.areas)
+        areas = areas[annotation_ids] if np.size(areas) > 0 else np.array([])
+        boxes = boxes[order]
+        areas = areas[order]
+
+    return prepare(annotation, annotation_ids)[order], difficult_label[order], iscrowd_label[order], boxes, areas
+
+
+def compute_precision_recall(thresholds, matching_results):
+    num_thresholds = len(thresholds)
+    rectangle_thresholds = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
+    num_rec_thresholds = len(rectangle_thresholds)
+    precision = -np.ones((num_thresholds, num_rec_thresholds))  # -1 for the precision of absent categories
+    recall = -np.ones(num_thresholds)
+    dt_scores = np.concatenate([e['scores'] for e in matching_results])
+    inds = np.argsort(-dt_scores, kind='mergesort')
+    dtm = np.concatenate([e['dt_matches'] for e in matching_results], axis=1)[:, inds]
+    dt_ignored = np.concatenate([e['dt_ignore'] for e in matching_results], axis=1)[:, inds]
+    gt_ignored = np.concatenate([e['gt_ignore'] for e in matching_results])
+    npig = np.count_nonzero(gt_ignored == 0)
+    tps = np.logical_and(dtm, np.logical_not(dt_ignored))
+    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dt_ignored))
+    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
+    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
+    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+        tp = np.array(tp)
+        fp = np.array(fp)
+        num_detections = len(tp)
+        rc = tp / npig
+        pr = tp / (fp + tp + np.spacing(1))
+        q = np.zeros(num_rec_thresholds)
+
+        if num_detections:
+            recall[t] = rc[-1]
+        else:
+            recall[t] = 0
+
+        # numpy is slow without cython optimization for accessing elements
+        #  use python array gets significant speed improvement
+        pr = pr.tolist()
+        q = q.tolist()
+
+        for i in range(num_detections - 1, 0, -1):
+            if pr[i] > pr[i - 1]:
+                pr[i - 1] = pr[i]
+
+        inds = np.searchsorted(rc, rectangle_thresholds, side='left')
+        try:
+            for ri, pi in enumerate(inds):
+                q[ri] = pr[pi]
+        except IndexError:
+            pass
+        precision[t] = np.array(q)
+
+    mean_precision = 0 if np.size(precision[precision > -1]) == 0 else np.mean(precision[precision > -1])
+    mean_recall = 0 if np.size(recall[recall > -1]) == 0 else np.mean(recall[recall > -1])
+
+    return mean_precision, mean_recall
+
+
+def compute_iou_boxes(annotation, prediction, *args, **kwargs):
+    if np.size(annotation) == 0 or np.size(prediction) == 0:
+        return []
+    overlap = Overlap.provide('iou')
+    iou = np.zeros((prediction.size // 4, annotation.size // 4), dtype=np.float32)
+    for i, box_a in enumerate(annotation):
+        for j, box_b in enumerate(prediction):
+            iou[j, i] = overlap(box_a, box_b)
+
+    return iou
+
+
+def compute_oks(annotation_points, prediction_points, annotation_boxes, annotation_areas):
+    if np.size(prediction_points) == 0 or np.size(annotation_points) == 0:
+        return []
+    oks = np.zeros((len(prediction_points), len(annotation_points)))
+    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89])/10.0
+    variance = (sigmas * 2)**2
+    # compute oks between each detection and ground truth object
+    for gt_idx, gt_points in enumerate(annotation_points):
+        # create bounds for ignore regions(double the gt bbox)
+        xgt = gt_points[:17]
+        ygt = gt_points[17:34]
+        vgt = gt_points[34:]
+        k1 = np.count_nonzero(vgt > 0)
+        x0_bbox, y0_bbox, x1_bbox, y1_bbox = annotation_boxes[gt_idx]
+        area_gt = annotation_areas[gt_idx]
+        w_bbox = x1_bbox - x0_bbox
+        h_bbox = y1_bbox - y0_bbox
+        x0 = x0_bbox - w_bbox
+        x1 = x0_bbox + w_bbox * 2
+        y0 = y0_bbox - h_bbox
+        y1 = y0_bbox + h_bbox * 2
+        for dt_idx, dt_points in enumerate(prediction_points):
+            xdt = dt_points[:17]
+            ydt = dt_points[17:34]
+            if k1 > 0:
+                # measure the per-keypoint distance if keypoints visible
+                x_diff = xdt - xgt
+                y_diff = ydt - ygt
+            else:
+                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
+                zeros = np.zeros(len(sigmas))
+                x_diff = np.max((zeros, x0 - xdt), axis=0) + np.max((zeros, xdt - x1), axis=0)
+                y_diff = np.max((zeros, y0 - ydt), axis=0) + np.max((zeros, ydt - y1), axis=0)
+            evaluation = (x_diff ** 2 + y_diff ** 2) / variance / (area_gt + np.spacing(1)) / 2
+            if k1 > 0:
+                evaluation = evaluation[vgt > 0]
+            oks[dt_idx, gt_idx] = np.sum(np.exp(- evaluation)) / evaluation.shape[0]
+
+    return oks
+
+
+def evaluate_image(ground_truth, gt_difficult, iscrowd, detections, dt_difficult, scores, iou, thresholds):
+    thresholds_num = len(thresholds)
+    gt_num = len(ground_truth)
+    dt_num = len(detections)
+    gt_matched = np.zeros((thresholds_num, gt_num))
+    dt_matched = np.zeros((thresholds_num, dt_num))
+    gt_ignored = gt_difficult
+    dt_ignored = np.zeros((thresholds_num, dt_num))
+    if np.size(iou):
+        for tind, t in enumerate(thresholds):
+            for dtind, _ in enumerate(detections):
+                # information about best match so far (matched_id = -1 -> unmatched)
+                iou_current = min([t, 1-1e-10])
+                matched_id = -1
+                for gtind, _ in enumerate(ground_truth):
+                    # if this gt already matched, and not a crowd, continue
+                    if gt_matched[tind, gtind] > 0 and not iscrowd[gtind]:
+                        continue
+                    # if dt matched to reg gt, and on ignore gt, stop
+                    if matched_id > -1 and not gt_ignored[matched_id] and gt_ignored[gtind]:
+                        break
+                    # continue to next gt unless better match made
+                    if iou[dtind, gtind] < iou_current:
+                        continue
+                    # if match successful and best so far, store appropriately
+                    iou_current = iou[dtind, gtind]
+                    matched_id = gtind
+                # if match made store id of match for both dt and gt
+                if matched_id == -1:
+                    continue
+                dt_ignored[tind, dtind] = gt_ignored[matched_id]
+                dt_matched[tind, dtind] = 1
+                gt_matched[tind, matched_id] = dtind
+    # store results for given image
+    return {
+        'dt_matches': dt_matched,
+        'gt_matches': gt_matched,
+        'gt_ignore': gt_ignored,
+        'dt_ignore': np.logical_or(dt_ignored, dt_difficult),
+        'scores': scores
+    }
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/detection.py b/tools/accuracy_checker/accuracy_checker/metrics/detection.py
new file mode 100644
index 000000000..97ce96187
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/detection.py
@@ -0,0 +1,487 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import bisect
+import enum
+import warnings
+from typing import List
+
+import numpy as np
+
+from ..utils import finalize_metric_result
+from .overlap import Overlap, IOA
+from ..config import BoolField, NumberField, StringField
+from ..representation import DetectionAnnotation, DetectionPrediction
+from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
+
+
+class APIntegralType(enum.Enum):
+    voc_11_point = '11point'
+    voc_max = 'max'
+
+
+class BaseDetectionMetricConfig(BaseMetricConfig):
+    overlap_threshold = NumberField(min_value=0, max_value=1, optional=True)
+    ignore_difficult = BoolField(optional=True)
+    include_boundaries = BoolField(optional=True)
+    distinct_conf = BoolField(optional=True)
+    allow_multiple_matches_per_ignored = BoolField(optional=True)
+    overlap_method = StringField(optional=True, choices=Overlap.providers)
+    use_filtered_tp = BoolField(optional=True)
+
+
+class BaseDetectionMetricMixin:
+    def configure(self):
+        self.overlap_threshold = self.config.get('overlap_threshold', 0.5)
+        self.ignore_difficult = self.config.get('ignore_difficult', True)
+        self.include_boundaries = self.config.get('include_boundaries', True)
+        self.distinct_conf = self.config.get('distinct_conf', False)
+        self.allow_multiple_matches_per_ignored = self.config.get('allow_multiple_matches_per_ignored', False)
+        self.overlap_method = Overlap.provide(self.config.get('overlap', 'iou'), self.include_boundaries)
+        self.use_filtered_tp = self.config.get('use_filtered_tp', False)
+
+        label_map = self.config.get('label_map', 'label_map')
+        labels = self.dataset.metadata.get(label_map, {})
+        self.labels = labels.keys()
+        valid_labels = list(filter(lambda x: x != self.dataset.metadata.get('background_label'), self.labels))
+        self.meta['names'] = [labels[name] for name in valid_labels]
+
+    def per_class_detection_statistics(self, annotations, predictions, labels):
+        labels_stat = {}
+        for label in labels:
+            tp, fp, conf, n = bbox_match(
+                annotations, predictions, int(label),
+                self.overlap_method, self.overlap_threshold,
+                self.ignore_difficult, self.allow_multiple_matches_per_ignored, self.include_boundaries,
+                self.use_filtered_tp
+            )
+
+            if not tp.size:
+                labels_stat[label] = {
+                    'precision': np.array([]),
+                    'recall': np.array([]),
+                    'thresholds': conf,
+                    'fppi': np.array([])
+                }
+                continue
+
+            # select only values for distinct confidences
+            if self.distinct_conf:
+                distinct_value_indices = np.where(np.diff(conf))[0]
+                threshold_indexes = np.r_[distinct_value_indices, tp.size - 1]
+            else:
+                threshold_indexes = np.arange(conf.size)
+
+            tp, fp = np.cumsum(tp)[threshold_indexes], np.cumsum(fp)[threshold_indexes]
+
+            labels_stat[label] = {
+                'precision': tp / np.maximum(tp + fp, np.finfo(np.float64).eps),
+                'recall': tp / np.maximum(n, np.finfo(np.float64).eps),
+                'thresholds': conf[threshold_indexes],
+                'fppi': fp / len(annotations)
+            }
+
+        return labels_stat
+
+
+class DetectionMAP(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
+    """
+    Class for evaluating mAP metric of detection models.
+    """
+
+    __provider__ = 'map'
+
+    annotation_types = (DetectionAnnotation, )
+    prediction_types = (DetectionPrediction, )
+
+    def validate_config(self):
+        class _MAPConfigValidator(BaseDetectionMetricConfig):
+            integral = StringField(choices=[e.value for e in APIntegralType], optional=True)
+
+        map_config_validator = _MAPConfigValidator(
+            self.__provider__, on_extra_argument=_MAPConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        map_config_validator.validate(self.config)
+
+    def configure(self):
+        super().configure()
+        self.integral = APIntegralType(self.config.get('integral', APIntegralType.voc_max))
+
+    def evaluate(self, annotations, predictions):
+        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
+        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
+
+        average_precisions = []
+        for label in labels_stat:
+            label_precision = labels_stat[label]['precision']
+            label_recall = labels_stat[label]['recall']
+            if label_recall.size:
+                ap = average_precision(label_precision, label_recall, self.integral)
+                average_precisions.append(ap)
+            else:
+                average_precisions.append(np.nan)
+
+        average_precisions, self.meta['names'] = finalize_metric_result(average_precisions, self.meta['names'])
+        if not average_precisions:
+            warnings.warn("No detections to compute mAP")
+            average_precisions.append(0)
+
+        return average_precisions
+
+
+class MissRate(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
+    """
+    Class for evaluating Miss Rate metric of detection models.
+    """
+
+    __provider__ = 'miss_rate'
+
+    annotation_types = (DetectionAnnotation, )
+    prediction_types = (DetectionPrediction, )
+
+    def validate_config(self):
+        class _MRConfigValidator(BaseDetectionMetricConfig):
+            fppi_level = NumberField(min_value=0, max_value=1)
+
+        nms_config_validator = _MRConfigValidator(
+            self.__provider__, on_extra_argument=_MRConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        nms_config_validator.validate(self.config)
+
+    def configure(self):
+        super().configure()
+        self.fppi_level = self.config.get('fppi_level')
+
+    def evaluate(self, annotations, predictions):
+        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
+        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
+
+        miss_rates = []
+        for label in labels_stat:
+            label_miss_rate = 1.0 - labels_stat[label]['recall']
+            label_fppi = labels_stat[label]['fppi']
+
+            position = bisect.bisect_left(label_fppi, self.fppi_level)
+            m0 = max(0, position - 1)
+            m1 = position if position < len(label_miss_rate) else m0
+            miss_rates.append(0.5 * (label_miss_rate[m0] + label_miss_rate[m1]))
+
+        return miss_rates
+
+
+class Recall(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
+    """
+    Class for evaluating recall metric of detection models.
+    """
+
+    __provider__ = 'recall'
+
+    annotation_types = (DetectionAnnotation, )
+    prediction_types = (DetectionPrediction, )
+
+    def validate_config(self):
+        recall_config_validator = BaseDetectionMetricConfig(
+            self.__provider__, on_extra_argument=BaseDetectionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
+        )
+        recall_config_validator.validate(self.config)
+
+    def evaluate(self, annotations, predictions):
+        valid_labels = get_valid_labels(self.labels, self.dataset.metadata.get('background_label'))
+        labels_stat = self.per_class_detection_statistics(annotations, predictions, valid_labels)
+
+        recalls = []
+        for label in labels_stat:
+            label_recall = labels_stat[label]['recall']
+            if label_recall.size:
+                max_recall = label_recall[-1]
+                recalls.append(max_recall)
+            else:
+                recalls.append(np.nan)
+
+        recalls, self.meta['names'] = finalize_metric_result(recalls, self.meta['names'])
+        if not recalls:
+            warnings.warn("No detections to compute mAP")
+            recalls.append(0)
+
+        return recalls
+
+
+class DetectionAccuracyMetric(BaseDetectionMetricMixin, FullDatasetEvaluationMetric):
+    __provider__ = 'detection_accuracy'
+
+    annotation_types = (DetectionAnnotation, )
+    prediction_types = (DetectionPrediction, )
+
+    def validate_config(self):
+        class _DAConfigValidator(BaseDetectionMetricConfig):
+            use_normalization = BoolField(optional=True)
+
+        da_config_validator = _DAConfigValidator(
+            self.__provider__, on_extra_argument=_DAConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        da_config_validator.validate(self.config)
+
+    def configure(self):
+        super().configure()
+        self.use_normalization = self.config.get('use_normalization', False)
+
+    def evaluate(self, annotations, predictions):
+        all_matches, _, _ = match_detections_class_agnostic(
+            predictions, annotations, self.overlap_threshold, self.overlap_method
+        )
+        cm = confusion_matrix(all_matches, predictions, annotations, len(self.labels))
+        if self.use_normalization:
+            return np.mean(normalize_confusion_matrix(cm).diagonal())
+
+        return float(np.sum(cm.diagonal())) / float(np.maximum(1, np.sum(cm)))
+
+
+def confusion_matrix(all_matched_ids, predicted_data, gt_data, num_classes):
+    out_cm = np.zeros([num_classes, num_classes], dtype=np.int32)
+    for gt, prediction in zip(gt_data, predicted_data):
+        for match_pair in all_matched_ids[gt.identifier]:
+            gt_label = int(gt.labels[match_pair[0]])
+            pred_label = int(prediction.labels[match_pair[1]])
+            out_cm[gt_label, pred_label] += 1
+
+    return out_cm
+
+
+def normalize_confusion_matrix(cm):
+    row_sums = np.maximum(1, np.sum(cm, axis=1, keepdims=True)).astype(np.float32)
+    return cm.astype(np.float32) / row_sums
+
+
+def match_detections_class_agnostic(predicted_data, gt_data, min_iou, overlap_method):
+    all_matches = {}
+    total_gt_bbox_num = 0
+    matched_gt_bbox_num = 0
+
+    for gt, prediction in zip(gt_data, predicted_data):
+        gt_bboxes = np.stack((gt.x_mins, gt.y_mins, gt.x_maxs, gt.y_maxs), axis=-1)
+        predicted_bboxes = np.stack(
+            (prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs), axis=-1
+        )
+
+        total_gt_bbox_num += len(gt_bboxes)
+
+        similarity_matrix = calculate_similarity_matrix(gt_bboxes, predicted_bboxes, overlap_method)
+
+        matches = []
+        for _ in gt_bboxes:
+            best_match_pos = np.unravel_index(similarity_matrix.argmax(), similarity_matrix.shape)
+            best_match_value = similarity_matrix[best_match_pos]
+
+            if best_match_value <= min_iou:
+                break
+
+            gt_id = best_match_pos[0]
+            predicted_id = best_match_pos[1]
+
+            similarity_matrix[gt_id, :] = 0.0
+            similarity_matrix[:, predicted_id] = 0.0
+
+            matches.append((gt_id, predicted_id))
+            matched_gt_bbox_num += 1
+
+        all_matches[gt.identifier] = matches
+
+    return all_matches, total_gt_bbox_num, matched_gt_bbox_num
+
+
+def calculate_similarity_matrix(set_a, set_b, overlap):
+    similarity = np.zeros([len(set_a), len(set_b)], dtype=np.float32)
+    for i, box_a in enumerate(set_a):
+        for j, box_b in enumerate(set_b):
+            similarity[i, j] = overlap(box_a, box_b)
+
+    return similarity
+
+
+def average_precision(precision, recall, integral):
+    if integral == APIntegralType.voc_11_point:
+        result = 0.
+        for point in np.arange(0., 1.1, 0.1):
+            accumulator = 0 if np.sum(recall >= point) == 0 else np.max(precision[recall >= point])
+            result = result + accumulator / 11.
+
+        return result
+
+    if integral != APIntegralType.voc_max:
+        raise NotImplementedError("Integral type not implemented")
+
+    # first append sentinel values at the end
+    recall = np.concatenate(([0.], recall, [1.]))
+    precision = np.concatenate(([0.], precision, [0.]))
+
+    # compute the precision envelope
+    for i in range(precision.size - 1, 0, -1):
+        precision[i - 1] = np.maximum(precision[i - 1], precision[i])
+
+    # to calculate area under PR curve, look for points
+    # where X axis (recall) changes value
+    change_point = np.where(recall[1:] != recall[:-1])[0]
+    # and sum (\Delta recall) * recall
+    return np.sum((recall[change_point + 1] - recall[change_point]) * precision[change_point + 1])
+
+
+def bbox_match(annotation: List[DetectionAnnotation], prediction: List[DetectionPrediction], label, overlap_evaluator,
+               overlap_thresh=0.5, ignore_difficult=True, allow_multiple_matches_per_ignored=True,
+               include_boundaries=True, use_filtered_tp=False):
+    """
+    Args:
+        annotation: ground truth bounding boxes.
+        prediction: predicted bounding boxes.
+        label: class for which bounding boxes are matched.
+        overlap_evaluator: evaluator of overlap.
+        overlap_thresh: bounding box IoU threshold.
+        ignore_difficult: ignores difficult bounding boxes (see Pascal VOC).
+        allow_multiple_matches_per_ignored: allows multiple matches per ignored.
+        include_boundaries: if is True then width and height of box is calculated by max - min + 1.
+        use_filtered_tp: if is True then ignored object are counted during evaluation.
+    Returns:
+        tp: tp[i] == 1 if detection with i-th highest score is true positive.
+        fp: fp[i] == 1 if detection with i-th highest score is false positive.
+        thresholds: array of confidence thresholds.
+        number_ground_truth = number of true positives.
+    """
+
+    used_boxes, number_ground_truth, difficult_boxes_annotation = _prepare_annotation_boxes(
+        annotation, ignore_difficult, label
+    )
+    prediction_boxes, prediction_images, difficult_boxes_prediction = _prepare_prediction_boxes(
+        label, prediction, ignore_difficult
+    )
+
+    tp = np.zeros_like(prediction_images)
+    fp = np.zeros_like(prediction_images)
+
+    for image in range(prediction_images.shape[0]):
+        gt_img = annotation[prediction_images[image]]
+        annotation_difficult = difficult_boxes_annotation[gt_img.identifier]
+        used = used_boxes[gt_img.identifier]
+
+        idx = gt_img.labels == label
+        if not np.array(idx).any():
+            fp[image] = 1
+            continue
+
+        prediction_box = prediction_boxes[image][1:]
+        annotation_boxes = gt_img.x_mins[idx], gt_img.y_mins[idx], gt_img.x_maxs[idx], gt_img.y_maxs[idx]
+
+        overlaps = overlap_evaluator(prediction_box, annotation_boxes)
+        if ignore_difficult and allow_multiple_matches_per_ignored:
+            ioa = IOA(include_boundaries)
+            ignored = np.where(annotation_difficult == 1)[0]
+            ignored_annotation_boxes = (
+                annotation_boxes[0][ignored], annotation_boxes[1][ignored],
+                annotation_boxes[2][ignored], annotation_boxes[3][ignored]
+            )
+            overlaps[ignored] = ioa.evaluate(prediction_box, ignored_annotation_boxes)
+
+        max_overlap = -np.inf
+
+        not_ignored_overlaps = overlaps[np.where(annotation_difficult == 0)[0]]
+        ignored_overlaps = overlaps[np.where(annotation_difficult == 1)[0]]
+        if not_ignored_overlaps.size:
+            max_overlap = np.max(not_ignored_overlaps)
+
+        if max_overlap < overlap_thresh and ignored_overlaps.size:
+            max_overlap = np.max(ignored_overlaps)
+        max_overlapped = np.where(overlaps == max_overlap)[0]
+
+        def set_false_positive(box_index):
+            is_box_difficult = difficult_boxes_prediction[box_index].any()
+            return int(not ignore_difficult or not is_box_difficult)
+
+        if max_overlap < overlap_thresh:
+            fp[image] = set_false_positive(image)
+            continue
+
+        if not annotation_difficult[max_overlapped].any():
+            if not used[max_overlapped].any():
+                if not ignore_difficult or use_filtered_tp or not difficult_boxes_prediction[image].any():
+                    tp[image] = 1
+                    used[max_overlapped] = True
+            else:
+                fp[image] = set_false_positive(image)
+        elif not allow_multiple_matches_per_ignored:
+            if used[max_overlapped].any():
+                fp[image] = set_false_positive(image)
+            used[max_overlapped] = True
+
+    return tp, fp, prediction_boxes[:, 0], number_ground_truth
+
+
+def _prepare_annotation_boxes(annotation, ignore_difficult, label):
+    used_boxes = {}
+    difficult_boxes = {}
+    num_ground_truth = 0
+
+    for ground_truth in annotation:
+        idx_for_label = ground_truth.labels == label
+        filtered_label = ground_truth.labels[idx_for_label]
+        used_ = np.zeros_like(filtered_label)
+        used_boxes[ground_truth.identifier] = used_
+        num_ground_truth += used_.shape[0]
+
+        difficult_box_mask = np.full_like(ground_truth.labels, False)
+        difficult_box_indices = ground_truth.metadata.get("difficult_boxes", [])
+        if ignore_difficult:
+            difficult_box_mask[difficult_box_indices] = True
+        difficult_box_mask = difficult_box_mask[idx_for_label]
+
+        difficult_boxes[ground_truth.identifier] = difficult_box_mask
+        if ignore_difficult:
+            num_ground_truth -= np.sum(difficult_box_mask)
+
+    return used_boxes, num_ground_truth, difficult_boxes
+
+
+def _prepare_prediction_boxes(label, predictions, ignore_difficult):
+    prediction_images = []
+    prediction_boxes = []
+    indexes = []
+    difficult_boxes = []
+    for i, prediction in enumerate(predictions):
+        idx = prediction.labels == label
+
+        prediction_images.append(np.full(prediction.labels[idx].shape, i))
+        prediction_boxes.append(np.c_[
+            prediction.scores[idx],
+            prediction.x_mins[idx], prediction.y_mins[idx], prediction.x_maxs[idx], prediction.y_maxs[idx]
+        ])
+
+        difficult_box_mask = np.full_like(prediction.labels, False)
+        difficult_box_indices = prediction.metadata.get("difficult_boxes", [])
+        if ignore_difficult:
+            difficult_box_mask[difficult_box_indices] = True
+
+        difficult_boxes.append(difficult_box_mask)
+        indexes.append(np.argwhere(idx))
+
+    prediction_boxes = np.concatenate(prediction_boxes)
+    difficult_boxes = np.concatenate(difficult_boxes)
+    sorted_order = np.argsort(-prediction_boxes[:, 0])
+    prediction_boxes = prediction_boxes[sorted_order]
+    prediction_images = np.concatenate(prediction_images)[sorted_order]
+    difficult_boxes = difficult_boxes[sorted_order]
+
+    return prediction_boxes, prediction_images, difficult_boxes
+
+
+def get_valid_labels(labels, background):
+    return list(filter(lambda label: label != background, labels))
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/hit_ratio.py b/tools/accuracy_checker/accuracy_checker/metrics/hit_ratio.py
new file mode 100644
index 000000000..6d5d7a135
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/hit_ratio.py
@@ -0,0 +1,100 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import heapq
+import math
+
+import numpy as np
+
+from ..representation import HitRatioAnnotation, HitRatioPrediction
+from .metric import FullDatasetEvaluationMetric, BaseMetricConfig
+from ..config import NumberField
+
+class BaseRecommenderMetric(FullDatasetEvaluationMetric):
+    annotation_types = (HitRatioAnnotation, )
+    prediction_types = (HitRatioPrediction, )
+
+    def __init__(self, discounter, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.discounter = discounter or (lambda item, rank: int(item in rank))
+
+
+    def validate_config(self):
+        class _RecommenderValidator(BaseMetricConfig):
+            top_k = NumberField(floats=False, min_value=1, optional=True)
+
+        recommender_validator = _RecommenderValidator(
+            'recommend',
+            on_extra_argument=_RecommenderValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        recommender_validator.validate(self.config)
+
+    def configure(self):
+        self.top_k = self.config.get('top_k', 10)
+        self.users_num = self.dataset.metadata.get('users_number')
+        self.pred_per_user = {i: [] for i in range(self.users_num)}
+        self.gt_items = {}
+
+    def update(self, annotation, prediction):
+        self.pred_per_user[prediction.user].append((prediction.item, prediction.scores))
+        if annotation.positive:
+            self.gt_items[annotation.user] = annotation.item
+
+    def evaluate(self, annotations, predictions):
+        iter_num = len(self.pred_per_user[0])
+
+        measure = []
+        for user in range(self.users_num):
+            map_item_score = {}
+            for j in range(iter_num):
+                item = self.pred_per_user[user][j][0]
+                score = self.pred_per_user[user][j][1]
+                map_item_score[item] = score
+            ranklist = heapq.nlargest(10, map_item_score, key=map_item_score.get)
+            measure.append(self.discounter(self.gt_items[user], ranklist))
+
+        return np.mean(measure)
+
+def hit_ratio_discounter(item, rank):
+    return int(item in rank)
+
+def ndcg_discunter(item, rank):
+    if item in rank:
+        return math.log(2) / math.log(rank.index(item) + 2)
+
+    return 0
+
+
+class HitRatioMetric(BaseRecommenderMetric):
+    """
+    Class for evaluating Hit Ratio metric
+    """
+
+    __provider__ = 'hit_ratio'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(hit_ratio_discounter, *args, **kwargs)
+
+
+class NDSGMetric(BaseRecommenderMetric):
+    """
+    Class for evaluating Normalized Discounted Cumulative Gain metric
+    """
+
+    __provider__ = 'ndcg'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(ndcg_discunter, *args, **kwargs)
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/metric.py b/tools/accuracy_checker/accuracy_checker/metrics/metric.py
new file mode 100644
index 000000000..0cb618979
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/metric.py
@@ -0,0 +1,159 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..representation import ContainerRepresentation
+from ..config import ConfigError
+from ..utils import is_single_metric_source, get_supported_representations
+from ..presenters import BasePresenter
+from ..config import ConfigValidator, NumberField, StringField
+from ..dependency import ClassProvider
+from ..utils import zipped_transform
+
+
+class BaseMetricConfig(ConfigValidator):
+    type = StringField()
+    name = StringField(optional=True)
+    reference = NumberField(optional=True)
+    threshold = NumberField(min_value=0, optional=True)
+    presenter = StringField(choices=BasePresenter.providers, optional=True)
+    label_map = StringField(optional=True)
+    prediction_source = StringField(optional=True)
+    annotation_source = StringField(optional=True)
+
+
+class Metric(ClassProvider):
+    """
+    Interface for evaluating metrics.
+    """
+
+    __provider_type__ = 'metric'
+
+    annotation_types = ()
+    prediction_types = ()
+
+    def __init__(self, config, dataset, name=None, state=None):
+        self.config = config
+        self.name = name
+        self.dataset = dataset
+        self.state = state
+        self._update_iter = 0
+        self.meta = {}
+
+        self.validate_config()
+        self.configure()
+        message_unsupported_multi_source = 'metric {} does not support several {} sources'
+        self.annotation_source = self.config.get('annotation_source')
+
+        if self.annotation_source and not is_single_metric_source(self.annotation_source):
+            raise ConfigError(message_unsupported_multi_source.format(self.name, 'annotation'))
+
+        self.prediction_source = self.config.get('prediction_source')
+        if self.prediction_source and not is_single_metric_source(self.prediction_source):
+            raise ConfigError(message_unsupported_multi_source.format(self.name, 'prediction'))
+
+    def __call__(self, *args, **kwargs):
+        return self.submit_all(*args, **kwargs)
+
+    def submit(self, annotation, prediction):
+        self.update(annotation, prediction)
+
+    def submit_all(self, annotations, predictions):
+        return self.evaluate(annotations, predictions)
+
+    def update(self, annotation, prediction):
+        pass
+
+    def evaluate(self, annotations, predictions):
+        raise NotImplementedError
+
+    def configure(self):
+        """
+        Specifies configuration structure for metric entry.
+        """
+
+        pass
+
+    def validate_config(self):
+        """
+        Validate that metric entry meets all configuration structure requirements.
+        """
+
+        BaseMetricConfig(self.name, on_extra_argument=BaseMetricConfig.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def _update_state(self, fn, state_key, default_factory=None):
+        iter_key = "{}_global_it".format(state_key)
+        if state_key not in self.state:
+            default = default_factory() if default_factory else None
+            self.state[state_key] = default
+            self.state[iter_key] = 0
+
+        self._update_iter += 1
+        if self.state[iter_key] < self._update_iter:
+            self.state[iter_key] += 1
+            self.state[state_key] = fn(self.state[state_key])
+
+    def _resolve_representation_containers(self, annotation, prediction):
+        def get_resolve_subject(representation, source=None):
+            if not isinstance(representation, ContainerRepresentation):
+                return representation
+
+            if not source:
+                return representation.values()
+
+            representation = representation.get(source)
+            if not representation:
+                raise ConfigError('{} not found'.format(source))
+
+            return representation
+
+        annotation = get_resolve_subject(annotation, self.annotation_source)
+        prediction = get_resolve_subject(prediction, self.prediction_source)
+
+        def resolve(representation, supported_types, representation_name):
+            message_not_found = 'suitable {} for metric {} not found'
+            message_need_source = 'you need specify {} source for metric {}'
+
+            representation = get_supported_representations(representation, supported_types)
+            if not representation:
+                raise ConfigError(message_not_found.format(representation_name, self.name))
+
+            if len(representation) > 1:
+                raise ConfigError(message_need_source.format(representation_name, self.name))
+
+            return representation[0]
+
+        resolved_annotation = resolve(annotation, self.annotation_types, 'annotation')
+        resolved_prediction = resolve(prediction, self.prediction_types, 'prediction')
+
+        return resolved_annotation, resolved_prediction
+
+
+class PerImageEvaluationMetric(Metric):
+    def submit(self, annotation, prediction):
+        annotation_, prediction_ = self._resolve_representation_containers(annotation, prediction)
+        self.update(annotation_, prediction_)
+
+    def evaluate(self, annotations, predictions):
+        raise NotImplementedError
+
+
+class FullDatasetEvaluationMetric(Metric):
+    def submit_all(self, annotations, predictions):
+        annotations_, predictions_ = zipped_transform(self._resolve_representation_containers, annotations, predictions)
+        return self.evaluate(annotations_, predictions_)
+
+    def evaluate(self, annotations, predictions):
+        raise NotImplementedError
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/metric_executor.py b/tools/accuracy_checker/accuracy_checker/metrics/metric_executor.py
new file mode 100644
index 000000000..cd24e9a92
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/metric_executor.py
@@ -0,0 +1,106 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from collections import namedtuple
+
+from ..presenters import BasePresenter, EvaluationResult
+from ..config import StringField
+from ..utils import zipped_transform
+from .metric import BaseMetricConfig, Metric
+from ..config import ConfigError
+
+MetricInstance = namedtuple('MetricInstance', ['name', 'metric_fn', 'reference', 'threshold', 'presenter'])
+
+
+class MetricConfig(BaseMetricConfig):
+    type = StringField(choices=Metric.providers)
+
+
+class MetricsExecutor:
+    """
+    Class for evaluating metrics according to dataset configuration entry.
+    """
+
+    def __init__(self, dataset_config, dataset, state=None):
+        dataset_name = dataset_config.get('name', '')
+        message_prefix = '{}'.format(dataset_name)
+
+        self.state = state or {}
+        self._token = 'metrics'
+
+        dataset_metrics = dataset_config.get(self._token)
+        if not dataset_metrics:
+            raise ConfigError('{} dataset config must specify "{}"'.format(message_prefix, self._token))
+
+        self.dataset = dataset
+
+        self.metrics = []
+        type_ = 'type'
+        identifier = 'name'
+        reference = 'reference'
+        threshold = 'threshold'
+        presenter = 'presenter'
+
+        for metric_config_entry in dataset_metrics:
+            metric_config = MetricConfig(
+                "{}.metrics".format(dataset_name), on_extra_argument=MetricConfig.IGNORE_ON_EXTRA_ARGUMENT
+            )
+            metric_type = metric_config_entry.get(type_)
+            metric_config.validate(metric_config_entry, type_)
+
+            metric_identifier = metric_config_entry.get(identifier, metric_type)
+
+            metric_fn = Metric.provide(
+                metric_type, metric_config_entry, self.dataset, metric_identifier, state=self.state
+            )
+            metric_presenter = BasePresenter.provide(metric_config_entry.get(presenter, 'print_scalar'))
+
+            self.metrics.append(MetricInstance(
+                metric_identifier,
+                metric_fn,
+                metric_config_entry.get(reference),
+                metric_config_entry.get(threshold),
+                metric_presenter
+            ))
+
+    def update_metrics_on_object(self, annotation, prediction):
+        """
+        Updates metric value corresponding given annotation and prediction objects.
+        """
+
+        for metric in self.metrics:
+            metric.metric_fn.submit(annotation, prediction)
+
+    def update_metrics_on_batch(self, annotation, prediction):
+        """
+        Updates metric value corresponding given batch.
+
+        Args:
+            annotation: list of batch number of annotation objects.
+            prediction: list of batch number of prediction objects.
+        """
+
+        zipped_transform(self.update_metrics_on_object, annotation, prediction)
+
+    def iterate_metrics(self, annotations, predictions):
+        for name, functor, reference, threshold, presenter in self.metrics:
+            yield presenter, EvaluationResult(
+                name=name,
+                evaluated_value=functor(annotations, predictions),
+                reference_value=reference,
+                threshold=threshold,
+                meta=functor.meta,
+            )
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/multilabel_recognition.py b/tools/accuracy_checker/accuracy_checker/metrics/multilabel_recognition.py
new file mode 100644
index 000000000..14f107e07
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/multilabel_recognition.py
@@ -0,0 +1,189 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from .metric import PerImageEvaluationMetric, BaseMetricConfig
+from ..representation import MultiLabelRecognitionAnnotation, MultiLabelRecognitionPrediction
+from ..config import StringField, BoolField
+
+
+class MultiLabelMetric(PerImageEvaluationMetric):
+    annotation_types = (MultiLabelRecognitionAnnotation,)
+    prediction_types = (MultiLabelRecognitionPrediction,)
+
+    def validate_config(self):
+        class _MultiLabelConfigValidator(BaseMetricConfig):
+            label_map = StringField(optional=True)
+            calculate_average = BoolField(optional=True)
+
+        config_validator = _MultiLabelConfigValidator(
+            'accuracy', on_extra_argument=_MultiLabelConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        config_validator.validate(self.config)
+
+    def configure(self):
+        label_map = self.config.get('label_map', 'label_map')
+        self.labels = self.dataset.metadata.get(label_map)
+        self.calculate_average = self.config.get('calculate_average', True)
+
+        self.meta['scale'] = 1
+        self.meta['postfix'] = ''
+        self.meta['calculate_mean'] = False
+        self.meta['names'] = list(self.labels.values())
+        if self.calculate_average:
+            self.meta['names'].append('average')
+        self.tp = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+        self.fp = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+        self.tn = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+        self.fn = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+
+        self.counter = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+
+    def update(self, annotation, prediction):
+        def loss(annotation_labels, prediction_labels):
+            tp_result = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+            fp_results = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+            tn_results = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+            fn_results = np.zeros_like(list(self.labels.keys()), dtype=np.float)
+
+            for index, label in enumerate(annotation_labels):
+                if label == 1 and label == prediction_labels[index]:
+                    tp_result[index] = 1.
+                    continue
+
+                if label == 1 and label != prediction_labels[index]:
+                    fn_results[index] = 1.
+                    continue
+
+                if label == 0 and label == prediction_labels[index]:
+                    tn_results[index] = 1.
+                    continue
+
+                if label == 0 and label != prediction_labels[index]:
+                    fp_results[index] = 1.
+                    continue
+
+            return tp_result, fp_results, tn_results, fn_results
+
+        def counter(annotation_label):
+            count = np.zeros_like(annotation_label, dtype=float)
+            cond = np.where(np.array(annotation_label) != -1)
+            count[cond] = 1.
+            return count
+
+        tp_upd, fp_upd, tn_upd, fn_upd = loss(annotation.multi_label, prediction.multi_label)
+        self.tp = np.add(self.tp, tp_upd)
+        self.fp = np.add(self.fp, fp_upd)
+        self.tn = np.add(self.tn, tn_upd)
+        self.fn = np.add(self.fn, fn_upd)
+
+        self.counter = np.add(self.counter, counter(annotation.multi_label))
+
+    def evaluate(self, annotations, predictions):
+        pass
+
+
+class MultiLabelAccuracy(MultiLabelMetric):
+    __provider__ = 'multi_accuracy'
+
+    def evaluate(self, annotations, predictions):
+        tp_tn = np.add(self.tp, self.tn, dtype=float)
+        per_class = np.divide(tp_tn, self.counter, out=np.zeros_like(tp_tn, dtype=float), where=self.counter != 0)
+        average = np.sum(tp_tn) / np.sum(self.counter)
+
+        return [*per_class, average]
+
+
+class MultiLabelPrecision(MultiLabelMetric):
+    __provider__ = 'multi_precision'
+
+    def evaluate(self, annotations, predictions):
+        tp_fp = np.add(self.tp, self.fp, dtype=float)
+        per_class = np.divide(self.tp, tp_fp, out=np.zeros_like(self.tp, dtype=float), where=tp_fp != 0)
+        if not self.calculate_average:
+            return per_class
+        average = np.sum(self.tp) / np.sum(tp_fp)
+
+        return [*per_class, average]
+
+
+class MultiLabelRecall(MultiLabelMetric):
+    __provider__ = 'multi_recall'
+
+    def evaluate(self, annotations, predictions):
+        tp_fn = np.add(self.tp, self.fn, dtype=float)
+        per_class = np.divide(self.tp, tp_fn, out=np.zeros_like(self.tp, dtype=float), where=tp_fn != 0)
+        if not self.calculate_average:
+            return per_class
+        average = np.sum(self.tp) / np.sum(tp_fn)
+
+        return [*per_class, average]
+
+
+class F1Score(PerImageEvaluationMetric):
+    __provider__ = 'f1-score'
+    annotation_types = (MultiLabelRecognitionAnnotation,)
+    prediction_types = (MultiLabelRecognitionPrediction,)
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.precision = MultiLabelPrecision(self.config, self.dataset)
+        self.recall = MultiLabelRecall(self.config, self.dataset)
+
+    def validate_config(self):
+        class _F1ScoreValidator(BaseMetricConfig):
+            label_map = StringField(optional=True)
+            calculate_average = BoolField(optional=True)
+
+        f1_score_config_validator = _F1ScoreValidator(
+            'f1_score', on_extra_argument=_F1ScoreValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        f1_score_config_validator.validate(self.config)
+
+    def configure(self):
+        label_map = self.config.get('label_map', 'label_map')
+        self.labels = self.dataset.metadata.get(label_map)
+        self.calculate_average = self.config.get('calculate_average', True)
+        self.meta['names'] = list(self.labels.values())
+        if self.calculate_average:
+            self.meta['names'].append('average')
+
+        self.meta['scale'] = 1
+        self.meta['postfix'] = ''
+        self.meta['calculate_mean'] = False
+        self.meta['names'] = list(self.labels.values()) + ['average']
+
+    def update(self, annotation, prediction):
+        self.precision.update(annotation, prediction)
+        self.recall.update(annotation, prediction)
+
+    def evaluate(self, annotations, predictions):
+        precisions = self.precision.evaluate(annotations, predictions)
+        recalls = self.recall.evaluate(annotations, predictions)
+
+        precision_add = np.add(precisions[:-1], recalls[:-1], dtype=float)
+        precision_multiply = np.multiply(precisions[:-1], recalls[:-1], dtype=float)
+
+        per_class = 2 * np.divide(
+            precision_multiply, precision_add, out=np.zeros_like(precision_multiply, dtype=float),
+            where=precision_add != 0
+        )
+        if not self.calculate_average:
+            return per_class
+
+        average = 2 * (precisions[-1] * recalls[-1]) / (precisions[-1] + recalls[-1])
+
+        return [*per_class, average]
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/overlap.py b/tools/accuracy_checker/accuracy_checker/metrics/overlap.py
new file mode 100644
index 000000000..d9fffc77a
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/overlap.py
@@ -0,0 +1,71 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..dependency import ClassProvider
+
+
+class Overlap(ClassProvider):
+    __provider_type__ = 'overlap'
+
+    @staticmethod
+    def intersections(prediction_box, annotation_boxes):
+        px_min, py_min, px_max, py_max = prediction_box
+        ax_mins, ay_mins, ax_maxs, ay_maxs = annotation_boxes
+
+        x_mins = np.maximum(ax_mins, px_min)
+        y_mins = np.maximum(ay_mins, py_min)
+        x_maxs = np.minimum(ax_maxs, px_max)
+        y_maxs = np.minimum(ay_maxs, py_max)
+
+        return x_mins, y_mins, np.maximum(x_mins, x_maxs), np.maximum(y_mins, y_maxs)
+
+    def __init__(self, include_boundaries=None):
+        self.boundary = 1 if include_boundaries else 0
+
+    def __call__(self, *args, **kwargs):
+        return self.evaluate(*args, **kwargs)
+
+    def evaluate(self, prediction_box, annotation_boxes):
+        raise NotImplementedError
+
+    def area(self, box):
+        x0, y0, x1, y1 = box
+        return (x1 - x0 + self.boundary) * (y1 - y0 + self.boundary)
+
+
+class IOU(Overlap):
+    __provider__ = 'iou'
+
+    def evaluate(self, prediction_box, annotation_boxes):
+        intersections_area = self.area(self.intersections(prediction_box, annotation_boxes))
+        unions = self.area(prediction_box) + self.area(annotation_boxes) - intersections_area
+        return np.divide(
+            intersections_area, unions, out=np.zeros_like(intersections_area, dtype=float), where=unions != 0
+        )
+
+
+class IOA(Overlap):
+    __provider__ = 'ioa'
+
+    def evaluate(self, prediction_box, annotation_boxes):
+        intersections_area = self.area(self.intersections(prediction_box, annotation_boxes))
+        prediction_area = self.area(prediction_box)
+        return np.divide(
+            intersections_area, prediction_area, out=np.zeros_like(intersections_area, dtype=float),
+            where=prediction_area != 0
+        )
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/regression.py b/tools/accuracy_checker/accuracy_checker/metrics/regression.py
new file mode 100644
index 000000000..894acdc4c
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/regression.py
@@ -0,0 +1,360 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import warnings
+import math
+import numpy as np
+
+from ..representation import (
+    RegressionAnnotation,
+    RegressionPrediction,
+    FacialLandmarksAnnotation,
+    FacialLandmarksPrediction,
+    SuperResolutionAnnotation,
+    SuperResolutionPrediction,
+    GazeVectorAnnotation,
+    GazeVectorPrediction
+)
+
+from .metric import PerImageEvaluationMetric, BaseMetricConfig
+from ..config import BaseField, NumberField, BoolField, ConfigError
+from ..utils import string_to_tuple, finalize_metric_result
+
+
+class BaseRegressionMetric(PerImageEvaluationMetric):
+    annotation_types = (RegressionAnnotation, )
+    prediction_types = (RegressionPrediction, )
+
+    def __init__(self, value_differ, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.value_differ = value_differ
+
+    def configure(self):
+        self.meta.update({'names': ['mean', 'std'], 'scale': 1, 'postfix': ' ', 'calculate_mean': False})
+        self.magnitude = []
+
+    def update(self, annotation, prediction):
+        self.magnitude.append(self.value_differ(annotation.value, prediction.value))
+
+    def evaluate(self, annotations, predictions):
+        return np.mean(self.magnitude), np.std(self.magnitude)
+
+
+class BaseIntervalRegressionMetricConfig(BaseMetricConfig):
+    intervals = BaseField(optional=True)
+    start = NumberField(optional=True)
+    end = NumberField(optional=True)
+    step = NumberField(optional=True)
+    ignore_values_not_in_interval = BoolField(optional=True)
+
+
+class BaseRegressionOnIntervals(PerImageEvaluationMetric):
+    annotation_types = (RegressionAnnotation, )
+    prediction_types = (RegressionPrediction, )
+
+    def __init__(self, value_differ, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.value_differ = value_differ
+
+    def validate_config(self):
+        validator = BaseIntervalRegressionMetricConfig(
+            'regression_on_intervals_config',
+            on_extra_argument=BaseIntervalRegressionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
+        )
+        validator.validate(self.config)
+
+    def configure(self):
+        self.meta.update({'scale': 1, 'postfix': ' ', 'calculate_mean': False})
+        self.ignore_out_of_range = self.config.get('ignore_values_not_in_interval', True)
+
+        self.intervals = self.config.get('intervals')
+        if not self.intervals:
+            stop = self.config.get('end')
+            if not stop:
+                raise ConfigError('intervals or start-step-end of interval should be specified for metric')
+
+            start = self.config.get('start', 0.0)
+            step = self.config.get('step', 1.0)
+            self.intervals = np.arange(start, stop + step, step)
+
+        if not isinstance(self.intervals, (list, np.ndarray)):
+            self.intervals = string_to_tuple(self.intervals)
+
+        self.intervals = np.unique(self.intervals)
+        self.magnitude = [[] for _ in range(len(self.intervals) + 1)]
+
+        self.meta['names'] = ([])
+        if not self.ignore_out_of_range:
+            self.meta['names'] = (['mean: < ' + str(self.intervals[0]), 'std: < ' + str(self.intervals[0])])
+
+        for index in range(len(self.intervals) - 1):
+            self.meta['names'].append('mean: <= ' + str(self.intervals[index]) + ' < ' + str(self.intervals[index + 1]))
+            self.meta['names'].append('std: <= ' + str(self.intervals[index]) + ' < ' + str(self.intervals[index + 1]))
+
+        if not self.ignore_out_of_range:
+            self.meta['names'].append('mean: > ' + str(self.intervals[-1]))
+            self.meta['names'].append('std: > ' + str(self.intervals[-1]))
+
+    def update(self, annotation, prediction):
+        index = find_interval(annotation.value, self.intervals)
+        self.magnitude[index].append(self.value_differ(annotation.value, prediction.value))
+
+    def evaluate(self, annotations, predictions):
+        if self.ignore_out_of_range:
+            self.magnitude = self.magnitude[1:-1]
+
+        result = [[np.mean(values), np.std(values)] if values else [np.nan, np.nan] for values in self.magnitude]
+        result, self.meta['names'] = finalize_metric_result(np.reshape(result, -1), self.meta['names'])
+
+        if not result:
+            warnings.warn("No values in given interval")
+            result.append(0)
+
+        return result
+
+
+class MeanAbsoluteError(BaseRegressionMetric):
+    __provider__ = 'mae'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(mae_differ, *args, **kwargs)
+
+
+class MeanSquaredError(BaseRegressionMetric):
+    __provider__ = 'mse'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(mse_differ, *args, **kwargs)
+
+
+class RootMeanSquaredError(BaseRegressionMetric):
+    __provider__ = 'rmse'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(mse_differ, *args, **kwargs)
+
+    def evaluate(self, annotations, predictions):
+        return np.sqrt(np.mean(self.magnitude)), np.sqrt(np.std(self.magnitude))
+
+
+class MeanAbsoluteErrorOnInterval(BaseRegressionOnIntervals):
+    __provider__ = 'mae_on_interval'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(mae_differ, *args, **kwargs)
+
+
+class MeanSquaredErrorOnInterval(BaseRegressionOnIntervals):
+    __provider__ = 'mse_on_interval'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(mse_differ, *args, **kwargs)
+
+
+class RootMeanSquaredErrorOnInterval(BaseRegressionOnIntervals):
+    __provider__ = 'rmse_on_interval'
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(mse_differ, *args, **kwargs)
+
+    def evaluate(self, annotations, predictions):
+        if self.ignore_out_of_range:
+            self.magnitude = self.magnitude[1:-1]
+
+        result = []
+        for values in self.magnitude:
+            error = [np.sqrt(np.mean(values)), np.sqrt(np.std(values))] if values else [np.nan, np.nan]
+            result.append(error)
+
+        result, self.meta['names'] = finalize_metric_result(np.reshape(result, -1), self.meta['names'])
+
+        if not result:
+            warnings.warn("No values in given interval")
+            result.append(0)
+
+        return result
+
+
+class FacialLandmarksPerPointNormedError(PerImageEvaluationMetric):
+    __provider__ = 'per_point_normed_error'
+
+    annotation_types = (FacialLandmarksAnnotation, )
+    prediction_types = (FacialLandmarksPrediction, )
+
+    def configure(self):
+        self.meta.update({'scale': 1, 'postfix': ' ', 'calculate_mean': True, 'data_format': '{:.4f}'})
+        self.magnitude = []
+
+    def update(self, annotation, prediction):
+        result = point_regression_differ(
+            annotation.x_values, annotation.y_values, prediction.x_values, prediction.y_values
+        )
+        result /= np.maximum(annotation.interocular_distance, np.finfo(np.float64).eps)
+        self.magnitude.append(result)
+
+    def evaluate(self, annotations, predictions):
+        num_points = np.shape(self.magnitude)[1]
+        point_result_name_pattern = 'point_{}_normed_error'
+        self.meta['names'] = [point_result_name_pattern.format(point_id) for point_id in range(num_points)]
+        per_point_rmse = np.mean(self.magnitude, axis=1)
+        per_point_rmse, self.meta['names'] = finalize_metric_result(per_point_rmse, self.meta['names'])
+
+        return per_point_rmse
+
+
+class NormedErrorMetricConfig(BaseMetricConfig):
+    calculate_std = BoolField(optional=True)
+    percentile = NumberField(optional=True, floats=False, min_value=0, max_value=100)
+
+
+class FacialLandmarksNormedError(PerImageEvaluationMetric):
+    __provider__ = 'normed_error'
+
+    annotation_types = (FacialLandmarksAnnotation, )
+    prediction_types = (FacialLandmarksPrediction, )
+
+    def validate_config(self):
+        config_validator = NormedErrorMetricConfig(
+            'normed_error_config', NormedErrorMetricConfig.ERROR_ON_EXTRA_ARGUMENT
+        )
+        config_validator.validate(self.config)
+
+    def configure(self):
+        self.calculate_std = self.config.get('calculate_std', False)
+        self.percentile = self.config.get('percentile')
+        self.meta.update({
+            'scale': 1,
+            'postfix': ' ',
+            'calculate_mean': not self.calculate_std or not self.percentile,
+            'data_format': '{:.4f}',
+            'names': ['mean']
+        })
+        self.magnitude = []
+
+    def update(self, annotation, prediction):
+        per_point_result = point_regression_differ(
+            annotation.x_values, annotation.y_values, prediction.x_values, prediction.y_values
+        )
+        avg_result = np.sum(per_point_result) / len(per_point_result)
+        avg_result /= np.maximum(annotation.interocular_distance, np.finfo(np.float64).eps)
+        self.magnitude.append(avg_result)
+
+    def evaluate(self, annotations, predictions):
+        result = [np.mean(self.magnitude)]
+
+        if self.calculate_std:
+            result.append(np.std(self.magnitude))
+            self.meta['names'].append('std')
+
+        if self.percentile:
+            sorted_magnitude = np.sort(self.magnitude)
+            index = len(self.magnitude) / 100 * self.percentile
+            result.append(sorted_magnitude[int(index)])
+            self.meta['names'].append('{}th percentile'.format(self.percentile))
+
+        return result
+
+
+def calculate_distance(x_coords, y_coords, selected_points):
+    first_point = [x_coords[selected_points[0]], y_coords[selected_points[0]]]
+    second_point = [x_coords[selected_points[1]], y_coords[selected_points[1]]]
+    return np.linalg.norm(np.subtract(first_point, second_point))
+
+
+def mae_differ(annotation_val, prediction_val):
+    return np.abs(annotation_val - prediction_val)
+
+
+def mse_differ(annotation_val, prediction_val):
+    return (annotation_val - prediction_val)**2
+
+
+def find_interval(value, intervals):
+    for index, point in enumerate(intervals):
+        if value < point:
+            return index
+
+    return len(intervals)
+
+
+def point_regression_differ(annotation_val_x, annotation_val_y, prediction_val_x, prediction_val_y):
+    loss = np.subtract(list(zip(annotation_val_x, annotation_val_y)), list(zip(prediction_val_x, prediction_val_y)))
+    return np.linalg.norm(loss, 2, axis=1)
+
+
+class PeakSignalToNoiseRatio(BaseRegressionMetric):
+    __provider__ = 'psnr'
+
+    annotation_types = (SuperResolutionAnnotation, )
+    prediction_types = (SuperResolutionPrediction, )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(self._psnr_differ, *args, **kwargs)
+
+    def validate_config(self):
+        class _PSNRConfig(BaseMetricConfig):
+            scale_border = NumberField(optional=True, min_value=0)
+
+        config_validator = _PSNRConfig('psnr', on_extra_argument=_PSNRConfig.ERROR_ON_EXTRA_ARGUMENT)
+        config_validator.validate(self.config)
+
+    def configure(self):
+        super().configure()
+        self.scale_border = self.config.get('scale_border', 4)
+
+    def _psnr_differ(self, annotation_image, prediction_image):
+        prediction = np.asarray(prediction_image).astype(np.float)
+        ground_truth = np.asarray(annotation_image).astype(np.float)
+
+        height, width = prediction.shape[:2]
+        prediction = prediction[
+            self.scale_border:height - self.scale_border,
+            self.scale_border:width - self.scale_border
+        ]
+        ground_truth = ground_truth[
+            self.scale_border:height - self.scale_border,
+            self.scale_border:width - self.scale_border
+        ]
+        image_difference = (prediction - ground_truth) / 255.  # rgb color space
+
+        r_channel_diff = image_difference[:, :, 0]
+        g_channel_diff = image_difference[:, :, 1]
+        b_channel_diff = image_difference[:, :, 2]
+
+        channels_diff = (r_channel_diff * 65.738 + g_channel_diff * 129.057 + b_channel_diff * 25.064) / 256
+
+        mse = np.mean(channels_diff ** 2)
+        if mse == 0:
+            return np.Infinity
+
+        return -10 * math.log10(mse)
+
+
+def angle_differ(gt_gaze_vector, predicted_gaze_vector):
+    return np.arccos(
+        gt_gaze_vector.dot(predicted_gaze_vector) / np.linalg.norm(gt_gaze_vector)
+        / np.linalg.norm(predicted_gaze_vector)
+    ) * 180 / np.pi
+
+
+class AngleError(BaseRegressionMetric):
+    __provider__ = 'angle_error'
+
+    annotation_types = (GazeVectorAnnotation, )
+    prediction_types = (GazeVectorPrediction, )
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(angle_differ, *args, **kwargs)
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/reid.py b/tools/accuracy_checker/accuracy_checker/metrics/reid.py
new file mode 100644
index 000000000..2adf069d0
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/reid.py
@@ -0,0 +1,379 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from collections import defaultdict, namedtuple
+from sklearn.metrics import auc, precision_recall_curve
+# noinspection PyProtectedMember
+from sklearn.metrics.base import _average_binary_score
+import numpy as np
+
+from ..representation import (
+    ReIdentificationClassificationAnnotation,
+    ReIdentificationAnnotation,
+    ReIdentificationPrediction
+)
+from ..config import BaseField, BoolField, NumberField
+from .metric import BaseMetricConfig, FullDatasetEvaluationMetric
+
+PairDesc = namedtuple('PairDesc', 'image1 image2 same')
+
+
+class CMCScore(FullDatasetEvaluationMetric):
+    """
+    Cumulative Matching Characteristics (CMC) score.
+
+    Config:
+        annotation: reid annotation.
+        prediction: predicted embeddings.
+        top_k: number of k highest ranked samples to consider when matching.
+        separate_camera_set: should identities from the same camera view be filtered out.
+        single_gallery_shot: each identity has only one instance in the gallery.
+        number_single_shot_repeats: number of repeats for single_gallery_shot setting.
+        first_match_break: break on first matched gallery sample.
+    """
+
+    __provider__ = 'cmc'
+
+    annotation_types = (ReIdentificationAnnotation, )
+    prediction_types = (ReIdentificationPrediction, )
+
+    def validate_config(self):
+        class _CMCConfigValidator(BaseMetricConfig):
+            top_k = NumberField(floats=False, min_value=1, optional=True)
+            separate_camera_set = BoolField(optional=True)
+            single_gallery_shot = BoolField(optional=True)
+            first_match_break = BoolField(optional=True)
+            number_single_shot_repeats = NumberField(floats=False, optional=True)
+
+        validator = _CMCConfigValidator('cmc', on_extra_argument=_CMCConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
+        validator.validate(self.config)
+
+    def configure(self):
+        self.top_k = self.config.get('top_k', 1)
+        self.separate_camera_set = self.config.get('separate_camera_set', False)
+        self.single_gallery_shot = self.config.get('single_gallery_shot', False)
+        self.first_match_break = self.config.get('first_match_break', True)
+        self.number_single_shot_repeats = self.config.get('number_single_shot_repeats', 10)
+
+    def evaluate(self, annotations, predictions):
+        dist_matrix = distance_matrix(annotations, predictions)
+        gallery_cameras, gallery_pids, query_cameras, query_pids = get_gallery_query_pids(annotations)
+
+        _cmc_score = eval_cmc(
+            dist_matrix, query_pids, gallery_pids, query_cameras, gallery_cameras, self.separate_camera_set,
+            self.single_gallery_shot, self.first_match_break, self.number_single_shot_repeats
+        )
+
+        return _cmc_score[self.top_k - 1]
+
+
+class ReidMAP(FullDatasetEvaluationMetric):
+    """
+    Mean Average Precision score.
+
+    Config:
+        annotation: reid annotation.
+        prediction: predicted embeddings.
+        interpolated_auc: should area under precision recall curve be computed using trapezoidal rule or directly.
+    """
+
+    __provider__ = 'reid_map'
+
+    annotation_types = (ReIdentificationAnnotation, )
+    prediction_types = (ReIdentificationPrediction, )
+
+    def validate_config(self):
+        class _ReidMapConfig(BaseMetricConfig):
+            interpolated_auc = BoolField(optional=True)
+
+        validator = _ReidMapConfig('reid_map', on_extra_argument=_ReidMapConfig.ERROR_ON_EXTRA_ARGUMENT)
+        validator.validate(self.config)
+
+    def configure(self):
+        self.interpolated_auc = self.config.get('interpolated_auc', True)
+
+    def evaluate(self, annotations, predictions):
+        dist_matrix = distance_matrix(annotations, predictions)
+        gallery_cameras, gallery_pids, query_cameras, query_pids = get_gallery_query_pids(annotations)
+
+        return eval_map(
+            dist_matrix, query_pids, gallery_pids, query_cameras, gallery_cameras, self.interpolated_auc
+        )
+
+
+class PairwiseAccuracy(FullDatasetEvaluationMetric):
+    __provider__ = 'pairwise_accuracy'
+
+    annotation_types = (ReIdentificationClassificationAnnotation, )
+    prediction_types = (ReIdentificationPrediction, )
+
+    def validate_config(self):
+        class _PWAccConfig(BaseMetricConfig):
+            min_score = BaseField(optional=True)
+
+        validator = _PWAccConfig('pairwise_accuracy', on_extra_argument=_PWAccConfig.ERROR_ON_EXTRA_ARGUMENT)
+        validator.validate(self.config)
+
+    def configure(self):
+        self.min_score = self.config.get('min_score', 'train_median')
+
+    def evaluate(self, annotations, predictions):
+        embed_distances, pairs = get_embedding_distances(annotations, predictions)
+
+        min_score = self.min_score
+        if min_score == 'train_median':
+            train_distances, _train_pairs = get_embedding_distances(annotations, predictions, train=True)
+            min_score = np.median(train_distances)
+
+        embed_same_class = embed_distances < min_score
+
+        accuracy = 0
+        for i, pair in enumerate(pairs):
+            same_label = pair.same
+            out_same = embed_same_class[i]
+
+            correct_prediction = same_label and out_same or (not same_label and not out_same)
+
+            if correct_prediction:
+                accuracy += 1
+
+        return float(accuracy) / len(pairs)
+
+
+class PairwiseAccuracySubsets(FullDatasetEvaluationMetric):
+    __provider__ = 'pairwise_accuracy_subsets'
+
+    annotation_types = (ReIdentificationClassificationAnnotation, )
+    prediction_types = (ReIdentificationPrediction, )
+
+    def validate_config(self):
+        class _PWAccConfig(BaseMetricConfig):
+            subset_number = NumberField(optional=True, min_value=1, floats=False)
+
+        validator = _PWAccConfig('pairwise_accuracy', on_extra_argument=_PWAccConfig.ERROR_ON_EXTRA_ARGUMENT)
+        validator.validate(self.config)
+
+    def configure(self):
+        self.meta['scale'] = 1
+        self.meta['postfix'] = ' '
+        self.subset_num = self.config.get('subset_number', 10)
+        self.accuracy_metric = PairwiseAccuracy(self.config, self.dataset)
+
+    def evaluate(self, annotations, predictions):
+        subset_results = []
+        first_images_annotations = list(filter(
+            lambda annotation: (len(annotation.negative_pairs) > 0 or len(annotation.positive_pairs) > 0), annotations
+        ))
+
+        idx_subsets = self.make_subsets(self.subset_num, len(first_images_annotations))
+        for subset in range(self.subset_num):
+            test_subset = self.get_subset(first_images_annotations, idx_subsets[subset]['test'])
+            test_subset = self.mark_subset(test_subset, False)
+
+            train_subset = self.get_subset(first_images_annotations, idx_subsets[subset]['train'])
+            train_subset = self.mark_subset(train_subset)
+
+            subset_result = self.accuracy_metric.evaluate(test_subset+train_subset, predictions)
+            subset_results.append(subset_result)
+
+        return np.mean(subset_results)
+
+    @staticmethod
+    def make_subsets(subset_num, dataset_size):
+        subsets = []
+        if subset_num > dataset_size:
+            raise ValueError('It is impossible to divide dataset on more than number of annotations subsets.')
+
+        for subset in range(subset_num):
+            lower_bnd = subset * dataset_size // subset_num
+            upper_bnd = (subset + 1) * dataset_size // subset_num
+            subset_test = [(lower_bnd, upper_bnd)]
+
+            subset_train = [(0, lower_bnd), (upper_bnd, dataset_size)]
+            subsets.append({'test': subset_test, 'train': subset_train})
+
+        return subsets
+
+    @staticmethod
+    def mark_subset(subset_annotations, train=True):
+        for annotation in subset_annotations:
+            annotation.metadata['train'] = train
+
+        return subset_annotations
+
+    @staticmethod
+    def get_subset(container, subset_bounds):
+        subset = []
+        for bound in subset_bounds:
+            subset += container[bound[0]: bound[1]]
+
+        return subset
+
+
+def extract_embeddings(annotation, prediction, query):
+    return np.stack([pred.embedding for pred, ann in zip(prediction, annotation) if ann.query == query])
+
+
+def get_gallery_query_pids(annotation):
+    gallery_pids = np.asarray([ann.person_id for ann in annotation if not ann.query])
+    query_pids = np.asarray([ann.person_id for ann in annotation if ann.query])
+    gallery_cameras = np.asarray([ann.camera_id for ann in annotation if not ann.query])
+    query_cameras = np.asarray([ann.camera_id for ann in annotation if ann.query])
+
+    return gallery_cameras, gallery_pids, query_cameras, query_pids
+
+
+def distance_matrix(annotation, prediction):
+    gallery_embeddings = extract_embeddings(annotation, prediction, query=False)
+    query_embeddings = extract_embeddings(annotation, prediction, query=True)
+
+    return 1. - np.matmul(gallery_embeddings, np.transpose(query_embeddings)).T
+
+
+def unique_sample(ids_dict, num):
+    mask = np.zeros(num, dtype=np.bool)
+    for indices in ids_dict.values():
+        mask[np.random.choice(indices)] = True
+
+    return mask
+
+
+def eval_map(distance_mat, query_ids, gallery_ids, query_cams, gallery_cams, interpolated_auc=False):
+    number_queries, _number_gallery = distance_mat.shape
+    # Sort and find correct matches
+    indices = np.argsort(distance_mat, axis=1)
+    matches = (gallery_ids[indices] == query_ids[:, np.newaxis])  # type: np.ndarray
+
+    # Compute AP for each query
+    average_precisions = []
+    for query in range(number_queries):
+        # Filter out the same id and same camera
+        valid = (gallery_ids[indices[query]] != query_ids[query]) | (gallery_cams[indices[query]] != query_cams[query])
+
+        y_true = matches[query, valid]
+        y_score = -distance_mat[query][indices[query]][valid]
+        if not np.any(y_true):
+            continue
+
+        average_precisions.append(binary_average_precision(y_true, y_score, interpolated_auc=interpolated_auc))
+
+    if not average_precisions:
+        raise RuntimeError("No valid query")
+
+    return np.mean(average_precisions)
+
+
+def eval_cmc(distance_mat, query_ids, gallery_ids, query_cams, gallery_cams, separate_camera_set=False,
+             single_gallery_shot=False, first_match_break=False, number_single_shot_repeats=10, top_k=100):
+    number_queries, _number_gallery = distance_mat.shape
+
+    if not single_gallery_shot:
+        number_single_shot_repeats = 1
+
+    # Sort and find correct matches
+    indices = np.argsort(distance_mat, axis=1)
+    matches = gallery_ids[indices] == query_ids[:, np.newaxis]  # type: np.ndarray
+
+    # Compute CMC for each query
+    ret = np.zeros(top_k)
+    num_valid_queries = 0
+    for query in range(number_queries):
+        valid = get_valid_subset(
+            gallery_cams, gallery_ids, query, indices, query_cams, query_ids, separate_camera_set
+        )  # type: np.ndarray
+
+        if not np.any(matches[query, valid]):
+            continue
+
+        ids_dict = defaultdict(list)
+        if single_gallery_shot:
+            gallery_indexes = gallery_ids[indices[query][valid]]
+            for j, x in zip(np.where(valid)[0], gallery_indexes):
+                ids_dict[x].append(j)
+
+        for _ in range(number_single_shot_repeats):
+            if single_gallery_shot:
+                # Randomly choose one instance for each id
+                # required for correct validation on CUHK datasets
+                # http://www.ee.cuhk.edu.hk/~xgwang/CUHK_identification.html
+                sampled = (valid & unique_sample(ids_dict, len(valid)))
+                index = np.nonzero(matches[query, sampled])[0]
+            else:
+                index = np.nonzero(matches[query, valid])[0]
+
+            delta = 1. / (len(index) * number_single_shot_repeats)
+            for j, k in enumerate(index):
+                if k - j >= top_k:
+                    break
+                if first_match_break:
+                    ret[k - j] += 1
+                    break
+                ret[k - j] += delta
+
+        num_valid_queries += 1
+
+    if num_valid_queries == 0:
+        raise RuntimeError("No valid query")
+
+    return ret.cumsum() / num_valid_queries
+
+
+def get_valid_subset(gallery_cams, gallery_ids, query_index, indices, query_cams, query_ids, separate_camera_set):
+    # Filter out the same id and same camera
+    valid = (
+        (gallery_ids[indices[query_index]] != query_ids[query_index]) |
+        (gallery_cams[indices[query_index]] != query_cams[query_index])
+    )
+    if separate_camera_set:
+        # Filter out samples from same camera
+        valid &= (gallery_cams[indices[query_index]] != query_cams[query_index])
+
+    return valid
+
+
+def get_embedding_distances(annotation, prediction, train=False):
+    image_indexes = {}
+    for i, pred in enumerate(prediction):
+        image_indexes[pred.identifier] = i
+
+    pairs = []
+    for image1 in annotation:
+        if train != image1.metadata.get("train", False):
+            continue
+
+        for image2 in image1.positive_pairs:
+            pairs.append(PairDesc(image_indexes[image1.identifier], image_indexes[image2], True))
+        for image2 in image1.negative_pairs:
+            pairs.append(PairDesc(image_indexes[image1.identifier], image_indexes[image2], False))
+
+    embed1 = np.asarray([prediction[idx].embedding for idx, _, _ in pairs])
+    embed2 = np.asarray([prediction[idx].embedding for _, idx, _ in pairs])
+
+    return 0.5 * (1 - np.sum(embed1 * embed2, axis=1)), pairs
+
+
+def binary_average_precision(y_true, y_score, interpolated_auc=True):
+    def _average_precision(y_true_, y_score_, sample_weight=None):
+        precision, recall, _ = precision_recall_curve(y_true_, y_score_, sample_weight)
+        if not interpolated_auc:
+            # Return the step function integral
+            # The following works because the last entry of precision is
+            # guaranteed to be 1, as returned by precision_recall_curve
+            return -1 * np.sum(np.diff(recall) * np.array(precision)[:-1])
+
+        return auc(recall, precision)
+
+    return _average_binary_score(_average_precision, y_true, y_score, average="macro")
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/semantic_segmentation.py b/tools/accuracy_checker/accuracy_checker/metrics/semantic_segmentation.py
new file mode 100644
index 000000000..d418de08d
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/semantic_segmentation.py
@@ -0,0 +1,139 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..config import BoolField
+from ..representation import (
+    SegmentationAnnotation,
+    SegmentationPrediction,
+    BrainTumorSegmentationAnnotation,
+    BrainTumorSegmentationPrediction
+)
+from .metric import PerImageEvaluationMetric, BaseMetricConfig
+from ..utils import finalize_metric_result
+
+
+class SegmentationMetricConfig(BaseMetricConfig):
+    use_argmax = BoolField(optional=True)
+
+
+class SegmentationMetric(PerImageEvaluationMetric):
+    annotation_types = (SegmentationAnnotation, )
+    prediction_types = (SegmentationPrediction, )
+
+    CONFUSION_MATRIX_KEY = 'segmentation_confusion_matrix'
+
+    def evaluate(self, annotations, predictions):
+        raise NotImplementedError
+
+    def validate_config(self):
+        config_validator = SegmentationMetricConfig(
+            'SemanticSegmentation_config', SegmentationMetricConfig.ERROR_ON_EXTRA_ARGUMENT
+        )
+        config_validator.validate(self.config)
+
+    def configure(self):
+        self.use_argmax = self.config.get('use_argmax', True)
+
+    def update(self, annotation, prediction):
+        n_classes = len(self.dataset.labels)
+        prediction_mask = np.argmax(prediction.mask, axis=0) if self.use_argmax else prediction.mask.astype('int64')
+
+        def update_confusion_matrix(confusion_matrix):
+            label_true = annotation.mask.flatten()
+            label_pred = prediction_mask.flatten()
+
+            mask = (label_true >= 0) & (label_true < n_classes)
+            hist = np.bincount(n_classes * label_true[mask].astype(int) + label_pred[mask], minlength=n_classes ** 2)
+            hist = hist.reshape(n_classes, n_classes)
+            confusion_matrix += hist
+
+            return confusion_matrix
+
+        self._update_state(update_confusion_matrix, self.CONFUSION_MATRIX_KEY, lambda: np.zeros((n_classes, n_classes)))
+
+
+class SegmentationAccuracy(SegmentationMetric):
+    __provider__ = 'segmentation_accuracy'
+
+    def evaluate(self, annotations, predictions):
+        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
+        return np.diag(confusion_matrix).sum() / confusion_matrix.sum()
+
+
+class SegmentationIOU(SegmentationMetric):
+    __provider__ = 'mean_iou'
+
+    def evaluate(self, annotations, predictions):
+        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
+        union = confusion_matrix.sum(axis=1) + confusion_matrix.sum(axis=0) - np.diag(confusion_matrix)
+        diagonal = np.diag(confusion_matrix)
+        iou = np.divide(diagonal, union, out=np.zeros_like(diagonal), where=union != 0)
+
+        values, names = finalize_metric_result(iou, list(self.dataset.labels.values()))
+        self.meta['names'] = names
+
+        return values
+
+
+class SegmentationMeanAccuracy(SegmentationMetric):
+    __provider__ = 'mean_accuracy'
+
+    def evaluate(self, annotations, predictions):
+        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
+        diagonal = np.diag(confusion_matrix)
+        per_class_count = confusion_matrix.sum(axis=1)
+        acc_cls = np.divide(diagonal, per_class_count, out=np.zeros_like(diagonal), where=per_class_count != 0)
+
+        values, names = finalize_metric_result(acc_cls, list(self.dataset.labels.values()))
+        self.meta['names'] = names
+
+        return values
+
+
+class SegmentationFWAcc(SegmentationMetric):
+    __provider__ = 'frequency_weighted_accuracy'
+
+    def evaluate(self, annotations, predictions):
+        confusion_matrix = self.state[self.CONFUSION_MATRIX_KEY]
+
+        union = (confusion_matrix.sum(axis=1) + confusion_matrix.sum(axis=0) - np.diag(confusion_matrix))
+        diagonal = np.diag(confusion_matrix)
+        iou = np.divide(diagonal, union, out=np.zeros_like(diagonal), where=union != 0)
+        freq = confusion_matrix.sum(axis=1) / confusion_matrix.sum()
+
+        return (freq[freq > 0] * iou[freq > 0]).sum()
+
+
+class SegmentationDSCAcc(PerImageEvaluationMetric):
+    __provider__ = 'dice'
+    annotation_types = (BrainTumorSegmentationAnnotation,)
+    prediction_types = (BrainTumorSegmentationPrediction,)
+    overall_metric = []
+
+    def update(self, annotation, prediction):
+        cnt = 0
+        for prediction_mask, annotation_mask in zip(prediction.mask, annotation.mask):
+            annotation_mask = np.transpose(annotation_mask, (2, 0, 1))
+            annotation_mask = np.expand_dims(annotation_mask, 0)
+            numerator = np.sum(prediction_mask * annotation_mask) * 2.0 + 1.0
+            denominator = np.sum(annotation_mask) + np.sum(prediction_mask) + 1.0
+            self.overall_metric.append(numerator / denominator)
+            cnt += 1
+
+    def evaluate(self, annotations, predictions):
+        return sum(self.overall_metric) / len(self.overall_metric)
diff --git a/tools/accuracy_checker/accuracy_checker/metrics/text_detection.py b/tools/accuracy_checker/accuracy_checker/metrics/text_detection.py
new file mode 100644
index 000000000..65f8481d6
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/metrics/text_detection.py
@@ -0,0 +1,124 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from .metric import PerImageEvaluationMetric, BaseMetricConfig
+from ..config import BoolField, NumberField
+from ..representation import TextDetectionPrediction, TextDetectionAnnotation
+from ..utils import polygon_from_points
+
+
+def get_union(detection_polygon, annotation_polygon):
+    area_prediction = detection_polygon.area
+    area_annotation = annotation_polygon.area
+    return area_prediction + area_annotation - get_intersection_area(detection_polygon, annotation_polygon)
+
+
+def get_intersection_over_union(detection_polygon, annotation_polygon):
+    union = get_union(detection_polygon, annotation_polygon)
+    intersection = get_intersection_area(detection_polygon, annotation_polygon)
+    return intersection / union if union != 0 else 0.0
+
+
+def get_intersection_area(detection_polygon, annotation_polygon):
+    return detection_polygon.intersection(annotation_polygon).area
+
+
+class TextDetectionMetricConfig(BaseMetricConfig):
+    iou_constrain = NumberField(min_value=0, max_value=1, optional=True)
+    ignore_difficult = BoolField(optional=True)
+    area_precision_constrain = NumberField(min_value=0, max_value=1, optional=True)
+
+
+class TextDetectionMetric(PerImageEvaluationMetric):
+    __provider__ = 'text_detection'
+
+    annotation_types = (TextDetectionAnnotation, )
+    prediction_types = (TextDetectionPrediction, )
+
+    def validate_config(self):
+        text_detection_metric_config = TextDetectionMetricConfig(
+            'TextDetectionMetric_config', TextDetectionMetricConfig.ERROR_ON_EXTRA_ARGUMENT
+        )
+        text_detection_metric_config.validate(self.config)
+
+    def configure(self):
+        self.iou_constrain = self.config.get('iou_constrain', 0.5)
+        self.area_precision_constrain = self.config.get('area_precision_constrain', 0.5)
+        self.ignore_difficult = self.config.get('ignore_difficult', False)
+        self.number_matched_detections = 0
+        self.number_valid_annotations = 0
+        self.number_valid_detections = 0
+
+    def update(self, annotation, prediction):
+        gt_polygons = list(map(polygon_from_points, annotation.points))
+        prediction_polygons = list(map(polygon_from_points, prediction.points))
+        num_gt = len(gt_polygons)
+        num_det = len(prediction_polygons)
+        gt_difficult_mask = np.full(num_gt, False)
+        prediction_difficult_mask = np.full(num_det, False)
+        num_det_matched = 0
+        if self.ignore_difficult:
+            gt_difficult_inds = annotation.metadata.get('difficult_boxes', [])
+            prediction_difficult_inds = prediction.metadata.get('difficult_boxes', [])
+            gt_difficult_mask[gt_difficult_inds] = True
+            prediction_difficult_mask[prediction_difficult_inds] = True
+            for det_id, detection_polygon in enumerate(prediction_polygons):
+                for gt_difficult_id in gt_difficult_inds:
+                    gt_difficult_polygon = gt_polygons[gt_difficult_id]
+                    intersected_area = get_intersection_area(gt_difficult_polygon, detection_polygon)
+                    pd_dimensions = detection_polygon.area
+                    precision = 0 if pd_dimensions == 0 else intersected_area / pd_dimensions
+
+                    if precision >= self.area_precision_constrain:
+                        prediction_difficult_mask[det_id] = True
+
+        if num_gt > 0 and num_det > 0:
+            iou_matrix = np.empty((num_gt, num_det))
+            gt_matched = np.zeros(num_gt, np.int8)
+            det_matched = np.zeros(num_det, np.int8)
+
+            for gt_id, gt_polygon in enumerate(gt_polygons):
+                for pred_id, pred_polygon in enumerate(prediction_polygons):
+                    iou_matrix[gt_id, pred_id] = get_intersection_over_union(pred_polygon, gt_polygon)
+                    not_matched_before = gt_matched[gt_id] == 0 and det_matched[pred_id] == 0
+                    not_difficult = not gt_difficult_mask[gt_id] and not prediction_difficult_mask[pred_id]
+                    if not_matched_before and not_difficult:
+                        if iou_matrix[gt_id, pred_id] >= self.iou_constrain:
+                            gt_matched[gt_id] = 1
+                            det_matched[pred_id] = 1
+                            num_det_matched += 1
+
+        num_ignored_gt = np.sum(gt_difficult_mask)
+        num_ignored_pred = np.sum(prediction_difficult_mask)
+        num_valid_gt = num_gt - num_ignored_gt
+        num_valid_pred = num_det - num_ignored_pred
+
+        self.number_matched_detections += num_det_matched
+        self.number_valid_annotations += num_valid_gt
+        self.number_valid_detections += num_valid_pred
+
+    def evaluate(self, annotations, predictions):
+        recall = (
+            0 if self.number_valid_annotations == 0
+            else float(self.number_matched_detections) / self.number_valid_annotations
+        )
+        precision = (
+            0 if self.number_valid_detections == 0
+            else float(self.number_matched_detections) / self.number_valid_detections
+        )
+
+        return 0 if recall + precision == 0 else 2 * recall * precision / (recall + precision)
diff --git a/tools/accuracy_checker/accuracy_checker/model_evaluator.py b/tools/accuracy_checker/accuracy_checker/model_evaluator.py
new file mode 100644
index 000000000..65c981520
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/model_evaluator.py
@@ -0,0 +1,132 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import copy
+import pickle
+
+from .utils import get_path
+from .dataset import Dataset
+from .launcher import create_launcher, DummyLauncher
+from .launcher.loaders import PickleLoader
+from .logging import print_info
+from .metrics import MetricsExecutor
+from .postprocessor import PostprocessingExecutor
+from .preprocessor import PreprocessingExecutor
+
+
+class ModelEvaluator:
+    def __init__(self, launcher, preprocessor, postprocessor, dataset, metric):
+        self.launcher = launcher
+        self.preprocessor = preprocessor
+        self.postprocessor = postprocessor
+        self.dataset = dataset
+        self.metric_executor = metric
+
+        self._annotations = []
+        self._predictions = []
+
+    @classmethod
+    def from_configs(cls, launcher_config, dataset_config):
+        dataset_name = dataset_config['name']
+        preprocessor = PreprocessingExecutor(dataset_config.get('preprocessing'), dataset_name)
+        dataset = Dataset(dataset_config, preprocessor)
+
+        launcher = create_launcher(launcher_config, dataset.metadata)
+        postprocessor = PostprocessingExecutor(dataset_config.get('postprocessing'), dataset_name, dataset.metadata)
+        metric_dispatcher = MetricsExecutor(dataset_config, dataset)
+
+        return cls(launcher, preprocessor, postprocessor, dataset, metric_dispatcher)
+
+    def process_dataset(self, stored_predictions, progress_reporter, *args, **kwargs):
+        if self._is_stored(stored_predictions) or isinstance(self.launcher, DummyLauncher):
+            self._annotations, self._predictions = self.load(stored_predictions, progress_reporter)
+            self._annotations, self._predictions = self.postprocessor.full_process(self._annotations, self._predictions)
+
+            self.metric_executor.update_metrics_on_batch(self._annotations, self._predictions)
+            return self._annotations, self._predictions
+
+        self.dataset.batch = self.launcher.batch
+        predictions_to_store = []
+        for batch_id, (batch_annotation, batch_input) in enumerate(self.dataset):
+            batch_identifiers = [annotation.identifier for annotation in batch_annotation]
+            batch_predictions = self.launcher.predict(batch_identifiers, batch_input, *args, **kwargs)
+
+            if stored_predictions:
+                predictions_to_store.extend(copy.deepcopy(batch_predictions))
+
+            annotations, predictions = self.postprocessor.process_batch(batch_annotation, batch_predictions)
+            if not self.postprocessor.has_dataset_processors:
+                self.metric_executor.update_metrics_on_batch(annotations, predictions)
+
+            self._annotations.extend(annotations)
+            self._predictions.extend(predictions)
+
+            if progress_reporter:
+                progress_reporter.update(batch_id, len(batch_predictions))
+
+        if progress_reporter:
+            progress_reporter.finish()
+
+        if stored_predictions:
+            self.store_predictions(stored_predictions, predictions_to_store)
+
+        if self.postprocessor.has_dataset_processors:
+            self.metric_executor.update_metrics_on_batch(self._annotations, self._predictions)
+
+        return self.postprocessor.process_dataset(self._annotations, self._predictions)
+
+    @staticmethod
+    def _is_stored(stored_predictions=None):
+        if not stored_predictions:
+            return False
+
+        try:
+            get_path(stored_predictions)
+            return True
+        except OSError:
+            return False
+
+    def compute_metrics(self, output_callback=None, ignore_results_formatting=False):
+        for result_presenter, evaluated_metric in self.metric_executor.iterate_metrics(
+                self._annotations, self._predictions):
+            result_presenter.write_result(evaluated_metric, output_callback, ignore_results_formatting)
+
+    def load(self, stored_predictions, progress_reporter):
+        self._annotations = self.dataset.annotation
+        launcher = self.launcher
+        if not isinstance(launcher, DummyLauncher):
+            launcher = DummyLauncher({
+                'framework': 'dummy',
+                'loader': PickleLoader.__provider__,
+                'data_path': stored_predictions
+            }, adapter=None)
+
+        predictions = launcher.predict([annotation.identifier for annotation in self._annotations])
+
+        if progress_reporter:
+            progress_reporter.finish(False)
+
+        return self._annotations, predictions
+
+    @staticmethod
+    def store_predictions(stored_predictions, predictions):
+        # since at the first time file does not exist and then created we can not use it as a pathlib.Path object
+        with open(stored_predictions, "wb") as content:
+            pickle.dump(predictions, content)
+            print_info("prediction objects are save to {}".format(stored_predictions))
+
+    def release(self):
+        self.launcher.release()
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/README.md b/tools/accuracy_checker/accuracy_checker/postprocessor/README.md
new file mode 100644
index 000000000..752276a7f
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/README.md
@@ -0,0 +1,40 @@
+# Postprocessors
+
+Postprocessor is function which processes prediction and/or annotation data after model infer and before metric calculation. For correct work postprocessors require specific representation format. 
+(e. g. clip boxes postprocessor expects detection annotation and detection prediction for processing). 
+
+In case when you use complicated representation located in representation container, you can add options `annotation_source` and `prediction_source` in configuration file, 
+if you want process only specific representations, another way postprocessor will be used for all suitable representations. `annotation_source` and `prediction_source` should contain 
+comma separated list of annotation identifiers and output layer names respectively.
+
+Every postprocessor has parameters available for configuration. 
+
+Accuracy Checker supports following set of postprocessors:
+
+* `cast_to_int` - casting detection bounding box coordinates given in floating point format to integer. Supported representations: `DetectionAnotation`, `DetectionPrediction`, `TextDetectionAnnotation`, `TextDetectionPrediction`.
+  * `round_policy` - method for rounding: `nearest`, `greater`, `lower`, `nearest_to_zero`.
+*  `clip_boxes` - clipping detection bounding box sizes. Supported representations: `DetectionAnotation`, `DetectionPrediction`.
+   * `dst_width` and `dst_height` - destination width and height for box clipping respectively. You can also use `size` instead in case when destination sizes are equal.
+   * `apply_to` - option which determines target boxes for processing (`annotation` for ground truth boxes and `prediction` for detection results, `all` for both).
+   * `bboxes_normalized` is flag which says that target bounding boxes are in normalized format.
+* `correct_yolo_v2_boxes` - resizing detection prediction bbox coordinates using specific for Yolo v2 approach. Supported representations: `DetectionAnotation`, `DetectionPrediction`.
+   * `dst_width` and `dst_height` - destination width and height respectively. You can also use `size` instead in case when destination sizes are equal.
+*  `encode_segmentation_mask` - encoding segmentation label image as segmentation mask. Supported representations: `SegmentationAnotation`, `SegmentationPrediction`.
+*  `resize_prediction_boxes` - resizing normalized detection prediction boxes according to image size. Supported representations: `DetectionAnotation`, `DetectionPrediction`.
+*  `resize_segmentation_mask` - resizing segmentation mask. Supported representations: `SegmentationAnotation`, `SegmentationPrediction`.
+    * `dst_width` and `dst_height` - destination width and height for box clipping respectively. You can also use `size` instead in case when destination sizes are equal. 
+       If any of these parameters are not specified, image size will be used as default.
+    * `apply_to` - determines target boxes for processing (`annotation` for ground truth boxes and `prediction` for detection results, `all` for both).
+*  `nms` - non-maximum suppression. Supported representations: `DetectionAnotation`, `DetectionPrediction`.
+    * `overlap` - overlap threshold for merging detections.
+* `filter` - filtering data using different parameters. Supported representations: `DetectionAnotation`, `DetectionPrediction`.
+    * `apply_to` - determines target boxes for processing (`annotation` for ground truth boxes and `prediction` for detection results, `all` for both).
+    * `remove_filtered` - removing filtered data. Annotations support ignoring filtered data without removing as default, in other cases filtered data will be removed automatically.
+    * Supported parameters for filtering: `labels`, `min_confidence`, `height_range`, `width_range`, `is_empty`, `min_visibility`, `aspect_ratio`, `area_ratio`, `area_range`.
+   Filtering by `height_range`, `width_range` are also available for `TextDetectionAnnotation`, `TextDetectionPrediction`, `area_range`  - for `PoseEstimationAnnotation`, `PoseEstimationPrediction` and `TextDetectionAnnotation`, `TextDetectionPrediction`.
+* `normalize_landmarks_points` - normalizing ground truth landmarks points. Supported representations: `FacialLandmarksAnnotation`, `FacialLandmarksPrediction`.
+    * `use_annotation_rect` - allows to use size of rectangle saved in annotation metadata for point scaling instead source image size.
+* `extend_segmentation_mask` - extending annotation segmentation mask to predicted mask size making border filled by specific value. Supported representations: `SegmentationAnotation`, `SegmentationPrediction`.
+  * `filling_label` - value for filling border (default 255).
+* `zoom_segmentation_mask` - zooming segmentation mask. Supported representations: `SegmentationAnotation`, `SegmentationPrediction`.
+  * `zoom` - size for zoom operation.
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/__init__.py b/tools/accuracy_checker/accuracy_checker/postprocessor/__init__.py
new file mode 100644
index 000000000..c3a93bd93
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/__init__.py
@@ -0,0 +1,69 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .postprocessing_executor import PostprocessingExecutor
+
+from .filter import (
+    FilterPostprocessor,
+
+    FilterByHeightRange,
+    FilterByLabels,
+    FilterByMinConfidence,
+    FilterEmpty,
+    FilterByVisibility,
+    FilterByAspectRatio
+)
+
+from .cast_to_int import CastToInt
+from .clip_boxes import ClipBoxes
+from .nms import NMS
+from .resize_prediction_boxes import ResizePredictionBoxes
+from .correct_yolo_v2_boxes import CorrectYoloV2Boxes
+from .resize_segmentation_mask import ResizeSegmentationMask
+from .encode_segmentation_mask import EncodeSegMask
+from .normalize_landmarks_points import NormalizeLandmarksPoints
+from .clip_points import ClipPoints
+from .extend_segmentation_mask import ExtendSegmentationMask
+from .zoom_segmentation_mask import ZoomSegMask
+from .crop_segmentation_mask import CropSegmentationMask
+from .clip_segmentation_mask import ClipSegmentationMask
+
+__all__ = [
+    'PostprocessingExecutor',
+
+    'FilterPostprocessor',
+    'FilterByHeightRange',
+    'FilterByLabels',
+    'FilterByMinConfidence',
+    'FilterEmpty',
+    'FilterByVisibility',
+    'FilterByAspectRatio',
+
+    'CastToInt',
+    'ClipBoxes',
+    'NMS',
+    'ResizePredictionBoxes',
+    'CorrectYoloV2Boxes',
+
+    'ResizeSegmentationMask',
+    'EncodeSegMask',
+    'ExtendSegmentationMask',
+    'ZoomSegMask',
+    'CropSegmentationMask',
+    'ClipSegmentationMask',
+
+    'NormalizeLandmarksPoints'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/cast_to_int.py b/tools/accuracy_checker/accuracy_checker/postprocessor/cast_to_int.py
new file mode 100644
index 000000000..cd6e29a10
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/cast_to_int.py
@@ -0,0 +1,71 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from functools import singledispatch
+from typing import Union
+import numpy as np
+from ..config import StringField
+from ..representation import DetectionAnnotation, DetectionPrediction, TextDetectionPrediction, TextDetectionAnnotation
+from .postprocessor import Postprocessor, BasePostprocessorConfig
+
+
+class CastToInt(Postprocessor):
+    __provider__ = 'cast_to_int'
+    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
+    prediction_types = (DetectionPrediction, TextDetectionPrediction)
+
+    round_policies_func = {
+        'nearest': np.rint,
+        'nearest_to_zero': np.trunc,
+        'lower': np.floor,
+        'greater': np.ceil
+    }
+
+    def validate_config(self):
+        class _CastToIntConfigValidator(BasePostprocessorConfig):
+            round_policy = StringField(optional=True, choices=self.round_policies_func.keys())
+
+        cast_to_int_config_validator = _CastToIntConfigValidator(
+            self.__provider__, on_extra_argument=_CastToIntConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        cast_to_int_config_validator.validate(self.config)
+
+    def configure(self):
+        self.round_func = self.round_policies_func[self.config.get('round_policy', 'nearest')]
+
+    def process_image(self, annotation, prediction):
+        @singledispatch
+        def cast(entry):
+            pass
+
+        @cast.register(Union[DetectionAnnotation, DetectionPrediction])
+        def _(entry):
+            entry.x_mins = self.round_func(entry.x_mins)
+            entry.x_maxs = self.round_func(entry.x_maxs)
+            entry.y_mins = self.round_func(entry.y_mins)
+            entry.y_maxs = self.round_func(entry.y_maxs)
+
+        @cast.register(Union[TextDetectionAnnotation, TextDetectionPrediction])
+        def _(entry):
+            entry.points = self.round_func(entry.points)
+
+
+        for annotation_ in annotation:
+            cast(annotation_)
+
+        for prediction_ in prediction:
+            cast(prediction_)
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/clip_boxes.py b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_boxes.py
new file mode 100644
index 000000000..dd87f10e7
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_boxes.py
@@ -0,0 +1,68 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..config import BoolField, NumberField
+from ..representation import DetectionPrediction, DetectionAnnotation
+from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
+
+
+class ClipBoxes(PostprocessorWithSpecificTargets):
+    __provider__ = 'clip_boxes'
+
+    annotation_types = (DetectionAnnotation, )
+    prediction_types = (DetectionPrediction, )
+
+    def validate_config(self):
+        class _ClipConfigValidator(PostprocessorWithTargetsConfigValidator):
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+            size = NumberField(floats=False, optional=True, min_value=1)
+            boxes_normalized = BoolField(optional=True)
+
+        clip_config_validator = _ClipConfigValidator(
+            self.__provider__, on_extra_argument=_ClipConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        clip_config_validator.validate(self.config)
+
+    def configure(self):
+        size = self.config.get('size')
+        self.dst_height = size or self.config.get('dst_height')
+        self.dst_width = size or self.config.get('dst_width')
+
+        self.boxes_normalized = self.config.get('boxes_normalized', False)
+
+    def process_image(self, annotation, prediction):
+        target_height = self.dst_height or self.image_size[0]
+        target_width = self.dst_width or self.image_size[1]
+
+        max_width = target_width if not self.boxes_normalized else 1
+        max_height = target_height if not self.boxes_normalized else 1
+
+        for target in annotation:
+            self._clip_boxes(target, (0, max_width), (0, max_height))
+        for target in prediction:
+            self._clip_boxes(target, (0, max_width), (0, max_height))
+
+        return annotation, prediction
+
+    @staticmethod
+    def _clip_boxes(entry, width_range, height_range):
+        entry.x_mins = entry.x_mins.clip(width_range[0], width_range[1])
+        entry.x_maxs = entry.x_maxs.clip(width_range[0], width_range[1])
+        entry.y_mins = entry.y_mins.clip(height_range[0], height_range[1])
+        entry.y_maxs = entry.y_maxs.clip(height_range[0], height_range[1])
+
+        return entry
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/clip_points.py b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_points.py
new file mode 100644
index 000000000..3ffd3a578
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_points.py
@@ -0,0 +1,68 @@
+""""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from ..config import BoolField, NumberField
+from ..representation import TextDetectionAnnotation, TextDetectionPrediction
+from ..utils import get_size_from_config
+from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
+
+
+class ClipPointsConfigValidator(PostprocessorWithTargetsConfigValidator):
+    dst_width = NumberField(floats=False, optional=True, min_value=1)
+    dst_height = NumberField(floats=False, optional=True, min_value=1)
+    size = NumberField(floats=False, optional=True, min_value=1)
+    points_normalized = BoolField(optional=True)
+
+
+class ClipPoints(PostprocessorWithSpecificTargets):
+    __provider__ = 'clip_points'
+
+    annotation_types = (TextDetectionAnnotation, )
+    prediction_types = (TextDetectionPrediction, )
+
+    def validate_config(self):
+        clip_points_config_validator = ClipPointsConfigValidator(
+            self.__provider__, on_extra_argument=ClipPointsConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        clip_points_config_validator.validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width = get_size_from_config(self.config, allow_none=True)
+        self.points_normalized = self.config.get('points_normalized', False)
+
+    def process_image(self, annotation, prediction):
+        target_width = self.dst_width or self.image_size[1] - 1
+        target_height = self.dst_height or self.image_size[0] - 1
+
+        max_width = target_width if not self.points_normalized else 1
+        max_height = target_height if not self.points_normalized else 1
+        for target in annotation:
+            points = []
+            for polygon in target.points:
+                polygon[:, 0] = np.clip(polygon[:, 0], 0, max_width)
+                polygon[:, 1] = np.clip(polygon[:, 1], 0, max_height)
+                points.append(polygon)
+            target.points = points
+        for target in prediction:
+            points = []
+            for polygon in target.points:
+                polygon[:, 0] = np.clip(polygon[:, 0], 0, max_width)
+                polygon[:, 1] = np.clip(polygon[:, 1], 0, max_height)
+                points.append(polygon)
+            target.points = points
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/clip_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_segmentation_mask.py
new file mode 100644
index 000000000..7a014641f
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/clip_segmentation_mask.py
@@ -0,0 +1,48 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import numpy as np
+from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
+from ..representation import BrainTumorSegmentationAnnotation, BrainTumorSegmentationPrediction
+from ..config import NumberField, ConfigError
+
+
+class ClipSegmentationMask(PostprocessorWithSpecificTargets):
+    __provider__ = 'clip_segmentation_mask'
+
+    annotation_types = (BrainTumorSegmentationAnnotation,)
+    prediction_types = (BrainTumorSegmentationPrediction,)
+
+    def validate_config(self):
+        class _ConfigValidator(PostprocessorWithTargetsConfigValidator):
+            min_value = NumberField(floats=False, min_value=0, optional=True)
+            max_value = NumberField(floats=False)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.min_value = self.config.get('min_value', 0)
+        self.max_value = self.config['max_value']
+        if self.max_value < self.min_value:
+            raise ConfigError('max_value should be greater than min_value')
+
+    def process_image(self, annotation, prediction):
+        for target in annotation:
+            target.mask = np.clip(target.mask, a_min=self.min_value, a_max=self.max_value)
+
+        for target in prediction:
+            target.mask = np.clip(target.mask, a_min=self.min_value, a_max=self.max_value)
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/correct_yolo_v2_boxes.py b/tools/accuracy_checker/accuracy_checker/postprocessor/correct_yolo_v2_boxes.py
new file mode 100644
index 000000000..b37be3791
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/correct_yolo_v2_boxes.py
@@ -0,0 +1,75 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..config import NumberField
+from .postprocessor import BasePostprocessorConfig, Postprocessor
+from ..representation import DetectionPrediction, DetectionAnnotation
+from ..utils import get_size_from_config
+
+
+class CorrectYoloV2Boxes(Postprocessor):
+    __provider__ = 'correct_yolo_v2_boxes'
+
+    prediction_types = (DetectionPrediction, )
+    annotation_types = (DetectionAnnotation, )
+
+    def validate_config(self):
+        class _CorrectYoloV2BoxesConfigValidator(BasePostprocessorConfig):
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+            size = NumberField(floats=False, optional=True, min_value=1)
+
+        clip_config_validator = _CorrectYoloV2BoxesConfigValidator(
+            self.__provider__, on_extra_argument=_CorrectYoloV2BoxesConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        clip_config_validator.validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width = get_size_from_config(self.config)
+
+    def process_image(self, annotation, prediction):
+        dst_h, dst_w = self.dst_height, self.dst_width
+        # postprocessor always expects lists of annotations and predictions for the same image
+        # we do not need to get image sizes in cycle, because they are equal
+        img_h, img_w, _ = self.image_size
+
+        if (dst_w / img_w) < (dst_h / img_h):
+            new_w = dst_w
+            new_h = (img_h * dst_w) // img_w
+        else:
+            new_h = dst_h
+            new_w = (img_w * dst_h) // img_h
+
+        for prediction_ in prediction:
+            coordinates = zip(prediction_.x_mins, prediction_.y_mins, prediction_.x_maxs, prediction_.y_maxs)
+            for i, (x0, y0, x1, y1) in enumerate(coordinates):
+                box = [(x0 + x1) / 2.0, (y0 + y1) / 2.0, x1 - x0, y1 - y0]
+                box[0] = (box[0] - (dst_w - new_w) / (2.0 * dst_w)) * (dst_w / new_w)
+                box[1] = (box[1] - (dst_h - new_h) / (2.0 * dst_h)) * (dst_h / new_h)
+                box[2] *= dst_w / new_w
+                box[3] *= dst_h / new_h
+
+                box[0] *= img_w
+                box[1] *= img_h
+                box[2] *= img_w
+                box[3] *= img_h
+
+                prediction_.x_mins[i] = box[0] - box[2] / 2.0 + 1
+                prediction_.y_mins[i] = box[1] - box[3] / 2.0 + 1
+                prediction_.x_maxs[i] = box[0] + box[2] / 2.0 + 1
+                prediction_.y_maxs[i] = box[1] + box[3] / 2.0 + 1
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/crop_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/crop_segmentation_mask.py
new file mode 100644
index 000000000..dd814feb2
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/crop_segmentation_mask.py
@@ -0,0 +1,49 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
+from ..representation import BrainTumorSegmentationAnnotation, BrainTumorSegmentationPrediction
+from ..config import NumberField
+from ..preprocessor import Crop3D
+from ..utils import get_size_3d_from_config
+
+
+class CropSegmentationMask(PostprocessorWithSpecificTargets):
+    __provider__ = 'crop_segmentation_mask'
+
+    annotation_types = (BrainTumorSegmentationAnnotation,)
+    prediction_types = (BrainTumorSegmentationPrediction,)
+
+    def validate_config(self):
+        class _ConfigValidator(PostprocessorWithTargetsConfigValidator):
+            size = NumberField(floats=False, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+            dst_volume = NumberField(floats=False, optional=True, min_value=1)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width, self.dst_volume = get_size_3d_from_config(self.config)
+
+    def process_image(self, annotation, prediction):
+        for target in annotation:
+            target.mask = Crop3D.crop_center(target.mask, self.dst_height, self.dst_width, self.dst_volume)
+
+        for target in prediction:
+            target.mask = Crop3D.crop_center(target.mask, self.dst_height, self.dst_width, self.dst_volume)
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/encode_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/encode_segmentation_mask.py
new file mode 100644
index 000000000..736eb0e12
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/encode_segmentation_mask.py
@@ -0,0 +1,46 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from .postprocessor import Postprocessor
+from ..representation import SegmentationAnnotation, SegmentationPrediction
+
+
+class EncodeSegMask(Postprocessor):
+    """
+    Encode segmentation label image as segmentation mask.
+    """
+
+    __provider__ = 'encode_segmentation_mask'
+
+    annotation_types = (SegmentationAnnotation, )
+    prediction_types = (SegmentationPrediction, )
+
+    def process_image(self, annotation, prediction):
+        segmentation_colors = self.meta.get("segmentation_colors")
+
+        if not segmentation_colors:
+            raise ValueError("No 'segmentation_colors' in dataset metadata.")
+
+        for annotation_ in annotation:
+            mask = annotation_.mask.astype(int)
+            encoded_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.int16)
+            for label, color in enumerate(segmentation_colors):
+                encoded_mask[np.where(np.all(mask == color, axis=-1))[:2]] = label
+                annotation_.mask = encoded_mask
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/extend_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/extend_segmentation_mask.py
new file mode 100644
index 000000000..abd83e0bb
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/extend_segmentation_mask.py
@@ -0,0 +1,64 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import math
+import cv2
+
+from .postprocessor import Postprocessor, BasePostprocessorConfig
+from ..representation import SegmentationAnnotation, SegmentationPrediction
+from ..config import NumberField, ConfigError
+
+
+class ExtendSegmentationMask(Postprocessor):
+    """
+    Extend annotation segmentation mask to prediction size filling border with specific label.
+    """
+
+    __provider__ = 'extend_segmentation_mask'
+
+    annotation_types = (SegmentationAnnotation, )
+    prediction_types = (SegmentationPrediction, )
+
+    def validate_config(self):
+        class _ExtendSegmentationMaskConfigValidator(BasePostprocessorConfig):
+            filling_label = NumberField(optional=True, floats=False)
+
+        extend_mask_config_validator = _ExtendSegmentationMaskConfigValidator(
+            self.__provider__, on_extra_argument=_ExtendSegmentationMaskConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        extend_mask_config_validator.validate(self.config)
+
+    def configure(self):
+        self.filling_label = self.config.get('filling_label', 255)
+
+    def process_image(self, annotation, prediction):
+        for annotation_, prediction_ in zip(annotation, prediction):
+            annotation_mask = annotation_.mask
+            dst_height, dst_width = prediction_.mask.shape[-2:]
+            height, width = annotation_mask.shape[-2:]
+            if dst_width < width or dst_height < height:
+                raise ConfigError('size for extending should be not less current mask size')
+            pad = []
+            pad.append(int(math.floor((dst_height - height) / 2.0)))
+            pad.append(int(math.floor((dst_width - width) / 2.0)))
+            pad.append(int(dst_height - height - pad[0]))
+            pad.append(int(dst_width - width - pad[1]))
+
+            extended_mask = cv2.copyMakeBorder(
+                annotation_mask, pad[0], pad[2], pad[1], pad[3], cv2.BORDER_CONSTANT, value=self.filling_label
+            )
+            annotation_.mask = extended_mask
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/filter.py b/tools/accuracy_checker/accuracy_checker/postprocessor/filter.py
new file mode 100644
index 000000000..440aec02b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/filter.py
@@ -0,0 +1,319 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from functools import singledispatch
+from typing import Union
+import numpy as np
+
+from ..config import BaseField, BoolField
+from ..dependency import ClassProvider
+from ..postprocessor.postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
+from ..representation import (DetectionAnnotation, DetectionPrediction, TextDetectionAnnotation,
+                              TextDetectionPrediction, PoseEstimationPrediction, PoseEstimationAnnotation)
+from ..utils import in_interval, polygon_from_points, convert_to_range
+
+
+class FilterConfig(PostprocessorWithTargetsConfigValidator):
+    remove_filtered = BoolField(optional=True)
+
+    def __init__(self, config_uri, **kwargs):
+        super().__init__(config_uri, **kwargs)
+        for functor in BaseFilter.providers:
+            self.fields[functor] = BaseField(optional=True)
+
+
+class FilterPostprocessor(PostprocessorWithSpecificTargets):
+    __provider__ = 'filter'
+
+    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
+    prediction_types = (DetectionPrediction, TextDetectionPrediction)
+
+    def __init__(self, *args, **kwargs):
+        self._filters = []
+        self.remove_filtered = False
+        super().__init__(*args, **kwargs)
+
+    def validate_config(self):
+        filter_config = FilterConfig(self.__provider__, on_extra_argument=FilterConfig.ERROR_ON_EXTRA_ARGUMENT)
+        filter_config.validate(self.config)
+
+    def configure(self):
+        config = self.config.copy()
+        config.pop('type')
+        self.remove_filtered = config.pop('remove_filtered', False)
+        config.pop('annotation_source', None)
+        config.pop('prediction_source', None)
+        config.pop('apply_to', None)
+
+        for key, value in config.items():
+            self._filters.append(BaseFilter.provide(key, value))
+
+    def process_image(self, annotation, prediction):
+        for functor in self._filters:
+            for target in annotation:
+                self._filter_entry_by(target, functor)
+
+            for target in prediction:
+                self._filter_entry_by(target, functor)
+
+        return annotation, prediction
+
+    def _filter_entry_by(self, entry, functor):
+        ignored_key = 'difficult_boxes'
+
+        if not self.remove_filtered and isinstance(entry, (DetectionAnnotation, DetectionPrediction,
+                                                           TextDetectionAnnotation, TextDetectionPrediction,
+                                                           PoseEstimationAnnotation, PoseEstimationPrediction)):
+            ignored = entry.metadata.setdefault(ignored_key, [])
+            ignored.extend(functor(entry))
+        else:
+            entry.remove(functor(entry))
+
+        return entry
+
+
+class BaseFilter(ClassProvider):
+    __provider_type__ = 'filter'
+
+    def __init__(self, filter_arg):
+        self.filter_arg = filter_arg
+
+    def __call__(self, entry):
+        return self.apply_filter(entry, self.filter_arg)
+
+    def apply_filter(self, entry, filter_arg):
+        raise NotImplementedError
+
+
+class FilterByLabels(BaseFilter):
+    __provider__ = 'labels'
+
+    def apply_filter(self, entry, labels):
+        filtered = []
+        for index, label in enumerate(entry.labels):
+            if label in labels:
+                filtered.append(index)
+
+        return filtered
+
+
+class FilterByMinConfidence(BaseFilter):
+    __provider__ = 'min_confidence'
+
+    def apply_filter(self, entry, min_confidence):
+        filtered = []
+
+        if isinstance(entry, DetectionAnnotation):
+            return filtered
+
+        for index, score in enumerate(entry.scores):
+            if score < min_confidence:
+                filtered.append(index)
+
+        return filtered
+
+
+class FilterByHeightRange(BaseFilter):
+    __provider__ = 'height_range'
+
+    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
+    prediction_types = (DetectionPrediction, TextDetectionPrediction)
+
+    def apply_filter(self, entry, height_range):
+        @singledispatch
+        def filtering(entry_value, height_range_):
+            return []
+
+        @filtering.register(Union[DetectionAnnotation, DetectionPrediction])
+        def _(entry_value, height_range_):
+            filtered = []
+            for index, (y_min, y_max) in enumerate(zip(entry_value.y_mins, entry_value.y_maxs)):
+                height = y_max - y_min
+                if not in_interval(height, height_range_):
+                    filtered.append(index)
+
+            return filtered
+
+        @filtering.register(Union[TextDetectionAnnotation, TextDetectionPrediction])
+        def _(entry_values, height_range_):
+            filtered = []
+            for index, polygon_points in enumerate(entry_values.points):
+                left_bottom_point, left_top_point, right_top_point, right_bottom_point = polygon_points
+                left_side_height = np.linalg.norm(left_bottom_point - left_top_point)
+                right_side_height = np.linalg.norm(right_bottom_point - right_top_point)
+                if not in_interval(np.mean([left_side_height, right_side_height]), height_range_):
+                    filtered.append(index)
+
+            return filtered
+
+        return filtering(entry, convert_to_range(height_range))
+
+
+class FilterByWidthRange(BaseFilter):
+    __provider__ = 'width_range'
+
+    annotation_types = (DetectionAnnotation, TextDetectionAnnotation)
+    prediction_types = (DetectionPrediction, TextDetectionPrediction)
+
+    def apply_filter(self, entry, width_range):
+        @singledispatch
+        def filtering(entry_value, width_range_):
+            return []
+
+        @filtering.register(Union[DetectionAnnotation, DetectionPrediction])
+        def _(entry_value, width_range_):
+            filtered = []
+            for index, (x_min, x_max) in enumerate(zip(entry_value.x_mins, entry_value.x_maxs)):
+                width = x_max - x_min
+                if not in_interval(width, width_range_):
+                    filtered.append(index)
+
+            return filtered
+
+        @filtering.register(Union[TextDetectionAnnotation, TextDetectionPrediction])
+        def _(entry_values, width_range_):
+            filtered = []
+            for index, polygon_points in enumerate(entry_values.points):
+                left_bottom_point, left_top_point, right_top_point, right_bottom_point = polygon_points
+                top_width = np.linalg.norm(right_top_point - left_top_point)
+                bottom_width = np.linalg.norm(right_bottom_point - left_bottom_point)
+                if not in_interval(top_width, width_range_) or not in_interval(bottom_width, width_range_):
+                    filtered.append(index)
+
+            return filtered
+
+        return filtering(entry, convert_to_range(width_range))
+
+
+class FilterByAreaRange(BaseFilter):
+    __provider__ = 'area_range'
+
+    annotation_types = (TextDetectionAnnotation, PoseEstimationAnnotation)
+    prediction_types = (TextDetectionPrediction, )
+
+    def apply_filter(self, entry, area_range):
+        area_range = convert_to_range(area_range)
+
+        @singledispatch
+        def filtering(entry, area_range):
+            return []
+
+        @filtering.register
+        def _(entry: Union[PoseEstimationAnnotation, PoseEstimationPrediction], area_range):
+            filtered = []
+            areas = entry.areas
+            for area_id, area in enumerate(areas):
+                if not in_interval(area, area_range):
+                    filtered.append(area_id)
+            return filtered
+
+        @filtering.register
+        def _(entry: Union[TextDetectionAnnotation, TextDetectionPrediction]):
+            filtered = []
+            for index, polygon_points in enumerate(entry.points):
+                if not in_interval(polygon_from_points(polygon_points).area, area_range):
+                    filtered.append(index)
+            return filtered
+
+        return filtering(entry, area_range)
+
+
+class FilterEmpty(BaseFilter):
+    __provider__ = 'is_empty'
+
+    def apply_filter(self, entry: DetectionAnnotation, is_empty):
+        return np.where(np.bitwise_or(entry.x_maxs - entry.x_mins <= 0, entry.y_maxs - entry.y_mins <= 0))[0]
+
+
+class FilterByVisibility(BaseFilter):
+    __provider__ = 'min_visibility'
+
+    _VISIBILITY_LEVELS = {
+        'heavy occluded': 0,
+        'partially occluded': 1,
+        'visible': 2
+    }
+
+    def apply_filter(self, entry, min_visibility):
+        filtered = []
+        min_visibility_level = self.visibility_level(min_visibility)
+        for index, visibility in enumerate(entry.metadata.get('visibilities', [])):
+            if self.visibility_level(visibility) < min_visibility_level:
+                filtered.append(index)
+
+        return filtered
+
+    def visibility_level(self, visibility):
+        level = self._VISIBILITY_LEVELS.get(visibility)
+        if level is None:
+            message = 'Unknown visibility level "{}". Supported only "{}"'
+            raise ValueError(message.format(visibility, ','.join(self._VISIBILITY_LEVELS.keys())))
+
+        return level
+
+
+class FilterByAspectRatio(BaseFilter):
+    __provider__ = 'aspect_ratio'
+
+    def apply_filter(self, entry, aspect_ratio):
+        aspect_ratio = convert_to_range(aspect_ratio)
+
+        filtered = []
+        coordinates = zip(entry.x_mins, entry.y_mins, entry.x_maxs, entry.y_maxs)
+        for index, (x_min, y_min, x_max, y_max) in enumerate(coordinates):
+            ratio = (y_max - y_min) / np.maximum(x_max - x_min, np.finfo(np.float64).eps)
+            if not in_interval(ratio, aspect_ratio):
+                filtered.append(index)
+
+        return filtered
+
+
+class FilterByAreaRatio(BaseFilter):
+    __provider__ = 'area_ratio'
+
+    def apply_filter(self, entry, area_ratio):
+        area_ratio = convert_to_range(area_ratio)
+
+        filtered = []
+        if not isinstance(entry, DetectionAnnotation):
+            return filtered
+
+        image_size = entry.metadata.get('image_size')
+        if not image_size:
+            return filtered
+        image_size = image_size[0]
+
+        image_area = image_size[0] * image_size[1]
+
+        occluded_indices = entry.metadata.get('is_occluded', [])
+        coordinates = zip(entry.x_mins, entry.y_mins, entry.x_maxs, entry.y_maxs)
+        for index, (x_min, y_min, x_max, y_max) in enumerate(coordinates):
+            width, height = x_max - x_min, y_max - y_min
+            area = np.sqrt(float(width * height) / np.maximum(image_area, np.finfo(np.float64).eps))
+            if not in_interval(area, area_ratio) or index in occluded_indices:
+                filtered.append(index)
+
+        return filtered
+
+
+class FilterInvalidBoxes(BaseFilter):
+    __provider__ = 'invalid_boxes'
+
+    def apply_filter(self, entry, invalid_boxes):
+        infinite_mask_x = np.logical_or(~np.isfinite(entry.x_mins), ~np.isfinite(entry.x_maxs))
+        infinite_mask_y = np.logical_or(~np.isfinite(entry.y_mins), ~np.isfinite(entry.y_maxs))
+        infinite_mask = np.logical_or(infinite_mask_x, infinite_mask_y)
+
+        return np.argwhere(infinite_mask).reshape(-1).tolist()
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/nms.py b/tools/accuracy_checker/accuracy_checker/postprocessor/nms.py
new file mode 100644
index 000000000..8bdbf1acb
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/nms.py
@@ -0,0 +1,80 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..config import NumberField
+from .postprocessor import BasePostprocessorConfig, Postprocessor
+from ..representation import DetectionPrediction, DetectionAnnotation
+
+
+class NMS(Postprocessor):
+    __provider__ = 'nms'
+
+    prediction_types = (DetectionPrediction, )
+    annotation_types = (DetectionAnnotation, )
+
+    def validate_config(self):
+        class _NMSConfigValidator(BasePostprocessorConfig):
+            overlap = NumberField(min_value=0, max_value=1, optional=True)
+
+        nms_config_validator = _NMSConfigValidator(
+            self.__provider__, on_extra_argument=_NMSConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        nms_config_validator.validate(self.config)
+
+    def configure(self):
+        self.overlap = self.config.get('overlap', 0.5)
+
+    def process_image(self, annotations, predictions):
+        for prediction in predictions:
+            keep = self._nms(
+                prediction.x_mins, prediction.y_mins, prediction.x_maxs, prediction.y_maxs, prediction.scores,
+                self.overlap
+            )
+            prediction.remove([box for box in range(len(prediction.x_mins)) if box not in keep])
+
+        return annotations, predictions
+
+    @staticmethod
+    def _nms(x1, y1, x2, y2, scores, thresh):
+        """
+        Pure Python NMS baseline.
+        """
+
+        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+        order = scores.argsort()[::-1]
+
+        keep = []
+        while order.size > 0:
+            i = order[0]
+            keep.append(i)
+
+            xx1 = np.maximum(x1[i], x1[order[1:]])
+            yy1 = np.maximum(y1[i], y1[order[1:]])
+            xx2 = np.minimum(x2[i], x2[order[1:]])
+            yy2 = np.minimum(y2[i], y2[order[1:]])
+
+            w = np.maximum(0.0, xx2 - xx1 + 1)
+            h = np.maximum(0.0, yy2 - yy1 + 1)
+            intersection = w * h
+
+            union = (areas[i] + areas[order[1:]] - intersection)
+            overlap = np.divide(intersection, union, out=np.zeros_like(intersection, dtype=float), where=union != 0)
+
+            order = order[np.where(overlap <= thresh)[0] + 1]
+
+        return keep
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/normalize_landmarks_points.py b/tools/accuracy_checker/accuracy_checker/postprocessor/normalize_landmarks_points.py
new file mode 100644
index 000000000..7f3fbbcc6
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/normalize_landmarks_points.py
@@ -0,0 +1,59 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..config import BoolField
+from ..postprocessor.postprocessor import Postprocessor, BasePostprocessorConfig
+from ..representation import FacialLandmarksAnnotation, FacialLandmarksPrediction
+
+
+class NormalizeLandmarksPoints(Postprocessor):
+    __provider__ = 'normalize_landmarks_points'
+
+    annotation_types = (FacialLandmarksAnnotation, )
+    prediction_types = (FacialLandmarksPrediction, )
+
+    def validate_config(self):
+        class _ConfigValidator(BasePostprocessorConfig):
+            use_annotation_rect = BoolField(optional=True)
+
+        config_validator = _ConfigValidator(
+            self.__provider__, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        config_validator.validate(self.config)
+
+    def configure(self):
+        self.use_annotation_rect = self.config.get('use_annotation_rect', False)
+
+    def process_image(self, annotation, prediction):
+        for target in annotation:
+            height, width, _ = self.image_size
+            x_start, y_start = 0, 0
+            if self.use_annotation_rect:
+                resized_box = annotation[0].metadata.get('rect')
+                x_start, y_start, x_max, y_max = resized_box
+                width = x_max - x_start
+                height = y_max - y_start
+
+            target.x_values = (
+                (np.array(target.x_values, dtype=float) - x_start) / np.maximum(width, np.finfo(np.float64).eps)
+            )
+            target.y_values = (
+                (np.array(target.y_values, dtype=float) - y_start) / np.maximum(height, np.finfo(np.float64).eps)
+            )
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessing_executor.py b/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessing_executor.py
new file mode 100644
index 000000000..875a54608
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessing_executor.py
@@ -0,0 +1,79 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..config import ConfigValidator, StringField
+from ..utils import overrides, zipped_transform
+from .postprocessor import Postprocessor
+
+
+class PostprocessingExecutor:
+    def __init__(self, processors=None, dataset_name='custom', dataset_meta=None, state=None):
+        self._processors = []
+        self._image_processors = []
+        self._dataset_processors = []
+        self.dataset_meta = dataset_meta
+
+        self.state = state or {}
+
+        if not processors:
+            return
+
+        for config in processors:
+            postprocessor_config = PostprocessorConfig(
+                "{}.postprocessing".format(dataset_name),
+                on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT
+            )
+            postprocessor_config.validate(config)
+            postprocessor = Postprocessor.provide(config['type'], config, config['type'], self.dataset_meta, state)
+            self._processors.append(postprocessor)
+
+        allow_image_postprocessor = True
+        for processor in self._processors:
+            if overrides(processor, 'process_all', Postprocessor):
+                allow_image_postprocessor = False
+                self._dataset_processors.append(processor)
+            else:
+                if allow_image_postprocessor:
+                    self._image_processors.append(processor)
+                else:
+                    self._dataset_processors.append(processor)
+
+    def process_dataset(self, annotations, predictions):
+        for method in self._dataset_processors:
+            annotations, predictions = method.process_all(annotations, predictions)
+
+        return annotations, predictions
+
+    def process_image(self, annotation, prediction):
+        for method in self._image_processors:
+            annotation_entries, prediction_entries = method.get_entries(annotation, prediction)
+            method.process(annotation_entries, prediction_entries)
+
+        return annotation, prediction
+
+    def process_batch(self, annotations, predictions):
+        return zipped_transform(self.process_image, annotations, predictions)
+
+    def full_process(self, annotations, predictions):
+        return self.process_dataset(*self.process_batch(annotations, predictions))
+
+    @property
+    def has_dataset_processors(self):
+        return len(self._dataset_processors) != 0
+
+
+class PostprocessorConfig(ConfigValidator):
+    type = StringField(choices=Postprocessor.providers)
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessor.py b/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessor.py
new file mode 100644
index 000000000..de0c06617
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/postprocessor.py
@@ -0,0 +1,188 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import warnings
+from enum import Enum
+from ..representation import ContainerRepresentation
+from ..config import ConfigValidator, StringField, ConfigError, BaseField
+from ..dependency import ClassProvider
+from ..utils import (
+    zipped_transform,
+    string_to_list,
+    check_representation_type,
+    get_supported_representations,
+    enum_values
+)
+
+
+class BasePostprocessorConfig(ConfigValidator):
+    type = StringField()
+    annotation_source = BaseField(optional=True)
+    prediction_source = BaseField(optional=True)
+
+
+class Postprocessor(ClassProvider):
+    __provider_type__ = 'postprocessor'
+
+    annotation_types = ()
+    prediction_types = ()
+
+    def __init__(self, config, name=None, meta=None, state=None):
+        self.config = config
+        self.name = name
+        self.meta = meta
+        self.state = state
+        self.image_size = None
+
+        self.annotation_source = self.config.get('annotation_source')
+        if self.annotation_source and not isinstance(self.annotation_source, list):
+            self.annotation_source = string_to_list(self.annotation_source)
+
+        self.prediction_source = self.config.get('prediction_source')
+        if self.prediction_source and not isinstance(self.prediction_source, list):
+            self.prediction_source = string_to_list(self.prediction_source)
+
+        self.validate_config()
+        self.setup()
+
+    def __call__(self, *args, **kwargs):
+        return self.process_all(*args, **kwargs)
+
+    def setup(self):
+        self.configure()
+
+    def process_image(self, annotation, prediction):
+        raise NotImplementedError
+
+    def process(self, annotation, prediction):
+        image_size = annotation[0].metadata.get('image_size') if not None in annotation else None
+        self.image_size = None
+        if image_size:
+            self.image_size = image_size[0]
+        self.process_image(annotation, prediction)
+
+        return annotation, prediction
+
+    def process_all(self, annotations, predictions):
+        zipped_transform(self.process, zipped_transform(self.get_entries, annotations, predictions))
+        return annotations, predictions
+
+    def configure(self):
+        pass
+
+    def validate_config(self):
+        BasePostprocessorConfig(
+            self.name, on_extra_argument=BasePostprocessorConfig.ERROR_ON_EXTRA_ARGUMENT
+        ).validate(self.config)
+
+    def get_entries(self, annotation, prediction):
+        message_not_found = '{}: {} is not found in container'
+        message_incorrect_type = "Incorrect type of {}. Postprocessor {} can work only with {}"
+
+        def resolve_container(container, supported_types, entry_name, sources=None):
+            if not isinstance(container, ContainerRepresentation):
+                if sources:
+                    message = 'Warning: {}_source can be applied only to container. Default value will be used'
+                    warnings.warn(message.format(entry_name))
+
+                return [container]
+
+            if not sources:
+                return get_supported_representations(container.values(), supported_types)
+
+            entries = []
+            for source in sources:
+                representation = container.get(source)
+                if not representation:
+                    raise ConfigError(message_not_found.format(entry_name, source))
+
+                if supported_types and not check_representation_type(representation, supported_types):
+                    raise TypeError(message_incorrect_type.format(entry_name, self.name, ','.join(supported_types)))
+
+                entries.append(representation)
+
+            return entries
+
+        annotation_entries = resolve_container(annotation, self.annotation_types, 'annotation', self.annotation_source)
+        prediction_entries = resolve_container(prediction, self.prediction_types, 'prediction', self.prediction_source)
+
+        return annotation_entries, prediction_entries
+
+
+class ApplyToOption(Enum):
+    ANNOTATION = 'annotation'
+    PREDICTION = 'prediction'
+    ALL = 'all'
+
+
+class PostprocessorWithTargetsConfigValidator(BasePostprocessorConfig):
+    apply_to = StringField(optional=True, choices=enum_values(ApplyToOption))
+
+
+class PostprocessorWithSpecificTargets(Postprocessor):
+    def validate_config(self):
+        _config_validator = PostprocessorWithTargetsConfigValidator(
+            self.__provider__, on_extra_argument=PostprocessorWithTargetsConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        _config_validator.validate(self.config)
+
+    def setup(self):
+        apply_to = self.config.get('apply_to')
+        self.apply_to = ApplyToOption(apply_to) if apply_to else None
+
+        if (self.annotation_source or self.prediction_source) and self.apply_to:
+            raise ConfigError("apply_to and sources both provided. You need specify only one from them")
+
+        if not self.annotation_source and not self.prediction_source and not self.apply_to:
+            raise ConfigError("apply_to or annotation_source or prediction_source required for {}".format(self.name))
+
+        self.configure()
+
+    def process(self, annotation, prediction):
+        image_size = annotation[0].metadata.get('image_size') if not None in annotation else None
+        self.image_size = None
+        if image_size:
+            self.image_size = image_size[0]
+        target_annotations, target_predictions = None, None
+        if self.annotation_source or self.prediction_source:
+            target_annotations, target_predictions = self._choose_targets_using_sources(annotation, prediction)
+
+        if self.apply_to:
+            target_annotations, target_predictions = self._choose_targets_using_apply_to(annotation, prediction)
+
+        if not target_annotations and not target_predictions:
+            raise ValueError("Suitable targets for {} not found".format(self.name))
+
+        self.process_image(target_annotations, target_predictions)
+        return annotation, prediction
+
+    def _choose_targets_using_sources(self, annotations, predictions):
+        target_annotations = annotations if self.annotation_source else []
+        target_predictions = predictions if self.prediction_source else []
+
+        return target_annotations, target_predictions
+
+    def _choose_targets_using_apply_to(self, annotations, predictions):
+        targets_specification = {
+            ApplyToOption.ANNOTATION: (annotations, []),
+            ApplyToOption.PREDICTION: ([], predictions),
+            ApplyToOption.ALL: (annotations, predictions)
+        }
+
+        return targets_specification[self.apply_to]
+
+    def process_image(self, annotation, prediction):
+        raise NotImplementedError
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/resize_prediction_boxes.py b/tools/accuracy_checker/accuracy_checker/postprocessor/resize_prediction_boxes.py
new file mode 100644
index 000000000..2ce7b85aa
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/resize_prediction_boxes.py
@@ -0,0 +1,40 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..representation import DetectionPrediction, DetectionAnnotation
+from ..postprocessor.postprocessor import Postprocessor
+
+
+class ResizePredictionBoxes(Postprocessor):
+    """
+    Resize normalized predicted bounding boxes coordinates (i.e. from [0, 1] range) to input image shape.
+    """
+
+    __provider__ = 'resize_prediction_boxes'
+
+    prediction_types = (DetectionPrediction, )
+    annotation_types = (DetectionAnnotation, )
+
+    def process_image(self, annotations, predictions):
+        h, w, _ = self.image_size
+
+        for prediction in predictions:
+            prediction.x_mins *= w
+            prediction.x_maxs *= w
+            prediction.y_mins *= h
+            prediction.y_maxs *= h
+
+        return annotations, predictions
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/resize_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/resize_segmentation_mask.py
new file mode 100644
index 000000000..6c6b6dde1
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/resize_segmentation_mask.py
@@ -0,0 +1,73 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from functools import singledispatch
+import scipy.misc
+import numpy as np
+
+from ..config import NumberField
+from ..utils import get_size_from_config
+from .postprocessor import PostprocessorWithSpecificTargets, PostprocessorWithTargetsConfigValidator
+from ..representation import SegmentationPrediction, SegmentationAnnotation
+
+
+class ResizeSegmentationMask(PostprocessorWithSpecificTargets):
+    __provider__ = 'resize_segmentation_mask'
+
+    annotation_types = (SegmentationAnnotation, )
+    prediction_types = (SegmentationPrediction, )
+
+    def validate_config(self):
+        class _ResizeConfigValidator(PostprocessorWithTargetsConfigValidator):
+            size = NumberField(floats=False, optional=True, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+
+        resize_config_validator = _ResizeConfigValidator(self.__provider__)
+        resize_config_validator.validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width = get_size_from_config(self.config, allow_none=True)
+
+    def process_image(self, annotation, prediction):
+        target_height = self.dst_height or self.image_size[0]
+        target_width = self.dst_width or self.image_size[1]
+
+        @singledispatch
+        def resize_segmentation_mask(entry, height, width):
+            return entry
+
+        @resize_segmentation_mask.register(SegmentationPrediction)
+        def _(entry, height, width):
+            entry_mask = []
+            for class_mask in entry.mask:
+                resized_mask = scipy.misc.imresize(class_mask, (height, width), 'nearest')
+                entry_mask.append(resized_mask)
+            entry.mask = np.array(entry_mask)
+
+            return entry
+
+        @resize_segmentation_mask.register(SegmentationAnnotation)
+        def _(entry, height, width):
+            entry.mask = scipy.misc.imresize(entry.mask, (height, width), 'nearest')
+            return entry
+
+        for target in annotation:
+            resize_segmentation_mask(target, target_height, target_width)
+
+        for target in prediction:
+            resize_segmentation_mask(target, target_height, target_width)
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/postprocessor/zoom_segmentation_mask.py b/tools/accuracy_checker/accuracy_checker/postprocessor/zoom_segmentation_mask.py
new file mode 100644
index 000000000..aae7fce42
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/postprocessor/zoom_segmentation_mask.py
@@ -0,0 +1,65 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from .postprocessor import Postprocessor, BasePostprocessorConfig
+from ..representation import SegmentationAnnotation, SegmentationPrediction
+from ..config import NumberField
+
+
+class ZoomSegMask(Postprocessor):
+    """
+    Zoom probabilities of segmentation prediction.
+    """
+
+    __provider__ = 'zoom_segmentation_mask'
+
+    annotation_types = (SegmentationAnnotation, )
+    prediction_types = (SegmentationPrediction, )
+
+    def validate_config(self):
+        class _ZoomSegMaskConfigValidator(BasePostprocessorConfig):
+            zoom = NumberField(floats=False, min_value=1)
+
+        zoom_segmentation_mask_config_validator = _ZoomSegMaskConfigValidator(
+            self.__provider__, on_extra_argument=_ZoomSegMaskConfigValidator.ERROR_ON_EXTRA_ARGUMENT
+        )
+        zoom_segmentation_mask_config_validator.validate(self.config)
+
+    def configure(self):
+        self.zoom = self.config['zoom']
+
+    def process_image(self, annotation, prediction):
+        for annotation_, prediction_ in zip(annotation, prediction):
+            height, width = annotation_.mask.shape[:2]
+            prob = prediction_.mask
+            zoom_prob = np.zeros((prob.shape[0], height, width), dtype=np.float32)
+            for c in range(prob.shape[0]):
+                for h in range(height):
+                    for w in range(width):
+                        r0 = h // self.zoom
+                        r1 = r0 + 1
+                        c0 = w // self.zoom
+                        c1 = c0 + 1
+                        rt = float(h) / self.zoom - r0
+                        ct = float(w) / self.zoom - c0
+                        v0 = rt * prob[c, r1, c0] + (1 - rt) * prob[c, r0, c0]
+                        v1 = rt * prob[c, r1, c1] + (1 - rt) * prob[c, r0, c1]
+                        zoom_prob[c, h, w] = (1 - ct) * v0 + ct * v1
+            prediction_.mask = zoom_prob
+
+        return annotation, prediction
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/README.md b/tools/accuracy_checker/accuracy_checker/preprocessor/README.md
new file mode 100644
index 000000000..d5be82bfc
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/preprocessor/README.md
@@ -0,0 +1,51 @@
+# Preprocessors
+
+Preprocessor is function which processes input data before model inference.
+Every preprocessor has parameters available for configuration.
+Accuracy Checker supports following set of preprocessors:
+
+* `resize` - resizing the image to a new width and height.
+  * `dst_width` and `dst_height` are destination width and height for image resizing respectively.
+    You can also use `size` instead in case when destination sizes are equal for both dimensions.
+  * `use_pil` parameter specifies usage of Pillow library for resizing.
+    Accuracy Checker uses OpenCV as default image reader.
+  * `interpolation` specifies method that will be used.
+    Possible values depend on image processing library:
+      * **OpenCV**: Nearest, Linear, Cubic, Area, Max, Lanczos4, Bits, Bits32
+      * **Pillow**: None, Nearest, Cubic, Bicubic, Box, Bilinear, Lanczos, Antialias, Hamming
+  * `aspect_ratio_scale` allows save image aspect ratio using one of these ways: 
+    - `width` - rescale width.
+    - `height` - rescale height.
+    - `greater` - rescale greater from image sizes.
+
+* `normalization` - changing the range of pixel intensity values.
+  * `mean` values which will be subtracted from image channels.
+     You can specify one value for all channels or list of comma separated channel-wise values.
+  * `std` specifies values, on which pixels will be divided.
+     You can specify one value for all channels or list of comma separated channel-wise values.
+
+     These parameters support work with precomputed values of frequently used datasets (e.g. `cifar10` or `imagenet`).
+
+* `bgr_to_rgb` - reversing image channels. Convert image in BGR format to RGB.
+* `bgr_to_gray` - converting image in BGR to grayscale color space.
+* `flip` - image mirroring around specified axis.
+  * `mode` specifies the axis for flipping (`vertical` or `horizontal`).
+* `crop` - central cropping for image.
+  * `dst_width` and `dst_height` are destination width and height for image resizing respectively. You can also use `size` instead in case when destination sizes are equal.
+* `crop_rectangle` - cropping region of interest using coordinates given as annotation metadata.
+* `extend_around_rect` - scaling region of interest using annotation metadata.
+  * `augmentation_param` is scale factor for augmentation.
+* `point_aligment` - aligning keypoints stored in annotation metadata.
+  * `draw_points` - allows visualize points.
+  * `normalize` - allows to use normalization for keypoints.
+  * `dst_width` and `dst_height` are destination width and height for keypoints resizing respectively. You can also use `size` instead in case when destination sizes are equal.
+* `padding` - padding for image.
+  * `stride` - stride for padding.
+  * `pad_value` - value for filling space around original image.
+  * `dst_width` and `dst_height` are destination width and height for padded image respectively.
+    You can also use `size` instead in case when destination sizes are equal for both dimensions.
+* `tiling` - image tiling.
+  * `margin` - margin for tiled fragment of image.
+  * `dst_width` and `dst_height` are destination width and height of tiled fragment respectively.
+    You can also use `size` instead in case when destination sizes are equal for both dimensions.
+  
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/__init__.py b/tools/accuracy_checker/accuracy_checker/preprocessor/__init__.py
new file mode 100644
index 000000000..3999b4133
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/preprocessor/__init__.py
@@ -0,0 +1,51 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .preprocessing_executor import PreprocessingExecutor
+from .preprocessors import (
+    Preprocessor,
+
+    Resize,
+    Flip,
+    Normalize,
+    Crop,
+    BgrToRgb,
+    BgrToGray,
+    CropRect,
+    ExtendAroundRect,
+    PointAligner,
+    Tiling,
+    Crop3D,
+    Normalize3d
+)
+
+__all__ = [
+    'PreprocessingExecutor',
+
+    'Preprocessor',
+    'Resize',
+    'Flip',
+    'Normalize',
+    'Crop',
+    'BgrToRgb',
+    'BgrToGray',
+    'CropRect',
+    'ExtendAroundRect',
+    'PointAligner',
+    'Tiling',
+    'Crop3D',
+    'Normalize3d'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessing_executor.py b/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessing_executor.py
new file mode 100644
index 000000000..aa355b5e3
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessing_executor.py
@@ -0,0 +1,52 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..config import ConfigValidator, StringField
+from ..preprocessor.preprocessors import Preprocessor
+
+
+class PreprocessingExecutor:
+    def __init__(self, processors=None, dataset_name='custom', dataset_meta=None):
+        self.processors = []
+        self.dataset_meta = dataset_meta
+
+        if not processors:
+            return
+
+        identifier = 'type'
+        for processor in processors:
+            preprocessor_config = PreprocessorConfig(
+                "{}.preprocessors".format(dataset_name), on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT
+            )
+
+            type_ = processor.get(identifier)
+            preprocessor_config.validate(processor, type_)
+            preprocessor = Preprocessor.provide(processor[identifier], config=processor, name=type_)
+
+            self.processors.append(preprocessor)
+
+    def process(self, images, batch_annotation=None):
+        for i, _ in enumerate(images):
+            for processor in self.processors:
+                images[i] = processor(
+                    image=images[i], annotation_meta=batch_annotation[i].metadata if batch_annotation else None
+                )
+
+        return images
+
+
+class PreprocessorConfig(ConfigValidator):
+    type = StringField(choices=Preprocessor.providers)
diff --git a/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessors.py b/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessors.py
new file mode 100644
index 000000000..e4c2fb0b7
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/preprocessor/preprocessors.py
@@ -0,0 +1,565 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import math
+import cv2
+import numpy as np
+from PIL import Image
+
+from ..config import BaseField, BoolField, ConfigValidator, NumberField, StringField, ConfigError
+from ..dependency import ClassProvider
+from ..utils import get_size_from_config, get_or_parse_value, string_to_tuple, get_size_3d_from_config
+
+
+class BasePreprocessorConfig(ConfigValidator):
+    type = StringField()
+
+
+class Preprocessor(ClassProvider):
+    __provider_type__ = 'preprocessor'
+
+    def __init__(self, config, name=None):
+        self.config = config
+        self.name = name
+
+        self.validate_config()
+        self.configure()
+
+    def __call__(self, *args, **kwargs):
+        return self.process(*args, **kwargs)
+
+    def process(self, image, annotation_meta=None):
+        raise NotImplementedError
+
+    def configure(self):
+        pass
+
+    def validate_config(self):
+        config = BasePreprocessorConfig(self.name, on_extra_argument=BasePreprocessorConfig.ERROR_ON_EXTRA_ARGUMENT)
+        config.validate(self.config)
+
+
+def scale_width(dst_width, dst_height, image_width, image_height,):
+    return int(dst_width * image_width / image_height), dst_height
+
+
+def scale_height(dst_width, dst_height, image_width, image_height):
+    return dst_width, int(dst_height * image_height / image_width)
+
+
+def scale_greater(dst_width, dst_height, image_width, image_height):
+    if image_height > image_width:
+        return scale_height(dst_width, dst_height, image_width, image_height)
+    return scale_width(dst_width, dst_height, image_width, image_height)
+
+
+class Resize(Preprocessor):
+    __provider__ = 'resize'
+
+    PILLOW_INTERPOLATION = {
+        'NEAREST': Image.NEAREST,
+        'NONE': Image.NONE,
+        'BOX': Image.BOX,
+        'BILINEAR': Image.BILINEAR,
+        'LINEAR': Image.LINEAR,
+        'HAMMING': Image.HAMMING,
+        'BICUBIC': Image.BICUBIC,
+        'CUBIC': Image.CUBIC,
+        'LANCZOS': Image.LANCZOS,
+        'ANTIALIAS': Image.ANTIALIAS,
+    }
+
+    OPENCV_INTERPOLATION = {
+        'NEAREST': cv2.INTER_NEAREST,
+        'LINEAR': cv2.INTER_LINEAR,
+        'CUBIC': cv2.INTER_CUBIC,
+        'AREA': cv2.INTER_AREA,
+        'MAX': cv2.INTER_MAX,
+        'BITS': cv2.INTER_BITS,
+        'BITS2': cv2.INTER_BITS2,
+        'LANCZOS4': cv2.INTER_LANCZOS4,
+    }
+
+    ASPECT_RATIO_SCALE = {
+        'width': scale_width,
+        'height': scale_height,
+        'greater': scale_greater,
+    }
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            size = NumberField(floats=False, optional=True, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+            aspect_ratio_scale = StringField(choices=set(Resize.ASPECT_RATIO_SCALE), optional=True)
+            interpolation = StringField(
+                choices=set(Resize.PILLOW_INTERPOLATION) | set(Resize.OPENCV_INTERPOLATION), optional=True
+            )
+            use_pil = BoolField(optional=True)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width = get_size_from_config(self.config)
+        self.use_pil = self.config.get('use_pil', False)
+
+        interpolation = self.config.get('interpolation', 'LINEAR')
+
+        self.scaling_func = Resize.ASPECT_RATIO_SCALE.get(self.config.get('aspect_ratio_scale'))
+
+        if self.use_pil and interpolation.upper() not in Resize.PILLOW_INTERPOLATION:
+            raise ValueError("Incorrect interpolation option: {} for resize preprocessing".format(interpolation))
+        if not self.use_pil and interpolation.upper() not in Resize.OPENCV_INTERPOLATION:
+            raise ValueError("Incorrect interpolation option: {} for resize preprocessing".format(interpolation))
+
+        if self.use_pil:
+            self.interpolation = Resize.PILLOW_INTERPOLATION[interpolation]
+        else:
+            self.interpolation = Resize.OPENCV_INTERPOLATION[interpolation]
+
+    def process(self, image, annotation_meta=None):
+        data = image.data
+        new_height, new_width = self.dst_height, self.dst_width
+        if self.scaling_func:
+            image_h, image_w = data.shape[:2]
+            new_width, new_height = self.scaling_func(self.dst_width, self.dst_height, image_w, image_h)
+
+        image.metadata['preferable_width'] = max(new_width, self.dst_width)
+        image.metadata['preferable_height'] = max(new_height, self.dst_height)
+
+        if self.use_pil:
+            data = Image.fromarray(data)
+            data = data.resize((new_width, new_height), self.interpolation)
+            image.data = np.array(data)
+            return image
+
+        data = cv2.resize(data, (new_width, new_height), interpolation=self.interpolation).astype(np.float32)
+        if len(data.shape) == 2:
+            data = np.expand_dims(data, axis=-1)
+        image.data = data
+
+        return image
+
+
+class Normalize(Preprocessor):
+    __provider__ = 'normalization'
+
+    PRECOMPUTED_MEANS = {
+        'imagenet': (104.00698793, 116.66876762, 122.67891434),
+        'cifar10': (125.307, 122.961, 113.8575),
+    }
+
+    PRECOMPUTED_STDS = {
+        'imagenet': (104.00698793, 116.66876762, 122.67891434),
+        'cifar10': (125.307, 122.961, 113.8575),
+    }
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            mean = BaseField(optional=True)
+            std = BaseField(optional=True)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.mean = get_or_parse_value(self.config.get('mean'), Normalize.PRECOMPUTED_MEANS)
+        self.std = get_or_parse_value(self.config.get('std'), Normalize.PRECOMPUTED_STDS)
+        if not self.mean and not self.std:
+            raise ConfigError('mean or std value should be provided')
+
+        if self.std and 0 in self.std:
+            raise ConfigError('std value should not contain 0')
+
+        if self.mean and not (len(self.mean) == 3 or len(self.mean) == 1):
+            raise ConfigError('mean should be one value or comma-separated list channel-wise values')
+
+        if self.std and not (len(self.std) == 3 or len(self.std) == 1):
+            raise ConfigError('std should be one value or comma-separated list channel-wise values')
+
+    def process(self, image, annotation_meta=None):
+        if self.mean:
+            image.data = image.data - self.mean
+        if self.std:
+            image.data = image.data / self.std
+
+        return image
+
+
+class BgrToRgb(Preprocessor):
+    __provider__ = 'bgr_to_rgb'
+
+    def process(self, image, annotation_meta=None):
+        image.data = cv2.cvtColor(image.data, cv2.COLOR_BGR2RGB)
+        return image
+
+
+class BgrToGray(Preprocessor):
+    __provider__ = 'bgr_to_gray'
+
+    def process(self, image, annotation_meta=None):
+        image.data = np.expand_dims(cv2.cvtColor(image.data, cv2.COLOR_BGR2GRAY).astype(np.float32), -1)
+        return image
+
+
+class Flip(Preprocessor):
+    __provider__ = 'flip'
+
+    FLIP_MODES = {
+        'horizontal': 0,
+        'vertical': 1
+    }
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            mode = StringField(choices=Flip.FLIP_MODES.keys())
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        mode = self.config.get('mode', 'horizontal')
+        if isinstance(mode, str):
+            self.mode = Flip.FLIP_MODES[mode]
+
+    def process(self, image, annotation_meta=None):
+        image.data = cv2.flip(image.data, self.mode)
+        return image
+
+
+class Crop(Preprocessor):
+    __provider__ = 'crop'
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            size = NumberField(floats=False, optional=True, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width = get_size_from_config(self.config)
+
+    def process(self, image, annotation_meta=None):
+        data = image.data
+        height, width, _ = data.shape
+        if width < self.dst_width or height < self.dst_height:
+            resized = np.array([width, height])
+            if resized[0] < self.dst_width:
+                resized = resized * self.dst_width / resized[0]
+            if resized[1] < self.dst_height:
+                resized = resized * self.dst_height / resized[1]
+
+            data = cv2.resize(data, tuple(np.ceil(resized).astype(int)))
+
+        height, width, _ = data.shape
+        start_height = (height - self.dst_height) // 2
+        start_width = (width - self.dst_width) // 2
+
+        image.data = data[start_height:start_height + self.dst_height, start_width:start_width + self.dst_width]
+        return image
+
+
+class CropRect(Preprocessor):
+    __provider__ = 'crop_rect'
+
+    def process(self, image, annotation_meta=None):
+        rect = annotation_meta.get('rect')
+        if not rect:
+            return image
+
+        rows, cols = image.data.shape[:2]
+        rect_x_min, rect_y_min, rect_x_max, rect_y_max = rect
+        start_width, start_height = max(0, rect_x_min), max(0, rect_y_min)
+
+        width = min(start_width + (rect_x_max - rect_x_min), cols)
+        height = min(start_height + (rect_y_max - rect_y_min), rows)
+
+        image.data = image.data[start_height:height, start_width:width]
+        return image
+
+
+class ExtendAroundRect(Preprocessor):
+    __provider__ = 'extend_around_rect'
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            augmentation_param = NumberField(floats=True, optional=True)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.augmentation_param = self.config.get('augmentation_param', 0)
+
+    def process(self, image, annotation_meta=None):
+        rect = annotation_meta.get('rect')
+        rows, cols = image.data.shape[:2]
+
+        rect_x_left, rect_y_top, rect_x_right, rect_y_bottom = rect or (0, 0, cols, rows)
+        rect_x_left = max(0, rect_x_left)
+        rect_y_top = max(0, rect_y_top)
+        rect_x_right = min(rect_x_right, cols)
+        rect_y_bottom = min(rect_y_bottom, rows)
+
+        rect_w = rect_x_right - rect_x_left
+        rect_h = rect_y_bottom - rect_y_top
+
+        width_extent = (rect_x_right - rect_x_left + 1) * self.augmentation_param
+        height_extent = (rect_y_bottom - rect_y_top + 1) * self.augmentation_param
+        rect_x_left = rect_x_left - width_extent
+        border_left = abs(min(0, rect_x_left))
+        rect_x_left = int(max(0, rect_x_left))
+
+        rect_y_top = rect_y_top - height_extent
+        border_top = abs(min(0, rect_y_top))
+        rect_y_top = int(max(0, rect_y_top))
+
+        rect_y_bottom += border_top
+        rect_y_bottom = int(rect_y_bottom + height_extent + 0.5)
+        border_bottom = abs(max(0, rect_y_bottom - rows))
+
+        rect_x_right += border_left
+        rect_x_right = int(rect_x_right + width_extent + 0.5)
+        border_right = abs(max(0, rect_x_right - cols))
+
+        image.data = cv2.copyMakeBorder(
+            image.data, int(border_top), int(border_bottom), int(border_left), int(border_right), cv2.BORDER_REPLICATE
+        )
+
+        rect = (
+            int(rect_x_left), int(rect_y_top),
+            int(rect_x_left) + int(rect_w + width_extent * 2), int(rect_y_top) + int(rect_h + height_extent * 2)
+        )
+        annotation_meta['rect'] = rect
+
+        return image
+
+
+class PointAligner(Preprocessor):
+    __provider__ = 'point_alignment'
+
+    ref_landmarks = np.array([
+        30.2946 / 96, 51.6963 / 112,
+        65.5318 / 96, 51.5014 / 112,
+        48.0252 / 96, 71.7366 / 112,
+        33.5493 / 96, 92.3655 / 112,
+        62.7299 / 96, 92.2041 / 112
+    ], dtype=np.float64).reshape(5, 2)
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            draw_points = BoolField(optional=True)
+            normalize = BoolField(optional=True)
+            size = NumberField(floats=False, optional=True, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.draw_points = self.config.get('draw_points', False)
+        self.normalize = self.config.get('normalize', True)
+        self.dst_height, self.dst_width = get_size_from_config(self.config)
+
+    def process(self, image, annotation_meta=None):
+        keypoints = annotation_meta.get('keypoints')
+        image.data = self.align(image.data, keypoints)
+        return image
+
+    def align(self, img, points):
+        if not points:
+            return img
+
+        points_number = len(points) // 2
+        points = np.array(points).reshape(points_number, 2)
+
+        inp_shape = [1., 1.]
+        if self.normalize:
+            inp_shape = img.shape
+
+        keypoints = points.copy().astype(np.float64)
+        keypoints[:, 0] *= (float(self.dst_width) / inp_shape[1])
+        keypoints[:, 1] *= (float(self.dst_height) / inp_shape[0])
+
+        keypoints_ref = np.zeros((points_number, 2), dtype=np.float64)
+        keypoints_ref[:, 0] = self.ref_landmarks[:, 0] * self.dst_width
+        keypoints_ref[:, 1] = self.ref_landmarks[:, 1] * self.dst_height
+
+        transformation_matrix = self.transformation_from_points(np.array(keypoints_ref), np.array(keypoints))
+        img = cv2.resize(img, (self.dst_width, self.dst_height))
+        if self.draw_points:
+            for point in keypoints:
+                cv2.circle(img, (int(point[0]), int(point[1])), 5, (255, 0, 0), -1)
+
+        return cv2.warpAffine(img, transformation_matrix, (self.dst_width, self.dst_height), flags=cv2.WARP_INVERSE_MAP)
+
+    @staticmethod
+    def transformation_from_points(points1, points2):
+        points1 = np.matrix(points1.astype(np.float64))
+        points2 = np.matrix(points2.astype(np.float64))
+
+        c1 = np.mean(points1, axis=0)
+        c2 = np.mean(points2, axis=0)
+        points1 -= c1
+        points2 -= c2
+        s1 = np.std(points1)
+        s2 = np.std(points2)
+        points1 /= np.maximum(s1, np.finfo(np.float64).eps)
+        points2 /= np.maximum(s1, np.finfo(np.float64).eps)
+        points_std_ratio = s2 / np.maximum(s1, np.finfo(np.float64).eps)
+
+        u, _, vt = np.linalg.svd(points1.T * points2)
+        r = (u * vt).T
+
+        return np.hstack((points_std_ratio * r, c2.T - points_std_ratio * r * c1.T))
+
+
+class Padding(Preprocessor):
+    __provider__ = 'padding'
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            stride = NumberField(floats=False, min_value=1, optional=True)
+            pad_value = StringField(optional=True)
+            size = NumberField(floats=False, optional=True, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+
+        _ConfigValidator(self.name).validate(self.config)
+
+    def configure(self):
+        self.stride = self.config.get('stride', 1)
+        pad_val = self.config.get('pad_value', '0,0,0')
+        if isinstance(pad_val, int):
+            self.pad_value = (pad_val, pad_val, pad_val)
+        if isinstance(pad_val, str):
+            self.pad_value = string_to_tuple(pad_val, int)
+        self.dst_height, self.dst_width = get_size_from_config(self.config, allow_none=True)
+
+    def process(self, image, annotation_meta=None):
+        height, width, _ = image.data.shape
+        pref_height = self.dst_height or image.metadata.get('preferable_height', height)
+        pref_width = self.dst_width or image.metadata.get('preferable_width', width)
+        height = min(height, pref_height)
+        pref_height = math.ceil(pref_height / float(self.stride)) * self.stride
+        pref_width = max(pref_width, width)
+        pref_width = math.ceil(pref_width / float(self.stride)) * self.stride
+        pad = []
+        pad.append(int(math.floor((pref_height - height) / 2.0)))
+        pad.append(int(math.floor((pref_width - width) / 2.0)))
+        pad.append(int(pref_height - height - pad[0]))
+        pad.append(int(pref_width - width - pad[1]))
+        image.metadata['padding'] = pad
+        image.data = cv2.copyMakeBorder(
+            image.data, pad[0], pad[2], pad[1], pad[3], cv2.BORDER_CONSTANT, value=self.pad_value
+        )
+
+        return image
+
+class Tiling(Preprocessor):
+    __provider__ = 'tiling'
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            margin = NumberField(floats=False, min_value=1)
+            size = NumberField(floats=False, optional=True, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width = get_size_from_config(self.config)
+        self.margin = self.config['margin']
+
+    def process(self, image, annotation_meta=None):
+        data = image.data
+        image_size = data.shape
+        output_height = self.dst_height - 2 * self.margin
+        output_width = self.dst_width - 2 * self.margin
+        data = cv2.copyMakeBorder(data, *np.full(4, self.margin), cv2.BORDER_REFLECT_101)
+        num_tiles_h = image_size[0] // output_height + (1 if image_size[0] % output_height else 0)
+        num_tiles_w = image_size[1] // output_width + (1 if image_size[1] % output_width else 0)
+        tiled_data = []
+        for height in range(num_tiles_h):
+            for width in range(num_tiles_w):
+                offset = [output_height * height, output_width * width]
+                tile = data[offset[0]:offset[0] + self.dst_height, offset[1]:offset[1] + self.dst_width, :]
+                margin = [0, self.dst_height - tile.shape[0], 0, self.dst_width - tile.shape[1]]
+                tile = cv2.copyMakeBorder(tile, *margin, cv2.BORDER_REFLECT_101)
+                tiled_data.append(tile)
+        image.data = tiled_data
+        image.metadata['tiles_shape'] = (num_tiles_h, num_tiles_w)
+        image.metadata['multi_infer'] = True
+
+        return image
+
+class Crop3D(Preprocessor):
+    __provider__ = 'crop3d'
+
+    def validate_config(self):
+        class _ConfigValidator(BasePreprocessorConfig):
+            size = NumberField(floats=False, min_value=1)
+            dst_width = NumberField(floats=False, optional=True, min_value=1)
+            dst_height = NumberField(floats=False, optional=True, min_value=1)
+            dst_volume = NumberField(floats=False, optional=True, min_value=1)
+
+        _ConfigValidator(self.name, on_extra_argument=_ConfigValidator.ERROR_ON_EXTRA_ARGUMENT).validate(self.config)
+
+    def configure(self):
+        self.dst_height, self.dst_width, self.dst_volume = get_size_3d_from_config(self.config)
+
+    def process(self, image, annotation_meta=None):
+        image.data = self.crop_center(image.data, self.dst_height, self.dst_width, self.dst_volume)
+        return image
+
+    @staticmethod
+    def crop_center(img, cropx, cropy, cropz):
+
+        z, y, x, _ = img.shape
+
+        # Make sure starting index is >= 0
+        startx = max(x // 2 - (cropx // 2), 0)
+        starty = max(y // 2 - (cropy // 2), 0)
+        startz = max(z // 2 - (cropz // 2), 0)
+
+        # Make sure ending index is <= size
+        endx = min(startx + cropx, x)
+        endy = min(starty + cropy, y)
+        endz = min(startz + cropz, z)
+
+        return img[startz:endz, starty:endy, startx:endx, :]
+
+
+class Normalize3d(Preprocessor):
+    __provider__ = "normalize3d"
+
+    def process(self, image, annotation_meta=None):
+        data = self.normalize_img(image.data)
+        image_list = []
+        for img in data:
+            image_list.append(img)
+        image.data = image_list
+        image.metadata['multi_infer'] = True
+
+        return image
+
+    @staticmethod
+    def normalize_img(img):
+        for channel in range(img.shape[3]):
+            channel_val = img[:, :, :, channel] - np.mean(img[:, :, :, channel])
+            channel_val /= np.std(img[:, :, :, channel])
+            img[:, :, :, channel] = channel_val
+
+        return img
diff --git a/tools/accuracy_checker/accuracy_checker/presenters.py b/tools/accuracy_checker/accuracy_checker/presenters.py
new file mode 100644
index 000000000..9c39e1fe6
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/presenters.py
@@ -0,0 +1,123 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from collections import namedtuple
+from enum import Enum
+import numpy as np
+
+from .dependency import ClassProvider
+from .logging import print_info
+
+EvaluationResult = namedtuple('EvaluationResult', ['evaluated_value', 'reference_value', 'name', 'threshold', 'meta'])
+
+
+class Color(Enum):
+    PASSED = 0
+    FAILED = 1
+
+
+def color_format(s, color=Color.PASSED):
+    if color == Color.PASSED:
+        return "\x1b[0;32m{}\x1b[0m".format(s)
+    return "\x1b[0;31m{}\x1b[0m".format(s)
+
+
+class BasePresenter(ClassProvider):
+    __provider_type__ = "presenter"
+
+    def write_result(self, evaluation_result, output_callback=None, ignore_results_formatting=False):
+        raise NotImplementedError
+
+
+class ScalarPrintPresenter(BasePresenter):
+    __provider__ = "print_scalar"
+
+    def write_result(self, evaluation_result: EvaluationResult, output_callback=None, ignore_results_formatting=False):
+        value, reference, name, threshold, meta = evaluation_result
+        value = np.mean(value)
+        postfix, scale, result_format = get_result_format_parameters(meta, ignore_results_formatting)
+        write_scalar_result(
+            value, name, reference, threshold, postfix=postfix, scale=scale, result_format=result_format
+        )
+
+
+class VectorPrintPresenter(BasePresenter):
+    __provider__ = "print_vector"
+
+    def write_result(self, evaluation_result: EvaluationResult, output_callback=None, ignore_results_formatting=False):
+        value, reference, name, threshold, meta = evaluation_result
+        if threshold:
+            threshold = float(threshold)
+
+        value_names = meta.get('names')
+        postfix, scale, result_format = get_result_format_parameters(meta, ignore_results_formatting)
+        if np.isscalar(value) or np.size(value) == 1:
+            value = [value]
+
+        for index, res in enumerate(value):
+            write_scalar_result(
+                res, name, reference, threshold,
+                value_name=value_names[index] if value_names else None,
+                postfix=postfix[index] if not np.isscalar(postfix) else postfix,
+                scale=scale[index] if not np.isscalar(scale) else scale,
+                result_format=result_format
+            )
+
+        if len(value) > 1 and meta.get('calculate_mean', True):
+            write_scalar_result(
+                np.mean(np.multiply(value, scale)), name, reference, threshold, value_name='mean',
+                postfix=postfix[-1] if not np.isscalar(postfix) else postfix, scale=1,
+                result_format=result_format
+            )
+
+
+def write_scalar_result(res_value, name, reference, threshold, value_name=None, postfix='%', scale=100,
+                        result_format='{:.2f}'):
+    display_name = "{}@{}".format(name, value_name) if value_name else name
+    display_result = result_format.format(res_value * scale)
+    message = '{}: {}{}'.format(display_name, display_result, postfix)
+
+    if reference:
+        threshold = threshold or 0
+
+        difference = abs(reference - (res_value * scale))
+        if threshold <= difference:
+            fail_message = "[FAILED: error = {:.4}]".format(difference)
+            message = "{} {}".format(message, color_format(fail_message, Color.FAILED))
+        else:
+            message = "{} {}".format(message, color_format("[OK]", Color.PASSED))
+
+    print_info(message)
+
+
+class ReturnValuePresenter(BasePresenter):
+    __provider__ = "return_value"
+
+    def write_result(self, evaluation_result: EvaluationResult, output_callback=None, ignore_results_formatting=False):
+        if output_callback:
+            output_callback(evaluation_result)
+
+
+def get_result_format_parameters(meta, use_default_formatting):
+    postfix = ' '
+    scale = 1
+    result_format = '{}'
+    if not use_default_formatting:
+        postfix = meta.get('postfix', '%')
+        scale = meta.get('scale', 100)
+        result_format = meta.get('data_format', '{:.2f}')
+
+    return postfix, scale, result_format
diff --git a/tools/accuracy_checker/accuracy_checker/progress_reporters.py b/tools/accuracy_checker/accuracy_checker/progress_reporters.py
new file mode 100644
index 000000000..3938e7d6b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/progress_reporters.py
@@ -0,0 +1,92 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+
+from tqdm import tqdm
+
+from .dependency import ClassProvider
+from .logging import print_info
+
+
+class ProgressReporter(ClassProvider):
+    __provider_type__ = 'progress_reporter'
+
+    def __init__(self, dataset_size=None):
+        self.finished = True
+        self.dataset_size = None
+        self.start_time = None
+        self.prev_time = None
+        if dataset_size is not None:
+            self.reset(dataset_size)
+
+    def finish(self, objects_processed=True):
+        self.finished = True
+        if not objects_processed:
+            return
+
+        process_time = time.time() - self.start_time
+        print_info('{} objects processed in {:.3f} seconds'.format(self.dataset_size, process_time))
+
+    def reset(self, dataset_size):
+        if not self.finished:
+            self.finish(objects_processed=False)
+
+        self.dataset_size = dataset_size
+        self.start_time = time.time()
+        self.finished = False
+
+
+class PrintProgressReporter(ProgressReporter):
+    __provider__ = 'print'
+
+    def __init__(self, dataset_size=None, print_interval=1000):
+        super().__init__(dataset_size)
+        self.print_interval = print_interval
+
+    def reset(self, dataset_size):
+        self.dataset_size = dataset_size
+        print_info('Total dataset size: {}'.format(dataset_size))
+        self.start_time = time.time()
+        self.prev_time = self.start_time
+
+    def update(self, batch_id, batch_size):
+        if (batch_id + 1) % self.print_interval != 0:
+            return
+
+        now = time.time()
+        batch_time = now - self.prev_time
+        self.prev_time = now
+
+        print_info('{} / {} processed in {:.3f}s'.format((batch_id + 1) * batch_size, self.dataset_size, batch_time))
+
+
+class TQDMReporter(ProgressReporter):
+    __provider__ = 'bar'
+
+    def update(self, _batch_id, batch_size):
+        self.tqdm.update(batch_size)
+
+    def finish(self, objects_processed=True):
+        self.tqdm.close()
+        super().finish(objects_processed)
+
+    def reset(self, dataset_size):
+        super().reset(dataset_size)
+        self.tqdm = tqdm(
+            total=self.dataset_size, unit='frames', leave=False,
+            bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}]'
+        )
diff --git a/tools/accuracy_checker/accuracy_checker/representation/__init__.py b/tools/accuracy_checker/accuracy_checker/representation/__init__.py
new file mode 100644
index 000000000..0ceabc30b
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/__init__.py
@@ -0,0 +1,103 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .base_representation import BaseRepresentation
+from .classification_representation import Classification, ClassificationAnnotation, ClassificationPrediction
+from .detection_representation import Detection, DetectionAnnotation, DetectionPrediction
+from .reid_representation import (
+    ReIdentificationAnnotation,
+    ReIdentificationClassificationAnnotation,
+    ReIdentificationPrediction
+)
+from .segmentation_representation import (
+    SegmentationRepresentation,
+    SegmentationAnnotation,
+    SegmentationPrediction,
+    BrainTumorSegmentationAnnotation,
+    BrainTumorSegmentationPrediction
+)
+from .character_recognition_representation import (
+    CharacterRecognition,
+    CharacterRecognitionAnnotation,
+    CharacterRecognitionPrediction
+)
+from .representaton_container import ContainerRepresentation, ContainerAnnotation, ContainerPrediction
+from .regression_representation import (
+    RegressionAnnotation,
+    RegressionPrediction,
+    FacialLandmarksAnnotation,
+    FacialLandmarksPrediction,
+    GazeVectorAnnotation,
+    GazeVectorPrediction
+)
+from .multilabel_recognition import MultiLabelRecognitionAnnotation, MultiLabelRecognitionPrediction
+from .super_resolution_representation import SuperResolutionAnnotation, SuperResolutionPrediction
+from .text_detection_representation import TextDetectionAnnotation, TextDetectionPrediction
+from .pose_estimation_representation import PoseEstimationAnnotation, PoseEstimationPrediction
+from .hit_ratio_representation import HitRatio, HitRatioAnnotation, HitRatioPrediction
+
+__all__ = [
+    'BaseRepresentation',
+
+    'Classification',
+    'ClassificationAnnotation',
+    'ClassificationPrediction',
+
+    'Detection',
+    'DetectionAnnotation',
+    'DetectionPrediction',
+
+    'ReIdentificationAnnotation',
+    'ReIdentificationClassificationAnnotation',
+    'ReIdentificationPrediction',
+
+    'SegmentationRepresentation',
+    'SegmentationAnnotation',
+    'SegmentationPrediction',
+    'BrainTumorSegmentationAnnotation',
+    'BrainTumorSegmentationPrediction',
+
+    'CharacterRecognition',
+    'CharacterRecognitionAnnotation',
+    'CharacterRecognitionPrediction',
+
+    'ContainerRepresentation',
+    'ContainerAnnotation',
+    'ContainerPrediction',
+
+    'RegressionAnnotation',
+    'RegressionPrediction',
+    'FacialLandmarksAnnotation',
+    'FacialLandmarksPrediction',
+    'GazeVectorAnnotation',
+    'GazeVectorPrediction',
+
+    'MultiLabelRecognitionAnnotation',
+    'MultiLabelRecognitionPrediction',
+
+    'SuperResolutionAnnotation',
+    'SuperResolutionPrediction',
+
+    'TextDetectionAnnotation',
+    'TextDetectionPrediction',
+
+    'PoseEstimationAnnotation',
+    'PoseEstimationPrediction',
+
+    'HitRatio',
+    'HitRatioAnnotation',
+    'HitRatioPrediction'
+]
diff --git a/tools/accuracy_checker/accuracy_checker/representation/base_representation.py b/tools/accuracy_checker/accuracy_checker/representation/base_representation.py
new file mode 100644
index 000000000..05d53b578
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/base_representation.py
@@ -0,0 +1,42 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import abc
+import pickle
+
+
+class BaseRepresentation(abc.ABC):
+    def __init__(self, identifier, metadata=None):
+        self.identifier = identifier
+        self.metadata = metadata or {}
+
+    @classmethod
+    def load(cls, file):
+        obj = pickle.load(file)
+
+        if cls != BaseRepresentation:
+            assert isinstance(obj, cls)
+
+        return obj
+
+    def dump(self, file):
+        pickle.dump(self, file)
+
+    def set_image_size(self, image_sizes):
+        self.metadata['image_size'] = image_sizes
+
+    def set_data_source(self, data_source):
+        self.metadata['data_source'] = data_source
diff --git a/tools/accuracy_checker/accuracy_checker/representation/character_recognition_representation.py b/tools/accuracy_checker/accuracy_checker/representation/character_recognition_representation.py
new file mode 100644
index 000000000..df6a2418a
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/character_recognition_representation.py
@@ -0,0 +1,31 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .base_representation import BaseRepresentation
+
+
+class CharacterRecognition(BaseRepresentation):
+    def __init__(self, identifier='', label=None):
+        super().__init__(identifier)
+        self.label = label
+
+
+class CharacterRecognitionAnnotation(CharacterRecognition):
+    pass
+
+
+class CharacterRecognitionPrediction(CharacterRecognition):
+    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/classification_representation.py b/tools/accuracy_checker/accuracy_checker/representation/classification_representation.py
new file mode 100644
index 000000000..67f72f682
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/classification_representation.py
@@ -0,0 +1,44 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from .base_representation import BaseRepresentation
+
+
+class Classification(BaseRepresentation):
+    pass
+
+
+class ClassificationAnnotation(Classification):
+    def __init__(self, identifier='', label=None):
+        super().__init__(identifier)
+
+        self.label = label
+
+
+class ClassificationPrediction(Classification):
+    def __init__(self, identifier='', scores=None):
+        super().__init__(identifier)
+
+        self.scores = np.array(scores) if scores is not None else np.array([])
+
+    @property
+    def label(self):
+        return np.argmax(self.scores)
+
+    def top_k(self, k):
+        return np.argpartition(self.scores, -k)[-k:]
diff --git a/tools/accuracy_checker/accuracy_checker/representation/detection_representation.py b/tools/accuracy_checker/accuracy_checker/representation/detection_representation.py
new file mode 100644
index 000000000..1fc2c8b8d
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/detection_representation.py
@@ -0,0 +1,87 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from ..utils import remove_difficult
+from .base_representation import BaseRepresentation
+
+
+class Detection(BaseRepresentation):
+    def __init__(self, identifier='', labels=None, x_mins=None, y_mins=None, x_maxs=None, y_maxs=None, metadata=None):
+        super().__init__(identifier, metadata)
+
+        self.labels = np.array(labels) if labels is not None else np.array([])
+        self.x_mins = np.array(x_mins) if x_mins is not None else np.array([])
+        self.y_mins = np.array(y_mins) if y_mins is not None else np.array([])
+        self.x_maxs = np.array(x_maxs) if x_maxs is not None else np.array([])
+        self.y_maxs = np.array(y_maxs) if y_maxs is not None else np.array([])
+
+    def remove(self, indexes):
+        self.labels = np.delete(self.labels, indexes)
+        self.x_mins = np.delete(self.x_mins, indexes)
+        self.y_mins = np.delete(self.y_mins, indexes)
+        self.x_maxs = np.delete(self.x_maxs, indexes)
+        self.y_maxs = np.delete(self.y_maxs, indexes)
+
+        difficult_boxes = self.metadata.get('difficult_boxes')
+        if not difficult_boxes:
+            return
+
+        new_difficult_boxes = remove_difficult(difficult_boxes, indexes)
+
+        self.metadata['difficult_boxes'] = new_difficult_boxes
+
+    @property
+    def size(self):
+        return len(self.x_mins)
+
+    def __eq__(self, other):
+        if not isinstance(other, type(self)):
+            return False
+
+        def are_bounding_boxes_equal():
+            if not np.array_equal(self.labels, other.labels):
+                return False
+            if not np.array_equal(self.x_mins, other.x_mins):
+                return False
+            if not np.array_equal(self.y_mins, other.y_mins):
+                return False
+            if not np.array_equal(self.x_maxs, other.x_maxs):
+                return False
+            if not np.array_equal(self.y_maxs, other.y_maxs):
+                return False
+            return True
+
+        return self.identifier == other.identifier and are_bounding_boxes_equal() and self.metadata == other.metadata
+
+
+class DetectionAnnotation(Detection):
+    pass
+
+
+class DetectionPrediction(Detection):
+    def __init__(self, identifier='', labels=None, scores=None, x_mins=None, y_mins=None, x_maxs=None, y_maxs=None,
+                 metadata=None):
+        super().__init__(identifier, labels, x_mins, y_mins, x_maxs, y_maxs, metadata)
+        self.scores = np.array(scores) if scores is not None else np.array([])
+
+    def remove(self, indexes):
+        super().remove(indexes)
+        self.scores = np.delete(self.scores, indexes)
+
+    def __eq__(self, other):
+        return np.array_equal(self.scores, other.scores) if super().__eq__(other) else False
diff --git a/tools/accuracy_checker/accuracy_checker/representation/hit_ratio_representation.py b/tools/accuracy_checker/accuracy_checker/representation/hit_ratio_representation.py
new file mode 100644
index 000000000..f6cb6c7a1
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/hit_ratio_representation.py
@@ -0,0 +1,40 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+from .base_representation import BaseRepresentation
+
+
+class HitRatio(BaseRepresentation):
+    def __init__(self, identifier=''):
+        super().__init__(identifier)
+        self.user = int(identifier[0].split('u:')[-1])
+        self.item = int(identifier[1].split('i:')[-1])
+
+
+
+class HitRatioAnnotation(HitRatio):
+    def __init__(self, identifier='', positive=True):
+        super().__init__(identifier)
+        self.positive = positive
+
+
+class HitRatioPrediction(HitRatio):
+    def __init__(self, identifier='', scores=None):
+        super().__init__(identifier)
+
+        self.scores = np.array(scores) if scores is not None else np.array([])
diff --git a/tools/accuracy_checker/accuracy_checker/representation/multilabel_recognition.py b/tools/accuracy_checker/accuracy_checker/representation/multilabel_recognition.py
new file mode 100644
index 000000000..d5af464a9
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/multilabel_recognition.py
@@ -0,0 +1,32 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from .base_representation import BaseRepresentation
+
+
+class MultiLabelRecognitionRepresentation(BaseRepresentation):
+    def __init__(self, identifier='', multi_label=None):
+        super().__init__(identifier)
+        self.multi_label = np.array(multi_label) if isinstance(multi_label, list) else multi_label
+
+
+class MultiLabelRecognitionAnnotation(MultiLabelRecognitionRepresentation):
+    pass
+
+
+class MultiLabelRecognitionPrediction(MultiLabelRecognitionRepresentation):
+    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/pose_estimation_representation.py b/tools/accuracy_checker/accuracy_checker/representation/pose_estimation_representation.py
new file mode 100644
index 000000000..f765dd889
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/pose_estimation_representation.py
@@ -0,0 +1,63 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from .base_representation import BaseRepresentation
+
+
+class PoseEstimationRepresentation(BaseRepresentation):
+    def __init__(self, identifier='', x_values=None, y_values=None, visibility=None, labels=None):
+        super().__init__(identifier)
+        self.x_values = x_values if np.size(x_values) > 0 else []
+        self.y_values = y_values if np.size(y_values) > 0 else []
+        self.visibility = visibility if np.size(visibility) > 0 else [2] * len(x_values)
+        self.labels = labels if labels is not None else np.array([1]*len(x_values))
+
+    @property
+    def areas(self):
+        areas = self.metadata.get('areas')
+        if areas:
+            return areas
+        x_mins = np.min(self.x_values, axis=1)
+        x_maxs = np.max(self.x_values, axis=1)
+        y_mins = np.min(self.y_values, axis=1)
+        y_maxs = np.max(self.y_values, axis=1)
+        return (x_maxs - x_mins) * (y_maxs - y_mins)
+
+    @property
+    def bboxes(self):
+        rects = self.metadata.get('rects')
+        if rects:
+            return rects
+        x_mins = np.min(self.x_values, axis=1)
+        x_maxs = np.max(self.x_values, axis=1)
+        y_mins = np.min(self.y_values, axis=1)
+        y_maxs = np.max(self.y_values, axis=1)
+        return [[x_min, y_min, x_max, y_max] for x_min, y_min, x_max, y_max in zip(x_mins, y_mins, x_maxs, y_maxs)]
+
+    @property
+    def size(self):
+        return len(self.x_values)
+
+
+class PoseEstimationAnnotation(PoseEstimationRepresentation):
+    pass
+
+
+class PoseEstimationPrediction(PoseEstimationRepresentation):
+    def __init__(self, identifier='', x_values=None, y_values=None, visibility=None, scores=None, labels=None):
+        super().__init__(identifier, x_values, y_values, visibility, labels)
+        self.scores = scores if scores.any() else []
diff --git a/tools/accuracy_checker/accuracy_checker/representation/regression_representation.py b/tools/accuracy_checker/accuracy_checker/representation/regression_representation.py
new file mode 100644
index 000000000..99800d362
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/regression_representation.py
@@ -0,0 +1,72 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from .base_representation import BaseRepresentation
+
+
+class RegressionRepresentation(BaseRepresentation):
+    def __init__(self, identifier='', value=None):
+        super().__init__(identifier)
+        self.value = value
+
+
+class RegressionAnnotation(RegressionRepresentation):
+    pass
+
+
+class RegressionPrediction(RegressionRepresentation):
+    pass
+
+
+class GazeVectorRepresentation(RegressionRepresentation):
+    def __init__(self, identifier='', value=None):
+        if value is None:
+            value = np.array([])
+        super().__init__(identifier, value)
+
+class GazeVectorAnnotation(GazeVectorRepresentation):
+    pass
+
+class GazeVectorPrediction(GazeVectorRepresentation):
+    pass
+
+
+
+class FacialLandmarksRepresentation(BaseRepresentation):
+    def __init__(self, identifier='', x_values=None, y_values=None):
+        super().__init__(identifier)
+        self.x_values = x_values if x_values.any() else []
+        self.y_values = y_values if y_values.any() else []
+
+
+class FacialLandmarksAnnotation(FacialLandmarksRepresentation):
+    @property
+    def interocular_distance(self):
+        left_eye = [
+            np.mean(self.x_values[self.metadata['left_eye']]),
+            np.mean(self.y_values[self.metadata['left_eye']])
+        ]
+        right_eye = [
+            np.mean(self.x_values[self.metadata['right_eye']]),
+            np.mean(self.y_values[self.metadata['right_eye']])
+        ]
+
+        return np.linalg.norm((np.subtract(left_eye, right_eye)))
+
+
+class FacialLandmarksPrediction(FacialLandmarksRepresentation):
+    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/reid_representation.py b/tools/accuracy_checker/accuracy_checker/representation/reid_representation.py
new file mode 100644
index 000000000..d212eb747
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/reid_representation.py
@@ -0,0 +1,42 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .base_representation import BaseRepresentation
+
+
+class ReIdentification(BaseRepresentation):
+    pass
+
+
+class ReIdentificationAnnotation(ReIdentification):
+    def __init__(self, identifier, camera_id, person_id, query):
+        super().__init__(identifier)
+        self.camera_id = camera_id
+        self.person_id = person_id
+        self.query = query
+
+
+class ReIdentificationClassificationAnnotation(ReIdentification):
+    def __init__(self, identifier, positive_pairs=None, negative_pairs=None):
+        super().__init__(identifier)
+        self.positive_pairs = set(positive_pairs)
+        self.negative_pairs = set(negative_pairs)
+
+
+class ReIdentificationPrediction(ReIdentification):
+    def __init__(self, identifiers, embedding):
+        super().__init__(identifiers)
+        self.embedding = embedding.copy()
diff --git a/tools/accuracy_checker/accuracy_checker/representation/representaton_container.py b/tools/accuracy_checker/accuracy_checker/representation/representaton_container.py
new file mode 100644
index 000000000..add7c6991
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/representaton_container.py
@@ -0,0 +1,78 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from ..representation import BaseRepresentation
+
+
+class ContainerRepresentation(BaseRepresentation):
+    def __init__(self, representation_map=None):
+        super().__init__('')
+        self.representations = representation_map or {}
+
+    def __eq__(self, other):
+        if not isinstance(other, type(self)):
+            return False
+
+        if self.identifier != other.identifier:
+            return False
+
+        if self.metadata != other.metadata:
+            return False
+
+        if self.representations != other.representations:
+            return False
+
+        return True
+
+    def __getitem__(self, item):
+        return self.representations[item]
+
+    def get(self, key):
+        return self.representations.get(key)
+
+    def values(self):
+        return list(self.representations.values())
+
+    @property
+    def identifier(self):
+        if self._identifier:
+            return self._identifier
+
+        values = self.values()
+        if np.size(values) == 0:
+            raise ValueError('representation container is empty')
+
+        self._identifier = values[0].identifier
+        return self._identifier
+
+    @identifier.setter
+    def identifier(self, identifier):
+        self._identifier = identifier
+
+
+class ContainerAnnotation(ContainerRepresentation):
+    def set_image_size(self, image_sizes):
+        for key in self.representations.keys():
+            self.representations[key].metadata['image_size'] = image_sizes
+
+    def set_data_source(self, data_source):
+        for key in self.representations.keys():
+            self.representations[key].metadata['data_source'] = data_source
+
+
+class ContainerPrediction(ContainerRepresentation):
+    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/segmentation_representation.py b/tools/accuracy_checker/accuracy_checker/representation/segmentation_representation.py
new file mode 100644
index 000000000..c6c78f0e2
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/segmentation_representation.py
@@ -0,0 +1,91 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from enum import Enum
+
+import numpy as np
+
+from .base_representation import BaseRepresentation
+from ..data_readers import BaseReader
+
+
+class GTMaskLoader(Enum):
+    PILLOW = 0
+    OPENCV = 1
+    SCIPY = 2
+    NIFTI = 3
+
+
+class SegmentationRepresentation(BaseRepresentation):
+    pass
+
+
+class SegmentationAnnotation(SegmentationRepresentation):
+    LOADERS = {
+        GTMaskLoader.PILLOW: 'pillow_imread',
+        GTMaskLoader.OPENCV: 'opencv_imread',
+        GTMaskLoader.SCIPY: 'scipy_imread',
+        GTMaskLoader.NIFTI: 'nifti_reader'
+    }
+
+    def __init__(self, identifier, path_to_mask, mask_loader=GTMaskLoader.PILLOW):
+        """
+        Args:
+            identifier: object identifier (e.g. image name).
+            path_to_mask: path where segmentation mask should be loaded from. The path is relative to data source.
+            mask_loader: back-end, used to load segmentation masks.
+        """
+
+        super().__init__(identifier)
+        self._mask_path = path_to_mask
+        self._mask_loader = mask_loader
+        self._mask = None
+
+    @property
+    def mask(self):
+        return self._mask if self._mask is not None else self._load_mask()
+
+    @mask.setter
+    def mask(self, value):
+        self._mask = value
+
+    def _load_mask(self):
+        loader = BaseReader.provide(self.LOADERS.get(self._mask_loader))
+        if self._mask is None:
+            mask = loader(self._mask_path, self.metadata['data_source'])
+            return mask.astype(np.uint8)
+
+        return self._mask
+
+
+class SegmentationPrediction(SegmentationRepresentation):
+    def __init__(self, identifiers, mask):
+        """
+        Args:
+            identifiers: object identifier (e.g. image name).
+            mask: array with shape (n_classes, height, width) of probabilities at each location.
+        """
+
+        super().__init__(identifiers)
+        self.mask = mask
+
+
+class BrainTumorSegmentationAnnotation(SegmentationAnnotation):
+    def __init__(self, identifier, path_to_mask):
+        super().__init__(identifier, path_to_mask, GTMaskLoader.NIFTI)
+
+class BrainTumorSegmentationPrediction(SegmentationPrediction):
+    pass
diff --git a/tools/accuracy_checker/accuracy_checker/representation/super_resolution_representation.py b/tools/accuracy_checker/accuracy_checker/representation/super_resolution_representation.py
new file mode 100644
index 000000000..8cf989ec0
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/super_resolution_representation.py
@@ -0,0 +1,67 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from enum import Enum
+import numpy as np
+
+from .base_representation import BaseRepresentation
+from ..data_readers import BaseReader
+
+
+class GTLoader(Enum):
+    PILLOW = 0
+    OPENCV = 1
+
+
+class SuperResolutionRepresentation(BaseRepresentation):
+    pass
+
+
+class SuperResolutionAnnotation(SuperResolutionRepresentation):
+    LOADERS = {
+        GTLoader.PILLOW: 'pillow_imread',
+        GTLoader.OPENCV: 'opencv_imread'
+    }
+
+    def __init__(self, identifier, path_to_hr, gt_loader=GTLoader.PILLOW):
+        """
+        Args:
+            identifier: object identifier (e.g. image name).
+            path_to_hr: path where height resolution image should be loaded from. The path is relative to data source.
+            gt_loader: back-end, used to load segmentation masks.
+        """
+
+        super().__init__(identifier)
+        self._image_path = path_to_hr
+        self._gt_loader = self.LOADERS.get(gt_loader)
+
+    @property
+    def value(self):
+        loader = BaseReader.provide(self._gt_loader)
+        gt = loader.read(self._image_path, self.metadata['data_source'])
+        return gt.astype(np.uint8)
+
+
+class SuperResolutionPrediction(SuperResolutionRepresentation):
+    def __init__(self, identifiers, prediction):
+        """
+        Args:
+            identifiers: object identifier (e.g. image name).
+            prediction: array with shape (height, width) contained result image.
+        """
+
+        super().__init__(identifiers)
+        self.value = prediction
diff --git a/tools/accuracy_checker/accuracy_checker/representation/text_detection_representation.py b/tools/accuracy_checker/accuracy_checker/representation/text_detection_representation.py
new file mode 100644
index 000000000..38e7a9c60
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/representation/text_detection_representation.py
@@ -0,0 +1,46 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from ..utils import remove_difficult
+from .base_representation import BaseRepresentation
+
+
+class TextDetectionRepresentation(BaseRepresentation):
+    def __init__(self, identifier='', points=None):
+        super().__init__(identifier)
+        self.points = points or []
+
+    def remove(self, indexes):
+        self.points = np.delete(self.points, indexes, axis=0)
+        difficult = self.metadata.get('difficult_boxes')
+        if not difficult:
+            return
+        self.metadata['difficult_boxes'] = remove_difficult(difficult, indexes)
+
+
+class TextDetectionAnnotation(TextDetectionRepresentation):
+    def __init__(self, identifier='', points=None, description=''):
+        super().__init__(identifier, points)
+        self.description = description
+
+    def remove(self, indexes):
+        super().remove(indexes)
+        self.description = np.delete(self.description, indexes)
+
+
+class TextDetectionPrediction(TextDetectionRepresentation):
+    pass
diff --git a/tools/accuracy_checker/accuracy_checker/utils.py b/tools/accuracy_checker/accuracy_checker/utils.py
new file mode 100644
index 000000000..f03a0a2cd
--- /dev/null
+++ b/tools/accuracy_checker/accuracy_checker/utils.py
@@ -0,0 +1,361 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import collections
+import csv
+import errno
+import itertools
+import json
+import os
+import pickle
+
+from pathlib import Path
+from typing import Union
+from warnings import warn
+
+from shapely.geometry.polygon import Polygon
+import numpy as np
+import yaml
+
+try:
+    import lxml.etree as et
+except ImportError:
+    import xml.etree.cElementTree as et
+
+
+def concat_lists(*lists):
+    return list(itertools.chain(*lists))
+
+
+def get_path(entry: Union[str, Path], is_directory=False):
+    try:
+        path = Path(entry)
+    except TypeError:
+        raise TypeError('"{}" is expected to be a path-like'.format(entry))
+
+    # pathlib.Path.exists throws an exception in case of broken symlink
+    if not os.path.exists(str(path)):
+        raise FileNotFoundError('{}: {}'.format(os.strerror(errno.ENOENT), path))
+
+    if is_directory and not path.is_dir():
+        raise NotADirectoryError('{}: {}'.format(os.strerror(errno.ENOTDIR), path))
+
+    # if it exists it is either file (or valid symlink to file) or directory (or valid symlink to directory)
+    if not is_directory and not path.is_file():
+        raise IsADirectoryError('{}: {}'.format(os.strerror(errno.EISDIR), path))
+
+    return path
+
+
+def contains_all(container, *args):
+    sequence = set(container)
+
+    for arg in args:
+        if len(sequence.intersection(arg)) != len(arg):
+            return False
+
+    return True
+
+
+def contains_any(container, *args):
+    sequence = set(container)
+
+    for arg in args:
+        if sequence.intersection(arg):
+            return True
+
+    return False
+
+
+def string_to_tuple(string, casting_type=float):
+    processed = string.replace(' ', '')
+    processed = processed.replace('(', '')
+    processed = processed.replace(')', '')
+    processed = processed.split(',')
+
+    return tuple([casting_type(entry) for entry in processed])
+
+
+def string_to_list(string):
+    processed = string.replace(' ', '')
+    processed = processed.replace('[', '')
+    processed = processed.replace(']', '')
+    processed = processed.split(',')
+
+    return list(entry for entry in processed)
+
+
+class JSONDecoderWithAutoConversion(json.JSONDecoder):
+    """
+    Custom json decoder to convert all strings into numbers (int, float) during reading json file.
+    """
+
+    def decode(self, s, _w=json.decoder.WHITESPACE.match):
+        decoded = super().decode(s, _w)
+        return self._decode(decoded)
+
+    def _decode(self, entry):
+        if isinstance(entry, str):
+            try:
+                return int(entry)
+            except ValueError:
+                pass
+            try:
+                return float(entry)
+            except ValueError:
+                pass
+        elif isinstance(entry, dict):
+            return {self._decode(key): self._decode(value) for key, value in entry.items()}
+        elif isinstance(entry, list):
+            return [self._decode(value) for value in entry]
+
+        return entry
+
+
+def dict_subset(dict_, key_subset):
+    return {key: value for key, value in dict_.items() if key in key_subset}
+
+
+def zipped_transform(fn, *iterables, inplace=False):
+    result = (iterables if inplace else tuple([] for _ in range(len(iterables))))
+    updater = (list.__setitem__ if inplace else lambda container, _, entry: container.append(entry))
+
+    for idx, values in enumerate(zip(*iterables)):
+        iter_res = fn(*values)
+        if not iter_res:
+            continue
+
+        for dst, res in zip(result, iter_res):
+            updater(dst, idx, res)
+
+    return result
+
+
+def overrides(obj, attribute_name, base=None):
+    cls = obj if isinstance(obj, type) else obj.__class__
+
+    base = base or cls.__bases__[0]
+    obj_attr = getattr(cls, attribute_name, None)
+    base_attr = getattr(base, attribute_name, None)
+
+    return obj_attr and obj_attr != base_attr
+
+
+def enum_values(enum):
+    return [member.value for member in enum]
+
+
+def get_size_from_config(config, allow_none=False):
+    if contains_all(config, ('size', 'dst_width', 'dst_height')):
+        warn('All parameters: size, dst_width, dst_height are provided. Size will be used. '
+             'You should specify only size or pair values des_width, dst_height in config.')
+    if 'size' in config:
+        return config['size'], config['size']
+    if contains_all(config, ('dst_width', 'dst_height')):
+        return config['dst_height'], config['dst_width']
+    if not allow_none:
+        raise ValueError('Either size or dst_width and dst_height required')
+
+    return None, None
+
+
+def get_size_3d_from_config(config, allow_none=False):
+    if contains_all(config, ('size', 'dst_width', 'dst_height', 'dst_volume')):
+        warn('All parameters: size, dst_width, dst_height, dst_volume are provided. Size will be used. '
+             'You should specify only size or three values des_width, dst_height, dst_volume in config.')
+    if 'size' in config:
+        return config['size'], config['size'], config['size']
+    if contains_all(config, ('dst_width', 'dst_height', 'dst_volume')):
+        return config['dst_height'], config['dst_width'], config['dst_volume']
+    if not allow_none:
+        raise ValueError('Either size or dst_width and dst_height required')
+
+    return config.get('dst_height'), config.get('dst_width'), config.get('dst_volume')
+
+
+def in_interval(value, interval):
+    minimum = interval[0]
+    maximum = interval[1] if len(interval) >= 2 else None
+
+    if not maximum:
+        return minimum <= value
+
+    return minimum <= value < maximum
+
+
+def finalize_metric_result(values, names):
+    result_values, result_names = [], []
+    for value, name in zip(values, names):
+        if np.isnan(value):
+            continue
+
+        result_values.append(value)
+        result_names.append(name)
+
+    return result_values, result_names
+
+
+def get_representations(values, representation_source):
+    return np.reshape([value.get(representation_source) for value in values], -1)
+
+
+def get_supported_representations(container, supported_types):
+    if np.shape(container) == ():
+        container = [container]
+
+    return list(filter(lambda rep: check_representation_type(rep, supported_types), container))
+
+
+def check_representation_type(representation, representation_types):
+    for representation_type in representation_types:
+        if type(representation).__name__ == representation_type.__name__:
+            return True
+    return False
+
+
+def is_single_metric_source(source):
+    if not source:
+        return False
+
+    return np.size(source.split(',')) == 1
+
+
+def read_txt(file: Union[str, Path], sep='\n', **kwargs):
+    def is_empty(string):
+        return not string or string.isspace()
+
+    with get_path(file).open() as content:
+        content = content.read(**kwargs).split(sep)
+        content = list(filter(lambda string: not is_empty(string), content))
+
+        return list(map(str.strip, content))
+
+
+def read_xml(file: Union[str, Path], *args, **kwargs):
+    return et.parse(str(get_path(file)), *args, **kwargs).getroot()
+
+
+def read_json(file: Union[str, Path], *args, **kwargs):
+    with get_path(file).open() as content:
+        return json.load(content, *args, **kwargs)
+
+
+def read_pickle(file: Union[str, Path], *args, **kwargs):
+    with get_path(file).open('rb') as content:
+        return pickle.load(content, *args, **kwargs)
+
+
+def read_yaml(file: Union[str, Path], *args, **kwargs):
+    # yaml does not keep order of keys in dictionaries but it is important for reading pre/post processing
+    yaml.add_representer(collections.OrderedDict, lambda dumper, data: dumper.represent_dict(data.items()))
+    yaml.add_constructor(
+        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
+        lambda loader, node: collections.OrderedDict(loader.construct_pairs(node))
+    )
+
+    with get_path(file).open() as content:
+        return yaml.load(content, Loader=yaml.SafeLoader, *args, **kwargs)
+
+
+def read_csv(file: Union[str, Path], *args, **kwargs):
+    with get_path(file).open() as content:
+        return list(csv.DictReader(content, *args, **kwargs))
+
+
+def extract_image_representations(image_representations):
+    images = [rep.data for rep in image_representations]
+    meta = [rep.metadata for rep in image_representations]
+
+    return images, meta
+
+
+def convert_bboxes_xywh_to_x1y1x2y2(x_coord, y_coord, width, height):
+    return x_coord, y_coord, x_coord + width, y_coord + height
+
+
+def get_or_parse_value(item, supported_values, default=None):
+    if isinstance(item, str):
+        item = item.lower()
+        if item in supported_values:
+            return supported_values[item]
+
+        try:
+            return string_to_tuple(item)
+        except ValueError:
+            message = 'Invalid value "{}", expected one of precomputed: ({}) or list of values'.format(
+                item, ', '.join(supported_values.keys())
+            )
+            raise ValueError(message)
+
+    if isinstance(item, (float, int)):
+        return (item, )
+
+    return default
+
+
+def string_to_bool(string):
+    return string.lower() in ['yes', 'true', 't', '1']
+
+
+def get_key_by_value(container, target):
+    for key, value in container.items():
+        if value == target:
+            return key
+
+    return None
+
+
+def format_key(key):
+    return '--{}'.format(key)
+
+
+def to_lower_register(str_list):
+    return list(map(lambda item: item.lower() if item else None, str_list))
+
+
+def polygon_from_points(points):
+    return Polygon(points)
+
+
+def remove_difficult(difficult, indexes):
+    new_difficult = []
+    decrementor = 0
+    id_difficult = 0
+    id_removed = 0
+    while id_difficult < len(difficult) and id_removed < len(indexes):
+        if difficult[id_difficult] < indexes[id_removed]:
+            new_difficult.append(difficult[id_difficult] - decrementor)
+            id_difficult += 1
+        else:
+            decrementor += 1
+            id_removed += 1
+
+    return new_difficult
+
+
+def convert_to_range(entry):
+    entry_range = entry
+    if isinstance(entry, str):
+        entry_range = string_to_tuple(entry_range)
+    elif not isinstance(entry_range, tuple) and not isinstance(entry_range, list):
+        entry_range = [entry_range]
+
+    return entry_range
+
+
+def add_input_shape_to_meta(meta, shape):
+    meta['input_shape'] = shape
+    return meta
diff --git a/tools/accuracy_checker/configs/face-detection-adas-0001.yml b/tools/accuracy_checker/configs/face-detection-adas-0001.yml
new file mode 100644
index 000000000..9b573dfa5
--- /dev/null
+++ b/tools/accuracy_checker/configs/face-detection-adas-0001.yml
@@ -0,0 +1,94 @@
+models:
+  - name: face-detection-adas-0001
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   face-detection-adas-0001/FP32/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP32/face-detection-adas-0001.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - INT8
+        device: CPU
+        model:   face-detection-adas-0001/INT8/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/INT8/face-detection-adas-0001.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   face-detection-adas-0001/FP32/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP32/face-detection-adas-0001.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   face-detection-adas-0001/FP16/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP16/face-detection-adas-0001.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   face-detection-adas-0001/FP16/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP16/face-detection-adas-0001.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        device: HDDL
+        model:   face-detection-adas-0001/FP16/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP16/face-detection-adas-0001.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   face-detection-adas-0001/FP32/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP32/face-detection-adas-0001.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP16_MobileNet_Clamp.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   face-detection-adas-0001/FP32/face-detection-adas-0001.xml
+        weights: face-detection-adas-0001/FP32/face-detection-adas-0001.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP11_ELU.aocx
+
+    datasets:
+      - name: wider
+        data_source: WIDER_val/images
+        annotation_conversion:
+          converter: wider
+          annotation_file: wider_face_split/wider_face_val_bbx_gt.txt
+
+        preprocessing:
+          - type: resize
+            dst_width: 672
+            dst_height: 384
+
+        postprocessing:
+          - type: resize_prediction_boxes
+          - type: filter
+            height_range: 100
+            apply_to: annotation
+
+        metrics:
+          - type: map
+            ignore_difficult: True
+            include_boundaries: False
+            allow_multiple_matches_per_ignored: True
+            use_filtered_tp: True
diff --git a/tools/accuracy_checker/configs/face-detection-retail-0004.yml b/tools/accuracy_checker/configs/face-detection-retail-0004.yml
new file mode 100644
index 000000000..74b787288
--- /dev/null
+++ b/tools/accuracy_checker/configs/face-detection-retail-0004.yml
@@ -0,0 +1,98 @@
+models:
+  - name: face-detection-retail-0004
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   face-detection-retail-0004/FP32/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP32/face-detection-retail-0004.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - INT8
+        device: CPU
+        model:   face-detection-retail-0004/INT8/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/INT8/face-detection-retail-0004.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   face-detection-retail-0004/FP32/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP32/face-detection-retail-0004.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   face-detection-retail-0004/FP16/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP16/face-detection-retail-0004.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   face-detection-retail-0004/FP16/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP16/face-detection-retail-0004.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        device: HDDL
+        model:   face-detection-retail-0004/FP16/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP16/face-detection-retail-0004.bin
+        adapter: ssd
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   face-detection-retail-0004/FP32/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP32/face-detection-retail-0004.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP16_TinyYolo.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   face-detection-retail-0004/FP32/face-detection-retail-0004.xml
+        weights: face-detection-retail-0004/FP32/face-detection-retail-0004.bin
+        adapter: ssd
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP11_CaffeMobileNet.aocx
+
+    datasets:
+      - name: wider
+        data_source: WIDER_val/images
+        annotation_conversion:
+          converter: wider
+          annotation_file: wider_face_split/wider_face_val_bbx_gt.txt
+
+        preprocessing:
+          - type: resize
+            size: 300
+
+        postprocessing:
+          - type: resize_prediction_boxes
+          - type: cast_to_int
+          - type: filter
+            apply_to: annotation
+            height_range: 60
+            is_empty: True
+          - type: filter
+            min_confidence: 0.0
+            apply_to: prediction
+
+        metrics:
+          - type: map
+            ignore_difficult: True
+            include_boundaries: False
+            allow_multiple_matches_per_ignored: False
+            distinct_conf: False
diff --git a/tools/accuracy_checker/configs/face-reidentification-retail-0095.yml b/tools/accuracy_checker/configs/face-reidentification-retail-0095.yml
new file mode 100644
index 000000000..de91fc9c1
--- /dev/null
+++ b/tools/accuracy_checker/configs/face-reidentification-retail-0095.yml
@@ -0,0 +1,74 @@
+models:
+  - name: face-reidentification-retail-0095
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   face-reidentification-retail-0095/FP16/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP16/face-reidentification-retail-0095.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   face-reidentification-retail-0095/FP16/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP16/face-reidentification-retail-0095.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: HDDL
+        model:   face-reidentification-retail-0095/FP16/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP16/face-reidentification-retail-0095.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP16_SSD300.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.xml
+        weights: face-reidentification-retail-0095/FP32/face-reidentification-retail-0095.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP11_CaffeMobileNet.aocx
+
+    datasets:
+      - name: lfw
+        data_source: LFW/lfw
+        annotation_conversion:
+          converter: face_reid_pairwise
+          pairs_file: LFW/annotation/pairs.txt
+          landmarks_file: LFW/annotation/lfw_landmark.txt
+
+        preprocessing:
+          - type: point_alignment
+            size: 400
+          - type: resize
+            size: 128
+
+        metrics:
+          - type: pairwise_accuracy_subsets
diff --git a/tools/accuracy_checker/configs/human-pose-estimation-0001.yml b/tools/accuracy_checker/configs/human-pose-estimation-0001.yml
new file mode 100644
index 000000000..71971151f
--- /dev/null
+++ b/tools/accuracy_checker/configs/human-pose-estimation-0001.yml
@@ -0,0 +1,114 @@
+models:
+  - name: human-pose-estimation-0001
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP32/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP32/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP16/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP16/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+
+      - framework: dlsdk
+        device: HDDL
+        model:   human-pose-estimation-0001/FP16/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP16/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP32/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+        bitstream: 2019R1_A10DK_FP16_ELU.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   human-pose-estimation-0001/FP32/human-pose-estimation-0001.xml
+        weights: human-pose-estimation-0001/FP32/human-pose-estimation-0001.bin
+        allow_reshape_input: True
+        adapter:
+          type: human_pose_estimation
+          part_affinity_fields_out: Mconv7_stage2_L1
+          keypoints_heatmap_out: Mconv7_stage2_L2
+        bitstream: 2019R1_A10DK_FP11_ELU.aocx
+
+
+    datasets:
+      - name: ms_coco_keypoints
+        data_source: val2017
+        annotation_conversion:
+          converter: mscoco_keypoints
+          annotation_file: person_keypoints_val2017.json
+
+        preprocessing:
+          - type: resize
+            size: 368
+            interpolation: CUBIC
+            aspect_ratio_scale: width
+          - type: padding
+            stride: 8
+
+        postprocessing:
+          - type: filter
+            apply_to: annotation
+            area_range: 1, 10000000000
+          - type: filter
+            apply_to: prediction
+            area_range: 1, 10000000000
+
+        metrics:
+          - name: AP
+            type: coco_precision
+            max_detections: 20
diff --git a/tools/accuracy_checker/configs/landmarks-regression-retail-0009.yml b/tools/accuracy_checker/configs/landmarks-regression-retail-0009.yml
new file mode 100644
index 000000000..eca538a3c
--- /dev/null
+++ b/tools/accuracy_checker/configs/landmarks-regression-retail-0009.yml
@@ -0,0 +1,82 @@
+models:
+  - name: landmarks-regression-retail-0009
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+
+      - framework: dlsdk
+        device: HDDL
+        model:   landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP16/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP16_AlexNet_GoogleNet.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.xml
+        weights: landmarks-regression-retail-0009/FP32/landmarks-regression-retail-0009.bin
+        adapter: landmarks_regression
+        cpu_extensions: libcpu_extension_avx2.so
+        bitstream: 2019R1_A10DK_FP11_RMNet.aocx
+
+    datasets:
+      - name: vgg2face
+        data_source: VGGFaces2/test
+        annotation_conversion:
+          converter: landmarks_regression
+          landmarks_csv_file: VGGFaces2/bb_landmark/loose_landmark_test.csv
+          bbox_csv_file: VGGFaces2/bb_landmark/loose_bb_test.csv
+
+        preprocessing:
+          - type: crop_rect
+          - type: resize
+            size: 48
+
+        postprocessing:
+          - type: normalize_landmarks_points
+            use_annotation_rect: True
+
+        metrics:
+          - type: per_point_normed_error
+            presenter: print_vector
+          - type: normed_error
diff --git a/tools/accuracy_checker/configs/person-reidentification-retail-0031.yml b/tools/accuracy_checker/configs/person-reidentification-retail-0031.yml
new file mode 100644
index 000000000..d41e25080
--- /dev/null
+++ b/tools/accuracy_checker/configs/person-reidentification-retail-0031.yml
@@ -0,0 +1,80 @@
+models:
+  - name: person-reidentification-retail-0031
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   person-reidentification-retail-0031/FP16/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP16/person-reidentification-retail-0031.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   person-reidentification-retail-0031/FP16/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP16/person-reidentification-retail-0031.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: HDDL
+        model:   person-reidentification-retail-0031/FP16/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP16/person-reidentification-retail-0031.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP16_ELU.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.xml
+        weights: person-reidentification-retail-0031/FP32/person-reidentification-retail-0031.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP11_ELU.aocx
+
+    datasets:
+      - name: market1501
+        reader: pillow_imread
+        data_source: Market-1501-v15.09.15
+        annoation_conversion:
+          converter: market1501
+          data_dir: Market-1501-v15.09.15
+
+        preprocessing:
+          - type: bgr_to_rgb
+          - type: resize
+            dst_width: 48
+            dst_height: 96
+            use_pil: True
+            interpolation: ANTIALIAS
+
+        metrics:
+          - name: rank@1
+            type: cmc
+            top_k: 1
+
+          - type: reid_map
diff --git a/tools/accuracy_checker/configs/person-reidentification-retail-0076.yml b/tools/accuracy_checker/configs/person-reidentification-retail-0076.yml
new file mode 100644
index 000000000..09c28e6a6
--- /dev/null
+++ b/tools/accuracy_checker/configs/person-reidentification-retail-0076.yml
@@ -0,0 +1,76 @@
+models:
+  - name: person-reidentification-retail-0076
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   person-reidentification-retail-0076/FP16/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP16/person-reidentification-retail-0076.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   person-reidentification-retail-0076/FP16/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP16/person-reidentification-retail-0076.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: HDDL
+        model:   person-reidentification-retail-0076/FP16/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP16/person-reidentification-retail-0076.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP16_ELU.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.xml
+        weights: person-reidentification-retail-0076/FP32/person-reidentification-retail-0076.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP11_ELU.aocx
+
+    datasets:
+      - name: market1501
+        data_source: Market-1501-v15.09.15
+        annoation_conversion:
+          converter: market1501
+          data_dir: Market-1501-v15.09.15
+
+        preprocessing:
+          - type: resize
+            dst_width: 128
+            dst_height: 384
+
+        metrics:
+          - name: rank@1
+            type: cmc
+            top_k: 1
+
+          - type: reid_map
diff --git a/tools/accuracy_checker/configs/person-reidentification-retail-0079.yml b/tools/accuracy_checker/configs/person-reidentification-retail-0079.yml
new file mode 100644
index 000000000..417127cf2
--- /dev/null
+++ b/tools/accuracy_checker/configs/person-reidentification-retail-0079.yml
@@ -0,0 +1,76 @@
+models:
+  - name: person-reidentification-retail-0079
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   person-reidentification-retail-0079/FP16/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP16/person-reidentification-retail-0079.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   person-reidentification-retail-0079/FP16/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP16/person-reidentification-retail-0079.bin
+        adapter: reid
+
+      - framework: dlsdk
+        device: HDDL
+        model:   person-reidentification-retail-0079/FP16/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP16/person-reidentification-retail-0079.bin
+        adapter: reid
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP16_RMNet.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.xml
+        weights: person-reidentification-retail-0079/FP32/person-reidentification-retail-0079.bin
+        adapter: reid
+        bitstream: 2019R1_A10DK_FP11_ELU.aocx
+
+    datasets:
+      - name: market1501
+        data_source: Market-1501-v15.09.15
+        annoation_conversion:
+          converter: market1501
+          data_dir: Market-1501-v15.09.15
+
+        preprocessing:
+          - type: resize
+            dst_width: 64
+            dst_height: 160
+
+        metrics:
+          - name: rank@1
+            type: cmc
+            top_k: 1
+
+          - type: reid_map
diff --git a/tools/accuracy_checker/configs/text-detection-0002.yml b/tools/accuracy_checker/configs/text-detection-0002.yml
new file mode 100644
index 000000000..529c2642c
--- /dev/null
+++ b/tools/accuracy_checker/configs/text-detection-0002.yml
@@ -0,0 +1,110 @@
+models:
+  - name: text-detection-0002
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   text-detection-0002/FP32/text-detection-0002.xml
+        weights: text-detection-0002/FP32/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   text-detection-0002/FP32/text-detection-0002.xml
+        weights: text-detection-0002/FP32/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   text-detection-0002/FP16/text-detection-0002.xml
+        weights: text-detection-0002/FP16/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   text-detection-0002/FP16/text-detection-0002.xml
+        weights: text-detection-0002/FP16/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+
+      - framework: dlsdk
+        device: HDDL
+        model:   text-detection-0002/FP16/text-detection-0002.xml
+        weights: text-detection-0002/FP16/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA.CPU
+        model:   text-detection-0002/FP32/text-detection-0002.xml
+        weights: text-detection-0002/FP32/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP16_MobileNet_Clamp.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA.CPU
+        model:   text-detection-0002/FP32/text-detection-0002.xml
+        weights: text-detection-0002/FP32/text-detection-0002.bin
+        adapter:
+          type: text_detection
+          pixel_link_out: pixel_link/add_2
+          pixel_class_out: pixel_cls/add_2
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP11_MobileNet_Clamp.aocx
+
+    datasets:
+      - name: ICDAR2015
+
+        data_source: ICDAR15_DET_validation/ch4_test_images
+        annotation_conversion:
+          converter: icdar15_detection
+          data_dir: ICDAR15_DET_validation/gt
+
+        preprocessing:
+          - type: resize
+            dst_width: 1280
+            dst_height: 768
+
+        postprocessing:
+          - type: cast_to_int
+          - type: filter
+            area_range: 300, 980993
+            height_range: 10
+            width_range: 10
+            apply_to: prediction
+            remove_filtered: True
+          - type: clip_points
+            apply_to: prediction
+
+        metrics:
+          - type: text_detection
+            name: f-measure
+            ignore_difficult: True
diff --git a/tools/accuracy_checker/configs/text-recognition-0012.yml b/tools/accuracy_checker/configs/text-recognition-0012.yml
new file mode 100644
index 000000000..da8e241c5
--- /dev/null
+++ b/tools/accuracy_checker/configs/text-recognition-0012.yml
@@ -0,0 +1,76 @@
+models:
+  - name: text-recognition-0012
+
+    launchers:
+      - framework: dlsdk
+        tags:
+          - FP32
+        device: CPU
+        model:   text-recognition-0012/FP32/text-recognition-0012.xml
+        weights: text-recognition-0012/FP32/text-recognition-0012.bin
+        adapter: beam_search_decoder
+        cpu_extensions: AUTO
+
+      - framework: dlsdk
+        tags:
+          - GPU32
+        device: GPU
+        model:   text-recognition-0012/FP32/text-recognition-0012.xml
+        weights: text-recognition-0012/FP32/text-recognition-0012.bin
+        adapter: beam_search_decoder
+
+      - framework: dlsdk
+        tags:
+          - GPU16
+        device: GPU
+        model:   text-recognition-0012/FP16/text-recognition-0012.xml
+        weights: text-recognition-0012/FP16/text-recognition-0012.bin
+        adapter: beam_search_decoder
+
+      - framework: dlsdk
+        device: MYRIAD
+        model:   text-recognition-0012/FP16/text-recognition-0012.xml
+        weights: text-recognition-0012/FP16/text-recognition-0012.bin
+        adapter: beam_search_decoder
+
+      - framework: dlsdk
+        device: HDDL
+        model:   text-recognition-0012/FP16/text-recognition-0012.xml
+        weights: text-recognition-0012/FP16/text-recognition-0012.bin
+        adapter: beam_search_decoder
+
+      - framework: dlsdk
+        tags:
+          - FPGA16
+        device: HETERO:FPGA,CPU
+        model:   text-recognition-0012/FP32/text-recognition-0012.xml
+        weights: text-recognition-0012/FP32/text-recognition-0012.bin
+        adapter: beam_search_decoder
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP16_AlexNet_GoogleNet.aocx
+
+      - framework: dlsdk
+        tags:
+          - FPGA11
+        device: HETERO:FPGA,CPU
+        model:   text-recognition-0012/FP32/text-recognition-0012.xml
+        weights: text-recognition-0012/FP32/text-recognition-0012.bin
+        adapter: beam_search_decoder
+        cpu_extensions: AUTO
+        bitstream: 2019R1_A10DK_FP11_AlexNet_GoogleNet_SqueezeNet.aocx
+
+    datasets:
+      - name: ICDAR2013
+        data_source: ICDAR13_REC_validation/Challenge2_Test_Task3_Images
+        annotation_conversion:
+          converter: icdar13_recognition
+          annotation_file: ICDAR13_REC_validation/gt/gt.txt.fixed.alfanumeric
+
+        preprocessing:
+          - type: bgr_to_gray
+          - type: resize
+            dst_width: 120
+            dst_height: 32
+
+        metrics:
+          - type: character_recognition_accuracy
diff --git a/tools/accuracy_checker/data/test_data/1.jpg b/tools/accuracy_checker/data/test_data/1.jpg
new file mode 100644
index 000000000..20edaaee8
--- /dev/null
+++ b/tools/accuracy_checker/data/test_data/1.jpg
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.bin b/tools/accuracy_checker/data/test_models/SampLeNet.bin
new file mode 100644
index 000000000..da1186046
--- /dev/null
+++ b/tools/accuracy_checker/data/test_models/SampLeNet.bin
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.caffemodel b/tools/accuracy_checker/data/test_models/SampLeNet.caffemodel
new file mode 100644
index 000000000..274a07282
--- /dev/null
+++ b/tools/accuracy_checker/data/test_models/SampLeNet.caffemodel
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.prototxt b/tools/accuracy_checker/data/test_models/SampLeNet.prototxt
new file mode 100644
index 000000000..d6b158f67
--- /dev/null
+++ b/tools/accuracy_checker/data/test_models/SampLeNet.prototxt
@@ -0,0 +1,116 @@
+name: "SampLeNet"
+
+layer {
+  name: "data"
+  type: "Input"
+  top: "data"
+  input_param { shape: { dim: 1 dim: 3 dim: 32 dim: 32 } }
+}
+
+layer {
+  name: "conv1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1"
+
+  convolution_param {
+    num_output: 6
+    kernel_size: 5
+    stride: 1
+  }
+}
+layer {
+  name: "relu_conv1"
+  type: "ReLU"
+  bottom: "conv1"
+  top: "conv1"
+}
+layer {
+  name: "pool1"
+  type: "Pooling"
+  bottom: "conv1"
+  top: "pool1"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+
+layer {
+  name: "conv2"
+  type: "Convolution"
+  bottom: "pool1"
+  top: "conv2"
+
+  convolution_param {
+    num_output: 16
+    kernel_size: 5
+    stride: 1
+  }
+}
+
+layer {
+  name: "relu_conv2"
+  type: "ReLU"
+  bottom: "conv2"
+  top: "conv2"
+}
+layer {
+  name: "pool2"
+  type: "Pooling"
+  bottom: "conv2"
+  top: "pool2"
+
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+
+layer {
+  name: "fc1"
+  type: "InnerProduct"
+  bottom: "pool2"
+  top: "fc1"
+
+  inner_product_param {
+    num_output: 120
+  }
+}
+layer {
+  name: "relu_fc1"
+  type: "ReLU"
+  bottom: "fc1"
+  top: "fc1"
+}
+
+layer {
+  name: "fc2"
+  type: "InnerProduct"
+  bottom: "fc1"
+  top: "fc2"
+
+  inner_product_param {
+    num_output: 84
+  }
+}
+
+layer {
+  name: "relu_fc2"
+  type: "ReLU"
+  bottom: "fc2"
+  top: "fc2"
+}
+
+layer {
+  name: "fc3"
+  type: "InnerProduct"
+  bottom: "fc2"
+  top: "fc3"
+
+  inner_product_param {
+    num_output: 10
+  }
+}
diff --git a/tools/accuracy_checker/data/test_models/SampLeNet.xml b/tools/accuracy_checker/data/test_models/SampLeNet.xml
new file mode 100644
index 000000000..f3d55eebc
--- /dev/null
+++ b/tools/accuracy_checker/data/test_models/SampLeNet.xml
@@ -0,0 +1,239 @@
+<?xml version="1.0" ?>
+<net batch="1" name="SampLeNet" version="2">
+	<layers>
+		<layer id="0" name="data" precision="FP32" type="Input">
+			<output>
+				<port id="0">
+					<dim>1</dim>
+					<dim>3</dim>
+					<dim>32</dim>
+					<dim>32</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="1" name="conv1" precision="FP32" type="Convolution">
+			<data dilation-x="1" dilation-y="1" group="1" kernel-x="5" kernel-y="5" output="6" pad-b="0" pad-r="0" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>3</dim>
+					<dim>32</dim>
+					<dim>32</dim>
+				</port>
+			</input>
+			<output>
+				<port id="3">
+					<dim>1</dim>
+					<dim>6</dim>
+					<dim>28</dim>
+					<dim>28</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="0" size="1800"/>
+				<biases offset="1800" size="24"/>
+			</blobs>
+		</layer>
+		<layer id="2" name="relu_conv1" precision="FP32" type="ReLU">
+			<data negative_slope="0.0"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>6</dim>
+					<dim>28</dim>
+					<dim>28</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>6</dim>
+					<dim>28</dim>
+					<dim>28</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="3" name="pool1" precision="FP32" type="Pooling">
+			<data exclude-pad="false" kernel-x="2" kernel-y="2" pad-b="0" pad-r="0" pad-x="0" pad-y="0" pool-method="max" rounding-type="ceil" stride="1,1,2,2" stride-x="2" stride-y="2"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>6</dim>
+					<dim>28</dim>
+					<dim>28</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>6</dim>
+					<dim>14</dim>
+					<dim>14</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="4" name="conv2" precision="FP32" type="Convolution">
+			<data dilation-x="1" dilation-y="1" group="1" kernel-x="5" kernel-y="5" output="16" pad-b="0" pad-r="0" pad-x="0" pad-y="0" stride="1,1,1,1" stride-x="1" stride-y="1"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>6</dim>
+					<dim>14</dim>
+					<dim>14</dim>
+				</port>
+			</input>
+			<output>
+				<port id="3">
+					<dim>1</dim>
+					<dim>16</dim>
+					<dim>10</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="1824" size="9600"/>
+				<biases offset="11424" size="64"/>
+			</blobs>
+		</layer>
+		<layer id="5" name="relu_conv2" precision="FP32" type="ReLU">
+			<data negative_slope="0.0"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>16</dim>
+					<dim>10</dim>
+					<dim>10</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>16</dim>
+					<dim>10</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="6" name="pool2" precision="FP32" type="Pooling">
+			<data exclude-pad="false" kernel-x="2" kernel-y="2" pad-b="0" pad-r="0" pad-x="0" pad-y="0" pool-method="max" rounding-type="ceil" stride="1,1,2,2" stride-x="2" stride-y="2"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>16</dim>
+					<dim>10</dim>
+					<dim>10</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>16</dim>
+					<dim>5</dim>
+					<dim>5</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="7" name="fc1" precision="FP32" type="FullyConnected">
+			<data out-size="120"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>16</dim>
+					<dim>5</dim>
+					<dim>5</dim>
+				</port>
+			</input>
+			<output>
+				<port id="3">
+					<dim>1</dim>
+					<dim>120</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="11488" size="192000"/>
+				<biases offset="203488" size="480"/>
+			</blobs>
+		</layer>
+		<layer id="8" name="relu_fc1" precision="FP32" type="ReLU">
+			<data negative_slope="0.0"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>120</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>120</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="9" name="fc2" precision="FP32" type="FullyConnected">
+			<data out-size="84"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>120</dim>
+				</port>
+			</input>
+			<output>
+				<port id="3">
+					<dim>1</dim>
+					<dim>84</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="203968" size="40320"/>
+				<biases offset="244288" size="336"/>
+			</blobs>
+		</layer>
+		<layer id="10" name="relu_fc2" precision="FP32" type="ReLU">
+			<data negative_slope="0.0"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>84</dim>
+				</port>
+			</input>
+			<output>
+				<port id="1">
+					<dim>1</dim>
+					<dim>84</dim>
+				</port>
+			</output>
+		</layer>
+		<layer id="11" name="fc3" precision="FP32" type="FullyConnected">
+			<data out-size="10"/>
+			<input>
+				<port id="0">
+					<dim>1</dim>
+					<dim>84</dim>
+				</port>
+			</input>
+			<output>
+				<port id="3">
+					<dim>1</dim>
+					<dim>10</dim>
+				</port>
+			</output>
+			<blobs>
+				<weights offset="244624" size="3360"/>
+				<biases offset="247984" size="40"/>
+			</blobs>
+		</layer>
+	</layers>
+	<edges>
+		<edge from-layer="0" from-port="0" to-layer="1" to-port="0"/>
+		<edge from-layer="1" from-port="3" to-layer="2" to-port="0"/>
+		<edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>
+		<edge from-layer="3" from-port="1" to-layer="4" to-port="0"/>
+		<edge from-layer="4" from-port="3" to-layer="5" to-port="0"/>
+		<edge from-layer="5" from-port="1" to-layer="6" to-port="0"/>
+		<edge from-layer="6" from-port="1" to-layer="7" to-port="0"/>
+		<edge from-layer="7" from-port="3" to-layer="8" to-port="0"/>
+		<edge from-layer="8" from-port="1" to-layer="9" to-port="0"/>
+		<edge from-layer="9" from-port="3" to-layer="10" to-port="0"/>
+		<edge from-layer="10" from-port="1" to-layer="11" to-port="0"/>
+	</edges>
+</net>
diff --git a/tools/accuracy_checker/pylint_checkers.py b/tools/accuracy_checker/pylint_checkers.py
new file mode 100644
index 000000000..a42ccd659
--- /dev/null
+++ b/tools/accuracy_checker/pylint_checkers.py
@@ -0,0 +1,144 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import astroid
+from pylint.checkers import BaseChecker
+from pylint.interfaces import IAstroidChecker, IRawChecker
+
+
+class BackslashChecker(BaseChecker):
+    """
+    Checks for line continuations with '\' instead of using triple quoted string or parenthesis.
+    """
+
+    __implements__ = IRawChecker
+
+    name = 'backslash'
+    msgs = {
+        'W9901': (
+            'use of \\ for line continuation', 'backslash-line-continuation',
+            'Used when a \\ is used for a line continuation instead of using triple quoted string or parenthesis.'
+        ),
+    }
+    options = ()
+
+    def process_module(self, node):
+        with node.stream() as stream:
+            for (line_number, line) in enumerate(stream):
+                if not line.decode().rstrip().endswith('\\'):
+                    continue
+
+                self.add_message('backslash-line-continuation', line=line_number)
+
+
+class AbsoluteImportsChecker(BaseChecker):
+    """
+    Check for absolute import from the same package.
+    """
+
+    __implements__ = IAstroidChecker
+
+    name = 'absolute-imports'
+    priority = -1
+    msgs = {
+        'W9902': (
+            'absolute import from same package', 'package-absolute-imports',
+            'Used when module of same package imported using absolute import'
+        )
+    }
+
+    def visit_importfrom(self, node):
+        node_package = self._node_package(node)
+        import_name = node.modname
+        if import_name.startswith(node_package):
+            self.add_message('package-absolute-imports', node=node)
+
+    @staticmethod
+    def _node_package(node):
+        return node.scope().name.split('.')[0]
+
+
+class StringFormatChecker(BaseChecker):
+    """
+    Check for absolute import from the same package.
+    """
+
+    __implements__ = IAstroidChecker
+
+    name = 'string-format'
+    priority = -1
+    msgs = {
+        'W9903': (
+            'use of "%" for string formatting', 'deprecated-string-format',
+            '"%" operator is used for string formatting instead of str.format method'
+        )
+    }
+
+    def visit_binop(self, node):
+        if node.op != '%':
+            return
+
+        left = node.left
+        if not (isinstance(left, astroid.Const) and isinstance(left.value, str)):
+            return
+
+        self.add_message('deprecated-string-format', node=node)
+
+
+class BadFunctionChecker(BaseChecker):
+    """
+    Check for absolute import from the same package.
+    """
+
+    __implements__ = IAstroidChecker
+
+    name = 'bad-function'
+    priority = -1
+    msgs = {'W9904': ('using prohibited function', 'bad-function-call', '')}
+
+    options = (
+        (
+            'bad-functions',
+            {
+                'default': '',
+                'help': 'List of prohibited functions',
+            },
+        ),
+    )
+
+    def visit_call(self, node):
+        bad_functions = set(f.strip() for f in self.config.bad_functions.split(','))
+        if self._function_name(node) in bad_functions:
+            self.add_message('bad-function-call', node=node)
+
+    @staticmethod
+    def _function_name(node):
+        func = node.func
+        if hasattr(func, 'attrname'):
+            return func.attrname
+        elif hasattr(func, 'name'):
+            return func.name
+
+
+def register(linter):
+    """
+    Required method to auto register this checker.
+    """
+
+    linter.register_checker(BackslashChecker(linter))
+    linter.register_checker(AbsoluteImportsChecker(linter))
+    linter.register_checker(StringFormatChecker(linter))
+    linter.register_checker(BadFunctionChecker(linter))
diff --git a/tools/accuracy_checker/requirements.txt b/tools/accuracy_checker/requirements.txt
new file mode 100644
index 000000000..16cc45760
--- /dev/null
+++ b/tools/accuracy_checker/requirements.txt
@@ -0,0 +1,9 @@
+ numpy
+tqdm
+PyYAML
+pillow
+scikit-learn
+scipy<=0.19
+py-cpuinfo
+shapely
+nibabel
diff --git a/tools/accuracy_checker/setup.cfg b/tools/accuracy_checker/setup.cfg
new file mode 100644
index 000000000..5d5a13c44
--- /dev/null
+++ b/tools/accuracy_checker/setup.cfg
@@ -0,0 +1,8 @@
+[flake8]
+max-line-length = 120
+ignore = F401
+
+[isort]
+line_length = 120
+use_parentheses = True
+known_third_party = openvino.inference_engine,caffe,cv2
diff --git a/tools/accuracy_checker/tests/__init__.py b/tools/accuracy_checker/tests/__init__.py
new file mode 100644
index 000000000..43d061dfd
--- /dev/null
+++ b/tools/accuracy_checker/tests/__init__.py
@@ -0,0 +1,16 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
diff --git a/tools/accuracy_checker/tests/common.py b/tools/accuracy_checker/tests/common.py
new file mode 100644
index 000000000..7a85f9123
--- /dev/null
+++ b/tools/accuracy_checker/tests/common.py
@@ -0,0 +1,139 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from contextlib import contextmanager
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import List
+
+import numpy as np
+
+from accuracy_checker.representation import DetectionAnnotation, DetectionPrediction, SegmentationPrediction, SegmentationAnnotation
+from accuracy_checker.utils import get_path
+
+
+@contextmanager
+# since it seems not possible to create pathlib.Path from str with '/' at the end we accept strings
+# expect paths in posix format
+def mock_filesystem(hierarchy: List[str]):
+    with TemporaryDirectory() as prefix:
+        for entry in hierarchy:
+            path = Path(prefix) / entry
+            if entry.endswith("/"):
+                path.mkdir(parents=True, exist_ok=True)
+            else:
+                parent = path.parent
+                if parent != Path("."):
+                    parent.mkdir(parents=True, exist_ok=True)
+                # create file
+                path.open('w').close()
+
+        yield get_path(prefix, is_directory=True)
+
+
+def make_representation(bounding_boxes, is_ground_truth=False, score=None, meta=None):
+    """
+    Args:
+        bounding_boxes: string or list of strings `score label x0 y0 x1 y1; label score x0 y0 x1 y1; ...`.
+        is_ground_truth: True if bbs are annotation boxes.
+        score: value in [0, 1], if not None, all prediction boxes are considered with the given score.
+        meta: metadata for representation
+    """
+
+    if not isinstance(bounding_boxes, list):
+        bounding_boxes = [bounding_boxes]
+
+    result = []
+    for idx, box in enumerate(bounding_boxes):
+        arr = np.array(np.mat(box))
+
+        if box == "":
+            arr = np.array([]).reshape((0, 5))
+
+        if is_ground_truth or score:
+            assert arr.shape[1] == 5
+        elif not is_ground_truth and not score:
+            assert arr.shape[1] == 6
+
+        if not is_ground_truth and score:
+            score_ = score
+            if np.isscalar(score_) or len(score_) == 1:
+                score_ = np.full(arr.shape[0], score_)
+            arr = np.c_[score_, arr]
+
+        if is_ground_truth:
+            detection = DetectionAnnotation(str(idx), arr[:, 0], arr[:, 1], arr[:, 2], arr[:, 3], arr[:, 4])
+        else:
+            detection = DetectionPrediction(str(idx), arr[:, 1], arr[:, 0], arr[:, 2], arr[:, 3], arr[:, 4], arr[:, 5])
+
+        if meta:
+            detection.metadata = meta[idx]
+
+        result.append(detection)
+
+    return result
+
+
+def make_segmentation_representation(mask, ground_truth=False):
+    if ground_truth:
+        representation = SegmentationAnnotation('identifier', None)
+        representation.mask = mask
+        return [representation]
+
+    return [SegmentationPrediction('identifier', mask)]
+
+
+def update_dict(dictionary, **kwargs):
+    copied = dictionary.copy()
+    copied.update(**kwargs)
+
+    return copied
+
+
+class DummyDataset:
+    def __init__(self, label_map, bg=-1):
+        self.label_map = label_map
+        self.background = bg
+
+    @property
+    def metadata(self):
+        return {"label_map": self.label_map, "background_label": self.background}
+
+    @property
+    def labels(self):
+        return self.metadata['label_map']
+
+
+# @pytest.fixture(scope="function", params=[
+#     {0: 'dog', -1: 'background'}, {0: 'dog', 1: 'cat', 2: 'human', -1: 'background'}, {0: 'dog', 1: 'cat', 2: 'human'}
+# ], ids=['single class', 'multi class', 'multi_class_without_background'])
+# def dataset(request):
+#     labels = request.param
+#     yield DummyDataset(label_map=labels, bg=-1)
+
+
+def multi_class_dataset():
+    labels = {0: 'dog', 1: 'cat', 2: 'human', -1: 'background'}
+    return DummyDataset(label_map=labels, bg=-1)
+
+
+def multi_class_dataset_without_background():
+    labels = {0: 'dog', 1: 'cat', 2: 'human'}
+    return DummyDataset(label_map=labels)
+
+
+def single_class_dataset():
+    labels = {0: 'dog', -1: 'background'}
+    return DummyDataset(label_map=labels, bg=-1)
diff --git a/tools/accuracy_checker/tests/conftest.py b/tools/accuracy_checker/tests/conftest.py
new file mode 100644
index 000000000..7657240d8
--- /dev/null
+++ b/tools/accuracy_checker/tests/conftest.py
@@ -0,0 +1,52 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+from pathlib import Path
+
+import pytest
+
+test_root = Path(__file__).parent
+project_root = test_root.parent
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--caffe_logging", action="store_true", default=False, help="Enable Google log"
+    )
+
+
+def pytest_configure(config):
+    if not config.getoption('caffe_logging'):
+        os.environ['GLOG_minloglevel'] = '2'
+
+
+@pytest.fixture
+def data_dir():
+    return project_root / 'data' / 'test_data'
+
+
+@pytest.fixture
+def models_dir():
+    return project_root / 'data' / 'test_models'
+
+
+@pytest.fixture
+def mock_path_exists(mocker):
+    mocker.patch('pathlib.Path.exists', return_value=True)
+    mocker.patch('pathlib.Path.is_dir', return_value=True)
+    mocker.patch('pathlib.Path.is_file', return_value=True)
+    mocker.patch('os.path.exists', return_value=True)
diff --git a/tools/accuracy_checker/tests/test_adapters.py b/tools/accuracy_checker/tests/test_adapters.py
new file mode 100644
index 000000000..9cb90f5f5
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_adapters.py
@@ -0,0 +1,121 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+import pytest
+
+from accuracy_checker.adapters import SSDAdapter, Adapter
+from accuracy_checker.config import ConfigError
+from .common import make_representation
+
+
+def test_detection_adapter():
+    raw = {
+        'detection_out': np.array([[[[0, 3, 0.2, 0, 0, 1, 1], [0, 2, 0.5, 4, 4, 7, 7], [0, 5, 0.7, 3, 3, 9, 8]]]])
+    }
+    expected = make_representation('0.2,3,0,0,1,1;0.5,2,4,4,7,7;0.7,5,3,3,9,8')
+
+    actual = SSDAdapter({}, output_blob='detection_out')([raw], ['0'])
+
+    assert np.array_equal(actual, expected)
+
+
+def test_detection_adapter_partially_filling_output_blob():
+    raw = {
+        'detection_out': np.array(
+            [[[[0, 3, 0.2, 0, 0, 1, 1], [0, 2, 0.5, 4, 4, 7, 7], [0, 5, 0.7, 3, 3, 9, 8], [-1, 0, 0, 0, 0, 0, 0]]]]
+        )
+    }
+    expected = make_representation('0.2,3,0,0,1,1;0.5,2,4,4,7,7;0.7,5,3,3,9,8')
+
+    actual = SSDAdapter({}, output_blob='detection_out')([raw], ['0'])
+
+    assert np.array_equal(actual, expected)
+
+
+def test_detection_adapter_partially_filling_output_blob_with_zeros_at_the_end():
+    raw = {
+        'detection_out': np.array([[[
+            [0,  3, 0.2, 0, 0, 1, 1],
+            [0,  2, 0.5, 4, 4, 7, 7],
+            [0,  5, 0.7, 3, 3, 9, 8],
+            [-1, 0, 0,   0, 0, 0, 0],
+            [0,  0, 0,   0, 0, 0, 0]
+        ]]])
+    }
+    expected = make_representation('0.2,3,0,0,1,1;0.5,2,4,4,7,7;0.7,5,3,3,9,8')
+
+    actual = SSDAdapter({}, output_blob='detection_out')([raw], ['0'])
+
+    assert np.array_equal(actual, expected)
+
+
+def test_detection_adapter_batch_2():
+    raw = {
+        'detection_out': np.array([[[[0, 3, 0.2, 0, 0, 1, 1], [0, 2, 0.5, 4, 4, 7, 7], [1, 5, 0.7, 3, 3, 9, 8]]]])
+    }
+    expected = make_representation(['0.2,3,0,0,1,1;0.5,2,4,4,7,7', '0.7,5,3,3,9,8'])
+
+    actual = SSDAdapter({}, output_blob='detection_out')([raw], ['0', '1'])
+
+    assert np.array_equal(actual, expected)
+
+
+def test_dictionary_adapter_no_raise_warning_on_specific_args():
+    adapter_config = {'type': 'age_gender', 'gender_out': 'gender', 'age_out': 'age'}
+    with pytest.warns(None) as record:
+        Adapter.provide('age_gender', adapter_config)
+        assert len(record) == 0
+
+
+def test_age_gender_adapter_raise_config_error_on_extra_args():
+    adapter_config = {'type': 'age_gender', 'gender_out': 'gender', 'age_out': 'age', 'something_extra': 'extra'}
+    with pytest.raises(ConfigError):
+        Adapter.provide('age_gender', adapter_config)
+
+
+def test_face_person_detection_adapter_raise_config_error_on_extra_args():
+    adapter_config = {
+        'type': 'face_person_detection',
+        'face_detection_out': 'face',
+        'person_detection_out': 'person',
+        'something_extra': 'extra'
+    }
+    with pytest.raises(ConfigError):
+        Adapter.provide('face_person_detection', adapter_config)
+
+
+def test_head_pose_adapter_raise_config_error_on_extra_args():
+    adapter_config = {
+        'type': 'head_pose',
+        'angle_yaw': 'yaw',
+        'angle_pitch': 'pitch',
+        'angle_roll': 'roll',
+        'something_extra': 'extra'
+    }
+    with pytest.raises(ConfigError):
+        Adapter.provide('head_pose', adapter_config)
+
+
+def test_vehicle_attributes_adapter_raise_config_error_on_extra_args():
+    adapter_config = {
+        'type': 'vehicle_attributes',
+        'color_out': 'color',
+        'type_out': 'type',
+        'something_extra': 'extra'
+    }
+    with pytest.raises(ConfigError):
+        Adapter.provide('vehicle_attributes', adapter_config)
diff --git a/tools/accuracy_checker/tests/test_caffe_launcher.py b/tools/accuracy_checker/tests/test_caffe_launcher.py
new file mode 100644
index 000000000..205fb7b5e
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_caffe_launcher.py
@@ -0,0 +1,77 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pytest
+pytest.importorskip('accuracy_checker.launcher.caffe_launcher')
+
+import cv2
+import numpy as np
+
+from accuracy_checker.launcher.launcher import create_launcher
+from accuracy_checker.config import ConfigError
+from accuracy_checker.dataset import DataRepresentation
+
+
+def get_caffe_test_model(models_dir):
+    config = {
+        "framework": "caffe",
+        "weights": str(models_dir / "SampLeNet.caffemodel"),
+        "model": str(models_dir / "SampLeNet.prototxt"),
+        "adapter": 'classification',
+        "device": "cpu"
+    }
+
+    return create_launcher(config)
+
+
+class TestCaffeLauncher:
+    def test_launcher_creates(self, models_dir):
+        assert get_caffe_test_model(models_dir).inputs['data'] == (3, 32, 32)
+
+    def test_infer(self, data_dir, models_dir):
+        caffe_test_model = get_caffe_test_model(models_dir)
+        c, h, w = caffe_test_model.inputs['data']
+        img_raw = cv2.imread(str(data_dir / '1.jpg'))
+        img_resized = cv2.resize(img_raw, (w, h))
+        res = caffe_test_model.predict(['1.jpg'], [DataRepresentation(img_resized)])
+
+        assert res[0].label == 6
+
+    def test_caffe_launcher_provide_input_shape_to_adapter(self, mocker, models_dir):
+        mocker.patch('caffe.Net.forward', return_value={'fc3': 0})
+        adapter_mock = mocker.patch('accuracy_checker.adapters.ClassificationAdapter.process')
+        launcher = get_caffe_test_model(models_dir)
+        launcher.predict(['1.png'], [DataRepresentation(np.zeros((32, 32, 3)))])
+        adapter_mock.assert_called_once_with([{'fc3': 0}], ['1.png'], [{'input_shape': {'data': (3, 32, 32)}, 'image_size': (32, 32, 3)}])
+
+
+
+def test_missed_model_in_create_caffe_launcher_raises_config_error_exception():
+    launcher = {'framework': 'caffe', 'weights': 'custom', 'adapter': 'classification'}
+
+    with pytest.raises(ConfigError):
+        create_launcher(launcher)
+
+
+def test_missed_weights_in_create_caffe_launcher_raises_config_error_exception():
+    launcher = {'framework': 'caffe', 'model': 'custom', 'adapter': 'ssd'}
+
+    with pytest.raises(ConfigError):
+        create_launcher(launcher)
+
+
+def dummy_adapter():
+    pass
diff --git a/tools/accuracy_checker/tests/test_config_reader.py b/tools/accuracy_checker/tests/test_config_reader.py
new file mode 100644
index 000000000..9b364d8f4
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_config_reader.py
@@ -0,0 +1,1014 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import copy
+from pathlib import Path
+from argparse import Namespace
+
+import pytest
+from accuracy_checker.config import ConfigReader, ConfigError
+
+
+class TestConfigReader:
+    def setup_method(self):
+        self.global_launchers = [
+            {
+                'framework': 'dlsdk',
+                'device': 'fpga',
+                'cpu_extensions': 'dlsdk_shared.so',
+                'bitstream': 'bitstream'
+            },
+            {
+                'framework': 'caffe',
+                'device': 'gpu_0'
+            }
+        ]
+
+        self.global_datasets = [
+            {
+                'name': 'global_dataset',
+                'annotation': Path('/pascal_voc_2007_annotation.pickle'),
+                'data_source': Path('/VOCdevkit/VOC2007/JPEGImages'),
+                'preprocessing': [
+                    {
+                        'type': 'resize',
+                        'interpolation': 'mean_image',
+                    },
+                    {
+                        'type': 'normalization',
+                        'mean': 'voc',
+                    }
+                ],
+                'metrics': [{
+                    'type': 'fppi',
+                    'mr_rates': [0.0, 0.1]
+                }],
+                'postprocessing': [
+                    {
+                        'type': 'filter',
+                        'labels': ['dog', 'airplane'],
+                        'min_confidence': 0.05,
+                        'min_box_size': 60,
+                    },
+                    {
+                        'type': 'nms',
+                        'overlap': 0.5
+                    }
+                ]
+            }
+        ]
+
+        self.global_config = {
+            'launchers': self.global_launchers,
+            'datasets': self.global_datasets
+        }
+
+        self.module = 'accuracy_checker.config.ConfigReader'
+        self.arguments = Namespace(**{
+            'models': Path('models'),
+            'extensions': Path('extensions'),
+            'source': Path('source'),
+            'annotations': Path('annotations'),
+            'converted_models': Path('converted_models'),
+            'model_optimizer': Path('model_optimizer'),
+            'bitstreams': Path('bitstreams'),
+            'definitions': None,
+            'stored_predictions': None,
+            'tf_custom_op_config': None,
+            'tf_obj_detection_api_pipeline_config_path': None,
+            'progress': 'bar',
+            'target_framework': None,
+            'target_devices': None,
+            'log_file': None,
+            'target_tags': None,
+            'cpu_extensions_mode': None,
+            'aocl': None
+        })
+
+    def test_read_configs_without_global_config(self, mocker):
+        config = {'models': [{
+            'name': 'model',
+            'launchers': [{'framework': 'dlsdk', 'model': Path('/absolute_path'), 'weights': Path('/absolute_path')}],
+            'datasets': [{'name': 'global_dataset'}]
+        }]}
+        empty_args = Namespace(**{
+            'models': None, 'extensions': None, 'source': None, 'annotations': None,
+            'converted_models': None, 'model_optimizer': None, 'bitstreams': None,
+            'definitions': None, 'config': None, 'stored_predictions': None, 'tf_custom_op_config': None,
+            'progress': 'bar', 'target_framework': None, 'target_devices': None, 'log_file': None,
+            'tf_obj_detection_api_pipeline_config_path': None, 'target_tags': None, 'cpu_extensions_mode': None,
+            'aocl': None
+        })
+        mocker.patch('accuracy_checker.utils.get_path', return_value=Path.cwd())
+        mocker.patch('yaml.load', return_value=config)
+        mocker.patch('pathlib.Path.open')
+
+        result = ConfigReader.merge(empty_args)
+
+        assert config == result
+
+    def test_empty_local_config_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Missing local config'
+
+    def test_missed_models_in_local_config_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {'not_models': 'custom'}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Missed "{}" in local config'.format('models')
+
+    def test_empty_models_in_local_config_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {'models': []}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Missed "{}" in local config'.format('models')
+
+    def test_missed_name_in_model_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {'models': [{'launchers': None, 'datasets': None}]}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Each model must specify {}'.format(['name', 'launchers', 'datasets'])
+
+    def test_missed_launchers_in_model_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {'models': [{'name': None, 'datasets': None}]}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Each model must specify {}'.format(['name', 'launchers', 'datasets'])
+
+    def test_missed_datasets_in_model_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {'models': [{'name': None, 'launchers': None}]}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Each model must specify {}'.format(['name', 'launchers', 'datasets'])
+
+    def test_invalid_model_raises_value_error_exception(self, mocker):
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, {'models': [{'name': None, 'launchers': None, 'datasets': None}]}
+        ))
+
+        with pytest.raises(ConfigError) as exception:
+            ConfigReader.merge(self.arguments)
+
+        error_message = str(exception).split(sep=': ')[-1]
+        assert error_message == 'Each model must specify {}'.format(['name', 'launchers', 'datasets'])
+
+    def test_merge_datasets_with_definitions(self, mocker):
+        local_config = {'models': [{
+            'name': 'model',
+            'launchers': [{'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}],
+            'datasets': [{'name': 'global_dataset'}]
+        }]}
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, local_config
+        ))
+        arguments = copy.deepcopy(self.arguments)
+        arguments.model_optimizer = None
+
+        config = ConfigReader.merge(arguments)
+
+        assert config['models'][0]['datasets'][0] == self.global_datasets[0]
+
+    def test_merge_datasets_with_definitions_and_meta_is_not_modified(self, mocker):
+        local_config = {'models': [{
+            'name': 'model',
+            'launchers': [{'framework': 'dlsdk', 'model': '/absolute_path', 'weights': '/absolute_path'}],
+            'datasets': [{'name': 'global_dataset', 'dataset_meta': '/absolute_path'}]
+        }]}
+        expected = self.global_datasets[0]
+        expected['dataset_meta'] = Path('/absolute_path')
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, local_config
+        ))
+
+        config = ConfigReader.merge(self.arguments)
+
+        assert config['models'][0]['datasets'][0] == expected
+
+    def test_expand_relative_paths_in_datasets_config_using_command_line(self, mocker):
+        local_config = {'models': [{
+            'name': 'model',
+            'launchers': [{'framework': 'caffe'}],
+            'datasets': [{
+                'name': 'global_dataset',
+                'dataset_meta': 'relative_annotation_path',
+                'data_source': 'relative_source_path',
+                'segmentation_masks_source': 'relative_source_path',
+                'annotation': 'relative_annotation_path'
+            }]
+        }]}
+
+        mocker.patch(self.module + '._read_configs', return_value=(
+            None, local_config
+        ))
+        expected = copy.deepcopy(local_config['models'][0]['datasets'][0])
+        expected['annotation'] = self.arguments.annotations / 'relative_annotation_path'
+        expected['dataset_meta'] = self.arguments.annotations / 'relative_annotation_path'
+        expected['segmentation_masks_source'] = self.arguments.source / 'relative_source_path'
+        expected['data_source'] = self.arguments.source / 'relative_source_path'
+
+        config = ConfigReader.merge(self.arguments)
+
+        assert config['models'][0]['datasets'][0] == expected
+
+    def test_not_modify_absolute_paths_in_datasets_config_using_command_line(self):
+        local_config = {'models': [{
+            'name': 'model',
+            'datasets': [{
+                'name': 'global_dataset',
+                'dataset_meta': '/absolute_annotation_meta_path',
+                'data_source': '/absolute_source_path',
+                'annotation': '/absolute_annotation_path',
+            }]
+        }]}
+
+        expected = copy.deepcopy(local_config['models'][0]['datasets'][0])
+        expected['annotation'] = Path('/absolute_annotation_path')
+        expected['dataset_meta'] = Path('/absolute_annotation_meta_path')
+        expected['data_source'] = Path('/absolute_source_path')
+
+        ConfigReader._merge_paths_with_prefixes(self.arguments, local_config)
+
+        assert local_config['models'][0]['datasets'][0] == expected
+
+    def test_merge_launchers_with_definitions(self, mocker):
+        local_config = {'models': [{
+            'name': 'model',
+            'launchers': [{'framework': 'dlsdk'}],
+            'datasets': [{'name': 'global_dataset'}]
+        }]}
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, local_config
+        ))
+        expected = copy.deepcopy(self.get_global_launcher('dlsdk'))
+        expected['bitstream'] = self.arguments.bitstreams / expected['bitstream']
+        expected['cpu_extensions'] = self.arguments.extensions / expected['cpu_extensions']
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.models = None
+
+        config = ConfigReader.merge(args)
+
+        assert config['models'][0]['launchers'][0] == expected
+
+    def test_merge_launchers_with_model_is_not_modified(self, mocker):
+        local_config = {'models': [{
+            'name': 'model',
+            'launchers': [{'framework': 'dlsdk', 'model': 'custom'}],
+            'datasets': [{'name': 'global_dataset'}]
+        }]}
+        expected = copy.deepcopy(self.get_global_launcher('dlsdk'))
+        expected['model'] = 'custom'
+        expected['bitstream'] = self.arguments.bitstreams / expected['bitstream']
+        expected['cpu_extensions'] = self.arguments.extensions / expected['cpu_extensions']
+        mocker.patch(self.module + '._read_configs', return_value=(
+            self.global_config, local_config
+        ))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.models = None
+        args.converted_models = None
+        config = ConfigReader.merge(args)
+
+        assert config['models'][0]['launchers'][0] == expected
+
+    def test_expand_relative_paths_in_launchers_config_using_command_line(self, mocker):
+        local_config = {'models': [{
+            'name': 'model',
+            'launchers': [{
+                'framework': 'dlsdk',
+                'model': 'relative_model_path',
+                'weights': 'relative_weights_path',
+                'cpu_extensions': 'relative_extensions_path',
+                'gpu_extensions': 'relative_extensions_path',
+                'caffe_model': 'relative_model_path',
+                'caffe_weights': 'relative_weights_path',
+                'tf_model': 'relative_model_path',
+                'mxnet_weights': 'relative_weights_path',
+                'bitstream': 'relative_bitstreams_path'
+            }],
+            'datasets': [{'name': 'dataset'}]
+        }]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+
+        expected = copy.deepcopy(local_config['models'][0]['launchers'][0])
+        expected['model'] = self.arguments.models / 'relative_model_path'
+        expected['caffe_model'] = self.arguments.models / 'relative_model_path'
+        expected['tf_model'] = self.arguments.models / 'relative_model_path'
+        expected['weights'] = self.arguments.models / 'relative_weights_path'
+        expected['caffe_weights'] = self.arguments.models / 'relative_weights_path'
+        expected['mxnet_weights'] = self.arguments.models / 'relative_weights_path'
+        expected['cpu_extensions'] = self.arguments.extensions / 'relative_extensions_path'
+        expected['gpu_extensions'] = self.arguments.extensions / 'relative_extensions_path'
+        expected['bitstream'] = self.arguments.bitstreams / 'relative_bitstreams_path'
+        expected['_models_prefix'] = self.arguments.models
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        config = ConfigReader.merge(args)
+
+        assert config['models'][0]['launchers'][0] == expected
+
+    def test_both_launchers_are_filtered_by_target_tags_if_tags_not_provided_in_config(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': '/absolute_path1',
+                'weights': '/absolute_path1',
+                'adapter': 'classification',
+                'device': 'CPU',
+            },
+            {
+                'framework': 'dlsdk',
+                'model': '/absolute_path2',
+                'weights': '/absolute_path2',
+                'adapter': 'classification',
+                'device': 'GPU',
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        self.arguments.target_tags = ['some_tag']
+
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_launcher_is_not_filtered_by_the_same_tag(self, mocker):
+        config_launchers = [{
+            'framework': 'dlsdk',
+            'tags': ['some_tag'],
+            'model': Path('/absolute_path1'),
+            'weights': Path('/absolute_path1'),
+            'adapter': 'classification',
+            'device': 'CPU',
+            '_model_optimizer': self.arguments.model_optimizer,
+            '_models_prefix': self.arguments.models
+        }]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_tags = ['some_tag']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers[0] == config_launchers[0]
+
+    def test_both_launchers_are_not_filtered_by_the_same_tag(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'tags': ['some_tag'],
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'dlsdk',
+                'tags': ['some_tag'],
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_tags = ['some_tag']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_both_launchers_are_filtered_by_another_tag(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'tags': ['some_tag'],
+                'model': '/absolute_path1',
+                'weights': '/absolute_path1',
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'dlsdk',
+                'tags': ['some_tag'],
+                'model': '/absolute_path2',
+                'weights': '/absolute_path2',
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_tags = ['other_tag']
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_only_appropriate_launcher_is_filtered_by_another_tag(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'tags': ['tag1'],
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'tags': ['tag2'],
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_tags = ['tag2']
+
+        config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 1
+        assert launchers[0] == config_launchers[1]
+
+    def test_only_appropriate_launcher_is_filtered_by_another_tag_if_provided_several_target_tags(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'tags': ['tag1'],
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'tags': ['tag2'],
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_tags = ['tag2', 'tag3']
+
+        config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 1
+        assert launchers[0] == config_launchers[1]
+
+    def test_launcher_with_several_tags_contained_at_least_one_from_target_tegs_is_not_filtered(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'tags': ['tag1', 'tag2'],
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_tags = ['tag2']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 1
+        assert launchers[0] == config_launchers[0]
+
+    def test_both_launchers_with_different_tags_are_not_filtered_by_the_same_tags(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'tags': ['tag1'],
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'dlsdk',
+                'tags': ['tag2'],
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_tags = ['tag1', 'tag2']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_launcher_is_not_filtered_by_the_same_framework(self, mocker):
+        config_launchers = [{
+            'framework': 'dlsdk',
+            'model': Path('/absolute_path1'),
+            'weights': Path('/absolute_path1'),
+            'adapter': 'classification',
+            'device': 'CPU',
+            '_model_optimizer': self.arguments.model_optimizer,
+            '_models_prefix': self.arguments.models
+        }]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_framework = 'dlsdk'
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_both_launchers_are_not_filtered_by_the_same_framework(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_framework = 'dlsdk'
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_launcher_is_filtered_by_another_framework(self, mocker):
+        config_launchers = [{
+            'framework': 'dlsdk',
+            'model': Path('/absolute_path'),
+            'weights': Path('/absolute_path'),
+            'adapter': 'classification',
+            '_model_optimizer': self.arguments.model_optimizer,
+            '_models_prefix': self.arguments.models
+        }]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_framework = 'caffe'
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_both_launchers_are_filtered_by_another_framework(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': '/absolute_path1',
+                'weights': '/absolute_path1',
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'dlsdk',
+                'model': '/absolute_path2',
+                'weights': '/absolute_path2',
+                'adapter': 'classification',
+                'device': 'GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_framework = 'caffe'
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_only_appropriate_launcher_is_filtered_by_another_framework(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_framework = 'caffe'
+
+        config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 1
+        assert launchers[0] == config_launchers[1]
+
+    def test_launcher_is_not_filtered_by_the_same_device(self, mocker):
+        config_launchers = [{
+            'framework': 'dlsdk',
+            'model': Path('/absolute_path1'),
+            'weights': Path('/absolute_path1'),
+            'adapter': 'classification',
+            'device': 'CPU',
+            '_model_optimizer': self.arguments.model_optimizer,
+            '_models_prefix': self.arguments.models
+        }]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.model_optimizer = None
+        args.converted_models = None
+        args.target_devices = ['CPU']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_both_launchers_are_not_filtered_by_the_same_device(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'CPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.converted_models = None
+        args.target_devices = ['CPU']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_launcher_is_filtered_by_another_device(self, mocker):
+        config_launchers = [{
+            'framework': 'dlsdk',
+            'model': Path('/absolute_path1'),
+            'weights': Path('/absolute_path1'),
+            'adapter': 'classification',
+            'device': 'CPU',
+            '_model_optimizer': self.arguments.model_optimizer,
+            '_models_prefix': self.arguments.models
+        }]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.converted_models = None
+        args.target_devices = ['GPU']
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_both_launchers_are_filtered_by_another_device(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'CPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_devices = ['GPU']
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_only_appropriate_launcher_is_filtered_by_another_device(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.converted_models = None
+        args.target_devices = ['GPU']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 1
+        assert launchers[0] == config_launchers[1]
+
+    def test_only_appropriate_launcher_is_filtered_by_user_input_devices(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'HETERO:CPU,GPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU',
+            }
+        ]
+
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.converted_models = None
+        args.target_devices = ['GPU', 'CPU']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == [config_launchers[0], config_launchers[2]]
+
+    def test_both_launchers_are_filtered_by_other_devices(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': '/absolute_path1',
+                'weights': '/absolute_path1',
+                'adapter': 'classification',
+                'device': 'CPU',
+            },
+            {
+                'framework': 'caffe',
+                'model': '/absolute_path2',
+                'weights': '/absolute_path2',
+                'adapter': 'classification',
+                'device': 'CPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        self.arguments.target_devices = ['FPGA', 'MYRIAD']
+
+        with pytest.warns(Warning):
+            config = ConfigReader.merge(self.arguments)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 0
+
+    def test_both_launchers_are_not_filtered_by_same_devices(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.converted_models = None
+        args.target_devices = ['GPU', 'CPU']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert launchers == config_launchers
+
+    def test_launcher_is_not_filtered_by_device_with_tail(self, mocker):
+        config_launchers = [
+            {
+                'framework': 'dlsdk',
+                'model': Path('/absolute_path1'),
+                'weights': Path('/absolute_path1'),
+                'adapter': 'classification',
+                'device': 'CPU',
+                '_model_optimizer': self.arguments.model_optimizer,
+                '_models_prefix': self.arguments.models
+            },
+            {
+                'framework': 'caffe',
+                'model': Path('/absolute_path2'),
+                'weights': Path('/absolute_path2'),
+                'adapter': 'classification',
+                'device': 'GPU'
+            }
+        ]
+        local_config = {'models': [{'name': 'name', 'launchers': config_launchers, 'datasets': [{'name': 'dataset'}]}]}
+        mocker.patch(self.module + '._read_configs', return_value=(None, local_config))
+        args = copy.deepcopy(self.arguments)
+        args.converted_models = None
+        args.target_devices = ['CPU', 'GPU_unexpected_tail']
+
+        config = ConfigReader.merge(args)
+
+        launchers = config['models'][0]['launchers']
+        assert len(launchers) == 1
+        assert launchers[0] == config_launchers[0]
+
+    def get_global_launcher(self, framework):
+        for launcher in self.global_launchers:
+            if launcher['framework'] == framework:
+                return launcher
+
+        raise ValueError('Undefined global launcher with framework = "{}"'.format(framework))
+
+    def get_global_dataset(self, name):
+        for dataset in self.global_datasets:
+            if dataset['name'] == name:
+                return dataset
+
+        raise ValueError('Undefined global dataset with name = "{}"'.format(name))
diff --git a/tools/accuracy_checker/tests/test_config_validator.py b/tools/accuracy_checker/tests/test_config_validator.py
new file mode 100644
index 000000000..29f2f6b74
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_config_validator.py
@@ -0,0 +1,379 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from math import inf, nan
+from pathlib import Path
+from unittest.mock import ANY
+
+import pytest
+from accuracy_checker.config.config_validator import (
+    ConfigError,
+    ConfigValidator,
+    DictField,
+    ListField,
+    NumberField,
+    PathField,
+    StringField
+)
+from tests.common import mock_filesystem
+
+
+class TestStringField:
+    def test_expects_string(self):
+        string_field = StringField()
+
+        with pytest.raises(ConfigError):
+            string_field.validate(b"foo")
+        with pytest.raises(ConfigError):
+            string_field.validate({})
+        with pytest.raises(ConfigError):
+            string_field.validate(42)
+
+        string_field.validate("foo")
+
+    def test_choices(self):
+        string_field = StringField(choices=['foo', 'bar'])
+
+        with pytest.raises(ConfigError):
+            string_field.validate('baz')
+
+        string_field.validate('bar')
+
+    def test_case_sensitive(self):
+        string_field = StringField(choices=['foo', 'bar'], case_sensitive=False)
+
+        string_field.validate('foo')
+        string_field.validate('FOO')
+
+        string_field = StringField(choices=['foo', 'bar'], case_sensitive=True)
+
+        string_field.validate('foo')
+        with pytest.raises(ConfigError):
+            string_field.validate('FOO')
+
+    def test_regex(self):
+        string_field = StringField(regex=r'foo\d*')
+
+        string_field.validate('foo')
+        string_field.validate('foo42')
+
+        with pytest.raises(ConfigError):
+            string_field.validate('baz')
+
+    def test_custom_exception(self, mocker):
+        stub = mocker.stub(name='custom_on_error')
+        string_field = StringField(choices=['foo'], on_error=stub)
+
+        with pytest.raises(ConfigError):
+            string_field.validate('bar', 'foo')
+        stub.assert_called_once_with('bar', 'foo', ANY)
+
+    def test_custom_validator(self, mocker):
+        stub = mocker.stub(name='custom_validator')
+        string_field = StringField(choices=['foo'], additional_validator=stub)
+
+        string_field.validate('foo', 'baz')
+        stub.assert_called_once_with('foo', 'baz')
+
+
+class TestNumberField:
+    def test_expects_number(self):
+        number_field = NumberField(floats=True)
+
+        number_field.validate(1.0)
+        with pytest.raises(ConfigError):
+            number_field.validate("foo")
+        with pytest.raises(ConfigError):
+            number_field.validate({})
+        with pytest.raises(ConfigError):
+            number_field.validate([])
+
+        number_field = NumberField(floats=False)
+        number_field.validate(1)
+        with pytest.raises(ConfigError):
+            number_field.validate(1.0)
+
+    def test_nans(self):
+        number_field = NumberField(allow_nan=True)
+        number_field.validate(nan)
+
+        number_field = NumberField(allow_nan=False)
+        with pytest.raises(ConfigError):
+            number_field.validate(nan)
+
+    def test_infinity(self):
+        number_field = NumberField(allow_inf=True)
+        number_field.validate(inf)
+
+        number_field = NumberField(allow_inf=False)
+        with pytest.raises(ConfigError):
+            number_field.validate(inf)
+
+    def test_ranges(self):
+        number_field = NumberField(min_value=0, max_value=5)
+
+        number_field.validate(0)
+        number_field.validate(1)
+        number_field.validate(2)
+
+        with pytest.raises(ConfigError):
+            number_field.validate(-1)
+        with pytest.raises(ConfigError):
+            number_field.validate(7)
+
+
+class TestDictField:
+    def test_expects_dict(self):
+        dict_field = DictField()
+
+        dict_field.validate({})
+        with pytest.raises(ConfigError):
+            dict_field.validate("foo")
+        with pytest.raises(ConfigError):
+            dict_field.validate(42)
+        with pytest.raises(ConfigError):
+            dict_field.validate([])
+
+    def test_validates_keys(self):
+        dict_field = DictField()
+        dict_field.validate({'foo': 42, 1: 'bar'})
+
+        dict_field = DictField(key_type=str)
+        dict_field.validate({'foo': 42, 'bar': 'bar'})
+        with pytest.raises(ConfigError):
+            dict_field.validate({'foo': 42, 1: 'bar'})
+
+        dict_field = DictField(key_type=StringField(choices=['foo', 'bar']))
+        dict_field.validate({'foo': 42, 'bar': 42})
+        with pytest.raises(ConfigError):
+            dict_field.validate({'foo': 42, 1: 'bar'})
+        with pytest.raises(ConfigError):
+            dict_field.validate({'foo': 42, 'baz': 42})
+
+    def test_validates_values(self):
+        dict_field = DictField()
+        dict_field.validate({'foo': 42, 1: 'bar'})
+
+        dict_field = DictField(value_type=str)
+        dict_field.validate({'foo': 'foo', 1: 'bar'})
+        with pytest.raises(ConfigError):
+            dict_field.validate({'foo': 42, 1: 2})
+
+        dict_field = DictField(value_type=StringField(choices=['foo', 'bar']))
+        dict_field.validate({1: 'foo', 'bar': 'bar'})
+        with pytest.raises(ConfigError):
+            dict_field.validate({1: 'foo', 2: 3})
+        with pytest.raises(ConfigError):
+            dict_field.validate({1: 'foo', 2: 'baz'})
+
+    def test_converts_basic_types(self):
+        dict_field = DictField(value_type=str)
+        assert isinstance(dict_field.value_type, StringField)
+
+        dict_field = DictField(value_type=int)
+        assert isinstance(dict_field.value_type, NumberField)
+        assert dict_field.value_type.floats is False
+
+        dict_field = DictField(value_type=float)
+        assert isinstance(dict_field.value_type, NumberField)
+        assert dict_field.value_type.floats is True
+
+        dict_field = DictField(value_type=list)
+        assert isinstance(dict_field.value_type, ListField)
+
+        dict_field = DictField(value_type=dict)
+        assert isinstance(dict_field.value_type, DictField)
+
+        dict_field = DictField(value_type=Path)
+        assert isinstance(dict_field.value_type, PathField)
+
+    def test_empty(self):
+        dict_field = DictField()
+        dict_field.validate({})
+
+        dict_field = DictField(allow_empty=False)
+        with pytest.raises(ConfigError):
+            dict_field.validate({})
+
+
+class TestListField:
+    def test_expects_list(self):
+        list_field = ListField()
+
+        list_field.validate([])
+        with pytest.raises(ConfigError):
+            list_field.validate("foo")
+        with pytest.raises(ConfigError):
+            list_field.validate(42)
+        with pytest.raises(ConfigError):
+            list_field.validate({})
+
+    def test_validates_values(self):
+        list_field = ListField()
+        list_field.validate(['foo', 42])
+
+        list_field = ListField(value_type=str)
+        list_field.validate(['foo', 'bar'])
+        with pytest.raises(ConfigError):
+            list_field.validate(['foo', 42])
+
+        list_field = ListField(value_type=StringField(choices=['foo', 'bar']))
+        list_field.validate(['foo', 'bar'])
+        with pytest.raises(ConfigError):
+            list_field.validate(['foo', 42])
+        with pytest.raises(ConfigError):
+            list_field.validate(['foo', 'bar', 'baz'])
+
+    def test_empty(self):
+        list_field = ListField()
+        list_field.validate([])
+
+        list_field = ListField(allow_empty=False)
+        with pytest.raises(ConfigError):
+            list_field.validate([])
+
+
+class TestPathField:
+    @pytest.mark.usefixtures('mock_path_exists')
+    def test_expects_path_like(self):
+        path_field = PathField()
+        path_field.validate('foo/bar')
+        path_field.validate('/home/user')
+        path_field.validate(Path('foo/bar'))
+
+        with pytest.raises(ConfigError):
+            path_field.validate(42)
+        with pytest.raises(ConfigError):
+            path_field.validate({})
+        with pytest.raises(ConfigError):
+            path_field.validate([])
+
+    def test_path_is_checked(self):
+        with mock_filesystem(['foo/bar']) as prefix:
+            prefix_path = Path(prefix)
+            file_field = PathField(is_directory=False)
+            with pytest.raises(ConfigError):
+                file_field.validate(prefix_path / 'foo')
+            file_field.validate(prefix_path / 'foo' / 'bar')
+
+            dir_field = PathField(is_directory=True)
+            dir_field.validate(prefix_path / 'foo')
+
+            with pytest.raises(ConfigError):
+                dir_field.validate(prefix_path / 'foo' / 'bar')
+
+
+class TestConfigValidator:
+    def test_compound(self):
+        class SampleValidator(ConfigValidator):
+            foo = StringField(choices=['foo'])
+            bar = NumberField()
+
+        sample_validator = SampleValidator('Sample')
+        sample_validator.validate({'foo': 'foo', 'bar': 1})
+
+        with pytest.raises(ConfigError):
+            sample_validator.validate({'foo': 'foo'})
+        with pytest.raises(ConfigError):
+            sample_validator.validate({'foo': 'bar', 'bar': 1})
+
+    def test_optional_fields(self):
+        class SampleValidatorNoOptionals(ConfigValidator):
+            foo = StringField(choices=['foo'])
+            bar = NumberField(optional=False)
+
+        sample_validator = SampleValidatorNoOptionals('Sample')
+        sample_validator.validate({'foo': 'foo', 'bar': 1})
+        with pytest.raises(ConfigError):
+            sample_validator.validate({'foo': 'bar'})
+
+        class SampleValidatorWithOptionals(ConfigValidator):
+            foo = StringField(choices=['foo'])
+            bar = NumberField(optional=True)
+
+        sample_validator = SampleValidatorWithOptionals('Sample')
+        sample_validator.validate({'foo': 'foo', 'bar': 1})
+        sample_validator.validate({'foo': 'foo'})
+
+    def test_extra_fields__warn_on_extra(self):
+        class SampleValidatorWarnOnExtra(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        sample_validator = SampleValidatorWarnOnExtra(
+            'Sample', on_extra_argument=ConfigValidator.WARN_ON_EXTRA_ARGUMENT
+        )
+
+        with pytest.warns(UserWarning):
+            sample_validator.validate({'foo': 'foo', 'bar': 'bar'})
+
+    def test_extra_fields__error_on_extra(self):
+        class SampleValidatorErrorOnExtra(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        sample_validator = SampleValidatorErrorOnExtra(
+            'Sample', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
+
+        with pytest.raises(ConfigError):
+            sample_validator.validate({'foo': 'bar', 'bar': 'bar'})
+
+    def test_extra_fields__ignore_extra(self):
+        class SampleValidatorIgnoresExtra(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        sample_validator = SampleValidatorIgnoresExtra(
+            'Sample', on_extra_argument=ConfigValidator.IGNORE_ON_EXTRA_ARGUMENT)
+
+        sample_validator.validate({'foo': 'foo', 'bar': 'bar'})
+
+    def test_custom_exception(self, mocker):
+        class SampleValidator(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        stub = mocker.stub(name='custom_on_error')
+        sample_validator = SampleValidator('Sample', on_error=stub)
+        sample_validator.validate({})
+        stub.assert_called_once_with(ANY, 'Sample', ANY)
+
+    def test_custom_validator(self, mocker):
+        class SampleValidator(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        stub = mocker.stub(name='custom_validator')
+        sample_validator = SampleValidator('Sample', additional_validator=stub)
+        entry = {'foo': 'foo'}
+        sample_validator.validate(entry)
+        stub.assert_called_once_with(entry, 'Sample')
+
+    def test_nested(self):
+        class InnerValidator(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        class OuterValidator(ConfigValidator):
+            bar = ListField(InnerValidator('Inner'))
+
+        outer_validator = OuterValidator('Outer', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
+
+        outer_validator.validate({'bar': [{'foo': 'foo'}, {'foo': 'foo'}]})
+
+    def test_inheritance(self):
+        class ParentValidator(ConfigValidator):
+            foo = StringField(choices=['foo'])
+
+        class DerivedValidator(ParentValidator):
+            bar = StringField(choices=['bar'])
+
+        derived_validator = DerivedValidator('Derived', on_extra_argument=ConfigValidator.ERROR_ON_EXTRA_ARGUMENT)
+        derived_validator.validate({'foo': 'foo', 'bar': 'bar'})
diff --git a/tools/accuracy_checker/tests/test_dataset.py b/tools/accuracy_checker/tests/test_dataset.py
new file mode 100644
index 000000000..954ded430
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_dataset.py
@@ -0,0 +1,191 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import copy
+from pathlib import Path
+import pytest
+from .common import make_representation
+from accuracy_checker.config import ConfigError
+
+from accuracy_checker.dataset import Dataset
+
+
+def copy_dataset_config(config):
+    new_config = copy.deepcopy(config)
+
+    return new_config
+
+class MockPreprocessor:
+    @staticmethod
+    def process(images):
+        return images
+
+
+class TestDataset:
+    dataset_config = {
+            'name': 'custom',
+            'annotation': 'custom',
+            'data_source': 'custom',
+            'metrics': [{'type': 'map'}]
+        }
+
+    def test_missed_name_raises_config_error_exception(self):
+        local_dataset = copy_dataset_config(self.dataset_config)
+        local_dataset.pop('name')
+
+        with pytest.raises(ConfigError):
+            Dataset(local_dataset, MockPreprocessor())
+
+    def test_setting_custom_dataset_with_missed_annotation_raises_config_error_exception(self):
+        local_dataset = copy_dataset_config(self.dataset_config)
+        local_dataset.pop('annotation')
+        with pytest.raises(ConfigError):
+            Dataset(local_dataset, MockPreprocessor())
+
+    @pytest.mark.usefixtures('mock_path_exists')
+    def test_setting_custom_dataset_with_missed_data_source_raises_config_error_exception(self):
+        local_dataset = copy_dataset_config(self.dataset_config)
+        local_dataset.pop('data_source')
+        with pytest.raises(ConfigError):
+            Dataset(local_dataset, MockPreprocessor())
+
+
+@pytest.mark.usefixtures('mock_path_exists')
+class TestAnnotationConversion:
+    dataset_config = {
+        'name': 'custom',
+        'data_source': 'custom',
+        'metrics': [{'type': 'map'}]
+    }
+
+    def test_annotation_conversion_unknown_converter_raise_config_error(self):
+        addition_options = {'annotation_conversion': {'converter': 'unknown'}}
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        with pytest.raises(ValueError):
+            Dataset(config, MockPreprocessor())
+
+    def test_annotation_conversion_converter_without_required_options_raise_config_error(self):
+        addition_options = {'annotation_conversion': {'converter': 'wider'}}
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        with pytest.raises(ConfigError):
+            Dataset(config, MockPreprocessor())
+
+    def test_annotation_conversion_raise_config_error_on_extra_args(self):
+        addition_options = {'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file', 'something_extra': 'extra'}}
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        with pytest.raises(ConfigError):
+            Dataset(config, MockPreprocessor())
+
+    def test_sucessful_annotation_conversion(self, mocker):
+        addition_options = {'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'}}
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        annotation_converter_mock = mocker.patch(
+            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=(make_representation("0 0 0 5 5", True), None)
+        )
+        Dataset(config, MockPreprocessor())
+        annotation_converter_mock.assert_called_once_with()
+
+    def test_annotation_conversion_with_store_annotation(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
+            'annotation': 'custom'
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation('0 0 0 5 5', True)
+        mocker.patch(
+            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=(converted_annotation, None)
+        )
+        annotation_saver_mock = mocker.patch(
+            'accuracy_checker.dataset.save_annotation'
+        )
+        Dataset(config, MockPreprocessor())
+
+        annotation_saver_mock.assert_called_once_with(converted_annotation, None, Path('custom'), None)
+
+    def test_annotation_conversion_subset_size(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
+            'subsample_size': 1
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=(converted_annotation, None)
+        )
+        dataset = Dataset(config, MockPreprocessor())
+        assert dataset.annotation == [converted_annotation[1]]
+
+    def test_annotation_conversion_subset_ratio(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
+            'subsample_size': '50%'
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=(converted_annotation, None)
+        )
+        subset_maker_mock = mocker.patch(
+            'accuracy_checker.dataset.make_subset'
+        )
+        Dataset(config, MockPreprocessor())
+        subset_maker_mock.assert_called_once_with(converted_annotation, 1, 666)
+
+    def test_annotation_conversion_subset_with_seed(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
+            'subsample_size': 1,
+            'subsample_seed': 1
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=(converted_annotation, None)
+        )
+        dataset = Dataset(config, MockPreprocessor())
+        annotation = dataset.annotation
+        assert annotation == [converted_annotation[0]]
+
+    def test_annotation_conversion_save_subset(self, mocker):
+        addition_options = {
+            'annotation_conversion': {'converter': 'wider', 'annotation_file': 'file'},
+            'annotation': 'custom',
+            'subsample_size': 1,
+        }
+        config = copy_dataset_config(self.dataset_config)
+        config.update(addition_options)
+        converted_annotation = make_representation(['0 0 0 5 5', '0 1 1 10 10'], True)
+        mocker.patch(
+            'accuracy_checker.annotation_converters.WiderFormatConverter.convert',
+            return_value=(converted_annotation, None)
+        )
+        annotation_saver_mock = mocker.patch(
+            'accuracy_checker.dataset.save_annotation'
+        )
+        Dataset(config, MockPreprocessor())
+        annotation_saver_mock.assert_called_once_with([converted_annotation[1]], None, Path('custom'), None)
diff --git a/tools/accuracy_checker/tests/test_dependency.py b/tools/accuracy_checker/tests/test_dependency.py
new file mode 100644
index 000000000..0f98842aa
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_dependency.py
@@ -0,0 +1,89 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from accuracy_checker.dependency import ClassProvider, get_opts
+
+
+def test_get_opts_positional_and_kwargs():
+    opts = {'o': ((1,), {'a': 1})}
+    args, kwargs = get_opts(opts['o'])
+
+    assert args == (1,)
+    assert kwargs == {'a': 1}
+
+
+def test_get_opts_kwargs_only():
+    opts = {'o': {'a': 1}}
+    args, kwargs = get_opts(opts['o'])
+
+    assert args == ()
+    assert kwargs == {'a': 1}
+
+
+def test_get_opts_positional_only():
+    opts = {'o': (1, 2, 3)}
+    args, kwargs = get_opts(opts['o'])
+
+    assert args == (1, 2, 3)
+    assert kwargs == {}
+
+
+def test_class_provider():
+    class BaseService(ClassProvider):
+        __provider_type__ = 'Service'
+
+    class ServiceA(BaseService):
+        __provider__ = 'service_a'
+
+    class ServiceB(BaseService):
+        __provider__ = 'service_b'
+
+    assert issubclass(ServiceA, BaseService)
+    assert issubclass(ServiceB, BaseService)
+
+    assert 'service_a' in BaseService.providers
+    assert 'service_b' in BaseService.providers
+
+
+def test_provide():
+    class BaseService(ClassProvider):
+        __provider_type__ = 'service'
+
+        def __init__(self):
+            pass
+
+    class ServiceA(BaseService):
+        __provider__ = 'service_a'
+
+    provided = BaseService.provide('service_a')
+
+    assert isinstance(provided, ServiceA)
+
+
+def test_provide_with_args():
+    class BaseService(ClassProvider):
+        __provider_type__ = 'service'
+
+        def __init__(self, bar):
+            self.bar = bar
+
+    class ServiceA(BaseService):
+        __provider__ = 'service_a'
+
+    provided = BaseService.provide('service_a', bar=42)
+
+    assert isinstance(provided, ServiceA)
+    assert provided.bar == 42
diff --git a/tools/accuracy_checker/tests/test_detection_metrics.py b/tools/accuracy_checker/tests/test_detection_metrics.py
new file mode 100644
index 000000000..def135499
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_detection_metrics.py
@@ -0,0 +1,459 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pytest
+import numpy as np
+from accuracy_checker.metrics import DetectionMAP
+from accuracy_checker.metrics.detection import Recall, bbox_match
+from accuracy_checker.metrics.overlap import IOU, IOA
+from tests.common import (make_representation, single_class_dataset, multi_class_dataset,
+                          multi_class_dataset_without_background)
+
+
+def _test_metric_wrapper(metric_cls, dataset, **kwargs):
+    provider = metric_cls.__provider__
+    config = {'type': provider, 'name': provider}
+    config.update(**kwargs)
+    return metric_cls(config, dataset, provider)
+
+
+class TestBoxMatch:
+    def test_single(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 1
+        assert fp[0] == 0
+
+    def test_single_with_ignored_tp(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        pred[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 0
+        assert fp[0] == 0
+
+    def test_single_with_use_filtered_tp(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        pred[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator, use_filtered_tp=True)
+        assert tp[0] == 1
+        assert fp[0] == 0
+
+    def test_single_non_overlap(self):
+        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
+        pred = make_representation("0 0 0 5 5", score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 0
+        assert fp[0] == 1
+
+    def test_single_non_overlap_ignored(self):
+        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
+        pred = make_representation("0 0 0 5 5", score=1)
+        pred[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 0
+        assert fp[0] == 0
+
+    def test_multiple(self):
+        gt = make_representation("0 0 0 5 5; 0 7 7 8 8", is_ground_truth=True)
+        pred = make_representation("0 0 0 5 5; 0 7 7 8 8", score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 1
+        assert tp[1] == 1
+        assert fp[0] == 0
+        assert fp[0] == 0
+
+    def test_multiple_2(self):
+        gt = make_representation("0 0 0 5 5; 0 9 9 10 10", is_ground_truth=True)
+        pred = make_representation("1 0 0 0 5 5; 0.8 0 7 7 8 8")
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 1
+        assert tp[1] == 0
+        assert fp[0] == 0
+        assert fp[1] == 1
+
+    def test_multi_label(self):
+        gt = make_representation("1 0 0 5 5; 0 9 9 10 10", is_ground_truth=True)
+        pred = make_representation("1 1 0 0 5 5; 0.8 0 7 7 8 8")
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 1, overlap_evaluator)
+        assert tp.shape[0] == 1
+        assert tp[0] == 1
+        assert fp[0] == 0
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp.shape[0] == 1
+        assert tp[0] == 0
+        assert fp[0] == 1
+
+    def test_multi_image(self):
+        gt = make_representation(["0 0 0 5 5", "0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5", "0 0 0 5 5"], score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 1
+        assert tp[1] == 1
+        assert fp[0] == 0
+        assert fp[1] == 0
+
+    def test_false_negative(self):
+        gt = make_representation("0 0 0 5 5; 0 1 1 6 6", is_ground_truth=True)
+        pred = make_representation("0 0 0 5 5", score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, ngt = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 1
+        assert tp.shape[0] == 1
+        assert ngt == 2
+
+    def test_multiple_detections(self):
+        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
+        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 1
+        assert tp[1] == 0
+
+    def test_no_annotations(self):
+        gt = "1 0 0 5 5"
+        pred = "0 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, _ = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert tp[0] == 0
+        assert fp[0] == 1
+
+    def test_no_predictions(self):
+        gt = "0 0 0 5 5"
+        pred = "1 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert n == 1
+        assert len(tp) == 0
+        assert len(fp) == 0
+
+    def test_iou_empty_prediction_box(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 0 0"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        overlap_evaluator = IOU({})
+
+        with pytest.warns(None) as warnings:
+            tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
+            assert len(warnings) == 0
+            assert n == 1
+            assert tp[0] == 0
+            assert fp[0] == 1
+
+    def test_ioa_empty_prediction_box(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 0 0"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        overlap_evaluator = IOA({})
+
+        with pytest.warns(None) as warnings:
+            tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
+            assert len(warnings) == 0
+            assert n == 1
+            assert tp[0] == 0
+            assert fp[0] == 1
+
+    def test_iou_zero_union(self):
+        gt = "0 0 0 0 0"
+        pred = "0 0 0 0 0"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        overlap_evaluator = IOA({})
+
+        with pytest.warns(None) as warnings:
+            tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
+            assert len(warnings) == 0
+            assert n == 1
+            assert tp[0] == 0
+            assert fp[0] == 1
+
+    def test_single_difficult(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        gt[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=True)
+        assert n == 0
+        assert tp[0] == 0
+        assert fp[0] == 0
+
+    def test_single_with_not_ignore_difficult(self):
+        gt = "0 0 0 5 5"
+        pred = "0 0 0 5 5"
+
+        gt = make_representation(gt, is_ground_truth=True)
+        pred = make_representation(pred, score=1)
+        gt[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=False)
+        assert n == 1
+        assert tp[0] == 1
+        assert fp[0] == 0
+
+    def test_single_difficult_non_overlap(self):
+        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
+        gt[0].metadata['difficult_boxes'] = [0]
+        pred = make_representation("0 0 0 5 5", score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator)
+        assert n == 0
+        assert tp[0] == 0
+        assert fp[0] == 1
+
+    def test_single_difficult_non_overlap_not_ignore_difficult(self):
+        gt = make_representation("0 5 5 10 10", is_ground_truth=True)
+        gt[0].metadata['difficult_boxes'] = [0]
+        pred = make_representation("0 0 0 5 5", score=1)
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=False)
+        assert n == 1
+        assert tp[0] == 0
+        assert fp[0] == 1
+
+    def test_multiple_detections_with_ignore_difficult(self):
+        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
+        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
+        gt[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=True)
+        assert n == 0
+        assert tp[0] == 0
+        assert tp[1] == 0
+        assert fp[0] == 0
+        assert fp[1] == 0
+
+    def test_multiple_detections_with_not_ignore_difficult(self):
+        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
+        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
+        gt[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(gt, pred, 0, overlap_evaluator, ignore_difficult=False)
+        assert n == 1
+        assert tp[0] == 1
+        assert tp[1] == 0
+        assert fp[0] == 0
+        assert fp[1] == 1
+
+    def test_multiple_detections_with_ignore_difficult_and_not_allow_multiple_matches_per_ignored(self):
+        gt = make_representation("0 0 0 5 5", is_ground_truth=True)
+        pred = make_representation("1 0 0 0 5 5; 0.9 0 0 0 5 5")
+        gt[0].metadata['difficult_boxes'] = [0]
+        overlap_evaluator = IOU({})
+
+        tp, fp, _, n = bbox_match(
+            gt, pred, 0, overlap_evaluator,
+            ignore_difficult=True, allow_multiple_matches_per_ignored=False
+        )
+
+        assert n == 0
+        assert tp[0] == 0
+        assert tp[1] == 0
+        assert fp[0] == 0
+        assert fp[1] == 1
+
+
+class TestRecall:
+    def test_one_object(self):
+        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5"], score=1)
+        metric = _test_metric_wrapper(Recall, single_class_dataset())
+        assert 1 == metric(gt, pred)[0]
+        assert metric.meta.get('names') == ['dog']
+
+    def test_two_objects(self):
+        gt = make_representation(["0 0 0 5 5; 0 10 10 20 20"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 0 10 10 20 20"], score=1)
+        assert 1 == _test_metric_wrapper(Recall, single_class_dataset())(gt, pred)[0]
+
+    def test_false_positive(self):
+        gt2 = make_representation(["0 10 10 20 20"], is_ground_truth=True)
+        pred2 = make_representation(["0 0 0 5 5"], score=1)
+        metric = _test_metric_wrapper(Recall, single_class_dataset())
+        assert 0 == metric(gt2, pred2)[0]
+        assert metric.meta.get('names') == ['dog']
+
+        gt1 = make_representation(["0 0 0 5 5"], is_ground_truth=True)
+        pred1 = make_representation(["0 0 0 5 5; 0 10 10 20 20"], score=1)
+        assert 1 == metric(gt1, pred1)[0]
+        assert metric.meta.get('names') == ['dog']
+
+    def test_false_negative(self):
+        gt = make_representation(["0 10 10 20 20; 0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5"], score=1)
+        metric = _test_metric_wrapper(Recall, single_class_dataset())
+        assert 0.5 == metric(gt, pred)[0]
+        assert metric.meta.get('names') == ['dog']
+
+    def test_duplicate_detections(self):
+        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 0 0 0 5 5"], score=1)
+
+        metric = _test_metric_wrapper(Recall, single_class_dataset())
+        assert 1 == metric(gt, pred)[0]
+        assert metric.meta.get('names') == ['dog']
+
+    def test_no_warnings_in_recall_calculation(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
+
+        with pytest.warns(None) as warnings:
+            _test_metric_wrapper(Recall, multi_class_dataset())(gt, pred)
+        assert len(warnings) == 0
+
+    def test_on_dataset_without_background(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
+
+        with pytest.warns(None) as warnings:
+            _test_metric_wrapper(Recall, multi_class_dataset_without_background())(gt, pred)
+        assert len(warnings) == 0
+
+    def test_not_gt_boxes_for_matching(self):
+        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["1 0 0 5 5"], score=1)
+
+        metric = _test_metric_wrapper(Recall, multi_class_dataset_without_background())
+        assert 0 == metric(gt, pred)[0]
+        assert metric.meta.get('names') == ['cat']
+
+
+class TestMAP:
+    def test_selects_all_detections(self):
+        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 0 0 0 5 5"], score=1)
+
+        metric = _test_metric_wrapper(DetectionMAP, single_class_dataset())
+        metric(gt, pred)
+
+        assert not metric.distinct_conf
+        assert metric.overlap_threshold == 0.5
+        assert metric.ignore_difficult
+        assert metric.meta.get('names') == ['dog']
+
+    def test_no_warnings_in_map_calculation(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
+
+        with pytest.warns(None) as warnings:
+            _test_metric_wrapper(DetectionMAP, multi_class_dataset())(gt, pred)
+        assert len(warnings) == 0
+
+    def test_perfect_detection(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
+
+        metric = _test_metric_wrapper(DetectionMAP, multi_class_dataset())
+        assert metric(gt, pred) == [1.0, 1.0]
+        assert metric.meta.get('names') == ['dog', 'cat']
+
+    def test_one_false_alarm(self):
+        gt = make_representation(["0 0 0 5 5", "1 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["1 10 10 20 20; 0 0 0 5 5", "1 0 0 5 5"], score=1)
+        metric = _test_metric_wrapper(DetectionMAP, multi_class_dataset())
+        values = metric(gt, pred)
+        assert values == [1.0, 0.5]
+        map_ = np.mean(values)
+        assert 0.75 == map_
+        assert metric.meta.get('names') == ['dog', 'cat']
+
+    def test_zero_detection(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20"], is_ground_truth=True)
+        pred = make_representation(["0 30 30 40 40"], score=1)
+
+        metric = _test_metric_wrapper(DetectionMAP, multi_class_dataset())
+        assert metric(gt, pred) == [0.0]
+        assert metric.meta.get('names') == ['dog']
+
+    def test_no_detections_warn_user_warning(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20"], is_ground_truth=True)
+        pred = make_representation("", score=1)
+        with pytest.warns(UserWarning) as warnings:
+            map_ = _test_metric_wrapper(DetectionMAP, multi_class_dataset())(gt, pred)[0]
+            assert len(warnings) == 1
+
+            assert map_ == 0
+
+    def test_detection_on_dataset_without_background(self):
+        gt = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["0 0 0 5 5; 1 10 10 20 20", "1 0 0 5 5"], score=1)
+
+        with pytest.warns(None) as warnings:
+            map_ = _test_metric_wrapper(DetectionMAP, multi_class_dataset_without_background())(gt, pred)
+            mean = np.mean(map_)
+            assert 1.0 == mean
+        assert len(warnings) == 0
+
+    def test_not_gt_boxes_for_box_matching(self):
+        gt = make_representation(["0 0 0 5 5"], is_ground_truth=True)
+        pred = make_representation(["1 0 0 5 5"], score=1)
+
+        metric = _test_metric_wrapper(Recall, multi_class_dataset_without_background())
+        assert 0 == metric(gt, pred)[0]
+        assert metric.meta.get('names') == ['cat']
diff --git a/tools/accuracy_checker/tests/test_dlsdk_launcher.py b/tools/accuracy_checker/tests/test_dlsdk_launcher.py
new file mode 100644
index 000000000..599f77a3f
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_dlsdk_launcher.py
@@ -0,0 +1,980 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import subprocess
+
+import pytest
+
+pytest.importorskip('accuracy_checker.launcher.dlsdk_launcher')
+import os
+import cv2
+import numpy as np
+
+from pathlib import Path
+from unittest.mock import PropertyMock
+from accuracy_checker.config import ConfigError
+from accuracy_checker.launcher import DLSDKLauncher
+from accuracy_checker.launcher.dlsdk_launcher import DLSDKLauncherConfig
+from accuracy_checker.launcher.launcher import create_launcher
+from tests.common import update_dict
+from accuracy_checker.dataset import DataRepresentation
+from accuracy_checker.utils import contains_all
+
+
+@pytest.fixture()
+def mock_inference_engine(mocker):
+    try:
+        mocker.patch('openvino.inference_engine.IEPlugin')
+        mocker.patch('openvino.inference_engine.IENetwork')
+    except ImportError:
+        mocker.patch('inference_engine.IEPlugin')
+        mocker.patch('inference_engine.IENetwork')
+
+
+@pytest.fixture()
+def mock_inputs(mocker):
+    mocker.patch(
+        'accuracy_checker.launcher.input_feeder.InputFeeder._parse_inputs_config', return_value=({}, ['data'], None)
+    )
+
+
+def get_dlsdk_test_model(models_dir, config_update=None):
+    config = {
+        'framework': 'dlsdk',
+        'weights': str(models_dir / 'SampLeNet.bin'),
+        'model': str(models_dir / 'SampLeNet.xml'),
+        'device': 'CPU',
+        'adapter': 'classification',
+        '_models_prefix': str(models_dir)
+    }
+    if config_update:
+        config.update(config_update)
+
+    return create_launcher(config)
+
+
+def get_image(image_path, input_shape):
+    _, h, w = input_shape
+    img_raw = cv2.imread(str(image_path))
+
+    return DataRepresentation(cv2.resize(img_raw, (w, h)))
+
+
+class TestDLSDKLauncherInfer:
+    def test_infer(self, data_dir, models_dir):
+        dlsdk_test_model = get_dlsdk_test_model(models_dir)
+        result = dlsdk_test_model.predict(['1.jpg'], [get_image(data_dir / '1.jpg', dlsdk_test_model.inputs['data'])])
+
+        assert dlsdk_test_model.adapter.output_blob == 'fc3'
+        assert result[0].label == 6
+
+    def test_launcher_creates(self, models_dir):
+        assert get_dlsdk_test_model(models_dir).inputs['data'] == [3, 32, 32]
+
+    def test_infer_with_additional_outputs(self, data_dir, models_dir):
+        dlsdk_test_model = get_dlsdk_test_model(models_dir, {'outputs': ['fc1', 'fc2']})
+        result = dlsdk_test_model.predict(['1.jpg'], [get_image(data_dir / '1.jpg', dlsdk_test_model.inputs['data'])])
+        outputs = list(dlsdk_test_model.network.outputs.keys())
+        adapter_output_blob = dlsdk_test_model.adapter.output_blob
+
+        assert contains_all(outputs, ['fc1', 'fc2', 'fc3'])
+        assert adapter_output_blob == 'fc3'
+        assert result[0].label == 6
+
+    def test_dlsdk_launcher_provide_input_shape_to_adapter(self, mocker, models_dir):
+        raw_results = {}
+
+        def raw_results_callback(outputs):
+            raw_results.update(outputs)
+
+        launcher = get_dlsdk_test_model(models_dir)
+
+        adapter_mock = mocker.patch('accuracy_checker.adapters.ClassificationAdapter.process')
+        launcher.predict(['1.png'], [DataRepresentation(np.zeros((32, 32, 3)))], output_callback=raw_results_callback)
+        adapter_mock.assert_called_once_with([raw_results], ['1.png'], [{'input_shape': {'data': [3, 32, 32]}, 'image_size': (32, 32, 3)}])
+
+    def test_dlsd_launcher_set_batch_size(self, models_dir):
+        dlsdk_test_model = get_dlsdk_test_model(models_dir, {'batch': 2})
+        assert dlsdk_test_model.batch == 2
+
+
+@pytest.mark.usefixtures('mock_path_exists')
+class TestDLSDKLauncherAffinity:
+    def test_dlsdk_launcher_valid_affinity_map(self, mocker, models_dir):
+        affinity_map = {'conv1' : 'GPU'}
+
+        mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.read_yaml', return_value=affinity_map
+        )
+
+        dlsdk_test_model = get_dlsdk_test_model(models_dir, {'device' : 'HETERO:CPU,GPU', 'affinity_map' : './affinity_map.yml'})
+        layers = dlsdk_test_model.network.layers
+        for key, value in affinity_map.items():
+            assert layers[key].affinity == value
+
+    def test_dlsdk_launcher_affinity_map_invalid_device(self, mocker, models_dir):
+        affinity_map = {'conv1' : 'GPU'}
+
+        mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.read_yaml', return_value=affinity_map
+        )
+
+        with pytest.raises(ConfigError):
+            get_dlsdk_test_model(models_dir, {'device' : 'HETERO:CPU,CPU', 'affinity_map' : './affinity_map.yml'})
+
+    def test_dlsdk_launcher_affinity_map_invalid_layer(self, mocker, models_dir):
+        affinity_map = {'none-existing-layer' : 'CPU'}
+
+        mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.read_yaml', return_value=affinity_map
+        )
+
+        with pytest.raises(ConfigError):
+            get_dlsdk_test_model(models_dir, {'device' : 'HETERO:CPU,CPU', 'affinity_map' : './affinity_map.yml'})
+
+
+@pytest.mark.usefixtures('mock_path_exists', 'mock_inference_engine', 'mock_inputs')
+class TestDLSDKLauncher:
+    def test_program_bitsream_when_device_is_fpga(self, mocker):
+        subprocess_mock = mocker.patch('subprocess.run')
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'fpga',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix',
+            '_aocl': Path('aocl')
+        }
+        launcher = create_launcher(config, {'label_map': {}})
+        subprocess_mock.assert_called_once_with(['aocl', 'program', 'acl0', 'custom_bitstream'])
+        launcher.release()
+
+    def test_program_bitsream_when_fpga_in_hetero_device(self, mocker):
+        subprocess_mock = mocker.patch('subprocess.run')
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'hetero:fpga,cpu',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix',
+            '_aocl': Path('aocl')
+        }
+        launcher = create_launcher(config, {'label_map': {}})
+        subprocess_mock.assert_called_once_with(['aocl', 'program', 'acl0', 'custom_bitstream'])
+        launcher.release()
+
+    def test_does_not_program_bitsream_when_device_is_not_fpga(self, mocker):
+        subprocess_mock = mocker.patch('subprocess.run')
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'cpu',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix',
+            '_aocl': Path('aocl')
+        }
+        create_launcher(config)
+        subprocess_mock.assert_not_called()
+
+    def test_does_not_program_bitsream_when_hetero_without_fpga(self, mocker):
+        subprocess_mock = mocker.patch('subprocess.run')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'hetero:cpu,cpu',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix',
+            '_aocl': Path('aocl')
+        }
+        create_launcher(config)
+        subprocess_mock.assert_not_called()
+
+    def test_does_not_program_bitstream_if_compiler_mode_3_in_env_when_fpga_in_hetero_device(self, mocker):
+        subprocess_mock = mocker.patch('subprocess.run')
+        mocker.patch('os.environ.get', return_value='3')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'hetero:fpga,cpu',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix',
+            '_aocl': Path('aocl')
+        }
+        create_launcher(config)
+
+        subprocess_mock.assert_not_called()
+
+    def test_does_not_program_bitstream_if_compiler_mode_3_in_env_when_fpga_in_device(self, mocker):
+        subprocess_mock = mocker.patch('subprocess.run')
+        mocker.patch('os.environ.get', return_value='3')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'fpga',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix',
+            '_aocl': Path('aocl')
+        }
+        create_launcher(config)
+
+        subprocess_mock.assert_not_called()
+
+    def test_sets_dla_aocx_when_device_is_fpga(self, mocker):
+        mocker.patch('os.environ')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'fpga',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        create_launcher(config, {'label_map': {}})
+
+        os.environ.__setitem__.assert_called_once_with('DLA_AOCX', 'custom_bitstream')
+
+    def test_sets_dla_aocx_when_fpga_in_hetero_device(self, mocker):
+        mocker.patch('os.environ')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'hetero:fpga,cpu',
+            'bitstream': Path('custom_bitstream'),
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        create_launcher(config, {'label_map': {}})
+        os.environ.__setitem__.assert_called_once_with('DLA_AOCX', 'custom_bitstream')
+
+    def test_does_not_set_dla_aocx_when_device_is_not_fpga(self, mocker):
+        mocker.patch('os.environ')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'cpu',
+            'bitstream': 'custom_bitstream',
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        create_launcher(config)
+
+        os.environ.__setitem__.assert_not_called()
+
+    def test_does_not_set_dla_aocx_when_hetero_without_fpga(self, mocker):
+        mocker.patch('os.environ')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'hetero:cpu,cpu',
+            'bitstream': 'custom_bitstream',
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        create_launcher(config)
+
+        os.environ.__setitem__.assert_not_called()
+
+    def test_does_not_set_dla_aocx_if_compiler_mode_3_in_env_when_fpga_in_hetero_device(self, mocker):
+        mocker.patch('os.environ')
+        mocker.patch('os.environ.get', return_value='3')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'hetero:fpga,cpu',
+            'bitstream': 'custom_bitstream',
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        create_launcher(config)
+
+        os.environ.__setitem__.assert_not_called()
+
+    def test_does_not_set_dla_aocx_if_compiler_mode_3_in_env_when_fpga_in_device(self, mocker):
+        mocker.patch('os.environ')
+        mocker.patch('os.environ.get', return_value='3')
+
+        config = {
+            'framework': 'dlsdk',
+            'weights': 'custom_weights',
+            'model': 'custom_model',
+            'device': 'fpga',
+            'bitstream': 'custom_bitstream',
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        create_launcher(config)
+
+        os.environ.__setitem__.assert_not_called()
+
+    def test_model_converted_from_caffe(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': 'dlsdk',
+            'caffe_model': '/path/to/source_models/custom_model',
+            'caffe_weights': '/path/to/source_models/custom_weights',
+            "device": 'cpu',
+            'bitstream': Path('custom_bitstream'),
+            '_models_prefix': '/path/to/source_models',
+            'adapter': 'classification'
+        }
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_model', '/path/to/source_models/custom_model', '/path/to/source_models/custom_weights', 'caffe',
+            [], None, None, None, None
+        )
+
+    def test_model_converted_with_mo_params(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': "dlsdk",
+            'caffe_model': '/path/to/source_models/custom_model',
+            'caffe_weights': '/path/to/source_models/custom_weights',
+            'device': 'cpu',
+            'bitstream': Path('custom_bitstream'),
+            '_models_prefix': '/path/to/source_models',
+            'mo_params': {'data_type': 'FP16'},
+            'adapter': 'classification'
+        }
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_model', '/path/to/source_models/custom_model', '/path/to/source_models/custom_weights', 'caffe',
+            [], {'data_type': 'FP16'}, None, None, None
+        )
+
+    def test_model_converted_with_mo_flags(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': 'dlsdk',
+            'caffe_model': '/path/to/source_models/custom_model',
+            'caffe_weights': '/path/to/source_models/custom_weights',
+            'device': 'cpu',
+            'bitstream': Path('custom_bitstream'),
+            '_models_prefix': '/path/to/source_models',
+            'mo_flags': ['reverse_input_channels'],
+            'adapter': 'classification'
+        }
+
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_model', '/path/to/source_models/custom_model', '/path/to/source_models/custom_weights', 'caffe',
+            [], None, ['reverse_input_channels'], None, None
+        )
+
+    def test_model_converted_to_output_dir_in_mo_params(self, mocker):
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to',
+            'adapter': 'classification',
+            'mo_params': {'output_dir': '/path/to/output/models'}
+        }
+        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value='ModelOptimizer')
+        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
+        args = {
+            'input_model': '/path/to/source_models/custom_model',
+            'model_name': 'custom_model',
+            'output_dir': '/path/to/output/models',
+            'framework': 'tf'
+        }
+
+        mocker.patch(
+            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
+            return_value=subprocess.CompletedProcess(args, returncode=0)
+        )
+        DLSDKLauncher(config, dummy_adapter)
+        prepare_args_patch.assert_called_once_with('ModelOptimizer', flag_options=[], value_options=args)
+
+    def test_model_converted_from_tf(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to/source_models',
+            'adapter': 'classification'
+        }
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_model', '/path/to/source_models/custom_model', '', 'tf', [], None, None, None, None
+        )
+
+    def test_model_converted_from_tf_with_arg_path_to_custom_tf_config(self, mocker):
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to',
+            'adapter': 'classification',
+            'mo_params': {'tensorflow_use_custom_operations_config': 'ssd_v2_support.json'},
+            '_tf_custom_op_config_dir': 'config/dir'
+        }
+        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
+        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
+
+        args = {
+            'input_model': '/path/to/source_models/custom_model',
+            'model_name': 'custom_model',
+            'framework': 'tf',
+            'tensorflow_use_custom_operations_config': 'config/dir/ssd_v2_support.json'
+        }
+
+        mocker.patch(
+            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
+            return_value=subprocess.CompletedProcess(args, returncode=0)
+        )
+        DLSDKLauncher(config, dummy_adapter)
+        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
+
+    def test_model_converted_from_tf_with_default_path_to_custom_tf_config(self, mocker):
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to',
+            'adapter': 'classification',
+            'mo_params': {'tensorflow_use_custom_operations_config': 'config.json'}
+        }
+        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
+        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
+
+        args = {
+            'input_model': '/path/to/source_models/custom_model',
+            'model_name': 'custom_model',
+            'framework': 'tf',
+            'tensorflow_use_custom_operations_config': '/path/extensions/front/tf/config.json'
+        }
+
+        mocker.patch(
+            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
+            return_value=subprocess.CompletedProcess(args, returncode=0)
+        )
+        DLSDKLauncher(config, dummy_adapter)
+        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
+
+    def test_model_converted_from_tf_with_default_path_to_obj_detection_api_config(self, mocker):
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to',
+            'adapter': 'classification',
+            'mo_params': {'tensorflow_object_detection_api_pipeline_config': 'operations.config'},
+            '_tf_obj_detection_api_pipeline_config_path': None
+        }
+        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
+        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
+
+        args = {
+            'input_model': '/path/to/source_models/custom_model',
+            'model_name': 'custom_model',
+            'framework': 'tf',
+            'tensorflow_object_detection_api_pipeline_config': '/path/to/source_models/operations.config'
+        }
+
+        mocker.patch(
+            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
+            return_value=subprocess.CompletedProcess(args, returncode=0)
+        )
+        DLSDKLauncher(config, dummy_adapter)
+        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
+
+    def test_model_converted_from_tf_with_arg_path_to_obj_detection_api_config(self, mocker):
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to',
+            'adapter': 'classification',
+            'mo_params': {'tensorflow_object_detection_api_pipeline_config': 'operations.config'},
+            '_tf_custom_op_config_dir': 'config/dir',
+            '_tf_obj_detection_api_pipeline_config_path': 'od_api'
+        }
+        mocker.patch('accuracy_checker.launcher.model_conversion.find_mo', return_value=Path('/path/ModelOptimizer'))
+        prepare_args_patch = mocker.patch('accuracy_checker.launcher.model_conversion.prepare_args')
+
+        args = {
+            'input_model': '/path/to/source_models/custom_model',
+            'model_name': 'custom_model',
+            'framework': 'tf',
+            'tensorflow_object_detection_api_pipeline_config': 'od_api/operations.config'
+        }
+
+        mocker.patch(
+            'accuracy_checker.launcher.model_conversion.exec_mo_binary',
+            return_value=subprocess.CompletedProcess(args, returncode=0)
+        )
+        DLSDKLauncher(config, dummy_adapter)
+        prepare_args_patch.assert_called_once_with('/path/ModelOptimizer', flag_options=[], value_options=args)
+
+    def test_model_converted_from_mxnet(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': 'dlsdk',
+            'mxnet_weights': '/path/to/source_models/custom_weights',
+            'device': 'cpu',
+            '_models_prefix': '/path/to/source_models',
+            'adapter': 'classification'
+        }
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_weights', '', '/path/to/source_models/custom_weights', 'mxnet', [], None, None, None, None
+        )
+
+    def test_model_converted_from_onnx(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': 'dlsdk',
+            'onnx_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to/source_models',
+            'adapter': 'classification'
+        }
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_model', '/path/to/source_models/custom_model', '', 'onnx', [], None, None, None, None
+        )
+
+    def test_model_converted_from_kaldi(self, mocker):
+        mock = mocker.patch(
+            'accuracy_checker.launcher.dlsdk_launcher.convert_model',
+            return_value=('converted_model', 'converted_weights')
+        )
+
+        config = {
+            'framework': 'dlsdk',
+            'kaldi_model': '/path/to/source_models/custom_model',
+            'device': 'cpu',
+            '_models_prefix': '/path/to/source_models',
+            'adapter': 'classification'
+        }
+        DLSDKLauncher(config, dummy_adapter)
+
+        mock.assert_called_once_with(
+            'custom_model', '/path/to/source_models/custom_model', '', 'kaldi', [], None, None, None, None
+        )
+
+    def test_raises_with_multiple_models_caffe_dlsdk(self):
+        config = {
+            'framework': 'dlsdk',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_tf_dlsdk(self):
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': 'tf_model',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_mxnet_dlsdk(self):
+        config = {
+            'framework': 'dlsdk',
+            'mxnet_weights': 'mxnet_weights',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_onnx_dlsdk(self):
+        config = {
+            'framework': 'dlsdk',
+            'onnx_model': 'onnx_model',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_kaldi_dlsdk(self):
+        config = {
+            'framework': 'dlsdk',
+            'onnx_model': 'kaldi_model',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_mxnet_caffe(self):
+        config = {
+            'framework': 'dlsdk',
+            'mxnet_weights': 'mxnet_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_tf_caffe(self):
+
+        config = {
+            'framework': 'dlsdk',
+            'tf_model': 'tf_model',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_onnx_caffe(self):
+
+        config = {
+            'framework': 'dlsdk',
+            'onnx_model': 'onnx_model',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_mxnet_tf(self):
+        config = {
+            'framework': 'dlsdk',
+            'mxnet_weights': 'mxnet_weights',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_onnx_tf(self):
+        config = {
+            'framework': 'dlsdk',
+            'onnx_model': 'onnx_model',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_mxnet_caffe_tf(self):
+        config = {
+            'framework': 'dlsdk',
+            'mxnet_weights': 'mxnet_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_caffe_tf(self):
+        config = {
+            'framework': 'dlsdk',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_caffe_onnx(self):
+        config = {
+            'framework': 'dlsdk',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'onnx_model': 'onnx_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_caffe_mxnet(self):
+        config = {
+            'framework': 'dlsdk',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'mxnet_weights': 'mxnet_weights',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_tf_mxnet(self):
+        config = {
+            'framework': "dlsdk",
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'mxnet_weights': 'mxnet_weights',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_tf_onnx(self):
+        config = {
+            'framework': 'dlsdk',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'onnx_model': 'onnx_model',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_tf_mxnet_caffe(self):
+        config = {
+            'framework': 'dlsdk',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'mxnet_weights': 'mxnet_weights',
+            'onnx_model': 'onnx_model',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+    def test_raises_with_multiple_models_dlsdk_tf_mxnet_caffe_onnx(self):
+        config = {
+            'framework': 'dlsdk',
+            'model': 'custom_model',
+            'weights': 'custom_weights',
+            'caffe_model': 'caffe_model',
+            'caffe_weights': 'caffe_weights',
+            'mxnet_weights': 'mxnet_weights',
+            'tf_model': 'tf_model',
+            'device': 'cpu',
+            '_models_prefix': 'prefix'
+        }
+
+        with pytest.raises(ConfigError):
+            DLSDKLauncher(config, dummy_adapter)
+
+
+@pytest.mark.usefixtures('mock_path_exists', 'mock_inputs', 'mock_inference_engine')
+class TestDLSDKLauncherConfig:
+    def setup(self):
+        self.launcher = {
+            'model': 'foo.xml',
+            'weights': 'foo.bin',
+            'device': 'CPU',
+            'framework': 'dlsdk',
+            'adapter': 'classification',
+            '_models_prefix': 'prefix'
+        }
+        self.config = DLSDKLauncherConfig('dlsdk_launcher')
+
+    def test_hetero_correct(self):
+        self.config.validate(update_dict(self.launcher, device='HETERO:CPU'))
+        self.config.validate(update_dict(self.launcher, device='HETERO:CPU,FPGA'))
+
+    def test_hetero_endswith_comma(self):
+        with pytest.raises(ConfigError):
+            self.config.validate(update_dict(self.launcher, device='HETERO:CPU,FPGA,'))
+
+    def test_normal_multiple_devices(self):
+        with pytest.raises(ConfigError):
+            self.config.validate(update_dict(self.launcher, device='CPU,FPGA'))
+
+    def test_hetero_empty(self):
+        with pytest.raises(ConfigError):
+            self.config.validate(update_dict(self.launcher, device='HETERO:'))
+
+    def test_normal(self):
+        self.config.validate(update_dict(self.launcher, device='CPU'))
+
+    def test_missed_model_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        config = {'framework': 'dlsdk', 'weights': 'custom', 'adapter': 'classification', 'device': 'cpu'}
+
+        with pytest.raises(ConfigError):
+            create_launcher(config)
+
+    def test_missed_weights_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        launcher = {'framework': 'dlsdk', 'model': 'custom', 'adapter': 'ssd', 'device': 'cpu'}
+
+        with pytest.raises(ConfigError):
+            create_launcher(launcher)
+
+    def test_missed_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom'}
+
+        with pytest.raises(ConfigError):
+            create_launcher(launcher_config)
+
+    def test_undefined_str_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': 'undefined_str'}
+
+        with pytest.raises(ConfigError):
+            create_launcher(launcher_config)
+
+    def test_empty_dir_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {}}
+
+        with pytest.raises(ConfigError):
+            create_launcher(launcher_config)
+
+    def test_missed_type_in_dir_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {'key': 'val'}}
+
+        with pytest.raises(ConfigError):
+            create_launcher(launcher_config)
+
+    def test_undefined_type_in_dir_adapter_in_create_dlsdk_launcher_raises_config_error_exception(self):
+        launcher_config = {
+            'framework': 'dlsdk',
+            'model': 'custom',
+            'weights': 'custom',
+            'adapter': {'type': 'undefined'}
+        }
+
+        with pytest.raises(ConfigError):
+            create_launcher(launcher_config)
+
+    def test_dlsdk_launcher(self):
+        launcher = {
+            'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': 'ssd', 'device': 'cpu',
+            '_models_prefix': 'models'
+        }
+        create_launcher(launcher)
+
+    def test_dlsdk_launcher_model_with_several_image_inputs_raise_value_error(self, mocker):
+        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {'key': 'val'}}
+
+        with pytest.raises(ValueError):
+            mocker.patch(
+                'accuracy_checker.launcher.dlsdk_launcher.DLSDKLauncher.inputs',
+                new_callable=PropertyMock(return_value={'data1': [3, 227, 227], 'data2': [3, 227, 227]})
+            )
+            create_launcher(launcher_config)
+
+    def test_dlsdk_launcher_model_no_image_inputs_raise_value_error(self):
+        launcher_config = {'framework': 'dlsdk', 'model': 'custom', 'weights': 'custom', 'adapter': {'key': 'val'}}
+
+        with pytest.raises(ValueError):
+            create_launcher(launcher_config)
+
+
+def dummy_adapter():
+    pass
diff --git a/tools/accuracy_checker/tests/test_input_feeder.py b/tools/accuracy_checker/tests/test_input_feeder.py
new file mode 100644
index 000000000..a4b5e1405
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_input_feeder.py
@@ -0,0 +1,255 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pytest
+import re
+import numpy as np
+from accuracy_checker.config import ConfigError
+from accuracy_checker.launcher.input_feeder import InputFeeder
+from accuracy_checker.dataset import DataRepresentation
+
+# InputInfo from openvino is needed here, but there is no appropriate API
+# to create InputInfo with specific shape, therefore lets use analog
+class InputInfo_test:
+    layout = ''
+    precision = ''
+    shape = []
+    def __init__(self, layout = '', precision = '', shape = []):
+        self.layout = layout
+        self.precision = precision
+        self.shape = shape
+
+class TestInputFeeder:
+    def test_create_input_feeder_without_inputs_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            InputFeeder([], {})
+
+    def test_create_input_feeder_with_config_inputs_and_empty_network_inputs_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            InputFeeder([{'name': 'const_data', 'type': 'CONST_INPUT', 'value': '[1, 1, 1, 1]'}], {})
+
+    def test_create_input_feeder_with_config_const_inputs_not_in_network_inputs_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            InputFeeder([{'name': 'const_data', 'type': 'CONST_INPUT', 'value': '[1, 1, 1, 1]'}], {'data': (1, 3, 10, 10)})
+
+    def test_create_input_feeder_with_config_inputs_not_in_network_inputs_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            InputFeeder([{'name': 'data2', 'type': 'INPUT', 'value': '.'}], {'data': (1, 3, 10, 10)})
+
+    def test_create_input_feeder_without_config_inputs(self):
+        input_feeder = InputFeeder([], {'data': (1, 3, 10, 10)})
+        assert not input_feeder.const_inputs
+        assert not input_feeder.inputs_mapping
+        assert input_feeder.non_constant_inputs == ['data']
+
+    def test_create_input_feeder_config_inputs_fully_match_to_network_inputs(self):
+        input_feeder = InputFeeder([{'name': 'data', 'type': 'INPUT', 'value': '.'}], {'data': (1, 3, 10, 10)})
+        assert not input_feeder.const_inputs
+        assert input_feeder.inputs_mapping == {'data': re.compile('.')}
+        assert input_feeder.non_constant_inputs == ['data']
+
+    def test_create_input_feeder_config_inputs_contain_only_const_inputs_with_list_value(self):
+        input_feeder = InputFeeder([{'name': 'const_data', 'type': 'CONST_INPUT', 'value': [1, 1, 1, 1]}], {'data': (1, 3, 10, 10), 'const_data': (1, 4)})
+        assert np.array_equal(input_feeder.const_inputs['const_data'], np.ones(4))
+        assert not input_feeder.inputs_mapping
+        assert input_feeder.non_constant_inputs == ['data']
+
+    def test_create_input_feeder_config_inputs_contain_only_const_inputs_with_not_list_value(self):
+        input_feeder = InputFeeder(
+            [{'name': 'const_data', 'type': 'CONST_INPUT', 'value': 'value'}],
+            {'data': (1, 3, 10, 10), 'const_data': (1, 4)}
+        )
+        assert input_feeder.const_inputs['const_data'] == 'value'
+        assert not input_feeder.inputs_mapping
+        assert input_feeder.non_constant_inputs == ['data']
+
+    def test_create_input_feeder_not_all_non_constant_inputs_in_config_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            InputFeeder(
+                [{'name': '0', 'type': 'INPUT', 'value': '.'}],
+                {'0': (1, 3, 10, 10), '1': (1, 3, 10, 10)}
+            )
+
+    def test_fill_non_constant_input_with_one_input_without_specific_mapping_batch_1(self):
+        input_feeder = InputFeeder([], { 'input': InputInfo_test(shape=(1, 3, 10, 10)) })
+        result = input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])[0]
+        expected_data = np.zeros((1, 3, 10, 10))
+        assert 'input' in result
+        assert np.array_equal(result['input'], expected_data)
+
+    def test_fill_non_constant_input_without_specific_mapping_batch_2(self):
+        input_feeder = InputFeeder([], { 'input': InputInfo_test(shape=(1, 3, 10, 10))})
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
+            DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
+        ])[0]
+        expected_data = np.zeros((2, 3, 10, 10))
+        assert 'input' in result
+        assert np.array_equal(result['input'], expected_data)
+
+    def test_fill_non_constant_input_with_specific_mapping_batch_1(self):
+        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
+        result = input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])[0]
+        expected_data = np.zeros((1, 3, 10, 10))
+        assert 'input' in result
+        assert np.array_equal(result['input'], expected_data)
+
+    def test_fill_non_constant_input_with_specific_mapping_sevaral_image_matched(self):
+        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
+        result = input_feeder.fill_non_constant_inputs([DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1'])])[0]
+        expected_data = np.zeros((1, 3, 10, 10))
+        assert 'input' in result
+        assert np.array_equal(result['input'], expected_data)
+
+    def test_fill_non_constant_input_with_specific_mapping_not_match_raise_config_error(self):
+        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '1.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
+        with pytest.raises(ConfigError):
+            input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])
+
+    def test_fill_non_constant_input_with_specific_mapping_batch_2(self):
+        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '.'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
+            DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
+        ])[0]
+        expected_data = np.zeros((2, 3, 10, 10))
+        assert 'input' in result
+        assert np.array_equal(result['input'], expected_data)
+
+    def test_fill_non_constant_input_with_specific_mapping_not_all_image_in_batch_matched_raise_config_error(self):
+        input_feeder = InputFeeder([{'name': 'input', 'type': 'INPUT', 'value': '0+'}], {'input': InputInfo_test(shape=(1, 3, 10, 10))})
+        with pytest.raises(ConfigError):
+            input_feeder.fill_non_constant_inputs([
+                DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
+                DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
+            ])
+
+    def test_fill_non_constant_inputs_without_specific_mapping_batch_1(self):
+        input_feeder = InputFeeder([], { 'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))})
+        result = input_feeder.fill_non_constant_inputs([DataRepresentation(np.zeros((10, 10, 3)), identifier='0')])[0]
+        expected_data = np.zeros((1, 3, 10, 10))
+        assert 'input1' in result
+        assert np.array_equal(result['input1'], expected_data)
+        assert 'input2' in result
+        assert np.array_equal(result['input2'], expected_data)
+
+    def test_fill_non_constant_inputs_without_specific_mapping_batch_2(self):
+        input_feeder = InputFeeder([], {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape = (1, 3, 10, 10))})
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation(np.zeros((10, 10, 3)), identifier='0'),
+            DataRepresentation(np.zeros((10, 10, 3)), identifier='1')
+        ])[0]
+        expected_data = np.zeros((2, 3, 10, 10))
+        assert 'input1' in result
+        assert np.array_equal(result['input1'], expected_data)
+        assert 'input2' in result
+        assert np.array_equal(result['input2'], expected_data)
+
+    def test_fill_non_constant_inputs_with_specific_mapping_batch_1(self):
+        input_feeder = InputFeeder(
+            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
+            {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))}
+        )
+        result = input_feeder.fill_non_constant_inputs(
+            [DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))],identifier=['0', '1'])]
+        )[0]
+        expected_data = [np.zeros((1, 3, 10, 10)), np.ones((1, 3, 10, 10))]
+        assert 'input1' in result
+        assert np.array_equal(result['input1'], expected_data[0])
+        assert 'input2' in result
+        assert np.array_equal(result['input2'], expected_data[1])
+
+    def test_fill_non_constant_inputs_with_specific_mapping_not_match_raise_config_error(self):
+        input_feeder = InputFeeder(
+            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
+            {'input1': InputInfo_test(shape=(1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))}
+        )
+        with pytest.raises(ConfigError):
+            input_feeder.fill_non_constant_inputs([DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '2'])])
+
+    def test_fill_non_constant_inputs_with_specific_mapping_batch_2(self):
+        input_feeder = InputFeeder(
+            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
+            { 'input1': InputInfo_test(shape = (1, 3, 10, 10)), 'input2': InputInfo_test(shape=(1, 3, 10, 10))}
+        )
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1']),
+            DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1'])
+        ])[0]
+        expected_data = [np.zeros((2, 3, 10, 10)), np.ones((2, 3, 10, 10))]
+        assert 'input1' in result
+        assert np.array_equal(result['input1'], expected_data[0])
+        assert 'input2' in result
+        assert np.array_equal(result['input2'], expected_data[1])
+
+    def test_fill_non_constant_inputs_with_specific_mapping_not_all_image_in_batch_matched_raise_config_error(self):
+        input_feeder = InputFeeder(
+            [{'name': 'input1', 'type': 'INPUT', 'value': '0'}, {'name': 'input2', 'type': 'INPUT', 'value': '1'}],
+            {'input1': (1, 3, 10, 10), 'input2': (1, 3, 10, 10)}
+        )
+        with pytest.raises(ConfigError):
+            input_feeder.fill_non_constant_inputs([
+                DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '1']),
+                DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], identifier=['0', '2'])
+            ])
+
+    def test_fill_non_const_input_with_multi_infer_data_batch_1(self):
+        input_feeder = InputFeeder({}, {'input': (1, 3, 10, 10)})
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation([np.zeros((10, 10, 3)), np.ones((10, 10, 3))], {'multi_infer': True}, identifier='0')
+        ])
+        expected = [{'input': np.zeros((1, 3, 10, 10))}, {'input': np.ones((1, 3, 10, 10))}]
+        assert len(result) == len(expected)
+        assert np.array_equal(result[0]['input'], expected[0]['input'])
+        assert np.array_equal(result[1]['input'], expected[1]['input'])
+
+    def test_fill_non_const_input_with_multi_infer_data_batch_2(self):
+        input_feeder = InputFeeder({}, {'input': (2, 3, 10, 10)})
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation(
+                [np.zeros((10, 10, 3)), np.ones((10, 10, 3))],
+                {'multi_infer': True},
+                identifier='0'
+            ),
+            DataRepresentation(
+                [np.zeros((10, 10, 3)), np.ones((10, 10, 3))],
+                {'multi_infer': True},
+                identifier='1'
+            ),
+        ])
+        expected = [{'input': np.zeros((2, 3, 10, 10))}, {'input': np.ones((2, 3, 10, 10))}]
+        assert len(result) == len(expected)
+        assert np.array_equal(result[0]['input'], expected[0]['input'])
+        assert np.array_equal(result[1]['input'], expected[1]['input'])
+
+    def test_fill_non_const_input_with_multi_infer_not_consistent_data_batch_2(self):
+        input_feeder = InputFeeder({}, {'input': (2, 3, 10, 10)})
+        result = input_feeder.fill_non_constant_inputs([
+            DataRepresentation(
+                [np.zeros((10, 10, 3))],
+                {'multi_infer': True},
+                identifier='0'
+            ),
+            DataRepresentation(
+                [np.zeros((10, 10, 3)), np.ones((10, 10, 3))],
+                {'multi_infer': True},
+                identifier='1'
+            ),
+        ])
+        expected = [{'input': np.zeros((2, 3, 10, 10))}, {'input': np.ones((1, 3, 10, 10))}]
+        assert len(result) == len(expected)
+        assert np.array_equal(result[0]['input'], expected[0]['input'])
+        assert np.array_equal(result[1]['input'], expected[1]['input'])
diff --git a/tools/accuracy_checker/tests/test_metric_evaluator.py b/tools/accuracy_checker/tests/test_metric_evaluator.py
new file mode 100644
index 000000000..7b4c9e895
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_metric_evaluator.py
@@ -0,0 +1,549 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pytest
+from accuracy_checker.config import ConfigError
+from accuracy_checker.metrics import ClassificationAccuracy, MetricsExecutor
+from accuracy_checker.metrics.metric import Metric
+from accuracy_checker.representation import (
+    ClassificationAnnotation,
+    ClassificationPrediction,
+    ContainerAnnotation,
+    ContainerPrediction,
+    DetectionAnnotation,
+    DetectionPrediction
+)
+from .common import DummyDataset
+
+
+class TestMetric:
+    def setup_method(self):
+        self.module = 'accuracy_checker.metrics.metric_evaluator'
+
+    def test_missed_metrics_raises_config_error_exception(self):
+        config = {'annotation': 'custom'}
+
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_missed_metrics_raises_config_error_exception_with_custom_name(self):
+        config = {'name': 'some_name', 'annotation': 'custom'}
+
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_empty_metrics_raises_config_error_exception(self):
+        config = {'annotation': 'custom', 'metrics': []}
+
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_metrics_with_empty_entry_raises_config_error_exception(self):
+        config = {'annotation': 'custom', 'metrics': [{}]}
+
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_missed_metric_type_raises_config_error_exception(self):
+        config = {'annotation': 'custom', 'metrics': [{'undefined': ''}]}
+
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_undefined_metric_type_raises_config_error_exception(self):
+        config = {'annotation': 'custom', 'metrics': [{'type': ''}]}
+
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_accuracy_arguments(self):
+        config = {'annotation': 'custom', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        assert len(dispatcher.metrics) == 1
+        _, accuracy_metric, _, _, _ = dispatcher.metrics[0]
+        assert isinstance(accuracy_metric, ClassificationAccuracy)
+        assert accuracy_metric.top_k == 1
+
+    def test_accuracy_with_several_annotation_source_raises_config_error_exception(self):
+        config = {
+            'annotation': 'custom',
+            'metrics': [{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'annotation1, annotation2'}]
+        }
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_accuracy_with_several_prediction_source_raises_value_error_exception(self):
+        config = {
+            'annotation': 'custom',
+            'metrics': [{'type': 'accuracy', 'top_k': 1, 'prediction_source': 'prediction1, prediction2'}]
+        }
+        with pytest.raises(ConfigError):
+            MetricsExecutor(config, None)
+
+    def test_accuracy_on_container_with_wrong_annotation_source_name_raise_config_error_exception(self):
+        annotations = [ContainerAnnotation({'annotation': ClassificationAnnotation('identifier', 3)})]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'a'}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_with_wrong_annotation_type_raise_config_error_exception(self):
+        annotations = [DetectionAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'accuracy', 'top_k': 1}]
+        }
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_with_unsupported_annotations_in_container_raise_config_error_exception(self):
+        annotations = [ContainerAnnotation({'annotation': DetectionAnnotation('identifier', 3)})]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'accuracy', 'top_k': 1}]
+        }
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_with_unsupported_annotation_type_as_annotation_source_for_container_raises_config_error(self):
+        annotations = [ContainerAnnotation({'annotation': DetectionAnnotation('identifier', 3)})]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'annotation'}]
+        }
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_on_annotation_container_with_several_suitable_representations_config_value_error_exception(self):
+        annotations = [ContainerAnnotation({
+            'annotation1': ClassificationAnnotation('identifier', 3),
+            'annotation2': ClassificationAnnotation('identifier', 3)
+        })]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_with_wrong_prediction_type_raise_config_error_exception(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_with_unsupported_prediction_in_container_raise_config_error_exception(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ContainerPrediction({'prediction': DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_with_unsupported_prediction_type_as_prediction_source_for_container_raises_config_error(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ContainerPrediction({'prediction': DetectionPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'accuracy', 'top_k': 1, 'prediction_source': 'prediction'}]
+        }
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_accuracy_on_prediction_container_with_several_suitable_representations_raise_config_error_exception(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ContainerPrediction({
+            'prediction1': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0]),
+            'prediction2': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])
+        })]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        with pytest.raises(ConfigError):
+            dispatcher.update_metrics_on_batch(annotations, predictions)
+
+    def test_complete_accuracy(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_complete_accuracy_with_container_default_sources(self):
+        annotations = [ContainerAnnotation({'a': ClassificationAnnotation('identifier', 3)})]
+        predictions = [ContainerPrediction({'p': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_complete_accuracy_with_container_sources(self):
+        annotations = [ContainerAnnotation({'a': ClassificationAnnotation('identifier', 3)})]
+        predictions = [ContainerPrediction({'p': ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])})]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'accuracy', 'top_k': 1, 'annotation_source': 'a', 'prediction_source': 'p'}]
+        }
+
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_zero_accuracy(self):
+        annotation = [ClassificationAnnotation('identifier', 2)]
+        prediction = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+
+        dispatcher = MetricsExecutor(config, None)
+
+        for _, evaluation_result in dispatcher.iterate_metrics([annotation], [prediction]):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == 0.0
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_complete_accuracy_top_3(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [1.0, 3.0, 4.0, 2.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 3}]}
+
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_zero_accuracy_top_3(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 3}]}
+
+        dispatcher = MetricsExecutor(config, None)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == 0.0
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_reference_is_10_by_config(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 3, 'reference': 10}]}
+
+        dispatcher = MetricsExecutor(config, None)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == 0.0
+            assert evaluation_result.reference_value == 10
+            assert evaluation_result.threshold is None
+
+    def test_threshold_is_10_by_config(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [5.0, 3.0, 4.0, 1.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 3, 'threshold': 10}]}
+
+        dispatcher = MetricsExecutor(config, None)
+
+        for _, evaluation_result in dispatcher.iterate_metrics([annotations], [predictions]):
+            assert evaluation_result.name == 'accuracy'
+            assert evaluation_result.evaluated_value == 0.0
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold == 10
+
+    def test_classification_per_class_accuracy_fully_zero_prediction(self):
+        annotation = ClassificationAnnotation('identifier', 0)
+        prediction = ClassificationPrediction('identifier', [1.0, 2.0])
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 1}]}
+        dataset = DummyDataset(label_map={0: '0', 1: '1'})
+        dispatcher = MetricsExecutor(config, dataset)
+        dispatcher.update_metrics_on_batch([annotation], [prediction])
+        for _, evaluation_result in dispatcher.iterate_metrics([annotation], [prediction]):
+            assert evaluation_result.name == 'accuracy_per_class'
+            assert len(evaluation_result.evaluated_value) == 2
+            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[1] == pytest.approx(0.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_classification_per_class_accuracy_partially_zero_prediction(self):
+        annotation = [ClassificationAnnotation('identifier', 1)]
+        prediction = [ClassificationPrediction('identifier', [1.0, 2.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 1}]}
+        dataset = DummyDataset(label_map={0: '0', 1: '1'})
+        dispatcher = MetricsExecutor(config, dataset)
+
+        dispatcher.update_metrics_on_batch(annotation, prediction)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
+            assert evaluation_result.name == 'accuracy_per_class'
+            assert len(evaluation_result.evaluated_value) == 2
+            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_classification_per_class_accuracy_complete_prediction(self):
+        annotation = [ClassificationAnnotation('identifier_1', 1), ClassificationAnnotation('identifier_2', 0)]
+        prediction = [
+            ClassificationPrediction('identifier_1', [1.0, 2.0]),
+            ClassificationPrediction('identifier_2', [2.0, 1.0])
+        ]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 1}]}
+        dataset = DummyDataset(label_map={0: '0', 1: '1'})
+        dispatcher = MetricsExecutor(config, dataset)
+
+        dispatcher.update_metrics_on_batch(annotation, prediction)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
+            assert evaluation_result.name == 'accuracy_per_class'
+            assert len(evaluation_result.evaluated_value) == 2
+            assert evaluation_result.evaluated_value[0] == pytest.approx(1.0)
+            assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_classification_per_class_accuracy_partially_prediction(self):
+        annotation = [
+            ClassificationAnnotation('identifier_1', 1),
+            ClassificationAnnotation('identifier_2', 0),
+            ClassificationAnnotation('identifier_3', 0)
+        ]
+        prediction = [
+            ClassificationPrediction('identifier_1', [1.0, 2.0]),
+            ClassificationPrediction('identifier_2', [2.0, 1.0]),
+            ClassificationPrediction('identifier_3', [1.0, 5.0])
+        ]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 1}]}
+        dataset = DummyDataset(label_map={0: '0', 1: '1'})
+        dispatcher = MetricsExecutor(config, dataset)
+
+        dispatcher.update_metrics_on_batch(annotation, prediction)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
+            assert evaluation_result.name == 'accuracy_per_class'
+            assert len(evaluation_result.evaluated_value) == 2
+            assert evaluation_result.evaluated_value[0] == pytest.approx(0.5)
+            assert evaluation_result.evaluated_value[1] == pytest.approx(1.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_classification_per_class_accuracy_prediction_top3_zero(self):
+        annotation = [ClassificationAnnotation('identifier_1', 0), ClassificationAnnotation('identifier_2', 1)]
+        prediction = [
+            ClassificationPrediction('identifier_1', [1.0, 2.0, 3.0, 4.0]),
+            ClassificationPrediction('identifier_2', [2.0, 1.0, 3.0, 4.0])
+        ]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 3}]}
+        dataset = DummyDataset(label_map={0: '0', 1: '1', 2: '2', 3: '3'})
+        dispatcher = MetricsExecutor(config, dataset)
+
+        dispatcher.update_metrics_on_batch(annotation, prediction)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
+            assert evaluation_result.name == 'accuracy_per_class'
+            assert len(evaluation_result.evaluated_value) == 4
+            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[1] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[2] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[3] == pytest.approx(0.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+    def test_classification_per_class_accuracy_prediction_top3(self):
+        annotation = [ClassificationAnnotation('identifier_1', 1), ClassificationAnnotation('identifier_2', 1)]
+        prediction = [
+            ClassificationPrediction('identifier_1', [1.0, 2.0, 3.0, 4.0]),
+            ClassificationPrediction('identifier_2', [2.0, 1.0, 3.0, 4.0])
+        ]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy_per_class', 'top_k': 3}]}
+        dataset = DummyDataset(label_map={0: '0', 1: '1', 2: '2', 3: '3'})
+        dispatcher = MetricsExecutor(config, dataset)
+
+        dispatcher.update_metrics_on_batch(annotation, prediction)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotation, prediction):
+            assert evaluation_result.name == 'accuracy_per_class'
+            assert len(evaluation_result.evaluated_value) == 4
+            assert evaluation_result.evaluated_value[0] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[1] == pytest.approx(0.5)
+            assert evaluation_result.evaluated_value[2] == pytest.approx(0.0)
+            assert evaluation_result.evaluated_value[3] == pytest.approx(0.0)
+            assert evaluation_result.reference_value is None
+            assert evaluation_result.threshold is None
+
+
+class TestMetricExtraArgs:
+    def test_all_metrics_raise_config_error_on_extra_args(self):
+        for provider in Metric.providers:
+            adapter_config = {'type': provider, 'something_extra': 'extra'}
+            with pytest.raises(ConfigError):
+                Metric.provide(provider, adapter_config, None)
+
+    def test_detection_recall_raise_config_error_on_extra_args(self):
+        adapter_config = {'type': 'recall', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('recall', adapter_config, None)
+
+    def test_detection_miss_rate_raise_config_error_on_extra_args(self):
+        adapter_config = {'type': 'miss_rate', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('miss_rate', adapter_config, None)
+
+    def test_accuracy_raise_config_error_on_extra_args(self):
+        adapter_config = {'type': 'accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('accuracy', adapter_config, None)
+
+    def test_per_class_accuracy_raise_config_error_on_extra_args(self):
+        adapter_config = {'type': 'accuracy_per_class', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('accuracy_per_class', adapter_config, None)
+
+    def test_character_recognition_accuracy_raise_config_error_on_extra_args(self):
+        adapter_config = {'type': 'character_recognition_accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('character_recognition_accuracy', adapter_config, None)
+
+    def test_multi_accuracy_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'multi_accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('multi_accuracy', metric_config, None)
+
+    def test_multi_precision_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'multi_precision', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('multi_precision', metric_config, None)
+
+    def test_f1_score_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'f1-score', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('f1-score', metric_config, None)
+
+    def test_mae_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'mae', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('mae', metric_config, None)
+
+    def test_mse_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'mse', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('mse', metric_config, None)
+
+    def test_rmse_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'rmse', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('rmse', metric_config, None)
+
+    def test_mae_on_interval_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'mae_on_interval', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('mae_on_interval', metric_config, None)
+
+    def test_mse_on_interval_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'mse_on_interval', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('mse_on_interval', metric_config, None)
+
+    def test_rmse_on_interval_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'rmse_on_interval', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('rmse_on_interval', metric_config, None)
+
+    def test_per_point_normed_error_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'per_point_normed_error', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('per_point_normed_error', metric_config, None)
+
+    def test_average_point_error_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'normed_error', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('normed_error', metric_config, None)
+
+    def test_reid_cmc_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'cmc', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('cmc', metric_config, None)
+
+    def test_reid_map_raise_config_error_on_extra_args(self):
+        adapter_config = {'type': 'reid_map', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('reid_map', adapter_config, None)
+
+    def test_pairwise_accuracy_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'pairwise_accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('pairwise_accuracy', metric_config, None)
+
+    def test_segmentation_accuracy_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'segmentation_accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('segmentation_accuracy', metric_config, None)
+
+    def test_mean_iou_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'mean_iou', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('mean_iou', metric_config, None)
+
+    def test_mean_accuracy_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'mean_accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('mean_accuracy', metric_config, None)
+
+    def test_frequency_weighted_accuracy_raise_config_error_on_extra_args(self):
+        metric_config = {'type': 'frequency_weighted_accuracy', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            Metric.provide('frequency_weighted_accuracy', metric_config, None)
diff --git a/tools/accuracy_checker/tests/test_model_conversion.py b/tools/accuracy_checker/tests/test_model_conversion.py
new file mode 100644
index 000000000..a5a8c7742
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_model_conversion.py
@@ -0,0 +1,80 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import sys
+import pytest
+
+from accuracy_checker.launcher.model_conversion import (exec_mo_binary, find_dlsdk_ir, find_mo, prepare_args)
+from tests.common import mock_filesystem
+
+
+def test_mock_file_system():
+    with mock_filesystem(['foo/bar', 'foo/baz/']) as prefix:
+        assert (prefix / 'foo' / 'bar').is_file()
+        assert (prefix / 'foo' / 'baz').is_dir()
+
+
+def test_find_mo():
+    with mock_filesystem(['deployment_tools/model_optimizer/mo.py']) as prefix:
+        assert find_mo([prefix / 'deployment_tools' / 'model_optimizer'])
+
+
+def test_find_mo_is_none_when_not_exist():
+    with mock_filesystem(['deployment_tools/model_optimizer/mo.py']) as prefix:
+        assert find_mo([prefix / 'deployment_tools']) is None
+
+
+def test_find_mo_list_not_corrupted():
+    with mock_filesystem(['deployment_tools/model_optimizer/mo.py']) as prefix:
+        search_paths = [prefix]
+        find_mo(search_paths)
+        assert len(search_paths) == 1
+
+
+def test_find_ir__in_root():
+    with mock_filesystem(['model.xml', 'model.bin']) as root:
+        model, weights = find_dlsdk_ir(root, 'model')
+        assert model == root / 'model.xml'
+        assert weights == root / 'model.bin'
+
+
+def test_find_ir_raises_file_not_found_error_when_ir_not_found():
+    with mock_filesystem(['foo/']) as root:
+        with pytest.raises(FileNotFoundError):
+            find_dlsdk_ir(root, 'model')
+
+
+def test_prepare_args():
+    args = prepare_args('foo', ['a', 'b'], {'bar': 123, 'x': 'baz'})
+    assert args[0] == sys.executable
+    assert args[1] == 'foo'
+    assert '--a' in args
+    assert '--b' in args
+    assert '--bar' in args
+    assert '--x' in args
+
+    assert args[args.index('--bar') + 1] == '123'
+    assert args[args.index('--x') + 1] == 'baz'
+
+
+def test_exec_mo_binary(mocker):
+    subprocess_run = mocker.patch('subprocess.run')
+    mocker.patch('os.chdir')
+
+    args = prepare_args('ModelOptimizer', value_options={'--foo': 'bar'})
+    exec_mo_binary(args)
+
+    subprocess_run.assert_called_once_with(args, check=False, timeout=None)
diff --git a/tools/accuracy_checker/tests/test_model_evaluator.py b/tools/accuracy_checker/tests/test_model_evaluator.py
new file mode 100644
index 000000000..eeb9a52a9
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_model_evaluator.py
@@ -0,0 +1,143 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from unittest.mock import Mock, MagicMock
+
+from accuracy_checker.model_evaluator import ModelEvaluator
+
+
+class TestModelEvaluator:
+    def setup_method(self):
+        self.launcher = Mock()
+        self.launcher.predict.return_value = []
+
+        self.preprocessor = Mock()
+        self.postprocessor = Mock()
+
+        annotation_0 = Mock()
+        annotation_0.identifier = 0
+        annotation_1 = Mock()
+        annotation_1.identifier = 1
+        annotation_container_0 = Mock()
+        annotation_container_0.values = Mock(return_value=[annotation_0])
+        annotation_container_1 = Mock()
+        annotation_container_1.values = Mock(return_value=([annotation_1]))
+        self.annotations = [
+            ([annotation_container_0], [annotation_container_0]),
+            ([annotation_container_1], [annotation_container_1])
+        ]
+
+        self.dataset = MagicMock()
+        self.dataset.__iter__.return_value = self.annotations
+
+        self.postprocessor.process_batch = Mock(side_effect=[
+            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
+        ])
+        self.postprocessor.process_dataset = Mock(return_value=(
+            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
+        ))
+        self.postprocessor.full_process = Mock(return_value=(
+            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
+        ))
+
+        self.metric = Mock()
+        self.metric.update_metrics_on_batch = Mock()
+
+        self.evaluator = ModelEvaluator(self.launcher, self.preprocessor, self.postprocessor, self.dataset, self.metric)
+        self.evaluator.store_predictions = Mock()
+        self.evaluator.load = Mock(return_value=(
+            ([annotation_container_0], [annotation_container_0]), ([annotation_container_1], [annotation_container_1])
+        ))
+
+    def test_process_dataset_without_storing_predictions_and_dataset_processors(self):
+        self.postprocessor.has_dataset_processors = False
+
+        self.evaluator.process_dataset(None, None)
+
+        assert not self.evaluator.store_predictions.called
+        assert not self.evaluator.load.called
+        assert self.launcher.predict.called
+        assert self.postprocessor.process_batch.called
+        assert self.metric.update_metrics_on_batch.call_count == len(self.annotations)
+        assert self.postprocessor.process_dataset.called
+        assert not self.postprocessor.full_process.called
+
+    def test_process_dataset_without_storing_predictions_and_with_dataset_processors(self):
+        self.postprocessor.has_dataset_processors = True
+
+        self.evaluator.process_dataset(None, None)
+
+        assert not self.evaluator.store_predictions.called
+        assert not self.evaluator.load.called
+        assert self.launcher.predict.called
+        assert self.postprocessor.process_batch.called
+        assert self.metric.update_metrics_on_batch.call_count == 1
+        assert self.postprocessor.process_dataset.called
+        assert not self.postprocessor.full_process.called
+
+    def test_process_dataset_with_storing_predictions_and_without_dataset_processors(self):
+        self.postprocessor.has_dataset_processors = False
+
+        self.evaluator.process_dataset('path', None)
+
+        assert self.evaluator.store_predictions.called
+        assert not self.evaluator.load.called
+        assert self.launcher.predict.called
+        assert self.postprocessor.process_batch.called
+        assert self.metric.update_metrics_on_batch.call_count == len(self.annotations)
+        assert self.postprocessor.process_dataset.called
+        assert not self.postprocessor.full_process.called
+
+    def test_process_dataset_with_storing_predictions_and_with_dataset_processors(self):
+        self.postprocessor.has_dataset_processors = True
+
+        self.evaluator.process_dataset('path', None)
+
+        assert self.evaluator.store_predictions.called
+        assert not self.evaluator.load.called
+        assert self.launcher.predict.called
+        assert self.postprocessor.process_batch.called
+        assert self.metric.update_metrics_on_batch.call_count == 1
+        assert self.postprocessor.process_dataset.called
+        assert not self.postprocessor.full_process.called
+
+    def test_process_dataset_with_loading_predictions_and_without_dataset_processors(self, mocker):
+        mocker.patch('accuracy_checker.model_evaluator.get_path')
+        self.postprocessor.has_dataset_processors = False
+
+        self.evaluator.process_dataset('path', None)
+
+        assert not self.evaluator.store_predictions.called
+        assert self.evaluator.load.called
+        assert not self.launcher.predict.called
+        assert not self.postprocessor.process_batch.called
+        assert self.metric.update_metrics_on_batch.call_count == 1
+        assert not self.postprocessor.process_dataset.called
+        assert self.postprocessor.full_process.called
+
+    def test_process_dataset_with_loading_predictions_and_with_dataset_processors(self, mocker):
+        mocker.patch('accuracy_checker.model_evaluator.get_path')
+        self.postprocessor.has_dataset_processors = True
+
+        self.evaluator.process_dataset('path', None)
+
+        assert not self.evaluator.store_predictions.called
+        assert self.evaluator.load.called
+        assert not self.launcher.predict.called
+        assert not self.postprocessor.process_batch.called
+        assert self.metric.update_metrics_on_batch.call_count == 1
+        assert not self.postprocessor.process_dataset.called
+        assert self.postprocessor.full_process.called
diff --git a/tools/accuracy_checker/tests/test_postprocessor.py b/tools/accuracy_checker/tests/test_postprocessor.py
new file mode 100644
index 000000000..81c14c3a7
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_postprocessor.py
@@ -0,0 +1,1070 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+import pytest
+
+from accuracy_checker.config import ConfigError
+from accuracy_checker.postprocessor import PostprocessingExecutor
+
+from accuracy_checker.representation import (
+    DetectionAnnotation,
+    DetectionPrediction,
+    ContainerAnnotation,
+    ContainerPrediction,
+    ClassificationAnnotation
+)
+
+from .common import make_representation, make_segmentation_representation
+
+
+def postprocess_data(executor, annotations, predictions):
+    return executor.full_process(annotations, predictions)
+
+
+class TestPostprocessor:
+    def test_without_apply_to_and_sources_filter_raise_config_error_exception(self):
+        config = [{'type': 'filter', 'labels': [1]}]
+
+        with pytest.raises(ConfigError):
+            PostprocessingExecutor(config)
+
+    def test_both_provided_apply_to_and_sources_filter_raise_config_error_exception(self):
+        config = [{
+            'type': 'filter',
+            'apply_to': 'prediction',
+            'annotation_source': 'annotation',
+            'labels': [1]
+        }]
+
+        with pytest.raises(ConfigError):
+            PostprocessingExecutor(config)
+
+    def test_filter_annotations_unsupported_source_type_in_container_raise_type_error_exception(self):
+        config = [{'type': 'filter', 'annotation_source': 'annotation', 'labels': [1]}]
+        annotation = ContainerAnnotation({'annotation': ClassificationAnnotation()})
+        executor = PostprocessingExecutor(config)
+
+        with pytest.raises(TypeError):
+            postprocess_data(executor, [annotation], [None])
+
+    def test_filter_annotations_source_not_found_raise_config_error_exception(self):
+        config = [{'type': 'filter', 'annotation_source': 'ann', 'labels': [1]}]
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        executor = PostprocessingExecutor(config)
+
+        with pytest.raises(ConfigError):
+            postprocess_data(executor, [annotation], [None])
+
+    def test_filter_predictions_unsupported_source_type_raise_type_error_exception(self):
+        config = [{
+            'type': 'filter',
+            'prediction_source': 'detection_out',
+            'labels': [1],
+            'remove_filtered': False
+        }]
+        prediction = ContainerPrediction({'detection_out': ClassificationAnnotation()})
+        executor = PostprocessingExecutor(config)
+
+        with pytest.raises(TypeError):
+            postprocess_data(executor, [None], [prediction])
+
+    def test_filter_predictions_source_not_found_raise_config_error_exception(self):
+        config = [{
+            'type': 'filter', 'prediction_source': 'undefined', 'labels': [1]
+        }]
+        prediction = ContainerPrediction({'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]})
+        executor = PostprocessingExecutor(config)
+
+        with pytest.raises(ConfigError):
+            postprocess_data(executor, [None], [prediction])
+
+    def test_filter_container_annotations_by_labels_with_ignore_using_source(self):
+        config = [{
+            'type': 'filter', 'annotation_source': 'annotation', 'labels': [1], 'remove_filtered': False
+        }]
+        annotation = ContainerAnnotation({
+            'annotation':  make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+            )[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_container_annotations_by_labels_with_ignore_using_apply_to(self):
+        config = [{
+            'type': 'filter',
+            'apply_to': 'annotation',
+            'labels': [1],
+            'remove_filtered': False
+        }]
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+            )[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_regular_annotations_by_labels_with_ignore(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': False}]
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected = make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+            )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_multi_source_annotations_by_labels_with_ignore(self):
+        config = [{
+            'type': 'filter',
+            'annotation_source': ['annotation1', 'annotation2'],
+            'labels': [1],
+            'remove_filtered': False
+        }]
+        annotation = ContainerAnnotation({
+            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
+            'annotation2': make_representation('1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation1': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+            )[0],
+            'annotation2': make_representation(
+                '1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [0, 1]}]
+            )[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_multi_source_annotations_by_labels_with_ignore_using_apply_to(self):
+        config = [{
+            'type': 'filter',
+            'apply_to': 'annotation',
+            'labels': [1],
+            'remove_filtered': False
+        }]
+        annotation = ContainerAnnotation({
+            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
+            'annotation2': make_representation('1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation1': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+            )[0],
+            'annotation2': make_representation(
+                '1 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [0, 1]}]
+            )[0]
+        })
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_regular_annotations_by_labels_with_remove_using_annotation_source_warm_user_warning(self):
+        config = [{
+            'type': 'filter',
+            'annotation_source': 'annotation',
+            'labels': [1],
+            'remove_filtered': True
+        }]
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected = make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+
+        with pytest.warns(UserWarning):
+            postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_regular_annotations_by_labels_with_remove_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': True}]
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected = make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_annotations_by_labels_with_remove_on_container(self):
+        config = [{
+            'type': 'filter',
+            'annotation_source': 'annotation',
+            'labels': [1],
+            'remove_filtered': True
+        }]
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_annotations_by_labels_with_remove_on_container_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': True}]
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_multi_source_annotations_by_labels_with_remove(self):
+        config = [{
+            'type': 'filter',
+            'annotation_source': ['annotation1', 'annotation2'],
+            'labels': [1], 'remove_filtered': True
+        }]
+        annotation = ContainerAnnotation({
+            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
+            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation1': make_representation('0 0 0 10 10', is_ground_truth=True)[0],
+            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_multi_source_by_labels_with_remove_on_container_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'labels': [1], 'remove_filtered': True}]
+        annotation = ContainerAnnotation({
+            'annotation1': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0],
+            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+        expected = ContainerAnnotation({
+            'annotation1': make_representation('0 0 0 10 10', is_ground_truth=True)[0],
+            'annotation2': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_predictions_by_labels_with_ignore(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': ['to_be_filtered'], 'remove_filtered': False}]
+        prediction = DetectionPrediction(labels=['some_label', 'to_be_filtered'])
+        expected = DetectionPrediction(labels=['some_label', 'to_be_filtered'], metadata={'difficult_boxes': [1]})
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_predictions_by_labels_with_ignore_on_container(self):
+        config = [{
+            'type': 'filter',
+            'prediction_source': 'detection_out',
+            'labels': [1],
+            'remove_filtered': False
+        }]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected = ContainerPrediction({'detection_out': make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+        )[0]})
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_predictions_by_labels_with_ignore_on_container_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': False}]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected = ContainerPrediction({'detection_out': make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+        )[0]})
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_multi_source_predictions_by_labels_with_ignore(self):
+        config = [{
+            'type': 'filter', 'prediction_source': ['detection_out1', 'detection_out2'], 'labels': [1],
+            'remove_filtered': False
+        }]
+        prediction = ContainerPrediction({
+            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
+            'detection_out2': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected = ContainerPrediction({
+            'detection_out1': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+            )[0],
+            'detection_out2': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+            )[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_multi_source_predictions_by_labels_with_ignore_using_apply_to(self):
+        config = [{
+            'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': False
+        }]
+        prediction = ContainerPrediction({
+            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
+            'detection_out2': make_representation('1 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected = ContainerPrediction({
+            'detection_out1': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+            )[0],
+            'detection_out2': make_representation(
+                '1 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [0, 1]}]
+            )[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_predictions_by_labels_with_remove(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': True}]
+        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)
+        expected = make_representation('0 0 0 10 10', score=1)
+
+        postprocess_data(PostprocessingExecutor(config), [None], prediction)
+
+        assert prediction == expected
+
+    def test_filter_predictions_by_labels_with_remove_on_container(self):
+        config = [{
+            'type': 'filter', 'prediction_source': 'detection_out', 'labels': [0], 'remove_filtered': True
+        }]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected = ContainerPrediction({'detection_out':  make_representation('1 0 0 11 11', score=1)[0]})
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_predictions_by_labels_with_remove_on_container_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [0], 'remove_filtered': True}]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected = ContainerPrediction({'detection_out': make_representation('1 0 0 11 11', score=1)[0]})
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_multi_source_predictions_by_labels_with_remove(self):
+        config = [{
+            'type': 'filter',
+            'prediction_source': ['detection_out1', 'detection_out2'],
+            'labels': [1],
+            'remove_filtered': True
+        }]
+        prediction = ContainerPrediction({
+            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
+            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
+        })
+        expected = ContainerPrediction({
+            'detection_out1': make_representation('0 0 0 10 10', score=1)[0],
+            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_multi_source_predictions_by_labels_with_remove_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'labels': [1], 'remove_filtered': True}]
+        prediction = ContainerPrediction({
+            'detection_out1': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0],
+            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
+        })
+        expected = ContainerPrediction({
+            'detection_out1': make_representation('0 0 0 10 10', score=1)[0],
+            'detection_out2': make_representation('0 0 0 10 10', score=1)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [None], [prediction])
+
+        assert prediction == expected
+
+    def test_filter_regular_annotations_and_regular_predictions_by_labels_with_ignore_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
+        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        expected_prediction = make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+        )[0]
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected_annotation = make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_regular_annotations_and_regular_predictions_by_labels_with_remove_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
+        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)
+        expected_prediction = make_representation('0 0 0 10 10', score=1)
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)
+        expected_annotation = make_representation('0 0 0 10 10', is_ground_truth=True)
+
+        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_container_annotations_and_regular_predictions_by_labels_with_ignore_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
+        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        expected_prediction = make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+        )[0]
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected_annotation = make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_container_annotations_and_regular_predictions_by_labels_with_remove_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
+        prediction = make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        expected_prediction = make_representation('0 0 0 10 10', score=1)[0]
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected_annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_regular_annotations_and_container_predictions_by_labels_with_ignore_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected_prediction = ContainerPrediction({
+            'detection_out': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+            )[0]
+        })
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected_annotation = make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_regular_annotations_and_container_predictions_by_labels_with_remove_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected_prediction = ContainerPrediction({'detection_out': make_representation('0 0 0 10 10', score=1)[0]})
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected_annotation = make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_container_annotations_and_container_predictions_by_labels_with_ignore_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
+        prediction = ContainerPrediction({
+            'detection_out': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected_prediction = ContainerPrediction({
+            'detection_out': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}]
+            )[0]
+        })
+        annotation = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        expected_annotation = make_representation(
+            '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_container_annotations_and_container_predictions_by_labels_with_remove_using_apply_to(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': True}]
+        prediction = ContainerPrediction({
+            'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]
+        })
+        expected_prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10', score=1)[0]})
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected_annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_container_annotations_and_container_predictions_by_labels_with_ignore_using_sources(self):
+        config = [{'type': 'filter', 'apply_to': 'all', 'labels': [1], 'remove_filtered': False}]
+        prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]})
+        expected_prediction = ContainerPrediction({
+            'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1, meta=[{'difficult_boxes': [1]}])[0]
+        })
+        annotation = ContainerAnnotation({
+            'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]
+        })
+        expected_annotation = ContainerAnnotation({
+            'annotation': make_representation(
+                '0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True, meta=[{'difficult_boxes': [1]}]
+            )[0]
+        })
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_container_annotations_and_container_predictions_by_labels_with_remove_using_sources(self):
+        config = [{'type': 'filter', 'annotation_source': 'annotation', 'prediction_source': 'prediction',
+                   'labels': [1], 'remove_filtered': True}]
+        prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10; 1 0 0 11 11', score=1)[0]})
+        expected_prediction = ContainerPrediction({'prediction': make_representation('0 0 0 10 10', score=1)[0]})
+        annotation = ContainerAnnotation(
+            {'annotation': make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)[0]})
+        expected_annotation = ContainerAnnotation(
+            {'annotation': make_representation('0 0 0 10 10', is_ground_truth=True)[0]})
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_filter_annotations_by_min_confidence_do_nothing(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_confidence': 0.5, 'remove_filtered': True}]
+        annotations = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)
+        expected_annotations = make_representation('0 0 0 10 10; 1 0 0 11 11', is_ground_truth=True)
+
+        postprocess_data(PostprocessingExecutor(config), annotations, [None])
+
+        assert np.array_equal(annotations, expected_annotations)
+
+    def test_filter_predictions_by_min_confidence_with_ignore(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_confidence': 0.5, 'remove_filtered': False}]
+        predictions = [
+            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.3, 0.8])[0],
+            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.5, 0.4])[0]
+        ]
+        expected_predictions = [
+            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.3, 0.8], meta=[{'difficult_boxes': [0]}])[0],
+            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.5, 0.4], meta=[{'difficult_boxes': [1]}])[0]
+        ]
+
+        executor = PostprocessingExecutor(config)
+        postprocess_data(executor, [None, None], predictions)
+
+        assert np.array_equal(predictions, expected_predictions)
+
+    def test_filter_predictions_by_min_confidence_with_remove(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_confidence': 0.5, 'remove_filtered': True}]
+        predictions = [
+            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.3, 0.8])[0],
+            make_representation('0 0 0 10 10; 1 0 0 11 11', score=[0.5, 0.4])[0]
+        ]
+        expected_predictions = [
+            make_representation('1 0 0 11 11', score=0.8)[0],
+            make_representation('0 0 0 10 10', score=0.5)[0]
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
+
+        assert np.array_equal(predictions, expected_predictions)
+
+    def test_filter_annotations_by_height_range_with_ignored(self):
+        config = [{
+            'type': 'filter',
+            'apply_to': 'annotation',
+            'height_range': '(10.0, 20.0)',
+            'remove_filtered': False
+        }]
+        annotations = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True)[0]
+        ]
+        expected = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': [1]}])[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True, meta=[{'difficult_boxes': [0, 1]}])[0]
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), annotations, [None, None])
+
+        assert np.array_equal(annotations, expected)
+
+    def test_filter_annotations_by_height_range_with_remove(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'height_range': '(10.0, 20.0)', 'remove_filtered': True}]
+        annotations = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True)[0]
+        ]
+        expected = [
+            make_representation('0 0 5 0 15', is_ground_truth=True)[0],
+            make_representation('', is_ground_truth=True)[0]
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), annotations, [None, None])
+
+        assert np.array_equal(annotations, expected)
+
+    def test_filter_predictions_by_height_range_with_ignored(self):
+        config = [{
+            'type': 'filter',
+            'apply_to': 'prediction',
+            'height_range': '(10.0, 20.0)',
+            'remove_filtered': False
+        }]
+        predictions = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1)[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1)[0]
+        ]
+        expected = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1, meta=[{'difficult_boxes': [1]}])[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1, meta=[{'difficult_boxes': [0, 1]}])[0]
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
+
+        assert np.array_equal(predictions, expected)
+
+    def test_filter_predictions_by_height_range_with_remove(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'height_range': '(10.0, 20.0)', 'remove_filtered': True}]
+        predictions = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', score=1)[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', score=1)[0]
+        ]
+        expected = [
+            make_representation('0 0 5 0 15', score=1)[0],
+            make_representation('', score=1)[0]
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
+
+        assert np.array_equal(predictions, expected)
+
+    def test_filter_predictions_by_unknown_min_visibility_raises_value_error_exception(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_visibility': 'unknown'}]
+        predictions = [
+           make_representation('0 0 5 0 15; 1 0 10 0 15', score=1)[0],
+           make_representation('0 0 5 0 35; 1 0 10 0 40', score=1)[0]
+        ]
+
+        with pytest.raises(ValueError):
+            postprocess_data(PostprocessingExecutor(config), [None], predictions)
+
+    def test_filter_annotations_by_unknown_min_visibility_raises_value_error_exception(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'unknown'}]
+        annotations = [DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0])]
+
+        with pytest.raises(ValueError):
+            postprocess_data(PostprocessingExecutor(config), annotations, [None])
+
+    def test_filter_predictions_by_visibility_raises_value_error_with_unknown_visibility(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_visibility': 'heavy occluded'}]
+        predictions = [DetectionPrediction(
+            y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'visibilities': ['unknown']}
+        )]
+
+        with pytest.raises(ValueError):
+            postprocess_data(PostprocessingExecutor(config), [None], predictions)
+
+    def test_filter_annotations_by_visibility_raises_value_error_with_unknown_visibility(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}]
+        annotations = [DetectionAnnotation(
+            y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'visibilities': ['unknown']}
+        )]
+
+        with pytest.raises(ValueError):
+            postprocess_data(PostprocessingExecutor(config), annotations, [None])
+
+    def test_filter_by_visibility_does_nothing_with_annotations_without_visibility(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}]
+        annotations = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True)[0]
+        ]
+        expected = [
+            make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': []}])[0],
+            make_representation('0 0 5 0 35; 1 0 10 0 40', is_ground_truth=True, meta=[{'difficult_boxes': []}])[0]
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), annotations, [None, None])
+
+        assert np.array_equal(annotations, expected)
+
+    def test_filter_by_visibility_does_nothing_with_predictions_without_visibility(self):
+        config = [{'type': 'filter', 'apply_to': 'prediction', 'min_visibility': 'heavy occluded'}]
+        predictions = [
+            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0]),
+            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[35.0, 50.0])
+        ]
+        expected = [
+            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[15.0, 40.0], metadata={'difficult_boxes': []}),
+            DetectionPrediction(y_mins=[5.0, 10.0], y_maxs=[35.0, 50.0], metadata={'difficult_boxes': []})
+        ]
+
+        postprocess_data(PostprocessingExecutor(config), [None, None], predictions)
+
+        assert np.array_equal(predictions, expected)
+
+    def test_filter_by_visibility_does_nothing_with_default_visibility_level_and_heavy_occluded(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'heavy occluded'}]
+        annotation = make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0]
+        expected = make_representation(
+            '0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': []}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_by_visibility_does_nothing_with_default_visibility_level_and_partially_occluded(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'partially occluded'}]
+        annotation = make_representation('0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True)[0]
+        expected = make_representation(
+            '0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True, meta=[{'difficult_boxes': []}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_filter_by_visibility_filters_partially_occluded_remove_filtered(self):
+        config = [{'type': 'filter', 'apply_to': 'annotation', 'min_visibility': 'partially occluded',
+                   'remove_filtered': True}]
+        annotation = make_representation(
+            '0 0 5 0 15; 1 0 10 0 15', is_ground_truth=True,
+            meta=[{'visibilities': ['heavy occluded', 'partially occluded']}]
+        )[0]
+        expected = make_representation(
+            '1 0 10 0 15', is_ground_truth=True, meta=[{'visibilities': ['heavy occluded', 'partially occluded']}]
+        )[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_nms(self, mocker):
+        mock = mocker.patch('accuracy_checker.postprocessor.nms.NMS.process_all', return_value=([], []))
+        config = [{'type': 'nms', 'overlap': 0.4}]
+        postprocess_data(PostprocessingExecutor(config), [], [])
+        mock.assert_called_once_with([], [])
+
+    def test_resize_prediction_boxes(self):
+        config = [{'type': 'resize_prediction_boxes'}]
+        annotation = DetectionAnnotation(metadata={'image_size': [(100, 100, 3)]})
+        prediction = make_representation('0 0 0 5 5; 1 7 7 8 8', score=1)[0]
+        expected = make_representation('0 0 0 500 500; 1 700 700 800 800', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected
+
+    def test_clip_annotation_denormalized_boxes(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': False}]
+        meta = {'image_size': [(10, 10, 3)]}
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
+        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', is_ground_truth=True, meta=[meta])[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_clip_annotation_normalized_boxes(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': True}]
+        meta = {'image_size': [(10, 10, 3)]}
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
+        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', is_ground_truth=True, meta=[meta])[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_clip_annotation_denormalized_boxes_with_size(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': False, 'size': 10}]
+        meta = {'image_size': [(10, 10, 3)]}
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
+        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', is_ground_truth=True, meta=[meta])[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_clip_annotation_normalized_boxes_with_size_as_normalized(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'annotation', 'boxes_normalized': True, 'size': 10}]
+        meta = {'image_size': [(10, 10, 3)]}
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True, meta=[meta])[0]
+        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', is_ground_truth=True, meta=[meta])[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [None])
+
+        assert annotation == expected
+
+    def test_clip_prediction_denormalized_boxes(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': False}]
+        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
+        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
+        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected
+
+    def test_clip_prediction_normalized_boxes(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': True}]
+        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
+        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
+        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', score=1)[0]
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected
+
+    def test_clip_predictions_denormalized_boxes_with_size(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': False, 'size': 10}]
+        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
+        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
+        expected = make_representation('0 0 0 5 5; 1 9 10 10 10', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected
+
+    def test_clip_predictions_normalized_boxes_with_size_as_normalized(self):
+        config = [{'type': 'clip_boxes', 'apply_to': 'prediction', 'boxes_normalized': True, 'size': 10}]
+        annotation = DetectionAnnotation(metadata={'image_size': [(10, 10, 3)]})
+        prediction = make_representation('0 -1 0 5 5; 1 9 11 10 10', score=1)[0]
+        expected = make_representation('0 0 0 1 1; 1 1 1 1 1', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected
+
+    def test_cast_to_int_default(self):
+        config = [{'type': 'cast_to_int'}]
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
+        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        expected_prediction = make_representation('0 -1 0 6 5; 1 -10 12 11 10', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_cast_to_int_to_nearest(self):
+        config = [{'type': 'cast_to_int', 'round_policy': 'nearest'}]
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
+        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        expected_prediction = make_representation('0 -1 0 6 5; 1 -10 12 11 10', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_cast_to_int_to_nearest_to_zero(self):
+        config = [{'type': 'cast_to_int', 'round_policy': 'nearest_to_zero'}]
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
+        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        expected_prediction = make_representation('0 -1 0 5 5; 1 -9 11 10 10', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_cast_to_int_to_lower(self):
+        config = [{'type': 'cast_to_int', 'round_policy': 'lower'}]
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
+        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        expected_prediction = make_representation('0 -2 0 5 5; 1 -10 11 10 10', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_cast_to_int_to_greater(self):
+        config = [{'type': 'cast_to_int', 'round_policy': 'greater'}]
+        annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        prediction = make_representation('0 -1.1 0.5 5.9 5.1; 1 -9.9 11.5 10.9 10.1', score=1)[0]
+        expected_annotation = make_representation('0 -1 0 5 5; 1 9 11 10 10', is_ground_truth=True)[0]
+        expected_prediction = make_representation('0 -1 1 6 6; 1 -9 12 11 11', score=1)[0]
+
+        postprocess_data(PostprocessingExecutor(config), [annotation], [prediction])
+
+        assert prediction == expected_prediction and annotation == expected_annotation
+
+    def test_cast_to_int_to_unknown_raise_config_error(self):
+        config = [{'type': 'cast_to_int', 'round_policy': 'unknown'}]
+
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_extend_segmentation_mask_with_float_filling_raise_config_error(self):
+        config = [{'type': 'extend_segmentation_mask', 'filling_label':  0.5}]
+
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_extend_segmentation_mask_default(self):
+        config = [{'type': 'extend_segmentation_mask'}]
+        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
+        prediction = make_segmentation_representation(np.zeros((7, 7)), ground_truth=False)
+        expected_annotation_mask = np.zeros((7, 7))
+        expected_annotation_mask[0, :] = 255
+        expected_annotation_mask[:, 0] = 255
+        expected_annotation_mask[-1, :] = 255
+        expected_annotation_mask[:, -1] = 255
+        expected_prediction_mask = np.zeros((7, 7))
+        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
+        assert np.array_equal(prediction[0].mask, expected_prediction_mask)
+        assert np.array_equal(annotation[0].mask, expected_annotation_mask)
+
+    def test_extend_segmentation_mask_do_nothing(self):
+        config = [{'type': 'extend_segmentation_mask'}]
+        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
+        prediction = make_segmentation_representation(np.zeros((5, 5)), ground_truth=False)
+        expected_mask = np.zeros((5, 5))
+        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
+        assert np.array_equal(prediction[0].mask, expected_mask)
+        assert np.array_equal(annotation[0].mask, expected_mask)
+
+    def test_extend_segmentation_mask_asymmetrical(self):
+        config = [{'type': 'extend_segmentation_mask'}]
+        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
+        prediction = make_segmentation_representation(np.zeros((6, 7)), ground_truth=False)
+        expected_annotation_mask = np.zeros((6, 7))
+        expected_annotation_mask[:, 0] = 255
+        expected_annotation_mask[-1, :] = 255
+        expected_annotation_mask[:, -1] = 255
+        expected_prediction_mask = np.zeros((6, 7))
+        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
+        assert np.array_equal(prediction[0].mask, expected_prediction_mask)
+        assert np.array_equal(annotation[0].mask, expected_annotation_mask)
+
+    def test_extend_segmentation_mask_raise_config_error_if_prediction_less_annotation(self):
+        config = [{'type': 'extend_segmentation_mask'}]
+        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
+        prediction = make_segmentation_representation(np.zeros((4, 4)), ground_truth=False)
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), annotation, prediction)
+
+    def test_extend_segmentation_mask_with_filling_label(self):
+        config = [{'type': 'extend_segmentation_mask', 'filling_label': 1}]
+        annotation = make_segmentation_representation(np.zeros((5, 5)), ground_truth=True)
+        prediction = make_segmentation_representation(np.zeros((7, 7)), ground_truth=False)
+        expected_annotation_mask = np.zeros((7, 7))
+        expected_annotation_mask[0, :] = 1
+        expected_annotation_mask[:, 0] = 1
+        expected_annotation_mask[-1, :] = 1
+        expected_annotation_mask[:, -1] = 1
+        expected_prediction_mask = np.zeros((7, 7))
+        postprocess_data(PostprocessingExecutor(config), annotation, prediction)
+        assert np.array_equal(prediction[0].mask, expected_prediction_mask)
+        assert np.array_equal(annotation[0].mask, expected_annotation_mask)
+
+
+class TestPostprocessorExtraArgs:
+    def test_cast_to_int_raise_config_error_on_extra_args(self):
+        config = {'type': 'cast_to_int', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_clip_boxes_raise_config_error_on_extra_args(self):
+        config = {'type': 'clip_boxes', 'size': 1, 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_correct_yolo_v2_boxes_raise_config_error_on_extra_args(self):
+        config = {'type': 'correct_yolo_v2_boxes', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_encode_segmentation_mask_raise_config_error_on_extra_args(self):
+        config = {'type': 'encode_segmentation_mask', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_filter_raise_config_error_on_extra_args(self):
+        config = {'type': 'filter', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_nms_raise_config_error_on_extra_args(self):
+        config = {'type': 'nms', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_normalize_landmarks_points_raise_config_error_on_extra_args(self):
+        config = {'type': 'normalize_landmarks_points', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_resize_prediction_boxes_raise_config_error_on_extra_args(self):
+        config = {'type': 'resize_prediction_boxes', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_resize_segmentation_mask_raise_config_error_on_extra_args(self):
+        config = {'type': 'resize_segmentation_mask', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
+
+    def test_extend_segmentation_mask_raise_config_error_on_extra_args(self):
+        config = {'type': 'resize_segmentation_mask', 'something_extra': 'extra'}
+        with pytest.raises(ConfigError):
+            postprocess_data(PostprocessingExecutor(config), [None], [None])
diff --git a/tools/accuracy_checker/tests/test_preprocessor.py b/tools/accuracy_checker/tests/test_preprocessor.py
new file mode 100644
index 000000000..339fb8cdc
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_preprocessor.py
@@ -0,0 +1,610 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import cv2
+import numpy as np
+import pytest
+
+from accuracy_checker.config import ConfigError
+from accuracy_checker.preprocessor import (
+    Crop,
+    Normalize,
+    Preprocessor,
+    Resize,
+    Flip,
+    BgrToRgb,
+    CropRect,
+    ExtendAroundRect,
+    PointAligner
+)
+from accuracy_checker.preprocessor.preprocessing_executor import PreprocessingExecutor
+from accuracy_checker.dataset import DataRepresentation
+
+
+class TestResize:
+    def test_default_resize(self, mocker):
+        cv2_resize_mock = mocker.patch('accuracy_checker.preprocessor.preprocessors.cv2.resize')
+        resize = Preprocessor.provide('resize', {'type': 'resize', 'size': 200})
+
+        input_mock = mocker.Mock()
+        resize(DataRepresentation(input_mock))
+
+        assert not resize.use_pil
+        assert resize.dst_width == 200
+        assert resize.dst_height == 200
+        cv2_resize_mock.assert_called_once_with(
+            input_mock, (200, 200), interpolation=Resize.OPENCV_INTERPOLATION['LINEAR']
+        )
+
+    def test_custom_resize(self, mocker):
+        cv2_resize_mock = mocker.patch('accuracy_checker.preprocessor.preprocessors.cv2.resize')
+
+        resize = Preprocessor.provide(
+            'resize', {'type': 'resize', 'dst_width': 126, 'dst_height': 128, 'interpolation': 'CUBIC'}
+        )
+
+        input_mock = mocker.Mock()
+        resize(DataRepresentation(input_mock))
+
+        assert not resize.use_pil
+        assert resize.dst_width == 126
+        assert resize.dst_height == 128
+        cv2_resize_mock.assert_called_once_with(
+            input_mock, (126, 128),
+            interpolation=Resize.OPENCV_INTERPOLATION['CUBIC']
+        )
+
+    def test_resize_without_save_aspect_ratio(self):
+        name = 'mock_preprocessor'
+        config = {'type': 'resize', 'dst_width': 150, 'dst_height': 150}
+        input_image = np.ones((100, 50, 3))
+        resize = Preprocessor.provide('resize', config, name)
+
+        result = resize(DataRepresentation(input_image)).data
+
+        assert result.shape == (150, 150, 3)
+
+    def test_resize_save_aspect_ratio_unknown_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide(
+                'resize', {'type': 'resize', 'dst_width': 100, 'dst_height': 150, 'aspect_ratio_scale': 'unknown'}
+            )
+
+    def test_resize_save_aspect_ratio_height(self):
+        input_image = np.ones((100, 50, 3))
+        resize = Preprocessor.provide('resize', {
+            'type': 'resize', 'dst_width': 100, 'dst_height': 150,
+            'interpolation': 'CUBIC', 'aspect_ratio_scale': 'height'
+        })
+        result = resize(DataRepresentation(input_image)).data
+
+        assert result.shape == (300, 100, 3)
+
+    def test_resize_save_aspect_ratio_width(self):
+        input_image = np.ones((100, 50, 3))
+        resize = Preprocessor.provide('resize', {
+            'type': 'resize', 'dst_width': 150, 'dst_height': 150, 'aspect_ratio_scale': 'width'
+        })
+        result = resize(DataRepresentation(input_image)).data
+
+        assert result.shape == (150, 75, 3)
+
+    def test_resize_save_aspect_ratio_for_greater_dim(self):
+        input_image = np.ones((100, 50, 3))
+        resize = Preprocessor.provide('resize', {
+            'type': 'resize',
+            'dst_width': 100,
+            'dst_height': 150,
+            'aspect_ratio_scale': 'greater'
+        })
+        result = resize(DataRepresentation(input_image)).data
+
+        assert result.shape == (300, 100, 3)
+
+    def test_resize_to_negative_size_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('resize', {'type': 'resize', 'size': -100})
+
+    def test_resize_to_negative_destination_width_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('resize', {'type': 'resize', 'dst_width': -100, 'dst_height': 100})
+
+    def test_resize_to_negative_destination_height_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('resize', {'type': 'resize', 'dst_width': 100, 'dst_height': -100})
+
+    def test_resize_with_both_provided_size_and_dst_height_dst_width_warn(self):
+        input_image = np.ones((100, 50, 3))
+
+        with pytest.warns(None) as warnings:
+            resize = Preprocessor.provide(
+                'resize', {'type': 'resize', 'dst_width': 100, 'dst_height': 100, 'size': 200}
+            )
+            assert len(warnings) == 1
+            result = resize(DataRepresentation(input_image)).data
+            assert result.shape == (200, 200, 3)
+
+    def test_resize_provided_only_dst_height_raise_config_error(self):
+        with pytest.raises(ValueError):
+            Preprocessor.provide('resize', {'type': 'resize', 'dst_height': 100})
+
+    def test_resize_provided_only_dst_width_raise_config_error(self):
+        with pytest.raises(ValueError):
+            Preprocessor.provide('resize', {'type': 'resize', 'dst_width': 100})
+
+
+class TestNormalization:
+    def test_normalization_without_mean_and_std_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('normalization', {'type': 'normalization'})
+
+    def test_custom_normalization_with_mean(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '(1, 2, 3)'})
+        source = np.full_like((3, 300, 300), 100)
+        input_ref = source.copy() - (1, 2, 3)
+        result = normalization(DataRepresentation(source))
+
+        assert normalization.mean == (1, 2, 3)
+        assert normalization.std is None
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_precomputed_mean(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': 'cifar10'})
+
+        source = np.full_like((3, 300, 300), 100)
+        input_ref = source.copy() - normalization.PRECOMPUTED_MEANS['cifar10']
+        result = normalization(DataRepresentation(source))
+
+        assert normalization.mean == normalization.PRECOMPUTED_MEANS['cifar10']
+        assert normalization.std is None
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_mean_as_scalar(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '1'})
+
+        source = np.full_like((3, 300, 300), 100)
+        input_ref = source.copy() - 1
+        result = normalization(DataRepresentation(source))
+
+        assert normalization.mean == (1.0, )
+        assert normalization.std is None
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_std(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'std': '(1, 2, 3)'})
+
+        source = np.full_like((3, 300, 300), 100)
+        input_ref = source.copy() / (1, 2, 3)
+        result = normalization(DataRepresentation(source))
+
+        assert normalization.mean is None
+        assert normalization.std == (1, 2, 3)
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_precomputed_std(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'std': 'cifar10'})
+
+        source = np.full_like((3, 300, 300), 100)
+        input_ref = source.copy() / normalization.PRECOMPUTED_STDS['cifar10']
+        result = normalization(DataRepresentation(source))
+
+        assert normalization.mean is None
+        assert normalization.std == normalization.PRECOMPUTED_STDS['cifar10']
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_std_as_scalar(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'std': '2'})
+        source = np.full_like((3, 300, 300), 100)
+        input_ref = source.copy() / 2
+        result = normalization(DataRepresentation(source))
+
+        assert normalization.mean is None
+        assert normalization.std == (2.0, )
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_mean_and_std(self):
+        normalization = Preprocessor.provide(
+            'normalization', {'type': 'normalization', 'mean': '(1, 2, 3)', 'std': '(4, 5, 6)'}
+        )
+
+        input_ = np.full_like((3, 300, 300), 100)
+        input_ref = (input_ - (1, 2, 3)) / (4, 5, 6)
+        result = normalization(DataRepresentation(input_))
+
+        assert normalization.mean == (1, 2, 3)
+        assert normalization.std == (4, 5, 6)
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_custom_normalization_with_mean_and_std_as_scalars(self):
+        normalization = Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '2', 'std': '5'})
+
+        input_ = np.full_like((3, 300, 300), 100)
+        input_ref = (input_ - (2, )) / (5, )
+        result = normalization(DataRepresentation(input_))
+
+        assert normalization.mean == (2, )
+        assert normalization.std == (5, )
+        assert np.all(input_ref == result.data)
+        assert result.metadata == {'image_size': (3,)}
+
+    def test_normalization_with_zero_in_std_values_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'std': '(4, 0, 6)'})
+
+    def test_normalization_with_zero_as_std_value_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'std': '0'})
+
+    def test_normalization_with_not_channel_wise_mean_list_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'mean': '3, 2'})
+
+    def test_normalization_with_not_channel_wise_std_list_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'std': '3, 2'})
+
+    def test_normalization_with_unknown_precomputed_mean_raise_config_error(self):
+        with pytest.raises(ValueError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'mean': 'unknown'})
+
+    def test_normalization_with_unknown_precomputed_std_raise_config_error(self):
+        with pytest.raises(ValueError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'std': 'unknown'})
+
+
+class TestPreprocessingEvaluator:
+    def test_preprocessing_evaluator(self):
+        config = [{'type': 'normalization', 'mean': '(1, 2, 3)'}, {'type': 'resize', 'size': 200}]
+        preprocessor = PreprocessingExecutor(config)
+
+        assert 2 == len(preprocessor.processors)
+        assert isinstance(preprocessor.processors[0], Normalize)
+        assert isinstance(preprocessor.processors[1], Resize)
+        assert preprocessor.processors[0].mean == (1, 2, 3)
+        assert preprocessor.processors[1].dst_width == 200
+
+
+class TestCrop:
+    def test_crop_higher(self):
+        crop = Crop({'dst_width': 50, 'dst_height': 33, 'type': 'crop'})
+        image = np.zeros((100, 100, 3))
+        image_rep = crop(DataRepresentation(image))
+
+        assert image_rep.data.shape == (33, 50, 3)
+        assert image_rep.metadata == {'image_size': (100, 100, 3)}
+
+    def test_crop_to_size(self):
+        crop = Crop({'size': 50, 'type': 'crop'})
+        image = np.zeros((100, 100, 3))
+        image_rep = crop(DataRepresentation(image))
+
+        assert image_rep.data.shape == (50, 50, 3)
+        assert image_rep.metadata == {'image_size': (100, 100, 3)}
+
+    def test_crop_higher_non_symmetric(self):
+        crop = Crop({'dst_width': 50, 'dst_height': 12, 'type': 'crop'})
+        image = np.zeros((70, 50, 3))
+        image_rep = crop(DataRepresentation(image))
+
+        assert image_rep.data.shape == (12, 50, 3)
+        assert image_rep.metadata == {'image_size': (70, 50, 3)}
+
+    def test_crop_less(self):
+        crop = Crop({'dst_width': 151, 'dst_height': 42, 'type': 'crop'})
+        image = np.zeros((30, 30, 3))
+        image_rep = crop(DataRepresentation(image))
+
+        assert image_rep.data.shape == (42, 151, 3)
+        assert image_rep.metadata == {'image_size': (30, 30, 3)}
+
+    def test_crop_less_non_symmetric(self):
+        crop = Crop({'dst_width': 42, 'dst_height': 151, 'type': 'crop'})
+        image = np.zeros((30, 40, 3))
+        image_rep = crop(DataRepresentation(image))
+
+        assert image_rep.data.shape == (151, 42, 3)
+        assert image_rep.metadata == {'image_size': (30, 40, 3)}
+
+    def test_crop_to_negative_size_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Crop({'size': -151, 'type': 'crop'})
+
+    def test_crop_to_negative_destination_width_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Crop({'dst_width': -100, 'dst_height': 100, 'type': 'crop'})
+
+    def test_crop_to_negative_destination_height_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            Crop({'dst_width': 100, 'dst_height': -100, 'type': 'crop'})
+
+    def test_crop_with_both_provided_size_and_dst_height_dst_width_warn(self):
+        image = np.zeros((30, 40, 3))
+        with pytest.warns(None) as warnings:
+            crop = Crop({'dst_width': 100, 'dst_height': 100, 'size': 200, 'type': 'crop'})
+            assert len(warnings) == 1
+            result = crop.process(DataRepresentation(image))
+            assert result.data.shape == (200, 200, 3)
+            assert result.metadata == {'image_size': (30, 40, 3)}
+
+
+class TestFlip:
+    def test_horizontal_flip(self):
+        image = np.random.randint(0, 255, (30, 40, 3))
+        expected_image = cv2.flip(image, 0)
+        flip = Flip({'type': 'flip', 'mode': 'horizontal'})
+        assert np.array_equal(expected_image, flip.process(DataRepresentation(image)).data)
+
+    def test_vertical_flip(self):
+        image = np.random.randint(0, 255, (30, 40, 3))
+        expected_image = cv2.flip(image, 1)
+        flip = Flip({'type': 'flip', 'mode': 'vertical'})
+        assert np.array_equal(expected_image, flip.process(DataRepresentation(image)).data)
+
+    def test_flip_raise_config_error_if_mode_not_provided(self):
+        with pytest.raises(ConfigError):
+            Flip({'type': 'flip'})
+
+    def test_flip_raise_config_error_if_mode_unknown(self):
+        with pytest.raises(ConfigError):
+            Flip({'type': 'flip', 'mode': 'unknown'})
+
+
+class TestBGRtoRGB:
+    def test_bgr_to_rgb(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        bgr_to_rgb = BgrToRgb({'type': 'bgr_to_rgb'})
+        assert np.array_equal(expected_image, bgr_to_rgb.process(DataRepresentation(image)).data)
+
+
+class TestCropRect:
+    def test_crop_rect_if_rect_not_provided(self):
+        image = np.zeros((30, 40, 3))
+        crop_rect = CropRect({'type': 'crop_rect'})
+        assert np.array_equal(image, crop_rect(image, {}))
+
+    def test_crop_rect_if_rect_equal_image(self):
+        image = np.zeros((30, 40, 3))
+        crop_rect = CropRect({'type': 'crop_rect'})
+        assert np.array_equal(image, crop_rect(DataRepresentation(image), {'rect': [0, 0, 40, 30]}).data)
+
+    def test_crop_rect(self):
+        image = np.zeros((30, 40, 3))
+        image[:, 20:, :] = 1
+        expected_image = np.ones((30, 20, 3))
+        crop_rect = CropRect({'type': 'crop_rect'})
+        assert np.array_equal(expected_image, crop_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data)
+
+    def test_crop_rect_negative_coordinates_of_rect(self):
+        image = np.zeros((30, 40, 3))
+        image[:, 20:, :] = 1
+        expected_image = image
+        crop_rect = CropRect({'type': 'crop_rect'})
+        assert np.array_equal(expected_image, crop_rect(DataRepresentation(image), {'rect': [-20, 0, 40, 30]}).data)
+
+    def test_crop_rect_more_image_size_coordinates_of_rect(self):
+        image = np.zeros((30, 40, 3))
+        image[:, 20:, :] = 1
+        expected_image = np.ones((30, 20, 3))
+        crop_rect = CropRect({'type': 'crop_rect'})
+        assert np.array_equal(expected_image, crop_rect(DataRepresentation(image), {'rect': [20, 0, 40, 50]}).data)
+
+
+class TestExtendAroundRect:
+    def test_default_extend_around_rect_without_rect(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = image
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect'})
+        assert np.array_equal(expected_image, extend_image_around_rect(DataRepresentation(image), {}).data)
+
+    def test_default_extend_around_rect(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = image
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect'})
+        assert np.array_equal(
+            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data
+        )
+
+    def test_extend_around_rect_with_positive_augmentation(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(0), int(11), cv2.BORDER_REPLICATE)
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
+        assert np.array_equal(
+            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data
+        )
+
+    def test_extend_around_rect_with_negative_augmentation(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = image
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': -0.5})
+        assert np.array_equal(
+            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 30]}).data
+        )
+
+    def test_extend_around_rect_with_rect_equal_image(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(20.5), int(41), cv2.BORDER_REPLICATE)
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
+        assert np.array_equal(
+            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [0, 0, 40, 30]}).data
+        )
+
+    def test_extend_around_rect_negative_coordinates_of_rect(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(20.5), int(41), cv2.BORDER_REPLICATE)
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
+        assert np.array_equal(
+            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [-20, 0, 40, 30]}).data
+        )
+
+    def test_extend_around_rect_more_image_size_coordinates_of_rect(self):
+        image = np.random.randint(0, 255, (30, 40, 3)).astype(np.uint8)
+        expected_image = cv2.copyMakeBorder(image, int(15.5), int(31), int(0), int(11), cv2.BORDER_REPLICATE)
+        extend_image_around_rect = ExtendAroundRect({'type': 'extend_around_rect', 'augmentation_param': 0.5})
+        assert np.array_equal(
+            expected_image, extend_image_around_rect(DataRepresentation(image), {'rect': [20, 0, 40, 50]}).data
+        )
+
+
+class TestPointAlignment:
+    def test_point_alignment_width_negative_size_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            PointAligner({'type': 'point_alignment', 'size': -100})
+
+    def test_point_alignment_negative_destination_width_raise_config_error(self):
+        with pytest.raises(ConfigError):
+            PointAligner({'type': 'point_alignment', 'dst_width': -100, 'dst_height': 100})
+
+    def test_point_alignment_to_negative_destination_height_raise_config_error(self):
+        with pytest.raises(ValueError):
+            PointAligner({'type': 'point_alignment', 'dst_width': 100, 'dst_height': -100})
+
+    def test_point_alignment_provided_only_dst_height_raise_config_error(self):
+        with pytest.raises(ValueError):
+            PointAligner({'type': 'point_alignment', 'dst_height': 100})
+
+    def test_point_alignment_provided_only_dst_width_raise_config_error(self):
+        with pytest.raises(ValueError):
+            PointAligner({'type': 'point_alignment', 'dst_width': 100})
+
+    def test_point_alignment_both_provided_size_and_dst_height_dst_width_warn(self):
+        input_image = np.ones((100, 50, 3))
+
+        with pytest.warns(None) as warnings:
+            point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 100, 'dst_height': 100, 'size': 200})
+            assert len(warnings) == 1
+            result = point_aligner(DataRepresentation(input_image), {}).data
+            assert result.shape == (100, 50, 3)
+
+    def test_point_alignment_not_provided_points_im_meta(self):
+        input_image = np.ones((100, 50, 3))
+
+        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 100, 'dst_height': 100})
+        result = point_aligner(DataRepresentation(input_image), {}).data
+        assert result.shape == (100, 50, 3)
+
+    def test_point_alignment_default_use_normalization(self):
+        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
+
+        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40})
+        result = point_aligner(
+            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
+        ).data
+        transformation_matrix = point_aligner.transformation_from_points(
+            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks
+        )
+        expected_result = cv2.warpAffine(image, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
+
+        assert np.array_equal(result, expected_result)
+
+    def test_point_alignment_use_normalization(self):
+        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
+
+        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40, 'normalize': True})
+        result = point_aligner(
+            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
+        ).data
+        transformation_matrix = point_aligner.transformation_from_points(
+            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks
+        )
+        expected_result = cv2.warpAffine(image, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
+
+        assert np.array_equal(result, expected_result)
+
+    def test_point_alignment_without_normalization(self):
+        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
+
+        point_aligner = PointAligner({'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40, 'normalize': False})
+        result = point_aligner(
+            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
+        ).data
+        transformation_matrix = point_aligner.transformation_from_points(
+            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks * 40
+        )
+        expected_result = cv2.warpAffine(image, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
+
+        assert np.array_equal(result, expected_result)
+
+    def test_point_alignment_with_drawing_points(self):
+        image = np.random.randint(0, 255, (40, 40, 3)).astype(np.uint8)
+
+        point_aligner = PointAligner({
+            'type': 'point_alignment', 'dst_width': 40, 'dst_height': 40, 'draw_points': True
+        })
+        result = point_aligner(
+            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
+        ).data
+        transformation_matrix = point_aligner.transformation_from_points(
+            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks
+        )
+        expected_result = image
+        for point in PointAligner.ref_landmarks:
+            cv2.circle(expected_result, (int(point[0]), int(point[1])), 5, (255, 0, 0), -1)
+        expected_result = cv2.warpAffine(expected_result, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
+
+        assert np.array_equal(result, expected_result)
+
+    def test_point_alignment_with_resizing(self):
+        image = np.random.randint(0, 255, (80, 80, 3)).astype(np.uint8)
+
+        point_aligner = PointAligner({'type': 'point_alignment', 'size': 40})
+        result = point_aligner(
+            DataRepresentation(image), {'keypoints': PointAligner.ref_landmarks.reshape(-1).tolist()}
+        ).data
+        transformation_matrix = point_aligner.transformation_from_points(
+            point_aligner.ref_landmarks * 40, point_aligner.ref_landmarks * 0.5
+        )
+        expected_result = cv2.resize(image, (40, 40))
+        expected_result = cv2.warpAffine(expected_result, transformation_matrix, (40, 40), flags=cv2.WARP_INVERSE_MAP)
+
+        assert np.array_equal(result, expected_result)
+
+
+class TestPreprocessorExtraArgs:
+    def test_resize_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('resize', {'type': 'resize', 'size': 1, 'something_extra': 'extra'})
+
+    def test_normalization_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('normalization', {'type': 'normalization', 'mean': 0, 'something_extra': 'extra'})
+
+    def test_bgr_to_rgb_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('bgr_to_rgb',  {'type': 'bgr_to_rgb', 'something_extra': 'extra'})
+
+    def test_flip_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('flip', {'type': 'flip', 'something_extra': 'extra'})
+
+    def test_crop_accuracy_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('crop', {'type': 'crop', 'size': 1, 'something_extra': 'extra'})
+
+    def test_extend_around_rect_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('extend_around_rect', {'type': 'extend_around_rect', 'something_extra': 'extra'})
+
+    def test_point_alignment_raise_config_error_on_extra_args(self):
+        with pytest.raises(ConfigError):
+            Preprocessor.provide('point_alignment', {'type': 'point_alignment', 'something_extra': 'extra'})
diff --git a/tools/accuracy_checker/tests/test_presenter.py b/tools/accuracy_checker/tests/test_presenter.py
new file mode 100644
index 000000000..3980f243b
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_presenter.py
@@ -0,0 +1,348 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+import pytest
+from unittest.mock import MagicMock, call
+from accuracy_checker.metrics import MetricsExecutor
+from accuracy_checker.presenters import ScalarPrintPresenter, VectorPrintPresenter, EvaluationResult
+from accuracy_checker.representation import ClassificationAnnotation, ClassificationPrediction
+
+
+class TestPresenter:
+    def test_config_default_presenter(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1}]}
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
+            assert isinstance(presenter, ScalarPrintPresenter)
+
+    def test_config_scalar_presenter(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_scalar'}]}
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
+            assert isinstance(presenter, ScalarPrintPresenter)
+
+    def test_config_vector_presenter(self):
+        annotations = [ClassificationAnnotation('identifier', 3)]
+        predictions = [ClassificationPrediction('identifier', [1.0, 1.0, 1.0, 4.0])]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_vector'}]}
+        dispatcher = MetricsExecutor(config, None)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for presenter, _ in dispatcher.iterate_metrics(annotations, predictions):
+            assert isinstance(presenter, VectorPrintPresenter)
+
+    def test_config_unknown_presenter(self):
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'accuracy', 'top_k': 1, 'presenter': 'print_somehow'}]}
+        with pytest.raises(ValueError):
+            MetricsExecutor(config, None)
+
+    def test_scalar_presenter_with_scalar_data(self, mocker):
+        mock_write_scalar_result = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=0.1,
+            reference_value=None,
+            threshold=None,
+            meta={},
+        )
+        presenter = ScalarPrintPresenter()
+        presenter.write_result(result)
+        mock_write_scalar_result.assert_called_once_with(
+            result.evaluated_value,
+            result.name,
+            result.reference_value,
+            result.threshold,
+            postfix='%',
+            scale=100,
+            result_format='{:.2f}'
+        )
+
+    def test_scalar_presenter_with_vector_data(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='vector_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={},
+        )
+        presenter = ScalarPrintPresenter()
+        presenter.write_result(result)
+        mock_write_scalar_res.assert_called_once_with(
+            np.mean(result.evaluated_value),
+            result.name,
+            result.reference_value,
+            result.threshold,
+            postfix='%',
+            scale=100,
+            result_format='{:.2f}'
+        )
+
+    def test_default_format_for_scalar_presenter_with_ignore_formatting(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='vector_metric',
+            evaluated_value=[0.456],
+            reference_value=None,
+            threshold=None,
+            meta={},
+        )
+        presenter = ScalarPrintPresenter()
+        presenter.write_result(result, ignore_results_formatting=True)
+        mock_write_scalar_res.assert_called_once_with(
+            np.mean(result.evaluated_value),
+            result.name,
+            result.reference_value,
+            result.threshold,
+            postfix=' ',
+            scale=1,
+            result_format='{}'
+        )
+
+    def test_specific_format_for_scalar_presenter_with_ignore_formatting(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='vector_metric',
+            evaluated_value=[0.456],
+            reference_value=None,
+            threshold=None,
+            meta={'scale': 0.5, 'postfix': 'km/h', 'data_format': '{:.4f}'},
+        )
+        presenter = ScalarPrintPresenter()
+        presenter.write_result(result, ignore_results_formatting=True)
+        mock_write_scalar_res.assert_called_once_with(
+            np.mean(result.evaluated_value),
+            result.name,
+            result.reference_value,
+            result.threshold,
+            postfix=' ',
+            scale=1,
+            result_format='{}'
+        )
+
+    def test_vector_presenter_with_scaler_data(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=0.4,
+            reference_value=None,
+            threshold=None,
+            meta={},
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result)
+        mock_write_scalar_res.assert_called_once_with(
+            result.evaluated_value,
+            result.name,
+            result.reference_value,
+            result.threshold,
+            postfix='%',
+            scale=100,
+            value_name=None,
+            result_format='{:.2f}'
+        )
+
+    def test_vector_presenter_with_vector_data_contain_one_element(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['prediction']}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result)
+        mock_write_scalar_res.assert_called_once_with(
+            result.evaluated_value,
+            result.name,
+            result.reference_value,
+            result.threshold,
+            postfix='%',
+            scale=100,
+            value_name=result.meta['names'][0],
+            result_format='{:.2f}'
+        )
+
+    def test_vector_presenter_with_vector_data_with_default_postfix_and_scale(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['class1', 'class2']}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result)
+        calls = [
+            call(
+                result.evaluated_value[0], result.name, result.reference_value, result.threshold,
+                postfix='%', scale=100, value_name=result.meta['names'][0], result_format='{:.2f}'
+            ),
+            call(
+                result.evaluated_value[1], result.name, result.reference_value, result.threshold,
+                postfix='%', scale=100, value_name=result.meta['names'][1],  result_format='{:.2f}'
+            ),
+            call(
+                np.mean(np.multiply(result.evaluated_value, 100)), result.name, result.reference_value,
+                result.threshold, value_name='mean', postfix='%', scale=1, result_format='{:.2f}'
+            )
+        ]
+        mock_write_scalar_res.assert_has_calls(calls)
+
+    def test_vector_presenter_with_vector_data_has_default_format_with_ignore_formatting(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['class1', 'class2']}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result, ignore_results_formatting=True)
+        calls = [
+            call(
+                result.evaluated_value[0], result.name, result.reference_value, result.threshold,
+                postfix=' ', scale=1, value_name=result.meta['names'][0], result_format='{}'
+            ),
+            call(
+                result.evaluated_value[1], result.name, result.reference_value, result.threshold,
+                postfix=' ', scale=1, value_name=result.meta['names'][1], result_format='{}'
+            ),
+            call(
+                np.mean(np.multiply(result.evaluated_value, 1)), result.name, result.reference_value, result.threshold,
+                value_name='mean', postfix=' ', scale=1, result_format='{}'
+            )
+        ]
+        mock_write_scalar_res.assert_has_calls(calls)
+
+    def test_vector_presenter_with_vector_data_has_specific_format_with_ignore_formatting(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['class1', 'class2'], 'scale': 0.5, 'postfix': 'km/h', 'data_format': '{:.4f}'}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result, ignore_results_formatting=True)
+        calls = [
+            call(
+                result.evaluated_value[0], result.name, result.reference_value, result.threshold,
+                postfix=' ', scale=1, value_name=result.meta['names'][0], result_format='{}'
+            ),
+            call(
+                result.evaluated_value[1], result.name, result.reference_value, result.threshold,
+                postfix=' ', scale=1, value_name=result.meta['names'][1], result_format='{}'
+            ),
+            call(
+                np.mean(np.multiply(result.evaluated_value, 1)), result.name, result.reference_value, result.threshold,
+                value_name='mean', postfix=' ', scale=1, result_format='{}'
+            )
+        ]
+        mock_write_scalar_res.assert_has_calls(calls)
+
+    def test_vector_presenter_with_vector_data_with_scalar_postfix(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['class1', 'class2'], 'postfix': '_'}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result)
+        calls = [
+            call(result.evaluated_value[0], result.name, result.reference_value, result.threshold,
+                 postfix=result.meta['postfix'], scale=100, value_name=result.meta['names'][0], result_format='{:.2f}'
+                 ),
+            call(
+                result.evaluated_value[1], result.name, result.reference_value, result.threshold,
+                postfix=result.meta['postfix'], scale=100, value_name=result.meta['names'][1], result_format='{:.2f}'
+            ),
+            call(
+                np.mean(np.multiply(result.evaluated_value, 100)), result.name, result.reference_value,
+                result.threshold, value_name='mean', postfix=result.meta['postfix'], scale=1,  result_format='{:.2f}'
+            )
+        ]
+        mock_write_scalar_res.assert_has_calls(calls)
+
+    def test_vector_presenter_with_vector_data_with_scalar_scale(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['class1', 'class2'], 'scale': 10}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result)
+        calls = [
+            call(
+                result.evaluated_value[0], result.name, result.reference_value, result.threshold,
+                postfix='%', scale=result.meta['scale'], value_name=result.meta['names'][0], result_format='{:.2f}'
+            ),
+            call(
+                result.evaluated_value[1], result.name, result.reference_value, result.threshold,
+                postfix='%', scale=result.meta['scale'], value_name=result.meta['names'][1], result_format='{:.2f}'
+            ),
+            call(
+                np.mean(np.multiply(result.evaluated_value, result.meta['scale'])), result.name, result.reference_value,
+                result.threshold, value_name='mean', postfix='%', scale=1, result_format='{:.2f}'
+            )
+        ]
+        mock_write_scalar_res.assert_has_calls(calls)
+
+    def test_vector_presenter_with_vector_data_with_vector_scale(self, mocker):
+        mock_write_scalar_res = mocker.patch('accuracy_checker.presenters.write_scalar_result')  # type: MagicMock
+        result = EvaluationResult(
+            name='scalar_metric',
+            evaluated_value=[0.4, 0.6],
+            reference_value=None,
+            threshold=None,
+            meta={'names': ['class1', 'class2'], 'scale': [1, 2]}
+        )
+        presenter = VectorPrintPresenter()
+        presenter.write_result(result)
+        calls = [
+            call(
+                result.evaluated_value[0], result.name, result.reference_value, result.threshold,
+                postfix='%', scale=result.meta['scale'][0], result_format='{:.2f}', value_name=result.meta['names'][0]
+            ),
+            call(
+                result.evaluated_value[1], result.name, result.reference_value, result.threshold, postfix='%',
+                scale=result.meta['scale'][1], result_format='{:.2f}', value_name=result.meta['names'][1]
+            ),
+            call(
+                np.mean(np.multiply(result.evaluated_value, result.meta['scale'])), result.name, result.reference_value,
+                result.threshold, result_format='{:.2f}', value_name='mean', postfix='%', scale=1
+            )
+        ]
+        mock_write_scalar_res.assert_has_calls(calls)
diff --git a/tools/accuracy_checker/tests/test_regression_metrics.py b/tools/accuracy_checker/tests/test_regression_metrics.py
new file mode 100644
index 000000000..3829b5ae2
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_regression_metrics.py
@@ -0,0 +1,338 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pytest
+from accuracy_checker.metrics import MetricsExecutor
+from accuracy_checker.representation import RegressionPrediction, RegressionAnnotation
+from accuracy_checker.presenters import EvaluationResult
+
+
+class TestRegressionMetric:
+    def setup_method(self):
+        self.module = 'accuracy_checker.metrics.metric_evaluator'
+
+    def test_mae_with_zero_diff_between_annotation_and_prediction(self):
+        annotations = [RegressionAnnotation('identifier', 3)]
+        predictions = [RegressionPrediction('identifier', 3)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae'}]}
+        expected = EvaluationResult(
+            pytest.approx([0.0, 0.0]),
+            None,
+            'mae',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_with_negative_diff_between_annotation_and_prediction(self):
+        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
+        predictions = [RegressionPrediction('identifier', 5), RegressionPrediction('identifier2', 5)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae'}]}
+        expected = EvaluationResult(
+            pytest.approx([3.0, 1.0]),
+            None,
+            'mae',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_with_positive_diff_between_annotation_and_prediction(self):
+        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
+        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier2', -3)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae'}]}
+        expected = EvaluationResult(
+            pytest.approx([3.0, 1.0]),
+            None,
+            'mae',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mse_with_zero_diff_between_annotation_and_prediction(self):
+        annotations = [RegressionAnnotation('identifier', 3)]
+        predictions = [RegressionPrediction('identifier', 3)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mse'}]}
+        expected = EvaluationResult(
+            pytest.approx([0.0, 0.0]),
+            None,
+            'mse',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mse_with_negative_diff_between_annotation_and_prediction(self):
+        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
+        predictions = [RegressionPrediction('identifier', 5), RegressionPrediction('identifier2', 5)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mse'}]}
+        expected = EvaluationResult(
+            pytest.approx([10.0, 6.0]),
+            None,
+            'mse',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mse_with_positive_diff_between_annotation_and_prediction(self):
+        annotations = [RegressionAnnotation('identifier', 3), RegressionAnnotation('identifier2', 1)]
+        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier2', -3)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mse'}]}
+        expected = EvaluationResult(
+            pytest.approx([10.0, 6.0]),
+            None,
+            'mse',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean', 'std'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_missed_interval(self):
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae_on_interval'}]}
+        with pytest.raises(ValueError):
+            MetricsExecutor(config, None)
+
+    def test_mae_on_interval_default_all_missed(self):
+        annotations = [RegressionAnnotation('identifier', -2)]
+        predictions = [RegressionPrediction('identifier', 1)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae_on_interval', 'end': 1}]}
+        expected = EvaluationResult(
+            pytest.approx([0.0]),
+            None,
+            'mae_on_interval',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': [], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        with pytest.warns(UserWarning) as warnings:
+            for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+                assert len(warnings) == 1
+                assert evaluation_result == expected
+
+    def test_mae_on_interval_default_all_not_in_range_not_ignore_out_of_range(self):
+        annotations = [RegressionAnnotation('identifier', -1), RegressionAnnotation('identifier', 2)]
+        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier', 2)]
+        expected = EvaluationResult(
+            pytest.approx([2.0, 0.0, 0.0, 0.0]),
+            None,
+            'mae_on_interval',
+            None,
+            {
+                'postfix': ' ',
+                'scale': 1,
+                'names': ['mean: < 0.0', 'std: < 0.0', 'mean: > 1.0', 'std: > 1.0'],
+                'calculate_mean': False
+            }
+        )
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'mae_on_interval', 'end': 1, 'ignore_values_not_in_interval': False}]
+        }
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_on_interval_values_in_range(self):
+        annotations = [RegressionAnnotation('identifier', 0.5), RegressionAnnotation('identifier', 0.5)]
+        predictions = [RegressionPrediction('identifier', 1), RegressionPrediction('identifier', 0.25)]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae_on_interval', 'end': 1}]}
+        expected = EvaluationResult(
+            pytest.approx([0.375, 0.125]),
+            None,
+            'mae_on_interval',
+            None,
+            {'postfix': ' ', 'scale': 1, 'names': ['mean: <= 0.0 < 1.0', 'std: <= 0.0 < 1.0'], 'calculate_mean': False}
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_on_interval_default_not_ignore_out_of_range(self):
+        annotations = [
+            RegressionAnnotation('identifier', -1),
+            RegressionAnnotation('identifier',  2),
+            RegressionAnnotation('identifier', 0.5)
+        ]
+        predictions = [
+            RegressionPrediction('identifier', 1),
+            RegressionPrediction('identifier', 2),
+            RegressionPrediction('identifier', 1)
+        ]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'mae_on_interval', 'end': 1, 'ignore_values_not_in_interval': False}]
+        }
+        expected = EvaluationResult(
+            pytest.approx([2.0, 0.0, 0.5, 0.0,  0.0, 0.0]),
+            None,
+            'mae_on_interval',
+            None,
+            {
+                'postfix': ' ',
+                'scale': 1,
+                'names': [
+                    'mean: < 0.0',
+                    'std: < 0.0',
+                    'mean: <= 0.0 < 1.0',
+                    'std: <= 0.0 < 1.0',
+                    'mean: > 1.0',
+                    'std: > 1.0'
+                ],
+                'calculate_mean': False
+            }
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_on_interval_with_given_interval(self):
+        annotations = [
+            RegressionAnnotation('identifier', -1),
+            RegressionAnnotation('identifier',  2),
+            RegressionAnnotation('identifier',  1)
+        ]
+        predictions = [
+            RegressionPrediction('identifier', 1),
+            RegressionPrediction('identifier', 3),
+            RegressionPrediction('identifier', 1)
+        ]
+        config = {
+            'annotation': 'mocked',
+            'metrics': [{'type': 'mae_on_interval', 'intervals': [0.0, 2.0, 4.0]}]
+        }
+        expected = EvaluationResult(
+            pytest.approx([0.0, 0.0, 1.0, 0.0]),
+            None,
+            'mae_on_interval',
+            None,
+            {
+                'postfix': ' ',
+                'scale': 1,
+                'names': ['mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0', 'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'],
+                'calculate_mean': False
+            }
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_on_interval_with_repeated_values(self):
+        annotations = [
+            RegressionAnnotation('identifier', -1),
+            RegressionAnnotation('identifier',  2),
+            RegressionAnnotation('identifier', 1)
+        ]
+        predictions = [
+            RegressionPrediction('identifier', 1),
+            RegressionPrediction('identifier', 3),
+            RegressionPrediction('identifier', 1)
+        ]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae_on_interval', 'intervals': [0.0, 2.0, 2.0, 4.0]}]}
+        expected = EvaluationResult(
+            pytest.approx([0.0, 0.0, 1.0, 0.0]),
+            None,
+            'mae_on_interval',
+            None,
+            {
+                'postfix': ' ',
+                'scale': 1,
+                'names': ['mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0', 'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'],
+                'calculate_mean': False
+            }
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_mae_on_interval_with_unsorted_values(self):
+        annotations = [
+            RegressionAnnotation('identifier', -1),
+            RegressionAnnotation('identifier',  2),
+            RegressionAnnotation('identifier',  1)
+        ]
+        predictions = [
+            RegressionPrediction('identifier', 1),
+            RegressionPrediction('identifier', 3),
+            RegressionPrediction('identifier', 1)
+        ]
+        config = {'annotation': 'mocked', 'metrics': [{'type': 'mae_on_interval', 'intervals': [2.0,  0.0, 4.0]}]}
+        expected = EvaluationResult(
+            pytest.approx([0.0, 0.0, 1.0, 0.0]),
+            None,
+            'mae_on_interval',
+            None,
+            {
+                'postfix': ' ', 'scale': 1,
+                'names': ['mean: <= 0.0 < 2.0', 'std: <= 0.0 < 2.0', 'mean: <= 2.0 < 4.0', 'std: <= 2.0 < 4.0'],
+                'calculate_mean': False
+            }
+        )
+        dispatcher = MetricsExecutor(config, None)
+
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
diff --git a/tools/accuracy_checker/tests/test_reid_metrics.py b/tools/accuracy_checker/tests/test_reid_metrics.py
new file mode 100644
index 000000000..b73008a2e
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_reid_metrics.py
@@ -0,0 +1,77 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+from accuracy_checker.metrics.reid import eval_cmc
+
+
+class TestCMC:
+    def test_only_distance_matrix(self):
+        distance_matrix = np.array([
+            [0, 1, 2, 3, 4],
+            [1, 0, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [0, 1, 2, 3, 4],
+            [1, 2, 3, 4, 0]
+        ])
+        m, n = distance_matrix.shape
+
+        result = eval_cmc(
+            distance_matrix,
+            query_ids=np.arange(m),
+            gallery_ids=np.arange(n),
+            query_cams=np.zeros(m).astype(np.int32),
+            gallery_cams=np.ones(n).astype(np.int32)
+        )
+
+        assert np.all(result[:5] == [0.6, 0.6, 0.8, 1.0, 1.0])
+
+    def test_duplicate_ids(self):
+        distance_matrix = np.array([
+            [0, 1, 2, 3],
+            [0, 1, 2, 3],
+            [0, 1, 2, 3],
+            [0, 1, 2, 3]
+        ])
+
+        result = eval_cmc(
+            distance_matrix,
+            query_ids=np.array([0, 0, 1, 1]),
+            gallery_ids=np.array([0, 0, 1, 1]),
+            top_k=4,
+            gallery_cams=np.ones(distance_matrix.shape[1]).astype(np.int32),
+            query_cams=np.zeros(distance_matrix.shape[0]).astype(np.int32),
+            separate_camera_set=False,
+            single_gallery_shot=False
+        )
+
+        assert np.all(result == [0.5, 0.5, 1, 1])
+
+    def test_duplicate_cams(self):
+        distance_matrix = np.tile(np.arange(5), (5, 1))
+
+        result = eval_cmc(
+            distance_matrix,
+            query_ids=np.array([0, 0, 0, 1, 1]),
+            gallery_ids=np.array([0, 0, 0, 1, 1]),
+            query_cams=np.array([0, 0, 0, 0, 0]),
+            gallery_cams=np.array([0, 1, 1, 1, 1]),
+            top_k=5,
+            separate_camera_set=False,
+            single_gallery_shot=False
+        )
+
+        assert np.all(result == [0.6, 0.6, 0.6, 1, 1])
diff --git a/tools/accuracy_checker/tests/test_segmentation_metrics.py b/tools/accuracy_checker/tests/test_segmentation_metrics.py
new file mode 100644
index 000000000..03095fc63
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_segmentation_metrics.py
@@ -0,0 +1,164 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pytest
+import numpy as np
+from accuracy_checker.metrics import MetricsExecutor
+from accuracy_checker.presenters import EvaluationResult
+from .common import single_class_dataset, multi_class_dataset, make_segmentation_representation
+
+
+def create_config(metric_name, use_argmax=False):
+    return {'annotation': 'mocked', 'metrics': [{'type': metric_name, 'use_argmax': use_argmax}]}
+
+
+def generate_expected_result(values, metric_name, labels=None):
+    meta = {'names': list(labels.values())} if labels else {}
+
+    return EvaluationResult(pytest.approx(values), None, metric_name, None, meta)
+
+
+class TestPixelAccuracy:
+    name = 'segmentation_accuracy'
+
+    def test_one_class(self):
+        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
+        dispatcher = MetricsExecutor(create_config(self.name), single_class_dataset())
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result(1.0, self.name)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class_not_matched(self):
+        annotations = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), False)
+        dispatcher = MetricsExecutor(create_config(self.name), multi_class_dataset())
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result(0.0, self.name)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class(self):
+        annotations = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), False)
+        dispatcher = MetricsExecutor(create_config(self.name), multi_class_dataset())
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result((5.0+1.0+1.0)/(8.0+1.0+1.0), self.name)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+
+class TestMeanAccuracy:
+    name = 'mean_accuracy'
+
+    def test_one_class(self):
+        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
+        dataset = single_class_dataset()
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result([1.0, 0.0], self.name, dataset.labels)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class_not_matched(self):
+        annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
+        dataset = multi_class_dataset()
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result([0.0, 0.0, 0.0, 0.0], self.name, dataset.labels)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class(self):
+        dataset = multi_class_dataset()
+        annotations = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), False)
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result([1.0, 1.0, 0.0, 0.5], self.name, dataset.labels)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+
+class TestMeanIOU:
+    name = 'mean_iou'
+
+    def test_one_class(self):
+        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
+        dataset = single_class_dataset()
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result([1.0, 0.0], self.name, dataset.labels)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class_not_matched(self):
+        annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
+        dataset = multi_class_dataset()
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result([0.0, 0.0, 0.0, 0.0], self.name, dataset.labels)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class(self):
+        dataset = multi_class_dataset()
+        annotations = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), False)
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result([0.625, 1.0, 0.0, 0.5], self.name, dataset.labels)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+
+class TestSegmentationFWAcc:
+    name = 'frequency_weighted_accuracy'
+
+    def test_one_class(self):
+        annotations = make_segmentation_representation(np.array([[0, 0], [0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0], [0, 0]]), False)
+        dataset = single_class_dataset()
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result(1.0, self.name)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class_not_matched(self):
+        annotations = make_segmentation_representation(np.array([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]), True)
+        predictions = make_segmentation_representation(np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]), False)
+        dataset = multi_class_dataset()
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result(0.0, self.name)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
+
+    def test_multi_class(self):
+        dataset = multi_class_dataset()
+        annotations = make_segmentation_representation(np.array([[1, 2, 3, 2, 3], [0, 0, 0, 0, 0]]), True)
+        predictions = make_segmentation_representation(np.array([[1, 0, 3, 0, 0], [0, 0, 0, 0, 0]]), False)
+        dispatcher = MetricsExecutor(create_config(self.name), dataset)
+        dispatcher.update_metrics_on_batch(annotations, predictions)
+        expected = generate_expected_result(0.5125, self.name)
+        for _, evaluation_result in dispatcher.iterate_metrics(annotations, predictions):
+            assert evaluation_result == expected
diff --git a/tools/accuracy_checker/tests/test_utils.py b/tools/accuracy_checker/tests/test_utils.py
new file mode 100644
index 000000000..4ac9cdff2
--- /dev/null
+++ b/tools/accuracy_checker/tests/test_utils.py
@@ -0,0 +1,127 @@
+"""
+Copyright (c) 2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from accuracy_checker.utils import concat_lists, contains_all, contains_any, overrides, zipped_transform
+
+
+def test_concat_lists():
+    assert ['a', 'b'] == concat_lists(['a'], ['b'])
+    assert ['a', 'b', 'c'] == concat_lists(['a'], ['b'], ['c'])
+    assert ['a', 'b', 'c'] == concat_lists(['a', 'b'], ['c'])
+    assert ['a'] == concat_lists(['a'], [])
+    assert [] == concat_lists([], [], [])
+    assert [] == concat_lists([])
+
+
+def test_contains_all():
+    assert contains_all([1, 2, 3], [1, 2])
+    assert contains_all([1, 2, 3], [1, 2], [3])
+    assert not contains_all([1, 2, 3], [1, 5])
+
+
+def test_contains_any():
+    assert contains_any([1, 2, 3], [1])
+    assert contains_any([1, 2, 3], [4, 5, 2])
+    assert not contains_any([1, 2, 3], [4, 5])
+
+
+class TestZippedTransform:
+    def test_two_iterables(self):
+        as_ = [2, 3, 5]
+        bs = [2, 3, 6]
+
+        ras, rbs = zipped_transform(lambda a, b: (a + b, a - b), as_, bs)
+
+        assert ras == [4, 6, 11]
+        assert rbs == [0, 0, -1]
+        assert as_ == [2, 3, 5]
+        assert bs == [2, 3, 6]
+
+    def test_inplace(self):
+        as_ = [2, 3, 5]
+        bs = [2, 3, 6]
+
+        zipped_transform(lambda a, b: (a + b, a - b), as_, bs, inplace=True)
+
+        assert as_ == [4, 6, 11]
+        assert bs == [0, 0, -1]
+
+    def test_three_iterables(self):
+        as_ = [1, 1, 1]
+        bs = [2, 2, 2]
+        cs = [3, 3, 3]
+
+        ras, rbs, rcs = zipped_transform(lambda a, b, c: (a + 1, b + 2, c + 3), as_, bs, cs)
+
+        assert ras == [2, 2, 2]
+        assert rbs == [4, 4, 4]
+        assert rcs == [6, 6, 6]
+
+    def test_none_function(self):
+        xs = [1, 1, 1]
+        ys = [1, 1, 1]
+        zipped_transform(lambda a, b: None, xs, ys)
+
+
+class TestOverrides:
+    def test_negative(self):
+        class A:
+            def foo(self):
+                pass
+
+        class B(A):
+            pass
+
+        assert not overrides(B, 'foo')
+        assert not overrides(B(), 'foo')
+
+    def test_positive(self):
+        class A:
+            def foo(self):
+                pass
+
+        class B(A):
+            def foo(self):
+                pass
+
+        assert overrides(B, 'foo')
+        assert overrides(B(), 'foo')
+
+    def test_three_class(self):
+        class A:
+            def foo(self): pass
+
+        class B(A):
+            pass
+
+        class C(B):
+            def foo(self): pass
+
+        assert overrides(C, 'foo')
+        assert not overrides(B, 'foo')
+
+    def test_custom_base(self):
+        class A:
+            def foo(self): pass
+
+        class B:
+            def foo(self): pass
+
+        class C:
+            pass
+
+        assert overrides(B, 'foo', A)
+        assert not overrides(C, 'foo', A)
diff --git a/tools/benchmark/README.md b/tools/benchmark/README.md
new file mode 100644
index 000000000..16dcdc0a7
--- /dev/null
+++ b/tools/benchmark/README.md
@@ -0,0 +1,31 @@
+# OpenVINO™ Benchmark Python* package
+Inference Engine `openvino.tools.benchmark` Python\* package consists types to measure synchronous mode latency.  
+The package depends on `openvino.tools.accuracy_checker` the package.
+
+Please, refer to https://docs.openvinotoolkit.org for details.
+
+## Usage
+You can use the `openvino.tools.calibration` package in a simple way:
+```Python
+import openvino.tools.benchmark as benchmark
+
+config = benchmark.CommandLineReader.read()
+result = benchmark.Benchmark(config).run()
+print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
+```
+### Explanation
+1. Import `openvino.tools.benchmark` types:
+```Python
+import openvino.tools.benchmark as benchmark
+```
+
+2. Read configuration and execute the benchmark:
+```Python
+config = benchmark.CommandLineReader.read()
+result = benchmark.Benchmark(config).run()
+```
+
+3. Print results:
+```Python
+print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
+```
+\ No newline at end of file
diff --git a/tools/benchmark/__init__.py b/tools/benchmark/__init__.py
new file mode 100644
index 000000000..d5f2cf5fe
--- /dev/null
+++ b/tools/benchmark/__init__.py
@@ -0,0 +1,26 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .benchmark import Benchmark
+from .command_line_reader import CommandLineReader
+from .configuration import Configuration
+
+__version__ = "0.0.1"
+__all__ = [
+    'Benchmark',
+    'CommandLineReader',
+    'Configuration'
+]
diff --git a/tools/benchmark/__main__.py b/tools/benchmark/__main__.py
new file mode 100644
index 000000000..5beda67db
--- /dev/null
+++ b/tools/benchmark/__main__.py
@@ -0,0 +1,28 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import openvino.tools.benchmark as benchmark
+
+
+def benchmark():
+
+    config = benchmark.CommandLineReader.read()
+    result = benchmark.Benchmark(config).run()
+    print("{0}: {1:.4} ms".format(config.model, result.latency * 1000.0))
+
+
+if __name__ == '__main__':
+    benchmark()
diff --git a/tools/benchmark/benchmark.py b/tools/benchmark/benchmark.py
new file mode 100644
index 000000000..07cc84516
--- /dev/null
+++ b/tools/benchmark/benchmark.py
@@ -0,0 +1,157 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy
+import datetime
+
+import openvino.inference_engine as ie
+
+from ..accuracy_checker.accuracy_checker.config import ConfigReader
+from ..accuracy_checker.accuracy_checker.model_evaluator import ModelEvaluator
+from ..accuracy_checker.accuracy_checker.progress_reporters import PrintProgressReporter, TQDMReporter
+
+from ..network import Network
+
+from .configuration import Configuration
+from .logging import info
+
+
+class BenchmarkCallback:
+    def __init__(self, configuration: Configuration, network: Network=None, iterations_count:int=1000):
+        self._latency = None
+        self._configuration = configuration
+        self._network = network
+        self._iterations_count = iterations_count if iterations_count else 1000
+
+    def output_callback(self, value, latency = None):
+        pass
+
+
+    def benchmark_callback(self, network_inputs_data):
+        latencies = list()
+
+        if self._network:
+            ie_network = self._network.ie_network
+        else:
+            ie_network = ie.IENetwork(self._configuration.model, self._configuration.weights)
+        plugin = ie.IEPlugin(self._configuration.device)
+        if self._configuration.cpu_extension:
+            plugin.add_cpu_extension(self._configuration.cpu_extension)
+        exec_network = plugin.load(ie_network)
+
+        # warming up
+        exec_network.infer(network_inputs_data)
+
+        for i in range(self._iterations_count):
+            start = datetime.datetime.now()
+            exec_network.infer(network_inputs_data)
+            latencies.append((datetime.datetime.now() - start).microseconds)
+        self._latency = numpy.mean(latencies) / 1000000.0
+
+        del ie_network
+        del exec_network
+        del plugin
+
+
+    @property
+    def latency(self) -> float:
+        return self._latency
+
+
+class BenchmarkResult:
+    def __init__(self, latency):
+        self._latency = latency
+
+    @property
+    def latency(self) -> float:
+        return self._latency
+
+
+class InferOptions:
+    def __init__(self, iterations_count=1000):
+        self._iterations_count = iterations_count
+
+    @property
+    def iterations_count(self) -> int:
+        return self._iterations_count
+
+
+class Benchmark:
+    def __init__(self, configuration: Configuration):
+        if configuration is None:
+            raise ValueError("configuration is None")
+
+        self._configuration = configuration
+        pass
+
+    def run(
+        self,
+        network: Network = None,
+        statistics=None,
+        quantization_levels=None,
+        iterations_count:int = 1000) -> BenchmarkResult:
+
+        model = self._configuration.config['models'][0]
+        launcher_config = model['launchers'][0]
+        dataset_config = model['datasets'][0]
+
+        model_evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
+        try:
+            if network:
+                del model_evaluator.launcher.network
+                del model_evaluator.launcher.exec_network
+                model_evaluator.launcher.network = network.ie_network
+                model_evaluator.launcher.exec_network = model_evaluator.launcher.plugin.load(network.ie_network)
+
+            ie_network = model_evaluator.launcher.network
+
+            if statistics:
+                network_stats = {}
+                for layer_name, node_statistic in statistics.items():
+                    network_stats[layer_name] = ie.LayerStats(
+                        min=tuple(node_statistic.min_outputs),
+                        max=tuple(node_statistic.max_outputs))
+                ie_network.stats.update(network_stats)
+
+            if quantization_levels:
+                for layer_name, value in quantization_levels.items():
+                    params = ie_network.layers[layer_name].params
+                    params["quantization_level"] = value
+                    ie_network.layers[layer_name].params = params
+
+            if model_evaluator.dataset.size != 1:
+                info("only one first image is used from dataset annotation to perform benchmark")
+                model_evaluator.dataset.size = 1
+
+            process_dataset_callback = BenchmarkCallback(
+                configuration=self._configuration,
+                network=network,
+                iterations_count=iterations_count)
+
+            model_evaluator.process_dataset(
+                None,
+                progress_reporter=None,
+                output_callback=process_dataset_callback.output_callback,
+                benchmark=process_dataset_callback.benchmark_callback)
+
+            if len(model_evaluator.launcher.exec_network.requests) != 1:
+                raise ValueError("unexpected network requests count")
+
+            latency = process_dataset_callback.latency
+        finally:
+            model_evaluator.release()
+
+        return BenchmarkResult(latency)
+\ No newline at end of file
diff --git a/tools/benchmark/command_line_reader.py b/tools/benchmark/command_line_reader.py
new file mode 100644
index 000000000..4599b2867
--- /dev/null
+++ b/tools/benchmark/command_line_reader.py
@@ -0,0 +1,155 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import collections
+import errno
+import pathlib
+from functools import partial
+from argparse import ArgumentParser
+from typing import Union
+
+from ..accuracy_checker.accuracy_checker.config import ConfigReader
+from ..accuracy_checker.accuracy_checker.utils import get_path
+from ..network import Network
+
+from .configuration import Configuration
+from .logging import info
+
+
+class CommandLineReader:
+    """
+    Class for parsing input config
+    """
+    @staticmethod
+    def read():
+        args, unknown_args = CommandLineReader.__build_arguments_parser().parse_known_args()
+        if unknown_args:
+            info("unknown command line arguments: {0}".format(unknown_args))
+
+        args.target_framework = "dlsdk"
+        args.aocl = None
+
+        merged_config = ConfigReader.merge(args)
+        launcher = merged_config['models'][0]['launchers'][0]
+
+        batch_size = args.batch_size if args.batch_size else (launcher['batch'] if 'batch' in launcher else None)
+        if not batch_size:
+            with Network(str(launcher['model']), str(launcher['weights'])) as network:
+                batch_size = network.ie_network.batch_size
+
+        return Configuration(
+            config = merged_config,
+            model = str(launcher['model']),
+            weights = str(launcher['weights']),
+            cpu_extension = (str(launcher['cpu_extensions']) if 'cpu_extensions' in launcher else None),
+            gpu_extension = (str(launcher['gpu_extensions']) if 'gpu_extensions' in launcher else None),
+            device = launcher['device'],
+            benchmark_iterations_count = args.benchmark_iterations_count)
+
+    @staticmethod
+    def __build_arguments_parser():
+        parser = ArgumentParser(description='openvino.tools.benchmark')
+
+        parser.add_argument(
+            '-d', '--definitions',
+            help='Optional. Path to the YML file with definitions',
+            type=str,
+            required=False)
+
+        parser.add_argument(
+            '-c',
+            '--config',
+            help='Required. Path to the YML file with local configuration',
+            type=get_path,
+            required=True)
+
+        parser.add_argument(
+            '-m', '--models',
+            help='Optional. Prefix path to the models and weights',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-s', '--source',
+            help='Optional. prefix path to the data source',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-a', '--annotations',
+            help='Optional. prefix path to the converted annotations and datasets meta data',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-e', '--extensions',
+            help='Optional. Prefix path to extensions folder',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '--cpu_extensions_mode',
+            help='Optional. specified preferable set of processor instruction for automatic searching cpu extension lib',
+            required=False,
+            choices=['avx2', 'sse4'])
+
+        parser.add_argument(
+            '-b', '--bitstreams',
+            help='Optional. prefix path to bitstreams folder',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-C', '--converted_models', '--converted-models',
+            help='Optional. directory to store Model Optimizer converted models. Used for DLSDK launcher only',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-td', '--target_devices', '--target-devices',
+            help='Optional. Space-separated list of devices for infer',
+            required=False,
+            nargs='+',
+            default=["CPU"])
+
+        parser.add_argument(
+            '-tt', '--target_tags', '--target-tags',
+            help='Optional. Space-separated list of launcher tags for infer',
+            required=False,
+            nargs='+')
+
+        parser.add_argument(
+            '--batch-size',
+            help='Optional. Batch size value. If not specified, the batch size value is determined from IR',
+            type=int,
+            required=False)
+
+        parser.add_argument(
+            '-ic',
+            '--benchmark_iterations_count',
+            help='Optional. Benchmark itertations count. (1000 is default)',
+            type=float,
+            required=False,
+            default=1000)
+
+        return parser
+\ No newline at end of file
diff --git a/tools/benchmark/configuration.py b/tools/benchmark/configuration.py
new file mode 100644
index 000000000..af3d6dc9c
--- /dev/null
+++ b/tools/benchmark/configuration.py
@@ -0,0 +1,64 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Configuration:
+    def __init__(
+        self,
+        config: str,
+        model: str,
+        weights: str,
+        device: str,
+        cpu_extension: str,
+        gpu_extension: str,
+        benchmark_iterations_count: int
+    ):
+
+        self._config = config
+        self._model = model
+        self._weights = weights
+        self._device = device
+        self._cpu_extension = cpu_extension
+        self._gpu_extension = gpu_extension
+        self._benchmark_iterations_count = benchmark_iterations_count
+
+    @property
+    def config(self) -> str:
+        return self._config
+
+    @property
+    def model(self) -> str:
+        return self._model
+
+    @property
+    def weights(self) -> str:
+        return self._weights
+
+    @property
+    def device(self) -> str:
+        return self._device
+
+    @property
+    def cpu_extension(self) -> str:
+        return self._cpu_extension
+
+    @property
+    def gpu_extension(self) -> str:
+        return self._gpu_extension
+
+    @property
+    def benchmark_iterations_count(self):
+        return self._benchmark_iterations_count
+\ No newline at end of file
diff --git a/tools/benchmark/logging.py b/tools/benchmark/logging.py
new file mode 100644
index 000000000..f3fec905f
--- /dev/null
+++ b/tools/benchmark/logging.py
@@ -0,0 +1,125 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import logging.config
+import sys
+import warnings
+
+# TODO: move to utils
+_DEFAULT_LOGGER_NAME = 'openvino.tools.benchmark'
+_DEFAULT_LOG_FILE = 'openvino.tools.benchmark.log'
+
+PRINT_INFO = logging.INFO + 5
+logging.addLevelName(PRINT_INFO, "PRINT_INFO")
+
+_LOG_LEVEL_ENVIRON = "CALIBRATION_TOOL_LOG_LEVEL"
+# _LOGGING_LEVEL = logging.getLevelName(os.environ.get(_LOG_LEVEL_ENVIRON, PRINT_INFO))
+# TODO: refactoring: remove, use original line
+_LOGGING_LEVEL = "DEBUG"
+
+
+class LoggingFormatter(logging.Formatter):
+    def format(self, record: logging.LogRecord):
+        if record.levelno == PRINT_INFO:
+            return record.msg
+        return super().format(record)
+
+
+class ConsoleHandler(logging.StreamHandler):
+    def __init__(self, default_stream=sys.stdout):
+        super().__init__(default_stream)
+        self.default_stream = default_stream
+        self.err_stream = sys.stderr
+
+    def emit(self, record):
+        if record.levelno >= logging.WARNING:
+            self.stream = self.err_stream
+        else:
+            self.stream = self.default_stream
+        super().emit(record)
+
+
+_LOGGING_CONFIGURATION = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'default': {
+            '()': LoggingFormatter,
+            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s',
+            'datefmt': '%H:%M:%S'
+        },
+        'detailed': {
+            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s'
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            '()': ConsoleHandler,
+            'formatter': 'default',
+        }
+    },
+
+    'loggers': {
+        _DEFAULT_LOGGER_NAME: {
+            'handlers': ['console'],
+            'level': _LOGGING_LEVEL,
+            'propagate': False
+        }
+    }
+}
+
+logging.config.dictConfig(_LOGGING_CONFIGURATION)
+
+_default_logger = logging.getLogger(_DEFAULT_LOGGER_NAME)
+
+
+def _warning_handler(message, category, filename, lineno):
+    s = warnings.formatwarning(message, category, filename, lineno)
+    _default_logger.warning(s)
+
+
+warnings.showwarning = _warning_handler
+
+
+def get_logger(logger_name: str):
+    if logger_name.startswith(_DEFAULT_LOGGER_NAME):
+        return _default_logger.getChild(logger_name)
+    return logging.getLogger(logger_name)
+
+
+def error(msg, *args, **kwargs):
+    _default_logger.error(msg, *args, **kwargs)
+
+
+def warning(msg, *args, raise_warning=True, **kwargs):
+    if raise_warning:
+        warnings.warn(msg)
+    else:
+        _default_logger.warning(msg, *args, **kwargs)
+
+
+def info(msg, *args, **kwargs):
+    _default_logger.info(msg, *args, **kwargs)
+
+
+def debug(msg, *args, **kwargs):
+    _default_logger.debug(msg, *args, **kwargs)
+
+
+def print_info(msg, *args, **kwargs):
+    _default_logger.log(PRINT_INFO, msg, *args, **kwargs)
diff --git a/tools/benchmark/requirements.txt b/tools/benchmark/requirements.txt
new file mode 100644
index 000000000..5e3e8ee14
--- /dev/null
+++ b/tools/benchmark/requirements.txt
@@ -0,0 +1,8 @@
+py-cpuinfo
+numpy
+progress
+pyyaml
+opencv-python
+shapely
+sklearn
+xmltodict
diff --git a/tools/calibration/README.md b/tools/calibration/README.md
new file mode 100644
index 000000000..fc55ad806
--- /dev/null
+++ b/tools/calibration/README.md
@@ -0,0 +1,33 @@
+# OpenVINO™ Calibration Python* package
+The Inference Engine `openvino.tools.calibration` Python\* package includes types to calibrate a given FP32 model so that you can run it in low-precision 8-bit integer mode while keeping the input data of this model in the original precision.  
+The package has the following dependencies:
+* `openvino.tools.accuracy_checker` package
+* `openvino.tools.benchmark` package.  
+
+Please, refer to https://docs.openvinotoolkit.org for details.
+
+## Usage
+You can use the `openvino.tools.calibration` package in a simple way:
+```Python
+import openvino.tools.calibration as calibration
+
+with calibration.CommandLineProcessor.process() as config:
+    network = calibration.Calibrator(config).run()
+    if network:
+        network.serialize(config.output_model)
+```
+### Explanation
+1. Import openvino.tools.calibration types:
+```Python
+import openvino.tools.calibration as calibration
+```
+
+2. Read configuration and process the model:
+```Python
+config = calibration.CommandLineProcessor.process()
+```
+
+3. Serialize result model:
+```Python
+network.serialize(config.output_model)
+```
+\ No newline at end of file
diff --git a/tools/calibration/__init__.py b/tools/calibration/__init__.py
new file mode 100644
index 000000000..26228284f
--- /dev/null
+++ b/tools/calibration/__init__.py
@@ -0,0 +1,34 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from .aggregated_statistics import AggregatedStatistics
+from .calibrator import Calibrator
+from .calibration_configuration import CalibrationConfiguration, CalibrationConfigurationHelper
+from .calibrator_configuration import CalibratorConfiguration
+from .calibrator_factory import CalibratorFactory
+from .command_line_reader import CommandLineReader
+from .command_line_processor import CommandLineProcessor
+
+__version__ = "0.0.1"
+__all__ = [
+    'AggregatedStatistics',
+    'Calibrator',
+    'CalibrationConfiguration',
+    'CalibrationConfigurationHelper',
+    'CalibratorConfiguration',
+    'CalibratorFactory',
+    'CommandLineReader',
+    'CommandLineProcessor'
+]
diff --git a/tools/calibration/__main__.py b/tools/calibration/__main__.py
new file mode 100644
index 000000000..500c3fc85
--- /dev/null
+++ b/tools/calibration/__main__.py
@@ -0,0 +1,79 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from argparse import ArgumentParser
+
+import openvino.tools.calibration as calibration
+import openvino.tools.utils as utils
+
+
+def calibrate():
+    config = calibration.CommandLineReader.read()
+    network = calibration.Calibrator(config).run()
+    network.serialize(config.output_model)
+
+
+def check_accuracy():
+    config = calibration.CommandLineReader.read()
+    calibrator = calibration.CalibratorFactory.create(config.precision, calibration.CalibratorConfiguration(config))
+
+    print("Collecting accuracy for {}...".format(config.model))
+    result = calibrator.infer()
+    print("Accuracy: {0:.4f}%".format(100.0 * result.metrics.accuracy))
+
+
+def collect_statistics():
+    import os
+    config = calibration.CommandLineReader.read()
+    calibrator = calibration.CalibratorFactory.create(config.precision, calibration.CalibratorConfiguration(config))
+
+    print("Collecting FP32 statistics for {}...".format(config.model))
+    fp32_result = calibrator.infer(add_outputs=True, collect_aggregated_statistics=True)
+    print("FP32 accuracy: {0:.4f}%".format(100.0 * fp32_result.metrics.accuracy))
+
+    output_model_file_path = \
+        os.path.splitext(config.model)[0] + ("_{}_statistics_without_ignored.xml".format(config.precision.lower()) if
+                                             config.ignore_layer_names else
+                                             "_{}_statistics.xml".format(config.precision.lower()))
+    output_weights_file_path = utils.Path.get_weights(output_model_file_path)
+
+    quantization_levels = \
+        calibrator.get_quantization_levels(calibration.CalibrationConfigurationHelper.read_ignore_layer_names(config))
+    statistics = fp32_result.aggregated_statistics.get_node_statistics()
+    calibrator.save(output_model_file_path, output_weights_file_path, quantization_levels, statistics)
+    print("Network with statistics was written to {}.(xml|bin) IR file".format(os.path.splitext(output_model_file_path)[0]))
+
+
+def __build_arguments_parser():
+    parser = ArgumentParser(description='Calibration Tool')
+    parser.add_argument(
+        'action',
+        help='Optional, possible values: calibrate, collect_statistics or check_accuracy',
+        nargs='?',
+        choices=('calibrate', 'collect_statistics', 'check_accuracy'))
+    return parser
+
+
+if __name__ == '__main__':
+    parser, unknown_args = __build_arguments_parser().parse_known_args()
+    if parser.action == 'calibrate':
+        calibrate()
+    elif parser.action == 'collect_statistics':
+        collect_statistics()
+    elif parser.action == 'check_accuracy':
+        check_accuracy()
+    else:
+        calibrate()
diff --git a/tools/calibration/aggregated_statistics.py b/tools/calibration/aggregated_statistics.py
new file mode 100644
index 000000000..52072c3b9
--- /dev/null
+++ b/tools/calibration/aggregated_statistics.py
@@ -0,0 +1,170 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import numpy
+import openvino.inference_engine as ie
+from .network_node_stats import NetworkNodeStats
+from .shape import Shape
+
+
+class AggregatedStatistics:
+    INDEX_MIN = 0
+    INDEX_MAX = 1
+
+    def __init__(self, result=None, ignore_layer_names: set=None, iterations_count: int = 1, dataset_size: int = 1):
+        self._ignore_layer_names = ignore_layer_names
+        self._registered_layers = None
+        self._iterations_count = iterations_count
+        self._dataset_size = dataset_size
+        self._itteration = 0
+
+        if result:
+            for inference_result in result.result:
+                self.add(network = result.network, exec_network = result.exec_network, inference_result = inference_result)
+
+    def release(self):
+        if self._registered_layers:
+            del self._registered_layers
+            self._registered_layers = None
+
+    def add(
+        self,
+        network: ie.IENetwork,
+        exec_network: ie.ExecutableNetwork,
+        inference_result
+    ):
+        '''
+        Add inference result to aggregated statistics instance
+        '''
+        layer_names = network.layers.keys()
+
+        if not self._registered_layers:
+            self._registered_layers = dict()
+            initialized = False
+        else:
+            initialized = True
+
+        # TODO: can be refactored: we are itterating by all layers (to cover input layers output) to collect statistics
+        # for inference_result in inference_results:
+        for out_layer_name in layer_names:
+            if self._ignore_layer_names and out_layer_name in self._ignore_layer_names:
+                continue
+
+            if out_layer_name in network.inputs:
+                output_blob = exec_network.requests[0].inputs[out_layer_name]
+                shape = Shape.create(network.inputs[out_layer_name].layout, output_blob.shape)
+            else:
+                # TODO: can be refactored: we are itterating by all layers (to cover input layers output) to collect statistics
+                if out_layer_name not in inference_result:
+                    continue
+                output_blob = inference_result[out_layer_name]
+                shape = Shape.create(network.outputs[out_layer_name].layout, output_blob.shape)
+
+            if not initialized:
+                # for const layers N is not equal batch size
+                # self._registered_layers[out_layer_name] = numpy.empty((shape.c, self._dataset_size, 2))
+                self._registered_layers[out_layer_name] = numpy.empty((shape.c, shape.n * self._iterations_count, 2))
+
+            if shape.layout[0] != 'C' and not (len(shape.layout) >= 2 and shape.layout[0] == 'N' and shape.layout[1] == 'C'):
+                raise ValueError("unsupported layout '{}'".format(shape.layout))
+
+            if shape.layout[0] != 'N':
+                output_blob = [output_blob]
+
+            for sample in range(0, shape.n):
+                for channel in range(0, shape.c):
+                    self.add_tensor_statistics(out_layer_name, output_blob, shape.n, sample, channel, self._itteration)
+
+        self._itteration += 1
+
+    def register_layer(self, layer_name: str):
+        if layer_name in self._registered_layers:
+            raise ValueError("layer '{}' has been added already".format(layer_name))
+
+        self._registered_layers[layer_name] = None
+
+    @property
+    def registered_layers(self):
+        return self._registered_layers
+
+    def add_tensor_statistics(self, layer_name: str, data, n: int, sample: int, channel: int, itteration: int):
+        channels = self._registered_layers[layer_name]
+
+        n_index = sample + n * itteration
+        if n_index >= channels.shape[1]:
+            channels.resize((channels.shape[0], channels.shape[1] + 1, channels.shape[2]), refcheck=False)
+
+        channels.itemset((channel, n_index, self.INDEX_MIN), data[sample][channel].min())
+        channels.itemset((channel, n_index, self.INDEX_MAX), data[sample][channel].max())
+
+    def get_number_channels(self, layer_name: str):
+        if layer_name in self._registered_layers:
+            return len(self._registered_layers[layer_name])
+        return 0
+
+    def get_data_min_max(self, layer_name: str, channel: int, threshold: float = None):
+        # take data by name
+        if layer_name in self._registered_layers:
+            layer = self._registered_layers[layer_name]
+            stats = layer[channel]
+
+            # having absolute min/max values, we can create new statistic
+            max_values = list()
+            min_values = list()
+            for tensor_statistic in stats:
+                max_values.append(tensor_statistic.item(self.INDEX_MAX))
+                min_values.append(tensor_statistic.item(self.INDEX_MIN))
+
+            # define number of elements to throw out
+            element_to_take = int(len(max_values) * threshold / 100) if threshold else len(max_values)
+            elements_to_throw = len(max_values) - element_to_take if threshold else 0
+
+            max_values.sort()
+            min_values.sort()
+
+            min = min_values[elements_to_throw]
+            max = max_values[element_to_take - 1]
+        else:
+            min = max = 0.0
+
+        return min, max
+
+    def serialize(self, json_file_path: str):
+        with open(json_file_path, 'w') as out_file:
+            json.dump(self._registered_layers, out_file)
+
+
+    def get_node_statistics(self, threshold = None):
+        net_nodes_stats = dict()
+        # go over all outputs and get aggregated statistics
+        for layer_name in self.registered_layers:
+            channels_count = self.get_number_channels(layer_name)
+
+            if layer_name not in net_nodes_stats:
+                node_stats = NetworkNodeStats(channels_count)
+                net_nodes_stats[layer_name] = node_stats
+            else:
+                node_stats = net_nodes_stats[layer_name]
+
+            for channel in range(channels_count):
+                node_stats.min_outputs[channel], node_stats.max_outputs[channel] = self.get_data_min_max(layer_name, channel, threshold)
+
+        return net_nodes_stats
+
+    def pop(self, ignore_layer_names: set):
+        for ignore_layer_name in ignore_layer_names:
+            self._registered_layers.pop(ignore_layer_name)
+\ No newline at end of file
diff --git a/tools/calibration/base_calibrator.py b/tools/calibration/base_calibrator.py
new file mode 100644
index 000000000..6a54fc490
--- /dev/null
+++ b/tools/calibration/base_calibrator.py
@@ -0,0 +1,556 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from abc import abstractmethod
+import numpy as np
+import os
+import tempfile
+from typing import Dict
+
+import openvino.inference_engine as ie
+
+from ..accuracy_checker.accuracy_checker.progress_reporters import TQDMReporter, ProgressReporter
+from ..accuracy_checker.accuracy_checker.config import ConfigReader
+from ..accuracy_checker.accuracy_checker.model_evaluator import ModelEvaluator
+
+from ..utils.network_info import NetworkInfo
+from ..utils.building.network_builder import NetworkBuilder
+from ..utils.building.layer import Layer
+
+from .logging import info, debug
+from .calibrator_configuration import CalibratorConfiguration
+from .aggregated_statistics import AggregatedStatistics
+from .nrmsd import compare_nrmsd
+from .single_layer_network import SingleLayerNetwork
+from .inference_result import InferenceResult
+from .calibration_metrics import CalibrationMetrics
+from .infer_raw_results import InferRawResults
+
+
+class MetricsCallback:
+    def __init__(self):
+        self._values = list()
+        self._latencies = list()
+
+    def callback(self, value, latency = None):
+        self._values.append(value)
+        self._latencies.append(latency)
+
+    @property
+    def values(self):
+        return self._values
+
+    @property
+    def latencies(self):
+        return self._latencies
+
+
+class DatasetCallback:
+    def __init__(
+        self,
+        network: ie.IENetwork,
+        exec_network: ie.ExecutableNetwork,
+        collect_resuls: bool = True,
+        collect_layers: set = None,
+        collect_aggregated_statistics: bool = True,
+        iterations_count: int = 1,
+        dataset_size: int = 1
+    ):
+
+        self._network = network
+        self._exec_network = exec_network
+        self._aggregated_statistics = None
+        self._iterations_count = iterations_count
+        self._dataset_size = dataset_size
+        self._collect_results = collect_resuls
+        self._collect_layers = collect_layers
+        self._collect_aggregated_statistics = collect_aggregated_statistics
+        self._infer_raw_results = InferRawResults() if collect_resuls else None
+        self._latencies = list()
+
+    def callback(self, value, latency = None):
+        if self._collect_aggregated_statistics:
+            if not self._aggregated_statistics:
+                self._aggregated_statistics = AggregatedStatistics(
+                    iterations_count = self._iterations_count,
+                    dataset_size = self._dataset_size)
+            self._aggregated_statistics.add(self._network, self._exec_network, value)
+
+        if self._collect_results:
+            if self._collect_layers:
+                collect_value = dict()
+                for layer_name in value:
+                    if layer_name in self._collect_layers:
+                        collect_value[layer_name] = value[layer_name]
+                self._infer_raw_results.add(collect_value)
+            else:
+                self._infer_raw_results.add(value)
+
+        if latency:
+            self._latencies.append(latency)
+
+    @property
+    def aggregated_statistics(self) -> AggregatedStatistics:
+        return self._aggregated_statistics
+
+    @property
+    def infer_raw_result(self) -> InferRawResults:
+        return self._infer_raw_results
+
+    @property
+    def latencies(self) -> list:
+        return self._latencies
+
+    def release(self):
+        if self._aggregated_statistics:
+            self._aggregated_statistics.release()
+        if self._infer_raw_results:
+            self._infer_raw_results.release()
+
+
+class BaseCalibrator:
+    '''
+    Base type for all calibrators
+    '''
+
+    def __init__(self, configuration: CalibratorConfiguration):
+        self._configuration = configuration
+
+        network = self.create_network()
+        self._input_layer_name = next(iter(network.inputs))
+        self._output_layer_name = next(iter(network.outputs))
+
+        self.plugin = ie.IEPlugin(self._configuration.device)
+        if self._configuration.cpu_extension and self._configuration.device == 'CPU':
+            self.plugin.add_cpu_extension(self._configuration.cpu_extension)
+        if self._configuration.gpu_extension and self._configuration.device == 'GPU':
+            self.plugin.set_config('CONFIG_FILE', self._configuration.gpu_extension)
+
+    def will_be_fused_workaround(self, layer:ie.IENetLayer, network_info:NetworkInfo=None):
+        if layer.type == "Const" or layer.type == "Tile":
+            if not network_info:
+                network_info = NetworkInfo(self._configuration.model)
+            only_expected = network_info.explore_inputs(network_info.get_layer(layer.name), ['Const', 'Tile'])
+            return only_expected, network_info
+        return False, network_info
+
+    def add_outputs(self, network:ie.IENetwork, output_layers: list=None) -> ie.IENetwork:
+        if output_layers is None:
+            output_layers = network.layers.values()
+
+        network_info = None
+        for layer in output_layers:
+            fused, network_info = self.will_be_fused_workaround(layer, network_info)
+            if not fused:
+                network.add_outputs([layer.name])
+        return network
+
+    def create_network(self) -> ie.IENetwork:
+        network = ie.IENetwork(self._configuration.model, self._configuration.weights)
+        if len(network.outputs) == 0:
+            raise ValueError("no outputs")
+        if len(network.inputs) == 0:
+            raise ValueError("no inputs")
+        return network
+
+    def create_network_for_layer(
+        self,
+        weights: str,
+        quantization_layer: ie.IENetLayer,
+        quantization_layer_info: Layer,
+        activation_layer: ie.IENetLayer
+    ):
+
+        if self.is_quantization_supported(quantization_layer.type):
+            input_layer_info = quantization_layer_info.inputs[0].layer
+
+            layers = [
+                Layer(
+                    0,
+                    "Input",
+                    input_layer_info.name,
+                    {},
+                    [],
+                    input_layer_info.outputs[0].port.dim),
+
+                Layer(
+                    1,
+                    quantization_layer.type,
+                    quantization_layer.name,
+                    quantization_layer.params,
+                    quantization_layer_info.inputs[0].port.dim,
+                    quantization_layer_info.outputs[0].port.dim,
+                    quantization_layer_info.weights,
+                    quantization_layer_info.biases)
+            ]
+
+            if activation_layer:
+                activation_layer_info = quantization_layer_info.outputs[0].layer
+                reference_output_layer_name = activation_layer_info.name
+                outputs = activation_layer_info.outputs
+                output_layer_outputs_dim = \
+                    outputs[0].port.dim if outputs else activation_layer_info.inputs[0].port.dim
+
+                layers.append(Layer(
+                    len(layers),
+                    activation_layer.type,
+                    activation_layer.name,
+                    activation_layer.params,
+                    activation_layer_info.inputs[0].port.dim,
+                    output_layer_outputs_dim))
+            else:
+                reference_output_layer_name = quantization_layer_info.name
+                output_layer_outputs_dim = quantization_layer_info.outputs[0].port.dim
+
+            layers.append(Layer(
+                len(layers),
+                "Power",
+                quantization_layer.name + "_",
+                {'power': 1.0, 'scale': 1.0, 'shift': 0.0},
+                output_layer_outputs_dim,
+                output_layer_outputs_dim))
+
+            builder = NetworkBuilder().sequential(layers)
+        else:
+            raise ValueError("unsupported layer type '{}'".format(quantization_layer.type))
+
+        # filling weights and biases
+
+        temporary_file = tempfile.NamedTemporaryFile(delete=False)
+        try:
+            builder_str = str(builder)
+            network_content = str.encode(builder_str)
+            temporary_file.write(network_content)
+            temporary_file.close()
+
+            network_for_layer_model = temporary_file.name
+            network_for_layer_weights = weights
+            network_for_layer = ie.IENetwork(network_for_layer_model, network_for_layer_weights)
+            network_for_layer.add_outputs([quantization_layer.name + "_"])
+        finally:
+            if os.path.exists(temporary_file.name):
+                temporary_file.close()
+                os.remove(temporary_file.name)
+
+        return network_for_layer, reference_output_layer_name
+
+    def save(self, model_file_path: str, weights_file_path: str, quantization_level: dict, statistics):
+        '''
+        Save calibration results.
+        '''
+
+
+        if not statistics:
+            raise ValueError("statistics is empy")
+
+        network = self.create_network()
+
+        network_stats = {}
+        for layer_name, node_statistic in statistics.items():
+            network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs),
+                                                      max=tuple(node_statistic.max_outputs))
+        network.stats.update(network_stats)
+
+        for layer in network.layers.values():
+            if self.is_quantization_supported(layer.type) and layer.name in quantization_level:
+                params = layer.params
+                params["quantization_level"] = quantization_level[layer.name]
+                layer.params = params
+
+        network.serialize(model_file_path, weights_file_path)
+
+    @staticmethod
+    def __parse_inputs(inputs_entry):
+        inputs = {}
+        for input_ in inputs_entry:
+            value = input_['value']
+            if isinstance(value, list):
+                value = np.array(value)
+
+            inputs[input_['name']] = value
+
+        return inputs
+
+    @staticmethod
+    def compare_result(result1, result2, output_name: str):
+        if len(result1) != len(result2):
+            return False
+
+        for index in range(len(result1)):
+            result_map1 = result1[index]
+            result_map2 = result2[index]
+
+            compare_result = result_map1[output_name] == result_map2[output_name]
+            if not compare_result.all():
+                debug('\nresult_map1={}\n'.format(result_map1[output_name]))
+                debug('\nresult_map2={}\n'.format(result_map2[output_name]))
+                return False
+        return True
+
+    def get_affected_layers(self, output_layers: list=None):
+        '''
+        CVS-14299: Linux only: IENetwork.add_outputs (Python API) [and ICNNNetwork::addOutputs (C++ API)]
+        for some layers affects network inference result
+        '''
+        affected_layers = []
+        not_affected_layers = []
+
+        layers = self.create_network().layers.values()
+        info("total layers: {}".format(len(layers)))
+
+        network = self.create_network()
+        ref_results = self._infer(network=network)
+        info("ORIGINAL: original accuracy (no additional output layers): {}".format(ref_results.metrics.accuracy))
+
+        index = 1
+        for layer in layers:
+            if layer.type == 'Input':
+                info("SKIPPED ({}/{}): layer {}/{}".format(index, len(layers), layer.name, layer.type))
+            else:
+                network = self.create_network()
+
+                tmp = not_affected_layers.copy()
+                tmp.append(layer)
+
+                self.add_outputs(network, tmp)
+                results = self._infer(network=network)
+                # if results.metrics.accuracy == 0.0:
+                if not Int8Calibrator.compare_result(ref_results.result, results.result, self._output_layer_name):
+                    affected_layers.append(layer)
+                    info("FAILED ({}/{}): output layer {}/{} affects result, accuracy: {}".format(
+                        index,
+                        len(layers),
+                        layer.name,
+                        layer.type,
+                        results.metrics.accuracy))
+                else:
+                    not_affected_layers.append(layer)
+                    info("PASSED ({}/{}): output layer {}/{}, accuracy: {}".format(
+                        index,
+                        len(layers),
+                        layer.name,
+                        layer.type,
+                        results.metrics.accuracy))
+            index += 1
+
+        return affected_layers
+
+    # TODO: add_outputs - remove, not neccessary
+    def infer(self,
+              add_outputs=False,
+              statistics=None,
+              quantization_level: dict = None,
+              collect_resuls: bool = False,
+              collect_layers: set = None,
+              collect_aggregated_statistics: bool = False,
+              network: ie.IENetwork = None,
+              collect_performance_counters: bool = False) -> InferenceResult:
+
+        if network is None:
+            network = self.create_network()
+
+        if add_outputs:
+            self.add_outputs(network)
+
+        if quantization_level:
+            for layer_name, value in quantization_level.items():
+                params = network.layers[layer_name].params
+                params["quantization_level"] = value
+                network.layers[layer_name].params = params
+
+        return self._infer(
+            network=network,
+            statistics=statistics,
+            collect_resuls=collect_resuls,
+            collect_layers=collect_layers,
+            collect_aggregated_statistics=collect_aggregated_statistics,
+            collect_performance_counters=collect_performance_counters)
+
+    def infer_single_layer_network(self,
+                                   single_layer_network: SingleLayerNetwork,
+                                   full_network_result: InferenceResult):
+        '''
+        Native infer and compare results
+        '''
+
+        if single_layer_network.input_layer_name in full_network_result:
+            input_layer_data = full_network_result[single_layer_network.input_layer_name]
+        else:
+            raise ValueError("single layer network input '{}' was not found in reference inference".format(
+                single_layer_network.input_layer_name))
+
+        single_layer_network_result = \
+            single_layer_network.exec_network.infer({single_layer_network.input_layer_name: input_layer_data})
+        if single_layer_network.output_layer_name not in single_layer_network_result:
+            raise ValueError("singld layer network output layer '{}' was not found in single"
+                             " layer inference result".format(single_layer_network.layer_name))
+        actual_result_data = single_layer_network_result[single_layer_network.output_layer_name]
+
+        if single_layer_network.reference_output_layer_name not in full_network_result:
+            raise ValueError("single layer network output layer '{}' was not found in "
+                             "full inference result".format(single_layer_network.layer_name))
+        expected_result_data = full_network_result[single_layer_network.reference_output_layer_name]
+
+        accuracy_drop = compare_nrmsd(actual_result_data, expected_result_data)
+        return accuracy_drop
+
+    def _infer(
+        self,
+        network=None,
+        statistics=None,
+        collect_aggregated_statistics: bool = True,
+        collect_resuls: bool = True,
+        collect_layers: set = None,
+        collect_performance_counters: bool = False
+    ) -> InferenceResult:
+        '''
+        Accuracy checker infer and compare results
+        '''
+        accuracy = 0.0
+
+        model = self._configuration.config['models'][0]
+        launcher_config = model['launchers'][0]
+        dataset_config = model['datasets'][0]
+
+        process_dataset_callback = None
+        model_evaluator = ModelEvaluator.from_configs(launcher_config, dataset_config)
+        try:
+            if network:
+                del model_evaluator.launcher.network
+                del model_evaluator.launcher.exec_network
+                model_evaluator.launcher.network = network
+                model_evaluator.launcher.exec_network = model_evaluator.launcher.plugin.load(network)
+
+            if collect_performance_counters:
+                model_evaluator.launcher.plugin.set_config({'PERF_COUNT': 'YES'})
+
+            if statistics:
+                network_stats = {}
+                for layer_name, node_statistic in statistics.items():
+                    network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs),
+                                                              max=tuple(node_statistic.max_outputs))
+                model_evaluator.launcher.network.stats.update(network_stats)
+
+            dataset_size = model_evaluator.dataset.size
+
+            if self._configuration.progress:
+                progress_reporter = ProgressReporter.provide((
+                    self._configuration.progress if ':' not in self._configuration.progress
+                    else self._configuration.progress.split(':')[0]
+                ))
+                progress_reporter.reset(len(model_evaluator.dataset))
+            else :
+                progress_reporter = None
+
+            process_dataset_callback = DatasetCallback(
+                model_evaluator.launcher.network,
+                model_evaluator.launcher.exec_network,
+                collect_resuls=collect_resuls,
+                collect_layers=collect_layers,
+                collect_aggregated_statistics=collect_aggregated_statistics,
+                iterations_count=int(dataset_size / self._configuration.batch_size),
+                dataset_size=dataset_size)
+
+            model_evaluator.process_dataset(None,
+                                            progress_reporter=progress_reporter,
+                                            output_callback=process_dataset_callback.callback)
+            if len(model_evaluator.launcher.exec_network.requests) != 1:
+                raise ValueError("unexpected network requests count")
+
+            inference_result = process_dataset_callback.infer_raw_result
+            inference_latencies = process_dataset_callback.latencies
+
+            performance_counters = \
+                model_evaluator.launcher.exec_network.requests[0].get_perf_counts() if collect_performance_counters else None
+
+            model_evaluator_callback = MetricsCallback()
+            model_evaluator.compute_metrics(output_callback=model_evaluator_callback.callback)
+            presenter_values = model_evaluator_callback.values
+            for presenter_value in presenter_values:
+                value, reference, name, threshold, meta = presenter_value
+                accuracy = np.mean(value)
+        except Exception:
+            if process_dataset_callback:
+                process_dataset_callback.release()
+            raise
+        finally:
+            model_evaluator.release()
+
+        return InferenceResult(
+            inference_result,
+            CalibrationMetrics(accuracy, np.mean(inference_latencies)) if len(inference_latencies) else CalibrationMetrics(accuracy),
+            process_dataset_callback.aggregated_statistics,
+            performance_counters)
+
+    def get_quantization_levels(self, ignore_layer_names=None) -> Dict[str, str]:
+        network = self.create_network()
+        quantization_levels = dict()
+
+        for layer in network.layers.values():
+            if self.is_quantization_supported(layer.type):
+                if ignore_layer_names and (layer.name in ignore_layer_names):
+                    quantization_levels[layer.name] = "FP32"
+                else:
+                    quantization_levels[layer.name] = "I8" if self.precision == "INT8" else self.precision
+
+        return quantization_levels
+
+    @property
+    def precision(self) -> str:
+        raise NotImplementedError()
+
+    @abstractmethod
+    def is_quantization_supported(self, layer_type: str) -> bool:
+        return NotImplementedError()
+
+    def is_activation_supported(self, layer_type: str) -> bool:
+        return layer_type.lower() == 'relu' or layer_type.lower() == 'activation' or layer_type.lower() == 'clamp'
+
+    def is_quantization_fusing_supported(self, parent_layer, child_layer):
+        if parent_layer.outputs[0].layer.id != child_layer.id:
+            # not supported fuse, let's ignore
+            return False
+
+        return self.is_quantization_supported(parent_layer.type) and \
+            len(parent_layer.outputs) == 1 and \
+            len(parent_layer.outputs[0].layer.inputs) == 1 and \
+            self.is_activation_supported(child_layer.type)
+
+    def get_quantization_layers(self) -> list:
+        collect_layers = set()
+
+        network_info = NetworkInfo(self._configuration.model)
+        previous_previous_layer = None
+        previous_layer = None
+        layer_index = 0
+        for layer in network_info.layers.values():
+            if previous_previous_layer:
+                if previous_layer and self.is_quantization_supported(previous_layer.type):
+                    if self.is_quantization_fusing_supported(previous_layer, layer):
+                        collect_layers.add(layer.name)
+                    else:
+                        collect_layers.add(previous_layer.name)
+                    collect_layers.add(previous_previous_layer.name)
+
+                if self.is_quantization_supported(layer.type) and layer_index == (len(network_info.layers) - 1):
+                    collect_layers.add(layer.name)
+                    collect_layers.add(previous_layer.name)
+
+            layer_index += 1
+            previous_previous_layer = previous_layer
+            previous_layer = layer
+
+        return collect_layers
diff --git a/tools/calibration/calibration_configuration.py b/tools/calibration/calibration_configuration.py
new file mode 100644
index 000000000..5f8620209
--- /dev/null
+++ b/tools/calibration/calibration_configuration.py
@@ -0,0 +1,150 @@
+import shutil
+from ..utils.network_info import NetworkInfo
+
+
+class CalibrationConfiguration:
+    """
+    Class for parsing input config
+    """
+    def __init__(
+        self,
+        config: str,
+        precision: str,
+        model: str,
+        weights: str,
+        tmp_directory: str,
+        output_model: str,
+        output_weights: str,
+        cpu_extension: str,
+        gpu_extension: str,
+        device: str,
+        batch_size: int,
+        threshold: float,
+        ignore_layer_types: list,
+        ignore_layer_types_path: str,
+        ignore_layer_names: list,
+        ignore_layer_names_path: str,
+        benchmark_iterations_count: int,
+        progress: str):
+
+        self._config = config
+        self._precision = precision.upper()
+        self._model = model
+        self._weights = weights
+        self._tmp_directory = tmp_directory
+        self._output_model = output_model
+        self._output_weights = output_weights
+        self._cpu_extension = cpu_extension
+        self._gpu_extension = gpu_extension
+        self._device = device
+        self._batch_size = batch_size
+        self._threshold = threshold
+        self._ignore_layer_types = ignore_layer_types
+        self._ignore_layer_types_path = ignore_layer_types_path
+        self._ignore_layer_names = ignore_layer_names
+        self._ignore_layer_names_path = ignore_layer_names_path
+        self._benchmark_iterations_count = benchmark_iterations_count
+        self._progress = progress
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.release()
+
+    def release(self):
+        if self.tmp_directory:
+            shutil.rmtree(self.tmp_directory)
+            self._tmp_directory = None
+
+    @property
+    def config(self) -> list:
+        return self._config
+
+    @property
+    def precision(self) -> str:
+        return self._precision
+
+    @property
+    def model(self) -> str:
+        return self._model
+
+    @property
+    def weights(self) -> str:
+        return self._weights
+
+    @property
+    def tmp_directory(self) -> str:
+        return self._tmp_directory
+
+    @property
+    def output_model(self) -> str:
+        return self._output_model
+
+    @property
+    def output_weights(self) -> str:
+        return self._output_weights
+
+    @property
+    def cpu_extension(self) -> str:
+        return self._cpu_extension
+
+    @property
+    def gpu_extension(self) -> str:
+        return self._gpu_extension
+
+    @property
+    def device(self) -> str:
+        return self._device
+
+    @property
+    def batch_size(self) -> int:
+        return self._batch_size
+
+    @property
+    def threshold(self) -> int:
+        return self._threshold
+
+    @property
+    def ignore_layer_types(self):
+        return self._ignore_layer_types
+
+    @property
+    def ignore_layer_types_path(self) -> str:
+        return self._ignore_layer_types_path
+
+    @property
+    def ignore_layer_names(self):
+        return self._ignore_layer_names
+
+    @property
+    def ignore_layer_names_path(self) -> str:
+        return self._ignore_layer_names_path
+
+    @property
+    def benchmark_iterations_count(self) -> int:
+        return self._benchmark_iterations_count
+
+    @property
+    def progress(self) -> str:
+        return self._progress
+
+
+class CalibrationConfigurationHelper:
+    @staticmethod
+    def read_ignore_layer_names(configuration: CalibrationConfiguration):
+        ignore_layer_types = configuration.ignore_layer_types
+
+        if configuration.ignore_layer_types_path:
+            ignore_layer_types_file = open(configuration.ignore_layer_types_path, 'r')
+            ignore_layer_types_from_file = [line.strip() for line in ignore_layer_types_file.readlines()]
+            ignore_layer_types.extend(ignore_layer_types_from_file)
+
+        ignore_layer_names = NetworkInfo(configuration.model).get_layer_names(layer_types=ignore_layer_types)
+
+        if configuration.ignore_layer_names_path:
+            ignore_layer_names_file = open(configuration.ignore_layer_names_path, 'r')
+            ignore_layer_names_from_file = [line.strip() for line in ignore_layer_names_file.readlines()]
+            ignore_layer_names.extend(ignore_layer_names_from_file)
+
+        return ignore_layer_names
diff --git a/tools/calibration/calibration_metrics.py b/tools/calibration/calibration_metrics.py
new file mode 100644
index 000000000..c156e0cb6
--- /dev/null
+++ b/tools/calibration/calibration_metrics.py
@@ -0,0 +1,30 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class CalibrationMetrics:
+    def __init__(self, accuracy: float, latency: float = None):
+        self._accuracy = accuracy
+        self._latency = latency
+
+    @property
+    def accuracy(self):
+        return self._accuracy
+
+    # TODO: remove: use benchmark instead
+    @property
+    def latency(self):
+        return self._latency
+\ No newline at end of file
diff --git a/tools/calibration/calibrator.py b/tools/calibration/calibrator.py
new file mode 100644
index 000000000..5e5d252c3
--- /dev/null
+++ b/tools/calibration/calibrator.py
@@ -0,0 +1,255 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import os
+import platform
+
+from ..utils.network_info import NetworkInfo
+
+from ..benchmark.benchmark import Benchmark
+from ..network import Network
+
+from .logging import info, debug, info_performance_counters, info_layer_accuracy_drop
+from .calibrator_configuration import CalibratorConfiguration
+from .calibrator_factory import CalibratorFactory
+from .calibration_configuration import CalibrationConfiguration, CalibrationConfigurationHelper
+from .layer_accuracy_drop.collector_by_layer import CollectorByLayer
+
+class Calibrator:
+    def __init__(self, configuration: CalibrationConfiguration):
+        if configuration is None:
+            raise ValueError("configuration is None")
+
+        self._configuration = configuration
+
+    def run(self) -> Network:
+        calibrator = CalibratorFactory.create(
+            self._configuration.precision,
+            CalibratorConfiguration(self._configuration))
+        benchmark = Benchmark(self._configuration)
+
+        info("Processor: {}".format(platform.processor()))
+
+        info("Collecting FP32 statistics for {}...".format(self._configuration.model))
+        fp32_result = calibrator.infer(
+            add_outputs=True,
+            collect_aggregated_statistics=True,
+            collect_performance_counters=True)
+        fp32_accuracy = fp32_result.metrics.accuracy
+        fp32_latency = benchmark.run(iterations_count=self._configuration.benchmark_iterations_count).latency
+        info("FP32 accuracy: {0:.4f}%, latency: {1:0.4f} ms".format(100.0 * fp32_accuracy, 1000 * fp32_latency))
+
+        info("FP32 performance counters:\n")
+        info_performance_counters(fp32_result.performance_counters)
+
+        ignore_layer_names = CalibrationConfigurationHelper.read_ignore_layer_names(self._configuration)
+        fp32_result.aggregated_statistics.pop(ignore_layer_names=ignore_layer_names)
+        fp32_aggregated_statistics = fp32_result.aggregated_statistics
+        fp32_result = None
+
+        info("Verification of network accuracy if all possible layers converted to {}\n".format(
+		    self._configuration.precision))
+
+        best_lp_accuracy = None
+        best_lp_latency = 0.0
+        best_lp_threshold = 100.0
+        best_lp_statistics = None
+        best_lp_performance_counters = None
+
+        threshold = 100.0
+        threshold_low_boundary = 95.0
+        threshold_step = .5
+
+        quantization_levels = calibrator.get_quantization_levels(ignore_layer_names)
+
+        min_accuracy_drop = None
+        while threshold >= threshold_low_boundary:
+            info("Validate {} accuracy, threshold for activation statistics: {}%".format(
+                self._configuration.precision,
+                threshold))
+
+            lp_statistics = fp32_aggregated_statistics.get_node_statistics(threshold)
+            with Network.reload(
+                model_path=self._configuration.model,
+                statistics=lp_statistics,
+                quantization_levels=quantization_levels,
+                batch_size=self._configuration.batch_size
+            ) as reloaded_network:
+
+                with calibrator.infer(network=reloaded_network.ie_network,
+                                      collect_performance_counters=True) as lp_result:
+                    lp_accuracy = lp_result.metrics.accuracy
+                    lp_performance_counters = lp_result.performance_counters
+                    lp_latency = benchmark.run(
+                        network=reloaded_network,
+                        iterations_count=self._configuration.benchmark_iterations_count).latency
+
+            if best_lp_accuracy is None or lp_accuracy > best_lp_accuracy:
+
+                best_lp_accuracy = lp_accuracy
+                best_lp_latency = lp_latency
+                best_lp_threshold = threshold
+                if best_lp_statistics:
+                    del best_lp_statistics
+                best_lp_statistics = lp_statistics
+                best_lp_performance_counters = lp_performance_counters
+            else:
+                del lp_statistics
+
+            min_accuracy_drop = fp32_accuracy - lp_accuracy if min_accuracy_drop is None else min(
+                min_accuracy_drop,
+                fp32_accuracy - lp_accuracy)
+
+            info("{0} accuracy is {1:.4f}%, latency: {2:0.4f} ms\n".format(
+                self._configuration.precision,
+                100.0 * lp_accuracy,
+                1000.0 * lp_latency))
+            threshold = threshold - threshold_step
+
+
+        info("Best {0} accuracy is {1:.4f}%, latency: {2:0.4f} ms for threshold {3}%".format(
+            self._configuration.precision,
+            100.0 * best_lp_accuracy,
+            1000.0 * best_lp_latency,
+            best_lp_threshold))
+
+        info("{} performance counters:\n".format(self._configuration.precision))
+        info_performance_counters(best_lp_performance_counters)
+
+        accuracy_was_satisfied = False
+        if (fp32_accuracy - best_lp_accuracy) > (self._configuration.threshold / 100):
+            info("Accuracy of all layers conversion does not correspond to the required threshold")
+            info(("FP32 Accuracy: {0:.4f}% (latency: {1:0.4f} ms) vs all low precision layers accuracy: {2:.4f}% "
+                  "(latency: {3:0.4f} ms), threshold for activation statistics: {4}%").format(100.0 * fp32_accuracy,
+                                                                                              1000.0 * fp32_latency,
+                                                                                              100.0 * best_lp_accuracy,
+                                                                                              1000.0 * best_lp_latency,
+                                                                                              best_lp_threshold))
+
+            info("Collecting all raw FP32 results")
+
+            quantization_layers = calibrator.get_quantization_layers()
+            debug("{} layers (total {}) are selected to cache".format(
+                len(quantization_layers),
+                len(NetworkInfo(self._configuration.model).layers)))
+
+            with calibrator.infer(add_outputs=True,
+                                  collect_resuls=True,
+                                  collect_layers=quantization_layers) as fp32_result_with_raw_data:
+                info("Collecting intermediate per-layer accuracy drop")
+                layers_accuracy_drop = CollectorByLayer(
+                    self._configuration,
+                    calibrator.plugin,
+                    calibrator).collect(best_lp_statistics, fp32_result_with_raw_data)
+
+                info("Layer accuracy drop:\n")
+                info_layer_accuracy_drop(layers_accuracy_drop)
+
+            if layers_accuracy_drop:
+                info("Starting to reduce number of layers being converted to Int8")
+
+                for layer_accuracy_drop in layers_accuracy_drop:
+                    info("Returning of '{}' to FP32 precision, start validation".format(layer_accuracy_drop.layer_name))
+                    quantization_levels[layer_accuracy_drop.layer_name] = "FP32"
+
+                    with Network.reload(
+                        self._configuration.model,
+                        statistics=best_lp_statistics,
+                        quantization_levels=quantization_levels,
+                        batch_size=self._configuration.batch_size
+                    ) as reloaded_network:
+
+                        with calibrator.infer(network=reloaded_network.ie_network) as layer_int8_result:
+                            best_lp_accuracy = layer_int8_result.metrics.accuracy
+                            best_lp_latency = benchmark.run(
+                                network=reloaded_network,
+                                iterations_count=self._configuration.benchmark_iterations_count).latency
+
+                    accuracy_drop = fp32_accuracy - best_lp_accuracy
+                    min_accuracy_drop = accuracy_drop if min_accuracy_drop is None else min(min_accuracy_drop,
+                                                                                            accuracy_drop)
+                    if accuracy_drop > (self._configuration.threshold / 100.0):
+                        info("Was not achieved: FP32 accuracy: {0:.4f}% (latency: {1:.4} ms) VS {2} accuracy: {3:.4f}% "
+                             "(latency {4:.4f} ms), accuracy drop {5:.4f}%".format(100.0 * fp32_accuracy,
+                                                                                   1000.0 * fp32_latency,
+                                                                                   self._configuration.precision,
+                                                                                   100.0 * best_lp_accuracy,
+                                                                                   1000.0 * best_lp_latency,
+                                                                                   100.0 * accuracy_drop))
+                    else:
+                        accuracy_was_satisfied = True
+                        info("Achieved: FP32 accuracy: {0:.4f}% (latency: {1:.4} ms) VS {2} accuracy: {3:.4}% "
+                             "(latency: {4:.4} ms), accuracy drop {5:.4}%".format(100.0 * fp32_accuracy,
+                                                                                  1000.0 * fp32_latency,
+                                                                                  self._configuration.precision,
+                                                                                  100.0 * best_lp_accuracy,
+                                                                                  1000.0 * best_lp_latency,
+                                                                                  100.0 * accuracy_drop))
+                        break
+            else:
+                info("No layers to reduce number of converted to Int8")
+
+        else:
+            accuracy_was_satisfied = True
+
+        if accuracy_was_satisfied:
+            info("Achieved required accuracy drop satisfying threshold")
+            info("FP32 accuracy: {0:.4f}% (latency: {1:.4} ms) vs current low precision configuration accuracy: "
+                 "{2:.4f}% (latency: {3:.4} ms) with threshold for activation statistic: {4}%".format(
+                     100.0 * fp32_accuracy,
+                     1000.0 * fp32_latency,
+                     100.0 * best_lp_accuracy,
+                     1000.0 * best_lp_latency,
+                     best_lp_threshold))
+
+            quantized_layers_count = 0
+            for quantization_level in quantization_levels.values():
+                if quantization_level != "FP32":
+                    quantized_layers_count += 1
+            info("quantized layers (quantized {}, total {} layers):".format(
+                quantized_layers_count,
+                len(quantization_levels)))
+
+            layers_message = "FP32 layers:\n"
+            for layer_name, quantization_level in quantization_levels.items():
+                if quantization_level == "FP32":
+                    layers_message += "\tlayer '{}': {}\n".format(layer_name, quantization_level)
+            info(layers_message)
+
+            layers_message = "{} layers:\n".format(self._configuration.precision)
+            for layer_name, quantization_level in quantization_levels.items():
+                if quantization_level != "FP32":
+                    layers_message += "\tlayer '{}': {}\n".format(layer_name, quantization_level)
+            info(layers_message)
+
+            info("Write calibrated network to {}.(xml|bin) IR file".format(
+                os.path.splitext(self._configuration.output_model)[0]))
+
+            calibrator.save(
+                self._configuration.output_model,
+                self._configuration.output_weights,
+                quantization_levels,
+                best_lp_statistics)
+
+            # TODO: need to load from hard drive while not fixed
+            output_network = Network(self._configuration.output_model, self._configuration.output_weights)
+            return output_network
+        else:
+            info("Required threshold of accuracy drop cannot be achieved with any {0} quantization. Minimal accuracy "
+                 "drop: {1:0.4%}".format(self._configuration.precision, min_accuracy_drop))
+
+            return None
diff --git a/tools/calibration/calibrator_configuration.py b/tools/calibration/calibrator_configuration.py
new file mode 100644
index 000000000..2126711e8
--- /dev/null
+++ b/tools/calibration/calibrator_configuration.py
@@ -0,0 +1,66 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import collections
+
+
+class CalibratorConfiguration:
+    def __init__(self, configuration):
+        self._config = configuration.config
+        self._model = configuration.model
+        self._weights = configuration.weights
+        self._device = configuration.device
+        self._cpu_extension = configuration.cpu_extension
+        self._gpu_extension = configuration.gpu_extension
+        self._threshold = configuration.threshold
+        self._batch_size = configuration.batch_size
+        self._progress = configuration.progress
+
+    @property
+    def config(self) -> str:
+        return self._config
+
+    @property
+    def model(self) -> str:
+        return self._model
+
+    @property
+    def weights(self) -> str:
+        return self._weights
+
+    @property
+    def device(self) -> str:
+        return self._device
+
+    @property
+    def cpu_extension(self) -> str:
+        return self._cpu_extension
+
+    @property
+    def gpu_extension(self) -> str:
+        return self._gpu_extension
+
+    @property
+    def threshold(self) -> str:
+        return self._threshold
+
+    @property
+    def batch_size(self) -> int:
+        return self._batch_size
+
+    @property
+    def progress(self) -> str:
+        return self._progress
diff --git a/tools/calibration/calibrator_factory.py b/tools/calibration/calibrator_factory.py
new file mode 100644
index 000000000..5d16cc3a6
--- /dev/null
+++ b/tools/calibration/calibrator_factory.py
@@ -0,0 +1,31 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .calibrator_configuration import CalibratorConfiguration
+from .int8_calibrator import Int8Calibrator
+from .fp16_calibrator import Fp16Calibrator
+
+
+class CalibratorFactory:
+    @staticmethod
+    def create(precision: str, configuration: CalibratorConfiguration):
+        if precision.lower() == "int8":
+            return Int8Calibrator(configuration)
+
+        if precision.lower() == "fp16":
+            return Fp16Calibrator(configuration)
+
+        raise ValueError("not supported precision '{}'".format(precision))
diff --git a/tools/calibration/command_line_processor.py b/tools/calibration/command_line_processor.py
new file mode 100644
index 000000000..b300aaa40
--- /dev/null
+++ b/tools/calibration/command_line_processor.py
@@ -0,0 +1,142 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import tempfile
+
+from ..accuracy_checker.accuracy_checker.config import ConfigReader
+from ..accuracy_checker.accuracy_checker.launcher.dlsdk_launcher import DLSDKLauncher
+
+from ..network import Network
+from ..utils.path import Path
+from ..utils.configuration_filter import ConfigurationFilter
+from .calibration_configuration import CalibrationConfiguration
+from .logging import info, default_logger
+from .command_line_reader import CommandLineReader
+
+
+class CommandLineProcessor:
+    """
+    Class for parsing user input config
+    """
+    @staticmethod
+    def process() -> CalibrationConfiguration:
+        args, unknown_args = CommandLineReader.parser().parse_known_args()
+        if unknown_args:
+            info("unknown command line arguments: {0}".format(unknown_args))
+
+        args.target_framework = "dlsdk"
+        args.aocl = None
+
+        merged_config = ConfigReader.merge(args)
+        updated_config = ConfigurationFilter.filter(merged_config, args.metric_name, args.metric_type, default_logger)
+
+        if len(updated_config['models']) > 1:
+            raise ValueError("too much models")
+
+        if len(updated_config['models'][0]['launchers']) > 1:
+            raise ValueError("too much launchers")
+
+        launcher = updated_config['models'][0]['launchers'][0]
+        if 'caffe_model' in launcher or 'tf_model' in launcher or 'mxnet_weights' in launcher:
+            if args.converted_models:
+                tmp_directory = None
+            else:
+                tmp_directory = tempfile.mkdtemp(".converted_models")
+                launcher['mo_params']['output_dir'] = tmp_directory
+
+            if 'caffe_model' in launcher:
+                framework = 'caffe'
+                output_model = Path.get_model(
+                    str(launcher['caffe_model']),
+                    "_i8",
+                    str(args.output_dir) if args.output_dir else None)
+                output_weights = Path.get_weights(
+                    str(launcher['caffe_weights']),
+                    "_i8",
+                    str(args.output_dir) if args.output_dir else None)
+            elif 'tf_model' in launcher:
+                framework = 'tf'
+                output_model = Path.get_model(
+                    str(launcher['tf_model']),
+                    "_i8",
+                    str(args.output_dir) if args.output_dir else None)
+                output_weights = Path.get_weights(
+                    str(launcher['tf_model']),
+                    "_i8",
+                    str(args.output_dir) if args.output_dir else None)
+            elif 'mxnet_weights' in launcher:
+                framework = 'mxnet'
+                output_model = Path.get_model(
+                    str(launcher['mxnet_weights']),
+                    "_i8",
+                    str(args.output_dir) if args.output_dir else None)
+                output_weights = Path.get_weights(
+                    str(launcher['mxnet_weights']),
+                    "_i8",
+                    str(args.output_dir) if args.output_dir else None)
+            else:
+                raise ValueError("unknown model framework")
+
+            model, weights = DLSDKLauncher.convert_model(launcher, framework)
+            launcher['model'] = model
+            launcher['weights'] = weights
+
+            launcher.pop('caffe_model', None)
+            launcher.pop('caffe_weights', None)
+            launcher.pop('tf_model', None)
+            launcher.pop('mxnet_weights', None)
+        else:
+            model = launcher['model']
+            output_model = Path.get_model(str(model), "_i8", str(args.output_dir) if args.output_dir else None)
+            weights = launcher['weights']
+            output_weights = Path.get_weights(str(weights), "_i8", str(args.output_dir) if args.output_dir else None)
+            tmp_directory = None
+
+        batch_size = args.batch_size if args.batch_size else (launcher['batch'] if 'batch' in launcher else None)
+        if not batch_size:
+            with Network(str(launcher['model']), str(launcher['weights'])) as network:
+                batch_size = network.ie_network.batch_size
+
+        if 'cpu_extensions' in launcher:
+            cpu_extension = DLSDKLauncher.get_cpu_extension(launcher['cpu_extensions'], args.cpu_extensions_mode)
+            launcher['cpu_extensions'] = cpu_extension
+        else:
+            cpu_extension = None
+
+        if not args.calibrate_fully_connected:
+            if args.ignore_layer_types is None:
+                args.ignore_layer_types = []
+            args.ignore_layer_types.append("FullyConnected")
+
+        return CalibrationConfiguration(
+            config=updated_config,
+            precision=args.precision,
+            model=str(model),
+            weights=str(weights),
+            tmp_directory=tmp_directory,
+            output_model=output_model,
+            output_weights=output_weights,
+            cpu_extension=str(cpu_extension) if cpu_extension else None,
+            gpu_extension=str(launcher['gpu_extensions']) if 'gpu_extensions' in launcher else None,
+            device=launcher['device'],
+            batch_size=batch_size,
+            threshold=args.threshold,
+            ignore_layer_types=args.ignore_layer_types,
+            ignore_layer_types_path=args.ignore_layer_types_path,
+            ignore_layer_names=args.ignore_layer_names,
+            ignore_layer_names_path=args.ignore_layer_names_path,
+            benchmark_iterations_count=args.benchmark_iterations_count,
+            progress=(None if args.progress == 'None' else args.progress))
+\ No newline at end of file
diff --git a/tools/calibration/command_line_reader.py b/tools/calibration/command_line_reader.py
new file mode 100644
index 000000000..e9700c5e7
--- /dev/null
+++ b/tools/calibration/command_line_reader.py
@@ -0,0 +1,209 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import pathlib
+from functools import partial
+from argparse import ArgumentParser
+
+from ..accuracy_checker.accuracy_checker.utils import get_path
+
+
+class CommandLineReader:
+    @staticmethod
+    def parser():
+        parser = ArgumentParser(description='openvino.tools.calibration')
+
+        parser.add_argument(
+            '-d', '--definitions',
+            help='Optional. Path to the YML file with definitions',
+            type=str,
+            required=False)
+
+        parser.add_argument(
+            '-c', '--config',
+            help='Required. Path to the YML file with local configuration',
+            type=get_path,
+            required=True)
+
+        parser.add_argument(
+            '-m', '--models',
+            help='Optional. Prefix path to the models and weights',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-s', '--source',
+            help='Optional. Prefix path to the data source',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-a', '--annotations',
+            help='Optional. Prefix path to the converted annotations and datasets meta data',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '-e', '--extensions',
+            help='Optional. Prefix path to extensions folder',
+            type=partial(get_path, is_directory=True),
+            default=pathlib.Path.cwd(),
+            required=False)
+
+        parser.add_argument(
+            '--cpu_extensions_mode', '--cpu-extensions-mode',
+            help='Optional. specified preferable set of processor instruction for automatic searching cpu extension lib',
+            required=False,
+            choices=['avx2', 'sse4'])
+
+        parser.add_argument(
+            '-C', '--converted_models', '--converted-models',
+            help='Optional. Directory to store Model Optimizer converted models. Used for DLSDK launcher only',
+            type=partial(get_path, is_directory=True),
+            required=False
+        )
+
+        parser.add_argument(
+            '-M', '--model_optimizer', '--model-optimizer',
+            help='Optional. Path to model optimizer caffe directory',
+            type=partial(get_path, is_directory=True),
+            # there is no default value because if user did not specify it we use specific locations
+            # defined in model_conversion.py
+            required=False
+        )
+
+        parser.add_argument(
+            '--tf_custom_op_config_dir', '--tf-custom-op-config-dir',
+            help='Optional. Path to directory with tensorflow custom operation configuration files for model optimizer',
+            type=partial(get_path, is_directory=True),
+            # there is no default value because if user did not specify it we use specific location
+            # defined in model_conversion.py
+            required=False
+        )
+
+        parser.add_argument(
+            '--tf_obj_detection_api_pipeline_config_path', '--tf-obj-detection-api-pipeline-config-path',
+            help='Optional. Path to directory with tensorflow object detection api pipeline configuration files for model optimizer',
+            type=partial(get_path, is_directory=True),
+            # there is no default value because if user did not specify it we use specific location
+            # defined in model_conversion.py
+            required=False
+        )
+
+        parser.add_argument(
+            '--progress',
+            help='Optional. Progress reporter',
+            required=False,
+            default='bar')
+
+        parser.add_argument(
+            '-td', '--target_devices', '--target-devices',
+            help='Optional. Space-separated list of devices for infer',
+            required=False,
+            nargs='+',
+            default=["CPU"]
+        )
+
+        parser.add_argument(
+            '-tt', '--target_tags', '--target-tags',
+            help='Optional. Space-separated list of launcher tags for infer',
+            required=False,
+            nargs='+')
+
+        parser.add_argument(
+            '-p',
+            '--precision',
+            help='Optional. Precision to calibrate. Default value is INT8',
+            type=str,
+            required=False,
+            default='INT8')
+
+        parser.add_argument(
+            '--ignore_layer_types', '--ignore-layer-types',
+            help='Optional. Layer types list which will be skipped during quantization',
+            type=str,
+            required=False,
+            nargs='+')
+
+        parser.add_argument(
+            '--ignore_layer_types_path', '--ignore-layer-types-path',
+            help='Optional. Ignore layer types file path',
+            type=str,
+            required=False,
+            nargs='+')
+
+        parser.add_argument(
+            '--ignore_layer_names', '--ignore-layer-names',
+            help='Optional. Layer names list which will be skipped during quantization',
+            type=str,
+            required=False,
+            nargs='+')
+
+        parser.add_argument(
+            '--ignore_layer_names_path', '--ignore-layer-names-path',
+            help='Optional. Ignore layer names file path',
+            type=str,
+            required=False)
+
+        parser.add_argument(
+            '--batch_size', '--batch-size',
+            help='Optional. Batch size value. If not specified, the batch size value is determined from IR',
+            type=int,
+            required=False)
+
+        parser.add_argument(
+            '-th', '--threshold',
+            help='Optional. Accuracy drop of quantized model should not exceed this threshold. '
+                 'Should be pointer in percents without percent sign. (1%% is default)',
+            type=float,
+            required=False,
+            default=1.0)
+
+        parser.add_argument(
+            '-ic', '--benchmark_iterations_count', '--benchmark-iterations-count',
+            help='Optional. Benchmark itertations count. (1000 is default)',
+            type=int,
+            required=False,
+            default=1000)
+
+        parser.add_argument(
+            '-mn', '--metric_name', '--metric-name',
+            help='Optional. Metric name used during calibration',
+            type=str,
+            required=False)
+
+        parser.add_argument(
+            '-mt', '--metric_type', '--metric-type',
+            help='Optional. Metric type used during calibration',
+            type=str,
+            required=False)
+
+        parser.add_argument(
+            '-o', '--output_dir', '--output-dir',
+            help='Optional. Directory to store converted models. Original model directory is used if not defined',
+            type=partial(get_path, is_directory=True),
+            required=False)
+
+        parser.add_argument(
+            '-cfc', '--calibrate_fully_connected', '--calibrate-fully-connected',
+            help='Optional. FullyConnected INT8 convertion support (False is default)',
+            action="store_true",
+            required=False)
+
+        return parser
diff --git a/tools/calibration/fp16_calibrator.py b/tools/calibration/fp16_calibrator.py
new file mode 100644
index 000000000..030076b02
--- /dev/null
+++ b/tools/calibration/fp16_calibrator.py
@@ -0,0 +1,31 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .base_calibrator import BaseCalibrator
+from .calibrator_configuration import CalibratorConfiguration
+
+
+# TODO: not comlpeted. Some methods will be moved from Calibrator and customized to FP16
+class Fp16Calibrator(BaseCalibrator):
+    def __init__(self, configuration: CalibratorConfiguration):
+        pass
+
+    @property
+    def precision(self):
+        return "FP32"
+
+    def is_quantization_supported(self, layer_type: str) -> bool:
+        return layer_type.lower() == "convolution" or layer_type.lower() == "fullyconnected"
diff --git a/tools/calibration/infer_raw_results.py b/tools/calibration/infer_raw_results.py
new file mode 100644
index 000000000..b2f565580
--- /dev/null
+++ b/tools/calibration/infer_raw_results.py
@@ -0,0 +1,72 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import numpy
+import pickle
+import shutil
+import tempfile
+from typing import Dict
+
+
+class InferRawResults:
+    def __init__(self):
+        self._size = 0
+        self._index = 0
+        self._dir_path = None
+        pass
+
+    def release(self):
+        if self._dir_path:
+            shutil.rmtree(self._dir_path)
+            self._dir_path = None
+
+    def __iter__(self):
+        self._index = 0
+        return self
+
+    def __next__(self):
+        if self._index < self._size:
+            file_path = os.path.join(self._dir_path, str(self._index))
+            self._index += 1
+
+            f = open(file_path, "rb")
+            try:
+                loaded_value = pickle.load(f)
+            finally:
+                f.close()
+            return loaded_value
+        else:
+            raise StopIteration
+
+    def size(self):
+        return self._size
+
+    def add(self, value: Dict[str, numpy.ndarray]):
+        if self._dir_path is None:
+            self._dir_path = tempfile.mkdtemp("__infer_raw_results")
+            if not os.path.exists(self._dir_path):
+                os.makedirs(self._dir_path)
+
+        file_path = os.path.join(self._dir_path, str(self._size))
+
+        f = open(file_path, "wb")
+        try:
+            pickle.dump(value, f)
+        finally:
+            f.close()
+
+        self._size += 1
diff --git a/tools/calibration/inference_result.py b/tools/calibration/inference_result.py
new file mode 100644
index 000000000..65d8e94c1
--- /dev/null
+++ b/tools/calibration/inference_result.py
@@ -0,0 +1,85 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .aggregated_statistics import AggregatedStatistics
+from .calibration_metrics import CalibrationMetrics
+from .infer_raw_results import InferRawResults
+
+
+class InferenceResult:
+    def __init__(self,
+                 result: InferRawResults,
+                 metrics: CalibrationMetrics,
+                 aggregated_statistics: AggregatedStatistics,
+                 performance_counters: dict):
+        self._result = result
+        self._metrics = metrics
+        self._aggregated_statistics = aggregated_statistics
+        self._performance_counters = performance_counters
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, tb):
+        self.release()
+
+    def release(self):
+        if self._result:
+            self._result.release()
+            self._result = None
+
+    @property
+    def result(self) -> InferRawResults:
+        return self._result
+
+    @property
+    def metrics(self) -> CalibrationMetrics:
+        return self._metrics
+
+    @property
+    def aggregated_statistics(self) -> AggregatedStatistics:
+        return self._aggregated_statistics
+
+    @property
+    def performance_counters(self) -> dict:
+        return self._performance_counters
+
+    def get_class_ids(self, output_layer_name: str) -> list:
+        '''
+        Return class identifier list for classification networks
+        '''
+
+        result_classes_id_list = list()
+        for layers_result in self._result:
+            if output_layer_name not in layers_result:
+                raise KeyError("layer '{}' is not included int results".format(output_layer_name))
+
+            layer_result = layers_result[output_layer_name]
+            if layer_result.size == 0:
+                raise ValueError("result array is empty")
+
+            max_value = layer_result.item(0)
+            max_class_id = 0
+
+            for class_id in range(layer_result.size):
+                value = layer_result.item(class_id)
+                if value > max_value:
+                    max_value = value
+                    max_class_id = class_id
+
+            result_classes_id_list.append(max_class_id)
+
+        return result_classes_id_list
diff --git a/tools/calibration/int8_calibrator.py b/tools/calibration/int8_calibrator.py
new file mode 100644
index 000000000..b9e0a1665
--- /dev/null
+++ b/tools/calibration/int8_calibrator.py
@@ -0,0 +1,34 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .base_calibrator import BaseCalibrator
+from .calibrator_configuration import CalibratorConfiguration
+
+
+# TODO: not comlpeted. Some methods will be moved from Calibrator and customized to INT8
+class Int8Calibrator(BaseCalibrator):
+    '''
+    INT8 calibrator
+    '''
+    def __init__(self, configuration: CalibratorConfiguration):
+        super().__init__(configuration)
+
+    @property
+    def precision(self):
+        return "INT8"
+
+    def is_quantization_supported(self, layer_type: str) -> bool:
+        return layer_type.lower() == "convolution" or layer_type.lower() == "fullyconnected"
diff --git a/tools/calibration/layer_accuracy_drop/__init__.py b/tools/calibration/layer_accuracy_drop/__init__.py
new file mode 100644
index 000000000..9ec5df416
--- /dev/null
+++ b/tools/calibration/layer_accuracy_drop/__init__.py
@@ -0,0 +1,21 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from .collector_by_layer import CollectorByLayer
+
+__version__ = "0.0.1"
+__all__ = [
+    'CollectorByLayer'
+]
diff --git a/tools/calibration/layer_accuracy_drop/collector_by_image.py b/tools/calibration/layer_accuracy_drop/collector_by_image.py
new file mode 100644
index 000000000..787c7bf69
--- /dev/null
+++ b/tools/calibration/layer_accuracy_drop/collector_by_image.py
@@ -0,0 +1,128 @@
+import openvino.inference_engine as ie
+
+from ...utils.network_info import NetworkInfo
+from ...network import Network
+
+from ..layer_accuracy_drop_info import LayerAccuracyDropInfo
+from ..logging import debug
+from ..single_layer_network import SingleLayerNetwork
+from ..inference_result import InferenceResult
+
+
+class CollectorByImage:
+    def __init__(self, configuration, plugin, normalizer):
+        self._configuration = configuration
+        self._plugin = plugin
+        self._normalizer = normalizer
+
+    def _create_single_layer_networks(self, stat):
+        '''
+        Method get layers which can be quantized and affect on final accuracy. Separate network is created for each layer.
+        '''
+        network = ie.IENetwork(self._configuration.model, self._configuration.weights)
+        # if self._configuration.batch_size:
+        #     # need to use reshape API
+        #     network.batch_size = self._configuration.batch_size
+
+        try:
+            network_info = NetworkInfo(self._configuration.model)
+
+            # CVS-14302: IE Network INT8 Normalizer: scale factor calculation is incorrect
+            # for layer_name, layer_statistics in stat.items():
+            #     layer_info = network_info.get_layer(layer_name)
+            #     if layer_info.type == 'Convolution' and \
+            #         layer_info.outputs and \
+            #         layer_info.outputs[0].layer.type == 'ReLU' and \
+            #         layer_info.outputs[0].layer.outputs[0] and \
+            #         len(layer_statistics.max_outputs) > len(stat[layer_info.outputs[0].layer.name].max_outputs):
+
+            #         relu_max_outputs = stat[layer_info.outputs[0].layer.name].max_outputs
+            #         relu_min_outputs = stat[layer_info.outputs[0].layer.name].min_outputs
+
+            #         while len(layer_statistics.max_outputs) > len(relu_max_outputs):
+            #             relu_max_outputs.append(relu_max_outputs[-1])
+            #             relu_min_outputs.append(relu_min_outputs[-1])
+
+            single_layer_networks = dict()
+
+            layer_index = 1
+            for layer_to_clone in network.layers.values():
+                layer_to_clone_info = network_info.get_layer(layer_to_clone.name)
+                if not self._normalizer.is_quantization_supported(layer_to_clone.type) or \
+                        len(layer_to_clone_info.outputs) != 1 or \
+                        len(layer_to_clone_info.outputs[0].layer.inputs != 1):
+                    continue
+
+                activation_layer = network.layers[layer_to_clone_info.outputs[0].layer.name] if (len(layer_to_clone_info.outputs) == 1 and self._normalizer.is_quantization_fusing_supported(layer_to_clone_info, layer_to_clone_info.outputs[0].layer)) else None
+                if activation_layer:
+                    debug("create network #{} for layer {} ({}) -> {} ({})".format(layer_index, layer_to_clone.name, layer_to_clone.type, activation_layer.name, activation_layer.type))
+                else:
+                    debug("create network #{} for layer {} ({})".format(layer_index, layer_to_clone.name, layer_to_clone.type))
+
+                layer_network, reference_output_layer_name = self._normalizer.create_network_for_layer(
+                    self._configuration.weights,
+                    layer_to_clone,
+                    layer_to_clone_info,
+                    activation_layer)
+
+                Network.reshape(layer_network, self._configuration.batch_size)
+
+                network_stats = {}
+                # TODO: initialize only neccessary statistic
+                for layer_name, node_statistic in stat.items():
+                    network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs), max=tuple(node_statistic.max_outputs))
+                layer_network.stats.update(network_stats)
+
+                params = layer_network.layers[layer_to_clone.name].params
+                params["quantization_level"] = 'I8' if self._configuration.precision == 'INT8' else self._configuration.precision
+                layer_network.layers[layer_to_clone.name].params = params
+
+                exec_network = self._plugin.load(network=layer_network, config={ "EXCLUSIVE_ASYNC_REQUESTS": "YES" })
+
+                if len(layer_network.inputs) != 1:
+                    raise ValueError("created network has several inputs")
+
+                network_input_layer_name = next(iter(layer_network.inputs.keys()))
+
+                single_layer_networks[layer_to_clone.name] = SingleLayerNetwork(
+                    network = layer_network,
+                    exec_network = exec_network,
+                    input_layer_name = network_input_layer_name,
+                    layer_name = layer_to_clone.name,
+                    output_layer_name = layer_to_clone.name + "_",
+                    reference_output_layer_name = reference_output_layer_name)
+
+                layer_index += 1
+
+            return single_layer_networks
+        finally:
+            del network
+
+    def collect(self, statistics: dict(), full_network_results: InferenceResult) -> list:
+        single_layer_networks = self._create_single_layer_networks(statistics)
+
+        accuracy_drop_list_by_layer_name = dict()
+        image_index = 1
+        for full_network_result in full_network_results.result:
+            debug("image {}/{} handling".format(image_index, full_network_results.result.size()))
+
+            for single_layer_network_name, single_layer_network in single_layer_networks.items():
+                accuracy_drop = self._normalizer.infer_single_layer_network(single_layer_network, full_network_result)
+
+                if single_layer_network_name not in accuracy_drop_list_by_layer_name:
+                    accuracy_drop_list_by_layer_name[single_layer_network_name] = list()
+
+                accuracy_drop_list_by_layer_name[single_layer_network_name].append(accuracy_drop)
+            image_index += 1
+
+        accuracy_drop_by_layer = list()
+        for layer_name, accuracy_drop_list in accuracy_drop_list_by_layer_name.items():
+            accuracy_drop_by_layer.append(LayerAccuracyDropInfo(
+                layer_name=layer_name,
+                value=LayerAccuracyDropInfo.calculate(accuracy_drop_list)))
+
+        single_layer_network.release()
+        single_layer_networks.clear()
+
+        accuracy_drop_by_layer.sort(key=lambda accuracy_drop: accuracy_drop.value, reverse=True)
+        return accuracy_drop_by_layer
diff --git a/tools/calibration/layer_accuracy_drop/collector_by_layer.py b/tools/calibration/layer_accuracy_drop/collector_by_layer.py
new file mode 100644
index 000000000..e888161c8
--- /dev/null
+++ b/tools/calibration/layer_accuracy_drop/collector_by_layer.py
@@ -0,0 +1,184 @@
+from collections import namedtuple
+import multiprocessing
+import threading
+
+import openvino.inference_engine as ie
+
+from ...utils.network_info import NetworkInfo
+from ...network import Network
+
+from ..layer_accuracy_drop_info import LayerAccuracyDropInfo
+from ..logging import info, debug
+from ..single_layer_network import SingleLayerNetwork
+from ..inference_result import InferenceResult
+
+QuantizationLayer = namedtuple('QuantizationLayer', 'index layer')
+
+
+class SingleLayerNetworkThread(threading.Thread):
+    def __init__(
+        self,
+        base_calibrator,
+        statistics,
+        full_network_result: InferenceResult,
+        network: ie.IENetwork,
+        network_info: NetworkInfo,
+        quantization_layer: QuantizationLayer
+    ):
+
+        threading.Thread.__init__(self)
+        self.base_calibrator = base_calibrator
+        self.statistics = statistics
+        self.full_network_result = full_network_result
+        self.network = network
+        self.network_info = network_info
+        self.quantization_layer = quantization_layer
+        self.result = None
+
+    def run(self):
+        self.result = self.base_calibrator.collect_in_thread(
+            self.statistics,
+            self.full_network_result,
+            self.network,
+            self.network_info,
+            self.quantization_layer)
+
+class CollectorByLayer:
+
+    def __init__(self, configuration, plugin, normalizer):
+        self._configuration = configuration
+        self._plugin = plugin
+        self._normalizer = normalizer
+
+    def collect(self, statistics: dict(), full_network_result: InferenceResult) -> list:
+        '''
+        Method get layers which can be quantized and affect on final accuracy. Separate network is created for each layer.
+        '''
+        accuracy_drop_by_layer = list()
+
+        network = ie.IENetwork(self._configuration.model, self._configuration.weights)
+        # if self._configuration.batch_size:
+        #     # need to use reshape API
+        #     network.batch_size = self._configuration.batch_size
+
+        try:
+            network_info = NetworkInfo(self._configuration.model)
+
+            #  2. go over all layers which affect accuracy and create network basing on it
+            quantization_layers = list()
+
+            index = 1
+            threads = list()
+            for layer in network.layers.values():
+                if self._normalizer.is_quantization_supported(layer.type):
+                    layer_info = network_info.get_layer(layer.name)
+                    if (len(layer_info.outputs) == 1) and (len(layer_info.outputs[0].layer.inputs) == 1):
+                        quantization_layer = QuantizationLayer(index, layer)
+                        quantization_layers.append(quantization_layer)
+                        threads.append(SingleLayerNetworkThread(self, statistics, full_network_result, network, network_info, quantization_layer))
+                        index += 1
+
+            it = iter(threads)
+            threads_num = multiprocessing.cpu_count() * 2
+            active_threads = list()
+            while True:
+                active_threads.clear()
+                for thread_num in range(threads_num):
+                    active_thread = next(it, None)
+                    if not active_thread:
+                        break
+                    active_threads.append(active_thread)
+                    active_thread.start()
+
+                for active_thread in active_threads:
+                    active_thread.join()
+
+                if not active_thread:
+                    debug("all layer networks were infered")
+                    break
+
+                debug("all layer networks before #{} were infered".format(active_thread.quantization_layer.index))
+
+            for thread in threads:
+                thread.join()
+                accuracy_drop_by_layer.append(thread.result)
+
+            accuracy_drop_by_layer.sort(key=lambda accuracy_drop: accuracy_drop.value, reverse=True)
+            return accuracy_drop_by_layer
+        finally:
+            del network
+
+    def collect_in_thread(
+        self,
+        statistics: dict(),
+        full_network_result: InferenceResult,
+        network: ie.IENetwork,
+        network_info: NetworkInfo,
+        quantization_layer: QuantizationLayer
+    ) -> LayerAccuracyDropInfo:
+
+        index = quantization_layer.index
+        layer_to_clone = quantization_layer.layer
+        layer_to_clone_info = network_info.get_layer(layer_to_clone.name)
+
+        activation_layer = network.layers[layer_to_clone_info.outputs[0].layer.name] if (len(layer_to_clone_info.outputs) == 1 and self._normalizer.is_quantization_fusing_supported(layer_to_clone_info, layer_to_clone_info.outputs[0].layer)) else None
+        if activation_layer:
+            debug("create network #{} for layer {} ({}) -> {} ({})".format(index, layer_to_clone.name, layer_to_clone.type, activation_layer.name, activation_layer.type))
+        else:
+            debug("create network #{} for layer {} ({})".format(index, layer_to_clone.name, layer_to_clone.type))
+
+        layer_network, reference_output_layer_name = self._normalizer.create_network_for_layer(
+            self._configuration.weights,
+            layer_to_clone,
+            layer_to_clone_info,
+            activation_layer)
+
+        Network.reshape(layer_network, self._configuration.batch_size)
+
+        network_stats = {}
+        # TODO: initialize only neccessary statistic
+        for layer_name, node_statistic in statistics.items():
+            network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs), max=tuple(node_statistic.max_outputs))
+        layer_network.stats.update(network_stats)
+
+        params = layer_network.layers[layer_to_clone.name].params
+        params["quantization_level"] = 'I8' if self._configuration.precision == 'INT8' else self._configuration.precision
+        layer_network.layers[layer_to_clone.name].params = params
+
+        exec_network = self._plugin.load(network=layer_network, config={ "EXCLUSIVE_ASYNC_REQUESTS": "YES" })
+
+        if len(layer_network.inputs) != 1:
+            raise ValueError("created network has several inputs")
+
+        network_input_layer_name = next(iter(layer_network.inputs.keys()))
+
+        with SingleLayerNetwork(
+            network=layer_network,
+            exec_network=exec_network,
+            input_layer_name=network_input_layer_name,
+            layer_name=layer_to_clone.name,
+            output_layer_name=layer_to_clone.name + "_",
+            reference_output_layer_name=reference_output_layer_name
+        ) as single_layer_network:
+
+            debug("single layer #{} {} network infer".format(index, single_layer_network.layer_name))
+            accuracy_drop_list = self.infer_single_layer_network(single_layer_network, full_network_result)
+
+            return LayerAccuracyDropInfo(
+                layer_name=single_layer_network.layer_name,
+                value=LayerAccuracyDropInfo.calculate(accuracy_drop_list))
+
+    def infer_single_layer_network(self, single_layer_network: SingleLayerNetwork, full_network_results: list()):
+        '''
+        Native infer and compare results
+        '''
+
+        if full_network_results.result is None:
+            raise ValueError("output inference results are absent")
+
+        accuracy_drop_list = list()
+        for full_network_result in full_network_results.result:
+            difference = self._normalizer.infer_single_layer_network(single_layer_network, full_network_result)
+            accuracy_drop_list.append(difference)
+
+        return accuracy_drop_list
diff --git a/tools/calibration/layer_accuracy_drop_info.py b/tools/calibration/layer_accuracy_drop_info.py
new file mode 100644
index 000000000..2c262f9ed
--- /dev/null
+++ b/tools/calibration/layer_accuracy_drop_info.py
@@ -0,0 +1,36 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class LayerAccuracyDropInfo:
+    def __init__(self, layer_name: str, value: float):
+        self._layer_name = layer_name
+        self._value = value
+
+    @property
+    def layer_name(self):
+        return self._layer_name
+
+    @property
+    def value(self):
+        return self._value
+
+    @staticmethod
+    def calculate(accuracy_drop: list) -> float:
+        sum = 0.0
+        for d in accuracy_drop:
+            sum += d
+        return sum / len(accuracy_drop)
diff --git a/tools/calibration/layers/__init__.py b/tools/calibration/layers/__init__.py
new file mode 100644
index 000000000..abb94eaed
--- /dev/null
+++ b/tools/calibration/layers/__init__.py
@@ -0,0 +1,15 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
diff --git a/tools/calibration/logging.py b/tools/calibration/logging.py
new file mode 100644
index 000000000..bc936b4e4
--- /dev/null
+++ b/tools/calibration/logging.py
@@ -0,0 +1,159 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import logging.config
+import sys
+import warnings
+import threading
+
+# TODO: move to utils
+_DEFAULT_LOGGER_NAME = 'openvino.tools.calibration'
+_DEFAULT_LOG_FILE = 'openvino.tools.calibration.log'
+
+PRINT_INFO = logging.INFO + 5
+logging.addLevelName(PRINT_INFO, "PRINT_INFO")
+
+_LOG_LEVEL_ENVIRON = "CALIBRATION_TOOL_LOG_LEVEL"
+# _LOGGING_LEVEL = logging.getLevelName(os.environ.get(_LOG_LEVEL_ENVIRON, PRINT_INFO))
+# TODO: refactoring: remove, use original line
+_LOGGING_LEVEL = "DEBUG"
+
+lock = threading.Lock()
+
+
+class LoggingFormatter(logging.Formatter):
+    def format(self, record: logging.LogRecord):
+        if record.levelno == PRINT_INFO:
+            return record.msg
+        return super().format(record)
+
+
+class ConsoleHandler(logging.StreamHandler):
+    def __init__(self, default_stream=sys.stdout):
+        super().__init__(default_stream)
+        self.default_stream = default_stream
+        self.err_stream = sys.stderr
+
+    def emit(self, record):
+        if record.levelno >= logging.WARNING:
+            self.stream = self.err_stream
+        else:
+            self.stream = self.default_stream
+        super().emit(record)
+
+
+_LOGGING_CONFIGURATION = {
+    'version': 1,
+    'disable_existing_loggers': False,
+    'formatters': {
+        'default': {
+            '()': LoggingFormatter,
+            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s',
+            'datefmt': '%H:%M:%S'
+        },
+        'detailed': {
+            'format': '%(asctime)s %(name)s %(levelname)s: %(message)s'
+        }
+    },
+    'handlers': {
+        'console': {
+            'level': 'DEBUG',
+            '()': ConsoleHandler,
+            'formatter': 'default',
+        }
+    },
+
+    'loggers': {
+        _DEFAULT_LOGGER_NAME: {
+            'handlers': ['console'],
+            'level': _LOGGING_LEVEL,
+            'propagate': False
+        }
+    }
+}
+
+logging.config.dictConfig(_LOGGING_CONFIGURATION)
+
+default_logger = logging.getLogger(_DEFAULT_LOGGER_NAME)
+
+
+def _warning_handler(message, category, filename, lineno):
+    s = warnings.formatwarning(message, category, filename, lineno)
+    default_logger.warning(s)
+
+
+warnings.showwarning = _warning_handler
+
+
+def get_logger(logger_name: str):
+    if logger_name.startswith(_DEFAULT_LOGGER_NAME):
+        return default_logger.getChild(logger_name)
+    return logging.getLogger(logger_name)
+
+
+def error(msg, *args, **kwargs):
+    with lock:
+        default_logger.error(msg, *args, **kwargs)
+
+
+def warning(msg, *args, raise_warning=True, **kwargs):
+    with lock:
+        if raise_warning:
+            warnings.warn(msg)
+        else:
+            default_logger.warning(msg, *args, **kwargs)
+
+
+def info(msg, *args, **kwargs):
+    with lock:
+        default_logger.info(msg, *args, **kwargs)
+
+
+def info_performance_counters(performance_counters: dict, *args, **kwargs):
+    performance_counters_info = "\n\t{:<80} {:<15} {:<20} {:<15} {:<10}\n".format(
+		'name',
+		'layer_type',
+		'exet_type',
+		'status',
+		'real_time, us')
+
+    for layer_name, stats in performance_counters.items():
+        performance_counters_info += "\t{:<80} {:<15} {:<20} {:<15} {:<10}\n".format(
+            layer_name[0:77] + "..." if len(layer_name) > 80 else layer_name,
+            stats['layer_type'],
+            stats['exec_type'],
+            stats['status'],
+            stats['real_time'])
+    info(performance_counters_info, *args, **kwargs)
+
+
+def info_layer_accuracy_drop(layers_accuracy_drop: list, *args, **kwargs):
+    layer_accuracy_drop_text = "\n"
+    for layer_accuracy_drop in layers_accuracy_drop:
+        layer_accuracy_drop_text += "\t{0}: {1:.4f}%\n".format(
+            layer_accuracy_drop.layer_name[0:77] + "..." if len(layer_accuracy_drop.layer_name) > 80 else layer_accuracy_drop.layer_name,
+            layer_accuracy_drop.value * 100.0)
+    info(layer_accuracy_drop_text, *args, **kwargs)
+
+
+def debug(msg, *args, **kwargs):
+    with lock:
+        default_logger.debug(msg, *args, **kwargs)
+
+
+def print_info(msg, *args, **kwargs):
+    default_logger.log(PRINT_INFO, msg, *args, **kwargs)
diff --git a/tools/calibration/network_node_stats.py b/tools/calibration/network_node_stats.py
new file mode 100644
index 000000000..0a6c967f3
--- /dev/null
+++ b/tools/calibration/network_node_stats.py
@@ -0,0 +1,26 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class NetworkNodeStats:
+    __slots__ = ['min_outputs', 'max_outputs']
+
+    def __init__(self, channels_count: int):
+        self.min_outputs = list()
+        self.max_outputs = list()
+        for i in range(channels_count):
+            self.min_outputs.append(None)
+            self.max_outputs.append(None)
+\ No newline at end of file
diff --git a/tools/calibration/nrmsd.py b/tools/calibration/nrmsd.py
new file mode 100644
index 000000000..bd78fffb1
--- /dev/null
+++ b/tools/calibration/nrmsd.py
@@ -0,0 +1,38 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import numpy as np
+
+
+def compare_nrmsd(actual_data, expected_data):
+    if actual_data.size != expected_data.size:
+        raise ValueError("actual data size {} is not equal expected data size {}".format(actual_data.size, expected_data.size))
+
+    sum = 0.0
+    index = 0
+    for expected_item in np.nditer(expected_data):
+        actual_item = actual_data.item(index)
+        sum += pow(expected_item - actual_item, 2)
+        index += 1
+
+    sum = sum / expected_data.size
+    sum = pow(sum, 0.5)
+    
+    if expected_data.max() - expected_data.min() == 0:
+        return 1.0
+        
+    sum = sum / (expected_data.max() - expected_data.min())
+    return sum
diff --git a/tools/calibration/requirements.txt b/tools/calibration/requirements.txt
new file mode 100644
index 000000000..5e3e8ee14
--- /dev/null
+++ b/tools/calibration/requirements.txt
@@ -0,0 +1,8 @@
+py-cpuinfo
+numpy
+progress
+pyyaml
+opencv-python
+shapely
+sklearn
+xmltodict
diff --git a/tools/calibration/shape.py b/tools/calibration/shape.py
new file mode 100644
index 000000000..67d21b950
--- /dev/null
+++ b/tools/calibration/shape.py
@@ -0,0 +1,121 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class NchwShape:
+    def __init__(self, n: int, c: int, h: int, w: int):
+        self._n = n
+        self._c = c
+        self._h = h
+        self._w = w
+
+    @property
+    def layout(self) -> str:
+        return 'NCHW'
+
+    @property
+    def n(self) -> int:
+        return self._n
+
+    @property
+    def c(self) -> int:
+        return self._c
+
+    @property
+    def h(self) -> int:
+        return self._h
+
+    @property
+    def w(self) -> int:
+        return self._w
+
+
+class ChwShape:
+    def __init__(self, c: int, h: int, w: int):
+        self._c = c
+        self._h = h
+        self._w = w
+
+    @property
+    def n(self) -> int:
+        return 1
+
+    @property
+    def layout(self) -> str:
+        return 'CHW'
+
+    @property
+    def c(self) -> int:
+        return self._c
+
+    @property
+    def h(self) -> int:
+        return self._h
+
+    @property
+    def w(self) -> int:
+        return self._w
+
+
+class NcShape:
+    def __init__(self, n: int, c: int):
+        self._n = n
+        self._c = c
+
+    @property
+    def layout(self) -> str:
+        return 'NC'
+
+    @property
+    def n(self) -> int:
+        return self._n
+
+    @property
+    def c(self) -> int:
+        return self._c
+
+
+class CShape:
+    def __init__(self, c: int):
+        self._n = 1
+        self._c = c
+
+    @property
+    def layout(self) -> str:
+        return 'C'
+
+    @property
+    def n(self) -> int:
+        return self._n
+
+    @property
+    def c(self) -> int:
+        return self._c
+
+
+class Shape:
+    @staticmethod
+    def create(layout:str, dims):
+        if layout == 'NCHW':
+            return NchwShape(dims[0], dims[1], dims[2], dims[3])
+        if layout == 'CHW':
+            return ChwShape(dims[0], dims[1], dims[2])
+        elif layout == 'NC':
+            return NcShape(dims[0], dims[1])
+        elif layout == 'C':
+            return CShape(dims[0])
+        else:
+            raise ValueError("not supported layout '{}'".format(layout))
diff --git a/tools/calibration/single_layer_network.py b/tools/calibration/single_layer_network.py
new file mode 100644
index 000000000..fb7c684fa
--- /dev/null
+++ b/tools/calibration/single_layer_network.py
@@ -0,0 +1,85 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from openvino.inference_engine import IENetwork, ExecutableNetwork, InferRequest
+
+
+# TODO: network and request are not used
+# TODO: refactor: create network before inference only
+class SingleLayerNetwork:
+    '''
+    One layer network description
+    '''
+
+    def __init__(
+        self,
+        network: IENetwork,
+        exec_network: ExecutableNetwork,
+        input_layer_name: str,
+        layer_name: str,
+        output_layer_name: str,
+        reference_output_layer_name: str):
+
+        self._network = network
+        self._exec_network = exec_network
+        self._input_layer_name = input_layer_name
+        self._layer_name = layer_name
+        self._output_layer_name = output_layer_name
+        self._reference_output_layer_name = reference_output_layer_name
+        self._int8_accuracy_list = list()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, tb):
+        self.release()
+
+    def release(self):
+        if self._network:
+            del self._network
+            self._network = None
+
+        if self._exec_network:
+            del self._exec_network
+            self._exec_network = None
+
+    @property
+    def network(self) -> IENetwork:
+        return self._network
+
+    @property
+    def exec_network(self) -> ExecutableNetwork:
+        return self._exec_network
+
+    @property
+    def input_layer_name(self) -> str:
+        return self._input_layer_name
+
+    @property
+    def layer_name(self) -> str:
+        return self._layer_name
+
+    @property
+    def output_layer_name(self) -> str:
+        return self._output_layer_name
+
+    @property
+    def reference_output_layer_name(self) -> str:
+        return self._reference_output_layer_name
+
+    @property
+    def int8_accuracy_list(self) -> list:
+        return self._int8_accuracy_list
diff --git a/tools/calibration/top_results.py b/tools/calibration/top_results.py
new file mode 100644
index 000000000..6e0ddc095
--- /dev/null
+++ b/tools/calibration/top_results.py
@@ -0,0 +1,37 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class TopResults:
+    def __init__(self, data, channels_count: int):
+        self.__results = list()
+
+        samples = int(data.size / channels_count)
+        for sample in range(samples):
+            max_value = None
+            max_value_class_number = None
+
+            for class_number in range(channels_count):
+                value = data.item(class_number + sample * channels_count)
+                if (max_value is None) or (max_value < value):
+                    max_value = value
+                    max_value_class_number = class_number
+
+            self.__results.append(max_value_class_number)
+
+    @property
+    def results(self):
+        return self.__results
diff --git a/tools/network.py b/tools/network.py
new file mode 100644
index 000000000..303d3c338
--- /dev/null
+++ b/tools/network.py
@@ -0,0 +1,111 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import tempfile
+import shutil
+import ntpath
+
+import openvino.inference_engine as ie
+from .utils.path import Path
+
+
+class Network:
+    @staticmethod
+    def reload(model_path: str, statistics = None, quantization_levels: dict() = None, batch_size: int = None):
+        tmp_model_dir = None
+        try:
+            with Network(model_path) as network:
+                if statistics:
+                    network.set_statistics(statistics)
+                if quantization_levels:
+                    network.set_quantization_levels(quantization_levels)
+
+                tmp_model_dir = tempfile.mkdtemp(".model")
+                tmp_model_path = os.path.join(tmp_model_dir, ntpath.basename(model_path))
+                network.serialize(tmp_model_path)
+
+            network = Network(tmp_model_path)
+            Network.reshape(network.ie_network, batch_size)
+            return network
+        finally:
+            if tmp_model_dir:
+                shutil.rmtree(tmp_model_dir)
+
+    def __init__(self, model_path: str, weights_path: str=None):
+        if model_path is None:
+            raise ValueError("model_path is None")
+
+        self._model_path = model_path
+        self._weights_path = weights_path if weights_path else Path.get_weights(model_path)
+        self._ie_network = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, type, value, traceback):
+        self.release()
+
+    def release(self):
+        if self._ie_network:
+            del self._ie_network
+            self._ie_network = None
+
+    @staticmethod
+    def reshape(ie_network: ie.IENetwork, batch_size: int) -> ie.IENetwork:
+        if batch_size and batch_size != ie_network.batch_size:
+            new_shapes = {}
+            for input_layer_name, input_layer in ie_network.inputs.items():
+                layout = input_layer.layout
+                if layout == 'C':
+                    new_shape = (input_layer.shape[0],)
+                elif layout == 'NC':
+                    new_shape = (batch_size, input_layer.shape[1])
+                else:
+                    raise ValueError("not supported layout '{}'".format(layout))                    
+                new_shapes[input_layer_name] = new_shape
+            ie_network.reshape(new_shapes)
+        return ie_network
+
+    @property
+    def model_path(self) -> str:
+        return self._model_path
+
+    @property
+    def weights_path(self) -> str:
+        return self._weights_path
+
+    @property
+    def ie_network(self) -> ie.IENetwork:
+        if not self._ie_network:
+            self._ie_network = ie.IENetwork(self._model_path, self._weights_path)
+        return self._ie_network
+
+    def set_quantization_levels(self, quantization_level: dict):
+        for layer_name, value in quantization_level.items():
+            params = self.ie_network.layers[layer_name].params
+            params["quantization_level"] = value
+            self.ie_network.layers[layer_name].params = params
+
+    def set_statistics(self, statistics: dict):
+        network_stats = {}
+        for layer_name, node_statistic in statistics.items():
+            network_stats[layer_name] = ie.LayerStats(min=tuple(node_statistic.min_outputs),
+                                                      max=tuple(node_statistic.max_outputs))
+        self.ie_network.stats.update(network_stats)
+
+    def serialize(self, model_path: str, weights_path: str=None):
+        self.ie_network.serialize(model_path, weights_path if weights_path else Path.get_weights(model_path))
diff --git a/tools/utils/__init__.py b/tools/utils/__init__.py
new file mode 100644
index 000000000..95b072635
--- /dev/null
+++ b/tools/utils/__init__.py
@@ -0,0 +1,22 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from .path import Path
+
+__version__ = "0.0.1"
+__all__ = [
+    'Path'
+]
diff --git a/tools/utils/biases.py b/tools/utils/biases.py
new file mode 100644
index 000000000..88b7579c7
--- /dev/null
+++ b/tools/utils/biases.py
@@ -0,0 +1,29 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Biases:
+    def __init__(self, offset: int, size: int):
+        self._offset = offset
+        self._size = size
+
+    @property
+    def offset(self) -> int:
+        return self._offset
+
+    @property
+    def size(self) -> int:
+        return self._size
diff --git a/tools/utils/building/__init__.py b/tools/utils/building/__init__.py
new file mode 100644
index 000000000..e8cc80ed0
--- /dev/null
+++ b/tools/utils/building/__init__.py
@@ -0,0 +1,17 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+__version__ = "0.0.1"
diff --git a/tools/utils/building/layer.py b/tools/utils/building/layer.py
new file mode 100644
index 000000000..199fc9215
--- /dev/null
+++ b/tools/utils/building/layer.py
@@ -0,0 +1,157 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from ..biases import Biases
+from ..weights import Weights
+
+
+class Layer:
+    TEMPLATE = (
+        '<layer name="{name}" type="{type}" precision="FP32" id="{id}">'
+            '{data}'
+            '{input}'
+            '{output}'
+            '{weights}'
+            '{biases}'
+        '</layer>')
+
+    def __init__(
+        self, id: int,
+        type: str,
+        name: str,
+        params: dict,
+        input_dims: list,
+        output_dims: list,
+        weights: Weights = None,
+        biases: Biases = None):
+        self._id = id
+        self._type = type
+        self._name = name
+        self._params = params
+        self._input_dims = input_dims
+        self._output_dims = output_dims
+        self._weights = weights
+        self._biases = biases
+
+    @property
+    def id(self) -> str:
+        return self._id
+
+    @property
+    def type(self) -> str:
+        return self._type
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def params(self) -> dict:
+        return self._params
+
+    @property
+    def input_dims(self) -> list:
+        return self._input_dims
+
+    @property
+    def output_dims(self) -> list:
+        return self._output_dims
+
+    @property
+    def weights(self) -> Weights:
+        return self._weights
+
+    @property
+    def biases(self) -> Biases:
+        return self._biases
+
+    def _output_dims_to_xml(self) -> str:
+        if self._output_dims:
+            if len(self._output_dims) == 2:
+                output_xml = (
+                    '<output>'
+                        '<port id="1">'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                        '</port>'
+                    '</output>').format(self._output_dims[0], self._output_dims[1])
+            elif len(self._output_dims) == 4:
+                output_xml = (
+                    '<output>'
+                        '<port id="1">'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                        '</port>'
+                    '</output>').format(self._output_dims[0], self._output_dims[1], self._output_dims[2], self._output_dims[3])
+            else:
+                raise NotImplementedError("{} dimensions for outputs (layer name '{}', type '{}') are not supported".format(
+                    len(self._output_dims),
+                    self._name,
+                    self._type))
+        else:
+            output_xml = None
+        return output_xml
+
+    def _input_dims_to_xml(self) -> str:
+        if self._input_dims:
+            if len(self._input_dims) == 2:
+                input_xml = (
+                    '<input>'
+                        '<port id="0">'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                        '</port>'
+                    '</input>').format(self._input_dims[0], self._input_dims[1])
+            elif len(self._input_dims) == 4:
+                input_xml = (
+                    '<input>'
+                        '<port id="0">'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                            '<dim>{}</dim>'
+                        '</port>'
+                    '</input>').format(self._input_dims[0], self._input_dims[1], self._input_dims[2], self._input_dims[3])
+            else:
+                raise NotImplementedError("{} dimensions for inputs (layer name '{}', type '{}') are not supported".format(
+                    len(self._input_dims),
+                    self._name,
+                    self._type))
+        else:
+            input_xml = None
+
+        return input_xml
+
+    def __str__(self) -> str:
+        if self._params:
+            data_xml = "<data "
+            for param_key in self._params.keys():
+                data_xml += '{}="{}" '.format(param_key, self._params[param_key])
+            data_xml += " />"
+        else:
+            data_xml = None
+
+        return self.TEMPLATE.format(
+            name=self._name,
+            type=self._type,
+            id=self._id,
+            data=(data_xml if data_xml else ''),
+            input=(self._input_dims_to_xml() if self._input_dims else ''),
+            output=(self._output_dims_to_xml() if self._output_dims else ''),
+            weights=('<weights offset="{offset}" size="{size}"/>'.format(offset=self._weights.offset, size=self._weights.size) if self._weights else ''),
+            biases=('<biases offset="{offset}" size="{size}"/>'.format(offset=self._biases.offset, size=self._biases.size) if self._biases else '')
+            )
diff --git a/tools/utils/building/network_builder.py b/tools/utils/building/network_builder.py
new file mode 100644
index 000000000..fe6334bbd
--- /dev/null
+++ b/tools/utils/building/network_builder.py
@@ -0,0 +1,51 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+# TODO: limitations:
+# - one input
+# - one output
+# - dims size is 4
+class NetworkBuilder:
+
+    EDGES_TEMPLATE2 = (
+        '<edges>'
+            '<edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>'
+            '<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>'
+        '</edges>')
+
+    EDGES_TEMPLATE3 = (
+        '<edges>'
+            '<edge from-layer="0" from-port="1" to-layer="1" to-port="0"/>'
+            '<edge from-layer="1" from-port="1" to-layer="2" to-port="0"/>'
+            '<edge from-layer="2" from-port="1" to-layer="3" to-port="0"/>'
+        '</edges>')
+
+    def __init__(self, version: int = 3):
+        self._layers = list()
+
+    def __str__(self):
+        # xml = '<net name="one_layer_calibtation_network" version="2" batch="1"><layers>'
+        xml = '<net name="one_layer_calibtation_network" version="3" batch="1"><layers>'
+        for layer in self._layers:
+            xml = xml + str(layer)
+
+        xml = xml + "</layers>" + (NetworkBuilder.EDGES_TEMPLATE2 if len(self._layers) == 3 else NetworkBuilder.EDGES_TEMPLATE3) + "</net>"
+        return xml
+
+    def sequential(self, layers):
+        self._layers = layers
+        return self
diff --git a/tools/utils/building/port.py b/tools/utils/building/port.py
new file mode 100644
index 000000000..a9ace63b6
--- /dev/null
+++ b/tools/utils/building/port.py
@@ -0,0 +1,20 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Port:
+    def __init__(self, dims: list):
+        self._dims = dims
diff --git a/tools/utils/configuration_filter.py b/tools/utils/configuration_filter.py
new file mode 100644
index 000000000..c5ed21cd3
--- /dev/null
+++ b/tools/utils/configuration_filter.py
@@ -0,0 +1,74 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import copy
+import os
+
+
+class ConfigurationFilter:
+    @staticmethod
+    def filter(configuration, filter_metric_name: str, filter_metric_type: str, logger = None):
+        updated_configuration = copy.deepcopy(configuration)
+        if 'models' not in updated_configuration or len(updated_configuration['models']) == 0:
+            raise ValueError("'models' key is absent in configuration")
+
+        updated_configuration['models'] = [model for model in updated_configuration['models'] if 'launchers' in model and model['launchers']]
+        if len(updated_configuration['models']) > 1:
+            raise ValueError("too many models")
+
+        if not updated_configuration['models']:
+            raise ValueError("there are no models")
+
+        model = updated_configuration['models'][0]
+        if 'datasets' not in model or len(model['datasets']) == 0:
+            raise ValueError("'datasets' key is absent in models")
+
+        if len(model['datasets']) > 1:
+            raise ValueError("too many datasets in model")
+
+        dataset = model['datasets'][0]
+        if filter_metric_name:
+            dataset['metrics'] = [i for i in dataset['metrics'] if i['name'] == filter_metric_name]
+
+        if filter_metric_type:
+            dataset['metrics'] = [i for i in dataset['metrics'] if i['type'] == filter_metric_type]
+
+        if 'metrics' not in dataset or len(dataset['metrics']) == 0:
+            raise ValueError("can not find appropriate metric in dataset{}{}".format(
+                ", filter_metric_name='{}'".format(filter_metric_name) if filter_metric_name else "",
+                ", filter_metric_type='{}'".format(filter_metric_type) if filter_metric_type else ""))
+
+        if filter_metric_name is None and filter_metric_type is None and len(dataset['metrics']) > 1:
+            dataset['metrics'] = [dataset['metrics'][0]]
+            if logger:
+                logger.warn("too many metrics without filters, first metric '{}' is used".format(str(dataset['metrics'][0])))
+
+        if len(dataset['metrics']) > 1:
+            raise ValueError("too many metrics in datasets")
+
+        metric = dataset['metrics'][0]
+        if 'presenter' in metric and metric['presenter'] != 'return_value':
+            original_presenter = metric['presenter']
+            metric['presenter'] = 'return_value'
+            if logger:
+                logger.warn("presenter was changed from '{}' to '{}'".format(original_presenter, metric['presenter']))
+        else:
+            metric['presenter'] = 'return_value'
+            if logger:
+                logger.warn("presenter was set to '{}'".format(metric['presenter']))
+
+        return updated_configuration
+
diff --git a/tools/utils/connection.py b/tools/utils/connection.py
new file mode 100644
index 000000000..cb5ce738b
--- /dev/null
+++ b/tools/utils/connection.py
@@ -0,0 +1,34 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Connection:
+    def __init__(self, edge, port, layer):
+        self._edge = edge
+        self._port = port
+        self._layer = layer
+
+    @property
+    def edge(self):
+        return self._edge
+
+    @property
+    def port(self):
+        return self._port
+
+    @property
+    def layer(self):
+        return self._layer
diff --git a/tools/utils/edge.py b/tools/utils/edge.py
new file mode 100644
index 000000000..5c8d3c7d6
--- /dev/null
+++ b/tools/utils/edge.py
@@ -0,0 +1,39 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Edge:
+    def __init__(self, data: dict):
+        self._from_layer = int(data['from-layer'])
+        self._from_port = int(data['from-port'])
+        self._to_layer = int(data['to-layer'])
+        self._to_port = int(data['to-port'])
+
+    @property
+    def from_layer(self) -> int:
+        return self._from_layer
+
+    @property
+    def from_port(self) -> int:
+        return self._from_port
+
+    @property
+    def to_layer(self) -> int:
+        return self._to_layer
+
+    @property
+    def to_port(self) -> int:
+        return self._to_port
diff --git a/tools/utils/layer.py b/tools/utils/layer.py
new file mode 100644
index 000000000..707bb0773
--- /dev/null
+++ b/tools/utils/layer.py
@@ -0,0 +1,99 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import collections
+
+from .biases import Biases
+from .weights import Weights
+from .port import Port
+
+
+class Layer:
+    def __init__(self, data: dict):
+        self._id = int(data['id'])
+        self._name = data['name']
+        self._precision = data['precision']
+        self._type = data['type']
+
+        self._input_ports = Layer.__init_ports(data, 'input')
+        self._output_ports = Layer.__init_ports(data, 'output')
+
+        self._inputs = list()
+        self._outputs = list()
+
+        blobs = data['blobs'] if 'blobs' in data else data
+        self._weights = Weights(int(blobs['weights']['offset']), int(blobs['weights']['size'])) if 'weights' in blobs else Weights(0, 0)
+        self._biases = Biases(int(blobs['biases']['offset']), int(blobs['biases']['size'])) if 'biases' in blobs else Biases(0, 0)
+
+    @staticmethod
+    def __init_ports(data: dict, key: str) -> dict:
+        result_ports = dict()
+        if (key in data) and ('port' in data[key]):            
+            ports = data[key]['port']
+            if type(ports) is list:
+                for port_dict in ports:
+                    id = int(port_dict['id'])
+                    result_ports[id] = Port(id, list(map(int, port_dict['dim'])))
+            elif type(ports) is collections.OrderedDict:
+                id = int(ports['id'])
+                result_ports[id] = Port(id, list(map(int, ports['dim'])))
+            else:
+                raise ValueError("unexpected ports type '{}'".format(type(ports)))
+        return result_ports
+
+    def init(self, inputs: list, outputs: list):
+        self._inputs = inputs
+        self._outputs = outputs
+
+    @property
+    def id(self) -> int:
+        return self._id
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def precision(self) -> str:
+        return self._precision
+
+    @property
+    def type(self) -> str:
+        return self._type
+
+    @property
+    def input_ports(self):
+        return self._input_ports
+
+    @property
+    def output_ports(self):
+        return self._output_ports
+
+    @property
+    def inputs(self) -> list:
+        return self._inputs
+
+    @property
+    def outputs(self) -> list:
+        return self._outputs
+
+    @property
+    def weights(self):
+        return self._weights
+
+    @property
+    def biases(self):
+        return self._biases
diff --git a/tools/utils/network_info.py b/tools/utils/network_info.py
new file mode 100644
index 000000000..d318e46a4
--- /dev/null
+++ b/tools/utils/network_info.py
@@ -0,0 +1,123 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import xmltodict
+from typing import List
+
+from .layer import Layer
+from .edge import Edge
+from .connection import Connection
+
+
+# TODO: custom implementation:
+# 1. get in/out layers
+# 2. add_layer
+class NetworkInfo:
+    def __init__(self, model_path: str):
+        
+        model_content = None
+        with open(model_path, 'r') as mode_file:
+            model_content = mode_file.read()
+
+        model_xml = xmltodict.parse(model_content, attr_prefix='')
+        if 'net' not in model_xml:
+            raise ValueError("IR file '{}' format is not correct".format(model_path))
+
+        self._model = model_xml['net']
+
+        # TODO: move to private method
+        ordered_edges = self._model['edges']['edge']
+        self._edges_by_from_layer = dict()
+        self._edges_by_to_layer = dict()
+        for ordered_edge in ordered_edges:
+            from_layer = int(ordered_edge['from-layer'])
+            to_layer = int(ordered_edge['to-layer'])
+
+            edge = Edge(ordered_edge)
+
+            if from_layer not in self._edges_by_from_layer:
+                self._edges_by_from_layer[from_layer] = list()
+            self._edges_by_from_layer[from_layer].append(edge)
+
+            if to_layer not in self._edges_by_to_layer:
+                self._edges_by_to_layer[to_layer] = list()
+            self._edges_by_to_layer[to_layer].append(edge)
+
+        # TODO: move to private method
+        ordered_layers = self._model['layers']['layer']
+        self._layer_by_id = dict()
+        self._layer_by_name = dict()
+        for ordered_layer in ordered_layers:
+            layer = Layer(ordered_layer)
+            self._layer_by_id[int(ordered_layer['id'])] = layer
+            self._layer_by_name[layer.name] = layer
+
+        # TODO: move to private method
+        for layer_id, layer in self._layer_by_id.items():
+            input_edges = self._edges_by_to_layer[layer_id] if layer_id in self._edges_by_to_layer else list()
+            inputs = list()
+            for edge in input_edges:
+                if edge.from_layer not in self._layer_by_id:
+                    raise ValueError("layer with id {} was not found".format(edge.from_layer))
+
+                # inputs.append(self._layer_by_id[edge.from_layer])
+                from_layer = self._layer_by_id[edge.from_layer]
+                inputs.append(Connection(edge=edge, port=layer.input_ports[edge.to_port], layer=from_layer))
+
+            output_edges = self._edges_by_from_layer[layer_id] if layer_id in self._edges_by_from_layer else list()
+            outputs = list()
+            for edge in output_edges:
+                if edge.to_layer not in self._layer_by_id:
+                    raise ValueError("layer with id {} was not found".format(edge.to_layer))
+
+                # outputs.append(self._layer_by_id[edge.to_layer])
+                to_layer = self._layer_by_id[edge.to_layer]
+                outputs.append(Connection(edge=edge, port=layer.output_ports[edge.from_port], layer=to_layer))
+
+            layer.init(inputs, outputs)
+
+        pass
+
+    def get_layer_names(self, layer_types: List[str]) -> List[str]:
+        skipped = []
+        if layer_types:
+            for layer in self._layer_by_name.values():
+                if layer.type in layer_types:
+                    skipped.append(layer.name)
+        return skipped
+
+    @property
+    def layers(self) -> int:
+        return self._layer_by_id
+
+    def get_layer(self, layer_name: str) -> Layer:
+        return self._layer_by_name[layer_name]
+
+    def explore_inputs(self, layer: Layer, expected_input_types: List[str]) -> bool:
+        for layer_input in layer.inputs:
+            if layer_input.layer.type not in expected_input_types:
+                return False
+            if not self.explore_inputs(layer_input.layer, expected_input_types):
+                return False
+        return True
+
+    @property
+    def inputs(self):
+        inputs = dict()
+        for id, layer in self.layers.items():
+            if layer.type == 'Input':
+                inputs[id] = layer
+        return inputs
diff --git a/tools/utils/path.py b/tools/utils/path.py
new file mode 100644
index 000000000..ecc5e0227
--- /dev/null
+++ b/tools/utils/path.py
@@ -0,0 +1,67 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+import ntpath
+
+
+class Path:
+    @staticmethod
+    def get_model(model_file_path:str, addition:str = None, directory:str = None) -> str:
+        if model_file_path is None:
+            raise ValueError("model_file_path is None")
+
+        file_name = ntpath.basename(model_file_path)
+        model = os.path.splitext(file_name)
+        if len(model) < 2:
+            raise ValueError("model file name '{}' is not correct".format(file_name))
+        if directory:
+            return os.path.join(
+                directory, 
+                model[len(model) - 2] + (addition if addition else "") + ".xml")
+        else:
+            return os.path.join(
+                os.path.dirname(model_file_path), 
+                model[len(model) - 2] + (addition if addition else "") + ".xml")
+
+    @staticmethod
+    def get_weights(model_file_path:str, addition:str = None, directory:str = None) -> str:
+        if model_file_path is None:
+            raise ValueError("model_file_path is None")
+
+        file_name = ntpath.basename(model_file_path)
+        model = os.path.splitext(file_name)
+        if len(model) < 2:
+            raise ValueError("model file name '{}' is not correct".format(file_name))
+        if directory:
+            return os.path.join(
+                directory, 
+                model[len(model) - 2] + (addition if addition else "") + ".bin")
+        else:
+            return os.path.join(
+                os.path.dirname(model_file_path), 
+                model[len(model) - 2] + (addition if addition else "") + ".bin")
+
+    @staticmethod
+    def update_name(file_path: str, addition: str) -> str:
+        file_name = ntpath.basename(file_path)
+        parts = os.path.splitext(file_name)
+
+        name = parts[0]
+        extension = parts[-1] if len(parts) >= 2 else ""
+
+        dir = os.path.dirname(file_path)
+        return os.path.join(dir, name + addition + extension)
diff --git a/tools/utils/port.py b/tools/utils/port.py
new file mode 100644
index 000000000..348cae3f4
--- /dev/null
+++ b/tools/utils/port.py
@@ -0,0 +1,29 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Port:
+    def __init__(self, id: int, dim: list):
+        self._id = id
+        self._dim = dim
+
+    @property
+    def id(self):
+        return self._id
+
+    @property
+    def dim(self):
+        return self._dim
diff --git a/tools/utils/tensor_desc.py b/tools/utils/tensor_desc.py
new file mode 100644
index 000000000..67f1cd737
--- /dev/null
+++ b/tools/utils/tensor_desc.py
@@ -0,0 +1,19 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+class TensorDesc:
+    def __init__(self, dims: list):
+        pass
diff --git a/tools/utils/weights.py b/tools/utils/weights.py
new file mode 100644
index 000000000..30d890a93
--- /dev/null
+++ b/tools/utils/weights.py
@@ -0,0 +1,29 @@
+"""
+Copyright (C) 2018-2019 Intel Corporation
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+class Weights:
+    def __init__(self, offset: int, size: int):
+        self._offset = offset
+        self._size = size
+
+    @property
+    def offset(self) -> int:
+        return self._offset
+
+    @property
+    def size(self) -> int:
+        return self._size
author	Alexey Suhov <alexey.suhov@intel.com>	2019-04-12 18:25:53 +0300
committer	Alexey Suhov <alexey.suhov@intel.com>	2019-04-12 18:25:53 +0300
commit	72660e9a4d683dc6a0c50e9fad96e59b7edd1f71 (patch)
tree	335135f31e39d2bb330d05e1775b6e5bf2c8caad
parent	669bee86e580cbbc8ef40b440ab195ba2cbf5142 (diff)
download	dldt-72660e9a4d683dc6a0c50e9fad96e59b7edd1f71.tar.gz dldt-72660e9a4d683dc6a0c50e9fad96e59b7edd1f71.tar.bz2 dldt-72660e9a4d683dc6a0c50e9fad96e59b7edd1f71.zip